diff --git a/.bazelrc b/.bazelrc
index 49e9fdb83b9e..8664d43f8680 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -137,9 +137,6 @@ build:windows --experimental_strict_action_env=true
 # Verbose failure logs when something goes wrong
 build:windows --verbose_failures
 
-# On windows, we never cross compile
-build:windows --distinct_host_configuration=false
-
 # Suppress all warning messages.
 build:short_logs --output_filter=DONT_MATCH_ANYTHING
 build:verbose_logs --output_filter=
diff --git a/.bazelversion b/.bazelversion
new file mode 100644
index 000000000000..1e20ec35c642
--- /dev/null
+++ b/.bazelversion
@@ -0,0 +1 @@
+5.4.0
\ No newline at end of file
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 17b6f699a330..a200d9d64547 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,29 +1,8 @@
-FROM python:3.9
-
-# https://code.visualstudio.com/docs/remote/containers-advanced#_creating-a-nonroot-user
-ARG USERNAME=keras-vscode
-ARG USER_UID=1000
-ARG USER_GID=$USER_UID
-
-# Create the user
-RUN groupadd --gid $USER_GID $USERNAME \
-    && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
-    #
-    # [Optional] Add sudo support. Omit if you don't need to install software after connecting.
-    && apt-get update \
-    && apt-get install -y sudo bash \
-    && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
-    && chmod 0440 /etc/sudoers.d/$USERNAME
+FROM mcr.microsoft.com/vscode/devcontainers/python:3.9
+COPY setup.sh /setup.sh
 
 # Install Bazel
-RUN apt update
-RUN apt install curl gnupg -y
-RUN curl -fsSL https://bazel.build/bazel-release.pub.gpg | gpg --dearmor > bazel.gpg
-RUN mv bazel.gpg /etc/apt/trusted.gpg.d/
-RUN echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list
-RUN apt update && apt install bazel -y
-
-USER $USERNAME
-ENV PATH="/home/$USERNAME/.local/bin:${PATH}"
-
-CMD ["/bin/bash"]
\ No newline at end of file
+RUN sudo apt install wget -y
+RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64
+RUN chmod a+x bazelisk-linux-amd64
+RUN mv bazelisk-linux-amd64 /usr/bin/bazel
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index cc164d3f85c2..9c7b688f524d 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,6 +1,6 @@
 {
     "dockerFile": "Dockerfile",
-    "postCreateCommand": "pip install -r requirements.txt && pip uninstall keras-nightly -y",
+    "postCreateCommand": "sh /setup.sh",
     "extensions": ["ms-python.python"],
     "settings": {
         "files.watcherExclude": {
@@ -8,8 +8,6 @@
         },
         "search.exclude": {
             "**/bazel-*/**": true
-        },
-        "terminal.integrated.defaultProfile.linux": "bash"
-    },
-    "remoteUser": "keras-vscode"
-}
+        }
+    }
+}
\ No newline at end of file
diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh
new file mode 100644
index 000000000000..dc6232affd6e
--- /dev/null
+++ b/.devcontainer/setup.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+sudo pip install -r requirements.txt
+sudo pip uninstall keras-nightly -y
+
+wget https://github.com/cli/cli/releases/download/v2.17.0/gh_2.17.0_linux_amd64.deb -P /tmp
+sudo apt install /tmp/gh_2.17.0_linux_amd64.deb -y
\ No newline at end of file
diff --git a/.github/bot_config.yml b/.github/bot_config.yml
index 11cb9eb6cccf..758d1c24fce9 100644
--- a/.github/bot_config.yml
+++ b/.github/bot_config.yml
@@ -16,4 +16,3 @@
 # A list of assignees
 assignees:
    - tilakrayal
-   - sushreebarsa
diff --git a/.github/stale.yml b/.github/stale.yml
deleted file mode 100644
index f0432f4a8d56..000000000000
--- a/.github/stale.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-# Number of days of inactivity before an Issue or Pull Request becomes stale
-daysUntilStale: 7
-# Number of days of inactivity before a stale Issue or Pull Request is closed
-daysUntilClose: 7
-# Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled)
-onlyLabels:
-  - stat:awaiting response
-# Comment to post when marking as stale. Set to `false` to disable
-markComment: >
-  This issue has been automatically marked as stale because it has no
-  recent activity. It will be closed if no further activity occurs. Thank you.
-# Comment to post when removing the stale label. Set to `false` to disable
-unmarkComment: false
-closeComment: >
-  Closing as stale. Please reopen if you'd like to work on this further.
-limitPerRun: 30
-# Limit to only `issues` or `pulls`
-only: issues
diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
index b1b8fc1866ae..68e0256ba2b3 100644
--- a/.github/workflows/format.yml
+++ b/.github/workflows/format.yml
@@ -3,8 +3,13 @@ name: Format the code
 on:
   workflow_dispatch:
 
+permissions: {}
 jobs:
   createPullRequest:
+    permissions:
+      contents: write  # to create branch (peter-evans/create-pull-request)
+      pull-requests: write  # to create a PR (peter-evans/create-pull-request)
+
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
@@ -23,17 +28,17 @@ jobs:
             ${{ runner.os }}-pip-
       - name: Install dependencies
         run: |
-          pip install -r requirements.txt && pip uninstall keras-nightly -y
+          pip install black==22.3.0 isort==5.10.1 flake8==4.0.1
       - name: Format the code
-        run: black --line-length 80 keras
+        run: sh shell/format.sh
       - name: Create Pull Request
         id: cpr
         uses: peter-evans/create-pull-request@v4
         with:
           commit-message: format the code
-          committer: TensorFlower Gardener
-          author: TensorFlower Gardener
+          committer: A. Unique TensorFlower
+          author: A. Unique TensorFlower
           branch: format
           delete-branch: true
           title: 'Format the code'
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 000000000000..66388041bc5b
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,34 @@
+name: Lint
+
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read  # to fetch code (actions/checkout)
+
+jobs:
+  lint:
+    name: Check the code format
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Get pip cache dir
+        id: pip-cache
+        run: |
+          python -m pip install --upgrade pip setuptools
+          echo "::set-output name=dir::$(pip cache dir)"
+      - name: pip cache
+        uses: actions/cache@v2
+        with:
+          path: ${{ steps.pip-cache.outputs.dir }}
+          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+      - name: Install dependencies
+        run: |
+          pip install black==22.3.0 isort==5.10.1 flake8==4.0.1
+      - name: Lint the code
+        run: sh shell/lint.sh
diff --git a/.github/workflows/stale-issues-pr.yml b/.github/workflows/stale-issues-pr.yml
new file mode 100644
index 000000000000..3eab7a47959f
--- /dev/null
+++ b/.github/workflows/stale-issues-pr.yml
@@ -0,0 +1,47 @@
+name: Close inactive issues
+on:
+  schedule:
+    - cron: "30 1 * * *"
+
+jobs:
+  close-issues:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - name: Awaiting response issues
+        uses: actions/stale@v5
+        with:
+          days-before-issue-stale: 14
+          days-before-issue-close: 14
+          stale-issue-label: "stale"
+          # Reason for closing the issue; the default value is not_planned
+          close-issue-reason: completed
+          only-labels: "stat:awaiting response from contributor"
+          stale-issue-message: >
+            This issue is stale because it has been open for 14 days with no activity.
+            It will be closed if no further activity occurs. Thank you.
+          close-issue-message: >
+            This issue was closed because it has been inactive for 28 days.
+            Please reopen if you'd like to work on this further.
+          days-before-pr-stale: 14
+          days-before-pr-close: 14
+          stale-pr-message: "This PR is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you."
+          close-pr-message: "This PR was closed because it has been inactive for 28 days. Please reopen if you'd like to work on this further."
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Contribution issues
+        uses: actions/stale@v5
+        with:
+          days-before-issue-stale: 180
+          days-before-issue-close: 365
+          stale-issue-label: "stale"
+          # Reason for closing the issue; the default value is not_planned
+          close-issue-reason: not_planned
+          any-of-labels: "stat:contributions welcome,good first issue"
+          stale-issue-message: >
+            This issue is stale because it has been open for 180 days with no activity.
+            It will be closed if no further activity occurs. Thank you.
+          close-issue-message: >
+            This issue was closed because it has been inactive for more than 1 year.
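+          # GITHUB_TOKEN is supplied automatically by the Actions runtime; no manually-created secret is needed.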
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/.pylintrc b/.pylintrc
deleted file mode 100644
index d23c516b846e..000000000000
--- a/.pylintrc
+++ /dev/null
@@ -1,38 +0,0 @@
-[MESSAGES CONTROL]
-
-disable=
-    abstract-method,
-    access-member-before-definition,
-    arguments-differ,
-    attribute-defined-outside-init,
-    bad-continuation,
-    bad-option-value,
-    bad-whitespace,
-    c-extension-no-member,
-    design,
-    file-ignored,
-    fixme,
-    global-statement,
-    import-error,
-    import-outside-toplevel,
-    import-self,
-    interface-is-not-class,
-    invalid-metaclass,
-    invalid-name,
-    locally-disabled,
-    locally-enabled,
-    maybe-no-member,
-    method-hidden,
-    misplaced-comparison-constant,
-    missing-interface-method,
-    multiple-imports,
-    multiple-statements,
-    no-else-break,
-    no-else-continue,
-    no-else-raise,
-    no-else-return,
-    no-init,
-    no-member,
-    no-name-in-module,
-    no-self-use,
-    pointless-except,
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 000000000000..4c3bb7528b99
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,27 @@
+{
+    "python.linting.flake8Enabled": true,
+    "python.linting.pylintEnabled": false,
+    "python.linting.enabled": true,
+    "editor.rulers": [
+        80
+    ],
+    "editor.formatOnSave": true,
+    "python.formatting.provider": "black",
+    "python.formatting.blackArgs": [
+        "--line-length",
+        "80"
+    ],
+    "python.sortImports.args": [
+        "--profile",
+        "black",
+        "--sl"
+    ],
+    "[python]": {
+        "editor.codeActionsOnSave": {
+            "source.organizeImports": true
+        }
+    },
+    "python.analysis.diagnosticSeverityOverrides": {
+        "reportMissingImports": "none"
+    }
+}
diff --git a/BUILD b/BUILD
index 37d69b2d69be..73742ab2ae12 100644
--- a/BUILD
+++ b/BUILD
@@ -106,3 +106,13 @@ py_library(
     visibility = ["//visibility:public"],
     deps = [],
 )
+
+# Note that this dependency is for testing only.
+py_library(
+    name = "expect_tensorflow_io_installed",
+    # This is a dummy rule used as a tensorflow_io dependency in open-source.
+    # We expect tensorflow_io to already be installed on the system, e.g. via
+    # `pip install tensorflow-io`
+    visibility = ["//visibility:public"],
+    deps = [],
+)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0e314a4e256d..7dc9fe96eeb3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -14,7 +14,7 @@ to open a PR without discussion.
 ### Step 2. Make code changes
 
 To make code changes, you need to fork the repository. You will need to setup a
-development environment and run the unit tests. This is covered in section
+development environment and run the unit tests. This is covered in the section
 "Setup environment".
 
 ### Step 3. Create a pull request
@@ -39,7 +39,7 @@ add a `kokoro:force-run` label to trigger the continuous integration tests.
 
 ![CI tests tag](https://i.imgur.com/58NOCB0.png)
 
-If the tests fail, look into the error messages and try to fix it.
+If the tests fail, look into the error messages and try to fix them.
 
 ![CI tests](https://i.imgur.com/vVY0dZD.png)
 
@@ -63,7 +63,7 @@ for your reference.
 
 To setup the development environment, We provide two options. One is to use our
 Dockerfile, which builds into a container the required dev tools. Another one is
-to setup a local environment by install the dev tools needed.
+to setup a local environment by installing the dev tools needed.
 
 ### Option 1: Use a Docker container
 
@@ -99,7 +99,7 @@ You may modify the Dockerfile to your specific needs, like installing your own
 dev tools. You may also mount more volumes with the `-v` option, like your SSH
 credentials.
 
-Many popular editors today support developing in a container. Here is list of
+Many popular editors today support developing in a container. Here is the list of
 [supported editors](https://discuss.tensorflow.org/t/setup-your-favorite-editor-to-develop-keras)
 with setup instructions.
 
@@ -113,7 +113,7 @@ To setup your local dev environment, you will need the following tools.
 2. [git](https://github.com/) for code repository management.
 3. [python](https://www.python.org/) to build and code in Keras.
 
-The following commands checks the tools above are successfully installed. Note
+The following commands check the tools above are successfully installed. Note
 that Keras requires at least Python 3.7 to run.
 
 ```shell
 git --version
 python --version
 ```
 
 A [Python virtual environment](https://docs.python.org/3/tutorial/venv.html)
 (venv) is a powerful tool to create a self-contained environment that isolates
 any change from the system level config. It is highly recommended to avoid any
-unexpected dependency or version issue.
+unexpected dependency or version issues.
 
 With the following commands, you create a new venv, named `venv_dir`.
 
@@ -139,14 +139,14 @@ tests with the venv activated.
 You need to activate the venv every time you open a new shell.
 
 ```shell
-source venv_dir/bin/activate  # for linux or MacOS
+source venv_dir/bin/activate  # for Linux or MacOS
 venv_dir\Scripts\activate.bat  # for Windows
 ```
 
 Clone your forked repo to your local machine. Go to the cloned directory to
 install the dependencies into the venv. Since `tf-nightly` uses `keras-nightly`
 as a dependency, we need to uninstall `keras-nightly` so that tests will run
-against Keras code in local workspace.
+against Keras code in the local workspace.
 
 ```shell
 git clone https://github.com/YOUR_GITHUB_USERNAME/keras.git
@@ -165,18 +165,54 @@ pip install --upgrade tf-nightly
 
 ## Code style
 
-The Keras codebase uses the PEP 8 Python style conventions -- with the
-exception that it uses 2 spaces for indentation instead of 4.
-To check code style, please run the `pylint` command from the repo's
-root directory so that the configuration in
-`.pylintrc` is taken into account.
+Keras uses [Black](https://black.readthedocs.io/en/stable/) and
+[isort](https://pycqa.github.io/isort/) to format the code. Please refer to
+[requirements.txt](https://github.com/keras-team/keras/blob/master/requirements.txt)
+for the required versions. Run the following command **at the root directory of
+the repo** to format your code.
 
-```shell
-pylint path/to/changed_file.py
 ```
+sh shell/format.sh
+```
+
+It will also display the errors that cannot be resolved by autoformatting. You
+need to follow the output of the command to resolve them manually.
+
+If you do not want to auto-format the code but only show the lint errors, you
+can run `sh shell/lint.sh` **at the root directory of the repo**.
+
+### Docstrings
+
+We do not have an automated way to check docstring style, so if you write
+or edit any docstring, please make sure to check them manually.
+Keras docstrings follow the conventions below:
+
+A **class docstring** may contain the following items:
+
+* A one-line description of the class.
+* Paragraph(s) of more detailed information.
+* Optional `Examples` section.
+* `Args` section for arguments in `__init__()`.
+* If it's a layer:
+    * `Call arguments` section for arguments in `Layer.call()`.
+    * `Returns` section for the return values of `Layer.call()`.
+    * Optional `Raises` section for possible errors.
+
+You can check out `MultiHeadAttention` as an example
+[(link)](https://github.com/keras-team/keras/blob/v2.12.0-rc1/keras/layers/attention/multi_head_attention.py#L131).
+
+A **function docstring** may contain the following items:
+
+* One-line description of the function.
+* Paragraph(s) of more detailed information.
+* Optional `Examples` section.
+* `Args` section for the function arguments.
+* `Returns` section for the return values.
+* Optional `Raises` section for possible errors.
+
+You can check out `text_dataset_from_directory` as an example
+[(link)](https://github.com/keras-team/keras/blob/v2.12.0-rc1/keras/utils/text_dataset.py#L31).
 
-Please ignore the errors in the rest of the codebase and only fix the ones
-relevant to your changes.
 
 ## Run tests
 
@@ -198,7 +234,7 @@ defining the test. `base_layer_test` is the test target name defined with
 ### Run a single test case
 
 To run a single test, you can use `--test_filter=`
-to use regular expression to match the test you want to run. For example, you
+to use a regular expression to match the test you want to run. For example, you
 can use the following command to run all the tests in `activations_test.py`,
 whose names contain `test_serialization`.
 
@@ -212,7 +248,7 @@ You can run all the tests locally by running the following command in the repo
 root directory.
 
 ```
-bazel test --test_timeout 300,450,1200,3600 --test_output=errors --keep_going --define=use_fast_cpp_protos=false --build_tests_only --build_tag_filters=-no_oss --test_tag_filters=-no_oss keras/...
+bazel test --test_timeout 300,450,1200,3600 --test_output=errors --keep_going --define=use_fast_cpp_protos=false --build_tests_only --build_tag_filters=-no_oss,-oss_excluded --test_tag_filters=-no_oss,-oss_excluded keras/...
 ```
 
 ### Useful configs
 
@@ -258,7 +294,7 @@ mind.
 - You should add any new applications to the unit tests defined in
   `applications_test.py` and `applications_load_weight_test.py`.
 - For backwards compatibility, all applications should provide a
-  `preprocess_input()` function. For new applciations, you should leave the
+  `preprocess_input()` function. For new applications, you should leave the
   function empty (pass through inputs unaltered), and write the model so it can
   handle raw inputs directly. Adding
   [preprocessing layers](https://keras.io/guides/preprocessing_layers/) to the
@@ -272,4 +308,9 @@ mind.
 - As every PR requires several CPU/GPU hours of CI testing, we discourage
   submitting PRs to fix one typo, one warning,etc. We recommend fixing the
   same issue at the file level at least (e.g.: fix all typos in a file, fix
-  all compiler warning in a file, etc.)
+  all compiler warnings in a file, etc.)
+
+## Security vulnerability reports
+
+Since Keras is the high-level API of TensorFlow 2, Keras follows the same security practices as TensorFlow.
+For details and guidelines on vulnerabilities and how to report them, refer to [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md).
diff --git a/README.md b/README.md
index 37675e0a4c9c..dfbe1608883d 100644
--- a/README.md
+++ b/README.md
@@ -9,20 +9,25 @@ Read the documentation at [keras.io](https://keras.io/).
 
 Keras is a deep learning API written in Python,
 running on top of the machine learning platform [TensorFlow](https://github.com/tensorflow/tensorflow).
-It was developed with a focus on enabling fast experimentation.
-*Being able to go from idea to result as fast as possible is key to doing good research.*
+It was developed with a focus on enabling fast experimentation and
+providing a delightful developer experience.
+
+**The purpose of Keras is to give an *unfair advantage* to any developer looking to ship ML-powered apps.**
 
 Keras is:
 
 - **Simple** -- but not simplistic. Keras reduces developer *cognitive load* to
   free you to focus on the parts of the problem that really matter.
+  Keras focuses on ease of use, debugging speed, code elegance & conciseness,
+  maintainability, and deployability (via TFServing, TFLite, TF.js).
 - **Flexible** -- Keras adopts the principle of *progressive disclosure of complexity*:
   simple workflows should be quick and easy, while arbitrarily advanced workflows
   should be *possible* via a clear path that builds upon what you've already learned.
 - **Powerful** -- Keras provides industry-strength performance and scalability:
   it is used by organizations and companies including NASA,
-  YouTube, and Waymo.
+  YouTube, and Waymo. That's right -- your YouTube recommendations are
+  powered by Keras, and so is the world's most advanced driverless vehicle.
 
 ---
 
@@ -52,9 +57,9 @@ and you can export your Keras models to run in the browser or on a mobile device
 ## First contact with Keras
 
 The core data structures of Keras are __layers__ and __models__.
-The simplest type of model is the [`Sequential` model](/guides/sequential_model/), a linear stack of layers.
-For more complex architectures, you should use the [Keras functional API](/guides/functional_api/),
-which allows to build arbitrary graphs of layers, or [write models entirely from scratch via subclasssing](/guides/making_new_layers_and_models_via_subclassing/).
+The simplest type of model is the [`Sequential` model](https://keras.io/guides/sequential_model/), a linear stack of layers.
+For more complex architectures, you should use the [Keras functional API](https://keras.io/guides/functional_api/),
+which allows you to build arbitrary graphs of layers or [write models entirely from scratch via subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/).
 
 Here is the `Sequential` model:
 
@@ -82,7 +87,7 @@ model.compile(loss='categorical_crossentropy',
 ```
 
 If you need to, you can further configure your optimizer. The Keras philosophy is to keep simple things simple,
-while allowing the user to be fully in control when they need to (the ultimate control being the easy extensibility of the source code via subclassing).
+while allowing the user to be fully in control when they need to be (the ultimate control being the easy extensibility of the source code via subclassing).
 
 ```python
 model.compile(loss=tf.keras.losses.categorical_crossentropy,
@@ -116,7 +121,7 @@ Keras follows the principle of **progressive disclosure of complexity**: it make
 yet it makes it possible to handle arbitrarily advanced use cases,
 only requiring incremental learning at each step.
 
-In much the same way that you were able to train & evaluate a simple neural network above in a few lines,
+In pretty much the same way that you were able to train & evaluate a simple neural network above in a few lines,
 you can use Keras to quickly develop new training procedures or exotic model architectures.
 Here's a low-level training loop example, combining Keras functionality with the TensorFlow `GradientTape`:
 
@@ -156,6 +161,11 @@ For more in-depth tutorials about Keras, you can check out:
 
 Keras comes packaged with TensorFlow 2 as `tensorflow.keras`.
 To start using Keras, simply [install TensorFlow 2](https://www.tensorflow.org/install).
+You can then import Keras as follows:
+
+```python
+from tensorflow import keras
+```
 
 ---
 
@@ -174,26 +184,17 @@ version maps to a specific stable version of TensorFlow. The table below shows
 the compatibility version mapping between TensorFlow versions and Keras
 versions.
 
-All the release branches can be found on [Github](https://github.com/keras-team/keras/releases).
+All the release branches can be found on [GitHub](https://github.com/keras-team/keras/releases).
 All the release binaries can be found on [Pypi](https://pypi.org/project/keras/#history).
 
-| Keras release | Note | Compatible Tensorflow version |
-| ----------- | ----------- | ----------- |
-| [2.4](https://github.com/keras-team/keras/releases/tag/2.4.0) | Last stable release of multi-backend Keras | < 2.5
-| 2.5-pre| Pre-release (not formal) for standalone Keras repo | >= 2.5 < 2.6
-| [2.6](https://github.com/keras-team/keras/releases/tag/v2.6.0) | First formal release of standalone Keras. | >= 2.6 < 2.7
-| [2.7](https://github.com/keras-team/keras/releases/tag/v2.7.0-rc0) | (Upcoming release) | >= 2.7 < 2.8
-| nightly| | tf-nightly
-
 ---
 
 ## Support
 
 You can ask questions and join the development discussion:
 
 - In the [TensorFlow forum](https://discuss.tensorflow.org/).
-- On the [Keras Google group](https://groups.google.com/forum/#!forum/keras-users).
-- On the [Keras Slack channel](https://kerasteam.slack.com). Use [this link](https://keras-slack-autojoin.herokuapp.com/) to request an invitation to the channel.
+- On the [Keras mailing list](https://groups.google.com/forum/#!forum/keras-users).
 
 ---
 
diff --git a/WORKSPACE b/WORKSPACE
index 898b5b6dffce..c0ebc4e52ac5 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -6,8 +6,11 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 http_archive(
     name = "bazel_skylib",
-    url = "https://github.com/bazelbuild/bazel-skylib/releases/download/1.0.1/bazel-skylib-1.0.1.tar.gz",
-    sha256 = "f1c8360c01fcf276778d3519394805dc2a71a64274a3a0908bc9edff7b5aebc8",
+    urls = [
+        "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+        "https://github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+    ],
+    sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506",
 )
 load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
 bazel_skylib_workspace()
@@ -16,12 +19,9 @@ bazel_skylib_workspace()
 http_archive(
     name = "six_archive",
     build_file = "//third_party:six.BUILD",
-    sha256 = "d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73",
-    strip_prefix = "six-1.12.0",
-    urls = [
-        "http://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.12.0.tar.gz",
-        "https://pypi.python.org/packages/source/s/six/six-1.12.0.tar.gz",  # 2018-12-10
-    ],
+    sha256 = "1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
+    strip_prefix = "six-1.16.0",
+    urls = ["https://pypi.python.org/packages/source/s/six/six-1.16.0.tar.gz"],
 )
 
 bind(
@@ -31,18 +31,21 @@ bind(
 
 http_archive(
     name = "com_google_protobuf",
-    sha256 = "1fbf1c2962af287607232b2eddeaec9b4f4a7a6f5934e1a9276e9af76952f7e0",
-    strip_prefix = "protobuf-3.9.2",
-    urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.9.2.tar.gz"],
+    sha256 = "f66073dee0bc159157b0bd7f502d7d1ee0bc76b3c1eac9836927511bdc4b3fc1",
+    strip_prefix = "protobuf-3.21.9",
+    urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.21.9.zip"],
 )
 
 # ZLIB. Need by com_google_protobuf.
 http_archive(
     name = "zlib",
     build_file = "@com_google_protobuf//:third_party/zlib.BUILD",
-    sha256 = "91844808532e5ce316b3c010929493c0244f3d37593afd6de04f71821d5136d9",
-    strip_prefix = "zlib-1.2.12",
-    urls = ["https://zlib.net/zlib-1.2.12.tar.gz"],
+    sha256 = "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30",
+    strip_prefix = "zlib-1.2.13",
+    urls = [
+        "https://storage.googleapis.com/mirror.tensorflow.org/zlib.net/zlib-1.2.13.tar.gz",
+        "https://zlib.net/zlib-1.2.13.tar.gz",
+    ],
 )
diff --git a/keras/BUILD b/keras/BUILD
index 6d94758b2b6c..d31fcbc2b0e3 100644
--- a/keras/BUILD
+++ b/keras/BUILD
@@ -1,16 +1,16 @@
 # Description:
 #   Contains the Keras API (internal TensorFlow version).
 
+# Placeholder: load unaliased py_library
 load("@org_keras//keras:keras.bzl", "tf_py_test")
 
+# copybara:uncomment_begin(google-only)
+# load("//tools/build_defs/license:license.bzl", "license")
+# copybara:uncomment_end
+
 package(
-    default_visibility = [
-        ":friends",
-        "//third_party/py/tensorflow:__subpackages__",
-        "//third_party/tensorflow/python/feature_column:__subpackages__",  # For unit test
-        "//third_party/tensorflow/python/tpu:__subpackages__",  # For unit test
-        "//third_party/tensorflow_estimator:__subpackages__",
-    ],
+    # copybara:uncomment default_applicable_licenses = [":license"],
+    default_visibility = [":friends"],
     licenses = ["notice"],
 )
 
@@ -48,7 +48,6 @@ py_library(
         "//keras/applications",
         "//keras/datasets",
         "//keras/distribute",
-        "//keras/dtensor:optimizers",
         "//keras/estimator",
         "//keras/feature_column",
         "//keras/layers",
@@ -64,7 +63,6 @@ py_library(
         "//keras/testing_infra:keras_doctest_lib",
         "//keras/testing_infra:test_utils",  # For keras.__internal__ API
         "//keras/utils",
-        "//keras/wrappers",
     ],
 )
 
@@ -183,7 +181,7 @@ py_library(
     deps = [
         ":backend",
         "//:expect_tensorflow_installed",
-        "//keras/saving/experimental",
+        "//keras/saving:saving_lib",
         "//keras/utils:engine_utils",
         "//keras/utils:generic_utils",
         "//keras/utils:tf_utils",
@@ -213,6 +211,20 @@ py_library(
 # )
 # copybara:uncomment_end
 
+# Some tf.distribute related feature requires detecting platform.
+# Internally we'd like to recognize Borg, which is not needed in OSS.
+# copybara:uncomment_begin(google-only)
+# py_library(
+#     name = "distribute_utils",
+#     srcs = ["google/distribute_utils.py"],
+#     deps = [
+#         "//:expect_six_installed",
+#         "//:expect_tensorflow_installed",
+#         "//third_party/py/requests",
+#     ],
+# )
+# copybara:uncomment_end
+
 tf_py_test(
     name = "activations_test",
     size = "small",
@@ -264,6 +276,7 @@ tf_py_test(
     size = "small",
     srcs = ["losses_test.py"],
     python_version = "PY3",
+    shard_count = 4,
     tags = [
         "noasan",  # b/186128525
     ],
@@ -284,6 +297,7 @@ tf_py_test(
     python_version = "PY3",
     shard_count = 6,
     tags = [
+        "no_pip",  # TODO(b/276923757)
        "no_tfrt",  # TODO(b/179690526)
        "notsan",
     ],
@@ -301,7 +315,10 @@ tf_py_test(
     size = "medium",
     srcs = ["callbacks_v1_test.py"],
     python_version = "PY3",
-    tags = ["notsan"],
+    tags = [
+        "nomac",  # Using profiler causes segfault in MacOS runs.
+ "notsan", + ], deps = [ ":callbacks", ":callbacks_v1", @@ -357,4 +374,21 @@ tf_py_test( # "//testing/pymocks:matchers", # ], # ) +# +# tf_py_test( +# name = "distribute_utils_test", +# srcs = ["google/distribute_utils_test.py"], +# python_version = "PY3", +# deps = [ +# ":distribute_utils", +# "//:expect_tensorflow_installed", +# "//keras/distribute", +# "//testing/pymocks:matchers", +# ], +# ) +# +# license( +# name = "license", +# package_name = "keras", +# ) # copybara:uncomment_end diff --git a/keras/__init__.py b/keras/__init__.py index 9dbe10b3e4f0..f4a25e8f3447 100644 --- a/keras/__init__.py +++ b/keras/__init__.py @@ -17,18 +17,17 @@ Detailed documentation and user guides are available at [keras.io](https://keras.io). """ -# pylint: disable=unused-import -from tensorflow.python import tf2 from keras import distribute - from keras import models - from keras.engine.input_layer import Input from keras.engine.sequential import Sequential from keras.engine.training import Model +# isort: off + +from tensorflow.python import tf2 from tensorflow.python.util.tf_export import keras_export -__version__ = '2.10.0' +__version__ = "2.15.0" -keras_export('keras.__version__').export_constant(__name__, '__version__') +keras_export("keras.__version__").export_constant(__name__, "__version__") diff --git a/keras/activations.py b/keras/activations.py index 7499adea7df8..776f8e0322ab 100644 --- a/keras/activations.py +++ b/keras/activations.py @@ -15,12 +15,19 @@ """Built-in activation functions.""" import sys +import types import tensorflow.compat.v2 as tf -from keras import backend import keras.layers.activation as activation_layers +from keras import backend +from keras.saving import object_registration +from keras.saving import serialization_lib +from keras.saving.legacy import serialization as legacy_serialization +from keras.saving.legacy.saved_model import utils as saved_model_utils from keras.utils import generic_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export # b/123041942 @@ -32,484 +39,546 @@ # This dict maps the activation function name from its v2 version to its # canonical name. _TF_ACTIVATIONS_V2 = { - 'softmax_v2': 'softmax', + "softmax_v2": "softmax", } -@keras_export('keras.activations.softmax') +@keras_export("keras.activations.softmax") @tf.__internal__.dispatch.add_dispatch_support def softmax(x, axis=-1): - """Softmax converts a vector of values to a probability distribution. + """Softmax converts a vector of values to a probability distribution. - The elements of the output vector are in range (0, 1) and sum to 1. + The elements of the output vector are in range (0, 1) and sum to 1. - Each vector is handled independently. The `axis` argument sets which axis - of the input the function is applied along. + Each vector is handled independently. The `axis` argument sets which axis + of the input the function is applied along. - Softmax is often used as the activation for the last - layer of a classification network because the result could be interpreted as - a probability distribution. + Softmax is often used as the activation for the last + layer of a classification network because the result could be interpreted as + a probability distribution. - The softmax of each vector x is computed as - `exp(x) / tf.reduce_sum(exp(x))`. + The softmax of each vector x is computed as + `exp(x) / tf.reduce_sum(exp(x))`. - The input values in are the log-odds of the resulting probability. + The input values in are the log-odds of the resulting probability. 
 
-  Args:
-    x : Input tensor.
-    axis: Integer, axis along which the softmax normalization is applied.
+    Args:
+        x : Input tensor.
+        axis: Integer, axis along which the softmax normalization is applied.
 
-  Returns:
-    Tensor, output of softmax transformation (all values are non-negative
-      and sum to 1).
+    Returns:
+        Tensor, output of softmax transformation (all values are non-negative
+        and sum to 1).
 
-  Examples:
+    Examples:
 
-  **Example 1: standalone usage**
+    **Example 1: standalone usage**
 
-  >>> inputs = tf.random.normal(shape=(32, 10))
-  >>> outputs = tf.keras.activations.softmax(inputs)
-  >>> tf.reduce_sum(outputs[0, :])  # Each sample in the batch now sums to 1
-  <tf.Tensor: shape=(), dtype=float32, numpy=1.0>
+    >>> inputs = tf.random.normal(shape=(32, 10))
+    >>> outputs = tf.keras.activations.softmax(inputs)
+    >>> tf.reduce_sum(outputs[0, :])  # Each sample in the batch now sums to 1
+    <tf.Tensor: shape=(), dtype=float32, numpy=1.0>
 
-  **Example 2: usage in a `Dense` layer**
+    **Example 2: usage in a `Dense` layer**
 
-  >>> layer = tf.keras.layers.Dense(32, activation=tf.keras.activations.softmax)
-  """
-  if x.shape.rank > 1:
-    if isinstance(axis, int):
-      output = tf.nn.softmax(x, axis=axis)
-    else:
-      # nn.softmax does not support tuple axis.
-      e = tf.exp(x - tf.reduce_max(x, axis=axis, keepdims=True))
-      s = tf.reduce_sum(e, axis=axis, keepdims=True)
-      output = e / s
-  else:
-    raise ValueError('Cannot apply softmax to a tensor that is 1D. '
-                     f'Received input: {x}')
+    >>> layer = tf.keras.layers.Dense(32,
+    ...                               activation=tf.keras.activations.softmax)
+    """
+    return backend.softmax(x, axis)
 
-  # Cache the logits to use for crossentropy loss.
-  output._keras_logits = x  # pylint: disable=protected-access
-  return output
 
-
-@keras_export('keras.activations.elu')
+@keras_export("keras.activations.elu")
 @tf.__internal__.dispatch.add_dispatch_support
 def elu(x, alpha=1.0):
-  """Exponential Linear Unit.
-
-  The exponential linear unit (ELU) with `alpha > 0` is:
-  `x` if `x > 0` and
-  `alpha * (exp(x) - 1)` if `x < 0`
-  The ELU hyperparameter `alpha` controls the value to which an
-  ELU saturates for negative net inputs. ELUs diminish the
-  vanishing gradient effect.
-
-  ELUs have negative values which pushes the mean of the activations
-  closer to zero.
-  Mean activations that are closer to zero enable faster learning as they
-  bring the gradient closer to the natural gradient.
-  ELUs saturate to a negative value when the argument gets smaller.
-  Saturation means a small derivative which decreases the variation
-  and the information that is propagated to the next layer.
-
-  Example Usage:
-
-  >>> import tensorflow as tf
-  >>> model = tf.keras.Sequential()
-  >>> model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='elu',
-  ...          input_shape=(28, 28, 1)))
-  >>> model.add(tf.keras.layers.MaxPooling2D((2, 2)))
-  >>> model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='elu'))
-  >>> model.add(tf.keras.layers.MaxPooling2D((2, 2)))
-  >>> model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='elu'))
-
-
-
-  Args:
-      x: Input tensor.
-      alpha: A scalar, slope of negative section. `alpha` controls the value to
-        which an ELU saturates for negative net inputs.
-
-  Returns:
-      The exponential linear unit (ELU) activation function: `x` if `x > 0` and
-      `alpha * (exp(x) - 1)` if `x < 0`.
-
-
-  Reference:
-      [Fast and Accurate Deep Network Learning by Exponential Linear Units
-      (ELUs) (Clevert et al, 2016)](https://arxiv.org/abs/1511.07289)
-  """
-  return backend.elu(x, alpha)
-
-
-@keras_export('keras.activations.selu')
+    """Exponential Linear Unit.
+
+    The exponential linear unit (ELU) with `alpha > 0` is:
+    `x` if `x > 0` and
+    `alpha * (exp(x) - 1)` if `x < 0`
+    The ELU hyperparameter `alpha` controls the value to which an
+    ELU saturates for negative net inputs. ELUs diminish the
+    vanishing gradient effect.
+
+    ELUs have negative values which pushes the mean of the activations
+    closer to zero.
+    Mean activations that are closer to zero enable faster learning as they
+    bring the gradient closer to the natural gradient.
+    ELUs saturate to a negative value when the argument gets smaller.
+    Saturation means a small derivative which decreases the variation
+    and the information that is propagated to the next layer.
+
+    Example Usage:
+
+    >>> import tensorflow as tf
+    >>> model = tf.keras.Sequential()
+    >>> model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='elu',
+    ...          input_shape=(28, 28, 1)))
+    >>> model.add(tf.keras.layers.MaxPooling2D((2, 2)))
+    >>> model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='elu'))
+    >>> model.add(tf.keras.layers.MaxPooling2D((2, 2)))
+    >>> model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='elu'))
+
+
+
+    Args:
+        x: Input tensor.
+        alpha: A scalar, slope of negative section. `alpha` controls the value
+            to which an ELU saturates for negative net inputs.
+
+    Returns:
+        The exponential linear unit (ELU) activation function: `x` if `x > 0`
+        and `alpha * (exp(x) - 1)` if `x < 0`.
+
+
+    Reference:
+        - [Fast and Accurate Deep Network Learning by Exponential Linear Units
+          (ELUs) (Clevert et al, 2016)](https://arxiv.org/abs/1511.07289)
+    """
+    return backend.elu(x, alpha)
+
+
+@keras_export("keras.activations.selu")
 @tf.__internal__.dispatch.add_dispatch_support
 def selu(x):
-  """Scaled Exponential Linear Unit (SELU).
+    """Scaled Exponential Linear Unit (SELU).
 
-  The Scaled Exponential Linear Unit (SELU) activation function is defined as:
+    The Scaled Exponential Linear Unit (SELU) activation function is defined as:
 
-  - `if x > 0: return scale * x`
-  - `if x < 0: return scale * alpha * (exp(x) - 1)`
+    - `if x > 0: return scale * x`
+    - `if x < 0: return scale * alpha * (exp(x) - 1)`
 
-  where `alpha` and `scale` are pre-defined constants
-  (`alpha=1.67326324` and `scale=1.05070098`).
+    where `alpha` and `scale` are pre-defined constants
+    (`alpha=1.67326324` and `scale=1.05070098`).
 
-  Basically, the SELU activation function multiplies `scale` (> 1) with the
-  output of the `tf.keras.activations.elu` function to ensure a slope larger
-  than one for positive inputs.
+    Basically, the SELU activation function multiplies `scale` (> 1) with the
+    output of the `tf.keras.activations.elu` function to ensure a slope larger
+    than one for positive inputs.
 
-  The values of `alpha` and `scale` are
-  chosen so that the mean and variance of the inputs are preserved
-  between two consecutive layers as long as the weights are initialized
-  correctly (see `tf.keras.initializers.LecunNormal` initializer)
-  and the number of input units is "large enough"
-  (see reference paper for more information).
+    The values of `alpha` and `scale` are
+    chosen so that the mean and variance of the inputs are preserved
+    between two consecutive layers as long as the weights are initialized
+    correctly (see `tf.keras.initializers.LecunNormal` initializer)
+    and the number of input units is "large enough"
+    (see reference paper for more information).
 
-  Example Usage:
+    Example Usage:
 
-  >>> num_classes = 10  # 10-class problem
-  >>> model = tf.keras.Sequential()
-  >>> model.add(tf.keras.layers.Dense(64, kernel_initializer='lecun_normal',
-  ...                                 activation='selu'))
-  >>> model.add(tf.keras.layers.Dense(32, kernel_initializer='lecun_normal',
-  ...                                 activation='selu'))
-  >>> model.add(tf.keras.layers.Dense(16, kernel_initializer='lecun_normal',
-  ...                                 activation='selu'))
-  >>> model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
+    >>> num_classes = 10  # 10-class problem
+    >>> model = tf.keras.Sequential()
+    >>> model.add(tf.keras.layers.Dense(64, kernel_initializer='lecun_normal',
+    ...                                 activation='selu'))
+    >>> model.add(tf.keras.layers.Dense(32, kernel_initializer='lecun_normal',
+    ...                                 activation='selu'))
+    >>> model.add(tf.keras.layers.Dense(16, kernel_initializer='lecun_normal',
+    ...                                 activation='selu'))
+    >>> model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
 
-  Args:
-    x: A tensor or variable to compute the activation function for.
+    Args:
+        x: A tensor or variable to compute the activation function for.
 
-  Returns:
-    The scaled exponential unit activation: `scale * elu(x, alpha)`.
+    Returns:
+        The scaled exponential unit activation: `scale * elu(x, alpha)`.
 
-  Notes:
-    - To be used together with the
-      `tf.keras.initializers.LecunNormal` initializer.
-    - To be used together with the dropout variant
-      `tf.keras.layers.AlphaDropout` (not regular dropout).
+    Notes:
+        - To be used together with the
+          `tf.keras.initializers.LecunNormal` initializer.
+        - To be used together with the dropout variant
+          `tf.keras.layers.AlphaDropout` (not regular dropout).
 
-  References:
-    - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
-  """
-  return tf.nn.selu(x)
+    References:
+        - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
+    """
+    return tf.nn.selu(x)
 
 
-@keras_export('keras.activations.softplus')
+@keras_export("keras.activations.softplus")
 @tf.__internal__.dispatch.add_dispatch_support
 def softplus(x):
-  """Softplus activation function, `softplus(x) = log(exp(x) + 1)`.
+    """Softplus activation function, `softplus(x) = log(exp(x) + 1)`.
 
-  Example Usage:
+    Example Usage:
 
-  >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32)
-  >>> b = tf.keras.activations.softplus(a)
-  >>> b.numpy()
-  array([2.0611537e-09, 3.1326166e-01, 6.9314718e-01, 1.3132616e+00,
-           2.0000000e+01], dtype=float32)
+    >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32)
+    >>> b = tf.keras.activations.softplus(a)
+    >>> b.numpy()
+    array([2.0611537e-09, 3.1326166e-01, 6.9314718e-01, 1.3132616e+00,
+             2.0000000e+01], dtype=float32)
 
-  Args:
-    x: Input tensor.
+    Args:
+        x: Input tensor.
 
-  Returns:
-    The softplus activation: `log(exp(x) + 1)`.
-  """
-  return tf.math.softplus(x)
+    Returns:
+        The softplus activation: `log(exp(x) + 1)`.
+    """
+    return tf.math.softplus(x)
 
 
-@keras_export('keras.activations.softsign')
+@keras_export("keras.activations.softsign")
 @tf.__internal__.dispatch.add_dispatch_support
 def softsign(x):
-  """Softsign activation function, `softsign(x) = x / (abs(x) + 1)`.
+    """Softsign activation function, `softsign(x) = x / (abs(x) + 1)`.
 
-  Example Usage:
+    Example Usage:
 
-  >>> a = tf.constant([-1.0, 0.0, 1.0], dtype = tf.float32)
-  >>> b = tf.keras.activations.softsign(a)
-  >>> b.numpy()
-  array([-0.5,  0. ,  0.5], dtype=float32)
+    >>> a = tf.constant([-1.0, 0.0, 1.0], dtype = tf.float32)
+    >>> b = tf.keras.activations.softsign(a)
+    >>> b.numpy()
+    array([-0.5,  0. ,  0.5], dtype=float32)
 
-  Args:
-    x: Input tensor.
+    Args:
+        x: Input tensor.
 
-  Returns:
-    The softsign activation: `x / (abs(x) + 1)`.
-  """
-  return tf.math.softsign(x)
+    Returns:
+        The softsign activation: `x / (abs(x) + 1)`.
+    """
+    return tf.math.softsign(x)
 
 
-@keras_export('keras.activations.swish')
+@keras_export("keras.activations.swish")
 @tf.__internal__.dispatch.add_dispatch_support
 def swish(x):
-  """Swish activation function, `swish(x) = x * sigmoid(x)`.
+    """Swish activation function, `swish(x) = x * sigmoid(x)`.
 
-  Swish activation function which returns `x*sigmoid(x)`.
-  It is a smooth, non-monotonic function that consistently matches
-  or outperforms ReLU on deep networks, it is unbounded above and
-  bounded below.
+    Swish activation function which returns `x*sigmoid(x)`.
+    It is a smooth, non-monotonic function that consistently matches
+    or outperforms ReLU on deep networks, it is unbounded above and
+    bounded below.
 
-  Example Usage:
+    Example Usage:
 
-  >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32)
-  >>> b = tf.keras.activations.swish(a)
-  >>> b.numpy()
-  array([-4.1223075e-08, -2.6894143e-01,  0.0000000e+00,  7.3105860e-01,
-          2.0000000e+01], dtype=float32)
+    >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32)
+    >>> b = tf.keras.activations.swish(a)
+    >>> b.numpy()
+    array([-4.1223075e-08, -2.6894143e-01,  0.0000000e+00,  7.3105860e-01,
+            2.0000000e+01], dtype=float32)
 
-  Args:
-    x: Input tensor.
+    Args:
+        x: Input tensor.
 
-  Returns:
-    The swish activation applied to `x` (see reference paper for details).
+    Returns:
+        The swish activation applied to `x` (see reference paper for details).
 
-  Reference:
-    - [Ramachandran et al., 2017](https://arxiv.org/abs/1710.05941)
-  """
-  return tf.nn.silu(x)
+    Reference:
+        - [Ramachandran et al., 2017](https://arxiv.org/abs/1710.05941)
+    """
+    return tf.nn.silu(x)
 
 
-@keras_export('keras.activations.relu')
+@keras_export("keras.activations.relu")
 @tf.__internal__.dispatch.add_dispatch_support
-def relu(x, alpha=0., max_value=None, threshold=0.):
-  """Applies the rectified linear unit activation function.
-
-  With default values, this returns the standard ReLU activation:
-  `max(x, 0)`, the element-wise maximum of 0 and the input tensor.
-
-  Modifying default parameters allows you to use non-zero thresholds,
-  change the max value of the activation,
-  and to use a non-zero multiple of the input for values below the threshold.
-
-  For example:
-
-  >>> foo = tf.constant([-10, -5, 0.0, 5, 10], dtype = tf.float32)
-  >>> tf.keras.activations.relu(foo).numpy()
-  array([ 0.,  0.,  0.,  5., 10.], dtype=float32)
-  >>> tf.keras.activations.relu(foo, alpha=0.5).numpy()
-  array([-5. , -2.5,  0. ,  5. , 10. ], dtype=float32)
-  >>> tf.keras.activations.relu(foo, max_value=5.).numpy()
-  array([0., 0., 0., 5., 5.], dtype=float32)
-  >>> tf.keras.activations.relu(foo, threshold=5.).numpy()
-  array([-0., -0.,  0.,  0., 10.], dtype=float32)
-
-  Args:
-      x: Input `tensor` or `variable`.
-      alpha: A `float` that governs the slope for values lower than the
-        threshold.
-      max_value: A `float` that sets the saturation threshold (the largest value
-        the function will return).
-      threshold: A `float` giving the threshold value of the activation function
-        below which values will be damped or set to zero.
-
-  Returns:
-      A `Tensor` representing the input tensor,
-      transformed by the relu activation function.
-      Tensor will be of the same shape and dtype of input `x`.
- """ - return backend.relu(x, alpha=alpha, max_value=max_value, threshold=threshold) - - -@keras_export('keras.activations.gelu', v1=[]) +def relu(x, alpha=0.0, max_value=None, threshold=0.0): + """Applies the rectified linear unit activation function. + + With default values, this returns the standard ReLU activation: + `max(x, 0)`, the element-wise maximum of 0 and the input tensor. + + Modifying default parameters allows you to use non-zero thresholds, + change the max value of the activation, + and to use a non-zero multiple of the input for values below the threshold. + + Example: + + >>> foo = tf.constant([-10, -5, 0.0, 5, 10], dtype = tf.float32) + >>> tf.keras.activations.relu(foo).numpy() + array([ 0., 0., 0., 5., 10.], dtype=float32) + >>> tf.keras.activations.relu(foo, alpha=0.5).numpy() + array([-5. , -2.5, 0. , 5. , 10. ], dtype=float32) + >>> tf.keras.activations.relu(foo, max_value=5.).numpy() + array([0., 0., 0., 5., 5.], dtype=float32) + >>> tf.keras.activations.relu(foo, threshold=5.).numpy() + array([-0., -0., 0., 0., 10.], dtype=float32) + + Args: + x: Input `tensor` or `variable`. + alpha: A `float` that governs the slope for values lower than the + threshold. + max_value: A `float` that sets the saturation threshold (the largest + value the function will return). + threshold: A `float` giving the threshold value of the activation + function below which values will be damped or set to zero. + + Returns: + A `Tensor` representing the input tensor, transformed by the relu + activation function. Tensor will be of the same shape and dtype of + input `x`. + """ + return backend.relu( + x, alpha=alpha, max_value=max_value, threshold=threshold + ) + + +@keras_export("keras.activations.gelu", v1=[]) @tf.__internal__.dispatch.add_dispatch_support def gelu(x, approximate=False): - """Applies the Gaussian error linear unit (GELU) activation function. - - Gaussian error linear unit (GELU) computes - `x * P(X <= x)`, where `P(X) ~ N(0, 1)`. - The (GELU) nonlinearity weights inputs by their value, rather than gates - inputs by their sign as in ReLU. - - For example: - - >>> x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32) - >>> y = tf.keras.activations.gelu(x) - >>> y.numpy() - array([-0.00404951, -0.15865529, 0. , 0.8413447 , 2.9959507 ], - dtype=float32) - >>> y = tf.keras.activations.gelu(x, approximate=True) - >>> y.numpy() - array([-0.00363752, -0.15880796, 0. , 0.841192 , 2.9963627 ], - dtype=float32) - - Args: - x: Input tensor. - approximate: A `bool`, whether to enable approximation. - - Returns: - The gaussian error linear activation: - `0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))` - if `approximate` is `True` or - `x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`, - where `P(X) ~ N(0, 1)`, - if `approximate` is `False`. - - Reference: - - [Gaussian Error Linear Units (GELUs)](https://arxiv.org/abs/1606.08415) - """ - return tf.nn.gelu(x, approximate) - - -@keras_export('keras.activations.tanh') + """Applies the Gaussian error linear unit (GELU) activation function. + + Gaussian error linear unit (GELU) computes + `x * P(X <= x)`, where `P(X) ~ N(0, 1)`. + The (GELU) nonlinearity weights inputs by their value, rather than gates + inputs by their sign as in ReLU. + + Example: + + >>> x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32) + >>> y = tf.keras.activations.gelu(x) + >>> y.numpy() + array([-0.00404951, -0.15865529, 0. 
, 0.8413447 , 2.9959507 ], + dtype=float32) + >>> y = tf.keras.activations.gelu(x, approximate=True) + >>> y.numpy() + array([-0.00363752, -0.15880796, 0. , 0.841192 , 2.9963627 ], + dtype=float32) + + Args: + x: Input tensor. + approximate: A `bool`, whether to enable approximation. + + Returns: + The gaussian error linear activation: + `0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))` + if `approximate` is `True` or + `x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`, + where `P(X) ~ N(0, 1)`, + if `approximate` is `False`. + + Reference: + - [Gaussian Error Linear Units (GELUs)](https://arxiv.org/abs/1606.08415) + """ # noqa: E501 + return tf.nn.gelu(x, approximate) + + +@keras_export("keras.activations.tanh") @tf.__internal__.dispatch.add_dispatch_support def tanh(x): - """Hyperbolic tangent activation function. + """Hyperbolic tangent activation function. - For example: + Example: - >>> a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32) - >>> b = tf.keras.activations.tanh(a) - >>> b.numpy() - array([-0.9950547, -0.7615942, 0., 0.7615942, 0.9950547], dtype=float32) + >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype = tf.float32) + >>> b = tf.keras.activations.tanh(a) + >>> b.numpy() + array([-0.9950547, -0.7615942, 0., 0.7615942, 0.9950547], dtype=float32) - Args: - x: Input tensor. + Args: + x: Input tensor. - Returns: - Tensor of same shape and dtype of input `x`, with tanh activation: - `tanh(x) = sinh(x)/cosh(x) = ((exp(x) - exp(-x))/(exp(x) + exp(-x)))`. - """ - return tf.tanh(x) + Returns: + Tensor of same shape and dtype of input `x`, with tanh activation: + `tanh(x) = sinh(x)/cosh(x) = ((exp(x) - exp(-x))/(exp(x) + exp(-x)))`. + """ + return tf.tanh(x) -@keras_export('keras.activations.sigmoid') +@keras_export("keras.activations.sigmoid") @tf.__internal__.dispatch.add_dispatch_support def sigmoid(x): - """Sigmoid activation function, `sigmoid(x) = 1 / (1 + exp(-x))`. + """Sigmoid activation function, `sigmoid(x) = 1 / (1 + exp(-x))`. - Applies the sigmoid activation function. For small values (<-5), - `sigmoid` returns a value close to zero, and for large values (>5) - the result of the function gets close to 1. + Applies the sigmoid activation function. For small values (<-5), + `sigmoid` returns a value close to zero, and for large values (>5) + the result of the function gets close to 1. - Sigmoid is equivalent to a 2-element Softmax, where the second element is - assumed to be zero. The sigmoid function always returns a value between - 0 and 1. + Sigmoid is equivalent to a 2-element Softmax, where the second element is + assumed to be zero. The sigmoid function always returns a value between + 0 and 1. - For example: + Example: - >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32) - >>> b = tf.keras.activations.sigmoid(a) - >>> b.numpy() - array([2.0611537e-09, 2.6894143e-01, 5.0000000e-01, 7.3105860e-01, - 1.0000000e+00], dtype=float32) + >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32) + >>> b = tf.keras.activations.sigmoid(a) + >>> b.numpy() + array([2.0611537e-09, 2.6894143e-01, 5.0000000e-01, 7.3105860e-01, + 1.0000000e+00], dtype=float32) - Args: - x: Input tensor. + Args: + x: Input tensor. - Returns: - Tensor with the sigmoid activation: `1 / (1 + exp(-x))`. - """ - output = tf.sigmoid(x) - # Cache the logits to use for crossentropy loss. - output._keras_logits = x # pylint: disable=protected-access - return output + Returns: + Tensor with the sigmoid activation: `1 / (1 + exp(-x))`. 
+ """ + return backend.sigmoid(x) -@keras_export('keras.activations.exponential') +@keras_export("keras.activations.exponential") @tf.__internal__.dispatch.add_dispatch_support def exponential(x): - """Exponential activation function. + """Exponential activation function. - For example: + Example: - >>> a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32) - >>> b = tf.keras.activations.exponential(a) - >>> b.numpy() - array([0.04978707, 0.36787945, 1., 2.7182817 , 20.085537], dtype=float32) + >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype = tf.float32) + >>> b = tf.keras.activations.exponential(a) + >>> b.numpy() + array([0.04978707, 0.36787945, 1., 2.7182817 , 20.085537], dtype=float32) - Args: - x: Input tensor. + Args: + x: Input tensor. - Returns: - Tensor with exponential activation: `exp(x)`. - """ - return tf.exp(x) + Returns: + Tensor with exponential activation: `exp(x)`. + """ + return tf.exp(x) -@keras_export('keras.activations.hard_sigmoid') +@keras_export("keras.activations.hard_sigmoid") @tf.__internal__.dispatch.add_dispatch_support def hard_sigmoid(x): - """Hard sigmoid activation function. + """Hard sigmoid activation function. - A faster approximation of the sigmoid activation. - Piecewise linear approximation of the sigmoid function. - Ref: 'https://en.wikipedia.org/wiki/Hard_sigmoid' + A faster approximation of the sigmoid activation. + Piecewise linear approximation of the sigmoid function. + Ref: 'https://en.wikipedia.org/wiki/Hard_sigmoid' - For example: + Example: - >>> a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32) - >>> b = tf.keras.activations.hard_sigmoid(a) - >>> b.numpy() - array([0. , 0.3, 0.5, 0.7, 1. ], dtype=float32) + >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype = tf.float32) + >>> b = tf.keras.activations.hard_sigmoid(a) + >>> b.numpy() + array([0. , 0.3, 0.5, 0.7, 1. ], dtype=float32) - Args: - x: Input tensor. + Args: + x: Input tensor. - Returns: - The hard sigmoid activation, defined as: + Returns: + The hard sigmoid activation, defined as: - - `if x < -2.5: return 0` - - `if x > 2.5: return 1` - - `if -2.5 <= x <= 2.5: return 0.2 * x + 0.5` - """ - return backend.hard_sigmoid(x) + - `if x < -2.5: return 0` + - `if x > 2.5: return 1` + - `if -2.5 <= x <= 2.5: return 0.2 * x + 0.5` + """ + return backend.hard_sigmoid(x) -@keras_export('keras.activations.linear') +@keras_export("keras.activations.linear") @tf.__internal__.dispatch.add_dispatch_support def linear(x): - """Linear activation function (pass-through). + """Linear activation function (pass-through). - For example: + Example: - >>> a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32) - >>> b = tf.keras.activations.linear(a) - >>> b.numpy() - array([-3., -1., 0., 1., 3.], dtype=float32) + >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype = tf.float32) + >>> b = tf.keras.activations.linear(a) + >>> b.numpy() + array([-3., -1., 0., 1., 3.], dtype=float32) - Args: - x: Input tensor. + Args: + x: Input tensor. - Returns: - The input, unmodified. - """ - return x + Returns: + The input, unmodified. + """ + return x -@keras_export('keras.activations.serialize') +@keras_export("keras.activations.mish") @tf.__internal__.dispatch.add_dispatch_support -def serialize(activation): - """Returns the string identifier of an activation function. +def mish(x): + """Mish activation function. + + It is defined as: - Args: - activation : Function object. 
+ ```python + def mish(x): + return x * tanh(softplus(x)) + ``` - Returns: - String denoting the name attribute of the input function + where `softplus` is defined as: - For example: + ```python + def softplus(x): + return log(exp(x) + 1) + ``` - >>> tf.keras.activations.serialize(tf.keras.activations.tanh) - 'tanh' - >>> tf.keras.activations.serialize(tf.keras.activations.sigmoid) - 'sigmoid' - >>> tf.keras.activations.serialize('abcd') - Traceback (most recent call last): - ... - ValueError: ('Cannot serialize', 'abcd') + Example: - Raises: - ValueError: The input function is not a valid one. - """ - if (hasattr(activation, '__name__') and - activation.__name__ in _TF_ACTIVATIONS_V2): - return _TF_ACTIVATIONS_V2[activation.__name__] - return generic_utils.serialize_keras_object(activation) + >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0], dtype = tf.float32) + >>> b = tf.keras.activations.mish(a) + >>> b.numpy() + array([-0.14564745, -0.30340144, 0., 0.86509836], dtype=float32) + Args: + x: Input tensor. -# Add additional globals so that deserialize can find these common activation + Returns: + The mish activation. + + Reference: + - [Mish: A Self Regularized Non-Monotonic + Activation Function](https://arxiv.org/abs/1908.08681) + """ + return x * tf.math.tanh(tf.math.softplus(x)) + + +@keras_export("keras.activations.serialize") +@tf.__internal__.dispatch.add_dispatch_support +def serialize(activation, use_legacy_format=False): + """Returns the string identifier of an activation function. + + Args: + activation : Function object. + use_legacy_format: Boolean, whether to use the legacy format for + serialization. Defaults to False. + + Returns: + String denoting the name attribute of the input function + + Example: + + >>> tf.keras.activations.serialize(tf.keras.activations.tanh) + 'tanh' + >>> tf.keras.activations.serialize(tf.keras.activations.sigmoid) + 'sigmoid' + >>> tf.keras.activations.serialize('abcd') + Traceback (most recent call last): + ... + ValueError: Unknown activation function 'abcd' cannot be serialized. + + Raises: + ValueError: The input function is not a valid one. + """ + if ( + hasattr(activation, "__name__") + and activation.__name__ in _TF_ACTIVATIONS_V2 + ): + return _TF_ACTIVATIONS_V2[activation.__name__] + + if use_legacy_format: + return legacy_serialization.serialize_keras_object(activation) + + fn_config = serialization_lib.serialize_keras_object(activation) + if ( + not tf.__internal__.tf2.enabled() + or saved_model_utils.in_tf_saved_model_scope() + ): + return fn_config + if "config" not in fn_config: + raise ValueError( + f"Unknown activation function '{activation}' cannot be " + "serialized due to invalid function name. Make sure to use " + "an activation name that matches the references defined in " + "activations.py or use " + "`@keras.saving.register_keras_serializable()` " + "to register any custom activations. 
" + f"config={fn_config}" + ) + if not isinstance(activation, types.FunctionType): + # Case for additional custom activations represented by objects + return fn_config + if ( + isinstance(fn_config["config"], str) + and fn_config["config"] not in globals() + ): + # Case for custom activation functions from external activations modules + fn_config["config"] = object_registration.get_registered_name( + activation + ) + return fn_config + return fn_config["config"] + # Case for keras.activations builtins (simply return name) + + +# Add additional globals so that deserialize() can find these common activation # functions leaky_relu = tf.nn.leaky_relu log_softmax = tf.nn.log_softmax @@ -517,87 +586,111 @@ def serialize(activation): silu = tf.nn.silu -@keras_export('keras.activations.deserialize') +@keras_export("keras.activations.deserialize") @tf.__internal__.dispatch.add_dispatch_support -def deserialize(name, custom_objects=None): - """Returns activation function given a string identifier. - - Args: - name: The name of the activation function. - custom_objects: Optional `{function_name: function_obj}` - dictionary listing user-provided activation functions. - - Returns: - Corresponding activation function. - - For example: - - >>> tf.keras.activations.deserialize('linear') - - >>> tf.keras.activations.deserialize('sigmoid') - - >>> tf.keras.activations.deserialize('abcd') - Traceback (most recent call last): - ... - ValueError: Unknown activation function:abcd - - Raises: - ValueError: `Unknown activation function` if the input string does not - denote any defined Tensorflow activation function. - """ - activation_functions = {} - current_module = sys.modules[__name__] - - # we put 'current_module' after 'activation_layers' to prefer the local one - # if there is a collision - generic_utils.populate_dict_with_module_objects( - activation_functions, - (activation_layers, current_module), - obj_filter=callable) - - return generic_utils.deserialize_keras_object( - name, - module_objects=activation_functions, - custom_objects=custom_objects, - printable_module_name='activation function') - - -@keras_export('keras.activations.get') +def deserialize(name, custom_objects=None, use_legacy_format=False): + """Returns activation function given a string identifier. + + Args: + name: The name of the activation function. + custom_objects: Optional `{function_name: function_obj}` + dictionary listing user-provided activation functions. + use_legacy_format: Boolean, whether to use the legacy format for + deserialization. Defaults to False. + + Returns: + Corresponding activation function. + + Example: + + >>> tf.keras.activations.deserialize('linear') + + >>> tf.keras.activations.deserialize('sigmoid') + + >>> tf.keras.activations.deserialize('abcd') + Traceback (most recent call last): + ... + ValueError: Unknown activation function 'abcd' cannot be deserialized. + + Raises: + ValueError: `Unknown activation function` if the input string does not + denote any defined Tensorflow activation function. 
+ """ + activation_functions = {} + current_module = sys.modules[__name__] + + # we put 'current_module' after 'activation_layers' to prefer the local one + # if there is a collision + generic_utils.populate_dict_with_module_objects( + activation_functions, + (activation_layers, current_module), + obj_filter=callable, + ) + + if use_legacy_format: + return legacy_serialization.deserialize_keras_object( + name, + module_objects=activation_functions, + custom_objects=custom_objects, + printable_module_name="activation function", + ) + + returned_fn = serialization_lib.deserialize_keras_object( + name, + module_objects=activation_functions, + custom_objects=custom_objects, + printable_module_name="activation function", + ) + + if isinstance(returned_fn, str): + raise ValueError( + f"Unknown activation function '{name}' cannot be deserialized." + ) + + return returned_fn + + +@keras_export("keras.activations.get") @tf.__internal__.dispatch.add_dispatch_support def get(identifier): - """Returns function. - - Args: - identifier: Function or string - - Returns: - Function corresponding to the input string or input function. - - For example: - - >>> tf.keras.activations.get('softmax') - - >>> tf.keras.activations.get(tf.keras.activations.softmax) - - >>> tf.keras.activations.get(None) - - >>> tf.keras.activations.get(abs) - - >>> tf.keras.activations.get('abcd') - Traceback (most recent call last): - ... - ValueError: Unknown activation function:abcd - - Raises: - ValueError: Input is an unknown function or string, i.e., the input does - not denote any defined function. - """ - if identifier is None: - return linear - if isinstance(identifier, (str, dict)): - return deserialize(identifier) - elif callable(identifier): - return identifier - else: + """Returns function. + + Args: + identifier: Function or string + + Returns: + Function corresponding to the input string or input function. + + Example: + + >>> tf.keras.activations.get('softmax') + + >>> tf.keras.activations.get(tf.keras.activations.softmax) + + >>> tf.keras.activations.get(None) + + >>> tf.keras.activations.get(abs) + + >>> tf.keras.activations.get('abcd') + Traceback (most recent call last): + ... + ValueError: Unknown activation function:abcd + + Raises: + ValueError: Input is an unknown function or string, i.e., the input does + not denote any defined function. 
+ """ + if identifier is None: + return linear + if isinstance(identifier, (str, dict)): + use_legacy_format = ( + "module" not in identifier + if isinstance(identifier, dict) + else False + ) + return deserialize(identifier, use_legacy_format=use_legacy_format) + elif callable(identifier): + return identifier raise TypeError( - f'Could not interpret activation function identifier: {identifier}') + f"Could not interpret activation function identifier: {identifier}" + ) diff --git a/keras/activations_test.py b/keras/activations_test.py index 81b7e6fb702b..2222d1574ec3 100644 --- a/keras/activations_test.py +++ b/keras/activations_test.py @@ -14,249 +14,286 @@ # ============================================================================== """Tests for Keras activation functions.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np +import keras.layers.activation as activation_layers from keras import activations from keras import backend -from keras.testing_infra import test_combinations -import keras.layers.activation as activation_layers from keras.layers import core from keras.layers import serialization +from keras.testing_infra import test_combinations def _ref_softmax(values): - m = np.max(values) - e = np.exp(values - m) - return e / np.sum(e) + m = np.max(values) + e = np.exp(values - m) + return e / np.sum(e) -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class KerasActivationsTest(tf.test.TestCase, parameterized.TestCase): +def _ref_softplus(x): + return np.log(np.ones_like(x) + np.exp(x)) - def test_serialization(self): - all_activations = [ - 'softmax', 'relu', 'elu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear', - 'softplus', 'softsign', 'selu', 'gelu', 'relu6' - ] - for name in all_activations: - fn = activations.get(name) - ref_fn = getattr(activations, name) - assert fn == ref_fn - config = activations.serialize(fn) - fn = activations.deserialize(config) - assert fn == ref_fn - - def test_serialization_v2(self): - activation_map = {tf.math.softmax: 'softmax'} - for fn_v2_key in activation_map: - fn_v2 = activations.get(fn_v2_key) - config = activations.serialize(fn_v2) - fn = activations.deserialize(config) - assert fn.__name__ == activation_map[fn_v2_key] - - def test_serialization_with_layers(self): - activation = activation_layers.LeakyReLU(alpha=0.1) - layer = core.Dense(3, activation=activation) - config = serialization.serialize(layer) - # with custom objects - deserialized_layer = serialization.deserialize( - config, custom_objects={'LeakyReLU': activation}) - self.assertEqual(deserialized_layer.__class__.__name__, - layer.__class__.__name__) - self.assertEqual(deserialized_layer.activation.__class__.__name__, - activation.__class__.__name__) - # without custom objects - deserialized_layer = serialization.deserialize(config) - self.assertEqual(deserialized_layer.__class__.__name__, - layer.__class__.__name__) - self.assertEqual(deserialized_layer.activation.__class__.__name__, - activation.__class__.__name__) - - def test_softmax(self): - x = backend.placeholder(ndim=2) - f = backend.function([x], [activations.softmax(x)]) - test_values = np.random.random((2, 5)) - - result = f([test_values])[0] - expected = _ref_softmax(test_values[0]) - self.assertAllClose(result[0], expected, rtol=1e-05) - - x = backend.placeholder(ndim=1) - with self.assertRaises(ValueError): - activations.softmax(x) - - def test_softmax_2d_axis_0(self): - x = backend.placeholder(ndim=2) - f = 
backend.function([x], [activations.softmax(x, axis=0)]) - test_values = np.random.random((2, 5)) - result = f([test_values])[0] - expected = np.zeros((2, 5)) - for i in range(5): - expected[:, i] = _ref_softmax(test_values[:, i]) - self.assertAllClose(result, expected, rtol=1e-05) - - def test_softmax_3d_axis_tuple(self): - x = backend.placeholder(ndim=3) - f = backend.function([x], [activations.softmax(x, axis=(1, 2))]) - test_values = np.random.random((2, 3, 5)) - result = f([test_values])[0] - expected = np.zeros((2, 3, 5)) - for i in range(2): - expected[i, :, :] = _ref_softmax(test_values[i, :, :]) - self.assertAllClose(result, expected, rtol=1e-05) - - def test_temporal_softmax(self): - x = backend.placeholder(shape=(2, 2, 3)) - f = backend.function([x], [activations.softmax(x)]) - test_values = np.random.random((2, 2, 3)) * 10 - result = f([test_values])[0] - expected = _ref_softmax(test_values[0, 0]) - self.assertAllClose(result[0, 0], expected, rtol=1e-05) - - def test_selu(self): - x = backend.placeholder(ndim=2) - f = backend.function([x], [activations.selu(x)]) - alpha = 1.6732632423543772848170429916717 - scale = 1.0507009873554804934193349852946 - - positive_values = np.array([[1, 2]], dtype=backend.floatx()) - result = f([positive_values])[0] - self.assertAllClose(result, positive_values * scale, rtol=1e-05) - - negative_values = np.array([[-1, -2]], dtype=backend.floatx()) - result = f([negative_values])[0] - true_result = (np.exp(negative_values) - 1) * scale * alpha - self.assertAllClose(result, true_result) - - def test_softplus(self): - def softplus(x): - return np.log(np.ones_like(x) + np.exp(x)) - - x = backend.placeholder(ndim=2) - f = backend.function([x], [activations.softplus(x)]) - test_values = np.random.random((2, 5)) - result = f([test_values])[0] - expected = softplus(test_values) - self.assertAllClose(result, expected, rtol=1e-05) - - def test_softsign(self): - def softsign(x): - return np.divide(x, np.ones_like(x) + np.absolute(x)) - - x = backend.placeholder(ndim=2) - f = backend.function([x], [activations.softsign(x)]) - test_values = np.random.random((2, 5)) - result = f([test_values])[0] - expected = softsign(test_values) - self.assertAllClose(result, expected, rtol=1e-05) - - def test_sigmoid(self): - def ref_sigmoid(x): - if x >= 0: - return 1 / (1 + np.exp(-x)) - else: - z = np.exp(x) - return z / (1 + z) - sigmoid = np.vectorize(ref_sigmoid) - - x = backend.placeholder(ndim=2) - f = backend.function([x], [activations.sigmoid(x)]) - test_values = np.random.random((2, 5)) - result = f([test_values])[0] - expected = sigmoid(test_values) - self.assertAllClose(result, expected, rtol=1e-05) - - def test_hard_sigmoid(self): - def ref_hard_sigmoid(x): - x = (x * 0.2) + 0.5 - z = 0.0 if x <= 0 else (1.0 if x >= 1 else x) - return z - hard_sigmoid = np.vectorize(ref_hard_sigmoid) - x = backend.placeholder(ndim=2) - f = backend.function([x], [activations.hard_sigmoid(x)]) - test_values = np.random.random((2, 5)) - result = f([test_values])[0] - expected = hard_sigmoid(test_values) - self.assertAllClose(result, expected, rtol=1e-05) - - def test_relu(self): - x = backend.placeholder(ndim=2) - f = backend.function([x], [activations.relu(x)]) - positive_values = np.random.random((2, 5)) - result = f([positive_values])[0] - self.assertAllClose(result, positive_values, rtol=1e-05) - - negative_values = np.random.uniform(-1, 0, (2, 5)) - result = f([negative_values])[0] - expected = np.zeros((2, 5)) - self.assertAllClose(result, expected, rtol=1e-05) - - def 
test_gelu(self): - - def gelu(x, approximate=False): - if approximate: - return 0.5 * x * (1.0 + np.tanh( - np.sqrt(2.0 / np.pi) * (x + 0.044715 * np.power(x, 3)))) - else: - from scipy.stats import norm # pylint: disable=g-import-not-at-top - return x * norm.cdf(x) - - x = backend.placeholder(ndim=2) - f = backend.function([x], [activations.gelu(x)]) - test_values = np.random.random((2, 5)) - result = f([test_values])[0] - expected = gelu(test_values) - self.assertAllClose(result, expected, rtol=1e-05) - - f = backend.function([x], [activations.gelu(x, True)]) - test_values = np.random.random((2, 5)) - result = f([test_values])[0] - expected = gelu(test_values, True) - self.assertAllClose(result, expected, rtol=1e-05) - - def test_elu(self): - x = backend.placeholder(ndim=2) - f = backend.function([x], [activations.elu(x, 0.5)]) - test_values = np.random.random((2, 5)) - result = f([test_values])[0] - self.assertAllClose(result, test_values, rtol=1e-05) - negative_values = np.array([[-1, -2]], dtype=backend.floatx()) - result = f([negative_values])[0] - true_result = (np.exp(negative_values) - 1) / 2 - self.assertAllClose(result, true_result) - - def test_tanh(self): - test_values = np.random.random((2, 5)) - x = backend.placeholder(ndim=2) - exp = activations.tanh(x) - f = backend.function([x], [exp]) - result = f([test_values])[0] - expected = np.tanh(test_values) - self.assertAllClose(result, expected, rtol=1e-05) - - def test_exponential(self): - test_values = np.random.random((2, 5)) - x = backend.placeholder(ndim=2) - exp = activations.exponential(x) - f = backend.function([x], [exp]) - result = f([test_values])[0] - expected = np.exp(test_values) - self.assertAllClose(result, expected, rtol=1e-05) - - def test_linear(self): - x = np.random.random((10, 5)) - self.assertAllClose(x, activations.linear(x)) - - def test_invalid_usage(self): - with self.assertRaises(ValueError): - activations.get('unknown') - - # The following should be possible but should raise a warning: - activations.get(activation_layers.LeakyReLU()) - - -if __name__ == '__main__': - tf.test.main() + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class KerasActivationsTest(tf.test.TestCase, parameterized.TestCase): + def test_serialization(self): + all_activations = [ + "softmax", + "relu", + "elu", + "tanh", + "sigmoid", + "hard_sigmoid", + "linear", + "softplus", + "softsign", + "selu", + "gelu", + "relu6", + "mish", + ] + for name in all_activations: + fn = activations.get(name) + ref_fn = getattr(activations, name) + assert fn == ref_fn + config = activations.serialize(fn) + fn = activations.deserialize(config) + assert fn == ref_fn + + def test_serialization_v2(self): + activation_map = {tf.math.softmax: "softmax"} + for fn_v2_key in activation_map: + fn_v2 = activations.get(fn_v2_key) + config = activations.serialize(fn_v2) + fn = activations.deserialize(config) + assert fn.__name__ == activation_map[fn_v2_key] + + def test_serialization_with_layers(self): + activation = activation_layers.LeakyReLU(alpha=0.1) + layer = core.Dense(3, activation=activation) + config = serialization.serialize(layer) + # with custom objects + deserialized_layer = serialization.deserialize( + config, custom_objects={"LeakyReLU": activation} + ) + self.assertEqual( + deserialized_layer.__class__.__name__, layer.__class__.__name__ + ) + self.assertEqual( + deserialized_layer.activation.__class__.__name__, + activation.__class__.__name__, + ) + # without custom objects + deserialized_layer = 
serialization.deserialize(config) + self.assertEqual( + deserialized_layer.__class__.__name__, layer.__class__.__name__ + ) + self.assertEqual( + deserialized_layer.activation.__class__.__name__, + activation.__class__.__name__, + ) + + def test_softmax(self): + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.softmax(x)]) + test_values = np.random.random((2, 5)) + + result = f([test_values])[0] + expected = _ref_softmax(test_values[0]) + self.assertAllClose(result[0], expected, rtol=1e-05) + + x = backend.placeholder(ndim=1) + with self.assertRaises(ValueError): + activations.softmax(x) + + def test_softmax_2d_axis_0(self): + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.softmax(x, axis=0)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + expected = np.zeros((2, 5)) + for i in range(5): + expected[:, i] = _ref_softmax(test_values[:, i]) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_softmax_3d_axis_tuple(self): + x = backend.placeholder(ndim=3) + f = backend.function([x], [activations.softmax(x, axis=(1, 2))]) + test_values = np.random.random((2, 3, 5)) + result = f([test_values])[0] + expected = np.zeros((2, 3, 5)) + for i in range(2): + expected[i, :, :] = _ref_softmax(test_values[i, :, :]) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_temporal_softmax(self): + x = backend.placeholder(shape=(2, 2, 3)) + f = backend.function([x], [activations.softmax(x)]) + test_values = np.random.random((2, 2, 3)) * 10 + result = f([test_values])[0] + expected = _ref_softmax(test_values[0, 0]) + self.assertAllClose(result[0, 0], expected, rtol=1e-05) + + def test_selu(self): + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.selu(x)]) + alpha = 1.6732632423543772848170429916717 + scale = 1.0507009873554804934193349852946 + + positive_values = np.array([[1, 2]], dtype=backend.floatx()) + result = f([positive_values])[0] + self.assertAllClose(result, positive_values * scale, rtol=1e-05) + + negative_values = np.array([[-1, -2]], dtype=backend.floatx()) + result = f([negative_values])[0] + true_result = (np.exp(negative_values) - 1) * scale * alpha + self.assertAllClose(result, true_result) + + def test_softplus(self): + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.softplus(x)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + expected = _ref_softplus(test_values) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_softsign(self): + def softsign(x): + return np.divide(x, np.ones_like(x) + np.absolute(x)) + + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.softsign(x)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + expected = softsign(test_values) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_sigmoid(self): + def ref_sigmoid(x): + if x >= 0: + return 1 / (1 + np.exp(-x)) + else: + z = np.exp(x) + return z / (1 + z) + + sigmoid = np.vectorize(ref_sigmoid) + + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.sigmoid(x)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + expected = sigmoid(test_values) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_hard_sigmoid(self): + def ref_hard_sigmoid(x): + x = (x * 0.2) + 0.5 + z = 0.0 if x <= 0 else (1.0 if x >= 1 else x) + return z + + hard_sigmoid = np.vectorize(ref_hard_sigmoid) + x = backend.placeholder(ndim=2) + f = 
backend.function([x], [activations.hard_sigmoid(x)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + expected = hard_sigmoid(test_values) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_relu(self): + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.relu(x)]) + positive_values = np.random.random((2, 5)) + result = f([positive_values])[0] + self.assertAllClose(result, positive_values, rtol=1e-05) + + negative_values = np.random.uniform(-1, 0, (2, 5)) + result = f([negative_values])[0] + expected = np.zeros((2, 5)) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_gelu(self): + def gelu(x, approximate=False): + if approximate: + return ( + 0.5 + * x + * ( + 1.0 + + np.tanh( + np.sqrt(2.0 / np.pi) + * (x + 0.044715 * np.power(x, 3)) + ) + ) + ) + else: + from scipy.stats import norm + + return x * norm.cdf(x) + + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.gelu(x)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + expected = gelu(test_values) + self.assertAllClose(result, expected, rtol=1e-05) + + f = backend.function([x], [activations.gelu(x, True)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + expected = gelu(test_values, True) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_elu(self): + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.elu(x, 0.5)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + self.assertAllClose(result, test_values, rtol=1e-05) + negative_values = np.array([[-1, -2]], dtype=backend.floatx()) + result = f([negative_values])[0] + true_result = (np.exp(negative_values) - 1) / 2 + self.assertAllClose(result, true_result) + + def test_tanh(self): + test_values = np.random.random((2, 5)) + x = backend.placeholder(ndim=2) + exp = activations.tanh(x) + f = backend.function([x], [exp]) + result = f([test_values])[0] + expected = np.tanh(test_values) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_exponential(self): + test_values = np.random.random((2, 5)) + x = backend.placeholder(ndim=2) + exp = activations.exponential(x) + f = backend.function([x], [exp]) + result = f([test_values])[0] + expected = np.exp(test_values) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_mish(self): + test_values = np.random.random((2, 5)) + x = backend.placeholder(ndim=2) + output = activations.mish(x) + f = backend.function([x], [output]) + result = f([test_values])[0] + expected = test_values * np.tanh(_ref_softplus(test_values)) + self.assertAllClose(result, expected, rtol=1e-05) + + def test_linear(self): + x = np.random.random((10, 5)) + self.assertAllClose(x, activations.linear(x)) + + def test_invalid_usage(self): + with self.assertRaises(ValueError): + activations.get("unknown") + + # The following should be possible but should raise a warning: + activations.get(activation_layers.LeakyReLU()) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/api/BUILD b/keras/api/BUILD index 3707baa50007..3bcfc7a2d61b 100644 --- a/keras/api/BUILD +++ b/keras/api/BUILD @@ -1,10 +1,12 @@ # Description: # Package for Keras. 
+# Placeholder: load unaliased py_library load("//keras/api:api_gen.bzl", "gen_api_init_files") load("//keras/api:api_init_files.bzl", "KERAS_API_INIT_FILES", "KERAS_API_INIT_FILES_V1") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/py/tensorflow:__subpackages__", @@ -52,7 +54,6 @@ keras_packages = [ "keras.datasets.mnist", "keras.datasets.reuters", "keras.dtensor.layout_map", - "keras.dtensor.optimizers", "keras.engine.base_layer", "keras.engine.data_adapter", "keras.engine.input_layer", @@ -60,22 +61,21 @@ keras_packages = [ "keras.engine.sequential", "keras.engine.training", "keras.estimator", + "keras.export.export_lib", "keras.feature_column.dense_features", "keras.feature_column.dense_features_v2", "keras.feature_column.sequence_feature_column", # Placeholder for internal API "keras.initializers", + "keras.initializers.initializers", "keras.initializers.initializers_v1", - "keras.initializers.initializers_v2", "keras.layers.activation", "keras.layers.attention", "keras.layers.convolutional", "keras.layers.core", "keras.layers.locally_connected", "keras.layers.merging", - "keras.layers.normalization.batch_normalization", - "keras.layers.normalization.batch_normalization_v1", - "keras.layers.normalization.layer_normalization", + "keras.layers.normalization", "keras.layers.preprocessing", "keras.layers.pooling", "keras.layers.regularization", @@ -93,33 +93,24 @@ keras_packages = [ "keras.mixed_precision.loss_scale_optimizer", "keras.mixed_precision.policy", "keras.models", + "keras.optimizers.adadelta", + "keras.optimizers.adagrad", + "keras.optimizers.adam", + "keras.optimizers.adamax", + "keras.optimizers.ftrl", + "keras.optimizers.nadam", + "keras.optimizers.sgd", + "keras.optimizers.optimizer", + "keras.optimizers.rmsprop", "keras.optimizers.legacy.adadelta", "keras.optimizers.legacy.adagrad", "keras.optimizers.legacy.adam", "keras.optimizers.legacy.adamax", "keras.optimizers.legacy.ftrl", + "keras.optimizers.legacy.gradient_descent", "keras.optimizers.legacy.nadam", - "keras.optimizers.legacy.optimizer", + "keras.optimizers.legacy.optimizer_v2", "keras.optimizers.legacy.rmsprop", - "keras.optimizers.legacy.sgd", - "keras.optimizers.optimizer_experimental.adadelta", - "keras.optimizers.optimizer_experimental.adagrad", - "keras.optimizers.optimizer_experimental.adam", - "keras.optimizers.optimizer_experimental.adamax", - "keras.optimizers.optimizer_experimental.ftrl", - "keras.optimizers.optimizer_experimental.nadam", - "keras.optimizers.optimizer_experimental.sgd", - "keras.optimizers.optimizer_experimental.optimizer", - "keras.optimizers.optimizer_experimental.rmsprop", - "keras.optimizers.optimizer_v2.adadelta", - "keras.optimizers.optimizer_v2.adagrad", - "keras.optimizers.optimizer_v2.adam", - "keras.optimizers.optimizer_v2.adamax", - "keras.optimizers.optimizer_v2.ftrl", - "keras.optimizers.optimizer_v2.gradient_descent", - "keras.optimizers.optimizer_v2.nadam", - "keras.optimizers.optimizer_v2.optimizer_v2", - "keras.optimizers.optimizer_v2.rmsprop", "keras.optimizers.schedules.learning_rate_schedule", "keras.optimizers", "keras.premade_models.linear", @@ -128,9 +119,9 @@ keras_packages = [ "keras.preprocessing.sequence", "keras.preprocessing.text", "keras.regularizers", - "keras.saving.model_config", - "keras.saving.save", - "keras.saving.saved_model_experimental", + "keras.saving.legacy.model_config", + "keras.saving.legacy.save", + "keras.saving.legacy.serialization", 
"keras.testing_infra.test_utils", "keras.utils.data_utils", "keras.utils.generic_utils", @@ -140,7 +131,6 @@ keras_packages = [ "keras.utils.np_utils", "keras.utils.tf_utils", "keras.utils.vis_utils", - "keras.wrappers.scikit_learn", ] # The target used by PIP package which need to generate API init files during OSS build. @@ -167,6 +157,9 @@ gen_api_init_files( package_deps = [ "//keras", "//:expect_tensorflow_installed", + # "//third_party/tensorflow/lite/python:analyzer", + # "//third_party/tensorflow/lite/python:lite", + # "//third_party/tensorflow/lite/python/authoring", ], packages = keras_packages, ) @@ -181,6 +174,9 @@ gen_api_init_files( package_deps = [ "//keras", "//:expect_tensorflow_installed", + # "//third_party/tensorflow/lite/python:analyzer", + # "//third_party/tensorflow/lite/python:lite", + # "//third_party/tensorflow/lite/python/authoring", ], packages = keras_packages, ) @@ -195,6 +191,9 @@ gen_api_init_files( package_deps = [ "//keras", "//:expect_tensorflow_installed", + # "//third_party/tensorflow/lite/python:analyzer", + # "//third_party/tensorflow/lite/python:lite", + # "//third_party/tensorflow/lite/python/authoring", ], packages = keras_packages, ) diff --git a/keras/api/api_gen.bzl b/keras/api/api_gen.bzl index 225c0900e0b2..f0d0cc067eba 100644 --- a/keras/api/api_gen.bzl +++ b/keras/api/api_gen.bzl @@ -9,6 +9,9 @@ and it imports TensorFlow code, that installing TensorFlow python package is required to Bazel build Keras. """ +load("@org_keras//keras:keras.bzl", "if_indexing_source_code") +# Placeholder: load aliased py_binary + def gen_api_init_files( name, output_files, @@ -19,7 +22,9 @@ def gen_api_init_files( compat_api_versions = [], compat_init_templates = [], packages = ["keras"], - package_deps = ["//keras:keras"], + package_deps = [ + "//keras:keras", + ], output_package = "keras.api", output_dir = "", root_file_name = "__init__.py"): @@ -94,19 +99,32 @@ def gen_api_init_files( # Disable them for now so that we don't get SymbolExposedTwiceError # from create_python_api.py packages_to_ignore = ["tensorflow.python.keras", "tensorflow.keras"] + + flags = [ + root_init_template_flag, + "--apidir=$(@D)" + output_dir, + "--apiname=" + api_name, + "--apiversion=" + str(api_version), + compat_api_version_flags, + compat_init_template_flags, + "--packages=" + ",".join(packages), + "--packages_to_ignore=" + ",".join(packages_to_ignore), + "--output_package=" + output_package, + ] + native.genrule( name = name, outs = all_output_files, - cmd = ( - "$(location :" + api_gen_binary_target + ") " + - root_init_template_flag + " --apidir=$(@D)" + output_dir + - " --apiname=" + api_name + " --apiversion=" + str(api_version) + - compat_api_version_flags + " " + compat_init_template_flags + - " --packages=" + ",".join(packages) + - " --packages_to_ignore=" + ",".join(packages_to_ignore) + - " --output_package=" + output_package + " $(OUTS)" + cmd = if_indexing_source_code( + _make_cmd(api_gen_binary_target, flags, loading = "static"), + _make_cmd(api_gen_binary_target, flags, loading = "default"), ), srcs = srcs, - exec_tools = [":" + api_gen_binary_target], + tools = [":" + api_gen_binary_target], visibility = ["//visibility:public"], ) + +def _make_cmd(api_gen_binary_target, flags, loading = "default"): + binary = "$(location :" + api_gen_binary_target + ")" + flags.append("--loading=" + loading) + return " ".join([binary] + flags + ["$(OUTS)"]) diff --git a/keras/api/api_init_files.bzl b/keras/api/api_init_files.bzl index a7007e1dd235..48cfef198d73 100644 --- 
diff --git a/keras/api/api_init_files.bzl b/keras/api/api_init_files.bzl index a7007e1dd235..48cfef198d73 100644 --- a/keras/api/api_init_files.bzl +++ b/keras/api/api_init_files.bzl @@ -9,6 +9,7 @@ KERAS_API_INIT_FILES = [ "keras/__internal__/layers/__init__.py", "keras/__internal__/losses/__init__.py", "keras/__internal__/models/__init__.py", + "keras/__internal__/optimizers/__init__.py", "keras/__internal__/utils/__init__.py", "keras/activations/__init__.py", "keras/applications/__init__.py", @@ -49,6 +50,7 @@ KERAS_API_INIT_FILES = [ "keras/dtensor/experimental/optimizers/__init__.py", "keras/estimator/__init__.py", "keras/experimental/__init__.py", + "keras/export/__init__.py", # Placeholder for internal API "keras/initializers/__init__.py", "keras/layers/__init__.py", @@ -56,6 +58,7 @@ KERAS_API_INIT_FILES = [ "keras/layers/experimental/preprocessing/__init__.py", "keras/losses/__init__.py", "keras/metrics/__init__.py", + "keras/metrics/experimental/__init__.py", "keras/mixed_precision/__init__.py", "keras/models/__init__.py", "keras/models/experimental/__init__.py", @@ -69,8 +72,10 @@ KERAS_API_INIT_FILES = [ "keras/preprocessing/sequence/__init__.py", "keras/preprocessing/text/__init__.py", "keras/regularizers/__init__.py", + "keras/saving/__init__.py", "keras/utils/__init__.py", "keras/utils/experimental/__init__.py", + "keras/utils/legacy/__init__.py", "keras/wrappers/__init__.py", "keras/wrappers/scikit_learn/__init__.py", ] @@ -119,6 +124,7 @@ KERAS_API_INIT_FILES_V1 = [ "keras/datasets/reuters/__init__.py", "keras/estimator/__init__.py", "keras/experimental/__init__.py", + "keras/export/__init__.py", "keras/initializers/__init__.py", "keras/layers/__init__.py", "keras/layers/experimental/__init__.py", @@ -136,7 +142,9 @@ KERAS_API_INIT_FILES_V1 = [ "keras/preprocessing/sequence/__init__.py", "keras/preprocessing/text/__init__.py", "keras/regularizers/__init__.py", + "keras/saving/__init__.py", "keras/utils/__init__.py", + "keras/utils/legacy/__init__.py", "keras/wrappers/__init__.py", "keras/wrappers/scikit_learn/__init__.py", ] diff --git a/keras/api/create_python_api_wrapper.py b/keras/api/create_python_api_wrapper.py index 83602c3aace3..c02c26e2cf99 100644 --- a/keras/api/create_python_api_wrapper.py +++ b/keras/api/create_python_api_wrapper.py @@ -23,8 +23,12 @@ from __future__ import division from __future__ import print_function -import keras # pylint: disable=unused-import -from tensorflow.python.tools.api.generator import create_python_api +import keras # noqa: F401 -if __name__ == '__main__': - create_python_api.main() +# isort: off +from tensorflow.python.tools.api.generator import ( + create_python_api, +) + +if __name__ == "__main__": + create_python_api.main() diff --git a/keras/api/golden/BUILD b/keras/api/golden/BUILD index 5c2a24c0669e..68d1e26f28fe 100644 --- a/keras/api/golden/BUILD +++ b/keras/api/golden/BUILD @@ -1,6 +1,7 @@ # TensorFlow API backwards compatibility test goldens.
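The golden `.pbtxt` updates that follow pin the new public signatures. As a reading aid (a hypothetical decoding, not code from the patch), an `argspec` entry such as the updated `compile` one below corresponds to a Python signature along these lines:

```python
# argspec: args=['self', 'optimizer', ..., 'pss_evaluation_shards'],
#          varargs=None, keywords=kwargs, defaults=['rmsprop', ..., '0']
def compile(self, optimizer="rmsprop", loss=None, metrics=None,
            loss_weights=None, weighted_metrics=None, run_eagerly=None,
            steps_per_execution=None, jit_compile=None,
            pss_evaluation_shards=0, **kwargs):
    ...
```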
package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = ["//visibility:public"], licenses = ["notice"], # Apache 2.0 ) diff --git a/keras/api/golden/v1/tensorflow.keras.-model.pbtxt b/keras/api/golden/v1/tensorflow.keras.-model.pbtxt index 679bc3d70094..a867fb43ebd1 100644 --- a/keras/api/golden/v1/tensorflow.keras.-model.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.-model.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -12,10 +12,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -52,6 +60,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -112,6 +124,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -176,13 +192,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -216,6 +240,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +260,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -252,6 +288,10 @@ tf_class 
{ name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -264,13 +304,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -310,7 +358,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -326,7 +378,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v1/tensorflow.keras.-sequential.pbtxt b/keras/api/golden/v1/tensorflow.keras.-sequential.pbtxt index 9c322a1e659a..fc9edeb88c5f 100644 --- a/keras/api/golden/v1/tensorflow.keras.-sequential.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.-sequential.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -14,10 +14,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -54,6 +62,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -114,6 +126,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ 
-182,13 +198,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -222,6 +246,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -238,6 +266,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -258,6 +294,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -270,13 +310,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, 
defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -320,7 +368,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -336,7 +388,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.layers.-base-random-layer.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.layers.-base-random-layer.pbtxt index 7f8976f0c0bf..68aa8fd65565 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.layers.-base-random-layer.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.layers.-base-random-layer.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -129,7 +129,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'seed\', \'force_generator\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " + argspec: "args=[\'self\', \'seed\', \'force_generator\', \'rng_type\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\', \'None\'], " } member_method { name: "add_loss" @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git 
a/keras/api/golden/v1/tensorflow.keras.__internal__.layers.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.layers.pbtxt index 429049587d64..1a3ec3c07eb7 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.layers.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.layers.pbtxt @@ -1,9 +1,5 @@ path: "tensorflow.keras.__internal__.layers" tf_module { - member { - name: "BaseImageAugmentationLayer" - mtype: "" - } member { name: "BaseRandomLayer" mtype: "" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling1-d.pbtxt index 32026fb12491..b724000004d0 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling1-d.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -199,6 +203,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -239,6 +247,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling2-d.pbtxt index eb8ca29e8d1d..509a218c1f55 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling2-d.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -199,6 +203,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -239,6 +247,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling3-d.pbtxt index 6f813150220b..4a4f882460b4 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-average-pooling3-d.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -199,6 +203,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -239,6 +247,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-batch-normalization.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-batch-normalization.pbtxt index 38842e3849c2..77ae4cffed0c 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-batch-normalization.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-batch-normalization.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,9 +171,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, 
keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " } member_method { name: "compute_mask" @@ -199,6 +203,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -239,6 +247,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv1-d.pbtxt index 699e2f4e8eeb..70cef7d5638a 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv1-d.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -203,6 +207,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -243,6 +251,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv2-d-transpose.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv2-d-transpose.pbtxt index f899e0e7a152..2a1dc3989ad9 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv2-d-transpose.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv2-d-transpose.pbtxt @@ -8,8 +8,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -172,6 +172,10 @@ 
tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -204,6 +208,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -244,6 +252,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv2-d.pbtxt index 4986cbfc2c67..3562610db383 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv2-d.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -203,6 +207,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -243,6 +251,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv3-d-transpose.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv3-d-transpose.pbtxt index 6739698fb60c..743619e0478e 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv3-d-transpose.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv3-d-transpose.pbtxt @@ -8,8 +8,8 @@ tf_class { is_instance: "" is_instance: 
"" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -172,6 +172,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -204,6 +208,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -244,6 +252,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv3-d.pbtxt index 36f8e63244ae..1975283a7815 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-conv3-d.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -203,6 +207,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -243,6 +251,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-dense.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-dense.pbtxt index 93db8f2a0118..9c3540980571 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-dense.pbtxt +++ 
b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-dense.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -170,6 +170,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -198,6 +202,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -238,6 +246,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-dropout.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-dropout.pbtxt index 177e51b470b9..99f55801f524 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-dropout.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-dropout.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'False\'], " @@ -199,6 +203,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -239,6 +247,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-flatten.pbtxt 
b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-flatten.pbtxt index 0b5594ac61c7..d390aade084f 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-flatten.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-flatten.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -170,6 +170,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -198,6 +202,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -238,6 +246,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-layer.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-layer.pbtxt index b04e90fe0aa1..fa5c90d9b193 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-layer.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-layer.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -168,6 +168,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -236,6 +244,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, 
defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling1-d.pbtxt index 3c40a6c2a881..5a57d0d4f744 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling1-d.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -199,6 +203,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -239,6 +247,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling2-d.pbtxt index fc781675d783..f0a9659a69de 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling2-d.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -199,6 +203,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -239,6 +247,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: 
"args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling3-d.pbtxt index cc87c1d42329..dd0436a5821a 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-max-pooling3-d.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -171,6 +171,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -199,6 +203,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -239,6 +247,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-separable-conv1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-separable-conv1-d.pbtxt index 69a8b2e51d19..f1169e363e5f 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-separable-conv1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-separable-conv1-d.pbtxt @@ -8,8 +8,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -172,6 +172,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -204,6 +208,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -244,6 +252,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + 
member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-separable-conv2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-separable-conv2-d.pbtxt index 7fe6d5194b2f..9815b88f8fa0 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-separable-conv2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.layers.-separable-conv2-d.pbtxt @@ -8,8 +8,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -172,6 +172,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -204,6 +208,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -244,6 +252,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-basic-l-s-t-m-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-basic-l-s-t-m-cell.pbtxt index 7924e21ee229..91129cd63d4c 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-basic-l-s-t-m-cell.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-basic-l-s-t-m-cell.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -179,6 +179,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" @@ -207,6 +211,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method 
{ name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -251,6 +259,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-basic-r-n-n-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-basic-r-n-n-cell.pbtxt index 78bc3cad1b66..a056b2db71ee 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-basic-r-n-n-cell.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-basic-r-n-n-cell.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -179,6 +179,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" @@ -207,6 +211,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -251,6 +259,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-device-wrapper.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-device-wrapper.pbtxt index 8ba415e602e8..06e5a0742dcb 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-device-wrapper.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-device-wrapper.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -179,6 +179,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" @@ -207,6 +211,10 @@ tf_class { name: "from_config" argspec: 
"args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -251,6 +259,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-dropout-wrapper.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-dropout-wrapper.pbtxt index 3bcd4f8b03e0..560abe76df77 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-dropout-wrapper.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-dropout-wrapper.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -183,6 +183,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'inputs_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" @@ -211,6 +215,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -255,6 +263,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-g-r-u-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-g-r-u-cell.pbtxt index ff5a9c974c42..f047c7b161cc 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-g-r-u-cell.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-g-r-u-cell.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -179,6 +179,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', 
\'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" @@ -207,6 +211,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -251,6 +259,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-l-s-t-m-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-l-s-t-m-cell.pbtxt index 91f2d4ea5d12..917b7da630f8 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-l-s-t-m-cell.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-l-s-t-m-cell.pbtxt @@ -7,8 +7,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -179,6 +179,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" @@ -207,6 +211,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -251,6 +259,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-multi-r-n-n-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-multi-r-n-n-cell.pbtxt index 99c6ee484e32..b87a1077437e 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-multi-r-n-n-cell.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-multi-r-n-n-cell.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { 
@@ -178,6 +178,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" @@ -206,6 +210,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -250,6 +258,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-r-n-n-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-r-n-n-cell.pbtxt index 931f25495034..b12bdab443b1 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-r-n-n-cell.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-r-n-n-cell.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -177,6 +177,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" @@ -205,6 +209,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -249,6 +257,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-residual-wrapper.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-residual-wrapper.pbtxt index 584643d04a57..0c537a8bdea9 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-residual-wrapper.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.legacy.rnn_cell.-residual-wrapper.pbtxt @@ -7,8 +7,8 @@ 
tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -179,6 +179,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" @@ -207,6 +211,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -251,6 +259,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.pbtxt b/keras/api/golden/v1/tensorflow.keras.__internal__.pbtxt index 2d6a3892f43b..6b25413391c4 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.__internal__.pbtxt @@ -8,4 +8,8 @@ tf_module { name: "legacy" mtype: "" } + member_method { + name: "enable_unsafe_deserialization" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } } diff --git a/keras/api/golden/v1/tensorflow.keras.activations.pbtxt b/keras/api/golden/v1/tensorflow.keras.activations.pbtxt index 28814e567e8d..ab982a5c4e4a 100644 --- a/keras/api/golden/v1/tensorflow.keras.activations.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.activations.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.activations" tf_module { member_method { name: "deserialize" - argspec: "args=[\'name\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'name\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "elu" @@ -24,6 +24,10 @@ tf_module { name: "linear" argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mish" + argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "relu" argspec: "args=[\'x\', \'alpha\', \'max_value\', \'threshold\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\', \'0.0\'], " @@ -34,7 +38,7 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'activation\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'activation\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "sigmoid" diff --git a/keras/api/golden/v1/tensorflow.keras.backend.pbtxt b/keras/api/golden/v1/tensorflow.keras.backend.pbtxt index a66ad258c8e0..6cc28ec691ae 100644 --- a/keras/api/golden/v1/tensorflow.keras.backend.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.backend.pbtxt @@ -62,11 +62,7 @@ 
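
Two module-level additions sit in the hunks above: tf.keras.__internal__ gains enable_unsafe_deserialization(), and the activations module gains mish plus a use_legacy_format switch on serialize/deserialize. A quick illustration grounded in those argspecs; the input values are arbitrary:

import tensorflow as tf

# mish(x) = x * tanh(softplus(x))
x = tf.constant([-1.0, 0.0, 1.0])
y = tf.keras.activations.mish(x)

# Round-trip through the legacy (pre-saving-v3) serialization format.
cfg = tf.keras.activations.serialize(
    tf.keras.activations.mish, use_legacy_format=True)
mish_fn = tf.keras.activations.deserialize(cfg, use_legacy_format=True)

# Opt-in escape hatch for loading saved models that contain arbitrary
# lambdas; only call this for files you trust.
tf.keras.__internal__.enable_unsafe_deserialization()
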
tf_module { } member_method { name: "binary_focal_crossentropy" - argspec: "args=[\'target\', \'output\', \'gamma\', \'from_logits\'], varargs=None, keywords=None, defaults=[\'2.0\', \'False\'], " - } - member_method { - name: "binary_weighted_focal_crossentropy" - argspec: "args=[\'target\', \'output\', \'alpha\', \'gamma\', \'from_logits\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\'], " + argspec: "args=[\'target\', \'output\', \'apply_class_balancing\', \'alpha\', \'gamma\', \'from_logits\'], varargs=None, keywords=None, defaults=[\'False\', \'0.25\', \'2.0\', \'False\'], " } member_method { name: "cast" @@ -80,6 +76,10 @@ tf_module { name: "categorical_crossentropy" argspec: "args=[\'target\', \'output\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], " } + member_method { + name: "categorical_focal_crossentropy" + argspec: "args=[\'target\', \'output\', \'alpha\', \'gamma\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\', \'-1\'], " + } member_method { name: "clear_session" argspec: "args=[], varargs=None, keywords=None, defaults=None" @@ -502,7 +502,7 @@ tf_module { } member_method { name: "sparse_categorical_crossentropy" - argspec: "args=[\'target\', \'output\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], " + argspec: "args=[\'target\', \'output\', \'from_logits\', \'axis\', \'ignore_class\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\', \'None\'], " } member_method { name: "spatial_2d_padding" diff --git a/keras/api/golden/v1/tensorflow.keras.callbacks.-callback-list.pbtxt b/keras/api/golden/v1/tensorflow.keras.callbacks.-callback-list.pbtxt index 3835ea4c944a..d3b5171b22c1 100644 --- a/keras/api/golden/v1/tensorflow.keras.callbacks.-callback-list.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.callbacks.-callback-list.pbtxt @@ -10,6 +10,10 @@ tf_class { name: "append" argspec: "args=[\'self\', \'callback\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "make_logs" + argspec: "args=[\'self\', \'model\', \'logs\', \'outputs\', \'mode\', \'prefix\'], varargs=None, keywords=None, defaults=[\'\'], " + } member_method { name: "on_batch_begin" argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/keras/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt b/keras/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt index 75512300c8ab..2f6f3059b9b0 100644 --- a/keras/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\'], " + argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\', \'start_from_epoch\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\', \'0\'], " } member_method { name: "get_monitor_value" diff --git a/keras/api/golden/v1/tensorflow.keras.callbacks.-sidecar-evaluator-model-export.pbtxt b/keras/api/golden/v1/tensorflow.keras.callbacks.-sidecar-evaluator-model-export.pbtxt new file 
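
The backend hunk above folds the removed binary_weighted_focal_crossentropy into binary_focal_crossentropy behind an apply_class_balancing flag, adds categorical_focal_crossentropy, and gives sparse_categorical_crossentropy an ignore_class argument. A sketch using the new signatures; all tensors are toy values:

import tensorflow as tf
from tensorflow.keras import backend as K

y_true = tf.constant([[1.0], [0.0]])
y_pred = tf.constant([[0.9], [0.2]])

# apply_class_balancing=True reproduces the removed "weighted" variant:
# positives are weighted by alpha, negatives by 1 - alpha.
bfc = K.binary_focal_crossentropy(
    y_true, y_pred, apply_class_balancing=True, alpha=0.25, gamma=2.0)

# New multi-class counterpart.
cfc = K.categorical_focal_crossentropy(
    tf.constant([[0.0, 1.0, 0.0]]), tf.constant([[0.1, 0.8, 0.1]]),
    alpha=0.25, gamma=2.0)

# ignore_class masks a label out of the loss, e.g. a "void" class in
# segmentation targets; here the second example is ignored.
scc = K.sparse_categorical_crossentropy(
    tf.constant([0, 2]),
    tf.constant([[0.9, 0.05, 0.05], [0.3, 0.3, 0.4]]),
    ignore_class=2)
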
mode 100644 index 000000000000..0a33bbb4e389 --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.callbacks.-sidecar-evaluator-model-export.pbtxt @@ -0,0 +1,83 @@ +path: "tensorflow.keras.callbacks.SidecarEvaluatorModelExport" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'export_filepath\', \'checkpoint_filepath\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "on_batch_begin" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_batch_end" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_epoch_begin" + argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_epoch_end" + argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_predict_batch_begin" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_predict_batch_end" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_predict_begin" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_predict_end" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_test_batch_begin" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_test_batch_end" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_test_begin" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_test_end" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "on_train_batch_begin" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_train_batch_end" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_train_begin" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_train_end" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "set_model" + argspec: "args=[\'self\', \'model\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_params" + argspec: "args=[\'self\', \'params\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.callbacks.pbtxt b/keras/api/golden/v1/tensorflow.keras.callbacks.pbtxt index 31716a24407a..1d92b38192a5 100644 --- a/keras/api/golden/v1/tensorflow.keras.callbacks.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.callbacks.pbtxt @@ -48,6 +48,10 @@ tf_module { name: "RemoteMonitor" mtype: "" } + member { + name: "SidecarEvaluatorModelExport" + mtype: "" + } member { name: "TensorBoard" mtype: "" diff --git 
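
Alongside the new SidecarEvaluatorModelExport golden file added here, the EarlyStopping hunk above grows a start_from_epoch argument. A usage sketch; the monitor choice, patience values, and all filesystem paths are illustrative, and the description of the new callback is paraphrased from its name and constructor arguments:

import tensorflow as tf

# Skip the first 10 warm-up epochs before early stopping starts counting.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=3, start_from_epoch=10,
    restore_best_weights=True)

# Meant to pair with SidecarEvaluator-style training: reloads the best
# checkpoint from checkpoint_filepath and exports it as a SavedModel.
export_best = tf.keras.callbacks.SidecarEvaluatorModelExport(
    export_filepath="/tmp/export/best",
    checkpoint_filepath="/tmp/ckpt/best")

# model.fit(..., callbacks=[early_stop, export_best])
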
a/keras/api/golden/v1/tensorflow.keras.constraints.-constraint.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.-constraint.pbtxt index b13e4c558f14..ebce5a630d42 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.-constraint.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.-constraint.pbtxt @@ -5,6 +5,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.-max-norm.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.-max-norm.pbtxt index b96e2fdc7649..751357a36cbf 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.-max-norm.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.-max-norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'max_value\', \'axis\'], varargs=None, keywords=None, defaults=[\'2\', \'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.-min-max-norm.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.-min-max-norm.pbtxt index 85017a5ab9fa..f385c813ca5c 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.-min-max-norm.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.-min-max-norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'min_value\', \'max_value\', \'rate\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'1.0\', \'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.-non-neg.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.-non-neg.pbtxt index 278f33d15b82..ab3251209eff 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.-non-neg.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.-non-neg.pbtxt @@ -6,6 +6,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.-radial-constraint.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.-radial-constraint.pbtxt index 9fa92b2ccc62..54e6adf3e719 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.-radial-constraint.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.-radial-constraint.pbtxt @@ -6,6 +6,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.-unit-norm.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.-unit-norm.pbtxt index 
a8ebd4eb371b..b821bbb8acc0 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.-unit-norm.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.-unit-norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'axis\'], varargs=None, keywords=None, defaults=[\'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.max_norm.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.max_norm.pbtxt index bc201d9df1fb..42aeaf7e0f02 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.max_norm.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.max_norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'max_value\', \'axis\'], varargs=None, keywords=None, defaults=[\'2\', \'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.min_max_norm.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.min_max_norm.pbtxt index e260340d0c25..47ab0d1105bf 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.min_max_norm.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.min_max_norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'min_value\', \'max_value\', \'rate\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'1.0\', \'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.non_neg.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.non_neg.pbtxt index 4f8c1d767db8..0a8c23153108 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.non_neg.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.non_neg.pbtxt @@ -6,6 +6,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.pbtxt index 29444ef3405f..be3658a12225 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.pbtxt @@ -46,7 +46,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -54,6 +54,6 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'constraint\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'constraint\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git 
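
Every constraint class in these hunks gains a from_config classmethod, and the constraints module's serialize/deserialize pick up the same use_legacy_format switch seen elsewhere in this diff. A round-trip sketch; the max_value and axis are arbitrary:

import tensorflow as tf

c = tf.keras.constraints.MaxNorm(max_value=2, axis=0)

# Class-level round trip, now uniform with other Keras objects.
c2 = tf.keras.constraints.MaxNorm.from_config(c.get_config())

# Module-level round trip in the legacy serialization format.
blob = tf.keras.constraints.serialize(c, use_legacy_format=True)
c3 = tf.keras.constraints.deserialize(blob, use_legacy_format=True)
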
a/keras/api/golden/v1/tensorflow.keras.constraints.radial_constraint.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.radial_constraint.pbtxt index 8dca693a318b..78d401b280ff 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.radial_constraint.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.radial_constraint.pbtxt @@ -6,6 +6,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.constraints.unit_norm.pbtxt b/keras/api/golden/v1/tensorflow.keras.constraints.unit_norm.pbtxt index 1aa9da9db057..137cb505e73c 100644 --- a/keras/api/golden/v1/tensorflow.keras.constraints.unit_norm.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.constraints.unit_norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'axis\'], varargs=None, keywords=None, defaults=[\'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.datasets.reuters.pbtxt b/keras/api/golden/v1/tensorflow.keras.datasets.reuters.pbtxt index 2da4a13067f2..6f6446eb4296 100644 --- a/keras/api/golden/v1/tensorflow.keras.datasets.reuters.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.datasets.reuters.pbtxt @@ -1,5 +1,9 @@ path: "tensorflow.keras.datasets.reuters" tf_module { + member_method { + name: "get_label_names" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_word_index" argspec: "args=[\'path\'], varargs=None, keywords=None, defaults=[\'reuters_word_index.json\'], " diff --git a/keras/api/golden/v1/tensorflow.keras.experimental.-cosine-decay.pbtxt b/keras/api/golden/v1/tensorflow.keras.experimental.-cosine-decay.pbtxt index cd4acbef5375..81bdedcb4e2e 100644 --- a/keras/api/golden/v1/tensorflow.keras.experimental.-cosine-decay.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.experimental.-cosine-decay.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], " + argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\', \'warmup_target\', \'warmup_steps\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\', \'None\', \'0\'], " } member_method { name: "from_config" diff --git a/keras/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt b/keras/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt index 4324f56e2fc7..8301a65833d6 100644 --- a/keras/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -13,10 +13,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: 
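Two functional additions surface in the hunks above: `datasets.reuters.get_label_names()` and linear-warmup support on `CosineDecay` via `warmup_target`/`warmup_steps` (defaults `None`/`0`, i.e. warmup disabled). A sketch of the warmup form, assuming the schedule ramps from `initial_learning_rate` to `warmup_target` over `warmup_steps` before the cosine decay begins:

    import tensorflow as tf

    schedule = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=0.0,   # warmup starting point
        decay_steps=10_000,          # cosine phase length, after warmup
        warmup_target=1e-3,          # peak learning rate reached by warmup
        warmup_steps=1_000,
    )
    optimizer = tf.keras.optimizers.SGD(learning_rate=schedule)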
"distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -53,6 +61,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -113,6 +125,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -177,13 +193,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -217,6 +241,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +261,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -253,6 +289,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -265,13 +305,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, 
keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -311,7 +359,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -327,7 +379,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt b/keras/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt index 5f0bfddb6bb1..e87a1ec3ddc6 100644 --- a/keras/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'features\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt 
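Three behavior-relevant signature changes land in the hunk above: `load_weights` swaps the order of `by_name` and `skip_mismatch` (callers passing them positionally need updating), `save` shrinks to `(filepath, overwrite, save_format, **kwargs)`, and `summary` gains `layer_range`. A sketch of the latter two (the layer names assume the defaults of a fresh session):

    import tensorflow as tf

    model = tf.keras.Sequential(
        [tf.keras.layers.Dense(8, input_shape=(4,)), tf.keras.layers.Dense(1)]
    )

    model.save("model_dir", save_format="tf")        # explicit SavedModel
    model.summary(layer_range=["dense", "dense_1"])  # print only that slice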
b/keras/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt index ed849f0c4597..44e02e9b4cad 100644 --- a/keras/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -13,10 +13,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -53,6 +61,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -113,6 +125,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -177,13 +193,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -217,6 +241,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +261,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -253,6 +289,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + 
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -265,13 +305,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -311,7 +359,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -327,7 +379,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v1/tensorflow.keras.experimental.pbtxt b/keras/api/golden/v1/tensorflow.keras.experimental.pbtxt index d719121da99f..c658bcdc5b69 100644 --- a/keras/api/golden/v1/tensorflow.keras.experimental.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.experimental.pbtxt @@ -20,12 +20,4 @@ tf_module { name: "WideDeepModel" mtype: "" } - member_method { - name: "export_saved_model" - argspec: "args=[\'model\', \'saved_model_path\', \'custom_objects\', \'as_text\', \'input_signature\', \'serving_only\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], " - } - member_method { - name: "load_from_saved_model" - argspec: "args=[\'saved_model_path\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " - } } diff --git a/keras/api/golden/v1/tensorflow.keras.export.-export-archive.pbtxt b/keras/api/golden/v1/tensorflow.keras.export.-export-archive.pbtxt new file mode 100644 index 000000000000..4b245b4b999e --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.export.-export-archive.pbtxt @@ -0,0 +1,27 @@ +path: "tensorflow.keras.export.ExportArchive" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + 
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_endpoint" + argspec: "args=[\'self\', \'name\', \'fn\', \'input_signature\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable_collection" + argspec: "args=[\'self\', \'name\', \'variables\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "track" + argspec: "args=[\'self\', \'resource\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "write_out" + argspec: "args=[\'self\', \'filepath\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], " + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.export.pbtxt b/keras/api/golden/v1/tensorflow.keras.export.pbtxt new file mode 100644 index 000000000000..ee81034d6104 --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.export.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.keras.export" +tf_module { + member { + name: "ExportArchive" + mtype: "" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.initializers.-initializer.pbtxt b/keras/api/golden/v1/tensorflow.keras.initializers.-initializer.pbtxt index bbbf17dcface..848e5d352657 100644 --- a/keras/api/golden/v1/tensorflow.keras.initializers.-initializer.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.initializers.-initializer.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.initializers.Initializer" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v1/tensorflow.keras.initializers.pbtxt b/keras/api/golden/v1/tensorflow.keras.initializers.pbtxt index 11794d5005ad..b8832017c3c3 100644 --- a/keras/api/golden/v1/tensorflow.keras.initializers.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.initializers.pbtxt @@ -106,7 +106,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -114,6 +114,6 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'initializer\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'initializer\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt index bb63c66b2c51..d7238394f940 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -163,6 +163,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" @@ -191,6 +195,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -227,6 +235,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt index 22ac65768a1c..d1ee21e3e902 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt index 4b2adcb785c0..8c47a61250e0 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: 
"get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-add.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-add.pbtxt index d6fc58c323b4..5127ff3dfaf2 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-add.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-add.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-additive-attention.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-additive-attention.pbtxt index a182400aba45..8ed84a4a760b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-additive-attention.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-additive-attention.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,9 +157,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'return_attention_scores\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\'], " + argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'return_attention_scores\', 
\'use_causal_mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], " } member_method { name: "compute_mask" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt index f6ae42888aa8..b65b0c1c182c 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-attention.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-attention.pbtxt index dfba79459a37..c8c3027e9f66 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-attention.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-attention.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,9 +157,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], 
varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'return_attention_scores\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\'], " + argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'return_attention_scores\', \'use_causal_mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], " } member_method { name: "compute_mask" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt index 5a2274e65da3..d1d687125d83 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt index 0758cd27ac34..c3c3f70274a7 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" 
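As the `call` argspecs above show, `Attention` and `AdditiveAttention` gain `use_causal_mask` (default `False`). A sketch of decoder-style self-attention with the new flag:

    import tensorflow as tf

    query = tf.random.normal((2, 8, 16))
    value = tf.random.normal((2, 8, 16))

    attn = tf.keras.layers.Attention()
    # Applies a lower-triangular mask so position i attends only to j <= i.
    out = attn([query, value], use_causal_mask=True)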
is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt index bcf4b5d80bf0..cdd976ab992b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-average.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-average.pbtxt index 85dabd3a64c1..5552bd555473 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-average.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-average.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" 
is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt index 43b071dc39ac..0fb5acc44d0a 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt index 003b77ca6d25..b46848ddfc0d 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: 
"" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt index 3eec8aea498d..c5f4a9b9b827 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt index fc3a6fca4d7a..81ab7531f219 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: 
"" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -130,7 +130,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'axis\', \'momentum\', \'epsilon\', \'center\', \'scale\', \'beta_initializer\', \'gamma_initializer\', \'moving_mean_initializer\', \'moving_variance_initializer\', \'beta_regularizer\', \'gamma_regularizer\', \'beta_constraint\', \'gamma_constraint\', \'renorm\', \'renorm_clipping\', \'renorm_momentum\', \'fused\', \'trainable\', \'virtual_batch_size\', \'adjustment\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'-1\', \'0.99\', \'0.001\', \'True\', \'True\', \'zeros\', \'ones\', \'zeros\', \'ones\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'0.99\', \'None\', \'True\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_loss" @@ -156,9 +156,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compute_mask" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt index 19f50844e54d..4df4de9226d7 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -160,6 +160,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\', \'initial_state\', \'constants\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], 
varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,9 +228,17 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-category-encoding.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-category-encoding.pbtxt index 0df48cefb4b3..dfa0cbabae9c 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-category-encoding.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-category-encoding.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'count_weights\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-center-crop.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-center-crop.pbtxt index c52a54221059..c4a5aa0e3c9a 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-center-crop.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-center-crop.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: 
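`Bidirectional.reset_states` now matches the wrapped RNN's signature, taking an optional `states` argument instead of always zeroing. A sketch for a stateful wrapper; the explicit-states form is left commented because the required nesting of forward/backward states is version-dependent:

    import numpy as np
    import tensorflow as tf

    layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(4, stateful=True))
    inputs = tf.keras.Input(batch_shape=(2, 5, 3))  # stateful RNNs need a fixed batch
    model = tf.keras.Model(inputs, layer(inputs))

    model.predict(np.zeros((2, 5, 3)), verbose=0)  # populates the states

    layer.reset_states()          # zeros, the previous behavior
    # layer.reset_states(states)  # new: supply explicit state arrays instead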
"args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt index 60920b75bbd6..229006d485a4 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m1-d.pbtxt index c47f2afd7e18..13da3b785c9f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m1-d.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -246,6 +246,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" 
argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -274,6 +278,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -310,10 +318,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt index 341952bb31f3..341d73a2cc91 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -246,6 +246,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -274,6 +278,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -310,10 +318,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m3-d.pbtxt index 2fb22764b37b..e6257107a1d1 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m3-d.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" 
is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -246,6 +246,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -274,6 +278,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -310,10 +318,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-conv1-d-transpose.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-conv1-d-transpose.pbtxt index 2ee4dbc50c27..5b3beb8b16d3 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-conv1-d-transpose.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-conv1-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt index 
af41da6af123..5dff50a6f509 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 1989036fe4c0..67f03d1ce309 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt index 
ae13a9283a5f..7413b8674afa 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index 64875c946786..c66d6ffb327b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt index 
7ab3a6d14952..5c0774f967b4 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt index ba7e168af377..7484ce7ebb52 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt 
b/keras/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt index 497bfe47f8b3..418e5d2b6bde 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index 54a19a815066..dc4369ec905b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git 
a/keras/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt index a277662f5333..47258f5833e4 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index 5f7efd7d6859..8219381a59ec 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], 
varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt index 9dc46686425b..b334463bb54e 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt index a049e4297da2..1d516ece0c4f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], 
varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt index c7b804272d5e..569ff8d26659 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt index 95d47a6b9c23..0d1f2865f73d 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, 
keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt index cdb54fafc989..1827cda0cf38 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -165,6 +165,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -193,6 +197,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,10 +241,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt index 66519a796e59..cdad1bfac324 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -165,6 +165,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -193,6 +197,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,10 +241,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: 
"args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt index 128f7e636d27..4e91e6e6709a 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'features\', \'cols_to_output_tensors\', \'training\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt index 5adb1b1ebce6..b29161038bc4 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" 
@@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-depthwise-conv1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-depthwise-conv1-d.pbtxt index bdf88e8ca557..5d3179479b72 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-depthwise-conv1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-depthwise-conv1-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt index 531c33aaa3a5..42f987270aaf 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], 
varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-discretization.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-discretization.pbtxt index 7a127fa7b94c..5563d613800d 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-discretization.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-discretization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -228,6 +236,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -236,6 +248,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt index be8dd47922f4..a43e3ea8e126 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, 
keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt index 7b5db859f05f..0c504b38714b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt index bc6cae7d82bf..338f8569be21 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + 
member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-einsum-dense.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-einsum-dense.pbtxt index e29b94e2fe12..0d878e1b6c76 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-einsum-dense.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-einsum-dense.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt index db9812c187b4..d0acb29f450e 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -129,7 +129,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'input_dim\', \'output_dim\', \'embeddings_initializer\', \'embeddings_regularizer\', \'activity_regularizer\', \'embeddings_constraint\', \'mask_zero\', \'input_length\'], varargs=None, keywords=kwargs, defaults=[\'uniform\', \'None\', \'None\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'input_dim\', \'output_dim\', \'embeddings_initializer\', \'embeddings_regularizer\', \'activity_regularizer\', 
\'embeddings_constraint\', \'mask_zero\', \'input_length\', \'sparse\'], varargs=None, keywords=kwargs, defaults=[\'uniform\', \'None\', \'None\', \'None\', \'False\', \'None\', \'False\'], " } member_method { name: "add_loss" @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt index 496304ff4865..26ff207938f5 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt index 2643beaa5715..f6fe569b9525 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt +++ 
b/keras/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -230,6 +238,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -238,6 +250,10 @@ tf_class { name: "reset_recurrent_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt index 4a3099b0d687..a6e6dec7d7b7 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -228,6 +228,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -256,6 +260,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -292,10 +300,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + 
argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt index f57338d6e9b0..cfafd9e73d29 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt index f9ffe97e40e2..03c265aeb58b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, 
defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt index bb0ca41b58cb..aaffbb42402c 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt index 78c5b4570884..5a5d64006850 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + 
member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt index f767993ce840..d211a3a0ac13 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt index d13f9da6e9e9..f98c5fe73db4 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: 
"get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt index b3c9acb03564..93ccb22cc8ac 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt index aed9b8ebb0f7..f8a2802d8e5f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, 
defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index eceeb2398af5..0c9d82c99469 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt index 0770d689735b..6aa97dfdc59e 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt index 4b61d5b49001..80177870bba2 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index 99304d23491f..8b9a4c6e7c68 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, 
defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt index b4b2e891654f..8f4bf30b4514 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt index 6f8359590304..b165d98428f1 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: 
"get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-hashing.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-hashing.pbtxt index 866f602987d8..ef1b9e56c2b2 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-hashing.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-hashing.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-identity.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-identity.pbtxt new file mode 100644 index 000000000000..3c3e39996588 --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.layers.-identity.pbtxt @@ -0,0 +1,242 @@ +path: "tensorflow.keras.layers.Identity" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } 
+ member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\', \'dynamic\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\', \'False\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, 
defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt index 796d62350d8f..7564a7f8bc7c 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt index b52a8fee62b1..b86e2487a1ea 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - 
is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -230,6 +238,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -238,6 +250,10 @@ tf_class { name: "reset_recurrent_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt index adf9bc7ca5ba..07d70ebe6935 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -228,6 +228,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -256,6 +260,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -292,10 +300,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', 
\'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt index a20e5aaa6404..bb97d088dad2 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-layer-normalization.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-layer-normalization.pbtxt index 40f56df8297f..1a81ce6f16e0 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-layer-normalization.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-layer-normalization.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" 
+ } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt index a4b82d09fc3c..b50481b62f7b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt @@ -2,8 +2,8 @@ path: "tensorflow.keras.layers.Layer" tf_class { is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -154,6 +154,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -182,6 +186,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -214,6 +222,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt index 6999a0d8ec4c..96cc14f91e00 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } 
member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt index 3b1a787ccda2..f8b6b11e281f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt index b078db2d0529..fb34dfb1c8e0 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: 
"args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt index 5021731d2885..cb3ac42a4afa 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt index 8dc902d78f47..0d9dc7499d58 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], 
varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt index 0d0d4841e616..e1092bf07672 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt index 514ca738be10..4696c58634a4 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, 
keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt index e6b925656d73..a021d15e3615 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt index 456185fa892b..8bea460ac28f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], 
varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt index a3267fed10f6..14a7d00de1cd 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt index cdbe440dedee..cc8218f7a9db 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, 
keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt index 5285f5c3220e..709c847a6953 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-multi-head-attention.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-multi-head-attention.pbtxt index e8cb5e7f8a68..4b8080a1b78b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-multi-head-attention.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-multi-head-attention.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,9 +155,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'query\', \'value\', \'key\', \'attention_mask\', \'return_attention_scores\', \'training\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'query\', \'value\', \'key\', \'attention_mask\', \'return_attention_scores\', \'training\', \'use_causal_mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'None\', \'False\'], " } member_method { name: "compute_mask" @@ -165,7 +169,7 @@ tf_class { } member_method { name: "compute_output_shape" - argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'query_shape\', \'value_shape\', \'key_shape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { 
name: "compute_output_signature" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt index d0c3cbb0d595..3ef05dd0015f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-normalization.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-normalization.pbtxt index f43dcd2f9b27..baa8fba13bdd 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-normalization.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-normalization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, 
keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -228,6 +236,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -236,6 +248,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt index 21589d6bb696..899af13f3363 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt index 22c083ff6d12..e08c6381543c 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { 
+ name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt index 703f18bbe89f..4dc7b8c60319 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\', \'constants\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -223,10 +231,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt index 1687d54efa2a..831131154f98 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" 
is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt index fd5601eddeb2..a401a54ae021 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-rescaling.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-rescaling.pbtxt index 238b54fb3e7e..2b52e5fa301f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-rescaling.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-rescaling.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" 
is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt index 55b178a767a7..8af2743e9061 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-resizing.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-resizing.pbtxt index 3bf862774281..f04ecffd3a19 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-resizing.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-resizing.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 
@@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt index 730d65cdc6e7..6922c5910055 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt index bfa77c16d89c..b4d943239992 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" 
is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt index f91360016768..d21d6693bcc2 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt index 94962ea83281..312c27f69b33 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" 
- is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt index d0fae29f2f6c..20da793c2a37 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -229,6 +237,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -237,6 +249,10 @@ tf_class { name: "reset_recurrent_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt 
b/keras/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt index b5f215070dc0..60a8f5172402 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -216,6 +216,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -244,6 +248,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -280,10 +288,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt index b9be91a03f91..e8e05a00ece5 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } 
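
Note on the golden updates above: every layer class in these files gains the same four methods, `get_build_config`, `build_from_config`, `save_own_variables`, and `load_own_variables`. These are the base-`Layer` hooks behind the new idempotent saving path: the first pair lets a reloaded model re-create a layer's variables without tracing `call()`, the second pair reads and writes the layer's own weights through a dict-like `store`. Below is a minimal sketch of how a custom layer might override them; `ScaledDense`, its weight names, and the `"0"`/`"1"` key layout in `store` are illustrative assumptions for this sketch, not part of this diff or a documented contract.

```python
import tensorflow as tf


class ScaledDense(tf.keras.layers.Layer):
    """Hypothetical layer illustrating the four new serialization hooks."""

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # Remember the shape ourselves so get_build_config() can return it.
        self._saved_input_shape = input_shape
        self.w = self.add_weight(name="w", shape=(input_shape[-1], self.units))
        self.scale = self.add_weight(
            name="scale", shape=(), trainable=False, initializer="ones"
        )
        super().build(input_shape)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) * self.scale

    # Build-config pair: lets deserialization rebuild the variables
    # directly from the recorded input shape, without running call().
    def get_build_config(self):
        return {"input_shape": self._saved_input_shape}

    def build_from_config(self, config):
        self.build(config["input_shape"])

    # Own-variables pair: `store` behaves like a dict mapping string keys
    # to arrays; the indices used here are an assumption for the sketch.
    def save_own_variables(self, store):
        store["0"] = self.w.numpy()
        store["1"] = self.scale.numpy()

    def load_own_variables(self, store):
        self.w.assign(store["0"])
        self.scale.assign(store["1"])
```

The default base-class implementations appear to round-trip the built input shape and enumerate `self.weights` in order, so most layers get sensible behavior without overriding anything; overriding is only needed when a layer stores state outside its tracked weights.
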
member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt index 606b7bc5f895..0f926be02b9b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt index db4d2c885fc5..1bb81438fca3 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + 
argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt index 1137eac88299..f31ec33f7cfd 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index 1c7dda9c0dc6..747de047f96c 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -163,6 +163,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'states\', \'constants\', \'training\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " @@ -191,6 +195,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -227,6 +235,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: 
"load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt index dec895ec98ee..d6bba621d770 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt index 9e04347d2a22..835f784b295f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + 
name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt index 3e13ed5ab652..814d7168679b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt index c2f1d3d12cc2..ff61b890ceef 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" 
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt index 00cc45f498f3..383e28967517 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt index 89a07682e536..b2a2d89c1748 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: 
"get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt index a05086a1651d..149f9e61613f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt index eeb09f5a6a85..2ef8d53b6940 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 
+223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt index 86805c95d9d0..5f5c510ec23f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt index 1789d6ec811c..03fc8519bb09 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, 
defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt index 82e611df04e5..0da8e034e5a8 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt index ba2ad738ee29..fb529f555a8c 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], 
varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt index 63f019cf6868..a741778c72dd 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'count_weights\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt index a5358c4b811a..b2b7d584a5fc 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], 
varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt index 06c517cf9c26..f61c4f82c5bb 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -228,6 +236,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -236,6 +248,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt index 5f9c8f541ac5..a608049a6d8a 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: 
"" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt index c93956fe0e79..e6f797f63416 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -228,6 +236,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -236,6 +248,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git 
a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt index ce3100e121f0..942ce222c3e9 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -163,6 +163,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -195,6 +199,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -227,6 +235,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt index fb98877a03cc..fdbab246741b 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt index 6135cdea2bbe..c11fb59691fb 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.pbtxt index bf1243851874..a624e03a4d94 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.pbtxt @@ -12,10 +12,6 @@ tf_module { name: "Discretization" mtype: "" } - member { - name: "HashedCrossing" - mtype: "" - } member { name: "Hashing" mtype: "" diff --git a/keras/api/golden/v1/tensorflow.keras.layers.pbtxt b/keras/api/golden/v1/tensorflow.keras.layers.pbtxt index 3596baa6505d..6ae37c06b75f 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.layers.pbtxt @@ -268,6 +268,10 @@ tf_module { name: "Hashing" mtype: "" } + member { + name: "Identity" + mtype: "" + } member { name: "InputLayer" mtype: "" @@ -494,7 +498,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', 
\'False\'], " } member_method { name: "disable_v2_dtype_behavior" @@ -522,7 +526,7 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'layer\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'layer\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "subtract" diff --git a/keras/api/golden/v1/tensorflow.keras.losses.-binary-focal-crossentropy.pbtxt b/keras/api/golden/v1/tensorflow.keras.losses.-binary-focal-crossentropy.pbtxt index 2c2a286f740e..ac49b8fc8701 100644 --- a/keras/api/golden/v1/tensorflow.keras.losses.-binary-focal-crossentropy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.losses.-binary-focal-crossentropy.pbtxt @@ -6,7 +6,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'2.0\', \'False\', \'0.0\', \'-1\', \'auto\', \'binary_focal_crossentropy\'], " + argspec: "args=[\'self\', \'apply_class_balancing\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\', \'auto\', \'binary_focal_crossentropy\'], " } member_method { name: "call" diff --git a/keras/api/golden/v1/tensorflow.keras.losses.-categorical-focal-crossentropy.pbtxt b/keras/api/golden/v1/tensorflow.keras.losses.-categorical-focal-crossentropy.pbtxt new file mode 100644 index 000000000000..f06b44ec8765 --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.losses.-categorical-focal-crossentropy.pbtxt @@ -0,0 +1,23 @@ +path: "tensorflow.keras.losses.CategoricalFocalCrossentropy" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\', \'auto\', \'categorical_focal_crossentropy\'], " + } + member_method { + name: "call" + argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.losses.-sparse-categorical-crossentropy.pbtxt b/keras/api/golden/v1/tensorflow.keras.losses.-sparse-categorical-crossentropy.pbtxt index 06d9cfe145ee..389b05c75d5d 100644 --- a/keras/api/golden/v1/tensorflow.keras.losses.-sparse-categorical-crossentropy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.losses.-sparse-categorical-crossentropy.pbtxt @@ -6,7 +6,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'from_logits\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'auto\', \'sparse_categorical_crossentropy\'], " + argspec: "args=[\'self\', \'from_logits\', \'ignore_class\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'auto\', \'sparse_categorical_crossentropy\'], " } member_method { name: "call" diff --git a/keras/api/golden/v1/tensorflow.keras.losses.pbtxt b/keras/api/golden/v1/tensorflow.keras.losses.pbtxt index 
diff --git a/keras/api/golden/v1/tensorflow.keras.losses.pbtxt b/keras/api/golden/v1/tensorflow.keras.losses.pbtxt index b3294965eeff..2b628cdc7943 100644 --- a/keras/api/golden/v1/tensorflow.keras.losses.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.losses.pbtxt @@ -12,6 +12,10 @@ tf_module { name: "CategoricalCrossentropy" mtype: "" } + member { + name: "CategoricalFocalCrossentropy" + mtype: "" + } member { name: "CategoricalHinge" mtype: "" @@ -94,12 +98,16 @@ tf_module { } member_method { name: "binary_focal_crossentropy" - argspec: "args=[\'y_true\', \'y_pred\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'2.0\', \'False\', \'0.0\', \'-1\'], " + argspec: "args=[\'y_true\', \'y_pred\', \'apply_class_balancing\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\'], " } member_method { name: "categorical_crossentropy" argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'0.0\', \'-1\'], " } + member_method { + name: "categorical_focal_crossentropy" + argspec: "args=[\'y_true\', \'y_pred\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\'], " + } member_method { name: "categorical_hinge" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -118,7 +126,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'name\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'name\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -186,11 +194,11 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'loss\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'loss\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "sparse_categorical_crossentropy" - argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], " + argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\', \'ignore_class\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\', \'None\'], " } member_method { name: "squared_hinge" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-a-u-c.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-a-u-c.pbtxt index 35f9a429b865..171da23f3bc1 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-a-u-c.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-a-u-c.pbtxt @@ -1,11 +1,11 @@ path: "tensorflow.keras.metrics.AUC" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -160,6 +160,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec:
"args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -224,6 +232,10 @@ tf_class { name: "interpolate_pr_auc" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -240,6 +252,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt index 26fbd0b585bf..863b948441e9 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.Accuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt index 789c93e9c821..4b8759cf7628 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.BinaryAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" 
is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-binary-crossentropy.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-binary-crossentropy.pbtxt index 4e88a2ad5ddd..16228d4229f2 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-binary-crossentropy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-binary-crossentropy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.BinaryCrossentropy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-binary-io-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-binary-io-u.pbtxt index 590f84d1e583..49e4ac2946e7 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-binary-io-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-binary-io-u.pbtxt @@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.BinaryIoU" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt index f910dc4b0696..c56abceaeb13 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.CategoricalAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, 
defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt index 27abc004b332..92d50ec7a5f1 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.CategoricalCrossentropy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -133,7 +133,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'label_smoothing\'], varargs=None, keywords=None, defaults=[\'categorical_crossentropy\', \'None\', \'False\', \'0\'], " + argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'categorical_crossentropy\', \'None\', \'False\', \'0\', \'-1\'], " } member_method { name: "add_loss" @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, 
keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-hinge.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-hinge.pbtxt index 4bb20d940f1f..f4386171e6f5 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-hinge.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-categorical-hinge.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.CategoricalHinge" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-cosine-similarity.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-cosine-similarity.pbtxt index 0dbf94fa93df..221cbe34edd0 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-cosine-similarity.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-cosine-similarity.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.CosineSimilarity" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { 
name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-f-beta-score.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-f-beta-score.pbtxt new file mode 100644 index 000000000000..37847a1f933d --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-f-beta-score.pbtxt @@ -0,0 +1,263 @@ +path: "tensorflow.keras.metrics.FBetaScore" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'average\', \'beta\', \'threshold\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'None\', \'fbeta_score\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, 
defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregationV2.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'y_true_shape\', \'y_pred_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "merge_state" + argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_states" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "result" + argspec: "args=[\'self\'], varargs=None, 
keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_state" + argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-f1-score.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-f1-score.pbtxt new file mode 100644 index 000000000000..56d233b0b5fc --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-f1-score.pbtxt @@ -0,0 +1,264 @@ +path: "tensorflow.keras.metrics.F1Score" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'average\', \'threshold\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'f1_score\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregationV2.SUM\', 
\'VariableSynchronization.ON_READ\', \'None\', \'None\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'y_true_shape\', \'y_pred_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "merge_state" + argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_states" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_state" + argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +}
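
The two new golden files above add tf.keras.metrics.FBetaScore and its beta=1 special case F1Score (previously available in TensorFlow Addons) to the public metrics API. Both expect one-hot or multilabel (batch, num_classes) inputs; average selects per-class scores (None), "micro", "macro", or "weighted" reduction, and threshold=None falls back to an argmax of y_pred. A hedged sketch with invented tensors:

    import tensorflow as tf

    f1 = tf.keras.metrics.F1Score(average="macro", threshold=0.5)
    fbeta = tf.keras.metrics.FBetaScore(average="macro", beta=2.0, threshold=0.5)

    y_true = tf.constant([[1.0, 0.0, 0.0],
                          [0.0, 1.0, 0.0],
                          [0.0, 0.0, 1.0]])
    y_pred = tf.constant([[0.9, 0.05, 0.05],
                          [0.2, 0.7, 0.1],
                          [0.1, 0.2, 0.7]])

    f1.update_state(y_true, y_pred)
    fbeta.update_state(y_true, y_pred)
    print(f1.result().numpy(), fbeta.result().numpy())
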
argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt index ad1ffb7d5e1d..12518c046e4d 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.FalseNegatives" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt index 0dfa8b5ee1a6..d3a260bc7f5f 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.FalsePositives" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { 
name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-hinge.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-hinge.pbtxt index b9ef8b808189..c01adca8b432 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-hinge.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-hinge.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.Hinge" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-io-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-io-u.pbtxt index c8e3cac66dac..3b3e4ed1e707 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-io-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-io-u.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.IoU" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { 
@@ -131,7 +131,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_classes\', \'target_class_ids\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'num_classes\', \'target_class_ids\', \'name\', \'dtype\', \'ignore_class\', \'sparse_y_true\', \'sparse_y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'True\', \'-1\'], " } member_method { name: "add_loss" @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-k-l-divergence.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-k-l-divergence.pbtxt index 2c31b5fccac2..8fe4028c968d 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-k-l-divergence.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-k-l-divergence.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.KLDivergence" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + 
member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-log-cosh-error.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-log-cosh-error.pbtxt index 81ff9033cdac..862a2c127f69 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-log-cosh-error.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-log-cosh-error.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.LogCoshError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-error.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-error.pbtxt index 50832f259e8d..4db047358108 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-error.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-error.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.MeanAbsoluteError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', 
\'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt index dfc975031555..c1a4285ba95d 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.MeanAbsolutePercentageError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } 
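Every metric golden in this change picks up the new `load_own_variables`/`save_own_variables` hooks (argspec `args=['self', 'store']`). A minimal sketch of the intended round-trip, assuming a Keras build that ships these hooks; the dict-like `store` and its exact key format are inherited base-`Layer` behavior, not something these goldens spell out:

```python
import tensorflow as tf

m = tf.keras.metrics.MeanAbsoluteError()
m.update_state([[0.0, 1.0]], [[1.0, 1.0]])  # total=0.5, count=1

store = {}
m.save_own_variables(store)  # writes the metric's variable values into the store

m2 = tf.keras.metrics.MeanAbsoluteError()
m2.update_state([[0.0, 0.0]], [[0.0, 0.0]])  # ensure the variables exist first
m2.load_own_variables(store)  # restores total/count from the store

print(float(m.result()), float(m2.result()))  # 0.5 0.5
```

This is the mechanism the new saving format uses to move metric state in and out of a checkpoint without going through `get_weights`/`set_weights`.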
member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-io-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-io-u.pbtxt index a2c1fbea4afa..eb8b2c471f44 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-io-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-io-u.pbtxt @@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.MeanIoU" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -132,7 +132,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_classes\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'num_classes\', \'name\', \'dtype\', \'ignore_class\', \'sparse_y_true\', \'sparse_y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'True\', \'-1\'], " } member_method { name: "add_loss" @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-metric-wrapper.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-metric-wrapper.pbtxt index 951c151fdc79..d84345e14e31 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-metric-wrapper.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-metric-wrapper.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } 
member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-relative-error.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-relative-error.pbtxt index 10b3a82a0c8c..697c4e0bb74b 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-relative-error.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-relative-error.pbtxt @@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.MeanRelativeError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-squared-error.pbtxt 
b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-squared-error.pbtxt index ec4d424986b5..ceb5282f0746 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-squared-error.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-squared-error.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.MeanSquaredError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt index ecfebc72ad3b..2d5cf64c2c3d 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.MeanSquaredLogarithmicError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, 
keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-tensor.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-tensor.pbtxt index a4ee5fc8e909..6e8ba1767c97 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-mean-tensor.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-mean-tensor.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -192,6 +196,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -224,6 +232,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -240,6 +252,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt index 80d830fb7efc..c31d49e14b7f 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } 
member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-metric.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-metric.pbtxt index 905c92a33ab9..916ae93096e5 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-metric.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-metric.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -231,6 +243,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-one-hot-io-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-one-hot-io-u.pbtxt index 853ae3bcf38e..23fd50224c5c 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-one-hot-io-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-one-hot-io-u.pbtxt 
@@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.OneHotIoU" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -132,7 +132,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_classes\', \'target_class_ids\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'num_classes\', \'target_class_ids\', \'name\', \'dtype\', \'ignore_class\', \'sparse_y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'-1\'], " } member_method { name: "add_loss" @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-one-hot-mean-io-u.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-one-hot-mean-io-u.pbtxt index e20224e9b14e..98b63a62da97 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-one-hot-mean-io-u.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-one-hot-mean-io-u.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.OneHotMeanIoU" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -133,7 +133,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_classes\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'num_classes\', \'name\', \'dtype\', \'ignore_class\', \'sparse_y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'-1\'], " } member_method { name: "add_loss" @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: 
"args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-poisson.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-poisson.pbtxt index 29ccceda1abe..1d5f8c6efcb7 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-poisson.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-poisson.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.Poisson" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, 
keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-precision-at-recall.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-precision-at-recall.pbtxt index ec505dc742e9..21f1c36bdc1b 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-precision-at-recall.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-precision-at-recall.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.PrecisionAtRecall" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt index fe1822fc8d53..d9c49540edcb 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt @@ -1,11 +1,11 @@ path: "tensorflow.keras.metrics.Precision" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,10 
@@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +244,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-r2-score.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-r2-score.pbtxt new file mode 100644 index 000000000000..1e76ffb29ad4 --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-r2-score.pbtxt @@ -0,0 +1,263 @@ +path: "tensorflow.keras.metrics.R2Score" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'class_aggregation\', \'num_regressors\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'uniform_average\', \'0\', \'r2_score\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', 
\'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregationV2.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'y_true_shape\', \'y_pred_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "merge_state" + argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_states" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_state" + argspec: "args=[\'self\', 
\'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-recall-at-precision.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-recall-at-precision.pbtxt index e8ab0f6ce1c6..5aa668718b0e 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-recall-at-precision.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-recall-at-precision.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.RecallAtPrecision" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt index 52e9879a3446..e7c4864a1bbd 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt @@ -1,11 +1,11 @@ path: "tensorflow.keras.metrics.Recall" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +244,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt index cd99b1e8e29e..64671f63b4c0 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt @@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.RootMeanSquaredError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt index 0da727a14110..9b35e4f14197 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt @@ -1,12 +1,12 @@ path: 
"tensorflow.keras.metrics.SensitivityAtSpecificity" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt index d47d06739b2a..d960b99eccb4 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.SparseCategoricalAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: 
"merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt index 4fdc705aa389..c5bd4c6f59db 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.SparseCategoricalCrossentropy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -133,7 +133,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'sparse_categorical_crossentropy\', \'None\', \'False\', \'-1\'], " + argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'ignore_class\', \'axis\'], varargs=None, keywords=None, defaults=[\'sparse_categorical_crossentropy\', \'None\', \'False\', \'None\', \'-1\'], " } member_method { name: "add_loss" @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt index dd386c6cba5d..069a3e3b2727 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt +++ 
b/keras/api/golden/v1/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.SparseTopKCategoricalAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt index 15dfa9412558..9f42d1f0b3c2 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.SpecificityAtSensitivity" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + 
argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-squared-hinge.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-squared-hinge.pbtxt index 0f76c4a43b47..83437f332258 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-squared-hinge.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-squared-hinge.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.SquaredHinge" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-sum.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-sum.pbtxt index ccd3ac0c8752..6cb46d1f93e4 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-sum.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-sum.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], 
varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt index dd26258eb1bb..6355e88e1858 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.TopKCategoricalAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt index 
af0fb7936462..95bc523abd0c 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.TrueNegatives" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt index 0e1124fbc296..863fb2911873 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.TruePositives" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: 
"load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.metrics.pbtxt b/keras/api/golden/v1/tensorflow.keras.metrics.pbtxt index b9b466ae381b..40356586b0ad 100644 --- a/keras/api/golden/v1/tensorflow.keras.metrics.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.metrics.pbtxt @@ -36,6 +36,14 @@ tf_module { name: "CosineSimilarity" mtype: "" } + member { + name: "F1Score" + mtype: "" + } + member { + name: "FBetaScore" + mtype: "" + } member { name: "FalseNegatives" mtype: "" @@ -120,6 +128,10 @@ tf_module { name: "PrecisionAtRecall" mtype: "" } + member { + name: "R2Score" + mtype: "" + } member { name: "Recall" mtype: "" @@ -202,7 +214,7 @@ tf_module { } member_method { name: "binary_focal_crossentropy" - argspec: "args=[\'y_true\', \'y_pred\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'2.0\', \'False\', \'0.0\', \'-1\'], " + argspec: "args=[\'y_true\', \'y_pred\', \'apply_class_balancing\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\'], " } member_method { name: "categorical_accuracy" @@ -212,6 +224,10 @@ tf_module { name: "categorical_crossentropy" argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'0.0\', \'-1\'], " } + member_method { + name: "categorical_focal_crossentropy" + argspec: "args=[\'y_true\', \'y_pred\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\'], " + } member_method { name: "cosine" argspec: "args=[\'y_true\', \'y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'-1\'], " @@ -222,7 +238,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -290,7 +306,7 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'metric\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'metric\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "sparse_categorical_accuracy" @@ -298,7 +314,7 @@ tf_module { } member_method { name: "sparse_categorical_crossentropy" - argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], " + argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\', \'ignore_class\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\', \'None\'], " } member_method { name: "sparse_top_k_categorical_accuracy" diff --git 
a/keras/api/golden/v1/tensorflow.keras.models.-linear-model.pbtxt b/keras/api/golden/v1/tensorflow.keras.models.-linear-model.pbtxt index d98738fda8cd..a7e40b8a197c 100644 --- a/keras/api/golden/v1/tensorflow.keras.models.-linear-model.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.models.-linear-model.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -13,10 +13,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -53,6 +61,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -113,6 +125,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -177,13 +193,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -217,6 +241,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +261,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -253,6 +289,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: 
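
Every model class in these hunks picks up the same trainer-level surface: `jit_compile`, `steps_per_execution`, `distribute_reduction_method` and `autotune_steps_per_execution` become readable properties, and `compile()` accepts `pss_evaluation_shards` (sharded exact evaluation under `ParameterServerStrategy`; the default `0` above disables it). A sketch of the parts usable on any setup:

    import tensorflow as tf

    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
    model.compile(
        optimizer="adam",
        loss="mse",
        steps_per_execution=32,   # batches executed per tf.function call
        jit_compile=True,         # XLA-compile the train/test/predict steps
    )
    print(model.jit_compile, model.steps_per_execution)
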
"get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -265,13 +305,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -311,7 +359,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -327,7 +379,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v1/tensorflow.keras.models.-model.pbtxt b/keras/api/golden/v1/tensorflow.keras.models.-model.pbtxt index 8f0115b30ac0..af5a892ca740 100644 --- a/keras/api/golden/v1/tensorflow.keras.models.-model.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.models.-model.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -12,10 +12,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -52,6 +60,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -112,6 +124,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -176,13 +192,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + 
name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -216,6 +240,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +260,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -252,6 +288,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -264,13 +304,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -310,7 +358,11 @@ tf_class { } member_method { name: "save" - 
argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -326,7 +378,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt b/keras/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt index 8b85b77488b4..a6f046c2e06a 100644 --- a/keras/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -14,10 +14,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -54,6 +62,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -114,6 +126,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -182,13 +198,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -222,6 +246,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', 
\'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -238,6 +266,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -258,6 +294,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -270,13 +310,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -320,7 +368,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -336,7 +388,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v1/tensorflow.keras.models.-wide-deep-model.pbtxt 
b/keras/api/golden/v1/tensorflow.keras.models.-wide-deep-model.pbtxt index 337ec78ac8f7..ee3b09f7c98d 100644 --- a/keras/api/golden/v1/tensorflow.keras.models.-wide-deep-model.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.models.-wide-deep-model.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -13,10 +13,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -53,6 +61,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -113,6 +125,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -177,13 +193,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -217,6 +241,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +261,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -253,6 +289,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -265,13 +305,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -311,7 +359,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -327,7 +379,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v1/tensorflow.keras.models.pbtxt b/keras/api/golden/v1/tensorflow.keras.models.pbtxt index f2a185c0b9d1..8d5fd58f2776 100644 --- a/keras/api/golden/v1/tensorflow.keras.models.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.models.pbtxt @@ -22,7 +22,7 @@ tf_module { } member_method { name: "load_model" - argspec: "args=[\'filepath\', \'custom_objects\', \'compile\', \'options\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'None\'], " + argspec: "args=[\'filepath\', \'custom_objects\', \'compile\', \'safe_mode\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'True\', \'True\'], " } member_method { name: "model_from_config" @@ -38,6 +38,6 @@ tf_module { } member_method { name: "save_model" - argspec: "args=[\'model\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'model\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " } } diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt 
b/keras/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt index d0856c75be4a..ff4531cd44fb 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.optimizers.Adadelta" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt index 17f68fd67db0..4e35fed07fd1 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.optimizers.Adagrad" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt index 74fa9869ad54..697ca03f6150 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.optimizers.Adam" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt index ae0d88760eb5..c488d88b72e8 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.optimizers.Adamax" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt index 2cfd1ca6b71c..e75a11b74f4b 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.optimizers.Ftrl" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt index 2d18b1b4774b..a09e7ac9a467 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.optimizers.Nadam" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt index 5a9d33eea359..43c247557a69 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.optimizers.Optimizer" tf_class { - is_instance: "" - 
is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt index d53b8c656ddc..8b093190fb74 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.optimizers.RMSprop" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt index f354c71298ce..78fdecf4d12d 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.optimizers.SGD" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adadelta.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adadelta.pbtxt index 9c1b406a1d6f..05ae2888d367 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adadelta.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adadelta.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Adadelta" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adagrad.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adagrad.pbtxt index 736ee08e4efb..507148f08dbb 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adagrad.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adagrad.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Adagrad" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adam.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adam.pbtxt index 7d0d3b23614c..d79093442bd9 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adam.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adam.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Adam" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adamax.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adamax.pbtxt index 149d0f213893..b18db03163b8 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adamax.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-adamax.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Adamax" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-ftrl.pbtxt 
b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-ftrl.pbtxt index 9ce47c161678..b852c98df0e6 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-ftrl.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-ftrl.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Ftrl" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-nadam.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-nadam.pbtxt index 8a612f6b89b2..ef505faade82 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-nadam.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-nadam.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Nadam" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-optimizer.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-optimizer.pbtxt index 6b4bf1701f22..f28c01037044 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-optimizer.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-optimizer.pbtxt @@ -1,8 +1,7 @@ path: "tensorflow.keras.optimizers.legacy.Optimizer" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-r-m-sprop.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-r-m-sprop.pbtxt index 77a6e72a9411..f53b0568fe11 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-r-m-sprop.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-r-m-sprop.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.RMSprop" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt index f6a6dd836e72..ab1041592075 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.SGD" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.pbtxt index 94bf1bf82da6..a06dbfc73903 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.pbtxt @@ -46,14 +46,14 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "get" - argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'identifier\'], varargs=None, keywords=kwargs, defaults=None" 
} member_method { name: "serialize" - argspec: "args=[\'optimizer\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'optimizer\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt index 13a711fe288b..6df561f3342e 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], " + argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\', \'warmup_target\', \'warmup_steps\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\', \'None\', \'0\'], " } member_method { name: "from_config" diff --git a/keras/api/golden/v1/tensorflow.keras.optimizers.schedules.pbtxt b/keras/api/golden/v1/tensorflow.keras.optimizers.schedules.pbtxt index 3ecc437199f6..8ed0edccf925 100644 --- a/keras/api/golden/v1/tensorflow.keras.optimizers.schedules.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.optimizers.schedules.pbtxt @@ -30,10 +30,10 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "serialize" - argspec: "args=[\'learning_rate_schedule\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'learning_rate_schedule\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git a/keras/api/golden/v1/tensorflow.keras.pbtxt b/keras/api/golden/v1/tensorflow.keras.pbtxt index c83d9ad57524..a5592a0f08b7 100644 --- a/keras/api/golden/v1/tensorflow.keras.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.pbtxt @@ -40,6 +40,10 @@ tf_module { name: "experimental" mtype: "" } + member { + name: "export" + mtype: "" + } member { name: "initializers" mtype: "" @@ -77,11 +81,11 @@ tf_module { mtype: "" } member { - name: "utils" + name: "saving" mtype: "" } member { - name: "wrappers" + name: "utils" mtype: "" } member_method { diff --git a/keras/api/golden/v1/tensorflow.keras.regularizers.pbtxt b/keras/api/golden/v1/tensorflow.keras.regularizers.pbtxt index 96a4b193b1bd..f424d54785b0 100644 --- a/keras/api/golden/v1/tensorflow.keras.regularizers.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.regularizers.pbtxt @@ -26,7 +26,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -38,6 +38,6 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'regularizer\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'regularizer\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git a/keras/api/golden/v1/tensorflow.keras.saving.custom_object_scope.pbtxt 
b/keras/api/golden/v1/tensorflow.keras.saving.custom_object_scope.pbtxt new file mode 100644 index 000000000000..cf877e5ae4dd --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.saving.custom_object_scope.pbtxt @@ -0,0 +1,9 @@ +path: "tensorflow.keras.saving.custom_object_scope" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\'], varargs=args, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.saving.pbtxt b/keras/api/golden/v1/tensorflow.keras.saving.pbtxt new file mode 100644 index 000000000000..e1df1e64293c --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.saving.pbtxt @@ -0,0 +1,39 @@ +path: "tensorflow.keras.saving" +tf_module { + member { + name: "custom_object_scope" + mtype: "" + } + member_method { + name: "deserialize_keras_object" + argspec: "args=[\'config\', \'custom_objects\', \'safe_mode\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'True\'], " + } + member_method { + name: "get_custom_objects" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_registered_name" + argspec: "args=[\'obj\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_registered_object" + argspec: "args=[\'name\', \'custom_objects\', \'module_objects\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "load_model" + argspec: "args=[\'filepath\', \'custom_objects\', \'compile\', \'safe_mode\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'True\', \'True\'], " + } + member_method { + name: "register_keras_serializable" + argspec: "args=[\'package\', \'name\'], varargs=None, keywords=None, defaults=[\'Custom\', \'None\'], " + } + member_method { + name: "save_model" + argspec: "args=[\'model\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "serialize_keras_object" + argspec: "args=[\'obj\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.utils.-custom-object-scope.pbtxt b/keras/api/golden/v1/tensorflow.keras.utils.-custom-object-scope.pbtxt index 9e9370be68f8..3ccf719d8c8c 100644 --- a/keras/api/golden/v1/tensorflow.keras.utils.-custom-object-scope.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.utils.-custom-object-scope.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.utils.CustomObjectScope" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v1/tensorflow.keras.utils.-steps-per-execution-tuner.pbtxt b/keras/api/golden/v1/tensorflow.keras.utils.-steps-per-execution-tuner.pbtxt new file mode 100644 index 000000000000..1363d2190e1e --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.utils.-steps-per-execution-tuner.pbtxt @@ -0,0 +1,21 @@ +path: "tensorflow.keras.utils.StepsPerExecutionTuner" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "steps_per_execution" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'optimizer\', \'spe_variable\', \'interval\', \'change_spe_interval\', \'change_threshold\'], varargs=None, keywords=None, defaults=[\'5\', \'10\', \'0.1\'], " + } + member_method { + name: "start" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "stop" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} 
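The new tensorflow.keras.saving golden above pins down the promoted public saving surface: load_model/save_model grow a safe_mode switch, and register_keras_serializable and the (de)serialization helpers are exposed alongside them. A short usage sketch, assuming this API; the Scale layer and package name are invented for illustration:

import tensorflow as tf

# Register a custom layer so it reloads without a custom_objects dict;
# package/name follow the register_keras_serializable argspec above.
@tf.keras.saving.register_keras_serializable(package="Demo")
class Scale(tf.keras.layers.Layer):
    def __init__(self, factor=2.0, **kwargs):
        super().__init__(**kwargs)
        self.factor = factor

    def call(self, x):
        return x * self.factor

    def get_config(self):
        return {**super().get_config(), "factor": self.factor}

model = tf.keras.Sequential([tf.keras.Input(shape=(4,)), Scale()])
tf.keras.saving.save_model(model, "demo.keras")
# safe_mode=True (the default in the argspec) refuses to deserialize
# arbitrary lambdas; registered classes load cleanly.
restored = tf.keras.saving.load_model("demo.keras", safe_mode=True)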
diff --git a/keras/api/golden/v1/tensorflow.keras.utils.custom_object_scope.pbtxt b/keras/api/golden/v1/tensorflow.keras.utils.custom_object_scope.pbtxt index 4fa8c7af04e4..08f84e0f825f 100644 --- a/keras/api/golden/v1/tensorflow.keras.utils.custom_object_scope.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.utils.custom_object_scope.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.utils.custom_object_scope" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v1/tensorflow.keras.utils.legacy.pbtxt b/keras/api/golden/v1/tensorflow.keras.utils.legacy.pbtxt new file mode 100644 index 000000000000..267629bf49c2 --- /dev/null +++ b/keras/api/golden/v1/tensorflow.keras.utils.legacy.pbtxt @@ -0,0 +1,11 @@ +path: "tensorflow.keras.utils.legacy" +tf_module { + member_method { + name: "deserialize_keras_object" + argspec: "args=[\'identifier\', \'module_objects\', \'custom_objects\', \'printable_module_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'object\'], " + } + member_method { + name: "serialize_keras_object" + argspec: "args=[\'instance\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.utils.pbtxt b/keras/api/golden/v1/tensorflow.keras.utils.pbtxt index 675db2735114..09a7c4059fae 100644 --- a/keras/api/golden/v1/tensorflow.keras.utils.pbtxt +++ b/keras/api/golden/v1/tensorflow.keras.utils.pbtxt @@ -28,17 +28,25 @@ tf_module { name: "SequenceEnqueuer" mtype: "" } + member { + name: "StepsPerExecutionTuner" + mtype: "" + } member { name: "custom_object_scope" mtype: "" } + member { + name: "legacy" + mtype: "" + } member_method { name: "array_to_img" argspec: "args=[\'x\', \'data_format\', \'scale\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'None\'], " } member_method { name: "deserialize_keras_object" - argspec: "args=[\'identifier\', \'module_objects\', \'custom_objects\', \'printable_module_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'object\'], " + argspec: "args=[\'config\', \'custom_objects\', \'safe_mode\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'True\'], " } member_method { name: "disable_interactive_logging" @@ -86,7 +94,7 @@ tf_module { } member_method { name: "model_to_dot" - argspec: "args=[\'model\', \'show_shapes\', \'show_dtype\', \'show_layer_names\', \'rankdir\', \'expand_nested\', \'dpi\', \'subgraph\', \'layer_range\', \'show_layer_activations\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'True\', \'TB\', \'False\', \'96\', \'False\', \'None\', \'False\'], " + argspec: "args=[\'model\', \'show_shapes\', \'show_dtype\', \'show_layer_names\', \'rankdir\', \'expand_nested\', \'dpi\', \'subgraph\', \'layer_range\', \'show_layer_activations\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'True\', \'TB\', \'False\', \'96\', \'False\', \'None\', \'False\', \'False\'], " } member_method { name: "normalize" @@ -98,7 +106,7 @@ tf_module { } member_method { name: "plot_model" - argspec: "args=[\'model\', \'to_file\', \'show_shapes\', \'show_dtype\', \'show_layer_names\', \'rankdir\', \'expand_nested\', \'dpi\', \'layer_range\', \'show_layer_activations\'], varargs=None, keywords=None, defaults=[\'model.png\', \'False\', \'False\', \'True\', \'TB\', \'False\', \'96\', \'None\', \'False\'], " + argspec: "args=[\'model\', \'to_file\', \'show_shapes\', \'show_dtype\', \'show_layer_names\', \'rankdir\', 
\'expand_nested\', \'dpi\', \'layer_range\', \'show_layer_activations\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'model.png\', \'False\', \'False\', \'True\', \'TB\', \'False\', \'96\', \'None\', \'False\', \'False\'], " } member_method { name: "register_keras_serializable" @@ -110,14 +118,22 @@ tf_module { } member_method { name: "serialize_keras_object" - argspec: "args=[\'instance\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'obj\'], varargs=None, keywords=None, defaults=None" } member_method { name: "to_categorical" argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], " } + member_method { + name: "to_ordinal" + argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], " + } member_method { name: "track_tf1_style_variables" argspec: "args=[\'method\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "warmstart_embedding_matrix" + argspec: "args=[\'base_vocabulary\', \'new_vocabulary\', \'base_embeddings\', \'new_embeddings_initializer\'], varargs=None, keywords=None, defaults=[\'uniform\'], " + } } diff --git a/keras/api/golden/v1/tensorflow.keras.wrappers.pbtxt b/keras/api/golden/v1/tensorflow.keras.wrappers.pbtxt deleted file mode 100644 index 0b2fac9b7d99..000000000000 --- a/keras/api/golden/v1/tensorflow.keras.wrappers.pbtxt +++ /dev/null @@ -1,7 +0,0 @@ -path: "tensorflow.keras.wrappers" -tf_module { - member { - name: "scikit_learn" - mtype: "" - } -} diff --git a/keras/api/golden/v1/tensorflow.keras.wrappers.scikit_learn.-keras-classifier.pbtxt b/keras/api/golden/v1/tensorflow.keras.wrappers.scikit_learn.-keras-classifier.pbtxt deleted file mode 100644 index 180e05527f31..000000000000 --- a/keras/api/golden/v1/tensorflow.keras.wrappers.scikit_learn.-keras-classifier.pbtxt +++ /dev/null @@ -1,42 +0,0 @@ -path: "tensorflow.keras.wrappers.scikit_learn.KerasClassifier" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - member_method { - name: "__init__" - argspec: "args=[\'self\', \'build_fn\'], varargs=None, keywords=sk_params, defaults=[\'None\'], " - } - member_method { - name: "check_params" - argspec: "args=[\'self\', \'params\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "filter_sk_params" - argspec: "args=[\'self\', \'fn\', \'override\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "fit" - argspec: "args=[\'self\', \'x\', \'y\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "get_params" - argspec: "args=[\'self\'], varargs=None, keywords=params, defaults=None" - } - member_method { - name: "predict" - argspec: "args=[\'self\', \'x\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "predict_proba" - argspec: "args=[\'self\', \'x\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "score" - argspec: "args=[\'self\', \'x\', \'y\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "set_params" - argspec: "args=[\'self\'], varargs=None, keywords=params, defaults=None" - } -} diff --git a/keras/api/golden/v1/tensorflow.keras.wrappers.scikit_learn.-keras-regressor.pbtxt b/keras/api/golden/v1/tensorflow.keras.wrappers.scikit_learn.-keras-regressor.pbtxt deleted file mode 100644 index 0dfc03fb05e5..000000000000 --- a/keras/api/golden/v1/tensorflow.keras.wrappers.scikit_learn.-keras-regressor.pbtxt +++ 
/dev/null @@ -1,38 +0,0 @@ -path: "tensorflow.keras.wrappers.scikit_learn.KerasRegressor" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - member_method { - name: "__init__" - argspec: "args=[\'self\', \'build_fn\'], varargs=None, keywords=sk_params, defaults=[\'None\'], " - } - member_method { - name: "check_params" - argspec: "args=[\'self\', \'params\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "filter_sk_params" - argspec: "args=[\'self\', \'fn\', \'override\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "fit" - argspec: "args=[\'self\', \'x\', \'y\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "get_params" - argspec: "args=[\'self\'], varargs=None, keywords=params, defaults=None" - } - member_method { - name: "predict" - argspec: "args=[\'self\', \'x\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "score" - argspec: "args=[\'self\', \'x\', \'y\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "set_params" - argspec: "args=[\'self\'], varargs=None, keywords=params, defaults=None" - } -} diff --git a/keras/api/golden/v1/tensorflow.keras.wrappers.scikit_learn.pbtxt b/keras/api/golden/v1/tensorflow.keras.wrappers.scikit_learn.pbtxt deleted file mode 100644 index fbd4d13387a9..000000000000 --- a/keras/api/golden/v1/tensorflow.keras.wrappers.scikit_learn.pbtxt +++ /dev/null @@ -1,11 +0,0 @@ -path: "tensorflow.keras.wrappers.scikit_learn" -tf_module { - member { - name: "KerasClassifier" - mtype: "" - } - member { - name: "KerasRegressor" - mtype: "" - } -} diff --git a/keras/api/golden/v2/tensorflow.keras.-model.pbtxt b/keras/api/golden/v2/tensorflow.keras.-model.pbtxt index 679bc3d70094..a867fb43ebd1 100644 --- a/keras/api/golden/v2/tensorflow.keras.-model.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.-model.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -12,10 +12,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -52,6 +60,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -112,6 +124,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -176,13 +192,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', 
\'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -216,6 +240,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +260,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -252,6 +288,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -264,13 +304,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -310,7 +358,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -326,7 +378,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', 
\'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v2/tensorflow.keras.-sequential.pbtxt b/keras/api/golden/v2/tensorflow.keras.-sequential.pbtxt index 9c322a1e659a..fc9edeb88c5f 100644 --- a/keras/api/golden/v2/tensorflow.keras.-sequential.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.-sequential.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -14,10 +14,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -54,6 +62,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -114,6 +126,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -182,13 +198,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -222,6 +246,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -238,6 +266,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], 
varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -258,6 +294,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -270,13 +310,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -320,7 +368,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -336,7 +388,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.-keras-tensor.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.-keras-tensor.pbtxt new file mode 100644 index 000000000000..9b09b44a8bfb --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.-keras-tensor.pbtxt @@ -0,0 +1,61 @@ +path: "tensorflow.keras.__internal__.KerasTensor" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "dtype" + mtype: "" + } + member { + name: "is_tensor_like" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "node" + mtype: "" + } + member { + name: "op" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "type_spec" + mtype: "" + } + member_method { + name: "__init__" + argspec: 
"args=[\'self\', \'type_spec\', \'inferred_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "experimental_ref" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_tensor" + argspec: "args=[\'cls\', \'tensor\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_type_spec" + argspec: "args=[\'cls\', \'type_spec\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_shape" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "ref" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_shape" + argspec: "args=[\'self\', \'shape\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.-ragged-keras-tensor.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.-ragged-keras-tensor.pbtxt new file mode 100644 index 000000000000..7c91676b2f7e --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.-ragged-keras-tensor.pbtxt @@ -0,0 +1,138 @@ +path: "tensorflow.keras.__internal__.RaggedKerasTensor" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "dtype" + mtype: "" + } + member { + name: "flat_values" + mtype: "" + } + member { + name: "is_tensor_like" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "nested_row_splits" + mtype: "" + } + member { + name: "node" + mtype: "" + } + member { + name: "op" + mtype: "" + } + member { + name: "ragged_rank" + mtype: "" + } + member { + name: "row_splits" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "type_spec" + mtype: "" + } + member { + name: "values" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'type_spec\', \'inferred_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "bounding_shape" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "experimental_ref" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_tensor" + argspec: "args=[\'cls\', \'tensor\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_type_spec" + argspec: "args=[\'cls\', \'type_spec\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_shape" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "merge_dims" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "nested_row_lengths" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "nested_value_rowids" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "nrows" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "ref" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "row_lengths" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "row_limits" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + 
member_method { + name: "row_starts" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "set_shape" + argspec: "args=[\'self\', \'shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "to_sparse" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "to_tensor" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "value_rowids" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "with_flat_values" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "with_row_splits_dtype" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "with_values" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.-sparse-keras-tensor.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.-sparse-keras-tensor.pbtxt new file mode 100644 index 000000000000..c25a8784dd48 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.-sparse-keras-tensor.pbtxt @@ -0,0 +1,78 @@ +path: "tensorflow.keras.__internal__.SparseKerasTensor" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "dense_shape" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "indices" + mtype: "" + } + member { + name: "is_tensor_like" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "node" + mtype: "" + } + member { + name: "op" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "type_spec" + mtype: "" + } + member { + name: "values" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'type_spec\', \'inferred_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "experimental_ref" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_tensor" + argspec: "args=[\'cls\', \'tensor\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_type_spec" + argspec: "args=[\'cls\', \'type_spec\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_shape" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "ref" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_shape" + argspec: "args=[\'self\', \'shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_values" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-image-augmentation-layer.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-dense-attention.pbtxt similarity index 80% rename from keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-image-augmentation-layer.pbtxt rename to keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-dense-attention.pbtxt index 8e7c54168a7c..bb4b16600324 100644 --- a/keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-image-augmentation-layer.pbtxt +++ 
b/keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-dense-attention.pbtxt @@ -1,21 +1,17 @@ -path: "tensorflow.keras.__internal__.layers.BaseImageAugmentationLayer" +path: "tensorflow.keras.__internal__.layers.BaseDenseAttention" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -134,7 +130,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'rate\', \'seed\'], varargs=None, keywords=kwargs, defaults=[\'1.0\', \'None\'], " + argspec: "args=[\'self\', \'dropout\'], varargs=None, keywords=kwargs, defaults=[\'0.0\'], " } member_method { name: "add_loss" @@ -156,29 +152,17 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " + argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'return_attention_scores\', \'use_causal_mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], " } member_method { name: "compute_mask" @@ -204,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -232,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + 
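The load_own_variables/save_own_variables pair being added across these layer goldens (together with get_build_config/build_from_config) is the per-layer state hook used by the newer saving path. A hedged sketch of overriding the pair in a custom layer; by default, weights are keyed by string indices:

import tensorflow as tf

class Doubler(tf.keras.layers.Layer):
    def build(self, input_shape):
        self.w = self.add_weight(name="w", shape=(), initializer="ones")

    def call(self, x):
        return x * self.w

    # Mirror the default contract: a dict-like store keyed by strings.
    def save_own_variables(self, store):
        store["0"] = self.w.numpy()

    def load_own_variables(self, store):
        self.w.assign(store["0"])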
member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-random-layer.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-random-layer.pbtxt index 7f8976f0c0bf..68aa8fd65565 100644 --- a/keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-random-layer.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.layers.-base-random-layer.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -129,7 +129,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'seed\', \'force_generator\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " + argspec: "args=[\'self\', \'seed\', \'force_generator\', \'rng_type\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\', \'None\'], " } member_method { name: "add_loss" @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.layers.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.layers.pbtxt index 429049587d64..8f5b1b170689 100644 --- a/keras/api/golden/v2/tensorflow.keras.__internal__.layers.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.layers.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.__internal__.layers" tf_module { member { - name: "BaseImageAugmentationLayer" + name: "BaseDenseAttention" mtype: "" } member { diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.losses.-loss-function-wrapper.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.losses.-loss-function-wrapper.pbtxt new file mode 100644 index 000000000000..b59c57da8ce6 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.losses.-loss-function-wrapper.pbtxt @@ -0,0 +1,22 @@ +path: "tensorflow.keras.__internal__.losses.LossFunctionWrapper" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'fn\', \'reduction\', 
\'name\'], varargs=None, keywords=kwargs, defaults=[\'auto\', \'None\'], " + } + member_method { + name: "call" + argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.losses.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.losses.pbtxt index 02bc17e14dc5..d2b2abf80f42 100644 --- a/keras/api/golden/v2/tensorflow.keras.__internal__.losses.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.losses.pbtxt @@ -1,5 +1,9 @@ path: "tensorflow.keras.__internal__.losses" tf_module { + member { + name: "LossFunctionWrapper" + mtype: "" + } member_method { name: "compute_weighted_loss" argspec: "args=[\'losses\', \'sample_weight\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'sum_over_batch_size\', \'None\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.optimizers.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.optimizers.pbtxt new file mode 100644 index 000000000000..5afce7e73dd1 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.optimizers.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.keras.__internal__.optimizers" +tf_module { + member_method { + name: "convert_to_legacy_optimizer" + argspec: "args=[\'optimizer\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.pbtxt index eca0637f5fb8..aadf3076c120 100644 --- a/keras/api/golden/v2/tensorflow.keras.__internal__.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.pbtxt @@ -1,5 +1,17 @@ path: "tensorflow.keras.__internal__" tf_module { + member { + name: "KerasTensor" + mtype: "" + } + member { + name: "RaggedKerasTensor" + mtype: "" + } + member { + name: "SparseKerasTensor" + mtype: "" + } member { name: "backend" mtype: "" @@ -16,6 +28,10 @@ tf_module { name: "models" mtype: "" } + member { + name: "optimizers" + mtype: "" + } member { name: "utils" mtype: "" @@ -24,4 +40,8 @@ tf_module { name: "apply_name_scope_on_model_declaration" argspec: "args=[\'enable\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "enable_unsafe_deserialization" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } } diff --git a/keras/api/golden/v2/tensorflow.keras.__internal__.utils.pbtxt b/keras/api/golden/v2/tensorflow.keras.__internal__.utils.pbtxt index f604525fb8f0..ab38e0f70014 100644 --- a/keras/api/golden/v2/tensorflow.keras.__internal__.utils.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.__internal__.utils.pbtxt @@ -6,7 +6,7 @@ tf_module { } member_method { name: "layer_test" - argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + argspec: "args=[\'layer_cls\', \'kwargs\', \'input_shape\', \'input_dtype\', \'input_data\', \'expected_output\', \'expected_output_dtype\', \'expected_output_shape\', \'validate_training\', \'adapt_data\', \'custom_objects\', \'test_harness\', \'supports_masking\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "register_symbolic_tensor_type" diff --git 
a/keras/api/golden/v2/tensorflow.keras.activations.pbtxt b/keras/api/golden/v2/tensorflow.keras.activations.pbtxt index 7acce4f5f6fa..863800e05306 100644 --- a/keras/api/golden/v2/tensorflow.keras.activations.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.activations.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.activations" tf_module { member_method { name: "deserialize" - argspec: "args=[\'name\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'name\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "elu" @@ -28,6 +28,10 @@ tf_module { name: "linear" argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mish" + argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "relu" argspec: "args=[\'x\', \'alpha\', \'max_value\', \'threshold\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\', \'0.0\'], " @@ -38,7 +42,7 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'activation\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'activation\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "sigmoid" diff --git a/keras/api/golden/v2/tensorflow.keras.backend.pbtxt b/keras/api/golden/v2/tensorflow.keras.backend.pbtxt index 0e1be9b5ad83..6e29da804c4d 100644 --- a/keras/api/golden/v2/tensorflow.keras.backend.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.backend.pbtxt @@ -62,11 +62,7 @@ tf_module { } member_method { name: "binary_focal_crossentropy" - argspec: "args=[\'target\', \'output\', \'gamma\', \'from_logits\'], varargs=None, keywords=None, defaults=[\'2.0\', \'False\'], " - } - member_method { - name: "binary_weighted_focal_crossentropy" - argspec: "args=[\'target\', \'output\', \'alpha\', \'gamma\', \'from_logits\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\'], " + argspec: "args=[\'target\', \'output\', \'apply_class_balancing\', \'alpha\', \'gamma\', \'from_logits\'], varargs=None, keywords=None, defaults=[\'False\', \'0.25\', \'2.0\', \'False\'], " } member_method { name: "cast" @@ -80,6 +76,10 @@ tf_module { name: "categorical_crossentropy" argspec: "args=[\'target\', \'output\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], " } + member_method { + name: "categorical_focal_crossentropy" + argspec: "args=[\'target\', \'output\', \'alpha\', \'gamma\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\', \'-1\'], " + } member_method { name: "clear_session" argspec: "args=[], varargs=None, keywords=None, defaults=None" @@ -498,7 +498,7 @@ tf_module { } member_method { name: "sparse_categorical_crossentropy" - argspec: "args=[\'target\', \'output\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], " + argspec: "args=[\'target\', \'output\', \'from_logits\', \'axis\', \'ignore_class\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\', \'None\'], " } member_method { name: "spatial_2d_padding" diff --git a/keras/api/golden/v2/tensorflow.keras.callbacks.-backup-and-restore.pbtxt b/keras/api/golden/v2/tensorflow.keras.callbacks.-backup-and-restore.pbtxt index 55ee0aae41d2..ea38be4adcd1 100644 --- a/keras/api/golden/v2/tensorflow.keras.callbacks.-backup-and-restore.pbtxt +++ 
b/keras/api/golden/v2/tensorflow.keras.callbacks.-backup-and-restore.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'backup_dir\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'backup_dir\', \'save_freq\', \'delete_checkpoint\', \'save_before_preemption\'], varargs=None, keywords=None, defaults=[\'epoch\', \'True\', \'False\'], " } member_method { name: "on_batch_begin" diff --git a/keras/api/golden/v2/tensorflow.keras.callbacks.-callback-list.pbtxt b/keras/api/golden/v2/tensorflow.keras.callbacks.-callback-list.pbtxt index 3835ea4c944a..d3b5171b22c1 100644 --- a/keras/api/golden/v2/tensorflow.keras.callbacks.-callback-list.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.callbacks.-callback-list.pbtxt @@ -10,6 +10,10 @@ tf_class { name: "append" argspec: "args=[\'self\', \'callback\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "make_logs" + argspec: "args=[\'self\', \'model\', \'logs\', \'outputs\', \'mode\', \'prefix\'], varargs=None, keywords=None, defaults=[\'\'], " + } member_method { name: "on_batch_begin" argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt b/keras/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt index 75512300c8ab..2f6f3059b9b0 100644 --- a/keras/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\'], " + argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\', \'start_from_epoch\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\', \'0\'], " } member_method { name: "get_monitor_value" diff --git a/keras/api/golden/v2/tensorflow.keras.callbacks.-sidecar-evaluator-model-export.pbtxt b/keras/api/golden/v2/tensorflow.keras.callbacks.-sidecar-evaluator-model-export.pbtxt new file mode 100644 index 000000000000..0a33bbb4e389 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.callbacks.-sidecar-evaluator-model-export.pbtxt @@ -0,0 +1,83 @@ +path: "tensorflow.keras.callbacks.SidecarEvaluatorModelExport" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'export_filepath\', \'checkpoint_filepath\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "on_batch_begin" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_batch_end" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_epoch_begin" + argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_epoch_end" + argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_predict_batch_begin" + argspec: 
"args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_predict_batch_end" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_predict_begin" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_predict_end" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_test_batch_begin" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_test_batch_end" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_test_begin" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_test_end" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "on_train_batch_begin" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_train_batch_end" + argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_train_begin" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "on_train_end" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "set_model" + argspec: "args=[\'self\', \'model\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_params" + argspec: "args=[\'self\', \'params\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.callbacks.pbtxt b/keras/api/golden/v2/tensorflow.keras.callbacks.pbtxt index 1ae71bfee1af..6b162ce1e347 100644 --- a/keras/api/golden/v2/tensorflow.keras.callbacks.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.callbacks.pbtxt @@ -52,6 +52,10 @@ tf_module { name: "RemoteMonitor" mtype: "" } + member { + name: "SidecarEvaluatorModelExport" + mtype: "" + } member { name: "TensorBoard" mtype: "" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.-constraint.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.-constraint.pbtxt index b13e4c558f14..ebce5a630d42 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.-constraint.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.-constraint.pbtxt @@ -5,6 +5,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.-max-norm.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.-max-norm.pbtxt index b96e2fdc7649..751357a36cbf 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.-max-norm.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.-max-norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'max_value\', \'axis\'], varargs=None, keywords=None, defaults=[\'2\', \'0\'], " } + member_method { + name: "from_config" + argspec: 
"args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.-min-max-norm.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.-min-max-norm.pbtxt index 85017a5ab9fa..f385c813ca5c 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.-min-max-norm.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.-min-max-norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'min_value\', \'max_value\', \'rate\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'1.0\', \'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.-non-neg.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.-non-neg.pbtxt index 278f33d15b82..ab3251209eff 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.-non-neg.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.-non-neg.pbtxt @@ -6,6 +6,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.-radial-constraint.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.-radial-constraint.pbtxt index 9fa92b2ccc62..54e6adf3e719 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.-radial-constraint.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.-radial-constraint.pbtxt @@ -6,6 +6,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.-unit-norm.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.-unit-norm.pbtxt index a8ebd4eb371b..b821bbb8acc0 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.-unit-norm.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.-unit-norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'axis\'], varargs=None, keywords=None, defaults=[\'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.max_norm.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.max_norm.pbtxt index bc201d9df1fb..42aeaf7e0f02 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.max_norm.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.max_norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'max_value\', \'axis\'], varargs=None, keywords=None, defaults=[\'2\', \'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } 
member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.min_max_norm.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.min_max_norm.pbtxt index e260340d0c25..47ab0d1105bf 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.min_max_norm.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.min_max_norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'min_value\', \'max_value\', \'rate\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'1.0\', \'0\'], " } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.non_neg.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.non_neg.pbtxt index 4f8c1d767db8..0a8c23153108 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.non_neg.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.non_neg.pbtxt @@ -6,6 +6,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.pbtxt index 29444ef3405f..be3658a12225 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.pbtxt @@ -46,7 +46,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -54,6 +54,6 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'constraint\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'constraint\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.radial_constraint.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.radial_constraint.pbtxt index 8dca693a318b..78d401b280ff 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.radial_constraint.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.radial_constraint.pbtxt @@ -6,6 +6,10 @@ tf_class { member_method { name: "__init__" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.constraints.unit_norm.pbtxt b/keras/api/golden/v2/tensorflow.keras.constraints.unit_norm.pbtxt index 1aa9da9db057..137cb505e73c 100644 --- a/keras/api/golden/v2/tensorflow.keras.constraints.unit_norm.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.constraints.unit_norm.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'axis\'], varargs=None, keywords=None, defaults=[\'0\'], " } + member_method { + name: "from_config" + argspec: 
"args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.datasets.reuters.pbtxt b/keras/api/golden/v2/tensorflow.keras.datasets.reuters.pbtxt index 2da4a13067f2..6f6446eb4296 100644 --- a/keras/api/golden/v2/tensorflow.keras.datasets.reuters.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.datasets.reuters.pbtxt @@ -1,5 +1,9 @@ path: "tensorflow.keras.datasets.reuters" tf_module { + member_method { + name: "get_label_names" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_word_index" argspec: "args=[\'path\'], varargs=None, keywords=None, defaults=[\'reuters_word_index.json\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.-layout-map.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.-layout-map.pbtxt index bcc7983c5da7..15402cd02143 100644 --- a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.-layout-map.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.-layout-map.pbtxt @@ -34,6 +34,10 @@ tf_class { name: "popitem" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "scope" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "setdefault" argspec: "args=[\'self\', \'key\', \'default\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adadelta.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adadelta.pbtxt index eda0ec11e3ed..1bde9e5882c5 100644 --- a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adadelta.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adadelta.pbtxt @@ -1,13 +1,10 @@ path: "tensorflow.keras.dtensor.experimental.optimizers.Adadelta" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -21,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'gradients_clip_option\', \'ema_option\', \'name\', \'mesh\'], varargs=None, keywords=None, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'None\', \'None\', \'Adadelta\', \'None\'], " + argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adadelta\'], " } member_method { name: "add_variable" @@ -55,7 +32,7 @@ tf_class { } member_method { name: "add_variable_from_reference" - argspec: "args=[\'self\', \'model_variable\', \'variable_name\', 
diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adadelta.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adadelta.pbtxt index eda0ec11e3ed..1bde9e5882c5 100644 --- a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adadelta.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adadelta.pbtxt @@ -1,13 +1,10 @@ path: "tensorflow.keras.dtensor.experimental.optimizers.Adadelta" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -21,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'gradients_clip_option\', \'ema_option\', \'name\', \'mesh\'], varargs=None, keywords=None, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'None\', \'None\', \'Adadelta\', \'None\'], " + argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adadelta\'], " } member_method { name: "add_variable" @@ -55,7 +32,7 @@ tf_class { } member_method { name: "add_variable_from_reference" - argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "aggregate_gradients" @@ -63,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -73,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adagrad.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adagrad.pbtxt index d4cf31e80321..792f67240803 100644 --- a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adagrad.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adagrad.pbtxt @@ -1,13 +1,10 @@ path: "tensorflow.keras.dtensor.experimental.optimizers.Adagrad" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -21,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', 
\'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'gradients_clip_option\', \'ema_option\', \'name\', \'mesh\'], varargs=None, keywords=None, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'None\', \'None\', \'Adagrad\', \'None\'], " + argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adagrad\'], " } member_method { name: "add_variable" @@ -55,7 +32,7 @@ tf_class { } member_method { name: "add_variable_from_reference" - argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "aggregate_gradients" @@ -63,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -73,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adam-w.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adam-w.pbtxt new file mode 100644 index 000000000000..2e5c929d6d21 --- /dev/null +++ 
b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adam-w.pbtxt @@ -0,0 +1,89 @@ +path: "tensorflow.keras.dtensor.experimental.optimizers.AdamW" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "iterations" + mtype: "" + } + member { + name: "learning_rate" + mtype: "" + } + member { + name: "lr" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'learning_rate\', \'weight_decay\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.004\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'AdamW\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " + } + member_method { + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "minimize" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adam.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adam.pbtxt index e2e9b31c73e8..93fe2d44bd9f 100644 --- 
a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adam.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-adam.pbtxt @@ -1,13 +1,10 @@ path: "tensorflow.keras.dtensor.experimental.optimizers.Adam" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -21,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'gradients_clip_option\', \'ema_option\', \'name\', \'mesh\'], varargs=None, keywords=None, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'None\', \'None\', \'Adam\', \'None\'], " + argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adam\'], " } member_method { name: "add_variable" @@ -55,7 +32,7 @@ tf_class { } member_method { name: "add_variable_from_reference" - argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "aggregate_gradients" @@ -63,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -73,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, 
keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-r-m-sprop.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-r-m-sprop.pbtxt index ad3117262b0e..16efcd4fc38f 100644 --- a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-r-m-sprop.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-r-m-sprop.pbtxt @@ -1,13 +1,10 @@ path: "tensorflow.keras.dtensor.experimental.optimizers.RMSprop" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -21,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'gradients_clip_option\', \'ema_option\', \'jit_compile\', \'name\', \'mesh\'], varargs=None, keywords=None, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'None\', \'None\', \'False\', \'RMSprop\', \'None\'], " + argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'100\', \'True\', \'RMSprop\'], " } member_method { name: "add_variable" @@ -55,7 +32,7 @@ tf_class { } member_method { name: "add_variable_from_reference" - argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "aggregate_gradients" @@ -63,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -73,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], 
varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-s-g-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-s-g-d.pbtxt index ad465d8a168f..e994213fe416 100644 --- a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-s-g-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.-s-g-d.pbtxt @@ -1,13 +1,10 @@ path: "tensorflow.keras.dtensor.experimental.optimizers.SGD" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -21,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'amsgrad\', \'gradients_clip_option\', \'ema_option\', \'jit_compile\', \'name\', \'mesh\'], varargs=None, keywords=None, defaults=[\'0.01\', \'0.0\', \'False\', \'False\', \'None\', \'None\', \'False\', \'SGD\', \'None\'], " + argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'0.0\', \'False\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'SGD\'], " } member_method { name: "add_variable" @@ -55,7 +32,7 @@ tf_class { } 
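The Adadelta, Adagrad, AdamW, Adam, RMSprop and SGD goldens above and below all encode one migration: the dtensor-specific gradients_clip_option/ema_option/mesh constructor arguments are replaced by the shared weight_decay, clipnorm/clipvalue/global_clipnorm, use_ema/ema_momentum/ema_overwrite_frequency and jit_compile kwargs; apply_gradients gains name and skip_gradients_aggregation; and exclude_from_weight_decay, save_own_variables and load_own_variables are new. A sketch of the resulting surface (variable names and values are illustrative, and this assumes the same API is exported under tf.keras.optimizers):

    import tensorflow as tf

    opt = tf.keras.optimizers.AdamW(
        learning_rate=1e-3,
        weight_decay=0.004,   # default pinned in the new AdamW golden
        global_clipnorm=1.0,  # shared clipping kwarg
        use_ema=True,
        ema_momentum=0.99,
    )

    w = tf.Variable([1.0, 2.0], name="kernel")
    b = tf.Variable([0.5], name="bias")
    # Bias-like variables are typically excluded from decay; call this
    # before the optimizer builds its slot variables.
    opt.exclude_from_weight_decay(var_list=[b])

    with tf.GradientTape() as tape:
        loss = tf.reduce_sum(w * w) + tf.reduce_sum(b * b)
    grads = tape.gradient(loss, [w, b])
    opt.apply_gradients(zip(grads, [w, b]), skip_gradients_aggregation=False)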
member_method { name: "add_variable_from_reference" - argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "aggregate_gradients" @@ -63,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -73,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.pbtxt index aac7440b4a86..18bd1acf13e1 100644 --- a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.optimizers.pbtxt @@ -12,6 +12,10 @@ tf_module { name: "Adam" mtype: "" } + member { + name: "AdamW" + mtype: "" + } member { name: "RMSprop" mtype: "" diff --git a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.pbtxt b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.pbtxt index 20f3bd29b566..dd963f6657dc 100644 --- a/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.dtensor.experimental.pbtxt @@ -8,8 +8,4 @@ tf_module { name: "optimizers" mtype: "" } - member_method { - name: "layout_map_scope" - argspec: "args=[], varargs=args, keywords=kwds, defaults=None" - } } diff --git 
a/keras/api/golden/v2/tensorflow.keras.experimental.-cosine-decay.pbtxt b/keras/api/golden/v2/tensorflow.keras.experimental.-cosine-decay.pbtxt index cd4acbef5375..81bdedcb4e2e 100644 --- a/keras/api/golden/v2/tensorflow.keras.experimental.-cosine-decay.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.experimental.-cosine-decay.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], " + argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\', \'warmup_target\', \'warmup_steps\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\', \'None\', \'0\'], " } member_method { name: "from_config"
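The CosineDecay hunk above folds linear warmup into the schedule: warmup_target and warmup_steps are appended to __init__, with warmup_steps defaulting to 0 so the existing no-warmup behavior is unchanged. A sketch of a warmup-then-decay schedule under that argspec (step counts and rates are illustrative):

    import tensorflow as tf

    # Ramp linearly from initial_learning_rate to warmup_target over the
    # first 1_000 steps, then decay along a cosine curve toward
    # alpha * warmup_target across the remaining decay_steps.
    schedule = tf.keras.experimental.CosineDecay(
        initial_learning_rate=0.0,
        decay_steps=10_000,
        alpha=0.0,
        warmup_target=1e-3,
        warmup_steps=1_000,
    )
    opt = tf.keras.optimizers.SGD(learning_rate=schedule)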
diff --git a/keras/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt b/keras/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt index 4324f56e2fc7..8301a65833d6 100644 --- a/keras/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -13,10 +13,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -53,6 +61,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -113,6 +125,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -177,13 +193,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -217,6 +241,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +261,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -253,6 +289,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -265,13 +305,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -311,7 +359,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -327,7 +379,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt b/keras/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt index 5f0bfddb6bb1..e87a1ec3ddc6 100644 --- a/keras/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt @@ -4,8 +4,8 @@ tf_class 
{ is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'features\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.experimental.-sidecar-evaluator.pbtxt b/keras/api/golden/v2/tensorflow.keras.experimental.-sidecar-evaluator.pbtxt index 605736dd4938..9ca14da2e737 100644 --- a/keras/api/golden/v2/tensorflow.keras.experimental.-sidecar-evaluator.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.experimental.-sidecar-evaluator.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.experimental.SidecarEvaluator" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt b/keras/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt index ed849f0c4597..44e02e9b4cad 100644 --- a/keras/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -13,10 +13,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -53,6 +61,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -113,6 +125,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -177,13 +193,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, 
keywords=None, defaults=[\'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -217,6 +241,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +261,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -253,6 +289,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -265,13 +305,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -311,7 +359,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', 
\'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -327,7 +379,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v2/tensorflow.keras.export.-export-archive.pbtxt b/keras/api/golden/v2/tensorflow.keras.export.-export-archive.pbtxt new file mode 100644 index 000000000000..4b245b4b999e --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.export.-export-archive.pbtxt @@ -0,0 +1,27 @@ +path: "tensorflow.keras.export.ExportArchive" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_endpoint" + argspec: "args=[\'self\', \'name\', \'fn\', \'input_signature\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable_collection" + argspec: "args=[\'self\', \'name\', \'variables\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "track" + argspec: "args=[\'self\', \'resource\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "write_out" + argspec: "args=[\'self\', \'filepath\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], " + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.export.pbtxt b/keras/api/golden/v2/tensorflow.keras.export.pbtxt new file mode 100644 index 000000000000..ee81034d6104 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.export.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.keras.export" +tf_module { + member { + name: "ExportArchive" + mtype: "" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-constant.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-constant.pbtxt index cd56d7c7027b..026836fe4606 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-constant.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-constant.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.Constant" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-normal.pbtxt index 7a4f2f695b19..570cb6015a70 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-normal.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.GlorotNormal" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git 
diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-constant.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-constant.pbtxt index cd56d7c7027b..026836fe4606 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-constant.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-constant.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.Constant" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-normal.pbtxt index 7a4f2f695b19..570cb6015a70 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-normal.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.GlorotNormal" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-uniform.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-uniform.pbtxt index 39e8dceebd21..4f6b5719e75c 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-uniform.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-glorot-uniform.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.GlorotUniform" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-he-normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-he-normal.pbtxt index e2392a1de059..af6f28ad7bd9 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-he-normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-he-normal.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.HeNormal" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-he-uniform.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-he-uniform.pbtxt index a1d0b78df694..a3ae35b25e82 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-he-uniform.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-he-uniform.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.HeUniform" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-identity.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-identity.pbtxt index bdf11c0d346b..11d9180d0e45 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-identity.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-identity.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.Identity" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-initializer.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-initializer.pbtxt index bbbf17dcface..848e5d352657 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-initializer.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-initializer.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.initializers.Initializer" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-lecun-normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-lecun-normal.pbtxt index 4dc8579c6726..1a3b20240c36 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-lecun-normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-lecun-normal.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.LecunNormal" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-lecun-uniform.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-lecun-uniform.pbtxt index 1cf25acc880c..cb09e8963051 100644 --- 
a/keras/api/golden/v2/tensorflow.keras.initializers.-lecun-uniform.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-lecun-uniform.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.LecunUniform" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-ones.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-ones.pbtxt index 949254b493fe..78065e847a27 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-ones.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-ones.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.Ones" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-orthogonal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-orthogonal.pbtxt index 7cf7a32a86c8..1623468564f8 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-orthogonal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-orthogonal.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.Orthogonal" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-random-normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-random-normal.pbtxt index 8301dbbf2ecc..d56e2e30d60f 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-random-normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-random-normal.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.RandomNormal" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-random-uniform.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-random-uniform.pbtxt index 809b742218b2..a80f1ea48f5e 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-random-uniform.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-random-uniform.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.RandomUniform" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-truncated-normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-truncated-normal.pbtxt index 9ea077f5e2b2..38c1b18ae58d 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-truncated-normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-truncated-normal.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.TruncatedNormal" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-variance-scaling.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-variance-scaling.pbtxt index bf6aecad7088..52b639a1ac21 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-variance-scaling.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-variance-scaling.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.VarianceScaling" tf_class { - 
is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.-zeros.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.-zeros.pbtxt index 40b430b1a17e..263040949a2d 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.-zeros.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.-zeros.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.Zeros" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.constant.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.constant.pbtxt index e560d7e5a529..fedf0b9a178e 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.constant.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.constant.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.constant" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.glorot_normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.glorot_normal.pbtxt index a2aaabf88dd4..35bbb24fa5d4 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.glorot_normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.glorot_normal.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.glorot_normal" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.glorot_uniform.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.glorot_uniform.pbtxt index 841e2648282c..76eb02bbf5bd 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.glorot_uniform.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.glorot_uniform.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.glorot_uniform" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.he_normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.he_normal.pbtxt index cc9a8717cdc2..59ee38972d47 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.he_normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.he_normal.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.he_normal" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.he_uniform.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.he_uniform.pbtxt index e3228e20d552..f1b7ce285b21 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.he_uniform.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.he_uniform.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.he_uniform" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.identity.pbtxt 
b/keras/api/golden/v2/tensorflow.keras.initializers.identity.pbtxt index abf9a4d3c025..6b4b4cee8083 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.identity.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.identity.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.identity" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.lecun_normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.lecun_normal.pbtxt index df5b58e28453..e6802630101b 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.lecun_normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.lecun_normal.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.lecun_normal" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.lecun_uniform.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.lecun_uniform.pbtxt index 741054185c4f..1d8f833fcfcd 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.lecun_uniform.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.lecun_uniform.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.initializers.lecun_uniform" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.ones.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.ones.pbtxt index 73fb315ecc4f..4b6fccb960ff 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.ones.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.ones.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.ones" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.orthogonal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.orthogonal.pbtxt index 94025290bc98..5e9e3cad98a1 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.orthogonal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.orthogonal.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.orthogonal" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.pbtxt index f39b701806a2..7c3b8f1f8d4f 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.pbtxt @@ -126,7 +126,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -134,6 +134,6 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'initializer\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'initializer\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git 
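Review note on the `tensorflow.keras.initializers.pbtxt` hunk just above: `serialize` and `deserialize` each gain a `use_legacy_format` keyword with default `False`, which suggests the newer serialization format becomes the default unless callers opt back in. A minimal round-trip sketch; the flag name and position come from this diff, while the exact semantics (legacy JSON-style config vs. the newer format) are an assumption:

```python
import tensorflow as tf

init = tf.keras.initializers.GlorotUniform(seed=7)

# Serialize/deserialize with the new keyword. use_legacy_format=True is
# assumed to reproduce the pre-change config layout; False (the default)
# selects the newer format.
cfg = tf.keras.initializers.serialize(init, use_legacy_format=True)
restored = tf.keras.initializers.deserialize(cfg, use_legacy_format=True)

assert isinstance(restored, tf.keras.initializers.GlorotUniform)
```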
a/keras/api/golden/v2/tensorflow.keras.initializers.random_normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.random_normal.pbtxt index d445f96f8c99..15ab42e95575 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.random_normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.random_normal.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.random_normal" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.random_uniform.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.random_uniform.pbtxt index b02d8cd54bd0..3e54ce21b24e 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.random_uniform.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.random_uniform.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.random_uniform" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.truncated_normal.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.truncated_normal.pbtxt index a9d0650a5742..65d698377d32 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.truncated_normal.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.truncated_normal.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.truncated_normal" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.variance_scaling.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.variance_scaling.pbtxt index eaa0ed75dc95..f598610395f2 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.variance_scaling.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.variance_scaling.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.variance_scaling" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.initializers.zeros.pbtxt b/keras/api/golden/v2/tensorflow.keras.initializers.zeros.pbtxt index 88770d1be604..2c4213342440 100644 --- a/keras/api/golden/v2/tensorflow.keras.initializers.zeros.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.initializers.zeros.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.initializers.zeros" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt index bb63c66b2c51..d7238394f940 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -163,6 +163,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: 
"call" argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" @@ -191,6 +195,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -227,6 +235,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt index 22ac65768a1c..d1ee21e3e902 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt index 4b2adcb785c0..8c47a61250e0 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, 
defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-add.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-add.pbtxt index d6fc58c323b4..5127ff3dfaf2 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-add.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-add.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-additive-attention.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-additive-attention.pbtxt index a182400aba45..8ed84a4a760b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-additive-attention.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-additive-attention.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,9 +157,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" 
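Review note: every layer file in this sync gains the `get_build_config` / `build_from_config` pair, visible in the `AbstractRNNCell`, `Activation`, and `ActivityRegularization` hunks above. A sketch of the intended flow, assuming `get_build_config` returns a small dict describing how the layer was built (for example its input shape) that `build_from_config` can replay, so variables can be recreated without a dummy forward pass:

```python
import tensorflow as tf

layer = tf.keras.layers.Dense(4)
layer.build((None, 8))                 # create variables for a known shape

build_cfg = layer.get_build_config()   # assumed shape: {"input_shape": (None, 8)}

# Reconstruct an identically configured layer and rebuild its variables
# directly from the recorded build config, without calling it on data.
clone = tf.keras.layers.Dense.from_config(layer.get_config())
clone.build_from_config(build_cfg)
assert clone.built
```

This pairs with the `save_own_variables` / `load_own_variables` hooks that the same hunks introduce; an example of those follows further down.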
+ } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'return_attention_scores\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\'], " + argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'return_attention_scores\', \'use_causal_mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], " } member_method { name: "compute_mask" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt index f6ae42888aa8..b65b0c1c182c 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-attention.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-attention.pbtxt index dfba79459a37..c8c3027e9f66 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-attention.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-attention.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: 
"" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,9 +157,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'return_attention_scores\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\'], " + argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'return_attention_scores\', \'use_causal_mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], " } member_method { name: "compute_mask" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt index 5a2274e65da3..d1d687125d83 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt 
b/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt index 0758cd27ac34..c3c3f70274a7 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt index bcf4b5d80bf0..cdd976ab992b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git 
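Review note on the `Attention` and `AdditiveAttention` hunks a little above: `call` gains a `use_causal_mask` keyword (default `False`), moving causal masking to call time. A short sketch; the shapes are illustrative only:

```python
import tensorflow as tf

attn = tf.keras.layers.Attention()
q = tf.random.normal((2, 6, 8))   # (batch, target_len, dim) query
v = tf.random.normal((2, 6, 8))   # (batch, source_len, dim) value

# New call-time keyword from this diff: each target position can only
# attend to source positions at or before it.
out = attn([q, v], use_causal_mask=True)
print(out.shape)                  # (2, 6, 8)
```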
a/keras/api/golden/v2/tensorflow.keras.layers.-average.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-average.pbtxt index 85dabd3a64c1..5552bd555473 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-average.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-average.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt index 43b071dc39ac..0fb5acc44d0a 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git 
a/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt index 003b77ca6d25..b46848ddfc0d 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt index 3eec8aea498d..c5f4a9b9b827 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git 
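Review note: the `save_own_variables` / `load_own_variables` pair recurs in every layer hunk here (pooling, merge, attention, and so on). These appear to be per-layer hooks that write a layer's variables into, and read them back from, a dict-like store keyed by strings. A sketch with a plain `dict`, which is enough for this simple case; the string-index key scheme is an assumption:

```python
import tensorflow as tf

src = tf.keras.layers.Dense(2)
src.build((None, 3))

store = {}
src.save_own_variables(store)   # assumed to write e.g. store["0"], store["1"]

dst = tf.keras.layers.Dense(2)
dst.build((None, 3))            # must be built to the same shapes first
dst.load_own_variables(store)   # copies kernel and bias back in

tf.debugging.assert_near(src.kernel, dst.kernel)
```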
a/keras/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt index 53892cff4a58..0429225779da 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -130,7 +130,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'axis\', \'momentum\', \'epsilon\', \'center\', \'scale\', \'beta_initializer\', \'gamma_initializer\', \'moving_mean_initializer\', \'moving_variance_initializer\', \'beta_regularizer\', \'gamma_regularizer\', \'beta_constraint\', \'gamma_constraint\'], varargs=None, keywords=kwargs, defaults=[\'-1\', \'0.99\', \'0.001\', \'True\', \'True\', \'zeros\', \'ones\', \'zeros\', \'ones\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'axis\', \'momentum\', \'epsilon\', \'center\', \'scale\', \'beta_initializer\', \'gamma_initializer\', \'moving_mean_initializer\', \'moving_variance_initializer\', \'beta_regularizer\', \'gamma_regularizer\', \'beta_constraint\', \'gamma_constraint\', \'synchronized\'], varargs=None, keywords=kwargs, defaults=[\'-1\', \'0.99\', \'0.001\', \'True\', \'True\', \'zeros\', \'ones\', \'zeros\', \'ones\', \'None\', \'None\', \'None\', \'None\', \'False\'], " } member_method { name: "add_loss" @@ -156,9 +156,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compute_mask" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt index 19f50844e54d..4df4de9226d7 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -160,6 
+160,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\', \'initial_state\', \'constants\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,9 +228,17 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-category-encoding.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-category-encoding.pbtxt index 0df48cefb4b3..dfa0cbabae9c 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-category-encoding.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-category-encoding.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'count_weights\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-center-crop.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-center-crop.pbtxt index c52a54221059..c4a5aa0e3c9a 100644 --- 
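Review note on the two behavioural hunks above: `BatchNormalization.__init__` gains a `synchronized` flag (default `False`) and its `call` now accepts a `mask`; `Bidirectional.reset_states` takes an optional `states` argument, matching the wrapped RNN's signature, and calling it with no argument presumably keeps the old zero-reset behaviour. A sketch of the new constructor flag, assuming `synchronized=True` aggregates batch statistics across replicas and therefore only changes anything under a `tf.distribute` strategy:

```python
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # synchronized=True: per-step mean/variance are assumed to be reduced
    # across replicas instead of computed per replica, along the lines of
    # the old SyncBatchNormalization layer.
    bn = tf.keras.layers.BatchNormalization(synchronized=True)
    model = tf.keras.Sequential([tf.keras.layers.Dense(8), bn])
```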
a/keras/api/golden/v2/tensorflow.keras.layers.-center-crop.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-center-crop.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt index 60920b75bbd6..229006d485a4 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m1-d.pbtxt index c47f2afd7e18..13da3b785c9f 100644 --- 
a/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m1-d.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -246,6 +246,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -274,6 +278,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -310,10 +318,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt index 341952bb31f3..341d73a2cc91 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -246,6 +246,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -274,6 +278,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -310,10 +318,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + 
name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m3-d.pbtxt index 2fb22764b37b..e6257107a1d1 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m3-d.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -246,6 +246,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -274,6 +278,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -310,10 +318,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-conv1-d-transpose.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-conv1-d-transpose.pbtxt index 2ee4dbc50c27..5b3beb8b16d3 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-conv1-d-transpose.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-conv1-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: 
"get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt index af41da6af123..5dff50a6f509 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 1989036fe4c0..67f03d1ce309 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: 
"get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt index ae13a9283a5f..7413b8674afa 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index 64875c946786..c66d6ffb327b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: 
"get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt index 7ab3a6d14952..5c0774f967b4 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt index ba7e168af377..7484ce7ebb52 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 
@@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt index 497bfe47f8b3..418e5d2b6bde 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index 54a19a815066..dc4369ec905b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, 
keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt index a277662f5333..47258f5833e4 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index 5f7efd7d6859..8219381a59ec 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: 
"get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt index 9dc46686425b..b334463bb54e 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -220,6 +228,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt index a049e4297da2..1d516ece0c4f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: 
"get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt index c7b804272d5e..569ff8d26659 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt index 95d47a6b9c23..0d1f2865f73d 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" 
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt index ec3fb1e2c1ff..cb71ae4d69c9 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'features\', \'cols_to_output_tensors\', \'training\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt index 5adb1b1ebce6..b29161038bc4 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, 
defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-depthwise-conv1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-depthwise-conv1-d.pbtxt index bdf88e8ca557..5d3179479b72 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-depthwise-conv1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-depthwise-conv1-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt index 531c33aaa3a5..42f987270aaf 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-discretization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-discretization.pbtxt index 7a127fa7b94c..5563d613800d 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-discretization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-discretization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -228,6 +236,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -236,6 +248,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt index be8dd47922f4..a43e3ea8e126 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, 
defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt index 7b5db859f05f..0c504b38714b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt index bc6cae7d82bf..338f8569be21 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 
@@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-einsum-dense.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-einsum-dense.pbtxt index e29b94e2fe12..0d878e1b6c76 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-einsum-dense.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-einsum-dense.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt index db9812c187b4..d0acb29f450e 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -129,7 +129,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'input_dim\', \'output_dim\', \'embeddings_initializer\', \'embeddings_regularizer\', \'activity_regularizer\', \'embeddings_constraint\', \'mask_zero\', \'input_length\'], varargs=None, keywords=kwargs, defaults=[\'uniform\', \'None\', \'None\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'input_dim\', 
\'output_dim\', \'embeddings_initializer\', \'embeddings_regularizer\', \'activity_regularizer\', \'embeddings_constraint\', \'mask_zero\', \'input_length\', \'sparse\'], varargs=None, keywords=kwargs, defaults=[\'uniform\', \'None\', \'None\', \'None\', \'False\', \'None\', \'False\'], " } member_method { name: "add_loss" @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt index 496304ff4865..26ff207938f5 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt index fc0c048df50a..0ecc1109cfac 100644 --- 
a/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -229,6 +237,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -237,6 +249,10 @@ tf_class { name: "reset_recurrent_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt index cd3eb6a40001..cabd8b355be3 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -230,6 +230,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -258,6 +262,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -302,6 +310,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ 
diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt
index 496304ff4865..26ff207938f5 100644
--- a/keras/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt
+++ b/keras/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt
@@ -3,8 +3,8 @@ tf_class {
   is_instance: ""
   is_instance: ""
   is_instance: ""
-  is_instance: ""
-  is_instance: ""
+  is_instance: ""
+  is_instance: ""
   is_instance: ""
   is_instance: ""
   member {
@@ -155,6 +155,10 @@ tf_class {
     name: "build"
     argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "build_from_config"
+    argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
@@ -183,6 +187,10 @@ tf_class {
     name: "from_config"
     argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_build_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_config"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -215,6 +223,14 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "load_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "save_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt
index fc0c048df50a..0ecc1109cfac 100644
--- a/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt
+++ b/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt
@@ -5,8 +5,8 @@ tf_class {
   is_instance: ""
   is_instance: ""
   is_instance: ""
-  is_instance: ""
-  is_instance: ""
+  is_instance: ""
+  is_instance: ""
   is_instance: ""
   is_instance: ""
   member {
@@ -157,6 +157,10 @@ tf_class {
     name: "build"
     argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "build_from_config"
+    argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -185,6 +189,10 @@ tf_class {
     name: "from_config"
     argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_build_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_config"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -229,6 +237,10 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "load_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "reset_dropout_mask"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -237,6 +249,10 @@ tf_class {
     name: "reset_recurrent_dropout_mask"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "save_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
index cd3eb6a40001..cabd8b355be3 100644
--- a/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
+++ b/keras/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
@@ -6,8 +6,8 @@ tf_class {
   is_instance: ""
   is_instance: ""
   is_instance: ""
-  is_instance: ""
-  is_instance: ""
+  is_instance: ""
+  is_instance: ""
   is_instance: ""
   is_instance: ""
   member {
@@ -230,6 +230,10 @@ tf_class {
     name: "build"
     argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "build_from_config"
+    argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
@@ -258,6 +262,10 @@ tf_class {
     name: "from_config"
     argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_build_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_config"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -302,6 +310,10 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "load_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "reset_dropout_mask"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -314,6 +326,10 @@ tf_class {
     name: "reset_states"
     argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "save_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt
index f57338d6e9b0..cfafd9e73d29 100644
--- a/keras/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt
+++ b/keras/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt
@@ -4,8 +4,8 @@ tf_class {
   is_instance: ""
   is_instance: ""
   is_instance: ""
-  is_instance: ""
-  is_instance: ""
+  is_instance: ""
+  is_instance: ""
   is_instance: ""
   is_instance: ""
   member {
@@ -156,6 +156,10 @@ tf_class {
     name: "build"
     argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "build_from_config"
+    argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -184,6 +188,10 @@ tf_class {
     name: "from_config"
     argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_build_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_config"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -216,6 +224,14 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "load_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "save_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt
index f9ffe97e40e2..03c265aeb58b 100644
--- a/keras/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt
+++ b/keras/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt
@@ -4,8 +4,8 @@ tf_class {
   is_instance: ""
   is_instance: ""
   is_instance: ""
-  is_instance: ""
-  is_instance: ""
+  is_instance: ""
+  is_instance: ""
   is_instance: ""
   is_instance: ""
   member {
@@ -156,6 +156,10 @@ tf_class {
     name: "build"
     argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "build_from_config"
+    argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -184,6 +188,10 @@ tf_class {
     name: "from_config"
     argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_build_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_config"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -216,6 +224,14 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "load_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "save_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index bb0ca41b58cb..aaffbb42402c 100644
--- a/keras/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -4,8 +4,8 @@ tf_class {
   is_instance: ""
   is_instance: ""
   is_instance: ""
-  is_instance: ""
-  is_instance: ""
+  is_instance: ""
+  is_instance: ""
   is_instance: ""
   is_instance: ""
   member {
@@ -156,6 +156,10 @@ tf_class {
     name: "build"
     argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "build_from_config"
+    argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -184,6 +188,10 @@ tf_class {
     name: "from_config"
     argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
  }
+  member_method {
+    name: "get_build_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_config"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -216,6 +224,14 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "load_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "save_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
index 78c5b4570884..5a5d64006850 100644
--- a/keras/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
+++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
@@ -4,8 +4,8 @@ tf_class {
   is_instance: ""
   is_instance: ""
   is_instance: ""
-  is_instance: ""
-  is_instance: ""
+  is_instance: ""
+  is_instance: ""
   is_instance: ""
   is_instance: ""
   member {
@@ -156,6 +156,10 @@ tf_class {
     name: "build"
     argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "build_from_config"
+    argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
@@ -184,6 +188,10 @@ tf_class {
     name: "from_config"
     argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_build_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_config"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -216,6 +224,14 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "load_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "save_own_variables"
+    argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt index b3c9acb03564..93ccb22cc8ac 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt index aed9b8ebb0f7..f8a2802d8e5f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: 
"get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index eceeb2398af5..0c9d82c99469 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt index 0770d689735b..6aa97dfdc59e 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, 
defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt index 4b61d5b49001..80177870bba2 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index 99304d23491f..8b9a4c6e7c68 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: 
"args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt index b4b2e891654f..8f4bf30b4514 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt index 6f8359590304..b165d98428f1 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, 
defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-group-normalization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-group-normalization.pbtxt new file mode 100644 index 000000000000..fced5da8192b --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.layers.-group-normalization.pbtxt @@ -0,0 +1,242 @@ +path: "tensorflow.keras.layers.GroupNormalization" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'groups\', \'axis\', \'epsilon\', \'center\', \'scale\', \'beta_initializer\', \'gamma_initializer\', \'beta_regularizer\', \'gamma_regularizer\', \'beta_constraint\', \'gamma_constraint\'], varargs=None, keywords=kwargs, defaults=[\'32\', \'-1\', \'0.001\', \'True\', \'True\', \'zeros\', \'ones\', \'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, 
defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_name_scope" + argspec: 
"args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashed-crossing.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-hashed-crossing.pbtxt similarity index 89% rename from keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashed-crossing.pbtxt rename to keras/api/golden/v2/tensorflow.keras.layers.-hashed-crossing.pbtxt index 071f3088f661..eab9f207e7bb 100644 --- a/keras/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashed-crossing.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-hashed-crossing.pbtxt @@ -1,10 +1,10 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.HashedCrossing" +path: "tensorflow.keras.layers.HashedCrossing" tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-hashing.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-hashing.pbtxt index 866f602987d8..ef1b9e56c2b2 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-hashing.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-hashing.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-identity.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-identity.pbtxt new file mode 100644 index 000000000000..3c3e39996588 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.layers.-identity.pbtxt @@ -0,0 +1,242 @@ +path: "tensorflow.keras.layers.Identity" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\', \'dynamic\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\', \'False\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + } + 
member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt index 796d62350d8f..7564a7f8bc7c 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + 
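The other brand-new golden file above is tensorflow.keras.layers.Identity, a stateless pass-through layer (its __init__ argspec carries only the generic trainable/name/dtype/dynamic arguments). One plausible use is as a structural placeholder, e.g. swapping a real layer in or out by configuration; use_dropout below is a hypothetical flag for illustration:

    import tensorflow as tf

    use_dropout = False   # hypothetical config flag
    maybe_dropout = (tf.keras.layers.Dropout(0.5) if use_dropout
                     else tf.keras.layers.Identity())

    x = tf.random.normal((2, 3))
    y = maybe_dropout(x)
    print(bool(tf.reduce_all(y == x)))   # True: inputs pass through unchanged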
member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-integer-lookup.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-integer-lookup.pbtxt index 68bdae207b82..60e70390c051 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-integer-lookup.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-integer-lookup.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -165,6 +165,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -197,6 +201,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +241,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -241,6 +257,14 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_vocabulary" argspec: "args=[\'self\', \'vocabulary\', \'idf_weights\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt 
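IntegerLookup (above) additionally gains load_assets/save_assets, directory-based hooks for vocabularies that were learned via adapt rather than passed to the constructor. A rough sketch, assuming the hooks round-trip the adapted vocabulary through a text file inside the given directory; the exact file layout is an implementation detail:

    import tempfile
    import tensorflow as tf

    lookup = tf.keras.layers.IntegerLookup()
    lookup.adapt([12, 36, 1138, 42, 42])

    asset_dir = tempfile.mkdtemp()
    lookup.save_assets(asset_dir)        # spills the learned vocabulary to disk

    restored = tf.keras.layers.IntegerLookup()
    restored.load_assets(asset_dir)      # rebuilds the lookup table from the file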
b/keras/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt index 92842b09bb2e..d038c1493fc7 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -229,6 +237,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -237,6 +249,10 @@ tf_class { name: "reset_recurrent_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt index 5b3dbd75a9d2..893a35071d8e 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -230,6 +230,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -258,6 +262,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -302,6 +310,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" 
+ } member_method { name: "reset_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -314,6 +326,10 @@ tf_class { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt index a20e5aaa6404..bb97d088dad2 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-layer-normalization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-layer-normalization.pbtxt index 40f56df8297f..1a81ce6f16e0 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-layer-normalization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-layer-normalization.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" 
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt index a4b82d09fc3c..b50481b62f7b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt @@ -2,8 +2,8 @@ path: "tensorflow.keras.layers.Layer" tf_class { is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -154,6 +154,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -182,6 +186,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -214,6 +222,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt index 6999a0d8ec4c..96cc14f91e00 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt index 3b1a787ccda2..f8b6b11e281f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt index b078db2d0529..fb34dfb1c8e0 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, 
defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt index 5021731d2885..cb3ac42a4afa 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt index 8dc902d78f47..0d9dc7499d58 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method 
{ name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt index 0d0d4841e616..e1092bf07672 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt index 514ca738be10..4696c58634a4 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: 
"get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt index e6b925656d73..a021d15e3615 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt index 456185fa892b..8bea460ac28f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method 
{ name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt index a3267fed10f6..14a7d00de1cd 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt index cdbe440dedee..cc8218f7a9db 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: 
"get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt index 5285f5c3220e..709c847a6953 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-multi-head-attention.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-multi-head-attention.pbtxt index e8cb5e7f8a68..4b8080a1b78b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-multi-head-attention.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-multi-head-attention.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,9 +155,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'query\', \'value\', \'key\', \'attention_mask\', \'return_attention_scores\', \'training\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'query\', \'value\', \'key\', \'attention_mask\', \'return_attention_scores\', \'training\', \'use_causal_mask\'], 
varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'None\', \'False\'], " } member_method { name: "compute_mask" @@ -165,7 +169,7 @@ tf_class { } member_method { name: "compute_output_shape" - argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'query_shape\', \'value_shape\', \'key_shape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_output_signature" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt index d0c3cbb0d595..3ef05dd0015f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-normalization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-normalization.pbtxt index f43dcd2f9b27..baa8fba13bdd 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-normalization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-normalization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - 
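Beyond the common hooks, the MultiHeadAttention hunks above change two signatures: call() grows a use_causal_mask flag (default False), and compute_output_shape now takes separate query/value/key shapes instead of a single input_shape. A sketch of both, with arbitrary sizes:

    import tensorflow as tf

    mha = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=8)
    x = tf.random.normal((2, 5, 16))     # (batch, seq, features)

    # Each position may now attend only to itself and earlier positions,
    # without hand-building a lower-triangular attention_mask.
    y = mha(query=x, value=x, use_causal_mask=True)

    # New signature: query/value(/key) shapes are passed separately.
    print(mha.compute_output_shape((2, 5, 16), (2, 5, 16)))   # (2, 5, 16)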
is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -228,6 +236,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -236,6 +248,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt index 21589d6bb696..899af13f3363 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt index 22c083ff6d12..e08c6381543c 100644 --- 
a/keras/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt index 703f18bbe89f..4dc7b8c60319 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\', \'constants\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -223,10 +231,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, 
keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-random-brightness.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-random-brightness.pbtxt index 817053c52aef..d246250fbd2a 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-random-brightness.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-random-brightness.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.RandomBrightness" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-random-contrast.pbtxt 
b/keras/api/golden/v2/tensorflow.keras.layers.-random-contrast.pbtxt index 608d7216123c..85454d842005 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-random-contrast.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-random-contrast.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.RandomContrast" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-random-crop.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-random-crop.pbtxt index b196d62db2af..23f80ad15a04 100644 --- 
a/keras/api/golden/v2/tensorflow.keras.layers.-random-crop.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-random-crop.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.RandomCrop" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-random-flip.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-random-flip.pbtxt index b03109243455..0807d1d10d8d 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-random-flip.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-random-flip.pbtxt @@ -1,22 +1,17 @@ path: 
"tensorflow.keras.layers.RandomFlip" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-random-height.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-random-height.pbtxt index 2cfb51b0eb9d..9ce1de081c0f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-random-height.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-random-height.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.RandomHeight" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - 
is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-random-rotation.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-random-rotation.pbtxt index 6335724e4784..df4e253ee924 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-random-rotation.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-random-rotation.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.RandomRotation" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - 
name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-random-translation.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-random-translation.pbtxt index 802d6fefb05c..97cbab083bbb 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-random-translation.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-random-translation.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.RandomTranslation" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: 
"args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-random-width.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-random-width.pbtxt index e5cb35110730..2f566e0cf939 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-random-width.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-random-width.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.RandomWidth" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', 
\'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-random-zoom.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-random-zoom.pbtxt index 3a7099acf4b2..9997add64fd2 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-random-zoom.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-random-zoom.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.RandomZoom" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt index 1687d54efa2a..831131154f98 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 
+223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt index fd5601eddeb2..a401a54ae021 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-rescaling.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-rescaling.pbtxt index 238b54fb3e7e..2b52e5fa301f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-rescaling.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-rescaling.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ 
tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt index 55b178a767a7..8af2743e9061 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-resizing.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-resizing.pbtxt index 3bf862774281..f04ecffd3a19 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-resizing.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-resizing.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt index 730d65cdc6e7..6922c5910055 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt index bfa77c16d89c..b4d943239992 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class 
{ name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt index f91360016768..d21d6693bcc2 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt index 94962ea83281..312c27f69b33 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -189,6 +193,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -221,6 +229,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt index d0fae29f2f6c..20da793c2a37 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -229,6 +237,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -237,6 +249,10 @@ tf_class { name: "reset_recurrent_dropout_mask" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt index b5f215070dc0..60a8f5172402 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -216,6 +216,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', 
\'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -244,6 +248,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -280,10 +288,18 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt index b9be91a03f91..e8e05a00ece5 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt index 606b7bc5f895..0f926be02b9b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + 
member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt index db4d2c885fc5..1bb81438fca3 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt index 1137eac88299..f31ec33f7cfd 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: 
"args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v1/tensorflow.keras.__internal__.layers.-base-image-augmentation-layer.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-spectral-normalization.pbtxt similarity index 80% rename from keras/api/golden/v1/tensorflow.keras.__internal__.layers.-base-image-augmentation-layer.pbtxt rename to keras/api/golden/v2/tensorflow.keras.layers.-spectral-normalization.pbtxt index 8e7c54168a7c..72a04d40b891 100644 --- a/keras/api/golden/v1/tensorflow.keras.__internal__.layers.-base-image-augmentation-layer.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-spectral-normalization.pbtxt @@ -1,21 +1,17 @@ -path: "tensorflow.keras.__internal__.layers.BaseImageAugmentationLayer" +path: "tensorflow.keras.layers.SpectralNormalization" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -134,7 +130,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'rate\', \'seed\'], varargs=None, keywords=kwargs, defaults=[\'1.0\', \'None\'], " + argspec: "args=[\'self\', \'layer\', \'power_iterations\'], varargs=None, keywords=kwargs, defaults=[\'1\'], " } member_method { name: "add_loss" @@ -156,29 +152,17 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, 
keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " + argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "compute_mask" @@ -202,7 +186,11 @@ tf_class { } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } member_method { name: "get_config" @@ -233,13 +221,21 @@ tf_class { argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_weights" + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "normalize_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index 1c7dda9c0dc6..747de047f96c 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -163,6 +163,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'states\', \'constants\', \'training\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " @@ -191,6 +195,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -227,6 +235,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], 
varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-string-lookup.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-string-lookup.pbtxt index d176221ddd2d..2b3d513fef9e 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-string-lookup.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-string-lookup.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -135,7 +135,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'max_tokens\', \'num_oov_indices\', \'mask_token\', \'oov_token\', \'vocabulary\', \'idf_weights\', \'encoding\', \'invert\', \'output_mode\', \'sparse\', \'pad_to_max_tokens\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'None\', \'[UNK]\', \'None\', \'None\', \'None\', \'False\', \'int\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'max_tokens\', \'num_oov_indices\', \'mask_token\', \'oov_token\', \'vocabulary\', \'idf_weights\', \'encoding\', \'invert\', \'output_mode\', \'sparse\', \'pad_to_max_tokens\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'None\', \'[UNK]\', \'None\', \'None\', \'utf-8\', \'False\', \'int\', \'False\', \'False\'], " } member_method { name: "adapt" @@ -165,6 +165,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -197,6 +201,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +241,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -241,6 +257,14 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_vocabulary" argspec: "args=[\'self\', \'vocabulary\', \'idf_weights\'], varargs=None, keywords=None, 
defaults=[\'None\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt index dec895ec98ee..d6bba621d770 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-text-vectorization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-text-vectorization.pbtxt index 00f3338c59fb..8824c0eac147 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-text-vectorization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-text-vectorization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -134,7 +134,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'max_tokens\', \'standardize\', \'split\', \'ngrams\', \'output_mode\', \'output_sequence_length\', \'pad_to_max_tokens\', \'vocabulary\', \'idf_weights\', \'sparse\', \'ragged\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'lower_and_strip_punctuation\', \'whitespace\', \'None\', \'int\', \'None\', \'False\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'max_tokens\', \'standardize\', \'split\', \'ngrams\', \'output_mode\', \'output_sequence_length\', \'pad_to_max_tokens\', \'vocabulary\', \'idf_weights\', \'sparse\', \'ragged\', \'encoding\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'lower_and_strip_punctuation\', \'whitespace\', \'None\', \'int\', \'None\', \'False\', \'None\', \'None\', \'False\', \'False\', \'utf-8\'], " } member_method { name: "adapt" @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: 
"args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +240,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -240,6 +256,14 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_vocabulary" argspec: "args=[\'self\', \'vocabulary\', \'idf_weights\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt index 9e04347d2a22..835f784b295f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt index 3e13ed5ab652..814d7168679b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt +++ 
b/keras/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-unit-normalization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-unit-normalization.pbtxt index 0bcb985a0b59..ae5f06b382a7 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-unit-normalization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-unit-normalization.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt index 
c2f1d3d12cc2..ff61b890ceef 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt index 00cc45f498f3..383e28967517 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt 
b/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt index 89a07682e536..b2a2d89c1748 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt index a05086a1651d..149f9e61613f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git 
a/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt index eeb09f5a6a85..2ef8d53b6940 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt index 86805c95d9d0..5f5c510ec23f 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, 
defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt index 1789d6ec811c..03fc8519bb09 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt index 82e611df04e5..0da8e034e5a8 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: 
"args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt index ba2ad738ee29..fb529f555a8c 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.-sync-batch-normalization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.-sync-batch-normalization.pbtxt index b848b7bea001..63b1be08dc46 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.-sync-batch-normalization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.-sync-batch-normalization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,9 +156,13 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compute_mask" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,14 @@ tf_class { name: "get_weights" argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt index 63f019cf6868..a741778c72dd 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'count_weights\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt index a5358c4b811a..b2b7d584a5fc 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: 
"get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt index 06c517cf9c26..f61c4f82c5bb 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -228,6 +236,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -236,6 +248,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashed-crossing.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashed-crossing.pbtxt index 071f3088f661..9a9602229b26 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashed-crossing.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashed-crossing.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', 
\'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt index 5f9c8f541ac5..a608049a6d8a 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt index 5170b3b1fb65..d221e8bc29be 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" 
is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -165,6 +165,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -197,6 +201,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +241,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -241,6 +257,14 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_vocabulary" argspec: "args=[\'self\', \'vocabulary\', \'idf_weights\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt index c93956fe0e79..e6f797f63416 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -228,6 +236,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } 
member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -236,6 +248,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt index ce3100e121f0..942ce222c3e9 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -163,6 +163,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -195,6 +199,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -227,6 +235,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt index 0c0ebcb55fa6..4a98b7dc741d 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomContrast" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { 
name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt index bf2d56e3eb5b..ff0e93b7a3d7 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomCrop" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: 
"args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt index 025dd55fd6f2..dcd4bc07bb1c 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomFlip" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', 
\'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt index c5fa5143983f..2d5ada3de9cb 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomHeight" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', 
\'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt index 69b8e2a539b4..634d29f45055 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomRotation" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', 
\'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt index ad1098a6d246..bfb7693580b4 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomTranslation" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', 
\'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt index 6fcd5815b885..c2d3ef92be9a 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomWidth" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt index bc9cfaca33ff..ff3f05b1f9cc 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt @@ -1,22 +1,17 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomZoom" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { name: "activity_regularizer" mtype: "" } - member { - name: "auto_vectorize" - mtype: "" - } member { name: "compute_dtype" mtype: "" @@ -157,26 +152,14 @@ tf_class { name: "add_weight" argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', 
\'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " } - member_method { - name: "augment_bounding_boxes" - argspec: "args=[\'self\', \'image\', \'bounding_boxes\', \'transformation\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "augment_image" - argspec: "args=[\'self\', \'image\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_label" - argspec: "args=[\'self\', \'label\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "augment_target" - argspec: "args=[\'self\', \'target\', \'transformation\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'True\'], " @@ -205,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,14 +220,18 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_random_transformation" - argspec: "args=[\'self\', \'image\', \'label\', \'bounding_box\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt index fb98877a03cc..fdbab246741b 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" 
argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt index 6135cdea2bbe..c11fb59691fb 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt index 076f8c3681ab..f7ee995f2eaa 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -135,7 +135,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'max_tokens\', \'num_oov_indices\', \'mask_token\', \'oov_token\', \'vocabulary\', \'idf_weights\', \'encoding\', \'invert\', \'output_mode\', 
\'sparse\', \'pad_to_max_tokens\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'None\', \'[UNK]\', \'None\', \'None\', \'None\', \'False\', \'int\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'max_tokens\', \'num_oov_indices\', \'mask_token\', \'oov_token\', \'vocabulary\', \'idf_weights\', \'encoding\', \'invert\', \'output_mode\', \'sparse\', \'pad_to_max_tokens\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'None\', \'[UNK]\', \'None\', \'None\', \'utf-8\', \'False\', \'int\', \'False\', \'False\'], " } member_method { name: "adapt" @@ -165,6 +165,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -197,6 +201,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +241,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -241,6 +257,14 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_vocabulary" argspec: "args=[\'self\', \'vocabulary\', \'idf_weights\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt index 3e2f9b7e68b4..d9c28d3a36d7 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -134,7 +134,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'max_tokens\', \'standardize\', \'split\', \'ngrams\', \'output_mode\', \'output_sequence_length\', \'pad_to_max_tokens\', \'vocabulary\', \'idf_weights\', \'sparse\', \'ragged\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'lower_and_strip_punctuation\', \'whitespace\', \'None\', \'int\', \'None\', \'False\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'max_tokens\', \'standardize\', 
\'split\', \'ngrams\', \'output_mode\', \'output_sequence_length\', \'pad_to_max_tokens\', \'vocabulary\', \'idf_weights\', \'sparse\', \'ragged\', \'encoding\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'lower_and_strip_punctuation\', \'whitespace\', \'None\', \'int\', \'None\', \'False\', \'None\', \'None\', \'False\', \'False\', \'utf-8\'], " } member_method { name: "adapt" @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -196,6 +200,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +240,14 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_adapt_function" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -240,6 +256,14 @@ tf_class { name: "reset_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_assets" + argspec: "args=[\'self\', \'dir_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_vocabulary" argspec: "args=[\'self\', \'vocabulary\', \'idf_weights\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/keras/api/golden/v2/tensorflow.keras.layers.pbtxt b/keras/api/golden/v2/tensorflow.keras.layers.pbtxt index eef6e02c9efe..a2b218a4c0d5 100644 --- a/keras/api/golden/v2/tensorflow.keras.layers.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.layers.pbtxt @@ -256,10 +256,22 @@ tf_module { name: "GlobalMaxPooling3D" mtype: "" } + member { + name: "GroupNormalization" + mtype: "" + } + member { + name: "HashedCrossing" + mtype: "" + } member { name: "Hashing" mtype: "" } + member { + name: "Identity" + mtype: "" + } member { name: "InputLayer" mtype: "" @@ -460,6 +472,10 @@ tf_module { name: "SpatialDropout3D" mtype: "" } + member { + name: "SpectralNormalization" + mtype: "" + } member { name: "StackedRNNCells" mtype: "" @@ -538,7 +554,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "dot" @@ -558,7 +574,7 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'layer\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'layer\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "subtract" diff 
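The tensorflow.keras.layers golden above registers four new layer classes: GroupNormalization, HashedCrossing, Identity, and SpectralNormalization. A minimal usage sketch, assuming a tf.keras build that ships these symbols; all shapes and argument values below are illustrative and not taken from this diff:

import tensorflow as tf

inputs = tf.keras.Input(shape=(8,))
x = tf.keras.layers.Identity()(inputs)  # no-op passthrough layer
x = tf.keras.layers.Dense(16)(x)
# groups must evenly divide the channel axis (16 / 4 here).
x = tf.keras.layers.GroupNormalization(groups=4)(x)
# SpectralNormalization wraps another layer and constrains the spectral
# norm of its kernel during training.
outputs = tf.keras.layers.SpectralNormalization(tf.keras.layers.Dense(8))(x)
model = tf.keras.Model(inputs, outputs)

# HashedCrossing hashes a feature cross of two categorical inputs into
# num_bins buckets.
cross = tf.keras.layers.HashedCrossing(num_bins=10)
print(cross((tf.constant(["a", "b"]), tf.constant([101, 102]))))
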
--git a/keras/api/golden/v2/tensorflow.keras.losses.-binary-focal-crossentropy.pbtxt b/keras/api/golden/v2/tensorflow.keras.losses.-binary-focal-crossentropy.pbtxt index 2c2a286f740e..ac49b8fc8701 100644 --- a/keras/api/golden/v2/tensorflow.keras.losses.-binary-focal-crossentropy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.losses.-binary-focal-crossentropy.pbtxt @@ -6,7 +6,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'2.0\', \'False\', \'0.0\', \'-1\', \'auto\', \'binary_focal_crossentropy\'], " + argspec: "args=[\'self\', \'apply_class_balancing\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\', \'auto\', \'binary_focal_crossentropy\'], " } member_method { name: "call" diff --git a/keras/api/golden/v2/tensorflow.keras.losses.-categorical-focal-crossentropy.pbtxt b/keras/api/golden/v2/tensorflow.keras.losses.-categorical-focal-crossentropy.pbtxt new file mode 100644 index 000000000000..f06b44ec8765 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.losses.-categorical-focal-crossentropy.pbtxt @@ -0,0 +1,23 @@ +path: "tensorflow.keras.losses.CategoricalFocalCrossentropy" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\', \'auto\', \'categorical_focal_crossentropy\'], " + } + member_method { + name: "call" + argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.losses.-sparse-categorical-crossentropy.pbtxt b/keras/api/golden/v2/tensorflow.keras.losses.-sparse-categorical-crossentropy.pbtxt index 06d9cfe145ee..389b05c75d5d 100644 --- a/keras/api/golden/v2/tensorflow.keras.losses.-sparse-categorical-crossentropy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.losses.-sparse-categorical-crossentropy.pbtxt @@ -6,7 +6,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'from_logits\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'auto\', \'sparse_categorical_crossentropy\'], " + argspec: "args=[\'self\', \'from_logits\', \'ignore_class\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'auto\', \'sparse_categorical_crossentropy\'], " } member_method { name: "call" diff --git a/keras/api/golden/v2/tensorflow.keras.losses.pbtxt b/keras/api/golden/v2/tensorflow.keras.losses.pbtxt index e64d82d71eae..8fb5dcb54f79 100644 --- a/keras/api/golden/v2/tensorflow.keras.losses.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.losses.pbtxt @@ -12,6 +12,10 @@ tf_module { name: "CategoricalCrossentropy" mtype: "" } + member { + name: "CategoricalFocalCrossentropy" + mtype: "" + } member { name: "CategoricalHinge" mtype: "" @@ -98,12 +102,16 @@ tf_module { } 
member_method { name: "binary_focal_crossentropy" - argspec: "args=[\'y_true\', \'y_pred\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'2.0\', \'False\', \'0.0\', \'-1\'], " + argspec: "args=[\'y_true\', \'y_pred\', \'apply_class_balancing\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\'], " } member_method { name: "categorical_crossentropy" argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'0.0\', \'-1\'], " } + member_method { + name: "categorical_focal_crossentropy" + argspec: "args=[\'y_true\', \'y_pred\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\'], " + } member_method { name: "categorical_hinge" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -114,7 +122,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'name\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'name\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -186,11 +194,11 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'loss\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'loss\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "sparse_categorical_crossentropy" - argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], " + argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\', \'ignore_class\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\', \'None\'], " } member_method { name: "squared_hinge" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-a-u-c.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-a-u-c.pbtxt index 35f9a429b865..171da23f3bc1 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-a-u-c.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-a-u-c.pbtxt @@ -1,11 +1,11 @@ path: "tensorflow.keras.metrics.AUC" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -160,6 +160,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -188,6 +192,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -224,6 +232,10 @@ tf_class { name: "interpolate_pr_auc" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: 
"args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -240,6 +252,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt index 26fbd0b585bf..863b948441e9 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.Accuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt index 789c93e9c821..4b8759cf7628 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.BinaryAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + 
argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-binary-crossentropy.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-binary-crossentropy.pbtxt index 4e88a2ad5ddd..16228d4229f2 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-binary-crossentropy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-binary-crossentropy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.BinaryCrossentropy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git 
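The losses goldens earlier in the diff add CategoricalFocalCrossentropy, extend binary_focal_crossentropy with apply_class_balancing and alpha, and give sparse_categorical_crossentropy an ignore_class argument. A sketch of how those signatures are called; the keyword names mirror the argspecs above, while the tensors are made up and -1 is an assumed sentinel label:

import tensorflow as tf

y_true = tf.constant([[0.0, 1.0], [1.0, 0.0]])
y_pred = tf.constant([[0.1, 0.9], [0.8, 0.2]])

# apply_class_balancing weights positives by alpha and negatives by 1 - alpha.
bfce = tf.keras.losses.BinaryFocalCrossentropy(
    apply_class_balancing=True, alpha=0.25, gamma=2.0)
print(bfce(y_true, y_pred).numpy())

# New loss class registered by this change.
cfce = tf.keras.losses.CategoricalFocalCrossentropy(alpha=0.25, gamma=2.0)
print(cfce(y_true, y_pred).numpy())

# Labels equal to ignore_class are masked out of the reduction.
scce = tf.keras.losses.SparseCategoricalCrossentropy(ignore_class=-1)
print(scce(tf.constant([1, -1]), y_pred).numpy())
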
a/keras/api/golden/v2/tensorflow.keras.metrics.-binary-io-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-binary-io-u.pbtxt index 590f84d1e583..49e4ac2946e7 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-binary-io-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-binary-io-u.pbtxt @@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.BinaryIoU" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt index f910dc4b0696..c56abceaeb13 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.CategoricalAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], 
varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt index 27abc004b332..92d50ec7a5f1 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.CategoricalCrossentropy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -133,7 +133,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'label_smoothing\'], varargs=None, keywords=None, defaults=[\'categorical_crossentropy\', \'None\', \'False\', \'0\'], " + argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'categorical_crossentropy\', \'None\', \'False\', \'0\', \'-1\'], " } member_method { name: "add_loss" @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git 
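Nearly every class in this section gains the same four hooks: get_build_config / build_from_config, for recreating a layer's weights from a stored input shape, and save_own_variables / load_own_variables, for writing and reading the weight values themselves. A minimal sketch of overriding the variable hooks in a custom layer; the Scale layer is hypothetical, and the "0", "1", ... key scheme is assumed here to be the default enumeration order of the layer's variable list:

import tensorflow as tf

class Scale(tf.keras.layers.Layer):
    # Hypothetical one-weight layer used only to illustrate the hooks.
    def build(self, input_shape):
        self.factor = self.add_weight(
            name="factor", shape=(), initializer="ones")

    def call(self, inputs):
        return inputs * self.factor

    def save_own_variables(self, store):
        # `store` is dict-like; by default each weight is written under
        # its index in the layer's variable list.
        store["0"] = self.factor.numpy()

    def load_own_variables(self, store):
        # Called after build_from_config has recreated the weights.
        self.factor.assign(store["0"])

With these hooks a layer controls exactly what is written to and restored from the weights store used by model saving, without touching get_config.
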
a/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-hinge.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-hinge.pbtxt index 4bb20d940f1f..f4386171e6f5 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-hinge.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-categorical-hinge.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.CategoricalHinge" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-cosine-similarity.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-cosine-similarity.pbtxt index 0dbf94fa93df..221cbe34edd0 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-cosine-similarity.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-cosine-similarity.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.CosineSimilarity" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, 
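The other new pair, `get_build_config()` / `build_from_config(config)`, round-trips the information needed to re-build a stateful object before its variables are restored; by default the config is just the input shape the object was built with. A sketch of the intended round trip on an ordinary layer (metrics subclass `Layer`, so they inherit the same behavior); the exact dict contents are an assumption based on the default implementation:

```python
import tensorflow as tf

layer = tf.keras.layers.Dense(4)
layer.build((None, 8))

# Serialize the build information (by default roughly {"input_shape": (None, 8)}).
cfg = layer.get_build_config()

# Re-create a clone from its config, build it to the same shapes,
# then load weights into the now-existing variables.
clone = tf.keras.layers.Dense.from_config(layer.get_config())
clone.build_from_config(cfg)
clone.set_weights(layer.get_weights())
```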
keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-f-beta-score.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-f-beta-score.pbtxt new file mode 100644 index 000000000000..37847a1f933d --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-f-beta-score.pbtxt @@ -0,0 +1,263 @@ +path: "tensorflow.keras.metrics.FBetaScore" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'average\', \'beta\', \'threshold\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'None\', \'fbeta_score\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" 
+ argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregationV2.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'y_true_shape\', \'y_pred_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "merge_state" + argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_states" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + 
name: "update_state" + argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-f1-score.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-f1-score.pbtxt new file mode 100644 index 000000000000..56d233b0b5fc --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-f1-score.pbtxt @@ -0,0 +1,264 @@ +path: "tensorflow.keras.metrics.F1Score" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'average\', \'threshold\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'f1_score\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregationV2.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'y_true_shape\', \'y_pred_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, 
defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "merge_state" + argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_states" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_state" + argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt index ad1ffb7d5e1d..12518c046e4d 100644 --- 
a/keras/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.FalseNegatives" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt index 0dfa8b5ee1a6..d3a260bc7f5f 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.FalsePositives" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: 
"args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-hinge.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-hinge.pbtxt index b9ef8b808189..c01adca8b432 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-hinge.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-hinge.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.Hinge" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-io-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-io-u.pbtxt index c8e3cac66dac..3b3e4ed1e707 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-io-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-io-u.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.IoU" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -131,7 +131,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_classes\', \'target_class_ids\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'num_classes\', 
\'target_class_ids\', \'name\', \'dtype\', \'ignore_class\', \'sparse_y_true\', \'sparse_y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'True\', \'-1\'], " } member_method { name: "add_loss" @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-k-l-divergence.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-k-l-divergence.pbtxt index 2c31b5fccac2..8fe4028c968d 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-k-l-divergence.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-k-l-divergence.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.KLDivergence" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ 
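Per the `__init__` argspec above, `IoU` (and `MeanIoU` further below) grows four keyword arguments: `ignore_class` drops one label id from the confusion matrix (useful for "void" pixels in segmentation), while `sparse_y_true` / `sparse_y_pred` (both defaulting to `True`) declare whether targets and predictions arrive as integer class ids or as scores to be argmax-ed along `axis`. A sketch, assuming integer ground truth, dense per-class prediction scores, and a hypothetical void label of 255:

```python
import tensorflow as tf

# Ground truth is sparse ids; predictions are per-class scores, so ask the
# metric to argmax them along the last axis. Label 255 marks void pixels,
# which are masked out before the confusion matrix is built.
miou = tf.keras.metrics.MeanIoU(
    num_classes=3, ignore_class=255,
    sparse_y_true=True, sparse_y_pred=False, axis=-1)

y_true = tf.constant([0, 1, 2, 255])
y_pred = tf.constant([[0.9, 0.1, 0.0],
                      [0.1, 0.8, 0.1],
                      [0.2, 0.2, 0.6],
                      [1.0, 0.0, 0.0]])
miou.update_state(y_true, y_pred)
print(float(miou.result()))  # expected 1.0: the void pixel is ignored
```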
tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-log-cosh-error.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-log-cosh-error.pbtxt index 81ff9033cdac..862a2c127f69 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-log-cosh-error.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-log-cosh-error.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.LogCoshError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-error.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-error.pbtxt index 50832f259e8d..4db047358108 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-error.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-error.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.MeanAbsoluteError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, 
keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt index dfc975031555..c1a4285ba95d 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.MeanAbsolutePercentageError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-io-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-io-u.pbtxt index 
a2c1fbea4afa..eb8b2c471f44 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-io-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-io-u.pbtxt @@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.MeanIoU" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -132,7 +132,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_classes\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'num_classes\', \'name\', \'dtype\', \'ignore_class\', \'sparse_y_true\', \'sparse_y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'True\', \'-1\'], " } member_method { name: "add_loss" @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-metric-wrapper.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-metric-wrapper.pbtxt index 951c151fdc79..d84345e14e31 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-metric-wrapper.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-metric-wrapper.pbtxt @@ -6,8 +6,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + 
member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-relative-error.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-relative-error.pbtxt index 10b3a82a0c8c..697c4e0bb74b 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-relative-error.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-relative-error.pbtxt @@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.MeanRelativeError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-squared-error.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-squared-error.pbtxt index ec4d424986b5..ceb5282f0746 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-squared-error.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-squared-error.pbtxt @@ -1,14 +1,14 @@ path: 
"tensorflow.keras.metrics.MeanSquaredError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt index ecfebc72ad3b..2d5cf64c2c3d 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.MeanSquaredLogarithmicError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } 
member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-tensor.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-tensor.pbtxt index a4ee5fc8e909..6e8ba1767c97 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-mean-tensor.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-mean-tensor.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -164,6 +164,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -192,6 +196,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -224,6 +232,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -240,6 +252,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt index 80d830fb7efc..c31d49e14b7f 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: 
"get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-metric.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-metric.pbtxt index 905c92a33ab9..916ae93096e5 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-metric.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-metric.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -155,6 +155,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -183,6 +187,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -215,6 +223,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -231,6 +243,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-one-hot-io-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-one-hot-io-u.pbtxt index 853ae3bcf38e..23fd50224c5c 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-one-hot-io-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-one-hot-io-u.pbtxt @@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.OneHotIoU" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + 
is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -132,7 +132,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_classes\', \'target_class_ids\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'num_classes\', \'target_class_ids\', \'name\', \'dtype\', \'ignore_class\', \'sparse_y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'-1\'], " } member_method { name: "add_loss" @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-one-hot-mean-io-u.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-one-hot-mean-io-u.pbtxt index e20224e9b14e..98b63a62da97 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-one-hot-mean-io-u.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-one-hot-mean-io-u.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.OneHotMeanIoU" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -133,7 +133,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_classes\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'num_classes\', \'name\', \'dtype\', \'ignore_class\', \'sparse_y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'-1\'], " } member_method { name: "add_loss" @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', 
\'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-poisson.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-poisson.pbtxt index 29ccceda1abe..1d5f8c6efcb7 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-poisson.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-poisson.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.Poisson" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-precision-at-recall.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-precision-at-recall.pbtxt index ec505dc742e9..21f1c36bdc1b 100644 --- 
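The one-hot variants in the hunks above gain the same knobs minus `sparse_y_true`, since their ground truth is one-hot by definition, and `sparse_y_pred` defaults to `False` here because predictions are typically probabilities. A short sketch under those defaults:

```python
import tensorflow as tf

# One-hot targets, probability predictions (sparse_y_pred=False is the default).
ohm = tf.keras.metrics.OneHotMeanIoU(num_classes=3)
y_true = tf.constant([[1.0, 0.0, 0.0],
                      [0.0, 0.0, 1.0]])
y_pred = tf.constant([[0.7, 0.2, 0.1],
                      [0.1, 0.1, 0.8]])
ohm.update_state(y_true, y_pred)
print(float(ohm.result()))  # perfect on the classes that appear
```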
a/keras/api/golden/v2/tensorflow.keras.metrics.-precision-at-recall.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-precision-at-recall.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.PrecisionAtRecall" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt index fe1822fc8d53..d9c49540edcb 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt @@ -1,11 +1,11 @@ path: "tensorflow.keras.metrics.Precision" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, 
defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +244,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-r2-score.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-r2-score.pbtxt new file mode 100644 index 000000000000..1e76ffb29ad4 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-r2-score.pbtxt @@ -0,0 +1,263 @@ +path: "tensorflow.keras.metrics.R2Score" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'class_aggregation\', \'num_regressors\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'uniform_average\', \'0\', \'r2_score\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregationV2.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], " + } + member_method { + name: 
"build" + argspec: "args=[\'self\', \'y_true_shape\', \'y_pred_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "merge_state" + argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_states" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_state" + argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} 
diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-recall-at-precision.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-recall-at-precision.pbtxt index e8ab0f6ce1c6..5aa668718b0e 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-recall-at-precision.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-recall-at-precision.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.RecallAtPrecision" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt index 52e9879a3446..e7c4864a1bbd 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt @@ -1,11 +1,11 @@ path: "tensorflow.keras.metrics.Recall" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -156,6 +156,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -184,6 +188,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -216,6 +224,10 @@ tf_class { name: "get_weights" argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +244,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt index cd99b1e8e29e..64671f63b4c0 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt @@ -1,13 +1,13 @@ path: "tensorflow.keras.metrics.RootMeanSquaredError" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -158,6 +158,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -186,6 +190,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -218,6 +226,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -234,6 +246,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt index 0da727a14110..9b35e4f14197 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.SensitivityAtSpecificity" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" 
is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt index d47d06739b2a..d960b99eccb4 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.SparseCategoricalAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: 
"save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt index 4fdc705aa389..c5bd4c6f59db 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.SparseCategoricalCrossentropy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -133,7 +133,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'sparse_categorical_crossentropy\', \'None\', \'False\', \'-1\'], " + argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'ignore_class\', \'axis\'], varargs=None, keywords=None, defaults=[\'sparse_categorical_crossentropy\', \'None\', \'False\', \'None\', \'-1\'], " } member_method { name: "add_loss" @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt index dd386c6cba5d..069a3e3b2727 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.SparseTopKCategoricalAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" 
is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt index 15dfa9412558..9f42d1f0b3c2 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.SpecificityAtSensitivity" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: 
"result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-squared-hinge.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-squared-hinge.pbtxt index 0f76c4a43b47..83437f332258 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-squared-hinge.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-squared-hinge.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.SquaredHinge" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-sum.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-sum.pbtxt index ccd3ac0c8752..6cb46d1f93e4 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-sum.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-sum.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt index dd26258eb1bb..6355e88e1858 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt @@ -1,14 +1,14 @@ path: "tensorflow.keras.metrics.TopKCategoricalAccuracy" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -159,6 +159,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -187,6 +191,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -219,6 +227,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -235,6 +247,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt index af0fb7936462..95bc523abd0c 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.TrueNegatives" tf_class 
{ - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: "result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt index 0e1124fbc296..863fb2911873 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt @@ -1,12 +1,12 @@ path: "tensorflow.keras.metrics.TruePositives" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -157,6 +157,10 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" @@ -185,6 +189,10 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -217,6 +225,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_state" argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +245,10 @@ tf_class { name: 
"result" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.experimental.-py-metric.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.experimental.-py-metric.pbtxt new file mode 100644 index 000000000000..468898868b32 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.metrics.experimental.-py-metric.pbtxt @@ -0,0 +1,263 @@ +path: "tensorflow.keras.metrics.experimental.PyMetric" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregationV2.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], 
varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "merge_state" + argspec: "args=[\'self\', \'metrics\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_states" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_state" + argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.experimental.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.experimental.pbtxt new file mode 100644 index 000000000000..f5614c4b76ae --- 
/dev/null +++ b/keras/api/golden/v2/tensorflow.keras.metrics.experimental.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.keras.metrics.experimental" +tf_module { + member { + name: "PyMetric" + mtype: "" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.metrics.pbtxt b/keras/api/golden/v2/tensorflow.keras.metrics.pbtxt index f05d1a6f89c5..1ab81fed6868 100644 --- a/keras/api/golden/v2/tensorflow.keras.metrics.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.metrics.pbtxt @@ -36,6 +36,14 @@ tf_module { name: "CosineSimilarity" mtype: "" } + member { + name: "F1Score" + mtype: "" + } + member { + name: "FBetaScore" + mtype: "" + } member { name: "FalseNegatives" mtype: "" @@ -120,6 +128,10 @@ tf_module { name: "PrecisionAtRecall" mtype: "" } + member { + name: "R2Score" + mtype: "" + } member { name: "Recall" mtype: "" @@ -172,6 +184,10 @@ tf_module { name: "TruePositives" mtype: "" } + member { + name: "experimental" + mtype: "" + } member_method { name: "KLD" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -202,7 +218,7 @@ tf_module { } member_method { name: "binary_focal_crossentropy" - argspec: "args=[\'y_true\', \'y_pred\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'2.0\', \'False\', \'0.0\', \'-1\'], " + argspec: "args=[\'y_true\', \'y_pred\', \'apply_class_balancing\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\'], " } member_method { name: "categorical_accuracy" @@ -212,9 +228,13 @@ tf_module { name: "categorical_crossentropy" argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'0.0\', \'-1\'], " } + member_method { + name: "categorical_focal_crossentropy" + argspec: "args=[\'y_true\', \'y_pred\', \'alpha\', \'gamma\', \'from_logits\', \'label_smoothing\', \'axis\'], varargs=None, keywords=None, defaults=[\'0.25\', \'2.0\', \'False\', \'0.0\', \'-1\'], " + } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -282,7 +302,7 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'metric\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'metric\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "sparse_categorical_accuracy" @@ -290,7 +310,7 @@ tf_module { } member_method { name: "sparse_categorical_crossentropy" - argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], " + argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\', \'ignore_class\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\', \'None\'], " } member_method { name: "sparse_top_k_categorical_accuracy" diff --git a/keras/api/golden/v2/tensorflow.keras.models.-model.pbtxt b/keras/api/golden/v2/tensorflow.keras.models.-model.pbtxt index 8f0115b30ac0..af5a892ca740 100644 --- a/keras/api/golden/v2/tensorflow.keras.models.-model.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.models.-model.pbtxt @@ -3,8 +3,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - 
is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -12,10 +12,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -52,6 +60,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -112,6 +124,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -176,13 +192,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -216,6 +240,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -232,6 +260,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -252,6 +288,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -264,13 +304,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" 
} + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -310,7 +358,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -326,7 +378,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt b/keras/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt index 8b85b77488b4..a6f046c2e06a 100644 --- a/keras/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt @@ -5,8 +5,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -14,10 +14,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -54,6 +62,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -114,6 +126,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -182,13 +198,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "compile" - argspec: "args=[\'self\', 
\'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -222,6 +246,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -238,6 +266,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -258,6 +294,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -270,13 +310,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -320,7 +368,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, 
keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -336,7 +388,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v2/tensorflow.keras.models.experimental.-sharpness-aware-minimization.pbtxt b/keras/api/golden/v2/tensorflow.keras.models.experimental.-sharpness-aware-minimization.pbtxt index 84a7524beb47..65e117c4573c 100644 --- a/keras/api/golden/v2/tensorflow.keras.models.experimental.-sharpness-aware-minimization.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.models.experimental.-sharpness-aware-minimization.pbtxt @@ -4,8 +4,8 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -13,10 +13,18 @@ tf_class { name: "activity_regularizer" mtype: "" } + member { + name: "autotune_steps_per_execution" + mtype: "" + } member { name: "compute_dtype" mtype: "" } + member { + name: "distribute_reduction_method" + mtype: "" + } member { name: "distribute_strategy" mtype: "" @@ -53,6 +61,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "jit_compile" + mtype: "" + } member { name: "layers" mtype: "" @@ -113,6 +125,10 @@ tf_class { name: "stateful" mtype: "" } + member { + name: "steps_per_execution" + mtype: "" + } member { name: "submodules" mtype: "" @@ -177,13 +193,21 @@ tf_class { name: "build" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'weighted_metrics\', \'run_eagerly\', \'steps_per_execution\', \'jit_compile\', \'pss_evaluation_shards\'], varargs=None, keywords=kwargs, defaults=[\'rmsprop\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0\'], " + } + member_method { + name: "compile_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_loss" @@ -217,6 +241,10 @@ tf_class { name: "evaluate_generator" argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " } + member_method { + name: "export" + argspec: 
"args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize_state" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -233,6 +261,14 @@ tf_class { name: "from_config" argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_compile_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" @@ -253,6 +289,10 @@ tf_class { name: "get_layer" argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "get_metrics_result" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -265,13 +305,21 @@ tf_class { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weight_paths" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_weights" - argspec: "args=[\'self\', \'filepath\', \'by_name\', \'skip_mismatch\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " + argspec: "args=[\'self\', \'filepath\', \'skip_mismatch\', \'by_name\', \'options\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], " } member_method { name: "make_predict_function" @@ -311,7 +359,11 @@ tf_class { } member_method { name: "save" - argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "save_spec" @@ -327,7 +379,7 @@ tf_class { } member_method { name: "summary" - argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\', \'expand_nested\', \'show_trainable\', \'layer_range\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'False\', \'None\'], " } member_method { name: "test_on_batch" diff --git a/keras/api/golden/v2/tensorflow.keras.models.pbtxt b/keras/api/golden/v2/tensorflow.keras.models.pbtxt index a12db424d210..49ba3fbf4642 100644 --- a/keras/api/golden/v2/tensorflow.keras.models.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.models.pbtxt @@ -18,7 +18,7 @@ tf_module { } member_method { name: "load_model" - argspec: 
"args=[\'filepath\', \'custom_objects\', \'compile\', \'options\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'None\'], " + argspec: "args=[\'filepath\', \'custom_objects\', \'compile\', \'safe_mode\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'True\', \'True\'], " } member_method { name: "model_from_config" @@ -34,6 +34,6 @@ tf_module { } member_method { name: "save_model" - argspec: "args=[\'model\', \'filepath\', \'overwrite\', \'include_optimizer\', \'save_format\', \'signatures\', \'options\', \'save_traces\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'None\', \'None\', \'None\', \'True\'], " + argspec: "args=[\'model\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt index d0856c75be4a..bc24d928cb41 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt @@ -1,83 +1,89 @@ path: "tensorflow.keras.optimizers.Adadelta" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" + name: "iterations" mtype: "" } member { - name: "global_clipnorm" + name: "learning_rate" mtype: "" } member { - name: "iterations" + name: "lr" mtype: "" } member { - name: "weights" + name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'Adadelta\'], " + argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adadelta\'], " } member_method { - name: "add_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\', \'shape\'], varargs=None, keywords=None, defaults=[\'zeros\', \'None\'], " + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " } member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, 
keywords=None, defaults=[\'None\'], " + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_gradients" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None" + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "get_slot_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_updates" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_weights" + name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-adafactor.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-adafactor.pbtxt new file mode 100644 index 000000000000..fb3952d2b260 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-adafactor.pbtxt @@ -0,0 +1,89 @@ +path: "tensorflow.keras.optimizers.Adafactor" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "iterations" + mtype: "" + } + member { + name: "learning_rate" + mtype: "" + } + member { + name: "lr" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'learning_rate\', \'beta_2_decay\', \'epsilon_1\', \'epsilon_2\', \'clip_threshold\', \'relative_step\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', 
\'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'-0.8\', \'1e-30\', \'0.001\', \'1.0\', \'True\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adafactor\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " + } + member_method { + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "minimize" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt index 17f68fd67db0..4e6b8a67982b 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt @@ -1,83 +1,89 @@ path: "tensorflow.keras.optimizers.Adagrad" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" + name: "iterations" mtype: "" } member { - name: "global_clipnorm" + name: "learning_rate" mtype: "" } member { - name: "iterations" + name: "lr" mtype: "" } member { - name: "weights" + name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'name\'], 
varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'Adagrad\'], " + argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adagrad\'], " } member_method { - name: "add_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\', \'shape\'], varargs=None, keywords=None, defaults=[\'zeros\', \'None\'], " + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " } member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_gradients" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None" + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "get_slot_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_updates" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_weights" + name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, 
keywords=None, defaults=None" + } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-adam-w.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-adam-w.pbtxt new file mode 100644 index 000000000000..12b1548926be --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-adam-w.pbtxt @@ -0,0 +1,89 @@ +path: "tensorflow.keras.optimizers.AdamW" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "iterations" + mtype: "" + } + member { + name: "learning_rate" + mtype: "" + } + member { + name: "lr" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'learning_rate\', \'weight_decay\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.004\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'AdamW\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " + } + member_method { + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_config" + argspec: 
"args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "minimize" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt index 74fa9869ad54..978f3b874892 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt @@ -1,83 +1,89 @@ path: "tensorflow.keras.optimizers.Adam" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" + name: "iterations" mtype: "" } member { - name: "global_clipnorm" + name: "learning_rate" mtype: "" } member { - name: "iterations" + name: "lr" mtype: "" } member { - name: "weights" + name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'Adam\'], " + argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adam\'], " } member_method { - name: "add_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\', \'shape\'], varargs=None, keywords=None, defaults=[\'zeros\', \'None\'], " + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " } member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "from_config" - argspec: "args=[\'cls\', 
\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_gradients" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None" + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "get_slot_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_updates" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_weights" + name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt index ae0d88760eb5..302da145cd5d 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt @@ -1,83 +1,89 @@ path: "tensorflow.keras.optimizers.Adamax" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" + name: "iterations" mtype: "" } member { - name: "global_clipnorm" + name: "learning_rate" mtype: "" } member { - name: "iterations" + name: "lr" mtype: "" } member { - name: "weights" + name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', 
\'beta_1\', \'beta_2\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'Adamax\'], " + argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adamax\'], " } member_method { - name: "add_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\', \'shape\'], varargs=None, keywords=None, defaults=[\'zeros\', \'None\'], " + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " } member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_gradients" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None" + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "get_slot_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_updates" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_weights" + name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: 
"load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt index 2cfd1ca6b71c..be804558c675 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt @@ -1,83 +1,89 @@ path: "tensorflow.keras.optimizers.Ftrl" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" + name: "iterations" mtype: "" } member { - name: "global_clipnorm" + name: "learning_rate" mtype: "" } member { - name: "iterations" + name: "lr" mtype: "" } member { - name: "weights" + name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'name\', \'l2_shrinkage_regularization_strength\', \'beta\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'Ftrl\', \'0.0\', \'0.0\'], " + argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'l2_shrinkage_regularization_strength\', \'beta\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Ftrl\'], " } member_method { - name: "add_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\', \'shape\'], varargs=None, keywords=None, defaults=[\'zeros\', \'None\'], " + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " } member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', 
\'None\'], " } member_method { - name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_gradients" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None" + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "get_slot_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_updates" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_weights" + name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-lion.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-lion.pbtxt new file mode 100644 index 000000000000..5d4faf4150be --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-lion.pbtxt @@ -0,0 +1,89 @@ +path: "tensorflow.keras.optimizers.Lion" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "iterations" + mtype: "" + } + member { + name: 
"learning_rate" + mtype: "" + } + member { + name: "lr" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.0001\', \'0.9\', \'0.99\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Lion\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " + } + member_method { + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "minimize" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt index 2d18b1b4774b..b6c91c10e99d 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt @@ -1,83 +1,89 @@ path: "tensorflow.keras.optimizers.Nadam" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" + name: "iterations" mtype: "" 
} member { - name: "global_clipnorm" + name: "learning_rate" mtype: "" } member { - name: "iterations" + name: "lr" mtype: "" } member { - name: "weights" + name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'Nadam\'], " + argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Nadam\'], " } member_method { - name: "add_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\', \'shape\'], varargs=None, keywords=None, defaults=[\'zeros\', \'None\'], " + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " } member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_gradients" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None" + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "get_slot_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_updates" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "from_config" + argspec: 
"args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_weights" + name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt index 5a9d33eea359..d30f25489a37 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt @@ -1,82 +1,88 @@ path: "tensorflow.keras.optimizers.Optimizer" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" + name: "iterations" mtype: "" } member { - name: "global_clipnorm" + name: "learning_rate" mtype: "" } member { - name: "iterations" + name: "lr" mtype: "" } member { - name: "weights" + name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'name\', \'gradient_aggregator\', \'gradient_transformers\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'0\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\'], " } member_method { - name: "add_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\', \'shape\'], varargs=None, keywords=None, defaults=[\'zeros\', \'None\'], " + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " } member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + 
name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_gradients" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None" + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "get_slot_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_updates" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_weights" + name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt index d53b8c656ddc..9bcb35ea798a 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt @@ -1,83 +1,89 @@ path: "tensorflow.keras.optimizers.RMSprop" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" + name: "iterations" mtype: "" } member { - name: "global_clipnorm" + name: "learning_rate" mtype: 
"" } member { - name: "iterations" + name: "lr" mtype: "" } member { - name: "weights" + name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'RMSprop\'], " + argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'100\', \'True\', \'RMSprop\'], " } member_method { - name: "add_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\', \'shape\'], varargs=None, keywords=None, defaults=[\'zeros\', \'None\'], " + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " } member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_gradients" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None" + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "get_slot_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_updates" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "from_config" + argspec: "args=[\'cls\', \'config\', 
\'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_weights" + name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt index f354c71298ce..73dc46d85980 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt @@ -1,83 +1,89 @@ path: "tensorflow.keras.optimizers.SGD" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" + name: "iterations" mtype: "" } member { - name: "global_clipnorm" + name: "learning_rate" mtype: "" } member { - name: "iterations" + name: "lr" mtype: "" } member { - name: "weights" + name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'0.0\', \'False\', \'SGD\'], " + argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'0.0\', \'False\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'SGD\'], " } member_method { - name: "add_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\', \'shape\'], varargs=None, keywords=None, defaults=[\'zeros\', \'None\'], " + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " } member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'name\', 
\'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_gradients" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_slot" - argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None" + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { - name: "get_slot_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "get_updates" - argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None" + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "get_weights" + name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adadelta.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adadelta.pbtxt index d9b8cf3c3065..2ada86ac054e 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adadelta.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adadelta.pbtxt @@ -1,11 +1,10 @@ path: "tensorflow.keras.optimizers.experimental.Adadelta" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + 
is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -19,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adadelta\'], " + argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adadelta\'], " } member_method { name: "add_variable" @@ -61,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -71,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adafactor.pbtxt 
b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adafactor.pbtxt new file mode 100644 index 000000000000..30a77095af10 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adafactor.pbtxt @@ -0,0 +1,89 @@ +path: "tensorflow.keras.optimizers.experimental.Adafactor" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "iterations" + mtype: "" + } + member { + name: "learning_rate" + mtype: "" + } + member { + name: "lr" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'learning_rate\', \'beta_2_decay\', \'epsilon_1\', \'epsilon_2\', \'clip_threshold\', \'relative_step\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'-0.8\', \'1e-30\', \'0.001\', \'1.0\', \'True\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adafactor\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'shape\', \'dtype\', \'initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\'], " + } + member_method { + name: "add_variable_from_reference" + argspec: "args=[\'self\', \'model_variable\', \'variable_name\', \'shape\', \'initial_value\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "aggregate_gradients" + argspec: "args=[\'self\', \'grads_and_vars\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "apply_gradients" + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_gradients" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "finalize_variable_values" + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "minimize" + argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + } +} diff --git 
a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adagrad.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adagrad.pbtxt index 222cc5cb0621..bcdc12926a78 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adagrad.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adagrad.pbtxt @@ -1,11 +1,10 @@ path: "tensorflow.keras.optimizers.experimental.Adagrad" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -19,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adagrad\'], " + argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adagrad\'], " } member_method { name: "add_variable" @@ -61,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -71,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" + 
name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'grad\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adam-w.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adam-w.pbtxt index 496446ac3c60..240e92cf9621 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adam-w.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adam-w.pbtxt @@ -1,11 +1,10 @@ path: "tensorflow.keras.optimizers.experimental.AdamW" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -19,26 +18,6 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" @@ -61,11 +40,11 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" - argspec: "args=[\'self\', \'var_list\', \'exclude_from_weight_decay\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "compute_gradients" @@ -73,7 +52,7 @@ tf_class { } member_method { name: "exclude_from_weight_decay" - argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "finalize_variable_values" @@ -81,22 +60,30 @@ tf_class { } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, 
defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adam.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adam.pbtxt index b468f301f986..a36751778545 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adam.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adam.pbtxt @@ -1,11 +1,10 @@ path: "tensorflow.keras.optimizers.experimental.Adam" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -19,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adam\'], " + argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adam\'], " } member_method { name: "add_variable" @@ -61,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -71,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, 
keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adamax.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adamax.pbtxt index 2421170c4641..f8b070a6b707 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adamax.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-adamax.pbtxt @@ -1,11 +1,10 @@ path: "tensorflow.keras.optimizers.experimental.Adamax" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -19,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adamax\'], " + argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Adamax\'], " } member_method { name: "add_variable" @@ -61,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -71,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: 
"args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-ftrl.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-ftrl.pbtxt index 096106ba41d4..892d407e86ed 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-ftrl.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-ftrl.pbtxt @@ -1,11 +1,10 @@ path: "tensorflow.keras.optimizers.experimental.Ftrl" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -19,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'l2_shrinkage_regularization_strength\', \'beta\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Ftrl\'], " + argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'l2_shrinkage_regularization_strength\', \'beta\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Ftrl\'], " } member_method { name: 
"add_variable" @@ -61,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -71,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-nadam.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-nadam.pbtxt index d6b8adfcc788..887e8bb52784 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-nadam.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-nadam.pbtxt @@ -1,11 +1,10 @@ path: "tensorflow.keras.optimizers.experimental.Nadam" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -19,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'None\', \'None\', 
\'None\', \'False\', \'0.99\', \'None\', \'True\', \'Nadam\'], " + argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'Nadam\'], " } member_method { name: "add_variable" @@ -61,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -71,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-optimizer.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-optimizer.pbtxt index f8add2a3e1e9..f4a84d454881 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-optimizer.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-optimizer.pbtxt @@ -1,10 +1,9 @@ path: "tensorflow.keras.optimizers.experimental.Optimizer" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -18,33 +17,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - 
name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'name\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\'], " + argspec: "args=[\'self\', \'name\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\'], varargs=None, keywords=kwargs, defaults=[\'0\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\'], " } member_method { name: "add_variable" @@ -60,7 +39,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -70,28 +49,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-r-m-sprop.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-r-m-sprop.pbtxt index 1c5325a505e5..c8998cffcf40 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-r-m-sprop.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-r-m-sprop.pbtxt @@ -1,11 +1,10 @@ path: "tensorflow.keras.optimizers.experimental.RMSprop" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: 
"" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -19,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'100\', \'True\', \'RMSprop\'], " + argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'100\', \'True\', \'RMSprop\'], " } member_method { name: "add_variable" @@ -61,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -71,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git 
a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-s-g-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-s-g-d.pbtxt index 3ac6cdda7aa6..7a73dc7f4238 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-s-g-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.-s-g-d.pbtxt @@ -1,11 +1,10 @@ path: "tensorflow.keras.optimizers.experimental.SGD" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "iterations" @@ -19,33 +18,13 @@ tf_class { name: "lr" mtype: "" } - member { - name: "name" - mtype: "" - } - member { - name: "name_scope" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "submodules" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } member { name: "variables" mtype: "" } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'amsgrad\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'0.0\', \'False\', \'False\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'SGD\'], " + argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'weight_decay\', \'clipnorm\', \'clipvalue\', \'global_clipnorm\', \'use_ema\', \'ema_momentum\', \'ema_overwrite_frequency\', \'jit_compile\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'0.0\', \'False\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0.99\', \'None\', \'True\', \'SGD\'], " } member_method { name: "add_variable" @@ -61,7 +40,7 @@ tf_class { } member_method { name: "apply_gradients" - argspec: "args=[\'self\', \'grads_and_vars\', \'skip_gradients_aggregation\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'skip_gradients_aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'False\'], " } member_method { name: "build" @@ -71,28 +50,40 @@ tf_class { name: "compute_gradients" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "exclude_from_weight_decay" + argspec: "args=[\'self\', \'var_list\', \'var_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "finalize_variable_values" argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "minimize" argspec: "args=[\'self\', \'loss\', \'var_list\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "update_step" - argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" + name: "save_own_variables" 
+ argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } member_method { - name: "with_name_scope" - argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + name: "update_step" + argspec: "args=[\'self\', \'gradient\', \'variable\'], varargs=None, keywords=None, defaults=None" } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.pbtxt index 95a90dcaea0a..9d9f9cfe72da 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.experimental.pbtxt @@ -4,6 +4,10 @@ tf_module { name: "Adadelta" mtype: "" } + member { + name: "Adafactor" + mtype: "" + } member { name: "Adagrad" mtype: "" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adadelta.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adadelta.pbtxt index 9c1b406a1d6f..05ae2888d367 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adadelta.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adadelta.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Adadelta" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adagrad.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adagrad.pbtxt index 736ee08e4efb..507148f08dbb 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adagrad.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adagrad.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Adagrad" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adam.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adam.pbtxt index 7d0d3b23614c..d79093442bd9 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adam.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adam.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Adam" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adamax.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adamax.pbtxt index 149d0f213893..b18db03163b8 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adamax.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-adamax.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Adamax" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-ftrl.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-ftrl.pbtxt index 9ce47c161678..b852c98df0e6 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-ftrl.pbtxt +++ 
b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-ftrl.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Ftrl" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-nadam.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-nadam.pbtxt index 8a612f6b89b2..ef505faade82 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-nadam.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-nadam.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.Nadam" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-optimizer.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-optimizer.pbtxt index 6b4bf1701f22..f28c01037044 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-optimizer.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-optimizer.pbtxt @@ -1,8 +1,7 @@ path: "tensorflow.keras.optimizers.legacy.Optimizer" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-r-m-sprop.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-r-m-sprop.pbtxt index 77a6e72a9411..f53b0568fe11 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-r-m-sprop.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-r-m-sprop.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.RMSprop" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt index f6a6dd836e72..ab1041592075 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.optimizers.legacy.SGD" tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "clipnorm" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.pbtxt index f12ace047ee2..00b8c8fd3425 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.pbtxt @@ -4,6 +4,10 @@ tf_module { name: "Adadelta" mtype: "" } + member { + name: "Adafactor" + mtype: "" + } member { name: "Adagrad" mtype: "" @@ -12,6 +16,10 @@ tf_module { name: "Adam" mtype: "" } + member { + name: "AdamW" + mtype: "" + } member { name: "Adamax" mtype: "" @@ -20,6 +28,10 @@ tf_module { name: "Ftrl" mtype: "" } + member { + name: "Lion" + mtype: "" + } member { name: "Nadam" mtype: "" @@ -50,14 +62,14 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=kwargs, 
defaults=[\'None\', \'False\'], " } member_method { name: "get" - argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'identifier\'], varargs=None, keywords=kwargs, defaults=None" } member_method { name: "serialize" - argspec: "args=[\'optimizer\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'optimizer\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt index 13a711fe288b..6df561f3342e 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], " + argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\', \'warmup_target\', \'warmup_steps\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\', \'None\', \'0\'], " } member_method { name: "from_config" diff --git a/keras/api/golden/v2/tensorflow.keras.optimizers.schedules.pbtxt b/keras/api/golden/v2/tensorflow.keras.optimizers.schedules.pbtxt index 3ecc437199f6..8ed0edccf925 100644 --- a/keras/api/golden/v2/tensorflow.keras.optimizers.schedules.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.optimizers.schedules.pbtxt @@ -30,10 +30,10 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "serialize" - argspec: "args=[\'learning_rate_schedule\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'learning_rate_schedule\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git a/keras/api/golden/v2/tensorflow.keras.pbtxt b/keras/api/golden/v2/tensorflow.keras.pbtxt index cdaeea7f8244..c080bc27539a 100644 --- a/keras/api/golden/v2/tensorflow.keras.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.pbtxt @@ -44,6 +44,10 @@ tf_module { name: "experimental" mtype: "" } + member { + name: "export" + mtype: "" + } # Placeholder for internal API member { name: "initializers" @@ -82,11 +86,11 @@ tf_module { mtype: "" } member { - name: "utils" + name: "saving" mtype: "" } member { - name: "wrappers" + name: "utils" mtype: "" } member_method { diff --git a/keras/api/golden/v2/tensorflow.keras.regularizers.pbtxt b/keras/api/golden/v2/tensorflow.keras.regularizers.pbtxt index 48f1ec4fa1b7..7272c0fb6702 100644 --- a/keras/api/golden/v2/tensorflow.keras.regularizers.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.regularizers.pbtxt @@ -34,7 +34,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " } member_method { name: "get" @@ -46,6 +46,6 @@ tf_module { } member_method { name: "serialize" - argspec: "args=[\'regularizer\'], varargs=None, keywords=None, defaults=None" + 
argspec: "args=[\'regularizer\', \'use_legacy_format\'], varargs=None, keywords=None, defaults=[\'False\'], " } } diff --git a/keras/api/golden/v2/tensorflow.keras.saving.custom_object_scope.pbtxt b/keras/api/golden/v2/tensorflow.keras.saving.custom_object_scope.pbtxt new file mode 100644 index 000000000000..cf877e5ae4dd --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.saving.custom_object_scope.pbtxt @@ -0,0 +1,9 @@ +path: "tensorflow.keras.saving.custom_object_scope" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\'], varargs=args, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.saving.pbtxt b/keras/api/golden/v2/tensorflow.keras.saving.pbtxt new file mode 100644 index 000000000000..e1df1e64293c --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.saving.pbtxt @@ -0,0 +1,39 @@ +path: "tensorflow.keras.saving" +tf_module { + member { + name: "custom_object_scope" + mtype: "" + } + member_method { + name: "deserialize_keras_object" + argspec: "args=[\'config\', \'custom_objects\', \'safe_mode\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'True\'], " + } + member_method { + name: "get_custom_objects" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_registered_name" + argspec: "args=[\'obj\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_registered_object" + argspec: "args=[\'name\', \'custom_objects\', \'module_objects\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "load_model" + argspec: "args=[\'filepath\', \'custom_objects\', \'compile\', \'safe_mode\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'True\', \'True\'], " + } + member_method { + name: "register_keras_serializable" + argspec: "args=[\'package\', \'name\'], varargs=None, keywords=None, defaults=[\'Custom\', \'None\'], " + } + member_method { + name: "save_model" + argspec: "args=[\'model\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], " + } + member_method { + name: "serialize_keras_object" + argspec: "args=[\'obj\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.utils.-custom-object-scope.pbtxt b/keras/api/golden/v2/tensorflow.keras.utils.-custom-object-scope.pbtxt index 9e9370be68f8..3ccf719d8c8c 100644 --- a/keras/api/golden/v2/tensorflow.keras.utils.-custom-object-scope.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.utils.-custom-object-scope.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.utils.CustomObjectScope" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.utils.-feature-space.pbtxt b/keras/api/golden/v2/tensorflow.keras.utils.-feature-space.pbtxt new file mode 100644 index 000000000000..1ae0313d8ecd --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.utils.-feature-space.pbtxt @@ -0,0 +1,298 @@ +path: "tensorflow.keras.utils.FeatureSpace" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "compute_dtype" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dtype_policy" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: 
"inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "supports_masking" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variable_dtype" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'features\', \'output_mode\', \'crosses\', \'crossing_dim\', \'hashing_dim\', \'num_discretization_bins\'], varargs=None, keywords=None, defaults=[\'concat\', \'None\', \'32\', \'32\', \'32\'], " + } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregationV2.NONE\'], " + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "build_from_config" + argspec: "args=[\'self\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "cross" + argspec: "args=[\'cls\', 
\'feature_names\', \'crossing_dim\', \'output_mode\'], varargs=None, keywords=None, defaults=[\'one_hot\'], " + } + member_method { + name: "feature" + argspec: "args=[\'cls\', \'dtype\', \'preprocessor\', \'output_mode\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "finalize_state" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "float" + argspec: "args=[\'cls\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "float_discretized" + argspec: "args=[\'cls\', \'num_bins\', \'bin_boundaries\', \'output_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'one_hot\', \'None\'], " + } + member_method { + name: "float_normalized" + argspec: "args=[\'cls\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "float_rescaled" + argspec: "args=[\'cls\', \'scale\', \'offset\', \'name\'], varargs=None, keywords=None, defaults=[\'1.0\', \'0.0\', \'None\'], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_build_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_encoded_features" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_inputs" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "integer_categorical" + argspec: "args=[\'cls\', \'max_tokens\', \'num_oov_indices\', \'output_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'one_hot\', \'None\'], " + } + member_method { + name: "integer_hashed" + argspec: "args=[\'cls\', \'num_bins\', \'output_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'one_hot\', \'None\'], " + } + member_method { + name: "load_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save" + argspec: "args=[\'self\', \'filepath\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save_own_variables" + argspec: "args=[\'self\', \'store\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { 
+ name: "string_categorical" + argspec: "args=[\'cls\', \'max_tokens\', \'num_oov_indices\', \'output_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'one_hot\', \'None\'], " + } + member_method { + name: "string_hashed" + argspec: "args=[\'cls\', \'num_bins\', \'output_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'one_hot\', \'None\'], " + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.utils.-sidecar-evaluator.pbtxt b/keras/api/golden/v2/tensorflow.keras.utils.-sidecar-evaluator.pbtxt index 4161e90e916b..1d3a83fa52eb 100644 --- a/keras/api/golden/v2/tensorflow.keras.utils.-sidecar-evaluator.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.utils.-sidecar-evaluator.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.utils.SidecarEvaluator" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.utils.-steps-per-execution-tuner.pbtxt b/keras/api/golden/v2/tensorflow.keras.utils.-steps-per-execution-tuner.pbtxt new file mode 100644 index 000000000000..1363d2190e1e --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.utils.-steps-per-execution-tuner.pbtxt @@ -0,0 +1,21 @@ +path: "tensorflow.keras.utils.StepsPerExecutionTuner" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "steps_per_execution" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'optimizer\', \'spe_variable\', \'interval\', \'change_spe_interval\', \'change_threshold\'], varargs=None, keywords=None, defaults=[\'5\', \'10\', \'0.1\'], " + } + member_method { + name: "start" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "stop" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.utils.-timed-thread.pbtxt b/keras/api/golden/v2/tensorflow.keras.utils.-timed-thread.pbtxt new file mode 100644 index 000000000000..bd3947c59a52 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.utils.-timed-thread.pbtxt @@ -0,0 +1,25 @@ +path: "tensorflow.keras.utils.TimedThread" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'interval\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "is_alive" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "on_interval" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "start" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "stop" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.utils.custom_object_scope.pbtxt b/keras/api/golden/v2/tensorflow.keras.utils.custom_object_scope.pbtxt index 4fa8c7af04e4..08f84e0f825f 100644 --- a/keras/api/golden/v2/tensorflow.keras.utils.custom_object_scope.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.utils.custom_object_scope.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.utils.custom_object_scope" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/keras/api/golden/v2/tensorflow.keras.utils.legacy.pbtxt 
b/keras/api/golden/v2/tensorflow.keras.utils.legacy.pbtxt new file mode 100644 index 000000000000..267629bf49c2 --- /dev/null +++ b/keras/api/golden/v2/tensorflow.keras.utils.legacy.pbtxt @@ -0,0 +1,11 @@ +path: "tensorflow.keras.utils.legacy" +tf_module { + member_method { + name: "deserialize_keras_object" + argspec: "args=[\'identifier\', \'module_objects\', \'custom_objects\', \'printable_module_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'object\'], " + } + member_method { + name: "serialize_keras_object" + argspec: "args=[\'instance\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/keras/api/golden/v2/tensorflow.keras.utils.pbtxt b/keras/api/golden/v2/tensorflow.keras.utils.pbtxt index 18dc92498862..b084948598ba 100644 --- a/keras/api/golden/v2/tensorflow.keras.utils.pbtxt +++ b/keras/api/golden/v2/tensorflow.keras.utils.pbtxt @@ -4,6 +4,10 @@ tf_module { name: "CustomObjectScope" mtype: "" } + member { + name: "FeatureSpace" + mtype: "" + } member { name: "GeneratorEnqueuer" mtype: "" @@ -28,6 +32,14 @@ tf_module { name: "SidecarEvaluator" mtype: "" } + member { + name: "StepsPerExecutionTuner" + mtype: "" + } + member { + name: "TimedThread" + mtype: "" + } member { name: "custom_object_scope" mtype: "" @@ -36,6 +48,10 @@ tf_module { name: "experimental" mtype: "" } + member { + name: "legacy" + mtype: "" + } member_method { name: "array_to_img" argspec: "args=[\'x\', \'data_format\', \'scale\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'None\'], " @@ -46,7 +62,7 @@ tf_module { } member_method { name: "deserialize_keras_object" - argspec: "args=[\'identifier\', \'module_objects\', \'custom_objects\', \'printable_module_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'object\'], " + argspec: "args=[\'config\', \'custom_objects\', \'safe_mode\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'True\'], " } member_method { name: "disable_interactive_logging" @@ -94,7 +110,7 @@ tf_module { } member_method { name: "model_to_dot" - argspec: "args=[\'model\', \'show_shapes\', \'show_dtype\', \'show_layer_names\', \'rankdir\', \'expand_nested\', \'dpi\', \'subgraph\', \'layer_range\', \'show_layer_activations\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'True\', \'TB\', \'False\', \'96\', \'False\', \'None\', \'False\'], " + argspec: "args=[\'model\', \'show_shapes\', \'show_dtype\', \'show_layer_names\', \'rankdir\', \'expand_nested\', \'dpi\', \'subgraph\', \'layer_range\', \'show_layer_activations\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'True\', \'TB\', \'False\', \'96\', \'False\', \'None\', \'False\', \'False\'], " } member_method { name: "normalize" @@ -110,7 +126,7 @@ tf_module { } member_method { name: "plot_model" - argspec: "args=[\'model\', \'to_file\', \'show_shapes\', \'show_dtype\', \'show_layer_names\', \'rankdir\', \'expand_nested\', \'dpi\', \'layer_range\', \'show_layer_activations\'], varargs=None, keywords=None, defaults=[\'model.png\', \'False\', \'False\', \'True\', \'TB\', \'False\', \'96\', \'None\', \'False\'], " + argspec: "args=[\'model\', \'to_file\', \'show_shapes\', \'show_dtype\', \'show_layer_names\', \'rankdir\', \'expand_nested\', \'dpi\', \'layer_range\', \'show_layer_activations\', \'show_trainable\'], varargs=None, keywords=None, defaults=[\'model.png\', \'False\', \'False\', \'True\', \'TB\', \'False\', \'96\', \'None\', \'False\', \'False\'], " } member_method { name: 
"register_keras_serializable" @@ -122,7 +138,7 @@ tf_module { } member_method { name: "serialize_keras_object" - argspec: "args=[\'instance\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'obj\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_random_seed" @@ -144,8 +160,16 @@ tf_module { name: "to_categorical" argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], " } + member_method { + name: "to_ordinal" + argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], " + } member_method { name: "unpack_x_y_sample_weight" argspec: "args=[\'data\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "warmstart_embedding_matrix" + argspec: "args=[\'base_vocabulary\', \'new_vocabulary\', \'base_embeddings\', \'new_embeddings_initializer\'], varargs=None, keywords=None, defaults=[\'uniform\'], " + } } diff --git a/keras/api/golden/v2/tensorflow.keras.wrappers.pbtxt b/keras/api/golden/v2/tensorflow.keras.wrappers.pbtxt deleted file mode 100644 index 0b2fac9b7d99..000000000000 --- a/keras/api/golden/v2/tensorflow.keras.wrappers.pbtxt +++ /dev/null @@ -1,7 +0,0 @@ -path: "tensorflow.keras.wrappers" -tf_module { - member { - name: "scikit_learn" - mtype: "" - } -} diff --git a/keras/api/golden/v2/tensorflow.keras.wrappers.scikit_learn.-keras-classifier.pbtxt b/keras/api/golden/v2/tensorflow.keras.wrappers.scikit_learn.-keras-classifier.pbtxt deleted file mode 100644 index 180e05527f31..000000000000 --- a/keras/api/golden/v2/tensorflow.keras.wrappers.scikit_learn.-keras-classifier.pbtxt +++ /dev/null @@ -1,42 +0,0 @@ -path: "tensorflow.keras.wrappers.scikit_learn.KerasClassifier" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - member_method { - name: "__init__" - argspec: "args=[\'self\', \'build_fn\'], varargs=None, keywords=sk_params, defaults=[\'None\'], " - } - member_method { - name: "check_params" - argspec: "args=[\'self\', \'params\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "filter_sk_params" - argspec: "args=[\'self\', \'fn\', \'override\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "fit" - argspec: "args=[\'self\', \'x\', \'y\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "get_params" - argspec: "args=[\'self\'], varargs=None, keywords=params, defaults=None" - } - member_method { - name: "predict" - argspec: "args=[\'self\', \'x\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "predict_proba" - argspec: "args=[\'self\', \'x\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "score" - argspec: "args=[\'self\', \'x\', \'y\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "set_params" - argspec: "args=[\'self\'], varargs=None, keywords=params, defaults=None" - } -} diff --git a/keras/api/golden/v2/tensorflow.keras.wrappers.scikit_learn.-keras-regressor.pbtxt b/keras/api/golden/v2/tensorflow.keras.wrappers.scikit_learn.-keras-regressor.pbtxt deleted file mode 100644 index 0dfc03fb05e5..000000000000 --- a/keras/api/golden/v2/tensorflow.keras.wrappers.scikit_learn.-keras-regressor.pbtxt +++ /dev/null @@ -1,38 +0,0 @@ -path: "tensorflow.keras.wrappers.scikit_learn.KerasRegressor" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - member_method { - name: "__init__" - argspec: 
"args=[\'self\', \'build_fn\'], varargs=None, keywords=sk_params, defaults=[\'None\'], " - } - member_method { - name: "check_params" - argspec: "args=[\'self\', \'params\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "filter_sk_params" - argspec: "args=[\'self\', \'fn\', \'override\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "fit" - argspec: "args=[\'self\', \'x\', \'y\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "get_params" - argspec: "args=[\'self\'], varargs=None, keywords=params, defaults=None" - } - member_method { - name: "predict" - argspec: "args=[\'self\', \'x\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "score" - argspec: "args=[\'self\', \'x\', \'y\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "set_params" - argspec: "args=[\'self\'], varargs=None, keywords=params, defaults=None" - } -} diff --git a/keras/api/golden/v2/tensorflow.keras.wrappers.scikit_learn.pbtxt b/keras/api/golden/v2/tensorflow.keras.wrappers.scikit_learn.pbtxt deleted file mode 100644 index fbd4d13387a9..000000000000 --- a/keras/api/golden/v2/tensorflow.keras.wrappers.scikit_learn.pbtxt +++ /dev/null @@ -1,11 +0,0 @@ -path: "tensorflow.keras.wrappers.scikit_learn" -tf_module { - member { - name: "KerasClassifier" - mtype: "" - } - member { - name: "KerasRegressor" - mtype: "" - } -} diff --git a/keras/api/tests/BUILD b/keras/api/tests/BUILD index 3077ff5e6443..951ec210e8b3 100644 --- a/keras/api/tests/BUILD +++ b/keras/api/tests/BUILD @@ -3,6 +3,7 @@ load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = ["//keras/api:__subpackages__"], licenses = ["notice"], # Apache 2.0 ) @@ -32,8 +33,7 @@ tf_py_test( deps = [ "//:expect_six_installed", "//third_party/py/tensorflow", - "//third_party/tensorflow/python:lib", - "//third_party/tensorflow/python:platform", + "//third_party/tensorflow/python/lib/io:lib", "//third_party/tensorflow/tools/api/lib:python_object_to_proto_visitor", "//third_party/tensorflow/tools/common:public_api", "//third_party/tensorflow/tools/common:traverse", diff --git a/keras/api/tests/api_compatibility_test.py b/keras/api/tests/api_compatibility_test.py index 2aa1e357a00a..10e31601abdb 100644 --- a/keras/api/tests/api_compatibility_test.py +++ b/keras/api/tests/api_compatibility_test.py @@ -27,26 +27,25 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf - import argparse import os import re import sys import six +import tensorflow as tf +# isort: off from google.protobuf import message from google.protobuf import text_format - from tensorflow.python.lib.io import file_io -from tensorflow.python.platform import tf_logging as logging from tensorflow.tools.api.lib import api_objects_pb2 -from tensorflow.tools.api.lib import python_object_to_proto_visitor +from tensorflow.tools.api.lib import ( + python_object_to_proto_visitor, +) from tensorflow.tools.common import public_api from tensorflow.tools.common import traverse - # FLAGS defined at the bottom: FLAGS = None # DEFINE_boolean, update_goldens, default False: @@ -67,304 +66,354 @@ def _InitPathConstants(): - global _API_GOLDEN_FOLDER_V1 - global _API_GOLDEN_FOLDER_V2 - root_golden_path_v2 = os.path.join( - tf.compat.v1.resource_loader.get_data_files_path(), - '..', 'golden', 'v2', 'tensorflow.keras.pbtxt') - - if 
FLAGS.update_goldens: - root_golden_path_v2 = os.path.realpath(root_golden_path_v2) - # Get API directories based on the root golden file. This way - # we make sure to resolve symbolic links before creating new files. - _API_GOLDEN_FOLDER_V2 = os.path.dirname(root_golden_path_v2) - _API_GOLDEN_FOLDER_V1 = os.path.normpath( - os.path.join(_API_GOLDEN_FOLDER_V2, '..', 'v1')) + global _API_GOLDEN_FOLDER_V1 + global _API_GOLDEN_FOLDER_V2 + root_golden_path_v2 = os.path.join( + tf.compat.v1.resource_loader.get_data_files_path(), + "..", + "golden", + "v2", + "tensorflow.keras.pbtxt", + ) + + if FLAGS.update_goldens: + root_golden_path_v2 = os.path.realpath(root_golden_path_v2) + # Get API directories based on the root golden file. This way + # we make sure to resolve symbolic links before creating new files. + _API_GOLDEN_FOLDER_V2 = os.path.dirname(root_golden_path_v2) + _API_GOLDEN_FOLDER_V1 = os.path.normpath( + os.path.join(_API_GOLDEN_FOLDER_V2, "..", "v1") + ) _TEST_README_FILE = os.path.join( - tf.compat.v1.resource_loader.get_data_files_path(), 'README.txt') + tf.compat.v1.resource_loader.get_data_files_path(), "README.txt" +) _UPDATE_WARNING_FILE = os.path.join( - tf.compat.v1.resource_loader.get_data_files_path(), - 'API_UPDATE_WARNING.txt') + tf.compat.v1.resource_loader.get_data_files_path(), "API_UPDATE_WARNING.txt" +) def _KeyToFilePath(key, api_version): - """From a given key, construct a filepath. + """From a given key, construct a filepath. - Filepath will be inside golden folder for api_version. + Filepath will be inside golden folder for api_version. - Args: - key: a string used to determine the file path - api_version: a number indicating the tensorflow API version, e.g. 1 or 2. + Args: + key: a string used to determine the file path + api_version: a number indicating the tensorflow API version, e.g. 1 or 2. 
- Returns: - A string of file path to the pbtxt file which describes the public API - """ + Returns: + A string of file path to the pbtxt file which describes the public API + """ - def _ReplaceCapsWithDash(matchobj): - match = matchobj.group(0) - return '-%s' % (match.lower()) + def _ReplaceCapsWithDash(matchobj): + match = matchobj.group(0) + return f"-{match.lower()}" - case_insensitive_key = re.sub('([A-Z]{1})', _ReplaceCapsWithDash, - six.ensure_str(key)) - api_folder = ( - _API_GOLDEN_FOLDER_V2 if api_version == 2 else _API_GOLDEN_FOLDER_V1) - return os.path.join(api_folder, '%s.pbtxt' % case_insensitive_key) + case_insensitive_key = re.sub( + "([A-Z]{1})", _ReplaceCapsWithDash, six.ensure_str(key) + ) + api_folder = ( + _API_GOLDEN_FOLDER_V2 if api_version == 2 else _API_GOLDEN_FOLDER_V1 + ) + return os.path.join(api_folder, f"{case_insensitive_key}.pbtxt") def _FileNameToKey(filename): - """From a given filename, construct a key we use for api objects.""" + """From a given filename, construct a key we use for api objects.""" - def _ReplaceDashWithCaps(matchobj): - match = matchobj.group(0) - return match[1].upper() + def _ReplaceDashWithCaps(matchobj): + match = matchobj.group(0) + return match[1].upper() - base_filename = os.path.basename(filename) - base_filename_without_ext = os.path.splitext(base_filename)[0] - api_object_key = re.sub('((-[a-z]){1})', _ReplaceDashWithCaps, - six.ensure_str(base_filename_without_ext)) - return api_object_key + base_filename = os.path.basename(filename) + base_filename_without_ext = os.path.splitext(base_filename)[0] + api_object_key = re.sub( + "((-[a-z]){1})", + _ReplaceDashWithCaps, + six.ensure_str(base_filename_without_ext), + ) + return api_object_key def _VerifyNoSubclassOfMessageVisitor(path, parent, unused_children): - """A Visitor that crashes on subclasses of generated proto classes.""" - # If the traversed object is a proto Message class - if not (isinstance(parent, type) and issubclass(parent, message.Message)): - return - if parent is message.Message: - return - # Check that it is a direct subclass of Message. - if message.Message not in parent.__bases__: - raise NotImplementedError( - 'Object tf.%s is a subclass of a generated proto Message. ' - 'They are not yet supported by the API tools.' % path) + """A Visitor that crashes on subclasses of generated proto classes.""" + # If the traversed object is a proto Message class + if not (isinstance(parent, type) and issubclass(parent, message.Message)): + return + if parent is message.Message: + return + # Check that it is a direct subclass of Message. + if message.Message not in parent.__bases__: + raise NotImplementedError( + "Object tf.%s is a subclass of a generated proto Message. " + "They are not yet supported by the API tools." 
% path + ) def _FilterGoldenProtoDict(golden_proto_dict, omit_golden_symbols_map): - """Filter out golden proto dict symbols that should be omitted.""" - if not omit_golden_symbols_map: - return golden_proto_dict - filtered_proto_dict = dict(golden_proto_dict) - for key, symbol_list in six.iteritems(omit_golden_symbols_map): - api_object = api_objects_pb2.TFAPIObject() - api_object.CopyFrom(filtered_proto_dict[key]) - filtered_proto_dict[key] = api_object - module_or_class = None - if api_object.HasField('tf_module'): - module_or_class = api_object.tf_module - elif api_object.HasField('tf_class'): - module_or_class = api_object.tf_class - if module_or_class is not None: - for members in (module_or_class.member, module_or_class.member_method): - filtered_members = [m for m in members if m.name not in symbol_list] - # Two steps because protobuf repeated fields disallow slice assignment. - del members[:] - members.extend(filtered_members) - return filtered_proto_dict + """Filter out golden proto dict symbols that should be omitted.""" + if not omit_golden_symbols_map: + return golden_proto_dict + filtered_proto_dict = dict(golden_proto_dict) + for key, symbol_list in six.iteritems(omit_golden_symbols_map): + api_object = api_objects_pb2.TFAPIObject() + api_object.CopyFrom(filtered_proto_dict[key]) + filtered_proto_dict[key] = api_object + module_or_class = None + if api_object.HasField("tf_module"): + module_or_class = api_object.tf_module + elif api_object.HasField("tf_class"): + module_or_class = api_object.tf_class + if module_or_class is not None: + for members in ( + module_or_class.member, + module_or_class.member_method, + ): + filtered_members = [ + m for m in members if m.name not in symbol_list + ] + # Two steps because protobuf repeated fields disallow slice + # assignment. + del members[:] + members.extend(filtered_members) + return filtered_proto_dict class ApiCompatibilityTest(tf.test.TestCase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self._update_golden_warning = file_io.read_file_to_string( - _UPDATE_WARNING_FILE) - - self._test_readme_message = file_io.read_file_to_string(_TEST_README_FILE) - - def _AssertProtoDictEquals(self, - expected_dict, - actual_dict, - verbose=False, - update_goldens=False, - additional_missing_object_message='', - api_version=2): - """Diff given dicts of protobufs and report differences a readable way. - - Args: - expected_dict: a dict of TFAPIObject protos constructed from golden files. - actual_dict: a ict of TFAPIObject protos constructed by reading from the - TF package linked to the test. - verbose: Whether to log the full diffs, or simply report which files were - different. - update_goldens: Whether to update goldens when there are diffs found. - additional_missing_object_message: Message to print when a symbol is - missing. - api_version: TensorFlow API version to test. - """ - diffs = [] - verbose_diffs = [] - - expected_keys = set(expected_dict.keys()) - actual_keys = set(actual_dict.keys()) - only_in_expected = expected_keys - actual_keys - only_in_actual = actual_keys - expected_keys - all_keys = expected_keys | actual_keys - - # This will be populated below. - updated_keys = [] - - for key in all_keys: - diff_message = '' - verbose_diff_message = '' - # First check if the key is not found in one or the other. - if key in only_in_expected: - diff_message = 'Object %s expected but not found (removed). 
%s' % ( - key, additional_missing_object_message) - verbose_diff_message = diff_message - elif key in only_in_actual: - diff_message = 'New object %s found (added).' % key - verbose_diff_message = diff_message - else: - # Do not truncate diff - self.maxDiff = None # pylint: disable=invalid-name - # Now we can run an actual proto diff. - try: - self.assertProtoEquals(expected_dict[key], actual_dict[key]) - except AssertionError as e: - updated_keys.append(key) - diff_message = 'Change detected in python object: %s.' % key - verbose_diff_message = str(e) - - # All difference cases covered above. If any difference found, add to the - # list. - if diff_message: - diffs.append(diff_message) - verbose_diffs.append(verbose_diff_message) - - # If diffs are found, handle them based on flags. - if diffs: - diff_count = len(diffs) - logging.error(self._test_readme_message) - logging.error('%d differences found between API and golden.', diff_count) - - if update_goldens: - # Write files if requested. - logging.warning(self._update_golden_warning) - - # If the keys are only in expected, some objects are deleted. - # Remove files. - for key in only_in_expected: - filepath = _KeyToFilePath(key, api_version) - tf.io.gfile.remove(filepath) - - # If the files are only in actual (current library), these are new - # modules. Write them to files. Also record all updates in files. - for key in only_in_actual | set(updated_keys): - filepath = _KeyToFilePath(key, api_version) - file_io.write_string_to_file( - filepath, text_format.MessageToString(actual_dict[key])) - else: - # Include the actual differences to help debugging. - for d, verbose_d in zip(diffs, verbose_diffs): - logging.error(' %s', d) - logging.error(' %s', verbose_d) - # Fail if we cannot fix the test by updating goldens. - self.fail('%d differences found between API and golden.' % diff_count) - - else: - logging.info('No differences found between API and golden.') - - def _checkBackwardsCompatibility(self, - root, - golden_file_patterns, - api_version, - additional_private_map=None, - omit_golden_symbols_map=None): - # Extract all API stuff. - visitor = python_object_to_proto_visitor.PythonObjectToProtoVisitor( - default_path='tensorflow.keras') - - public_api_visitor = public_api.PublicAPIVisitor(visitor) - if additional_private_map: - public_api_visitor.private_map.update(additional_private_map) - public_api_visitor.set_root_name('tf.keras') - - traverse.traverse(root, public_api_visitor) - proto_dict = visitor.GetProtos() - - # Read all golden files. - golden_file_list = tf.compat.v1.gfile.Glob(golden_file_patterns) - - def _ReadFileToProto(filename): - """Read a filename, create a protobuf from its contents.""" - ret_val = api_objects_pb2.TFAPIObject() - text_format.Merge(file_io.read_file_to_string(filename), ret_val) - return ret_val - - golden_proto_dict = { - _FileNameToKey(filename): _ReadFileToProto(filename) - for filename in golden_file_list - } - golden_proto_dict = _FilterGoldenProtoDict(golden_proto_dict, - omit_golden_symbols_map) - - # Diff them. Do not fail if called with update. - # If the test is run to update goldens, only report diffs but do not fail. 
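An aside on the golden changes at the top of this section: the updated argspecs give `serialize`/`deserialize` a `use_legacy_format` flag across optimizers, schedules, and regularizers, alongside the new `safe_mode` argument on `tf.keras.saving.load_model` and `deserialize_keras_object`. A minimal sketch of what those argspecs permit, assuming a TensorFlow build that already includes these changes; the choice of optimizer is illustrative only:

```python
import tensorflow as tf

opt = tf.keras.optimizers.Adam(learning_rate=1e-3)

# Per the updated goldens, serialize() takes use_legacy_format (default
# False); passing True requests the pre-change config format.
new_cfg = tf.keras.optimizers.serialize(opt)
legacy_cfg = tf.keras.optimizers.serialize(opt, use_legacy_format=True)

# deserialize() accepts the matching flag, so either config round-trips.
restored = tf.keras.optimizers.deserialize(new_cfg)
legacy_restored = tf.keras.optimizers.deserialize(
    legacy_cfg, use_legacy_format=True
)
```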
- self._AssertProtoDictEquals( - golden_proto_dict, - proto_dict, - verbose=FLAGS.verbose_diffs, - update_goldens=FLAGS.update_goldens, - api_version=api_version) - - def testAPIBackwardsCompatibility(self): - api_version = 1 - if hasattr(tf, '_major_api_version') and tf._major_api_version == 2: - api_version = 2 - golden_file_patterns = [ - os.path.join( - tf.compat.v1.resource_loader.get_root_dir_with_all_resources(), - _KeyToFilePath('*', api_version))] - - self._checkBackwardsCompatibility( - tf.keras, + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self._update_golden_warning = file_io.read_file_to_string( + _UPDATE_WARNING_FILE + ) + + self._test_readme_message = file_io.read_file_to_string( + _TEST_README_FILE + ) + + def _AssertProtoDictEquals( + self, + expected_dict, + actual_dict, + verbose=False, + update_goldens=False, + additional_missing_object_message="", + api_version=2, + ): + """Diff given dicts of protobufs and report differences a readable way. + + Args: + expected_dict: a dict of TFAPIObject protos constructed from golden + files. + actual_dict: a ict of TFAPIObject protos constructed by reading from + the TF package linked to the test. + verbose: Whether to log the full diffs, or simply report which files + were different. + update_goldens: Whether to update goldens when there are diffs found. + additional_missing_object_message: Message to print when a symbol is + missing. + api_version: TensorFlow API version to test. + """ + diffs = [] + verbose_diffs = [] + + expected_keys = set(expected_dict.keys()) + actual_keys = set(actual_dict.keys()) + only_in_expected = expected_keys - actual_keys + only_in_actual = actual_keys - expected_keys + all_keys = expected_keys | actual_keys + + # This will be populated below. + updated_keys = [] + + for key in all_keys: + diff_message = "" + verbose_diff_message = "" + # First check if the key is not found in one or the other. + if key in only_in_expected: + diff_message = ( + "Object %s expected but not found (removed). %s" + % (key, additional_missing_object_message) + ) + verbose_diff_message = diff_message + elif key in only_in_actual: + diff_message = f"New object {key} found (added)." + verbose_diff_message = diff_message + else: + # Do not truncate diff + self.maxDiff = None + # Now we can run an actual proto diff. + try: + self.assertProtoEquals(expected_dict[key], actual_dict[key]) + except AssertionError as e: + updated_keys.append(key) + diff_message = f"Change detected in python object: {key}." + verbose_diff_message = str(e) + + # All difference cases covered above. If any difference found, add + # to the list. + if diff_message: + diffs.append(diff_message) + verbose_diffs.append(verbose_diff_message) + + # If diffs are found, handle them based on flags. + if diffs: + diff_count = len(diffs) + tf.compat.v1.logging.error(self._test_readme_message) + tf.compat.v1.logging.error( + "%d differences found between API and golden.", diff_count + ) + + if update_goldens: + # Write files if requested. + tf.compat.v1.logging.warning(self._update_golden_warning) + + # If the keys are only in expected, some objects are deleted. + # Remove files. + for key in only_in_expected: + filepath = _KeyToFilePath(key, api_version) + tf.io.gfile.remove(filepath) + + # If the files are only in actual (current library), these are + # new modules. Write them to files. Also record all updates in + # files. 
+ for key in only_in_actual | set(updated_keys): + filepath = _KeyToFilePath(key, api_version) + file_io.write_string_to_file( + filepath, text_format.MessageToString(actual_dict[key]) + ) + else: + # Include the actual differences to help debugging. + for d, verbose_d in zip(diffs, verbose_diffs): + tf.compat.v1.logging.error(" %s", d) + tf.compat.v1.logging.error(" %s", verbose_d) + # Fail if we cannot fix the test by updating goldens. + self.fail( + "%d differences found between API and golden." % diff_count + ) + + else: + tf.compat.v1.logging.info( + "No differences found between API and golden." + ) + + def _checkBackwardsCompatibility( + self, + root, golden_file_patterns, api_version, - # Skip compat.v1 and compat.v2 since they are validated - # in separate tests. - additional_private_map={'tf.compat': ['v1', 'v2']}, - omit_golden_symbols_map={}) - - def testAPIBackwardsCompatibilityV1(self): - api_version = 1 - golden_file_patterns = os.path.join( - tf.compat.v1.resource_loader.get_root_dir_with_all_resources(), - _KeyToFilePath('*', api_version)) - self._checkBackwardsCompatibility( - tf.compat.v1.keras, - golden_file_patterns, - api_version, - additional_private_map={ - 'tf': ['pywrap_tensorflow'], - 'tf.compat': ['v1', 'v2'], - }, - omit_golden_symbols_map={}) - - def testAPIBackwardsCompatibilityV2(self): - api_version = 2 - golden_file_patterns = [os.path.join( - tf.compat.v1.resource_loader.get_root_dir_with_all_resources(), - _KeyToFilePath('*', api_version))] - self._checkBackwardsCompatibility( - tf.compat.v2.keras, - golden_file_patterns, - api_version, - additional_private_map={'tf.compat': ['v1', 'v2']}, - omit_golden_symbols_map={}) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument( - '--update_goldens', type=bool, default=False, help=_UPDATE_GOLDENS_HELP) - parser.add_argument( - '--verbose_diffs', type=bool, default=True, help=_VERBOSE_DIFFS_HELP) - FLAGS, unparsed = parser.parse_known_args() - _InitPathConstants() - - # Now update argv, so that unittest library does not get confused. - sys.argv = [sys.argv[0]] + unparsed - tf.test.main() + additional_private_map=None, + omit_golden_symbols_map=None, + ): + # Extract all API stuff. + visitor = python_object_to_proto_visitor.PythonObjectToProtoVisitor( + default_path="tensorflow.keras" + ) + + public_api_visitor = public_api.PublicAPIVisitor(visitor) + if additional_private_map: + public_api_visitor.private_map.update(additional_private_map) + public_api_visitor.set_root_name("tf.keras") + + traverse.traverse(root, public_api_visitor) + proto_dict = visitor.GetProtos() + + # Read all golden files. + golden_file_list = tf.compat.v1.gfile.Glob(golden_file_patterns) + + def _ReadFileToProto(filename): + """Read a filename, create a protobuf from its contents.""" + ret_val = api_objects_pb2.TFAPIObject() + text_format.Merge(file_io.read_file_to_string(filename), ret_val) + return ret_val + + golden_proto_dict = { + _FileNameToKey(filename): _ReadFileToProto(filename) + for filename in golden_file_list + } + golden_proto_dict = _FilterGoldenProtoDict( + golden_proto_dict, omit_golden_symbols_map + ) + + # Diff them. Do not fail if called with update. + # If the test is run to update goldens, only report diffs but do not + # fail. 
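The `_KeyToFilePath`/`_FileNameToKey` helpers above are what produce the unusual golden filenames seen throughout this diff, e.g. `tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt` for `...legacy.SGD`. A standalone re-derivation of the two regex transforms; the function names here are illustrative and the test's golden-folder resolution is omitted:

```python
import re

def key_to_filename(key):
    # Mirrors _ReplaceCapsWithDash: every capital becomes "-<lowercase>".
    dashed = re.sub("([A-Z]{1})", lambda m: f"-{m.group(0).lower()}", key)
    return f"{dashed}.pbtxt"

def filename_to_key(base_filename):
    # Mirrors _ReplaceDashWithCaps: "-x" turns back into "X".
    base = base_filename.rsplit(".pbtxt", 1)[0]
    return re.sub("((-[a-z]){1})", lambda m: m.group(0)[1].upper(), base)

assert (
    key_to_filename("tensorflow.keras.optimizers.legacy.SGD")
    == "tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt"
)
assert (
    filename_to_key("tensorflow.keras.optimizers.legacy.-s-g-d.pbtxt")
    == "tensorflow.keras.optimizers.legacy.SGD"
)
```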
+ self._AssertProtoDictEquals( + golden_proto_dict, + proto_dict, + verbose=FLAGS.verbose_diffs, + update_goldens=FLAGS.update_goldens, + api_version=api_version, + ) + + def testAPIBackwardsCompatibility(self): + api_version = 1 + if hasattr(tf, "_major_api_version") and tf._major_api_version == 2: + api_version = 2 + golden_file_patterns = [ + os.path.join( + tf.compat.v1.resource_loader.get_root_dir_with_all_resources(), + _KeyToFilePath("*", api_version), + ) + ] + + self._checkBackwardsCompatibility( + tf.keras, + golden_file_patterns, + api_version, + # Skip compat.v1 and compat.v2 since they are validated + # in separate tests. + additional_private_map={"tf.compat": ["v1", "v2"]}, + omit_golden_symbols_map={}, + ) + + def testAPIBackwardsCompatibilityV1(self): + api_version = 1 + golden_file_patterns = os.path.join( + tf.compat.v1.resource_loader.get_root_dir_with_all_resources(), + _KeyToFilePath("*", api_version), + ) + self._checkBackwardsCompatibility( + tf.compat.v1.keras, + golden_file_patterns, + api_version, + additional_private_map={ + "tf": ["pywrap_tensorflow"], + "tf.compat": ["v1", "v2"], + }, + omit_golden_symbols_map={}, + ) + + def testAPIBackwardsCompatibilityV2(self): + api_version = 2 + golden_file_patterns = [ + os.path.join( + tf.compat.v1.resource_loader.get_root_dir_with_all_resources(), + _KeyToFilePath("*", api_version), + ) + ] + self._checkBackwardsCompatibility( + tf.compat.v2.keras, + golden_file_patterns, + api_version, + additional_private_map={"tf.compat": ["v1", "v2"]}, + omit_golden_symbols_map={}, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--update_goldens", type=bool, default=False, help=_UPDATE_GOLDENS_HELP + ) + parser.add_argument( + "--verbose_diffs", type=bool, default=True, help=_VERBOSE_DIFFS_HELP + ) + FLAGS, unparsed = parser.parse_known_args() + _InitPathConstants() + + # Now update argv, so that unittest library does not get confused. + sys.argv = [sys.argv[0]] + unparsed + tf.test.main() diff --git a/keras/applications/BUILD b/keras/applications/BUILD index b9960fb8bad4..90969468ef99 100644 --- a/keras/applications/BUILD +++ b/keras/applications/BUILD @@ -1,9 +1,11 @@ # Description: # Contains the Keras Application package (internal TensorFlow version). +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ # Remove this deps to integration test. "//keras:friends", @@ -53,7 +55,7 @@ tf_py_test( name = "applications_test", size = "medium", srcs = ["applications_test.py"], - shard_count = 40, + shard_count = 50, tags = [ "no_rocm", "notsan", # b/168814536 diff --git a/keras/applications/__init__.py b/keras/applications/__init__.py index ac88213e2c8c..c08ee2843fda 100644 --- a/keras/applications/__init__.py +++ b/keras/applications/__init__.py @@ -13,18 +13,16 @@ # limitations under the License. 
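Stripped of the `file_io` indirection, the golden I/O in `api_compatibility_test.py` above is a protobuf text-format round trip: `_ReadFileToProto` parses with `text_format.Merge`, and the `--update_goldens` branch writes with `text_format.MessageToString`. A sketch under the assumption of a source checkout where `tensorflow.tools` is importable; `read_golden`/`write_golden` are illustrative names, not part of the test:

```python
from google.protobuf import text_format
from tensorflow.tools.api.lib import api_objects_pb2

def read_golden(path):
    # Parse one golden .pbtxt into a TFAPIObject, like _ReadFileToProto.
    proto = api_objects_pb2.TFAPIObject()
    with open(path) as f:
        text_format.Merge(f.read(), proto)
    return proto

def write_golden(path, proto):
    # The --update_goldens branch rewrites goldens essentially this way.
    with open(path, "w") as f:
        f.write(text_format.MessageToString(proto))
```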
# ============================================================================== """Keras Applications are premade architectures with pre-trained weights.""" -# pylint: disable=g-bad-import-order -from keras.applications.convnext import ConvNeXtTiny -from keras.applications.convnext import ConvNeXtSmall + from keras.applications.convnext import ConvNeXtBase from keras.applications.convnext import ConvNeXtLarge +from keras.applications.convnext import ConvNeXtSmall +from keras.applications.convnext import ConvNeXtTiny from keras.applications.convnext import ConvNeXtXLarge - from keras.applications.densenet import DenseNet121 from keras.applications.densenet import DenseNet169 from keras.applications.densenet import DenseNet201 - from keras.applications.efficientnet import EfficientNetB0 from keras.applications.efficientnet import EfficientNetB1 from keras.applications.efficientnet import EfficientNetB2 @@ -33,7 +31,6 @@ from keras.applications.efficientnet import EfficientNetB5 from keras.applications.efficientnet import EfficientNetB6 from keras.applications.efficientnet import EfficientNetB7 - from keras.applications.efficientnet_v2 import EfficientNetV2B0 from keras.applications.efficientnet_v2 import EfficientNetV2B1 from keras.applications.efficientnet_v2 import EfficientNetV2B2 @@ -41,25 +38,17 @@ from keras.applications.efficientnet_v2 import EfficientNetV2L from keras.applications.efficientnet_v2 import EfficientNetV2M from keras.applications.efficientnet_v2 import EfficientNetV2S - from keras.applications.inception_resnet_v2 import InceptionResNetV2 from keras.applications.inception_v3 import InceptionV3 - from keras.applications.mobilenet import MobileNet from keras.applications.mobilenet_v2 import MobileNetV2 -from keras.applications.mobilenet_v3 import MobileNetV3Small from keras.applications.mobilenet_v3 import MobileNetV3Large - +from keras.applications.mobilenet_v3 import MobileNetV3Small from keras.applications.nasnet import NASNetLarge from keras.applications.nasnet import NASNetMobile - from keras.applications.resnet import ResNet50 from keras.applications.resnet import ResNet101 from keras.applications.resnet import ResNet152 -from keras.applications.resnet_v2 import ResNet50V2 -from keras.applications.resnet_v2 import ResNet101V2 -from keras.applications.resnet_v2 import ResNet152V2 - from keras.applications.resnet_rs import ResNetRS50 from keras.applications.resnet_rs import ResNetRS101 from keras.applications.resnet_rs import ResNetRS152 @@ -67,8 +56,9 @@ from keras.applications.resnet_rs import ResNetRS270 from keras.applications.resnet_rs import ResNetRS350 from keras.applications.resnet_rs import ResNetRS420 - +from keras.applications.resnet_v2 import ResNet50V2 +from keras.applications.resnet_v2 import ResNet101V2 +from keras.applications.resnet_v2 import ResNet152V2 from keras.applications.vgg16 import VGG16 from keras.applications.vgg19 import VGG19 - from keras.applications.xception import Xception diff --git a/keras/applications/applications_load_weight_test.py b/keras/applications/applications_load_weight_test.py index 42ff88fd1bc7..875f0e4cd3e2 100644 --- a/keras/applications/applications_load_weight_test.py +++ b/keras/applications/applications_load_weight_test.py @@ -14,11 +14,10 @@ # ============================================================================== """Integration tests for Keras applications.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl import flags from absl.testing import parameterized -import numpy as np from 
keras.applications import convnext from keras.applications import densenet @@ -40,62 +39,112 @@ from keras.utils import data_utils from keras.utils import image_utils - ARG_TO_MODEL = { - 'resnet': (resnet, [resnet.ResNet50, resnet.ResNet101, resnet.ResNet152]), - 'resnet_v2': - (resnet_v2, - [resnet_v2.ResNet50V2, resnet_v2.ResNet101V2, resnet_v2.ResNet152V2]), - 'vgg16': (vgg16, [vgg16.VGG16]), - 'vgg19': (vgg19, [vgg19.VGG19]), - 'xception': (xception, [xception.Xception]), - 'inception_v3': (inception_v3, [inception_v3.InceptionV3]), - 'inception_resnet_v2': - (inception_resnet_v2, [inception_resnet_v2.InceptionResNetV2]), - 'mobilenet': (mobilenet, [mobilenet.MobileNet]), - 'mobilenet_v2': (mobilenet_v2, [mobilenet_v2.MobileNetV2]), - 'mobilenet_v3_small': (mobilenet_v3, [mobilenet_v3.MobileNetV3Small]), - 'mobilenet_v3_large': (mobilenet_v3, [mobilenet_v3.MobileNetV3Large]), - 'convnext': - (convnext, - [convnext.ConvNeXtTiny, convnext.ConvNeXtSmall, convnext.ConvNeXtBase, - convnext.ConvNeXtLarge, convnext.ConvNeXtXLarge]), - 'densenet': - (densenet, - [densenet.DenseNet121, densenet.DenseNet169, densenet.DenseNet201]), - 'nasnet_mobile': (nasnet, [nasnet.NASNetMobile]), - 'nasnet_large': (nasnet, [nasnet.NASNetLarge]), - 'efficientnet': (efficientnet, [ - efficientnet.EfficientNetB0, efficientnet.EfficientNetB1, - efficientnet.EfficientNetB2, efficientnet.EfficientNetB3, - efficientnet.EfficientNetB4, efficientnet.EfficientNetB5, - efficientnet.EfficientNetB6, efficientnet.EfficientNetB7 - ]), - 'efficientnet_v2': (efficientnet_v2, [ - efficientnet_v2.EfficientNetV2B0, efficientnet_v2.EfficientNetV2B1, - efficientnet_v2.EfficientNetV2B2, efficientnet_v2.EfficientNetV2B3, - efficientnet_v2.EfficientNetV2S, efficientnet_v2.EfficientNetV2M, - efficientnet_v2.EfficientNetV2L - ]), - 'resnet_rs': (resnet_rs, [ - resnet_rs.ResNetRS50, resnet_rs.ResNetRS101, resnet_rs.ResNetRS152, - resnet_rs.ResNetRS200, resnet_rs.ResNetRS270, resnet_rs.ResNetRS350, - resnet_rs.ResNetRS420 - ]), - 'regnet': (regnet, [ - regnet.RegNetX002, regnet.RegNetX004, regnet.RegNetX006, - regnet.RegNetX008, regnet.RegNetX016, regnet.RegNetX032, - regnet.RegNetX040, regnet.RegNetX064, regnet.RegNetX080, - regnet.RegNetX120, regnet.RegNetX160, regnet.RegNetX320, - regnet.RegNetY002, regnet.RegNetY004, regnet.RegNetY006, - regnet.RegNetY008, regnet.RegNetY016, regnet.RegNetY032, - regnet.RegNetY040, regnet.RegNetY064, regnet.RegNetY080, - regnet.RegNetY120, regnet.RegNetY160, regnet.RegNetY320 - ]) + "resnet": (resnet, [resnet.ResNet50, resnet.ResNet101, resnet.ResNet152]), + "resnet_v2": ( + resnet_v2, + [resnet_v2.ResNet50V2, resnet_v2.ResNet101V2, resnet_v2.ResNet152V2], + ), + "vgg16": (vgg16, [vgg16.VGG16]), + "vgg19": (vgg19, [vgg19.VGG19]), + "xception": (xception, [xception.Xception]), + "inception_v3": (inception_v3, [inception_v3.InceptionV3]), + "inception_resnet_v2": ( + inception_resnet_v2, + [inception_resnet_v2.InceptionResNetV2], + ), + "mobilenet": (mobilenet, [mobilenet.MobileNet]), + "mobilenet_v2": (mobilenet_v2, [mobilenet_v2.MobileNetV2]), + "mobilenet_v3_small": (mobilenet_v3, [mobilenet_v3.MobileNetV3Small]), + "mobilenet_v3_large": (mobilenet_v3, [mobilenet_v3.MobileNetV3Large]), + "convnext": ( + convnext, + [ + convnext.ConvNeXtTiny, + convnext.ConvNeXtSmall, + convnext.ConvNeXtBase, + convnext.ConvNeXtLarge, + convnext.ConvNeXtXLarge, + ], + ), + "densenet": ( + densenet, + [densenet.DenseNet121, densenet.DenseNet169, densenet.DenseNet201], + ), + "nasnet_mobile": (nasnet, 
[nasnet.NASNetMobile]), + "nasnet_large": (nasnet, [nasnet.NASNetLarge]), + "efficientnet": ( + efficientnet, + [ + efficientnet.EfficientNetB0, + efficientnet.EfficientNetB1, + efficientnet.EfficientNetB2, + efficientnet.EfficientNetB3, + efficientnet.EfficientNetB4, + efficientnet.EfficientNetB5, + efficientnet.EfficientNetB6, + efficientnet.EfficientNetB7, + ], + ), + "efficientnet_v2": ( + efficientnet_v2, + [ + efficientnet_v2.EfficientNetV2B0, + efficientnet_v2.EfficientNetV2B1, + efficientnet_v2.EfficientNetV2B2, + efficientnet_v2.EfficientNetV2B3, + efficientnet_v2.EfficientNetV2S, + efficientnet_v2.EfficientNetV2M, + efficientnet_v2.EfficientNetV2L, + ], + ), + "resnet_rs": ( + resnet_rs, + [ + resnet_rs.ResNetRS50, + resnet_rs.ResNetRS101, + resnet_rs.ResNetRS152, + resnet_rs.ResNetRS200, + resnet_rs.ResNetRS270, + resnet_rs.ResNetRS350, + resnet_rs.ResNetRS420, + ], + ), + "regnet": ( + regnet, + [ + regnet.RegNetX002, + regnet.RegNetX004, + regnet.RegNetX006, + regnet.RegNetX008, + regnet.RegNetX016, + regnet.RegNetX032, + regnet.RegNetX040, + regnet.RegNetX064, + regnet.RegNetX080, + regnet.RegNetX120, + regnet.RegNetX160, + regnet.RegNetX320, + regnet.RegNetY002, + regnet.RegNetY004, + regnet.RegNetY006, + regnet.RegNetY008, + regnet.RegNetY016, + regnet.RegNetY032, + regnet.RegNetY040, + regnet.RegNetY064, + regnet.RegNetY080, + regnet.RegNetY120, + regnet.RegNetY160, + regnet.RegNetY320, + ], + ), } -TEST_IMAGE_PATH = ('https://storage.googleapis.com/tensorflow/' - 'keras-applications/tests/elephant.jpg') +TEST_IMAGE_PATH = ( + "https://storage.googleapis.com/tensorflow/" + "keras-applications/tests/elephant.jpg" +) _IMAGENET_CLASSES = 1000 # Add a flag to define which application module file is tested. @@ -103,48 +152,47 @@ # it only triggers the tests of the application models in the module # if that module file has been modified. FLAGS = flags.FLAGS -flags.DEFINE_string('module', None, - 'Application module used in this test.') +flags.DEFINE_string("module", None, "Application module used in this test.") def _get_elephant(target_size): - # For models that don't include a Flatten step, - # the default is to accept variable-size inputs - # even when loading ImageNet weights (since it is possible). - # In this case, default to 299x299. - if target_size[0] is None: - target_size = (299, 299) - test_image = data_utils.get_file('elephant.jpg', TEST_IMAGE_PATH) - img = image_utils.load_img(test_image, target_size=tuple(target_size)) - x = image_utils.img_to_array(img) - return np.expand_dims(x, axis=0) + # For models that don't include a Flatten step, + # the default is to accept variable-size inputs + # even when loading ImageNet weights (since it is possible). + # In this case, default to 299x299. 
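The 299x299 fallback in `_get_elephant` works because applications models without a `Flatten` step are fully convolutional: with `include_top=False`, the spatial dimensions stay unconstrained, which is also the shape `applications_test.py` later asserts. A quick sketch, with `weights=None` so nothing is downloaded:

```python
import tensorflow as tf

model = tf.keras.applications.ResNet50(weights=None, include_top=False)
print(model.input_shape)   # (None, None, None, 3): variable spatial dims
print(model.output_shape)  # (None, None, None, 2048)
```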
+ if target_size[0] is None: + target_size = (299, 299) + test_image = data_utils.get_file("elephant.jpg", TEST_IMAGE_PATH) + img = image_utils.load_img(test_image, target_size=tuple(target_size)) + x = image_utils.img_to_array(img) + return np.expand_dims(x, axis=0) class ApplicationsLoadWeightTest(tf.test.TestCase, parameterized.TestCase): + def assertShapeEqual(self, shape1, shape2): + if len(shape1) != len(shape2): + raise AssertionError( + f"Shapes are different rank: {shape1} vs {shape2}" + ) + if shape1 != shape2: + raise AssertionError(f"Shapes differ: {shape1} vs {shape2}") - def assertShapeEqual(self, shape1, shape2): - if len(shape1) != len(shape2): - raise AssertionError( - 'Shapes are different rank: %s vs %s' % (shape1, shape2)) - if shape1 != shape2: - raise AssertionError('Shapes differ: %s vs %s' % (shape1, shape2)) - - def test_application_pretrained_weights_loading(self): - app_module = ARG_TO_MODEL[FLAGS.module][0] - apps = ARG_TO_MODEL[FLAGS.module][1] - for app in apps: - try: - model = app(weights='imagenet') - except Exception: # pylint: disable=broad-except - self.skipTest('TODO(b/227700184): Re-enable.') - self.assertShapeEqual(model.output_shape, (None, _IMAGENET_CLASSES)) - x = _get_elephant(model.input_shape[1:3]) - x = app_module.preprocess_input(x) - preds = model.predict(x) - names = [p[1] for p in app_module.decode_predictions(preds)[0]] - # Test correct label is in top 3 (weak correctness test). - self.assertIn('African_elephant', names[:3]) + def test_application_pretrained_weights_loading(self): + app_module = ARG_TO_MODEL[FLAGS.module][0] + apps = ARG_TO_MODEL[FLAGS.module][1] + for app in apps: + try: + model = app(weights="imagenet") + except Exception: + self.skipTest("TODO(b/227700184): Re-enable.") + self.assertShapeEqual(model.output_shape, (None, _IMAGENET_CLASSES)) + x = _get_elephant(model.input_shape[1:3]) + x = app_module.preprocess_input(x) + preds = model.predict(x) + names = [p[1] for p in app_module.decode_predictions(preds)[0]] + # Test correct label is in top 3 (weak correctness test). 
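The top-3 assertion above relies on the `decode_predictions` contract: for each input sample it returns `(imagenet_id, class_name, score)` tuples sorted by descending score. A self-contained sketch; the random input makes the decoded names meaningless and only illustrates the shapes involved:

```python
import numpy as np
from keras.applications import mobilenet

model = mobilenet.MobileNet(weights="imagenet")
x = mobilenet.preprocess_input(
    np.random.uniform(0, 255, size=(1, 224, 224, 3))
)
preds = model.predict(x)  # shape (1, 1000)
top3 = mobilenet.decode_predictions(preds, top=3)[0]
names = [name for _, name, _ in top3]
print(names)
```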
+ self.assertIn("African_elephant", names[:3]) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/applications/applications_test.py b/keras/applications/applications_test.py index e11e2119e437..d74ae95ec33f 100644 --- a/keras/applications/applications_test.py +++ b/keras/applications/applications_test.py @@ -14,8 +14,13 @@ # ============================================================================== """Integration tests for Keras applications.""" +import os + +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras import backend +from keras import utils from keras.applications import convnext from keras.applications import densenet from keras.applications import efficientnet @@ -33,62 +38,79 @@ from keras.applications import vgg16 from keras.applications import vgg19 from keras.applications import xception -from keras import utils -import tensorflow.compat.v2 as tf +from keras.testing_infra import test_utils -MODEL_LIST_NO_NASNET = [(resnet.ResNet50, 2048), (resnet.ResNet101, 2048), - (resnet.ResNet152, 2048), (resnet_v2.ResNet50V2, 2048), - (resnet_v2.ResNet101V2, 2048), - (resnet_v2.ResNet152V2, 2048), (vgg16.VGG16, 512), - (vgg19.VGG19, 512), (xception.Xception, 2048), - (inception_v3.InceptionV3, 2048), - (inception_resnet_v2.InceptionResNetV2, 1536), - (mobilenet.MobileNet, 1024), - (mobilenet_v2.MobileNetV2, 1280), - (mobilenet_v3.MobileNetV3Small, 576), - (mobilenet_v3.MobileNetV3Large, 960), - (convnext.ConvNeXtTiny, 768), - (convnext.ConvNeXtSmall, 768), - (convnext.ConvNeXtBase, 1024), - (convnext.ConvNeXtLarge, 1536), - (convnext.ConvNeXtXLarge, 2048), - (densenet.DenseNet121, 1024), - (densenet.DenseNet169, 1664), - (densenet.DenseNet201, 1920), - (efficientnet.EfficientNetB0, 1280), - (efficientnet.EfficientNetB1, 1280), - (efficientnet.EfficientNetB2, 1408), - (efficientnet.EfficientNetB3, 1536), - (efficientnet.EfficientNetB4, 1792), - (efficientnet.EfficientNetB5, 2048), - (efficientnet.EfficientNetB6, 2304), - (efficientnet.EfficientNetB7, 2560), - (efficientnet_v2.EfficientNetV2B0, 1280), - (efficientnet_v2.EfficientNetV2B1, 1280), - (efficientnet_v2.EfficientNetV2B2, 1408), - (efficientnet_v2.EfficientNetV2B3, 1536), - (efficientnet_v2.EfficientNetV2S, 1280), - (efficientnet_v2.EfficientNetV2M, 1280), - (efficientnet_v2.EfficientNetV2L, 1280), - (regnet.RegNetX002, 368), (regnet.RegNetX004, 384), - (regnet.RegNetX006, 528), (regnet.RegNetX008, 672), - (regnet.RegNetX016, 912), (regnet.RegNetX032, 1008), - (regnet.RegNetX040, 1360), (regnet.RegNetX064, 1624), - (regnet.RegNetX080, 1920), (regnet.RegNetX120, 2240), - (regnet.RegNetX160, 2048), (regnet.RegNetX320, 2520), - (regnet.RegNetY002, 368), (regnet.RegNetY004, 440), - (regnet.RegNetY006, 608), (regnet.RegNetY008, 768), - (regnet.RegNetY016, 888), (regnet.RegNetY032, 1512), - (regnet.RegNetY040, 1088), (regnet.RegNetY064, 1296), - (regnet.RegNetY080, 2016), (regnet.RegNetY120, 2240), - (regnet.RegNetY160, 3024), (regnet.RegNetY320, 3712), - (resnet_rs.ResNetRS50, 2048), - (resnet_rs.ResNetRS101, 2048), - (resnet_rs.ResNetRS152, 2048), - (resnet_rs.ResNetRS200, 2048), - (resnet_rs.ResNetRS270, 2048), - (resnet_rs.ResNetRS350, 2048), - (resnet_rs.ResNetRS420, 2048)] +MODEL_LIST_NO_NASNET = [ + (resnet.ResNet50, 2048), + (resnet.ResNet101, 2048), + (resnet.ResNet152, 2048), + (resnet_v2.ResNet50V2, 2048), + (resnet_v2.ResNet101V2, 2048), + (resnet_v2.ResNet152V2, 2048), + (vgg16.VGG16, 512), + (vgg19.VGG19, 512), + 
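+    # (Editor's note: each entry pairs a model constructor with the
+    # channel count of its final convolutional feature map; the no-top
+    # and pooling tests below assert the output shape ends in this value.)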
(xception.Xception, 2048), + (inception_v3.InceptionV3, 2048), + (inception_resnet_v2.InceptionResNetV2, 1536), + (mobilenet.MobileNet, 1024), + (mobilenet_v2.MobileNetV2, 1280), + (mobilenet_v3.MobileNetV3Small, 576), + (mobilenet_v3.MobileNetV3Large, 960), + (convnext.ConvNeXtTiny, 768), + (convnext.ConvNeXtSmall, 768), + (convnext.ConvNeXtBase, 1024), + (convnext.ConvNeXtLarge, 1536), + (convnext.ConvNeXtXLarge, 2048), + (densenet.DenseNet121, 1024), + (densenet.DenseNet169, 1664), + (densenet.DenseNet201, 1920), + (efficientnet.EfficientNetB0, 1280), + (efficientnet.EfficientNetB1, 1280), + (efficientnet.EfficientNetB2, 1408), + (efficientnet.EfficientNetB3, 1536), + (efficientnet.EfficientNetB4, 1792), + (efficientnet.EfficientNetB5, 2048), + (efficientnet.EfficientNetB6, 2304), + (efficientnet.EfficientNetB7, 2560), + (efficientnet_v2.EfficientNetV2B0, 1280), + (efficientnet_v2.EfficientNetV2B1, 1280), + (efficientnet_v2.EfficientNetV2B2, 1408), + (efficientnet_v2.EfficientNetV2B3, 1536), + (efficientnet_v2.EfficientNetV2S, 1280), + (efficientnet_v2.EfficientNetV2M, 1280), + (efficientnet_v2.EfficientNetV2L, 1280), + (regnet.RegNetX002, 368), + (regnet.RegNetX004, 384), + (regnet.RegNetX006, 528), + (regnet.RegNetX008, 672), + (regnet.RegNetX016, 912), + (regnet.RegNetX032, 1008), + (regnet.RegNetX040, 1360), + (regnet.RegNetX064, 1624), + (regnet.RegNetX080, 1920), + (regnet.RegNetX120, 2240), + (regnet.RegNetX160, 2048), + (regnet.RegNetX320, 2520), + (regnet.RegNetY002, 368), + (regnet.RegNetY004, 440), + (regnet.RegNetY006, 608), + (regnet.RegNetY008, 768), + (regnet.RegNetY016, 888), + (regnet.RegNetY032, 1512), + (regnet.RegNetY040, 1088), + (regnet.RegNetY064, 1296), + (regnet.RegNetY080, 2016), + (regnet.RegNetY120, 2240), + (regnet.RegNetY160, 3024), + (regnet.RegNetY320, 3712), + (resnet_rs.ResNetRS50, 2048), + (resnet_rs.ResNetRS101, 2048), + (resnet_rs.ResNetRS152, 2048), + (resnet_rs.ResNetRS200, 2048), + (resnet_rs.ResNetRS270, 2048), + (resnet_rs.ResNetRS350, 2048), + (resnet_rs.ResNetRS420, 2048), +] NASNET_LIST = [ (nasnet.NASNetMobile, 1056), @@ -116,89 +138,128 @@ class ApplicationsTest(tf.test.TestCase, parameterized.TestCase): + def assertShapeEqual(self, shape1, shape2): + if len(shape1) != len(shape2): + raise AssertionError( + f"Shapes are different rank: {shape1} vs {shape2}" + ) + for v1, v2 in zip(shape1, shape2): + if v1 != v2: + raise AssertionError(f"Shapes differ: {shape1} vs {shape2}") + + @parameterized.parameters(*MODEL_LIST) + def test_application_base(self, app, _): + # Can be instantiated with default arguments + model = app(weights=None) + # Can be serialized and deserialized + config = model.get_config() + if "ConvNeXt" in app.__name__: + custom_objects = {"LayerScale": convnext.LayerScale} + with utils.custom_object_scope(custom_objects): + reconstructed_model = model.__class__.from_config(config) + else: + reconstructed_model = model.__class__.from_config(config) + self.assertEqual(len(model.weights), len(reconstructed_model.weights)) + backend.clear_session() + + @parameterized.parameters(*MODEL_LIST) + def test_application_notop(self, app, last_dim): + if "NASNet" in app.__name__: + only_check_last_dim = True + else: + only_check_last_dim = False + output_shape = _get_output_shape( + lambda: app(weights=None, include_top=False) + ) + if only_check_last_dim: + self.assertEqual(output_shape[-1], last_dim) + else: + self.assertShapeEqual(output_shape, (None, None, None, last_dim)) + backend.clear_session() + + 
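[Editor's aside, not part of the patch: the ConvNeXt branch in `test_application_base` above exists because `LayerScale` is a custom layer that `from_config` cannot resolve by name on its own. A minimal sketch of the same round trip, with an illustrative model choice:

    from keras import utils
    from keras.applications import convnext

    model = convnext.ConvNeXtTiny(weights=None)
    config = model.get_config()
    with utils.custom_object_scope({"LayerScale": convnext.LayerScale}):
        clone = model.__class__.from_config(config)
    assert len(model.weights) == len(clone.weights)
]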
@parameterized.parameters(*MODEL_LIST) + def test_application_notop_custom_input_shape(self, app, last_dim): + output_shape = _get_output_shape( + lambda: app( + weights="imagenet", include_top=False, input_shape=(224, 224, 3) + ) + ) + + self.assertEqual(output_shape[-1], last_dim) + + @parameterized.parameters(MODEL_LIST) + def test_application_pooling(self, app, last_dim): + output_shape = _get_output_shape( + lambda: app(weights=None, include_top=False, pooling="avg") + ) + self.assertShapeEqual(output_shape, (None, last_dim)) + + @parameterized.parameters(MODEL_LIST) + def test_application_classifier_activation(self, app, _): + if "RegNet" in app.__name__: + self.skipTest("RegNet models do not support classifier activation") + model = app( + weights=None, include_top=True, classifier_activation="softmax" + ) + last_layer_act = model.layers[-1].activation.__name__ + self.assertEqual(last_layer_act, "softmax") + + @parameterized.parameters(*MODEL_LIST_NO_NASNET) + def test_application_variable_input_channels(self, app, last_dim): + if backend.image_data_format() == "channels_first": + input_shape = (1, None, None) + else: + input_shape = (None, None, 1) + output_shape = _get_output_shape( + lambda: app( + weights=None, include_top=False, input_shape=input_shape + ) + ) + self.assertShapeEqual(output_shape, (None, None, None, last_dim)) + backend.clear_session() + + if backend.image_data_format() == "channels_first": + input_shape = (4, None, None) + else: + input_shape = (None, None, 4) + output_shape = _get_output_shape( + lambda: app( + weights=None, include_top=False, input_shape=input_shape + ) + ) + self.assertShapeEqual(output_shape, (None, None, None, last_dim)) + backend.clear_session() + + @parameterized.parameters(*MOBILENET_V3_FOR_WEIGHTS) + def test_mobilenet_v3_load_weights( + self, mobilenet_class, alpha, minimalistic, include_top + ): + mobilenet_class( + input_shape=(224, 224, 3), + weights="imagenet", + alpha=alpha, + minimalistic=minimalistic, + include_top=include_top, + ) + + @parameterized.parameters(MODEL_LIST) + @test_utils.run_v2_only + def test_model_checkpoint(self, app, _): + model = app(weights=None) - def assertShapeEqual(self, shape1, shape2): - if len(shape1) != len(shape2): - raise AssertionError( - 'Shapes are different rank: %s vs %s' % (shape1, shape2)) - for v1, v2 in zip(shape1, shape2): - if v1 != v2: - raise AssertionError('Shapes differ: %s vs %s' % (shape1, shape2)) - - @parameterized.parameters(*MODEL_LIST) - def test_application_base(self, app, _): - # Can be instantiated with default arguments - model = app(weights=None) - # Can be serialized and deserialized - config = model.get_config() - if "ConvNeXt" in app.__name__: - custom_objects = {"LayerScale": convnext.LayerScale} - with utils.custom_object_scope(custom_objects): - reconstructed_model = model.__class__.from_config(config) - else: - reconstructed_model = model.__class__.from_config(config) - self.assertEqual(len(model.weights), len(reconstructed_model.weights)) - backend.clear_session() - - @parameterized.parameters(*MODEL_LIST) - def test_application_notop(self, app, last_dim): - if 'NASNet' in app.__name__: - only_check_last_dim = True - else: - only_check_last_dim = False - output_shape = _get_output_shape( - lambda: app(weights=None, include_top=False)) - if only_check_last_dim: - self.assertEqual(output_shape[-1], last_dim) - else: - self.assertShapeEqual(output_shape, (None, None, None, last_dim)) - backend.clear_session() - - @parameterized.parameters(MODEL_LIST) - def 
test_application_pooling(self, app, last_dim): - output_shape = _get_output_shape( - lambda: app(weights=None, include_top=False, pooling='avg')) - self.assertShapeEqual(output_shape, (None, last_dim)) - - @parameterized.parameters(*MODEL_LIST_NO_NASNET) - def test_application_variable_input_channels(self, app, last_dim): - if backend.image_data_format() == 'channels_first': - input_shape = (1, None, None) - else: - input_shape = (None, None, 1) - output_shape = _get_output_shape( - lambda: app(weights=None, include_top=False, input_shape=input_shape)) - self.assertShapeEqual(output_shape, (None, None, None, last_dim)) - backend.clear_session() - - if backend.image_data_format() == 'channels_first': - input_shape = (4, None, None) - else: - input_shape = (None, None, 4) - output_shape = _get_output_shape( - lambda: app(weights=None, include_top=False, input_shape=input_shape)) - self.assertShapeEqual(output_shape, (None, None, None, last_dim)) - backend.clear_session() - - @parameterized.parameters(*MOBILENET_V3_FOR_WEIGHTS) - def test_mobilenet_v3_load_weights( - self, - mobilenet_class, - alpha, - minimalistic, - include_top): - mobilenet_class( - input_shape=(224, 224, 3), - weights='imagenet', - alpha=alpha, - minimalistic=minimalistic, - include_top=include_top) + checkpoint = tf.train.Checkpoint(model=model) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, + directory=os.path.join(self.get_temp_dir(), model.name), + max_to_keep=1, + ) + checkpoint_manager.save(checkpoint_number=1) def _get_output_shape(model_fn): - model = model_fn() - return model.output_shape + model = model_fn() + return model.output_shape -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/applications/convnext.py b/keras/applications/convnext.py index 7efa1820b669..829466a6312b 100644 --- a/keras/applications/convnext.py +++ b/keras/applications/convnext.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name -# pylint: disable=missing-docstring -# pylint: disable=g-classes-have-attributes -# pylint: disable=g-direct-tensorflow-import + + """ConvNeXt models for Keras. 
References: @@ -24,64 +22,74 @@ (CVPR 2022) """ +import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend +from keras import initializers from keras import layers from keras import utils from keras.applications import imagenet_utils from keras.engine import sequential from keras.engine import training as training_lib -import numpy as np -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -BASE_WEIGHTS_PATH = "https://storage.googleapis.com/tensorflow/keras-applications/convnext/" +BASE_WEIGHTS_PATH = ( + "https://storage.googleapis.com/tensorflow/keras-applications/convnext/" +) WEIGHTS_HASHES = { - "tiny": - ("8ae6e78ce2933352b1ef4008e6dd2f17bc40771563877d156bc6426c7cf503ff", - "d547c096cabd03329d7be5562c5e14798aa39ed24b474157cef5e85ab9e49ef1"), - "small": - ("ce1277d8f1ee5a0ef0e171469089c18f5233860ceaf9b168049cb9263fd7483c", - "6fc8009faa2f00c1c1dfce59feea9b0745eb260a7dd11bee65c8e20843da6eab"), - "base": - ("52cbb006d3dadd03f6e095a8ca1aca47aecdd75acb4bc74bce1f5c695d0086e6", - "40a20c5548a5e9202f69735ecc06c990e6b7c9d2de39f0361e27baeb24cb7c45"), - "large": - ("070c5ed9ed289581e477741d3b34beffa920db8cf590899d6d2c67fba2a198a6", - "96f02b6f0753d4f543261bc9d09bed650f24dd6bc02ddde3066135b63d23a1cd"), - "xlarge": - ("c1f5ccab661354fc3a79a10fa99af82f0fbf10ec65cb894a3ae0815f17a889ee", - "de3f8a54174130e0cecdc71583354753d557fcf1f4487331558e2a16ba0cfe05"), + "convnext_tiny": ( + "8ae6e78ce2933352b1ef4008e6dd2f17bc40771563877d156bc6426c7cf503ff", + "d547c096cabd03329d7be5562c5e14798aa39ed24b474157cef5e85ab9e49ef1", + ), + "convnext_small": ( + "ce1277d8f1ee5a0ef0e171469089c18f5233860ceaf9b168049cb9263fd7483c", + "6fc8009faa2f00c1c1dfce59feea9b0745eb260a7dd11bee65c8e20843da6eab", + ), + "convnext_base": ( + "52cbb006d3dadd03f6e095a8ca1aca47aecdd75acb4bc74bce1f5c695d0086e6", + "40a20c5548a5e9202f69735ecc06c990e6b7c9d2de39f0361e27baeb24cb7c45", + ), + "convnext_large": ( + "070c5ed9ed289581e477741d3b34beffa920db8cf590899d6d2c67fba2a198a6", + "96f02b6f0753d4f543261bc9d09bed650f24dd6bc02ddde3066135b63d23a1cd", + ), + "convnext_xlarge": ( + "c1f5ccab661354fc3a79a10fa99af82f0fbf10ec65cb894a3ae0815f17a889ee", + "de3f8a54174130e0cecdc71583354753d557fcf1f4487331558e2a16ba0cfe05", + ), } MODEL_CONFIGS = { - "tiny": { - "depths": [3, 3, 9, 3], - "projection_dims": [96, 192, 384, 768], - "default_size": 224, - }, - "small": { - "depths": [3, 3, 27, 3], - "projection_dims": [96, 192, 384, 768], - "default_size": 224, - }, - "base": { - "depths": [3, 3, 27, 3], - "projection_dims": [128, 256, 512, 1024], - "default_size": 224, - }, - "large": { - "depths": [3, 3, 27, 3], - "projection_dims": [192, 384, 768, 1536], - "default_size": 224, - }, - "xlarge": { - "depths": [3, 3, 27, 3], - "projection_dims": [256, 512, 1024, 2048], - "default_size": 224, - }, + "tiny": { + "depths": [3, 3, 9, 3], + "projection_dims": [96, 192, 384, 768], + "default_size": 224, + }, + "small": { + "depths": [3, 3, 27, 3], + "projection_dims": [96, 192, 384, 768], + "default_size": 224, + }, + "base": { + "depths": [3, 3, 27, 3], + "projection_dims": [128, 256, 512, 1024], + "default_size": 224, + }, + "large": { + "depths": [3, 3, 27, 3], + "projection_dims": [192, 384, 768, 1536], + "default_size": 224, + }, + "xlarge": { + "depths": [3, 3, 27, 3], + "projection_dims": [256, 512, 1024, 2048], + "default_size": 224, + }, } BASE_DOCSTRING = """Instantiates the {name} architecture. 
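[Editor's aside: the WEIGHTS_HASHES keys above changed from bare size suffixes ("tiny") to full model names ("convnext_tiny"). This matters because the weight loader further down indexes the table directly by `model_name`, roughly:

    model_name, include_top = "convnext_tiny", True  # illustrative values
    file_suffix = ".h5" if include_top else "_notop.h5"
    file_hash = WEIGHTS_HASHES[model_name][0 if include_top else 1]
    weights_url = BASE_WEIGHTS_PATH + model_name + file_suffix

With the old keys, `WEIGHTS_HASHES[model_name]` would have raised a `KeyError` for every variant.]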
@@ -96,6 +104,7 @@ For transfer learning use cases, make sure to read the [guide to transfer learning & fine-tuning]( https://keras.io/guides/transfer_learning/). + The `base`, `large`, and `xlarge` models were first pre-trained on the ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The pre-trained parameters of the models were assembled from the @@ -103,20 +112,22 @@ sense of how these parameters were converted to Keras compatible parameters, please refer to [this repository](https://github.com/sayakpaul/keras-convnext-conversion). + Note: Each Keras Application expects a specific kind of input preprocessing. For ConvNeXt, preprocessing is included in the model using a `Normalization` layer. ConvNeXt models expect their inputs to be float or uint8 tensors of pixels with values in the [0-255] range. + When calling the `summary()` method after instantiating a ConvNeXt model, prefer setting the `expand_nested` argument of `summary()` to `True` to better investigate the instantiated model. Args: include_top: Whether to include the fully-connected - layer at the top of the network. Defaults to True. + layer at the top of the network. Defaults to `True`. weights: One of `None` (random initialization), - `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights file - to be loaded. Defaults to `"imagenet"`. + `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights + file to be loaded. Defaults to `"imagenet"`. input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. @@ -124,7 +135,7 @@ if `include_top` is False. It should have exactly 3 inputs channels. pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. Defaults to None. + when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling @@ -133,517 +144,597 @@ the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. + Defaults to `None`. classes: Optional number of classes to classify images into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. Defaults to 1000 (number of - ImageNet classes). + if no `weights` argument is specified. There are 1000 + ImageNet classes. Defaults to `1000`. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. - Defaults to `"softmax"`. When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. + be `None` or `"softmax"`. Defaults to `"softmax"`. Returns: A `keras.Model` instance. """ + class StochasticDepth(layers.Layer): - """Stochastic Depth module. + """Stochastic Depth module. - It performs batch-wise dropping rather than sample-wise. In libraries like - `timm`, it's similar to `DropPath` layers that drops residual paths - sample-wise. + It performs batch-wise dropping rather than sample-wise. In libraries like + `timm`, it's similar to `DropPath` layers that drop residual paths + sample-wise. - References: - - https://github.com/rwightman/pytorch-image-models + References: + - https://github.com/rwightman/pytorch-image-models - Args: - drop_path_rate (float): Probability of dropping paths.
Should be within + [0, 1]. - Returns: - Tensor either with the residual path dropped or kept. - """ - def __init__(self, drop_path_rate, **kwargs): - super().__init__(**kwargs) - self.drop_path_rate = drop_path_rate - - def call(self, x, training=None): - if training: - keep_prob = 1 - self.drop_path_rate - shape = (tf.shape(x)[0],) + (1,) * (len(tf.shape(x)) - 1) - random_tensor = keep_prob + tf.random.uniform(shape, 0, 1) - random_tensor = tf.floor(random_tensor) - return (x / keep_prob) * random_tensor - return x + Returns: + Tensor either with the residual path dropped or kept. + """ - def get_config(self): - config = super().get_config() - config.update({"drop_path_rate": self.drop_path_rate}) - return config + def __init__(self, drop_path_rate, **kwargs): + super().__init__(**kwargs) + self.drop_path_rate = drop_path_rate + def call(self, x, training=None): + if training: + keep_prob = 1 - self.drop_path_rate + shape = (tf.shape(x)[0],) + (1,) * (len(tf.shape(x)) - 1) + random_tensor = keep_prob + tf.random.uniform(shape, 0, 1) + random_tensor = tf.floor(random_tensor) + return (x / keep_prob) * random_tensor + return x -class LayerScale(layers.Layer): - """Layer scale module. + def get_config(self): + config = super().get_config() + config.update({"drop_path_rate": self.drop_path_rate}) + return config - References: - - https://arxiv.org/abs/2103.17239 - Args: - init_values (float): Initial value for layer scale. Should be within - [0, 1]. - projection_dim (int): Projection dimensionality. +class LayerScale(layers.Layer): + """Layer scale module. + + References: + - https://arxiv.org/abs/2103.17239 + + Args: + init_values (float): Initial value for layer scale. Should be within + [0, 1]. + projection_dim (int): Projection dimensionality. + + Returns: + Tensor multiplied to the scale. + """ + + def __init__(self, init_values, projection_dim, **kwargs): + super().__init__(**kwargs) + self.init_values = init_values + self.projection_dim = projection_dim + + def build(self, input_shape): + self.gamma = self.add_weight( + name="gamma", + shape=(self.projection_dim,), + initializer=initializers.Constant(self.init_values), + trainable=True, + ) + + def call(self, x): + return x * self.gamma + + def get_config(self): + config = super().get_config() + config.update( + { + "init_values": self.init_values, + "projection_dim": self.projection_dim, + } + ) + return config - Returns: - Tensor multiplied to the scale. - """ - def __init__(self, init_values, projection_dim, **kwargs): - super().__init__(**kwargs) - self.init_values = init_values - self.projection_dim = projection_dim - - def build(self, input_shape): - self.gamma = tf.Variable(self.init_values * tf.ones((self.projection_dim,))) - - def call(self, x): - return x * self.gamma - - def get_config(self): - config = super().get_config() - config.update( - {"init_values": self.init_values, "projection_dim": self.projection_dim} - ) - return config def ConvNeXtBlock( - projection_dim, - drop_path_rate=0.0, - layer_scale_init_value=1e-6, - name=None - ): - """ConvNeXt block. + projection_dim, drop_path_rate=0.0, layer_scale_init_value=1e-6, name=None +): + """ConvNeXt block. - References: + References: - https://arxiv.org/abs/2201.03545 - https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py - Notes: - In the original ConvNeXt implementation (linked above), the authors use - `Dense` layers for pointwise convolutions for increased efficiency. - Following that, this implementation also uses the same. 
+ Notes: + In the original ConvNeXt implementation (linked above), the authors use + `Dense` layers for pointwise convolutions for increased efficiency. + Following that, this implementation also uses the same. + + Args: + projection_dim (int): Number of filters for convolution layers. In the + ConvNeXt paper, this is referred to as projection dimension. + drop_path_rate (float): Probability of dropping paths. Should be within + [0, 1]. + layer_scale_init_value (float): Layer scale value. Should be a small float + number. + name: name prefix for the keras layer. + + Returns: + A function representing a ConvNeXtBlock block. + """ + if name is None: + name = "prestem" + str(backend.get_uid("prestem")) + + def apply(inputs): + x = inputs + + x = layers.Conv2D( + filters=projection_dim, + kernel_size=7, + padding="same", + groups=projection_dim, + name=name + "_depthwise_conv", + )(x) + x = layers.LayerNormalization(epsilon=1e-6, name=name + "_layernorm")(x) + x = layers.Dense(4 * projection_dim, name=name + "_pointwise_conv_1")(x) + x = layers.Activation("gelu", name=name + "_gelu")(x) + x = layers.Dense(projection_dim, name=name + "_pointwise_conv_2")(x) + + if layer_scale_init_value is not None: + x = LayerScale( + layer_scale_init_value, + projection_dim, + name=name + "_layer_scale", + )(x) + if drop_path_rate: + layer = StochasticDepth( + drop_path_rate, name=name + "_stochastic_depth" + ) + else: + layer = layers.Activation("linear", name=name + "_identity") + + return inputs + layer(x) + + return apply - Args: - projection_dim (int): Number of filters for convolution layers. In the - ConvNeXt paper, this is referred to as projection dimension. - drop_path_rate (float): Probability of dropping paths. Should be within - [0, 1]. - layer_scale_init_value (float): Layer scale value. Should be a small float - number. - name: name to path to the keras layer. - - Returns: - A function representing a ConvNeXtBlock block. - """ - if name is None: - name = "prestem" + str(backend.get_uid("prestem")) - def apply(inputs): - x = inputs +def PreStem(name=None): + """Normalizes inputs with ImageNet-1k mean and std. + + Args: + name (str): Name prefix. + + Returns: + A prestem function. + """ + if name is None: + name = "prestem" + str(backend.get_uid("prestem")) + + def apply(x): + x = layers.Normalization( + mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], + variance=[ + (0.229 * 255) ** 2, + (0.224 * 255) ** 2, + (0.225 * 255) ** 2, + ], + name=name + "_prestem_normalization", + )(x) + return x + + return apply + + +def Head(num_classes=1000, classifier_activation=None, name=None): + """Implementation of classification head of ConvNeXt. + + Args: + num_classes: number of classes for Dense layer + classifier_activation: activation function for the Dense layer + name: name prefix + + Returns: + Classification head function.
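+
+    Editor's note (illustrative addition): with the defaults, `Head()`
+    maps a `(batch, h, w, channels)` feature map to `(batch, 1000)`
+    outputs via global average pooling, a layer normalization, and a
+    final dense layer; `classifier_activation` is applied by that dense
+    layer.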
+ """ + if name is None: + name = str(backend.get_uid("head")) + + def apply(x): + x = layers.GlobalAveragePooling2D(name=name + "_head_gap")(x) + x = layers.LayerNormalization( + epsilon=1e-6, name=name + "_head_layernorm" + )(x) + x = layers.Dense( + num_classes, + activation=classifier_activation, + name=name + "_head_dense", + )(x) + return x + + return apply + + +def ConvNeXt( + depths, + projection_dims, + drop_path_rate=0.0, + layer_scale_init_value=1e-6, + default_size=224, + model_name="convnext", + include_preprocessing=True, + include_top=True, + weights=None, + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + """Instantiates ConvNeXt architecture given specific configuration. + + Args: + depths: An iterable containing depths for each individual stages. + projection_dims: An iterable containing output number of channels of + each individual stages. + drop_path_rate: Stochastic depth probability. If 0.0, then stochastic + depth won't be used. + layer_scale_init_value: Layer scale coefficient. If 0.0, layer scaling + won't be used. + default_size: Default input image size. + model_name: An optional name for the model. + include_preprocessing: boolean denoting whther to include preprocessing in + the model. When `weights="imagenet"` this should be always set to True. + But for other models (e.g., randomly initialized) users should set it + to False and apply preprocessing to data accordingly. + include_top: Boolean denoting whether to include classification head to + the model. + weights: one of `None` (random initialization), `"imagenet"` (pre-training + on ImageNet-1k), or the path to the weights file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to + use as image input for the model. + input_shape: optional shape tuple, only to be specified if `include_top` + is False. It should have exactly 3 inputs channels. + pooling: optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` means that the output of the model will be the 4D tensor output + of the last convolutional layer. + - `avg` means that global average pooling will be applied to the output + of the last convolutional layer, and thus the output of the model will + be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: optional number of classes to classify images into, only to be + specified if `include_top` is True, and if no `weights` argument is + specified. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + + Returns: + A `keras.Model` instance. + + Raises: + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + ValueError: if `classifier_activation` is not `softmax`, or `None` + when using a pretrained top layer. + ValueError: if `include_top` is True but `num_classes` is not 1000 + when using ImageNet. + """ + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded." 
+ ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + "If using `weights` as `'imagenet'` with `include_top`" + " as true, `classes` should be 1000" + ) + + # Determine proper input shape. + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=default_size, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) - x = layers.Conv2D( - filters=projection_dim, kernel_size=7, padding="same", - groups=projection_dim, name=name + "_depthwise_conv")(x) - x = layers.LayerNormalization(epsilon=1e-6, name=name + "_layernorm")(x) - x = layers.Dense(4 * projection_dim, name=name + "_pointwise_conv_1")(x) - x = layers.Activation("gelu", name=name + "_gelu")(x) - x = layers.Dense(projection_dim, name=name + "_pointwise_conv_2")(x) - - if layer_scale_init_value is not None: - x = LayerScale(layer_scale_init_value, projection_dim, - name=name + "_layer_scale")(x) - if drop_path_rate: - layer = StochasticDepth(drop_path_rate, name=name + "_stochastic_depth") + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - layer = layers.Activation("linear", name=name + "_identity") - - return inputs + layer(x) - return apply + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + if input_tensor is not None: + inputs = utils.layer_utils.get_source_inputs(input_tensor)[0] + else: + inputs = img_input -def PreStem(name=None): - """Normalizes inputs with ImageNet-1k mean and std. + x = inputs + if include_preprocessing: + channel_axis = ( + 3 if backend.image_data_format() == "channels_last" else 1 + ) + num_channels = input_shape[channel_axis - 1] + if num_channels == 3: + x = PreStem(name=model_name)(x) + + # Stem block. + stem = sequential.Sequential( + [ + layers.Conv2D( + projection_dims[0], + kernel_size=4, + strides=4, + name=model_name + "_stem_conv", + ), + layers.LayerNormalization( + epsilon=1e-6, name=model_name + "_stem_layernorm" + ), + ], + name=model_name + "_stem", + ) - Args: - name (str): Name prefix. + # Downsampling blocks. + downsample_layers = [] + downsample_layers.append(stem) + + num_downsample_layers = 3 + for i in range(num_downsample_layers): + downsample_layer = sequential.Sequential( + [ + layers.LayerNormalization( + epsilon=1e-6, + name=model_name + "_downsampling_layernorm_" + str(i), + ), + layers.Conv2D( + projection_dims[i + 1], + kernel_size=2, + strides=2, + name=model_name + "_downsampling_conv_" + str(i), + ), + ], + name=model_name + "_downsampling_block_" + str(i), + ) + downsample_layers.append(downsample_layer) + + # Stochastic depth schedule. + # This is referred from the original ConvNeXt codebase: + # https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py#L86 + depth_drop_rates = [ + float(x) for x in np.linspace(0.0, drop_path_rate, sum(depths)) + ] + + # First apply downsampling blocks and then apply ConvNeXt stages. + cur = 0 + + num_convnext_blocks = 4 + for i in range(num_convnext_blocks): + x = downsample_layers[i](x) + for j in range(depths[i]): + x = ConvNeXtBlock( + projection_dim=projection_dims[i], + drop_path_rate=depth_drop_rates[cur + j], + layer_scale_init_value=layer_scale_init_value, + name=model_name + f"_stage_{i}_block_{j}", + )(x) + cur += depths[i] - Returns: - A presemt function. 
- """ - if name is None: - name = "prestem" + str(backend.get_uid("prestem")) - - def apply(x): - x = layers.Normalization( - mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], - variance=[(0.229 * 255) ** 2, (0.224 * 255) ** 2, (0.225 * 255) ** 2], - name=name + "_prestem_normalization" - )(x) - return x + if include_top: + imagenet_utils.validate_activation(classifier_activation, weights) + x = Head( + num_classes=classes, + classifier_activation=classifier_activation, + name=model_name, + )(x) - return apply + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + x = layers.LayerNormalization(epsilon=1e-6)(x) + + model = training_lib.Model(inputs=inputs, outputs=x, name=model_name) + + # Load weights. + if weights == "imagenet": + if include_top: + file_suffix = ".h5" + file_hash = WEIGHTS_HASHES[model_name][0] + else: + file_suffix = "_notop.h5" + file_hash = WEIGHTS_HASHES[model_name][1] + file_name = model_name + file_suffix + weights_path = utils.data_utils.get_file( + file_name, + BASE_WEIGHTS_PATH + file_name, + cache_subdir="models", + file_hash=file_hash, + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model -def Head(num_classes=1000, name=None): - """Implementation of classification head of RegNet. +## Instantiating variants ## - Args: - num_classes: number of classes for Dense layer - name: name prefix - Returns: - Classification head function. - """ - if name is None: - name = str(backend.get_uid("head")) - - def apply(x): - x = layers.GlobalAveragePooling2D(name=name + "_head_gap")(x) - x = layers.LayerNormalization( - epsilon=1e-6, name=name + "_head_layernorm")(x) - x = layers.Dense(num_classes, name=name + "_head_dense")(x) - return x +@keras_export( + "keras.applications.convnext.ConvNeXtTiny", + "keras.applications.ConvNeXtTiny", +) +def ConvNeXtTiny( + model_name="convnext_tiny", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return ConvNeXt( + depths=MODEL_CONFIGS["tiny"]["depths"], + projection_dims=MODEL_CONFIGS["tiny"]["projection_dims"], + drop_path_rate=0.0, + layer_scale_init_value=1e-6, + default_size=MODEL_CONFIGS["tiny"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) - return apply - - -def ConvNeXt(depths, - projection_dims, - drop_path_rate=0.0, - layer_scale_init_value=1e-6, - default_size=224, - model_name="convnext", - include_preprocessing=True, - include_top=True, - weights=None, - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - """Instantiates ConvNeXt architecture given specific configuration. - Args: - depths: An iterable containing depths for each individual stages. - projection_dims: An iterable containing output number of channels of - each individual stages. - drop_path_rate: Stochastic depth probability. If 0.0, then stochastic depth - won't be used. - layer_scale_init_value: Layer scale coefficient. If 0.0, layer scaling won't - be used. - default_size: Default input image size. - model_name: An optional name for the model. 
- include_preprocessing: boolean denoting whther to include preprocessing in - the model. When `weights="imagenet"` this should be always set to True. - But for other models (e.g., randomly initialized) users should set it - to False and apply preprocessing to data accordingly. - include_top: Boolean denoting whether to include classification head to the - model. - weights: one of `None` (random initialization), `"imagenet"` (pre-training - on ImageNet-1k), or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use - as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is - False. It should have exactly 3 inputs channels. - pooling: optional pooling mode for feature extraction when `include_top` is - `False`. - `None` means that the output of the model will be the 4D tensor - output of the last convolutional layer. - `avg` means that global average - pooling will be applied to the output of the last convolutional layer, and - thus the output of the model will be a 2D tensor. - `max` means that - global max pooling will be applied. - classes: optional number of classes to classify images into, only to be - specified if `include_top` is True, and if no `weights` argument is - specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. +@keras_export( + "keras.applications.convnext.ConvNeXtSmall", + "keras.applications.ConvNeXtSmall", +) +def ConvNeXtSmall( + model_name="convnext_small", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return ConvNeXt( + depths=MODEL_CONFIGS["small"]["depths"], + projection_dims=MODEL_CONFIGS["small"]["projection_dims"], + drop_path_rate=0.0, + layer_scale_init_value=1e-6, + default_size=MODEL_CONFIGS["small"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) - Returns: - A `keras.Model` instance. - Raises: - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - ValueError: if `classifier_activation` is not `softmax`, or `None` - when using a pretrained top layer. - ValueError: if `include_top` is True but `num_classes` is not 1000 - when using ImageNet. - """ - if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): - raise ValueError("The `weights` argument should be either " - "`None` (random initialization), `imagenet` " - "(pre-training on ImageNet), " - "or the path to the weights file to be loaded.") - - if weights == "imagenet" and include_top and classes != 1000: - raise ValueError("If using `weights` as `'imagenet'` with `include_top`" - " as true, `classes` should be 1000") - - # Determine proper input shape. 
- input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - if input_tensor is not None: - inputs = utils.layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - x = inputs - if include_preprocessing: - channel_axis = 3 if backend.image_data_format() == "channels_last" else 1 - num_channels = input_shape[channel_axis - 1] - if num_channels == 3: - x = PreStem(name=model_name)(x) - - # Stem block. - stem = sequential.Sequential( - [ - layers.Conv2D(projection_dims[0], kernel_size=4, strides=4, - name=model_name + "_stem_conv"), - layers.LayerNormalization( - epsilon=1e-6, - name=model_name + "_stem_layernorm" - ), - ], - name=model_name + "_stem", - ) - - # Downsampling blocks. - downsample_layers = [] - downsample_layers.append(stem) - - num_downsample_layers = 3 - for i in range(num_downsample_layers): - downsample_layer = sequential.Sequential( - [ - layers.LayerNormalization(epsilon=1e-6, - name=model_name + "_downsampling_layernorm_" + str(i)), - layers.Conv2D(projection_dims[i + 1], kernel_size=2, strides=2, - name=model_name + "_downsampling_conv_" + str(i)), - ], - name=model_name + "_downsampling_block_" + str(i), +@keras_export( + "keras.applications.convnext.ConvNeXtBase", + "keras.applications.ConvNeXtBase", +) +def ConvNeXtBase( + model_name="convnext_base", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return ConvNeXt( + depths=MODEL_CONFIGS["base"]["depths"], + projection_dims=MODEL_CONFIGS["base"]["projection_dims"], + drop_path_rate=0.0, + layer_scale_init_value=1e-6, + default_size=MODEL_CONFIGS["base"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, ) - downsample_layers.append(downsample_layer) - - # Stochastic depth schedule. - # This is referred from the original ConvNeXt codebase: - # https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py#L86 - depth_drop_rates = [ - float(x) for x in np.linspace(0.0, drop_path_rate, sum(depths)) - ] - - # First apply downsampling blocks and then apply ConvNeXt stages. - cur = 0 - - num_convnext_blocks = 4 - for i in range(num_convnext_blocks): - x = downsample_layers[i](x) - for j in range(depths[i]): - x = ConvNeXtBlock( - projection_dim=projection_dims[i], - drop_path_rate=depth_drop_rates[cur + j], - layer_scale_init_value=layer_scale_init_value, - name=model_name + f"_stage_{i}_block_{j}", - )(x) - cur += depths[i] - - if include_top: - x = Head(num_classes=classes, name=model_name)(x) - imagenet_utils.validate_activation(classifier_activation, weights) - - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D()(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D()(x) - x = layers.LayerNormalization(epsilon=1e-6)(x) - - model = training_lib.Model(inputs=inputs, outputs=x, name=model_name) - - # Load weights. 
- if weights == "imagenet": - if include_top: - file_suffix = ".h5" - file_hash = WEIGHTS_HASHES[model_name][0] - else: - file_suffix = "_notop.h5" - file_hash = WEIGHTS_HASHES[model_name][1] - file_name = model_name + file_suffix - weights_path = utils.data_utils.get_file( - file_name, - BASE_WEIGHTS_PATH + file_name, - cache_subdir="models", - file_hash=file_hash) - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - return model +@keras_export( + "keras.applications.convnext.ConvNeXtLarge", + "keras.applications.ConvNeXtLarge", +) +def ConvNeXtLarge( + model_name="convnext_large", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return ConvNeXt( + depths=MODEL_CONFIGS["large"]["depths"], + projection_dims=MODEL_CONFIGS["large"]["projection_dims"], + drop_path_rate=0.0, + layer_scale_init_value=1e-6, + default_size=MODEL_CONFIGS["large"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) -## Instantiating variants ## -@keras_export("keras.applications.convnext.ConvNeXtTiny", - "keras.applications.ConvNeXtTiny") -def ConvNeXtTiny(model_name="convnext_tiny", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return ConvNeXt( - depths=MODEL_CONFIGS["tiny"]["depths"], - projection_dims=MODEL_CONFIGS["tiny"]["projection_dims"], - drop_path_rate=0.0, - layer_scale_init_value=1e-6, - default_size=MODEL_CONFIGS["tiny"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.convnext.ConvNeXtSmall", - "keras.applications.ConvNeXtSmall") -def ConvNeXtSmall(model_name="convnext_small", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return ConvNeXt( - depths=MODEL_CONFIGS["small"]["depths"], - projection_dims=MODEL_CONFIGS["small"]["projection_dims"], - drop_path_rate=0.0, - layer_scale_init_value=1e-6, - default_size=MODEL_CONFIGS["small"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.convnext.ConvNeXtBase", - "keras.applications.ConvNeXtBase") -def ConvNeXtBase(model_name="convnext_base", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return ConvNeXt( - depths=MODEL_CONFIGS["base"]["depths"], - projection_dims=MODEL_CONFIGS["base"]["projection_dims"], - drop_path_rate=0.0, - layer_scale_init_value=1e-6, - default_size=MODEL_CONFIGS["base"]["default_size"], - model_name=model_name, - 
include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.convnext.ConvNeXtLarge", - "keras.applications.ConvNeXtLarge") -def ConvNeXtLarge(model_name="convnext_large", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return ConvNeXt( - depths=MODEL_CONFIGS["large"]["depths"], - projection_dims=MODEL_CONFIGS["large"]["projection_dims"], - drop_path_rate=0.0, - layer_scale_init_value=1e-6, - default_size=MODEL_CONFIGS["large"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.convnext.ConvNeXtXLarge", - "keras.applications.ConvNeXtXLarge") -def ConvNeXtXLarge(model_name="convnext_xlarge", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return ConvNeXt( - depths=MODEL_CONFIGS["xlarge"]["depths"], - projection_dims=MODEL_CONFIGS["xlarge"]["projection_dims"], - drop_path_rate=0.0, - layer_scale_init_value=1e-6, - default_size=MODEL_CONFIGS["xlarge"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) +@keras_export( + "keras.applications.convnext.ConvNeXtXLarge", + "keras.applications.ConvNeXtXLarge", +) +def ConvNeXtXLarge( + model_name="convnext_xlarge", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return ConvNeXt( + depths=MODEL_CONFIGS["xlarge"]["depths"], + projection_dims=MODEL_CONFIGS["xlarge"]["projection_dims"], + drop_path_rate=0.0, + layer_scale_init_value=1e-6, + default_size=MODEL_CONFIGS["xlarge"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) ConvNeXtTiny.__doc__ = BASE_DOCSTRING.format(name="ConvNeXtTiny") @@ -654,30 +745,30 @@ def ConvNeXtXLarge(model_name="convnext_xlarge", @keras_export("keras.applications.convnext.preprocess_input") -def preprocess_input(x, data_format=None): # pylint: disable=unused-argument - """A placeholder method for backward compatibility. - - The preprocessing logic has been included in the efficientnet model - implementation. Users are no longer required to call this method to normalize - the input data. This method does nothing and only kept as a placeholder to - align the API surface between old and new version of model. - - Args: - x: A floating point `numpy.array` or a `tf.Tensor`. - data_format: Optional data format of the image tensor/array. 
Defaults to - None, in which case the global setting - `tf.keras.backend.image_data_format()` is used (unless you changed it, it - defaults to "channels_last").{mode} - - Returns: - Unchanged `numpy.array` or `tf.Tensor`. - """ - return x +def preprocess_input(x, data_format=None): + """A placeholder method for backward compatibility. + + The preprocessing logic has been included in the convnext model + implementation. Users are no longer required to call this method to + normalize the input data. This method does nothing and is only kept as a + placeholder to align the API surface between the old and new versions of + the model. + + Args: + x: A floating point `numpy.array` or a `tf.Tensor`. + data_format: Optional data format of the image tensor/array. `None` means + the global setting `tf.keras.backend.image_data_format()` is used + (unless you changed it, it uses "channels_last"). + Defaults to `None`. + + Returns: + Unchanged `numpy.array` or `tf.Tensor`. + """ + return x @keras_export("keras.applications.convnext.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/densenet.py b/keras/applications/densenet.py index e32066036487..57372d6a123e 100644 --- a/keras/applications/densenet.py +++ b/keras/applications/densenet.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """DenseNet models for Keras. Reference: @@ -28,356 +28,412 @@ from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export -BASE_WEIGHTS_PATH = ('https://storage.googleapis.com/tensorflow/' - 'keras-applications/densenet/') +BASE_WEIGHTS_PATH = ( + "https://storage.googleapis.com/tensorflow/keras-applications/densenet/" +) DENSENET121_WEIGHT_PATH = ( - BASE_WEIGHTS_PATH + 'densenet121_weights_tf_dim_ordering_tf_kernels.h5') + BASE_WEIGHTS_PATH + "densenet121_weights_tf_dim_ordering_tf_kernels.h5" +) DENSENET121_WEIGHT_PATH_NO_TOP = ( - BASE_WEIGHTS_PATH + - 'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5') + BASE_WEIGHTS_PATH + + "densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5" +) DENSENET169_WEIGHT_PATH = ( - BASE_WEIGHTS_PATH + 'densenet169_weights_tf_dim_ordering_tf_kernels.h5') + BASE_WEIGHTS_PATH + "densenet169_weights_tf_dim_ordering_tf_kernels.h5" +) DENSENET169_WEIGHT_PATH_NO_TOP = ( - BASE_WEIGHTS_PATH + - 'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5') + BASE_WEIGHTS_PATH + + "densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5" +) DENSENET201_WEIGHT_PATH = ( - BASE_WEIGHTS_PATH + 'densenet201_weights_tf_dim_ordering_tf_kernels.h5') + BASE_WEIGHTS_PATH + "densenet201_weights_tf_dim_ordering_tf_kernels.h5" +) DENSENET201_WEIGHT_PATH_NO_TOP = ( - BASE_WEIGHTS_PATH + - 'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5') + BASE_WEIGHTS_PATH + + "densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5" +) layers = VersionAwareLayers() def dense_block(x, blocks, name): - """A dense block. + """A dense block. - Args: - x: input tensor. - blocks: integer, the number of building blocks.
- name: string, block label. + Args: + x: input tensor. + blocks: integer, the number of building blocks. + name: string, block label. - Returns: - Output tensor for the block. - """ - for i in range(blocks): - x = conv_block(x, 32, name=name + '_block' + str(i + 1)) - return x + Returns: + Output tensor for the block. + """ + for i in range(blocks): + x = conv_block(x, 32, name=name + "_block" + str(i + 1)) + return x def transition_block(x, reduction, name): - """A transition block. - - Args: - x: input tensor. - reduction: float, compression rate at transition layers. - name: string, block label. - - Returns: - output tensor for the block. - """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_bn')( - x) - x = layers.Activation('relu', name=name + '_relu')(x) - x = layers.Conv2D( - int(backend.int_shape(x)[bn_axis] * reduction), - 1, - use_bias=False, - name=name + '_conv')( - x) - x = layers.AveragePooling2D(2, strides=2, name=name + '_pool')(x) - return x + """A transition block. + + Args: + x: input tensor. + reduction: float, compression rate at transition layers. + name: string, block label. + + Returns: + output tensor for the block. + """ + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_bn" + )(x) + x = layers.Activation("relu", name=name + "_relu")(x) + x = layers.Conv2D( + int(backend.int_shape(x)[bn_axis] * reduction), + 1, + use_bias=False, + name=name + "_conv", + )(x) + x = layers.AveragePooling2D(2, strides=2, name=name + "_pool")(x) + return x def conv_block(x, growth_rate, name): - """A building block for a dense block. - - Args: - x: input tensor. - growth_rate: float, growth rate at dense layers. - name: string, block label. - - Returns: - Output tensor for the block. - """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - x1 = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')( - x) - x1 = layers.Activation('relu', name=name + '_0_relu')(x1) - x1 = layers.Conv2D( - 4 * growth_rate, 1, use_bias=False, name=name + '_1_conv')( - x1) - x1 = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')( - x1) - x1 = layers.Activation('relu', name=name + '_1_relu')(x1) - x1 = layers.Conv2D( - growth_rate, 3, padding='same', use_bias=False, name=name + '_2_conv')( - x1) - x = layers.Concatenate(axis=bn_axis, name=name + '_concat')([x, x1]) - return x + """A building block for a dense block. + + Args: + x: input tensor. + growth_rate: float, growth rate at dense layers. + name: string, block label. + + Returns: + Output tensor for the block. 
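+
+    Editor's note (illustrative addition): with `growth_rate=32`, an input
+    of shape `(batch, h, w, c)` comes back as `(batch, h, w, c + 32)`;
+    every call concatenates 32 new feature maps onto the running tensor,
+    which is what makes the enclosing block "dense".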
+ """ + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + x1 = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_0_bn" + )(x) + x1 = layers.Activation("relu", name=name + "_0_relu")(x1) + x1 = layers.Conv2D( + 4 * growth_rate, 1, use_bias=False, name=name + "_1_conv" + )(x1) + x1 = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_1_bn" + )(x1) + x1 = layers.Activation("relu", name=name + "_1_relu")(x1) + x1 = layers.Conv2D( + growth_rate, 3, padding="same", use_bias=False, name=name + "_2_conv" + )(x1) + x = layers.Concatenate(axis=bn_axis, name=name + "_concat")([x, x1]) + return x def DenseNet( blocks, include_top=True, - weights='imagenet', + weights="imagenet", input_tensor=None, input_shape=None, pooling=None, classes=1000, - classifier_activation='softmax'): - """Instantiates the DenseNet architecture. - - Reference: - - [Densely Connected Convolutional Networks]( - https://arxiv.org/abs/1608.06993) (CVPR 2017) - - This function returns a Keras image classification model, - optionally loaded with weights pre-trained on ImageNet. - - For image classification use cases, see - [this page for detailed examples]( - https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning]( - https://keras.io/guides/transfer_learning/). - - Note: each Keras Application expects a specific kind of input preprocessing. - For DenseNet, call `tf.keras.applications.densenet.preprocess_input` on your - inputs before passing them to the model. - `densenet.preprocess_input` will scale pixels between 0 and 1 and then - will normalize each channel with respect to the ImageNet dataset statistics. + classifier_activation="softmax", +): + """Instantiates the DenseNet architecture. + + Reference: + - [Densely Connected Convolutional Networks]( + https://arxiv.org/abs/1608.06993) (CVPR 2017) + + This function returns a Keras image classification model, + optionally loaded with weights pre-trained on ImageNet. + + For image classification use cases, see + [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + + Note: each Keras Application expects a specific kind of input preprocessing. + For DenseNet, call `tf.keras.applications.densenet.preprocess_input` on your + inputs before passing them to the model. + `densenet.preprocess_input` will scale pixels between 0 and 1 and then + will normalize each channel with respect to the ImageNet dataset statistics. + + Args: + blocks: numbers of building blocks for the four dense layers. + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `'channels_last'` data format) + or `(3, 224, 224)` (with `'channels_first'` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. 
`(200, 200, 3)` would be one valid value. + pooling: optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + + Returns: + A `keras.Model` instance. + """ + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded." + ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top`' + " as true, `classes` should be 1000" + ) + + # Determine proper input shape + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=224, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input) + x = layers.Conv2D(64, 7, strides=2, use_bias=False, name="conv1/conv")(x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name="conv1/bn" + )(x) + x = layers.Activation("relu", name="conv1/relu")(x) + x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x) + x = layers.MaxPooling2D(3, strides=2, name="pool1")(x) + + x = dense_block(x, blocks[0], name="conv2") + x = transition_block(x, 0.5, name="pool2") + x = dense_block(x, blocks[1], name="conv3") + x = transition_block(x, 0.5, name="pool3") + x = dense_block(x, blocks[2], name="conv4") + x = transition_block(x, 0.5, name="pool4") + x = dense_block(x, blocks[3], name="conv5") + + x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name="bn")(x) + x = layers.Activation("relu", name="relu")(x) - Args: - blocks: numbers of building blocks for the four dense layers. - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor - (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` (with `'channels_last'` data format) - or `(3, 224, 224)` (with `'channels_first'` data format). 
- It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(200, 200, 3)` would be one valid value. - pooling: optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. + if include_top: + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - Returns: - A `keras.Model` instance. - """ - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=224, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) else: - img_input = input_tensor - - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input) - x = layers.Conv2D(64, 7, strides=2, use_bias=False, name='conv1/conv')(x) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name='conv1/bn')( - x) - x = layers.Activation('relu', name='conv1/relu')(x) - x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x) - x = layers.MaxPooling2D(3, strides=2, name='pool1')(x) - - x = dense_block(x, blocks[0], name='conv2') - x = transition_block(x, 0.5, name='pool2') - x = dense_block(x, blocks[1], name='conv3') - x = transition_block(x, 0.5, name='pool3') - x = dense_block(x, blocks[2], name='conv4') - x = transition_block(x, 0.5, name='pool4') - x = dense_block(x, blocks[3], name='conv5') - - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='bn')(x) - x = layers.Activation('relu', name='relu')(x) - - if include_top: - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense(classes, activation=classifier_activation, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D(name='max_pool')(x) - - # Ensure 
that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - if blocks == [6, 12, 24, 16]: - model = training.Model(inputs, x, name='densenet121') - elif blocks == [6, 12, 32, 32]: - model = training.Model(inputs, x, name='densenet169') - elif blocks == [6, 12, 48, 32]: - model = training.Model(inputs, x, name='densenet201') - else: - model = training.Model(inputs, x, name='densenet') - - # Load weights. - if weights == 'imagenet': - if include_top: - if blocks == [6, 12, 24, 16]: - weights_path = data_utils.get_file( - 'densenet121_weights_tf_dim_ordering_tf_kernels.h5', - DENSENET121_WEIGHT_PATH, - cache_subdir='models', - file_hash='9d60b8095a5708f2dcce2bca79d332c7') - elif blocks == [6, 12, 32, 32]: - weights_path = data_utils.get_file( - 'densenet169_weights_tf_dim_ordering_tf_kernels.h5', - DENSENET169_WEIGHT_PATH, - cache_subdir='models', - file_hash='d699b8f76981ab1b30698df4c175e90b') - elif blocks == [6, 12, 48, 32]: - weights_path = data_utils.get_file( - 'densenet201_weights_tf_dim_ordering_tf_kernels.h5', - DENSENET201_WEIGHT_PATH, - cache_subdir='models', - file_hash='1ceb130c1ea1b78c3bf6114dbdfd8807') + if pooling == "avg": + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D(name="max_pool")(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) else: - if blocks == [6, 12, 24, 16]: - weights_path = data_utils.get_file( - 'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5', - DENSENET121_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='30ee3e1110167f948a6b9946edeeb738') - elif blocks == [6, 12, 32, 32]: - weights_path = data_utils.get_file( - 'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5', - DENSENET169_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='b8c4d4c20dd625c148057b9ff1c1176b') - elif blocks == [6, 12, 48, 32]: - weights_path = data_utils.get_file( - 'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5', - DENSENET201_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='c13680b51ded0fb44dff2d8f86ac8bb1') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -@keras_export('keras.applications.densenet.DenseNet121', - 'keras.applications.DenseNet121') -def DenseNet121(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax'): - """Instantiates the Densenet121 architecture.""" - return DenseNet([6, 12, 24, 16], include_top, weights, input_tensor, - input_shape, pooling, classes, classifier_activation) - - -@keras_export('keras.applications.densenet.DenseNet169', - 'keras.applications.DenseNet169') -def DenseNet169(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax'): - """Instantiates the Densenet169 architecture.""" - return DenseNet([6, 12, 32, 32], include_top, weights, input_tensor, - input_shape, pooling, classes, classifier_activation) - - -@keras_export('keras.applications.densenet.DenseNet201', - 'keras.applications.DenseNet201') -def DenseNet201(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - 
pooling=None, - classes=1000, - classifier_activation='softmax'): - """Instantiates the Densenet201 architecture.""" - return DenseNet([6, 12, 48, 32], include_top, weights, input_tensor, - input_shape, pooling, classes, classifier_activation) - - -@keras_export('keras.applications.densenet.preprocess_input') + inputs = img_input + + # Create model. + if blocks == [6, 12, 24, 16]: + model = training.Model(inputs, x, name="densenet121") + elif blocks == [6, 12, 32, 32]: + model = training.Model(inputs, x, name="densenet169") + elif blocks == [6, 12, 48, 32]: + model = training.Model(inputs, x, name="densenet201") + else: + model = training.Model(inputs, x, name="densenet") + + # Load weights. + if weights == "imagenet": + if include_top: + if blocks == [6, 12, 24, 16]: + weights_path = data_utils.get_file( + "densenet121_weights_tf_dim_ordering_tf_kernels.h5", + DENSENET121_WEIGHT_PATH, + cache_subdir="models", + file_hash="9d60b8095a5708f2dcce2bca79d332c7", + ) + elif blocks == [6, 12, 32, 32]: + weights_path = data_utils.get_file( + "densenet169_weights_tf_dim_ordering_tf_kernels.h5", + DENSENET169_WEIGHT_PATH, + cache_subdir="models", + file_hash="d699b8f76981ab1b30698df4c175e90b", + ) + elif blocks == [6, 12, 48, 32]: + weights_path = data_utils.get_file( + "densenet201_weights_tf_dim_ordering_tf_kernels.h5", + DENSENET201_WEIGHT_PATH, + cache_subdir="models", + file_hash="1ceb130c1ea1b78c3bf6114dbdfd8807", + ) + else: + if blocks == [6, 12, 24, 16]: + weights_path = data_utils.get_file( + "densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5", + DENSENET121_WEIGHT_PATH_NO_TOP, + cache_subdir="models", + file_hash="30ee3e1110167f948a6b9946edeeb738", + ) + elif blocks == [6, 12, 32, 32]: + weights_path = data_utils.get_file( + "densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5", + DENSENET169_WEIGHT_PATH_NO_TOP, + cache_subdir="models", + file_hash="b8c4d4c20dd625c148057b9ff1c1176b", + ) + elif blocks == [6, 12, 48, 32]: + weights_path = data_utils.get_file( + "densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5", + DENSENET201_WEIGHT_PATH_NO_TOP, + cache_subdir="models", + file_hash="c13680b51ded0fb44dff2d8f86ac8bb1", + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +@keras_export( + "keras.applications.densenet.DenseNet121", "keras.applications.DenseNet121" +) +def DenseNet121( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + """Instantiates the Densenet121 architecture.""" + return DenseNet( + [6, 12, 24, 16], + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + classifier_activation, + ) + + +@keras_export( + "keras.applications.densenet.DenseNet169", "keras.applications.DenseNet169" +) +def DenseNet169( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + """Instantiates the Densenet169 architecture.""" + return DenseNet( + [6, 12, 32, 32], + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + classifier_activation, + ) + + +@keras_export( + "keras.applications.densenet.DenseNet201", "keras.applications.DenseNet201" +) +def DenseNet201( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + """Instantiates the Densenet201 architecture.""" + 
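The three wrappers around this hunk differ only in their `blocks` lists, and those lists account for the 121/169/201 suffixes. A quick standalone check of that arithmetic (one stem conv, two convs per conv_block, three transition convs, one classifier; the counting convention is the usual one and is not stated in this diff):

for blocks, name in [
    ([6, 12, 24, 16], "densenet121"),
    ([6, 12, 32, 32], "densenet169"),
    ([6, 12, 48, 32], "densenet201"),
]:
    # stem conv + 2 convs per conv_block + 3 transition convs + classifier
    depth = 1 + 2 * sum(blocks) + 3 + 1
    print(name, depth)  # densenet121 121, densenet169 169, densenet201 201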
return DenseNet( + [6, 12, 48, 32], + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + classifier_activation, + ) + + +@keras_export("keras.applications.densenet.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input( - x, data_format=data_format, mode='torch') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="torch" + ) -@keras_export('keras.applications.densenet.decode_predictions') +@keras_export("keras.applications.densenet.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TORCH, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ DOC = """ @@ -433,6 +489,6 @@ def decode_predictions(preds, top=5): A Keras model instance. """ -setattr(DenseNet121, '__doc__', DenseNet121.__doc__ + DOC) -setattr(DenseNet169, '__doc__', DenseNet169.__doc__ + DOC) -setattr(DenseNet201, '__doc__', DenseNet201.__doc__ + DOC) +setattr(DenseNet121, "__doc__", DenseNet121.__doc__ + DOC) +setattr(DenseNet169, "__doc__", DenseNet169.__doc__ + DOC) +setattr(DenseNet201, "__doc__", DenseNet201.__doc__ + DOC) diff --git a/keras/applications/efficientnet.py b/keras/applications/efficientnet.py index f615ff278761..a7d9639eb5f5 100644 --- a/keras/applications/efficientnet.py +++ b/keras/applications/efficientnet.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name -# pylint: disable=missing-docstring + + """EfficientNet models for Keras. 
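Before the diff moves on to efficientnet.py: DenseNet's `preprocess_input` above delegates to `imagenet_utils.preprocess_input` with `mode="torch"`, i.e. scale to [0, 1] and then normalize each channel with ImageNet statistics. A rough NumPy equivalent, assuming channels-last input; the mean/std constants are the usual torch-mode values, quoted from memory rather than from this diff:

import numpy as np

def torch_mode_preprocess(x):
    # Scale to [0, 1], then normalize each channel with ImageNet mean/std.
    x = np.asarray(x, dtype="float32") / 255.0
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    return (x - mean) / std

batch = np.random.randint(0, 256, size=(1, 224, 224, 3))
print(torch_mode_preprocess(batch).shape)  # (1, 224, 224, 3)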
Reference: @@ -24,6 +24,8 @@ import copy import math +import tensorflow.compat.v2 as tf + from keras import backend from keras.applications import imagenet_utils from keras.engine import training @@ -31,113 +33,135 @@ from keras.utils import data_utils from keras.utils import layer_utils -import tensorflow.compat.v2 as tf - +# isort: off from tensorflow.python.util.tf_export import keras_export - -BASE_WEIGHTS_PATH = 'https://storage.googleapis.com/keras-applications/' +BASE_WEIGHTS_PATH = "https://storage.googleapis.com/keras-applications/" WEIGHTS_HASHES = { - 'b0': ('902e53a9f72be733fc0bcb005b3ebbac', - '50bc09e76180e00e4465e1a485ddc09d'), - 'b1': ('1d254153d4ab51201f1646940f018540', - '74c4e6b3e1f6a1eea24c589628592432'), - 'b2': ('b15cce36ff4dcbd00b6dd88e7857a6ad', - '111f8e2ac8aa800a7a99e3239f7bfb39'), - 'b3': ('ffd1fdc53d0ce67064dc6a9c7960ede0', - 'af6d107764bb5b1abb91932881670226'), - 'b4': ('18c95ad55216b8f92d7e70b3a046e2fc', - 'ebc24e6d6c33eaebbd558eafbeedf1ba'), - 'b5': ('ace28f2a6363774853a83a0b21b9421a', - '38879255a25d3c92d5e44e04ae6cec6f'), - 'b6': ('165f6e37dce68623721b423839de8be5', - '9ecce42647a20130c1f39a5d4cb75743'), - 'b7': ('8c03f828fec3ef71311cd463b6759d99', - 'cbcfe4450ddf6f3ad90b1b398090fe4a'), + "b0": ( + "902e53a9f72be733fc0bcb005b3ebbac", + "50bc09e76180e00e4465e1a485ddc09d", + ), + "b1": ( + "1d254153d4ab51201f1646940f018540", + "74c4e6b3e1f6a1eea24c589628592432", + ), + "b2": ( + "b15cce36ff4dcbd00b6dd88e7857a6ad", + "111f8e2ac8aa800a7a99e3239f7bfb39", + ), + "b3": ( + "ffd1fdc53d0ce67064dc6a9c7960ede0", + "af6d107764bb5b1abb91932881670226", + ), + "b4": ( + "18c95ad55216b8f92d7e70b3a046e2fc", + "ebc24e6d6c33eaebbd558eafbeedf1ba", + ), + "b5": ( + "ace28f2a6363774853a83a0b21b9421a", + "38879255a25d3c92d5e44e04ae6cec6f", + ), + "b6": ( + "165f6e37dce68623721b423839de8be5", + "9ecce42647a20130c1f39a5d4cb75743", + ), + "b7": ( + "8c03f828fec3ef71311cd463b6759d99", + "cbcfe4450ddf6f3ad90b1b398090fe4a", + ), } -DEFAULT_BLOCKS_ARGS = [{ - 'kernel_size': 3, - 'repeats': 1, - 'filters_in': 32, - 'filters_out': 16, - 'expand_ratio': 1, - 'id_skip': True, - 'strides': 1, - 'se_ratio': 0.25 -}, { - 'kernel_size': 3, - 'repeats': 2, - 'filters_in': 16, - 'filters_out': 24, - 'expand_ratio': 6, - 'id_skip': True, - 'strides': 2, - 'se_ratio': 0.25 -}, { - 'kernel_size': 5, - 'repeats': 2, - 'filters_in': 24, - 'filters_out': 40, - 'expand_ratio': 6, - 'id_skip': True, - 'strides': 2, - 'se_ratio': 0.25 -}, { - 'kernel_size': 3, - 'repeats': 3, - 'filters_in': 40, - 'filters_out': 80, - 'expand_ratio': 6, - 'id_skip': True, - 'strides': 2, - 'se_ratio': 0.25 -}, { - 'kernel_size': 5, - 'repeats': 3, - 'filters_in': 80, - 'filters_out': 112, - 'expand_ratio': 6, - 'id_skip': True, - 'strides': 1, - 'se_ratio': 0.25 -}, { - 'kernel_size': 5, - 'repeats': 4, - 'filters_in': 112, - 'filters_out': 192, - 'expand_ratio': 6, - 'id_skip': True, - 'strides': 2, - 'se_ratio': 0.25 -}, { - 'kernel_size': 3, - 'repeats': 1, - 'filters_in': 192, - 'filters_out': 320, - 'expand_ratio': 6, - 'id_skip': True, - 'strides': 1, - 'se_ratio': 0.25 -}] +DEFAULT_BLOCKS_ARGS = [ + { + "kernel_size": 3, + "repeats": 1, + "filters_in": 32, + "filters_out": 16, + "expand_ratio": 1, + "id_skip": True, + "strides": 1, + "se_ratio": 0.25, + }, + { + "kernel_size": 3, + "repeats": 2, + "filters_in": 16, + "filters_out": 24, + "expand_ratio": 6, + "id_skip": True, + "strides": 2, + "se_ratio": 0.25, + }, + { + "kernel_size": 5, + "repeats": 2, + "filters_in": 24, + "filters_out": 40, + "expand_ratio": 6, 
+ "id_skip": True, + "strides": 2, + "se_ratio": 0.25, + }, + { + "kernel_size": 3, + "repeats": 3, + "filters_in": 40, + "filters_out": 80, + "expand_ratio": 6, + "id_skip": True, + "strides": 2, + "se_ratio": 0.25, + }, + { + "kernel_size": 5, + "repeats": 3, + "filters_in": 80, + "filters_out": 112, + "expand_ratio": 6, + "id_skip": True, + "strides": 1, + "se_ratio": 0.25, + }, + { + "kernel_size": 5, + "repeats": 4, + "filters_in": 112, + "filters_out": 192, + "expand_ratio": 6, + "id_skip": True, + "strides": 2, + "se_ratio": 0.25, + }, + { + "kernel_size": 3, + "repeats": 1, + "filters_in": 192, + "filters_out": 320, + "expand_ratio": 6, + "id_skip": True, + "strides": 1, + "se_ratio": 0.25, + }, +] CONV_KERNEL_INITIALIZER = { - 'class_name': 'VarianceScaling', - 'config': { - 'scale': 2.0, - 'mode': 'fan_out', - 'distribution': 'truncated_normal' - } + "class_name": "VarianceScaling", + "config": { + "scale": 2.0, + "mode": "fan_out", + "distribution": "truncated_normal", + }, } DENSE_KERNEL_INITIALIZER = { - 'class_name': 'VarianceScaling', - 'config': { - 'scale': 1. / 3., - 'mode': 'fan_out', - 'distribution': 'uniform' - } + "class_name": "VarianceScaling", + "config": { + "scale": 1.0 / 3.0, + "mode": "fan_out", + "distribution": "uniform", + }, } layers = VersionAwareLayers() @@ -168,7 +192,7 @@ Args: include_top: Whether to include the fully-connected - layer at the top of the network. Defaults to True. + layer at the top of the network. Defaults to `True`. weights: One of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. Defaults to 'imagenet'. @@ -179,7 +203,7 @@ if `include_top` is False. It should have exactly 3 inputs channels. pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. Defaults to None. + when `include_top` is `False`. Defaults to `None`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. @@ -191,8 +215,8 @@ be applied. classes: Optional number of classes to classify images into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. Defaults to 1000 (number of - ImageNet classes). + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. @@ -215,569 +239,633 @@ def EfficientNet( dropout_rate=0.2, drop_connect_rate=0.2, depth_divisor=8, - activation='swish', - blocks_args='default', - model_name='efficientnet', + activation="swish", + blocks_args="default", + model_name="efficientnet", include_top=True, - weights='imagenet', + weights="imagenet", input_tensor=None, input_shape=None, pooling=None, classes=1000, - classifier_activation='softmax'): - """Instantiates the EfficientNet architecture using given scaling coefficients. - - Args: - width_coefficient: float, scaling coefficient for network width. - depth_coefficient: float, scaling coefficient for network depth. - default_size: integer, default input image size. - dropout_rate: float, dropout rate before final classifier layer. - drop_connect_rate: float, dropout rate at skip connections. - depth_divisor: integer, a unit of network width. - activation: activation function. - blocks_args: list of dicts, parameters to construct block modules. 
- model_name: string, model name. - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor - (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False. - It should have exactly 3 inputs channels. - pooling: optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - - Returns: - A `keras.Model` instance. - - Raises: - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - ValueError: if `classifier_activation` is not `softmax` or `None` when - using a pretrained top layer. - """ - if blocks_args == 'default': - blocks_args = DEFAULT_BLOCKS_ARGS - - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - def round_filters(filters, divisor=depth_divisor): - """Round number of filters based on depth multiplier.""" - filters *= width_coefficient - new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_filters < 0.9 * filters: - new_filters += divisor - return int(new_filters) - - def round_repeats(repeats): - """Round number of repeats based on depth multiplier.""" - return int(math.ceil(depth_coefficient * repeats)) - - # Build stem - x = img_input - x = layers.Rescaling(1. 
/ 255.)(x) - x = layers.Normalization(axis=bn_axis)(x) - if weights == 'imagenet': - # Note that the normaliztion layer uses square value of STDDEV as the - # variance for the layer: result = (input - mean) / sqrt(var) - # However, the orginal implemenetation uses (input - mean) / var to - # normalize the input, we need to divide another sqrt(var) to match the - # original implementation. - # See https://github.com/tensorflow/tensorflow/issues/49930 for more details - x = layers.Rescaling(1. / tf.math.sqrt(IMAGENET_STDDEV_RGB))(x) - - x = layers.ZeroPadding2D( - padding=imagenet_utils.correct_pad(x, 3), - name='stem_conv_pad')(x) - x = layers.Conv2D( - round_filters(32), - 3, - strides=2, - padding='valid', - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name='stem_conv')(x) - x = layers.BatchNormalization(axis=bn_axis, name='stem_bn')(x) - x = layers.Activation(activation, name='stem_activation')(x) - - # Build blocks - blocks_args = copy.deepcopy(blocks_args) - - b = 0 - blocks = float(sum(round_repeats(args['repeats']) for args in blocks_args)) - for (i, args) in enumerate(blocks_args): - assert args['repeats'] > 0 - # Update block input and output filters based on depth multiplier. - args['filters_in'] = round_filters(args['filters_in']) - args['filters_out'] = round_filters(args['filters_out']) - - for j in range(round_repeats(args.pop('repeats'))): - # The first block needs to take care of stride and filter size increase. - if j > 0: - args['strides'] = 1 - args['filters_in'] = args['filters_out'] - x = block( - x, - activation, - drop_connect_rate * b / blocks, - name='block{}{}_'.format(i + 1, chr(j + 97)), - **args) - b += 1 - - # Build top - x = layers.Conv2D( - round_filters(1280), - 1, - padding='same', - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name='top_conv')(x) - x = layers.BatchNormalization(axis=bn_axis, name='top_bn')(x) - x = layers.Activation(activation, name='top_activation')(x) - if include_top: - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - if dropout_rate > 0: - x = layers.Dropout(dropout_rate, name='top_dropout')(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense( - classes, - activation=classifier_activation, - kernel_initializer=DENSE_KERNEL_INITIALIZER, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D(name='max_pool')(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = training.Model(inputs, x, name=model_name) - - # Load weights. - if weights == 'imagenet': - if include_top: - file_suffix = '.h5' - file_hash = WEIGHTS_HASHES[model_name[-2:]][0] - else: - file_suffix = '_notop.h5' - file_hash = WEIGHTS_HASHES[model_name[-2:]][1] - file_name = model_name + file_suffix - weights_path = data_utils.get_file( - file_name, - BASE_WEIGHTS_PATH + file_name, - cache_subdir='models', - file_hash=file_hash) - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - return model - - -def block(inputs, - activation='swish', - drop_rate=0., - name='', - filters_in=32, - filters_out=16, - kernel_size=3, - strides=1, - expand_ratio=1, - se_ratio=0., - id_skip=True): - """An inverted residual block. - - Args: - inputs: input tensor. 
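One behavior worth noting while this builder is rewritten: each block is handed `drop_connect_rate * b / blocks`, so the stochastic-depth drop rate ramps linearly from 0 at the first block toward `drop_connect_rate` at the last. A standalone sketch of that schedule, using B0's 16 blocks and the default rate of 0.2:

drop_connect_rate = 0.2
total_blocks = 16  # sum of B0's per-stage repeats
rates = [drop_connect_rate * b / total_blocks for b in range(total_blocks)]
print(rates[0], rates[-1])  # 0.0 and 0.1875; the last block never reaches 0.2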
+ classifier_activation="softmax", +): + """Instantiates the EfficientNet architecture. + + Args: + width_coefficient: float, scaling coefficient for network width. + depth_coefficient: float, scaling coefficient for network depth. + default_size: integer, default input image size. + dropout_rate: float, dropout rate before final classifier layer. + drop_connect_rate: float, dropout rate at skip connections. + depth_divisor: integer, a unit of network width. activation: activation function. - drop_rate: float between 0 and 1, fraction of the input units to drop. - name: string, block label. - filters_in: integer, the number of input filters. - filters_out: integer, the number of output filters. - kernel_size: integer, the dimension of the convolution window. - strides: integer, the stride of the convolution. - expand_ratio: integer, scaling coefficient for the input filters. - se_ratio: float between 0 and 1, fraction to squeeze the input filters. - id_skip: boolean. - - Returns: - output tensor for the block. - """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 + blocks_args: list of dicts, parameters to construct block modules. + model_name: string, model name. + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + + Returns: + A `keras.Model` instance. + + Raises: + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + ValueError: if `classifier_activation` is not `softmax` or `None` when + using a pretrained top layer. + """ + if blocks_args == "default": + blocks_args = DEFAULT_BLOCKS_ARGS + + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded." 
+        )
+
+    if weights == "imagenet" and include_top and classes != 1000:
+        raise ValueError(
+            'If using `weights` as `"imagenet"` with `include_top`'
+            " as true, `classes` should be 1000"
+        )
+
+    # Determine proper input shape
+    input_shape = imagenet_utils.obtain_input_shape(
+        input_shape,
+        default_size=default_size,
+        min_size=32,
+        data_format=backend.image_data_format(),
+        require_flatten=include_top,
+        weights=weights,
+    )
+
+    if input_tensor is None:
+        img_input = layers.Input(shape=input_shape)
+    else:
+        if not backend.is_keras_tensor(input_tensor):
+            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
+        else:
+            img_input = input_tensor
+
+    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1
+
+    def round_filters(filters, divisor=depth_divisor):
+        """Round number of filters based on depth multiplier."""
+        filters *= width_coefficient
+        new_filters = max(
+            divisor, int(filters + divisor / 2) // divisor * divisor
+        )
+        # Make sure that round down does not go down by more than 10%.
+        if new_filters < 0.9 * filters:
+            new_filters += divisor
+        return int(new_filters)
+
+    def round_repeats(repeats):
+        """Round number of repeats based on depth multiplier."""
+        return int(math.ceil(depth_coefficient * repeats))
+
+    # Build stem
+    x = img_input
+    x = layers.Rescaling(1.0 / 255.0)(x)
+    x = layers.Normalization(axis=bn_axis)(x)
+    if weights == "imagenet":
+        # Note that the normalization layer uses square value of STDDEV as the
+        # variance for the layer: result = (input - mean) / sqrt(var)
+        # However, the original implementation uses (input - mean) / var to
+        # normalize the input, so we need to divide by another sqrt(var) to
+        # match the original implementation.
+        # See https://github.com/tensorflow/tensorflow/issues/49930 for more
+        # details
+        x = layers.Rescaling(
+            [1.0 / math.sqrt(stddev) for stddev in IMAGENET_STDDEV_RGB]
+        )(x)

-  # Expansion phase
-  filters = filters_in * expand_ratio
-  if expand_ratio != 1:
+    x = layers.ZeroPadding2D(
+        padding=imagenet_utils.correct_pad(x, 3), name="stem_conv_pad"
+    )(x)
     x = layers.Conv2D(
-        filters,
-        1,
-        padding='same',
+        round_filters(32),
+        3,
+        strides=2,
+        padding="valid",
         use_bias=False,
         kernel_initializer=CONV_KERNEL_INITIALIZER,
-        name=name + 'expand_conv')(
-            inputs)
-    x = layers.BatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x)
-    x = layers.Activation(activation, name=name + 'expand_activation')(x)
-  else:
-    x = inputs
-
-  # Depthwise Convolution
-  if strides == 2:
-    x = layers.ZeroPadding2D(
-        padding=imagenet_utils.correct_pad(x, kernel_size),
-        name=name + 'dwconv_pad')(x)
-    conv_pad = 'valid'
-  else:
-    conv_pad = 'same'
-  x = layers.DepthwiseConv2D(
-      kernel_size,
-      strides=strides,
-      padding=conv_pad,
-      use_bias=False,
-      depthwise_initializer=CONV_KERNEL_INITIALIZER,
-      name=name + 'dwconv')(x)
-  x = layers.BatchNormalization(axis=bn_axis, name=name + 'bn')(x)
-  x = layers.Activation(activation, name=name + 'activation')(x)
-
-  # Squeeze and Excitation phase
-  if 0 < se_ratio <= 1:
-    filters_se = max(1, int(filters_in * se_ratio))
-    se = layers.GlobalAveragePooling2D(name=name + 'se_squeeze')(x)
-    if bn_axis == 1:
-      se_shape = (filters, 1, 1)
-    else:
-      se_shape = (1, 1, filters)
-    se = layers.Reshape(se_shape, name=name + 'se_reshape')(se)
-    se = layers.Conv2D(
-        filters_se,
+        name="stem_conv",
+    )(x)
+    x = layers.BatchNormalization(axis=bn_axis, name="stem_bn")(x)
+    x = layers.Activation(activation, name="stem_activation")(x)
+
+    # Build blocks
+    blocks_args = copy.deepcopy(blocks_args)
+
+    b = 
0 + blocks = float(sum(round_repeats(args["repeats"]) for args in blocks_args)) + for i, args in enumerate(blocks_args): + assert args["repeats"] > 0 + # Update block input and output filters based on depth multiplier. + args["filters_in"] = round_filters(args["filters_in"]) + args["filters_out"] = round_filters(args["filters_out"]) + + for j in range(round_repeats(args.pop("repeats"))): + # The first block needs to take care of stride and filter size + # increase. + if j > 0: + args["strides"] = 1 + args["filters_in"] = args["filters_out"] + x = block( + x, + activation, + drop_connect_rate * b / blocks, + name=f"block{i + 1}{chr(j + 97)}_", + **args, + ) + b += 1 + + # Build top + x = layers.Conv2D( + round_filters(1280), 1, - padding='same', - activation=activation, + padding="same", + use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + 'se_reduce')( - se) - se = layers.Conv2D( - filters, + name="top_conv", + )(x) + x = layers.BatchNormalization(axis=bn_axis, name="top_bn")(x) + x = layers.Activation(activation, name="top_activation")(x) + if include_top: + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + if dropout_rate > 0: + x = layers.Dropout(dropout_rate, name="top_dropout")(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, + activation=classifier_activation, + kernel_initializer=DENSE_KERNEL_INITIALIZER, + name="predictions", + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D(name="max_pool")(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = training.Model(inputs, x, name=model_name) + + # Load weights. + if weights == "imagenet": + if include_top: + file_suffix = ".h5" + file_hash = WEIGHTS_HASHES[model_name[-2:]][0] + else: + file_suffix = "_notop.h5" + file_hash = WEIGHTS_HASHES[model_name[-2:]][1] + file_name = model_name + file_suffix + weights_path = data_utils.get_file( + file_name, + BASE_WEIGHTS_PATH + file_name, + cache_subdir="models", + file_hash=file_hash, + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + return model + + +def block( + inputs, + activation="swish", + drop_rate=0.0, + name="", + filters_in=32, + filters_out=16, + kernel_size=3, + strides=1, + expand_ratio=1, + se_ratio=0.0, + id_skip=True, +): + """An inverted residual block. + + Args: + inputs: input tensor. + activation: activation function. + drop_rate: float between 0 and 1, fraction of the input units to drop. + name: string, block label. + filters_in: integer, the number of input filters. + filters_out: integer, the number of output filters. + kernel_size: integer, the dimension of the convolution window. + strides: integer, the stride of the convolution. + expand_ratio: integer, scaling coefficient for the input filters. + se_ratio: float between 0 and 1, fraction to squeeze the input filters. + id_skip: boolean. + + Returns: + output tensor for the block. 
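A small readability win in the block loop above: the old `'block{}{}_'.format(i + 1, chr(j + 97))` becomes an f-string. For reference, the layer-name prefixes that convention produces (standalone sketch):

for i in range(2):  # first two stages
    for j in range(3):  # first three repeats within a stage
        print(f"block{i + 1}{chr(j + 97)}_")  # 97 == ord("a")
# block1a_ block1b_ block1c_ block2a_ block2b_ block2c_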
+ """ + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + # Expansion phase + filters = filters_in * expand_ratio + if expand_ratio != 1: + x = layers.Conv2D( + filters, + 1, + padding="same", + use_bias=False, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=name + "expand_conv", + )(inputs) + x = layers.BatchNormalization(axis=bn_axis, name=name + "expand_bn")(x) + x = layers.Activation(activation, name=name + "expand_activation")(x) + else: + x = inputs + + # Depthwise Convolution + if strides == 2: + x = layers.ZeroPadding2D( + padding=imagenet_utils.correct_pad(x, kernel_size), + name=name + "dwconv_pad", + )(x) + conv_pad = "valid" + else: + conv_pad = "same" + x = layers.DepthwiseConv2D( + kernel_size, + strides=strides, + padding=conv_pad, + use_bias=False, + depthwise_initializer=CONV_KERNEL_INITIALIZER, + name=name + "dwconv", + )(x) + x = layers.BatchNormalization(axis=bn_axis, name=name + "bn")(x) + x = layers.Activation(activation, name=name + "activation")(x) + + # Squeeze and Excitation phase + if 0 < se_ratio <= 1: + filters_se = max(1, int(filters_in * se_ratio)) + se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x) + if bn_axis == 1: + se_shape = (filters, 1, 1) + else: + se_shape = (1, 1, filters) + se = layers.Reshape(se_shape, name=name + "se_reshape")(se) + se = layers.Conv2D( + filters_se, + 1, + padding="same", + activation=activation, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=name + "se_reduce", + )(se) + se = layers.Conv2D( + filters, + 1, + padding="same", + activation="sigmoid", + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=name + "se_expand", + )(se) + x = layers.multiply([x, se], name=name + "se_excite") + + # Output phase + x = layers.Conv2D( + filters_out, 1, - padding='same', - activation='sigmoid', + padding="same", + use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + 'se_expand')(se) - x = layers.multiply([x, se], name=name + 'se_excite') - - # Output phase - x = layers.Conv2D( - filters_out, - 1, - padding='same', - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + 'project_conv')(x) - x = layers.BatchNormalization(axis=bn_axis, name=name + 'project_bn')(x) - if id_skip and strides == 1 and filters_in == filters_out: - if drop_rate > 0: - x = layers.Dropout( - drop_rate, noise_shape=(None, 1, 1, 1), name=name + 'drop')(x) - x = layers.add([x, inputs], name=name + 'add') - return x - - -@keras_export('keras.applications.efficientnet.EfficientNetB0', - 'keras.applications.EfficientNetB0') -def EfficientNetB0(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - return EfficientNet( - 1.0, - 1.0, - 224, - 0.2, - model_name='efficientnetb0', - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - **kwargs) - - -@keras_export('keras.applications.efficientnet.EfficientNetB1', - 'keras.applications.EfficientNetB1') -def EfficientNetB1(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - return EfficientNet( - 1.0, - 1.1, - 240, - 0.2, - model_name='efficientnetb1', - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - 
classifier_activation=classifier_activation, - **kwargs) - - -@keras_export('keras.applications.efficientnet.EfficientNetB2', - 'keras.applications.EfficientNetB2') -def EfficientNetB2(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - return EfficientNet( - 1.1, - 1.2, - 260, - 0.3, - model_name='efficientnetb2', - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - **kwargs) - - -@keras_export('keras.applications.efficientnet.EfficientNetB3', - 'keras.applications.EfficientNetB3') -def EfficientNetB3(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - return EfficientNet( - 1.2, - 1.4, - 300, - 0.3, - model_name='efficientnetb3', - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - **kwargs) - - -@keras_export('keras.applications.efficientnet.EfficientNetB4', - 'keras.applications.EfficientNetB4') -def EfficientNetB4(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - return EfficientNet( - 1.4, - 1.8, - 380, - 0.4, - model_name='efficientnetb4', - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - **kwargs) - - -@keras_export('keras.applications.efficientnet.EfficientNetB5', - 'keras.applications.EfficientNetB5') -def EfficientNetB5(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - return EfficientNet( - 1.6, - 2.2, - 456, - 0.4, - model_name='efficientnetb5', - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - **kwargs) - - -@keras_export('keras.applications.efficientnet.EfficientNetB6', - 'keras.applications.EfficientNetB6') -def EfficientNetB6(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - return EfficientNet( - 1.8, - 2.6, - 528, - 0.5, - model_name='efficientnetb6', - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - **kwargs) - - -@keras_export('keras.applications.efficientnet.EfficientNetB7', - 'keras.applications.EfficientNetB7') -def EfficientNetB7(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - return EfficientNet( - 2.0, - 3.1, - 600, - 0.5, - model_name='efficientnetb7', - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - **kwargs) - - -EfficientNetB0.__doc__ = BASE_DOCSTRING.format(name='EfficientNetB0') 
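The eight B0-B7 wrappers being reflowed here differ only in four scalars each. Collected into one table (values copied from the wrapper calls in this diff), the compound-scaling progression is easier to scan:

# (width_coefficient, depth_coefficient, default_size, dropout_rate)
VARIANTS = {
    "b0": (1.0, 1.0, 224, 0.2),
    "b1": (1.0, 1.1, 240, 0.2),
    "b2": (1.1, 1.2, 260, 0.3),
    "b3": (1.2, 1.4, 300, 0.3),
    "b4": (1.4, 1.8, 380, 0.4),
    "b5": (1.6, 2.2, 456, 0.4),
    "b6": (1.8, 2.6, 528, 0.5),
    "b7": (2.0, 3.1, 600, 0.5),
}
for name, (w, d, size, drop) in VARIANTS.items():
    print(f"{name}: width x{w}, depth x{d}, {size}px, dropout {drop}")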
-EfficientNetB1.__doc__ = BASE_DOCSTRING.format(name='EfficientNetB1') -EfficientNetB2.__doc__ = BASE_DOCSTRING.format(name='EfficientNetB2') -EfficientNetB3.__doc__ = BASE_DOCSTRING.format(name='EfficientNetB3') -EfficientNetB4.__doc__ = BASE_DOCSTRING.format(name='EfficientNetB4') -EfficientNetB5.__doc__ = BASE_DOCSTRING.format(name='EfficientNetB5') -EfficientNetB6.__doc__ = BASE_DOCSTRING.format(name='EfficientNetB6') -EfficientNetB7.__doc__ = BASE_DOCSTRING.format(name='EfficientNetB7') - - -@keras_export('keras.applications.efficientnet.preprocess_input') -def preprocess_input(x, data_format=None): # pylint: disable=unused-argument - """A placeholder method for backward compatibility. - - The preprocessing logic has been included in the efficientnet model - implementation. Users are no longer required to call this method to normalize - the input data. This method does nothing and only kept as a placeholder to - align the API surface between old and new version of model. - - Args: - x: A floating point `numpy.array` or a `tf.Tensor`. - data_format: Optional data format of the image tensor/array. Defaults to - None, in which case the global setting - `tf.keras.backend.image_data_format()` is used (unless you changed it, - it defaults to "channels_last").{mode} - - Returns: - Unchanged `numpy.array` or `tf.Tensor`. - """ - return x - - -@keras_export('keras.applications.efficientnet.decode_predictions') + name=name + "project_conv", + )(x) + x = layers.BatchNormalization(axis=bn_axis, name=name + "project_bn")(x) + if id_skip and strides == 1 and filters_in == filters_out: + if drop_rate > 0: + x = layers.Dropout( + drop_rate, noise_shape=(None, 1, 1, 1), name=name + "drop" + )(x) + x = layers.add([x, inputs], name=name + "add") + return x + + +@keras_export( + "keras.applications.efficientnet.EfficientNetB0", + "keras.applications.EfficientNetB0", +) +def EfficientNetB0( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + return EfficientNet( + 1.0, + 1.0, + 224, + 0.2, + model_name="efficientnetb0", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + **kwargs, + ) + + +@keras_export( + "keras.applications.efficientnet.EfficientNetB1", + "keras.applications.EfficientNetB1", +) +def EfficientNetB1( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + return EfficientNet( + 1.0, + 1.1, + 240, + 0.2, + model_name="efficientnetb1", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + **kwargs, + ) + + +@keras_export( + "keras.applications.efficientnet.EfficientNetB2", + "keras.applications.EfficientNetB2", +) +def EfficientNetB2( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + return EfficientNet( + 1.1, + 1.2, + 260, + 0.3, + model_name="efficientnetb2", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + **kwargs, + ) + + +@keras_export( + 
"keras.applications.efficientnet.EfficientNetB3", + "keras.applications.EfficientNetB3", +) +def EfficientNetB3( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + return EfficientNet( + 1.2, + 1.4, + 300, + 0.3, + model_name="efficientnetb3", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + **kwargs, + ) + + +@keras_export( + "keras.applications.efficientnet.EfficientNetB4", + "keras.applications.EfficientNetB4", +) +def EfficientNetB4( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + return EfficientNet( + 1.4, + 1.8, + 380, + 0.4, + model_name="efficientnetb4", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + **kwargs, + ) + + +@keras_export( + "keras.applications.efficientnet.EfficientNetB5", + "keras.applications.EfficientNetB5", +) +def EfficientNetB5( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + return EfficientNet( + 1.6, + 2.2, + 456, + 0.4, + model_name="efficientnetb5", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + **kwargs, + ) + + +@keras_export( + "keras.applications.efficientnet.EfficientNetB6", + "keras.applications.EfficientNetB6", +) +def EfficientNetB6( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + return EfficientNet( + 1.8, + 2.6, + 528, + 0.5, + model_name="efficientnetb6", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + **kwargs, + ) + + +@keras_export( + "keras.applications.efficientnet.EfficientNetB7", + "keras.applications.EfficientNetB7", +) +def EfficientNetB7( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + return EfficientNet( + 2.0, + 3.1, + 600, + 0.5, + model_name="efficientnetb7", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + **kwargs, + ) + + +EfficientNetB0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB0") +EfficientNetB1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB1") +EfficientNetB2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB2") +EfficientNetB3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB3") +EfficientNetB4.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB4") +EfficientNetB5.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB5") +EfficientNetB6.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB6") +EfficientNetB7.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB7") + + +@keras_export("keras.applications.efficientnet.preprocess_input") 
+def preprocess_input(x, data_format=None): + """A placeholder method for backward compatibility. + + The preprocessing logic has been included in the efficientnet model + implementation. Users are no longer required to call this method to + normalize the input data. This method does nothing and only kept as a + placeholder to align the API surface between old and new version of model. + + Args: + x: A floating point `numpy.array` or a `tf.Tensor`. + data_format: Optional data format of the image tensor/array. `None` means + the global setting `tf.keras.backend.image_data_format()` is used + (unless you changed it, it uses "channels_last"). + Defaults to `None`. + + Returns: + Unchanged `numpy.array` or `tf.Tensor`. + """ + return x + + +@keras_export("keras.applications.efficientnet.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/efficientnet_v2.py b/keras/applications/efficientnet_v2.py index 783d6a848b9f..2d309e757568 100644 --- a/keras/applications/efficientnet_v2.py +++ b/keras/applications/efficientnet_v2.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name -# pylint: disable=missing-docstring + + """EfficientNet V2 models for Keras. Reference: @@ -24,91 +24,114 @@ import copy import math +import tensorflow.compat.v2 as tf + from keras import backend from keras import layers from keras.applications import imagenet_utils from keras.engine import training from keras.utils import data_utils from keras.utils import layer_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -BASE_WEIGHTS_PATH = "https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/" +BASE_WEIGHTS_PATH = "https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/" # noqa: E501 WEIGHTS_HASHES = { - "b0": ("21ecbf6da12460d5c40bb2f29ceb2188", - "893217f2bb855e2983157299931e43ff"), - "b1": ("069f0534ff22adf035c89e2d9547a9dc", - "0e80663031ca32d657f9caa404b6ec37"), - "b2": ("424e49f28180edbde1e94797771950a7", - "1dfe2e7a5d45b6632553a8961ea609eb"), - "b3": ("1f1fc43bd98a6e4fd8fdfd551e02c7a0", - "f6abf7b5849ac99a89b50dd3fd532856"), - "-s": ("e1d88a8495beba45748fedd0cecbe016", - "af0682fb74e8c54910f2d4393339c070"), - "-m": ("a3bf6aa3276309f4fc6a34aa114c95cd", - "1b8dc055df72dde80d614482840fe342"), - "-l": ("27e6d408b53c7ebc868fefa357689935", - "b0b66b5c863aef5b46e8608fe1711615"), + "b0": ( + "21ecbf6da12460d5c40bb2f29ceb2188", + "893217f2bb855e2983157299931e43ff", + ), + "b1": ( + "069f0534ff22adf035c89e2d9547a9dc", + "0e80663031ca32d657f9caa404b6ec37", + ), + "b2": ( + "424e49f28180edbde1e94797771950a7", + "1dfe2e7a5d45b6632553a8961ea609eb", + ), + "b3": ( + "1f1fc43bd98a6e4fd8fdfd551e02c7a0", + "f6abf7b5849ac99a89b50dd3fd532856", + ), + "-s": ( + "e1d88a8495beba45748fedd0cecbe016", + "af0682fb74e8c54910f2d4393339c070", + ), + "-m": ( + "a3bf6aa3276309f4fc6a34aa114c95cd", + "1b8dc055df72dde80d614482840fe342", + ), + "-l": ( + "27e6d408b53c7ebc868fefa357689935", + "b0b66b5c863aef5b46e8608fe1711615", + ), } DEFAULT_BLOCKS_ARGS = { - "efficientnetv2-s": [{ - "kernel_size": 3, - "num_repeat": 2, - "input_filters": 24, - 
"output_filters": 24, - "expand_ratio": 1, - "se_ratio": 0.0, - "strides": 1, - "conv_type": 1, - }, { - "kernel_size": 3, - "num_repeat": 4, - "input_filters": 24, - "output_filters": 48, - "expand_ratio": 4, - "se_ratio": 0.0, - "strides": 2, - "conv_type": 1, - }, { - "conv_type": 1, - "expand_ratio": 4, - "input_filters": 48, - "kernel_size": 3, - "num_repeat": 4, - "output_filters": 64, - "se_ratio": 0, - "strides": 2, - }, { - "conv_type": 0, - "expand_ratio": 4, - "input_filters": 64, - "kernel_size": 3, - "num_repeat": 6, - "output_filters": 128, - "se_ratio": 0.25, - "strides": 2, - }, { - "conv_type": 0, - "expand_ratio": 6, - "input_filters": 128, - "kernel_size": 3, - "num_repeat": 9, - "output_filters": 160, - "se_ratio": 0.25, - "strides": 1, - }, { - "conv_type": 0, - "expand_ratio": 6, - "input_filters": 160, - "kernel_size": 3, - "num_repeat": 15, - "output_filters": 256, - "se_ratio": 0.25, - "strides": 2, - }], + "efficientnetv2-s": [ + { + "kernel_size": 3, + "num_repeat": 2, + "input_filters": 24, + "output_filters": 24, + "expand_ratio": 1, + "se_ratio": 0.0, + "strides": 1, + "conv_type": 1, + }, + { + "kernel_size": 3, + "num_repeat": 4, + "input_filters": 24, + "output_filters": 48, + "expand_ratio": 4, + "se_ratio": 0.0, + "strides": 2, + "conv_type": 1, + }, + { + "conv_type": 1, + "expand_ratio": 4, + "input_filters": 48, + "kernel_size": 3, + "num_repeat": 4, + "output_filters": 64, + "se_ratio": 0, + "strides": 2, + }, + { + "conv_type": 0, + "expand_ratio": 4, + "input_filters": 64, + "kernel_size": 3, + "num_repeat": 6, + "output_filters": 128, + "se_ratio": 0.25, + "strides": 2, + }, + { + "conv_type": 0, + "expand_ratio": 6, + "input_filters": 128, + "kernel_size": 3, + "num_repeat": 9, + "output_filters": 160, + "se_ratio": 0.25, + "strides": 1, + }, + { + "conv_type": 0, + "expand_ratio": 6, + "input_filters": 160, + "kernel_size": 3, + "num_repeat": 15, + "output_filters": 256, + "se_ratio": 0.25, + "strides": 2, + }, + ], "efficientnetv2-m": [ { "kernel_size": 3, @@ -508,17 +531,17 @@ "config": { "scale": 2.0, "mode": "fan_out", - "distribution": "truncated_normal" - } + "distribution": "truncated_normal", + }, } DENSE_KERNEL_INITIALIZER = { "class_name": "VarianceScaling", "config": { - "scale": 1. / 3., + "scale": 1.0 / 3.0, "mode": "fan_out", - "distribution": "uniform" - } + "distribution": "uniform", + }, } BASE_DOCSTRING = """Instantiates the {name} architecture. @@ -539,19 +562,19 @@ https://keras.io/guides/transfer_learning/). Note: each Keras Application expects a specific kind of input preprocessing. - For EfficientNetV2, by default input preprocessing is included as a part of the - model (as a `Rescaling` layer), and thus + For EfficientNetV2, by default input preprocessing is included as a part of + the model (as a `Rescaling` layer), and thus `tf.keras.applications.efficientnet_v2.preprocess_input` is actually a - pass-through function. In this use case, EfficientNetV2 models expect their inputs - to be float tensors of pixels with values in the [0-255] range. + pass-through function. In this use case, EfficientNetV2 models expect their + inputs to be float tensors of pixels with values in the [0-255] range. At the same time, preprocessing as a part of the model (i.e. `Rescaling` layer) can be disabled by setting `include_preprocessing` argument to False. - With preprocessing disabled EfficientNetV2 models expect their inputs to be float - tensors of pixels with values in the [-1, 1] range. 
+ With preprocessing disabled EfficientNetV2 models expect their inputs to be + float tensors of pixels with values in the [-1, 1] range. Args: include_top: Boolean, whether to include the fully-connected - layer at the top of the network. Defaults to True. + layer at the top of the network. Defaults to `True`. weights: One of `None` (random initialization), `"imagenet"` (pre-training on ImageNet), or the path to the weights file to be loaded. Defaults to `"imagenet"`. @@ -562,7 +585,7 @@ if `include_top` is False. It should have exactly 3 input channels. pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. Defaults to None. + when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. @@ -572,16 +595,17 @@ the output of the model will be a 2D tensor. - `"max"` means that global max pooling will be applied. + Defaults to `None`. classes: Optional number of classes to classify images into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. Defaults to 1000 (number of - ImageNet classes). + if no `weights` argument is specified. Defaults to `1000` + (the number of ImageNet classes). classifier_activation: A string or callable. The activation function to use on the `"top"` layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. - Defaults to `"softmax"`. When loading pretrained weights, `classifier_activation` can only be `None` or `"softmax"`. + Defaults to `"softmax"`. Returns: A `keras.Model` instance. @@ -589,19 +613,19 @@ def round_filters(filters, width_coefficient, min_depth, depth_divisor): - """Round number of filters based on depth multiplier.""" - filters *= width_coefficient - minimum_depth = min_depth or depth_divisor - new_filters = max( - minimum_depth, - int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, - ) - return int(new_filters) + """Round number of filters based on depth multiplier.""" + filters *= width_coefficient + minimum_depth = min_depth or depth_divisor + new_filters = max( + minimum_depth, + int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, + ) + return int(new_filters) def round_repeats(repeats, depth_coefficient): - """Round number of repeats based on depth multiplier.""" - return int(math.ceil(depth_coefficient * repeats)) + """Round number of repeats based on depth multiplier.""" + return int(math.ceil(depth_coefficient * repeats)) def MBConvBlock( @@ -616,103 +640,108 @@ def MBConvBlock( survival_probability: float = 0.8, name=None, ): - """MBConv block: Mobile Inverted Residual Bottleneck.""" - bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 - - if name is None: - name = backend.get_uid("block0") - - def apply(inputs): - # Expansion phase - filters = input_filters * expand_ratio - if expand_ratio != 1: - x = layers.Conv2D( - filters=filters, - kernel_size=1, - strides=1, - kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", - data_format="channels_last", - use_bias=False, - name=name + "expand_conv", - )(inputs) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - name=name + "expand_bn", - )(x) - x = layers.Activation(activation, name=name + "expand_activation")(x) - else: - x = inputs + """MBConv block: Mobile Inverted Residual Bottleneck.""" + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + if name is None: + name =
backend.get_uid("block0") + + def apply(inputs): + # Expansion phase + filters = input_filters * expand_ratio + if expand_ratio != 1: + x = layers.Conv2D( + filters=filters, + kernel_size=1, + strides=1, + kernel_initializer=CONV_KERNEL_INITIALIZER, + padding="same", + data_format="channels_last", + use_bias=False, + name=name + "expand_conv", + )(inputs) + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + name=name + "expand_bn", + )(x) + x = layers.Activation(activation, name=name + "expand_activation")( + x + ) + else: + x = inputs + + # Depthwise conv + x = layers.DepthwiseConv2D( + kernel_size=kernel_size, + strides=strides, + depthwise_initializer=CONV_KERNEL_INITIALIZER, + padding="same", + data_format="channels_last", + use_bias=False, + name=name + "dwconv2", + )(x) + x = layers.BatchNormalization( + axis=bn_axis, momentum=bn_momentum, name=name + "bn" + )(x) + x = layers.Activation(activation, name=name + "activation")(x) + + # Squeeze and excite + if 0 < se_ratio <= 1: + filters_se = max(1, int(input_filters * se_ratio)) + se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x) + if bn_axis == 1: + se_shape = (filters, 1, 1) + else: + se_shape = (1, 1, filters) + se = layers.Reshape(se_shape, name=name + "se_reshape")(se) + + se = layers.Conv2D( + filters_se, + 1, + padding="same", + activation=activation, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=name + "se_reduce", + )(se) + se = layers.Conv2D( + filters, + 1, + padding="same", + activation="sigmoid", + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=name + "se_expand", + )(se) + + x = layers.multiply([x, se], name=name + "se_excite") + + # Output phase + x = layers.Conv2D( + filters=output_filters, + kernel_size=1, + strides=1, + kernel_initializer=CONV_KERNEL_INITIALIZER, + padding="same", + data_format="channels_last", + use_bias=False, + name=name + "project_conv", + )(x) + x = layers.BatchNormalization( + axis=bn_axis, momentum=bn_momentum, name=name + "project_bn" + )(x) - # Depthwise conv - x = layers.DepthwiseConv2D( - kernel_size=kernel_size, - strides=strides, - depthwise_initializer=CONV_KERNEL_INITIALIZER, - padding="same", - data_format="channels_last", - use_bias=False, - name=name + "dwconv2", - )(x) - x = layers.BatchNormalization( - axis=bn_axis, momentum=bn_momentum, name=name + "bn")(x) - x = layers.Activation(activation, name=name + "activation")(x) - - # Squeeze and excite - if 0 < se_ratio <= 1: - filters_se = max(1, int(input_filters * se_ratio)) - se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x) - if bn_axis == 1: - se_shape = (filters, 1, 1) - else: - se_shape = (1, 1, filters) - se = layers.Reshape(se_shape, name=name + "se_reshape")(se) - - se = layers.Conv2D( - filters_se, - 1, - padding="same", - activation=activation, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "se_reduce", - )(se) - se = layers.Conv2D( - filters, - 1, - padding="same", - activation="sigmoid", - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "se_expand", - )(se) - - x = layers.multiply([x, se], name=name + "se_excite") - - # Output phase - x = layers.Conv2D( - filters=output_filters, - kernel_size=1, - strides=1, - kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", - data_format="channels_last", - use_bias=False, - name=name + "project_conv", - )(x) - x = layers.BatchNormalization( - axis=bn_axis, momentum=bn_momentum, name=name + "project_bn")(x) - - if strides == 1 and input_filters == output_filters: - if survival_probability: - x 
= layers.Dropout( - survival_probability, - noise_shape=(None, 1, 1, 1), - name=name + "drop", - )(x) - x = layers.add([x, inputs], name=name + "add") - return x + if strides == 1 and input_filters == output_filters: + if survival_probability: + x = layers.Dropout( + survival_probability, + noise_shape=(None, 1, 1, 1), + name=name + "drop", + )(x) + x = layers.add([x, inputs], name=name + "add") + + return x - return apply + return apply def FusedMBConvBlock( @@ -727,90 +756,95 @@ def FusedMBConvBlock( survival_probability: float = 0.8, name=None, ): - """Fused MBConv Block: Fusing the proj conv1x1 and depthwise_conv into a conv2d.""" - bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 - - if name is None: - name = backend.get_uid("block0") - - def apply(inputs): - filters = input_filters * expand_ratio - if expand_ratio != 1: - x = layers.Conv2D( - filters, - kernel_size=kernel_size, - strides=strides, - kernel_initializer=CONV_KERNEL_INITIALIZER, - data_format="channels_last", - padding="same", - use_bias=False, - name=name + "expand_conv", - )(inputs) - x = layers.BatchNormalization( - axis=bn_axis, momentum=bn_momentum, name=name + "expand_bn")(x) - x = layers.Activation( - activation=activation, name=name + "expand_activation")(x) - else: - x = inputs - - # Squeeze and excite - if 0 < se_ratio <= 1: - filters_se = max(1, int(input_filters * se_ratio)) - se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x) - if bn_axis == 1: - se_shape = (filters, 1, 1) - else: - se_shape = (1, 1, filters) - - se = layers.Reshape(se_shape, name=name + "se_reshape")(se) - - se = layers.Conv2D( - filters_se, - 1, - padding="same", - activation=activation, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "se_reduce", - )(se) - se = layers.Conv2D( - filters, - 1, - padding="same", - activation="sigmoid", - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "se_expand", - )(se) - - x = layers.multiply([x, se], name=name + "se_excite") - - # Output phase: - x = layers.Conv2D( - output_filters, - kernel_size=1 if expand_ratio != 1 else kernel_size, - strides=1 if expand_ratio != 1 else strides, - kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", - use_bias=False, - name=name + "project_conv", - )(x) - x = layers.BatchNormalization( - axis=bn_axis, momentum=bn_momentum, name=name + "project_bn")(x) - if expand_ratio == 1: - x = layers.Activation( - activation=activation, name=name + "project_activation")(x) - - # Residual: - if strides == 1 and input_filters == output_filters: - if survival_probability: - x = layers.Dropout( - survival_probability, - noise_shape=(None, 1, 1, 1), - name=name + "drop", + """Fused MBConv Block: Fusing the proj conv1x1 and depthwise_conv into a + conv2d.""" + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + if name is None: + name = backend.get_uid("block0") + + def apply(inputs): + filters = input_filters * expand_ratio + if expand_ratio != 1: + x = layers.Conv2D( + filters, + kernel_size=kernel_size, + strides=strides, + kernel_initializer=CONV_KERNEL_INITIALIZER, + data_format="channels_last", + padding="same", + use_bias=False, + name=name + "expand_conv", + )(inputs) + x = layers.BatchNormalization( + axis=bn_axis, momentum=bn_momentum, name=name + "expand_bn" + )(x) + x = layers.Activation( + activation=activation, name=name + "expand_activation" + )(x) + else: + x = inputs + + # Squeeze and excite + if 0 < se_ratio <= 1: + filters_se = max(1, int(input_filters * se_ratio)) + se = 
layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x) + if bn_axis == 1: + se_shape = (filters, 1, 1) + else: + se_shape = (1, 1, filters) + + se = layers.Reshape(se_shape, name=name + "se_reshape")(se) + + se = layers.Conv2D( + filters_se, + 1, + padding="same", + activation=activation, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=name + "se_reduce", + )(se) + se = layers.Conv2D( + filters, + 1, + padding="same", + activation="sigmoid", + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=name + "se_expand", + )(se) + + x = layers.multiply([x, se], name=name + "se_excite") + + # Output phase: + x = layers.Conv2D( + output_filters, + kernel_size=1 if expand_ratio != 1 else kernel_size, + strides=1 if expand_ratio != 1 else strides, + kernel_initializer=CONV_KERNEL_INITIALIZER, + padding="same", + use_bias=False, + name=name + "project_conv", )(x) - x = layers.add([x, inputs], name=name + "add") - return x + x = layers.BatchNormalization( + axis=bn_axis, momentum=bn_momentum, name=name + "project_bn" + )(x) + if expand_ratio == 1: + x = layers.Activation( + activation=activation, name=name + "project_activation" + )(x) + + # Residual: + if strides == 1 and input_filters == output_filters: + if survival_probability: + x = layers.Dropout( + survival_probability, + noise_shape=(None, 1, 1, 1), + name=name + "drop", + )(x) + x = layers.add([x, inputs], name=name + "add") + return x - return apply + return apply def EfficientNetV2( @@ -834,238 +868,255 @@ def EfficientNetV2( classifier_activation="softmax", include_preprocessing=True, ): - """Instantiates the EfficientNetV2 architecture using given scaling coefficients. - - Args: - width_coefficient: float, scaling coefficient for network width. - depth_coefficient: float, scaling coefficient for network depth. - default_size: integer, default input image size. - dropout_rate: float, dropout rate before final classifier layer. - drop_connect_rate: float, dropout rate at skip connections. - depth_divisor: integer, a unit of network width. - min_depth: integer, minimum number of filters. - bn_momentum: float. Momentum parameter for Batch Normalization layers. - activation: activation function. - blocks_args: list of dicts, parameters to construct block modules. - model_name: string, model name. - include_top: whether to include the fully-connected layer at the top of the - network. - weights: one of `None` (random initialization), `"imagenet"` (pre-training - on ImageNet), or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) or - numpy array to use as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is - False. It should have exactly 3 inputs channels. - pooling: optional pooling mode for feature extraction when `include_top` is - `False`. - `None` means that the output of the model will be the 4D tensor - output of the last convolutional layer. - "avg" means that global average - pooling will be applied to the output of the last convolutional layer, and - thus the output of the model will be a 2D tensor. - `"max"` means that - global max pooling will be applied. - classes: optional number of classes to classify images into, only to be - specified if `include_top` is True, and if no `weights` argument is - specified. - classifier_activation: A string or callable. The activation function to use - on the `"top"` layer. Ignored unless `include_top=True`. 
Set - `classifier_activation=None` to return the logits of the `"top"` layer. - include_preprocessing: Boolean, whether to include the preprocessing layer - (`Rescaling`) at the bottom of the network. Defaults to `True`. - - Returns: - A `keras.Model` instance. - - Raises: - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - ValueError: if `classifier_activation` is not `"softmax"` or `None` when - using a pretrained top layer. - """ - - if blocks_args == "default": - blocks_args = DEFAULT_BLOCKS_ARGS[model_name] - - if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): - raise ValueError("The `weights` argument should be either " - "`None` (random initialization), `imagenet` " - "(pre-training on ImageNet), " - "or the path to the weights file to be loaded." - f"Received: weights={weights}") - - if weights == "imagenet" and include_top and classes != 1000: - raise ValueError("If using `weights` as `'imagenet'` with `include_top`" - " as true, `classes` should be 1000" - f"Received: classes={classes}") - - # Determine proper input shape - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 - - x = img_input - - if include_preprocessing: - # Apply original V1 preprocessing for Bx variants - # if number of channels allows it - num_channels = input_shape[bn_axis - 1] - if model_name.split("-")[-1].startswith("b") and num_channels == 3: - x = layers.Rescaling(scale=1. / 255)(x) - x = layers.Normalization( - mean=[0.485, 0.456, 0.406], - variance=[0.229**2, 0.224**2, 0.225**2], - axis=bn_axis, - )(x) + """Instantiates the EfficientNetV2 architecture using given scaling + coefficients. + + Args: + width_coefficient: float, scaling coefficient for network width. + depth_coefficient: float, scaling coefficient for network depth. + default_size: integer, default input image size. + dropout_rate: float, dropout rate before final classifier layer. + drop_connect_rate: float, dropout rate at skip connections. + depth_divisor: integer, a unit of network width. + min_depth: integer, minimum number of filters. + bn_momentum: float. Momentum parameter for Batch Normalization layers. + activation: activation function. + blocks_args: list of dicts, parameters to construct block modules. + model_name: string, model name. + include_top: whether to include the fully-connected layer at the top of + the network. + weights: one of `None` (random initialization), `"imagenet"` (pre-training + on ImageNet), or the path to the weights file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) or + numpy array to use as image input for the model. + input_shape: optional shape tuple, only to be specified if `include_top` + is False. It should have exactly 3 input channels. + pooling: optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` means that the output of the model will be the 4D tensor output + of the last convolutional layer.
+ - "avg" means that global average pooling will be applied to the output + of the last convolutional layer, and thus the output of the model will + be a 2D tensor. + - `"max"` means that global max pooling will be applied. + classes: optional number of classes to classify images into, only to be + specified if `include_top` is True, and if no `weights` argument is + specified. + classifier_activation: A string or callable. The activation function to + use on the `"top"` layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the `"top"` layer. + include_preprocessing: Boolean, whether to include the preprocessing layer + (`Rescaling`) at the bottom of the network. Defaults to `True`. + + Returns: + A `keras.Model` instance. + + Raises: + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + ValueError: if `classifier_activation` is not `"softmax"` or `None` when + using a pretrained top layer. + """ + + if blocks_args == "default": + blocks_args = DEFAULT_BLOCKS_ARGS[model_name] + + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded." + f"Received: weights={weights}" + ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + "If using `weights` as `'imagenet'` with `include_top`" + " as true, `classes` should be 1000" + f"Received: classes={classes}" + ) + + # Determine proper input shape + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=default_size, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - x = layers.Rescaling(scale=1. / 128.0, offset=-1)(x) - - # Build stem - stem_filters = round_filters( - filters=blocks_args[0]["input_filters"], - width_coefficient=width_coefficient, - min_depth=min_depth, - depth_divisor=depth_divisor, - ) - x = layers.Conv2D( - filters=stem_filters, - kernel_size=3, - strides=2, - kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", - use_bias=False, - name="stem_conv", - )(x) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - name="stem_bn", - )(x) - x = layers.Activation(activation, name="stem_activation")(x) - - # Build blocks - blocks_args = copy.deepcopy(blocks_args) - b = 0 - blocks = float(sum(args["num_repeat"] for args in blocks_args)) - - for (i, args) in enumerate(blocks_args): - assert args["num_repeat"] > 0 - - # Update block input and output filters based on depth multiplier. 
- args["input_filters"] = round_filters( - filters=args["input_filters"], + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + x = img_input + + if include_preprocessing: + # Apply original V1 preprocessing for Bx variants + # if number of channels allows it + num_channels = input_shape[bn_axis - 1] + if model_name.split("-")[-1].startswith("b") and num_channels == 3: + x = layers.Rescaling(scale=1.0 / 255)(x) + x = layers.Normalization( + mean=[0.485, 0.456, 0.406], + variance=[0.229**2, 0.224**2, 0.225**2], + axis=bn_axis, + )(x) + else: + x = layers.Rescaling(scale=1.0 / 128.0, offset=-1)(x) + + # Build stem + stem_filters = round_filters( + filters=blocks_args[0]["input_filters"], width_coefficient=width_coefficient, min_depth=min_depth, - depth_divisor=depth_divisor) - args["output_filters"] = round_filters( - filters=args["output_filters"], + depth_divisor=depth_divisor, + ) + x = layers.Conv2D( + filters=stem_filters, + kernel_size=3, + strides=2, + kernel_initializer=CONV_KERNEL_INITIALIZER, + padding="same", + use_bias=False, + name="stem_conv", + )(x) + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + name="stem_bn", + )(x) + x = layers.Activation(activation, name="stem_activation")(x) + + # Build blocks + blocks_args = copy.deepcopy(blocks_args) + b = 0 + blocks = float(sum(args["num_repeat"] for args in blocks_args)) + + for i, args in enumerate(blocks_args): + assert args["num_repeat"] > 0 + + # Update block input and output filters based on depth multiplier. + args["input_filters"] = round_filters( + filters=args["input_filters"], + width_coefficient=width_coefficient, + min_depth=min_depth, + depth_divisor=depth_divisor, + ) + args["output_filters"] = round_filters( + filters=args["output_filters"], + width_coefficient=width_coefficient, + min_depth=min_depth, + depth_divisor=depth_divisor, + ) + + # Determine which conv type to use: + block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")] + repeats = round_repeats( + repeats=args.pop("num_repeat"), depth_coefficient=depth_coefficient + ) + for j in range(repeats): + # The first block needs to take care of stride and filter size + # increase. + if j > 0: + args["strides"] = 1 + args["input_filters"] = args["output_filters"] + + x = block( + activation=activation, + bn_momentum=bn_momentum, + survival_probability=drop_connect_rate * b / blocks, + name=f"block{i + 1}{chr(j + 97)}_", + **args, + )(x) + b += 1 + + # Build top + top_filters = round_filters( + filters=1280, width_coefficient=width_coefficient, min_depth=min_depth, - depth_divisor=depth_divisor) - - # Determine which conv type to use: - block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")] - repeats = round_repeats( - repeats=args.pop("num_repeat"), depth_coefficient=depth_coefficient) - for j in range(repeats): - # The first block needs to take care of stride and filter size increase. 
- if j > 0: - args["strides"] = 1 - args["input_filters"] = args["output_filters"] - - x = block( - activation=activation, - bn_momentum=bn_momentum, - survival_probability=drop_connect_rate * b / blocks, - name="block{}{}_".format(i + 1, chr(j + 97)), - **args, - )(x) - b += 1 - - # Build top - top_filters = round_filters( - filters=1280, - width_coefficient=width_coefficient, - min_depth=min_depth, - depth_divisor=depth_divisor) - x = layers.Conv2D( - filters=top_filters, - kernel_size=1, - strides=1, - kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", - data_format="channels_last", - use_bias=False, - name="top_conv", - )(x) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - name="top_bn", - )(x) - x = layers.Activation(activation=activation, name="top_activation")(x) - - if include_top: - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - if dropout_rate > 0: - x = layers.Dropout(dropout_rate, name="top_dropout")(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense( - classes, - activation=classifier_activation, - kernel_initializer=DENSE_KERNEL_INITIALIZER, - bias_initializer=tf.constant_initializer(0), - name="predictions")(x) - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D(name="max_pool")(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = training.Model(inputs, x, name=model_name) - - # Load weights. - if weights == "imagenet": + depth_divisor=depth_divisor, + ) + x = layers.Conv2D( + filters=top_filters, + kernel_size=1, + strides=1, + kernel_initializer=CONV_KERNEL_INITIALIZER, + padding="same", + data_format="channels_last", + use_bias=False, + name="top_conv", + )(x) + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + name="top_bn", + )(x) + x = layers.Activation(activation=activation, name="top_activation")(x) + if include_top: - file_suffix = ".h5" - file_hash = WEIGHTS_HASHES[model_name[-2:]][0] + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + if dropout_rate > 0: + x = layers.Dropout(dropout_rate, name="top_dropout")(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, + activation=classifier_activation, + kernel_initializer=DENSE_KERNEL_INITIALIZER, + bias_initializer=tf.constant_initializer(0), + name="predictions", + )(x) else: - file_suffix = "_notop.h5" - file_hash = WEIGHTS_HASHES[model_name[-2:]][1] - file_name = model_name + file_suffix - weights_path = data_utils.get_file( - file_name, - BASE_WEIGHTS_PATH + file_name, - cache_subdir="models", - file_hash=file_hash) - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -@keras_export("keras.applications.efficientnet_v2.EfficientNetV2B0", - "keras.applications.EfficientNetV2B0") + if pooling == "avg": + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D(name="max_pool")(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. 
+ model = training.Model(inputs, x, name=model_name) + + # Load weights. + if weights == "imagenet": + if include_top: + file_suffix = ".h5" + file_hash = WEIGHTS_HASHES[model_name[-2:]][0] + else: + file_suffix = "_notop.h5" + file_hash = WEIGHTS_HASHES[model_name[-2:]][1] + file_name = model_name + file_suffix + weights_path = data_utils.get_file( + file_name, + BASE_WEIGHTS_PATH + file_name, + cache_subdir="models", + file_hash=file_hash, + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +@keras_export( + "keras.applications.efficientnet_v2.EfficientNetV2B0", + "keras.applications.EfficientNetV2B0", +) def EfficientNetV2B0( include_top=True, weights="imagenet", @@ -1076,23 +1127,26 @@ def EfficientNetV2B0( classifier_activation="softmax", include_preprocessing=True, ): - return EfficientNetV2( - width_coefficient=1.0, - depth_coefficient=1.0, - default_size=224, - model_name="efficientnetv2-b0", - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - include_preprocessing=include_preprocessing) - - -@keras_export("keras.applications.efficientnet_v2.EfficientNetV2B1", - "keras.applications.EfficientNetV2B1") + return EfficientNetV2( + width_coefficient=1.0, + depth_coefficient=1.0, + default_size=224, + model_name="efficientnetv2-b0", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.efficientnet_v2.EfficientNetV2B1", + "keras.applications.EfficientNetV2B1", +) def EfficientNetV2B1( include_top=True, weights="imagenet", @@ -1103,24 +1157,26 @@ def EfficientNetV2B1( classifier_activation="softmax", include_preprocessing=True, ): - return EfficientNetV2( - width_coefficient=1.0, - depth_coefficient=1.1, - default_size=240, - model_name="efficientnetv2-b1", - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.efficientnet_v2.EfficientNetV2B2", - "keras.applications.EfficientNetV2B2") + return EfficientNetV2( + width_coefficient=1.0, + depth_coefficient=1.1, + default_size=240, + model_name="efficientnetv2-b1", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.efficientnet_v2.EfficientNetV2B2", + "keras.applications.EfficientNetV2B2", +) def EfficientNetV2B2( include_top=True, weights="imagenet", @@ -1131,24 +1187,26 @@ def EfficientNetV2B2( classifier_activation="softmax", include_preprocessing=True, ): - return EfficientNetV2( - width_coefficient=1.1, - depth_coefficient=1.2, - default_size=260, - model_name="efficientnetv2-b2", - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - include_preprocessing=include_preprocessing, - ) - - 
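The EfficientNetV2 B-variant wrappers here differ only in the scaling coefficients and default image size they forward to `EfficientNetV2()`; weights loading, pooling, and the built-in preprocessing are shared. A minimal, illustrative usage sketch (assuming a TensorFlow release that ships these models under `tf.keras.applications`):

import tensorflow as tf

# EfficientNetV2B2 as a frozen feature extractor; 260x260 is the
# default_size wired into the B2 wrapper.
backbone = tf.keras.applications.EfficientNetV2B2(
    include_top=False,  # drop the 1000-way ImageNet classifier head
    weights="imagenet",
    input_shape=(260, 260, 3),
    pooling="avg",  # global average pooling -> 2D feature tensor
)
backbone.trainable = False

# Raw [0, 255] pixels are fine: the Rescaling/Normalization preprocessing
# is part of the graph because include_preprocessing defaults to True.
images = tf.random.uniform((2, 260, 260, 3), maxval=255.0)
features = backbone(images, training=False)
print(features.shape)  # expected (2, 1408) for the B2 width multiplier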
-@keras_export("keras.applications.efficientnet_v2.EfficientNetV2B3", - "keras.applications.EfficientNetV2B3") + return EfficientNetV2( + width_coefficient=1.1, + depth_coefficient=1.2, + default_size=260, + model_name="efficientnetv2-b2", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.efficientnet_v2.EfficientNetV2B3", + "keras.applications.EfficientNetV2B3", +) def EfficientNetV2B3( include_top=True, weights="imagenet", @@ -1159,24 +1217,26 @@ def EfficientNetV2B3( classifier_activation="softmax", include_preprocessing=True, ): - return EfficientNetV2( - width_coefficient=1.2, - depth_coefficient=1.4, - default_size=300, - model_name="efficientnetv2-b3", - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.efficientnet_v2.EfficientNetV2S", - "keras.applications.EfficientNetV2S") + return EfficientNetV2( + width_coefficient=1.2, + depth_coefficient=1.4, + default_size=300, + model_name="efficientnetv2-b3", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.efficientnet_v2.EfficientNetV2S", + "keras.applications.EfficientNetV2S", +) def EfficientNetV2S( include_top=True, weights="imagenet", @@ -1187,24 +1247,26 @@ def EfficientNetV2S( classifier_activation="softmax", include_preprocessing=True, ): - return EfficientNetV2( - width_coefficient=1.0, - depth_coefficient=1.0, - default_size=384, - model_name="efficientnetv2-s", - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.efficientnet_v2.EfficientNetV2M", - "keras.applications.EfficientNetV2M") + return EfficientNetV2( + width_coefficient=1.0, + depth_coefficient=1.0, + default_size=384, + model_name="efficientnetv2-s", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.efficientnet_v2.EfficientNetV2M", + "keras.applications.EfficientNetV2M", +) def EfficientNetV2M( include_top=True, weights="imagenet", @@ -1215,24 +1277,26 @@ def EfficientNetV2M( classifier_activation="softmax", include_preprocessing=True, ): - return EfficientNetV2( - width_coefficient=1.0, - depth_coefficient=1.0, - default_size=480, - model_name="efficientnetv2-m", - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.efficientnet_v2.EfficientNetV2L", - "keras.applications.EfficientNetV2L") + return EfficientNetV2( + 
width_coefficient=1.0, + depth_coefficient=1.0, + default_size=480, + model_name="efficientnetv2-m", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.efficientnet_v2.EfficientNetV2L", + "keras.applications.EfficientNetV2L", +) def EfficientNetV2L( include_top=True, weights="imagenet", @@ -1243,20 +1307,20 @@ def EfficientNetV2L( classifier_activation="softmax", include_preprocessing=True, ): - return EfficientNetV2( - width_coefficient=1.0, - depth_coefficient=1.0, - default_size=480, - model_name="efficientnetv2-l", - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation, - include_preprocessing=include_preprocessing, - ) + return EfficientNetV2( + width_coefficient=1.0, + depth_coefficient=1.0, + default_size=480, + model_name="efficientnetv2-l", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + include_preprocessing=include_preprocessing, + ) EfficientNetV2B0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B0") @@ -1269,30 +1333,30 @@ def EfficientNetV2L( @keras_export("keras.applications.efficientnet_v2.preprocess_input") -def preprocess_input(x, data_format=None): # pylint: disable=unused-argument - """A placeholder method for backward compatibility. - - The preprocessing logic has been included in the EfficientNetV2 model - implementation. Users are no longer required to call this method to normalize - the input data. This method does nothing and only kept as a placeholder to - align the API surface between old and new version of model. - - Args: - x: A floating point `numpy.array` or a `tf.Tensor`. - data_format: Optional data format of the image tensor/array. Defaults to - None, in which case the global setting - `tf.keras.backend.image_data_format()` is used (unless you changed it, it - defaults to "channels_last").{mode} - - Returns: - Unchanged `numpy.array` or `tf.Tensor`. - """ - return x +def preprocess_input(x, data_format=None): + """A placeholder method for backward compatibility. + + The preprocessing logic has been included in the EfficientNetV2 model + implementation. Users are no longer required to call this method to + normalize the input data. This method does nothing and is only kept as + a placeholder to align the API surface between the old and new versions + of the model. + + Args: + x: A floating point `numpy.array` or a `tf.Tensor`. + data_format: Optional data format of the image tensor/array. `None` means + the global setting `tf.keras.backend.image_data_format()` is used + (unless you changed it, it uses "channels_last"). + Defaults to `None`. + + Returns: + Unchanged `numpy.array` or `tf.Tensor`.
+ """ + return x @keras_export("keras.applications.efficientnet_v2.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/efficientnet_weight_update_util.py b/keras/applications/efficientnet_weight_update_util.py deleted file mode 100644 index cc86cb02bbd1..000000000000 --- a/keras/applications/efficientnet_weight_update_util.py +++ /dev/null @@ -1,373 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Utils for EfficientNet models for Keras. - -Write weights from ckpt file as in original repo -(https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) -to h5 file for keras implementation of the models. - -Usage: - -# use checkpoint efficientnet-b0/model.ckpt (can be downloaded from -# https://storage.googleapis.com/cloud-tpu-checkpoints/ -# efficientnet/ckptsaug/efficientnet-b0.tar.gz) -# to update weight without top layers, saving to efficientnetb0_notop.h5 -python efficientnet_weight_update_util.py --model b0 --notop \ - --ckpt efficientnet-b0/model.ckpt --o efficientnetb0_notop.h5 - -# use checkpoint noisy_student_efficientnet-b3/model.ckpt (providing -# improved result for b3, can be downloaded from -# https://storage.googleapis.com/cloud-tpu-checkpoints/ -# efficientnet/noisystudent/noisy_student_efficientnet-b3.tar.gz) -# to update weight with top layers, saving to efficientnetb3_new.h5 -python efficientnet_weight_update_util.py --model b3 --notop \ - --ckpt noisy_student_efficientnet-b3/model.ckpt --o efficientnetb3_new.h5 -""" - -import argparse -import warnings - -from keras.utils import io_utils -import tensorflow.compat.v2 as tf -from tensorflow.keras.applications import efficientnet - - -def write_ckpt_to_h5(path_h5, path_ckpt, keras_model, use_ema=True): - """Map the weights in checkpoint file (tf) to h5 file (keras). - - Args: - path_h5: str, path to output hdf5 file to write weights loaded from ckpt - files. - path_ckpt: str, path to the ckpt files (e.g. 'efficientnet-b0/model.ckpt') - that records efficientnet weights from original repo - https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet - keras_model: keras model, built from keras.applications efficientnet - functions (e.g. 
EfficientNetB0) - use_ema: Bool, whether to use ExponentialMovingAverage result or not - """ - model_name_keras = keras_model.name - model_name_tf = model_name_keras.replace('efficientnet', 'efficientnet-') - - keras_weight_names = [w.name for w in keras_model.weights] - tf_weight_names = get_variable_names_from_ckpt(path_ckpt) - - keras_blocks = get_keras_blocks(keras_weight_names) - tf_blocks = get_tf_blocks(tf_weight_names) - - io_utils.print_msg('check variables match in each block') - for keras_block, tf_block in zip(keras_blocks, tf_blocks): - check_match(keras_block, tf_block, keras_weight_names, tf_weight_names, - model_name_tf) - io_utils.print_msg('{} and {} match.'.format(tf_block, keras_block)) - - block_mapping = {x[0]: x[1] for x in zip(keras_blocks, tf_blocks)} - - changed_weights = 0 - for w in keras_model.weights: - if 'block' in w.name: - # example: 'block1a_dwconv/depthwise_kernel:0' -> 'block1a' - keras_block = w.name.split('/')[0].split('_')[0] - tf_block = block_mapping[keras_block] - tf_name = keras_name_to_tf_name_block( - w.name, - keras_block=keras_block, - tf_block=tf_block, - use_ema=use_ema, - model_name_tf=model_name_tf) - elif any([x in w.name for x in ['stem', 'top', 'predictions', 'probs']]): - tf_name = keras_name_to_tf_name_stem_top( - w.name, use_ema=use_ema, model_name_tf=model_name_tf) - elif 'normalization' in w.name: - io_utils.print_msg( - f'Skipping variable {w.name}: normalization is a Keras ' - 'preprocessing layer, which does not exist in the TF ckpt.') - continue - else: - raise ValueError('{} failed to parse.'.format(w.name)) - - try: - w_tf = tf.train.load_variable(path_ckpt, tf_name) - if (w.value().numpy() != w_tf).any(): - w.assign(w_tf) - changed_weights += 1 - except ValueError as e: - if any([x in w.name for x in ['top', 'predictions', 'probs']]): - warnings.warn( - 'Fail to load top layer variable {}' - 'from {} because of {}.'.format(w.name, tf_name, e), - stacklevel=2) - else: - raise ValueError('Fail to load {} from {}'.format(w.name, tf_name)) - - total_weights = len(keras_model.weights) - io_utils.print_msg(f'{changed_weights}/{total_weights} weights updated') - keras_model.save_weights(path_h5) - - -def get_variable_names_from_ckpt(path_ckpt, use_ema=True): - """Get list of tensor names from checkpoint. - - Args: - path_ckpt: str, path to the ckpt files - use_ema: Bool, whether to use ExponentialMovingAverage result or not. - Returns: - List of variable names from checkpoint. 
- """ - v_all = tf.train.list_variables(path_ckpt) - - # keep name only - v_name_all = [x[0] for x in v_all] - - if use_ema: - v_name_all = [x for x in v_name_all if 'ExponentialMovingAverage' in x] - else: - v_name_all = [x for x in v_name_all if 'ExponentialMovingAverage' not in x] - - # remove util variables used for RMSprop - v_name_all = [x for x in v_name_all if 'RMS' not in x] - return v_name_all - - -def get_tf_blocks(tf_weight_names): - """Extract the block names from list of full weight names.""" - # Example: 'efficientnet-b0/blocks_0/conv2d/kernel' -> 'blocks_0' - tf_blocks = {x.split('/')[1] for x in tf_weight_names if 'block' in x} - # sort by number - tf_blocks = sorted(tf_blocks, key=lambda x: int(x.split('_')[1])) - return tf_blocks - - -def get_keras_blocks(keras_weight_names): - """Extract the block names from list of full weight names.""" - # example: 'block1a_dwconv/depthwise_kernel:0' -> 'block1a' - keras_blocks = {x.split('_')[0] for x in keras_weight_names if 'block' in x} - return sorted(keras_blocks) - - -def keras_name_to_tf_name_stem_top(keras_name, - use_ema=True, - model_name_tf='efficientnet-b0'): - """Mapping name in h5 to ckpt that is in stem or top (head). - - we map name keras_name that points to a weight in h5 file - to a name of weight in ckpt file. - - Args: - keras_name: str, the name of weight in the h5 file of keras implementation - use_ema: Bool, use the ExponentialMovingAverage resuolt in ckpt or not - model_name_tf: str, the name of model in ckpt. - - Returns: - String for the name of weight as in ckpt file. - - Raises: - KeyError: if we cannot parse the keras_name. - """ - if use_ema: - ema = '/ExponentialMovingAverage' - else: - ema = '' - - stem_top_dict = { - 'probs/bias:0': '{}/head/dense/bias{}', - 'probs/kernel:0': '{}/head/dense/kernel{}', - 'predictions/bias:0': '{}/head/dense/bias{}', - 'predictions/kernel:0': '{}/head/dense/kernel{}', - 'stem_conv/kernel:0': '{}/stem/conv2d/kernel{}', - 'top_conv/kernel:0': '{}/head/conv2d/kernel{}', - } - for x in stem_top_dict: - stem_top_dict[x] = stem_top_dict[x].format(model_name_tf, ema) - - # stem batch normalization - for bn_weights in ['beta', 'gamma', 'moving_mean', 'moving_variance']: - tf_name = '{}/stem/tpu_batch_normalization/{}{}'.format( - model_name_tf, bn_weights, ema) - stem_top_dict['stem_bn/{}:0'.format(bn_weights)] = tf_name - - # top / head batch normalization - for bn_weights in ['beta', 'gamma', 'moving_mean', 'moving_variance']: - tf_name = '{}/head/tpu_batch_normalization/{}{}'.format( - model_name_tf, bn_weights, ema) - stem_top_dict['top_bn/{}:0'.format(bn_weights)] = tf_name - - if keras_name in stem_top_dict: - return stem_top_dict[keras_name] - raise KeyError('{} from h5 file cannot be parsed'.format(keras_name)) - - -def keras_name_to_tf_name_block(keras_name, - keras_block='block1a', - tf_block='blocks_0', - use_ema=True, - model_name_tf='efficientnet-b0'): - """Mapping name in h5 to ckpt that belongs to a block. - - we map name keras_name that points to a weight in h5 file - to a name of weight in ckpt file. - - Args: - keras_name: str, the name of weight in the h5 file of keras implementation - keras_block: str, the block name for keras implementation (e.g. 'block1a') - tf_block: str, the block name for tf implementation (e.g. 'blocks_0') - use_ema: Bool, use the ExponentialMovingAverage resuolt in ckpt or not - model_name_tf: str, the name of model in ckpt. - - Returns: - String for the name of weight as in ckpt file. 
- - Raises: - ValueError if keras_block does not show up in keras_name - """ - - if keras_block not in keras_name: - raise ValueError('block name {} not found in {}'.format( - keras_block, keras_name)) - - # all blocks in the first group will not have expand conv and bn - is_first_blocks = (keras_block[5] == '1') - - tf_name = [model_name_tf, tf_block] - - # depthwide conv - if 'dwconv' in keras_name: - tf_name.append('depthwise_conv2d') - tf_name.append('depthwise_kernel') - - # conv layers - if is_first_blocks: - # first blocks only have one conv2d - if 'project_conv' in keras_name: - tf_name.append('conv2d') - tf_name.append('kernel') - else: - if 'project_conv' in keras_name: - tf_name.append('conv2d_1') - tf_name.append('kernel') - elif 'expand_conv' in keras_name: - tf_name.append('conv2d') - tf_name.append('kernel') - - # squeeze expansion layers - if '_se_' in keras_name: - if 'reduce' in keras_name: - tf_name.append('se/conv2d') - elif 'expand' in keras_name: - tf_name.append('se/conv2d_1') - - if 'kernel' in keras_name: - tf_name.append('kernel') - elif 'bias' in keras_name: - tf_name.append('bias') - - # batch normalization layers - if 'bn' in keras_name: - if is_first_blocks: - if 'project' in keras_name: - tf_name.append('tpu_batch_normalization_1') - else: - tf_name.append('tpu_batch_normalization') - else: - if 'project' in keras_name: - tf_name.append('tpu_batch_normalization_2') - elif 'expand' in keras_name: - tf_name.append('tpu_batch_normalization') - else: - tf_name.append('tpu_batch_normalization_1') - - for x in ['moving_mean', 'moving_variance', 'beta', 'gamma']: - if x in keras_name: - tf_name.append(x) - if use_ema: - tf_name.append('ExponentialMovingAverage') - return '/'.join(tf_name) - - -def check_match(keras_block, tf_block, keras_weight_names, tf_weight_names, - model_name_tf): - """Check if the weights in h5 and ckpt match. - - we match each name from keras_weight_names that is in keras_block - and check if there is 1-1 correspondence to names from tf_weight_names - that is in tf_block - - Args: - keras_block: str, the block name for keras implementation (e.g. 'block1a') - tf_block: str, the block name for tf implementation (e.g. 'blocks_0') - keras_weight_names: list of str, weight names in keras implementation - tf_weight_names: list of str, weight names in tf implementation - model_name_tf: str, the name of model in ckpt. 
- """ - names_from_keras = set() - for x in keras_weight_names: - if keras_block in x: - y = keras_name_to_tf_name_block( - x, - keras_block=keras_block, - tf_block=tf_block, - model_name_tf=model_name_tf) - names_from_keras.add(y) - - names_from_tf = set() - for x in tf_weight_names: - if tf_block in x and x.split('/')[1].endswith(tf_block): - names_from_tf.add(x) - - names_missing = names_from_keras - names_from_tf - if names_missing: - raise ValueError('{} variables not found in checkpoint file: {}'.format( - len(names_missing), names_missing)) - - names_unused = names_from_tf - names_from_keras - if names_unused: - warnings.warn( - '{} variables from checkpoint file are not used: {}'.format( - len(names_unused), names_unused), - stacklevel=2) - - -if __name__ == '__main__': - arg_to_model = { - 'b0': efficientnet.EfficientNetB0, - 'b1': efficientnet.EfficientNetB1, - 'b2': efficientnet.EfficientNetB2, - 'b3': efficientnet.EfficientNetB3, - 'b4': efficientnet.EfficientNetB4, - 'b5': efficientnet.EfficientNetB5, - 'b6': efficientnet.EfficientNetB6, - 'b7': efficientnet.EfficientNetB7 - } - - p = argparse.ArgumentParser(description='write weights from checkpoint to h5') - p.add_argument( - '--model', - required=True, - type=str, - help='name of efficient model', - choices=arg_to_model.keys()) - p.add_argument( - '--notop', - action='store_true', - help='do not include top layers', - default=False) - p.add_argument('--ckpt', required=True, type=str, help='checkpoint path') - p.add_argument( - '--output', '-o', required=True, type=str, help='output (h5) file path') - args = p.parse_args() - - include_top = not args.notop - - model = arg_to_model[args.model](include_top=include_top) - write_ckpt_to_h5(args.output, args.ckpt, keras_model=model) diff --git a/keras/applications/imagenet_utils.py b/keras/applications/imagenet_utils.py index acecccccdf68..3aafbad0a174 100644 --- a/keras/applications/imagenet_utils.py +++ b/keras/applications/imagenet_utils.py @@ -22,12 +22,15 @@ from keras import activations from keras import backend from keras.utils import data_utils -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export CLASS_INDEX = None -CLASS_INDEX_PATH = ('https://storage.googleapis.com/download.tensorflow.org/' - 'data/imagenet_class_index.json') +CLASS_INDEX_PATH = ( + "https://storage.googleapis.com/download.tensorflow.org/" + "data/imagenet_class_index.json" +) PREPROCESS_INPUT_DOC = """ @@ -53,10 +56,10 @@ The preprocessed data are written over the input data if the data types are compatible. To avoid this behaviour, `numpy.copy(x)` can be used. - data_format: Optional data format of the image tensor/array. Defaults to - None, in which case the global setting - `tf.keras.backend.image_data_format()` is used (unless you changed it, - it defaults to "channels_last").{mode} + data_format: Optional data format of the image tensor/array. None, means + the global setting `tf.keras.backend.image_data_format()` is used + (unless you changed it, it uses "channels_last").{mode} + Defaults to `None`. Returns: Preprocessed `numpy.array` or a `tf.Tensor` with type `float32`. @@ -67,7 +70,7 @@ """ PREPROCESS_INPUT_MODE_DOC = """ - mode: One of "caffe", "tf" or "torch". Defaults to "caffe". + mode: One of "caffe", "tf" or "torch". 
- caffe: will convert the images from RGB to BGR, then will zero-center each color channel with respect to the ImageNet dataset, @@ -77,6 +80,7 @@ - torch: will scale pixels between 0 and 1 and then will normalize each channel with respect to the ImageNet dataset. + Defaults to "caffe". """ PREPROCESS_INPUT_DEFAULT_ERROR_DOC = """ @@ -97,344 +101,381 @@ zero-centered with respect to the ImageNet dataset, without scaling.""" -@keras_export('keras.applications.imagenet_utils.preprocess_input') -def preprocess_input(x, data_format=None, mode='caffe'): - """Preprocesses a tensor or Numpy array encoding a batch of images.""" - if mode not in {'caffe', 'tf', 'torch'}: - raise ValueError('Expected mode to be one of `caffe`, `tf` or `torch`. ' - f'Received: mode={mode}') - - if data_format is None: - data_format = backend.image_data_format() - elif data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Expected data_format to be one of `channels_first` or ' - f'`channels_last`. Received: data_format={data_format}') - - if isinstance(x, np.ndarray): - return _preprocess_numpy_input( - x, data_format=data_format, mode=mode) - else: - return _preprocess_symbolic_input( - x, data_format=data_format, mode=mode) +@keras_export("keras.applications.imagenet_utils.preprocess_input") +def preprocess_input(x, data_format=None, mode="caffe"): + """Preprocesses a tensor or Numpy array encoding a batch of images.""" + if mode not in {"caffe", "tf", "torch"}: + raise ValueError( + "Expected mode to be one of `caffe`, `tf` or `torch`. " + f"Received: mode={mode}" + ) + + if data_format is None: + data_format = backend.image_data_format() + elif data_format not in {"channels_first", "channels_last"}: + raise ValueError( + "Expected data_format to be one of `channels_first` or " + f"`channels_last`. Received: data_format={data_format}" + ) + + if isinstance(x, np.ndarray): + return _preprocess_numpy_input(x, data_format=data_format, mode=mode) + else: + return _preprocess_symbolic_input(x, data_format=data_format, mode=mode) preprocess_input.__doc__ = PREPROCESS_INPUT_DOC.format( mode=PREPROCESS_INPUT_MODE_DOC, - ret='', - error=PREPROCESS_INPUT_DEFAULT_ERROR_DOC) + ret="", + error=PREPROCESS_INPUT_DEFAULT_ERROR_DOC, +) -@keras_export('keras.applications.imagenet_utils.decode_predictions') +@keras_export("keras.applications.imagenet_utils.decode_predictions") def decode_predictions(preds, top=5): - """Decodes the prediction of an ImageNet model. - - Args: - preds: Numpy array encoding a batch of predictions. - top: Integer, how many top-guesses to return. Defaults to 5. - - Returns: - A list of lists of top class prediction tuples - `(class_name, class_description, score)`. - One list of tuples per sample in batch input. - - Raises: - ValueError: In case of invalid shape of the `pred` array - (must be 2D). - """ - global CLASS_INDEX - - if len(preds.shape) != 2 or preds.shape[1] != 1000: - raise ValueError('`decode_predictions` expects ' - 'a batch of predictions ' - '(i.e. a 2D array of shape (samples, 1000)). 
' - 'Found array with shape: ' + str(preds.shape)) - if CLASS_INDEX is None: - fpath = data_utils.get_file( - 'imagenet_class_index.json', - CLASS_INDEX_PATH, - cache_subdir='models', - file_hash='c2c37ea517e94d9795004a39431a14cb') - with open(fpath) as f: - CLASS_INDEX = json.load(f) - results = [] - for pred in preds: - top_indices = pred.argsort()[-top:][::-1] - result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] - result.sort(key=lambda x: x[2], reverse=True) - results.append(result) - return results + """Decodes the prediction of an ImageNet model. + + Args: + preds: Numpy array encoding a batch of predictions. + top: Integer, how many top-guesses to return. Defaults to 5. + + Returns: + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises: + ValueError: In case of invalid shape of the `pred` array + (must be 2D). + """ + global CLASS_INDEX + + if len(preds.shape) != 2 or preds.shape[1] != 1000: + raise ValueError( + "`decode_predictions` expects " + "a batch of predictions " + "(i.e. a 2D array of shape (samples, 1000)). " + "Found array with shape: " + str(preds.shape) + ) + if CLASS_INDEX is None: + fpath = data_utils.get_file( + "imagenet_class_index.json", + CLASS_INDEX_PATH, + cache_subdir="models", + file_hash="c2c37ea517e94d9795004a39431a14cb", + ) + with open(fpath) as f: + CLASS_INDEX = json.load(f) + results = [] + for pred in preds: + top_indices = pred.argsort()[-top:][::-1] + result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] + result.sort(key=lambda x: x[2], reverse=True) + results.append(result) + return results def _preprocess_numpy_input(x, data_format, mode): - """Preprocesses a Numpy array encoding a batch of images. - - Args: - x: Input array, 3D or 4D. - data_format: Data format of the image array. - mode: One of "caffe", "tf" or "torch". - - caffe: will convert the images from RGB to BGR, - then will zero-center each color channel with - respect to the ImageNet dataset, - without scaling. - - tf: will scale pixels between -1 and 1, - sample-wise. - - torch: will scale pixels between 0 and 1 and then - will normalize each channel with respect to the - ImageNet dataset. - - Returns: - Preprocessed Numpy array. - """ - if not issubclass(x.dtype.type, np.floating): - x = x.astype(backend.floatx(), copy=False) - - if mode == 'tf': - x /= 127.5 - x -= 1. - return x - elif mode == 'torch': - x /= 255. - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - else: - if data_format == 'channels_first': - # 'RGB'->'BGR' - if x.ndim == 3: - x = x[::-1, ...] - else: - x = x[:, ::-1, ...] + """Preprocesses a Numpy array encoding a batch of images. + + Args: + x: Input array, 3D or 4D. + data_format: Data format of the image array. + mode: One of "caffe", "tf" or "torch". + - caffe: will convert the images from RGB to BGR, + then will zero-center each color channel with + respect to the ImageNet dataset, + without scaling. + - tf: will scale pixels between -1 and 1, + sample-wise. + - torch: will scale pixels between 0 and 1 and then + will normalize each channel with respect to the + ImageNet dataset. + + Returns: + Preprocessed Numpy array. 
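# ---------------------------------------------------------------------------
# [Editor's note, not part of the patch] A minimal sketch of what the three
# preprocessing modes documented above actually compute. Only NumPy is
# assumed; the constants mirror the ImageNet means/stds used in the code.
import numpy as np

x = np.array([[[0.0, 127.5, 255.0]]])  # a single RGB pixel, channels_last

tf_mode = x / 127.5 - 1.0              # "tf": scale sample-wise to [-1, 1]
torch_mode = (x / 255.0 - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
caffe_mode = x[..., ::-1] - [103.939, 116.779, 123.68]  # RGB->BGR, centered

print(tf_mode)  # [[[-1.  0.  1.]]]; "caffe" stays on the 0..255 scale
# ---------------------------------------------------------------------------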
+ """ + if not issubclass(x.dtype.type, np.floating): + x = x.astype(backend.floatx(), copy=False) + + if mode == "tf": + x /= 127.5 + x -= 1.0 + return x + elif mode == "torch": + x /= 255.0 + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] else: - # 'RGB'->'BGR' - x = x[..., ::-1] - mean = [103.939, 116.779, 123.68] - std = None - - # Zero-center by mean pixel - if data_format == 'channels_first': - if x.ndim == 3: - x[0, :, :] -= mean[0] - x[1, :, :] -= mean[1] - x[2, :, :] -= mean[2] - if std is not None: - x[0, :, :] /= std[0] - x[1, :, :] /= std[1] - x[2, :, :] /= std[2] + if data_format == "channels_first": + # 'RGB'->'BGR' + if x.ndim == 3: + x = x[::-1, ...] + else: + x = x[:, ::-1, ...] + else: + # 'RGB'->'BGR' + x = x[..., ::-1] + mean = [103.939, 116.779, 123.68] + std = None + + # Zero-center by mean pixel + if data_format == "channels_first": + if x.ndim == 3: + x[0, :, :] -= mean[0] + x[1, :, :] -= mean[1] + x[2, :, :] -= mean[2] + if std is not None: + x[0, :, :] /= std[0] + x[1, :, :] /= std[1] + x[2, :, :] /= std[2] + else: + x[:, 0, :, :] -= mean[0] + x[:, 1, :, :] -= mean[1] + x[:, 2, :, :] -= mean[2] + if std is not None: + x[:, 0, :, :] /= std[0] + x[:, 1, :, :] /= std[1] + x[:, 2, :, :] /= std[2] else: - x[:, 0, :, :] -= mean[0] - x[:, 1, :, :] -= mean[1] - x[:, 2, :, :] -= mean[2] - if std is not None: - x[:, 0, :, :] /= std[0] - x[:, 1, :, :] /= std[1] - x[:, 2, :, :] /= std[2] - else: - x[..., 0] -= mean[0] - x[..., 1] -= mean[1] - x[..., 2] -= mean[2] - if std is not None: - x[..., 0] /= std[0] - x[..., 1] /= std[1] - x[..., 2] /= std[2] - return x + x[..., 0] -= mean[0] + x[..., 1] -= mean[1] + x[..., 2] -= mean[2] + if std is not None: + x[..., 0] /= std[0] + x[..., 1] /= std[1] + x[..., 2] /= std[2] + return x def _preprocess_symbolic_input(x, data_format, mode): - """Preprocesses a tensor encoding a batch of images. - - Args: - x: Input tensor, 3D or 4D. - data_format: Data format of the image tensor. - mode: One of "caffe", "tf" or "torch". - - caffe: will convert the images from RGB to BGR, - then will zero-center each color channel with - respect to the ImageNet dataset, - without scaling. - - tf: will scale pixels between -1 and 1, - sample-wise. - - torch: will scale pixels between 0 and 1 and then - will normalize each channel with respect to the - ImageNet dataset. - - Returns: - Preprocessed tensor. - """ - if mode == 'tf': - x /= 127.5 - x -= 1. - return x - elif mode == 'torch': - x /= 255. - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - else: - if data_format == 'channels_first': - # 'RGB'->'BGR' - if backend.ndim(x) == 3: - x = x[::-1, ...] - else: - x = x[:, ::-1, ...] + """Preprocesses a tensor encoding a batch of images. + + Args: + x: Input tensor, 3D or 4D. + data_format: Data format of the image tensor. + mode: One of "caffe", "tf" or "torch". + - caffe: will convert the images from RGB to BGR, + then will zero-center each color channel with + respect to the ImageNet dataset, + without scaling. + - tf: will scale pixels between -1 and 1, + sample-wise. + - torch: will scale pixels between 0 and 1 and then + will normalize each channel with respect to the + ImageNet dataset. + + Returns: + Preprocessed tensor. 
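# ---------------------------------------------------------------------------
# [Editor's note, not part of the patch] A hedged sketch of the symbolic
# "caffe" path below: bias_add with a negated mean performs the same
# zero-centering as elementwise subtraction. Plain TensorFlow ops only.
import numpy as np
import tensorflow as tf

mean = np.array([103.939, 116.779, 123.68], dtype="float32")
x = tf.random.uniform((1, 4, 4, 3), 0, 255)  # channels_last batch
x_bgr = x[..., ::-1]                         # 'RGB'->'BGR'
centered = tf.nn.bias_add(x_bgr, -mean)      # add the negative mean
assert np.allclose(centered.numpy(), (x_bgr - mean).numpy(), atol=1e-4)
# ---------------------------------------------------------------------------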
+ """ + if mode == "tf": + x /= 127.5 + x -= 1.0 + return x + elif mode == "torch": + x /= 255.0 + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] else: - # 'RGB'->'BGR' - x = x[..., ::-1] - mean = [103.939, 116.779, 123.68] - std = None - - mean_tensor = backend.constant(-np.array(mean)) - - # Zero-center by mean pixel - if backend.dtype(x) != backend.dtype(mean_tensor): - x = backend.bias_add( - x, backend.cast(mean_tensor, backend.dtype(x)), data_format=data_format) - else: - x = backend.bias_add(x, mean_tensor, data_format) - if std is not None: - std_tensor = backend.constant(np.array(std), dtype=backend.dtype(x)) - if data_format == 'channels_first': - std_tensor = backend.reshape(std_tensor, (-1, 1, 1)) - x /= std_tensor - return x - - -def obtain_input_shape(input_shape, - default_size, - min_size, - data_format, - require_flatten, - weights=None): - """Internal utility to compute/validate a model's input shape. - - Args: - input_shape: Either None (will return the default network input shape), - or a user-provided shape to be validated. - default_size: Default input width/height for the model. - min_size: Minimum input width/height accepted by the model. - data_format: Image data format to use. - require_flatten: Whether the model is expected to - be linked to a classifier via a Flatten layer. - weights: One of `None` (random initialization) - or 'imagenet' (pre-training on ImageNet). - If weights='imagenet' input channels must be equal to 3. + if data_format == "channels_first": + # 'RGB'->'BGR' + if backend.ndim(x) == 3: + x = x[::-1, ...] + else: + x = x[:, ::-1, ...] + else: + # 'RGB'->'BGR' + x = x[..., ::-1] + mean = [103.939, 116.779, 123.68] + std = None + + mean_tensor = backend.constant(-np.array(mean)) + + # Zero-center by mean pixel + if backend.dtype(x) != backend.dtype(mean_tensor): + x = backend.bias_add( + x, + backend.cast(mean_tensor, backend.dtype(x)), + data_format=data_format, + ) + else: + x = backend.bias_add(x, mean_tensor, data_format) + if std is not None: + std_tensor = backend.constant(np.array(std), dtype=backend.dtype(x)) + if data_format == "channels_first": + std_tensor = backend.reshape(std_tensor, (-1, 1, 1)) + x /= std_tensor + return x - Returns: - An integer shape tuple (may include None entries). - Raises: - ValueError: In case of invalid argument values. - """ - if weights != 'imagenet' and input_shape and len(input_shape) == 3: - if data_format == 'channels_first': - if input_shape[0] not in {1, 3}: - warnings.warn( - 'This model usually expects 1 or 3 input channels. ' - 'However, it was passed an input_shape with ' + - str(input_shape[0]) + ' input channels.', - stacklevel=2) - default_shape = (input_shape[0], default_size, default_size) - else: - if input_shape[-1] not in {1, 3}: - warnings.warn( - 'This model usually expects 1 or 3 input channels. ' - 'However, it was passed an input_shape with ' + - str(input_shape[-1]) + ' input channels.', - stacklevel=2) - default_shape = (default_size, default_size, input_shape[-1]) - else: - if data_format == 'channels_first': - default_shape = (3, default_size, default_size) +def obtain_input_shape( + input_shape, + default_size, + min_size, + data_format, + require_flatten, + weights=None, +): + """Internal utility to compute/validate a model's input shape. + + Args: + input_shape: Either None (will return the default network input shape), + or a user-provided shape to be validated. + default_size: Default input width/height for the model. 
+ min_size: Minimum input width/height accepted by the model. + data_format: Image data format to use. + require_flatten: Whether the model is expected to + be linked to a classifier via a Flatten layer. + weights: One of `None` (random initialization) + or 'imagenet' (pre-training on ImageNet). + If weights='imagenet' input channels must be equal to 3. + + Returns: + An integer shape tuple (may include None entries). + + Raises: + ValueError: In case of invalid argument values. + """ + if weights != "imagenet" and input_shape and len(input_shape) == 3: + if data_format == "channels_first": + if input_shape[0] not in {1, 3}: + warnings.warn( + "This model usually expects 1 or 3 input channels. " + "However, it was passed an input_shape with " + + str(input_shape[0]) + + " input channels.", + stacklevel=2, + ) + default_shape = (input_shape[0], default_size, default_size) + else: + if input_shape[-1] not in {1, 3}: + warnings.warn( + "This model usually expects 1 or 3 input channels. " + "However, it was passed an input_shape with " + + str(input_shape[-1]) + + " input channels.", + stacklevel=2, + ) + default_shape = (default_size, default_size, input_shape[-1]) else: - default_shape = (default_size, default_size, 3) - if weights == 'imagenet' and require_flatten: - if input_shape is not None: - if input_shape != default_shape: - raise ValueError('When setting `include_top=True` ' - 'and loading `imagenet` weights, ' - f'`input_shape` should be {default_shape}. ' - f'Received: input_shape={input_shape}') - return default_shape - if input_shape: - if data_format == 'channels_first': - if input_shape is not None: - if len(input_shape) != 3: - raise ValueError('`input_shape` must be a tuple of three integers.') - if input_shape[0] != 3 and weights == 'imagenet': - raise ValueError('The input must have 3 channels; Received ' - f'`input_shape={input_shape}`') - if ((input_shape[1] is not None and input_shape[1] < min_size) or - (input_shape[2] is not None and input_shape[2] < min_size)): - raise ValueError(f'Input size must be at least {min_size}' - f'x{min_size}; Received: ' - f'input_shape={input_shape}') + if data_format == "channels_first": + default_shape = (3, default_size, default_size) + else: + default_shape = (default_size, default_size, 3) + if weights == "imagenet" and require_flatten: + if input_shape is not None: + if input_shape != default_shape: + raise ValueError( + "When setting `include_top=True` " + "and loading `imagenet` weights, " + f"`input_shape` should be {default_shape}. " + f"Received: input_shape={input_shape}" + ) + return default_shape + if input_shape: + if data_format == "channels_first": + if input_shape is not None: + if len(input_shape) != 3: + raise ValueError( + "`input_shape` must be a tuple of three integers." + ) + if input_shape[0] != 3 and weights == "imagenet": + raise ValueError( + "The input must have 3 channels; Received " + f"`input_shape={input_shape}`" + ) + if ( + input_shape[1] is not None and input_shape[1] < min_size + ) or (input_shape[2] is not None and input_shape[2] < min_size): + raise ValueError( + f"Input size must be at least {min_size}" + f"x{min_size}; Received: " + f"input_shape={input_shape}" + ) + else: + if input_shape is not None: + if len(input_shape) != 3: + raise ValueError( + "`input_shape` must be a tuple of three integers." 
+ ) + if input_shape[-1] != 3 and weights == "imagenet": + raise ValueError( + "The input must have 3 channels; Received " + f"`input_shape={input_shape}`" + ) + if ( + input_shape[0] is not None and input_shape[0] < min_size + ) or (input_shape[1] is not None and input_shape[1] < min_size): + raise ValueError( + "Input size must be at least " + f"{min_size}x{min_size}; Received: " + f"input_shape={input_shape}" + ) else: - if input_shape is not None: - if len(input_shape) != 3: - raise ValueError('`input_shape` must be a tuple of three integers.') - if input_shape[-1] != 3 and weights == 'imagenet': - raise ValueError('The input must have 3 channels; Received ' - f'`input_shape={input_shape}`') - if ((input_shape[0] is not None and input_shape[0] < min_size) or - (input_shape[1] is not None and input_shape[1] < min_size)): - raise ValueError('Input size must be at least ' - f'{min_size}x{min_size}; Received: ' - f'input_shape={input_shape}') - else: + if require_flatten: + input_shape = default_shape + else: + if data_format == "channels_first": + input_shape = (3, None, None) + else: + input_shape = (None, None, 3) if require_flatten: - input_shape = default_shape - else: - if data_format == 'channels_first': - input_shape = (3, None, None) - else: - input_shape = (None, None, 3) - if require_flatten: - if None in input_shape: - raise ValueError('If `include_top` is True, ' - 'you should specify a static `input_shape`. ' - f'Received: input_shape={input_shape}') - return input_shape + if None in input_shape: + raise ValueError( + "If `include_top` is True, " + "you should specify a static `input_shape`. " + f"Received: input_shape={input_shape}" + ) + return input_shape def correct_pad(inputs, kernel_size): - """Returns a tuple for zero-padding for 2D convolution with downsampling. - - Args: - inputs: Input tensor. - kernel_size: An integer or tuple/list of 2 integers. - - Returns: - A tuple. - """ - img_dim = 2 if backend.image_data_format() == 'channels_first' else 1 - input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)] - if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) - if input_size[0] is None: - adjust = (1, 1) - else: - adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) - correct = (kernel_size[0] // 2, kernel_size[1] // 2) - return ((correct[0] - adjust[0], correct[0]), - (correct[1] - adjust[1], correct[1])) + """Returns a tuple for zero-padding for 2D convolution with downsampling. + + Args: + inputs: Input tensor. + kernel_size: An integer or tuple/list of 2 integers. + + Returns: + A tuple. + """ + img_dim = 2 if backend.image_data_format() == "channels_first" else 1 + input_size = backend.int_shape(inputs)[img_dim : (img_dim + 2)] + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + if input_size[0] is None: + adjust = (1, 1) + else: + adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) + correct = (kernel_size[0] // 2, kernel_size[1] // 2) + return ( + (correct[0] - adjust[0], correct[0]), + (correct[1] - adjust[1], correct[1]), + ) def validate_activation(classifier_activation, weights): - """validates that the classifer_activation is compatible with the weights. - - Args: - classifier_activation: str or callable activation function - weights: The pretrained weights to load. - - Raises: - ValueError: if an activation other than `None` or `softmax` are used with - pretrained weights. 
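# ---------------------------------------------------------------------------
# [Editor's note, not part of the patch] correct_pad (reformatted above) in
# action: a hedged check of the asymmetric padding rule for stride-2
# convolutions, assuming channels_last and a static 224x224 input.
from keras import layers
from keras.applications import imagenet_utils

inputs = layers.Input(shape=(224, 224, 3))
pad = imagenet_utils.correct_pad(inputs, kernel_size=3)
print(pad)  # ((0, 1), (0, 1)): even input sizes pad bottom/right only
x = layers.ZeroPadding2D(padding=pad)(inputs)
x = layers.Conv2D(32, 3, strides=2, padding="valid")(x)
print(x.shape)  # (None, 112, 112, 32): exact halving, no pixels dropped
# ---------------------------------------------------------------------------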
- """ - if weights is None: - return - - classifier_activation = activations.get(classifier_activation) - if classifier_activation not in { - activations.get('softmax'), - activations.get(None) - }: - raise ValueError('Only `None` and `softmax` activations are allowed ' - 'for the `classifier_activation` argument when using ' - 'pretrained weights, with `include_top=True`; Received: ' - f'classifier_activation={classifier_activation}') + """validates that the classifer_activation is compatible with the weights. + + Args: + classifier_activation: str or callable activation function + weights: The pretrained weights to load. + + Raises: + ValueError: if an activation other than `None` or `softmax` are used with + pretrained weights. + """ + if weights is None: + return + + classifier_activation = activations.get(classifier_activation) + if classifier_activation not in { + activations.get("softmax"), + activations.get(None), + }: + raise ValueError( + "Only `None` and `softmax` activations are allowed " + "for the `classifier_activation` argument when using " + "pretrained weights, with `include_top=True`; Received: " + f"classifier_activation={classifier_activation}" + ) diff --git a/keras/applications/imagenet_utils_test.py b/keras/applications/imagenet_utils_test.py index 6ca7ee811e75..8369884ee6de 100644 --- a/keras/applications/imagenet_utils_test.py +++ b/keras/applications/imagenet_utils_test.py @@ -14,283 +14,312 @@ # ============================================================================== """Tests for imagenet_utils.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras -from keras.testing_infra import test_combinations from keras.applications import imagenet_utils as utils from keras.mixed_precision.policy import set_global_policy +from keras.testing_infra import test_combinations class TestImageNetUtils(test_combinations.TestCase): - - def test_preprocess_input(self): - # Test invalid mode check - x = np.random.uniform(0, 255, (10, 10, 3)) - with self.assertRaises(ValueError): - utils.preprocess_input(x, mode='some_unknown_mode') - - # Test image batch with float and int image input - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - xint = x.astype('int32') - self.assertEqual(utils.preprocess_input(x).shape, x.shape) - self.assertEqual(utils.preprocess_input(xint).shape, xint.shape) - - out1 = utils.preprocess_input(x, 'channels_last') - out1int = utils.preprocess_input(xint, 'channels_last') - out2 = utils.preprocess_input( - np.transpose(x, (0, 3, 1, 2)), 'channels_first') - out2int = utils.preprocess_input( - np.transpose(xint, (0, 3, 1, 2)), 'channels_first') - self.assertAllClose(out1, out2.transpose(0, 2, 3, 1)) - self.assertAllClose(out1int, out2int.transpose(0, 2, 3, 1)) - - # Test single image - x = np.random.uniform(0, 255, (10, 10, 3)) - xint = x.astype('int32') - self.assertEqual(utils.preprocess_input(x).shape, x.shape) - self.assertEqual(utils.preprocess_input(xint).shape, xint.shape) - - out1 = utils.preprocess_input(x, 'channels_last') - out1int = utils.preprocess_input(xint, 'channels_last') - out2 = utils.preprocess_input(np.transpose(x, (2, 0, 1)), 'channels_first') - out2int = utils.preprocess_input( - np.transpose(xint, (2, 0, 1)), 'channels_first') - self.assertAllClose(out1, out2.transpose(1, 2, 0)) - self.assertAllClose(out1int, out2int.transpose(1, 2, 0)) - - # Test that writing over the input data works predictably - for mode in ['torch', 'tf']: - x = np.random.uniform(0, 
255, (2, 10, 10, 3)) - xint = x.astype('int') - x2 = utils.preprocess_input(x, mode=mode) - xint2 = utils.preprocess_input(xint) - self.assertAllClose(x, x2) - self.assertNotEqual(xint.astype('float').max(), xint2.max()) - - # Caffe mode works differently from the others - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - xint = x.astype('int') - x2 = utils.preprocess_input(x, data_format='channels_last', mode='caffe') - xint2 = utils.preprocess_input(xint) - self.assertAllClose(x, x2[..., ::-1]) - self.assertNotEqual(xint.astype('float').max(), xint2.max()) - - @parameterized.named_parameters([ - { - 'testcase_name': 'mode_torch', - 'mode': 'torch' - }, - { - 'testcase_name': 'mode_tf', - 'mode': 'tf' - }, - { - 'testcase_name': 'mode_caffe', - 'mode': 'caffe' - }, - ]) - def test_preprocess_input_symbolic(self, mode): - # Test image batch - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - inputs = keras.layers.Input(shape=x.shape[1:]) - outputs = keras.layers.Lambda( - lambda x: utils.preprocess_input(x, mode=mode), - output_shape=x.shape[1:])( - inputs) - model = keras.Model(inputs, outputs) - self.assertEqual(model.predict(x).shape, x.shape) - - outputs1 = keras.layers.Lambda( - lambda x: utils.preprocess_input(x, 'channels_last', mode=mode), - output_shape=x.shape[1:])( - inputs) - model1 = keras.Model(inputs, outputs1) - out1 = model1.predict(x) - x2 = np.transpose(x, (0, 3, 1, 2)) - inputs2 = keras.layers.Input(shape=x2.shape[1:]) - outputs2 = keras.layers.Lambda( - lambda x: utils.preprocess_input(x, 'channels_first', mode=mode), - output_shape=x2.shape[1:])( - inputs2) - model2 = keras.Model(inputs2, outputs2) - out2 = model2.predict(x2) - self.assertAllClose(out1, out2.transpose(0, 2, 3, 1)) - - # Test single image - x = np.random.uniform(0, 255, (10, 10, 3)) - inputs = keras.layers.Input(shape=x.shape) - outputs = keras.layers.Lambda( - lambda x: utils.preprocess_input(x, mode=mode), output_shape=x.shape)( - inputs) - model = keras.Model(inputs, outputs) - self.assertEqual(model.predict(x[np.newaxis])[0].shape, x.shape) - - outputs1 = keras.layers.Lambda( - lambda x: utils.preprocess_input(x, 'channels_last', mode=mode), - output_shape=x.shape)( - inputs) - model1 = keras.Model(inputs, outputs1) - out1 = model1.predict(x[np.newaxis])[0] - x2 = np.transpose(x, (2, 0, 1)) - inputs2 = keras.layers.Input(shape=x2.shape) - outputs2 = keras.layers.Lambda( - lambda x: utils.preprocess_input(x, 'channels_first', mode=mode), - output_shape=x2.shape)( - inputs2) - model2 = keras.Model(inputs2, outputs2) - out2 = model2.predict(x2[np.newaxis])[0] - self.assertAllClose(out1, out2.transpose(1, 2, 0)) - - @parameterized.named_parameters([ - { - 'testcase_name': 'mode_torch', - 'mode': 'torch' - }, - { - 'testcase_name': 'mode_tf', - 'mode': 'tf' - }, - { - 'testcase_name': 'mode_caffe', - 'mode': 'caffe' - }, - ]) - def test_preprocess_input_symbolic_mixed_precision(self, mode): - if not tf.__internal__.tf2.enabled(): - self.skipTest('The global policy can only be tested in TensorFlow 2') - set_global_policy('mixed_float16') - shape = (20, 20, 3) - inputs = keras.layers.Input(shape=shape) - try: - keras.layers.Lambda( - lambda x: utils.preprocess_input(x, mode=mode), output_shape=shape)( - inputs) - finally: - set_global_policy('float32') - - @parameterized.named_parameters([ - {'testcase_name': 'channels_last_format', - 'data_format': 'channels_last'}, - {'testcase_name': 'channels_first_format', - 'data_format': 'channels_first'}, - ]) - def test_obtain_input_shape(self, data_format): - # 
input_shape and default_size are not identical. - with self.assertRaises(ValueError): - utils.obtain_input_shape( - input_shape=(224, 224, 3), - default_size=299, - min_size=139, - data_format='channels_last', - require_flatten=True, - weights='imagenet') - - # Test invalid use cases - - shape = (139, 139) - if data_format == 'channels_last': - input_shape = shape + (99,) - else: - input_shape = (99,) + shape - - # input_shape is smaller than min_size. - shape = (100, 100) - if data_format == 'channels_last': - input_shape = shape + (3,) - else: - input_shape = (3,) + shape - with self.assertRaises(ValueError): - utils.obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False) - - # shape is 1D. - shape = (100,) - if data_format == 'channels_last': - input_shape = shape + (3,) - else: - input_shape = (3,) + shape - with self.assertRaises(ValueError): - utils.obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False) - - # the number of channels is 5 not 3. - shape = (100, 100) - if data_format == 'channels_last': - input_shape = shape + (5,) - else: - input_shape = (5,) + shape - with self.assertRaises(ValueError): - utils.obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False) - - # require_flatten=True with dynamic input shape. - with self.assertRaises(ValueError): - utils.obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=True) - - # test include top - self.assertEqual(utils.obtain_input_shape( - input_shape=(3, 200, 200), - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=True), (3, 200, 200)) - - self.assertEqual(utils.obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_last', - require_flatten=False), (None, None, 3)) - - self.assertEqual(utils.obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=False), (3, None, None)) - - self.assertEqual(utils.obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_last', - require_flatten=False), (None, None, 3)) - - self.assertEqual(utils.obtain_input_shape( - input_shape=(150, 150, 3), - default_size=None, - min_size=139, - data_format='channels_last', - require_flatten=False), (150, 150, 3)) - - self.assertEqual(utils.obtain_input_shape( - input_shape=(3, None, None), - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=False), (3, None, None)) - - -if __name__ == '__main__': - tf.test.main() + def test_preprocess_input(self): + # Test invalid mode check + x = np.random.uniform(0, 255, (10, 10, 3)) + with self.assertRaises(ValueError): + utils.preprocess_input(x, mode="some_unknown_mode") + + # Test image batch with float and int image input + x = np.random.uniform(0, 255, (2, 10, 10, 3)) + xint = x.astype("int32") + self.assertEqual(utils.preprocess_input(x).shape, x.shape) + self.assertEqual(utils.preprocess_input(xint).shape, xint.shape) + + out1 = utils.preprocess_input(x, "channels_last") + out1int = utils.preprocess_input(xint, "channels_last") + out2 = utils.preprocess_input( + np.transpose(x, (0, 3, 1, 2)), "channels_first" + ) + out2int = utils.preprocess_input( + np.transpose(xint, (0, 3, 1, 
2)), "channels_first" + ) + self.assertAllClose(out1, out2.transpose(0, 2, 3, 1)) + self.assertAllClose(out1int, out2int.transpose(0, 2, 3, 1)) + + # Test single image + x = np.random.uniform(0, 255, (10, 10, 3)) + xint = x.astype("int32") + self.assertEqual(utils.preprocess_input(x).shape, x.shape) + self.assertEqual(utils.preprocess_input(xint).shape, xint.shape) + + out1 = utils.preprocess_input(x, "channels_last") + out1int = utils.preprocess_input(xint, "channels_last") + out2 = utils.preprocess_input( + np.transpose(x, (2, 0, 1)), "channels_first" + ) + out2int = utils.preprocess_input( + np.transpose(xint, (2, 0, 1)), "channels_first" + ) + self.assertAllClose(out1, out2.transpose(1, 2, 0)) + self.assertAllClose(out1int, out2int.transpose(1, 2, 0)) + + # Test that writing over the input data works predictably + for mode in ["torch", "tf"]: + x = np.random.uniform(0, 255, (2, 10, 10, 3)) + xint = x.astype("int") + x2 = utils.preprocess_input(x, mode=mode) + xint2 = utils.preprocess_input(xint) + self.assertAllClose(x, x2) + self.assertNotEqual(xint.astype("float").max(), xint2.max()) + + # Caffe mode works differently from the others + x = np.random.uniform(0, 255, (2, 10, 10, 3)) + xint = x.astype("int") + x2 = utils.preprocess_input( + x, data_format="channels_last", mode="caffe" + ) + xint2 = utils.preprocess_input(xint) + self.assertAllClose(x, x2[..., ::-1]) + self.assertNotEqual(xint.astype("float").max(), xint2.max()) + + @parameterized.named_parameters( + [ + {"testcase_name": "mode_torch", "mode": "torch"}, + {"testcase_name": "mode_tf", "mode": "tf"}, + {"testcase_name": "mode_caffe", "mode": "caffe"}, + ] + ) + def test_preprocess_input_symbolic(self, mode): + # Test image batch + x = np.random.uniform(0, 255, (2, 10, 10, 3)) + inputs = keras.layers.Input(shape=x.shape[1:]) + outputs = keras.layers.Lambda( + lambda x: utils.preprocess_input(x, mode=mode), + output_shape=x.shape[1:], + )(inputs) + model = keras.Model(inputs, outputs) + self.assertEqual(model.predict(x).shape, x.shape) + + outputs1 = keras.layers.Lambda( + lambda x: utils.preprocess_input(x, "channels_last", mode=mode), + output_shape=x.shape[1:], + )(inputs) + model1 = keras.Model(inputs, outputs1) + out1 = model1.predict(x) + x2 = np.transpose(x, (0, 3, 1, 2)) + inputs2 = keras.layers.Input(shape=x2.shape[1:]) + outputs2 = keras.layers.Lambda( + lambda x: utils.preprocess_input(x, "channels_first", mode=mode), + output_shape=x2.shape[1:], + )(inputs2) + model2 = keras.Model(inputs2, outputs2) + out2 = model2.predict(x2) + self.assertAllClose(out1, out2.transpose(0, 2, 3, 1)) + + # Test single image + x = np.random.uniform(0, 255, (10, 10, 3)) + inputs = keras.layers.Input(shape=x.shape) + outputs = keras.layers.Lambda( + lambda x: utils.preprocess_input(x, mode=mode), output_shape=x.shape + )(inputs) + model = keras.Model(inputs, outputs) + self.assertEqual(model.predict(x[np.newaxis])[0].shape, x.shape) + + outputs1 = keras.layers.Lambda( + lambda x: utils.preprocess_input(x, "channels_last", mode=mode), + output_shape=x.shape, + )(inputs) + model1 = keras.Model(inputs, outputs1) + out1 = model1.predict(x[np.newaxis])[0] + x2 = np.transpose(x, (2, 0, 1)) + inputs2 = keras.layers.Input(shape=x2.shape) + outputs2 = keras.layers.Lambda( + lambda x: utils.preprocess_input(x, "channels_first", mode=mode), + output_shape=x2.shape, + )(inputs2) + model2 = keras.Model(inputs2, outputs2) + out2 = model2.predict(x2[np.newaxis])[0] + self.assertAllClose(out1, out2.transpose(1, 2, 0)) + + 
@parameterized.named_parameters( + [ + {"testcase_name": "mode_torch", "mode": "torch"}, + {"testcase_name": "mode_tf", "mode": "tf"}, + {"testcase_name": "mode_caffe", "mode": "caffe"}, + ] + ) + def test_preprocess_input_symbolic_mixed_precision(self, mode): + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "The global policy can only be tested in TensorFlow 2" + ) + set_global_policy("mixed_float16") + shape = (20, 20, 3) + inputs = keras.layers.Input(shape=shape) + try: + keras.layers.Lambda( + lambda x: utils.preprocess_input(x, mode=mode), + output_shape=shape, + )(inputs) + finally: + set_global_policy("float32") + + @parameterized.named_parameters( + [ + { + "testcase_name": "channels_last_format", + "data_format": "channels_last", + }, + { + "testcase_name": "channels_first_format", + "data_format": "channels_first", + }, + ] + ) + def test_obtain_input_shape(self, data_format): + # input_shape and default_size are not identical. + with self.assertRaises(ValueError): + utils.obtain_input_shape( + input_shape=(224, 224, 3), + default_size=299, + min_size=139, + data_format="channels_last", + require_flatten=True, + weights="imagenet", + ) + + # Test invalid use cases + + shape = (139, 139) + if data_format == "channels_last": + input_shape = shape + (99,) + else: + input_shape = (99,) + shape + + # input_shape is smaller than min_size. + shape = (100, 100) + if data_format == "channels_last": + input_shape = shape + (3,) + else: + input_shape = (3,) + shape + with self.assertRaises(ValueError): + utils.obtain_input_shape( + input_shape=input_shape, + default_size=None, + min_size=139, + data_format=data_format, + require_flatten=False, + ) + + # shape is 1D. + shape = (100,) + if data_format == "channels_last": + input_shape = shape + (3,) + else: + input_shape = (3,) + shape + with self.assertRaises(ValueError): + utils.obtain_input_shape( + input_shape=input_shape, + default_size=None, + min_size=139, + data_format=data_format, + require_flatten=False, + ) + + # the number of channels is 5 not 3. + shape = (100, 100) + if data_format == "channels_last": + input_shape = shape + (5,) + else: + input_shape = (5,) + shape + with self.assertRaises(ValueError): + utils.obtain_input_shape( + input_shape=input_shape, + default_size=None, + min_size=139, + data_format=data_format, + require_flatten=False, + ) + + # require_flatten=True with dynamic input shape. 
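# ---------------------------------------------------------------------------
# [Editor's note, not part of the patch] The validation rules exercised by
# this test, shown standalone; `utils` is keras.applications.imagenet_utils
# and the sizes below are illustrative assumptions.
from keras.applications import imagenet_utils as utils

# No input_shape and require_flatten=False -> fully dynamic spatial dims.
print(utils.obtain_input_shape(
    input_shape=None, default_size=224, min_size=32,
    data_format="channels_last", require_flatten=False))  # (None, None, 3)

# Spatial dims below min_size raise a ValueError.
try:
    utils.obtain_input_shape(
        input_shape=(16, 16, 3), default_size=224, min_size=32,
        data_format="channels_last", require_flatten=False)
except ValueError as e:
    print(e)  # Input size must be at least 32x32; Received: ...
# ---------------------------------------------------------------------------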
+ with self.assertRaises(ValueError): + utils.obtain_input_shape( + input_shape=None, + default_size=None, + min_size=139, + data_format="channels_first", + require_flatten=True, + ) + + # test include top + self.assertEqual( + utils.obtain_input_shape( + input_shape=(3, 200, 200), + default_size=None, + min_size=139, + data_format="channels_first", + require_flatten=True, + ), + (3, 200, 200), + ) + + self.assertEqual( + utils.obtain_input_shape( + input_shape=None, + default_size=None, + min_size=139, + data_format="channels_last", + require_flatten=False, + ), + (None, None, 3), + ) + + self.assertEqual( + utils.obtain_input_shape( + input_shape=None, + default_size=None, + min_size=139, + data_format="channels_first", + require_flatten=False, + ), + (3, None, None), + ) + + self.assertEqual( + utils.obtain_input_shape( + input_shape=None, + default_size=None, + min_size=139, + data_format="channels_last", + require_flatten=False, + ), + (None, None, 3), + ) + + self.assertEqual( + utils.obtain_input_shape( + input_shape=(150, 150, 3), + default_size=None, + min_size=139, + data_format="channels_last", + require_flatten=False, + ), + (150, 150, 3), + ) + + self.assertEqual( + utils.obtain_input_shape( + input_shape=(3, None, None), + default_size=None, + min_size=139, + data_format="channels_first", + require_flatten=False, + ), + (3, None, None), + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/applications/inception_resnet_v2.py b/keras/applications/inception_resnet_v2.py index b30a4799f10c..937139189898 100644 --- a/keras/applications/inception_resnet_v2.py +++ b/keras/applications/inception_resnet_v2.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """Inception-ResNet V2 model for Keras. Reference: @@ -23,372 +23,416 @@ import tensorflow.compat.v2 as tf +import keras from keras import backend +from keras import layers as keras_layers from keras.applications import imagenet_utils from keras.engine import training from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export -BASE_WEIGHT_URL = ('https://storage.googleapis.com/tensorflow/' - 'keras-applications/inception_resnet_v2/') +BASE_WEIGHT_URL = ( + "https://storage.googleapis.com/tensorflow/" + "keras-applications/inception_resnet_v2/" +) layers = None -@keras_export('keras.applications.inception_resnet_v2.InceptionResNetV2', - 'keras.applications.InceptionResNetV2') -def InceptionResNetV2(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - """Instantiates the Inception-ResNet v2 architecture. - - Reference: - - [Inception-v4, Inception-ResNet and the Impact of - Residual Connections on Learning](https://arxiv.org/abs/1602.07261) - (AAAI 2017) - - This function returns a Keras image classification model, - optionally loaded with weights pre-trained on ImageNet. - - For image classification use cases, see - [this page for detailed examples]( - https://keras.io/api/applications/#usage-examples-for-image-classification-models). 
- - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning]( - https://keras.io/guides/transfer_learning/). - - Note: each Keras Application expects a specific kind of input preprocessing. - For InceptionResNetV2, call - `tf.keras.applications.inception_resnet_v2.preprocess_input` - on your inputs before passing them to the model. - `inception_resnet_v2.preprocess_input` - will scale input pixels between -1 and 1. - - Args: - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is `False` (otherwise the input shape - has to be `(299, 299, 3)` (with `'channels_last'` data format) - or `(3, 299, 299)` (with `'channels_first'` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 75. - E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the last convolutional block. - - `'avg'` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is `True`, and - if no `weights` argument is specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - **kwargs: For backwards compatibility only. - - Returns: - A `keras.Model` instance. 
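# ---------------------------------------------------------------------------
# [Editor's note, not part of the patch] Typical end-to-end use of the API
# documented above; a hedged sketch that assumes the pretrained weights can
# be downloaded and uses a random 299x299 input purely for shape checking.
import numpy as np
from keras.applications import inception_resnet_v2

model = inception_resnet_v2.InceptionResNetV2(weights="imagenet")
img = np.random.uniform(0, 255, (1, 299, 299, 3)).astype("float32")
preds = model.predict(inception_resnet_v2.preprocess_input(img))
print(inception_resnet_v2.decode_predictions(preds, top=3))
# ---------------------------------------------------------------------------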
- """ - global layers - if 'layers' in kwargs: - layers = kwargs.pop('layers') - else: - layers = VersionAwareLayers() - if kwargs: - raise ValueError('Unknown argument(s): %s' % (kwargs,)) - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=299, - min_size=75, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) +@keras_export( + "keras.applications.inception_resnet_v2.InceptionResNetV2", + "keras.applications.InceptionResNetV2", +) +def InceptionResNetV2( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + """Instantiates the Inception-ResNet v2 architecture. + + Reference: + - [Inception-v4, Inception-ResNet and the Impact of + Residual Connections on Learning](https://arxiv.org/abs/1602.07261) + (AAAI 2017) + + This function returns a Keras image classification model, + optionally loaded with weights pre-trained on ImageNet. + + For image classification use cases, see + [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + + Note: each Keras Application expects a specific kind of input preprocessing. + For InceptionResNetV2, call + `tf.keras.applications.inception_resnet_v2.preprocess_input` + on your inputs before passing them to the model. + `inception_resnet_v2.preprocess_input` + will scale input pixels between -1 and 1. + + Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is `False` (otherwise the input shape + has to be `(299, 299, 3)` (with `'channels_last'` data format) + or `(3, 299, 299)` (with `'channels_first'` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 75. + E.g. `(150, 150, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the last convolutional block. + - `'avg'` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `'max'` means that global max pooling will be applied. 
+ classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + **kwargs: For backwards compatibility only. + + Returns: + A `keras.Model` instance. + """ + global layers + if "layers" in kwargs: + layers = kwargs.pop("layers") else: - img_input = input_tensor - - # Stem block: 35 x 35 x 192 - x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid') - x = conv2d_bn(x, 32, 3, padding='valid') - x = conv2d_bn(x, 64, 3) - x = layers.MaxPooling2D(3, strides=2)(x) - x = conv2d_bn(x, 80, 1, padding='valid') - x = conv2d_bn(x, 192, 3, padding='valid') - x = layers.MaxPooling2D(3, strides=2)(x) - - # Mixed 5b (Inception-A block): 35 x 35 x 320 - branch_0 = conv2d_bn(x, 96, 1) - branch_1 = conv2d_bn(x, 48, 1) - branch_1 = conv2d_bn(branch_1, 64, 5) - branch_2 = conv2d_bn(x, 64, 1) - branch_2 = conv2d_bn(branch_2, 96, 3) - branch_2 = conv2d_bn(branch_2, 96, 3) - branch_pool = layers.AveragePooling2D(3, strides=1, padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1) - branches = [branch_0, branch_1, branch_2, branch_pool] - channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 - x = layers.Concatenate(axis=channel_axis, name='mixed_5b')(branches) - - # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 - for block_idx in range(1, 11): - x = inception_resnet_block( - x, scale=0.17, block_type='block35', block_idx=block_idx) - - # Mixed 6a (Reduction-A block): 17 x 17 x 1088 - branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid') - branch_1 = conv2d_bn(x, 256, 1) - branch_1 = conv2d_bn(branch_1, 256, 3) - branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid') - branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) - branches = [branch_0, branch_1, branch_pool] - x = layers.Concatenate(axis=channel_axis, name='mixed_6a')(branches) - - # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 - for block_idx in range(1, 21): - x = inception_resnet_block( - x, scale=0.1, block_type='block17', block_idx=block_idx) - - # Mixed 7a (Reduction-B block): 8 x 8 x 2080 - branch_0 = conv2d_bn(x, 256, 1) - branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid') - branch_1 = conv2d_bn(x, 256, 1) - branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid') - branch_2 = conv2d_bn(x, 256, 1) - branch_2 = conv2d_bn(branch_2, 288, 3) - branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid') - branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) - branches = [branch_0, branch_1, branch_2, branch_pool] - x = layers.Concatenate(axis=channel_axis, name='mixed_7a')(branches) - - # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 - for block_idx in range(1, 10): + layers = VersionAwareLayers() + if kwargs: + raise ValueError(f"Unknown argument(s): {kwargs}") + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded." 
+ ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top`' + " as true, `classes` should be 1000" + ) + + # Determine proper input shape + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=299, + min_size=75, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + # Stem block: 35 x 35 x 192 + x = conv2d_bn(img_input, 32, 3, strides=2, padding="valid") + x = conv2d_bn(x, 32, 3, padding="valid") + x = conv2d_bn(x, 64, 3) + x = layers.MaxPooling2D(3, strides=2)(x) + x = conv2d_bn(x, 80, 1, padding="valid") + x = conv2d_bn(x, 192, 3, padding="valid") + x = layers.MaxPooling2D(3, strides=2)(x) + + # Mixed 5b (Inception-A block): 35 x 35 x 320 + branch_0 = conv2d_bn(x, 96, 1) + branch_1 = conv2d_bn(x, 48, 1) + branch_1 = conv2d_bn(branch_1, 64, 5) + branch_2 = conv2d_bn(x, 64, 1) + branch_2 = conv2d_bn(branch_2, 96, 3) + branch_2 = conv2d_bn(branch_2, 96, 3) + branch_pool = layers.AveragePooling2D(3, strides=1, padding="same")(x) + branch_pool = conv2d_bn(branch_pool, 64, 1) + branches = [branch_0, branch_1, branch_2, branch_pool] + channel_axis = 1 if backend.image_data_format() == "channels_first" else 3 + x = layers.Concatenate(axis=channel_axis, name="mixed_5b")(branches) + + # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 + for block_idx in range(1, 11): + x = inception_resnet_block( + x, scale=0.17, block_type="block35", block_idx=block_idx + ) + + # Mixed 6a (Reduction-A block): 17 x 17 x 1088 + branch_0 = conv2d_bn(x, 384, 3, strides=2, padding="valid") + branch_1 = conv2d_bn(x, 256, 1) + branch_1 = conv2d_bn(branch_1, 256, 3) + branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding="valid") + branch_pool = layers.MaxPooling2D(3, strides=2, padding="valid")(x) + branches = [branch_0, branch_1, branch_pool] + x = layers.Concatenate(axis=channel_axis, name="mixed_6a")(branches) + + # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 + for block_idx in range(1, 21): + x = inception_resnet_block( + x, scale=0.1, block_type="block17", block_idx=block_idx + ) + + # Mixed 7a (Reduction-B block): 8 x 8 x 2080 + branch_0 = conv2d_bn(x, 256, 1) + branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding="valid") + branch_1 = conv2d_bn(x, 256, 1) + branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding="valid") + branch_2 = conv2d_bn(x, 256, 1) + branch_2 = conv2d_bn(branch_2, 288, 3) + branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding="valid") + branch_pool = layers.MaxPooling2D(3, strides=2, padding="valid")(x) + branches = [branch_0, branch_1, branch_2, branch_pool] + x = layers.Concatenate(axis=channel_axis, name="mixed_7a")(branches) + + # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 + for block_idx in range(1, 10): + x = inception_resnet_block( + x, scale=0.2, block_type="block8", block_idx=block_idx + ) x = inception_resnet_block( - x, scale=0.2, block_type='block8', block_idx=block_idx) - x = inception_resnet_block( - x, scale=1., activation=None, block_type='block8', block_idx=10) - - # Final convolution block: 8 x 8 x 1536 - x = conv2d_bn(x, 1536, 1, name='conv_7b') - - if include_top: - # Classification block - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) 
- imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense(classes, activation=classifier_activation, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = training.Model(inputs, x, name='inception_resnet_v2') - - # Load weights. - if weights == 'imagenet': + x, scale=1.0, activation=None, block_type="block8", block_idx=10 + ) + + # Final convolution block: 8 x 8 x 1536 + x = conv2d_bn(x, 1536, 1, name="conv_7b") + if include_top: - fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5' - weights_path = data_utils.get_file( - fname, - BASE_WEIGHT_URL + fname, - cache_subdir='models', - file_hash='e693bd0210a403b3192acc6073ad2e96') + # Classification block + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = training.Model(inputs, x, name="inception_resnet_v2") + + # Load weights. + if weights == "imagenet": + if include_top: + fname = "inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5" + weights_path = data_utils.get_file( + fname, + BASE_WEIGHT_URL + fname, + cache_subdir="models", + file_hash="e693bd0210a403b3192acc6073ad2e96", + ) + else: + fname = ( + "inception_resnet_v2_weights_" + "tf_dim_ordering_tf_kernels_notop.h5" + ) + weights_path = data_utils.get_file( + fname, + BASE_WEIGHT_URL + fname, + cache_subdir="models", + file_hash="d19885ff4a710c122648d3b5c3b684e4", + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +def conv2d_bn( + x, + filters, + kernel_size, + strides=1, + padding="same", + activation="relu", + use_bias=False, + name=None, +): + """Utility function to apply conv + BN. + + Args: + x: input tensor. + filters: filters in `Conv2D`. + kernel_size: kernel size as in `Conv2D`. + strides: strides in `Conv2D`. + padding: padding mode in `Conv2D`. + activation: activation in `Conv2D`. + use_bias: whether to use a bias in `Conv2D`. + name: name of the ops; will become `name + '_ac'` for the activation + and `name + '_bn'` for the batch norm layer. + + Returns: + Output tensor after applying `Conv2D` and `BatchNormalization`. 
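# ---------------------------------------------------------------------------
# [Editor's note, not part of the patch] Why conv2d_bn defaults to
# use_bias=False and scale=False: BatchNormalization's beta already supplies
# a per-channel shift, and with a ReLU following, the gamma scale can be
# folded into the next layer. A hedged standalone sketch of the pattern.
from keras import layers

x = layers.Input(shape=(32, 32, 3))
y = layers.Conv2D(16, 3, use_bias=False)(x)    # no bias: BN's beta shifts
y = layers.BatchNormalization(scale=False)(y)  # no gamma: ReLU is scale-
y = layers.Activation("relu")(y)               # invariant up to folding
# ---------------------------------------------------------------------------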
+ """ + x = layers.Conv2D( + filters, + kernel_size, + strides=strides, + padding=padding, + use_bias=use_bias, + name=name, + )(x) + if not use_bias: + bn_axis = 1 if backend.image_data_format() == "channels_first" else 3 + bn_name = None if name is None else name + "_bn" + x = layers.BatchNormalization(axis=bn_axis, scale=False, name=bn_name)( + x + ) + if activation is not None: + ac_name = None if name is None else name + "_ac" + x = layers.Activation(activation, name=ac_name)(x) + return x + + +@keras.utils.register_keras_serializable() +class CustomScaleLayer(keras_layers.Layer): + def __init__(self, scale, **kwargs): + super().__init__(**kwargs) + self.scale = scale + + def get_config(self): + config = super().get_config() + config.update({"scale": self.scale}) + return config + + def call(self, inputs): + return inputs[0] + inputs[1] * self.scale + + +def inception_resnet_block(x, scale, block_type, block_idx, activation="relu"): + """Adds an Inception-ResNet block. + + This function builds 3 types of Inception-ResNet blocks mentioned + in the paper, controlled by the `block_type` argument (which is the + block name used in the official TF-slim implementation): + - Inception-ResNet-A: `block_type='block35'` + - Inception-ResNet-B: `block_type='block17'` + - Inception-ResNet-C: `block_type='block8'` + + Args: + x: input tensor. + scale: scaling factor to scale the residuals (i.e., the output of passing + `x` through an inception module) before adding them to the shortcut + branch. Let `r` be the output from the residual branch, the output of + this block will be `x + scale * r`. + block_type: `'block35'`, `'block17'` or `'block8'`, determines the network + structure in the residual branch. + block_idx: an `int` used for generating layer names. The Inception-ResNet + blocks are repeated many times in this network. We use `block_idx` to + identify each of the repetitions. For example, the first + Inception-ResNet-A block will have `block_type='block35', block_idx=0`, + and the layer names will have a common prefix `'block35_0'`. + activation: activation function to use at the end of the block (see + [activations](../activations.md)). When `activation=None`, no activation + is applied + (i.e., "linear" activation: `a(x) = x`). + + Returns: + Output tensor for the block. + + Raises: + ValueError: if `block_type` is not one of `'block35'`, + `'block17'` or `'block8'`. 
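# ---------------------------------------------------------------------------
# [Editor's note, not part of the patch] A numeric check of the residual
# rule `x + scale * r` that CustomScaleLayer (added above) implements; the
# patch swaps it in for the old non-serializable Lambda.
import numpy as np
import tensorflow as tf

scale = 0.17
shortcut = tf.ones((1, 2, 2, 4))        # stands in for x
residual = tf.fill((1, 2, 2, 4), 2.0)   # stands in for the branch output r
out = shortcut + residual * scale       # == CustomScaleLayer(scale)([x, r])
assert np.allclose(out.numpy(), 1.0 + 2.0 * scale)
# ---------------------------------------------------------------------------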
+ """ + if block_type == "block35": + branch_0 = conv2d_bn(x, 32, 1) + branch_1 = conv2d_bn(x, 32, 1) + branch_1 = conv2d_bn(branch_1, 32, 3) + branch_2 = conv2d_bn(x, 32, 1) + branch_2 = conv2d_bn(branch_2, 48, 3) + branch_2 = conv2d_bn(branch_2, 64, 3) + branches = [branch_0, branch_1, branch_2] + elif block_type == "block17": + branch_0 = conv2d_bn(x, 192, 1) + branch_1 = conv2d_bn(x, 128, 1) + branch_1 = conv2d_bn(branch_1, 160, [1, 7]) + branch_1 = conv2d_bn(branch_1, 192, [7, 1]) + branches = [branch_0, branch_1] + elif block_type == "block8": + branch_0 = conv2d_bn(x, 192, 1) + branch_1 = conv2d_bn(x, 192, 1) + branch_1 = conv2d_bn(branch_1, 224, [1, 3]) + branch_1 = conv2d_bn(branch_1, 256, [3, 1]) + branches = [branch_0, branch_1] else: - fname = ('inception_resnet_v2_weights_' - 'tf_dim_ordering_tf_kernels_notop.h5') - weights_path = data_utils.get_file( - fname, - BASE_WEIGHT_URL + fname, - cache_subdir='models', - file_hash='d19885ff4a710c122648d3b5c3b684e4') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -def conv2d_bn(x, - filters, - kernel_size, - strides=1, - padding='same', - activation='relu', - use_bias=False, - name=None): - """Utility function to apply conv + BN. - - Args: - x: input tensor. - filters: filters in `Conv2D`. - kernel_size: kernel size as in `Conv2D`. - strides: strides in `Conv2D`. - padding: padding mode in `Conv2D`. - activation: activation in `Conv2D`. - use_bias: whether to use a bias in `Conv2D`. - name: name of the ops; will become `name + '_ac'` for the activation - and `name + '_bn'` for the batch norm layer. - - Returns: - Output tensor after applying `Conv2D` and `BatchNormalization`. - """ - x = layers.Conv2D( - filters, - kernel_size, - strides=strides, - padding=padding, - use_bias=use_bias, - name=name)( - x) - if not use_bias: - bn_axis = 1 if backend.image_data_format() == 'channels_first' else 3 - bn_name = None if name is None else name + '_bn' - x = layers.BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) - if activation is not None: - ac_name = None if name is None else name + '_ac' - x = layers.Activation(activation, name=ac_name)(x) - return x - - -def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): - """Adds an Inception-ResNet block. - - This function builds 3 types of Inception-ResNet blocks mentioned - in the paper, controlled by the `block_type` argument (which is the - block name used in the official TF-slim implementation): - - Inception-ResNet-A: `block_type='block35'` - - Inception-ResNet-B: `block_type='block17'` - - Inception-ResNet-C: `block_type='block8'` - - Args: - x: input tensor. - scale: scaling factor to scale the residuals (i.e., the output of passing - `x` through an inception module) before adding them to the shortcut - branch. Let `r` be the output from the residual branch, the output of this - block will be `x + scale * r`. - block_type: `'block35'`, `'block17'` or `'block8'`, determines the network - structure in the residual branch. - block_idx: an `int` used for generating layer names. The Inception-ResNet - blocks are repeated many times in this network. We use `block_idx` to - identify each of the repetitions. For example, the first - Inception-ResNet-A block will have `block_type='block35', block_idx=0`, - and the layer names will have a common prefix `'block35_0'`. - activation: activation function to use at the end of the block (see - [activations](../activations.md)). 
When `activation=None`, no activation - is applied - (i.e., "linear" activation: `a(x) = x`). - - Returns: - Output tensor for the block. - - Raises: - ValueError: if `block_type` is not one of `'block35'`, - `'block17'` or `'block8'`. - """ - if block_type == 'block35': - branch_0 = conv2d_bn(x, 32, 1) - branch_1 = conv2d_bn(x, 32, 1) - branch_1 = conv2d_bn(branch_1, 32, 3) - branch_2 = conv2d_bn(x, 32, 1) - branch_2 = conv2d_bn(branch_2, 48, 3) - branch_2 = conv2d_bn(branch_2, 64, 3) - branches = [branch_0, branch_1, branch_2] - elif block_type == 'block17': - branch_0 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(x, 128, 1) - branch_1 = conv2d_bn(branch_1, 160, [1, 7]) - branch_1 = conv2d_bn(branch_1, 192, [7, 1]) - branches = [branch_0, branch_1] - elif block_type == 'block8': - branch_0 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(branch_1, 224, [1, 3]) - branch_1 = conv2d_bn(branch_1, 256, [3, 1]) - branches = [branch_0, branch_1] - else: - raise ValueError('Unknown Inception-ResNet block type. ' - 'Expects "block35", "block17" or "block8", ' - 'but got: ' + str(block_type)) - - block_name = block_type + '_' + str(block_idx) - channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 - mixed = layers.Concatenate( - axis=channel_axis, name=block_name + '_mixed')( - branches) - up = conv2d_bn( - mixed, - backend.int_shape(x)[channel_axis], - 1, - activation=None, - use_bias=True, - name=block_name + '_conv') - - x = layers.Lambda( - lambda inputs, scale: inputs[0] + inputs[1] * scale, - output_shape=backend.int_shape(x)[1:], - arguments={'scale': scale}, - name=block_name)([x, up]) - if activation is not None: - x = layers.Activation(activation, name=block_name + '_ac')(x) - return x - - -@keras_export('keras.applications.inception_resnet_v2.preprocess_input') + raise ValueError( + "Unknown Inception-ResNet block type. 
" + 'Expects "block35", "block17" or "block8", ' + "but got: " + str(block_type) + ) + + block_name = block_type + "_" + str(block_idx) + channel_axis = 1 if backend.image_data_format() == "channels_first" else 3 + mixed = layers.Concatenate(axis=channel_axis, name=block_name + "_mixed")( + branches + ) + up = conv2d_bn( + mixed, + backend.int_shape(x)[channel_axis], + 1, + activation=None, + use_bias=True, + name=block_name + "_conv", + ) + + x = CustomScaleLayer(scale)([x, up]) + if activation is not None: + x = layers.Activation(activation, name=block_name + "_ac")(x) + return x + + +@keras_export("keras.applications.inception_resnet_v2.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="tf" + ) -@keras_export('keras.applications.inception_resnet_v2.decode_predictions') +@keras_export("keras.applications.inception_resnet_v2.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/inception_v3.py b/keras/applications/inception_v3.py index bd12b8f75fb6..d3ab844e16a9 100644 --- a/keras/applications/inception_v3.py +++ b/keras/applications/inception_v3.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """Inception V3 model for Keras. Reference: @@ -28,399 +28,436 @@ from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export WEIGHTS_PATH = ( - 'https://storage.googleapis.com/tensorflow/keras-applications/' - 'inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5') + "https://storage.googleapis.com/tensorflow/keras-applications/" + "inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5" +) WEIGHTS_PATH_NO_TOP = ( - 'https://storage.googleapis.com/tensorflow/keras-applications/' - 'inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5') + "https://storage.googleapis.com/tensorflow/keras-applications/" + "inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5" +) layers = VersionAwareLayers() -@keras_export('keras.applications.inception_v3.InceptionV3', - 'keras.applications.InceptionV3') +@keras_export( + "keras.applications.inception_v3.InceptionV3", + "keras.applications.InceptionV3", +) def InceptionV3( include_top=True, - weights='imagenet', + weights="imagenet", input_tensor=None, input_shape=None, pooling=None, classes=1000, - classifier_activation='softmax'): - """Instantiates the Inception v3 architecture. - - Reference: - - [Rethinking the Inception Architecture for Computer Vision]( - http://arxiv.org/abs/1512.00567) (CVPR 2016) + classifier_activation="softmax", +): + """Instantiates the Inception v3 architecture. 
+ + Reference: + - [Rethinking the Inception Architecture for Computer Vision]( + http://arxiv.org/abs/1512.00567) (CVPR 2016) + + This function returns a Keras image classification model, + optionally loaded with weights pre-trained on ImageNet. + + For image classification use cases, see + [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + + Note: each Keras Application expects a specific kind of input preprocessing. + For `InceptionV3`, call + `tf.keras.applications.inception_v3.preprocess_input` on your inputs before + passing them to the model. `inception_v3.preprocess_input` will scale input + pixels between -1 and 1. + + Args: + include_top: Boolean, whether to include the fully-connected + layer at the top, as the last layer of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `imagenet` (pre-training on ImageNet), + or the path to the weights file to be loaded. Defaults to `imagenet`. + input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. `input_tensor` is useful for + sharing inputs between multiple different networks. Defaults to `None`. + input_shape: Optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(299, 299, 3)` (with `channels_last` data format) + or `(3, 299, 299)` (with `channels_first` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 75. + E.g. `(150, 150, 3)` would be one valid value. + `input_shape` will be ignored if the `input_tensor` is provided. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` (default) means that the output of the model will be + the 4D tensor output of the last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. Defaults to 1000. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + + Returns: + A `keras.Model` instance. 
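The reflowed docstring above leaves the documented contract unchanged: 299x299 default input, sides no smaller than 75 pixels, and inputs scaled to [-1, 1] by `preprocess_input` (`mode="tf"`, per the export near the end of this file). A minimal usage sketch against the public `tf.keras.applications` entry points:

import numpy as np
import tensorflow as tf

model = tf.keras.applications.InceptionV3(weights="imagenet")
image = np.random.uniform(0, 255, (1, 299, 299, 3)).astype("float32")
x = tf.keras.applications.inception_v3.preprocess_input(image)  # -> [-1, 1]
preds = model.predict(x)
print(tf.keras.applications.inception_v3.decode_predictions(preds, top=3))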
+ """ + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded; " + f"Received: weights={weights}" + ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top` ' + "as true, `classes` should be 1000; " + f"Received classes={classes}" + ) + + # Determine proper input shape + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=299, + min_size=75, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor - This function returns a Keras image classification model, - optionally loaded with weights pre-trained on ImageNet. - - For image classification use cases, see - [this page for detailed examples]( - https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning]( - https://keras.io/guides/transfer_learning/). - - Note: each Keras Application expects a specific kind of input preprocessing. - For `InceptionV3`, call `tf.keras.applications.inception_v3.preprocess_input` - on your inputs before passing them to the model. - `inception_v3.preprocess_input` will scale input pixels between -1 and 1. - - Args: - include_top: Boolean, whether to include the fully-connected - layer at the top, as the last layer of the network. Default to `True`. - weights: One of `None` (random initialization), - `imagenet` (pre-training on ImageNet), - or the path to the weights file to be loaded. Default to `imagenet`. - input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. `input_tensor` is useful for sharing - inputs between multiple different networks. Default to None. - input_shape: Optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(299, 299, 3)` (with `channels_last` data format) - or `(3, 299, 299)` (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 75. - E.g. `(150, 150, 3)` would be one valid value. - `input_shape` will be ignored if the `input_tensor` is provided. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` (default) means that the output of the model will be - the 4D tensor output of the last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. Default to 1000. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. 
- When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - - Returns: - A `keras.Model` instance. - """ - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded; ' - f'Received: weights={weights}') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000; ' - f'Received classes={classes}') - - # Determine proper input shape - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=299, - min_size=75, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) + if backend.image_data_format() == "channels_first": + channel_axis = 1 else: - img_input = input_tensor - - if backend.image_data_format() == 'channels_first': - channel_axis = 1 - else: - channel_axis = 3 - - x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') - x = conv2d_bn(x, 32, 3, 3, padding='valid') - x = conv2d_bn(x, 64, 3, 3) - x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv2d_bn(x, 80, 1, 1, padding='valid') - x = conv2d_bn(x, 192, 3, 3, padding='valid') - x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) - - # mixed 0: 35 x 35 x 256 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = layers.AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 32, 1, 1) - x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed0') - - # mixed 1: 35 x 35 x 288 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = layers.AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1, 1) - x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed1') - - # mixed 2: 35 x 35 x 288 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = layers.AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1, 1) - x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed2') - - # mixed 3: 17 x 17 x 768 - branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid') - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn( - branch3x3dbl, 96, 3, 3, strides=(2, 2), 
padding='valid') - - branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) - x = layers.concatenate([branch3x3, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed3') - - # mixed 4: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 128, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 128, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = layers.AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate([branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed4') - - # mixed 5, 6: 17 x 17 x 768 - for i in range(2): + channel_axis = 3 + + x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding="valid") + x = conv2d_bn(x, 32, 3, 3, padding="valid") + x = conv2d_bn(x, 64, 3, 3) + x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + + x = conv2d_bn(x, 80, 1, 1, padding="valid") + x = conv2d_bn(x, 192, 3, 3, padding="valid") + x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + + # mixed 0: 35 x 35 x 256 + branch1x1 = conv2d_bn(x, 64, 1, 1) + + branch5x5 = conv2d_bn(x, 48, 1, 1) + branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + + branch_pool = layers.AveragePooling2D( + (3, 3), strides=(1, 1), padding="same" + )(x) + branch_pool = conv2d_bn(branch_pool, 32, 1, 1) + x = layers.concatenate( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=channel_axis, + name="mixed0", + ) + + # mixed 1: 35 x 35 x 288 + branch1x1 = conv2d_bn(x, 64, 1, 1) + + branch5x5 = conv2d_bn(x, 48, 1, 1) + branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + + branch_pool = layers.AveragePooling2D( + (3, 3), strides=(1, 1), padding="same" + )(x) + branch_pool = conv2d_bn(branch_pool, 64, 1, 1) + x = layers.concatenate( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=channel_axis, + name="mixed1", + ) + + # mixed 2: 35 x 35 x 288 + branch1x1 = conv2d_bn(x, 64, 1, 1) + + branch5x5 = conv2d_bn(x, 48, 1, 1) + branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + + branch_pool = layers.AveragePooling2D( + (3, 3), strides=(1, 1), padding="same" + )(x) + branch_pool = conv2d_bn(branch_pool, 64, 1, 1) + x = layers.concatenate( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=channel_axis, + name="mixed2", + ) + + # mixed 3: 17 x 17 x 768 + branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding="valid") + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn( + branch3x3dbl, 96, 3, 3, strides=(2, 2), padding="valid" + ) + + branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + x = layers.concatenate( + [branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name="mixed3" + ) + + # mixed 4: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) - branch7x7 = conv2d_bn(x, 160, 1, 1) - branch7x7 = 
conv2d_bn(branch7x7, 160, 1, 7) + branch7x7 = conv2d_bn(x, 128, 1, 1) + branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - branch7x7dbl = conv2d_bn(x, 160, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) + branch7x7dbl = conv2d_bn(x, 128, 1, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) + branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch_pool = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same')( - x) + branch_pool = layers.AveragePooling2D( + (3, 3), strides=(1, 1), padding="same" + )(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate([branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed' + str(5 + i)) - - # mixed 7: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 192, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 192, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = layers.AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate([branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed7') - - # mixed 8: 8 x 8 x 1280 - branch3x3 = conv2d_bn(x, 192, 1, 1) - branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, strides=(2, 2), padding='valid') - - branch7x7x3 = conv2d_bn(x, 192, 1, 1) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) - branch7x7x3 = conv2d_bn( - branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') - - branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) - x = layers.concatenate([branch3x3, branch7x7x3, branch_pool], - axis=channel_axis, - name='mixed8') - - # mixed 9: 8 x 8 x 2048 - for i in range(2): - branch1x1 = conv2d_bn(x, 320, 1, 1) - - branch3x3 = conv2d_bn(x, 384, 1, 1) - branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) - branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) - branch3x3 = layers.concatenate([branch3x3_1, branch3x3_2], - axis=channel_axis, - name='mixed9_' + str(i)) - - branch3x3dbl = conv2d_bn(x, 448, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) - branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) - branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) - branch3x3dbl = layers.concatenate([branch3x3dbl_1, branch3x3dbl_2], - axis=channel_axis) - - branch_pool = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same')( - x) + x = layers.concatenate( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=channel_axis, + name="mixed4", + ) + + # mixed 5, 6: 17 x 17 x 768 + for i in range(2): + branch1x1 = conv2d_bn(x, 192, 1, 1) + + branch7x7 = conv2d_bn(x, 160, 1, 1) + branch7x7 = conv2d_bn(branch7x7, 160, 1, 7) + branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) + + branch7x7dbl = conv2d_bn(x, 160, 1, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) + branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) + + branch_pool = layers.AveragePooling2D( 
+ (3, 3), strides=(1, 1), padding="same" + )(x) + branch_pool = conv2d_bn(branch_pool, 192, 1, 1) + x = layers.concatenate( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=channel_axis, + name="mixed" + str(5 + i), + ) + + # mixed 7: 17 x 17 x 768 + branch1x1 = conv2d_bn(x, 192, 1, 1) + + branch7x7 = conv2d_bn(x, 192, 1, 1) + branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) + branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) + + branch7x7dbl = conv2d_bn(x, 192, 1, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) + + branch_pool = layers.AveragePooling2D( + (3, 3), strides=(1, 1), padding="same" + )(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate([branch1x1, branch3x3, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed' + str(9 + i)) - if include_top: - # Classification block - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense(classes, activation=classifier_activation, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = training.Model(inputs, x, name='inception_v3') - - # Load weights. - if weights == 'imagenet': + x = layers.concatenate( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=channel_axis, + name="mixed7", + ) + + # mixed 8: 8 x 8 x 1280 + branch3x3 = conv2d_bn(x, 192, 1, 1) + branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, strides=(2, 2), padding="valid") + + branch7x7x3 = conv2d_bn(x, 192, 1, 1) + branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) + branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) + branch7x7x3 = conv2d_bn( + branch7x7x3, 192, 3, 3, strides=(2, 2), padding="valid" + ) + + branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + x = layers.concatenate( + [branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name="mixed8" + ) + + # mixed 9: 8 x 8 x 2048 + for i in range(2): + branch1x1 = conv2d_bn(x, 320, 1, 1) + + branch3x3 = conv2d_bn(x, 384, 1, 1) + branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) + branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) + branch3x3 = layers.concatenate( + [branch3x3_1, branch3x3_2], + axis=channel_axis, + name="mixed9_" + str(i), + ) + + branch3x3dbl = conv2d_bn(x, 448, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) + branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) + branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) + branch3x3dbl = layers.concatenate( + [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis + ) + + branch_pool = layers.AveragePooling2D( + (3, 3), strides=(1, 1), padding="same" + )(x) + branch_pool = conv2d_bn(branch_pool, 192, 1, 1) + x = layers.concatenate( + [branch1x1, branch3x3, branch3x3dbl, branch_pool], + axis=channel_axis, + name="mixed" + str(9 + i), + ) if include_top: - weights_path = data_utils.get_file( - 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models', - file_hash='9a0d58056eeedaa3f26cb7ebd46da564') + # Classification block + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + 
imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. + model = training.Model(inputs, x, name="inception_v3") + + # Load weights. + if weights == "imagenet": + if include_top: + weights_path = data_utils.get_file( + "inception_v3_weights_tf_dim_ordering_tf_kernels.h5", + WEIGHTS_PATH, + cache_subdir="models", + file_hash="9a0d58056eeedaa3f26cb7ebd46da564", + ) + else: + weights_path = data_utils.get_file( + "inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5", + WEIGHTS_PATH_NO_TOP, + cache_subdir="models", + file_hash="bcbd6486424b2319ff4ef7d526e38f63", + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +def conv2d_bn( + x, filters, num_row, num_col, padding="same", strides=(1, 1), name=None +): + """Utility function to apply conv + BN. + + Args: + x: input tensor. + filters: filters in `Conv2D`. + num_row: height of the convolution kernel. + num_col: width of the convolution kernel. + padding: padding mode in `Conv2D`. + strides: strides in `Conv2D`. + name: name of the ops; will become `name + '_conv'` + for the convolution and `name + '_bn'` for the + batch norm layer. + + Returns: + Output tensor after applying `Conv2D` and `BatchNormalization`. + """ + if name is not None: + bn_name = name + "_bn" + conv_name = name + "_conv" + else: + bn_name = None + conv_name = None + if backend.image_data_format() == "channels_first": + bn_axis = 1 else: - weights_path = data_utils.get_file( - 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - file_hash='bcbd6486424b2319ff4ef7d526e38f63') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -def conv2d_bn(x, - filters, - num_row, - num_col, - padding='same', - strides=(1, 1), - name=None): - """Utility function to apply conv + BN. - - Args: - x: input tensor. - filters: filters in `Conv2D`. - num_row: height of the convolution kernel. - num_col: width of the convolution kernel. - padding: padding mode in `Conv2D`. - strides: strides in `Conv2D`. - name: name of the ops; will become `name + '_conv'` - for the convolution and `name + '_bn'` for the - batch norm layer. - - Returns: - Output tensor after applying `Conv2D` and `BatchNormalization`. 
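The `conv2d_bn` helper being reformatted below keeps its naming scheme intact: `name` yields `name + "_conv"` for the convolution, `name + "_bn"` for the batch norm, and the bare `name` goes to the ReLU activation. A standalone restatement for illustration, assuming `channels_last` (so `bn_axis=3`):

import tensorflow as tf
from tensorflow.keras import layers

def conv2d_bn(x, filters, num_row, num_col, name=None):
    # mirrors the helper in this hunk; channels_last assumed
    conv_name = None if name is None else name + "_conv"
    bn_name = None if name is None else name + "_bn"
    x = layers.Conv2D(filters, (num_row, num_col), padding="same",
                      use_bias=False, name=conv_name)(x)
    x = layers.BatchNormalization(axis=3, scale=False, name=bn_name)(x)
    return layers.Activation("relu", name=name)(x)

inp = layers.Input((32, 32, 3))
model = tf.keras.Model(inp, conv2d_bn(inp, 8, 3, 3, name="demo"))
print([layer.name for layer in model.layers])
# [..., 'demo_conv', 'demo_bn', 'demo']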
- """ - if name is not None: - bn_name = name + '_bn' - conv_name = name + '_conv' - else: - bn_name = None - conv_name = None - if backend.image_data_format() == 'channels_first': - bn_axis = 1 - else: - bn_axis = 3 - x = layers.Conv2D( - filters, (num_row, num_col), - strides=strides, - padding=padding, - use_bias=False, - name=conv_name)( - x) - x = layers.BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) - x = layers.Activation('relu', name=name)(x) - return x - - -@keras_export('keras.applications.inception_v3.preprocess_input') + bn_axis = 3 + x = layers.Conv2D( + filters, + (num_row, num_col), + strides=strides, + padding=padding, + use_bias=False, + name=conv_name, + )(x) + x = layers.BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) + x = layers.Activation("relu", name=name)(x) + return x + + +@keras_export("keras.applications.inception_v3.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="tf" + ) -@keras_export('keras.applications.inception_v3.decode_predictions') +@keras_export("keras.applications.inception_v3.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/mobilenet.py b/keras/applications/mobilenet.py index beaf22b18531..e3a0cdd09e18 100644 --- a/keras/applications/mobilenet.py +++ b/keras/applications/mobilenet.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """MobileNet v1 models for Keras. MobileNet is a general architecture and can be used for multiple use cases. 
@@ -38,23 +38,22 @@ on size 224 x 224: ---------------------------------------------------------------------------- Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M) ----------------------------------------------------------------------------- +-------------------------|---------------|-------------------|-------------- | 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | | 0.75 MobileNet-224 | 68.4 % | 325 | 2.6 | | 0.50 MobileNet-224 | 63.7 % | 149 | 1.3 | | 0.25 MobileNet-224 | 50.6 % | 41 | 0.5 | ----------------------------------------------------------------------------- The following table describes the performance of the 100 % MobileNet on various input sizes: ------------------------------------------------------------------------ - Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) ------------------------------------------------------------------------- +Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) +----------------------|---------------|-------------------|---------------- | 1.0 MobileNet-224 | 70.6 % | 569 | 4.2 | | 1.0 MobileNet-192 | 69.1 % | 418 | 4.2 | | 1.0 MobileNet-160 | 67.2 % | 290 | 4.2 | | 1.0 MobileNet-128 | 64.4 % | 186 | 4.2 | ------------------------------------------------------------------------- + Reference: - [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications]( @@ -69,388 +68,422 @@ from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export -BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/' - 'keras-applications/mobilenet/') +BASE_WEIGHT_PATH = ( + "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/" +) layers = None -@keras_export('keras.applications.mobilenet.MobileNet', - 'keras.applications.MobileNet') -def MobileNet(input_shape=None, - alpha=1.0, - depth_multiplier=1, - dropout=1e-3, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - """Instantiates the MobileNet architecture. - - Reference: - - [MobileNets: Efficient Convolutional Neural Networks - for Mobile Vision Applications]( - https://arxiv.org/abs/1704.04861) - - This function returns a Keras image classification model, - optionally loaded with weights pre-trained on ImageNet. - - For image classification use cases, see - [this page for detailed examples]( - https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning]( - https://keras.io/guides/transfer_learning/). - - Note: each Keras Application expects a specific kind of input preprocessing. - For MobileNet, call `tf.keras.applications.mobilenet.preprocess_input` - on your inputs before passing them to the model. - `mobilenet.preprocess_input` will scale input pixels between -1 and 1. - - Args: - input_shape: Optional shape tuple, only to be specified if `include_top` - is False (otherwise the input shape has to be `(224, 224, 3)` (with - `channels_last` data format) or (3, 224, 224) (with `channels_first` - data format). It should have exactly 3 inputs channels, and width and - height should be no smaller than 32. E.g. `(200, 200, 3)` would be one - valid value. Default to `None`. 
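The width multiplier `alpha` from the tables above scales every layer's filter count; in the code it is applied as `int(filters * alpha)` inside `_conv_block` and `_depthwise_conv_block`. A quick illustration using the per-stage filter counts this file wires up:

base_filters = [32, 64, 128, 128, 256, 256, 512, 1024]
for alpha in (0.25, 0.50, 0.75, 1.0):
    print(alpha, [int(f * alpha) for f in base_filters])
# 0.25 -> [8, 16, 32, 32, 64, 64, 128, 256]; 1.0 leaves them unchanged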
- `input_shape` will be ignored if the `input_tensor` is provided. - alpha: Controls the width of the network. This is known as the width - multiplier in the MobileNet paper. - If `alpha` < 1.0, proportionally - decreases the number of filters in each layer. - If `alpha` > 1.0, - proportionally increases the number of filters in each layer. - If - `alpha` = 1, default number of filters from the paper are used at each - layer. Default to 1.0. - depth_multiplier: Depth multiplier for depthwise convolution. This is - called the resolution multiplier in the MobileNet paper. Default to 1.0. - dropout: Dropout rate. Default to 0.001. - include_top: Boolean, whether to include the fully-connected layer at the - top of the network. Default to `True`. - weights: One of `None` (random initialization), 'imagenet' (pre-training - on ImageNet), or the path to the weights file to be loaded. Default to - `imagenet`. - input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to - use as image input for the model. `input_tensor` is useful for sharing - inputs between multiple different networks. Default to None. - pooling: Optional pooling mode for feature extraction when `include_top` - is `False`. - - `None` (default) means that the output of the model will be - the 4D tensor output of the last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will be applied. - classes: Optional number of classes to classify images into, only to be - specified if `include_top` is True, and if no `weights` argument is - specified. Defaults to 1000. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - **kwargs: For backwards compatibility only. - Returns: - A `keras.Model` instance. - """ - global layers - if 'layers' in kwargs: - layers = kwargs.pop('layers') - else: - layers = VersionAwareLayers() - if kwargs: - raise ValueError(f'Unknown argument(s): {(kwargs,)}') - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded. ' - f'Received weights={weights}') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000. ' - f'Received classes={classes}') - - # Determine proper input shape and default size. - if input_shape is None: - default_size = 224 - else: - if backend.image_data_format() == 'channels_first': - rows = input_shape[1] - cols = input_shape[2] +@keras_export( + "keras.applications.mobilenet.MobileNet", "keras.applications.MobileNet" +) +def MobileNet( + input_shape=None, + alpha=1.0, + depth_multiplier=1, + dropout=1e-3, + include_top=True, + weights="imagenet", + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + """Instantiates the MobileNet architecture. 
+ + Reference: + - [MobileNets: Efficient Convolutional Neural Networks + for Mobile Vision Applications]( + https://arxiv.org/abs/1704.04861) + + This function returns a Keras image classification model, + optionally loaded with weights pre-trained on ImageNet. + + For image classification use cases, see + [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + + Note: each Keras Application expects a specific kind of input preprocessing. + For MobileNet, call `tf.keras.applications.mobilenet.preprocess_input` + on your inputs before passing them to the model. + `mobilenet.preprocess_input` will scale input pixels between -1 and 1. + + Args: + input_shape: Optional shape tuple, only to be specified if `include_top` + is False (otherwise the input shape has to be `(224, 224, 3)` (with + `channels_last` data format) or (3, 224, 224) (with `channels_first` + data format). It should have exactly 3 inputs channels, and width and + height should be no smaller than 32. E.g. `(200, 200, 3)` would be one + valid value. Defaults to `None`. + `input_shape` will be ignored if the `input_tensor` is provided. + alpha: Controls the width of the network. This is known as the width + multiplier in the MobileNet paper. - If `alpha` < 1.0, proportionally + decreases the number of filters in each layer. - If `alpha` > 1.0, + proportionally increases the number of filters in each layer. - If + `alpha` = 1, default number of filters from the paper are used at each + layer. Defaults to `1.0`. + depth_multiplier: Depth multiplier for depthwise convolution. This is + called the resolution multiplier in the MobileNet paper. + Defaults to `1.0`. + dropout: Dropout rate. Defaults to `0.001`. + include_top: Boolean, whether to include the fully-connected layer at the + top of the network. Defaults to `True`. + weights: One of `None` (random initialization), 'imagenet' (pre-training + on ImageNet), or the path to the weights file to be loaded. Defaults to + `imagenet`. + input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to + use as image input for the model. `input_tensor` is useful for sharing + inputs between multiple different networks. Defaults to `None`. + pooling: Optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` (default) means that the output of the model will be + the 4D tensor output of the last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: Optional number of classes to classify images into, only to be + specified if `include_top` is True, and if no `weights` argument is + specified. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + **kwargs: For backwards compatibility only. + Returns: + A `keras.Model` instance. 
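As the checks further down enforce, pretrained ImageNet weights exist only for `alpha` in {0.25, 0.50, 0.75, 1.0} and square inputs of 128, 160, 192 or 224 pixels; anything else falls back to the 224x224 checkpoint with a warning. A minimal usage sketch:

import tensorflow as tf

# Half-width MobileNet as a feature extractor.
model = tf.keras.applications.MobileNet(
    input_shape=(160, 160, 3), alpha=0.5,
    include_top=False, pooling="avg", weights="imagenet")
print(model.output_shape)  # (None, 512): the final 1024 filters scaled by alpha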
+ """ + global layers + if "layers" in kwargs: + layers = kwargs.pop("layers") else: - rows = input_shape[0] - cols = input_shape[1] - - if rows == cols and rows in [128, 160, 192, 224]: - default_size = rows + layers = VersionAwareLayers() + if kwargs: + raise ValueError(f"Unknown argument(s): {(kwargs,)}") + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded. " + f"Received weights={weights}" + ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top` ' + "as true, `classes` should be 1000. " + f"Received classes={classes}" + ) + + # Determine proper input shape and default size. + if input_shape is None: + default_size = 224 else: - default_size = 224 - - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if backend.image_data_format() == 'channels_last': - row_axis, col_axis = (0, 1) - else: - row_axis, col_axis = (1, 2) - rows = input_shape[row_axis] - cols = input_shape[col_axis] - - if weights == 'imagenet': - if depth_multiplier != 1: - raise ValueError('If imagenet weights are being loaded, ' - 'depth multiplier must be 1. ' - f'Received depth_multiplier={depth_multiplier}') - - if alpha not in [0.25, 0.50, 0.75, 1.0]: - raise ValueError('If imagenet weights are being loaded, ' - 'alpha can be one of' - '`0.25`, `0.50`, `0.75` or `1.0` only. ' - f'Received alpha={alpha}') - - if rows != cols or rows not in [128, 160, 192, 224]: - rows = 224 - logging.warning('`input_shape` is undefined or non-square, ' - 'or `rows` is not in [128, 160, 192, 224]. 
' - 'Weights for input shape (224, 224) will be ' - 'loaded as the default.') - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) + if backend.image_data_format() == "channels_first": + rows = input_shape[1] + cols = input_shape[2] + else: + rows = input_shape[0] + cols = input_shape[1] + + if rows == cols and rows in [128, 160, 192, 224]: + default_size = rows + else: + default_size = 224 + + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=default_size, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if backend.image_data_format() == "channels_last": + row_axis, col_axis = (0, 1) else: - img_input = input_tensor - - x = _conv_block(img_input, 32, alpha, strides=(2, 2)) - x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) - - x = _depthwise_conv_block( - x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2) - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) - - x = _depthwise_conv_block( - x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4) - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) - - x = _depthwise_conv_block( - x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) - - x = _depthwise_conv_block( - x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12) - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) - - if include_top: - x = layers.GlobalAveragePooling2D(keepdims=True)(x) - x = layers.Dropout(dropout, name='dropout')(x) - x = layers.Conv2D(classes, (1, 1), padding='same', name='conv_preds')(x) - x = layers.Reshape((classes,), name='reshape_2')(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Activation(activation=classifier_activation, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = training.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) - - # Load weights. - if weights == 'imagenet': - if alpha == 1.0: - alpha_text = '1_0' - elif alpha == 0.75: - alpha_text = '7_5' - elif alpha == 0.50: - alpha_text = '5_0' + row_axis, col_axis = (1, 2) + rows = input_shape[row_axis] + cols = input_shape[col_axis] + + if weights == "imagenet": + if depth_multiplier != 1: + raise ValueError( + "If imagenet weights are being loaded, " + "depth multiplier must be 1. " + f"Received depth_multiplier={depth_multiplier}" + ) + + if alpha not in [0.25, 0.50, 0.75, 1.0]: + raise ValueError( + "If imagenet weights are being loaded, " + "alpha can be one of" + "`0.25`, `0.50`, `0.75` or `1.0` only. 
" + f"Received alpha={alpha}" + ) + + if rows != cols or rows not in [128, 160, 192, 224]: + rows = 224 + logging.warning( + "`input_shape` is undefined or non-square, " + "or `rows` is not in [128, 160, 192, 224]. " + "Weights for input shape (224, 224) will be " + "loaded as the default." + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - alpha_text = '2_5' + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + x = _conv_block(img_input, 32, alpha, strides=(2, 2)) + x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) + + x = _depthwise_conv_block( + x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2 + ) + x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) + + x = _depthwise_conv_block( + x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4 + ) + x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) + + x = _depthwise_conv_block( + x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6 + ) + x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) + x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) + x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) + x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) + x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) + + x = _depthwise_conv_block( + x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12 + ) + x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) if include_top: - model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = data_utils.get_file( - model_name, weight_path, cache_subdir='models') + x = layers.GlobalAveragePooling2D(keepdims=True)(x) + x = layers.Dropout(dropout, name="dropout")(x) + x = layers.Conv2D(classes, (1, 1), padding="same", name="conv_preds")(x) + x = layers.Reshape((classes,), name="reshape_2")(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Activation( + activation=classifier_activation, name="predictions" + )(x) else: - model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = data_utils.get_file( - model_name, weight_path, cache_subdir='models') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = training.Model(inputs, x, name=f"mobilenet_{alpha:0.2f}_{rows}") + + # Load weights. 
+ if weights == "imagenet": + if alpha == 1.0: + alpha_text = "1_0" + elif alpha == 0.75: + alpha_text = "7_5" + elif alpha == 0.50: + alpha_text = "5_0" + else: + alpha_text = "2_5" + + if include_top: + model_name = "mobilenet_%s_%d_tf.h5" % (alpha_text, rows) + weight_path = BASE_WEIGHT_PATH + model_name + weights_path = data_utils.get_file( + model_name, weight_path, cache_subdir="models" + ) + else: + model_name = "mobilenet_%s_%d_tf_no_top.h5" % (alpha_text, rows) + weight_path = BASE_WEIGHT_PATH + model_name + weights_path = data_utils.get_file( + model_name, weight_path, cache_subdir="models" + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): - """Adds an initial convolution layer (with batch normalization and relu6). - - Args: - inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last` - data format) or (3, rows, cols) (with `channels_first` data format). - It should have exactly 3 inputs channels, and width and height should - be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. - filters: Integer, the dimensionality of the output space (i.e. the - number of output filters in the convolution). - alpha: controls the width of the network. - If `alpha` < 1.0, - proportionally decreases the number of filters in each layer. - If - `alpha` > 1.0, proportionally increases the number of filters in each - layer. - If `alpha` = 1, default number of filters from the paper are - used at each layer. - kernel: An integer or tuple/list of 2 integers, specifying the width and - height of the 2D convolution window. Can be a single integer to - specify the same value for all spatial dimensions. - strides: An integer or tuple/list of 2 integers, specifying the strides - of the convolution along the width and height. Can be a single integer - to specify the same value for all spatial dimensions. Specifying any - stride value != 1 is incompatible with specifying any `dilation_rate` - value != 1. # Input shape - 4D tensor with shape: `(samples, channels, rows, cols)` if - data_format='channels_first' - or 4D tensor with shape: `(samples, rows, cols, channels)` if - data_format='channels_last'. # Output shape - 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if - data_format='channels_first' - or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if - data_format='channels_last'. `rows` and `cols` values might have - changed due to stride. - - Returns: - Output tensor of block. - """ - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - filters = int(filters * alpha) - x = layers.Conv2D( - filters, - kernel, - padding='same', - use_bias=False, - strides=strides, - name='conv1')(inputs) - x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x) - return layers.ReLU(6., name='conv1_relu')(x) - - -def _depthwise_conv_block(inputs, - pointwise_conv_filters, - alpha, - depth_multiplier=1, - strides=(1, 1), - block_id=1): - """Adds a depthwise convolution block. - - A depthwise convolution block consists of a depthwise conv, - batch normalization, relu6, pointwise convolution, - batch normalization and relu6 activation. - - Args: - inputs: Input tensor of shape `(rows, cols, channels)` (with - `channels_last` data format) or (channels, rows, cols) (with - `channels_first` data format). - pointwise_conv_filters: Integer, the dimensionality of the output space - (i.e. 
the number of output filters in the pointwise convolution). - alpha: controls the width of the network. - If `alpha` < 1.0, - proportionally decreases the number of filters in each layer. - If - `alpha` > 1.0, proportionally increases the number of filters in each - layer. - If `alpha` = 1, default number of filters from the paper are - used at each layer. - depth_multiplier: The number of depthwise convolution output channels - for each input channel. The total number of depthwise convolution - output channels will be equal to `filters_in * depth_multiplier`. - strides: An integer or tuple/list of 2 integers, specifying the strides - of the convolution along the width and height. Can be a single integer - to specify the same value for all spatial dimensions. Specifying any - stride value != 1 is incompatible with specifying any `dilation_rate` - value != 1. - block_id: Integer, a unique identification designating the block number. - # Input shape - 4D tensor with shape: `(batch, channels, rows, cols)` if - data_format='channels_first' - or 4D tensor with shape: `(batch, rows, cols, channels)` if - data_format='channels_last'. # Output shape - 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if - data_format='channels_first' - or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if - data_format='channels_last'. `rows` and `cols` values might have - changed due to stride. - - Returns: - Output tensor of block. - """ - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - pointwise_conv_filters = int(pointwise_conv_filters * alpha) - - if strides == (1, 1): - x = inputs - else: - x = layers.ZeroPadding2D(((0, 1), (0, 1)), name='conv_pad_%d' % block_id)( - inputs) - x = layers.DepthwiseConv2D((3, 3), - padding='same' if strides == (1, 1) else 'valid', - depth_multiplier=depth_multiplier, - strides=strides, - use_bias=False, - name='conv_dw_%d' % block_id)( - x) - x = layers.BatchNormalization( - axis=channel_axis, name='conv_dw_%d_bn' % block_id)( - x) - x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x) - - x = layers.Conv2D( - pointwise_conv_filters, (1, 1), - padding='same', - use_bias=False, - strides=(1, 1), - name='conv_pw_%d' % block_id)( - x) - x = layers.BatchNormalization( - axis=channel_axis, name='conv_pw_%d_bn' % block_id)( - x) - return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x) - - -@keras_export('keras.applications.mobilenet.preprocess_input') + """Adds an initial convolution layer (with batch normalization and relu6). + + Args: + inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last` + data format) or (3, rows, cols) (with `channels_first` data format). + It should have exactly 3 inputs channels, and width and height should + be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. + filters: Integer, the dimensionality of the output space (i.e. the + number of output filters in the convolution). + alpha: controls the width of the network. - If `alpha` < 1.0, + proportionally decreases the number of filters in each layer. - If + `alpha` > 1.0, proportionally increases the number of filters in each + layer. - If `alpha` = 1, default number of filters from the paper are + used at each layer. + kernel: An integer or tuple/list of 2 integers, specifying the width and + height of the 2D convolution window. Can be a single integer to + specify the same value for all spatial dimensions. 
+ strides: An integer or tuple/list of 2 integers, specifying the strides + of the convolution along the width and height. Can be a single integer + to specify the same value for all spatial dimensions. Specifying any + stride value != 1 is incompatible with specifying any `dilation_rate` + value != 1. # Input shape + 4D tensor with shape: `(samples, channels, rows, cols)` if + data_format='channels_first' + or 4D tensor with shape: `(samples, rows, cols, channels)` if + data_format='channels_last'. # Output shape + 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if + data_format='channels_first' + or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if + data_format='channels_last'. `rows` and `cols` values might have + changed due to stride. + + Returns: + Output tensor of block. + """ + channel_axis = 1 if backend.image_data_format() == "channels_first" else -1 + filters = int(filters * alpha) + x = layers.Conv2D( + filters, + kernel, + padding="same", + use_bias=False, + strides=strides, + name="conv1", + )(inputs) + x = layers.BatchNormalization(axis=channel_axis, name="conv1_bn")(x) + return layers.ReLU(6.0, name="conv1_relu")(x) + + +def _depthwise_conv_block( + inputs, + pointwise_conv_filters, + alpha, + depth_multiplier=1, + strides=(1, 1), + block_id=1, +): + """Adds a depthwise convolution block. + + A depthwise convolution block consists of a depthwise conv, + batch normalization, relu6, pointwise convolution, + batch normalization and relu6 activation. + + Args: + inputs: Input tensor of shape `(rows, cols, channels)` (with + `channels_last` data format) or (channels, rows, cols) (with + `channels_first` data format). + pointwise_conv_filters: Integer, the dimensionality of the output space + (i.e. the number of output filters in the pointwise convolution). + alpha: controls the width of the network. - If `alpha` < 1.0, + proportionally decreases the number of filters in each layer. - If + `alpha` > 1.0, proportionally increases the number of filters in each + layer. - If `alpha` = 1, default number of filters from the paper are + used at each layer. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. The total number of depthwise convolution + output channels will be equal to `filters_in * depth_multiplier`. + strides: An integer or tuple/list of 2 integers, specifying the strides + of the convolution along the width and height. Can be a single integer + to specify the same value for all spatial dimensions. Specifying any + stride value != 1 is incompatible with specifying any `dilation_rate` + value != 1. + block_id: Integer, a unique identification designating the block number. + # Input shape + 4D tensor with shape: `(batch, channels, rows, cols)` if + data_format='channels_first' + or 4D tensor with shape: `(batch, rows, cols, channels)` if + data_format='channels_last'. # Output shape + 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if + data_format='channels_first' + or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if + data_format='channels_last'. `rows` and `cols` values might have + changed due to stride. + + Returns: + Output tensor of block. 
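`_depthwise_conv_block`, documented above, is the depthwise-separable unit MobileNet is built from: a per-channel 3x3 `DepthwiseConv2D` followed by a 1x1 pointwise `Conv2D`, each with batch norm and ReLU6. The parameter saving over a dense 3x3 convolution is easy to check (ignoring biases and BN parameters):

c_in, c_out, k = 256, 256, 3
dense = k * k * c_in * c_out              # 589,824 weights
separable = k * k * c_in + c_in * c_out   # 2,304 + 65,536 = 67,840
print(round(dense / separable, 1))        # ~8.7x fewer weights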
+ """ + channel_axis = 1 if backend.image_data_format() == "channels_first" else -1 + pointwise_conv_filters = int(pointwise_conv_filters * alpha) + + if strides == (1, 1): + x = inputs + else: + x = layers.ZeroPadding2D( + ((0, 1), (0, 1)), name="conv_pad_%d" % block_id + )(inputs) + x = layers.DepthwiseConv2D( + (3, 3), + padding="same" if strides == (1, 1) else "valid", + depth_multiplier=depth_multiplier, + strides=strides, + use_bias=False, + name="conv_dw_%d" % block_id, + )(x) + x = layers.BatchNormalization( + axis=channel_axis, name="conv_dw_%d_bn" % block_id + )(x) + x = layers.ReLU(6.0, name="conv_dw_%d_relu" % block_id)(x) + + x = layers.Conv2D( + pointwise_conv_filters, + (1, 1), + padding="same", + use_bias=False, + strides=(1, 1), + name="conv_pw_%d" % block_id, + )(x) + x = layers.BatchNormalization( + axis=channel_axis, name="conv_pw_%d_bn" % block_id + )(x) + return layers.ReLU(6.0, name="conv_pw_%d_relu" % block_id)(x) + + +@keras_export("keras.applications.mobilenet.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="tf" + ) -@keras_export('keras.applications.mobilenet.decode_predictions') +@keras_export("keras.applications.mobilenet.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/mobilenet_v2.py b/keras/applications/mobilenet_v2.py index eeacdb0c2deb..cc09e0e1713b 100644 --- a/keras/applications/mobilenet_v2.py +++ b/keras/applications/mobilenet_v2.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """MobileNet v2 models for Keras. MobileNetV2 is a general architecture and can be used for multiple use cases. 
@@ -44,8 +44,8 @@ MobileNet on various input sizes: ------------------------------------------------------------------------ MACs stands for Multiply Adds - Classification Checkpoint|MACs (M)|Parameters (M)|Top 1 Accuracy|Top 5 Accuracy ---------------------------|------------|---------------|---------|----|--------- +Classification Checkpoint|MACs (M)|Parameters (M)|Top 1 Accuracy|Top 5 Accuracy +--------------------------|------------|---------------|---------|------------ | [mobilenet_v2_1.4_224] | 582 | 6.06 | 75.0 | 92.5 | | [mobilenet_v2_1.3_224] | 509 | 5.34 | 74.4 | 92.1 | | [mobilenet_v2_1.0_224] | 300 | 3.47 | 71.8 | 91.0 | @@ -74,456 +74,517 @@ https://arxiv.org/abs/1801.04381) (CVPR 2018) """ +import tensorflow.compat.v2 as tf + from keras import backend from keras.applications import imagenet_utils from keras.engine import training from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export -BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/' - 'keras-applications/mobilenet_v2/') +BASE_WEIGHT_PATH = ( + "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/" +) layers = None -@keras_export('keras.applications.mobilenet_v2.MobileNetV2', - 'keras.applications.MobileNetV2') -def MobileNetV2(input_shape=None, - alpha=1.0, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - """Instantiates the MobileNetV2 architecture. - - MobileNetV2 is very similar to the original MobileNet, - except that it uses inverted residual blocks with - bottlenecking features. It has a drastically lower - parameter count than the original MobileNet. - MobileNets support any input size greater - than 32 x 32, with larger image sizes - offering better performance. - - Reference: - - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( - https://arxiv.org/abs/1801.04381) (CVPR 2018) +@keras_export( + "keras.applications.mobilenet_v2.MobileNetV2", + "keras.applications.MobileNetV2", +) +def MobileNetV2( + input_shape=None, + alpha=1.0, + include_top=True, + weights="imagenet", + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + """Instantiates the MobileNetV2 architecture. + + MobileNetV2 is very similar to the original MobileNet, + except that it uses inverted residual blocks with + bottlenecking features. It has a drastically lower + parameter count than the original MobileNet. + MobileNets support any input size greater + than 32 x 32, with larger image sizes + offering better performance. + + Reference: + - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( + https://arxiv.org/abs/1801.04381) (CVPR 2018) + + This function returns a Keras image classification model, + optionally loaded with weights pre-trained on ImageNet. + + For image classification use cases, see + [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + + Note: each Keras Application expects a specific kind of input preprocessing. 
+ For MobileNetV2, call `tf.keras.applications.mobilenet_v2.preprocess_input` + on your inputs before passing them to the model. + `mobilenet_v2.preprocess_input` will scale input pixels between -1 and 1. + + Args: + input_shape: Optional shape tuple, to be specified if you would + like to use a model with an input image resolution that is not + (224, 224, 3). + It should have exactly 3 inputs channels (224, 224, 3). + You can also omit this option if you would like + to infer input_shape from an input_tensor. + If you choose to include both input_tensor and input_shape then + input_shape will be used if they match, if the shapes + do not match then we will throw an error. + E.g. `(160, 160, 3)` would be one valid value. + alpha: Float, larger than zero, controls the width of the network. This is + known as the width multiplier in the MobileNetV2 paper, but the name is + kept for consistency with `applications.MobileNetV1` model in Keras. + - If `alpha` < 1.0, proportionally decreases the number + of filters in each layer. + - If `alpha` > 1.0, proportionally increases the number + of filters in each layer. + - If `alpha` = 1.0, default number of filters from the paper + are used at each layer. + include_top: Boolean, whether to include the fully-connected layer at the + top of the network. Defaults to `True`. + weights: String, one of `None` (random initialization), 'imagenet' + (pre-training on ImageNet), or the path to the weights file to be + loaded. + input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + pooling: String, optional pooling mode for feature extraction when + `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional integer number of classes to classify images into, only + to be specified if `include_top` is True, and if no `weights` argument + is specified. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + **kwargs: For backwards compatibility only. + + Returns: + A `keras.Model` instance. + """ + global layers + if "layers" in kwargs: + layers = kwargs.pop("layers") + else: + layers = VersionAwareLayers() + if kwargs: + raise ValueError(f"Unknown argument(s): {kwargs}") + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded. " + f"Received `weights={weights}`" + ) - This function returns a Keras image classification model, - optionally loaded with weights pre-trained on ImageNet. - - For image classification use cases, see - [this page for detailed examples]( - https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning]( - https://keras.io/guides/transfer_learning/). 
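As a hedged usage sketch of the constructor documented above (the argument values are illustrative; 160 is one of the input sizes the docstring lists as valid):

```python
# Minimal usage sketch of the MobileNetV2 constructor; the input size and
# pooling choice are illustrative assumptions, not defaults from this diff.
import tensorflow as tf

model = tf.keras.applications.MobileNetV2(
    input_shape=(160, 160, 3),  # one of the sizes with released weights
    alpha=1.0,
    include_top=False,
    weights="imagenet",
    pooling="avg",
)
print(model.output_shape)  # (None, 1280): last conv block, globally pooled
```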
- - Note: each Keras Application expects a specific kind of input preprocessing. - For MobileNetV2, call `tf.keras.applications.mobilenet_v2.preprocess_input` - on your inputs before passing them to the model. - `mobilenet_v2.preprocess_input` will scale input pixels between -1 and 1. - - Args: - input_shape: Optional shape tuple, to be specified if you would - like to use a model with an input image resolution that is not - (224, 224, 3). - It should have exactly 3 inputs channels (224, 224, 3). - You can also omit this option if you would like - to infer input_shape from an input_tensor. - If you choose to include both input_tensor and input_shape then - input_shape will be used if they match, if the shapes - do not match then we will throw an error. - E.g. `(160, 160, 3)` would be one valid value. - alpha: Float, larger than zero, controls the width of the network. This is - known as the width multiplier in the MobileNetV2 paper, but the name is - kept for consistency with `applications.MobileNetV1` model in Keras. - - If `alpha` < 1.0, proportionally decreases the number - of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number - of filters in each layer. - - If `alpha` = 1.0, default number of filters from the paper - are used at each layer. - include_top: Boolean, whether to include the fully-connected layer at the - top of the network. Defaults to `True`. - weights: String, one of `None` (random initialization), 'imagenet' - (pre-training on ImageNet), or the path to the weights file to be loaded. - input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - pooling: String, optional pooling mode for feature extraction when - `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: Optional integer number of classes to classify images into, only to - be specified if `include_top` is True, and if no `weights` argument is - specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - **kwargs: For backwards compatibility only. - - Returns: - A `keras.Model` instance. - """ - global layers - if 'layers' in kwargs: - layers = kwargs.pop('layers') - else: - layers = VersionAwareLayers() - if kwargs: - raise ValueError(f'Unknown argument(s): {kwargs}') - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded. ' - f'Received `weights={weights}`') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError( - 'If using `weights` as `"imagenet"` with `include_top` ' - f'as true, `classes` should be 1000. Received `classes={classes}`') - - # Determine proper input shape and default size. 
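The preprocessing note above says `mobilenet_v2.preprocess_input` scales pixels into [-1, 1]; it delegates to `imagenet_utils.preprocess_input(..., mode="tf")`, as shown at the end of this file's diff. A quick numeric sketch with assumed sample values:

```python
import numpy as np
from keras.applications import mobilenet_v2

# mode="tf" rescales [0, 255] to [-1, 1]; the pixel values are assumptions.
img = np.array([[[0.0, 127.5, 255.0]]], dtype="float32")
print(mobilenet_v2.preprocess_input(img))  # approx. [[[-1., 0., 1.]]]
```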
- # If both input_shape and input_tensor are used, they should match - if input_shape is not None and input_tensor is not None: - try: - is_input_t_tensor = backend.is_keras_tensor(input_tensor) - except ValueError: - try: - is_input_t_tensor = backend.is_keras_tensor( - layer_utils.get_source_inputs(input_tensor)) - except ValueError: + if weights == "imagenet" and include_top and classes != 1000: raise ValueError( - f'input_tensor: {input_tensor}' - 'is not type input_tensor. ' - f'Received `type(input_tensor)={type(input_tensor)}`' + 'If using `weights` as `"imagenet"` with `include_top` ' + f"as true, `classes` should be 1000. Received `classes={classes}`" ) - if is_input_t_tensor: - if backend.image_data_format() == 'channels_first': - if backend.int_shape(input_tensor)[1] != input_shape[1]: - raise ValueError('input_shape[1] must equal shape(input_tensor)[1] ' - 'when `image_data_format` is `channels_first`; ' - 'Received `input_tensor.shape=' - f'{input_tensor.shape}`' - f', `input_shape={input_shape}`') - else: - if backend.int_shape(input_tensor)[2] != input_shape[1]: - raise ValueError( - 'input_tensor.shape[2] must equal input_shape[1]; ' - 'Received `input_tensor.shape=' - f'{input_tensor.shape}`, ' - f'`input_shape={input_shape}`') - else: - raise ValueError('input_tensor is not a Keras tensor; ' - f'Received `input_tensor={input_tensor}`') - - # If input_shape is None, infer shape from input_tensor. - if input_shape is None and input_tensor is not None: - - try: - backend.is_keras_tensor(input_tensor) - except ValueError: - raise ValueError('input_tensor must be a valid Keras tensor type; ' - f'Received {input_tensor} of type {type(input_tensor)}') - - if input_shape is None and not backend.is_keras_tensor(input_tensor): - default_size = 224 - elif input_shape is None and backend.is_keras_tensor(input_tensor): - if backend.image_data_format() == 'channels_first': - rows = backend.int_shape(input_tensor)[2] - cols = backend.int_shape(input_tensor)[3] - else: - rows = backend.int_shape(input_tensor)[1] - cols = backend.int_shape(input_tensor)[2] - - if rows == cols and rows in [96, 128, 160, 192, 224]: - default_size = rows - else: - default_size = 224 - # If input_shape is None and no input_tensor - elif input_shape is None: - default_size = 224 + # Determine proper input shape and default size. + # If both input_shape and input_tensor are used, they should match + if input_shape is not None and input_tensor is not None: + try: + is_input_t_tensor = backend.is_keras_tensor(input_tensor) + except ValueError: + try: + is_input_t_tensor = backend.is_keras_tensor( + layer_utils.get_source_inputs(input_tensor) + ) + except ValueError: + raise ValueError( + f"input_tensor: {input_tensor}" + "is not type input_tensor. 
" + f"Received `type(input_tensor)={type(input_tensor)}`" + ) + if is_input_t_tensor: + if backend.image_data_format() == "channels_first": + if backend.int_shape(input_tensor)[1] != input_shape[1]: + raise ValueError( + "input_shape[1] must equal shape(input_tensor)[1] " + "when `image_data_format` is `channels_first`; " + "Received `input_tensor.shape=" + f"{input_tensor.shape}`" + f", `input_shape={input_shape}`" + ) + else: + if backend.int_shape(input_tensor)[2] != input_shape[1]: + raise ValueError( + "input_tensor.shape[2] must equal input_shape[1]; " + "Received `input_tensor.shape=" + f"{input_tensor.shape}`, " + f"`input_shape={input_shape}`" + ) + else: + raise ValueError( + "input_tensor is not a Keras tensor; " + f"Received `input_tensor={input_tensor}`" + ) + + # If input_shape is None, infer shape from input_tensor. + if input_shape is None and input_tensor is not None: + + try: + backend.is_keras_tensor(input_tensor) + except ValueError: + raise ValueError( + "input_tensor must be a valid Keras tensor type; " + f"Received {input_tensor} of type {type(input_tensor)}" + ) + + if input_shape is None and not backend.is_keras_tensor(input_tensor): + default_size = 224 + elif input_shape is None and backend.is_keras_tensor(input_tensor): + if backend.image_data_format() == "channels_first": + rows = backend.int_shape(input_tensor)[2] + cols = backend.int_shape(input_tensor)[3] + else: + rows = backend.int_shape(input_tensor)[1] + cols = backend.int_shape(input_tensor)[2] + + if rows == cols and rows in [96, 128, 160, 192, 224]: + default_size = rows + else: + default_size = 224 + + # If input_shape is None and no input_tensor + elif input_shape is None: + default_size = 224 - # If input_shape is not None, assume default size. - else: - if backend.image_data_format() == 'channels_first': - rows = input_shape[1] - cols = input_shape[2] + # If input_shape is not None, assume default size. else: - rows = input_shape[0] - cols = input_shape[1] - - if rows == cols and rows in [96, 128, 160, 192, 224]: - default_size = rows + if backend.image_data_format() == "channels_first": + rows = input_shape[1] + cols = input_shape[2] + else: + rows = input_shape[0] + cols = input_shape[1] + + if rows == cols and rows in [96, 128, 160, 192, 224]: + default_size = rows + else: + default_size = 224 + + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=default_size, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if backend.image_data_format() == "channels_last": + row_axis, col_axis = (0, 1) else: - default_size = 224 - - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if backend.image_data_format() == 'channels_last': - row_axis, col_axis = (0, 1) - else: - row_axis, col_axis = (1, 2) - rows = input_shape[row_axis] - cols = input_shape[col_axis] - - if weights == 'imagenet': - if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: - raise ValueError('If imagenet weights are being loaded, ' - 'alpha must be one of `0.35`, `0.50`, `0.75`, ' - '`1.0`, `1.3` or `1.4` only;' - f' Received `alpha={alpha}`') - - if rows != cols or rows not in [96, 128, 160, 192, 224]: - rows = 224 - logging.warning('`input_shape` is undefined or non-square, ' - 'or `rows` is not in [96, 128, 160, 192, 224]. 
' - 'Weights for input shape (224, 224) will be ' - 'loaded as the default.') - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) + row_axis, col_axis = (1, 2) + rows = input_shape[row_axis] + cols = input_shape[col_axis] + + if weights == "imagenet": + if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: + raise ValueError( + "If imagenet weights are being loaded, " + "alpha must be one of `0.35`, `0.50`, `0.75`, " + "`1.0`, `1.3` or `1.4` only;" + f" Received `alpha={alpha}`" + ) + + if rows != cols or rows not in [96, 128, 160, 192, 224]: + rows = 224 + logging.warning( + "`input_shape` is undefined or non-square, " + "or `rows` is not in [96, 128, 160, 192, 224]. " + "Weights for input shape (224, 224) will be " + "loaded as the default." + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - img_input = input_tensor - - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - - first_block_filters = _make_divisible(32 * alpha, 8) - x = layers.Conv2D( - first_block_filters, - kernel_size=3, - strides=(2, 2), - padding='same', - use_bias=False, - name='Conv1')(img_input) - x = layers.BatchNormalization( - axis=channel_axis, epsilon=1e-3, momentum=0.999, name='bn_Conv1')( - x) - x = layers.ReLU(6., name='Conv1_relu')(x) - - x = _inverted_res_block( - x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0) - - x = _inverted_res_block( - x, filters=24, alpha=alpha, stride=2, expansion=6, block_id=1) - x = _inverted_res_block( - x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2) - - x = _inverted_res_block( - x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3) - x = _inverted_res_block( - x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4) - x = _inverted_res_block( - x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5) - - x = _inverted_res_block( - x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6) - x = _inverted_res_block( - x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7) - x = _inverted_res_block( - x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8) - x = _inverted_res_block( - x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9) - - x = _inverted_res_block( - x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10) - x = _inverted_res_block( - x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11) - x = _inverted_res_block( - x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12) - - x = _inverted_res_block( - x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13) - x = _inverted_res_block( - x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14) - x = _inverted_res_block( - x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15) - - x = _inverted_res_block( - x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16) - - # no alpha applied to last conv as stated in the paper: - # if the width multiplier is greater than 1 we increase the number of output - # channels. 
- if alpha > 1.0: - last_block_filters = _make_divisible(1280 * alpha, 8) - else: - last_block_filters = 1280 - - x = layers.Conv2D( - last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')( - x) - x = layers.BatchNormalization( - axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1_bn')( - x) - x = layers.ReLU(6., name='out_relu')(x) - - if include_top: - x = layers.GlobalAveragePooling2D()(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense(classes, activation=classifier_activation, - name='predictions')(x) - - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account any potential predecessors of - # `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = training.Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows)) - - # Load weights. - if weights == 'imagenet': - if include_top: - model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + - str(float(alpha)) + '_' + str(rows) + '.h5') - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = data_utils.get_file( - model_name, weight_path, cache_subdir='models') + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + channel_axis = 1 if backend.image_data_format() == "channels_first" else -1 + + first_block_filters = _make_divisible(32 * alpha, 8) + x = layers.Conv2D( + first_block_filters, + kernel_size=3, + strides=(2, 2), + padding="same", + use_bias=False, + name="Conv1", + )(img_input) + x = layers.BatchNormalization( + axis=channel_axis, epsilon=1e-3, momentum=0.999, name="bn_Conv1" + )(x) + x = layers.ReLU(6.0, name="Conv1_relu")(x) + + x = _inverted_res_block( + x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0 + ) + + x = _inverted_res_block( + x, filters=24, alpha=alpha, stride=2, expansion=6, block_id=1 + ) + x = _inverted_res_block( + x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2 + ) + + x = _inverted_res_block( + x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3 + ) + x = _inverted_res_block( + x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4 + ) + x = _inverted_res_block( + x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5 + ) + + x = _inverted_res_block( + x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6 + ) + x = _inverted_res_block( + x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7 + ) + x = _inverted_res_block( + x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8 + ) + x = _inverted_res_block( + x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9 + ) + + x = _inverted_res_block( + x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10 + ) + x = _inverted_res_block( + x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11 + ) + x = _inverted_res_block( + x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12 + ) + + x = _inverted_res_block( + x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13 + ) + x = _inverted_res_block( + x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14 + ) + x = _inverted_res_block( + x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15 + ) + + x = _inverted_res_block( + x, filters=320, alpha=alpha, stride=1, expansion=6, 
block_id=16 + ) + + # no alpha applied to last conv as stated in the paper: + # if the width multiplier is greater than 1 we increase the number of output + # channels. + if alpha > 1.0: + last_block_filters = _make_divisible(1280 * alpha, 8) else: - model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + - str(float(alpha)) + '_' + str(rows) + '_no_top' + '.h5') - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = data_utils.get_file( - model_name, weight_path, cache_subdir='models') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) + last_block_filters = 1280 + + x = layers.Conv2D( + last_block_filters, kernel_size=1, use_bias=False, name="Conv_1" + )(x) + x = layers.BatchNormalization( + axis=channel_axis, epsilon=1e-3, momentum=0.999, name="Conv_1_bn" + )(x) + x = layers.ReLU(6.0, name="out_relu")(x) - return model + if include_top: + x = layers.GlobalAveragePooling2D()(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) + + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account any potential predecessors of + # `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = training.Model(inputs, x, name=f"mobilenetv2_{alpha:0.2f}_{rows}") + + # Load weights. + if weights == "imagenet": + if include_top: + model_name = ( + "mobilenet_v2_weights_tf_dim_ordering_tf_kernels_" + + str(float(alpha)) + + "_" + + str(rows) + + ".h5" + ) + weight_path = BASE_WEIGHT_PATH + model_name + weights_path = data_utils.get_file( + model_name, weight_path, cache_subdir="models" + ) + else: + model_name = ( + "mobilenet_v2_weights_tf_dim_ordering_tf_kernels_" + + str(float(alpha)) + + "_" + + str(rows) + + "_no_top" + + ".h5" + ) + weight_path = BASE_WEIGHT_PATH + model_name + weights_path = data_utils.get_file( + model_name, weight_path, cache_subdir="models" + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): - """Inverted ResNet block.""" - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - - in_channels = backend.int_shape(inputs)[channel_axis] - pointwise_conv_filters = int(filters * alpha) - # Ensure the number of filters on the last 1x1 convolution is divisible by 8. - pointwise_filters = _make_divisible(pointwise_conv_filters, 8) - x = inputs - prefix = 'block_{}_'.format(block_id) - - if block_id: - # Expand with a pointwise 1x1 convolution. + """Inverted ResNet block.""" + channel_axis = 1 if backend.image_data_format() == "channels_first" else -1 + + in_channels = backend.int_shape(inputs)[channel_axis] + pointwise_conv_filters = int(filters * alpha) + # Ensure the number of filters on the last 1x1 convolution is divisible by + # 8. + pointwise_filters = _make_divisible(pointwise_conv_filters, 8) + x = inputs + prefix = f"block_{block_id}_" + + if block_id: + # Expand with a pointwise 1x1 convolution. 
+ x = layers.Conv2D( + expansion * in_channels, + kernel_size=1, + padding="same", + use_bias=False, + activation=None, + name=prefix + "expand", + )(x) + x = layers.BatchNormalization( + axis=channel_axis, + epsilon=1e-3, + momentum=0.999, + name=prefix + "expand_BN", + )(x) + x = layers.ReLU(6.0, name=prefix + "expand_relu")(x) + else: + prefix = "expanded_conv_" + + # Depthwise 3x3 convolution. + if stride == 2: + x = layers.ZeroPadding2D( + padding=imagenet_utils.correct_pad(x, 3), name=prefix + "pad" + )(x) + x = layers.DepthwiseConv2D( + kernel_size=3, + strides=stride, + activation=None, + use_bias=False, + padding="same" if stride == 1 else "valid", + name=prefix + "depthwise", + )(x) + x = layers.BatchNormalization( + axis=channel_axis, + epsilon=1e-3, + momentum=0.999, + name=prefix + "depthwise_BN", + )(x) + + x = layers.ReLU(6.0, name=prefix + "depthwise_relu")(x) + + # Project with a pointwise 1x1 convolution. x = layers.Conv2D( - expansion * in_channels, + pointwise_filters, kernel_size=1, - padding='same', + padding="same", use_bias=False, activation=None, - name=prefix + 'expand')( - x) + name=prefix + "project", + )(x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, - name=prefix + 'expand_BN')( - x) - x = layers.ReLU(6., name=prefix + 'expand_relu')(x) - else: - prefix = 'expanded_conv_' - - # Depthwise 3x3 convolution. - if stride == 2: - x = layers.ZeroPadding2D( - padding=imagenet_utils.correct_pad(x, 3), - name=prefix + 'pad')(x) - x = layers.DepthwiseConv2D( - kernel_size=3, - strides=stride, - activation=None, - use_bias=False, - padding='same' if stride == 1 else 'valid', - name=prefix + 'depthwise')( - x) - x = layers.BatchNormalization( - axis=channel_axis, - epsilon=1e-3, - momentum=0.999, - name=prefix + 'depthwise_BN')( - x) - - x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) - - # Project with a pointwise 1x1 convolution. - x = layers.Conv2D( - pointwise_filters, - kernel_size=1, - padding='same', - use_bias=False, - activation=None, - name=prefix + 'project')( - x) - x = layers.BatchNormalization( - axis=channel_axis, - epsilon=1e-3, - momentum=0.999, - name=prefix + 'project_BN')( - x) - - if in_channels == pointwise_filters and stride == 1: - return layers.Add(name=prefix + 'add')([inputs, x]) - return x + name=prefix + "project_BN", + )(x) + + if in_channels == pointwise_filters and stride == 1: + return layers.Add(name=prefix + "add")([inputs, x]) + return x def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
+ if new_v < 0.9 * v: + new_v += divisor + return new_v -@keras_export('keras.applications.mobilenet_v2.preprocess_input') +@keras_export("keras.applications.mobilenet_v2.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="tf" + ) -@keras_export('keras.applications.mobilenet_v2.decode_predictions') +@keras_export("keras.applications.mobilenet_v2.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/mobilenet_v3.py b/keras/applications/mobilenet_v3.py index d149797b4ded..b79c4a663678 100644 --- a/keras/applications/mobilenet_v3.py +++ b/keras/applications/mobilenet_v3.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name -# pylint: disable=missing-function-docstring + + """MobileNet v3 models for Keras.""" import tensorflow.compat.v2 as tf @@ -24,26 +24,40 @@ from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export - # TODO(scottzhu): Change this to the GCS path. 
-BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/' - 'keras-applications/mobilenet_v3/') +BASE_WEIGHT_PATH = ( + "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/" +) WEIGHTS_HASHES = { - 'large_224_0.75_float': ('765b44a33ad4005b3ac83185abf1d0eb', - '40af19a13ebea4e2ee0c676887f69a2e'), - 'large_224_1.0_float': ('59e551e166be033d707958cf9e29a6a7', - '07fb09a5933dd0c8eaafa16978110389'), - 'large_minimalistic_224_1.0_float': ('675e7b876c45c57e9e63e6d90a36599c', - 'ec5221f64a2f6d1ef965a614bdae7973'), - 'small_224_0.75_float': ('cb65d4e5be93758266aa0a7f2c6708b7', - 'ebdb5cc8e0b497cd13a7c275d475c819'), - 'small_224_1.0_float': ('8768d4c2e7dee89b9d02b2d03d65d862', - 'd3e8ec802a04aa4fc771ee12a9a9b836'), - 'small_minimalistic_224_1.0_float': ('99cd97fb2fcdad2bf028eb838de69e37', - 'cde8136e733e811080d9fcd8a252f7e4'), + "large_224_0.75_float": ( + "765b44a33ad4005b3ac83185abf1d0eb", + "40af19a13ebea4e2ee0c676887f69a2e", + ), + "large_224_1.0_float": ( + "59e551e166be033d707958cf9e29a6a7", + "07fb09a5933dd0c8eaafa16978110389", + ), + "large_minimalistic_224_1.0_float": ( + "675e7b876c45c57e9e63e6d90a36599c", + "ec5221f64a2f6d1ef965a614bdae7973", + ), + "small_224_0.75_float": ( + "cb65d4e5be93758266aa0a7f2c6708b7", + "ebdb5cc8e0b497cd13a7c275d475c819", + ), + "small_224_1.0_float": ( + "8768d4c2e7dee89b9d02b2d03d65d862", + "d3e8ec802a04aa4fc771ee12a9a9b836", + ), + "small_minimalistic_224_1.0_float": ( + "99cd97fb2fcdad2bf028eb838de69e37", + "cde8136e733e811080d9fcd8a252f7e4", + ), } layers = VersionAwareLayers() @@ -80,8 +94,8 @@ For MobileNetV3, by default input preprocessing is included as a part of the model (as a `Rescaling` layer), and thus `tf.keras.applications.mobilenet_v3.preprocess_input` is actually a - pass-through function. In this use case, MobileNetV3 models expect their inputs - to be float tensors of pixels with values in the [0-255] range. + pass-through function. In this use case, MobileNetV3 models expect their + inputs to be float tensors of pixels with values in the [0-255] range. At the same time, preprocessing as a part of the model (i.e. `Rescaling` layer) can be disabled by setting `include_preprocessing` argument to False. With preprocessing disabled MobileNetV3 models expect their inputs to be float @@ -155,309 +169,397 @@ """ -def MobileNetV3(stack_fn, - last_point_ch, - input_shape=None, - alpha=1.0, - model_type='large', - minimalistic=False, - include_top=True, - weights='imagenet', - input_tensor=None, - classes=1000, - pooling=None, - dropout_rate=0.2, - classifier_activation='softmax', - include_preprocessing=True): - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded. ' - f'Received weights={weights}') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000. ' - f'Received classes={classes}') - - # Determine proper input shape and default size. 
- # If both input_shape and input_tensor are used, they should match - if input_shape is not None and input_tensor is not None: - try: - is_input_t_tensor = backend.is_keras_tensor(input_tensor) - except ValueError: - try: - is_input_t_tensor = backend.is_keras_tensor( - layer_utils.get_source_inputs(input_tensor)) - except ValueError: - raise ValueError('input_tensor: ', input_tensor, - 'is not type input_tensor. ' - f'Received type(input_tensor)={type(input_tensor)}') - if is_input_t_tensor: - if backend.image_data_format() == 'channels_first': - if backend.int_shape(input_tensor)[1] != input_shape[1]: - raise ValueError('When backend.image_data_format()=channels_first, ' - 'input_shape[1] must equal ' - 'backend.int_shape(input_tensor)[1]. Received ' - f'input_shape={input_shape}, ' - 'backend.int_shape(input_tensor)=' - f'{backend.int_shape(input_tensor)}') - else: - if backend.int_shape(input_tensor)[2] != input_shape[1]: - raise ValueError('input_shape[1] must equal ' - 'backend.int_shape(input_tensor)[2]. Received ' - f'input_shape={input_shape}, ' - 'backend.int_shape(input_tensor)=' - f'{backend.int_shape(input_tensor)}') +def MobileNetV3( + stack_fn, + last_point_ch, + input_shape=None, + alpha=1.0, + model_type="large", + minimalistic=False, + include_top=True, + weights="imagenet", + input_tensor=None, + classes=1000, + pooling=None, + dropout_rate=0.2, + classifier_activation="softmax", + include_preprocessing=True, +): + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded. " + f"Received weights={weights}" + ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top` ' + "as true, `classes` should be 1000. " + f"Received classes={classes}" + ) + + # Determine proper input shape and default size. + # If both input_shape and input_tensor are used, they should match + if input_shape is not None and input_tensor is not None: + try: + is_input_t_tensor = backend.is_keras_tensor(input_tensor) + except ValueError: + try: + is_input_t_tensor = backend.is_keras_tensor( + layer_utils.get_source_inputs(input_tensor) + ) + except ValueError: + raise ValueError( + "input_tensor: ", + input_tensor, + "is not type input_tensor. " + f"Received type(input_tensor)={type(input_tensor)}", + ) + if is_input_t_tensor: + if backend.image_data_format() == "channels_first": + if backend.int_shape(input_tensor)[1] != input_shape[1]: + raise ValueError( + "When backend.image_data_format()=channels_first, " + "input_shape[1] must equal " + "backend.int_shape(input_tensor)[1]. Received " + f"input_shape={input_shape}, " + "backend.int_shape(input_tensor)=" + f"{backend.int_shape(input_tensor)}" + ) + else: + if backend.int_shape(input_tensor)[2] != input_shape[1]: + raise ValueError( + "input_shape[1] must equal " + "backend.int_shape(input_tensor)[2]. 
Received " + f"input_shape={input_shape}, " + "backend.int_shape(input_tensor)=" + f"{backend.int_shape(input_tensor)}" + ) + else: + raise ValueError( + "input_tensor specified: ", + input_tensor, + "is not a keras tensor", + ) + + # If input_shape is None, infer shape from input_tensor + if input_shape is None and input_tensor is not None: + + try: + backend.is_keras_tensor(input_tensor) + except ValueError: + raise ValueError( + "input_tensor: ", + input_tensor, + "is type: ", + type(input_tensor), + "which is not a valid type", + ) + + if backend.is_keras_tensor(input_tensor): + if backend.image_data_format() == "channels_first": + rows = backend.int_shape(input_tensor)[2] + cols = backend.int_shape(input_tensor)[3] + input_shape = (3, cols, rows) + else: + rows = backend.int_shape(input_tensor)[1] + cols = backend.int_shape(input_tensor)[2] + input_shape = (cols, rows, 3) + # If input_shape is None and input_tensor is None using standard shape + if input_shape is None and input_tensor is None: + input_shape = (None, None, 3) + + if backend.image_data_format() == "channels_last": + row_axis, col_axis = (0, 1) else: - raise ValueError('input_tensor specified: ', input_tensor, - 'is not a keras tensor') - - # If input_shape is None, infer shape from input_tensor - if input_shape is None and input_tensor is not None: - - try: - backend.is_keras_tensor(input_tensor) - except ValueError: - raise ValueError('input_tensor: ', input_tensor, 'is type: ', - type(input_tensor), 'which is not a valid type') - - if backend.is_keras_tensor(input_tensor): - if backend.image_data_format() == 'channels_first': - rows = backend.int_shape(input_tensor)[2] - cols = backend.int_shape(input_tensor)[3] - input_shape = (3, cols, rows) - else: - rows = backend.int_shape(input_tensor)[1] - cols = backend.int_shape(input_tensor)[2] - input_shape = (cols, rows, 3) - # If input_shape is None and input_tensor is None using standard shape - if input_shape is None and input_tensor is None: - input_shape = (None, None, 3) - - if backend.image_data_format() == 'channels_last': - row_axis, col_axis = (0, 1) - else: - row_axis, col_axis = (1, 2) - rows = input_shape[row_axis] - cols = input_shape[col_axis] - if rows and cols and (rows < 32 or cols < 32): - raise ValueError('Input size must be at least 32x32; Received `input_shape=' - f'{input_shape}`') - if weights == 'imagenet': - if (not minimalistic and alpha not in [0.75, 1.0] - or minimalistic and alpha != 1.0): - raise ValueError('If imagenet weights are being loaded, ' - 'alpha can be one of `0.75`, `1.0` for non minimalistic ' - 'or `1.0` for minimalistic only.') - - if rows != cols or rows != 224: - logging.warning('`input_shape` is undefined or non-square, ' - 'or `rows` is not 224. 
' - 'Weights for input shape (224, 224) will be ' - 'loaded as the default.') - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) + row_axis, col_axis = (1, 2) + rows = input_shape[row_axis] + cols = input_shape[col_axis] + if rows and cols and (rows < 32 or cols < 32): + raise ValueError( + "Input size must be at least 32x32; Received `input_shape=" + f"{input_shape}`" + ) + if weights == "imagenet": + if ( + not minimalistic + and alpha not in [0.75, 1.0] + or minimalistic + and alpha != 1.0 + ): + raise ValueError( + "If imagenet weights are being loaded, " + "alpha can be one of `0.75`, `1.0` for non minimalistic " + "or `1.0` for minimalistic only." + ) + + if rows != cols or rows != 224: + logging.warning( + "`input_shape` is undefined or non-square, " + "or `rows` is not 224. " + "Weights for input shape (224, 224) will be " + "loaded as the default." + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - img_input = input_tensor - - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - - if minimalistic: - kernel = 3 - activation = relu - se_ratio = None - else: - kernel = 5 - activation = hard_swish - se_ratio = 0.25 - - x = img_input - if include_preprocessing: - x = layers.Rescaling(scale=1. / 127.5, offset=-1.)(x) - x = layers.Conv2D( - 16, - kernel_size=3, - strides=(2, 2), - padding='same', - use_bias=False, - name='Conv')(x) - x = layers.BatchNormalization( - axis=channel_axis, epsilon=1e-3, - momentum=0.999, name='Conv/BatchNorm')(x) - x = activation(x) - - x = stack_fn(x, kernel, activation, se_ratio) - - last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6) - - # if the width multiplier is greater than 1 we - # increase the number of output channels - if alpha > 1.0: - last_point_ch = _depth(last_point_ch * alpha) - x = layers.Conv2D( - last_conv_ch, - kernel_size=1, - padding='same', - use_bias=False, - name='Conv_1')(x) - x = layers.BatchNormalization( - axis=channel_axis, epsilon=1e-3, - momentum=0.999, name='Conv_1/BatchNorm')(x) - x = activation(x) - if include_top: - x = layers.GlobalAveragePooling2D(keepdims=True)(x) - x = layers.Conv2D( - last_point_ch, - kernel_size=1, - padding='same', - use_bias=True, - name='Conv_2')(x) - x = activation(x) + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor - if dropout_rate > 0: - x = layers.Dropout(dropout_rate)(x) - x = layers.Conv2D(classes, kernel_size=1, padding='same', name='Logits')(x) - x = layers.Flatten()(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Activation(activation=classifier_activation, - name='Predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D(name='max_pool')(x) - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = models.Model(inputs, x, name='MobilenetV3' + model_type) - - # Load weights. 
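The `minimalistic` switch above trades the 5x5 kernels, squeeze-and-excite blocks, and hard-swish activation for cheaper ops. `hard_sigmoid` and `hard_swish`, defined near the end of this file's diff, reduce to `ReLU6(x + 3) / 6` and `x * hard_sigmoid(x)`; a scalar sketch with assumed inputs:

```python
# Plain-Python sketch of the activations defined later in this diff; the
# sample inputs are illustrative.
def relu6(x):
    return min(max(x, 0.0), 6.0)

def hard_sigmoid(x):
    return relu6(x + 3.0) / 6.0

def hard_swish(x):
    return x * hard_sigmoid(x)

for x in (-4.0, -1.0, 0.0, 1.0, 4.0):
    print(x, round(hard_swish(x), 4))
# -4.0 0.0 | -1.0 -0.3333 | 0.0 0.0 | 1.0 0.6667 | 4.0 4.0
```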
- if weights == 'imagenet': - model_name = '{}{}_224_{}_float'.format( - model_type, '_minimalistic' if minimalistic else '', str(alpha)) - if include_top: - file_name = 'weights_mobilenet_v3_' + model_name + '.h5' - file_hash = WEIGHTS_HASHES[model_name][0] + channel_axis = 1 if backend.image_data_format() == "channels_first" else -1 + + if minimalistic: + kernel = 3 + activation = relu + se_ratio = None else: - file_name = 'weights_mobilenet_v3_' + model_name + '_no_top_v2.h5' - file_hash = WEIGHTS_HASHES[model_name][1] - weights_path = data_utils.get_file( - file_name, - BASE_WEIGHT_PATH + file_name, - cache_subdir='models', - file_hash=file_hash) - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -@keras_export('keras.applications.MobileNetV3Small') -def MobileNetV3Small(input_shape=None, - alpha=1.0, - minimalistic=False, - include_top=True, - weights='imagenet', - input_tensor=None, - classes=1000, - pooling=None, - dropout_rate=0.2, - classifier_activation='softmax', - include_preprocessing=True): - - def stack_fn(x, kernel, activation, se_ratio): - - def depth(d): - return _depth(d * alpha) - - x = _inverted_res_block(x, 1, depth(16), 3, 2, se_ratio, relu, 0) - x = _inverted_res_block(x, 72. / 16, depth(24), 3, 2, None, relu, 1) - x = _inverted_res_block(x, 88. / 24, depth(24), 3, 1, None, relu, 2) - x = _inverted_res_block(x, 4, depth(40), kernel, 2, se_ratio, activation, 3) - x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 4) - x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 5) - x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 6) - x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 7) - x = _inverted_res_block(x, 6, depth(96), kernel, 2, se_ratio, activation, 8) - x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, 9) - x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, - 10) - return x + kernel = 5 + activation = hard_swish + se_ratio = 0.25 - return MobileNetV3(stack_fn, 1024, input_shape, alpha, 'small', minimalistic, - include_top, weights, input_tensor, classes, pooling, - dropout_rate, classifier_activation, include_preprocessing) - - -@keras_export('keras.applications.MobileNetV3Large') -def MobileNetV3Large(input_shape=None, - alpha=1.0, - minimalistic=False, - include_top=True, - weights='imagenet', - input_tensor=None, - classes=1000, - pooling=None, - dropout_rate=0.2, - classifier_activation='softmax', - include_preprocessing=True): - - def stack_fn(x, kernel, activation, se_ratio): - - def depth(d): - return _depth(d * alpha) - - x = _inverted_res_block(x, 1, depth(16), 3, 1, None, relu, 0) - x = _inverted_res_block(x, 4, depth(24), 3, 2, None, relu, 1) - x = _inverted_res_block(x, 3, depth(24), 3, 1, None, relu, 2) - x = _inverted_res_block(x, 3, depth(40), kernel, 2, se_ratio, relu, 3) - x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 4) - x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 5) - x = _inverted_res_block(x, 6, depth(80), 3, 2, None, activation, 6) - x = _inverted_res_block(x, 2.5, depth(80), 3, 1, None, activation, 7) - x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 8) - x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 9) - x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 10) - x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 11) - 
x = _inverted_res_block(x, 6, depth(160), kernel, 2, se_ratio, activation, - 12) - x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation, - 13) - x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation, - 14) - return x + x = img_input + if include_preprocessing: + x = layers.Rescaling(scale=1.0 / 127.5, offset=-1.0)(x) + x = layers.Conv2D( + 16, + kernel_size=3, + strides=(2, 2), + padding="same", + use_bias=False, + name="Conv", + )(x) + x = layers.BatchNormalization( + axis=channel_axis, epsilon=1e-3, momentum=0.999, name="Conv/BatchNorm" + )(x) + x = activation(x) - return MobileNetV3(stack_fn, 1280, input_shape, alpha, 'large', minimalistic, - include_top, weights, input_tensor, classes, pooling, - dropout_rate, classifier_activation, include_preprocessing) + x = stack_fn(x, kernel, activation, se_ratio) + last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6) -MobileNetV3Small.__doc__ = BASE_DOCSTRING.format(name='MobileNetV3Small') -MobileNetV3Large.__doc__ = BASE_DOCSTRING.format(name='MobileNetV3Large') + # if the width multiplier is greater than 1 we + # increase the number of output channels + if alpha > 1.0: + last_point_ch = _depth(last_point_ch * alpha) + x = layers.Conv2D( + last_conv_ch, + kernel_size=1, + padding="same", + use_bias=False, + name="Conv_1", + )(x) + x = layers.BatchNormalization( + axis=channel_axis, epsilon=1e-3, momentum=0.999, name="Conv_1/BatchNorm" + )(x) + x = activation(x) + if include_top: + x = layers.GlobalAveragePooling2D(keepdims=True)(x) + x = layers.Conv2D( + last_point_ch, + kernel_size=1, + padding="same", + use_bias=True, + name="Conv_2", + )(x) + x = activation(x) + + if dropout_rate > 0: + x = layers.Dropout(dropout_rate)(x) + x = layers.Conv2D( + classes, kernel_size=1, padding="same", name="Logits" + )(x) + x = layers.Flatten()(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Activation( + activation=classifier_activation, name="Predictions" + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D(name="max_pool")(x) + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = models.Model(inputs, x, name="MobilenetV3" + model_type) + + # Load weights. 
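The weight-loading branch that follows assembles the released file names from the model configuration; an illustrative trace for one assumed configuration (large, non-minimalistic, alpha=1.0):

```python
# Reconstructing the weight-file name the code below would request;
# the configuration values are assumptions.
model_type, minimalistic, alpha = "large", False, 1.0
model_name = "{}{}_224_{}_float".format(
    model_type, "_minimalistic" if minimalistic else "", str(alpha)
)
print("weights_mobilenet_v3_" + model_name + ".h5")
# weights_mobilenet_v3_large_224_1.0_float.h5
# model_name == "large_224_1.0_float" indexes WEIGHTS_HASHES above
```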
+ if weights == "imagenet": + model_name = "{}{}_224_{}_float".format( + model_type, "_minimalistic" if minimalistic else "", str(alpha) + ) + if include_top: + file_name = "weights_mobilenet_v3_" + model_name + ".h5" + file_hash = WEIGHTS_HASHES[model_name][0] + else: + file_name = "weights_mobilenet_v3_" + model_name + "_no_top_v2.h5" + file_hash = WEIGHTS_HASHES[model_name][1] + weights_path = data_utils.get_file( + file_name, + BASE_WEIGHT_PATH + file_name, + cache_subdir="models", + file_hash=file_hash, + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +@keras_export("keras.applications.MobileNetV3Small") +def MobileNetV3Small( + input_shape=None, + alpha=1.0, + minimalistic=False, + include_top=True, + weights="imagenet", + input_tensor=None, + classes=1000, + pooling=None, + dropout_rate=0.2, + classifier_activation="softmax", + include_preprocessing=True, +): + def stack_fn(x, kernel, activation, se_ratio): + def depth(d): + return _depth(d * alpha) + + x = _inverted_res_block(x, 1, depth(16), 3, 2, se_ratio, relu, 0) + x = _inverted_res_block(x, 72.0 / 16, depth(24), 3, 2, None, relu, 1) + x = _inverted_res_block(x, 88.0 / 24, depth(24), 3, 1, None, relu, 2) + x = _inverted_res_block( + x, 4, depth(40), kernel, 2, se_ratio, activation, 3 + ) + x = _inverted_res_block( + x, 6, depth(40), kernel, 1, se_ratio, activation, 4 + ) + x = _inverted_res_block( + x, 6, depth(40), kernel, 1, se_ratio, activation, 5 + ) + x = _inverted_res_block( + x, 3, depth(48), kernel, 1, se_ratio, activation, 6 + ) + x = _inverted_res_block( + x, 3, depth(48), kernel, 1, se_ratio, activation, 7 + ) + x = _inverted_res_block( + x, 6, depth(96), kernel, 2, se_ratio, activation, 8 + ) + x = _inverted_res_block( + x, 6, depth(96), kernel, 1, se_ratio, activation, 9 + ) + x = _inverted_res_block( + x, 6, depth(96), kernel, 1, se_ratio, activation, 10 + ) + return x + + return MobileNetV3( + stack_fn, + 1024, + input_shape, + alpha, + "small", + minimalistic, + include_top, + weights, + input_tensor, + classes, + pooling, + dropout_rate, + classifier_activation, + include_preprocessing, + ) + + +@keras_export("keras.applications.MobileNetV3Large") +def MobileNetV3Large( + input_shape=None, + alpha=1.0, + minimalistic=False, + include_top=True, + weights="imagenet", + input_tensor=None, + classes=1000, + pooling=None, + dropout_rate=0.2, + classifier_activation="softmax", + include_preprocessing=True, +): + def stack_fn(x, kernel, activation, se_ratio): + def depth(d): + return _depth(d * alpha) + + x = _inverted_res_block(x, 1, depth(16), 3, 1, None, relu, 0) + x = _inverted_res_block(x, 4, depth(24), 3, 2, None, relu, 1) + x = _inverted_res_block(x, 3, depth(24), 3, 1, None, relu, 2) + x = _inverted_res_block(x, 3, depth(40), kernel, 2, se_ratio, relu, 3) + x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 4) + x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 5) + x = _inverted_res_block(x, 6, depth(80), 3, 2, None, activation, 6) + x = _inverted_res_block(x, 2.5, depth(80), 3, 1, None, activation, 7) + x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 8) + x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 9) + x = _inverted_res_block( + x, 6, depth(112), 3, 1, se_ratio, activation, 10 + ) + x = _inverted_res_block( + x, 6, depth(112), 3, 1, se_ratio, activation, 11 + ) + x = _inverted_res_block( + x, 6, depth(160), kernel, 2, se_ratio, activation, 12 + ) + x = 
_inverted_res_block( + x, 6, depth(160), kernel, 1, se_ratio, activation, 13 + ) + x = _inverted_res_block( + x, 6, depth(160), kernel, 1, se_ratio, activation, 14 + ) + return x + + return MobileNetV3( + stack_fn, + 1280, + input_shape, + alpha, + "large", + minimalistic, + include_top, + weights, + input_tensor, + classes, + pooling, + dropout_rate, + classifier_activation, + include_preprocessing, + ) + + +MobileNetV3Small.__doc__ = BASE_DOCSTRING.format(name="MobileNetV3Small") +MobileNetV3Large.__doc__ = BASE_DOCSTRING.format(name="MobileNetV3Large") def relu(x): - return layers.ReLU()(x) + return layers.ReLU()(x) def hard_sigmoid(x): - return layers.ReLU(6.)(x + 3.) * (1. / 6.) + return layers.ReLU(6.0)(x + 3.0) * (1.0 / 6.0) def hard_swish(x): - return layers.Multiply()([x, hard_sigmoid(x)]) + return layers.Multiply()([x, hard_sigmoid(x)]) # This function is taken from the original tf repo. @@ -468,128 +570,129 @@ def hard_swish(x): def _depth(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v def _se_block(inputs, filters, se_ratio, prefix): - x = layers.GlobalAveragePooling2D( - keepdims=True, name=prefix + 'squeeze_excite/AvgPool')( - inputs) - x = layers.Conv2D( - _depth(filters * se_ratio), - kernel_size=1, - padding='same', - name=prefix + 'squeeze_excite/Conv')( - x) - x = layers.ReLU(name=prefix + 'squeeze_excite/Relu')(x) - x = layers.Conv2D( - filters, - kernel_size=1, - padding='same', - name=prefix + 'squeeze_excite/Conv_1')( - x) - x = hard_sigmoid(x) - x = layers.Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, x]) - return x - - -def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio, - activation, block_id): - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - shortcut = x - prefix = 'expanded_conv/' - infilters = backend.int_shape(x)[channel_axis] - if block_id: - # Expand - prefix = 'expanded_conv_{}/'.format(block_id) + x = layers.GlobalAveragePooling2D( + keepdims=True, name=prefix + "squeeze_excite/AvgPool" + )(inputs) + x = layers.Conv2D( + _depth(filters * se_ratio), + kernel_size=1, + padding="same", + name=prefix + "squeeze_excite/Conv", + )(x) + x = layers.ReLU(name=prefix + "squeeze_excite/Relu")(x) x = layers.Conv2D( - _depth(infilters * expansion), + filters, kernel_size=1, - padding='same', + padding="same", + name=prefix + "squeeze_excite/Conv_1", + )(x) + x = hard_sigmoid(x) + x = layers.Multiply(name=prefix + "squeeze_excite/Mul")([inputs, x]) + return x + + +def _inverted_res_block( + x, expansion, filters, kernel_size, stride, se_ratio, activation, block_id +): + channel_axis = 1 if backend.image_data_format() == "channels_first" else -1 + shortcut = x + prefix = "expanded_conv/" + infilters = backend.int_shape(x)[channel_axis] + if block_id: + # Expand + prefix = f"expanded_conv_{block_id}/" + x = layers.Conv2D( + _depth(infilters * expansion), + kernel_size=1, + padding="same", + use_bias=False, + name=prefix + "expand", + )(x) + x = layers.BatchNormalization( + axis=channel_axis, + epsilon=1e-3, + momentum=0.999, + name=prefix + "expand/BatchNorm", + 
)(x)
+        x = activation(x)
+
+    if stride == 2:
+        x = layers.ZeroPadding2D(
+            padding=imagenet_utils.correct_pad(x, kernel_size),
+            name=prefix + "depthwise/pad",
+        )(x)
+    x = layers.DepthwiseConv2D(
+        kernel_size,
+        strides=stride,
+        padding="same" if stride == 1 else "valid",
         use_bias=False,
-        name=prefix + 'expand')(
-            x)
+        name=prefix + "depthwise",
+    )(x)
     x = layers.BatchNormalization(
         axis=channel_axis,
         epsilon=1e-3,
         momentum=0.999,
-        name=prefix + 'expand/BatchNorm')(
-            x)
+        name=prefix + "depthwise/BatchNorm",
+    )(x)
     x = activation(x)
 
-  if stride == 2:
-    x = layers.ZeroPadding2D(
-        padding=imagenet_utils.correct_pad(x, kernel_size),
-        name=prefix + 'depthwise/pad')(
-            x)
-  x = layers.DepthwiseConv2D(
-      kernel_size,
-      strides=stride,
-      padding='same' if stride == 1 else 'valid',
-      use_bias=False,
-      name=prefix + 'depthwise')(
-          x)
-  x = layers.BatchNormalization(
-      axis=channel_axis,
-      epsilon=1e-3,
-      momentum=0.999,
-      name=prefix + 'depthwise/BatchNorm')(
-          x)
-  x = activation(x)
-
-  if se_ratio:
-    x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)
-
-  x = layers.Conv2D(
-      filters,
-      kernel_size=1,
-      padding='same',
-      use_bias=False,
-      name=prefix + 'project')(
-          x)
-  x = layers.BatchNormalization(
-      axis=channel_axis,
-      epsilon=1e-3,
-      momentum=0.999,
-      name=prefix + 'project/BatchNorm')(
-          x)
-
-  if stride == 1 and infilters == filters:
-    x = layers.Add(name=prefix + 'Add')([shortcut, x])
-  return x
-
-
-@keras_export('keras.applications.mobilenet_v3.preprocess_input')
-def preprocess_input(x, data_format=None):  # pylint: disable=unused-argument
-  """A placeholder method for backward compatibility.
-
-  The preprocessing logic has been included in the mobilenet_v3 model
-  implementation. Users are no longer required to call this method to normalize
-  the input data. This method does nothing and only kept as a placeholder to
-  align the API surface between old and new version of model.
+    if se_ratio:
+        x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)
 
-  Args:
-    x: A floating point `numpy.array` or a `tf.Tensor`.
-    data_format: Optional data format of the image tensor/array. Defaults to
-      None, in which case the global setting
-      `tf.keras.backend.image_data_format()` is used (unless you changed it,
-      it defaults to "channels_last").{mode}
+    x = layers.Conv2D(
+        filters,
+        kernel_size=1,
+        padding="same",
+        use_bias=False,
+        name=prefix + "project",
+    )(x)
+    x = layers.BatchNormalization(
+        axis=channel_axis,
+        epsilon=1e-3,
+        momentum=0.999,
+        name=prefix + "project/BatchNorm",
+    )(x)
 
-  Returns:
-    Unchanged `numpy.array` or `tf.Tensor`.
-  """
-  return x
+    if stride == 1 and infilters == filters:
+        x = layers.Add(name=prefix + "Add")([shortcut, x])
+    return x
+
+
+@keras_export("keras.applications.mobilenet_v3.preprocess_input")
+def preprocess_input(x, data_format=None):
+    """A placeholder method for backward compatibility.
+
+    The preprocessing logic has been included in the mobilenet_v3 model
+    implementation. Users are no longer required to call this method to
+    normalize the input data. This method does nothing and is only kept as
+    a placeholder to align the API surface between the old and new versions
+    of the model.
+
+    Args:
+      x: A floating point `numpy.array` or a `tf.Tensor`.
+      data_format: Optional data format of the image tensor/array. `None`
+        means the global setting `tf.keras.backend.image_data_format()` is
+        used (unless you changed it, it uses "channels_last").
+        Defaults to `None`.
+
+    Returns:
+      Unchanged `numpy.array` or `tf.Tensor`.
+ """ + return x -@keras_export('keras.applications.mobilenet_v3.decode_predictions') +@keras_export("keras.applications.mobilenet_v3.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/nasnet.py b/keras/applications/nasnet.py index 1635787846c2..7667d14d1b97 100644 --- a/keras/applications/nasnet.py +++ b/keras/applications/nasnet.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """NASNet-A models for Keras. NASNet refers to Neural Architecture Search Network, a family of models @@ -26,12 +26,11 @@ for ImageNet 2012 are provided. The below table describes the performance on ImageNet 2012: --------------------------------------------------------------------------------- - Architecture | Top-1 Acc | Top-5 Acc | Multiply-Adds | Params (M) --------------------------------------------------------------------------------- -| NASNet-A (4 @ 1056) | 74.0 % | 91.6 % | 564 M | 5.3 | -| NASNet-A (6 @ 4032) | 82.7 % | 96.2 % | 23.8 B | 88.9 | --------------------------------------------------------------------------------- +--------------------------------------------------------------------------- +Architecture | Top-1 Acc | Top-5 Acc | Multiply-Adds | Params (M) +---------------------|-----------|-----------|----------------|------------ +NASNet-A (4 @ 1056) | 74.0 % | 91.6 % | 564 M | 5.3 +NASNet-A (6 @ 4032) | 82.7 % | 96.2 % | 23.8 B | 88.9 Reference: - [Learning Transferable Architectures for Scalable Image Recognition]( @@ -46,786 +45,866 @@ from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export - -BASE_WEIGHTS_PATH = ('https://storage.googleapis.com/tensorflow/' - 'keras-applications/nasnet/') -NASNET_MOBILE_WEIGHT_PATH = BASE_WEIGHTS_PATH + 'NASNet-mobile.h5' -NASNET_MOBILE_WEIGHT_PATH_NO_TOP = BASE_WEIGHTS_PATH + 'NASNet-mobile-no-top.h5' -NASNET_LARGE_WEIGHT_PATH = BASE_WEIGHTS_PATH + 'NASNet-large.h5' -NASNET_LARGE_WEIGHT_PATH_NO_TOP = BASE_WEIGHTS_PATH + 'NASNet-large-no-top.h5' +BASE_WEIGHTS_PATH = ( + "https://storage.googleapis.com/tensorflow/keras-applications/nasnet/" +) +NASNET_MOBILE_WEIGHT_PATH = BASE_WEIGHTS_PATH + "NASNet-mobile.h5" +NASNET_MOBILE_WEIGHT_PATH_NO_TOP = BASE_WEIGHTS_PATH + "NASNet-mobile-no-top.h5" +NASNET_LARGE_WEIGHT_PATH = BASE_WEIGHTS_PATH + "NASNet-large.h5" +NASNET_LARGE_WEIGHT_PATH_NO_TOP = BASE_WEIGHTS_PATH + "NASNet-large-no-top.h5" layers = VersionAwareLayers() -def NASNet(input_shape=None, - penultimate_filters=4032, - num_blocks=6, - stem_block_filters=96, - skip_reduction=True, - filter_multiplier=2, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - default_size=None, - classifier_activation='softmax'): - """Instantiates a NASNet model. 
- - Reference: - - [Learning Transferable Architectures for Scalable Image Recognition]( - https://arxiv.org/abs/1707.07012) (CVPR 2018) - - For image classification use cases, see - [this page for detailed examples]( - https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning]( - https://keras.io/guides/transfer_learning/). - - Note: each Keras Application expects a specific kind of input preprocessing. - For NasNet, call `tf.keras.applications.nasnet.preprocess_input` - on your inputs before passing them to the model. - `nasnet.preprocess_input` will scale input pixels between -1 and 1. - - Args: - input_shape: Optional shape tuple, the input shape - is by default `(331, 331, 3)` for NASNetLarge and - `(224, 224, 3)` for NASNetMobile. - It should have exactly 3 input channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. - penultimate_filters: Number of filters in the penultimate layer. - NASNet models use the notation `NASNet (N @ P)`, where: - - N is the number of blocks - - P is the number of penultimate filters - num_blocks: Number of repeated blocks of the NASNet model. - NASNet models use the notation `NASNet (N @ P)`, where: - - N is the number of blocks - - P is the number of penultimate filters - stem_block_filters: Number of filters in the initial stem block - skip_reduction: Whether to skip the reduction step at the tail - end of the network. - filter_multiplier: Controls the width of the network. - - If `filter_multiplier` < 1.0, proportionally decreases the number - of filters in each layer. - - If `filter_multiplier` > 1.0, proportionally increases the number - of filters in each layer. - - If `filter_multiplier` = 1, default number of filters from the - paper are used at each layer. - include_top: Whether to include the fully-connected - layer at the top of the network. - weights: `None` (random initialization) or - `imagenet` (ImageNet weights) - input_tensor: Optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: Optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - default_size: Specifies the default image size of the model - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - - Returns: - A `keras.Model` instance. 
- """ - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000') - - if (isinstance(input_shape, tuple) and None in input_shape and - weights == 'imagenet'): - raise ValueError('When specifying the input shape of a NASNet' - ' and loading `ImageNet` weights, ' - 'the input_shape argument must be static ' - '(no None entries). Got: `input_shape=' + - str(input_shape) + '`.') - - if default_size is None: - default_size = 331 - - # Determine proper input shape and default size. - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=True, - weights=weights) - - if backend.image_data_format() != 'channels_last': - logging.warning('The NASNet family of models is only available ' - 'for the input data format "channels_last" ' - '(width, height, channels). ' - 'However your settings specify the default ' - 'data format "channels_first" (channels, width, height).' - ' You should set `image_data_format="channels_last"` ' - 'in your Keras config located at ~/.keras/keras.json. ' - 'The model being returned right now will expect inputs ' - 'to follow the "channels_last" data format.') - backend.set_image_data_format('channels_last') - old_data_format = 'channels_first' - else: - old_data_format = None - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - if penultimate_filters % (24 * (filter_multiplier**2)) != 0: - raise ValueError( - 'For NASNet-A models, the `penultimate_filters` must be a multiple ' - 'of 24 * (`filter_multiplier` ** 2). 
Current value: %d' % - penultimate_filters) - - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - filters = penultimate_filters // 24 - - x = layers.Conv2D( - stem_block_filters, (3, 3), - strides=(2, 2), - padding='valid', - use_bias=False, - name='stem_conv1', - kernel_initializer='he_normal')( - img_input) - - x = layers.BatchNormalization( - axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='stem_bn1')( - x) - - p = None - x, p = _reduction_a_cell( - x, p, filters // (filter_multiplier**2), block_id='stem_1') - x, p = _reduction_a_cell( - x, p, filters // filter_multiplier, block_id='stem_2') - - for i in range(num_blocks): - x, p = _normal_a_cell(x, p, filters, block_id='%d' % (i)) - - x, p0 = _reduction_a_cell( - x, p, filters * filter_multiplier, block_id='reduce_%d' % (num_blocks)) - - p = p0 if not skip_reduction else p - - for i in range(num_blocks): - x, p = _normal_a_cell( - x, p, filters * filter_multiplier, block_id='%d' % (num_blocks + i + 1)) - - x, p0 = _reduction_a_cell( - x, - p, - filters * filter_multiplier**2, - block_id='reduce_%d' % (2 * num_blocks)) - - p = p0 if not skip_reduction else p - - for i in range(num_blocks): - x, p = _normal_a_cell( - x, - p, - filters * filter_multiplier**2, - block_id='%d' % (2 * num_blocks + i + 1)) - - x = layers.Activation('relu')(x) - - if include_top: - x = layers.GlobalAveragePooling2D()(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense(classes, activation=classifier_activation, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - model = training.Model(inputs, x, name='NASNet') - - # Load weights. - if weights == 'imagenet': - if default_size == 224: # mobile version - if include_top: - weights_path = data_utils.get_file( - 'nasnet_mobile.h5', - NASNET_MOBILE_WEIGHT_PATH, - cache_subdir='models', - file_hash='020fb642bf7360b370c678b08e0adf61') - else: - weights_path = data_utils.get_file( - 'nasnet_mobile_no_top.h5', - NASNET_MOBILE_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='1ed92395b5b598bdda52abe5c0dbfd63') - model.load_weights(weights_path) - elif default_size == 331: # large version - if include_top: - weights_path = data_utils.get_file( - 'nasnet_large.h5', - NASNET_LARGE_WEIGHT_PATH, - cache_subdir='models', - file_hash='11577c9a518f0070763c2b964a382f17') - else: - weights_path = data_utils.get_file( - 'nasnet_large_no_top.h5', - NASNET_LARGE_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='d81d89dc07e6e56530c4e77faddd61b5') - model.load_weights(weights_path) - else: - raise ValueError('ImageNet weights can only be loaded with NASNetLarge' - ' or NASNetMobile') - elif weights is not None: - model.load_weights(weights) - - if old_data_format: - backend.set_image_data_format(old_data_format) - - return model - - -@keras_export('keras.applications.nasnet.NASNetMobile', - 'keras.applications.NASNetMobile') -def NASNetMobile(input_shape=None, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - classifier_activation='softmax'): - """Instantiates a Mobile NASNet model in ImageNet mode. 
- - Reference: - - [Learning Transferable Architectures for Scalable Image Recognition]( - https://arxiv.org/abs/1707.07012) (CVPR 2018) - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - Note: each Keras Application expects a specific kind of input preprocessing. - For NASNet, call `tf.keras.applications.nasnet.preprocess_input` on your - inputs before passing them to the model. - - Args: - input_shape: Optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` for NASNetMobile - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. - include_top: Whether to include the fully-connected - layer at the top of the network. - weights: `None` (random initialization) or - `imagenet` (ImageNet weights) - For loading `imagenet` weights, `input_shape` should be (224, 224, 3) - input_tensor: Optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: Optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - - Returns: - A Keras model instance. - - Raises: - ValueError: In case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. - """ - return NASNet( - input_shape, - penultimate_filters=1056, - num_blocks=4, - stem_block_filters=32, - skip_reduction=False, - filter_multiplier=2, - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - pooling=pooling, - classes=classes, - default_size=224, - classifier_activation=classifier_activation) - - -@keras_export('keras.applications.nasnet.NASNetLarge', - 'keras.applications.NASNetLarge') -def NASNetLarge(input_shape=None, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - classifier_activation='softmax'): - """Instantiates a NASNet model in ImageNet mode. - - Reference: - - [Learning Transferable Architectures for Scalable Image Recognition]( - https://arxiv.org/abs/1707.07012) (CVPR 2018) - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - Note: each Keras Application expects a specific kind of input preprocessing. - For NASNet, call `tf.keras.applications.nasnet.preprocess_input` on your - inputs before passing them to the model. 
- - Args: - input_shape: Optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(331, 331, 3)` for NASNetLarge. - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. +def NASNet( + input_shape=None, + penultimate_filters=4032, + num_blocks=6, + stem_block_filters=96, + skip_reduction=True, + filter_multiplier=2, + include_top=True, + weights="imagenet", + input_tensor=None, + pooling=None, + classes=1000, + default_size=None, + classifier_activation="softmax", +): + """Instantiates a NASNet model. + + Reference: + - [Learning Transferable Architectures for Scalable Image Recognition]( + https://arxiv.org/abs/1707.07012) (CVPR 2018) + + For image classification use cases, see + [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + + Note: each Keras Application expects a specific kind of input preprocessing. + For NasNet, call `tf.keras.applications.nasnet.preprocess_input` + on your inputs before passing them to the model. + `nasnet.preprocess_input` will scale input pixels between -1 and 1. + + Args: + input_shape: Optional shape tuple, the input shape + is by default `(331, 331, 3)` for NASNetLarge and + `(224, 224, 3)` for NASNetMobile. + It should have exactly 3 input channels, + and width and height should be no smaller than 32. + E.g. `(224, 224, 3)` would be one valid value. + penultimate_filters: Number of filters in the penultimate layer. + NASNet models use the notation `NASNet (N @ P)`, where: + - N is the number of blocks + - P is the number of penultimate filters + num_blocks: Number of repeated blocks of the NASNet model. + NASNet models use the notation `NASNet (N @ P)`, where: + - N is the number of blocks + - P is the number of penultimate filters + stem_block_filters: Number of filters in the initial stem block + skip_reduction: Whether to skip the reduction step at the tail + end of the network. + filter_multiplier: Controls the width of the network. + - If `filter_multiplier` < 1.0, proportionally decreases the number + of filters in each layer. + - If `filter_multiplier` > 1.0, proportionally increases the number + of filters in each layer. + - If `filter_multiplier` = 1, default number of filters from the + paper are used at each layer. include_top: Whether to include the fully-connected - layer at the top of the network. + layer at the top of the network. weights: `None` (random initialization) or `imagenet` (ImageNet weights) - For loading `imagenet` weights, `input_shape` should be (331, 331, 3) input_tensor: Optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. + `layers.Input()`) + to use as image input for the model. pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. + when `include_top` is `False`. 
+ - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. classes: Optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + default_size: Specifies the default image size of the model classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - - Returns: - A Keras model instance. - - Raises: - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. - """ - return NASNet( - input_shape, - penultimate_filters=4032, - num_blocks=6, - stem_block_filters=96, - skip_reduction=True, - filter_multiplier=2, - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - pooling=pooling, - classes=classes, - default_size=331, - classifier_activation=classifier_activation) - - -def _separable_conv_block(ip, - filters, - kernel_size=(3, 3), - strides=(1, 1), - block_id=None): - """Adds 2 blocks of [relu-separable conv-batchnorm]. - - Args: - ip: Input tensor - filters: Number of output filters per layer - kernel_size: Kernel size of separable convolutions - strides: Strided convolution for downsampling - block_id: String block_id + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + + Returns: + A `keras.Model` instance. + """ + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded." + ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top` ' + "as true, `classes` should be 1000" + ) + + if ( + isinstance(input_shape, tuple) + and None in input_shape + and weights == "imagenet" + ): + raise ValueError( + "When specifying the input shape of a NASNet" + " and loading `ImageNet` weights, " + "the input_shape argument must be static " + "(no None entries). Got: `input_shape=" + str(input_shape) + "`." + ) + + if default_size is None: + default_size = 331 + + # Determine proper input shape and default size. + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=default_size, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if backend.image_data_format() != "channels_last": + logging.warning( + "The NASNet family of models is only available " + 'for the input data format "channels_last" ' + "(width, height, channels). 
" + "However your settings specify the default " + 'data format "channels_first" (channels, width, height).' + ' You should set `image_data_format="channels_last"` ' + "in your Keras config located at ~/.keras/keras.json. " + "The model being returned right now will expect inputs " + 'to follow the "channels_last" data format.' + ) + backend.set_image_data_format("channels_last") + old_data_format = "channels_first" + else: + old_data_format = None - Returns: - A Keras tensor - """ - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - - with backend.name_scope('separable_conv_block_%s' % block_id): - x = layers.Activation('relu')(ip) - if strides == (2, 2): - x = layers.ZeroPadding2D( - padding=imagenet_utils.correct_pad(x, kernel_size), - name='separable_conv_1_pad_%s' % block_id)(x) - conv_pad = 'valid' + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - conv_pad = 'same' - x = layers.SeparableConv2D( - filters, - kernel_size, - strides=strides, - name='separable_conv_1_%s' % block_id, - padding=conv_pad, + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + if penultimate_filters % (24 * (filter_multiplier**2)) != 0: + raise ValueError( + "For NASNet-A models, the `penultimate_filters` must be a multiple " + "of 24 * (`filter_multiplier` ** 2). Current value: %d" + % penultimate_filters + ) + + channel_dim = 1 if backend.image_data_format() == "channels_first" else -1 + filters = penultimate_filters // 24 + + x = layers.Conv2D( + stem_block_filters, + (3, 3), + strides=(2, 2), + padding="valid", use_bias=False, - kernel_initializer='he_normal')( - x) - x = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='separable_conv_1_bn_%s' % (block_id))( - x) - x = layers.Activation('relu')(x) - x = layers.SeparableConv2D( - filters, - kernel_size, - name='separable_conv_2_%s' % block_id, - padding='same', - use_bias=False, - kernel_initializer='he_normal')( - x) - x = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='separable_conv_2_bn_%s' % (block_id))( - x) - return x - + name="stem_conv1", + kernel_initializer="he_normal", + )(img_input) -def _adjust_block(p, ip, filters, block_id=None): - """Adjusts the input `previous path` to match the shape of the `input`. - - Used in situations where the output number of filters needs to be changed. 
- - Args: - p: Input tensor which needs to be modified - ip: Input tensor whose shape needs to be matched - filters: Number of output filters to be matched - block_id: String block_id - - Returns: - Adjusted Keras tensor - """ - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - img_dim = 2 if backend.image_data_format() == 'channels_first' else -2 - - ip_shape = backend.int_shape(ip) - - if p is not None: - p_shape = backend.int_shape(p) - - with backend.name_scope('adjust_block'): - if p is None: - p = ip - - elif p_shape[img_dim] != ip_shape[img_dim]: - with backend.name_scope('adjust_reduction_block_%s' % block_id): - p = layers.Activation('relu', name='adjust_relu_1_%s' % block_id)(p) - p1 = layers.AveragePooling2D((1, 1), - strides=(2, 2), - padding='valid', - name='adjust_avg_pool_1_%s' % block_id)( - p) - p1 = layers.Conv2D( - filters // 2, (1, 1), - padding='same', + x = layers.BatchNormalization( + axis=channel_dim, momentum=0.9997, epsilon=1e-3, name="stem_bn1" + )(x) + + p = None + x, p = _reduction_a_cell( + x, p, filters // (filter_multiplier**2), block_id="stem_1" + ) + x, p = _reduction_a_cell( + x, p, filters // filter_multiplier, block_id="stem_2" + ) + + for i in range(num_blocks): + x, p = _normal_a_cell(x, p, filters, block_id="%d" % (i)) + + x, p0 = _reduction_a_cell( + x, p, filters * filter_multiplier, block_id="reduce_%d" % (num_blocks) + ) + + p = p0 if not skip_reduction else p + + for i in range(num_blocks): + x, p = _normal_a_cell( + x, + p, + filters * filter_multiplier, + block_id="%d" % (num_blocks + i + 1), + ) + + x, p0 = _reduction_a_cell( + x, + p, + filters * filter_multiplier**2, + block_id="reduce_%d" % (2 * num_blocks), + ) + + p = p0 if not skip_reduction else p + + for i in range(num_blocks): + x, p = _normal_a_cell( + x, + p, + filters * filter_multiplier**2, + block_id="%d" % (2 * num_blocks + i + 1), + ) + + x = layers.Activation("relu")(x) + + if include_top: + x = layers.GlobalAveragePooling2D()(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + model = training.Model(inputs, x, name="NASNet") + + # Load weights. 
+    if weights == "imagenet":
+        if default_size == 224:  # mobile version
+            if include_top:
+                weights_path = data_utils.get_file(
+                    "nasnet_mobile.h5",
+                    NASNET_MOBILE_WEIGHT_PATH,
+                    cache_subdir="models",
+                    file_hash="020fb642bf7360b370c678b08e0adf61",
+                )
+            else:
+                weights_path = data_utils.get_file(
+                    "nasnet_mobile_no_top.h5",
+                    NASNET_MOBILE_WEIGHT_PATH_NO_TOP,
+                    cache_subdir="models",
+                    file_hash="1ed92395b5b598bdda52abe5c0dbfd63",
+                )
+            model.load_weights(weights_path)
+        elif default_size == 331:  # large version
+            if include_top:
+                weights_path = data_utils.get_file(
+                    "nasnet_large.h5",
+                    NASNET_LARGE_WEIGHT_PATH,
+                    cache_subdir="models",
+                    file_hash="11577c9a518f0070763c2b964a382f17",
+                )
+            else:
+                weights_path = data_utils.get_file(
+                    "nasnet_large_no_top.h5",
+                    NASNET_LARGE_WEIGHT_PATH_NO_TOP,
+                    cache_subdir="models",
+                    file_hash="d81d89dc07e6e56530c4e77faddd61b5",
+                )
+            model.load_weights(weights_path)
+        else:
+            raise ValueError(
+                "ImageNet weights can only be loaded with NASNetLarge"
+                " or NASNetMobile"
+            )
+    elif weights is not None:
+        model.load_weights(weights)
+
+    if old_data_format:
+        backend.set_image_data_format(old_data_format)
+
+    return model
+
+
+@keras_export(
+    "keras.applications.nasnet.NASNetMobile", "keras.applications.NASNetMobile"
+)
+def NASNetMobile(
+    input_shape=None,
+    include_top=True,
+    weights="imagenet",
+    input_tensor=None,
+    pooling=None,
+    classes=1000,
+    classifier_activation="softmax",
+):
+    """Instantiates a Mobile NASNet model in ImageNet mode.
+
+    Reference:
+    - [Learning Transferable Architectures for Scalable Image Recognition](
+        https://arxiv.org/abs/1707.07012) (CVPR 2018)
+
+    Optionally loads weights pre-trained on ImageNet.
+    Note that the data format convention used by the model is
+    the one specified in your Keras config at `~/.keras/keras.json`.
+
+    Note: each Keras Application expects a specific kind of input preprocessing.
+    For NASNet, call `tf.keras.applications.nasnet.preprocess_input` on your
+    inputs before passing them to the model.
+
+    Args:
+        input_shape: Optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(224, 224, 3)` for NASNetMobile).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 32.
+            E.g. `(224, 224, 3)` would be one valid value.
+        include_top: Whether to include the fully-connected
+            layer at the top of the network.
+        weights: `None` (random initialization) or
+            `imagenet` (ImageNet weights). For loading `imagenet` weights,
+            `input_shape` should be (224, 224, 3).
+        input_tensor: Optional Keras tensor (i.e. output of
+            `layers.Input()`)
+            to use as image input for the model.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model
+                will be the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a
+                2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: Optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
+        classifier_activation: A `str` or callable. The activation function
+            to use on the "top" layer. Ignored unless `include_top=True`.
+            Set `classifier_activation=None` to return the logits of the
+            "top" layer.
+            When loading pretrained weights, `classifier_activation`
+            can only be `None` or `"softmax"`.
+
+    Returns:
+        A Keras model instance.
+
+    Raises:
+        ValueError: In case of invalid argument for `weights`,
+            or invalid input shape.
+        RuntimeError: If attempting to run this model with a
+            backend that does not support separable convolutions.
+    """
+    return NASNet(
+        input_shape,
+        penultimate_filters=1056,
+        num_blocks=4,
+        stem_block_filters=32,
+        skip_reduction=False,
+        filter_multiplier=2,
+        include_top=include_top,
+        weights=weights,
+        input_tensor=input_tensor,
+        pooling=pooling,
+        classes=classes,
+        default_size=224,
+        classifier_activation=classifier_activation,
+    )
+
+
+@keras_export(
+    "keras.applications.nasnet.NASNetLarge", "keras.applications.NASNetLarge"
+)
+def NASNetLarge(
+    input_shape=None,
+    include_top=True,
+    weights="imagenet",
+    input_tensor=None,
+    pooling=None,
+    classes=1000,
+    classifier_activation="softmax",
+):
+    """Instantiates a NASNet model in ImageNet mode.
+
+    Reference:
+    - [Learning Transferable Architectures for Scalable Image Recognition](
+        https://arxiv.org/abs/1707.07012) (CVPR 2018)
+
+    Optionally loads weights pre-trained on ImageNet.
+    Note that the data format convention used by the model is
+    the one specified in your Keras config at `~/.keras/keras.json`.
+
+    Note: each Keras Application expects a specific kind of input preprocessing.
+    For NASNet, call `tf.keras.applications.nasnet.preprocess_input` on your
+    inputs before passing them to the model.
+
+    Args:
+        input_shape: Optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(331, 331, 3)` for NASNetLarge).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 32.
+            E.g. `(224, 224, 3)` would be one valid value.
+        include_top: Whether to include the fully-connected
+            layer at the top of the network.
+        weights: `None` (random initialization) or
+            `imagenet` (ImageNet weights). For loading `imagenet` weights,
+            `input_shape` should be (331, 331, 3).
+        input_tensor: Optional Keras tensor (i.e. output of
+            `layers.Input()`)
+            to use as image input for the model.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model
+                will be the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a
+                2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: Optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
+        classifier_activation: A `str` or callable. The activation function
+            to use on the "top" layer. Ignored unless `include_top=True`.
+            Set `classifier_activation=None` to return the logits of the
+            "top" layer. When loading pretrained weights,
+            `classifier_activation` can only be `None` or `"softmax"`.
+
+    Returns:
+        A Keras model instance.
+
+    Raises:
+        ValueError: In case of invalid argument for `weights`,
+            or invalid input shape.
+        RuntimeError: If attempting to run this model with a
+            backend that does not support separable convolutions.
+ """ + return NASNet( + input_shape, + penultimate_filters=4032, + num_blocks=6, + stem_block_filters=96, + skip_reduction=True, + filter_multiplier=2, + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + pooling=pooling, + classes=classes, + default_size=331, + classifier_activation=classifier_activation, + ) + + +def _separable_conv_block( + ip, filters, kernel_size=(3, 3), strides=(1, 1), block_id=None +): + """Adds 2 blocks of [relu-separable conv-batchnorm]. + + Args: + ip: Input tensor + filters: Number of output filters per layer + kernel_size: Kernel size of separable convolutions + strides: Strided convolution for downsampling + block_id: String block_id + + Returns: + A Keras tensor + """ + channel_dim = 1 if backend.image_data_format() == "channels_first" else -1 + + with backend.name_scope(f"separable_conv_block_{block_id}"): + x = layers.Activation("relu")(ip) + if strides == (2, 2): + x = layers.ZeroPadding2D( + padding=imagenet_utils.correct_pad(x, kernel_size), + name=f"separable_conv_1_pad_{block_id}", + )(x) + conv_pad = "valid" + else: + conv_pad = "same" + x = layers.SeparableConv2D( + filters, + kernel_size, + strides=strides, + name=f"separable_conv_1_{block_id}", + padding=conv_pad, use_bias=False, - name='adjust_conv_1_%s' % block_id, - kernel_initializer='he_normal')( - p1) - - p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p) - p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2) - p2 = layers.AveragePooling2D((1, 1), - strides=(2, 2), - padding='valid', - name='adjust_avg_pool_2_%s' % block_id)( - p2) - p2 = layers.Conv2D( - filters // 2, (1, 1), - padding='same', + kernel_initializer="he_normal", + )(x) + x = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name=f"separable_conv_1_bn_{block_id}", + )(x) + x = layers.Activation("relu")(x) + x = layers.SeparableConv2D( + filters, + kernel_size, + name=f"separable_conv_2_{block_id}", + padding="same", use_bias=False, - name='adjust_conv_2_%s' % block_id, - kernel_initializer='he_normal')( - p2) - - p = layers.concatenate([p1, p2], axis=channel_dim) - p = layers.BatchNormalization( + kernel_initializer="he_normal", + )(x) + x = layers.BatchNormalization( axis=channel_dim, momentum=0.9997, epsilon=1e-3, - name='adjust_bn_%s' % block_id)( - p) - - elif p_shape[channel_dim] != filters: - with backend.name_scope('adjust_projection_block_%s' % block_id): - p = layers.Activation('relu')(p) - p = layers.Conv2D( - filters, (1, 1), + name=f"separable_conv_2_bn_{block_id}", + )(x) + return x + + +def _adjust_block(p, ip, filters, block_id=None): + """Adjusts the input `previous path` to match the shape of the `input`. + + Used in situations where the output number of filters needs to be changed. 
+ + Args: + p: Input tensor which needs to be modified + ip: Input tensor whose shape needs to be matched + filters: Number of output filters to be matched + block_id: String block_id + + Returns: + Adjusted Keras tensor + """ + channel_dim = 1 if backend.image_data_format() == "channels_first" else -1 + img_dim = 2 if backend.image_data_format() == "channels_first" else -2 + + ip_shape = backend.int_shape(ip) + + if p is not None: + p_shape = backend.int_shape(p) + + with backend.name_scope("adjust_block"): + if p is None: + p = ip + + elif p_shape[img_dim] != ip_shape[img_dim]: + with backend.name_scope(f"adjust_reduction_block_{block_id}"): + p = layers.Activation("relu", name=f"adjust_relu_1_{block_id}")( + p + ) + p1 = layers.AveragePooling2D( + (1, 1), + strides=(2, 2), + padding="valid", + name=f"adjust_avg_pool_1_{block_id}", + )(p) + p1 = layers.Conv2D( + filters // 2, + (1, 1), + padding="same", + use_bias=False, + name=f"adjust_conv_1_{block_id}", + kernel_initializer="he_normal", + )(p1) + + p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p) + p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2) + p2 = layers.AveragePooling2D( + (1, 1), + strides=(2, 2), + padding="valid", + name=f"adjust_avg_pool_2_{block_id}", + )(p2) + p2 = layers.Conv2D( + filters // 2, + (1, 1), + padding="same", + use_bias=False, + name=f"adjust_conv_2_{block_id}", + kernel_initializer="he_normal", + )(p2) + + p = layers.concatenate([p1, p2], axis=channel_dim) + p = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name=f"adjust_bn_{block_id}", + )(p) + + elif p_shape[channel_dim] != filters: + with backend.name_scope(f"adjust_projection_block_{block_id}"): + p = layers.Activation("relu")(p) + p = layers.Conv2D( + filters, + (1, 1), + strides=(1, 1), + padding="same", + name=f"adjust_conv_projection_{block_id}", + use_bias=False, + kernel_initializer="he_normal", + )(p) + p = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name=f"adjust_bn_{block_id}", + )(p) + return p + + +def _normal_a_cell(ip, p, filters, block_id=None): + """Adds a Normal cell for NASNet-A (Fig. 4 in the paper). 
+ + Args: + ip: Input tensor `x` + p: Input tensor `p` + filters: Number of output filters + block_id: String block_id + + Returns: + A Keras tensor + """ + channel_dim = 1 if backend.image_data_format() == "channels_first" else -1 + + with backend.name_scope(f"normal_A_block_{block_id}"): + p = _adjust_block(p, ip, filters, block_id) + + h = layers.Activation("relu")(ip) + h = layers.Conv2D( + filters, + (1, 1), strides=(1, 1), - padding='same', - name='adjust_conv_projection_%s' % block_id, + padding="same", + name=f"normal_conv_1_{block_id}", use_bias=False, - kernel_initializer='he_normal')( - p) - p = layers.BatchNormalization( + kernel_initializer="he_normal", + )(h) + h = layers.BatchNormalization( axis=channel_dim, momentum=0.9997, epsilon=1e-3, - name='adjust_bn_%s' % block_id)( - p) - return p + name=f"normal_bn_1_{block_id}", + )(h) + + with backend.name_scope("block_1"): + x1_1 = _separable_conv_block( + h, + filters, + kernel_size=(5, 5), + block_id=f"normal_left1_{block_id}", + ) + x1_2 = _separable_conv_block( + p, filters, block_id=f"normal_right1_{block_id}" + ) + x1 = layers.add([x1_1, x1_2], name=f"normal_add_1_{block_id}") + + with backend.name_scope("block_2"): + x2_1 = _separable_conv_block( + p, filters, (5, 5), block_id=f"normal_left2_{block_id}" + ) + x2_2 = _separable_conv_block( + p, filters, (3, 3), block_id=f"normal_right2_{block_id}" + ) + x2 = layers.add([x2_1, x2_2], name=f"normal_add_2_{block_id}") + + with backend.name_scope("block_3"): + x3 = layers.AveragePooling2D( + (3, 3), + strides=(1, 1), + padding="same", + name=f"normal_left3_{block_id}", + )(h) + x3 = layers.add([x3, p], name=f"normal_add_3_{block_id}") + + with backend.name_scope("block_4"): + x4_1 = layers.AveragePooling2D( + (3, 3), + strides=(1, 1), + padding="same", + name=f"normal_left4_{block_id}", + )(p) + x4_2 = layers.AveragePooling2D( + (3, 3), + strides=(1, 1), + padding="same", + name=f"normal_right4_{block_id}", + )(p) + x4 = layers.add([x4_1, x4_2], name=f"normal_add_4_{block_id}") + + with backend.name_scope("block_5"): + x5 = _separable_conv_block( + h, filters, block_id=f"normal_left5_{block_id}" + ) + x5 = layers.add([x5, h], name=f"normal_add_5_{block_id}") + + x = layers.concatenate( + [p, x1, x2, x3, x4, x5], + axis=channel_dim, + name=f"normal_concat_{block_id}", + ) + return x, ip -def _normal_a_cell(ip, p, filters, block_id=None): - """Adds a Normal cell for NASNet-A (Fig. 4 in the paper). +def _reduction_a_cell(ip, p, filters, block_id=None): + """Adds a Reduction cell for NASNet-A (Fig. 4 in the paper). 
- Args: + Args: ip: Input tensor `x` p: Input tensor `p` filters: Number of output filters block_id: String block_id - Returns: + Returns: A Keras tensor - """ - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - - with backend.name_scope('normal_A_block_%s' % block_id): - p = _adjust_block(p, ip, filters, block_id) - - h = layers.Activation('relu')(ip) - h = layers.Conv2D( - filters, (1, 1), - strides=(1, 1), - padding='same', - name='normal_conv_1_%s' % block_id, - use_bias=False, - kernel_initializer='he_normal')( - h) - h = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='normal_bn_1_%s' % block_id)( - h) - - with backend.name_scope('block_1'): - x1_1 = _separable_conv_block( - h, filters, kernel_size=(5, 5), block_id='normal_left1_%s' % block_id) - x1_2 = _separable_conv_block( - p, filters, block_id='normal_right1_%s' % block_id) - x1 = layers.add([x1_1, x1_2], name='normal_add_1_%s' % block_id) - - with backend.name_scope('block_2'): - x2_1 = _separable_conv_block( - p, filters, (5, 5), block_id='normal_left2_%s' % block_id) - x2_2 = _separable_conv_block( - p, filters, (3, 3), block_id='normal_right2_%s' % block_id) - x2 = layers.add([x2_1, x2_2], name='normal_add_2_%s' % block_id) - - with backend.name_scope('block_3'): - x3 = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same', - name='normal_left3_%s' % (block_id))( - h) - x3 = layers.add([x3, p], name='normal_add_3_%s' % block_id) - - with backend.name_scope('block_4'): - x4_1 = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same', - name='normal_left4_%s' % (block_id))( - p) - x4_2 = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same', - name='normal_right4_%s' % (block_id))( - p) - x4 = layers.add([x4_1, x4_2], name='normal_add_4_%s' % block_id) - - with backend.name_scope('block_5'): - x5 = _separable_conv_block( - h, filters, block_id='normal_left5_%s' % block_id) - x5 = layers.add([x5, h], name='normal_add_5_%s' % block_id) - - x = layers.concatenate([p, x1, x2, x3, x4, x5], - axis=channel_dim, - name='normal_concat_%s' % block_id) - return x, ip + """ + channel_dim = 1 if backend.image_data_format() == "channels_first" else -1 + with backend.name_scope(f"reduction_A_block_{block_id}"): + p = _adjust_block(p, ip, filters, block_id) -def _reduction_a_cell(ip, p, filters, block_id=None): - """Adds a Reduction cell for NASNet-A (Fig. 4 in the paper). 
- - Args: - ip: Input tensor `x` - p: Input tensor `p` - filters: Number of output filters - block_id: String block_id - - Returns: - A Keras tensor - """ - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - - with backend.name_scope('reduction_A_block_%s' % block_id): - p = _adjust_block(p, ip, filters, block_id) - - h = layers.Activation('relu')(ip) - h = layers.Conv2D( - filters, (1, 1), - strides=(1, 1), - padding='same', - name='reduction_conv_1_%s' % block_id, - use_bias=False, - kernel_initializer='he_normal')( - h) - h = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='reduction_bn_1_%s' % block_id)( - h) - h3 = layers.ZeroPadding2D( - padding=imagenet_utils.correct_pad(h, 3), - name='reduction_pad_1_%s' % block_id)( - h) - - with backend.name_scope('block_1'): - x1_1 = _separable_conv_block( - h, - filters, (5, 5), - strides=(2, 2), - block_id='reduction_left1_%s' % block_id) - x1_2 = _separable_conv_block( - p, - filters, (7, 7), - strides=(2, 2), - block_id='reduction_right1_%s' % block_id) - x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id) - - with backend.name_scope('block_2'): - x2_1 = layers.MaxPooling2D((3, 3), - strides=(2, 2), - padding='valid', - name='reduction_left2_%s' % block_id)( - h3) - x2_2 = _separable_conv_block( - p, - filters, (7, 7), - strides=(2, 2), - block_id='reduction_right2_%s' % block_id) - x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id) - - with backend.name_scope('block_3'): - x3_1 = layers.AveragePooling2D((3, 3), - strides=(2, 2), - padding='valid', - name='reduction_left3_%s' % block_id)( - h3) - x3_2 = _separable_conv_block( - p, - filters, (5, 5), - strides=(2, 2), - block_id='reduction_right3_%s' % block_id) - x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id) - - with backend.name_scope('block_4'): - x4 = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same', - name='reduction_left4_%s' % block_id)( - x1) - x4 = layers.add([x2, x4]) - - with backend.name_scope('block_5'): - x5_1 = _separable_conv_block( - x1, filters, (3, 3), block_id='reduction_left4_%s' % block_id) - x5_2 = layers.MaxPooling2D((3, 3), - strides=(2, 2), - padding='valid', - name='reduction_right5_%s' % block_id)( - h3) - x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id) - - x = layers.concatenate([x2, x3, x4, x5], - axis=channel_dim, - name='reduction_concat_%s' % block_id) - return x, ip + h = layers.Activation("relu")(ip) + h = layers.Conv2D( + filters, + (1, 1), + strides=(1, 1), + padding="same", + name=f"reduction_conv_1_{block_id}", + use_bias=False, + kernel_initializer="he_normal", + )(h) + h = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name=f"reduction_bn_1_{block_id}", + )(h) + h3 = layers.ZeroPadding2D( + padding=imagenet_utils.correct_pad(h, 3), + name=f"reduction_pad_1_{block_id}", + )(h) + + with backend.name_scope("block_1"): + x1_1 = _separable_conv_block( + h, + filters, + (5, 5), + strides=(2, 2), + block_id=f"reduction_left1_{block_id}", + ) + x1_2 = _separable_conv_block( + p, + filters, + (7, 7), + strides=(2, 2), + block_id=f"reduction_right1_{block_id}", + ) + x1 = layers.add([x1_1, x1_2], name=f"reduction_add_1_{block_id}") + + with backend.name_scope("block_2"): + x2_1 = layers.MaxPooling2D( + (3, 3), + strides=(2, 2), + padding="valid", + name=f"reduction_left2_{block_id}", + )(h3) + x2_2 = _separable_conv_block( + p, + filters, + (7, 7), + strides=(2, 2), + 
block_id=f"reduction_right2_{block_id}", + ) + x2 = layers.add([x2_1, x2_2], name=f"reduction_add_2_{block_id}") + + with backend.name_scope("block_3"): + x3_1 = layers.AveragePooling2D( + (3, 3), + strides=(2, 2), + padding="valid", + name=f"reduction_left3_{block_id}", + )(h3) + x3_2 = _separable_conv_block( + p, + filters, + (5, 5), + strides=(2, 2), + block_id=f"reduction_right3_{block_id}", + ) + x3 = layers.add([x3_1, x3_2], name=f"reduction_add3_{block_id}") + + with backend.name_scope("block_4"): + x4 = layers.AveragePooling2D( + (3, 3), + strides=(1, 1), + padding="same", + name=f"reduction_left4_{block_id}", + )(x1) + x4 = layers.add([x2, x4]) + + with backend.name_scope("block_5"): + x5_1 = _separable_conv_block( + x1, filters, (3, 3), block_id=f"reduction_left4_{block_id}" + ) + x5_2 = layers.MaxPooling2D( + (3, 3), + strides=(2, 2), + padding="valid", + name=f"reduction_right5_{block_id}", + )(h3) + x5 = layers.add([x5_1, x5_2], name=f"reduction_add4_{block_id}") + + x = layers.concatenate( + [x2, x3, x4, x5], + axis=channel_dim, + name=f"reduction_concat_{block_id}", + ) + return x, ip -@keras_export('keras.applications.nasnet.preprocess_input') +@keras_export("keras.applications.nasnet.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="tf" + ) -@keras_export('keras.applications.nasnet.decode_predictions') +@keras_export("keras.applications.nasnet.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/regnet.py b/keras/applications/regnet.py index de035d8b9279..0c8ee7de0670 100644 --- a/keras/applications/regnet.py +++ b/keras/applications/regnet.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name -# pylint: disable=missing-docstring -# pylint: disable=g-classes-have-attributes + """RegNet models for Keras. 
@@ -26,89 +24,119 @@ (CVPR 2021) """ +import tensorflow.compat.v2 as tf + from keras import backend from keras import layers from keras.applications import imagenet_utils from keras.engine import training from keras.utils import data_utils from keras.utils import layer_utils -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.util.tf_export import keras_export -BASE_WEIGHTS_PATH = "https://storage.googleapis.com/tensorflow/keras-applications/regnet/" +BASE_WEIGHTS_PATH = ( + "https://storage.googleapis.com/tensorflow/keras-applications/regnet/" +) WEIGHTS_HASHES = { - "x002": - ("49fb46e56cde07fdaf57bffd851461a86548f6a3a4baef234dd37290b826c0b8", - "5445b66cd50445eb7ecab094c1e78d4d3d29375439d1a7798861c4af15ffff21"), - "x004": - ("3523c7f5ac0dbbcc2fd6d83b3570e7540f7449d3301cc22c29547302114e4088", - "de139bf07a66c9256f2277bf5c1b6dd2d5a3a891a5f8a925a10c8a0a113fd6f3"), - "x006": - ("340216ef334a7bae30daac9f414e693c136fac9ab868704bbfcc9ce6a5ec74bb", - "a43ec97ad62f86b2a96a783bfdc63a5a54de02eef54f26379ea05e1bf90a9505"), - "x008": - ("8f145d6a5fae6da62677bb8d26eb92d0b9dfe143ec1ebf68b24a57ae50a2763d", - "3c7e4b0917359304dc18e644475c5c1f5e88d795542b676439c4a3acd63b7207"), - "x016": - ("31c386f4c7bfef4c021a583099aa79c1b3928057ba1b7d182f174674c5ef3510", - "1b8e3d545d190271204a7b2165936a227d26b79bb7922bac5ee4d303091bf17a"), - "x032": - ("6c025df1409e5ea846375bc9dfa240956cca87ef57384d93fef7d6fa90ca8c7f", - "9cd4522806c0fcca01b37874188b2bd394d7c419956d77472a4e072b01d99041"), - "x040": - ("ba128046c588a26dbd3b3a011b26cb7fa3cf8f269c184c132372cb20b6eb54c1", - "b4ed0ca0b9a98e789e05000e830403a7ade4d8afa01c73491c44610195198afe"), - "x064": - ("0f4489c3cd3ad979bd6b0324213998bcb36dc861d178f977997ebfe53c3ba564", - "3e706fa416a18dfda14c713423eba8041ae2509db3e0a611d5f599b5268a46c4"), - "x080": - ("76320e43272719df648db37271a247c22eb6e810fe469c37a5db7e2cb696d162", - "7b1ce8e29ceefec10a6569640ee329dba7fbc98b5d0f6346aabade058b66cf29"), - "x120": - ("5cafc461b78897d5e4f24e68cb406d18e75f31105ef620e7682b611bb355eb3a", - "36174ddd0299db04a42631d028abcb1cc7afec2b705e42bd28fcd325e5d596bf"), - "x160": - ("8093f57a5824b181fb734ea21ae34b1f7ee42c5298e63cf6d587c290973195d2", - "9d1485050bdf19531ffa1ed7827c75850e0f2972118a996b91aa9264b088fd43"), - "x320": - ("91fb3e6f4e9e44b3687e80977f7f4412ee9937c0c704232664fc83e4322ea01e", - "9db7eacc37b85c98184070e1a172e6104c00846f44bcd4e727da9e50d9692398"), - "y002": - ("1e8091c674532b1a61c04f6393a9c570113e0197f22bd1b98cc4c4fe800c6465", - "f63221f63d625b8e201221499682587bfe29d33f50a4c4f4d53be00f66c0f12c"), - "y004": - ("752fdbad21c78911bf1dcb8c513e5a0e14697b068e5d9e73525dbaa416d18d8e", - "45e6ba8309a17a77e67afc05228454b2e0ee6be0dae65edc0f31f1da10cc066b"), - "y006": - ("98942e07b273da500ff9699a1f88aca78dfad4375faabb0bab784bb0dace80a9", - "b70261cba4e60013c99d130cc098d2fce629ff978a445663b6fa4f8fc099a2be"), - "y008": - ("1b099377cc9a4fb183159a6f9b24bc998e5659d25a449f40c90cbffcbcfdcae4", - "b11f5432a216ee640fe9be6e32939defa8d08b8d136349bf3690715a98752ca1"), - "y016": - ("b7ce1f5e223f0941c960602de922bcf846288ce7a4c33b2a4f2e4ac4b480045b", - "d7404f50205e82d793e219afb9eb2bfeb781b6b2d316a6128c6d7d7dacab7f57"), - "y032": - ("6a6a545cf3549973554c9b94f0cd40e25f229fffb1e7f7ac779a59dcbee612bd", - "eb3ac1c45ec60f4f031c3f5180573422b1cf7bebc26c004637517372f68f8937"), - "y040": - ("98d00118b335162bbffe8f1329e54e5c8e75ee09b2a5414f97b0ddfc56e796f6", - "b5be2a5e5f072ecdd9c0b8a437cd896df0efa1f6a1f77e41caa8719b7dfcb05d"), - "y064": - 
("65c948c7a18aaecaad2d1bd4fd978987425604ba6669ef55a1faa0069a2804b7", - "885c4b7ed7ea339daca7dafa1a62cb7d41b1068897ef90a5a3d71b4a2e2db31a"), - "y080": - ("7a2c62da2982e369a4984d3c7c3b32d6f8d3748a71cb37a31156c436c37f3e95", - "3d119577e1e3bf8d153b895e8ea9e4ec150ff2d92abdca711b6e949c3fd7115d"), - "y120": - ("a96ab0d27d3ae35a422ee7df0d789069b3e3217a99334e0ce861a96595bc5986", - "4a6fa387108380b730b71feea2ad80b5224b5ea9dc21dc156c93fe3c6186485c"), - "y160": - ("45067240ffbc7ca2591313fee2f80dbdda6d66ec1a7451446f9a6d00d8f7ac6e", - "ead1e6b568be8f34447ec8941299a9df4368736ba9a8205de5427fa20a1fb316"), - "y320": ("b05e173e4ae635cfa22d06392ee3741284d17dadfee68f2aa6fd8cb2b7561112", - "cad78f74a586e24c61d38be17f3ae53bb9674380174d2585da1a526b8c20e1fd") + "x002": ( + "49fb46e56cde07fdaf57bffd851461a86548f6a3a4baef234dd37290b826c0b8", + "5445b66cd50445eb7ecab094c1e78d4d3d29375439d1a7798861c4af15ffff21", + ), + "x004": ( + "3523c7f5ac0dbbcc2fd6d83b3570e7540f7449d3301cc22c29547302114e4088", + "de139bf07a66c9256f2277bf5c1b6dd2d5a3a891a5f8a925a10c8a0a113fd6f3", + ), + "x006": ( + "340216ef334a7bae30daac9f414e693c136fac9ab868704bbfcc9ce6a5ec74bb", + "a43ec97ad62f86b2a96a783bfdc63a5a54de02eef54f26379ea05e1bf90a9505", + ), + "x008": ( + "8f145d6a5fae6da62677bb8d26eb92d0b9dfe143ec1ebf68b24a57ae50a2763d", + "3c7e4b0917359304dc18e644475c5c1f5e88d795542b676439c4a3acd63b7207", + ), + "x016": ( + "31c386f4c7bfef4c021a583099aa79c1b3928057ba1b7d182f174674c5ef3510", + "1b8e3d545d190271204a7b2165936a227d26b79bb7922bac5ee4d303091bf17a", + ), + "x032": ( + "6c025df1409e5ea846375bc9dfa240956cca87ef57384d93fef7d6fa90ca8c7f", + "9cd4522806c0fcca01b37874188b2bd394d7c419956d77472a4e072b01d99041", + ), + "x040": ( + "ba128046c588a26dbd3b3a011b26cb7fa3cf8f269c184c132372cb20b6eb54c1", + "b4ed0ca0b9a98e789e05000e830403a7ade4d8afa01c73491c44610195198afe", + ), + "x064": ( + "0f4489c3cd3ad979bd6b0324213998bcb36dc861d178f977997ebfe53c3ba564", + "3e706fa416a18dfda14c713423eba8041ae2509db3e0a611d5f599b5268a46c4", + ), + "x080": ( + "76320e43272719df648db37271a247c22eb6e810fe469c37a5db7e2cb696d162", + "7b1ce8e29ceefec10a6569640ee329dba7fbc98b5d0f6346aabade058b66cf29", + ), + "x120": ( + "5cafc461b78897d5e4f24e68cb406d18e75f31105ef620e7682b611bb355eb3a", + "36174ddd0299db04a42631d028abcb1cc7afec2b705e42bd28fcd325e5d596bf", + ), + "x160": ( + "8093f57a5824b181fb734ea21ae34b1f7ee42c5298e63cf6d587c290973195d2", + "9d1485050bdf19531ffa1ed7827c75850e0f2972118a996b91aa9264b088fd43", + ), + "x320": ( + "91fb3e6f4e9e44b3687e80977f7f4412ee9937c0c704232664fc83e4322ea01e", + "9db7eacc37b85c98184070e1a172e6104c00846f44bcd4e727da9e50d9692398", + ), + "y002": ( + "1e8091c674532b1a61c04f6393a9c570113e0197f22bd1b98cc4c4fe800c6465", + "f63221f63d625b8e201221499682587bfe29d33f50a4c4f4d53be00f66c0f12c", + ), + "y004": ( + "752fdbad21c78911bf1dcb8c513e5a0e14697b068e5d9e73525dbaa416d18d8e", + "45e6ba8309a17a77e67afc05228454b2e0ee6be0dae65edc0f31f1da10cc066b", + ), + "y006": ( + "98942e07b273da500ff9699a1f88aca78dfad4375faabb0bab784bb0dace80a9", + "b70261cba4e60013c99d130cc098d2fce629ff978a445663b6fa4f8fc099a2be", + ), + "y008": ( + "1b099377cc9a4fb183159a6f9b24bc998e5659d25a449f40c90cbffcbcfdcae4", + "b11f5432a216ee640fe9be6e32939defa8d08b8d136349bf3690715a98752ca1", + ), + "y016": ( + "b7ce1f5e223f0941c960602de922bcf846288ce7a4c33b2a4f2e4ac4b480045b", + "d7404f50205e82d793e219afb9eb2bfeb781b6b2d316a6128c6d7d7dacab7f57", + ), + "y032": ( + "6a6a545cf3549973554c9b94f0cd40e25f229fffb1e7f7ac779a59dcbee612bd", + 
"eb3ac1c45ec60f4f031c3f5180573422b1cf7bebc26c004637517372f68f8937", + ), + "y040": ( + "98d00118b335162bbffe8f1329e54e5c8e75ee09b2a5414f97b0ddfc56e796f6", + "b5be2a5e5f072ecdd9c0b8a437cd896df0efa1f6a1f77e41caa8719b7dfcb05d", + ), + "y064": ( + "65c948c7a18aaecaad2d1bd4fd978987425604ba6669ef55a1faa0069a2804b7", + "885c4b7ed7ea339daca7dafa1a62cb7d41b1068897ef90a5a3d71b4a2e2db31a", + ), + "y080": ( + "7a2c62da2982e369a4984d3c7c3b32d6f8d3748a71cb37a31156c436c37f3e95", + "3d119577e1e3bf8d153b895e8ea9e4ec150ff2d92abdca711b6e949c3fd7115d", + ), + "y120": ( + "a96ab0d27d3ae35a422ee7df0d789069b3e3217a99334e0ce861a96595bc5986", + "4a6fa387108380b730b71feea2ad80b5224b5ea9dc21dc156c93fe3c6186485c", + ), + "y160": ( + "45067240ffbc7ca2591313fee2f80dbdda6d66ec1a7451446f9a6d00d8f7ac6e", + "ead1e6b568be8f34447ec8941299a9df4368736ba9a8205de5427fa20a1fb316", + ), + "y320": ( + "b05e173e4ae635cfa22d06392ee3741284d17dadfee68f2aa6fd8cb2b7561112", + "cad78f74a586e24c61d38be17f3ae53bb9674380174d2585da1a526b8c20e1fd", + ), } # The widths and depths are deduced from a quantized linear function. For @@ -123,168 +151,168 @@ "widths": [24, 56, 152, 368], "group_width": 8, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x004": { "depths": [1, 2, 7, 12], "widths": [32, 64, 160, 384], "group_width": 16, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x006": { "depths": [1, 3, 5, 7], "widths": [48, 96, 240, 528], "group_width": 24, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x008": { "depths": [1, 3, 7, 5], "widths": [64, 128, 288, 672], "group_width": 16, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x016": { "depths": [2, 4, 10, 2], "widths": [72, 168, 408, 912], "group_width": 24, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x032": { "depths": [2, 6, 15, 2], "widths": [96, 192, 432, 1008], "group_width": 48, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x040": { "depths": [2, 5, 14, 2], "widths": [80, 240, 560, 1360], "group_width": 40, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x064": { "depths": [2, 4, 10, 1], "widths": [168, 392, 784, 1624], "group_width": 56, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x080": { "depths": [2, 5, 15, 1], "widths": [80, 240, 720, 1920], "group_width": 120, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x120": { "depths": [2, 5, 11, 1], "widths": [224, 448, 896, 2240], "group_width": 112, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x160": { "depths": [2, 6, 13, 1], "widths": [256, 512, 896, 2048], "group_width": 128, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "x320": { "depths": [2, 7, 13, 1], "widths": [336, 672, 1344, 2520], "group_width": 168, "default_size": 224, - "block_type": "X" + "block_type": "X", }, "y002": { "depths": [1, 1, 4, 7], "widths": [24, 56, 152, 368], "group_width": 8, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y004": { "depths": [1, 3, 6, 6], "widths": [48, 104, 208, 440], "group_width": 8, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y006": { "depths": [1, 3, 7, 4], "widths": [48, 112, 256, 608], "group_width": 16, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y008": { "depths": [1, 3, 8, 2], "widths": [64, 128, 320, 768], "group_width": 16, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y016": { "depths": [2, 6, 17, 2], "widths": [48, 120, 336, 888], 
"group_width": 24, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y032": { "depths": [2, 5, 13, 1], "widths": [72, 216, 576, 1512], "group_width": 24, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y040": { "depths": [2, 6, 12, 2], "widths": [128, 192, 512, 1088], "group_width": 64, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y064": { "depths": [2, 7, 14, 2], "widths": [144, 288, 576, 1296], "group_width": 72, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y080": { "depths": [2, 4, 10, 1], "widths": [168, 448, 896, 2016], "group_width": 56, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y120": { "depths": [2, 5, 11, 1], "widths": [224, 448, 896, 2240], "group_width": 112, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y160": { "depths": [2, 4, 11, 1], "widths": [224, 448, 1232, 3024], "group_width": 112, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, "y320": { "depths": [2, 5, 12, 1], "widths": [232, 696, 1392, 3712], "group_width": 232, "default_size": 224, - "block_type": "Y" + "block_type": "Y", }, } @@ -314,7 +342,7 @@ Args: include_top: Whether to include the fully-connected - layer at the top of the network. Defaults to True. + layer at the top of the network. Defaults to `True`. weights: One of `None` (random initialization), `"imagenet"` (pre-training on ImageNet), or the path to the weights file to be loaded. Defaults to `"imagenet"`. @@ -325,7 +353,7 @@ if `include_top` is False. It should have exactly 3 inputs channels. pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. Defaults to None. + when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. @@ -335,16 +363,16 @@ the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. + Defaults to `None`. classes: Optional number of classes to classify images into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. Defaults to 1000 (number of - ImageNet classes). + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. - Defaults to `"softmax"`. When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. + be `None` or `"softmax"`. Defaults to `"softmax"`. Returns: A `keras.Model` instance. @@ -352,1230 +380,1405 @@ def PreStem(name=None): - """Rescales and normalizes inputs to [0,1] and ImageNet mean and std. + """Rescales and normalizes inputs to [0,1] and ImageNet mean and std. - Args: - name: name prefix + Args: + name: name prefix - Returns: - Rescaled and normalized tensor - """ - if name is None: - name = "prestem" + str(backend.get_uid("prestem")) + Returns: + Rescaled and normalized tensor + """ + if name is None: + name = "prestem" + str(backend.get_uid("prestem")) - def apply(x): - x = layers.Rescaling(scale=1. / 255., name=name + "_prestem_rescaling")(x) - return x + def apply(x): + x = layers.Rescaling( + scale=1.0 / 255.0, name=name + "_prestem_rescaling" + )(x) + return x - return apply + return apply def Stem(name=None): - """Implementation of RegNet stem. 
- - (Common to all model variants) - Args: - name: name prefix - - Returns: - Output tensor of the Stem - """ - if name is None: - name = "stem" + str(backend.get_uid("stem")) - - def apply(x): - x = layers.Conv2D( - 32, (3, 3), - strides=2, - use_bias=False, - padding="same", - kernel_initializer="he_normal", - name=name + "_stem_conv")(x) - x = layers.BatchNormalization( - momentum=0.9, epsilon=1e-5, name=name + "_stem_bn")(x) - x = layers.ReLU(name=name + "_stem_relu")(x) - return x - - return apply + """Implementation of RegNet stem. + + (Common to all model variants) + Args: + name: name prefix + + Returns: + Output tensor of the Stem + """ + if name is None: + name = "stem" + str(backend.get_uid("stem")) + + def apply(x): + x = layers.Conv2D( + 32, + (3, 3), + strides=2, + use_bias=False, + padding="same", + kernel_initializer="he_normal", + name=name + "_stem_conv", + )(x) + x = layers.BatchNormalization( + momentum=0.9, epsilon=1e-5, name=name + "_stem_bn" + )(x) + x = layers.ReLU(name=name + "_stem_relu")(x) + return x + + return apply def SqueezeAndExciteBlock(filters_in, se_filters, name=None): - """Implements the Squeeze and excite block (https://arxiv.org/abs/1709.01507). - - Args: - filters_in: input filters to the block - se_filters: filters to squeeze to - name: name prefix - - Returns: - A function object - """ - if name is None: - name = str(backend.get_uid("squeeze_and_excite")) - - def apply(inputs): - x = layers.GlobalAveragePooling2D( - name=name + "_squeeze_and_excite_gap", keepdims=True)(inputs) - x = layers.Conv2D( - se_filters, (1, 1), - activation="relu", - kernel_initializer="he_normal", - name=name + "_squeeze_and_excite_squeeze")(x) - x = layers.Conv2D( - filters_in, (1, 1), - activation="sigmoid", - kernel_initializer="he_normal", - name=name + "_squeeze_and_excite_excite")(x) - x = tf.math.multiply(x, inputs) - return x - - return apply + """Implements the Squeeze & Excite block (https://arxiv.org/abs/1709.01507). + + Args: + filters_in: input filters to the block + se_filters: filters to squeeze to + name: name prefix + + Returns: + A function object + """ + if name is None: + name = str(backend.get_uid("squeeze_and_excite")) + + def apply(inputs): + x = layers.GlobalAveragePooling2D( + name=name + "_squeeze_and_excite_gap", keepdims=True + )(inputs) + x = layers.Conv2D( + se_filters, + (1, 1), + activation="relu", + kernel_initializer="he_normal", + name=name + "_squeeze_and_excite_squeeze", + )(x) + x = layers.Conv2D( + filters_in, + (1, 1), + activation="sigmoid", + kernel_initializer="he_normal", + name=name + "_squeeze_and_excite_excite", + )(x) + x = tf.math.multiply(x, inputs) + return x + + return apply def XBlock(filters_in, filters_out, group_width, stride=1, name=None): - """Implementation of X Block. + """Implementation of X Block. + + Reference: [Designing Network Design + Spaces](https://arxiv.org/abs/2003.13678) + Args: + filters_in: filters in the input tensor + filters_out: filters in the output tensor + group_width: group width + stride: stride + name: name prefix + Returns: + Output tensor of the block + """ + if name is None: + name = str(backend.get_uid("xblock")) + + def apply(inputs): + if filters_in != filters_out and stride == 1: + raise ValueError( + f"Input filters({filters_in}) and output " + f"filters({filters_out}) " + f"are not equal for stride {stride}. Input and output filters " + f"must be equal for stride={stride}." 
+            )
+
+        # Declare layers
+        groups = filters_out // group_width
+
+        if stride != 1:
+            skip = layers.Conv2D(
+                filters_out,
+                (1, 1),
+                strides=stride,
+                use_bias=False,
+                kernel_initializer="he_normal",
+                name=name + "_skip_1x1",
+            )(inputs)
+            skip = layers.BatchNormalization(
+                momentum=0.9, epsilon=1e-5, name=name + "_skip_bn"
+            )(skip)
+        else:
+            skip = inputs
+
+        # Build block
+        # conv_1x1_1
+        x = layers.Conv2D(
+            filters_out,
+            (1, 1),
+            use_bias=False,
+            kernel_initializer="he_normal",
+            name=name + "_conv_1x1_1",
+        )(inputs)
+        x = layers.BatchNormalization(
+            momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_1_bn"
+        )(x)
+        x = layers.ReLU(name=name + "_conv_1x1_1_relu")(x)
+
+        # conv_3x3
+        x = layers.Conv2D(
+            filters_out,
+            (3, 3),
+            use_bias=False,
+            strides=stride,
+            groups=groups,
+            padding="same",
+            kernel_initializer="he_normal",
+            name=name + "_conv_3x3",
+        )(x)
+        x = layers.BatchNormalization(
+            momentum=0.9, epsilon=1e-5, name=name + "_conv_3x3_bn"
+        )(x)
+        x = layers.ReLU(name=name + "_conv_3x3_relu")(x)
+
+        # conv_1x1_2
+        x = layers.Conv2D(
+            filters_out,
+            (1, 1),
+            use_bias=False,
+            kernel_initializer="he_normal",
+            name=name + "_conv_1x1_2",
+        )(x)
+        x = layers.BatchNormalization(
+            momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_2_bn"
+        )(x)
+
+        x = layers.ReLU(name=name + "_exit_relu")(x + skip)
+
+        return x
+
+    return apply
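`XBlock` (like the other block builders in this file) returns a closure over Keras layers rather than a `Layer` instance, so it composes directly in the functional API. A minimal sketch with illustrative shapes, not taken from this diff:

    inputs = layers.Input((28, 28, 56))
    outputs = XBlock(56, 56, group_width=8)(inputs)  # stride 1: identity skip
    block = training.Model(inputs, outputs)
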
+
+
+def YBlock(
+    filters_in,
+    filters_out,
+    group_width,
+    stride=1,
+    squeeze_excite_ratio=0.25,
+    name=None,
+):
+    """Implementation of Y Block.
+
+    Reference: [Designing Network Design
+    Spaces](https://arxiv.org/abs/2003.13678)
+    Args:
+      filters_in: filters in the input tensor
+      filters_out: filters in the output tensor
+      group_width: group width
+      stride: stride
+      squeeze_excite_ratio: expansion ratio for Squeeze and Excite block
+      name: name prefix
+    Returns:
+      Output tensor of the block
+    """
+    if name is None:
+        name = str(backend.get_uid("yblock"))
+
+    def apply(inputs):
+        if filters_in != filters_out and stride == 1:
+            raise ValueError(
+                f"Input filters({filters_in}) and output "
+                f"filters({filters_out}) "
+                f"are not equal for stride {stride}. Input and output filters "
+                f"must be equal for stride={stride}."
+            )
+
+        groups = filters_out // group_width
+        se_filters = int(filters_in * squeeze_excite_ratio)
+
+        if stride != 1:
+            skip = layers.Conv2D(
+                filters_out,
+                (1, 1),
+                strides=stride,
+                use_bias=False,
+                kernel_initializer="he_normal",
+                name=name + "_skip_1x1",
+            )(inputs)
+            skip = layers.BatchNormalization(
+                momentum=0.9, epsilon=1e-5, name=name + "_skip_bn"
+            )(skip)
+        else:
+            skip = inputs
+
+        # Build block
+        # conv_1x1_1
+        x = layers.Conv2D(
+            filters_out,
+            (1, 1),
+            use_bias=False,
+            kernel_initializer="he_normal",
+            name=name + "_conv_1x1_1",
+        )(inputs)
+        x = layers.BatchNormalization(
+            momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_1_bn"
+        )(x)
+        x = layers.ReLU(name=name + "_conv_1x1_1_relu")(x)
+
+        # conv_3x3
+        x = layers.Conv2D(
+            filters_out,
+            (3, 3),
+            use_bias=False,
+            strides=stride,
+            groups=groups,
+            padding="same",
+            kernel_initializer="he_normal",
+            name=name + "_conv_3x3",
+        )(x)
+        x = layers.BatchNormalization(
+            momentum=0.9, epsilon=1e-5, name=name + "_conv_3x3_bn"
+        )(x)
+        x = layers.ReLU(name=name + "_conv_3x3_relu")(x)
+
+        # Squeeze-Excitation block
+        x = SqueezeAndExciteBlock(filters_out, se_filters, name=name)(x)
+
+        # conv_1x1_2
+        x = layers.Conv2D(
+            filters_out,
+            (1, 1),
+            use_bias=False,
+            kernel_initializer="he_normal",
+            name=name + "_conv_1x1_2",
+        )(x)
+        x = layers.BatchNormalization(
+            momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_2_bn"
+        )(x)
+
+        x = layers.ReLU(name=name + "_exit_relu")(x + skip)
+
+        return x
+
+    return apply
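The Y block is the X block plus a Squeeze-and-Excite stage after `conv_3x3`; with the default `squeeze_excite_ratio` of 0.25, a block fed `filters_in=64` squeezes to `int(64 * 0.25) = 16` channels before re-expanding. Sketch with illustrative shapes:

    feature_map = layers.Input((28, 28, 64))
    y = YBlock(64, 128, group_width=16, stride=2)(feature_map)
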
+
+
+def ZBlock(
+    filters_in,
+    filters_out,
+    group_width,
+    stride=1,
+    squeeze_excite_ratio=0.25,
+    bottleneck_ratio=0.25,
+    name=None,
+):
+    """Implementation of Z block. Reference: [Fast and Accurate Model
+    Scaling](https://arxiv.org/abs/2103.06877).
+
+    Args:
+      filters_in: filters in the input tensor
+      filters_out: filters in the output tensor
+      group_width: group width
+      stride: stride
+      squeeze_excite_ratio: expansion ratio for Squeeze and Excite block
+      bottleneck_ratio: inverted bottleneck ratio
+      name: name prefix
+    Returns:
+      Output tensor of the block
+    """
+    if name is None:
+        name = str(backend.get_uid("zblock"))
+
+    def apply(inputs):
+        if filters_in != filters_out and stride == 1:
+            raise ValueError(
+                f"Input filters({filters_in}) and output "
+                f"filters({filters_out}) "
+                f"are not equal for stride {stride}. Input and output filters "
+                f"must be equal for stride={stride}."
+            )
+
+        groups = filters_out // group_width
+        se_filters = int(filters_in * squeeze_excite_ratio)
+
+        inv_btlneck_filters = int(filters_out / bottleneck_ratio)
+
+        # Build block
+        # conv_1x1_1
+        x = layers.Conv2D(
+            inv_btlneck_filters,
+            (1, 1),
+            use_bias=False,
+            kernel_initializer="he_normal",
+            name=name + "_conv_1x1_1",
+        )(inputs)
+        x = layers.BatchNormalization(
+            momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_1_bn"
+        )(x)
+        x = tf.nn.silu(x)
+
+        # conv_3x3
+        x = layers.Conv2D(
+            inv_btlneck_filters,
+            (3, 3),
+            use_bias=False,
+            strides=stride,
+            groups=groups,
+            padding="same",
+            kernel_initializer="he_normal",
+            name=name + "_conv_3x3",
+        )(x)
+        x = layers.BatchNormalization(
+            momentum=0.9, epsilon=1e-5, name=name + "_conv_3x3_bn"
+        )(x)
+        x = tf.nn.silu(x)
+
+        # Squeeze-Excitation block (applied to x; the block builder returns a
+        # closure, so it must be called on the tensor)
+        x = SqueezeAndExciteBlock(inv_btlneck_filters, se_filters, name=name)(
+            x
+        )
+
+        # conv_1x1_2
+        x = layers.Conv2D(
+            filters_out,
+            (1, 1),
+            use_bias=False,
+            kernel_initializer="he_normal",
+            name=name + "_conv_1x1_2",
+        )(x)
+        x = layers.BatchNormalization(
+            momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_2_bn"
+        )(x)
+
+        if stride != 1:
+            return x
+        else:
+            return x + inputs
+
+    return apply
 
 
-  Reference: [Designing Network Design
-    Spaces](https://arxiv.org/abs/2003.13678)
-  Args:
-    filters_in: filters in the input tensor
-    filters_out: filters in the output tensor
-    group_width: group width
-    stride: stride
-    name: name prefix
-  Returns:
-    Output tensor of the block
-  """
-  if name is None:
-    name = str(backend.get_uid("xblock"))
-
-  def apply(inputs):
-    if filters_in != filters_out and stride == 1:
-      raise ValueError(
-          f"Input filters({filters_in}) and output filters({filters_out}) "
-          f"are not equal for stride {stride}. Input and output filters must "
-          f"be equal for stride={stride}.")
-
-    # Declare layers
-    groups = filters_out // group_width
-
-    if stride != 1:
-      skip = layers.Conv2D(
-          filters_out, (1, 1),
-          strides=stride,
-          use_bias=False,
-          kernel_initializer="he_normal",
-          name=name + "_skip_1x1")(inputs)
-      skip = layers.BatchNormalization(
-          momentum=0.9, epsilon=1e-5, name=name + "_skip_bn")(skip)
-    else:
-      skip = inputs
-
-    # Build block
-    # conv_1x1_1
-    x = layers.Conv2D(
-        filters_out, (1, 1),
-        use_bias=False,
-        kernel_initializer="he_normal",
-        name=name + "_conv_1x1_1")(inputs)
-    x = layers.BatchNormalization(
-        momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_1_bn")(x)
-    x = layers.ReLU(name=name + "_conv_1x1_1_relu")(x)
-
-    # conv_3x3
-    x = layers.Conv2D(
-        filters_out, (3, 3),
-        use_bias=False,
-        strides=stride,
-        groups=groups,
-        padding="same",
-        kernel_initializer="he_normal",
-        name=name + "_conv_3x3")(x)
-    x = layers.BatchNormalization(
-        momentum=0.9, epsilon=1e-5, name=name + "_conv_3x3_bn")(x)
-    x = layers.ReLU(name=name + "_conv_3x3_relu")(x)
-
-    # conv_1x1_2
-    x = layers.Conv2D(
-        filters_out, (1, 1),
-        use_bias=False,
-        kernel_initializer="he_normal",
-        name=name + "_conv_1x1_2")(x)
-    x = layers.BatchNormalization(
-        momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_2_bn")(x)
-
-    x = layers.ReLU(name=name + "_exit_relu")(x + skip)
-    return x
-
-  return apply
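No entry in MODEL_CONFIGS sets `block_type` to `"Z"`, so `ZBlock` is reachable only through a direct `RegNet(..., block_type="Z")` call. A minimal sketch with illustrative shapes:

    x_in = layers.Input((14, 14, 64))
    z = ZBlock(64, 64, group_width=8)(x_in)  # stride 1, so the input is re-added
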
+def Stage(block_type, depth, group_width, filters_in, filters_out, name=None):
+    """Implementation of Stage in RegNet.
+
+    Args:
+      block_type: must be one of "X", "Y", "Z"
+      depth: depth of stage, number of blocks to use
+      group_width: group width of all blocks in this stage
+      filters_in: input filters to this stage
+      filters_out: output filters from this stage
+      name: name prefix
+
+    Returns:
+      Output tensor of Stage
+    """
+    if name is None:
+        name = str(backend.get_uid("stage"))
+
+    def apply(inputs):
+        x = inputs
+        if block_type == "X":
+            x = XBlock(
+                filters_in,
+                filters_out,
+                group_width,
+                stride=2,
+                name=f"{name}_XBlock_0",
+            )(x)
+            for i in range(1, depth):
+                x = XBlock(
+                    filters_out,
+                    filters_out,
+                    group_width,
+                    name=f"{name}_XBlock_{i}",
+                )(x)
+        elif block_type == "Y":
+            x = YBlock(
+                filters_in,
+                filters_out,
+                group_width,
+                stride=2,
+                name=name + "_YBlock_0",
+            )(x)
+            for i in range(1, depth):
+                x = YBlock(
+                    filters_out,
+                    filters_out,
+                    group_width,
+                    name=f"{name}_YBlock_{i}",
+                )(x)
+        elif block_type == "Z":
+            x = ZBlock(
+                filters_in,
+                filters_out,
+                group_width,
+                stride=2,
+                name=f"{name}_ZBlock_0",
+            )(x)
+            for i in range(1, depth):
+                x = ZBlock(
+                    filters_out,
+                    filters_out,
+                    group_width,
+                    name=f"{name}_ZBlock_{i}",
+                )(x)
+        else:
+            raise NotImplementedError(
+                f"Block type `{block_type}` not recognized. "
+                "block_type must be one of (`X`, `Y`, `Z`). "
+            )
+        return x
+
+    return apply
 
 
-def YBlock(filters_in,
-           filters_out,
-           group_width,
-           stride=1,
-           squeeze_excite_ratio=0.25,
-           name=None):
-  """Implementation of Y Block.
+def Head(num_classes=1000, name=None):
+    """Implementation of classification head of RegNet.
+
+    Args:
+      num_classes: number of classes for Dense layer
+      name: name prefix
+
+    Returns:
+      Classification head function.
+    """
+    if name is None:
+        name = str(backend.get_uid("head"))
+
+    def apply(x):
+        x = layers.GlobalAveragePooling2D(name=name + "_head_gap")(x)
+        x = layers.Dense(num_classes, name=name + "head_dense")(x)
+        return x
+
+    return apply
+
+
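Note that the `Dense` layer in `Head` carries no activation, so in the code shown here the top of the network emits logits; `classifier_activation` is validated against the weights further down but is not attached to this layer. A shape sketch with illustrative numbers:

    feats = layers.Input((7, 7, 368))
    logits = Head(num_classes=10)(feats)  # shape (None, 10), no softmax applied
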
+def RegNet(
+    depths,
+    widths,
+    group_width,
+    block_type,
+    default_size,
+    model_name="regnet",
+    include_preprocessing=True,
+    include_top=True,
+    weights="imagenet",
+    input_tensor=None,
+    input_shape=None,
+    pooling=None,
+    classes=1000,
+    classifier_activation="softmax",
+):
+    """Instantiates RegNet architecture given specific configuration.
+
+    Args:
+      depths: An iterable containing depths for each individual stage.
+      widths: An iterable containing output channel width of each individual
+        stage
+      group_width: Number of channels to be used in each group. See grouped
+        convolutions for more information.
+      block_type: Must be one of `{"X", "Y", "Z"}`. For more details see the
+        papers "Designing network design spaces" and "Fast and Accurate Model
+        Scaling"
+      default_size: Default input image size.
+      model_name: An optional name for the model.
+      include_preprocessing: boolean denoting whether to include preprocessing
+        in the model
+      include_top: Boolean denoting whether to include classification head in
+        the model.
+      weights: one of `None` (random initialization), "imagenet" (pre-training
+        on ImageNet), or the path to the weights file to be loaded.
+      input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to
+        use as image input for the model.
+      input_shape: optional shape tuple, only to be specified if `include_top`
+        is False. It should have exactly 3 input channels.
+      pooling: optional pooling mode for feature extraction when `include_top`
+        is `False`.
+        - `None` means that the output of the model will be the 4D tensor
+          output of the last convolutional layer.
+        - `avg` means that global average pooling will be applied to the
+          output of the last convolutional layer, and thus the output of the
+          model will be a 2D tensor.
+        - `max` means that global max pooling will be applied.
+      classes: optional number of classes to classify images into, only to be
+        specified if `include_top` is True, and if no `weights` argument is
+        specified.
+      classifier_activation: A `str` or callable. The activation function to use
+        on the "top" layer. Ignored unless `include_top=True`. Set
+        `classifier_activation=None` to return the logits of the "top" layer.
 
-  Reference: [Designing Network Design
-    Spaces](https://arxiv.org/abs/2003.13678)
-  Args:
-    filters_in: filters in the input tensor
-    filters_out: filters in the output tensor
-    group_width: group width
-    stride: stride
-    squeeze_excite_ratio: expansion ration for Squeeze and Excite block
-    name: name prefix
-  Returns:
-    Output tensor of the block
-  """
-  if name is None:
-    name = str(backend.get_uid("yblock"))
-
-  def apply(inputs):
-    if filters_in != filters_out and stride == 1:
-      raise ValueError(
-          f"Input filters({filters_in}) and output filters({filters_out}) "
-          f"are not equal for stride {stride}. Input and output filters must "
-          f"be equal for stride={stride}.")
-
-    groups = filters_out // group_width
-    se_filters = int(filters_in * squeeze_excite_ratio)
-
-    if stride != 1:
-      skip = layers.Conv2D(
-          filters_out, (1, 1),
-          strides=stride,
-          use_bias=False,
-          kernel_initializer="he_normal",
-          name=name + "_skip_1x1")(inputs)
-      skip = layers.BatchNormalization(
-          momentum=0.9, epsilon=1e-5, name=name + "_skip_bn")(skip)
+    Returns:
+      A `keras.Model` instance.
+
+    Raises:
+      ValueError: in case of invalid argument for `weights`,
+        or invalid input shape.
+      ValueError: if `classifier_activation` is not `softmax` or `None` when
+        using a pretrained top layer.
+      ValueError: if `include_top` is True but `classes` is not 1000.
+      ValueError: if `block_type` is not one of `{"X", "Y", "Z"}`
+
+    """
+    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
+        raise ValueError(
+            "The `weights` argument should be either "
+            "`None` (random initialization), `imagenet` "
+            "(pre-training on ImageNet), "
+            "or the path to the weights file to be loaded."
+ ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + "If using `weights` as `'imagenet'` with `include_top`" + " as true, `classes` should be 1000" + ) + + # Determine proper input shape + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=default_size, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - skip = inputs - - # Build block - # conv_1x1_1 - x = layers.Conv2D( - filters_out, (1, 1), - use_bias=False, - kernel_initializer="he_normal", - name=name + "_conv_1x1_1")(inputs) - x = layers.BatchNormalization( - momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_1_bn")(x) - x = layers.ReLU(name=name + "_conv_1x1_1_relu")(x) - - # conv_3x3 - x = layers.Conv2D( - filters_out, (3, 3), - use_bias=False, - strides=stride, - groups=groups, - padding="same", - kernel_initializer="he_normal", - name=name + "_conv_3x3")(x) - x = layers.BatchNormalization( - momentum=0.9, epsilon=1e-5, name=name + "_conv_3x3_bn")(x) - x = layers.ReLU(name=name + "_conv_3x3_relu")(x) - - # Squeeze-Excitation block - x = SqueezeAndExciteBlock(filters_out, se_filters, name=name)(x) - - # conv_1x1_2 - x = layers.Conv2D( - filters_out, (1, 1), - use_bias=False, - kernel_initializer="he_normal", - name=name + "_conv_1x1_2")(x) - x = layers.BatchNormalization( - momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_2_bn")(x) - - x = layers.ReLU(name=name + "_exit_relu")(x + skip) - - return x + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor - return apply - - -def ZBlock(filters_in, - filters_out, - group_width, - stride=1, - squeeze_excite_ratio=0.25, - bottleneck_ratio=0.25, - name=None): - """Implementation of Z block Reference: [Fast and Accurate Model Scaling](https://arxiv.org/abs/2103.06877). - - Args: - filters_in: filters in the input tensor - filters_out: filters in the output tensor - group_width: group width - stride: stride - squeeze_excite_ratio: expansion ration for Squeeze and Excite block - bottleneck_ratio: inverted bottleneck ratio - name: name prefix - Returns: - Output tensor of the block - """ - if name is None: - name = str(backend.get_uid("zblock")) - - def apply(inputs): - if filters_in != filters_out and stride == 1: - raise ValueError( - f"Input filters({filters_in}) and output filters({filters_out})" - f"are not equal for stride {stride}. 
Input and output filters must be" - f" equal for stride={stride}.") - - groups = filters_out // group_width - se_filters = int(filters_in * squeeze_excite_ratio) - - inv_btlneck_filters = int(filters_out / bottleneck_ratio) - - # Build block - # conv_1x1_1 - x = layers.Conv2D( - inv_btlneck_filters, (1, 1), - use_bias=False, - kernel_initializer="he_normal", - name=name + "_conv_1x1_1")(inputs) - x = layers.BatchNormalization( - momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_1_bn")(x) - x = tf.nn.silu(x) - - # conv_3x3 - x = layers.Conv2D( - inv_btlneck_filters, (3, 3), - use_bias=False, - strides=stride, - groups=groups, - padding="same", - kernel_initializer="he_normal", - name=name + "_conv_3x3")(x) - x = layers.BatchNormalization( - momentum=0.9, epsilon=1e-5, name=name + "_conv_3x3_bn")(x) - x = tf.nn.silu(x) - - # Squeeze-Excitation block - x = SqueezeAndExciteBlock(inv_btlneck_filters, se_filters, name=name) - - # conv_1x1_2 - x = layers.Conv2D( - filters_out, (1, 1), - use_bias=False, - kernel_initializer="he_normal", - name=name + "_conv_1x1_2")(x) - x = layers.BatchNormalization( - momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_2_bn")(x) - - if stride != 1: - return x + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor)[0] else: - return x + inputs - - return apply - - -def Stage(block_type, depth, group_width, filters_in, filters_out, name=None): - """Implementation of Stage in RegNet. - - Args: - block_type: must be one of "X", "Y", "Z" - depth: depth of stage, number of blocks to use - group_width: group width of all blocks in this stage - filters_in: input filters to this stage - filters_out: output filters from this stage - name: name prefix - - Returns: - Output tensor of Stage - """ - if name is None: - name = str(backend.get_uid("stage")) + inputs = img_input - def apply(inputs): x = inputs - if block_type == "X": - x = XBlock( - filters_in, - filters_out, - group_width, - stride=2, - name=f"{name}_XBlock_0")(x) - for i in range(1, depth): - x = XBlock( - filters_out, filters_out, group_width, name=f"{name}_XBlock_{i}")(x) - elif block_type == "Y": - x = YBlock( - filters_in, - filters_out, - group_width, - stride=2, - name=name + "_YBlock_0")(x) - for i in range(1, depth): - x = YBlock( - filters_out, filters_out, group_width, name=f"{name}_YBlock_{i}")(x) - elif block_type == "Z": - x = ZBlock( - filters_in, - filters_out, - group_width, - stride=2, - name=f"{name}_ZBlock_0")(x) - for i in range(1, depth): - x = ZBlock( - filters_out, filters_out, group_width, name=f"{name}_ZBlock_{i}")(x) - else: - raise NotImplementedError(f"Block type `{block_type}` not recognized." - f"block_type must be one of (`X`, `Y`, `Z`). ") - return x - - return apply - - -def Head(num_classes=1000, name=None): - """Implementation of classification head of RegNet. - - Args: - num_classes: number of classes for Dense layer - name: name prefix - - Returns: - Output logits tensor. - """ - if name is None: - name = str(backend.get_uid("head")) - - def apply(x): - x = layers.GlobalAveragePooling2D(name=name + "_head_gap")(x) - x = layers.Dense(num_classes, name=name + "head_dense")(x) - return x - - return apply - - -def RegNet(depths, - widths, - group_width, - block_type, - default_size, - model_name="regnet", - include_preprocessing=True, - include_top=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - """Instantiates RegNet architecture given specific configuration. 
- - Args: - depths: An iterable containing depths for each individual stages. - widths: An iterable containing output channel width of each individual - stages - group_width: Number of channels to be used in each group. See grouped - convolutions for more information. - block_type: Must be one of `{"X", "Y", "Z"}`. For more details see the - papers "Designing network design spaces" and "Fast and Accurate Model - Scaling" - default_size: Default input image size. - model_name: An optional name for the model. - include_preprocessing: boolean denoting whther to include preprocessing in - the model - include_top: Boolean denoting whether to include classification head to the - model. - weights: one of `None` (random initialization), "imagenet" (pre-training on - ImageNet), or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use - as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is - False. It should have exactly 3 inputs channels. - pooling: optional pooling mode for feature extraction when `include_top` is - `False`. - `None` means that the output of the model will be the 4D tensor - output of the last convolutional layer. - `avg` means that global average - pooling will be applied to the output of the last convolutional layer, and - thus the output of the model will be a 2D tensor. - `max` means that - global max pooling will be applied. - classes: optional number of classes to classify images into, only to be - specified if `include_top` is True, and if no `weights` argument is - specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - - Returns: - A `keras.Model` instance. + if include_preprocessing: + x = PreStem(name=model_name)(x) + x = Stem(name=model_name)(x) + + in_channels = 32 # Output from Stem + + for num_stage in range(4): + depth = depths[num_stage] + out_channels = widths[num_stage] + + x = Stage( + block_type, + depth, + group_width, + in_channels, + out_channels, + name=model_name + "_Stage_" + str(num_stage), + )(x) + in_channels = out_channels - Raises: - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - ValueError: if `classifier_activation` is not `softmax` or `None` when - using a pretrained top layer. - ValueError: if `include_top` is True but `num_classes` is not 1000. 
- ValueError: if `block_type` is not one of `{"X", "Y", "Z"}` - - """ - if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): - raise ValueError("The `weights` argument should be either " - "`None` (random initialization), `imagenet` " - "(pre-training on ImageNet), " - "or the path to the weights file to be loaded.") - - if weights == "imagenet" and include_top and classes != 1000: - raise ValueError("If using `weights` as `'imagenet'` with `include_top`" - " as true, `classes` should be 1000") - - # Determine proper input shape - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - x = inputs - if include_preprocessing: - x = PreStem(name=model_name)(x) - x = Stem(name=model_name)(x) - - in_channels = 32 # Output from Stem - - for num_stage in range(4): - depth = depths[num_stage] - out_channels = widths[num_stage] - - x = Stage( - block_type, - depth, - group_width, - in_channels, - out_channels, - name=model_name + "_Stage_" + str(num_stage))(x) - in_channels = out_channels - - if include_top: - x = Head(num_classes=classes)(x) - imagenet_utils.validate_activation(classifier_activation, weights) - - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D()(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D()(x) - - model = training.Model(inputs=inputs, outputs=x, name=model_name) - - # Load weights. - if weights == "imagenet": if include_top: - file_suffix = ".h5" - file_hash = WEIGHTS_HASHES[model_name[-4:]][0] - else: - file_suffix = "_notop.h5" - file_hash = WEIGHTS_HASHES[model_name[-4:]][1] - file_name = model_name + file_suffix - weights_path = data_utils.get_file( - file_name, - BASE_WEIGHTS_PATH + file_name, - cache_subdir="models", - file_hash=file_hash) - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) + x = Head(num_classes=classes)(x) + imagenet_utils.validate_activation(classifier_activation, weights) - return model + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + model = training.Model(inputs=inputs, outputs=x, name=model_name) + + # Load weights. 
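+    # `model_name[-4:]` recovers the variant key (e.g. "x002" from
+    # "regnetx002"); WEIGHTS_HASHES maps each key to a (top, notop) hash
+    # pair, and `data_utils.get_file` verifies the cached download against
+    # the selected hash before the weights are loaded.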
+ if weights == "imagenet": + if include_top: + file_suffix = ".h5" + file_hash = WEIGHTS_HASHES[model_name[-4:]][0] + else: + file_suffix = "_notop.h5" + file_hash = WEIGHTS_HASHES[model_name[-4:]][1] + file_name = model_name + file_suffix + weights_path = data_utils.get_file( + file_name, + BASE_WEIGHTS_PATH + file_name, + cache_subdir="models", + file_hash=file_hash, + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model ## Instantiating variants ## -@keras_export("keras.applications.regnet.RegNetX002", - "keras.applications.RegNetX002") -def RegNetX002(model_name="regnetx002", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x002"]["depths"], - MODEL_CONFIGS["x002"]["widths"], - MODEL_CONFIGS["x002"]["group_width"], - MODEL_CONFIGS["x002"]["block_type"], - MODEL_CONFIGS["x002"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX004", - "keras.applications.RegNetX004") -def RegNetX004(model_name="regnetx004", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x004"]["depths"], - MODEL_CONFIGS["x004"]["widths"], - MODEL_CONFIGS["x004"]["group_width"], - MODEL_CONFIGS["x004"]["block_type"], - MODEL_CONFIGS["x004"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX006", - "keras.applications.RegNetX006") -def RegNetX006(model_name="regnetx006", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x006"]["depths"], - MODEL_CONFIGS["x006"]["widths"], - MODEL_CONFIGS["x006"]["group_width"], - MODEL_CONFIGS["x006"]["block_type"], - MODEL_CONFIGS["x006"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX008", - "keras.applications.RegNetX008") -def RegNetX008(model_name="regnetx008", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x008"]["depths"], - MODEL_CONFIGS["x008"]["widths"], - MODEL_CONFIGS["x008"]["group_width"], - MODEL_CONFIGS["x008"]["block_type"], - MODEL_CONFIGS["x008"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, 
- pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX016", - "keras.applications.RegNetX016") -def RegNetX016(model_name="regnetx016", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x016"]["depths"], - MODEL_CONFIGS["x016"]["widths"], - MODEL_CONFIGS["x016"]["group_width"], - MODEL_CONFIGS["x016"]["block_type"], - MODEL_CONFIGS["x016"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX032", - "keras.applications.RegNetX032") -def RegNetX032(model_name="regnetx032", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x032"]["depths"], - MODEL_CONFIGS["x032"]["widths"], - MODEL_CONFIGS["x032"]["group_width"], - MODEL_CONFIGS["x032"]["block_type"], - MODEL_CONFIGS["x032"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX040", - "keras.applications.RegNetX040") -def RegNetX040(model_name="regnetx040", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x040"]["depths"], - MODEL_CONFIGS["x040"]["widths"], - MODEL_CONFIGS["x040"]["group_width"], - MODEL_CONFIGS["x040"]["block_type"], - MODEL_CONFIGS["x040"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX064", - "keras.applications.RegNetX064") -def RegNetX064(model_name="regnetx064", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x064"]["depths"], - MODEL_CONFIGS["x064"]["widths"], - MODEL_CONFIGS["x064"]["group_width"], - MODEL_CONFIGS["x064"]["block_type"], - MODEL_CONFIGS["x064"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX080", - "keras.applications.RegNetX080") -def RegNetX080(model_name="regnetx080", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - 
MODEL_CONFIGS["x080"]["depths"], - MODEL_CONFIGS["x080"]["widths"], - MODEL_CONFIGS["x080"]["group_width"], - MODEL_CONFIGS["x080"]["block_type"], - MODEL_CONFIGS["x080"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX120", - "keras.applications.RegNetX120") -def RegNetX120(model_name="regnetx120", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x120"]["depths"], - MODEL_CONFIGS["x120"]["widths"], - MODEL_CONFIGS["x120"]["group_width"], - MODEL_CONFIGS["x120"]["block_type"], - MODEL_CONFIGS["x120"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX160", - "keras.applications.RegNetX160") -def RegNetX160(model_name="regnetx160", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x160"]["depths"], - MODEL_CONFIGS["x160"]["widths"], - MODEL_CONFIGS["x160"]["group_width"], - MODEL_CONFIGS["x160"]["block_type"], - MODEL_CONFIGS["x160"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetX320", - "keras.applications.RegNetX320") -def RegNetX320(model_name="regnetx320", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["x320"]["depths"], - MODEL_CONFIGS["x320"]["widths"], - MODEL_CONFIGS["x320"]["group_width"], - MODEL_CONFIGS["x320"]["block_type"], - MODEL_CONFIGS["x320"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY002", - "keras.applications.RegNetY002") -def RegNetY002(model_name="regnety002", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y002"]["depths"], - MODEL_CONFIGS["y002"]["widths"], - MODEL_CONFIGS["y002"]["group_width"], - MODEL_CONFIGS["y002"]["block_type"], - MODEL_CONFIGS["y002"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - 
classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY004", - "keras.applications.RegNetY004") -def RegNetY004(model_name="regnety004", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y004"]["depths"], - MODEL_CONFIGS["y004"]["widths"], - MODEL_CONFIGS["y004"]["group_width"], - MODEL_CONFIGS["y004"]["block_type"], - MODEL_CONFIGS["y004"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY006", - "keras.applications.RegNetY006") -def RegNetY006(model_name="regnety006", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y006"]["depths"], - MODEL_CONFIGS["y006"]["widths"], - MODEL_CONFIGS["y006"]["group_width"], - MODEL_CONFIGS["y006"]["block_type"], - MODEL_CONFIGS["y006"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY008", - "keras.applications.RegNetY008") -def RegNetY008(model_name="regnety008", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y008"]["depths"], - MODEL_CONFIGS["y008"]["widths"], - MODEL_CONFIGS["y008"]["group_width"], - MODEL_CONFIGS["y008"]["block_type"], - MODEL_CONFIGS["y008"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY016", - "keras.applications.RegNetY016") -def RegNetY016(model_name="regnety016", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y016"]["depths"], - MODEL_CONFIGS["y016"]["widths"], - MODEL_CONFIGS["y016"]["group_width"], - MODEL_CONFIGS["y016"]["block_type"], - MODEL_CONFIGS["y016"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY032", - "keras.applications.RegNetY032") -def RegNetY032(model_name="regnety032", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y032"]["depths"], - 
MODEL_CONFIGS["y032"]["widths"], - MODEL_CONFIGS["y032"]["group_width"], - MODEL_CONFIGS["y032"]["block_type"], - MODEL_CONFIGS["y032"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY040", - "keras.applications.RegNetY040") -def RegNetY040(model_name="regnety040", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y040"]["depths"], - MODEL_CONFIGS["y040"]["widths"], - MODEL_CONFIGS["y040"]["group_width"], - MODEL_CONFIGS["y040"]["block_type"], - MODEL_CONFIGS["y040"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY064", - "keras.applications.RegNetY064") -def RegNetY064(model_name="regnety064", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y064"]["depths"], - MODEL_CONFIGS["y064"]["widths"], - MODEL_CONFIGS["y064"]["group_width"], - MODEL_CONFIGS["y064"]["block_type"], - MODEL_CONFIGS["y064"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY080", - "keras.applications.RegNetY080") -def RegNetY080(model_name="regnety080", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y080"]["depths"], - MODEL_CONFIGS["y080"]["widths"], - MODEL_CONFIGS["y080"]["group_width"], - MODEL_CONFIGS["y080"]["block_type"], - MODEL_CONFIGS["y080"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY120", - "keras.applications.RegNetY120") -def RegNetY120(model_name="regnety120", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y120"]["depths"], - MODEL_CONFIGS["y120"]["widths"], - MODEL_CONFIGS["y120"]["group_width"], - MODEL_CONFIGS["y120"]["block_type"], - MODEL_CONFIGS["y120"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - 
-@keras_export("keras.applications.regnet.RegNetY160", - "keras.applications.RegNetY160") -def RegNetY160(model_name="regnety160", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y160"]["depths"], - MODEL_CONFIGS["y160"]["widths"], - MODEL_CONFIGS["y160"]["group_width"], - MODEL_CONFIGS["y160"]["block_type"], - MODEL_CONFIGS["y160"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) - - -@keras_export("keras.applications.regnet.RegNetY320", - "keras.applications.RegNetY320") -def RegNetY320(model_name="regnety320", - include_top=True, - include_preprocessing=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax"): - return RegNet( - MODEL_CONFIGS["y320"]["depths"], - MODEL_CONFIGS["y320"]["widths"], - MODEL_CONFIGS["y320"]["group_width"], - MODEL_CONFIGS["y320"]["block_type"], - MODEL_CONFIGS["y320"]["default_size"], - model_name=model_name, - include_top=include_top, - include_preprocessing=include_preprocessing, - weights=weights, - input_tensor=input_tensor, - input_shape=input_shape, - pooling=pooling, - classes=classes, - classifier_activation=classifier_activation) +@keras_export( + "keras.applications.regnet.RegNetX002", "keras.applications.RegNetX002" +) +def RegNetX002( + model_name="regnetx002", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x002"]["depths"], + MODEL_CONFIGS["x002"]["widths"], + MODEL_CONFIGS["x002"]["group_width"], + MODEL_CONFIGS["x002"]["block_type"], + MODEL_CONFIGS["x002"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX004", "keras.applications.RegNetX004" +) +def RegNetX004( + model_name="regnetx004", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x004"]["depths"], + MODEL_CONFIGS["x004"]["widths"], + MODEL_CONFIGS["x004"]["group_width"], + MODEL_CONFIGS["x004"]["block_type"], + MODEL_CONFIGS["x004"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX006", "keras.applications.RegNetX006" +) +def RegNetX006( + model_name="regnetx006", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x006"]["depths"], + 
MODEL_CONFIGS["x006"]["widths"], + MODEL_CONFIGS["x006"]["group_width"], + MODEL_CONFIGS["x006"]["block_type"], + MODEL_CONFIGS["x006"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX008", "keras.applications.RegNetX008" +) +def RegNetX008( + model_name="regnetx008", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x008"]["depths"], + MODEL_CONFIGS["x008"]["widths"], + MODEL_CONFIGS["x008"]["group_width"], + MODEL_CONFIGS["x008"]["block_type"], + MODEL_CONFIGS["x008"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX016", "keras.applications.RegNetX016" +) +def RegNetX016( + model_name="regnetx016", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x016"]["depths"], + MODEL_CONFIGS["x016"]["widths"], + MODEL_CONFIGS["x016"]["group_width"], + MODEL_CONFIGS["x016"]["block_type"], + MODEL_CONFIGS["x016"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX032", "keras.applications.RegNetX032" +) +def RegNetX032( + model_name="regnetx032", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x032"]["depths"], + MODEL_CONFIGS["x032"]["widths"], + MODEL_CONFIGS["x032"]["group_width"], + MODEL_CONFIGS["x032"]["block_type"], + MODEL_CONFIGS["x032"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX040", "keras.applications.RegNetX040" +) +def RegNetX040( + model_name="regnetx040", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x040"]["depths"], + MODEL_CONFIGS["x040"]["widths"], + MODEL_CONFIGS["x040"]["group_width"], + MODEL_CONFIGS["x040"]["block_type"], + MODEL_CONFIGS["x040"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + 
classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX064", "keras.applications.RegNetX064" +) +def RegNetX064( + model_name="regnetx064", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x064"]["depths"], + MODEL_CONFIGS["x064"]["widths"], + MODEL_CONFIGS["x064"]["group_width"], + MODEL_CONFIGS["x064"]["block_type"], + MODEL_CONFIGS["x064"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX080", "keras.applications.RegNetX080" +) +def RegNetX080( + model_name="regnetx080", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x080"]["depths"], + MODEL_CONFIGS["x080"]["widths"], + MODEL_CONFIGS["x080"]["group_width"], + MODEL_CONFIGS["x080"]["block_type"], + MODEL_CONFIGS["x080"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX120", "keras.applications.RegNetX120" +) +def RegNetX120( + model_name="regnetx120", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x120"]["depths"], + MODEL_CONFIGS["x120"]["widths"], + MODEL_CONFIGS["x120"]["group_width"], + MODEL_CONFIGS["x120"]["block_type"], + MODEL_CONFIGS["x120"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX160", "keras.applications.RegNetX160" +) +def RegNetX160( + model_name="regnetx160", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["x160"]["depths"], + MODEL_CONFIGS["x160"]["widths"], + MODEL_CONFIGS["x160"]["group_width"], + MODEL_CONFIGS["x160"]["block_type"], + MODEL_CONFIGS["x160"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetX320", "keras.applications.RegNetX320" +) +def RegNetX320( + model_name="regnetx320", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): 
+ return RegNet( + MODEL_CONFIGS["x320"]["depths"], + MODEL_CONFIGS["x320"]["widths"], + MODEL_CONFIGS["x320"]["group_width"], + MODEL_CONFIGS["x320"]["block_type"], + MODEL_CONFIGS["x320"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY002", "keras.applications.RegNetY002" +) +def RegNetY002( + model_name="regnety002", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y002"]["depths"], + MODEL_CONFIGS["y002"]["widths"], + MODEL_CONFIGS["y002"]["group_width"], + MODEL_CONFIGS["y002"]["block_type"], + MODEL_CONFIGS["y002"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY004", "keras.applications.RegNetY004" +) +def RegNetY004( + model_name="regnety004", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y004"]["depths"], + MODEL_CONFIGS["y004"]["widths"], + MODEL_CONFIGS["y004"]["group_width"], + MODEL_CONFIGS["y004"]["block_type"], + MODEL_CONFIGS["y004"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY006", "keras.applications.RegNetY006" +) +def RegNetY006( + model_name="regnety006", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y006"]["depths"], + MODEL_CONFIGS["y006"]["widths"], + MODEL_CONFIGS["y006"]["group_width"], + MODEL_CONFIGS["y006"]["block_type"], + MODEL_CONFIGS["y006"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY008", "keras.applications.RegNetY008" +) +def RegNetY008( + model_name="regnety008", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y008"]["depths"], + MODEL_CONFIGS["y008"]["widths"], + MODEL_CONFIGS["y008"]["group_width"], + MODEL_CONFIGS["y008"]["block_type"], + MODEL_CONFIGS["y008"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + 
input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY016", "keras.applications.RegNetY016" +) +def RegNetY016( + model_name="regnety016", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y016"]["depths"], + MODEL_CONFIGS["y016"]["widths"], + MODEL_CONFIGS["y016"]["group_width"], + MODEL_CONFIGS["y016"]["block_type"], + MODEL_CONFIGS["y016"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY032", "keras.applications.RegNetY032" +) +def RegNetY032( + model_name="regnety032", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y032"]["depths"], + MODEL_CONFIGS["y032"]["widths"], + MODEL_CONFIGS["y032"]["group_width"], + MODEL_CONFIGS["y032"]["block_type"], + MODEL_CONFIGS["y032"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY040", "keras.applications.RegNetY040" +) +def RegNetY040( + model_name="regnety040", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y040"]["depths"], + MODEL_CONFIGS["y040"]["widths"], + MODEL_CONFIGS["y040"]["group_width"], + MODEL_CONFIGS["y040"]["block_type"], + MODEL_CONFIGS["y040"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY064", "keras.applications.RegNetY064" +) +def RegNetY064( + model_name="regnety064", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y064"]["depths"], + MODEL_CONFIGS["y064"]["widths"], + MODEL_CONFIGS["y064"]["group_width"], + MODEL_CONFIGS["y064"]["block_type"], + MODEL_CONFIGS["y064"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY080", "keras.applications.RegNetY080" +) +def RegNetY080( + model_name="regnety080", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + 
pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y080"]["depths"], + MODEL_CONFIGS["y080"]["widths"], + MODEL_CONFIGS["y080"]["group_width"], + MODEL_CONFIGS["y080"]["block_type"], + MODEL_CONFIGS["y080"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY120", "keras.applications.RegNetY120" +) +def RegNetY120( + model_name="regnety120", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y120"]["depths"], + MODEL_CONFIGS["y120"]["widths"], + MODEL_CONFIGS["y120"]["group_width"], + MODEL_CONFIGS["y120"]["block_type"], + MODEL_CONFIGS["y120"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY160", "keras.applications.RegNetY160" +) +def RegNetY160( + model_name="regnety160", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y160"]["depths"], + MODEL_CONFIGS["y160"]["widths"], + MODEL_CONFIGS["y160"]["group_width"], + MODEL_CONFIGS["y160"]["block_type"], + MODEL_CONFIGS["y160"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.regnet.RegNetY320", "keras.applications.RegNetY320" +) +def RegNetY320( + model_name="regnety320", + include_top=True, + include_preprocessing=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", +): + return RegNet( + MODEL_CONFIGS["y320"]["depths"], + MODEL_CONFIGS["y320"]["widths"], + MODEL_CONFIGS["y320"]["group_width"], + MODEL_CONFIGS["y320"]["block_type"], + MODEL_CONFIGS["y320"]["default_size"], + model_name=model_name, + include_top=include_top, + include_preprocessing=include_preprocessing, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + ) RegNetX002.__doc__ = BASE_DOCSTRING.format(name="RegNetX002") @@ -1606,30 +1809,30 @@ def RegNetY320(model_name="regnety320", @keras_export("keras.applications.regnet.preprocess_input") -def preprocess_input(x, data_format=None): # pylint: disable=unused-argument - """A placeholder method for backward compatibility. - - The preprocessing logic has been included in the efficientnet model - implementation. Users are no longer required to call this method to normalize - the input data. This method does nothing and only kept as a placeholder to - align the API surface between old and new version of model. 
-
-  Args:
-    x: A floating point `numpy.array` or a `tf.Tensor`.
-    data_format: Optional data format of the image tensor/array. Defaults to
-      None, in which case the global setting
-      `tf.keras.backend.image_data_format()` is used (unless you changed it, it
-      defaults to "channels_last").{mode}
-
-  Returns:
-    Unchanged `numpy.array` or `tf.Tensor`.
-  """
-  return x
+def preprocess_input(x, data_format=None):
+    """A placeholder method for backward compatibility.
+
+    The preprocessing logic has been included in the regnet model
+    implementation. Users are no longer required to call this method to
+    normalize the input data. This method does nothing and is only kept as
+    a placeholder to align the API surface between the old and new versions
+    of the model.
+
+    Args:
+      x: A floating point `numpy.array` or a `tf.Tensor`.
+      data_format: Optional data format of the image tensor/array. `None` means
+        the global setting `tf.keras.backend.image_data_format()` is used
+        (unless you changed it, it uses "channels_last").
+        Defaults to `None`.
+
+    Returns:
+      Unchanged `numpy.array` or `tf.Tensor`.
+    """
+    return x


 @keras_export("keras.applications.regnet.decode_predictions")
 def decode_predictions(preds, top=5):
-  return imagenet_utils.decode_predictions(preds, top=top)
+    return imagenet_utils.decode_predictions(preds, top=top)


 decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__
diff --git a/keras/applications/resnet.py b/keras/applications/resnet.py
index 46b4e81c8ad3..adcd2b746e08 100644
--- a/keras/applications/resnet.py
+++ b/keras/applications/resnet.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# pylint: disable=invalid-name
+
 """ResNet models for Keras.
Reference: @@ -28,496 +28,604 @@ from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export BASE_WEIGHTS_PATH = ( - 'https://storage.googleapis.com/tensorflow/keras-applications/resnet/') + "https://storage.googleapis.com/tensorflow/keras-applications/resnet/" +) WEIGHTS_HASHES = { - 'resnet50': ('2cb95161c43110f7111970584f804107', - '4d473c1dd8becc155b73f8504c6f6626'), - 'resnet101': ('f1aeb4b969a6efcfb50fad2f0c20cfc5', - '88cf7a10940856eca736dc7b7e228a21'), - 'resnet152': ('100835be76be38e30d865e96f2aaae62', - 'ee4c566cf9a93f14d82f913c2dc6dd0c'), - 'resnet50v2': ('3ef43a0b657b3be2300d5770ece849e0', - 'fac2f116257151a9d068a22e544a4917'), - 'resnet101v2': ('6343647c601c52e1368623803854d971', - 'c0ed64b8031c3730f411d2eb4eea35b5'), - 'resnet152v2': ('a49b44d1979771252814e80f8ec446f9', - 'ed17cf2e0169df9d443503ef94b23b33'), - 'resnext50': ('67a5b30d522ed92f75a1f16eef299d1a', - '62527c363bdd9ec598bed41947b379fc'), - 'resnext101': - ('34fb605428fcc7aa4d62f44404c11509', '0f678c91647380debd923963594981b3') + "resnet50": ( + "2cb95161c43110f7111970584f804107", + "4d473c1dd8becc155b73f8504c6f6626", + ), + "resnet101": ( + "f1aeb4b969a6efcfb50fad2f0c20cfc5", + "88cf7a10940856eca736dc7b7e228a21", + ), + "resnet152": ( + "100835be76be38e30d865e96f2aaae62", + "ee4c566cf9a93f14d82f913c2dc6dd0c", + ), + "resnet50v2": ( + "3ef43a0b657b3be2300d5770ece849e0", + "fac2f116257151a9d068a22e544a4917", + ), + "resnet101v2": ( + "6343647c601c52e1368623803854d971", + "c0ed64b8031c3730f411d2eb4eea35b5", + ), + "resnet152v2": ( + "a49b44d1979771252814e80f8ec446f9", + "ed17cf2e0169df9d443503ef94b23b33", + ), + "resnext50": ( + "67a5b30d522ed92f75a1f16eef299d1a", + "62527c363bdd9ec598bed41947b379fc", + ), + "resnext101": ( + "34fb605428fcc7aa4d62f44404c11509", + "0f678c91647380debd923963594981b3", + ), } layers = None -def ResNet(stack_fn, - preact, - use_bias, - model_name='resnet', - include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation='softmax', - **kwargs): - """Instantiates the ResNet, ResNetV2, and ResNeXt architecture. - - Args: - stack_fn: a function that returns output tensor for the - stacked residual blocks. - preact: whether to use pre-activation or not - (True for ResNetV2, False for ResNet and ResNeXt). - use_bias: whether to use biases for convolutional layers or not - (True for ResNet and ResNetV2, False for ResNeXt). - model_name: string, model name. - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor - (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` (with `channels_last` data format) - or `(3, 224, 224)` (with `channels_first` data format). - It should have exactly 3 inputs channels. - pooling: optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional layer. 
-      - `avg` means that global average pooling
-          will be applied to the output of the
-          last convolutional layer, and thus
-          the output of the model will be a 2D tensor.
-      - `max` means that global max pooling will
-          be applied.
-    classes: optional number of classes to classify images
-      into, only to be specified if `include_top` is True, and
-      if no `weights` argument is specified.
-    classifier_activation: A `str` or callable. The activation function to use
-      on the "top" layer. Ignored unless `include_top=True`. Set
-      `classifier_activation=None` to return the logits of the "top" layer.
-      When loading pretrained weights, `classifier_activation` can only
-      be `None` or `"softmax"`.
-    **kwargs: For backwards compatibility only.
-
-  Returns:
-    A `keras.Model` instance.
-  """
-  global layers
-  if 'layers' in kwargs:
-    layers = kwargs.pop('layers')
-  else:
-    layers = VersionAwareLayers()
-  if kwargs:
-    raise ValueError('Unknown argument(s): %s' % (kwargs,))
-  if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)):
-    raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization), `imagenet` '
-                     '(pre-training on ImageNet), '
-                     'or the path to the weights file to be loaded.')
-
-  if weights == 'imagenet' and include_top and classes != 1000:
-    raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
-                     ' as true, `classes` should be 1000')
-
-  # Determine proper input shape
-  input_shape = imagenet_utils.obtain_input_shape(
-      input_shape,
-      default_size=224,
-      min_size=32,
-      data_format=backend.image_data_format(),
-      require_flatten=include_top,
-      weights=weights)
-
-  if input_tensor is None:
-    img_input = layers.Input(shape=input_shape)
-  else:
-    if not backend.is_keras_tensor(input_tensor):
-      img_input = layers.Input(tensor=input_tensor, shape=input_shape)
+def ResNet(
+    stack_fn,
+    preact,
+    use_bias,
+    model_name="resnet",
+    include_top=True,
+    weights="imagenet",
+    input_tensor=None,
+    input_shape=None,
+    pooling=None,
+    classes=1000,
+    classifier_activation="softmax",
+    **kwargs,
+):
+    """Instantiates the ResNet, ResNetV2, and ResNeXt architecture.
+
+    Args:
+      stack_fn: a function that returns output tensor for the
+        stacked residual blocks.
+      preact: whether to use pre-activation or not
+        (True for ResNetV2, False for ResNet and ResNeXt).
+      use_bias: whether to use biases for convolutional layers or not
+        (True for ResNet and ResNetV2, False for ResNeXt).
+      model_name: string, model name.
+      include_top: whether to include the fully-connected
+        layer at the top of the network.
+      weights: one of `None` (random initialization),
+        'imagenet' (pre-training on ImageNet),
+        or the path to the weights file to be loaded.
+      input_tensor: optional Keras tensor
+        (i.e. output of `layers.Input()`)
+        to use as image input for the model.
+      input_shape: optional shape tuple, only to be specified
+        if `include_top` is False (otherwise the input shape
+        has to be `(224, 224, 3)` (with `channels_last` data format)
+        or `(3, 224, 224)` (with `channels_first` data format)).
+        It should have exactly 3 input channels.
+      pooling: optional pooling mode for feature extraction
+        when `include_top` is `False`.
+        - `None` means that the output of the model will be
+            the 4D tensor output of the
+            last convolutional layer.
+        - `avg` means that global average pooling
+            will be applied to the output of the
+            last convolutional layer, and thus
+            the output of the model will be a 2D tensor.
+        - `max` means that global max pooling will
+            be applied.
+ classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + **kwargs: For backwards compatibility only. + + Returns: + A `keras.Model` instance. + """ + global layers + if "layers" in kwargs: + layers = kwargs.pop("layers") + else: + layers = VersionAwareLayers() + if kwargs: + raise ValueError(f"Unknown argument(s): {kwargs}") + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded." + ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top`' + " as true, `classes` should be 1000" + ) + + # Determine proper input shape + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=224, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - img_input = input_tensor + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 - x = layers.ZeroPadding2D( - padding=((3, 3), (3, 3)), name='conv1_pad')(img_input) - x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x) + x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name="conv1_pad")( + img_input + ) + x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name="conv1_conv")(x) - if not preact: - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name='conv1_bn')(x) - x = layers.Activation('relu', name='conv1_relu')(x) + if not preact: + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name="conv1_bn" + )(x) + x = layers.Activation("relu", name="conv1_relu")(x) - x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x) - x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x) + x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name="pool1_pad")(x) + x = layers.MaxPooling2D(3, strides=2, name="pool1_pool")(x) - x = stack_fn(x) + x = stack_fn(x) + + if preact: + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name="post_bn" + )(x) + x = layers.Activation("relu", name="post_relu")(x) - if preact: - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name='post_bn')(x) - x = layers.Activation('relu', name='post_relu')(x) - - if include_top: - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense(classes, activation=classifier_activation, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D(name='max_pool')(x) - - # Ensure that the model takes into account - # any 
potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = training.Model(inputs, x, name=model_name) - - # Load weights. - if (weights == 'imagenet') and (model_name in WEIGHTS_HASHES): if include_top: - file_name = model_name + '_weights_tf_dim_ordering_tf_kernels.h5' - file_hash = WEIGHTS_HASHES[model_name][0] + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) else: - file_name = model_name + '_weights_tf_dim_ordering_tf_kernels_notop.h5' - file_hash = WEIGHTS_HASHES[model_name][1] - weights_path = data_utils.get_file( - file_name, - BASE_WEIGHTS_PATH + file_name, - cache_subdir='models', - file_hash=file_hash) - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model + if pooling == "avg": + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D(name="max_pool")(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = training.Model(inputs, x, name=model_name) + + # Load weights. + if (weights == "imagenet") and (model_name in WEIGHTS_HASHES): + if include_top: + file_name = model_name + "_weights_tf_dim_ordering_tf_kernels.h5" + file_hash = WEIGHTS_HASHES[model_name][0] + else: + file_name = ( + model_name + "_weights_tf_dim_ordering_tf_kernels_notop.h5" + ) + file_hash = WEIGHTS_HASHES[model_name][1] + weights_path = data_utils.get_file( + file_name, + BASE_WEIGHTS_PATH + file_name, + cache_subdir="models", + file_hash=file_hash, + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model def block1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None): - """A residual block. + """A residual block. - Args: - x: input tensor. - filters: integer, filters of the bottleneck layer. - kernel_size: default 3, kernel size of the bottleneck layer. - stride: default 1, stride of the first layer. - conv_shortcut: default True, use convolution shortcut if True, - otherwise identity shortcut. - name: string, block label. + Args: + x: input tensor. + filters: integer, filters of the bottleneck layer. + kernel_size: default 3, kernel size of the bottleneck layer. + stride: default 1, stride of the first layer. + conv_shortcut: default True, use convolution shortcut if True, + otherwise identity shortcut. + name: string, block label. - Returns: - Output tensor for the residual block. 
- """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - if conv_shortcut: - shortcut = layers.Conv2D( - 4 * filters, 1, strides=stride, name=name + '_0_conv')(x) - shortcut = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')(shortcut) - else: - shortcut = x - - x = layers.Conv2D(filters, 1, strides=stride, name=name + '_1_conv')(x) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x) - x = layers.Activation('relu', name=name + '_1_relu')(x) - - x = layers.Conv2D( - filters, kernel_size, padding='SAME', name=name + '_2_conv')(x) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x) - x = layers.Activation('relu', name=name + '_2_relu')(x) - - x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_3_bn')(x) - - x = layers.Add(name=name + '_add')([shortcut, x]) - x = layers.Activation('relu', name=name + '_out')(x) - return x + Returns: + Output tensor for the residual block. + """ + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + if conv_shortcut: + shortcut = layers.Conv2D( + 4 * filters, 1, strides=stride, name=name + "_0_conv" + )(x) + shortcut = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_0_bn" + )(shortcut) + else: + shortcut = x + + x = layers.Conv2D(filters, 1, strides=stride, name=name + "_1_conv")(x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_1_bn" + )(x) + x = layers.Activation("relu", name=name + "_1_relu")(x) + + x = layers.Conv2D( + filters, kernel_size, padding="SAME", name=name + "_2_conv" + )(x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_2_bn" + )(x) + x = layers.Activation("relu", name=name + "_2_relu")(x) + + x = layers.Conv2D(4 * filters, 1, name=name + "_3_conv")(x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_3_bn" + )(x) + + x = layers.Add(name=name + "_add")([shortcut, x]) + x = layers.Activation("relu", name=name + "_out")(x) + return x def stack1(x, filters, blocks, stride1=2, name=None): - """A set of stacked residual blocks. + """A set of stacked residual blocks. - Args: - x: input tensor. - filters: integer, filters of the bottleneck layer in a block. - blocks: integer, blocks in the stacked blocks. - stride1: default 2, stride of the first layer in the first block. - name: string, stack label. + Args: + x: input tensor. + filters: integer, filters of the bottleneck layer in a block. + blocks: integer, blocks in the stacked blocks. + stride1: default 2, stride of the first layer in the first block. + name: string, stack label. - Returns: - Output tensor for the stacked blocks. - """ - x = block1(x, filters, stride=stride1, name=name + '_block1') - for i in range(2, blocks + 1): - x = block1(x, filters, conv_shortcut=False, name=name + '_block' + str(i)) - return x + Returns: + Output tensor for the stacked blocks. + """ + x = block1(x, filters, stride=stride1, name=name + "_block1") + for i in range(2, blocks + 1): + x = block1( + x, filters, conv_shortcut=False, name=name + "_block" + str(i) + ) + return x def block2(x, filters, kernel_size=3, stride=1, conv_shortcut=False, name=None): - """A residual block. + """A residual block. + + Args: + x: input tensor. + filters: integer, filters of the bottleneck layer. + kernel_size: default 3, kernel size of the bottleneck layer. 
+ stride: default 1, stride of the first layer. + conv_shortcut: default False, use convolution shortcut if True, + otherwise identity shortcut. + name: string, block label. + + Returns: + Output tensor for the residual block. + """ + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + preact = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_preact_bn" + )(x) + preact = layers.Activation("relu", name=name + "_preact_relu")(preact) + + if conv_shortcut: + shortcut = layers.Conv2D( + 4 * filters, 1, strides=stride, name=name + "_0_conv" + )(preact) + else: + shortcut = ( + layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x + ) - Args: + x = layers.Conv2D( + filters, 1, strides=1, use_bias=False, name=name + "_1_conv" + )(preact) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_1_bn" + )(x) + x = layers.Activation("relu", name=name + "_1_relu")(x) + + x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + "_2_pad")(x) + x = layers.Conv2D( + filters, + kernel_size, + strides=stride, + use_bias=False, + name=name + "_2_conv", + )(x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_2_bn" + )(x) + x = layers.Activation("relu", name=name + "_2_relu")(x) + + x = layers.Conv2D(4 * filters, 1, name=name + "_3_conv")(x) + x = layers.Add(name=name + "_out")([shortcut, x]) + return x + + +def stack2(x, filters, blocks, stride1=2, name=None): + """A set of stacked residual blocks. + + Args: + x: input tensor. + filters: integer, filters of the bottleneck layer in a block. + blocks: integer, blocks in the stacked blocks. + stride1: default 2, stride of the first layer in the first block. + name: string, stack label. + + Returns: + Output tensor for the stacked blocks. + """ + x = block2(x, filters, conv_shortcut=True, name=name + "_block1") + for i in range(2, blocks): + x = block2(x, filters, name=name + "_block" + str(i)) + x = block2(x, filters, stride=stride1, name=name + "_block" + str(blocks)) + return x + + +def block3( + x, + filters, + kernel_size=3, + stride=1, + groups=32, + conv_shortcut=True, + name=None, +): + """A residual block. + + Args: x: input tensor. filters: integer, filters of the bottleneck layer. kernel_size: default 3, kernel size of the bottleneck layer. stride: default 1, stride of the first layer. - conv_shortcut: default False, use convolution shortcut if True, - otherwise identity shortcut. + groups: default 32, group size for grouped convolution. + conv_shortcut: default True, use convolution shortcut if True, + otherwise identity shortcut. name: string, block label. - Returns: - Output tensor for the residual block. 
- """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - preact = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_preact_bn')(x) - preact = layers.Activation('relu', name=name + '_preact_relu')(preact) - - if conv_shortcut: - shortcut = layers.Conv2D( - 4 * filters, 1, strides=stride, name=name + '_0_conv')(preact) - else: - shortcut = layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x - - x = layers.Conv2D( - filters, 1, strides=1, use_bias=False, name=name + '_1_conv')(preact) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x) - x = layers.Activation('relu', name=name + '_1_relu')(x) - - x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x) - x = layers.Conv2D( - filters, - kernel_size, - strides=stride, - use_bias=False, - name=name + '_2_conv')(x) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x) - x = layers.Activation('relu', name=name + '_2_relu')(x) - - x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x) - x = layers.Add(name=name + '_out')([shortcut, x]) - return x + Returns: + Output tensor for the residual block. + """ + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + if conv_shortcut: + shortcut = layers.Conv2D( + (64 // groups) * filters, + 1, + strides=stride, + use_bias=False, + name=name + "_0_conv", + )(x) + shortcut = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_0_bn" + )(shortcut) + else: + shortcut = x + + x = layers.Conv2D(filters, 1, use_bias=False, name=name + "_1_conv")(x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_1_bn" + )(x) + x = layers.Activation("relu", name=name + "_1_relu")(x) + + c = filters // groups + x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + "_2_pad")(x) + x = layers.DepthwiseConv2D( + kernel_size, + strides=stride, + depth_multiplier=c, + use_bias=False, + name=name + "_2_conv", + )(x) + x_shape = backend.shape(x)[:-1] + x = backend.reshape(x, backend.concatenate([x_shape, (groups, c, c)])) + x = layers.Lambda( + lambda x: sum(x[:, :, :, :, i] for i in range(c)), + name=name + "_2_reduce", + )(x) + x = backend.reshape(x, backend.concatenate([x_shape, (filters,)])) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_2_bn" + )(x) + x = layers.Activation("relu", name=name + "_2_relu")(x) + x = layers.Conv2D( + (64 // groups) * filters, 1, use_bias=False, name=name + "_3_conv" + )(x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + "_3_bn" + )(x) -def stack2(x, filters, blocks, stride1=2, name=None): - """A set of stacked residual blocks. + x = layers.Add(name=name + "_add")([shortcut, x]) + x = layers.Activation("relu", name=name + "_out")(x) + return x - Args: + +def stack3(x, filters, blocks, stride1=2, groups=32, name=None): + """A set of stacked residual blocks. + + Args: x: input tensor. filters: integer, filters of the bottleneck layer in a block. blocks: integer, blocks in the stacked blocks. stride1: default 2, stride of the first layer in the first block. + groups: default 32, group size for grouped convolution. name: string, stack label. - Returns: + Returns: Output tensor for the stacked blocks. 
- """ - x = block2(x, filters, conv_shortcut=True, name=name + '_block1') - for i in range(2, blocks): - x = block2(x, filters, name=name + '_block' + str(i)) - x = block2(x, filters, stride=stride1, name=name + '_block' + str(blocks)) - return x - - -def block3(x, - filters, - kernel_size=3, - stride=1, - groups=32, - conv_shortcut=True, - name=None): - """A residual block. - - Args: - x: input tensor. - filters: integer, filters of the bottleneck layer. - kernel_size: default 3, kernel size of the bottleneck layer. - stride: default 1, stride of the first layer. - groups: default 32, group size for grouped convolution. - conv_shortcut: default True, use convolution shortcut if True, - otherwise identity shortcut. - name: string, block label. - - Returns: - Output tensor for the residual block. - """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - if conv_shortcut: - shortcut = layers.Conv2D( - (64 // groups) * filters, - 1, - strides=stride, - use_bias=False, - name=name + '_0_conv')(x) - shortcut = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')(shortcut) - else: - shortcut = x - - x = layers.Conv2D(filters, 1, use_bias=False, name=name + '_1_conv')(x) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x) - x = layers.Activation('relu', name=name + '_1_relu')(x) - - c = filters // groups - x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x) - x = layers.DepthwiseConv2D( - kernel_size, - strides=stride, - depth_multiplier=c, - use_bias=False, - name=name + '_2_conv')(x) - x_shape = backend.shape(x)[:-1] - x = backend.reshape(x, backend.concatenate([x_shape, (groups, c, c)])) - x = layers.Lambda( - lambda x: sum(x[:, :, :, :, i] for i in range(c)), - name=name + '_2_reduce')(x) - x = backend.reshape(x, backend.concatenate([x_shape, (filters,)])) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x) - x = layers.Activation('relu', name=name + '_2_relu')(x) - - x = layers.Conv2D( - (64 // groups) * filters, 1, use_bias=False, name=name + '_3_conv')(x) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name=name + '_3_bn')(x) - - x = layers.Add(name=name + '_add')([shortcut, x]) - x = layers.Activation('relu', name=name + '_out')(x) - return x - - -def stack3(x, filters, blocks, stride1=2, groups=32, name=None): - """A set of stacked residual blocks. - - Args: - x: input tensor. - filters: integer, filters of the bottleneck layer in a block. - blocks: integer, blocks in the stacked blocks. - stride1: default 2, stride of the first layer in the first block. - groups: default 32, group size for grouped convolution. - name: string, stack label. - - Returns: - Output tensor for the stacked blocks. 
- """ - x = block3(x, filters, stride=stride1, groups=groups, name=name + '_block1') - for i in range(2, blocks + 1): - x = block3( - x, - filters, - groups=groups, - conv_shortcut=False, - name=name + '_block' + str(i)) - return x - - -@keras_export('keras.applications.resnet50.ResNet50', - 'keras.applications.resnet.ResNet50', - 'keras.applications.ResNet50') -def ResNet50(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the ResNet50 architecture.""" - - def stack_fn(x): - x = stack1(x, 64, 3, stride1=1, name='conv2') - x = stack1(x, 128, 4, name='conv3') - x = stack1(x, 256, 6, name='conv4') - return stack1(x, 512, 3, name='conv5') - - return ResNet(stack_fn, False, True, 'resnet50', include_top, weights, - input_tensor, input_shape, pooling, classes, **kwargs) - - -@keras_export('keras.applications.resnet.ResNet101', - 'keras.applications.ResNet101') -def ResNet101(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the ResNet101 architecture.""" - - def stack_fn(x): - x = stack1(x, 64, 3, stride1=1, name='conv2') - x = stack1(x, 128, 4, name='conv3') - x = stack1(x, 256, 23, name='conv4') - return stack1(x, 512, 3, name='conv5') - - return ResNet(stack_fn, False, True, 'resnet101', include_top, weights, - input_tensor, input_shape, pooling, classes, **kwargs) - - -@keras_export('keras.applications.resnet.ResNet152', - 'keras.applications.ResNet152') -def ResNet152(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the ResNet152 architecture.""" - - def stack_fn(x): - x = stack1(x, 64, 3, stride1=1, name='conv2') - x = stack1(x, 128, 8, name='conv3') - x = stack1(x, 256, 36, name='conv4') - return stack1(x, 512, 3, name='conv5') - - return ResNet(stack_fn, False, True, 'resnet152', include_top, weights, - input_tensor, input_shape, pooling, classes, **kwargs) - - -@keras_export('keras.applications.resnet50.preprocess_input', - 'keras.applications.resnet.preprocess_input') + """ + x = block3(x, filters, stride=stride1, groups=groups, name=name + "_block1") + for i in range(2, blocks + 1): + x = block3( + x, + filters, + groups=groups, + conv_shortcut=False, + name=name + "_block" + str(i), + ) + return x + + +@keras_export( + "keras.applications.resnet50.ResNet50", + "keras.applications.resnet.ResNet50", + "keras.applications.ResNet50", +) +def ResNet50( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs, +): + """Instantiates the ResNet50 architecture.""" + + def stack_fn(x): + x = stack1(x, 64, 3, stride1=1, name="conv2") + x = stack1(x, 128, 4, name="conv3") + x = stack1(x, 256, 6, name="conv4") + return stack1(x, 512, 3, name="conv5") + + return ResNet( + stack_fn, + False, + True, + "resnet50", + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + **kwargs, + ) + + +@keras_export( + "keras.applications.resnet.ResNet101", "keras.applications.ResNet101" +) +def ResNet101( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs, +): + """Instantiates the ResNet101 architecture.""" + + def stack_fn(x): + x = stack1(x, 64, 3, stride1=1, name="conv2") + x = stack1(x, 128, 4, name="conv3") + x = stack1(x, 256, 23, name="conv4") + return 
stack1(x, 512, 3, name="conv5") + + return ResNet( + stack_fn, + False, + True, + "resnet101", + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + **kwargs, + ) + + +@keras_export( + "keras.applications.resnet.ResNet152", "keras.applications.ResNet152" +) +def ResNet152( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs, +): + """Instantiates the ResNet152 architecture.""" + + def stack_fn(x): + x = stack1(x, 64, 3, stride1=1, name="conv2") + x = stack1(x, 128, 8, name="conv3") + x = stack1(x, 256, 36, name="conv4") + return stack1(x, 512, 3, name="conv5") + + return ResNet( + stack_fn, + False, + True, + "resnet152", + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + **kwargs, + ) + + +@keras_export( + "keras.applications.resnet50.preprocess_input", + "keras.applications.resnet.preprocess_input", +) def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input( - x, data_format=data_format, mode='caffe') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="caffe" + ) -@keras_export('keras.applications.resnet50.decode_predictions', - 'keras.applications.resnet.decode_predictions') +@keras_export( + "keras.applications.resnet50.decode_predictions", + "keras.applications.resnet.decode_predictions", +) def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_CAFFE, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ DOC = """ @@ -580,6 +688,6 @@ def decode_predictions(preds, top=5): A Keras model instance. """ -setattr(ResNet50, '__doc__', ResNet50.__doc__ + DOC) -setattr(ResNet101, '__doc__', ResNet101.__doc__ + DOC) -setattr(ResNet152, '__doc__', ResNet152.__doc__ + DOC) +setattr(ResNet50, "__doc__", ResNet50.__doc__ + DOC) +setattr(ResNet101, "__doc__", ResNet101.__doc__ + DOC) +setattr(ResNet152, "__doc__", ResNet152.__doc__ + DOC) diff --git a/keras/applications/resnet_rs.py b/keras/applications/resnet_rs.py index 6b4baa117862..eafa79ec0c69 100644 --- a/keras/applications/resnet_rs.py +++ b/keras/applications/resnet_rs.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name -# pylint: disable=missing-function-docstring + + """ResNet-RS models for Keras. 
Reference: @@ -21,7 +21,12 @@ https://arxiv.org/pdf/2103.07579.pdf) """ import sys -from typing import Callable, Dict, List, Union +from typing import Callable +from typing import Dict +from typing import List +from typing import Union + +import tensorflow.compat.v2 as tf from keras import backend from keras import layers @@ -29,13 +34,13 @@ from keras.engine import training from keras.utils import data_utils from keras.utils import layer_utils -import tensorflow.compat.v2 as tf - +# isort: off from tensorflow.python.util.tf_export import keras_export -BASE_WEIGHTS_URL = ("https://storage.googleapis.com/tensorflow/" - "keras-applications/resnet_rs/") +BASE_WEIGHTS_URL = ( + "https://storage.googleapis.com/tensorflow/keras-applications/resnet_rs/" +) WEIGHT_HASHES = { "resnet-rs-101-i160.h5": "544b3434d00efc199d66e9058c7f3379", @@ -73,130 +78,46 @@ } BLOCK_ARGS = { 50: [ - { - "input_filters": 64, - "num_repeats": 3 - }, - { - "input_filters": 128, - "num_repeats": 4 - }, - { - "input_filters": 256, - "num_repeats": 6 - }, - { - "input_filters": 512, - "num_repeats": 3 - }, + {"input_filters": 64, "num_repeats": 3}, + {"input_filters": 128, "num_repeats": 4}, + {"input_filters": 256, "num_repeats": 6}, + {"input_filters": 512, "num_repeats": 3}, ], 101: [ - { - "input_filters": 64, - "num_repeats": 3 - }, - { - "input_filters": 128, - "num_repeats": 4 - }, - { - "input_filters": 256, - "num_repeats": 23 - }, - { - "input_filters": 512, - "num_repeats": 3 - }, + {"input_filters": 64, "num_repeats": 3}, + {"input_filters": 128, "num_repeats": 4}, + {"input_filters": 256, "num_repeats": 23}, + {"input_filters": 512, "num_repeats": 3}, ], 152: [ - { - "input_filters": 64, - "num_repeats": 3 - }, - { - "input_filters": 128, - "num_repeats": 8 - }, - { - "input_filters": 256, - "num_repeats": 36 - }, - { - "input_filters": 512, - "num_repeats": 3 - }, + {"input_filters": 64, "num_repeats": 3}, + {"input_filters": 128, "num_repeats": 8}, + {"input_filters": 256, "num_repeats": 36}, + {"input_filters": 512, "num_repeats": 3}, ], 200: [ - { - "input_filters": 64, - "num_repeats": 3 - }, - { - "input_filters": 128, - "num_repeats": 24 - }, - { - "input_filters": 256, - "num_repeats": 36 - }, - { - "input_filters": 512, - "num_repeats": 3 - }, + {"input_filters": 64, "num_repeats": 3}, + {"input_filters": 128, "num_repeats": 24}, + {"input_filters": 256, "num_repeats": 36}, + {"input_filters": 512, "num_repeats": 3}, ], 270: [ - { - "input_filters": 64, - "num_repeats": 4 - }, - { - "input_filters": 128, - "num_repeats": 29 - }, - { - "input_filters": 256, - "num_repeats": 53 - }, - { - "input_filters": 512, - "num_repeats": 4 - }, + {"input_filters": 64, "num_repeats": 4}, + {"input_filters": 128, "num_repeats": 29}, + {"input_filters": 256, "num_repeats": 53}, + {"input_filters": 512, "num_repeats": 4}, ], 350: [ - { - "input_filters": 64, - "num_repeats": 4 - }, - { - "input_filters": 128, - "num_repeats": 36 - }, - { - "input_filters": 256, - "num_repeats": 72 - }, - { - "input_filters": 512, - "num_repeats": 4 - }, + {"input_filters": 64, "num_repeats": 4}, + {"input_filters": 128, "num_repeats": 36}, + {"input_filters": 256, "num_repeats": 72}, + {"input_filters": 512, "num_repeats": 4}, ], 420: [ - { - "input_filters": 64, - "num_repeats": 4 - }, - { - "input_filters": 128, - "num_repeats": 44 - }, - { - "input_filters": 256, - "num_repeats": 87 - }, - { - "input_filters": 512, - "num_repeats": 4 - }, + {"input_filters": 64, "num_repeats": 4}, + {"input_filters": 128, "num_repeats": 44}, + 
{"input_filters": 256, "num_repeats": 87}, + {"input_filters": 512, "num_repeats": 4}, ], } CONV_KERNEL_INITIALIZER = { @@ -204,7 +125,7 @@ "config": { "scale": 2.0, "mode": "fan_out", - "distribution": "truncated_normal" + "distribution": "truncated_normal", }, } @@ -272,10 +193,12 @@ specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - include_preprocessing: Boolean, whether to include the preprocessing layer - (`Rescaling`) at the bottom of the network. Defaults to `True`. - Note: Input image is normalized by ImageNet mean and standard deviation. + `classifier_activation=None` to return the logits of the "top" + layer. + include_preprocessing: Boolean, whether to include the preprocessing + layer (`Rescaling`) at the bottom of the network. Note: Input image + is normalized by ImageNet mean and standard deviation. + Defaults to `True`. Returns: A `keras.Model` instance. @@ -283,25 +206,25 @@ def Conv2DFixedPadding(filters, kernel_size, strides, name=None): - """Conv2D block with fixed padding.""" - if name is None: - counter = backend.get_uid("conv_") - name = f"conv_{counter}" - - def apply(inputs): - if strides > 1: - inputs = fixed_padding(inputs, kernel_size) - return layers.Conv2D( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding="same" if strides == 1 else "valid", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name, - )(inputs) + """Conv2D block with fixed padding.""" + if name is None: + counter = backend.get_uid("conv_") + name = f"conv_{counter}" + + def apply(inputs): + if strides > 1: + inputs = fixed_padding(inputs, kernel_size) + return layers.Conv2D( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding="same" if strides == 1 else "valid", + use_bias=False, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=name, + )(inputs) - return apply + return apply def STEM( @@ -310,111 +233,112 @@ def STEM( activation: str = "relu", name=None, ): - """ResNet-D type STEM block.""" - if name is None: - counter = backend.get_uid("stem_") - name = f"stem_{counter}" + """ResNet-D type STEM block.""" + if name is None: + counter = backend.get_uid("stem_") + name = f"stem_{counter}" - def apply(inputs): - bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + def apply(inputs): + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 - # First stem block - x = Conv2DFixedPadding( - filters=32, - kernel_size=3, - strides=2, - name=name + "_stem_conv_1" - )(inputs) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - epsilon=bn_epsilon, - name=name + "_stem_batch_norm_1", - )(x) - x = layers.Activation(activation, name=name + "_stem_act_1")(x) - - # Second stem block - x = Conv2DFixedPadding( - filters=32, kernel_size=3, strides=1, name=name + "_stem_conv_2")(x) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - epsilon=bn_epsilon, - name=name + "_stem_batch_norm_2", - )(x) - x = layers.Activation(activation, name=name + "_stem_act_2")(x) - - # Final Stem block: - x = Conv2DFixedPadding( - filters=64, kernel_size=3, strides=1, name=name + "_stem_conv_3")(x) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - epsilon=bn_epsilon, - name=name + "_stem_batch_norm_3", - )(x) - x = layers.Activation(activation, name=name + "_stem_act_3")(x) - - # Replace stem max 
pool: - x = Conv2DFixedPadding( - filters=64, kernel_size=3, strides=2, name=name + "_stem_conv_4")(x) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - epsilon=bn_epsilon, - name=name + "_stem_batch_norm_4", - )(x) - x = layers.Activation(activation, name=name + "_stem_act_4")(x) - return x - - return apply - - -def SE(in_filters: int, - se_ratio: float = 0.25, - expand_ratio: int = 1, - name=None): - """Squeeze and Excitation block.""" - bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 - if name is None: - counter = backend.get_uid("se_") - name = f"se_{counter}" - - def apply(inputs): - x = layers.GlobalAveragePooling2D(name=name + "_se_squeeze")(inputs) - if bn_axis == 1: - se_shape = (x.shape[-1], 1, 1) - else: - se_shape = (1, 1, x.shape[-1]) - x = layers.Reshape(se_shape, name=name + "_se_reshape")(x) - - num_reduced_filters = max(1, int(in_filters * 4 * se_ratio)) - - x = layers.Conv2D( - filters=num_reduced_filters, - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", - use_bias=True, - activation="relu", - name=name + "_se_reduce", - )(x) - - x = layers.Conv2D( - filters=4 * in_filters * expand_ratio, # Expand ratio is 1 by default - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", - use_bias=True, - activation="sigmoid", - name=name + "_se_expand", - )(x) + # First stem block + x = Conv2DFixedPadding( + filters=32, kernel_size=3, strides=2, name=name + "_stem_conv_1" + )(inputs) + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + epsilon=bn_epsilon, + name=name + "_stem_batch_norm_1", + )(x) + x = layers.Activation(activation, name=name + "_stem_act_1")(x) + + # Second stem block + x = Conv2DFixedPadding( + filters=32, kernel_size=3, strides=1, name=name + "_stem_conv_2" + )(x) + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + epsilon=bn_epsilon, + name=name + "_stem_batch_norm_2", + )(x) + x = layers.Activation(activation, name=name + "_stem_act_2")(x) + + # Final Stem block: + x = Conv2DFixedPadding( + filters=64, kernel_size=3, strides=1, name=name + "_stem_conv_3" + )(x) + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + epsilon=bn_epsilon, + name=name + "_stem_batch_norm_3", + )(x) + x = layers.Activation(activation, name=name + "_stem_act_3")(x) + + # Replace stem max pool: + x = Conv2DFixedPadding( + filters=64, kernel_size=3, strides=2, name=name + "_stem_conv_4" + )(x) + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + epsilon=bn_epsilon, + name=name + "_stem_batch_norm_4", + )(x) + x = layers.Activation(activation, name=name + "_stem_act_4")(x) + return x + + return apply + + +def SE( + in_filters: int, se_ratio: float = 0.25, expand_ratio: int = 1, name=None +): + """Squeeze and Excitation block.""" + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + if name is None: + counter = backend.get_uid("se_") + name = f"se_{counter}" + + def apply(inputs): + x = layers.GlobalAveragePooling2D(name=name + "_se_squeeze")(inputs) + if bn_axis == 1: + se_shape = (x.shape[-1], 1, 1) + else: + se_shape = (1, 1, x.shape[-1]) + x = layers.Reshape(se_shape, name=name + "_se_reshape")(x) + + num_reduced_filters = max(1, int(in_filters * 4 * se_ratio)) + + x = layers.Conv2D( + filters=num_reduced_filters, + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=CONV_KERNEL_INITIALIZER, + padding="same", + use_bias=True, + 
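For context on the pattern being reformatted here: `Conv2DFixedPadding`, `STEM`, `SE`, and the block builders below are layer factories that return an `apply` closure instead of subclassing `Layer`. A minimal self-contained sketch of that style, with illustrative names and shapes that are not part of this patch:

```python
import tensorflow as tf
from tensorflow.keras import layers


def ConvBNAct(filters, kernel_size, name):
    """Factory in the same style: returns an `apply` closure."""

    def apply(inputs):
        x = layers.Conv2D(
            filters,
            kernel_size,
            padding="same",
            use_bias=False,
            name=name + "_conv",
        )(inputs)
        x = layers.BatchNormalization(name=name + "_bn")(x)
        return layers.Activation("relu", name=name + "_act")(x)

    return apply


inputs = tf.keras.Input(shape=(224, 224, 3))
outputs = ConvBNAct(32, 3, name="demo")(inputs)
model = tf.keras.Model(inputs, outputs)  # composes like any functional model
```

Because each factory call only captures configuration, blocks chain with plain function application, which is what the `x = ...(x)` lines throughout this file do.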
activation="relu", + name=name + "_se_reduce", + )(x) + + x = layers.Conv2D( + filters=4 + * in_filters + * expand_ratio, # Expand ratio is 1 by default + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=CONV_KERNEL_INITIALIZER, + padding="same", + use_bias=True, + activation="sigmoid", + name=name + "_se_expand", + )(x) - return layers.multiply([inputs, x], name=name + "_se_excite") + return layers.multiply([inputs, x], name=name + "_se_excite") - return apply + return apply def BottleneckBlock( @@ -428,98 +352,100 @@ def BottleneckBlock( survival_probability: float = 0.8, name=None, ): - """Bottleneck block variant for residual networks with BN.""" - if name is None: - counter = backend.get_uid("block_0_") - name = f"block_0_{counter}" - - def apply(inputs): - bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 - - shortcut = inputs - - if use_projection: - filters_out = filters * 4 - if strides == 2: - shortcut = layers.AveragePooling2D( - pool_size=(2, 2), - strides=(2, 2), - padding="same", - name=name + "_projection_pooling", + """Bottleneck block variant for residual networks with BN.""" + if name is None: + counter = backend.get_uid("block_0_") + name = f"block_0_{counter}" + + def apply(inputs): + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + shortcut = inputs + + if use_projection: + filters_out = filters * 4 + if strides == 2: + shortcut = layers.AveragePooling2D( + pool_size=(2, 2), + strides=(2, 2), + padding="same", + name=name + "_projection_pooling", + )(inputs) + shortcut = Conv2DFixedPadding( + filters=filters_out, + kernel_size=1, + strides=1, + name=name + "_projection_conv", + )(shortcut) + else: + shortcut = Conv2DFixedPadding( + filters=filters_out, + kernel_size=1, + strides=strides, + name=name + "_projection_conv", + )(inputs) + + shortcut = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + epsilon=bn_epsilon, + name=name + "_projection_batch_norm", + )(shortcut) + + # First conv layer: + x = Conv2DFixedPadding( + filters=filters, kernel_size=1, strides=1, name=name + "_conv_1" )(inputs) - shortcut = Conv2DFixedPadding( - filters=filters_out, - kernel_size=1, - strides=1, - name=name + "_projection_conv", - )(shortcut) - else: - shortcut = Conv2DFixedPadding( - filters=filters_out, - kernel_size=1, + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + epsilon=bn_epsilon, + name=name + "batch_norm_1", + )(x) + x = layers.Activation(activation, name=name + "_act_1")(x) + + # Second conv layer: + x = Conv2DFixedPadding( + filters=filters, + kernel_size=3, strides=strides, - name=name + "_projection_conv", - )(inputs) - - shortcut = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - epsilon=bn_epsilon, - name=name + "_projection_batch_norm", - )(shortcut) - - # First conv layer: - x = Conv2DFixedPadding( - filters=filters, - kernel_size=1, - strides=1, - name=name + "_conv_1" - )(inputs) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - epsilon=bn_epsilon, - name=name + "batch_norm_1", - )(x) - x = layers.Activation(activation, name=name + "_act_1")(x) - - # Second conv layer: - x = Conv2DFixedPadding( - filters=filters, kernel_size=3, strides=strides, name=name + "_conv_2")( - x) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - epsilon=bn_epsilon, - name=name + "_batch_norm_2", - )(x) - x = layers.Activation(activation, name=name + "_act_2")(x) - - # Third conv layer: - x = Conv2DFixedPadding( - 
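A dependency-free check of the Squeeze-and-Excitation filter arithmetic in the `SE` block above, using 64 input filters and the default ratios as example values:

```python
# Squeeze path: max(1, int(in_filters * 4 * se_ratio)) channels.
# Excite path: back to the bottleneck's 4 * in_filters output width.
in_filters, se_ratio, expand_ratio = 64, 0.25, 1

num_reduced = max(1, int(in_filters * 4 * se_ratio))  # 64
num_expanded = 4 * in_filters * expand_ratio  # 256

assert (num_reduced, num_expanded) == (64, 256)
```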
filters=filters * 4, kernel_size=1, strides=1, name=name + "_conv_3")(x) - x = layers.BatchNormalization( - axis=bn_axis, - momentum=bn_momentum, - epsilon=bn_epsilon, - name=name + "_batch_norm_3", - )(x) - - if 0 < se_ratio < 1: - x = SE(filters, se_ratio=se_ratio, name=name + "_se")(x) - - # Drop connect - if survival_probability: - x = layers.Dropout( - survival_probability, - noise_shape=(None, 1, 1, 1), - name=name + "_drop")(x) - - x = layers.Add()([x, shortcut]) - - return layers.Activation(activation, name=name + "_output_act")(x) - - return apply + name=name + "_conv_2", + )(x) + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + epsilon=bn_epsilon, + name=name + "_batch_norm_2", + )(x) + x = layers.Activation(activation, name=name + "_act_2")(x) + + # Third conv layer: + x = Conv2DFixedPadding( + filters=filters * 4, kernel_size=1, strides=1, name=name + "_conv_3" + )(x) + x = layers.BatchNormalization( + axis=bn_axis, + momentum=bn_momentum, + epsilon=bn_epsilon, + name=name + "_batch_norm_3", + )(x) + + if 0 < se_ratio < 1: + x = SE(filters, se_ratio=se_ratio, name=name + "_se")(x) + + # Drop connect + if survival_probability: + x = layers.Dropout( + survival_probability, + noise_shape=(None, 1, 1, 1), + name=name + "_drop", + )(x) + + x = layers.Add()([x, shortcut]) + + return layers.Activation(activation, name=name + "_output_act")(x) + + return apply def BlockGroup( @@ -533,65 +459,68 @@ def BlockGroup( survival_probability: float = 0.8, name=None, ): - """Create one group of blocks for the ResNet model.""" - if name is None: - counter = backend.get_uid("block_group_") - name = f"block_group_{counter}" - - def apply(inputs): - # Only the first block per block_group uses projection shortcut and strides. - x = BottleneckBlock( - filters=filters, - strides=strides, - use_projection=True, - se_ratio=se_ratio, - bn_epsilon=bn_epsilon, - bn_momentum=bn_momentum, - activation=activation, - survival_probability=survival_probability, - name=name + "_block_0_", - )(inputs) + """Create one group of blocks for the ResNet model.""" + if name is None: + counter = backend.get_uid("block_group_") + name = f"block_group_{counter}" + + def apply(inputs): + # Only the first block per block_group uses projection shortcut and + # strides. 
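The `Dropout(..., noise_shape=(None, 1, 1, 1))` call above implements drop connect: the mask is broadcast over height, width, and channels, so each sample's residual branch is either kept whole or zeroed entirely. A hedged standalone demonstration; the shapes and the 0.5 rate are arbitrary example choices:

```python
import tensorflow as tf

x = tf.ones((8, 4, 4, 16))
branch_drop = tf.keras.layers.Dropout(0.5, noise_shape=(None, 1, 1, 1))
y = branch_drop(x, training=True)

# Per sample the branch is all zeros or uniformly rescaled by
# 1 / (1 - rate); it is never partially dropped.
per_sample = tf.reduce_sum(tf.abs(y), axis=[1, 2, 3])
print(per_sample.numpy())  # each entry is 0.0 or 512.0 (4 * 4 * 16 * 2)
```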
+ x = BottleneckBlock( + filters=filters, + strides=strides, + use_projection=True, + se_ratio=se_ratio, + bn_epsilon=bn_epsilon, + bn_momentum=bn_momentum, + activation=activation, + survival_probability=survival_probability, + name=name + "_block_0_", + )(inputs) - for i in range(1, num_repeats): - x = BottleneckBlock( - filters=filters, - strides=1, - use_projection=False, - se_ratio=se_ratio, - activation=activation, - bn_epsilon=bn_epsilon, - bn_momentum=bn_momentum, - survival_probability=survival_probability, - name=name + f"_block_{i}_", - )(x) - return x + for i in range(1, num_repeats): + x = BottleneckBlock( + filters=filters, + strides=1, + use_projection=False, + se_ratio=se_ratio, + activation=activation, + bn_epsilon=bn_epsilon, + bn_momentum=bn_momentum, + survival_probability=survival_probability, + name=name + f"_block_{i}_", + )(x) + return x - return apply + return apply def get_survival_probability(init_rate, block_num, total_blocks): - """Get survival probability based on block number and initial rate.""" - return init_rate * float(block_num) / total_blocks + """Get survival probability based on block number and initial rate.""" + return init_rate * float(block_num) / total_blocks def allow_bigger_recursion(target_limit: int): - """Increase default recursion limit to create larger models.""" - current_limit = sys.getrecursionlimit() - if current_limit < target_limit: - sys.setrecursionlimit(target_limit) + """Increase default recursion limit to create larger models.""" + current_limit = sys.getrecursionlimit() + if current_limit < target_limit: + sys.setrecursionlimit(target_limit) def fixed_padding(inputs, kernel_size): - """Pad the input along the spatial dimensions independently of input size.""" - pad_total = kernel_size - 1 - pad_beg = pad_total // 2 - pad_end = pad_total - pad_beg - - # Use ZeroPadding as to avoid TFOpLambda layer - padded_inputs = layers.ZeroPadding2D( - padding=((pad_beg, pad_end), (pad_beg, pad_end)))(inputs) + """Pad the input along the spatial dimensions independently of input + size.""" + pad_total = kernel_size - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + + # Use ZeroPadding as to avoid TFOpLambda layer + padded_inputs = layers.ZeroPadding2D( + padding=((pad_beg, pad_end), (pad_beg, pad_end)) + )(inputs) - return padded_inputs + return padded_inputs def ResNetRS( @@ -610,193 +539,204 @@ def ResNetRS( weights="imagenet", input_tensor=None, classes=1000, - # pylint: disable=g-bare-generic classifier_activation: Union[str, Callable] = "softmax", include_preprocessing=True, ): - """Build Resnet-RS model, given provided parameters. - - Args: - depth: Depth of ResNet network. - input_shape: optional shape tuple. It should have exactly 3 inputs - channels, and width and height should be no smaller than 32. E.g. (200, - 200, 3) would be one valid value. - bn_momentum: Momentum parameter for Batch Normalization layers. - bn_epsilon: Epsilon parameter for Batch Normalization layers. - activation: activation function. - se_ratio: Squeeze and Excitation layer ratio. - dropout_rate: dropout rate before final classifier layer. - drop_connect_rate: dropout rate at skip connections. - include_top: whether to include the fully-connected layer at the top of - the network. - block_args: list of dicts, parameters to construct block modules. - model_name: name of the model. - pooling: optional pooling mode for feature extraction when `include_top` - is `False`. 
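The `fixed_padding` helper above distributes `kernel_size - 1` pixels of padding across the two sides, so strided convolutions see the same effective padding regardless of input size. The arithmetic, checked standalone:

```python
for kernel_size in (3, 5, 7):
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    print(kernel_size, (pad_beg, pad_end))  # 3:(1, 1), 5:(2, 2), 7:(3, 3)
```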
- `None` means that the output of the model will be the 4D - tensor output of the last convolutional layer. - `avg` means that global - average pooling will be applied to the output of the last convolutional - layer, and thus the output of the model will be a 2D tensor. - `max` - means that global max pooling will be applied. - weights: one of `None` (random initialization), `'imagenet'` (pre-training - on ImageNet), or the path to the weights file to be loaded. Note- one - model can have multiple imagenet variants depending on input shape it - was trained with. For input_shape 224x224 pass `imagenet-i224` as - argument. By default, highest input shape weights are downloaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to - use as image input for the model. - classes: optional number of classes to classify images into, only to be - specified if `include_top` is True, and if no `weights` argument is - specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - include_preprocessing: Boolean, whether to include the preprocessing layer - (`Rescaling`) at the bottom of the network. Defaults to `True`. Note- - Input image is normalized by ImageNet mean and standard deviation. - - Returns: - A `tf.keras.Model` instance. - - Raises: - ValueError: in case of invalid argument for `weights`, or invalid input - shape. - ValueError: if `classifier_activation` is not `softmax` or `None` when - using a pretrained top layer. - """ - # Validate parameters - available_weight_variants = DEPTH_TO_WEIGHT_VARIANTS[depth] - if weights == "imagenet": - max_input_shape = max(available_weight_variants) - # `imagenet` argument without explicit weights input size. - # Picking weights trained with biggest available shape - weights = f"{weights}-i{max_input_shape}" - - weights_allow_list = [f"imagenet-i{x}" for x in available_weight_variants] - if not (weights in {*weights_allow_list, None} or - tf.io.gfile.exists(weights)): - raise ValueError( - "The `weights` argument should be either " - "`None` (random initialization), `'imagenet'` " - "(pre-training on ImageNet, with highest available input shape)," - " or the path to the weights file to be loaded. " - f"For ResNetRS{depth} the following weight variants are " - f"available {weights_allow_list} (default=highest)." - f" Received weights={weights}") - - if weights in weights_allow_list and include_top and classes != 1000: - raise ValueError( - f"If using `weights` as `'imagenet'` or any of {weights_allow_list} " - f"with `include_top` as true, `classes` should be 1000. 
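The `get_survival_probability` helper defined above turns `drop_connect_rate` into a linear stochastic-depth schedule, with deeper block groups dropped more often. Worked through for a four-group model with `drop_connect_rate=0.1`; the group count mirrors the depth-50 configuration:

```python
def get_survival_probability(init_rate, block_num, total_blocks):
    return init_rate * float(block_num) / total_blocks


num_groups = 4  # e.g. BLOCK_ARGS[50] defines four block groups
rates = [
    get_survival_probability(0.1, i + 2, num_groups + 1)
    for i in range(num_groups)
]
print(rates)  # approximately [0.04, 0.06, 0.08, 0.1]
```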
" - f"Received classes={classes}") - - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=224, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights, - ) - # Define input tensor - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 - - x = img_input - - if include_preprocessing: - num_channels = input_shape[bn_axis - 1] - x = layers.Rescaling(scale=1.0 / 255)(x) - if num_channels == 3: - x = layers.Normalization( - mean=[0.485, 0.456, 0.406], - variance=[0.229**2, 0.224**2, 0.225**2], - axis=bn_axis, - )(x) - - # Build stem - x = STEM( - bn_momentum=bn_momentum, bn_epsilon=bn_epsilon, activation=activation)(x) - - # Build blocks - if block_args is None: - block_args = BLOCK_ARGS[depth] - - for i, args in enumerate(block_args): - survival_probability = get_survival_probability( - init_rate=drop_connect_rate, - block_num=i + 2, - total_blocks=len(block_args) + 1, + """Build Resnet-RS model, given provided parameters. + + Args: + depth: Depth of ResNet network. + input_shape: optional shape tuple. It should have exactly 3 inputs + channels, and width and height should be no smaller than 32. E.g. + (200, 200, 3) would be one valid value. + bn_momentum: Momentum parameter for Batch Normalization layers. + bn_epsilon: Epsilon parameter for Batch Normalization layers. + activation: activation function. + se_ratio: Squeeze and Excitation layer ratio. + dropout_rate: dropout rate before final classifier layer. + drop_connect_rate: dropout rate at skip connections. + include_top: whether to include the fully-connected layer at the top of + the network. + block_args: list of dicts, parameters to construct block modules. + model_name: name of the model. + pooling: optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` means that the output of the model will be the 4D tensor + output of the last convolutional layer. + - `avg` means that global average pooling will be applied to the + output of the last convolutional layer, and thus the output of the + model will be a 2D tensor. + - `max` means that global max pooling will be applied. + weights: one of `None` (random initialization), `'imagenet'` + (pre-training on ImageNet), or the path to the weights file to be + loaded. Note- one model can have multiple imagenet variants depending + on input shape it was trained with. For input_shape 224x224 pass + `imagenet-i224` as argument. By default, highest input shape weights + are downloaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to + use as image input for the model. + classes: optional number of classes to classify images into, only to be + specified if `include_top` is True, and if no `weights` argument is + specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + include_preprocessing: Boolean, whether to include the preprocessing + layer (`Rescaling`) at the bottom of the network. Note - Input image + is normalized by ImageNet mean and standard deviation. + Defaults to `True`. + + + Returns: + A `tf.keras.Model` instance. 
+ + Raises: + ValueError: in case of invalid argument for `weights`, or invalid input + shape. + ValueError: if `classifier_activation` is not `softmax` or `None` when + using a pretrained top layer. + """ + # Validate parameters + available_weight_variants = DEPTH_TO_WEIGHT_VARIANTS[depth] + if weights == "imagenet": + max_input_shape = max(available_weight_variants) + # `imagenet` argument without explicit weights input size. + # Picking weights trained with biggest available shape + weights = f"{weights}-i{max_input_shape}" + + weights_allow_list = [f"imagenet-i{x}" for x in available_weight_variants] + if not ( + weights in {*weights_allow_list, None} or tf.io.gfile.exists(weights) + ): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `'imagenet'` " + "(pre-training on ImageNet, with highest available input shape)," + " or the path to the weights file to be loaded. " + f"For ResNetRS{depth} the following weight variants are " + f"available {weights_allow_list} (default=highest)." + f" Received weights={weights}" + ) + + if weights in weights_allow_list and include_top and classes != 1000: + raise ValueError( + "If using `weights` as `'imagenet'` or any " + f"of {weights_allow_list} " + "with `include_top` as true, `classes` should be 1000. " + f"Received classes={classes}" + ) + + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=224, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, ) + # Define input tensor + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor - x = BlockGroup( - filters=args["input_filters"], - activation=activation, - strides=(1 if i == 0 else 2), - num_repeats=args["num_repeats"], - se_ratio=se_ratio, - bn_momentum=bn_momentum, - bn_epsilon=bn_epsilon, - survival_probability=survival_probability, - name=f"BlockGroup{i + 2}_", + bn_axis = 3 if backend.image_data_format() == "channels_last" else 1 + + x = img_input + + if include_preprocessing: + num_channels = input_shape[bn_axis - 1] + x = layers.Rescaling(scale=1.0 / 255)(x) + if num_channels == 3: + x = layers.Normalization( + mean=[0.485, 0.456, 0.406], + variance=[0.229**2, 0.224**2, 0.225**2], + axis=bn_axis, + )(x) + + # Build stem + x = STEM( + bn_momentum=bn_momentum, bn_epsilon=bn_epsilon, activation=activation )(x) - # Build head: - if include_top: - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - if dropout_rate > 0: - x = layers.Dropout(dropout_rate, name="top_dropout")(x) - - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense( - classes, activation=classifier_activation, name="predictions")(x) - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D(name="max_pool")(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = training.Model(inputs, x, name=model_name) - - # Download weights - if weights in weights_allow_list: - weights_input_shape = weights.split("-")[-1] # e. g. 
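How the bare `weights="imagenet"` shorthand is resolved in the validation above: the variant trained at the largest input shape is selected, then checked against the allow list. A sketch with an assumed mapping; the real `DEPTH_TO_WEIGHT_VARIANTS` is defined earlier in the module and is not shown in this hunk:

```python
DEPTH_TO_WEIGHT_VARIANTS = {50: [160]}  # assumed entry, for illustration

depth, weights = 50, "imagenet"
available = DEPTH_TO_WEIGHT_VARIANTS[depth]
if weights == "imagenet":
    weights = f"imagenet-i{max(available)}"  # "imagenet-i160"

weights_allow_list = [f"imagenet-i{x}" for x in available]
assert weights in weights_allow_list
```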
"i160" - weights_name = f"{model_name}-{weights_input_shape}" - if not include_top: - weights_name += "_notop" - - filename = f"{weights_name}.h5" - download_url = BASE_WEIGHTS_URL + filename - weights_path = data_utils.get_file( - fname=filename, - origin=download_url, - cache_subdir="models", - file_hash=WEIGHT_HASHES[filename], - ) - model.load_weights(weights_path) + # Build blocks + if block_args is None: + block_args = BLOCK_ARGS[depth] + + for i, args in enumerate(block_args): + survival_probability = get_survival_probability( + init_rate=drop_connect_rate, + block_num=i + 2, + total_blocks=len(block_args) + 1, + ) + + x = BlockGroup( + filters=args["input_filters"], + activation=activation, + strides=(1 if i == 0 else 2), + num_repeats=args["num_repeats"], + se_ratio=se_ratio, + bn_momentum=bn_momentum, + bn_epsilon=bn_epsilon, + survival_probability=survival_probability, + name=f"BlockGroup{i + 2}_", + )(x) + + # Build head: + if include_top: + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + if dropout_rate > 0: + x = layers.Dropout(dropout_rate, name="top_dropout")(x) + + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D(name="max_pool")(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = training.Model(inputs, x, name=model_name) - elif weights is not None: - model.load_weights(weights) + # Download weights + if weights in weights_allow_list: + weights_input_shape = weights.split("-")[-1] # e. g. 
"i160" + weights_name = f"{model_name}-{weights_input_shape}" + if not include_top: + weights_name += "_notop" - return model + filename = f"{weights_name}.h5" + download_url = BASE_WEIGHTS_URL + filename + weights_path = data_utils.get_file( + fname=filename, + origin=download_url, + cache_subdir="models", + file_hash=WEIGHT_HASHES[filename], + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) -@keras_export("keras.applications.resnet_rs.ResNetRS50", - "keras.applications.ResNetRS50") + return model + + +@keras_export( + "keras.applications.resnet_rs.ResNetRS50", "keras.applications.ResNetRS50" +) def ResNetRS50( include_top=True, weights="imagenet", @@ -807,25 +747,26 @@ def ResNetRS50( classifier_activation="softmax", include_preprocessing=True, ): - """Build ResNet-RS50 model.""" - return ResNetRS( - depth=50, - include_top=include_top, - drop_connect_rate=0.0, - dropout_rate=0.25, - weights=weights, - classes=classes, - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - classifier_activation=classifier_activation, - model_name="resnet-rs-50", - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.resnet_rs.ResNetRS101", - "keras.applications.ResNetRS101") + """Build ResNet-RS50 model.""" + return ResNetRS( + depth=50, + include_top=include_top, + drop_connect_rate=0.0, + dropout_rate=0.25, + weights=weights, + classes=classes, + input_shape=input_shape, + input_tensor=input_tensor, + pooling=pooling, + classifier_activation=classifier_activation, + model_name="resnet-rs-50", + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.resnet_rs.ResNetRS101", "keras.applications.ResNetRS101" +) def ResNetRS101( include_top=True, weights="imagenet", @@ -836,25 +777,26 @@ def ResNetRS101( classifier_activation="softmax", include_preprocessing=True, ): - """Build ResNet-RS101 model.""" - return ResNetRS( - depth=101, - include_top=include_top, - drop_connect_rate=0.0, - dropout_rate=0.25, - weights=weights, - classes=classes, - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - classifier_activation=classifier_activation, - model_name="resnet-rs-101", - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.resnet_rs.ResNetRS152", - "keras.applications.ResNetRS152") + """Build ResNet-RS101 model.""" + return ResNetRS( + depth=101, + include_top=include_top, + drop_connect_rate=0.0, + dropout_rate=0.25, + weights=weights, + classes=classes, + input_shape=input_shape, + input_tensor=input_tensor, + pooling=pooling, + classifier_activation=classifier_activation, + model_name="resnet-rs-101", + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.resnet_rs.ResNetRS152", "keras.applications.ResNetRS152" +) def ResNetRS152( include_top=True, weights="imagenet", @@ -865,25 +807,26 @@ def ResNetRS152( classifier_activation="softmax", include_preprocessing=True, ): - """Build ResNet-RS152 model.""" - return ResNetRS( - depth=152, - include_top=include_top, - drop_connect_rate=0.0, - dropout_rate=0.25, - weights=weights, - classes=classes, - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - classifier_activation=classifier_activation, - model_name="resnet-rs-152", - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.resnet_rs.ResNetRS200", - "keras.applications.ResNetRS200") + """Build ResNet-RS152 model.""" + 
return ResNetRS( + depth=152, + include_top=include_top, + drop_connect_rate=0.0, + dropout_rate=0.25, + weights=weights, + classes=classes, + input_shape=input_shape, + input_tensor=input_tensor, + pooling=pooling, + classifier_activation=classifier_activation, + model_name="resnet-rs-152", + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.resnet_rs.ResNetRS200", "keras.applications.ResNetRS200" +) def ResNetRS200( include_top=True, weights="imagenet", @@ -894,25 +837,26 @@ def ResNetRS200( classifier_activation="softmax", include_preprocessing=True, ): - """Build ResNet-RS200 model.""" - return ResNetRS( - depth=200, - include_top=include_top, - drop_connect_rate=0.1, - dropout_rate=0.25, - weights=weights, - classes=classes, - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - classifier_activation=classifier_activation, - model_name="resnet-rs-200", - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.resnet_rs.ResNetRS270", - "keras.applications.ResNetRS270") + """Build ResNet-RS200 model.""" + return ResNetRS( + depth=200, + include_top=include_top, + drop_connect_rate=0.1, + dropout_rate=0.25, + weights=weights, + classes=classes, + input_shape=input_shape, + input_tensor=input_tensor, + pooling=pooling, + classifier_activation=classifier_activation, + model_name="resnet-rs-200", + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.resnet_rs.ResNetRS270", "keras.applications.ResNetRS270" +) def ResNetRS270( include_top=True, weights="imagenet", @@ -923,26 +867,27 @@ def ResNetRS270( classifier_activation="softmax", include_preprocessing=True, ): - """Build ResNet-RS-270 model.""" - allow_bigger_recursion(1300) - return ResNetRS( - depth=270, - include_top=include_top, - drop_connect_rate=0.1, - dropout_rate=0.25, - weights=weights, - classes=classes, - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - classifier_activation=classifier_activation, - model_name="resnet-rs-270", - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.resnet_rs.ResNetRS350", - "keras.applications.ResNetRS350") + """Build ResNet-RS-270 model.""" + allow_bigger_recursion(1300) + return ResNetRS( + depth=270, + include_top=include_top, + drop_connect_rate=0.1, + dropout_rate=0.25, + weights=weights, + classes=classes, + input_shape=input_shape, + input_tensor=input_tensor, + pooling=pooling, + classifier_activation=classifier_activation, + model_name="resnet-rs-270", + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.resnet_rs.ResNetRS350", "keras.applications.ResNetRS350" +) def ResNetRS350( include_top=True, weights="imagenet", @@ -953,26 +898,27 @@ def ResNetRS350( classifier_activation="softmax", include_preprocessing=True, ): - """Build ResNet-RS350 model.""" - allow_bigger_recursion(1500) - return ResNetRS( - depth=350, - include_top=include_top, - drop_connect_rate=0.1, - dropout_rate=0.4, - weights=weights, - classes=classes, - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - classifier_activation=classifier_activation, - model_name="resnet-rs-350", - include_preprocessing=include_preprocessing, - ) - - -@keras_export("keras.applications.resnet_rs.ResNetRS420", - "keras.applications.ResNetRS420") + """Build ResNet-RS350 model.""" + allow_bigger_recursion(1500) + return ResNetRS( + depth=350, + include_top=include_top, + 
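A hedged usage sketch for the exported builders above, following the usual `tf.keras.applications` calling convention; `weights=None` skips the download, and the 2048-wide feature vector is 4 * 512 from the final block group:

```python
import tensorflow as tf

model = tf.keras.applications.ResNetRS50(
    include_top=False,
    weights=None,
    input_shape=(160, 160, 3),
    pooling="avg",
)
features = model(tf.random.uniform((1, 160, 160, 3)))
print(features.shape)  # (1, 2048)
```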
drop_connect_rate=0.1, + dropout_rate=0.4, + weights=weights, + classes=classes, + input_shape=input_shape, + input_tensor=input_tensor, + pooling=pooling, + classifier_activation=classifier_activation, + model_name="resnet-rs-350", + include_preprocessing=include_preprocessing, + ) + + +@keras_export( + "keras.applications.resnet_rs.ResNetRS420", "keras.applications.ResNetRS420" +) def ResNetRS420( include_top=True, weights="imagenet", @@ -983,56 +929,56 @@ def ResNetRS420( classifier_activation="softmax", include_preprocessing=True, ): - """Build ResNet-RS420 model.""" - allow_bigger_recursion(1800) - return ResNetRS( - depth=420, - include_top=include_top, - dropout_rate=0.4, - drop_connect_rate=0.1, - weights=weights, - classes=classes, - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - classifier_activation=classifier_activation, - model_name="resnet-rs-420", - include_preprocessing=include_preprocessing, - ) - - -# pylint: disable=unused-argument + """Build ResNet-RS420 model.""" + allow_bigger_recursion(1800) + return ResNetRS( + depth=420, + include_top=include_top, + dropout_rate=0.4, + drop_connect_rate=0.1, + weights=weights, + classes=classes, + input_shape=input_shape, + input_tensor=input_tensor, + pooling=pooling, + classifier_activation=classifier_activation, + model_name="resnet-rs-420", + include_preprocessing=include_preprocessing, + ) + + @keras_export("keras.applications.resnet_rs.preprocess_input") def preprocess_input(x, data_format=None): - """A placeholder method for backward compatibility. + """A placeholder method for backward compatibility. - The preprocessing logic has been included in the ResnetRS model - implementation. Users are no longer required to call this method to - normalize - the input data. This method does nothing and only kept as a placeholder to - align the API surface between old and new version of model. + The preprocessing logic has been included in the ResnetRS model + implementation. Users are no longer required to call this method to + normalize + the input data. This method does nothing and only kept as a placeholder to + align the API surface between old and new version of model. - Args: - x: A floating point `numpy.array` or a `tf.Tensor`. - data_format: Optional data format of the image tensor/array. Defaults to - None, in which case the global setting - `tf.keras.backend.image_data_format()` is used (unless you changed it, - it defaults to "channels_last").{mode} + Args: + x: A floating point `numpy.array` or a `tf.Tensor`. + data_format: Optional data format of the image tensor/array. `None` means + the global setting `tf.keras.backend.image_data_format()` is used + (unless you changed it, it uses "channels_last"). + Defaults to `None`. - Returns: - Unchanged `numpy.array` or `tf.Tensor`. - """ - return x + Returns: + Unchanged `numpy.array` or `tf.Tensor`. 
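Because ResNet-RS bakes `Rescaling` and ImageNet `Normalization` into the graph when `include_preprocessing=True`, the `preprocess_input` above is deliberately an identity. A quick check (array contents are arbitrary):

```python
import numpy as np

from tensorflow.keras.applications import resnet_rs

imgs = np.random.randint(0, 256, size=(2, 224, 224, 3)).astype("float32")
out = resnet_rs.preprocess_input(imgs)
assert out is imgs  # the function returns its input unchanged
```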
+ """ + return x @keras_export("keras.applications.resnet_rs.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ ResNetRS50.__doc__ = BASE_DOCSTRING.format(name="ResNetRS50") +ResNetRS101.__doc__ = BASE_DOCSTRING.format(name="ResNetRS101") ResNetRS152.__doc__ = BASE_DOCSTRING.format(name="ResNetRS152") ResNetRS200.__doc__ = BASE_DOCSTRING.format(name="ResNetRS200") ResNetRS270.__doc__ = BASE_DOCSTRING.format(name="ResNetRS270") diff --git a/keras/applications/resnet_v2.py b/keras/applications/resnet_v2.py index 01c327ae326c..98117d6acbd6 100644 --- a/keras/applications/resnet_v2.py +++ b/keras/applications/resnet_v2.py @@ -12,134 +12,150 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """ResNet v2 models for Keras. Reference: - - [Identity Mappings in Deep Residual Networks] - (https://arxiv.org/abs/1603.05027) (CVPR 2016) + - [Identity Mappings in Deep Residual Networks]( + https://arxiv.org/abs/1603.05027) (CVPR 2016) """ from keras.applications import imagenet_utils from keras.applications import resnet + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.applications.resnet_v2.ResNet50V2', - 'keras.applications.ResNet50V2') +@keras_export( + "keras.applications.resnet_v2.ResNet50V2", "keras.applications.ResNet50V2" +) def ResNet50V2( include_top=True, - weights='imagenet', + weights="imagenet", input_tensor=None, input_shape=None, pooling=None, classes=1000, - classifier_activation='softmax'): - """Instantiates the ResNet50V2 architecture.""" - def stack_fn(x): - x = resnet.stack2(x, 64, 3, name='conv2') - x = resnet.stack2(x, 128, 4, name='conv3') - x = resnet.stack2(x, 256, 6, name='conv4') - return resnet.stack2(x, 512, 3, stride1=1, name='conv5') - - return resnet.ResNet( - stack_fn, - True, - True, - 'resnet50v2', - include_top, - weights, - input_tensor, - input_shape, - pooling, - classes, - classifier_activation=classifier_activation) - - -@keras_export('keras.applications.resnet_v2.ResNet101V2', - 'keras.applications.ResNet101V2') + classifier_activation="softmax", +): + """Instantiates the ResNet50V2 architecture.""" + + def stack_fn(x): + x = resnet.stack2(x, 64, 3, name="conv2") + x = resnet.stack2(x, 128, 4, name="conv3") + x = resnet.stack2(x, 256, 6, name="conv4") + return resnet.stack2(x, 512, 3, stride1=1, name="conv5") + + return resnet.ResNet( + stack_fn, + True, + True, + "resnet50v2", + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.resnet_v2.ResNet101V2", "keras.applications.ResNet101V2" +) def ResNet101V2( include_top=True, - weights='imagenet', + weights="imagenet", input_tensor=None, input_shape=None, pooling=None, classes=1000, - classifier_activation='softmax'): - """Instantiates the ResNet101V2 architecture.""" - def stack_fn(x): - x = resnet.stack2(x, 64, 3, name='conv2') - x = resnet.stack2(x, 128, 4, name='conv3') - x = resnet.stack2(x, 256, 23, name='conv4') - return resnet.stack2(x, 512, 3, stride1=1, name='conv5') - - return resnet.ResNet( - stack_fn, - True, - True, - 'resnet101v2', - include_top, - weights, - input_tensor, - 
input_shape, - pooling, - classes, - classifier_activation=classifier_activation) - - -@keras_export('keras.applications.resnet_v2.ResNet152V2', - 'keras.applications.ResNet152V2') + classifier_activation="softmax", +): + """Instantiates the ResNet101V2 architecture.""" + + def stack_fn(x): + x = resnet.stack2(x, 64, 3, name="conv2") + x = resnet.stack2(x, 128, 4, name="conv3") + x = resnet.stack2(x, 256, 23, name="conv4") + return resnet.stack2(x, 512, 3, stride1=1, name="conv5") + + return resnet.ResNet( + stack_fn, + True, + True, + "resnet101v2", + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + classifier_activation=classifier_activation, + ) + + +@keras_export( + "keras.applications.resnet_v2.ResNet152V2", "keras.applications.ResNet152V2" +) def ResNet152V2( include_top=True, - weights='imagenet', + weights="imagenet", input_tensor=None, input_shape=None, pooling=None, classes=1000, - classifier_activation='softmax'): - """Instantiates the ResNet152V2 architecture.""" - def stack_fn(x): - x = resnet.stack2(x, 64, 3, name='conv2') - x = resnet.stack2(x, 128, 8, name='conv3') - x = resnet.stack2(x, 256, 36, name='conv4') - return resnet.stack2(x, 512, 3, stride1=1, name='conv5') - - return resnet.ResNet( - stack_fn, - True, - True, - 'resnet152v2', - include_top, - weights, - input_tensor, - input_shape, - pooling, - classes, - classifier_activation=classifier_activation) - - -@keras_export('keras.applications.resnet_v2.preprocess_input') + classifier_activation="softmax", +): + """Instantiates the ResNet152V2 architecture.""" + + def stack_fn(x): + x = resnet.stack2(x, 64, 3, name="conv2") + x = resnet.stack2(x, 128, 8, name="conv3") + x = resnet.stack2(x, 256, 36, name="conv4") + return resnet.stack2(x, 512, 3, stride1=1, name="conv5") + + return resnet.ResNet( + stack_fn, + True, + True, + "resnet152v2", + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + classifier_activation=classifier_activation, + ) + + +@keras_export("keras.applications.resnet_v2.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input( - x, data_format=data_format, mode='tf') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="tf" + ) -@keras_export('keras.applications.resnet_v2.decode_predictions') +@keras_export("keras.applications.resnet_v2.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ DOC = """ Reference: - - [Identity Mappings in Deep Residual Networks] - (https://arxiv.org/abs/1603.05027) (CVPR 2016) + - [Identity Mappings in Deep Residual Networks]( + https://arxiv.org/abs/1603.05027) (CVPR 2016) For image classification use cases, see [this page for detailed examples]( @@ -193,6 +209,6 @@ def decode_predictions(preds, top=5): A `keras.Model` instance. 
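The `stack_fn` definitions above encode each network's nominal depth: every `stack2` repetition is a three-convolution bottleneck, and the stem convolution plus the final dense layer account for the remaining two:

```python
repeats = {
    "resnet50v2": [3, 4, 6, 3],
    "resnet101v2": [3, 4, 23, 3],
    "resnet152v2": [3, 8, 36, 3],
}
for name, blocks in repeats.items():
    print(name, sum(blocks) * 3 + 2)  # 50, 101, 152
```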
""" -setattr(ResNet50V2, '__doc__', ResNet50V2.__doc__ + DOC) -setattr(ResNet101V2, '__doc__', ResNet101V2.__doc__ + DOC) -setattr(ResNet152V2, '__doc__', ResNet152V2.__doc__ + DOC) +setattr(ResNet50V2, "__doc__", ResNet50V2.__doc__ + DOC) +setattr(ResNet101V2, "__doc__", ResNet101V2.__doc__ + DOC) +setattr(ResNet152V2, "__doc__", ResNet152V2.__doc__ + DOC) diff --git a/keras/applications/vgg16.py b/keras/applications/vgg16.py index adf633a777f3..f7eebee3d96d 100644 --- a/keras/applications/vgg16.py +++ b/keras/applications/vgg16.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """VGG16 model for Keras. Reference: @@ -28,218 +28,245 @@ from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export -WEIGHTS_PATH = ('https://storage.googleapis.com/tensorflow/keras-applications/' - 'vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5') -WEIGHTS_PATH_NO_TOP = ('https://storage.googleapis.com/tensorflow/' - 'keras-applications/vgg16/' - 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5') +WEIGHTS_PATH = ( + "https://storage.googleapis.com/tensorflow/keras-applications/" + "vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5" +) +WEIGHTS_PATH_NO_TOP = ( + "https://storage.googleapis.com/tensorflow/" + "keras-applications/vgg16/" + "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5" +) layers = VersionAwareLayers() -@keras_export('keras.applications.vgg16.VGG16', 'keras.applications.VGG16') +@keras_export("keras.applications.vgg16.VGG16", "keras.applications.VGG16") def VGG16( include_top=True, - weights='imagenet', + weights="imagenet", input_tensor=None, input_shape=None, pooling=None, classes=1000, - classifier_activation='softmax'): - """Instantiates the VGG16 model. - - Reference: - - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( - https://arxiv.org/abs/1409.1556) (ICLR 2015) - - For image classification use cases, see - [this page for detailed examples]( - https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning]( - https://keras.io/guides/transfer_learning/). - - The default input size for this model is 224x224. - - Note: each Keras Application expects a specific kind of input preprocessing. - For VGG16, call `tf.keras.applications.vgg16.preprocess_input` on your - inputs before passing them to the model. - `vgg16.preprocess_input` will convert the input images from RGB to BGR, - then will zero-center each color channel with respect to the ImageNet dataset, - without scaling. - - Args: - include_top: whether to include the 3 fully-connected - layers at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor - (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` - (with `channels_last` data format) - or `(3, 224, 224)` (with `channels_first` data format). 
- It should have exactly 3 input channels, - and width and height should be no smaller than 32. - E.g. `(200, 200, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - - Returns: - A `keras.Model` instance. - """ - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError( - 'The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded. Received: ' - f'weights={weights}') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000. ' - f'Received `classes={classes}`') - # Determine proper input shape - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=224, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) + classifier_activation="softmax", +): + """Instantiates the VGG16 model. + + Reference: + - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( + https://arxiv.org/abs/1409.1556) (ICLR 2015) + + For image classification use cases, see + [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + + The default input size for this model is 224x224. + + Note: each Keras Application expects a specific kind of input preprocessing. + For VGG16, call `tf.keras.applications.vgg16.preprocess_input` on your + inputs before passing them to the model. + `vgg16.preprocess_input` will convert the input images from RGB to BGR, + then will zero-center each color channel with respect to the ImageNet + dataset, without scaling. + + Args: + include_top: whether to include the 3 fully-connected + layers at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. 
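What the VGG16 docstring above describes, re-implemented as a hedged sketch rather than the library call: "caffe" mode flips RGB to BGR and subtracts per-channel ImageNet means without scaling. The mean constants are the widely published values, stated here as an assumption rather than quoted from this patch:

```python
import numpy as np


def caffe_preprocess(x):
    """Illustrative equivalent of mode="caffe" for channels_last input."""
    x = x[..., ::-1].astype("float32")  # RGB -> BGR
    return x - np.array([103.939, 116.779, 123.68], dtype="float32")


img = np.zeros((1, 224, 224, 3), dtype="uint8")
print(caffe_preprocess(img)[0, 0, 0])  # [-103.939 -116.779 -123.68]
```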
+ input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` + (with `channels_last` data format) + or `(3, 224, 224)` (with `channels_first` data format). + It should have exactly 3 input channels, + and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" + layer. When loading pretrained weights, `classifier_activation` can + only be `None` or `"softmax"`. + + Returns: + A `keras.Model` instance. + """ + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded. Received: " + f"weights={weights}" + ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top` ' + "as true, `classes` should be 1000. 
" + f"Received `classes={classes}`" + ) + # Determine proper input shape + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=224, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - img_input = input_tensor - # Block 1 - x = layers.Conv2D( - 64, (3, 3), activation='relu', padding='same', name='block1_conv1')( - img_input) - x = layers.Conv2D( - 64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = layers.Conv2D( - 128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x) - x = layers.Conv2D( - 128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = layers.Conv2D( - 256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) - x = layers.Conv2D( - 256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) - x = layers.Conv2D( - 256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - if include_top: - # Classification block - x = layers.Flatten(name='flatten')(x) - x = layers.Dense(4096, activation='relu', name='fc1')(x) - x = layers.Dense(4096, activation='relu', name='fc2')(x) - - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense(classes, activation=classifier_activation, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = training.Model(inputs, x, name='vgg16') - - # Load weights. 
- if weights == 'imagenet': + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + # Block 1 + x = layers.Conv2D( + 64, (3, 3), activation="relu", padding="same", name="block1_conv1" + )(img_input) + x = layers.Conv2D( + 64, (3, 3), activation="relu", padding="same", name="block1_conv2" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block1_pool")(x) + + # Block 2 + x = layers.Conv2D( + 128, (3, 3), activation="relu", padding="same", name="block2_conv1" + )(x) + x = layers.Conv2D( + 128, (3, 3), activation="relu", padding="same", name="block2_conv2" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block2_pool")(x) + + # Block 3 + x = layers.Conv2D( + 256, (3, 3), activation="relu", padding="same", name="block3_conv1" + )(x) + x = layers.Conv2D( + 256, (3, 3), activation="relu", padding="same", name="block3_conv2" + )(x) + x = layers.Conv2D( + 256, (3, 3), activation="relu", padding="same", name="block3_conv3" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block3_pool")(x) + + # Block 4 + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block4_conv1" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block4_conv2" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block4_conv3" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block4_pool")(x) + + # Block 5 + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block5_conv1" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block5_conv2" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block5_conv3" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block5_pool")(x) + if include_top: - weights_path = data_utils.get_file( - 'vgg16_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models', - file_hash='64373286793e3c8b2b4e3219cbf3544b') + # Classification block + x = layers.Flatten(name="flatten")(x) + x = layers.Dense(4096, activation="relu", name="fc1")(x) + x = layers.Dense(4096, activation="relu", name="fc2")(x) + + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) else: - weights_path = data_utils.get_file( - 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - file_hash='6d6bbae143d832006294945121d1f1fc') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -@keras_export('keras.applications.vgg16.preprocess_input') + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. + model = training.Model(inputs, x, name="vgg16") + + # Load weights. 
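A usage sketch for the builder assembled above; `weights=None` avoids the large download, and the 13 convolutional plus 3 dense layers are the 16 weight layers that give VGG16 its name:

```python
import tensorflow as tf

vgg = tf.keras.applications.VGG16(weights=None, include_top=True)
weight_layers = [l for l in vgg.layers if l.count_params() > 0]
print(len(weight_layers))  # 16
vgg.summary(line_length=80)
```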
+ if weights == "imagenet": + if include_top: + weights_path = data_utils.get_file( + "vgg16_weights_tf_dim_ordering_tf_kernels.h5", + WEIGHTS_PATH, + cache_subdir="models", + file_hash="64373286793e3c8b2b4e3219cbf3544b", + ) + else: + weights_path = data_utils.get_file( + "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5", + WEIGHTS_PATH_NO_TOP, + cache_subdir="models", + file_hash="6d6bbae143d832006294945121d1f1fc", + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +@keras_export("keras.applications.vgg16.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input( - x, data_format=data_format, mode='caffe') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="caffe" + ) -@keras_export('keras.applications.vgg16.decode_predictions') +@keras_export("keras.applications.vgg16.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_CAFFE, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/vgg19.py b/keras/applications/vgg19.py index 8766003d8ab8..b763dff5f28e 100644 --- a/keras/applications/vgg19.py +++ b/keras/applications/vgg19.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """VGG19 model for Keras. Reference: @@ -28,222 +28,253 @@ from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export -WEIGHTS_PATH = ('https://storage.googleapis.com/tensorflow/keras-applications/' - 'vgg19/vgg19_weights_tf_dim_ordering_tf_kernels.h5') -WEIGHTS_PATH_NO_TOP = ('https://storage.googleapis.com/tensorflow/' - 'keras-applications/vgg19/' - 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5') +WEIGHTS_PATH = ( + "https://storage.googleapis.com/tensorflow/keras-applications/" + "vgg19/vgg19_weights_tf_dim_ordering_tf_kernels.h5" +) +WEIGHTS_PATH_NO_TOP = ( + "https://storage.googleapis.com/tensorflow/" + "keras-applications/vgg19/" + "vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5" +) layers = VersionAwareLayers() -@keras_export('keras.applications.vgg19.VGG19', 'keras.applications.VGG19') +@keras_export("keras.applications.vgg19.VGG19", "keras.applications.VGG19") def VGG19( include_top=True, - weights='imagenet', + weights="imagenet", input_tensor=None, input_shape=None, pooling=None, classes=1000, - classifier_activation='softmax'): - """Instantiates the VGG19 architecture. + classifier_activation="softmax", +): + """Instantiates the VGG19 architecture. 
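The weight-loading branch above relies on `keras.utils.get_file`, which downloads a URL once, verifies the file hash, and caches the result under `~/.keras/models`. A standalone sketch using the notop URL and hash that appear in this hunk:

```python
from tensorflow.keras.utils import get_file

weights_path = get_file(
    "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5",
    "https://storage.googleapis.com/tensorflow/"
    "keras-applications/vgg16/"
    "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5",
    cache_subdir="models",
    file_hash="6d6bbae143d832006294945121d1f1fc",
)
print(weights_path)  # local cached path under ~/.keras/models
```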
- Reference: - - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( - https://arxiv.org/abs/1409.1556) (ICLR 2015) + Reference: + - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( + https://arxiv.org/abs/1409.1556) (ICLR 2015) + + For image classification use cases, see + [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + + The default input size for this model is 224x224. + + Note: each Keras Application expects a specific kind of input preprocessing. + For VGG19, call `tf.keras.applications.vgg19.preprocess_input` on your + inputs before passing them to the model. + `vgg19.preprocess_input` will convert the input images from RGB to BGR, + then will zero-center each color channel with respect to the ImageNet + dataset, without scaling. + + Args: + include_top: whether to include the 3 fully-connected + layers at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` + (with `channels_last` data format) + or `(3, 224, 224)` (with `channels_first` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. - For image classification use cases, see - [this page for detailed examples]( - https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning]( - https://keras.io/guides/transfer_learning/). - - The default input size for this model is 224x224. - - Note: each Keras Application expects a specific kind of input preprocessing. - For VGG19, call `tf.keras.applications.vgg19.preprocess_input` on your - inputs before passing them to the model. - `vgg19.preprocess_input` will convert the input images from RGB to BGR, - then will zero-center each color channel with respect to the ImageNet dataset, - without scaling. - - Args: - include_top: whether to include the 3 fully-connected - layers at the top of the network. 
- weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor - (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` - (with `channels_last` data format) - or `(3, 224, 224)` (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(200, 200, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - - Returns: - A `keras.Model` instance. - """ - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded. ' - f'Received: `weights={weights}.`') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000. ' - f'Received: `classes={classes}.`') - # Determine proper input shape - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=224, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) + Returns: + A `keras.Model` instance. + """ + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded. " + f"Received: `weights={weights}.`" + ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top` ' + "as true, `classes` should be 1000. 
" + f"Received: `classes={classes}.`" + ) + # Determine proper input shape + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=224, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) else: - img_input = input_tensor - # Block 1 - x = layers.Conv2D( - 64, (3, 3), activation='relu', padding='same', name='block1_conv1')( - img_input) - x = layers.Conv2D( - 64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = layers.Conv2D( - 128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x) - x = layers.Conv2D( - 128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = layers.Conv2D( - 256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) - x = layers.Conv2D( - 256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) - x = layers.Conv2D( - 256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) - x = layers.Conv2D( - 256, (3, 3), activation='relu', padding='same', name='block3_conv4')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block4_conv4')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) - x = layers.Conv2D( - 512, (3, 3), activation='relu', padding='same', name='block5_conv4')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - if include_top: - # Classification block - x = layers.Flatten(name='flatten')(x) - x = layers.Dense(4096, activation='relu', name='fc1')(x) - x = layers.Dense(4096, activation='relu', name='fc2')(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense(classes, activation=classifier_activation, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = training.Model(inputs, x, name='vgg19') - - # Load weights. 
- if weights == 'imagenet': + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + # Block 1 + x = layers.Conv2D( + 64, (3, 3), activation="relu", padding="same", name="block1_conv1" + )(img_input) + x = layers.Conv2D( + 64, (3, 3), activation="relu", padding="same", name="block1_conv2" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block1_pool")(x) + + # Block 2 + x = layers.Conv2D( + 128, (3, 3), activation="relu", padding="same", name="block2_conv1" + )(x) + x = layers.Conv2D( + 128, (3, 3), activation="relu", padding="same", name="block2_conv2" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block2_pool")(x) + + # Block 3 + x = layers.Conv2D( + 256, (3, 3), activation="relu", padding="same", name="block3_conv1" + )(x) + x = layers.Conv2D( + 256, (3, 3), activation="relu", padding="same", name="block3_conv2" + )(x) + x = layers.Conv2D( + 256, (3, 3), activation="relu", padding="same", name="block3_conv3" + )(x) + x = layers.Conv2D( + 256, (3, 3), activation="relu", padding="same", name="block3_conv4" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block3_pool")(x) + + # Block 4 + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block4_conv1" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block4_conv2" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block4_conv3" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block4_conv4" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block4_pool")(x) + + # Block 5 + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block5_conv1" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block5_conv2" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block5_conv3" + )(x) + x = layers.Conv2D( + 512, (3, 3), activation="relu", padding="same", name="block5_conv4" + )(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block5_pool")(x) + if include_top: - weights_path = data_utils.get_file( - 'vgg19_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models', - file_hash='cbe5617147190e668d6c5d5026f83318') + # Classification block + x = layers.Flatten(name="flatten")(x) + x = layers.Dense(4096, activation="relu", name="fc1")(x) + x = layers.Dense(4096, activation="relu", name="fc2")(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) else: - weights_path = data_utils.get_file( - 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - file_hash='253f8cb515780f3b799900260a226db6') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. + model = training.Model(inputs, x, name="vgg19") + + # Load weights. 
+ if weights == "imagenet": + if include_top: + weights_path = data_utils.get_file( + "vgg19_weights_tf_dim_ordering_tf_kernels.h5", + WEIGHTS_PATH, + cache_subdir="models", + file_hash="cbe5617147190e668d6c5d5026f83318", + ) + else: + weights_path = data_utils.get_file( + "vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5", + WEIGHTS_PATH_NO_TOP, + cache_subdir="models", + file_hash="253f8cb515780f3b799900260a226db6", + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) - return model + return model -@keras_export('keras.applications.vgg19.preprocess_input') +@keras_export("keras.applications.vgg19.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input( - x, data_format=data_format, mode='caffe') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="caffe" + ) -@keras_export('keras.applications.vgg19.decode_predictions') +@keras_export("keras.applications.vgg19.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_CAFFE, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/applications/xception.py b/keras/applications/xception.py index 5e931ecaadf6..e7e4ff597c89 100644 --- a/keras/applications/xception.py +++ b/keras/applications/xception.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name + """Xception V1 model for Keras. On ImageNet, this model gets to a top-1 validation accuracy of 0.790 @@ -31,301 +31,350 @@ from keras.layers import VersionAwareLayers from keras.utils import data_utils from keras.utils import layer_utils -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export TF_WEIGHTS_PATH = ( - 'https://storage.googleapis.com/tensorflow/keras-applications/' - 'xception/xception_weights_tf_dim_ordering_tf_kernels.h5') + "https://storage.googleapis.com/tensorflow/keras-applications/" + "xception/xception_weights_tf_dim_ordering_tf_kernels.h5" +) TF_WEIGHTS_PATH_NO_TOP = ( - 'https://storage.googleapis.com/tensorflow/keras-applications/' - 'xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5') + "https://storage.googleapis.com/tensorflow/keras-applications/" + "xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5" +) layers = VersionAwareLayers() -@keras_export('keras.applications.xception.Xception', - 'keras.applications.Xception') +@keras_export( + "keras.applications.xception.Xception", "keras.applications.Xception" +) def Xception( include_top=True, - weights='imagenet', + weights="imagenet", input_tensor=None, input_shape=None, pooling=None, classes=1000, - classifier_activation='softmax'): - """Instantiates the Xception architecture. + classifier_activation="softmax", +): + """Instantiates the Xception architecture. 
+ + Reference: + - [Xception: Deep Learning with Depthwise Separable Convolutions]( + https://arxiv.org/abs/1610.02357) (CVPR 2017) + + For image classification use cases, see + [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + + The default input image size for this model is 299x299. + + Note: each Keras Application expects a specific kind of input preprocessing. + For Xception, call `tf.keras.applications.xception.preprocess_input` on your + inputs before passing them to the model. + `xception.preprocess_input` will scale input pixels between -1 and 1. + + Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(299, 299, 3)`. + It should have exactly 3 inputs channels, + and width and height should be no smaller than 71. + E.g. `(150, 150, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, + and if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + + Returns: + A `keras.Model` instance. + """ + if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)): + raise ValueError( + "The `weights` argument should be either " + "`None` (random initialization), `imagenet` " + "(pre-training on ImageNet), " + "or the path to the weights file to be loaded." 
+ ) + + if weights == "imagenet" and include_top and classes != 1000: + raise ValueError( + 'If using `weights` as `"imagenet"` with `include_top`' + " as true, `classes` should be 1000" + ) + + # Determine proper input shape + input_shape = imagenet_utils.obtain_input_shape( + input_shape, + default_size=299, + min_size=71, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights, + ) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + channel_axis = 1 if backend.image_data_format() == "channels_first" else -1 + + x = layers.Conv2D( + 32, (3, 3), strides=(2, 2), use_bias=False, name="block1_conv1" + )(img_input) + x = layers.BatchNormalization(axis=channel_axis, name="block1_conv1_bn")(x) + x = layers.Activation("relu", name="block1_conv1_act")(x) + x = layers.Conv2D(64, (3, 3), use_bias=False, name="block1_conv2")(x) + x = layers.BatchNormalization(axis=channel_axis, name="block1_conv2_bn")(x) + x = layers.Activation("relu", name="block1_conv2_act")(x) + + residual = layers.Conv2D( + 128, (1, 1), strides=(2, 2), padding="same", use_bias=False + )(x) + residual = layers.BatchNormalization(axis=channel_axis)(residual) - Reference: - - [Xception: Deep Learning with Depthwise Separable Convolutions]( - https://arxiv.org/abs/1610.02357) (CVPR 2017) + x = layers.SeparableConv2D( + 128, (3, 3), padding="same", use_bias=False, name="block2_sepconv1" + )(x) + x = layers.BatchNormalization(axis=channel_axis, name="block2_sepconv1_bn")( + x + ) + x = layers.Activation("relu", name="block2_sepconv2_act")(x) + x = layers.SeparableConv2D( + 128, (3, 3), padding="same", use_bias=False, name="block2_sepconv2" + )(x) + x = layers.BatchNormalization(axis=channel_axis, name="block2_sepconv2_bn")( + x + ) + + x = layers.MaxPooling2D( + (3, 3), strides=(2, 2), padding="same", name="block2_pool" + )(x) + x = layers.add([x, residual]) - For image classification use cases, see - [this page for detailed examples]( - https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning]( - https://keras.io/guides/transfer_learning/). - - The default input image size for this model is 299x299. - - Note: each Keras Application expects a specific kind of input preprocessing. - For Xception, call `tf.keras.applications.xception.preprocess_input` on your - inputs before passing them to the model. - `xception.preprocess_input` will scale input pixels between -1 and 1. - - Args: - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor - (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(299, 299, 3)`. - It should have exactly 3 inputs channels, - and width and height should be no smaller than 71. - E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. 
- - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, - and if no `weights` argument is specified. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - - Returns: - A `keras.Model` instance. - """ - if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = imagenet_utils.obtain_input_shape( - input_shape, - default_size=299, - min_size=71, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - - x = layers.Conv2D( - 32, (3, 3), - strides=(2, 2), - use_bias=False, - name='block1_conv1')(img_input) - x = layers.BatchNormalization(axis=channel_axis, name='block1_conv1_bn')(x) - x = layers.Activation('relu', name='block1_conv1_act')(x) - x = layers.Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x) - x = layers.BatchNormalization(axis=channel_axis, name='block1_conv2_bn')(x) - x = layers.Activation('relu', name='block1_conv2_act')(x) - - residual = layers.Conv2D( - 128, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) - residual = layers.BatchNormalization(axis=channel_axis)(residual) - - x = layers.SeparableConv2D( - 128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x) - x = layers.BatchNormalization(axis=channel_axis, name='block2_sepconv1_bn')(x) - x = layers.Activation('relu', name='block2_sepconv2_act')(x) - x = layers.SeparableConv2D( - 128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x) - x = layers.BatchNormalization(axis=channel_axis, name='block2_sepconv2_bn')(x) - - x = layers.MaxPooling2D((3, 3), - strides=(2, 2), - padding='same', - name='block2_pool')(x) - x = layers.add([x, residual]) - - residual = layers.Conv2D( - 256, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) - residual = layers.BatchNormalization(axis=channel_axis)(residual) - - x = layers.Activation('relu', name='block3_sepconv1_act')(x) - x = layers.SeparableConv2D( - 256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x) - x = layers.BatchNormalization(axis=channel_axis, name='block3_sepconv1_bn')(x) - x = layers.Activation('relu', name='block3_sepconv2_act')(x) - x = layers.SeparableConv2D( - 256, (3, 3), 
padding='same', use_bias=False, name='block3_sepconv2')(x) - x = layers.BatchNormalization(axis=channel_axis, name='block3_sepconv2_bn')(x) - - x = layers.MaxPooling2D((3, 3), - strides=(2, 2), - padding='same', - name='block3_pool')(x) - x = layers.add([x, residual]) - - residual = layers.Conv2D( - 728, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) - residual = layers.BatchNormalization(axis=channel_axis)(residual) - - x = layers.Activation('relu', name='block4_sepconv1_act')(x) - x = layers.SeparableConv2D( - 728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x) - x = layers.BatchNormalization(axis=channel_axis, name='block4_sepconv1_bn')(x) - x = layers.Activation('relu', name='block4_sepconv2_act')(x) - x = layers.SeparableConv2D( - 728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x) - x = layers.BatchNormalization(axis=channel_axis, name='block4_sepconv2_bn')(x) - - x = layers.MaxPooling2D((3, 3), - strides=(2, 2), - padding='same', - name='block4_pool')(x) - x = layers.add([x, residual]) - - for i in range(8): - residual = x - prefix = 'block' + str(i + 5) - - x = layers.Activation('relu', name=prefix + '_sepconv1_act')(x) + residual = layers.Conv2D( + 256, (1, 1), strides=(2, 2), padding="same", use_bias=False + )(x) + residual = layers.BatchNormalization(axis=channel_axis)(residual) + + x = layers.Activation("relu", name="block3_sepconv1_act")(x) x = layers.SeparableConv2D( - 728, (3, 3), - padding='same', - use_bias=False, - name=prefix + '_sepconv1')(x) - x = layers.BatchNormalization( - axis=channel_axis, name=prefix + '_sepconv1_bn')(x) - x = layers.Activation('relu', name=prefix + '_sepconv2_act')(x) + 256, (3, 3), padding="same", use_bias=False, name="block3_sepconv1" + )(x) + x = layers.BatchNormalization(axis=channel_axis, name="block3_sepconv1_bn")( + x + ) + x = layers.Activation("relu", name="block3_sepconv2_act")(x) + x = layers.SeparableConv2D( + 256, (3, 3), padding="same", use_bias=False, name="block3_sepconv2" + )(x) + x = layers.BatchNormalization(axis=channel_axis, name="block3_sepconv2_bn")( + x + ) + + x = layers.MaxPooling2D( + (3, 3), strides=(2, 2), padding="same", name="block3_pool" + )(x) + x = layers.add([x, residual]) + + residual = layers.Conv2D( + 728, (1, 1), strides=(2, 2), padding="same", use_bias=False + )(x) + residual = layers.BatchNormalization(axis=channel_axis)(residual) + + x = layers.Activation("relu", name="block4_sepconv1_act")(x) + x = layers.SeparableConv2D( + 728, (3, 3), padding="same", use_bias=False, name="block4_sepconv1" + )(x) + x = layers.BatchNormalization(axis=channel_axis, name="block4_sepconv1_bn")( + x + ) + x = layers.Activation("relu", name="block4_sepconv2_act")(x) + x = layers.SeparableConv2D( + 728, (3, 3), padding="same", use_bias=False, name="block4_sepconv2" + )(x) + x = layers.BatchNormalization(axis=channel_axis, name="block4_sepconv2_bn")( + x + ) + + x = layers.MaxPooling2D( + (3, 3), strides=(2, 2), padding="same", name="block4_pool" + )(x) + x = layers.add([x, residual]) + + for i in range(8): + residual = x + prefix = "block" + str(i + 5) + + x = layers.Activation("relu", name=prefix + "_sepconv1_act")(x) + x = layers.SeparableConv2D( + 728, + (3, 3), + padding="same", + use_bias=False, + name=prefix + "_sepconv1", + )(x) + x = layers.BatchNormalization( + axis=channel_axis, name=prefix + "_sepconv1_bn" + )(x) + x = layers.Activation("relu", name=prefix + "_sepconv2_act")(x) + x = layers.SeparableConv2D( + 728, + (3, 3), + padding="same", + use_bias=False, + 
name=prefix + "_sepconv2", + )(x) + x = layers.BatchNormalization( + axis=channel_axis, name=prefix + "_sepconv2_bn" + )(x) + x = layers.Activation("relu", name=prefix + "_sepconv3_act")(x) + x = layers.SeparableConv2D( + 728, + (3, 3), + padding="same", + use_bias=False, + name=prefix + "_sepconv3", + )(x) + x = layers.BatchNormalization( + axis=channel_axis, name=prefix + "_sepconv3_bn" + )(x) + + x = layers.add([x, residual]) + + residual = layers.Conv2D( + 1024, (1, 1), strides=(2, 2), padding="same", use_bias=False + )(x) + residual = layers.BatchNormalization(axis=channel_axis)(residual) + + x = layers.Activation("relu", name="block13_sepconv1_act")(x) x = layers.SeparableConv2D( - 728, (3, 3), - padding='same', - use_bias=False, - name=prefix + '_sepconv2')(x) + 728, (3, 3), padding="same", use_bias=False, name="block13_sepconv1" + )(x) x = layers.BatchNormalization( - axis=channel_axis, name=prefix + '_sepconv2_bn')(x) - x = layers.Activation('relu', name=prefix + '_sepconv3_act')(x) + axis=channel_axis, name="block13_sepconv1_bn" + )(x) + x = layers.Activation("relu", name="block13_sepconv2_act")(x) x = layers.SeparableConv2D( - 728, (3, 3), - padding='same', - use_bias=False, - name=prefix + '_sepconv3')(x) + 1024, (3, 3), padding="same", use_bias=False, name="block13_sepconv2" + )(x) x = layers.BatchNormalization( - axis=channel_axis, name=prefix + '_sepconv3_bn')(x) + axis=channel_axis, name="block13_sepconv2_bn" + )(x) + x = layers.MaxPooling2D( + (3, 3), strides=(2, 2), padding="same", name="block13_pool" + )(x) x = layers.add([x, residual]) - residual = layers.Conv2D( - 1024, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) - residual = layers.BatchNormalization(axis=channel_axis)(residual) - - x = layers.Activation('relu', name='block13_sepconv1_act')(x) - x = layers.SeparableConv2D( - 728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block13_sepconv1_bn')(x) - x = layers.Activation('relu', name='block13_sepconv2_act')(x) - x = layers.SeparableConv2D( - 1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block13_sepconv2_bn')(x) - - x = layers.MaxPooling2D((3, 3), - strides=(2, 2), - padding='same', - name='block13_pool')(x) - x = layers.add([x, residual]) - - x = layers.SeparableConv2D( - 1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block14_sepconv1_bn')(x) - x = layers.Activation('relu', name='block14_sepconv1_act')(x) - - x = layers.SeparableConv2D( - 2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block14_sepconv2_bn')(x) - x = layers.Activation('relu', name='block14_sepconv2_act')(x) - - if include_top: - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - imagenet_utils.validate_activation(classifier_activation, weights) - x = layers.Dense(classes, activation=classifier_activation, - name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = layer_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. 
- model = training.Model(inputs, x, name='xception') - - # Load weights. - if weights == 'imagenet': - if include_top: - weights_path = data_utils.get_file( - 'xception_weights_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models', - file_hash='0a58e3b7378bc2990ea3b43d5981f1f6') - else: - weights_path = data_utils.get_file( - 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - file_hash='b0042744bf5b25fce3cb969f33bebb97') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model + x = layers.SeparableConv2D( + 1536, (3, 3), padding="same", use_bias=False, name="block14_sepconv1" + )(x) + x = layers.BatchNormalization( + axis=channel_axis, name="block14_sepconv1_bn" + )(x) + x = layers.Activation("relu", name="block14_sepconv1_act")(x) + x = layers.SeparableConv2D( + 2048, (3, 3), padding="same", use_bias=False, name="block14_sepconv2" + )(x) + x = layers.BatchNormalization( + axis=channel_axis, name="block14_sepconv2_bn" + )(x) + x = layers.Activation("relu", name="block14_sepconv2_act")(x) -@keras_export('keras.applications.xception.preprocess_input') + if include_top: + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. + model = training.Model(inputs, x, name="xception") + + # Load weights. + if weights == "imagenet": + if include_top: + weights_path = data_utils.get_file( + "xception_weights_tf_dim_ordering_tf_kernels.h5", + TF_WEIGHTS_PATH, + cache_subdir="models", + file_hash="0a58e3b7378bc2990ea3b43d5981f1f6", + ) + else: + weights_path = data_utils.get_file( + "xception_weights_tf_dim_ordering_tf_kernels_notop.h5", + TF_WEIGHTS_PATH_NO_TOP, + cache_subdir="models", + file_hash="b0042744bf5b25fce3cb969f33bebb97", + ) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +@keras_export("keras.applications.xception.preprocess_input") def preprocess_input(x, data_format=None): - return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') + return imagenet_utils.preprocess_input( + x, data_format=data_format, mode="tf" + ) -@keras_export('keras.applications.xception.decode_predictions') +@keras_export("keras.applications.xception.decode_predictions") def decode_predictions(preds, top=5): - return imagenet_utils.decode_predictions(preds, top=top) + return imagenet_utils.decode_predictions(preds, top=top) preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( - mode='', + mode="", ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF, - error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC, +) decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/keras/backend.py b/keras/backend.py index cf69a175b794..7f5b6b1d4cc7 100644 --- a/keras/backend.py +++ b/keras/backend.py @@ -12,15 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
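The Xception middle flow reformatted above repeats a single residual block eight times (blocks 5 through 12): three relu → SeparableConv2D → BatchNormalization stages with an identity shortcut added back in. A self-contained sketch of one such block, assuming `channels_last` (at a 299x299 input the middle flow runs at 19x19x728):

```python
import tensorflow as tf
from tensorflow.keras import layers

def middle_flow_block(x, prefix):
    residual = x  # identity shortcut; no 1x1 projection in the middle flow
    for i in (1, 2, 3):
        x = layers.Activation("relu", name=f"{prefix}_sepconv{i}_act")(x)
        x = layers.SeparableConv2D(
            728, (3, 3), padding="same", use_bias=False,
            name=f"{prefix}_sepconv{i}",
        )(x)
        x = layers.BatchNormalization(name=f"{prefix}_sepconv{i}_bn")(x)
    return layers.add([x, residual])

inputs = tf.keras.Input(shape=(19, 19, 728))
outputs = middle_flow_block(inputs, "block5")
print(tf.keras.Model(inputs, outputs, name="middle_flow").count_params())
```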
# ============================================================================== -# pylint: disable=protected-access -# pylint: disable=redefined-outer-name -# pylint: disable=redefined-builtin -# pylint: disable=g-classes-have-attributes -# pylint: disable=g-bad-import-order -# pylint: disable=missing-function-docstring -"""Keras backend API.""" -import tensorflow.compat.v2 as tf + +"""Keras backend API.""" import collections import itertools @@ -33,18 +27,22 @@ import weakref import numpy as np +import tensorflow.compat.v2 as tf -from tensorflow.core.protobuf import config_pb2 -from tensorflow.python.eager import context -from tensorflow.python.eager.context import get_config -from tensorflow.python.framework import config from keras import backend_config from keras.distribute import distribute_coordinator_utils as dc +from keras.dtensor import dtensor_api as dtensor from keras.engine import keras_tensor from keras.utils import control_flow_util from keras.utils import object_identity from keras.utils import tf_contextlib from keras.utils import tf_inspect +from keras.utils import tf_utils + +# isort: off +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.eager import context +from tensorflow.python.eager.context import get_config from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export from tensorflow.tools.docs import doc_controls @@ -69,10 +67,9 @@ # This is a thread local object that will hold the default internal TF session # used by Keras. It can be set manually via `set_session(sess)`. class SessionLocal(threading.local): - - def __init__(self): - super().__init__() - self.session = None + def __init__(self): + super().__init__() + self.session = None _SESSION = SessionLocal() @@ -96,32 +93,34 @@ def __init__(self): # thread local. This is needed to make set_learning_phase affect only the # current thread during eager execution (see b/123096885 for more details). class _DummyEagerGraph(threading.local): - """_DummyEagerGraph provides a thread local `key` attribute. + """_DummyEagerGraph provides a thread local `key` attribute. - We can't use threading.local directly, i.e. without subclassing, because - gevent monkey patches threading.local and its version does not support - weak references. - """ + We can't use threading.local directly, i.e. without subclassing, because + gevent monkey patches threading.local and its version does not support + weak references. + """ - class _WeakReferencableClass: - """This dummy class is needed for two reasons. + class _WeakReferencableClass: + """This dummy class is needed for two reasons. - - We need something that supports weak references. Basic types like string - and ints don't. - - We need something whose hash and equality are based on object identity - to make sure they are treated as different keys to _GRAPH_LEARNING_PHASES. + - We need something that supports weak references. Basic types like + string and ints don't. + - We need something whose hash and equality are based on object identity + to make sure they are treated as different keys to + _GRAPH_LEARNING_PHASES. - An empty Python class satisfies both of these requirements. - """ - pass + An empty Python class satisfies both of these requirements. + """ - def __init__(self): - # Constructors for classes subclassing threading.local run once - # per thread accessing something in the class. Thus, each thread will - # get a different key. 
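The constructor comment carried through this hunk is worth unpacking: a `threading.local` subclass runs its `__init__` once per thread that first touches the instance, which is exactly what gives each thread its own `key`. A minimal demonstration (names here are illustrative):

```python
import threading

class PerThread(threading.local):
    def __init__(self):
        # Runs once in every thread that first touches the instance.
        super().__init__()
        self.key = object()  # identity-hashed, so keys never collide

store = PerThread()
keys = []

def grab():
    keys.append(store.key)

worker = threading.Thread(target=grab)
worker.start()
worker.join()
grab()  # main thread
print("per-thread keys differ:", keys[0] is not keys[1])  # True
```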
- super().__init__() - self.key = _DummyEagerGraph._WeakReferencableClass() - self.learning_phase_is_set = False + pass + + def __init__(self): + # Constructors for classes subclassing threading.local run once + # per thread accessing something in the class. Thus, each thread will + # get a different key. + super().__init__() + self.key = _DummyEagerGraph._WeakReferencableClass() + self.learning_phase_is_set = False _DUMMY_EAGER_GRAPH = _DummyEagerGraph() @@ -145,623 +144,648 @@ def __init__(self): set_image_data_format = backend_config.set_image_data_format -@keras_export('keras.backend.backend') +@keras_export("keras.backend.backend") @doc_controls.do_not_generate_docs def backend(): - """Publicly accessible method for determining the current backend. + """Publicly accessible method for determining the current backend. - Only exists for API compatibility with multi-backend Keras. + Only exists for API compatibility with multi-backend Keras. - Returns: - The string "tensorflow". - """ - return 'tensorflow' + Returns: + The string "tensorflow". + """ + return "tensorflow" -@keras_export('keras.backend.cast_to_floatx') +@keras_export("keras.backend.cast_to_floatx") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def cast_to_floatx(x): - """Cast a Numpy array to the default Keras float type. - - Args: - x: Numpy array or TensorFlow tensor. + """Cast a Numpy array to the default Keras float type. - Returns: - The same array (Numpy array if `x` was a Numpy array, or TensorFlow tensor - if `x` was a tensor), cast to its new type. - - Example: + Args: + x: Numpy array or TensorFlow tensor. - >>> tf.keras.backend.floatx() - 'float32' - >>> arr = np.array([1.0, 2.0], dtype='float64') - >>> arr.dtype - dtype('float64') - >>> new_arr = cast_to_floatx(arr) - >>> new_arr - array([1., 2.], dtype=float32) - >>> new_arr.dtype - dtype('float32') + Returns: + The same array (Numpy array if `x` was a Numpy array, or TensorFlow + tensor if `x` was a tensor), cast to its new type. + + Example: + + >>> tf.keras.backend.floatx() + 'float32' + >>> arr = np.array([1.0, 2.0], dtype='float64') + >>> arr.dtype + dtype('float64') + >>> new_arr = cast_to_floatx(arr) + >>> new_arr + array([1., 2.], dtype=float32) + >>> new_arr.dtype + dtype('float32') - """ - if isinstance(x, (tf.Tensor, - tf.Variable, - tf.SparseTensor)): - return tf.cast(x, dtype=floatx()) - return np.asarray(x, dtype=floatx()) + """ + if isinstance(x, (tf.Tensor, tf.Variable, tf.SparseTensor)): + return tf.cast(x, dtype=floatx()) + return np.asarray(x, dtype=floatx()) -@keras_export('keras.backend.get_uid') -def get_uid(prefix=''): - """Associates a string prefix with an integer counter in a TensorFlow graph. +@keras_export("keras.backend.get_uid") +def get_uid(prefix=""): + """Associates a string prefix with an integer counter in a TensorFlow graph. - Args: - prefix: String prefix to index. + Args: + prefix: String prefix to index. - Returns: - Unique integer ID. + Returns: + Unique integer ID. 
- Example: + Example: - >>> get_uid('dense') - 1 - >>> get_uid('dense') - 2 + >>> get_uid('dense') + 1 + >>> get_uid('dense') + 2 - """ - graph = get_graph() - if graph not in PER_GRAPH_OBJECT_NAME_UIDS: - PER_GRAPH_OBJECT_NAME_UIDS[graph] = collections.defaultdict(int) - layer_name_uids = PER_GRAPH_OBJECT_NAME_UIDS[graph] - layer_name_uids[prefix] += 1 - return layer_name_uids[prefix] + """ + graph = get_graph() + if graph not in PER_GRAPH_OBJECT_NAME_UIDS: + PER_GRAPH_OBJECT_NAME_UIDS[graph] = collections.defaultdict(int) + layer_name_uids = PER_GRAPH_OBJECT_NAME_UIDS[graph] + layer_name_uids[prefix] += 1 + return layer_name_uids[prefix] -@keras_export('keras.backend.reset_uids') +@keras_export("keras.backend.reset_uids") def reset_uids(): - """Resets graph identifiers. - """ + """Resets graph identifiers.""" - PER_GRAPH_OBJECT_NAME_UIDS.clear() - OBSERVED_NAMES.clear() + PER_GRAPH_OBJECT_NAME_UIDS.clear() + OBSERVED_NAMES.clear() -@keras_export('keras.backend.clear_session') +@keras_export("keras.backend.clear_session") def clear_session(): - """Resets all state generated by Keras. - - Keras manages a global state, which it uses to implement the Functional - model-building API and to uniquify autogenerated layer names. - - If you are creating many models in a loop, this global state will consume - an increasing amount of memory over time, and you may want to clear it. - Calling `clear_session()` releases the global state: this helps avoid clutter - from old models and layers, especially when memory is limited. - - Example 1: calling `clear_session()` when creating models in a loop - - ```python - for _ in range(100): - # Without `clear_session()`, each iteration of this loop will - # slightly increase the size of the global state managed by Keras - model = tf.keras.Sequential([tf.keras.layers.Dense(10) for _ in range(10)]) - - for _ in range(100): - # With `clear_session()` called at the beginning, - # Keras starts with a blank state at each iteration - # and memory consumption is constant over time. - tf.keras.backend.clear_session() - model = tf.keras.Sequential([tf.keras.layers.Dense(10) for _ in range(10)]) - ``` - - Example 2: resetting the layer name generation counter - - >>> import tensorflow as tf - >>> layers = [tf.keras.layers.Dense(10) for _ in range(10)] - >>> new_layer = tf.keras.layers.Dense(10) - >>> print(new_layer.name) - dense_10 - >>> tf.keras.backend.set_learning_phase(1) - >>> print(tf.keras.backend.learning_phase()) - 1 - >>> tf.keras.backend.clear_session() - >>> new_layer = tf.keras.layers.Dense(10) - >>> print(new_layer.name) - dense - """ - global _SESSION - global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned - global _GRAPH_VARIABLES # pylint: disable=global-variable-not-assigned - global _GRAPH_TF_OPTIMIZERS # pylint: disable=global-variable-not-assigned - global _GRAPH - _GRAPH.graph = None - tf.compat.v1.reset_default_graph() - reset_uids() - if _SESSION.session is not None: - _SESSION.session.close() - _SESSION.session = None - graph = get_graph() - with graph.as_default(): - _DUMMY_EAGER_GRAPH.learning_phase_is_set = False - - _GRAPH_LEARNING_PHASES = {} - # Create the learning phase placeholder in graph using the default factory - phase = _default_learning_phase() - _internal_set_learning_phase(graph, phase) - - _GRAPH_VARIABLES.pop(graph, None) - _GRAPH_TF_OPTIMIZERS.pop(graph, None) - if tf.executing_eagerly(): - # Clear pending nodes in eager executors, kernel caches and step_containers. 
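Stripped of the graph bookkeeping, `get_uid` is a per-graph `defaultdict` of counters keyed by string prefix, which is what feeds auto-generated layer names (`dense`, `dense_1`, ...). An illustrative stand-alone version:

```python
import collections

# One counter table per graph, one counter per prefix within it.
_NAME_UIDS = collections.defaultdict(lambda: collections.defaultdict(int))

def get_uid_sketch(graph, prefix=""):
    counters = _NAME_UIDS[graph]
    counters[prefix] += 1
    return counters[prefix]

g = object()  # stands in for the current tf.Graph
assert get_uid_sketch(g, "dense") == 1
assert get_uid_sketch(g, "dense") == 2   # -> names like "dense", "dense_1"
assert get_uid_sketch(g, "conv2d") == 1  # counters are independent per prefix
```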
- context.context().clear_kernel_cache() + """Resets all state generated by Keras. + + Keras manages a global state, which it uses to implement the Functional + model-building API and to uniquify autogenerated layer names. + + If you are creating many models in a loop, this global state will consume + an increasing amount of memory over time, and you may want to clear it. + Calling `clear_session()` releases the global state: this helps avoid + clutter from old models and layers, especially when memory is limited. + + Example 1: calling `clear_session()` when creating models in a loop + + ```python + for _ in range(100): + # Without `clear_session()`, each iteration of this loop will + # slightly increase the size of the global state managed by Keras + model = tf.keras.Sequential([ + tf.keras.layers.Dense(10) for _ in range(10)]) + + for _ in range(100): + # With `clear_session()` called at the beginning, + # Keras starts with a blank state at each iteration + # and memory consumption is constant over time. + tf.keras.backend.clear_session() + model = tf.keras.Sequential([ + tf.keras.layers.Dense(10) for _ in range(10)]) + ``` + + Example 2: resetting the layer name generation counter + + >>> import tensorflow as tf + >>> layers = [tf.keras.layers.Dense(10) for _ in range(10)] + >>> new_layer = tf.keras.layers.Dense(10) + >>> print(new_layer.name) + dense_10 + >>> tf.keras.backend.set_learning_phase(1) + >>> print(tf.keras.backend.learning_phase()) + 1 + >>> tf.keras.backend.clear_session() + >>> new_layer = tf.keras.layers.Dense(10) + >>> print(new_layer.name) + dense + """ + global _SESSION + global _GRAPH_LEARNING_PHASES + global _GRAPH_VARIABLES + global _GRAPH_TF_OPTIMIZERS + global _GRAPH + _GRAPH.graph = None + tf.compat.v1.reset_default_graph() + reset_uids() + if _SESSION.session is not None: + _SESSION.session.close() + _SESSION.session = None + graph = get_graph() + with graph.as_default(): + _DUMMY_EAGER_GRAPH.learning_phase_is_set = False + + _GRAPH_LEARNING_PHASES = {} + # Create the learning phase placeholder in graph using the default + # factory + phase = _default_learning_phase() + _internal_set_learning_phase(graph, phase) + + _GRAPH_VARIABLES.pop(graph, None) + _GRAPH_TF_OPTIMIZERS.pop(graph, None) + if tf.executing_eagerly(): + # Clear pending nodes in eager executors, kernel caches and + # step_containers. + context.context().clear_kernel_cache() + # Inject the clear_session function to keras_deps to remove the dependency # from TFLite to Keras. tf.__internal__.register_clear_session_function(clear_session) -@keras_export('keras.backend.manual_variable_initialization') +@keras_export("keras.backend.manual_variable_initialization") @doc_controls.do_not_generate_docs def manual_variable_initialization(value): - """Sets the manual variable initialization flag. + """Sets the manual variable initialization flag. - This boolean flag determines whether - variables should be initialized - as they are instantiated (default), or if - the user should handle the initialization - (e.g. via `tf.compat.v1.initialize_all_variables()`). + This boolean flag determines whether + variables should be initialized + as they are instantiated (default), or if + the user should handle the initialization + (e.g. via `tf.compat.v1.initialize_all_variables()`). - Args: - value: Python boolean. - """ - global _MANUAL_VAR_INIT - _MANUAL_VAR_INIT = value + Args: + value: Python boolean. 
+ """ + global _MANUAL_VAR_INIT + _MANUAL_VAR_INIT = value -@keras_export('keras.backend.learning_phase') +@keras_export("keras.backend.learning_phase") @doc_controls.do_not_generate_docs def learning_phase(): - """Returns the learning phase flag. - - The learning phase flag is a bool tensor (0 = test, 1 = train) - to be passed as input to any Keras function - that uses a different behavior at train time and test time. - - Returns: - Learning phase (scalar integer tensor or Python integer). - """ - graph = tf.compat.v1.get_default_graph() - if graph is getattr(_GRAPH, 'graph', None): - # Don't enter an init_scope for the learning phase if eager execution - # is enabled but we're inside the Keras workspace graph. - learning_phase = symbolic_learning_phase() - else: - with tf.init_scope(): - # We always check & set the learning phase inside the init_scope, - # otherwise the wrong default_graph will be used to look up the learning - # phase inside of functions & defuns. - # - # This is because functions & defuns (both in graph & in eager mode) - # will always execute non-eagerly using a function-specific default - # subgraph. - if context.executing_eagerly(): - if _DUMMY_EAGER_GRAPH.key not in _GRAPH_LEARNING_PHASES: - phase = _default_learning_phase() - _internal_set_learning_phase(_DUMMY_EAGER_GRAPH.key, phase) - _DUMMY_EAGER_GRAPH.learning_phase_is_set = True - return _internal_get_learning_phase(_DUMMY_EAGER_GRAPH.key) - else: + """Returns the learning phase flag. + + The learning phase flag is a bool tensor (0 = test, 1 = train) + to be passed as input to any Keras function + that uses a different behavior at train time and test time. + + Returns: + Learning phase (scalar integer tensor or Python integer). + """ + graph = tf.compat.v1.get_default_graph() + if graph is getattr(_GRAPH, "graph", None): + # Don't enter an init_scope for the learning phase if eager execution + # is enabled but we're inside the Keras workspace graph. learning_phase = symbolic_learning_phase() - _mark_func_graph_as_unsaveable(graph, learning_phase) - return learning_phase + else: + with tf.init_scope(): + # We always check & set the learning phase inside the init_scope, + # otherwise the wrong default_graph will be used to look up the + # learning phase inside of functions & defuns. + # + # This is because functions & defuns (both in graph & in eager mode) + # will always execute non-eagerly using a function-specific default + # subgraph. + if context.executing_eagerly(): + if _DUMMY_EAGER_GRAPH.key not in _GRAPH_LEARNING_PHASES: + return _default_learning_phase() + else: + return _internal_get_learning_phase(_DUMMY_EAGER_GRAPH.key) + else: + learning_phase = symbolic_learning_phase() + _mark_func_graph_as_unsaveable(graph, learning_phase) + return learning_phase def global_learning_phase_is_set(): - return _DUMMY_EAGER_GRAPH.learning_phase_is_set + return _DUMMY_EAGER_GRAPH.learning_phase_is_set def _mark_func_graph_as_unsaveable(graph, learning_phase): - """Mark func graph as unsaveable due to use of symbolic keras learning phase. + """Mark graph as unsaveable due to use of symbolic keras learning phase. - Functions that capture the symbolic learning phase cannot be exported to - SavedModel. Mark the funcgraph as unsaveable, so that an error will be raised - if it is exported. + Functions that capture the symbolic learning phase cannot be exported to + SavedModel. Mark the funcgraph as unsaveable, so that an error will be + raised if it is exported. - Args: - graph: Graph or FuncGraph object. 
- learning_phase: Learning phase placeholder or int defined in the graph. - """ - if graph.building_function and is_placeholder(learning_phase): - graph.mark_as_unsaveable( - 'The keras learning phase placeholder was used inside a function. ' - 'Exporting placeholders is not supported when saving out a SavedModel. ' - 'Please call `tf.keras.backend.set_learning_phase(0)` in the function ' - 'to set the learning phase to a constant value.') + Args: + graph: Graph or FuncGraph object. + learning_phase: Learning phase placeholder or int defined in the graph. + """ + if graph.building_function and is_placeholder(learning_phase): + graph.mark_as_unsaveable( + "The keras learning phase placeholder was used inside a function. " + "Exporting placeholders is not supported when saving out a " + "SavedModel. Please call `tf.keras.backend.set_learning_phase(0)` " + "in the function to set the learning phase to a constant value." + ) def symbolic_learning_phase(): - graph = get_graph() - with graph.as_default(): - if graph not in _GRAPH_LEARNING_PHASES: - phase = _default_learning_phase() - _internal_set_learning_phase(graph, phase) + graph = get_graph() + with graph.as_default(): + if graph not in _GRAPH_LEARNING_PHASES: + phase = _default_learning_phase() + _internal_set_learning_phase(graph, phase) - return _internal_get_learning_phase(graph) + return _internal_get_learning_phase(graph) def _internal_set_learning_phase(graph, value): - global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned - - if isinstance(value, tf.Tensor): - # The 'value' here is a tf.Tensor with attribute 'graph'. - # There is a circular reference between key 'graph' and attribute 'graph'. - # So we need use a weakref.ref to refer to the 'value' tensor here. - # Otherwise, it would lead to memory leak. - value_ref = weakref.ref(value) - _GRAPH_LEARNING_PHASES[graph] = value_ref - else: - _GRAPH_LEARNING_PHASES[graph] = value + global _GRAPH_LEARNING_PHASES + + if isinstance(value, tf.Tensor): + # The 'value' here is a tf.Tensor with attribute 'graph'. + # There is a circular reference between key 'graph' and attribute + # 'graph'. So we need use a weakref.ref to refer to the 'value' tensor + # here. Otherwise, it would lead to memory leak. + value_ref = weakref.ref(value) + _GRAPH_LEARNING_PHASES[graph] = value_ref + else: + _GRAPH_LEARNING_PHASES[graph] = value def _internal_get_learning_phase(graph): - phase = _GRAPH_LEARNING_PHASES.get(graph, None) - if isinstance(phase, weakref.ref): - return phase() - else: - return phase + phase = _GRAPH_LEARNING_PHASES.get(graph, None) + if isinstance(phase, weakref.ref): + return phase() + else: + return phase def _default_learning_phase(): - if context.executing_eagerly(): - return 0 - else: - with name_scope(''): - return tf.compat.v1.placeholder_with_default( - False, shape=(), name='keras_learning_phase') + if context.executing_eagerly(): + return 0 + else: + with name_scope(""): + return tf.compat.v1.placeholder_with_default( + False, shape=(), name="keras_learning_phase" + ) -@keras_export('keras.backend.set_learning_phase') +@keras_export("keras.backend.set_learning_phase") @doc_controls.do_not_generate_docs def set_learning_phase(value): - """Sets the learning phase to a fixed value. + """Sets the learning phase to a fixed value. - The backend learning phase affects any code that calls - `backend.learning_phase()` - In particular, all Keras built-in layers use the learning phase as the default - for the `training` arg to `Layer.__call__`. 
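The weakref comment in `_internal_set_learning_phase` above deserves a concrete picture: the dictionary key is the tensor's own graph, so holding the tensor strongly would close a reference cycle (graph -> dict entry -> tensor -> graph) and leak both objects. A toy model of the pattern, with stand-in `Graph`/`Tensor` classes:

```python
import weakref

class Graph:  # stand-in for tf.Graph
    pass

class Tensor:  # stand-in for tf.Tensor, which carries a .graph attribute
    def __init__(self, graph):
        self.graph = graph

_phases = {}

def set_phase(graph, value):
    if isinstance(value, Tensor):
        _phases[graph] = weakref.ref(value)  # weak ref breaks the cycle
    else:
        _phases[graph] = value

def get_phase(graph):
    phase = _phases.get(graph)
    return phase() if isinstance(phase, weakref.ref) else phase

g = Graph()
t = Tensor(g)  # keep a strong reference so the weakref stays live
set_phase(g, t)
assert get_phase(g) is t
set_phase(g, 1)  # plain ints are stored directly
assert get_phase(g) == 1
```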
+ The backend learning phase affects any code that calls + `backend.learning_phase()` + In particular, all Keras built-in layers use the learning phase as the + default for the `training` arg to `Layer.__call__`. - User-written layers and models can achieve the same behavior with code that - looks like: + User-written layers and models can achieve the same behavior with code that + looks like: - ```python - def call(self, inputs, training=None): - if training is None: - training = backend.learning_phase() - ``` + ```python + def call(self, inputs, training=None): + if training is None: + training = backend.learning_phase() + ``` - Args: - value: Learning phase value, either 0 or 1 (integers). - 0 = test, 1 = train + Args: + value: Learning phase value, either 0 or 1 (integers). + 0 = test, 1 = train - Raises: - ValueError: if `value` is neither `0` nor `1`. - """ - warnings.warn('`tf.keras.backend.set_learning_phase` is deprecated and ' - 'will be removed after 2020-10-11. To update it, simply ' - 'pass a True/False value to the `training` argument of the ' - '`__call__` method of your layer or model.') - deprecated_internal_set_learning_phase(value) + Raises: + ValueError: if `value` is neither `0` nor `1`. + """ + warnings.warn( + "`tf.keras.backend.set_learning_phase` is deprecated and " + "will be removed after 2020-10-11. To update it, simply " + "pass a True/False value to the `training` argument of the " + "`__call__` method of your layer or model." + ) + deprecated_internal_set_learning_phase(value) def deprecated_internal_set_learning_phase(value): - """A deprecated internal implementation of set_learning_phase. + """A deprecated internal implementation of set_learning_phase. - This method is an internal-only version of `set_learning_phase` that - does not raise a deprecation error. It is required because - saved_model needs to keep working with user code that uses the deprecated - learning phase methods until those APIs are fully removed from the public API. + This method is an internal-only version of `set_learning_phase` that + does not raise a deprecation error. It is required because + saved_model needs to keep working with user code that uses the deprecated + learning phase methods until those APIs are fully removed from the public + API. - Specifically SavedModel saving needs to make sure the learning phase is 0 - during tracing even if users overwrote it to a different value. + Specifically SavedModel saving needs to make sure the learning phase is 0 + during tracing even if users overwrote it to a different value. - But, we don't want to raise deprecation warnings for users when savedmodel - sets learning phase just for compatibility with code that relied on - explicitly setting the learning phase for other values. + But, we don't want to raise deprecation warnings for users when savedmodel + sets learning phase just for compatibility with code that relied on + explicitly setting the learning phase for other values. - Args: - value: Learning phase value, either 0 or 1 (integers). 0 = test, 1 = train + Args: + value: Learning phase value, either 0 or 1 (integers). + 0 = test, 1 = train - Raises: - ValueError: if `value` is neither `0` nor `1`. - """ - if value not in {0, 1}: - raise ValueError('Expected learning phase to be 0 or 1.') - with tf.init_scope(): - if tf.executing_eagerly(): - # In an eager context, the learning phase values applies to both the eager - # context and the internal Keras graph. 
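For reference, the replacement pattern that this deprecation message points users to, as a minimal sketch (assumes TF 2.x):

```python
import tensorflow as tf

layer = tf.keras.layers.Dropout(0.5)
x = tf.ones((2, 4))

y_train = layer(x, training=True)   # dropout active
y_infer = layer(x, training=False)  # identity pass-through
```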
- _DUMMY_EAGER_GRAPH.learning_phase_is_set = True - _internal_set_learning_phase(_DUMMY_EAGER_GRAPH.key, value) + Raises: + ValueError: if `value` is neither `0` nor `1`. + """ + if value not in {0, 1}: + raise ValueError("Expected learning phase to be 0 or 1.") + with tf.init_scope(): + if tf.executing_eagerly(): + # In an eager context, the learning phase value applies to both the + # eager context and the internal Keras graph. + _DUMMY_EAGER_GRAPH.learning_phase_is_set = True + _internal_set_learning_phase(_DUMMY_EAGER_GRAPH.key, value) - _internal_set_learning_phase(get_graph(), value) + _internal_set_learning_phase(get_graph(), value) -@keras_export('keras.backend.learning_phase_scope') +@keras_export("keras.backend.learning_phase_scope") @tf_contextlib.contextmanager @doc_controls.do_not_generate_docs def learning_phase_scope(value): - """Provides a scope within which the learning phase is equal to `value`. + """Provides a scope within which the learning phase is equal to `value`. - The learning phase gets restored to its original value upon exiting the scope. + The learning phase gets restored to its original value upon exiting the + scope. - Args: - value: Learning phase value, either 0 or 1 (integers). - 0 = test, 1 = train + Args: + value: Learning phase value, either 0 or 1 (integers). + 0 = test, 1 = train - Yields: - None. - - Raises: - ValueError: if `value` is neither `0` nor `1`. - """ - warnings.warn( - '`tf.keras.backend.learning_phase_scope` is deprecated and ' - 'will be removed after 2020-10-11. To update it, simply ' - 'pass a True/False value to the `training` argument of the ' - '`__call__` method of your layer or model.', - stacklevel=2) - with deprecated_internal_learning_phase_scope(value): - try: - yield - finally: - pass + Yields: + None. + + Raises: + ValueError: if `value` is neither `0` nor `1`. + """ + warnings.warn( + "`tf.keras.backend.learning_phase_scope` is deprecated and " + "will be removed after 2020-10-11. To update it, simply " + "pass a True/False value to the `training` argument of the " + "`__call__` method of your layer or model.", + stacklevel=2, + ) + with deprecated_internal_learning_phase_scope(value): + try: + yield + finally: + pass @tf_contextlib.contextmanager def deprecated_internal_learning_phase_scope(value): - """An internal-only version of `learning_phase_scope`. + """An internal-only version of `learning_phase_scope`. - Unlike the public method, this method does not raise a deprecation warning. - This is needed because saved model saving needs to set learning phase - to maintain compatibility - with code that sets/gets the learning phase, but saved model - saving itself shouldn't raise a deprecation warning. + Unlike the public method, this method does not raise a deprecation warning. + This is needed because saved model saving needs to set learning phase + to maintain compatibility + with code that sets/gets the learning phase, but saved model + saving itself shouldn't raise a deprecation warning. - We can get rid of this method and its usages when the public API is - removed. + We can get rid of this method and its usages when the public API is + removed. - Args: - value: Learning phase value, either 0 or 1 (integers). 0 = test, 1 = train - - Yields: - None. + Args: + value: Learning phase value, either 0 or 1 (integers). + 0 = test, 1 = train - Raises: - ValueError: if `value` is neither `0` nor `1`.
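A quick usage sketch of the scope being reformatted here (it still works, but emits the deprecation warning above; assumes TF 2.x eager execution and the `keras` package from this repo):

```python
from keras import backend

with backend.learning_phase_scope(1):    # 1 = train
    assert backend.learning_phase() == 1
# On exit the previous phase is restored (the eager default is 0 = test).
assert backend.learning_phase() == 0
```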
- """ - global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned - if value not in {0, 1}: - raise ValueError('Expected learning phase to be 0 or 1.') + Yields: + None. - with tf.init_scope(): - if tf.executing_eagerly(): - previous_eager_value = _internal_get_learning_phase( - _DUMMY_EAGER_GRAPH.key) - previous_graph_value = _internal_get_learning_phase(get_graph()) + Raises: + ValueError: if `value` is neither `0` nor `1`. + """ + global _GRAPH_LEARNING_PHASES + if value not in {0, 1}: + raise ValueError("Expected learning phase to be 0 or 1.") - learning_phase_previously_set = _DUMMY_EAGER_GRAPH.learning_phase_is_set - try: - deprecated_internal_set_learning_phase(value) - yield - finally: - # Restore learning phase to initial value. - if not learning_phase_previously_set: - _DUMMY_EAGER_GRAPH.learning_phase_is_set = False with tf.init_scope(): - if tf.executing_eagerly(): - if previous_eager_value is not None: - _internal_set_learning_phase(_DUMMY_EAGER_GRAPH.key, - previous_eager_value) - elif _DUMMY_EAGER_GRAPH.key in _GRAPH_LEARNING_PHASES: - del _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] + if tf.executing_eagerly(): + previous_eager_value = _internal_get_learning_phase( + _DUMMY_EAGER_GRAPH.key + ) + previous_graph_value = _internal_get_learning_phase(get_graph()) - graph = get_graph() - if previous_graph_value is not None: - _internal_set_learning_phase(graph, previous_graph_value) - elif graph in _GRAPH_LEARNING_PHASES: - del _GRAPH_LEARNING_PHASES[graph] + learning_phase_previously_set = _DUMMY_EAGER_GRAPH.learning_phase_is_set + try: + deprecated_internal_set_learning_phase(value) + yield + finally: + # Restore learning phase to initial value. + if not learning_phase_previously_set: + _DUMMY_EAGER_GRAPH.learning_phase_is_set = False + with tf.init_scope(): + if tf.executing_eagerly(): + if previous_eager_value is not None: + _internal_set_learning_phase( + _DUMMY_EAGER_GRAPH.key, previous_eager_value + ) + elif _DUMMY_EAGER_GRAPH.key in _GRAPH_LEARNING_PHASES: + del _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] + + graph = get_graph() + if previous_graph_value is not None: + _internal_set_learning_phase(graph, previous_graph_value) + elif graph in _GRAPH_LEARNING_PHASES: + del _GRAPH_LEARNING_PHASES[graph] @tf_contextlib.contextmanager def eager_learning_phase_scope(value): - """Internal scope that sets the learning phase in eager / tf.function only. - - Args: - value: Learning phase value, either 0 or 1 (integers). - 0 = test, 1 = train - - Yields: - None. - - Raises: - ValueError: if `value` is neither `0` nor `1`. - """ - global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned - assert value in {0, 1} - assert tf.compat.v1.executing_eagerly_outside_functions() - global_learning_phase_was_set = global_learning_phase_is_set() - if global_learning_phase_was_set: - previous_value = learning_phase() - try: - _internal_set_learning_phase(_DUMMY_EAGER_GRAPH.key, value) - yield - finally: - # Restore learning phase to initial value or unset. + """Internal scope that sets the learning phase in eager / tf.function only. + + Args: + value: Learning phase value, either 0 or 1 (integers). + 0 = test, 1 = train + + Yields: + None. + + Raises: + ValueError: if `value` is neither `0` nor `1`. 
+ """ + global _GRAPH_LEARNING_PHASES + assert value in {0, 1} + assert tf.compat.v1.executing_eagerly_outside_functions() + global_learning_phase_was_set = global_learning_phase_is_set() if global_learning_phase_was_set: - _internal_set_learning_phase(_DUMMY_EAGER_GRAPH.key, previous_value) - else: - del _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] + previous_value = learning_phase() + try: + _internal_set_learning_phase(_DUMMY_EAGER_GRAPH.key, value) + yield + finally: + # Restore learning phase to initial value or unset. + if global_learning_phase_was_set: + _internal_set_learning_phase(_DUMMY_EAGER_GRAPH.key, previous_value) + else: + del _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] def _as_graph_element(obj): - """Convert `obj` to a graph element if possible, otherwise return `None`. + """Convert `obj` to a graph element if possible, otherwise return `None`. - Args: - obj: Object to convert. + Args: + obj: Object to convert. - Returns: - The result of `obj._as_graph_element()` if that method is available; - otherwise `None`. - """ - conv_fn = getattr(obj, '_as_graph_element', None) - if conv_fn and callable(conv_fn): - return conv_fn() - return None + Returns: + The result of `obj._as_graph_element()` if that method is available; + otherwise `None`. + """ + conv_fn = getattr(obj, "_as_graph_element", None) + if conv_fn and callable(conv_fn): + return conv_fn() + return None def _assert_same_graph(original_item, item): - """Fail if the 2 items are from different graphs. + """Fail if the 2 items are from different graphs. - Args: - original_item: Original item to check against. - item: Item to check. + Args: + original_item: Original item to check against. + item: Item to check. - Raises: - ValueError: if graphs do not match. - """ - original_graph = getattr(original_item, 'graph', None) - graph = getattr(item, 'graph', None) - if original_graph and graph and original_graph is not graph: - raise ValueError( - '%s must be from the same graph as %s (graphs are %s and %s).' % - (item, original_item, graph, original_graph)) + Raises: + ValueError: if graphs do not match. + """ + original_graph = getattr(original_item, "graph", None) + graph = getattr(item, "graph", None) + if original_graph and graph and original_graph is not graph: + raise ValueError( + "%s must be from the same graph as %s (graphs are %s and %s)." + % (item, original_item, graph, original_graph) + ) def _current_graph(op_input_list, graph=None): - """Returns the appropriate graph to use for the given inputs. - - This library method provides a consistent algorithm for choosing the graph - in which an Operation should be constructed: - - 1. If the default graph is being used to construct a function, we - use the default graph. - 2. If the "graph" is specified explicitly, we validate that all of the inputs - in "op_input_list" are compatible with that graph. - 3. Otherwise, we attempt to select a graph from the first Operation- - or Tensor-valued input in "op_input_list", and validate that all other - such inputs are in the same graph. - 4. If the graph was not specified and it could not be inferred from - "op_input_list", we attempt to use the default graph. - - Args: - op_input_list: A list of inputs to an operation, which may include `Tensor`, - `Operation`, and other objects that may be converted to a graph element. - graph: (Optional) The explicit graph to use. - - Raises: - TypeError: If op_input_list is not a list or tuple, or if graph is not a - Graph. 
- ValueError: If a graph is explicitly passed and not all inputs are from it, - or if the inputs are from multiple graphs, or we could not find a graph - and there was no default graph. - - Returns: - The appropriate graph to use for the given inputs. - - """ - current_default_graph = tf.compat.v1.get_default_graph() - if current_default_graph.building_function: - return current_default_graph - - op_input_list = tuple(op_input_list) # Handle generators correctly - if graph and not isinstance(graph, tf.Graph): - raise TypeError('Input graph needs to be a Graph: %s' % (graph,)) - - # 1. We validate that all of the inputs are from the same graph. This is - # either the supplied graph parameter, or the first one selected from one - # the graph-element-valued inputs. In the latter case, we hold onto - # that input in original_graph_element so we can provide a more - # informative error if a mismatch is found. - original_graph_element = None - for op_input in op_input_list: - # Determine if this is a valid graph_element. - # TODO(joshl): Note that we exclude subclasses of Tensor. Need to clean this - # up. - if (isinstance(op_input, ( - tf.Operation, tf.Tensor, tf.__internal__.CompositeTensor)) and - ((not isinstance(op_input, tf.Tensor)) - or type(op_input) == tf.Tensor)): # pylint: disable=unidiomatic-typecheck - graph_element = op_input - else: - graph_element = _as_graph_element(op_input) + """Returns the appropriate graph to use for the given inputs. + + This library method provides a consistent algorithm for choosing the graph + in which an Operation should be constructed: + + 1. If the default graph is being used to construct a function, we + use the default graph. + 2. If the "graph" is specified explicitly, we validate that all of the + inputs in "op_input_list" are compatible with that graph. + 3. Otherwise, we attempt to select a graph from the first Operation- + or Tensor-valued input in "op_input_list", and validate that all other + such inputs are in the same graph. + 4. If the graph was not specified and it could not be inferred from + "op_input_list", we attempt to use the default graph. + + Args: + op_input_list: A list of inputs to an operation, which may include + `Tensor`, `Operation`, and other objects that may be converted to a + graph element. + graph: (Optional) The explicit graph to use. + + Raises: + TypeError: If op_input_list is not a list or tuple, or if graph is not a + Graph. + ValueError: If a graph is explicitly passed and not all inputs are from + it, or if the inputs are from multiple graphs, or we could not find a + graph and there was no default graph. + + Returns: + The appropriate graph to use for the given inputs. + + """ + current_default_graph = tf.compat.v1.get_default_graph() + if current_default_graph.building_function: + return current_default_graph + + op_input_list = tuple(op_input_list) # Handle generators correctly + if graph and not isinstance(graph, tf.Graph): + raise TypeError(f"Input graph needs to be a Graph: {graph}") + + def _is_symbolic_tensor(tensor): + if hasattr(tf, "is_symbolic_tensor"): + return tf.is_symbolic_tensor(tensor) + return type(tensor) == tf.Tensor + + # 1. We validate that all of the inputs are from the same graph. This is + # either the supplied graph parameter, or the first one selected from one + # the graph-element-valued inputs. In the latter case, we hold onto + # that input in original_graph_element so we can provide a more + # informative error if a mismatch is found. 
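Beyond formatting, this hunk replaces the old inline exact-type check with a new `_is_symbolic_tensor` helper that prefers `tf.is_symbolic_tensor` when the running TF build provides it (a newer addition to TF's public API; the exact minimum version is not stated here) and otherwise falls back to the old behavior. The shim, extracted standalone:

```python
import tensorflow as tf

def is_symbolic(tensor):
    # Use the public predicate on TF builds that ship it ...
    if hasattr(tf, "is_symbolic_tensor"):
        return tf.is_symbolic_tensor(tensor)
    # ... else mimic the old exact-type check, which deliberately excludes
    # tf.Tensor subclasses (as the removed TODO(joshl) comment noted).
    return type(tensor) == tf.Tensor
```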
+ original_graph_element = None + for op_input in op_input_list: + if isinstance( + op_input, (tf.Operation, tf.__internal__.CompositeTensor) + ) or _is_symbolic_tensor(op_input): + graph_element = op_input + else: + graph_element = _as_graph_element(op_input) - if graph_element is not None: - if not graph: - original_graph_element = graph_element - graph = getattr(graph_element, 'graph', None) - elif original_graph_element is not None: - _assert_same_graph(original_graph_element, graph_element) - elif graph_element.graph is not graph: - raise ValueError('%s is not from the passed-in graph.' % graph_element) + if graph_element is not None: + if not graph: + original_graph_element = graph_element + graph = getattr(graph_element, "graph", None) + elif original_graph_element is not None: + _assert_same_graph(original_graph_element, graph_element) + elif graph_element.graph is not graph: + raise ValueError( + f"{graph_element} is not from the passed-in graph." + ) - # 2. If all else fails, we use the default graph, which is always there. - return graph or current_default_graph + # 2. If all else fails, we use the default graph, which is always there. + return graph or current_default_graph def _get_session(op_input_list=()): - """Returns the session object for the current thread.""" - global _SESSION - default_session = tf.compat.v1.get_default_session() - if default_session is not None: - session = default_session - else: - if tf.inside_function(): - raise RuntimeError('Cannot get session inside Tensorflow graph function.') - # If we don't have a session, or that session does not match the current - # graph, create and cache a new session. - if (getattr(_SESSION, 'session', None) is None or - _SESSION.session.graph is not _current_graph(op_input_list)): - # If we are creating the Session inside a tf.distribute.Strategy scope, - # we ask the strategy for the right session options to use. - if tf.distribute.has_strategy(): - configure_and_create_distributed_session( - tf.distribute.get_strategy()) - else: - _SESSION.session = tf.compat.v1.Session( - config=get_default_session_config()) - session = _SESSION.session - return session - - -@keras_export(v1=['keras.backend.get_session']) + """Returns the session object for the current thread.""" + global _SESSION + default_session = tf.compat.v1.get_default_session() + if default_session is not None: + session = default_session + else: + if tf.inside_function(): + raise RuntimeError( + "Cannot get session inside Tensorflow graph function." + ) + # If we don't have a session, or that session does not match the current + # graph, create and cache a new session. + if getattr( + _SESSION, "session", None + ) is None or _SESSION.session.graph is not _current_graph( + op_input_list + ): + # If we are creating the Session inside a tf.distribute.Strategy + # scope, we ask the strategy for the right session options to use. + if tf.distribute.has_strategy(): + configure_and_create_distributed_session( + tf.distribute.get_strategy() + ) + else: + _SESSION.session = tf.compat.v1.Session( + config=get_default_session_config() + ) + session = _SESSION.session + return session + + +@keras_export(v1=["keras.backend.get_session"]) def get_session(op_input_list=()): - """Returns the TF session to be used by the backend. + """Returns the TF session to be used by the backend. - If a default TensorFlow session is available, we will return it. + If a default TensorFlow session is available, we will return it. 
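`get_session` (continuing below) is only exported for the v1 API, so it needs graph mode. A minimal usage sketch, assuming TF 2.x with v1 compatibility behavior enabled:

```python
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # session APIs are graph-mode only

# Returns the active default session if there is one, else a cached global
# session, initializing tracked-but-uninitialized Keras variables on the way.
sess = tf.compat.v1.keras.backend.get_session()
```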
- Else, we will return the global Keras session assuming it matches - the current graph. + Else, we will return the global Keras session assuming it matches + the current graph. - If no global Keras session exists at this point: - we will create a new global session. + If no global Keras session exists at this point: + we will create a new global session. - Note that you can manually set the global session - via `K.set_session(sess)`. + Note that you can manually set the global session + via `K.set_session(sess)`. + + Args: + op_input_list: An optional sequence of tensors or ops, which will be used + to determine the current graph. Otherwise the default graph will be + used. - Args: - op_input_list: An option sequence of tensors or ops, which will be used - to determine the current graph. Otherwise the default graph will be - used. + Returns: + A TensorFlow session. + """ + session = _get_session(op_input_list) + if not _MANUAL_VAR_INIT: + with session.graph.as_default(): + _initialize_variables(session) + return session - Returns: - A TensorFlow session. - """ - session = _get_session(op_input_list) - if not _MANUAL_VAR_INIT: - with session.graph.as_default(): - _initialize_variables(session) - return session # Inject the get_session function to keras_deps to remove the dependency # from TFLite to Keras. @@ -773,980 +797,1018 @@ def get_session(op_input_list=()): def get_graph(): - if tf.executing_eagerly(): - global _GRAPH - if not getattr(_GRAPH, 'graph', None): - _GRAPH.graph = tf.__internal__.FuncGraph('keras_graph') - return _GRAPH.graph - else: - return tf.compat.v1.get_default_graph() + if tf.executing_eagerly(): + global _GRAPH + if not getattr(_GRAPH, "graph", None): + _GRAPH.graph = tf.__internal__.FuncGraph("keras_graph") + return _GRAPH.graph + else: + return tf.compat.v1.get_default_graph() @tf_contextlib.contextmanager def _scratch_graph(graph=None): - """Retrieve a shared and temporary func graph. - - The eager execution path lifts a subgraph from the keras global graph into - a scratch graph in order to create a function. DistributionStrategies, in - turn, constructs multiple functions as well as a final combined function. In - order for that logic to work correctly, all of the functions need to be - created on the same scratch FuncGraph. - - Args: - graph: A graph to be used as the current scratch graph. If not set then - a scratch graph will either be retrieved or created: - - Yields: - The current scratch graph. - """ - global _CURRENT_SCRATCH_GRAPH - scratch_graph = getattr(_CURRENT_SCRATCH_GRAPH, 'graph', None) - # If scratch graph and `graph` are both configured, they must match. - if (scratch_graph is not None and graph is not None and - scratch_graph is not graph): - raise ValueError('Multiple scratch graphs specified.') - - if scratch_graph: - yield scratch_graph - return - - graph = graph or tf.__internal__.FuncGraph('keras_scratch_graph') - try: - _CURRENT_SCRATCH_GRAPH.graph = graph - yield graph - finally: - _CURRENT_SCRATCH_GRAPH.graph = None - - -@keras_export(v1=['keras.backend.set_session']) + """Retrieve a shared and temporary func graph. + + The eager execution path lifts a subgraph from the keras global graph into + a scratch graph in order to create a function. DistributionStrategies, in + turn, constructs multiple functions as well as a final combined function. In + order for that logic to work correctly, all of the functions need to be + created on the same scratch FuncGraph. + + Args: + graph: A graph to be used as the current scratch graph.
If not set then + a scratch graph will either be retrieved or created: + + Yields: + The current scratch graph. + """ + global _CURRENT_SCRATCH_GRAPH + scratch_graph = getattr(_CURRENT_SCRATCH_GRAPH, "graph", None) + # If scratch graph and `graph` are both configured, they must match. + if ( + scratch_graph is not None + and graph is not None + and scratch_graph is not graph + ): + raise ValueError("Multiple scratch graphs specified.") + + if scratch_graph: + yield scratch_graph + return + + graph = graph or tf.__internal__.FuncGraph("keras_scratch_graph") + try: + _CURRENT_SCRATCH_GRAPH.graph = graph + yield graph + finally: + _CURRENT_SCRATCH_GRAPH.graph = None + + +@keras_export(v1=["keras.backend.set_session"]) def set_session(session): - """Sets the global TensorFlow session. + """Sets the global TensorFlow session. - Args: - session: A TF Session. - """ - global _SESSION - _SESSION.session = session + Args: + session: A TF Session. + """ + global _SESSION + _SESSION.session = session def get_default_session_config(): - if os.environ.get('OMP_NUM_THREADS'): - logging.warning( - 'OMP_NUM_THREADS is no longer used by the default Keras config. ' - 'To configure the number of threads, use tf.config.threading APIs.') + if os.environ.get("OMP_NUM_THREADS"): + logging.warning( + "OMP_NUM_THREADS is no longer used by the default Keras config. " + "To configure the number of threads, use tf.config.threading APIs." + ) - config = get_config() - config.allow_soft_placement = True + config = get_config() + config.allow_soft_placement = True - return config + return config def get_default_graph_uid_map(): - graph = tf.compat.v1.get_default_graph() - name_uid_map = PER_GRAPH_OBJECT_NAME_UIDS.get(graph, None) - if name_uid_map is None: - name_uid_map = collections.defaultdict(int) - PER_GRAPH_OBJECT_NAME_UIDS[graph] = name_uid_map - return name_uid_map + graph = tf.compat.v1.get_default_graph() + name_uid_map = PER_GRAPH_OBJECT_NAME_UIDS.get(graph, None) + if name_uid_map is None: + name_uid_map = collections.defaultdict(int) + PER_GRAPH_OBJECT_NAME_UIDS[graph] = name_uid_map + return name_uid_map # DEVICE MANIPULATION class _TfDeviceCaptureOp: - """Class for capturing the TF device scope.""" + """Class for capturing the TF device scope.""" - def __init__(self): - self.device = None + def __init__(self): + self.device = None - def _set_device(self, device): - """This method captures TF's explicit device scope setting.""" - if isinstance(device, tf.DeviceSpec): - device = device.to_string() - self.device = device + def _set_device(self, device): + """This method captures TF's explicit device scope setting.""" + if isinstance(device, tf.DeviceSpec): + device = device.to_string() + self.device = device - def _set_device_from_string(self, device_str): - self.device = device_str + def _set_device_from_string(self, device_str): + self.device = device_str def _get_current_tf_device(): - """Return explicit device of current context, otherwise returns `None`. - - Returns: - If the current device scope is explicitly set, it returns a string with - the device (`CPU` or `GPU`). If the scope is not explicitly set, it will - return `None`. - """ - graph = get_graph() - op = _TfDeviceCaptureOp() - graph._apply_device_functions(op) - if tf.__internal__.tf2.enabled(): - return tf.DeviceSpec.from_string(op.device) - else: - return tf.compat.v1.DeviceSpec.from_string(op.device) + """Return explicit device of current context, otherwise returns `None`. 
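On the `OMP_NUM_THREADS` warning in `get_default_session_config` above: the suggested replacement is the `tf.config.threading` API. For reference (the thread counts below are arbitrary examples, and these calls must run before TensorFlow initializes its thread pools):

```python
import tensorflow as tf

tf.config.threading.set_intra_op_parallelism_threads(4)  # within a single op
tf.config.threading.set_inter_op_parallelism_threads(2)  # concurrent ops
```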
+ + Returns: + If the current device scope is explicitly set, it returns a string with + the device (`CPU` or `GPU`). If the scope is not explicitly set, it will + return `None`. + """ + graph = get_graph() + op = _TfDeviceCaptureOp() + graph._apply_device_functions(op) + if tf.__internal__.tf2.enabled(): + return tf.DeviceSpec.from_string(op.device) + else: + return tf.compat.v1.DeviceSpec.from_string(op.device) def _is_current_explicit_device(device_type): - """Check if the current device is explicitly set on the device type specified. + """Check if the current device is explicitly set to `device_type`. - Args: - device_type: A string containing `GPU` or `CPU` (case-insensitive). + Args: + device_type: A string containing `GPU` or `CPU` (case-insensitive). - Returns: - A boolean indicating if the current device scope is explicitly set on the - device type. + Returns: + A boolean indicating if the current device scope is explicitly set on + the device type. - Raises: - ValueError: If the `device_type` string indicates an unsupported device. - """ - device_type = device_type.upper() - if device_type not in ['CPU', 'GPU']: - raise ValueError('`device_type` should be either "CPU" or "GPU".') - device = _get_current_tf_device() - return device is not None and device.device_type == device_type.upper() + Raises: + ValueError: If the `device_type` string indicates an unsupported device. + """ + device_type = device_type.upper() + if device_type not in ["CPU", "GPU"]: + raise ValueError('`device_type` should be either "CPU" or "GPU".') + device = _get_current_tf_device() + return device is not None and device.device_type == device_type.upper() def _get_available_gpus(): - """Get a list of available GPU devices (formatted as strings). + """Get a list of available GPU devices (formatted as strings). - Returns: - A list of available GPU devices. - """ - if tf.compat.v1.executing_eagerly_outside_functions(): - # Returns names of devices directly. - return [d.name for d in tf.config.list_logical_devices('GPU')] + Returns: + A list of available GPU devices. + """ + if tf.compat.v1.executing_eagerly_outside_functions(): + # Returns names of devices directly. + return [d.name for d in tf.config.list_logical_devices("GPU")] - global _LOCAL_DEVICES - if _LOCAL_DEVICES is None: - _LOCAL_DEVICES = get_session().list_devices() - return [x.name for x in _LOCAL_DEVICES if x.device_type == 'GPU'] + global _LOCAL_DEVICES + if _LOCAL_DEVICES is None: + _LOCAL_DEVICES = get_session().list_devices() + return [x.name for x in _LOCAL_DEVICES if x.device_type == "GPU"] def _has_nchw_support(): - """Check whether the current scope supports NCHW ops. + """Check whether the current scope supports NCHW ops. - TensorFlow does not support NCHW on CPU. Therefore we check if we are not - explicitly put on - CPU, and have GPUs available. In this case there will be soft-placing on the - GPU device. + TensorFlow does not support NCHW on CPU. Therefore we check if we are not + explicitly put on + CPU, and have GPUs available. In this case there will be soft-placing on the + GPU device. 
- Returns: - bool: if the current scope device placement would support nchw - """ - explicitly_on_cpu = _is_current_explicit_device('CPU') - gpus_available = bool(_get_available_gpus()) - return not explicitly_on_cpu and gpus_available + Returns: + bool: if the current scope device placement would support nchw + """ + explicitly_on_cpu = _is_current_explicit_device("CPU") + gpus_available = bool(_get_available_gpus()) + return not explicitly_on_cpu and gpus_available # VARIABLE MANIPULATION def _constant_to_tensor(x, dtype): - """Convert the input `x` to a tensor of type `dtype`. + """Convert the input `x` to a tensor of type `dtype`. - This is slightly faster than the _to_tensor function, at the cost of - handling fewer cases. + This is slightly faster than the _to_tensor function, at the cost of + handling fewer cases. - Args: - x: An object to be converted (numpy arrays, floats, ints and lists of - them). - dtype: The destination type. + Args: + x: An object to be converted (numpy arrays, floats, ints and lists of + them). + dtype: The destination type. - Returns: - A tensor. - """ - return tf.constant(x, dtype=dtype) + Returns: + A tensor. + """ + return tf.constant(x, dtype=dtype) def _to_tensor(x, dtype): - """Convert the input `x` to a tensor of type `dtype`. + """Convert the input `x` to a tensor of type `dtype`. - Args: - x: An object to be converted (numpy array, list, tensors). - dtype: The destination type. + Args: + x: An object to be converted (numpy array, list, tensors). + dtype: The destination type. - Returns: - A tensor. - """ - return tf.convert_to_tensor(x, dtype=dtype) + Returns: + A tensor. + """ + return tf.convert_to_tensor(x, dtype=dtype) -@keras_export('keras.backend.is_sparse') +@keras_export("keras.backend.is_sparse") @doc_controls.do_not_generate_docs def is_sparse(tensor): - """Returns whether a tensor is a sparse tensor. + """Returns whether a tensor is a sparse tensor. - Args: - tensor: A tensor instance. + Args: + tensor: A tensor instance. - Returns: - A boolean. + Returns: + A boolean. - Example: + Example: - >>> a = tf.keras.backend.placeholder((2, 2), sparse=False) - >>> print(tf.keras.backend.is_sparse(a)) - False - >>> b = tf.keras.backend.placeholder((2, 2), sparse=True) - >>> print(tf.keras.backend.is_sparse(b)) - True + >>> a = tf.keras.backend.placeholder((2, 2), sparse=False) + >>> print(tf.keras.backend.is_sparse(a)) + False + >>> b = tf.keras.backend.placeholder((2, 2), sparse=True) + >>> print(tf.keras.backend.is_sparse(b)) + True - """ - spec = getattr(tensor, '_type_spec', None) - if spec is not None: - return isinstance(spec, tf.SparseTensorSpec) - return isinstance(tensor, tf.SparseTensor) + """ + spec = getattr(tensor, "_type_spec", None) + if spec is not None: + return isinstance(spec, tf.SparseTensorSpec) + return isinstance(tensor, tf.SparseTensor) -@keras_export('keras.backend.to_dense') +@keras_export("keras.backend.to_dense") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def to_dense(tensor): - """Converts a sparse tensor into a dense tensor and returns it. + """Converts a sparse tensor into a dense tensor and returns it. - Args: - tensor: A tensor instance (potentially sparse). + Args: + tensor: A tensor instance (potentially sparse). - Returns: - A dense tensor. + Returns: + A dense tensor. 
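The doctest examples around here use v1 placeholders; for completeness, the same `is_sparse`/`to_dense` pair works on concrete tensors in eager mode as well (a small sketch, assumes TF 2.x):

```python
import tensorflow as tf

st = tf.SparseTensor(indices=[[0, 0]], values=[1.0], dense_shape=[2, 2])
print(tf.keras.backend.is_sparse(st))     # True

dense = tf.keras.backend.to_dense(st)
print(tf.keras.backend.is_sparse(dense))  # False
```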
- Examples: + Examples: - >>> b = tf.keras.backend.placeholder((2, 2), sparse=True) - >>> print(tf.keras.backend.is_sparse(b)) - True - >>> c = tf.keras.backend.to_dense(b) - >>> print(tf.keras.backend.is_sparse(c)) - False + >>> b = tf.keras.backend.placeholder((2, 2), sparse=True) + >>> print(tf.keras.backend.is_sparse(b)) + True + >>> c = tf.keras.backend.to_dense(b) + >>> print(tf.keras.backend.is_sparse(c)) + False - """ - if is_sparse(tensor): - return tf.sparse.to_dense(tensor) - else: - return tensor + """ + if is_sparse(tensor): + return tf.sparse.to_dense(tensor) + else: + return tensor -@keras_export('keras.backend.name_scope', v1=[]) +@keras_export("keras.backend.name_scope", v1=[]) @doc_controls.do_not_generate_docs def name_scope(name): - """A context manager for use when defining a Python op. + """A context manager for use when defining a Python op. - This context manager pushes a name scope, which will make the name of all - operations added within it have a prefix. + This context manager pushes a name scope, which will make the name of all + operations added within it have a prefix. - For example, to define a new Python op called `my_op`: + For example, to define a new Python op called `my_op`: - def my_op(a): - with tf.name_scope("MyOp") as scope: - a = tf.convert_to_tensor(a, name="a") - # Define some computation that uses `a`. - return foo_op(..., name=scope) + def my_op(a): + with tf.name_scope("MyOp") as scope: + a = tf.convert_to_tensor(a, name="a") + # Define some computation that uses `a`. + return foo_op(..., name=scope) - When executed, the Tensor `a` will have the name `MyOp/a`. + When executed, the Tensor `a` will have the name `MyOp/a`. + + Args: + name: The prefix to use on all names created within the name scope. - Args: - name: The prefix to use on all names created within the name scope. + Returns: + Name scope context manager. + """ + return tf.name_scope(name) - Returns: - Name scope context manager. - """ - return tf.name_scope(name) # Export V1 version. _v1_name_scope = tf.compat.v1.name_scope -keras_export(v1=['keras.backend.name_scope'], allow_multiple_exports=True)(_v1_name_scope) +keras_export(v1=["keras.backend.name_scope"])(_v1_name_scope) -@keras_export('keras.backend.variable') +@keras_export("keras.backend.variable") @doc_controls.do_not_generate_docs def variable(value, dtype=None, name=None, constraint=None): - """Instantiates a variable and returns it. - - Args: - value: Numpy array, initial value of the tensor. - dtype: Tensor type. - name: Optional name string for the tensor. - constraint: Optional projection function to be - applied to the variable after an optimizer update. - - Returns: - A variable instance (with Keras metadata included). - - Examples: - - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = tf.keras.backend.variable(value=val, dtype='float64', - ... name='example_var') - >>> tf.keras.backend.dtype(kvar) - 'float64' - >>> print(kvar) - - - """ - if dtype is None: - dtype = floatx() - if hasattr(value, 'tocoo'): - sparse_coo = value.tocoo() - indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims( - sparse_coo.col, 1)), 1) - v = tf.SparseTensor( - indices=indices, values=sparse_coo.data, dense_shape=sparse_coo.shape) - v._keras_shape = sparse_coo.shape + """Instantiates a variable and returns it. + + Args: + value: Numpy array, initial value of the tensor. + dtype: Tensor type. + name: Optional name string for the tensor. 
+ constraint: Optional projection function to be + applied to the variable after an optimizer update. + + Returns: + A variable instance (with Keras metadata included). + + Examples: + + >>> val = np.array([[1, 2], [3, 4]]) + >>> kvar = tf.keras.backend.variable(value=val, dtype='float64', + ... name='example_var') + >>> tf.keras.backend.dtype(kvar) + 'float64' + >>> print(kvar) + + + """ + if dtype is None: + dtype = floatx() + if hasattr(value, "tocoo"): + sparse_coo = value.tocoo() + indices = np.concatenate( + ( + np.expand_dims(sparse_coo.row, 1), + np.expand_dims(sparse_coo.col, 1), + ), + 1, + ) + v = tf.SparseTensor( + indices=indices, + values=sparse_coo.data, + dense_shape=sparse_coo.shape, + ) + v._keras_shape = sparse_coo.shape + return v + v = tf.Variable( + value, dtype=tf.as_dtype(dtype), name=name, constraint=constraint + ) + if isinstance(value, np.ndarray): + v._keras_shape = value.shape + elif hasattr(value, "shape"): + v._keras_shape = int_shape(value) + track_variable(v) return v - v = tf.Variable( - value, - dtype=tf.as_dtype(dtype), - name=name, - constraint=constraint) - if isinstance(value, np.ndarray): - v._keras_shape = value.shape - elif hasattr(value, 'shape'): - v._keras_shape = int_shape(value) - track_variable(v) - return v def track_tf_optimizer(tf_optimizer): - """Tracks the given TF optimizer for initialization of its variables.""" - if tf.executing_eagerly(): - return - optimizers = _GRAPH_TF_OPTIMIZERS[None] - optimizers.add(tf_optimizer) + """Tracks the given TF optimizer for initialization of its variables.""" + if tf.executing_eagerly(): + return + optimizers = _GRAPH_TF_OPTIMIZERS[None] + optimizers.add(tf_optimizer) -@keras_export('keras.__internal__.backend.track_variable', v1=[]) +@keras_export("keras.__internal__.backend.track_variable", v1=[]) def track_variable(v): - """Tracks the given variable for initialization.""" - if tf.executing_eagerly(): - return - graph = v.graph if hasattr(v, 'graph') else get_graph() - _GRAPH_VARIABLES[graph].add(v) + """Tracks the given variable for initialization.""" + if tf.executing_eagerly(): + return + graph = v.graph if hasattr(v, "graph") else get_graph() + _GRAPH_VARIABLES[graph].add(v) def observe_object_name(name): - """Observe a name and make sure it won't be used by `unique_object_name`.""" - OBSERVED_NAMES.add(name) - - -def unique_object_name(name, - name_uid_map=None, - avoid_names=None, - namespace='', - zero_based=False, - avoid_observed_names=False): - """Makes a object name (or arbitrary string) unique within a TensorFlow graph. - - Args: - name: String name to make unique. - name_uid_map: An optional defaultdict(int) to use when creating unique - names. If None (default), uses a per-Graph dictionary. - avoid_names: An optional set or dict with names which should not be used. If - None (default), don't avoid any names unless `avoid_observed_names` is - True. - namespace: Gets a name which is unique within the (graph, namespace). Layers - which are not Networks use a blank namespace and so get graph-global - names. - zero_based: If True, name sequences start with no suffix (e.g. "dense", - "dense_1"). If False, naming is one-based ("dense_1", "dense_2"). - avoid_observed_names: If True, avoid any names that have been observed by - `backend.observe_object_name`. - - Returns: - Unique string name. 
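Stepping back to `variable()` above: the `hasattr(value, "tocoo")` branch means SciPy sparse matrices become `tf.SparseTensor`s rather than dense variables. A small sketch (assumes SciPy is installed and TF 2.x):

```python
import numpy as np
import tensorflow as tf
from scipy import sparse

m = sparse.csr_matrix(np.eye(3))         # any matrix exposing .tocoo()
kvar = tf.keras.backend.variable(m)
print(tf.keras.backend.is_sparse(kvar))  # True: a tf.SparseTensor, not a tf.Variable
```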
- - Example: - - - unique_object_name('dense') # dense_1 - unique_object_name('dense') # dense_2 - - """ - if name_uid_map is None: - name_uid_map = get_default_graph_uid_map() - if avoid_names is None: - if avoid_observed_names: - avoid_names = OBSERVED_NAMES - else: - avoid_names = set() - proposed_name = None - while proposed_name is None or proposed_name in avoid_names: - name_key = (namespace, name) - if zero_based: - number = name_uid_map[name_key] - if number: - proposed_name = name + '_' + str(number) - else: - proposed_name = name - name_uid_map[name_key] += 1 - else: - name_uid_map[name_key] += 1 - proposed_name = name + '_' + str(name_uid_map[name_key]) - return proposed_name + """Observe a name and make sure it won't be used by `unique_object_name`.""" + OBSERVED_NAMES.add(name) + + +def unique_object_name( + name, + name_uid_map=None, + avoid_names=None, + namespace="", + zero_based=False, + avoid_observed_names=False, +): + """Makes an object name (or any string) unique within a Keras session. + + Args: + name: String name to make unique. + name_uid_map: An optional defaultdict(int) to use when creating unique + names. If None (default), uses a per-Graph dictionary. + avoid_names: An optional set or dict with names which should not be used. + If None (default), don't avoid any names unless `avoid_observed_names` + is True. + namespace: Gets a name which is unique within the (graph, namespace). + Layers which are not Networks use a blank namespace and so get + graph-global names. + zero_based: If True, name sequences start with no suffix (e.g. "dense", + "dense_1"). If False, naming is one-based ("dense_1", "dense_2"). + avoid_observed_names: If True, avoid any names that have been observed by + `backend.observe_object_name`. + + Returns: + Unique string name.
+ + Example: + + + unique_object_name('dense') # dense_1 + unique_object_name('dense') # dense_2 + + """ + if name_uid_map is None: + name_uid_map = get_default_graph_uid_map() + if avoid_names is None: + if avoid_observed_names: + avoid_names = OBSERVED_NAMES + else: + avoid_names = set() + proposed_name = None + while proposed_name is None or proposed_name in avoid_names: + name_key = (namespace, name) + if zero_based: + number = name_uid_map[name_key] + if number: + proposed_name = name + "_" + str(number) + else: + proposed_name = name + name_uid_map[name_key] += 1 + else: + name_uid_map[name_key] += 1 + proposed_name = name + "_" + str(name_uid_map[name_key]) + return proposed_name def _get_variables(graph=None): - """Returns variables corresponding to the given graph for initialization.""" - assert not tf.executing_eagerly() - variables = _GRAPH_VARIABLES[graph] - for opt in _GRAPH_TF_OPTIMIZERS[graph]: - variables.update(opt.optimizer.variables()) - return variables + """Returns variables corresponding to the given graph for initialization.""" + assert not tf.executing_eagerly() + variables = _GRAPH_VARIABLES[graph] + for opt in _GRAPH_TF_OPTIMIZERS[graph]: + variables.update(opt.optimizer.variables()) + return variables -@keras_export('keras.__internal__.backend.initialize_variables', v1=[]) +@keras_export("keras.__internal__.backend.initialize_variables", v1=[]) def _initialize_variables(session): - """Utility to initialize uninitialized variables on the fly.""" - variables = _get_variables(get_graph()) - candidate_vars = [] - for v in variables: - if not getattr(v, '_keras_initialized', False): - candidate_vars.append(v) - if candidate_vars: - # This step is expensive, so we only run it on variables not already - # marked as initialized. - is_initialized = session.run( - [tf.compat.v1.is_variable_initialized(v) for v in candidate_vars]) - # TODO(kathywu): Some metric variables loaded from SavedModel are never - # actually used, and do not have an initializer. - should_be_initialized = [ - (not is_initialized[n]) and v.initializer is not None - for n, v in enumerate(candidate_vars)] - uninitialized_vars = [] - for flag, v in zip(should_be_initialized, candidate_vars): - if flag: - uninitialized_vars.append(v) - v._keras_initialized = True - if uninitialized_vars: - session.run(tf.compat.v1.variables_initializer(uninitialized_vars)) - - -@keras_export('keras.backend.constant') + """Utility to initialize uninitialized variables on the fly.""" + variables = _get_variables(get_graph()) + candidate_vars = [] + for v in variables: + if not getattr(v, "_keras_initialized", False): + candidate_vars.append(v) + if candidate_vars: + # This step is expensive, so we only run it on variables not already + # marked as initialized. + is_initialized = session.run( + [tf.compat.v1.is_variable_initialized(v) for v in candidate_vars] + ) + # TODO(kathywu): Some metric variables loaded from SavedModel are never + # actually used, and do not have an initializer. 
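To make the `unique_object_name` doctest above concrete, including the `zero_based` flag (run from this repo; counters are kept per graph and namespace, so the results assume a fresh session):

```python
from keras import backend

backend.unique_object_name("dense")                  # 'dense_1' (one-based)
backend.unique_object_name("dense")                  # 'dense_2'
backend.unique_object_name("conv", zero_based=True)  # 'conv' (no suffix first)
backend.unique_object_name("conv", zero_based=True)  # 'conv_1'
```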
+ should_be_initialized = [ + (not is_initialized[n]) and v.initializer is not None + for n, v in enumerate(candidate_vars) + ] + uninitialized_vars = [] + for flag, v in zip(should_be_initialized, candidate_vars): + if flag: + uninitialized_vars.append(v) + v._keras_initialized = True + if uninitialized_vars: + session.run(tf.compat.v1.variables_initializer(uninitialized_vars)) + + +@keras_export("keras.backend.constant") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def constant(value, dtype=None, shape=None, name=None): - """Creates a constant tensor. + """Creates a constant tensor. - Args: - value: A constant value (or list) - dtype: The type of the elements of the resulting tensor. - shape: Optional dimensions of resulting tensor. - name: Optional name for the tensor. + Args: + value: A constant value (or list) + dtype: The type of the elements of the resulting tensor. + shape: Optional dimensions of resulting tensor. + name: Optional name for the tensor. - Returns: - A Constant Tensor. - """ - if dtype is None: - dtype = floatx() + Returns: + A Constant Tensor. + """ + if dtype is None: + dtype = floatx() - return tf.constant(value, dtype=dtype, shape=shape, name=name) + return tf.constant(value, dtype=dtype, shape=shape, name=name) -@keras_export('keras.backend.is_keras_tensor') +@keras_export("keras.backend.is_keras_tensor") def is_keras_tensor(x): - """Returns whether `x` is a Keras tensor. - - A "Keras tensor" is a tensor that was returned by a Keras layer, - (`Layer` class) or by `Input`. - - Args: - x: A candidate tensor. - - Returns: - A boolean: Whether the argument is a Keras tensor. - - Raises: - ValueError: In case `x` is not a symbolic tensor. - - Examples: - - >>> np_var = np.array([1, 2]) - >>> # A numpy array is not a symbolic tensor. - >>> tf.keras.backend.is_keras_tensor(np_var) - Traceback (most recent call last): - ... - ValueError: Unexpectedly found an instance of type ``. - Expected a symbolic tensor instance. - >>> keras_var = tf.keras.backend.variable(np_var) - >>> # A variable created with the keras backend is not a Keras tensor. - >>> tf.keras.backend.is_keras_tensor(keras_var) - False - >>> keras_placeholder = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> # A placeholder is a Keras tensor. - >>> tf.keras.backend.is_keras_tensor(keras_placeholder) - True - >>> keras_input = tf.keras.layers.Input([10]) - >>> # An Input is a Keras tensor. - >>> tf.keras.backend.is_keras_tensor(keras_input) - True - >>> keras_layer_output = tf.keras.layers.Dense(10)(keras_input) - >>> # Any Keras layer output is a Keras tensor. - >>> tf.keras.backend.is_keras_tensor(keras_layer_output) - True - - """ - if not isinstance(x, - (tf.Tensor, tf.Variable, - tf.SparseTensor, tf.RaggedTensor, - keras_tensor.KerasTensor)): - raise ValueError('Unexpectedly found an instance of type `' + str(type(x)) + - '`. Expected a symbolic tensor instance.') - if tf.compat.v1.executing_eagerly_outside_functions(): - return isinstance(x, keras_tensor.KerasTensor) - return hasattr(x, '_keras_history') - - -@keras_export('keras.backend.placeholder') -@doc_controls.do_not_generate_docs -def placeholder(shape=None, - ndim=None, - dtype=None, - sparse=False, - name=None, - ragged=False): - """Instantiates a placeholder tensor and returns it. - - Args: - shape: Shape of the placeholder - (integer tuple, may include `None` entries). - ndim: Number of axes of the tensor. - At least one of {`shape`, `ndim`} must be specified. - If both are specified, `shape` is used. 
- dtype: Placeholder type. - sparse: Boolean, whether the placeholder should have a sparse type. - name: Optional name string for the placeholder. - ragged: Boolean, whether the placeholder should have a ragged type. - In this case, values of 'None' in the 'shape' argument represent - ragged dimensions. For more information about RaggedTensors, see this - [guide](https://www.tensorflow.org/guide/ragged_tensors). - - Raises: - ValueError: If called with sparse = True and ragged = True. - - Returns: - Tensor instance (with Keras metadata included). - - Examples: - - - >>> input_ph = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> input_ph - - - """ - if sparse and ragged: - raise ValueError( - 'Cannot set both sparse and ragged to True when creating a placeholder.' - ) - if dtype is None: - dtype = floatx() - if not shape: - if ndim: - shape = (None,) * ndim - if tf.compat.v1.executing_eagerly_outside_functions(): - if sparse: - spec = tf.SparseTensorSpec( - shape=shape, dtype=dtype) - elif ragged: - ragged_rank = 0 - for i in range(1, len(shape)): - # Hacky because could be tensorshape or tuple maybe? - # Or just tensorshape? - if shape[i] is None or ( - hasattr(shape[i], 'value') and - shape[i].value is None): - ragged_rank = i - spec = tf.RaggedTensorSpec( - shape=shape, dtype=dtype, ragged_rank=ragged_rank) - else: - spec = tf.TensorSpec( - shape=shape, dtype=dtype, name=name) - x = keras_tensor.keras_tensor_from_type_spec(spec, name=name) - else: - with get_graph().as_default(): - if sparse: - x = tf.compat.v1.sparse_placeholder(dtype, shape=shape, name=name) - elif ragged: - ragged_rank = 0 - for i in range(1, len(shape)): - if shape[i] is None: - ragged_rank = i - type_spec = tf.RaggedTensorSpec( - shape=shape, dtype=dtype, ragged_rank=ragged_rank) - def tensor_spec_to_placeholder(tensorspec): - return tf.compat.v1.placeholder(tensorspec.dtype, tensorspec.shape) - x = tf.nest.map_structure(tensor_spec_to_placeholder, type_spec, - expand_composites=True) - else: - x = tf.compat.v1.placeholder(dtype, shape=shape, name=name) - - if tf.executing_eagerly(): - # Add keras_history connectivity information to the placeholder - # when the placeholder is built in a top-level eager context - # (intended to be used with keras.backend.function) - from keras.engine import input_layer # pylint: disable=g-import-not-at-top - x = input_layer.Input(tensor=x) - x._is_backend_placeholder = True - - return x + """Returns whether `x` is a Keras tensor. + A "Keras tensor" is a tensor that was returned by a Keras layer, + (`Layer` class) or by `Input`. -def is_placeholder(x): - """Returns whether `x` is a placeholder. + Args: + x: A candidate tensor. + + Returns: + A boolean: Whether the argument is a Keras tensor. + + Raises: + ValueError: In case `x` is not a symbolic tensor. + + Examples: + + >>> np_var = np.array([1, 2]) + >>> # A numpy array is not a symbolic tensor. + >>> tf.keras.backend.is_keras_tensor(np_var) + Traceback (most recent call last): + ... + ValueError: Unexpectedly found an instance of type + ``. + Expected a symbolic tensor instance. + >>> keras_var = tf.keras.backend.variable(np_var) + >>> # A variable created with the keras backend is not a Keras tensor. + >>> tf.keras.backend.is_keras_tensor(keras_var) + False + >>> keras_placeholder = tf.keras.backend.placeholder(shape=(2, 4, 5)) + >>> # A placeholder is a Keras tensor. + >>> tf.keras.backend.is_keras_tensor(keras_placeholder) + True + >>> keras_input = tf.keras.layers.Input([10]) + >>> # An Input is a Keras tensor. 
+ >>> tf.keras.backend.is_keras_tensor(keras_input) + True + >>> keras_layer_output = tf.keras.layers.Dense(10)(keras_input) + >>> # Any Keras layer output is a Keras tensor. + >>> tf.keras.backend.is_keras_tensor(keras_layer_output) + True + + """ + if not isinstance( + x, + ( + tf.Tensor, + tf.Variable, + tf.SparseTensor, + tf.RaggedTensor, + keras_tensor.KerasTensor, + ), + ): + raise ValueError( + "Unexpectedly found an instance of type `" + + str(type(x)) + + "`. Expected a symbolic tensor instance." + ) + if tf.compat.v1.executing_eagerly_outside_functions(): + return isinstance(x, keras_tensor.KerasTensor) + return hasattr(x, "_keras_history") + + +@keras_export("keras.backend.placeholder") +@doc_controls.do_not_generate_docs +def placeholder( + shape=None, ndim=None, dtype=None, sparse=False, name=None, ragged=False +): + """Instantiates a placeholder tensor and returns it. + + Args: + shape: Shape of the placeholder + (integer tuple, may include `None` entries). + ndim: Number of axes of the tensor. + At least one of {`shape`, `ndim`} must be specified. + If both are specified, `shape` is used. + dtype: Placeholder type. + sparse: Boolean, whether the placeholder should have a sparse type. + name: Optional name string for the placeholder. + ragged: Boolean, whether the placeholder should have a ragged type. + In this case, values of 'None' in the 'shape' argument represent + ragged dimensions. For more information about RaggedTensors, see + this [guide](https://www.tensorflow.org/guide/ragged_tensor). + + Raises: + ValueError: If called with sparse = True and ragged = True. + + Returns: + Tensor instance (with Keras metadata included). + + Examples: - Args: - x: A candidate placeholder. - Returns: - Boolean. - """ - try: + >>> input_ph = tf.keras.backend.placeholder(shape=(2, 4, 5)) + >>> input_ph + + + """ + if sparse and ragged: + raise ValueError( + "Cannot set both sparse and ragged to " + "True when creating a placeholder." + ) + if dtype is None: + dtype = floatx() + if not shape: + if ndim: + shape = (None,) * ndim if tf.compat.v1.executing_eagerly_outside_functions(): - return hasattr(x, '_is_backend_placeholder') - from keras.utils import tf_utils # pylint: disable=g-import-not-at-top - if tf_utils.is_extension_type(x): - flat_components = tf.nest.flatten(x, expand_composites=True) - return py_any(is_placeholder(c) for c in flat_components) + if sparse: + spec = tf.SparseTensorSpec(shape=shape, dtype=dtype) + elif ragged: + ragged_rank = 0 + for i in range(1, len(shape)): + # Hacky because could be tensorshape or tuple maybe? + # Or just tensorshape? 
+ if shape[i] is None or ( + hasattr(shape[i], "value") and shape[i].value is None + ): + ragged_rank = i + spec = tf.RaggedTensorSpec( + shape=shape, dtype=dtype, ragged_rank=ragged_rank + ) + else: + spec = tf.TensorSpec(shape=shape, dtype=dtype, name=name) + x = keras_tensor.keras_tensor_from_type_spec(spec, name=name) else: - return x.op.type == 'Placeholder' - except AttributeError: - return False + with get_graph().as_default(): + if sparse: + x = tf.compat.v1.sparse_placeholder( + dtype, shape=shape, name=name + ) + elif ragged: + ragged_rank = 0 + for i in range(1, len(shape)): + if shape[i] is None: + ragged_rank = i + type_spec = tf.RaggedTensorSpec( + shape=shape, dtype=dtype, ragged_rank=ragged_rank + ) + + def tensor_spec_to_placeholder(tensorspec): + return tf.compat.v1.placeholder( + tensorspec.dtype, tensorspec.shape + ) + + x = tf.nest.map_structure( + tensor_spec_to_placeholder, + type_spec, + expand_composites=True, + ) + else: + x = tf.compat.v1.placeholder(dtype, shape=shape, name=name) + + if tf.executing_eagerly(): + # Add keras_history connectivity information to the placeholder + # when the placeholder is built in a top-level eager context + # (intended to be used with keras.backend.function) + from keras.engine import ( + input_layer, + ) + + x = input_layer.Input(tensor=x) + x._is_backend_placeholder = True + + return x -@keras_export('keras.backend.shape') +def is_placeholder(x): + """Returns whether `x` is a placeholder. + + Args: + x: A candidate placeholder. + + Returns: + Boolean. + """ + try: + if tf.compat.v1.executing_eagerly_outside_functions(): + return hasattr(x, "_is_backend_placeholder") + + # TODO(b/246438937): Remove the special case for tf.Variable once + # tf.Variable becomes CompositeTensor and will be expanded into + # dt_resource tensors. + if tf_utils.is_extension_type(x) and not isinstance(x, tf.Variable): + flat_components = tf.nest.flatten(x, expand_composites=True) + return py_any(is_placeholder(c) for c in flat_components) + else: + return x.op.type == "Placeholder" + except AttributeError: + return False + + +@keras_export("keras.backend.shape") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def shape(x): - """Returns the symbolic shape of a tensor or variable. + """Returns the symbolic shape of a tensor or variable. - Args: - x: A tensor or variable. + Args: + x: A tensor or variable. - Returns: - A symbolic shape (which is itself a tensor). + Returns: + A symbolic shape (which is itself a tensor). - Examples: + Examples: - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = tf.keras.backend.variable(value=val) - >>> tf.keras.backend.shape(kvar) - - >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> tf.keras.backend.shape(input) - + >>> val = np.array([[1, 2], [3, 4]]) + >>> kvar = tf.keras.backend.variable(value=val) + >>> tf.keras.backend.shape(kvar) + + >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) + >>> tf.keras.backend.shape(input) + - """ - return tf.shape(x) + """ + return tf.shape(x) -@keras_export('keras.backend.int_shape') +@keras_export("keras.backend.int_shape") @doc_controls.do_not_generate_docs def int_shape(x): - """Returns the shape of tensor or variable as a tuple of int or None entries. - - Args: - x: Tensor or variable. - - Returns: - A tuple of integers (or None entries). 
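Rounding out `placeholder` above: its docstring example is dense-only, so here is a small ragged sketch (assumes TF 2.x, where the ragged branch builds a `RaggedTensorSpec` and returns a `KerasTensor`); `None` entries after the first axis are what drive the `ragged_rank` scan in the code:

```python
import tensorflow as tf

rt_ph = tf.keras.backend.placeholder(shape=(2, None), ragged=True)
print(rt_ph.shape)  # (2, None)
```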
- - Examples: - - >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> tf.keras.backend.int_shape(input) - (2, 4, 5) - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = tf.keras.backend.variable(value=val) - >>> tf.keras.backend.int_shape(kvar) - (2, 2) - - """ - try: - shape = x.shape - if not isinstance(shape, tuple): - shape = tuple(shape.as_list()) - return shape - except ValueError: - return None + """Returns shape of tensor/variable as a tuple of int/None entries. + + Args: + x: Tensor or variable. + Returns: + A tuple of integers (or None entries). + + Examples: + + >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) + >>> tf.keras.backend.int_shape(input) + (2, 4, 5) + >>> val = np.array([[1, 2], [3, 4]]) + >>> kvar = tf.keras.backend.variable(value=val) + >>> tf.keras.backend.int_shape(kvar) + (2, 2) + + """ + try: + shape = x.shape + if not isinstance(shape, tuple): + shape = tuple(shape.as_list()) + return shape + except ValueError: + return None -@keras_export('keras.backend.ndim') + +@keras_export("keras.backend.ndim") @doc_controls.do_not_generate_docs def ndim(x): - """Returns the number of axes in a tensor, as an integer. + """Returns the number of axes in a tensor, as an integer. - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - Integer (scalar), number of axes. + Returns: + Integer (scalar), number of axes. - Examples: + Examples: - >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = tf.keras.backend.variable(value=val) - >>> tf.keras.backend.ndim(input) - 3 - >>> tf.keras.backend.ndim(kvar) - 2 + >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) + >>> val = np.array([[1, 2], [3, 4]]) + >>> kvar = tf.keras.backend.variable(value=val) + >>> tf.keras.backend.ndim(input) + 3 + >>> tf.keras.backend.ndim(kvar) + 2 - """ - return x.shape.rank + """ + return x.shape.rank -@keras_export('keras.backend.dtype') +@keras_export("keras.backend.dtype") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def dtype(x): - """Returns the dtype of a Keras tensor or variable, as a string. - - Args: - x: Tensor or variable. - - Returns: - String, dtype of `x`. + """Returns the dtype of a Keras tensor or variable, as a string. - Examples: + Args: + x: Tensor or variable. - >>> tf.keras.backend.dtype(tf.keras.backend.placeholder(shape=(2,4,5))) - 'float32' - >>> tf.keras.backend.dtype(tf.keras.backend.placeholder(shape=(2,4,5), - ... dtype='float32')) - 'float32' - >>> tf.keras.backend.dtype(tf.keras.backend.placeholder(shape=(2,4,5), - ... dtype='float64')) - 'float64' - >>> kvar = tf.keras.backend.variable(np.array([[1, 2], [3, 4]])) - >>> tf.keras.backend.dtype(kvar) - 'float32' - >>> kvar = tf.keras.backend.variable(np.array([[1, 2], [3, 4]]), - ... dtype='float32') - >>> tf.keras.backend.dtype(kvar) - 'float32' + Returns: + String, dtype of `x`. + + Examples: + + >>> tf.keras.backend.dtype(tf.keras.backend.placeholder(shape=(2,4,5))) + 'float32' + >>> tf.keras.backend.dtype(tf.keras.backend.placeholder(shape=(2,4,5), + ... dtype='float32')) + 'float32' + >>> tf.keras.backend.dtype(tf.keras.backend.placeholder(shape=(2,4,5), + ... dtype='float64')) + 'float64' + >>> kvar = tf.keras.backend.variable(np.array([[1, 2], [3, 4]])) + >>> tf.keras.backend.dtype(kvar) + 'float32' + >>> kvar = tf.keras.backend.variable(np.array([[1, 2], [3, 4]]), + ... 
dtype='float32') + >>> tf.keras.backend.dtype(kvar) + 'float32' - """ - return x.dtype.base_dtype.name + """ + return x.dtype.base_dtype.name @doc_controls.do_not_generate_docs def dtype_numpy(x): - """Returns the numpy dtype of a Keras tensor or variable. + """Returns the numpy dtype of a Keras tensor or variable. - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - numpy.dtype, dtype of `x`. - """ - return tf.as_dtype(x.dtype).as_numpy_dtype + Returns: + numpy.dtype, dtype of `x`. + """ + return tf.as_dtype(x.dtype).as_numpy_dtype -@keras_export('keras.backend.eval') +@keras_export("keras.backend.eval") @doc_controls.do_not_generate_docs def eval(x): - """Evaluates the value of a variable. + """Evaluates the value of a variable. - Args: - x: A variable. + Args: + x: A variable. - Returns: - A Numpy array. + Returns: + A Numpy array. - Examples: + Examples: - >>> kvar = tf.keras.backend.variable(np.array([[1, 2], [3, 4]]), - ... dtype='float32') - >>> tf.keras.backend.eval(kvar) - array([[1., 2.], - [3., 4.]], dtype=float32) + >>> kvar = tf.keras.backend.variable(np.array([[1, 2], [3, 4]]), + ... dtype='float32') + >>> tf.keras.backend.eval(kvar) + array([[1., 2.], + [3., 4.]], dtype=float32) - """ - return get_value(to_dense(x)) + """ + return get_value(to_dense(x)) -@keras_export('keras.backend.zeros') +@keras_export("keras.backend.zeros") @doc_controls.do_not_generate_docs def zeros(shape, dtype=None, name=None): - """Instantiates an all-zeros variable and returns it. - - Args: - shape: Tuple or list of integers, shape of returned Keras variable - dtype: data type of returned Keras variable - name: name of returned Keras variable - - Returns: - A variable (including Keras metadata), filled with `0.0`. - Note that if `shape` was symbolic, we cannot return a variable, - and will return a dynamically-shaped tensor instead. - - Example: - - >>> kvar = tf.keras.backend.zeros((3,4)) - >>> tf.keras.backend.eval(kvar) - array([[0., 0., 0., 0.], - [0., 0., 0., 0.], - [0., 0., 0., 0.]], dtype=float32) - >>> A = tf.constant([1,2,3]) - >>> kvar2 = tf.keras.backend.zeros(A.shape) # [0., 0., 0.] - >>> tf.keras.backend.eval(kvar2) - array([0., 0., 0.], dtype=float32) - >>> kvar3 = tf.keras.backend.zeros(A.shape,dtype=tf.int32) - >>> tf.keras.backend.eval(kvar3) - array([0, 0, 0], dtype=int32) - >>> kvar4 = tf.keras.backend.zeros([2,3]) - >>> tf.keras.backend.eval(kvar4) - array([[0., 0., 0.], - [0., 0., 0.]], dtype=float32) - - """ - with tf.init_scope(): - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - v = tf.zeros(shape=shape, dtype=tf_dtype, name=name) - if py_all(v.shape.as_list()): - return variable(v, dtype=dtype, name=name) - return v + """Instantiates an all-zeros variable and returns it. + Args: + shape: Tuple or list of integers, shape of returned Keras variable + dtype: data type of returned Keras variable + name: name of returned Keras variable + + Returns: + A variable (including Keras metadata), filled with `0.0`. + Note that if `shape` was symbolic, we cannot return a variable, + and will return a dynamically-shaped tensor instead. + + Example: + + >>> kvar = tf.keras.backend.zeros((3,4)) + >>> tf.keras.backend.eval(kvar) + array([[0., 0., 0., 0.], + [0., 0., 0., 0.], + [0., 0., 0., 0.]], dtype=float32) + >>> A = tf.constant([1,2,3]) + >>> kvar2 = tf.keras.backend.zeros(A.shape) # [0., 0., 0.] 
+ >>> tf.keras.backend.eval(kvar2) + array([0., 0., 0.], dtype=float32) + >>> kvar3 = tf.keras.backend.zeros(A.shape,dtype=tf.int32) + >>> tf.keras.backend.eval(kvar3) + array([0, 0, 0], dtype=int32) + >>> kvar4 = tf.keras.backend.zeros([2,3]) + >>> tf.keras.backend.eval(kvar4) + array([[0., 0., 0.], + [0., 0., 0.]], dtype=float32) + + """ + with tf.init_scope(): + if dtype is None: + dtype = floatx() + tf_dtype = tf.as_dtype(dtype) + v = tf.zeros(shape=shape, dtype=tf_dtype, name=name) + if py_all(v.shape.as_list()): + return variable(v, dtype=dtype, name=name) + return v -@keras_export('keras.backend.ones') + +@keras_export("keras.backend.ones") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def ones(shape, dtype=None, name=None): - """Instantiates an all-ones variable and returns it. + """Instantiates an all-ones variable and returns it. - Args: - shape: Tuple of integers, shape of returned Keras variable. - dtype: String, data type of returned Keras variable. - name: String, name of returned Keras variable. + Args: + shape: Tuple of integers, shape of returned Keras variable. + dtype: String, data type of returned Keras variable. + name: String, name of returned Keras variable. - Returns: - A Keras variable, filled with `1.0`. - Note that if `shape` was symbolic, we cannot return a variable, - and will return a dynamically-shaped tensor instead. + Returns: + A Keras variable, filled with `1.0`. + Note that if `shape` was symbolic, we cannot return a variable, + and will return a dynamically-shaped tensor instead. - Example: + Example: - >>> kvar = tf.keras.backend.ones((3,4)) - >>> tf.keras.backend.eval(kvar) - array([[1., 1., 1., 1.], - [1., 1., 1., 1.], - [1., 1., 1., 1.]], dtype=float32) + >>> kvar = tf.keras.backend.ones((3,4)) + >>> tf.keras.backend.eval(kvar) + array([[1., 1., 1., 1.], + [1., 1., 1., 1.], + [1., 1., 1., 1.]], dtype=float32) - """ - with tf.init_scope(): - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - v = tf.ones(shape=shape, dtype=tf_dtype, name=name) - if py_all(v.shape.as_list()): - return variable(v, dtype=dtype, name=name) - return v + """ + with tf.init_scope(): + if dtype is None: + dtype = floatx() + tf_dtype = tf.as_dtype(dtype) + v = tf.ones(shape=shape, dtype=tf_dtype, name=name) + if py_all(v.shape.as_list()): + return variable(v, dtype=dtype, name=name) + return v -@keras_export('keras.backend.eye') +@keras_export("keras.backend.eye") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def eye(size, dtype=None, name=None): - """Instantiate an identity matrix and returns it. + """Instantiate an identity matrix and returns it. - Args: - size: Integer, number of rows/columns. - dtype: String, data type of returned Keras variable. - name: String, name of returned Keras variable. + Args: + size: Integer, number of rows/columns. + dtype: String, data type of returned Keras variable. + name: String, name of returned Keras variable. - Returns: - A Keras variable, an identity matrix. + Returns: + A Keras variable, an identity matrix. 
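Worth noting while reviewing `zeros`/`ones`: the `py_all(v.shape.as_list())` guard means a fully static shape yields a `tf.Variable`, while any unknown dimension falls back to a plain tensor. A small sketch of the static case (values match the docstrings):

```python
import tensorflow as tf

K = tf.keras.backend

kvar = K.zeros((2, 3))                # every dim known -> tf.Variable
print(isinstance(kvar, tf.Variable))  # True
print(K.dtype(kvar))                  # 'float32', i.e. floatx() by default

kvar2 = K.ones(tf.constant([1, 2, 3]).shape, dtype="int32")
print(K.eval(kvar2))                  # [1 1 1] as a NumPy array
```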
- Example: + Example: - >>> kvar = tf.keras.backend.eye(3) - >>> tf.keras.backend.eval(kvar) - array([[1., 0., 0.], - [0., 1., 0.], - [0., 0., 1.]], dtype=float32) + >>> kvar = tf.keras.backend.eye(3) + >>> tf.keras.backend.eval(kvar) + array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]], dtype=float32) - """ - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - return variable(tf.eye(size, dtype=tf_dtype), dtype, name) + """ + if dtype is None: + dtype = floatx() + tf_dtype = tf.as_dtype(dtype) + return variable(tf.eye(size, dtype=tf_dtype), dtype, name) -@keras_export('keras.backend.zeros_like') +@keras_export("keras.backend.zeros_like") @doc_controls.do_not_generate_docs def zeros_like(x, dtype=None, name=None): - """Instantiates an all-zeros variable of the same shape as another tensor. + """Instantiates an all-zeros variable of the same shape as another tensor. - Args: - x: Keras variable or Keras tensor. - dtype: dtype of returned Keras variable. - `None` uses the dtype of `x`. - name: name for the variable to create. + Args: + x: Keras variable or Keras tensor. + dtype: dtype of returned Keras variable. + `None` uses the dtype of `x`. + name: name for the variable to create. - Returns: - A Keras variable with the shape of `x` filled with zeros. + Returns: + A Keras variable with the shape of `x` filled with zeros. - Example: + Example: - ```python - kvar = tf.keras.backend.variable(np.random.random((2,3))) - kvar_zeros = tf.keras.backend.zeros_like(kvar) - K.eval(kvar_zeros) - # array([[ 0., 0., 0.], [ 0., 0., 0.]], dtype=float32) - ``` - """ - return tf.zeros_like(x, dtype=dtype, name=name) + ```python + kvar = tf.keras.backend.variable(np.random.random((2,3))) + kvar_zeros = tf.keras.backend.zeros_like(kvar) + K.eval(kvar_zeros) + # array([[ 0., 0., 0.], [ 0., 0., 0.]], dtype=float32) + ``` + """ + return tf.zeros_like(x, dtype=dtype, name=name) -@keras_export('keras.backend.ones_like') +@keras_export("keras.backend.ones_like") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def ones_like(x, dtype=None, name=None): - """Instantiates an all-ones variable of the same shape as another tensor. + """Instantiates an all-ones variable of the same shape as another tensor. - Args: - x: Keras variable or tensor. - dtype: String, dtype of returned Keras variable. - None uses the dtype of x. - name: String, name for the variable to create. + Args: + x: Keras variable or tensor. + dtype: String, dtype of returned Keras variable. + None uses the dtype of x. + name: String, name for the variable to create. - Returns: - A Keras variable with the shape of x filled with ones. + Returns: + A Keras variable with the shape of x filled with ones. - Example: + Example: - >>> kvar = tf.keras.backend.variable(np.random.random((2,3))) - >>> kvar_ones = tf.keras.backend.ones_like(kvar) - >>> tf.keras.backend.eval(kvar_ones) - array([[1., 1., 1.], - [1., 1., 1.]], dtype=float32) + >>> kvar = tf.keras.backend.variable(np.random.random((2,3))) + >>> kvar_ones = tf.keras.backend.ones_like(kvar) + >>> tf.keras.backend.eval(kvar_ones) + array([[1., 1., 1.], + [1., 1., 1.]], dtype=float32) - """ - return tf.ones_like(x, dtype=dtype, name=name) + """ + return tf.ones_like(x, dtype=dtype, name=name) def identity(x, name=None): - """Returns a tensor with the same content as the input tensor. + """Returns a tensor with the same content as the input tensor. - Args: - x: The input tensor. - name: String, name for the variable to create. + Args: + x: The input tensor. 
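Similarly for `eye`, `zeros_like`, and `ones_like`: the `*_like` helpers inherit shape and dtype from their argument unless overridden. A minimal sketch:

```python
import tensorflow as tf

K = tf.keras.backend

x = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.int32)
print(K.eval(K.zeros_like(x)))                  # int32 zeros, shape (2, 3)
print(K.eval(K.ones_like(x, dtype="float32")))  # dtype can be overridden
print(K.eval(K.eye(2)))                         # [[1., 0.], [0., 1.]]
```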
+ name: String, name for the variable to create. - Returns: - A tensor of the same shape, type and content. - """ - return tf.identity(x, name=name) + Returns: + A tensor of the same shape, type and content. + """ + return tf.identity(x, name=name) # Global flag to enforce tf.random.Generator for RandomGenerator. @@ -1762,2244 +1824,2342 @@ def identity(x, name=None): # way, so that each client of the program could start with same seed. This is # very important for certain use case that requires all the client to have their # state in sync. This instance will be set when user call -# `tf.keras.util.set_random_seed()` +# `tf.keras.utils.set_random_seed()` _SEED_GENERATOR = threading.local() -@keras_export('keras.backend.experimental.is_tf_random_generator_enabled', - v1=[]) +@keras_export( + "keras.backend.experimental.is_tf_random_generator_enabled", v1=[] +) def is_tf_random_generator_enabled(): - """Check whether `tf.random.Generator` is used for RNG in Keras. - - Compared to existing TF stateful random ops, `tf.random.Generator` uses - `tf.Variable` and stateless random ops to generate random numbers, - which leads to better reproducibility in distributed training. - Note enabling it might introduce some breakage to existing code, - by producing differently-seeded random number sequences - and breaking tests that rely on specific random numbers being generated. - To disable the - usage of `tf.random.Generator`, please use - `tf.keras.backend.experimental.disable_random_generator`. - - We expect the `tf.random.Generator` code path to become the default, and will - remove the legacy stateful random ops such as `tf.random.uniform` in the - future (see the - [TF RNG guide](https://www.tensorflow.org/guide/random_numbers)). - - This API will also be removed in a future release as well, together with - `tf.keras.backend.experimental.enable_tf_random_generator()` and - `tf.keras.backend.experimental.disable_tf_random_generator()` - - Returns: - boolean: whether `tf.random.Generator` is used for random number generation - in Keras. - """ - return _USE_GENERATOR_FOR_RNG - - -@keras_export('keras.backend.experimental.enable_tf_random_generator', v1=[]) + """Check whether `tf.random.Generator` is used for RNG in Keras. + + Compared to existing TF stateful random ops, `tf.random.Generator` uses + `tf.Variable` and stateless random ops to generate random numbers, + which leads to better reproducibility in distributed training. + Note enabling it might introduce some breakage to existing code, + by producing differently-seeded random number sequences + and breaking tests that rely on specific random numbers being generated. + To disable the + usage of `tf.random.Generator`, please use + `tf.keras.backend.experimental.disable_random_generator`. + + We expect the `tf.random.Generator` code path to become the default, and + will remove the legacy stateful random ops such as `tf.random.uniform` in + the future (see the [TF RNG guide]( + https://www.tensorflow.org/guide/random_numbers)). + + This API will also be removed in a future release as well, together with + `tf.keras.backend.experimental.enable_tf_random_generator()` and + `tf.keras.backend.experimental.disable_tf_random_generator()` + + Returns: + boolean: whether `tf.random.Generator` is used for random number + generation in Keras. + """ + return _USE_GENERATOR_FOR_RNG + + +@keras_export("keras.backend.experimental.enable_tf_random_generator", v1=[]) def enable_tf_random_generator(): - """Enable the `tf.random.Generator` as the RNG for Keras. 
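Since the following hunks keep referring to this global switch, a short sketch of the flag round-trip (whether the generator path is on by default depends on the release, hence the initial print rather than an assertion):

```python
import tensorflow as tf

K = tf.keras.backend

print(K.experimental.is_tf_random_generator_enabled())  # release-dependent
K.experimental.enable_tf_random_generator()
assert K.experimental.is_tf_random_generator_enabled()
K.experimental.disable_tf_random_generator()  # back to the legacy stateful ops
assert not K.experimental.is_tf_random_generator_enabled()
```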
+ """Enable the `tf.random.Generator` as the RNG for Keras. - See `tf.keras.backend.experimental.is_tf_random_generator_enabled` for more - details. - """ + See `tf.keras.backend.experimental.is_tf_random_generator_enabled` for more + details. + """ - global _USE_GENERATOR_FOR_RNG - _USE_GENERATOR_FOR_RNG = True + global _USE_GENERATOR_FOR_RNG + _USE_GENERATOR_FOR_RNG = True -@keras_export('keras.backend.experimental.disable_tf_random_generator', v1=[]) +@keras_export("keras.backend.experimental.disable_tf_random_generator", v1=[]) def disable_tf_random_generator(): - """Disable the `tf.random.Generator` as the RNG for Keras. + """Disable the `tf.random.Generator` as the RNG for Keras. - See `tf.keras.backend.experimental.is_tf_random_generator_enabled` for more - details. - """ - global _USE_GENERATOR_FOR_RNG - _USE_GENERATOR_FOR_RNG = False + See `tf.keras.backend.experimental.is_tf_random_generator_enabled` for more + details. + """ + global _USE_GENERATOR_FOR_RNG + _USE_GENERATOR_FOR_RNG = False class RandomGenerator(tf.__internal__.tracking.AutoTrackable): - """Random generator that selects appropriate random ops. - - This class contains the logic for legacy stateful random ops, as well as the - new stateless random ops with seeds and tf.random.Generator. Any class that - relies on RNG (eg initializer, shuffle, dropout) should use this class to - handle the transition from legacy RNGs to new RNGs. - - Args: - seed: Optional int seed. When `rng_type` is "stateful", the seed is used - to create `tf.random.Generator` to produce deterministic sequences. - When `rng_type` is "stateless", new seed will be created if it is not - provided by user, and it will be passed down to stateless random ops. - When `rng_type` is "legacy_stateful", the seed will be passed down to - stateful random ops. - rng_type: Type of RNG to use, one of "stateful", "stateless", - "legacy_stateful". It defaults to "stateful" if - `enable_tf_random_generator` has been activated, or to - "legacy_stateful" otherwise. - - When using "stateless", the random ops outputs are constant (the same - inputs result in the same outputs). - - When using "stateful" or "legacy_stateful", the random ops outputs are - non-constant, but deterministic: calling the same random op multiple - times with the same inputs results in a deterministic sequence of - different outputs. - - "legacy_stateful" is backed by TF1 stateful RNG ops - (e.g. `tf.random.uniform`), while "stateful" - is backed by TF2 APIs (e.g. `tf.random.Generator.uniform`). - """ - RNG_STATELESS = 'stateless' - RNG_STATEFUL = 'stateful' - RNG_LEGACY_STATEFUL = 'legacy_stateful' - - def __init__(self, seed=None, rng_type=None, **kwargs): - self._seed = seed - self._set_rng_type(rng_type, **kwargs) - self._built = False - - def _set_rng_type(self, rng_type, **kwargs): - # Only supported kwargs is "force_generator", which we will remove once we - # clean up all the caller. - # TODO(scottzhu): Remove the kwargs for force_generator. - if kwargs.get('force_generator', False): - rng_type = self.RNG_STATEFUL - if rng_type is None: - if is_tf_random_generator_enabled(): - self._rng_type = self.RNG_STATEFUL - else: - self._rng_type = self.RNG_LEGACY_STATEFUL - else: - if rng_type not in [self.RNG_STATEFUL, - self.RNG_LEGACY_STATEFUL, self.RNG_STATELESS]: - raise ValueError( - 'Invalid `rng_type` received. ' - 'Valid `rng_type` are ["stateless", "stateful", "legacy_stateful"].' 
- f' Got: {rng_type}') - self._rng_type = rng_type - - def _maybe_init(self): - """Lazily init the RandomGenerator. - - The TF API executing_eagerly_outside_functions() has some side effect, and - couldn't be used before API like tf.enable_eager_execution(). Some of the - client side code was creating the initializer at the code load time, which - triggers the creation of RandomGenerator. Lazy init this class to walkaround - this issue until it is resolved on TF side. - """ - # TODO(b/167482354): Change this back to normal init when the bug is fixed. - if self._built: - return - - if (self._rng_type == self.RNG_STATEFUL and - not tf.compat.v1.executing_eagerly_outside_functions()): - # Fall back to legacy stateful since the generator need to work in tf2. - self._rng_type = self.RNG_LEGACY_STATEFUL - - if self._rng_type == self.RNG_STATELESS: - self._seed = self._create_seed(self._seed) - self._generator = None - elif self._rng_type == self.RNG_STATEFUL: - from keras.utils import tf_utils # pylint: disable=g-import-not-at-top - with tf_utils.maybe_init_scope(self): - seed = self._create_seed(self._seed) - self._generator = tf.random.Generator.from_seed(seed) - else: - # In legacy stateful, we use stateful op, regardless whether user provide - # seed or not. Seeded stateful op will ensure generating same sequences. - self._generator = None - self._built = True + """Random generator that selects appropriate random ops. + + This class contains the logic for legacy stateful random ops, as well as the + new stateless random ops with seeds and tf.random.Generator. Any class that + relies on RNG (eg initializer, shuffle, dropout) should use this class to + handle the transition from legacy RNGs to new RNGs. + + Args: + seed: Optional int seed. When `rng_type` is "stateful", the seed is used + to create `tf.random.Generator` to produce deterministic sequences. + When `rng_type` is "stateless", new seed will be created if it is not + provided by user, and it will be passed down to stateless random ops. + When `rng_type` is "legacy_stateful", the seed will be passed down to + stateful random ops. + rng_type: Type of RNG to use, one of "stateful", "stateless", + "legacy_stateful". When `None` it uses "stateful" if + `enable_tf_random_generator` has been activated, or + "legacy_stateful" otherwise. + - When using "stateless", the random ops outputs are constant (the same + inputs result in the same outputs). + - When using "stateful" or "legacy_stateful", the random ops outputs are + non-constant, but deterministic: calling the same random op multiple + times with the same inputs results in a deterministic sequence of + different outputs. + - "legacy_stateful" is backed by TF1 stateful RNG ops + (e.g. `tf.random.uniform`), while "stateful" + is backed by TF2 APIs (e.g. `tf.random.Generator.uniform`). + Defaults to `None`. + """ + + RNG_STATELESS = "stateless" + RNG_STATEFUL = "stateful" + RNG_LEGACY_STATEFUL = "legacy_stateful" + + def __init__(self, seed=None, rng_type=None, **kwargs): + self._seed = seed + self._set_rng_type(rng_type, **kwargs) + self._built = False + + def _set_rng_type(self, rng_type, **kwargs): + # Only supported kwargs is "force_generator", which we will remove once + # we clean up all the caller. + # TODO(scottzhu): Remove the kwargs for force_generator. 
+ if kwargs.get("force_generator", False): + rng_type = self.RNG_STATEFUL + if rng_type is None: + if is_tf_random_generator_enabled(): + self._rng_type = self.RNG_STATEFUL + else: + self._rng_type = self.RNG_LEGACY_STATEFUL + else: + if rng_type not in [ + self.RNG_STATEFUL, + self.RNG_LEGACY_STATEFUL, + self.RNG_STATELESS, + ]: + raise ValueError( + "Invalid `rng_type` received. " + 'Valid `rng_type` are ["stateless", ' + '"stateful", "legacy_stateful"].' + f" Got: {rng_type}" + ) + self._rng_type = rng_type + + def _maybe_init(self): + """Lazily init the RandomGenerator. + + The TF API executing_eagerly_outside_functions() has some side effect, + and couldn't be used before API like tf.enable_eager_execution(). Some + of the client side code was creating the initializer at the code load + time, which triggers the creation of RandomGenerator. Lazy init this + class to walkaround this issue until it is resolved on TF side. + """ + # TODO(b/167482354): Change this back to normal init when the bug is + # fixed. + if self._built: + return + + if ( + self._rng_type == self.RNG_STATEFUL + and not tf.compat.v1.executing_eagerly_outside_functions() + ): + # Fall back to legacy stateful since the generator need to work in + # tf2. + self._rng_type = self.RNG_LEGACY_STATEFUL + + if self._rng_type == self.RNG_STATELESS: + self._seed = self._create_seed(self._seed) + self._generator = None + elif self._rng_type == self.RNG_STATEFUL: + with tf_utils.maybe_init_scope(self): + seed = self._create_seed(self._seed) + self._generator = tf.random.Generator.from_seed( + seed, alg=tf.random.Algorithm.AUTO_SELECT + ) + else: + # In legacy stateful, we use stateful op, regardless whether user + # provide seed or not. Seeded stateful op will ensure generating + # same sequences. + self._generator = None + self._built = True + + def make_seed_for_stateless_op(self): + """Generate a new seed based on the init config. + + Note that this will not return python ints which will be frozen in the + graph and cause stateless op to return the same value. It will only + return value when generator is used, otherwise it will return None. + + Returns: + A tensor with shape [2,]. + """ + self._maybe_init() + if self._rng_type == self.RNG_STATELESS: + return [self._seed, 0] + elif self._rng_type == self.RNG_STATEFUL: + return self._generator.make_seeds()[:, 0] + return None + + def make_legacy_seed(self): + """Create a new seed for the legacy stateful ops to use. + + When user didn't provide any original seed, this method will return + None. Otherwise it will increment the counter and return as the new + seed. + + Note that it is important to generate different seed for stateful ops in + the `tf.function`. The random ops will return same value when same seed + is provided in the `tf.function`. + + Returns: + int as new seed, or None. + """ + if self._seed is not None: + result = self._seed + self._seed += 1 + return result + return None + + def _create_seed(self, user_specified_seed): + if user_specified_seed is not None: + return user_specified_seed + elif getattr(_SEED_GENERATOR, "generator", None): + return _SEED_GENERATOR.generator.randint(1, 1e9) + else: + return random.randint(1, int(1e9)) + + def random_normal( + self, shape, mean=0.0, stddev=1.0, dtype=None, nonce=None + ): + """Produce random number based on the normal distribution. + + Args: + shape: The shape of the random values to generate. + mean: Floats, default to 0. Mean of the random values to generate. + stddev: Floats, default to 1. 
Standard deviation of the random values + to generate. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `tf.keras.backend.floatx()` is used, + which default to `float32` unless you configured it otherwise (via + `tf.keras.backend.set_floatx(float_dtype)`) + nonce: Optional integer scalar, that will be folded into the seed in + the stateless mode. + """ + self._maybe_init() + dtype = dtype or floatx() + if self._rng_type == self.RNG_STATEFUL: + return self._generator.normal( + shape=shape, mean=mean, stddev=stddev, dtype=dtype + ) + elif self._rng_type == self.RNG_STATELESS: + seed = self.make_seed_for_stateless_op() + if nonce: + seed = tf.random.experimental.stateless_fold_in(seed, nonce) + return tf.random.stateless_normal( + shape=shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed + ) + return tf.random.normal( + shape=shape, + mean=mean, + stddev=stddev, + dtype=dtype, + seed=self.make_legacy_seed(), + ) + + def random_uniform( + self, shape, minval=0.0, maxval=None, dtype=None, nonce=None + ): + """Produce random number based on the uniform distribution. + + Args: + shape: The shape of the random values to generate. + minval: Floats, default to 0. Lower bound of the range of + random values to generate (inclusive). + minval: Floats, default to None. Upper bound of the range of + random values to generate (exclusive). + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `tf.keras.backend.floatx()` is used, + which default to `float32` unless you configured it otherwise (via + `tf.keras.backend.set_floatx(float_dtype)`) + nonce: Optional integer scalar, that will be folded into the seed in + the stateless mode. + """ + self._maybe_init() + dtype = dtype or floatx() + if self._rng_type == self.RNG_STATEFUL: + return self._generator.uniform( + shape=shape, minval=minval, maxval=maxval, dtype=dtype + ) + elif self._rng_type == self.RNG_STATELESS: + seed = self.make_seed_for_stateless_op() + if nonce: + seed = tf.random.experimental.stateless_fold_in(seed, nonce) + return tf.random.stateless_uniform( + shape=shape, + minval=minval, + maxval=maxval, + dtype=dtype, + seed=seed, + ) + return tf.random.uniform( + shape=shape, + minval=minval, + maxval=maxval, + dtype=dtype, + seed=self.make_legacy_seed(), + ) + + def truncated_normal( + self, shape, mean=0.0, stddev=1.0, dtype=None, nonce=None + ): + """Produce random number based on the truncated normal distribution. + + Args: + shape: The shape of the random values to generate. + mean: Floats, default to 0. Mean of the random values to generate. + stddev: Floats, default to 1. Standard deviation of the random values + to generate. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `tf.keras.backend.floatx()` is used, + which default to `float32` unless you configured it otherwise (via + `tf.keras.backend.set_floatx(float_dtype)`) + nonce: Optional integer scalar, that will be folded into the seed in + the stateless mode. 
+ """ + self._maybe_init() + dtype = dtype or floatx() + if self._rng_type == self.RNG_STATEFUL: + return self._generator.truncated_normal( + shape=shape, mean=mean, stddev=stddev, dtype=dtype + ) + elif self._rng_type == self.RNG_STATELESS: + seed = self.make_seed_for_stateless_op() + if nonce: + seed = tf.random.experimental.stateless_fold_in(seed, nonce) + return tf.random.stateless_truncated_normal( + shape=shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed + ) + return tf.random.truncated_normal( + shape=shape, + mean=mean, + stddev=stddev, + dtype=dtype, + seed=self.make_legacy_seed(), + ) + + def dropout(self, inputs, rate, noise_shape=None): + self._maybe_init() + if self._rng_type == self.RNG_STATEFUL: + return tf.nn.experimental.general_dropout( + inputs, + rate=rate, + noise_shape=noise_shape, + uniform_sampler=self._generator.uniform, + ) + elif self._rng_type == self.RNG_STATELESS: + return tf.nn.experimental.stateless_dropout( + inputs, + rate=rate, + noise_shape=noise_shape, + seed=self.make_seed_for_stateless_op(), + ) + else: + return tf.nn.dropout( + inputs, + rate=rate, + noise_shape=noise_shape, + seed=self.make_legacy_seed(), + ) + - def make_seed_for_stateless_op(self): - """Generate a new seed based on the init config. +@keras_export("keras.backend.random_uniform_variable") +@doc_controls.do_not_generate_docs +def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None): + """Instantiates a variable with values drawn from a uniform distribution. - Note that this will not return python ints which will be frozen in the graph - and cause stateless op to return the same value. It will only return value - when generator is used, otherwise it will return None. + Args: + shape: Tuple of integers, shape of returned Keras variable. + low: Float, lower boundary of the output interval. + high: Float, upper boundary of the output interval. + dtype: String, dtype of returned Keras variable. + name: String, name of returned Keras variable. + seed: Integer, random seed. Returns: - A tensor with shape [2,]. + A Keras variable, filled with drawn samples. + + Example: + + >>> kvar = tf.keras.backend.random_uniform_variable(shape=(2,3), + ... low=0.0, high=1.0) + >>> kvar + """ - self._maybe_init() - if self._rng_type == self.RNG_STATELESS: - return [self._seed, 0] - elif self._rng_type == self.RNG_STATEFUL: - return self._generator.make_seeds()[:, 0] - return None + if dtype is None: + dtype = floatx() + tf_dtype = tf.as_dtype(dtype) + if seed is None: + # ensure that randomness is conditioned by the Numpy RNG + seed = np.random.randint(10e8) + value = tf.compat.v1.random_uniform_initializer( + low, high, dtype=tf_dtype, seed=seed + )(shape) + return variable(value, dtype=dtype, name=name) - def make_legacy_seed(self): - """Create a new seed for the legacy stateful ops to use. - When user didn't provide any original seed, this method will return None. - Otherwise it will increment the counter and return as the new seed. +@keras_export("keras.backend.random_normal_variable") +@doc_controls.do_not_generate_docs +def random_normal_variable( + shape, mean, scale, dtype=None, name=None, seed=None +): + """Instantiates a variable with values drawn from a normal distribution. - Note that it is important to generate different seed for stateful ops in - the `tf.function`. The random ops will return same value when same seed is - provided in the `tf.function`. + Args: + shape: Tuple of integers, shape of returned Keras variable. 
+ mean: Float, mean of the normal distribution. + scale: Float, standard deviation of the normal distribution. + dtype: String, dtype of returned Keras variable. + name: String, name of returned Keras variable. + seed: Integer, random seed. Returns: - int as new seed, or None. - """ - if self._seed is not None: - result = self._seed - self._seed += 1 - return result - return None + A Keras variable, filled with drawn samples. - def _create_seed(self, user_specified_seed): - if user_specified_seed is not None: - return user_specified_seed - elif getattr(_SEED_GENERATOR, 'generator', None): - return _SEED_GENERATOR.generator.randint(1, 1e9) - else: - return random.randint(1, 1e9) - - def random_normal(self, shape, mean=0., stddev=1., dtype=None, nonce=None): - """Produce random number based on the normal distribution. - - Args: - shape: The shape of the random values to generate. - mean: Floats, default to 0. Mean of the random values to generate. - stddev: Floats, default to 1. Standard deviation of the random values to - generate. - dtype: Optional dtype of the tensor. Only floating point types are - supported. If not specified, `tf.keras.backend.floatx()` is used, which - default to `float32` unless you configured it otherwise (via - `tf.keras.backend.set_floatx(float_dtype)`) - nonce: Optional integer scalar, that will be folded into the seed in the - stateless mode. - """ - self._maybe_init() - dtype = dtype or floatx() - if self._rng_type == self.RNG_STATEFUL: - return self._generator.normal( - shape=shape, mean=mean, stddev=stddev, dtype=dtype) - elif self._rng_type == self.RNG_STATELESS: - seed = self.make_seed_for_stateless_op() - if nonce: - seed = tf.random.experimental.stateless_fold_in(seed, nonce) - return tf.random.stateless_normal( - shape=shape, mean=mean, stddev=stddev, dtype=dtype, - seed=seed) - return tf.random.normal( - shape=shape, mean=mean, stddev=stddev, dtype=dtype, - seed=self.make_legacy_seed()) - - def random_uniform(self, shape, minval=0., maxval=None, dtype=None, - nonce=None): - """Produce random number based on the uniform distribution. - - Args: - shape: The shape of the random values to generate. - minval: Floats, default to 0. Lower bound of the range of - random values to generate (inclusive). - minval: Floats, default to None. Upper bound of the range of - random values to generate (exclusive). - dtype: Optional dtype of the tensor. Only floating point types are - supported. If not specified, `tf.keras.backend.floatx()` is used, which - default to `float32` unless you configured it otherwise (via - `tf.keras.backend.set_floatx(float_dtype)`) - nonce: Optional integer scalar, that will be folded into the seed in the - stateless mode. - """ - self._maybe_init() - dtype = dtype or floatx() - if self._rng_type == self.RNG_STATEFUL: - return self._generator.uniform( - shape=shape, minval=minval, maxval=maxval, dtype=dtype) - elif self._rng_type == self.RNG_STATELESS: - seed = self.make_seed_for_stateless_op() - if nonce: - seed = tf.random.experimental.stateless_fold_in(seed, nonce) - return tf.random.stateless_uniform( - shape=shape, minval=minval, maxval=maxval, dtype=dtype, - seed=seed) - return tf.random.uniform( - shape=shape, minval=minval, maxval=maxval, dtype=dtype, - seed=self.make_legacy_seed()) - - def truncated_normal(self, shape, mean=0., stddev=1., dtype=None, nonce=None): - """Produce random number based on the truncated normal distribution. - - Args: - shape: The shape of the random values to generate. - mean: Floats, default to 0. 
Mean of the random values to generate. - stddev: Floats, default to 1. Standard deviation of the random values to - generate. - dtype: Optional dtype of the tensor. Only floating point types are - supported. If not specified, `tf.keras.backend.floatx()` is used, which - default to `float32` unless you configured it otherwise (via - `tf.keras.backend.set_floatx(float_dtype)`) - nonce: Optional integer scalar, that will be folded into the seed in the - stateless mode. - """ - self._maybe_init() - dtype = dtype or floatx() - if self._rng_type == self.RNG_STATEFUL: - return self._generator.truncated_normal( - shape=shape, mean=mean, stddev=stddev, dtype=dtype) - elif self._rng_type == self.RNG_STATELESS: - seed = self.make_seed_for_stateless_op() - if nonce: - seed = tf.random.experimental.stateless_fold_in(seed, nonce) - return tf.random.stateless_truncated_normal( - shape=shape, mean=mean, stddev=stddev, dtype=dtype, - seed=seed) - return tf.random.truncated_normal( - shape=shape, mean=mean, stddev=stddev, dtype=dtype, - seed=self.make_legacy_seed()) + Example: - def dropout(self, inputs, rate, noise_shape=None): - self._maybe_init() - if self._rng_type in [self.RNG_STATEFUL, self.RNG_STATELESS]: - return tf.nn.experimental.stateless_dropout( - inputs, rate=rate, noise_shape=noise_shape, - seed=self.make_seed_for_stateless_op()) - return tf.nn.dropout(inputs, rate=rate, noise_shape=noise_shape, - seed=self.make_legacy_seed()) + >>> kvar = tf.keras.backend.random_normal_variable(shape=(2,3), + ... mean=0.0, scale=1.0) + >>> kvar + + """ + if dtype is None: + dtype = floatx() + tf_dtype = tf.as_dtype(dtype) + if seed is None: + # ensure that randomness is conditioned by the Numpy RNG + seed = np.random.randint(10e8) + value = tf.compat.v1.random_normal_initializer( + mean, scale, dtype=tf_dtype, seed=seed + )(shape) + return variable(value, dtype=dtype, name=name) -@keras_export('keras.backend.random_uniform_variable') -@doc_controls.do_not_generate_docs -def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None): - """Instantiates a variable with values drawn from a uniform distribution. - - Args: - shape: Tuple of integers, shape of returned Keras variable. - low: Float, lower boundary of the output interval. - high: Float, upper boundary of the output interval. - dtype: String, dtype of returned Keras variable. - name: String, name of returned Keras variable. - seed: Integer, random seed. - - Returns: - A Keras variable, filled with drawn samples. - - Example: - - >>> kvar = tf.keras.backend.random_uniform_variable(shape=(2,3), - ... low=0.0, high=1.0) - >>> kvar - - """ - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e8) - value = tf.compat.v1.random_uniform_initializer( - low, high, dtype=tf_dtype, seed=seed)(shape) - return variable(value, dtype=dtype, name=name) - - -@keras_export('keras.backend.random_normal_variable') -@doc_controls.do_not_generate_docs -def random_normal_variable(shape, mean, scale, dtype=None, name=None, - seed=None): - """Instantiates a variable with values drawn from a normal distribution. - - Args: - shape: Tuple of integers, shape of returned Keras variable. - mean: Float, mean of the normal distribution. - scale: Float, standard deviation of the normal distribution. - dtype: String, dtype of returned Keras variable. - name: String, name of returned Keras variable. - seed: Integer, random seed. 
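Both variable factories in this hunk follow the same pattern: seed a `tf.compat.v1` initializer (conditioning on NumPy's RNG when no seed is given) and wrap the result in a Keras variable. A short usage sketch:

```python
import tensorflow as tf

K = tf.keras.backend

u = K.random_uniform_variable(shape=(2, 3), low=0.0, high=1.0, seed=7)
n = K.random_normal_variable(shape=(2, 3), mean=0.0, scale=1.0, seed=7)
print(K.eval(u).shape, K.eval(n).shape)  # (2, 3) (2, 3)
```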
- - Returns: - A Keras variable, filled with drawn samples. - - Example: - - >>> kvar = tf.keras.backend.random_normal_variable(shape=(2,3), - ... mean=0.0, scale=1.0) - >>> kvar - - """ - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e8) - value = tf.compat.v1.random_normal_initializer( - mean, scale, dtype=tf_dtype, seed=seed)(shape) - return variable(value, dtype=dtype, name=name) - - -@keras_export('keras.backend.count_params') +@keras_export("keras.backend.count_params") @doc_controls.do_not_generate_docs def count_params(x): - """Returns the static number of elements in a variable or tensor. + """Returns the static number of elements in a variable or tensor. - Args: - x: Variable or tensor. + Args: + x: Variable or tensor. - Returns: - Integer, the number of scalars in `x`. + Returns: + Integer, the number of scalars in `x`. - Example: + Example: - >>> kvar = tf.keras.backend.zeros((2,3)) - >>> tf.keras.backend.count_params(kvar) - 6 - >>> tf.keras.backend.eval(kvar) - array([[0., 0., 0.], - [0., 0., 0.]], dtype=float32) + >>> kvar = tf.keras.backend.zeros((2,3)) + >>> tf.keras.backend.count_params(kvar) + 6 + >>> tf.keras.backend.eval(kvar) + array([[0., 0., 0.], + [0., 0., 0.]], dtype=float32) - """ - return np.prod(x.shape.as_list()) + """ + return np.prod(x.shape.as_list()) -@keras_export('keras.backend.cast') +@keras_export("keras.backend.cast") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def cast(x, dtype): - """Casts a tensor to a different dtype and returns it. + """Casts a tensor to a different dtype and returns it. - You can cast a Keras variable but it still returns a Keras tensor. + You can cast a Keras variable but it still returns a Keras tensor. - Args: - x: Keras tensor (or variable). - dtype: String, either (`'float16'`, `'float32'`, or `'float64'`). + Args: + x: Keras tensor (or variable). + dtype: String, either (`'float16'`, `'float32'`, or `'float64'`). - Returns: - Keras tensor with dtype `dtype`. + Returns: + Keras tensor with dtype `dtype`. - Examples: - Cast a float32 variable to a float64 tensor + Examples: + Cast a float32 variable to a float64 tensor - >>> input = tf.keras.backend.ones(shape=(1,3)) - >>> print(input) - - >>> cast_input = tf.keras.backend.cast(input, dtype='float64') - >>> print(cast_input) - tf.Tensor([[1. 1. 1.]], shape=(1, 3), dtype=float64) + >>> input = tf.keras.backend.ones(shape=(1,3)) + >>> print(input) + + >>> cast_input = tf.keras.backend.cast(input, dtype='float64') + >>> print(cast_input) + tf.Tensor([[1. 1. 1.]], shape=(1, 3), dtype=float64) - """ - return tf.cast(x, dtype) + """ + return tf.cast(x, dtype) # UPDATES OPS -@keras_export('keras.backend.update') +@keras_export("keras.backend.update") @doc_controls.do_not_generate_docs def update(x, new_x): - return tf.compat.v1.assign(x, new_x) + return tf.compat.v1.assign(x, new_x) -@keras_export('keras.backend.update_add') +@keras_export("keras.backend.update_add") @doc_controls.do_not_generate_docs def update_add(x, increment): - """Update the value of `x` by adding `increment`. + """Update the value of `x` by adding `increment`. - Args: - x: A Variable. - increment: A tensor of same shape as `x`. + Args: + x: A Variable. + increment: A tensor of same shape as `x`. - Returns: - The variable `x` updated. - """ - return tf.compat.v1.assign_add(x, increment) + Returns: + The variable `x` updated. 
+ """ + return tf.compat.v1.assign_add(x, increment) -@keras_export('keras.backend.update_sub') +@keras_export("keras.backend.update_sub") @doc_controls.do_not_generate_docs def update_sub(x, decrement): - """Update the value of `x` by subtracting `decrement`. + """Update the value of `x` by subtracting `decrement`. - Args: - x: A Variable. - decrement: A tensor of same shape as `x`. + Args: + x: A Variable. + decrement: A tensor of same shape as `x`. - Returns: - The variable `x` updated. - """ - return tf.compat.v1.assign_sub(x, decrement) + Returns: + The variable `x` updated. + """ + return tf.compat.v1.assign_sub(x, decrement) -@keras_export('keras.backend.moving_average_update') +@keras_export("keras.backend.moving_average_update") @doc_controls.do_not_generate_docs def moving_average_update(x, value, momentum): - """Compute the exponential moving average of a value. + """Compute the exponential moving average of a value. - The moving average 'x' is updated with 'value' following: + The moving average 'x' is updated with 'value' following: - ``` - x = x * momentum + value * (1 - momentum) - ``` + ``` + x = x * momentum + value * (1 - momentum) + ``` - For example: + For example: - >>> x = tf.Variable(0.0) - >>> momentum=0.9 - >>> moving_average_update(x, value = 2.0, momentum=momentum).numpy() - >>> x.numpy() - 0.2 + >>> x = tf.Variable(0.0) + >>> momentum=0.9 + >>> moving_average_update(x, value = 2.0, momentum=momentum).numpy() + >>> x.numpy() + 0.2 - The result will be biased towards the initial value of the variable. + The result will be biased towards the initial value of the variable. - If the variable was initialized to zero, you can divide by - `1 - momentum ** num_updates` to debias it (Section 3 of - [Kingma et al., 2015](https://arxiv.org/abs/1412.6980)): + If the variable was initialized to zero, you can divide by + `1 - momentum ** num_updates` to debias it (Section 3 of + [Kingma et al., 2015](https://arxiv.org/abs/1412.6980)): - >>> num_updates = 1.0 - >>> x_zdb = x/(1 - momentum**num_updates) - >>> x_zdb.numpy() - 2.0 + >>> num_updates = 1.0 + >>> x_zdb = x/(1 - momentum**num_updates) + >>> x_zdb.numpy() + 2.0 - Args: - x: A Variable, the moving average. - value: A tensor with the same shape as `x`, the new value to be - averaged in. - momentum: The moving average momentum. + Args: + x: A Variable, the moving average. + value: A tensor with the same shape as `x`, the new value to be + averaged in. + momentum: The moving average momentum. - Returns: - The updated variable. - """ - if tf.__internal__.tf2.enabled(): - momentum = tf.cast(momentum, x.dtype) - value = tf.cast(value, x.dtype) - return x.assign_sub((x - value) * (1 - momentum)) - else: - return tf.__internal__.train.assign_moving_average( - x, value, momentum, zero_debias=True) + Returns: + The updated variable. + """ + if tf.__internal__.tf2.enabled(): + momentum = tf.cast(momentum, x.dtype) + value = tf.cast(value, x.dtype) + return x.assign_sub((x - value) * (1 - momentum)) + else: + return tf.__internal__.train.assign_moving_average( + x, value, momentum, zero_debias=True + ) # LINEAR ALGEBRA -@keras_export('keras.backend.dot') +@keras_export("keras.backend.dot") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def dot(x, y): - """Multiplies 2 tensors (and/or variables) and returns a tensor. - - This operation corresponds to `numpy.dot(a, b, out=None)`. - - Args: - x: Tensor or variable. - y: Tensor or variable. - - Returns: - A tensor, dot product of `x` and `y`. 
- - Examples: - - If inputs `x` and `y` are 2-D arrays, then it is equivalent to `tf.matmul`. - >>> x = tf.keras.backend.placeholder(shape=(2, 3)) - >>> y = tf.keras.backend.placeholder(shape=(3, 4)) - >>> xy = tf.keras.backend.dot(x, y) - >>> xy - - - >>> x = tf.keras.backend.placeholder(shape=(32, 28, 3)) - >>> y = tf.keras.backend.placeholder(shape=(3, 4)) - >>> xy = tf.keras.backend.dot(x, y) - >>> xy - - - If `x` is an N-D array and `y` is an M-D array (where M>=2), it is a sum - product over the last axis of `x` and the second-to-last axis of `y`. - >>> x = tf.keras.backend.random_uniform_variable( - ... shape=(2, 3), low=0., high=1.) - >>> y = tf.keras.backend.ones((4, 3, 5)) - >>> xy = tf.keras.backend.dot(x, y) - >>> tf.keras.backend.int_shape(xy) - (2, 4, 5) - """ - if ndim(x) is not None and (ndim(x) > 2 or ndim(y) > 2): - x_shape = [] - for i, s in zip(int_shape(x), tf.unstack(tf.shape(x))): - if i is not None: - x_shape.append(i) - else: - x_shape.append(s) - x_shape = tuple(x_shape) - y_shape = [] - for i, s in zip(int_shape(y), tf.unstack(tf.shape(y))): - if i is not None: - y_shape.append(i) - else: - y_shape.append(s) - y_shape = tuple(y_shape) - y_permute_dim = list(range(ndim(y))) - y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim - xt = tf.reshape(x, [-1, x_shape[-1]]) - yt = tf.reshape( - tf.compat.v1.transpose(y, perm=y_permute_dim), [y_shape[-2], -1]) - return tf.reshape( - tf.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:]) - if is_sparse(x): - out = tf.sparse.sparse_dense_matmul(x, y) - else: - out = tf.matmul(x, y) - return out - - -@keras_export('keras.backend.batch_dot') + """Multiplies 2 tensors (and/or variables) and returns a tensor. + + This operation corresponds to `numpy.dot(a, b, out=None)`. + + Args: + x: Tensor or variable. + y: Tensor or variable. + + Returns: + A tensor, dot product of `x` and `y`. + + Examples: + + If inputs `x` and `y` are 2-D arrays, then it is equivalent to `tf.matmul`. + >>> x = tf.keras.backend.placeholder(shape=(2, 3)) + >>> y = tf.keras.backend.placeholder(shape=(3, 4)) + >>> xy = tf.keras.backend.dot(x, y) + >>> xy + + + >>> x = tf.keras.backend.placeholder(shape=(32, 28, 3)) + >>> y = tf.keras.backend.placeholder(shape=(3, 4)) + >>> xy = tf.keras.backend.dot(x, y) + >>> xy + + + If `x` is an N-D array and `y` is an M-D array (where M>=2), it is a sum + product over the last axis of `x` and the second-to-last axis of `y`. + >>> x = tf.keras.backend.random_uniform_variable( + ... shape=(2, 3), low=0., high=1.) 
+ >>> y = tf.keras.backend.ones((4, 3, 5)) + >>> xy = tf.keras.backend.dot(x, y) + >>> tf.keras.backend.int_shape(xy) + (2, 4, 5) + """ + if ndim(x) is not None and (ndim(x) > 2 or ndim(y) > 2): + x_shape = [] + for i, s in zip(int_shape(x), tf.unstack(tf.shape(x))): + if i is not None: + x_shape.append(i) + else: + x_shape.append(s) + x_shape = tuple(x_shape) + y_shape = [] + for i, s in zip(int_shape(y), tf.unstack(tf.shape(y))): + if i is not None: + y_shape.append(i) + else: + y_shape.append(s) + y_shape = tuple(y_shape) + y_permute_dim = list(range(ndim(y))) + y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim + xt = tf.reshape(x, [-1, x_shape[-1]]) + yt = tf.reshape( + tf.compat.v1.transpose(y, perm=y_permute_dim), [y_shape[-2], -1] + ) + return tf.reshape( + tf.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:] + ) + if is_sparse(x): + out = tf.sparse.sparse_dense_matmul(x, y) + else: + out = tf.matmul(x, y) + return out + + +@keras_export("keras.backend.batch_dot") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def batch_dot(x, y, axes=None): - """Batchwise dot product. - - `batch_dot` is used to compute dot product of `x` and `y` when - `x` and `y` are data in batch, i.e. in a shape of - `(batch_size, :)`. - `batch_dot` results in a tensor or variable with less dimensions - than the input. If the number of dimensions is reduced to 1, - we use `expand_dims` to make sure that ndim is at least 2. - - Args: - x: Keras tensor or variable with `ndim >= 2`. - y: Keras tensor or variable with `ndim >= 2`. - axes: Tuple or list of integers with target dimensions, or single integer. - The sizes of `x.shape[axes[0]]` and `y.shape[axes[1]]` should be equal. - - Returns: - A tensor with shape equal to the concatenation of `x`'s shape - (less the dimension that was summed over) and `y`'s shape - (less the batch dimension and the dimension that was summed over). - If the final rank is 1, we reshape it to `(batch_size, 1)`. - - Examples: - - >>> x_batch = tf.keras.backend.ones(shape=(32, 20, 1)) - >>> y_batch = tf.keras.backend.ones(shape=(32, 30, 20)) - >>> xy_batch_dot = tf.keras.backend.batch_dot(x_batch, y_batch, axes=(1, 2)) - >>> tf.keras.backend.int_shape(xy_batch_dot) - (32, 1, 30) - - Shape inference: - Let `x`'s shape be `(100, 20)` and `y`'s shape be `(100, 30, 20)`. - If `axes` is (1, 2), to find the output shape of resultant tensor, - loop through each dimension in `x`'s shape and `y`'s shape: - * `x.shape[0]` : 100 : append to output shape - * `x.shape[1]` : 20 : do not append to output shape, - dimension 1 of `x` has been summed over. (`dot_axes[0]` = 1) - * `y.shape[0]` : 100 : do not append to output shape, - always ignore first dimension of `y` - * `y.shape[1]` : 30 : append to output shape - * `y.shape[2]` : 20 : do not append to output shape, - dimension 2 of `y` has been summed over. (`dot_axes[1]` = 2) - `output_shape` = `(100, 30)` - """ - x_shape = int_shape(x) - y_shape = int_shape(y) - - x_ndim = len(x_shape) - y_ndim = len(y_shape) - - if x_ndim < 2 or y_ndim < 2: - raise ValueError('Cannot do batch_dot on inputs ' - 'with rank < 2. ' - 'Received inputs with shapes ' + - str(x_shape) + ' and ' + - str(y_shape) + '.') - - x_batch_size = x_shape[0] - y_batch_size = y_shape[0] - - if x_batch_size is not None and y_batch_size is not None: - if x_batch_size != y_batch_size: - raise ValueError('Cannot do batch_dot on inputs ' - 'with different batch sizes. 
' - 'Received inputs with shapes ' + - str(x_shape) + ' and ' + - str(y_shape) + '.') - if isinstance(axes, int): - axes = [axes, axes] - - if axes is None: + """Batchwise dot product. + + `batch_dot` is used to compute dot product of `x` and `y` when + `x` and `y` are data in batch, i.e. in a shape of + `(batch_size, :)`. + `batch_dot` results in a tensor or variable with less dimensions + than the input. If the number of dimensions is reduced to 1, + we use `expand_dims` to make sure that ndim is at least 2. + + Args: + x: Keras tensor or variable with `ndim >= 2`. + y: Keras tensor or variable with `ndim >= 2`. + axes: Tuple or list of integers with target dimensions, or single integer. + The sizes of `x.shape[axes[0]]` and `y.shape[axes[1]]` should be equal. + + Returns: + A tensor with shape equal to the concatenation of `x`'s shape + (less the dimension that was summed over) and `y`'s shape + (less the batch dimension and the dimension that was summed over). + If the final rank is 1, we reshape it to `(batch_size, 1)`. + + Examples: + + >>> x_batch = tf.keras.backend.ones(shape=(32, 20, 1)) + >>> y_batch = tf.keras.backend.ones(shape=(32, 30, 20)) + >>> xy_batch_dot = tf.keras.backend.batch_dot(x_batch, y_batch, axes=(1, 2)) + >>> tf.keras.backend.int_shape(xy_batch_dot) + (32, 1, 30) + + Shape inference: + Let `x`'s shape be `(100, 20)` and `y`'s shape be `(100, 30, 20)`. + If `axes` is (1, 2), to find the output shape of resultant tensor, + loop through each dimension in `x`'s shape and `y`'s shape: + * `x.shape[0]` : 100 : append to output shape + * `x.shape[1]` : 20 : do not append to output shape, + dimension 1 of `x` has been summed over. (`dot_axes[0]` = 1) + * `y.shape[0]` : 100 : do not append to output shape, + always ignore first dimension of `y` + * `y.shape[1]` : 30 : append to output shape + * `y.shape[2]` : 20 : do not append to output shape, + dimension 2 of `y` has been summed over. (`dot_axes[1]` = 2) + `output_shape` = `(100, 30)` + """ + x_shape = int_shape(x) + y_shape = int_shape(y) + + x_ndim = len(x_shape) + y_ndim = len(y_shape) + + if x_ndim < 2 or y_ndim < 2: + raise ValueError( + "Cannot do batch_dot on inputs " + "with rank < 2. " + "Received inputs with shapes " + + str(x_shape) + + " and " + + str(y_shape) + + "." + ) + + x_batch_size = x_shape[0] + y_batch_size = y_shape[0] + + if x_batch_size is not None and y_batch_size is not None: + if x_batch_size != y_batch_size: + raise ValueError( + "Cannot do batch_dot on inputs " + "with different batch sizes. " + "Received inputs with shapes " + + str(x_shape) + + " and " + + str(y_shape) + + "." + ) + if isinstance(axes, int): + axes = [axes, axes] + + if axes is None: + if y_ndim == 2: + axes = [x_ndim - 1, y_ndim - 1] + else: + axes = [x_ndim - 1, y_ndim - 2] + + if py_any(isinstance(a, (list, tuple)) for a in axes): + raise ValueError( + "Multiple target dimensions are not supported. " + + "Expected: None, int, (int, int), " + + "Provided: " + + str(axes) + ) + + # if tuple, convert to list. + axes = list(axes) + + # convert negative indices. + if axes[0] < 0: + axes[0] += x_ndim + if axes[1] < 0: + axes[1] += y_ndim + + # sanity checks + if 0 in axes: + raise ValueError( + "Cannot perform batch_dot over axis 0. 
" + "If your inputs are not batched, " + "add a dummy batch dimension to your " + "inputs using K.expand_dims(x, 0)" + ) + a0, a1 = axes + d1 = x_shape[a0] + d2 = y_shape[a1] + + if d1 is not None and d2 is not None and d1 != d2: + raise ValueError( + "Cannot do batch_dot on inputs with shapes " + + str(x_shape) + + " and " + + str(y_shape) + + " with axes=" + + str(axes) + + ". x.shape[%d] != y.shape[%d] (%d != %d)." + % (axes[0], axes[1], d1, d2) + ) + + # backup ndims. Need them later. + orig_x_ndim = x_ndim + orig_y_ndim = y_ndim + + # if rank is 2, expand to 3. + if x_ndim == 2: + x = tf.expand_dims(x, 1) + a0 += 1 + x_ndim += 1 if y_ndim == 2: - axes = [x_ndim - 1, y_ndim - 1] + y = tf.expand_dims(y, 2) + y_ndim += 1 + + # bring x's dimension to be reduced to last axis. + if a0 != x_ndim - 1: + pattern = list(range(x_ndim)) + for i in range(a0, x_ndim - 1): + pattern[i] = pattern[i + 1] + pattern[-1] = a0 + x = tf.compat.v1.transpose(x, pattern) + + # bring y's dimension to be reduced to axis 1. + if a1 != 1: + pattern = list(range(y_ndim)) + for i in range(a1, 1, -1): + pattern[i] = pattern[i - 1] + pattern[1] = a1 + y = tf.compat.v1.transpose(y, pattern) + + # normalize both inputs to rank 3. + if x_ndim > 3: + # squash middle dimensions of x. + x_shape = shape(x) + x_mid_dims = x_shape[1:-1] + x_squashed_shape = tf.stack([x_shape[0], -1, x_shape[-1]]) + x = tf.reshape(x, x_squashed_shape) + x_squashed = True else: - axes = [x_ndim - 1, y_ndim - 2] - - if py_any(isinstance(a, (list, tuple)) for a in axes): - raise ValueError('Multiple target dimensions are not supported. ' + - 'Expected: None, int, (int, int), ' + - 'Provided: ' + str(axes)) - - # if tuple, convert to list. - axes = list(axes) - - # convert negative indices. - if axes[0] < 0: - axes[0] += x_ndim - if axes[1] < 0: - axes[1] += y_ndim - - # sanity checks - if 0 in axes: - raise ValueError('Cannot perform batch_dot over axis 0. ' - 'If your inputs are not batched, ' - 'add a dummy batch dimension to your ' - 'inputs using K.expand_dims(x, 0)') - a0, a1 = axes - d1 = x_shape[a0] - d2 = y_shape[a1] - - if d1 is not None and d2 is not None and d1 != d2: - raise ValueError('Cannot do batch_dot on inputs with shapes ' + - str(x_shape) + ' and ' + str(y_shape) + - ' with axes=' + str(axes) + '. x.shape[%d] != ' - 'y.shape[%d] (%d != %d).' % (axes[0], axes[1], d1, d2)) - - # backup ndims. Need them later. - orig_x_ndim = x_ndim - orig_y_ndim = y_ndim - - # if rank is 2, expand to 3. - if x_ndim == 2: - x = tf.expand_dims(x, 1) - a0 += 1 - x_ndim += 1 - if y_ndim == 2: - y = tf.expand_dims(y, 2) - y_ndim += 1 - - # bring x's dimension to be reduced to last axis. - if a0 != x_ndim - 1: - pattern = list(range(x_ndim)) - for i in range(a0, x_ndim - 1): - pattern[i] = pattern[i + 1] - pattern[-1] = a0 - x = tf.compat.v1.transpose(x, pattern) - - # bring y's dimension to be reduced to axis 1. - if a1 != 1: - pattern = list(range(y_ndim)) - for i in range(a1, 1, -1): - pattern[i] = pattern[i - 1] - pattern[1] = a1 - y = tf.compat.v1.transpose(y, pattern) - - # normalize both inputs to rank 3. - if x_ndim > 3: - # squash middle dimensions of x. - x_shape = shape(x) - x_mid_dims = x_shape[1:-1] - x_squashed_shape = tf.stack( - [x_shape[0], -1, x_shape[-1]]) - x = tf.reshape(x, x_squashed_shape) - x_squashed = True - else: - x_squashed = False - - if y_ndim > 3: - # squash trailing dimensions of y. 
- y_shape = shape(y) - y_trail_dims = y_shape[2:] - y_squashed_shape = tf.stack( - [y_shape[0], y_shape[1], -1]) - y = tf.reshape(y, y_squashed_shape) - y_squashed = True - else: - y_squashed = False - - result = tf.matmul(x, y) - - # if inputs were squashed, we have to reshape the matmul output. - output_shape = tf.shape(result) - do_reshape = False - - if x_squashed: - output_shape = tf.concat( - [output_shape[:1], - x_mid_dims, - output_shape[-1:]], 0) - do_reshape = True - - if y_squashed: - output_shape = tf.concat([output_shape[:-1], y_trail_dims], 0) - do_reshape = True - - if do_reshape: - result = tf.reshape(result, output_shape) - - # if the inputs were originally rank 2, we remove the added 1 dim. - if orig_x_ndim == 2: - result = tf.squeeze(result, 1) - elif orig_y_ndim == 2: - result = tf.squeeze(result, -1) - - return result - - -@keras_export('keras.backend.transpose') + x_squashed = False + + if y_ndim > 3: + # squash trailing dimensions of y. + y_shape = shape(y) + y_trail_dims = y_shape[2:] + y_squashed_shape = tf.stack([y_shape[0], y_shape[1], -1]) + y = tf.reshape(y, y_squashed_shape) + y_squashed = True + else: + y_squashed = False + + result = tf.matmul(x, y) + + # if inputs were squashed, we have to reshape the matmul output. + output_shape = tf.shape(result) + do_reshape = False + + if x_squashed: + output_shape = tf.concat( + [output_shape[:1], x_mid_dims, output_shape[-1:]], 0 + ) + do_reshape = True + + if y_squashed: + output_shape = tf.concat([output_shape[:-1], y_trail_dims], 0) + do_reshape = True + + if do_reshape: + result = tf.reshape(result, output_shape) + + # if the inputs were originally rank 2, we remove the added 1 dim. + if orig_x_ndim == 2: + result = tf.squeeze(result, 1) + elif orig_y_ndim == 2: + result = tf.squeeze(result, -1) + + return result + + +@keras_export("keras.backend.transpose") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def transpose(x): - """Transposes a tensor and returns it. - - Args: - x: Tensor or variable. - - Returns: - A tensor. - - Examples: - - >>> var = tf.keras.backend.variable([[1, 2, 3], [4, 5, 6]]) - >>> tf.keras.backend.eval(var) - array([[1., 2., 3.], - [4., 5., 6.]], dtype=float32) - >>> var_transposed = tf.keras.backend.transpose(var) - >>> tf.keras.backend.eval(var_transposed) - array([[1., 4.], - [2., 5.], - [3., 6.]], dtype=float32) - >>> input = tf.keras.backend.placeholder((2, 3)) - >>> input - - >>> input_transposed = tf.keras.backend.transpose(input) - >>> input_transposed - - """ - return tf.compat.v1.transpose(x) - - -@keras_export('keras.backend.gather') + """Transposes a tensor and returns it. + + Args: + x: Tensor or variable. + + Returns: + A tensor. + + Examples: + + >>> var = tf.keras.backend.variable([[1, 2, 3], [4, 5, 6]]) + >>> tf.keras.backend.eval(var) + array([[1., 2., 3.], + [4., 5., 6.]], dtype=float32) + >>> var_transposed = tf.keras.backend.transpose(var) + >>> tf.keras.backend.eval(var_transposed) + array([[1., 4.], + [2., 5.], + [3., 6.]], dtype=float32) + >>> input = tf.keras.backend.placeholder((2, 3)) + >>> input + + >>> input_transposed = tf.keras.backend.transpose(input) + >>> input_transposed + + """ + return tf.compat.v1.transpose(x) + + +@keras_export("keras.backend.gather") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def gather(reference, indices): - """Retrieves the elements of indices `indices` in the tensor `reference`. - - Args: - reference: A tensor. - indices: An integer tensor of indices. 
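# [Illustration, a minimal sketch outside this patch; assumes TF 2.x.]
# `gather` is plain row indexing along axis 0, equivalent to NumPy
# fancy indexing, duplicates and reordering included.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

ref = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
out = K.gather(ref, [1, 0, 1])
np.testing.assert_array_equal(out.numpy(), ref.numpy()[[1, 0, 1]])
print(out.shape)  # (3, 3): one row per index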
- - Returns: - A tensor of same type as `reference`. - - Examples: - - >>> var = tf.keras.backend.variable([[1, 2, 3], [4, 5, 6]]) - >>> tf.keras.backend.eval(var) - array([[1., 2., 3.], - [4., 5., 6.]], dtype=float32) - >>> var_gathered = tf.keras.backend.gather(var, [0]) - >>> tf.keras.backend.eval(var_gathered) - array([[1., 2., 3.]], dtype=float32) - >>> var_gathered = tf.keras.backend.gather(var, [1]) - >>> tf.keras.backend.eval(var_gathered) - array([[4., 5., 6.]], dtype=float32) - >>> var_gathered = tf.keras.backend.gather(var, [0,1,0]) - >>> tf.keras.backend.eval(var_gathered) - array([[1., 2., 3.], - [4., 5., 6.], - [1., 2., 3.]], dtype=float32) - """ - return tf.compat.v1.gather(reference, indices) + """Retrieves the elements of indices `indices` in the tensor `reference`. + + Args: + reference: A tensor. + indices: An integer tensor of indices. + + Returns: + A tensor of same type as `reference`. + + Examples: + + >>> var = tf.keras.backend.variable([[1, 2, 3], [4, 5, 6]]) + >>> tf.keras.backend.eval(var) + array([[1., 2., 3.], + [4., 5., 6.]], dtype=float32) + >>> var_gathered = tf.keras.backend.gather(var, [0]) + >>> tf.keras.backend.eval(var_gathered) + array([[1., 2., 3.]], dtype=float32) + >>> var_gathered = tf.keras.backend.gather(var, [1]) + >>> tf.keras.backend.eval(var_gathered) + array([[4., 5., 6.]], dtype=float32) + >>> var_gathered = tf.keras.backend.gather(var, [0,1,0]) + >>> tf.keras.backend.eval(var_gathered) + array([[1., 2., 3.], + [4., 5., 6.], + [1., 2., 3.]], dtype=float32) + """ + return tf.compat.v1.gather(reference, indices) # ELEMENT-WISE OPERATIONS -@keras_export('keras.backend.max') +@keras_export("keras.backend.max") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def max(x, axis=None, keepdims=False): - """Maximum value in a tensor. + """Maximum value in a tensor. - Args: - x: A tensor or variable. - axis: An integer, the axis to find maximum values. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. + Args: + x: A tensor or variable. + axis: An integer, the axis to find maximum values. + keepdims: A boolean, whether to keep the dimensions or not. + If `keepdims` is `False`, the rank of the tensor is reduced + by 1. If `keepdims` is `True`, + the reduced dimension is retained with length 1. - Returns: - A tensor with maximum values of `x`. - """ - return tf.reduce_max(x, axis, keepdims) + Returns: + A tensor with maximum values of `x`. + """ + return tf.reduce_max(x, axis, keepdims) -@keras_export('keras.backend.min') +@keras_export("keras.backend.min") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def min(x, axis=None, keepdims=False): - """Minimum value in a tensor. + """Minimum value in a tensor. - Args: - x: A tensor or variable. - axis: An integer, the axis to find minimum values. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. + Args: + x: A tensor or variable. + axis: An integer, the axis to find minimum values. + keepdims: A boolean, whether to keep the dimensions or not. + If `keepdims` is `False`, the rank of the tensor is reduced + by 1. If `keepdims` is `True`, + the reduced dimension is retained with length 1. 
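# [Illustration, a minimal sketch outside this patch; assumes TF 2.x.]
# What the `keepdims` flag shared by these reductions does to the rank.
import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
print(K.max(x, axis=1).shape)                 # (2,): reduced axis dropped
print(K.max(x, axis=1, keepdims=True).shape)  # (2, 1): kept with length 1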
- Returns: - A tensor with minimum values of `x`. - """ - return tf.reduce_min(x, axis, keepdims) + Returns: + A tensor with minimum values of `x`. + """ + return tf.reduce_min(x, axis, keepdims) -@keras_export('keras.backend.sum') +@keras_export("keras.backend.sum") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def sum(x, axis=None, keepdims=False): - """Sum of the values in a tensor, alongside the specified axis. + """Sum of the values in a tensor, alongside the specified axis. - Args: - x: A tensor or variable. - axis: An integer, the axis to sum over. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. + Args: + x: A tensor or variable. + axis: An integer, the axis to sum over. + keepdims: A boolean, whether to keep the dimensions or not. + If `keepdims` is `False`, the rank of the tensor is reduced + by 1. If `keepdims` is `True`, + the reduced dimension is retained with length 1. - Returns: - A tensor with sum of `x`. - """ - return tf.reduce_sum(x, axis, keepdims) + Returns: + A tensor with sum of `x`. + """ + return tf.reduce_sum(x, axis, keepdims) -@keras_export('keras.backend.prod') +@keras_export("keras.backend.prod") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def prod(x, axis=None, keepdims=False): - """Multiplies the values in a tensor, alongside the specified axis. + """Multiplies the values in a tensor, alongside the specified axis. - Args: - x: A tensor or variable. - axis: An integer, the axis to compute the product. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. + Args: + x: A tensor or variable. + axis: An integer, the axis to compute the product. + keepdims: A boolean, whether to keep the dimensions or not. + If `keepdims` is `False`, the rank of the tensor is reduced + by 1. If `keepdims` is `True`, + the reduced dimension is retained with length 1. - Returns: - A tensor with the product of elements of `x`. - """ - return tf.reduce_prod(x, axis, keepdims) + Returns: + A tensor with the product of elements of `x`. + """ + return tf.reduce_prod(x, axis, keepdims) -@keras_export('keras.backend.cumsum') +@keras_export("keras.backend.cumsum") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def cumsum(x, axis=0): - """Cumulative sum of the values in a tensor, alongside the specified axis. + """Cumulative sum of the values in a tensor, alongside the specified axis. - Args: - x: A tensor or variable. - axis: An integer, the axis to compute the sum. + Args: + x: A tensor or variable. + axis: An integer, the axis to compute the sum. - Returns: - A tensor of the cumulative sum of values of `x` along `axis`. - """ - return tf.cumsum(x, axis=axis) + Returns: + A tensor of the cumulative sum of values of `x` along `axis`. + """ + return tf.cumsum(x, axis=axis) -@keras_export('keras.backend.cumprod') +@keras_export("keras.backend.cumprod") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def cumprod(x, axis=0): - """Cumulative product of the values in a tensor, alongside the specified axis. + """Cumulative product of the values in a tensor alongside `axis`. - Args: - x: A tensor or variable. 
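# [Illustration, a minimal sketch outside this patch; assumes TF 2.x.]
# The cumulative reductions above behave like their NumPy counterparts.
import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.constant([1, 2, 3, 4])
print(K.cumsum(x).numpy())   # [ 1  3  6 10]
print(K.cumprod(x).numpy())  # [ 1  2  6 24]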
- axis: An integer, the axis to compute the product. + Args: + x: A tensor or variable. + axis: An integer, the axis to compute the product. - Returns: - A tensor of the cumulative product of values of `x` along `axis`. - """ - return tf.math.cumprod(x, axis=axis) + Returns: + A tensor of the cumulative product of values of `x` along `axis`. + """ + return tf.math.cumprod(x, axis=axis) -@keras_export('keras.backend.var') +@keras_export("keras.backend.var") @doc_controls.do_not_generate_docs def var(x, axis=None, keepdims=False): - """Variance of a tensor, alongside the specified axis. + """Variance of a tensor, alongside the specified axis. - Args: - x: A tensor or variable. - axis: An integer, the axis to compute the variance. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. + Args: + x: A tensor or variable. + axis: An integer, the axis to compute the variance. + keepdims: A boolean, whether to keep the dimensions or not. + If `keepdims` is `False`, the rank of the tensor is reduced + by 1. If `keepdims` is `True`, + the reduced dimension is retained with length 1. - Returns: - A tensor with the variance of elements of `x`. - """ - if x.dtype.base_dtype == tf.bool: - x = tf.cast(x, floatx()) - return tf.math.reduce_variance(x, axis=axis, keepdims=keepdims) + Returns: + A tensor with the variance of elements of `x`. + """ + if x.dtype.base_dtype == tf.bool: + x = tf.cast(x, floatx()) + return tf.math.reduce_variance(x, axis=axis, keepdims=keepdims) -@keras_export('keras.backend.std') +@keras_export("keras.backend.std") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def std(x, axis=None, keepdims=False): - """Standard deviation of a tensor, alongside the specified axis. - - It is an alias to `tf.math.reduce_std`. - - Args: - x: A tensor or variable. It should have numerical dtypes. Boolean type - inputs will be converted to float. - axis: An integer, the axis to compute the standard deviation. If `None` - (the default), reduces all dimensions. Must be in the range - `[-rank(x), rank(x))`. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, the reduced dimension is retained with - length 1. - - Returns: - A tensor with the standard deviation of elements of `x` with same dtype. - Boolean type input will be converted to float. - """ - if x.dtype.base_dtype == tf.bool: - x = tf.cast(x, floatx()) - return tf.math.reduce_std(x, axis=axis, keepdims=keepdims) - - -@keras_export('keras.backend.mean') + """Standard deviation of a tensor, alongside the specified axis. + + It is an alias to `tf.math.reduce_std`. + + Args: + x: A tensor or variable. It should have numerical dtypes. Boolean type + inputs will be converted to float. + axis: An integer, the axis to compute the standard deviation. If `None` + (the default), reduces all dimensions. Must be in the range + `[-rank(x), rank(x))`. + keepdims: A boolean, whether to keep the dimensions or not. + If `keepdims` is `False`, the rank of the tensor is reduced + by 1. If `keepdims` is `True`, the reduced dimension is retained + with length 1. + + Returns: + A tensor with the standard deviation of elements of `x` with same dtype. + Boolean type input will be converted to float. 
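# [Illustration, a minimal sketch outside this patch; assumes TF 2.x.]
# `var` and `std` cast boolean inputs to floatx() before reducing,
# exactly as the docstring above notes.
import tensorflow as tf
from tensorflow.keras import backend as K

b = tf.constant([True, True, False, False])
s = K.std(b)              # input cast to float32, then reduced
print(s.dtype, float(s))  # <dtype: 'float32'> 0.5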
+ """ + if x.dtype.base_dtype == tf.bool: + x = tf.cast(x, floatx()) + return tf.math.reduce_std(x, axis=axis, keepdims=keepdims) + + +@keras_export("keras.backend.mean") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def mean(x, axis=None, keepdims=False): - """Mean of a tensor, alongside the specified axis. + """Mean of a tensor, alongside the specified axis. - Args: - x: A tensor or variable. - axis: A list of integer. Axes to compute the mean. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1 for each entry in `axis`. If `keepdims` is `True`, - the reduced dimensions are retained with length 1. + Args: + x: A tensor or variable. + axis: A list of integer. Axes to compute the mean. + keepdims: A boolean, whether to keep the dimensions or not. + If `keepdims` is `False`, the rank of the tensor is reduced + by 1 for each entry in `axis`. If `keepdims` is `True`, + the reduced dimensions are retained with length 1. - Returns: - A tensor with the mean of elements of `x`. - """ - if x.dtype.base_dtype == tf.bool: - x = tf.cast(x, floatx()) - return tf.reduce_mean(x, axis, keepdims) + Returns: + A tensor with the mean of elements of `x`. + """ + if x.dtype.base_dtype == tf.bool: + x = tf.cast(x, floatx()) + return tf.reduce_mean(x, axis, keepdims) -@keras_export('keras.backend.any') +@keras_export("keras.backend.any") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def any(x, axis=None, keepdims=False): - """Bitwise reduction (logical OR). + """Bitwise reduction (logical OR). - Args: - x: Tensor or variable. - axis: axis along which to perform the reduction. - keepdims: whether the drop or broadcast the reduction axes. + Args: + x: Tensor or variable. + axis: axis along which to perform the reduction. + keepdims: whether the drop or broadcast the reduction axes. - Returns: - A uint8 tensor (0s and 1s). - """ - x = tf.cast(x, tf.bool) - return tf.reduce_any(x, axis, keepdims) + Returns: + A uint8 tensor (0s and 1s). + """ + x = tf.cast(x, tf.bool) + return tf.reduce_any(x, axis, keepdims) -@keras_export('keras.backend.all') +@keras_export("keras.backend.all") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def all(x, axis=None, keepdims=False): - """Bitwise reduction (logical AND). + """Bitwise reduction (logical AND). - Args: - x: Tensor or variable. - axis: axis along which to perform the reduction. - keepdims: whether the drop or broadcast the reduction axes. + Args: + x: Tensor or variable. + axis: axis along which to perform the reduction. + keepdims: whether the drop or broadcast the reduction axes. - Returns: - A uint8 tensor (0s and 1s). - """ - x = tf.cast(x, tf.bool) - return tf.reduce_all(x, axis, keepdims) + Returns: + A uint8 tensor (0s and 1s). + """ + x = tf.cast(x, tf.bool) + return tf.reduce_all(x, axis, keepdims) -@keras_export('keras.backend.argmax') +@keras_export("keras.backend.argmax") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def argmax(x, axis=-1): - """Returns the index of the maximum value along an axis. + """Returns the index of the maximum value along an axis. - Args: - x: Tensor or variable. - axis: axis along which to perform the reduction. + Args: + x: Tensor or variable. + axis: axis along which to perform the reduction. - Returns: - A tensor. - """ - return tf.argmax(x, axis) + Returns: + A tensor. 
+ """ + return tf.argmax(x, axis) -@keras_export('keras.backend.argmin') +@keras_export("keras.backend.argmin") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def argmin(x, axis=-1): - """Returns the index of the minimum value along an axis. + """Returns the index of the minimum value along an axis. - Args: - x: Tensor or variable. - axis: axis along which to perform the reduction. + Args: + x: Tensor or variable. + axis: axis along which to perform the reduction. - Returns: - A tensor. - """ - return tf.argmin(x, axis) + Returns: + A tensor. + """ + return tf.argmin(x, axis) -@keras_export('keras.backend.square') +@keras_export("keras.backend.square") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def square(x): - """Element-wise square. + """Element-wise square. - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - A tensor. - """ - return tf.square(x) + Returns: + A tensor. + """ + return tf.square(x) -@keras_export('keras.backend.abs') +@keras_export("keras.backend.abs") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def abs(x): - """Element-wise absolute value. + """Element-wise absolute value. - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - A tensor. - """ - return tf.abs(x) + Returns: + A tensor. + """ + return tf.abs(x) -@keras_export('keras.backend.sqrt') +@keras_export("keras.backend.sqrt") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def sqrt(x): - """Element-wise square root. + """Element-wise square root. - This function clips negative tensor values to 0 before computing the - square root. + This function clips negative tensor values to 0 before computing the + square root. - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - A tensor. - """ - zero = _constant_to_tensor(0., x.dtype.base_dtype) - x = tf.maximum(x, zero) - return tf.sqrt(x) + Returns: + A tensor. + """ + zero = _constant_to_tensor(0.0, x.dtype.base_dtype) + x = tf.maximum(x, zero) + return tf.sqrt(x) -@keras_export('keras.backend.exp') +@keras_export("keras.backend.exp") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def exp(x): - """Element-wise exponential. + """Element-wise exponential. - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - A tensor. - """ - return tf.exp(x) + Returns: + A tensor. + """ + return tf.exp(x) -@keras_export('keras.backend.log') +@keras_export("keras.backend.log") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def log(x): - """Element-wise log. + """Element-wise log. - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - A tensor. - """ - return tf.math.log(x) + Returns: + A tensor. + """ + return tf.math.log(x) def logsumexp(x, axis=None, keepdims=False): - """Computes log(sum(exp(elements across dimensions of a tensor))). + """Computes log(sum(exp(elements across dimensions of a tensor))). - This function is more numerically stable than log(sum(exp(x))). - It avoids overflows caused by taking the exp of large inputs and - underflows caused by taking the log of small inputs. + This function is more numerically stable than log(sum(exp(x))). + It avoids overflows caused by taking the exp of large inputs and + underflows caused by taking the log of small inputs. - Args: - x: A tensor or variable. - axis: An integer, the axis to reduce over. 
- keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, the reduced dimension is - retained with length 1. + Args: + x: A tensor or variable. + axis: An integer, the axis to reduce over. + keepdims: A boolean, whether to keep the dimensions or not. + If `keepdims` is `False`, the rank of the tensor is reduced + by 1. If `keepdims` is `True`, the reduced dimension is + retained with length 1. - Returns: - The reduced tensor. - """ - return tf.reduce_logsumexp(x, axis, keepdims) + Returns: + The reduced tensor. + """ + return tf.reduce_logsumexp(x, axis, keepdims) -@keras_export('keras.backend.round') +@keras_export("keras.backend.round") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def round(x): - """Element-wise rounding to the closest integer. + """Element-wise rounding to the closest integer. - In case of tie, the rounding mode used is "half to even". + In case of tie, the rounding mode used is "half to even". - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - A tensor. - """ - return tf.round(x) + Returns: + A tensor. + """ + return tf.round(x) -@keras_export('keras.backend.sign') +@keras_export("keras.backend.sign") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def sign(x): - """Element-wise sign. + """Element-wise sign. - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - A tensor. - """ - return tf.sign(x) + Returns: + A tensor. + """ + return tf.sign(x) -@keras_export('keras.backend.pow') +@keras_export("keras.backend.pow") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def pow(x, a): - """Element-wise exponentiation. + """Element-wise exponentiation. - Args: - x: Tensor or variable. - a: Python integer. + Args: + x: Tensor or variable. + a: Python integer. - Returns: - A tensor. - """ - return tf.pow(x, a) + Returns: + A tensor. + """ + return tf.pow(x, a) -@keras_export('keras.backend.clip') +@keras_export("keras.backend.clip") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def clip(x, min_value, max_value): - """Element-wise value clipping. - - Args: - x: Tensor or variable. - min_value: Python float, integer, or tensor. - max_value: Python float, integer, or tensor. - - Returns: - A tensor. - """ - if (isinstance(min_value, (int, float)) and - isinstance(max_value, (int, float))): - if max_value < min_value: - max_value = min_value - if min_value is None: - min_value = -np.inf - if max_value is None: - max_value = np.inf - return tf.clip_by_value(x, min_value, max_value) - - -@keras_export('keras.backend.equal') + """Element-wise value clipping. + + Args: + x: Tensor or variable. + min_value: Python float, integer, or tensor. + max_value: Python float, integer, or tensor. + + Returns: + A tensor. + """ + if isinstance(min_value, (int, float)) and isinstance( + max_value, (int, float) + ): + if max_value < min_value: + max_value = min_value + if min_value is None: + min_value = -np.inf + if max_value is None: + max_value = np.inf + return tf.clip_by_value(x, min_value, max_value) + + +@keras_export("keras.backend.equal") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def equal(x, y): - """Element-wise equality between two tensors. + """Element-wise equality between two tensors. - Args: - x: Tensor or variable. - y: Tensor or variable. 
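# [Illustration, a minimal sketch outside this patch; assumes TF 2.x.]
# Per the branch in `clip` above: when both bounds are Python numbers and
# max_value < min_value, max_value is raised to min_value, so the output
# is constant rather than an error.
import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.constant([-2.0, 0.0, 2.0])
print(K.clip(x, 1.0, 0.5).numpy())  # [1. 1. 1.]: behaves like clip(x, 1, 1)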
+ Args: + x: Tensor or variable. + y: Tensor or variable. - Returns: - A bool tensor. - """ - return tf.equal(x, y) + Returns: + A bool tensor. + """ + return tf.equal(x, y) -@keras_export('keras.backend.not_equal') +@keras_export("keras.backend.not_equal") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def not_equal(x, y): - """Element-wise inequality between two tensors. + """Element-wise inequality between two tensors. - Args: - x: Tensor or variable. - y: Tensor or variable. + Args: + x: Tensor or variable. + y: Tensor or variable. - Returns: - A bool tensor. - """ - return tf.not_equal(x, y) + Returns: + A bool tensor. + """ + return tf.not_equal(x, y) -@keras_export('keras.backend.greater') +@keras_export("keras.backend.greater") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def greater(x, y): - """Element-wise truth value of (x > y). + """Element-wise truth value of (x > y). - Args: - x: Tensor or variable. - y: Tensor or variable. + Args: + x: Tensor or variable. + y: Tensor or variable. - Returns: - A bool tensor. - """ - return tf.greater(x, y) + Returns: + A bool tensor. + """ + return tf.greater(x, y) -@keras_export('keras.backend.greater_equal') +@keras_export("keras.backend.greater_equal") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def greater_equal(x, y): - """Element-wise truth value of (x >= y). + """Element-wise truth value of (x >= y). - Args: - x: Tensor or variable. - y: Tensor or variable. + Args: + x: Tensor or variable. + y: Tensor or variable. - Returns: - A bool tensor. - """ - return tf.greater_equal(x, y) + Returns: + A bool tensor. + """ + return tf.greater_equal(x, y) -@keras_export('keras.backend.less') +@keras_export("keras.backend.less") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def less(x, y): - """Element-wise truth value of (x < y). + """Element-wise truth value of (x < y). - Args: - x: Tensor or variable. - y: Tensor or variable. + Args: + x: Tensor or variable. + y: Tensor or variable. - Returns: - A bool tensor. - """ - return tf.less(x, y) + Returns: + A bool tensor. + """ + return tf.less(x, y) -@keras_export('keras.backend.less_equal') +@keras_export("keras.backend.less_equal") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def less_equal(x, y): - """Element-wise truth value of (x <= y). + """Element-wise truth value of (x <= y). - Args: - x: Tensor or variable. - y: Tensor or variable. + Args: + x: Tensor or variable. + y: Tensor or variable. - Returns: - A bool tensor. - """ - return tf.less_equal(x, y) + Returns: + A bool tensor. + """ + return tf.less_equal(x, y) -@keras_export('keras.backend.maximum') +@keras_export("keras.backend.maximum") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def maximum(x, y): - """Element-wise maximum of two tensors. + """Element-wise maximum of two tensors. - Args: - x: Tensor or variable. - y: Tensor or variable. + Args: + x: Tensor or variable. + y: Tensor or variable. - Returns: - A tensor with the element wise maximum value(s) of `x` and `y`. + Returns: + A tensor with the element wise maximum value(s) of `x` and `y`. 
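# [Illustration, a minimal sketch outside this patch; assumes TF 2.x.]
# The comparison ops above return elementwise bool tensors.
import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.constant([1, 2, 3])
y = tf.constant([3, 2, 1])
print(K.equal(x, y).numpy())          # [False  True False]
print(K.greater_equal(x, y).numpy())  # [False  True  True]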
- Examples: + Examples: - >>> x = tf.Variable([[1, 2], [3, 4]]) - >>> y = tf.Variable([[2, 1], [0, -1]]) - >>> m = tf.keras.backend.maximum(x, y) - >>> m - - """ - return tf.maximum(x, y) + >>> x = tf.Variable([[1, 2], [3, 4]]) + >>> y = tf.Variable([[2, 1], [0, -1]]) + >>> m = tf.keras.backend.maximum(x, y) + >>> m + + """ + return tf.maximum(x, y) -@keras_export('keras.backend.minimum') +@keras_export("keras.backend.minimum") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def minimum(x, y): - """Element-wise minimum of two tensors. + """Element-wise minimum of two tensors. - Args: - x: Tensor or variable. - y: Tensor or variable. + Args: + x: Tensor or variable. + y: Tensor or variable. - Returns: - A tensor. - """ - return tf.minimum(x, y) + Returns: + A tensor. + """ + return tf.minimum(x, y) -@keras_export('keras.backend.sin') +@keras_export("keras.backend.sin") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def sin(x): - """Computes sin of x element-wise. + """Computes sin of x element-wise. - Args: - x: Tensor or variable. + Args: + x: Tensor or variable. - Returns: - A tensor. - """ - return tf.sin(x) + Returns: + A tensor. + """ + return tf.sin(x) -@keras_export('keras.backend.cos') +@keras_export("keras.backend.cos") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def cos(x): - """Computes cos of x element-wise. - - Args: - x: Tensor or variable. - - Returns: - A tensor. - """ - return tf.cos(x) - - -def _regular_normalize_batch_in_training(x, - gamma, - beta, - reduction_axes, - epsilon=1e-3): - """Non-fused version of `normalize_batch_in_training`. - - Args: - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - Returns: - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - mean, var = tf.compat.v1.nn.moments(x, reduction_axes, None, None, False) - normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon) - return normed, mean, var - - -def _broadcast_normalize_batch_in_training(x, - gamma, - beta, - reduction_axes, - epsilon=1e-3): - """Non-fused, broadcast version of `normalize_batch_in_training`. - - Args: - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - Returns: - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - mean, var = tf.compat.v1.nn.moments(x, reduction_axes, None, None, False) - target_shape = [] - for axis in range(ndim(x)): - if axis in reduction_axes: - target_shape.append(1) - else: - target_shape.append(tf.shape(x)[axis]) - target_shape = tf.stack(target_shape) - - broadcast_mean = tf.reshape(mean, target_shape) - broadcast_var = tf.reshape(var, target_shape) - if gamma is None: - broadcast_gamma = None - else: - broadcast_gamma = tf.reshape(gamma, target_shape) - if beta is None: - broadcast_beta = None - else: - broadcast_beta = tf.reshape(beta, target_shape) - - normed = tf.nn.batch_normalization(x, broadcast_mean, broadcast_var, - broadcast_beta, broadcast_gamma, epsilon) - return normed, mean, var - - -def _fused_normalize_batch_in_training(x, - gamma, - beta, - reduction_axes, - epsilon=1e-3): - """Fused version of `normalize_batch_in_training`. 
- - Args: - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - Returns: - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - if list(reduction_axes) == [0, 1, 2]: - normalization_axis = 3 - tf_data_format = 'NHWC' - else: - normalization_axis = 1 - tf_data_format = 'NCHW' - - if gamma is None: - gamma = tf.constant( - 1.0, dtype=x.dtype, shape=[x.shape[normalization_axis]]) - if beta is None: - beta = tf.constant( - 0.0, dtype=x.dtype, shape=[x.shape[normalization_axis]]) - - return tf.compat.v1.nn.fused_batch_norm( - x, gamma, beta, epsilon=epsilon, data_format=tf_data_format) - - -@keras_export('keras.backend.normalize_batch_in_training') -@doc_controls.do_not_generate_docs -def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3): - """Computes mean and std for batch then apply batch_normalization on batch. - - Args: - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - Returns: - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - if ndim(x) == 4 and list(reduction_axes) in [[0, 1, 2], [0, 2, 3]]: - if not _has_nchw_support() and list(reduction_axes) == [0, 2, 3]: - return _broadcast_normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=epsilon) - return _fused_normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=epsilon) - else: - if sorted(reduction_axes) == list(range(ndim(x)))[:-1]: - return _regular_normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=epsilon) - else: - return _broadcast_normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=epsilon) + """Computes cos of x element-wise. + Args: + x: Tensor or variable. -@keras_export('keras.backend.batch_normalization') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): - """Applies batch normalization on x given mean, var, beta and gamma. - - I.e. returns: - `output = (x - mean) / (sqrt(var) + epsilon) * gamma + beta` - - Args: - x: Input tensor or variable. - mean: Mean of batch. - var: Variance of batch. - beta: Tensor with which to center the input. - gamma: Tensor by which to scale the input. - axis: Integer, the axis that should be normalized. - (typically the features axis). - epsilon: Fuzz factor. - - Returns: - A tensor. - """ - if ndim(x) == 4: - # The CPU implementation of `fused_batch_norm` only supports NHWC - if axis == 1 or axis == -3: - tf_data_format = 'NCHW' - elif axis == 3 or axis == -1: - tf_data_format = 'NHWC' + Returns: + A tensor. + """ + return tf.cos(x) + + +def _regular_normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=1e-3 +): + """Non-fused version of `normalize_batch_in_training`. + + Args: + x: Input tensor or variable. + gamma: Tensor by which to scale the input. + beta: Tensor with which to center the input. + reduction_axes: iterable of integers, + axes over which to normalize. + epsilon: Fuzz factor. + + Returns: + A tuple length of 3, `(normalized_tensor, mean, variance)`. 
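# [Illustration, a minimal sketch outside this patch; assumes TF 2.x.]
# The non-fused path above composes moments() with batch_normalization();
# normalizing a 2D batch over axis 0 yields roughly zero mean per feature.
import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.random.normal((8, 3))
normed, mean, var = K.normalize_batch_in_training(
    x, gamma=tf.ones((3,)), beta=tf.zeros((3,)), reduction_axes=[0]
)
print(tf.reduce_mean(normed, axis=0).numpy())  # ~[0. 0. 0.]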
+ """ + mean, var = tf.compat.v1.nn.moments(x, reduction_axes, None, None, False) + normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon) + return normed, mean, var + + +def _broadcast_normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=1e-3 +): + """Non-fused, broadcast version of `normalize_batch_in_training`. + + Args: + x: Input tensor or variable. + gamma: Tensor by which to scale the input. + beta: Tensor with which to center the input. + reduction_axes: iterable of integers, + axes over which to normalize. + epsilon: Fuzz factor. + + Returns: + A tuple length of 3, `(normalized_tensor, mean, variance)`. + """ + mean, var = tf.compat.v1.nn.moments(x, reduction_axes, None, None, False) + target_shape = [] + for axis in range(ndim(x)): + if axis in reduction_axes: + target_shape.append(1) + else: + target_shape.append(tf.shape(x)[axis]) + target_shape = tf.stack(target_shape) + + broadcast_mean = tf.reshape(mean, target_shape) + broadcast_var = tf.reshape(var, target_shape) + if gamma is None: + broadcast_gamma = None else: - tf_data_format = None - - if (tf_data_format == 'NHWC' or - tf_data_format == 'NCHW' and _has_nchw_support()): - # The mean / var / beta / gamma tensors may be broadcasted - # so they may have extra axes of size 1, which should be squeezed. - if ndim(mean) > 1: - mean = tf.reshape(mean, [-1]) - if ndim(var) > 1: - var = tf.reshape(var, [-1]) - if beta is None: - beta = zeros_like(mean) - elif ndim(beta) > 1: - beta = tf.reshape(beta, [-1]) - if gamma is None: - gamma = ones_like(mean) - elif ndim(gamma) > 1: - gamma = tf.reshape(gamma, [-1]) - y, _, _ = tf.compat.v1.nn.fused_batch_norm( + broadcast_gamma = tf.reshape(gamma, target_shape) + if beta is None: + broadcast_beta = None + else: + broadcast_beta = tf.reshape(beta, target_shape) + + normed = tf.nn.batch_normalization( x, - gamma, - beta, - epsilon=epsilon, - mean=mean, - variance=var, - data_format=tf_data_format, - is_training=False + broadcast_mean, + broadcast_var, + broadcast_beta, + broadcast_gamma, + epsilon, + ) + return normed, mean, var + + +def _fused_normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=1e-3 +): + """Fused version of `normalize_batch_in_training`. + + Args: + x: Input tensor or variable. + gamma: Tensor by which to scale the input. + beta: Tensor with which to center the input. + reduction_axes: iterable of integers, + axes over which to normalize. + epsilon: Fuzz factor. + + Returns: + A tuple length of 3, `(normalized_tensor, mean, variance)`. + """ + if list(reduction_axes) == [0, 1, 2]: + normalization_axis = 3 + tf_data_format = "NHWC" + else: + normalization_axis = 1 + tf_data_format = "NCHW" + + if gamma is None: + gamma = tf.constant( + 1.0, dtype=x.dtype, shape=[x.shape[normalization_axis]] + ) + if beta is None: + beta = tf.constant( + 0.0, dtype=x.dtype, shape=[x.shape[normalization_axis]] + ) + + return tf.compat.v1.nn.fused_batch_norm( + x, gamma, beta, epsilon=epsilon, data_format=tf_data_format ) - return y - return tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon) + + +@keras_export("keras.backend.normalize_batch_in_training") +@doc_controls.do_not_generate_docs +def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3): + """Computes mean and std for batch then apply batch_normalization on batch. + + Args: + x: Input tensor or variable. + gamma: Tensor by which to scale the input. + beta: Tensor with which to center the input. 
+ reduction_axes: iterable of integers, + axes over which to normalize. + epsilon: Fuzz factor. + + Returns: + A tuple length of 3, `(normalized_tensor, mean, variance)`. + """ + if ndim(x) == 4 and list(reduction_axes) in [[0, 1, 2], [0, 2, 3]]: + if not _has_nchw_support() and list(reduction_axes) == [0, 2, 3]: + return _broadcast_normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=epsilon + ) + return _fused_normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=epsilon + ) + else: + if sorted(reduction_axes) == list(range(ndim(x)))[:-1]: + return _regular_normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=epsilon + ) + else: + return _broadcast_normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=epsilon + ) + + +@keras_export("keras.backend.batch_normalization") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): + """Applies batch normalization on x given mean, var, beta and gamma. + + I.e. returns: + `output = (x - mean) / (sqrt(var) + epsilon) * gamma + beta` + + Args: + x: Input tensor or variable. + mean: Mean of batch. + var: Variance of batch. + beta: Tensor with which to center the input. + gamma: Tensor by which to scale the input. + axis: Integer, the axis that should be normalized. + (typically the features axis). + epsilon: Fuzz factor. + + Returns: + A tensor. + """ + if ndim(x) == 4: + # The CPU implementation of `fused_batch_norm` only supports NHWC + if axis == 1 or axis == -3: + tf_data_format = "NCHW" + elif axis == 3 or axis == -1: + tf_data_format = "NHWC" + else: + tf_data_format = None + + if ( + tf_data_format == "NHWC" + or tf_data_format == "NCHW" + and _has_nchw_support() + ): + # The mean / var / beta / gamma tensors may be broadcasted + # so they may have extra axes of size 1, which should be squeezed. + if ndim(mean) > 1: + mean = tf.reshape(mean, [-1]) + if ndim(var) > 1: + var = tf.reshape(var, [-1]) + if beta is None: + beta = zeros_like(mean) + elif ndim(beta) > 1: + beta = tf.reshape(beta, [-1]) + if gamma is None: + gamma = ones_like(mean) + elif ndim(gamma) > 1: + gamma = tf.reshape(gamma, [-1]) + y, _, _ = tf.compat.v1.nn.fused_batch_norm( + x, + gamma, + beta, + epsilon=epsilon, + mean=mean, + variance=var, + data_format=tf_data_format, + is_training=False, + ) + return y + return tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon) # SHAPE OPERATIONS -@keras_export('keras.backend.concatenate') +@keras_export("keras.backend.concatenate") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def concatenate(tensors, axis=-1): - """Concatenates a list of tensors alongside the specified axis. - - Args: - tensors: list of tensors to concatenate. - axis: concatenation axis. - - Returns: - A tensor. - - Example: - - >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - >>> b = tf.constant([[10, 20, 30], [40, 50, 60], [70, 80, 90]]) - >>> tf.keras.backend.concatenate((a, b), axis=-1) - - - """ - if axis < 0: - rank = ndim(tensors[0]) - if rank: - axis %= rank - else: - axis = 0 + """Concatenates a list of tensors alongside the specified axis. + + Args: + tensors: list of tensors to concatenate. + axis: concatenation axis. + + Returns: + A tensor. 
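# [Illustration, a minimal sketch outside this patch; assumes TF 2.x and
# NumPy.] Checking `batch_normalization` numerically on the non-fused,
# rank-2 path. Note the underlying op places epsilon inside the square
# root, (x - mean) / sqrt(var + epsilon), which the docstring formula
# above only approximates.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.random.normal((4, 3))
mean = tf.reduce_mean(x, axis=0)
var = tf.math.reduce_variance(x, axis=0)
y = K.batch_normalization(
    x, mean, var, tf.zeros((3,)), tf.ones((3,)), epsilon=1e-3
)
ref = (x - mean) / tf.sqrt(var + 1e-3)
np.testing.assert_allclose(y.numpy(), ref.numpy(), rtol=1e-5, atol=1e-6)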
+ + Example: - if py_all(is_sparse(x) for x in tensors): - return tf.compat.v1.sparse_concat(axis, tensors) - elif py_all(isinstance(x, tf.RaggedTensor) for x in tensors): - return tf.concat(tensors, axis) - else: - return tf.concat([to_dense(x) for x in tensors], axis) + >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + >>> b = tf.constant([[10, 20, 30], [40, 50, 60], [70, 80, 90]]) + >>> tf.keras.backend.concatenate((a, b), axis=-1) + + """ + if axis < 0: + rank = ndim(tensors[0]) + if rank: + axis %= rank + else: + axis = 0 + + if py_all(is_sparse(x) for x in tensors): + return tf.compat.v1.sparse_concat(axis, tensors) + elif py_all(isinstance(x, tf.RaggedTensor) for x in tensors): + return tf.concat(tensors, axis) + else: + return tf.concat([to_dense(x) for x in tensors], axis) -@keras_export('keras.backend.reshape') + +@keras_export("keras.backend.reshape") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def reshape(x, shape): - """Reshapes a tensor to the specified shape. + """Reshapes a tensor to the specified shape. + + Args: + x: Tensor or variable. + shape: Target shape tuple. + + Returns: + A tensor. + + Example: + + >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + >>> a + + >>> tf.keras.backend.reshape(a, shape=(2, 6)) + + + """ + return tf.reshape(x, shape) - Args: - x: Tensor or variable. - shape: Target shape tuple. - Returns: - A tensor. +@keras_export("keras.backend.permute_dimensions") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def permute_dimensions(x, pattern): + """Permutes axes in a tensor. - Example: + Args: + x: Tensor or variable. + pattern: A tuple of + dimension indices, e.g. `(0, 2, 1)`. - >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - >>> a - - >>> tf.keras.backend.reshape(a, shape=(2, 6)) - + Returns: + A tensor. + + Example: + + >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + >>> a + + >>> tf.keras.backend.permute_dimensions(a, pattern=(1, 0)) + - """ - return tf.reshape(x, shape) + """ + return tf.compat.v1.transpose(x, perm=pattern) -@keras_export('keras.backend.permute_dimensions') +@keras_export("keras.backend.resize_images") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs -def permute_dimensions(x, pattern): - """Permutes axes in a tensor. +def resize_images( + x, height_factor, width_factor, data_format, interpolation="nearest" +): + """Resizes the images contained in a 4D tensor. + + Args: + x: Tensor or variable to resize. + height_factor: Positive integer. + width_factor: Positive integer. + data_format: One of `"channels_first"`, `"channels_last"`. + interpolation: A string, one of `"area"`, `"bicubic"`, `"bilinear"`, + `"gaussian"`, `"lanczos3"`, `"lanczos5"`, `"mitchellcubic"`, + `"nearest"`. - Args: - x: Tensor or variable. - pattern: A tuple of - dimension indices, e.g. `(0, 2, 1)`. + Returns: + A tensor. - Returns: - A tensor. + Raises: + ValueError: in case of incorrect value for + `data_format` or `interpolation`. 
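# [Illustration, a minimal sketch outside this patch; assumes TF 2.x.]
# The dispatch in `concatenate` above keeps all-sparse inputs sparse;
# mixed or dense inputs fall through to a dense tf.concat.
import tensorflow as tf
from tensorflow.keras import backend as K

a = tf.sparse.from_dense([[1, 0], [0, 2]])
b = tf.sparse.from_dense([[0, 3], [4, 0]])
out = K.concatenate([a, b], axis=1)
print(isinstance(out, tf.SparseTensor))  # True: the sparse branch was taken
print(tf.sparse.to_dense(out).numpy())   # [[1 0 0 3] [0 2 4 0]]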
+ """ + if data_format == "channels_first": + rows, cols = 2, 3 + elif data_format == "channels_last": + rows, cols = 1, 2 + else: + raise ValueError(f"Invalid `data_format` argument: {data_format}") - Example: + new_shape = x.shape[rows : cols + 1] + if new_shape.is_fully_defined(): + new_shape = tf.constant(new_shape.as_list(), dtype="int32") + else: + new_shape = tf.shape(x)[rows : cols + 1] + new_shape *= tf.constant( + np.array([height_factor, width_factor], dtype="int32") + ) - >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - >>> a - - >>> tf.keras.backend.permute_dimensions(a, pattern=(1, 0)) - + if data_format == "channels_first": + x = permute_dimensions(x, [0, 2, 3, 1]) + interpolations = { + "area": tf.image.ResizeMethod.AREA, + "bicubic": tf.image.ResizeMethod.BICUBIC, + "bilinear": tf.image.ResizeMethod.BILINEAR, + "gaussian": tf.image.ResizeMethod.GAUSSIAN, + "lanczos3": tf.image.ResizeMethod.LANCZOS3, + "lanczos5": tf.image.ResizeMethod.LANCZOS5, + "mitchellcubic": tf.image.ResizeMethod.MITCHELLCUBIC, + "nearest": tf.image.ResizeMethod.NEAREST_NEIGHBOR, + } + interploations_list = '"' + '", "'.join(interpolations.keys()) + '"' + if interpolation in interpolations: + x = tf.image.resize(x, new_shape, method=interpolations[interpolation]) + else: + raise ValueError( + "`interpolation` argument should be one of: " + f'{interploations_list}. Received: "{interpolation}".' + ) + if data_format == "channels_first": + x = permute_dimensions(x, [0, 3, 1, 2]) - """ - return tf.compat.v1.transpose(x, perm=pattern) + return x -@keras_export('keras.backend.resize_images') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def resize_images(x, height_factor, width_factor, data_format, - interpolation='nearest'): - """Resizes the images contained in a 4D tensor. - - Args: - x: Tensor or variable to resize. - height_factor: Positive integer. - width_factor: Positive integer. - data_format: One of `"channels_first"`, `"channels_last"`. - interpolation: A string, one of `"area"`, `"bicubic"`, `"bilinear"`, - `"gaussian"`, `"lanczos3"`, `"lanczos5"`, `"mitchellcubic"`, - `"nearest"`. - - Returns: - A tensor. - - Raises: - ValueError: in case of incorrect value for - `data_format` or `interpolation`. - """ - if data_format == 'channels_first': - rows, cols = 2, 3 - elif data_format == 'channels_last': - rows, cols = 1, 2 - else: - raise ValueError('Invalid `data_format` argument: %s' % (data_format,)) - - new_shape = x.shape[rows:cols + 1] - if new_shape.is_fully_defined(): - new_shape = tf.constant(new_shape.as_list(), dtype='int32') - else: - new_shape = tf.shape(x)[rows:cols + 1] - new_shape *= tf.constant( - np.array([height_factor, width_factor], dtype='int32')) - - if data_format == 'channels_first': - x = permute_dimensions(x, [0, 2, 3, 1]) - interpolations = { - 'area': tf.image.ResizeMethod.AREA, - 'bicubic': tf.image.ResizeMethod.BICUBIC, - 'bilinear': tf.image.ResizeMethod.BILINEAR, - 'gaussian': tf.image.ResizeMethod.GAUSSIAN, - 'lanczos3': tf.image.ResizeMethod.LANCZOS3, - 'lanczos5': tf.image.ResizeMethod.LANCZOS5, - 'mitchellcubic': tf.image.ResizeMethod.MITCHELLCUBIC, - 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, - } - interploations_list = '"' + '", "'.join(interpolations.keys()) + '"' - if interpolation in interpolations: - x = tf.image.resize(x, new_shape, method=interpolations[interpolation]) - else: - raise ValueError('`interpolation` argument should be one of: ' - f'{interploations_list}. 
Received: "{interpolation}".') - if data_format == 'channels_first': - x = permute_dimensions(x, [0, 3, 1, 2]) - - return x - - -@keras_export('keras.backend.resize_volumes') +@keras_export("keras.backend.resize_volumes") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): - """Resizes the volume contained in a 5D tensor. - - Args: - x: Tensor or variable to resize. - depth_factor: Positive integer. - height_factor: Positive integer. - width_factor: Positive integer. - data_format: One of `"channels_first"`, `"channels_last"`. - - Returns: - A tensor. - - Raises: - ValueError: if `data_format` is neither - `channels_last` or `channels_first`. - """ - if data_format == 'channels_first': - output = repeat_elements(x, depth_factor, axis=2) - output = repeat_elements(output, height_factor, axis=3) - output = repeat_elements(output, width_factor, axis=4) - return output - elif data_format == 'channels_last': - output = repeat_elements(x, depth_factor, axis=1) - output = repeat_elements(output, height_factor, axis=2) - output = repeat_elements(output, width_factor, axis=3) - return output - else: - raise ValueError('Invalid data_format: ' + str(data_format)) + """Resizes the volume contained in a 5D tensor. + + Args: + x: Tensor or variable to resize. + depth_factor: Positive integer. + height_factor: Positive integer. + width_factor: Positive integer. + data_format: One of `"channels_first"`, `"channels_last"`. + + Returns: + A tensor. + + Raises: + ValueError: if `data_format` is neither + `channels_last` or `channels_first`. + """ + if data_format == "channels_first": + output = repeat_elements(x, depth_factor, axis=2) + output = repeat_elements(output, height_factor, axis=3) + output = repeat_elements(output, width_factor, axis=4) + return output + elif data_format == "channels_last": + output = repeat_elements(x, depth_factor, axis=1) + output = repeat_elements(output, height_factor, axis=2) + output = repeat_elements(output, width_factor, axis=3) + return output + else: + raise ValueError("Invalid data_format: " + str(data_format)) -@keras_export('keras.backend.repeat_elements') +@keras_export("keras.backend.repeat_elements") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def repeat_elements(x, rep, axis): - """Repeats the elements of a tensor along an axis, like `np.repeat`. - - If `x` has shape `(s1, s2, s3)` and `axis` is `1`, the output - will have shape `(s1, s2 * rep, s3)`. - - Args: - x: Tensor or variable. - rep: Python integer, number of times to repeat. - axis: Axis along which to repeat. - - Returns: - A tensor. - - Example: - - >>> b = tf.constant([1, 2, 3]) - >>> tf.keras.backend.repeat_elements(b, rep=2, axis=0) - - - """ - x_shape = x.shape.as_list() - # For static axis - if x_shape[axis] is not None: - # slices along the repeat axis - splits = tf.split(value=x, - num_or_size_splits=x_shape[axis], - axis=axis) - # repeat each slice the given number of reps - x_rep = [s for s in splits for _ in range(rep)] - return concatenate(x_rep, axis) - - # Here we use tf.tile to mimic behavior of np.repeat so that - # we can handle dynamic shapes (that include None). - # To do that, we need an auxiliary axis to repeat elements along - # it and then merge them along the desired axis. 
- - # Repeating - auxiliary_axis = axis + 1 - x_shape = tf.shape(x) - x_rep = tf.expand_dims(x, axis=auxiliary_axis) - reps = np.ones(len(x.shape) + 1) - reps[auxiliary_axis] = rep - x_rep = tf.tile(x_rep, reps) - - # Merging - reps = np.delete(reps, auxiliary_axis) - reps[axis] = rep - reps = tf.constant(reps, dtype='int32') - x_shape *= reps - x_rep = tf.reshape(x_rep, x_shape) - - # Fix shape representation - x_shape = x.shape.as_list() - x_rep.set_shape(x_shape) - x_rep._keras_shape = tuple(x_shape) - return x_rep - - -@keras_export('keras.backend.repeat') + """Repeats the elements of a tensor along an axis, like `np.repeat`. + + If `x` has shape `(s1, s2, s3)` and `axis` is `1`, the output + will have shape `(s1, s2 * rep, s3)`. + + Args: + x: Tensor or variable. + rep: Python integer, number of times to repeat. + axis: Axis along which to repeat. + + Returns: + A tensor. + + Example: + + >>> b = tf.constant([1, 2, 3]) + >>> tf.keras.backend.repeat_elements(b, rep=2, axis=0) + + + """ + x_shape = x.shape.as_list() + # For static axis + if x_shape[axis] is not None: + # slices along the repeat axis + splits = tf.split(value=x, num_or_size_splits=x_shape[axis], axis=axis) + # repeat each slice the given number of reps + x_rep = [s for s in splits for _ in range(rep)] + return concatenate(x_rep, axis) + + # Here we use tf.tile to mimic behavior of np.repeat so that + # we can handle dynamic shapes (that include None). + # To do that, we need an auxiliary axis to repeat elements along + # it and then merge them along the desired axis. + + # Repeating + auxiliary_axis = axis + 1 + x_shape = tf.shape(x) + x_rep = tf.expand_dims(x, axis=auxiliary_axis) + reps = np.ones(len(x.shape) + 1) + reps[auxiliary_axis] = rep + x_rep = tf.tile(x_rep, reps) + + # Merging + reps = np.delete(reps, auxiliary_axis) + reps[axis] = rep + reps = tf.constant(reps, dtype="int32") + x_shape *= reps + x_rep = tf.reshape(x_rep, x_shape) + + # Fix shape representation + x_shape = x.shape.as_list() + x_rep.set_shape(x_shape) + x_rep._keras_shape = tuple(x_shape) + return x_rep + + +@keras_export("keras.backend.repeat") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def repeat(x, n): - """Repeats a 2D tensor. - - if `x` has shape (samples, dim) and `n` is `2`, - the output will have shape `(samples, 2, dim)`. - - Args: - x: Tensor or variable. - n: Python integer, number of times to repeat. + """Repeats a 2D tensor. - Returns: - A tensor. + if `x` has shape (samples, dim) and `n` is `2`, + the output will have shape `(samples, 2, dim)`. - Example: + Args: + x: Tensor or variable. + n: Python integer, number of times to repeat. - >>> b = tf.constant([[1, 2], [3, 4]]) - >>> b - - >>> tf.keras.backend.repeat(b, n=2) - + Returns: + A tensor. + + Example: + + >>> b = tf.constant([[1, 2], [3, 4]]) + >>> b + + >>> tf.keras.backend.repeat(b, n=2) + - """ - assert ndim(x) == 2 - x = tf.expand_dims(x, 1) - pattern = tf.stack([1, n, 1]) - return tf.tile(x, pattern) + """ + assert ndim(x) == 2 + x = tf.expand_dims(x, 1) + pattern = tf.stack([1, n, 1]) + return tf.tile(x, pattern) -@keras_export('keras.backend.arange') +@keras_export("keras.backend.arange") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs -def arange(start, stop=None, step=1, dtype='int32'): - """Creates a 1D tensor containing a sequence of integers. +def arange(start, stop=None, step=1, dtype="int32"): + """Creates a 1D tensor containing a sequence of integers. 
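# [Illustration, a minimal sketch outside this patch; assumes TF 2.x.]
# `repeat` above is expand_dims(x, 1) followed by tile, turning a 2D
# (samples, dim) tensor into a 3D (samples, n, dim) one.
import tensorflow as tf
from tensorflow.keras import backend as K

b = tf.constant([[1, 2], [3, 4]])
out = K.repeat(b, n=2)
print(out.shape)       # (2, 2, 2): (samples, n, dim)
print(out.numpy()[0])  # [[1 2] [1 2]]: row 0 repeated n times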
-@keras_export('keras.backend.arange')
+@keras_export("keras.backend.arange")
 @tf.__internal__.dispatch.add_dispatch_support
 @doc_controls.do_not_generate_docs
-def arange(start, stop=None, step=1, dtype='int32'):
-  """Creates a 1D tensor containing a sequence of integers.
-
-  The function arguments use the same convention as
-  Theano's arange: if only one argument is provided,
-  it is in fact the "stop" argument and "start" is 0.
-
-  The default type of the returned tensor is `'int32'` to
-  match TensorFlow's default.
-
-  Args:
-      start: Start value.
-      stop: Stop value.
-      step: Difference between two successive values.
-      dtype: Integer dtype to use.
-
-  Returns:
-      An integer tensor.
-
-  Example:
-
-  >>> tf.keras.backend.arange(start=0, stop=10, step=1.5)
-  <tf.Tensor: shape=(7,), dtype=float32,
-      numpy=array([0. , 1.5, 3. , 4.5, 6. , 7.5, 9. ], dtype=float32)>
-
-  """
-  # Match the behavior of numpy and Theano by returning an empty sequence.
-  if stop is None and start < 0:
-    start = 0
-  result = tf.range(start, limit=stop, delta=step, name='arange')
-  if dtype != 'int32':
-    result = cast(result, dtype)
-  return result
+def arange(start, stop=None, step=1, dtype="int32"):
+    """Creates a 1D tensor containing a sequence of integers.
+
+    The function arguments use the same convention as
+    Theano's arange: if only one argument is provided,
+    it is in fact the "stop" argument and "start" is 0.
+
+    The default type of the returned tensor is `'int32'` to
+    match TensorFlow's default.
+
+    Args:
+        start: Start value.
+        stop: Stop value.
+        step: Difference between two successive values.
+        dtype: Integer dtype to use.
+
+    Returns:
+        An integer tensor.
+
+    Example:
+
+    >>> tf.keras.backend.arange(start=0, stop=10, step=1.5)
+    <tf.Tensor: shape=(7,), dtype=float32,
+        numpy=array([0. , 1.5, 3. , 4.5, 6. , 7.5, 9. ], dtype=float32)>
+
+    """
+    # Match the behavior of numpy and Theano by returning an empty sequence.
+    if stop is None and start < 0:
+        start = 0
+    result = tf.range(start, limit=stop, delta=step, name="arange")
+    if dtype != "int32":
+        result = cast(result, dtype)
+    return result


-@keras_export('keras.backend.tile')
+@keras_export("keras.backend.tile")
 @tf.__internal__.dispatch.add_dispatch_support
 @doc_controls.do_not_generate_docs
 def tile(x, n):
-  """Creates a tensor by tiling `x` by `n`.
-
-  Args:
-      x: A tensor or variable
-      n: A list of integer. The length must be the same as the number of
-          dimensions in `x`.
-
-  Returns:
-      A tiled tensor.
-  """
-  if isinstance(n, int):
-    n = [n]
-  return tf.tile(x, n)
+    """Creates a tensor by tiling `x` by `n`.
+
+    Args:
+        x: A tensor or variable.
+        n: A list of integers. The length must be the same as the number of
+            dimensions in `x`.
+
+    Returns:
+        A tiled tensor.
+    """
+    if isinstance(n, int):
+        n = [n]
+    return tf.tile(x, n)


-@keras_export('keras.backend.flatten')
+@keras_export("keras.backend.flatten")
 @tf.__internal__.dispatch.add_dispatch_support
 @doc_controls.do_not_generate_docs
 def flatten(x):
-  """Flatten a tensor.
-
-  Args:
-      x: A tensor or variable.
-
-  Returns:
-      A tensor, reshaped into 1-D
-
-  Example:
-
-  >>> b = tf.constant([[1, 2], [3, 4]])
-  >>> b
-  <tf.Tensor: shape=(2, 2), dtype=int32, numpy=
-  array([[1, 2],
-         [3, 4]], dtype=int32)>
-  >>> tf.keras.backend.flatten(b)
-  <tf.Tensor: shape=(4,), dtype=int32,
-      numpy=array([1, 2, 3, 4], dtype=int32)>
-
-  """
-  return tf.reshape(x, [-1])
+    """Flatten a tensor.
+
+    Args:
+        x: A tensor or variable.
+
+    Returns:
+        A tensor, reshaped into 1-D.
+
+    Example:
+
+    >>> b = tf.constant([[1, 2], [3, 4]])
+    >>> b
+    <tf.Tensor: shape=(2, 2), dtype=int32, numpy=
+    array([[1, 2],
+           [3, 4]], dtype=int32)>
+    >>> tf.keras.backend.flatten(b)
+    <tf.Tensor: shape=(4,), dtype=int32,
+        numpy=array([1, 2, 3, 4], dtype=int32)>
+
+    """
+    return tf.reshape(x, [-1])


-@keras_export('keras.backend.batch_flatten')
+@keras_export("keras.backend.batch_flatten")
 @tf.__internal__.dispatch.add_dispatch_support
 @doc_controls.do_not_generate_docs
 def batch_flatten(x):
-  """Turn a nD tensor into a 2D tensor with same 0th dimension.
+    """Turn a nD tensor into a 2D tensor with same 0th dimension.
-  In other words, it flattens each data samples of a batch.
+    In other words, it flattens each data sample of a batch.
-  Args:
-      x: A tensor or variable.
+    Args:
+        x: A tensor or variable.
-  Returns:
-      A tensor.
+    Returns:
+        A tensor.
-  Examples:
-    Flattening a 3D tensor to 2D by collapsing the last dimension.
+    Examples:
+      Flattening a 3D tensor to 2D by collapsing the last dimension.
- >>> x_batch = tf.keras.backend.ones(shape=(2, 3, 4, 5)) - >>> x_batch_flatten = batch_flatten(x_batch) - >>> tf.keras.backend.int_shape(x_batch_flatten) - (2, 60) + >>> x_batch = tf.keras.backend.ones(shape=(2, 3, 4, 5)) + >>> x_batch_flatten = batch_flatten(x_batch) + >>> tf.keras.backend.int_shape(x_batch_flatten) + (2, 60) - """ - x = tf.reshape(x, tf.stack([-1, prod(shape(x)[1:])])) - return x + """ + x = tf.reshape(x, tf.stack([-1, prod(shape(x)[1:])])) + return x -@keras_export('keras.backend.expand_dims') +@keras_export("keras.backend.expand_dims") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def expand_dims(x, axis=-1): - """Adds a 1-sized dimension at index "axis". + """Adds a 1-sized dimension at index "axis". - Args: - x: A tensor or variable. - axis: Position where to add a new axis. + Args: + x: A tensor or variable. + axis: Position where to add a new axis. - Returns: - A tensor with expanded dimensions. - """ - return tf.expand_dims(x, axis) + Returns: + A tensor with expanded dimensions. + """ + return tf.expand_dims(x, axis) -@keras_export('keras.backend.squeeze') +@keras_export("keras.backend.squeeze") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def squeeze(x, axis): - """Removes a 1-dimension from the tensor at index "axis". + """Removes a 1-dimension from the tensor at index "axis". - Args: - x: A tensor or variable. - axis: Axis to drop. + Args: + x: A tensor or variable. + axis: Axis to drop. - Returns: - A tensor with the same data as `x` but reduced dimensions. - """ - return tf.squeeze(x, [axis]) + Returns: + A tensor with the same data as `x` but reduced dimensions. + """ + return tf.squeeze(x, [axis]) -@keras_export('keras.backend.temporal_padding') +@keras_export("keras.backend.temporal_padding") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def temporal_padding(x, padding=(1, 1)): - """Pads the middle dimension of a 3D tensor. + """Pads the middle dimension of a 3D tensor. - Args: - x: Tensor or variable. - padding: Tuple of 2 integers, how many zeros to - add at the start and end of dim 1. + Args: + x: Tensor or variable. + padding: Tuple of 2 integers, how many zeros to + add at the start and end of dim 1. - Returns: - A padded 3D tensor. - """ - assert len(padding) == 2 - pattern = [[0, 0], [padding[0], padding[1]], [0, 0]] - return tf.compat.v1.pad(x, pattern) + Returns: + A padded 3D tensor. + """ + assert len(padding) == 2 + pattern = [[0, 0], [padding[0], padding[1]], [0, 0]] + return tf.compat.v1.pad(x, pattern) -@keras_export('keras.backend.spatial_2d_padding') +@keras_export("keras.backend.spatial_2d_padding") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None): - """Pads the 2nd and 3rd dimensions of a 4D tensor. - - Args: - x: Tensor or variable. - padding: Tuple of 2 tuples, padding pattern. - data_format: One of `channels_last` or `channels_first`. - - Returns: - A padded 4D tensor. - - Raises: - ValueError: if `data_format` is neither - `channels_last` or `channels_first`. 
- """ - assert len(padding) == 2 - assert len(padding[0]) == 2 - assert len(padding[1]) == 2 - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - if data_format == 'channels_first': - pattern = [[0, 0], [0, 0], list(padding[0]), list(padding[1])] - else: - pattern = [[0, 0], list(padding[0]), list(padding[1]), [0, 0]] - return tf.compat.v1.pad(x, pattern) - - -@keras_export('keras.backend.spatial_3d_padding') + """Pads the 2nd and 3rd dimensions of a 4D tensor. + + Args: + x: Tensor or variable. + padding: Tuple of 2 tuples, padding pattern. + data_format: One of `channels_last` or `channels_first`. + + Returns: + A padded 4D tensor. + + Raises: + ValueError: if `data_format` is neither + `channels_last` or `channels_first`. + """ + assert len(padding) == 2 + assert len(padding[0]) == 2 + assert len(padding[1]) == 2 + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + + if data_format == "channels_first": + pattern = [[0, 0], [0, 0], list(padding[0]), list(padding[1])] + else: + pattern = [[0, 0], list(padding[0]), list(padding[1]), [0, 0]] + return tf.compat.v1.pad(x, pattern) + + +@keras_export("keras.backend.spatial_3d_padding") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None): - """Pads 5D tensor with zeros along the depth, height, width dimensions. - - Pads these dimensions with respectively - "padding[0]", "padding[1]" and "padding[2]" zeros left and right. - - For 'channels_last' data_format, - the 2nd, 3rd and 4th dimension will be padded. - For 'channels_first' data_format, - the 3rd, 4th and 5th dimension will be padded. - - Args: - x: Tensor or variable. - padding: Tuple of 3 tuples, padding pattern. - data_format: One of `channels_last` or `channels_first`. - - Returns: - A padded 5D tensor. - - Raises: - ValueError: if `data_format` is neither - `channels_last` or `channels_first`. - - """ - assert len(padding) == 3 - assert len(padding[0]) == 2 - assert len(padding[1]) == 2 - assert len(padding[2]) == 2 - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - if data_format == 'channels_first': - pattern = [[0, 0], [0, 0], [padding[0][0], padding[0][1]], - [padding[1][0], padding[1][1]], [padding[2][0], padding[2][1]]] - else: - pattern = [[0, 0], [padding[0][0], padding[0][1]], - [padding[1][0], padding[1][1]], [padding[2][0], - padding[2][1]], [0, 0]] - return tf.compat.v1.pad(x, pattern) - - -@keras_export('keras.backend.stack') + """Pads 5D tensor with zeros along the depth, height, width dimensions. + + Pads these dimensions with respectively + "padding[0]", "padding[1]" and "padding[2]" zeros left and right. + + For 'channels_last' data_format, + the 2nd, 3rd and 4th dimension will be padded. + For 'channels_first' data_format, + the 3rd, 4th and 5th dimension will be padded. + + Args: + x: Tensor or variable. + padding: Tuple of 3 tuples, padding pattern. + data_format: One of `channels_last` or `channels_first`. + + Returns: + A padded 5D tensor. + + Raises: + ValueError: if `data_format` is neither + `channels_last` or `channels_first`. 
+ + """ + assert len(padding) == 3 + assert len(padding[0]) == 2 + assert len(padding[1]) == 2 + assert len(padding[2]) == 2 + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + + if data_format == "channels_first": + pattern = [ + [0, 0], + [0, 0], + [padding[0][0], padding[0][1]], + [padding[1][0], padding[1][1]], + [padding[2][0], padding[2][1]], + ] + else: + pattern = [ + [0, 0], + [padding[0][0], padding[0][1]], + [padding[1][0], padding[1][1]], + [padding[2][0], padding[2][1]], + [0, 0], + ] + return tf.compat.v1.pad(x, pattern) + + +@keras_export("keras.backend.stack") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def stack(x, axis=0): - """Stacks a list of rank `R` tensors into a rank `R+1` tensor. + """Stacks a list of rank `R` tensors into a rank `R+1` tensor. - Args: - x: List of tensors. - axis: Axis along which to perform stacking. + Args: + x: List of tensors. + axis: Axis along which to perform stacking. - Returns: - A tensor. + Returns: + A tensor. - Example: + Example: - >>> a = tf.constant([[1, 2],[3, 4]]) - >>> b = tf.constant([[10, 20],[30, 40]]) - >>> tf.keras.backend.stack((a, b)) - + >>> a = tf.constant([[1, 2],[3, 4]]) + >>> b = tf.constant([[10, 20],[30, 40]]) + >>> tf.keras.backend.stack((a, b)) + - """ - return tf.stack(x, axis=axis) + """ + return tf.stack(x, axis=axis) -@keras_export('keras.backend.one_hot') +@keras_export("keras.backend.one_hot") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def one_hot(indices, num_classes): - """Computes the one-hot representation of an integer tensor. + """Computes the one-hot representation of an integer tensor. - Args: - indices: nD integer tensor of shape - `(batch_size, dim1, dim2, ... dim(n-1))` - num_classes: Integer, number of classes to consider. + Args: + indices: nD integer tensor of shape + `(batch_size, dim1, dim2, ... dim(n-1))` + num_classes: Integer, number of classes to consider. - Returns: - (n + 1)D one hot representation of the input - with shape `(batch_size, dim1, dim2, ... dim(n-1), num_classes)` + Returns: + (n + 1)D one hot representation of the input + with shape `(batch_size, dim1, dim2, ... dim(n-1), num_classes)` - Returns: - The one-hot tensor. - """ - return tf.one_hot(indices, depth=num_classes, axis=-1) + Returns: + The one-hot tensor. + """ + return tf.one_hot(indices, depth=num_classes, axis=-1) -@keras_export('keras.backend.reverse') +@keras_export("keras.backend.reverse") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def reverse(x, axes): - """Reverse a tensor along the specified axes. + """Reverse a tensor along the specified axes. - Args: - x: Tensor to reverse. - axes: Integer or iterable of integers. - Axes to reverse. + Args: + x: Tensor to reverse. + axes: Integer or iterable of integers. + Axes to reverse. - Returns: - A tensor. - """ - if isinstance(axes, int): - axes = [axes] - return tf.reverse(x, axes) + Returns: + A tensor. + """ + if isinstance(axes, int): + axes = [axes] + return tf.reverse(x, axes) # VALUE MANIPULATION @@ -4028,2630 +4188,2893 @@ def reverse(x, axes): >>> v.assign_add(1.) >>> print(v.numpy()) - 3.0"""[3:] # Prune first newline and indent to match the docstring template. + 3.0"""[ + 3: +] # Prune first newline and indent to match the docstring template. 
-@keras_export('keras.backend.get_value') +@keras_export("keras.backend.get_value") @doc_controls.do_not_generate_docs def get_value(x): - """Returns the value of a variable. + """Returns the value of a variable. - `backend.get_value` is the complement of `backend.set_value`, and provides - a generic interface for reading from variables while abstracting away the - differences between TensorFlow 1.x and 2.x semantics. + `backend.get_value` is the complement of `backend.set_value`, and provides + a generic interface for reading from variables while abstracting away the + differences between TensorFlow 1.x and 2.x semantics. - {snippet} + {snippet} - Args: - x: input variable. + Args: + x: input variable. - Returns: - A Numpy array. - """ - if not tf.is_tensor(x): - return x - if tf.executing_eagerly() or isinstance(x, tf.__internal__.EagerTensor): - return x.numpy() - if not getattr(x, '_in_graph_mode', True): - # This is a variable which was created in an eager context, but is being - # evaluated from a Graph. - with tf.__internal__.eager_context.eager_mode(): - return x.numpy() - - if tf.compat.v1.executing_eagerly_outside_functions(): - # This method of evaluating works inside the Keras FuncGraph. - with tf.init_scope(): - return x.numpy() + Returns: + A Numpy array. + """ + if not tf.is_tensor(x): + return x + if tf.executing_eagerly() or isinstance(x, tf.__internal__.EagerTensor): + return x.numpy() + if not getattr(x, "_in_graph_mode", True): + # This is a variable which was created in an eager context, but is being + # evaluated from a Graph. + with tf.__internal__.eager_context.eager_mode(): + return x.numpy() + + if tf.compat.v1.executing_eagerly_outside_functions(): + # This method of evaluating works inside the Keras FuncGraph. + with tf.init_scope(): + return x.numpy() - with x.graph.as_default(): - return x.eval(session=get_session((x,))) + with x.graph.as_default(): + return x.eval(session=get_session((x,))) -@keras_export('keras.backend.batch_get_value') +@keras_export("keras.backend.batch_get_value") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def batch_get_value(tensors): - """Returns the value of more than one tensor variable. + """Returns the value of more than one tensor variable. - Args: - tensors: list of ops to run. + Args: + tensors: list of ops to run. - Returns: - A list of Numpy arrays. + Returns: + A list of Numpy arrays. - Raises: - RuntimeError: If this method is called inside defun. - """ - if tf.executing_eagerly(): - return [x.numpy() for x in tensors] - elif tf.inside_function(): # pylint: disable=protected-access - raise RuntimeError('Cannot get value inside Tensorflow graph function.') - if tensors: - return get_session(tensors).run(tensors) - else: - return [] + Raises: + RuntimeError: If this method is called inside defun. + """ + if tf.executing_eagerly(): + return [x.numpy() for x in tensors] + elif tf.inside_function(): + raise RuntimeError("Cannot get value inside Tensorflow graph function.") + if tensors: + return get_session(tensors).run(tensors) + else: + return [] -@keras_export('keras.backend.set_value') +@keras_export("keras.backend.set_value") @doc_controls.do_not_generate_docs def set_value(x, value): - """Sets the value of a variable, from a Numpy array. - - `backend.set_value` is the complement of `backend.get_value`, and provides - a generic interface for assigning to variables while abstracting away the - differences between TensorFlow 1.x and 2.x semantics. 
- - {snippet} - - Args: - x: Variable to set to a new value. - value: Value to set the tensor to, as a Numpy array - (of the same shape). - """ - value = np.asarray(value, dtype=dtype_numpy(x)) - if tf.compat.v1.executing_eagerly_outside_functions(): - x.assign(value) - else: - with get_graph().as_default(): - tf_dtype = tf.as_dtype(x.dtype.name.split('_')[0]) - if hasattr(x, '_assign_placeholder'): - assign_placeholder = x._assign_placeholder - assign_op = x._assign_op - else: - # In order to support assigning weights to resizable variables in - # Keras, we make a placeholder with the correct number of dimensions - # but with None in each dimension. This way, we can assign weights - # of any size (as long as they have the correct dimensionality). - placeholder_shape = tf.TensorShape([None] * value.ndim) - assign_placeholder = tf.compat.v1.placeholder( - tf_dtype, shape=placeholder_shape) - assign_op = x.assign(assign_placeholder) - x._assign_placeholder = assign_placeholder - x._assign_op = assign_op - get_session().run(assign_op, feed_dict={assign_placeholder: value}) - - -@keras_export('keras.backend.batch_set_value') + """Sets the value of a variable, from a Numpy array. + + `backend.set_value` is the complement of `backend.get_value`, and provides + a generic interface for assigning to variables while abstracting away the + differences between TensorFlow 1.x and 2.x semantics. + + {snippet} + + Args: + x: Variable to set to a new value. + value: Value to set the tensor to, as a Numpy array + (of the same shape). + """ + value = np.asarray(value, dtype=dtype_numpy(x)) + if tf.compat.v1.executing_eagerly_outside_functions(): + _assign_value_to_variable(x, value) + else: + with get_graph().as_default(): + tf_dtype = tf.as_dtype(x.dtype.name.split("_")[0]) + if hasattr(x, "_assign_placeholder"): + assign_placeholder = x._assign_placeholder + assign_op = x._assign_op + else: + # In order to support assigning weights to resizable variables + # in Keras, we make a placeholder with the correct number of + # dimensions but with None in each dimension. This way, we can + # assign weights of any size (as long as they have the correct + # dimensionality). + placeholder_shape = tf.TensorShape([None] * value.ndim) + assign_placeholder = tf.compat.v1.placeholder( + tf_dtype, shape=placeholder_shape + ) + assign_op = x.assign(assign_placeholder) + x._assign_placeholder = assign_placeholder + x._assign_op = assign_op + get_session().run(assign_op, feed_dict={assign_placeholder: value}) + + +@keras_export("keras.backend.batch_set_value") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def batch_set_value(tuples): - """Sets the values of many tensor variables at once. - - Args: - tuples: a list of tuples `(tensor, value)`. - `value` should be a Numpy array. - """ - if tf.executing_eagerly() or tf.inside_function(): - for x, value in tuples: - x.assign(np.asarray(value, dtype=dtype_numpy(x))) - else: - with get_graph().as_default(): - if tuples: - assign_ops = [] - feed_dict = {} + """Sets the values of many tensor variables at once. + + Args: + tuples: a list of tuples `(tensor, value)`. + `value` should be a Numpy array. 
+ """ + if tf.executing_eagerly() or tf.inside_function(): for x, value in tuples: - value = np.asarray(value, dtype=dtype_numpy(x)) - tf_dtype = tf.as_dtype(x.dtype.name.split('_')[0]) - if hasattr(x, '_assign_placeholder'): - assign_placeholder = x._assign_placeholder - assign_op = x._assign_op - else: - # In order to support assigning weights to resizable variables in - # Keras, we make a placeholder with the correct number of dimensions - # but with None in each dimension. This way, we can assign weights - # of any size (as long as they have the correct dimensionality). - placeholder_shape = tf.TensorShape([None] * value.ndim) - assign_placeholder = tf.compat.v1.placeholder( - tf_dtype, shape=placeholder_shape) - assign_op = x.assign(assign_placeholder) - x._assign_placeholder = assign_placeholder - x._assign_op = assign_op - assign_ops.append(assign_op) - feed_dict[assign_placeholder] = value - get_session().run(assign_ops, feed_dict=feed_dict) + value = np.asarray(value, dtype=dtype_numpy(x)) + _assign_value_to_variable(x, value) + else: + with get_graph().as_default(): + if tuples: + assign_ops = [] + feed_dict = {} + for x, value in tuples: + value = np.asarray(value, dtype=dtype_numpy(x)) + tf_dtype = tf.as_dtype(x.dtype.name.split("_")[0]) + if hasattr(x, "_assign_placeholder"): + assign_placeholder = x._assign_placeholder + assign_op = x._assign_op + else: + # In order to support assigning weights to resizable + # variables in Keras, we make a placeholder with the + # correct number of dimensions but with None in each + # dimension. This way, we can assign weights of any size + # (as long as they have the correct dimensionality). + placeholder_shape = tf.TensorShape([None] * value.ndim) + assign_placeholder = tf.compat.v1.placeholder( + tf_dtype, shape=placeholder_shape + ) + assign_op = x.assign(assign_placeholder) + x._assign_placeholder = assign_placeholder + x._assign_op = assign_op + assign_ops.append(assign_op) + feed_dict[assign_placeholder] = value + get_session().run(assign_ops, feed_dict=feed_dict) get_value.__doc__ = get_value.__doc__.format(snippet=_VALUE_SET_CODE_STRING) set_value.__doc__ = set_value.__doc__.format(snippet=_VALUE_SET_CODE_STRING) -@keras_export('keras.backend.print_tensor') +def _assign_value_to_variable(variable, value): + # Helper function to assign value to variable. It handles normal tf.Variable + # as well as DTensor variable. + if isinstance(variable, dtensor.DVariable): + mesh = variable.layout.mesh + replicate_layout = dtensor.Layout.replicated( + rank=variable.shape.rank, mesh=mesh + ) + # TODO(b/262894693): Avoid the broadcast of tensor to all devices. + d_value = dtensor.copy_to_mesh(value, replicate_layout) + d_value = dtensor.relayout(d_value, variable.layout) + variable.assign(d_value) + else: + # For the normal tf.Variable assign + variable.assign(value) + + +@keras_export("keras.backend.print_tensor") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs -def print_tensor(x, message='', summarize=3): - """Prints `message` and the tensor value when evaluated. - - Note that `print_tensor` returns a new tensor identical to `x` - which should be used in the following code. Otherwise the - print operation is not taken into account during evaluation. - - Example: - - >>> x = tf.constant([[1.0, 2.0], [3.0, 4.0]]) - >>> tf.keras.backend.print_tensor(x) - - - Args: - x: Tensor to print. - message: Message to print jointly with the tensor. 
-      summarize: The first and last `summarize` elements within each dimension
-          are recursively printed per Tensor. If None, then the first 3 and last
-          3 elements of each dimension are printed for each tensor. If set to
-          -1, it will print all elements of every tensor.
-
-  Returns:
-      The same tensor `x`, unchanged.
-  """
-  if isinstance(x, tf.Tensor) and hasattr(x, 'graph'):
-    with get_graph().as_default():
-      op = tf.print(
-          message, x, output_stream=sys.stdout, summarize=summarize)
-      with tf.control_dependencies([op]):
-        return tf.identity(x)
-  else:
-    tf.print(
-        message, x, output_stream=sys.stdout, summarize=summarize)
-    return x
+def print_tensor(x, message="", summarize=3):
+    """Prints `message` and the tensor value when evaluated.
+
+    Note that `print_tensor` returns a new tensor identical to `x`
+    which should be used in the following code. Otherwise the
+    print operation is not taken into account during evaluation.
+
+    Example:
+
+    >>> x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
+    >>> tf.keras.backend.print_tensor(x)
+    <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
+      array([[1., 2.],
+             [3., 4.]], dtype=float32)>
+
+    Args:
+        x: Tensor to print.
+        message: Message to print jointly with the tensor.
+        summarize: The first and last `summarize` elements within each dimension
+            are recursively printed per Tensor. If None, then the first 3 and
+            last 3 elements of each dimension are printed for each tensor. If
+            set to -1, it will print all elements of every tensor.
+
+    Returns:
+        The same tensor `x`, unchanged.
+    """
+    if isinstance(x, tf.Tensor) and hasattr(x, "graph"):
+        with get_graph().as_default():
+            op = tf.print(
+                message, x, output_stream=sys.stdout, summarize=summarize
+            )
+            with tf.control_dependencies([op]):
+                return tf.identity(x)
+    else:
+        tf.print(message, x, output_stream=sys.stdout, summarize=summarize)
+        return x
+

 # GRAPH MANIPULATION


 class GraphExecutionFunction:
-  """Runs a computation graph.
-
-  It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`.
-  In particular additional operations via `fetches` argument and additional
-  tensor substitutions via `feed_dict` arguments. Note that given
-  substitutions are merged with substitutions from `inputs`. Even though
-  `feed_dict` is passed once in the constructor (called in `model.compile()`)
-  we can modify the values in the dictionary. Through this feed_dict we can
-  provide additional substitutions besides Keras inputs.
-
-  Args:
-      inputs: Feed placeholders to the computation graph.
-      outputs: Output tensors to fetch.
-      updates: Additional update ops to be run at function call.
-      name: A name to help users identify what this function does.
-      session_kwargs: Arguments to `tf.Session.run()`:
-          `fetches`, `feed_dict`, `options`, `run_metadata`.
-  """
-
-  def __init__(self, inputs, outputs, updates=None, name=None,
-               **session_kwargs):
-    updates = updates or []
-    if not isinstance(updates, (list, tuple)):
-      raise TypeError('`updates` in a Keras backend function '
-                      'should be a list or tuple.')
-
-    self._inputs_structure = inputs
-    self.inputs = tf.nest.flatten(inputs, expand_composites=True)
-    self._outputs_structure = outputs
-    self.outputs = cast_variables_to_tensor(
-        tf.nest.flatten(outputs, expand_composites=True))
-    # TODO(b/127668432): Consider using autograph to generate these
-    # dependencies in call.
-    # Index 0 = total loss or model output for `predict`.
-    with tf.control_dependencies([self.outputs[0]]):
-      updates_ops = []
-      for update in updates:
-        if isinstance(update, tuple):
-          p, new_p = update
-          updates_ops.append(tf.compat.v1.assign(p, new_p))
+    """Runs a computation graph.
+ + It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`. + In particular additional operations via `fetches` argument and additional + tensor substitutions via `feed_dict` arguments. Note that given + substitutions are merged with substitutions from `inputs`. Even though + `feed_dict` is passed once in the constructor (called in `model.compile()`) + we can modify the values in the dictionary. Through this feed_dict we can + provide additional substitutions besides Keras inputs. + + Args: + inputs: Feed placeholders to the computation graph. + outputs: Output tensors to fetch. + updates: Additional update ops to be run at function call. + name: A name to help users identify what this function does. + session_kwargs: Arguments to `tf.Session.run()`: + `fetches`, `feed_dict`, `options`, `run_metadata`. + """ + + def __init__( + self, inputs, outputs, updates=None, name=None, **session_kwargs + ): + updates = updates or [] + if not isinstance(updates, (list, tuple)): + raise TypeError( + "`updates` in a Keras backend function " + "should be a list or tuple." + ) + + self.inputs = tf.nest.flatten( + tf_utils.convert_variables_to_tensors(inputs), + expand_composites=True, + ) + self._outputs_structure = tf_utils.convert_variables_to_tensors(outputs) + self.outputs = tf.nest.flatten( + self._outputs_structure, expand_composites=True + ) + # TODO(b/127668432): Consider using autograph to generate these + # dependencies in call. + # Index 0 = total loss or model output for `predict`. + with tf.control_dependencies([self.outputs[0]]): + updates_ops = [] + for update in updates: + if isinstance(update, tuple): + p, new_p = update + updates_ops.append(tf.compat.v1.assign(p, new_p)) + else: + # assumed already an op + updates_ops.append(update) + self.updates_op = tf.group(*updates_ops) + self.name = name + # additional tensor substitutions + self.feed_dict = session_kwargs.pop("feed_dict", None) + # additional operations + self.fetches = session_kwargs.pop("fetches", []) + if not isinstance(self.fetches, list): + self.fetches = [self.fetches] + self.run_options = session_kwargs.pop("options", None) + self.run_metadata = session_kwargs.pop("run_metadata", None) + # The main use case of `fetches` being passed to a model is the ability + # to run custom updates + # This requires us to wrap fetches in `identity` ops. + self.fetches = [tf.identity(x) for x in self.fetches] + self.session_kwargs = session_kwargs + # This mapping keeps track of the function that should receive the + # output from a fetch in `fetches`: { fetch: function(fetch_output) } + # A Callback can use this to register a function with access to the + # output values for a fetch it added. + self.fetch_callbacks = {} + + if session_kwargs: + raise ValueError( + "Some keys in session_kwargs are not supported at this time: %s" + % (session_kwargs.keys(),) + ) + + self._callable_fn = None + self._feed_arrays = None + self._feed_symbols = None + self._symbol_vals = None + self._fetches = None + self._session = None + + def _make_callable(self, feed_arrays, feed_symbols, symbol_vals, session): + """Generates a callable that runs the graph. + + Args: + feed_arrays: List of input tensors to be fed Numpy arrays at runtime. + feed_symbols: List of input tensors to be fed symbolic tensors at + runtime. + symbol_vals: List of symbolic tensors to be fed to `feed_symbols`. + session: Session to use to generate the callable. + + Returns: + Function that runs the graph according to the above options. 
+ """ + # Prepare callable options. + callable_opts = config_pb2.CallableOptions() + # Handle external-data feed. + for x in feed_arrays: + callable_opts.feed.append(x.name) + if self.feed_dict: + for key in sorted(self.feed_dict.keys()): + callable_opts.feed.append(key.name) + # Handle symbolic feed. + for x, y in zip(feed_symbols, symbol_vals): + connection = callable_opts.tensor_connection.add() + if x.dtype != y.dtype: + y = tf.cast(y, dtype=x.dtype) + from_tensor = _as_graph_element(y) + if from_tensor is None: + from_tensor = y + connection.from_tensor = from_tensor.name # Data tensor + connection.to_tensor = x.name # Placeholder + # Handle fetches. + for x in self.outputs + self.fetches: + callable_opts.fetch.append(x.name) + # Handle updates. + callable_opts.target.append(self.updates_op.name) + # Handle run_options. + if self.run_options: + callable_opts.run_options.CopyFrom(self.run_options) + # Create callable. + callable_fn = session._make_callable_from_options(callable_opts) + # Cache parameters corresponding to the generated callable, so that + # we can detect future mismatches and refresh the callable. + self._callable_fn = callable_fn + self._feed_arrays = feed_arrays + self._feed_symbols = feed_symbols + self._symbol_vals = symbol_vals + self._fetches = list(self.fetches) + self._session = session + + def _call_fetch_callbacks(self, fetches_output): + for fetch, output in zip(self._fetches, fetches_output): + if fetch in self.fetch_callbacks: + self.fetch_callbacks[fetch](output) + + def _eval_if_composite(self, tensor): + """Helper method which evaluates any CompositeTensors passed to it.""" + # We need to evaluate any composite tensor objects that have been + # reconstructed in 'pack_sequence_as', since otherwise they'll be output + # as actual CompositeTensor objects instead of the value(s) contained in + # the CompositeTensors. E.g., if output_structure contains a + # SparseTensor, then this ensures that we return its value as a + # SparseTensorValue rather than a SparseTensor. + + if tf_utils.is_extension_type(tensor): + return self._session.run(tensor) else: - # assumed already an op - updates_ops.append(update) - self.updates_op = tf.group(*updates_ops) - self.name = name - # additional tensor substitutions - self.feed_dict = session_kwargs.pop('feed_dict', None) - # additional operations - self.fetches = session_kwargs.pop('fetches', []) - if not isinstance(self.fetches, list): - self.fetches = [self.fetches] - self.run_options = session_kwargs.pop('options', None) - self.run_metadata = session_kwargs.pop('run_metadata', None) - # The main use case of `fetches` being passed to a model is the ability - # to run custom updates - # This requires us to wrap fetches in `identity` ops. - self.fetches = [tf.identity(x) for x in self.fetches] - self.session_kwargs = session_kwargs - # This mapping keeps track of the function that should receive the - # output from a fetch in `fetches`: { fetch: function(fetch_output) } - # A Callback can use this to register a function with access to the - # output values for a fetch it added. - self.fetch_callbacks = {} - - if session_kwargs: - raise ValueError('Some keys in session_kwargs are not supported at this ' - 'time: %s' % (session_kwargs.keys(),)) - - self._callable_fn = None - self._feed_arrays = None - self._feed_symbols = None - self._symbol_vals = None - self._fetches = None - self._session = None - - def _make_callable(self, feed_arrays, feed_symbols, symbol_vals, session): - """Generates a callable that runs the graph. 
- - Args: - feed_arrays: List of input tensors to be fed Numpy arrays at runtime. - feed_symbols: List of input tensors to be fed symbolic tensors at runtime. - symbol_vals: List of symbolic tensors to be fed to `feed_symbols`. - session: Session to use to generate the callable. - - Returns: - Function that runs the graph according to the above options. - """ - # Prepare callable options. - callable_opts = config_pb2.CallableOptions() - # Handle external-data feed. - for x in feed_arrays: - callable_opts.feed.append(x.name) - if self.feed_dict: - for key in sorted(self.feed_dict.keys()): - callable_opts.feed.append(key.name) - # Handle symbolic feed. - for x, y in zip(feed_symbols, symbol_vals): - connection = callable_opts.tensor_connection.add() - if x.dtype != y.dtype: - y = tf.cast(y, dtype=x.dtype) - from_tensor = _as_graph_element(y) - if from_tensor is None: - from_tensor = y - connection.from_tensor = from_tensor.name # Data tensor - connection.to_tensor = x.name # Placeholder - # Handle fetches. - for x in self.outputs + self.fetches: - callable_opts.fetch.append(x.name) - # Handle updates. - callable_opts.target.append(self.updates_op.name) - # Handle run_options. - if self.run_options: - callable_opts.run_options.CopyFrom(self.run_options) - # Create callable. - callable_fn = session._make_callable_from_options(callable_opts) - # Cache parameters corresponding to the generated callable, so that - # we can detect future mismatches and refresh the callable. - self._callable_fn = callable_fn - self._feed_arrays = feed_arrays - self._feed_symbols = feed_symbols - self._symbol_vals = symbol_vals - self._fetches = list(self.fetches) - self._session = session - - def _call_fetch_callbacks(self, fetches_output): - for fetch, output in zip(self._fetches, fetches_output): - if fetch in self.fetch_callbacks: - self.fetch_callbacks[fetch](output) - - def _eval_if_composite(self, tensor): - """Helper method which evaluates any CompositeTensors passed to it.""" - # We need to evaluate any composite tensor objects that have been - # reconstructed in 'pack_sequence_as', since otherwise they'll be output as - # actual CompositeTensor objects instead of the value(s) contained in the - # CompositeTensors. E.g., if output_structure contains a SparseTensor, then - # this ensures that we return its value as a SparseTensorValue rather than - # a SparseTensor. - from keras.utils import tf_utils # pylint: disable=g-import-not-at-top - if tf_utils.is_extension_type(tensor): - return self._session.run(tensor) - else: - return tensor - - def __call__(self, inputs): - inputs = tf.nest.flatten(inputs, expand_composites=True) - - session = get_session(inputs) - feed_arrays = [] - array_vals = [] - feed_symbols = [] - symbol_vals = [] - for tensor, value in zip(self.inputs, inputs): - if value is None: - continue - - if tf.is_tensor(value): - # Case: feeding symbolic tensor. - feed_symbols.append(tensor) - symbol_vals.append(value) - else: - # Case: feeding Numpy array. - feed_arrays.append(tensor) - # We need to do array conversion and type casting at this level, since - # `callable_fn` only supports exact matches. - tensor_type = tf.as_dtype(tensor.dtype) - array_vals.append(np.asarray(value, - dtype=tensor_type.as_numpy_dtype)) - - if self.feed_dict: - for key in sorted(self.feed_dict.keys()): - array_vals.append( - np.asarray(self.feed_dict[key], dtype=key.dtype.as_numpy_dtype)) - - # Refresh callable if anything has changed. 
- if (self._callable_fn is None or feed_arrays != self._feed_arrays or - symbol_vals != self._symbol_vals or - feed_symbols != self._feed_symbols or self.fetches != self._fetches or - session != self._session): - self._make_callable(feed_arrays, feed_symbols, symbol_vals, session) - - fetched = self._callable_fn(*array_vals, - run_metadata=self.run_metadata) - self._call_fetch_callbacks(fetched[-len(self._fetches):]) - output_structure = tf.nest.pack_sequence_as( - self._outputs_structure, - fetched[:len(self.outputs)], - expand_composites=True) - # We need to evaluate any composite tensor objects that have been - # reconstructed in 'pack_sequence_as', since otherwise they'll be output as - # actual CompositeTensor objects instead of the value(s) contained in the - # CompositeTensors. E.g., if output_structure contains a SparseTensor, then - # this ensures that we return its value as a SparseTensorValue rather than - # a SparseTensor. - return tf.nest.map_structure(self._eval_if_composite, output_structure) - - -@keras_export('keras.backend.function') + return tensor + + def __call__(self, inputs): + inputs = tf.nest.flatten( + tf_utils.convert_variables_to_tensors(inputs), + expand_composites=True, + ) + + session = get_session(inputs) + feed_arrays = [] + array_vals = [] + feed_symbols = [] + symbol_vals = [] + for tensor, value in zip(self.inputs, inputs): + if value is None: + continue + + if tf.is_tensor(value): + # Case: feeding symbolic tensor. + feed_symbols.append(tensor) + symbol_vals.append(value) + else: + # Case: feeding Numpy array. + feed_arrays.append(tensor) + # We need to do array conversion and type casting at this level, + # since `callable_fn` only supports exact matches. + tensor_type = tf.as_dtype(tensor.dtype) + array_vals.append( + np.asarray(value, dtype=tensor_type.as_numpy_dtype) + ) + + if self.feed_dict: + for key in sorted(self.feed_dict.keys()): + array_vals.append( + np.asarray( + self.feed_dict[key], dtype=key.dtype.as_numpy_dtype + ) + ) + + # Refresh callable if anything has changed. + if ( + self._callable_fn is None + or feed_arrays != self._feed_arrays + or symbol_vals != self._symbol_vals + or feed_symbols != self._feed_symbols + or self.fetches != self._fetches + or session != self._session + ): + self._make_callable(feed_arrays, feed_symbols, symbol_vals, session) + + fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata) + self._call_fetch_callbacks(fetched[-len(self._fetches) :]) + output_structure = tf.nest.pack_sequence_as( + self._outputs_structure, + fetched[: len(self.outputs)], + expand_composites=True, + ) + # We need to evaluate any composite tensor objects that have been + # reconstructed in 'pack_sequence_as', since otherwise they'll be output + # as actual CompositeTensor objects instead of the value(s) contained in + # the CompositeTensors. E.g., if output_structure contains a + # SparseTensor, then this ensures that we return its value as a + # SparseTensorValue rather than a SparseTensor. + return tf.nest.map_structure(self._eval_if_composite, output_structure) + + +@keras_export("keras.backend.function") @doc_controls.do_not_generate_docs def function(inputs, outputs, updates=None, name=None, **kwargs): - """Instantiates a Keras function. - - Args: - inputs: List of placeholder tensors. - outputs: List of output tensors. - updates: List of update ops. - name: String, name of function. - **kwargs: Passed to `tf.Session.run`. - - Returns: - Output values as Numpy arrays. 
- - Raises: - ValueError: if invalid kwargs are passed in or if in eager execution. - """ - if tf.compat.v1.executing_eagerly_outside_functions(): + """Instantiates a Keras function. + + Args: + inputs: List of placeholder tensors. + outputs: List of output tensors. + updates: List of update ops. + name: String, name of function. + **kwargs: Passed to `tf.Session.run`. + + Returns: + Output values as Numpy arrays. + + Raises: + ValueError: if invalid kwargs are passed in or if in eager execution. + """ + if tf.compat.v1.executing_eagerly_outside_functions(): + if kwargs: + raise ValueError( + "Session keyword arguments are not supported during " + "eager execution. You passed: %s" % (kwargs,) + ) + if updates: + raise ValueError( + "`updates` argument is not supported during " + "eager execution. You passed: %s" % (updates,) + ) + from keras import models + + model = models.Model(inputs=inputs, outputs=outputs) + + wrap_outputs = isinstance(outputs, list) and len(outputs) == 1 + + def func(model_inputs): + outs = model(model_inputs) + if wrap_outputs: + outs = [outs] + return tf_utils.sync_to_numpy_or_python_type(outs) + + return func + if kwargs: - raise ValueError('Session keyword arguments are not supported during ' - 'eager execution. You passed: %s' % (kwargs,)) - if updates: - raise ValueError('`updates` argument is not supported during ' - 'eager execution. You passed: %s' % (updates,)) - from keras import models # pylint: disable=g-import-not-at-top - from keras.utils import tf_utils # pylint: disable=g-import-not-at-top - model = models.Model(inputs=inputs, outputs=outputs) - - wrap_outputs = isinstance(outputs, list) and len(outputs) == 1 - def func(model_inputs): - outs = model(model_inputs) - if wrap_outputs: - outs = [outs] - return tf_utils.sync_to_numpy_or_python_type(outs) - - return func - - if kwargs: - for key in kwargs: - if (key not in tf_inspect.getfullargspec(tf.compat.v1.Session.run)[0] - and key not in ['inputs', 'outputs', 'updates', 'name']): - msg = ('Invalid argument "%s" passed to K.function with TensorFlow ' - 'backend') % key - raise ValueError(msg) - return GraphExecutionFunction( - inputs, outputs, updates=updates, name=name, **kwargs) - - -@keras_export('keras.backend.gradients') + for key in kwargs: + if key not in tf_inspect.getfullargspec(tf.compat.v1.Session.run)[ + 0 + ] and key not in ["inputs", "outputs", "updates", "name"]: + msg = ( + 'Invalid argument "%s" passed to K.function with ' + "TensorFlow backend" % key + ) + raise ValueError(msg) + return GraphExecutionFunction( + inputs, outputs, updates=updates, name=name, **kwargs + ) + + +@keras_export("keras.backend.gradients") @doc_controls.do_not_generate_docs def gradients(loss, variables): - """Returns the gradients of `loss` w.r.t. `variables`. + """Returns the gradients of `loss` w.r.t. `variables`. - Args: - loss: Scalar tensor to minimize. - variables: List of variables. + Args: + loss: Scalar tensor to minimize. + variables: List of variables. - Returns: - A gradients tensor. - """ - return tf.compat.v1.gradients( - loss, variables, colocate_gradients_with_ops=True) + Returns: + A gradients tensor. + """ + return tf.compat.v1.gradients( + loss, variables, colocate_gradients_with_ops=True + ) -@keras_export('keras.backend.stop_gradient') +@keras_export("keras.backend.stop_gradient") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def stop_gradient(variables): - """Returns `variables` but with zero gradient w.r.t. every other variable. 
+ """Returns `variables` but with zero gradient w.r.t. every other variable. - Args: - variables: Tensor or list of tensors to consider constant with respect - to any other variable. + Args: + variables: Tensor or list of tensors to consider constant with respect + to any other variable. - Returns: - A single tensor or a list of tensors (depending on the passed argument) - that has no gradient with respect to any other variable. - """ - if isinstance(variables, (list, tuple)): - return map(tf.stop_gradient, variables) - return tf.stop_gradient(variables) + Returns: + A single tensor or a list of tensors (depending on the passed argument) + that has no gradient with respect to any other variable. + """ + if isinstance(variables, (list, tuple)): + return map(tf.stop_gradient, variables) + return tf.stop_gradient(variables) # CONTROL FLOW -@keras_export('keras.backend.rnn') +@keras_export("keras.backend.rnn") @tf.__internal__.dispatch.add_dispatch_support -def rnn(step_function, - inputs, - initial_states, - go_backwards=False, - mask=None, - constants=None, - unroll=False, - input_length=None, - time_major=False, - zero_output_for_mask=False, - return_all_outputs=True): - """Iterates over the time dimension of a tensor. - - Args: - step_function: RNN step function. - Args; - input; Tensor with shape `(samples, ...)` (no time dimension), - representing input for the batch of samples at a certain - time step. - states; List of tensors. - Returns; - output; Tensor with shape `(samples, output_dim)` - (no time dimension). - new_states; List of tensors, same length and shapes - as 'states'. The first state in the list must be the - output tensor at the previous timestep. - inputs: Tensor of temporal data of shape `(samples, time, ...)` - (at least 3D), or nested tensors, and each of which has shape - `(samples, time, ...)`. - initial_states: Tensor with shape `(samples, state_size)` - (no time dimension), containing the initial values for the states used - in the step function. In the case that state_size is in a nested - shape, the shape of initial_states will also follow the nested - structure. - go_backwards: Boolean. If True, do the iteration over the time - dimension in reverse order and return the reversed sequence. - mask: Binary tensor with shape `(samples, time, 1)`, - with a zero for every element that is masked. - constants: List of constant values passed at each step. - unroll: Whether to unroll the RNN or to use a symbolic `while_loop`. - input_length: An integer or a 1-D Tensor, depending on whether - the time dimension is fixed-length or not. In case of variable length - input, it is used for masking in case there's no mask specified. - time_major: Boolean. If true, the inputs and outputs will be in shape - `(timesteps, batch, ...)`, whereas in the False case, it will be - `(batch, timesteps, ...)`. Using `time_major = True` is a bit more - efficient because it avoids transposes at the beginning and end of the - RNN calculation. However, most TensorFlow data is batch-major, so by - default this function accepts input and emits output in batch-major - form. - zero_output_for_mask: Boolean. If True, the output for masked timestep - will be zeros, whereas in the False case, output from previous - timestep is returned. - return_all_outputs: Boolean. If True, return the recurrent outputs for all - timesteps in the sequence. If False, only return the output for the - last timestep (which consumes less memory). - - Returns: - A tuple, `(last_output, outputs, new_states)`. 
- last_output: the latest output of the rnn, of shape `(samples, ...)` - outputs: - - If `return_all_outputs=True`: a tensor with shape - `(samples, time, ...)` where each entry `outputs[s, t]` is the - output of the step function at time `t` for sample `s` - - Else, a tensor equal to `last_output` with shape - `(samples, 1, ...)` - new_states: list of tensors, latest states returned by - the step function, of shape `(samples, ...)`. - - Raises: - ValueError: if input dimension is less than 3. - ValueError: if `unroll` is `True` but input timestep is not a fixed - number. - ValueError: if `mask` is provided (not `None`) but states is not provided - (`len(states)` == 0). - """ - if not tf.__internal__.tf2.enabled(): - return_all_outputs = True # Not supported in TF1. - - def swap_batch_timestep(input_t): - # Swap the batch and timestep dim for the incoming tensor. - axes = list(range(len(input_t.shape))) - axes[0], axes[1] = 1, 0 - return tf.compat.v1.transpose(input_t, axes) - - if not time_major: - inputs = tf.nest.map_structure(swap_batch_timestep, inputs) - - flatted_inputs = tf.nest.flatten(inputs) - time_steps = flatted_inputs[0].shape[0] - batch = flatted_inputs[0].shape[1] - time_steps_t = tf.shape(flatted_inputs[0])[0] - - for input_ in flatted_inputs: - input_.shape.with_rank_at_least(3) - - if mask is not None: - if mask.dtype != tf.bool: - mask = tf.cast(mask, tf.bool) - if len(mask.shape) == 2: - mask = expand_dims(mask) +def rnn( + step_function, + inputs, + initial_states, + go_backwards=False, + mask=None, + constants=None, + unroll=False, + input_length=None, + time_major=False, + zero_output_for_mask=False, + return_all_outputs=True, +): + """Iterates over the time dimension of a tensor. + + Args: + step_function: RNN step function. + Args; + input; Tensor with shape `(samples, ...)` (no time dimension), + representing input for the batch of samples at a certain + time step. + states; List of tensors. + Returns; + output; Tensor with shape `(samples, output_dim)` + (no time dimension). + new_states; List of tensors, same length and shapes + as 'states'. The first state in the list must be the + output tensor at the previous timestep. + inputs: Tensor of temporal data of shape `(samples, time, ...)` + (at least 3D), or nested tensors, and each of which has shape + `(samples, time, ...)`. + initial_states: Tensor with shape `(samples, state_size)` + (no time dimension), containing the initial values for the states + used in the step function. In the case that state_size is in a + nested shape, the shape of initial_states will also follow the + nested structure. + go_backwards: Boolean. If True, do the iteration over the time + dimension in reverse order and return the reversed sequence. + mask: Binary tensor with shape `(samples, time, 1)`, + with a zero for every element that is masked. + constants: List of constant values passed at each step. + unroll: Whether to unroll the RNN or to use a symbolic `while_loop`. + input_length: An integer or a 1-D Tensor, depending on whether + the time dimension is fixed-length or not. In case of variable + length input, it is used for masking in case there's no mask + specified. + time_major: Boolean. If true, the inputs and outputs will be in shape + `(timesteps, batch, ...)`, whereas in the False case, it will be + `(batch, timesteps, ...)`. Using `time_major = True` is a bit more + efficient because it avoids transposes at the beginning and end of + the RNN calculation. 
However, most TensorFlow data is batch-major, + so by default this function accepts input and emits output in + batch-major form. + zero_output_for_mask: Boolean. If True, the output for masked timestep + will be zeros, whereas in the False case, output from previous + timestep is returned. + return_all_outputs: Boolean. If True, return the recurrent outputs for + all timesteps in the sequence. If False, only return the output for + the last timestep (which consumes less memory). + + Returns: + A tuple, `(last_output, outputs, new_states)`. + last_output: the latest output of the rnn, of shape `(samples, ...)` + outputs: + - If `return_all_outputs=True`: a tensor with shape + `(samples, time, ...)` where each entry `outputs[s, t]` is the + output of the step function at time `t` for sample `s` + - Else, a tensor equal to `last_output` with shape + `(samples, 1, ...)` + new_states: list of tensors, latest states returned by + the step function, of shape `(samples, ...)`. + + Raises: + ValueError: if input dimension is less than 3. + ValueError: if `unroll` is `True` but input timestep is not a fixed + number. + ValueError: if `mask` is provided (not `None`) but states is not + provided (`len(states)` == 0). + """ + if not tf.__internal__.tf2.enabled(): + return_all_outputs = True # Not supported in TF1. + + def swap_batch_timestep(input_t): + # Swap the batch and timestep dim for the incoming tensor. + axes = list(range(len(input_t.shape))) + axes[0], axes[1] = 1, 0 + return tf.compat.v1.transpose(input_t, axes) + if not time_major: - mask = swap_batch_timestep(mask) - - if constants is None: - constants = [] - - # tf.where needs its condition tensor to be the same shape as its two - # result tensors, but in our case the condition (mask) tensor is - # (nsamples, 1), and inputs are (nsamples, ndimensions) or even more. - # So we need to broadcast the mask to match the shape of inputs. - # That's what the tile call does, it just repeats the mask along its - # second dimension n times. - def _expand_mask(mask_t, input_t, fixed_dim=1): - if tf.nest.is_nested(mask_t): - raise ValueError('mask_t is expected to be tensor, but got %s' % mask_t) - if tf.nest.is_nested(input_t): - raise ValueError('input_t is expected to be tensor, but got %s' % input_t) - rank_diff = len(input_t.shape) - len(mask_t.shape) - for _ in range(rank_diff): - mask_t = tf.expand_dims(mask_t, -1) - multiples = [1] * fixed_dim + input_t.shape.as_list()[fixed_dim:] - return tf.tile(mask_t, multiples) - - if unroll: - if not time_steps: - raise ValueError('Unrolling requires a fixed number of timesteps.') - states = tuple(initial_states) - successive_states = [] - successive_outputs = [] - - # Process the input tensors. The input tensor need to be split on the - # time_step dim, and reverse if go_backwards is True. In the case of nested - # input, the input is flattened and then transformed individually. 
- # The result of this will be a tuple of lists, each of the item in tuple is - # list of the tensor with shape (batch, feature) - def _process_single_input_t(input_t): - input_t = tf.unstack(input_t) # unstack for time_step dim - if go_backwards: - input_t.reverse() - return input_t - - if tf.nest.is_nested(inputs): - processed_input = tf.nest.map_structure(_process_single_input_t, inputs) - else: - processed_input = (_process_single_input_t(inputs),) + inputs = tf.nest.map_structure(swap_batch_timestep, inputs) - def _get_input_tensor(time): - inp = [t_[time] for t_ in processed_input] - return tf.nest.pack_sequence_as(inputs, inp) + flatted_inputs = tf.nest.flatten(inputs) + time_steps = flatted_inputs[0].shape[0] + batch = flatted_inputs[0].shape[1] + time_steps_t = tf.shape(flatted_inputs[0])[0] + + for input_ in flatted_inputs: + input_.shape.with_rank_at_least(3) if mask is not None: - mask_list = tf.unstack(mask) - if go_backwards: - mask_list.reverse() - - for i in range(time_steps): - inp = _get_input_tensor(i) - mask_t = mask_list[i] - output, new_states = step_function(inp, - tuple(states) + tuple(constants)) - tiled_mask_t = _expand_mask(mask_t, output) - - if not successive_outputs: - prev_output = zeros_like(output) + if mask.dtype != tf.bool: + mask = tf.cast(mask, tf.bool) + if len(mask.shape) == 2: + mask = expand_dims(mask) + if not time_major: + mask = swap_batch_timestep(mask) + + if constants is None: + constants = [] + + # tf.where needs its condition tensor to be the same shape as its two + # result tensors, but in our case the condition (mask) tensor is + # (nsamples, 1), and inputs are (nsamples, ndimensions) or even more. + # So we need to broadcast the mask to match the shape of inputs. + # That's what the tile call does, it just repeats the mask along its + # second dimension n times. + def _expand_mask(mask_t, input_t, fixed_dim=1): + if tf.nest.is_nested(mask_t): + raise ValueError( + f"mask_t is expected to be tensor, but got {mask_t}" + ) + if tf.nest.is_nested(input_t): + raise ValueError( + f"input_t is expected to be tensor, but got {input_t}" + ) + rank_diff = len(input_t.shape) - len(mask_t.shape) + for _ in range(rank_diff): + mask_t = tf.expand_dims(mask_t, -1) + multiples = [1] * fixed_dim + input_t.shape.as_list()[fixed_dim:] + return tf.tile(mask_t, multiples) + + if unroll: + if not time_steps: + raise ValueError("Unrolling requires a fixed number of timesteps.") + states = tuple(initial_states) + successive_states = [] + successive_outputs = [] + + # Process the input tensors. The input tensor need to be split on the + # time_step dim, and reverse if go_backwards is True. In the case of + # nested input, the input is flattened and then transformed + # individually. 
The result of this will be a tuple of lists, each of + # the item in tuple is list of the tensor with shape (batch, feature) + def _process_single_input_t(input_t): + input_t = tf.unstack(input_t) # unstack for time_step dim + if go_backwards: + input_t.reverse() + return input_t + + if tf.nest.is_nested(inputs): + processed_input = tf.nest.map_structure( + _process_single_input_t, inputs + ) else: - prev_output = successive_outputs[-1] - - output = tf.where(tiled_mask_t, output, prev_output) - - flat_states = tf.nest.flatten(states) - flat_new_states = tf.nest.flatten(new_states) - tiled_mask_t = tuple(_expand_mask(mask_t, s) for s in flat_states) - flat_final_states = tuple( - tf.where(m, s, ps) - for m, s, ps in zip(tiled_mask_t, flat_new_states, flat_states)) - states = tf.nest.pack_sequence_as(states, flat_final_states) + processed_input = (_process_single_input_t(inputs),) + + def _get_input_tensor(time): + inp = [t_[time] for t_ in processed_input] + return tf.nest.pack_sequence_as(inputs, inp) + + if mask is not None: + mask_list = tf.unstack(mask) + if go_backwards: + mask_list.reverse() + + for i in range(time_steps): + inp = _get_input_tensor(i) + mask_t = mask_list[i] + output, new_states = step_function( + inp, tuple(states) + tuple(constants) + ) + tiled_mask_t = _expand_mask(mask_t, output) + + if not successive_outputs: + prev_output = zeros_like(output) + else: + prev_output = successive_outputs[-1] + + output = tf.where(tiled_mask_t, output, prev_output) + + flat_states = tf.nest.flatten(states) + flat_new_states = tf.nest.flatten(new_states) + tiled_mask_t = tuple( + _expand_mask(mask_t, s) for s in flat_states + ) + flat_final_states = tuple( + tf.where(m, s, ps) + for m, s, ps in zip( + tiled_mask_t, flat_new_states, flat_states + ) + ) + states = tf.nest.pack_sequence_as(states, flat_final_states) + + if return_all_outputs: + successive_outputs.append(output) + successive_states.append(states) + else: + successive_outputs = [output] + successive_states = [states] + last_output = successive_outputs[-1] + new_states = successive_states[-1] + outputs = tf.stack(successive_outputs) + + if zero_output_for_mask: + last_output = tf.where( + _expand_mask(mask_list[-1], last_output), + last_output, + zeros_like(last_output), + ) + outputs = tf.where( + _expand_mask(mask, outputs, fixed_dim=2), + outputs, + zeros_like(outputs), + ) + + else: # mask is None + for i in range(time_steps): + inp = _get_input_tensor(i) + output, states = step_function( + inp, tuple(states) + tuple(constants) + ) + if return_all_outputs: + successive_outputs.append(output) + successive_states.append(states) + else: + successive_outputs = [output] + successive_states = [states] + last_output = successive_outputs[-1] + new_states = successive_states[-1] + outputs = tf.stack(successive_outputs) + + else: # Unroll == False + states = tuple(initial_states) + + # Create input tensor array, if the inputs is nested tensors, then it + # will be flattened first, and tensor array will be created one per + # flattened tensor. + input_ta = tuple( + tf.TensorArray( + dtype=inp.dtype, + size=time_steps_t, + tensor_array_name=f"input_ta_{i}", + ) + for i, inp in enumerate(flatted_inputs) + ) + input_ta = tuple( + ta.unstack(input_) + if not go_backwards + else ta.unstack(reverse(input_, 0)) + for ta, input_ in zip(input_ta, flatted_inputs) + ) + + # Get the time(0) input and compute the output for that, the output will + # be used to determine the dtype of output tensor array. 
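The non-unrolled branch just above stages the time-major input in `tf.TensorArray`s and reads one timestep per `while_loop` iteration. A self-contained sketch of that pattern (a toy `tanh` stands in for the real step function; this is not code from the patch):

```python
import tensorflow as tf

inputs = tf.random.normal([5, 8, 3])  # time-major: (time, batch, features)
time_steps = tf.shape(inputs)[0]

# Stage the input once. clear_after_read defaults to True, which is why the
# patch takes the time-zero input from the original tensor, not the array.
input_ta = tf.TensorArray(tf.float32, size=time_steps).unstack(inputs)
output_ta = tf.TensorArray(tf.float32, size=time_steps)

def step(t, out_ta):
    x_t = input_ta.read(t)  # (batch, features) at timestep t
    return [t + 1, out_ta.write(t, tf.tanh(x_t))]  # toy stand-in for the cell

_, output_ta = tf.while_loop(
    lambda t, *_: t < time_steps, step, [tf.constant(0), output_ta]
)
print(output_ta.stack().shape)  # (5, 8, 3)
```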
Don't read from + # input_ta due to TensorArray clear_after_read default to True. + input_time_zero = tf.nest.pack_sequence_as( + inputs, [inp[0] for inp in flatted_inputs] + ) + # output_time_zero is used to determine the cell output shape and its + # dtype. the value is discarded. + output_time_zero, _ = step_function( + input_time_zero, tuple(initial_states) + tuple(constants) + ) + + output_ta_size = time_steps_t if return_all_outputs else 1 + output_ta = tuple( + tf.TensorArray( + dtype=out.dtype, + size=output_ta_size, + element_shape=out.shape, + tensor_array_name=f"output_ta_{i}", + ) + for i, out in enumerate(tf.nest.flatten(output_time_zero)) + ) + + time = tf.constant(0, dtype="int32", name="time") + + # We only specify the 'maximum_iterations' when building for XLA since + # that causes slowdowns on GPU in TF. + if ( + not tf.executing_eagerly() + and control_flow_util.GraphOrParentsInXlaContext( + tf.compat.v1.get_default_graph() + ) + ): + if input_length is None: + max_iterations = time_steps_t + else: + max_iterations = tf.reduce_max(input_length) + else: + max_iterations = None + + while_loop_kwargs = { + "cond": lambda time, *_: time < time_steps_t, + "maximum_iterations": max_iterations, + "parallel_iterations": 32, + "swap_memory": True, + } + if mask is not None: + if go_backwards: + mask = reverse(mask, 0) + + mask_ta = tf.TensorArray( + dtype=tf.bool, size=time_steps_t, tensor_array_name="mask_ta" + ) + mask_ta = mask_ta.unstack(mask) + + def masking_fn(time): + return mask_ta.read(time) + + def compute_masked_output(mask_t, flat_out, flat_mask): + tiled_mask_t = tuple( + _expand_mask(mask_t, o, fixed_dim=len(mask_t.shape)) + for o in flat_out + ) + return tuple( + tf.where(m, o, fm) + for m, o, fm in zip(tiled_mask_t, flat_out, flat_mask) + ) + + elif isinstance(input_length, tf.Tensor): + if go_backwards: + max_len = tf.reduce_max(input_length, axis=0) + rev_input_length = tf.subtract(max_len - 1, input_length) + + def masking_fn(time): + return tf.less(rev_input_length, time) + + else: + + def masking_fn(time): + return tf.greater(input_length, time) + + def compute_masked_output(mask_t, flat_out, flat_mask): + return tuple( + tf.compat.v1.where(mask_t, o, zo) + for (o, zo) in zip(flat_out, flat_mask) + ) - if return_all_outputs: - successive_outputs.append(output) - successive_states.append(states) else: - successive_outputs = [output] - successive_states = [states] - last_output = successive_outputs[-1] - new_states = successive_states[-1] - outputs = tf.stack(successive_outputs) - - if zero_output_for_mask: - last_output = tf.where( - _expand_mask(mask_list[-1], last_output), last_output, - zeros_like(last_output)) - outputs = tf.where( - _expand_mask(mask, outputs, fixed_dim=2), outputs, - zeros_like(outputs)) - - else: # mask is None - for i in range(time_steps): - inp = _get_input_tensor(i) - output, states = step_function(inp, tuple(states) + tuple(constants)) - if return_all_outputs: - successive_outputs.append(output) - successive_states.append(states) + masking_fn = None + + if masking_fn is not None: + # Mask for the T output will be base on the output of T - 1. In the + # case T = 0, a zero filled tensor will be used. + flat_zero_output = tuple( + tf.zeros_like(o) for o in tf.nest.flatten(output_time_zero) + ) + + def _step(time, output_ta_t, prev_output, *states): + """RNN step function. + + Args: + time: Current timestep value. + output_ta_t: TensorArray. + prev_output: tuple of outputs from time - 1. + *states: List of states. 
+ + Returns: + Tuple: `(time + 1, output_ta_t, output) + tuple(new_states)` + """ + current_input = tuple(ta.read(time) for ta in input_ta) + # maybe set shape. + current_input = tf.nest.pack_sequence_as(inputs, current_input) + mask_t = masking_fn(time) + output, new_states = step_function( + current_input, tuple(states) + tuple(constants) + ) + # mask output + flat_output = tf.nest.flatten(output) + flat_mask_output = ( + flat_zero_output + if zero_output_for_mask + else tf.nest.flatten(prev_output) + ) + flat_new_output = compute_masked_output( + mask_t, flat_output, flat_mask_output + ) + + # mask states + flat_state = tf.nest.flatten(states) + flat_new_state = tf.nest.flatten(new_states) + for state, new_state in zip(flat_state, flat_new_state): + if isinstance(new_state, tf.Tensor): + new_state.set_shape(state.shape) + flat_final_state = compute_masked_output( + mask_t, flat_new_state, flat_state + ) + new_states = tf.nest.pack_sequence_as( + new_states, flat_final_state + ) + + ta_index_to_write = time if return_all_outputs else 0 + output_ta_t = tuple( + ta.write(ta_index_to_write, out) + for ta, out in zip(output_ta_t, flat_new_output) + ) + + return (time + 1, output_ta_t, tuple(flat_new_output)) + tuple( + new_states + ) + + final_outputs = tf.compat.v1.while_loop( + body=_step, + loop_vars=(time, output_ta, flat_zero_output) + states, + **while_loop_kwargs, + ) + # Skip final_outputs[2] which is the output for final timestep. + new_states = final_outputs[3:] else: - successive_outputs = [output] - successive_states = [states] - last_output = successive_outputs[-1] - new_states = successive_states[-1] - outputs = tf.stack(successive_outputs) - - else: # Unroll == False - states = tuple(initial_states) - - # Create input tensor array, if the inputs is nested tensors, then it will - # be flattened first, and tensor array will be created one per flattened - # tensor. - input_ta = tuple( - tf.TensorArray( - dtype=inp.dtype, - size=time_steps_t, - tensor_array_name='input_ta_%s' % i) - for i, inp in enumerate(flatted_inputs)) - input_ta = tuple( - ta.unstack(input_) if not go_backwards else ta - .unstack(reverse(input_, 0)) - for ta, input_ in zip(input_ta, flatted_inputs)) - - # Get the time(0) input and compute the output for that, the output will be - # used to determine the dtype of output tensor array. Don't read from - # input_ta due to TensorArray clear_after_read default to True. - input_time_zero = tf.nest.pack_sequence_as(inputs, - [inp[0] for inp in flatted_inputs]) - # output_time_zero is used to determine the cell output shape and its dtype. - # the value is discarded. - output_time_zero, _ = step_function( - input_time_zero, tuple(initial_states) + tuple(constants)) - - output_ta_size = time_steps_t if return_all_outputs else 1 - output_ta = tuple( - tf.TensorArray( - dtype=out.dtype, - size=output_ta_size, - element_shape=out.shape, - tensor_array_name='output_ta_%s' % i) - for i, out in enumerate(tf.nest.flatten(output_time_zero))) - - time = tf.constant(0, dtype='int32', name='time') - - # We only specify the 'maximum_iterations' when building for XLA since that - # causes slowdowns on GPU in TF. 
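Both masked paths above rely on the same trick: broadcast the `(batch, 1)` boolean mask up to the rank of the value, then let `tf.where` keep the previous output/state wherever the step is masked out. A small illustration with hypothetical values (not from the patch):

```python
import tensorflow as tf

mask_t = tf.constant([[True], [False]])          # (batch, 1)
new_out = tf.constant([[1.0, 1.0], [2.0, 2.0]])  # fresh step output
prev_out = tf.zeros_like(new_out)                # carried-over value

# What _expand_mask does here: tile the mask across the feature axis.
tiled = tf.tile(mask_t, [1, new_out.shape[-1]])  # (batch, features)

print(tf.where(tiled, new_out, prev_out).numpy())  # [[1. 1.] [0. 0.]]
```

Sample 0 is unmasked and takes the fresh output; sample 1 is masked and keeps the previous value.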
- if (not tf.executing_eagerly() and - control_flow_util.GraphOrParentsInXlaContext(tf.compat.v1.get_default_graph())): - max_iterations = tf.reduce_max(input_length) - else: - max_iterations = None - while_loop_kwargs = { - 'cond': lambda time, *_: time < time_steps_t, - 'maximum_iterations': max_iterations, - 'parallel_iterations': 32, - 'swap_memory': True, - } - if mask is not None: - if go_backwards: - mask = reverse(mask, 0) - - mask_ta = tf.TensorArray( - dtype=tf.bool, - size=time_steps_t, - tensor_array_name='mask_ta') - mask_ta = mask_ta.unstack(mask) - - def masking_fn(time): - return mask_ta.read(time) - - def compute_masked_output(mask_t, flat_out, flat_mask): - tiled_mask_t = tuple( - _expand_mask(mask_t, o, fixed_dim=len(mask_t.shape)) - for o in flat_out) - return tuple( - tf.where(m, o, fm) - for m, o, fm in zip(tiled_mask_t, flat_out, flat_mask)) - elif isinstance(input_length, tf.Tensor): - if go_backwards: - max_len = tf.reduce_max(input_length, axis=0) - rev_input_length = tf.subtract(max_len - 1, input_length) - - def masking_fn(time): - return tf.less(rev_input_length, time) - else: - - def masking_fn(time): - return tf.greater(input_length, time) - - def compute_masked_output(mask_t, flat_out, flat_mask): - return tuple( - tf.compat.v1.where(mask_t, o, zo) - for (o, zo) in zip(flat_out, flat_mask)) - else: - masking_fn = None + def _step(time, output_ta_t, *states): + """RNN step function. + + Args: + time: Current timestep value. + output_ta_t: TensorArray. + *states: List of states. + + Returns: + Tuple: `(time + 1,output_ta_t) + tuple(new_states)` + """ + current_input = tuple(ta.read(time) for ta in input_ta) + current_input = tf.nest.pack_sequence_as(inputs, current_input) + output, new_states = step_function( + current_input, tuple(states) + tuple(constants) + ) + flat_state = tf.nest.flatten(states) + flat_new_state = tf.nest.flatten(new_states) + for state, new_state in zip(flat_state, flat_new_state): + if isinstance(new_state, tf.Tensor): + new_state.set_shape(state.shape) + + flat_output = tf.nest.flatten(output) + ta_index_to_write = time if return_all_outputs else 0 + output_ta_t = tuple( + ta.write(ta_index_to_write, out) + for ta, out in zip(output_ta_t, flat_output) + ) + + new_states = tf.nest.pack_sequence_as( + initial_states, flat_new_state + ) + return (time + 1, output_ta_t) + tuple(new_states) + + final_outputs = tf.compat.v1.while_loop( + body=_step, + loop_vars=(time, output_ta) + states, + **while_loop_kwargs, + ) + new_states = final_outputs[2:] + + output_ta = final_outputs[1] + + outputs = tuple(o.stack() for o in output_ta) + last_output = tuple(o[-1] for o in outputs) + + outputs = tf.nest.pack_sequence_as(output_time_zero, outputs) + last_output = tf.nest.pack_sequence_as(output_time_zero, last_output) + + # static shape inference + def set_shape(output_): + if isinstance(output_, tf.Tensor): + shape = output_.shape.as_list() + if return_all_outputs: + shape[0] = time_steps + else: + shape[0] = 1 + shape[1] = batch + output_.set_shape(shape) + return output_ + + outputs = tf.nest.map_structure(set_shape, outputs) - if masking_fn is not None: - # Mask for the T output will be base on the output of T - 1. In the case - # T = 0, a zero filled tensor will be used. - flat_zero_output = tuple(tf.zeros_like(o) - for o in tf.nest.flatten(output_time_zero)) - def _step(time, output_ta_t, prev_output, *states): - """RNN step function. 
+ if not time_major: + outputs = tf.nest.map_structure(swap_batch_timestep, outputs) - Args: - time: Current timestep value. - output_ta_t: TensorArray. - prev_output: tuple of outputs from time - 1. - *states: List of states. + return last_output, outputs, new_states - Returns: - Tuple: `(time + 1, output_ta_t, output) + tuple(new_states)` - """ - current_input = tuple(ta.read(time) for ta in input_ta) - # maybe set shape. - current_input = tf.nest.pack_sequence_as(inputs, current_input) - mask_t = masking_fn(time) - output, new_states = step_function(current_input, - tuple(states) + tuple(constants)) - # mask output - flat_output = tf.nest.flatten(output) - flat_mask_output = (flat_zero_output if zero_output_for_mask - else tf.nest.flatten(prev_output)) - flat_new_output = compute_masked_output(mask_t, flat_output, - flat_mask_output) - - # mask states - flat_state = tf.nest.flatten(states) - flat_new_state = tf.nest.flatten(new_states) - for state, new_state in zip(flat_state, flat_new_state): - if isinstance(new_state, tf.Tensor): - new_state.set_shape(state.shape) - flat_final_state = compute_masked_output(mask_t, flat_new_state, - flat_state) - new_states = tf.nest.pack_sequence_as(new_states, flat_final_state) - - ta_index_to_write = time if return_all_outputs else 0 - output_ta_t = tuple( - ta.write(ta_index_to_write, out) - for ta, out in zip(output_ta_t, flat_new_output)) - - return (time + 1, output_ta_t, - tuple(flat_new_output)) + tuple(new_states) - - final_outputs = tf.compat.v1.while_loop( - body=_step, - loop_vars=(time, output_ta, flat_zero_output) + states, - **while_loop_kwargs) - # Skip final_outputs[2] which is the output for final timestep. - new_states = final_outputs[3:] - else: - def _step(time, output_ta_t, *states): - """RNN step function. - - Args: - time: Current timestep value. - output_ta_t: TensorArray. - *states: List of states. 
- Returns: - Tuple: `(time + 1,output_ta_t) + tuple(new_states)` - """ - current_input = tuple(ta.read(time) for ta in input_ta) - current_input = tf.nest.pack_sequence_as(inputs, current_input) - output, new_states = step_function(current_input, - tuple(states) + tuple(constants)) - flat_state = tf.nest.flatten(states) - flat_new_state = tf.nest.flatten(new_states) - for state, new_state in zip(flat_state, flat_new_state): - if isinstance(new_state, tf.Tensor): - new_state.set_shape(state.shape) - - flat_output = tf.nest.flatten(output) - ta_index_to_write = time if return_all_outputs else 0 - output_ta_t = tuple( - ta.write(ta_index_to_write, out) - for ta, out in zip(output_ta_t, flat_output)) - - new_states = tf.nest.pack_sequence_as(initial_states, flat_new_state) - return (time + 1, output_ta_t) + tuple(new_states) - - final_outputs = tf.compat.v1.while_loop( - body=_step, - loop_vars=(time, output_ta) + states, - **while_loop_kwargs) - new_states = final_outputs[2:] - - output_ta = final_outputs[1] - - outputs = tuple(o.stack() for o in output_ta) - last_output = tuple(o[-1] for o in outputs) - - outputs = tf.nest.pack_sequence_as(output_time_zero, outputs) - last_output = tf.nest.pack_sequence_as(output_time_zero, last_output) - - # static shape inference - def set_shape(output_): - if isinstance(output_, tf.Tensor): - shape = output_.shape.as_list() - if return_all_outputs: - shape[0] = time_steps - else: - shape[0] = 1 - shape[1] = batch - output_.set_shape(shape) - return output_ - - outputs = tf.nest.map_structure(set_shape, outputs) - - if not time_major: - outputs = tf.nest.map_structure(swap_batch_timestep, outputs) - - return last_output, outputs, new_states - - -@keras_export('keras.backend.switch') +@keras_export("keras.backend.switch") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def switch(condition, then_expression, else_expression): - """Switches between two operations depending on a scalar value. - - Note that both `then_expression` and `else_expression` - should be symbolic tensors of the *same shape*. - - Args: - condition: tensor (`int` or `bool`). - then_expression: either a tensor, or a callable that returns a tensor. - else_expression: either a tensor, or a callable that returns a tensor. - - Returns: - The selected tensor. - - Raises: - ValueError: If rank of `condition` is greater than rank of expressions. - """ - if condition.dtype != tf.bool: - condition = tf.cast(condition, 'bool') - cond_ndim = ndim(condition) - if not cond_ndim: - if not callable(then_expression): - - def then_expression_fn(): - return then_expression - else: - then_expression_fn = then_expression - if not callable(else_expression): + """Switches between two operations depending on a scalar value. + + Note that both `then_expression` and `else_expression` + should be symbolic tensors of the *same shape*. + + Args: + condition: tensor (`int` or `bool`). + then_expression: either a tensor, or a callable that returns a tensor. + else_expression: either a tensor, or a callable that returns a tensor. + + Returns: + The selected tensor. + + Raises: + ValueError: If rank of `condition` is greater than rank of expressions. 
+ """ + if condition.dtype != tf.bool: + condition = tf.cast(condition, "bool") + cond_ndim = ndim(condition) + if not cond_ndim: + if not callable(then_expression): + + def then_expression_fn(): + return then_expression + + else: + then_expression_fn = then_expression + if not callable(else_expression): - def else_expression_fn(): - return else_expression + def else_expression_fn(): + return else_expression + + else: + else_expression_fn = else_expression + x = tf.compat.v1.cond(condition, then_expression_fn, else_expression_fn) else: - else_expression_fn = else_expression - x = tf.compat.v1.cond(condition, then_expression_fn, else_expression_fn) - else: - # tf.where needs its condition tensor - # to be the same shape as its two - # result tensors - if callable(then_expression): - then_expression = then_expression() - if callable(else_expression): - else_expression = else_expression() - expr_ndim = ndim(then_expression) - if cond_ndim > expr_ndim: - raise ValueError('Rank of `condition` should be less than or' - ' equal to rank of `then_expression` and ' - '`else_expression`. ndim(condition)=' + str(cond_ndim) + - ', ndim(then_expression)' - '=' + str(expr_ndim)) - if cond_ndim > 1: - ndim_diff = expr_ndim - cond_ndim - cond_shape = tf.concat( - [tf.shape(condition), [1] * ndim_diff], axis=0) - condition = tf.reshape(condition, cond_shape) - expr_shape = tf.shape(then_expression) - shape_diff = expr_shape - cond_shape - tile_shape = tf.where(shape_diff > 0, expr_shape, - tf.ones_like(expr_shape)) - condition = tf.tile(condition, tile_shape) - x = tf.where(condition, then_expression, else_expression) - return x - - -@keras_export('keras.backend.in_train_phase') + # tf.where needs its condition tensor + # to be the same shape as its two + # result tensors + if callable(then_expression): + then_expression = then_expression() + if callable(else_expression): + else_expression = else_expression() + expr_ndim = ndim(then_expression) + if cond_ndim > expr_ndim: + raise ValueError( + "Rank of `condition` should be less than or" + " equal to rank of `then_expression` and " + "`else_expression`. ndim(condition)=" + + str(cond_ndim) + + ", ndim(then_expression)=" + + str(expr_ndim) + ) + if cond_ndim > 1: + ndim_diff = expr_ndim - cond_ndim + cond_shape = tf.concat( + [tf.shape(condition), [1] * ndim_diff], axis=0 + ) + condition = tf.reshape(condition, cond_shape) + expr_shape = tf.shape(then_expression) + shape_diff = expr_shape - cond_shape + tile_shape = tf.where( + shape_diff > 0, expr_shape, tf.ones_like(expr_shape) + ) + condition = tf.tile(condition, tile_shape) + x = tf.where(condition, then_expression, else_expression) + return x + + +@keras_export("keras.backend.in_train_phase") @doc_controls.do_not_generate_docs def in_train_phase(x, alt, training=None): - """Selects `x` in train phase, and `alt` otherwise. - - Note that `alt` should have the *same shape* as `x`. - - Args: - x: What to return in train phase - (tensor or callable that returns a tensor). - alt: What to return otherwise - (tensor or callable that returns a tensor). - training: Optional scalar tensor - (or Python boolean, or Python integer) - specifying the learning phase. - - Returns: - Either `x` or `alt` based on the `training` flag. - the `training` flag defaults to `K.learning_phase()`. 
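A minimal usage sketch for the reformatted `switch` above, assuming only the public `keras.backend` API: with a scalar condition the branches may be callables (dispatched through `tf.cond`), while higher-rank conditions go through the broadcasting `tf.where` path.

```python
import tensorflow as tf
from keras import backend as K

training = tf.constant(True)  # scalar bool condition -> tf.cond path
x = K.switch(training, lambda: tf.ones([2]), lambda: tf.zeros([2]))
print(x.numpy())  # [1. 1.]

cond = tf.constant([True, False])  # rank-1 condition -> tf.where path
y = K.switch(cond, tf.ones([2]), tf.zeros([2]))
print(y.numpy())  # [1. 0.]
```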
- """ - from keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top - if training is None: - training = base_layer_utils.call_context().training - - if training is None: - training = learning_phase() - - # TODO(b/138862903): Handle the case when training is tensor. - if not tf.is_tensor(training): - if training == 1 or training is True: - if callable(x): - return x() - else: - return x + """Selects `x` in train phase, and `alt` otherwise. - elif training == 0 or training is False: - if callable(alt): - return alt() - else: - return alt + Note that `alt` should have the *same shape* as `x`. - # else: assume learning phase is a placeholder tensor. - x = switch(training, x, alt) - return x + Args: + x: What to return in train phase + (tensor or callable that returns a tensor). + alt: What to return otherwise + (tensor or callable that returns a tensor). + training: Optional scalar tensor + (or Python boolean, or Python integer) + specifying the learning phase. + + Returns: + Either `x` or `alt` based on the `training` flag. + the `training` flag defaults to `K.learning_phase()`. + """ + from keras.engine import ( + base_layer_utils, + ) + if training is None: + training = base_layer_utils.call_context().training -@keras_export('keras.backend.in_test_phase') + if training is None: + training = learning_phase() + + # TODO(b/138862903): Handle the case when training is tensor. + if not tf.is_tensor(training): + if training == 1 or training is True: + if callable(x): + return x() + else: + return x + + elif training == 0 or training is False: + if callable(alt): + return alt() + else: + return alt + + # else: assume learning phase is a placeholder tensor. + x = switch(training, x, alt) + return x + + +@keras_export("keras.backend.in_test_phase") @doc_controls.do_not_generate_docs def in_test_phase(x, alt, training=None): - """Selects `x` in test phase, and `alt` otherwise. + """Selects `x` in test phase, and `alt` otherwise. - Note that `alt` should have the *same shape* as `x`. + Note that `alt` should have the *same shape* as `x`. - Args: - x: What to return in test phase - (tensor or callable that returns a tensor). - alt: What to return otherwise - (tensor or callable that returns a tensor). - training: Optional scalar tensor - (or Python boolean, or Python integer) - specifying the learning phase. + Args: + x: What to return in test phase + (tensor or callable that returns a tensor). + alt: What to return otherwise + (tensor or callable that returns a tensor). + training: Optional scalar tensor + (or Python boolean, or Python integer) + specifying the learning phase. - Returns: - Either `x` or `alt` based on `K.learning_phase`. - """ - return in_train_phase(alt, x, training=training) + Returns: + Either `x` or `alt` based on `K.learning_phase`. + """ + return in_train_phase(alt, x, training=training) # NN OPERATIONS -@keras_export('keras.backend.relu') +@keras_export("keras.backend.relu") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs -def relu(x, alpha=0., max_value=None, threshold=0.): - """Rectified linear unit. - - With default values, it returns element-wise `max(x, 0)`. - - Otherwise, it follows: - `f(x) = max_value` for `x >= max_value`, - `f(x) = x` for `threshold <= x < max_value`, - `f(x) = alpha * (x - threshold)` otherwise. - - Args: - x: A tensor or variable. - alpha: A scalar, slope of negative section (default=`0.`). - max_value: float. Saturation threshold. - threshold: float. Threshold value for thresholded activation. 
- - Returns: - A tensor. - """ - # While x can be a tensor or variable, we also see cases where - # numpy arrays, lists, tuples are passed as well. - # lists, tuples do not have 'dtype' attribute. - dtype = getattr(x, 'dtype', floatx()) - if alpha != 0.: - if max_value is None and threshold == 0: - return tf.nn.leaky_relu(x, alpha=alpha) +def relu(x, alpha=0.0, max_value=None, threshold=0.0): + """Rectified linear unit. - if threshold != 0: - negative_part = tf.nn.relu(-x + threshold) - else: - negative_part = tf.nn.relu(-x) + With default values, it returns element-wise `max(x, 0)`. + + Otherwise, it follows: + `f(x) = max_value` for `x >= max_value`, + `f(x) = x` for `threshold <= x < max_value`, + `f(x) = alpha * (x - threshold)` otherwise. - clip_max = max_value is not None + Args: + x: A tensor or variable. + alpha: A scalar, slope of negative section (default=`0.`). + max_value: float. Saturation threshold. + threshold: float. Threshold value for thresholded activation. - if threshold != 0: - # computes x for x > threshold else 0 - x = x * tf.cast(tf.greater(x, threshold), dtype=dtype) - elif max_value == 6: - # if no threshold, then can use nn.relu6 native TF op for performance - x = tf.nn.relu6(x) - clip_max = False - else: - x = tf.nn.relu(x) + Returns: + A tensor. + """ + # While x can be a tensor or variable, we also see cases where + # numpy arrays, lists, tuples are passed as well. + # lists, tuples do not have 'dtype' attribute. + dtype = getattr(x, "dtype", floatx()) + if alpha != 0.0: + if max_value is None and threshold == 0: + return tf.nn.leaky_relu(x, alpha=alpha) + + if threshold != 0: + negative_part = tf.nn.relu(-x + threshold) + else: + negative_part = tf.nn.relu(-x) - if clip_max: - max_value = _constant_to_tensor(max_value, x.dtype.base_dtype) - zero = _constant_to_tensor(0, x.dtype.base_dtype) - x = tf.clip_by_value(x, zero, max_value) + clip_max = max_value is not None - if alpha != 0.: - alpha = _to_tensor(alpha, x.dtype.base_dtype) - x -= alpha * negative_part - return x + if threshold != 0: + # computes x for x > threshold else 0 + x = x * tf.cast(tf.greater(x, threshold), dtype=dtype) + elif max_value == 6: + # if no threshold, then can use nn.relu6 native TF op for performance + x = tf.nn.relu6(x) + clip_max = False + else: + x = tf.nn.relu(x) + + if clip_max: + max_value = _constant_to_tensor(max_value, x.dtype.base_dtype) + zero = _constant_to_tensor(0, x.dtype.base_dtype) + x = tf.clip_by_value(x, zero, max_value) + + if alpha != 0.0: + alpha = _to_tensor(alpha, x.dtype.base_dtype) + x -= alpha * negative_part + return x -@keras_export('keras.backend.elu') +@keras_export("keras.backend.elu") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs -def elu(x, alpha=1.): - """Exponential linear unit. +def elu(x, alpha=1.0): + """Exponential linear unit. - Args: - x: A tensor or variable to compute the activation function for. - alpha: A scalar, slope of negative section. + Args: + x: A tensor or variable to compute the activation function for. + alpha: A scalar, slope of negative section. - Returns: - A tensor. - """ - res = tf.nn.elu(x) - if alpha == 1: - return res - else: - return tf.where(x > 0, res, alpha * res) + Returns: + A tensor. 
+ """ + res = tf.nn.elu(x) + if alpha == 1: + return res + else: + return tf.where(x > 0, res, alpha * res) -@keras_export('keras.backend.softmax') +@keras_export("keras.backend.softmax") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def softmax(x, axis=-1): - """Softmax of a tensor. + """Softmax of a tensor. + + Args: + x: A tensor or variable. + axis: The dimension softmax would be performed on. + The default is -1 which indicates the last dimension. + + Returns: + A tensor. + """ + if x.shape.rank <= 1: + raise ValueError( + f"Cannot apply softmax to a tensor that is 1D. Received input: {x}" + ) - Args: - x: A tensor or variable. - axis: The dimension softmax would be performed on. - The default is -1 which indicates the last dimension. + if isinstance(axis, int): + output = tf.nn.softmax(x, axis=axis) + else: + # nn.softmax does not support tuple axis. + numerator = tf.exp(x - tf.reduce_max(x, axis=axis, keepdims=True)) + denominator = tf.reduce_sum(numerator, axis=axis, keepdims=True) + output = numerator / denominator - Returns: - A tensor. - """ - return tf.nn.softmax(x, axis=axis) + # Cache the logits to use for crossentropy loss. + output._keras_logits = x + return output -@keras_export('keras.backend.softplus') +@keras_export("keras.backend.softplus") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def softplus(x): - """Softplus of a tensor. + """Softplus of a tensor. - Args: - x: A tensor or variable. + Args: + x: A tensor or variable. - Returns: - A tensor. - """ - return tf.math.softplus(x) + Returns: + A tensor. + """ + return tf.math.softplus(x) -@keras_export('keras.backend.softsign') +@keras_export("keras.backend.softsign") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def softsign(x): - """Softsign of a tensor. + """Softsign of a tensor. + + Args: + x: A tensor or variable. + + Returns: + A tensor. + """ + return tf.math.softsign(x) - Args: - x: A tensor or variable. - Returns: - A tensor. - """ - return tf.math.softsign(x) +def _get_logits(output, from_logits, op_type, fn_name): + output_ = output + from_logits_ = from_logits + has_keras_logits = hasattr(output, "_keras_logits") + if has_keras_logits: + output_ = output._keras_logits + from_logits_ = True -@keras_export('keras.backend.categorical_crossentropy') + from_expected_op_type = ( + not isinstance(output, (tf.__internal__.EagerTensor, tf.Variable)) + and output.op.type == op_type + ) and not has_keras_logits + + if from_expected_op_type: + # When softmax activation function is used for output operation, we + # use logits from the softmax function directly to compute loss in order + # to prevent collapsing zero when training. + # See b/117284466 + assert len(output.op.inputs) == 1 + output_ = output.op.inputs[0] + from_logits_ = True + + if from_logits and (has_keras_logits or from_expected_op_type): + warnings.warn( + f'"`{fn_name}` received `from_logits=True`, but ' + f"the `output` argument was produced by a {op_type} " + "activation and thus does not represent logits. " + "Was this intended?", + stacklevel=2, + ) + + return output_, from_logits_ + + +@keras_export("keras.backend.categorical_crossentropy") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def categorical_crossentropy(target, output, from_logits=False, axis=-1): - """Categorical crossentropy between an output tensor and a target tensor. - - Args: - target: A tensor of the same shape as `output`. 
- output: A tensor resulting from a softmax - (unless `from_logits` is True, in which - case `output` is expected to be the logits). - from_logits: Boolean, whether `output` is the - result of a softmax, or is a tensor of logits. - axis: Int specifying the channels axis. `axis=-1` corresponds to data - format `channels_last`, and `axis=1` corresponds to data format - `channels_first`. - - Returns: - Output tensor. - - Raises: - ValueError: if `axis` is neither -1 nor one of the axes of `output`. - - Example: - - >>> a = tf.constant([1., 0., 0., 0., 1., 0., 0., 0., 1.], shape=[3,3]) - >>> print(a) - tf.Tensor( - [[1. 0. 0.] - [0. 1. 0.] - [0. 0. 1.]], shape=(3, 3), dtype=float32) - >>> b = tf.constant([.9, .05, .05, .05, .89, .06, .05, .01, .94], shape=[3,3]) - >>> print(b) - tf.Tensor( - [[0.9 0.05 0.05] - [0.05 0.89 0.06] - [0.05 0.01 0.94]], shape=(3, 3), dtype=float32) - >>> loss = tf.keras.backend.categorical_crossentropy(a, b) - >>> print(np.around(loss, 5)) - [0.10536 0.11653 0.06188] - >>> loss = tf.keras.backend.categorical_crossentropy(a, a) - >>> print(np.around(loss, 5)) - [0. 0. 0.] - - """ - target = tf.convert_to_tensor(target) - output = tf.convert_to_tensor(output) - target.shape.assert_is_compatible_with(output.shape) - - # Use logits whenever they are available. `softmax` and `sigmoid` - # activations cache logits on the `output` Tensor. - if hasattr(output, '_keras_logits'): - output = output._keras_logits # pylint: disable=protected-access + """Categorical crossentropy between an output tensor and a target tensor. + + Args: + target: A tensor of the same shape as `output`. + output: A tensor resulting from a softmax + (unless `from_logits` is True, in which + case `output` is expected to be the logits). + from_logits: Boolean, whether `output` is the + result of a softmax, or is a tensor of logits. + axis: Int specifying the channels axis. `axis=-1` corresponds to data + format `channels_last`, and `axis=1` corresponds to data format + `channels_first`. + + Returns: + Output tensor. + + Raises: + ValueError: if `axis` is neither -1 nor one of the axes of `output`. + + Example: + + >>> a = tf.constant([1., 0., 0., 0., 1., 0., 0., 0., 1.], shape=[3,3]) + >>> print(a) + tf.Tensor( + [[1. 0. 0.] + [0. 1. 0.] + [0. 0. 1.]], shape=(3, 3), dtype=float32) + >>> b = tf.constant([.9, .05, .05, .05, .89, .06, .05, .01, .94], + ... shape=[3, 3]) + >>> print(b) + tf.Tensor( + [[0.9 0.05 0.05] + [0.05 0.89 0.06] + [0.05 0.01 0.94]], shape=(3, 3), dtype=float32) + >>> loss = tf.keras.backend.categorical_crossentropy(a, b) + >>> print(np.around(loss, 5)) + [0.10536 0.11653 0.06188] + >>> loss = tf.keras.backend.categorical_crossentropy(a, a) + >>> print(np.around(loss, 5)) + [0. 0. 0.] + + """ + target = tf.convert_to_tensor(target) + output = tf.convert_to_tensor(output) + target.shape.assert_is_compatible_with(output.shape) + + output, from_logits = _get_logits( + output, from_logits, "Softmax", "categorical_crossentropy" + ) if from_logits: - warnings.warn( - '"`categorical_crossentropy` received `from_logits=True`, but ' - 'the `output` argument was produced by a sigmoid or softmax ' - 'activation and thus does not represent logits. 
Was this intended?"', - stacklevel=2) - from_logits = True - - if from_logits: - return tf.nn.softmax_cross_entropy_with_logits( - labels=target, logits=output, axis=axis) - - if (not isinstance(output, (tf.__internal__.EagerTensor, tf.Variable)) and - output.op.type == 'Softmax') and not hasattr(output, '_keras_history'): - # When softmax activation function is used for output operation, we - # use logits from the softmax function directly to compute loss in order - # to prevent collapsing zero when training. - # See b/117284466 - assert len(output.op.inputs) == 1 - output = output.op.inputs[0] - return tf.nn.softmax_cross_entropy_with_logits( - labels=target, logits=output, axis=axis) - - # scale preds so that the class probas of each sample sum to 1 - output = output / tf.reduce_sum(output, axis, True) - # Compute cross entropy from probabilities. - epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype) - output = tf.clip_by_value(output, epsilon_, 1. - epsilon_) - return -tf.reduce_sum(target * tf.math.log(output), axis) - - -@keras_export('keras.backend.sparse_categorical_crossentropy') + return tf.nn.softmax_cross_entropy_with_logits( + labels=target, logits=output, axis=axis + ) + + # Adjust the predictions so that the probability of + # each class for every sample adds up to 1 + # This is needed to ensure that the cross entropy is + # computed correctly. + output = output / tf.reduce_sum(output, axis, True) + + # Compute cross entropy from probabilities. + epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype) + output = tf.clip_by_value(output, epsilon_, 1.0 - epsilon_) + return -tf.reduce_sum(target * tf.math.log(output), axis) + + +@keras_export("keras.backend.categorical_focal_crossentropy") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs -def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1): - """Categorical crossentropy with integer targets. - - Args: - target: An integer tensor. - output: A tensor resulting from a softmax - (unless `from_logits` is True, in which - case `output` is expected to be the logits). - from_logits: Boolean, whether `output` is the - result of a softmax, or is a tensor of logits. - axis: Int specifying the channels axis. `axis=-1` corresponds to data - format `channels_last`, and `axis=1` corresponds to data format - `channels_first`. - - Returns: - Output tensor. - - Raises: - ValueError: if `axis` is neither -1 nor one of the axes of `output`. - """ - target = tf.convert_to_tensor(target) - output = tf.convert_to_tensor(output) - - # Use logits whenever they are available. `softmax` and `sigmoid` - # activations cache logits on the `output` Tensor. - if hasattr(output, '_keras_logits'): - output = output._keras_logits # pylint: disable=protected-access +def categorical_focal_crossentropy( + target, + output, + alpha=0.25, + gamma=2.0, + from_logits=False, + axis=-1, +): + """Computes the alpha balanced focal crossentropy loss. + + According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it + helps to apply a focal factor to down-weight easy examples and focus more on + hard examples. The general formula for the focal loss (FL) + is as follows: + + `FL(p_t) = (1 − p_t)^gamma * log(p_t)` + + where `p_t` is defined as follows: + `p_t = output if y_true == 1, else 1 - output` + + `(1 − p_t)^gamma` is the `modulating_factor`, where `gamma` is a focusing + parameter. When `gamma` = 0, there is no focal effect on the cross entropy. 
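With the new `_get_logits` helper, `categorical_crossentropy` accepts either probabilities (the default) or raw logits. A quick check of the two modes, using only the public API:

```python
import tensorflow as tf
from keras import backend as K

target = tf.constant([[1.0, 0.0, 0.0]])
logits = tf.constant([[2.0, 1.0, 0.1]])

loss_logits = K.categorical_crossentropy(target, logits, from_logits=True)
loss_probs = K.categorical_crossentropy(target, tf.nn.softmax(logits))
print(loss_logits.numpy(), loss_probs.numpy())  # agree up to clipping/precision
```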
+ `gamma` reduces the importance given to simple examples in a smooth manner. + + The authors use alpha-balanced variant of focal loss (FL) in the paper: + `FL(p_t) = −alpha * (1 − p_t)^gamma * log(p_t)` + + where `alpha` is the weight factor for the classes. If `alpha` = 1, the + loss won't be able to handle class imbalance properly as all + classes will have the same weight. This can be a constant or a list of + constants. If alpha is a list, it must have the same length as the number + of classes. + + The formula above can be generalized to: + `FL(p_t) = alpha * (1 − p_t)^gamma * CrossEntropy(target, output)` + + where minus comes from `CrossEntropy(target, output)` (CE). + + Extending this to multi-class case is straightforward: + `FL(p_t) = alpha * (1 − p_t)^gamma * CategoricalCE(target, output)` + + Args: + target: Ground truth values from the dataset. + output: Predictions of the model. + alpha: A weight balancing factor for all classes, default is `0.25` as + mentioned in the reference. It can be a list of floats or a scalar. + In the multi-class case, alpha may be set by inverse class + frequency by using `compute_class_weight` from `sklearn.utils`. + gamma: A focusing parameter, default is `2.0` as mentioned in the + reference. It helps to gradually reduce the importance given to + simple examples in a smooth manner. + from_logits: Whether `output` is expected to be a logits tensor. By + default, we consider that `output` encodes a probability + distribution. + axis: Int specifying the channels axis. `axis=-1` corresponds to data + format `channels_last`, and `axis=1` corresponds to data format + `channels_first`. + + Returns: + A tensor. + """ + target = tf.convert_to_tensor(target) + output = tf.convert_to_tensor(output) + target.shape.assert_is_compatible_with(output.shape) + + output, from_logits = _get_logits( + output, from_logits, "Softmax", "categorical_focal_crossentropy" + ) + if from_logits: - warnings.warn( - '"`sparse_categorical_crossentropy` received `from_logits=True`, but ' - 'the `output` argument was produced by a sigmoid or softmax ' - 'activation and thus does not represent logits. Was this intended?"', - stacklevel=2) - from_logits = True - elif (not from_logits and - not isinstance(output, (tf.__internal__.EagerTensor, tf.Variable)) and - output.op.type == 'Softmax') and not hasattr(output, '_keras_history'): - # When softmax activation function is used for output operation, we - # use logits from the softmax function directly to compute loss in order - # to prevent collapsing zero when training. - # See b/117284466 - assert len(output.op.inputs) == 1 - output = output.op.inputs[0] - from_logits = True - elif not from_logits: + output = tf.nn.softmax(output, axis=axis) + + # Adjust the predictions so that the probability of + # each class for every sample adds up to 1 + # This is needed to ensure that the cross entropy is + # computed correctly. 
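The body below then normalizes, clips, and applies the focal weighting. The arithmetic it implements, traced by hand on a toy example (illustrative values only, not from the patch):

```python
import tensorflow as tf

target = tf.constant([[0.0, 1.0, 0.0]])
output = tf.constant([[0.1, 0.8, 0.1]])  # already sums to 1
alpha, gamma = 0.25, 2.0

cce = -target * tf.math.log(output)                # plain crossentropy terms
focal = alpha * tf.pow(1.0 - output, gamma) * cce  # alpha * (1 - p_t)^gamma
print(tf.reduce_sum(focal, axis=-1).numpy())       # 0.25 * 0.2**2 * 0.2231 ~ 0.0022
```

The confident prediction (`p_t = 0.8`) is down-weighted by `(1 - 0.8)**2 = 0.04` relative to plain crossentropy, which is the intended focal effect.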
+ output = output / tf.reduce_sum(output, axis=axis, keepdims=True) + epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype) - output = tf.clip_by_value(output, epsilon_, 1 - epsilon_) - output = tf.math.log(output) - - if isinstance(output.shape, (tuple, list)): - output_rank = len(output.shape) - else: - output_rank = output.shape.ndims - if output_rank is not None: - axis %= output_rank - if axis != output_rank - 1: - permutation = list( - itertools.chain(range(axis), range(axis + 1, output_rank), [axis])) - output = tf.compat.v1.transpose(output, perm=permutation) - elif axis != -1: - raise ValueError( - 'Cannot compute sparse categorical crossentropy with `axis={}` on an ' - 'output tensor with unknown rank'.format(axis)) - - target = cast(target, 'int64') - - # Try to adjust the shape so that rank of labels = rank of logits - 1. - output_shape = tf.shape(output) - target_rank = target.shape.ndims - - update_shape = ( - target_rank is not None and output_rank is not None and - target_rank != output_rank - 1) - if update_shape: - target = flatten(target) - output = tf.reshape(output, [-1, output_shape[-1]]) - - if py_any(_is_symbolic_tensor(v) for v in [target, output]): - with get_graph().as_default(): - res = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=target, logits=output) - else: - res = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=target, logits=output) - - if update_shape and output_rank >= 3: - # If our output includes timesteps or spatial dimensions we need to reshape - return tf.reshape(res, output_shape[:-1]) - else: + output = tf.clip_by_value(output, epsilon_, 1.0 - epsilon_) + + # Calculate cross entropy + cce = -target * tf.math.log(output) + + # Calculate factors + modulating_factor = tf.pow(1.0 - output, gamma) + weighting_factor = tf.multiply(modulating_factor, alpha) + + # Apply weighting factor + focal_cce = tf.multiply(weighting_factor, cce) + focal_cce = tf.reduce_sum(focal_cce, axis=axis) + return focal_cce + + +@keras_export("keras.backend.sparse_categorical_crossentropy") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def sparse_categorical_crossentropy( + target, output, from_logits=False, axis=-1, ignore_class=None +): + """Categorical crossentropy with integer targets. + + Args: + target: An integer tensor. + output: A tensor resulting from a softmax + (unless `from_logits` is True, in which + case `output` is expected to be the logits). + from_logits: Boolean, whether `output` is the + result of a softmax, or is a tensor of logits. + axis: Int specifying the channels axis. `axis=-1` corresponds to data + format `channels_last`, and `axis=1` corresponds to data format + `channels_first`. + ignore_class: Optional integer. The ID of a class to be ignored + during loss computation. This is useful, for example, in + segmentation problems featuring a "void" class (commonly -1 + or 255) in segmentation maps. + By default (`ignore_class=None`), all classes are considered. + + Returns: + Output tensor. + + Raises: + ValueError: if `axis` is neither -1 nor one of the axes of `output`. 
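A usage sketch for the new `ignore_class` argument documented above: targets carrying the ignored ID (here -1, a common "void" label) contribute zero loss, and the validity mask is attached to the result as `_keras_mask`.

```python
import tensorflow as tf
from keras import backend as K

target = tf.constant([0, 2, -1], dtype=tf.int64)  # last entry is "void"
logits = tf.random.normal([3, 4])

loss = K.sparse_categorical_crossentropy(
    target, logits, from_logits=True, ignore_class=-1
)
print(loss.numpy())  # third entry is 0.0; loss._keras_mask marks valid rows
```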
+ """ + target = tf.convert_to_tensor(target) + output = tf.convert_to_tensor(output) + + target = cast(target, "int64") + + output, from_logits = _get_logits( + output, from_logits, "Softmax", "sparse_categorical_crossentropy" + ) + if not from_logits: + epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype) + output = tf.clip_by_value(output, epsilon_, 1 - epsilon_) + output = tf.math.log(output) + + # Permute output so that the last axis contains the logits/probabilities. + if isinstance(output.shape, (tuple, list)): + output_rank = len(output.shape) + else: + output_rank = output.shape.ndims + if output_rank is not None: + axis %= output_rank + if axis != output_rank - 1: + permutation = list( + itertools.chain( + range(axis), range(axis + 1, output_rank), [axis] + ) + ) + output = tf.compat.v1.transpose(output, perm=permutation) + elif axis != -1: + raise ValueError( + "Cannot compute sparse categorical crossentropy with `axis={}` " + "on an output tensor with unknown rank".format(axis) + ) + + # Try to adjust the shape so that rank of labels = rank of logits - 1. + output_shape = tf.shape(output) + target_rank = target.shape.ndims + + update_shape = ( + target_rank is not None + and output_rank is not None + and target_rank != output_rank - 1 + ) + if update_shape: + target = flatten(target) + output = tf.reshape(output, [-1, output_shape[-1]]) + + if ignore_class is not None: + valid_mask = tf.not_equal(target, cast(ignore_class, target.dtype)) + target = target[valid_mask] + output = output[valid_mask] + + if py_any(_is_symbolic_tensor(v) for v in [target, output]): + with get_graph().as_default(): + res = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=target, logits=output + ) + else: + res = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=target, logits=output + ) + + if ignore_class is not None: + res_shape = cast(output_shape[:-1], "int64") + valid_mask = tf.reshape(valid_mask, res_shape) + res = tf.scatter_nd(tf.where(valid_mask), res, res_shape) + res._keras_mask = valid_mask + + return res + + if update_shape and output_rank >= 3: + # If our output includes timesteps or + # spatial dimensions we need to reshape + res = tf.reshape(res, output_shape[:-1]) + return res -@keras_export('keras.backend.binary_crossentropy') +@keras_export("keras.backend.binary_crossentropy") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def binary_crossentropy(target, output, from_logits=False): - """Binary crossentropy between an output tensor and a target tensor. - - Args: - target: A tensor with the same shape as `output`. - output: A tensor. - from_logits: Whether `output` is expected to be a logits tensor. - By default, we consider that `output` - encodes a probability distribution. - - Returns: - A tensor. - """ - target = tf.convert_to_tensor(target) - output = tf.convert_to_tensor(output) - - # Use logits whenever they are available. `softmax` and `sigmoid` - # activations cache logits on the `output` Tensor. - if hasattr(output, '_keras_logits'): - output = output._keras_logits # pylint: disable=protected-access + """Binary crossentropy between an output tensor and a target tensor. + + Args: + target: A tensor with the same shape as `output`. + output: A tensor. + from_logits: Whether `output` is expected to be a logits tensor. + By default, we consider that `output` + encodes a probability distribution. + + Returns: + A tensor. 
+ """ + target = tf.convert_to_tensor(target) + output = tf.convert_to_tensor(output) + + output, from_logits = _get_logits( + output, from_logits, "Sigmoid", "binary_crossentropy" + ) if from_logits: - warnings.warn( - '"`binary_crossentropy` received `from_logits=True`, but the `output`' - ' argument was produced by a sigmoid or softmax activation and thus ' - 'does not represent logits. Was this intended?"', - stacklevel=2) - from_logits = True - - if from_logits: - return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output) - - if (not isinstance(output, (tf.__internal__.EagerTensor, tf.Variable)) and - output.op.type == 'Sigmoid') and not hasattr(output, '_keras_history'): - # When sigmoid activation function is used for output operation, we - # use logits from the sigmoid function directly to compute loss in order - # to prevent collapsing zero when training. - assert len(output.op.inputs) == 1 - output = output.op.inputs[0] - return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output) - - epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype) - output = tf.clip_by_value(output, epsilon_, 1. - epsilon_) - - # Compute cross entropy from probabilities. - bce = target * tf.math.log(output + epsilon()) - bce += (1 - target) * tf.math.log(1 - output + epsilon()) - return -bce - - -@keras_export('keras.backend.binary_focal_crossentropy') + return tf.nn.sigmoid_cross_entropy_with_logits( + labels=target, logits=output + ) + + epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype) + output = tf.clip_by_value(output, epsilon_, 1.0 - epsilon_) + + # Compute cross entropy from probabilities. + bce = target * tf.math.log(output + epsilon()) + bce += (1 - target) * tf.math.log(1 - output + epsilon()) + return -bce + + +@keras_export("keras.backend.binary_focal_crossentropy") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def binary_focal_crossentropy( target, output, + apply_class_balancing=False, + alpha=0.25, gamma=2.0, from_logits=False, ): - """Binary focal crossentropy between an output tensor and a target tensor. - - According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it - helps to apply a focal factor to down-weight easy examples and focus more on - hard examples. By default, the focal tensor is computed as follows: - - `focal_factor = (1 - output)**gamma` for class 1 - `focal_factor = output**gamma` for class 0 - where `gamma` is a focusing parameter. When `gamma` = 0, this function is - equivalent to the binary crossentropy. - - Args: - target: A tensor with the same shape as `output`. - output: A tensor. - gamma: A focusing parameter used to compute the focal factor, default is 2.0 - as mentioned in reference. - from_logits: Whether `output` is expected to be a logits tensor. By default, - we consider that `output` encodes a probability distribution. - - Returns: - A tensor. - """ - sigmoidal = tf.__internal__.smart_cond.smart_cond( - from_logits, - lambda: sigmoid(output), - lambda: output, - ) - p_t = (target * sigmoidal) + ((1 - target) * (1 - sigmoidal)) - # Calculate focal factor - focal_factor = tf.pow(1.0 - p_t, gamma) - # Binary crossentropy - bce = binary_crossentropy( - target=target, - output=output, - from_logits=from_logits, - ) - return focal_factor * bce - - -@keras_export('keras.backend.binary_weighted_focal_crossentropy') + """Binary focal crossentropy between an output tensor and a target tensor. 
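The `Sigmoid` op-type lookup above pairs with the `_keras_logits` caching that this patch adds to `sigmoid` further down: probabilities produced by `K.sigmoid` still let `binary_crossentropy` recover the raw logits and take the numerically stable branch. A sketch against the public API:

```python
import tensorflow as tf
from keras import backend as K

logits = tf.constant([0.5, -1.2, 3.0])
probs = K.sigmoid(logits)  # caches `_keras_logits` on the output tensor

target = tf.constant([1.0, 0.0, 1.0])
bce = K.binary_crossentropy(target, probs)  # silently uses the cached logits
ref = tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=logits)
print(bce.numpy(), ref.numpy())  # match
```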
+ + According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it + helps to apply a focal factor to down-weight easy examples and focus more on + hard examples. By default, the focal tensor is computed as follows: + + `focal_factor = (1 - output) ** gamma` for class 1 + `focal_factor = output ** gamma` for class 0 + where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal + effect on the binary crossentropy. + + If `apply_class_balancing == True`, this function also takes into account a + weight balancing factor for the binary classes 0 and 1 as follows: + + `weight = alpha` for class 1 (`target == 1`) + `weight = 1 - alpha` for class 0 + where `alpha` is a float in the range of `[0, 1]`. + + Args: + target: A tensor with the same shape as `output`. + output: A tensor. + apply_class_balancing: A bool, whether to apply weight balancing on the + binary classes 0 and 1. + alpha: A weight balancing factor for class 1, default is `0.25` as + mentioned in the reference. The weight for class 0 is `1.0 - alpha`. + gamma: A focusing parameter, default is `2.0` as mentioned in the + reference. + from_logits: Whether `output` is expected to be a logits tensor. By + default, we consider that `output` encodes a probability + distribution. + + Returns: + A tensor. + """ + + sigmoidal = sigmoid(output) if from_logits else output + + p_t = target * sigmoidal + (1 - target) * (1 - sigmoidal) + + # Calculate focal factor + focal_factor = tf.pow(1.0 - p_t, gamma) + + # Binary crossentropy + bce = binary_crossentropy( + target=target, + output=output, + from_logits=from_logits, + ) + focal_bce = focal_factor * bce + + if apply_class_balancing: + weight = target * alpha + (1 - target) * (1 - alpha) + focal_bce = weight * focal_bce + + return focal_bce + + +@keras_export("keras.backend.sigmoid") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def sigmoid(x): + """Element-wise sigmoid. + + Args: + x: A tensor or variable. + + Returns: + A tensor. + """ + output = tf.sigmoid(x) + # Cache the logits to use for crossentropy loss. + output._keras_logits = x + return output + + +@keras_export("keras.backend.hard_sigmoid") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def hard_sigmoid(x): + """Segment-wise linear approximation of sigmoid. + + Faster than sigmoid. + Returns `0.` if `x < -2.5`, `1.` if `x > 2.5`. + In `-2.5 <= x <= 2.5`, returns `0.2 * x + 0.5`. + + Args: + x: A tensor or variable. + + Returns: + A tensor. + """ + point_two = _constant_to_tensor(0.2, x.dtype.base_dtype) + point_five = _constant_to_tensor(0.5, x.dtype.base_dtype) + x = tf.multiply(x, point_two) + x = tf.add(x, point_five) + x = tf.clip_by_value(x, 0.0, 1.0) + return x + + +@keras_export("keras.backend.tanh") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def tanh(x): + """Element-wise tanh. + + Args: + x: A tensor or variable. + + Returns: + A tensor. + """ + return tf.tanh(x) + + +@keras_export("keras.backend.dropout") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def dropout(x, level, noise_shape=None, seed=None): + """Sets entries in `x` to zero at random, while scaling the entire tensor. + + Args: + x: tensor + level: fraction of the entries in the tensor + that will be set to 0. + noise_shape: shape for randomly generated keep/drop flags, + must be broadcastable to the shape of `x` + seed: random seed to ensure determinism. + + Returns: + A tensor. 
+ """ + if seed is None: + seed = np.random.randint(10e6) + return tf.nn.dropout(x, rate=level, noise_shape=noise_shape, seed=seed) + + +@keras_export("keras.backend.l2_normalize") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def l2_normalize(x, axis=None): + """Normalizes a tensor wrt the L2 norm alongside the specified axis. + + Args: + x: Tensor or variable. + axis: axis along which to perform normalization. + + Returns: + A tensor. + """ + return tf.linalg.l2_normalize(x, axis=axis) + + +@keras_export("keras.backend.in_top_k") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def in_top_k(predictions, targets, k): + """Returns whether the `targets` are in the top `k` `predictions`. + + Args: + predictions: A tensor of shape `(batch_size, classes)` and type + `float32`. + targets: A 1D tensor of length `batch_size` and type `int32` or `int64`. + k: An `int`, number of top elements to consider. + + Returns: + A 1D tensor of length `batch_size` and type `bool`. + `output[i]` is `True` if `predictions[i, targets[i]]` is within top-`k` + values of `predictions[i]`. + """ + return tf.compat.v1.math.in_top_k(predictions, targets, k) + + +# CONVOLUTIONS + + +def _preprocess_conv1d_input(x, data_format): + """Transpose and cast the input before the conv1d. + + Args: + x: input tensor. + data_format: string, `"channels_last"` or `"channels_first"`. + + Returns: + A tensor. + """ + tf_data_format = "NWC" # to pass TF Conv2dNative operations + if data_format == "channels_first": + if not _has_nchw_support(): + x = tf.compat.v1.transpose(x, (0, 2, 1)) # NCW -> NWC + else: + tf_data_format = "NCW" + return x, tf_data_format + + +def _preprocess_conv2d_input(x, data_format, force_transpose=False): + """Transpose and cast the input before the conv2d. + + Args: + x: input tensor. + data_format: string, `"channels_last"` or `"channels_first"`. + force_transpose: Boolean. If True, the input will always be transposed + from NCHW to NHWC if `data_format` is `"channels_first"`. + If False, the transposition only occurs on CPU (GPU ops are + assumed to support NCHW). + + Returns: + A tensor. + """ + tf_data_format = "NHWC" + if data_format == "channels_first": + if not _has_nchw_support() or force_transpose: + x = tf.compat.v1.transpose(x, (0, 2, 3, 1)) # NCHW -> NHWC + else: + tf_data_format = "NCHW" + return x, tf_data_format + + +def _preprocess_conv3d_input(x, data_format): + """Transpose and cast the input before the conv3d. + + Args: + x: input tensor. + data_format: string, `"channels_last"` or `"channels_first"`. + + Returns: + A tensor. + """ + tf_data_format = "NDHWC" + if data_format == "channels_first": + if not _has_nchw_support(): + x = tf.compat.v1.transpose(x, (0, 2, 3, 4, 1)) + else: + tf_data_format = "NCDHW" + return x, tf_data_format + + +def _preprocess_padding(padding): + """Convert keras' padding to TensorFlow's padding. + + Args: + padding: string, one of 'same' , 'valid' + + Returns: + a string, one of 'SAME', 'VALID'. + + Raises: + ValueError: if invalid `padding'` + """ + if padding == "same": + padding = "SAME" + elif padding == "valid": + padding = "VALID" + else: + raise ValueError("Invalid padding: " + str(padding)) + return padding + + +@keras_export("keras.backend.conv1d") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def conv1d( + x, kernel, strides=1, padding="valid", data_format=None, dilation_rate=1 +): + """1D convolution. + + Args: + x: Tensor or variable. 
+ kernel: kernel tensor. + strides: stride integer. + padding: string, `"same"`, `"causal"` or `"valid"`. + data_format: string, one of "channels_last", "channels_first". + dilation_rate: integer dilate rate. + + Returns: + A tensor, result of 1D convolution. + + Raises: + ValueError: if `data_format` is neither `channels_last` or + `channels_first`. + """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + + kernel_shape = kernel.shape.as_list() + if padding == "causal": + # causal (dilated) convolution: + left_pad = dilation_rate * (kernel_shape[0] - 1) + x = temporal_padding(x, (left_pad, 0)) + padding = "valid" + padding = _preprocess_padding(padding) + + x, tf_data_format = _preprocess_conv1d_input(x, data_format) + x = tf.compat.v1.nn.convolution( + input=x, + filter=kernel, + dilation_rate=dilation_rate, + strides=strides, + padding=padding, + data_format=tf_data_format, + ) + if data_format == "channels_first" and tf_data_format == "NWC": + x = tf.compat.v1.transpose(x, (0, 2, 1)) # NWC -> NCW + return x + + +@keras_export("keras.backend.conv2d") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def conv2d( + x, + kernel, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), +): + """2D convolution. + + Args: + x: Tensor or variable. + kernel: kernel tensor. + strides: strides tuple. + padding: string, `"same"` or `"valid"`. + data_format: `"channels_last"` or `"channels_first"`. + dilation_rate: tuple of 2 integers. + + Returns: + A tensor, result of 2D convolution. + + Raises: + ValueError: if `data_format` is neither `channels_last` or + `channels_first`. + """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + + x, tf_data_format = _preprocess_conv2d_input(x, data_format) + padding = _preprocess_padding(padding) + x = tf.compat.v1.nn.convolution( + input=x, + filter=kernel, + dilation_rate=dilation_rate, + strides=strides, + padding=padding, + data_format=tf_data_format, + ) + if data_format == "channels_first" and tf_data_format == "NHWC": + x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW + return x + + +@keras_export("keras.backend.conv2d_transpose") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def conv2d_transpose( + x, + kernel, + output_shape, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), +): + """2D deconvolution (i.e. + + transposed convolution). + + Args: + x: Tensor or variable. + kernel: kernel tensor. + output_shape: 1D int tensor for the output shape. + strides: strides tuple. + padding: string, `"same"` or `"valid"`. + data_format: string, `"channels_last"` or `"channels_first"`. + dilation_rate: Tuple of 2 integers. + + Returns: + A tensor, result of transposed 2D convolution. + + Raises: + ValueError: if `data_format` is neither `channels_last` or + `channels_first`. + """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + + # `atrous_conv2d_transpose` only supports NHWC format, even on GPU. 
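+ # (So when a dilated transposed convolution is requested under
+ # `channels_first`, the input must be moved to NHWC regardless of device
+ # support; the `force_transpose` flag below enforces exactly that.)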
+ if data_format == "channels_first" and dilation_rate != (1, 1): + force_transpose = True + else: + force_transpose = False + + x, tf_data_format = _preprocess_conv2d_input( + x, data_format, force_transpose + ) + + if data_format == "channels_first" and tf_data_format == "NHWC": + output_shape = ( + output_shape[0], + output_shape[2], + output_shape[3], + output_shape[1], + ) + if output_shape[0] is None: + output_shape = (shape(x)[0],) + tuple(output_shape[1:]) + + if isinstance(output_shape, (tuple, list)): + output_shape = tf.stack(list(output_shape)) + + padding = _preprocess_padding(padding) + if tf_data_format == "NHWC": + strides = (1,) + strides + (1,) + else: + strides = (1, 1) + strides + + if dilation_rate == (1, 1): + x = tf.compat.v1.nn.conv2d_transpose( + x, + kernel, + output_shape, + strides, + padding=padding, + data_format=tf_data_format, + ) + else: + if dilation_rate[0] != dilation_rate[1]: + raise ValueError( + "Expected the 2 dimensions of the `dilation_rate` argument " + "to be equal to each other. " + f"Received: dilation_rate={dilation_rate}" + ) + x = tf.nn.atrous_conv2d_transpose( + x, kernel, output_shape, rate=dilation_rate[0], padding=padding + ) + if data_format == "channels_first" and tf_data_format == "NHWC": + x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW + return x + + +def separable_conv1d( + x, + depthwise_kernel, + pointwise_kernel, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, +): + """1D convolution with separable filters. + + Args: + x: input tensor + depthwise_kernel: convolution kernel for the depthwise convolution. + pointwise_kernel: kernel for the 1x1 convolution. + strides: stride integer. + padding: string, `"same"` or `"valid"`. + data_format: string, `"channels_last"` or `"channels_first"`. + dilation_rate: integer dilation rate. + + Returns: + Output tensor. + + Raises: + ValueError: if `data_format` is neither `channels_last` or + `channels_first`. 
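+
+ Example (a shape sketch; `backend` is an assumed alias for this module):
+
+ >>> x = tf.ones((4, 10, 3))  # (batch, steps, channels)
+ >>> dw = tf.ones((5, 3, 1))  # (kernel_size, channels, depth_multiplier)
+ >>> pw = tf.ones((1, 3, 8))  # (1, channels * multiplier, filters)
+ >>> backend.separable_conv1d(x, dw, pw, padding="same").shape
+ TensorShape([4, 10, 8])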
+ """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + + if isinstance(strides, int): + strides = (strides,) + if isinstance(dilation_rate, int): + dilation_rate = (dilation_rate,) + + x, tf_data_format = _preprocess_conv1d_input(x, data_format) + padding = _preprocess_padding(padding) + if not isinstance(strides, tuple): + strides = tuple(strides) + if tf_data_format == "NWC": + spatial_start_dim = 1 + strides = (1,) + strides * 2 + (1,) + else: + spatial_start_dim = 2 + strides = (1, 1) + strides * 2 + x = tf.expand_dims(x, spatial_start_dim) + depthwise_kernel = tf.expand_dims(depthwise_kernel, 0) + pointwise_kernel = tf.expand_dims(pointwise_kernel, 0) + dilation_rate = (1,) + dilation_rate + + x = tf.nn.separable_conv2d( + x, + depthwise_kernel, + pointwise_kernel, + strides=strides, + padding=padding, + dilations=dilation_rate, + data_format=tf_data_format, + ) + + x = tf.squeeze(x, [spatial_start_dim]) + + if data_format == "channels_first" and tf_data_format == "NWC": + x = tf.compat.v1.transpose(x, (0, 2, 1)) # NWC -> NCW + + return x + + +@keras_export("keras.backend.separable_conv2d") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs -def binary_weighted_focal_crossentropy( - target, - output, - alpha=0.25, - gamma=2.0, - from_logits=False, +def separable_conv2d( + x, + depthwise_kernel, + pointwise_kernel, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), ): - """Binary weighted focal crossentropy between an output tensor and a target. - - According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it - helps to apply a focal factor to down-weight easy examples and focus more on - hard examples. By default, the focal tensor is computed as follows: - - `focal_factor = (1 - output)**gamma` for class 1 - `focal_factor = output**gamma` for class 0 - where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal - effect on the binary crossentropy. - - This function also takes into account a weight balancing factor for the binary - classes 0 and 1 as follows: - - `weight = alpha` for class 1 (`target` = 1) - `weight = 1 - alpha` for class 0 - where `alpha` is a float in the range of [0, 1]. - - Args: - target: A tensor with the same shape as `output`. - output: A tensor. - alpha: A weight balancing factor for class 1, default is 0.25 as mentioned - in reference. The weight for class 0 is 1.0 - `alpha`. - gamma: A focusing parameter, default is 2.0 as mentioned in reference. - from_logits: Whether `output` is expected to be a logits tensor. By default, - we consider that `output` encodes a probability distribution. - - Returns: - A tensor. - """ - # Balancing weight for the binary classes - weight = target * alpha + (1 - target) * (1 - alpha) - - # Binary focal crossentropy - bfce = binary_focal_crossentropy( - target=target, - output=output, - gamma=gamma, - from_logits=from_logits, - ) - return weight * bfce - - -@keras_export('keras.backend.sigmoid') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def sigmoid(x): - """Element-wise sigmoid. + """2D convolution with separable filters. + + Args: + x: input tensor + depthwise_kernel: convolution kernel for the depthwise convolution. + pointwise_kernel: kernel for the 1x1 convolution. + strides: strides tuple (length 2). + padding: string, `"same"` or `"valid"`. 
+ data_format: string, `"channels_last"` or `"channels_first"`.
+ dilation_rate: tuple of integers,
+ dilation rates for the separable convolution.
- Args:
- x: A tensor or variable.
+ Returns:
+ Output tensor.
+
+ Raises:
+ ValueError: if `data_format` is neither `channels_last` nor
+ `channels_first`.
+ ValueError: if `strides` is not a tuple of 2 integers.
+ """
+ if data_format is None:
+ data_format = image_data_format()
+ if data_format not in {"channels_first", "channels_last"}:
+ raise ValueError("Unknown data_format: " + str(data_format))
+ if len(strides) != 2:
+ raise ValueError("`strides` must be a tuple of 2 integers.")
+
+ x, tf_data_format = _preprocess_conv2d_input(x, data_format)
+ padding = _preprocess_padding(padding)
+ if not isinstance(strides, tuple):
+ strides = tuple(strides)
+ if tf_data_format == "NHWC":
+ strides = (1,) + strides + (1,)
+ else:
+ strides = (1, 1) + strides
- Returns:
- A tensor.
- """
- return tf.sigmoid(x)
+ x = tf.nn.separable_conv2d(
+ x,
+ depthwise_kernel,
+ pointwise_kernel,
+ strides=strides,
+ padding=padding,
+ dilations=dilation_rate,
+ data_format=tf_data_format,
+ )
+ if data_format == "channels_first" and tf_data_format == "NHWC":
+ x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW
+ return x
-@keras_export('keras.backend.hard_sigmoid')
+@keras_export("keras.backend.depthwise_conv2d")
@tf.__internal__.dispatch.add_dispatch_support
@doc_controls.do_not_generate_docs
-def hard_sigmoid(x):
- """Segment-wise linear approximation of sigmoid.
+def depthwise_conv2d(
+ x,
+ depthwise_kernel,
+ strides=(1, 1),
+ padding="valid",
+ data_format=None,
+ dilation_rate=(1, 1),
+):
+ """Depthwise 2D convolution.
+
+ Args:
+ x: input tensor
+ depthwise_kernel: convolution kernel for the depthwise convolution.
+ strides: strides tuple (length 2).
+ padding: string, `"same"` or `"valid"`.
+ data_format: string, `"channels_last"` or `"channels_first"`.
+ dilation_rate: tuple of integers,
+ dilation rates for the depthwise convolution.
- Faster than sigmoid.
- Returns `0.` if `x < -2.5`, `1.` if `x > 2.5`.
- In `-2.5 <= x <= 2.5`, returns `0.2 * x + 0.5`.
+ Returns:
+ Output tensor.
- Args:
- x: A tensor or variable.
+ Raises:
+ ValueError: if `data_format` is neither `channels_last` nor
+ `channels_first`.
+ """
+ if data_format is None:
+ data_format = image_data_format()
+ if data_format not in {"channels_first", "channels_last"}:
+ raise ValueError("Unknown data_format: " + str(data_format))
+
+ x, tf_data_format = _preprocess_conv2d_input(x, data_format)
+ padding = _preprocess_padding(padding)
+ if tf_data_format == "NHWC":
+ strides = (1,) + strides + (1,)
+ else:
+ strides = (1, 1) + strides
- Returns:
- A tensor.
- """
- point_two = _constant_to_tensor(0.2, x.dtype.base_dtype)
- point_five = _constant_to_tensor(0.5, x.dtype.base_dtype)
- x = tf.multiply(x, point_two)
- x = tf.add(x, point_five)
- x = tf.clip_by_value(x, 0., 1.)
- return x
+ x = tf.nn.depthwise_conv2d(
+ x,
+ depthwise_kernel,
+ strides=strides,
+ padding=padding,
+ dilations=dilation_rate,
+ data_format=tf_data_format,
+ )
+ if data_format == "channels_first" and tf_data_format == "NHWC":
+ x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW
+ return x
-@keras_export('keras.backend.tanh')
+@keras_export("keras.backend.conv3d")
@tf.__internal__.dispatch.add_dispatch_support
@doc_controls.do_not_generate_docs
-def tanh(x):
- """Element-wise tanh.
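+# A minimal usage sketch for the `conv3d` below (values are illustrative):
+#
+#     x = tf.ones((2, 8, 8, 8, 3))   # NDHWC: batch, depth, height, width, channels
+#     k = tf.ones((3, 3, 3, 3, 16))  # (d, h, w, in_channels, out_channels)
+#     y = tf.keras.backend.conv3d(x, k, padding="same")
+#     # y.shape -> (2, 8, 8, 8, 16)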
+def conv3d( + x, + kernel, + strides=(1, 1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1, 1), +): + """3D convolution. - Args: - x: A tensor or variable. + Args: + x: Tensor or variable. + kernel: kernel tensor. + strides: strides tuple. + padding: string, `"same"` or `"valid"`. + data_format: string, `"channels_last"` or `"channels_first"`. + dilation_rate: tuple of 3 integers. - Returns: - A tensor. - """ - return tf.tanh(x) + Returns: + A tensor, result of 3D convolution. + Raises: + ValueError: if `data_format` is neither `channels_last` or + `channels_first`. + """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + + x, tf_data_format = _preprocess_conv3d_input(x, data_format) + padding = _preprocess_padding(padding) + x = tf.compat.v1.nn.convolution( + input=x, + filter=kernel, + dilation_rate=dilation_rate, + strides=strides, + padding=padding, + data_format=tf_data_format, + ) + if data_format == "channels_first" and tf_data_format == "NDHWC": + x = tf.compat.v1.transpose(x, (0, 4, 1, 2, 3)) + return x -@keras_export('keras.backend.dropout') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def dropout(x, level, noise_shape=None, seed=None): - """Sets entries in `x` to zero at random, while scaling the entire tensor. - Args: - x: tensor - level: fraction of the entries in the tensor - that will be set to 0. - noise_shape: shape for randomly generated keep/drop flags, - must be broadcastable to the shape of `x` - seed: random seed to ensure determinism. +def conv3d_transpose( + x, + kernel, + output_shape, + strides=(1, 1, 1), + padding="valid", + data_format=None, +): + """3D deconvolution (i.e. - Returns: - A tensor. - """ - if seed is None: - seed = np.random.randint(10e6) - return tf.nn.dropout(x, rate=level, noise_shape=noise_shape, seed=seed) + transposed convolution). + Args: + x: input tensor. + kernel: kernel tensor. + output_shape: 1D int tensor for the output shape. + strides: strides tuple. + padding: string, "same" or "valid". + data_format: string, `"channels_last"` or `"channels_first"`. -@keras_export('keras.backend.l2_normalize') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def l2_normalize(x, axis=None): - """Normalizes a tensor wrt the L2 norm alongside the specified axis. + Returns: + A tensor, result of transposed 3D convolution. - Args: - x: Tensor or variable. - axis: axis along which to perform normalization. + Raises: + ValueError: if `data_format` is neither `channels_last` or + `channels_first`. + """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + if isinstance(output_shape, (tuple, list)): + output_shape = tf.stack(output_shape) + + x, tf_data_format = _preprocess_conv3d_input(x, data_format) + + if data_format == "channels_first" and tf_data_format == "NDHWC": + output_shape = ( + output_shape[0], + output_shape[2], + output_shape[3], + output_shape[4], + output_shape[1], + ) + if output_shape[0] is None: + output_shape = (tf.shape(x)[0],) + tuple(output_shape[1:]) + output_shape = tf.stack(list(output_shape)) + + padding = _preprocess_padding(padding) + if tf_data_format == "NDHWC": + strides = (1,) + strides + (1,) + else: + strides = (1, 1) + strides - Returns: - A tensor. 
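+ # (The `conv3d_transpose` call below expects `strides` with the same rank
+ # as the input, which is why 1s were just inserted at the batch and
+ # channel positions.)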
- """ - return tf.linalg.l2_normalize(x, axis=axis) + x = tf.compat.v1.nn.conv3d_transpose( + x, + kernel, + output_shape, + strides, + padding=padding, + data_format=tf_data_format, + ) + if data_format == "channels_first" and tf_data_format == "NDHWC": + x = tf.compat.v1.transpose(x, (0, 4, 1, 2, 3)) + return x -@keras_export('keras.backend.in_top_k') +@keras_export("keras.backend.pool2d") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs -def in_top_k(predictions, targets, k): - """Returns whether the `targets` are in the top `k` `predictions`. +def pool2d( + x, + pool_size, + strides=(1, 1), + padding="valid", + data_format=None, + pool_mode="max", +): + """2D Pooling. - Args: - predictions: A tensor of shape `(batch_size, classes)` and type `float32`. - targets: A 1D tensor of length `batch_size` and type `int32` or `int64`. - k: An `int`, number of top elements to consider. + Args: + x: Tensor or variable. + pool_size: tuple of 2 integers. + strides: tuple of 2 integers. + padding: string, `"same"` or `"valid"`. + data_format: string, `"channels_last"` or `"channels_first"`. + pool_mode: string, `"max"` or `"avg"`. - Returns: - A 1D tensor of length `batch_size` and type `bool`. - `output[i]` is `True` if `predictions[i, targets[i]]` is within top-`k` - values of `predictions[i]`. - """ - return tf.compat.v1.math.in_top_k(predictions, targets, k) + Returns: + A tensor, result of 2D pooling. + + Raises: + ValueError: if `data_format` is neither `"channels_last"` or + `"channels_first"`. + ValueError: if `pool_size` is not a tuple of 2 integers. + ValueError: if `strides` is not a tuple of 2 integers. + ValueError: if `pool_mode` is neither `"max"` or `"avg"`. + """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + if len(pool_size) != 2: + raise ValueError("`pool_size` must be a tuple of 2 integers.") + if len(strides) != 2: + raise ValueError("`strides` must be a tuple of 2 integers.") + + x, tf_data_format = _preprocess_conv2d_input(x, data_format) + padding = _preprocess_padding(padding) + if tf_data_format == "NHWC": + strides = (1,) + strides + (1,) + pool_size = (1,) + pool_size + (1,) + else: + strides = (1, 1) + strides + pool_size = (1, 1) + pool_size + + if pool_mode == "max": + x = tf.compat.v1.nn.max_pool( + x, pool_size, strides, padding=padding, data_format=tf_data_format + ) + elif pool_mode == "avg": + x = tf.compat.v1.nn.avg_pool( + x, pool_size, strides, padding=padding, data_format=tf_data_format + ) + else: + raise ValueError("Invalid pooling mode: " + str(pool_mode)) + if data_format == "channels_first" and tf_data_format == "NHWC": + x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW + return x -# CONVOLUTIONS +@keras_export("keras.backend.pool3d") +@tf.__internal__.dispatch.add_dispatch_support +@doc_controls.do_not_generate_docs +def pool3d( + x, + pool_size, + strides=(1, 1, 1), + padding="valid", + data_format=None, + pool_mode="max", +): + """3D Pooling. -def _preprocess_conv1d_input(x, data_format): - """Transpose and cast the input before the conv1d. - - Args: - x: input tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - - Returns: - A tensor. 
- """ - tf_data_format = 'NWC' # to pass TF Conv2dNative operations - if data_format == 'channels_first': - if not _has_nchw_support(): - x = tf.compat.v1.transpose(x, (0, 2, 1)) # NCW -> NWC - else: - tf_data_format = 'NCW' - return x, tf_data_format + Args: + x: Tensor or variable. + pool_size: tuple of 3 integers. + strides: tuple of 3 integers. + padding: string, `"same"` or `"valid"`. + data_format: string, `"channels_last"` or `"channels_first"`. + pool_mode: string, `"max"` or `"avg"`. + Returns: + A tensor, result of 3D pooling. -def _preprocess_conv2d_input(x, data_format, force_transpose=False): - """Transpose and cast the input before the conv2d. - - Args: - x: input tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - force_transpose: Boolean. If True, the input will always be transposed - from NCHW to NHWC if `data_format` is `"channels_first"`. - If False, the transposition only occurs on CPU (GPU ops are - assumed to support NCHW). - - Returns: - A tensor. - """ - tf_data_format = 'NHWC' - if data_format == 'channels_first': - if not _has_nchw_support() or force_transpose: - x = tf.compat.v1.transpose(x, (0, 2, 3, 1)) # NCHW -> NHWC + Raises: + ValueError: if `data_format` is neither `"channels_last"` or + `"channels_first"`. + ValueError: if `pool_mode` is neither `"max"` or `"avg"`. + """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + + x, tf_data_format = _preprocess_conv3d_input(x, data_format) + padding = _preprocess_padding(padding) + if tf_data_format == "NDHWC": + strides = (1,) + strides + (1,) + pool_size = (1,) + pool_size + (1,) else: - tf_data_format = 'NCHW' - return x, tf_data_format - - -def _preprocess_conv3d_input(x, data_format): - """Transpose and cast the input before the conv3d. - - Args: - x: input tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - - Returns: - A tensor. - """ - tf_data_format = 'NDHWC' - if data_format == 'channels_first': - if not _has_nchw_support(): - x = tf.compat.v1.transpose(x, (0, 2, 3, 4, 1)) + strides = (1, 1) + strides + pool_size = (1, 1) + pool_size + + if pool_mode == "max": + x = tf.nn.max_pool3d( + x, pool_size, strides, padding=padding, data_format=tf_data_format + ) + elif pool_mode == "avg": + x = tf.nn.avg_pool3d( + x, pool_size, strides, padding=padding, data_format=tf_data_format + ) else: - tf_data_format = 'NCDHW' - return x, tf_data_format + raise ValueError("Invalid pooling mode: " + str(pool_mode)) + if data_format == "channels_first" and tf_data_format == "NDHWC": + x = tf.compat.v1.transpose(x, (0, 4, 1, 2, 3)) + return x -def _preprocess_padding(padding): - """Convert keras' padding to TensorFlow's padding. - Args: - padding: string, one of 'same' , 'valid' +def local_conv( + inputs, kernel, kernel_size, strides, output_shape, data_format=None +): + """Apply N-D convolution with un-shared weights. - Returns: - a string, one of 'SAME', 'VALID'. + Args: + inputs: (N+2)-D tensor with shape + (batch_size, channels_in, d_in1, ..., d_inN) + if data_format='channels_first', or + (batch_size, d_in1, ..., d_inN, channels_in) + if data_format='channels_last'. + kernel: the unshared weight for N-D convolution, + with shape (output_items, feature_dim, channels_out), where + feature_dim = np.prod(kernel_size) * channels_in, + output_items = np.prod(output_shape). 
+ kernel_size: a tuple of N integers, specifying the + spatial dimensions of the N-D convolution window. + strides: a tuple of N integers, specifying the strides + of the convolution along the spatial dimensions. + output_shape: a tuple of (d_out1, ..., d_outN) specifying the spatial + dimensionality of the output. + data_format: string, "channels_first" or "channels_last". - Raises: - ValueError: if invalid `padding'` - """ - if padding == 'same': - padding = 'SAME' - elif padding == 'valid': - padding = 'VALID' - else: - raise ValueError('Invalid padding: ' + str(padding)) - return padding + Returns: + An (N+2)-D tensor with shape: + (batch_size, channels_out) + output_shape + if data_format='channels_first', or: + (batch_size,) + output_shape + (channels_out,) + if data_format='channels_last'. + + Raises: + ValueError: if `data_format` is neither + `channels_last` nor `channels_first`. + """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + + kernel_shape = int_shape(kernel) + feature_dim = kernel_shape[1] + channels_out = kernel_shape[-1] + ndims = len(output_shape) + spatial_dimensions = list(range(ndims)) + + xs = [] + output_axes_ticks = [range(axis_max) for axis_max in output_shape] + for position in itertools.product(*output_axes_ticks): + slices = [slice(None)] + + if data_format == "channels_first": + slices.append(slice(None)) + + slices.extend( + slice( + position[d] * strides[d], + position[d] * strides[d] + kernel_size[d], + ) + for d in spatial_dimensions + ) + + if data_format == "channels_last": + slices.append(slice(None)) + + xs.append(reshape(inputs[slices], (1, -1, feature_dim))) + + x_aggregate = concatenate(xs, axis=0) + output = batch_dot(x_aggregate, kernel) + output = reshape(output, output_shape + (-1, channels_out)) + + if data_format == "channels_first": + permutation = [ndims, ndims + 1] + spatial_dimensions + else: + permutation = [ndims] + spatial_dimensions + [ndims + 1] + return permute_dimensions(output, permutation) -@keras_export('keras.backend.conv1d') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def conv1d(x, - kernel, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1): - """1D convolution. - - Args: - x: Tensor or variable. - kernel: kernel tensor. - strides: stride integer. - padding: string, `"same"`, `"causal"` or `"valid"`. - data_format: string, one of "channels_last", "channels_first". - dilation_rate: integer dilate rate. - - Returns: - A tensor, result of 1D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. 
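+# A shape sketch for `local_conv` above (illustrative; `backend` is an
+# assumed alias for this module). In 1D with channels_last, 4 output
+# positions, kernel_size (3,) and strides (1,), an input with 2 channels
+# needs an unshared kernel of shape (4, 3 * 2, 5) for 5 output channels:
+#
+#     inputs = tf.ones((8, 6, 2))  # (batch, d_in1, channels_in)
+#     kernel = tf.ones((4, 6, 5))  # (output_items, feature_dim, channels_out)
+#     out = backend.local_conv(inputs, kernel, (3,), (1,), (4,), "channels_last")
+#     # out.shape -> (8, 4, 5)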
- """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - kernel_shape = kernel.shape.as_list() - if padding == 'causal': - # causal (dilated) convolution: - left_pad = dilation_rate * (kernel_shape[0] - 1) - x = temporal_padding(x, (left_pad, 0)) - padding = 'valid' - padding = _preprocess_padding(padding) - - x, tf_data_format = _preprocess_conv1d_input(x, data_format) - x = tf.compat.v1.nn.convolution( - input=x, - filter=kernel, - dilation_rate=dilation_rate, - strides=strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NWC': - x = tf.compat.v1.transpose(x, (0, 2, 1)) # NWC -> NCW - return x - - -@keras_export('keras.backend.conv2d') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def conv2d(x, - kernel, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1)): - """2D convolution. - - Args: - x: Tensor or variable. - kernel: kernel tensor. - strides: strides tuple. - padding: string, `"same"` or `"valid"`. - data_format: `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of 2 integers. - - Returns: - A tensor, result of 2D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - x = tf.compat.v1.nn.convolution( - input=x, - filter=kernel, - dilation_rate=dilation_rate, - strides=strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -@keras_export('keras.backend.conv2d_transpose') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def conv2d_transpose(x, - kernel, - output_shape, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1)): - """2D deconvolution (i.e. - - transposed convolution). - - Args: - x: Tensor or variable. - kernel: kernel tensor. - output_shape: 1D int tensor for the output shape. - strides: strides tuple. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: Tuple of 2 integers. - - Returns: - A tensor, result of transposed 2D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - # `atrous_conv2d_transpose` only supports NHWC format, even on GPU. 
- if data_format == 'channels_first' and dilation_rate != (1, 1): - force_transpose = True - else: - force_transpose = False - - x, tf_data_format = _preprocess_conv2d_input(x, data_format, force_transpose) - - if data_format == 'channels_first' and tf_data_format == 'NHWC': - output_shape = (output_shape[0], output_shape[2], output_shape[3], - output_shape[1]) - if output_shape[0] is None: - output_shape = (shape(x)[0],) + tuple(output_shape[1:]) - - if isinstance(output_shape, (tuple, list)): - output_shape = tf.stack(list(output_shape)) - - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - if dilation_rate == (1, 1): - x = tf.compat.v1.nn.conv2d_transpose(x, kernel, output_shape, strides, - padding=padding, - data_format=tf_data_format) - else: - if dilation_rate[0] != dilation_rate[1]: - raise ValueError( - 'Expected the 2 dimensions of the `dilation_rate` argument ' - 'to be equal to each other. ' - f'Received: dilation_rate={dilation_rate}' - ) - x = tf.nn.atrous_conv2d_transpose( - x, - kernel, - output_shape, - rate=dilation_rate[0], - padding=padding) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def separable_conv1d(x, - depthwise_kernel, - pointwise_kernel, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1): - """1D convolution with separable filters. - - Args: - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - pointwise_kernel: kernel for the 1x1 convolution. - strides: stride integer. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: integer dilation rate. - - Returns: - Output tensor. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - if isinstance(strides, int): - strides = (strides,) - if isinstance(dilation_rate, int): - dilation_rate = (dilation_rate,) - - x, tf_data_format = _preprocess_conv1d_input(x, data_format) - padding = _preprocess_padding(padding) - if not isinstance(strides, tuple): - strides = tuple(strides) - if tf_data_format == 'NWC': - spatial_start_dim = 1 - strides = (1,) + strides * 2 + (1,) - else: - spatial_start_dim = 2 - strides = (1, 1) + strides * 2 - x = tf.expand_dims(x, spatial_start_dim) - depthwise_kernel = tf.expand_dims(depthwise_kernel, 0) - pointwise_kernel = tf.expand_dims(pointwise_kernel, 0) - dilation_rate = (1,) + dilation_rate - - x = tf.compat.v1.nn.separable_conv2d( - x, - depthwise_kernel, - pointwise_kernel, - strides=strides, - padding=padding, - rate=dilation_rate, - data_format=tf_data_format) - - x = tf.squeeze(x, [spatial_start_dim]) - - if data_format == 'channels_first' and tf_data_format == 'NWC': - x = tf.compat.v1.transpose(x, (0, 2, 1)) # NWC -> NCW - - return x - - -@keras_export('keras.backend.separable_conv2d') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def separable_conv2d(x, - depthwise_kernel, - pointwise_kernel, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1)): - """2D convolution with separable filters. 
- - Args: - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - pointwise_kernel: kernel for the 1x1 convolution. - strides: strides tuple (length 2). - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of integers, - dilation rates for the separable convolution. - - Returns: - Output tensor. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - ValueError: if `strides` is not a tuple of 2 integers. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - if len(strides) != 2: - raise ValueError('`strides` must be a tuple of 2 integers.') - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - if not isinstance(strides, tuple): - strides = tuple(strides) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - x = tf.compat.v1.nn.separable_conv2d( - x, - depthwise_kernel, - pointwise_kernel, - strides=strides, - padding=padding, - rate=dilation_rate, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -@keras_export('keras.backend.depthwise_conv2d') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def depthwise_conv2d(x, - depthwise_kernel, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1)): - """2D convolution with separable filters. - - Args: - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - strides: strides tuple (length 2). - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of integers, - dilation rates for the separable convolution. - - Returns: - Output tensor. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - x = tf.compat.v1.nn.depthwise_conv2d( - x, - depthwise_kernel, - strides=strides, - padding=padding, - rate=dilation_rate, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -@keras_export('keras.backend.conv3d') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def conv3d(x, - kernel, - strides=(1, 1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1, 1)): - """3D convolution. - - Args: - x: Tensor or variable. - kernel: kernel tensor. - strides: strides tuple. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of 3 integers. - - Returns: - A tensor, result of 3D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. 
- """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - x, tf_data_format = _preprocess_conv3d_input(x, data_format) - padding = _preprocess_padding(padding) - x = tf.compat.v1.nn.convolution( - input=x, - filter=kernel, - dilation_rate=dilation_rate, - strides=strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - x = tf.compat.v1.transpose(x, (0, 4, 1, 2, 3)) - return x - - -def conv3d_transpose(x, - kernel, - output_shape, - strides=(1, 1, 1), - padding='valid', - data_format=None): - """3D deconvolution (i.e. - - transposed convolution). - - Args: - x: input tensor. - kernel: kernel tensor. - output_shape: 1D int tensor for the output shape. - strides: strides tuple. - padding: string, "same" or "valid". - data_format: string, `"channels_last"` or `"channels_first"`. - - Returns: - A tensor, result of transposed 3D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - if isinstance(output_shape, (tuple, list)): - output_shape = tf.stack(output_shape) - - x, tf_data_format = _preprocess_conv3d_input(x, data_format) - - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - output_shape = (output_shape[0], output_shape[2], output_shape[3], - output_shape[4], output_shape[1]) - if output_shape[0] is None: - output_shape = (tf.shape(x)[0],) + tuple(output_shape[1:]) - output_shape = tf.stack(list(output_shape)) - - padding = _preprocess_padding(padding) - if tf_data_format == 'NDHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - x = tf.compat.v1.nn.conv3d_transpose( - x, - kernel, - output_shape, - strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - x = tf.compat.v1.transpose(x, (0, 4, 1, 2, 3)) - return x - - -@keras_export('keras.backend.pool2d') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def pool2d(x, - pool_size, - strides=(1, 1), - padding='valid', - data_format=None, - pool_mode='max'): - """2D Pooling. - - Args: - x: Tensor or variable. - pool_size: tuple of 2 integers. - strides: tuple of 2 integers. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - pool_mode: string, `"max"` or `"avg"`. - - Returns: - A tensor, result of 2D pooling. - - Raises: - ValueError: if `data_format` is neither `"channels_last"` or - `"channels_first"`. - ValueError: if `pool_size` is not a tuple of 2 integers. - ValueError: if `strides` is not a tuple of 2 integers. - ValueError: if `pool_mode` is neither `"max"` or `"avg"`. 
- """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - if len(pool_size) != 2: - raise ValueError('`pool_size` must be a tuple of 2 integers.') - if len(strides) != 2: - raise ValueError('`strides` must be a tuple of 2 integers.') - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - pool_size = (1,) + pool_size + (1,) - else: - strides = (1, 1) + strides - pool_size = (1, 1) + pool_size - - if pool_mode == 'max': - x = tf.compat.v1.nn.max_pool( - x, pool_size, strides, padding=padding, data_format=tf_data_format) - elif pool_mode == 'avg': - x = tf.compat.v1.nn.avg_pool( - x, pool_size, strides, padding=padding, data_format=tf_data_format) - else: - raise ValueError('Invalid pooling mode: ' + str(pool_mode)) - - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.compat.v1.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -@keras_export('keras.backend.pool3d') -@tf.__internal__.dispatch.add_dispatch_support -@doc_controls.do_not_generate_docs -def pool3d(x, - pool_size, - strides=(1, 1, 1), - padding='valid', - data_format=None, - pool_mode='max'): - """3D Pooling. - - Args: - x: Tensor or variable. - pool_size: tuple of 3 integers. - strides: tuple of 3 integers. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - pool_mode: string, `"max"` or `"avg"`. - - Returns: - A tensor, result of 3D pooling. - - Raises: - ValueError: if `data_format` is neither `"channels_last"` or - `"channels_first"`. - ValueError: if `pool_mode` is neither `"max"` or `"avg"`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - x, tf_data_format = _preprocess_conv3d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NDHWC': - strides = (1,) + strides + (1,) - pool_size = (1,) + pool_size + (1,) - else: - strides = (1, 1) + strides - pool_size = (1, 1) + pool_size - - if pool_mode == 'max': - x = tf.nn.max_pool3d( - x, pool_size, strides, padding=padding, data_format=tf_data_format) - elif pool_mode == 'avg': - x = tf.nn.avg_pool3d( - x, pool_size, strides, padding=padding, data_format=tf_data_format) - else: - raise ValueError('Invalid pooling mode: ' + str(pool_mode)) - - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - x = tf.compat.v1.transpose(x, (0, 4, 1, 2, 3)) - return x - - -def local_conv(inputs, - kernel, - kernel_size, - strides, - output_shape, - data_format=None): - """Apply N-D convolution with un-shared weights. - - Args: - inputs: (N+2)-D tensor with shape - (batch_size, channels_in, d_in1, ..., d_inN) - if data_format='channels_first', or - (batch_size, d_in1, ..., d_inN, channels_in) - if data_format='channels_last'. - kernel: the unshared weight for N-D convolution, - with shape (output_items, feature_dim, channels_out), where - feature_dim = np.prod(kernel_size) * channels_in, - output_items = np.prod(output_shape). - kernel_size: a tuple of N integers, specifying the - spatial dimensions of the N-D convolution window. - strides: a tuple of N integers, specifying the strides - of the convolution along the spatial dimensions. 
- output_shape: a tuple of (d_out1, ..., d_outN) specifying the spatial - dimensionality of the output. - data_format: string, "channels_first" or "channels_last". - - Returns: - An (N+2)-D tensor with shape: - (batch_size, channels_out) + output_shape - if data_format='channels_first', or: - (batch_size,) + output_shape + (channels_out,) - if data_format='channels_last'. - - Raises: - ValueError: if `data_format` is neither - `channels_last` nor `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - kernel_shape = int_shape(kernel) - feature_dim = kernel_shape[1] - channels_out = kernel_shape[-1] - ndims = len(output_shape) - spatial_dimensions = list(range(ndims)) - - xs = [] - output_axes_ticks = [range(axis_max) for axis_max in output_shape] - for position in itertools.product(*output_axes_ticks): - slices = [slice(None)] - - if data_format == 'channels_first': - slices.append(slice(None)) - - slices.extend( - slice(position[d] * strides[d], position[d] * strides[d] + - kernel_size[d]) for d in spatial_dimensions) - - if data_format == 'channels_last': - slices.append(slice(None)) - - xs.append(reshape(inputs[slices], (1, -1, feature_dim))) - - x_aggregate = concatenate(xs, axis=0) - output = batch_dot(x_aggregate, kernel) - output = reshape(output, output_shape + (-1, channels_out)) - - if data_format == 'channels_first': - permutation = [ndims, ndims + 1] + spatial_dimensions - else: - permutation = [ndims] + spatial_dimensions + [ndims + 1] - - return permute_dimensions(output, permutation) - - -@keras_export('keras.backend.local_conv1d') + +@keras_export("keras.backend.local_conv1d") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None): - """Apply 1D conv with un-shared weights. - - Args: - inputs: 3D tensor with shape: - (batch_size, steps, input_dim) - if data_format is "channels_last" or - (batch_size, input_dim, steps) - if data_format is "channels_first". - kernel: the unshared weight for convolution, - with shape (output_length, feature_dim, filters). - kernel_size: a tuple of a single integer, - specifying the length of the 1D convolution window. - strides: a tuple of a single integer, - specifying the stride length of the convolution. - data_format: the data format, channels_first or channels_last. - - Returns: - A 3d tensor with shape: - (batch_size, output_length, filters) - if data_format='channels_first' - or 3D tensor with shape: - (batch_size, filters, output_length) - if data_format='channels_last'. - """ - output_shape = (kernel.shape[0],) - return local_conv(inputs, - kernel, - kernel_size, - strides, - output_shape, - data_format) - - -@keras_export('keras.backend.local_conv2d') + """Apply 1D conv with un-shared weights. + + Args: + inputs: 3D tensor with shape: + (batch_size, steps, input_dim) + if data_format is "channels_last" or + (batch_size, input_dim, steps) + if data_format is "channels_first". + kernel: the unshared weight for convolution, + with shape (output_length, feature_dim, filters). + kernel_size: a tuple of a single integer, + specifying the length of the 1D convolution window. + strides: a tuple of a single integer, + specifying the stride length of the convolution. + data_format: the data format, channels_first or channels_last. 
+
+ Returns:
+ A 3D tensor with shape:
+ (batch_size, output_length, filters)
+ if data_format='channels_last'
+ or 3D tensor with shape:
+ (batch_size, filters, output_length)
+ if data_format='channels_first'.
+ """
+ output_shape = (kernel.shape[0],)
+ return local_conv(
+ inputs, kernel, kernel_size, strides, output_shape, data_format
+ )
+
+
+@keras_export("keras.backend.local_conv2d")
@tf.__internal__.dispatch.add_dispatch_support
@doc_controls.do_not_generate_docs
-def local_conv2d(inputs,
- kernel,
- kernel_size,
- strides,
- output_shape,
- data_format=None):
- """Apply 2D conv with un-shared weights.
-
- Args:
- inputs: 4D tensor with shape:
- (batch_size, filters, new_rows, new_cols)
- if data_format='channels_first'
- or 4D tensor with shape:
- (batch_size, new_rows, new_cols, filters)
- if data_format='channels_last'.
- kernel: the unshared weight for convolution,
- with shape (output_items, feature_dim, filters).
- kernel_size: a tuple of 2 integers, specifying the
- width and height of the 2D convolution window.
- strides: a tuple of 2 integers, specifying the strides
- of the convolution along the width and height.
- output_shape: a tuple with (output_row, output_col).
- data_format: the data format, channels_first or channels_last.
-
- Returns:
- A 4D tensor with shape:
- (batch_size, filters, new_rows, new_cols)
- if data_format='channels_first'
- or 4D tensor with shape:
- (batch_size, new_rows, new_cols, filters)
- if data_format='channels_last'.
- """
- return local_conv(inputs,
- kernel,
- kernel_size,
- strides,
- output_shape,
- data_format)
-
-
-@keras_export('keras.backend.bias_add')
+def local_conv2d(
+ inputs, kernel, kernel_size, strides, output_shape, data_format=None
+):
+ """Apply 2D conv with un-shared weights.
+
+ Args:
+ inputs: 4D tensor with shape:
+ (batch_size, filters, new_rows, new_cols)
+ if data_format='channels_first'
+ or 4D tensor with shape:
+ (batch_size, new_rows, new_cols, filters)
+ if data_format='channels_last'.
+ kernel: the unshared weight for convolution,
+ with shape (output_items, feature_dim, filters).
+ kernel_size: a tuple of 2 integers, specifying the
+ width and height of the 2D convolution window.
+ strides: a tuple of 2 integers, specifying the strides
+ of the convolution along the width and height.
+ output_shape: a tuple with (output_row, output_col).
+ data_format: the data format, channels_first or channels_last.
+
+ Returns:
+ A 4D tensor with shape:
+ (batch_size, filters, new_rows, new_cols)
+ if data_format='channels_first'
+ or 4D tensor with shape:
+ (batch_size, new_rows, new_cols, filters)
+ if data_format='channels_last'.
+ """
+ return local_conv(
+ inputs, kernel, kernel_size, strides, output_shape, data_format
+ )
+
+
+@keras_export("keras.backend.bias_add")
@tf.__internal__.dispatch.add_dispatch_support
@doc_controls.do_not_generate_docs
def bias_add(x, bias, data_format=None):
- """Adds a bias vector to a tensor.
-
- Args:
- x: Tensor or variable.
- bias: Bias tensor to add.
- data_format: string, `"channels_last"` or `"channels_first"`.
-
- Returns:
- Output tensor.
-
- Raises:
- ValueError: In one of the two cases below:
- 1. invalid `data_format` argument.
- 2. invalid bias shape.
- the bias should be either a vector or - a tensor with ndim(x) - 1 dimension - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - bias_shape = int_shape(bias) - if len(bias_shape) != 1 and len(bias_shape) != ndim(x) - 1: - raise ValueError( - 'Unexpected bias dimensions %d, expect to be 1 or %d dimensions' % - (len(bias_shape), ndim(x) - 1)) - - if len(bias_shape) == 1: - if data_format == 'channels_first': - return tf.nn.bias_add(x, bias, data_format='NCHW') - return tf.nn.bias_add(x, bias, data_format='NHWC') - if ndim(x) in (3, 4, 5): - if data_format == 'channels_first': - bias_reshape_axis = (1, bias_shape[-1]) + bias_shape[:-1] - return x + reshape(bias, bias_reshape_axis) - return x + reshape(bias, (1,) + bias_shape) - return tf.nn.bias_add(x, bias) + """Adds a bias vector to a tensor. + + Args: + x: Tensor or variable. + bias: Bias tensor to add. + data_format: string, `"channels_last"` or `"channels_first"`. + + Returns: + Output tensor. + + Raises: + ValueError: In one of the two cases below: + 1. invalid `data_format` argument. + 2. invalid bias shape. + the bias should be either a vector or + a tensor with ndim(x) - 1 dimension + """ + if data_format is None: + data_format = image_data_format() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError("Unknown data_format: " + str(data_format)) + bias_shape = int_shape(bias) + if len(bias_shape) != 1 and len(bias_shape) != ndim(x) - 1: + raise ValueError( + "Unexpected bias dimensions %d, expect to be 1 or %d dimensions" + % (len(bias_shape), ndim(x) - 1) + ) + + if len(bias_shape) == 1: + if data_format == "channels_first": + return tf.nn.bias_add(x, bias, data_format="NCHW") + return tf.nn.bias_add(x, bias, data_format="NHWC") + if ndim(x) in (3, 4, 5): + if data_format == "channels_first": + bias_reshape_axis = (1, bias_shape[-1]) + bias_shape[:-1] + return x + reshape(bias, bias_reshape_axis) + return x + reshape(bias, (1,) + bias_shape) + return tf.nn.bias_add(x, bias) # RANDOMNESS -@keras_export('keras.backend.random_normal') +@keras_export("keras.backend.random_normal") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - """Returns a tensor with normal distribution of values. - - It is an alias to `tf.random.normal`. - - Args: - shape: A tuple of integers, the shape of tensor to create. - mean: A float, the mean value of the normal distribution to draw samples. - Default to 0.0. - stddev: A float, the standard deviation of the normal distribution - to draw samples. Default to 1.0. - dtype: `tf.dtypes.DType`, dtype of returned tensor. Default to use Keras - backend dtype which is float32. - seed: Integer, random seed. Will use a random numpy integer when not - specified. - - Returns: - A tensor with normal distribution of values. - - Example: - - >>> random_normal_tensor = tf.keras.backend.random_normal(shape=(2,3), - ... mean=0.0, stddev=1.0) - >>> random_normal_tensor - - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return tf.random.normal( - shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed) - - -@keras_export('keras.backend.random_uniform') + """Returns a tensor with normal distribution of values. + + It is an alias to `tf.random.normal`. 
+ + Args: + shape: A tuple of integers, the shape of tensor to create. + mean: A float, the mean value of the normal distribution to draw + samples. Defaults to `0.0`. + stddev: A float, the standard deviation of the normal distribution + to draw samples. Defaults to `1.0`. + dtype: `tf.dtypes.DType`, dtype of returned tensor. None uses Keras + backend dtype which is float32. Defaults to `None`. + seed: Integer, random seed. Will use a random numpy integer when not + specified. + + Returns: + A tensor with normal distribution of values. + + Example: + + >>> random_normal_tensor = tf.keras.backend.random_normal(shape=(2,3), + ... mean=0.0, stddev=1.0) + >>> random_normal_tensor + + """ + if dtype is None: + dtype = floatx() + if seed is None: + seed = np.random.randint(10e6) + return tf.random.normal( + shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed + ) + + +@keras_export("keras.backend.random_uniform") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None): - """Returns a tensor with uniform distribution of values. - - Args: - shape: A tuple of integers, the shape of tensor to create. - minval: A float, lower boundary of the uniform distribution - to draw samples. - maxval: A float, upper boundary of the uniform distribution - to draw samples. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - Returns: - A tensor. - - Example: - - >>> random_uniform_tensor = tf.keras.backend.random_uniform(shape=(2,3), - ... minval=0.0, maxval=1.0) - >>> random_uniform_tensor - - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return tf.random.uniform( - shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed) - - -@keras_export('keras.backend.random_binomial') + """Returns a tensor with uniform distribution of values. + + Args: + shape: A tuple of integers, the shape of tensor to create. + minval: A float, lower boundary of the uniform distribution + to draw samples. + maxval: A float, upper boundary of the uniform distribution + to draw samples. + dtype: String, dtype of returned tensor. + seed: Integer, random seed. + + Returns: + A tensor. + + Example: + + >>> random_uniform_tensor = tf.keras.backend.random_uniform(shape=(2,3), + ... minval=0.0, maxval=1.0) + >>> random_uniform_tensor + + """ + if dtype is None: + dtype = floatx() + if seed is None: + seed = np.random.randint(10e6) + return tf.random.uniform( + shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed + ) + + +@keras_export("keras.backend.random_binomial") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def random_binomial(shape, p=0.0, dtype=None, seed=None): - """Returns a tensor with random binomial distribution of values. + """Returns a tensor with random binomial distribution of values. - DEPRECATED, use `tf.keras.backend.random_bernoulli` instead. + DEPRECATED, use `tf.keras.backend.random_bernoulli` instead. - The binomial distribution with parameters `n` and `p` is the probability - distribution of the number of successful Bernoulli process. Only supports - `n` = 1 for now. + The binomial distribution with parameters `n` and `p` is the probability + distribution of the number of successful Bernoulli process. Only supports + `n` = 1 for now. - Args: - shape: A tuple of integers, the shape of tensor to create. - p: A float, `0. <= p <= 1`, probability of binomial distribution. 
- dtype: String, dtype of returned tensor. - seed: Integer, random seed. + Args: + shape: A tuple of integers, the shape of tensor to create. + p: A float, `0. <= p <= 1`, probability of binomial distribution. + dtype: String, dtype of returned tensor. + seed: Integer, random seed. - Returns: - A tensor. + Returns: + A tensor. - Example: + Example: - >>> random_binomial_tensor = tf.keras.backend.random_binomial(shape=(2,3), - ... p=0.5) - >>> random_binomial_tensor - - """ - warnings.warn( - '`tf.keras.backend.random_binomial` is deprecated, ' - 'and will be removed in a future version.' - 'Please use `tf.keras.backend.random_bernoulli` instead.', - stacklevel=2) - return random_bernoulli(shape, p, dtype, seed) + >>> random_binomial_tensor = tf.keras.backend.random_binomial(shape=(2,3), + ... p=0.5) + >>> random_binomial_tensor + + """ + warnings.warn( + "`tf.keras.backend.random_binomial` is deprecated, " + "and will be removed in a future version." + "Please use `tf.keras.backend.random_bernoulli` instead.", + stacklevel=2, + ) + return random_bernoulli(shape, p, dtype, seed) -@keras_export('keras.backend.random_bernoulli') +@keras_export("keras.backend.random_bernoulli") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def random_bernoulli(shape, p=0.0, dtype=None, seed=None): - """Returns a tensor with random bernoulli distribution of values. - - Args: - shape: A tuple of integers, the shape of tensor to create. - p: A float, `0. <= p <= 1`, probability of bernoulli distribution. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - Returns: - A tensor. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return tf.where( - tf.random.uniform(shape, dtype=dtype, seed=seed) <= p, - tf.ones(shape, dtype=dtype), tf.zeros(shape, dtype=dtype)) - - -@keras_export('keras.backend.truncated_normal') + """Returns a tensor with random bernoulli distribution of values. + + Args: + shape: A tuple of integers, the shape of tensor to create. + p: A float, `0. <= p <= 1`, probability of bernoulli distribution. + dtype: String, dtype of returned tensor. + seed: Integer, random seed. + + Returns: + A tensor. + """ + if dtype is None: + dtype = floatx() + if seed is None: + seed = np.random.randint(10e6) + return tf.where( + tf.random.uniform(shape, dtype=dtype, seed=seed) <= p, + tf.ones(shape, dtype=dtype), + tf.zeros(shape, dtype=dtype), + ) + + +@keras_export("keras.backend.truncated_normal") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - """Returns a tensor with truncated random normal distribution of values. - - The generated values follow a normal distribution - with specified mean and standard deviation, - except that values whose magnitude is more than - two standard deviations from the mean are dropped and re-picked. - - Args: - shape: A tuple of integers, the shape of tensor to create. - mean: Mean of the values. - stddev: Standard deviation of the values. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - Returns: - A tensor. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return tf.random.truncated_normal( - shape, mean, stddev, dtype=dtype, seed=seed) + """Returns a tensor with truncated random normal distribution of values. 
+ + The generated values follow a normal distribution + with specified mean and standard deviation, + except that values whose magnitude is more than + two standard deviations from the mean are dropped and re-picked. + + Args: + shape: A tuple of integers, the shape of tensor to create. + mean: Mean of the values. + stddev: Standard deviation of the values. + dtype: String, dtype of returned tensor. + seed: Integer, random seed. + + Returns: + A tensor. + """ + if dtype is None: + dtype = floatx() + if seed is None: + seed = np.random.randint(10e6) + return tf.random.truncated_normal( + shape, mean, stddev, dtype=dtype, seed=seed + ) # CTC @@ -6661,472 +7084,484 @@ def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): # in TensorFlow's CTC implementation -@keras_export('keras.backend.ctc_label_dense_to_sparse') +@keras_export("keras.backend.ctc_label_dense_to_sparse") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def ctc_label_dense_to_sparse(labels, label_lengths): - """Converts CTC labels from dense to sparse. - - Args: - labels: dense CTC labels. - label_lengths: length of the labels. - - Returns: - A sparse tensor representation of the labels. - """ - label_shape = tf.shape(labels) - num_batches_tns = tf.stack([label_shape[0]]) - max_num_labels_tns = tf.stack([label_shape[1]]) + """Converts CTC labels from dense to sparse. - def range_less_than(old_input, current_input): - return tf.expand_dims( - tf.range(tf.shape(old_input)[1]), 0) < tf.fill( - max_num_labels_tns, current_input) + Args: + labels: dense CTC labels. + label_lengths: length of the labels. - init = tf.cast( - tf.fill([1, label_shape[1]], 0), tf.bool) - dense_mask = tf.compat.v1.scan( - range_less_than, label_lengths, initializer=init, parallel_iterations=1) - dense_mask = dense_mask[:, 0, :] + Returns: + A sparse tensor representation of the labels. 
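Before the implementation below, a concrete instance of this dense-to-sparse mapping: zero-padded rows become a `tf.SparseTensor` that keeps only the first `label_lengths[i]` entries of each row. The label values here are hypothetical; the sketch assumes eager TF 2.x:

```python
import tensorflow as tf
from tensorflow.keras import backend as K

labels = tf.constant([[1, 2, 0], [3, 0, 0]], dtype=tf.int32)  # zero-padded
label_lengths = tf.constant([2, 1], dtype=tf.int32)

sparse = K.ctc_label_dense_to_sparse(labels, label_lengths)
print(sparse.indices.numpy())  # [[0 0] [0 1] [1 0]]
print(sparse.values.numpy())   # [1 2 3]
```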
+ """ + label_shape = tf.shape(labels) + num_batches_tns = tf.stack([label_shape[0]]) + max_num_labels_tns = tf.stack([label_shape[1]]) + + def range_less_than(old_input, current_input): + return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill( + max_num_labels_tns, current_input + ) + + init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool) + dense_mask = tf.compat.v1.scan( + range_less_than, label_lengths, initializer=init, parallel_iterations=1 + ) + dense_mask = dense_mask[:, 0, :] - label_array = tf.reshape( - tf.tile(tf.range(0, label_shape[1]), num_batches_tns), - label_shape) - label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask) + label_array = tf.reshape( + tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape + ) + label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask) - batch_array = tf.compat.v1.transpose( - tf.reshape( - tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns), - reverse(label_shape, 0))) - batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask) - indices = tf.compat.v1.transpose( - tf.reshape(concatenate([batch_ind, label_ind], axis=0), [2, -1])) + batch_array = tf.compat.v1.transpose( + tf.reshape( + tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns), + reverse(label_shape, 0), + ) + ) + batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask) + indices = tf.compat.v1.transpose( + tf.reshape(concatenate([batch_ind, label_ind], axis=0), [2, -1]) + ) - vals_sparse = tf.compat.v1.gather_nd(labels, indices) + vals_sparse = tf.compat.v1.gather_nd(labels, indices) - return tf.SparseTensor( - tf.cast(indices, tf.int64), vals_sparse, - tf.cast(label_shape, tf.int64)) + return tf.SparseTensor( + tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64) + ) -@keras_export('keras.backend.ctc_batch_cost') +@keras_export("keras.backend.ctc_batch_cost") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def ctc_batch_cost(y_true, y_pred, input_length, label_length): - """Runs CTC loss algorithm on each batch element. - - Args: - y_true: tensor `(samples, max_string_length)` - containing the truth labels. - y_pred: tensor `(samples, time_steps, num_categories)` - containing the prediction, or output of the softmax. - input_length: tensor `(samples, 1)` containing the sequence length for - each batch item in `y_pred`. - label_length: tensor `(samples, 1)` containing the sequence length for - each batch item in `y_true`. - - Returns: - Tensor with shape (samples,1) containing the - CTC loss of each element. - """ - label_length = tf.cast( - tf.squeeze(label_length, axis=-1), tf.int32) - input_length = tf.cast( - tf.squeeze(input_length, axis=-1), tf.int32) - sparse_labels = tf.cast( - ctc_label_dense_to_sparse(y_true, label_length), tf.int32) - - y_pred = tf.math.log(tf.compat.v1.transpose(y_pred, perm=[1, 0, 2]) + epsilon()) - - return tf.expand_dims( - tf.compat.v1.nn.ctc_loss( - inputs=y_pred, labels=sparse_labels, sequence_length=input_length), 1) - - -@keras_export('keras.backend.ctc_decode') + """Runs CTC loss algorithm on each batch element. + + Args: + y_true: tensor `(samples, max_string_length)` + containing the truth labels. + y_pred: tensor `(samples, time_steps, num_categories)` + containing the prediction, or output of the softmax. + input_length: tensor `(samples, 1)` containing the sequence length for + each batch item in `y_pred`. + label_length: tensor `(samples, 1)` containing the sequence length for + each batch item in `y_true`. 
+ + Returns: + Tensor with shape (samples,1) containing the + CTC loss of each element. + """ + label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32) + input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32) + sparse_labels = tf.cast( + ctc_label_dense_to_sparse(y_true, label_length), tf.int32 + ) + + y_pred = tf.math.log( + tf.compat.v1.transpose(y_pred, perm=[1, 0, 2]) + epsilon() + ) + + return tf.expand_dims( + tf.compat.v1.nn.ctc_loss( + inputs=y_pred, labels=sparse_labels, sequence_length=input_length + ), + 1, + ) + + +@keras_export("keras.backend.ctc_decode") @tf.__internal__.dispatch.add_dispatch_support @doc_controls.do_not_generate_docs def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1): - """Decodes the output of a softmax. - - Can use either greedy search (also known as best path) - or a constrained dictionary search. - - Args: - y_pred: tensor `(samples, time_steps, num_categories)` - containing the prediction, or output of the softmax. - input_length: tensor `(samples, )` containing the sequence length for - each batch item in `y_pred`. - greedy: perform much faster best-path search if `true`. - This does not use a dictionary. - beam_width: if `greedy` is `false`: a beam search decoder will be used - with a beam of this width. - top_paths: if `greedy` is `false`, - how many of the most probable paths will be returned. - - Returns: - Tuple: - List: if `greedy` is `true`, returns a list of one element that - contains the decoded sequence. - If `false`, returns the `top_paths` most probable - decoded sequences. - Each decoded sequence has shape (samples, time_steps). - Important: blank labels are returned as `-1`. - Tensor `(top_paths, )` that contains - the log probability of each decoded sequence. - """ - input_shape = shape(y_pred) - num_samples, num_steps = input_shape[0], input_shape[1] - y_pred = tf.math.log(tf.compat.v1.transpose(y_pred, perm=[1, 0, 2]) + epsilon()) - input_length = tf.cast(input_length, tf.int32) - - if greedy: - (decoded, log_prob) = tf.nn.ctc_greedy_decoder( - inputs=y_pred, sequence_length=input_length) - else: - (decoded, log_prob) = tf.compat.v1.nn.ctc_beam_search_decoder( - inputs=y_pred, - sequence_length=input_length, - beam_width=beam_width, - top_paths=top_paths) - decoded_dense = [] - for st in decoded: - st = tf.SparseTensor( - st.indices, st.values, (num_samples, num_steps)) - decoded_dense.append( - tf.sparse.to_dense(sp_input=st, default_value=-1)) - return (decoded_dense, log_prob) + """Decodes the output of a softmax. + + Can use either greedy search (also known as best path) + or a constrained dictionary search. + + Args: + y_pred: tensor `(samples, time_steps, num_categories)` + containing the prediction, or output of the softmax. + input_length: tensor `(samples, )` containing the sequence length for + each batch item in `y_pred`. + greedy: perform much faster best-path search if `true`. + This does not use a dictionary. + beam_width: if `greedy` is `false`: a beam search decoder will be used + with a beam of this width. + top_paths: if `greedy` is `false`, + how many of the most probable paths will be returned. + + Returns: + Tuple: + List: if `greedy` is `true`, returns a list of one element that + contains the decoded sequence. + If `false`, returns the `top_paths` most probable + decoded sequences. + Each decoded sequence has shape (samples, time_steps). + Important: blank labels are returned as `-1`. 
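The `-1` padding convention just noted, shown end to end with the default greedy decoder (same hypothetical shapes as the loss sketch above):

```python
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

y_pred = tf.nn.softmax(tf.random.uniform((4, 12, 10)))
input_length = np.full((4,), 12)

decoded, log_prob = K.ctc_decode(y_pred, input_length, greedy=True)
print(decoded[0].shape)  # (4, 12); unused steps are padded with -1
print(log_prob.shape)    # (4, 1)
```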
+ Tensor `(top_paths, )` that contains + the log probability of each decoded sequence. + """ + input_shape = shape(y_pred) + num_samples, num_steps = input_shape[0], input_shape[1] + y_pred = tf.math.log( + tf.compat.v1.transpose(y_pred, perm=[1, 0, 2]) + epsilon() + ) + input_length = tf.cast(input_length, tf.int32) + + if greedy: + (decoded, log_prob) = tf.nn.ctc_greedy_decoder( + inputs=y_pred, sequence_length=input_length + ) + else: + (decoded, log_prob) = tf.compat.v1.nn.ctc_beam_search_decoder( + inputs=y_pred, + sequence_length=input_length, + beam_width=beam_width, + top_paths=top_paths, + ) + decoded_dense = [] + for st in decoded: + st = tf.SparseTensor(st.indices, st.values, (num_samples, num_steps)) + decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1)) + return (decoded_dense, log_prob) # HIGH ORDER FUNCTIONS -@keras_export('keras.backend.map_fn') +@keras_export("keras.backend.map_fn") @doc_controls.do_not_generate_docs def map_fn(fn, elems, name=None, dtype=None): - """Map the function fn over the elements elems and return the outputs. + """Map the function fn over the elements elems and return the outputs. - Args: - fn: Callable that will be called upon each element in elems - elems: tensor - name: A string name for the map node in the graph - dtype: Output data type. + Args: + fn: Callable that will be called upon each element in elems + elems: tensor + name: A string name for the map node in the graph + dtype: Output data type. - Returns: - Tensor with dtype `dtype`. - """ - return tf.compat.v1.map_fn(fn, elems, name=name, dtype=dtype) + Returns: + Tensor with dtype `dtype`. + """ + return tf.compat.v1.map_fn(fn, elems, name=name, dtype=dtype) -@keras_export('keras.backend.foldl') +@keras_export("keras.backend.foldl") @doc_controls.do_not_generate_docs def foldl(fn, elems, initializer=None, name=None): - """Reduce elems using fn to combine them from left to right. + """Reduce elems using fn to combine them from left to right. - Args: - fn: Callable that will be called upon each element in elems and an - accumulator, for instance `lambda acc, x: acc + x` - elems: tensor - initializer: The first value used (`elems[0]` in case of None) - name: A string name for the foldl node in the graph + Args: + fn: Callable that will be called upon each element in elems and an + accumulator, for instance `lambda acc, x: acc + x` + elems: tensor + initializer: The first value used (`elems[0]` in case of None) + name: A string name for the foldl node in the graph - Returns: - Tensor with same type and shape as `initializer`. - """ - return tf.compat.v1.foldl(fn, elems, initializer=initializer, name=name) + Returns: + Tensor with same type and shape as `initializer`. + """ + return tf.compat.v1.foldl(fn, elems, initializer=initializer, name=name) -@keras_export('keras.backend.foldr') +@keras_export("keras.backend.foldr") @doc_controls.do_not_generate_docs def foldr(fn, elems, initializer=None, name=None): - """Reduce elems using fn to combine them from right to left. + """Reduce elems using fn to combine them from right to left. 
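These wrappers delegate directly to their `tf.compat.v1` counterparts, so one line each makes the semantics concrete (`foldr`, reformatted next, mirrors `foldl` from the right; values are illustrative):

```python
import tensorflow as tf
from tensorflow.keras import backend as K

elems = tf.constant([1.0, 2.0, 3.0, 4.0])
print(K.map_fn(lambda x: x * x, elems).numpy())        # [ 1.  4.  9. 16.]
print(K.foldl(lambda acc, x: acc + x, elems).numpy())  # 10.0
```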
+ + Args: + fn: Callable that will be called upon each element in elems and an + accumulator, for instance `lambda acc, x: acc + x` + elems: tensor + initializer: The first value used (`elems[-1]` in case of None) + name: A string name for the foldr node in the graph - Args: - fn: Callable that will be called upon each element in elems and an - accumulator, for instance `lambda acc, x: acc + x` - elems: tensor - initializer: The first value used (`elems[-1]` in case of None) - name: A string name for the foldr node in the graph + Returns: + Same type and shape as initializer + """ + return tf.compat.v1.foldr(fn, elems, initializer=initializer, name=name) - Returns: - Same type and shape as initializer - """ - return tf.compat.v1.foldr(fn, elems, initializer=initializer, name=name) # Load Keras default configuration from config file if present. # Set Keras base dir path given KERAS_HOME env variable, if applicable. # Otherwise either ~/.keras or /tmp. -if 'KERAS_HOME' in os.environ: - _keras_dir = os.environ.get('KERAS_HOME') +if "KERAS_HOME" in os.environ: + _keras_dir = os.environ.get("KERAS_HOME") else: - _keras_base_dir = os.path.expanduser('~') - _keras_dir = os.path.join(_keras_base_dir, '.keras') -_config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json')) + _keras_base_dir = os.path.expanduser("~") + _keras_dir = os.path.join(_keras_base_dir, ".keras") +_config_path = os.path.expanduser(os.path.join(_keras_dir, "keras.json")) if os.path.exists(_config_path): - try: - with open(_config_path) as fh: - _config = json.load(fh) - except ValueError: - _config = {} - _floatx = _config.get('floatx', floatx()) - assert _floatx in {'float16', 'float32', 'float64'} - _epsilon = _config.get('epsilon', epsilon()) - assert isinstance(_epsilon, float) - _image_data_format = _config.get('image_data_format', image_data_format()) - assert _image_data_format in {'channels_last', 'channels_first'} - set_floatx(_floatx) - set_epsilon(_epsilon) - set_image_data_format(_image_data_format) + try: + with open(_config_path) as fh: + _config = json.load(fh) + except ValueError: + _config = {} + _floatx = _config.get("floatx", floatx()) + assert _floatx in {"float16", "float32", "float64"} + _epsilon = _config.get("epsilon", epsilon()) + assert isinstance(_epsilon, float) + _image_data_format = _config.get("image_data_format", image_data_format()) + assert _image_data_format in {"channels_last", "channels_first"} + set_floatx(_floatx) + set_epsilon(_epsilon) + set_image_data_format(_image_data_format) # Save config file. if not os.path.exists(_keras_dir): - try: - os.makedirs(_keras_dir) - except OSError: - # Except permission denied and potential race conditions - # in multi-threaded environments. - pass + try: + os.makedirs(_keras_dir) + except OSError: + # Except permission denied and potential race conditions + # in multi-threaded environments. + pass if not os.path.exists(_config_path): - _config = { - 'floatx': floatx(), - 'epsilon': epsilon(), - 'backend': 'tensorflow', - 'image_data_format': image_data_format() - } - try: - with open(_config_path, 'w') as f: - f.write(json.dumps(_config, indent=4)) - except IOError: - # Except permission denied. - pass + _config = { + "floatx": floatx(), + "epsilon": epsilon(), + "backend": "tensorflow", + "image_data_format": image_data_format(), + } + try: + with open(_config_path, "w") as f: + f.write(json.dumps(_config, indent=4)) + except IOError: + # Except permission denied. 
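The two blocks above persist exactly four defaults to `keras.json`; reading the file back looks like this (a sketch, assuming no `KERAS_HOME` override and that the file has already been written):

```python
import json
import os

path = os.path.join(os.path.expanduser("~"), ".keras", "keras.json")
with open(path) as fh:
    print(json.load(fh))
# e.g. {'floatx': 'float32', 'epsilon': 1e-07,
#       'backend': 'tensorflow', 'image_data_format': 'channels_last'}
```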
+ pass def configure_and_create_distributed_session(distribution_strategy): - """Configure session config and create a session with it.""" - - def _create_session(distribution_strategy): - """Create the Distributed Strategy session.""" - session_config = get_default_session_config() - - # If a session already exists, merge in its config; in the case there is a - # conflict, take values of the existing config. - global _SESSION - if getattr(_SESSION, 'session', None) and _SESSION.session._config: - session_config.MergeFrom(_SESSION.session._config) - - if is_tpu_strategy(distribution_strategy): - # TODO(priyag, yuefengz): Remove this workaround when Distribute - # Coordinator is integrated with keras and we can create a session from - # there. - distribution_strategy.configure(session_config) - master = distribution_strategy.extended._tpu_cluster_resolver.master() # pylint: disable=protected-access - session = tf.compat.v1.Session(config=session_config, target=master) + """Configure session config and create a session with it.""" + + def _create_session(distribution_strategy): + """Create the Distributed Strategy session.""" + session_config = get_default_session_config() + + # If a session already exists, merge in its config; in the case there is + # a conflict, take values of the existing config. + global _SESSION + if getattr(_SESSION, "session", None) and _SESSION.session._config: + session_config.MergeFrom(_SESSION.session._config) + + if is_tpu_strategy(distribution_strategy): + # TODO(priyag, yuefengz): Remove this workaround when Distribute + # Coordinator is integrated with keras and we can create a session + # from there. + distribution_strategy.configure(session_config) + master = ( + distribution_strategy.extended._tpu_cluster_resolver.master() + ) + session = tf.compat.v1.Session(config=session_config, target=master) + else: + worker_context = dc.get_current_worker_context() + if worker_context: + dc_session_config = worker_context.session_config + # Merge the default session config to the one from distribute + # coordinator, which is fine for now since they don't have + # conflicting configurations. + dc_session_config.MergeFrom(session_config) + session = tf.compat.v1.Session( + config=dc_session_config, + target=worker_context.master_target, + ) + else: + distribution_strategy.configure(session_config) + session = tf.compat.v1.Session(config=session_config) + + set_session(session) + + if distribution_strategy.extended._in_multi_worker_mode(): + dc.run_distribute_coordinator(_create_session, distribution_strategy) else: - worker_context = dc.get_current_worker_context() - if worker_context: - dc_session_config = worker_context.session_config - # Merge the default session config to the one from distribute - # coordinator, which is fine for now since they don't have - # conflicting configurations. 
- dc_session_config.MergeFrom(session_config) - session = tf.compat.v1.Session( - config=dc_session_config, target=worker_context.master_target) - else: - distribution_strategy.configure(session_config) - session = tf.compat.v1.Session(config=session_config) - - set_session(session) - - if distribution_strategy.extended._in_multi_worker_mode(): - dc.run_distribute_coordinator( - _create_session, - distribution_strategy) - else: - _create_session(distribution_strategy) + _create_session(distribution_strategy) def _is_tpu_strategy_class(clz): - is_tpu_strat = lambda k: k.__name__.startswith('TPUStrategy') - if is_tpu_strat(clz): - return True - return py_any(map(_is_tpu_strategy_class, clz.__bases__)) + is_tpu_strat = lambda k: k.__name__.startswith("TPUStrategy") + if is_tpu_strat(clz): + return True + return py_any(map(_is_tpu_strategy_class, clz.__bases__)) def is_tpu_strategy(strategy): - """Returns whether input is a TPUStrategy instance or subclass instance.""" - return _is_tpu_strategy_class(strategy.__class__) - - -def cast_variables_to_tensor(tensors): - - def _cast_variables_to_tensor(tensor): - if isinstance(tensor, tf.Variable): - return tf.identity(tensor) - return tensor - - return tf.nest.map_structure(_cast_variables_to_tensor, tensors) + """Returns whether input is a TPUStrategy instance or subclass instance.""" + return _is_tpu_strategy_class(strategy.__class__) def _is_symbolic_tensor(x): - return tf.is_tensor(x) and not isinstance(x, tf.__internal__.EagerTensor) + return tf.is_tensor(x) and not isinstance(x, tf.__internal__.EagerTensor) def convert_inputs_if_ragged(inputs): - """Converts any ragged tensors to dense.""" - - def _convert_ragged_input(inputs): - if isinstance(inputs, tf.RaggedTensor): - return inputs.to_tensor() - return inputs + """Converts any ragged tensors to dense.""" - flat_inputs = tf.nest.flatten(inputs) - contains_ragged = py_any( - isinstance(i, tf.RaggedTensor) for i in flat_inputs) + def _convert_ragged_input(inputs): + if isinstance(inputs, tf.RaggedTensor): + return inputs.to_tensor() + return inputs - if not contains_ragged: - return inputs, None + flat_inputs = tf.nest.flatten(inputs) + contains_ragged = py_any( + isinstance(i, tf.RaggedTensor) for i in flat_inputs + ) - inputs = tf.nest.map_structure(_convert_ragged_input, inputs) - # Multiple mask are not yet supported, so one mask is used on all inputs. - # We approach this similarly when using row lengths to ignore steps. - nested_row_lengths = tf.cast(flat_inputs[0].nested_row_lengths()[0], - 'int32') - return inputs, nested_row_lengths + if not contains_ragged: + return inputs, None + inputs = tf.nest.map_structure(_convert_ragged_input, inputs) + # Multiple mask are not yet supported, so one mask is used on all inputs. + # We approach this similarly when using row lengths to ignore steps. + nested_row_lengths = tf.cast( + flat_inputs[0].nested_row_lengths()[0], "int32" + ) + return inputs, nested_row_lengths -def maybe_convert_to_ragged(is_ragged_input, output, nested_row_lengths, - go_backwards=False): - """Converts any ragged input back to its initial structure.""" - if not is_ragged_input: - return output - if go_backwards: - # Reverse based on the timestep dim, so that nested_row_lengths will mask - # from the correct direction. Return the reverse ragged tensor. 
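A round trip through the two ragged helpers: `convert_inputs_if_ragged` densifies the input and records its row lengths, and `maybe_convert_to_ragged` (reformatted just below) restores the original structure. A sketch, assuming the internal `keras.backend` module is importable, since neither helper is exported:

```python
import tensorflow as tf
from keras import backend

ragged = tf.ragged.constant([[1.0, 2.0, 3.0], [4.0]])
dense, row_lengths = backend.convert_inputs_if_ragged(ragged)
print(dense.shape)          # (2, 3), zero-padded
print(row_lengths.numpy())  # [3 1]

restored = backend.maybe_convert_to_ragged(True, dense, row_lengths)
print(restored)  # <tf.RaggedTensor [[1.0, 2.0, 3.0], [4.0]]>
```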
- output = reverse(output, [1]) - ragged = tf.RaggedTensor.from_tensor(output, nested_row_lengths) - return reverse(ragged, [1]) - else: - return tf.RaggedTensor.from_tensor(output, nested_row_lengths) +def maybe_convert_to_ragged( + is_ragged_input, output, nested_row_lengths, go_backwards=False +): + """Converts any ragged input back to its initial structure.""" + if not is_ragged_input: + return output + + if go_backwards: + # Reverse based on the timestep dim, so that nested_row_lengths will + # mask from the correct direction. Return the reverse ragged tensor. + output = reverse(output, [1]) + ragged = tf.RaggedTensor.from_tensor(output, nested_row_lengths) + return reverse(ragged, [1]) + else: + return tf.RaggedTensor.from_tensor(output, nested_row_lengths) class ContextValueCache(weakref.WeakKeyDictionary): - """Container that caches (possibly tensor) values based on the context. - - This class is similar to defaultdict, where values may be produced by the - default factory specified during initialization. This class also has a default - value for the key (when key is `None`) -- the key is set to the current graph - or eager context. The default factories for key and value are only used in - `__getitem__` and `setdefault`. The `.get()` behavior remains the same. - - This object will return the value of the current graph or closest parent graph - if the current graph is a function. This is to reflect the fact that if a - tensor is created in eager/graph, child functions may capture that tensor. - - The default factory method may accept keyword arguments (unlike defaultdict, - which only accepts callables with 0 arguments). To pass keyword arguments to - `default_factory`, use the `setdefault` method instead of `__getitem__`. - - An example of how this class can be used in different contexts: - - ``` - cache = ContextValueCache(int) - - # Eager mode - cache[None] += 2 - cache[None] += 4 - assert cache[None] == 6 - - # Graph mode - with tf.Graph().as_default() as g: - cache[None] += 5 - cache[g] += 3 - assert cache[g] == 8 - ``` - - Example of a default factory with arguments: - - ``` - cache = ContextValueCache(lambda x: x + 1) - g = tf.get_default_graph() - - # Example with keyword argument. - value = cache.setdefault(key=g, kwargs={'x': 3}) - assert cache[g] == 4 - ``` - """ + """Container that caches (possibly tensor) values based on the context. + + This class is similar to defaultdict, where values may be produced by the + default factory specified during initialization. This class also has a + default value for the key (when key is `None`) -- the key is set to the + current graph or eager context. The default factories for key and value are + only used in `__getitem__` and `setdefault`. The `.get()` behavior remains + the same. + + This object will return the value of the current graph or closest parent + graph if the current graph is a function. This is to reflect the fact that + if a tensor is created in eager/graph, child functions may capture that + tensor. + + The default factory method may accept keyword arguments (unlike defaultdict, + which only accepts callables with 0 arguments). To pass keyword arguments to + `default_factory`, use the `setdefault` method instead of `__getitem__`. 
+ + An example of how this class can be used in different contexts: + + ``` + cache = ContextValueCache(int) + + # Eager mode + cache[None] += 2 + cache[None] += 4 + assert cache[None] == 6 + + # Graph mode + with tf.Graph().as_default() as g: + cache[None] += 5 + cache[g] += 3 + assert cache[g] == 8 + ``` + + Example of a default factory with arguments: + + ``` + cache = ContextValueCache(lambda x: x + 1) + g = tf.get_default_graph() + + # Example with keyword argument. + value = cache.setdefault(key=g, kwargs={'x': 3}) + assert cache[g] == 4 + ``` + """ - def __init__(self, default_factory): - self.default_factory = default_factory - weakref.WeakKeyDictionary.__init__(self) + def __init__(self, default_factory): + self.default_factory = default_factory + weakref.WeakKeyDictionary.__init__(self) - def _key(self): - if tf.executing_eagerly(): - return _DUMMY_EAGER_GRAPH.key - else: - return tf.compat.v1.get_default_graph() - - def _get_parent_graph(self, graph): - """Returns the parent graph or dummy eager object.""" - # TODO(b/149317164): Currently FuncGraphs use ops.get_default_graph() as the - # outer graph. This results in outer_graph always being a Graph, - # even in eager mode (get_default_graph will create a new Graph if there - # isn't a default graph). Because of this bug, we have to specially set the - # key when eager execution is enabled. - parent_graph = graph.outer_graph - if (not isinstance(parent_graph, tf.__internal__.FuncGraph) and - tf.compat.v1.executing_eagerly_outside_functions()): - return _DUMMY_EAGER_GRAPH.key - return parent_graph - - def _get_recursive(self, key): - """Gets the value at key or the closest parent graph.""" - value = self.get(key) - if value is not None: - return value - - # Since FuncGraphs are able to capture tensors and variables from their - # parent graphs, recursively search to see if there is a value stored for - # one of the parent graphs. - if isinstance(key, tf.__internal__.FuncGraph): - return self._get_recursive(self._get_parent_graph(key)) - return None + def _key(self): + if tf.executing_eagerly(): + return _DUMMY_EAGER_GRAPH.key + else: + return tf.compat.v1.get_default_graph() + + def _get_parent_graph(self, graph): + """Returns the parent graph or dummy eager object.""" + # TODO(b/149317164): Currently FuncGraphs use ops.get_default_graph() as + # the outer graph. This results in outer_graph always being a Graph, + # even in eager mode (get_default_graph will create a new Graph if there + # isn't a default graph). Because of this bug, we have to specially set + # the key when eager execution is enabled. + parent_graph = graph.outer_graph + if ( + not isinstance(parent_graph, tf.__internal__.FuncGraph) + and tf.compat.v1.executing_eagerly_outside_functions() + ): + return _DUMMY_EAGER_GRAPH.key + return parent_graph + + def _get_recursive(self, key): + """Gets the value at key or the closest parent graph.""" + value = self.get(key) + if value is not None: + return value + + # Since FuncGraphs are able to capture tensors and variables from their + # parent graphs, recursively search to see if there is a value stored + # for one of the parent graphs. + if isinstance(key, tf.__internal__.FuncGraph): + return self._get_recursive(self._get_parent_graph(key)) + return None + + def __getitem__(self, key): + """Gets the value at key (or current context), or sets default value. - def __getitem__(self, key): - """Gets the value at key (or current context), or sets default value. + Args: + key: May be `None` or `Graph`object. 
When `None`, the key is set to + the current context. - Args: - key: May be `None` or `Graph`object. When `None`, the key is set to the - current context. + Returns: + Either the cached or default value. + """ + if key is None: + key = self._key() - Returns: - Either the cached or default value. - """ - if key is None: - key = self._key() + value = self._get_recursive(key) + if value is None: + value = self[key] = self.default_factory() + return value - value = self._get_recursive(key) - if value is None: - value = self[key] = self.default_factory() # pylint:disable=not-callable - return value + def setdefault(self, key=None, default=None, kwargs=None): + """Sets the default value if key is not in dict, and returns the + value.""" + if key is None: + key = self._key() + kwargs = kwargs or {} - def setdefault(self, key=None, default=None, kwargs=None): - """Sets the default value if key is not in dict, and returns the value.""" - if key is None: - key = self._key() - kwargs = kwargs or {} + if default is None and key not in self: + default = self.default_factory(**kwargs) + return weakref.WeakKeyDictionary.setdefault(self, key, default) - if default is None and key not in self: - default = self.default_factory(**kwargs) - return weakref.WeakKeyDictionary.setdefault(self, key, default) # This dictionary holds a mapping {graph: learning_phase}. In eager mode, a # dummy object is used. # A learning phase is a bool tensor used to run Keras models in # either train mode (learning_phase == 1) or test mode (learning_phase == 0). _GRAPH_LEARNING_PHASES = ContextValueCache( - object_identity.ObjectIdentityWeakSet) + object_identity.ObjectIdentityWeakSet +) # This dictionary holds a mapping between a graph and variables to initialize # in the graph. diff --git a/keras/backend_config.py b/keras/backend_config.py index a1e64fac4b2d..948cec331849 100644 --- a/keras/backend_config.py +++ b/keras/backend_config.py @@ -15,138 +15,143 @@ """Keras backend config API.""" import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.util.tf_export import keras_export # The type of float to use throughout a session. -_FLOATX = 'float32' +_FLOATX = "float32" # Epsilon fuzz factor used throughout the codebase. _EPSILON = 1e-7 # Default image data format, one of "channels_last", "channels_first". -_IMAGE_DATA_FORMAT = 'channels_last' +_IMAGE_DATA_FORMAT = "channels_last" -@keras_export('keras.backend.epsilon') +@keras_export("keras.backend.epsilon") @tf.__internal__.dispatch.add_dispatch_support def epsilon(): - """Returns the value of the fuzz factor used in numeric expressions. + """Returns the value of the fuzz factor used in numeric expressions. - Returns: - A float. + Returns: + A float. - Example: - >>> tf.keras.backend.epsilon() - 1e-07 - """ - return _EPSILON + Example: + >>> tf.keras.backend.epsilon() + 1e-07 + """ + return _EPSILON -@keras_export('keras.backend.set_epsilon') +@keras_export("keras.backend.set_epsilon") def set_epsilon(value): - """Sets the value of the fuzz factor used in numeric expressions. + """Sets the value of the fuzz factor used in numeric expressions. - Args: - value: float. New value of epsilon. + Args: + value: float. New value of epsilon. 
- Example: - >>> tf.keras.backend.epsilon() - 1e-07 - >>> tf.keras.backend.set_epsilon(1e-5) - >>> tf.keras.backend.epsilon() - 1e-05 - >>> tf.keras.backend.set_epsilon(1e-7) - """ - global _EPSILON - _EPSILON = value + Example: + >>> tf.keras.backend.epsilon() + 1e-07 + >>> tf.keras.backend.set_epsilon(1e-5) + >>> tf.keras.backend.epsilon() + 1e-05 + >>> tf.keras.backend.set_epsilon(1e-7) + """ + global _EPSILON + _EPSILON = value -@keras_export('keras.backend.floatx') +@keras_export("keras.backend.floatx") def floatx(): - """Returns the default float type, as a string. + """Returns the default float type, as a string. - E.g. `'float16'`, `'float32'`, `'float64'`. + E.g. `'float16'`, `'float32'`, `'float64'`. - Returns: - String, the current default float type. + Returns: + String, the current default float type. - Example: - >>> tf.keras.backend.floatx() - 'float32' - """ - return _FLOATX + Example: + >>> tf.keras.backend.floatx() + 'float32' + """ + return _FLOATX -@keras_export('keras.backend.set_floatx') +@keras_export("keras.backend.set_floatx") def set_floatx(value): - """Sets the default float type. - - Note: It is not recommended to set this to float16 for training, as this will - likely cause numeric stability issues. Instead, mixed precision, which is - using a mix of float16 and float32, can be used by calling - `tf.keras.mixed_precision.set_global_policy('mixed_float16')`. See the - [mixed precision guide]( - https://www.tensorflow.org/guide/keras/mixed_precision) for details. - - Args: - value: String; `'float16'`, `'float32'`, or `'float64'`. - - Example: - >>> tf.keras.backend.floatx() - 'float32' - >>> tf.keras.backend.set_floatx('float64') - >>> tf.keras.backend.floatx() - 'float64' - >>> tf.keras.backend.set_floatx('float32') - - Raises: - ValueError: In case of invalid value. - """ - global _FLOATX - accepted_dtypes = {'float16', 'float32', 'float64'} - if value not in accepted_dtypes: - raise ValueError( - f'Unknown `floatx` value: {value}. Expected one of {accepted_dtypes}') - _FLOATX = str(value) - - -@keras_export('keras.backend.image_data_format') + """Sets the default float type. + + Note: It is not recommended to set this to float16 for training, as this + will likely cause numeric stability issues. Instead, mixed precision, which + is using a mix of float16 and float32, can be used by calling + `tf.keras.mixed_precision.set_global_policy('mixed_float16')`. See the + [mixed precision guide]( + https://www.tensorflow.org/guide/keras/mixed_precision) for details. + + Args: + value: String; `'float16'`, `'float32'`, or `'float64'`. + + Example: + >>> tf.keras.backend.floatx() + 'float32' + >>> tf.keras.backend.set_floatx('float64') + >>> tf.keras.backend.floatx() + 'float64' + >>> tf.keras.backend.set_floatx('float32') + + Raises: + ValueError: In case of invalid value. + """ + global _FLOATX + accepted_dtypes = {"float16", "float32", "float64"} + if value not in accepted_dtypes: + raise ValueError( + f"Unknown `floatx` value: {value}. " + f"Expected one of {accepted_dtypes}" + ) + _FLOATX = str(value) + + +@keras_export("keras.backend.image_data_format") @tf.__internal__.dispatch.add_dispatch_support def image_data_format(): - """Returns the default image data format convention. + """Returns the default image data format convention. 
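Backing up to the `set_floatx` note above: it steers float16 users toward mixed precision instead. A minimal sketch of that route, assuming the TF 2.4+ `tf.keras.mixed_precision` API:

```python
import tensorflow as tf

tf.keras.mixed_precision.set_global_policy("mixed_float16")
layer = tf.keras.layers.Dense(8)
print(layer.compute_dtype)  # float16: computations run in half precision
print(layer.dtype)          # float32: variables stay full precision
tf.keras.mixed_precision.set_global_policy("float32")  # restore the default
```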
- Returns: - A string, either `'channels_first'` or `'channels_last'` + Returns: + A string, either `'channels_first'` or `'channels_last'` - Example: - >>> tf.keras.backend.image_data_format() - 'channels_last' - """ - return _IMAGE_DATA_FORMAT + Example: + >>> tf.keras.backend.image_data_format() + 'channels_last' + """ + return _IMAGE_DATA_FORMAT -@keras_export('keras.backend.set_image_data_format') +@keras_export("keras.backend.set_image_data_format") def set_image_data_format(data_format): - """Sets the value of the image data format convention. - - Args: - data_format: string. `'channels_first'` or `'channels_last'`. - - Example: - >>> tf.keras.backend.image_data_format() - 'channels_last' - >>> tf.keras.backend.set_image_data_format('channels_first') - >>> tf.keras.backend.image_data_format() - 'channels_first' - >>> tf.keras.backend.set_image_data_format('channels_last') - - Raises: - ValueError: In case of invalid `data_format` value. - """ - global _IMAGE_DATA_FORMAT - accepted_formats = {'channels_last', 'channels_first'} - if data_format not in accepted_formats: - raise ValueError( - f'Unknown `data_format`: {data_format}. ' - f'Expected one of {accepted_formats}') - _IMAGE_DATA_FORMAT = str(data_format) + """Sets the value of the image data format convention. + + Args: + data_format: string. `'channels_first'` or `'channels_last'`. + + Example: + >>> tf.keras.backend.image_data_format() + 'channels_last' + >>> tf.keras.backend.set_image_data_format('channels_first') + >>> tf.keras.backend.image_data_format() + 'channels_first' + >>> tf.keras.backend.set_image_data_format('channels_last') + + Raises: + ValueError: In case of invalid `data_format` value. + """ + global _IMAGE_DATA_FORMAT + accepted_formats = {"channels_last", "channels_first"} + if data_format not in accepted_formats: + raise ValueError( + f"Unknown `data_format`: {data_format}. 
" + f"Expected one of {accepted_formats}" + ) + _IMAGE_DATA_FORMAT = str(data_format) diff --git a/keras/backend_config_test.py b/keras/backend_config_test.py index e7e9dfd5bf39..5e8e9e2c0359 100644 --- a/keras/backend_config_test.py +++ b/keras/backend_config_test.py @@ -21,33 +21,32 @@ from keras.testing_infra import test_combinations -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class BackendConfigTest(tf.test.TestCase): - - def test_backend(self): - self.assertEqual(backend.backend(), 'tensorflow') - - def test_epsilon(self): - epsilon = 1e-2 - backend_config.set_epsilon(epsilon) - self.assertEqual(backend_config.epsilon(), epsilon) - backend_config.set_epsilon(1e-7) - self.assertEqual(backend_config.epsilon(), 1e-7) - - def test_floatx(self): - floatx = 'float64' - backend_config.set_floatx(floatx) - self.assertEqual(backend_config.floatx(), floatx) - backend_config.set_floatx('float32') - self.assertEqual(backend_config.floatx(), 'float32') - - def test_image_data_format(self): - image_data_format = 'channels_first' - backend_config.set_image_data_format(image_data_format) - self.assertEqual(backend_config.image_data_format(), image_data_format) - backend_config.set_image_data_format('channels_last') - self.assertEqual(backend_config.image_data_format(), 'channels_last') - - -if __name__ == '__main__': - tf.test.main() + def test_backend(self): + self.assertEqual(backend.backend(), "tensorflow") + + def test_epsilon(self): + epsilon = 1e-2 + backend_config.set_epsilon(epsilon) + self.assertEqual(backend_config.epsilon(), epsilon) + backend_config.set_epsilon(1e-7) + self.assertEqual(backend_config.epsilon(), 1e-7) + + def test_floatx(self): + floatx = "float64" + backend_config.set_floatx(floatx) + self.assertEqual(backend_config.floatx(), floatx) + backend_config.set_floatx("float32") + self.assertEqual(backend_config.floatx(), "float32") + + def test_image_data_format(self): + image_data_format = "channels_first" + backend_config.set_image_data_format(image_data_format) + self.assertEqual(backend_config.image_data_format(), image_data_format) + backend_config.set_image_data_format("channels_last") + self.assertEqual(backend_config.image_data_format(), "channels_last") + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/backend_test.py b/keras/backend_test.py index cee51d964743..b47ca213d225 100644 --- a/keras/backend_test.py +++ b/keras/backend_test.py @@ -14,2504 +14,3159 @@ # ============================================================================== """Tests for Keras backend.""" -import tensorflow.compat.v2 as tf - import gc import warnings -from absl.testing import parameterized import numpy as np import scipy.sparse -from tensorflow.python.eager import context -from tensorflow.python.eager.context import get_config -from tensorflow.python.framework import test_util as tf_test_utils +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + from keras import activations from keras import backend -from keras.testing_infra import test_combinations from keras.engine import input_layer from keras.layers import activation from keras.layers.normalization import batch_normalization_v1 +from keras.testing_infra import test_combinations +from keras.utils import losses_utils from keras.utils import tf_inspect from keras.utils import tf_utils - -def compare_single_input_op_to_numpy(keras_op, - np_op, - input_shape, - dtype='float32', - 
negative_values=True, - keras_args=None, - keras_kwargs=None, - np_args=None, - np_kwargs=None): - keras_args = keras_args or [] - keras_kwargs = keras_kwargs or {} - np_args = np_args or [] - np_kwargs = np_kwargs or {} - inputs = 2. * np.random.random(input_shape) - if negative_values: - inputs -= 1. - keras_output = keras_op( - backend.variable(inputs, dtype=dtype), *keras_args, **keras_kwargs) - keras_output = backend.eval(keras_output) - np_output = np_op(inputs.astype(dtype), *np_args, **np_kwargs) - try: - np.testing.assert_allclose(keras_output, np_output, atol=1e-4) - except AssertionError: - raise AssertionError('Test for op `' + str(keras_op.__name__) + '` failed; ' - 'Expected ' + str(np_output) + ' but got ' + - str(keras_output)) - - -def compare_two_inputs_op_to_numpy(keras_op, - np_op, - input_shape_a, - input_shape_b, - dtype='float32', - keras_args=None, - keras_kwargs=None, - np_args=None, - np_kwargs=None): - keras_args = keras_args or [] - keras_kwargs = keras_kwargs or {} - np_args = np_args or [] - np_kwargs = np_kwargs or {} - input_a = np.random.random(input_shape_a) - input_b = np.random.random(input_shape_b) - keras_output = keras_op( - backend.variable(input_a, dtype=dtype), - backend.variable(input_b, dtype=dtype), *keras_args, **keras_kwargs) - keras_output = backend.eval(keras_output) - np_output = np_op( - input_a.astype(dtype), input_b.astype(dtype), *np_args, **np_kwargs) - try: - np.testing.assert_allclose(keras_output, np_output, atol=1e-4) - except AssertionError: - raise AssertionError('Test for op `' + str(keras_op.__name__) + '` failed; ' - 'Expected ' + str(np_output) + ' but got ' + - str(keras_output)) +# isort: off +from tensorflow.python.eager import context +from tensorflow.python.eager.context import get_config +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) + + +def compare_single_input_op_to_numpy( + keras_op, + np_op, + input_shape, + dtype="float32", + negative_values=True, + keras_args=None, + keras_kwargs=None, + np_args=None, + np_kwargs=None, +): + keras_args = keras_args or [] + keras_kwargs = keras_kwargs or {} + np_args = np_args or [] + np_kwargs = np_kwargs or {} + inputs = 2.0 * np.random.random(input_shape) + if negative_values: + inputs -= 1.0 + keras_output = keras_op( + backend.variable(inputs, dtype=dtype), *keras_args, **keras_kwargs + ) + keras_output = backend.eval(keras_output) + np_output = np_op(inputs.astype(dtype), *np_args, **np_kwargs) + try: + np.testing.assert_allclose(keras_output, np_output, atol=1e-4) + except AssertionError: + raise AssertionError( + "Test for op `" + + str(keras_op.__name__) + + "` failed; Expected " + + str(np_output) + + " but got " + + str(keras_output) + ) + + +def compare_two_inputs_op_to_numpy( + keras_op, + np_op, + input_shape_a, + input_shape_b, + dtype="float32", + keras_args=None, + keras_kwargs=None, + np_args=None, + np_kwargs=None, +): + keras_args = keras_args or [] + keras_kwargs = keras_kwargs or {} + np_args = np_args or [] + np_kwargs = np_kwargs or {} + input_a = np.random.random(input_shape_a) + input_b = np.random.random(input_shape_b) + keras_output = keras_op( + backend.variable(input_a, dtype=dtype), + backend.variable(input_b, dtype=dtype), + *keras_args, + **keras_kwargs, + ) + keras_output = backend.eval(keras_output) + np_output = np_op( + input_a.astype(dtype), input_b.astype(dtype), *np_args, **np_kwargs + ) + try: + np.testing.assert_allclose(keras_output, np_output, atol=1e-4) + except AssertionError: + raise AssertionError( + 
"Test for op `" + + str(keras_op.__name__) + + "` failed; Expected " + + str(np_output) + + " but got " + + str(keras_output) + ) class BackendResetTest(tf.test.TestCase, parameterized.TestCase): - - def test_new_config(self): - # User defined jit setting - tf.config.optimizer.set_jit(False) - sess = backend.get_session() - default_config = get_config() - self.assertEqual( - sess._config.graph_options.optimizer_options.global_jit_level, - default_config.graph_options.optimizer_options.global_jit_level) - backend.clear_session() - - # New session has the same jit setting - sess = backend.get_session() - default_config = get_config() - self.assertEqual( - sess._config.graph_options.optimizer_options.global_jit_level, - default_config.graph_options.optimizer_options.global_jit_level) - backend.clear_session() - - # Change respected - tf.config.optimizer.set_jit(True) - sess = backend.get_session() - default_config = get_config() - self.assertEqual( - sess._config.graph_options.optimizer_options.global_jit_level, - default_config.graph_options.optimizer_options.global_jit_level) - backend.clear_session() - - # We can't use the normal parameterized decorator because the test session - # will block graph clearing. - @parameterized.named_parameters(('_v1', context.graph_mode), - ('_v2', tf.__internal__.eager_context.eager_mode)) - def test_new_graph(self, test_context): - with test_context(): - g_old = backend.get_graph() - backend.clear_session() - g = backend.get_graph() - - assert g_old is not g - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_new_config(self): + # User defined jit setting + tf.config.optimizer.set_jit(False) + sess = backend.get_session() + default_config = get_config() + self.assertEqual( + sess._config.graph_options.optimizer_options.global_jit_level, + default_config.graph_options.optimizer_options.global_jit_level, + ) + backend.clear_session() + + # New session has the same jit setting + sess = backend.get_session() + default_config = get_config() + self.assertEqual( + sess._config.graph_options.optimizer_options.global_jit_level, + default_config.graph_options.optimizer_options.global_jit_level, + ) + backend.clear_session() + + # Change respected + tf.config.optimizer.set_jit(True) + sess = backend.get_session() + default_config = get_config() + self.assertEqual( + sess._config.graph_options.optimizer_options.global_jit_level, + default_config.graph_options.optimizer_options.global_jit_level, + ) + backend.clear_session() + + # We can't use the normal parameterized decorator because the test session + # will block graph clearing. 
+ @parameterized.named_parameters( + ("_v1", context.graph_mode), + ("_v2", tf.__internal__.eager_context.eager_mode), + ) + def test_new_graph(self, test_context): + with test_context(): + g_old = backend.get_graph() + backend.clear_session() + g = backend.get_graph() + + assert g_old is not g + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class BackendUtilsTest(tf.test.TestCase): + def test_backend(self): + self.assertEqual(backend.backend(), "tensorflow") + + def test_get_reset_uids(self): + self.assertEqual(backend.get_uid("foo"), 1) + self.assertEqual(backend.get_uid("foo"), 2) + + backend.reset_uids() + self.assertEqual(backend.get_uid("foo"), 1) + + def test_learning_phase(self): + with self.cached_session() as sess: + with self.assertRaises(ValueError): + backend.set_learning_phase(2) + + # Test running with a learning-phase-consuming layer + with backend.learning_phase_scope(0): + x = input_layer.Input((3,)) + y = batch_normalization_v1.BatchNormalization()(x) + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.global_variables_initializer()) + sess.run(y, feed_dict={x: np.random.random((2, 3))}) + + def test_get_learning_phase_eager(self): + if not tf.executing_eagerly(): + self.skipTest("Check for eager only.") + # see b/251520266 for more details. + # By default the learning phase should be False + self.assertFalse(backend.learning_phase()) + # Also make sure retrieving the learning phase doesn't set the default + # value + self.assertFalse(backend.global_learning_phase_is_set()) + + with backend.learning_phase_scope(1): + self.assertTrue(backend.learning_phase()) + self.assertTrue(backend.global_learning_phase_is_set()) + + self.assertFalse(backend.global_learning_phase_is_set()) + + def test_learning_phase_name(self): + with backend.name_scope("test_scope"): + # Test that outer name scopes do not affect the learning phase's + # name. 
+ lp = backend.symbolic_learning_phase() + self.assertEqual(lp.name, "keras_learning_phase:0") + + def test_learning_phase_scope(self): + initial_learning_phase = backend.learning_phase() + with backend.learning_phase_scope(1): + self.assertEqual(backend.learning_phase(), 1) + self.assertEqual(backend.learning_phase(), initial_learning_phase) + with backend.learning_phase_scope(0): + self.assertEqual(backend.learning_phase(), 0) + self.assertEqual(backend.learning_phase(), initial_learning_phase) + with self.assertRaises(ValueError): + with backend.learning_phase_scope(None): + pass + self.assertEqual(backend.learning_phase(), initial_learning_phase) + + new_learning_phase = 0 + backend.set_learning_phase(new_learning_phase) + self.assertEqual(backend.learning_phase(), new_learning_phase) + with backend.learning_phase_scope(1): + self.assertEqual(backend.learning_phase(), 1) + self.assertEqual(backend.learning_phase(), new_learning_phase) + + def test_learning_phase_scope_in_graph(self): + initial_learning_phase_outside_graph = backend.learning_phase() + with backend.get_graph().as_default(): + initial_learning_phase_in_graph = backend.learning_phase() + + self.assertEqual( + backend.learning_phase(), initial_learning_phase_outside_graph + ) + with backend.learning_phase_scope(1): + self.assertEqual(backend.learning_phase(), 1) + self.assertEqual( + backend.learning_phase(), initial_learning_phase_outside_graph + ) + + with backend.get_graph().as_default(): + self.assertIs( + backend.learning_phase(), initial_learning_phase_in_graph + ) + + self.assertEqual( + backend.learning_phase(), initial_learning_phase_outside_graph + ) + + def test_int_shape(self): + x = backend.ones(shape=(3, 4)) + self.assertEqual(backend.int_shape(x), (3, 4)) - def test_backend(self): - self.assertEqual(backend.backend(), 'tensorflow') + if not tf.executing_eagerly(): + x = backend.placeholder(shape=(None, 4)) + self.assertEqual(backend.int_shape(x), (None, 4)) + + def test_in_train_phase(self): + y1 = backend.variable(1) + y2 = backend.variable(2) + if tf.executing_eagerly(): + with backend.learning_phase_scope(0): + y_val_test = backend.in_train_phase(y1, y2).numpy() + with backend.learning_phase_scope(1): + y_val_train = backend.in_train_phase(y1, y2).numpy() + else: + y = backend.in_train_phase(y1, y2) + f = backend.function([backend.learning_phase()], [y]) + y_val_test = f([0])[0] + y_val_train = f([1])[0] + self.assertAllClose(y_val_test, 2) + self.assertAllClose(y_val_train, 1) + + def test_is_keras_tensor(self): + x = backend.variable(1) + self.assertEqual(backend.is_keras_tensor(x), False) + x = input_layer.Input(shape=(1,)) + self.assertEqual(backend.is_keras_tensor(x), True) + x = input_layer.Input(shape=(None,), ragged=True) + self.assertEqual(backend.is_keras_tensor(x), True) + x = input_layer.Input(shape=(None, None), sparse=True) + self.assertEqual(backend.is_keras_tensor(x), True) + with self.assertRaises(ValueError): + backend.is_keras_tensor(0) + + def test_stop_gradient(self): + x = backend.variable(1) + y = backend.stop_gradient(x) + if not tf.executing_eagerly(): + self.assertEqual(y.op.name[:12], "StopGradient") - def test_get_reset_uids(self): - self.assertEqual(backend.get_uid('foo'), 1) - self.assertEqual(backend.get_uid('foo'), 2) + xs = [backend.variable(1) for _ in range(3)] + ys = backend.stop_gradient(xs) + if not tf.executing_eagerly(): + for y in ys: + self.assertEqual(y.op.name[:12], "StopGradient") + + def test_placeholder(self): + x = backend.placeholder(shape=(3, 4)) + 
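The `in_train_phase` switch exercised in `test_in_train_phase` above, outside the test harness (an eager-mode sketch):

```python
from keras import backend

one = backend.variable(1.0)
two = backend.variable(2.0)
with backend.learning_phase_scope(1):
    print(backend.in_train_phase(one, two).numpy())  # 1.0, train branch
with backend.learning_phase_scope(0):
    print(backend.in_train_phase(one, two).numpy())  # 2.0, test branch
```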
self.assertEqual(x.shape.as_list(), [3, 4]) + x = backend.placeholder(shape=(3, 4), sparse=True) + self.assertEqual(x.shape.as_list(), [3, 4]) + + def test_is_placeholder(self): + x = backend.placeholder(shape=(1,)) + self.assertEqual(backend.is_placeholder(x), True) + x = backend.variable(1) + self.assertEqual(backend.is_placeholder(x), False) + + def test_print_tensor(self): + # Unfortunately it seems impossible to use `mock` (or any other method) + # to capture stdout when used inside a graph or graph function, thus + # we cannot test correctness. + # The message gets correctly printed in practice. + x = backend.placeholder(shape=()) + y = backend.print_tensor(x, f"eager={tf.executing_eagerly()}") + f = backend.function(x, y) + f(0) + + def test_cast_to_floatx(self): + x = backend.variable(1, dtype="float64") + x = backend.cast_to_floatx(x) + self.assertEqual(x.dtype.name, "float32") + x = backend.cast_to_floatx(2) + self.assertEqual(x.dtype.name, "float32") + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class BackendVariableTest(tf.test.TestCase): + def test_zeros(self): + x = backend.zeros((3, 4)) + val = backend.eval(x) + self.assertAllClose(val, np.zeros((3, 4))) + + def test_ones(self): + x = backend.ones((3, 4)) + val = backend.eval(x) + self.assertAllClose(val, np.ones((3, 4))) + + def test_eye(self): + x = backend.eye(4) + val = backend.eval(x) + self.assertAllClose(val, np.eye(4)) + + def test_zeros_like(self): + x = backend.zeros((3, 4)) + y = backend.zeros_like(x) + val = backend.eval(y) + self.assertAllClose(val, np.zeros((3, 4))) + + def test_ones_like(self): + x = backend.zeros((3, 4)) + y = backend.ones_like(x) + val = backend.eval(y) + self.assertAllClose(val, np.ones((3, 4))) + + def test_random_uniform_variable(self): + x = backend.random_uniform_variable((30, 20), low=1.0, high=2.0, seed=0) + val = backend.eval(x) + self.assertAllClose(val.mean(), 1.5, atol=1e-1) + self.assertAllClose(val.max(), 2.0, atol=1e-1) + self.assertAllClose(val.min(), 1.0, atol=1e-1) + + def test_random_normal_variable(self): + x = backend.random_normal_variable((30, 20), 1.0, 0.5, seed=0) + val = backend.eval(x) + self.assertAllClose(val.mean(), 1.0, atol=1e-1) + self.assertAllClose(val.std(), 0.5, atol=1e-1) + + def test_count_params(self): + x = backend.zeros((4, 5)) + val = backend.count_params(x) + self.assertAllClose(val, 20) + + def test_constant(self): + ref_val = np.random.random((3, 4)).astype("float32") + x = backend.constant(ref_val) + val = backend.eval(x) + self.assertAllClose(val, ref_val) + + def test_sparse_variable(self): + val = scipy.sparse.eye(10) + x = backend.variable(val) + self.assertTrue(isinstance(x, tf.SparseTensor)) + + y = backend.to_dense(x) + self.assertFalse(backend.is_sparse(y)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class BackendLinearAlgebraTest(tf.test.TestCase, parameterized.TestCase): + def test_dot(self): + x = backend.ones(shape=(2, 3)) + y = backend.ones(shape=(3, 4)) + xy = backend.dot(x, y) + self.assertEqual(xy.shape.as_list(), [2, 4]) + + x = backend.ones(shape=(32, 28, 3)) + y = backend.ones(shape=(3, 4)) + xy = backend.dot(x, y) + self.assertEqual(xy.shape.as_list(), [32, 28, 4]) + + @parameterized.parameters( + [(2, 3, 4, 5), (2, 5, 6, 7), (2, 3, 4, 6, 7), (3, 1)], + [(2, 20, 1), (2, 30, 20), (2, 1, 30), (1, 2)], + [(4, 2, 3), (4, 5, 3), (4, 2, 5), (2, 2)], + [(4, 2), (4, 2, 3), (4, 3), (1, 1)], + [(4, 2), (4, 2, 3), (4, 3), 1], + [(4, 2, 3), (4, 3), (4, 2), 
(2, 1)], + ) + def test_batch_dot(self, x_shape, y_shape, output_shape, axes): + x_val = np.random.random(x_shape) + y_val = np.random.random(y_shape) + x = backend.variable(x_val) + y = backend.variable(y_val) + xy = backend.batch_dot(x, y, axes=axes) + self.assertEqual(tuple(xy.shape.as_list()), output_shape) + xy_val = backend.eval(xy) + ref_val = self._reference_batch_dot(x_val, y_val, axes) + self.assertAllClose(xy_val, ref_val, atol=1e-5) + + def _reference_batch_dot(self, x, y, axes): + if isinstance(axes, int): + axes = [axes, axes] + elif isinstance(axes, tuple): + axes = list(axes) + if axes is None: + if y.ndim == 2: + axes = [x.ndim - 1, y.ndim - 1] + else: + axes = [x.ndim - 1, y.ndim - 2] + if axes[0] < 0: + axes[0] += x.ndim + if axes[1] < 0: + axes[1] += y.ndim + result = [] + axes = [axes[0] - 1, axes[1] - 1] + for xi, yi in zip(x, y): + result.append(np.tensordot(xi, yi, axes)) + result = np.array(result) + if result.ndim == 1: + result = np.expand_dims(result, -1) + return result + + def test_reduction_ops(self): + ops_to_test = [ + (backend.max, np.max), + (backend.min, np.min), + (backend.sum, np.sum), + (backend.prod, np.prod), + (backend.var, np.var), + (backend.std, np.std), + (backend.mean, np.mean), + (backend.argmin, np.argmin), + (backend.argmax, np.argmax), + ] + for keras_op, np_op in ops_to_test: + compare_single_input_op_to_numpy( + keras_op, + np_op, + input_shape=(4, 7, 5), + keras_kwargs={"axis": 1}, + np_kwargs={"axis": 1}, + ) + compare_single_input_op_to_numpy( + keras_op, + np_op, + input_shape=(4, 7, 5), + keras_kwargs={"axis": -1}, + np_kwargs={"axis": -1}, + ) + if "keepdims" in tf_inspect.getargspec(keras_op).args: + compare_single_input_op_to_numpy( + keras_op, + np_op, + input_shape=(4, 7, 5), + keras_kwargs={"axis": 1, "keepdims": True}, + np_kwargs={"axis": 1, "keepdims": True}, + ) + + def test_elementwise_ops(self): + ops_to_test = [ + (backend.square, np.square), + (backend.abs, np.abs), + (backend.round, np.round), + (backend.sign, np.sign), + (backend.sin, np.sin), + (backend.cos, np.cos), + (backend.exp, np.exp), + ] + for keras_op, np_op in ops_to_test: + compare_single_input_op_to_numpy( + keras_op, np_op, input_shape=(4, 7) + ) + + ops_to_test = [ + (backend.sqrt, np.sqrt), + (backend.log, np.log), + ] + for keras_op, np_op in ops_to_test: + compare_single_input_op_to_numpy( + keras_op, np_op, input_shape=(4, 7), negative_values=False + ) - backend.reset_uids() - self.assertEqual(backend.get_uid('foo'), 1) + compare_single_input_op_to_numpy( + backend.clip, + np.clip, + input_shape=(6, 4), + keras_kwargs={"min_value": 0.1, "max_value": 2.4}, + np_kwargs={"a_min": 0.1, "a_max": 1.4}, + ) - def test_learning_phase(self): - with self.cached_session() as sess: - with self.assertRaises(ValueError): - backend.set_learning_phase(2) + compare_single_input_op_to_numpy( + backend.pow, + np.power, + input_shape=(6, 4), + keras_args=[3], + np_args=[3], + ) + + def test_two_tensor_ops(self): + ops_to_test = [ + (backend.equal, np.equal), + (backend.not_equal, np.not_equal), + (backend.greater, np.greater), + (backend.greater_equal, np.greater_equal), + (backend.less, np.less), + (backend.less_equal, np.less_equal), + (backend.maximum, np.maximum), + (backend.minimum, np.minimum), + ] + for keras_op, np_op in ops_to_test: + compare_two_inputs_op_to_numpy( + keras_op, np_op, input_shape_a=(4, 7), input_shape_b=(4, 7) + ) + + def test_relu(self): + x = tf.convert_to_tensor([[-4, 0], [2, 7]], "float32") + + # standard relu + relu_op = backend.relu(x) 
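One of the parameterized `batch_dot` cases above, written out concretely (random values; only the shapes matter here):

```python
import numpy as np
from keras import backend

x = backend.variable(np.random.random((2, 20, 1)))
y = backend.variable(np.random.random((2, 30, 20)))
# Contract axis 1 of x (size 20) against axis 2 of y (size 20).
xy = backend.batch_dot(x, y, axes=(1, 2))
print(xy.shape)  # (2, 1, 30), matching the second test case
```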
+ self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 7]]) + + # alpha (leaky relu used) + relu_op = backend.relu(x, alpha=0.5) + if not tf.executing_eagerly(): + self.assertTrue("LeakyRelu" in relu_op.name) + self.assertAllClose(backend.eval(relu_op), [[-2, 0], [2, 7]]) - # Test running with a learning-phase-consuming layer - with backend.learning_phase_scope(0): - x = input_layer.Input((3,)) - y = batch_normalization_v1.BatchNormalization()(x) + # max_value < some elements + relu_op = backend.relu(x, max_value=5.0) + self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 5]]) + + # nn.relu6 used + relu_op = backend.relu(x, max_value=6.0) if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.global_variables_initializer()) - sess.run(y, feed_dict={x: np.random.random((2, 3))}) - - def test_learning_phase_name(self): - with backend.name_scope('test_scope'): - # Test that outer name scopes do not affect the learning phase's name. - lp = backend.symbolic_learning_phase() - self.assertEqual(lp.name, 'keras_learning_phase:0') - - def test_learning_phase_scope(self): - initial_learning_phase = backend.learning_phase() - with backend.learning_phase_scope(1): - self.assertEqual(backend.learning_phase(), 1) - self.assertEqual(backend.learning_phase(), initial_learning_phase) - with backend.learning_phase_scope(0): - self.assertEqual(backend.learning_phase(), 0) - self.assertEqual(backend.learning_phase(), initial_learning_phase) - with self.assertRaises(ValueError): - with backend.learning_phase_scope(None): - pass - self.assertEqual(backend.learning_phase(), initial_learning_phase) - - new_learning_phase = 0 - backend.set_learning_phase(new_learning_phase) - self.assertEqual(backend.learning_phase(), new_learning_phase) - with backend.learning_phase_scope(1): - self.assertEqual(backend.learning_phase(), 1) - self.assertEqual(backend.learning_phase(), new_learning_phase) - - def test_learning_phase_scope_in_graph(self): - initial_learning_phase_outside_graph = backend.learning_phase() - with backend.get_graph().as_default(): - initial_learning_phase_in_graph = backend.learning_phase() - - self.assertEqual(backend.learning_phase(), - initial_learning_phase_outside_graph) - with backend.learning_phase_scope(1): - self.assertEqual(backend.learning_phase(), 1) - self.assertEqual(backend.learning_phase(), - initial_learning_phase_outside_graph) - - with backend.get_graph().as_default(): - self.assertIs(backend.learning_phase(), initial_learning_phase_in_graph) - - self.assertEqual(backend.learning_phase(), - initial_learning_phase_outside_graph) - - def test_int_shape(self): - x = backend.ones(shape=(3, 4)) - self.assertEqual(backend.int_shape(x), (3, 4)) - - if not tf.executing_eagerly(): - x = backend.placeholder(shape=(None, 4)) - self.assertEqual(backend.int_shape(x), (None, 4)) - - def test_in_train_phase(self): - y1 = backend.variable(1) - y2 = backend.variable(2) - if tf.executing_eagerly(): - with backend.learning_phase_scope(0): - y_val_test = backend.in_train_phase(y1, y2).numpy() - with backend.learning_phase_scope(1): - y_val_train = backend.in_train_phase(y1, y2).numpy() - else: - y = backend.in_train_phase(y1, y2) - f = backend.function([backend.learning_phase()], [y]) - y_val_test = f([0])[0] - y_val_train = f([1])[0] - self.assertAllClose(y_val_test, 2) - self.assertAllClose(y_val_train, 1) - - def test_is_keras_tensor(self): - x = backend.variable(1) - self.assertEqual(backend.is_keras_tensor(x), False) - x = input_layer.Input(shape=(1,)) - 
self.assertEqual(backend.is_keras_tensor(x), True) - x = input_layer.Input(shape=(None,), ragged=True) - self.assertEqual(backend.is_keras_tensor(x), True) - x = input_layer.Input(shape=(None, None), sparse=True) - self.assertEqual(backend.is_keras_tensor(x), True) - with self.assertRaises(ValueError): - backend.is_keras_tensor(0) - - def test_stop_gradient(self): - x = backend.variable(1) - y = backend.stop_gradient(x) - if not tf.executing_eagerly(): - self.assertEqual(y.op.name[:12], 'StopGradient') - - xs = [backend.variable(1) for _ in range(3)] - ys = backend.stop_gradient(xs) - if not tf.executing_eagerly(): - for y in ys: - self.assertEqual(y.op.name[:12], 'StopGradient') - - def test_placeholder(self): - x = backend.placeholder(shape=(3, 4)) - self.assertEqual(x.shape.as_list(), [3, 4]) - x = backend.placeholder(shape=(3, 4), sparse=True) - self.assertEqual(x.shape.as_list(), [3, 4]) - - def test_is_placeholder(self): - x = backend.placeholder(shape=(1,)) - self.assertEqual(backend.is_placeholder(x), True) - x = backend.variable(1) - self.assertEqual(backend.is_placeholder(x), False) - - def test_print_tensor(self): - # Unfortunately it seems impossible to use `mock` (or any other method) - # to capture stdout when used inside a graph or graph function, thus - # we cannot test correctness. - # The message gets correctly printed in practice. - x = backend.placeholder(shape=()) - y = backend.print_tensor(x, 'eager=%s' % tf.executing_eagerly()) - f = backend.function(x, y) - f(0) - - def test_cast_to_floatx(self): - x = backend.variable(1, dtype='float64') - x = backend.cast_to_floatx(x) - self.assertEqual(x.dtype.name, 'float32') - x = backend.cast_to_floatx(2) - self.assertEqual(x.dtype.name, 'float32') - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class BackendVariableTest(tf.test.TestCase): + self.assertTrue("Relu6" in relu_op.name) # uses tf.nn.relu6 + self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 6]]) - def test_zeros(self): - x = backend.zeros((3, 4)) - val = backend.eval(x) - self.assertAllClose(val, np.zeros((3, 4))) - - def test_ones(self): - x = backend.ones((3, 4)) - val = backend.eval(x) - self.assertAllClose(val, np.ones((3, 4))) - - def test_eye(self): - x = backend.eye(4) - val = backend.eval(x) - self.assertAllClose(val, np.eye(4)) - - def test_zeros_like(self): - x = backend.zeros((3, 4)) - y = backend.zeros_like(x) - val = backend.eval(y) - self.assertAllClose(val, np.zeros((3, 4))) - - def test_ones_like(self): - x = backend.zeros((3, 4)) - y = backend.ones_like(x) - val = backend.eval(y) - self.assertAllClose(val, np.ones((3, 4))) - - def test_random_uniform_variable(self): - x = backend.random_uniform_variable((30, 20), low=1., high=2., seed=0) - val = backend.eval(x) - self.assertAllClose(val.mean(), 1.5, atol=1e-1) - self.assertAllClose(val.max(), 2., atol=1e-1) - self.assertAllClose(val.min(), 1., atol=1e-1) - - def test_random_normal_variable(self): - x = backend.random_normal_variable((30, 20), 1., 0.5, seed=0) - val = backend.eval(x) - self.assertAllClose(val.mean(), 1., atol=1e-1) - self.assertAllClose(val.std(), 0.5, atol=1e-1) - - def test_count_params(self): - x = backend.zeros((4, 5)) - val = backend.count_params(x) - self.assertAllClose(val, 20) - - def test_constant(self): - ref_val = np.random.random((3, 4)).astype('float32') - x = backend.constant(ref_val) - val = backend.eval(x) - self.assertAllClose(val, ref_val) - - def test_sparse_variable(self): - val = scipy.sparse.eye(10) - x = 
backend.variable(val) - self.assertTrue(isinstance(x, tf.SparseTensor)) - - y = backend.to_dense(x) - self.assertFalse(backend.is_sparse(y)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class BackendLinearAlgebraTest(tf.test.TestCase, parameterized.TestCase): + # max value > 6 + relu_op = backend.relu(x, max_value=10.0) + self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 7]]) - def test_dot(self): - x = backend.ones(shape=(2, 3)) - y = backend.ones(shape=(3, 4)) - xy = backend.dot(x, y) - self.assertEqual(xy.shape.as_list(), [2, 4]) - - x = backend.ones(shape=(32, 28, 3)) - y = backend.ones(shape=(3, 4)) - xy = backend.dot(x, y) - self.assertEqual(xy.shape.as_list(), [32, 28, 4]) - - @parameterized.parameters( - [(2, 3, 4, 5), (2, 5, 6, 7), (2, 3, 4, 6, 7), (3, 1)], - [(2, 20, 1), (2, 30, 20), (2, 1, 30), (1, 2)], - [(4, 2, 3), (4, 5, 3), (4, 2, 5), (2, 2)], - [(4, 2), (4, 2, 3), (4, 3), (1, 1)], - [(4, 2), (4, 2, 3), (4, 3), 1], - [(4, 2, 3), (4, 3), (4, 2), (2, 1)], - ) - def test_batch_dot(self, x_shape, y_shape, output_shape, axes): - x_val = np.random.random(x_shape) - y_val = np.random.random(y_shape) - x = backend.variable(x_val) - y = backend.variable(y_val) - xy = backend.batch_dot(x, y, axes=axes) - self.assertEqual(tuple(xy.shape.as_list()), output_shape) - xy_val = backend.eval(xy) - ref_val = self._reference_batch_dot(x_val, y_val, axes) - self.assertAllClose(xy_val, ref_val, atol=1e-5) - - def _reference_batch_dot(self, x, y, axes): - if isinstance(axes, int): - axes = [axes, axes] - elif isinstance(axes, tuple): - axes = list(axes) - if axes is None: - if y.ndim == 2: - axes = [x.ndim - 1, y.ndim - 1] - else: - axes = [x.ndim - 1, y.ndim - 2] - if axes[0] < 0: - axes[0] += x.ndim - if axes[1] < 0: - axes[1] += y.ndim - result = [] - axes = [axes[0] - 1, axes[1] - 1] - for xi, yi in zip(x, y): - result.append(np.tensordot(xi, yi, axes)) - result = np.array(result) - if result.ndim == 1: - result = np.expand_dims(result, -1) - return result - - def test_reduction_ops(self): - ops_to_test = [ - (backend.max, np.max), - (backend.min, np.min), - (backend.sum, np.sum), - (backend.prod, np.prod), - (backend.var, np.var), - (backend.std, np.std), - (backend.mean, np.mean), - (backend.argmin, np.argmin), - (backend.argmax, np.argmax), - ] - for keras_op, np_op in ops_to_test: - compare_single_input_op_to_numpy( - keras_op, - np_op, - input_shape=(4, 7, 5), - keras_kwargs={'axis': 1}, - np_kwargs={'axis': 1}) - compare_single_input_op_to_numpy( - keras_op, - np_op, - input_shape=(4, 7, 5), - keras_kwargs={'axis': -1}, - np_kwargs={'axis': -1}) - if 'keepdims' in tf_inspect.getargspec(keras_op).args: - compare_single_input_op_to_numpy( - keras_op, - np_op, - input_shape=(4, 7, 5), - keras_kwargs={ - 'axis': 1, - 'keepdims': True - }, - np_kwargs={ - 'axis': 1, - 'keepdims': True - }) - - def test_elementwise_ops(self): - ops_to_test = [ - (backend.square, np.square), - (backend.abs, np.abs), - (backend.round, np.round), - (backend.sign, np.sign), - (backend.sin, np.sin), - (backend.cos, np.cos), - (backend.exp, np.exp), - ] - for keras_op, np_op in ops_to_test: - compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7)) - - ops_to_test = [ - (backend.sqrt, np.sqrt), - (backend.log, np.log), - ] - for keras_op, np_op in ops_to_test: - compare_single_input_op_to_numpy( - keras_op, np_op, input_shape=(4, 7), negative_values=False) - - compare_single_input_op_to_numpy( - backend.clip, - np.clip, - input_shape=(6, 4), - 
keras_kwargs={ - 'min_value': 0.1, - 'max_value': 2.4 - }, - np_kwargs={ - 'a_min': 0.1, - 'a_max': 1.4 - }) - - compare_single_input_op_to_numpy( - backend.pow, np.power, input_shape=(6, 4), keras_args=[3], np_args=[3]) - - def test_two_tensor_ops(self): - ops_to_test = [ - (backend.equal, np.equal), - (backend.not_equal, np.not_equal), - (backend.greater, np.greater), - (backend.greater_equal, np.greater_equal), - (backend.less, np.less), - (backend.less_equal, np.less_equal), - (backend.maximum, np.maximum), - (backend.minimum, np.minimum), - ] - for keras_op, np_op in ops_to_test: - compare_two_inputs_op_to_numpy( - keras_op, np_op, input_shape_a=(4, 7), input_shape_b=(4, 7)) - - def test_relu(self): - x = tf.convert_to_tensor([[-4, 0], [2, 7]], 'float32') - - # standard relu - relu_op = backend.relu(x) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 7]]) - - # alpha (leaky relu used) - relu_op = backend.relu(x, alpha=0.5) - if not tf.executing_eagerly(): - self.assertTrue('LeakyRelu' in relu_op.name) - self.assertAllClose(backend.eval(relu_op), [[-2, 0], [2, 7]]) - - # max_value < some elements - relu_op = backend.relu(x, max_value=5.) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 5]]) - - # nn.relu6 used - relu_op = backend.relu(x, max_value=6.) - if not tf.executing_eagerly(): - self.assertTrue('Relu6' in relu_op.name) # uses tf.nn.relu6 - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 6]]) - - # max value > 6 - relu_op = backend.relu(x, max_value=10.) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 7]]) - - # max value is float - relu_op = backend.relu(x, max_value=4.3) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 4.3]]) - - # max value == 0 - relu_op = backend.relu(x, max_value=0.) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [0, 0]]) - - # alpha and max_value - relu_op = backend.relu(x, alpha=0.25, max_value=3.) - self.assertAllClose(backend.eval(relu_op), [[-1, 0], [2, 3]]) - - # threshold - relu_op = backend.relu(x, threshold=3) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [0, 7]]) - - # threshold is float - relu_op = backend.relu(x, threshold=1.5) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 7]]) - - # threshold is negative - relu_op = backend.relu(x, threshold=-5) - self.assertAllClose(backend.eval(relu_op), [[-4, 0], [2, 7]]) - - # threshold and max_value - relu_op = backend.relu(x, threshold=3, max_value=5.) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [0, 5]]) - - # threshold and alpha - relu_op = backend.relu(x, alpha=0.25, threshold=4.) - self.assertAllClose(backend.eval(relu_op), [[-2, -1], [-0.5, 7]]) - - # threshold, alpha, and max_value - relu_op = backend.relu(x, alpha=0.25, threshold=4., max_value=5.) 
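Worked by hand, this combined case follows from the same piecewise form sketched earlier: entries below the threshold scale as alpha * (x - threshold), entries at or above it are clipped at max_value:

# x = [[-4, 0], [2, 7]], alpha = 0.25, threshold = 4, max_value = 5
# -4 -> 0.25 * (-4 - 4) = -2.0
#  0 -> 0.25 * ( 0 - 4) = -1.0
#  2 -> 0.25 * ( 2 - 4) = -0.5
#  7 -> min(7, 5)       =  5.0
# which is the [[-2, -1], [-0.5, 5]] matrix asserted next.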
- self.assertAllClose(backend.eval(relu_op), [[-2, -1], [-0.5, 5]]) - - # Test case for GitHub issue 35430, with integer dtype - x = input_layer.Input(shape=(), name='x', dtype='int64') - _ = activation.ReLU(max_value=100., dtype='int64')(x) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class BackendShapeOpsTest(tf.test.TestCase): + # max value is float + relu_op = backend.relu(x, max_value=4.3) + self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 4.3]]) - def test_reshape(self): - compare_single_input_op_to_numpy( - backend.reshape, - np.reshape, - input_shape=(4, 7), - keras_args=[(2, 14)], - np_args=[(2, 14)]) - - def test_concatenate(self): - a = backend.variable(np.ones((1, 2, 3))) - b = backend.variable(np.ones((1, 2, 2))) - y = backend.concatenate([a, b], axis=-1) - self.assertEqual(y.shape.as_list(), [1, 2, 5]) - - def test_permute_dimensions(self): - compare_single_input_op_to_numpy( - backend.permute_dimensions, - np.transpose, - input_shape=(4, 7), - keras_args=[(1, 0)], - np_args=[(1, 0)]) - - def test_resize_images(self): - height_factor = 2 - width_factor = 2 - data_format = 'channels_last' - x = backend.variable(np.ones((1, 2, 2, 3))) - y = backend.resize_images(x, height_factor, width_factor, data_format) - self.assertEqual(y.shape.as_list(), [1, 4, 4, 3]) - - data_format = 'channels_first' - x = backend.variable(np.ones((1, 3, 2, 2))) - y = backend.resize_images(x, height_factor, width_factor, data_format) - self.assertEqual(y.shape.as_list(), [1, 3, 4, 4]) - - # Use with a dynamic axis: - if not tf.executing_eagerly(): - x = backend.placeholder(shape=(1, 3, None, None)) - y = backend.resize_images(x, height_factor, width_factor, data_format) - self.assertEqual(y.shape.as_list(), [1, 3, None, None]) - - # Invalid use: - with self.assertRaises(ValueError): - backend.resize_images( - x, height_factor, width_factor, data_format='unknown') - - def test_resize_volumes(self): - height_factor = 2 - width_factor = 2 - depth_factor = 2 - data_format = 'channels_last' - x = backend.variable(np.ones((1, 2, 2, 2, 3))) - y = backend.resize_volumes(x, depth_factor, height_factor, width_factor, - data_format) - self.assertEqual(y.shape.as_list(), [1, 4, 4, 4, 3]) - - data_format = 'channels_first' - x = backend.variable(np.ones((1, 3, 2, 2, 2))) - y = backend.resize_volumes(x, depth_factor, height_factor, width_factor, - data_format) - self.assertEqual(y.shape.as_list(), [1, 3, 4, 4, 4]) - - # Invalid use: - with self.assertRaises(ValueError): - backend.resize_volumes( - x, depth_factor, height_factor, width_factor, data_format='unknown') - - def test_repeat_elements(self): - x = backend.variable(np.ones((1, 3, 2))) - y = backend.repeat_elements(x, 3, axis=1) - self.assertEqual(y.shape.as_list(), [1, 9, 2]) - - # Use with a dynamic axis: - if not tf.executing_eagerly(): - x = backend.placeholder(shape=(2, None, 2)) - y = backend.repeat_elements(x, 3, axis=1) - self.assertEqual(y.shape.as_list(), [2, None, 2]) - - def test_repeat(self): - x = backend.variable(np.ones((1, 3))) - y = backend.repeat(x, 2) - self.assertEqual(y.shape.as_list(), [1, 2, 3]) - - def test_flatten(self): - compare_single_input_op_to_numpy( - backend.flatten, - np.reshape, - input_shape=(4, 7, 6), - np_args=[(4 * 7 * 6,)]) - - def test_batch_flatten(self): - compare_single_input_op_to_numpy( - backend.batch_flatten, - np.reshape, - input_shape=(4, 7, 6), - np_args=[(4, 7 * 6)]) - - def test_temporal_padding(self): - - def ref_op(x, padding): - shape = list(x.shape) - 
shape[1] += padding[0] + padding[1] - y = np.zeros(tuple(shape)) - y[:, padding[0]:-padding[1], :] = x - return y - - compare_single_input_op_to_numpy( - backend.temporal_padding, - ref_op, - input_shape=(4, 7, 6), - keras_args=[(2, 3)], - np_args=[(2, 3)]) - - def test_spatial_2d_padding(self): - - def ref_op(x, padding, data_format='channels_last'): - shape = list(x.shape) - if data_format == 'channels_last': - shape[1] += padding[0][0] + padding[0][1] - shape[2] += padding[1][0] + padding[1][1] - y = np.zeros(tuple(shape)) - y[:, padding[0][0]:-padding[0][1], padding[1][0]:-padding[1][1], :] = x - else: - shape[2] += padding[0][0] + padding[0][1] - shape[3] += padding[1][0] + padding[1][1] - y = np.zeros(tuple(shape)) - y[:, :, padding[0][0]:-padding[0][1], padding[1][0]:-padding[1][1]] = x - return y - - compare_single_input_op_to_numpy( - backend.spatial_2d_padding, - ref_op, - input_shape=(2, 3, 2, 3), - keras_args=[((2, 3), (1, 2))], - keras_kwargs={'data_format': 'channels_last'}, - np_args=[((2, 3), (1, 2))], - np_kwargs={'data_format': 'channels_last'}) - compare_single_input_op_to_numpy( - backend.spatial_2d_padding, - ref_op, - input_shape=(2, 3, 2, 3), - keras_args=[((2, 3), (1, 2))], - keras_kwargs={'data_format': 'channels_first'}, - np_args=[((2, 3), (1, 2))], - np_kwargs={'data_format': 'channels_first'}) - - def test_spatial_3d_padding(self): - - def ref_op(x, padding, data_format='channels_last'): - shape = list(x.shape) - if data_format == 'channels_last': - shape[1] += padding[0][0] + padding[0][1] - shape[2] += padding[1][0] + padding[1][1] - shape[3] += padding[2][0] + padding[2][1] - y = np.zeros(tuple(shape)) - y[:, padding[0][0]:-padding[0][1], padding[1][0]:-padding[1][1], - padding[2][0]:-padding[2][1], :] = x - else: - shape[2] += padding[0][0] + padding[0][1] - shape[3] += padding[1][0] + padding[1][1] - shape[4] += padding[2][0] + padding[2][1] - y = np.zeros(tuple(shape)) - y[:, :, padding[0][0]:-padding[0][1], padding[1][0]:-padding[1][1], - padding[2][0]:-padding[2][1]] = x - return y - - compare_single_input_op_to_numpy( - backend.spatial_3d_padding, - ref_op, - input_shape=(2, 3, 2, 3, 2), - keras_args=[((2, 3), (1, 2), (2, 3))], - keras_kwargs={'data_format': 'channels_last'}, - np_args=[((2, 3), (1, 2), (2, 3))], - np_kwargs={'data_format': 'channels_last'}) - compare_single_input_op_to_numpy( - backend.spatial_3d_padding, - ref_op, - input_shape=(2, 3, 2, 3, 2), - keras_args=[((2, 3), (1, 2), (2, 3))], - keras_kwargs={'data_format': 'channels_first'}, - np_args=[((2, 3), (1, 2), (2, 3))], - np_kwargs={'data_format': 'channels_first'}) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class BackendNNOpsTest(tf.test.TestCase, parameterized.TestCase): + # max value == 0 + relu_op = backend.relu(x, max_value=0.0) + self.assertAllClose(backend.eval(relu_op), [[0, 0], [0, 0]]) - def test_bias_add(self): - keras_op = backend.bias_add - np_op = np.add - compare_two_inputs_op_to_numpy( - keras_op, np_op, input_shape_a=(4, 7), input_shape_b=(7,)) - compare_two_inputs_op_to_numpy( - keras_op, np_op, input_shape_a=(4, 3, 7), input_shape_b=(7,)) - compare_two_inputs_op_to_numpy( - keras_op, np_op, input_shape_a=(4, 3, 5, 7), input_shape_b=(7,)) - compare_two_inputs_op_to_numpy( - keras_op, np_op, input_shape_a=(4, 3, 5, 2, 7), input_shape_b=(7,)) - - with self.assertRaises((ValueError, tf.errors.InvalidArgumentError)): - x = backend.variable((3, 4)) - b = backend.variable((3, 4)) - backend.bias_add(x, b) - with 
self.assertRaises(ValueError): - x = backend.variable((3, 4)) - b = backend.variable((4,)) - backend.bias_add(x, b, data_format='unknown') - - def test_bias_add_channels_first(self): - - def keras_op(x, b): - return backend.bias_add(x, b, data_format='channels_first') - - def np_op(x, b): - if x.ndim == 3: - b = b.reshape((1, b.shape[0], 1)) - if x.ndim == 4: - b = b.reshape((1, b.shape[0], 1, 1)) - return x + b - - compare_two_inputs_op_to_numpy( - keras_op, np_op, input_shape_a=(4, 3, 7), input_shape_b=(3,)) - compare_two_inputs_op_to_numpy( - keras_op, np_op, input_shape_a=(4, 3, 5, 7), input_shape_b=(3,)) - - def test_pool2d(self): - val = np.random.random((10, 3, 10, 10)) - x = backend.variable(val) - y = backend.pool2d( - x, (2, 2), - strides=(1, 1), - padding='valid', - data_format='channels_first', - pool_mode='max') - self.assertEqual(y.shape.as_list(), [10, 3, 9, 9]) - - y = backend.pool2d( - x, (2, 2), - strides=(1, 1), - padding='valid', - data_format='channels_first', - pool_mode='avg') - self.assertEqual(y.shape.as_list(), [10, 3, 9, 9]) - - val = np.random.random((10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool2d( - x, (2, 2), strides=(1, 1), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 9, 9, 3]) - - val = np.random.random((10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool2d( - x, (2, 2), strides=(1, 1), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 3]) - - val = np.random.random((10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool2d( - x, (2, 2), strides=(2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 3]) - - with self.assertRaises(ValueError): - y = backend.pool2d( - x, (2, 2), - strides=(2, 2), - padding='other', - data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.pool2d(x, (2, 2), strides=(2, 2), data_format='other') - with self.assertRaises(ValueError): - y = backend.pool2d(x, (2, 2, 2), strides=(2, 2)) - with self.assertRaises(ValueError): - y = backend.pool2d(x, (2, 2), strides=(2, 2, 2)) - with self.assertRaises(ValueError): - y = backend.pool2d(x, (2, 2), strides=(2, 2), pool_mode='other') - - def test_pool3d(self): - val = np.random.random((10, 3, 10, 10, 10)) - x = backend.variable(val) - y = backend.pool3d( - x, (2, 2, 2), - strides=(1, 1, 1), - padding='valid', - data_format='channels_first', - pool_mode='max') - self.assertEqual(y.shape.as_list(), [10, 3, 9, 9, 9]) - - y = backend.pool3d( - x, (2, 2, 2), - strides=(1, 1, 1), - padding='valid', - data_format='channels_first', - pool_mode='avg') - self.assertEqual(y.shape.as_list(), [10, 3, 9, 9, 9]) - - val = np.random.random((10, 10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool3d( - x, (2, 2, 2), - strides=(1, 1, 1), - padding='valid', - data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 9, 9, 9, 3]) - - val = np.random.random((10, 10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool3d( - x, (2, 2, 2), - strides=(1, 1, 1), - padding='same', - data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 10, 3]) - - val = np.random.random((10, 10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool3d( - x, (2, 2, 2), - strides=(2, 2, 2), - padding='same', - data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 5, 3]) - - def test_conv1d(self): - val = np.random.random((10, 4, 10)) - x = 
backend.variable(val) - kernel_val = np.random.random((3, 4, 5)) - k = backend.variable(kernel_val) - y = backend.conv1d( - x, k, strides=(1,), padding='valid', data_format='channels_first') - self.assertEqual(y.shape.as_list(), [10, 5, 8]) - - val = np.random.random((10, 10, 4)) - x = backend.variable(val) - y = backend.conv1d( - x, k, strides=(1,), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 8, 5]) - - val = np.random.random((10, 10, 4)) - x = backend.variable(val) - y = backend.conv1d( - x, k, strides=(1,), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 5]) - - val = np.random.random((10, 10, 4)) - x = backend.variable(val) - y = backend.conv1d( - x, k, strides=(2,), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5]) - - def test_local_conv_channels_dim(self): - filters = 3 - batch_size = 2 - - for input_shape in [(3, 5), (2, 3, 5), (2, 5, 3, 4)]: - channels_in = input_shape[0] - input_spatial_shape = input_shape[1:] - dim = len(input_spatial_shape) - - inputs = np.random.normal(0, 1, (batch_size,) + input_shape) - inputs_cf = backend.variable(inputs) - - for kernel_size in [1, 2]: - for stride in [1, 2]: - kernel_sizes = (kernel_size,) * dim - strides = (stride,) * dim - - output_shape = tuple([ - (i - kernel_size + stride) // stride for i in input_spatial_shape - ]) - - kernel_shape = (np.prod(output_shape), - np.prod(kernel_sizes) * channels_in, filters) - - kernel = np.random.normal( - 0, 1, - output_shape + (channels_in, np.prod(kernel_sizes), filters)) - - kernel_cf = np.reshape(kernel, kernel_shape) - kernel_cf = backend.variable(kernel_cf) - - conv_cf = backend.local_conv(inputs_cf, kernel_cf, kernel_sizes, - strides, output_shape, 'channels_first') - - inputs_cl = np.transpose(inputs, - [0, 2] + list(range(3, dim + 2)) + [1]) - inputs_cl = backend.variable(inputs_cl) - - kernel_cl = np.reshape( - np.transpose(kernel, - list(range(dim)) + [dim + 1, dim, dim + 2]), - kernel_shape) - kernel_cl = backend.variable(kernel_cl) - - conv_cl = backend.local_conv(inputs_cl, kernel_cl, kernel_sizes, - strides, output_shape, 'channels_last') - - conv_cf = backend.eval(conv_cf) - conv_cl = backend.eval(conv_cl) - - self.assertAllCloseAccordingToType( - conv_cf, - np.transpose(conv_cl, [0, dim + 1] + list(range(1, dim + 1))), - atol=1e-5) - - @parameterized.named_parameters( - ('local_conv1d', (5, 6), (3,), (1,), (3,)), - ('local_conv2d', (4, 5, 6), (3, 3), (1, 1), (2, 3))) - def test_local_conv_1d_and_2d(self, input_shape, kernel_sizes, strides, - output_shape): - filters = 3 - batch_size = 2 - - inputs = np.random.normal(0, 1, (batch_size,) + input_shape) - inputs = backend.variable(inputs) - - kernel = np.random.normal(0, 1, - (np.prod(output_shape), np.prod(kernel_sizes) * - input_shape[-1], filters)) - kernel = backend.variable(kernel) - - local_conv = backend.local_conv(inputs, kernel, kernel_sizes, strides, - output_shape, 'channels_last') - if len(output_shape) == 1: - local_conv_dim = backend.local_conv1d(inputs, kernel, kernel_sizes, - strides, 'channels_last') - else: - local_conv_dim = backend.local_conv2d(inputs, kernel, kernel_sizes, - strides, output_shape, - 'channels_last') - - local_conv = backend.eval(local_conv) - local_conv_dim = backend.eval(local_conv_dim) - - self.assertAllCloseAccordingToType(local_conv, local_conv_dim) - - def test_conv2d(self): - kernel_val = np.random.random((3, 3, 4, 5)) - k = backend.variable(kernel_val) - - # Test 
channels_first - val = np.random.random((10, 4, 10, 10)) - x = backend.variable(val) - y = backend.conv2d(x, k, padding='valid', data_format='channels_first') - self.assertEqual(y.shape.as_list(), [10, 5, 8, 8]) - - # Test channels_last - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv2d( - x, k, strides=(1, 1), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 8, 8, 5]) - - # Test same padding - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv2d(x, k, padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 5]) - - # Test dilation_rate - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv2d( - x, k, dilation_rate=(2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 5]) - - # Test strides - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv2d( - x, k, strides=(2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 5]) - - # Test invalid arguments - with self.assertRaises(ValueError): - y = backend.conv2d( - x, k, (2, 2), padding='other', data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.conv2d(x, k, (2, 2), data_format='other') - with self.assertRaises(ValueError): - y = backend.conv2d(x, k, (2, 2, 2)) - - def test_conv2d_transpose(self): - input_size = (7, 8) - kernel_size = (3, 3) - input_depth = 6 - filters = 6 - batch_size = 2 - - kernel_val = np.random.random(kernel_size + (input_depth, filters)) - k = backend.variable(kernel_val) - - # Test channels_first - input_val = np.random.random((batch_size, input_depth) + input_size) - x = backend.variable(input_val) - y = backend.conv2d_transpose( - x, - k, (batch_size, filters) + input_size, - padding='same', - data_format='channels_first') - self.assertEqual( - tuple(y.shape.as_list()), (batch_size, filters) + input_size) - - # Test channels_last - input_val = np.random.random((batch_size,) + input_size + (input_depth,)) - x = backend.variable(input_val) - y = backend.conv2d_transpose( - x, - k, (batch_size,) + input_size + (filters,), - padding='same', - data_format='channels_last') - self.assertEqual( - tuple(y.shape.as_list()), (batch_size,) + input_size + (filters,)) - - # Test dilation_rate - y = backend.conv2d_transpose( - x, - k, (batch_size,) + input_size + (filters,), - padding='same', - data_format='channels_last', - dilation_rate=(2, 2)) - self.assertEqual( - tuple(y.shape.as_list()), (batch_size,) + input_size + (filters,)) - - # Test dilation_rate error - with self.assertRaisesRegex( - ValueError, - 'Expected the 2 dimensions'): - y = backend.conv2d_transpose( - x, - k, (batch_size,) + input_size + (filters,), - padding='same', - data_format='channels_last', - dilation_rate=(1, 2)) - - # Test batch size of None in output_shape - y = backend.conv2d_transpose( - x, - k, (None,) + input_size + (filters,), - padding='same', - data_format='channels_last') - self.assertEqual( - tuple(y.shape.as_list()), (batch_size,) + input_size + (filters,)) - - # Test invalid values - with self.assertRaises(ValueError): - y = backend.conv2d_transpose( - x, k, (2, 2, 8, 9), padding='other', data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.conv2d_transpose(x, k, (2, 2, 8, 9), data_format='other') - - def test_separable_conv2d(self): - val = np.random.random((10, 4, 10, 
10)) - x = backend.variable(val) - depthwise_kernel_val = np.random.random((3, 3, 4, 1)) - pointwise_kernel_val = np.random.random((1, 1, 4, 5)) - dk = backend.variable(depthwise_kernel_val) - pk = backend.variable(pointwise_kernel_val) - y = backend.separable_conv2d( - x, dk, pk, padding='valid', data_format='channels_first') - self.assertEqual(y.shape.as_list(), [10, 5, 8, 8]) - - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.separable_conv2d( - x, dk, pk, strides=(1, 1), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 8, 8, 5]) - - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.separable_conv2d( - x, dk, pk, strides=(1, 1), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 5]) - - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.separable_conv2d( - x, dk, pk, strides=(2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 5]) - with self.assertRaises(ValueError): - y = backend.separable_conv2d( - x, dk, pk, (2, 2), padding='other', data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.separable_conv2d(x, dk, pk, (2, 2), data_format='other') - with self.assertRaises(ValueError): - y = backend.separable_conv2d(x, dk, pk, (2, 2, 2)) - - def test_conv3d(self): - val = np.random.random((10, 4, 10, 10, 10)) - x = backend.variable(val) - kernel_val = np.random.random((3, 3, 3, 4, 5)) - k = backend.variable(kernel_val) - y = backend.conv3d(x, k, padding='valid', data_format='channels_first') - self.assertEqual(y.shape.as_list(), [10, 5, 8, 8, 8]) - - val = np.random.random((10, 10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv3d( - x, k, strides=(1, 1, 1), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 8, 8, 8, 5]) - - val = np.random.random((10, 10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv3d( - x, k, strides=(1, 1, 1), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 10, 5]) - - val = np.random.random((10, 10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv3d( - x, k, strides=(2, 2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 5, 5]) - with self.assertRaises(ValueError): - y = backend.conv3d( - x, k, (2, 2, 2), padding='other', data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.conv3d(x, k, (2, 2, 2), data_format='other') - with self.assertRaises(ValueError): - y = backend.conv3d(x, k, (2, 2)) - - def test_rnn(self): - # implement a simple RNN - num_samples = 4 - input_dim = 5 - output_dim = 3 - timesteps = 6 - - input_val = np.random.random( - (num_samples, timesteps, input_dim)).astype(np.float32) - init_state_val = np.random.random( - (num_samples, output_dim)).astype(np.float32) - w_i_val = np.random.random((input_dim, output_dim)).astype(np.float32) - w_o_val = np.random.random((output_dim, output_dim)).astype(np.float32) - np_mask = np.random.randint(2, size=(num_samples, timesteps)) - - def rnn_step_fn(): - w_i = backend.variable(w_i_val) - w_o = backend.variable(w_o_val) - - def step_function(x, states): - assert len(states) == 1 - prev_output = states[0] - output = backend.dot(x, w_i) + backend.dot(prev_output, w_o) - return output, [output] - - return step_function - - # test default setup - last_output_list = [[], 
[], [], [], [], []] - outputs_list = [[], [], [], [], [], []] - state_list = [[], [], [], [], [], []] - - rnn_fn = rnn_step_fn() - inputs = backend.variable(input_val) - initial_states = [backend.variable(init_state_val)] - mask = backend.variable(np_mask) - - kwargs_list = [ - { - 'go_backwards': False, - 'mask': None - }, - { - 'go_backwards': False, - 'mask': None, - 'unroll': True - }, - { - 'go_backwards': True, - 'mask': None - }, - { - 'go_backwards': True, - 'mask': None, - 'unroll': True - }, - { - 'go_backwards': False, - 'mask': mask - }, - { - 'go_backwards': False, - 'mask': mask, - 'unroll': True - }, - ] - for i, kwargs in enumerate(kwargs_list): - last_output, outputs, new_states = backend.rnn(rnn_fn, inputs, - initial_states, **kwargs) - # check static shape inference - self.assertEqual(last_output.shape.as_list(), [num_samples, output_dim]) - self.assertEqual(outputs.shape.as_list(), - [num_samples, timesteps, output_dim]) - for state in new_states: - self.assertEqual(state.shape.as_list(), [num_samples, output_dim]) - - last_output_list[i].append(backend.eval(last_output)) - outputs_list[i].append(backend.eval(outputs)) - self.assertLen(new_states, 1) - state_list[i].append(backend.eval(new_states[0])) - - def assert_list_pairwise(z_list, atol=1e-05): - for (z1, z2) in zip(z_list[1:], z_list[:-1]): - self.assertAllClose(z1, z2, atol=atol) - - assert_list_pairwise(last_output_list[0], atol=1e-04) - assert_list_pairwise(outputs_list[0], atol=1e-04) - assert_list_pairwise(state_list[0], atol=1e-04) - assert_list_pairwise(last_output_list[2], atol=1e-04) - assert_list_pairwise(outputs_list[2], atol=1e-04) - assert_list_pairwise(state_list[2], atol=1e-04) - - for l, u_l in zip(last_output_list[0], last_output_list[1]): - self.assertAllClose(l, u_l, atol=1e-04) - - for o, u_o in zip(outputs_list[0], outputs_list[1]): - self.assertAllClose(o, u_o, atol=1e-04) - - for s, u_s in zip(state_list[0], state_list[1]): - self.assertAllClose(s, u_s, atol=1e-04) - - for b_l, b_u_l in zip(last_output_list[2], last_output_list[3]): - self.assertAllClose(b_l, b_u_l, atol=1e-04) - - for b_o, b_u_o in zip(outputs_list[2], outputs_list[3]): - self.assertAllClose(b_o, b_u_o, atol=1e-04) - - for b_s, b_u_s in zip(state_list[2], state_list[3]): - self.assertAllClose(b_s, b_u_s, atol=1e-04) - - def test_rnn_additional_states(self): - # implement a simple RNN - num_samples = 4 - input_dim = 5 - output_dim = 3 - timesteps = 6 - - input_val = np.random.random( - (num_samples, timesteps, input_dim)).astype(np.float32) - init_state_val = np.random.random( - (num_samples, output_dim)).astype(np.float32) - w_i_val = np.random.random((input_dim, output_dim)).astype(np.float32) - w_o_val = np.random.random((output_dim, output_dim)).astype(np.float32) - np_mask = np.random.randint(2, size=(num_samples, timesteps)) - - def rnn_step_fn(): - w_i = backend.variable(w_i_val) - w_o = backend.variable(w_o_val) - - def step_function(x, states): - assert len(states) == 2 - prev_output = states[0] - output = backend.dot(x, w_i) + backend.dot(prev_output, w_o) - return output, [output, backend.concatenate([output, output], axis=-1)] - - return step_function - - # test default setup - last_output_list = [[], [], [], [], [], []] - outputs_list = [[], [], [], [], [], []] - state_list = [[], [], [], [], [], []] - additional_state_list = [[], [], [], [], [], []] - - rnn_fn = rnn_step_fn() - inputs = backend.variable(input_val) - initial_states = [ - backend.variable(init_state_val), - tf.convert_to_tensor( - 
np.concatenate([init_state_val, init_state_val], axis=-1)) - ] - mask = backend.variable(np_mask) - - kwargs_list = [ - { - 'go_backwards': False, - 'mask': None - }, - { - 'go_backwards': False, - 'mask': None, - 'unroll': True - }, - { - 'go_backwards': True, - 'mask': None - }, - { - 'go_backwards': True, - 'mask': None, - 'unroll': True - }, - { - 'go_backwards': False, - 'mask': mask - }, - { - 'go_backwards': False, - 'mask': mask, - 'unroll': True - }, - ] - for i, kwargs in enumerate(kwargs_list): - last_output, outputs, new_states = backend.rnn(rnn_fn, inputs, - initial_states, **kwargs) - # check static shape inference - self.assertEqual(last_output.shape.as_list(), [num_samples, output_dim]) - self.assertEqual(outputs.shape.as_list(), - [num_samples, timesteps, output_dim]) - # for state in new_states: - # self.assertEqual(state.shape.as_list(), - # [num_samples, output_dim]) - self.assertEqual(new_states[0].shape.as_list(), [num_samples, output_dim]) - self.assertEqual(new_states[1].shape.as_list(), - [num_samples, 2 * output_dim]) - - last_output_list[i].append(backend.eval(last_output)) - outputs_list[i].append(backend.eval(outputs)) - self.assertLen(new_states, 2) - state_list[i].append(backend.eval(new_states[0])) - additional_state_list[i].append(backend.eval(new_states[1])) - - def assert_list_pairwise(z_list, atol=1e-05): - for (z1, z2) in zip(z_list[1:], z_list[:-1]): - self.assertAllClose(z1, z2, atol=atol) - - assert_list_pairwise(last_output_list[0], atol=1e-04) - assert_list_pairwise(outputs_list[0], atol=1e-04) - assert_list_pairwise(state_list[0], atol=1e-04) - assert_list_pairwise(additional_state_list[0], atol=1e-04) - assert_list_pairwise(last_output_list[2], atol=1e-04) - assert_list_pairwise(outputs_list[2], atol=1e-04) - assert_list_pairwise(state_list[2], atol=1e-04) - assert_list_pairwise(additional_state_list[2], atol=1e-04) - - for l, u_l in zip(last_output_list[0], last_output_list[1]): - self.assertAllClose(l, u_l, atol=1e-04) - - for o, u_o in zip(outputs_list[0], outputs_list[1]): - self.assertAllClose(o, u_o, atol=1e-04) - - for s, u_s in zip(state_list[0], state_list[1]): - self.assertAllClose(s, u_s, atol=1e-04) - - for s, u_s in zip(additional_state_list[0], additional_state_list[1]): - self.assertAllClose(s, u_s, atol=1e-04) - - for b_l, b_u_l in zip(last_output_list[2], last_output_list[3]): - self.assertAllClose(b_l, b_u_l, atol=1e-04) - - for b_o, b_u_o in zip(outputs_list[2], outputs_list[3]): - self.assertAllClose(b_o, b_u_o, atol=1e-04) - - for b_s, b_u_s in zip(state_list[2], state_list[3]): - self.assertAllClose(b_s, b_u_s, atol=1e-04) - - for s, u_s in zip(additional_state_list[2], additional_state_list[3]): - self.assertAllClose(s, u_s, atol=1e-04) - - def test_rnn_output_and_state_masking_independent(self): - num_samples = 2 - num_timesteps = 4 - state_and_io_size = 2 - mask_last_num_timesteps = 2 # for second sample only - - # a step function that just outputs inputs, - # but increments states +1 per timestep - def step_function(inputs, states): - return inputs, [s + 1 for s in states] - - inputs_vals = np.random.random( - (num_samples, num_timesteps, state_and_io_size)) - initial_state_vals = np.random.random((num_samples, state_and_io_size)) - # masking of two last timesteps for second sample only - mask_vals = np.ones((num_samples, num_timesteps)) - mask_vals[1, -mask_last_num_timesteps:] = 0 - - # outputs expected to be same as inputs for the first sample - expected_outputs = inputs_vals.copy() - # but for the second sample all 
outputs in masked region should be the same - # as last output before masked region - expected_outputs[1, -mask_last_num_timesteps:] = \ - expected_outputs[1, -(mask_last_num_timesteps + 1)] - - expected_last_state = initial_state_vals.copy() - # first state should be incremented for every timestep (no masking) - expected_last_state[0] += num_timesteps - # second state should not be incremented for last two timesteps - expected_last_state[1] += (num_timesteps - mask_last_num_timesteps) - - # verify same expected output for `unroll=true/false` - inputs = backend.variable(inputs_vals) - initial_states = [backend.variable(initial_state_vals)] - mask = backend.variable(mask_vals) - for unroll in [True, False]: - _, outputs, last_states = backend.rnn( - step_function, - inputs, - initial_states, - mask=mask, - unroll=unroll, - input_length=num_timesteps if unroll else None) - - self.assertAllClose(backend.eval(outputs), expected_outputs) - self.assertAllClose(backend.eval(last_states[0]), expected_last_state) - - def test_rnn_output_num_dim_larger_than_2_masking(self): - num_samples = 3 - num_timesteps = 4 - num_features = 5 - - def step_function(inputs, states): - outputs = backend.tile(backend.expand_dims(inputs), [1, 1, 2]) - return outputs, [backend.identity(s) for s in states] - # Note: cannot just return states (which can be a problem) -> - # tensorflow/python/ops/resource_variable_ops.py", line 824, in set_shape - # NotImplementedError: ResourceVariable does not implement set_shape() - - inputs_vals = np.random.random((num_samples, num_timesteps, num_features)) - initial_state_vals = np.random.random((num_samples, 6)) - mask_vals = np.ones((num_samples, num_timesteps)) - mask_vals[-1, -1] = 0 # final timestep masked for last sample - - expected_outputs = np.repeat(inputs_vals[..., None], repeats=2, axis=-1) - # for the last sample, the final timestep (in masked region) should be the - # same as the second to final output (before masked region) - expected_outputs[-1, -1] = expected_outputs[-1, -2] - - inputs = backend.variable(inputs_vals) - initial_states = [backend.variable(initial_state_vals)] - mask = backend.variable(mask_vals) - for unroll in [True, False]: - _, outputs, _ = backend.rnn( - step_function, - inputs, - initial_states, - mask=mask, - unroll=unroll, - input_length=num_timesteps if unroll else None) - - self.assertAllClose(backend.eval(outputs), expected_outputs) - - def test_rnn_state_num_dim_larger_than_2_masking(self): - num_samples = 3 - num_timesteps = 4 - - def step_function(inputs, states): - return inputs, [s + 1 for s in states] - - inputs_vals = np.random.random((num_samples, num_timesteps, 5)) - initial_state_vals = np.random.random((num_samples, 6, 7)) - mask_vals = np.ones((num_samples, num_timesteps)) - mask_vals[0, -2:] = 0 # final two timesteps masked for first sample - - expected_last_state = initial_state_vals.copy() - expected_last_state[0] += (num_timesteps - 2) - expected_last_state[1:] += num_timesteps - - inputs = backend.variable(inputs_vals) - initial_states = [backend.variable(initial_state_vals)] - mask = backend.variable(mask_vals) - for unroll in [True, False]: - _, _, last_states = backend.rnn( - step_function, - inputs, - initial_states, - mask=mask, - unroll=unroll, - input_length=num_timesteps if unroll else None) - - self.assertAllClose(backend.eval(last_states[0]), expected_last_state) - - def test_batch_normalization(self): - g_val = np.random.random((3,)) - b_val = np.random.random((3,)) - gamma = backend.variable(g_val) - beta = 
backend.variable(b_val) - - # 3D NHC case - val = np.random.random((10, 5, 3)) - x = backend.variable(val) - mean, var = tf.nn.moments(x, (0, 1), None, None, False) - normed = backend.batch_normalization( - x, mean, var, beta, gamma, axis=-1, epsilon=1e-3) - self.assertEqual(normed.shape.as_list(), [10, 5, 3]) - - # 4D NHWC case - val = np.random.random((10, 5, 5, 3)) - x = backend.variable(val) - mean, var = tf.nn.moments(x, (0, 1, 2), None, None, False) - normed = backend.batch_normalization( - x, mean, var, beta, gamma, axis=-1, epsilon=1e-3) - self.assertEqual(normed.shape.as_list(), [10, 5, 5, 3]) - - # 4D NCHW case - if not tf.executing_eagerly(): - # Eager CPU kernel for NCHW does not exist. - val = np.random.random((10, 3, 5, 5)) - x = backend.variable(val) - mean, var = tf.nn.moments(x, (0, 2, 3), None, None, False) - normed = backend.batch_normalization( - x, mean, var, beta, gamma, axis=1, epsilon=1e-3) - self.assertEqual(normed.shape.as_list(), [10, 3, 5, 5]) - - def test_normalize_batch_in_training(self): - val = np.random.random((10, 3, 10, 10)) - x = backend.variable(val) - reduction_axes = (0, 2, 3) - - g_val = np.random.random((3,)) - b_val = np.random.random((3,)) - gamma = backend.variable(g_val) - beta = backend.variable(b_val) - normed, mean, var = backend.normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=1e-3) - self.assertEqual(normed.shape.as_list(), [10, 3, 10, 10]) - self.assertEqual(mean.shape.as_list(), [ - 3, - ]) - self.assertEqual(var.shape.as_list(), [ - 3, - ]) - - # case: gamma=None - gamma = None - normed, mean, var = backend.normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=1e-3) - self.assertEqual(normed.shape.as_list(), [10, 3, 10, 10]) - self.assertEqual(mean.shape.as_list(), [ - 3, - ]) - self.assertEqual(var.shape.as_list(), [ - 3, - ]) - - # case: beta=None - beta = None - normed, mean, var = backend.normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=1e-3) - self.assertEqual(normed.shape.as_list(), [10, 3, 10, 10]) - self.assertEqual(mean.shape.as_list(), [ - 3, - ]) - self.assertEqual(var.shape.as_list(), [ - 3, - ]) - - def test_dropout(self): - inputs = tf.ones((200, 200)) - outputs = backend.dropout(inputs, 0.2) - outputs_val = backend.eval(outputs) - self.assertEqual(np.min(outputs_val), 0) - self.assertAllClose(np.count_nonzero(outputs_val), 32000, atol=1000) - # Test noise shape - outputs = backend.dropout(inputs, 0.2, noise_shape=(200, 1)) - outputs_val = backend.eval(outputs) - # Make sure the whole column gets the same dropout - self.assertEqual(np.min(outputs_val[0, :]), np.max(outputs_val[0, :])) + # alpha and max_value + relu_op = backend.relu(x, alpha=0.25, max_value=3.0) + self.assertAllClose(backend.eval(relu_op), [[-1, 0], [2, 3]]) + # threshold + relu_op = backend.relu(x, threshold=3) + self.assertAllClose(backend.eval(relu_op), [[0, 0], [0, 7]]) -class BackendCrossEntropyLossesTest(tf.test.TestCase, parameterized.TestCase): + # threshold is float + relu_op = backend.relu(x, threshold=1.5) + self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 7]]) - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_crossentropy_with_sigmoid(self): - t = backend.constant([[0, 1, 0]]) - logits = backend.constant([[8., 1., 1.]]) - p = backend.sigmoid(logits) - p = tf.identity(tf.identity(p)) - result = self.evaluate(backend.binary_crossentropy(t, p)) - self.assertArrayNear(result[0], [8., 0.313, 1.313], 1e-3) - - 
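The [8., 0.313, 1.313] expectation above can be reproduced directly from the definition of binary crossentropy; a short NumPy check (illustrative, not part of the test suite):

import numpy as np

t = np.array([0.0, 1.0, 0.0])
logits = np.array([8.0, 1.0, 1.0])
p = 1.0 / (1.0 + np.exp(-logits))                       # sigmoid
bce = -(t * np.log(p) + (1.0 - t) * np.log(1.0 - p))
print(np.round(bce, 3))                                 # [8.    0.313 1.313]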
@test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_loss(self): - t = backend.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - - p = backend.constant([[.9, .05, .05], [.05, .89, .06], [.05, .01, .94]]) - result = backend.categorical_crossentropy(t, p) - self.assertArrayNear(self.evaluate(result), [.105, .116, .062], 1e-3) - - p = backend.constant([[.9, .05, .05], [.05, .89, .01], [.05, .06, .94]]) - result = backend.categorical_crossentropy(t, p, axis=0) - self.assertArrayNear(self.evaluate(result), [.105, .116, .062], 1e-3) - - p = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - result = backend.categorical_crossentropy(t, p, from_logits=True), - self.assertArrayNear(self.evaluate(result)[0], [.002, 0, .17], 1e-3) - - p = backend.constant([[8., 0., 2.], [1., 9., 3.], [1., 1., 5.]]) - result = backend.categorical_crossentropy(t, p, from_logits=True, axis=0), - self.assertArrayNear(self.evaluate(result)[0], [.002, 0, .17], 1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_loss_with_unknown_rank_tensor(self): - t = backend.placeholder() - p = backend.placeholder() - o = backend.categorical_crossentropy(t, p) - - t_val = tf.convert_to_tensor([[1., 0., 0.], [0., 1., 0.], - [0., 0., 1.]]) - p_val = tf.convert_to_tensor([[.9, .05, .05], - [.05, .89, .06], - [.05, .01, .94]]) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.105, .116, .062], 1e-3) - - # With axis set - o = backend.categorical_crossentropy(t, p, axis=0) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.105, .065, .111], 1e-3) - - # from logits - p_val = tf.convert_to_tensor([[8., 1., 1.], [0., 9., 1.], - [2., 3., 5.]]) - o = backend.categorical_crossentropy(t, p, from_logits=True) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.002, 0, .17], 1e-3) - - # from logits and axis set - o = backend.categorical_crossentropy(t, p, from_logits=True, axis=0) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.002, .003, .036], 1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_with_softmax(self): - t = backend.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - logits = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - p = backend.softmax(logits) - p = tf.identity(tf.identity(p)) - result = self.evaluate(backend.categorical_crossentropy(t, p)) - self.assertArrayNear(result, [0.002, 0.0005, 0.17], 1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_sparse_categorical_crossentropy_loss(self): - t = backend.constant([0, 1, 2]) - - p = backend.constant([[.9, .05, .05], [.05, .89, .06], [.05, .01, .94]]) - result = backend.sparse_categorical_crossentropy(t, p) - self.assertArrayNear(self.evaluate(result), [.105, .116, .062], 1e-3) - - p = backend.constant([[.9, .05, .05], [.05, .89, .01], [.05, .06, .94]]) - result = backend.sparse_categorical_crossentropy(t, p, axis=0) - self.assertArrayNear(self.evaluate(result), [.105, .116, .062], 1e-3) - - p = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - result = backend.sparse_categorical_crossentropy(t, p, from_logits=True), - self.assertArrayNear(self.evaluate(result)[0], [.002, 0, .17], 1e-3) - - p = 
backend.constant([[8., 0., 2.], [1., 9., 3.], [1., 1., 5.]]) - result = backend.sparse_categorical_crossentropy( - t, p, from_logits=True, axis=0), - self.assertArrayNear(self.evaluate(result)[0], [.002, 0, .17], 1e-3) - - @test_combinations.generate(test_combinations.combine(mode=['graph'])) - def test_sparse_categorical_crossentropy_loss_with_unknown_rank_tensor(self): - # This test only runs in graph because the TF op layer is not supported yet - # for sparse ops. - t = backend.placeholder() - p = backend.placeholder() - o = backend.sparse_categorical_crossentropy(t, p) - - t_val = tf.convert_to_tensor([0, 1, 2]) - p_val = tf.convert_to_tensor([[.9, .05, .05], - [.05, .89, .06], - [.05, .01, .94]]) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.105, .116, .062], 1e-3) - - # With axis set - with self.assertRaisesRegex( - ValueError, - 'Cannot compute sparse categorical crossentropy with `axis=0`'): - o = backend.sparse_categorical_crossentropy(t, p, axis=0) - f = backend.function([t, p], o) - - _ = f([t_val, p_val]) - - # from logits - p_val = tf.convert_to_tensor([[8., 1., 1.], [0., 9., 1.], - [2., 3., 5.]]) - o = backend.sparse_categorical_crossentropy(t, p, from_logits=True) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.002, 0, .17], 1e-3) - - # from logits and axis set - with self.assertRaisesRegex( - ValueError, - 'Cannot compute sparse categorical crossentropy with `axis=0`'): - o = backend.sparse_categorical_crossentropy( - t, p, from_logits=True, axis=0) - f = backend.function([t, p], o) - - _ = f([t_val, p_val]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_sparse_categorical_crossentropy_with_softmax(self): - t = backend.constant([0, 1, 2]) - logits = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - p = backend.softmax(logits) - p = tf.identity(tf.identity(p)) - result = self.evaluate(backend.sparse_categorical_crossentropy(t, p)) - self.assertArrayNear(result, [0.002, 0.0005, 0.17], 1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_crossentropy_from_logits_no_warnings(self): - t = backend.constant([[0, 1, 0]]) - logits = backend.constant([[8., 1., 1.]]) - with warnings.catch_warnings(record=True) as w: - self.evaluate(backend.binary_crossentropy(t, logits, from_logits=True)) - self.assertEmpty(w) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_crossentropy_from_logits_with_sigmoid(self): - t = backend.constant([[0, 1, 0]]) - logits = backend.constant([[8., 1., 1.]]) - p = activations.sigmoid(logits) - with warnings.catch_warnings(record=True) as w: - self.evaluate(backend.binary_crossentropy(t, p, from_logits=True)) - self.assertLen(w, 1) - self.assertIn('received `from_logits=True`', str(w[0].message)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_from_logits_with_softmax(self): - t = backend.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - logits = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - p = activations.softmax(logits) - with warnings.catch_warnings(record=True) as w: - self.evaluate(backend.categorical_crossentropy(t, p, from_logits=True)) - self.assertLen(w, 1) - self.assertIn('received `from_logits=True`', str(w[0].message)) - - @test_combinations.generate( - 
test_combinations.combine(mode=['graph', 'eager'])) - def test_sparse_categorical_crossentropy_from_logits_with_softmax(self): - t = backend.constant([0, 1, 2]) - logits = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - p = activations.softmax(logits) - with warnings.catch_warnings(record=True) as w: - self.evaluate( - backend.sparse_categorical_crossentropy(t, p, from_logits=True)) - self.assertLen(w, 1) - self.assertIn('received `from_logits=True`', str(w[0].message)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_focal_crossentropy_with_sigmoid(self): - t = backend.constant([[0, 1, 0]]) - logits = backend.constant([[8., 1., 1.]]) - p = backend.sigmoid(logits) - p = tf.identity(tf.identity(p)) - result = self.evaluate(backend.binary_focal_crossentropy(t, p, gamma=2.0)) - self.assertArrayNear(result[0], [7.995, 0.022, 0.701], 1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_focal_crossentropy_from_logits(self): - t = backend.constant([[0, 1, 0]]) - logits = backend.constant([[8., 1., 1.]]) - result = self.evaluate( - backend.binary_focal_crossentropy( - target=t, - output=logits, - gamma=2.0, - from_logits=True, - )) - self.assertArrayNear(result[0], [7.995, 0.022, 0.701], 1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_focal_crossentropy_no_focal_effect_with_zero_gamma(self): - t = backend.constant([[0, 1, 0]]) - logits = backend.constant([[8., 1., 1.]]) - p = backend.sigmoid(logits) - p = tf.identity(tf.identity(p)) - gamma = 0 - focal_result = self.evaluate( - backend.binary_focal_crossentropy( - target=t, - output=p, - gamma=gamma, - )) - non_focal_result = self.evaluate(backend.binary_crossentropy(t, p)) - self.assertArrayNear(focal_result[0], non_focal_result[0], 1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_weighted_focal_crossentropy_with_sigmoid(self): - t = backend.constant([[0, 1, 0]]) - logits = backend.constant([[8., 1., 1.]]) - p = backend.sigmoid(logits) - p = tf.identity(tf.identity(p)) - result = self.evaluate(backend.binary_weighted_focal_crossentropy(t, p)) - self.assertArrayNear(result[0], [5.996, 0.006, 0.526], 1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_weighted_focal_crossentropy_from_logits(self): - t = backend.constant([[0, 1, 0]]) - logits = backend.constant([[8., 1., 1.]]) - result = self.evaluate( - backend.binary_weighted_focal_crossentropy( - target=t, - output=logits, - from_logits=True, - )) - self.assertArrayNear(result[0], [5.996, 0.006, 0.526], 1e-3) + # threshold is negative + relu_op = backend.relu(x, threshold=-5) + self.assertAllClose(backend.eval(relu_op), [[-4, 0], [2, 7]]) + # threshold and max_value + relu_op = backend.relu(x, threshold=3, max_value=5.0) + self.assertAllClose(backend.eval(relu_op), [[0, 0], [0, 5]]) -@tf_test_utils.with_control_flow_v2 -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class TestCTC(tf.test.TestCase): + # threshold and alpha + relu_op = backend.relu(x, alpha=0.25, threshold=4.0) + self.assertAllClose(backend.eval(relu_op), [[-2, -1], [-0.5, 7]]) - def test_ctc_decode(self): - depth = 6 - seq_len_0 = 5 - input_prob_matrix_0 = np.asarray( - [ - [0.30999, 0.309938, 0.0679938, 0.0673362, 0.0708352, 0.173908], - [0.215136, 0.439699, 0.0370931, 0.0393967, 
0.0381581, 0.230517], - [0.199959, 0.489485, 0.0233221, 0.0251417, 0.0233289, 0.238763], - [0.279611, 0.452966, 0.0204795, 0.0209126, 0.0194803, 0.20655], - [0.51286, 0.288951, 0.0243026, 0.0220788, 0.0219297, 0.129878], - # Random entry added in at time=5 - [0.155251, 0.164444, 0.173517, 0.176138, 0.169979, 0.160671] - ], - dtype=np.float32) - - # len max_time_steps array of batch_size x depth matrices - inputs = ( - [input_prob_matrix_0[t, :][np.newaxis, :] for t in range(seq_len_0) - ] + # Pad to max_time_steps = 8 - 2 * [np.zeros((1, depth), dtype=np.float32)]) - - inputs = backend.variable(np.asarray(inputs).transpose((1, 0, 2))) - - # batch_size length vector of sequence_lengths - input_length = backend.variable(np.array([seq_len_0], dtype=np.int32)) - # batch_size length vector of negative log probabilities - log_prob_truth = np.array( - [ - -3.5821197, # output beam 0 - -3.777835 # output beam 1 - ], - np.float32)[np.newaxis, :] - - decode_truth = [ - np.array([1, 0, -1, -1, -1, -1, -1]), - np.array([0, 1, 0, -1, -1, -1, -1]) - ] - beam_width = 2 - top_paths = 2 - - decode_pred_tf, log_prob_pred_tf = backend.ctc_decode( - inputs, - input_length, - greedy=False, - beam_width=beam_width, - top_paths=top_paths) - - self.assertEqual(len(decode_pred_tf), top_paths) - log_prob_pred = backend.eval(log_prob_pred_tf) - for i in range(top_paths): - self.assertTrue( - np.alltrue(decode_truth[i] == backend.eval(decode_pred_tf[i]))) - self.assertAllClose(log_prob_truth, log_prob_pred) - - def test_ctc_batch_cost(self): - with self.cached_session(): - label_lens = np.expand_dims(np.asarray([5, 4]), 1) - input_lens = np.expand_dims(np.asarray([5, 5]), 1) # number of timesteps - loss_log_probs = [3.34211, 5.42262] - - # dimensions are batch x time x categories - labels = np.asarray([[0, 1, 2, 1, 0], [0, 1, 1, 0, -1]]) - inputs = np.asarray( - [[[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553], - [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436], - [0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688], - [0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533], - [0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]], - [[0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508], - [0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549], - [0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456], - [0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345], - [0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046]]], - dtype=np.float32) - - labels = backend.variable(labels, dtype='int32') - inputs = backend.variable(inputs, dtype='float32') - input_lens = backend.variable(input_lens, dtype='int32') - label_lens = backend.variable(label_lens, dtype='int32') - res = backend.eval( - backend.ctc_batch_cost(labels, inputs, input_lens, label_lens)) - self.assertAllClose(res[:, 0], loss_log_probs, atol=1e-05) - - # test when batch_size = 1, that is, one sample only - ref = [3.34211] - input_lens = np.expand_dims(np.asarray([5]), 1) - label_lens = np.expand_dims(np.asarray([5]), 1) - - labels = np.asarray([[0, 1, 2, 1, 0]]) - inputs = np.asarray( - [[[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553], - [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436], - [0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688], - [0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533], - [0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]] - ], - dtype=np.float32) - 
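# ---------------------------------------------------------------------------
# [Editorial aside; illustrative sketch, not part of the diff.]
# backend.ctc_batch_cost consumes softmax probabilities of shape
# (samples, time, classes) plus per-sample input and label lengths, and
# returns a (samples, 1) tensor of CTC negative log-likelihoods, which is
# what the removed test above checks against precomputed references.
# A minimal self-contained call with hypothetical random probabilities:

import numpy as np

from keras import backend

labels = backend.variable(np.array([[0, 1, 2, 1, 0]]), dtype="int32")
# Probabilities over 6 classes (the last index is the CTC blank), 5 steps.
probs = np.random.uniform(size=(1, 5, 6)).astype("float32")
probs /= probs.sum(axis=-1, keepdims=True)
inputs = backend.variable(probs, dtype="float32")
input_lens = backend.variable(np.array([[5]]), dtype="int32")
label_lens = backend.variable(np.array([[5]]), dtype="int32")
loss = backend.eval(
    backend.ctc_batch_cost(labels, inputs, input_lens, label_lens)
)
assert loss.shape == (1, 1)  # one CTC loss value per batch entry
# ---------------------------------------------------------------------------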
- k_labels = backend.variable(labels, dtype='int32') - k_inputs = backend.variable(inputs, dtype='float32') - k_input_lens = backend.variable(input_lens, dtype='int32') - k_label_lens = backend.variable(label_lens, dtype='int32') - res = backend.eval( - backend.ctc_batch_cost(k_labels, k_inputs, k_input_lens, - k_label_lens)) - self.assertAllClose(res[:, 0], ref, atol=1e-05) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class TestRandomOps(tf.test.TestCase): + # threshold, alpha, and max_value + relu_op = backend.relu(x, alpha=0.25, threshold=4.0, max_value=5.0) + self.assertAllClose(backend.eval(relu_op), [[-2, -1], [-0.5, 5]]) - def test_random_normal(self): - np.random.seed(123) - x = backend.random_normal((500, 500)) - val = backend.eval(x) - self.assertAllClose(np.mean(val), 0., atol=0.01) - self.assertAllClose(np.std(val), 1., atol=0.01) - - def test_random_uniform(self): - np.random.seed(123) - x = backend.random_uniform((500, 500)) - val = backend.eval(x) - self.assertAllClose(np.mean(val), 0.5, atol=0.01) - self.assertAllClose(np.max(val), 1., atol=0.01) - self.assertAllClose(np.min(val), 0., atol=0.01) - - def test_random_binomial(self): - np.random.seed(123) - x = backend.random_binomial((500, 500), p=0.5) - self.assertAllClose(np.mean(backend.eval(x)), 0.5, atol=0.01) - - def test_truncated_normal(self): - np.random.seed(123) - x = backend.truncated_normal((500, 500), mean=0.0, stddev=1.0) - x = backend.truncated_normal((1000, 1000), mean=0.0, stddev=1.0) - y = backend.eval(x) - self.assertAllClose(np.mean(y), 0., atol=0.01) - self.assertAllClose(np.std(y), 0.88, atol=0.01) - self.assertAllClose(np.max(y), 2., atol=0.01) - self.assertAllClose(np.min(y), -2., atol=0.01) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class FunctionTest(tf.test.TestCase): + # Test case for GitHub issue 35430, with integer dtype + x = input_layer.Input(shape=(), name="x", dtype="int64") + _ = activation.ReLU(max_value=100.0, dtype="int64")(x) - def test_function_basics(self): - if tf.executing_eagerly(): - self.skipTest('eager backend.function does not support updates') - x1 = backend.placeholder(shape=(), dtype='float32') - x2 = backend.placeholder(shape=(), dtype='int32') - v = backend.variable(10.) - - y1 = x1 + backend.cast(x2, 'float32') + v - y2 = x1 * backend.cast(x2, 'float32') - - with tf.control_dependencies([y1]): - u = backend.update(v, x1) - - f = backend.function([x1, x2], [y1, y2], updates=[u]) - output_values = f([2, 3]) - self.assertEqual(output_values, [15., 6.]) - self.assertEqual(backend.eval(v), 2.) - - def test_function_dict_outputs(self): - x_ph = backend.placeholder(shape=(), name='x') - y_ph = backend.placeholder(shape=(), name='y') - outputs = {'x*y': y_ph * x_ph, 'x*x': x_ph * x_ph} - - f = backend.function(inputs=[x_ph, y_ph], outputs=outputs) - x, y = 2., 5. - results = f([x, y]) - - self.assertEqual(results['x*y'], 10.) - self.assertEqual(results['x*x'], 4) - - def test_function_dict_inputs(self): - placeholders = { - 'x': backend.placeholder(shape=()), - 'y': backend.placeholder(shape=()) - } - outputs = [placeholders['x'] * placeholders['y']] - - f = backend.function(inputs=placeholders, outputs=outputs) - results = f({'x': 2., 'y': 3.}) - self.assertEqual(results[0], 6.) - - def test_function_single_input_output(self): - x_ph = backend.placeholder(shape=(), name='x') - output = x_ph * x_ph - f = backend.function(x_ph, output) - result = f(2.) - self.assertEqual(result, 4.) 
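# ---------------------------------------------------------------------------
# [Editorial aside; illustrative sketch, not part of the diff.]
# The removed tests above exercise backend.function, which compiles symbolic
# inputs and outputs (plus optional state updates) into a plain callable.
# A minimal sketch, assuming TF1-style graph mode since placeholders are not
# available under eager execution:

import tensorflow as tf

from keras import backend

tf.compat.v1.disable_eager_execution()

x = backend.placeholder(shape=(), name="x")
y = backend.placeholder(shape=(), name="y")
f = backend.function([x, y], [x * y, x + y])
print(f([2.0, 3.0]))  # [6.0, 5.0]
# ---------------------------------------------------------------------------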
- - def test_tuple_updates(self): - if tf.executing_eagerly(): - self.skipTest('eager backend.function does not support updates') - - x_ph = backend.placeholder(ndim=2) - v = backend.variable(np.ones((4, 2))) - output = x_ph**2 + v - new_v = v + x_ph - f = backend.function(x_ph, output, updates=[(v, new_v)]) - input_val = np.random.random((4, 2)) - result = f(input_val) - self.assertAllClose(result, input_val**2 + 1) - self.assertAllClose(backend.get_value(v), np.ones((4, 2)) + input_val) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class BackendShapeOpsTest(tf.test.TestCase): + def test_reshape(self): + compare_single_input_op_to_numpy( + backend.reshape, + np.reshape, + input_shape=(4, 7), + keras_args=[(2, 14)], + np_args=[(2, 14)], + ) + + def test_concatenate(self): + a = backend.variable(np.ones((1, 2, 3))) + b = backend.variable(np.ones((1, 2, 2))) + y = backend.concatenate([a, b], axis=-1) + self.assertEqual(y.shape.as_list(), [1, 2, 5]) + + def test_permute_dimensions(self): + compare_single_input_op_to_numpy( + backend.permute_dimensions, + np.transpose, + input_shape=(4, 7), + keras_args=[(1, 0)], + np_args=[(1, 0)], + ) + + def test_resize_images(self): + height_factor = 2 + width_factor = 2 + data_format = "channels_last" + x = backend.variable(np.ones((1, 2, 2, 3))) + y = backend.resize_images(x, height_factor, width_factor, data_format) + self.assertEqual(y.shape.as_list(), [1, 4, 4, 3]) + + data_format = "channels_first" + x = backend.variable(np.ones((1, 3, 2, 2))) + y = backend.resize_images(x, height_factor, width_factor, data_format) + self.assertEqual(y.shape.as_list(), [1, 3, 4, 4]) + + # Use with a dynamic axis: + if not tf.executing_eagerly(): + x = backend.placeholder(shape=(1, 3, None, None)) + y = backend.resize_images( + x, height_factor, width_factor, data_format + ) + self.assertEqual(y.shape.as_list(), [1, 3, None, None]) + + # Invalid use: + with self.assertRaises(ValueError): + backend.resize_images( + x, height_factor, width_factor, data_format="unknown" + ) + + def test_resize_volumes(self): + height_factor = 2 + width_factor = 2 + depth_factor = 2 + data_format = "channels_last" + x = backend.variable(np.ones((1, 2, 2, 2, 3))) + y = backend.resize_volumes( + x, depth_factor, height_factor, width_factor, data_format + ) + self.assertEqual(y.shape.as_list(), [1, 4, 4, 4, 3]) + + data_format = "channels_first" + x = backend.variable(np.ones((1, 3, 2, 2, 2))) + y = backend.resize_volumes( + x, depth_factor, height_factor, width_factor, data_format + ) + self.assertEqual(y.shape.as_list(), [1, 3, 4, 4, 4]) + + # Invalid use: + with self.assertRaises(ValueError): + backend.resize_volumes( + x, + depth_factor, + height_factor, + width_factor, + data_format="unknown", + ) + + def test_repeat_elements(self): + x = backend.variable(np.ones((1, 3, 2))) + y = backend.repeat_elements(x, 3, axis=1) + self.assertEqual(y.shape.as_list(), [1, 9, 2]) + + # Use with a dynamic axis: + if not tf.executing_eagerly(): + x = backend.placeholder(shape=(2, None, 2)) + y = backend.repeat_elements(x, 3, axis=1) + self.assertEqual(y.shape.as_list(), [2, None, 2]) -class BackendGraphTests(tf.test.TestCase, parameterized.TestCase): + def test_repeat(self): + x = backend.variable(np.ones((1, 3))) + y = backend.repeat(x, 2) + self.assertEqual(y.shape.as_list(), [1, 2, 3]) - @test_combinations.generate(test_combinations.combine(mode=['graph'])) - def test_function_placeholder_with_default(self): - with backend.get_graph().as_default(): - x1 = 
tf.compat.v1.placeholder_with_default( - np.array(2., dtype='float32'), shape=()) - x2 = tf.compat.v1.placeholder_with_default( - np.array(3, dtype='int32'), shape=()) - y1 = x1 + backend.cast(x2, 'float32') - y2 = x1 * backend.cast(x2, 'float32') - f = backend.function([x1, x2], [y1, y2]) - output_values = f([4, 5]) - self.assertEqual(output_values, [9., 20.]) - output_values = f([None, None]) - self.assertEqual(output_values, [5., 6.]) - - def test_function_tf_feed_symbols(self): - # Test Keras backend functions with TF tensor inputs. - with tf.Graph().as_default(), self.cached_session(): - # Test feeding a resource variable to `function`. - x1 = backend.placeholder(shape=()) - x2 = backend.placeholder(shape=()) - lr = backend.learning_phase() # Include a placeholder_with_default. - - y1 = backend.variable(10.) - y2 = 3 - - f = backend.function( - inputs=[x1, x2, lr], - outputs=[x1 + 1, backend.in_train_phase(x2 + 2, x2 - 1)]) - outs = f([y1, y2, None]) # Use default learning_phase value. - self.assertEqual(outs, [11., 2.]) - outs = f([y1, y2, 1]) # Set learning phase value. - self.assertEqual(outs, [11., 5.]) - - # Test triggering a callable refresh by changing the input. - y3 = backend.constant(20.) # Test with tensor - outs = f([y3, y2, None]) - self.assertEqual(outs, [21., 2.]) - - y4 = 4 # Test with non-symbol - outs = f([y4, y2, None]) - self.assertEqual(outs, [5., 2.]) - - # Test with a different dtype - y5 = backend.constant(10., dtype='float64') - outs = f([y5, y2, None]) - self.assertEqual(outs, [11., 2.]) - - def test_function_tf_fetches(self): - # Additional operations can be passed to tf.compat.v1.Session().run() via - # its `fetches` arguments. In contrast to `updates` argument of - # backend.function() these do not have control dependency on `outputs` - # so they can run in parallel. Also they should not contribute to output of - # backend.function(). - with tf.Graph().as_default(), self.cached_session(): - x = backend.variable(0.) - y = backend.variable(0.) - x_placeholder = backend.placeholder(shape=()) - y_placeholder = backend.placeholder(shape=()) - - f = backend.function( - inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder], - updates=[(x, x_placeholder + 1.)], - fetches=[backend.update(y, 5.)]) - output = f([10., 20.]) - self.assertEqual(output, [30.]) - self.assertEqual(backend.get_session().run(fetches=[x, y]), [11., 5.]) - - def test_function_tf_feed_dict(self): - # Additional substitutions can be passed to `tf.compat.v1.Session().run()` - # via its `feed_dict` arguments. Note that the feed_dict is passed once in - # the constructor but we can modify the values in the dictionary. Through - # this feed_dict we can provide additional substitutions besides Keras - # inputs. - with tf.Graph().as_default(), self.cached_session(): - x = backend.variable(0.) - y = backend.variable(0.) - x_placeholder = backend.placeholder(shape=()) - y_placeholder = backend.placeholder(shape=()) - - feed_dict = {y_placeholder: 3.} - fetches = [backend.update(y, y_placeholder * 10.)] - f = backend.function( - inputs=[x_placeholder], - outputs=[x_placeholder + 1.], - updates=[(x, x_placeholder + 10.)], - feed_dict=feed_dict, - fetches=fetches) - output = f([10.]) - self.assertEqual(output, [11.]) - self.assertEqual(backend.get_session().run(fetches=[x, y]), [20., 30.]) - - # updated value in feed_dict will be modified within the K.function() - feed_dict[y_placeholder] = 4. 
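# [Editorial note, not part of the diff: backend.function stores a reference
# to the feed_dict given at construction time and re-reads it on every call,
# so the in-place update above changes the substitution for the next call.
# Below, the output is 20. + 1. = 21., the `updates` argument writes
# x = 20. + 10. = 30., and the `fetches` op writes y = 4. * 10. = 40.]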
- output = f([20.]) - self.assertEqual(output, [21.]) - self.assertEqual(backend.get_session().run(fetches=[x, y]), [30., 40.]) - - def test_function_tf_run_options_with_run_metadata(self): - with tf.Graph().as_default(), self.cached_session(): - x_placeholder = backend.placeholder(shape=()) - y_placeholder = backend.placeholder(shape=()) - - run_options = tf.compat.v1.RunOptions(output_partition_graphs=True) - run_metadata = tf.compat.v1.RunMetadata() - # enable run_options. - f = backend.function( - inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder], - options=run_options, - run_metadata=run_metadata) - output = f([10., 20.]) - self.assertEqual(output, [30.]) - self.assertNotEmpty(run_metadata.partition_graphs) - # disable run_options. - f1 = backend.function( - inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder], - run_metadata=run_metadata) - output1 = f1([10., 20.]) - self.assertEqual(output1, [30.]) - self.assertEmpty(run_metadata.partition_graphs) - - def test_function_fetch_callbacks(self): - - class CallbackStub: - - def __init__(self): - self.times_called = 0 - self.callback_result = 0 - - def _fetch_callback(self, result): - self.times_called += 1 - self.callback_result = result - - with tf.Graph().as_default(), self.cached_session(): - callback = CallbackStub() - x_placeholder = backend.placeholder(shape=()) - y_placeholder = backend.placeholder(shape=()) - - callback_op = x_placeholder * y_placeholder - - f = backend.function( - inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder]) - f.fetches.append(callback_op) - f.fetch_callbacks[callback_op] = callback._fetch_callback - - _ = f([10., 20.]) - - self.assertEqual(callback.times_called, 1) - self.assertEqual(callback.callback_result, 200) - - def test_get_session_different_graphs(self): - with tf.Graph().as_default(): - x = backend.constant(1) - session = backend.get_session() - self.assertIs(session, backend.get_session((x,))) - self.assertIs(session, backend.get_session()) - with tf.Graph().as_default(): - self.assertIs(session, backend.get_session((x,))) - self.assertIsNot(session, backend.get_session()) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class ControlOpsTests(tf.test.TestCase): + def test_flatten(self): + compare_single_input_op_to_numpy( + backend.flatten, + np.reshape, + input_shape=(4, 7, 6), + np_args=[(4 * 7 * 6,)], + ) + + def test_batch_flatten(self): + compare_single_input_op_to_numpy( + backend.batch_flatten, + np.reshape, + input_shape=(4, 7, 6), + np_args=[(4, 7 * 6)], + ) + + def test_temporal_padding(self): + def ref_op(x, padding): + shape = list(x.shape) + shape[1] += padding[0] + padding[1] + y = np.zeros(tuple(shape)) + y[:, padding[0] : -padding[1], :] = x + return y + + compare_single_input_op_to_numpy( + backend.temporal_padding, + ref_op, + input_shape=(4, 7, 6), + keras_args=[(2, 3)], + np_args=[(2, 3)], + ) + + def test_spatial_2d_padding(self): + def ref_op(x, padding, data_format="channels_last"): + shape = list(x.shape) + if data_format == "channels_last": + shape[1] += padding[0][0] + padding[0][1] + shape[2] += padding[1][0] + padding[1][1] + y = np.zeros(tuple(shape)) + y[ + :, + padding[0][0] : -padding[0][1], + padding[1][0] : -padding[1][1], + :, + ] = x + else: + shape[2] += padding[0][0] + padding[0][1] + shape[3] += padding[1][0] + padding[1][1] + y = np.zeros(tuple(shape)) + y[ + :, + :, + padding[0][0] : -padding[0][1], + padding[1][0] : 
-padding[1][1], + ] = x + return y + + compare_single_input_op_to_numpy( + backend.spatial_2d_padding, + ref_op, + input_shape=(2, 3, 2, 3), + keras_args=[((2, 3), (1, 2))], + keras_kwargs={"data_format": "channels_last"}, + np_args=[((2, 3), (1, 2))], + np_kwargs={"data_format": "channels_last"}, + ) + compare_single_input_op_to_numpy( + backend.spatial_2d_padding, + ref_op, + input_shape=(2, 3, 2, 3), + keras_args=[((2, 3), (1, 2))], + keras_kwargs={"data_format": "channels_first"}, + np_args=[((2, 3), (1, 2))], + np_kwargs={"data_format": "channels_first"}, + ) + + def test_spatial_3d_padding(self): + def ref_op(x, padding, data_format="channels_last"): + shape = list(x.shape) + if data_format == "channels_last": + shape[1] += padding[0][0] + padding[0][1] + shape[2] += padding[1][0] + padding[1][1] + shape[3] += padding[2][0] + padding[2][1] + y = np.zeros(tuple(shape)) + y[ + :, + padding[0][0] : -padding[0][1], + padding[1][0] : -padding[1][1], + padding[2][0] : -padding[2][1], + :, + ] = x + else: + shape[2] += padding[0][0] + padding[0][1] + shape[3] += padding[1][0] + padding[1][1] + shape[4] += padding[2][0] + padding[2][1] + y = np.zeros(tuple(shape)) + y[ + :, + :, + padding[0][0] : -padding[0][1], + padding[1][0] : -padding[1][1], + padding[2][0] : -padding[2][1], + ] = x + return y + + compare_single_input_op_to_numpy( + backend.spatial_3d_padding, + ref_op, + input_shape=(2, 3, 2, 3, 2), + keras_args=[((2, 3), (1, 2), (2, 3))], + keras_kwargs={"data_format": "channels_last"}, + np_args=[((2, 3), (1, 2), (2, 3))], + np_kwargs={"data_format": "channels_last"}, + ) + compare_single_input_op_to_numpy( + backend.spatial_3d_padding, + ref_op, + input_shape=(2, 3, 2, 3, 2), + keras_args=[((2, 3), (1, 2), (2, 3))], + keras_kwargs={"data_format": "channels_first"}, + np_args=[((2, 3), (1, 2), (2, 3))], + np_kwargs={"data_format": "channels_first"}, + ) - def test_function_switch_basics(self): - x = tf.constant(2.0) - y = tf.constant(3.0) - def xpowy(): - return backend.pow(x, y) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class BackendNNOpsTest(tf.test.TestCase, parameterized.TestCase): + def test_bias_add(self): + keras_op = backend.bias_add + np_op = np.add + compare_two_inputs_op_to_numpy( + keras_op, np_op, input_shape_a=(4, 7), input_shape_b=(7,) + ) + compare_two_inputs_op_to_numpy( + keras_op, np_op, input_shape_a=(4, 3, 7), input_shape_b=(7,) + ) + compare_two_inputs_op_to_numpy( + keras_op, np_op, input_shape_a=(4, 3, 5, 7), input_shape_b=(7,) + ) + compare_two_inputs_op_to_numpy( + keras_op, np_op, input_shape_a=(4, 3, 5, 2, 7), input_shape_b=(7,) + ) + + with self.assertRaises((ValueError, tf.errors.InvalidArgumentError)): + x = backend.variable((3, 4)) + b = backend.variable((3, 4)) + backend.bias_add(x, b) + with self.assertRaises(ValueError): + x = backend.variable((3, 4)) + b = backend.variable((4,)) + backend.bias_add(x, b, data_format="unknown") + + def test_bias_add_channels_first(self): + def keras_op(x, b): + return backend.bias_add(x, b, data_format="channels_first") + + def np_op(x, b): + if x.ndim == 3: + b = b.reshape((1, b.shape[0], 1)) + if x.ndim == 4: + b = b.reshape((1, b.shape[0], 1, 1)) + return x + b + + compare_two_inputs_op_to_numpy( + keras_op, np_op, input_shape_a=(4, 3, 7), input_shape_b=(3,) + ) + compare_two_inputs_op_to_numpy( + keras_op, np_op, input_shape_a=(4, 3, 5, 7), input_shape_b=(3,) + ) + + def test_pool2d(self): + val = np.random.random((10, 3, 10, 10)) + x = backend.variable(val) + y = 
backend.pool2d( + x, + (2, 2), + strides=(1, 1), + padding="valid", + data_format="channels_first", + pool_mode="max", + ) + self.assertEqual(y.shape.as_list(), [10, 3, 9, 9]) + + y = backend.pool2d( + x, + (2, 2), + strides=(1, 1), + padding="valid", + data_format="channels_first", + pool_mode="avg", + ) + self.assertEqual(y.shape.as_list(), [10, 3, 9, 9]) + + val = np.random.random((10, 10, 10, 3)) + x = backend.variable(val) + y = backend.pool2d( + x, + (2, 2), + strides=(1, 1), + padding="valid", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 9, 9, 3]) + + val = np.random.random((10, 10, 10, 3)) + x = backend.variable(val) + y = backend.pool2d( + x, + (2, 2), + strides=(1, 1), + padding="same", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 10, 10, 3]) + + val = np.random.random((10, 10, 10, 3)) + x = backend.variable(val) + y = backend.pool2d( + x, + (2, 2), + strides=(2, 2), + padding="same", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 5, 5, 3]) + + with self.assertRaises(ValueError): + y = backend.pool2d( + x, + (2, 2), + strides=(2, 2), + padding="other", + data_format="channels_last", + ) + with self.assertRaises(ValueError): + y = backend.pool2d(x, (2, 2), strides=(2, 2), data_format="other") + with self.assertRaises(ValueError): + y = backend.pool2d(x, (2, 2, 2), strides=(2, 2)) + with self.assertRaises(ValueError): + y = backend.pool2d(x, (2, 2), strides=(2, 2, 2)) + with self.assertRaises(ValueError): + y = backend.pool2d(x, (2, 2), strides=(2, 2), pool_mode="other") + + def test_pool3d(self): + val = np.random.random((10, 3, 10, 10, 10)) + x = backend.variable(val) + y = backend.pool3d( + x, + (2, 2, 2), + strides=(1, 1, 1), + padding="valid", + data_format="channels_first", + pool_mode="max", + ) + self.assertEqual(y.shape.as_list(), [10, 3, 9, 9, 9]) + + y = backend.pool3d( + x, + (2, 2, 2), + strides=(1, 1, 1), + padding="valid", + data_format="channels_first", + pool_mode="avg", + ) + self.assertEqual(y.shape.as_list(), [10, 3, 9, 9, 9]) + + val = np.random.random((10, 10, 10, 10, 3)) + x = backend.variable(val) + y = backend.pool3d( + x, + (2, 2, 2), + strides=(1, 1, 1), + padding="valid", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 9, 9, 9, 3]) + + val = np.random.random((10, 10, 10, 10, 3)) + x = backend.variable(val) + y = backend.pool3d( + x, + (2, 2, 2), + strides=(1, 1, 1), + padding="same", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 10, 10, 10, 3]) + + val = np.random.random((10, 10, 10, 10, 3)) + x = backend.variable(val) + y = backend.pool3d( + x, + (2, 2, 2), + strides=(2, 2, 2), + padding="same", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 5, 5, 5, 3]) + + def test_conv1d(self): + val = np.random.random((10, 4, 10)) + x = backend.variable(val) + kernel_val = np.random.random((3, 4, 5)) + k = backend.variable(kernel_val) + y = backend.conv1d( + x, k, strides=(1,), padding="valid", data_format="channels_first" + ) + self.assertEqual(y.shape.as_list(), [10, 5, 8]) + + val = np.random.random((10, 10, 4)) + x = backend.variable(val) + y = backend.conv1d( + x, k, strides=(1,), padding="valid", data_format="channels_last" + ) + self.assertEqual(y.shape.as_list(), [10, 8, 5]) + + val = np.random.random((10, 10, 4)) + x = backend.variable(val) + y = backend.conv1d( + x, k, strides=(1,), padding="same", data_format="channels_last" + ) + self.assertEqual(y.shape.as_list(), 
[10, 10, 5]) + + val = np.random.random((10, 10, 4)) + x = backend.variable(val) + y = backend.conv1d( + x, k, strides=(2,), padding="same", data_format="channels_last" + ) + self.assertEqual(y.shape.as_list(), [10, 5, 5]) + + def test_local_conv_channels_dim(self): + filters = 3 + batch_size = 2 + + for input_shape in [(3, 5), (2, 3, 5), (2, 5, 3, 4)]: + channels_in = input_shape[0] + input_spatial_shape = input_shape[1:] + dim = len(input_spatial_shape) + + inputs = np.random.normal(0, 1, (batch_size,) + input_shape) + inputs_cf = backend.variable(inputs) + + for kernel_size in [1, 2]: + for stride in [1, 2]: + kernel_sizes = (kernel_size,) * dim + strides = (stride,) * dim + + output_shape = tuple( + [ + (i - kernel_size + stride) // stride + for i in input_spatial_shape + ] + ) + + kernel_shape = ( + np.prod(output_shape), + np.prod(kernel_sizes) * channels_in, + filters, + ) + + kernel = np.random.normal( + 0, + 1, + output_shape + + (channels_in, np.prod(kernel_sizes), filters), + ) + + kernel_cf = np.reshape(kernel, kernel_shape) + kernel_cf = backend.variable(kernel_cf) + + conv_cf = backend.local_conv( + inputs_cf, + kernel_cf, + kernel_sizes, + strides, + output_shape, + "channels_first", + ) + + inputs_cl = np.transpose( + inputs, [0, 2] + list(range(3, dim + 2)) + [1] + ) + inputs_cl = backend.variable(inputs_cl) + + kernel_cl = np.reshape( + np.transpose( + kernel, list(range(dim)) + [dim + 1, dim, dim + 2] + ), + kernel_shape, + ) + kernel_cl = backend.variable(kernel_cl) + + conv_cl = backend.local_conv( + inputs_cl, + kernel_cl, + kernel_sizes, + strides, + output_shape, + "channels_last", + ) + + conv_cf = backend.eval(conv_cf) + conv_cl = backend.eval(conv_cl) + + self.assertAllCloseAccordingToType( + conv_cf, + np.transpose( + conv_cl, [0, dim + 1] + list(range(1, dim + 1)) + ), + atol=1e-5, + ) + + @parameterized.named_parameters( + ("local_conv1d", (5, 6), (3,), (1,), (3,)), + ("local_conv2d", (4, 5, 6), (3, 3), (1, 1), (2, 3)), + ) + def test_local_conv_1d_and_2d( + self, input_shape, kernel_sizes, strides, output_shape + ): + filters = 3 + batch_size = 2 + + inputs = np.random.normal(0, 1, (batch_size,) + input_shape) + inputs = backend.variable(inputs) + + kernel = np.random.normal( + 0, + 1, + ( + np.prod(output_shape), + np.prod(kernel_sizes) * input_shape[-1], + filters, + ), + ) + kernel = backend.variable(kernel) + + local_conv = backend.local_conv( + inputs, kernel, kernel_sizes, strides, output_shape, "channels_last" + ) + if len(output_shape) == 1: + local_conv_dim = backend.local_conv1d( + inputs, kernel, kernel_sizes, strides, "channels_last" + ) + else: + local_conv_dim = backend.local_conv2d( + inputs, + kernel, + kernel_sizes, + strides, + output_shape, + "channels_last", + ) + + local_conv = backend.eval(local_conv) + local_conv_dim = backend.eval(local_conv_dim) + + self.assertAllCloseAccordingToType(local_conv, local_conv_dim) + + def test_conv2d(self): + kernel_val = np.random.random((3, 3, 4, 5)) + k = backend.variable(kernel_val) + + # Test channels_first + val = np.random.random((10, 4, 10, 10)) + x = backend.variable(val) + y = backend.conv2d(x, k, padding="valid", data_format="channels_first") + self.assertEqual(y.shape.as_list(), [10, 5, 8, 8]) + + # Test channels_last + val = np.random.random((10, 10, 10, 4)) + x = backend.variable(val) + y = backend.conv2d( + x, k, strides=(1, 1), padding="valid", data_format="channels_last" + ) + self.assertEqual(y.shape.as_list(), [10, 8, 8, 5]) + + # Test same padding + val = np.random.random((10, 10, 10, 
4)) + x = backend.variable(val) + y = backend.conv2d(x, k, padding="same", data_format="channels_last") + self.assertEqual(y.shape.as_list(), [10, 10, 10, 5]) + + # Test dilation_rate + val = np.random.random((10, 10, 10, 4)) + x = backend.variable(val) + y = backend.conv2d( + x, + k, + dilation_rate=(2, 2), + padding="same", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 10, 10, 5]) + + # Test strides + val = np.random.random((10, 10, 10, 4)) + x = backend.variable(val) + y = backend.conv2d( + x, k, strides=(2, 2), padding="same", data_format="channels_last" + ) + self.assertEqual(y.shape.as_list(), [10, 5, 5, 5]) + + # Test invalid arguments + with self.assertRaises(ValueError): + y = backend.conv2d( + x, k, (2, 2), padding="other", data_format="channels_last" + ) + with self.assertRaises(ValueError): + y = backend.conv2d(x, k, (2, 2), data_format="other") + with self.assertRaises(ValueError): + y = backend.conv2d(x, k, (2, 2, 2)) + + def test_conv2d_transpose(self): + input_size = (7, 8) + kernel_size = (3, 3) + input_depth = 6 + filters = 6 + batch_size = 2 + + kernel_val = np.random.random(kernel_size + (input_depth, filters)) + k = backend.variable(kernel_val) + + # Test channels_first + input_val = np.random.random((batch_size, input_depth) + input_size) + x = backend.variable(input_val) + y = backend.conv2d_transpose( + x, + k, + (batch_size, filters) + input_size, + padding="same", + data_format="channels_first", + ) + self.assertEqual( + tuple(y.shape.as_list()), (batch_size, filters) + input_size + ) + + # Test channels_last + input_val = np.random.random( + (batch_size,) + input_size + (input_depth,) + ) + x = backend.variable(input_val) + y = backend.conv2d_transpose( + x, + k, + (batch_size,) + input_size + (filters,), + padding="same", + data_format="channels_last", + ) + self.assertEqual( + tuple(y.shape.as_list()), (batch_size,) + input_size + (filters,) + ) + + # Test dilation_rate + y = backend.conv2d_transpose( + x, + k, + (batch_size,) + input_size + (filters,), + padding="same", + data_format="channels_last", + dilation_rate=(2, 2), + ) + self.assertEqual( + tuple(y.shape.as_list()), (batch_size,) + input_size + (filters,) + ) + + # Test dilation_rate error + with self.assertRaisesRegex(ValueError, "Expected the 2 dimensions"): + y = backend.conv2d_transpose( + x, + k, + (batch_size,) + input_size + (filters,), + padding="same", + data_format="channels_last", + dilation_rate=(1, 2), + ) + + # Test batch size of None in output_shape + y = backend.conv2d_transpose( + x, + k, + (None,) + input_size + (filters,), + padding="same", + data_format="channels_last", + ) + self.assertEqual( + tuple(y.shape.as_list()), (batch_size,) + input_size + (filters,) + ) + + # Test invalid values + with self.assertRaises(ValueError): + y = backend.conv2d_transpose( + x, k, (2, 2, 8, 9), padding="other", data_format="channels_last" + ) + with self.assertRaises(ValueError): + y = backend.conv2d_transpose( + x, k, (2, 2, 8, 9), data_format="other" + ) + + def test_separable_conv2d(self): + val = np.random.random((10, 4, 10, 10)) + x = backend.variable(val) + depthwise_kernel_val = np.random.random((3, 3, 4, 1)) + pointwise_kernel_val = np.random.random((1, 1, 4, 5)) + dk = backend.variable(depthwise_kernel_val) + pk = backend.variable(pointwise_kernel_val) + y = backend.separable_conv2d( + x, dk, pk, padding="valid", data_format="channels_first" + ) + self.assertEqual(y.shape.as_list(), [10, 5, 8, 8]) + + val = np.random.random((10, 10, 10, 4)) + x = 
backend.variable(val) + y = backend.separable_conv2d( + x, + dk, + pk, + strides=(1, 1), + padding="valid", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 8, 8, 5]) + + val = np.random.random((10, 10, 10, 4)) + x = backend.variable(val) + y = backend.separable_conv2d( + x, + dk, + pk, + strides=(1, 1), + padding="same", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 10, 10, 5]) + + val = np.random.random((10, 10, 10, 4)) + x = backend.variable(val) + y = backend.separable_conv2d( + x, + dk, + pk, + strides=(2, 2), + padding="same", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 5, 5, 5]) + with self.assertRaises(ValueError): + y = backend.separable_conv2d( + x, dk, pk, (2, 2), padding="other", data_format="channels_last" + ) + with self.assertRaises(ValueError): + y = backend.separable_conv2d(x, dk, pk, (2, 2), data_format="other") + with self.assertRaises(ValueError): + y = backend.separable_conv2d(x, dk, pk, (2, 2, 2)) + + def test_conv3d(self): + val = np.random.random((10, 4, 10, 10, 10)) + x = backend.variable(val) + kernel_val = np.random.random((3, 3, 3, 4, 5)) + k = backend.variable(kernel_val) + y = backend.conv3d(x, k, padding="valid", data_format="channels_first") + self.assertEqual(y.shape.as_list(), [10, 5, 8, 8, 8]) + + val = np.random.random((10, 10, 10, 10, 4)) + x = backend.variable(val) + y = backend.conv3d( + x, + k, + strides=(1, 1, 1), + padding="valid", + data_format="channels_last", + ) + self.assertEqual(y.shape.as_list(), [10, 8, 8, 8, 5]) + + val = np.random.random((10, 10, 10, 10, 4)) + x = backend.variable(val) + y = backend.conv3d( + x, k, strides=(1, 1, 1), padding="same", data_format="channels_last" + ) + self.assertEqual(y.shape.as_list(), [10, 10, 10, 10, 5]) + + val = np.random.random((10, 10, 10, 10, 4)) + x = backend.variable(val) + y = backend.conv3d( + x, k, strides=(2, 2, 2), padding="same", data_format="channels_last" + ) + self.assertEqual(y.shape.as_list(), [10, 5, 5, 5, 5]) + with self.assertRaises(ValueError): + y = backend.conv3d( + x, k, (2, 2, 2), padding="other", data_format="channels_last" + ) + with self.assertRaises(ValueError): + y = backend.conv3d(x, k, (2, 2, 2), data_format="other") + with self.assertRaises(ValueError): + y = backend.conv3d(x, k, (2, 2)) + + def test_rnn(self): + # implement a simple RNN + num_samples = 4 + input_dim = 5 + output_dim = 3 + timesteps = 6 + + input_val = np.random.random( + (num_samples, timesteps, input_dim) + ).astype(np.float32) + init_state_val = np.random.random((num_samples, output_dim)).astype( + np.float32 + ) + w_i_val = np.random.random((input_dim, output_dim)).astype(np.float32) + w_o_val = np.random.random((output_dim, output_dim)).astype(np.float32) + np_mask = np.random.randint(2, size=(num_samples, timesteps)) + + def rnn_step_fn(): + w_i = backend.variable(w_i_val) + w_o = backend.variable(w_o_val) + + def step_function(x, states): + assert len(states) == 1 + prev_output = states[0] + output = backend.dot(x, w_i) + backend.dot(prev_output, w_o) + return output, [output] + + return step_function + + # test default setup + last_output_list = [[], [], [], [], [], []] + outputs_list = [[], [], [], [], [], []] + state_list = [[], [], [], [], [], []] + + rnn_fn = rnn_step_fn() + inputs = backend.variable(input_val) + initial_states = [backend.variable(init_state_val)] + mask = backend.variable(np_mask) + + kwargs_list = [ + {"go_backwards": False, "mask": None}, + {"go_backwards": False, "mask": None, 
"unroll": True}, + {"go_backwards": True, "mask": None}, + {"go_backwards": True, "mask": None, "unroll": True}, + {"go_backwards": False, "mask": mask}, + {"go_backwards": False, "mask": mask, "unroll": True}, + ] + for i, kwargs in enumerate(kwargs_list): + last_output, outputs, new_states = backend.rnn( + rnn_fn, inputs, initial_states, **kwargs + ) + # check static shape inference + self.assertEqual( + last_output.shape.as_list(), [num_samples, output_dim] + ) + self.assertEqual( + outputs.shape.as_list(), [num_samples, timesteps, output_dim] + ) + for state in new_states: + self.assertEqual( + state.shape.as_list(), [num_samples, output_dim] + ) + + last_output_list[i].append(backend.eval(last_output)) + outputs_list[i].append(backend.eval(outputs)) + self.assertLen(new_states, 1) + state_list[i].append(backend.eval(new_states[0])) + + def assert_list_pairwise(z_list, atol=1e-05): + for z1, z2 in zip(z_list[1:], z_list[:-1]): + self.assertAllClose(z1, z2, atol=atol) + + assert_list_pairwise(last_output_list[0], atol=1e-04) + assert_list_pairwise(outputs_list[0], atol=1e-04) + assert_list_pairwise(state_list[0], atol=1e-04) + assert_list_pairwise(last_output_list[2], atol=1e-04) + assert_list_pairwise(outputs_list[2], atol=1e-04) + assert_list_pairwise(state_list[2], atol=1e-04) + + for l, u_l in zip(last_output_list[0], last_output_list[1]): + self.assertAllClose(l, u_l, atol=1e-04) + + for o, u_o in zip(outputs_list[0], outputs_list[1]): + self.assertAllClose(o, u_o, atol=1e-04) + + for s, u_s in zip(state_list[0], state_list[1]): + self.assertAllClose(s, u_s, atol=1e-04) + + for b_l, b_u_l in zip(last_output_list[2], last_output_list[3]): + self.assertAllClose(b_l, b_u_l, atol=1e-04) + + for b_o, b_u_o in zip(outputs_list[2], outputs_list[3]): + self.assertAllClose(b_o, b_u_o, atol=1e-04) + + for b_s, b_u_s in zip(state_list[2], state_list[3]): + self.assertAllClose(b_s, b_u_s, atol=1e-04) + + def test_rnn_additional_states(self): + # implement a simple RNN + num_samples = 4 + input_dim = 5 + output_dim = 3 + timesteps = 6 + + input_val = np.random.random( + (num_samples, timesteps, input_dim) + ).astype(np.float32) + init_state_val = np.random.random((num_samples, output_dim)).astype( + np.float32 + ) + w_i_val = np.random.random((input_dim, output_dim)).astype(np.float32) + w_o_val = np.random.random((output_dim, output_dim)).astype(np.float32) + np_mask = np.random.randint(2, size=(num_samples, timesteps)) + + def rnn_step_fn(): + w_i = backend.variable(w_i_val) + w_o = backend.variable(w_o_val) + + def step_function(x, states): + assert len(states) == 2 + prev_output = states[0] + output = backend.dot(x, w_i) + backend.dot(prev_output, w_o) + return output, [ + output, + backend.concatenate([output, output], axis=-1), + ] + + return step_function + + # test default setup + last_output_list = [[], [], [], [], [], []] + outputs_list = [[], [], [], [], [], []] + state_list = [[], [], [], [], [], []] + additional_state_list = [[], [], [], [], [], []] + + rnn_fn = rnn_step_fn() + inputs = backend.variable(input_val) + initial_states = [ + backend.variable(init_state_val), + tf.convert_to_tensor( + np.concatenate([init_state_val, init_state_val], axis=-1) + ), + ] + mask = backend.variable(np_mask) + + kwargs_list = [ + {"go_backwards": False, "mask": None}, + {"go_backwards": False, "mask": None, "unroll": True}, + {"go_backwards": True, "mask": None}, + {"go_backwards": True, "mask": None, "unroll": True}, + {"go_backwards": False, "mask": mask}, + {"go_backwards": False, "mask": 
mask, "unroll": True}, + ] + for i, kwargs in enumerate(kwargs_list): + last_output, outputs, new_states = backend.rnn( + rnn_fn, inputs, initial_states, **kwargs + ) + # check static shape inference + self.assertEqual( + last_output.shape.as_list(), [num_samples, output_dim] + ) + self.assertEqual( + outputs.shape.as_list(), [num_samples, timesteps, output_dim] + ) + # for state in new_states: + # self.assertEqual(state.shape.as_list(), + # [num_samples, output_dim]) + self.assertEqual( + new_states[0].shape.as_list(), [num_samples, output_dim] + ) + self.assertEqual( + new_states[1].shape.as_list(), [num_samples, 2 * output_dim] + ) + + last_output_list[i].append(backend.eval(last_output)) + outputs_list[i].append(backend.eval(outputs)) + self.assertLen(new_states, 2) + state_list[i].append(backend.eval(new_states[0])) + additional_state_list[i].append(backend.eval(new_states[1])) + + def assert_list_pairwise(z_list, atol=1e-05): + for z1, z2 in zip(z_list[1:], z_list[:-1]): + self.assertAllClose(z1, z2, atol=atol) + + assert_list_pairwise(last_output_list[0], atol=1e-04) + assert_list_pairwise(outputs_list[0], atol=1e-04) + assert_list_pairwise(state_list[0], atol=1e-04) + assert_list_pairwise(additional_state_list[0], atol=1e-04) + assert_list_pairwise(last_output_list[2], atol=1e-04) + assert_list_pairwise(outputs_list[2], atol=1e-04) + assert_list_pairwise(state_list[2], atol=1e-04) + assert_list_pairwise(additional_state_list[2], atol=1e-04) + + for l, u_l in zip(last_output_list[0], last_output_list[1]): + self.assertAllClose(l, u_l, atol=1e-04) + + for o, u_o in zip(outputs_list[0], outputs_list[1]): + self.assertAllClose(o, u_o, atol=1e-04) + + for s, u_s in zip(state_list[0], state_list[1]): + self.assertAllClose(s, u_s, atol=1e-04) + + for s, u_s in zip( + additional_state_list[0], additional_state_list[1] + ): + self.assertAllClose(s, u_s, atol=1e-04) + + for b_l, b_u_l in zip(last_output_list[2], last_output_list[3]): + self.assertAllClose(b_l, b_u_l, atol=1e-04) + + for b_o, b_u_o in zip(outputs_list[2], outputs_list[3]): + self.assertAllClose(b_o, b_u_o, atol=1e-04) + + for b_s, b_u_s in zip(state_list[2], state_list[3]): + self.assertAllClose(b_s, b_u_s, atol=1e-04) + + for s, u_s in zip( + additional_state_list[2], additional_state_list[3] + ): + self.assertAllClose(s, u_s, atol=1e-04) + + def test_rnn_output_and_state_masking_independent(self): + num_samples = 2 + num_timesteps = 4 + state_and_io_size = 2 + mask_last_num_timesteps = 2 # for second sample only + + # a step function that just outputs inputs, + # but increments states +1 per timestep + def step_function(inputs, states): + return inputs, [s + 1 for s in states] + + inputs_vals = np.random.random( + (num_samples, num_timesteps, state_and_io_size) + ) + initial_state_vals = np.random.random((num_samples, state_and_io_size)) + # masking of two last timesteps for second sample only + mask_vals = np.ones((num_samples, num_timesteps)) + mask_vals[1, -mask_last_num_timesteps:] = 0 + + # outputs expected to be same as inputs for the first sample + expected_outputs = inputs_vals.copy() + # but for the second sample all outputs in masked region should be the + # same as last output before masked region + expected_outputs[1, -mask_last_num_timesteps:] = expected_outputs[ + 1, -(mask_last_num_timesteps + 1) + ] + + expected_last_state = initial_state_vals.copy() + # first state should be incremented for every timestep (no masking) + expected_last_state[0] += num_timesteps + # second state should not be incremented for 
last two timesteps + expected_last_state[1] += num_timesteps - mask_last_num_timesteps + + # verify same expected output for `unroll=true/false` + inputs = backend.variable(inputs_vals) + initial_states = [backend.variable(initial_state_vals)] + mask = backend.variable(mask_vals) + for unroll in [True, False]: + _, outputs, last_states = backend.rnn( + step_function, + inputs, + initial_states, + mask=mask, + unroll=unroll, + input_length=num_timesteps if unroll else None, + ) + + self.assertAllClose(backend.eval(outputs), expected_outputs) + self.assertAllClose( + backend.eval(last_states[0]), expected_last_state + ) + + def test_rnn_output_num_dim_larger_than_2_masking(self): + num_samples = 3 + num_timesteps = 4 + num_features = 5 + + def step_function(inputs, states): + outputs = backend.tile(backend.expand_dims(inputs), [1, 1, 2]) + return outputs, [backend.identity(s) for s in states] + # Note: cannot just return states (which can be a problem) -> + # tensorflow/python/ops/resource_variable_ops.py", line 824, in + # set_shape NotImplementedError: ResourceVariable does not implement + # set_shape() + + inputs_vals = np.random.random( + (num_samples, num_timesteps, num_features) + ) + initial_state_vals = np.random.random((num_samples, 6)) + mask_vals = np.ones((num_samples, num_timesteps)) + mask_vals[-1, -1] = 0 # final timestep masked for last sample + + expected_outputs = np.repeat(inputs_vals[..., None], repeats=2, axis=-1) + # for the last sample, the final timestep (in masked region) should be + # the same as the second to final output (before masked region) + expected_outputs[-1, -1] = expected_outputs[-1, -2] + + inputs = backend.variable(inputs_vals) + initial_states = [backend.variable(initial_state_vals)] + mask = backend.variable(mask_vals) + for unroll in [True, False]: + _, outputs, _ = backend.rnn( + step_function, + inputs, + initial_states, + mask=mask, + unroll=unroll, + input_length=num_timesteps if unroll else None, + ) + + self.assertAllClose(backend.eval(outputs), expected_outputs) + + def test_rnn_state_num_dim_larger_than_2_masking(self): + num_samples = 3 + num_timesteps = 4 + + def step_function(inputs, states): + return inputs, [s + 1 for s in states] + + inputs_vals = np.random.random((num_samples, num_timesteps, 5)) + initial_state_vals = np.random.random((num_samples, 6, 7)) + mask_vals = np.ones((num_samples, num_timesteps)) + mask_vals[0, -2:] = 0 # final two timesteps masked for first sample + + expected_last_state = initial_state_vals.copy() + expected_last_state[0] += num_timesteps - 2 + expected_last_state[1:] += num_timesteps + + inputs = backend.variable(inputs_vals) + initial_states = [backend.variable(initial_state_vals)] + mask = backend.variable(mask_vals) + for unroll in [True, False]: + _, _, last_states = backend.rnn( + step_function, + inputs, + initial_states, + mask=mask, + unroll=unroll, + input_length=num_timesteps if unroll else None, + ) + + self.assertAllClose( + backend.eval(last_states[0]), expected_last_state + ) + + def test_rnn_function_jit_compile_no_unroll_input_length_none(self): + num_samples = 3 + num_timesteps = 4 + + def step_function(inputs, states): + return inputs, [s + 1 for s in states] + + inputs_vals = np.random.random((num_samples, num_timesteps, 5)) + initial_state_vals = np.random.random((num_samples, 6, 7)) + mask_vals = np.ones((num_samples, num_timesteps)) + mask_vals[0, -2:] = 0 # final two timesteps masked for first sample + + expected_last_state = initial_state_vals.copy() + expected_last_state[0] += 
num_timesteps - 2 + expected_last_state[1:] += num_timesteps + + inputs = backend.variable(inputs_vals) + initial_states = [backend.variable(initial_state_vals)] + mask = backend.variable(mask_vals) + + @tf.function(jit_compile=True) + def fn(): + _, _, last_states = backend.rnn( + step_function, + inputs, + initial_states, + mask=mask, + unroll=False, + input_length=None, + ) + return last_states + + last_states = fn() + self.assertAllClose(backend.eval(last_states[0]), expected_last_state) + + def test_batch_normalization(self): + g_val = np.random.random((3,)) + b_val = np.random.random((3,)) + gamma = backend.variable(g_val) + beta = backend.variable(b_val) + + # 3D NHC case + val = np.random.random((10, 5, 3)) + x = backend.variable(val) + mean, var = tf.nn.moments(x, (0, 1), None, None, False) + normed = backend.batch_normalization( + x, mean, var, beta, gamma, axis=-1, epsilon=1e-3 + ) + self.assertEqual(normed.shape.as_list(), [10, 5, 3]) + + # 4D NHWC case + val = np.random.random((10, 5, 5, 3)) + x = backend.variable(val) + mean, var = tf.nn.moments(x, (0, 1, 2), None, None, False) + normed = backend.batch_normalization( + x, mean, var, beta, gamma, axis=-1, epsilon=1e-3 + ) + self.assertEqual(normed.shape.as_list(), [10, 5, 5, 3]) + + # 4D NCHW case + if not tf.executing_eagerly(): + # Eager CPU kernel for NCHW does not exist. + val = np.random.random((10, 3, 5, 5)) + x = backend.variable(val) + mean, var = tf.nn.moments(x, (0, 2, 3), None, None, False) + normed = backend.batch_normalization( + x, mean, var, beta, gamma, axis=1, epsilon=1e-3 + ) + self.assertEqual(normed.shape.as_list(), [10, 3, 5, 5]) + + def test_normalize_batch_in_training(self): + val = np.random.random((10, 3, 10, 10)) + x = backend.variable(val) + reduction_axes = (0, 2, 3) + + g_val = np.random.random((3,)) + b_val = np.random.random((3,)) + gamma = backend.variable(g_val) + beta = backend.variable(b_val) + normed, mean, var = backend.normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=1e-3 + ) + self.assertEqual(normed.shape.as_list(), [10, 3, 10, 10]) + self.assertEqual( + mean.shape.as_list(), + [ + 3, + ], + ) + self.assertEqual( + var.shape.as_list(), + [ + 3, + ], + ) + + # case: gamma=None + gamma = None + normed, mean, var = backend.normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=1e-3 + ) + self.assertEqual(normed.shape.as_list(), [10, 3, 10, 10]) + self.assertEqual( + mean.shape.as_list(), + [ + 3, + ], + ) + self.assertEqual( + var.shape.as_list(), + [ + 3, + ], + ) + + # case: beta=None + beta = None + normed, mean, var = backend.normalize_batch_in_training( + x, gamma, beta, reduction_axes, epsilon=1e-3 + ) + self.assertEqual(normed.shape.as_list(), [10, 3, 10, 10]) + self.assertEqual( + mean.shape.as_list(), + [ + 3, + ], + ) + self.assertEqual( + var.shape.as_list(), + [ + 3, + ], + ) + + def test_dropout(self): + inputs = tf.ones((200, 200)) + outputs = backend.dropout(inputs, 0.2) + outputs_val = backend.eval(outputs) + self.assertEqual(np.min(outputs_val), 0) + self.assertAllClose(np.count_nonzero(outputs_val), 32000, atol=1000) + # Test noise shape + outputs = backend.dropout(inputs, 0.2, noise_shape=(200, 1)) + outputs_val = backend.eval(outputs) + # Make sure the whole column gets the same dropout + self.assertEqual(np.min(outputs_val[0, :]), np.max(outputs_val[0, :])) - def ypowx(): - return backend.pow(y, x) - tensor = backend.switch(backend.less(x, y), xpowy, ypowx) - self.assertEqual(backend.eval(tensor), [8.0]) +class 
BackendCrossEntropyLossesTest(tf.test.TestCase, parameterized.TestCase): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_binary_crossentropy_with_sigmoid(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + p = backend.sigmoid(logits) + p = tf.identity(tf.identity(p)) + result = self.evaluate(backend.binary_crossentropy(t, p)) + self.assertArrayNear(result[0], [8.0, 0.313, 1.313], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_crossentropy_loss(self): + t = backend.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + + p = backend.constant( + [[0.9, 0.05, 0.05], [0.05, 0.89, 0.06], [0.05, 0.01, 0.94]] + ) + result = backend.categorical_crossentropy(t, p) + self.assertArrayNear(self.evaluate(result), [0.105, 0.116, 0.062], 1e-3) + + p = backend.constant( + [[0.9, 0.05, 0.05], [0.05, 0.89, 0.01], [0.05, 0.06, 0.94]] + ) + result = backend.categorical_crossentropy(t, p, axis=0) + self.assertArrayNear(self.evaluate(result), [0.105, 0.116, 0.062], 1e-3) + + p = backend.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + result = (backend.categorical_crossentropy(t, p, from_logits=True),) + self.assertArrayNear(self.evaluate(result)[0], [0.002, 0, 0.17], 1e-3) + + p = backend.constant( + [[8.0, 0.0, 2.0], [1.0, 9.0, 3.0], [1.0, 1.0, 5.0]] + ) + result = ( + backend.categorical_crossentropy(t, p, from_logits=True, axis=0), + ) + self.assertArrayNear(self.evaluate(result)[0], [0.002, 0, 0.17], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_crossentropy_loss_with_unknown_rank_tensor(self): + t = backend.placeholder() + p = backend.placeholder() + o = backend.categorical_crossentropy(t, p) + + t_val = tf.convert_to_tensor( + [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] + ) + p_val = tf.convert_to_tensor( + [[0.9, 0.05, 0.05], [0.05, 0.89, 0.06], [0.05, 0.01, 0.94]] + ) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.105, 0.116, 0.062], 1e-3) + + # With axis set + o = backend.categorical_crossentropy(t, p, axis=0) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.105, 0.065, 0.111], 1e-3) + + # from logits + p_val = tf.convert_to_tensor( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + o = backend.categorical_crossentropy(t, p, from_logits=True) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.002, 0, 0.17], 1e-3) + + # from logits and axis set + o = backend.categorical_crossentropy(t, p, from_logits=True, axis=0) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.002, 0.003, 0.036], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_crossentropy_with_softmax(self): + t = backend.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + logits = backend.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + p = backend.softmax(logits) + p = tf.identity(tf.identity(p)) + result = self.evaluate(backend.categorical_crossentropy(t, p)) + self.assertArrayNear(result, [0.002, 0.0005, 0.17], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sparse_categorical_crossentropy_loss(self): + t = backend.constant([0, 1, 2]) + + p = 
backend.constant( + [[0.9, 0.05, 0.05], [0.05, 0.89, 0.06], [0.05, 0.01, 0.94]] + ) + result = backend.sparse_categorical_crossentropy(t, p) + self.assertArrayNear(self.evaluate(result), [0.105, 0.116, 0.062], 1e-3) + + p = backend.constant( + [[0.9, 0.05, 0.05], [0.05, 0.89, 0.01], [0.05, 0.06, 0.94]] + ) + result = backend.sparse_categorical_crossentropy(t, p, axis=0) + self.assertArrayNear(self.evaluate(result), [0.105, 0.116, 0.062], 1e-3) + + p = backend.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + result = ( + backend.sparse_categorical_crossentropy(t, p, from_logits=True), + ) + self.assertArrayNear(self.evaluate(result)[0], [0.002, 0, 0.17], 1e-3) + + p = backend.constant( + [[8.0, 0.0, 2.0], [1.0, 9.0, 3.0], [1.0, 1.0, 5.0]] + ) + result = ( + backend.sparse_categorical_crossentropy( + t, p, from_logits=True, axis=0 + ), + ) + self.assertArrayNear(self.evaluate(result)[0], [0.002, 0, 0.17], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sparse_categorical_crossentropy_loss_with_ignore_class(self): + tests = (([255, 1, 2, 2], 255), ([-1, 1, 2, 2], -1)) + p = backend.softmax( + backend.constant( + [ + [1.8, 1.2, 0.5], + [0.2, 3.8, 0.8], + [1.1, 0.4, 3.4], + [1.3, 0.7, 3.8], + ] + ) + ) + + for t, ignore_class in tests: + t = backend.constant(t) + result = backend.sparse_categorical_crossentropy( + t, p, ignore_class=ignore_class + ) + self.assertArrayNear( + self.evaluate(result), + [0.0, 0.07428224, 0.13980183, 0.11967831], + 1e-3, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sparse_cce_loss_with_ignore_class_for_segmentation(self): + t = backend.constant( + [[[0, 2], [-1, -1]], [[0, 2], [-1, -1]], [[0, 0], [0, 0]]] + ) + p = backend.constant( + [ + [ + [[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]], + [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]], + ], + [ + [[1.0, 0.0, 0.0], [0.0, 0.5, 0.5]], + [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]], + ], + [ + [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0]], + [[0.1, 0.9, 0.0], [0.2, 0.8, 0.0]], + ], + ] + ) + + expected_result = [ + [[0.0, 0.0], [0.0, 0.0]], + [[0.0, 0.693148], [0.0, 0.0]], + [[0.0, 0.0], [2.302585, 1.609438]], + ] + + # total_entries = 12 + # valid_entries = 8 + expected_mask = backend.constant( + [ + [[True, True], [False, False]], + [[True, True], [False, False]], + [[True, True], [True, True]], + ] + ) + + result = backend.sparse_categorical_crossentropy(t, p, ignore_class=-1) + mask = losses_utils.get_mask(result) + + self.assertIsNotNone( + mask, + "expected sparse_categorical_crossentropy to set the " + "`_keras_mask` attribute when `ignore_class is not None`, " + "which indicates which loss values are valid.", + ) + + result = self.evaluate(result) + mask = self.evaluate(mask) + self.assertAllEqual(mask, expected_mask) + self.assertAllClose(result, expected_result, atol=1e-6) + + @test_combinations.generate(test_combinations.combine(mode=["graph"])) + def test_sparse_categorical_crossentropy_loss_with_unknown_rank_tensor( + self, + ): + # This test only runs in graph because the TF op layer is not supported + # yet for sparse ops. 
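# [Editorial note, not part of the diff: backend.placeholder() with no shape
# produces a tensor of unknown rank. Without a known rank the backend cannot
# transpose the class axis to the end, so only the default axis=-1 is
# supported; the axis=0 calls below therefore raise ValueError.]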
+ t = backend.placeholder() + p = backend.placeholder() + o = backend.sparse_categorical_crossentropy(t, p) + + t_val = tf.convert_to_tensor([0, 1, 2]) + p_val = tf.convert_to_tensor( + [[0.9, 0.05, 0.05], [0.05, 0.89, 0.06], [0.05, 0.01, 0.94]] + ) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.105, 0.116, 0.062], 1e-3) + + # With axis set + with self.assertRaisesRegex( + ValueError, + "Cannot compute sparse categorical crossentropy with `axis=0`", + ): + o = backend.sparse_categorical_crossentropy(t, p, axis=0) + f = backend.function([t, p], o) + + _ = f([t_val, p_val]) + + # from logits + p_val = tf.convert_to_tensor( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + o = backend.sparse_categorical_crossentropy(t, p, from_logits=True) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.002, 0, 0.17], 1e-3) + + # from logits and axis set + with self.assertRaisesRegex( + ValueError, + "Cannot compute sparse categorical crossentropy with `axis=0`", + ): + o = backend.sparse_categorical_crossentropy( + t, p, from_logits=True, axis=0 + ) + f = backend.function([t, p], o) + + _ = f([t_val, p_val]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sparse_categorical_crossentropy_with_softmax(self): + t = backend.constant([0, 1, 2]) + logits = backend.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + p = backend.softmax(logits) + p = tf.identity(tf.identity(p)) + result = self.evaluate(backend.sparse_categorical_crossentropy(t, p)) + self.assertArrayNear(result, [0.002, 0.0005, 0.17], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_binary_crossentropy_from_logits_no_warnings(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + with warnings.catch_warnings(record=True) as w: + self.evaluate( + backend.binary_crossentropy(t, logits, from_logits=True) + ) + self.assertEmpty(w) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_binary_crossentropy_from_logits_with_sigmoid(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + p = activations.sigmoid(logits) + with warnings.catch_warnings(record=True) as w: + self.evaluate(backend.binary_crossentropy(t, p, from_logits=True)) + self.assertLen(w, 1) + self.assertIn("received `from_logits=True`", str(w[0].message)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_crossentropy_from_logits_with_softmax(self): + t = backend.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + logits = backend.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + p = activations.softmax(logits) + with warnings.catch_warnings(record=True) as w: + self.evaluate( + backend.categorical_crossentropy(t, p, from_logits=True) + ) + self.assertLen(w, 1) + self.assertIn("received `from_logits=True`", str(w[0].message)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sparse_categorical_crossentropy_from_logits_with_softmax(self): + t = backend.constant([0, 1, 2]) + logits = backend.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + p = activations.softmax(logits) + with warnings.catch_warnings(record=True) as w: + self.evaluate( + backend.sparse_categorical_crossentropy(t, p, 
from_logits=True) + ) + self.assertLen(w, 1) + self.assertIn("received `from_logits=True`", str(w[0].message)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_binary_focal_crossentropy_with_sigmoid(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + p = backend.sigmoid(logits) + p = tf.identity(tf.identity(p)) + result = self.evaluate( + backend.binary_focal_crossentropy(t, p, gamma=2.0) + ) + self.assertArrayNear(result[0], [7.995, 0.022, 0.701], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_focal_crossentropy_with_softmax(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + p = backend.softmax(logits) + p = tf.identity(tf.identity(p)) + result = self.evaluate( + backend.categorical_focal_crossentropy(t, p, gamma=2.0) + ) + self.assertArrayNear(result, [1.747], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_binary_focal_crossentropy_from_logits(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + result = self.evaluate( + backend.binary_focal_crossentropy( + target=t, + output=logits, + gamma=2.0, + from_logits=True, + ) + ) + self.assertArrayNear(result[0], [7.995, 0.022, 0.701], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_focal_crossentropy_from_logits(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + result = self.evaluate( + backend.categorical_focal_crossentropy( + target=t, + output=logits, + from_logits=True, + ) + ) + self.assertArrayNear(result, [1.7472], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_binary_focal_crossentropy_no_focal_effect_with_zero_gamma(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + p = backend.sigmoid(logits) + p = tf.identity(tf.identity(p)) + gamma = 0 + focal_result = self.evaluate( + backend.binary_focal_crossentropy( + target=t, + output=p, + gamma=gamma, + ) + ) + non_focal_result = self.evaluate(backend.binary_crossentropy(t, p)) + self.assertArrayNear(focal_result[0], non_focal_result[0], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_focal_crossentropy_no_focal_effect(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + p = backend.softmax(logits) + p = tf.identity(tf.identity(p)) + focal_result = self.evaluate( + backend.categorical_focal_crossentropy( + target=t, + output=p, + gamma=0.0, + alpha=1.0, + ) + ) + non_focal_result = self.evaluate(backend.categorical_crossentropy(t, p)) + self.assertArrayNear(focal_result, non_focal_result, 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_binary_weighted_focal_crossentropy_with_sigmoid(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + p = backend.sigmoid(logits) + p = tf.identity(tf.identity(p)) + result = self.evaluate( + backend.binary_focal_crossentropy( + target=t, + output=p, + apply_class_balancing=True, + ) + ) + self.assertArrayNear(result[0], [5.996, 0.006, 0.526], 1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def 
test_binary_weighted_focal_crossentropy_from_logits(self): + t = backend.constant([[0, 1, 0]]) + logits = backend.constant([[8.0, 1.0, 1.0]]) + result = self.evaluate( + backend.binary_focal_crossentropy( + target=t, + output=logits, + apply_class_balancing=True, + from_logits=True, + ) + ) + self.assertArrayNear(result[0], [5.996, 0.006, 0.526], 1e-3) - tensor = backend.switch(backend.greater(x, y), xpowy, ypowx) - self.assertEqual(backend.eval(tensor), [9.0]) - def test_unequal_rank(self): - x = tf.convert_to_tensor( - np.array([[1, 2, 3], [4, 5, 6]]), dtype='float32') - y = tf.convert_to_tensor( - np.array([1, 2, 3]), dtype='float32') +@tf_test_utils.with_control_flow_v2 +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class TestCTC(tf.test.TestCase): + def test_ctc_decode(self): + depth = 6 + seq_len_0 = 5 + input_prob_matrix_0 = np.asarray( + [ + [0.30999, 0.309938, 0.0679938, 0.0673362, 0.0708352, 0.173908], + [0.215136, 0.439699, 0.0370931, 0.0393967, 0.0381581, 0.230517], + [0.199959, 0.489485, 0.0233221, 0.0251417, 0.0233289, 0.238763], + [0.279611, 0.452966, 0.0204795, 0.0209126, 0.0194803, 0.20655], + [0.51286, 0.288951, 0.0243026, 0.0220788, 0.0219297, 0.129878], + # Random entry added in at time=5 + [0.155251, 0.164444, 0.173517, 0.176138, 0.169979, 0.160671], + ], + dtype=np.float32, + ) + + # len max_time_steps array of batch_size x depth matrices + inputs = [ + input_prob_matrix_0[t, :][np.newaxis, :] for t in range(seq_len_0) + ] + 2 * [ # Pad to max_time_steps = 8 + np.zeros((1, depth), dtype=np.float32) + ] + + inputs = backend.variable(np.asarray(inputs).transpose((1, 0, 2))) + + # batch_size length vector of sequence_lengths + input_length = backend.variable(np.array([seq_len_0], dtype=np.int32)) + # batch_size length vector of negative log probabilities + log_prob_truth = np.array( + [-3.5821197, -3.777835], # output beam 0 # output beam 1 + np.float32, + )[np.newaxis, :] + + decode_truth = [ + np.array([1, 0, -1, -1, -1, -1, -1]), + np.array([0, 1, 0, -1, -1, -1, -1]), + ] + beam_width = 2 + top_paths = 2 + + decode_pred_tf, log_prob_pred_tf = backend.ctc_decode( + inputs, + input_length, + greedy=False, + beam_width=beam_width, + top_paths=top_paths, + ) + + self.assertEqual(len(decode_pred_tf), top_paths) + log_prob_pred = backend.eval(log_prob_pred_tf) + for i in range(top_paths): + self.assertTrue( + np.all(decode_truth[i] == backend.eval(decode_pred_tf[i])) + ) + self.assertAllClose(log_prob_truth, log_prob_pred) + + def test_ctc_batch_cost(self): + with self.cached_session(): + label_lens = np.expand_dims(np.asarray([5, 4]), 1) + input_lens = np.expand_dims( + np.asarray([5, 5]), 1 + ) # number of timesteps + loss_log_probs = [3.34211, 5.42262] + + # dimensions are batch x time x categories + labels = np.asarray([[0, 1, 2, 1, 0], [0, 1, 1, 0, -1]]) + inputs = np.asarray( + [ + [ + [ + 0.633766, + 0.221185, + 0.0917319, + 0.0129757, + 0.0142857, + 0.0260553, + ], + [ + 0.111121, + 0.588392, + 0.278779, + 0.0055756, + 0.00569609, + 0.010436, + ], + [ + 0.0357786, + 0.633813, + 0.321418, + 0.00249248, + 0.00272882, + 0.0037688, + ], + [ + 0.0663296, + 0.643849, + 0.280111, + 0.00283995, + 0.0035545, + 0.00331533, + ], + [ + 0.458235, + 0.396634, + 0.123377, + 0.00648837, + 0.00903441, + 0.00623107, + ], + ], + [ + [ + 0.30176, + 0.28562, + 0.0831517, + 0.0862751, + 0.0816851, + 0.161508, + ], + [ + 0.24082, + 0.397533, + 0.0557226, + 0.0546814, + 0.0557528, + 0.19549, + ], + [ + 0.230246, + 0.450868, + 0.0389607, + 0.038309, + 
0.0391602, + 0.202456, + ], + [ + 0.280884, + 0.429522, + 0.0326593, + 0.0339046, + 0.0326856, + 0.190345, + ], + [ + 0.423286, + 0.315517, + 0.0338439, + 0.0393744, + 0.0339315, + 0.154046, + ], + ], + ], + dtype=np.float32, + ) + + labels = backend.variable(labels, dtype="int32") + inputs = backend.variable(inputs, dtype="float32") + input_lens = backend.variable(input_lens, dtype="int32") + label_lens = backend.variable(label_lens, dtype="int32") + res = backend.eval( + backend.ctc_batch_cost(labels, inputs, input_lens, label_lens) + ) + self.assertAllClose(res[:, 0], loss_log_probs, atol=1e-05) + + # test when batch_size = 1, that is, one sample only + ref = [3.34211] + input_lens = np.expand_dims(np.asarray([5]), 1) + label_lens = np.expand_dims(np.asarray([5]), 1) + + labels = np.asarray([[0, 1, 2, 1, 0]]) + inputs = np.asarray( + [ + [ + [ + 0.633766, + 0.221185, + 0.0917319, + 0.0129757, + 0.0142857, + 0.0260553, + ], + [ + 0.111121, + 0.588392, + 0.278779, + 0.0055756, + 0.00569609, + 0.010436, + ], + [ + 0.0357786, + 0.633813, + 0.321418, + 0.00249248, + 0.00272882, + 0.0037688, + ], + [ + 0.0663296, + 0.643849, + 0.280111, + 0.00283995, + 0.0035545, + 0.00331533, + ], + [ + 0.458235, + 0.396634, + 0.123377, + 0.00648837, + 0.00903441, + 0.00623107, + ], + ] + ], + dtype=np.float32, + ) + + k_labels = backend.variable(labels, dtype="int32") + k_inputs = backend.variable(inputs, dtype="float32") + k_input_lens = backend.variable(input_lens, dtype="int32") + k_label_lens = backend.variable(label_lens, dtype="int32") + res = backend.eval( + backend.ctc_batch_cost( + k_labels, k_inputs, k_input_lens, k_label_lens + ) + ) + self.assertAllClose(res[:, 0], ref, atol=1e-05) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class TestRandomOps(tf.test.TestCase): + def test_random_normal(self): + np.random.seed(123) + x = backend.random_normal((500, 500)) + val = backend.eval(x) + self.assertAllClose(np.mean(val), 0.0, atol=0.01) + self.assertAllClose(np.std(val), 1.0, atol=0.01) + + def test_random_uniform(self): + np.random.seed(123) + x = backend.random_uniform((500, 500)) + val = backend.eval(x) + self.assertAllClose(np.mean(val), 0.5, atol=0.01) + self.assertAllClose(np.max(val), 1.0, atol=0.01) + self.assertAllClose(np.min(val), 0.0, atol=0.01) + + def test_random_binomial(self): + np.random.seed(123) + x = backend.random_binomial((500, 500), p=0.5) + self.assertAllClose(np.mean(backend.eval(x)), 0.5, atol=0.01) + + def test_truncated_normal(self): + np.random.seed(123) + x = backend.truncated_normal((500, 500), mean=0.0, stddev=1.0) + x = backend.truncated_normal((1000, 1000), mean=0.0, stddev=1.0) + y = backend.eval(x) + self.assertAllClose(np.mean(y), 0.0, atol=0.01) + self.assertAllClose(np.std(y), 0.88, atol=0.01) + self.assertAllClose(np.max(y), 2.0, atol=0.01) + self.assertAllClose(np.min(y), -2.0, atol=0.01) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class FunctionTest(tf.test.TestCase): + def test_function_basics(self): + if tf.executing_eagerly(): + self.skipTest("eager backend.function does not support updates") + x1 = backend.placeholder(shape=(), dtype="float32") + x2 = backend.placeholder(shape=(), dtype="int32") + v = backend.variable(10.0) + + y1 = x1 + backend.cast(x2, "float32") + v + y2 = x1 * backend.cast(x2, "float32") + + with tf.control_dependencies([y1]): + u = backend.update(v, x1) + + f = backend.function([x1, x2], [y1, y2], updates=[u]) + output_values = f([2, 3]) + 
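        # With x1=2, x2=3, v=10: y1 = 2 + 3 + 10 = 15 and y2 = 2 * 3 = 6;
+        # the `updates` op then writes x1 (2.0) into `v`, checked below.
+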
self.assertEqual(output_values, [15.0, 6.0]) + self.assertEqual(backend.eval(v), 2.0) + + def test_function_dict_outputs(self): + x_ph = backend.placeholder(shape=(), name="x") + y_ph = backend.placeholder(shape=(), name="y") + outputs = {"x*y": y_ph * x_ph, "x*x": x_ph * x_ph} + + f = backend.function(inputs=[x_ph, y_ph], outputs=outputs) + x, y = 2.0, 5.0 + results = f([x, y]) + + self.assertEqual(results["x*y"], 10.0) + self.assertEqual(results["x*x"], 4) + + def test_function_dict_inputs(self): + placeholders = { + "x": backend.placeholder(shape=()), + "y": backend.placeholder(shape=()), + } + outputs = [placeholders["x"] * placeholders["y"]] + + f = backend.function(inputs=placeholders, outputs=outputs) + results = f({"x": 2.0, "y": 3.0}) + self.assertEqual(results[0], 6.0) + + def test_function_variable_inputs(self): + placeholders = { + "x": backend.placeholder(shape=()), + "y": backend.placeholder(shape=()), + } + outputs = [placeholders["x"] * placeholders["y"]] + + f = backend.function(inputs=placeholders, outputs=outputs) + results = f({"x": backend.variable(2.0), "y": 3.0}) + self.assertEqual(results[0], 6.0) + + def test_function_composite_variable_inputs(self): + if context.executing_eagerly(): + self.skipTest( + "Only graph mode flattens composite tensor inputs into flat " + "tensors." + ) + + class Spec(tf.TypeSpec): + value_type = property(lambda self: CompositeVariable) + + def _serialize(self): + pass + + def _component_specs(self): + pass + + def _to_components(self, value): + return value.variables + + def _from_components(self, variable_list): + return CompositeVariable(variable_list) + + class CompositeVariable(tf.__internal__.CompositeTensor): + def __init__(self, variable_list): + self.variables = variable_list + + @property + def _type_spec(self): + return Spec() + + def _convert_variables_to_tensors(self): + self.variables = tf.nest.map_structure( + tf_utils.convert_variables_to_tensors, self.variables + ) + return self + + placeholders = { + "x": backend.placeholder(shape=()), + "y": backend.placeholder(shape=()), + } + outputs = [placeholders["x"] * placeholders["y"]] + + f = backend.function(inputs=placeholders, outputs=outputs) + results = f({"x": CompositeVariable([backend.variable(2.0)]), "y": 3.0}) + self.assertEqual(results[0], 6.0) + + def test_function_single_input_output(self): + x_ph = backend.placeholder(shape=(), name="x") + output = x_ph * x_ph + f = backend.function(x_ph, output) + result = f(2.0) + self.assertEqual(result, 4.0) + + def test_tuple_updates(self): + if tf.executing_eagerly(): + self.skipTest("eager backend.function does not support updates") + + x_ph = backend.placeholder(ndim=2) + v = backend.variable(np.ones((4, 2))) + output = x_ph**2 + v + new_v = v + x_ph + f = backend.function(x_ph, output, updates=[(v, new_v)]) + input_val = np.random.random((4, 2)) + result = f(input_val) + self.assertAllClose(result, input_val**2 + 1) + self.assertAllClose(backend.get_value(v), np.ones((4, 2)) + input_val) - def true_func(): - return x - def false_func(): - return y +class BackendGraphTests(tf.test.TestCase, parameterized.TestCase): + @test_combinations.generate(test_combinations.combine(mode=["graph"])) + def test_function_placeholder_with_default(self): + with backend.get_graph().as_default(): + x1 = tf.compat.v1.placeholder_with_default( + np.array(2.0, dtype="float32"), shape=() + ) + x2 = tf.compat.v1.placeholder_with_default( + np.array(3, dtype="int32"), shape=() + ) + y1 = x1 + backend.cast(x2, "float32") + y2 = x1 * 
backend.cast(x2, "float32") + f = backend.function([x1, x2], [y1, y2]) + output_values = f([4, 5]) + self.assertEqual(output_values, [9.0, 20.0]) + output_values = f([None, None]) + self.assertEqual(output_values, [5.0, 6.0]) + + def test_function_tf_feed_symbols(self): + # Test Keras backend functions with TF tensor inputs. + with tf.Graph().as_default(), self.cached_session(): + # Test feeding a resource variable to `function`. + x1 = backend.placeholder(shape=()) + x2 = backend.placeholder(shape=()) + lr = backend.learning_phase() # Include a placeholder_with_default. + + y1 = backend.variable(10.0) + y2 = 3 + + f = backend.function( + inputs=[x1, x2, lr], + outputs=[x1 + 1, backend.in_train_phase(x2 + 2, x2 - 1)], + ) + outs = f([y1, y2, None]) # Use default learning_phase value. + self.assertEqual(outs, [11.0, 2.0]) + outs = f([y1, y2, 1]) # Set learning phase value. + self.assertEqual(outs, [11.0, 5.0]) + + # Test triggering a callable refresh by changing the input. + y3 = backend.constant(20.0) # Test with tensor + outs = f([y3, y2, None]) + self.assertEqual(outs, [21.0, 2.0]) + + y4 = 4 # Test with non-symbol + outs = f([y4, y2, None]) + self.assertEqual(outs, [5.0, 2.0]) + + # Test with a different dtype + y5 = backend.constant(10.0, dtype="float64") + outs = f([y5, y2, None]) + self.assertEqual(outs, [11.0, 2.0]) + + def test_function_tf_fetches(self): + # Additional operations can be passed to tf.compat.v1.Session().run() + # via its `fetches` arguments. In contrast to `updates` argument of + # backend.function() these do not have control dependency on `outputs` + # so they can run in parallel. Also they should not contribute to output + # of backend.function(). + with tf.Graph().as_default(), self.cached_session(): + x = backend.variable(0.0) + y = backend.variable(0.0) + x_placeholder = backend.placeholder(shape=()) + y_placeholder = backend.placeholder(shape=()) + + f = backend.function( + inputs=[x_placeholder, y_placeholder], + outputs=[x_placeholder + y_placeholder], + updates=[(x, x_placeholder + 1.0)], + fetches=[backend.update(y, 5.0)], + ) + output = f([10.0, 20.0]) + self.assertEqual(output, [30.0]) + self.assertEqual( + backend.get_session().run(fetches=[x, y]), [11.0, 5.0] + ) + + def test_function_tf_feed_dict(self): + # Additional substitutions can be passed to + # `tf.compat.v1.Session().run()` via its `feed_dict` arguments. Note + # that the feed_dict is passed once in the constructor but we can modify + # the values in the dictionary. Through this feed_dict we can provide + # additional substitutions besides Keras inputs. 
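+        # Below, `y_placeholder` never appears in `inputs`; it is fed solely
+        # through `feed_dict`, and mutating that dict between calls changes
+        # what the next invocation feeds.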
+ with tf.Graph().as_default(), self.cached_session(): + x = backend.variable(0.0) + y = backend.variable(0.0) + x_placeholder = backend.placeholder(shape=()) + y_placeholder = backend.placeholder(shape=()) + + feed_dict = {y_placeholder: 3.0} + fetches = [backend.update(y, y_placeholder * 10.0)] + f = backend.function( + inputs=[x_placeholder], + outputs=[x_placeholder + 1.0], + updates=[(x, x_placeholder + 10.0)], + feed_dict=feed_dict, + fetches=fetches, + ) + output = f([10.0]) + self.assertEqual(output, [11.0]) + self.assertEqual( + backend.get_session().run(fetches=[x, y]), [20.0, 30.0] + ) + + # updated value in feed_dict will be modified within the + # K.function() + feed_dict[y_placeholder] = 4.0 + output = f([20.0]) + self.assertEqual(output, [21.0]) + self.assertEqual( + backend.get_session().run(fetches=[x, y]), [30.0, 40.0] + ) + + def test_function_tf_run_options_with_run_metadata(self): + with tf.Graph().as_default(), self.cached_session(): + x_placeholder = backend.placeholder(shape=()) + y_placeholder = backend.placeholder(shape=()) + + run_options = tf.compat.v1.RunOptions(output_partition_graphs=True) + run_metadata = tf.compat.v1.RunMetadata() + # enable run_options. + f = backend.function( + inputs=[x_placeholder, y_placeholder], + outputs=[x_placeholder + y_placeholder], + options=run_options, + run_metadata=run_metadata, + ) + output = f([10.0, 20.0]) + self.assertEqual(output, [30.0]) + self.assertNotEmpty(run_metadata.partition_graphs) + # disable run_options. + f1 = backend.function( + inputs=[x_placeholder, y_placeholder], + outputs=[x_placeholder + y_placeholder], + run_metadata=run_metadata, + ) + output1 = f1([10.0, 20.0]) + self.assertEqual(output1, [30.0]) + self.assertEmpty(run_metadata.partition_graphs) + + def test_function_fetch_callbacks(self): + class CallbackStub: + def __init__(self): + self.times_called = 0 + self.callback_result = 0 + + def _fetch_callback(self, result): + self.times_called += 1 + self.callback_result = result + + with tf.Graph().as_default(), self.cached_session(): + callback = CallbackStub() + x_placeholder = backend.placeholder(shape=()) + y_placeholder = backend.placeholder(shape=()) + + callback_op = x_placeholder * y_placeholder + + f = backend.function( + inputs=[x_placeholder, y_placeholder], + outputs=[x_placeholder + y_placeholder], + ) + f.fetches.append(callback_op) + f.fetch_callbacks[callback_op] = callback._fetch_callback + + _ = f([10.0, 20.0]) + + self.assertEqual(callback.times_called, 1) + self.assertEqual(callback.callback_result, 200) + + def test_get_session_different_graphs(self): + with tf.Graph().as_default(): + x = backend.constant(1) + session = backend.get_session() + self.assertIs(session, backend.get_session((x,))) + self.assertIs(session, backend.get_session()) + with tf.Graph().as_default(): + self.assertIs(session, backend.get_session((x,))) + self.assertIsNot(session, backend.get_session()) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class ControlOpsTests(tf.test.TestCase): + def test_function_switch_basics(self): + x = tf.constant(2.0) + y = tf.constant(3.0) - with self.assertRaisesRegex(ValueError, - 'Rank of `condition` should be less than'): - backend.switch(backend.equal(x, x), false_func, true_func) + def xpowy(): + return backend.pow(x, y) + def ypowx(): + return backend.pow(y, x) -class ContextValueCacheTest(tf.test.TestCase): + tensor = backend.switch(backend.less(x, y), xpowy, ypowx) + self.assertEqual(backend.eval(tensor), [8.0]) + + tensor = 
backend.switch(backend.greater(x, y), xpowy, ypowx) + self.assertEqual(backend.eval(tensor), [9.0]) - def test_cache(self): - cache = backend.ContextValueCache(list) - graph1 = tf.Graph() - graph2 = tf.Graph() + def test_unequal_rank(self): + x = tf.convert_to_tensor( + np.array([[1, 2, 3], [4, 5, 6]]), dtype="float32" + ) + y = tf.convert_to_tensor(np.array([1, 2, 3]), dtype="float32") - cache[graph1].append(1) - with graph1.as_default(): - cache[None].append(2) + def true_func(): + return x - with graph2.as_default(): - cache[None].append(3) - cache[graph2].append(4) + def false_func(): + return y - self.assertAllEqual(cache[graph1], [1, 2]) - self.assertAllEqual(cache[graph2], [3, 4]) + with self.assertRaisesRegex( + ValueError, "Rank of `condition` should be less than" + ): + backend.switch(backend.equal(x, x), false_func, true_func) - with tf.__internal__.eager_context.eager_mode(): - cache[None].append(5) - cache[None].append(6) - self.assertAllEqual(cache[None], [5, 6]) - self.assertLen(cache, 3) +class ContextValueCacheTest(tf.test.TestCase): + def test_cache(self): + cache = backend.ContextValueCache(list) + graph1 = tf.Graph() + graph2 = tf.Graph() - del graph1 - gc.collect() - self.assertLen(cache, 2) + cache[graph1].append(1) + with graph1.as_default(): + cache[None].append(2) - def test_cache_in_parent_graph(self): - cache = backend.ContextValueCache(int) - cache.setdefault(None, backend.constant(5)) + with graph2.as_default(): + cache[None].append(3) + cache[graph2].append(4) - with tf.Graph().as_default() as g: - # g is not a child graph of the default test context, so the recursive - # lookup will create a new default value. - self.assertAllEqual(cache[g], 0) + self.assertAllEqual(cache[graph1], [1, 2]) + self.assertAllEqual(cache[graph2], [3, 4]) - @tf.function - def fn(): - # The function graph is a child of the default test context, so - # __getitem__ will return the previously saved value. - return cache[tf.compat.v1.get_default_graph()] + with tf.__internal__.eager_context.eager_mode(): + cache[None].append(5) + cache[None].append(6) + self.assertAllEqual(cache[None], [5, 6]) - self.assertEqual(self.evaluate(fn()), 5) + self.assertLen(cache, 3) + del graph1 + gc.collect() + self.assertLen(cache, 2) -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class RandomGeneratorTest(tf.test.TestCase, parameterized.TestCase): + def test_cache_in_parent_graph(self): + cache = backend.ContextValueCache(int) + cache.setdefault(None, backend.constant(5)) - def test_generator_reproducibility(self): - seed = 1337 - gen1 = backend.RandomGenerator(seed, rng_type='stateful') - output1 = gen1.random_normal(shape=[2, 3]) - output2 = gen1.random_normal(shape=[2, 3]) - - self.assertNotAllClose(output1, output2) - - gen2 = backend.RandomGenerator(seed, rng_type='stateful') - output3 = gen2.random_normal(shape=[2, 3]) - output4 = gen2.random_normal(shape=[2, 3]) - - if tf.compat.v1.executing_eagerly(): - # Make sure generator with same seed will produce same sequence. 
- self.assertAllEqual(output1, output3) - self.assertAllEqual(output2, output4) - - def test_unseeded(self): - seed = None - gen1 = backend.RandomGenerator(seed, rng_type='stateful') - output1 = gen1.random_normal(shape=[2, 3]) - - gen2 = backend.RandomGenerator(seed, rng_type='stateful') - output2 = gen2.random_normal(shape=[2, 3]) - - self.assertNotAllClose(output1, output2) - - def test_implementation(self): - seed = 1337 - seeded = backend.RandomGenerator(seed, rng_type='stateful') - seeded._maybe_init() - unseeded = backend.RandomGenerator(None, rng_type='stateful') - unseeded._maybe_init() - if tf.compat.v1.executing_eagerly(): - # Make sure we use tf.random.Generator in v2. - self.assertIsNotNone(seeded._generator) - self.assertIsNotNone(unseeded._generator) - else: - # In v1, we can't use tf.random.Generator since it is not compatible with - # graph mode. - self.assertIsNone(seeded._generator) - self.assertIsNone(unseeded._generator) - - def test_unseeded_with_utils_set_random_seed(self): - keras_seed = 1337 - tf_utils.set_random_seed(keras_seed) - gen1 = backend.RandomGenerator(seed=None, rng_type='stateful') - output1 = gen1.random_normal(shape=[2, 3]) - output2 = gen1.random_normal(shape=[2, 3]) - - self.assertNotAllClose(output1, output2) - - # Make sure even with unseeded backend generator, as long as we set the - # keras random seed, it will make the generator to produce the same - # sequence. This will ensure all the client are in sync in the multi-client - # setting, when they all set the keras seed. - tf_utils.set_random_seed(keras_seed) - gen2 = backend.RandomGenerator(seed=None, rng_type='stateful') - output3 = gen2.random_normal(shape=[2, 3]) - output4 = gen2.random_normal(shape=[2, 3]) - - gen3 = backend.RandomGenerator(seed=None, rng_type='stateful') - output5 = gen3.random_normal(shape=[2, 3]) - output6 = gen3.random_normal(shape=[2, 3]) - - if tf.compat.v1.executing_eagerly(): - # The generator is only used in the tf2 with eager. - self.assertAllEqual(output1, output3) - self.assertAllEqual(output2, output4) - - # Also make sure different generator instance are still producing - # different result - self.assertNotAllEqual(output3, output5) - self.assertNotAllEqual(output4, output6) - - def test_force_stateless(self): - gen = backend.RandomGenerator(seed=None, rng_type='stateless') - output1 = gen.random_normal(shape=[2, 3]) - seed1 = gen._seed - output2 = gen.random_normal(shape=[2, 3]) - seed2 = gen._seed - - self.assertAllClose(output1, output2) - # Make sure we always use the same seed, and it is not None - self.assertEqual(seed1, seed2) - self.assertIsNotNone(seed1) - - # Make sure a new seed is used when creating a new generator instance. - gen2 = backend.RandomGenerator(seed=None, rng_type='stateless') - output3 = gen2.random_normal(shape=[2, 3]) - seed3 = gen2._seed - output4 = gen2.random_normal(shape=[2, 3]) - seed4 = gen2._seed - - self.assertAllClose(output3, output4) - self.assertEqual(seed3, seed4) - self.assertNotEqual(seed1, seed3) - - def test_force_stateless_with_seed(self): - seed = 1337 - gen = backend.RandomGenerator(seed=seed, rng_type='stateless') - output1 = gen.random_normal(shape=[2, 3]) - seed1 = gen._seed - output2 = gen.random_normal(shape=[2, 3]) - seed2 = gen._seed - - self.assertAllClose(output1, output2) - # Make sure we always use the same seed, and it is not None - self.assertEqual(seed, seed1) - self.assertEqual(seed, seed2) - - # Make sure RandomGenerator always generate same value with same seed. 
- gen2 = backend.RandomGenerator(seed=seed, rng_type='stateless') - output3 = gen2.random_normal(shape=[2, 3]) - self.assertAllClose(output3, output1) - - @parameterized.named_parameters( - ('seeded', 1337), ('unseeded', None) - ) - def test_stateless_with_seed_delta(self, seed): - gen = backend.RandomGenerator(seed=seed, rng_type='stateless') - output1 = gen.random_normal(shape=[2, 3], nonce=hash((1, 1))) - seed1 = gen._seed - output2 = gen.random_normal(shape=[2, 3], nonce=hash((1, 1))) - seed2 = gen._seed - output3 = gen.random_normal(shape=[2, 3], nonce=hash((2, 1))) - seed3 = gen._seed - - self.assertAllClose(output1, output2) - # Different seed_delta will produce different value. - self.assertNotAllClose(output1, output3) - # Make sure the internal seed is not changed at all. - self.assertEqual(seed1, seed2) - self.assertEqual(seed1, seed3) - - def test_unknown_rng_type(self): - with self.assertRaisesRegex(ValueError, 'Got: unknown'): - backend.RandomGenerator(seed=None, rng_type='unknown') - - def test_prefer_stateless_over_global_generator(self): - try: - generator_enabled = backend.is_tf_random_generator_enabled() - if not generator_enabled: - backend.enable_tf_random_generator() + with tf.Graph().as_default() as g: + # g is not a child graph of the default test context, so the + # recursive lookup will create a new default value. + self.assertAllEqual(cache[g], 0) - seed = 1337 - gen = backend.RandomGenerator(seed=seed, rng_type='stateless') - output1 = gen.random_normal(shape=[2, 3]) - output2 = gen.random_normal(shape=[2, 3]) + @tf.function + def fn(): + # The function graph is a child of the default test context, so + # __getitem__ will return the previously saved value. + return cache[tf.compat.v1.get_default_graph()] - self.assertIsNone(gen._generator) - self.assertAllClose(output1, output2) - finally: - if not generator_enabled: - # Change the global flag back. - backend.disable_tf_random_generator() + self.assertEqual(self.evaluate(fn()), 5) -if __name__ == '__main__': - tf.test.main() +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class RandomGeneratorTest(tf.test.TestCase, parameterized.TestCase): + def test_generator_reproducibility(self): + seed = 1337 + gen1 = backend.RandomGenerator(seed, rng_type="stateful") + output1 = gen1.random_normal(shape=[2, 3]) + output2 = gen1.random_normal(shape=[2, 3]) + + self.assertNotAllClose(output1, output2) + + gen2 = backend.RandomGenerator(seed, rng_type="stateful") + output3 = gen2.random_normal(shape=[2, 3]) + output4 = gen2.random_normal(shape=[2, 3]) + + if tf.compat.v1.executing_eagerly(): + # Make sure generator with same seed will produce same sequence. + self.assertAllEqual(output1, output3) + self.assertAllEqual(output2, output4) + + def test_unseeded(self): + seed = None + gen1 = backend.RandomGenerator(seed, rng_type="stateful") + output1 = gen1.random_normal(shape=[2, 3]) + + gen2 = backend.RandomGenerator(seed, rng_type="stateful") + output2 = gen2.random_normal(shape=[2, 3]) + + self.assertNotAllClose(output1, output2) + + def test_implementation(self): + seed = 1337 + seeded = backend.RandomGenerator(seed, rng_type="stateful") + seeded._maybe_init() + unseeded = backend.RandomGenerator(None, rng_type="stateful") + unseeded._maybe_init() + if tf.compat.v1.executing_eagerly(): + # Make sure we use tf.random.Generator in v2. 
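+            # `_maybe_init()` above forces the lazily-created generator to
+            # exist, so `_generator` is populated under eager execution and
+            # stays None in v1 graph mode.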
+ self.assertIsNotNone(seeded._generator) + self.assertIsNotNone(unseeded._generator) + else: + # In v1, we can't use tf.random.Generator since it is not compatible + # with graph mode. + self.assertIsNone(seeded._generator) + self.assertIsNone(unseeded._generator) + + def test_unseeded_with_utils_set_random_seed(self): + keras_seed = 1337 + tf_utils.set_random_seed(keras_seed) + gen1 = backend.RandomGenerator(seed=None, rng_type="stateful") + output1 = gen1.random_normal(shape=[2, 3]) + output2 = gen1.random_normal(shape=[2, 3]) + + self.assertNotAllClose(output1, output2) + + # Make sure even with unseeded backend generator, as long as we set the + # keras random seed, it will make the generator to produce the same + # sequence. This will ensure all the client are in sync in the + # multi-client setting, when they all set the keras seed. + tf_utils.set_random_seed(keras_seed) + gen2 = backend.RandomGenerator(seed=None, rng_type="stateful") + output3 = gen2.random_normal(shape=[2, 3]) + output4 = gen2.random_normal(shape=[2, 3]) + + gen3 = backend.RandomGenerator(seed=None, rng_type="stateful") + output5 = gen3.random_normal(shape=[2, 3]) + output6 = gen3.random_normal(shape=[2, 3]) + + if tf.compat.v1.executing_eagerly(): + # The generator is only used in the tf2 with eager. + self.assertAllEqual(output1, output3) + self.assertAllEqual(output2, output4) + + # Also make sure different generator instance are still producing + # different result + self.assertNotAllEqual(output3, output5) + self.assertNotAllEqual(output4, output6) + + def test_force_stateless(self): + gen = backend.RandomGenerator(seed=None, rng_type="stateless") + output1 = gen.random_normal(shape=[2, 3]) + seed1 = gen._seed + output2 = gen.random_normal(shape=[2, 3]) + seed2 = gen._seed + + self.assertAllClose(output1, output2) + # Make sure we always use the same seed, and it is not None + self.assertEqual(seed1, seed2) + self.assertIsNotNone(seed1) + + # Make sure a new seed is used when creating a new generator instance. + gen2 = backend.RandomGenerator(seed=None, rng_type="stateless") + output3 = gen2.random_normal(shape=[2, 3]) + seed3 = gen2._seed + output4 = gen2.random_normal(shape=[2, 3]) + seed4 = gen2._seed + + self.assertAllClose(output3, output4) + self.assertEqual(seed3, seed4) + self.assertNotEqual(seed1, seed3) + + def test_force_stateless_with_seed(self): + seed = 1337 + gen = backend.RandomGenerator(seed=seed, rng_type="stateless") + output1 = gen.random_normal(shape=[2, 3]) + seed1 = gen._seed + output2 = gen.random_normal(shape=[2, 3]) + seed2 = gen._seed + + self.assertAllClose(output1, output2) + # Make sure we always use the same seed, and it is not None + self.assertEqual(seed, seed1) + self.assertEqual(seed, seed2) + + # Make sure RandomGenerator always generate same value with same seed. + gen2 = backend.RandomGenerator(seed=seed, rng_type="stateless") + output3 = gen2.random_normal(shape=[2, 3]) + self.assertAllClose(output3, output1) + + @parameterized.named_parameters(("seeded", 1337), ("unseeded", None)) + def test_stateless_with_seed_delta(self, seed): + gen = backend.RandomGenerator(seed=seed, rng_type="stateless") + output1 = gen.random_normal(shape=[2, 3], nonce=hash((1, 1))) + seed1 = gen._seed + output2 = gen.random_normal(shape=[2, 3], nonce=hash((1, 1))) + seed2 = gen._seed + output3 = gen.random_normal(shape=[2, 3], nonce=hash((2, 1))) + seed3 = gen._seed + + self.assertAllClose(output1, output2) + # Different seed_delta will produce different value. 
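+        # The nonce only perturbs the seed passed to the stateless op, so
+        # calls sharing a nonce reproduce the same values while `_seed`
+        # itself is never mutated.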
+ self.assertNotAllClose(output1, output3) + # Make sure the internal seed is not changed at all. + self.assertEqual(seed1, seed2) + self.assertEqual(seed1, seed3) + + def test_unknown_rng_type(self): + with self.assertRaisesRegex(ValueError, "Got: unknown"): + backend.RandomGenerator(seed=None, rng_type="unknown") + + def test_prefer_stateless_over_global_generator(self): + try: + generator_enabled = backend.is_tf_random_generator_enabled() + if not generator_enabled: + backend.enable_tf_random_generator() + + seed = 1337 + gen = backend.RandomGenerator(seed=seed, rng_type="stateless") + output1 = gen.random_normal(shape=[2, 3]) + output2 = gen.random_normal(shape=[2, 3]) + + self.assertIsNone(gen._generator) + self.assertAllClose(output1, output2) + finally: + if not generator_enabled: + # Change the global flag back. + backend.disable_tf_random_generator() + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/BUILD b/keras/benchmarks/BUILD index 37085c716478..eacb26a3a36c 100644 --- a/keras/benchmarks/BUILD +++ b/keras/benchmarks/BUILD @@ -1,9 +1,13 @@ # Description: # Implementation of Keras benchmarks. +# Placeholder: load unaliased py_library +# Placeholder: load unaliased py_test +# Placeholder: load unaliased py_binary load("@org_keras//keras:keras.bzl", "cuda_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = ["//visibility:public"], licenses = ["notice"], ) @@ -134,7 +138,7 @@ py_test( ":profiler_lib", "//:expect_tensorflow_installed", "//keras/api:keras_api", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", ], ) diff --git a/keras/benchmarks/benchmark_util.py b/keras/benchmarks/benchmark_util.py index 564fade27d79..a37b71ac0196 100644 --- a/keras/benchmarks/benchmark_util.py +++ b/keras/benchmarks/benchmark_util.py @@ -14,203 +14,211 @@ # ============================================================================== """Common utils for benchmarks.""" -import tensorflow.compat.v2 as tf - import timeit + import numpy as np +from keras import callbacks from keras.benchmarks import distribution_util def get_benchmark_name(name): - """Split the suffix of the benchmark name. + """Split the suffix of the benchmark name. - For example, for the name = 'benchmark_layer_call__Conv2D_small_shape', - the return value is ['Conv2D', 'small', 'shape']. + For example, for the name = 'benchmark_layer_call__Conv2D_small_shape', + the return value is ['Conv2D', 'small', 'shape']. - This is to generate the metadata of the benchmark test. + This is to generate the metadata of the benchmark test. - Args: - name: A string, the benchmark name. + Args: + name: A string, the benchmark name. - Returns: - A list of strings of the suffix in the benchmark name. - """ - if '__' not in name or '_' not in name: - raise ValueError('The format of the benchmark name is wrong.') - return name.split('__')[-1].split('_') + Returns: + A list of strings of the suffix in the benchmark name. + """ + if "__" not in name or "_" not in name: + raise ValueError("The format of the benchmark name is wrong.") + return name.split("__")[-1].split("_") def generate_benchmark_params_cpu_gpu(*params_list): - """Extend the benchmark names with CPU and GPU suffix. - - Args: - *params_list: A list of tuples represents the benchmark parameters. - - Returns: - A list of strings with the benchmark name extended with CPU and GPU suffix. 
- """ - benchmark_params = [] - for params in params_list: - benchmark_params.extend([ - ((param[0] + '_CPU',) + param[1:]) for param in params - ]) - benchmark_params.extend([ - ((param[0] + '_GPU',) + param[1:]) for param in params - ]) - return benchmark_params - - -def get_keras_examples_metadata(keras_model, - batch_size, - impl='.keras.cfit_graph'): - return { - 'model_name': 'keras_examples', - 'implementation': keras_model + impl, - 'parameters': 'bs_' + str(batch_size), - } - - -class TimerCallBack(tf.keras.callbacks.Callback): - """Callback for logging time in each epoch or batch.""" - - def __init__(self): - self.times = [] - self.timer = timeit.default_timer - self.startup_time = timeit.default_timer() - self.recorded_startup = False - - def on_epoch_begin(self, e, logs): - self.epoch_start_time = self.timer() - - def on_epoch_end(self, e, logs): - self.times.append(self.timer() - self.epoch_start_time) - - def on_batch_end(self, e, logs): - if not self.recorded_startup: - self.startup_time = self.timer() - self.startup_time - self.recorded_startup = True - - -def measure_performance(model_fn, - x=None, - y=None, - epochs=2, - batch_size=32, - run_iters=4, - optimizer=None, - loss=None, - metrics=None, - verbose=0, - num_gpus=0, - distribution_strategy='off'): - """Run models and measure the performance. - - Args: - model_fn: Model function to be benchmarked. - x: Input data. See `x` in the `fit()` method of `keras.Model`. - y: Target data. See `y` in the `fit()` method of `keras.Model`. - epochs: Integer. Number of epochs to train the model. - If unspecified, `epochs` will default to 2. - batch_size: Integer. Number of samples per gradient update. If unspecified, - `batch_size` will default to 32. - run_iters: Integer. Number of iterations to run the performance measurement. - If unspecified, `run_iters` will default to 4. - optimizer: String (name of optimizer) or optimizer instance. See - `tf.keras.optimizers`. - loss: String (name of objective function), objective function or - `tf.keras.losses.Loss` instance. See `tf.keras.losses`. - metrics: Lists of metrics to be evaluated by the model during training. See - `metrics` in the `compile()` method of `keras.Model`. - verbose: 0, 1, 2. Verbosity mode. See `verbose` in the `fit()` method of - `keras.Model`. If unspecified, `verbose` will default to 0. - num_gpus: Number of GPUs to run the model. - distribution_strategy: Distribution strategies. It could be - `multi_worker_mirrored`, `one_device`, `mirrored`. If unspecified, - `distribution_strategy` will default to 'off'. Note that, `TPU` - and `parameter_server` are not supported yet. - - Returns: - Performance summary, which contains build_time, compile_time, - startup_time, avg_epoch_time, wall_time, exp_per_sec, epochs, - distribution_strategy. - - Raise: - ValueError: If `x` is none or if `optimizer` is not provided or - if `loss` is not provided or if `num_gpus` is negative. - """ - if 'x' is None: - raise ValueError('Input data is required.') - if 'optimizer' is None: - raise ValueError('Optimizer is required.') - if 'loss' is None: - raise ValueError('Loss function is required.') - if num_gpus < 0: - raise ValueError('`num_gpus` cannot be negative') - - # TODO(xingyulong): we will add tfds support later and - # get the `num_examples` from info. 
- num_examples = x.shape[0] - - build_time_list, compile_time_list, startup_time_list = [], [], [] - avg_epoch_time_list, wall_time_list, exp_per_sec_list = [], [], [] - total_num_examples = epochs * num_examples - - strategy = distribution_util.get_distribution_strategy( - distribution_strategy=distribution_strategy, num_gpus=num_gpus) - - for _ in range(run_iters): - timer = timeit.default_timer - start_time = timer() - # Init the distribution strategy scope for each iteration. - strategy_scope = distribution_util.get_strategy_scope(strategy) - with strategy_scope: - t0 = timer() - model = model_fn() - build_time = timer() - t0 - - t1 = timer() - model.compile( - optimizer=optimizer, - loss=loss, - metrics=metrics, - ) - compile_time = timer() - t1 - # Run one warm up epoch. - model.fit(x=x, y=y, batch_size=batch_size, epochs=1) - cbk = TimerCallBack() - t2 = timer() - model.fit( - x=x, - y=y, - batch_size=batch_size, - epochs=epochs, - callbacks=[cbk], - verbose=verbose) - end_time = timer() - - build_time_list.append(build_time) - compile_time_list.append(compile_time) - startup_time_list.append(cbk.startup_time) - avg_epoch_time_list.append(np.mean(cbk.times)) - wall_time_list.append(end_time - start_time) - exp_per_sec_list.append(total_num_examples / (end_time - t2)) - - metrics = [] - metrics.append({'name': 'build_time', 'value': np.mean(build_time_list)}) - metrics.append({'name': 'compile_time', 'value': np.mean(compile_time_list)}) - metrics.append({'name': 'startup_time', 'value': np.mean(startup_time_list)}) - metrics.append({ - 'name': 'avg_epoch_time', - 'value': np.mean(avg_epoch_time_list) - }) - metrics.append({'name': 'exp_per_sec', 'value': np.mean(exp_per_sec_list)}) - metrics.append({'name': 'epochs', 'value': epochs}) - - wall_time = np.mean(wall_time_list) - extras = { - 'distribution_strategy': distribution_strategy, - 'num_gpus': num_gpus - } - - return metrics, wall_time, extras + """Extend the benchmark names with CPU and GPU suffix. + + Args: + *params_list: A list of tuples represents the benchmark parameters. + + Returns: + A list of strings with the benchmark name extended with CPU and GPU + suffix. + """ + benchmark_params = [] + for params in params_list: + benchmark_params.extend( + [((param[0] + "_CPU",) + param[1:]) for param in params] + ) + benchmark_params.extend( + [((param[0] + "_GPU",) + param[1:]) for param in params] + ) + return benchmark_params + + +def get_keras_examples_metadata( + keras_model, batch_size, impl=".keras.cfit_graph" +): + return { + "model_name": "keras_examples", + "implementation": keras_model + impl, + "parameters": "bs_" + str(batch_size), + } + + +class TimerCallBack(callbacks.Callback): + """Callback for logging time in each epoch or batch.""" + + def __init__(self): + self.times = [] + self.timer = timeit.default_timer + self.startup_time = timeit.default_timer() + self.recorded_startup = False + + def on_epoch_begin(self, e, logs): + self.epoch_start_time = self.timer() + + def on_epoch_end(self, e, logs): + self.times.append(self.timer() - self.epoch_start_time) + + def on_batch_end(self, e, logs): + if not self.recorded_startup: + self.startup_time = self.timer() - self.startup_time + self.recorded_startup = True + + +def measure_performance( + model_fn, + x=None, + y=None, + epochs=2, + batch_size=32, + run_iters=4, + optimizer=None, + loss=None, + metrics=None, + verbose=0, + num_gpus=0, + distribution_strategy="off", +): + """Run models and measure the performance. 
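+
+    Each iteration builds a fresh model under the requested distribution
+    strategy scope, times the build, compile and fit phases separately, and
+    the summary averages those timings over `run_iters` runs.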
+ + Args: + model_fn: Model function to be benchmarked. + x: Input data. See `x` in the `fit()` method of `keras.Model`. + y: Target data. See `y` in the `fit()` method of `keras.Model`. + epochs: Integer. Number of epochs to train the model. + If unspecified, `epochs` will default to 2. + batch_size: Integer. Number of samples per gradient update. If + unspecified, `batch_size` will default to 32. + run_iters: Integer. Number of iterations to run the performance + measurement. If unspecified, `run_iters` will default to 4. + optimizer: String (name of optimizer) or optimizer instance. See + `tf.keras.optimizers`. + loss: String (name of objective function), objective function or + `tf.keras.losses.Loss` instance. See `tf.keras.losses`. + metrics: Lists of metrics to be evaluated by the model during training. + See `metrics` in the `compile()` method of `keras.Model`. + verbose: 0, 1, 2. Verbosity mode. See `verbose` in the `fit()` method of + `keras.Model`. If unspecified, `verbose` will default to 0. + num_gpus: Number of GPUs to run the model. + distribution_strategy: Distribution strategies. It could be + `multi_worker_mirrored`, `one_device`, `mirrored`. If unspecified, + `distribution_strategy` will default to 'off'. Note that, `TPU` + and `parameter_server` are not supported yet. + + Returns: + Performance summary, which contains build_time, compile_time, + startup_time, avg_epoch_time, wall_time, exp_per_sec, epochs, + distribution_strategy. + + Raise: + ValueError: If `x` is none or if `optimizer` is not provided or + if `loss` is not provided or if `num_gpus` is negative. + """ + if x is None: + raise ValueError("Input data is required.") + elif optimizer is None: + raise ValueError("Optimizer is required.") + elif loss is None: + raise ValueError("Loss function is required.") + elif num_gpus < 0: + raise ValueError("`num_gpus` cannot be negative") + + # TODO(xingyulong): we will add tfds support later and + # get the `num_examples` from info. + num_examples = x.shape[0] + + build_time_list, compile_time_list, startup_time_list = [], [], [] + avg_epoch_time_list, wall_time_list, exp_per_sec_list = [], [], [] + total_num_examples = epochs * num_examples + + strategy = distribution_util.get_distribution_strategy( + distribution_strategy=distribution_strategy, num_gpus=num_gpus + ) + + for _ in range(run_iters): + timer = timeit.default_timer + start_time = timer() + # Init the distribution strategy scope for each iteration. + strategy_scope = distribution_util.get_strategy_scope(strategy) + with strategy_scope: + t0 = timer() + model = model_fn() + build_time = timer() - t0 + + t1 = timer() + model.compile( + optimizer=optimizer, + loss=loss, + metrics=metrics, + ) + compile_time = timer() - t1 + # Run one warm up epoch. 
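+            # The warm-up epoch keeps one-time startup cost (e.g. function
+            # tracing) out of the timed region; only the epochs run after
+            # `t2` below count toward `exp_per_sec`.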
+ model.fit(x=x, y=y, batch_size=batch_size, epochs=1) + cbk = TimerCallBack() + t2 = timer() + model.fit( + x=x, + y=y, + batch_size=batch_size, + epochs=epochs, + callbacks=[cbk], + verbose=verbose, + ) + end_time = timer() + + build_time_list.append(build_time) + compile_time_list.append(compile_time) + startup_time_list.append(cbk.startup_time) + avg_epoch_time_list.append(np.mean(cbk.times)) + wall_time_list.append(end_time - start_time) + exp_per_sec_list.append(total_num_examples / (end_time - t2)) + + metrics = [] + metrics.append({"name": "build_time", "value": np.mean(build_time_list)}) + metrics.append( + {"name": "compile_time", "value": np.mean(compile_time_list)} + ) + metrics.append( + {"name": "startup_time", "value": np.mean(startup_time_list)} + ) + metrics.append( + {"name": "avg_epoch_time", "value": np.mean(avg_epoch_time_list)} + ) + metrics.append({"name": "exp_per_sec", "value": np.mean(exp_per_sec_list)}) + metrics.append({"name": "epochs", "value": epochs}) + + wall_time = np.mean(wall_time_list) + extras = { + "distribution_strategy": distribution_strategy, + "num_gpus": num_gpus, + } + + return metrics, wall_time, extras diff --git a/keras/benchmarks/benchmark_util_test.py b/keras/benchmarks/benchmark_util_test.py index fb14d5ab63b7..a667f53c5fda 100644 --- a/keras/benchmarks/benchmark_util_test.py +++ b/keras/benchmarks/benchmark_util_test.py @@ -20,30 +20,29 @@ class BenchmarkUtilTest(tf.test.TestCase): - - def test_get_benchmark_name(self): - name = "benchmark_layer_call__Conv2D_small_shape" - expected = ["Conv2D", "small", "shape"] - out = benchmark_util.get_benchmark_name(name) - self.assertAllEqual(out, expected) - - def test_generate_benchmark_params_cpu_gpu(self): - adam_opt = tf.keras.optimizers.Adam() - sgd_opt = tf.keras.optimizers.SGD() - params = [ - ("Adam", adam_opt, 10), - ("SGD", sgd_opt, 10), - ] - expected = [ - ("Adam_CPU", adam_opt, 10), - ("SGD_CPU", sgd_opt, 10), - ("Adam_GPU", adam_opt, 10), - ("SGD_GPU", sgd_opt, 10), - ] - - out = benchmark_util.generate_benchmark_params_cpu_gpu(params) - self.assertAllEqual(out, expected) + def test_get_benchmark_name(self): + name = "benchmark_layer_call__Conv2D_small_shape" + expected = ["Conv2D", "small", "shape"] + out = benchmark_util.get_benchmark_name(name) + self.assertAllEqual(out, expected) + + def test_generate_benchmark_params_cpu_gpu(self): + adam_opt = tf.keras.optimizers.Adam() + sgd_opt = tf.keras.optimizers.SGD() + params = [ + ("Adam", adam_opt, 10), + ("SGD", sgd_opt, 10), + ] + expected = [ + ("Adam_CPU", adam_opt, 10), + ("SGD_CPU", sgd_opt, 10), + ("Adam_GPU", adam_opt, 10), + ("SGD_GPU", sgd_opt, 10), + ] + + out = benchmark_util.generate_benchmark_params_cpu_gpu(params) + self.assertAllEqual(out, expected) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/benchmarks/distribution_util.py b/keras/benchmarks/distribution_util.py index 4c180b6ad414..a4868749ed5c 100644 --- a/keras/benchmarks/distribution_util.py +++ b/keras/benchmarks/distribution_util.py @@ -18,168 +18,182 @@ https://github.com/tensorflow/models/blob/master/official/utils/misc/distribution_utils.py. """ -import tensorflow.compat.v2 as tf - import json import os +import tensorflow.compat.v2 as tf + def _collective_communication(all_reduce_alg): - """Return a CollectiveCommunication based on all_reduce_alg. - - Args: - all_reduce_alg: a string specifying which collective communication to pick, - or None. 
- - Returns: - tf.distribute.experimental.CollectiveCommunication object - - Raises: - ValueError: if `all_reduce_alg` not in [None, "ring", "nccl"] - """ - collective_communication_options = { - None: tf.distribute.experimental.CollectiveCommunication.AUTO, - "ring": tf.distribute.experimental.CollectiveCommunication.RING, - "nccl": tf.distribute.experimental.CollectiveCommunication.NCCL - } - if all_reduce_alg not in collective_communication_options: - raise ValueError( - "When used with `multi_worker_mirrored`, valid values for " - "all_reduce_alg are [`ring`, `nccl`]. Supplied value: {}".format( - all_reduce_alg)) - return collective_communication_options[all_reduce_alg] + """Return a CollectiveCommunication based on all_reduce_alg. + + Args: + all_reduce_alg: a string specifying which collective communication to + pick, or None. + + Returns: + tf.distribute.experimental.CollectiveCommunication object + + Raises: + ValueError: if `all_reduce_alg` not in [None, "ring", "nccl"] + """ + collective_communication_options = { + None: tf.distribute.experimental.CollectiveCommunication.AUTO, + "ring": tf.distribute.experimental.CollectiveCommunication.RING, + "nccl": tf.distribute.experimental.CollectiveCommunication.NCCL, + } + if all_reduce_alg not in collective_communication_options: + raise ValueError( + "When used with `multi_worker_mirrored`, valid values for " + "all_reduce_alg are [`ring`, `nccl`]. Supplied value: {}".format( + all_reduce_alg + ) + ) + return collective_communication_options[all_reduce_alg] def _mirrored_cross_device_ops(all_reduce_alg, num_packs): - """Return a CrossDeviceOps based on all_reduce_alg and num_packs. - - Args: - all_reduce_alg: a string specifying which cross device op to pick, or None. - num_packs: an integer specifying number of packs for the cross device op. - - Returns: - tf.distribute.CrossDeviceOps object or None. - - Raises: - ValueError: if `all_reduce_alg` not in [None, "nccl", "hierarchical_copy"]. - """ - if all_reduce_alg is None: - return None - mirrored_all_reduce_options = { - "nccl": tf.distribute.NcclAllReduce, - "hierarchical_copy": tf.distribute.HierarchicalCopyAllReduce - } - if all_reduce_alg not in mirrored_all_reduce_options: - raise ValueError( - "When used with `mirrored`, valid values for all_reduce_alg are " - "[`nccl`, `hierarchical_copy`]. Supplied value: {}".format( - all_reduce_alg)) - cross_device_ops_class = mirrored_all_reduce_options[all_reduce_alg] - return cross_device_ops_class(num_packs=num_packs) - - -def get_distribution_strategy(distribution_strategy="mirrored", - num_gpus=0, - all_reduce_alg=None, - num_packs=1): - """Return a DistributionStrategy for running the model. - - Args: - distribution_strategy: a string specifying which distribution strategy to - use. Accepted values are "off", "one_device", "mirrored", and - "multi_worker_mirrored" -- case insensitive. "off" means not to use - Distribution Strategy. - num_gpus: Number of GPUs to run this model. - - Returns: - tf.distribute.DistibutionStrategy object. - Raises: - ValueError: if `distribution_strategy` is "off" or "one_device" and - `num_gpus` is larger than 1; or `num_gpus` is negative. 
- """ - if num_gpus < 0: - raise ValueError("`num_gpus` can not be negative.") - - distribution_strategy = distribution_strategy.lower() - - if distribution_strategy == "off": - if num_gpus > 1: - raise ValueError("When {} GPUs are specified, distribution_strategy " - "flag cannot be set to `off`.".format(num_gpus)) - return None - - if distribution_strategy == "multi_worker_mirrored": - return tf.distribute.experimental.MultiWorkerMirroredStrategy( - communication=_collective_communication(all_reduce_alg)) - - if distribution_strategy == "one_device": - if num_gpus == 0: - return tf.distribute.OneDeviceStrategy("device:CPU:0") - if num_gpus > 1: - raise ValueError("`OneDeviceStrategy` can not be used for more than " - "one device.") - return tf.distribute.OneDeviceStrategy("device:GPU:0") - - if distribution_strategy == "mirrored": - if num_gpus == 0: - devices = ["device:CPU:0"] - else: - devices = ["device:GPU:%d" % i for i in range(num_gpus)] - return tf.distribute.MirroredStrategy( - devices=devices, - cross_device_ops=_mirrored_cross_device_ops(all_reduce_alg, num_packs)) + """Return a CrossDeviceOps based on all_reduce_alg and num_packs. + + Args: + all_reduce_alg: a string specifying which cross device op to pick, or + None. + num_packs: an integer specifying number of packs for the cross device op. + + Returns: + tf.distribute.CrossDeviceOps object or None. + + Raises: + ValueError: if `all_reduce_alg` not in [None, "nccl", + "hierarchical_copy"]. + """ + if all_reduce_alg is None: + return None + mirrored_all_reduce_options = { + "nccl": tf.distribute.NcclAllReduce, + "hierarchical_copy": tf.distribute.HierarchicalCopyAllReduce, + } + if all_reduce_alg not in mirrored_all_reduce_options: + raise ValueError( + "When used with `mirrored`, valid values for all_reduce_alg are " + "[`nccl`, `hierarchical_copy`]. Supplied value: {}".format( + all_reduce_alg + ) + ) + cross_device_ops_class = mirrored_all_reduce_options[all_reduce_alg] + return cross_device_ops_class(num_packs=num_packs) + + +def get_distribution_strategy( + distribution_strategy="mirrored", + num_gpus=0, + all_reduce_alg=None, + num_packs=1, +): + """Return a DistributionStrategy for running the model. + + Args: + distribution_strategy: a string specifying which distribution strategy to + use. Accepted values are "off", "one_device", "mirrored", and + "multi_worker_mirrored" -- case insensitive. "off" means not to use + Distribution Strategy. + num_gpus: Number of GPUs to run this model. + + Returns: + tf.distribute.DistibutionStrategy object. + Raises: + ValueError: if `distribution_strategy` is "off" or "one_device" and + `num_gpus` is larger than 1; or `num_gpus` is negative. + """ + if num_gpus < 0: + raise ValueError("`num_gpus` can not be negative.") + + distribution_strategy = distribution_strategy.lower() + + if distribution_strategy == "off": + if num_gpus > 1: + raise ValueError( + "When {} GPUs are specified, distribution_strategy " + "flag cannot be set to `off`.".format(num_gpus) + ) + return None + + if distribution_strategy == "multi_worker_mirrored": + return tf.distribute.experimental.MultiWorkerMirroredStrategy( + communication=_collective_communication(all_reduce_alg) + ) + + if distribution_strategy == "one_device": + if num_gpus == 0: + return tf.distribute.OneDeviceStrategy("device:CPU:0") + if num_gpus > 1: + raise ValueError( + "`OneDeviceStrategy` can not be used for more than one device." 
+ ) + return tf.distribute.OneDeviceStrategy("device:GPU:0") + + if distribution_strategy == "mirrored": + if num_gpus == 0: + devices = ["device:CPU:0"] + else: + devices = ["device:GPU:%d" % i for i in range(num_gpus)] + return tf.distribute.MirroredStrategy( + devices=devices, + cross_device_ops=_mirrored_cross_device_ops( + all_reduce_alg, num_packs + ), + ) - raise ValueError("Unrecognized Distribution Strategy: %r" % - distribution_strategy) + raise ValueError( + f"Unrecognized Distribution Strategy: {distribution_strategy}" + ) def configure_cluster(worker_hosts=None, task_index=-1): - """Set multi-worker cluster spec in TF_CONFIG environment variable. - - Args: - worker_hosts: comma-separated list of worker ip:port pairs. - - Returns: - Number of workers in the cluster. - """ - tf_config = json.loads(os.environ.get("TF_CONFIG", "{}")) - if tf_config: - num_workers = ( - len(tf_config["cluster"].get("chief", [])) + - len(tf_config["cluster"].get("worker", []))) - elif worker_hosts: - workers = worker_hosts.split(",") - num_workers = len(workers) - if num_workers > 1 and task_index < 0: - raise ValueError("Must specify task_index when number of workers > 1") - task_index = 0 if num_workers == 1 else task_index - os.environ["TF_CONFIG"] = json.dumps({ - "cluster": { - "worker": workers - }, - "task": { - "type": "worker", - "index": task_index - } - }) - else: - num_workers = 1 - return num_workers + """Set multi-worker cluster spec in TF_CONFIG environment variable. + + Args: + worker_hosts: comma-separated list of worker ip:port pairs. + + Returns: + Number of workers in the cluster. + """ + tf_config = json.loads(os.environ.get("TF_CONFIG", "{}")) + if tf_config: + num_workers = len(tf_config["cluster"].get("chief", [])) + len( + tf_config["cluster"].get("worker", []) + ) + elif worker_hosts: + workers = worker_hosts.split(",") + num_workers = len(workers) + if num_workers > 1 and task_index < 0: + raise ValueError( + "Must specify task_index when number of workers > 1" + ) + task_index = 0 if num_workers == 1 else task_index + os.environ["TF_CONFIG"] = json.dumps( + { + "cluster": {"worker": workers}, + "task": {"type": "worker", "index": task_index}, + } + ) + else: + num_workers = 1 + return num_workers def get_strategy_scope(strategy): - if strategy: - strategy_scope = strategy.scope() - else: - strategy_scope = DummyContextManager() + if strategy: + strategy_scope = strategy.scope() + else: + strategy_scope = DummyContextManager() - return strategy_scope + return strategy_scope class DummyContextManager: + def __enter__(self): + pass - def __enter__(self): - pass - - def __exit__(self, *args): - pass + def __exit__(self, *args): + pass diff --git a/keras/benchmarks/eager_microbenchmarks_test.py b/keras/benchmarks/eager_microbenchmarks_test.py index dcfcdaadd88c..19b42f750dcd 100644 --- a/keras/benchmarks/eager_microbenchmarks_test.py +++ b/keras/benchmarks/eager_microbenchmarks_test.py @@ -14,193 +14,226 @@ # ============================================================================== """Microbenchmarks for Keras components in eager mode.""" +import time + import tensorflow.compat.v2 as tf -import time +from keras.utils import tf_inspect +# isort: off from tensorflow.python.eager import context from tensorflow.python.eager.context import get_executor -from keras.utils import tf_inspect def _run_benchmark(func, num_iters, execution_mode=None): - with context.execution_mode(execution_mode): - # call func to warm up - func() - if execution_mode == context.ASYNC: - 
get_executor().wait() - start = time.time() - for _ in range(num_iters): - func() - if execution_mode == context.ASYNC: - get_executor().wait() - end = time.time() + with context.execution_mode(execution_mode): + # call func to warm up + func() + if execution_mode == context.ASYNC: + get_executor().wait() + start = time.time() + for _ in range(num_iters): + func() + if execution_mode == context.ASYNC: + get_executor().wait() + end = time.time() - return end - start + return end - start class MicroBenchmarksBase(tf.test.Benchmark): - """Run and report benchmark results.""" - - def run_report(self, run_benchmark, func, num_iters, execution_mode=None): """Run and report benchmark results.""" - total_time = run_benchmark(func, num_iters, execution_mode) - mean_us = total_time * 1e6 / num_iters - metrics = [{ - "name": "exp_per_sec", - "value": float("{0:.3f}".format(num_iters / total_time)) - }, { - "name": "us_per_exp", - "value": float("{0:.3f}".format(total_time * 1e6 / num_iters)) - }] - benchmark_name = self._get_benchmark_name() - self.report_benchmark( - iters=num_iters, - wall_time=mean_us, - metrics=metrics, - name=benchmark_name) - - def _get_benchmark_name(self): - """Mostly copied from benchmark.py _get_name().""" - stack = tf_inspect.stack() - name = None - for frame in stack[::-1]: - f_locals = frame[0].f_locals - f_self = f_locals.get("self", None) - if isinstance(f_self, tf.test.Benchmark): - name = frame[3] # Get the method name - # This is a hack to get around the fact that some methods might have a - # disable_tfrt decorator around them. In that case a function called - # 'decorated' wraps the real called function underneath and so we - # peek one deeper into the stack to get the real name. - if name == "decorated": - continue - else: - break - if name is None: - raise ValueError("Unable to determine calling Benchmark function.") - if tf.__internal__.is_tfrt_enabled(): - name = name + "_tfrt" - return name - - def _run(self, func, num_iters, execution_mode=None): - self.run_report(_run_benchmark, func, num_iters, execution_mode) - - def benchmark_layers_call_overhead(self): - - class OnlyOverheadLayer(tf.keras.layers.Layer): - - def call(self, x): - return x - - layer = OnlyOverheadLayer() - x = tf.convert_to_tensor([[1.]]) - - def fn(): - layer(x) # pylint: disable=not-callable - - self._run(fn, 10000) - - def benchmark_op_layer_call_overhead(self): - model_input = tf.keras.Input(shape=(1,)) - model_output = model_input - x = tf.convert_to_tensor([[1.1]]) - - for _ in range(20): - model_output = tf.multiply(model_output, x) - model = tf.keras.Model(inputs=model_input, outputs=model_output) - - def fn(): - model(x) # pylint: disable=not-callable - - fn() - self._run(fn, 100) - - def benchmark_model_predict_tensorlike_overhead(self): - - class OnlyOverheadLayer(tf.keras.layers.Layer): - - def call(self, x): - return x - - model = tf.keras.Sequential([OnlyOverheadLayer()]) - x = tf.convert_to_tensor([[1.]]) - - def fn(): - model.predict(x) - - self._run(fn, 20) - - def benchmark_layers_embeddings_embedding_overhead(self): - - layer = tf.keras.layers.Embedding(1, 1) - x = tf.zeros((1, 1), dtype="int32") - - def fn(): - layer(x) - - self._run(fn, 10000) - - -class KerasLayerCallOverheadBenchmarks( # pylint: disable=undefined-variable - MicroBenchmarksBase, metaclass=tf.__internal__.test.ParameterizedBenchmark): - - # The set of layers for benchmarking. To add benchmarks for new layers, - # please add the parameter configs to "_benchmark_paramters". 
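The `_run_benchmark` helper reformatted above follows the standard eager micro-benchmark recipe: one untimed warm-up call, then a timed loop, draining the async executor so queued ops are counted. Reduced to its essentials, and leaving out the async branch, the pattern is just this sketch:

import time

def time_eager_call(func, num_iters):
    func()  # warm-up: excludes one-time tracing/allocation cost
    start = time.time()
    for _ in range(num_iters):
        func()
    # In ASYNC execution mode the harness additionally calls
    # get_executor().wait() here before reading the clock.
    return time.time() - start

# run_report() then derives the reported metrics from this total:
#   us_per_exp  = total_time * 1e6 / num_iters
#   exp_per_sec = num_iters / total_time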
- - # The parameter of each layer benchmark is a tuple contains: - # 1) The benchmark name with convention "{module_name}_{layer_name}"; - # 2) The layer instance; - # 3) The shape of the input to the layer; - # 4) The kwargs used in the benchmark. It can include the number of - # iterations to run the benchmarks, and kwargs used in the layer call. - # By default, # of iteration is 10000. - _benchmark_parameters = [ - ("advanced_activations_leaky_relu", tf.keras.layers.LeakyReLU(), - (1, 1)), - ("advanced_activations_prelu", tf.keras.layers.PReLU(), (1, 1)), - ("advanced_activations_elu", tf.keras.layers.ELU(), (1, 1)), - ("advanced_activations_thresholded_relu", - tf.keras.layers.ThresholdedReLU(), (1, 1)), - ("advanced_activations_softmax", tf.keras.layers.Softmax(), (1, 1)), - ("advanced_activations_relu", tf.keras.layers.ReLU(), (1, 1)), - ("core_masking", tf.keras.layers.Masking(), (1, 1)), - ("core_dropout", tf.keras.layers.Dropout(0.5), (1, 1), { - "training": True - }), - ("core_flatten", tf.keras.layers.Flatten(), (1, 1, 1)), - ("core_dense", tf.keras.layers.Dense(1), (1, 1)), - ("convolutional_conv1d", tf.keras.layers.Conv1D(1, (1,)), (1, 1, 1)), - ("convolutional_conv2d", tf.keras.layers.Conv2D(1, (1, 1)), (1, 1, 1, 1)), - ("convolutional_conv3d", tf.keras.layers.Conv3D( - 1, (1, 1, 1)), (1, 1, 1, 1, 1)), - ("batch_norm_fused_inf", tf.keras.layers.BatchNormalization(fused=True), - (1, 1, 1, 1)), - ("batch_norm_fused_train", tf.keras.layers.BatchNormalization(fused=True), - (1, 1, 1, 1), {"training": True}), - ("batch_norm_nonfused_inf", - tf.keras.layers.BatchNormalization(fused=False), (1, 1, 1, 1)), - ("batch_norm_nonfused_train", - tf.keras.layers.BatchNormalization(fused=False), (1, 1, 1, 1), - {"training": True}), - ("normalization_layer_normalization", - tf.keras.layers.LayerNormalization(), (1, 1), - {"iters": 100, "training": True}), - ] - - def benchmark_layer(self, layer, input_shape, kwargs=None): - - x = tf.ones(input_shape) - - def fn(): - layer(x, **(kwargs or {})) - default_iters = 10000 - iters = kwargs.pop("iters", default_iters) if kwargs else default_iters - self._run(fn, iters) + def run_report(self, run_benchmark, func, num_iters, execution_mode=None): + """Run and report benchmark results.""" + total_time = run_benchmark(func, num_iters, execution_mode) + mean_us = total_time * 1e6 / num_iters + metrics = [ + { + "name": "exp_per_sec", + "value": float(f"{num_iters / total_time:.3f}"), + }, + { + "name": "us_per_exp", + "value": float(f"{total_time * 1000000.0 / num_iters:.3f}"), + }, + ] + benchmark_name = self._get_benchmark_name() + self.report_benchmark( + iters=num_iters, + wall_time=mean_us, + metrics=metrics, + name=benchmark_name, + ) + + def _get_benchmark_name(self): + """Mostly copied from benchmark.py _get_name().""" + stack = tf_inspect.stack() + name = None + for frame in stack[::-1]: + f_locals = frame[0].f_locals + f_self = f_locals.get("self", None) + if isinstance(f_self, tf.test.Benchmark): + name = frame[3] # Get the method name + # This is a hack to get around the fact that some methods might + # have a disable_tfrt decorator around them. In that case a + # function called 'decorated' wraps the real called function + # underneath and so we peek one deeper into the stack to get the + # real name. 
+ if name == "decorated": + continue + else: + break + if name is None: + raise ValueError("Unable to determine calling Benchmark function.") + if tf.__internal__.is_tfrt_enabled(): + name = name + "_tfrt" + return name + + def _run(self, func, num_iters, execution_mode=None): + self.run_report(_run_benchmark, func, num_iters, execution_mode) + + def benchmark_layers_call_overhead(self): + class OnlyOverheadLayer(tf.keras.layers.Layer): + def call(self, x): + return x + + layer = OnlyOverheadLayer() + x = tf.convert_to_tensor([[1.0]]) + + def fn(): + layer(x) + + self._run(fn, 10000) + + def benchmark_op_layer_call_overhead(self): + model_input = tf.keras.Input(shape=(1,)) + model_output = model_input + x = tf.convert_to_tensor([[1.1]]) + + for _ in range(20): + model_output = tf.multiply(model_output, x) + model = tf.keras.Model(inputs=model_input, outputs=model_output) + + def fn(): + model(x) + + fn() + self._run(fn, 100) + + def benchmark_model_predict_tensorlike_overhead(self): + class OnlyOverheadLayer(tf.keras.layers.Layer): + def call(self, x): + return x + + model = tf.keras.Sequential([OnlyOverheadLayer()]) + x = tf.convert_to_tensor([[1.0]]) + + def fn(): + model.predict(x) + + self._run(fn, 20) + + def benchmark_layers_embeddings_embedding_overhead(self): + layer = tf.keras.layers.Embedding(1, 1) + x = tf.zeros((1, 1), dtype="int32") + + def fn(): + layer(x) + + self._run(fn, 10000) + + +class KerasLayerCallOverheadBenchmarks( + MicroBenchmarksBase, metaclass=tf.__internal__.test.ParameterizedBenchmark +): + # The set of layers for benchmarking. To add benchmarks for new layers, + # please add the parameter configs to "_benchmark_paramters". + + # The parameter of each layer benchmark is a tuple contains: + # 1) The benchmark name with convention "{module_name}_{layer_name}"; + # 2) The layer instance; + # 3) The shape of the input to the layer; + # 4) The kwargs used in the benchmark. It can include the number of + # iterations to run the benchmarks, and kwargs used in the layer call. + # By default, # of iteration is 10000. 
+ _benchmark_parameters = [ + ( + "advanced_activations_leaky_relu", + tf.keras.layers.LeakyReLU(), + (1, 1), + ), + ("advanced_activations_prelu", tf.keras.layers.PReLU(), (1, 1)), + ("advanced_activations_elu", tf.keras.layers.ELU(), (1, 1)), + ( + "advanced_activations_thresholded_relu", + tf.keras.layers.ThresholdedReLU(), + (1, 1), + ), + ("advanced_activations_softmax", tf.keras.layers.Softmax(), (1, 1)), + ("advanced_activations_relu", tf.keras.layers.ReLU(), (1, 1)), + ("core_masking", tf.keras.layers.Masking(), (1, 1)), + ( + "core_dropout", + tf.keras.layers.Dropout(0.5), + (1, 1), + {"training": True}, + ), + ("core_flatten", tf.keras.layers.Flatten(), (1, 1, 1)), + ("core_dense", tf.keras.layers.Dense(1), (1, 1)), + ("convolutional_conv1d", tf.keras.layers.Conv1D(1, (1,)), (1, 1, 1)), + ( + "convolutional_conv2d", + tf.keras.layers.Conv2D(1, (1, 1)), + (1, 1, 1, 1), + ), + ( + "convolutional_conv3d", + tf.keras.layers.Conv3D(1, (1, 1, 1)), + (1, 1, 1, 1, 1), + ), + ( + "batch_norm_fused_inf", + tf.keras.layers.BatchNormalization(fused=True), + (1, 1, 1, 1), + ), + ( + "batch_norm_fused_train", + tf.keras.layers.BatchNormalization(fused=True), + (1, 1, 1, 1), + {"training": True}, + ), + ( + "batch_norm_nonfused_inf", + tf.keras.layers.BatchNormalization(fused=False), + (1, 1, 1, 1), + ), + ( + "batch_norm_nonfused_train", + tf.keras.layers.BatchNormalization(fused=False), + (1, 1, 1, 1), + {"training": True}, + ), + ( + "normalization_layer_normalization", + tf.keras.layers.LayerNormalization(), + (1, 1), + {"iters": 100, "training": True}, + ), + ] + + def benchmark_layer(self, layer, input_shape, kwargs=None): + x = tf.ones(input_shape) + + def fn(): + layer(x, **(kwargs or {})) + + default_iters = 10000 + iters = kwargs.pop("iters", default_iters) if kwargs else default_iters + self._run(fn, iters) if __name__ == "__main__": - if tf.compat.v1.executing_eagerly(): - # Only run test when eager is enabled (skip test in v1). - tf.test.main() + if tf.compat.v1.executing_eagerly(): + # Only run test when eager is enabled (skip test in v1). + tf.test.main() diff --git a/keras/benchmarks/keras_cpu_benchmark_test.py b/keras/benchmarks/keras_cpu_benchmark_test.py index b2ba3604ab04..6ca5cb8c3870 100644 --- a/keras/benchmarks/keras_cpu_benchmark_test.py +++ b/keras/benchmarks/keras_cpu_benchmark_test.py @@ -14,123 +14,141 @@ # ============================================================================== """Benchmark tests for CPU performance of Keras models.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf from keras.benchmarks import benchmark_util # Loss function and optimizer. -_LOSS = 'binary_crossentropy' -_OPTIMIZER = 'rmsprop' - - -class KerasModelCPUBenchmark( # pylint: disable=undefined-variable - tf.test.Benchmark, metaclass=tf.__internal__.test.ParameterizedBenchmark): - """Required Arguments for measure_performance. - - x: Input data, it could be Numpy or load from tfds. - y: Target data. If `x` is a dataset, generator instance, - `y` should not be specified. - loss: Loss function for model. - optimizer: Optimizer for model. - Other details can see in `measure_performance()` method of - benchmark_util. - """ - # The parameters of each benchmark is a tuple: - - # (benchmark_name_suffix, batch_size, run_iters). - # benchmark_name_suffix: The suffix of the benchmark test name with - # convention `{bs}_{batch_size}`. - # batch_size: Integer. Number of samples per gradient update. - # run_iters: Integer. 
Number of iterations to run the
-  # performance measurement.
-
-  _benchmark_parameters = [
-      ('bs_32', 32, 3), ('bs_64', 64, 2), ('bs_128', 128, 2),
-      ('bs_256', 256, 1), ('bs_512', 512, 1)]
-
-  def _mnist_mlp(self):
-    """Simple MLP model."""
-    model = tf.keras.Sequential()
-    model.add(tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)))
-    model.add(tf.keras.layers.Dropout(0.2))
-    model.add(tf.keras.layers.Dense(512, activation='relu'))
-    model.add(tf.keras.layers.Dropout(0.2))
-    model.add(tf.keras.layers.Dense(10, activation='softmax'))
-
-    return model
-
-  def _mnist_convnet(self):
-    """Simple Convnet model."""
-    model = tf.keras.Sequential()
-    model.add(
-        tf.keras.layers.Conv2D(
-            32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
-    model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
-    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
-    model.add(tf.keras.layers.Dropout(0.25))
-    model.add(tf.keras.layers.Flatten())
-    model.add(tf.keras.layers.Dense(128, activation='relu'))
-    model.add(tf.keras.layers.Dropout(0.5))
-    model.add(tf.keras.layers.Dense(10, activation='softmax'))
-
-    return model
-
-  def _imdb_lstm(self):
-    """Simple LSTM model."""
-    model = tf.keras.Sequential()
-    model.add(tf.keras.layers.Embedding(20000, 128))
-    model.add(tf.keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2))
-    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
-
-    return model
-
-  def benchmark_mnist_mlp(self, batch_size, run_iters):
-    """Benchmark for MLP model on synthetic mnist data."""
-    mlp_x = np.random.random((5000, 784))
-    mlp_y = np.random.random((5000, 10))
-    metrics, wall_time, extras = benchmark_util.measure_performance(
-        self._mnist_mlp,
-        x=mlp_x,
-        y=mlp_y,
-        batch_size=batch_size,
-        run_iters=run_iters,
-        optimizer=_OPTIMIZER,
-        loss=_LOSS)
-    self.report_benchmark(
-        iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras)
-
-  def benchmark_mnist_convnet(self, batch_size, run_iters):
-    """Benchmark for Convnet model on synthetic mnist data."""
-    convnet_x = np.random.random((5000, 28, 28, 1))
-    convnet_y = np.random.random((5000, 10))
-    metrics, wall_time, extras = benchmark_util.measure_performance(
-        self._mnist_convnet,
-        x=convnet_x,
-        y=convnet_y,
-        batch_size=batch_size,
-        run_iters=run_iters,
-        optimizer=_OPTIMIZER,
-        loss=_LOSS)
-    self.report_benchmark(
-        iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras)
-
-  def benchmark_imdb_lstm(self, batch_size, run_iters):
-    """Benchmark for LSTM model on synthetic imdb review dataset."""
-    lstm_x = np.random.randint(0, 1999, size=(2500, 100))
-    lstm_y = np.random.random((2500, 1))
-    metrics, wall_time, extras = benchmark_util.measure_performance(
-        self._imdb_lstm,
-        x=lstm_x,
-        y=lstm_y,
-        batch_size=batch_size,
-        run_iters=run_iters,
-        optimizer=_OPTIMIZER,
-        loss=_LOSS)
-    self.report_benchmark(
-        iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras)
-
-
-if __name__ == '__main__':
-  tf.test.main()
+_LOSS = "binary_crossentropy"
+_OPTIMIZER = "rmsprop"
+
+
+class KerasModelCPUBenchmark(
+    tf.test.Benchmark, metaclass=tf.__internal__.test.ParameterizedBenchmark
+):
+    """Required Arguments for measure_performance.
+
+    x: Input data, it could be Numpy data or data loaded from tfds.
+    y: Target data. If `x` is a dataset or generator instance,
+      `y` should not be specified.
+    loss: Loss function for model.
+    optimizer: Optimizer for model.
+    Other details can be found in the `measure_performance()` method of
+    benchmark_util.
+ """ + + # The parameters of each benchmark is a tuple: + + # (benchmark_name_suffix, batch_size, run_iters). + # benchmark_name_suffix: The suffix of the benchmark test name with + # convention `{bs}_{batch_size}`. + # batch_size: Integer. Number of samples per gradient update. + # run_iters: Integer. Number of iterations to run the + # performance measurement. + + _benchmark_parameters = [ + ("bs_32", 32, 3), + ("bs_64", 64, 2), + ("bs_128", 128, 2), + ("bs_256", 256, 1), + ("bs_512", 512, 1), + ] + + def _mnist_mlp(self): + """Simple MLP model.""" + model = tf.keras.Sequential() + model.add( + tf.keras.layers.Dense(512, activation="relu", input_shape=(784,)) + ) + model.add(tf.keras.layers.Dropout(0.2)) + model.add(tf.keras.layers.Dense(512, activation="relu")) + model.add(tf.keras.layers.Dropout(0.2)) + model.add(tf.keras.layers.Dense(10, activation="softmax")) + + return model + + def _mnist_convnet(self): + """Simple Convnet model.""" + model = tf.keras.Sequential() + model.add( + tf.keras.layers.Conv2D( + 32, + kernel_size=(3, 3), + activation="relu", + input_shape=(28, 28, 1), + ) + ) + model.add(tf.keras.layers.Conv2D(64, (3, 3), activation="relu")) + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2))) + model.add(tf.keras.layers.Dropout(0.25)) + model.add(tf.keras.layers.Flatten()) + model.add(tf.keras.layers.Dense(128, activation="relu")) + model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Dense(10, activation="softmax")) + + return model + + def _imdb_lstm(self): + """Simple LSTM model.""" + model = tf.keras.Sequential() + model.add(tf.keras.layers.Embedding(20000, 128)) + model.add(tf.keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)) + model.add(tf.keras.layers.Dense(1, activation="sigmoid")) + + return model + + def benchmark_mnist_mlp(self, batch_size, run_iters): + """Benchmark for MLP model on synthetic mnist data.""" + mlp_x = np.random.random((5000, 784)) + mlp_y = np.random.random((5000, 10)) + metrics, wall_time, extras = benchmark_util.measure_performance( + self._mnist_mlp, + x=mlp_x, + y=mlp_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer=_OPTIMIZER, + loss=_LOSS, + ) + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_mnist_convnet(self, batch_size, run_iters): + """Benchmark for Convnet model on synthetic mnist data.""" + convnet_x = np.random.random((5000, 28, 28, 1)) + convnet_y = np.random.random((5000, 10)) + metrics, wall_time, extras = benchmark_util.measure_performance( + self._mnist_convnet, + x=convnet_x, + y=convnet_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer=_OPTIMIZER, + loss=_LOSS, + ) + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_imdb_lstm(self, batch_size, run_iters): + """Benchmark for LSTM model on synthetic imdb review dataset.""" + lstm_x = np.random.randint(0, 1999, size=(2500, 100)) + lstm_y = np.random.random((2500, 1)) + metrics, wall_time, extras = benchmark_util.measure_performance( + self._imdb_lstm, + x=lstm_x, + y=lstm_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer=_OPTIMIZER, + loss=_LOSS, + ) + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/keras_examples_benchmarks/BUILD b/keras/benchmarks/keras_examples_benchmarks/BUILD index 4668cacaf1c5..932a7643a689 100644 --- 
a/keras/benchmarks/keras_examples_benchmarks/BUILD +++ b/keras/benchmarks/keras_examples_benchmarks/BUILD @@ -4,6 +4,7 @@ load("@org_keras//keras:keras.bzl", "cuda_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = ["//visibility:public"], licenses = ["notice"], ) diff --git a/keras/benchmarks/keras_examples_benchmarks/README.md b/keras/benchmarks/keras_examples_benchmarks/README.md index a2e460fb9421..42bae76a5e29 100644 --- a/keras/benchmarks/keras_examples_benchmarks/README.md +++ b/keras/benchmarks/keras_examples_benchmarks/README.md @@ -186,7 +186,7 @@ To run benchmarks in [keras/benchmarks](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/keras/benchmarks), please take the following steps: -1. Pull the latest tensorflow repo from github. +1. Pull the latest tensorflow repo from GitHub. 2. Install the Bazel tool which works with tensorflow, please take a look for the [Install bazel](#install-bazel) section. 3. To run benchmarks with Bazel, use the `--benchmarks=.` flags to specify the diff --git a/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py b/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py index 43e2a405ae51..be16c0a2cb4f 100644 --- a/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py +++ b/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py @@ -23,140 +23,168 @@ class AntirectifierBenchmark(tf.test.Benchmark): - """Benchmarks for Antirectifier using `tf.test.Benchmark`.""" - - def __init__(self): - super().__init__() - (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() - self.x_train = self.x_train.reshape(-1, 784) - self.x_train = self.x_train.astype("float32") / 255 - - def _build_model(self): - """Model from https://keras.io/examples/keras_recipes/antirectifier/.""" - model = tf.keras.Sequential([ - tf.keras.Input(shape=(784,)), - tf.keras.layers.Dense(256), - Antirectifier(), - tf.keras.layers.Dense(256), - Antirectifier(), - tf.keras.layers.Dropout(0.5), - tf.keras.layers.Dense(10), - ]) - return model - - # In each benchmark test, the required arguments for the - # method `measure_performance` include: - # x: Input data, it could be Numpy or loaded from tfds. - # y: Target data. If `x` is a dataset or generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Check more details in `measure_performance()` method of - # benchmark_util. 
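The comment block above describes the contract shared by every benchmark in these example files. A minimal sketch of the call shape, using a stand-in model function (the names below are illustrative, not from this diff):

import numpy as np
import tensorflow.compat.v2 as tf

from keras.benchmarks import benchmark_util

def build_model():  # stand-in for a _build_model method
    return tf.keras.Sequential([tf.keras.layers.Dense(10)])

x = np.random.random((256, 784))  # synthetic inputs
y = np.random.random((256, 10))   # synthetic targets

# measure_performance() returns (metrics, wall_time, extras); each test
# forwards these to tf.test.Benchmark.report_benchmark().
metrics, wall_time, extras = benchmark_util.measure_performance(
    build_model,
    x=x,
    y=y,
    batch_size=128,
    optimizer="rmsprop",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)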
- def benchmark_antirectifier_bs_128(self): - """Measure performance with batch_size=128.""" - batch_size = 128 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - optimizer="rmsprop", - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["sparse_categorical_accuracy"]) - - metadata = benchmark_util.get_keras_examples_metadata( - "antirectifier", batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_antirectifier_bs_256(self): - """Measure performance with batch_size=256.""" - batch_size = 256 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - optimizer="rmsprop", - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["sparse_categorical_accuracy"]) - - metadata = benchmark_util.get_keras_examples_metadata( - "antirectifier", batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_antirectifier_bs_512(self): - """Measure performance with batch_size=512.""" - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - optimizer="rmsprop", - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["sparse_categorical_accuracy"]) - - metadata = benchmark_util.get_keras_examples_metadata( - "antirectifier", batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_antirectifier_bs_512_gpu_2(self): - """Measure performance with batch_size=512, gpu=2 and - - distribution_strategy=`mirrored`. - """ - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - num_gpus=2, - distribution_strategy="mirrored", - optimizer="rmsprop", - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["sparse_categorical_accuracy"]) - - metadata = benchmark_util.get_keras_examples_metadata( - "antirectifier", batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) + """Benchmarks for Antirectifier using `tf.test.Benchmark`.""" + + def __init__(self): + super().__init__() + (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() + self.x_train = self.x_train.reshape(-1, 784) + self.x_train = self.x_train.astype("float32") / 255 + + def _build_model(self): + """Model from https://keras.io/examples/keras_recipes/antirectifier/.""" + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=(784,)), + tf.keras.layers.Dense(256), + Antirectifier(), + tf.keras.layers.Dense(256), + Antirectifier(), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(10), + ] + ) + return model + + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. 
+ def benchmark_antirectifier_bs_128(self): + """Measure performance with batch_size=128.""" + batch_size = 128 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + optimizer="rmsprop", + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True + ), + metrics=["sparse_categorical_accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "antirectifier", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_antirectifier_bs_256(self): + """Measure performance with batch_size=256.""" + batch_size = 256 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + optimizer="rmsprop", + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True + ), + metrics=["sparse_categorical_accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "antirectifier", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_antirectifier_bs_512(self): + """Measure performance with batch_size=512.""" + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + optimizer="rmsprop", + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True + ), + metrics=["sparse_categorical_accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "antirectifier", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_antirectifier_bs_512_gpu_2(self): + """Measure performance with batch_size=512, gpu=2 and + + distribution_strategy=`mirrored`. + """ + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + num_gpus=2, + distribution_strategy="mirrored", + optimizer="rmsprop", + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True + ), + metrics=["sparse_categorical_accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "antirectifier", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) class Antirectifier(tf.keras.layers.Layer): - """Build simple custom layer.""" - - def __init__(self, initializer="he_normal", **kwargs): - super().__init__(**kwargs) - self.initializer = tf.keras.initializers.get(initializer) - - def build(self, input_shape): - output_dim = input_shape[-1] - self.kernel = self.add_weight( - shape=(output_dim * 2, output_dim), - initializer=self.initializer, - name="kernel", - trainable=True, - ) - - def call(self, inputs): #pylint: disable=arguments-differ - inputs -= tf.reduce_mean(inputs, axis=-1, keepdims=True) - pos = tf.nn.relu(inputs) - neg = tf.nn.relu(-inputs) - concatenated = tf.concat([pos, neg], axis=-1) - mixed = tf.matmul(concatenated, self.kernel) - return mixed - - def get_config(self): - # Implement get_config to enable serialization. This is optional. 
- base_config = super().get_config() - config = {"initializer": tf.keras.initializers.serialize(self.initializer)} - return dict(list(base_config.items()) + list(config.items())) + """Build simple custom layer.""" + + def __init__(self, initializer="he_normal", **kwargs): + super().__init__(**kwargs) + self.initializer = tf.keras.initializers.get(initializer) + + def build(self, input_shape): + output_dim = input_shape[-1] + self.kernel = self.add_weight( + shape=(output_dim * 2, output_dim), + initializer=self.initializer, + name="kernel", + trainable=True, + ) + + def call(self, inputs): + inputs -= tf.reduce_mean(inputs, axis=-1, keepdims=True) + pos = tf.nn.relu(inputs) + neg = tf.nn.relu(-inputs) + concatenated = tf.concat([pos, neg], axis=-1) + mixed = tf.matmul(concatenated, self.kernel) + return mixed + + def get_config(self): + # Implement get_config to enable serialization. This is optional. + base_config = super().get_config() + config = { + "initializer": tf.keras.initializers.serialize(self.initializer) + } + return dict(list(base_config.items()) + list(config.items())) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py b/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index 65ef5ea6e265..771612a31389 100644 --- a/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -23,111 +23,129 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): - """Benchmarks for Bidirectional LSTM using `tf.test.Benchmark`.""" - - def __init__(self): - super().__init__() - self.max_feature = 20000 - self.max_len = 200 - (self.imdb_x, self.imdb_y), _ = tf.keras.datasets.imdb.load_data( - num_words=self.max_feature) - self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( - self.imdb_x, maxlen=self.max_len) - - def _build_model(self): - """Model from https://keras.io/examples/nlp/bidirectional_lstm_imdb/.""" - inputs = tf.keras.Input(shape=(None,), dtype='int32') - x = tf.keras.layers.Embedding(self.max_feature, 128)(inputs) - x = tf.keras.layers.Bidirectional( - tf.keras.layers.LSTM(64, return_sequences=True))( - x) - x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))(x) - outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x) - model = tf.keras.Model(inputs, outputs) - return model - - # In each benchmark test, the required arguments for the - # method `measure_performance` include: - # x: Input data, it could be Numpy or loaded from tfds. - # y: Target data. If `x` is a dataset or generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Check more details in `measure_performance()` method of - # benchmark_util. 
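On the `Antirectifier` layer finalized just above: instead of discarding negative activations the way ReLU does, it centers the input, keeps both half-waves via `concat([relu(x), relu(-x)])` (doubling the last axis), and projects back down with its `(2 * dim, dim)` kernel. A quick shape check, assuming the class defined above is in scope:

import tensorflow.compat.v2 as tf

layer = Antirectifier()  # the layer defined in this diff
x = tf.random.normal((4, 256))
y = layer(x)
# concat doubles 256 -> 512; the (512, 256) kernel maps back to 256.
print(y.shape)  # (4, 256)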
- def benchmark_bidirect_lstm_imdb_bs_128(self): - """Measure performance with batch_size=128.""" - batch_size = 128 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - optimizer='adam', - loss='binary_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'bidirectional_lstm', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_bidirect_lstm_imdb_bs_256(self): - """Measure performance with batch_size=256.""" - batch_size = 256 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - optimizer='adam', - loss='binary_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'bidirectional_lstm', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_bidirect_lstm_imdb_bs_512(self): - """Measure performance with batch_size=512.""" - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - optimizer='adam', - loss='binary_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'bidirectional_lstm', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_bidirect_lstm_imdb_bs_512_gpu_2(self): - """Measure performance with batch_size=512, gpu=2 and - - distribution_strategy=`mirrored`. - """ - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - num_gpus=2, - distribution_strategy='mirrored', - optimizer='adam', - loss='binary_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'bidirectional_lstm', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - -if __name__ == '__main__': - tf.test.main() + """Benchmarks for Bidirectional LSTM using `tf.test.Benchmark`.""" + + def __init__(self): + super().__init__() + self.max_feature = 20000 + self.max_len = 200 + (self.imdb_x, self.imdb_y), _ = tf.keras.datasets.imdb.load_data( + num_words=self.max_feature + ) + self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( + self.imdb_x, maxlen=self.max_len + ) + + def _build_model(self): + """Model from https://keras.io/examples/nlp/bidirectional_lstm_imdb/.""" + inputs = tf.keras.Input(shape=(None,), dtype="int32") + x = tf.keras.layers.Embedding(self.max_feature, 128)(inputs) + x = tf.keras.layers.Bidirectional( + tf.keras.layers.LSTM(64, return_sequences=True) + )(x) + x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))(x) + outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x) + model = tf.keras.Model(inputs, outputs) + return model + + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. 
+ # Check more details in `measure_performance()` method of + # benchmark_util. + def benchmark_bidirect_lstm_imdb_bs_128(self): + """Measure performance with batch_size=128.""" + batch_size = 128 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + optimizer="adam", + loss="binary_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "bidirectional_lstm", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_bidirect_lstm_imdb_bs_256(self): + """Measure performance with batch_size=256.""" + batch_size = 256 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + optimizer="adam", + loss="binary_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "bidirectional_lstm", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_bidirect_lstm_imdb_bs_512(self): + """Measure performance with batch_size=512.""" + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + optimizer="adam", + loss="binary_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "bidirectional_lstm", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_bidirect_lstm_imdb_bs_512_gpu_2(self): + """Measure performance with batch_size=512, gpu=2 and + + distribution_strategy=`mirrored`. 
+ """ + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + num_gpus=2, + distribution_strategy="mirrored", + optimizer="adam", + loss="binary_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "bidirectional_lstm", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py b/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py index 10b1c1f0d743..cd8537cdd647 100644 --- a/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py +++ b/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py @@ -23,125 +23,151 @@ class Cifar10CNNBenchmark(tf.test.Benchmark): - """Benchmarks for CNN using `tf.test.Benchmark`.""" - - def __init__(self): - super().__init__() - self.num_classes = 10 - (self.x_train, self.y_train), _ = tf.keras.datasets.cifar10.load_data() - self.x_train = self.x_train.astype('float32') / 255 - self.y_train = tf.keras.utils.to_categorical(self.y_train, self.num_classes) - self.epochs = 5 - - def _build_model(self): - """Model from https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn.py.""" - model = tf.keras.Sequential() - model.add( - tf.keras.layers.Conv2D( - 32, (3, 3), padding='same', input_shape=self.x_train.shape[1:])) - model.add(tf.keras.layers.Activation('relu')) - model.add(tf.keras.layers.Conv2D(32, (3, 3))) - model.add(tf.keras.layers.Activation('relu')) - model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2))) - model.add(tf.keras.layers.Dropout(0.25)) - - model.add(tf.keras.layers.Conv2D(64, (3, 3), padding='same')) - model.add(tf.keras.layers.Activation('relu')) - model.add(tf.keras.layers.Conv2D(64, (3, 3))) - model.add(tf.keras.layers.Activation('relu')) - model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2))) - model.add(tf.keras.layers.Dropout(0.25)) - - model.add(tf.keras.layers.Flatten()) - model.add(tf.keras.layers.Dense(512)) - model.add(tf.keras.layers.Activation('relu')) - model.add(tf.keras.layers.Dropout(0.5)) - model.add(tf.keras.layers.Dense(self.num_classes)) - model.add(tf.keras.layers.Activation('softmax')) - return model - - # In each benchmark test, the required arguments for the - # method `measure_performance` include: - # x: Input data, it could be Numpy or loaded from tfds. - # y: Target data. If `x` is a dataset or generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Check more details in `measure_performance()` method of - # benchmark_util. 
- def benchmark_cnn_cifar10_bs_256(self): - """Measure performance with batch_size=256.""" - batch_size = 256 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - epochs=self.epochs, - optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6), - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('cnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_cnn_cifar10_bs_512(self): - """Measure performance with batch_size=512.""" - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - epochs=self.epochs, - optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6), - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('cnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_cnn_cifar10_bs_1024(self): - """Measure performance with batch_size=1024.""" - batch_size = 1024 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - epochs=self.epochs, - optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6), - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('cnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_cnn_cifar10_bs_1024_gpu_2(self): - """Measure performance with batch_size=1024, gpu=2 and - - distribution_strategy=`mirrored`. - """ - batch_size = 1024 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - num_gpus=2, - distribution_strategy='mirrored', - epochs=self.epochs, - optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6), - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('cnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - -if __name__ == '__main__': - tf.test.main() + """Benchmarks for CNN using `tf.test.Benchmark`.""" + + def __init__(self): + super().__init__() + self.num_classes = 10 + (self.x_train, self.y_train), _ = tf.keras.datasets.cifar10.load_data() + self.x_train = self.x_train.astype("float32") / 255 + self.y_train = tf.keras.utils.to_categorical( + self.y_train, self.num_classes + ) + self.epochs = 5 + + def _build_model(self): + """Model from + https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn.py. 
+ """ + model = tf.keras.Sequential() + model.add( + tf.keras.layers.Conv2D( + 32, (3, 3), padding="same", input_shape=self.x_train.shape[1:] + ) + ) + model.add(tf.keras.layers.Activation("relu")) + model.add(tf.keras.layers.Conv2D(32, (3, 3))) + model.add(tf.keras.layers.Activation("relu")) + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2))) + model.add(tf.keras.layers.Dropout(0.25)) + + model.add(tf.keras.layers.Conv2D(64, (3, 3), padding="same")) + model.add(tf.keras.layers.Activation("relu")) + model.add(tf.keras.layers.Conv2D(64, (3, 3))) + model.add(tf.keras.layers.Activation("relu")) + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2))) + model.add(tf.keras.layers.Dropout(0.25)) + + model.add(tf.keras.layers.Flatten()) + model.add(tf.keras.layers.Dense(512)) + model.add(tf.keras.layers.Activation("relu")) + model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Dense(self.num_classes)) + model.add(tf.keras.layers.Activation("softmax")) + return model + + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. + def benchmark_cnn_cifar10_bs_256(self): + """Measure performance with batch_size=256.""" + batch_size = 256 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=0.0001, decay=1e-6 + ), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata("cnn", batch_size) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_cnn_cifar10_bs_512(self): + """Measure performance with batch_size=512.""" + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=0.0001, decay=1e-6 + ), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata("cnn", batch_size) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_cnn_cifar10_bs_1024(self): + """Measure performance with batch_size=1024.""" + batch_size = 1024 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=0.0001, decay=1e-6 + ), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata("cnn", batch_size) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_cnn_cifar10_bs_1024_gpu_2(self): + """Measure performance with batch_size=1024, gpu=2 and + + distribution_strategy=`mirrored`. 
+ """ + batch_size = 1024 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + num_gpus=2, + distribution_strategy="mirrored", + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=0.0001, decay=1e-6 + ), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata("cnn", batch_size) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py b/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py index 47b077373f26..fc5cedd27df2 100644 --- a/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py +++ b/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py @@ -17,122 +17,149 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf from keras.benchmarks import benchmark_util class ConvMnistBenchmark(tf.test.Benchmark): - """Benchmarks for Convnet using `tf.test.Benchmark`.""" - - def __init__(self): - super().__init__() - self.num_classes = 10 - self.input_shape = (28, 28, 1) - (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() - self.x_train = self.x_train.astype('float32') / 255 - self.x_train = np.expand_dims(self.x_train, -1) - self.y_train = tf.keras.utils.to_categorical(self.y_train, self.num_classes) - self.epochs = 15 - - def _build_model(self): - """Model from https://keras.io/examples/vision/mnist_convnet/.""" - model = tf.keras.Sequential([ - tf.keras.Input(shape=self.input_shape), - tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Flatten(), - tf.keras.layers.Dropout(0.5), - tf.keras.layers.Dense(self.num_classes, activation='softmax'), - ]) - return model - - # In each benchmark test, the required arguments for the - # method `measure_performance` include: - # x: Input data, it could be Numpy or loaded from tfds. - # y: Target data. If `x` is a dataset or generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Check more details in `measure_performance()` method of - # benchmark_util. 
- def benchmark_conv_mnist_bs_128(self): - """Measure performance with batch_size=128.""" - batch_size = 128 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - epochs=self.epochs, - optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('conv', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_conv_mnist_bs_256(self): - """Measure performance with batch_size=256.""" - batch_size = 256 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - epochs=self.epochs, - optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('conv', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_conv_mnist_bs_512(self): - """Measure performance with batch_size=512.""" - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - epochs=self.epochs, - optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('conv', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_conv_mnist_bs_512_gpu_2(self): - """Measure performance with batch_size=512, gpu=2 and - - distribution_strategy='mirrored' - """ - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - num_gpus=2, - distribution_strategy='mirrored', - epochs=self.epochs, - optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('conv', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - -if __name__ == '__main__': - tf.test.main() + """Benchmarks for Convnet using `tf.test.Benchmark`.""" + + def __init__(self): + super().__init__() + self.num_classes = 10 + self.input_shape = (28, 28, 1) + (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() + self.x_train = self.x_train.astype("float32") / 255 + self.x_train = np.expand_dims(self.x_train, -1) + self.y_train = tf.keras.utils.to_categorical( + self.y_train, self.num_classes + ) + self.epochs = 15 + + def _build_model(self): + """Model from https://keras.io/examples/vision/mnist_convnet/.""" + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=self.input_shape), + tf.keras.layers.Conv2D( + 32, kernel_size=(3, 3), activation="relu" + ), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Conv2D( + 64, kernel_size=(3, 3), activation="relu" + ), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(self.num_classes, activation="softmax"), + ] + ) + return model + + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. 
If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. + def benchmark_conv_mnist_bs_128(self): + """Measure performance with batch_size=128.""" + batch_size = 128 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + epochs=self.epochs, + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "conv", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_conv_mnist_bs_256(self): + """Measure performance with batch_size=256.""" + batch_size = 256 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + epochs=self.epochs, + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "conv", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_conv_mnist_bs_512(self): + """Measure performance with batch_size=512.""" + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + epochs=self.epochs, + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "conv", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_conv_mnist_bs_512_gpu_2(self): + """Measure performance with batch_size=512, gpu=2 and + + distribution_strategy='mirrored' + """ + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + num_gpus=2, + distribution_strategy="mirrored", + epochs=self.epochs, + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "conv", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/keras_examples_benchmarks/mnist_conv_custom_training_benchmark_test.py b/keras/benchmarks/keras_examples_benchmarks/mnist_conv_custom_training_benchmark_test.py index 79d5c00af563..70762325ee74 100644 --- a/keras/benchmarks/keras_examples_benchmarks/mnist_conv_custom_training_benchmark_test.py +++ b/keras/benchmarks/keras_examples_benchmarks/mnist_conv_custom_training_benchmark_test.py @@ -17,357 +17,448 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import timeit + import numpy as np +import tensorflow.compat.v2 as tf from keras.benchmarks import benchmark_util from keras.benchmarks import distribution_util class CustomMnistBenchmark(tf.test.Benchmark): - """Benchmarks for custom training loop using `tf.test.Benchmark`.""" - - def __init__(self): - super().__init__() - self.num_classes = 10 - self.input_shape = (28, 28, 1) - self.epochs 
= 15 - (x_train, y_train), _ = tf.keras.datasets.mnist.load_data() - x_train = x_train.astype('float32') / 255 - x_train = np.expand_dims(x_train, -1) - y_train = tf.keras.utils.to_categorical(y_train, self.num_classes) - self.num_examples = x_train.shape[0] - # Use `tf.data.Dataset` for custom training loop. - self.train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - - def _build_model(self): - """Model from https://keras.io/examples/vision/mnist_convnet/.""" - model = tf.keras.Sequential([ - tf.keras.Input(shape=self.input_shape), - tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Flatten(), - tf.keras.layers.Dropout(0.5), - tf.keras.layers.Dense(self.num_classes, activation='softmax'), - ]) - - return model - - def compute_loss(self, targets, predictions, loss_fn, batch_size): - """Compute average loss.""" - per_example_loss = loss_fn(targets, predictions) - return tf.nn.compute_average_loss( - per_example_loss, global_batch_size=batch_size) - - @tf.function(reduce_retracing=True) - def train_step(self, inputs, model, loss_fn, optimizer, batch_size): - """Compute loss and optimize model by optimizer. - - Args: - inputs: `tf.data`. - model: See `model` in `train_function()` method. - loss_fn: See `loss_fn` in `train_function()` method. - optimizer: See `optimizer` in `train_function()` method. - batch_size: See `batch_size` in `train_function()` method. - - Returns: - Loss value. - """ - train_x, train_y = inputs - with tf.GradientTape() as tape: - predictions = model(train_x, training=True) - loss = self.compute_loss(train_y, predictions, loss_fn, batch_size) - grads = tape.gradient(loss, model.trainable_weights) - optimizer.apply_gradients(zip(grads, model.trainable_weights)) - return loss - - @tf.function(reduce_retracing=True) - def distributed_train_step(self, batch_dataset, model, loss_fn, optimizer, - batch_size, distribution_strategy): - """Train step in distribution strategy setting. - - Args: - batch_dataset: `tf.data`. - model: See `model` in `train_function()` method. - loss_fn: See `loss_fn` in `train_function()` method. - optimizer: See `optimizer` in `train_function()` method. - batch_size: See `batch_size` in `train_function()` method. - distribution_strategy: See `distribution_strategy` in `train_function()` - method. - - Returns: - Sum of per_replica_losses. - """ - per_replica_losses = distribution_strategy.run( - self.train_step, - args=( - batch_dataset, + """Benchmarks for custom training loop using `tf.test.Benchmark`.""" + + def __init__(self): + super().__init__() + self.num_classes = 10 + self.input_shape = (28, 28, 1) + self.epochs = 15 + (x_train, y_train), _ = tf.keras.datasets.mnist.load_data() + x_train = x_train.astype("float32") / 255 + x_train = np.expand_dims(x_train, -1) + y_train = tf.keras.utils.to_categorical(y_train, self.num_classes) + self.num_examples = x_train.shape[0] + # Use `tf.data.Dataset` for custom training loop. 
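+ # `from_tensor_slices` keeps the full MNIST arrays in memory as a dataset + # of (image, label) pairs; shuffling and batching are applied per benchmark + # below, so each test controls its own batch size.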
+ self.train_dataset = tf.data.Dataset.from_tensor_slices( + (x_train, y_train) + ) + + def _build_model(self): + """Model from https://keras.io/examples/vision/mnist_convnet/.""" + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=self.input_shape), + tf.keras.layers.Conv2D( + 32, kernel_size=(3, 3), activation="relu" + ), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Conv2D( + 64, kernel_size=(3, 3), activation="relu" + ), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(self.num_classes, activation="softmax"), + ] + ) + + return model + + def compute_loss(self, targets, predictions, loss_fn, batch_size): + """Compute average loss.""" + per_example_loss = loss_fn(targets, predictions) + return tf.nn.compute_average_loss( + per_example_loss, global_batch_size=batch_size + ) + + @tf.function(reduce_retracing=True) + def train_step(self, inputs, model, loss_fn, optimizer, batch_size): + """Compute loss and optimize model by optimizer. + + Args: + inputs: `tf.data`. + model: See `model` in `train_function()` method. + loss_fn: See `loss_fn` in `train_function()` method. + optimizer: See `optimizer` in `train_function()` method. + batch_size: See `batch_size` in `train_function()` method. + + Returns: + Loss value. + """ + train_x, train_y = inputs + with tf.GradientTape() as tape: + predictions = model(train_x, training=True) + loss = self.compute_loss(train_y, predictions, loss_fn, batch_size) + grads = tape.gradient(loss, model.trainable_weights) + optimizer.apply_gradients(zip(grads, model.trainable_weights)) + return loss + + @tf.function(reduce_retracing=True) + def distributed_train_step( + self, + batch_dataset, + model, + loss_fn, + optimizer, + batch_size, + distribution_strategy, + ): + """Train step in distribution strategy setting. + + Args: + batch_dataset: `tf.data`. + model: See `model` in `train_function()` method. + loss_fn: See `loss_fn` in `train_function()` method. + optimizer: See `optimizer` in `train_function()` method. + batch_size: See `batch_size` in `train_function()` method. + distribution_strategy: See `distribution_strategy` in + `train_function()` method. + + Returns: + Sum of per_replica_losses. + """ + per_replica_losses = distribution_strategy.run( + self.train_step, + args=( + batch_dataset, + model, + loss_fn, + optimizer, + batch_size, + ), + ) + return distribution_strategy.reduce( + tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None + ) + + def train_function( + self, + model, + train_dataset, + loss_fn, + optimizer, + epochs=2, + distribution_strategy=None, + batch_size=256, + ): + """Train model in custom training loop and return average + + train_step_time. + + Args: + model: `tf.keras.Model` instance to be benchmarked. + train_dataset: `tf.data` dataset. Should return a tuple of either + (inputs, targets) or (inputs, targets, sample_weights). + loss_fn: `tf.keras.losses.Loss` instance. + optimizer: `tf.keras.optimizers` instance. + epochs: Integer. Number of epochs to train the model. If unspecified, + `epochs` will default to 2. + distribution_strategy: Distribution strategies. It could be + `multi_worker_mirrored`, `one_device`, `mirrored`. If unspecified, + `distribution_strategy` will default to `None`, i.e. no distribution. + Note that `TPU` and `parameter_server` are not supported yet. + batch_size: Integer. Number of samples per gradient update. If + unspecified, `batch_size` will default to 256. + + Returns: + Average train_step_time.
+ """ + train_step_time_list = [] + timer = timeit.default_timer + + total_loss = 0.0 + num_batches = 0 + for _ in range(epochs): + # Iterate over the batches of the dataset. + for batch_dataset in train_dataset: + + start_time = timer() + + if distribution_strategy is not None: + total_loss += self.distributed_train_step( + batch_dataset, + model, + loss_fn, + optimizer, + batch_size, + distribution_strategy, + ) + else: + total_loss += self.train_step( + batch_dataset, model, loss_fn, optimizer, batch_size + ) + num_batches += 1 + + end_time = timer() + train_step_time_list.append(end_time - start_time) + + return np.mean(train_step_time_list) + + def measure_performance( + self, + model, + dataset, + loss_fn, + optimizer, + batch_size=32, + run_iters=4, + epochs=10, + distribution_strategy=None, + ): + """Run models and measure the performance. + + Args: + model: `tf.keras.Model` instance to be benchmarked. + dataset: `tf.data` dataset. Should return a tuple of either (inputs, + targets) or (inputs, targets, sample_weights). + loss_fn: `tf.keras.losses.Loss` instance. + optimizer: `tf.keras.optimizers` instance. + batch_size: Integer. Number of samples per gradient update. If + unspecified, `batch_size` will default to 32. + run_iters: Integer. Number of iterations to run the performance + measurement. If unspecified, `run_iters` will default to 4. + epochs: Integer. Number of epochs to train the model. If unspecified, + `epochs` will default to 10. + distribution_strategy: Distribution strategies. It could be + `multi_worker_mirrored`, `one_device`, `mirrored`. If unspecified, + `distribution_strategy` will default to `None`, i.e. no distribution. + Note that `TPU` and `parameter_server` are not supported yet. + + Returns: + Performance summary, which contains avg_epoch_time, wall_time, + exp_per_sec, epochs, warmup_time, train_step_time. + + Raises: + ValueError: if `dataset` is None or if `optimizer` instance is + not provided or if `loss_fn` instance is not provided. + """ + if distribution_strategy is not None and not isinstance( + dataset, tf.distribute.DistributedDataset + ): + raise ValueError( + "tf.distribute.DistributedDataset" + " required in distribution strategy." + ) + + if distribution_strategy is None and not isinstance( + dataset, tf.data.Dataset + ): + raise ValueError("`tf.data` is required.") + + if not isinstance(loss_fn, tf.keras.losses.Loss): + raise ValueError( + "`tf.keras.losses.Loss` instance for loss_fn is required." + ) + + if not isinstance(optimizer, tf.keras.optimizers.Optimizer): + raise ValueError( + "`tf.keras.optimizers` instance for optimizer is required."
+ ) + + avg_epoch_time_list, train_step_time_list = [], [] + wall_time_list, exp_per_sec_list, warmup_time_list = [], [], [] + + total_num_examples = epochs * self.num_examples + + for _ in range(run_iters): + timer = timeit.default_timer + start_time = timer() + t1 = timer() + self.train_function( + model, + dataset, + loss_fn, + optimizer, + 1, + distribution_strategy, + batch_size, + ) + warmup_time = timer() - t1 + + t2 = timer() + train_step_time = self.train_function( + model, + dataset, + loss_fn, + optimizer, + epochs, + distribution_strategy, + batch_size, + ) + end_time = timer() + + train_step_time_list.append(train_step_time) + warmup_time_list.append(warmup_time) + wall_time_list.append(end_time - start_time) + exp_per_sec_list.append(total_num_examples / (end_time - t2)) + avg_epoch_time_list.append((end_time - t2) / epochs) + + metrics = [] + metrics.append( + {"name": "avg_epoch_time", "value": np.mean(avg_epoch_time_list)} + ) + metrics.append( + {"name": "exp_per_sec", "value": np.mean(exp_per_sec_list)} + ) + metrics.append( + {"name": "warmup_time", "value": np.mean(warmup_time_list)} + ) + metrics.append( + {"name": "train_step_time", "value": np.mean(train_step_time_list)} + ) + metrics.append({"name": "epochs", "value": epochs}) + + wall_time = np.mean(wall_time_list) + + return metrics, wall_time + + def benchmark_custom_training_mnist_bs_128(self): + """Measure performance with batch_size=128 and run_iters=5.""" + batch_size = 128 + run_iters = 5 + train_dataset = self.train_dataset.shuffle(buffer_size=1024).batch( + batch_size + ) + + # Instantiate a loss function. + loss_fn = tf.keras.losses.CategoricalCrossentropy( + reduction=tf.keras.losses.Reduction.NONE + ) + # Instantiate an optimizer to train the model. + optimizer = tf.keras.optimizers.Adam() + model = self._build_model() + + metrics, wall_time = self.measure_performance( + model, + train_dataset, + loss_fn, + optimizer, + batch_size, + run_iters, + self.epochs, + ) + extras = benchmark_util.get_keras_examples_metadata( + "conv", batch_size, ".keras.ctl_graph" + ) + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_custom_training_mnist_bs_256(self): + """Measure performance with batch_size=256 and run_iters=5.""" + batch_size = 256 + run_iters = 5 + train_dataset = self.train_dataset.shuffle(buffer_size=1024).batch( + batch_size + ) + + # Instantiate a loss function. + loss_fn = tf.keras.losses.CategoricalCrossentropy( + reduction=tf.keras.losses.Reduction.NONE + ) + # Instantiate an optimizer to train the model. + optimizer = tf.keras.optimizers.Adam() + model = self._build_model() + + metrics, wall_time = self.measure_performance( + model, + train_dataset, + loss_fn, + optimizer, + batch_size, + run_iters, + self.epochs, + ) + extras = benchmark_util.get_keras_examples_metadata( + "conv", batch_size, ".keras.ctl_graph" + ) + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_custom_training_mnist_bs_512(self): + """Measure performance with batch_size=512 and run_iters=5.""" + batch_size = 512 + run_iters = 5 + train_dataset = self.train_dataset.shuffle(buffer_size=1024).batch( + batch_size + ) + + # Instantiate a loss function. + loss_fn = tf.keras.losses.CategoricalCrossentropy( + reduction=tf.keras.losses.Reduction.NONE + ) + # Instantiate an optimizer to train the model.
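+ # Adam is used with its default hyperparameters. The loss above keeps + # per-example values (Reduction.NONE) so that `compute_loss()` can rescale + # by the global batch size via `tf.nn.compute_average_loss`, which keeps + # gradient scaling correct when the step runs under a mirrored strategy.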
+ optimizer = tf.keras.optimizers.Adam() + model = self._build_model() + + metrics, wall_time = self.measure_performance( + model, + train_dataset, + loss_fn, + optimizer, + batch_size, + run_iters, + self.epochs, + ) + extras = benchmark_util.get_keras_examples_metadata( + "conv", batch_size, ".keras.ctl_graph" + ) + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_custom_training_mnist_bs_512_gpu_2(self): + """Measure performance with batch_size=512, run_iters=10, gpu=2 and + + distribution_strategy='mirrored'. + """ + batch_size = 512 + run_iters = 10 + train_dataset = self.train_dataset.shuffle(buffer_size=1024).batch( + batch_size + ) + + distribution_strategy = "mirrored" + + strategy = distribution_util.get_distribution_strategy( + distribution_strategy=distribution_strategy, num_gpus=2 + ) + + if distribution_strategy != "off": + train_dataset = strategy.experimental_distribute_dataset( + train_dataset + ) + + strategy_scope = distribution_util.get_strategy_scope(strategy) + + with strategy_scope: + # Instantiate a loss function. + loss_fn = tf.keras.losses.CategoricalCrossentropy( + reduction=tf.keras.losses.Reduction.NONE + ) + # Instantiate an optimizer to train the model. + optimizer = tf.keras.optimizers.Adam() + model = self._build_model() + + metrics, wall_time = self.measure_performance( model, + train_dataset, loss_fn, optimizer, batch_size, - )) - return distribution_strategy.reduce( - tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None) - - def train_function(self, - model, - train_dataset, - loss_fn, - optimizer, - epochs=2, - distribution_strategy=None, - batch_size=256): - """Train model in custom training loop and return average - - train_step_time. - - Args: - model: Model function to be benchmarked. - train_dataset: `tf.data` dataset. Should return a tuple of either (inputs, - targets) or (inputs, targets, sample_weights). - loss_fn: `tf.keras.losses.Loss` instance. - optimizer: `tf.keras.optimizers` instance. - epochs: Integer. Number of epochs to train the model. If unspecified, - `epochs` will default to 2. - distribution_strategy: Distribution strategies. It could be - `multi_worker_mirrored`, `one_device`, `mirrored`. If unspecified, - `distribution_strategy` will default to 'off'. Note that, `TPU` and - `parameter_server` are not supported yet. - batch_size: Integer. Number of samples per gradient update. If - unspecified, `batch_size` will default to 32. - - Returns: - Average train_step_time. - """ - train_step_time_list = [] - timer = timeit.default_timer - - total_loss = 0.0 - num_batches = 0 - for _ in range(epochs): - # Iterate over the batches of the dataset. - for batch_dataset in train_dataset: - - start_time = timer() - - if distribution_strategy is not None: - total_loss += self.distributed_train_step(batch_dataset, model, - loss_fn, optimizer, - batch_size, - distribution_strategy) - else: - total_loss += self.train_step(batch_dataset, model, loss_fn, - optimizer, batch_size) - num_batches += 1 - - end_time = timer() - train_step_time_list.append(end_time - start_time) - - return np.mean(train_step_time_list) - - def measure_performance(self, - model, - dataset, - loss_fn, - optimizer, - batch_size=32, - run_iters=4, - epochs=10, - distribution_strategy=None): - """Run models and measure the performance. - - Args: - model_fn: Model function to be benchmarked. - dataset: `tf.data` dataset. 
Should return a tuple of either (inputs, - targets) or (inputs, targets, sample_weights). - loss_fn: `tf.keras.losses.Loss` instance. - optimizer: `tf.keras.optimizers` instance. - batch_size: Integer. Number of samples per gradient update. If - unspecified, `batch_size` will default to 32. - run_iters: Integer. Number of iterations to run the performance - measurement. If unspecified, `run_iters` will default to 4. - epochs: Integer. Number of epochs to train the model. If unspecified, - `epochs` will default to 10. - distribution_strategy: Distribution strategies. It could be - `multi_worker_mirrored`, `one_device`, `mirrored`. If unspecified, - `distribution_strategy` will default to 'off'. Note that, `TPU` and - `parameter_server` are not supported yet. - - Returns: - Performance summary, which contains build_time, avg_epoch_time, - wall_time, exp_per_sec, epochs, warmup_time, train_step_time. - - Raise: - ValueError: if `dataset` is None or if `optimizer` instance is - not provided or if `loss_fn` instance is not provided. - """ - if distribution_strategy is not None and \ - not isinstance(dataset, tf.distribute.DistributedDataset): - raise ValueError('tf.distribute.DistributedDataset' - ' required in distribution strategy.') - - if distribution_strategy is None and \ - not isinstance(dataset, tf.data.Dataset): - raise ValueError('`tf.data` is required.') - - if not isinstance(loss_fn, tf.keras.losses.Loss): - raise ValueError('`tf.keras.losses.Loss` instance ' - 'for loss_fn is required.') - - if not isinstance(optimizer, tf.keras.optimizers.Optimizer): - raise ValueError('`tf.keras.optimizers` instance ' - 'for optimizer is required.') - - avg_epoch_time_list, train_step_time_list = [], [] - wall_time_list, exp_per_sec_list, warmup_time_list = [], [], [] - - total_num_examples = epochs * self.num_examples - - for _ in range(run_iters): - timer = timeit.default_timer - start_time = timer() - t1 = timer() - self.train_function(model, dataset, loss_fn, optimizer, 1, - distribution_strategy, batch_size) - warmup_time = timer() - t1 - - t2 = timer() - train_step_time = self.train_function(model, dataset, loss_fn, optimizer, - epochs, distribution_strategy, - batch_size) - end_time = timer() - - train_step_time_list.append(train_step_time) - warmup_time_list.append(warmup_time) - wall_time_list.append(end_time - start_time) - exp_per_sec_list.append(total_num_examples / (end_time - t2)) - avg_epoch_time_list.append((end_time - t2) / epochs) - - metrics = [] - metrics.append({ - 'name': 'avg_epoch_time', - 'value': np.mean(avg_epoch_time_list) - }) - metrics.append({'name': 'exp_per_sec', 'value': np.mean(exp_per_sec_list)}) - metrics.append({'name': 'warmup_time', 'value': np.mean(warmup_time_list)}) - metrics.append({ - 'name': 'train_step_time', - 'value': np.mean(train_step_time_list) - }) - metrics.append({'name': 'epochs', 'value': epochs}) - - wall_time = np.mean(wall_time_list) - - return metrics, wall_time - - def benchmark_custom_training_mnist_bs_128(self): - """Measure performance with batch_size=128 and run_iters=5.""" - batch_size = 128 - run_iters = 5 - train_dataset = self.train_dataset.shuffle( - buffer_size=1024).batch(batch_size) - - # Instantiate a loss function. - loss_fn = tf.keras.losses.CategoricalCrossentropy( - reduction=tf.keras.losses.Reduction.NONE) - # Instantiate an optimizer to train the model. 
- optimizer = tf.keras.optimizers.Adam() - model = self._build_model() - - metrics, wall_time = self.measure_performance(model, train_dataset, loss_fn, - optimizer, batch_size, - run_iters, self.epochs) - extras = benchmark_util.get_keras_examples_metadata('conv', batch_size, - '.keras.ctl_graph') - self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_custom_training_mnist_bs_256(self): - """Measure performance with batch_size=256 and run_iters=5.""" - batch_size = 256 - run_iters = 5 - train_dataset = self.train_dataset.shuffle( - buffer_size=1024).batch(batch_size) - - # Instantiate a loss function. - loss_fn = tf.keras.losses.CategoricalCrossentropy( - reduction=tf.keras.losses.Reduction.NONE) - # Instantiate an optimizer to train the model. - optimizer = tf.keras.optimizers.Adam() - model = self._build_model() - - metrics, wall_time = self.measure_performance(model, train_dataset, loss_fn, - optimizer, batch_size, - run_iters, self.epochs) - extras = benchmark_util.get_keras_examples_metadata('conv', batch_size, - '.keras.ctl_graph') - self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_custom_training_mnist_bs_512(self): - """Measure performance with batch_size=512 and run_iters=10.""" - batch_size = 512 - run_iters = 5 - train_dataset = self.train_dataset.shuffle( - buffer_size=1024).batch(batch_size) - - # Instantiate a loss function. - loss_fn = tf.keras.losses.CategoricalCrossentropy( - reduction=tf.keras.losses.Reduction.NONE) - # Instantiate an optimizer to train the model. - optimizer = tf.keras.optimizers.Adam() - model = self._build_model() - - metrics, wall_time = self.measure_performance(model, train_dataset, loss_fn, - optimizer, batch_size, - run_iters, self.epochs) - extras = benchmark_util.get_keras_examples_metadata('conv', batch_size, - '.keras.ctl_graph') - self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_custom_training_mnist_bs_512_gpu_2(self): - """Measure performance with batch_size=512, run_iters=10, gpu=2 and - - distribution_strategy='mirrored'. - """ - batch_size = 512 - run_iters = 10 - train_dataset = self.train_dataset.shuffle( - buffer_size=1024).batch(batch_size) - - distribution_strategy = 'mirrored' - - strategy = distribution_util.get_distribution_strategy( - distribution_strategy=distribution_strategy, num_gpus=2) - - if distribution_strategy != 'off': - train_dataset = strategy.experimental_distribute_dataset(train_dataset) - - strategy_scope = distribution_util.get_strategy_scope(strategy) - - with strategy_scope: - # Instantiate a loss function. - loss_fn = tf.keras.losses.CategoricalCrossentropy( - reduction=tf.keras.losses.Reduction.NONE) - # Instantiate an optimizer to train the model. 
- optimizer = tf.keras.optimizers.Adam() - model = self._build_model() - - metrics, wall_time = self.measure_performance(model, train_dataset, loss_fn, - optimizer, batch_size, - run_iters, self.epochs, - strategy) - extras = benchmark_util.get_keras_examples_metadata('conv', batch_size, - '.keras.ctl_graph') - self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - - -if __name__ == '__main__': - tf.test.main() + run_iters, + self.epochs, + strategy, + ) + extras = benchmark_util.get_keras_examples_metadata( + "conv", batch_size, ".keras.ctl_graph" + ) + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py b/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py index a58f2ec36dce..4103c3a3ee40 100644 --- a/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py +++ b/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py @@ -23,117 +23,135 @@ class HierarchicalRNNBenchmark(tf.test.Benchmark): - """Benchmarks for Hierarchical RNN using `tf.test.Benchmark`.""" - - def __init__(self): - super().__init__() - self.num_classes = 10 - self.row_hidden, self.col_hidden = 128, 128 - (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() - self.x_train = self.x_train.reshape(self.x_train.shape[0], 28, 28, 1) - self.x_train = self.x_train.astype('float32') / 255 - self.y_train = tf.keras.utils.to_categorical(self.y_train, self.num_classes) - - def _build_model(self): - """Model from https://github.com/keras-team/keras/blob/master/examples - - /mnist_hierarchical_rnn.py. - """ - row, col, pixel = self.x_train.shape[1:] - inputs = tf.keras.layers.Input(shape=(row, col, pixel)) - encoded_rows = tf.keras.layers.TimeDistributed( - tf.keras.layers.LSTM(self.row_hidden))( - inputs) - encoded_cols = tf.keras.layers.LSTM(self.col_hidden)(encoded_rows) - outputs = tf.keras.layers.Dense( - self.num_classes, activation='softmax')( - encoded_cols) - model = tf.keras.Model(inputs, outputs) - - return model - - # In each benchmark test, the required arguments for the - # method `measure_performance` include: - # x: Input data, it could be Numpy or loaded from tfds. - # y: Target data. If `x` is a dataset or generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Check more details in `measure_performance()` method of - # benchmark_util. 
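The comment block above recurs across these example benchmark files; the shared call pattern it points at is, schematically, the following (a minimal sketch based on the calls in this diff; `model_fn`, `x_train`, and `y_train` are placeholder names, not part of the patch):

    # Inside a tf.test.Benchmark subclass: measure_performance builds the
    # model via model_fn, trains it, and returns (metrics, wall_time, extras).
    metrics, wall_time, extras = benchmark_util.measure_performance(
        model_fn,  # callable returning a tf.keras.Model
        x=x_train,
        y=y_train,
        batch_size=256,
        optimizer="rmsprop",
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    extras.update(
        benchmark_util.get_keras_examples_metadata("hierarchical_rnn", 256)
    )
    self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras)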
- def benchmark_hrnn_mnist_bs_256(self): - """Measure performance with batch_size=256.""" - batch_size = 256 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'hierarchical_rnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_hrnn_mnist_bs_512(self): - """Measure performance with batch_size=512.""" - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'hierarchical_rnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_hrnn_mnist_bs_1024(self): - """Measure performance with batch_size=1024.""" - batch_size = 1024 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'hierarchical_rnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_hrnn_mnist_bs_1024_gpu_2(self): - """Measure performance with batch_size=1024, gpu=2 and - - distribution_strategy='mirrored' - """ - batch_size = 1024 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - num_gpus=2, - distribution_strategy='mirrored', - optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'hierarchical_rnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - -if __name__ == '__main__': - tf.test.main() + """Benchmarks for Hierarchical RNN using `tf.test.Benchmark`.""" + + def __init__(self): + super().__init__() + self.num_classes = 10 + self.row_hidden, self.col_hidden = 128, 128 + (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() + self.x_train = self.x_train.reshape(self.x_train.shape[0], 28, 28, 1) + self.x_train = self.x_train.astype("float32") / 255 + self.y_train = tf.keras.utils.to_categorical( + self.y_train, self.num_classes + ) + + def _build_model(self): + """Model from https://github.com/keras-team/keras/blob/master/examples + + /mnist_hierarchical_rnn.py. + """ + row, col, pixel = self.x_train.shape[1:] + inputs = tf.keras.layers.Input(shape=(row, col, pixel)) + encoded_rows = tf.keras.layers.TimeDistributed( + tf.keras.layers.LSTM(self.row_hidden) + )(inputs) + encoded_cols = tf.keras.layers.LSTM(self.col_hidden)(encoded_rows) + outputs = tf.keras.layers.Dense(self.num_classes, activation="softmax")( + encoded_cols + ) + model = tf.keras.Model(inputs, outputs) + + return model + + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. 
If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. + def benchmark_hrnn_mnist_bs_256(self): + """Measure performance with batch_size=256.""" + batch_size = 256 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + optimizer="rmsprop", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "hierarchical_rnn", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_hrnn_mnist_bs_512(self): + """Measure performance with batch_size=512.""" + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + optimizer="rmsprop", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "hierarchical_rnn", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_hrnn_mnist_bs_1024(self): + """Measure performance with batch_size=1024.""" + batch_size = 1024 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + optimizer="rmsprop", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "hierarchical_rnn", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_hrnn_mnist_bs_1024_gpu_2(self): + """Measure performance with batch_size=1024, gpu=2 and + + distribution_strategy='mirrored' + """ + batch_size = 1024 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + num_gpus=2, + distribution_strategy="mirrored", + optimizer="rmsprop", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "hierarchical_rnn", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py b/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py index c996b2360132..42dbfede4a4d 100644 --- a/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py +++ b/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py @@ -23,113 +23,147 @@ class IRNNMnistBenchmark(tf.test.Benchmark): - """Benchmarks for IRNN using `tf.test.Benchmark`.""" - - def __init__(self): - super().__init__() - self.num_classes = 10 - self.hidden_units = 100 - self.learning_rate = 1e-6 - (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() - self.x_train = self.x_train.reshape(self.x_train.shape[0], -1, 1) - self.x_train = self.x_train.astype('float32') / 255 - self.y_train = tf.keras.utils.to_categorical(self.y_train, self.num_classes) - - def _build_model(self): - """Model from https://github.com/keras-team/keras/ - - 
blob/master/examples/mnist_irnn.py. - """ - model = tf.keras.Sequential() - model.add( - tf.keras.layers.SimpleRNN( - self.hidden_units, - kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001), - recurrent_initializer=tf.keras.initializers.Identity(gain=1.0), - activation='relu', - input_shape=self.x_train.shape[1:])) - model.add(tf.keras.layers.Dense(self.num_classes)) - model.add(tf.keras.layers.Activation('softmax')) - return model - - # In each benchmark test, the required arguments for the - # method `measure_performance` include: - # x: Input data, it could be Numpy or loaded from tfds. - # y: Target data. If `x` is a dataset or generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Check more details in `measure_performance()` method of - # benchmark_util. - def benchmark_irnn_mnist_bs_256(self): - """Measure performance with batch_size=256.""" - batch_size = 256 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.learning_rate), - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('irnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_irnn_mnist_bs_512(self): - """Measure performance with batch_size=512.""" - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.learning_rate), - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('irnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_irnn_mnist_bs_1024(self): - """Measure performance with batch_size=1024.""" - batch_size = 1024 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.learning_rate), - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('irnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_irnn_mnist_bs_1024_gpu_2(self): - """Measure performance with batch_size=1024, gpu=2 and - - distribution_strategy='mirrored' - """ - batch_size = 1024 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - num_gpus=2, - distribution_strategy='mirrored', - optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.learning_rate), - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('irnn', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - -if __name__ == '__main__': - tf.test.main() + """Benchmarks for IRNN using `tf.test.Benchmark`.""" + + def __init__(self): + super().__init__() + self.num_classes = 10 + self.hidden_units = 100 + self.learning_rate = 1e-6 + (self.x_train, self.y_train), _ = 
tf.keras.datasets.mnist.load_data() + self.x_train = self.x_train.reshape(self.x_train.shape[0], -1, 1) + self.x_train = self.x_train.astype("float32") / 255 + self.y_train = tf.keras.utils.to_categorical( + self.y_train, self.num_classes + ) + + def _build_model(self): + """Model from https://github.com/keras-team/keras/ + + blob/master/examples/mnist_irnn.py. + """ + model = tf.keras.Sequential() + model.add( + tf.keras.layers.SimpleRNN( + self.hidden_units, + kernel_initializer=tf.keras.initializers.RandomNormal( + stddev=0.001 + ), + recurrent_initializer=tf.keras.initializers.Identity(gain=1.0), + activation="relu", + input_shape=self.x_train.shape[1:], + ) + ) + model.add(tf.keras.layers.Dense(self.num_classes)) + model.add(tf.keras.layers.Activation("softmax")) + return model + + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. + def benchmark_irnn_mnist_bs_256(self): + """Measure performance with batch_size=256.""" + batch_size = 256 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=self.learning_rate + ), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "irnn", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_irnn_mnist_bs_512(self): + """Measure performance with batch_size=512.""" + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=self.learning_rate + ), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "irnn", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_irnn_mnist_bs_1024(self): + """Measure performance with batch_size=1024.""" + batch_size = 1024 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=self.learning_rate + ), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "irnn", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_irnn_mnist_bs_1024_gpu_2(self): + """Measure performance with batch_size=1024, gpu=2 and + + distribution_strategy='mirrored' + """ + batch_size = 1024 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + num_gpus=2, + distribution_strategy="mirrored", + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=self.learning_rate + ), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = 
benchmark_util.get_keras_examples_metadata( + "irnn", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py b/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py index d446713e165b..39fc136c4618 100644 --- a/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py +++ b/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py @@ -17,122 +17,140 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf from keras.benchmarks import benchmark_util class MLPReutersBenchmark(tf.test.Benchmark): - """Benchmarks for MLP using `tf.test.Benchmark`.""" - - def __init__(self): - super().__init__() - self.max_words = 1000 - (self.x_train, self.y_train), _ = tf.keras.datasets.reuters.load_data( - num_words=self.max_words) - self.num_classes = np.max(self.y_train) + 1 - tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=self.max_words) - self.x_train = tokenizer.sequences_to_matrix(self.x_train, mode='binary') - self.y_train = tf.keras.utils.to_categorical(self.y_train, self.num_classes) - self.epochs = 5 - - def _build_model(self): - """Model from https://github.com/keras-team/keras/blob/master/ - - examples/reuters_mlp.py. - """ - model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(512, input_shape=(self.max_words,))) - model.add(tf.keras.layers.Activation('relu')) - model.add(tf.keras.layers.Dropout(0.5)) - model.add(tf.keras.layers.Dense(self.num_classes)) - model.add(tf.keras.layers.Activation('softmax')) - return model - - # In each benchmark test, the required arguments for the - # method `measure_performance` include: - # x: Input data, it could be Numpy or loaded from tfds. - # y: Target data. If `x` is a dataset or generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Check more details in `measure_performance()` method of - # benchmark_util. 
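As the IRNN benchmarks above illustrate, `measure_performance` accepts the optimizer either as a Keras string alias or as a configured optimizer instance; both forms appear in this diff:

    optimizer = "adam"  # string alias, as in the conv/MLP benchmarks
    optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-6)  # instance, as in IRNN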
- def benchmark_mlp_reuters_bs_128(self): - """Measure performance with batch_size=128.""" - batch_size = 128 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - epochs=self.epochs, - optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('mlp', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_mlp_reuters_bs_256(self): - """Measure performance with batch_size=256.""" - batch_size = 256 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - epochs=self.epochs, - optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('mlp', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_mlp_reuters_bs_512(self): - """Measure performance with batch_size=512.""" - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - epochs=self.epochs, - optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('mlp', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_mlp_reuters_bs_512_gpu_2(self): - """Measure performance with batch_size=512, gpu=2 and - - distribution_strategy='mirrored' - """ - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - num_gpus=2, - distribution_strategy='mirrored', - epochs=self.epochs, - optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata('mlp', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - -if __name__ == '__main__': - tf.test.main() + """Benchmarks for MLP using `tf.test.Benchmark`.""" + + def __init__(self): + super().__init__() + self.max_words = 1000 + (self.x_train, self.y_train), _ = tf.keras.datasets.reuters.load_data( + num_words=self.max_words + ) + self.num_classes = np.max(self.y_train) + 1 + tokenizer = tf.keras.preprocessing.text.Tokenizer( + num_words=self.max_words + ) + self.x_train = tokenizer.sequences_to_matrix( + self.x_train, mode="binary" + ) + self.y_train = tf.keras.utils.to_categorical( + self.y_train, self.num_classes + ) + self.epochs = 5 + + def _build_model(self): + """Model from https://github.com/keras-team/keras/blob/master/ + + examples/reuters_mlp.py. + """ + model = tf.keras.Sequential() + model.add(tf.keras.layers.Dense(512, input_shape=(self.max_words,))) + model.add(tf.keras.layers.Activation("relu")) + model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Dense(self.num_classes)) + model.add(tf.keras.layers.Activation("softmax")) + return model + + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. 
If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. + def benchmark_mlp_reuters_bs_128(self): + """Measure performance with batch_size=128.""" + batch_size = 128 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + epochs=self.epochs, + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata("mlp", batch_size) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_mlp_reuters_bs_256(self): + """Measure performance with batch_size=256.""" + batch_size = 256 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + epochs=self.epochs, + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata("mlp", batch_size) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_mlp_reuters_bs_512(self): + """Measure performance with batch_size=512.""" + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + epochs=self.epochs, + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata("mlp", batch_size) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_mlp_reuters_bs_512_gpu_2(self): + """Measure performance with batch_size=512, gpu=2 and + + distribution_strategy='mirrored' + """ + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + num_gpus=2, + distribution_strategy="mirrored", + epochs=self.epochs, + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata("mlp", batch_size) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py b/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index 7f2af56afcc1..7277c955f215 100644 --- a/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -23,215 +23,245 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): - """Benchmarks for Text classification with Transformer - using `tf.test.Benchmark`. 
- """ - - def __init__(self): - super().__init__() - self.max_feature = 20000 - self.max_len = 200 - (self.imdb_x, self.imdb_y), _ = tf.keras.datasets.imdb.load_data( - num_words=self.max_feature) - self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( - self.imdb_x, maxlen=self.max_len) - - def _build_model(self): - """Model from https://keras.io/examples/nlp/text_classification_with_transformer/.""" - embed_dim = 32 - num_heads = 2 - ff_dim = 32 - inputs = tf.keras.layers.Input(shape=(self.max_len,)) - embedding_layer = TokenAndPositionEmbedding(self.max_len, self.max_feature, - embed_dim) - x = embedding_layer(inputs) #pylint: disable=not-callable - transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim) - x = transformer_block(x) #pylint: disable=not-callable - x = tf.keras.layers.GlobalAvgPool1D()(x) - x = tf.keras.layers.Dropout(0.1)(x) - x = tf.keras.layers.Dense(20, activation='relu')(x) - x = tf.keras.layers.Dropout(0.1)(x) - outputs = tf.keras.layers.Dense(2, activation='softmax')(x) - - model = tf.keras.Model(inputs=inputs, outputs=outputs) - return model - - # In each benchmark test, the required arguments for the - # method `measure_performance` include: - # x: Input data, it could be Numpy or loaded from tfds. - # y: Target data. If `x` is a dataset or generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Check more details in `measure_performance()` method of - # benchmark_util. - def benchmark_text_classification_bs_128(self): - """Measure performance with batch_size=128.""" - batch_size = 128 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - optimizer='adam', - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'transformer', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_text_classification_bs_256(self): - """Measure performance with batch_size=256.""" - batch_size = 256 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - optimizer='adam', - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'transformer', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_text_classification_bs_512(self): - """Measure performance with batch_size=512.""" - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - optimizer='adam', - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'transformer', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) - - def benchmark_text_classification_bs_512_gpu_2(self): - """Measure performance with batch_size=512, gpu=1 and - - distribution_strategy='mirrored' + """Benchmarks for Text classification with Transformer + using `tf.test.Benchmark`. 
""" - batch_size = 512 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - num_gpus=2, - distribution_strategy='mirrored', - optimizer='adam', - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - metadata = benchmark_util.get_keras_examples_metadata( - 'transformer', batch_size) - extras.update(metadata) - self.report_benchmark(wall_time=wall_time, metrics=metrics, extras=extras) + + def __init__(self): + super().__init__() + self.max_feature = 20000 + self.max_len = 200 + (self.imdb_x, self.imdb_y), _ = tf.keras.datasets.imdb.load_data( + num_words=self.max_feature + ) + self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( + self.imdb_x, maxlen=self.max_len + ) + + def _build_model(self): + """Model from + https://keras.io/examples/nlp/text_classification_with_transformer/.""" + embed_dim = 32 + num_heads = 2 + ff_dim = 32 + inputs = tf.keras.layers.Input(shape=(self.max_len,)) + embedding_layer = TokenAndPositionEmbedding( + self.max_len, self.max_feature, embed_dim + ) + x = embedding_layer(inputs) + transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim) + x = transformer_block(x) + x = tf.keras.layers.GlobalAvgPool1D()(x) + x = tf.keras.layers.Dropout(0.1)(x) + x = tf.keras.layers.Dense(20, activation="relu")(x) + x = tf.keras.layers.Dropout(0.1)(x) + outputs = tf.keras.layers.Dense(2, activation="softmax")(x) + + model = tf.keras.Model(inputs=inputs, outputs=outputs) + return model + + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. 
+ def benchmark_text_classification_bs_128(self): + """Measure performance with batch_size=128.""" + batch_size = 128 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + optimizer="adam", + loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "transformer", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_text_classification_bs_256(self): + """Measure performance with batch_size=256.""" + batch_size = 256 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + optimizer="adam", + loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "transformer", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_text_classification_bs_512(self): + """Measure performance with batch_size=512.""" + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + optimizer="adam", + loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "transformer", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) + + def benchmark_text_classification_bs_512_gpu_2(self): + """Measure performance with batch_size=512, gpu=2 and + + distribution_strategy='mirrored' + """ + batch_size = 512 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + num_gpus=2, + distribution_strategy="mirrored", + optimizer="adam", + loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + + metadata = benchmark_util.get_keras_examples_metadata( + "transformer", batch_size + ) + extras.update(metadata) + self.report_benchmark( + wall_time=wall_time, metrics=metrics, extras=extras + ) class MultiHeadSelfAttention(tf.keras.layers.Layer): - """Implement multi head self attention as a Keras layer.""" - - def __init__(self, embed_dim, num_heads=8): - super().__init__() - self.embed_dim = embed_dim - self.num_heads = num_heads - if embed_dim % num_heads != 0: - raise ValueError(f'embedding dimension = {embed_dim} should be divisible' - f'by number of heads = {num_heads}') - self.projection_dim = embed_dim // num_heads - self.query_dense = tf.keras.layers.Dense(embed_dim) - self.key_dense = tf.keras.layers.Dense(embed_dim) - self.value_dense = tf.keras.layers.Dense(embed_dim) - self.combine_heads = tf.keras.layers.Dense(embed_dim) - - def attention(self, query, key, value): - score = tf.matmul(query, key, transpose_b=True) - dim_key = tf.cast(tf.shape(key)[-1], tf.float32) - scaled_score = score / tf.math.sqrt(dim_key) - weights = tf.nn.softmax(scaled_score, axis=-1) - output = tf.matmul(weights, value) - return output, weights - - def separate_heads(self, x, batch_size): - x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim)) - return tf.transpose(x, perm=[0, 2, 1, 3]) - - def call(self, inputs): #pylint: disable=arguments-differ - # x.shape = [batch_size, seq_len,
embedding_dim] - batch_size = tf.shape(inputs)[0] - query = self.query_dense(inputs) # (batch_size, seq_len, embed_dim) - key = self.key_dense(inputs) # (batch_size, seq_len, embed_dim) - value = self.value_dense(inputs) # (batch_size, seq_len, embed_dim) - query = self.separate_heads( - query, batch_size) # (batch_size, num_heads, seq_len, projection_dim) - key = self.separate_heads( - key, batch_size) # (batch_size, num_heads, seq_len, projection_dim) - value = self.separate_heads( - value, batch_size) # (batch_size, num_heads, seq_len, projection_dim) - attention, _ = self.attention(query, key, value) - attention = tf.transpose( - attention, perm=[0, 2, 1, - 3]) # (batch_size, seq_len, num_heads, projection_dim) - concat_attention = tf.reshape( - attention, - (batch_size, -1, self.embed_dim)) # (batch_size, seq_len, embed_dim) - output = self.combine_heads( - concat_attention) # (batch_size, seq_len, embed_dim) - return output + """Implement multi head self attention as a Keras layer.""" + + def __init__(self, embed_dim, num_heads=8): + super().__init__() + self.embed_dim = embed_dim + self.num_heads = num_heads + if embed_dim % num_heads != 0: + raise ValueError( + f"embedding dimension = {embed_dim} should be divisible " + f"by number of heads = {num_heads}" + ) + self.projection_dim = embed_dim // num_heads + self.query_dense = tf.keras.layers.Dense(embed_dim) + self.key_dense = tf.keras.layers.Dense(embed_dim) + self.value_dense = tf.keras.layers.Dense(embed_dim) + self.combine_heads = tf.keras.layers.Dense(embed_dim) + + def attention(self, query, key, value): + score = tf.matmul(query, key, transpose_b=True) + dim_key = tf.cast(tf.shape(key)[-1], tf.float32) + scaled_score = score / tf.math.sqrt(dim_key) + weights = tf.nn.softmax(scaled_score, axis=-1) + output = tf.matmul(weights, value) + return output, weights + + def separate_heads(self, x, batch_size): + x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim)) + return tf.transpose(x, perm=[0, 2, 1, 3]) + + def call(self, inputs): + # x.shape = [batch_size, seq_len, embedding_dim] + batch_size = tf.shape(inputs)[0] + query = self.query_dense(inputs) # (batch_size, seq_len, embed_dim) + key = self.key_dense(inputs) # (batch_size, seq_len, embed_dim) + value = self.value_dense(inputs) # (batch_size, seq_len, embed_dim) + query = self.separate_heads( + query, batch_size + ) # (batch_size, num_heads, seq_len, projection_dim) + key = self.separate_heads( + key, batch_size + ) # (batch_size, num_heads, seq_len, projection_dim) + value = self.separate_heads( + value, batch_size + ) # (batch_size, num_heads, seq_len, projection_dim) + attention, _ = self.attention(query, key, value) + attention = tf.transpose( + attention, perm=[0, 2, 1, 3] + ) # (batch_size, seq_len, num_heads, projection_dim) + concat_attention = tf.reshape( + attention, (batch_size, -1, self.embed_dim) + ) # (batch_size, seq_len, embed_dim) + output = self.combine_heads( + concat_attention + ) # (batch_size, seq_len, embed_dim) + return output class TransformerBlock(tf.keras.layers.Layer): - """Implement a Transformer block as a layer.""" - - def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1): - super().__init__() - self.att = MultiHeadSelfAttention(embed_dim, num_heads) - self.ffn = tf.keras.Sequential([ - tf.keras.layers.Dense(ff_dim, activation='relu'), - tf.keras.layers.Dense(embed_dim) - ]) - self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6) - self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6) -
self.dropout1 = tf.keras.layers.Dropout(rate) - self.dropout2 = tf.keras.layers.Dropout(rate) - - def call(self, inputs, training): #pylint: disable=arguments-differ - attn_output = self.att(inputs) #pylint: disable=not-callable - attn_output = self.dropout1(attn_output, training=training) - out1 = self.layernorm1(inputs + attn_output) - ffn_output = self.ffn(out1) - ffn_output = self.dropout2(ffn_output, training=training) - return self.layernorm2(out1 + ffn_output) + """Implement a Transformer block as a layer.""" + + def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1): + super().__init__() + self.att = MultiHeadSelfAttention(embed_dim, num_heads) + self.ffn = tf.keras.Sequential( + [ + tf.keras.layers.Dense(ff_dim, activation="relu"), + tf.keras.layers.Dense(embed_dim), + ] + ) + self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6) + self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6) + self.dropout1 = tf.keras.layers.Dropout(rate) + self.dropout2 = tf.keras.layers.Dropout(rate) + + def call(self, inputs, training): + attn_output = self.att(inputs) + attn_output = self.dropout1(attn_output, training=training) + out1 = self.layernorm1(inputs + attn_output) + ffn_output = self.ffn(out1) + ffn_output = self.dropout2(ffn_output, training=training) + return self.layernorm2(out1 + ffn_output) class TokenAndPositionEmbedding(tf.keras.layers.Layer): - """Implement embedding layer.""" - - def __init__(self, maxlen, vocab_size, embed_dim): - super().__init__() - self.token_emb = tf.keras.layers.Embedding( - input_dim=vocab_size, output_dim=embed_dim) - self.pos_emb = tf.keras.layers.Embedding( - input_dim=maxlen, output_dim=embed_dim) - - def call(self, x): #pylint: disable=arguments-differ - maxlen = tf.shape(x)[-1] - positions = tf.range(start=0, limit=maxlen, delta=1) - positions = self.pos_emb(positions) - x = self.token_emb(x) - return x + positions - - -if __name__ == '__main__': - tf.test.main() + """Implement embedding layer.""" + + def __init__(self, maxlen, vocab_size, embed_dim): + super().__init__() + self.token_emb = tf.keras.layers.Embedding( + input_dim=vocab_size, output_dim=embed_dim + ) + self.pos_emb = tf.keras.layers.Embedding( + input_dim=maxlen, output_dim=embed_dim + ) + + def call(self, x): + maxlen = tf.shape(x)[-1] + positions = tf.range(start=0, limit=maxlen, delta=1) + positions = self.pos_emb(positions) + x = self.token_emb(x) + return x + positions + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/layer_benchmarks/BUILD b/keras/benchmarks/layer_benchmarks/BUILD index ef34aff6d7c5..809292c8c18f 100644 --- a/keras/benchmarks/layer_benchmarks/BUILD +++ b/keras/benchmarks/layer_benchmarks/BUILD @@ -1,9 +1,11 @@ # Description: # Implementation of benchmarks on Keras layers. 
+# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = ["//visibility:public"], licenses = ["notice"], ) diff --git a/keras/benchmarks/layer_benchmarks/layer_benchmarks_test.py b/keras/benchmarks/layer_benchmarks/layer_benchmarks_test.py index 5073bb9fed24..42c5d17fa069 100644 --- a/keras/benchmarks/layer_benchmarks/layer_benchmarks_test.py +++ b/keras/benchmarks/layer_benchmarks/layer_benchmarks_test.py @@ -18,284 +18,524 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import functools + import numpy as np +import tensorflow.compat.v2 as tf + from keras.benchmarks import benchmark_util from keras.benchmarks.layer_benchmarks import layer_benchmarks_test_base def _get_metadata(name): - return { - "model_name": "ideal_layers", - "parameters": name[1] + "_shape", - } + return { + "model_name": "ideal_layers", + "parameters": name[1] + "_shape", + } def _get_layer_args(layer_cls, layer_args): - # To make benchmark parameters compatible with GPU platform. - if layer_cls is tf.keras.layers.Bidirectional: - return {"layer": tf.keras.layers.LSTM(1)} - return layer_args + # To make benchmark parameters compatible with GPU platform. + if layer_cls is tf.keras.layers.Bidirectional: + return {"layer": tf.keras.layers.LSTM(1)} + return layer_args def _get_input_data(inputs): - if "input_shape" in inputs: - return tf.ones(inputs["input_shape"]) - elif "input" in inputs: - return inputs["input"] - else: - raise ValueError("Please specify either `input_shape` or `input`" - "for the benchmark test") + if "input_shape" in inputs: + return tf.ones(inputs["input_shape"]) + elif "input" in inputs: + return inputs["input"] + else: + raise ValueError( + "Please specify either `input_shape` or `input` " + "for the benchmark test" + ) def _layer_call_backward(layer, x): - with tf.GradientTape() as tape: - y = layer(x) - loss = tf.reduce_mean(y**2) + with tf.GradientTape() as tape: + y = layer(x) + loss = tf.reduce_mean(y**2) + + _ = tape.gradient(loss, layer.trainable_variables) - _ = tape.gradient(loss, layer.trainable_variables) CORE_LAYERS = [ - ("Dense_small_shape", tf.keras.layers.Dense, - {"units": 32, "activation": "relu"}, - {"input_shape": (1, 16)}, 100), - ("Activation_small_shape", tf.keras.layers.Activation, - {"activation": "relu"}, - {"input_shape": (1, 4)}, 100), - ("Embedding_small_shape", tf.keras.layers.Embedding, - {"input_dim": 1, "output_dim": 1, "input_length": 1}, - {"input": np.random.randint(1, size=(1, 1))}, 100), - ("Embedding_normal_shape", tf.keras.layers.Embedding, - {"input_dim": 1000, "output_dim": 64, "input_length": 10}, - {"input": np.random.randint(1000, size=(32, 10))}, 100), - ("Masking_small_shape", tf.keras.layers.Masking, - {"mask_value": 1}, {"input_shape": (1, 1)}, 100), - ("Lambda_small_shape", tf.keras.layers.Lambda, - {"function": lambda x: x ** 2}, {"input_shape": (1, 1)}, 100), - ("Flatten_small_shape", tf.keras.layers.Flatten, - {}, {"input_shape": (1, 1)}, 100), + ( + "Dense_small_shape", + tf.keras.layers.Dense, + {"units": 32, "activation": "relu"}, + {"input_shape": (1, 16)}, + 100, + ), + ( + "Activation_small_shape", + tf.keras.layers.Activation, + {"activation": "relu"}, + {"input_shape": (1, 4)}, + 100, + ), + ( + "Embedding_small_shape", + tf.keras.layers.Embedding, + {"input_dim": 1, "output_dim": 1, "input_length": 1}, + {"input": np.random.randint(1,
size=(1, 1))}, + 100, + ), + ( + "Embedding_normal_shape", + tf.keras.layers.Embedding, + {"input_dim": 1000, "output_dim": 64, "input_length": 10}, + {"input": np.random.randint(1000, size=(32, 10))}, + 100, + ), + ( + "Masking_small_shape", + tf.keras.layers.Masking, + {"mask_value": 1}, + {"input_shape": (1, 1)}, + 100, + ), + ( + "Lambda_small_shape", + tf.keras.layers.Lambda, + {"function": lambda x: x**2}, + {"input_shape": (1, 1)}, + 100, + ), + ( + "Flatten_small_shape", + tf.keras.layers.Flatten, + {}, + {"input_shape": (1, 1)}, + 100, + ), ] CONV_LAYERS = [ - ("Conv1D_small_shape", tf.keras.layers.Conv1D, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1)}, 100), - ("Conv2D_small_shape", tf.keras.layers.Conv2D, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1, 1)}, 100), - ("Conv2D_normal_shape", tf.keras.layers.Conv2D, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (64, 28, 28, 3)}, 100), - ("Conv3D_small_shape", tf.keras.layers.Conv3D, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1, 1, 1)}, 100), - ("Conv1DTranspose_small_shape", tf.keras.layers.Conv1DTranspose, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1)}, 100), - ("Conv2DTranspose_small_shape", tf.keras.layers.Conv2DTranspose, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1, 1)}, 100), - ("Conv3DTranspose_small_shape", tf.keras.layers.Conv3DTranspose, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1, 1, 1)}, 100), - ("SeparableConv1D_small_shape", tf.keras.layers.SeparableConv1D, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1)}, 100), - ("SeparableConv2D_small_shape", tf.keras.layers.SeparableConv2D, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1, 1)}, 100), - ("DepthwiseConv2D_small_shape", tf.keras.layers.DepthwiseConv2D, - {"kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1, 1)}, 100), + ( + "Conv1D_small_shape", + tf.keras.layers.Conv1D, + {"filters": 1, "kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "Conv2D_small_shape", + tf.keras.layers.Conv2D, + {"filters": 1, "kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1, 1)}, + 100, + ), + ( + "Conv2D_normal_shape", + tf.keras.layers.Conv2D, + {"filters": 1, "kernel_size": 1, "activation": "relu"}, + {"input_shape": (64, 28, 28, 3)}, + 100, + ), + ( + "Conv3D_small_shape", + tf.keras.layers.Conv3D, + {"filters": 1, "kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1, 1, 1)}, + 100, + ), + ( + "Conv1DTranspose_small_shape", + tf.keras.layers.Conv1DTranspose, + {"filters": 1, "kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "Conv2DTranspose_small_shape", + tf.keras.layers.Conv2DTranspose, + {"filters": 1, "kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1, 1)}, + 100, + ), + ( + "Conv3DTranspose_small_shape", + tf.keras.layers.Conv3DTranspose, + {"filters": 1, "kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1, 1, 1)}, + 100, + ), + ( + "SeparableConv1D_small_shape", + tf.keras.layers.SeparableConv1D, + {"filters": 1, "kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "SeparableConv2D_small_shape", + tf.keras.layers.SeparableConv2D, + {"filters": 1, 
"kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1, 1)}, + 100, + ), + ( + "DepthwiseConv2D_small_shape", + tf.keras.layers.DepthwiseConv2D, + {"kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1, 1)}, + 100, + ), ] RECURRENT_LAYERS = [ - ("LSTM_small_shape", tf.keras.layers.LSTM, - {"units": 1}, {"input_shape": (1, 1, 1)}, 100), - ("LSTM_normal_shape", tf.keras.layers.LSTM, - {"units": 4}, {"input_shape": (32, 10, 8)}, 100), - ("GRU_small_shape", tf.keras.layers.GRU, - {"units": 1}, {"input_shape": (1, 1, 1)}, 100), - ("SimpleRNN_small_shape", tf.keras.layers.SimpleRNN, - {"units": 1}, {"input_shape": (1, 1, 1)}, 100), - ("TimeDistributed_small_shape", tf.keras.layers.TimeDistributed, - {"layer": tf.keras.layers.Conv2D(1, 1)}, - {"input_shape": (1, 1, 1, 1, 1)}, 100), - ("Bidirectional_small_shape", tf.keras.layers.Bidirectional, - {}, {"input_shape": (1, 1, 1)}, 100), - ("ConvLSTM2D_small_shape", tf.keras.layers.ConvLSTM2D, - {"filters": 1, "kernel_size": 1, "activation": "relu"}, - {"input_shape": (1, 1, 1, 1, 1)}, 100), - ("RNN_small_shape", tf.keras.layers.RNN, - {"cell": tf.keras.layers.LSTMCell(1)}, {"input_shape": (1, 1, 1)}, 100), + ( + "LSTM_small_shape", + tf.keras.layers.LSTM, + {"units": 1}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "LSTM_normal_shape", + tf.keras.layers.LSTM, + {"units": 4}, + {"input_shape": (32, 10, 8)}, + 100, + ), + ( + "GRU_small_shape", + tf.keras.layers.GRU, + {"units": 1}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "SimpleRNN_small_shape", + tf.keras.layers.SimpleRNN, + {"units": 1}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "TimeDistributed_small_shape", + tf.keras.layers.TimeDistributed, + {"layer": tf.keras.layers.Conv2D(1, 1)}, + {"input_shape": (1, 1, 1, 1, 1)}, + 100, + ), + ( + "Bidirectional_small_shape", + tf.keras.layers.Bidirectional, + {}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "ConvLSTM2D_small_shape", + tf.keras.layers.ConvLSTM2D, + {"filters": 1, "kernel_size": 1, "activation": "relu"}, + {"input_shape": (1, 1, 1, 1, 1)}, + 100, + ), + ( + "RNN_small_shape", + tf.keras.layers.RNN, + {"cell": tf.keras.layers.LSTMCell(1)}, + {"input_shape": (1, 1, 1)}, + 100, + ), ] NORMALIZATION_LAYERS = [ - ("BatchNormalization_small_shape", tf.keras.layers.BatchNormalization, - {"axis": -1}, {"input_shape": (1, 1, 1)}, 100), - ("LayerNormalization_small_shape", tf.keras.layers.LayerNormalization, - {"axis": -1}, {"input_shape": (1, 1, 1)}, 100), + ( + "BatchNormalization_small_shape", + tf.keras.layers.BatchNormalization, + {"axis": -1}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "LayerNormalization_small_shape", + tf.keras.layers.LayerNormalization, + {"axis": -1}, + {"input_shape": (1, 1, 1)}, + 100, + ), ] REGULARIZATION_LAYERS = [ - ("Dropout_small_shape", tf.keras.layers.Dropout, - {"rate": 0.2}, {"input_shape": (1, 1, 1)}, 100), - ("SpatialDropout1D_small_shape", tf.keras.layers.SpatialDropout1D, - {"rate": 0.2}, {"input_shape": (1, 1, 1)}, 100), - ("SpatialDropout2D_small_shape", tf.keras.layers.SpatialDropout2D, - {"rate": 0.2}, {"input_shape": (1, 1, 1, 1)}, 100), - ("SpatialDropout3D_small_shape", tf.keras.layers.SpatialDropout3D, - {"rate": 0.2}, {"input_shape": (1, 1, 1, 1, 1)}, 100), - ("GaussianDropout_small_shape", tf.keras.layers.GaussianDropout, - {"rate": 0.2}, {"input_shape": (1, 1, 1)}, 100), - ("GaussianNoise_small_shape", tf.keras.layers.GaussianNoise, - {"stddev": 0.1}, {"input_shape": (1, 1, 1)}, 100), - ("ActivityRegularization_small_shape", - 
tf.keras.layers.ActivityRegularization, - {"l1": 0.3}, {"input_shape": (1, 1, 1)}, 100), - ("AlphaDropout_small_shape", tf.keras.layers.AlphaDropout, - {"rate": 0.2}, {"input_shape": (1, 1, 1)}, 100), + ( + "Dropout_small_shape", + tf.keras.layers.Dropout, + {"rate": 0.2}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "SpatialDropout1D_small_shape", + tf.keras.layers.SpatialDropout1D, + {"rate": 0.2}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "SpatialDropout2D_small_shape", + tf.keras.layers.SpatialDropout2D, + {"rate": 0.2}, + {"input_shape": (1, 1, 1, 1)}, + 100, + ), + ( + "SpatialDropout3D_small_shape", + tf.keras.layers.SpatialDropout3D, + {"rate": 0.2}, + {"input_shape": (1, 1, 1, 1, 1)}, + 100, + ), + ( + "GaussianDropout_small_shape", + tf.keras.layers.GaussianDropout, + {"rate": 0.2}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "GaussianNoise_small_shape", + tf.keras.layers.GaussianNoise, + {"stddev": 0.1}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "ActivityRegularization_small_shape", + tf.keras.layers.ActivityRegularization, + {"l1": 0.3}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "AlphaDropout_small_shape", + tf.keras.layers.AlphaDropout, + {"rate": 0.2}, + {"input_shape": (1, 1, 1)}, + 100, + ), ] ATTENSION_LAYERS = [ - ("Attention_small_shape", tf.keras.layers.Attention, - {"use_scale": False}, {"input": [np.ones((1, 1, 1)), np.ones((1, 1, 1))]}, - 100), - ("AdditiveAttention_small_shape", tf.keras.layers.AdditiveAttention, - {"use_scale": True}, {"input": [np.ones((1, 1, 1)), np.ones((1, 1, 1))]}, - 100), + ( + "Attention_small_shape", + tf.keras.layers.Attention, + {"use_scale": False}, + {"input": [np.ones((1, 1, 1)), np.ones((1, 1, 1))]}, + 100, + ), + ( + "AdditiveAttention_small_shape", + tf.keras.layers.AdditiveAttention, + {"use_scale": True}, + {"input": [np.ones((1, 1, 1)), np.ones((1, 1, 1))]}, + 100, + ), ] POOLING_LAYERS = [ - ("MaxPooling1D_small_shape", tf.keras.layers.MaxPooling1D, - {"pool_size": 1, "strides": 1}, {"input_shape": (1, 1, 1)}, 100), - ("MaxPooling2D_small_shape", tf.keras.layers.MaxPooling2D, - {"pool_size": 1, "strides": 1}, {"input_shape": (1, 1, 1, 1)}, 100), - ("MaxPooling3D_small_shape", tf.keras.layers.MaxPooling3D, - {"pool_size": 1, "strides": 1}, {"input_shape": (1, 1, 1, 1, 1)}, 100), - ("AveragePooling1D_small_shape", tf.keras.layers.AveragePooling1D, - {"pool_size": 1, "strides": 1}, {"input_shape": (1, 1, 1)}, 100), - ("AveragePooling2D_small_shape", tf.keras.layers.AveragePooling2D, - {"pool_size": 1, "strides": 1}, {"input_shape": (1, 1, 1, 1)}, 100), - ("AveragePooling3D_small_shape", tf.keras.layers.AveragePooling3D, - {"pool_size": 1, "strides": 1}, {"input_shape": (1, 1, 1, 1, 1)}, 100), - ("GlobalMaxPooling1D_small_shape", tf.keras.layers.GlobalMaxPooling1D, - {}, {"input_shape": (1, 1, 1)}, 100), - ("GlobalMaxPooling2D_small_shape", tf.keras.layers.GlobalMaxPooling2D, - {}, {"input_shape": (1, 1, 1, 1)}, 100), - ("GlobalMaxPooling3D_small_shape", tf.keras.layers.GlobalMaxPooling3D, - {}, {"input_shape": (1, 1, 1, 1, 1)}, 100), - ("GlobalAveragePooling1D_small_shape", - tf.keras.layers.GlobalAveragePooling1D, - {}, {"input_shape": (1, 1, 1)}, 100), - ("GlobalAveragePooling2D_small_shape", - tf.keras.layers.GlobalAveragePooling2D, - {}, {"input_shape": (1, 1, 1, 1)}, 100), - ("GlobalAveragePooling3D_small_shape", - tf.keras.layers.GlobalAveragePooling3D, - {}, {"input_shape": (1, 1, 1, 1, 1)}, 100), + ( + "MaxPooling1D_small_shape", + tf.keras.layers.MaxPooling1D, + {"pool_size": 1, "strides": 1}, + 
{"input_shape": (1, 1, 1)}, + 100, + ), + ( + "MaxPooling2D_small_shape", + tf.keras.layers.MaxPooling2D, + {"pool_size": 1, "strides": 1}, + {"input_shape": (1, 1, 1, 1)}, + 100, + ), + ( + "MaxPooling3D_small_shape", + tf.keras.layers.MaxPooling3D, + {"pool_size": 1, "strides": 1}, + {"input_shape": (1, 1, 1, 1, 1)}, + 100, + ), + ( + "AveragePooling1D_small_shape", + tf.keras.layers.AveragePooling1D, + {"pool_size": 1, "strides": 1}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "AveragePooling2D_small_shape", + tf.keras.layers.AveragePooling2D, + {"pool_size": 1, "strides": 1}, + {"input_shape": (1, 1, 1, 1)}, + 100, + ), + ( + "AveragePooling3D_small_shape", + tf.keras.layers.AveragePooling3D, + {"pool_size": 1, "strides": 1}, + {"input_shape": (1, 1, 1, 1, 1)}, + 100, + ), + ( + "GlobalMaxPooling1D_small_shape", + tf.keras.layers.GlobalMaxPooling1D, + {}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "GlobalMaxPooling2D_small_shape", + tf.keras.layers.GlobalMaxPooling2D, + {}, + {"input_shape": (1, 1, 1, 1)}, + 100, + ), + ( + "GlobalMaxPooling3D_small_shape", + tf.keras.layers.GlobalMaxPooling3D, + {}, + {"input_shape": (1, 1, 1, 1, 1)}, + 100, + ), + ( + "GlobalAveragePooling1D_small_shape", + tf.keras.layers.GlobalAveragePooling1D, + {}, + {"input_shape": (1, 1, 1)}, + 100, + ), + ( + "GlobalAveragePooling2D_small_shape", + tf.keras.layers.GlobalAveragePooling2D, + {}, + {"input_shape": (1, 1, 1, 1)}, + 100, + ), + ( + "GlobalAveragePooling3D_small_shape", + tf.keras.layers.GlobalAveragePooling3D, + {}, + {"input_shape": (1, 1, 1, 1, 1)}, + 100, + ), ] -class KerasLayerBenchmarks( # pylint: disable=undefined-variable +class KerasLayerBenchmarks( layer_benchmarks_test_base.LayerBenchmarksBase, - metaclass=tf.__internal__.test.ParameterizedBenchmark): - - # The parameter of each layer benchmark is a tuple, and the first one is - # the benchmark name. It must follow the convention of - # "{layer_name}_{small|normal|large}_shape" to make it compatible with - # `self.report_benchmark()` method. 
- _benchmark_parameters = benchmark_util.generate_benchmark_params_cpu_gpu( - CORE_LAYERS + CONV_LAYERS + RECURRENT_LAYERS + NORMALIZATION_LAYERS + - REGULARIZATION_LAYERS + ATTENSION_LAYERS + POOLING_LAYERS) - - def benchmark_layer_call(self, layer_cls, layer_args, inputs, num_iters): - layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) - x = _get_input_data(inputs) - - fn = functools.partial(layer, x) - name = benchmark_util.get_benchmark_name(self._get_name()) - metadata = {"implementation": name[0] + ".layer.call"} - metadata.update(_get_metadata(name)) - self.run_report(fn, num_iters, metadata) - - def benchmark_layer_call_with_function( - self, layer_cls, layer_args, inputs, num_iters): - layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) - x = _get_input_data(inputs) - layer.call = tf.function(layer.call) - - fn = functools.partial(layer, x) - name = benchmark_util.get_benchmark_name(self._get_name()) - metadata = {"implementation": name[0] + ".layer.call.function"} - metadata.update(_get_metadata(name)) - self.run_report(fn, num_iters, metadata) - - def benchmark_layer_call_with_xla( - self, layer_cls, layer_args, inputs, num_iters): - name = benchmark_util.get_benchmark_name(self._get_name()) - # TODO(b/173461426) - if layer_cls is tf.keras.layers.Embedding and name[-1] == "GPU": - return - layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) - x = _get_input_data(inputs) - layer.call = tf.function( - layer.call, jit_compile=True) - - fn = functools.partial(layer, x) - metadata = {"implementation": name[0] + ".layer.call.xla"} - metadata.update(_get_metadata(name)) - self.run_report(fn, num_iters, metadata) - - def benchmark_layer_call_backward( - self, layer_cls, layer_args, inputs, num_iters): - layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) - x = _get_input_data(inputs) - - fn = functools.partial(_layer_call_backward, layer, x) - name = benchmark_util.get_benchmark_name(self._get_name()) - metadata = {"implementation": name[0] + ".layer.call.backward"} - metadata.update(_get_metadata(name)) - self.run_report(fn, num_iters, metadata) - - def benchmark_layer_call_backward_with_function( - self, layer_cls, layer_args, inputs, num_iters): - layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) - x = _get_input_data(inputs) - layer.call = tf.function(layer.call) - - fn = functools.partial(_layer_call_backward, layer, x) - name = benchmark_util.get_benchmark_name(self._get_name()) - metadata = {"implementation": name[0] + ".layer.call.backward.function"} - metadata.update(_get_metadata(name)) - self.run_report(fn, num_iters, metadata) - - def benchmark_layer_call_backward_with_xla( - self, layer_cls, layer_args, inputs, num_iters): - name = benchmark_util.get_benchmark_name(self._get_name()) - # TODO(b/153480400) - if layer_cls in [ - tf.keras.layers.LSTM, tf.keras.layers.Bidirectional, - tf.keras.layers.ConvLSTM2D, tf.keras.layers.GRU, tf.keras.layers.RNN, - tf.keras.layers.SimpleRNN - ]: - return - # TODO(b/173461426) - if layer_cls is tf.keras.layers.Embedding and name[-1] == "GPU": - return - layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) - x = _get_input_data(inputs) - layer.call = tf.function( - layer.call, jit_compile=True) - - fn = functools.partial(_layer_call_backward, layer, x) - metadata = {"implementation": name[0] + ".layer.call.backward.xla"} - metadata.update(_get_metadata(name)) - self.run_report(fn, num_iters, metadata) + metaclass=tf.__internal__.test.ParameterizedBenchmark, +): + + # The parameter of each 
layer benchmark is a tuple, and the first one is + # the benchmark name. It must follow the convention of + # "{layer_name}_{small|normal|large}_shape" to make it compatible with + # `self.report_benchmark()` method. + _benchmark_parameters = benchmark_util.generate_benchmark_params_cpu_gpu( + CORE_LAYERS + + CONV_LAYERS + + RECURRENT_LAYERS + + NORMALIZATION_LAYERS + + REGULARIZATION_LAYERS + + ATTENSION_LAYERS + + POOLING_LAYERS + ) + + def benchmark_layer_call(self, layer_cls, layer_args, inputs, num_iters): + layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) + x = _get_input_data(inputs) + + fn = functools.partial(layer, x) + name = benchmark_util.get_benchmark_name(self._get_name()) + metadata = {"implementation": name[0] + ".layer.call"} + metadata.update(_get_metadata(name)) + self.run_report(fn, num_iters, metadata) + + def benchmark_layer_call_with_function( + self, layer_cls, layer_args, inputs, num_iters + ): + layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) + x = _get_input_data(inputs) + layer.call = tf.function(layer.call) + + fn = functools.partial(layer, x) + name = benchmark_util.get_benchmark_name(self._get_name()) + metadata = {"implementation": name[0] + ".layer.call.function"} + metadata.update(_get_metadata(name)) + self.run_report(fn, num_iters, metadata) + + def benchmark_layer_call_with_xla( + self, layer_cls, layer_args, inputs, num_iters + ): + name = benchmark_util.get_benchmark_name(self._get_name()) + # TODO(b/173461426) + if layer_cls is tf.keras.layers.Embedding and name[-1] == "GPU": + return + layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) + x = _get_input_data(inputs) + layer.call = tf.function(layer.call, jit_compile=True) + + fn = functools.partial(layer, x) + metadata = {"implementation": name[0] + ".layer.call.xla"} + metadata.update(_get_metadata(name)) + self.run_report(fn, num_iters, metadata) + + def benchmark_layer_call_backward( + self, layer_cls, layer_args, inputs, num_iters + ): + layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) + x = _get_input_data(inputs) + + fn = functools.partial(_layer_call_backward, layer, x) + name = benchmark_util.get_benchmark_name(self._get_name()) + metadata = {"implementation": name[0] + ".layer.call.backward"} + metadata.update(_get_metadata(name)) + self.run_report(fn, num_iters, metadata) + + def benchmark_layer_call_backward_with_function( + self, layer_cls, layer_args, inputs, num_iters + ): + layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) + x = _get_input_data(inputs) + layer.call = tf.function(layer.call) + + fn = functools.partial(_layer_call_backward, layer, x) + name = benchmark_util.get_benchmark_name(self._get_name()) + metadata = {"implementation": name[0] + ".layer.call.backward.function"} + metadata.update(_get_metadata(name)) + self.run_report(fn, num_iters, metadata) + + def benchmark_layer_call_backward_with_xla( + self, layer_cls, layer_args, inputs, num_iters + ): + name = benchmark_util.get_benchmark_name(self._get_name()) + # TODO(b/153480400) + if layer_cls in [ + tf.keras.layers.LSTM, + tf.keras.layers.Bidirectional, + tf.keras.layers.ConvLSTM2D, + tf.keras.layers.GRU, + tf.keras.layers.RNN, + tf.keras.layers.SimpleRNN, + ]: + return + # TODO(b/173461426) + if layer_cls is tf.keras.layers.Embedding and name[-1] == "GPU": + return + layer = layer_cls(**_get_layer_args(layer_cls, layer_args)) + x = _get_input_data(inputs) + layer.call = tf.function(layer.call, jit_compile=True) + + fn = functools.partial(_layer_call_backward, layer, 
x) + metadata = {"implementation": name[0] + ".layer.call.backward.xla"} + metadata.update(_get_metadata(name)) + self.run_report(fn, num_iters, metadata) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/benchmarks/layer_benchmarks/layer_benchmarks_test_base.py b/keras/benchmarks/layer_benchmarks/layer_benchmarks_test_base.py index 8331240e4d42..d64e95c241df 100644 --- a/keras/benchmarks/layer_benchmarks/layer_benchmarks_test_base.py +++ b/keras/benchmarks/layer_benchmarks/layer_benchmarks_test_base.py @@ -18,58 +18,69 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import time +import tensorflow.compat.v2 as tf + from keras.benchmarks.layer_benchmarks import run_xprof class LayerBenchmarksBase(tf.test.Benchmark): - """Run and report benchmark results. + """Run and report benchmark results. - The first run is without any profiling to purly measure running time. - Second run is with xprof but no python trace. - Third run is with xprof and python trace. - Note: xprof runs fewer iterations, and the maximum iterations is 100. - """ + The first run is without any profiling to purely measure running time. + Second run is with xprof but no python trace. + Third run is with xprof and python trace. + Note: xprof runs fewer iterations, and the maximum iterations is 100. + """ - def run_report(self, func, num_iters, metadata=None): - """Run and report benchmark results for different settings.""" + def run_report(self, func, num_iters, metadata=None): + """Run and report benchmark results for different settings.""" - # 0. Warm up. - func() + # 0. Warm up. + func() - # 1. Run without profiling. - start = time.time() - for _ in range(num_iters): - func() - total_time = time.time() - start - us_mean_time = total_time * 1e6 / num_iters + # 1. Run without profiling. + start = time.time() + for _ in range(num_iters): + func() + total_time = time.time() - start + us_mean_time = total_time * 1e6 / num_iters - metrics = [ - {"name": "examples_per_sec", - "value": float("{0:.3f}".format(num_iters / total_time))}, - {"name": "us_per_example", - "value": float("{0:.3f}".format(us_mean_time))}] + metrics = [ + { + "name": "examples_per_sec", + "value": float(f"{num_iters / total_time:.3f}"), + }, + { + "name": "us_per_example", + "value": float(f"{us_mean_time:.3f}"), + }, + ] - # 2. Run with xprof with no python trace. - num_iters_xprof = min(100, num_iters) - xprof_link, us_per_example = run_xprof.run_with_xprof( - func, num_iters_xprof, False) - # This xprof link will appear in the benchmark dashboard. - extras = { - "xprof_link": xprof_link, - "us_per_example_with_xprof": us_per_example - } + # 2. Run with xprof with no python trace. + num_iters_xprof = min(100, num_iters) + xprof_link, us_per_example = run_xprof.run_with_xprof( + func, num_iters_xprof, False + ) + # This xprof link will appear in the benchmark dashboard. + extras = { + "xprof_link": xprof_link, + "us_per_example_with_xprof": us_per_example, + } - # 3. Run with xprof and python trace. - xprof_link, us_per_example = run_xprof.run_with_xprof( - func, num_iters_xprof, True) - extras["python_trace_xprof_link"] = xprof_link - extras["us_per_example_with_xprof_and_python"] = us_per_example
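+ # 3. Run with xprof and python trace.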
+ xprof_link, us_per_example = run_xprof.run_with_xprof( + func, num_iters_xprof, True + ) + extras["python_trace_xprof_link"] = xprof_link + extras["us_per_example_with_xprof_and_python"] = us_per_example - if metadata: - extras.update(metadata) - self.report_benchmark( - iters=num_iters, wall_time=us_mean_time, extras=extras, metrics=metrics) + if metadata: + extras.update(metadata) + self.report_benchmark( + iters=num_iters, + wall_time=us_mean_time, + extras=extras, + metrics=metrics, + ) diff --git a/keras/benchmarks/layer_benchmarks/run_xprof.py b/keras/benchmarks/layer_benchmarks/run_xprof.py index aef4d7b98771..1eb65a367a4c 100644 --- a/keras/benchmarks/layer_benchmarks/run_xprof.py +++ b/keras/benchmarks/layer_benchmarks/run_xprof.py @@ -16,25 +16,31 @@ from __future__ import division as _division from __future__ import print_function as _print_function +import os import time import uuid from tensorflow.python.profiler import profiler_v2 as profiler -def run_with_xprof(self, func, num_iters_xprof=100, enable_python_trace=True, - logdir='/tmp/layer_benchmark_xprof/'): - suid = str(uuid.uuid4()) - if enable_python_trace: - options = profiler.ProfilerOptions(python_tracer_level=1) - logdir = os.path.join(logdir, str(uuid.uuid4()) + "_with_python") - else: - options = profiler.ProfilerOptions(python_tracer_level=0) - logdir = os.path.join(logdir, suid) - start = time.time() - with profiler.Profile(logdir, options): - for _ in range(num_iters_xprof): - func() - total_time = time.time() - start - us_per_example = float("{0:.3f}".format(total_time * 1e6 / num_iters_xprof)) - return logdir, us_per_example +def run_with_xprof( + func, + num_iters_xprof=100, + enable_python_trace=True, + logdir="/tmp/layer_benchmark_xprof/", +): + suid = str(uuid.uuid4()) + if enable_python_trace: + options = profiler.ProfilerOptions(python_tracer_level=1) + logdir = os.path.join(logdir, str(uuid.uuid4()) + "_with_python") + else: + options = profiler.ProfilerOptions(python_tracer_level=0) + logdir = os.path.join(logdir, suid) + + start = time.time() + with profiler.Profile(logdir, options): + for _ in range(num_iters_xprof): + func() + total_time = time.time() - start + us_per_example = float(f"{total_time * 1000000.0 / num_iters_xprof:.3f}") + return logdir, us_per_example diff --git a/keras/benchmarks/metrics_memory_benchmark_test.py b/keras/benchmarks/metrics_memory_benchmark_test.py index 07ab36e6cbc0..2bc58d85e3c6 100644 --- a/keras/benchmarks/metrics_memory_benchmark_test.py +++ b/keras/benchmarks/metrics_memory_benchmark_test.py @@ -14,59 +14,64 @@ # ============================================================================== """Benchmark tests for Keras metrics memory consumption.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf try: - import memory_profiler # pylint:disable=g-import-not-at-top + import memory_profiler except ImportError: - memory_profiler = None + memory_profiler = None class KerasMetricMemoryBenchmark(tf.test.Benchmark): - # This test is added to measure the memory footprint for - # metrics_utils._update_confusion_matrix_variables_optimized(). + # This test is added to measure the memory footprint for + # metrics_utils._update_confusion_matrix_variables_optimized().
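+ # It compares an AUC metric with evenly distributed thresholds + # against one with unevenly distributed thresholds; only the former + # is expected to take the optimized code path.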
- def benchmark_auc_memory_usage(self): - if memory_profiler is None: - self.skipTest('Skip test since memory_profiler is not available.') + def benchmark_auc_memory_usage(self): + if memory_profiler is None: + self.skipTest("Skip test since memory_profiler is not available.") - with tf.compat.forward_compatibility_horizon(2021, 6, 9): - self.y_true = np.random.randint(2, size=(1024, 1024)) - self.y_pred = np.random.rand(1024, 1024) + with tf.compat.forward_compatibility_horizon(2021, 6, 9): + self.y_true = np.random.randint(2, size=(1024, 1024)) + self.y_pred = np.random.rand(1024, 1024) - memory_usage_1 = memory_profiler.memory_usage((self.even_thresholds_auc)) - memory_usage_2 = memory_profiler.memory_usage( - (self.uneven_thresholds_auc)) - # memory usage is a list of number which sampled when running the function - # The pure memory consumption is approximately max(usage) - min(usage) - memory_usage_1 = max(memory_usage_1) - min(memory_usage_1) - memory_usage_2 = max(memory_usage_2) - min(memory_usage_2) + memory_usage_1 = memory_profiler.memory_usage( + (self.even_thresholds_auc) + ) + memory_usage_2 = memory_profiler.memory_usage( + (self.uneven_thresholds_auc) + ) + # Memory usage is a list of numbers sampled while running the + # function. The pure memory consumption is approximately + # max(usage) - min(usage). + memory_usage_1 = max(memory_usage_1) - min(memory_usage_1) + memory_usage_2 = max(memory_usage_2) - min(memory_usage_2) - metrics = {'even_threshold_memory_usage': memory_usage_1, - 'uneven_threshold_memory_usage': memory_usage_2} - self.report_benchmark(iters=1, metrics=metrics) + metrics = { + "even_threshold_memory_usage": memory_usage_1, + "uneven_threshold_memory_usage": memory_usage_2, + } + self.report_benchmark(iters=1, metrics=metrics) - def even_thresholds_auc(self): - auc = tf.keras.metrics.AUC(num_thresholds=200) - self.assertTrue(auc._thresholds_distributed_evenly) + def even_thresholds_auc(self): + auc = tf.keras.metrics.AUC(num_thresholds=200) + self.assertTrue(auc._thresholds_distributed_evenly) - auc(self.y_true, self.y_pred) + auc(self.y_true, self.y_pred) - def uneven_thresholds_auc(self): - num_thresholds = 200 - thresholds = [x / (num_thresholds - 1) for x in range(num_thresholds)] - thresholds[100] += 1 / 200 - thresholds = thresholds[1:-1] + def uneven_thresholds_auc(self): + num_thresholds = 200 + thresholds = [x / (num_thresholds - 1) for x in range(num_thresholds)] + thresholds[100] += 1 / 200 + thresholds = thresholds[1:-1] - auc = tf.keras.metrics.AUC(thresholds=thresholds) - self.assertFalse(auc._thresholds_distributed_evenly) - self.assertEqual(auc.num_thresholds, num_thresholds) + auc = tf.keras.metrics.AUC(thresholds=thresholds) + self.assertFalse(auc._thresholds_distributed_evenly) + self.assertEqual(auc.num_thresholds, num_thresholds) - auc(self.y_true, self.y_pred) + auc(self.y_true, self.y_pred) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/model_components_benchmarks_test.py b/keras/benchmarks/model_components_benchmarks_test.py index af637ad28a23..f10f07294b29 100644 --- a/keras/benchmarks/model_components_benchmarks_test.py +++ b/keras/benchmarks/model_components_benchmarks_test.py @@ -14,277 +14,300 @@ # ============================================================================== r"""Benchmarks on Keras components with different Keras model types.""" -import tensorflow.compat.v2 as tf - import time import numpy as np +import tensorflow.compat.v2 as tf +#
isort: off from tensorflow.python.eager import context from tensorflow.python.eager.context import get_executor class SubclassedKerasModel(tf.keras.Model): - - def __init__(self, initializer="ones"): - super().__init__() - self.layer_a = tf.keras.layers.Dense( - 64, kernel_initializer=initializer, bias_initializer="zeros") - self.layer_b = tf.keras.layers.Dense( - 128, kernel_initializer=initializer, bias_initializer="zeros") - self.layer_c = tf.keras.layers.Dense( - 256, kernel_initializer=initializer, bias_initializer="zeros") - self.layer_d = tf.keras.layers.Dense( - 256, kernel_initializer=initializer, bias_initializer="zeros") - self.layer_e = tf.keras.layers.Dense( - 10, kernel_initializer=initializer, bias_initializer="zeros") - - def call(self, x): - x = self.layer_a(x) - x = self.layer_b(x) - x = self.layer_c(x) - x = self.layer_d(x) - return self.layer_e(x) + def __init__(self, initializer="ones"): + super().__init__() + self.layer_a = tf.keras.layers.Dense( + 64, kernel_initializer=initializer, bias_initializer="zeros" + ) + self.layer_b = tf.keras.layers.Dense( + 128, kernel_initializer=initializer, bias_initializer="zeros" + ) + self.layer_c = tf.keras.layers.Dense( + 256, kernel_initializer=initializer, bias_initializer="zeros" + ) + self.layer_d = tf.keras.layers.Dense( + 256, kernel_initializer=initializer, bias_initializer="zeros" + ) + self.layer_e = tf.keras.layers.Dense( + 10, kernel_initializer=initializer, bias_initializer="zeros" + ) + + def call(self, x): + x = self.layer_a(x) + x = self.layer_b(x) + x = self.layer_c(x) + x = self.layer_d(x) + return self.layer_e(x) def make_keras_model(initializer="ones"): - model_input = tf.keras.Input(shape=(10,)) - x = tf.keras.layers.Dense( - 64, kernel_initializer=initializer, bias_initializer="zeros")(model_input) - x = tf.keras.layers.Dense( - 128, kernel_initializer=initializer, bias_initializer="zeros")(x) - x = tf.keras.layers.Dense( - 256, kernel_initializer=initializer, bias_initializer="zeros")(x) - x = tf.keras.layers.Dense( - 256, kernel_initializer=initializer, bias_initializer="zeros")(x) - x = tf.keras.layers.Dense( - 10, kernel_initializer=initializer, bias_initializer="zeros")(x) - return tf.keras.Model(inputs=model_input, outputs=x) + model_input = tf.keras.Input(shape=(10,)) + x = tf.keras.layers.Dense( + 64, kernel_initializer=initializer, bias_initializer="zeros" + )(model_input) + x = tf.keras.layers.Dense( + 128, kernel_initializer=initializer, bias_initializer="zeros" + )(x) + x = tf.keras.layers.Dense( + 256, kernel_initializer=initializer, bias_initializer="zeros" + )(x) + x = tf.keras.layers.Dense( + 256, kernel_initializer=initializer, bias_initializer="zeros" + )(x) + x = tf.keras.layers.Dense( + 10, kernel_initializer=initializer, bias_initializer="zeros" + )(x) + return tf.keras.Model(inputs=model_input, outputs=x) def make_sequential_keras_model(initializer="ones"): - model = tf.keras.models.Sequential() - model.add(tf.keras.layers.Dense( - 64, kernel_initializer=initializer, bias_initializer="zeros", - input_shape=(10,))) - model.add(tf.keras.layers.Dense( - 128, kernel_initializer=initializer, bias_initializer="zeros")) - model.add(tf.keras.layers.Dense( - 256, kernel_initializer=initializer, bias_initializer="zeros")) - model.add(tf.keras.layers.Dense( - 256, kernel_initializer=initializer, bias_initializer="zeros")) - model.add(tf.keras.layers.Dense( - 10, kernel_initializer=initializer, bias_initializer="zeros")) - return model + model = tf.keras.models.Sequential() + model.add( + 
tf.keras.layers.Dense( + 64, + kernel_initializer=initializer, + bias_initializer="zeros", + input_shape=(10,), + ) + ) + model.add( + tf.keras.layers.Dense( + 128, kernel_initializer=initializer, bias_initializer="zeros" + ) + ) + model.add( + tf.keras.layers.Dense( + 256, kernel_initializer=initializer, bias_initializer="zeros" + ) + ) + model.add( + tf.keras.layers.Dense( + 256, kernel_initializer=initializer, bias_initializer="zeros" + ) + ) + model.add( + tf.keras.layers.Dense( + 10, kernel_initializer=initializer, bias_initializer="zeros" + ) + ) + return model def run_benchmark(func, num_iters, execution_mode=None): - with context.execution_mode(execution_mode): - # call func to warm up - func() - if execution_mode == context.ASYNC: - get_executor().wait() - start = time.time() - for _ in range(num_iters): - func() - if execution_mode == context.ASYNC: - get_executor().wait() - end = time.time() + with context.execution_mode(execution_mode): + # call func to warm up + func() + if execution_mode == context.ASYNC: + get_executor().wait() + start = time.time() + for _ in range(num_iters): + func() + if execution_mode == context.ASYNC: + get_executor().wait() + end = time.time() - return end - start + return end - start class KerasComponentsBenchmarks(tf.test.Benchmark): + def _run(self, func, num_iters, execution_mode=None): + total_time = run_benchmark(func, num_iters, execution_mode) + mean_us = total_time * 1e6 / num_iters + self.report_benchmark( + iters=num_iters, + wall_time=mean_us, + metrics=[ + { + "name": "exp_per_sec", + "value": float(f"{num_iters / total_time:.3f}"), + }, + { + "name": "us_per_exp", + "value": float(f"{total_time * 1000000.0 / num_iters:.3f}"), + }, + ], + ) + + def benchmark_keras_model_subclassed(self): + model = SubclassedKerasModel() + data = tf.random.uniform((10, 10)) + + func = lambda: model(data) + # First call is more expensive (creates variables etc.), discount that. + func() + + # The whole point of this test is to contrast subclassing with + # the functional style of keras model building, so validate that + # the models are equivalent. + assert np.equal(func(), make_keras_model()(data)).all() + + self._run(func, 30000) + + def benchmark_keras_model_functional(self): + model = make_keras_model() + data = tf.random.uniform((10, 10)) + func = lambda: model(data) + # Symmetry with benchmark_keras_model_subclassed + func() + assert np.equal(func(), SubclassedKerasModel()(data)).all() + self._run(func, 30000) + + def benchmark_keras_model_sequential(self): + model = make_sequential_keras_model() + data = tf.random.uniform((10, 10)) + func = lambda: model(data) + # Symmetry with benchmark_keras_model_functional + func() + assert np.equal(func(), make_keras_model()(data)).all() + self._run(func, 30000) + + def _benchmark_keras_model_fit(self, model, run_eagerly=False): + data = tf.random.uniform((10, 10), minval=-1, maxval=1) + labels = tf.random.uniform((10, 10), minval=-1, maxval=1) + dataset = tf.data.Dataset.from_tensors((data, labels)).repeat() + model.compile("sgd", loss="mse", run_eagerly=run_eagerly) + func = lambda: model.fit( + dataset, epochs=1, steps_per_epoch=1000, verbose=0 + ) + # First call is more expensive (creates variables etc.), discount that. 
+ model.fit(dataset, epochs=1, steps_per_epoch=1, verbose=0) + + self._run(func, 1) + + def _benchmark_keras_model_evaluate(self, model, run_eagerly=False): + data = tf.random.uniform((10, 10), minval=-1, maxval=1) + labels = tf.random.uniform((10, 10), minval=-1, maxval=1) + dataset = tf.data.Dataset.from_tensors((data, labels)).repeat() + model.compile("sgd", loss="mse", run_eagerly=run_eagerly) + func = lambda: model.evaluate(dataset, steps=1000, verbose=0) + # First call is more expensive (creates variables etc.), discount that. + model.evaluate(dataset, steps=1, verbose=0) + + self._run(func, 1) + + def _benchmark_keras_model_predict(self, model, run_eagerly=False): + data = tf.random.uniform((10, 10), minval=-1, maxval=1) + dataset = tf.data.Dataset.from_tensors(data).repeat() + model.compile("sgd", loss="mse", run_eagerly=run_eagerly) + func = lambda: model.predict(dataset, steps=1000, verbose=0) + # First call is more expensive (creates variables etc.), discount that. + model.predict(dataset, steps=1, verbose=0) + + self._run(func, 1) + + def benchmark_keras_model_subclassed_fit(self): + model = SubclassedKerasModel(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model) + + def benchmark_keras_model_subclassed_fit_graph_mode(self): + with context.graph_mode(): + model = SubclassedKerasModel(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model) + + def benchmark_keras_model_subclassed_fit_run_model_eagerly(self): + model = SubclassedKerasModel(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model, run_eagerly=True) + + def benchmark_keras_model_functional_fit(self): + model = make_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model) + + def benchmark_keras_model_functional_fit_graph_mode(self): + with context.graph_mode(): + model = make_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model) + + def benchmark_keras_model_functional_fit_graph_mode_with_profiler(self): + tf.profiler.experimental.start("") + with context.graph_mode(): + model = make_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model) + tf.profiler.experimental.stop(save=False) + + def benchmark_keras_model_functional_fit_run_model_eagerly(self): + model = make_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model, run_eagerly=True) + + def benchmark_keras_model_functional_fit_run_model_eagerly_with_profiler( + self, + ): + tf.profiler.experimental.start("") + model = make_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model, run_eagerly=True) + tf.profiler.experimental.stop(save=False) + + def benchmark_keras_model_sequential_fit(self): + model = make_sequential_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model) + + def benchmark_keras_model_sequential_fit_graph_mode(self): + with context.graph_mode(): + model = make_sequential_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model) + + def benchmark_keras_model_sequential_fit_run_model_eagerly(self): + model = make_sequential_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_fit(model, run_eagerly=True) + + def benchmark_keras_model_subclassed_evaluate(self): + model = SubclassedKerasModel(initializer="glorot_uniform") + self._benchmark_keras_model_evaluate(model) + + def benchmark_keras_model_subclassed_evaluate_run_model_eagerly(self): + model = SubclassedKerasModel(initializer="glorot_uniform") 
+ self._benchmark_keras_model_evaluate(model, run_eagerly=True) + + def benchmark_keras_model_functional_evaluate(self): + model = make_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_evaluate(model) + + def benchmark_keras_model_functional_evaluate_run_model_eagerly(self): + model = make_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_evaluate(model, run_eagerly=True) + + def benchmark_keras_model_sequential_evaluate(self): + model = make_sequential_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_evaluate(model) + + def benchmark_keras_model_sequential_evaluate_run_model_eagerly(self): + model = make_sequential_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_evaluate(model, run_eagerly=True) + + def benchmark_keras_model_subclassed_predict(self): + model = SubclassedKerasModel(initializer="glorot_uniform") + self._benchmark_keras_model_predict(model) + + def benchmark_keras_model_subclassed_predict_run_model_eagerly(self): + model = SubclassedKerasModel(initializer="glorot_uniform") + self._benchmark_keras_model_predict(model, run_eagerly=True) + + def benchmark_keras_model_functional_predict(self): + model = make_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_predict(model) + + def benchmark_keras_model_functional_predict_run_model_eagerly(self): + model = make_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_predict(model, run_eagerly=True) + + def benchmark_keras_model_sequential_predict(self): + model = make_sequential_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_predict(model) - def _run(self, func, num_iters, execution_mode=None): - total_time = run_benchmark(func, num_iters, execution_mode) - mean_us = total_time * 1e6 / num_iters - self.report_benchmark( - iters=num_iters, - wall_time=mean_us, - metrics=[ - { - "name": "exp_per_sec", - "value": float("{0:.3f}".format(num_iters / total_time)) - }, - { - "name": "us_per_exp", - "value": float("{0:.3f}".format(total_time * 1e6 / num_iters)) - }, - ]) - - def benchmark_keras_model_subclassed(self): - model = SubclassedKerasModel() - data = tf.random.uniform((10, 10)) - - func = lambda: model(data) # pylint: disable=not-callable - # First call is more expensive (creates variables etc.), discount that. - func() - - # The whole point of this test is to contrast subclassing with - # the functional style of keras model building, so validate that - # the models are equivalent. 
- assert np.equal(func(), make_keras_model()(data)).all() - - self._run(func, 30000) - - def benchmark_keras_model_functional(self): - model = make_keras_model() - data = tf.random.uniform((10, 10)) - func = lambda: model(data) # pylint: disable=not-callable - # Symmetry with benchmark_keras_model_subclassed - func() - assert np.equal(func(), SubclassedKerasModel()(data)).all() # pylint: disable=not-callable - self._run(func, 30000) - - def benchmark_keras_model_sequential(self): - model = make_sequential_keras_model() - data = tf.random.uniform((10, 10)) - func = lambda: model(data) - # Symmetry with benchmark_keras_model_functional - func() - assert np.equal(func(), make_keras_model()(data)).all() - self._run(func, 30000) - - def _benchmark_keras_model_fit(self, model, run_eagerly=False): - data = tf.random.uniform((10, 10), minval=-1, maxval=1) - labels = tf.random.uniform((10, 10), minval=-1, maxval=1) - dataset = tf.data.Dataset.from_tensors((data, labels)).repeat() - model.compile( - "sgd", - loss="mse", run_eagerly=run_eagerly) - func = lambda: model.fit(dataset, epochs=1, steps_per_epoch=1000, verbose=0) - # First call is more expensive (creates variables etc.), discount that. - model.fit(dataset, epochs=1, steps_per_epoch=1, verbose=0) - - self._run(func, 1) - - def _benchmark_keras_model_evaluate(self, model, run_eagerly=False): - data = tf.random.uniform((10, 10), minval=-1, maxval=1) - labels = tf.random.uniform((10, 10), minval=-1, maxval=1) - dataset = tf.data.Dataset.from_tensors((data, labels)).repeat() - model.compile( - "sgd", - loss="mse", run_eagerly=run_eagerly) - func = lambda: model.evaluate(dataset, steps=1000, verbose=0) - # First call is more expensive (creates variables etc.), discount that. - model.evaluate(dataset, steps=1, verbose=0) - - self._run(func, 1) - - def _benchmark_keras_model_predict(self, model, run_eagerly=False): - data = tf.random.uniform((10, 10), minval=-1, maxval=1) - dataset = tf.data.Dataset.from_tensors(data).repeat() - model.compile( - "sgd", - loss="mse", run_eagerly=run_eagerly) - func = lambda: model.predict(dataset, steps=1000, verbose=0) - # First call is more expensive (creates variables etc.), discount that. 
- model.predict(dataset, steps=1, verbose=0) - - self._run(func, 1) - - def benchmark_keras_model_subclassed_fit(self): - model = SubclassedKerasModel(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model) - - def benchmark_keras_model_subclassed_fit_graph_mode(self): - with context.graph_mode(): - model = SubclassedKerasModel(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model) - - def benchmark_keras_model_subclassed_fit_run_model_eagerly(self): - model = SubclassedKerasModel(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model, run_eagerly=True) - - def benchmark_keras_model_functional_fit(self): - model = make_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model) - - def benchmark_keras_model_functional_fit_graph_mode(self): - with context.graph_mode(): - model = make_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model) - - def benchmark_keras_model_functional_fit_graph_mode_with_profiler(self): - tf.profiler.experimental.start("") - with context.graph_mode(): - model = make_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model) - tf.profiler.experimental.stop(save=False) - - def benchmark_keras_model_functional_fit_run_model_eagerly(self): - model = make_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model, run_eagerly=True) - - def benchmark_keras_model_functional_fit_run_model_eagerly_with_profiler( - self): - tf.profiler.experimental.start("") - model = make_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model, run_eagerly=True) - tf.profiler.experimental.stop(save=False) - - def benchmark_keras_model_sequential_fit(self): - model = make_sequential_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model) - - def benchmark_keras_model_sequential_fit_graph_mode(self): - with context.graph_mode(): - model = make_sequential_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model) - - def benchmark_keras_model_sequential_fit_run_model_eagerly(self): - model = make_sequential_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_fit(model, run_eagerly=True) - - def benchmark_keras_model_subclassed_evaluate(self): - model = SubclassedKerasModel(initializer="glorot_uniform") - self._benchmark_keras_model_evaluate(model) - - def benchmark_keras_model_subclassed_evaluate_run_model_eagerly(self): - model = SubclassedKerasModel(initializer="glorot_uniform") - self._benchmark_keras_model_evaluate(model, run_eagerly=True) - - def benchmark_keras_model_functional_evaluate(self): - model = make_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_evaluate(model) - - def benchmark_keras_model_functional_evaluate_run_model_eagerly(self): - model = make_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_evaluate(model, run_eagerly=True) - - def benchmark_keras_model_sequential_evaluate(self): - model = make_sequential_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_evaluate(model) - - def benchmark_keras_model_sequential_evaluate_run_model_eagerly(self): - model = make_sequential_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_evaluate(model, run_eagerly=True) - - def benchmark_keras_model_subclassed_predict(self): - model = SubclassedKerasModel(initializer="glorot_uniform") - self._benchmark_keras_model_predict(model) - - def 
benchmark_keras_model_subclassed_predict_run_model_eagerly(self): - model = SubclassedKerasModel(initializer="glorot_uniform") - self._benchmark_keras_model_predict(model, run_eagerly=True) - - def benchmark_keras_model_functional_predict(self): - model = make_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_predict(model) - - def benchmark_keras_model_functional_predict_run_model_eagerly(self): - model = make_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_predict(model, run_eagerly=True) - - def benchmark_keras_model_sequential_predict(self): - model = make_sequential_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_predict(model) - - def benchmark_keras_model_sequential_predict_run_model_eagerly(self): - model = make_sequential_keras_model(initializer="glorot_uniform") - self._benchmark_keras_model_predict(model, run_eagerly=True) + def benchmark_keras_model_sequential_predict_run_model_eagerly(self): + model = make_sequential_keras_model(initializer="glorot_uniform") + self._benchmark_keras_model_predict(model, run_eagerly=True) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/benchmarks/model_memory_profile.py b/keras/benchmarks/model_memory_profile.py index 04877e0d98f0..927c5fdb5943 100644 --- a/keras/benchmarks/model_memory_profile.py +++ b/keras/benchmarks/model_memory_profile.py @@ -20,58 +20,54 @@ 3. Add the model function to the dict `models`. """ -import tensorflow.compat.v2 as tf - +import numpy as np from absl import app from absl import flags - from absl import logging -import numpy as np + +import keras try: - import memory_profiler # pylint:disable=g-import-not-at-top + import memory_profiler except ImportError: - memory_profiler = None + memory_profiler = None FLAGS = flags.FLAGS -flags.DEFINE_string('model', None, - 'The model to run memory profiler.') - - -@memory_profiler.profile -def _imdb_lstm_model(): - """LSTM model.""" - x_train = np.random.randint(0, 1999, size=(2500, 100)) - y_train = np.random.random((2500, 1)) - - # IMDB LSTM model. - model = tf.keras.Sequential() - model.add(tf.keras.layers.Embedding(20000, 128)) - model.add(tf.keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)) - model.add(tf.keras.layers.Dense(1, activation='sigmoid')) - - model.compile('sgd', 'mse') - # Warm up the model with one epoch. - model.fit(x_train, y_train, batch_size=512, epochs=3) +flags.DEFINE_string("model", None, "The model to run memory profiler.") def main(_): - # Add the model for memory profile. - models = { - 'lstm': _imdb_lstm_model, - } - - if FLAGS.model in models: - logging.info('Run memory profile on %s.', FLAGS.model) - run_model = models[FLAGS.model] - run_model() - else: - logging.info('The model does not exist. Please verify the model name.') - - -if __name__ == '__main__': - flags.mark_flags_as_required(['model']) - if memory_profiler: - app.run(main) - + @memory_profiler.profile + def _imdb_lstm_model(): + """LSTM model.""" + x_train = np.random.randint(0, 1999, size=(2500, 100)) + y_train = np.random.random((2500, 1)) + + # IMDB LSTM model. + model = keras.Sequential() + model.add(keras.layers.Embedding(20000, 128)) + model.add(keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)) + model.add(keras.layers.Dense(1, activation="sigmoid")) + + model.compile("sgd", "mse") + # Warm up the model with one epoch. + model.fit(x_train, y_train, batch_size=512, epochs=3) + + # Add the model for memory profile. 
+ models = { + "lstm": _imdb_lstm_model, + } + + if FLAGS.model in models: + logging.info("Run memory profile on %s.", FLAGS.model) + run_model = models[FLAGS.model] + run_model() + else: + logging.info("The model does not exist. Please verify the model name.") + + +if __name__ == "__main__": + flags.mark_flags_as_required(["model"]) + if memory_profiler: + app.run(main) diff --git a/keras/benchmarks/optimizer_benchmarks_test.py b/keras/benchmarks/optimizer_benchmarks_test.py index 2b50f8a54710..7156a1fa7137 100644 --- a/keras/benchmarks/optimizer_benchmarks_test.py +++ b/keras/benchmarks/optimizer_benchmarks_test.py @@ -17,67 +17,77 @@ import tensorflow.compat.v2 as tf from keras.benchmarks import benchmark_util -from keras.optimizers.optimizer_v2 import adam -from tensorflow.python.platform.benchmark import ParameterizedBenchmark +from keras.optimizers.legacy import adam + +# isort: off +from tensorflow.python.platform.benchmark import ( + ParameterizedBenchmark, +) def bidirect_imdb_lstm_config(): - """Bidirectional LSTM model and IMDB data.""" + """Bidirectional LSTM model and IMDB data.""" - def model_fn(): - inputs = tf.keras.Input(shape=(None,), dtype="int32") - x = tf.keras.layers.Embedding(20000, 128)(inputs) - x = tf.keras.layers.Bidirectional( - tf.keras.layers.LSTM(64, return_sequences=True))( - x) - x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))(x) - outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x) - model = tf.keras.Model(inputs, outputs) - return model + def model_fn(): + inputs = tf.keras.Input(shape=(None,), dtype="int32") + x = tf.keras.layers.Embedding(20000, 128)(inputs) + x = tf.keras.layers.Bidirectional( + tf.keras.layers.LSTM(64, return_sequences=True) + )(x) + x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))(x) + outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x) + model = tf.keras.Model(inputs, outputs) + return model - (x_train, y_train), _ = tf.keras.datasets.imdb.load_data(num_words=20000) - x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=200) + (x_train, y_train), _ = tf.keras.datasets.imdb.load_data(num_words=20000) + x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=200) - return model_fn, x_train, y_train + return model_fn, x_train, y_train class KerasOptimizerBenchmark( - tf.test.Benchmark, metaclass=ParameterizedBenchmark): - """Keras optimizer benchmarks.""" + tf.test.Benchmark, metaclass=ParameterizedBenchmark +): + """Keras optimizer benchmarks.""" - # The parameter of each benchmark test is a tuple, and the first one is - # the optimizer name. - _benchmark_parameters = benchmark_util.generate_benchmark_params_cpu_gpu([ - ("Adam", tf.keras.optimizers.Adam(), 10), - ("NonFusedAdam", adam.NonFusedAdam(), 10), - ]) + # The parameter of each benchmark test is a tuple, and the first one is + # the optimizer name. + _benchmark_parameters = benchmark_util.generate_benchmark_params_cpu_gpu( + [ + ("Adam", tf.keras.optimizers.Adam(), 10), + ("NonFusedAdam", adam.NonFusedAdam(), 10), + ] + ) - def benchmark_optimizer(self, optimizer, num_iters): - """Optimizer benchmark with Bidirectional LSTM model on IMDB data. + def benchmark_optimizer(self, optimizer, num_iters): + """Optimizer benchmark with Bidirectional LSTM model on IMDB data. - Args: - optimizer: The optimizer instance to be benchmarked. - num_iters: The number of iterations to run for performance measurement. 
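The `KerasOptimizerBenchmark` class above leans on the `ParameterizedBenchmark` metaclass: each tuple in `_benchmark_parameters` is expanded into its own generated benchmark method, with the first element used as a name suffix and the remaining elements forwarded as arguments. The real metaclass lives in `tensorflow.python.platform.benchmark`; the sketch below is a simplified stand-in for the mechanism, not its actual implementation:

```python
class ParameterizedMeta(type):
    """Toy version of ParameterizedBenchmark's method generation."""

    def __new__(mcs, name, bases, attrs):
        for entry in attrs.pop("_benchmark_parameters", []):
            suffix, *args = entry

            def make_method(bound_args):
                def benchmark(self):
                    # Delegate to the single parameterized body.
                    return self.benchmark_optimizer(*bound_args)

                return benchmark

            # One generated method per tuple (naming scheme illustrative).
            attrs[f"benchmark_optimizer_{suffix}"] = make_method(args)
        return super().__new__(mcs, name, bases, attrs)


class MyBenchmarks(metaclass=ParameterizedMeta):
    _benchmark_parameters = [("double", 2), ("triple", 3)]

    def benchmark_optimizer(self, factor):
        return factor * 21


print(MyBenchmarks().benchmark_optimizer_double())  # 42
print(MyBenchmarks().benchmark_optimizer_triple())  # 63
```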
- """ - model, train_x, train_y = bidirect_imdb_lstm_config() - metrics, wall_time, extras = benchmark_util.measure_performance( - model, - x=train_x, - y=train_y, - batch_size=512, - optimizer=optimizer, - loss="binary_crossentropy", - metrics=["accuracy"]) - name = benchmark_util.get_benchmark_name(self._get_name()) - metadata = { - "implementation": name[0], - "model_name": "optimizers", - "parameters": "lstm.512", - } - extras.update(metadata) - self.report_benchmark( - iters=num_iters, wall_time=wall_time, metrics=metrics, extras=extras) + Args: + optimizer: The optimizer instance to be benchmarked. + num_iters: The number of iterations to run for performance + measurement. + """ + model, train_x, train_y = bidirect_imdb_lstm_config() + metrics, wall_time, extras = benchmark_util.measure_performance( + model, + x=train_x, + y=train_y, + batch_size=512, + optimizer=optimizer, + loss="binary_crossentropy", + metrics=["accuracy"], + ) + name = benchmark_util.get_benchmark_name(self._get_name()) + metadata = { + "implementation": name[0], + "model_name": "optimizers", + "parameters": "lstm.512", + } + extras.update(metadata) + self.report_benchmark( + iters=num_iters, wall_time=wall_time, metrics=metrics, extras=extras + ) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/benchmarks/saved_model_benchmarks/BUILD b/keras/benchmarks/saved_model_benchmarks/BUILD index 01b3df2d30ef..408dd37c96e3 100644 --- a/keras/benchmarks/saved_model_benchmarks/BUILD +++ b/keras/benchmarks/saved_model_benchmarks/BUILD @@ -1,9 +1,11 @@ # Description: # Implementation of Keras benchmarks. +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "cuda_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = ["//visibility:public"], licenses = ["notice"], ) diff --git a/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py b/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py index 52c81e633cdc..bcc94015baf7 100644 --- a/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py +++ b/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py @@ -19,25 +19,30 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_densenet_201(self): - app = tf.keras.applications.DenseNet201 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - -if __name__ == '__main__': - tf.test.main() + def benchmark_save_and_load_densenet_201(self): + app = tf.keras.applications.DenseNet201 + ( + save_result, + load_result, + ) = saved_model_benchmark_util.save_and_load_benchmark(app) + + self.report_benchmark( + iters=save_result["iters"], + wall_time=save_result["wall_time"], + name=save_result["name"], + ) + + self.report_benchmark( + iters=load_result["iters"], + wall_time=load_result["wall_time"], + name=load_result["name"], + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py 
b/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py index 5c0dabb6a1f6..62707cdcf776 100644 --- a/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py +++ b/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py @@ -19,25 +19,30 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_efficient_net_b7(self): - app = tf.keras.applications.EfficientNetB7 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - -if __name__ == '__main__': - tf.test.main() + def benchmark_save_and_load_efficient_net_b7(self): + app = tf.keras.applications.EfficientNetB7 + ( + save_result, + load_result, + ) = saved_model_benchmark_util.save_and_load_benchmark(app) + + self.report_benchmark( + iters=save_result["iters"], + wall_time=save_result["wall_time"], + name=save_result["name"], + ) + + self.report_benchmark( + iters=load_result["iters"], + wall_time=load_result["wall_time"], + name=load_result["name"], + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py b/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py index 0b489dd855c6..fd53786d7cc0 100644 --- a/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py +++ b/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py @@ -19,26 +19,30 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_inception_resnet_v2(self): - app = tf.keras.applications.InceptionResNetV2 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - - -if __name__ == '__main__': - tf.test.main() + def benchmark_save_and_load_inception_resnet_v2(self): + app = tf.keras.applications.InceptionResNetV2 + ( + save_result, + load_result, + ) = saved_model_benchmark_util.save_and_load_benchmark(app) + + self.report_benchmark( + iters=save_result["iters"], + wall_time=save_result["wall_time"], + name=save_result["name"], + ) + + self.report_benchmark( + iters=load_result["iters"], + wall_time=load_result["wall_time"], + name=load_result["name"], + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py b/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py index de8eadfa6fb0..bb00e7da03f3 100644 --- a/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py +++ b/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py @@ -19,25 +19,30 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.benchmarks.saved_model_benchmarks 
import saved_model_benchmark_util class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_mobilenet_v2(self): - app = tf.keras.applications.MobileNetV2 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - -if __name__ == '__main__': - tf.test.main() + def benchmark_save_and_load_mobilenet_v2(self): + app = tf.keras.applications.MobileNetV2 + ( + save_result, + load_result, + ) = saved_model_benchmark_util.save_and_load_benchmark(app) + + self.report_benchmark( + iters=save_result["iters"], + wall_time=save_result["wall_time"], + name=save_result["name"], + ) + + self.report_benchmark( + iters=load_result["iters"], + wall_time=load_result["wall_time"], + name=load_result["name"], + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py b/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py index bd9e41c0bc60..cd97d1d53153 100644 --- a/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py +++ b/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py @@ -19,25 +19,30 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_nasnet_large(self): - app = tf.keras.applications.NASNetLarge - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - -if __name__ == '__main__': - tf.test.main() + def benchmark_save_and_load_nasnet_large(self): + app = tf.keras.applications.NASNetLarge + ( + save_result, + load_result, + ) = saved_model_benchmark_util.save_and_load_benchmark(app) + + self.report_benchmark( + iters=save_result["iters"], + wall_time=save_result["wall_time"], + name=save_result["name"], + ) + + self.report_benchmark( + iters=load_result["iters"], + wall_time=load_result["wall_time"], + name=load_result["name"], + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py b/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py index 5bada695c99e..bab2f5a60d35 100644 --- a/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py +++ b/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py @@ -19,26 +19,30 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_resnet152_v2(self): - app = tf.keras.applications.ResNet152V2 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - 
wall_time=load_result['wall_time'], - name=load_result['name']) - - -if __name__ == '__main__': - tf.test.main() + def benchmark_save_and_load_resnet152_v2(self): + app = tf.keras.applications.ResNet152V2 + ( + save_result, + load_result, + ) = saved_model_benchmark_util.save_and_load_benchmark(app) + + self.report_benchmark( + iters=save_result["iters"], + wall_time=save_result["wall_time"], + name=save_result["name"], + ) + + self.report_benchmark( + iters=load_result["iters"], + wall_time=load_result["wall_time"], + name=load_result["name"], + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py b/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py index 692646749a6a..62271f0b7189 100644 --- a/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py +++ b/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py @@ -18,50 +18,51 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import tempfile import time +import tensorflow.compat.v2 as tf + +import keras -def save_and_load_benchmark(app): - """Util for saved model benchmarks.""" - trials = 3 - model = app(weights=None) - model_name = app.__name__ +def save_and_load_benchmark(app): + """Util for saved model benchmarks.""" + trials = 3 - tmp_dir = tf.compat.v1.test.get_temp_dir() - tf.io.gfile.makedirs(tmp_dir) - save_dir = tempfile.mkdtemp(dir=tmp_dir) + model = app(weights=None) + model_name = app.__name__ - total_save_time = 0 - total_load_time = 0 + tmp_dir = tf.compat.v1.test.get_temp_dir() + tf.io.gfile.makedirs(tmp_dir) + save_dir = tempfile.mkdtemp(dir=tmp_dir) - # Run one untimed iteration of saving/loading. - model.save(save_dir, save_format='tf') - tf.keras.models.load_model(save_dir) + total_save_time = 0 + total_load_time = 0 - for _ in range(trials): - start_time = time.time() - model.save(save_dir, save_format='tf') - total_save_time += time.time() - start_time + # Run one untimed iteration of saving/loading. 
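`save_and_load_benchmark` times the two operations the same way the eager benchmarks do: one untimed save/load round to absorb one-time costs, then wall time averaged over `trials` iterations. A standalone sketch under the same scheme (paths and trial count illustrative; `save_format="tf"` matches the utility):

```python
import tempfile
import time

import tensorflow as tf


def time_save_load(model, trials=3):
    save_dir = tempfile.mkdtemp()

    # Untimed warm-up: the first save/load pair pays tracing and
    # file-creation costs that would skew the average.
    model.save(save_dir, save_format="tf")
    tf.keras.models.load_model(save_dir)

    total_save = total_load = 0.0
    for _ in range(trials):
        start = time.time()
        model.save(save_dir, save_format="tf")
        total_save += time.time() - start

        start = time.time()
        tf.keras.models.load_model(save_dir)
        total_load += time.time() - start

    return total_save / trials, total_load / trials


model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(4,))])
save_s, load_s = time_save_load(model)
print(f"save: {save_s:.3f}s  load: {load_s:.3f}s")
```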
+ model.save(save_dir, save_format="tf") + keras.models.load_model(save_dir) - start_time = time.time() - tf.keras.models.load_model(save_dir) - total_load_time += time.time() - start_time + for _ in range(trials): + start_time = time.time() + model.save(save_dir, save_format="tf") + total_save_time += time.time() - start_time - save_result = { - 'iters': trials, - 'wall_time': total_save_time / trials, - 'name': '{}.save'.format(model_name) - } + start_time = time.time() + keras.models.load_model(save_dir) + total_load_time += time.time() - start_time - load_result = { - 'iters': trials, - 'wall_time': total_load_time / trials, - 'name': '{}.load'.format(model_name) - } - tf.compat.v1.gfile.DeleteRecursively(save_dir) - return save_result, load_result + save_result = { + "iters": trials, + "wall_time": total_save_time / trials, + "name": f"{model_name}.save", + } + load_result = { + "iters": trials, + "wall_time": total_load_time / trials, + "name": f"{model_name}.load", + } + tf.compat.v1.gfile.DeleteRecursively(save_dir) + return save_result, load_result diff --git a/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py b/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py index 246596dbecac..cdb044a1fcb0 100644 --- a/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py +++ b/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py @@ -19,26 +19,30 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_vgg19(self): - app = tf.keras.applications.VGG19 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - - -if __name__ == '__main__': - tf.test.main() + def benchmark_save_and_load_vgg19(self): + app = tf.keras.applications.VGG19 + ( + save_result, + load_result, + ) = saved_model_benchmark_util.save_and_load_benchmark(app) + + self.report_benchmark( + iters=save_result["iters"], + wall_time=save_result["wall_time"], + name=save_result["name"], + ) + + self.report_benchmark( + iters=load_result["iters"], + wall_time=load_result["wall_time"], + name=load_result["name"], + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py b/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py index 627ccc9cb3cf..ca9eb7c63060 100644 --- a/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py +++ b/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py @@ -19,26 +19,30 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_xception(self): - app = tf.keras.applications.Xception - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - - -if 
__name__ == '__main__': - tf.test.main() + def benchmark_save_and_load_xception(self): + app = tf.keras.applications.Xception + ( + save_result, + load_result, + ) = saved_model_benchmark_util.save_and_load_benchmark(app) + + self.report_benchmark( + iters=save_result["iters"], + wall_time=save_result["wall_time"], + name=save_result["name"], + ) + + self.report_benchmark( + iters=load_result["iters"], + wall_time=load_result["wall_time"], + name=load_result["name"], + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/callbacks.py b/keras/callbacks.py index 47081d3d3c48..bc5a3080512a 100644 --- a/keras/callbacks.py +++ b/keras/callbacks.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=g-import-not-at-top -# pylint: disable=g-classes-have-attributes + + """Callbacks: utilities called at certain points during model training.""" import collections @@ -25,10 +25,13 @@ import sys import time +import numpy as np +import tensorflow.compat.v2 as tf from keras import backend from keras.distribute import distributed_file_utils from keras.distribute import worker_training_state +from keras.optimizers import optimizer from keras.optimizers.schedules import learning_rate_schedule from keras.utils import generic_utils from keras.utils import io_utils @@ -37,2893 +40,3269 @@ from keras.utils.data_utils import Sequence from keras.utils.generic_utils import Progbar from keras.utils.mode_keys import ModeKeys -import numpy as np -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import keras_export from tensorflow.tools.docs import doc_controls try: - import requests + import requests except ImportError: - requests = None + requests = None # Note: `configure_callbacks` is only used in TF1. -def configure_callbacks(callbacks, - model, - do_validation=False, - batch_size=None, - epochs=None, - steps_per_epoch=None, - samples=None, - verbose=1, - count_mode='steps', - mode=ModeKeys.TRAIN): - """Configures callbacks for use in various training loops. - - Args: - callbacks: List of Callbacks. - model: Model being trained. - do_validation: Whether or not validation loop will be run. - batch_size: Number of samples per batch. - epochs: Number of epoch to train. - steps_per_epoch: Number of batches to run per training epoch. - samples: Number of training samples. - verbose: int, 0 or 1. Keras logging verbosity to pass to ProgbarLogger. - count_mode: One of 'steps' or 'samples'. Per-batch or per-sample count. - mode: String. One of ModeKeys.TRAIN, ModeKeys.TEST, or ModeKeys.PREDICT. - Which loop mode to configure callbacks for. - - Returns: - Instance of CallbackList used to control all Callbacks. - """ - # Check if callbacks have already been configured. - if isinstance(callbacks, CallbackList): - return callbacks - - if not callbacks: - callbacks = [] - - # Add additional callbacks during training. 
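The `try`/`except ImportError` guard kept at the top of callbacks.py is the standard optional-dependency pattern: attempt the import once at module load, leave the name bound to `None` on failure, and let callers check before use. A minimal sketch (`post_logs` is a hypothetical example, not a Keras API):

```python
try:
    import requests
except ImportError:
    requests = None


def post_logs(url, logs):
    # Fail with a clear message only when the optional feature is used.
    if requests is None:
        raise ImportError("This feature requires the `requests` library.")
    requests.post(url, json=logs)
```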
- if mode == ModeKeys.TRAIN: - model.history = History() - callbacks = [BaseLogger()] + (callbacks or []) + [model.history] - if verbose: - callbacks.append(ProgbarLogger(count_mode)) - callback_list = CallbackList(callbacks) - - # Set callback model - callback_model = model._get_callback_model() # pylint: disable=protected-access - callback_list.set_model(callback_model) - - set_callback_parameters( - callback_list, - model, - do_validation=do_validation, - batch_size=batch_size, - epochs=epochs, - steps_per_epoch=steps_per_epoch, - samples=samples, - verbose=verbose, - mode=mode) - - callback_list.model.stop_training = False - return callback_list - - -def set_callback_parameters(callback_list, - model, - do_validation=False, - batch_size=None, - epochs=None, - steps_per_epoch=None, - samples=None, - verbose=1, - mode=ModeKeys.TRAIN): - """Sets callback parameters. - - Args: - callback_list: CallbackList instance. - model: Model being trained. - do_validation: Whether or not validation loop will be run. - batch_size: Number of samples per batch. - epochs: Number of epoch to train. - steps_per_epoch: Number of batches to run per training epoch. - samples: Number of training samples. - verbose: int, 0 or 1. Keras logging verbosity to pass to ProgbarLogger. - mode: String. One of ModeKeys.TRAIN, ModeKeys.TEST, or ModeKeys.PREDICT. - Which loop mode to configure callbacks for. - """ - metric_names = model.metrics_names - for cbk in callback_list: - if isinstance(cbk, (BaseLogger, ProgbarLogger)): - cbk.stateful_metrics = metric_names[1:] # Exclude `loss` - - # Set callback parameters - callback_metrics = [] - # When we have deferred build scenario with iterator input, we will compile - # when we standardize first batch of data. - if mode != ModeKeys.PREDICT: - callback_metrics = copy.copy(metric_names) - if do_validation: - callback_metrics += ['val_' + n for n in metric_names] - callback_params = { - 'batch_size': batch_size, - 'epochs': epochs, - 'steps': steps_per_epoch, - 'samples': samples, - 'verbose': verbose, - 'do_validation': do_validation, - 'metrics': callback_metrics, - } - callback_list.set_params(callback_params) - - -def _is_generator_like(data): - """Checks if data is a generator, Sequence, or Iterator.""" - return (hasattr(data, '__next__') or hasattr(data, 'next') or isinstance( - data, (Sequence, tf.compat.v1.data.Iterator, tf.data.Iterator))) - - -def make_logs(model, logs, outputs, mode, prefix=''): - """Computes logs for sending to `on_batch_end` methods.""" - metric_names = model.metrics_names - if mode in {ModeKeys.TRAIN, ModeKeys.TEST} and metric_names: - for label, output in zip(metric_names, outputs): - logs[prefix + label] = output - else: - logs['outputs'] = outputs - return logs - - -@keras_export('keras.callbacks.CallbackList') -class CallbackList: - """Container abstracting a list of callbacks.""" - - def __init__(self, - callbacks=None, - add_history=False, - add_progbar=False, - model=None, - **params): - """Container for `Callback` instances. - - This object wraps a list of `Callback` instances, making it possible - to call them all at once via a single endpoint - (e.g. `callback_list.on_epoch_end(...)`). +def configure_callbacks( + callbacks, + model, + do_validation=False, + batch_size=None, + epochs=None, + steps_per_epoch=None, + samples=None, + verbose=1, + count_mode="steps", + mode=ModeKeys.TRAIN, +): + """Configures callbacks for use in various training loops. Args: - callbacks: List of `Callback` instances. 
- add_history: Whether a `History` callback should be added, if one does not - already exist in the `callbacks` list. - add_progbar: Whether a `ProgbarLogger` callback should be added, if one - does not already exist in the `callbacks` list. - model: The `Model` these callbacks are used with. - **params: If provided, parameters will be passed to each `Callback` via - `Callback.set_params`. - """ - self.callbacks = tf.nest.flatten(callbacks) if callbacks else [] - self._add_default_callbacks(add_history, add_progbar) - - if model: - self.set_model(model) - if params: - self.set_params(params) - - # Performance optimization: determines if batch hooks need to be called. - # pylint: disable=protected-access - self._supports_tf_logs = all( - getattr(cb, '_supports_tf_logs', False) for cb in self.callbacks) - self._batch_hooks_support_tf_logs = all( - getattr(cb, '_supports_tf_logs', False) - for cb in self.callbacks - if cb._implements_train_batch_hooks() or cb - ._implements_test_batch_hooks() or cb._implements_predict_batch_hooks()) - - self._should_call_train_batch_hooks = any( - cb._implements_train_batch_hooks() for cb in self.callbacks) - self._should_call_test_batch_hooks = any( - cb._implements_test_batch_hooks() for cb in self.callbacks) - self._should_call_predict_batch_hooks = any( - cb._implements_predict_batch_hooks() for cb in self.callbacks) - # pylint: enable=protected-access - - self._disallow_batch_hooks_in_ps_strategy() - - # Performance check: Check batch hooks for slowness compared to batch time. - # Only run check for custom callbacks (i.e. not present in this file). - self._check_timing = any( - cbk.__class__.__name__ not in globals() for cbk in self.callbacks) - self._num_batches_for_timing_check = 5 - self._hook_times = {} - self._batch_start_time = None - self._batch_times = [] - - def _add_default_callbacks(self, add_history, add_progbar): - """Adds `Callback`s that are always present.""" - self._progbar = None - self._history = None - - for cb in self.callbacks: - if isinstance(cb, ProgbarLogger): - self._progbar = cb - elif isinstance(cb, History): - self._history = cb - - if self._history is None and add_history: - self._history = History() - self.callbacks.append(self._history) - - if self._progbar is None and add_progbar: - self._progbar = ProgbarLogger(count_mode='steps') - self.callbacks.append(self._progbar) - - def _process_logs(self, logs, is_batch_hook=False): - """Turns tensors into numpy arrays or Python scalars if necessary.""" - if logs is None: - return {} - if self._supports_tf_logs: - return logs - if is_batch_hook and self._batch_hooks_support_tf_logs: - return logs - return tf_utils.sync_to_numpy_or_python_type(logs) - - def append(self, callback): - self.callbacks.append(callback) - - def set_params(self, params): - self.params = params - for callback in self.callbacks: - callback.set_params(params) - - def set_model(self, model): - self.model = model - if self._history: - model.history = self._history - for callback in self.callbacks: - callback.set_model(model) - - def _call_batch_hook(self, mode, hook, batch, logs=None): - """Helper function for all batch_{begin | end} methods.""" - if not self.callbacks: - return - - if hook == 'begin': - self._call_batch_begin_hook(mode, batch, logs) - elif hook == 'end': - self._call_batch_end_hook(mode, batch, logs) - else: - raise ValueError( - f'Unrecognized hook: {hook}. 
Expected values are ["begin", "end"]') - - def _call_batch_begin_hook(self, mode, batch, logs): - """Helper function for `on_*_batch_begin` methods.""" - hook_name = 'on_{mode}_batch_begin'.format(mode=mode) - self._call_batch_hook_helper(hook_name, batch, logs) - - if self._check_timing: - self._batch_start_time = time.time() - - def _call_batch_end_hook(self, mode, batch, logs): - """Helper function for `on_*_batch_end` methods.""" - hook_name = 'on_{mode}_batch_end'.format(mode=mode) - - if self._check_timing and batch >= 1: - batch_time = time.time() - self._batch_start_time - self._batch_times.append(batch_time) - - self._call_batch_hook_helper(hook_name, batch, logs) - - if len(self._batch_times) >= self._num_batches_for_timing_check: - end_hook_name = hook_name - begin_hook_name = 'on_{mode}_batch_begin'.format(mode=mode) - avg_batch_time = sum(self._batch_times) / len(self._batch_times) - avg_end_hook_time = sum(self._hook_times[end_hook_name]) / len( - self._hook_times[end_hook_name]) - avg_begin_hook_time = sum(self._hook_times[begin_hook_name]) / len( - self._hook_times[begin_hook_name]) - - threshold_time = 1.0 * avg_batch_time - warning_msg = ('Callback method `{hook}` is slow compared to ' - 'the batch time (batch time: {batch_time:.4f}s vs ' - '`{hook}` time: {hook_time:.4f}s). Check your callbacks.') - if avg_begin_hook_time > threshold_time: - logging.warning(warning_msg.format( - hook=begin_hook_name, - batch_time=avg_batch_time, - hook_time=avg_begin_hook_time)) - if avg_end_hook_time > threshold_time: - logging.warning(warning_msg.format( - hook=end_hook_name, - batch_time=avg_batch_time, - hook_time=avg_end_hook_time)) - self._check_timing = False - self._batch_start_time = None - self._batch_times = [] - self._hook_times = {} - - def _call_batch_hook_helper(self, hook_name, batch, logs): - """Helper function for `on_*_batch_*` methods.""" - if self._check_timing: - start_time = time.time() - - logs = self._process_logs(logs, is_batch_hook=True) - for callback in self.callbacks: - hook = getattr(callback, hook_name) - hook(batch, logs) - - if self._check_timing: - if hook_name not in self._hook_times: - self._hook_times[hook_name] = [] - self._hook_times[hook_name].append(time.time() - start_time) - - def _call_begin_hook(self, mode): - """Helper function for on_{train|test|predict}_begin methods.""" - if mode == ModeKeys.TRAIN: - self.on_train_begin() - elif mode == ModeKeys.TEST: - self.on_test_begin() - else: - self.on_predict_begin() - - def _call_end_hook(self, mode): - """Helper function for on_{train|test|predict}_end methods.""" - if mode == ModeKeys.TRAIN: - self.on_train_end() - elif mode == ModeKeys.TEST: - self.on_test_end() - else: - self.on_predict_end() - - def on_batch_begin(self, batch, logs=None): - if self._should_call_train_batch_hooks: - self._call_batch_hook(ModeKeys.TRAIN, 'begin', batch, logs=logs) + callbacks: List of Callbacks. + model: Model being trained. + do_validation: Whether or not validation loop will be run. + batch_size: Number of samples per batch. + epochs: Number of epoch to train. + steps_per_epoch: Number of batches to run per training epoch. + samples: Number of training samples. + verbose: int, 0 or 1. Keras logging verbosity to pass to ProgbarLogger. + count_mode: One of 'steps' or 'samples'. Per-batch or per-sample count. + mode: String. One of ModeKeys.TRAIN, ModeKeys.TEST, or ModeKeys.PREDICT. + Which loop mode to configure callbacks for. 
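Both the old and new `_call_batch_hook_helper` resolve the hook dynamically: the method name is assembled from the mode string and fetched with `getattr`, so one helper serves every `on_{train,test,predict}_batch_{begin,end}` combination. A toy sketch of that dispatch (the callback stub here is a stand-in, not the real class):

```python
class CallbackStub:
    def on_train_batch_begin(self, batch, logs=None):
        print(f"train batch {batch} begin, logs={logs}")


def call_batch_hook(callbacks, mode, hook, batch, logs=None):
    hook_name = f"on_{mode}_batch_{hook}"  # e.g. "on_train_batch_begin"
    for callback in callbacks:
        getattr(callback, hook_name)(batch, logs)


call_batch_hook([CallbackStub()], "train", "begin", 0, logs={"loss": 0.2})
```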
- def on_batch_end(self, batch, logs=None): - if self._should_call_train_batch_hooks: - self._call_batch_hook(ModeKeys.TRAIN, 'end', batch, logs=logs) - - def on_epoch_begin(self, epoch, logs=None): - """Calls the `on_epoch_begin` methods of its callbacks. - - This function should only be called during TRAIN mode. - - Args: - epoch: Integer, index of epoch. - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. + Returns: + Instance of CallbackList used to control all Callbacks. """ - logs = self._process_logs(logs) - for callback in self.callbacks: - callback.on_epoch_begin(epoch, logs) + # Check if callbacks have already been configured. + if isinstance(callbacks, CallbackList): + return callbacks - def on_epoch_end(self, epoch, logs=None): - """Calls the `on_epoch_end` methods of its callbacks. + if not callbacks: + callbacks = [] - This function should only be called during TRAIN mode. + # Add additional callbacks during training. + if mode == ModeKeys.TRAIN: + model.history = History() + callbacks = [BaseLogger()] + (callbacks or []) + [model.history] + if verbose: + callbacks.append(ProgbarLogger(count_mode)) + callback_list = CallbackList(callbacks) + + # Set callback model + callback_model = model._get_callback_model() + callback_list.set_model(callback_model) + + set_callback_parameters( + callback_list, + model, + do_validation=do_validation, + batch_size=batch_size, + epochs=epochs, + steps_per_epoch=steps_per_epoch, + samples=samples, + verbose=verbose, + mode=mode, + ) + + callback_list.model.stop_training = False + return callback_list + + +def set_callback_parameters( + callback_list, + model, + do_validation=False, + batch_size=None, + epochs=None, + steps_per_epoch=None, + samples=None, + verbose=1, + mode=ModeKeys.TRAIN, +): + """Sets callback parameters. Args: - epoch: Integer, index of epoch. - logs: Dict, metric results for this training epoch, and for the - validation epoch if validation is performed. Validation result keys - are prefixed with `val_`. + callback_list: CallbackList instance. + model: Model being trained. + do_validation: Whether or not validation loop will be run. + batch_size: Number of samples per batch. + epochs: Number of epoch to train. + steps_per_epoch: Number of batches to run per training epoch. + samples: Number of training samples. + verbose: int, 0 or 1. Keras logging verbosity to pass to ProgbarLogger. + mode: String. One of ModeKeys.TRAIN, ModeKeys.TEST, or ModeKeys.PREDICT. + Which loop mode to configure callbacks for. """ - logs = self._process_logs(logs) - for callback in self.callbacks: - callback.on_epoch_end(epoch, logs) + metric_names = None + for cbk in callback_list: + if isinstance(cbk, (BaseLogger, ProgbarLogger)): + if not metric_names: + metric_names = model.metrics_names + cbk.stateful_metrics = metric_names[1:] # Exclude `loss` + + # Set callback parameters + callback_metrics = [] + # When we have deferred build scenario with iterator input, we will compile + # when we standardize first batch of data. 
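One behavioral change in the reworked `set_callback_parameters` is visible here: `model.metrics_names` is now looked up lazily, only when a `BaseLogger`/`ProgbarLogger` is present or when callback metrics are actually assembled (continued just below), and the result is reused. A toy sketch of that compute-once pattern (`FakeModel` is a hypothetical stand-in):

```python
def collect_callback_metrics(model, do_validation):
    metric_names = None

    def get_metric_names():
        nonlocal metric_names
        if not metric_names:
            metric_names = model.metrics_names  # queried at most once
        return metric_names

    callback_metrics = list(get_metric_names())
    if do_validation:
        callback_metrics += ["val_" + n for n in get_metric_names()]
    return callback_metrics


class FakeModel:
    metrics_names = ["loss", "accuracy"]


print(collect_callback_metrics(FakeModel(), do_validation=True))
# ['loss', 'accuracy', 'val_loss', 'val_accuracy']
```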
+ if mode != ModeKeys.PREDICT: + if not metric_names: + metric_names = model.metrics_names + callback_metrics = copy.copy(metric_names) + if do_validation: + callback_metrics += ["val_" + n for n in metric_names] + callback_params = { + "batch_size": batch_size, + "epochs": epochs, + "steps": steps_per_epoch, + "samples": samples, + "verbose": verbose, + "do_validation": do_validation, + "metrics": callback_metrics, + } + callback_list.set_params(callback_params) - def on_train_batch_begin(self, batch, logs=None): - """Calls the `on_train_batch_begin` methods of its callbacks. - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict, contains the return value of `model.train_step`. Typically, - the values of the `Model`'s metrics are returned. Example: - `{'loss': 0.2, 'accuracy': 0.7}`. - """ - if self._should_call_train_batch_hooks: - self._call_batch_hook(ModeKeys.TRAIN, 'begin', batch, logs=logs) +def _is_generator_like(data): + """Checks if data is a generator, Sequence, or Iterator.""" + return ( + hasattr(data, "__next__") + or hasattr(data, "next") + or isinstance( + data, (Sequence, tf.compat.v1.data.Iterator, tf.data.Iterator) + ) + ) + + +def make_logs(model, logs, outputs, mode, prefix=""): + """Computes logs for sending to `on_batch_end` methods.""" + metric_names = model.metrics_names + if mode in {ModeKeys.TRAIN, ModeKeys.TEST} and metric_names: + for label, output in zip(metric_names, outputs): + logs[prefix + label] = output + else: + logs["outputs"] = outputs + return logs - def on_train_batch_end(self, batch, logs=None): - """Calls the `on_train_batch_end` methods of its callbacks. - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Aggregated metric results up until this batch. - """ - if self._should_call_train_batch_hooks: - self._call_batch_hook(ModeKeys.TRAIN, 'end', batch, logs=logs) +@keras_export("keras.callbacks.CallbackList") +class CallbackList: + """Container abstracting a list of callbacks.""" + + def __init__( + self, + callbacks=None, + add_history=False, + add_progbar=False, + model=None, + **params, + ): + """Container for `Callback` instances. + + This object wraps a list of `Callback` instances, making it possible + to call them all at once via a single endpoint + (e.g. `callback_list.on_epoch_end(...)`). + + Args: + callbacks: List of `Callback` instances. + add_history: Whether a `History` callback should be added, if one does + not already exist in the `callbacks` list. + add_progbar: Whether a `ProgbarLogger` callback should be added, if + one does not already exist in the `callbacks` list. + model: The `Model` these callbacks are used with. + **params: If provided, parameters will be passed to each `Callback` + via `Callback.set_params`. + """ + self.callbacks = tf.nest.flatten(callbacks) if callbacks else [] + self._add_default_callbacks(add_history, add_progbar) + + if model: + self.set_model(model) + if params: + self.set_params(params) + + # Performance optimization: determines if batch hooks need to be called. 
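The flags this optimization computes (continued in the hunk just below) let `_process_logs` return tensor logs untouched whenever every relevant callback sets `_supports_tf_logs`, converting to numpy/Python scalars only when some callback needs it. A toy sketch of the short-circuit (the dict comprehension stands in for `tf_utils.sync_to_numpy_or_python_type`):

```python
class TensorAware:
    _supports_tf_logs = True


class Legacy:
    pass  # no _supports_tf_logs attribute: needs converted logs


def process_logs(callbacks, logs):
    if logs is None:
        return {}
    if all(getattr(cb, "_supports_tf_logs", False) for cb in callbacks):
        return logs  # every callback accepts tensors; skip conversion
    return {k: float(v) for k, v in logs.items()}  # conversion stand-in


print(process_logs([TensorAware()], {"loss": 0.2}))
print(process_logs([TensorAware(), Legacy()], {"loss": 0.2}))
```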
+ + self._supports_tf_logs = all( + getattr(cb, "_supports_tf_logs", False) for cb in self.callbacks + ) + self._batch_hooks_support_tf_logs = all( + getattr(cb, "_supports_tf_logs", False) + for cb in self.callbacks + if cb._implements_train_batch_hooks() + or cb._implements_test_batch_hooks() + or cb._implements_predict_batch_hooks() + ) + + self._should_call_train_batch_hooks = any( + cb._implements_train_batch_hooks() for cb in self.callbacks + ) + self._should_call_test_batch_hooks = any( + cb._implements_test_batch_hooks() for cb in self.callbacks + ) + self._should_call_predict_batch_hooks = any( + cb._implements_predict_batch_hooks() for cb in self.callbacks + ) + + self._disallow_batch_hooks_in_ps_strategy() + + # Performance check: Check batch hooks for slowness compared to batch + # time. Only run check for custom callbacks (i.e. not present in this + # file). + self._check_timing = any( + cbk.__class__.__name__ not in globals() for cbk in self.callbacks + ) + self._num_batches_for_timing_check = 5 + self._hook_times = {} + self._batch_start_time = None + self._batch_times = [] + + def _add_default_callbacks(self, add_history, add_progbar): + """Adds `Callback`s that are always present.""" + self._progbar = None + self._history = None + + for cb in self.callbacks: + if isinstance(cb, ProgbarLogger): + self._progbar = cb + elif isinstance(cb, History): + self._history = cb + + if self._history is None and add_history: + self._history = History() + self.callbacks.append(self._history) + + if self._progbar is None and add_progbar: + self._progbar = ProgbarLogger(count_mode="steps") + self.callbacks.append(self._progbar) + + def _process_logs(self, logs, is_batch_hook=False): + """Turns tensors into numpy arrays or Python scalars if necessary.""" + if logs is None: + return {} + if self._supports_tf_logs: + return logs + if is_batch_hook and self._batch_hooks_support_tf_logs: + return logs + return tf_utils.sync_to_numpy_or_python_type(logs) + + def append(self, callback): + self.callbacks.append(callback) + + def set_params(self, params): + self.params = params + for callback in self.callbacks: + callback.set_params(params) + + def set_model(self, model): + self.model = model + if self._history: + model.history = self._history + for callback in self.callbacks: + callback.set_model(model) + + def _call_batch_hook(self, mode, hook, batch, logs=None): + """Helper function for all batch_{begin | end} methods.""" + if not self.callbacks: + return + + if hook == "begin": + self._call_batch_begin_hook(mode, batch, logs) + elif hook == "end": + self._call_batch_end_hook(mode, batch, logs) + else: + raise ValueError( + f"Unrecognized hook: {hook}. 
" + 'Expected values are ["begin", "end"]' + ) + + def _call_batch_begin_hook(self, mode, batch, logs): + """Helper function for `on_*_batch_begin` methods.""" + hook_name = f"on_{mode}_batch_begin" + self._call_batch_hook_helper(hook_name, batch, logs) + + if self._check_timing: + self._batch_start_time = time.time() + + def _call_batch_end_hook(self, mode, batch, logs): + """Helper function for `on_*_batch_end` methods.""" + hook_name = f"on_{mode}_batch_end" + + if self._check_timing and batch >= 1: + batch_time = time.time() - self._batch_start_time + self._batch_times.append(batch_time) + + self._call_batch_hook_helper(hook_name, batch, logs) + + if len(self._batch_times) >= self._num_batches_for_timing_check: + end_hook_name = hook_name + begin_hook_name = f"on_{mode}_batch_begin" + avg_batch_time = sum(self._batch_times) / len(self._batch_times) + avg_end_hook_time = sum(self._hook_times[end_hook_name]) / len( + self._hook_times[end_hook_name] + ) + avg_begin_hook_time = sum(self._hook_times[begin_hook_name]) / len( + self._hook_times[begin_hook_name] + ) + + threshold_time = 1.0 * avg_batch_time + warning_msg = ( + "Callback method `{hook}` is slow compared to " + "the batch time (batch time: {batch_time:.4f}s vs " + "`{hook}` time: {hook_time:.4f}s). Check your callbacks." + ) + if avg_begin_hook_time > threshold_time: + logging.warning( + warning_msg.format( + hook=begin_hook_name, + batch_time=avg_batch_time, + hook_time=avg_begin_hook_time, + ) + ) + if avg_end_hook_time > threshold_time: + logging.warning( + warning_msg.format( + hook=end_hook_name, + batch_time=avg_batch_time, + hook_time=avg_end_hook_time, + ) + ) + self._check_timing = False + self._batch_start_time = None + self._batch_times = [] + self._hook_times = {} + + def _call_batch_hook_helper(self, hook_name, batch, logs): + """Helper function for `on_*_batch_*` methods.""" + if self._check_timing: + start_time = time.time() + + logs = self._process_logs(logs, is_batch_hook=True) + for callback in self.callbacks: + hook = getattr(callback, hook_name) + hook(batch, logs) + + if self._check_timing: + if hook_name not in self._hook_times: + self._hook_times[hook_name] = [] + self._hook_times[hook_name].append(time.time() - start_time) + + def _call_begin_hook(self, mode): + """Helper function for on_{train|test|predict}_begin methods.""" + if mode == ModeKeys.TRAIN: + self.on_train_begin() + elif mode == ModeKeys.TEST: + self.on_test_begin() + else: + self.on_predict_begin() + + def _call_end_hook(self, mode): + """Helper function for on_{train|test|predict}_end methods.""" + if mode == ModeKeys.TRAIN: + self.on_train_end() + elif mode == ModeKeys.TEST: + self.on_test_end() + else: + self.on_predict_end() + + def on_batch_begin(self, batch, logs=None): + if self._should_call_train_batch_hooks: + self._call_batch_hook(ModeKeys.TRAIN, "begin", batch, logs=logs) + + def on_batch_end(self, batch, logs=None): + if self._should_call_train_batch_hooks: + self._call_batch_hook(ModeKeys.TRAIN, "end", batch, logs=logs) + + def on_epoch_begin(self, epoch, logs=None): + """Calls the `on_epoch_begin` methods of its callbacks. + + This function should only be called during TRAIN mode. + + Args: + epoch: Integer, index of epoch. + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. 
+ """ + logs = self._process_logs(logs) + for callback in self.callbacks: + callback.on_epoch_begin(epoch, logs) + + def on_epoch_end(self, epoch, logs=None): + """Calls the `on_epoch_end` methods of its callbacks. + + This function should only be called during TRAIN mode. + + Args: + epoch: Integer, index of epoch. + logs: Dict, metric results for this training epoch, and for the + validation epoch if validation is performed. Validation result + keys are prefixed with `val_`. + """ + logs = self._process_logs(logs) + for callback in self.callbacks: + callback.on_epoch_end(epoch, logs) + + def on_train_batch_begin(self, batch, logs=None): + """Calls the `on_train_batch_begin` methods of its callbacks. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict, contains the return value of `model.train_step`. + Typically, the values of the `Model`'s metrics are returned. + Example: `{'loss': 0.2, 'accuracy': 0.7}`. + """ + if self._should_call_train_batch_hooks: + self._call_batch_hook(ModeKeys.TRAIN, "begin", batch, logs=logs) + + def on_train_batch_end(self, batch, logs=None): + """Calls the `on_train_batch_end` methods of its callbacks. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict. Aggregated metric results up until this batch. + """ + if self._should_call_train_batch_hooks: + self._call_batch_hook(ModeKeys.TRAIN, "end", batch, logs=logs) + + def on_test_batch_begin(self, batch, logs=None): + """Calls the `on_test_batch_begin` methods of its callbacks. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict, contains the return value of `model.test_step`. + Typically, the values of the `Model`'s metrics are returned. + Example: `{'loss': 0.2, 'accuracy': 0.7}`. + """ + if self._should_call_test_batch_hooks: + self._call_batch_hook(ModeKeys.TEST, "begin", batch, logs=logs) + + def on_test_batch_end(self, batch, logs=None): + """Calls the `on_test_batch_end` methods of its callbacks. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict. Aggregated metric results up until this batch. + """ + if self._should_call_test_batch_hooks: + self._call_batch_hook(ModeKeys.TEST, "end", batch, logs=logs) + + def on_predict_batch_begin(self, batch, logs=None): + """Calls the `on_predict_batch_begin` methods of its callbacks. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict, contains the return value of `model.predict_step`, + it typically returns a dict with a key 'outputs' containing + the model's outputs. + """ + if self._should_call_predict_batch_hooks: + self._call_batch_hook(ModeKeys.PREDICT, "begin", batch, logs=logs) + + def on_predict_batch_end(self, batch, logs=None): + """Calls the `on_predict_batch_end` methods of its callbacks. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict. Aggregated metric results up until this batch. + """ + if self._should_call_predict_batch_hooks: + self._call_batch_hook(ModeKeys.PREDICT, "end", batch, logs=logs) + + def on_train_begin(self, logs=None): + """Calls the `on_train_begin` methods of its callbacks. + + Args: + logs: Dict. Currently, no data is passed via this argument + for this method, but that may change in the future. + """ + logs = self._process_logs(logs) + for callback in self.callbacks: + callback.on_train_begin(logs) + + def on_train_end(self, logs=None): + """Calls the `on_train_end` methods of its callbacks. + + Args: + logs: Dict. 
Currently, no data is passed via this argument + for this method, but that may change in the future. + """ + logs = self._process_logs(logs) + for callback in self.callbacks: + callback.on_train_end(logs) + + def on_test_begin(self, logs=None): + """Calls the `on_test_begin` methods of its callbacks. + + Args: + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. + """ + logs = self._process_logs(logs) + for callback in self.callbacks: + callback.on_test_begin(logs) + + def on_test_end(self, logs=None): + """Calls the `on_test_end` methods of its callbacks. + + Args: + logs: Dict. Currently, no data is passed via this argument + for this method, but that may change in the future. + """ + logs = self._process_logs(logs) + for callback in self.callbacks: + callback.on_test_end(logs) + + def on_predict_begin(self, logs=None): + """Calls the 'on_predict_begin` methods of its callbacks. + + Args: + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. + """ + logs = self._process_logs(logs) + for callback in self.callbacks: + callback.on_predict_begin(logs) + + def on_predict_end(self, logs=None): + """Calls the `on_predict_end` methods of its callbacks. + + Args: + logs: Dict. Currently, no data is passed via this argument + for this method, but that may change in the future. + """ + logs = self._process_logs(logs) + for callback in self.callbacks: + callback.on_predict_end(logs) + + def __iter__(self): + return iter(self.callbacks) + + def _disallow_batch_hooks_in_ps_strategy(self): + """Error out if batch-level callbacks are passed with PSStrategy.""" + + strategy = tf.distribute.get_strategy() + if strategy._should_use_with_coordinator: + unsupported_callbacks = [] + for cb in self.callbacks: + # These Callbacks can accept RemoteValues directly. + if getattr(cb, "_supports_tf_logs", False): + continue + if ( + cb._implements_train_batch_hooks() + or cb._implements_test_batch_hooks() + or cb._implements_predict_batch_hooks() + ): + unsupported_callbacks.append(cb) + if unsupported_callbacks: + raise ValueError( + "Batch-level `Callback`s are not supported with " + "`ParameterServerStrategy`. Found unsupported " + f"callbacks: {unsupported_callbacks}" + ) + + def make_logs(self, model, logs, outputs, mode, prefix=""): + """Computes logs for sending to `on_batch_end` methods.""" + if not self.callbacks: + return logs + + return make_logs(model, logs, outputs, mode, prefix=prefix) + + +@keras_export("keras.callbacks.Callback") +class Callback: + """Abstract base class used to build new callbacks. + + Callbacks can be passed to keras methods such as `fit`, `evaluate`, and + `predict` in order to hook into the various stages of the model training and + inference lifecycle. + + To create a custom callback, subclass `keras.callbacks.Callback` and + override the method associated with the stage of interest. See + https://www.tensorflow.org/guide/keras/custom_callback for more information. + + Example: + + >>> training_finished = False + >>> class MyCallback(tf.keras.callbacks.Callback): + ... def on_train_end(self, logs=None): + ... global training_finished + ... training_finished = True + >>> model = tf.keras.Sequential([ + ... tf.keras.layers.Dense(1, input_shape=(1,))]) + >>> model.compile(loss='mean_squared_error') + >>> model.fit(tf.constant([[1.0]]), tf.constant([[1.0]]), + ... 
callbacks=[MyCallback()]) + >>> assert training_finished == True + + If you want to use `Callback` objects in a custom training loop: + + 1. You should pack all your callbacks into a single `callbacks.CallbackList` + so they can all be called together. + 2. You will need to manually call all the `on_*` methods at the appropriate + locations in your loop. Like this: + + Example: + ```python + callbacks = tf.keras.callbacks.CallbackList([...]) + callbacks.append(...) + callbacks.on_train_begin(...) + for epoch in range(EPOCHS): + callbacks.on_epoch_begin(epoch) + for i, data in dataset.enumerate(): + callbacks.on_train_batch_begin(i) + batch_logs = model.train_step(data) + callbacks.on_train_batch_end(i, batch_logs) + epoch_logs = ... + callbacks.on_epoch_end(epoch, epoch_logs) + final_logs=... + callbacks.on_train_end(final_logs) + ``` - def on_test_batch_begin(self, batch, logs=None): - """Calls the `on_test_batch_begin` methods of its callbacks. + Attributes: + params: Dict. Training parameters + (eg. verbosity, batch size, number of epochs...). + model: Instance of `keras.models.Model`. + Reference of the model being trained. - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict, contains the return value of `model.test_step`. Typically, - the values of the `Model`'s metrics are returned. Example: - `{'loss': 0.2, 'accuracy': 0.7}`. + The `logs` dictionary that callback methods + take as argument will contain keys for quantities relevant to + the current batch or epoch (see method-specific docstrings). """ - if self._should_call_test_batch_hooks: - self._call_batch_hook(ModeKeys.TEST, 'begin', batch, logs=logs) - def on_test_batch_end(self, batch, logs=None): - """Calls the `on_test_batch_end` methods of its callbacks. + def __init__(self): + self.validation_data = None + self.model = None + # Whether this Callback should only run on the chief worker in a + # Multi-Worker setting. + # TODO(omalleyt): Make this attr public once solution is stable. + self._chief_worker_only = None + self._supports_tf_logs = False + + def set_params(self, params): + self.params = params + + def set_model(self, model): + self.model = model + + @doc_controls.for_subclass_implementers + @generic_utils.default + def on_batch_begin(self, batch, logs=None): + """A backwards compatibility alias for `on_train_batch_begin`.""" + + @doc_controls.for_subclass_implementers + @generic_utils.default + def on_batch_end(self, batch, logs=None): + """A backwards compatibility alias for `on_train_batch_end`.""" + + @doc_controls.for_subclass_implementers + def on_epoch_begin(self, epoch, logs=None): + """Called at the start of an epoch. + + Subclasses should override for any actions to run. This function should + only be called during TRAIN mode. + + Args: + epoch: Integer, index of epoch. + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. + """ + + @doc_controls.for_subclass_implementers + def on_epoch_end(self, epoch, logs=None): + """Called at the end of an epoch. + + Subclasses should override for any actions to run. This function should + only be called during TRAIN mode. + + Args: + epoch: Integer, index of epoch. + logs: Dict, metric results for this training epoch, and for the + validation epoch if validation is performed. Validation result + keys are prefixed with `val_`. For training epoch, the values of + the `Model`'s metrics are returned. Example: + `{'loss': 0.2, 'accuracy': 0.7}`. 
+ """ + + @doc_controls.for_subclass_implementers + @generic_utils.default + def on_train_batch_begin(self, batch, logs=None): + """Called at the beginning of a training batch in `fit` methods. + + Subclasses should override for any actions to run. + + Note that if the `steps_per_execution` argument to `compile` in + `tf.keras.Model` is set to `N`, this method will only be called every + `N` batches. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. + """ + # For backwards compatibility. + self.on_batch_begin(batch, logs=logs) + + @doc_controls.for_subclass_implementers + @generic_utils.default + def on_train_batch_end(self, batch, logs=None): + """Called at the end of a training batch in `fit` methods. + + Subclasses should override for any actions to run. + + Note that if the `steps_per_execution` argument to `compile` in + `tf.keras.Model` is set to `N`, this method will only be called every + `N` batches. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict. Aggregated metric results up until this batch. + """ + # For backwards compatibility. + self.on_batch_end(batch, logs=logs) + + @doc_controls.for_subclass_implementers + @generic_utils.default + def on_test_batch_begin(self, batch, logs=None): + """Called at the beginning of a batch in `evaluate` methods. + + Also called at the beginning of a validation batch in the `fit` + methods, if validation data is provided. + + Subclasses should override for any actions to run. + + Note that if the `steps_per_execution` argument to `compile` in + `tf.keras.Model` is set to `N`, this method will only be called every + `N` batches. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. + """ + + @doc_controls.for_subclass_implementers + @generic_utils.default + def on_test_batch_end(self, batch, logs=None): + """Called at the end of a batch in `evaluate` methods. + + Also called at the end of a validation batch in the `fit` + methods, if validation data is provided. + + Subclasses should override for any actions to run. + + Note that if the `steps_per_execution` argument to `compile` in + `tf.keras.Model` is set to `N`, this method will only be called every + `N` batches. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict. Aggregated metric results up until this batch. + """ + + @doc_controls.for_subclass_implementers + @generic_utils.default + def on_predict_batch_begin(self, batch, logs=None): + """Called at the beginning of a batch in `predict` methods. + + Subclasses should override for any actions to run. + + Note that if the `steps_per_execution` argument to `compile` in + `tf.keras.Model` is set to `N`, this method will only be called every + `N` batches. + + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. + """ + + @doc_controls.for_subclass_implementers + @generic_utils.default + def on_predict_batch_end(self, batch, logs=None): + """Called at the end of a batch in `predict` methods. + + Subclasses should override for any actions to run. + + Note that if the `steps_per_execution` argument to `compile` in + `tf.keras.Model` is set to `N`, this method will only be called every + `N` batches. 
+ + Args: + batch: Integer, index of batch within the current epoch. + logs: Dict. Aggregated metric results up until this batch. + """ + + @doc_controls.for_subclass_implementers + def on_train_begin(self, logs=None): + """Called at the beginning of training. + + Subclasses should override for any actions to run. + + Args: + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. + """ + + @doc_controls.for_subclass_implementers + def on_train_end(self, logs=None): + """Called at the end of training. - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Aggregated metric results up until this batch. - """ - if self._should_call_test_batch_hooks: - self._call_batch_hook(ModeKeys.TEST, 'end', batch, logs=logs) + Subclasses should override for any actions to run. + + Args: + logs: Dict. Currently the output of the last call to + `on_epoch_end()` is passed to this argument for this method but + that may change in the future. + """ + + @doc_controls.for_subclass_implementers + def on_test_begin(self, logs=None): + """Called at the beginning of evaluation or validation. - def on_predict_batch_begin(self, batch, logs=None): - """Calls the `on_predict_batch_begin` methods of its callbacks. + Subclasses should override for any actions to run. + + Args: + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. + """ - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict, contains the return value of `model.predict_step`, - it typically returns a dict with a key 'outputs' containing - the model's outputs. - """ - if self._should_call_predict_batch_hooks: - self._call_batch_hook(ModeKeys.PREDICT, 'begin', batch, logs=logs) - - def on_predict_batch_end(self, batch, logs=None): - """Calls the `on_predict_batch_end` methods of its callbacks. + @doc_controls.for_subclass_implementers + def on_test_end(self, logs=None): + """Called at the end of evaluation or validation. - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Aggregated metric results up until this batch. - """ - if self._should_call_predict_batch_hooks: - self._call_batch_hook(ModeKeys.PREDICT, 'end', batch, logs=logs) + Subclasses should override for any actions to run. + + Args: + logs: Dict. Currently the output of the last call to + `on_test_batch_end()` is passed to this argument for this method + but that may change in the future. + """ - def on_train_begin(self, logs=None): - """Calls the `on_train_begin` methods of its callbacks. + @doc_controls.for_subclass_implementers + def on_predict_begin(self, logs=None): + """Called at the beginning of prediction. - Args: - logs: Dict. Currently, no data is passed via this argument - for this method, but that may change in the future. - """ - logs = self._process_logs(logs) - for callback in self.callbacks: - callback.on_train_begin(logs) + Subclasses should override for any actions to run. - def on_train_end(self, logs=None): - """Calls the `on_train_end` methods of its callbacks. + Args: + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. + """ + + @doc_controls.for_subclass_implementers + def on_predict_end(self, logs=None): + """Called at the end of prediction. + + Subclasses should override for any actions to run. + + Args: + logs: Dict. Currently no data is passed to this argument for this + method but that may change in the future. 
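The `_implements_*_batch_hooks` helpers defined just below use the `@generic_utils.default` markers on the base-class methods to detect whether a subclass really overrode a batch hook, so per-batch dispatch can be skipped entirely. A small illustration (these helpers are private API, shown only to explain the mechanism):

```python
import tensorflow as tf

class EpochOnly(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        pass

class PerBatch(tf.keras.callbacks.Callback):
    def on_train_batch_end(self, batch, logs=None):
        pass

# Overriding no batch hook means no per-batch callback overhead:
print(EpochOnly()._implements_train_batch_hooks())  # False
print(PerBatch()._implements_train_batch_hooks())   # True
```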
+ """ + + def _implements_train_batch_hooks(self): + """Determines if this Callback should be called for each train batch.""" + return ( + not generic_utils.is_default(self.on_batch_begin) + or not generic_utils.is_default(self.on_batch_end) + or not generic_utils.is_default(self.on_train_batch_begin) + or not generic_utils.is_default(self.on_train_batch_end) + ) + + def _implements_test_batch_hooks(self): + """Determines if this Callback should be called for each test batch.""" + return not generic_utils.is_default( + self.on_test_batch_begin + ) or not generic_utils.is_default(self.on_test_batch_end) - Args: - logs: Dict. Currently, no data is passed via this argument - for this method, but that may change in the future. - """ - logs = self._process_logs(logs) - for callback in self.callbacks: - callback.on_train_end(logs) + def _implements_predict_batch_hooks(self): + """Determines if this Callback should be called for each predict + batch.""" + return not generic_utils.is_default( + self.on_predict_batch_begin + ) or not generic_utils.is_default(self.on_predict_batch_end) - def on_test_begin(self, logs=None): - """Calls the `on_test_begin` methods of its callbacks. - Args: - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ - logs = self._process_logs(logs) - for callback in self.callbacks: - callback.on_test_begin(logs) +@keras_export("keras.callbacks.BaseLogger") +class BaseLogger(Callback): + """Callback that accumulates epoch averages of metrics. - def on_test_end(self, logs=None): - """Calls the `on_test_end` methods of its callbacks. + This callback is automatically applied to every Keras model. Args: - logs: Dict. Currently, no data is passed via this argument - for this method, but that may change in the future. + stateful_metrics: Iterable of string names of metrics that + should *not* be averaged over an epoch. + Metrics in this list will be logged as-is in `on_epoch_end`. + All others will be averaged in `on_epoch_end`. """ - logs = self._process_logs(logs) - for callback in self.callbacks: - callback.on_test_end(logs) - def on_predict_begin(self, logs=None): - """Calls the 'on_predict_begin` methods of its callbacks. + def __init__(self, stateful_metrics=None): + super().__init__() + self.stateful_metrics = set(stateful_metrics or []) + + def on_epoch_begin(self, epoch, logs=None): + self.seen = 0 + self.totals = {} + + def on_batch_end(self, batch, logs=None): + logs = logs or {} + batch_size = logs.get("size", 0) + # In case of distribution strategy we can potentially run multiple steps + # at the same time, we should account for that in the `seen` + # calculation. + num_steps = logs.get("num_steps", 1) + self.seen += batch_size * num_steps + + for k, v in logs.items(): + if k in self.stateful_metrics: + self.totals[k] = v + else: + if k in self.totals: + self.totals[k] += v * batch_size + else: + self.totals[k] = v * batch_size + + def on_epoch_end(self, epoch, logs=None): + if logs is not None: + for k in self.params["metrics"]: + if k in self.totals: + # Make value available to next callbacks. 
+ if k in self.stateful_metrics: + logs[k] = self.totals[k] + else: + logs[k] = self.totals[k] / self.seen + + +@keras_export("keras.callbacks.TerminateOnNaN") +class TerminateOnNaN(Callback): + """Callback that terminates training when a NaN loss is encountered.""" + + def __init__(self): + super().__init__() + self._supports_tf_logs = True + + def on_batch_end(self, batch, logs=None): + logs = logs or {} + loss = logs.get("loss") + if loss is not None: + loss = tf_utils.sync_to_numpy_or_python_type(loss) + if np.isnan(loss) or np.isinf(loss): + io_utils.print_msg( + f"Batch {batch}: Invalid loss, terminating training" + ) + self.model.stop_training = True - Args: - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ - logs = self._process_logs(logs) - for callback in self.callbacks: - callback.on_predict_begin(logs) - def on_predict_end(self, logs=None): - """Calls the `on_predict_end` methods of its callbacks. +@keras_export("keras.callbacks.ProgbarLogger") +class ProgbarLogger(Callback): + """Callback that prints metrics to stdout. Args: - logs: Dict. Currently, no data is passed via this argument - for this method, but that may change in the future. - """ - logs = self._process_logs(logs) - for callback in self.callbacks: - callback.on_predict_end(logs) - - def __iter__(self): - return iter(self.callbacks) - - def _disallow_batch_hooks_in_ps_strategy(self): - """Error out if batch-level callbacks are passed with PSStrategy.""" - # pylint: disable=protected-access - strategy = tf.distribute.get_strategy() - if strategy._should_use_with_coordinator: - unsupported_callbacks = [] - for cb in self.callbacks: - # These Callbacks can accept RemoteValues directly. - if getattr(cb, '_supports_tf_logs', False): - continue - if (cb._implements_train_batch_hooks() or - cb._implements_test_batch_hooks() or - cb._implements_predict_batch_hooks()): - unsupported_callbacks.append(cb) - if unsupported_callbacks: - raise ValueError( - 'Batch-level `Callback`s are not supported with ' - '`ParameterServerStrategy`. Found unsupported ' - f'callbacks: {unsupported_callbacks}') - # pylint: enable=protected-access - - -@keras_export('keras.callbacks.Callback') -class Callback: - """Abstract base class used to build new callbacks. - - Callbacks can be passed to keras methods such as `fit`, `evaluate`, and - `predict` in order to hook into the various stages of the model training and - inference lifecycle. - - To create a custom callback, subclass `keras.callbacks.Callback` and override - the method associated with the stage of interest. See - https://www.tensorflow.org/guide/keras/custom_callback for more information. - - Example: - - >>> training_finished = False - >>> class MyCallback(tf.keras.callbacks.Callback): - ... def on_train_end(self, logs=None): - ... global training_finished - ... training_finished = True - >>> model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))]) - >>> model.compile(loss='mean_squared_error') - >>> model.fit(tf.constant([[1.0]]), tf.constant([[1.0]]), - ... callbacks=[MyCallback()]) - >>> assert training_finished == True - - If you want to use `Callback` objects in a custom training loop: - - 1. You should pack all your callbacks into a single `callbacks.CallbackList` - so they can all be called together. - 2. You will need to manually call all the `on_*` methods at the appropriate - locations in your loop. 
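A quick, hedged demonstration of `TerminateOnNaN` above: the oversized targets are chosen only so the float32 MSE overflows to `inf` on the first batch; the exact number of completed epochs may vary, but it should be fewer than requested.

```python
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
model.compile(optimizer="sgd", loss="mse")

# (1e30)**2 overflows float32 -> loss becomes inf -> training stops.
history = model.fit(
    np.array([[1.0], [2.0]]), np.array([[1e30], [-1e30]]),
    epochs=5, batch_size=1, verbose=0,
    callbacks=[tf.keras.callbacks.TerminateOnNaN()],
)
print(len(history.history["loss"]))  # fewer than the 5 requested epochs
```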
Like this: - - ``` - callbacks = tf.keras.callbacks.CallbackList([...]) - callbacks.append(...) - - callbacks.on_train_begin(...) - for epoch in range(EPOCHS): - callbacks.on_epoch_begin(epoch) - for i, data in dataset.enumerate(): - callbacks.on_train_batch_begin(i) - batch_logs = model.train_step(data) - callbacks.on_train_batch_end(i, batch_logs) - epoch_logs = ... - callbacks.on_epoch_end(epoch, epoch_logs) - final_logs=... - callbacks.on_train_end(final_logs) - ``` - - Attributes: - params: Dict. Training parameters - (eg. verbosity, batch size, number of epochs...). - model: Instance of `keras.models.Model`. - Reference of the model being trained. - - The `logs` dictionary that callback methods - take as argument will contain keys for quantities relevant to - the current batch or epoch (see method-specific docstrings). - """ - - def __init__(self): - self.validation_data = None # pylint: disable=g-missing-from-attributes - self.model = None - # Whether this Callback should only run on the chief worker in a - # Multi-Worker setting. - # TODO(omalleyt): Make this attr public once solution is stable. - self._chief_worker_only = None - self._supports_tf_logs = False - - def set_params(self, params): - self.params = params - - def set_model(self, model): - self.model = model - - @doc_controls.for_subclass_implementers - @generic_utils.default - def on_batch_begin(self, batch, logs=None): - """A backwards compatibility alias for `on_train_batch_begin`.""" - - @doc_controls.for_subclass_implementers - @generic_utils.default - def on_batch_end(self, batch, logs=None): - """A backwards compatibility alias for `on_train_batch_end`.""" - - @doc_controls.for_subclass_implementers - def on_epoch_begin(self, epoch, logs=None): - """Called at the start of an epoch. - - Subclasses should override for any actions to run. This function should only - be called during TRAIN mode. + count_mode: One of `"steps"` or `"samples"`. + Whether the progress bar should + count samples seen or steps (batches) seen. + stateful_metrics: Iterable of string names of metrics that + should *not* be averaged over an epoch. + Metrics in this list will be logged as-is. + All others will be averaged over time (e.g. loss, etc). + If not provided, defaults to the `Model`'s metrics. - Args: - epoch: Integer, index of epoch. - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. + Raises: + ValueError: In case of invalid `count_mode`. """ - @doc_controls.for_subclass_implementers - def on_epoch_end(self, epoch, logs=None): - """Called at the end of an epoch. + def __init__(self, count_mode: str = "samples", stateful_metrics=None): + super().__init__() + self._supports_tf_logs = True + if count_mode == "samples": + self.use_steps = False + elif count_mode == "steps": + self.use_steps = True + else: + raise ValueError( + f"Unknown `count_mode`: {count_mode}. " + 'Expected values are ["samples", "steps"]' + ) + # Defaults to all Model's metrics except for loss. + self.stateful_metrics = ( + set(stateful_metrics) if stateful_metrics else set() + ) + + self.seen = 0 + self.progbar = None + self.target = None + self.verbose = 1 + self.epochs = 1 + + self._train_step, self._test_step, self._predict_step = None, None, None + self._call_batch_hooks = True - Subclasses should override for any actions to run. This function should only - be called during TRAIN mode. + self._called_in_fit = False - Args: - epoch: Integer, index of epoch. 
- logs: Dict, metric results for this training epoch, and for the - validation epoch if validation is performed. Validation result keys - are prefixed with `val_`. For training epoch, the values of the - `Model`'s metrics are returned. Example : `{'loss': 0.2, 'accuracy': - 0.7}`. - """ + def set_params(self, params): + self.verbose = params["verbose"] + self.epochs = params["epochs"] + if self.use_steps and "steps" in params: + self.target = params["steps"] + elif not self.use_steps and "samples" in params: + self.target = params["samples"] + else: + self.target = ( + None # Will be inferred at the end of the first epoch. + ) + + self._call_batch_hooks = self.verbose == 1 + if self.target is None: + try: + self._train_step = self.model._train_counter + self._test_step = self.model._test_counter + self._predict_step = self.model._predict_counter + except AttributeError: + self._call_batch_hooks = True + + def on_train_begin(self, logs=None): + # When this logger is called inside `fit`, validation is silent. + self._called_in_fit = True + + def on_test_begin(self, logs=None): + if not self._called_in_fit: + self._reset_progbar() + self._maybe_init_progbar() + + def on_predict_begin(self, logs=None): + self._reset_progbar() + self._maybe_init_progbar() + + def on_epoch_begin(self, epoch, logs=None): + self._reset_progbar() + self._maybe_init_progbar() + if self.verbose and self.epochs > 1: + io_utils.print_msg(f"Epoch {epoch + 1}/{self.epochs}") + + def on_train_batch_end(self, batch, logs=None): + self._batch_update_progbar(batch, logs) + + def on_test_batch_end(self, batch, logs=None): + if not self._called_in_fit: + self._batch_update_progbar(batch, logs) + + def on_predict_batch_end(self, batch, logs=None): + # Don't pass prediction results. + self._batch_update_progbar(batch, None) + + def on_epoch_end(self, epoch, logs=None): + self._finalize_progbar(logs, self._train_step) + + def on_test_end(self, logs=None): + if not self._called_in_fit: + self._finalize_progbar(logs, self._test_step) + + def on_predict_end(self, logs=None): + self._finalize_progbar(logs, self._predict_step) + + def _reset_progbar(self): + self.seen = 0 + self.progbar = None + + def _maybe_init_progbar(self): + """Instantiate a `Progbar` if not yet, and update the stateful + metrics.""" + # TODO(rchao): Legacy TF1 code path may use list for + # `self.stateful_metrics`. Remove "cast to set" when TF1 support is + # dropped. + self.stateful_metrics = set(self.stateful_metrics) + + if self.model: + # Update the existing stateful metrics as `self.model.metrics` may + # contain updated metrics after `MetricsContainer` is built in the + # first train step. + self.stateful_metrics = self.stateful_metrics.union( + set(m.name for m in self.model.metrics) + ) + + if self.progbar is None: + self.progbar = Progbar( + target=self.target, + verbose=self.verbose, + stateful_metrics=self.stateful_metrics, + unit_name="step" if self.use_steps else "sample", + ) + + self.progbar._update_stateful_metrics(self.stateful_metrics) + + def _implements_train_batch_hooks(self): + return self._call_batch_hooks + + def _implements_test_batch_hooks(self): + return self._call_batch_hooks + + def _implements_predict_batch_hooks(self): + return self._call_batch_hooks + + def _batch_update_progbar(self, batch, logs=None): + """Updates the progbar.""" + logs = logs or {} + self._maybe_init_progbar() + if self.use_steps: + self.seen = batch + 1 # One-indexed. + else: + # v1 path only. 
+ logs = copy.copy(logs) + batch_size = logs.pop("size", 0) + num_steps = logs.pop("num_steps", 1) + logs.pop("batch", None) + add_seen = num_steps * batch_size + self.seen += add_seen + + if self.verbose == 1: + # Only block async when verbose = 1. + logs = tf_utils.sync_to_numpy_or_python_type(logs) + self.progbar.update(self.seen, list(logs.items()), finalize=False) + + def _finalize_progbar(self, logs, counter): + logs = tf_utils.sync_to_numpy_or_python_type(logs or {}) + if self.target is None: + if counter is not None: + counter = counter.numpy() + if not self.use_steps: + counter *= logs.get("size", 1) + self.target = counter or self.seen + self.progbar.target = self.target + self.progbar.update(self.target, list(logs.items()), finalize=True) + + +@keras_export("keras.callbacks.History") +class History(Callback): + """Callback that records events into a `History` object. - @doc_controls.for_subclass_implementers - @generic_utils.default - def on_train_batch_begin(self, batch, logs=None): - """Called at the beginning of a training batch in `fit` methods. + This callback is automatically applied to + every Keras model. The `History` object + gets returned by the `fit` method of models. - Subclasses should override for any actions to run. + Example: - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. + >>> model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) + >>> model.compile(tf.keras.optimizers.SGD(), loss='mse') + >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), + ... epochs=10, verbose=1) + >>> print(history.params) + {'verbose': 1, 'epochs': 10, 'steps': 1} + >>> # check the keys of history object + >>> print(history.history.keys()) + dict_keys(['loss']) - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. """ - # For backwards compatibility. - self.on_batch_begin(batch, logs=logs) - @doc_controls.for_subclass_implementers - @generic_utils.default - def on_train_batch_end(self, batch, logs=None): - """Called at the end of a training batch in `fit` methods. + def __init__(self): + super().__init__() + self.history = {} - Subclasses should override for any actions to run. + def on_train_begin(self, logs=None): + self.epoch = [] - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + self.epoch.append(epoch) + for k, v in logs.items(): + self.history.setdefault(k, []).append(v) - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Aggregated metric results up until this batch. - """ - # For backwards compatibility. - self.on_batch_end(batch, logs=logs) + # Set the history attribute on the model after the epoch ends. This will + # make sure that the state which is set is the latest one. + self.model.history = self - @doc_controls.for_subclass_implementers - @generic_utils.default - def on_test_batch_begin(self, batch, logs=None): - """Called at the beginning of a batch in `evaluate` methods. - Also called at the beginning of a validation batch in the `fit` - methods, if validation data is provided. - - Subclasses should override for any actions to run. 
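Since `History` above is what `fit` returns, the recorded per-epoch series can be inspected directly after training. A minimal sketch with illustrative data:

```python
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(3,))])
model.compile(optimizer="sgd", loss="mse", metrics=["mae"])
history = model.fit(np.random.rand(16, 3), np.random.rand(16, 1),
                    epochs=3, verbose=0)

print(sorted(history.history.keys()))  # ['loss', 'mae'] for this compile
print(len(history.history["loss"]))    # 3: one entry per epoch
print(history.epoch)                   # [0, 1, 2]
```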
+@keras_export("keras.callbacks.ModelCheckpoint") +class ModelCheckpoint(Callback): + """Callback to save the Keras model or model weights at some frequency. - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. + `ModelCheckpoint` callback is used in conjunction with training using + `model.fit()` to save a model or weights (in a checkpoint file) at some + interval, so the model or weights can be loaded later to continue the + training from the state saved. - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ + A few options this callback provides include: - @doc_controls.for_subclass_implementers - @generic_utils.default - def on_test_batch_end(self, batch, logs=None): - """Called at the end of a batch in `evaluate` methods. + - Whether to only keep the model that has achieved the "best performance" so + far, or whether to save the model at the end of every epoch regardless of + performance. + - Definition of 'best'; which quantity to monitor and whether it should be + maximized or minimized. + - The frequency it should save at. Currently, the callback supports saving + at the end of every epoch, or after a fixed number of training batches. + - Whether only weights are saved, or the whole model is saved. - Also called at the end of a validation batch in the `fit` - methods, if validation data is provided. + Note: If you get `WARNING:tensorflow:Can save best model only with + available, skipping` see the description of the `monitor` argument for + details on how to get this right. - Subclasses should override for any actions to run. + Example: - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. + ```python + model.compile(loss=..., optimizer=..., + metrics=['accuracy']) + + EPOCHS = 10 + checkpoint_filepath = '/tmp/checkpoint' + model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( + filepath=checkpoint_filepath, + save_weights_only=True, + monitor='val_accuracy', + mode='max', + save_best_only=True) + + # Model weights are saved at the end of every epoch, if it's the best seen + # so far. + model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback]) + + # The model weights (that are considered the best) are loaded into the + # model. + model.load_weights(checkpoint_filepath) + ``` Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Aggregated metric results up until this batch. + filepath: string or `PathLike`, path to save the model file. e.g. + filepath = os.path.join(working_dir, 'ckpt', file_name). `filepath` + can contain named formatting options, which will be filled the value + of `epoch` and keys in `logs` (passed in `on_epoch_end`). For example: + if `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`, then the + model checkpoints will be saved with the epoch number and the + validation loss in the filename. The directory of the filepath should + not be reused by any other callbacks to avoid conflicts. + monitor: The metric name to monitor. Typically the metrics are set by + the `Model.compile` method. Note: + + * Prefix the name with `"val_`" to monitor validation metrics. + * Use `"loss"` or "`val_loss`" to monitor the model's total loss. 
+ * If you specify metrics as strings, like `"accuracy"`, pass the same + string (with or without the `"val_"` prefix). + * If you pass `metrics.Metric` objects, `monitor` should be set to + `metric.name`. + * If you're not sure about the metric names, you can check the contents + of the `history.history` dictionary returned by + `history = model.fit()` + * Multi-output models set additional prefixes on the metric names. + + verbose: Verbosity mode, 0 or 1. Mode 0 is silent, and mode 1 + displays messages when the callback takes an action. + save_best_only: if `save_best_only=True`, it only saves when the model + is considered the "best", and the latest best model according to the + quantity monitored will not be overwritten. If `filepath` doesn't + contain formatting options like `{epoch}` then `filepath` will be + overwritten by each new better model. + mode: one of {'auto', 'min', 'max'}. If `save_best_only=True`, the + decision to overwrite the current save file is made based on either + the maximization or the minimization of the monitored quantity. + For `val_acc`, this should be `max`, for `val_loss` this should be + `min`, etc. In `auto` mode, the mode is set to `max` if the monitored + quantity contains 'acc' or starts with 'fmeasure', and is set to `min` + for the rest of the quantities. + save_weights_only: if True, then only the model's weights will be saved + (`model.save_weights(filepath)`), else the full model is saved + (`model.save(filepath)`). + save_freq: `'epoch'` or integer. When using `'epoch'`, the callback + saves the model after each epoch. When using an integer, the callback + saves the model at the end of this many batches. If the `Model` is + compiled with `steps_per_execution=N`, then the saving criteria will + be checked every Nth batch. Note that if the saving isn't aligned to + epochs, the monitored metric may potentially be less reliable (it + could reflect as little as 1 batch, since the metrics get reset every + epoch). Defaults to `'epoch'`. + options: Optional `tf.train.CheckpointOptions` object if + `save_weights_only` is true or optional `tf.saved_model.SaveOptions` + object if `save_weights_only` is false. + initial_value_threshold: Floating point initial "best" value of the + metric to be monitored. Only applies if `save_best_only=True`. Only + overwrites the model weights already saved if the performance of the + current model is better than this value. + **kwargs: Additional arguments for backwards compatibility. Possible key + is `period`. """ - @doc_controls.for_subclass_implementers - @generic_utils.default - def on_predict_batch_begin(self, batch, logs=None): - """Called at the beginning of a batch in `predict` methods. - - Subclasses should override for any actions to run. - - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. - - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future.
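A configuration sketch for the arguments described above (paths are illustrative, and the commented `fit` call assumes a compiled `model` with validation data):

```python
import tensorflow as tf

# `{epoch}` and any `logs` key (here `val_loss`) are filled in at save time.
best_only_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="/tmp/ckpt/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# An integer `save_freq` saves every N batches instead; with
# `steps_per_execution=M`, the condition is checked every M batches.
every_100_batches_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="/tmp/ckpt/batch-{epoch:02d}.hdf5",
    save_freq=100,
    save_weights_only=True,
)

# model.fit(x, y, validation_data=(x_val, y_val),
#           callbacks=[best_only_cb])
```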
- """ + def __init__( + self, + filepath, + monitor: str = "val_loss", + verbose: int = 0, + save_best_only: bool = False, + save_weights_only: bool = False, + mode: str = "auto", + save_freq="epoch", + options=None, + initial_value_threshold=None, + **kwargs, + ): + super().__init__() + self._supports_tf_logs = True + self.monitor = monitor + self.verbose = verbose + self.filepath = io_utils.path_to_string(filepath) + self.save_best_only = save_best_only + self.save_weights_only = save_weights_only + self.save_freq = save_freq + self.epochs_since_last_save = 0 + self._batches_seen_since_last_saving = 0 + self._last_batch_seen = -1 + self.best = initial_value_threshold + + if save_weights_only: + if options is None or isinstance( + options, tf.train.CheckpointOptions + ): + self._options = options or tf.train.CheckpointOptions() + else: + raise TypeError( + "If save_weights_only is True, then `options` must be " + "either None or a tf.train.CheckpointOptions. " + f"Got {options}." + ) + else: + if filepath and filepath.endswith(".keras") and options is not None: + raise ValueError( + "The native Keras format does not support " + "the `options` argument. Please remove " + "the `options` argument, or use the SavedModel " + "format by removing the `.keras` extension from " + "the model filepath." + ) + if options is None or isinstance( + options, tf.saved_model.SaveOptions + ): + self._options = options or tf.saved_model.SaveOptions() + else: + raise TypeError( + "If save_weights_only is False, then `options` must be " + "either None or a tf.saved_model.SaveOptions. " + f"Got {options}." + ) + + # Deprecated field `load_weights_on_restart` is for loading the + # checkpoint file from `filepath` at the start of `model.fit()` + # TODO(rchao): Remove the arg during next breaking release. + if "load_weights_on_restart" in kwargs: + self.load_weights_on_restart = kwargs["load_weights_on_restart"] + logging.warning( + "`load_weights_on_restart` argument is deprecated. " + "Please use `model.load_weights()` for loading weights " + "before the start of `model.fit()`." + ) + else: + self.load_weights_on_restart = False + + # Deprecated field `period` is for the number of epochs between which + # the model is saved. + if "period" in kwargs: + self.period = kwargs["period"] + logging.warning( + "`period` argument is deprecated. Please use `save_freq` " + "to specify the frequency in number of batches seen." + ) + else: + self.period = 1 + + if mode not in ["auto", "min", "max"]: + logging.warning( + "ModelCheckpoint mode %s is unknown, fallback to auto mode.", + mode, + ) + mode = "auto" + + if mode == "min": + self.monitor_op = np.less + if self.best is None: + self.best = np.Inf + elif mode == "max": + self.monitor_op = np.greater + if self.best is None: + self.best = -np.Inf + else: + if "acc" in self.monitor or self.monitor.startswith("fmeasure"): + self.monitor_op = np.greater + if self.best is None: + self.best = -np.Inf + else: + self.monitor_op = np.less + if self.best is None: + self.best = np.Inf + + if self.save_freq != "epoch" and not isinstance(self.save_freq, int): + raise ValueError( + f"Unrecognized save_freq: {self.save_freq}. " + 'Expected save_freq are "epoch" or integer' + ) + + # Only the chief worker writes model checkpoints, but all workers + # restore checkpoint at on_train_begin(). 
+ self._chief_worker_only = False + + def on_train_begin(self, logs=None): + if self.load_weights_on_restart: + filepath_to_load = ( + self._get_most_recently_modified_file_matching_pattern( + self.filepath + ) + ) + if filepath_to_load is not None and self._checkpoint_exists( + filepath_to_load + ): + try: + # `filepath` may contain placeholders such as `{epoch:02d}`, + # and thus it attempts to load the most recently modified + # file with file name matching the pattern. + self.model.load_weights(filepath_to_load) + except (IOError, ValueError) as e: + raise ValueError( + f"Error loading file from {filepath_to_load}. " + f"Reason: {e}" + ) + + def _implements_train_batch_hooks(self): + # Only call batch hooks when saving on batch + return self.save_freq != "epoch" + + def on_train_batch_end(self, batch, logs=None): + if self._should_save_on_batch(batch): + self._save_model(epoch=self._current_epoch, batch=batch, logs=logs) + + def on_epoch_begin(self, epoch, logs=None): + self._current_epoch = epoch + + def on_epoch_end(self, epoch, logs=None): + self.epochs_since_last_save += 1 + + if self.save_freq == "epoch": + self._save_model(epoch=epoch, batch=None, logs=logs) + + def _should_save_on_batch(self, batch): + """Handles batch-level saving logic, supports steps_per_execution.""" + if self.save_freq == "epoch": + return False + + if batch <= self._last_batch_seen: # New epoch. + add_batches = batch + 1 # batches are zero-indexed. + else: + add_batches = batch - self._last_batch_seen + self._batches_seen_since_last_saving += add_batches + self._last_batch_seen = batch + + if self._batches_seen_since_last_saving >= self.save_freq: + self._batches_seen_since_last_saving = 0 + return True + return False + + def _save_model(self, epoch, batch, logs): + """Saves the model. + + Args: + epoch: the epoch this iteration is in. + batch: the batch this iteration is in. `None` if the `save_freq` + is set to `epoch`. + logs: the `logs` dict passed in to `on_batch_end` or `on_epoch_end`. + """ + logs = logs or {} + + if ( + isinstance(self.save_freq, int) + or self.epochs_since_last_save >= self.period + ): + # Block only when saving interval is reached. 
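A plain-Python trace of the batch-counting logic in `_should_save_on_batch` above, under assumed values `save_freq=5` and `steps_per_execution=3` (so the hook only sees batch indices 2, 5, 8, ...):

```python
save_freq, last_batch_seen, batches_since_saving = 5, -1, 0

for batch in [2, 5, 8, 11, 14]:
    if batch <= last_batch_seen:       # index went backwards: new epoch
        add = batch + 1                # batch indices are zero-based
    else:
        add = batch - last_batch_seen  # covers batches run in between
    batches_since_saving += add
    last_batch_seen = batch
    if batches_since_saving >= save_freq:
        batches_since_saving = 0
        print(f"save at batch {batch}")  # fires at batches 5 and 11
```

Counting the gap between seen indices, rather than the number of hook calls, is what keeps an integer `save_freq` honest when several batches run per execution.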
+ logs = tf_utils.sync_to_numpy_or_python_type(logs) + self.epochs_since_last_save = 0 + filepath = self._get_file_path(epoch, batch, logs) + + dirname = os.path.dirname(filepath) + if ( + dirname + and not dirname.startswith("gs://") + and not tf.io.gfile.exists(dirname) + ): + tf.io.gfile.makedirs(dirname) + + try: + if self.save_best_only: + current = logs.get(self.monitor) + if current is None: + logging.warning( + "Can save best model only with %s available, " + "skipping.", + self.monitor, + ) + else: + if self.monitor_op(current, self.best): + if self.verbose > 0: + io_utils.print_msg( + f"\nEpoch {epoch + 1}: {self.monitor} " + "improved " + f"from {self.best:.5f} to {current:.5f}, " + f"saving model to {filepath}" + ) + self.best = current + if self.save_weights_only: + self.model.save_weights( + filepath, + overwrite=True, + options=self._options, + ) + else: + self.model.save( + filepath, + overwrite=True, + options=self._options, + ) + else: + if self.verbose > 0: + io_utils.print_msg( + f"\nEpoch {epoch + 1}: " + f"{self.monitor} did not improve " + f"from {self.best:.5f}" + ) + else: + if self.verbose > 0: + io_utils.print_msg( + f"\nEpoch {epoch + 1}: saving model to {filepath}" + ) + if self.save_weights_only: + self.model.save_weights( + filepath, overwrite=True, options=self._options + ) + elif filepath.endswith(".keras"): + self.model.save(filepath, overwrite=True) + else: + self.model.save( + filepath, overwrite=True, options=self._options + ) + + self._maybe_remove_file() + except IsADirectoryError: # h5py 3.x + raise IOError( + "Please specify a non-directory filepath for " + "ModelCheckpoint. Filepath used is an existing " + f"directory: {filepath}" + ) + except IOError as e: # h5py 2.x + # `e.errno` appears to be `None` so checking the content of + # `e.args[0]`. + if "is a directory" in str(e.args[0]).lower(): + raise IOError( + "Please specify a non-directory filepath for " + "ModelCheckpoint. Filepath used is an existing " + f"directory: {filepath}" + ) + # Re-throw the error for any other causes. + raise e + + def _get_file_path(self, epoch, batch, logs): + """Returns the file path for checkpoint.""" - @doc_controls.for_subclass_implementers - @generic_utils.default - def on_predict_batch_end(self, batch, logs=None): - """Called at the end of a batch in `predict` methods. + try: + # `filepath` may contain placeholders such as + # `{epoch:02d}`, `{batch:02d}` and `{mape:.2f}`. A mismatch between + # logged metrics and the path's placeholders can cause formatting to + # fail. + if batch is None or "batch" in logs: + file_path = self.filepath.format(epoch=epoch + 1, **logs) + else: + file_path = self.filepath.format( + epoch=epoch + 1, batch=batch + 1, **logs + ) + except KeyError as e: + raise KeyError( + f'Failed to format this callback filepath: "{self.filepath}". ' + f"Reason: {e}" + ) + self._write_filepath = distributed_file_utils.write_filepath( + file_path, self.model.distribute_strategy + ) + return self._write_filepath + + def _maybe_remove_file(self): + # Remove the checkpoint directory in multi-worker training where this + # worker should not checkpoint. It is a dummy directory previously saved + # for sync distributed training.
+ distributed_file_utils.remove_temp_dir_with_filepath( + self._write_filepath, self.model.distribute_strategy + ) + + def _checkpoint_exists(self, filepath): + """Returns whether the checkpoint that `filepath` refers to exists.""" + if filepath.endswith(".h5"): + return tf.io.gfile.exists(filepath) + tf_saved_model_exists = tf.io.gfile.exists(filepath) + tf_weights_only_checkpoint_exists = tf.io.gfile.exists( + filepath + ".index" + ) + return tf_saved_model_exists or tf_weights_only_checkpoint_exists + + def _get_most_recently_modified_file_matching_pattern(self, pattern): + """Returns the most recently modified filepath matching pattern. + + The pattern may contain python formatting placeholders. If + `tf.train.latest_checkpoint()` does not return None, use that; + otherwise, check for the most recently modified one that matches the + pattern. + + In the rare case where more than one pattern-matching file has + the same most recent modified time, return the + filepath that is largest (by `>` operator, lexicographically using the + numeric equivalents). This provides a tie-breaker when multiple files + are most recent. Note that a larger `filepath` can sometimes indicate a + later time of modification (for instance, when epoch/batch is used as a + formatting option), but not necessarily (when accuracy or loss is used). + The tie-breaker is a best effort to return the most + recent file, and to avoid nondeterministic results. + + The modified time of a file is obtained with `os.path.getmtime()`. + + This utility function is best demonstrated via an example: + + ```python + file_pattern = 'f.batch{batch:02d}epoch{epoch:02d}.h5' + test_dir = self.get_temp_dir() + path_pattern = os.path.join(test_dir, file_pattern) + file_paths = [ + os.path.join(test_dir, file_name) for file_name in + ['f.batch03epoch02.h5', + 'f.batch02epoch02.h5', 'f.batch01epoch01.h5'] + ] + for file_path in file_paths: + # Write something to each of the files + self.assertEqual( + _get_most_recently_modified_file_matching_pattern(path_pattern), + file_paths[-1]) + ``` + + Args: + pattern: The file pattern that may optionally contain python + placeholders such as `{epoch:02d}`. + + Returns: + The most recently modified file's full filepath matching `pattern`. + If `pattern` does not contain any placeholder, this returns the + filepath that exactly matches `pattern`. Returns `None` if no match + is found. + """ + dir_name = os.path.dirname(pattern) + base_name = os.path.basename(pattern) + base_name_regex = "^" + re.sub(r"{.*}", r".*", base_name) + "$" + + # If tf.train.latest_checkpoint tells us there exists a latest + # checkpoint, use that as it is more robust than `os.path.getmtime()`. + latest_tf_checkpoint = tf.train.latest_checkpoint(dir_name) + if latest_tf_checkpoint is not None and re.match( + base_name_regex, os.path.basename(latest_tf_checkpoint) + ): + return latest_tf_checkpoint + + latest_mod_time = 0 + file_path_with_latest_mod_time = None + n_file_with_latest_mod_time = 0 + file_path_with_largest_file_name = None + + if tf.io.gfile.exists(dir_name): + for file_name in os.listdir(dir_name): + # Only consider if `file_name` matches the pattern.
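The placeholder-to-regex conversion used above is easy to check standalone; the pattern and file names below are hypothetical:

```python
import os
import re

pattern = "/tmp/ckpt/f.batch{batch:02d}epoch{epoch:02d}.h5"
base_name = os.path.basename(pattern)
# Every `{...}` placeholder collapses to `.*` (greedy, as in the helper).
base_name_regex = "^" + re.sub(r"{.*}", r".*", base_name) + "$"
print(base_name_regex)  # ^f.batch.*.h5$

for candidate in ["f.batch03epoch02.h5", "other.h5"]:
    print(candidate, bool(re.match(base_name_regex, candidate)))
# f.batch03epoch02.h5 True
# other.h5 False
```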
+ if re.match(base_name_regex, file_name): + file_path = os.path.join(dir_name, file_name) + mod_time = os.path.getmtime(file_path) + if ( + file_path_with_largest_file_name is None + or file_path > file_path_with_largest_file_name + ): + file_path_with_largest_file_name = file_path + if mod_time > latest_mod_time: + latest_mod_time = mod_time + file_path_with_latest_mod_time = file_path + # In the case a file with later modified time is found, + # reset the counter for the number of files with latest + # modified time. + n_file_with_latest_mod_time = 1 + elif mod_time == latest_mod_time: + # In the case a file has modified time tied with the + # most recent, increment the counter for the number of + # files with latest modified time by 1. + n_file_with_latest_mod_time += 1 + + if n_file_with_latest_mod_time == 1: + # Return the sole file that has most recent modified time. + return file_path_with_latest_mod_time + else: + # If there are more than one file having latest modified time, + # return the file path with the largest file name. + return file_path_with_largest_file_name - Subclasses should override for any actions to run. - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. +@keras_export("keras.callbacks.BackupAndRestore", v1=[]) +class BackupAndRestore(Callback): + """Callback to back up and restore the training state. + + `BackupAndRestore` callback is intended to recover training from an + interruption that has happened in the middle of a `Model.fit` execution, by + backing up the training states in a temporary checkpoint file (with the help + of a `tf.train.CheckpointManager`), at the end of each epoch. Each backup + overwrites the previously written checkpoint file, so at any given time + there is at most one such checkpoint file for backup/restoring purpose. + + If training restarts before completion, the training state (which includes + the `Model` weights and epoch number) is restored to the most recently saved + state at the beginning of a new `Model.fit` run. At the completion of a + `Model.fit` run, the temporary checkpoint file is deleted. + + Note that the user is responsible to bring jobs back after the interruption. + This callback is important for the backup and restore mechanism for fault + tolerance purpose, and the model to be restored from a previous checkpoint + is expected to be the same as the one used to back up. If user changes + arguments passed to compile or fit, the checkpoint saved for fault tolerance + can become invalid. + + Note: + + 1. This callback is not compatible with eager execution disabled. + 2. A checkpoint is saved at the end of each epoch. After restoring, + `Model.fit` redoes any partial work during the unfinished epoch in which the + training got restarted (so the work done before the interruption doesn't + affect the final model state). + 3. This works for both single worker and multi-worker modes. When + `Model.fit` is used with `tf.distribute`, it supports + `tf.distribute.MirroredStrategy`, + `tf.distribute.MultiWorkerMirroredStrategy`, `tf.distribute.TPUStrategy`, + and `tf.distribute.experimental.ParameterServerStrategy`. + + Example: + + >>> class InterruptingCallback(tf.keras.callbacks.Callback): + ... def on_epoch_begin(self, epoch, logs=None): + ... if epoch == 4: + ... 
raise RuntimeError('Interrupting!') + >>> callback = tf.keras.callbacks.BackupAndRestore(backup_dir="/tmp/backup") + >>> model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) + >>> model.compile(tf.keras.optimizers.SGD(), loss='mse') + >>> try: + ... model.fit(np.arange(100).reshape(5, 20), np.zeros(5), epochs=10, + ... batch_size=1, callbacks=[callback, InterruptingCallback()], + ... verbose=0) + ... except: + ... pass + >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), + ... epochs=10, batch_size=1, callbacks=[callback], + ... verbose=0) + >>> # Only 6 more epochs are run, since first training got interrupted at + >>> # zero-indexed epoch 4, second training will continue from 4 to 9. + >>> len(history.history['loss']) + 6 + + Besides the option to save at the end of every epoch or every N steps, if + you are doing distributed training with + `tf.distribute.MultiWorkerMirroredStrategy` on Google Cloud Platform or + Google Borg, you can also use the `save_before_preemption` argument + to enable saving a checkpoint right before a worker gets preempted + by other jobs and training gets interrupted. See + `tf.distribute.experimental.PreemptionCheckpointHandler` for more details. Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Aggregated metric results up until this batch. + backup_dir: String, path to store the checkpoint. + e.g. `backup_dir = os.path.join(working_dir, 'backup')`. + This is the directory in which the system stores temporary files to + recover the model from jobs terminated unexpectedly. The directory + cannot be reused elsewhere to store other files, e.g. by the + `BackupAndRestore` callback of another training run, + or by another callback + (e.g. `ModelCheckpoint`) of the same training. + save_freq: `'epoch'`, integer, or `False`. When set to `'epoch'` + the callback saves the checkpoint at the end of each epoch. + When set to an integer, the callback saves the checkpoint every + `save_freq` batches. Set `save_freq` to `False` if only using + preemption checkpointing (with `save_before_preemption=True`). + delete_checkpoint: Boolean, default to True. This `BackupAndRestore` + callback works by saving a checkpoint to back up the training state. + If `delete_checkpoint=True`, the checkpoint will be deleted after + training is finished. Use `False` if you'd like to keep the checkpoint + for future usage. + save_before_preemption: A boolean value instructing whether to turn on + the automatic checkpoint saving for preemption/maintenance events. + This only supports + `tf.distribute.MultiWorkerMirroredStrategy` on Google Cloud Platform + or Google Borg for now. """ - @doc_controls.for_subclass_implementers - def on_train_begin(self, logs=None): - """Called at the beginning of training. - - Subclasses should override for any actions to run. 
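A small configuration sketch for the arguments above; the directory is illustrative and, per the docstring, must not be shared with any other callback or run:

```python
import tensorflow as tf

backup_cb = tf.keras.callbacks.BackupAndRestore(
    backup_dir="/tmp/train_backup",
    save_freq=100,            # checkpoint every 100 batches
    delete_checkpoint=False,  # keep the backup after training finishes
)
# model.fit(..., callbacks=[backup_cb])  # assuming a compiled `model`
```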
+ def __init__( + self, + backup_dir, + save_freq="epoch", + delete_checkpoint=True, + save_before_preemption=False, + ): + super().__init__() + self.backup_dir = backup_dir + self._supports_tf_logs = True + self._supported_strategies = ( + tf.distribute.MirroredStrategy, + tf.distribute.MultiWorkerMirroredStrategy, + tf.distribute.experimental.TPUStrategy, + tf.distribute.TPUStrategy, + tf.distribute.experimental.ParameterServerStrategy, + ) + self.save_freq = save_freq + self.delete_checkpoint = delete_checkpoint + self.save_before_preemption = save_before_preemption + self._batches_count = 0 + self._current_epoch = 0 + + if not tf.executing_eagerly(): + if tf.inside_function(): + raise ValueError( + "This Callback's method contains Python state and " + "should be called outside of `tf.function`s." + ) + else: # Legacy graph mode: + raise ValueError( + "BackupAndRestore only supports eager mode. In graph " + "mode, consider using ModelCheckpoint to manually save " + "and restore weights with `model.load_weights()` and by " + "providing `initial_epoch` in `model.fit()` for fault " + "tolerance." + ) + if (not save_freq) and (not save_before_preemption): + raise ValueError( + "Either `save_freq` or `save_before_preemption` " "must be set." + ) + + # Only the chief worker writes model checkpoints, but all workers + # restore checkpoint at on_train_begin(). + self._chief_worker_only = False + + def on_train_begin(self, logs=None): + # TrainingState is used to manage the training state needed for + # failure-recovery of a worker in training. + + if self.model._distribution_strategy and not isinstance( + self.model.distribute_strategy, self._supported_strategies + ): + raise NotImplementedError( + f"{type(self.model.distribute_strategy)} is not supported yet. " + "Currently BackupAndRestore callback " + "only supports empty strategy, " + "MirroredStrategy, MultiWorkerMirroredStrategy and TPUStrategy." + ) + self.model._training_state = worker_training_state.WorkerTrainingState( + self.model, + self.backup_dir, + self.save_freq, + self.save_before_preemption, + ) + self._training_state = self.model._training_state + self._training_state.restore() + + def on_train_batch_begin(self, batch, logs=None): + # Skip batch update for PSS Strategy + if isinstance( + self.model.distribute_strategy, + tf.distribute.ParameterServerStrategy, + ): + return + self._training_state._ckpt_saved_batch.assign(batch) + + def on_train_batch_end(self, batch, logs=None): + # Skip batch update for PSS Strategy + if isinstance( + self.model.distribute_strategy, + tf.distribute.ParameterServerStrategy, + ): + return + self._training_state.backup_if_preempted() + if self.save_freq and self.save_freq != "epoch": + self._batches_count += 1 + if self._batches_count >= self.save_freq: + self._batches_count = 0 + self._backup(epoch=self._current_epoch, batch=batch) + + def _implements_train_batch_hooks(self): + return self.save_freq != "epoch" + + def on_train_end(self, logs=None): + if self.delete_checkpoint: + # On exit of training, delete the training state backup file saved + # for the purpose of worker recovery unless the user opts out. + self._training_state.delete_backup() + # Clean up the training state. + del self._training_state + del self.model._training_state + + def on_epoch_begin(self, epoch, logs=None): + self._training_state._ckpt_saved_epoch.assign(epoch) + self._current_epoch = epoch + + def on_epoch_end(self, epoch, logs=None): + # Back up the model and current epoch for possible future recovery. 
+ if self.save_freq == "epoch": + self._backup(epoch=epoch) + + def _backup(self, epoch, batch=0): + self._training_state.back_up(epoch=epoch, batch=batch) + + +@keras_export("keras.callbacks.experimental.BackupAndRestore", v1=[]) +@deprecation.deprecated_endpoints( + "keras.callbacks.experimental.BackupAndRestore" +) +class BackupAndRestoreExperimental(BackupAndRestore): + """Deprecated. Please use `tf.keras.callbacks.BackupAndRestore` instead. - Args: - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. + Caution: `tf.keras.callbacks.experimental.BackupAndRestore` endpoint is + deprecated and will be removed in a future release. Please use + `tf.keras.callbacks.BackupAndRestore`. """ - @doc_controls.for_subclass_implementers - def on_train_end(self, logs=None): - """Called at the end of training. + def __init__(self, *args, **kwargs): + logging.warning( + "`tf.keras.callbacks.experimental.BackupAndRestore` endpoint is " + "deprecated and will be removed in a future release. Please use " + "`tf.keras.callbacks.BackupAndRestore`." + ) + super().__init__(*args, **kwargs) - Subclasses should override for any actions to run. - Args: - logs: Dict. Currently the output of the last call to `on_epoch_end()` - is passed to this argument for this method but that may change in - the future. - """ +@keras_export("keras.callbacks.EarlyStopping") +class EarlyStopping(Callback): + """Stop training when a monitored metric has stopped improving. - @doc_controls.for_subclass_implementers - def on_test_begin(self, logs=None): - """Called at the beginning of evaluation or validation. + Assuming the goal of a training is to minimize the loss. With this, the + metric to be monitored would be `'loss'`, and mode would be `'min'`. A + `model.fit()` training loop will check at end of every epoch whether + the loss is no longer decreasing, considering the `min_delta` and + `patience` if applicable. Once it's found no longer decreasing, + `model.stop_training` is marked True and the training terminates. - Subclasses should override for any actions to run. + The quantity to be monitored needs to be available in `logs` dict. + To make it so, pass the loss or metrics at `model.compile()`. Args: - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. + monitor: Quantity to be monitored. + min_delta: Minimum change in the monitored quantity + to qualify as an improvement, i.e. an absolute + change of less than min_delta, will count as no + improvement. + patience: Number of epochs with no improvement + after which training will be stopped. + verbose: Verbosity mode, 0 or 1. Mode 0 is silent, and mode 1 + displays messages when the callback takes an action. + mode: One of `{"auto", "min", "max"}`. In `min` mode, + training will stop when the quantity + monitored has stopped decreasing; in `"max"` + mode it will stop when the quantity + monitored has stopped increasing; in `"auto"` + mode, the direction is automatically inferred + from the name of the monitored quantity. + baseline: Baseline value for the monitored quantity. + Training will stop if the model doesn't show improvement over the + baseline. + restore_best_weights: Whether to restore model weights from + the epoch with the best value of the monitored quantity. + If False, the model weights obtained at the last step of + training are used. An epoch will be restored regardless + of the performance relative to the `baseline`. 
If no epoch + improves on `baseline`, training will run for `patience` + epochs and restore weights from the best epoch in that set. + start_from_epoch: Number of epochs to wait before starting + to monitor improvement. This allows for a warm-up period in which + no improvement is expected and thus training will not be stopped. + + + Example: + + >>> callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3) + >>> # This callback will stop the training when there is no improvement in + >>> # the loss for three consecutive epochs. + >>> model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) + >>> model.compile(tf.keras.optimizers.SGD(), loss='mse') + >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), + ... epochs=10, batch_size=1, callbacks=[callback], + ... verbose=0) + >>> len(history.history['loss']) # Only 4 epochs are run. + 4 """ - @doc_controls.for_subclass_implementers - def on_test_end(self, logs=None): - """Called at the end of evaluation or validation. + def __init__( + self, + monitor="val_loss", + min_delta=0, + patience=0, + verbose=0, + mode="auto", + baseline=None, + restore_best_weights=False, + start_from_epoch=0, + ): + super().__init__() + + self.monitor = monitor + self.patience = patience + self.verbose = verbose + self.baseline = baseline + self.min_delta = abs(min_delta) + self.wait = 0 + self.stopped_epoch = 0 + self.restore_best_weights = restore_best_weights + self.best_weights = None + self.start_from_epoch = start_from_epoch + + if mode not in ["auto", "min", "max"]: + logging.warning( + "EarlyStopping mode %s is unknown, fallback to auto mode.", + mode, + ) + mode = "auto" + + if mode == "min": + self.monitor_op = np.less + elif mode == "max": + self.monitor_op = np.greater + else: + if ( + self.monitor.endswith("acc") + or self.monitor.endswith("accuracy") + or self.monitor.endswith("auc") + ): + self.monitor_op = np.greater + else: + self.monitor_op = np.less - Subclasses should override for any actions to run. + if self.monitor_op == np.greater: + self.min_delta *= 1 + else: + self.min_delta *= -1 - Args: - logs: Dict. Currently the output of the last call to - `on_test_batch_end()` is passed to this argument for this method - but that may change in the future. - """ + def on_train_begin(self, logs=None): + # Allow instances to be re-used + self.wait = 0 + self.stopped_epoch = 0 + self.best = np.Inf if self.monitor_op == np.less else -np.Inf + self.best_weights = None + self.best_epoch = 0 + + def on_epoch_end(self, epoch, logs=None): + current = self.get_monitor_value(logs) + if current is None or epoch < self.start_from_epoch: + # If no monitor value exists or still in initial warm-up stage. + return + if self.restore_best_weights and self.best_weights is None: + # Restore the weights after first epoch if no progress is ever made. + self.best_weights = self.model.get_weights() - @doc_controls.for_subclass_implementers - def on_predict_begin(self, logs=None): - """Called at the beginning of prediction. + self.wait += 1 + if self._is_improvement(current, self.best): + self.best = current + self.best_epoch = epoch + if self.restore_best_weights: + self.best_weights = self.model.get_weights() + # Only restart wait if we beat both the baseline and our previous + # best. + if self.baseline is None or self._is_improvement( + current, self.baseline + ): + self.wait = 0 + return + + # Only check after the first epoch. 
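The sign flip applied to `min_delta` in `__init__` above reduces `_is_improvement` to a single comparison; a numeric check for `mode="min"` with illustrative values:

```python
import numpy as np

monitor_op = np.less   # mode="min"
min_delta = -0.05      # 0.05 after the sign flip in __init__
best = 0.80

for current in [0.78, 0.74]:
    print(current, bool(monitor_op(current - min_delta, best)))
# 0.78 False  (only 0.02 below best: within min_delta, no improvement)
# 0.74 True   (0.06 below best: clears min_delta)
```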
+ if self.wait >= self.patience and epoch > 0: + self.stopped_epoch = epoch + self.model.stop_training = True + if self.restore_best_weights and self.best_weights is not None: + if self.verbose > 0: + io_utils.print_msg( + "Restoring model weights from " + "the end of the best epoch: " + f"{self.best_epoch + 1}." + ) + self.model.set_weights(self.best_weights) + + def on_train_end(self, logs=None): + if self.stopped_epoch > 0 and self.verbose > 0: + io_utils.print_msg( + f"Epoch {self.stopped_epoch + 1}: early stopping" + ) + + def get_monitor_value(self, logs): + logs = logs or {} + monitor_value = logs.get(self.monitor) + if monitor_value is None: + logging.warning( + "Early stopping conditioned on metric `%s` " + "which is not available. Available metrics are: %s", + self.monitor, + ",".join(list(logs.keys())), + ) + return monitor_value + + def _is_improvement(self, monitor_value, reference_value): + return self.monitor_op(monitor_value - self.min_delta, reference_value) + + +@keras_export("keras.callbacks.RemoteMonitor") +class RemoteMonitor(Callback): + """Callback used to stream events to a server. - Subclasses should override for any actions to run. + Requires the `requests` library. + Events are sent to `root + '/publish/epoch/end/'` by default. Calls are + HTTP POST, with a `data` argument which is a + JSON-encoded dictionary of event data. + If `send_as_json=True`, the content type of the request will be + `"application/json"`. + Otherwise the serialized JSON will be sent within a form. Args: - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. + root: String; root url of the target server. + path: String; path relative to `root` to which the events will be sent. + field: String; JSON field under which the data will be stored. + The field is used only if the payload is sent within a form + (i.e. send_as_json is set to False). + headers: Dictionary; optional custom HTTP headers. + send_as_json: Boolean; whether the request should be + sent as `"application/json"`. """ - @doc_controls.for_subclass_implementers - def on_predict_end(self, logs=None): - """Called at the end of prediction. + def __init__( + self, + root="http://localhost:9000", + path="/publish/epoch/end/", + field="data", + headers=None, + send_as_json=False, + ): + super().__init__() + + self.root = root + self.path = path + self.field = field + self.headers = headers + self.send_as_json = send_as_json + + def on_epoch_end(self, epoch, logs=None): + if requests is None: + raise ImportError("RemoteMonitor requires the `requests` library.") + logs = logs or {} + send = {} + send["epoch"] = epoch + for k, v in logs.items(): + # np.ndarray and np.generic are not scalar types + # therefore we must unwrap their scalar values and + # pass to the json-serializable dict 'send' + if isinstance(v, (np.ndarray, np.generic)): + send[k] = v.item() + else: + send[k] = v + try: + if self.send_as_json: + requests.post( + self.root + self.path, json=send, headers=self.headers + ) + else: + requests.post( + self.root + self.path, + {self.field: json.dumps(send)}, + headers=self.headers, + ) + except requests.exceptions.RequestException: + logging.warning( + "Warning: could not reach RemoteMonitor root server at " + + str(self.root) + ) + + +@keras_export("keras.callbacks.LearningRateScheduler") +class LearningRateScheduler(Callback): + """Learning rate scheduler. - Subclasses should override for any actions to run. 
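The reformatted `EarlyStopping` above also gains a `start_from_epoch` argument (see the `Args` list and the warm-up check in `on_epoch_end`). Below is a minimal sketch of how the new argument combines with `restore_best_weights`; the model and random data are illustrative placeholders, not part of this diff:

```python
import numpy as np
import tensorflow as tf

# Warm-up epochs are ignored for early stopping, and the best weights
# seen after the warm-up are restored when training stops.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=1e-3,         # smaller changes count as "no improvement"
    patience=5,             # stop after 5 epochs without improvement
    start_from_epoch=3,     # skip the first 3 warm-up epochs entirely
    restore_best_weights=True,
)

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model.compile(optimizer="sgd", loss="mse")
x, y = np.random.rand(64, 4), np.random.rand(64, 1)
model.fit(x, y, validation_split=0.25, epochs=50,
          callbacks=[early_stop], verbose=0)
```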
+ At the beginning of every epoch, this callback gets the updated learning + rate value from `schedule` function provided at `__init__`, with the current + epoch and current learning rate, and applies the updated learning rate on + the optimizer. Args: - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ - - def _implements_train_batch_hooks(self): - """Determines if this Callback should be called for each train batch.""" - return (not generic_utils.is_default(self.on_batch_begin) or - not generic_utils.is_default(self.on_batch_end) or - not generic_utils.is_default(self.on_train_batch_begin) or - not generic_utils.is_default(self.on_train_batch_end)) - - def _implements_test_batch_hooks(self): - """Determines if this Callback should be called for each test batch.""" - return (not generic_utils.is_default(self.on_test_batch_begin) or - not generic_utils.is_default(self.on_test_batch_end)) - - def _implements_predict_batch_hooks(self): - """Determines if this Callback should be called for each predict batch.""" - return (not generic_utils.is_default(self.on_predict_batch_begin) or - not generic_utils.is_default(self.on_predict_batch_end)) - - -@keras_export('keras.callbacks.BaseLogger') -class BaseLogger(Callback): - """Callback that accumulates epoch averages of metrics. - - This callback is automatically applied to every Keras model. - - Args: - stateful_metrics: Iterable of string names of metrics that - should *not* be averaged over an epoch. - Metrics in this list will be logged as-is in `on_epoch_end`. - All others will be averaged in `on_epoch_end`. - """ - - def __init__(self, stateful_metrics=None): - super().__init__() - self.stateful_metrics = set(stateful_metrics or []) - - def on_epoch_begin(self, epoch, logs=None): - self.seen = 0 - self.totals = {} - - def on_batch_end(self, batch, logs=None): - logs = logs or {} - batch_size = logs.get('size', 0) - # In case of distribution strategy we can potentially run multiple steps - # at the same time, we should account for that in the `seen` calculation. - num_steps = logs.get('num_steps', 1) - self.seen += batch_size * num_steps - - for k, v in logs.items(): - if k in self.stateful_metrics: - self.totals[k] = v - else: - if k in self.totals: - self.totals[k] += v * batch_size - else: - self.totals[k] = v * batch_size - - def on_epoch_end(self, epoch, logs=None): - if logs is not None: - for k in self.params['metrics']: - if k in self.totals: - # Make value available to next callbacks. - if k in self.stateful_metrics: - logs[k] = self.totals[k] - else: - logs[k] = self.totals[k] / self.seen + schedule: a function that takes an epoch index (integer, indexed from 0) + and current learning rate (float) as inputs and returns a new + learning rate as output (float). + verbose: int. 0: quiet, 1: update messages. + Example: + + >>> # This function keeps the initial learning rate for the first ten epochs + >>> # and decreases it exponentially after that. + >>> def scheduler(epoch, lr): + ... if epoch < 10: + ... return lr + ... else: + ... return lr * tf.math.exp(-0.1) + >>> + >>> model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) + >>> model.compile(tf.keras.optimizers.SGD(), loss='mse') + >>> round(model.optimizer.lr.numpy(), 5) + 0.01 + + >>> callback = tf.keras.callbacks.LearningRateScheduler(scheduler) + >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), + ... 
epochs=15, callbacks=[callback], verbose=0) + >>> round(model.optimizer.lr.numpy(), 5) + 0.00607 -@keras_export('keras.callbacks.TerminateOnNaN') -class TerminateOnNaN(Callback): - """Callback that terminates training when a NaN loss is encountered. - """ + """ - def __init__(self): - super().__init__() - self._supports_tf_logs = True + def __init__(self, schedule, verbose=0): + super().__init__() + self.schedule = schedule + self.verbose = verbose + + def on_epoch_begin(self, epoch, logs=None): + if not hasattr(self.model.optimizer, "lr"): + raise ValueError('Optimizer must have a "lr" attribute.') + try: # new API + lr = float(backend.get_value(self.model.optimizer.lr)) + lr = self.schedule(epoch, lr) + except TypeError: # Support for old API for backward compatibility + lr = self.schedule(epoch) + if not isinstance(lr, (tf.Tensor, float, np.float32, np.float64)): + raise ValueError( + 'The output of the "schedule" function ' + f"should be float. Got: {lr}" + ) + if isinstance(lr, tf.Tensor) and not lr.dtype.is_floating: + raise ValueError( + f"The dtype of `lr` Tensor should be float. Got: {lr.dtype}" + ) + backend.set_value(self.model.optimizer.lr, backend.get_value(lr)) + if self.verbose > 0: + io_utils.print_msg( + f"\nEpoch {epoch + 1}: LearningRateScheduler setting learning " + f"rate to {lr}." + ) - def on_batch_end(self, batch, logs=None): - logs = logs or {} - loss = logs.get('loss') - if loss is not None: - loss = tf_utils.sync_to_numpy_or_python_type(loss) - if np.isnan(loss) or np.isinf(loss): - io_utils.print_msg(f'Batch {batch}: Invalid loss, terminating training') - self.model.stop_training = True + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + logs["lr"] = backend.get_value(self.model.optimizer.lr) -@keras_export('keras.callbacks.ProgbarLogger') -class ProgbarLogger(Callback): - """Callback that prints metrics to stdout. - - Args: - count_mode: One of `"steps"` or `"samples"`. - Whether the progress bar should - count samples seen or steps (batches) seen. - stateful_metrics: Iterable of string names of metrics that - should *not* be averaged over an epoch. - Metrics in this list will be logged as-is. - All others will be averaged over time (e.g. loss, etc). - If not provided, defaults to the `Model`'s metrics. - - Raises: - ValueError: In case of invalid `count_mode`. - """ - - def __init__(self, count_mode='samples', stateful_metrics=None): - super().__init__() - self._supports_tf_logs = True - if count_mode == 'samples': - self.use_steps = False - elif count_mode == 'steps': - self.use_steps = True - else: - raise ValueError( - f'Unknown `count_mode`: {count_mode}. ' - 'Expected values are ["samples", "steps"]') - # Defaults to all Model's metrics except for loss. - self.stateful_metrics = set(stateful_metrics) if stateful_metrics else set() - - self.seen = 0 - self.progbar = None - self.target = None - self.verbose = 1 - self.epochs = 1 - - self._train_step, self._test_step, self._predict_step = None, None, None - self._call_batch_hooks = True - - self._called_in_fit = False - - def set_params(self, params): - self.verbose = params['verbose'] - self.epochs = params['epochs'] - if self.use_steps and 'steps' in params: - self.target = params['steps'] - elif not self.use_steps and 'samples' in params: - self.target = params['samples'] - else: - self.target = None # Will be inferred at the end of the first epoch. 
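As the `try`/`except TypeError` in the new `on_epoch_begin` above shows, `LearningRateScheduler` accepts both the current two-argument schedule and the legacy one-argument form. A small sketch of both signatures (the schedule functions themselves are made up for illustration):

```python
import tensorflow as tf

# Current API: the schedule receives (epoch, lr) and returns the new rate.
def two_arg_schedule(epoch, lr):
    return lr if epoch < 10 else lr * tf.math.exp(-0.1)

# Legacy API, still accepted via the TypeError fallback: epoch index only.
def one_arg_schedule(epoch):
    return 1e-2 * 0.9**epoch

callback = tf.keras.callbacks.LearningRateScheduler(two_arg_schedule,
                                                    verbose=1)
```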
-
-    self._call_batch_hooks = self.verbose == 1
-    if self.target is None:
-      try:
-        self._train_step = self.model._train_counter  # pylint: disable=protected-access
-        self._test_step = self.model._test_counter  # pylint: disable=protected-access
-        self._predict_step = self.model._predict_counter  # pylint: disable=protected-access
-      except AttributeError:
-        self._call_batch_hooks = True
+def keras_model_summary(name, data, step=None):
+    """Writes a Keras model as JSON to a Summary.
 
-  def on_train_begin(self, logs=None):
-    # When this logger is called inside `fit`, validation is silent.
-    self._called_in_fit = True
-
-  def on_test_begin(self, logs=None):
-    if not self._called_in_fit:
-      self._reset_progbar()
-      self._maybe_init_progbar()
-
-  def on_predict_begin(self, logs=None):
-    self._reset_progbar()
-    self._maybe_init_progbar()
-
-  def on_epoch_begin(self, epoch, logs=None):
-    self._reset_progbar()
-    self._maybe_init_progbar()
-    if self.verbose and self.epochs > 1:
-      io_utils.print_msg(f'Epoch {epoch + 1}/{self.epochs}')
-
-  def on_train_batch_end(self, batch, logs=None):
-    self._batch_update_progbar(batch, logs)
-
-  def on_test_batch_end(self, batch, logs=None):
-    if not self._called_in_fit:
-      self._batch_update_progbar(batch, logs)
-
-  def on_predict_batch_end(self, batch, logs=None):
-    # Don't pass prediction results.
-    self._batch_update_progbar(batch, None)
-
-  def on_epoch_end(self, epoch, logs=None):
-    self._finalize_progbar(logs, self._train_step)
-
-  def on_test_end(self, logs=None):
-    if not self._called_in_fit:
-      self._finalize_progbar(logs, self._test_step)
-
-  def on_predict_end(self, logs=None):
-    self._finalize_progbar(logs, self._predict_step)
-
-  def _reset_progbar(self):
-    self.seen = 0
-    self.progbar = None
-
-  def _maybe_init_progbar(self):
-    """Instantiate a `Progbar` if not yet, and update the stateful metrics."""
-    # TODO(rchao): Legacy TF1 code path may use list for
-    # `self.stateful_metrics`. Remove "cast to set" when TF1 support is dropped.
-    self.stateful_metrics = set(self.stateful_metrics)
-
-    if self.model:
-      # Update the existing stateful metrics as `self.model.metrics` may contain
-      # updated metrics after `MetricsContainer` is built in the first train
-      # step.
-      self.stateful_metrics = self.stateful_metrics.union(
-          set(m.name for m in self.model.metrics))
-
-    if self.progbar is None:
-      self.progbar = Progbar(
-          target=self.target,
-          verbose=self.verbose,
-          stateful_metrics=self.stateful_metrics,
-          unit_name='step' if self.use_steps else 'sample')
-
-    self.progbar._update_stateful_metrics(self.stateful_metrics)  # pylint: disable=protected-access
-
-  def _implements_train_batch_hooks(self):
-    return self._call_batch_hooks
-
-  def _implements_test_batch_hooks(self):
-    return self._call_batch_hooks
-
-  def _implements_predict_batch_hooks(self):
-    return self._call_batch_hooks
-
-  def _batch_update_progbar(self, batch, logs=None):
-    """Updates the progbar."""
-    logs = logs or {}
-    self._maybe_init_progbar()
-    if self.use_steps:
-      self.seen = batch + 1  # One-indexed.
-    else:
-      # v1 path only.
-      logs = copy.copy(logs)
-      batch_size = logs.pop('size', 0)
-      num_steps = logs.pop('num_steps', 1)
-      logs.pop('batch', None)
-      add_seen = num_steps * batch_size
-      self.seen += add_seen
-
-    if self.verbose == 1:
-      # Only block async when verbose = 1.
-      logs = tf_utils.sync_to_numpy_or_python_type(logs)
-      self.progbar.update(self.seen, list(logs.items()), finalize=False)
-
-  def _finalize_progbar(self, logs, counter):
-    logs = tf_utils.sync_to_numpy_or_python_type(logs or {})
-    if self.target is None:
-      if counter is not None:
-        counter = counter.numpy()
-        if not self.use_steps:
-          counter *= logs.get('size', 1)
-      self.target = counter or self.seen
-      self.progbar.target = self.target
-    self.progbar.update(self.target, list(logs.items()), finalize=True)
-
-
-@keras_export('keras.callbacks.History')
-class History(Callback):
-  """Callback that records events into a `History` object.
+    Writing the Keras model configuration allows the TensorBoard graph plugin
+    to render a conceptual graph, as opposed to a graph of ops. If the model
+    fails to serialize as JSON, the summary is skipped and False is returned.
 
-  This callback is automatically applied to
-  every Keras model. The `History` object
-  gets returned by the `fit` method of models.
+    Args:
+        name: A name for this summary. The summary tag used for TensorBoard will
+            be this name prefixed by any active name scopes.
+        data: A Keras Model to write.
+        step: Explicit `int64`-castable monotonic step value for this summary. If
+            omitted, this defaults to `tf.summary.experimental.get_step()`, which
+            must not be None.
 
-  Example:
+    Returns:
+        True on success, or False if no summary was written because no default
+        summary writer was available.
 
-  >>> model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])
-  >>> model.compile(tf.keras.optimizers.SGD(), loss='mse')
-  >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5),
-  ...                     epochs=10, verbose=1)
-  >>> print(history.params)
-  {'verbose': 1, 'epochs': 10, 'steps': 1}
-  >>> # check the keys of history object
-  >>> print(history.history.keys())
-  dict_keys(['loss'])
+    Raises:
+        ValueError: if a default writer exists, but no step was provided and
+            `tf.summary.experimental.get_step()` is None.
+    """
+    summary_metadata = tf.compat.v1.SummaryMetadata()
+    # Hard coding a plugin name. Please refer to go/tb-plugin-name-hardcode for
+    # the rationale.
+    summary_metadata.plugin_data.plugin_name = "graph_keras_model"
+    # version number = 1
+    summary_metadata.plugin_data.content = b"1"
 
-  """
+    try:
+        json_string = data.to_json()
+    except Exception as exc:
+        # An exception should not break the model code.
+        logging.warning(
+            "Model failed to serialize as JSON. Ignoring... %s", exc
+        )
+        return False
+
+    with tf.summary.experimental.summary_scope(
+        name, "graph_keras_model", [data, step]
+    ) as (tag, _):
+        with tf.device("cpu:0"):
+            tensor = tf.constant(json_string, dtype=tf.string)
+        return tf.summary.write(
+            tag=tag, tensor=tensor, step=step, metadata=summary_metadata
+        )
+
+
+@keras_export("keras.callbacks.TensorBoard", v1=[])
+class TensorBoard(Callback, version_utils.TensorBoardVersionSelector):
 
-  def __init__(self):
-    super().__init__()
-    self.history = {}
+    """Enable visualizations for TensorBoard.
 
-  def on_train_begin(self, logs=None):
-    self.epoch = []
+    TensorBoard is a visualization tool provided with TensorFlow.
 
-  def on_epoch_end(self, epoch, logs=None):
-    logs = logs or {}
-    self.epoch.append(epoch)
-    for k, v in logs.items():
-      self.history.setdefault(k, []).append(v)
+    This callback logs events for TensorBoard, including:
 
-    # Set the history attribute on the model after the epoch ends. This will
-    # make sure that the state which is set is the latest one.
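`keras_model_summary` needs an active default summary writer, and an explicit `step` when no default step has been set. A hedged sketch of calling it directly follows; the import path is an assumption, since the diff only shows the module-level definition:

```python
import tensorflow as tf
from keras import callbacks  # assumed import path for this module

model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
writer = tf.summary.create_file_writer("./logs/conceptual_graph")
with writer.as_default():
    # Returns False (with a warning) if the model cannot be serialized
    # as JSON; `step` is passed explicitly since no default step is set.
    wrote = callbacks.keras_model_summary("my_model", model, step=0)
```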
- self.model.history = self + * Metrics summary plots + * Training graph visualization + * Weight histograms + * Sampled profiling + When used in `Model.evaluate` or regular validation + ([on_test_end](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback#on_test_end)), + in addition to epoch summaries, there will be a summary that records + evaluation metrics vs `Model.optimizer.iterations` written. The metric names + will be prepended with `evaluation`, with `Model.optimizer.iterations` being + the step in the visualized TensorBoard. -@keras_export('keras.callbacks.ModelCheckpoint') -class ModelCheckpoint(Callback): - """Callback to save the Keras model or model weights at some frequency. - - `ModelCheckpoint` callback is used in conjunction with training using - `model.fit()` to save a model or weights (in a checkpoint file) at some - interval, so the model or weights can be loaded later to continue the training - from the state saved. - - A few options this callback provides include: - - - Whether to only keep the model that has achieved the "best performance" so - far, or whether to save the model at the end of every epoch regardless of - performance. - - Definition of 'best'; which quantity to monitor and whether it should be - maximized or minimized. - - The frequency it should save at. Currently, the callback supports saving at - the end of every epoch, or after a fixed number of training batches. - - Whether only weights are saved, or the whole model is saved. - - Note: If you get `WARNING:tensorflow:Can save best model only with - available, skipping` see the description of the `monitor` argument for - details on how to get this right. - - Example: - - ```python - model.compile(loss=..., optimizer=..., - metrics=['accuracy']) - - EPOCHS = 10 - checkpoint_filepath = '/tmp/checkpoint' - model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( - filepath=checkpoint_filepath, - save_weights_only=True, - monitor='val_accuracy', - mode='max', - save_best_only=True) - - # Model weights are saved at the end of every epoch, if it's the best seen - # so far. - model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback]) - - # The model weights (that are considered the best) are loaded into the model. - model.load_weights(checkpoint_filepath) - ``` - - Args: - filepath: string or `PathLike`, path to save the model file. e.g. - filepath = os.path.join(working_dir, 'ckpt', file_name). `filepath` - can contain named formatting options, which will be filled the value of - `epoch` and keys in `logs` (passed in `on_epoch_end`). For example: if - `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`, then the model - checkpoints will be saved with the epoch number and the validation loss - in the filename. The directory of the filepath should not be reused by - any other callbacks to avoid conflicts. - monitor: The metric name to monitor. Typically the metrics are set by the - `Model.compile` method. Note: - - * Prefix the name with `"val_`" to monitor validation metrics. - * Use `"loss"` or "`val_loss`" to monitor the model's total loss. - * If you specify metrics as strings, like `"accuracy"`, pass the same - string (with or without the `"val_"` prefix). - * If you pass `metrics.Metric` objects, `monitor` should be set to - `metric.name` - * If you're not sure about the metric names you can check the contents - of the `history.history` dictionary returned by - `history = model.fit()` - * Multi-output models set additional prefixes on the metric names. 
+ If you have installed TensorFlow with pip, you should be able + to launch TensorBoard from the command line: - verbose: Verbosity mode, 0 or 1. Mode 0 is silent, and mode 1 - displays messages when the callback takes an action. - save_best_only: if `save_best_only=True`, it only saves when the model - is considered the "best" and the latest best model according to the - quantity monitored will not be overwritten. If `filepath` doesn't - contain formatting options like `{epoch}` then `filepath` will be - overwritten by each new better model. - mode: one of {'auto', 'min', 'max'}. If `save_best_only=True`, the - decision to overwrite the current save file is made based on either - the maximization or the minimization of the monitored quantity. - For `val_acc`, this should be `max`, for `val_loss` this should be - `min`, etc. In `auto` mode, the mode is set to `max` if the quantities - monitored are 'acc' or start with 'fmeasure' and are set to `min` for - the rest of the quantities. - save_weights_only: if True, then only the model's weights will be saved - (`model.save_weights(filepath)`), else the full model is saved - (`model.save(filepath)`). - save_freq: `'epoch'` or integer. When using `'epoch'`, the callback saves - the model after each epoch. When using integer, the callback saves the - model at end of this many batches. If the `Model` is compiled with - `steps_per_execution=N`, then the saving criteria will be - checked every Nth batch. Note that if the saving isn't aligned to - epochs, the monitored metric may potentially be less reliable (it - could reflect as little as 1 batch, since the metrics get reset every - epoch). Defaults to `'epoch'`. - options: Optional `tf.train.CheckpointOptions` object if - `save_weights_only` is true or optional `tf.saved_model.SaveOptions` - object if `save_weights_only` is false. - initial_value_threshold: Floating point initial "best" value of the metric - to be monitored. Only applies if `save_best_value=True`. Only overwrites - the model weights already saved if the performance of current - model is better than this value. - **kwargs: Additional arguments for backwards compatibility. Possible key - is `period`. - """ - - def __init__(self, - filepath, - monitor='val_loss', - verbose=0, - save_best_only=False, - save_weights_only=False, - mode='auto', - save_freq='epoch', - options=None, - initial_value_threshold=None, - **kwargs): - super().__init__() - self._supports_tf_logs = True - self.monitor = monitor - self.verbose = verbose - self.filepath = io_utils.path_to_string(filepath) - self.save_best_only = save_best_only - self.save_weights_only = save_weights_only - self.save_freq = save_freq - self.epochs_since_last_save = 0 - self._batches_seen_since_last_saving = 0 - self._last_batch_seen = 0 - self.best = initial_value_threshold - - if save_weights_only: - if options is None or isinstance( - options, tf.train.CheckpointOptions): - self._options = options or tf.train.CheckpointOptions() - else: - raise TypeError( - 'If save_weights_only is True, then `options` must be ' - f'either None or a tf.train.CheckpointOptions. Got {options}.') - else: - if options is None or isinstance(options, tf.saved_model.SaveOptions): - self._options = options or tf.saved_model.SaveOptions() - else: - raise TypeError( - 'If save_weights_only is False, then `options` must be ' - f'either None or a tf.saved_model.SaveOptions. 
Got {options}.') - - # Deprecated field `load_weights_on_restart` is for loading the checkpoint - # file from `filepath` at the start of `model.fit()` - # TODO(rchao): Remove the arg during next breaking release. - if 'load_weights_on_restart' in kwargs: - self.load_weights_on_restart = kwargs['load_weights_on_restart'] - logging.warning('`load_weights_on_restart` argument is deprecated. ' - 'Please use `model.load_weights()` for loading weights ' - 'before the start of `model.fit()`.') - else: - self.load_weights_on_restart = False - - # Deprecated field `period` is for the number of epochs between which - # the model is saved. - if 'period' in kwargs: - self.period = kwargs['period'] - logging.warning('`period` argument is deprecated. Please use `save_freq` ' - 'to specify the frequency in number of batches seen.') - else: - self.period = 1 - - if mode not in ['auto', 'min', 'max']: - logging.warning('ModelCheckpoint mode %s is unknown, ' - 'fallback to auto mode.', mode) - mode = 'auto' - - if mode == 'min': - self.monitor_op = np.less - if self.best is None: - self.best = np.Inf - elif mode == 'max': - self.monitor_op = np.greater - if self.best is None: - self.best = -np.Inf - else: - if 'acc' in self.monitor or self.monitor.startswith('fmeasure'): - self.monitor_op = np.greater - if self.best is None: - self.best = -np.Inf - else: - self.monitor_op = np.less - if self.best is None: - self.best = np.Inf - - if self.save_freq != 'epoch' and not isinstance(self.save_freq, int): - raise ValueError( - f'Unrecognized save_freq: {self.save_freq}. ' - 'Expected save_freq are "epoch" or integer') - - # Only the chief worker writes model checkpoints, but all workers - # restore checkpoint at on_train_begin(). - self._chief_worker_only = False - - def on_train_begin(self, logs=None): - if self.load_weights_on_restart: - filepath_to_load = ( - self._get_most_recently_modified_file_matching_pattern(self.filepath)) - if (filepath_to_load is not None and - self._checkpoint_exists(filepath_to_load)): - try: - # `filepath` may contain placeholders such as `{epoch:02d}`, and - # thus it attempts to load the most recently modified file with file - # name matching the pattern. - self.model.load_weights(filepath_to_load) - except (IOError, ValueError) as e: - raise ValueError( - f'Error loading file from {filepath_to_load}. Reason: {e}') - - def _implements_train_batch_hooks(self): - # Only call batch hooks when saving on batch - return self.save_freq != 'epoch' - - def on_train_batch_end(self, batch, logs=None): - if self._should_save_on_batch(batch): - self._save_model(epoch=self._current_epoch, batch=batch, logs=logs) - - def on_epoch_begin(self, epoch, logs=None): - self._current_epoch = epoch - - def on_epoch_end(self, epoch, logs=None): - self.epochs_since_last_save += 1 - # pylint: disable=protected-access - if self.save_freq == 'epoch': - self._save_model(epoch=epoch, batch=None, logs=logs) - - def _should_save_on_batch(self, batch): - """Handles batch-level saving logic, supports steps_per_execution.""" - if self.save_freq == 'epoch': - return False - - if batch <= self._last_batch_seen: # New epoch. - add_batches = batch + 1 # batches are zero-indexed. 
- else: - add_batches = batch - self._last_batch_seen - self._batches_seen_since_last_saving += add_batches - self._last_batch_seen = batch - - if self._batches_seen_since_last_saving >= self.save_freq: - self._batches_seen_since_last_saving = 0 - return True - return False + ``` + tensorboard --logdir=path_to_your_logs + ``` - def _save_model(self, epoch, batch, logs): - """Saves the model. + You can find more information about TensorBoard + [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard). Args: - epoch: the epoch this iteration is in. - batch: the batch this iteration is in. `None` if the `save_freq` - is set to `epoch`. - logs: the `logs` dict passed in to `on_batch_end` or `on_epoch_end`. - """ - logs = logs or {} - - if isinstance(self.save_freq, - int) or self.epochs_since_last_save >= self.period: - # Block only when saving interval is reached. - logs = tf_utils.sync_to_numpy_or_python_type(logs) - self.epochs_since_last_save = 0 - filepath = self._get_file_path(epoch, batch, logs) - - try: - if self.save_best_only: - current = logs.get(self.monitor) - if current is None: - logging.warning('Can save best model only with %s available, ' - 'skipping.', self.monitor) - else: - if self.monitor_op(current, self.best): - if self.verbose > 0: - io_utils.print_msg( - f'\nEpoch {epoch + 1}: {self.monitor} improved ' - f'from {self.best:.5f} to {current:.5f}, ' - f'saving model to {filepath}') - self.best = current - if self.save_weights_only: - self.model.save_weights( - filepath, overwrite=True, options=self._options) - else: - self.model.save(filepath, overwrite=True, options=self._options) - else: - if self.verbose > 0: - io_utils.print_msg( - f'\nEpoch {epoch + 1}: ' - f'{self.monitor} did not improve from {self.best:.5f}') - else: - if self.verbose > 0: - io_utils.print_msg( - f'\nEpoch {epoch + 1}: saving model to {filepath}') - if self.save_weights_only: - self.model.save_weights( - filepath, overwrite=True, options=self._options) - else: - self.model.save(filepath, overwrite=True, options=self._options) - - self._maybe_remove_file() - except IsADirectoryError as e: # h5py 3.x - raise IOError('Please specify a non-directory filepath for ' - 'ModelCheckpoint. Filepath used is an existing ' - f'directory: {filepath}') - except IOError as e: # h5py 2.x - # `e.errno` appears to be `None` so checking the content of `e.args[0]`. - if 'is a directory' in str(e.args[0]).lower(): - raise IOError('Please specify a non-directory filepath for ' - 'ModelCheckpoint. Filepath used is an existing ' - f'directory: f{filepath}') - # Re-throw the error for any other causes. - raise e - - def _get_file_path(self, epoch, batch, logs): - """Returns the file path for checkpoint.""" - # pylint: disable=protected-access - try: - # `filepath` may contain placeholders such as `{epoch:02d}`,`{batch:02d}` - # and `{mape:.2f}`. A mismatch between logged metrics and the path's - # placeholders can cause formatting to fail. - if batch is None or 'batch' in logs: - file_path = self.filepath.format(epoch=epoch + 1, **logs) - else: - file_path = self.filepath.format( - epoch=epoch + 1, batch=batch + 1, **logs) - except KeyError as e: - raise KeyError( - f'Failed to format this callback filepath: "{self.filepath}". 
' - f'Reason: {e}') - self._write_filepath = distributed_file_utils.write_filepath( - file_path, self.model.distribute_strategy) - return self._write_filepath - - def _maybe_remove_file(self): - # Remove the checkpoint directory in multi-worker training where this worker - # should not checkpoint. It is a dummy directory previously saved for sync - # distributed training. - distributed_file_utils.remove_temp_dir_with_filepath( - self._write_filepath, self.model.distribute_strategy) - - def _checkpoint_exists(self, filepath): - """Returns whether the checkpoint `filepath` refers to exists.""" - if filepath.endswith('.h5'): - return tf.io.gfile.exists(filepath) - tf_saved_model_exists = tf.io.gfile.exists(filepath) - tf_weights_only_checkpoint_exists = tf.io.gfile.exists( - filepath + '.index') - return tf_saved_model_exists or tf_weights_only_checkpoint_exists - - def _get_most_recently_modified_file_matching_pattern(self, pattern): - """Returns the most recently modified filepath matching pattern. - - Pattern may contain python formatting placeholder. If - `tf.train.latest_checkpoint()` does not return None, use that; otherwise, - check for most recently modified one that matches the pattern. - - In the rare case where there are more than one pattern-matching file having - the same modified time that is most recent among all, return the filepath - that is largest (by `>` operator, lexicographically using the numeric - equivalents). This provides a tie-breaker when multiple files are most - recent. Note that a larger `filepath` can sometimes indicate a later time of - modification (for instance, when epoch/batch is used as formatting option), - but not necessarily (when accuracy or loss is used). The tie-breaker is - put in the logic as best effort to return the most recent, and to avoid - undeterministic result. - - Modified time of a file is obtained with `os.path.getmtime()`. - - This utility function is best demonstrated via an example: + log_dir: the path of the directory where to save the log files to be + parsed by TensorBoard. e.g. log_dir = os.path.join(working_dir, + 'logs') This directory should not be reused by any other callbacks. + histogram_freq: frequency (in epochs) at which to compute + weight histograms for the layers of the model. If set to 0, histograms + won't be computed. Validation data (or split) must be specified for + histogram visualizations. + write_graph: whether to visualize the graph in TensorBoard. The log file + can become quite large when write_graph is set to True. + write_images: whether to write model weights to visualize as image in + TensorBoard. + write_steps_per_second: whether to log the training steps per second + into TensorBoard. This supports both epoch and batch frequency + logging. + update_freq: `'batch'` or `'epoch'` or integer. When using `'epoch'`, + writes the losses and metrics to TensorBoard after every epoch. + If using an integer, let's say `1000`, all metrics and losses + (including custom ones added by `Model.compile`) will be logged to + TensorBoard every 1000 batches. `'batch'` is a synonym for `1`, + meaning that they will be written every batch. + Note however that writing too frequently to TensorBoard can slow down + your training, especially when used with `tf.distribute.Strategy` as + it will incur additional synchronization overhead. + Use with `ParameterServerStrategy` is not supported. + Batch-level summary writing is also available via `train_step` + override. 
Please see + [TensorBoard Scalars tutorial](https://www.tensorflow.org/tensorboard/scalars_and_keras#batch-level_logging) # noqa: E501 + for more details. + profile_batch: Profile the batch(es) to sample compute characteristics. + profile_batch must be a non-negative integer or a tuple of integers. + A pair of positive integers signify a range of batches to profile. + By default, profiling is disabled. + embeddings_freq: frequency (in epochs) at which embedding layers will be + visualized. If set to 0, embeddings won't be visualized. + embeddings_metadata: Dictionary which maps embedding layer names to the + filename of a file in which to save metadata for the embedding layer. + In case the same metadata file is to be + used for all embedding layers, a single filename can be passed. + + Examples: + + Basic usage: ```python - file_pattern = 'f.batch{batch:02d}epoch{epoch:02d}.h5' - test_dir = self.get_temp_dir() - path_pattern = os.path.join(test_dir, file_pattern) - file_paths = [ - os.path.join(test_dir, file_name) for file_name in - ['f.batch03epoch02.h5', 'f.batch02epoch02.h5', 'f.batch01epoch01.h5'] - ] - for file_path in file_paths: - # Write something to each of the files - self.assertEqual( - _get_most_recently_modified_file_matching_pattern(path_pattern), - file_paths[-1]) + tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs") + model.fit(x_train, y_train, epochs=2, callbacks=[tensorboard_callback]) + # Then run the tensorboard command to view the visualizations. ``` - Args: - pattern: The file pattern that may optionally contain python placeholder - such as `{epoch:02d}`. - - Returns: - The most recently modified file's full filepath matching `pattern`. If - `pattern` does not contain any placeholder, this returns the filepath - that - exactly matches `pattern`. Returns `None` if no match is found. - """ - dir_name = os.path.dirname(pattern) - base_name = os.path.basename(pattern) - base_name_regex = '^' + re.sub(r'{.*}', r'.*', base_name) + '$' - - # If tf.train.latest_checkpoint tells us there exists a latest checkpoint, - # use that as it is more robust than `os.path.getmtime()`. - latest_tf_checkpoint = tf.train.latest_checkpoint(dir_name) - if latest_tf_checkpoint is not None and re.match( - base_name_regex, os.path.basename(latest_tf_checkpoint)): - return latest_tf_checkpoint - - latest_mod_time = 0 - file_path_with_latest_mod_time = None - n_file_with_latest_mod_time = 0 - file_path_with_largest_file_name = None - - if tf.io.gfile.exists(dir_name): - for file_name in os.listdir(dir_name): - # Only consider if `file_name` matches the pattern. - if re.match(base_name_regex, file_name): - file_path = os.path.join(dir_name, file_name) - mod_time = os.path.getmtime(file_path) - if (file_path_with_largest_file_name is None or - file_path > file_path_with_largest_file_name): - file_path_with_largest_file_name = file_path - if mod_time > latest_mod_time: - latest_mod_time = mod_time - file_path_with_latest_mod_time = file_path - # In the case a file with later modified time is found, reset - # the counter for the number of files with latest modified time. - n_file_with_latest_mod_time = 1 - elif mod_time == latest_mod_time: - # In the case a file has modified time tied with the most recent, - # increment the counter for the number of files with latest modified - # time by 1. - n_file_with_latest_mod_time += 1 - - if n_file_with_latest_mod_time == 1: - # Return the sole file that has most recent modified time. 
- return file_path_with_latest_mod_time - else: - # If there are more than one file having latest modified time, return - # the file path with the largest file name. - return file_path_with_largest_file_name - - -@keras_export('keras.callbacks.BackupAndRestore', v1=[]) -class BackupAndRestore(Callback): - """Callback to back up and restore the training state. - - `BackupAndRestore` callback is intended to recover training from an - interruption that has happened in the middle of a `Model.fit` execution, by - backing up the training states in a temporary checkpoint file (with the help - of a `tf.train.CheckpointManager`), at the end of each epoch. Each backup - overwrites the previously written checkpoint file, so at any given time there - is at most one such checkpoint file for backup/restoring purpose. - - If training restarts before completion, the training state (which includes the - `Model` weights and epoch number) is restored to the most recently saved state - at the beginning of a new `Model.fit` run. At the completion of a `Model.fit` - run, the temporary checkpoint file is deleted. - - Note that the user is responsible to bring jobs back after the interruption. - This callback is important for the backup and restore mechanism for fault - tolerance purpose, and the model to be restored from an previous checkpoint is - expected to be the same as the one used to back up. If user changes arguments - passed to compile or fit, the checkpoint saved for fault tolerance can become - invalid. - - Note: - - 1. This callback is not compatible with eager execution disabled. - 2. A checkpoint is saved at the end of each epoch. After restoring, - `Model.fit` redoes any partial work during the unfinished epoch in which the - training got restarted (so the work done before the interruption doesn't - affect the final model state). - 3. This works for both single worker and multi-worker modes. When `Model.fit` - is used with `tf.distribute`, it supports `tf.distribute.MirroredStrategy`, - `tf.distribute.MultiWorkerMirroredStrategy`, `tf.distribute.TPUStrategy`, and - `tf.distribute.experimental.ParameterServerStrategy`. - - Example: - - >>> class InterruptingCallback(tf.keras.callbacks.Callback): - ... def on_epoch_begin(self, epoch, logs=None): - ... if epoch == 4: - ... raise RuntimeError('Interrupting!') - >>> callback = tf.keras.callbacks.BackupAndRestore(backup_dir="/tmp/backup") - >>> model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) - >>> model.compile(tf.keras.optimizers.SGD(), loss='mse') - >>> try: - ... model.fit(np.arange(100).reshape(5, 20), np.zeros(5), epochs=10, - ... batch_size=1, callbacks=[callback, InterruptingCallback()], - ... verbose=0) - ... except: - ... pass - >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), epochs=10, - ... batch_size=1, callbacks=[callback], verbose=0) - >>> # Only 6 more epochs are run, since first trainning got interrupted at - >>> # zero-indexed epoch 4, second training will continue from 4 to 9. - >>> len(history.history['loss']) - 6 - - Args: - backup_dir: String, path to store the checkpoint. - e.g. backup_dir = os.path.join(working_dir, 'backup') - This is the directory in which the system stores temporary files to - recover the model from jobs terminated unexpectedly. The directory - cannot be reused elsewhere to store other files, e.g. by - BackupAndRestore callback of another training, or by another callback - (ModelCheckpoint) of the same training. 
- """ - - def __init__(self, backup_dir): - super().__init__() - self.backup_dir = backup_dir - self._supports_tf_logs = True - self._supported_strategies = ( - tf.distribute.MirroredStrategy, - tf.distribute.MultiWorkerMirroredStrategy, - tf.distribute.experimental.TPUStrategy, tf.distribute.TPUStrategy, - tf.distribute.experimental.ParameterServerStrategy) - - if not tf.executing_eagerly(): - if tf.inside_function(): - raise ValueError('This Callback\'s method contains Python state and ' - 'should be called outside of `tf.function`s.') - else: # Legacy graph mode: - raise ValueError( - 'BackupAndRestore only supports eager mode. In graph ' - 'mode, consider using ModelCheckpoint to manually save ' - 'and restore weights with `model.load_weights()` and by ' - 'providing `initial_epoch` in `model.fit()` for fault tolerance.') - - # Only the chief worker writes model checkpoints, but all workers - # restore checkpoint at on_train_begin(). - self._chief_worker_only = False - - def on_train_begin(self, logs=None): - # TrainingState is used to manage the training state needed for - # failure-recovery of a worker in training. - # pylint: disable=protected-access - - if self.model._distribution_strategy and not isinstance( - self.model.distribute_strategy, self._supported_strategies): - raise NotImplementedError( - f'{type(self.model.distribute_strategy)} is not supported yet. ' - 'Currently BackupAndRestore callback only supports empty strategy, ' - 'MirroredStrategy, MultiWorkerMirroredStrategy and TPUStrategy.') - self.model._training_state = ( - worker_training_state.WorkerTrainingState(self.model, self.backup_dir)) - self._training_state = self.model._training_state - self._training_state.restore() - - def on_train_end(self, logs=None): - # pylint: disable=protected-access - # On exit of training, delete the training state backup file that was saved - # for the purpose of worker recovery. - self._training_state.delete_backup() - - # Clean up the training state. - del self._training_state - del self.model._training_state - - def on_epoch_end(self, epoch, logs=None): - # Back up the model and current epoch for possible future recovery. - self._training_state.back_up(epoch) - - -@keras_export('keras.callbacks.experimental.BackupAndRestore', v1=[]) -@deprecation.deprecated_endpoints( - 'keras.callbacks.experimental.BackupAndRestore') -class BackupAndRestoreExperimental(BackupAndRestore): - """Deprecated. Please use `tf.keras.callbacks.BackupAndRestore` instead. - - Caution: `tf.keras.callbacks.experimental.BackupAndRestore` endpoint is - deprecated and will be removed in a future release. Please use - `tf.keras.callbacks.BackupAndRestore`. - """ - - def __init__(self, *args, **kwargs): - logging.warning( - '`tf.keras.callbacks.experimental.BackupAndRestore` endpoint is ' - 'deprecated and will be removed in a future release. Please use ' - '`tf.keras.callbacks.BackupAndRestore`.') - super().__init__(*args, **kwargs) - - -@keras_export('keras.callbacks.EarlyStopping') -class EarlyStopping(Callback): - """Stop training when a monitored metric has stopped improving. - - Assuming the goal of a training is to minimize the loss. With this, the - metric to be monitored would be `'loss'`, and mode would be `'min'`. A - `model.fit()` training loop will check at end of every epoch whether - the loss is no longer decreasing, considering the `min_delta` and - `patience` if applicable. Once it's found no longer decreasing, - `model.stop_training` is marked True and the training terminates. 
- - The quantity to be monitored needs to be available in `logs` dict. - To make it so, pass the loss or metrics at `model.compile()`. - - Args: - monitor: Quantity to be monitored. - min_delta: Minimum change in the monitored quantity - to qualify as an improvement, i.e. an absolute - change of less than min_delta, will count as no - improvement. - patience: Number of epochs with no improvement - after which training will be stopped. - verbose: Verbosity mode, 0 or 1. Mode 0 is silent, and mode 1 - displays messages when the callback takes an action. - mode: One of `{"auto", "min", "max"}`. In `min` mode, - training will stop when the quantity - monitored has stopped decreasing; in `"max"` - mode it will stop when the quantity - monitored has stopped increasing; in `"auto"` - mode, the direction is automatically inferred - from the name of the monitored quantity. - baseline: Baseline value for the monitored quantity. - Training will stop if the model doesn't show improvement over the - baseline. - restore_best_weights: Whether to restore model weights from - the epoch with the best value of the monitored quantity. - If False, the model weights obtained at the last step of - training are used. An epoch will be restored regardless - of the performance relative to the `baseline`. If no epoch - improves on `baseline`, training will run for `patience` - epochs and restore weights from the best epoch in that set. - - Example: - - >>> callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3) - >>> # This callback will stop the training when there is no improvement in - >>> # the loss for three consecutive epochs. - >>> model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) - >>> model.compile(tf.keras.optimizers.SGD(), loss='mse') - >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), - ... epochs=10, batch_size=1, callbacks=[callback], - ... verbose=0) - >>> len(history.history['loss']) # Only 4 epochs are run. - 4 - """ - - def __init__(self, - monitor='val_loss', - min_delta=0, - patience=0, - verbose=0, - mode='auto', - baseline=None, - restore_best_weights=False): - super().__init__() - - self.monitor = monitor - self.patience = patience - self.verbose = verbose - self.baseline = baseline - self.min_delta = abs(min_delta) - self.wait = 0 - self.stopped_epoch = 0 - self.restore_best_weights = restore_best_weights - self.best_weights = None - - if mode not in ['auto', 'min', 'max']: - logging.warning('EarlyStopping mode %s is unknown, ' - 'fallback to auto mode.', mode) - mode = 'auto' - - if mode == 'min': - self.monitor_op = np.less - elif mode == 'max': - self.monitor_op = np.greater - else: - if (self.monitor.endswith('acc') or self.monitor.endswith('accuracy') or - self.monitor.endswith('auc')): - self.monitor_op = np.greater - else: - self.monitor_op = np.less - - if self.monitor_op == np.greater: - self.min_delta *= 1 - else: - self.min_delta *= -1 - - def on_train_begin(self, logs=None): - # Allow instances to be re-used - self.wait = 0 - self.stopped_epoch = 0 - self.best = np.Inf if self.monitor_op == np.less else -np.Inf - self.best_weights = None - self.best_epoch = 0 - - def on_epoch_end(self, epoch, logs=None): - current = self.get_monitor_value(logs) - if current is None: - return - if self.restore_best_weights and self.best_weights is None: - # Restore the weights after first epoch if no progress is ever made. 
- self.best_weights = self.model.get_weights() - - self.wait += 1 - if self._is_improvement(current, self.best): - self.best = current - self.best_epoch = epoch - if self.restore_best_weights: - self.best_weights = self.model.get_weights() - # Only restart wait if we beat both the baseline and our previous best. - if self.baseline is None or self._is_improvement(current, self.baseline): - self.wait = 0 + Custom batch-level summaries in a subclassed Model: - # Only check after the first epoch. - if self.wait >= self.patience and epoch > 0: - self.stopped_epoch = epoch - self.model.stop_training = True - if self.restore_best_weights and self.best_weights is not None: - if self.verbose > 0: - io_utils.print_msg( - 'Restoring model weights from the end of the best epoch: ' - f'{self.best_epoch + 1}.') - self.model.set_weights(self.best_weights) - - def on_train_end(self, logs=None): - if self.stopped_epoch > 0 and self.verbose > 0: - io_utils.print_msg( - f'Epoch {self.stopped_epoch + 1}: early stopping') - - def get_monitor_value(self, logs): - logs = logs or {} - monitor_value = logs.get(self.monitor) - if monitor_value is None: - logging.warning('Early stopping conditioned on metric `%s` ' - 'which is not available. Available metrics are: %s', - self.monitor, ','.join(list(logs.keys()))) - return monitor_value - - def _is_improvement(self, monitor_value, reference_value): - return self.monitor_op(monitor_value - self.min_delta, reference_value) - - -@keras_export('keras.callbacks.RemoteMonitor') -class RemoteMonitor(Callback): - """Callback used to stream events to a server. - - Requires the `requests` library. - Events are sent to `root + '/publish/epoch/end/'` by default. Calls are - HTTP POST, with a `data` argument which is a - JSON-encoded dictionary of event data. - If `send_as_json=True`, the content type of the request will be - `"application/json"`. - Otherwise the serialized JSON will be sent within a form. - - Args: - root: String; root url of the target server. - path: String; path relative to `root` to which the events will be sent. - field: String; JSON field under which the data will be stored. - The field is used only if the payload is sent within a form - (i.e. send_as_json is set to False). - headers: Dictionary; optional custom HTTP headers. - send_as_json: Boolean; whether the request should be - sent as `"application/json"`. 
- """ - - def __init__(self, - root='http://localhost:9000', - path='/publish/epoch/end/', - field='data', - headers=None, - send_as_json=False): - super().__init__() - - self.root = root - self.path = path - self.field = field - self.headers = headers - self.send_as_json = send_as_json - - def on_epoch_end(self, epoch, logs=None): - if requests is None: - raise ImportError('RemoteMonitor requires the `requests` library.') - logs = logs or {} - send = {} - send['epoch'] = epoch - for k, v in logs.items(): - # np.ndarray and np.generic are not scalar types - # therefore we must unwrap their scalar values and - # pass to the json-serializable dict 'send' - if isinstance(v, (np.ndarray, np.generic)): - send[k] = v.item() - else: - send[k] = v - try: - if self.send_as_json: - requests.post(self.root + self.path, json=send, headers=self.headers) - else: - requests.post( - self.root + self.path, {self.field: json.dumps(send)}, - headers=self.headers) - except requests.exceptions.RequestException: - logging.warning('Warning: could not reach RemoteMonitor ' - 'root server at ' + str(self.root)) - - -@keras_export('keras.callbacks.LearningRateScheduler') -class LearningRateScheduler(Callback): - """Learning rate scheduler. - - At the beginning of every epoch, this callback gets the updated learning rate - value from `schedule` function provided at `__init__`, with the current epoch - and current learning rate, and applies the updated learning rate - on the optimizer. - - Args: - schedule: a function that takes an epoch index (integer, indexed from 0) - and current learning rate (float) as inputs and returns a new - learning rate as output (float). - verbose: int. 0: quiet, 1: update messages. - - Example: - - >>> # This function keeps the initial learning rate for the first ten epochs - >>> # and decreases it exponentially after that. - >>> def scheduler(epoch, lr): - ... if epoch < 10: - ... return lr - ... else: - ... return lr * tf.math.exp(-0.1) - >>> - >>> model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) - >>> model.compile(tf.keras.optimizers.SGD(), loss='mse') - >>> round(model.optimizer.lr.numpy(), 5) - 0.01 - - >>> callback = tf.keras.callbacks.LearningRateScheduler(scheduler) - >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), - ... epochs=15, callbacks=[callback], verbose=0) - >>> round(model.optimizer.lr.numpy(), 5) - 0.00607 - - """ - - def __init__(self, schedule, verbose=0): - super().__init__() - self.schedule = schedule - self.verbose = verbose - - def on_epoch_begin(self, epoch, logs=None): - if not hasattr(self.model.optimizer, 'lr'): - raise ValueError('Optimizer must have a "lr" attribute.') - try: # new API - lr = float(backend.get_value(self.model.optimizer.lr)) - lr = self.schedule(epoch, lr) - except TypeError: # Support for old API for backward compatibility - lr = self.schedule(epoch) - if not isinstance(lr, (tf.Tensor, float, np.float32, np.float64)): - raise ValueError('The output of the "schedule" function ' - f'should be float. Got: {lr}') - if isinstance(lr, tf.Tensor) and not lr.dtype.is_floating: - raise ValueError( - f'The dtype of `lr` Tensor should be float. 
Got: {lr.dtype}') - backend.set_value(self.model.optimizer.lr, backend.get_value(lr)) - if self.verbose > 0: - io_utils.print_msg( - f'\nEpoch {epoch + 1}: LearningRateScheduler setting learning ' - f'rate to {lr}.') - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - logs['lr'] = backend.get_value(self.model.optimizer.lr) + ```python + class MyModel(tf.keras.Model): + def build(self, _): + self.dense = tf.keras.layers.Dense(10) -def keras_model_summary(name, data, step=None): - """Writes a Keras model as JSON to as a Summary. - - Writing the Keras model configuration allows the TensorBoard graph plugin to - render a conceptual graph, as opposed to graph of ops. In case the model fails - to serialize as JSON, it ignores and returns False. - - Args: - name: A name for this summary. The summary tag used for TensorBoard will be - this name prefixed by any active name scopes. - data: A Keras Model to write. - step: Explicit `int64`-castable monotonic step value for this summary. If - omitted, this defaults to `tf.summary.experimental.get_step()`, which must - not be None. - - Returns: - True on success, or False if no summary was written because no default - summary writer was available. - - Raises: - ValueError: if a default writer exists, but no step was provided and - `tf.summary.experimental.get_step()` is None. - """ - summary_metadata = tf.compat.v1.SummaryMetadata() - # Hard coding a plugin name. Please refer to go/tb-plugin-name-hardcode for - # the rationale. - summary_metadata.plugin_data.plugin_name = 'graph_keras_model' - # version number = 1 - summary_metadata.plugin_data.content = b'1' - - try: - json_string = data.to_json() - except Exception as exc: # pylint: disable=broad-except - # An exception should not break a model code. - logging.warning('Model failed to serialize as JSON. Ignoring... %s', exc) - return False - - with tf.summary.experimental.summary_scope( - name, 'graph_keras_model', [data, step]) as (tag, _): - with tf.device('cpu:0'): - tensor = tf.constant(json_string, dtype=tf.string) - return tf.summary.write( - tag=tag, tensor=tensor, step=step, metadata=summary_metadata) - - -@keras_export('keras.callbacks.TensorBoard', v1=[]) -class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): - # pylint: disable=line-too-long - """Enable visualizations for TensorBoard. - - TensorBoard is a visualization tool provided with TensorFlow. - - This callback logs events for TensorBoard, including: - - * Metrics summary plots - * Training graph visualization - * Weight histograms - * Sampled profiling - - When used in `Model.evaluate`, in addition to epoch summaries, there will be - a summary that records evaluation metrics vs `Model.optimizer.iterations` - written. The metric names will be prepended with `evaluation`, with - `Model.optimizer.iterations` being the step in the visualized TensorBoard. - - If you have installed TensorFlow with pip, you should be able - to launch TensorBoard from the command line: - - ``` - tensorboard --logdir=path_to_your_logs - ``` - - You can find more information about TensorBoard - [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard). - - Args: - log_dir: the path of the directory where to save the log files to be - parsed by TensorBoard. e.g. log_dir = os.path.join(working_dir, 'logs') - This directory should not be reused by any other callbacks. - histogram_freq: frequency (in epochs) at which to compute - weight histograms for the layers of the model. 
If set to 0, histograms - won't be computed. Validation data (or split) must be specified for - histogram visualizations. - write_graph: whether to visualize the graph in TensorBoard. The log file - can become quite large when write_graph is set to True. - write_images: whether to write model weights to visualize as image in - TensorBoard. - write_steps_per_second: whether to log the training steps per second into - Tensorboard. This supports both epoch and batch frequency logging. - update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`, - writes the losses and metrics to TensorBoard after each batch. The same - applies for `'epoch'`. If using an integer, let's say `1000`, the - callback will write the metrics and losses to TensorBoard every 1000 - batches. Note that writing too frequently to TensorBoard can slow down - your training. - profile_batch: Profile the batch(es) to sample compute characteristics. - profile_batch must be a non-negative integer or a tuple of integers. - A pair of positive integers signify a range of batches to profile. - By default, profiling is disabled. - embeddings_freq: frequency (in epochs) at which embedding layers will be - visualized. If set to 0, embeddings won't be visualized. - embeddings_metadata: Dictionary which maps embedding layer names to the - filename of a file in which to save metadata for the embedding layer. - In case the same metadata file is to be - used for all embedding layers, a single filename can be passed. - - Examples: - - Basic usage: - - ```python - tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs") - model.fit(x_train, y_train, epochs=2, callbacks=[tensorboard_callback]) - # Then run the tensorboard command to view the visualizations. - ``` - - Custom batch-level summaries in a subclassed Model: - - ```python - class MyModel(tf.keras.Model): - - def build(self, _): - self.dense = tf.keras.layers.Dense(10) - - def call(self, x): - outputs = self.dense(x) - tf.summary.histogram('outputs', outputs) - return outputs - - model = MyModel() - model.compile('sgd', 'mse') - - # Make sure to set `update_freq=N` to log a batch-level summary every N batches. - # In addition to any `tf.summary` contained in `Model.call`, metrics added in - # `Model.compile` will be logged every N batches. - tb_callback = tf.keras.callbacks.TensorBoard('./logs', update_freq=1) - model.fit(x_train, y_train, callbacks=[tb_callback]) - ``` - - Custom batch-level summaries in a Functional API Model: - - ```python - def my_summary(x): - tf.summary.histogram('x', x) - return x - - inputs = tf.keras.Input(10) - x = tf.keras.layers.Dense(10)(inputs) - outputs = tf.keras.layers.Lambda(my_summary)(x) - model = tf.keras.Model(inputs, outputs) - model.compile('sgd', 'mse') - - # Make sure to set `update_freq=N` to log a batch-level summary every N batches. - # In addition to any `tf.summary` contained in `Model.call`, metrics added in - # `Model.compile` will be logged every N batches. - tb_callback = tf.keras.callbacks.TensorBoard('./logs', update_freq=1) - model.fit(x_train, y_train, callbacks=[tb_callback]) - ``` - - Profiling: - - ```python - # Profile a single batch, e.g. the 5th batch. - tensorboard_callback = tf.keras.callbacks.TensorBoard( - log_dir='./logs', profile_batch=5) - model.fit(x_train, y_train, epochs=2, callbacks=[tensorboard_callback]) - - # Profile a range of batches, e.g. from 10 to 20. 
- tensorboard_callback = tf.keras.callbacks.TensorBoard( - log_dir='./logs', profile_batch=(10,20)) - model.fit(x_train, y_train, epochs=2, callbacks=[tensorboard_callback]) - ``` - """ - - # pylint: enable=line-too-long - - def __init__(self, - log_dir='logs', - histogram_freq=0, - write_graph=True, - write_images=False, - write_steps_per_second=False, - update_freq='epoch', - profile_batch=0, - embeddings_freq=0, - embeddings_metadata=None, - **kwargs): - super().__init__() - self._supports_tf_logs = True - self._validate_kwargs(kwargs) - - self.log_dir = io_utils.path_to_string(log_dir) - self.histogram_freq = histogram_freq - self.write_graph = write_graph - self.write_images = write_images - self.write_steps_per_second = write_steps_per_second - self.update_freq = 1 if update_freq == 'batch' else update_freq - self.embeddings_freq = embeddings_freq - self.embeddings_metadata = embeddings_metadata - self._init_profile_batch(profile_batch) - self._global_train_batch = 0 - self._previous_epoch_iterations = 0 - self._train_accumulated_time = 0 - self._batch_start_time = 0 - - # Lazily initialized in order to avoid creating event files when - # not needed. - self._writers = {} - - # Used to restore any existing `SummaryWriter` after training ends. - self._prev_summary_state = [] - - def _validate_kwargs(self, kwargs): - """Handle arguments were supported in V1.""" - if kwargs.get('write_grads', False): - logging.warning('`write_grads` will be ignored in TensorFlow 2.0 ' - 'for the `TensorBoard` Callback.') - if kwargs.get('batch_size', False): - logging.warning('`batch_size` is no longer needed in the ' - '`TensorBoard` Callback and will be ignored ' - 'in TensorFlow 2.0.') - if kwargs.get('embeddings_layer_names', False): - logging.warning('`embeddings_layer_names` is not supported in ' - 'TensorFlow 2.0. Instead, all `Embedding` layers ' - 'will be visualized.') - if kwargs.get('embeddings_data', False): - logging.warning('`embeddings_data` is not supported in TensorFlow ' - '2.0. Instead, all `Embedding` variables will be ' - 'visualized.') - - supported_kwargs = {'write_grads', 'embeddings_layer_names', - 'embeddings_data', 'batch_size'} - unrecognized_kwargs = set(kwargs.keys()) - supported_kwargs - - # Only allow kwargs that were supported in V1. - if unrecognized_kwargs: - raise ValueError( - 'Unrecognized arguments in `TensorBoard` Callback: ' - f'{unrecognized_kwargs}. Supported kwargs are: {supported_kwargs}') - - def set_model(self, model): - """Sets Keras model and writes graph if specified.""" - self.model = model - self._log_write_dir = self._get_log_write_dir() - - self._train_dir = os.path.join(self._log_write_dir, 'train') - self._train_step = self.model._train_counter # pylint: disable=protected-access - - self._val_dir = os.path.join(self._log_write_dir, 'validation') - self._val_step = self.model._test_counter # pylint: disable=protected-access - - self._writers = {} # Resets writers. 
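The kwargs handling in `_validate_kwargs` above is easiest to see from the caller's side. A minimal sketch of the observable behavior, assuming the TF2 `tf.keras.callbacks.TensorBoard` shown in this diff (`./logs` is a placeholder path, and `wirte_grads` is a deliberately misspelled example argument):

```python
import tensorflow as tf

# A V1-only argument such as `write_grads` is accepted but ignored;
# the callback merely logs a warning.
tb = tf.keras.callbacks.TensorBoard(log_dir="./logs", write_grads=True)

# Anything outside the four legacy kwargs is rejected outright.
try:
    tf.keras.callbacks.TensorBoard(log_dir="./logs", wirte_grads=True)
except ValueError as e:
    print(e)  # "Unrecognized arguments in `TensorBoard` Callback: ..."
```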
- - self._should_write_train_graph = False - if self.write_graph: - self._write_keras_model_summary() - self._should_write_train_graph = True - if self.embeddings_freq: - self._configure_embeddings() - - @property - def _train_writer(self): - if 'train' not in self._writers: - self._writers['train'] = tf.summary.create_file_writer( - self._train_dir) - return self._writers['train'] - - @property - def _val_writer(self): - if 'val' not in self._writers: - self._writers['val'] = tf.summary.create_file_writer(self._val_dir) - return self._writers['val'] - - def _get_log_write_dir(self): - """For multi-worker, only chief should write, others write to '/tmp'.""" - return distributed_file_utils.write_dirpath(self.log_dir, - self.model.distribute_strategy) - - def _delete_tmp_write_dir(self): - """Deletes tmp write directories for multi-worker.""" - distributed_file_utils.remove_temp_dirpath(self.log_dir, - self.model.distribute_strategy) - - def _write_keras_model_train_graph(self): - """Writes Keras model train_function graph to TensorBoard.""" - with self._train_writer.as_default(): - with tf.summary.record_if(True): - train_fn = self.model.train_tf_function - # If the train_function is a `tf.function`, we can write out a graph - if hasattr(train_fn, 'function_spec'): - tf.summary.graph(train_fn._concrete_stateful_fn.graph) # pylint: disable=protected-access - - def _write_keras_model_summary(self): - """Writes Keras graph network summary to TensorBoard.""" - with self._train_writer.as_default(): - with tf.summary.record_if(True): - summary_writable = ( - self.model._is_graph_network or # pylint: disable=protected-access - self.model.__class__.__name__ == 'Sequential') # pylint: disable=protected-access - if summary_writable: - keras_model_summary('keras', self.model, step=0) - - def _configure_embeddings(self): - """Configure the Projector for embeddings.""" - # TODO(omalleyt): Add integration tests. - from google.protobuf import text_format - from keras.layers import core - from keras.protobuf import projector_config_pb2 - - config = projector_config_pb2.ProjectorConfig() - for layer in self.model.layers: - if isinstance(layer, core.Embedding): - embedding = config.embeddings.add() - # Embeddings are always the first layer, so this naming should be - # consistent in any keras models checkpoints. - name = 'layer_with_weights-0/embeddings/.ATTRIBUTES/VARIABLE_VALUE' - embedding.tensor_name = name - - if self.embeddings_metadata is not None: - if isinstance(self.embeddings_metadata, str): - embedding.metadata_path = self.embeddings_metadata - else: - if layer.name in self.embeddings_metadata.keys(): - embedding.metadata_path = self.embeddings_metadata.pop(layer.name) - - if self.embeddings_metadata and not isinstance(self.embeddings_metadata, - str): - raise ValueError('Unrecognized `Embedding` layer names passed to ' - '`keras.callbacks.TensorBoard` `embeddings_metadata` ' - f'argument: {self.embeddings_metadata.keys()}') - - config_pbtxt = text_format.MessageToString(config) - path = os.path.join(self._log_write_dir, 'projector_config.pbtxt') - with tf.io.gfile.GFile(path, 'w') as f: - f.write(config_pbtxt) - - def _push_writer(self, writer, step): - """Sets the default writer for custom batch-level summaries.""" - if self.update_freq == 'epoch': - return - - should_record = lambda: tf.equal(step % self.update_freq, 0) - # TODO(b/151339474): Fix deadlock when not using .value() here. 
- summary_context = (writer.as_default(step.value()), - tf.summary.record_if(should_record)) - self._prev_summary_state.append(summary_context) - summary_context[0].__enter__() - summary_context[1].__enter__() - - def _pop_writer(self): - """Pops the current writer.""" - if self.update_freq == 'epoch': - return - - # See _push_writer for the content of the previous_context, which is pair - # of context. - previous_context = self._prev_summary_state.pop() - previous_context[1].__exit__(*sys.exc_info()) - previous_context[0].__exit__(*sys.exc_info()) - - def _close_writers(self): - for writer in self._writers.values(): - writer.close() - - def _init_profile_batch(self, profile_batch): - """Validate profile_batch value and set the range of batches to profile. - - Sets values of _start_batch and _stop_batch attributes, - specifying the start and stop batch to profile. - Setting `profile_batch=0` disables profiling. + def call(self, x): + outputs = self.dense(x) + tf.summary.histogram('outputs', outputs) + return outputs - Args: - profile_batch: The range of batches to profile. Should be a non-negative - integer or a comma separated string of pair of positive integers. A pair - of positive integers signify a range of batches to profile. + model = MyModel() + model.compile('sgd', 'mse') - Raises: - ValueError: If profile_batch is not an integer or a comma separated pair - of positive integers. + # Make sure to set `update_freq=N` to log a batch-level summary every N + # batches. In addition to any `tf.summary` contained in `Model.call`, + # metrics added in `Model.compile` will be logged every N batches. + tb_callback = tf.keras.callbacks.TensorBoard('./logs', update_freq=1) + model.fit(x_train, y_train, callbacks=[tb_callback]) + ``` - """ - profile_batch_error_message = ( - 'profile_batch must be a non-negative integer or 2-tuple of positive ' - 'integers. A pair of positive integers signifies a range of batches ' - f'to profile. Found: {profile_batch}') - - # Support legacy way of specifying "start,stop" or "start" as str. - if isinstance(profile_batch, str): - profile_batch = str(profile_batch).split(',') - profile_batch = tf.nest.map_structure(int, profile_batch) - - if isinstance(profile_batch, int): - self._start_batch = profile_batch - self._stop_batch = profile_batch - elif isinstance(profile_batch, (tuple, list)) and len(profile_batch) == 2: - self._start_batch, self._stop_batch = profile_batch - else: - raise ValueError(profile_batch_error_message) - - if self._start_batch < 0 or self._stop_batch < self._start_batch: - raise ValueError(profile_batch_error_message) - - # True when the profiler was successfully started by this callback. - # We track the status here to make sure callbacks do not interfere with - # each other. The callback will only stop the profiler it started. - self._profiler_started = False - if self._start_batch > 0: - # Warm up and improve the profiling accuracy. - self._start_profiler(logdir='') - self._stop_profiler(save=False) - # True when a trace is running. - self._is_tracing = False - - # Setting `profile_batch=0` disables profiling. 
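To make the `record_if` gating used by `_push_writer` concrete, here is a small self-contained sketch that uses only the public `tf.summary` API; `/tmp/tb_demo` and the variable names are illustrative placeholders, not part of this diff:

```python
import tensorflow as tf

writer = tf.summary.create_file_writer("/tmp/tb_demo")
step = tf.Variable(0, dtype=tf.int64)
update_freq = 2  # stand-in for `TensorBoard(update_freq=2)`

with writer.as_default():
    # The predicate is re-evaluated on every summary call, so only every
    # `update_freq`-th step is actually written; the rest become no-ops.
    # (`_push_writer` additionally passes the step to `as_default`.)
    with tf.summary.record_if(lambda: tf.equal(step % update_freq, 0)):
        for _ in range(4):
            tf.summary.scalar("demo/loss", 0.1, step=step)
            step.assign_add(1)
```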
- self._should_trace = not (self._start_batch == 0 and self._stop_batch == 0) - - def on_train_begin(self, logs=None): - self._global_train_batch = 0 - self._previous_epoch_iterations = 0 - self._push_writer(self._train_writer, self._train_step) - - def on_train_end(self, logs=None): - self._pop_writer() - - if self._is_tracing: - self._stop_trace() - - self._close_writers() - self._delete_tmp_write_dir() - - def on_test_begin(self, logs=None): - self._push_writer(self._val_writer, self._val_step) - - def on_test_end(self, logs=None): - if self.model.optimizer and hasattr(self.model.optimizer, 'iterations'): - with tf.summary.record_if(True), self._val_writer.as_default(): - for name, value in logs.items(): - tf.summary.scalar( - 'evaluation_' + name + '_vs_iterations', - value, - step=self.model.optimizer.iterations.read_value()) - self._pop_writer() - - def _implements_train_batch_hooks(self): - # Only call batch hooks when tracing or write_steps_per_second are enabled - return self._should_trace or self.write_steps_per_second - - def on_train_batch_begin(self, batch, logs=None): - self._global_train_batch += 1 - if self.write_steps_per_second: - self._batch_start_time = time.time() - if not self._should_trace: - return - - if self._global_train_batch == self._start_batch: - self._start_trace() - - def on_train_batch_end(self, batch, logs=None): - if self._should_write_train_graph: - self._write_keras_model_train_graph() - self._should_write_train_graph = False - if self.write_steps_per_second: - batch_run_time = time.time() - self._batch_start_time - tf.summary.scalar( - 'batch_steps_per_second', 1. / batch_run_time, step=self._train_step) - if not self._should_trace: - return - - if self._is_tracing and self._global_train_batch >= self._stop_batch: - self._stop_trace() - - def on_epoch_begin(self, epoch, logs=None): - # Keeps track of epoch for profiling. - if self.write_steps_per_second: - self._previous_epoch_iterations = self.model.optimizer.iterations.numpy() - self._epoch_start_time = time.time() - - def on_epoch_end(self, epoch, logs=None): - """Runs metrics and histogram summaries at epoch end.""" - self._log_epoch_metrics(epoch, logs) - - if self.histogram_freq and epoch % self.histogram_freq == 0: - self._log_weights(epoch) - - if self.embeddings_freq and epoch % self.embeddings_freq == 0: - self._log_embeddings(epoch) - - def _start_trace(self): - tf.summary.trace_on(graph=True, profiler=False) - self._start_profiler(logdir=self.log_dir) - self._is_tracing = True - - def _stop_trace(self, batch=None): - """Logs the trace graph to TensorBoard.""" - if batch is None: - batch = self._stop_batch - with self._train_writer.as_default(): - with tf.summary.record_if(True): - # TODO(b/126388999): Remove step info in the summary name. 
- tf.summary.trace_export(name='batch_%d' % batch, step=batch) - self._stop_profiler() - self._is_tracing = False - - def _collect_learning_rate(self, logs): - lr_schedule = getattr(self.model.optimizer, 'lr', None) - if isinstance(lr_schedule, learning_rate_schedule.LearningRateSchedule): - logs['learning_rate'] = lr_schedule(self.model.optimizer.iterations) - return logs + Custom batch-level summaries in a Functional API Model: - def _compute_steps_per_second(self): - current_iteration = self.model.optimizer.iterations.numpy() - time_since_epoch_begin = time.time() - self._epoch_start_time - steps_per_second = ((current_iteration - self._previous_epoch_iterations) / - time_since_epoch_begin) - return steps_per_second + ```python + def my_summary(x): + tf.summary.histogram('x', x) + return x + + inputs = tf.keras.Input(10) + x = tf.keras.layers.Dense(10)(inputs) + outputs = tf.keras.layers.Lambda(my_summary)(x) + model = tf.keras.Model(inputs, outputs) + model.compile('sgd', 'mse') + + # Make sure to set `update_freq=N` to log a batch-level summary every N + # batches. In addition to any `tf.summary` contained in `Model.call`, + # metrics added in `Model.compile` will be logged every N batches. + tb_callback = tf.keras.callbacks.TensorBoard('./logs', update_freq=1) + model.fit(x_train, y_train, callbacks=[tb_callback]) + ``` - def _log_epoch_metrics(self, epoch, logs): - """Writes epoch metrics out as scalar summaries. + Profiling: - Args: - epoch: Int. The global step to use for TensorBoard. - logs: Dict. Keys are scalar summary names, values are scalars. + ```python + # Profile a single batch, e.g. the 5th batch. + tensorboard_callback = tf.keras.callbacks.TensorBoard( + log_dir='./logs', profile_batch=5) + model.fit(x_train, y_train, epochs=2, callbacks=[tensorboard_callback]) + + # Profile a range of batches, e.g. from 10 to 20. + tensorboard_callback = tf.keras.callbacks.TensorBoard( + log_dir='./logs', profile_batch=(10,20)) + model.fit(x_train, y_train, epochs=2, callbacks=[tensorboard_callback]) + ``` """ - if not logs: - return - train_logs = {k: v for k, v in logs.items() if not k.startswith('val_')} - val_logs = {k: v for k, v in logs.items() if k.startswith('val_')} - train_logs = self._collect_learning_rate(train_logs) - if self.write_steps_per_second: - train_logs['steps_per_second'] = self._compute_steps_per_second() - - with tf.summary.record_if(True): - if train_logs: + def __init__( + self, + log_dir="logs", + histogram_freq=0, + write_graph=True, + write_images=False, + write_steps_per_second=False, + update_freq="epoch", + profile_batch=0, + embeddings_freq=0, + embeddings_metadata=None, + **kwargs, + ): + super().__init__() + self._supports_tf_logs = True + self._validate_kwargs(kwargs) + + self.log_dir = io_utils.path_to_string(log_dir) + self.histogram_freq = histogram_freq + self.write_graph = write_graph + self.write_images = write_images + self.write_steps_per_second = write_steps_per_second + self.update_freq = 1 if update_freq == "batch" else update_freq + self.embeddings_freq = embeddings_freq + self.embeddings_metadata = embeddings_metadata + self._init_profile_batch(profile_batch) + self._global_train_batch = 0 + self._previous_epoch_iterations = 0 + self._train_accumulated_time = 0 + self._batch_start_time = 0 + + # Lazily initialized in order to avoid creating event files when + # not needed. + self._writers = {} + + # Used to restore any existing `SummaryWriter` after training ends. 
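A usage note on `_collect_learning_rate` above: a `learning_rate` series is only logged when the optimizer's learning rate is a `LearningRateSchedule` rather than a plain float. A hedged sketch of how that is typically set up (model and data omitted; the schedule values are arbitrary):

```python
import tensorflow as tf

# Any built-in schedule works; ExponentialDecay is just an example.
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=1000, decay_rate=0.9
)
optimizer = tf.keras.optimizers.SGD(learning_rate=schedule)
# model.compile(optimizer, "mse")
# model.fit(..., callbacks=[tf.keras.callbacks.TensorBoard("./logs")])
# TensorBoard then shows the decayed value each epoch as `learning_rate`.
```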
+        self._prev_summary_state = []
+
+    def _validate_kwargs(self, kwargs):
+        """Handle arguments that were supported in V1."""
+        if kwargs.get("write_grads", False):
+            logging.warning(
+                "`write_grads` will be ignored in TensorFlow 2.0 "
+                "for the `TensorBoard` Callback."
+            )
+        if kwargs.get("batch_size", False):
+            logging.warning(
+                "`batch_size` is no longer needed in the "
+                "`TensorBoard` Callback and will be ignored "
+                "in TensorFlow 2.0."
+            )
+        if kwargs.get("embeddings_layer_names", False):
+            logging.warning(
+                "`embeddings_layer_names` is not supported in "
+                "TensorFlow 2.0. Instead, all `Embedding` layers "
+                "will be visualized."
+            )
+        if kwargs.get("embeddings_data", False):
+            logging.warning(
+                "`embeddings_data` is not supported in TensorFlow "
+                "2.0. Instead, all `Embedding` variables will be "
+                "visualized."
+            )
+
+        supported_kwargs = {
+            "write_grads",
+            "embeddings_layer_names",
+            "embeddings_data",
+            "batch_size",
+        }
+        unrecognized_kwargs = set(kwargs.keys()) - supported_kwargs
+
+        # Only allow kwargs that were supported in V1.
+        if unrecognized_kwargs:
+            raise ValueError(
+                "Unrecognized arguments in `TensorBoard` Callback: "
+                f"{unrecognized_kwargs}. "
+                f"Supported kwargs are: {supported_kwargs}"
+            )
+
+    def set_model(self, model):
+        """Sets Keras model and writes graph if specified."""
+        self.model = model
+        self._log_write_dir = self._get_log_write_dir()
+
+        self._train_dir = os.path.join(self._log_write_dir, "train")
+        self._train_step = self.model._train_counter
+
+        self._val_dir = os.path.join(self._log_write_dir, "validation")
+        self._val_step = self.model._test_counter
+
+        self._writers = {}  # Resets writers.
+
+        self._should_write_train_graph = False
+        if self.write_graph:
+            self._write_keras_model_summary()
+            self._should_write_train_graph = True
+        if self.embeddings_freq:
+            self._configure_embeddings()
+
+    @property
+    def _train_writer(self):
+        if "train" not in self._writers:
+            self._writers["train"] = tf.summary.create_file_writer(
+                self._train_dir
+            )
+        return self._writers["train"]
+
+    @property
+    def _val_writer(self):
+        if "val" not in self._writers:
+            self._writers["val"] = tf.summary.create_file_writer(self._val_dir)
+        return self._writers["val"]
+
+    def _get_log_write_dir(self):
+        """For multi-worker, only chief should write, others write to '/tmp'."""
+        return distributed_file_utils.write_dirpath(
+            self.log_dir, self.model.distribute_strategy
+        )
+
+    def _delete_tmp_write_dir(self):
+        """Deletes tmp write directories for multi-worker."""
+        distributed_file_utils.remove_temp_dirpath(
+            self.log_dir, self.model.distribute_strategy
+        )
+
+    def _write_keras_model_train_graph(self):
+        """Writes Keras model train_function graph to TensorBoard."""
+        with self._train_writer.as_default():
+            with tf.summary.record_if(True):
+                train_fn = self.model.train_tf_function
+                # If the train_function is a `tf.function`, we can write out a
+                # graph
+                if hasattr(train_fn, "function_spec"):
+                    tf.summary.graph(
+                        train_fn._concrete_variable_creation_fn.graph
+                    )
+
+    def _write_keras_model_summary(self):
+        """Writes Keras graph network summary to TensorBoard."""
         with self._train_writer.as_default():
-          for name, value in train_logs.items():
-            tf.summary.scalar('epoch_' + name, value, step=epoch)
-      if val_logs:
-        with self._val_writer.as_default():
-          for name, value in val_logs.items():
-            name = name[4:]  # Remove 'val_' prefix.
- tf.summary.scalar('epoch_' + name, value, step=epoch) - - def _log_weights(self, epoch): - """Logs the weights of the Model to TensorBoard.""" - with self._train_writer.as_default(): - with tf.summary.record_if(True): + with tf.summary.record_if(True): + summary_writable = ( + self.model._is_graph_network + or self.model.__class__.__name__ == "Sequential" + ) + if summary_writable: + keras_model_summary("keras", self.model, step=0) + + def _configure_embeddings(self): + """Configure the Projector for embeddings.""" + # TODO(omalleyt): Add integration tests. + from keras.layers import core + from keras.protobuf import projector_config_pb2 + + # isort: off + from google.protobuf import text_format + + config = projector_config_pb2.ProjectorConfig() for layer in self.model.layers: - for weight in layer.weights: - weight_name = weight.name.replace(':', '_') - # Add a suffix to prevent summary tag name collision. - histogram_weight_name = weight_name + '/histogram' - tf.summary.histogram(histogram_weight_name, weight, step=epoch) - if self.write_images: - # Add a suffix to prevent summary tag name collision. - image_weight_name = weight_name + '/image' - self._log_weight_as_image(weight, image_weight_name, epoch) - self._train_writer.flush() - - def _log_weight_as_image(self, weight, weight_name, epoch): - """Logs a weight as a TensorBoard image.""" - w_img = tf.squeeze(weight) - shape = backend.int_shape(w_img) - if len(shape) == 1: # Bias case - w_img = tf.reshape(w_img, [1, shape[0], 1, 1]) - elif len(shape) == 2: # Dense layer kernel case - if shape[0] > shape[1]: - w_img = tf.transpose(w_img) + if isinstance(layer, core.Embedding): + embedding = config.embeddings.add() + # Embeddings are always the first layer, so this naming should + # be consistent in any keras models checkpoints. + name = ( + "layer_with_weights-0/embeddings/.ATTRIBUTES/VARIABLE_VALUE" + ) + embedding.tensor_name = name + + if self.embeddings_metadata is not None: + if isinstance(self.embeddings_metadata, str): + embedding.metadata_path = self.embeddings_metadata + else: + if layer.name in self.embeddings_metadata.keys(): + embedding.metadata_path = ( + self.embeddings_metadata.pop(layer.name) + ) + + if self.embeddings_metadata and not isinstance( + self.embeddings_metadata, str + ): + raise ValueError( + "Unrecognized `Embedding` layer names passed to " + "`keras.callbacks.TensorBoard` `embeddings_metadata` " + f"argument: {self.embeddings_metadata.keys()}" + ) + + config_pbtxt = text_format.MessageToString(config) + path = os.path.join(self._log_write_dir, "projector_config.pbtxt") + with tf.io.gfile.GFile(path, "w") as f: + f.write(config_pbtxt) + + def _push_writer(self, writer, step): + """Sets the default writer for custom batch-level summaries.""" + if self.update_freq == "epoch": + return + + should_record = lambda: tf.equal(step % self.update_freq, 0) + # TODO(b/151339474): Fix deadlock when not using .value() here. + summary_context = ( + writer.as_default(step.value()), + tf.summary.record_if(should_record), + ) + self._prev_summary_state.append(summary_context) + summary_context[0].__enter__() + summary_context[1].__enter__() + + def _pop_writer(self): + """Pops the current writer.""" + if self.update_freq == "epoch": + return + + # See _push_writer for the content of the previous_context, which is + # pair of context. 
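Given `_configure_embeddings` above, `embeddings_metadata` can be either a single shared filename or a per-layer mapping. A short illustration from the user's side; `meta.tsv` and `my_embedding` are hypothetical names:

```python
import tensorflow as tf

# A single metadata file shared by every `Embedding` layer:
tb = tf.keras.callbacks.TensorBoard(
    log_dir="./logs", embeddings_freq=1, embeddings_metadata="meta.tsv"
)

# Or a per-layer mapping; names that match no `Embedding` layer are left in
# the dict, which makes the ValueError at the end of `_configure_embeddings`
# fire when the projector config is written (at `set_model` time).
tb = tf.keras.callbacks.TensorBoard(
    log_dir="./logs",
    embeddings_freq=1,
    embeddings_metadata={"my_embedding": "meta.tsv"},
)
```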
+ previous_context = self._prev_summary_state.pop() + previous_context[1].__exit__(*sys.exc_info()) + previous_context[0].__exit__(*sys.exc_info()) + + def _close_writers(self): + for writer in self._writers.values(): + writer.close() + + def _init_profile_batch(self, profile_batch): + """Validate profile_batch value and set the range of batches to profile. + + Sets values of _start_batch and _stop_batch attributes, + specifying the start and stop batch to profile. + Setting `profile_batch=0` disables profiling. + + Args: + profile_batch: The range of batches to profile. Should be a + non-negative integer or a comma separated string of pair of positive + integers. A pair of positive integers signify a range of batches to + profile. + + Raises: + ValueError: If profile_batch is not an integer or a comma separated + pair of positive integers. + + """ + profile_batch_error_message = ( + "profile_batch must be a non-negative integer or " + "2-tuple of positive " + "integers. A pair of positive integers " + "signifies a range of batches " + f"to profile. Found: {profile_batch}" + ) + + # Support legacy way of specifying "start,stop" or "start" as str. + if isinstance(profile_batch, str): + profile_batch = str(profile_batch).split(",") + profile_batch = tf.nest.map_structure(int, profile_batch) + + if isinstance(profile_batch, int): + self._start_batch = profile_batch + self._stop_batch = profile_batch + elif ( + isinstance(profile_batch, (tuple, list)) and len(profile_batch) == 2 + ): + self._start_batch, self._stop_batch = profile_batch + else: + raise ValueError(profile_batch_error_message) + + if self._start_batch < 0 or self._stop_batch < self._start_batch: + raise ValueError(profile_batch_error_message) + + # True when the profiler was successfully started by this callback. + # We track the status here to make sure callbacks do not interfere with + # each other. The callback will only stop the profiler it started. + self._profiler_started = False + if self._start_batch > 0: + # Warm up and improve the profiling accuracy. + self._start_profiler(logdir="") + self._stop_profiler(save=False) + # True when a trace is running. + self._is_tracing = False + + # Setting `profile_batch=0` disables profiling. 
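`_init_profile_batch` above accepts three spellings of the same request and rejects anything else with the quoted `ValueError`. A short illustration (log paths are placeholders):

```python
import tensorflow as tf

# Equivalent ways to profile batches 10 through 20:
cb_a = tf.keras.callbacks.TensorBoard(log_dir="./logs", profile_batch=(10, 20))
cb_b = tf.keras.callbacks.TensorBoard(log_dir="./logs", profile_batch="10,20")

# A single integer profiles exactly that batch:
cb_c = tf.keras.callbacks.TensorBoard(log_dir="./logs", profile_batch=5)

# And 0 (the default in this diff) disables profiling entirely.
cb_d = tf.keras.callbacks.TensorBoard(log_dir="./logs", profile_batch=0)
```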
+ self._should_trace = not ( + self._start_batch == 0 and self._stop_batch == 0 + ) + + def on_train_begin(self, logs=None): + self._global_train_batch = 0 + self._previous_epoch_iterations = 0 + self._push_writer(self._train_writer, self._train_step) + + def on_train_end(self, logs=None): + self._pop_writer() + + if self._is_tracing: + self._stop_trace() + + self._close_writers() + self._delete_tmp_write_dir() + + def on_test_begin(self, logs=None): + self._push_writer(self._val_writer, self._val_step) + + def on_test_end(self, logs=None): + if self.model.optimizer and hasattr(self.model.optimizer, "iterations"): + with tf.summary.record_if(True), self._val_writer.as_default(): + for name, value in logs.items(): + tf.summary.scalar( + "evaluation_" + name + "_vs_iterations", + value, + step=self.model.optimizer.iterations.read_value(), + ) + self._pop_writer() + + def _implements_train_batch_hooks(self): + # Only call batch hooks when tracing or write_steps_per_second are + # enabled + return self._should_trace or self.write_steps_per_second + + def on_train_batch_begin(self, batch, logs=None): + self._global_train_batch += 1 + if self.write_steps_per_second: + self._batch_start_time = time.time() + if not self._should_trace: + return + + if self._global_train_batch == self._start_batch: + self._start_trace() + + def on_train_batch_end(self, batch, logs=None): + if self._should_write_train_graph: + self._write_keras_model_train_graph() + self._should_write_train_graph = False + if self.write_steps_per_second: + batch_run_time = time.time() - self._batch_start_time + tf.summary.scalar( + "batch_steps_per_second", + 1.0 / batch_run_time, + step=self._train_step, + ) + + # `logs` isn't necessarily always a dict. For example, when using + # `tf.distribute.experimental.ParameterServerStrategy`, a + # `tf.distribute.experimental.coordinator.RemoteValue` will be passed. + # For now, we just disable `update_freq` in those cases. + if isinstance(logs, dict): + for name, value in logs.items(): + tf.summary.scalar("batch_" + name, value, step=self._train_step) + + if not self._should_trace: + return + + if self._is_tracing and self._global_train_batch >= self._stop_batch: + self._stop_trace() + + def on_epoch_begin(self, epoch, logs=None): + # Keeps track of epoch for profiling. + if self.write_steps_per_second: + self._previous_epoch_iterations = ( + self.model.optimizer.iterations.numpy() + ) + self._epoch_start_time = time.time() + + def on_epoch_end(self, epoch, logs=None): + """Runs metrics and histogram summaries at epoch end.""" + self._log_epoch_metrics(epoch, logs) + + if self.histogram_freq and epoch % self.histogram_freq == 0: + self._log_weights(epoch) + + if self.embeddings_freq and epoch % self.embeddings_freq == 0: + self._log_embeddings(epoch) + + def _start_trace(self): + tf.summary.trace_on(graph=True, profiler=False) + self._start_profiler(logdir=self.log_dir) + self._is_tracing = True + + def _stop_trace(self, batch=None): + """Logs the trace graph to TensorBoard.""" + if batch is None: + batch = self._stop_batch + with self._train_writer.as_default(): + with tf.summary.record_if(True): + # TODO(b/126388999): Remove step info in the summary name. 
+ tf.summary.trace_export(name="batch_%d" % batch, step=batch) + self._stop_profiler() + self._is_tracing = False + + def _collect_learning_rate(self, logs): + if isinstance(self.model.optimizer, optimizer.Optimizer): + lr_schedule = getattr(self.model.optimizer, "_learning_rate", None) + else: + lr_schedule = getattr(self.model.optimizer, "lr", None) + if isinstance(lr_schedule, learning_rate_schedule.LearningRateSchedule): + logs["learning_rate"] = lr_schedule(self.model.optimizer.iterations) + return logs + + def _compute_steps_per_second(self): + current_iteration = self.model.optimizer.iterations.numpy() + time_since_epoch_begin = time.time() - self._epoch_start_time + steps_per_second = ( + current_iteration - self._previous_epoch_iterations + ) / time_since_epoch_begin + return steps_per_second + + def _log_epoch_metrics(self, epoch, logs): + """Writes epoch metrics out as scalar summaries. + + Args: + epoch: Int. The global step to use for TensorBoard. + logs: Dict. Keys are scalar summary names, values are scalars. + """ + if not logs: + return + + train_logs = dict() + val_logs = dict() + for k, v in logs.items(): + if k.startswith("val_"): + val_logs[k] = v + else: + train_logs[k] = v + + train_logs = self._collect_learning_rate(train_logs) + if self.write_steps_per_second: + train_logs["steps_per_second"] = self._compute_steps_per_second() + + with tf.summary.record_if(True): + if train_logs: + with self._train_writer.as_default(): + for name, value in train_logs.items(): + tf.summary.scalar("epoch_" + name, value, step=epoch) + if val_logs: + with self._val_writer.as_default(): + for name, value in val_logs.items(): + name = name[4:] # Remove 'val_' prefix. + tf.summary.scalar("epoch_" + name, value, step=epoch) + + def _log_weights(self, epoch): + """Logs the weights of the Model to TensorBoard.""" + with self._train_writer.as_default(): + with tf.summary.record_if(True): + for layer in self.model.layers: + for weight in layer.weights: + weight_name = weight.name.replace(":", "_") + # Add a suffix to prevent summary tag name collision. + histogram_weight_name = weight_name + "/histogram" + tf.summary.histogram( + histogram_weight_name, weight, step=epoch + ) + if self.write_images: + # Add a suffix to prevent summary tag name + # collision. + image_weight_name = weight_name + "/image" + self._log_weight_as_image( + weight, image_weight_name, epoch + ) + self._train_writer.flush() + + def _log_weight_as_image(self, weight, weight_name, epoch): + """Logs a weight as a TensorBoard image.""" + w_img = tf.squeeze(weight) shape = backend.int_shape(w_img) - w_img = tf.reshape(w_img, [1, shape[0], shape[1], 1]) - elif len(shape) == 3: # ConvNet case - if backend.image_data_format() == 'channels_last': - # Switch to channels_first to display every kernel as a separate - # image. - w_img = tf.transpose(w_img, perm=[2, 0, 1]) + if len(shape) == 1: # Bias case + w_img = tf.reshape(w_img, [1, shape[0], 1, 1]) + elif len(shape) == 2: # Dense layer kernel case + if shape[0] > shape[1]: + w_img = tf.transpose(w_img) + shape = backend.int_shape(w_img) + w_img = tf.reshape(w_img, [1, shape[0], shape[1], 1]) + elif len(shape) == 3: # ConvNet case + if backend.image_data_format() == "channels_last": + # Switch to channels_first to display every kernel as a separate + # image. 
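The key split performed by `_log_epoch_metrics` above is what makes training and validation curves overlay in TensorBoard. A tiny stand-alone sketch of that key handling, with made-up values:

```python
logs = {"loss": 0.31, "accuracy": 0.90, "val_loss": 0.42}

train_logs = {k: v for k, v in logs.items() if not k.startswith("val_")}
val_logs = {k[4:]: v for k, v in logs.items() if k.startswith("val_")}

print(train_logs)  # {'loss': 0.31, 'accuracy': 0.9} -> train writer, "epoch_*"
print(val_logs)    # {'loss': 0.42} -> val writer, also tagged "epoch_loss"
# Same tag, two writers: TensorBoard draws both series on one chart.
```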
+ w_img = tf.transpose(w_img, perm=[2, 0, 1]) + shape = backend.int_shape(w_img) + w_img = tf.reshape(w_img, [shape[0], shape[1], shape[2], 1]) + shape = backend.int_shape(w_img) - w_img = tf.reshape(w_img, [shape[0], shape[1], shape[2], 1]) + # Not possible to handle 3D convnets etc. + if len(shape) == 4 and shape[-1] in [1, 3, 4]: + tf.summary.image(weight_name, w_img, step=epoch) + + def _log_embeddings(self, epoch): + embeddings_ckpt = os.path.join( + self._log_write_dir, + "train", + f"keras_embedding.ckpt-{epoch}", + ) + self.model.save_weights(embeddings_ckpt) + + def _start_profiler(self, logdir): + """Starts the profiler if currently inactive. + + Args: + logdir: Directory where profiler results will be saved. + """ + if self._profiler_started: + return + try: + tf.profiler.experimental.start(logdir=logdir) + self._profiler_started = True + except tf.errors.AlreadyExistsError as e: + # Profiler errors should not be fatal. + logging.error("Failed to start profiler: %s", e.message) + + def _stop_profiler(self, save=True): + """Stops the profiler if currently active. + + Args: + save: Whether to save the profiler results to TensorBoard. + """ + if not self._profiler_started: + return + try: + tf.profiler.experimental.stop(save=save) + except tf.errors.UnavailableError as e: + # Profiler errors should not be fatal. + logging.error("Failed to stop profiler: %s", e.message) + finally: + self._profiler_started = False - shape = backend.int_shape(w_img) - # Not possible to handle 3D convnets etc. - if len(shape) == 4 and shape[-1] in [1, 3, 4]: - tf.summary.image(weight_name, w_img, step=epoch) - def _log_embeddings(self, epoch): - embeddings_ckpt = os.path.join(self._log_write_dir, 'train', - 'keras_embedding.ckpt-{}'.format(epoch)) - self.model.save_weights(embeddings_ckpt) +@keras_export("keras.callbacks.ReduceLROnPlateau") +class ReduceLROnPlateau(Callback): + """Reduce learning rate when a metric has stopped improving. - def _start_profiler(self, logdir): - """Starts the profiler if currently inactive. + Models often benefit from reducing the learning rate by a factor + of 2-10 once learning stagnates. This callback monitors a + quantity and if no improvement is seen for a 'patience' number + of epochs, the learning rate is reduced. - Args: - logdir: Directory where profiler results will be saved. - """ - if self._profiler_started: - return - try: - tf.profiler.experimental.start(logdir=logdir) - self._profiler_started = True - except tf.errors.AlreadyExistsError as e: - # Profiler errors should not be fatal. - logging.error('Failed to start profiler: %s', e.message) + Example: - def _stop_profiler(self, save=True): - """Stops the profiler if currently active. + ```python + reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, + patience=5, min_lr=0.001) + model.fit(X_train, Y_train, callbacks=[reduce_lr]) + ``` Args: - save: Whether to save the profiler results to TensorBoard. + monitor: quantity to be monitored. + factor: factor by which the learning rate will be reduced. + `new_lr = lr * factor`. + patience: number of epochs with no improvement after which learning rate + will be reduced. + verbose: int. 0: quiet, 1: update messages. + mode: one of `{'auto', 'min', 'max'}`. In `'min'` mode, + the learning rate will be reduced when the + quantity monitored has stopped decreasing; in `'max'` mode it will be + reduced when the quantity monitored has stopped increasing; in + `'auto'` mode, the direction is automatically inferred from the name + of the monitored quantity. 
+ min_delta: threshold for measuring the new optimum, to only focus on + significant changes. + cooldown: number of epochs to wait before resuming normal operation + after lr has been reduced. + min_lr: lower bound on the learning rate. """ - if not self._profiler_started: - return - try: - tf.profiler.experimental.stop(save=save) - except tf.errors.UnavailableError as e: - # Profiler errors should not be fatal. - logging.error('Failed to stop profiler: %s', e.message) - finally: - self._profiler_started = False + def __init__( + self, + monitor="val_loss", + factor=0.1, + patience=10, + verbose=0, + mode="auto", + min_delta=1e-4, + cooldown=0, + min_lr=0, + **kwargs, + ): + super().__init__() + + self.monitor = monitor + if factor >= 1.0: + raise ValueError( + "ReduceLROnPlateau does not support " + f"a factor >= 1.0. Got {factor}" + ) + if "epsilon" in kwargs: + min_delta = kwargs.pop("epsilon") + logging.warning( + "`epsilon` argument is deprecated and " + "will be removed, use `min_delta` instead." + ) + self.factor = factor + self.min_lr = min_lr + self.min_delta = min_delta + self.patience = patience + self.verbose = verbose + self.cooldown = cooldown + self.cooldown_counter = 0 # Cooldown counter. + self.wait = 0 + self.best = 0 + self.mode = mode + self.monitor_op = None + self._reset() + + def _reset(self): + """Resets wait counter and cooldown counter.""" + if self.mode not in ["auto", "min", "max"]: + logging.warning( + "Learning rate reduction mode %s is unknown, " + "fallback to auto mode.", + self.mode, + ) + self.mode = "auto" + if self.mode == "min" or ( + self.mode == "auto" and "acc" not in self.monitor + ): + self.monitor_op = lambda a, b: np.less(a, b - self.min_delta) + self.best = np.Inf + else: + self.monitor_op = lambda a, b: np.greater(a, b + self.min_delta) + self.best = -np.Inf + self.cooldown_counter = 0 + self.wait = 0 -@keras_export('keras.callbacks.ReduceLROnPlateau') -class ReduceLROnPlateau(Callback): - """Reduce learning rate when a metric has stopped improving. - - Models often benefit from reducing the learning rate by a factor - of 2-10 once learning stagnates. This callback monitors a - quantity and if no improvement is seen for a 'patience' number - of epochs, the learning rate is reduced. - - Example: - - ```python - reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, - patience=5, min_lr=0.001) - model.fit(X_train, Y_train, callbacks=[reduce_lr]) - ``` - - Args: - monitor: quantity to be monitored. - factor: factor by which the learning rate will be reduced. - `new_lr = lr * factor`. - patience: number of epochs with no improvement after which learning rate - will be reduced. - verbose: int. 0: quiet, 1: update messages. - mode: one of `{'auto', 'min', 'max'}`. In `'min'` mode, - the learning rate will be reduced when the - quantity monitored has stopped decreasing; in `'max'` mode it will be - reduced when the quantity monitored has stopped increasing; in `'auto'` - mode, the direction is automatically inferred from the name of the - monitored quantity. - min_delta: threshold for measuring the new optimum, to only focus on - significant changes. - cooldown: number of epochs to wait before resuming normal operation after - lr has been reduced. - min_lr: lower bound on the learning rate. 
- """ - - def __init__(self, - monitor='val_loss', - factor=0.1, - patience=10, - verbose=0, - mode='auto', - min_delta=1e-4, - cooldown=0, - min_lr=0, - **kwargs): - super().__init__() - - self.monitor = monitor - if factor >= 1.0: - raise ValueError( - f'ReduceLROnPlateau does not support a factor >= 1.0. Got {factor}') - if 'epsilon' in kwargs: - min_delta = kwargs.pop('epsilon') - logging.warning('`epsilon` argument is deprecated and ' - 'will be removed, use `min_delta` instead.') - self.factor = factor - self.min_lr = min_lr - self.min_delta = min_delta - self.patience = patience - self.verbose = verbose - self.cooldown = cooldown - self.cooldown_counter = 0 # Cooldown counter. - self.wait = 0 - self.best = 0 - self.mode = mode - self.monitor_op = None - self._reset() - - def _reset(self): - """Resets wait counter and cooldown counter. - """ - if self.mode not in ['auto', 'min', 'max']: - logging.warning('Learning rate reduction mode %s is unknown, ' - 'fallback to auto mode.', self.mode) - self.mode = 'auto' - if (self.mode == 'min' or - (self.mode == 'auto' and 'acc' not in self.monitor)): - self.monitor_op = lambda a, b: np.less(a, b - self.min_delta) - self.best = np.Inf - else: - self.monitor_op = lambda a, b: np.greater(a, b + self.min_delta) - self.best = -np.Inf - self.cooldown_counter = 0 - self.wait = 0 - - def on_train_begin(self, logs=None): - self._reset() - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - logs['lr'] = backend.get_value(self.model.optimizer.lr) - current = logs.get(self.monitor) - if current is None: - logging.warning('Learning rate reduction is conditioned on metric `%s` ' - 'which is not available. Available metrics are: %s', - self.monitor, ','.join(list(logs.keys()))) + def on_train_begin(self, logs=None): + self._reset() + + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + logs["lr"] = backend.get_value(self.model.optimizer.lr) + current = logs.get(self.monitor) + if current is None: + logging.warning( + "Learning rate reduction is conditioned on metric `%s` " + "which is not available. Available metrics are: %s", + self.monitor, + ",".join(list(logs.keys())), + ) - else: - if self.in_cooldown(): - self.cooldown_counter -= 1 - self.wait = 0 + else: + if self.in_cooldown(): + self.cooldown_counter -= 1 + self.wait = 0 - if self.monitor_op(current, self.best): - self.best = current - self.wait = 0 - elif not self.in_cooldown(): - self.wait += 1 - if self.wait >= self.patience: - old_lr = backend.get_value(self.model.optimizer.lr) - if old_lr > np.float32(self.min_lr): - new_lr = old_lr * self.factor - new_lr = max(new_lr, self.min_lr) - backend.set_value(self.model.optimizer.lr, new_lr) - if self.verbose > 0: - io_utils.print_msg( - f'\nEpoch {epoch +1}: ' - f'ReduceLROnPlateau reducing learning rate to {new_lr}.') - self.cooldown_counter = self.cooldown - self.wait = 0 - - def in_cooldown(self): - return self.cooldown_counter > 0 - - -@keras_export('keras.callbacks.CSVLogger') + if self.monitor_op(current, self.best): + self.best = current + self.wait = 0 + elif not self.in_cooldown(): + self.wait += 1 + if self.wait >= self.patience: + old_lr = backend.get_value(self.model.optimizer.lr) + if old_lr > np.float32(self.min_lr): + new_lr = old_lr * self.factor + new_lr = max(new_lr, self.min_lr) + backend.set_value(self.model.optimizer.lr, new_lr) + if self.verbose > 0: + io_utils.print_msg( + f"\nEpoch {epoch +1}: " + "ReduceLROnPlateau reducing " + f"learning rate to {new_lr}." 
+                            )
+                        self.cooldown_counter = self.cooldown
+                        self.wait = 0
+
+    def in_cooldown(self):
+        return self.cooldown_counter > 0
+
+
+@keras_export("keras.callbacks.CSVLogger")
 class CSVLogger(Callback):
-  """Callback that streams epoch results to a CSV file.
-
-  Supports all values that can be represented as a string,
-  including 1D iterables such as `np.ndarray`.
-
-  Example:
-
-  ```python
-  csv_logger = CSVLogger('training.log')
-  model.fit(X_train, Y_train, callbacks=[csv_logger])
-  ```
-
-  Args:
-    filename: Filename of the CSV file, e.g. `'run/log.csv'`.
-    separator: String used to separate elements in the CSV file.
-    append: Boolean. True: append if file exists (useful for continuing
-        training). False: overwrite existing file.
-  """
-
-  def __init__(self, filename, separator=',', append=False):
-    self.sep = separator
-    self.filename = io_utils.path_to_string(filename)
-    self.append = append
-    self.writer = None
-    self.keys = None
-    self.append_header = True
-    super().__init__()
-
-  def on_train_begin(self, logs=None):
-    if self.append:
-      if tf.io.gfile.exists(self.filename):
-        with tf.io.gfile.GFile(self.filename, 'r') as f:
-          self.append_header = not bool(len(f.readline()))
-      mode = 'a'
-    else:
-      mode = 'w'
-    self.csv_file = tf.io.gfile.GFile(self.filename, mode)
+    """Callback that streams epoch results to a CSV file.

-  def on_epoch_end(self, epoch, logs=None):
-    logs = logs or {}
+    Supports all values that can be represented as a string,
+    including 1D iterables such as `np.ndarray`.

-    def handle_value(k):
-      is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0
-      if isinstance(k, str):
-        return k
-      elif isinstance(k, collections.abc.Iterable) and not is_zero_dim_ndarray:
-        return '"[%s]"' % (', '.join(map(str, k)))
-      else:
-        return k
+    Example:

-    if self.keys is None:
-      self.keys = sorted(logs.keys())
-
-    if self.model.stop_training:
-      # We set NA so that csv parsers do not fail for this last epoch.
-      logs = dict((k, logs[k]) if k in logs else (k, 'NA') for k in self.keys)
+    ```python
+    csv_logger = CSVLogger('training.log')
+    model.fit(X_train, Y_train, callbacks=[csv_logger])
+    ```

-    if not self.writer:
+    Args:
+        filename: Filename of the CSV file, e.g. `'run/log.csv'`.
+        separator: String used to separate elements in the CSV file.
+        append: Boolean. True: append if file exists (useful for continuing
+            training). False: overwrite existing file.
+    """

-      class CustomDialect(csv.excel):
-        delimiter = self.sep
+    def __init__(self, filename, separator=",", append=False):
+        self.sep = separator
+        self.filename = io_utils.path_to_string(filename)
+        self.append = append
+        self.writer = None
+        self.keys = None
+        self.append_header = True
+        super().__init__()
+
+    def on_train_begin(self, logs=None):
+        if self.append:
+            if tf.io.gfile.exists(self.filename):
+                with tf.io.gfile.GFile(self.filename, "r") as f:
+                    self.append_header = not bool(len(f.readline()))
+            mode = "a"
+        else:
+            mode = "w"
+        self.csv_file = tf.io.gfile.GFile(self.filename, mode)
+
+    def on_epoch_end(self, epoch, logs=None):
+        logs = logs or {}
+
+        def handle_value(k):
+            is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0
+            if isinstance(k, str):
+                return k
+            elif (
+                isinstance(k, collections.abc.Iterable)
+                and not is_zero_dim_ndarray
+            ):
+                return f"\"[{', '.join(map(str, k))}]\""
+            else:
+                return k
+
+        if self.keys is None:
+            self.keys = sorted(logs.keys())
+            # When validation_freq > 1, `val_` keys are not in the first
+            # epoch's logs. Add the `val_` keys so that they are part of the
+            # writer's fieldnames.
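Stepping back from the `ReduceLROnPlateau` code above: the update it applies is simply `new_lr = max(old_lr * factor, min_lr)`, once `patience` epochs pass without improvement. A worked example with illustrative numbers:

```python
lr, factor, min_lr = 0.01, 0.5, 0.004

for _ in range(3):
    lr = max(lr * factor, min_lr)
    print(lr)
# 0.005, then 0.004 (clipped by min_lr), then 0.004; in the callback the
# `old_lr > np.float32(self.min_lr)` guard stops further updates at that point.
```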
+            val_keys_found = False
+            for key in self.keys:
+                if key.startswith("val_"):
+                    val_keys_found = True
+                    break
+            if not val_keys_found:
+                self.keys.extend(["val_" + k for k in self.keys])
+
+        if not self.writer:
+
+            class CustomDialect(csv.excel):
+                delimiter = self.sep
+
+            fieldnames = ["epoch"] + self.keys
+
+            self.writer = csv.DictWriter(
+                self.csv_file, fieldnames=fieldnames, dialect=CustomDialect
+            )
+            if self.append_header:
+                self.writer.writeheader()
+
+        row_dict = collections.OrderedDict({"epoch": epoch})
+        row_dict.update(
+            (key, handle_value(logs.get(key, "NA"))) for key in self.keys
+        )
+        self.writer.writerow(row_dict)
+        self.csv_file.flush()
+
+    def on_train_end(self, logs=None):
+        self.csv_file.close()
+        self.writer = None
+
+
+@keras_export("keras.callbacks.LambdaCallback")
+class LambdaCallback(Callback):
+    r"""Callback for creating simple, custom callbacks on-the-fly.
-      fieldnames = ['epoch'] + self.keys
+    This callback is constructed with anonymous functions that will be called
+    at the appropriate time (during `Model.{fit | evaluate | predict}`).
+    Note that the callbacks expect positional arguments, as:
-      self.writer = csv.DictWriter(
-          self.csv_file,
-          fieldnames=fieldnames,
-          dialect=CustomDialect)
-      if self.append_header:
-        self.writer.writeheader()
+
+    - `on_epoch_begin` and `on_epoch_end` expect two positional arguments:
+      `epoch`, `logs`
+    - `on_batch_begin` and `on_batch_end` expect two positional arguments:
+      `batch`, `logs`
+    - `on_train_begin` and `on_train_end` expect one positional argument:
+      `logs`
-    row_dict = collections.OrderedDict({'epoch': epoch})
-    row_dict.update((key, handle_value(logs[key])) for key in self.keys)
-    self.writer.writerow(row_dict)
-    self.csv_file.flush()
+
+    Args:
+        on_epoch_begin: called at the beginning of every epoch.
+        on_epoch_end: called at the end of every epoch.
+        on_batch_begin: called at the beginning of every batch.
+        on_batch_end: called at the end of every batch.
+        on_train_begin: called at the beginning of model training.
+        on_train_end: called at the end of model training.
-  def on_train_end(self, logs=None):
-    self.csv_file.close()
-    self.writer = None
+
+    Example:
+
+    ```python
+    # Print the batch number at the beginning of every batch.
+    batch_print_callback = LambdaCallback(
+        on_batch_begin=lambda batch,logs: print(batch))
+
+    # Stream the epoch loss to a file in JSON format. The file content
+    # is not well-formed JSON but rather has a JSON object per line.
+    import json
+    json_log = open('loss_log.json', mode='wt', buffering=1)
+    json_logging_callback = LambdaCallback(
+        on_epoch_end=lambda epoch, logs: json_log.write(
+            json.dumps({'epoch': epoch, 'loss': logs['loss']}) + '\n'),
+        on_train_end=lambda logs: json_log.close()
+    )
+
+    # Terminate some processes after having finished model training.
+    processes = ...
+    cleanup_callback = LambdaCallback(
+        on_train_end=lambda logs: [
+            p.terminate() for p in processes if p.is_alive()])
+
+    model.fit(...,
+              callbacks=[batch_print_callback,
+                         json_logging_callback,
+                         cleanup_callback])
+    ```
+    """
-
-@keras_export('keras.callbacks.LambdaCallback')
-class LambdaCallback(Callback):
-  r"""Callback for creating simple, custom callbacks on-the-fly.
-
-  This callback is constructed with anonymous functions that will be called
-  at the appropriate time (during `Model.{fit | evaluate | predict}`).
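One consequence of the `CSVLogger` change above: with `validation_freq > 1`, the first epochs carry no `val_*` entries, so rows are padded with `"NA"` to stay aligned with the header. A minimal stand-alone sketch of that row construction:

```python
import collections

keys = ["loss", "val_loss"]  # fieldnames fixed at the first epoch
logs = {"loss": 0.3}         # an epoch where validation did not run

row = collections.OrderedDict({"epoch": 0})
row.update((k, logs.get(k, "NA")) for k in keys)
print(row)  # epoch 0, loss 0.3, val_loss 'NA'
```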
- Note that the callbacks expects positional arguments, as: - - - `on_epoch_begin` and `on_epoch_end` expect two positional arguments: - `epoch`, `logs` - - `on_batch_begin` and `on_batch_end` expect two positional arguments: - `batch`, `logs` - - `on_train_begin` and `on_train_end` expect one positional argument: - `logs` - - Args: - on_epoch_begin: called at the beginning of every epoch. - on_epoch_end: called at the end of every epoch. - on_batch_begin: called at the beginning of every batch. - on_batch_end: called at the end of every batch. - on_train_begin: called at the beginning of model training. - on_train_end: called at the end of model training. - - Example: - - ```python - # Print the batch number at the beginning of every batch. - batch_print_callback = LambdaCallback( - on_batch_begin=lambda batch,logs: print(batch)) - - # Stream the epoch loss to a file in JSON format. The file content - # is not well-formed JSON but rather has a JSON object per line. - import json - json_log = open('loss_log.json', mode='wt', buffering=1) - json_logging_callback = LambdaCallback( - on_epoch_end=lambda epoch, logs: json_log.write( - json.dumps({'epoch': epoch, 'loss': logs['loss']}) + '\n'), - on_train_end=lambda logs: json_log.close() - ) - - # Terminate some processes after having finished model training. - processes = ... - cleanup_callback = LambdaCallback( - on_train_end=lambda logs: [ - p.terminate() for p in processes if p.is_alive()]) - - model.fit(..., - callbacks=[batch_print_callback, - json_logging_callback, - cleanup_callback]) - ``` - """ - - def __init__(self, - on_epoch_begin=None, - on_epoch_end=None, - on_batch_begin=None, - on_batch_end=None, - on_train_begin=None, - on_train_end=None, - **kwargs): - super().__init__() - self.__dict__.update(kwargs) - if on_epoch_begin is not None: - self.on_epoch_begin = on_epoch_begin - if on_epoch_end is not None: - self.on_epoch_end = on_epoch_end - if on_batch_begin is not None: - self.on_batch_begin = on_batch_begin - if on_batch_end is not None: - self.on_batch_end = on_batch_end - if on_train_begin is not None: - self.on_train_begin = on_train_begin - if on_train_end is not None: - self.on_train_end = on_train_end + def __init__( + self, + on_epoch_begin=None, + on_epoch_end=None, + on_batch_begin=None, + on_batch_end=None, + on_train_begin=None, + on_train_end=None, + **kwargs, + ): + super().__init__() + self.__dict__.update(kwargs) + if on_epoch_begin is not None: + self.on_epoch_begin = on_epoch_begin + if on_epoch_end is not None: + self.on_epoch_end = on_epoch_end + if on_batch_begin is not None: + self.on_batch_begin = on_batch_begin + if on_batch_end is not None: + self.on_batch_end = on_batch_end + if on_train_begin is not None: + self.on_train_begin = on_train_begin + if on_train_end is not None: + self.on_train_end = on_train_end diff --git a/keras/callbacks_test.py b/keras/callbacks_test.py index b3d6cff1e8ce..f0e269141666 100644 --- a/keras/callbacks_test.py +++ b/keras/callbacks_test.py @@ -27,32 +27,38 @@ import unittest from unittest import mock +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.callbacks import BackupAndRestore from keras.callbacks import BackupAndRestoreExperimental +from keras.callbacks import Callback from keras.engine import sequential from keras.layers import Activation from keras.layers import Dense -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers import sgd +from keras.optimizers.legacy 
import gradient_descent from keras.optimizers.schedules import learning_rate_schedule from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import io_utils from keras.utils import np_utils -import numpy as np -import tensorflow.compat.v2 as tf +from keras.utils import tf_utils + +# isort: off from tensorflow.python.platform import tf_logging as logging try: - import h5py # pylint:disable=g-import-not-at-top + import h5py except ImportError: - h5py = None + h5py = None try: - import requests # pylint:disable=g-import-not-at-top + import requests except ImportError: - requests = None + requests = None TRAIN_SAMPLES = 10 @@ -63,3220 +69,4107 @@ BATCH_SIZE = 5 CALLBACK_HOOKS = [ - 'on_batch_begin', 'on_batch_end', 'on_epoch_begin', 'on_epoch_end', - 'on_predict_batch_begin', 'on_predict_batch_end', 'on_predict_begin', - 'on_predict_end', 'on_test_batch_begin', 'on_test_batch_end', - 'on_test_begin', 'on_test_end', 'on_train_batch_begin', - 'on_train_batch_end', 'on_train_begin', 'on_train_end' + "on_batch_begin", + "on_batch_end", + "on_epoch_begin", + "on_epoch_end", + "on_predict_batch_begin", + "on_predict_batch_end", + "on_predict_begin", + "on_predict_end", + "on_test_batch_begin", + "on_test_batch_end", + "on_test_begin", + "on_test_end", + "on_train_batch_begin", + "on_train_batch_end", + "on_train_begin", + "on_train_end", ] class Counter(keras.callbacks.Callback): - """Counts the number of times each callback method was run. - - Attributes: - method_counts: dict. Contains the counts of time each callback method was - run. - """ + """Counts the number of times each callback method was run. - def __init__(self): - self.method_counts = collections.defaultdict(int) - for method_name in CALLBACK_HOOKS: - setattr(self, method_name, - self.wrap_with_counts(method_name, getattr(self, method_name))) + Attributes: + method_counts: dict. Contains the counts of time each callback method was + run. 
+ """ - def wrap_with_counts(self, method_name, method): + def __init__(self): + self.method_counts = collections.defaultdict(int) + for method_name in CALLBACK_HOOKS: + setattr( + self, + method_name, + self.wrap_with_counts(method_name, getattr(self, method_name)), + ) - def _call_and_count(*args, **kwargs): - self.method_counts[method_name] += 1 - return method(*args, **kwargs) + def wrap_with_counts(self, method_name, method): + def _call_and_count(*args, **kwargs): + self.method_counts[method_name] += 1 + return method(*args, **kwargs) - return _call_and_count + return _call_and_count class CallAllHooks(keras.callbacks.Callback): - """A callback that calls self._run for all hooks""" + """A callback that calls self._run for all hooks""" - def __init__(self): - for method_name in CALLBACK_HOOKS: - setattr(self, method_name, self._run) + def __init__(self): + for method_name in CALLBACK_HOOKS: + setattr(self, method_name, self._run) - def _run(self, *args, logs=None): - raise NotImplementedError + def _run(self, *args, logs=None): + raise NotImplementedError def _get_numpy(): - return np.ones((10, 10)), np.ones((10, 1)) + return np.ones((10, 10)), np.ones((10, 1)) def _get_sequence(): + class MySequence(keras.utils.data_utils.Sequence): + def __getitem__(self, _): + return np.ones((2, 10)), np.ones((2, 1)) - class MySequence(keras.utils.data_utils.Sequence): + def __len__(self): + return 5 - def __getitem__(self, _): - return np.ones((2, 10)), np.ones((2, 1)) - - def __len__(self): - return 5 - - return MySequence(), None + return MySequence(), None @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes class CallbackCountsTest(test_combinations.TestCase): + def _check_counts(self, counter, expected_counts): + """Checks that the counts registered by `counter` are those expected.""" + for method_name, expected_count in expected_counts.items(): + self.assertEqual( + counter.method_counts[method_name], + expected_count, + msg="For method {}: expected {}, got: {}".format( + method_name, + expected_count, + counter.method_counts[method_name], + ), + ) + + def _get_model(self): + layers = [ + keras.layers.Dense(10, activation="relu"), + keras.layers.Dense(1, activation="sigmoid"), + ] + model = test_utils.get_model_from_layers(layers, input_shape=(10,)) + model.compile( + tf.compat.v1.train.AdamOptimizer(0.001), + "binary_crossentropy", + run_eagerly=test_utils.should_run_eagerly(), + ) + return model - def _check_counts(self, counter, expected_counts): - """Checks that the counts registered by `counter` are those expected.""" - for method_name, expected_count in expected_counts.items(): - self.assertEqual( - counter.method_counts[method_name], - expected_count, - msg='For method {}: expected {}, got: {}'.format( - method_name, expected_count, counter.method_counts[method_name])) - - def _get_model(self): - layers = [ - keras.layers.Dense(10, activation='relu'), - keras.layers.Dense(1, activation='sigmoid') - ] - model = test_utils.get_model_from_layers(layers, input_shape=(10,)) - model.compile( - tf.compat.v1.train.AdamOptimizer(0.001), - 'binary_crossentropy', - run_eagerly=test_utils.should_run_eagerly()) - return model - - @parameterized.named_parameters(('with_numpy', _get_numpy()), - ('with_sequence', _get_sequence())) - def test_callback_hooks_are_called_in_fit(self, data): - if not tf.executing_eagerly(): - self.skipTest('Behavior changed in v2.') - x, y = data - val_x, val_y = np.ones((4, 10)), np.ones((4, 1)) - - model = self._get_model() - counter = 
Counter() - model.fit( - x, - y, - validation_data=(val_x, val_y), - batch_size=2, - steps_per_epoch=5, - epochs=5, - callbacks=[counter]) - - self._check_counts( - counter, { - 'on_batch_begin': 25, - 'on_batch_end': 25, - 'on_epoch_begin': 5, - 'on_epoch_end': 5, - 'on_predict_batch_begin': 0, - 'on_predict_batch_end': 0, - 'on_predict_begin': 0, - 'on_predict_end': 0, - 'on_test_batch_begin': 10, - 'on_test_batch_end': 10, - 'on_test_begin': 5, - 'on_test_end': 5, - 'on_train_batch_begin': 25, - 'on_train_batch_end': 25, - 'on_train_begin': 1, - 'on_train_end': 1 - }) - - @parameterized.named_parameters(('with_numpy', _get_numpy()), - ('with_sequence', _get_sequence())) - def test_callback_hooks_are_called_in_evaluate(self, data): - x, y = data - is_sequence = isinstance(x, keras.utils.data_utils.Sequence) - - model = self._get_model() - counter = Counter() - model.evaluate( - x, - y, - batch_size=2 if not is_sequence else None, - steps=5 if is_sequence else None, - callbacks=[counter]) - self._check_counts( - counter, { - 'on_test_batch_begin': 5, - 'on_test_batch_end': 5, - 'on_test_begin': 1, - 'on_test_end': 1 - }) - - @parameterized.named_parameters(('with_numpy', _get_numpy()), - ('with_sequence', _get_sequence())) - def test_callback_hooks_are_called_in_predict(self, data): - x = data[0] - is_sequence = isinstance(x, keras.utils.data_utils.Sequence) - - model = self._get_model() - counter = Counter() - model.predict( - x, - batch_size=2 if not is_sequence else None, - steps=5 if is_sequence else None, - callbacks=[counter]) - self._check_counts( - counter, { - 'on_predict_batch_begin': 5, - 'on_predict_batch_end': 5, - 'on_predict_begin': 1, - 'on_predict_end': 1 - }) - - def test_callback_list_methods(self): - counter = Counter() - callback_list = keras.callbacks.CallbackList([counter]) - - batch = 0 - callback_list.on_test_batch_begin(batch) - callback_list.on_test_batch_end(batch) - callback_list.on_predict_batch_begin(batch) - callback_list.on_predict_batch_end(batch) - - self._check_counts( - counter, { - 'on_test_batch_begin': 1, - 'on_test_batch_end': 1, - 'on_predict_batch_begin': 1, - 'on_predict_batch_end': 1 - }) + @parameterized.named_parameters( + ("with_numpy", _get_numpy()), ("with_sequence", _get_sequence()) + ) + def test_callback_hooks_are_called_in_fit(self, data): + if not tf.executing_eagerly(): + self.skipTest("Behavior changed in v2.") + x, y = data + val_x, val_y = np.ones((4, 10)), np.ones((4, 1)) + + model = self._get_model() + counter = Counter() + model.fit( + x, + y, + validation_data=(val_x, val_y), + batch_size=2, + steps_per_epoch=5, + epochs=5, + callbacks=[counter], + ) + + self._check_counts( + counter, + { + "on_batch_begin": 25, + "on_batch_end": 25, + "on_epoch_begin": 5, + "on_epoch_end": 5, + "on_predict_batch_begin": 0, + "on_predict_batch_end": 0, + "on_predict_begin": 0, + "on_predict_end": 0, + "on_test_batch_begin": 10, + "on_test_batch_end": 10, + "on_test_begin": 5, + "on_test_end": 5, + "on_train_batch_begin": 25, + "on_train_batch_end": 25, + "on_train_begin": 1, + "on_train_end": 1, + }, + ) + + @parameterized.named_parameters( + ("with_numpy", _get_numpy()), ("with_sequence", _get_sequence()) + ) + def test_callback_hooks_are_called_in_evaluate(self, data): + x, y = data + is_sequence = isinstance(x, keras.utils.data_utils.Sequence) + + model = self._get_model() + counter = Counter() + model.evaluate( + x, + y, + batch_size=2 if not is_sequence else None, + steps=5 if is_sequence else None, + callbacks=[counter], + ) + 
self._check_counts( + counter, + { + "on_test_batch_begin": 5, + "on_test_batch_end": 5, + "on_test_begin": 1, + "on_test_end": 1, + }, + ) + + @parameterized.named_parameters( + ("with_numpy", _get_numpy()), ("with_sequence", _get_sequence()) + ) + def test_callback_hooks_are_called_in_predict(self, data): + x = data[0] + is_sequence = isinstance(x, keras.utils.data_utils.Sequence) + + model = self._get_model() + counter = Counter() + model.predict( + x, + batch_size=2 if not is_sequence else None, + steps=5 if is_sequence else None, + callbacks=[counter], + ) + self._check_counts( + counter, + { + "on_predict_batch_begin": 5, + "on_predict_batch_end": 5, + "on_predict_begin": 1, + "on_predict_end": 1, + }, + ) + + def test_callback_list_methods(self): + counter = Counter() + callback_list = keras.callbacks.CallbackList([counter]) + + batch = 0 + callback_list.on_test_batch_begin(batch) + callback_list.on_test_batch_end(batch) + callback_list.on_predict_batch_begin(batch) + callback_list.on_predict_batch_end(batch) + + self._check_counts( + counter, + { + "on_test_batch_begin": 1, + "on_test_batch_end": 1, + "on_predict_batch_begin": 1, + "on_predict_batch_end": 1, + }, + ) class KerasCallbacksTest(test_combinations.TestCase): + def _get_model(self, input_shape=None, additional_metrics=None): + additional_metrics = additional_metrics or [] + layers = [ + keras.layers.Dense(3, activation="relu"), + keras.layers.Dense(2, activation="softmax"), + ] + model = test_utils.get_model_from_layers( + layers, input_shape=input_shape + ) + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=[keras.metrics.CategoricalAccuracy(name="my_acc")] + + additional_metrics, + run_eagerly=test_utils.should_run_eagerly(), + ) + return model - def _get_model(self, input_shape=None, additional_metrics=None): - additional_metrics = additional_metrics or [] - layers = [ - keras.layers.Dense(3, activation='relu'), - keras.layers.Dense(2, activation='softmax') - ] - model = test_utils.get_model_from_layers(layers, input_shape=input_shape) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=[keras.metrics.CategoricalAccuracy(name='my_acc')] + - additional_metrics, - run_eagerly=test_utils.should_run_eagerly()) - return model - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_progbar_logging(self): - model = self._get_model(input_shape=(3,)) - - x = tf.ones((200, 3)) - y = tf.zeros((200, 2)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(10) - expected_log = r'(.*- loss:.*- my_acc:.*)+' - - io_utils.enable_interactive_logging() - with self.captureWritesToStream(sys.stdout) as printed: - model.fit(dataset, epochs=2, steps_per_epoch=10) - self.assertRegex(printed.contents(), expected_log) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_progbar_logging_with_stateful_metrics(self): - - class AddAllOnes(keras.metrics.Metric): - """A simple metric that adds all the one's in `y_true`.""" - - def __init__(self, name='add_all_ones', **kwargs): - super().__init__(name=name, **kwargs) - self.total = self.add_weight(name='total', initializer='zeros') - - def update_state(self, y_true, y_pred, sample_weight=None): - self.total.assign_add( - tf.cast(tf.reduce_sum(y_true), dtype=tf.float32)) - - def result(self): - return self.total - - x_train = np.array([[0, 1, 0, 1, 0, 1, 0, 1]] * 8).astype(float) - y_train = np.array([[1, 0], [0, 0], [1, 1], [1, 0], [0, 1], [1, 0], [1, 0], - [0, 0]]) - # There 
are 7 ones in total in `y_train` after two batches. - expected_log = r'(.*- loss:.*- my_acc:.*- add_all_ones: 7.0000)+' - - io_utils.enable_interactive_logging() - with self.captureWritesToStream(sys.stdout) as printed: - model = self._get_model( - input_shape=(8,), additional_metrics=[AddAllOnes()]) - model.fit(x_train, y_train, verbose=1, batch_size=4, shuffle=False) - self.assertRegex(printed.contents(), expected_log) - - # When not executing eagerly, `model.evaluate` does not have the metrics - # results printed. - if tf.executing_eagerly(): - with self.captureWritesToStream(sys.stdout) as printed: - model = self._get_model( - input_shape=(8,), additional_metrics=[AddAllOnes()]) - model.evaluate(x_train, y_train, verbose=1, batch_size=4) - self.assertRegex(printed.contents(), expected_log) - - @test_combinations.run_all_keras_modes - def test_trivial_backup_restore(self): - if test_utils.should_run_eagerly(): - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') - cbk = BackupAndRestore(self.get_temp_dir()) - model.fit(np.ones((10, 1)), np.ones((10, 1)), epochs=0, callbacks=[cbk]) - - def test_backup_restore_train_counter(self): - if not tf.compat.v1.executing_eagerly(): - self.skipTest('BackupAndRestore only available when execution is enabled') - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') - cbk = BackupAndRestore(self.get_temp_dir()) - - class InterruptingCallback(keras.callbacks.Callback): - """A callback to intentionally introduce interruption to training.""" - - def on_epoch_end(self, epoch, log=None): - logging.info(f'counter: {model._train_counter}') - if epoch == 5 or epoch == 12: - raise RuntimeError('Interruption') - - log_dir = self.get_temp_dir() - - # The following asserts that the train counter is fault tolerant. - self.assertEqual(model._train_counter.numpy(), 0) - try: - model.fit(np.ones((10, 1)), np.ones((10, 1)), epochs=20, - callbacks=[cbk, InterruptingCallback()]) - except RuntimeError: - pass - self.assertEqual(model._train_counter.numpy(), 6) - try: - model.fit(np.ones((10, 1)), np.ones((10, 1)), epochs=20, - callbacks=[cbk, InterruptingCallback()]) - except RuntimeError: - pass - self.assertEqual(model._train_counter.numpy(), 13) - - def _test_backup_and_restore_callback_with(self, cls): - if not tf.compat.v1.executing_eagerly(): - self.skipTest('BackupAndRestore only available when execution is enabled') - - class InterruptingCallback(keras.callbacks.Callback): - """A callback to intentionally introduce interruption to training.""" - - def on_epoch_end(self, epoch, log=None): - if epoch == 15: - raise RuntimeError('Interruption') - - model = keras.Sequential([keras.layers.Dense(10)]) - optimizer = gradient_descent.SGD() - model.compile(optimizer, loss='mse') - - x = tf.random.uniform((24, 10)) - y = tf.random.uniform((24,)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat().batch(2) - - backup_callback = cls(backup_dir=self.get_temp_dir()) - try: - model.fit( - dataset, - epochs=20, - steps_per_epoch=5, - callbacks=[backup_callback, InterruptingCallback()]) - except RuntimeError: - logging.warning('***Handling interruption***') - # This continues at the epoch where it left off. 
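[Editor's note] The resume call just below is the crux of the pattern this helper exercises: a callback deliberately raises mid-training to simulate a preemption, the exception is swallowed, and a second `fit` with the same `backup_dir` continues from the checkpoint. A condensed standalone sketch of the same flow (layer sizes, paths, and the failing epoch are illustrative, not taken from the diff):

```python
import numpy as np
from tensorflow import keras


class Interrupter(keras.callbacks.Callback):
    """Simulates a crash by raising at the end of a chosen epoch."""

    def on_epoch_end(self, epoch, logs=None):
        if epoch == 3:
            raise RuntimeError("Interruption")


model = keras.Sequential([keras.layers.Dense(1)])
model.compile("sgd", loss="mse")
backup = keras.callbacks.BackupAndRestore(backup_dir="/tmp/backup")

x, y = np.ones((10, 1)), np.ones((10, 1))
try:
    model.fit(x, y, epochs=10, callbacks=[backup, Interrupter()])
except RuntimeError:
    pass  # the checkpoint BackupAndRestore wrote is still on disk

# Retrying with the same backup_dir resumes at epoch 4 instead of epoch 0.
model.fit(x, y, epochs=10, callbacks=[backup])
```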
- model.fit( - dataset, epochs=20, steps_per_epoch=5, callbacks=[backup_callback]) - - def test_experimental_backup_and_restore(self): - """Ensure the legacy endpoint of `BackupAndRestore` gives warning.""" - - warning_messages = [] - - def warning(msg): - warning_messages.append(msg) - - with tf.compat.v1.test.mock.patch.object(logging, 'warning', warning): - self._test_backup_and_restore_callback_with(BackupAndRestoreExperimental) - - warning_msg = ('`tf.keras.callbacks.experimental.BackupAndRestore` ' - 'endpoint is deprecated') - self.assertIn(warning_msg, '\n'.join(warning_messages)) - warning_msg = ('***Handling interruption***') - self.assertIn(warning_msg, '\n'.join(warning_messages)) - - def test_backup_and_restore(self): - """Ensure the public endpoint of `BackupAndRestore` is working.""" - - warning_messages = [] - - def warning(msg): - warning_messages.append(msg) - - with tf.compat.v1.test.mock.patch.object(logging, 'warning', warning): - self._test_backup_and_restore_callback_with(BackupAndRestore) - - warning_msg = ('`tf.keras.callbacks.experimental.BackupAndRestore` ' - 'endpoint is deprecated') - self.assertNotIn(warning_msg, '\n'.join(warning_messages)) - warning_msg = ('***Handling interruption***') - self.assertIn(warning_msg, '\n'.join(warning_messages)) - - @test_combinations.run_all_keras_modes - def test_callback_warning(self): - - class SleepCallback(keras.callbacks.Callback): - - def on_train_batch_end(self, batch, logs=None): - time.sleep(0.1) - - model = sequential.Sequential() - model.add(keras.layers.Dense(1)) - model.compile( - 'sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - warning_messages = [] - - def warning(msg): - warning_messages.append(msg) - - with tf.compat.v1.test.mock.patch.object(logging, 'warning', warning): - model.fit( - np.ones((16, 1), 'float32'), - np.ones((16, 1), 'float32'), - batch_size=3, - epochs=1, - callbacks=[SleepCallback()]) - warning_msg = ('Callback method `on_train_batch_end` is slow compared ' - 'to the batch time') - self.assertIn(warning_msg, '\n'.join(warning_messages)) - - @test_combinations.run_all_keras_modes - def test_default_callbacks_no_warning(self): - # Test that without the callback no warning is raised - model = sequential.Sequential() - model.add(keras.layers.Dense(1)) - model.compile( - 'sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - warning_messages = [] - - def warning(msg): - warning_messages.append(msg) - - with tf.compat.v1.test.mock.patch.object(logging, 'warning', warning): - model.fit( - np.ones((16, 1), 'float32'), - np.ones((16, 1), 'float32'), - batch_size=3, - epochs=1) - self.assertListEqual(warning_messages, []) - - @test_combinations.run_with_all_model_types(exclude_models='functional') - @test_combinations.run_all_keras_modes - def test_progbar_logging_deferred_model_build(self): - model = self._get_model() - self.assertFalse(model.built) - - x = tf.ones((200, 3)) - y = tf.zeros((200, 2)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(10) - expected_log = r'(.*- loss:.*- my_acc:.*)+' - - io_utils.enable_interactive_logging() - with self.captureWritesToStream(sys.stdout) as printed: - model.fit(dataset, epochs=2, steps_per_epoch=10) - self.assertRegex(printed.contents(), expected_log) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_progbar_logging_validation_data(self): - model = self._get_model(input_shape=(3,)) - - x = tf.ones((50, 3)) - y = tf.zeros((50, 2)) - training_dataset = 
tf.data.Dataset.from_tensor_slices((x, y)).batch(10) - val_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(10) - expected_log = r'(.*5/5.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:.*)+' - - io_utils.enable_interactive_logging() - with self.captureWritesToStream(sys.stdout) as printed: - model.fit(training_dataset, epochs=2, validation_data=val_dataset) - self.assertRegex(printed.contents(), expected_log) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_progbar_logging_validation_split(self): - model = self._get_model(input_shape=(3,)) - - x = np.ones((100, 3)) - y = np.zeros((100, 2)) - expected_log = ( - r'(?s).*1/2.*8/8.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:' - r'.*2/2.*8/8.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:.*') - - io_utils.enable_interactive_logging() - with self.captureWritesToStream(sys.stdout) as printed: - model.fit(x, y, batch_size=10, epochs=2, validation_split=0.2) - self.assertRegex(printed.contents(), expected_log) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_progbar_logging_training_validation(self): - model = self._get_model(input_shape=(2,)) - - def generator(): - for _ in range(100): - yield [1, 1], 1 - - training = tf.data.Dataset \ - .from_generator( + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_progbar_logging(self): + model = self._get_model(input_shape=(3,)) + + x = tf.ones((200, 3)) + y = tf.zeros((200, 2)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(10) + expected_log = r"(.*- loss:.*- my_acc:.*)+" + + io_utils.enable_interactive_logging() + with self.captureWritesToStream(sys.stdout) as printed: + model.fit(dataset, epochs=2, steps_per_epoch=10) + self.assertRegex(printed.contents(), expected_log) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_progbar_logging_with_stateful_metrics(self): + class AddAllOnes(keras.metrics.Metric): + """A simple metric that adds all the one's in `y_true`.""" + + def __init__(self, name="add_all_ones", **kwargs): + super().__init__(name=name, **kwargs) + self.total = self.add_weight(name="total", initializer="zeros") + + def update_state(self, y_true, y_pred, sample_weight=None): + self.total.assign_add( + tf.cast(tf.reduce_sum(y_true), dtype=tf.float32) + ) + + def result(self): + return self.total + + x_train = np.array([[0, 1, 0, 1, 0, 1, 0, 1]] * 8).astype(float) + y_train = np.array( + [[1, 0], [0, 0], [1, 1], [1, 0], [0, 1], [1, 0], [1, 0], [0, 0]] + ) + # There are 7 ones in total in `y_train` after two batches. + expected_log = r"(.*- loss:.*- my_acc:.*- add_all_ones: 7.0000)+" + + io_utils.enable_interactive_logging() + with self.captureWritesToStream(sys.stdout) as printed: + model = self._get_model( + input_shape=(8,), additional_metrics=[AddAllOnes()] + ) + model.fit(x_train, y_train, verbose=1, batch_size=4, shuffle=False) + self.assertRegex(printed.contents(), expected_log) + + # When not executing eagerly, `model.evaluate` does not have the metrics + # results printed. 
+ if tf.executing_eagerly(): + with self.captureWritesToStream(sys.stdout) as printed: + model = self._get_model( + input_shape=(8,), additional_metrics=[AddAllOnes()] + ) + model.evaluate(x_train, y_train, verbose=1, batch_size=4) + self.assertRegex(printed.contents(), expected_log) + + @test_combinations.run_all_keras_modes + def test_trivial_backup_restore(self): + if test_utils.should_run_eagerly(): + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + cbk = BackupAndRestore(self.get_temp_dir()) + model.fit( + np.ones((10, 1)), np.ones((10, 1)), epochs=1, callbacks=[cbk] + ) + + def test_backup_restore_train_counter(self): + if not tf.compat.v1.executing_eagerly(): + self.skipTest( + "BackupAndRestore only available when eager execution is " + "enabled" + ) + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + cbk = BackupAndRestore(self.get_temp_dir()) + + class InterruptingCallback(keras.callbacks.Callback): + """A callback to intentionally introduce interruption to + training.""" + + def on_epoch_end(self, epoch, log=None): + logging.info(f"counter: {model._train_counter}") + if epoch == 5 or epoch == 12: + raise RuntimeError("Interruption") + + self.get_temp_dir() + + # The following asserts that the train counter is fault tolerant. + self.assertEqual(model._train_counter.numpy(), 0) + try: + model.fit( + np.ones((10, 1)), + np.ones((10, 1)), + epochs=20, + callbacks=[cbk, InterruptingCallback()], + ) + except RuntimeError: + pass + self.assertEqual(model._train_counter.numpy(), 6) + try: + model.fit( + np.ones((10, 1)), + np.ones((10, 1)), + epochs=20, + callbacks=[cbk, InterruptingCallback()], + ) + except RuntimeError: + pass + self.assertEqual(model._train_counter.numpy(), 13) + + def _test_backup_and_restore_callback_with(self, cls): + if not tf.compat.v1.executing_eagerly(): + self.skipTest( + "BackupAndRestore only available when execution is enabled" + ) + + class InterruptingCallback(keras.callbacks.Callback): + """A callback to intentionally introduce interruption to + training.""" + + def on_epoch_end(self, epoch, log=None): + if epoch == 15: + raise RuntimeError("Interruption") + + model = keras.Sequential([keras.layers.Dense(10)]) + optimizer = sgd.SGD() + model.compile(optimizer, loss="mse") + + x = tf.random.uniform((24, 10)) + y = tf.random.uniform((24,)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat().batch(2) + + backup_callback = cls(backup_dir=self.get_temp_dir()) + try: + model.fit( + dataset, + epochs=20, + steps_per_epoch=5, + callbacks=[backup_callback, InterruptingCallback()], + ) + except RuntimeError: + logging.warning("***Handling interruption***") + # This continues at the epoch where it left off. 
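[Editor's note] One detail of the `_train_counter` assertions in `test_backup_restore_train_counter` above is easy to misread: `on_epoch_end` receives a zero-based epoch index, so raising when `epoch == 5` means six epochs (0 through 5) ran to completion, and after the restored run is interrupted again at `epoch == 12`, thirteen epochs have completed in total. In short:

```python
# zero-based index of the epoch that raises -> epochs actually completed
for failing_epoch in (5, 12):
    print(failing_epoch, "->", failing_epoch + 1)  # 5 -> 6, 12 -> 13
```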
+ model.fit( + dataset, + epochs=20, + steps_per_epoch=5, + callbacks=[backup_callback], + ) + + def _test_backup_and_restore_callback_at_steps( + self, cls, epoch_int, steps_int, mode + ): + if not tf.compat.v1.executing_eagerly(): + self.skipTest( + "BackupAndRestore only available when eager execution is " + "enabled" + ) + + class InterruptingCallback(keras.callbacks.Callback): + """A callback to intentionally introduce interruption to + training.""" + + batch_count = 0 + + def on_epoch_end(self, epoch, log=None): + if epoch == epoch_int: + raise RuntimeError("EpochInterruption") + + def on_batch_end(self, batch, logs=None): + self.batch_count += 1 + if self.batch_count == steps_int: + raise RuntimeError("StepsInterruption") + + class VerifyRestore(Callback): + """Verify if the training restored to the correct epoch and step.""" + + def __init__(self, initial_epoch, initial_step): + super(VerifyRestore, self).__init__() + self.initial_epoch = initial_epoch + self.initial_step = initial_step + self._current_epoch = 0 + + def on_epoch_begin(self, epoch, logs=None): + self._current_epoch = epoch + if epoch < self.initial_epoch: + raise ValueError( + "Training did not restore at epoch (%d) and step (%d)" + % (self.initial_epoch, self.initial_step) + ) + + def on_batch_begin(self, batch, logs=None): + if ( + batch <= self.initial_step + and self._current_epoch < self.initial_epoch + ): + raise ValueError( + "Training did not restore at Epoch (%d) and step (%d)" + % (self.initial_epoch, self.initial_step) + ) + + model = keras.Sequential([keras.layers.Dense(10)]) + optimizer = sgd.SGD() + model.compile(optimizer, loss="mse") + + x = tf.random.uniform((24, 10)) + y = tf.random.uniform((24,)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat().batch(2) + save_freq_arg = "epoch" if mode == "epoch" else 7 + backup_callback = cls( + backup_dir=self.get_temp_dir(), save_freq=save_freq_arg + ) + # epoch where the restore should resume from + if save_freq_arg == "epoch": + init_epoch = epoch_int + init_step = 0 + elif save_freq_arg: + init_epoch = int(((steps_int // 7) * 7) // 5) + init_step = int((((steps_int // 7) * 7) % 5) - 1) + else: + init_epoch = 0 + init_step = 0 + + # callback to verify accurate training state restore + verify_restore_callback = VerifyRestore( + initial_epoch=init_epoch, initial_step=init_step + ) + try: + model.fit( + dataset, + epochs=20, + steps_per_epoch=5, + callbacks=[backup_callback, InterruptingCallback()], + ) + except RuntimeError as e: + if str(e) == "EpochInterruption": + logging.warning("***Handling interruption at epoch***") + elif str(e) == "StepsInterruption": + logging.warning("***Handling interruption at Nth step***") + # This continues at the epoch and step where it left off. 
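[Editor's note] The expected-restore arithmetic in the helper above is terse, so a worked example helps before the resume call below. With `save_freq=7` batches and 5 steps per epoch, the last checkpoint sits at the largest multiple of 7 not exceeding the interrupting step, and that global step is folded back into an (epoch, step) pair exactly as the helper's formulas do:

```python
steps_per_epoch = 5
save_freq = 7

for steps_int in (3, 8, 35):
    last_saved = (steps_int // save_freq) * save_freq
    init_epoch = last_saved // steps_per_epoch
    init_step = (last_saved % steps_per_epoch) - 1
    print(steps_int, "->", (init_epoch, init_step))

# 3  -> (0, -1): no checkpoint yet, training restarts from scratch
# 8  -> (1, 1):  resume partway through epoch 1
# 35 -> (7, -1): the save landed exactly on an epoch boundary
```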
+ model.fit( + dataset, + epochs=20, + steps_per_epoch=5, + callbacks=[backup_callback, verify_restore_callback], + ) + + def test_experimental_backup_and_restore(self): + """Ensure the legacy endpoint of `BackupAndRestore` gives warning.""" + + warning_messages = [] + + def warning(msg): + warning_messages.append(msg) + + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + self._test_backup_and_restore_callback_with( + BackupAndRestoreExperimental + ) + + warning_msg = ( + "`tf.keras.callbacks.experimental.BackupAndRestore` " + "endpoint is deprecated" + ) + self.assertIn(warning_msg, "\n".join(warning_messages)) + warning_msg = "***Handling interruption***" + self.assertIn(warning_msg, "\n".join(warning_messages)) + + def test_backup_and_restore(self): + """Ensure the public endpoint of `BackupAndRestore` is working.""" + + warning_messages = [] + + def warning(msg): + warning_messages.append(msg) + + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + self._test_backup_and_restore_callback_with(BackupAndRestore) + + warning_msg = ( + "`tf.keras.callbacks.experimental.BackupAndRestore` " + "endpoint is deprecated" + ) + self.assertNotIn(warning_msg, "\n".join(warning_messages)) + warning_msg = "***Handling interruption***" + self.assertIn(warning_msg, "\n".join(warning_messages)) + + def test_backup_and_restore_steps(self): + """Ensure the public endpoint of `BackupAndRestore` is working.""" + + warning_messages = [] + + def warning(msg): + warning_messages.append(msg) + + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + # interrupt at steps before 1 epoch + self._test_backup_and_restore_callback_at_steps( + BackupAndRestore, epoch_int=20, steps_int=3, mode="batch" + ) + warning_msg = ( + "`tf.keras.callbacks.experimental.BackupAndRestore` " + "endpoint is deprecated" + ) + self.assertNotIn(warning_msg, "\n".join(warning_messages)) + warning_msg = "***Handling interruption at Nth step***" + self.assertIn(warning_msg, "\n".join(warning_messages)) + + # interrupt at steps after 1 epoch + warning_messages = [] + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + self._test_backup_and_restore_callback_at_steps( + BackupAndRestore, epoch_int=20, steps_int=8, mode="batch" + ) + warning_msg = "***Handling interruption at Nth step***" + self.assertIn(warning_msg, "\n".join(warning_messages)) + + # interrupt at epoch before steps + warning_messages = [] + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + self._test_backup_and_restore_callback_at_steps( + BackupAndRestore, epoch_int=1, steps_int=12, mode="epoch" + ) + warning_msg = "***Handling interruption at epoch***" + self.assertIn(warning_msg, "\n".join(warning_messages)) + + def test_backup_and_restore_steps_last_batch(self): + """Ensure the public endpoint of `BackupAndRestore` is working.""" + + warning_messages = [] + + def warning(msg): + warning_messages.append(msg) + + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + # interrupt at last step in 7th epoch + self._test_backup_and_restore_callback_at_steps( + BackupAndRestore, epoch_int=20, steps_int=35, mode="batch" + ) + warning_msg = ( + "`tf.keras.callbacks.experimental.BackupAndRestore` " + "endpoint is deprecated" + ) + self.assertNotIn(warning_msg, "\n".join(warning_messages)) + warning_msg = "***Handling interruption at Nth step***" + self.assertIn(warning_msg, "\n".join(warning_messages)) + + def 
test_backup_and_restore_steps_false_save_freq(self): + """Ensure the public endpoint of `BackupAndRestore` is working.""" + warning_messages = [] + + def warning(msg): + warning_messages.append(msg) + + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + # interrupt at steps before 1 epoch + self._test_backup_and_restore_callback_at_steps( + BackupAndRestore, epoch_int=20, steps_int=3, mode=False + ) + warning_msg = ( + "`tf.keras.callbacks.experimental.BackupAndRestore` " + "endpoint is deprecated" + ) + self.assertNotIn(warning_msg, "\n".join(warning_messages)) + warning_msg = "***Handling interruption at Nth step***" + self.assertIn(warning_msg, "\n".join(warning_messages)) + + # interrupt at steps after 1 epoch + warning_messages = [] + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + self._test_backup_and_restore_callback_at_steps( + BackupAndRestore, epoch_int=20, steps_int=8, mode="batch" + ) + warning_msg = "***Handling interruption at Nth step***" + self.assertIn(warning_msg, "\n".join(warning_messages)) + + # interrupt at epoch before steps + warning_messages = [] + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + self._test_backup_and_restore_callback_at_steps( + BackupAndRestore, epoch_int=1, steps_int=12, mode="epoch" + ) + warning_msg = "***Handling interruption at epoch***" + self.assertIn(warning_msg, "\n".join(warning_messages)) + + def test_backup_and_restore_steps_clean_up(self): + if not tf.executing_eagerly(): + self.skipTest( + "BackupAndRestore only available when eager execution is " + "enabled." + ) + path = self.get_temp_dir() + callback = BackupAndRestore(path, delete_checkpoint=True) + model = keras.Sequential([keras.layers.Dense(10)]) + optimizer = gradient_descent.SGD() + model.compile(optimizer, loss="mse") + + x = tf.random.uniform((24, 10)) + y = tf.random.uniform((24,)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) + model.fit(dataset, epochs=1, callbacks=[callback]) + self.assertEmpty(os.listdir(path)) + + callback = BackupAndRestore(path, delete_checkpoint=False) + model.fit(dataset, epochs=1, callbacks=[callback]) + self.assertNotEmpty(os.listdir(path)) + + @test_combinations.run_all_keras_modes + def test_callback_warning(self): + class SleepCallback(keras.callbacks.Callback): + def on_train_batch_end(self, batch, logs=None): + time.sleep(0.1) + + model = sequential.Sequential() + model.add(keras.layers.Dense(1)) + model.compile( + "sgd", loss="mse", run_eagerly=test_utils.should_run_eagerly() + ) + + warning_messages = [] + + def warning(msg): + warning_messages.append(msg) + + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + model.fit( + np.ones((16, 1), "float32"), + np.ones((16, 1), "float32"), + batch_size=3, + epochs=1, + callbacks=[SleepCallback()], + ) + warning_msg = ( + "Callback method `on_train_batch_end` is slow compared " + "to the batch time" + ) + self.assertIn(warning_msg, "\n".join(warning_messages)) + + @test_combinations.run_all_keras_modes + def test_default_callbacks_no_warning(self): + # Test that without the callback no warning is raised + model = sequential.Sequential() + model.add(keras.layers.Dense(1)) + model.compile( + "sgd", loss="mse", run_eagerly=test_utils.should_run_eagerly() + ) + + warning_messages = [] + + def warning(msg): + warning_messages.append(msg) + + with tf.compat.v1.test.mock.patch.object(logging, "warning", warning): + model.fit( + np.ones((16, 1), "float32"), + np.ones((16, 1), "float32"), + 
batch_size=3, + epochs=1, + ) + self.assertListEqual(warning_messages, []) + + @test_combinations.run_with_all_model_types(exclude_models="functional") + @test_combinations.run_all_keras_modes + def test_progbar_logging_deferred_model_build(self): + model = self._get_model() + self.assertFalse(model.built) + + x = tf.ones((200, 3)) + y = tf.zeros((200, 2)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(10) + expected_log = r"(.*- loss:.*- my_acc:.*)+" + + io_utils.enable_interactive_logging() + with self.captureWritesToStream(sys.stdout) as printed: + model.fit(dataset, epochs=2, steps_per_epoch=10) + self.assertRegex(printed.contents(), expected_log) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_progbar_logging_validation_data(self): + model = self._get_model(input_shape=(3,)) + + x = tf.ones((50, 3)) + y = tf.zeros((50, 2)) + training_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(10) + val_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(10) + expected_log = ( + r"(.*5/5.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:.*)+" + ) + + io_utils.enable_interactive_logging() + with self.captureWritesToStream(sys.stdout) as printed: + model.fit(training_dataset, epochs=2, validation_data=val_dataset) + self.assertRegex(printed.contents(), expected_log) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_progbar_logging_validation_split(self): + model = self._get_model(input_shape=(3,)) + + x = np.ones((100, 3)) + y = np.zeros((100, 2)) + expected_log = ( + r"(?s).*1/2.*8/8.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:" + r".*2/2.*8/8.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:.*" + ) + + io_utils.enable_interactive_logging() + with self.captureWritesToStream(sys.stdout) as printed: + model.fit(x, y, batch_size=10, epochs=2, validation_split=0.2) + self.assertRegex(printed.contents(), expected_log) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_progbar_logging_training_validation(self): + model = self._get_model(input_shape=(2,)) + + def generator(): + for _ in range(100): + yield [1, 1], 1 + + training = ( + tf.data.Dataset.from_generator( + generator=generator, + output_types=("float64", "float64"), + output_shapes=([2], []), + ) + .batch(2) + .repeat() + ) + validation = tf.data.Dataset.from_generator( generator=generator, - output_types=('float64', 'float64'), - output_shapes=([2], [])) \ - .batch(2) \ - .repeat() - validation = tf.data.Dataset \ - .from_generator( + output_types=("float64", "float64"), + output_shapes=([2], []), + ).batch(2) + expected_log = ( + r"(?s).*1/2.*20/20.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:" + r".*2/2.*20/20.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:.*" + ) + + io_utils.enable_interactive_logging() + with self.captureWritesToStream(sys.stdout) as printed: + model.fit( + x=training, + validation_data=validation, + epochs=2, + steps_per_epoch=20, + ) + self.assertRegex(printed.contents(), expected_log) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_progbar_logging_with_dataset_and_partial_batch(self): + model = self._get_model(input_shape=(2,)) + + def generator(): + # Have a partial batch at the end. 
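[Editor's note] These generator-backed pipelines use `output_types`/`output_shapes`, which still work but have since been superseded by `output_signature`. For reference, a sketch of the equivalent modern spelling of the partial-batch dataset built just below (not part of the diff):

```python
import numpy as np
import tensorflow as tf


def generator():
    for _ in range(9):  # odd count on purpose: the final batch is partial
        yield np.random.random(2), 1


dataset = tf.data.Dataset.from_generator(
    generator,
    output_signature=(
        tf.TensorSpec(shape=(2,), dtype=tf.float64),
        tf.TensorSpec(shape=(), dtype=tf.float64),
    ),
).batch(2)  # four batches of 2, then one trailing batch of 1
```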
+ for _ in range(9): + yield np.random.random(2), 1 + + training = tf.data.Dataset.from_generator( generator=generator, - output_types=('float64', 'float64'), - output_shapes=([2], [])) \ - .batch(2) - expected_log = ( - r'(?s).*1/2.*20/20.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:' - r'.*2/2.*20/20.*- loss:.*- my_acc:.*- val_loss:.*- val_my_acc:.*') - - io_utils.enable_interactive_logging() - with self.captureWritesToStream(sys.stdout) as printed: - model.fit( - x=training, validation_data=validation, epochs=2, steps_per_epoch=20) - self.assertRegex(printed.contents(), expected_log) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_progbar_logging_with_dataset_and_partial_batch(self): - model = self._get_model(input_shape=(2,)) - - def generator(): - # Have a partial batch at the end. - for _ in range(9): - yield np.random.random(2), 1 - - training = tf.data.Dataset \ - .from_generator( - generator=generator, - output_types=('float64', 'float64'), - output_shapes=([2], [])) \ - .batch(2) - validation = tf.data.Dataset \ - .from_generator( - generator=generator, - output_types=('float64', 'float64'), - output_shapes=([2], [])) \ - .batch(2) - - io_utils.enable_interactive_logging() - with self.captureWritesToStream(sys.stdout) as printed: - model.fit(x=training, validation_data=validation) - - # Make sure the value of val_ metrics are not zeros. - log_content = printed.contents() - val_loss = re.findall(r'val_loss: (\d\.\d+)', log_content) - self.assertLen(val_loss, 1) - self.assertGreater(float(val_loss[0]), 0.0) - - @test_combinations.run_with_all_model_types - def test_ModelCheckpoint(self): - if h5py is None: - return # Skip test if models cannot be saved. - - model_type = test_utils.get_model_type() - if model_type == 'subclass': - return # Skip test since subclassed models cannot be saved in .h5 format. 
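[Editor's note] The log-scraping assertion just above deserves a note: the progress bar is captured as plain text and the validation loss is pulled out with a regex, which is what lets the test prove the partial final batch still updated the `val_` metrics. A self-contained illustration (the log line is fabricated for the example):

```python
import re

log = "5/5 - 0s - loss: 0.6931 - my_acc: 0.5000 - val_loss: 0.6930 - val_my_acc: 0.5000"
val_loss = re.findall(r"val_loss: (\d\.\d+)", log)
assert len(val_loss) == 1
assert float(val_loss[0]) > 0.0  # zero would mean the metric never updated
```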
- if not tf.__internal__.tf2.enabled(): - self.skipTest('Checkpoint callback only available in v2.') - - layers = [ - keras.layers.Dense(NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'), - keras.layers.Dense(NUM_CLASSES, activation='softmax') - ] - model = test_utils.get_model_from_layers(layers, input_shape=(3,)) - model.compile( - loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc']) - - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - - filepath = os.path.join(temp_dir, 'checkpoint.h5') - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - # Case 1 - monitor = 'val_loss' - save_best_only = False - mode = 'auto' - - cbks = [ - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - mode=mode) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - assert os.path.exists(filepath) - os.remove(filepath) - - # Case 2 - mode = 'min' - cbks = [ - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - mode=mode) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - assert os.path.exists(filepath) - os.remove(filepath) - - # Case 3 - mode = 'max' - monitor = 'val_acc' - cbks = [ - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - mode=mode) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - assert os.path.exists(filepath) - os.remove(filepath) - - # Case 4 - save_best_only = True - cbks = [ + output_types=("float64", "float64"), + output_shapes=([2], []), + ).batch(2) + validation = tf.data.Dataset.from_generator( + generator=generator, + output_types=("float64", "float64"), + output_shapes=([2], []), + ).batch(2) + + io_utils.enable_interactive_logging() + with self.captureWritesToStream(sys.stdout) as printed: + model.fit(x=training, validation_data=validation) + + # Make sure the value of val_ metrics are not zeros. + log_content = printed.contents() + val_loss = re.findall(r"val_loss: (\d\.\d+)", log_content) + self.assertLen(val_loss, 1) + self.assertGreater(float(val_loss[0]), 0.0) + + @test_combinations.run_with_all_model_types + def test_ModelCheckpoint(self): + if h5py is None: + return # Skip test if models cannot be saved. + + model_type = test_utils.get_model_type() + if model_type == "subclass": + # Skip test since subclassed models cannot be saved in .h5 format. + return + if not tf.__internal__.tf2.enabled(): + self.skipTest("Checkpoint callback only available in v2.") + + layers = [ + keras.layers.Dense( + NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu" + ), + keras.layers.Dense(NUM_CLASSES, activation="softmax"), + ] + model = test_utils.get_model_from_layers(layers, input_shape=(3,)) + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + metrics=["acc"], + ) + + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + + # Save model to a subdir inside the temp_dir so we can test + # automatic directory creation. 
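[Editor's note] Cases 1 through 4 below permute `monitor`, `mode`, and `save_best_only`; for orientation, the minimal shape of the callback they construct looks like this (values illustrative):

```python
from tensorflow import keras

checkpoint = keras.callbacks.ModelCheckpoint(
    "subdir/checkpoint.h5",  # missing parent directories are created on save
    monitor="val_loss",      # quantity compared across epochs
    mode="min",              # "auto" infers min/max from the monitor's name
    save_best_only=False,    # False: write every epoch; True: only on improvement
)
# model.fit(..., validation_data=..., callbacks=[checkpoint])
```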
+ filepath = os.path.join(temp_dir, "subdir", "checkpoint.h5") + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + + # Case 1 + monitor = "val_loss" + save_best_only = False + mode = "auto" + + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + mode=mode, + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + verbose=0, + ) + assert os.path.exists(filepath) + os.remove(filepath) + + # Case 2 + mode = "min" + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + mode=mode, + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + verbose=0, + ) + assert os.path.exists(filepath) + os.remove(filepath) + + # Case 3 + mode = "max" + monitor = "val_acc" + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + mode=mode, + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + verbose=0, + ) + assert os.path.exists(filepath) + os.remove(filepath) + + # Case 4 + save_best_only = True + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + mode=mode, + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + verbose=0, + ) + assert os.path.exists(filepath) + os.remove(filepath) + + # Case 5: metric not available. + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, monitor="unknown", save_best_only=True + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + verbose=0, + ) + # File won't be written. + assert not os.path.exists(filepath) + + # Case 6 + save_best_only = False + period = 2 + mode = "auto" + + filepath = os.path.join(temp_dir, "checkpoint.{epoch:02d}.h5") + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + mode=mode, + period=period, + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=4, + verbose=1, + ) + assert os.path.exists(filepath.format(epoch=2)) + assert os.path.exists(filepath.format(epoch=4)) + os.remove(filepath.format(epoch=2)) + os.remove(filepath.format(epoch=4)) + assert not os.path.exists(filepath.format(epoch=1)) + assert not os.path.exists(filepath.format(epoch=3)) + + # Invalid use: this will raise a warning but not an Exception. keras.callbacks.ModelCheckpoint( filepath, monitor=monitor, save_best_only=save_best_only, - mode=mode) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - assert os.path.exists(filepath) - os.remove(filepath) - - # Case 5: metric not available. 
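[Editor's note] For the schedule-driven cases further below (6 through 8), the arithmetic behind the asserted checkpoint epochs, worked out under the module's constants (10 training samples, `batch_size=2` in Cases 7 and 8):

```python
TRAIN_SAMPLES, batch_size = 10, 2
steps_per_epoch = TRAIN_SAMPLES // batch_size        # 5

# Case 6: period=2 over 4 epochs -> checkpoints at epochs 2 and 4.
# Case 7: save_freq="epoch" with period=5 over 10 epochs -> epochs 5 and 10.
print([e for e in range(1, 11) if e % 5 == 0])       # [5, 10]

# Case 8: an integer save_freq counts *batches* and overrides period;
# every 15th batch lands exactly at the end of epochs 3, 6 and 9.
print([s // steps_per_epoch for s in (15, 30, 45)])  # [3, 6, 9]
```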
- cbks = [ - keras.callbacks.ModelCheckpoint( - filepath, - monitor='unknown', - save_best_only=True) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - # File won't be written. - assert not os.path.exists(filepath) - - # Case 6 - save_best_only = False - period = 2 - mode = 'auto' - - filepath = os.path.join(temp_dir, 'checkpoint.{epoch:02d}.h5') - cbks = [ + mode="unknown", + ) + + # Case 7: `ModelCheckpoint` with a combination of `save_freq` and + # `period`. Though `period` is deprecated, we're testing it for + # backward-compatibility. + filepath = os.path.join(temp_dir, "checkpoint.epoch{epoch:02d}.h5") + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + mode=mode, + save_freq="epoch", + period=5, + ) + ] + assert not os.path.exists(filepath.format(epoch=0)) + assert not os.path.exists(filepath.format(epoch=5)) + model.fit( + x_train, + y_train, + batch_size=2, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=10, + verbose=1, + ) + assert not os.path.exists(filepath.format(epoch=1)) + assert not os.path.exists(filepath.format(epoch=2)) + assert not os.path.exists(filepath.format(epoch=3)) + assert not os.path.exists(filepath.format(epoch=4)) + assert os.path.exists(filepath.format(epoch=5)) + assert not os.path.exists(filepath.format(epoch=6)) + assert os.path.exists(filepath.format(epoch=10)) + os.remove(filepath.format(epoch=5)) + os.remove(filepath.format(epoch=10)) + + # Case 8: `ModelCheckpoint` with an integer `save_freq` + filepath = os.path.join(temp_dir, "checkpoint.epoch{epoch:02d}.h5") + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + mode=mode, + save_freq=15, + period=100, + ) # The period should be ignored (this test tests this). + ] + assert not os.path.exists(filepath.format(epoch=3)) + model.fit( + x_train, + y_train, + batch_size=2, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=10, + verbose=1, + ) + assert not os.path.exists(filepath.format(epoch=1)) + assert not os.path.exists(filepath.format(epoch=2)) + assert os.path.exists(filepath.format(epoch=3)) + assert not os.path.exists(filepath.format(epoch=4)) + assert not os.path.exists(filepath.format(epoch=5)) + assert os.path.exists(filepath.format(epoch=6)) + assert not os.path.exists(filepath.format(epoch=7)) + assert not os.path.exists(filepath.format(epoch=8)) + assert os.path.exists(filepath.format(epoch=9)) + os.remove(filepath.format(epoch=3)) + os.remove(filepath.format(epoch=6)) + os.remove(filepath.format(epoch=9)) + + # Case 9: `ModelCheckpoint` with valid and invalid save_freq argument. + with self.assertRaisesRegex(ValueError, "Unrecognized save_freq"): + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + mode=mode, + save_freq="invalid_save_freq", + ) + # The following should not raise ValueError. 
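[Editor's note] As Case 9 above spells out, `save_freq` accepts exactly two kinds of value, the string `"epoch"` or an integer number of batches; anything else is rejected when the callback is constructed. A quick standalone check (the bogus value is invented for illustration):

```python
from tensorflow import keras

for freq in ("epoch", 3):
    keras.callbacks.ModelCheckpoint("ckpt.h5", save_freq=freq)  # both accepted

try:
    keras.callbacks.ModelCheckpoint("ckpt.h5", save_freq="every_sunday")
except ValueError as err:
    print(err)  # message mentions "Unrecognized save_freq"
```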
keras.callbacks.ModelCheckpoint( filepath, monitor=monitor, save_best_only=save_best_only, mode=mode, - period=period) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=4, - verbose=1) - assert os.path.exists(filepath.format(epoch=2)) - assert os.path.exists(filepath.format(epoch=4)) - os.remove(filepath.format(epoch=2)) - os.remove(filepath.format(epoch=4)) - assert not os.path.exists(filepath.format(epoch=1)) - assert not os.path.exists(filepath.format(epoch=3)) - - # Invalid use: this will raise a warning but not an Exception. - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - mode='unknown') - - # Case 7: `ModelCheckpoint` with a combination of `save_freq` and `period`. - # Though `period` is deprecated, we're testing it for - # backward-compatibility. - filepath = os.path.join(temp_dir, 'checkpoint.epoch{epoch:02d}.h5') - cbks = [ - keras.callbacks.ModelCheckpoint( - filepath, monitor=monitor, mode=mode, save_freq='epoch', period=5) - ] - assert not os.path.exists(filepath.format(epoch=0)) - assert not os.path.exists(filepath.format(epoch=5)) - model.fit( - x_train, - y_train, - batch_size=2, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=10, - verbose=1) - assert not os.path.exists(filepath.format(epoch=1)) - assert not os.path.exists(filepath.format(epoch=2)) - assert not os.path.exists(filepath.format(epoch=3)) - assert not os.path.exists(filepath.format(epoch=4)) - assert os.path.exists(filepath.format(epoch=5)) - assert not os.path.exists(filepath.format(epoch=6)) - assert os.path.exists(filepath.format(epoch=10)) - os.remove(filepath.format(epoch=5)) - os.remove(filepath.format(epoch=10)) - - # Case 8: `ModelCheckpoint` with an integer `save_freq` - filepath = os.path.join(temp_dir, 'checkpoint.epoch{epoch:02d}.h5') - cbks = [ + save_freq="epoch", + ) keras.callbacks.ModelCheckpoint( filepath, monitor=monitor, save_best_only=save_best_only, mode=mode, - save_freq=15, - period=100) # The period should be ignored (this test tests this). - ] - assert not os.path.exists(filepath.format(epoch=3)) - model.fit( - x_train, - y_train, - batch_size=2, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=10, - verbose=1) - assert not os.path.exists(filepath.format(epoch=1)) - assert not os.path.exists(filepath.format(epoch=2)) - assert os.path.exists(filepath.format(epoch=3)) - assert not os.path.exists(filepath.format(epoch=4)) - assert not os.path.exists(filepath.format(epoch=5)) - assert os.path.exists(filepath.format(epoch=6)) - assert not os.path.exists(filepath.format(epoch=7)) - assert not os.path.exists(filepath.format(epoch=8)) - assert os.path.exists(filepath.format(epoch=9)) - os.remove(filepath.format(epoch=3)) - os.remove(filepath.format(epoch=6)) - os.remove(filepath.format(epoch=9)) - - # Case 9: `ModelCheckpoint` with valid and invalid save_freq argument. - with self.assertRaisesRegex(ValueError, 'Unrecognized save_freq'): - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - mode=mode, - save_freq='invalid_save_freq') - # The following should not raise ValueError. 
- keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - mode=mode, - save_freq='epoch') - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - mode=mode, - save_freq=3) - - # Case 10: `ModelCheckpoint` with valid and invalid `options` argument. - with self.assertRaisesRegex(TypeError, 'tf.train.CheckpointOptions'): - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - save_weights_only=True, - mode=mode, - options=tf.saved_model.SaveOptions()) - with self.assertRaisesRegex(TypeError, 'tf.saved_model.SaveOptions'): - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - save_weights_only=False, - mode=mode, - options=tf.train.CheckpointOptions()) - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - save_weights_only=True, - mode=mode, - options=tf.train.CheckpointOptions()) - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - save_weights_only=False, - mode=mode, - options=tf.saved_model.SaveOptions()) - - # Case 11: `ModelCheckpoint` save model with batch number in filename. - filepath = os.path.join(temp_dir, - 'checkpoint.epoch{epoch:02d}batch{batch:02d}.h5') - cbks = [ - keras.callbacks.ModelCheckpoint(filepath, monitor=monitor, save_freq=1) - ] - assert not os.path.exists(filepath.format(epoch=1, batch=1)) - assert not os.path.exists(filepath.format(epoch=1, batch=2)) - assert not os.path.exists(filepath.format(epoch=2, batch=1)) - assert not os.path.exists(filepath.format(epoch=2, batch=2)) - assert not os.path.exists(filepath.format(epoch=3, batch=1)) - assert not os.path.exists(filepath.format(epoch=3, batch=2)) - assert not os.path.exists(filepath.format(epoch=4, batch=1)) - assert not os.path.exists(filepath.format(epoch=4, batch=2)) - assert not os.path.exists(filepath.format(epoch=5, batch=1)) - assert not os.path.exists(filepath.format(epoch=5, batch=2)) - model.fit( - x_train, - y_train, - batch_size=5, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=5, - verbose=1) - - assert os.path.exists(filepath.format(epoch=1, batch=1)) - assert os.path.exists(filepath.format(epoch=1, batch=2)) - assert os.path.exists(filepath.format(epoch=2, batch=1)) - assert os.path.exists(filepath.format(epoch=2, batch=2)) - assert os.path.exists(filepath.format(epoch=3, batch=1)) - assert os.path.exists(filepath.format(epoch=3, batch=2)) - assert os.path.exists(filepath.format(epoch=4, batch=1)) - assert os.path.exists(filepath.format(epoch=4, batch=2)) - assert os.path.exists(filepath.format(epoch=5, batch=1)) - assert os.path.exists(filepath.format(epoch=5, batch=2)) - - os.remove(filepath.format(epoch=1, batch=1)) - os.remove(filepath.format(epoch=1, batch=2)) - os.remove(filepath.format(epoch=2, batch=1)) - os.remove(filepath.format(epoch=2, batch=2)) - os.remove(filepath.format(epoch=3, batch=1)) - os.remove(filepath.format(epoch=3, batch=2)) - os.remove(filepath.format(epoch=4, batch=1)) - os.remove(filepath.format(epoch=4, batch=2)) - os.remove(filepath.format(epoch=5, batch=1)) - os.remove(filepath.format(epoch=5, batch=2)) - - # Case 12: ModelCheckpoint saves model with initial_value_threshold param - mode = 'max' - monitor = 'val_acc' - initial_value_threshold = 0 - save_best_only = True - filepath = os.path.join(temp_dir, 'checkpoint.h5') - cbks = [ + save_freq=3, + ) + + # Case 10: `ModelCheckpoint` 
with valid and invalid `options` argument. + with self.assertRaisesRegex(TypeError, "tf.train.CheckpointOptions"): + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + save_weights_only=True, + mode=mode, + options=tf.saved_model.SaveOptions(), + ) + with self.assertRaisesRegex(TypeError, "tf.saved_model.SaveOptions"): + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + save_weights_only=False, + mode=mode, + options=tf.train.CheckpointOptions(), + ) keras.callbacks.ModelCheckpoint( filepath, monitor=monitor, save_best_only=save_best_only, - initial_value_threshold=initial_value_threshold, - mode=mode) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - assert os.path.exists(filepath) - os.remove(filepath) - - # Case 13: ModelCheckpoint saves model with initial_value_threshold param - mode = 'auto' - monitor = 'val_loss' - initial_value_threshold = None - save_best_only = True - cbks = [ - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - initial_value_threshold=initial_value_threshold, - mode=mode) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - assert os.path.exists(filepath) - os.remove(filepath) - - # Case 14: ModelCheckpoint doesnt save model if loss was minimum earlier - mode = 'min' - monitor = 'val_loss' - initial_value_threshold = 0 - save_best_only = True - cbks = [ - keras.callbacks.ModelCheckpoint( - filepath, - monitor=monitor, - save_best_only=save_best_only, - initial_value_threshold=initial_value_threshold, - mode=mode) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - assert not os.path.exists(filepath) - - # Case 15: ModelCheckpoint doesnt save model if loss was min earlier in auto - # mode - mode = 'auto' - monitor = 'val_loss' - initial_value_threshold = 0 - save_best_only = True - cbks = [ + save_weights_only=True, + mode=mode, + options=tf.train.CheckpointOptions(), + ) keras.callbacks.ModelCheckpoint( filepath, monitor=monitor, save_best_only=save_best_only, - initial_value_threshold=initial_value_threshold, - mode=mode) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - assert not os.path.exists(filepath) - - @test_utils.run_v2_only - def test_ModelCheckpoint_subclass_save_weights_false(self): - model = test_utils.get_small_subclass_mlp(NUM_HIDDEN, NUM_CLASSES) - model.compile( - loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc']) - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - filepath = os.path.join(temp_dir, 'checkpoint') - cbks = [keras.callbacks.ModelCheckpoint( - filepath, save_weights_only=False)] - - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_train = np_utils.to_categorical(y_train, num_classes=NUM_CLASSES) - - model.fit( - x_train, - y_train, - callbacks=cbks, - epochs=1, - verbose=0) - # Check that the filepath is a SavedModel directory. 
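[Editor's note] The assertion that follows hinges on a format rule worth stating: when `save_weights_only=False` and the filepath has no `.h5` extension, the model is exported in the TensorFlow SavedModel format, i.e. a directory containing `saved_model.pb` plus a `variables/` subfolder. A minimal sketch under that assumption (paths illustrative):

```python
import os

import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(1,))])
model.compile("sgd", loss="mse")

cb = keras.callbacks.ModelCheckpoint("ckpt_dir", save_weights_only=False)
model.fit(tf.ones((4, 1)), tf.ones((4, 1)), epochs=1, verbose=0, callbacks=[cb])
print("saved_model.pb" in os.listdir("ckpt_dir"))  # True: a SavedModel export
```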
- self.assertIn('saved_model.pb', os.listdir(filepath)) - - def _get_dummy_resource_for_model_checkpoint_testing(self): - - def get_input_datasets(): - # Simple training input. - train_input = [[1.]] * 16 - train_label = [[0.]] * 16 - ds = tf.data.Dataset.from_tensor_slices((train_input, train_label)) - return ds.batch(8, drop_remainder=True) - - # Very simple bias model to eliminate randomness. - optimizer = gradient_descent.SGD(0.1) - model = sequential.Sequential() - model.add(test_utils.Bias(input_shape=(1,))) - model.compile(loss='mae', optimizer=optimizer, metrics=['mae']) - train_ds = get_input_datasets() - - temp_dir = self.get_temp_dir() - filepath = os.path.join(temp_dir, 'checkpoint.epoch{epoch:02d}.h5') - - # The filepath shouldn't exist at the beginning. - self.assertFalse(os.path.exists(filepath)) - callback = keras.callbacks.ModelCheckpoint( - filepath=filepath, save_weights_only=True) - - return model, train_ds, callback, filepath - - def _run_load_weights_on_restart_test_common_iterations(self): - - (model, train_ds, callback, - filepath) = self._get_dummy_resource_for_model_checkpoint_testing() - initial_epochs = 3 - model.fit(train_ds, epochs=initial_epochs, callbacks=[callback]) - - # The files should exist after fitting with callback. - for epoch in range(initial_epochs): - self.assertTrue(os.path.exists(filepath.format(epoch=epoch + 1))) - self.assertFalse(os.path.exists(filepath.format(epoch=initial_epochs + 1))) - self.assertEqual( - callback._get_most_recently_modified_file_matching_pattern(filepath), - filepath.format(epoch=initial_epochs)) - - model.fit(train_ds, epochs=1) - weights_after_one_more_epoch = model.get_weights() - - # The filepath should continue to exist after fitting without callback. - for epoch in range(initial_epochs): - self.assertTrue(os.path.exists(filepath.format(epoch=epoch + 1))) - - return model, train_ds, filepath, weights_after_one_more_epoch - - @staticmethod - def get_ModelCheckpoint_load_weights_on_restart_true_test(save_weights_only): - - def func(self): - (model, train_ds, filepath, weights_after_one_more_epoch - ) = self._run_load_weights_on_restart_test_common_iterations() - - # Sleep for some short time period ensuring the files are created with - # a different time (in MacOS OSS the granularity is only 1 second). - time.sleep(2) - callback = keras.callbacks.ModelCheckpoint( - filepath=filepath, - save_weights_only=save_weights_only, - load_weights_on_restart=True) - model.fit(train_ds, epochs=1, callbacks=[callback]) - weights_after_model_restoring_and_one_more_epoch = model.get_weights() - - self.assertEqual( - callback._get_most_recently_modified_file_matching_pattern(filepath), - filepath.format(epoch=1)) - - model.fit( - train_ds, - epochs=1, - callbacks=[ - keras.callbacks.ModelCheckpoint( - filepath=filepath, - save_weights_only=save_weights_only, - load_weights_on_restart=True) - ]) - weights_with_one_final_extra_epoch = model.get_weights() - - # Asserting the weights one epoch after initial fitting and another epoch - # after that are closed, if a ModelCheckpoint with - # load_weights_on_restart=True is given (so the model is restored at the - # beginning of training). 
- self.assertAllClose(weights_after_one_more_epoch, - weights_after_model_restoring_and_one_more_epoch) - - self.assertNotAllClose(weights_after_one_more_epoch, - weights_with_one_final_extra_epoch) - - return func - - @staticmethod - def get_ModelCheckpoint_load_weights_on_restart_false_test(save_weights_only): - - def func(self): - (model, train_ds, filepath, weights_after_one_more_epoch - ) = self._run_load_weights_on_restart_test_common_iterations() - - model.fit( - train_ds, - epochs=1, - callbacks=[ - keras.callbacks.ModelCheckpoint( - filepath=filepath, save_weights_only=save_weights_only) - ]) - weights_after_model_restoring_and_one_more_epoch = model.get_weights() - - # Asserting the weights one epoch after initial fitting and another epoch - # after that are different, if a ModelCheckpoint with - # load_weights_on_restart=False is given (so the model is not restored at - # the beginning of training). - self.assertNotAllClose(weights_after_one_more_epoch, - weights_after_model_restoring_and_one_more_epoch) - - return func - - test_model_checkpoint_load_weights_on_restart_true_save_weights_only_true = \ - get_ModelCheckpoint_load_weights_on_restart_true_test.__func__(True) - - test_model_checkpoint_load_weights_on_restart_true_save_weights_only_false = \ - get_ModelCheckpoint_load_weights_on_restart_true_test.__func__(False) - - test_model_checkpoint_load_weights_on_restart_false_save_weights_only_true = \ - get_ModelCheckpoint_load_weights_on_restart_false_test.__func__(True) - - test_model_checkpoint_load_weights_on_restart_false_save_weights_only_false \ - = get_ModelCheckpoint_load_weights_on_restart_false_test.__func__(False) - - def test_ModelCheckpoint_override_if_file_exist(self): - (model, train_ds, filepath, - _) = self._run_load_weights_on_restart_test_common_iterations() - - # Sleep for some short time period to ensure the files are created with - # a different time (in MacOS OSS the granularity is only 1 second). - time.sleep(2) - callback = keras.callbacks.ModelCheckpoint( - filepath=filepath, save_weights_only=True) - model.load_weights( - callback._get_most_recently_modified_file_matching_pattern(filepath)) - weights_before_additional_fit = model.get_weights() - model.fit(train_ds, epochs=1, callbacks=[callback]) - model.load_weights( - callback._get_most_recently_modified_file_matching_pattern(filepath)) - weights_after_additional_fit = model.get_weights() - - self.assertNotAllClose(weights_before_additional_fit, - weights_after_additional_fit) - - def test_fit_with_ModelCheckpoint_with_tf_config(self): - (model, train_ds, callback, - _) = self._get_dummy_resource_for_model_checkpoint_testing() - - os.environ['TF_CONFIG'] = json.dumps({ - 'cluster': { - 'worker': ['localhost:23333'] - }, - 'task': { - 'type': 'worker', - 'index': 0 - } - }) - - # `model.fit()` should work regardless of the presence of `TF_CONFIG`. 
- model.fit(train_ds, epochs=1, callbacks=[callback]) - - def test_fit_with_ModelCheckpoint_with_dir_as_h5_filepath(self): - (model, train_ds, callback, - filepath) = self._get_dummy_resource_for_model_checkpoint_testing() - - temp_dir = self.get_temp_dir() - filepath = os.path.join(temp_dir, 'temp.h5') - - self.assertFalse(os.path.exists(filepath)) - os.mkdir(filepath) - self.assertTrue(os.path.exists(filepath)) - - callback = keras.callbacks.ModelCheckpoint(filepath=filepath) - - with self.assertRaisesRegex( - IOError, 'Please specify a non-directory ' - 'filepath for ModelCheckpoint.'): - model.fit(train_ds, epochs=1, callbacks=[callback]) - - def test_ModelCheckpoint_with_bad_path_placeholders(self): - (model, train_ds, callback, - filepath) = self._get_dummy_resource_for_model_checkpoint_testing() - - temp_dir = self.get_temp_dir() - filepath = os.path.join(temp_dir, 'chkpt_{epoch:02d}_{mape:.2f}.h5') - callback = keras.callbacks.ModelCheckpoint(filepath=filepath) - - with self.assertRaisesRegex(KeyError, 'Failed to format this callback ' - 'filepath.*'): - model.fit(train_ds, epochs=1, callbacks=[callback]) - - def test_ModelCheckpoint_nonblocking(self): - filepath = self.get_temp_dir() - # Should only cause a sync block when saving is actually performed. - callback = keras.callbacks.ModelCheckpoint(filepath=filepath, save_freq=100) - self.assertTrue(callback._supports_tf_logs) - - model = keras.Sequential([keras.layers.Dense(1)]) - cb_list = keras.callbacks.CallbackList([callback], - model=model, - epochs=1, - steps=10, - verbose=0) - - tensor = tf.convert_to_tensor(1.) - - def mock_numpy(): - raise RuntimeError( - 'If this error is seen, ModelCheckpoint is causing a blocking ' - 'NumPy conversion even when not checkpointing.') - - tensor.numpy = mock_numpy - - logs = {'metric': tensor} - - cb_list.on_train_begin(logs) - cb_list.on_epoch_begin(0, logs) - cb_list.on_train_batch_begin(0, logs) - cb_list.on_train_batch_end(0, logs) - cb_list.on_epoch_end(0, logs) - cb_list.on_train_end(logs) - - cb_list.on_test_begin(logs) - cb_list.on_test_batch_begin(0, logs) - cb_list.on_test_batch_end(0, logs) - cb_list.on_test_end(logs) - - cb_list.on_predict_begin(logs) - cb_list.on_predict_batch_begin(logs) - cb_list.on_predict_batch_end(logs) - cb_list.on_predict_end(logs) - - def test_verbose_2_logging(self): - data = np.random.random((100, 1)) - labels = np.where(data > 0.5, 1, 0) - model = keras.models.Sequential((keras.layers.Dense( - 1, input_dim=1, activation='relu'), keras.layers.Dense( - 1, activation='sigmoid'),)) - model.compile( - optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy']) - expected_log = r'(.*- loss:.*- acc.*:.*epoch)+' - with self.captureWritesToStream(sys.stdout) as printed: - model.fit(data, labels, verbose=2, epochs=20) - self.assertRegex(printed.contents(), expected_log) - - def test_ProgbarLogger_verbose_2_nonblocking(self): - # Should only cause a sync block on epoch end methods. - callback = keras.callbacks.ProgbarLogger(count_mode='steps') - self.assertTrue(callback._supports_tf_logs) - - model = keras.Sequential([keras.layers.Dense(1)]) - cb_list = keras.callbacks.CallbackList([callback], - model=model, - epochs=1, - steps=10, - verbose=2) - - tensor = tf.convert_to_tensor(1.) 
- - def mock_numpy(): - raise RuntimeError( - 'If this error is seen, ModelCheckpoint is causing a blocking ' - 'NumPy conversion even when not checkpointing.') - - tensor.numpy = mock_numpy - logs = {'metric': tensor} - - cb_list.on_train_begin(logs) - cb_list.on_epoch_begin(0, logs) - cb_list.on_train_batch_begin(0, logs) - cb_list.on_train_batch_end(0, logs) - - cb_list.on_test_begin(logs) - cb_list.on_test_batch_begin(0, logs) - cb_list.on_test_batch_end(0, logs) - cb_list.on_test_end(logs) - - with self.assertRaisesRegex(RuntimeError, 'NumPy conversion'): - # on_epoch_end should still block. - cb_list.on_epoch_end(0, logs) - cb_list.on_train_end(logs) - - def test_EarlyStopping(self): - with self.cached_session(): - np.random.seed(123) - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - model = test_utils.get_small_sequential_mlp( - num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM) - model.compile( - loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc']) - - cases = [ - ('max', 'val_acc'), - ('min', 'val_loss'), - ('auto', 'val_acc'), - ('auto', 'loss'), - ('unknown', 'unknown') - ] - for mode, monitor in cases: - patience = 0 + save_weights_only=False, + mode=mode, + options=tf.saved_model.SaveOptions(), + ) + + # Case 11: `ModelCheckpoint` save model with batch number in filename. + filepath = os.path.join( + temp_dir, "checkpoint.epoch{epoch:02d}batch{batch:02d}.h5" + ) cbks = [ - keras.callbacks.EarlyStopping( - patience=patience, monitor=monitor, mode=mode) + keras.callbacks.ModelCheckpoint( + filepath, monitor=monitor, save_freq=1 + ) ] + assert not os.path.exists(filepath.format(epoch=1, batch=1)) + assert not os.path.exists(filepath.format(epoch=1, batch=2)) + assert not os.path.exists(filepath.format(epoch=2, batch=1)) + assert not os.path.exists(filepath.format(epoch=2, batch=2)) + assert not os.path.exists(filepath.format(epoch=3, batch=1)) + assert not os.path.exists(filepath.format(epoch=3, batch=2)) + assert not os.path.exists(filepath.format(epoch=4, batch=1)) + assert not os.path.exists(filepath.format(epoch=4, batch=2)) + assert not os.path.exists(filepath.format(epoch=5, batch=1)) + assert not os.path.exists(filepath.format(epoch=5, batch=2)) model.fit( x_train, y_train, - batch_size=BATCH_SIZE, + batch_size=5, validation_data=(x_test, y_test), callbacks=cbks, epochs=5, - verbose=0) - - def test_EarlyStopping_reuse(self): - with self.cached_session(): - np.random.seed(1337) - patience = 3 - data = np.random.random((100, 1)) - labels = np.where(data > 0.5, 1, 0) - model = keras.models.Sequential((keras.layers.Dense( - 1, input_dim=1, activation='relu'), keras.layers.Dense( - 1, activation='sigmoid'),)) - model.compile( - optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy']) - weights = model.get_weights() - - # This should allow training to go for at least `patience` epochs - model.set_weights(weights) - - stopper = keras.callbacks.EarlyStopping(monitor='acc', patience=patience) - hist = model.fit(data, labels, callbacks=[stopper], verbose=0, epochs=20) - assert len(hist.epoch) >= patience - - def test_EarlyStopping_with_baseline(self): - with self.cached_session(): - np.random.seed(1337) - baseline = 0.6 - (data, labels), _ = test_utils.get_test_data( - train_samples=100, - test_samples=50, - 
input_shape=(1,), - num_classes=NUM_CLASSES) - model = test_utils.get_small_sequential_mlp( - num_hidden=1, num_classes=1, input_dim=1) - model.compile( - optimizer='sgd', loss='binary_crossentropy', metrics=['acc']) - - stopper = keras.callbacks.EarlyStopping(monitor='acc', - baseline=baseline) - hist = model.fit(data, labels, callbacks=[stopper], verbose=0, epochs=20) - assert len(hist.epoch) == 2 - - patience = 3 - stopper = keras.callbacks.EarlyStopping(monitor='acc', - patience=patience, - baseline=baseline) - hist = model.fit(data, labels, callbacks=[stopper], verbose=0, epochs=20) - assert len(hist.epoch) >= patience - - def test_EarlyStopping_final_weights_when_restoring_model_weights(self): - - class DummyModel: - - def __init__(self): - self.stop_training = False - self.weights = -1 - - def get_weights(self): - return self.weights - - def set_weights(self, weights): - self.weights = weights - - def set_weight_to_epoch(self, epoch): - self.weights = epoch - - early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', - patience=2, - restore_best_weights=True) - early_stop.model = DummyModel() - losses = [0.2, 0.15, 0.1, 0.11, 0.12] - # The best configuration is in the epoch 2 (loss = 0.1000). - epochs_trained = 0 - early_stop.on_train_begin() - for epoch in range(len(losses)): - epochs_trained += 1 - early_stop.model.set_weight_to_epoch(epoch=epoch) - early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) - if early_stop.model.stop_training: - break - # The best configuration is in epoch 2 (loss = 0.1000), - # and while patience = 2, we're restoring the best weights, - # so we end up at the epoch with the best weights, i.e. epoch 2 - self.assertEqual(early_stop.model.get_weights(), 2) - - # Check early stopping when no model beats the baseline. - early_stop = keras.callbacks.EarlyStopping( - monitor='val_loss', patience=5, baseline=0.5, restore_best_weights=True) - early_stop.model = DummyModel() - losses = [0.9, 0.8, 0.7, 0.71, 0.72, 0.73] - # The best configuration is in the epoch 2 (loss = 0.7000). - epochs_trained = 0 - early_stop.on_train_begin() - for epoch in range(len(losses)): - epochs_trained += 1 - early_stop.model.set_weight_to_epoch(epoch=epoch) - early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) - if early_stop.model.stop_training: - break - # No epoch improves on the baseline, so we should train for only 5 epochs, - # and restore the second model. - self.assertEqual(epochs_trained, 5) - self.assertEqual(early_stop.model.get_weights(), 2) - - def test_RemoteMonitor(self): - if requests is None: - self.skipTest('`requests` required to run this test') - return None - - monitor = keras.callbacks.RemoteMonitor() - # This will raise a warning since the default address in unreachable: - monitor.on_epoch_end(0, logs={'loss': 0.}) - - def test_LearningRateScheduler(self): - with self.cached_session(): - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - model = test_utils.get_small_sequential_mlp( - num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - - cbks = [ - keras.callbacks.LearningRateScheduler( - lambda x: 1. / (1. 
+ x), verbose=1) - ] - io_utils.enable_interactive_logging() - with self.captureWritesToStream(sys.stdout) as printed: + verbose=1, + ) + + assert os.path.exists(filepath.format(epoch=1, batch=1)) + assert os.path.exists(filepath.format(epoch=1, batch=2)) + assert os.path.exists(filepath.format(epoch=2, batch=1)) + assert os.path.exists(filepath.format(epoch=2, batch=2)) + assert os.path.exists(filepath.format(epoch=3, batch=1)) + assert os.path.exists(filepath.format(epoch=3, batch=2)) + assert os.path.exists(filepath.format(epoch=4, batch=1)) + assert os.path.exists(filepath.format(epoch=4, batch=2)) + assert os.path.exists(filepath.format(epoch=5, batch=1)) + assert os.path.exists(filepath.format(epoch=5, batch=2)) + + os.remove(filepath.format(epoch=1, batch=1)) + os.remove(filepath.format(epoch=1, batch=2)) + os.remove(filepath.format(epoch=2, batch=1)) + os.remove(filepath.format(epoch=2, batch=2)) + os.remove(filepath.format(epoch=3, batch=1)) + os.remove(filepath.format(epoch=3, batch=2)) + os.remove(filepath.format(epoch=4, batch=1)) + os.remove(filepath.format(epoch=4, batch=2)) + os.remove(filepath.format(epoch=5, batch=1)) + os.remove(filepath.format(epoch=5, batch=2)) + + # Case 12: ModelCheckpoint saves model with initial_value_threshold + # param + mode = "max" + monitor = "val_acc" + initial_value_threshold = 0 + save_best_only = True + filepath = os.path.join(temp_dir, "checkpoint.h5") + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + initial_value_threshold=initial_value_threshold, + mode=mode, + ) + ] model.fit( x_train, y_train, batch_size=BATCH_SIZE, validation_data=(x_test, y_test), callbacks=cbks, - epochs=5) - self.assertIn('LearningRateScheduler setting learning rate to 1.0', - printed.contents()) - assert ( - float(keras.backend.get_value( - model.optimizer.lr)) - 0.2) < keras.backend.epsilon() - - cbks = [keras.callbacks.LearningRateScheduler(lambda x, lr: lr / 2)] - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=2, - verbose=0) - assert ( - float(keras.backend.get_value( - model.optimizer.lr)) - 0.01 / 4) < keras.backend.epsilon() - - cbks = [ - keras.callbacks.LearningRateScheduler( - lambda epoch, _: learning_rate_schedule.CosineDecay(0.01, 2) - (epoch)) - ] - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=2, - verbose=0) - - cosine_decay_np = 0.5 * (1 + np.cos(np.pi * (1 / 2))) - decayed_learning_rate = 0.01 * cosine_decay_np - - assert (float(keras.backend.get_value(model.optimizer.lr)) - - decayed_learning_rate) < keras.backend.epsilon() - - def test_ReduceLROnPlateau(self): - with self.cached_session(): - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - def make_model(): - tf.compat.v1.set_random_seed(1234) - np.random.seed(1337) - model = test_utils.get_small_sequential_mlp( - num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM) - model.compile( - loss='categorical_crossentropy', - 
optimizer=gradient_descent.SGD(lr=0.1)) - return model - - # TODO(psv): Make sure the callback works correctly when min_delta is - # set as 0. Test fails when the order of this callback and assertion is - # interchanged. - model = make_model() - cbks = [ - keras.callbacks.ReduceLROnPlateau( - monitor='val_loss', - factor=0.1, - min_delta=0, - patience=1, - cooldown=5) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=2, - verbose=0) - self.assertAllClose( - float(keras.backend.get_value(model.optimizer.lr)), 0.1, atol=1e-4) - - model = make_model() - # This should reduce the LR after the first epoch (due to high epsilon). - cbks = [ - keras.callbacks.ReduceLROnPlateau( - monitor='val_loss', - factor=0.1, - min_delta=10, - patience=1, - cooldown=5) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=2, - verbose=2) - self.assertAllClose( - float(keras.backend.get_value(model.optimizer.lr)), 0.01, atol=1e-4) - - def test_ReduceLROnPlateau_patience(self): - - class DummyOptimizer: - - def __init__(self): - self.lr = keras.backend.variable(1.0) - - class DummyModel: - - def __init__(self): - self.optimizer = DummyOptimizer() - - reduce_on_plateau = keras.callbacks.ReduceLROnPlateau( - monitor='val_loss', patience=2) - reduce_on_plateau.model = DummyModel() - - losses = [0.0860, 0.1096, 0.1040] - lrs = [] - - for epoch in range(len(losses)): - reduce_on_plateau.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) - lrs.append(keras.backend.get_value(reduce_on_plateau.model.optimizer.lr)) - - # The learning rates should be 1.0 except the last one - for lr in lrs[:-1]: - self.assertEqual(lr, 1.0) - self.assertLess(lrs[-1], 1.0) - - def test_ReduceLROnPlateau_backwards_compatibility(self): - with tf.compat.v1.test.mock.patch.object(logging, 'warning') as mock_log: - reduce_on_plateau = keras.callbacks.ReduceLROnPlateau(epsilon=1e-13) - self.assertRegex( - str(mock_log.call_args), '`epsilon` argument is deprecated') - self.assertFalse(hasattr(reduce_on_plateau, 'epsilon')) - self.assertTrue(hasattr(reduce_on_plateau, 'min_delta')) - self.assertEqual(reduce_on_plateau.min_delta, 1e-13) - - def test_CSVLogger(self): - with self.cached_session(): - np.random.seed(1337) - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - filepath = os.path.join(temp_dir, 'log.tsv') - - sep = '\t' - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - def make_model(): - np.random.seed(1337) - model = test_utils.get_small_sequential_mlp( - num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM) - model.compile( - loss='categorical_crossentropy', - optimizer=gradient_descent.SGD(lr=0.1), - metrics=['accuracy']) - return model - - # case 1, create new file with defined separator - model = make_model() - cbks = [keras.callbacks.CSVLogger(filepath, separator=sep)] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - - assert os.path.exists(filepath) - with open(filepath) as csvfile: - dialect = csv.Sniffer().sniff(csvfile.read()) - assert dialect.delimiter == sep - del model - del cbks - - # case 2, append data to 
existing file, skip header - model = make_model() - cbks = [keras.callbacks.CSVLogger(filepath, separator=sep, append=True)] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=1, - verbose=0) - - # case 3, reuse of CSVLogger object - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=2, - verbose=0) - - with open(filepath) as csvfile: - list_lines = csvfile.readlines() - for line in list_lines: - assert line.count(sep) == 4 - assert len(list_lines) == 5 - output = ' '.join(list_lines) - assert len(re.findall('epoch', output)) == 1 - - os.remove(filepath) - - def test_stop_training_csv(self): - # Test that using the CSVLogger callback with the TerminateOnNaN callback - # does not result in invalid CSVs. - np.random.seed(1337) - tmpdir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) - - with self.cached_session(): - fp = os.path.join(tmpdir, 'test.csv') - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - cbks = [keras.callbacks.TerminateOnNaN(), keras.callbacks.CSVLogger(fp)] - model = keras.models.Sequential() - for _ in range(5): - model.add(keras.layers.Dense(2, input_dim=INPUT_DIM, activation='relu')) - model.add(keras.layers.Dense(NUM_CLASSES, activation='linear')) - model.compile(loss='mean_squared_error', - optimizer='rmsprop') - - def data_generator(): - i = 0 - max_batch_index = len(x_train) // BATCH_SIZE - tot = 0 - while 1: - if tot > 3 * len(x_train): - yield (np.ones([BATCH_SIZE, INPUT_DIM]) * np.nan, - np.ones([BATCH_SIZE, NUM_CLASSES]) * np.nan) - else: - yield (x_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE], - y_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]) - i += 1 - tot += 1 - i %= max_batch_index - - history = model.fit_generator(data_generator(), - len(x_train) // BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=20) - loss = history.history['loss'] - assert len(loss) > 1 - assert loss[-1] == np.inf or np.isnan(loss[-1]) - - values = [] - with open(fp) as f: - # On Windows, due to \r\n line ends, we may end up reading empty lines - # after each line. Skip empty lines. - values = [x for x in csv.reader(f) if x] - - assert 'nan' in values[-1], 'The last epoch was not logged.' 
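Editor's note on the `test_stop_training_csv` hunk above: its point is that `TerminateOnNaN` can stop training on the first NaN loss without leaving `CSVLogger`'s output malformed. A minimal sketch of that callback combination, not part of the patch; `model`, `x`, and `y` are hypothetical names assumed to exist.

import os
from tensorflow import keras

# Hypothetical names: `model`, `x`, and `y` are assumed to exist.
log_path = os.path.join("/tmp", "training_log.csv")  # illustrative path
cbks = [
    keras.callbacks.TerminateOnNaN(),     # stops training on a NaN or inf loss
    keras.callbacks.CSVLogger(log_path),  # appends one well-formed row per epoch
]
history = model.fit(x, y, epochs=20, callbacks=cbks, verbose=0)
# If a NaN loss appeared, len(history.history["loss"]) is smaller than 20.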
- - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_TerminateOnNaN(self): - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - cbks = [keras.callbacks.TerminateOnNaN()] - model = keras.models.Sequential() - initializer = keras.initializers.Constant(value=1e5) - for _ in range(5): - model.add( - keras.layers.Dense( - 2, - input_dim=INPUT_DIM, - activation='relu', - kernel_initializer=initializer)) - model.add(keras.layers.Dense(NUM_CLASSES)) - model.compile(loss='mean_squared_error', optimizer='rmsprop') - - history = model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=20) - loss = history.history['loss'] - self.assertEqual(len(loss), 1) - self.assertTrue(np.isnan(loss[0]) or np.isinf(loss[0])) - - @unittest.skipIf( - os.name == 'nt', - 'use_multiprocessing=True does not work on windows properly.') - def test_LambdaCallback(self): - with self.cached_session(): - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - model = keras.models.Sequential() - model.add( - keras.layers.Dense( - NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu')) - model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax')) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - - # Start an arbitrary process that should run during model - # training and be terminated after training has completed. 
- e = threading.Event() - - def target(): - e.wait() - - t = threading.Thread(target=target) - t.start() - cleanup_callback = keras.callbacks.LambdaCallback( - on_train_end=lambda logs: e.set()) - - cbks = [cleanup_callback] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=5, - verbose=0) - t.join() - assert not t.is_alive() - - def test_RemoteMonitor_np_array(self): - if requests is None: - self.skipTest('`requests` required to run this test') - with tf.compat.v1.test.mock.patch.object(requests, 'post') as requests_post: - monitor = keras.callbacks.RemoteMonitor(send_as_json=True) - a = np.arange(1) # a 1 by 1 array - logs = {'loss': 0., 'val': a} - monitor.on_epoch_end(0, logs=logs) - send = {'loss': 0., 'epoch': 0, 'val': 0} - requests_post.assert_called_once_with( - monitor.root + monitor.path, json=send, headers=monitor.headers) - - def test_RemoteMonitor_np_float32(self): - if requests is None: - self.skipTest('`requests` required to run this test') - - with tf.compat.v1.test.mock.patch.object(requests, 'post') as requests_post: - monitor = keras.callbacks.RemoteMonitor(send_as_json=True) - a = np.float32(1.0) # a float32 generic type - logs = {'loss': 0., 'val': a} - monitor.on_epoch_end(0, logs=logs) - send = {'loss': 0., 'epoch': 0, 'val': 1.0} - requests_post.assert_called_once_with( - monitor.root + monitor.path, json=send, headers=monitor.headers) - - def test_RemoteMonitorWithJsonPayload(self): - if requests is None: - self.skipTest('`requests` required to run this test') - return None - with self.cached_session(): - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = keras.utils.np_utils.to_categorical(y_test) - y_train = keras.utils.np_utils.to_categorical(y_train) - model = keras.models.Sequential() - model.add( - keras.layers.Dense( - NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu')) - model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax')) - model.compile( - loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - cbks = [keras.callbacks.RemoteMonitor(send_as_json=True)] - - with tf.compat.v1.test.mock.patch.object(requests, 'post'): + epochs=1, + verbose=0, + ) + assert os.path.exists(filepath) + os.remove(filepath) + + # Case 13: ModelCheckpoint saves model with initial_value_threshold + # param + mode = "auto" + monitor = "val_loss" + initial_value_threshold = None + save_best_only = True + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + initial_value_threshold=initial_value_threshold, + mode=mode, + ) + ] model.fit( x_train, y_train, batch_size=BATCH_SIZE, validation_data=(x_test, y_test), callbacks=cbks, - epochs=1) - - def test_progbar_infers_steps(self): - x, y = np.ones((10, 1)), np.ones((10, 1)) - data = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) - data = data.filter(lambda x, y: True) # Unknown cardinality. 
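Editor's note on the `test_progbar_infers_steps` hunk: `Dataset.filter()` erases the statically known element count, so `ProgbarLogger` starts with `target=None` and infers the step count from the first epoch. A self-contained sketch of why the cardinality becomes unknown (not part of the patch):

import numpy as np
import tensorflow as tf

x, y = np.ones((10, 1)), np.ones((10, 1))
data = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
print(tf.data.experimental.cardinality(data).numpy())  # 5 batches, statically known
data = data.filter(lambda x, y: True)
# filter() may drop arbitrarily many elements, so the count becomes
# tf.data.experimental.UNKNOWN_CARDINALITY (-2).
print(tf.data.experimental.cardinality(data).numpy())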
- - progbar = keras.callbacks.ProgbarLogger('steps') - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') - self.assertIsNone(progbar.target) - model.fit(data, epochs=2, callbacks=[progbar]) - self.assertEqual(progbar.target, 5) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_callback_passed_floats(self): - - class MyCallback(keras.callbacks.Callback): - - def on_batch_end(self, batch, logs=None): - assert isinstance(batch, int) - assert isinstance(logs['loss'], float) - self.on_batch_end_called = True - - def on_epoch_end(self, batch, logs=None): - assert isinstance(batch, int) - assert isinstance(logs['loss'], float) - self.on_epoch_end_called = True - - x, y = np.ones((10, 1)), np.ones((10, 1)) - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - - callback = MyCallback() - model.fit(x, y, epochs=2, callbacks=[callback]) - self.assertTrue(callback.on_batch_end_called) - self.assertTrue(callback.on_batch_end_called) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_implements_batch_hooks(self): - - class MyCallbackWithBatchHooks(keras.callbacks.Callback): - - def __init__(self): - self.train_batches = 0 - self.test_batches = 0 - self.predict_batches = 0 + epochs=1, + verbose=0, + ) + assert os.path.exists(filepath) + os.remove(filepath) + + # Case 14: ModelCheckpoint doesn't save model if loss was minimum earlier + mode = "min" + monitor = "val_loss" + initial_value_threshold = 0 + save_best_only = True + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + initial_value_threshold=initial_value_threshold, + mode=mode, + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + verbose=0, + ) + assert not os.path.exists(filepath) + + # Case 15: ModelCheckpoint doesn't save model if loss was min earlier in + # auto mode + mode = "auto" + monitor = "val_loss" + initial_value_threshold = 0 + save_best_only = True + cbks = [ + keras.callbacks.ModelCheckpoint( + filepath, + monitor=monitor, + save_best_only=save_best_only, + initial_value_threshold=initial_value_threshold, + mode=mode, + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + verbose=0, + ) + assert not os.path.exists(filepath) + + @test_utils.run_v2_only + def test_ModelCheckpoint_subclass_SavedModel_save_weights_false(self): + model = test_utils.get_small_subclass_mlp(NUM_HIDDEN, NUM_CLASSES) + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + metrics=["acc"], + ) + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + filepath = os.path.join(temp_dir, "checkpoint") + cbks = [ + keras.callbacks.ModelCheckpoint(filepath, save_weights_only=False) + ] - def on_train_batch_end(self, batch, logs=None): - self.train_batches += 1 + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_train = np_utils.to_categorical(y_train, num_classes=NUM_CLASSES) + + model.fit(x_train, y_train, callbacks=cbks, epochs=1, verbose=0) + # Check that the filepath is a SavedModel directory.
+ self.assertIn("saved_model.pb", os.listdir(filepath)) + + @test_utils.run_v2_only + def test_ModelCheckpoint_subclass_KerasV3_save_weights_false(self): + model = test_utils.get_small_subclass_mlp(NUM_HIDDEN, NUM_CLASSES) + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + metrics=["acc"], + ) + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + filepath = os.path.join(temp_dir, "checkpoint.keras") + cbks = [ + keras.callbacks.ModelCheckpoint(filepath, save_weights_only=False) + ] - def on_test_batch_end(self, batch, logs=None): - self.test_batches += 1 + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_train = np_utils.to_categorical(y_train, num_classes=NUM_CLASSES) + + model.fit(x_train, y_train, callbacks=cbks, epochs=1, verbose=0) + + assert os.path.exists(filepath) + + def _get_dummy_resource_for_model_checkpoint_testing(self): + def get_input_datasets(): + # Simple training input. + train_input = [[1.0]] * 16 + train_label = [[0.0]] * 16 + ds = tf.data.Dataset.from_tensor_slices((train_input, train_label)) + return ds.batch(8, drop_remainder=True) + + # Very simple bias model to eliminate randomness. + optimizer = gradient_descent.SGD(0.1) + model = sequential.Sequential() + model.add(test_utils.Bias(input_shape=(1,))) + model.compile(loss="mae", optimizer=optimizer, metrics=["mae"]) + train_ds = get_input_datasets() + + temp_dir = self.get_temp_dir() + filepath = os.path.join(temp_dir, "checkpoint.epoch{epoch:02d}.h5") + + # The filepath shouldn't exist at the beginning. + self.assertFalse(os.path.exists(filepath)) + callback = keras.callbacks.ModelCheckpoint( + filepath=filepath, save_weights_only=True + ) + + return model, train_ds, callback, filepath + + def _run_load_weights_on_restart_test_common_iterations(self): + ( + model, + train_ds, + callback, + filepath, + ) = self._get_dummy_resource_for_model_checkpoint_testing() + initial_epochs = 3 + model.fit(train_ds, epochs=initial_epochs, callbacks=[callback]) - def on_predict_batch_end(self, batch, logs=None): - self.predict_batches += 1 - - class MyCallbackWithTFBatchHooks(keras.callbacks.Callback): - - def __init__(self): - super().__init__() - self._supports_tf_logs = True + # The files should exist after fitting with callback. + for epoch in range(initial_epochs): + self.assertTrue(os.path.exists(filepath.format(epoch=epoch + 1))) + self.assertFalse( + os.path.exists(filepath.format(epoch=initial_epochs + 1)) + ) + self.assertEqual( + callback._get_most_recently_modified_file_matching_pattern( + filepath + ), + filepath.format(epoch=initial_epochs), + ) + + model.fit(train_ds, epochs=1) + weights_after_one_more_epoch = model.get_weights() + + # The filepath should continue to exist after fitting without callback. + for epoch in range(initial_epochs): + self.assertTrue(os.path.exists(filepath.format(epoch=epoch + 1))) + + return model, train_ds, filepath, weights_after_one_more_epoch + + @staticmethod + def get_ModelCheckpoint_load_weights_on_restart_true_test( + save_weights_only, + ): + def func(self): + ( + model, + train_ds, + filepath, + weights_after_one_more_epoch, + ) = self._run_load_weights_on_restart_test_common_iterations() + + # Sleep for some short time period ensuring the files are created + # with a different time (in MacOS OSS the granularity is only 1 + # second). 
+ time.sleep(2) + callback = keras.callbacks.ModelCheckpoint( + filepath=filepath, + save_weights_only=save_weights_only, + load_weights_on_restart=True, + ) + model.fit(train_ds, epochs=1, callbacks=[callback]) + weights_after_model_restoring_and_one_more_epoch = ( + model.get_weights() + ) + + self.assertEqual( + callback._get_most_recently_modified_file_matching_pattern( + filepath + ), + filepath.format(epoch=1), + ) + + model.fit( + train_ds, + epochs=1, + callbacks=[ + keras.callbacks.ModelCheckpoint( + filepath=filepath, + save_weights_only=save_weights_only, + load_weights_on_restart=True, + ) + ], + ) + weights_with_one_final_extra_epoch = model.get_weights() + + # Asserting the weights one epoch after initial fitting and another + # epoch after that are close, if a ModelCheckpoint with + # load_weights_on_restart=True is given (so the model is restored at + # the beginning of training). + self.assertAllClose( + weights_after_one_more_epoch, + weights_after_model_restoring_and_one_more_epoch, + ) + + self.assertNotAllClose( + weights_after_one_more_epoch, weights_with_one_final_extra_epoch + ) + + return func + + @staticmethod + def get_ModelCheckpoint_load_weights_on_restart_false_test( + save_weights_only, + ): + def func(self): + ( + model, + train_ds, + filepath, + weights_after_one_more_epoch, + ) = self._run_load_weights_on_restart_test_common_iterations() + + model.fit( + train_ds, + epochs=1, + callbacks=[ + keras.callbacks.ModelCheckpoint( + filepath=filepath, save_weights_only=save_weights_only + ) + ], + ) + weights_after_model_restoring_and_one_more_epoch = ( + model.get_weights() + ) + + # Asserting the weights one epoch after initial fitting and another + # epoch after that are different, if a ModelCheckpoint with + # load_weights_on_restart=False is given (so the model is not + # restored at the beginning of training).
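Editor's note: the two comments in this hunk state the contract of `load_weights_on_restart`: with `True`, `fit()` first reloads the most recently modified checkpoint matching `filepath`; with `False` (the default), nothing is reloaded. A minimal resume sketch, not part of the patch; `model` and `train_ds` are hypothetical names assumed to exist.

from tensorflow import keras

# Hypothetical names: `model` and `train_ds` are assumed to exist.
ckpt = keras.callbacks.ModelCheckpoint(
    filepath="/tmp/ckpt.epoch{epoch:02d}.h5",  # illustrative path
    save_weights_only=True,
    load_weights_on_restart=True,  # reload the newest matching file when fit() starts
)
model.fit(train_ds, epochs=3, callbacks=[ckpt])  # run that gets interrupted
model.fit(train_ds, epochs=1, callbacks=[ckpt])  # picks up the epoch-3 weights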
+ self.assertNotAllClose( + weights_after_one_more_epoch, + weights_after_model_restoring_and_one_more_epoch, + ) + + return func + + test_model_checkpoint_load_weights_on_restart_true_save_weights_only_true = get_ModelCheckpoint_load_weights_on_restart_true_test.__func__( # noqa: E501 + True + ) - class MyCallbackWithoutBatchHooks(keras.callbacks.Callback): + test_model_checkpoint_load_weights_on_restart_true_save_weights_only_false = get_ModelCheckpoint_load_weights_on_restart_true_test.__func__( # noqa: E501 + False + ) - def __init__(self): - self.epochs = 0 - - def on_epoch_end(self, epoch, logs=None): - self.epochs += 1 + test_model_checkpoint_load_weights_on_restart_false_save_weights_only_true = get_ModelCheckpoint_load_weights_on_restart_false_test.__func__( # noqa: E501 + True + ) - x, y = np.ones((10, 1)), np.ones((10, 1)) - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') + test_model_checkpoint_load_weights_on_restart_false_save_weights_only_false = get_ModelCheckpoint_load_weights_on_restart_false_test.__func__( # noqa: E501 + False + ) - my_cb = MyCallbackWithBatchHooks() - cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) - self.assertTrue(cb_list._should_call_train_batch_hooks) - self.assertTrue(cb_list._should_call_test_batch_hooks) - self.assertTrue(cb_list._should_call_predict_batch_hooks) - self.assertFalse(cb_list._batch_hooks_support_tf_logs) + def test_ModelCheckpoint_override_if_file_exist(self): + ( + model, + train_ds, + filepath, + _, + ) = self._run_load_weights_on_restart_test_common_iterations() - model.fit(x, y, epochs=2, batch_size=10, callbacks=[my_cb], verbose=0) - model.evaluate(x, y, batch_size=10, callbacks=[my_cb], verbose=0) - model.predict(x, batch_size=10, callbacks=[my_cb], verbose=0) + # Sleep for some short time period to ensure the files are created with + # a different time (in MacOS OSS the granularity is only 1 second). + time.sleep(2) + callback = keras.callbacks.ModelCheckpoint( + filepath=filepath, save_weights_only=True + ) + model.load_weights( + callback._get_most_recently_modified_file_matching_pattern(filepath) + ) + weights_before_additional_fit = model.get_weights() + model.fit(train_ds, epochs=1, callbacks=[callback]) + model.load_weights( + callback._get_most_recently_modified_file_matching_pattern(filepath) + ) + weights_after_additional_fit = model.get_weights() + + self.assertNotAllClose( + weights_before_additional_fit, weights_after_additional_fit + ) + + def test_fit_with_ModelCheckpoint_with_tf_config(self): + ( + model, + train_ds, + callback, + _, + ) = self._get_dummy_resource_for_model_checkpoint_testing() + + os.environ["TF_CONFIG"] = json.dumps( + { + "cluster": {"worker": ["localhost:23333"]}, + "task": {"type": "worker", "index": 0}, + } + ) + + # `model.fit()` should work regardless of the presence of `TF_CONFIG`. 
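Editor's note on the `TF_CONFIG` test above: the variable is the JSON cluster spec that TensorFlow's distribution strategies read at startup, and the test uses a single local worker. For reference, a hedged sketch of the general multi-worker shape; the hosts and ports below are made up.

import json
import os

# Hypothetical two-worker cluster; addresses are illustrative only.
os.environ["TF_CONFIG"] = json.dumps(
    {
        "cluster": {"worker": ["host1:23333", "host2:23333"]},
        "task": {"type": "worker", "index": 0},  # this process is worker 0
    }
)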
+ model.fit(train_ds, epochs=1, callbacks=[callback]) + + def test_fit_with_ModelCheckpoint_with_dir_as_h5_filepath(self): + ( + model, + train_ds, + callback, + filepath, + ) = self._get_dummy_resource_for_model_checkpoint_testing() - self.assertEqual(my_cb.train_batches, 2) - self.assertEqual(my_cb.test_batches, 1) - self.assertEqual(my_cb.predict_batches, 1) + temp_dir = self.get_temp_dir() + filepath = os.path.join(temp_dir, "temp.h5") - my_cb = MyCallbackWithTFBatchHooks() - cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) - self.assertTrue(cb_list._batch_hooks_support_tf_logs) + self.assertFalse(os.path.exists(filepath)) + os.mkdir(filepath) + self.assertTrue(os.path.exists(filepath)) - my_cb = MyCallbackWithoutBatchHooks() - cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) - self.assertLen(cb_list.callbacks, 1) - self.assertFalse(cb_list._should_call_train_batch_hooks) - self.assertFalse(cb_list._should_call_test_batch_hooks) - self.assertFalse(cb_list._should_call_predict_batch_hooks) + callback = keras.callbacks.ModelCheckpoint(filepath=filepath) - model.fit(x, y, epochs=2, batch_size=10, callbacks=[my_cb], verbose=0) - model.evaluate(x, y, batch_size=10, callbacks=[my_cb], verbose=0) - model.predict(x, batch_size=10, callbacks=[my_cb], verbose=0) + with self.assertRaisesRegex( + IOError, + "Please specify a non-directory filepath for ModelCheckpoint.", + ): + model.fit(train_ds, epochs=1, callbacks=[callback]) - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_logs_conversion(self): - assert_dict_equal = self.assertDictEqual + def test_ModelCheckpoint_KerasV3_save_options_error(self): + ( + model, + train_ds, + callback, + filepath, + ) = self._get_dummy_resource_for_model_checkpoint_testing() + + temp_dir = self.get_temp_dir() + filepath = os.path.join(temp_dir, "temp.keras") + + with self.assertRaisesRegex( + ValueError, "The native Keras format does not support" + ): + _ = keras.callbacks.ModelCheckpoint( + filepath=filepath, options=tf.saved_model.SaveOptions() + ) + + def test_ModelCheckpoint_with_bad_path_placeholders(self): + ( + model, + train_ds, + callback, + filepath, + ) = self._get_dummy_resource_for_model_checkpoint_testing() + + temp_dir = self.get_temp_dir() + filepath = os.path.join(temp_dir, "chkpt_{epoch:02d}_{mape:.2f}.h5") + callback = keras.callbacks.ModelCheckpoint(filepath=filepath) + + with self.assertRaisesRegex( + KeyError, "Failed to format this callback filepath.*" + ): + model.fit(train_ds, epochs=1, callbacks=[callback]) + + def test_ModelCheckpoint_nonblocking(self): + filepath = self.get_temp_dir() + # Should only cause a sync block when saving is actually performed. + callback = keras.callbacks.ModelCheckpoint( + filepath=filepath, save_freq=100 + ) + self.assertTrue(callback._supports_tf_logs) + + model = keras.Sequential([keras.layers.Dense(1)]) + cb_list = keras.callbacks.CallbackList( + [callback], model=model, epochs=1, steps=10, verbose=0 + ) + + tensor = tf.convert_to_tensor(1.0) + + def mock_numpy(): + raise RuntimeError( + "If this error is seen, ModelCheckpoint is causing a blocking " + "NumPy conversion even when not checkpointing." 
+ ) + + tensor.numpy = mock_numpy + + logs = {"metric": tensor} + + cb_list.on_train_begin(logs) + cb_list.on_epoch_begin(0, logs) + cb_list.on_train_batch_begin(0, logs) + cb_list.on_train_batch_end(0, logs) + cb_list.on_epoch_end(0, logs) + cb_list.on_train_end(logs) + + cb_list.on_test_begin(logs) + cb_list.on_test_batch_begin(0, logs) + cb_list.on_test_batch_end(0, logs) + cb_list.on_test_end(logs) + + cb_list.on_predict_begin(logs) + cb_list.on_predict_batch_begin(logs) + cb_list.on_predict_batch_end(logs) + cb_list.on_predict_end(logs) + + def _run_fit_with_ModelCheckpoint_with_steps_per_execution( + self, + model, + savepath, + save_freq, + train_samples, + steps_per_execution, + epochs, + check_ckpt_epochs, + check_ckpt_batchs, + ): + assert len(check_ckpt_epochs) == len(check_ckpt_batchs) + + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=train_samples, + test_samples=0, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_train = np_utils.to_categorical(y_train) - class MutateNumpyLogs(CallAllHooks): + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + steps_per_execution=steps_per_execution, + ) - def _run(self, *args, logs=None): - logs = logs or args[-1] - logs['numpy'] = 1 + self.assertFalse(os.path.exists(savepath)) - class MutateTensorFlowLogs(CallAllHooks): + callback = keras.callbacks.ModelCheckpoint( + filepath=os.path.join(savepath, "ckpt_{epoch}_{batch}"), + save_freq=save_freq, + ) - def __init__(self): - super().__init__() - self._supports_tf_logs = True + model.fit( + x_train, + y_train, + batch_size=1, + epochs=epochs, + verbose=0, + callbacks=[callback], + ) + + self.assertTrue(os.path.exists(savepath)) + + for i in range(len(check_ckpt_epochs)): + epoch = check_ckpt_epochs[i] + batch = check_ckpt_batchs[i] + ckpt_name = "ckpt_" + str(epoch) + "_" + str(batch) + ckpt_path = os.path.join(savepath, ckpt_name) + self.assertTrue(os.path.exists(ckpt_path)) + self.assertIn("saved_model.pb", os.listdir(ckpt_path)) + + shutil.rmtree(savepath) + + @test_combinations.run_with_all_model_types + @test_utils.run_v2_only + def test_fit_with_ModelCheckpoint_with_steps_per_execution(self): + layers = [ + keras.layers.Dense( + NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu" + ), + keras.layers.Dense(NUM_CLASSES, activation="softmax"), + ] + model = test_utils.get_model_from_layers( + layers, input_shape=(INPUT_DIM,) + ) + + temp_dir = self.get_temp_dir() + savepath = os.path.join(temp_dir, "checkpoint") + + for steps_per_execution in [None, 7]: + self._run_fit_with_ModelCheckpoint_with_steps_per_execution( + model, + savepath, + save_freq=7, + train_samples=7, + steps_per_execution=steps_per_execution, + epochs=1, + check_ckpt_epochs=[1], + check_ckpt_batchs=[7], + ) + + self._run_fit_with_ModelCheckpoint_with_steps_per_execution( + model, + savepath, + save_freq=7, + train_samples=7, + steps_per_execution=steps_per_execution, + epochs=2, + check_ckpt_epochs=[1, 2], + check_ckpt_batchs=[7, 7], + ) + + self._run_fit_with_ModelCheckpoint_with_steps_per_execution( + model, + savepath, + save_freq=14, + train_samples=7, + steps_per_execution=steps_per_execution, + epochs=2, + check_ckpt_epochs=[2], + check_ckpt_batchs=[7], + ) + + self._run_fit_with_ModelCheckpoint_with_steps_per_execution( + model, + savepath, + save_freq=7, + train_samples=14, + steps_per_execution=steps_per_execution, + epochs=2, + check_ckpt_epochs=[1, 1, 2, 2], + check_ckpt_batchs=[7, 14, 7, 14], + ) + + def test_verbose_2_logging(self): + data = 
np.random.random((100, 1)) + labels = np.where(data > 0.5, 1, 0) + model = keras.models.Sequential( + ( + keras.layers.Dense(1, input_dim=1, activation="relu"), + keras.layers.Dense(1, activation="sigmoid"), + ) + ) + model.compile( + optimizer="sgd", loss="binary_crossentropy", metrics=["accuracy"] + ) + expected_log = r"(.*- loss:.*- acc.*:.*epoch)+" + with self.captureWritesToStream(sys.stdout) as printed: + model.fit(data, labels, verbose=2, epochs=20) + self.assertRegex(printed.contents(), expected_log) + + def test_ProgbarLogger_verbose_2_nonblocking(self): + # Should only cause a sync block on epoch end methods. + callback = keras.callbacks.ProgbarLogger(count_mode="steps") + self.assertTrue(callback._supports_tf_logs) + + model = keras.Sequential([keras.layers.Dense(1)]) + cb_list = keras.callbacks.CallbackList( + [callback], model=model, epochs=1, steps=10, verbose=2 + ) + + tensor = tf.convert_to_tensor(1.0) + + def mock_numpy(): + raise RuntimeError( + "If this error is seen, ModelCheckpoint is causing a blocking " + "NumPy conversion even when not checkpointing." + ) + + tensor.numpy = mock_numpy + logs = {"metric": tensor} + + cb_list.on_train_begin(logs) + cb_list.on_epoch_begin(0, logs) + cb_list.on_train_batch_begin(0, logs) + cb_list.on_train_batch_end(0, logs) + + cb_list.on_test_begin(logs) + cb_list.on_test_batch_begin(0, logs) + cb_list.on_test_batch_end(0, logs) + cb_list.on_test_end(logs) + + with self.assertRaisesRegex(RuntimeError, "NumPy conversion"): + # on_epoch_end should still block. + cb_list.on_epoch_end(0, logs) + cb_list.on_train_end(logs) + + def test_EarlyStopping(self): + with self.cached_session(): + np.random.seed(123) + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + model = test_utils.get_small_sequential_mlp( + num_hidden=NUM_HIDDEN, + num_classes=NUM_CLASSES, + input_dim=INPUT_DIM, + ) + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + metrics=["acc"], + ) + + cases = [ + ("max", "val_acc"), + ("min", "val_loss"), + ("auto", "val_acc"), + ("auto", "loss"), + ("unknown", "unknown"), + ] + for mode, monitor in cases: + patience = 0 + cbks = [ + keras.callbacks.EarlyStopping( + patience=patience, monitor=monitor, mode=mode + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=5, + verbose=0, + ) + + def test_EarlyStopping_patience(self): + cases = [0, 1, 2, 3] + losses = [10.0, 9.0, 8.0, 9.0, 8.9, 8.8, 8.7, 8.6, 8.5] + + for patience in cases: + stopper = keras.callbacks.EarlyStopping( + monitor="loss", patience=patience + ) + stopper.model = keras.models.Sequential() + stopper.on_train_begin() + + for epoch, loss in enumerate(losses): + stopper.on_epoch_end(epoch=epoch, logs={"loss": loss}) + if stopper.model.stop_training: + break + + self.assertEqual(stopper.stopped_epoch, max(patience, 1) + 2) + + def test_EarlyStopping_reuse(self): + with self.cached_session(): + np.random.seed(1337) + patience = 3 + data = np.random.random((100, 1)) + labels = np.where(data > 0.5, 1, 0) + model = keras.models.Sequential( + ( + keras.layers.Dense(1, input_dim=1, activation="relu"), + keras.layers.Dense(1, activation="sigmoid"), + ) + ) + model.compile( + optimizer="sgd", + loss="binary_crossentropy", + metrics=["accuracy"], + ) + weights = 
model.get_weights() + + # This should allow training to go for at least `patience` epochs + model.set_weights(weights) + + stopper = keras.callbacks.EarlyStopping( + monitor="acc", patience=patience + ) + hist = model.fit( + data, labels, callbacks=[stopper], verbose=0, epochs=20 + ) + assert len(hist.epoch) >= patience + + def test_EarlyStopping_with_baseline(self): + with self.cached_session(): + np.random.seed(1337) + baseline = 0.6 + (data, labels), _ = test_utils.get_test_data( + train_samples=100, + test_samples=50, + input_shape=(1,), + num_classes=NUM_CLASSES, + ) + model = test_utils.get_small_sequential_mlp( + num_hidden=1, num_classes=1, input_dim=1 + ) + model.compile( + optimizer="sgd", loss="binary_crossentropy", metrics=["acc"] + ) + + stopper = keras.callbacks.EarlyStopping( + monitor="acc", baseline=baseline + ) + hist = model.fit( + data, labels, callbacks=[stopper], verbose=0, epochs=20 + ) + assert len(hist.epoch) == 2 + + patience = 3 + stopper = keras.callbacks.EarlyStopping( + monitor="acc", patience=patience, baseline=baseline + ) + hist = model.fit( + data, labels, callbacks=[stopper], verbose=0, epochs=20 + ) + assert len(hist.epoch) >= patience + + def test_EarlyStopping_final_weights_when_restoring_model_weights(self): + class DummyModel: + def __init__(self): + self.stop_training = False + self.weights = -1 + + def get_weights(self): + return self.weights + + def set_weights(self, weights): + self.weights = weights + + def set_weight_to_epoch(self, epoch): + self.weights = epoch + + early_stop = keras.callbacks.EarlyStopping( + monitor="val_loss", patience=2, restore_best_weights=True + ) + early_stop.model = DummyModel() + losses = [0.2, 0.15, 0.1, 0.11, 0.12] + # The best configuration is in epoch 2 (loss = 0.1000). + epochs_trained = 0 + early_stop.on_train_begin() + for epoch in range(len(losses)): + epochs_trained += 1 + early_stop.model.set_weight_to_epoch(epoch=epoch) + early_stop.on_epoch_end(epoch, logs={"val_loss": losses[epoch]}) + if early_stop.model.stop_training: + break + # The best configuration is in epoch 2 (loss = 0.1000), + # and while patience = 2, we're restoring the best weights, + # so we end up at the epoch with the best weights, i.e. epoch 2 + self.assertEqual(early_stop.model.get_weights(), 2) + + # Check early stopping when no model beats the baseline. + early_stop = keras.callbacks.EarlyStopping( + monitor="val_loss", + patience=5, + baseline=0.5, + restore_best_weights=True, + ) + early_stop.model = DummyModel() + losses = [0.9, 0.8, 0.7, 0.71, 0.72, 0.73] + # The best configuration is in epoch 2 (loss = 0.7000). + epochs_trained = 0 + early_stop.on_train_begin() + for epoch in range(len(losses)): + epochs_trained += 1 + early_stop.model.set_weight_to_epoch(epoch=epoch) + early_stop.on_epoch_end(epoch, logs={"val_loss": losses[epoch]}) + if early_stop.model.stop_training: + break + # No epoch improves on the baseline, so we should train for only 5 + # epochs, and restore the second model.
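Editor's note on the two `DummyModel` scenarios above: they pin down that `restore_best_weights=True` rolls back to the best monitored epoch even when no epoch ever beat the `baseline`. Typical real usage, as a hedged sketch that is not part of the patch; `model`, `x`, `y`, `x_val`, and `y_val` are hypothetical names assumed to exist.

from tensorflow import keras

# Hypothetical names: `model`, `x`, `y`, `x_val`, `y_val` are assumed to exist.
stopper = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=2,                 # tolerate two non-improving epochs
    restore_best_weights=True,  # roll back to the best epoch when stopping
)
model.fit(x, y, validation_data=(x_val, y_val), epochs=50, callbacks=[stopper])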
+ self.assertEqual(epochs_trained, 5) + self.assertEqual(early_stop.model.get_weights(), 2) + + def test_EarlyStopping_with_start_from_epoch(self): + with self.cached_session(): + np.random.seed(1337) + (data, labels), _ = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + labels = np_utils.to_categorical(labels) + model = test_utils.get_small_sequential_mlp( + num_hidden=NUM_HIDDEN, + num_classes=NUM_CLASSES, + input_dim=INPUT_DIM, + ) + model.compile( + optimizer="sgd", loss="binary_crossentropy", metrics=["acc"] + ) + start_from_epoch = 2 + patience = 3 + stopper = keras.callbacks.EarlyStopping( + monitor="acc", + patience=patience, + start_from_epoch=start_from_epoch, + ) + history = model.fit( + data, labels, callbacks=[stopper], verbose=0, epochs=20 + ) + # Test 'patience' argument functions correctly when used + # in conjunction with 'start_from_epoch'. + self.assertGreaterEqual( + len(history.epoch), patience + start_from_epoch + ) + + start_from_epoch = 2 + patience = 0 + stopper = keras.callbacks.EarlyStopping( + monitor="acc", + patience=patience, + start_from_epoch=start_from_epoch, + ) + history = model.fit( + data, labels, callbacks=[stopper], verbose=0, epochs=20 + ) + # Test for boundary condition when 'patience' = 0. + self.assertGreaterEqual(len(history.epoch), start_from_epoch) + + def test_RemoteMonitor(self): + if requests is None: + self.skipTest("`requests` required to run this test") + return None + + monitor = keras.callbacks.RemoteMonitor() + # This will raise a warning since the default address is unreachable: + monitor.on_epoch_end(0, logs={"loss": 0.0}) + + def test_LearningRateScheduler(self): + with self.cached_session(): + np.random.seed(1337) + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + model = test_utils.get_small_sequential_mlp( + num_hidden=NUM_HIDDEN, + num_classes=NUM_CLASSES, + input_dim=INPUT_DIM, + ) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + metrics=["accuracy"], + ) + + cbks = [ + keras.callbacks.LearningRateScheduler( + lambda x: 1.0 / (1.0 + x), verbose=1 + ) + ] + io_utils.enable_interactive_logging() + with self.captureWritesToStream(sys.stdout) as printed: + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=5, + ) + self.assertIn( + "LearningRateScheduler setting learning rate to 1.0", + printed.contents(), + ) + assert ( + float(keras.backend.get_value(model.optimizer.lr)) - 0.2 + ) < keras.backend.epsilon() + + cbks = [keras.callbacks.LearningRateScheduler(lambda x, lr: lr / 2)] + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + metrics=["accuracy"], + ) + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=2, + verbose=0, + ) + assert ( + float(keras.backend.get_value(model.optimizer.lr)) - 0.01 / 4 + ) < keras.backend.epsilon() + + cbks = [ + keras.callbacks.LearningRateScheduler( + lambda epoch, _: learning_rate_schedule.CosineDecay( + 0.01, 2 + )(epoch) + ) + ] + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + metrics=["accuracy"], + ) + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, +
validation_data=(x_test, y_test), + callbacks=cbks, + epochs=2, + verbose=0, + ) + + # CosineDecay(0.01, decay_steps=2) evaluated at the last epoch (1) + # is 0.01 * 0.5 * (1 + cos(pi * 1 / 2)). + cosine_decay_np = 0.5 * (1 + np.cos(np.pi * (1 / 2))) + decayed_learning_rate = 0.01 * cosine_decay_np + + assert ( + float(keras.backend.get_value(model.optimizer.lr)) + - decayed_learning_rate + ) < keras.backend.epsilon() + + def test_ReduceLROnPlateau(self): + with self.cached_session(): + tf_utils.set_random_seed(1337) + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + + def make_model(): + tf_utils.set_random_seed(1337) + model = test_utils.get_small_sequential_mlp( + num_hidden=NUM_HIDDEN, + num_classes=NUM_CLASSES, + input_dim=INPUT_DIM, + ) + model.compile( + loss="categorical_crossentropy", + optimizer=gradient_descent.SGD(lr=0.1), + ) + return model + + # TODO(psv): Make sure the callback works correctly when min_delta + # is set as 0. Test fails when the order of this callback and + # assertion is interchanged. + model = make_model() + cbks = [ + keras.callbacks.ReduceLROnPlateau( + monitor="val_loss", + factor=0.1, + min_delta=0, + patience=1, + cooldown=5, + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=2, + verbose=0, + ) + self.assertAllClose( + float(keras.backend.get_value(model.optimizer.lr)), + 0.1, + atol=1e-4, + ) + + model = make_model() + # This should reduce the LR after the first epoch (due to the high + # `min_delta`). + cbks = [ + keras.callbacks.ReduceLROnPlateau( + monitor="val_loss", + factor=0.1, + min_delta=10, + patience=1, + cooldown=5, + ) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=2, + verbose=2, + ) + self.assertAllClose( + float(keras.backend.get_value(model.optimizer.lr)), + 0.01, + atol=1e-4, + ) + + def test_ReduceLROnPlateau_patience(self): + class DummyOptimizer: + def __init__(self): + self.lr = keras.backend.variable(1.0) + + class DummyModel: + def __init__(self): + self.optimizer = DummyOptimizer() + + reduce_on_plateau = keras.callbacks.ReduceLROnPlateau( + monitor="val_loss", patience=2 + ) + reduce_on_plateau.model = DummyModel() + + losses = [0.0860, 0.1096, 0.1040] + lrs = [] + + for epoch in range(len(losses)): + reduce_on_plateau.on_epoch_end( + epoch, logs={"val_loss": losses[epoch]} + ) + lrs.append( + keras.backend.get_value(reduce_on_plateau.model.optimizer.lr) + ) + + # The learning rates should be 1.0 except the last one + for lr in lrs[:-1]: + self.assertEqual(lr, 1.0) + self.assertLess(lrs[-1], 1.0) + + def test_ReduceLROnPlateau_backwards_compatibility(self): + with tf.compat.v1.test.mock.patch.object( + logging, "warning" + ) as mock_log: + reduce_on_plateau = keras.callbacks.ReduceLROnPlateau(epsilon=1e-13) + self.assertRegex( + str(mock_log.call_args), "`epsilon` argument is deprecated" + ) + self.assertFalse(hasattr(reduce_on_plateau, "epsilon")) + self.assertTrue(hasattr(reduce_on_plateau, "min_delta")) + self.assertEqual(reduce_on_plateau.min_delta, 1e-13) + + def test_CSVLogger(self): + with self.cached_session(): + np.random.seed(1337) + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + filepath = os.path.join(temp_dir, "log.tsv") + + sep = "\t" + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( +
train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + + def make_model(): + np.random.seed(1337) + model = test_utils.get_small_sequential_mlp( + num_hidden=NUM_HIDDEN, + num_classes=NUM_CLASSES, + input_dim=INPUT_DIM, + ) + model.compile( + loss="categorical_crossentropy", + optimizer=gradient_descent.SGD(lr=0.1), + metrics=["accuracy"], + ) + return model + + # case 1, create new file with defined separator + model = make_model() + cbks = [keras.callbacks.CSVLogger(filepath, separator=sep)] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + verbose=0, + ) + + assert os.path.exists(filepath) + with open(filepath) as csvfile: + dialect = csv.Sniffer().sniff(csvfile.read()) + assert dialect.delimiter == sep + del model + del cbks + + # case 2, append data to existing file, skip header + model = make_model() + cbks = [ + keras.callbacks.CSVLogger(filepath, separator=sep, append=True) + ] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + verbose=0, + ) + + # case 3, reuse of CSVLogger object + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=2, + verbose=0, + ) + + with open(filepath) as csvfile: + list_lines = csvfile.readlines() + for line in list_lines: + assert line.count(sep) == 4 + assert len(list_lines) == 5 + output = " ".join(list_lines) + assert len(re.findall("epoch", output)) == 1 + + os.remove(filepath) + + # case 4, Verify Val. loss also registered when Validation Freq > 1 + model = make_model() + cbks = [keras.callbacks.CSVLogger(filepath, separator=sep)] + hist = model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + validation_freq=3, + callbacks=cbks, + epochs=5, + verbose=0, + ) + assert os.path.exists(filepath) + # Verify that validation loss is registered at val. freq + with open(filepath) as csvfile: + rows = csv.DictReader(csvfile, delimiter=sep) + for idx, row in enumerate(rows, 1): + self.assertIn("val_loss", row) + if idx == 3: + self.assertEqual( + row["val_loss"], str(hist.history["val_loss"][0]) + ) + else: + self.assertEqual(row["val_loss"], "NA") + + def test_stop_training_csv(self): + # Test that using the CSVLogger callback with the TerminateOnNaN + # callback does not result in invalid CSVs.
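+ # (TerminateOnNaN sets `model.stop_training` once a NaN loss appears; + # CSVLogger should still flush and close its file so the log stays + # parseable.)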
+ np.random.seed(1337) + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) + + with self.cached_session(): + fp = os.path.join(tmpdir, "test.csv") + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + cbks = [ + keras.callbacks.TerminateOnNaN(), + keras.callbacks.CSVLogger(fp), + ] + model = keras.models.Sequential() + for _ in range(5): + model.add( + keras.layers.Dense( + 2, input_dim=INPUT_DIM, activation="relu" + ) + ) + model.add(keras.layers.Dense(NUM_CLASSES, activation="linear")) + model.compile(loss="mean_squared_error", optimizer="rmsprop") + + def data_generator(): + i = 0 + max_batch_index = len(x_train) // BATCH_SIZE + tot = 0 + while 1: + if tot > 3 * len(x_train): + yield ( + np.ones([BATCH_SIZE, INPUT_DIM]) * np.nan, + np.ones([BATCH_SIZE, NUM_CLASSES]) * np.nan, + ) + else: + yield ( + x_train[i * BATCH_SIZE : (i + 1) * BATCH_SIZE], + y_train[i * BATCH_SIZE : (i + 1) * BATCH_SIZE], + ) + i += 1 + tot += 1 + i %= max_batch_index + + history = model.fit_generator( + data_generator(), + len(x_train) // BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=20, + ) + loss = history.history["loss"] + assert len(loss) > 1 + assert loss[-1] == np.inf or np.isnan(loss[-1]) + + values = [] + with open(fp) as f: + # On Windows, due to \r\n line ends, we may end up reading empty + # lines after each line. Skip empty lines. + values = [x for x in csv.reader(f) if x] + + assert "nan" in values[-1], "The last epoch was not logged." + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_TerminateOnNaN(self): + np.random.seed(1337) + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + cbks = [keras.callbacks.TerminateOnNaN()] + model = keras.models.Sequential() + initializer = keras.initializers.Constant(value=1e5) + for _ in range(5): + model.add( + keras.layers.Dense( + 2, + input_dim=INPUT_DIM, + activation="relu", + kernel_initializer=initializer, + ) + ) + model.add(keras.layers.Dense(NUM_CLASSES)) + model.compile(loss="mean_squared_error", optimizer="rmsprop") + + history = model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=20, + ) + loss = history.history["loss"] + self.assertEqual(len(loss), 1) + self.assertTrue(np.isnan(loss[0]) or np.isinf(loss[0])) + + @unittest.skipIf( + os.name == "nt", + "use_multiprocessing=True does not work on windows properly.", + ) + def test_LambdaCallback(self): + with self.cached_session(): + np.random.seed(1337) + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu" + ) + ) + model.add(keras.layers.Dense(NUM_CLASSES, activation="softmax")) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", +
metrics=["accuracy"], + ) + + # Start an arbitrary thread that should run during model + # training and be terminated after training has completed. + e = threading.Event() + + def target(): + e.wait() + + t = threading.Thread(target=target) + t.start() + cleanup_callback = keras.callbacks.LambdaCallback( + on_train_end=lambda logs: e.set() + ) + + cbks = [cleanup_callback] + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=5, + verbose=0, + ) + t.join() + assert not t.is_alive() + + def test_RemoteMonitor_np_array(self): + if requests is None: + self.skipTest("`requests` required to run this test") + with tf.compat.v1.test.mock.patch.object( + requests, "post" + ) as requests_post: + monitor = keras.callbacks.RemoteMonitor(send_as_json=True) + a = np.arange(1) # a length-1 array + logs = {"loss": 0.0, "val": a} + monitor.on_epoch_end(0, logs=logs) + send = {"loss": 0.0, "epoch": 0, "val": 0} + requests_post.assert_called_once_with( + monitor.root + monitor.path, json=send, headers=monitor.headers + ) + + def test_RemoteMonitor_np_float32(self): + if requests is None: + self.skipTest("`requests` required to run this test") + + with tf.compat.v1.test.mock.patch.object( + requests, "post" + ) as requests_post: + monitor = keras.callbacks.RemoteMonitor(send_as_json=True) + a = np.float32(1.0) # a float32 generic type + logs = {"loss": 0.0, "val": a} + monitor.on_epoch_end(0, logs=logs) + send = {"loss": 0.0, "epoch": 0, "val": 1.0} + requests_post.assert_called_once_with( + monitor.root + monitor.path, json=send, headers=monitor.headers + ) + + def test_RemoteMonitorWithJsonPayload(self): + if requests is None: + self.skipTest("`requests` required to run this test") + return None + with self.cached_session(): + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = keras.utils.np_utils.to_categorical(y_test) + y_train = keras.utils.np_utils.to_categorical(y_train) + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu" + ) + ) + model.add(keras.layers.Dense(NUM_CLASSES, activation="softmax")) + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + metrics=["accuracy"], + ) + cbks = [keras.callbacks.RemoteMonitor(send_as_json=True)] + + with tf.compat.v1.test.mock.patch.object(requests, "post"): + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=1, + ) + + def test_progbar_infers_steps(self): + x, y = np.ones((10, 1)), np.ones((10, 1)) + data = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) + data = data.filter(lambda x, y: True) # Unknown cardinality.
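+ # (`filter()` leaves the dataset with unknown cardinality, so the + # progress bar can only infer the step count by counting batches + # during the first epoch.)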
+ + progbar = keras.callbacks.ProgbarLogger("steps") + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + self.assertIsNone(progbar.target) + model.fit(data, epochs=2, callbacks=[progbar]) + self.assertEqual(progbar.target, 5) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_callback_passed_floats(self): + class MyCallback(keras.callbacks.Callback): + def on_batch_end(self, batch, logs=None): + assert isinstance(batch, int) + assert isinstance(logs["loss"], float) + self.on_batch_end_called = True + + def on_epoch_end(self, batch, logs=None): + assert isinstance(batch, int) + assert isinstance(logs["loss"], float) + self.on_epoch_end_called = True + + x, y = np.ones((10, 1)), np.ones((10, 1)) + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + + callback = MyCallback() + model.fit(x, y, epochs=2, callbacks=[callback]) + self.assertTrue(callback.on_batch_end_called) + self.assertTrue(callback.on_epoch_end_called) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_implements_batch_hooks(self): + class MyCallbackWithBatchHooks(keras.callbacks.Callback): + def __init__(self): + self.train_batches = 0 + self.test_batches = 0 + self.predict_batches = 0 + + def on_train_batch_end(self, batch, logs=None): + self.train_batches += 1 + + def on_test_batch_end(self, batch, logs=None): + self.test_batches += 1 + + def on_predict_batch_end(self, batch, logs=None): + self.predict_batches += 1 + + class MyCallbackWithTFBatchHooks(keras.callbacks.Callback): + def __init__(self): + super().__init__() + self._supports_tf_logs = True + + class MyCallbackWithoutBatchHooks(keras.callbacks.Callback): + def __init__(self): + self.epochs = 0 + + def on_epoch_end(self, epoch, logs=None): + self.epochs += 1 + + x, y = np.ones((10, 1)), np.ones((10, 1)) + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + + my_cb = MyCallbackWithBatchHooks() + cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) + self.assertTrue(cb_list._should_call_train_batch_hooks) + self.assertTrue(cb_list._should_call_test_batch_hooks) + self.assertTrue(cb_list._should_call_predict_batch_hooks) + self.assertFalse(cb_list._batch_hooks_support_tf_logs) + + model.fit(x, y, epochs=2, batch_size=10, callbacks=[my_cb], verbose=0) + model.evaluate(x, y, batch_size=10, callbacks=[my_cb], verbose=0) + model.predict(x, batch_size=10, callbacks=[my_cb], verbose=0) + + self.assertEqual(my_cb.train_batches, 2) + self.assertEqual(my_cb.test_batches, 1) + self.assertEqual(my_cb.predict_batches, 1) + + my_cb = MyCallbackWithTFBatchHooks() + cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) + self.assertTrue(cb_list._batch_hooks_support_tf_logs) + + my_cb = MyCallbackWithoutBatchHooks() + cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) + self.assertLen(cb_list.callbacks, 1) + self.assertFalse(cb_list._should_call_train_batch_hooks) + self.assertFalse(cb_list._should_call_test_batch_hooks) + self.assertFalse(cb_list._should_call_predict_batch_hooks) + + model.fit(x, y, epochs=2, batch_size=10, callbacks=[my_cb], verbose=0) + model.evaluate(x, y, batch_size=10, callbacks=[my_cb], verbose=0) + model.predict(x, batch_size=10, callbacks=[my_cb], verbose=0) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_logs_conversion(self): + assert_dict_equal = self.assertDictEqual + + class MutateNumpyLogs(CallAllHooks): + def _run(self,
*args, logs=None): + logs = logs or args[-1] + logs["numpy"] = 1 + + class MutateTensorFlowLogs(CallAllHooks): + def __init__(self): + super().__init__() + self._supports_tf_logs = True + + def _run(self, *args, logs=None): + logs = logs or args[-1] + logs["tf"] = 2 + + class AssertNumpyLogs(CallAllHooks): + def _run(self, *args, logs=None): + logs = logs or args[-1] + assert_dict_equal(logs, {"all": 0, "numpy": 1, "tf": 2}) + + class AssertTensorFlowLogs(AssertNumpyLogs): + def __init__(self): + super().__init__() + self._supports_tf_logs = True + + cb_list = keras.callbacks.CallbackList( + [ + MutateNumpyLogs(), + MutateTensorFlowLogs(), + AssertNumpyLogs(), + AssertTensorFlowLogs(), + ] + ) + + assert len(cb_list.callbacks) == 4 + cb_list.on_epoch_begin(0, logs={"all": 0}) + cb_list.on_epoch_end(0, logs={"all": 0}) + cb_list.on_predict_batch_begin(0, logs={"all": 0}) + cb_list.on_predict_batch_end(0, logs={"all": 0}) + cb_list.on_predict_begin(logs={"all": 0}) + cb_list.on_predict_end(logs={"all": 0}) + cb_list.on_test_batch_begin(0, logs={"all": 0}) + cb_list.on_test_batch_end(0, logs={"all": 0}) + cb_list.on_test_begin(logs={"all": 0}) + cb_list.on_test_end(logs={"all": 0}) + cb_list.on_train_batch_begin(0, logs={"all": 0}) + cb_list.on_train_batch_end(0, logs={"all": 0}) + cb_list.on_train_begin(logs={"all": 0}) + cb_list.on_train_end(logs={"all": 0}) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_implements_batch_hooks_override(self): + class MyCallback(keras.callbacks.Callback): + def __init__(self, should_run=True): + self.should_run = should_run + self.train_batches = 0 + self.test_batches = 0 + self.predict_batches = 0 + + def on_train_batch_end(self, batch, logs=None): + self.train_batches += 1 + + def on_test_batch_end(self, batch, logs=None): + self.test_batches += 1 + + def on_predict_batch_end(self, batch, logs=None): + self.predict_batches += 1 + + def _implements_train_batch_hooks(self): + return self.should_run + + def _implements_test_batch_hooks(self): + return self.should_run + + def _implements_predict_batch_hooks(self): + return self.should_run + + x, y = np.ones((10, 1)), np.ones((10, 1)) + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + + my_cb = MyCallback(should_run=True) + cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) + self.assertTrue(cb_list._should_call_train_batch_hooks) + self.assertTrue(cb_list._should_call_test_batch_hooks) + self.assertTrue(cb_list._should_call_predict_batch_hooks) + + model.fit(x, y, epochs=2, batch_size=10, callbacks=[my_cb], verbose=0) + model.evaluate(x, y, batch_size=10, callbacks=[my_cb], verbose=0) + model.predict(x, batch_size=10, callbacks=[my_cb], verbose=0) + + self.assertEqual(my_cb.train_batches, 2) + self.assertEqual(my_cb.test_batches, 1) + self.assertEqual(my_cb.predict_batches, 1) + + my_cb = MyCallback(should_run=False) + cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) + self.assertFalse(cb_list._should_call_train_batch_hooks) + self.assertFalse(cb_list._should_call_test_batch_hooks) + self.assertFalse(cb_list._should_call_predict_batch_hooks) + + model.fit(x, y, epochs=2, batch_size=10, callbacks=[my_cb], verbose=0) + model.evaluate(x, y, batch_size=10, callbacks=[my_cb], verbose=0) + model.predict(x, batch_size=10, callbacks=[my_cb], verbose=0) + + self.assertEqual(my_cb.train_batches, 0) + self.assertEqual(my_cb.test_batches, 0) + self.assertEqual(my_cb.predict_batches, 0) + + 
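+ # (CallbackList checks `_implements_*_batch_hooks` so that per-batch + # hook dispatch can be skipped entirely when no callback needs it; the + # built-in callbacks in the next test are expected to opt out.)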
@test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_default_callbacks_do_not_call_batch_hooks(self): + model = keras.Sequential([keras.layers.Dense(1)]) + log_dir = self.get_temp_dir() + cb_list = keras.callbacks.CallbackList( + [ + keras.callbacks.TensorBoard(log_dir, profile_batch=0), + keras.callbacks.ModelCheckpoint(log_dir), + ], + add_progbar=True, + model=model, + verbose=2, + epochs=3, + ) + self.assertLen(cb_list.callbacks, 3) + self.assertFalse(cb_list._should_call_train_batch_hooks) + self.assertFalse(cb_list._should_call_test_batch_hooks) + self.assertFalse(cb_list._should_call_predict_batch_hooks) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_change_tf_functions_during_fit(self): + class ChangeFunctions(keras.callbacks.Callback): + def on_epoch_end(self, epochs, logs=None): + def new_fn(iterator): + raise ValueError("New function substituted successfully.") + + self.model.train_function = new_fn + self.model.test_function = new_fn + self.model.predict_function = new_fn + + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + + x, y = np.ones((10, 10)), np.ones((10, 1)) + with self.assertRaisesRegex(ValueError, "New function "): + model.fit( + x, y, batch_size=2, epochs=2, callbacks=[ChangeFunctions()] + ) + with self.assertRaisesRegex(ValueError, "New function "): + model.evaluate(x, y, batch_size=2) + with self.assertRaisesRegex(ValueError, "New function "): + model.predict(x, batch_size=2) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_stop_training_batch_level(self): + class MyCallback(keras.callbacks.Callback): + def __init__(self): + super().__init__() + self.batch_counter = 0 + + def on_train_batch_end(self, batch, logs=None): + self.batch_counter += 1 + if batch == 2: + self.model.stop_training = True + + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + x, y = np.ones((10, 10)), np.ones((10, 1)) + my_cb = MyCallback() + # Will run 5 batches if `stop_training` doesn't work. 
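+ # (Stopping at batch index 2 means batches 0, 1 and 2 run, so the + # counter should read exactly 3.)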
+ model.fit(x, y, batch_size=2, callbacks=[my_cb]) + self.assertEqual(my_cb.batch_counter, 3) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_built_in_callback_order(self): + class CustomCallback(keras.callbacks.Callback): + pass + + class TestingCallbackList(keras.callbacks.CallbackList): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if ( + (not isinstance(self.callbacks[0], CustomCallback)) + or ( + not isinstance( + self.callbacks[1], keras.callbacks.History + ) + ) + or ( + not isinstance( + self.callbacks[2], keras.callbacks.ProgbarLogger + ) + ) + ): + raise AssertionError( + f"Callback order unexpected: {self.callbacks}" + ) + + with mock.patch.object( + keras.callbacks, "CallbackList", TestingCallbackList + ): + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + custom_callback = CustomCallback() + model.fit( + np.ones((10, 10)), + np.ones((10, 1)), + epochs=5, + callbacks=[custom_callback], + ) - def _run(self, *args, logs=None): - logs = logs or args[-1] - logs['tf'] = 2 - - class AssertNumpyLogs(CallAllHooks): - - def _run(self, *args, logs=None): - logs = logs or args[-1] - assert_dict_equal(logs, {'all': 0, 'numpy': 1, 'tf': 2}) - - class AssertTensorFlowLogs(AssertNumpyLogs): - - def __init__(self): - super().__init__() - self._supports_tf_logs = True - - cb_list = keras.callbacks.CallbackList([ - MutateNumpyLogs(), - MutateTensorFlowLogs(), - AssertNumpyLogs(), - AssertTensorFlowLogs() - ]) - - assert len(cb_list.callbacks) == 4 - cb_list.on_epoch_begin(0, logs={'all': 0}) - cb_list.on_epoch_end(0, logs={'all': 0}) - cb_list.on_predict_batch_begin(0, logs={'all': 0}) - cb_list.on_predict_batch_end(0, logs={'all': 0}) - cb_list.on_predict_begin(logs={'all': 0}) - cb_list.on_predict_end(logs={'all': 0}) - cb_list.on_test_batch_begin(0, logs={'all': 0}) - cb_list.on_test_batch_end(0, logs={'all': 0}) - cb_list.on_test_begin(logs={'all': 0}) - cb_list.on_test_end(logs={'all': 0}) - cb_list.on_train_batch_begin(0, logs={'all': 0}) - cb_list.on_train_batch_end(0, logs={'all': 0}) - cb_list.on_train_begin(logs={'all': 0}) - cb_list.on_train_end(logs={'all': 0}) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_implements_batch_hooks_override(self): - - class MyCallback(keras.callbacks.Callback): - - def __init__(self, should_run=True): - self.should_run = should_run - self.train_batches = 0 - self.test_batches = 0 - self.predict_batches = 0 - - def on_train_batch_end(self, batch, logs=None): - self.train_batches += 1 - - def on_test_batch_end(self, batch, logs=None): - self.test_batches += 1 - - def on_predict_batch_end(self, batch, logs=None): - self.predict_batches += 1 - - def _implements_train_batch_hooks(self): - return self.should_run - - def _implements_test_batch_hooks(self): - return self.should_run - - def _implements_predict_batch_hooks(self): - return self.should_run - - x, y = np.ones((10, 1)), np.ones((10, 1)) - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') - - my_cb = MyCallback(should_run=True) - cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) - self.assertTrue(cb_list._should_call_train_batch_hooks) - self.assertTrue(cb_list._should_call_test_batch_hooks) - self.assertTrue(cb_list._should_call_predict_batch_hooks) - - model.fit(x, y, epochs=2, batch_size=10, callbacks=[my_cb], verbose=0) - model.evaluate(x, y, batch_size=10, callbacks=[my_cb], verbose=0) - model.predict(x, batch_size=10, callbacks=[my_cb], 
verbose=0) - - self.assertEqual(my_cb.train_batches, 2) - self.assertEqual(my_cb.test_batches, 1) - self.assertEqual(my_cb.predict_batches, 1) - - my_cb = MyCallback(should_run=False) - cb_list = keras.callbacks.CallbackList([my_cb], verbose=0) - self.assertFalse(cb_list._should_call_train_batch_hooks) - self.assertFalse(cb_list._should_call_test_batch_hooks) - self.assertFalse(cb_list._should_call_predict_batch_hooks) - - model.fit(x, y, epochs=2, batch_size=10, callbacks=[my_cb], verbose=0) - model.evaluate(x, y, batch_size=10, callbacks=[my_cb], verbose=0) - model.predict(x, batch_size=10, callbacks=[my_cb], verbose=0) - - self.assertEqual(my_cb.train_batches, 0) - self.assertEqual(my_cb.test_batches, 0) - self.assertEqual(my_cb.predict_batches, 0) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_default_callbacks_do_not_call_batch_hooks(self): - model = keras.Sequential([keras.layers.Dense(1)]) - log_dir = self.get_temp_dir() - cb_list = keras.callbacks.CallbackList([ - keras.callbacks.TensorBoard(log_dir, profile_batch=0), - keras.callbacks.ModelCheckpoint(log_dir), - ], - add_progbar=True, - model=model, - verbose=2, - epochs=3) - self.assertLen(cb_list.callbacks, 3) - self.assertFalse(cb_list._should_call_train_batch_hooks) - self.assertFalse(cb_list._should_call_test_batch_hooks) - self.assertFalse(cb_list._should_call_predict_batch_hooks) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_change_tf_functions_during_fit(self): - - class ChangeFunctions(keras.callbacks.Callback): - - def on_epoch_end(self, epochs, logs=None): - - def new_fn(iterator): - raise ValueError('New function substituted successfully.') - - self.model.train_function = new_fn - self.model.test_function = new_fn - self.model.predict_function = new_fn - - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') - - x, y = np.ones((10, 10)), np.ones((10, 1)) - with self.assertRaisesRegexp(ValueError, 'New function '): - model.fit(x, y, batch_size=2, epochs=2, callbacks=[ChangeFunctions()]) - with self.assertRaisesRegexp(ValueError, 'New function '): - model.evaluate(x, y, batch_size=2) - with self.assertRaisesRegexp(ValueError, 'New function '): - model.predict(x, batch_size=2) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_stop_training_batch_level(self): - - class MyCallback(keras.callbacks.Callback): - - def __init__(self): - super().__init__() - self.batch_counter = 0 - - def on_train_batch_end(self, batch, logs=None): - self.batch_counter += 1 - if batch == 2: - self.model.stop_training = True - - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') - x, y = np.ones((10, 10)), np.ones((10, 1)) - my_cb = MyCallback() - # Will run 5 batches if `stop_training` doesn't work. 
- model.fit(x, y, batch_size=2, callbacks=[my_cb]) - self.assertEqual(my_cb.batch_counter, 3) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_built_in_callback_order(self): - - class CustomCallback(keras.callbacks.Callback): - pass - - class TestingCallbackList(keras.callbacks.CallbackList): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - if ((not isinstance(self.callbacks[0], CustomCallback)) or - (not isinstance(self.callbacks[1], keras.callbacks.History)) or - (not isinstance(self.callbacks[2], keras.callbacks.ProgbarLogger))): - raise AssertionError(f'Callback order unexpected: {self.callbacks}') - - with mock.patch.object( - keras.callbacks, 'CallbackList', TestingCallbackList): - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') - custom_callback = CustomCallback() - model.fit(np.ones((10, 10)), np.ones((10, 1)), epochs=5, - callbacks=[custom_callback]) # A summary that was emitted during a test. Fields: # logdir: str. The logdir of the FileWriter to which the summary was # written. # tag: str. The name of the summary. -_ObservedSummary = collections.namedtuple('_ObservedSummary', ('logdir', 'tag')) +_ObservedSummary = collections.namedtuple("_ObservedSummary", ("logdir", "tag")) class _SummaryFile: - """A record of summary tags and the files to which they were written. + """A record of summary tags and the files to which they were written. - Fields `scalars`, `images`, `histograms`, and `tensors` are sets - containing `_ObservedSummary` values. - """ + Fields `scalars`, `images`, `histograms`, and `tensors` are sets + containing `_ObservedSummary` values. + """ - def __init__(self): - self.scalars = set() - self.images = set() - self.histograms = set() - self.tensors = set() - self.graph_defs = [] - self.convert_from_v2_summary_proto = False + def __init__(self): + self.scalars = set() + self.images = set() + self.histograms = set() + self.tensors = set() + self.graph_defs = [] + self.convert_from_v2_summary_proto = False def list_summaries(logdir): - """Read all summaries under the logdir into a `_SummaryFile`. - - Args: - logdir: A path to a directory that contains zero or more event - files, either as direct children or in transitive subdirectories. - Summaries in these events must only contain old-style scalars, - images, and histograms. Non-summary events, like `graph_def`s, are - ignored. - - Returns: - A `_SummaryFile` object reflecting all summaries written to any - event files in the logdir or any of its descendant directories. - - Raises: - ValueError: If an event file contains an summary of unexpected kind. - """ - result = _SummaryFile() - for (dirpath, _, filenames) in os.walk(logdir): - for filename in filenames: - if not filename.startswith('events.out.'): - continue - path = os.path.join(dirpath, filename) - for event in tf.compat.v1.train.summary_iterator(path): - if event.graph_def: - result.graph_defs.append(event.graph_def) - if not event.summary: # (e.g., it's a `graph_def` event) - continue - for value in event.summary.value: - tag = value.tag - # Case on the `value` rather than the summary metadata because - # the Keras callback uses `summary_ops_v2` to emit old-style - # summaries. See b/124535134. 
- kind = value.WhichOneof('value') - container = { - 'simple_value': result.scalars, - 'image': result.images, - 'histo': result.histograms, - 'tensor': result.tensors, - }.get(kind) - if container is None: - raise ValueError( - 'Unexpected summary kind %r in event file %s:\n%r' - % (kind, path, event)) - elif kind == 'tensor' and tag != 'keras': - # Convert the tf2 summary proto to old style for type checking. - plugin_name = value.metadata.plugin_data.plugin_name - container = { - 'images': result.images, - 'histograms': result.histograms, - 'scalars': result.scalars, - }.get(plugin_name) - if container is not None: - result.convert_from_v2_summary_proto = True - else: - container = result.tensors - container.add(_ObservedSummary(logdir=dirpath, tag=tag)) - return result + """Read all summaries under the logdir into a `_SummaryFile`. + + Args: + logdir: A path to a directory that contains zero or more event + files, either as direct children or in transitive subdirectories. + Summaries in these events must only contain old-style scalars, + images, and histograms. Non-summary events, like `graph_def`s, are + ignored. + + Returns: + A `_SummaryFile` object reflecting all summaries written to any + event files in the logdir or any of its descendant directories. + + Raises: + ValueError: If an event file contains a summary of an unexpected + kind. + """ + result = _SummaryFile() + for dirpath, _, filenames in os.walk(logdir): + for filename in filenames: + if not filename.startswith("events.out."): + continue + path = os.path.join(dirpath, filename) + for event in tf.compat.v1.train.summary_iterator(path): + if event.graph_def: + result.graph_defs.append(event.graph_def) + if not event.summary: # (e.g., it's a `graph_def` event) + continue + for value in event.summary.value: + tag = value.tag + # Case on the `value` rather than the summary metadata + # because the Keras callback uses `summary_ops_v2` to emit + # old-style summaries. See b/124535134. + kind = value.WhichOneof("value") + container = { + "simple_value": result.scalars, + "image": result.images, + "histo": result.histograms, + "tensor": result.tensors, + }.get(kind) + if container is None: + raise ValueError( + "Unexpected summary kind %r in event file %s:\n%r" + % (kind, path, event) + ) + elif kind == "tensor" and tag != "keras": + # Convert the tf2 summary proto to old style for type + # checking.
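+ # (V2 summaries are all serialized as generic `tensor` values; + # the originating plugin is recorded in + # `metadata.plugin_data.plugin_name`, which is mapped back to + # a scalar/image/histogram bucket here.)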
+ plugin_name = value.metadata.plugin_data.plugin_name + container = { + "images": result.images, + "histograms": result.histograms, + "scalars": result.scalars, + }.get(plugin_name) + if container is not None: + result.convert_from_v2_summary_proto = True + else: + container = result.tensors + container.add(_ObservedSummary(logdir=dirpath, tag=tag)) + return result @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes(always_skip_v1=True) class TestTensorBoardV2(test_combinations.TestCase): + def setUp(self): + super(TestTensorBoardV2, self).setUp() + self.logdir = os.path.join(self.get_temp_dir(), "tb") + self.train_dir = os.path.join(self.logdir, "train") + self.validation_dir = os.path.join(self.logdir, "validation") + + def _get_model(self, compile_model=True): + layers = [ + keras.layers.Conv2D(8, (3, 3)), + keras.layers.Flatten(), + keras.layers.Dense(1), + ] + model = test_utils.get_model_from_layers( + layers, input_shape=(10, 10, 1) + ) + if compile_model: + opt = gradient_descent.SGD(learning_rate=0.001) + model.compile( + opt, "mse", run_eagerly=test_utils.should_run_eagerly() + ) + return model - def setUp(self): - super(TestTensorBoardV2, self).setUp() - self.logdir = os.path.join(self.get_temp_dir(), 'tb') - self.train_dir = os.path.join(self.logdir, 'train') - self.validation_dir = os.path.join(self.logdir, 'validation') - - def _get_model(self, compile_model=True): - layers = [ - keras.layers.Conv2D(8, (3, 3)), - keras.layers.Flatten(), - keras.layers.Dense(1) - ] - model = test_utils.get_model_from_layers(layers, input_shape=(10, 10, 1)) - if compile_model: - opt = gradient_descent.SGD(learning_rate=0.001) - model.compile(opt, 'mse', run_eagerly=test_utils.should_run_eagerly()) - return model - - def test_TensorBoard_default_logdir(self): - """Regression test for cross-platform pathsep in default logdir.""" - os.chdir(self.get_temp_dir()) - - model = self._get_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard() # no logdir specified - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - - summary_file = list_summaries(logdir='.') - train_dir = os.path.join('.', 'logs', 'train') - validation_dir = os.path.join('.', 'logs', 'validation') - self.assertEqual( - summary_file.scalars, { - _ObservedSummary(logdir=train_dir, tag='epoch_loss'), - _ObservedSummary(logdir=validation_dir, tag='epoch_loss'), - _ObservedSummary( - logdir=validation_dir, tag='evaluation_loss_vs_iterations'), - }) - - def test_TensorBoard_basic(self): - model = self._get_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard(self.logdir) - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - - summary_file = list_summaries(self.logdir) - self.assertEqual( - summary_file.scalars, { - _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), - _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), - _ObservedSummary( - logdir=self.validation_dir, - tag='evaluation_loss_vs_iterations'), - }) - - def test_TensorBoard_across_invocations(self): - """Regression test for summary writer resource use-after-free. 
- - See: - """ - model = self._get_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard(self.logdir) - - for _ in (1, 2): - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - - summary_file = list_summaries(self.logdir) - self.assertEqual( - summary_file.scalars, { - _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), - _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), - _ObservedSummary( - logdir=self.validation_dir, - tag='evaluation_loss_vs_iterations'), - }) - - def test_TensorBoard_no_spurious_event_files(self): - model = self._get_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard(self.logdir) - - model.fit( - x, - y, - batch_size=2, - epochs=2, - callbacks=[tb_cbk]) - - events_file_run_basenames = set() - for (dirpath, _, filenames) in os.walk(self.train_dir): - if any(fn.startswith('events.out.') for fn in filenames): - events_file_run_basenames.add(os.path.basename(dirpath)) - self.assertEqual(events_file_run_basenames, {'train'}) - - def test_TensorBoard_batch_metrics(self): - model = self._get_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard(self.logdir, update_freq=1) - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - - summary_file = list_summaries(self.logdir) - self.assertEqual( - summary_file.scalars, - { - _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), - _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), - _ObservedSummary( - logdir=self.validation_dir, - tag='evaluation_loss_vs_iterations'), - }, - ) + def test_TensorBoard_default_logdir(self): + """Regression test for cross-platform pathsep in default logdir.""" + os.chdir(self.get_temp_dir()) - def test_TensorBoard_learning_rate_schedules(self): - model = self._get_model(compile_model=False) - opt = gradient_descent.SGD(learning_rate_schedule.CosineDecay(0.01, 1)) - model.compile(opt, 'mse', run_eagerly=test_utils.should_run_eagerly()) - - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - - model.fit( - x, - y, - batch_size=2, - epochs=2, - callbacks=[keras.callbacks.TensorBoard(self.logdir)]) - - summary_file = list_summaries(self.logdir) - self.assertEqual( - summary_file.scalars, - { - _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), - _ObservedSummary(logdir=self.train_dir, tag='epoch_learning_rate'), - }, - ) + model = self._get_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard() # no logdir specified - def test_TensorBoard_global_step(self): - model = self._get_model(compile_model=False) - opt = gradient_descent.SGD(learning_rate_schedule.CosineDecay(0.01, 1)) - model.compile(opt, 'mse', run_eagerly=test_utils.should_run_eagerly()) + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + + summary_file = list_summaries(logdir=".") + train_dir = os.path.join(".", "logs", "train") + validation_dir = os.path.join(".", "logs", "validation") + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=train_dir, tag="epoch_loss"), + _ObservedSummary(logdir=validation_dir, tag="epoch_loss"), + _ObservedSummary( + logdir=validation_dir, tag="evaluation_loss_vs_iterations" + ), + }, + ) + + def test_TensorBoard_basic(self): + model = self._get_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = 
keras.callbacks.TensorBoard(self.logdir) - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + + summary_file = list_summaries(self.logdir) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag="epoch_loss"), + _ObservedSummary(logdir=self.validation_dir, tag="epoch_loss"), + _ObservedSummary( + logdir=self.validation_dir, + tag="evaluation_loss_vs_iterations", + ), + }, + ) + + def test_TensorBoard_across_invocations(self): + """Regression test for summary writer resource use-after-free. + + See: + """ + model = self._get_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard(self.logdir) + + for _ in (1, 2): + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + + summary_file = list_summaries(self.logdir) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag="epoch_loss"), + _ObservedSummary(logdir=self.validation_dir, tag="epoch_loss"), + _ObservedSummary( + logdir=self.validation_dir, + tag="evaluation_loss_vs_iterations", + ), + }, + ) + + def test_TensorBoard_no_spurious_event_files(self): + model = self._get_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard(self.logdir) + + model.fit(x, y, batch_size=2, epochs=2, callbacks=[tb_cbk]) + + events_file_run_basenames = set() + for dirpath, _, filenames in os.walk(self.train_dir): + if any(fn.startswith("events.out.") for fn in filenames): + events_file_run_basenames.add(os.path.basename(dirpath)) + self.assertEqual(events_file_run_basenames, {"train"}) + + def test_TensorBoard_batch_metrics(self): + model = self._get_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard(self.logdir, update_freq=1) - model.fit( - x, - y, - batch_size=2, - epochs=2, - verbose=0, - callbacks=[ - keras.callbacks.TensorBoard( - self.logdir, - update_freq=1, - profile_batch=0, - write_steps_per_second=True) - ]) - - summary_file = list_summaries(self.logdir) - self.assertEqual( - summary_file.scalars, - { - _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), - _ObservedSummary(logdir=self.train_dir, tag='epoch_learning_rate'), - _ObservedSummary( - logdir=self.train_dir, tag='epoch_steps_per_second'), - _ObservedSummary( - logdir=self.train_dir, tag='batch_steps_per_second'), - }, - ) + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + + summary_file = list_summaries(self.logdir) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag="batch_loss"), + _ObservedSummary(logdir=self.train_dir, tag="epoch_loss"), + _ObservedSummary(logdir=self.validation_dir, tag="epoch_loss"), + _ObservedSummary( + logdir=self.validation_dir, + tag="evaluation_loss_vs_iterations", + ), + }, + ) + + def test_TensorBoard_learning_rate_schedules(self): + model = self._get_model(compile_model=False) + opt = gradient_descent.SGD(learning_rate_schedule.CosineDecay(0.01, 1)) + model.compile(opt, "mse", run_eagerly=test_utils.should_run_eagerly()) + + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - def test_TensorBoard_weight_histograms(self): - model = self._get_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard(self.logdir, histogram_freq=1) - model_type = 
test_utils.get_model_type() - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - - self.assertEqual( - summary_file.scalars, - { - _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), - _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), - _ObservedSummary( - logdir=self.validation_dir, - tag='evaluation_loss_vs_iterations'), - }, - ) - self.assertEqual( - self._strip_layer_names(summary_file.histograms, model_type), - { - _ObservedSummary(logdir=self.train_dir, tag='bias_0/histogram'), - _ObservedSummary(logdir=self.train_dir, tag='kernel_0/histogram'), - }, - ) + model.fit( + x, + y, + batch_size=2, + epochs=2, + callbacks=[keras.callbacks.TensorBoard(self.logdir)], + ) + + summary_file = list_summaries(self.logdir) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag="epoch_loss"), + _ObservedSummary( + logdir=self.train_dir, tag="epoch_learning_rate" + ), + }, + ) + + def test_TensorBoard_global_step(self): + model = self._get_model(compile_model=False) + opt = gradient_descent.SGD(learning_rate_schedule.CosineDecay(0.01, 1)) + model.compile(opt, "mse", run_eagerly=test_utils.should_run_eagerly()) + + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - def test_TensorBoard_weight_images(self): - model = self._get_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, write_images=True) - model_type = test_utils.get_model_type() - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - - self.assertEqual( - summary_file.scalars, - { - _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), - _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), - _ObservedSummary( - logdir=self.validation_dir, - tag='evaluation_loss_vs_iterations'), - }, - ) - self.assertEqual( - self._strip_layer_names(summary_file.histograms, model_type), - { - _ObservedSummary(logdir=self.train_dir, tag='bias_0/histogram'), - _ObservedSummary(logdir=self.train_dir, tag='kernel_0/histogram'), - }, - ) - if summary_file.convert_from_v2_summary_proto: - expected_image_summaries = { - _ObservedSummary(logdir=self.train_dir, tag='bias_0/image'), - _ObservedSummary(logdir=self.train_dir, tag='kernel_0/image'), - } - else: - expected_image_summaries = { - _ObservedSummary(logdir=self.train_dir, tag='bias_0/image/0'), - _ObservedSummary(logdir=self.train_dir, tag='kernel_0/image/0'), - _ObservedSummary(logdir=self.train_dir, tag='kernel_0/image/1'), - _ObservedSummary(logdir=self.train_dir, tag='kernel_0/image/2'), - } - self.assertEqual( - self._strip_layer_names(summary_file.images, model_type), - expected_image_summaries - ) + model.fit( + x, + y, + batch_size=2, + epochs=2, + verbose=0, + callbacks=[ + keras.callbacks.TensorBoard( + self.logdir, + update_freq=1, + profile_batch=0, + write_steps_per_second=True, + ) + ], + ) + + summary_file = list_summaries(self.logdir) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag="batch_loss"), + _ObservedSummary(logdir=self.train_dir, tag="epoch_loss"), + _ObservedSummary( + logdir=self.train_dir, tag="epoch_learning_rate" + ), + _ObservedSummary( + logdir=self.train_dir, tag="epoch_steps_per_second" + ), + _ObservedSummary( + logdir=self.train_dir, tag="batch_steps_per_second" + ), + }, + ) + + def 
test_TensorBoard_weight_histograms(self): + model = self._get_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard(self.logdir, histogram_freq=1) + model_type = test_utils.get_model_type() - def test_TensorBoard_projector_callback(self): - layers = [ - keras.layers.Embedding(10, 10, name='test_embedding'), - keras.layers.Dense(10, activation='relu'), - keras.layers.Dense(1, activation='sigmoid') - ] - model = test_utils.get_model_from_layers(layers, input_shape=(10,)) - model.compile( - optimizer='adam', - loss=keras.losses.BinaryCrossentropy(from_logits=True), - run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 10)), np.ones((10, 10)) - tb_cbk = keras.callbacks.TensorBoard( - self.logdir, - embeddings_freq=1, - embeddings_metadata={'test_embedding': 'metadata.tsv'}) - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - - with open(os.path.join(self.logdir, 'projector_config.pbtxt')) as f: - self.assertEqual(f.readlines(), [ - 'embeddings {\n', - (' tensor_name: ' - '"layer_with_weights-0/embeddings/.ATTRIBUTES/VARIABLE_VALUE"\n'), - ' metadata_path: "metadata.tsv"\n', '}\n' - ]) - - def test_custom_summary(self): - if not tf.executing_eagerly(): - self.skipTest('Custom summaries only supported in V2 code path.') - - def scalar_v2_mock(name, data, step=None): - """A reimplementation of the scalar plugin to avoid circular deps.""" - metadata = tf.compat.v1.SummaryMetadata() - # Should match value in tensorboard/plugins/scalar/metadata.py. - metadata.plugin_data.plugin_name = 'scalars' - with tf.summary.experimental.summary_scope( - name, 'scalar_summary', values=[data, step]) as (tag, _): - return tf.summary.write( - tag=tag, - tensor=tf.cast(data, 'float32'), - step=step, - metadata=metadata) - - class LayerWithSummary(keras.layers.Layer): - - def call(self, x): - scalar_v2_mock('custom_summary', tf.reduce_sum(x)) - return x - - model = test_utils.get_model_from_layers([LayerWithSummary()], - input_shape=(5,), - name='model') - - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - tb_cbk = keras.callbacks.TensorBoard(self.logdir, update_freq=1) - x, y = np.ones((10, 5)), np.ones((10, 5)) - model.fit(x, y, batch_size=2, validation_data=(x, y), callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - self.assertEqual( - summary_file.scalars, - { - _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), - _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), - _ObservedSummary( - logdir=self.validation_dir, - tag='evaluation_loss_vs_iterations'), - _ObservedSummary( - logdir=self.train_dir, - tag='model/layer_with_summary/custom_summary'), - _ObservedSummary( - logdir=self.validation_dir, - tag='model/layer_with_summary/custom_summary') - }, - ) + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + summary_file = list_summaries(self.logdir) + + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag="epoch_loss"), + _ObservedSummary(logdir=self.validation_dir, tag="epoch_loss"), + _ObservedSummary( + logdir=self.validation_dir, + tag="evaluation_loss_vs_iterations", + ), + }, + ) + self.assertEqual( + self._strip_layer_names(summary_file.histograms, model_type), + { + _ObservedSummary(logdir=self.train_dir, tag="bias_0/histogram"), + _ObservedSummary( + logdir=self.train_dir, tag="kernel_0/histogram" + ), + }, + ) + + def 
test_TensorBoard_weight_images(self): + model = self._get_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, histogram_freq=1, write_images=True + ) + model_type = test_utils.get_model_type() - def _strip_layer_names(self, summaries, model_type): - """Deduplicate summary names modulo layer prefix. + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + summary_file = list_summaries(self.logdir) + + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag="epoch_loss"), + _ObservedSummary(logdir=self.validation_dir, tag="epoch_loss"), + _ObservedSummary( + logdir=self.validation_dir, + tag="evaluation_loss_vs_iterations", + ), + }, + ) + self.assertEqual( + self._strip_layer_names(summary_file.histograms, model_type), + { + _ObservedSummary(logdir=self.train_dir, tag="bias_0/histogram"), + _ObservedSummary( + logdir=self.train_dir, tag="kernel_0/histogram" + ), + }, + ) + if summary_file.convert_from_v2_summary_proto: + expected_image_summaries = { + _ObservedSummary(logdir=self.train_dir, tag="bias_0/image"), + _ObservedSummary(logdir=self.train_dir, tag="kernel_0/image"), + } + else: + expected_image_summaries = { + _ObservedSummary(logdir=self.train_dir, tag="bias_0/image/0"), + _ObservedSummary(logdir=self.train_dir, tag="kernel_0/image/0"), + _ObservedSummary(logdir=self.train_dir, tag="kernel_0/image/1"), + _ObservedSummary(logdir=self.train_dir, tag="kernel_0/image/2"), + } + self.assertEqual( + self._strip_layer_names(summary_file.images, model_type), + expected_image_summaries, + ) + + def test_TensorBoard_projector_callback(self): + layers = [ + keras.layers.Embedding(10, 10, name="test_embedding"), + keras.layers.Dense(10, activation="relu"), + keras.layers.Dense(1, activation="sigmoid"), + ] + model = test_utils.get_model_from_layers(layers, input_shape=(10,)) + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(from_logits=True), + run_eagerly=test_utils.should_run_eagerly(), + ) + x, y = np.ones((10, 10)), np.ones((10, 10)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, + embeddings_freq=1, + embeddings_metadata={"test_embedding": "metadata.tsv"}, + ) - This removes the first slash-component of each tag name: for - instance, "foo/bar/baz" becomes "bar/baz". + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + + with open(os.path.join(self.logdir, "projector_config.pbtxt")) as f: + self.assertEqual( + f.readlines(), + [ + "embeddings {\n", + " tensor_name: " + '"layer_with_weights-0/embeddings/.ATTRIBUTES/' + 'VARIABLE_VALUE"\n', + ' metadata_path: "metadata.tsv"\n', + "}\n", + ], + ) + + def test_custom_summary(self): + if not tf.executing_eagerly(): + self.skipTest("Custom summaries only supported in V2 code path.") + + def scalar_v2_mock(name, data, step=None): + """A reimplementation of the scalar plugin to avoid circular + deps.""" + metadata = tf.compat.v1.SummaryMetadata() + # Should match value in tensorboard/plugins/scalar/metadata.py. 
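+ # (TensorBoard uses this plugin name to route a generic tensor + # summary to the scalars dashboard.)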
+ metadata.plugin_data.plugin_name = "scalars" + with tf.summary.experimental.summary_scope( + name, "scalar_summary", values=[data, step] + ) as (tag, _): + return tf.summary.write( + tag=tag, + tensor=tf.cast(data, "float32"), + step=step, + metadata=metadata, + ) + + class LayerWithSummary(keras.layers.Layer): + def call(self, x): + scalar_v2_mock("custom_summary", tf.reduce_sum(x)) + return x + + model = test_utils.get_model_from_layers( + [LayerWithSummary()], input_shape=(5,), name="model" + ) + + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + tb_cbk = keras.callbacks.TensorBoard(self.logdir, update_freq=1) + x, y = np.ones((10, 5)), np.ones((10, 5)) + model.fit( + x, y, batch_size=2, validation_data=(x, y), callbacks=[tb_cbk] + ) + summary_file = list_summaries(self.logdir) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag="batch_loss"), + _ObservedSummary(logdir=self.train_dir, tag="epoch_loss"), + _ObservedSummary(logdir=self.validation_dir, tag="epoch_loss"), + _ObservedSummary( + logdir=self.validation_dir, + tag="evaluation_loss_vs_iterations", + ), + _ObservedSummary( + logdir=self.train_dir, + tag="model/layer_with_summary/custom_summary", + ), + _ObservedSummary( + logdir=self.validation_dir, + tag="model/layer_with_summary/custom_summary", + ), + }, + ) + + def _strip_layer_names(self, summaries, model_type): + """Deduplicate summary names modulo layer prefix. + + This removes the first slash-component of each tag name: for + instance, "foo/bar/baz" becomes "bar/baz". + + Args: + summaries: A `set` of `_ObservedSummary` values. + model_type: The model type currently being tested. + + Returns: + A new `set` of `_ObservedSummary` values with layer prefixes + removed. + """ + result = set() + for summary in summaries: + if "/" not in summary.tag: + raise ValueError(f"tag has no layer name: {summary.tag!r}") + start_from = 2 if "subclass" in model_type else 1 + new_tag = "/".join(summary.tag.split("/")[start_from:]) + result.add(summary._replace(tag=new_tag)) + return result + + def test_TensorBoard_invalid_argument(self): + with self.assertRaisesRegex(ValueError, "Unrecognized arguments"): + keras.callbacks.TensorBoard(wwrite_images=True) + + def test_TensorBoard_non_blocking(self): + model = keras.Sequential([keras.layers.Dense(1)]) + tb = keras.callbacks.TensorBoard(self.logdir) + self.assertTrue(tb._supports_tf_logs) + cb_list = keras.callbacks.CallbackList( + [tb], model=model, epochs=1, steps=100, verbose=0 + ) + + tensor = tf.convert_to_tensor(1.0) + + def mock_numpy(): + raise RuntimeError( + "If this error is seen, TensorBoard is causing a blocking " + "NumPy conversion." + ) + + with tf.compat.v1.test.mock.patch.object(tensor, "numpy", mock_numpy): + logs = {"metric": tensor} + + cb_list.on_train_begin(logs) + cb_list.on_epoch_begin(0, logs) + cb_list.on_train_batch_begin(0, logs) + cb_list.on_train_batch_end(0, logs) + cb_list.on_epoch_end(0, logs) + cb_list.on_train_end(logs) + + cb_list.on_test_begin(logs) + cb_list.on_test_batch_begin(0, logs) + cb_list.on_test_batch_end(0, logs) + cb_list.on_test_end(logs) + + cb_list.on_predict_begin(logs) + cb_list.on_predict_batch_begin(logs) + cb_list.on_predict_batch_end(logs) + cb_list.on_predict_end(logs) - Args: - summaries: A `set` of `_ObservedSummary` values. - model_type: The model type currently being tested. - Returns: - A new `set` of `_ObservedSummary` values with layer prefixes - removed. 
- """ - result = set() - for summary in summaries: - if '/' not in summary.tag: - raise ValueError('tag has no layer name: %r' % summary.tag) - start_from = 2 if 'subclass' in model_type else 1 - new_tag = '/'.join(summary.tag.split('/')[start_from:]) - result.add(summary._replace(tag=new_tag)) - return result +# Note that this test specifies model_type explicitly. +@test_combinations.run_all_keras_modes(always_skip_v1=True) +class TestTensorBoardV2NonParameterizedTest(test_combinations.TestCase): + def setUp(self): + super(TestTensorBoardV2NonParameterizedTest, self).setUp() + self.logdir = os.path.join(self.get_temp_dir(), "tb") + self.train_dir = os.path.join(self.logdir, "train") + self.validation_dir = os.path.join(self.logdir, "validation") + + def _get_seq_model(self): + model = keras.models.Sequential( + [ + keras.layers.Conv2D(8, (3, 3), input_shape=(10, 10, 1)), + keras.layers.Flatten(), + keras.layers.Dense(1), + ] + ) + opt = gradient_descent.SGD(learning_rate=0.001) + model.compile(opt, "mse", run_eagerly=test_utils.should_run_eagerly()) + return model - def test_TensorBoard_invalid_argument(self): - with self.assertRaisesRegex(ValueError, 'Unrecognized arguments'): - keras.callbacks.TensorBoard(wwrite_images=True) + def _count_xplane_file(self, logdir): + profile_dir = os.path.join(logdir, "plugins", "profile") + count = 0 + for dirpath, dirnames, filenames in os.walk(profile_dir): + del dirpath # unused + del dirnames # unused + for filename in filenames: + if filename.endswith(".xplane.pb"): + count += 1 + return count + + def fitModelAndAssertKerasModelWritten(self, model): + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, write_graph=True, profile_batch=0 + ) + model.fit( + x, + y, + batch_size=2, + epochs=3, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + summary_file = list_summaries(self.logdir) + self.assertEqual( + summary_file.tensors, + { + _ObservedSummary(logdir=self.train_dir, tag="keras"), + }, + ) + if not model.run_eagerly: + # There should be one train graph + self.assertLen(summary_file.graph_defs, 1) + for graph_def in summary_file.graph_defs: + graph_def_str = str(graph_def) + + # All the model layers should appear in the graphs + for layer in model.layers: + if "input" not in layer.name: + self.assertIn(layer.name, graph_def_str) + + def test_TensorBoard_writeSequentialModel_noInputShape(self): + model = keras.models.Sequential( + [ + keras.layers.Conv2D(8, (3, 3)), + keras.layers.Flatten(), + keras.layers.Dense(1), + ] + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + self.fitModelAndAssertKerasModelWritten(model) + + def test_TensorBoard_writeSequentialModel_withInputShape(self): + model = keras.models.Sequential( + [ + keras.layers.Conv2D(8, (3, 3), input_shape=(10, 10, 1)), + keras.layers.Flatten(), + keras.layers.Dense(1), + ] + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + self.fitModelAndAssertKerasModelWritten(model) + + def test_TensorBoard_writeModel(self): + inputs = keras.layers.Input([10, 10, 1]) + x = keras.layers.Conv2D(8, (3, 3), activation="relu")(inputs) + x = keras.layers.Flatten()(x) + x = keras.layers.Dense(1)(x) + model = keras.models.Model(inputs=inputs, outputs=[x]) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + self.fitModelAndAssertKerasModelWritten(model) + + def test_TensorBoard_autoTrace(self): + model = self._get_seq_model() + x, y = np.ones((10, 10, 10, 1)), 
np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, histogram_freq=1, profile_batch=1, write_graph=False + ) - def test_TensorBoard_non_blocking(self): - model = keras.Sequential([keras.layers.Dense(1)]) - tb = keras.callbacks.TensorBoard(self.logdir) - self.assertTrue(tb._supports_tf_logs) - cb_list = keras.callbacks.CallbackList([tb], - model=model, - epochs=1, - steps=100, - verbose=0) + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + summary_file = list_summaries(self.logdir) + + self.assertEqual( + summary_file.tensors, + { + _ObservedSummary(logdir=self.train_dir, tag="batch_1"), + }, + ) + self.assertEqual(1, self._count_xplane_file(logdir=self.logdir)) + + def test_TensorBoard_autoTrace_outerProfiler(self): + """Runs a profiler session that interferes with the callback's one. + + The callback will not generate a profile but execution will proceed + without crashing due to unhandled exceptions. + """ + tf.profiler.experimental.start(logdir="") + model = self._get_seq_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, histogram_freq=1, profile_batch=1, write_graph=False + ) - tensor = tf.convert_to_tensor(1.) + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + summary_file = list_summaries(self.logdir) + tf.profiler.experimental.stop(save=False) + + self.assertEqual( + summary_file.tensors, + { + _ObservedSummary(logdir=self.train_dir, tag="batch_1"), + }, + ) + self.assertEqual(0, self._count_xplane_file(logdir=self.train_dir)) + + def test_TensorBoard_autoTrace_tagNameWithBatchNum(self): + model = self._get_seq_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, histogram_freq=1, profile_batch=2, write_graph=False + ) - def mock_numpy(): - raise RuntimeError( - 'If this error is seen, TensorBoard is causing a blocking ' - 'NumPy conversion.') + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + summary_file = list_summaries(self.logdir) + + self.assertEqual( + summary_file.tensors, + { + _ObservedSummary(logdir=self.train_dir, tag="batch_2"), + }, + ) + self.assertEqual(1, self._count_xplane_file(logdir=self.logdir)) + + def test_TensorBoard_autoTrace_profileBatchRangeSingle(self): + model = self._get_seq_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, + histogram_freq=1, + profile_batch="2,2", + write_graph=False, + ) - with tf.compat.v1.test.mock.patch.object(tensor, 'numpy', mock_numpy): - logs = {'metric': tensor} + model.fit( + x, + y, + batch_size=3, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + summary_file = list_summaries(self.logdir) + + self.assertEqual( + summary_file.tensors, + { + # Trace will be logged once at the batch it stops profiling. 
+ _ObservedSummary(logdir=self.train_dir, tag="batch_2"), + }, + ) + self.assertEqual(1, self._count_xplane_file(logdir=self.logdir)) + + def test_TensorBoard_autoTrace_profileBatchRangeTwice(self): + model = self._get_seq_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, + histogram_freq=1, + profile_batch="10,10", + write_graph=False, + ) - cb_list.on_train_begin(logs) - cb_list.on_epoch_begin(0, logs) - cb_list.on_train_batch_begin(0, logs) - cb_list.on_train_batch_end(0, logs) - cb_list.on_epoch_end(0, logs) - cb_list.on_train_end(logs) + model.fit( + x, + y, + batch_size=3, + epochs=10, + validation_data=(x, y), + callbacks=[tb_cbk], + ) - cb_list.on_test_begin(logs) - cb_list.on_test_batch_begin(0, logs) - cb_list.on_test_batch_end(0, logs) - cb_list.on_test_end(logs) + time.sleep(1) # Avoids the second profile over-writing the first. - cb_list.on_predict_begin(logs) - cb_list.on_predict_batch_begin(logs) - cb_list.on_predict_batch_end(logs) - cb_list.on_predict_end(logs) + model.fit( + x, + y, + batch_size=3, + epochs=10, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + self.assertEqual(2, self._count_xplane_file(logdir=self.logdir)) + + # Test case that replicates a GitHub issue. + # https://github.com/tensorflow/tensorflow/issues/37543 + def test_TensorBoard_autoTrace_profileTwiceGraphMode(self): + tf.compat.v1.disable_eager_execution() + inp = keras.Input((1,)) + out = keras.layers.Dense(units=1)(inp) + model = keras.Model(inp, out) + + model.compile(gradient_descent.SGD(1), "mse") + + logdir = os.path.join(self.get_temp_dir(), "tb1") + model.fit( + np.zeros((64, 1)), + np.zeros((64, 1)), + batch_size=32, + callbacks=[keras.callbacks.TensorBoard(logdir, profile_batch=1)], + ) + # Verifies trace exists in the first logdir. + self.assertEqual(1, self._count_xplane_file(logdir=logdir)) + logdir = os.path.join(self.get_temp_dir(), "tb2") + model.fit( + np.zeros((64, 1)), + np.zeros((64, 1)), + batch_size=32, + callbacks=[keras.callbacks.TensorBoard(logdir, profile_batch=2)], + ) + # Verifies trace exists in the second logdir. + self.assertEqual(1, self._count_xplane_file(logdir=logdir)) + + def test_TensorBoard_autoTrace_profileBatchRange(self): + model = self._get_seq_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, + histogram_freq=1, + profile_batch="1,3", + write_graph=False, + ) + model.fit( + x, + y, + batch_size=4, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + summary_file = list_summaries(self.logdir) + + self.assertEqual( + summary_file.tensors, + { + # Trace will be logged once at the batch it stops profiling. + _ObservedSummary(logdir=self.train_dir, tag="batch_3"), + }, + ) + self.assertEqual(1, self._count_xplane_file(logdir=self.logdir)) + + def test_TensorBoard_autoTrace_profileInvalidBatchRange(self): + with self.assertRaises(ValueError): + keras.callbacks.TensorBoard( + self.logdir, + histogram_freq=1, + profile_batch="-1,3", + write_graph=False, + ) -# Note that this test specifies model_type explicitly. 
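The profiling tests above cover the three ways `profile_batch` can be supplied. A short sketch of the corresponding user-facing configuration, with a hypothetical logdir:

```python
import keras

# Profile only the second batch (the historical default)...
tb_single = keras.callbacks.TensorBoard(log_dir="/tmp/logs", profile_batch=2)

# ...or a closed batch range, written as "start,stop" as in the tests above.
tb_range = keras.callbacks.TensorBoard(log_dir="/tmp/logs", profile_batch="1,3")

# Setting profile_batch=0 disables profiling entirely.
tb_off = keras.callbacks.TensorBoard(log_dir="/tmp/logs", profile_batch=0)
```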
-@test_combinations.run_all_keras_modes(always_skip_v1=True) -class TestTensorBoardV2NonParameterizedTest(test_combinations.TestCase): + with self.assertRaises(ValueError): + keras.callbacks.TensorBoard( + self.logdir, + histogram_freq=1, + profile_batch="1,None", + write_graph=False, + ) - def setUp(self): - super(TestTensorBoardV2NonParameterizedTest, self).setUp() - self.logdir = os.path.join(self.get_temp_dir(), 'tb') - self.train_dir = os.path.join(self.logdir, 'train') - self.validation_dir = os.path.join(self.logdir, 'validation') - - def _get_seq_model(self): - model = keras.models.Sequential([ - keras.layers.Conv2D(8, (3, 3), input_shape=(10, 10, 1)), - keras.layers.Flatten(), - keras.layers.Dense(1), - ]) - opt = gradient_descent.SGD(learning_rate=0.001) - model.compile( - opt, - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - return model - - def _count_trace_file(self, logdir): - profile_dir = os.path.join(logdir, 'plugins', 'profile') - count = 0 - for (dirpath, dirnames, filenames) in os.walk(profile_dir): - del dirpath # unused - del dirnames # unused - for filename in filenames: - if filename.endswith('.trace.json.gz'): - count += 1 - return count - - def fitModelAndAssertKerasModelWritten(self, model): - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard(self.logdir, - write_graph=True, - profile_batch=0) - model.fit( - x, - y, - batch_size=2, - epochs=3, - validation_data=(x, y), - callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - self.assertEqual( - summary_file.tensors, - { - _ObservedSummary(logdir=self.train_dir, tag='keras'), - }, - ) - if not model.run_eagerly: - # There should be one train graph - self.assertLen(summary_file.graph_defs, 1) - for graph_def in summary_file.graph_defs: - graph_def_str = str(graph_def) - - # All the model layers should appear in the graphs - for layer in model.layers: - if 'input' not in layer.name: - self.assertIn(layer.name, graph_def_str) - - def test_TensorBoard_writeSequentialModel_noInputShape(self): - model = keras.models.Sequential([ - keras.layers.Conv2D(8, (3, 3)), - keras.layers.Flatten(), - keras.layers.Dense(1), - ]) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - self.fitModelAndAssertKerasModelWritten(model) - - def test_TensorBoard_writeSequentialModel_withInputShape(self): - model = keras.models.Sequential([ - keras.layers.Conv2D(8, (3, 3), input_shape=(10, 10, 1)), - keras.layers.Flatten(), - keras.layers.Dense(1), - ]) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - self.fitModelAndAssertKerasModelWritten(model) - - def test_TensorBoard_writeModel(self): - inputs = keras.layers.Input([10, 10, 1]) - x = keras.layers.Conv2D(8, (3, 3), activation='relu')(inputs) - x = keras.layers.Flatten()(x) - x = keras.layers.Dense(1)(x) - model = keras.models.Model(inputs=inputs, outputs=[x]) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - self.fitModelAndAssertKerasModelWritten(model) - - def test_TensorBoard_autoTrace(self): - model = self._get_seq_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, profile_batch=1, write_graph=False) - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - - self.assertEqual( - summary_file.tensors, - { - _ObservedSummary(logdir=self.train_dir, tag=u'batch_1'), - }, - ) - 
self.assertEqual(1, self._count_trace_file(logdir=self.logdir)) + with self.assertRaises(ValueError): + keras.callbacks.TensorBoard( + self.logdir, + histogram_freq=1, + profile_batch="6,5", + write_graph=False, + ) - def test_TensorBoard_autoTrace_outerProfiler(self): - """Runs a profiler session that interferes with the one from the callback. + with self.assertRaises(ValueError): + keras.callbacks.TensorBoard( + self.logdir, + histogram_freq=1, + profile_batch=-1, + write_graph=False, + ) + + def test_TensorBoard_autoTrace_profile_batch_largerThanBatchCount(self): + model = self._get_seq_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, + histogram_freq=1, + profile_batch=10000, + write_graph=False, + ) - The callback will not generate a profile but execution will proceed without - crashing due to unhandled exceptions. - """ - tf.profiler.experimental.start(logdir='') - model = self._get_seq_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, profile_batch=1, write_graph=False) - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - tf.profiler.experimental.stop(save=False) - - self.assertEqual( - summary_file.tensors, - { - _ObservedSummary(logdir=self.train_dir, tag=u'batch_1'), - }, - ) - self.assertEqual(0, self._count_trace_file(logdir=self.train_dir)) - - def test_TensorBoard_autoTrace_tagNameWithBatchNum(self): - model = self._get_seq_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, profile_batch=2, write_graph=False) - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - - self.assertEqual( - summary_file.tensors, - { - _ObservedSummary(logdir=self.train_dir, tag=u'batch_2'), - }, - ) - self.assertEqual(1, self._count_trace_file(logdir=self.logdir)) - - def test_TensorBoard_autoTrace_profileBatchRangeSingle(self): - model = self._get_seq_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, profile_batch='2,2', write_graph=False) - - model.fit( - x, - y, - batch_size=3, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - - self.assertEqual( - summary_file.tensors, - { - # Trace will be logged once at the batch it stops profiling. - _ObservedSummary(logdir=self.train_dir, tag=u'batch_2'), - }, - ) - self.assertEqual(1, self._count_trace_file(logdir=self.logdir)) - - def test_TensorBoard_autoTrace_profileBatchRangeTwice(self): - model = self._get_seq_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, profile_batch='10,10', write_graph=False) - - model.fit( - x, - y, - batch_size=3, - epochs=10, - validation_data=(x, y), - callbacks=[tb_cbk]) - - time.sleep(1) # Avoids the second profile over-writing the first. - - model.fit( - x, - y, - batch_size=3, - epochs=10, - validation_data=(x, y), - callbacks=[tb_cbk]) - self.assertEqual(2, self._count_trace_file(logdir=self.logdir)) - - # Test case that replicates a Github issue. 
- # https://github.com/tensorflow/tensorflow/issues/37543 - def test_TensorBoard_autoTrace_profileTwiceGraphMode(self): - tf.compat.v1.disable_eager_execution() - inp = keras.Input((1,)) - out = keras.layers.Dense(units=1)(inp) - model = keras.Model(inp, out) - - model.compile(gradient_descent.SGD(1), 'mse') - - logdir = os.path.join(self.get_temp_dir(), 'tb1') - model.fit( - np.zeros((64, 1)), - np.zeros((64, 1)), - batch_size=32, - callbacks=[keras.callbacks.TensorBoard(logdir, profile_batch=1)], - ) - # Verifies trace exists in the first logdir. - self.assertEqual(1, self._count_trace_file(logdir=logdir)) - logdir = os.path.join(self.get_temp_dir(), 'tb2') - model.fit( - np.zeros((64, 1)), - np.zeros((64, 1)), - batch_size=32, - callbacks=[keras.callbacks.TensorBoard(logdir, profile_batch=2)], - ) - # Verifies trace exists in the second logdir. - self.assertEqual(1, self._count_trace_file(logdir=logdir)) - - def test_TensorBoard_autoTrace_profileBatchRange(self): - model = self._get_seq_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, profile_batch='1,3', write_graph=False) - - model.fit( - x, - y, - batch_size=4, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - - self.assertEqual( - summary_file.tensors, - { - # Trace will be logged once at the batch it stops profiling. - _ObservedSummary(logdir=self.train_dir, tag=u'batch_3'), - }, - ) - self.assertEqual(1, self._count_trace_file(logdir=self.logdir)) - - def test_TensorBoard_autoTrace_profileInvalidBatchRange(self): - with self.assertRaises(ValueError): - keras.callbacks.TensorBoard( - self.logdir, - histogram_freq=1, - profile_batch='-1,3', - write_graph=False) - - with self.assertRaises(ValueError): - keras.callbacks.TensorBoard( - self.logdir, - histogram_freq=1, - profile_batch='1,None', - write_graph=False) - - with self.assertRaises(ValueError): - keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, profile_batch='6,5', write_graph=False) - - with self.assertRaises(ValueError): - keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, profile_batch=-1, write_graph=False) - - def test_TensorBoard_autoTrace_profile_batch_largerThanBatchCount(self): - model = self._get_seq_model() - x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - tb_cbk = keras.callbacks.TensorBoard( - self.logdir, histogram_freq=1, profile_batch=10000, write_graph=False) - - model.fit( - x, - y, - batch_size=2, - epochs=2, - validation_data=(x, y), - callbacks=[tb_cbk]) - summary_file = list_summaries(self.logdir) - - # Enabled trace only on the 10000th batch, thus it should be empty. - self.assertEmpty(summary_file.tensors) - self.assertEqual(0, self._count_trace_file(logdir=self.train_dir)) + model.fit( + x, + y, + batch_size=2, + epochs=2, + validation_data=(x, y), + callbacks=[tb_cbk], + ) + summary_file = list_summaries(self.logdir) + # Enabled trace only on the 10000th batch, thus it should be empty. 
+ self.assertEmpty(summary_file.tensors) + self.assertEqual(0, self._count_xplane_file(logdir=self.train_dir)) -class MostRecentlyModifiedFileMatchingPatternTest(tf.test.TestCase): - def test_get_most_recently_modified_file_matching_pattern(self): - file_pattern = 'f.batch{batch:02d}epoch{epoch:02d}.h5' - test_dir = self.get_temp_dir() - path_pattern = os.path.join(test_dir, file_pattern) - file_paths = [ - os.path.join(test_dir, file_name) for file_name in - ['f.batch03epoch02.h5', 'f.batch02epoch02.h5', 'f.batch01epoch01.h5'] - ] - for file_path in file_paths: - with open(file_path, 'w') as f: - # Ensure there are some intervals between file creation. - time.sleep(2) - f.write('foo bar') - # Ensure the files have been actually written. - self.assertEqual( - set([ +class MostRecentlyModifiedFileMatchingPatternTest(tf.test.TestCase): + def test_get_most_recently_modified_file_matching_pattern(self): + file_pattern = "f.batch{batch:02d}epoch{epoch:02d}.h5" + test_dir = self.get_temp_dir() + path_pattern = os.path.join(test_dir, file_pattern) + file_paths = [ os.path.join(test_dir, file_name) - for file_name in os.listdir(test_dir) - ]), set(file_paths)) - self.assertEqual( - keras.callbacks.ModelCheckpoint(None) - ._get_most_recently_modified_file_matching_pattern(path_pattern), - file_paths[-1]) - - def test_some_file_not_matching_pattern(self): - file_pattern = 'f.batch{batch:02d}epoch{epoch:02d}.h5' - test_dir = self.get_temp_dir() - path_pattern = os.path.join(test_dir, file_pattern) - file_paths = [ - os.path.join(test_dir, file_name) for file_name in - ['f.batch03epoch02.h5', 'f.batch02epoch02.h5', 'f.baatch01epoch01.h5'] - ] - for file_path in file_paths: - with open(file_path, 'w') as f: - # Ensure there are some intervals between file creation. - time.sleep(2) - f.write('foo bar') - self.assertEqual( - keras.callbacks.ModelCheckpoint(None) - ._get_most_recently_modified_file_matching_pattern(path_pattern), - file_paths[-2]) - - def test_get_same_file_if_file_name_equals_pattern(self): - file_name = 'f.batch02.h5' - test_dir = self.get_temp_dir() - file_path = os.path.join(test_dir, file_name) - with open(file_path, 'w') as f: - f.write('foo bar') - self.assertEqual(os.path.join(test_dir, os.listdir(test_dir)[0]), file_path) - self.assertEqual( - keras.callbacks.ModelCheckpoint( - None)._get_most_recently_modified_file_matching_pattern(file_path), - file_path) - - def test_get_none_if_file_does_not_exist(self): - file_name = 'f.batch02.h5' - test_dir = self.get_temp_dir() - file_path = os.path.join(test_dir, file_name) - self.assertLen(os.listdir(test_dir), 0) - self.assertEqual( - keras.callbacks.ModelCheckpoint( - None)._get_most_recently_modified_file_matching_pattern(file_path), - None) - - def test_using_checkpoint_management_latest_checkpoint(self): - file_pattern = 'f.batch{batch:02d}epoch{epoch:02d}' - ckpt_file_name = 'f.batchXepochY' - test_dir = self.get_temp_dir() - path_pattern = os.path.join(test_dir, file_pattern) - ckpt_file_path = os.path.join(test_dir, ckpt_file_name) - with open(ckpt_file_path, 'w') as f: - f.write('dummy ckpt') - tf.__internal__.train.update_checkpoint_state( - test_dir, ckpt_file_path) - - file_paths = [ - os.path.join(test_dir, file_name) - for file_name in ['f.batch03epoch02', 'f.batch02epoch02'] - ] - for file_path in file_paths: - with open(file_path, 'w') as f: - f.write('foo bar') - - # The result returned from checkpoint_management.latest_checkpoint takes - # priority, so even if it was written earlier, we should still return that. 
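These pattern-matching tests relate to how `ModelCheckpoint` resumes from the most recent checkpoint. A hedged sketch of the user-facing side; all paths are hypothetical:

```python
import keras
import tensorflow as tf

# `{epoch}` and logged metrics such as `{val_loss}` are formatted into the
# file name at save time; the mtime-based matcher tested here recovers the
# most recent such file when training restarts.
ckpt_cb = keras.callbacks.ModelCheckpoint(
    filepath="/tmp/ckpts/weights.{epoch:02d}-{val_loss:.2f}.h5",
    save_weights_only=True,
)

# For TF-format checkpoints recorded in a `checkpoint` state file,
# tf.train.latest_checkpoint reports the newest entry, which these tests
# give priority over mtime-based pattern matching.
latest = tf.train.latest_checkpoint("/tmp/ckpts")
```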
- self.assertEqual( - keras.callbacks.ModelCheckpoint(None) - ._get_most_recently_modified_file_matching_pattern(path_pattern), - ckpt_file_path) - + for file_name in [ + "f.batch03epoch02.h5", + "f.batch02epoch02.h5", + "f.batch01epoch01.h5", + ] + ] + for file_path in file_paths: + with open(file_path, "w") as f: + # Ensure there are some intervals between file creation. + time.sleep(2) + f.write("foo bar") + # Ensure the files have been actually written. + self.assertEqual( + set( + [ + os.path.join(test_dir, file_name) + for file_name in os.listdir(test_dir) + ] + ), + set(file_paths), + ) + self.assertEqual( + keras.callbacks.ModelCheckpoint( + None + )._get_most_recently_modified_file_matching_pattern(path_pattern), + file_paths[-1], + ) + + def test_some_file_not_matching_pattern(self): + file_pattern = "f.batch{batch:02d}epoch{epoch:02d}.h5" + test_dir = self.get_temp_dir() + path_pattern = os.path.join(test_dir, file_pattern) + file_paths = [ + os.path.join(test_dir, file_name) + for file_name in [ + "f.batch03epoch02.h5", + "f.batch02epoch02.h5", + "f.baatch01epoch01.h5", + ] + ] + for file_path in file_paths: + with open(file_path, "w") as f: + # Ensure there are some intervals between file creation. + time.sleep(2) + f.write("foo bar") + self.assertEqual( + keras.callbacks.ModelCheckpoint( + None + )._get_most_recently_modified_file_matching_pattern(path_pattern), + file_paths[-2], + ) + + def test_get_same_file_if_file_name_equals_pattern(self): + file_name = "f.batch02.h5" + test_dir = self.get_temp_dir() + file_path = os.path.join(test_dir, file_name) + with open(file_path, "w") as f: + f.write("foo bar") + self.assertEqual( + os.path.join(test_dir, os.listdir(test_dir)[0]), file_path + ) + self.assertEqual( + keras.callbacks.ModelCheckpoint( + None + )._get_most_recently_modified_file_matching_pattern(file_path), + file_path, + ) + + def test_get_none_if_file_does_not_exist(self): + file_name = "f.batch02.h5" + test_dir = self.get_temp_dir() + file_path = os.path.join(test_dir, file_name) + self.assertEmpty(os.listdir(test_dir)) + self.assertEqual( + keras.callbacks.ModelCheckpoint( + None + )._get_most_recently_modified_file_matching_pattern(file_path), + None, + ) + + def test_using_checkpoint_management_latest_checkpoint(self): + file_pattern = "f.batch{batch:02d}epoch{epoch:02d}" + ckpt_file_name = "f.batchXepochY" + test_dir = self.get_temp_dir() + path_pattern = os.path.join(test_dir, file_pattern) + ckpt_file_path = os.path.join(test_dir, ckpt_file_name) + with open(ckpt_file_path, "w") as f: + f.write("dummy ckpt") + tf.__internal__.train.update_checkpoint_state(test_dir, ckpt_file_path) + + file_paths = [ + os.path.join(test_dir, file_name) + for file_name in ["f.batch03epoch02", "f.batch02epoch02"] + ] + for file_path in file_paths: + with open(file_path, "w") as f: + f.write("foo bar") -class SummaryOpsTest(tf.test.TestCase): + # The result returned from checkpoint_management.latest_checkpoint takes + # priority, so even if it was written earlier, we should still return + # that. 
+ self.assertEqual( + keras.callbacks.ModelCheckpoint( + None + )._get_most_recently_modified_file_matching_pattern(path_pattern), + ckpt_file_path, + ) - def tearDown(self): - super(SummaryOpsTest, self).tearDown() - tf.summary.trace_off() - - def keras_model(self, *args, **kwargs): - logdir = self.get_temp_dir() - writer = tf.summary.create_file_writer(logdir) - with writer.as_default(): - keras.callbacks.keras_model_summary(*args, **kwargs) - writer.close() - events = events_from_logdir(logdir) - # The first event contains no summary values. The written content goes to - # the second event. - return events[1] - - @test_utils.run_v2_only - def testKerasModel(self): - model = keras.Sequential( - [Dense(10, input_shape=(100,)), - Activation('relu', name='my_relu')]) - event = self.keras_model(name='my_name', data=model, step=1) - first_val = event.summary.value[0] - self.assertEqual(model.to_json(), first_val.tensor.string_val[0].decode()) - - @test_utils.run_v2_only - def testKerasModel_usesDefaultStep(self): - model = keras.Sequential( - [Dense(10, input_shape=(100,)), - Activation('relu', name='my_relu')]) - try: - tf.summary.experimental.set_step(42) - event = self.keras_model(name='my_name', data=model) - self.assertEqual(42, event.step) - finally: - # Reset to default state for other tests. - tf.summary.experimental.set_step(None) - - @test_utils.run_v2_only - def testKerasModel_subclass(self): - - class SimpleSubclass(keras.Model): - - def __init__(self): - super().__init__(name='subclass') - self.dense = Dense(10, input_shape=(100,)) - self.activation = Activation('relu', name='my_relu') - - def call(self, inputs): - x = self.dense(inputs) - return self.activation(x) - - # Intentionally erroring out at json serialization to test the warning. - def get_config(self): - raise NotImplementedError - model = SimpleSubclass() - with tf.compat.v1.test.mock.patch.object(logging, 'warning') as mock_log: - self.assertFalse( - keras.callbacks.keras_model_summary( - name='my_name', data=model, step=1)) - self.assertRegex( - str(mock_log.call_args), 'Model failed to serialize as JSON.') - - @test_utils.run_v2_only - def testKerasModel_otherExceptions(self): - model = keras.Sequential() - - with tf.compat.v1.test.mock.patch.object(model, 'to_json') as mock_to_json: - with tf.compat.v1.test.mock.patch.object(logging, 'warning') as mock_log: - mock_to_json.side_effect = Exception('oops') - self.assertFalse( - keras.callbacks.keras_model_summary( - name='my_name', data=model, step=1)) - self.assertRegex( - str(mock_log.call_args), - 'Model failed to serialize as JSON. Ignoring') +class SummaryOpsTest(tf.test.TestCase): + def tearDown(self): + super(SummaryOpsTest, self).tearDown() + tf.summary.trace_off() + + def keras_model(self, *args, **kwargs): + logdir = self.get_temp_dir() + writer = tf.summary.create_file_writer(logdir) + with writer.as_default(): + keras.callbacks.keras_model_summary(*args, **kwargs) + writer.close() + events = events_from_logdir(logdir) + # The first event contains no summary values. The written content goes + # to the second event. 
+ return events[1] + + @test_utils.run_v2_only + def testKerasModel(self): + model = keras.Sequential( + [Dense(10, input_shape=(100,)), Activation("relu", name="my_relu")] + ) + event = self.keras_model(name="my_name", data=model, step=1) + first_val = event.summary.value[0] + self.assertEqual( + model.to_json(), first_val.tensor.string_val[0].decode() + ) + + @test_utils.run_v2_only + def testKerasModel_usesDefaultStep(self): + model = keras.Sequential( + [Dense(10, input_shape=(100,)), Activation("relu", name="my_relu")] + ) + try: + tf.summary.experimental.set_step(42) + event = self.keras_model(name="my_name", data=model) + self.assertEqual(42, event.step) + finally: + # Reset to default state for other tests. + tf.summary.experimental.set_step(None) + + @test_utils.run_v2_only + def testKerasModel_subclass(self): + class SimpleSubclass(keras.Model): + def __init__(self): + super().__init__(name="subclass") + self.dense = Dense(10, input_shape=(100,)) + self.activation = Activation("relu", name="my_relu") + + def call(self, inputs): + x = self.dense(inputs) + return self.activation(x) + + # Intentionally erroring out at json serialization to test the + # warning. + def get_config(self): + raise NotImplementedError + + model = SimpleSubclass() + with tf.compat.v1.test.mock.patch.object( + logging, "warning" + ) as mock_log: + self.assertFalse( + keras.callbacks.keras_model_summary( + name="my_name", data=model, step=1 + ) + ) + self.assertRegex( + str(mock_log.call_args), "Model failed to serialize as JSON." + ) + + @test_utils.run_v2_only + def testKerasModel_otherExceptions(self): + model = keras.Sequential() + + with tf.compat.v1.test.mock.patch.object( + model, "to_json" + ) as mock_to_json: + with tf.compat.v1.test.mock.patch.object( + logging, "warning" + ) as mock_log: + mock_to_json.side_effect = Exception("oops") + self.assertFalse( + keras.callbacks.keras_model_summary( + name="my_name", data=model, step=1 + ) + ) + self.assertRegex( + str(mock_log.call_args), + "Model failed to serialize as JSON. Ignoring", + ) def events_from_file(filepath): - """Returns all events in a single event file. + """Returns all events in a single event file. - Args: - filepath: Path to the event file. + Args: + filepath: Path to the event file. - Returns: - A list of all tf.Event protos in the event file. - """ - result = [] - raw_dataset = tf.data.TFRecordDataset([filepath]) - for raw_record in raw_dataset.take(10): - event = tf.compat.v1.Event() - event.ParseFromString(raw_record.numpy()) - result.append(event) - return result + Returns: + A list of all tf.Event protos in the event file. + """ + result = [] + raw_dataset = tf.data.TFRecordDataset([filepath]) + for raw_record in raw_dataset.take(10): + event = tf.compat.v1.Event() + event.ParseFromString(raw_record.numpy()) + result.append(event) + return result def events_from_logdir(logdir): - """Returns all events in the single eventfile in logdir. + """Returns all events in the single eventfile in logdir. - Args: - logdir: The directory in which the single event file is sought. + Args: + logdir: The directory in which the single event file is sought. - Returns: - A list of all tf.Event protos from the single event file. + Returns: + A list of all tf.Event protos from the single event file. - Raises: - AssertionError: If logdir does not contain exactly one file. 
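The `keras_model_summary` calls above write the model's JSON config into the event file so TensorBoard can render a conceptual graph. A minimal sketch of direct usage, mirroring the `keras_model` helper in these tests; the logdir is hypothetical:

```python
import tensorflow as tf
import keras
from keras import callbacks

model = keras.Sequential([keras.layers.Dense(10, input_shape=(100,))])
writer = tf.summary.create_file_writer("/tmp/logs")  # hypothetical logdir
with writer.as_default():
    # Per the tests above, a falsy return means the model could not be
    # serialized as JSON (e.g. a subclassed model without get_config).
    wrote = callbacks.keras_model_summary(name="my_name", data=model, step=1)
writer.close()
```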
- """ - assert tf.compat.v1.gfile.Exists(logdir) - files = tf.compat.v1.gfile.ListDirectory(logdir) - assert len(files) == 1, 'Found not exactly one file in logdir: %s' % files - return events_from_file(os.path.join(logdir, files[0])) + Raises: + AssertionError: If logdir does not contain exactly one file. + """ + assert tf.compat.v1.gfile.Exists(logdir) + files = tf.compat.v1.gfile.ListDirectory(logdir) + assert len(files) == 1, f"Found not exactly one file in logdir: {files}" + return events_from_file(os.path.join(logdir, files[0])) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/callbacks_v1.py b/keras/callbacks_v1.py index e09297fcd3ff..013b7bcadef9 100644 --- a/keras/callbacks_v1.py +++ b/keras/callbacks_v1.py @@ -12,463 +12,517 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=g-import-not-at-top -# pylint: disable=g-classes-have-attributes -"""Callbacks: utilities called at certain points during model training.""" -import tensorflow.compat.v2 as tf + +"""Callbacks: utilities called at certain points during model training.""" import os + import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import callbacks + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export -@keras_export(v1=['keras.callbacks.TensorBoard']) +@keras_export(v1=["keras.callbacks.TensorBoard"]) class TensorBoard(callbacks.TensorBoard): - # pylint: disable=line-too-long - """Enable visualizations for TensorBoard. - - TensorBoard is a visualization tool provided with TensorFlow. - - This callback logs events for TensorBoard, including: - * Metrics summary plots - * Training graph visualization - * Activation histograms - * Sampled profiling - - If you have installed TensorFlow with pip, you should be able - to launch TensorBoard from the command line: - - ```sh - tensorboard --logdir=path_to_your_logs - ``` - - You can find more information about TensorBoard - [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard). - - Args: - log_dir: the path of the directory where to save the log files to be - parsed by TensorBoard. - histogram_freq: frequency (in epochs) at which to compute activation and - weight histograms for the layers of the model. If set to 0, histograms - won't be computed. Validation data (or split) must be specified for - histogram visualizations. - write_graph: whether to visualize the graph in TensorBoard. The log file - can become quite large when write_graph is set to True. - write_grads: whether to visualize gradient histograms in TensorBoard. - `histogram_freq` must be greater than 0. - batch_size: size of batch of inputs to feed to the network for histograms - computation. - write_images: whether to write model weights to visualize as image in - TensorBoard. - embeddings_freq: frequency (in epochs) at which selected embedding layers - will be saved. If set to 0, embeddings won't be computed. Data to be - visualized in TensorBoard's Embedding tab must be passed as - `embeddings_data`. - embeddings_layer_names: a list of names of layers to keep eye on. If None - or empty list all the embedding layer will be watched. - embeddings_metadata: a dictionary which maps layer name to a file name in - which metadata for this embedding layer is saved. 
- [Here are details]( - https://www.tensorflow.org/how_tos/embedding_viz/#metadata_optional) - about metadata files format. In case if the same metadata file is - used for all embedding layers, string can be passed. - embeddings_data: data to be embedded at layers specified in - `embeddings_layer_names`. Numpy array (if the model has a single input) - or list of Numpy arrays (if the model has multiple inputs). Learn more - about embeddings [in this guide]( - https://www.tensorflow.org/programmers_guide/embedding). - update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`, - writes the losses and metrics to TensorBoard after each batch. The same - applies for `'epoch'`. If using an integer, let's say `1000`, the - callback will write the metrics and losses to TensorBoard every 1000 - samples. Note that writing too frequently to TensorBoard can slow down - your training. - profile_batch: Profile the batch to sample compute characteristics. By - default, it will profile the second batch. Set profile_batch=0 to - disable profiling. - - Raises: - ValueError: If histogram_freq is set and no validation data is provided. - - @compatibility(eager) - Using the `TensorBoard` callback will work when eager execution is enabled, - with the restriction that outputting histogram summaries of weights and - gradients is not supported. Consequently, `histogram_freq` will be ignored. - @end_compatibility - """ - - # pylint: enable=line-too-long - - def __init__(self, - log_dir='./logs', - histogram_freq=0, - batch_size=32, - write_graph=True, - write_grads=False, - write_images=False, - embeddings_freq=0, - embeddings_layer_names=None, - embeddings_metadata=None, - embeddings_data=None, - update_freq='epoch', - profile_batch=2): - # Don't call super's init since it is an eager-only version. - callbacks.Callback.__init__(self) - self.log_dir = log_dir - self.histogram_freq = histogram_freq - if self.histogram_freq and tf.executing_eagerly(): - logging.warning( - UserWarning('Weight and gradient histograms not supported for eager' - 'execution, setting `histogram_freq` to `0`.')) - self.histogram_freq = 0 - self.merged = None - self.write_graph = write_graph - self.write_grads = write_grads - self.write_images = write_images - self.batch_size = batch_size - self._current_batch = 0 - self._total_batches_seen = 0 - self._total_val_batches_seen = 0 - self.embeddings_freq = embeddings_freq - self.embeddings_layer_names = embeddings_layer_names - self.embeddings_metadata = embeddings_metadata - self.embeddings_data = embeddings_data - if update_freq == 'batch': - self.update_freq = 1 - else: - self.update_freq = update_freq - self._samples_seen = 0 - self._samples_seen_at_last_write = 0 - # TODO(fishx): Add a link to the full profiler tutorial. - self._profile_batch = profile_batch - # True when the profiler was successfully started by this callback. - # We track the status here to make sure callbacks do not interfere with - # each other. The callback will only stop the profiler it started. - self._profiler_started = False - - # TensorBoard should only write summaries on the chief when in a - # Multi-Worker setting. 
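As the docstring being reflowed here explains, `update_freq` accepts `'batch'`, `'epoch'`, or a sample count. A brief sketch against the v1 export path declared above (`@keras_export(v1=["keras.callbacks.TensorBoard"])`); the logdir is hypothetical:

```python
import tensorflow.compat.v2 as tf

TensorBoard = tf.compat.v1.keras.callbacks.TensorBoard

tb_epoch = TensorBoard(log_dir="/tmp/logs", update_freq="epoch")  # per epoch
tb_batch = TensorBoard(log_dir="/tmp/logs", update_freq="batch")  # per batch
tb_n = TensorBoard(log_dir="/tmp/logs", update_freq=1000)  # every 1000 samples
```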
- self._chief_worker_only = True - - def _init_writer(self, model): - """Sets file writer.""" - if tf.executing_eagerly(): - self.writer = tf.summary.create_file_writer(self.log_dir) - if not model.run_eagerly and self.write_graph: - with self.writer.as_default(): - tf.summary.graph(backend.get_graph()) - elif self.write_graph: - self.writer = tf.compat.v1.summary.FileWriter( - self.log_dir, backend.get_graph()) - else: - self.writer = tf.compat.v1.summary.FileWriter(self.log_dir) - - def _make_histogram_ops(self, model): - """Defines histogram ops when histogram_freq > 0.""" - # only make histogram summary op if it hasn't already been made - if self.histogram_freq and self.merged is None: - for layer in self.model.layers: - for weight in layer.weights: - mapped_weight_name = weight.name.replace(':', '_') - tf.compat.v1.summary.histogram(mapped_weight_name, weight) - if self.write_images: - w_img = tf.compat.v1.squeeze(weight) - shape = tuple(w_img.shape) - if len(shape) == 2: # dense layer kernel case - if shape[0] > shape[1]: - w_img = tf.compat.v1.transpose(w_img) - shape = tuple(w_img.shape) - w_img = tf.reshape(w_img, [1, shape[0], shape[1], 1]) - elif len(shape) == 3: # convnet case - if backend.image_data_format() == 'channels_last': - # switch to channels_first to display - # every kernel as a separate image - w_img = tf.compat.v1.transpose(w_img, perm=[2, 0, 1]) - shape = tuple(w_img.shape) - w_img = tf.reshape(w_img, [shape[0], shape[1], shape[2], 1]) - elif len(shape) == 1: # bias case - w_img = tf.reshape(w_img, [1, shape[0], 1, 1]) - else: - # not possible to handle 3D convnets etc. - continue - - shape = tuple(w_img.shape) - assert len(shape) == 4 and shape[-1] in [1, 3, 4] - tf.compat.v1.summary.image(mapped_weight_name, w_img) - - if self.write_grads: - for weight in layer.trainable_weights: - mapped_weight_name = weight.name.replace(':', '_') - grads = model.optimizer.get_gradients(model.total_loss, weight) - - def is_indexed_slices(grad): - return type(grad).__name__ == 'IndexedSlices' - - grads = [ - grad.values if is_indexed_slices(grad) else grad - for grad in grads - ] - tf.compat.v1.summary.histogram('{}_grad'.format(mapped_weight_name), grads) - - if hasattr(layer, 'output'): - if isinstance(layer.output, list): - for i, output in enumerate(layer.output): - tf.compat.v1.summary.histogram('{}_out_{}'.format(layer.name, i), output) - else: - tf.compat.v1.summary.histogram('{}_out'.format(layer.name), layer.output) - - def set_model(self, model): - """Sets Keras model and creates summary ops.""" - - self.model = model - self._init_writer(model) - # histogram summaries only enabled in graph mode - if not tf.executing_eagerly(): - self._make_histogram_ops(model) - self.merged = tf.compat.v1.summary.merge_all() - - # If both embedding_freq and embeddings_data are available, we will - # visualize embeddings. - if self.embeddings_freq and self.embeddings_data is not None: - # Avoid circular dependency. - from keras.engine import training_utils_v1 # pylint: disable=g-import-not-at-top - self.embeddings_data = training_utils_v1.standardize_input_data( - self.embeddings_data, model.input_names) - - # If embedding_layer_names are not provided, get all of the embedding - # layers from the model. 
- embeddings_layer_names = self.embeddings_layer_names - if not embeddings_layer_names: - embeddings_layer_names = [ - layer.name - for layer in self.model.layers - if type(layer).__name__ == 'Embedding' - ] - - self.assign_embeddings = [] - embeddings_vars = {} - - self.batch_id = batch_id = tf.compat.v1.placeholder(tf.int32) - self.step = step = tf.compat.v1.placeholder(tf.int32) - - for layer in self.model.layers: - if layer.name in embeddings_layer_names: - embedding_input = self.model.get_layer(layer.name).output - embedding_size = np.prod(embedding_input.shape[1:]) - embedding_input = tf.reshape(embedding_input, - (step, int(embedding_size))) - shape = (self.embeddings_data[0].shape[0], int(embedding_size)) - embedding = tf.Variable( - tf.zeros(shape), name=layer.name + '_embedding') - embeddings_vars[layer.name] = embedding - batch = tf.compat.v1.assign(embedding[batch_id:batch_id + step], - embedding_input) - self.assign_embeddings.append(batch) - - self.saver = tf.compat.v1.train.Saver(list(embeddings_vars.values())) - - # Create embeddings_metadata dictionary - if isinstance(self.embeddings_metadata, str): - embeddings_metadata = { - layer_name: self.embeddings_metadata - for layer_name in embeddings_vars.keys() - } - else: - # If embedding_metadata is already a dictionary - embeddings_metadata = self.embeddings_metadata - try: - from tensorboard.plugins import projector - except ImportError: - raise ImportError('Failed to import TensorBoard. Please make sure that ' - 'TensorBoard integration is complete."') + """Enable visualizations for TensorBoard. - # TODO(psv): Add integration tests to test embedding visualization - # with TensorBoard callback. We are unable to write a unit test for this - # because TensorBoard dependency assumes TensorFlow package is installed. - config = projector.ProjectorConfig() - for layer_name, tensor in embeddings_vars.items(): - embedding = config.embeddings.add() - embedding.tensor_name = tensor.name + TensorBoard is a visualization tool provided with TensorFlow. - if (embeddings_metadata is not None and - layer_name in embeddings_metadata): - embedding.metadata_path = embeddings_metadata[layer_name] + This callback logs events for TensorBoard, including: + * Metrics summary plots + * Training graph visualization + * Activation histograms + * Sampled profiling - projector.visualize_embeddings(self.writer, config) + If you have installed TensorFlow with pip, you should be able + to launch TensorBoard from the command line: - def _fetch_callback(self, summary): - self.writer.add_summary(summary, self._total_val_batches_seen) - self._total_val_batches_seen += 1 + ```sh + tensorboard --logdir=path_to_your_logs + ``` - def _write_custom_summaries(self, step, logs=None): - """Writes metrics out as custom scalar summaries. + You can find more information about TensorBoard + [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard). Args: - step: the global step to use for TensorBoard. - logs: dict. Keys are scalar summary names, values are - NumPy scalars. 
- - """ - logs = logs or {} - if tf.executing_eagerly(): - # use v2 summary ops - with self.writer.as_default(), tf.summary.record_if(True): - for name, value in logs.items(): - if isinstance(value, np.ndarray): - value = value.item() - tf.summary.scalar(name, value, step=step) - else: - # use FileWriter from v1 summary - for name, value in logs.items(): - if isinstance(value, np.ndarray): - value = value.item() - summary = tf.compat.v1.Summary() - summary_value = summary.value.add() - summary_value.simple_value = value - summary_value.tag = name - self.writer.add_summary(summary, step) - self.writer.flush() - - def on_train_batch_begin(self, batch, logs=None): - if self._total_batches_seen == self._profile_batch - 1: - self._start_profiler() - - def on_train_batch_end(self, batch, logs=None): - return self.on_batch_end(batch, logs) - - def on_test_begin(self, logs=None): - pass - - def on_test_end(self, logs=None): - pass - - def on_batch_end(self, batch, logs=None): - """Writes scalar summaries for metrics on every training batch. - - Performs profiling if current batch is in profiler_batches. + log_dir: the path of the directory where to save the log files to be + parsed by TensorBoard. + histogram_freq: frequency (in epochs) at which to compute activation and + weight histograms for the layers of the model. If set to 0, histograms + won't be computed. Validation data (or split) must be specified for + histogram visualizations. + write_graph: whether to visualize the graph in TensorBoard. The log file + can become quite large when write_graph is set to True. + write_grads: whether to visualize gradient histograms in TensorBoard. + `histogram_freq` must be greater than 0. + batch_size: size of batch of inputs to feed to the network for + histograms computation. + write_images: whether to write model weights to visualize as image in + TensorBoard. + embeddings_freq: frequency (in epochs) at which selected embedding + layers will be saved. If set to 0, embeddings won't be computed. Data + to be visualized in TensorBoard's Embedding tab must be passed as + `embeddings_data`. + embeddings_layer_names: a list of names of layers to keep eye on. If + None or empty list all the embedding layer will be watched. + embeddings_metadata: a dictionary which maps layer name to a file name + in which metadata for this embedding layer is saved. + [Here are details]( + https://www.tensorflow.org/how_tos/embedding_viz/#metadata_optional) + about metadata files format. In case if the same metadata file is + used for all embedding layers, string can be passed. + embeddings_data: data to be embedded at layers specified in + `embeddings_layer_names`. Numpy array (if the model has a single + input) or list of Numpy arrays (if the model has multiple inputs). + Learn more about embeddings [in this guide]( + https://www.tensorflow.org/programmers_guide/embedding). + update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`, + writes the losses and metrics to TensorBoard after each batch. The + same applies for `'epoch'`. If using an integer, let's say `1000`, the + callback will write the metrics and losses to TensorBoard every 1000 + samples. Note that writing too frequently to TensorBoard can slow down + your training. + profile_batch: Profile the batch to sample compute characteristics. By + default, it will profile the second batch. Set profile_batch=0 to + disable profiling. + + Raises: + ValueError: If histogram_freq is set and no validation data is provided. 
+ + @compatibility(eager) + Using the `TensorBoard` callback will work when eager execution is enabled, + with the restriction that outputting histogram summaries of weights and + gradients is not supported. Consequently, `histogram_freq` will be ignored. + @end_compatibility """ - # Don't output batch_size and batch number as TensorBoard summaries - logs = logs or {} - self._samples_seen += logs.get('size', 1) - samples_seen_since = self._samples_seen - self._samples_seen_at_last_write - if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq: - batch_logs = {('batch_' + k): v - for k, v in logs.items() - if k not in ['batch', 'size', 'num_steps']} - self._write_custom_summaries(self._total_batches_seen, batch_logs) - self._samples_seen_at_last_write = self._samples_seen - self._total_batches_seen += 1 - self._stop_profiler() - - def on_train_begin(self, logs=None): - pass - - def on_epoch_begin(self, epoch, logs=None): - """Add histogram op to Model eval_function callbacks, reset batch count.""" - - # check if histogram summary should be run for this epoch - if self.histogram_freq and epoch % self.histogram_freq == 0: - # pylint: disable=protected-access - # add the histogram summary op if it should run this epoch - self.model._make_test_function() - if self.merged not in self.model.test_function.fetches: - self.model.test_function.fetches.append(self.merged) - self.model.test_function.fetch_callbacks[ - self.merged] = self._fetch_callback - # pylint: enable=protected-access - - def on_epoch_end(self, epoch, logs=None): - """Checks if summary ops should run next epoch, logs scalar summaries.""" - - # don't output batch_size and - # batch number as TensorBoard summaries - logs = {('epoch_' + k): v - for k, v in logs.items() - if k not in ['batch', 'size', 'num_steps']} - if self.update_freq == 'epoch': - step = epoch - else: - step = self._samples_seen - self._write_custom_summaries(step, logs) - - # pop the histogram summary op after each epoch - if self.histogram_freq: - # pylint: disable=protected-access - if self.merged in self.model.test_function.fetches: - self.model.test_function.fetches.remove(self.merged) - if self.merged in self.model.test_function.fetch_callbacks: - self.model.test_function.fetch_callbacks.pop(self.merged) - # pylint: enable=protected-access - - if self.embeddings_data is None and self.embeddings_freq: - raise ValueError('To visualize embeddings, embeddings_data must ' - 'be provided.') - - if self.embeddings_freq and self.embeddings_data is not None: - if epoch % self.embeddings_freq == 0: - # We need a second forward-pass here because we're passing - # the `embeddings_data` explicitly. This design allows to pass - # arbitrary data as `embeddings_data` and results from the fact - # that we need to know the size of the `tf.Variable`s which - # hold the embeddings in `set_model`. At this point, however, - # the `validation_data` is not yet set. 
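For the embedding options described in the docstring above, a hedged construction sketch; the layer name, metadata file, and data are all hypothetical:

```python
import numpy as np
import tensorflow.compat.v2 as tf

x_data = np.ones((100, 10))  # hypothetical inputs to embed

tb = tf.compat.v1.keras.callbacks.TensorBoard(
    log_dir="/tmp/logs",
    embeddings_freq=1,  # save the selected embeddings every epoch
    embeddings_layer_names=["test_embedding"],  # None watches all of them
    embeddings_metadata={"test_embedding": "metadata.tsv"},
    embeddings_data=x_data,
)
```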
- - embeddings_data = self.embeddings_data - n_samples = embeddings_data[0].shape[0] - i = 0 - sess = backend.get_session() - while i < n_samples: - step = min(self.batch_size, n_samples - i) - batch = slice(i, i + step) - - if isinstance(self.model.input, list): - feed_dict = { - model_input: embeddings_data[idx][batch] - for idx, model_input in enumerate(self.model.input) + + def __init__( + self, + log_dir="./logs", + histogram_freq=0, + batch_size=32, + write_graph=True, + write_grads=False, + write_images=False, + embeddings_freq=0, + embeddings_layer_names=None, + embeddings_metadata=None, + embeddings_data=None, + update_freq="epoch", + profile_batch=2, + ): + # Don't call super's init since it is an eager-only version. + callbacks.Callback.__init__(self) + self.log_dir = log_dir + self.histogram_freq = histogram_freq + if self.histogram_freq and tf.executing_eagerly(): + logging.warning( + UserWarning( + "Weight and gradient histograms not supported for eager" + "execution, setting `histogram_freq` to `0`." + ) + ) + self.histogram_freq = 0 + self.merged = None + self.write_graph = write_graph + self.write_grads = write_grads + self.write_images = write_images + self.batch_size = batch_size + self._current_batch = 0 + self._total_batches_seen = 0 + self._total_val_batches_seen = 0 + self.embeddings_freq = embeddings_freq + self.embeddings_layer_names = embeddings_layer_names + self.embeddings_metadata = embeddings_metadata + self.embeddings_data = embeddings_data + if update_freq == "batch": + self.update_freq = 1 + else: + self.update_freq = update_freq + self._samples_seen = 0 + self._samples_seen_at_last_write = 0 + # TODO(fishx): Add a link to the full profiler tutorial. + self._profile_batch = profile_batch + # True when the profiler was successfully started by this callback. + # We track the status here to make sure callbacks do not interfere with + # each other. The callback will only stop the profiler it started. + self._profiler_started = False + + # TensorBoard should only write summaries on the chief when in a + # Multi-Worker setting. 
+ self._chief_worker_only = True + + def _init_writer(self, model): + """Sets file writer.""" + if tf.executing_eagerly(): + self.writer = tf.summary.create_file_writer(self.log_dir) + if not model.run_eagerly and self.write_graph: + with self.writer.as_default(): + tf.summary.graph(backend.get_graph()) + elif self.write_graph: + self.writer = tf.compat.v1.summary.FileWriter( + self.log_dir, backend.get_graph() + ) + else: + self.writer = tf.compat.v1.summary.FileWriter(self.log_dir) + + def _make_histogram_ops(self, model): + """Defines histogram ops when histogram_freq > 0.""" + # only make histogram summary op if it hasn't already been made + if self.histogram_freq and self.merged is None: + for layer in self.model.layers: + for weight in layer.weights: + mapped_weight_name = weight.name.replace(":", "_") + tf.compat.v1.summary.histogram(mapped_weight_name, weight) + if self.write_images: + w_img = tf.compat.v1.squeeze(weight) + shape = tuple(w_img.shape) + if len(shape) == 2: # dense layer kernel case + if shape[0] > shape[1]: + w_img = tf.compat.v1.transpose(w_img) + shape = tuple(w_img.shape) + w_img = tf.reshape( + w_img, [1, shape[0], shape[1], 1] + ) + elif len(shape) == 3: # convnet case + if backend.image_data_format() == "channels_last": + # switch to channels_first to display + # every kernel as a separate image + w_img = tf.compat.v1.transpose( + w_img, perm=[2, 0, 1] + ) + shape = tuple(w_img.shape) + w_img = tf.reshape( + w_img, [shape[0], shape[1], shape[2], 1] + ) + elif len(shape) == 1: # bias case + w_img = tf.reshape(w_img, [1, shape[0], 1, 1]) + else: + # not possible to handle 3D convnets etc. + continue + + shape = tuple(w_img.shape) + assert len(shape) == 4 and shape[-1] in [1, 3, 4] + tf.compat.v1.summary.image(mapped_weight_name, w_img) + + if self.write_grads: + for weight in layer.trainable_weights: + mapped_weight_name = weight.name.replace(":", "_") + grads = model.optimizer.get_gradients( + model.total_loss, weight + ) + + def is_indexed_slices(grad): + return type(grad).__name__ == "IndexedSlices" + + grads = [ + grad.values if is_indexed_slices(grad) else grad + for grad in grads + ] + tf.compat.v1.summary.histogram( + f"{mapped_weight_name}_grad", grads + ) + + if hasattr(layer, "output"): + if isinstance(layer.output, list): + for i, output in enumerate(layer.output): + tf.compat.v1.summary.histogram( + f"{layer.name}_out_{i}", output + ) + else: + tf.compat.v1.summary.histogram( + f"{layer.name}_out", layer.output + ) + + def set_model(self, model): + """Sets Keras model and creates summary ops.""" + + self.model = model + self._init_writer(model) + # histogram summaries only enabled in graph mode + if not tf.executing_eagerly(): + self._make_histogram_ops(model) + self.merged = tf.compat.v1.summary.merge_all() + + # If both embedding_freq and embeddings_data are available, we will + # visualize embeddings. + if self.embeddings_freq and self.embeddings_data is not None: + # Avoid circular dependency. + from keras.engine import ( + training_utils_v1, + ) + + self.embeddings_data = training_utils_v1.standardize_input_data( + self.embeddings_data, model.input_names + ) + + # If embedding_layer_names are not provided, get all of the + # embedding layers from the model. 
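The `write_images` branch above massages each weight into the 4-D `[batch, height, width, channels]` layout that `tf.compat.v1.summary.image` expects. A standalone sketch of the dense-kernel case; the shapes are hypothetical:

```python
import tensorflow.compat.v2 as tf

kernel = tf.random.normal([64, 32])  # hypothetical dense kernel (in, out)
w_img = tf.squeeze(kernel)
if w_img.shape[0] > w_img.shape[1]:
    # Orient the longer axis as image height, as the callback does.
    w_img = tf.transpose(w_img)
# A batch of one single-channel image: [1, height, width, 1].
w_img = tf.reshape(w_img, [1, w_img.shape[0], w_img.shape[1], 1])
assert len(w_img.shape) == 4 and w_img.shape[-1] in (1, 3, 4)
```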
+            embeddings_layer_names = self.embeddings_layer_names
+            if not embeddings_layer_names:
+                embeddings_layer_names = [
+                    layer.name
+                    for layer in self.model.layers
+                    if type(layer).__name__ == "Embedding"
+                ]
+
+            self.assign_embeddings = []
+            embeddings_vars = {}
+
+            self.batch_id = batch_id = tf.compat.v1.placeholder(tf.int32)
+            self.step = step = tf.compat.v1.placeholder(tf.int32)
+
+            for layer in self.model.layers:
+                if layer.name in embeddings_layer_names:
+                    embedding_input = self.model.get_layer(layer.name).output
+                    embedding_size = np.prod(embedding_input.shape[1:])
+                    embedding_input = tf.reshape(
+                        embedding_input, (step, int(embedding_size))
+                    )
+                    shape = (
+                        self.embeddings_data[0].shape[0],
+                        int(embedding_size),
+                    )
+                    embedding = tf.Variable(
+                        tf.zeros(shape), name=layer.name + "_embedding"
+                    )
+                    embeddings_vars[layer.name] = embedding
+                    batch = tf.compat.v1.assign(
+                        embedding[batch_id : batch_id + step], embedding_input
+                    )
+                    self.assign_embeddings.append(batch)
+
+            self.saver = tf.compat.v1.train.Saver(
+                list(embeddings_vars.values())
+            )
+
+            # Create embeddings_metadata dictionary
+            if isinstance(self.embeddings_metadata, str):
+                embeddings_metadata = {
+                    layer_name: self.embeddings_metadata
+                    for layer_name in embeddings_vars.keys()
+                }
+            else:
+                # If embedding_metadata is already a dictionary
+                embeddings_metadata = self.embeddings_metadata
+
+            try:
+                # isort: off
+                from tensorboard.plugins import projector
+            except ImportError:
+                raise ImportError(
+                    "Failed to import TensorBoard. Please make sure that "
+                    "TensorBoard integration is complete."
+                )
+
+            # TODO(psv): Add integration tests to test embedding visualization
+            # with TensorBoard callback. We are unable to write a unit test for
+            # this because TensorBoard dependency assumes TensorFlow package is
+            # installed.
+            config = projector.ProjectorConfig()
+            for layer_name, tensor in embeddings_vars.items():
+                embedding = config.embeddings.add()
+                embedding.tensor_name = tensor.name
+
+                if (
+                    embeddings_metadata is not None
+                    and layer_name in embeddings_metadata
+                ):
+                    embedding.metadata_path = embeddings_metadata[layer_name]
+
+            projector.visualize_embeddings(self.writer, config)
+
+    def _fetch_callback(self, summary):
+        self.writer.add_summary(summary, self._total_val_batches_seen)
+        self._total_val_batches_seen += 1
+
+    def _write_custom_summaries(self, step, logs=None):
+        """Writes metrics out as custom scalar summaries.
+
+        Args:
+            step: the global step to use for TensorBoard.
+            logs: dict. Keys are scalar summary names, values are
+                NumPy scalars.
+
+        """
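Another aside (the `_write_custom_summaries` body continues below): the embedding-projector plumbing in `set_model` above is driven entirely by constructor arguments. A hedged sketch follows; the layer name "embedding", the metadata path, and `x_train` are assumptions, and a string `embeddings_metadata` fans out to every embedding layer, as the dictionary branch above shows:

```python
# Hedged sketch of the embedding-projector arguments handled above; the
# layer name "embedding", the metadata path, and x_train are assumptions.
from keras import callbacks_v1

tsb = callbacks_v1.TensorBoard(
    log_dir="/tmp/tb_logs",
    embeddings_freq=1,  # re-run the assign/save pass every epoch
    embeddings_layer_names=["embedding"],  # None -> every Embedding layer
    embeddings_metadata="metadata.tsv",  # a str fans out to all layers
    embeddings_data=x_train,  # sliced into batch_size chunks at epoch end
)
```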
+        logs = logs or {}
+        if tf.executing_eagerly():
+            # use v2 summary ops
+            with self.writer.as_default(), tf.summary.record_if(True):
+                for name, value in logs.items():
+                    if isinstance(value, np.ndarray):
+                        value = value.item()
+                    tf.summary.scalar(name, value, step=step)
+        else:
+            # use FileWriter from v1 summary
+            for name, value in logs.items():
+                if isinstance(value, np.ndarray):
+                    value = value.item()
+                summary = tf.compat.v1.Summary()
+                summary_value = summary.value.add()
+                summary_value.simple_value = value
+                summary_value.tag = name
+                self.writer.add_summary(summary, step)
+        self.writer.flush()
+
+    def on_train_batch_begin(self, batch, logs=None):
+        if self._total_batches_seen == self._profile_batch - 1:
+            self._start_profiler()
+
+    def on_train_batch_end(self, batch, logs=None):
+        return self.on_batch_end(batch, logs)
+
+    def on_test_begin(self, logs=None):
+        pass
+
+    def on_test_end(self, logs=None):
+        pass
+
+    def on_batch_end(self, batch, logs=None):
+        """Writes scalar summaries for metrics on every training batch.
+
+        Performs profiling if the current batch matches `profile_batch`.
+        """
+        # Don't output batch_size and batch number as TensorBoard summaries
+        logs = logs or {}
+        self._samples_seen += logs.get("size", 1)
+        samples_seen_since = (
+            self._samples_seen - self._samples_seen_at_last_write
+        )
+        if (
+            self.update_freq != "epoch"
+            and samples_seen_since >= self.update_freq
+        ):
+            batch_logs = {
+                ("batch_" + k): v
+                for k, v in logs.items()
+                if k not in ["batch", "size", "num_steps"]
+            }
-          else:
-            feed_dict = {self.model.input: embeddings_data[0][batch]}
-
-          feed_dict.update({self.batch_id: i, self.step: step})
-
-          if not isinstance(backend.learning_phase(), int):
-            feed_dict[backend.learning_phase()] = False
-
-          sess.run(self.assign_embeddings, feed_dict=feed_dict)
-          self.saver.save(sess,
-                          os.path.join(self.log_dir, 'keras_embedding.ckpt'),
-                          epoch)
-
-          i += self.batch_size
-
-  def on_train_end(self, logs=None):
-    self._stop_profiler()
-    self.writer.close()
-
-  def _start_profiler(self):
-    """Starts the profiler if currently inactive."""
-    if self._profiler_started:
-      return
-    try:
-      tf.profiler.experimental.start(logdir=self.log_dir)
-      self._profiler_started = True
-    except tf.errors.AlreadyExistsError as e:
-      # Profiler errors should not be fatal.
-      logging.error('Failed to start profiler: %s', e.message)
-
-  def _stop_profiler(self):
-    """Stops the profiler if currently active."""
-    if not self._profiler_started:
-      return
-    try:
-      tf.profiler.experimental.stop()
-    except tf.errors.UnavailableError as e:
-      # Profiler errors should not be fatal.
-      logging.error('Failed to stop profiler: %s', e.message)
-    finally:
-      self._profiler_started = False
+            self._write_custom_summaries(self._total_batches_seen, batch_logs)
+            self._samples_seen_at_last_write = self._samples_seen
+        self._total_batches_seen += 1
+        self._stop_profiler()
+
+    def on_train_begin(self, logs=None):
+        pass
+
+    def on_epoch_begin(self, epoch, logs=None):
+        """Add histogram op to Model eval_function callbacks, reset batch
+        count."""
+
+        # check if histogram summary should be run for this epoch
+        if self.histogram_freq and epoch % self.histogram_freq == 0:
+
+            # add the histogram summary op if it should run this epoch
+            self.model._make_test_function()
+            if self.merged not in self.model.test_function.fetches:
+                self.model.test_function.fetches.append(self.merged)
+            self.model.test_function.fetch_callbacks[
+                self.merged
+            ] = self._fetch_callback
+
+    def on_epoch_end(self, epoch, logs=None):
+        """Checks if summary ops should run next epoch, logs scalar
+        summaries."""
+
+        # don't output batch_size and
+        # batch number as TensorBoard summaries
+        logs = {
+            ("epoch_" + k): v
+            for k, v in logs.items()
+            if k not in ["batch", "size", "num_steps"]
+        }
+        if self.update_freq == "epoch":
+            step = epoch
+        else:
+            step = self._samples_seen
+        self._write_custom_summaries(step, logs)
+
+        # pop the histogram summary op after each epoch
+        if self.histogram_freq:
+
+            if self.merged in self.model.test_function.fetches:
+                self.model.test_function.fetches.remove(self.merged)
+            if self.merged in self.model.test_function.fetch_callbacks:
+                self.model.test_function.fetch_callbacks.pop(self.merged)
+
+        if self.embeddings_data is None and self.embeddings_freq:
+            raise ValueError(
+                "To visualize embeddings, embeddings_data must be provided."
+            )
+
+        if self.embeddings_freq and self.embeddings_data is not None:
+            if epoch % self.embeddings_freq == 0:
+                # We need a second forward-pass here because we're passing
+                # the `embeddings_data` explicitly. This design allows passing
+                # arbitrary data as `embeddings_data` and stems from the fact
+                # that we need to know the size of the `tf.Variable`s which
+                # hold the embeddings in `set_model`. At this point, however,
+                # the `validation_data` is not yet set.
+
+                embeddings_data = self.embeddings_data
+                n_samples = embeddings_data[0].shape[0]
+                i = 0
+                sess = backend.get_session()
+                while i < n_samples:
+                    step = min(self.batch_size, n_samples - i)
+                    batch = slice(i, i + step)
+
+                    if isinstance(self.model.input, list):
+                        feed_dict = {
+                            model_input: embeddings_data[idx][batch]
+                            for idx, model_input in enumerate(self.model.input)
+                        }
+                    else:
+                        feed_dict = {
+                            self.model.input: embeddings_data[0][batch]
+                        }
+
+                    feed_dict.update({self.batch_id: i, self.step: step})
+
+                    if not isinstance(backend.learning_phase(), int):
+                        feed_dict[backend.learning_phase()] = False
+
+                    sess.run(self.assign_embeddings, feed_dict=feed_dict)
+                    self.saver.save(
+                        sess,
+                        os.path.join(self.log_dir, "keras_embedding.ckpt"),
+                        epoch,
+                    )
+
+                    i += self.batch_size
+
+    def on_train_end(self, logs=None):
+        self._stop_profiler()
+        self.writer.close()
+
+    def _start_profiler(self):
+        """Starts the profiler if currently inactive."""
+        if self._profiler_started:
+            return
+        try:
+            tf.profiler.experimental.start(logdir=self.log_dir)
+            self._profiler_started = True
+        except tf.errors.AlreadyExistsError as e:
+            # Profiler errors should not be fatal.
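One more aside (the `except` handler concludes just below): the profiler guard above boils down to "only stop what you started". A sketch of the same idea with the public TF profiler API, outside the callback; `logdir` is an illustrative path:

```python
# Sketch of the guarded profiler lifecycle used by _start_profiler /
# _stop_profiler above: start() raises AlreadyExistsError if another
# profiler is already active, stop() raises UnavailableError if none is.
import tensorflow.compat.v2 as tf

logdir = "/tmp/tb_profile"  # illustrative
started = False
try:
    tf.profiler.experimental.start(logdir=logdir)
    started = True
except tf.errors.AlreadyExistsError:
    pass  # someone else owns the profiler; leave it alone

# ... run a few training steps ...

if started:  # only stop a profiler this code started
    try:
        tf.profiler.experimental.stop()
    finally:
        started = False
```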
+ logging.error("Failed to start profiler: %s", e.message) + + def _stop_profiler(self): + """Stops the profiler if currently active.""" + if not self._profiler_started: + return + try: + tf.profiler.experimental.stop() + except tf.errors.UnavailableError as e: + # Profiler errors should not be fatal. + logging.error("Failed to stop profiler: %s", e.message) + finally: + self._profiler_started = False diff --git a/keras/callbacks_v1_test.py b/keras/callbacks_v1_test.py index da0202e35881..b46c6e9f185e 100644 --- a/keras/callbacks_v1_test.py +++ b/keras/callbacks_v1_test.py @@ -14,25 +14,24 @@ # ============================================================================== """Tests for Keras callbacks.""" -import tensorflow.compat.v2 as tf - import os import shutil import tempfile -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + from keras import callbacks from keras import callbacks_v1 -from keras.testing_infra import test_combinations from keras import layers -from keras.testing_infra import test_utils from keras.engine import input_layer from keras.engine import sequential from keras.engine import training +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils from keras.utils import np_utils - TRAIN_SAMPLES = 10 TEST_SAMPLES = 10 NUM_CLASSES = 2 @@ -42,523 +41,581 @@ class TestTensorBoardV1(tf.test.TestCase, parameterized.TestCase): - - def test_TensorBoard(self): - np.random.seed(1337) - - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - def data_generator(train): - if train: - max_batch_index = len(x_train) // BATCH_SIZE - else: - max_batch_index = len(x_test) // BATCH_SIZE - i = 0 - while 1: - if train: - yield (x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE], - y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]) - else: - yield (x_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE], - y_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]) - i += 1 - i %= max_batch_index - - # case: Sequential - with tf.Graph().as_default(), self.cached_session(): - model = sequential.Sequential() - model.add( - layers.Dense( - NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu')) - # non_trainable_weights: moving_variance, moving_mean - model.add(layers.BatchNormalization()) - model.add(layers.Dense(NUM_CLASSES, activation='softmax')) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - tsb = callbacks_v1.TensorBoard( - log_dir=temp_dir, - histogram_freq=1, - write_images=True, - write_grads=True, - batch_size=5) - cbks = [tsb] - - # fit with validation data - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=3, - verbose=0) - - # fit with validation data and accuracy - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=2, - verbose=0) - - # fit generator with validation data - model.fit_generator( - data_generator(True), - len(x_train), - epochs=2, - validation_data=(x_test, y_test), - callbacks=cbks, - verbose=0) - - # fit generator without validation data - # histogram_freq must be zero - 
tsb.histogram_freq = 0 - model.fit_generator( - data_generator(True), - len(x_train), - epochs=2, - callbacks=cbks, - verbose=0) - - # fit generator with validation data and accuracy - tsb.histogram_freq = 1 - model.fit_generator( - data_generator(True), - len(x_train), - epochs=2, - validation_data=(x_test, y_test), - callbacks=cbks, - verbose=0) - - # fit generator without validation data and accuracy - tsb.histogram_freq = 0 - model.fit_generator( - data_generator(True), len(x_train), epochs=2, callbacks=cbks) - assert os.path.exists(temp_dir) - - def test_TensorBoard_multi_input_output(self): - np.random.seed(1337) - tmpdir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) - - with tf.Graph().as_default(), self.cached_session(): - filepath = os.path.join(tmpdir, 'logs') - - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - def data_generator(train): - if train: - max_batch_index = len(x_train) // BATCH_SIZE - else: - max_batch_index = len(x_test) // BATCH_SIZE - i = 0 - while 1: - if train: - # simulate multi-input/output models - yield ([x_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2, - [y_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2) - else: - yield ([x_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2, - [y_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2) - i += 1 - i %= max_batch_index - - inp1 = input_layer.Input((INPUT_DIM,)) - inp2 = input_layer.Input((INPUT_DIM,)) - inp = layers.add([inp1, inp2]) - hidden = layers.Dense(2, activation='relu')(inp) - hidden = layers.Dropout(0.1)(hidden) - output1 = layers.Dense(NUM_CLASSES, activation='softmax')(hidden) - output2 = layers.Dense(NUM_CLASSES, activation='softmax')(hidden) - model = training.Model([inp1, inp2], [output1, output2]) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - - # we must generate new callbacks for each test, as they aren't stateless - def callbacks_factory(histogram_freq): - return [ - callbacks_v1.TensorBoard( - log_dir=filepath, - histogram_freq=histogram_freq, + def test_TensorBoard(self): + np.random.seed(1337) + + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + + def data_generator(train): + if train: + max_batch_index = len(x_train) // BATCH_SIZE + else: + max_batch_index = len(x_test) // BATCH_SIZE + i = 0 + while 1: + if train: + yield ( + x_train[i * BATCH_SIZE : (i + 1) * BATCH_SIZE], + y_train[i * BATCH_SIZE : (i + 1) * BATCH_SIZE], + ) + else: + yield ( + x_test[i * BATCH_SIZE : (i + 1) * BATCH_SIZE], + y_test[i * BATCH_SIZE : (i + 1) * BATCH_SIZE], + ) + i += 1 + i %= max_batch_index + + # case: Sequential + with tf.Graph().as_default(), self.cached_session(): + model = sequential.Sequential() + model.add( + layers.Dense(NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu") + ) + # non_trainable_weights: moving_variance, moving_mean + model.add(layers.BatchNormalization()) + model.add(layers.Dense(NUM_CLASSES, activation="softmax")) + model.compile( + loss="categorical_crossentropy", 
+ optimizer="sgd", + metrics=["accuracy"], + ) + tsb = callbacks_v1.TensorBoard( + log_dir=temp_dir, + histogram_freq=1, write_images=True, write_grads=True, - batch_size=5) - ] - - # fit without validation data - model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE, - callbacks=callbacks_factory(histogram_freq=0), epochs=3) - - # fit with validation data and accuracy - model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE, + batch_size=5, + ) + cbks = [tsb] + + # fit with validation data + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=3, + verbose=0, + ) + + # fit with validation data and accuracy + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=2, + verbose=0, + ) + + # fit generator with validation data + model.fit_generator( + data_generator(True), + len(x_train), + epochs=2, + validation_data=(x_test, y_test), + callbacks=cbks, + verbose=0, + ) + + # fit generator without validation data + # histogram_freq must be zero + tsb.histogram_freq = 0 + model.fit_generator( + data_generator(True), + len(x_train), + epochs=2, + callbacks=cbks, + verbose=0, + ) + + # fit generator with validation data and accuracy + tsb.histogram_freq = 1 + model.fit_generator( + data_generator(True), + len(x_train), + epochs=2, + validation_data=(x_test, y_test), + callbacks=cbks, + verbose=0, + ) + + # fit generator without validation data and accuracy + tsb.histogram_freq = 0 + model.fit_generator( + data_generator(True), len(x_train), epochs=2, callbacks=cbks + ) + assert os.path.exists(temp_dir) + + def test_TensorBoard_multi_input_output(self): + np.random.seed(1337) + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) + + with tf.Graph().as_default(), self.cached_session(): + filepath = os.path.join(tmpdir, "logs") + + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + + def data_generator(train): + if train: + max_batch_index = len(x_train) // BATCH_SIZE + else: + max_batch_index = len(x_test) // BATCH_SIZE + i = 0 + while 1: + if train: + # simulate multi-input/output models + yield ( + [x_train[i * BATCH_SIZE : (i + 1) * BATCH_SIZE]] + * 2, + [y_train[i * BATCH_SIZE : (i + 1) * BATCH_SIZE]] + * 2, + ) + else: + yield ( + [x_test[i * BATCH_SIZE : (i + 1) * BATCH_SIZE]] * 2, + [y_test[i * BATCH_SIZE : (i + 1) * BATCH_SIZE]] * 2, + ) + i += 1 + i %= max_batch_index + + inp1 = input_layer.Input((INPUT_DIM,)) + inp2 = input_layer.Input((INPUT_DIM,)) + inp = layers.add([inp1, inp2]) + hidden = layers.Dense(2, activation="relu")(inp) + hidden = layers.Dropout(0.1)(hidden) + output1 = layers.Dense(NUM_CLASSES, activation="softmax")(hidden) + output2 = layers.Dense(NUM_CLASSES, activation="softmax")(hidden) + model = training.Model([inp1, inp2], [output1, output2]) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + metrics=["accuracy"], + ) + + # we must generate new callbacks for each test, as they aren't + # stateless + def callbacks_factory(histogram_freq): + return [ + callbacks_v1.TensorBoard( + log_dir=filepath, + histogram_freq=histogram_freq, + write_images=True, + write_grads=True, + batch_size=5, + ) + ] + + # fit without validation data + model.fit( + 
[x_train] * 2, + [y_train] * 2, + batch_size=BATCH_SIZE, + callbacks=callbacks_factory(histogram_freq=0), + epochs=3, + ) + + # fit with validation data and accuracy + model.fit( + [x_train] * 2, + [y_train] * 2, + batch_size=BATCH_SIZE, validation_data=([x_test] * 2, [y_test] * 2), - callbacks=callbacks_factory(histogram_freq=1), epochs=2) - - # fit generator without validation data - model.fit_generator(data_generator(True), len(x_train), epochs=2, - callbacks=callbacks_factory(histogram_freq=0)) - - # fit generator with validation data and accuracy - model.fit_generator(data_generator(True), len(x_train), epochs=2, - validation_data=([x_test] * 2, [y_test] * 2), - callbacks=callbacks_factory(histogram_freq=1)) - assert os.path.isdir(filepath) - - def test_Tensorboard_histogram_summaries_in_test_function(self): - - class FileWriterStub: - - def __init__(self, logdir, graph=None): - self.logdir = logdir - self.graph = graph - self.steps_seen = [] - - def add_summary(self, summary, global_step): - summary_obj = tf.compat.v1.Summary() - - # ensure a valid Summary proto is being sent - if isinstance(summary, bytes): - summary_obj.ParseFromString(summary) - else: - assert isinstance(summary, tf.compat.v1.Summary) - summary_obj = summary - - # keep track of steps seen for the merged_summary op, - # which contains the histogram summaries - if len(summary_obj.value) > 1: - self.steps_seen.append(global_step) - - def flush(self): - pass - - def close(self): - pass - - def _init_writer(obj, _): - obj.writer = FileWriterStub(obj.log_dir) - - np.random.seed(1337) - tmpdir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - with tf.Graph().as_default(), self.cached_session(): - model = sequential.Sequential() - model.add( - layers.Dense( - NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu')) - # non_trainable_weights: moving_variance, moving_mean - model.add(layers.BatchNormalization()) - model.add(layers.Dense(NUM_CLASSES, activation='softmax')) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - callbacks_v1.TensorBoard._init_writer = _init_writer - tsb = callbacks_v1.TensorBoard( - log_dir=tmpdir, - histogram_freq=1, - write_images=True, - write_grads=True, - batch_size=5) - cbks = [tsb] - - # fit with validation data - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=3, - verbose=0) - - self.assertAllEqual(tsb.writer.steps_seen, [0, 1, 2, 3, 4, 5]) - - def test_Tensorboard_histogram_summaries_with_generator(self): - np.random.seed(1337) - tmpdir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) - - def generator(): - x = np.random.randn(10, 100).astype(np.float32) - y = np.random.randn(10, 10).astype(np.float32) - while True: - yield x, y - - with tf.Graph().as_default(), self.cached_session(): - model = test_utils.get_small_sequential_mlp( - num_hidden=10, num_classes=10, input_dim=100) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - tsb = callbacks_v1.TensorBoard( - log_dir=tmpdir, - histogram_freq=1, - write_images=True, - write_grads=True, - batch_size=5) - cbks = [tsb] - - # fit with validation 
generator - model.fit_generator( - generator(), - steps_per_epoch=2, - epochs=2, - validation_data=generator(), - validation_steps=2, - callbacks=cbks, - verbose=0) - - with self.assertRaises(ValueError): - # fit with validation generator but no - # validation_steps - model.fit_generator( - generator(), - steps_per_epoch=2, - epochs=2, - validation_data=generator(), + callbacks=callbacks_factory(histogram_freq=1), + epochs=2, + ) + + # fit generator without validation data + model.fit_generator( + data_generator(True), + len(x_train), + epochs=2, + callbacks=callbacks_factory(histogram_freq=0), + ) + + # fit generator with validation data and accuracy + model.fit_generator( + data_generator(True), + len(x_train), + epochs=2, + validation_data=([x_test] * 2, [y_test] * 2), + callbacks=callbacks_factory(histogram_freq=1), + ) + assert os.path.isdir(filepath) + + def test_Tensorboard_histogram_summaries_in_test_function(self): + class FileWriterStub: + def __init__(self, logdir, graph=None): + self.logdir = logdir + self.graph = graph + self.steps_seen = [] + + def add_summary(self, summary, global_step): + summary_obj = tf.compat.v1.Summary() + + # ensure a valid Summary proto is being sent + if isinstance(summary, bytes): + summary_obj.ParseFromString(summary) + else: + assert isinstance(summary, tf.compat.v1.Summary) + summary_obj = summary + + # keep track of steps seen for the merged_summary op, + # which contains the histogram summaries + if len(summary_obj.value) > 1: + self.steps_seen.append(global_step) + + def flush(self): + pass + + def close(self): + pass + + def _init_writer(obj, _): + obj.writer = FileWriterStub(obj.log_dir) + + np.random.seed(1337) + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + + with tf.Graph().as_default(), self.cached_session(): + model = sequential.Sequential() + model.add( + layers.Dense(NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu") + ) + # non_trainable_weights: moving_variance, moving_mean + model.add(layers.BatchNormalization()) + model.add(layers.Dense(NUM_CLASSES, activation="softmax")) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + metrics=["accuracy"], + ) + callbacks_v1.TensorBoard._init_writer = _init_writer + tsb = callbacks_v1.TensorBoard( + log_dir=tmpdir, + histogram_freq=1, + write_images=True, + write_grads=True, + batch_size=5, + ) + cbks = [tsb] + + # fit with validation data + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=3, + verbose=0, + ) + + self.assertAllEqual(tsb.writer.steps_seen, [0, 1, 2, 3, 4, 5]) + + def test_Tensorboard_histogram_summaries_with_generator(self): + np.random.seed(1337) + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) + + def generator(): + x = np.random.randn(10, 100).astype(np.float32) + y = np.random.randn(10, 10).astype(np.float32) + while True: + yield x, y + + with tf.Graph().as_default(), self.cached_session(): + model = test_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=10, input_dim=100 + ) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + metrics=["accuracy"], + ) + tsb = callbacks_v1.TensorBoard( + 
log_dir=tmpdir, + histogram_freq=1, + write_images=True, + write_grads=True, + batch_size=5, + ) + cbks = [tsb] + + # fit with validation generator + model.fit_generator( + generator(), + steps_per_epoch=2, + epochs=2, + validation_data=generator(), + validation_steps=2, + callbacks=cbks, + verbose=0, + ) + + with self.assertRaises(ValueError): + # fit with validation generator but no + # validation_steps + model.fit_generator( + generator(), + steps_per_epoch=2, + epochs=2, + validation_data=generator(), + callbacks=cbks, + verbose=0, + ) + + self.assertTrue(os.path.exists(tmpdir)) + + def test_TensorBoard_with_ReduceLROnPlateau(self): + with self.cached_session(): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + + model = test_utils.get_small_sequential_mlp( + num_hidden=NUM_HIDDEN, + num_classes=NUM_CLASSES, + input_dim=INPUT_DIM, + ) + model.compile( + loss="binary_crossentropy", + optimizer="sgd", + metrics=["accuracy"], + ) + + cbks = [ + callbacks.ReduceLROnPlateau( + monitor="val_loss", factor=0.5, patience=4, verbose=1 + ), + callbacks_v1.TensorBoard(log_dir=temp_dir), + ] + + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=2, + verbose=0, + ) + + assert os.path.exists(temp_dir) + + def test_Tensorboard_batch_logging(self): + class FileWriterStub: + def __init__(self, logdir, graph=None): + self.logdir = logdir + self.graph = graph + self.batches_logged = [] + self.summary_values = [] + self.summary_tags = [] + + def add_summary(self, summary, step): + self.summary_values.append(summary.value[0].simple_value) + self.summary_tags.append(summary.value[0].tag) + self.batches_logged.append(step) + + def flush(self): + pass + + def close(self): + pass + + with tf.Graph().as_default(): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + + tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="batch") + tb_cbk.writer = FileWriterStub(temp_dir) + + for batch in range(5): + tb_cbk.on_batch_end(batch, {"acc": batch}) + self.assertEqual(tb_cbk.writer.batches_logged, [0, 1, 2, 3, 4]) + self.assertEqual( + tb_cbk.writer.summary_values, [0.0, 1.0, 2.0, 3.0, 4.0] + ) + self.assertEqual(tb_cbk.writer.summary_tags, ["batch_acc"] * 5) + + def test_Tensorboard_epoch_and_batch_logging(self): + class FileWriterStub: + def __init__(self, logdir, graph=None): + self.logdir = logdir + self.graph = graph + + def add_summary(self, summary, step): + if "batch_" in summary.value[0].tag: + self.batch_summary = (step, summary) + elif "epoch_" in summary.value[0].tag: + self.epoch_summary = (step, summary) + + def flush(self): + pass + + def close(self): + pass + + with tf.Graph().as_default(): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + + tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="batch") + tb_cbk.writer = FileWriterStub(temp_dir) + + tb_cbk.on_batch_end(0, {"acc": 5.0}) + tb_cbk.on_train_end() + batch_step, batch_summary = tb_cbk.writer.batch_summary + self.assertEqual(batch_step, 0) + self.assertEqual(batch_summary.value[0].simple_value, 5.0) + + tb_cbk = callbacks_v1.TensorBoard(temp_dir, 
update_freq="epoch") + tb_cbk.writer = FileWriterStub(temp_dir) + tb_cbk.on_epoch_end(0, {"acc": 10.0}) + tb_cbk.on_train_end() + epoch_step, epoch_summary = tb_cbk.writer.epoch_summary + self.assertEqual(epoch_step, 0) + self.assertEqual(epoch_summary.value[0].simple_value, 10.0) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_Tensorboard_eager(self): + temp_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES, + ) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + + model = test_utils.get_small_sequential_mlp( + num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM + ) + model.compile( + loss="binary_crossentropy", + optimizer=tf.compat.v1.train.AdamOptimizer(0.01), + metrics=["accuracy"], + ) + + cbks = [callbacks_v1.TensorBoard(log_dir=temp_dir)] + + model.fit( + x_train, + y_train, + batch_size=BATCH_SIZE, + validation_data=(x_test, y_test), callbacks=cbks, - verbose=0) - - self.assertTrue(os.path.exists(tmpdir)) - - def test_TensorBoard_with_ReduceLROnPlateau(self): - with self.cached_session(): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - model = test_utils.get_small_sequential_mlp( - num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM) - model.compile( - loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy']) - - cbks = [ - callbacks.ReduceLROnPlateau( - monitor='val_loss', factor=0.5, patience=4, verbose=1), - callbacks_v1.TensorBoard(log_dir=temp_dir) - ] - - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=2, - verbose=0) - - assert os.path.exists(temp_dir) - - def test_Tensorboard_batch_logging(self): - - class FileWriterStub: - - def __init__(self, logdir, graph=None): - self.logdir = logdir - self.graph = graph - self.batches_logged = [] - self.summary_values = [] - self.summary_tags = [] - - def add_summary(self, summary, step): - self.summary_values.append(summary.value[0].simple_value) - self.summary_tags.append(summary.value[0].tag) - self.batches_logged.append(step) - - def flush(self): - pass - - def close(self): - pass - - with tf.Graph().as_default(): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - - tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='batch') - tb_cbk.writer = FileWriterStub(temp_dir) - - for batch in range(5): - tb_cbk.on_batch_end(batch, {'acc': batch}) - self.assertEqual(tb_cbk.writer.batches_logged, [0, 1, 2, 3, 4]) - self.assertEqual(tb_cbk.writer.summary_values, [0., 1., 2., 3., 4.]) - self.assertEqual(tb_cbk.writer.summary_tags, ['batch_acc'] * 5) - - def test_Tensorboard_epoch_and_batch_logging(self): - - class FileWriterStub: - - def __init__(self, logdir, graph=None): - self.logdir = logdir - self.graph = graph - - def add_summary(self, summary, step): - if 'batch_' in summary.value[0].tag: - self.batch_summary = (step, summary) 
- elif 'epoch_' in summary.value[0].tag: - self.epoch_summary = (step, summary) - - def flush(self): - pass - - def close(self): - pass - - with tf.Graph().as_default(): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - - tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='batch') - tb_cbk.writer = FileWriterStub(temp_dir) - - tb_cbk.on_batch_end(0, {'acc': 5.0}) - tb_cbk.on_train_end() - batch_step, batch_summary = tb_cbk.writer.batch_summary - self.assertEqual(batch_step, 0) - self.assertEqual(batch_summary.value[0].simple_value, 5.0) - - tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='epoch') - tb_cbk.writer = FileWriterStub(temp_dir) - tb_cbk.on_epoch_end(0, {'acc': 10.0}) - tb_cbk.on_train_end() - epoch_step, epoch_summary = tb_cbk.writer.epoch_summary - self.assertEqual(epoch_step, 0) - self.assertEqual(epoch_summary.value[0].simple_value, 10.0) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_Tensorboard_eager(self): - temp_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - model = test_utils.get_small_sequential_mlp( - num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM) - model.compile( - loss='binary_crossentropy', - optimizer=tf.compat.v1.train.AdamOptimizer(0.01), - metrics=['accuracy']) - - cbks = [callbacks_v1.TensorBoard(log_dir=temp_dir)] - - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=2, - verbose=0) - - self.assertTrue(os.path.exists(temp_dir)) - - def test_TensorBoard_update_freq(self): - - class FileWriterStub: - - def __init__(self, logdir, graph=None): - self.logdir = logdir - self.graph = graph - self.batch_summaries = [] - self.epoch_summaries = [] - - def add_summary(self, summary, step): - if 'batch_' in summary.value[0].tag: - self.batch_summaries.append((step, summary)) - elif 'epoch_' in summary.value[0].tag: - self.epoch_summaries.append((step, summary)) - - def flush(self): - pass - - def close(self): - pass - - with tf.Graph().as_default(): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - - # Epoch mode - tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='epoch') - tb_cbk.writer = FileWriterStub(temp_dir) - - tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1}) - self.assertEqual(tb_cbk.writer.batch_summaries, []) - tb_cbk.on_epoch_end(0, {'acc': 10.0, 'size': 1}) - self.assertLen(tb_cbk.writer.epoch_summaries, 1) - tb_cbk.on_train_end() - - # Batch mode - tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='batch') - tb_cbk.writer = FileWriterStub(temp_dir) - - tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1}) - self.assertLen(tb_cbk.writer.batch_summaries, 1) - tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1}) - self.assertLen(tb_cbk.writer.batch_summaries, 2) - self.assertFalse(tb_cbk.writer.epoch_summaries) - tb_cbk.on_train_end() - - # Integer mode - tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq=20) - tb_cbk.writer = FileWriterStub(temp_dir) - - tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10}) - self.assertFalse(tb_cbk.writer.batch_summaries) - 
tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10}) - self.assertLen(tb_cbk.writer.batch_summaries, 1) - tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10}) - self.assertLen(tb_cbk.writer.batch_summaries, 1) - tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10}) - self.assertLen(tb_cbk.writer.batch_summaries, 2) - tb_cbk.on_batch_end(0, {'acc': 10.0, 'size': 10}) - self.assertLen(tb_cbk.writer.batch_summaries, 2) - self.assertFalse(tb_cbk.writer.epoch_summaries) - tb_cbk.on_train_end() - - -if __name__ == '__main__': - tf.test.main() + epochs=2, + verbose=0, + ) + + self.assertTrue(os.path.exists(temp_dir)) + + def test_TensorBoard_update_freq(self): + class FileWriterStub: + def __init__(self, logdir, graph=None): + self.logdir = logdir + self.graph = graph + self.batch_summaries = [] + self.epoch_summaries = [] + + def add_summary(self, summary, step): + if "batch_" in summary.value[0].tag: + self.batch_summaries.append((step, summary)) + elif "epoch_" in summary.value[0].tag: + self.epoch_summaries.append((step, summary)) + + def flush(self): + pass + + def close(self): + pass + + with tf.Graph().as_default(): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + + # Epoch mode + tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="epoch") + tb_cbk.writer = FileWriterStub(temp_dir) + + tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 1}) + self.assertEqual(tb_cbk.writer.batch_summaries, []) + tb_cbk.on_epoch_end(0, {"acc": 10.0, "size": 1}) + self.assertLen(tb_cbk.writer.epoch_summaries, 1) + tb_cbk.on_train_end() + + # Batch mode + tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="batch") + tb_cbk.writer = FileWriterStub(temp_dir) + + tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 1}) + self.assertLen(tb_cbk.writer.batch_summaries, 1) + tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 1}) + self.assertLen(tb_cbk.writer.batch_summaries, 2) + self.assertFalse(tb_cbk.writer.epoch_summaries) + tb_cbk.on_train_end() + + # Integer mode + tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq=20) + tb_cbk.writer = FileWriterStub(temp_dir) + + tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10}) + self.assertFalse(tb_cbk.writer.batch_summaries) + tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10}) + self.assertLen(tb_cbk.writer.batch_summaries, 1) + tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10}) + self.assertLen(tb_cbk.writer.batch_summaries, 1) + tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10}) + self.assertLen(tb_cbk.writer.batch_summaries, 2) + tb_cbk.on_batch_end(0, {"acc": 10.0, "size": 10}) + self.assertLen(tb_cbk.writer.batch_summaries, 2) + self.assertFalse(tb_cbk.writer.epoch_summaries) + tb_cbk.on_train_end() + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/constraints.py b/keras/constraints.py index c3302ab195c5..4a25f5a3dbf2 100644 --- a/keras/constraints.py +++ b/keras/constraints.py @@ -12,295 +12,335 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 # ==============================================================================
-# pylint: disable=invalid-name
-# pylint: disable=g-classes-have-attributes
+
+
 """Constraints: functions that impose constraints on weight values."""
+import warnings
+
 import tensorflow.compat.v2 as tf
+
 from keras import backend
-from keras.utils.generic_utils import deserialize_keras_object
-from keras.utils.generic_utils import serialize_keras_object
+from keras.saving.legacy import serialization as legacy_serialization
+from keras.saving.serialization_lib import deserialize_keras_object
+from keras.saving.serialization_lib import serialize_keras_object
+
+# isort: off
 from tensorflow.python.util.tf_export import keras_export
 from tensorflow.tools.docs import doc_controls
 
 
-@keras_export('keras.constraints.Constraint')
+@keras_export("keras.constraints.Constraint")
 class Constraint:
-  """Base class for weight constraints.
+    """Base class for weight constraints.
+
+    A `Constraint` instance works like a stateless function.
+    Users who subclass this
+    class should override the `__call__` method, which takes a single
+    weight parameter and returns a projected version of that parameter
+    (e.g. normalized or clipped). Constraints can be used with various Keras
+    layers via the `kernel_constraint` or `bias_constraint` arguments.
+
+    Here's a simple example of a non-negative weight constraint:
+
+    >>> class NonNegative(tf.keras.constraints.Constraint):
+    ...
+    ...  def __call__(self, w):
+    ...    return w * tf.cast(tf.math.greater_equal(w, 0.), w.dtype)
+
+    >>> weight = tf.constant((-1.0, 1.0))
+    >>> NonNegative()(weight)
+    <tf.Tensor: shape=(2,), dtype=float32, numpy=array([-0.,  1.], dtype=float32)>
+
+    >>> tf.keras.layers.Dense(4, kernel_constraint=NonNegative())
+    """
+
+    def __call__(self, w):
+        """Applies the constraint to the input weight variable.
+
+        By default, the input weight variable is not modified.
+        Users should override this method to implement their own projection
+        function.
 
-  A `Constraint` instance works like a stateless function.
-  Users who subclass this
-  class should override the `__call__` method, which takes a single
-  weight parameter and return a projected version of that parameter
-  (e.g. normalized or clipped). Constraints can be used with various Keras
-  layers via the `kernel_constraint` or `bias_constraint` arguments.
+        Args:
+            w: Input weight variable.
 
-  Here's a simple example of a non-negative weight constraint:
+        Returns:
+            Projected variable (by default, returns unmodified inputs).
+        """
+        return w
 
-  >>> class NonNegative(tf.keras.constraints.Constraint):
-  ...
-  ...  def __call__(self, w):
-  ...    return w * tf.cast(tf.math.greater_equal(w, 0.), w.dtype)
+    def get_config(self):
+        """Returns a Python dict of the object config.
 
-  >>> weight = tf.constant((-1.0, 1.0))
-  >>> NonNegative()(weight)
-  <tf.Tensor: shape=(2,), dtype=float32, numpy=array([-0.,  1.], dtype=float32)>
+        A constraint config is a Python dictionary (JSON-serializable) that can
+        be used to reinstantiate the same object.
 
-  >>> tf.keras.layers.Dense(4, kernel_constraint=NonNegative())
-  """
+        Returns:
+            Python dict containing the configuration of the constraint object.
+        """
+        return {}
 
-  def __call__(self, w):
-    """Applies the constraint to the input weight variable.
+    @classmethod
+    def from_config(cls, config):
+        """Instantiates a weight constraint from a configuration dictionary.
 
-    By default, the inputs weight variable is not modified.
-    Users should override this method to implement their own projection
-    function.
+ Example: + + ```python + constraint = UnitNorm() + config = constraint.get_config() + constraint = UnitNorm.from_config(config) + ``` + + Args: + config: A Python dictionary, the output of `get_config`. + + Returns: + A `tf.keras.constraints.Constraint` instance. + """ + return cls(**config) + + +@keras_export("keras.constraints.MaxNorm", "keras.constraints.max_norm") +class MaxNorm(Constraint): + """MaxNorm weight constraint. + + Constrains the weights incident to each hidden unit + to have a norm less than or equal to a desired value. + + Also available via the shortcut function `tf.keras.constraints.max_norm`. Args: - w: Input weight variable. + max_value: the maximum norm value for the incoming weights. + axis: integer, axis along which to calculate weight norms. + For instance, in a `Dense` layer the weight matrix + has shape `(input_dim, output_dim)`, + set `axis` to `0` to constrain each weight vector + of length `(input_dim,)`. + In a `Conv2D` layer with `data_format="channels_last"`, + the weight tensor has shape + `(rows, cols, input_depth, output_depth)`, + set `axis` to `[0, 1, 2]` + to constrain the weights of each filter tensor of size + `(rows, cols, input_depth)`. - Returns: - Projected variable (by default, returns unmodified inputs). """ - return w - def get_config(self): - """Returns a Python dict of the object config. + def __init__(self, max_value=2, axis=0): + self.max_value = max_value + self.axis = axis - A constraint config is a Python dictionary (JSON-serializable) that can - be used to reinstantiate the same object. + @doc_controls.do_not_generate_docs + def __call__(self, w): + norms = backend.sqrt( + tf.reduce_sum(tf.square(w), axis=self.axis, keepdims=True) + ) + desired = backend.clip(norms, 0, self.max_value) + return w * (desired / (backend.epsilon() + norms)) - Returns: - Python dict containing the configuration of the constraint object. - """ - return {} + @doc_controls.do_not_generate_docs + def get_config(self): + return {"max_value": self.max_value, "axis": self.axis} -@keras_export('keras.constraints.MaxNorm', 'keras.constraints.max_norm') -class MaxNorm(Constraint): - """MaxNorm weight constraint. - - Constrains the weights incident to each hidden unit - to have a norm less than or equal to a desired value. - - Also available via the shortcut function `tf.keras.constraints.max_norm`. - - Args: - max_value: the maximum norm value for the incoming weights. - axis: integer, axis along which to calculate weight norms. - For instance, in a `Dense` layer the weight matrix - has shape `(input_dim, output_dim)`, - set `axis` to `0` to constrain each weight vector - of length `(input_dim,)`. - In a `Conv2D` layer with `data_format="channels_last"`, - the weight tensor has shape - `(rows, cols, input_depth, output_depth)`, - set `axis` to `[0, 1, 2]` - to constrain the weights of each filter tensor of size - `(rows, cols, input_depth)`. 
- - """ - - def __init__(self, max_value=2, axis=0): - self.max_value = max_value - self.axis = axis - - @doc_controls.do_not_generate_docs - def __call__(self, w): - norms = backend.sqrt( - tf.reduce_sum(tf.square(w), axis=self.axis, keepdims=True)) - desired = backend.clip(norms, 0, self.max_value) - return w * (desired / (backend.epsilon() + norms)) - - @doc_controls.do_not_generate_docs - def get_config(self): - return {'max_value': self.max_value, 'axis': self.axis} - - -@keras_export('keras.constraints.NonNeg', 'keras.constraints.non_neg') +@keras_export("keras.constraints.NonNeg", "keras.constraints.non_neg") class NonNeg(Constraint): - """Constrains the weights to be non-negative. + """Constrains the weights to be non-negative. - Also available via the shortcut function `tf.keras.constraints.non_neg`. - """ + Also available via the shortcut function `tf.keras.constraints.non_neg`. + """ - def __call__(self, w): - return w * tf.cast(tf.greater_equal(w, 0.), backend.floatx()) + def __call__(self, w): + return w * tf.cast(tf.greater_equal(w, 0.0), backend.floatx()) -@keras_export('keras.constraints.UnitNorm', 'keras.constraints.unit_norm') +@keras_export("keras.constraints.UnitNorm", "keras.constraints.unit_norm") class UnitNorm(Constraint): - """Constrains the weights incident to each hidden unit to have unit norm. - - Also available via the shortcut function `tf.keras.constraints.unit_norm`. - - Args: - axis: integer, axis along which to calculate weight norms. - For instance, in a `Dense` layer the weight matrix - has shape `(input_dim, output_dim)`, - set `axis` to `0` to constrain each weight vector - of length `(input_dim,)`. - In a `Conv2D` layer with `data_format="channels_last"`, - the weight tensor has shape - `(rows, cols, input_depth, output_depth)`, - set `axis` to `[0, 1, 2]` - to constrain the weights of each filter tensor of size - `(rows, cols, input_depth)`. - """ - - def __init__(self, axis=0): - self.axis = axis - - @doc_controls.do_not_generate_docs - def __call__(self, w): - return w / ( - backend.epsilon() + backend.sqrt( - tf.reduce_sum( - tf.square(w), axis=self.axis, keepdims=True))) - - @doc_controls.do_not_generate_docs - def get_config(self): - return {'axis': self.axis} - - -@keras_export('keras.constraints.MinMaxNorm', 'keras.constraints.min_max_norm') + """Constrains the weights incident to each hidden unit to have unit norm. + + Also available via the shortcut function `tf.keras.constraints.unit_norm`. + + Args: + axis: integer, axis along which to calculate weight norms. + For instance, in a `Dense` layer the weight matrix + has shape `(input_dim, output_dim)`, + set `axis` to `0` to constrain each weight vector + of length `(input_dim,)`. + In a `Conv2D` layer with `data_format="channels_last"`, + the weight tensor has shape + `(rows, cols, input_depth, output_depth)`, + set `axis` to `[0, 1, 2]` + to constrain the weights of each filter tensor of size + `(rows, cols, input_depth)`. + """ + + def __init__(self, axis=0): + self.axis = axis + + @doc_controls.do_not_generate_docs + def __call__(self, w): + return w / ( + backend.epsilon() + + backend.sqrt( + tf.reduce_sum(tf.square(w), axis=self.axis, keepdims=True) + ) + ) + + @doc_controls.do_not_generate_docs + def get_config(self): + return {"axis": self.axis} + + +@keras_export("keras.constraints.MinMaxNorm", "keras.constraints.min_max_norm") class MinMaxNorm(Constraint): - """MinMaxNorm weight constraint. 
- - Constrains the weights incident to each hidden unit - to have the norm between a lower bound and an upper bound. - - Also available via the shortcut function `tf.keras.constraints.min_max_norm`. - - Args: - min_value: the minimum norm for the incoming weights. - max_value: the maximum norm for the incoming weights. - rate: rate for enforcing the constraint: weights will be - rescaled to yield - `(1 - rate) * norm + rate * norm.clip(min_value, max_value)`. - Effectively, this means that rate=1.0 stands for strict - enforcement of the constraint, while rate<1.0 means that - weights will be rescaled at each step to slowly move - towards a value inside the desired interval. - axis: integer, axis along which to calculate weight norms. - For instance, in a `Dense` layer the weight matrix - has shape `(input_dim, output_dim)`, - set `axis` to `0` to constrain each weight vector - of length `(input_dim,)`. - In a `Conv2D` layer with `data_format="channels_last"`, - the weight tensor has shape - `(rows, cols, input_depth, output_depth)`, - set `axis` to `[0, 1, 2]` - to constrain the weights of each filter tensor of size - `(rows, cols, input_depth)`. - """ - - def __init__(self, min_value=0.0, max_value=1.0, rate=1.0, axis=0): - self.min_value = min_value - self.max_value = max_value - self.rate = rate - self.axis = axis - - @doc_controls.do_not_generate_docs - def __call__(self, w): - norms = backend.sqrt( - tf.reduce_sum(tf.square(w), axis=self.axis, keepdims=True)) - desired = ( - self.rate * backend.clip(norms, self.min_value, self.max_value) + - (1 - self.rate) * norms) - return w * (desired / (backend.epsilon() + norms)) - - @doc_controls.do_not_generate_docs - def get_config(self): - return { - 'min_value': self.min_value, - 'max_value': self.max_value, - 'rate': self.rate, - 'axis': self.axis - } - - -@keras_export('keras.constraints.RadialConstraint', - 'keras.constraints.radial_constraint') + """MinMaxNorm weight constraint. + + Constrains the weights incident to each hidden unit + to have the norm between a lower bound and an upper bound. + + Also available via the shortcut function + `tf.keras.constraints.min_max_norm`. + + Args: + min_value: the minimum norm for the incoming weights. + max_value: the maximum norm for the incoming weights. + rate: rate for enforcing the constraint: weights will be + rescaled to yield + `(1 - rate) * norm + rate * norm.clip(min_value, max_value)`. + Effectively, this means that rate=1.0 stands for strict + enforcement of the constraint, while rate<1.0 means that + weights will be rescaled at each step to slowly move + towards a value inside the desired interval. + axis: integer, axis along which to calculate weight norms. + For instance, in a `Dense` layer the weight matrix + has shape `(input_dim, output_dim)`, + set `axis` to `0` to constrain each weight vector + of length `(input_dim,)`. + In a `Conv2D` layer with `data_format="channels_last"`, + the weight tensor has shape + `(rows, cols, input_depth, output_depth)`, + set `axis` to `[0, 1, 2]` + to constrain the weights of each filter tensor of size + `(rows, cols, input_depth)`. 
+ """ + + def __init__(self, min_value=0.0, max_value=1.0, rate=1.0, axis=0): + self.min_value = min_value + self.max_value = max_value + self.rate = rate + self.axis = axis + + @doc_controls.do_not_generate_docs + def __call__(self, w): + norms = backend.sqrt( + tf.reduce_sum(tf.square(w), axis=self.axis, keepdims=True) + ) + desired = ( + self.rate * backend.clip(norms, self.min_value, self.max_value) + + (1 - self.rate) * norms + ) + return w * (desired / (backend.epsilon() + norms)) + + @doc_controls.do_not_generate_docs + def get_config(self): + return { + "min_value": self.min_value, + "max_value": self.max_value, + "rate": self.rate, + "axis": self.axis, + } + + +@keras_export( + "keras.constraints.RadialConstraint", "keras.constraints.radial_constraint" +) class RadialConstraint(Constraint): - """Constrains `Conv2D` kernel weights to be the same for each radius. - - Also available via the shortcut function - `tf.keras.constraints.radial_constraint`. - - For example, the desired output for the following 4-by-4 kernel: - - ``` - kernel = [[v_00, v_01, v_02, v_03], - [v_10, v_11, v_12, v_13], - [v_20, v_21, v_22, v_23], - [v_30, v_31, v_32, v_33]] - ``` - - is this:: - - ``` - kernel = [[v_11, v_11, v_11, v_11], - [v_11, v_33, v_33, v_11], - [v_11, v_33, v_33, v_11], - [v_11, v_11, v_11, v_11]] - ``` - - This constraint can be applied to any `Conv2D` layer version, including - `Conv2DTranspose` and `SeparableConv2D`, and with either `"channels_last"` or - `"channels_first"` data format. The method assumes the weight tensor is of - shape `(rows, cols, input_depth, output_depth)`. - """ - - @doc_controls.do_not_generate_docs - def __call__(self, w): - w_shape = w.shape - if w_shape.rank is None or w_shape.rank != 4: - raise ValueError( - 'The weight tensor must have rank 4. ' - f'Received weight tensor with shape: {w_shape}') - - height, width, channels, kernels = w_shape - w = backend.reshape(w, (height, width, channels * kernels)) - # TODO(cpeter): Switch map_fn for a faster tf.vectorized_map once - # backend.switch is supported. - w = backend.map_fn( - self._kernel_constraint, - backend.stack(tf.unstack(w, axis=-1), axis=0)) - return backend.reshape(backend.stack(tf.unstack(w, axis=0), axis=-1), - (height, width, channels, kernels)) - - def _kernel_constraint(self, kernel): - """Radially constraints a kernel with shape (height, width, channels).""" - padding = backend.constant([[1, 1], [1, 1]], dtype='int32') - - kernel_shape = backend.shape(kernel)[0] - start = backend.cast(kernel_shape / 2, 'int32') - - kernel_new = backend.switch( - backend.cast(tf.math.floormod(kernel_shape, 2), 'bool'), - lambda: kernel[start - 1:start, start - 1:start], - lambda: kernel[start - 1:start, start - 1:start] + backend.zeros( # pylint: disable=g-long-lambda - (2, 2), dtype=kernel.dtype)) - index = backend.switch( - backend.cast(tf.math.floormod(kernel_shape, 2), 'bool'), - lambda: backend.constant(0, dtype='int32'), - lambda: backend.constant(1, dtype='int32')) - while_condition = lambda index, *args: backend.less(index, start) - - def body_fn(i, array): - return i + 1, tf.pad( - array, - padding, - constant_values=kernel[start + i, start + i]) - - _, kernel_new = tf.compat.v1.while_loop( - while_condition, - body_fn, - [index, kernel_new], - shape_invariants=[index.get_shape(), - tf.TensorShape([None, None])]) - return kernel_new + """Constrains `Conv2D` kernel weights to be the same for each radius. + + Also available via the shortcut function + `tf.keras.constraints.radial_constraint`. 
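A brief numeric aside on the `MinMaxNorm` projection implemented just above (the `RadialConstraint` docstring picks up again below with its 4-by-4 kernel example). The target norm interpolates between the raw norm and its clipped value; the numbers here are illustrative only:

```python
# Worked example of MinMaxNorm's target-norm formula (illustrative numbers):
# desired = rate * clip(norm, min_value, max_value) + (1 - rate) * norm
import numpy as np

min_value, max_value, rate = 0.5, 1.0, 0.5
norm = 2.0  # a weight column's L2 norm before projection
desired = rate * np.clip(norm, min_value, max_value) + (1 - rate) * norm
print(desired)  # 0.5 * 1.0 + 0.5 * 2.0 = 1.5, i.e. halfway toward the cap
# With rate=1.0, the column would be rescaled straight to norm 1.0.
```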
+
+    For example, the desired output for the following 4-by-4 kernel:
+
+    ```
+    kernel = [[v_00, v_01, v_02, v_03],
+              [v_10, v_11, v_12, v_13],
+              [v_20, v_21, v_22, v_23],
+              [v_30, v_31, v_32, v_33]]
+    ```
+
+    is this::
+
+    ```
+    kernel = [[v_11, v_11, v_11, v_11],
+              [v_11, v_33, v_33, v_11],
+              [v_11, v_33, v_33, v_11],
+              [v_11, v_11, v_11, v_11]]
+    ```
+
+    This constraint can be applied to any `Conv2D` layer version, including
+    `Conv2DTranspose` and `SeparableConv2D`, and with either `"channels_last"`
+    or `"channels_first"` data format. The method assumes the weight tensor is
+    of shape `(rows, cols, input_depth, output_depth)`.
+    """
+
+    @doc_controls.do_not_generate_docs
+    def __call__(self, w):
+        w_shape = w.shape
+        if w_shape.rank is None or w_shape.rank != 4:
+            raise ValueError(
+                "The weight tensor must have rank 4. "
+                f"Received weight tensor with shape: {w_shape}"
+            )
+
+        height, width, channels, kernels = w_shape
+        w = backend.reshape(w, (height, width, channels * kernels))
+        # TODO(cpeter): Switch map_fn for a faster tf.vectorized_map once
+        # backend.switch is supported.
+        w = backend.map_fn(
+            self._kernel_constraint,
+            backend.stack(tf.unstack(w, axis=-1), axis=0),
+        )
+        return backend.reshape(
+            backend.stack(tf.unstack(w, axis=0), axis=-1),
+            (height, width, channels, kernels),
+        )
+
+    def _kernel_constraint(self, kernel):
+        """Radially constrains a kernel with shape (height, width,
+        channels)."""
+        padding = backend.constant([[1, 1], [1, 1]], dtype="int32")
+
+        kernel_shape = backend.shape(kernel)[0]
+        start = backend.cast(kernel_shape / 2, "int32")
+
+        kernel_new = backend.switch(
+            backend.cast(tf.math.floormod(kernel_shape, 2), "bool"),
+            lambda: kernel[start - 1 : start, start - 1 : start],
+            lambda: kernel[start - 1 : start, start - 1 : start]
+            + backend.zeros((2, 2), dtype=kernel.dtype),
+        )
+        index = backend.switch(
+            backend.cast(tf.math.floormod(kernel_shape, 2), "bool"),
+            lambda: backend.constant(0, dtype="int32"),
+            lambda: backend.constant(1, dtype="int32"),
+        )
+        while_condition = lambda index, *args: backend.less(index, start)
+
+        def body_fn(i, array):
+            return i + 1, tf.pad(
+                array, padding, constant_values=kernel[start + i, start + i]
+            )
+
+        _, kernel_new = tf.compat.v1.while_loop(
+            while_condition,
+            body_fn,
+            [index, kernel_new],
+            shape_invariants=[index.get_shape(), tf.TensorShape([None, None])],
+        )
+        return kernel_new
 
 
 # Aliases.
@@ -317,32 +357,53 @@ def body_fn(i, array):
 unitnorm = unit_norm
 
 
-@keras_export('keras.constraints.serialize')
-def serialize(constraint):
-  return serialize_keras_object(constraint)
-
-
-@keras_export('keras.constraints.deserialize')
-def deserialize(config, custom_objects=None):
-  return deserialize_keras_object(
-      config,
-      module_objects=globals(),
-      custom_objects=custom_objects,
-      printable_module_name='constraint')
-
-
-@keras_export('keras.constraints.get')
+@keras_export("keras.constraints.serialize")
+def serialize(constraint, use_legacy_format=False):
+    if constraint is None:
+        return None
+    if not isinstance(constraint, Constraint):
+        warnings.warn(
+            "The `keras.constraints.serialize()` API should only be used for "
+            "objects of type `keras.constraints.Constraint`. Found an instance "
+            f"of type {type(constraint)}, which may lead to improper "
+            "serialization."
+ ) + if use_legacy_format: + return legacy_serialization.serialize_keras_object(constraint) + return serialize_keras_object(constraint) + + +@keras_export("keras.constraints.deserialize") +def deserialize(config, custom_objects=None, use_legacy_format=False): + if use_legacy_format: + return legacy_serialization.deserialize_keras_object( + config, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="constraint", + ) + return deserialize_keras_object( + config, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="constraint", + ) + + +@keras_export("keras.constraints.get") def get(identifier): - """Retrieves a Keras constraint function.""" - if identifier is None: - return None - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, str): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) - elif callable(identifier): - return identifier - else: - raise ValueError( - f'Could not interpret constraint function identifier: {identifier}') + """Retrieves a Keras constraint function.""" + if identifier is None: + return None + if isinstance(identifier, dict): + use_legacy_format = "module" not in identifier + return deserialize(identifier, use_legacy_format=use_legacy_format) + elif isinstance(identifier, str): + config = {"class_name": str(identifier), "config": {}} + return get(config) + elif callable(identifier): + return identifier + else: + raise ValueError( + f"Could not interpret constraint function identifier: {identifier}" + ) diff --git a/keras/constraints_test.py b/keras/constraints_test.py index a7c0ba06608a..b0fdb95b4367 100644 --- a/keras/constraints_test.py +++ b/keras/constraints_test.py @@ -14,98 +14,106 @@ # ============================================================================== """Tests for Keras weights constraints.""" -import tensorflow.compat.v2 as tf - import math import numpy as np +import tensorflow.compat.v2 as tf from keras import backend -from keras.testing_infra import test_combinations from keras import constraints +from keras.testing_infra import test_combinations def get_test_values(): - return [0.1, 0.5, 3, 8, 1e-7] + return [0.1, 0.5, 3, 8, 1e-7] def get_example_array(): - np.random.seed(3537) - example_array = np.random.random((100, 100)) * 100. - 50. - example_array[0, 0] = 0. 
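For orientation, the reworked `serialize`/`deserialize`/`get` triple above is meant to support the following round trip, sketched here along the lines of `test_serialization` below (assumes a TF-Keras environment where `keras.constraints` imports):

```python
from keras import constraints

# `get` accepts a string, a config dict, or a callable identifier.
fn = constraints.get("unit_norm")
assert isinstance(fn, constraints.UnitNorm)

# Round-trip: instance -> config dict -> instance of the same class.
config = constraints.serialize(fn)
restored = constraints.deserialize(config)
assert restored.__class__ == fn.__class__
```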
# 0 could possibly cause trouble - return example_array + np.random.seed(3537) + example_array = np.random.random((100, 100)) * 100.0 - 50.0 + example_array[0, 0] = 0.0 # 0 could possibly cause trouble + return example_array def get_example_kernel(width): - np.random.seed(3537) - example_array = np.random.rand(width, width, 2, 2) - return example_array + np.random.seed(3537) + example_array = np.random.rand(width, width, 2, 2) + return example_array -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class KerasConstraintsTest(tf.test.TestCase): - - def test_serialization(self): - all_activations = ['max_norm', 'non_neg', - 'unit_norm', 'min_max_norm'] - for name in all_activations: - fn = constraints.get(name) - ref_fn = getattr(constraints, name)() - assert fn.__class__ == ref_fn.__class__ - config = constraints.serialize(fn) - fn = constraints.deserialize(config) - assert fn.__class__ == ref_fn.__class__ - - def test_max_norm(self): - array = get_example_array() - for m in get_test_values(): - norm_instance = constraints.max_norm(m) - normed = norm_instance(backend.variable(array)) - assert np.all(backend.eval(normed) < m) - - # a more explicit example - norm_instance = constraints.max_norm(2.0) - x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T - x_normed_target = np.array( - [[0, 0, 0], [1.0, 0, 0], [2.0, 0, 0], - [2. / np.sqrt(3), 2. / np.sqrt(3), 2. / np.sqrt(3)]]).T - x_normed_actual = backend.eval(norm_instance(backend.variable(x))) - self.assertAllClose(x_normed_actual, x_normed_target, rtol=1e-05) - - def test_non_neg(self): - non_neg_instance = constraints.non_neg() - normed = non_neg_instance(backend.variable(get_example_array())) - assert np.all(np.min(backend.eval(normed), axis=1) == 0.) - - def test_unit_norm(self): - unit_norm_instance = constraints.unit_norm() - normalized = unit_norm_instance(backend.variable(get_example_array())) - norm_of_normalized = np.sqrt(np.sum(backend.eval(normalized)**2, axis=0)) - # In the unit norm constraint, it should be equal to 1. - difference = norm_of_normalized - 1. 
- largest_difference = np.max(np.abs(difference)) - assert np.abs(largest_difference) < 10e-5 - - def test_min_max_norm(self): - array = get_example_array() - for m in get_test_values(): - norm_instance = constraints.min_max_norm(min_value=m, max_value=m * 2) - normed = norm_instance(backend.variable(array)) - value = backend.eval(normed) - l2 = np.sqrt(np.sum(np.square(value), axis=0)) - assert not l2[l2 < m] - assert not l2[l2 > m * 2 + 1e-5] - - def test_conv2d_radial_constraint(self): - for width in (3, 4, 5, 6): - array = get_example_kernel(width) - norm_instance = constraints.radial_constraint() - normed = norm_instance(backend.variable(array)) - value = backend.eval(normed) - assert np.all(value.shape == array.shape) - assert np.all(value[0:, 0, 0, 0] == value[-1:, 0, 0, 0]) - assert len(set(value[..., 0, 0].flatten())) == math.ceil(float(width) / 2) - - -if __name__ == '__main__': - tf.test.main() + def test_serialization(self): + all_activations = ["max_norm", "non_neg", "unit_norm", "min_max_norm"] + for name in all_activations: + fn = constraints.get(name) + ref_fn = getattr(constraints, name)() + assert fn.__class__ == ref_fn.__class__ + config = constraints.serialize(fn) + fn = constraints.deserialize(config) + assert fn.__class__ == ref_fn.__class__ + + def test_max_norm(self): + array = get_example_array() + for m in get_test_values(): + norm_instance = constraints.max_norm(m) + normed = norm_instance(backend.variable(array)) + assert np.all(backend.eval(normed) < m) + + # a more explicit example + norm_instance = constraints.max_norm(2.0) + x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T + x_normed_target = np.array( + [ + [0, 0, 0], + [1.0, 0, 0], + [2.0, 0, 0], + [2.0 / np.sqrt(3), 2.0 / np.sqrt(3), 2.0 / np.sqrt(3)], + ] + ).T + x_normed_actual = backend.eval(norm_instance(backend.variable(x))) + self.assertAllClose(x_normed_actual, x_normed_target, rtol=1e-05) + + def test_non_neg(self): + non_neg_instance = constraints.non_neg() + normed = non_neg_instance(backend.variable(get_example_array())) + assert np.all(np.min(backend.eval(normed), axis=1) == 0.0) + + def test_unit_norm(self): + unit_norm_instance = constraints.unit_norm() + normalized = unit_norm_instance(backend.variable(get_example_array())) + norm_of_normalized = np.sqrt( + np.sum(backend.eval(normalized) ** 2, axis=0) + ) + # In the unit norm constraint, it should be equal to 1. 
+ difference = norm_of_normalized - 1.0 + largest_difference = np.max(np.abs(difference)) + assert np.abs(largest_difference) < 10e-5 + + def test_min_max_norm(self): + array = get_example_array() + for m in get_test_values(): + norm_instance = constraints.min_max_norm( + min_value=m, max_value=m * 2 + ) + normed = norm_instance(backend.variable(array)) + value = backend.eval(normed) + l2 = np.sqrt(np.sum(np.square(value), axis=0)) + assert not l2[l2 < m] + assert not l2[l2 > m * 2 + 1e-5] + + def test_conv2d_radial_constraint(self): + for width in (3, 4, 5, 6): + array = get_example_kernel(width) + norm_instance = constraints.radial_constraint() + normed = norm_instance(backend.variable(array)) + value = backend.eval(normed) + assert np.all(value.shape == array.shape) + assert np.all(value[0:, 0, 0, 0] == value[-1:, 0, 0, 0]) + assert len(set(value[..., 0, 0].flatten())) == math.ceil( + float(width) / 2 + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/datasets/BUILD b/keras/datasets/BUILD index 06be216b3486..325aff5ed829 100644 --- a/keras/datasets/BUILD +++ b/keras/datasets/BUILD @@ -1,7 +1,10 @@ # Description: # Contains the Keras datasets package (internal TensorFlow version). +# Placeholder: load unaliased py_library + package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", ], diff --git a/keras/datasets/boston_housing.py b/keras/datasets/boston_housing.py index 64b6743ceb8f..08a31e34614b 100644 --- a/keras/datasets/boston_housing.py +++ b/keras/datasets/boston_housing.py @@ -17,60 +17,72 @@ import numpy as np from keras.utils.data_utils import get_file + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.datasets.boston_housing.load_data') -def load_data(path='boston_housing.npz', test_split=0.2, seed=113): - """Loads the Boston Housing dataset. +@keras_export("keras.datasets.boston_housing.load_data") +def load_data(path="boston_housing.npz", test_split=0.2, seed=113): + """Loads the Boston Housing dataset. + + This is a dataset taken from the StatLib library which is maintained at + Carnegie Mellon University. - This is a dataset taken from the StatLib library which is maintained at - Carnegie Mellon University. + **WARNING:** This dataset has an ethical problem: the authors of this + dataset included a variable, "B", that may appear to assume that racial + self-segregation influences house prices. As such, we strongly discourage + the use of this dataset, unless in the context of illustrating ethical + issues in data science and machine learning. - Samples contain 13 attributes of houses at different locations around the - Boston suburbs in the late 1970s. Targets are the median values of - the houses at a location (in k$). + Samples contain 13 attributes of houses at different locations around the + Boston suburbs in the late 1970s. Targets are the median values of + the houses at a location (in k$). - The attributes themselves are defined in the - [StatLib website](http://lib.stat.cmu.edu/datasets/boston). + The attributes themselves are defined in the + [StatLib website](http://lib.stat.cmu.edu/datasets/boston). - Args: - path: path where to cache the dataset locally - (relative to `~/.keras/datasets`). - test_split: fraction of the data to reserve as test set. - seed: Random seed for shuffling the data - before computing the test split. + Args: + path: path where to cache the dataset locally + (relative to `~/.keras/datasets`). 
+      test_split: fraction of the data to reserve as test set.
+      seed: Random seed for shuffling the data
+          before computing the test split.

-  Returns:
-    Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
+    Returns:
+      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

-  **x_train, x_test**: numpy arrays with shape `(num_samples, 13)`
-    containing either the training samples (for x_train),
-    or test samples (for y_train).
+    **x_train, x_test**: numpy arrays with shape `(num_samples, 13)`
+      containing either the training samples (for x_train),
+      or test samples (for x_test).

-  **y_train, y_test**: numpy arrays of shape `(num_samples,)` containing the
-    target scalars. The targets are float scalars typically between 10 and
-    50 that represent the home prices in k$.
-  """
-  assert 0 <= test_split < 1
-  origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/'
-  path = get_file(
-      path,
-      origin=origin_folder + 'boston_housing.npz',
-      file_hash=
-      'f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5')
-  with np.load(path, allow_pickle=True) as f:  # pylint: disable=unexpected-keyword-arg
-    x = f['x']
-    y = f['y']
+    **y_train, y_test**: numpy arrays of shape `(num_samples,)` containing the
+      target scalars. The targets are float scalars typically between 10 and
+      50 that represent the home prices in k$.
+    """
+    assert 0 <= test_split < 1
+    origin_folder = (
+        "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"
+    )
+    path = get_file(
+        path,
+        origin=origin_folder + "boston_housing.npz",
+        file_hash=(  # noqa: E501
+            "f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5"
+        ),
+    )
+    with np.load(path, allow_pickle=True) as f:
+        x = f["x"]
+        y = f["y"]

-  rng = np.random.RandomState(seed)
-  indices = np.arange(len(x))
-  rng.shuffle(indices)
-  x = x[indices]
-  y = y[indices]
+    rng = np.random.RandomState(seed)
+    indices = np.arange(len(x))
+    rng.shuffle(indices)
+    x = x[indices]
+    y = y[indices]

-  x_train = np.array(x[:int(len(x) * (1 - test_split))])
-  y_train = np.array(y[:int(len(x) * (1 - test_split))])
-  x_test = np.array(x[int(len(x) * (1 - test_split)):])
-  y_test = np.array(y[int(len(x) * (1 - test_split)):])
-  return (x_train, y_train), (x_test, y_test)
+    x_train = np.array(x[: int(len(x) * (1 - test_split))])
+    y_train = np.array(y[: int(len(x) * (1 - test_split))])
+    x_test = np.array(x[int(len(x) * (1 - test_split)) :])
+    y_test = np.array(y[int(len(x) * (1 - test_split)) :])
+    return (x_train, y_train), (x_test, y_test)
diff --git a/keras/datasets/cifar.py b/keras/datasets/cifar.py
index af4f44bae89f..2d21d066a46d 100644
--- a/keras/datasets/cifar.py
+++ b/keras/datasets/cifar.py
@@ -17,26 +17,26 @@
 import _pickle as cPickle


-def load_batch(fpath, label_key='labels'):
-  """Internal utility for parsing CIFAR data.
+def load_batch(fpath, label_key="labels"):
+    """Internal utility for parsing CIFAR data.

-  Args:
-    fpath: path the file to parse.
-    label_key: key for label data in the retrieve
-      dictionary.
+    Args:
+      fpath: path to the file to parse.
+      label_key: key for label data in the retrieved
+        dictionary.

-  Returns:
-    A tuple `(data, labels)`.
-  """
-  with open(fpath, 'rb') as f:
-    d = cPickle.load(f, encoding='bytes')
-    # decode utf8
-    d_decoded = {}
-    for k, v in d.items():
-      d_decoded[k.decode('utf8')] = v
-    d = d_decoded
-  data = d['data']
-  labels = d[label_key]
+    Returns:
+      A tuple `(data, labels)`.
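To make the `load_batch` contract above concrete: each row of the unpickled `data` array is a flattened 3x32x32 image stored channel-planar (1024 red bytes, then 1024 green, then 1024 blue), which is why the reshape in the implementation that follows is channels-first. A toy stand-in, not the real batch file:

```python
import numpy as np

# Two fake CIFAR rows of 3072 bytes each, standing in for d["data"].
data = np.arange(2 * 3072).astype("uint8").reshape(2, 3072)
images = data.reshape(data.shape[0], 3, 32, 32)  # same reshape as load_batch
assert images.shape == (2, 3, 32, 32)
assert images[0, 0, 0, 0] == data[0, 0]  # the red plane comes first
```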
+ """ + with open(fpath, "rb") as f: + d = cPickle.load(f, encoding="bytes") + # decode utf8 + d_decoded = {} + for k, v in d.items(): + d_decoded[k.decode("utf8")] = v + d = d_decoded + data = d["data"] + labels = d[label_key] - data = data.reshape(data.shape[0], 3, 32, 32) - return data, labels + data = data.reshape(data.shape[0], 3, 32, 32) + return data, labels diff --git a/keras/datasets/cifar10.py b/keras/datasets/cifar10.py index 92919f80c89f..5131d2a69f54 100644 --- a/keras/datasets/cifar10.py +++ b/keras/datasets/cifar10.py @@ -21,89 +21,95 @@ from keras import backend from keras.datasets.cifar import load_batch from keras.utils.data_utils import get_file + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.datasets.cifar10.load_data') +@keras_export("keras.datasets.cifar10.load_data") def load_data(): - """Loads the CIFAR10 dataset. - - This is a dataset of 50,000 32x32 color training images and 10,000 test - images, labeled over 10 categories. See more info at the - [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html). - - The classes are: - - | Label | Description | - |:-----:|-------------| - | 0 | airplane | - | 1 | automobile | - | 2 | bird | - | 3 | cat | - | 4 | deer | - | 5 | dog | - | 6 | frog | - | 7 | horse | - | 8 | ship | - | 9 | truck | - - Returns: - Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train**: uint8 NumPy array of grayscale image data with shapes - `(50000, 32, 32, 3)`, containing the training data. Pixel values range - from 0 to 255. - - **y_train**: uint8 NumPy array of labels (integers in range 0-9) - with shape `(50000, 1)` for the training data. - - **x_test**: uint8 NumPy array of grayscale image data with shapes - `(10000, 32, 32, 3)`, containing the test data. Pixel values range - from 0 to 255. - - **y_test**: uint8 NumPy array of labels (integers in range 0-9) - with shape `(10000, 1)` for the test data. - - Example: - - ```python - (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data() - assert x_train.shape == (50000, 32, 32, 3) - assert x_test.shape == (10000, 32, 32, 3) - assert y_train.shape == (50000, 1) - assert y_test.shape == (10000, 1) - ``` - """ - dirname = 'cifar-10-batches-py' - origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' - path = get_file( - dirname, - origin=origin, - untar=True, - file_hash= - '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce') - - num_train_samples = 50000 - - x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8') - y_train = np.empty((num_train_samples,), dtype='uint8') - - for i in range(1, 6): - fpath = os.path.join(path, 'data_batch_' + str(i)) - (x_train[(i - 1) * 10000:i * 10000, :, :, :], - y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath) - - fpath = os.path.join(path, 'test_batch') - x_test, y_test = load_batch(fpath) - - y_train = np.reshape(y_train, (len(y_train), 1)) - y_test = np.reshape(y_test, (len(y_test), 1)) - - if backend.image_data_format() == 'channels_last': - x_train = x_train.transpose(0, 2, 3, 1) - x_test = x_test.transpose(0, 2, 3, 1) - - x_test = x_test.astype(x_train.dtype) - y_test = y_test.astype(y_train.dtype) - - return (x_train, y_train), (x_test, y_test) + """Loads the CIFAR10 dataset. + + This is a dataset of 50,000 32x32 color training images and 10,000 test + images, labeled over 10 categories. See more info at the + [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html). 
+ + The classes are: + + | Label | Description | + |:-----:|-------------| + | 0 | airplane | + | 1 | automobile | + | 2 | bird | + | 3 | cat | + | 4 | deer | + | 5 | dog | + | 6 | frog | + | 7 | horse | + | 8 | ship | + | 9 | truck | + + Returns: + Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. + + **x_train**: uint8 NumPy array of image data with shapes + `(50000, 32, 32, 3)`, containing the training data. Pixel values range + from 0 to 255. + + **y_train**: uint8 NumPy array of labels (integers in range 0-9) + with shape `(50000, 1)` for the training data. + + **x_test**: uint8 NumPy array of image data with shapes + `(10000, 32, 32, 3)`, containing the test data. Pixel values range + from 0 to 255. + + **y_test**: uint8 NumPy array of labels (integers in range 0-9) + with shape `(10000, 1)` for the test data. + + Example: + + ```python + (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data() + assert x_train.shape == (50000, 32, 32, 3) + assert x_test.shape == (10000, 32, 32, 3) + assert y_train.shape == (50000, 1) + assert y_test.shape == (10000, 1) + ``` + """ + dirname = "cifar-10-batches-py" + origin = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" + path = get_file( + dirname, + origin=origin, + untar=True, + file_hash=( # noqa: E501 + "6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce" + ), + ) + + num_train_samples = 50000 + + x_train = np.empty((num_train_samples, 3, 32, 32), dtype="uint8") + y_train = np.empty((num_train_samples,), dtype="uint8") + + for i in range(1, 6): + fpath = os.path.join(path, "data_batch_" + str(i)) + ( + x_train[(i - 1) * 10000 : i * 10000, :, :, :], + y_train[(i - 1) * 10000 : i * 10000], + ) = load_batch(fpath) + + fpath = os.path.join(path, "test_batch") + x_test, y_test = load_batch(fpath) + + y_train = np.reshape(y_train, (len(y_train), 1)) + y_test = np.reshape(y_test, (len(y_test), 1)) + + if backend.image_data_format() == "channels_last": + x_train = x_train.transpose(0, 2, 3, 1) + x_test = x_test.transpose(0, 2, 3, 1) + + x_test = x_test.astype(x_train.dtype) + y_test = y_test.astype(y_train.dtype) + + return (x_train, y_train), (x_test, y_test) diff --git a/keras/datasets/cifar100.py b/keras/datasets/cifar100.py index b7f24ebfda82..e910b0051884 100644 --- a/keras/datasets/cifar100.py +++ b/keras/datasets/cifar100.py @@ -21,74 +21,80 @@ from keras import backend from keras.datasets.cifar import load_batch from keras.utils.data_utils import get_file -from tensorflow.python.util.tf_export import keras_export - - -@keras_export('keras.datasets.cifar100.load_data') -def load_data(label_mode='fine'): - """Loads the CIFAR100 dataset. - - This is a dataset of 50,000 32x32 color training images and - 10,000 test images, labeled over 100 fine-grained classes that are - grouped into 20 coarse-grained classes. See more info at the - [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html). - - Args: - label_mode: one of "fine", "coarse". If it is "fine" the category labels - are the fine-grained labels, if it is "coarse" the output labels are the - coarse-grained superclasses. - - Returns: - Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train**: uint8 NumPy array of grayscale image data with shapes - `(50000, 32, 32, 3)`, containing the training data. Pixel values range - from 0 to 255. - - **y_train**: uint8 NumPy array of labels (integers in range 0-99) - with shape `(50000, 1)` for the training data. 
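The tail of `load_data` above converts the channels-first batches to match the active `image_data_format()`. That conversion is just an axis permutation; a minimal NumPy sketch:

```python
import numpy as np

x = np.zeros((2, 3, 32, 32), dtype="uint8")  # (N, C, H, W) as stored on disk
x_nhwc = x.transpose(0, 2, 3, 1)             # (N, H, W, C) for channels_last
assert x_nhwc.shape == (2, 32, 32, 3)
```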
-    **x_test**: uint8 NumPy array of grayscale image data with shapes
-      `(10000, 32, 32, 3)`, containing the test data. Pixel values range
-      from 0 to 255.
-
-    **y_test**: uint8 NumPy array of labels (integers in range 0-99)
-      with shape `(10000, 1)` for the test data.
-
-  Example:
-
-  ```python
-  (x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()
-  assert x_train.shape == (50000, 32, 32, 3)
-  assert x_test.shape == (10000, 32, 32, 3)
-  assert y_train.shape == (50000, 1)
-  assert y_test.shape == (10000, 1)
-  ```
-  """
-  if label_mode not in ['fine', 'coarse']:
-    raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`. '
-                     f'Received: label_mode={label_mode}.')
-
-  dirname = 'cifar-100-python'
-  origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
-  path = get_file(
-      dirname,
-      origin=origin,
-      untar=True,
-      file_hash=
-      '85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7')
-
-  fpath = os.path.join(path, 'train')
-  x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels')
-
-  fpath = os.path.join(path, 'test')
-  x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels')
-
-  y_train = np.reshape(y_train, (len(y_train), 1))
-  y_test = np.reshape(y_test, (len(y_test), 1))
+# isort: off
+from tensorflow.python.util.tf_export import keras_export

-  if backend.image_data_format() == 'channels_last':
-    x_train = x_train.transpose(0, 2, 3, 1)
-    x_test = x_test.transpose(0, 2, 3, 1)

-  return (x_train, y_train), (x_test, y_test)

+@keras_export("keras.datasets.cifar100.load_data")
+def load_data(label_mode="fine"):
+    """Loads the CIFAR100 dataset.
+
+    This is a dataset of 50,000 32x32 color training images and
+    10,000 test images, labeled over 100 fine-grained classes that are
+    grouped into 20 coarse-grained classes. See more info at the
+    [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).
+
+    Args:
+      label_mode: one of "fine", "coarse". If it is "fine", the category labels
+        are the fine-grained labels; if it is "coarse", the output labels are
+        the coarse-grained superclasses.
+
+    Returns:
+      Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`.
+
+    **x_train**: uint8 NumPy array of image data with shapes
+      `(50000, 32, 32, 3)`, containing the training data. Pixel values range
+      from 0 to 255.
+
+    **y_train**: uint8 NumPy array of labels (integers in range 0-99)
+      with shape `(50000, 1)` for the training data.
+
+    **x_test**: uint8 NumPy array of image data with shapes
+      `(10000, 32, 32, 3)`, containing the test data. Pixel values range
+      from 0 to 255.
+
+    **y_test**: uint8 NumPy array of labels (integers in range 0-99)
+      with shape `(10000, 1)` for the test data.
+
+    Example:
+
+    ```python
+    (x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()
+    assert x_train.shape == (50000, 32, 32, 3)
+    assert x_test.shape == (10000, 32, 32, 3)
+    assert y_train.shape == (50000, 1)
+    assert y_test.shape == (10000, 1)
+    ```
+    """
+    if label_mode not in ["fine", "coarse"]:
+        raise ValueError(
+            '`label_mode` must be one of `"fine"`, `"coarse"`. '
+            f"Received: label_mode={label_mode}."
+ ) + + dirname = "cifar-100-python" + origin = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" + path = get_file( + dirname, + origin=origin, + untar=True, + file_hash=( # noqa: E501 + "85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7" + ), + ) + + fpath = os.path.join(path, "train") + x_train, y_train = load_batch(fpath, label_key=label_mode + "_labels") + + fpath = os.path.join(path, "test") + x_test, y_test = load_batch(fpath, label_key=label_mode + "_labels") + + y_train = np.reshape(y_train, (len(y_train), 1)) + y_test = np.reshape(y_test, (len(y_test), 1)) + + if backend.image_data_format() == "channels_last": + x_train = x_train.transpose(0, 2, 3, 1) + x_test = x_test.transpose(0, 2, 3, 1) + + return (x_train, y_train), (x_test, y_test) diff --git a/keras/datasets/fashion_mnist.py b/keras/datasets/fashion_mnist.py index adbba99cd7ec..e7d64ebef178 100644 --- a/keras/datasets/fashion_mnist.py +++ b/keras/datasets/fashion_mnist.py @@ -20,86 +20,92 @@ import numpy as np from keras.utils.data_utils import get_file + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.datasets.fashion_mnist.load_data') +@keras_export("keras.datasets.fashion_mnist.load_data") def load_data(): - """Loads the Fashion-MNIST dataset. - - This is a dataset of 60,000 28x28 grayscale images of 10 fashion categories, - along with a test set of 10,000 images. This dataset can be used as - a drop-in replacement for MNIST. - - The classes are: - - | Label | Description | - |:-----:|-------------| - | 0 | T-shirt/top | - | 1 | Trouser | - | 2 | Pullover | - | 3 | Dress | - | 4 | Coat | - | 5 | Sandal | - | 6 | Shirt | - | 7 | Sneaker | - | 8 | Bag | - | 9 | Ankle boot | - - Returns: - Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train**: uint8 NumPy array of grayscale image data with shapes - `(60000, 28, 28)`, containing the training data. - - **y_train**: uint8 NumPy array of labels (integers in range 0-9) - with shape `(60000,)` for the training data. - - **x_test**: uint8 NumPy array of grayscale image data with shapes - (10000, 28, 28), containing the test data. - - **y_test**: uint8 NumPy array of labels (integers in range 0-9) - with shape `(10000,)` for the test data. - - Example: - - ```python - (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() - assert x_train.shape == (60000, 28, 28) - assert x_test.shape == (10000, 28, 28) - assert y_train.shape == (60000,) - assert y_test.shape == (10000,) - ``` - - License: - The copyright for Fashion-MNIST is held by Zalando SE. - Fashion-MNIST is licensed under the [MIT license]( - https://github.com/zalandoresearch/fashion-mnist/blob/master/LICENSE). 
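A usage sketch of the `label_mode` switch handled in `cifar100.load_data` above; this fetches the dataset over the network on first call, so it is illustrative rather than a unit test:

```python
from keras.datasets import cifar100

# "fine" yields the 100 class labels, "coarse" the 20 superclass labels.
(_, y_fine), _ = cifar100.load_data(label_mode="fine")
(_, y_coarse), _ = cifar100.load_data(label_mode="coarse")
assert y_fine.max() == 99 and y_coarse.max() == 19
```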
- - """ - dirname = os.path.join('datasets', 'fashion-mnist') - base = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - files = [ - 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', - 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz' - ] - - paths = [] - for fname in files: - paths.append(get_file(fname, origin=base + fname, cache_subdir=dirname)) - - with gzip.open(paths[0], 'rb') as lbpath: - y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) - - with gzip.open(paths[1], 'rb') as imgpath: - x_train = np.frombuffer( - imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28) - - with gzip.open(paths[2], 'rb') as lbpath: - y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) - - with gzip.open(paths[3], 'rb') as imgpath: - x_test = np.frombuffer( - imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28) - - return (x_train, y_train), (x_test, y_test) + """Loads the Fashion-MNIST dataset. + + This is a dataset of 60,000 28x28 grayscale images of 10 fashion categories, + along with a test set of 10,000 images. This dataset can be used as + a drop-in replacement for MNIST. + + The classes are: + + | Label | Description | + |:-----:|-------------| + | 0 | T-shirt/top | + | 1 | Trouser | + | 2 | Pullover | + | 3 | Dress | + | 4 | Coat | + | 5 | Sandal | + | 6 | Shirt | + | 7 | Sneaker | + | 8 | Bag | + | 9 | Ankle boot | + + Returns: + Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. + + **x_train**: uint8 NumPy array of grayscale image data with shapes + `(60000, 28, 28)`, containing the training data. + + **y_train**: uint8 NumPy array of labels (integers in range 0-9) + with shape `(60000,)` for the training data. + + **x_test**: uint8 NumPy array of grayscale image data with shapes + (10000, 28, 28), containing the test data. + + **y_test**: uint8 NumPy array of labels (integers in range 0-9) + with shape `(10000,)` for the test data. + + Example: + + ```python + (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() + assert x_train.shape == (60000, 28, 28) + assert x_test.shape == (10000, 28, 28) + assert y_train.shape == (60000,) + assert y_test.shape == (10000,) + ``` + + License: + The copyright for Fashion-MNIST is held by Zalando SE. + Fashion-MNIST is licensed under the [MIT license]( + https://github.com/zalandoresearch/fashion-mnist/blob/master/LICENSE). 
+ + """ + dirname = os.path.join("datasets", "fashion-mnist") + base = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/" + files = [ + "train-labels-idx1-ubyte.gz", + "train-images-idx3-ubyte.gz", + "t10k-labels-idx1-ubyte.gz", + "t10k-images-idx3-ubyte.gz", + ] + + paths = [] + for fname in files: + paths.append(get_file(fname, origin=base + fname, cache_subdir=dirname)) + + with gzip.open(paths[0], "rb") as lbpath: + y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) + + with gzip.open(paths[1], "rb") as imgpath: + x_train = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape( + len(y_train), 28, 28 + ) + + with gzip.open(paths[2], "rb") as lbpath: + y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) + + with gzip.open(paths[3], "rb") as imgpath: + x_test = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape( + len(y_test), 28, 28 + ) + + return (x_train, y_train), (x_test, y_test) diff --git a/keras/datasets/imdb.py b/keras/datasets/imdb.py index a90764bf8507..1e61771ad79b 100644 --- a/keras/datasets/imdb.py +++ b/keras/datasets/imdb.py @@ -20,169 +20,198 @@ from keras.preprocessing.sequence import _remove_long_seq from keras.utils.data_utils import get_file + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.datasets.imdb.load_data') -def load_data(path='imdb.npz', - num_words=None, - skip_top=0, - maxlen=None, - seed=113, - start_char=1, - oov_char=2, - index_from=3, - **kwargs): - """Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). - - This is a dataset of 25,000 movies reviews from IMDB, labeled by sentiment - (positive/negative). Reviews have been preprocessed, and each review is - encoded as a list of word indexes (integers). - For convenience, words are indexed by overall frequency in the dataset, - so that for instance the integer "3" encodes the 3rd most frequent word in - the data. This allows for quick filtering operations such as: - "only consider the top 10,000 most - common words, but eliminate the top 20 most common words". - - As a convention, "0" does not stand for a specific word, but instead is used - to encode any unknown word. - - Args: - path: where to cache the data (relative to `~/.keras/dataset`). - num_words: integer or None. Words are - ranked by how often they occur (in the training set) and only - the `num_words` most frequent words are kept. Any less frequent word - will appear as `oov_char` value in the sequence data. If None, - all words are kept. Defaults to None, so all words are kept. - skip_top: skip the top N most frequently occurring words - (which may not be informative). These words will appear as - `oov_char` value in the dataset. Defaults to 0, so no words are - skipped. - maxlen: int or None. Maximum sequence length. - Any longer sequence will be truncated. Defaults to None, which - means no truncation. - seed: int. Seed for reproducible data shuffling. - start_char: int. The start of a sequence will be marked with this - character. Defaults to 1 because 0 is usually the padding character. - oov_char: int. The out-of-vocabulary character. - Words that were cut out because of the `num_words` or - `skip_top` limits will be replaced with this character. - index_from: int. Index actual words with this index and higher. - **kwargs: Used for backwards compatibility. - - Returns: - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 
- - **x_train, x_test**: lists of sequences, which are lists of indexes - (integers). If the num_words argument was specific, the maximum - possible index value is `num_words - 1`. If the `maxlen` argument was - specified, the largest possible sequence length is `maxlen`. - - **y_train, y_test**: lists of integer labels (1 or 0). - - Raises: - ValueError: in case `maxlen` is so low - that no input sequence could be kept. - - Note that the 'out of vocabulary' character is only used for - words that were present in the training set but are not included - because they're not making the `num_words` cut here. - Words that were not seen in the training set but are in the test set - have simply been skipped. - """ - # Legacy support - if 'nb_words' in kwargs: - logging.warning('The `nb_words` argument in `load_data` ' - 'has been renamed `num_words`.') - num_words = kwargs.pop('nb_words') - if kwargs: - raise TypeError(f'Unrecognized keyword arguments: {str(kwargs)}.') - - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file( - path, - origin=origin_folder + 'imdb.npz', - file_hash= - '69664113be75683a8fe16e3ed0ab59fda8886cb3cd7ada244f7d9544e4676b9f') - with np.load(path, allow_pickle=True) as f: # pylint: disable=unexpected-keyword-arg - x_train, labels_train = f['x_train'], f['y_train'] - x_test, labels_test = f['x_test'], f['y_test'] - - rng = np.random.RandomState(seed) - indices = np.arange(len(x_train)) - rng.shuffle(indices) - x_train = x_train[indices] - labels_train = labels_train[indices] - - indices = np.arange(len(x_test)) - rng.shuffle(indices) - x_test = x_test[indices] - labels_test = labels_test[indices] - - if start_char is not None: - x_train = [[start_char] + [w + index_from for w in x] for x in x_train] - x_test = [[start_char] + [w + index_from for w in x] for x in x_test] - elif index_from: - x_train = [[w + index_from for w in x] for x in x_train] - x_test = [[w + index_from for w in x] for x in x_test] - - if maxlen: - x_train, labels_train = _remove_long_seq(maxlen, x_train, labels_train) - x_test, labels_test = _remove_long_seq(maxlen, x_test, labels_test) - if not x_train or not x_test: - raise ValueError('After filtering for sequences shorter than maxlen=' - f'{str(maxlen)}, no sequence was kept. Increase maxlen.') - - xs = x_train + x_test - labels = np.concatenate([labels_train, labels_test]) - - if not num_words: - num_words = max(max(x) for x in xs) - - # by convention, use 2 as OOV word - # reserve 'index_from' (=3 by default) characters: - # 0 (padding), 1 (start), 2 (OOV) - if oov_char is not None: - xs = [ - [w if (skip_top <= w < num_words) else oov_char for w in x] for x in xs - ] - else: - xs = [[w for w in x if skip_top <= w < num_words] for x in xs] - - idx = len(x_train) - x_train, y_train = np.array(xs[:idx], dtype='object'), labels[:idx] - x_test, y_test = np.array(xs[idx:], dtype='object'), labels[idx:] - return (x_train, y_train), (x_test, y_test) - - -@keras_export('keras.datasets.imdb.get_word_index') -def get_word_index(path='imdb_word_index.json'): - """Retrieves a dict mapping words to their index in the IMDB dataset. - - Args: +@keras_export("keras.datasets.imdb.load_data") +def load_data( + path="imdb.npz", + num_words=None, + skip_top=0, + maxlen=None, + seed=113, + start_char=1, + oov_char=2, + index_from=3, + **kwargs, +): + """Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). 
+
+    This is a dataset of 25,000 movie reviews from IMDB, labeled by sentiment
+    (positive/negative). Reviews have been preprocessed, and each review is
+    encoded as a list of word indexes (integers).
+    For convenience, words are indexed by overall frequency in the dataset,
+    so that for instance the integer "3" encodes the 3rd most frequent word in
+    the data. This allows for quick filtering operations such as:
+    "only consider the top 10,000 most
+    common words, but eliminate the top 20 most common words".
+
+    As a convention, "0" does not stand for a specific word, but instead is used
+    to encode the pad token.
+
+    Args:
+      path: where to cache the data (relative to `~/.keras/dataset`).
+      num_words: integer or None. Words are
+        ranked by how often they occur (in the training set) and only
+        the `num_words` most frequent words are kept. Any less frequent word
+        will appear as `oov_char` value in the sequence data. If None,
+        all words are kept. Defaults to `None`.
+      skip_top: skip the top N most frequently occurring words
+        (which may not be informative). These words will appear as
+        `oov_char` value in the dataset. When 0, no words are
+        skipped. Defaults to `0`.
+      maxlen: int or None. Maximum sequence length.
+        Any longer sequence will be truncated. None means no truncation.
+        Defaults to `None`.
+      seed: int. Seed for reproducible data shuffling.
+      start_char: int. The start of a sequence will be marked with this
+        character. 0 is usually the padding character. Defaults to `1`.
+      oov_char: int. The out-of-vocabulary character.
+        Words that were cut out because of the `num_words` or
+        `skip_top` limits will be replaced with this character.
+      index_from: int. Index actual words with this index and higher.
+      **kwargs: Used for backwards compatibility.
+
+    Returns:
+      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
+
+      **x_train, x_test**: lists of sequences, which are lists of indexes
+      (integers). If the num_words argument was specified, the maximum
+      possible index value is `num_words - 1`. If the `maxlen` argument was
+      specified, the largest possible sequence length is `maxlen`.
+
+      **y_train, y_test**: lists of integer labels (1 or 0).
+
+    Raises:
+      ValueError: in case `maxlen` is so low
+        that no input sequence could be kept.
+
+    Note that the 'out of vocabulary' character is only used for
+    words that were present in the training set but are not included
+    because they did not make the `num_words` cut here.
+    Words that were not seen in the training set but are in the test set
+    have simply been skipped.
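The "top 10,000 but not the top 20" filtering described above reduces to a single comprehension over frequency-rank indices. A plain-Python sketch of the rule the loader applies further down (ranks are the already shifted indices):

```python
oov_char, skip_top, num_words = 2, 20, 10000

# Keep ranks in [skip_top, num_words); everything else becomes oov_char.
sequence = [1, 5, 19, 20, 9999, 10000, 54321]
filtered = [w if skip_top <= w < num_words else oov_char for w in sequence]
assert filtered == [2, 2, 2, 20, 9999, 2, 2]
```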
+ """ + # Legacy support + if "nb_words" in kwargs: + logging.warning( + "The `nb_words` argument in `load_data` " + "has been renamed `num_words`." + ) + num_words = kwargs.pop("nb_words") + if kwargs: + raise TypeError(f"Unrecognized keyword arguments: {str(kwargs)}.") + + origin_folder = ( + "https://storage.googleapis.com/tensorflow/tf-keras-datasets/" + ) + path = get_file( + path, + origin=origin_folder + "imdb.npz", + file_hash=( # noqa: E501 + "69664113be75683a8fe16e3ed0ab59fda8886cb3cd7ada244f7d9544e4676b9f" + ), + ) + with np.load(path, allow_pickle=True) as f: + x_train, labels_train = f["x_train"], f["y_train"] + x_test, labels_test = f["x_test"], f["y_test"] + + rng = np.random.RandomState(seed) + indices = np.arange(len(x_train)) + rng.shuffle(indices) + x_train = x_train[indices] + labels_train = labels_train[indices] + + indices = np.arange(len(x_test)) + rng.shuffle(indices) + x_test = x_test[indices] + labels_test = labels_test[indices] + + if start_char is not None: + x_train = [[start_char] + [w + index_from for w in x] for x in x_train] + x_test = [[start_char] + [w + index_from for w in x] for x in x_test] + elif index_from: + x_train = [[w + index_from for w in x] for x in x_train] + x_test = [[w + index_from for w in x] for x in x_test] + + if maxlen: + x_train, labels_train = _remove_long_seq(maxlen, x_train, labels_train) + x_test, labels_test = _remove_long_seq(maxlen, x_test, labels_test) + if not x_train or not x_test: + raise ValueError( + "After filtering for sequences shorter than maxlen=" + f"{str(maxlen)}, no sequence was kept. Increase maxlen." + ) + + xs = x_train + x_test + labels = np.concatenate([labels_train, labels_test]) + + if not num_words: + num_words = max(max(x) for x in xs) + + # by convention, use 2 as OOV word + # reserve 'index_from' (=3 by default) characters: + # 0 (padding), 1 (start), 2 (OOV) + if oov_char is not None: + xs = [ + [w if (skip_top <= w < num_words) else oov_char for w in x] + for x in xs + ] + else: + xs = [[w for w in x if skip_top <= w < num_words] for x in xs] + + idx = len(x_train) + x_train, y_train = np.array(xs[:idx], dtype="object"), labels[:idx] + x_test, y_test = np.array(xs[idx:], dtype="object"), labels[idx:] + return (x_train, y_train), (x_test, y_test) + + +@keras_export("keras.datasets.imdb.get_word_index") +def get_word_index(path="imdb_word_index.json"): + """Retrieves a dict mapping words to their index in the IMDB dataset. + + Args: + path: where to cache the data (relative to `~/.keras/dataset`). + + Returns: + The word index dictionary. Keys are word strings, values are their + index. + + Example: + + ```python + # Use the default parameters to keras.datasets.imdb.load_data + start_char = 1 + oov_char = 2 + index_from = 3 + # Retrieve the training sequences. 
+ (x_train, _), _ = keras.datasets.imdb.load_data( + start_char=start_char, oov_char=oov_char, index_from=index_from + ) + # Retrieve the word index file mapping words to indices + word_index = keras.datasets.imdb.get_word_index() + # Reverse the word index to obtain a dict mapping indices to words + # And add `index_from` to indices to sync with `x_train` + inverted_word_index = dict( + (i + index_from, word) for (word, i) in word_index.items() + ) + # Update `inverted_word_index` to include `start_char` and `oov_char` + inverted_word_index[start_char] = "[START]" + inverted_word_index[oov_char] = "[OOV]" + # Decode the first sequence in the dataset + decoded_sequence = " ".join(inverted_word_index[i] for i in x_train[0]) + ``` + """ + origin_folder = ( + "https://storage.googleapis.com/tensorflow/tf-keras-datasets/" + ) + path = get_file( + path, + origin=origin_folder + "imdb_word_index.json", + file_hash="bfafd718b763782e994055a2d397834f", + ) + with open(path) as f: + return json.load(f) diff --git a/keras/datasets/mnist.py b/keras/datasets/mnist.py index 1bd4349fdf1b..a145d167affa 100644 --- a/keras/datasets/mnist.py +++ b/keras/datasets/mnist.py @@ -17,64 +17,70 @@ import numpy as np from keras.utils.data_utils import get_file + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.datasets.mnist.load_data') -def load_data(path='mnist.npz'): - """Loads the MNIST dataset. - - This is a dataset of 60,000 28x28 grayscale images of the 10 digits, - along with a test set of 10,000 images. - More info can be found at the - [MNIST homepage](http://yann.lecun.com/exdb/mnist/). - - Args: - path: path where to cache the dataset locally - (relative to `~/.keras/datasets`). - - Returns: - Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train**: uint8 NumPy array of grayscale image data with shapes - `(60000, 28, 28)`, containing the training data. Pixel values range - from 0 to 255. - - **y_train**: uint8 NumPy array of digit labels (integers in range 0-9) - with shape `(60000,)` for the training data. - - **x_test**: uint8 NumPy array of grayscale image data with shapes - (10000, 28, 28), containing the test data. Pixel values range - from 0 to 255. - - **y_test**: uint8 NumPy array of digit labels (integers in range 0-9) - with shape `(10000,)` for the test data. - - Example: - - ```python - (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() - assert x_train.shape == (60000, 28, 28) - assert x_test.shape == (10000, 28, 28) - assert y_train.shape == (60000,) - assert y_test.shape == (10000,) - ``` - - License: - Yann LeCun and Corinna Cortes hold the copyright of MNIST dataset, - which is a derivative work from original NIST datasets. - MNIST dataset is made available under the terms of the - [Creative Commons Attribution-Share Alike 3.0 license.]( - https://creativecommons.org/licenses/by-sa/3.0/) - """ - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file( - path, - origin=origin_folder + 'mnist.npz', - file_hash= - '731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1') - with np.load(path, allow_pickle=True) as f: # pylint: disable=unexpected-keyword-arg - x_train, y_train = f['x_train'], f['y_train'] - x_test, y_test = f['x_test'], f['y_test'] - - return (x_train, y_train), (x_test, y_test) +@keras_export("keras.datasets.mnist.load_data") +def load_data(path="mnist.npz"): + """Loads the MNIST dataset. 
+ + This is a dataset of 60,000 28x28 grayscale images of the 10 digits, + along with a test set of 10,000 images. + More info can be found at the + [MNIST homepage](http://yann.lecun.com/exdb/mnist/). + + Args: + path: path where to cache the dataset locally + (relative to `~/.keras/datasets`). + + Returns: + Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. + + **x_train**: uint8 NumPy array of grayscale image data with shapes + `(60000, 28, 28)`, containing the training data. Pixel values range + from 0 to 255. + + **y_train**: uint8 NumPy array of digit labels (integers in range 0-9) + with shape `(60000,)` for the training data. + + **x_test**: uint8 NumPy array of grayscale image data with shapes + (10000, 28, 28), containing the test data. Pixel values range + from 0 to 255. + + **y_test**: uint8 NumPy array of digit labels (integers in range 0-9) + with shape `(10000,)` for the test data. + + Example: + + ```python + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + assert x_train.shape == (60000, 28, 28) + assert x_test.shape == (10000, 28, 28) + assert y_train.shape == (60000,) + assert y_test.shape == (10000,) + ``` + + License: + Yann LeCun and Corinna Cortes hold the copyright of MNIST dataset, + which is a derivative work from original NIST datasets. + MNIST dataset is made available under the terms of the + [Creative Commons Attribution-Share Alike 3.0 license.]( + https://creativecommons.org/licenses/by-sa/3.0/) + """ + origin_folder = ( + "https://storage.googleapis.com/tensorflow/tf-keras-datasets/" + ) + path = get_file( + path, + origin=origin_folder + "mnist.npz", + file_hash=( # noqa: E501 + "731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1" + ), + ) + with np.load(path, allow_pickle=True) as f: + x_train, y_train = f["x_train"], f["y_train"] + x_test, y_test = f["x_test"], f["y_test"] + + return (x_train, y_train), (x_test, y_test) diff --git a/keras/datasets/reuters.py b/keras/datasets/reuters.py index 8aec4906c532..38cc15e33d98 100644 --- a/keras/datasets/reuters.py +++ b/keras/datasets/reuters.py @@ -20,146 +20,230 @@ from keras.preprocessing.sequence import _remove_long_seq from keras.utils.data_utils import get_file + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.datasets.reuters.load_data') -def load_data(path='reuters.npz', - num_words=None, - skip_top=0, - maxlen=None, - test_split=0.2, - seed=113, - start_char=1, - oov_char=2, - index_from=3, - **kwargs): - """Loads the Reuters newswire classification dataset. - - This is a dataset of 11,228 newswires from Reuters, labeled over 46 topics. - - This was originally generated by parsing and preprocessing the classic - Reuters-21578 dataset, but the preprocessing code is no longer packaged - with Keras. See this - [github discussion](https://github.com/keras-team/keras/issues/12072) - for more info. - - Each newswire is encoded as a list of word indexes (integers). - For convenience, words are indexed by overall frequency in the dataset, - so that for instance the integer "3" encodes the 3rd most frequent word in - the data. This allows for quick filtering operations such as: - "only consider the top 10,000 most - common words, but eliminate the top 20 most common words". - - As a convention, "0" does not stand for a specific word, but instead is used - to encode any unknown word. 
- - Args: - path: where to cache the data (relative to `~/.keras/dataset`). - num_words: integer or None. Words are - ranked by how often they occur (in the training set) and only - the `num_words` most frequent words are kept. Any less frequent word - will appear as `oov_char` value in the sequence data. If None, - all words are kept. Defaults to None, so all words are kept. - skip_top: skip the top N most frequently occurring words - (which may not be informative). These words will appear as - `oov_char` value in the dataset. Defaults to 0, so no words are - skipped. - maxlen: int or None. Maximum sequence length. - Any longer sequence will be truncated. Defaults to None, which - means no truncation. - test_split: Float between 0 and 1. Fraction of the dataset to be used - as test data. Defaults to 0.2, meaning 20% of the dataset is used as - test data. - seed: int. Seed for reproducible data shuffling. - start_char: int. The start of a sequence will be marked with this - character. Defaults to 1 because 0 is usually the padding character. - oov_char: int. The out-of-vocabulary character. - Words that were cut out because of the `num_words` or - `skip_top` limits will be replaced with this character. - index_from: int. Index actual words with this index and higher. - **kwargs: Used for backwards compatibility. - - Returns: - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train, x_test**: lists of sequences, which are lists of indexes - (integers). If the num_words argument was specific, the maximum - possible index value is `num_words - 1`. If the `maxlen` argument was - specified, the largest possible sequence length is `maxlen`. - - **y_train, y_test**: lists of integer labels (1 or 0). - - Note: The 'out of vocabulary' character is only used for - words that were present in the training set but are not included - because they're not making the `num_words` cut here. - Words that were not seen in the training set but are in the test set - have simply been skipped. 
- """ - # Legacy support - if 'nb_words' in kwargs: - logging.warning('The `nb_words` argument in `load_data` ' - 'has been renamed `num_words`.') - num_words = kwargs.pop('nb_words') - if kwargs: - raise TypeError(f'Unrecognized keyword arguments: {str(kwargs)}') - - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file( - path, - origin=origin_folder + 'reuters.npz', - file_hash= - 'd6586e694ee56d7a4e65172e12b3e987c03096cb01eab99753921ef915959916') - with np.load(path, allow_pickle=True) as f: # pylint: disable=unexpected-keyword-arg - xs, labels = f['x'], f['y'] - - rng = np.random.RandomState(seed) - indices = np.arange(len(xs)) - rng.shuffle(indices) - xs = xs[indices] - labels = labels[indices] - - if start_char is not None: - xs = [[start_char] + [w + index_from for w in x] for x in xs] - elif index_from: - xs = [[w + index_from for w in x] for x in xs] - - if maxlen: - xs, labels = _remove_long_seq(maxlen, xs, labels) - - if not num_words: - num_words = max(max(x) for x in xs) - - # by convention, use 2 as OOV word - # reserve 'index_from' (=3 by default) characters: - # 0 (padding), 1 (start), 2 (OOV) - if oov_char is not None: - xs = [[w if skip_top <= w < num_words else oov_char for w in x] for x in xs] - else: - xs = [[w for w in x if skip_top <= w < num_words] for x in xs] - - idx = int(len(xs) * (1 - test_split)) - x_train, y_train = np.array(xs[:idx], dtype='object'), np.array(labels[:idx]) - x_test, y_test = np.array(xs[idx:], dtype='object'), np.array(labels[idx:]) - - return (x_train, y_train), (x_test, y_test) - - -@keras_export('keras.datasets.reuters.get_word_index') -def get_word_index(path='reuters_word_index.json'): - """Retrieves a dict mapping words to their index in the Reuters dataset. - - Args: +@keras_export("keras.datasets.reuters.load_data") +def load_data( + path="reuters.npz", + num_words=None, + skip_top=0, + maxlen=None, + test_split=0.2, + seed=113, + start_char=1, + oov_char=2, + index_from=3, + **kwargs, +): + """Loads the Reuters newswire classification dataset. + + This is a dataset of 11,228 newswires from Reuters, labeled over 46 topics. + + This was originally generated by parsing and preprocessing the classic + Reuters-21578 dataset, but the preprocessing code is no longer packaged + with Keras. See this + [GitHub discussion](https://github.com/keras-team/keras/issues/12072) + for more info. + + Each newswire is encoded as a list of word indexes (integers). + For convenience, words are indexed by overall frequency in the dataset, + so that for instance the integer "3" encodes the 3rd most frequent word in + the data. This allows for quick filtering operations such as: + "only consider the top 10,000 most + common words, but eliminate the top 20 most common words". + + As a convention, "0" does not stand for a specific word, but instead is used + to encode any unknown word. + + Args: path: where to cache the data (relative to `~/.keras/dataset`). - - Returns: - The word index dictionary. Keys are word strings, values are their index. - """ - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file( - path, - origin=origin_folder + 'reuters_word_index.json', - file_hash='4d44cc38712099c9e383dc6e5f11a921') - with open(path) as f: - return json.load(f) + num_words: integer or None. Words are + ranked by how often they occur (in the training set) and only + the `num_words` most frequent words are kept. 
Any less frequent word
+        will appear as `oov_char` value in the sequence data. If None,
+        all words are kept. Defaults to `None`.
+      skip_top: skip the top N most frequently occurring words
+        (which may not be informative). These words will appear as
+        `oov_char` value in the dataset. 0 means no words are
+        skipped. Defaults to `0`.
+      maxlen: int or None. Maximum sequence length.
+        Any longer sequence will be truncated. None means no truncation.
+        Defaults to `None`.
+      test_split: Float between `0.` and `1.`. Fraction of the dataset to be
+        used as test data. `0.2` means that 20% of the dataset is used as
+        test data. Defaults to `0.2`.
+      seed: int. Seed for reproducible data shuffling.
+      start_char: int. The start of a sequence will be marked with this
+        character. 0 is usually the padding character. Defaults to `1`.
+      oov_char: int. The out-of-vocabulary character.
+        Words that were cut out because of the `num_words` or
+        `skip_top` limits will be replaced with this character.
+      index_from: int. Index actual words with this index and higher.
+      **kwargs: Used for backwards compatibility.
+
+    Returns:
+      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
+
+      **x_train, x_test**: lists of sequences, which are lists of indexes
+      (integers). If the num_words argument was specified, the maximum
+      possible index value is `num_words - 1`. If the `maxlen` argument was
+      specified, the largest possible sequence length is `maxlen`.
+
+      **y_train, y_test**: lists of integer labels (1 or 0).
+
+    Note: The 'out of vocabulary' character is only used for
+    words that were present in the training set but are not included
+    because they did not make the `num_words` cut here.
+    Words that were not seen in the training set but are in the test set
+    have simply been skipped.
+    """
+    # Legacy support
+    if "nb_words" in kwargs:
+        logging.warning(
+            "The `nb_words` argument in `load_data` "
+            "has been renamed `num_words`."
+        )
+        num_words = kwargs.pop("nb_words")
+    if kwargs:
+        raise TypeError(f"Unrecognized keyword arguments: {str(kwargs)}")
+
+    origin_folder = (
+        "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"
+    )
+    path = get_file(
+        path,
+        origin=origin_folder + "reuters.npz",
+        file_hash=(  # noqa: E501
+            "d6586e694ee56d7a4e65172e12b3e987c03096cb01eab99753921ef915959916"
+        ),
+    )
+    with np.load(path, allow_pickle=True) as f:
+        xs, labels = f["x"], f["y"]
+
+    rng = np.random.RandomState(seed)
+    indices = np.arange(len(xs))
+    rng.shuffle(indices)
+    xs = xs[indices]
+    labels = labels[indices]
+
+    if start_char is not None:
+        xs = [[start_char] + [w + index_from for w in x] for x in xs]
+    elif index_from:
+        xs = [[w + index_from for w in x] for x in xs]
+
+    if maxlen:
+        xs, labels = _remove_long_seq(maxlen, xs, labels)
+
+    if not num_words:
+        num_words = max(max(x) for x in xs)
+
+    # by convention, use 2 as OOV word
+    # reserve 'index_from' (=3 by default) characters:
+    # 0 (padding), 1 (start), 2 (OOV)
+    if oov_char is not None:
+        xs = [
+            [w if skip_top <= w < num_words else oov_char for w in x]
+            for x in xs
+        ]
+    else:
+        xs = [[w for w in x if skip_top <= w < num_words] for x in xs]
+
+    idx = int(len(xs) * (1 - test_split))
+    x_train, y_train = np.array(xs[:idx], dtype="object"), np.array(
+        labels[:idx]
+    )
+    x_test, y_test = np.array(xs[idx:], dtype="object"), np.array(labels[idx:])
+
+    return (x_train, y_train), (x_test, y_test)
+
+
+@keras_export("keras.datasets.reuters.get_word_index")
+def get_word_index(path="reuters_word_index.json"):
+    """Retrieves a dict mapping words to their index in the Reuters dataset.
+
+    Actual word indices start from 3, with 3 indices reserved for:
+    0 (padding), 1 (start), 2 (oov).
+
+    E.g. the word index of 'the' is 1, but in the actual training data, the
+    index of 'the' will be 1 + 3 = 4. Conversely, to translate word indices in
+    training data back to words using this mapping, subtract 3 from the
+    indices.
+
+    Args:
+        path: where to cache the data (relative to `~/.keras/dataset`).
+
+    Returns:
+        The word index dictionary. Keys are word strings, values are their
+        index.
+    """
+    origin_folder = (
+        "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"
+    )
+    path = get_file(
+        path,
+        origin=origin_folder + "reuters_word_index.json",
+        file_hash="4d44cc38712099c9e383dc6e5f11a921",
+    )
+    with open(path) as f:
+        return json.load(f)
+
+
+@keras_export("keras.datasets.reuters.get_label_names")
+def get_label_names():
+    """Returns labels as a list of strings with indices matching training data.
+
+    Reference:
+
+    - [Reuters Dataset](https://martin-thoma.com/nlp-reuters/)
+    """
+    return (
+        "cocoa",
+        "grain",
+        "veg-oil",
+        "earn",
+        "acq",
+        "wheat",
+        "copper",
+        "housing",
+        "money-supply",
+        "coffee",
+        "sugar",
+        "trade",
+        "reserves",
+        "ship",
+        "cotton",
+        "carcass",
+        "crude",
+        "nat-gas",
+        "cpi",
+        "money-fx",
+        "interest",
+        "gnp",
+        "meal-feed",
+        "alum",
+        "oilseed",
+        "gold",
+        "tin",
+        "strategic-metal",
+        "livestock",
+        "retail",
+        "ipi",
+        "iron-steel",
+        "rubber",
+        "heat",
+        "jobs",
+        "lei",
+        "bop",
+        "zinc",
+        "orange",
+        "pet-chem",
+        "dlr",
+        "gas",
+        "silver",
+        "wpi",
+        "hog",
+        "lead",
+    )
diff --git a/keras/distribute/BUILD b/keras/distribute/BUILD
index 63b7fd485342..7c5d1c04714d 100644
--- a/keras/distribute/BUILD
+++ b/keras/distribute/BUILD
@@ -2,11 +2,13 @@
 # keras/distribute package is intended to serve as the centralized place for things
 # related to dist-strat used by Keras..
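Putting `get_word_index`, the `index_from` offset, and `get_label_names` above together, decoding a newswire looks roughly like this (a sketch that fetches the dataset over the network; the `[PAD]`/`[START]`/`[OOV]` marker names are illustrative, not part of the API):

```python
from keras.datasets import reuters

(x_train, y_train), _ = reuters.load_data()
word_index = reuters.get_word_index()

# Training indices are shifted by index_from (3), with 0/1/2 reserved.
inverted = {i + 3: word for word, i in word_index.items()}
inverted.update({0: "[PAD]", 1: "[START]", 2: "[OOV]"})

decoded = " ".join(inverted.get(i, "[OOV]") for i in x_train[0])
print(reuters.get_label_names()[y_train[0]], "->", decoded[:60])
```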
+# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "distribute_py_test") load("@org_keras//keras:keras.bzl", "cuda_py_test") load("@org_keras//keras:keras.bzl", "tf_py_test") # buildifier: disable=same-origin-load package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], # TODO(scottzhu): Remove this deps when distribute test are converted to integration test. default_visibility = [ "//keras:friends", @@ -26,7 +28,6 @@ py_library( srcs_version = "PY3", deps = [ ":distribute_coordinator_utils", - ":sidecar_evaluator", "//:expect_tensorflow_installed", "//keras:backend", "//keras:callbacks", @@ -63,8 +64,8 @@ py_library( srcs_version = "PY3", deps = [ "//:expect_tensorflow_installed", - "//keras/optimizers/optimizer_experimental:optimizer", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers", + "//keras/optimizers/legacy:optimizers", ], ) @@ -121,6 +122,19 @@ py_library( ], ) +cuda_py_test( + name = "model_checkpoint_test", + srcs = ["model_checkpoint_test.py"], + python_version = "PY3", + shard_count = 4, + deps = [ + ":multi_worker_testing_utils", + ":worker_training_state", + "//:expect_tensorflow_installed", + "//keras", + ], +) + cuda_py_test( name = "worker_training_state_test", srcs = ["worker_training_state_test.py"], @@ -144,7 +158,7 @@ distribute_py_test( ], deps = [ "//:expect_tensorflow_installed", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", ], ) @@ -174,12 +188,14 @@ cuda_py_test( distribute_py_test( name = "ctl_correctness_test", srcs = ["ctl_correctness_test.py"], + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, main = "ctl_correctness_test.py", shard_count = 10, tags = [ "multi_and_single_gpu", "no_cuda_asan", # times out - "no_oss", # TODO(b/226938240): Timeout "nomultivm", # TODO(b/170502145) ], deps = [ @@ -246,7 +262,7 @@ distribute_py_test( ":strategy_combinations", "//:expect_absl_installed", "//:expect_tensorflow_installed", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", ], ) @@ -273,13 +289,16 @@ distribute_py_test( size = "medium", srcs = ["keras_premade_models_test.py"], disable_mlir_bridge = False, + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, full_precision = True, main = "keras_premade_models_test.py", shard_count = 8, tags = [ "multi_and_single_gpu", - "no_oss", # TODO(b/226938240): Reenable "nomultivm", # TODO(b/170502145) + "requires-mem:28g", # spawns multiple processes. ], deps = [ ":distribute_strategy_test_lib", @@ -415,7 +434,6 @@ distribute_py_test( shard_count = 16, tags = [ "multi_and_single_gpu", - "no_oss", # TODO(b/226938240): Reenable "no_rocm", # times out on ROCm "no_windows_gpu", "noasan", # TODO(b/337374867) fails with -fsanitize=null @@ -434,6 +452,9 @@ distribute_py_test( name = "keras_metrics_test", srcs = ["keras_metrics_test.py"], disable_mlir_bridge = False, + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, main = "keras_metrics_test.py", shard_count = 8, tags = [ @@ -475,6 +496,7 @@ distribute_py_test( shard_count = 31, tags = [ "multi_and_single_gpu", + "no_oss", # TODO(b/277925387) "no_rocm", # Would require size large, but that effectively disables the test for presubmits. "no_windows_gpu", "noasan", # TODO(b/337374867) fails with -fsanitize=null @@ -536,6 +558,7 @@ distribute_py_test( tags = [ "multi_and_single_gpu", "no_cuda_asan", # times out + "no_pip", # The test imports distribute_strategy_test which is not in the pip package. 
"no_windows_gpu", "nomultivm", # TODO(b/170502145) "notsan", @@ -645,7 +668,7 @@ cuda_py_test( "//keras:callbacks", "//keras:engine", "//keras/optimizers", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/utils:kpl_test_utils", ], ) @@ -675,7 +698,7 @@ py_library( deps = [ "//:expect_tensorflow_installed", "//keras", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", ], ) @@ -766,6 +789,27 @@ distribute_py_test( ], ) +distribute_py_test( + name = "parameter_server_exact_evaluation_test", + srcs = ["parameter_server_exact_evaluation_test.py"], + python_version = "PY3", + shard_count = 29, + tags = [ + "multi_and_single_gpu", + "no_cuda_asan", # TODO(b/186361027) + "no_oss", # TODO(b/186248973) + "no_tfrt", + "nomultivm", # TODO(b/170502145) + "notpu", + ], + deps = [ + "//:expect_tensorflow_installed", + "//keras", + "//keras/testing_infra:test_utils", + "//keras/utils:dataset_creator", + ], +) + distribute_py_test( name = "dataset_creator_model_fit_test", srcs = ["dataset_creator_model_fit_test.py"], @@ -838,30 +882,6 @@ tf_py_test( ], ) -py_library( - name = "sidecar_evaluator", - srcs = ["sidecar_evaluator.py"], - srcs_version = "PY3", - deps = [ - "//:expect_tensorboard_installed", - "//:expect_tensorflow_installed", - ], -) - -tf_py_test( - name = "sidecar_evaluator_test", - size = "medium", - srcs = ["sidecar_evaluator_test.py"], - python_version = "PY3", - deps = [ - ":sidecar_evaluator", - "//:expect_absl_installed", - "//:expect_tensorflow_installed", - "//keras", - "//keras/testing_infra:test_utils", - ], -) - py_library( name = "strategy_combinations", srcs = ["strategy_combinations.py"], @@ -905,7 +925,7 @@ py_library( "//keras/engine", "//keras/layers/core", "//keras/layers/preprocessing:string_lookup", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/utils:dataset_creator", ], ) diff --git a/keras/distribute/__init__.py b/keras/distribute/__init__.py index 9348b6fe3d01..808055096522 100644 --- a/keras/distribute/__init__.py +++ b/keras/distribute/__init__.py @@ -13,6 +13,3 @@ # limitations under the License. 
# ============================================================================== """Keras' Distribution Strategy library.""" - -# pylint: disable=unused-import -from keras.distribute import sidecar_evaluator diff --git a/keras/distribute/checkpointing_test.py b/keras/distribute/checkpointing_test.py index b03ce0703e02..a3d586fbc749 100644 --- a/keras/distribute/checkpointing_test.py +++ b/keras/distribute/checkpointing_test.py @@ -16,113 +16,117 @@ import os import tensorflow.compat.v2 as tf - from absl.testing import parameterized -from keras.optimizers.optimizer_v2 import adam +from keras.optimizers.legacy import adam -class TrainingCheckpointTests(tf.test.TestCase, parameterized.TestCase): - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_one_cpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.tpu_strategy, - tf.__internal__.distribute.combinations.tpu_strategy_packed_var, - tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, - ], - mode=["eager"])) - def testCheckpointRestoreOptimizerSlots(self, distribution): - def state(): - with distribution.scope(): - v = tf.Variable(tf.random.normal([])) - opt = adam.Adam(0.001) - - @tf.function - def step(): - def f(): - with tf.GradientTape() as tape: - loss = v + v - gradients = tape.gradient(loss, [v]) - opt.apply_gradients(zip(gradients, [v])) - - distribution.run(f) - - return v, opt, step - - def checkpoint(): - v, opt, step = state() - step() - - # Save random weights into checkpoint. - checkpoint = tf.train.Checkpoint(v=v, opt=opt) - prefix = os.path.join(self.get_temp_dir(), "ckpt") - with self.test_session(): - save_path = checkpoint.save(prefix) - return save_path - - save_path = checkpoint() - - v, opt, step = state() - checkpoint = tf.train.Checkpoint(v=v, opt=opt) - # Restore from the checkpoint inside a distribution.scope(). - with self.test_session(): - with distribution.scope(): - checkpoint.restore(save_path) - step() - slot = opt.get_slot(v, "m") - self.assertEqual(v._distribute_strategy, slot._distribute_strategy) - - v, opt, step = state() - checkpoint = tf.train.Checkpoint(v=v, opt=opt) - # Restore from the checkpoint outside a distribution.scope(). - with self.test_session(): - with self.assertRaisesRegex( - ValueError, "optimizer slot variable under the scope"): - checkpoint.restore(save_path) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_one_cpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.cloud_tpu_strategy, - tf.__internal__.distribute.combinations.tpu_strategy, - tf.__internal__.distribute.combinations.tpu_strategy_packed_var, - tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, - ], - mode=["eager"])) - def testCheckpointSaveRestoreIoDevice(self, distribution): - - def state(): - with distribution.scope(): - v = tf.Variable(tf.random.normal([])) - return v - - ckpt_options = tf.train.CheckpointOptions( - experimental_io_device="/job:localhost") - - def checkpoint(): - v = state() - # Save random weights into checkpoint. 
- checkpoint = tf.train.Checkpoint(v=v) - prefix = os.path.join(self.get_temp_dir(), "ckpt") - with self.test_session(): - save_path = checkpoint.save(prefix, options=ckpt_options) - return save_path - - save_path = checkpoint() - - v = state() - checkpoint = tf.train.Checkpoint(v=v) - # Restore from the checkpoint inside a distribution.scope(). - # Check that restore works without error. - with self.test_session(): - with distribution.scope(): - checkpoint.restore(save_path, options=ckpt_options) +class TrainingCheckpointTests(tf.test.TestCase, parameterized.TestCase): + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_one_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.tpu_strategy, # noqa: E501 + tf.__internal__.distribute.combinations.tpu_strategy_packed_var, # noqa: E501 + tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, # noqa: E501 + ], + mode=["eager"], + ) + ) + def testCheckpointRestoreOptimizerSlots(self, distribution): + def state(): + with distribution.scope(): + v = tf.Variable(tf.random.normal([])) + opt = adam.Adam(0.001) + + @tf.function + def step(): + def f(): + with tf.GradientTape() as tape: + loss = v + v + gradients = tape.gradient(loss, [v]) + opt.apply_gradients(zip(gradients, [v])) + + distribution.run(f) + + return v, opt, step + + def checkpoint(): + v, opt, step = state() + step() + + # Save random weights into checkpoint. + checkpoint = tf.train.Checkpoint(v=v, opt=opt) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + with self.test_session(): + save_path = checkpoint.save(prefix) + return save_path + + save_path = checkpoint() + + v, opt, step = state() + checkpoint = tf.train.Checkpoint(v=v, opt=opt) + # Restore from the checkpoint inside a distribution.scope(). + with self.test_session(): + with distribution.scope(): + checkpoint.restore(save_path) + step() + slot = opt.get_slot(v, "m") + self.assertEqual(v._distribute_strategy, slot._distribute_strategy) + + v, opt, step = state() + checkpoint = tf.train.Checkpoint(v=v, opt=opt) + # Restore from the checkpoint outside a distribution.scope(). + with self.test_session(): + with self.assertRaisesRegex( + ValueError, "optimizer slot variable under the scope" + ): + checkpoint.restore(save_path) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_one_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.cloud_tpu_strategy, # noqa: E501 + tf.__internal__.distribute.combinations.tpu_strategy, # noqa: E501 + tf.__internal__.distribute.combinations.tpu_strategy_packed_var, # noqa: E501 + tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, # noqa: E501 + ], + mode=["eager"], + ) + ) + def testCheckpointSaveRestoreIoDevice(self, distribution): + def state(): + with distribution.scope(): + v = tf.Variable(tf.random.normal([])) + return v + + ckpt_options = tf.train.CheckpointOptions( + experimental_io_device="/job:localhost" + ) + + def checkpoint(): + v = state() + # Save random weights into checkpoint. 
+ checkpoint = tf.train.Checkpoint(v=v) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + with self.test_session(): + save_path = checkpoint.save(prefix, options=ckpt_options) + return save_path + + save_path = checkpoint() + + v = state() + checkpoint = tf.train.Checkpoint(v=v) + # Restore from the checkpoint inside a distribution.scope(). + # Check that restore works without error. + with self.test_session(): + with distribution.scope(): + checkpoint.restore(save_path, options=ckpt_options) if __name__ == "__main__": - tf.compat.v1.enable_eager_execution() - tf.test.main() + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/distribute/collective_all_reduce_strategy_test.py b/keras/distribute/collective_all_reduce_strategy_test.py index da485d062f2b..42992cef34b9 100644 --- a/keras/distribute/collective_all_reduce_strategy_test.py +++ b/keras/distribute/collective_all_reduce_strategy_test.py @@ -15,58 +15,56 @@ """Tests for CollectiveAllReduceStrategy.""" import tensorflow.compat.v2 as tf - from absl.testing import parameterized + from keras import layers -from keras.testing_infra import test_utils from keras.engine import training -from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_keras +from keras.optimizers.legacy import gradient_descent as gradient_descent_keras +from keras.testing_infra import test_utils @test_utils.run_v2_only @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( strategy=[ - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu, - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, # noqa: E501 ], - mode=['eager'])) + mode=["eager"], + ) +) class MultiWorkerMirroredStrategyTest(tf.test.TestCase, parameterized.TestCase): + def testFitWithoutStepsPerEpochPartialBatch(self, strategy): + def _model_fn(): + x = layers.Input(shape=(1,), name="input") + y = layers.Dense(1, name="dense")(x) + model = training.Model(x, y) + return model - def testFitWithoutStepsPerEpochPartialBatch(self, strategy): - - def _model_fn(): - x = layers.Input(shape=(1,), name='input') - y = layers.Dense(1, name='dense')(x) - model = training.Model(x, y) - return model - - def _get_dataset(): - inputs = tf.expand_dims( - tf.constant(range(10)), axis=1) - targets = tf.expand_dims( - tf.constant(range(10)), axis=1) - # Make global batch size 12 for 2 replicas and a non-repeated dataset - # with 10 elements so that we have partial batch - dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch( - 12, drop_remainder=False) - return dataset + def _get_dataset(): + inputs = tf.expand_dims(tf.constant(range(10)), axis=1) + targets = tf.expand_dims(tf.constant(range(10)), axis=1) + # Make global batch size 12 for 2 replicas and a non-repeated + # dataset with 10 elements so that we have partial batch + dataset = tf.data.Dataset.from_tensor_slices( + (inputs, targets) + ).batch(12, drop_remainder=False) + return dataset - with strategy.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.001) - model = _model_fn() - loss = 'mse' - metrics = ['mae'] - model.compile(optimizer, loss, metrics=metrics) - dataset = _get_dataset() - kernel_before = model.get_weights()[0][0] - model.fit(dataset, epochs=10) - kernel_after = model.get_weights()[0][0] - 
self.assertNotEqual(kernel_before, kernel_after) - self.assertGreater(abs(kernel_before - 1), abs(kernel_after - 1)) + with strategy.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + model = _model_fn() + loss = "mse" + metrics = ["mae"] + model.compile(optimizer, loss, metrics=metrics) + dataset = _get_dataset() + kernel_before = model.get_weights()[0][0] + model.fit(dataset, epochs=10) + kernel_after = model.get_weights()[0][0] + self.assertNotEqual(kernel_before, kernel_after) + self.assertGreater(abs(kernel_before - 1), abs(kernel_after - 1)) -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/ctl_correctness_test.py b/keras/distribute/ctl_correctness_test.py index d5be37b534a5..48b15e8fb245 100644 --- a/keras/distribute/ctl_correctness_test.py +++ b/keras/distribute/ctl_correctness_test.py @@ -14,7 +14,10 @@ # ============================================================================== """Custom Training Loop correctness test.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras import optimizers from keras.applications import resnet_v2 @@ -22,9 +25,8 @@ from keras.distribute import optimizer_combinations from keras.distribute import strategy_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.ops.losses import losses_impl _NUM_SAMPLES = 66 @@ -35,382 +37,445 @@ class MaybeStrategyScope: - """Provides a context allowing no distribution strategy.""" + """Provides a context allowing no distribution strategy.""" - def __init__(self, strategy): - self._strategy = strategy - self._scope = None + def __init__(self, strategy): + self._strategy = strategy + self._scope = None - def __enter__(self): - if self._strategy: - self._scope = self._strategy.scope() - self._scope.__enter__() + def __enter__(self): + if self._strategy: + self._scope = self._strategy.scope() + self._scope.__enter__() - def __exit__(self, exc_type, value, traceback): - if self._strategy: - self._scope.__exit__(exc_type, value, traceback) - self._scope = None + def __exit__(self, exc_type, value, traceback): + if self._strategy: + self._scope.__exit__(exc_type, value, traceback) + self._scope = None def get_model(sync_batchnorm=False): - model = keras.Sequential() - model.add(keras.layers.Dense(10, activation='relu', input_shape=(1,))) - model.add(keras.layers.Dense( - 10, activation='relu', - kernel_regularizer=keras.regularizers.l2(1e-4))) - if sync_batchnorm: - model.add(keras.layers.SyncBatchNormalization()) - else: - model.add(keras.layers.BatchNormalization()) - model.add(keras.layers.Dense(10, activation='relu')) - model.add(keras.layers.Dense(1)) - return model + model = keras.Sequential() + model.add(keras.layers.Dense(10, activation="relu", input_shape=(1,))) + model.add( + keras.layers.Dense( + 10, + activation="relu", + kernel_regularizer=keras.regularizers.l2(1e-4), + ) + ) + if sync_batchnorm: + model.add(keras.layers.BatchNormalization(synchronized=True)) + else: + model.add(keras.layers.BatchNormalization()) + model.add(keras.layers.Dense(10, activation="relu")) + model.add(keras.layers.Dense(1)) + return model def get_data(): - x_train = np.random.rand(_NUM_SAMPLES, 1) - y_train = 3 * x_train - x_train = x_train.astype('float32') - y_train = 
y_train.astype('float32') - train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - train_dataset = train_dataset.batch(_BATCH_SIZE) - return train_dataset + x_train = np.random.rand(_NUM_SAMPLES, 1) + y_train = 3 * x_train + x_train = x_train.astype("float32") + y_train = y_train.astype("float32") + train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) + train_dataset = train_dataset.batch(_BATCH_SIZE) + return train_dataset def compute_loss(labels, logits, reg_losses): - pred_loss = keras.losses.mean_squared_error(labels, logits) - scaled_loss = tf.nn.compute_average_loss( - pred_loss, global_batch_size=_BATCH_SIZE) - l2_loss = tf.nn.scale_regularization_loss(reg_losses) - return scaled_loss + l2_loss - - -def iteration_inside_func(initial_weights, - dataset, - optimizer_fn, - iteration_type, - strategy=None, - sync_batchnorm=None, - jit_compile=False): - """Helper function to test iterating over data inside a tf.function.""" - with MaybeStrategyScope(strategy): - if strategy and sync_batchnorm: - model = get_model(sync_batchnorm) - else: - model = get_model() - model.set_weights(initial_weights) - optimizer = optimizer_fn() - - training_accuracy = keras.metrics.CategoricalAccuracy( - 'training_accuracy', dtype=tf.float32) - - @tf.function - def train_epoch(dist_input): - """Training StepFn.""" - - @tf.function(jit_compile=jit_compile) - def step_fn(inputs): - samples, labels = inputs - with tf.GradientTape() as tape: - logits = model(samples) - loss = compute_loss(labels, logits, model.losses) - grads = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(zip(grads, model.trainable_variables)) - training_accuracy.update_state(labels, logits) - return loss - - total_loss = 0.0 - num_batches = 0 - if iteration_type == 'dataset': - for x in dist_input: - if strategy: - per_replica_losses = strategy.run(step_fn, args=(x,)) - total_loss += strategy.reduce(tf.distribute.ReduceOp.SUM, - per_replica_losses, - axis=None) - else: - total_loss += step_fn(x) - num_batches += 1 - else: - iterator = iter(dist_input) - for _ in range(_STEPS_PER_EPOCH): - if strategy: - per_replica_losses = strategy.run(step_fn, args=(next(iterator),)) - total_loss += strategy.reduce(tf.distribute.ReduceOp.SUM, - per_replica_losses, - axis=None) - else: - total_loss += step_fn(next(iterator)) - num_batches += 1 - - return total_loss / tf.cast(num_batches, dtype=tf.float32) - - if strategy: - dataset = strategy.experimental_distribute_dataset(dataset) - - for _ in range(_NUM_EPOCHS): - loss = train_epoch(dataset) - - return (model.get_weights(), - loss, - training_accuracy.result()) - - -def iteration_outside_func(initial_weights, - dataset, - optimizer_fn, - iteration_type, - strategy=None, - sync_batchnorm=None, - jit_compile=False): - """Helper function to test iterating over data outside a tf.function.""" - with MaybeStrategyScope(strategy): - model = get_model(sync_batchnorm=sync_batchnorm) - model.set_weights(initial_weights) - optimizer = optimizer_fn() - - training_accuracy = keras.metrics.CategoricalAccuracy( - 'training_accuracy', dtype=tf.float32) - - @tf.function - def train_step(dist_inputs): - """Training StepFn.""" - - @tf.function(jit_compile=jit_compile) - def step_fn(inputs): - samples, labels = inputs - with tf.GradientTape() as tape: - logits = model(samples) - loss = compute_loss(labels, logits, model.losses) - grads = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(zip(grads, model.trainable_variables)) - 
training_accuracy.update_state(labels, logits) - return loss - - if strategy: - per_replica_losses = strategy.run(step_fn, args=(dist_inputs,)) - return strategy.reduce(tf.distribute.ReduceOp.SUM, - per_replica_losses, - axis=None) - else: - return step_fn(dist_inputs) - - if strategy: - dataset = strategy.experimental_distribute_dataset(dataset) - - total_loss = 0.0 - num_batches = 0 - if iteration_type == 'dataset': - for _ in range(_NUM_EPOCHS): - for x in dataset: - total_loss += train_step(x) - num_batches += 1 - else: - for _ in range(_NUM_EPOCHS): - iterator = iter(dataset) - for _ in range(_STEPS_PER_EPOCH): - total_loss += train_step(next(iterator)) - num_batches += 1 - - return (model.get_weights(), + pred_loss = keras.losses.mean_squared_error(labels, logits) + scaled_loss = tf.nn.compute_average_loss( + pred_loss, global_batch_size=_BATCH_SIZE + ) + l2_loss = tf.nn.scale_regularization_loss(reg_losses) + return scaled_loss + l2_loss + + +def iteration_inside_func( + initial_weights, + dataset, + optimizer_fn, + iteration_type, + strategy=None, + sync_batchnorm=None, + jit_compile=False, +): + """Helper function to test iterating over data inside a tf.function.""" + with MaybeStrategyScope(strategy): + if strategy and sync_batchnorm: + model = get_model(sync_batchnorm) + else: + model = get_model() + model.set_weights(initial_weights) + optimizer = optimizer_fn() + + training_accuracy = keras.metrics.CategoricalAccuracy( + "training_accuracy", dtype=tf.float32 + ) + + @tf.function + def train_epoch(dist_input): + """Training StepFn.""" + + @tf.function(jit_compile=jit_compile) + def step_fn(inputs): + samples, labels = inputs + with tf.GradientTape() as tape: + logits = model(samples) + loss = compute_loss(labels, logits, model.losses) + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(grads, model.trainable_variables)) + training_accuracy.update_state(labels, logits) + return loss + + total_loss = 0.0 + num_batches = 0 + if iteration_type == "dataset": + for x in dist_input: + if strategy: + per_replica_losses = strategy.run(step_fn, args=(x,)) + total_loss += strategy.reduce( + tf.distribute.ReduceOp.SUM, + per_replica_losses, + axis=None, + ) + else: + total_loss += step_fn(x) + num_batches += 1 + else: + iterator = iter(dist_input) + for _ in range(_STEPS_PER_EPOCH): + if strategy: + per_replica_losses = strategy.run( + step_fn, args=(next(iterator),) + ) + total_loss += strategy.reduce( + tf.distribute.ReduceOp.SUM, + per_replica_losses, + axis=None, + ) + else: + total_loss += step_fn(next(iterator)) + num_batches += 1 + + return total_loss / tf.cast(num_batches, dtype=tf.float32) + + if strategy: + dataset = strategy.experimental_distribute_dataset(dataset) + + for _ in range(_NUM_EPOCHS): + loss = train_epoch(dataset) + + return (model.get_weights(), loss, training_accuracy.result()) + + +def iteration_outside_func( + initial_weights, + dataset, + optimizer_fn, + iteration_type, + strategy=None, + sync_batchnorm=None, + jit_compile=False, +): + """Helper function to test iterating over data outside a tf.function.""" + with MaybeStrategyScope(strategy): + model = get_model(sync_batchnorm=sync_batchnorm) + model.set_weights(initial_weights) + optimizer = optimizer_fn() + + training_accuracy = keras.metrics.CategoricalAccuracy( + "training_accuracy", dtype=tf.float32 + ) + + @tf.function + def train_step(dist_inputs): + """Training StepFn.""" + + @tf.function(jit_compile=jit_compile) + def step_fn(inputs): + samples, labels = inputs + 
with tf.GradientTape() as tape: + logits = model(samples) + loss = compute_loss(labels, logits, model.losses) + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(grads, model.trainable_variables)) + training_accuracy.update_state(labels, logits) + return loss + + if strategy: + per_replica_losses = strategy.run(step_fn, args=(dist_inputs,)) + return strategy.reduce( + tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None + ) + else: + return step_fn(dist_inputs) + + if strategy: + dataset = strategy.experimental_distribute_dataset(dataset) + + total_loss = 0.0 + num_batches = 0 + if iteration_type == "dataset": + for _ in range(_NUM_EPOCHS): + for x in dataset: + total_loss += train_step(x) + num_batches += 1 + else: + for _ in range(_NUM_EPOCHS): + iterator = iter(dataset) + for _ in range(_STEPS_PER_EPOCH): + total_loss += train_step(next(iterator)) + num_batches += 1 + + return ( + model.get_weights(), total_loss / tf.cast(num_batches, dtype=tf.float32), - training_accuracy.result()) + training_accuracy.result(), + ) @test_utils.run_v2_only -class TestDistributionStrategyDnnCorrectness(tf.test.TestCase, - parameterized.TestCase): - """Test custom training loop correctness with a simple DNN model.""" - - def setUp(self): - super().setUp() - np.random.seed(_RANDOM_SEED) - tf.compat.v1.set_random_seed(_RANDOM_SEED) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.all_strategies, - optimizer_fn=optimizer_combinations.optimizers_v2, - mode=['eager'], - iteration_type=['iterator', 'dataset'], - inside_func=[False, True], - sync_batchnorm=[True, False], - jit_compile=[False]) + tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.multiworker_strategies, - optimizer_fn=[ - optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, - optimizer_combinations.adagrad_optimizer_keras_v2_fn, - optimizer_combinations.adam_experimental_fn, - ], - mode=['eager'], - iteration_type=['iterator', 'dataset'], - inside_func=[False, True], - sync_batchnorm=[True, False], - jit_compile=[False]) + - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.one_device_strategy_gpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, - ], - optimizer_fn=[ - optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, - optimizer_combinations.adagrad_optimizer_keras_v2_fn - ], - mode=['eager'], - iteration_type=['iterator', 'dataset'], - inside_func=[False, True], - sync_batchnorm=[True, False], - jit_compile=[True])) - def test_dnn_correctness_minus_tpus(self, distribution, optimizer_fn, - iteration_type, inside_func, - sync_batchnorm, jit_compile): - # TODO(anjs): Identify why this particular V1 optimizer needs a higher tol. 
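[Aside: stripped of the test plumbing, `iteration_inside_func` and `iteration_outside_func` above both follow the canonical tf.distribute custom-training-loop shape: create state under `strategy.scope()`, run a per-replica step with `strategy.run`, and combine per-replica losses with `strategy.reduce`. A minimal sketch of that pattern using only the public TF API; the model, data, and batch size here are illustrative, not from the patch:]

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
    optimizer = tf.keras.optimizers.SGD(0.01)

dataset = tf.data.Dataset.from_tensor_slices(
    (tf.random.normal([64, 1]), tf.random.normal([64, 1]))
).batch(8)
dist_dataset = strategy.experimental_distribute_dataset(dataset)

@tf.function
def train_step(dist_inputs):
    def step_fn(inputs):
        x, y = inputs
        with tf.GradientTape() as tape:
            # Scale the per-example loss by the global batch size so the
            # cross-replica SUM below yields the true mean loss.
            per_example = tf.keras.losses.mean_squared_error(y, model(x))
            loss = tf.nn.compute_average_loss(per_example, global_batch_size=8)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return loss

    per_replica_losses = strategy.run(step_fn, args=(dist_inputs,))
    return strategy.reduce(
        tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None
    )

for batch in dist_dataset:
    train_step(batch)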
- if 'FtrlV1' in optimizer_fn._name and 'TPU' in type(distribution).__name__: - self.skipTest('Reduced tolerance of the order of 1e-1 required.') - self.dnn_correctness(distribution, optimizer_fn, iteration_type, - inside_func, sync_batchnorm, jit_compile) - - def dnn_correctness(self, - distribution, - optimizer_fn, - iteration_type, - inside_func, - sync_batchnorm=None, - jit_compile=False): - model = get_model(sync_batchnorm) - initial_weights = model.get_weights() - dataset = get_data() - if inside_func: - iteration_func = iteration_inside_func - else: - iteration_func = iteration_outside_func - - wts_with_ds, loss_with_ds, acc_with_ds = iteration_func( - initial_weights, - dataset, +class TestDistributionStrategyDnnCorrectness( + tf.test.TestCase, parameterized.TestCase +): + """Test custom training loop correctness with a simple DNN model.""" + + def setUp(self): + super().setUp() + np.random.seed(_RANDOM_SEED) + tf.compat.v1.set_random_seed(_RANDOM_SEED) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.all_strategies, + optimizer_fn=optimizer_combinations.optimizers_v2, + mode=["eager"], + iteration_type=["iterator", "dataset"], + inside_func=[False, True], + sync_batchnorm=[True, False], + jit_compile=[False], + ) + + tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.multiworker_strategies, + optimizer_fn=[ + optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, + optimizer_combinations.adagrad_optimizer_keras_v2_fn, + optimizer_combinations.adam_experimental_fn, + ], + mode=["eager"], + iteration_type=["iterator", "dataset"], + inside_func=[False, True], + sync_batchnorm=[True, False], + jit_compile=[False], + ) + + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.one_device_strategy_gpu, + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, # noqa: E501 + ], + optimizer_fn=[ + optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, + optimizer_combinations.adagrad_optimizer_keras_v2_fn, + ], + mode=["eager"], + iteration_type=["iterator", "dataset"], + inside_func=[False, True], + sync_batchnorm=[True, False], + jit_compile=[True], + ) + ) + def test_dnn_correctness_minus_tpus( + self, + distribution, optimizer_fn, iteration_type, - strategy=distribution, - sync_batchnorm=sync_batchnorm, - jit_compile=jit_compile) - wts, loss, acc = iteration_func( - initial_weights, - dataset, + inside_func, + sync_batchnorm, + jit_compile, + ): + # TODO(anjs): Identify why this particular V1 optimizer needs a higher + # tol. 
+ if ( + "FtrlV1" in optimizer_fn._name + and "TPU" in type(distribution).__name__ + ): + self.skipTest("Reduced tolerance of the order of 1e-1 required.") + self.dnn_correctness( + distribution, + optimizer_fn, + iteration_type, + inside_func, + sync_batchnorm, + jit_compile, + ) + + def dnn_correctness( + self, + distribution, optimizer_fn, iteration_type, - sync_batchnorm=sync_batchnorm, - jit_compile=False) - - self.assertAllClose(wts, wts_with_ds, atol=1e-3, rtol=1e-3) - self.assertAllClose(loss, loss_with_ds, atol=1e-3, rtol=1e-3) - self.assertAllClose(acc, acc_with_ds, atol=1e-3, rtol=1e-3) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations - .mirrored_strategy_with_two_gpus, - ], - mode=['eager'], - )) - def test_fused_batch_norm_uneven_batch(self, distribution): - """Test that fused batch norm works when the last device may get empty data. - - Adapted from https://www.tensorflow.org/tutorials/distribute/custom_training - but using ResNet, which uses fused batchnorm, as the model. - - Arguments: - distribution: distribute test configuration - """ - (train_images, train_labels), _ = fashion_mnist.load_data() - # add channel dimension to make 2D data into 3D, since some ops of the model - # require it. - train_images = train_images[..., None] - train_images = train_images / np.float32(255) - - # Padding images because ResNet requires a minimal shape of (32, 32) - padded_train_images = np.concatenate([ - np.zeros((len(train_images), 2, 28, 1)), - train_images, - np.zeros((len(train_images), 2, 28, 1)) - ], axis=1) - padded_train_images = np.concatenate([ - np.zeros((len(train_images), 32, 2, 1)), - padded_train_images, - np.zeros((len(train_images), 32, 2, 1)) - ], axis=2) - - buffer_size = len(train_images) - global_batch_size = distribution.num_replicas_in_sync - num_samples = global_batch_size - 1 - - epochs = 2 - - # Keep only the first images, so that the last GPU receives an empty batch - padded_train_images = padded_train_images[:num_samples] - train_labels = train_labels[:num_samples] - - train_dataset = tf.data.Dataset.from_tensor_slices( - (padded_train_images, - train_labels)).shuffle(buffer_size).batch(global_batch_size) - train_dist_dataset = distribution.experimental_distribute_dataset( - train_dataset) - - def create_model(): - inputs = keras.Input((32, 32, 1)) - preprocessed = keras.layers.Conv2D(3, (1, 1))( - inputs) # ResNet requires 3 channels - features = resnet_v2.ResNet50V2( - include_top=False, - input_tensor=preprocessed, - pooling='avg', - weights=None).output - return keras.Model(inputs, features) - - with distribution.scope(): - # Set reduction to `none` so we can do the reduction afterwards and divide - # by global batch size. 
- loss_object = keras.losses.SparseCategoricalCrossentropy( - from_logits=True, - reduction=losses_impl.Reduction.NONE) - - def compute_resnet_loss(labels, predictions): - per_example_loss = loss_object(labels, predictions) - return tf.nn.compute_average_loss( - per_example_loss, global_batch_size=global_batch_size) - - model = create_model() - - optimizer = optimizers.adam_v2.Adam() - - def train_step(inputs): - images, labels = inputs - - with tf.GradientTape() as tape: - predictions = model(images, training=True) - loss = compute_resnet_loss(labels, predictions) - - gradients = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(zip(gradients, model.trainable_variables)) - return loss - - @tf.function - def distributed_train_step(dataset_inputs): - per_replica_losses = distribution.run(train_step, args=(dataset_inputs,)) - return distribution.reduce( - tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None) - - for epoch in range(epochs): - # Train loop - total_loss = 0.0 - num_batches = 0 - for x in train_dist_dataset: - total_loss += distributed_train_step(x) - num_batches += 1 - train_loss = total_loss / num_batches - - print(f'Epoch {epoch+1}, Loss: {train_loss}') - - -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() + inside_func, + sync_batchnorm=None, + jit_compile=False, + ): + model = get_model(sync_batchnorm) + initial_weights = model.get_weights() + dataset = get_data() + if inside_func: + iteration_func = iteration_inside_func + else: + iteration_func = iteration_outside_func + + wts_with_ds, loss_with_ds, acc_with_ds = iteration_func( + initial_weights, + dataset, + optimizer_fn, + iteration_type, + strategy=distribution, + sync_batchnorm=sync_batchnorm, + jit_compile=jit_compile, + ) + wts, loss, acc = iteration_func( + initial_weights, + dataset, + optimizer_fn, + iteration_type, + sync_batchnorm=sync_batchnorm, + jit_compile=False, + ) + + self.assertAllClose(wts, wts_with_ds, atol=1e-3, rtol=1e-3) + self.assertAllClose(loss, loss_with_ds, atol=1e-3, rtol=1e-3) + self.assertAllClose(acc, acc_with_ds, atol=1e-3, rtol=1e-3) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, # noqa: E501 + ], + mode=["eager"], + ) + ) + def test_fused_batch_norm_uneven_batch(self, distribution): + """Test that fused BN works when the last device gets empty data. + + Adapted from + https://www.tensorflow.org/tutorials/distribute/custom_training + but using ResNet, which uses fused batchnorm, as the model. + + Arguments: + distribution: distribute test configuration + """ + self.skipTest("TODO(b/234354008): Requires fetching data from network.") + (train_images, train_labels), _ = fashion_mnist.load_data() + # add channel dimension to make 2D data into 3D, since some ops of the + # model require it. 
+ train_images = train_images[..., None] + train_images = train_images / np.float32(255) + + # Padding images because ResNet requires a minimal shape of (32, 32) + padded_train_images = np.concatenate( + [ + np.zeros((len(train_images), 2, 28, 1)), + train_images, + np.zeros((len(train_images), 2, 28, 1)), + ], + axis=1, + ) + padded_train_images = np.concatenate( + [ + np.zeros((len(train_images), 32, 2, 1)), + padded_train_images, + np.zeros((len(train_images), 32, 2, 1)), + ], + axis=2, + ) + + buffer_size = len(train_images) + global_batch_size = distribution.num_replicas_in_sync + num_samples = global_batch_size - 1 + + epochs = 2 + + # Keep only the first images, so that the last GPU receives an empty + # batch + padded_train_images = padded_train_images[:num_samples] + train_labels = train_labels[:num_samples] + + train_dataset = ( + tf.data.Dataset.from_tensor_slices( + (padded_train_images, train_labels) + ) + .shuffle(buffer_size) + .batch(global_batch_size) + ) + train_dist_dataset = distribution.experimental_distribute_dataset( + train_dataset + ) + + def create_model(): + inputs = keras.Input((32, 32, 1)) + preprocessed = keras.layers.Conv2D(3, (1, 1))( + inputs + ) # ResNet requires 3 channels + features = resnet_v2.ResNet50V2( + include_top=False, + input_tensor=preprocessed, + pooling="avg", + weights=None, + ).output + return keras.Model(inputs, features) + + with distribution.scope(): + # Set reduction to `none` so we can do the reduction afterwards and + # divide by global batch size. + loss_object = keras.losses.SparseCategoricalCrossentropy( + from_logits=True, reduction=losses_impl.Reduction.NONE + ) + + def compute_resnet_loss(labels, predictions): + per_example_loss = loss_object(labels, predictions) + return tf.nn.compute_average_loss( + per_example_loss, global_batch_size=global_batch_size + ) + + model = create_model() + + optimizer = optimizers.adam_legacy.Adam() + + def train_step(inputs): + images, labels = inputs + + with tf.GradientTape() as tape: + predictions = model(images, training=True) + loss = compute_resnet_loss(labels, predictions) + + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + return loss + + @tf.function + def distributed_train_step(dataset_inputs): + per_replica_losses = distribution.run( + train_step, args=(dataset_inputs,) + ) + return distribution.reduce( + tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None + ) + + for epoch in range(epochs): + # Train loop + total_loss = 0.0 + num_batches = 0 + for x in train_dist_dataset: + total_loss += distributed_train_step(x) + num_batches += 1 + train_loss = total_loss / num_batches + + print(f"Epoch {epoch+1}, Loss: {train_loss}") + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/custom_training_loop_metrics_test.py b/keras/distribute/custom_training_loop_metrics_test.py index c7957dd87c02..a48a7d6b1b8f 100644 --- a/keras/distribute/custom_training_loop_metrics_test.py +++ b/keras/distribute/custom_training_loop_metrics_test.py @@ -14,110 +14,120 @@ # ============================================================================== """Tests for custom training loops.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np -from tensorflow.python.framework import test_util as tf_test_utils + from keras import metrics from keras.distribute import strategy_combinations +# isort: 
off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) -class KerasMetricsTest(tf.test.TestCase, parameterized.TestCase): - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.all_strategies + - strategy_combinations.multiworker_strategies, - mode=["eager"] - )) - def test_multiple_keras_metrics_experimental_run(self, distribution): - with distribution.scope(): - loss_metric = metrics.Mean("loss", dtype=np.float32) - loss_metric_2 = metrics.Mean("loss_2", dtype=np.float32) - - @tf.function - def train_step(): - def step_fn(): - loss = tf.constant(5.0, dtype=np.float32) - loss_metric.update_state(loss) - loss_metric_2.update_state(loss) - - distribution.run(step_fn) - - train_step() - self.assertEqual(loss_metric.result().numpy(), - loss_metric_2.result().numpy()) - self.assertEqual(loss_metric.result().numpy(), 5.0) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.all_strategies+ - strategy_combinations.multiworker_strategies, - mode=["eager"] - )) - def test_update_keras_metric_declared_in_strategy_scope(self, distribution): - with distribution.scope(): - metric = metrics.Mean("test_metric", dtype=np.float32) - - dataset = tf.data.Dataset.range(10).batch(2) - dataset = distribution.experimental_distribute_dataset(dataset) - - @tf.function - def step_fn(i): - metric.update_state(i) - - for i in dataset: - distribution.run(step_fn, args=(i,)) - - # This should be the mean of integers 0-9 which has a sum of 45 and a count - # of 10 resulting in mean of 4.5. - self.assertEqual(metric.result().numpy(), 4.5) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.all_strategies, - mode=["eager"] - )) - def test_update_keras_metric_outside_strategy_scope_cross_replica( - self, distribution): - metric = metrics.Mean("test_metric", dtype=np.float32) - - with distribution.scope(): - for i in range(10): - metric.update_state(i) - - # This should be the mean of integers 0-9 which has a sum of 45 and a count - # of 10 resulting in mean of 4.5. - self.assertEqual(metric.result().numpy(), 4.5) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.all_strategies, mode=["eager"])) - @tf_test_utils.disable_mlir_bridge( - "TODO(b/168036682): Support dynamic padder") - def test_update_keras_metrics_dynamic_shape(self, distribution): - with distribution.scope(): - metric = metrics.Mean("test_metric", dtype=np.float32) - - dataset = tf.data.Dataset.range(10).batch(2, drop_remainder=False) - - @tf.function - def train_fn(dataset): - weights = tf.constant([0.1, 0.1]) - - def step_fn(i): - metric.update_state(i, weights) - - for i in dataset: - distribution.run(step_fn, args=(i,)) - - train_fn(dataset) - - # This should be the mean of integers 0-9 which has a sum of 45 and a count - # of 10 resulting in mean of 4.5. 
- self.assertEqual(metric.result().numpy(), 4.5) +class KerasMetricsTest(tf.test.TestCase, parameterized.TestCase): + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.all_strategies + + strategy_combinations.multiworker_strategies, + mode=["eager"], + ) + ) + def test_multiple_keras_metrics_experimental_run(self, distribution): + with distribution.scope(): + loss_metric = metrics.Mean("loss", dtype=np.float32) + loss_metric_2 = metrics.Mean("loss_2", dtype=np.float32) + + @tf.function + def train_step(): + def step_fn(): + loss = tf.constant(5.0, dtype=np.float32) + loss_metric.update_state(loss) + loss_metric_2.update_state(loss) + + distribution.run(step_fn) + + train_step() + self.assertEqual( + loss_metric.result().numpy(), loss_metric_2.result().numpy() + ) + self.assertEqual(loss_metric.result().numpy(), 5.0) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.all_strategies + + strategy_combinations.multiworker_strategies, + mode=["eager"], + ) + ) + def test_update_keras_metric_declared_in_strategy_scope(self, distribution): + with distribution.scope(): + metric = metrics.Mean("test_metric", dtype=np.float32) + + dataset = tf.data.Dataset.range(10).batch(2) + dataset = distribution.experimental_distribute_dataset(dataset) + + @tf.function + def step_fn(i): + metric.update_state(i) + + for i in dataset: + distribution.run(step_fn, args=(i,)) + + # This should be the mean of integers 0-9 which has a sum of 45 and a + # count of 10 resulting in mean of 4.5. + self.assertEqual(metric.result().numpy(), 4.5) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.all_strategies, mode=["eager"] + ) + ) + def test_update_keras_metric_outside_strategy_scope_cross_replica( + self, distribution + ): + metric = metrics.Mean("test_metric", dtype=np.float32) + + with distribution.scope(): + for i in range(10): + metric.update_state(i) + + # This should be the mean of integers 0-9 which has a sum of 45 and a + # count of 10 resulting in mean of 4.5. + self.assertEqual(metric.result().numpy(), 4.5) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.all_strategies, mode=["eager"] + ) + ) + @tf_test_utils.disable_mlir_bridge( + "TODO(b/168036682): Support dynamic padder" + ) + def test_update_keras_metrics_dynamic_shape(self, distribution): + with distribution.scope(): + metric = metrics.Mean("test_metric", dtype=np.float32) + + dataset = tf.data.Dataset.range(10).batch(2, drop_remainder=False) + + @tf.function + def train_fn(dataset): + weights = tf.constant([0.1, 0.1]) + + def step_fn(i): + metric.update_state(i, weights) + + for i in dataset: + distribution.run(step_fn, args=(i,)) + + train_fn(dataset) + + # This should be the mean of integers 0-9 which has a sum of 45 and a + # count of 10 resulting in mean of 4.5. 
+ self.assertEqual(metric.result().numpy(), 4.5) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/custom_training_loop_models_test.py b/keras/distribute/custom_training_loop_models_test.py index 7e6990608eb7..cdcd869b9fab 100644 --- a/keras/distribute/custom_training_loop_models_test.py +++ b/keras/distribute/custom_training_loop_models_test.py @@ -14,526 +14,558 @@ # ============================================================================== """Tests for custom training loops.""" -import tensorflow.compat.v2 as tf - import os -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras from keras.distribute import strategy_combinations from keras.layers import core -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import gradient_descent class CustomModel(tf.Module): + def __init__(self, name=None): + super().__init__(name=name) + with self.name_scope: + self._layers = [ + keras.layers.Dense(4, name="dense"), + ] - def __init__(self, name=None): - super().__init__(name=name) - with self.name_scope: - self._layers = [ - keras.layers.Dense(4, name="dense"), - ] - - @tf.Module.with_name_scope - def __call__(self, x): - for layer in self._layers: - x = layer(x) - return x + @tf.Module.with_name_scope + def __call__(self, x): + for layer in self._layers: + x = layer(x) + return x @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( - distribution=(strategy_combinations.all_strategies + - strategy_combinations.multiworker_strategies), - mode=["eager"] - ) + distribution=( + strategy_combinations.all_strategies + + strategy_combinations.multiworker_strategies + ), + mode=["eager"], ) +) class KerasModelsTest(tf.test.TestCase, parameterized.TestCase): + def test_single_keras_layer_run(self, distribution): + dataset = _get_dataset() + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - def test_single_keras_layer_run(self, distribution): - dataset = _get_dataset() - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - with distribution.scope(): - model = keras.layers.Dense(4, name="dense") - - @tf.function - def train_step(iterator): - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - outputs = model(images) - loss = keras.losses.mean_squared_error(targets, outputs) - grads = tape.gradient(loss, model.variables) - return grads - - outputs = distribution.run( - step_fn, args=(next(iterator),)) - return tf.nest.map_structure(distribution.experimental_local_results, - outputs) - - train_step(input_iterator) - - def test_keras_model_optimizer_run(self, distribution): - dataset = _get_dataset() - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - with distribution.scope(): - model = _get_model() - optimizer = keras.optimizers.optimizer_v2.rmsprop.RMSprop() - - @tf.function - def train_step(replicated_inputs): - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - outputs = model(images) - loss = keras.losses.mean_squared_error(targets, outputs) - grads = tape.gradient(loss, model.variables) - optimizer.apply_gradients(zip(grads, model.variables)) - return loss - - outputs = distribution.run(step_fn, args=(replicated_inputs,)) - return 
tf.nest.map_structure(distribution.experimental_local_results, - outputs) - - for x in input_iterator: - train_step(x) - - def test_keras_subclass_model_optimizer_run(self, distribution): - def get_subclass_model(): - - class KerasSubclassModel(keras.Model): - - def __init__(self): - super().__init__() - self.l = keras.layers.Dense(4, name="dense") - - def call(self, x): - return self.l(x) - - return KerasSubclassModel() - dataset = _get_dataset() - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - with distribution.scope(): - model = get_subclass_model() - optimizer = keras.optimizers.optimizer_v2.rmsprop.RMSprop() - - @tf.function - def train_step(iterator): - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - outputs = model(images) - loss = keras.losses.mean_squared_error(targets, outputs) - grads = tape.gradient(loss, model.variables) - optimizer.apply_gradients(zip(grads, model.variables)) - return loss - - outputs = distribution.run(step_fn, args=(next(iterator),)) - return tf.nest.map_structure(distribution.experimental_local_results, - outputs) - - train_step(input_iterator) - - def test_keras_model_optimizer_run_loop(self, distribution): - dataset = _get_dataset() - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - with distribution.scope(): - model = _get_model() - optimizer = keras.optimizers.optimizer_v2.rmsprop.RMSprop() - - @tf.function - def train_step(iterator): - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - outputs = model(images) - loss = keras.losses.mean_squared_error(targets, outputs) - grads = tape.gradient(loss, model.variables) - optimizer.apply_gradients(zip(grads, model.variables)) - return loss - - for _ in tf.range(4): - distribution.run(step_fn, args=(next(iterator),)) - - train_step(input_iterator) - - def test_batch_norm_with_dynamic_batch(self, distribution): - inputs = np.zeros((10, 3, 3, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat() - dataset = dataset.batch(10) - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - with distribution.scope(): - x = keras.layers.Input(shape=(3, 3, 3), name="input") - y = keras.layers.BatchNormalization(fused=True, name="bn")(x) - y = keras.layers.Flatten()(y) - y = keras.layers.Dense(4, name="dense")(y) - model = keras.Model(x, y) - optimizer = keras.optimizers.optimizer_v2.rmsprop.RMSprop() - - @tf.function - def train_step(iterator): - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - outputs = model(images, training=True) - loss = keras.losses.mean_squared_error(targets, outputs) - grads = tape.gradient(loss, model.variables) - optimizer.apply_gradients(zip(grads, model.variables)) - return loss - - distribution.run(step_fn, args=(next(iterator),)) - - train_step(input_iterator) - - def test_lstm(self, distribution): - - batch_size = 32 - - def create_lstm_model(): - model = keras.models.Sequential() - # We only have LSTM variables so we can detect no gradient issues more - # easily. 
- model.add( - keras.layers.LSTM(1, return_sequences=False, input_shape=(10, 1))) - return model - - def create_lstm_data(): - seq_length = 10 - - x_train = np.random.rand(batch_size, seq_length, 1).astype("float32") - y_train = np.random.rand(batch_size, 1).astype("float32") - return x_train, y_train - - x, y = create_lstm_data() - dataset = tf.data.Dataset.from_tensor_slices((x, y)) - dataset = dataset.batch(batch_size) - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - with distribution.scope(): - model = create_lstm_model() - optimizer = keras.optimizers.optimizer_v2.gradient_descent.SGD() - - @tf.function - def train_step(input_iterator): - - def step_fn(inputs): - inps, targ = inputs - with tf.GradientTape() as tape: - output = model(inps) - loss = tf.reduce_mean( - keras.losses.binary_crossentropy( - y_true=targ, y_pred=output, from_logits=False)) - grads = tape.gradient(loss, model.variables) - optimizer.apply_gradients(zip(grads, model.variables)) - return loss - - outputs = distribution.run( - step_fn, args=(next(input_iterator),)) - return distribution.experimental_local_results(outputs) - - train_step(input_iterator) - - def test_nested_tf_functions(self, distribution): - # The test builds two computations with keras layers, one with nested - # tf.function, and the other without nested tf.function. We run these - # computations independently on the model with same weights, and make sure - # the variables are still the same after one training step. - - inputs = np.random.random((10, 3)).astype(np.float32) - targets = np.ones((10, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)).repeat() - dataset = dataset.batch(10) - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - def get_model(): - x = keras.layers.Input(shape=(3,), name="input") - y = keras.layers.Dense(4, name="dense")(x) - model = keras.Model(x, y) - return model - - with distribution.scope(): - model = get_model() - optimizer = keras.optimizers.optimizer_v2.gradient_descent.SGD( - 0.1, momentum=0.01) - weights_file = os.path.join(self.get_temp_dir(), ".h5") - model.save_weights(weights_file) - model2 = get_model() - model2.load_weights(weights_file) - - # Make sure model and model2 variables are in sync when initialized. 
- for model_v, model2_v in zip(model.variables, model2.variables): - self.assertAllClose(model_v.numpy(), model2_v.numpy()) - - def compute_loss(images, targets): - outputs = model(images) - return keras.losses.mean_squared_error(targets, outputs) - - @tf.function - def train_step_without_nested_tf_function(inputs): - - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - loss = compute_loss(images, targets) - grads = tape.gradient(loss, model.variables) - optimizer.apply_gradients(zip(grads, model.variables)) - - distribution.run(step_fn, args=(inputs,)) - - @tf.function - def compute_loss2(images, targets): - outputs = model2(images) - return keras.losses.mean_squared_error(targets, outputs) - - @tf.function - def train_step_with_nested_tf_function(inputs): - - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - loss = compute_loss2(images, targets) - grads = tape.gradient(loss, model2.variables) - optimizer.apply_gradients(zip(grads, model2.variables)) - - distribution.run(step_fn, args=(inputs,)) - - inputs = next(input_iterator) - - train_step_without_nested_tf_function(inputs) - train_step_with_nested_tf_function(inputs) - - # Make sure model and model2 variables are still in sync. - for model_v, model2_v in zip(model.variables, model2.variables): - self.assertAllClose(model_v.numpy(), model2_v.numpy()) - - def test_nested_tf_functions_with_control_flow(self, distribution): - inputs = np.random.random((10, 3)).astype(np.float32) - targets = np.ones((10, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)).repeat() - dataset = dataset.batch(10) - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - def get_model(): - x = keras.layers.Input(shape=(3,), name="input") - y = keras.layers.Dense(4, name="dense")(x) - model = keras.Model(x, y) - return model - - with distribution.scope(): - model = get_model() - optimizer = keras.optimizers.optimizer_v2.gradient_descent.SGD( - 0.1, momentum=0.01) - - @tf.function - def train_step(iterator): - - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - outputs = model(images) - loss = keras.losses.mean_squared_error(targets, outputs) - grads = tape.gradient(loss, model.variables) - optimizer.apply_gradients(zip(grads, model.variables)) - - distribution.run(step_fn, args=(next(iterator),)) - - @tf.function - def train_steps(iterator): - for _ in tf.range(10): - train_step(iterator) - - train_steps(input_iterator) - - def test_nested_tf_functions_with_tf_function_passing_to_strategy_run( - self, distribution): - self.skipTest("b/190608193") - - inputs = np.random.random((10, 3)).astype(np.float32) - targets = np.ones((10, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)).repeat() - dataset = dataset.batch(10) - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - def get_model(): - x = keras.layers.Input(shape=(3,), name="input") - y = keras.layers.Dense(4, name="dense")(x) - model = keras.Model(x, y) - return model - - with distribution.scope(): - model = get_model() - optimizer = keras.optimizers.optimizer_v2.gradient_descent.SGD( - 0.1, momentum=0.01) - - @tf.function - def compute_loss(images, targets): - outputs = model(images) - return keras.losses.mean_squared_error(targets, outputs) - - @tf.function - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - loss = compute_loss(images, targets) - 
grads = tape.gradient(loss, model.variables) - optimizer.apply_gradients(zip(grads, model.variables)) - - inputs = next(input_iterator) - distribution.run(step_fn, args=(inputs,)) - - def test_customized_tf_module_run(self, distribution): - dataset = _get_dataset() - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - with distribution.scope(): - model = CustomModel() - - @tf.function - def train_step(iterator): - - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - outputs = model(images) - loss = keras.losses.mean_squared_error(targets, outputs) - grads = tape.gradient(loss, model.variables) - return grads - - outputs = distribution.run( - step_fn, args=(next(iterator),)) - return tf.nest.map_structure(distribution.experimental_local_results, - outputs) - - train_step(input_iterator) - - def test_reduce_loss(self, distribution): - inputs = np.zeros((10, 4), dtype=np.float32) - targets = np.zeros((10, 1), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.batch(10) - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - with distribution.scope(): - x = keras.layers.Input(shape=(4), name="input") - y = keras.layers.Dense(3, name="dense")(x) - model = keras.Model(x, y) + with distribution.scope(): + model = keras.layers.Dense(4, name="dense") + + @tf.function + def train_step(iterator): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + outputs = model(images) + loss = keras.losses.mean_squared_error(targets, outputs) + grads = tape.gradient(loss, model.variables) + return grads + + outputs = distribution.run(step_fn, args=(next(iterator),)) + return tf.nest.map_structure( + distribution.experimental_local_results, outputs + ) + + train_step(input_iterator) + + def test_keras_model_optimizer_run(self, distribution): + dataset = _get_dataset() + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - @tf.function - def train_step(iterator): + with distribution.scope(): + model = _get_model() + optimizer = keras.optimizers.legacy.rmsprop.RMSprop() + + @tf.function + def train_step(replicated_inputs): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + outputs = model(images) + loss = keras.losses.mean_squared_error(targets, outputs) + grads = tape.gradient(loss, model.variables) + optimizer.apply_gradients(zip(grads, model.variables)) + return loss + + outputs = distribution.run(step_fn, args=(replicated_inputs,)) + return tf.nest.map_structure( + distribution.experimental_local_results, outputs + ) + + for x in input_iterator: + train_step(x) + + def test_keras_subclass_model_optimizer_run(self, distribution): + def get_subclass_model(): + class KerasSubclassModel(keras.Model): + def __init__(self): + super().__init__() + self.l = keras.layers.Dense(4, name="dense") + + def call(self, x): + return self.l(x) + + return KerasSubclassModel() + + dataset = _get_dataset() + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - def step_fn(inputs): - images, targets = inputs - outputs = model(images) - loss = keras.losses.sparse_categorical_crossentropy(targets, outputs) - return loss + with distribution.scope(): + model = get_subclass_model() + optimizer = keras.optimizers.legacy.rmsprop.RMSprop() + + @tf.function + def train_step(iterator): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + 
outputs = model(images) + loss = keras.losses.mean_squared_error(targets, outputs) + grads = tape.gradient(loss, model.variables) + optimizer.apply_gradients(zip(grads, model.variables)) + return loss + + outputs = distribution.run(step_fn, args=(next(iterator),)) + return tf.nest.map_structure( + distribution.experimental_local_results, outputs + ) + + train_step(input_iterator) + + def test_keras_model_optimizer_run_loop(self, distribution): + dataset = _get_dataset() + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - return distribution.run(step_fn, args=(next(iterator),)) + with distribution.scope(): + model = _get_model() + optimizer = keras.optimizers.legacy.rmsprop.RMSprop() + + @tf.function + def train_step(iterator): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + outputs = model(images) + loss = keras.losses.mean_squared_error(targets, outputs) + grads = tape.gradient(loss, model.variables) + optimizer.apply_gradients(zip(grads, model.variables)) + return loss + + for _ in tf.range(4): + distribution.run(step_fn, args=(next(iterator),)) + + train_step(input_iterator) + + def test_batch_norm_with_dynamic_batch(self, distribution): + inputs = np.zeros((10, 3, 3, 3), dtype=np.float32) + targets = np.zeros((10, 4), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat() + dataset = dataset.batch(10) + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - loss = train_step(input_iterator) - loss = distribution.reduce(tf.distribute.ReduceOp.MEAN, loss, axis=0) + with distribution.scope(): + x = keras.layers.Input(shape=(3, 3, 3), name="input") + y = keras.layers.BatchNormalization(fused=True, name="bn")(x) + y = keras.layers.Flatten()(y) + y = keras.layers.Dense(4, name="dense")(y) + model = keras.Model(x, y) + optimizer = keras.optimizers.legacy.rmsprop.RMSprop() + + @tf.function + def train_step(iterator): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + outputs = model(images, training=True) + loss = keras.losses.mean_squared_error(targets, outputs) + grads = tape.gradient(loss, model.variables) + optimizer.apply_gradients(zip(grads, model.variables)) + return loss + + distribution.run(step_fn, args=(next(iterator),)) + + train_step(input_iterator) + + def test_lstm(self, distribution): + + batch_size = 32 + + def create_lstm_model(): + model = keras.models.Sequential() + # We only have LSTM variables so we can detect no gradient issues + # more easily. + model.add( + keras.layers.LSTM( + 1, return_sequences=False, input_shape=(10, 1) + ) + ) + return model + + def create_lstm_data(): + seq_length = 10 + + x_train = np.random.rand(batch_size, seq_length, 1).astype( + "float32" + ) + y_train = np.random.rand(batch_size, 1).astype("float32") + return x_train, y_train + + x, y = create_lstm_data() + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + dataset = dataset.batch(batch_size) + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - def test_variable_run_argument(self, distribution): - # Test that variables passed to run() remain variables. Previous behavior - # in TPUStrategy was to cast to Tensor. 
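+ # Creating the model and optimizer inside `distribution.scope()` makes + # their variables distributed variables; `distribution.run` then calls + # `step_fn` once per replica, and `experimental_local_results` unpacks + # the per-replica losses into a tuple the test can inspect eagerly.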
+ with distribution.scope(): + model = create_lstm_model() + optimizer = keras.optimizers.legacy.gradient_descent.SGD() + + @tf.function + def train_step(input_iterator): + def step_fn(inputs): + inps, targ = inputs + with tf.GradientTape() as tape: + output = model(inps) + loss = tf.reduce_mean( + keras.losses.binary_crossentropy( + y_true=targ, y_pred=output, from_logits=False + ) + ) + grads = tape.gradient(loss, model.variables) + optimizer.apply_gradients(zip(grads, model.variables)) + return loss + + outputs = distribution.run(step_fn, args=(next(input_iterator),)) + return distribution.experimental_local_results(outputs) + + train_step(input_iterator) + + def test_nested_tf_functions(self, distribution): + # The test builds two computations with keras layers, one with nested + # tf.function, and the other without nested tf.function. We run these + # computations independently on the model with the same weights, and + # make sure the variables are still the same after one training step. + + inputs = np.random.random((10, 3)).astype(np.float32) + targets = np.ones((10, 4), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)).repeat() + dataset = dataset.batch(10) + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - with distribution.scope(): - optimizer = gradient_descent.SGD(0.1) - net = core.Dense(1, trainable=True) - dataset = tf.data.Dataset.from_tensors([[1.]]) - dataset = dataset.repeat() - dataset = dataset.batch(2, drop_remainder=True) + def get_model(): + x = keras.layers.Input(shape=(3,), name="input") + y = keras.layers.Dense(4, name="dense")(x) + model = keras.Model(x, y) + return model + + with distribution.scope(): + model = get_model() + optimizer = keras.optimizers.legacy.gradient_descent.SGD( + 0.1, momentum=0.01 + ) + weights_file = os.path.join(self.get_temp_dir(), ".h5") + model.save_weights(weights_file) + model2 = get_model() + model2.load_weights(weights_file) + + # Make sure model and model2 variables are in sync when initialized. + for model_v, model2_v in zip(model.variables, model2.variables): + self.assertAllClose(model_v.numpy(), model2_v.numpy()) + + def compute_loss(images, targets): + outputs = model(images) + return keras.losses.mean_squared_error(targets, outputs) + + @tf.function + def train_step_without_nested_tf_function(inputs): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + loss = compute_loss(images, targets) + grads = tape.gradient(loss, model.variables) + optimizer.apply_gradients(zip(grads, model.variables)) + + distribution.run(step_fn, args=(inputs,)) + + @tf.function + def compute_loss2(images, targets): + outputs = model2(images) + return keras.losses.mean_squared_error(targets, outputs) + + @tf.function + def train_step_with_nested_tf_function(inputs): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + loss = compute_loss2(images, targets) + grads = tape.gradient(loss, model2.variables) + optimizer.apply_gradients(zip(grads, model2.variables)) + + distribution.run(step_fn, args=(inputs,)) + + inputs = next(input_iterator) + + train_step_without_nested_tf_function(inputs) + train_step_with_nested_tf_function(inputs) + + # Make sure model and model2 variables are still in sync.
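+ # A drift between the two models here would mean one of the training + # paths captured stale variable state when its tf.function was traced.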
+ for model_v, model2_v in zip(model.variables, model2.variables): + self.assertAllClose(model_v.numpy(), model2_v.numpy()) + + def test_nested_tf_functions_with_control_flow(self, distribution): + inputs = np.random.random((10, 3)).astype(np.float32) + targets = np.ones((10, 4), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)).repeat() + dataset = dataset.batch(10) + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - def replica_step(trainable_variables, features): + def get_model(): + x = keras.layers.Input(shape=(3,), name="input") + y = keras.layers.Dense(4, name="dense")(x) + model = keras.Model(x, y) + return model + + with distribution.scope(): + model = get_model() + optimizer = keras.optimizers.legacy.gradient_descent.SGD( + 0.1, momentum=0.01 + ) + + @tf.function + def train_step(iterator): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + outputs = model(images) + loss = keras.losses.mean_squared_error(targets, outputs) + grads = tape.gradient(loss, model.variables) + optimizer.apply_gradients(zip(grads, model.variables)) + + distribution.run(step_fn, args=(next(iterator),)) + + @tf.function + def train_steps(iterator): + for _ in tf.range(10): + train_step(iterator) + + train_steps(input_iterator) + + def test_nested_tf_functions_with_tf_function_passing_to_strategy_run( + self, distribution + ): + self.skipTest("b/190608193") + + inputs = np.random.random((10, 3)).astype(np.float32) + targets = np.ones((10, 4), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)).repeat() + dataset = dataset.batch(10) + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - with tf.GradientTape() as tape: - net_out = net(features[0], training=True) - loss = (net_out - 1.0) * (net_out - 1.0) - gradients = tape.gradient(loss, trainable_variables) - optimizer.apply_gradients(zip(gradients, trainable_variables)) - return loss + def get_model(): + x = keras.layers.Input(shape=(3,), name="input") + y = keras.layers.Dense(4, name="dense")(x) + model = keras.Model(x, y) + return model + + with distribution.scope(): + model = get_model() + optimizer = keras.optimizers.legacy.gradient_descent.SGD( + 0.1, momentum=0.01 + ) + + @tf.function + def compute_loss(images, targets): + outputs = model(images) + return keras.losses.mean_squared_error(targets, outputs) + + @tf.function + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + loss = compute_loss(images, targets) + grads = tape.gradient(loss, model.variables) + optimizer.apply_gradients(zip(grads, model.variables)) + + inputs = next(input_iterator) + distribution.run(step_fn, args=(inputs,)) + + def test_customized_tf_module_run(self, distribution): + dataset = _get_dataset() + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - @tf.function - def step(features): - per_replica_losses = distribution.run( - replica_step, - (net.trainable_variables, features), - ) - loss = distribution.reduce( - tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None) - return loss + with distribution.scope(): + model = CustomModel() + + @tf.function + def train_step(iterator): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + outputs = model(images) + loss = keras.losses.mean_squared_error(targets, outputs) + grads = tape.gradient(loss, model.variables) + return grads + + outputs = distribution.run(step_fn, 
args=(next(iterator),)) + return tf.nest.map_structure( + distribution.experimental_local_results, outputs + ) + + train_step(input_iterator) + + def test_reduce_loss(self, distribution): + inputs = np.zeros((10, 4), dtype=np.float32) + targets = np.zeros((10, 1), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.batch(10) + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - step(next(iter(dataset))) + with distribution.scope(): + x = keras.layers.Input(shape=(4), name="input") + y = keras.layers.Dense(3, name="dense")(x) + model = keras.Model(x, y) + + @tf.function + def train_step(iterator): + def step_fn(inputs): + images, targets = inputs + outputs = model(images) + loss = keras.losses.sparse_categorical_crossentropy( + targets, outputs + ) + return loss + + return distribution.run(step_fn, args=(next(iterator),)) + + loss = train_step(input_iterator) + loss = distribution.reduce(tf.distribute.ReduceOp.MEAN, loss, axis=0) + + def test_variable_run_argument(self, distribution): + # Test that variables passed to run() remain variables. Previous + # behavior in TPUStrategy was to cast to Tensor. + + with distribution.scope(): + optimizer = gradient_descent.SGD(0.1) + net = core.Dense(1, trainable=True) + dataset = tf.data.Dataset.from_tensors([[1.0]]) + dataset = dataset.repeat() + dataset = dataset.batch(2, drop_remainder=True) + + def replica_step(trainable_variables, features): + + with tf.GradientTape() as tape: + net_out = net(features[0], training=True) + loss = (net_out - 1.0) * (net_out - 1.0) + gradients = tape.gradient(loss, trainable_variables) + optimizer.apply_gradients(zip(gradients, trainable_variables)) + return loss + + @tf.function + def step(features): + per_replica_losses = distribution.run( + replica_step, + (net.trainable_variables, features), + ) + loss = distribution.reduce( + tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None + ) + return loss + + step(next(iter(dataset))) class KerasModelsXLATest(tf.test.TestCase, parameterized.TestCase): + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.tpu_strategies, mode=["eager"] + ) + ) + def test_tf_function_jit_compile(self, distribution): + dataset = _get_dataset() + input_iterator = iter( + distribution.experimental_distribute_dataset(dataset) + ) - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.tpu_strategies, mode=["eager"])) - def test_tf_function_jit_compile(self, distribution): - dataset = _get_dataset() - input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) - - class CustomDense(keras.layers.Layer): - - def __init__(self, num_outputs): - super().__init__() - self.num_outputs = num_outputs + class CustomDense(keras.layers.Layer): + def __init__(self, num_outputs): + super().__init__() + self.num_outputs = num_outputs - def build(self, input_shape): - self.kernel = self.add_weight( - "kernel", shape=[int(input_shape[-1]), self.num_outputs]) + def build(self, input_shape): + self.kernel = self.add_weight( + "kernel", shape=[int(input_shape[-1]), self.num_outputs] + ) - @tf.function(jit_compile=True) - def call(self, inputs): - return tf.matmul(inputs, self.kernel) + @tf.function(jit_compile=True) + def call(self, inputs): + return tf.matmul(inputs, self.kernel) - with distribution.scope(): - x = keras.layers.Input(shape=(3,)) - y = 
CustomDense(4)(x) - model = keras.Model(x, y) + with distribution.scope(): + x = keras.layers.Input(shape=(3,)) + y = CustomDense(4)(x) + model = keras.Model(x, y) - @tf.function - def train_step(iterator): - def step_fn(inputs): - images, targets = inputs - with tf.GradientTape() as tape: - outputs = model(images) - loss = keras.losses.mean_squared_error(targets, outputs) - grads = tape.gradient(loss, model.variables) - return grads + @tf.function + def train_step(iterator): + def step_fn(inputs): + images, targets = inputs + with tf.GradientTape() as tape: + outputs = model(images) + loss = keras.losses.mean_squared_error(targets, outputs) + grads = tape.gradient(loss, model.variables) + return grads - outputs = distribution.run( - step_fn, args=(next(iterator),)) - return tf.nest.map_structure(distribution.experimental_local_results, - outputs) + outputs = distribution.run(step_fn, args=(next(iterator),)) + return tf.nest.map_structure( + distribution.experimental_local_results, outputs + ) - train_step(input_iterator) + train_step(input_iterator) def _get_dataset(): - inputs = np.zeros((31, 3), dtype=np.float32) - targets = np.zeros((31, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.batch(10) - return dataset + inputs = np.zeros((31, 3), dtype=np.float32) + targets = np.zeros((31, 4), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.batch(10) + return dataset def _get_model(): - x = keras.layers.Input(shape=(3,), name="input") - y = keras.layers.Dense(4, name="dense")(x) - model = keras.Model(x, y) - return model + x = keras.layers.Input(shape=(3,), name="input") + y = keras.layers.Dense(4, name="dense")(x) + model = keras.Model(x, y) + return model if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/custom_training_loop_optimizer_test.py b/keras/distribute/custom_training_loop_optimizer_test.py index 511a28e0894d..c972b96a2e56 100644 --- a/keras/distribute/custom_training_loop_optimizer_test.py +++ b/keras/distribute/custom_training_loop_optimizer_test.py @@ -15,106 +15,125 @@ """Tests for custom training loops that involves advanced optimizer usage.""" import tensorflow.compat.v2 as tf - from absl.testing import parameterized + +from keras.distribute import ( + strategy_combinations as keras_strategy_combinations, +) +from keras.optimizers.legacy import gradient_descent + +# isort: off from tensorflow.python.distribute import values -from keras.distribute import strategy_combinations as keras_strategy_combinations -from keras.optimizers.optimizer_v2 import gradient_descent class OptimizerTest(tf.test.TestCase, parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine( - distribution=keras_strategy_combinations.multidevice_strategies, - mode=["eager"], - ), - tf.__internal__.test.combinations.combine( - experimental_aggregate_gradients=True, - expected=[[[-0.3, -0.3], [-0.3, -0.3]]]) + - tf.__internal__.test.combinations.combine( - experimental_aggregate_gradients=False, - expected=[[[-0.1, -0.1], [-0.2, -0.2]]]) - )) - def test_custom_aggregation(self, distribution, - experimental_aggregate_gradients, expected): - - with distribution.scope(): - v = tf.Variable([0., 0.]) - optimizer = gradient_descent.SGD(0.1) - - class 
PerReplica(values.DistributedValues): - """Holds a map from replica to unsynchronized values.""" - - @property - def values(self): - """Returns the per replica values.""" - return self._values - - @tf.function - def optimize(): - with tf.device(distribution.extended.worker_devices[0]): - v1 = tf.convert_to_tensor([1., 1.]) - with tf.device(distribution.extended.worker_devices[1]): - v2 = tf.convert_to_tensor([2., 2.]) - grads = PerReplica([v1, v2]) - def step_fn(grads): - optimizer.apply_gradients( - [(grads, v)], - experimental_aggregate_gradients=experimental_aggregate_gradients) - return v.read_value() - - return distribution.experimental_local_results( - distribution.run(step_fn, args=(grads,))) - - self.assertAllClose(optimize(), expected) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=tf.__internal__.distribute.combinations.one_device_strategy, - mode=["eager"], - experimental_aggregate_gradients=[True, False])) - def test_custom_aggregation_one_device(self, distribution, - experimental_aggregate_gradients): - - with distribution.scope(): - v = tf.Variable([0., 0.]) - optimizer = gradient_descent.SGD(0.1) - - @tf.function - def optimize(): - grads = tf.convert_to_tensor([1., 1.]) - - def step_fn(grads): - optimizer.apply_gradients( - [(grads, v)], - experimental_aggregate_gradients=experimental_aggregate_gradients) - return v.read_value() - - return distribution.experimental_local_results( - distribution.run(step_fn, args=(grads,))) - - self.assertAllClose(optimize(), [[-0.1, -0.1]]) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=[ - tf.__internal__.distribute.combinations.central_storage_strategy_with_gpu_and_cpu - ])) - def test_custom_aggregation_central_storage(self, distribution): - with distribution.scope(): - v = tf.Variable([0., 0.]) - optimizer = gradient_descent.SGD(0.1) - - grads = tf.convert_to_tensor([1., 1.]) - - def step_fn(grads): - with self.assertRaises(NotImplementedError): - optimizer.apply_gradients([(grads, v)], - experimental_aggregate_gradients=False) - - return distribution.run(step_fn, args=(grads,)) + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + distribution=keras_strategy_combinations.multidevice_strategies, + mode=["eager"], + ), + tf.__internal__.test.combinations.combine( + experimental_aggregate_gradients=True, + expected=[[[-0.3, -0.3], [-0.3, -0.3]]], + ) + + tf.__internal__.test.combinations.combine( + experimental_aggregate_gradients=False, + expected=[[[-0.1, -0.1], [-0.2, -0.2]]], + ), + ) + ) + def test_custom_aggregation( + self, distribution, experimental_aggregate_gradients, expected + ): + + with distribution.scope(): + v = tf.Variable([0.0, 0.0]) + optimizer = gradient_descent.SGD(0.1) + + class PerReplica(values.DistributedValues): + """Holds a map from replica to unsynchronized values.""" + + @property + def values(self): + """Returns the per replica values.""" + return self._values + + @tf.function + def optimize(): + with tf.device(distribution.extended.worker_devices[0]): + v1 = tf.convert_to_tensor([1.0, 1.0]) + with tf.device(distribution.extended.worker_devices[1]): + v2 = tf.convert_to_tensor([2.0, 2.0]) + grads = PerReplica([v1, v2]) + + def step_fn(grads): + optimizer.apply_gradients( + [(grads, v)], + experimental_aggregate_gradients=experimental_aggregate_gradients, # noqa: E501 + ) + return 
v.read_value() + + return distribution.experimental_local_results( + distribution.run(step_fn, args=(grads,)) + ) + + self.assertAllClose(optimize(), expected) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=tf.__internal__.distribute.combinations.one_device_strategy, # noqa: E501 + mode=["eager"], + experimental_aggregate_gradients=[True, False], + ) + ) + def test_custom_aggregation_one_device( + self, distribution, experimental_aggregate_gradients + ): + + with distribution.scope(): + v = tf.Variable([0.0, 0.0]) + optimizer = gradient_descent.SGD(0.1) + + @tf.function + def optimize(): + grads = tf.convert_to_tensor([1.0, 1.0]) + + def step_fn(grads): + optimizer.apply_gradients( + [(grads, v)], + experimental_aggregate_gradients=experimental_aggregate_gradients, # noqa: E501 + ) + return v.read_value() + + return distribution.experimental_local_results( + distribution.run(step_fn, args=(grads,)) + ) + + self.assertAllClose(optimize(), [[-0.1, -0.1]]) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.central_storage_strategy_with_gpu_and_cpu # noqa: E501 + ] + ) + ) + def test_custom_aggregation_central_storage(self, distribution): + with distribution.scope(): + v = tf.Variable([0.0, 0.0]) + optimizer = gradient_descent.SGD(0.1) + + grads = tf.convert_to_tensor([1.0, 1.0]) + + def step_fn(grads): + with self.assertRaises(NotImplementedError): + optimizer.apply_gradients( + [(grads, v)], experimental_aggregate_gradients=False + ) + + return distribution.run(step_fn, args=(grads,)) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/distribute/dataset_creator_model_fit_ps_only_test.py b/keras/distribute/dataset_creator_model_fit_ps_only_test.py index edc515aa327e..077ff151008e 100644 --- a/keras/distribute/dataset_creator_model_fit_ps_only_test.py +++ b/keras/distribute/dataset_creator_model_fit_ps_only_test.py @@ -14,11 +14,12 @@ # ============================================================================== """Tests for `DatasetCreator` with `Model.fit` across usages and strategies.""" +import tensorflow.compat.v2 as tf + from keras import callbacks as callbacks_lib from keras.distribute import dataset_creator_model_fit_test_base as test_base from keras.distribute import strategy_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf @test_utils.run_v2_only @@ -26,121 +27,152 @@ tf.__internal__.test.combinations.combine( strategy=strategy_combinations.parameter_server_strategies_multi_worker, use_dataset_creator=[True, False], - mode="eager")) + mode="eager", + ) +) class DatasetCreatorModelFitParameterServerStrategyOnlyTest( - test_base.DatasetCreatorModelFitTestBase): - - def testModelFitWithRunEagerly(self, strategy, use_dataset_creator): - with self.assertRaisesRegex( - ValueError, "When using `Model` with `ParameterServerStrategy`, " - "`run_eagerly` is not supported."): - self._model_fit( - strategy, run_eagerly=True, use_dataset_creator=use_dataset_creator) - - def testModelPredict(self, strategy, use_dataset_creator): - if use_dataset_creator: - self.skipTest("Unused option.") - model, _ = self._model_compile(strategy) - test_data = tf.data.Dataset.from_tensor_slices( - [[1.], [2.], [3.], [1.], [5.], [1.]]).repeat().batch(2) - model.predict(x=test_data, steps=3) - - def testClusterCoordinatorSingleInstance(self, strategy, 
use_dataset_creator): - model = self._model_fit(strategy, use_dataset_creator=use_dataset_creator) - strategy = model.distribute_strategy - self.assertIs( - strategy._cluster_coordinator, - tf.distribute.experimental.coordinator.ClusterCoordinator(strategy)) - - def testModelFitErrorOnBatchLevelCallbacks(self, strategy, - use_dataset_creator): - - class BatchLevelCallback(callbacks_lib.Callback): - - def on_train_batch_end(self, batch, logs=None): - pass - - with self.assertRaisesRegex(ValueError, - "Batch-level `Callback`s are not supported"): - callbacks = [BatchLevelCallback()] - self._model_fit( - strategy, - callbacks=callbacks, - use_dataset_creator=use_dataset_creator) - - def testModelFitCallbackSupportsTFLogs(self, strategy, use_dataset_creator): - - class MyCallback(callbacks_lib.Callback): - - def __init__(self): - super().__init__() - # Fetches the RemoteValues if necessary. - self._supports_tf_logs = True - - def on_train_batch_end(self, batch, logs=None): - assert isinstance(logs, tf.distribute.experimental.coordinator.RemoteValue) - - my_callback = MyCallback() - callbacks = [my_callback] - self._model_fit( - strategy, callbacks=callbacks, use_dataset_creator=use_dataset_creator) - - def testModelFitVerbosity(self, strategy, use_dataset_creator): - - class MyCallback(callbacks_lib.Callback): - pass - - my_callback = MyCallback() - callbacks = [my_callback] - self._model_fit( - strategy, callbacks=callbacks, use_dataset_creator=use_dataset_creator) - # PSStrategy should default to epoch-level logging. - self.assertEqual(my_callback.params["verbose"], 2) - - def testModelFitTensorBoardEpochLevel(self, strategy, use_dataset_creator): - log_dir = self.get_temp_dir() - callbacks = [callbacks_lib.TensorBoard(log_dir)] - self._model_fit( - strategy, callbacks=callbacks, use_dataset_creator=use_dataset_creator) - self.assertTrue(tf.compat.v1.gfile.Exists(log_dir)) - files = tf.compat.v1.gfile.ListDirectory(log_dir) - self.assertGreaterEqual(len(files), 1) - - def testModelFitVerbose1(self, strategy, use_dataset_creator): - with self.assertRaisesRegex(ValueError, - "`verbose=1` is not allowed with " - "`ParameterServerStrategy` for performance " - "reasons. 
Received: verbose=1"): - self._model_fit( - strategy, use_dataset_creator=use_dataset_creator, - verbose=1) - - def testModelEvaluateErrorOnBatchLevelCallbacks(self, strategy, - use_dataset_creator): - - class BatchLevelCallback(callbacks_lib.Callback): - - def on_train_batch_end(self, batch, logs=None): - pass - - with self.assertRaisesRegex(ValueError, - "Batch-level `Callback`s are not supported"): - callbacks = [BatchLevelCallback()] - self._model_evaluate( - strategy, - callbacks=callbacks, - use_dataset_creator=use_dataset_creator) - - def testClusterCoordinatorSingleInstanceWithJitCompileTrue( - self, strategy, use_dataset_creator): - model = self._model_fit(strategy, - use_dataset_creator=use_dataset_creator, - jit_compile=True) - strategy = model.distribute_strategy - self.assertIs( - strategy._cluster_coordinator, - tf.distribute.experimental.coordinator.ClusterCoordinator(strategy)) + test_base.DatasetCreatorModelFitTestBase +): + def testModelFitWithRunEagerly(self, strategy, use_dataset_creator): + with self.assertRaisesRegex( + ValueError, + "When using `Model` with `ParameterServerStrategy`, " + "`run_eagerly` is not supported.", + ): + self._model_fit( + strategy, + run_eagerly=True, + use_dataset_creator=use_dataset_creator, + ) + + def testModelPredict(self, strategy, use_dataset_creator): + if use_dataset_creator: + self.skipTest("Unused option.") + model, _ = self._model_compile(strategy) + test_data = ( + tf.data.Dataset.from_tensor_slices( + [[1.0], [2.0], [3.0], [1.0], [5.0], [1.0]] + ) + .repeat() + .batch(2) + ) + model.predict(x=test_data, steps=3) + + def testClusterCoordinatorSingleInstance( + self, strategy, use_dataset_creator + ): + model = self._model_fit( + strategy, use_dataset_creator=use_dataset_creator + ) + strategy = model.distribute_strategy + self.assertIs( + strategy._cluster_coordinator, + tf.distribute.experimental.coordinator.ClusterCoordinator(strategy), + ) + + def testModelFitErrorOnBatchLevelCallbacks( + self, strategy, use_dataset_creator + ): + class BatchLevelCallback(callbacks_lib.Callback): + def on_train_batch_end(self, batch, logs=None): + pass + + with self.assertRaisesRegex( + ValueError, "Batch-level `Callback`s are not supported" + ): + callbacks = [BatchLevelCallback()] + self._model_fit( + strategy, + callbacks=callbacks, + use_dataset_creator=use_dataset_creator, + ) + + def testModelFitCallbackSupportsTFLogs(self, strategy, use_dataset_creator): + class MyCallback(callbacks_lib.Callback): + def __init__(self): + super().__init__() + # Fetches the RemoteValues if necessary. + self._supports_tf_logs = True + + def on_train_batch_end(self, batch, logs=None): + assert isinstance( + logs, tf.distribute.experimental.coordinator.RemoteValue + ) + + my_callback = MyCallback() + callbacks = [my_callback] + self._model_fit( + strategy, + callbacks=callbacks, + use_dataset_creator=use_dataset_creator, + ) + + def testModelFitVerbosity(self, strategy, use_dataset_creator): + class MyCallback(callbacks_lib.Callback): + pass + + my_callback = MyCallback() + callbacks = [my_callback] + self._model_fit( + strategy, + callbacks=callbacks, + use_dataset_creator=use_dataset_creator, + ) + # PSStrategy should default to epoch-level logging. 
+ self.assertEqual(my_callback.params["verbose"], 2) + + def testModelFitTensorBoardEpochLevel(self, strategy, use_dataset_creator): + log_dir = self.get_temp_dir() + callbacks = [callbacks_lib.TensorBoard(log_dir)] + self._model_fit( + strategy, + callbacks=callbacks, + use_dataset_creator=use_dataset_creator, + ) + self.assertTrue(tf.compat.v1.gfile.Exists(log_dir)) + files = tf.compat.v1.gfile.ListDirectory(log_dir) + self.assertGreaterEqual(len(files), 1) + + def testModelFitVerbose1(self, strategy, use_dataset_creator): + with self.assertRaisesRegex( + ValueError, + "`verbose=1` is not allowed with " + "`ParameterServerStrategy` for performance " + "reasons. Received: verbose=1", + ): + self._model_fit( + strategy, use_dataset_creator=use_dataset_creator, verbose=1 + ) + + def testModelEvaluateErrorOnBatchLevelCallbacks( + self, strategy, use_dataset_creator + ): + class BatchLevelCallback(callbacks_lib.Callback): + def on_train_batch_end(self, batch, logs=None): + pass + + with self.assertRaisesRegex( + ValueError, "Batch-level `Callback`s are not supported" + ): + callbacks = [BatchLevelCallback()] + self._model_evaluate( + strategy, + callbacks=callbacks, + use_dataset_creator=use_dataset_creator, + ) + + def testClusterCoordinatorSingleInstanceWithJitCompileTrue( + self, strategy, use_dataset_creator + ): + model = self._model_fit( + strategy, use_dataset_creator=use_dataset_creator, jit_compile=True + ) + strategy = model.distribute_strategy + self.assertIs( + strategy._cluster_coordinator, + tf.distribute.experimental.coordinator.ClusterCoordinator(strategy), + ) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/dataset_creator_model_fit_test.py b/keras/distribute/dataset_creator_model_fit_test.py index 518bd3c54289..c6b36be62c46 100644 --- a/keras/distribute/dataset_creator_model_fit_test.py +++ b/keras/distribute/dataset_creator_model_fit_test.py @@ -14,247 +14,287 @@ # ============================================================================== """Tests for `DatasetCreator` with `Model.fit` across usages and strategies.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np -from tensorflow.python.framework import test_util as tf_test_utils -from keras.testing_infra import test_utils from keras.distribute import dataset_creator_model_fit_test_base as test_base from keras.distribute import strategy_combinations +from keras.testing_infra import test_utils from keras.utils import dataset_creator +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) + # TODO(rchao): Investigate why there cannot be single worker and multi worker # PS strategies running in the same shard. 
@test_utils.run_v2_only @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( - strategy=strategy_combinations.all_strategies + - strategy_combinations.multi_worker_mirrored_strategies + - strategy_combinations.parameter_server_strategies_multi_worker, - mode="eager")) + strategy=strategy_combinations.all_strategies + + strategy_combinations.multi_worker_mirrored_strategies + + strategy_combinations.parameter_server_strategies_multi_worker, + mode="eager", + ) +) class DatasetCreatorModelFitTest(test_base.DatasetCreatorModelFitTestBase): + def setUp(self): + super().setUp() + if tf_test_utils.is_xla_enabled(): + self.skipTest( + "model.optimizer.iterations values is not as expected " + "with XLA: b/184384487" + ) + + def testModelFit(self, strategy): + model = self._model_fit(strategy) + self.assertEqual(model.optimizer.iterations, 100) + + def testModelFitwithStepsPerEpochNegativeOne(self, strategy): + def dataset_fn(input_context): + del input_context + x = tf.random.uniform((10, 10)) + y = tf.random.uniform((10,)) + return ( + tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10).batch(2) + ) + + if strategy._should_use_with_coordinator: + with self.assertRaises( + (tf.errors.OutOfRangeError, tf.errors.CancelledError) + ): + self._model_fit( + strategy, + steps_per_epoch=-1, + x=dataset_creator.DatasetCreator(dataset_fn), + validation_data=dataset_creator.DatasetCreator(dataset_fn), + ) + else: + self._model_fit( + strategy, + steps_per_epoch=-1, + x=dataset_creator.DatasetCreator(dataset_fn), + validation_data=dataset_creator.DatasetCreator(dataset_fn), + ) + + def testModelFitWithNumpyData(self, strategy): + x = np.random.rand(100, 10) + y = np.random.rand(100, 1) + model = self._model_fit( + strategy, + x=x, + y=y, + batch_size=1, + validation_data=(x, y), + ) + self.assertEqual(model.optimizer.iterations, 100) - def setUp(self): - super().setUp() - if tf_test_utils.is_xla_enabled(): - self.skipTest("model.optimizer.iterations values is not as expected " - "with XLA: b/184384487") - - def testModelFit(self, strategy): - model = self._model_fit(strategy) - self.assertEqual(model.optimizer.iterations, 100) - - def testModelFitwithStepsPerEpochNegativeOne(self, strategy): - def dataset_fn(input_context): - del input_context - x = tf.random.uniform((10, 10)) - y = tf.random.uniform((10,)) - return tf.data.Dataset.from_tensor_slices( - (x, y)).shuffle(10).batch(2) - - if strategy._should_use_with_coordinator: - with self.assertRaises((tf.errors.OutOfRangeError, - tf.errors.CancelledError)): - self._model_fit( + def testModelFitWithTensorData(self, strategy): + x = tf.random.uniform((100, 10)) + y = tf.random.uniform((100,)) + model = self._model_fit( strategy, - steps_per_epoch=-1, - x=dataset_creator.DatasetCreator(dataset_fn), - validation_data=dataset_creator.DatasetCreator(dataset_fn), + x=x, + y=y, + batch_size=1, + validation_data=(x, y), ) - else: - self._model_fit( - strategy, - steps_per_epoch=-1, - x=dataset_creator.DatasetCreator(dataset_fn), - validation_data=dataset_creator.DatasetCreator(dataset_fn), - ) - - def testModelFitWithNumpyData(self, strategy): - x = np.random.rand(100, 10) - y = np.random.rand(100, 1) - model = self._model_fit( - strategy, - x=x, - y=y, - batch_size=1, - validation_data=(x, y), - ) - self.assertEqual(model.optimizer.iterations, 100) - - def testModelFitWithTensorData(self, strategy): - x = tf.random.uniform((100, 10)) - y = tf.random.uniform((100,)) - model = self._model_fit( - strategy, - x=x, - y=y, - 
batch_size=1, - validation_data=(x, y), - ) - self.assertEqual(model.optimizer.iterations, 100) - - def testModelFitWithLookupLayer(self, strategy): - model = self._model_fit(strategy, use_lookup_layer=True) - self.assertEqual(model.optimizer.iterations, 100) - - def testModelFitWithNormalizationLayer(self, strategy): - model = self._model_fit(strategy, with_normalization_layer=True) - self.assertEqual(model.optimizer.iterations, 100) - - def testModelFitWithStepsPerExecution(self, strategy): - model = self._model_fit(strategy, steps_per_execution=10) - self.assertEqual(model.optimizer.iterations, 100) - - def testModelFitWithNoStepsPerEpoch(self, strategy): - with self.assertRaisesRegex( - ValueError, - "When using a `tf.keras.utils.experimental.DatasetCreator`, " - "`steps_per_epoch`, `validation_steps` or `steps` argument must be " - "provided in `Model.fit`, `Model.evaluate`, or `Model.predict`."): - self._model_fit(strategy, steps_per_epoch=None) - - def testModelEvaluate(self, strategy): - self._model_evaluate(strategy) - self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) - - def testModelEvaluateWithNumpyData(self, strategy): - x = np.random.rand(100, 10) - y = np.random.rand(100, 1) - self._model_evaluate( - strategy, - x=x, - y=y, - batch_size=1, - ) - self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) - - def testModelEvaluateWithTensorData(self, strategy): - x = tf.random.uniform((100, 10)) - y = tf.random.uniform((100,)) - self._model_evaluate( - strategy, - x=x, - y=y, - batch_size=1, - ) - self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) - - def testModelEvaluateWithNormalizationLayer(self, strategy): - self._model_evaluate(strategy, with_normalization_layer=True) - self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) - - def testModelEvaluateWithStepsPerExecution(self, strategy): - self._model_evaluate(strategy, steps_per_execution=10) - self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) - - def testModelEvaluateWithNoStepsPerEpoch(self, strategy): - with self.assertRaisesRegex( - ValueError, - "When using a `tf.keras.utils.experimental.DatasetCreator`, " - "`steps_per_epoch`, `validation_steps` or `steps` argument must be " - "provided in `Model.fit`, `Model.evaluate`, or `Model.predict`."): - self._model_evaluate(strategy, steps=None) - - def testModelPredict(self, strategy): - _, predictions = self._model_predict(strategy, steps=3) - # Check the first (0th index), fourth (3rd index) and the last predictions - # because the first, fourth and the last input are the same in - # `model.predict` so there predictions should match. 
- self.assertTrue(all(predictions[0] == predictions[i] for i in [0, 3, 5])) - - self.assertFalse( - all(predictions[0] == predictions[i] for i in [0, 1, 2, 4])) - - def testModelPredictWithNumpyData(self, strategy): - x = np.array([[1.], [2.], [3.], [1.], [5.], [1.]]) - _, predictions = self._model_predict(strategy, test_data=x) - - self.assertTrue(all(predictions[0] == predictions[i] for i in [0, 3, 5])) - self.assertFalse( - all(predictions[0] == predictions[i] for i in [0, 1, 2, 4])) - - def testModelPredictWithTensorData(self, strategy): - x = tf.constant([[1.], [2.], [3.], [1.], [5.], [1.]]) - _, predictions = self._model_predict(strategy, test_data=x) - self.assertTrue(all(predictions[0] == predictions[i] for i in [0, 3, 5])) - self.assertFalse( - all(predictions[0] == predictions[i] for i in [0, 1, 2, 4])) - - def testModelPredictWithNormalizationLayer(self, strategy): - _, predictions = self._model_predict( - strategy, with_normalization_layer=True, steps=3) - # Check the first (0th index), fourth (3rd index) and the last predictions - # because the first, fourth and the last input is the same in - # `model.predict` so there predictions should match. - self.assertTrue(all(predictions[0] == predictions[i] for i in [0, 3, 5])) - - self.assertFalse( - all(predictions[0] == predictions[i] for i in [0, 1, 2, 4])) - - def testModelPredictWithStepsPerExecution(self, strategy): - _, predictions = self._model_predict( - strategy, steps_per_execution=3, steps=3) - - # Check the first (0th index), fourth (3rd index) and the last predictions - # because the first, fourth and the last input is the same in - # `model.predict` so there predictions should match. - self.assertTrue(all(predictions[0] == predictions[i] for i in [0, 3, 5])) - - self.assertFalse( - all(predictions[0] == predictions[i] for i in [0, 1, 2, 4])) - - def testModelFitAndPredict(self, strategy): - def fit_dataset_fn(input_context): - del input_context - x = tf.random.uniform((10, 1)) - y = tf.random.uniform((10,)) - return tf.data.Dataset.from_tensor_slices( - (x, y)).shuffle(10).repeat().batch(2) - - x = dataset_creator.DatasetCreator(fit_dataset_fn) - validation_data = dataset_creator.DatasetCreator(fit_dataset_fn) - - model = self._model_fit(strategy, x=x, validation_data=validation_data) - _, predictions = self._model_predict(strategy, model, steps=3) - - # Check the first (0th index), fourth (3rd index) and the last predictions - # because the first, fourth and the last input is the same in - # `model.predict` so there predictions should match. 
- self.assertTrue(all(predictions[0] == predictions[i] for i in [0, 3, 5])) - - self.assertFalse( - all(predictions[0] == predictions[i] for i in [0, 1, 2, 4])) - - def testModelPredictWithDatasetCreator(self, strategy): - if isinstance(strategy, - tf.distribute.MultiWorkerMirroredStrategy): - self.skipTest("b/189223991") - - def _dataset_fn(input_context): - del input_context - x = tf.constant([[1.], [2.], [3.], [1.], [5.], [1.]]) - return tf.data.Dataset.from_tensor_slices(x).repeat().batch(2) - - _, predictions = self._model_predict( - strategy, - steps=3, - test_data=dataset_creator.DatasetCreator(_dataset_fn), - ) + self.assertEqual(model.optimizer.iterations, 100) + + def testModelFitWithLookupLayer(self, strategy): + model = self._model_fit(strategy, use_lookup_layer=True) + self.assertEqual(model.optimizer.iterations, 100) + + def testModelFitWithNormalizationLayer(self, strategy): + model = self._model_fit(strategy, with_normalization_layer=True) + self.assertEqual(model.optimizer.iterations, 100) + + def testModelFitWithStepsPerExecution(self, strategy): + model = self._model_fit(strategy, steps_per_execution=10) + self.assertEqual(model.optimizer.iterations, 100) + + def testModelFitWithNoStepsPerEpoch(self, strategy): + with self.assertRaisesRegex( + ValueError, + "When using a `tf.keras.utils.experimental.DatasetCreator`, " + "`steps_per_epoch`, `validation_steps`, `steps`, or " + "`pss_evaluation_shards` argument must be provided in " + "`Model.fit`, `Model.evaluate`, or `Model.predict`.", + ): + self._model_fit(strategy, steps_per_epoch=None) + + def testModelEvaluate(self, strategy): + self._model_evaluate(strategy) + self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) + + def testModelEvaluateWithNumpyData(self, strategy): + x = np.random.rand(100, 10) + y = np.random.rand(100, 1) + self._model_evaluate( + strategy, + x=x, + y=y, + batch_size=1, + ) + self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) - # Check the first (0th index), fourth (3rd index) and the last predictions - # because the first, fourth and the last input is the same in - # `model.predict` so there predictions should match. 
- self.assertTrue(all(predictions[0] == predictions[i] for i in [0, 3, 5])) + def testModelEvaluateWithTensorData(self, strategy): + x = tf.random.uniform((100, 10)) + y = tf.random.uniform((100,)) + self._model_evaluate( + strategy, + x=x, + y=y, + batch_size=1, + ) + self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) + + def testModelEvaluateWithNormalizationLayer(self, strategy): + self._model_evaluate(strategy, with_normalization_layer=True) + self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) + + def testModelEvaluateWithStepsPerExecution(self, strategy): + self._model_evaluate(strategy, steps_per_execution=10) + self.assertGreaterEqual(self._accuracy_metric.result(), 0.0) + + def testModelEvaluateWithNoStepsPerEpoch(self, strategy): + with self.assertRaisesRegex( + ValueError, + "When using a `tf.keras.utils.experimental.DatasetCreator`, " + "`steps_per_epoch`, `validation_steps`, `steps`, or " + "`pss_evaluation_shards` argument must be provided in " + "`Model.fit`, `Model.evaluate`, or `Model.predict`.", + ): + self._model_evaluate(strategy, steps=None) + + def testModelPredict(self, strategy): + _, predictions = self._model_predict(strategy, steps=3) + # Check the first (0th index), fourth (3rd index) and the last + # predictions because the first, fourth and the last input are the same + # in `model.predict` so there predictions should match. + self.assertTrue( + all(predictions[0] == predictions[i] for i in [0, 3, 5]) + ) + + self.assertFalse( + all(predictions[0] == predictions[i] for i in [0, 1, 2, 4]) + ) - self.assertFalse( - all(predictions[0] == predictions[i] for i in [0, 1, 2, 4])) + def testModelPredictWithNumpyData(self, strategy): + x = np.array([[1.0], [2.0], [3.0], [1.0], [5.0], [1.0]]) + _, predictions = self._model_predict(strategy, test_data=x) + + self.assertTrue( + all(predictions[0] == predictions[i] for i in [0, 3, 5]) + ) + self.assertFalse( + all(predictions[0] == predictions[i] for i in [0, 1, 2, 4]) + ) + + def testModelPredictWithTensorData(self, strategy): + x = tf.constant([[1.0], [2.0], [3.0], [1.0], [5.0], [1.0]]) + _, predictions = self._model_predict(strategy, test_data=x) + self.assertTrue( + all(predictions[0] == predictions[i] for i in [0, 3, 5]) + ) + self.assertFalse( + all(predictions[0] == predictions[i] for i in [0, 1, 2, 4]) + ) - def testModelTrainTFFunction(self, strategy): - model = self._model_fit(strategy) - self.assertIsInstance(model.train_tf_function, - tf.__internal__.function.Function) + def testModelPredictWithNormalizationLayer(self, strategy): + _, predictions = self._model_predict( + strategy, with_normalization_layer=True, steps=3 + ) + # Check the first (0th index), fourth (3rd index) and the last + # predictions because the first, fourth and the last input is the same + # in `model.predict` so there predictions should match. + self.assertTrue( + all(predictions[0] == predictions[i] for i in [0, 3, 5]) + ) + + self.assertFalse( + all(predictions[0] == predictions[i] for i in [0, 1, 2, 4]) + ) + + def testModelPredictWithStepsPerExecution(self, strategy): + _, predictions = self._model_predict( + strategy, steps_per_execution=3, steps=3 + ) + + # Check the first (0th index), fourth (3rd index) and the last + # predictions because the first, fourth and the last input is the same + # in `model.predict` so there predictions should match. 
+ self.assertTrue( + all(predictions[0] == predictions[i] for i in [0, 3, 5]) + ) + + self.assertFalse( + all(predictions[0] == predictions[i] for i in [0, 1, 2, 4]) + ) + + def testModelFitAndPredict(self, strategy): + def fit_dataset_fn(input_context): + del input_context + x = tf.random.uniform((10, 1)) + y = tf.random.uniform((10,)) + return ( + tf.data.Dataset.from_tensor_slices((x, y)) + .shuffle(10) + .repeat() + .batch(2) + ) + + x = dataset_creator.DatasetCreator(fit_dataset_fn) + validation_data = dataset_creator.DatasetCreator(fit_dataset_fn) + + model = self._model_fit(strategy, x=x, validation_data=validation_data) + _, predictions = self._model_predict(strategy, model, steps=3) + + # Check the first (0th index), fourth (3rd index) and the last + # predictions because the first, fourth and the last input is the same + # in `model.predict` so there predictions should match. + self.assertTrue( + all(predictions[0] == predictions[i] for i in [0, 3, 5]) + ) + + self.assertFalse( + all(predictions[0] == predictions[i] for i in [0, 1, 2, 4]) + ) + + def testModelPredictWithDatasetCreator(self, strategy): + if isinstance(strategy, tf.distribute.MultiWorkerMirroredStrategy): + self.skipTest("b/189223991") + + def _dataset_fn(input_context): + del input_context + x = tf.constant([[1.0], [2.0], [3.0], [1.0], [5.0], [1.0]]) + return tf.data.Dataset.from_tensor_slices(x).repeat().batch(2) + + _, predictions = self._model_predict( + strategy, + steps=3, + test_data=dataset_creator.DatasetCreator(_dataset_fn), + ) + + # Check the first (0th index), fourth (3rd index) and the last + # predictions because the first, fourth and the last input is the same + # in `model.predict` so there predictions should match. + self.assertTrue( + all(predictions[0] == predictions[i] for i in [0, 3, 5]) + ) + + self.assertFalse( + all(predictions[0] == predictions[i] for i in [0, 1, 2, 4]) + ) + + def testModelTrainTFFunction(self, strategy): + model = self._model_fit(strategy) + self.assertIsInstance( + model.train_tf_function, tf.__internal__.function.Function + ) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/dataset_creator_model_fit_test_base.py b/keras/distribute/dataset_creator_model_fit_test_base.py index b2369cf123da..e7318fdf3b3b 100644 --- a/keras/distribute/dataset_creator_model_fit_test_base.py +++ b/keras/distribute/dataset_creator_model_fit_test_base.py @@ -14,216 +14,254 @@ # ============================================================================== """Tests for `DatasetCreator` with `Model.fit` across usages and strategies.""" -import tensorflow.compat.v2 as tf - import os -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras from keras import callbacks as callbacks_lib from keras.engine import sequential from keras.layers import core as core_layers from keras.layers.preprocessing import string_lookup -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import gradient_descent from keras.utils import dataset_creator + +# isort: off from tensorflow.python.platform import tf_logging as logging class DatasetCreatorModelFitTestBase(tf.test.TestCase, parameterized.TestCase): - """The base class for DatasetCreator with Model.fit tests.""" - - def _get_dataset_fn(self, use_lookup_layer): - - if use_lookup_layer: - - filepath = 
os.path.join(self.get_temp_dir(), "vocab") - with open(filepath, "w") as f: - f.write("\n".join(["earth", "wind", "and", "fire"])) - - def dataset_fn(input_context): - del input_context - lookup_layer = string_lookup.StringLookup( - num_oov_indices=1, vocabulary=filepath) - x = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - y = np.array([0, 1]) - map_fn = lambda x, y: (lookup_layer(x), y) - return tf.data.Dataset.from_tensor_slices( - (x, y)).shuffle(10).repeat().batch(2).map(map_fn) - - else: - - def dataset_fn(input_context): - del input_context - x = tf.random.uniform((10, 10)) - y = tf.random.uniform((10,)) - return tf.data.Dataset.from_tensor_slices( - (x, y)).shuffle(10).repeat().batch(2) - - return dataset_fn - - def _model_compile(self, - strategy, - steps_per_execution=1, - run_eagerly=False, - with_normalization_layer=False, - jit_compile=None): - - class ResultAssertingCallback(callbacks_lib.Callback): - """A callback that asserts the result of the tests.""" - - def __init__(self): - self._prev_epoch = -1 - - def on_epoch_end(self, epoch, logs=None): - logging.info("testModelFit: epoch=%r, logs=%r", epoch, logs) - if epoch <= self._prev_epoch: - raise RuntimeError("Epoch is supposed to be larger than previous.") - self._prev_epoch = epoch - is_loss_float = ( - logs.get("loss", None) is not None and - isinstance(logs["loss"], (float, np.floating))) - if not is_loss_float: - raise RuntimeError("loss is supposed to be in the logs and float.") - - with strategy.scope(): - model = sequential.Sequential([core_layers.Dense(10)]) - if with_normalization_layer: - norm = keras.layers.BatchNormalization( - axis=-1, input_shape=(4, 4, 3), momentum=0.8) - model.add(norm) - model.add(core_layers.Dense(1, activation="sigmoid")) - self._accuracy_metric = keras.metrics.Accuracy() - - model.compile( - gradient_descent.SGD(), - loss="binary_crossentropy", - metrics=[self._accuracy_metric], - steps_per_execution=steps_per_execution, - run_eagerly=run_eagerly, - jit_compile=jit_compile) - return model, [ResultAssertingCallback()] - - def _model_fit(self, - strategy, - steps_per_execution=1, - validation_data=None, - x=None, - y=None, - shuffle=True, - batch_size=None, - steps_per_epoch=10, - run_eagerly=False, - with_normalization_layer=False, - callbacks=None, - use_lookup_layer=False, - use_dataset_creator=True, - verbose="auto", - jit_compile=None): - if callbacks is None: - callbacks = [] - - model, default_callbacks = self._model_compile(strategy, - steps_per_execution, - run_eagerly, - with_normalization_layer, - jit_compile) - callbacks += default_callbacks - - if x is None: - if use_dataset_creator: - x = dataset_creator.DatasetCreator( - self._get_dataset_fn(use_lookup_layer)) - else: - x = self._get_dataset_fn(use_lookup_layer)(None) - - if validation_data is None: - if use_dataset_creator: - validation_data = dataset_creator.DatasetCreator( - self._get_dataset_fn(use_lookup_layer)) - else: - validation_data = self._get_dataset_fn(use_lookup_layer)(None) - - model.fit( - x, - y, - shuffle=shuffle, - batch_size=batch_size, - epochs=10, - steps_per_epoch=steps_per_epoch, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=steps_per_epoch, - verbose=verbose) - return model - - def _model_evaluate(self, - strategy, - steps_per_execution=1, - x=None, - y=None, - batch_size=None, - steps=10, - run_eagerly=False, - with_normalization_layer=False, - callbacks=None, - use_dataset_creator=True): - if callbacks is None: - callbacks = [] 
- - model, default_callbacks = self._model_compile( + """The base class for DatasetCreator with Model.fit tests.""" + + def _get_dataset_fn(self, use_lookup_layer): + + if use_lookup_layer: + + filepath = os.path.join(self.get_temp_dir(), "vocab") + with open(filepath, "w") as f: + f.write("\n".join(["earth", "wind", "and", "fire"])) + + def dataset_fn(input_context): + del input_context + lookup_layer = string_lookup.StringLookup( + num_oov_indices=1, vocabulary=filepath + ) + x = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + y = np.array([0, 1]) + map_fn = lambda x, y: (lookup_layer(x), y) + return ( + tf.data.Dataset.from_tensor_slices((x, y)) + .shuffle(10) + .repeat() + .batch(2) + .map(map_fn) + ) + + else: + + def dataset_fn(input_context): + del input_context + x = tf.random.uniform((10, 10)) + y = tf.random.uniform((10,)) + return ( + tf.data.Dataset.from_tensor_slices((x, y)) + .shuffle(10) + .repeat() + .batch(2) + ) + + return dataset_fn + + def _model_compile( + self, + strategy, + steps_per_execution=1, + run_eagerly=False, + with_normalization_layer=False, + jit_compile=None, + ): + class ResultAssertingCallback(callbacks_lib.Callback): + """A callback that asserts the result of the tests.""" + + def __init__(self): + self._prev_epoch = -1 + + def on_epoch_end(self, epoch, logs=None): + logging.info("testModelFit: epoch=%r, logs=%r", epoch, logs) + if epoch <= self._prev_epoch: + raise RuntimeError( + "Epoch is supposed to be larger than previous." + ) + self._prev_epoch = epoch + is_loss_float = logs.get( + "loss", None + ) is not None and isinstance(logs["loss"], (float, np.floating)) + if not is_loss_float: + raise RuntimeError( + "loss is supposed to be in the logs and float." + ) + + with strategy.scope(): + model = sequential.Sequential([core_layers.Dense(10)]) + if with_normalization_layer: + norm = keras.layers.BatchNormalization( + axis=-1, input_shape=(4, 4, 3), momentum=0.8 + ) + model.add(norm) + model.add(core_layers.Dense(1, activation="sigmoid")) + self._accuracy_metric = keras.metrics.Accuracy() + + model.compile( + gradient_descent.SGD(), + loss="binary_crossentropy", + metrics=[self._accuracy_metric], + steps_per_execution=steps_per_execution, + run_eagerly=run_eagerly, + jit_compile=jit_compile, + ) + return model, [ResultAssertingCallback()] + + def _model_fit( + self, + strategy, + steps_per_execution=1, + validation_data=None, + x=None, + y=None, + shuffle=True, + batch_size=None, + steps_per_epoch=10, + run_eagerly=False, + with_normalization_layer=False, + callbacks=None, + use_lookup_layer=False, + use_dataset_creator=True, + verbose="auto", + jit_compile=None, + ): + if callbacks is None: + callbacks = [] + + model, default_callbacks = self._model_compile( + strategy, + steps_per_execution, + run_eagerly, + with_normalization_layer, + jit_compile, + ) + callbacks += default_callbacks + + if x is None: + if use_dataset_creator: + x = dataset_creator.DatasetCreator( + self._get_dataset_fn(use_lookup_layer) + ) + else: + x = self._get_dataset_fn(use_lookup_layer)(None) + + if validation_data is None: + if use_dataset_creator: + validation_data = dataset_creator.DatasetCreator( + self._get_dataset_fn(use_lookup_layer) + ) + else: + validation_data = self._get_dataset_fn(use_lookup_layer)(None) + + model.fit( + x, + y, + shuffle=shuffle, + batch_size=batch_size, + epochs=10, + steps_per_epoch=steps_per_epoch, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=steps_per_epoch, 
+ verbose=verbose, + ) + return model + + def _model_evaluate( + self, + strategy, + steps_per_execution=1, + x=None, + y=None, + batch_size=None, + steps=10, + run_eagerly=False, + with_normalization_layer=False, + callbacks=None, + use_dataset_creator=True, + ): + if callbacks is None: + callbacks = [] + + model, default_callbacks = self._model_compile( + strategy, + steps_per_execution, + run_eagerly, + with_normalization_layer, + ) + callbacks += default_callbacks + + def dataset_fn(input_context): + del input_context + x = tf.random.uniform((10, 10)) + y = tf.random.uniform((10, 1)) + return ( + tf.data.Dataset.from_tensor_slices((x, y)) + .shuffle(10) + .repeat() + .batch(8) + ) + + if x is None: + if use_dataset_creator: + x = dataset_creator.DatasetCreator(dataset_fn) + else: + x = dataset_fn(None) + + model.evaluate( + x=x, y=y, steps=steps, callbacks=callbacks, batch_size=batch_size + ) + return model + + def _model_predict( + self, strategy, - steps_per_execution, - run_eagerly, - with_normalization_layer, - ) - callbacks += default_callbacks - - def dataset_fn(input_context): - del input_context - x = tf.random.uniform((10, 10)) - y = tf.random.uniform((10, 1)) - return tf.data.Dataset.from_tensor_slices( - (x, y)).shuffle(10).repeat().batch(8) - - if x is None: - if use_dataset_creator: - x = dataset_creator.DatasetCreator(dataset_fn) - else: - x = dataset_fn(None) - - model.evaluate( - x=x, y=y, steps=steps, callbacks=callbacks, batch_size=batch_size) - return model - - def _model_predict( - self, - strategy, - model=None, - steps_per_execution=1, - test_data=None, - steps=10, - with_normalization_layer=False, - ): - callbacks = [] - - if model is None: - model, default_callbacks = self._model_compile( - strategy, - steps_per_execution, - with_normalization_layer=with_normalization_layer, - ) - callbacks += default_callbacks - - def create_test_data(): - x = tf.constant([[1.], [2.], [3.], [1.], [5.], [1.]]) - return tf.data.Dataset.from_tensor_slices(x).repeat().batch(2) - - if test_data is None: - test_data = create_test_data() - - predictions = model.predict(x=test_data, steps=steps, callbacks=callbacks) - predictions = np.around(predictions, 4) - return model, predictions + model=None, + steps_per_execution=1, + test_data=None, + steps=10, + with_normalization_layer=False, + ): + callbacks = [] + + if model is None: + model, default_callbacks = self._model_compile( + strategy, + steps_per_execution, + with_normalization_layer=with_normalization_layer, + ) + callbacks += default_callbacks + + def create_test_data(): + x = tf.constant([[1.0], [2.0], [3.0], [1.0], [5.0], [1.0]]) + return tf.data.Dataset.from_tensor_slices(x).repeat().batch(2) + + if test_data is None: + test_data = create_test_data() + + predictions = model.predict( + x=test_data, steps=steps, callbacks=callbacks + ) + predictions = np.around(predictions, 4) + return model, predictions diff --git a/keras/distribute/distribute_coordinator_utils.py b/keras/distribute/distribute_coordinator_utils.py index fe3f625d36c8..9aa95008b3f5 100644 --- a/keras/distribute/distribute_coordinator_utils.py +++ b/keras/distribute/distribute_coordinator_utils.py @@ -25,13 +25,15 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import copy import json import os import threading import time + +import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.core.protobuf import cluster_pb2 from tensorflow.python.platform import tf_logging as logging @@ -40,638 +42,742 
@@ def get_current_worker_context(): - """Returns the current task context.""" - try: - return _worker_context.current - except AttributeError: - return None + """Returns the current task context.""" + try: + return _worker_context.current + except AttributeError: + return None class _TaskType: - PS = "ps" - WORKER = "worker" - CHIEF = "chief" - EVALUATOR = "evaluator" - CLIENT = "client" + PS = "ps" + WORKER = "worker" + CHIEF = "chief" + EVALUATOR = "evaluator" + CLIENT = "client" def _get_num_workers(cluster_spec): - """Gets number of workers including chief.""" - if not cluster_spec: - return 0 - return len(cluster_spec.as_dict().get(_TaskType.WORKER, [])) + len( - cluster_spec.as_dict().get(_TaskType.CHIEF, [])) + """Gets number of workers including chief.""" + if not cluster_spec: + return 0 + return len(cluster_spec.as_dict().get(_TaskType.WORKER, [])) + len( + cluster_spec.as_dict().get(_TaskType.CHIEF, []) + ) class _WorkerContext: - """The worker context class. - - This context object provides configuration information for each task. One - context manager with a worker context object will be created per - invocation to the `worker_fn` where `get_current_worker_context` can be called - to access the worker context object. - """ - - def __init__(self, - strategy, - cluster_spec, - task_type, - task_id, - session_config=None, - rpc_layer="grpc", - worker_barrier=None): - """Initialize the worker context object. + """The worker context class. - Args: - strategy: a `DistributionStrategy` object. - cluster_spec: a ClusterSpec object. It can be empty or None in the local - training case. - task_type: a string indicating the role of the corresponding task, such as - "worker" or "ps". It can be None if it is local training or in-graph - replicated training. - task_id: an integer indicating id of the corresponding task. It can be - None if it is local training or in-graph replicated training. - session_config: an optional `tf.compat.v1.ConfigProto` object. - rpc_layer: optional string specifying the RPC protocol for communication - with worker masters. If None or empty, hosts in the `cluster_spec` will - be used directly. - worker_barrier: optional, the barrier object for worker synchronization. + This context object provides configuration information for each task. One + context manager with a worker context object will be created per invocation + to the `worker_fn` where `get_current_worker_context` can be called to + access the worker context object. 
""" - self._strategy = strategy - self._cluster_spec = cluster_spec - self._task_type = task_type - self._task_id = task_id - self._session_config = session_config - self._worker_barrier = worker_barrier - self._rpc_layer = rpc_layer - self._master_target = self._get_master_target() - self._num_workers = _get_num_workers(cluster_spec) - self._is_chief_node = self._is_chief() - - def _debug_message(self): - if self._cluster_spec: - return "[cluster_spec: %r, task_type: %r, task_id: %r]" % ( - self._cluster_spec, self.task_type, self.task_id) - else: - return "[local]" - - def __enter__(self): - old_context = get_current_worker_context() - if old_context: - raise ValueError( - "You cannot run distribute coordinator in a `worker_fn`.\t" + - self._debug_message()) - # pylint: disable=protected-access - _worker_context.current = self - - def __exit__(self, unused_exception_type, unused_exception_value, - unused_traceback): - # pylint: disable=protected-access - _worker_context.current = None - - def _get_master_target(self): - """Return the master target for a task.""" - # If cluster_spec is None or empty, we use local master. - if not self._cluster_spec or self._task_type == _TaskType.EVALUATOR: - return "" - - # If task_type is None, then it is in-graph replicated training. In this - # case we use the chief or first worker's master target. - if not self._task_type: - if _TaskType.CHIEF in self._cluster_spec.jobs: - task_type = _TaskType.CHIEF - task_id = 0 - else: - assert _TaskType.WORKER in self._cluster_spec.jobs - task_type = _TaskType.WORKER - task_id = 0 + + def __init__( + self, + strategy, + cluster_spec, + task_type, + task_id, + session_config=None, + rpc_layer="grpc", + worker_barrier=None, + ): + """Initialize the worker context object. + + Args: + strategy: a `DistributionStrategy` object. + cluster_spec: a ClusterSpec object. It can be empty or None in the + local training case. + task_type: a string indicating the role of the corresponding task, + such as "worker" or "ps". It can be None if it is local training or + in-graph replicated training. + task_id: an integer indicating id of the corresponding task. It can be + None if it is local training or in-graph replicated training. + session_config: an optional `tf.compat.v1.ConfigProto` object. + rpc_layer: optional string specifying the RPC protocol for + communication with worker masters. If None or empty, hosts in the + `cluster_spec` will be used directly. + worker_barrier: optional, the barrier object for worker + synchronization. 
+ """ + self._strategy = strategy + self._cluster_spec = cluster_spec + self._task_type = task_type + self._task_id = task_id + self._session_config = session_config + self._worker_barrier = worker_barrier + self._rpc_layer = rpc_layer + self._master_target = self._get_master_target() + self._num_workers = _get_num_workers(cluster_spec) + self._is_chief_node = self._is_chief() + + def _debug_message(self): + if self._cluster_spec: + return "[cluster_spec: %r, task_type: %r, task_id: %r]" % ( + self._cluster_spec, + self.task_type, + self.task_id, + ) + else: + return "[local]" + + def __enter__(self): + old_context = get_current_worker_context() + if old_context: + raise ValueError( + "You cannot run distribute coordinator in a `worker_fn`.\t" + + self._debug_message() + ) + + _worker_context.current = self + + def __exit__( + self, unused_exception_type, unused_exception_value, unused_traceback + ): + + _worker_context.current = None + + def _get_master_target(self): + """Return the master target for a task.""" + # If cluster_spec is None or empty, we use local master. + if not self._cluster_spec or self._task_type == _TaskType.EVALUATOR: + return "" + + # If task_type is None, then it is in-graph replicated training. In this + # case we use the chief or first worker's master target. + if not self._task_type: + if _TaskType.CHIEF in self._cluster_spec.jobs: + task_type = _TaskType.CHIEF + task_id = 0 + else: + assert _TaskType.WORKER in self._cluster_spec.jobs + task_type = _TaskType.WORKER + task_id = 0 + else: + task_type = self._task_type + task_id = self._task_id + + prefix = "" + if self._rpc_layer: + prefix = self._rpc_layer + "://" + return prefix + self._cluster_spec.job_tasks(task_type)[task_id or 0] + + def _is_chief(self): + """Return whether the task is the chief worker.""" + if not self._cluster_spec or self._task_type in [ + _TaskType.CHIEF, + _TaskType.EVALUATOR, + None, + ]: + return True + + # If not local and chief not in the cluster_spec, use the first worker + # as chief. + if ( + _TaskType.CHIEF not in self._cluster_spec.jobs + and self._task_type == _TaskType.WORKER + and self._task_id == 0 + ): + return True + return False + + def wait_for_other_workers(self): + """Waits for other workers to reach the same call to this method. + + Raises: + ValueError: if `worker_barrier` is not passed to the __init__ method. + """ + if not self._worker_barrier: + # TODO(yuefengz): we should throw an error in independent worker + # mode. + return + self._worker_barrier.wait() + + def session_creator( + self, + scaffold=None, + config=None, + checkpoint_dir=None, + checkpoint_filename_with_path=None, + max_wait_secs=7200, + ): + """Returns a session creator. + + The returned session creator will be configured with the correct master + target and session configs. It will also run either init ops or ready + ops by querying the `strategy` object when `create_session` is called on + it. + + Args: + scaffold: A `Scaffold` used for gathering or building supportive ops. + If not specified a default one is created. It's used to finalize the + graph. + config: `ConfigProto` proto used to configure the session. + checkpoint_dir: A string. Optional path to a directory where to + restore variables. + checkpoint_filename_with_path: Full file name path to the checkpoint + file. Only one of `checkpoint_dir` or + `checkpoint_filename_with_path` can be specified. + max_wait_secs: Maximum time to wait for the session to become + available. + + Returns: + a descendant of SessionCreator. 
+ """ + if config: + session_config = copy.deepcopy(config) + session_config.MergeFrom(self._session_config) + else: + session_config = self._session_config + + if ( + not self._strategy + or self._strategy.extended.experimental_should_init + ): + logging.info( + "Creating chief session creator with config: %r", config + ) + return tf.compat.v1.train.ChiefSessionCreator( + scaffold, + master=self.master_target, + config=session_config, + checkpoint_dir=checkpoint_dir, + checkpoint_filename_with_path=checkpoint_filename_with_path, + ) + else: + logging.info( + "Creating worker session creator with config: %r", config + ) + return tf.compat.v1.train.WorkerSessionCreator( + scaffold, + master=self.master_target, + config=session_config, + max_wait_secs=max_wait_secs, + ) + + @property + def session_config(self): + return copy.deepcopy(self._session_config) + + @property + def has_barrier(self): + """Whether the barrier is set or not.""" + return self._worker_barrier is not None + + @property + def distributed_mode(self): + """Whether it is distributed training or not.""" + return ( + bool(self._cluster_spec) and self._task_type != _TaskType.EVALUATOR + ) + + @property + def cluster_spec(self): + """Returns a copy of the cluster_spec object.""" + return copy.deepcopy(self._cluster_spec) + + @property + def task_type(self): + """Returns the role of the corresponding task.""" + return self._task_type + + @property + def task_id(self): + """Returns the id or index of the corresponding task.""" + return self._task_id + + @property + def master_target(self): + """Returns the session master for the corresponding task to connect + to.""" + return self._master_target + + @property + def is_chief(self): + """Returns whether the task is a chief node.""" + return self._is_chief_node + + @property + def num_workers(self): + """Returns number of workers in the cluster, including chief.""" + return self._num_workers + + @property + def experimental_should_init(self): + """Whether to run init ops.""" + return self._strategy.extended.experimental_should_init + + @property + def should_checkpoint(self): + """Whether to save checkpoint.""" + return self._strategy.extended.should_checkpoint + + @property + def should_save_summary(self): + """Whether to save summaries.""" + return self._strategy.extended.should_save_summary + + +def _run_single_worker( + worker_fn, + strategy, + cluster_spec, + task_type, + task_id, + session_config, + rpc_layer="", + worker_barrier=None, + coord=None, +): + """Runs a single worker by calling `worker_fn` under context.""" + session_config = copy.deepcopy(session_config) + strategy = copy.deepcopy(strategy) + # If there is an EVALUATOR task, we run single-machine eval on that task. + if task_type == _TaskType.EVALUATOR: + # It is possible to not have a strategy object for EVALUATOR task. + if strategy: + strategy.configure(session_config) else: - task_type = self._task_type - task_id = self._task_id - - prefix = "" - if self._rpc_layer: - prefix = self._rpc_layer + "://" - return prefix + self._cluster_spec.job_tasks(task_type)[task_id or 0] - - def _is_chief(self): - """Return whether the task is the chief worker.""" - if (not self._cluster_spec or - self._task_type in [_TaskType.CHIEF, _TaskType.EVALUATOR, None]): - return True - - # If not local and chief not in the cluster_spec, use the first worker as - # chief. 
- if (_TaskType.CHIEF not in self._cluster_spec.jobs and
- self._task_type == _TaskType.WORKER and self._task_id == 0):
- return True
- return False
-
- def wait_for_other_workers(self):
- """Waits for other workers to reach the same call to this method.
+ assert strategy
+ strategy.configure(session_config, cluster_spec, task_type, task_id)
- Raises:
- ValueError: if `worker_barrier` is not passed to the __init__ method.
- """
- if not self._worker_barrier:
- # TODO(yuefengz): we should throw an error in independent worker mode.
- return
- self._worker_barrier.wait()
-
- def session_creator(self,
- scaffold=None,
- config=None,
- checkpoint_dir=None,
- checkpoint_filename_with_path=None,
- max_wait_secs=7200):
- """Returns a session creator.
-
- The returned session creator will be configured with the correct master
- target and session configs. It will also run either init ops or ready ops
- by querying the `strategy` object when `create_session` is called on it.
+ context = _WorkerContext(
+ strategy,
+ cluster_spec,
+ task_type,
+ task_id,
+ session_config=session_config,
+ rpc_layer=rpc_layer,
+ worker_barrier=worker_barrier,
+ )
+ with context:
+ if coord:
+ with coord.stop_on_exception():
+ return worker_fn(strategy)
+ else:
+ return worker_fn(strategy)
- Args:
- scaffold: A `Scaffold` used for gathering or building supportive ops. If
- not specified a default one is created. It's used to finalize the graph.
- config: `ConfigProto` proto used to configure the session.
- checkpoint_dir: A string. Optional path to a directory where to restore
- variables.
- checkpoint_filename_with_path: Full file name path to the checkpoint file.
- Only one of `checkpoint_dir` or `checkpoint_filename_with_path` can be
- specified.
- max_wait_secs: Maximum time to wait for the session to become available.
- Returns:
- a descendant of SessionCreator.
- """
- if config:
- session_config = copy.deepcopy(config)
- session_config.MergeFrom(self._session_config)
+def _split_cluster_for_evaluator(cluster_spec, task_type):
+ """Split the cluster for evaluator since it needn't talk to other tasks."""
+ # Splitting the cluster is important to prevent the evaluator from talking
+ # to other tasks in the cluster. Because we allow the evaluator not to use a
+ # distribution strategy, ops in the evaluator task may have unspecified
+ # devices, and those ops may end up on other tasks if we don't split the
+ # cluster.
+ # Note: if you bypass distribute coordinator and bring the cluster yourself,
+ # you can equivalently set device filters to split clusters. This is already
+ # done by distribution strategy's `update_config_proto` method.
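The split described in the comment above can be pictured with plain dicts standing in for a real `ClusterSpec` (an illustrative sketch only, with hypothetical host names; the actual implementation below round-trips through `normalize_cluster_spec`):

```
# Hypothetical three-task cluster, with plain dicts in place of ClusterSpec.
cluster = {
    "chief": ["host0:2222"],
    "worker": ["host1:2222"],
    "evaluator": ["host2:2222"],
}

# The evaluator keeps only itself; every other task drops the evaluator.
evaluator_view = {"evaluator": cluster["evaluator"]}
trainer_view = {k: v for k, v in cluster.items() if k != "evaluator"}

assert "worker" not in evaluator_view
assert "evaluator" not in trainer_view
```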
+ new_cluster_spec = normalize_cluster_spec(cluster_spec).as_dict() + if task_type == _TaskType.EVALUATOR: + assert _TaskType.EVALUATOR in new_cluster_spec + new_cluster_spec = { + _TaskType.EVALUATOR: new_cluster_spec[_TaskType.EVALUATOR] + } else: - session_config = self._session_config - - if not self._strategy or self._strategy.extended.experimental_should_init: - logging.info("Creating chief session creator with config: %r", config) - return tf.compat.v1.train.ChiefSessionCreator( - scaffold, - master=self.master_target, - config=session_config, - checkpoint_dir=checkpoint_dir, - checkpoint_filename_with_path=checkpoint_filename_with_path) + new_cluster_spec.pop(_TaskType.EVALUATOR, None) + return normalize_cluster_spec(new_cluster_spec) + + +def _run_std_server( + cluster_spec=None, + task_type=None, + task_id=None, + session_config=None, + rpc_layer=None, + environment=None, +): + """Runs a standard server.""" + # Check if the Server is already running. If so, assert that no + # configuration options have changed, and return the existing Server. This + # allows us to call `run_distribute_coordinator` multiple times. + if getattr(_thread_local, "server", None) is not None: + assert _thread_local.cluster_spec == cluster_spec + assert _thread_local.task_type == task_type + assert _thread_local.task_id == task_id + assert _thread_local.session_config_str == repr(session_config) + assert _thread_local.rpc_layer == rpc_layer + assert _thread_local.environment == environment + return _thread_local.server else: - logging.info("Creating worker session creator with config: %r", config) - return tf.compat.v1.train.WorkerSessionCreator( - scaffold, - master=self.master_target, - config=session_config, - max_wait_secs=max_wait_secs) - - @property - def session_config(self): - return copy.deepcopy(self._session_config) - - @property - def has_barrier(self): - """Whether the barrier is set or not.""" - return self._worker_barrier is not None - - @property - def distributed_mode(self): - """Whether it is distributed training or not.""" - return bool(self._cluster_spec) and self._task_type != _TaskType.EVALUATOR - - @property - def cluster_spec(self): - """Returns a copy of the cluster_spec object.""" - return copy.deepcopy(self._cluster_spec) - - @property - def task_type(self): - """Returns the role of the corresponding task.""" - return self._task_type - - @property - def task_id(self): - """Returns the id or index of the corresponding task.""" - return self._task_id - - @property - def master_target(self): - """Returns the session master for the corresponding task to connect to.""" - return self._master_target - - @property - def is_chief(self): - """Returns whether the task is a chief node.""" - return self._is_chief_node - - @property - def num_workers(self): - """Returns number of workers in the cluster, including chief.""" - return self._num_workers - - @property - def experimental_should_init(self): - """Whether to run init ops.""" - return self._strategy.extended.experimental_should_init - - @property - def should_checkpoint(self): - """Whether to save checkpoint.""" - return self._strategy.extended.should_checkpoint - - @property - def should_save_summary(self): - """Whether to save summaries.""" - return self._strategy.extended.should_save_summary - - -def _run_single_worker(worker_fn, - strategy, - cluster_spec, - task_type, - task_id, - session_config, - rpc_layer="", - worker_barrier=None, - coord=None): - """Runs a single worker by calling `worker_fn` under context.""" - 
session_config = copy.deepcopy(session_config) - strategy = copy.deepcopy(strategy) - # If there is an EVALUATOR task, we run single-machine eval on that task. - if task_type == _TaskType.EVALUATOR: - # It is possible to not have a strategy object for EVALUATOR task. - if strategy: - strategy.configure(session_config) - else: - assert strategy - strategy.configure(session_config, cluster_spec, task_type, task_id) - - context = _WorkerContext( - strategy, - cluster_spec, - task_type, - task_id, - session_config=session_config, - rpc_layer=rpc_layer, - worker_barrier=worker_barrier) - with context: - if coord: - with coord.stop_on_exception(): - return worker_fn(strategy) + # This method is not thread-safe. + _thread_local.server_started = True + _thread_local.cluster_spec = cluster_spec + _thread_local.task_type = task_type + _thread_local.task_id = task_id + _thread_local.session_config_str = repr(session_config) + _thread_local.rpc_layer = rpc_layer + _thread_local.environment = environment + + assert cluster_spec + target = cluster_spec.task_address(task_type, task_id) + if rpc_layer: + target = rpc_layer + "://" + target + + class _FakeServer: + """A fake server that runs a master session.""" + + def start(self): + # A tensorflow server starts when a remote session is created. + logging.info( + "Creating a remote session to start a TensorFlow server, " + "target = %r, session_config=%r", + target, + session_config, + ) + tf.compat.v1.Session(target=target, config=session_config) + + def join(self): + while True: + time.sleep(5) + + if environment == "google": + server = _FakeServer() else: - return worker_fn(strategy) - - -def _split_cluster_for_evaluator(cluster_spec, task_type): - """Split the cluster for evaluator since it needn't talk to other tasks.""" - # Splitting the cluster is important to prevent the evaluator from talking to - # other tasks in the cluster. Since we allow evaluator not to use - # distribution strategies and as a result ops in the evaluator task may have - # unspecified devices. Those ops may end up on other tasks if we don't split - # the cluster. - # Note: if you bypass distribute coordinator and bring the cluster yourself, - # you can equivalently set device filters to split clusters. This is already - # done by distribution strategy's `update_config_proto` method. - new_cluster_spec = normalize_cluster_spec(cluster_spec).as_dict() - if task_type == _TaskType.EVALUATOR: - assert _TaskType.EVALUATOR in new_cluster_spec - new_cluster_spec = { - _TaskType.EVALUATOR: new_cluster_spec[_TaskType.EVALUATOR] - } - else: - new_cluster_spec.pop(_TaskType.EVALUATOR, None) - return normalize_cluster_spec(new_cluster_spec) - - -def _run_std_server(cluster_spec=None, - task_type=None, - task_id=None, - session_config=None, - rpc_layer=None, - environment=None): - """Runs a standard server.""" - # Check if the Server is already running. If so, assert that no configuration - # options have changed, and return the existing Server. This allows us to - # call `run_distribute_coordinator` multiple times. - if getattr(_thread_local, "server", None) is not None: - assert _thread_local.cluster_spec == cluster_spec - assert _thread_local.task_type == task_type - assert _thread_local.task_id == task_id - assert _thread_local.session_config_str == repr(session_config) - assert _thread_local.rpc_layer == rpc_layer - assert _thread_local.environment == environment - return _thread_local.server - else: - # This method is not thread-safe. 
- _thread_local.server_started = True - _thread_local.cluster_spec = cluster_spec - _thread_local.task_type = task_type - _thread_local.task_id = task_id - _thread_local.session_config_str = repr(session_config) - _thread_local.rpc_layer = rpc_layer - _thread_local.environment = environment - - assert cluster_spec - target = cluster_spec.task_address(task_type, task_id) - if rpc_layer: - target = rpc_layer + "://" + target - - class _FakeServer: - """A fake server that runs a master session.""" - - def start(self): - # A tensorflow server starts when a remote session is created. - logging.info( - "Creating a remote session to start a TensorFlow server, " - "target = %r, session_config=%r", target, session_config) - tf.compat.v1.Session(target=target, config=session_config) - - def join(self): - while True: - time.sleep(5) - - if environment == "google": - server = _FakeServer() - else: - if session_config: - logging.info( - "Starting standard TensorFlow server, target = %r, session_config= " - "%r", target, session_config) + if session_config: + logging.info( + "Starting standard TensorFlow server, target = %r, " + "session_config = %r", + target, + session_config, + ) + else: + logging.info( + "Starting standard TensorFlow server, target = %r", target + ) + cluster_spec = _split_cluster_for_evaluator(cluster_spec, task_type) + server = tf.distribute.Server( + cluster_spec, + job_name=task_type, + task_index=task_id, + config=session_config, + protocol=rpc_layer, + ) + + server.start() + _thread_local.server = server + return server + + +def _configure_session_config_for_std_servers( + strategy, eval_strategy, session_config, cluster_spec, task_type, task_id +): + + """Call strategy's `configure` to mutate the session_config. + + The session_config is currently needed as default config for a TensorFlow + server. In the future, we should be able to remove this method and only pass + the session config to a client session. + """ + if task_type == _TaskType.EVALUATOR: + if eval_strategy: + eval_strategy.configure(session_config=session_config) else: - logging.info("Starting standard TensorFlow server, target = %r", target) - cluster_spec = _split_cluster_for_evaluator(cluster_spec, task_type) - server = tf.distribute.Server( - cluster_spec, - job_name=task_type, - task_index=task_id, - config=session_config, - protocol=rpc_layer) - - server.start() - _thread_local.server = server - return server - - -def _configure_session_config_for_std_servers(strategy, eval_strategy, - session_config, cluster_spec, - task_type, task_id): - # pylint: disable=g-doc-args - """Call strategy's `configure` to mutate the session_config. - - The session_config is currently needed as default config for a TensorFlow - server. In the future, we should be able to remove this method and only pass - the session config to a client session. - """ - if task_type == _TaskType.EVALUATOR: - if eval_strategy: - eval_strategy.configure(session_config=session_config) - else: - # The strategy may be shared in standalone client mode. - strategy = copy.deepcopy(strategy) - strategy.configure( - session_config=session_config, - cluster_spec=cluster_spec, - task_type=task_type, - task_id=task_id) - # Remove the device filters specific to the strategy, so that the - # TensorFlow server brought up with one strategy can be used by other - # strategies. The device filters can be set in the client side as well. - del session_config.device_filters[:] + # The strategy may be shared in standalone client mode. 
+ strategy = copy.deepcopy(strategy) + strategy.configure( + session_config=session_config, + cluster_spec=cluster_spec, + task_type=task_type, + task_id=task_id, + ) + # Remove the device filters specific to the strategy, so that the + # TensorFlow server brought up with one strategy can be used by other + # strategies. The device filters can be set in the client side as well. + del session_config.device_filters[:] # TODO(yuefengz): propagate cluster_spec in the STANDALONE_CLIENT mode. # TODO(yuefengz): we may need a smart way to figure out whether the current task # is the special task when we support cluster_spec propagation. -def run_distribute_coordinator(worker_fn, - strategy, - eval_fn=None, - eval_strategy=None, - cluster_spec=None, - task_type=None, - task_id=None, - session_config=None, - rpc_layer="grpc"): - """Runs the coordinator for distributed TensorFlow. - - This function runs a split coordinator for distributed TensorFlow in its - default mode, i.e the STANDALONE_CLIENT mode. Given a `cluster_spec` - specifying server addresses and their roles in a cluster, this coordinator - will figure out how to set them up, give the underlying function the right - targets for master sessions via a scope object and coordinate their training. - The cluster consisting of standard servers needs to be brought up either with - the standard server binary or with a binary running distribute coordinator - with `task_type` set to non-client type which will then turn into standard - servers. - - In addition to be the distribute coordinator, this is also the source of - configurations for each job in the distributed training. As there are multiple - ways to configure a distributed TensorFlow cluster, its context object - provides these configurations so that users or higher-level APIs don't have to - figure out the configuration for each job by themselves. - - In the between-graph replicated training, this coordinator will create - multiple threads and each calls the `worker_fn` which is supposed to create - its own graph and connect to one worker master given by its context object. In - the in-graph replicated training, it has only one thread calling this - `worker_fn`. - - Another mode is the INDEPENDENT_WORKER mode where each server runs a - distribute coordinator which will start a standard server and optionally runs - `worker_fn` depending whether it is between-graph training or in-graph - replicated training. - - The `strategy` object is expected to be a DistributionStrategy object which - has implemented methods needed by distributed coordinator such as - `configure(session_config, cluster_spec, task_type, task_id)` which configures - the strategy object for a specific task and `experimental_should_init` - property which instructs the distribute coordinator whether to run init ops - for a task. The distribute coordinator will make a copy of the `strategy` - object, call its `configure` method and pass it to `worker_fn` as an argument. - - The `worker_fn` defines the training logic and is called under its own - worker context which can be accessed to via `get_current_worker_context`. A - worker context provides access to configurations for each task, e.g. the - task_type, task_id, master target and so on. Since `worker_fn` will be called - in a thread and possibly multiple times, caller should be careful when it - accesses global data. For example, it is unsafe to define flags in a - `worker_fn` or to define different environment variables for different - `worker_fn`s. 
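As a rough sketch of the shape such a `worker_fn` takes (the body is hypothetical; only the `strategy` argument and `get_current_worker_context` come from this module):

```
from keras.distribute import distribute_coordinator_utils as dc


def worker_fn(strategy):
    # Called per task under the coordinator's worker context; the context
    # object exposes this task's configuration.
    ctx = dc.get_current_worker_context()
    if ctx.is_chief:
        # A real worker_fn might checkpoint or write summaries only here.
        print("chief, master target:", ctx.master_target)
    return ctx.task_type
```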
- - The `worker_fn` for the between-graph replication is defined as if there is - only one worker corresponding to the `worker_fn` and possibly ps jobs. For - example, when training with parameter servers, it assigns variables to - parameter servers and all other operations to that worker. In the in-graph - replication case, the `worker_fn` has to define operations for all worker - jobs. Using a distribution strategy can simplify the `worker_fn` by not having - to worry about the replication and device assignment of variables and - operations. - - This method is intended to be invoked by high-level APIs so that users don't - have to explicitly call it to run this coordinator. For those who don't use - high-level APIs, to change a program to use this coordinator, wrap everything - in a the program after global data definitions such as commandline flag - definition into the `worker_fn` and get task-specific configurations from - the worker context. - - The `cluster_spec` can be either passed by the argument or parsed from the - "TF_CONFIG" environment variable. Example of a TF_CONFIG: - ``` - cluster = {'chief': ['host0:2222'], - 'ps': ['host1:2222', 'host2:2222'], - 'worker': ['host3:2222', 'host4:2222', 'host5:2222']} - os.environ['TF_CONFIG'] = json.dumps({'cluster': cluster}) - ``` - - If `cluster_spec` is not given in any format, it becomes local training and - this coordinator will connect to a local session. - - For evaluation, if "evaluator" exists in the cluster_spec, a separate thread - will be created to call `eval_fn` with its `task_type` set to "evaluator". If - `eval_fn` is not defined, fall back to `worker_fn`. This implies that - evaluation will be done on a single machine if there is an "evaluator" task. - If "evaluator" doesn't exist in the cluster_spec, it entirely depends on the - `worker_fn` for how to do evaluation. - - Args: - worker_fn: the function to be called. The function should accept a - `strategy` object and will be given access to a context object via a - context manager scope. - strategy: a DistributionStrategy object specifying whether it should run - between-graph replicated training or not, whether to run init ops, etc. - This object will also be configured given `session_config`, - `cluster_spec`, `task_type` and `task_id`. - eval_fn: optional function for "evaluator" task. If `eval_fn` is not passed - in but a "evaluator" task is found in the `cluster_spec`, the `worker_fn` - will be used for this task. - eval_strategy: optional DistributionStrategy object for "evaluator" task. - cluster_spec: a dict, ClusterDef or ClusterSpec specifying servers and roles - in a cluster. If not set or empty, fall back to local training. - task_type: the current task type, optional if this is a client. - task_id: the current task id, optional if this is a client. - session_config: an optional `tf.compat.v1.ConfigProto` object which will be - passed to `strategy`'s `configure` method and used to create a session. - rpc_layer: optional string, the protocol for RPC, e.g. "grpc". - - Raises: - ValueError: if `cluster_spec` is supplied but not a dict or a ClusterDef or - a ClusterSpec. - - Returns: - In the client job, return the value returned by `worker_fn` if - it is in-graph replication or INDEPENDENT_WORKER mode; return None - otherwise. 
- """ - tf_config = json.loads(os.environ.get("TF_CONFIG", "{}")) - rpc_layer = tf_config.get("rpc_layer", rpc_layer) - environment = tf_config.get("environment", None) - - if not cluster_spec: - cluster_spec = tf_config.get("cluster", {}) - task_env = tf_config.get("task", {}) - if task_env: - task_type = task_env.get("type", task_type) - task_id = int(task_env.get("index", task_id)) - - if cluster_spec: - # TODO(yuefengz): validate cluster_spec. - cluster_spec = normalize_cluster_spec(cluster_spec) - elif hasattr(strategy.extended, "_cluster_resolver"): - cluster_resolver = strategy.extended._cluster_resolver # pylint: disable=protected-access - task_type = cluster_resolver.task_type - task_id = cluster_resolver.task_id - rpc_layer = cluster_resolver.rpc_layer or rpc_layer - environment = cluster_resolver.environment - cluster_spec = cluster_resolver.cluster_spec() - - # Setting the session config is necessary for some strategies such as - # CollectiveAllReduceStrategy. - session_config = session_config or tf.compat.v1.ConfigProto( - allow_soft_placement=True) - - if cluster_spec: - logging.info( - "Running Distribute Coordinator with cluster_spec = %r, " - "task_type = %r, task_id = %r, environment = %r, rpc_layer = %r", - cluster_spec.as_dict(), task_type, task_id, environment, rpc_layer) - - if not cluster_spec: - # `mode` is ignored in the local case. - logging.info("Running local Distribute Coordinator.") - _run_single_worker(worker_fn, strategy, None, None, None, session_config, - rpc_layer) - if eval_fn: - _run_single_worker(eval_fn, eval_strategy, None, None, None, - session_config, rpc_layer) - else: - logging.warning("Skipped evaluation since `eval_fn` is not passed in.") - else: - if not eval_fn: - logging.warning("`eval_fn` is not passed in. The `worker_fn` will be " - "used if an \"evaluator\" task exists in the cluster.") - eval_fn = eval_fn or worker_fn - if not eval_strategy: - logging.warning("`eval_strategy` is not passed in. No distribution " - "strategy will be used for evaluation.") - - # Every one starts a standard server, get session config from `configure` - # method. - _configure_session_config_for_std_servers(strategy, eval_strategy, - session_config, cluster_spec, - task_type, task_id) - - if (task_type != _TaskType.EVALUATOR and - not getattr(strategy.extended, "_std_server_started", False)): - # Right now, with eager mode, context is configured with a std server at - # the very beginning while with graph mode the std server is started when - # distribute coordinator is called. We should consolidate these two paths. - server = _run_std_server( - cluster_spec=cluster_spec, - task_type=task_type, - task_id=task_id, - session_config=session_config, - rpc_layer=rpc_layer, - environment=environment) - if task_type in [_TaskType.CHIEF, _TaskType.WORKER]: - if strategy.extended.experimental_between_graph: - # All jobs run `worker_fn` if between-graph. - return _run_single_worker(worker_fn, strategy, cluster_spec, task_type, - task_id, session_config, rpc_layer) - else: - # Only one node runs `worker_fn` if in-graph. - context = _WorkerContext(strategy, cluster_spec, task_type, task_id) - if context.is_chief: - return _run_single_worker(worker_fn, strategy, cluster_spec, None, - None, session_config, rpc_layer) +def run_distribute_coordinator( + worker_fn, + strategy, + eval_fn=None, + eval_strategy=None, + cluster_spec=None, + task_type=None, + task_id=None, + session_config=None, + rpc_layer="grpc", +): + """Runs the coordinator for distributed TensorFlow. 
+
+ This function runs a split coordinator for distributed TensorFlow in its
+ default mode, i.e. the STANDALONE_CLIENT mode. Given a `cluster_spec`
+ specifying server addresses and their roles in a cluster, this coordinator
+ will figure out how to set them up, give the underlying function the right
+ targets for master sessions via a scope object and coordinate their
+ training. The cluster consisting of standard servers needs to be brought up
+ either with the standard server binary or with a binary running distribute
+ coordinator with `task_type` set to a non-client type, which will then turn
+ into standard servers.
+
+ In addition to being the distribute coordinator, this is also the source of
+ configurations for each job in the distributed training. As there are
+ multiple ways to configure a distributed TensorFlow cluster, its context
+ object provides these configurations so that users or higher-level APIs
+ don't have to figure out the configuration for each job by themselves.
+
+ In between-graph replicated training, this coordinator will create
+ multiple threads, each of which calls the `worker_fn`, which is supposed to
+ create its own graph and connect to one worker master given by its context
+ object. In in-graph replicated training, it has only one thread calling
+ this `worker_fn`.
+
+ Another mode is the INDEPENDENT_WORKER mode, where each server runs a
+ distribute coordinator which will start a standard server and optionally
+ runs `worker_fn` depending on whether it is between-graph training or
+ in-graph replicated training.
+
+ The `strategy` object is expected to be a DistributionStrategy object which
+ has implemented methods needed by the distribute coordinator, such as
+ `configure(session_config, cluster_spec, task_type, task_id)`, which
+ configures the strategy object for a specific task, and the
+ `experimental_should_init` property, which instructs the distribute
+ coordinator whether to run init ops for a task. The distribute coordinator
+ will make a copy of the `strategy` object, call its `configure` method and
+ pass it to `worker_fn` as an argument.
+
+ The `worker_fn` defines the training logic and is called under its own
+ worker context, which can be accessed via `get_current_worker_context`. A
+ worker context provides access to configurations for each task, e.g. the
+ task_type, task_id, master target and so on. Since `worker_fn` will be
+ called in a thread and possibly multiple times, the caller should be
+ careful when it accesses global data. For example, it is unsafe to define
+ flags in a `worker_fn` or to define different environment variables for
+ different `worker_fn`s.
+
+ The `worker_fn` for between-graph replication is defined as if there is
+ only one worker corresponding to the `worker_fn` and possibly ps jobs. For
+ example, when training with parameter servers, it assigns variables to
+ parameter servers and all other operations to that worker. In the in-graph
+ replication case, the `worker_fn` has to define operations for all worker
+ jobs. Using a distribution strategy can simplify the `worker_fn` by not
+ having to worry about the replication and device assignment of variables
+ and operations.
+
+ This method is intended to be invoked by high-level APIs so that users
+ don't have to explicitly call it to run this coordinator. For those who
+ don't use high-level APIs, to change a program to use this coordinator,
+ wrap everything in the program after global data definitions, such as
+ command-line flag definitions, into the `worker_fn`, and get task-specific
+ configurations from the worker context.
+
+ The `cluster_spec` can either be passed as an argument or parsed from the
+ "TF_CONFIG" environment variable. Example of a TF_CONFIG:
+ ```
+ cluster = {'chief': ['host0:2222'],
+ 'ps': ['host1:2222', 'host2:2222'],
+ 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
+ os.environ['TF_CONFIG'] = json.dumps({'cluster': cluster})
+ ```
+
+ If `cluster_spec` is not given in any format, it becomes local training and
+ this coordinator will connect to a local session.
+
+ For evaluation, if "evaluator" exists in the cluster_spec, a separate thread
+ will be created to call `eval_fn` with its `task_type` set to "evaluator".
+ If `eval_fn` is not defined, fall back to `worker_fn`. This implies that
+ evaluation will be done on a single machine if there is an "evaluator" task.
+ If "evaluator" doesn't exist in the cluster_spec, it entirely depends on the
+ `worker_fn` for how to do evaluation.
+
+ Args:
+ worker_fn: the function to be called. The function should accept a
+ `strategy` object and will be given access to a context object via a
+ context manager scope.
+ strategy: a DistributionStrategy object specifying whether it should run
+ between-graph replicated training or not, whether to run init ops, etc.
+ This object will also be configured given `session_config`,
+ `cluster_spec`, `task_type` and `task_id`.
+ eval_fn: optional function for the "evaluator" task. If `eval_fn` is not
+ passed in but an "evaluator" task is found in the `cluster_spec`, the
+ `worker_fn` will be used for this task.
+ eval_strategy: optional DistributionStrategy object for the "evaluator"
+ task.
+ cluster_spec: a dict, ClusterDef or ClusterSpec specifying servers and
+ roles in a cluster. If not set or empty, fall back to local training.
+ task_type: the current task type, optional if this is a client.
+ task_id: the current task id, optional if this is a client.
+ session_config: an optional `tf.compat.v1.ConfigProto` object which will
+ be passed to `strategy`'s `configure` method and used to create a
+ session.
+ rpc_layer: optional string, the protocol for RPC, e.g. "grpc".
+
+ Raises:
+ ValueError: if `cluster_spec` is supplied but not a dict or a ClusterDef
+ or a ClusterSpec.
+
+ Returns:
+ In the client job, return the value returned by `worker_fn` if
+ it is in-graph replication or INDEPENDENT_WORKER mode; return None
+ otherwise.
+ """
+ tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
+ rpc_layer = tf_config.get("rpc_layer", rpc_layer)
+ environment = tf_config.get("environment", None)
+
+ if not cluster_spec:
+ cluster_spec = tf_config.get("cluster", {})
+ task_env = tf_config.get("task", {})
+ if task_env:
+ task_type = task_env.get("type", task_type)
+ task_id = int(task_env.get("index", task_id))
+
+ if cluster_spec:
+ # TODO(yuefengz): validate cluster_spec.
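To make the TF_CONFIG path concrete, here is a minimal, self-contained sketch of the environment a single worker task would see; the host names and task index are hypothetical, mirroring the docstring's own example with a `task` entry added:

```
import json
import os

# Hypothetical cluster with one chief and two workers.
cluster = {
    "chief": ["host0:2222"],
    "worker": ["host1:2222", "host2:2222"],
}
os.environ["TF_CONFIG"] = json.dumps(
    {"cluster": cluster, "task": {"type": "worker", "index": 1}}
)

# The same parse the coordinator performs when cluster_spec is not passed in.
tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
assert tf_config.get("cluster", {}) == cluster
assert tf_config["task"]["type"] == "worker"
assert int(tf_config["task"]["index"]) == 1
```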
+ cluster_spec = normalize_cluster_spec(cluster_spec) + elif hasattr(strategy.extended, "_cluster_resolver"): + cluster_resolver = strategy.extended._cluster_resolver + task_type = cluster_resolver.task_type + task_id = cluster_resolver.task_id + rpc_layer = cluster_resolver.rpc_layer or rpc_layer + environment = cluster_resolver.environment + cluster_spec = cluster_resolver.cluster_spec() + + # Setting the session config is necessary for some strategies such as + # CollectiveAllReduceStrategy. + session_config = session_config or tf.compat.v1.ConfigProto( + allow_soft_placement=True + ) + + if cluster_spec: + logging.info( + "Running Distribute Coordinator with cluster_spec = %r, " + "task_type = %r, task_id = %r, environment = %r, rpc_layer = %r", + cluster_spec.as_dict(), + task_type, + task_id, + environment, + rpc_layer, + ) + + if not cluster_spec: + # `mode` is ignored in the local case. + logging.info("Running local Distribute Coordinator.") + _run_single_worker( + worker_fn, strategy, None, None, None, session_config, rpc_layer + ) + if eval_fn: + _run_single_worker( + eval_fn, + eval_strategy, + None, + None, + None, + session_config, + rpc_layer, + ) else: - server.join() - elif task_type == _TaskType.EVALUATOR: - return _run_single_worker(eval_fn, eval_strategy, cluster_spec, task_type, - task_id, session_config, rpc_layer) + logging.warning( + "Skipped evaluation since `eval_fn` is not passed in." + ) else: - if task_type != _TaskType.PS: - raise ValueError("Unexpected task_type: %r" % task_type) - server.join() + if not eval_fn: + logging.warning( + "`eval_fn` is not passed in. The `worker_fn` will be " + 'used if an "evaluator" task exists in the cluster.' + ) + eval_fn = eval_fn or worker_fn + if not eval_strategy: + logging.warning( + "`eval_strategy` is not passed in. No distribution " + "strategy will be used for evaluation." + ) + + # Every one starts a standard server, get session config from + # `configure` method. + _configure_session_config_for_std_servers( + strategy, + eval_strategy, + session_config, + cluster_spec, + task_type, + task_id, + ) + + if task_type != _TaskType.EVALUATOR and not getattr( + strategy.extended, "_std_server_started", False + ): + # Right now, with eager mode, context is configured with a std + # server at the very beginning while with graph mode the std server + # is started when distribute coordinator is called. We should + # consolidate these two paths. + server = _run_std_server( + cluster_spec=cluster_spec, + task_type=task_type, + task_id=task_id, + session_config=session_config, + rpc_layer=rpc_layer, + environment=environment, + ) + if task_type in [_TaskType.CHIEF, _TaskType.WORKER]: + if strategy.extended.experimental_between_graph: + # All jobs run `worker_fn` if between-graph. + return _run_single_worker( + worker_fn, + strategy, + cluster_spec, + task_type, + task_id, + session_config, + rpc_layer, + ) + else: + # Only one node runs `worker_fn` if in-graph. 
+ context = _WorkerContext(
+ strategy, cluster_spec, task_type, task_id
+ )
+ if context.is_chief:
+ return _run_single_worker(
+ worker_fn,
+ strategy,
+ cluster_spec,
+ None,
+ None,
+ session_config,
+ rpc_layer,
+ )
+ else:
+ server.join()
+ elif task_type == _TaskType.EVALUATOR:
+ return _run_single_worker(
+ eval_fn,
+ eval_strategy,
+ cluster_spec,
+ task_type,
+ task_id,
+ session_config,
+ rpc_layer,
+ )
+ else:
+ if task_type != _TaskType.PS:
+ raise ValueError(f"Unexpected task_type: {task_type!r}")
+ server.join()


def normalize_cluster_spec(cluster_spec):
- """Makes `cluster_spec` into a `ClusterSpec` object.
-
- Args:
- cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the
- cluster configurations.
-
- Returns:
- a `ClusterSpec` object.
-
- Raises:
- ValueError: if `cluster_spec` is not a dict or a `ClusterSpec` or a
- `ClusterDef`.
- """
- if isinstance(cluster_spec, (dict, cluster_pb2.ClusterDef)):
- return tf.train.ClusterSpec(cluster_spec)
- elif not isinstance(cluster_spec, tf.train.ClusterSpec):
- raise ValueError(
- "`cluster_spec' should be dict or a `tf.train.ClusterSpec` or a "
- "`tf.train.ClusterDef` object")
- return cluster_spec
+ """Makes `cluster_spec` into a `ClusterSpec` object.
+
+ Args:
+ cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the
+ cluster configurations.
+
+ Returns:
+ a `ClusterSpec` object.
+
+ Raises:
+ ValueError: if `cluster_spec` is not a dict or a `ClusterSpec` or a
+ `ClusterDef`.
+ """
+ if isinstance(cluster_spec, (dict, cluster_pb2.ClusterDef)):
+ return tf.train.ClusterSpec(cluster_spec)
+ elif not isinstance(cluster_spec, tf.train.ClusterSpec):
+ raise ValueError(
+ "`cluster_spec` should be a dict or a `tf.train.ClusterSpec` or a "
+ "`tf.train.ClusterDef` object"
+ )
+ return cluster_spec
diff --git a/keras/distribute/distribute_strategy_test.py b/keras/distribute/distribute_strategy_test.py
index fba7cfbbd12e..5931f4cc7636 100644
--- a/keras/distribute/distribute_strategy_test.py
+++ b/keras/distribute/distribute_strategy_test.py
@@ -14,32 +14,40 @@
# ==============================================================================
"""Tests for tf.keras models using tf.distribute.Strategy."""
-import tensorflow.compat.v2 as tf
-
import os
-from absl.testing import parameterized
import numpy as np
+import tensorflow.compat.v2 as tf
+from absl.testing import parameterized
import keras
-from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver
from keras import backend
-from keras.testing_infra import test_utils
from keras.distribute import distributed_training_utils
from keras.distribute import distributed_training_utils_v1
from keras.distribute import multi_worker_testing_utils
from keras.distribute import optimizer_combinations
from keras.distribute.strategy_combinations import all_strategies
-from keras.distribute.strategy_combinations import multi_worker_mirrored_strategies
-from keras.distribute.strategy_combinations import strategies_minus_default_minus_tpu
+from keras.distribute.strategy_combinations import (
+ multi_worker_mirrored_strategies,
+)
+from keras.distribute.strategy_combinations import (
+ strategies_minus_default_minus_tpu,
+)
from keras.distribute.strategy_combinations import strategies_minus_tpu
from keras.distribute.strategy_combinations import tpu_strategies
from keras.engine import base_layer_utils
from keras.mixed_precision import policy
-from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_keras
+from keras.optimizers import
optimizer as optimizer_base +from keras.optimizers.legacy import gradient_descent as gradient_descent_keras +from keras.testing_infra import test_utils from keras.utils import losses_utils from keras.utils import np_utils +# isort: off +from tensorflow.python.distribute.cluster_resolver import ( + SimpleClusterResolver, +) + _RANDOM_SEED = 1337 _TRAIN_SIZE = 200 _INPUT_SIZE = (10,) @@ -53,2631 +61,3008 @@ def simple_sequential_model(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(16, activation='relu', input_shape=_INPUT_SIZE)) - model.add(keras.layers.Dropout(0.1)) - model.add(keras.layers.Dense(_NUM_CLASS, activation='softmax')) - return model + model = keras.models.Sequential() + model.add( + keras.layers.Dense(16, activation="relu", input_shape=_INPUT_SIZE) + ) + model.add(keras.layers.Dropout(0.1)) + model.add(keras.layers.Dense(_NUM_CLASS, activation="softmax")) + return model def simple_subclassed_model(num_labels=_NUM_CLASS): + class _SimpleMLP(keras.Model): + def __init__(self, num_labels): + super().__init__() + self.dense = keras.layers.Dense(num_labels) - class _SimpleMLP(keras.Model): - - def __init__(self, num_labels): - super().__init__() - self.dense = keras.layers.Dense(num_labels) + def call(self, inputs): + return self.dense(inputs) - def call(self, inputs): - return self.dense(inputs) - - return _SimpleMLP(num_labels) + return _SimpleMLP(num_labels) def simple_multi_inputs_multi_outputs_model(): - input_a = keras.layers.Input(shape=(16,), name='input_a') - input_b = keras.layers.Input(shape=(16,), name='input_b') - - merged = keras.layers.concatenate([input_a, input_b], name='merge') - output_c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged) - output_d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged) - model = keras.models.Model( - inputs=[input_a, input_b], outputs=[output_c, output_d]) - return model + input_a = keras.layers.Input(shape=(16,), name="input_a") + input_b = keras.layers.Input(shape=(16,), name="input_b") + + merged = keras.layers.concatenate([input_a, input_b], name="merge") + output_c = keras.layers.Dense(3, activation="softmax", name="dense_2")( + merged + ) + output_d = keras.layers.Dense(2, activation="softmax", name="dense_3")( + merged + ) + model = keras.models.Model( + inputs=[input_a, input_b], outputs=[output_c, output_d] + ) + return model def get_multi_inputs_multi_outputs_data(): - (a_train, c_train), (a_test, c_test) = test_utils.get_test_data( - train_samples=_TRAIN_SIZE, - test_samples=50, - input_shape=(16,), - num_classes=3, - random_seed=_RANDOM_SEED) - (b_train, d_train), (b_test, d_test) = test_utils.get_test_data( - train_samples=_TRAIN_SIZE, - test_samples=50, - input_shape=(16,), - num_classes=2, - random_seed=_RANDOM_SEED) - (m_train, _), (m_test, _) = test_utils.get_test_data( - train_samples=_TRAIN_SIZE, - test_samples=50, - input_shape=(8,), - num_classes=2, - random_seed=_RANDOM_SEED) - - c_train = np_utils.to_categorical(c_train) - c_test = np_utils.to_categorical(c_test) - d_train = np_utils.to_categorical(d_train) - d_test = np_utils.to_categorical(d_test) - - train_data = { - 'input_a': a_train, - 'input_b': b_train, - 'input_m': m_train, - 'output_c': c_train, - 'output_d': d_train - } - test_data = { - 'input_a': a_test, - 'input_b': b_test, - 'input_m': m_test, - 'output_c': c_test, - 'output_d': d_test - } - - return (train_data, test_data) + (a_train, c_train), (a_test, c_test) = test_utils.get_test_data( + train_samples=_TRAIN_SIZE, + test_samples=50, 
+ input_shape=(16,), + num_classes=3, + random_seed=_RANDOM_SEED, + ) + (b_train, d_train), (b_test, d_test) = test_utils.get_test_data( + train_samples=_TRAIN_SIZE, + test_samples=50, + input_shape=(16,), + num_classes=2, + random_seed=_RANDOM_SEED, + ) + (m_train, _), (m_test, _) = test_utils.get_test_data( + train_samples=_TRAIN_SIZE, + test_samples=50, + input_shape=(8,), + num_classes=2, + random_seed=_RANDOM_SEED, + ) + + c_train = np_utils.to_categorical(c_train) + c_test = np_utils.to_categorical(c_test) + d_train = np_utils.to_categorical(d_train) + d_test = np_utils.to_categorical(d_test) + + train_data = { + "input_a": a_train, + "input_b": b_train, + "input_m": m_train, + "output_c": c_train, + "output_d": d_train, + } + test_data = { + "input_a": a_test, + "input_b": b_test, + "input_m": m_test, + "output_c": c_test, + "output_d": d_test, + } + + return (train_data, test_data) def batch_wrapper(dataset, batch_size, distribution, repeat=None): - if repeat: - dataset = dataset.repeat(repeat) - # TPUs currently require fully defined input shapes, drop_remainder ensures - # the input will have fully defined shapes. - if backend.is_tpu_strategy(distribution): - return dataset.batch(batch_size, drop_remainder=True) - else: - return dataset.batch(batch_size) + if repeat: + dataset = dataset.repeat(repeat) + # TPUs currently require fully defined input shapes, drop_remainder ensures + # the input will have fully defined shapes. + if backend.is_tpu_strategy(distribution): + return dataset.batch(batch_size, drop_remainder=True) + else: + return dataset.batch(batch_size) def get_model(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) - return model + x = keras.layers.Input(shape=(3,), name="input") + y = keras.layers.Dense(4, name="dense")(x) + model = keras.Model(x, y) + return model def get_sample_weights_model(): - x = keras.layers.Input(shape=(1,), name='input') - y = keras.layers.Dense( - 1, kernel_initializer='ones', bias_initializer='zeros', name='dense')( - x) - model = keras.Model(x, y) - return model + x = keras.layers.Input(shape=(1,), name="input") + y = keras.layers.Dense( + 1, kernel_initializer="ones", bias_initializer="zeros", name="dense" + )(x) + model = keras.Model(x, y) + return model def get_dataset(distribution): - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = batch_wrapper(dataset, 10, distribution) - return dataset + inputs = np.zeros((10, 3), dtype=np.float32) + targets = np.zeros((10, 4), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = batch_wrapper(dataset, 10, distribution) + return dataset def get_predict_dataset(distribution): - inputs = np.zeros((10, 3), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices(inputs) - dataset = dataset.repeat(100) - dataset = batch_wrapper(dataset, 10, distribution) - return dataset + inputs = np.zeros((10, 3), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices(inputs) + dataset = dataset.repeat(100) + dataset = batch_wrapper(dataset, 10, distribution) + return dataset def convert_numpy_to_dataset_with_unknown_cardinality(inputs, targets=None): - if targets is not None: - input_slices = (inputs, targets) - dummy_op = (lambda inp, target: True) - else: - input_slices = inputs - dummy_op = 
(lambda inp: True) + if targets is not None: + input_slices = (inputs, targets) + dummy_op = lambda inp, target: True + else: + input_slices = inputs + dummy_op = lambda inp: True - original_dataset = (tf.data.Dataset.from_tensor_slices(input_slices)) - ds_with_unknown_cardinality = ( - original_dataset.filter(dummy_op).batch(10, drop_remainder=True)) - return ds_with_unknown_cardinality + original_dataset = tf.data.Dataset.from_tensor_slices(input_slices) + ds_with_unknown_cardinality = original_dataset.filter(dummy_op).batch( + 10, drop_remainder=True + ) + return ds_with_unknown_cardinality def multi_input_output_model(): - a = keras.layers.Input(shape=(3,), name='input_a') - b = keras.layers.Input(shape=(5,), name='input_b') - # TODO(anjalisridhar): Change the output dimension of the second Dense layer - # once the iterator output validation issue has been fixed. - dense_1 = keras.layers.Dense(7, name='dense_1') - dense_2 = keras.layers.Dense(7, name='dense_2') - c = dense_1(a) - d = dense_2(b) - e = keras.layers.Dropout(0.5, name='dropout')(c) - model = keras.models.Model([a, b], [d, e]) - return model + a = keras.layers.Input(shape=(3,), name="input_a") + b = keras.layers.Input(shape=(5,), name="input_b") + # TODO(anjalisridhar): Change the output dimension of the second Dense layer + # once the iterator output validation issue has been fixed. + dense_1 = keras.layers.Dense(7, name="dense_1") + dense_2 = keras.layers.Dense(7, name="dense_2") + c = dense_1(a) + d = dense_2(b) + e = keras.layers.Dropout(0.5, name="dropout")(c) + model = keras.models.Model([a, b], [d, e]) + return model def strategy_minus_tpu_combinations(): - return tf.__internal__.test.combinations.combine( - distribution=strategies_minus_tpu, mode=['graph', 'eager']) + return tf.__internal__.test.combinations.combine( + distribution=strategies_minus_tpu, mode=["graph", "eager"] + ) def tpu_strategy_combinations(): - return tf.__internal__.test.combinations.combine( - distribution=tpu_strategies, mode=['graph', 'eager']) + return tf.__internal__.test.combinations.combine( + distribution=tpu_strategies, mode=["graph", "eager"] + ) def tpu_strategy_combinations_graph_only(): - return tf.__internal__.test.combinations.combine(distribution=tpu_strategies, mode=['graph']) + return tf.__internal__.test.combinations.combine( + distribution=tpu_strategies, mode=["graph"] + ) def multi_worker_strategy_combinations_eager_only(): - return tf.__internal__.test.combinations.combine( - distribution=multi_worker_mirrored_strategies, mode=['eager']) + return tf.__internal__.test.combinations.combine( + distribution=multi_worker_mirrored_strategies, mode=["eager"] + ) def all_strategy_combinations(): - return strategy_minus_tpu_combinations() + tpu_strategy_combinations( - ) + multi_worker_strategy_combinations_eager_only() + return ( + strategy_minus_tpu_combinations() + + tpu_strategy_combinations() + + multi_worker_strategy_combinations_eager_only() + ) def all_strategy_minus_default_and_tpu_combinations(): - return tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.one_device_strategy, - tf.__internal__.distribute.combinations.one_device_strategy_gpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, - ], - mode=['graph', 'eager']) + return tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.one_device_strategy, + 
tf.__internal__.distribute.combinations.one_device_strategy_gpu, + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, # noqa: E501 + ], + mode=["graph", "eager"], + ) def all_strategy_combinations_minus_default(): - return (all_strategy_minus_default_and_tpu_combinations() + - tpu_strategy_combinations() + - multi_worker_strategy_combinations_eager_only()) + return ( + all_strategy_minus_default_and_tpu_combinations() + + tpu_strategy_combinations() + + multi_worker_strategy_combinations_eager_only() + ) def strategy_and_optimizer_combinations(): - non_tpu_strategies = tf.__internal__.test.combinations.times( - strategy_minus_tpu_combinations(), - tf.__internal__.test.combinations.combine( - optimizer=[ - optimizer_combinations.adagrad_optimizer_v1_fn, - optimizer_combinations.adam_optimizer_v1_fn, - optimizer_combinations.gradient_descent_optimizer_v1_fn, - optimizer_combinations.rmsprop_optimizer_v1_fn, - optimizer_combinations.adadelta_optimizer_keras_v2_fn, - optimizer_combinations.adagrad_optimizer_keras_v2_fn, - optimizer_combinations.adam_optimizer_keras_v2_fn, - optimizer_combinations.adamax_optimizer_keras_v2_fn, - optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, - optimizer_combinations.nadam_optimizer_keras_v2_fn, - optimizer_combinations.rmsprop_optimizer_keras_v2_fn, - optimizer_combinations.ftrl_optimizer_keras_v2_fn - ])) - tpu_strategies_graph = tf.__internal__.test.combinations.combine( - distribution=tpu_strategies, - mode=['graph'], - optimizer=[ - optimizer_combinations.adagrad_optimizer_v1_fn, - optimizer_combinations.adam_optimizer_v1_fn, - optimizer_combinations.gradient_descent_optimizer_v1_fn, - optimizer_combinations.rmsprop_optimizer_v1_fn, - optimizer_combinations.adagrad_optimizer_keras_v2_fn, - optimizer_combinations.adam_optimizer_keras_v2_fn, - optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, - optimizer_combinations.rmsprop_optimizer_keras_v2_fn - ]) - tpu_strategies_eager = tf.__internal__.test.combinations.combine( - distribution=tpu_strategies, - mode=['eager'], - optimizer=[ - optimizer_combinations.adagrad_optimizer_keras_v2_fn, - optimizer_combinations.adam_optimizer_keras_v2_fn, - optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, - optimizer_combinations.rmsprop_optimizer_keras_v2_fn - ]) - multi_worker_eager = tf.__internal__.test.combinations.combine( - distribution=multi_worker_mirrored_strategies, - mode=['eager'], - optimizer=[ - optimizer_combinations.adadelta_optimizer_keras_v2_fn, - optimizer_combinations.adagrad_optimizer_keras_v2_fn, - optimizer_combinations.adam_optimizer_keras_v2_fn, - optimizer_combinations.adamax_optimizer_keras_v2_fn, - optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, - optimizer_combinations.nadam_optimizer_keras_v2_fn, - optimizer_combinations.rmsprop_optimizer_keras_v2_fn, - optimizer_combinations.ftrl_optimizer_keras_v2_fn - ]) - return (non_tpu_strategies + tpu_strategies_eager + tpu_strategies_graph + - multi_worker_eager) + non_tpu_strategies = tf.__internal__.test.combinations.times( + strategy_minus_tpu_combinations(), + tf.__internal__.test.combinations.combine( + optimizer=[ + optimizer_combinations.adagrad_optimizer_v1_fn, + optimizer_combinations.adam_optimizer_v1_fn, + optimizer_combinations.gradient_descent_optimizer_v1_fn, + optimizer_combinations.rmsprop_optimizer_v1_fn, + optimizer_combinations.adadelta_optimizer_keras_v2_fn, + 
optimizer_combinations.adagrad_optimizer_keras_v2_fn, + optimizer_combinations.adam_optimizer_keras_v2_fn, + optimizer_combinations.adamax_optimizer_keras_v2_fn, + optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, + optimizer_combinations.nadam_optimizer_keras_v2_fn, + optimizer_combinations.rmsprop_optimizer_keras_v2_fn, + optimizer_combinations.ftrl_optimizer_keras_v2_fn, + ] + ), + ) + tpu_strategies_graph = tf.__internal__.test.combinations.combine( + distribution=tpu_strategies, + mode=["graph"], + optimizer=[ + optimizer_combinations.adagrad_optimizer_v1_fn, + optimizer_combinations.adam_optimizer_v1_fn, + optimizer_combinations.gradient_descent_optimizer_v1_fn, + optimizer_combinations.rmsprop_optimizer_v1_fn, + optimizer_combinations.adagrad_optimizer_keras_v2_fn, + optimizer_combinations.adam_optimizer_keras_v2_fn, + optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, + optimizer_combinations.rmsprop_optimizer_keras_v2_fn, + ], + ) + tpu_strategies_eager = tf.__internal__.test.combinations.combine( + distribution=tpu_strategies, + mode=["eager"], + optimizer=[ + optimizer_combinations.adagrad_optimizer_keras_v2_fn, + optimizer_combinations.adam_optimizer_keras_v2_fn, + optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, + optimizer_combinations.rmsprop_optimizer_keras_v2_fn, + ], + ) + multi_worker_eager = tf.__internal__.test.combinations.combine( + distribution=multi_worker_mirrored_strategies, + mode=["eager"], + optimizer=[ + optimizer_combinations.adadelta_optimizer_keras_v2_fn, + optimizer_combinations.adagrad_optimizer_keras_v2_fn, + optimizer_combinations.adam_optimizer_keras_v2_fn, + optimizer_combinations.adamax_optimizer_keras_v2_fn, + optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, + optimizer_combinations.nadam_optimizer_keras_v2_fn, + optimizer_combinations.rmsprop_optimizer_keras_v2_fn, + optimizer_combinations.ftrl_optimizer_keras_v2_fn, + ], + ) + return ( + non_tpu_strategies + + tpu_strategies_eager + + tpu_strategies_graph + + multi_worker_eager + ) class BatchCountingCB(keras.callbacks.Callback): + def __init__(self): + super().__init__() + self.train_begin_batches = [] + self.train_end_batches = [] + self.test_begin_batches = [] + self.test_end_batches = [] + self.predict_begin_batches = [] + self.predict_end_batches = [] + + def on_train_batch_begin(self, batch, logs=None): + self.train_begin_batches.append(batch) + + def on_train_batch_end(self, batch, logs=None): + self.train_end_batches.append(batch) + + def on_test_batch_begin(self, batch, logs=None): + self.test_begin_batches.append(batch) + + def on_test_batch_end(self, batch, logs=None): + self.test_end_batches.append(batch) + + def on_predict_batch_begin(self, batch, logs=None): + self.predict_begin_batches.append(batch) + + def on_predict_batch_end(self, batch, logs=None): + self.predict_end_batches.append(batch) + + +class TestDistributionStrategyWithNumpyArrays( + tf.test.TestCase, parameterized.TestCase +): + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_calculating_input_params_no_steps_no_batch_size( + self, distribution + ): + # Calculate the per_replica_batch_size scaling factor for strategies + # that use per_core_batch_size + replica_scale_factor = 1.0 + if not distributed_training_utils.global_batch_size_supported( + distribution + ): + replica_scale_factor = distribution.num_replicas_in_sync + + with self.cached_session(): + # Default global batch size 32 for input with 64 samples run in 2 
+ # steps + steps, batch_size = distributed_training_utils_v1.get_input_params( + distribution, 64, steps=None, batch_size=None + ) + self.assertEqual(batch_size, 32 // replica_scale_factor) + self.assertEqual(steps, 2) + + # Computed global batch size 20 is lower than 32 if we pass fewer + # samples. + steps, batch_size = distributed_training_utils_v1.get_input_params( + distribution, 20, steps=None, batch_size=None + ) + self.assertEqual(batch_size, 20 // replica_scale_factor) + self.assertEqual(steps, 1) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_calculating_input_params_with_steps_no_batch_size( + self, distribution + ): + # Calculate the per_replica_batch_size scaling factor for strategies + # that use per_core_batch_size + replica_scale_factor = 1.0 + if not distributed_training_utils.global_batch_size_supported( + distribution + ): + replica_scale_factor = distribution.num_replicas_in_sync + + with self.cached_session(): + # Computed global batch size is correct when 1 step is + # specified + steps, batch_size = distributed_training_utils_v1.get_input_params( + distribution, 64, steps=1, batch_size=None + ) + self.assertEqual(batch_size, 64 // replica_scale_factor) + self.assertEqual(steps, 1) + + # Computed global batch size is correct when 2 steps are + # specified + steps, batch_size = distributed_training_utils_v1.get_input_params( + distribution, 64, steps=2, batch_size=None + ) + self.assertEqual(batch_size, 32 // replica_scale_factor) + self.assertEqual(steps, 2) + + # All samples cannot be consumed in the specified number of steps + with self.assertRaisesRegex(ValueError, "not divisible by steps"): + distributed_training_utils_v1.get_input_params( + distribution, 63, steps=2, batch_size=None + ) + + # This case differs between strategies due to the + # difference in supported batch size being global or per-replica.
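The inference contract these assertions pin down is compact enough to sketch. The helper below is a simplified illustration of the behavior the tests expect, not the actual implementation in distributed_training_utils_v1; the function name and signature are hypothetical. (The strategy-dependent branch that the comment above announces follows next in the test.)

    # Hypothetical sketch of the steps/batch-size inference exercised above.
    def infer_input_params(num_samples, steps, batch_size, num_replicas,
                           global_batch_supported):
        # Strategies that accept a global batch size report it unscaled;
        # per-replica strategies divide the global batch by replica count.
        scale = 1 if global_batch_supported else num_replicas
        if steps is None and batch_size is None:
            global_batch = min(num_samples, 32)  # default global batch size
            return num_samples // global_batch, global_batch // scale
        if batch_size is None:
            if num_samples % steps:
                raise ValueError("not divisible by steps")
            global_batch = num_samples // steps
            if global_batch % scale:
                raise ValueError(
                    "could not be sharded evenly across the sync replicas")
            return steps, global_batch // scale
        if steps is None:
            return num_samples // (batch_size * scale), batch_size
        return steps, batch_size

    assert infer_input_params(64, None, None, 1, True) == (2, 32)
    assert infer_input_params(63, 3, None, 1, True) == (3, 21)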
+ if replica_scale_factor == 1: + # Computed global batch size is correct even if not shardable + ( + steps, + batch_size, + ) = distributed_training_utils_v1.get_input_params( + distribution, 63, steps=3, batch_size=None + ) + self.assertEqual(batch_size, 21) + self.assertEqual(steps, 3) + else: + # Computed global batch size cannot be sharded across replicas + with self.assertRaisesRegex( + ValueError, + "could not be sharded evenly across the sync replicas", + ): + distributed_training_utils_v1.get_input_params( + distribution, 63, steps=1, batch_size=None + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_calculating_input_params_no_steps_with_batch_size( + self, distribution + ): + # Calculate the per_replica_batch_size scaling factor for strategies + # that use per_core_batch_size + replica_scale_factor = 1.0 + if not distributed_training_utils.global_batch_size_supported( + distribution + ): + replica_scale_factor = distribution.num_replicas_in_sync + + with self.cached_session(): + # Computed steps is correct for specified batch size + steps, batch_size = distributed_training_utils_v1.get_input_params( + distribution, 64, steps=None, batch_size=16 + ) + self.assertEqual(batch_size, 16) + self.assertEqual(steps, 4 // replica_scale_factor) + + # Computed steps is correct for specified batch size + steps, batch_size = distributed_training_utils_v1.get_input_params( + distribution, 64, steps=None, batch_size=32 + ) + self.assertEqual(batch_size, 32) + self.assertEqual(steps, 2 // replica_scale_factor) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_calculating_input_params_with_steps_with_batch_size( + self, distribution + ): + with self.cached_session(): + # No change to steps and batch size if both specified and feasible + steps, batch_size = distributed_training_utils_v1.get_input_params( + distribution, 64, steps=5, batch_size=3 + ) + self.assertEqual(batch_size, 3) + self.assertEqual(steps, 5) + + # Number of samples is less than global batch size * steps + with self.assertRaisesRegex( + ValueError, "less than samples required" + ): + distributed_training_utils_v1.get_input_params( + distribution, 64, steps=10, batch_size=13 + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_calling_model_with_numpy_arrays(self, distribution): + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + model = get_model() + loss = "mse" + metrics = ["mae"] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((64, 3), dtype=np.float32) + targets = np.zeros((64, 4), dtype=np.float32) + + # Call fit with validation data + model.fit( + inputs, + targets, + epochs=1, + batch_size=2, + verbose=0, + validation_data=(inputs, targets), + ) + + # TODO(anjalisridhar): We need tests for when the batch size and + # steps are smaller and result in a 0 batch_size and steps + # value.
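Stripped of the combinations harness, the pattern test_calling_model_with_numpy_arrays exercises is simply: build and compile under the strategy scope, then hand fit/evaluate/predict plain numpy arrays. A minimal sketch of that pattern (the MirroredStrategy choice here is illustrative, not part of the test; the evaluate and predict calls the test issues next follow the same shape):

    import numpy as np
    import tensorflow as tf

    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        model = tf.keras.Sequential(
            [tf.keras.layers.Dense(4, input_shape=(3,))])
        model.compile(tf.keras.optimizers.SGD(0.001), "mse", metrics=["mae"])

    inputs = np.zeros((64, 3), np.float32)
    targets = np.zeros((64, 4), np.float32)
    # Keras splits the arrays into global batches and distributes each
    # batch across the strategy's replicas.
    model.fit(inputs, targets, epochs=1, batch_size=2,
              validation_data=(inputs, targets), verbose=0)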
+ model.evaluate(inputs, targets) + model.evaluate(inputs, targets, batch_size=8) + + model.predict(inputs) + model.predict(inputs, batch_size=8) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_calling_model_with_mixed_precision(self, distribution): + if isinstance( + distribution, + ( + tf.compat.v1.distribute.experimental.ParameterServerStrategy, + tf.distribute.experimental.ParameterServerStrategy, + tf.distribute.experimental.CentralStorageStrategy, + tf.compat.v1.distribute.experimental.CentralStorageStrategy, + ), + ): + self.skipTest("b/152097775") + if backend.is_tpu_strategy(distribution): + policy_name = "mixed_bfloat16" + else: + policy_name = "mixed_float16" + with self.cached_session(), distribution.scope(), policy.policy_scope( + policy_name + ): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + x = keras.layers.Input(shape=(3,), name="input") + y = keras.layers.Dense(4, name="dense")(x) + y = keras.layers.Activation("softmax", dtype="float32")(y) + model = keras.Model(x, y) + loss = "mse" + metrics = ["mae"] + model.compile(optimizer, loss, metrics=metrics) + + # We need to pass float32 since TPUs do not support float64, even + # though these arrays will immediately be casted to bfloat16 on + # TPUs. We also cannot pass bfloat16, as Numpy does not support it. + inputs = np.zeros((64, 3), dtype="float32") + targets = np.zeros((64, 4), dtype="float32") + + model.fit( + inputs, + targets, + epochs=1, + batch_size=2, + verbose=0, + validation_data=(inputs, targets), + ) + + model.evaluate(inputs, targets) + model.evaluate(inputs, targets, batch_size=8) + + model.predict(inputs) + model.predict(inputs, batch_size=8) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_operator_overload_mixed_precision(self, distribution): + # Regression test that tests a fixed bug does not reoccur. Adding an + # AutoCastVariable to a tensor on a TPU, where the variable was the LHS + # of the '+' operator, used to cause the gradient w.r.t. the variable to + # be None. 
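The two mixed-precision tests above pick the policy per strategy ("mixed_bfloat16" on TPU, "mixed_float16" elsewhere) and pin the model's final activation to float32. Outside a policy_scope, the equivalent setup with the public API looks roughly like this (a sketch; the operator-overload regression test described in the comment above continues below):

    import tensorflow as tf
    from tensorflow.keras import mixed_precision

    mixed_precision.set_global_policy("mixed_float16")  # "mixed_bfloat16" on TPU

    inputs = tf.keras.Input(shape=(3,))
    x = tf.keras.layers.Dense(4)(inputs)  # computes in float16; variables stay float32
    # Keep the output activation in float32 so the softmax and loss are
    # numerically stable.
    outputs = tf.keras.layers.Activation("softmax", dtype="float32")(x)
    model = tf.keras.Model(inputs, outputs)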
+ if isinstance( + distribution, + ( + tf.compat.v1.distribute.experimental.ParameterServerStrategy, + tf.distribute.experimental.ParameterServerStrategy, + tf.distribute.experimental.CentralStorageStrategy, + tf.compat.v1.distribute.experimental.CentralStorageStrategy, + ), + ): + self.skipTest("b/152097775") + + if backend.is_tpu_strategy(distribution): + policy_name = "mixed_bfloat16" + else: + policy_name = "mixed_float16" + + class MyLayer(keras.layers.Layer): + def build(self, _): + self.v1 = self.add_weight("v", ()) + self.v2 = self.add_weight("v", ()) + + def call(self, inp): + inp += self.v1 + return self.v2 + inp + + with self.cached_session(), distribution.scope(): + layer = MyLayer(dtype=policy_name) + + def run_fn(): + x = np.array([1.0]) + with tf.GradientTape() as tape: + y = layer(x) + grad_v1, grad_v2 = tape.gradient(y, [layer.v1, layer.v2]) + return grad_v1, grad_v2 + + if tf.executing_eagerly(): + run_fn = tf.function(run_fn) + + grad_v1, grad_v2 = distribution.run(run_fn) + self.assertIsNotNone(grad_v1) + self.assertIsNotNone(grad_v2) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.one_device_strategy + ], + mode=["graph", "eager"], + ) + ) + def test_optimizer_in_cross_replica_context_raises_error( + self, distribution + ): + + with self.cached_session(), distribution.scope(): + model = keras.models.Sequential([keras.layers.Dense(1)]) + x = np.array([[1.0]]) + with tf.GradientTape() as tape: + y = model(x) + gradients = tape.gradient(y, model.trainable_variables) + optimizer = gradient_descent_keras.SGD() + + with self.assertRaisesRegex( + RuntimeError, "cannot be called in cross-replica context" + ): + optimizer.apply_gradients( + zip(gradients, model.trainable_variables) + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_calling_model_with_nested_numpy_arrays(self, distribution): + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(learning_rate=0.001) + model = multi_input_output_model() + loss = "mse" + model.compile(optimizer, loss) + + input_a_np = np.asarray(np.random.random((64, 3)), dtype=np.float32) + input_b_np = np.asarray(np.random.random((64, 5)), dtype=np.float32) + inputs = [input_a_np, input_b_np] + + output_d_np = np.asarray( + np.random.random((64, 7)), dtype=np.float32 + ) + output_e_np = np.asarray( + np.random.random((64, 7)), dtype=np.float32 + ) + targets = [output_d_np, output_e_np] + + # Call fit with validation data + model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0) + + # TODO(anjalisridhar): We need tests for when the batch size and + # steps are smaller and results in a 0 batch_size and steps value. 
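test_optimizer_in_cross_replica_context_raises_error above shows the unsupported direction; the supported one is to apply gradients from inside strategy.run, where code executes in replica context. A minimal sketch of the correct form:

    import numpy as np
    import tensorflow as tf

    strategy = tf.distribute.OneDeviceStrategy("/cpu:0")
    with strategy.scope():
        model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
        optimizer = tf.keras.optimizers.SGD()

    def step_fn(x):
        with tf.GradientTape() as tape:
            loss = tf.reduce_mean(model(x))
        grads = tape.gradient(loss, model.trainable_variables)
        # Inside strategy.run we are in replica context, so this is legal.
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    strategy.run(step_fn, args=(np.array([[1.0]], np.float32),))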
+ model.evaluate(inputs, targets) + model.evaluate(inputs, targets, batch_size=8) + + model.predict(inputs) + model.predict(inputs, batch_size=8) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategies_minus_tpu, mode=["graph", "eager"] + ) + + tf.__internal__.test.combinations.combine( + distribution=multi_worker_mirrored_strategies, mode=["eager"] + ) + ) + def test_numpy_with_sample_weights(self, distribution): + with self.cached_session(), distribution.scope(): + model = get_sample_weights_model() + optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=0.001) + loss = "mse" + model.compile(optimizer, loss) + + inputs = np.array([[0], [1], [2], [3]], np.float32) + targets = np.array([[2], [4], [6], [8]], np.float32) + sample_weights = np.array([0.25, 0.5, 0.75, 1], np.float32) + + result = model.evaluate( + inputs, + targets, + batch_size=2, + sample_weight=sample_weights, + verbose=1, + ) + + # The per sample loss is multiplied by the corresponding sample + # weight. The average of these weighted losses is the return value + # of the `evaluate` call. For example, in the test above the average + # weighted loss is calculated in the following manner: + + # batch_1 = (((2-0)^2) * 0.25 + ((4-1)^2) * 0.5) / 2 = 5.5 / 2 = + # 2.75 + # batch_2 = (((6-2)^2 * 0.75) + ((8-3)^2 * 1)) / 2 = 37 / 2 = 18.5 + # final result = (batch_1 + batch_2) / 2 = 10.625. + # The first time we divide by number of input samples and the second + # time we divide by number of steps/batches that the loss is + # aggregated over. + self.assertAllClose(result, 10.625) + + # We now test without passing sample_weights: + # batch_1 = (((2-0)^2) + ((4-1)^2)) / 2 = 13 / 2 = 6.5 + # batch_2 = (((6-2)^2) + ((8-3)^2)) / 2 = 41 / 2 = 20.5 + # final result = (batch_1 + batch_2) / 2 = 27 / 2 = 13.5 + result = model.evaluate(inputs, targets, batch_size=2, verbose=1) + self.assertAllClose(result, 13.5) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_flatten_predict_outputs(self, distribution): + with self.cached_session(): + with distribution.scope(): + model = multi_input_output_model() + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(learning_rate=0.001) + loss = "mse" + model.compile(optimizer, loss) + + # We take 6 input samples with each input having a dimension of 3 or + # 5. + input_a_np = np.asarray(np.random.random((6, 3)), dtype=np.float32) + input_b_np = np.asarray(np.random.random((6, 5)), dtype=np.float32) + inputs = [input_a_np, input_b_np] + + outs = model.predict(inputs) + # `predict` returns a list that is equal in length to the number of + # model outputs. In this test our model has two outputs and each + # element of `outs` corresponds to all the samples of one of the + # model outputs. + self.assertLen(outs, 2) + # Each of the output samples has a dimension of 7. We should + # process all the available input samples (6).
+ self.assertAllEqual([6, 7], outs[0].shape) + self.assertAllEqual([6, 7], outs[1].shape) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + tpu_strategy_combinations_graph_only(), + tf.__internal__.test.combinations.combine(batch_size=[4, 6]), + ) + ) + def test_evaluate_with_partial_batch(self, distribution, batch_size): + with self.cached_session(): + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) + loss = "mse" + metrics = ["mae", keras.metrics.CategoricalAccuracy()] + + with distribution.scope(): + model_with_ds_strategy = get_model() + model_with_ds_strategy.compile(optimizer, loss, metrics=metrics) + + cpu_model = get_model() + cpu_model.compile(optimizer, loss, metrics=metrics) + + x = np.random.random((10, 3)).astype("float32") + y = np.random.random((10, 4)).astype("float32") + + # As sample size is 10, we batch by 4 so that the last batch is a + # partial batch. Also `evaluate()` using numpy array as inputs + # without distribution strategy uses entire sample as a single + # batch. As so, we remove parameters `batch_size` and `steps`. + cpu_model.set_weights(model_with_ds_strategy.get_weights()) + evaluate_ground_truth = cpu_model.evaluate(x, y) + + # We don't compare the loss as loss is currently not computed as + # metric in Keras, the loss value is inaccurate for last partial + # batch due to more weights for the last batch samples. + steps = np.ceil(10.0 / batch_size) + self.assertAllClose( + model_with_ds_strategy.evaluate( + x, y, batch_size=batch_size, steps=steps + )[1:], + evaluate_ground_truth[1:], + atol=1e-5, + rtol=1e-5, + ) + # Test that `steps` is inferred correctly when final partial batch + # exists. + self.assertAllClose( + model_with_ds_strategy.evaluate(x, y, batch_size=batch_size)[ + 1: + ], + evaluate_ground_truth[1:], + atol=1e-5, + rtol=1e-5, + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + tpu_strategy_combinations_graph_only() + ) + ) + def test_predict_with_partial_batch(self, distribution): + with self.cached_session(): + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) + loss = "mse" + + with distribution.scope(): + model_with_ds_strategy = get_model() + model_with_ds_strategy.compile(optimizer, loss) + + cpu_model = get_model() + cpu_model.compile(optimizer, loss) + + inputs = np.random.random((10, 3)).astype(np.float32) + + # As sample size is 10, we batch by 4 so that the last batch is + # a partial batch. Also `predict()` using numpy array as inputs + # without distribution strategy uses entire sample as a single + # batch. As so, we remove parameters `batch_size` and `steps`. + cpu_model.set_weights(model_with_ds_strategy.get_weights()) + predict_ground_truth = cpu_model.predict(inputs) + self.assertAllClose( + model_with_ds_strategy.predict(inputs, batch_size=4, steps=3), + predict_ground_truth, + atol=1e-5, + rtol=1e-5, + ) + # Test that `steps` is inferred correctly when final partial batch + # exists. 
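The explicit `steps` values in these partial-batch tests are just the ceiling division that Keras performs itself when `steps` is omitted:

    import math

    num_samples, batch_size = 10, 4
    steps = math.ceil(num_samples / batch_size)  # 3: batches of 4, 4, and a final 2
    # Passing steps=3 explicitly and omitting it must therefore agree,
    # which is what the paired assertAllClose checks verify.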
+ self.assertAllClose( + model_with_ds_strategy.predict(inputs, batch_size=4), + predict_ground_truth, + atol=1e-5, + rtol=1e-5, + ) + + @tf.__internal__.distribute.combinations.generate( + tpu_strategy_combinations_graph_only() + ) + def test_no_target_model(self, distribution): + with self.cached_session(): + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) + + class MyLayer(keras.layers.Layer): + def call(self, inputs, training=None): + self.add_loss(tf.reduce_sum(inputs), inputs=True) + return inputs + + with distribution.scope(): + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + 16, activation="relu", input_shape=_INPUT_SIZE + ) + ) + model.add(MyLayer()) + model.add(keras.layers.Dense(_NUM_CLASS, activation="softmax")) + + model.compile(optimizer) + inputs = np.zeros((20, 10), np.float32) + + model.fit(inputs, epochs=1, steps_per_epoch=2) + model.predict(inputs, steps=1) + model.evaluate(inputs, steps=1) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + tpu_strategy_combinations_graph_only() + ) + ) + def test_predict_multi_output_model_with_partial_batch(self, distribution): + with self.cached_session(): + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) + loss = "mse" + + with distribution.scope(): + model_with_ds_strategy = ( + simple_multi_inputs_multi_outputs_model() + ) + model_with_ds_strategy.compile(optimizer, loss) + + cpu_model = simple_multi_inputs_multi_outputs_model() + cpu_model.compile(optimizer, loss) + + input_data, _ = get_multi_inputs_multi_outputs_data() + input_dict = { + "input_a": input_data["input_a"], + "input_b": input_data["input_b"], + } + + # As sample size is 200, we batch by 18 so that the last batch is + # a partial batch. Also `fit()` using numpy array as inputs without + # distribution strategy uses entire sample as a single batch. As so, + # we remove parameters `batch_size` and `steps`. + cpu_model.set_weights(model_with_ds_strategy.get_weights()) + self.assertAllClose( + model_with_ds_strategy.predict( + input_dict, batch_size=18, steps=12 + ), + cpu_model.predict(input_dict), + atol=1e-4, + rtol=1e-4, + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_gradients_are_none(self, distribution): + + if not tf.executing_eagerly(): + self.skipTest("None gradients are not supported in graph mode") + + class DenseWithExtraWeight(keras.layers.Dense): + def build(self, input_shape): + # Gradients w.r.t. 
extra_weights are None + self.extra_weight_1 = self.add_weight( + "extra_weight_1", shape=(), initializer="ones" + ) + super().build(input_shape) + self.extra_weight_2 = self.add_weight( + "extra_weight_2", shape=(), initializer="ones" + ) + + with distribution.scope(): + model = keras.Sequential( + [DenseWithExtraWeight(4, input_shape=(4,))] + ) + model.compile("adam", "mse") + + inputs = np.random.normal(size=(64, 4)) + targets = np.random.normal(size=(64, 4)) + old_kernel = model.get_weights()[1] + model.fit(inputs, targets) + new_kernel = model.get_weights()[1] + self.assertNotAllEqual(old_kernel, new_kernel) + + +class TestDistributionStrategyWithDatasets( + tf.test.TestCase, parameterized.TestCase +): + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_calling_model_on_same_dataset(self, distribution): + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + model = get_model() + loss = "mse" + metrics = ["mae", keras.metrics.CategoricalAccuracy()] + model.compile(optimizer, loss, metrics=metrics) + + dataset = get_dataset(distribution) + + # Call fit with validation data + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_data=dataset, + validation_steps=2, + ) + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_data=dataset, + validation_steps=2, + ) + model.predict(get_predict_dataset(distribution), steps=2) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_model_interleaved_eval_same_as_direct_eval(self, distribution): + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + user_controlled_model = get_model() + user_controlled_model.compile( + optimizer_fn(0.001), + loss="mse", + metrics=["mae", keras.metrics.CategoricalAccuracy()], + ) + + interleaved_model = get_model() + interleaved_model.set_weights( + user_controlled_model.get_weights() + ) + interleaved_model.compile( + optimizer_fn(0.001), + loss="mse", + metrics=["mae", keras.metrics.CategoricalAccuracy()], + ) + + dataset = get_dataset(distribution) + + # Call fit with validation interleaved + interleaved_output = interleaved_model.fit( + dataset, + epochs=2, + steps_per_epoch=2, + verbose=1, + validation_data=dataset, + validation_steps=2, + shuffle=False, + ) + + # Manually control the validation running after each epoch. 
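Before the manual-validation loop announced by the comment above, one aside on test_gradients_are_none further up: it relies on Keras optimizers skipping variables whose gradient is None (with a logged warning) instead of raising. A reduced sketch of just that behavior, using a hypothetical ExtraWeightDense layer:

    import numpy as np
    import tensorflow as tf

    class ExtraWeightDense(tf.keras.layers.Dense):
        def build(self, input_shape):
            super().build(input_shape)
            # Never used in call(), so its gradient is always None.
            self.extra = self.add_weight("extra", shape=(), initializer="ones")

    model = tf.keras.Sequential([ExtraWeightDense(4, input_shape=(4,))])
    model.compile("adam", "mse")
    # Training proceeds; kernel and bias update while `extra` is skipped.
    model.fit(np.zeros((8, 4)), np.zeros((8, 4)), verbose=0)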
+ user_controlled_output = [] + for _ in range(2): + user_controlled_model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=1, + shuffle=False, + ) + user_controlled_output.append( + user_controlled_model.evaluate(dataset, steps=2) + ) + + self.assertEqual( + interleaved_output.history["val_loss"], + [x[0] for x in user_controlled_output], + ) + val_mean_absolute_error = interleaved_output.history.get( + "val_mean_absolute_error" + ) + if not val_mean_absolute_error: + # The name of the metric changed in TF2.0 + val_mean_absolute_error = interleaved_output.history["val_mae"] + self.assertEqual( + val_mean_absolute_error, [x[1] for x in user_controlled_output] + ) + self.assertEqual( + interleaved_output.history["val_categorical_accuracy"], + [x[2] for x in user_controlled_output], + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_fit_with_tuple_and_dict_dataset_inputs(self, distribution): + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(learning_rate=0.001) + model = multi_input_output_model() + loss = "mse" + metrics = ["mae", keras.metrics.CategoricalAccuracy()] + model.compile(optimizer, loss, metrics=metrics) + + input_a_np = np.random.random((10, 3)).astype("float32") + input_b_np = np.random.random((10, 5)).astype("float32") + output_d_np = np.random.random((10, 7)).astype("float32") + output_e_np = np.random.random((10, 7)).astype("float32") + + # Test with tuples + dataset_tuple = tf.data.Dataset.from_tensor_slices( + ((input_a_np, input_b_np), (output_d_np, output_e_np)) + ) + dataset_tuple = dataset_tuple.repeat(100) + dataset_tuple = dataset_tuple.batch(10) + + model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1) + + # Test with dict + dataset_dict = tf.data.Dataset.from_tensor_slices( + ( + {"input_a": input_a_np, "input_b": input_b_np}, + (output_d_np, output_e_np), + ) + ) + dataset_dict = dataset_dict.repeat(100) + dataset_dict = dataset_dict.batch(10) + + model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_fit_with_dictionary_in_the_dataset_b135161171(self, distribution): + + if backend.is_tpu_strategy(distribution): + self.skipTest("b/142805125") + + def custom_loss(predict, label, weight): + bce = keras.losses.binary_crossentropy(label, predict) + return tf.reduce_mean(bce * weight) + + with self.cached_session(): + with distribution.scope(): + input_img = keras.layers.Input([64, 64, 3], name="img") + input_lbl = keras.layers.Input([64, 64, 1], name="lbl") + input_weight = keras.layers.Input([64, 64], name="weight") + predict = keras.layers.Conv2D(2, [1, 1], padding="same")( + input_img + ) + loss_lambda = keras.layers.Lambda( + lambda x: custom_loss(*x), name="my_loss" + ) + my_loss = loss_lambda([predict, input_lbl, input_weight]) + model = keras.models.Model( + inputs=[input_img, input_lbl, input_weight], + outputs=[predict, my_loss], + ) + model.add_loss(model.get_layer("my_loss").output) + model.compile(optimizer="adam") + + if tf.executing_eagerly(): + + def map_fn(img, lbl, weight): + inputs = {"img": img, "lbl": lbl, "weight": weight} + return (inputs,) + + else: + + def map_fn(img, lbl, weight): + inputs = {"img": img, "lbl": lbl, "weight": weight} + return inputs, {} + + fake_imgs = np.ones([50, 64, 64, 3], dtype=np.float32) + fake_lbls = np.ones([50, 64, 64, 1], dtype=np.float32) + fake_weights = 
np.ones([50, 64, 64], dtype=np.float32) + + data = ( + tf.data.Dataset.from_tensor_slices( + (fake_imgs, fake_lbls, fake_weights) + ) + .map(map_fn) + .batch(10) + ) + + model.fit(data) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_fit_eval_and_predict_methods_on_dataset_without_steps( + self, distribution + ): + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + model = get_model() + loss = "mse" + metrics = ["mae", keras.metrics.CategoricalAccuracy()] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((1000, 3), dtype=np.float32) + targets = np.zeros((1000, 4), dtype=np.float32) + # steps/steps_per_epoch are calculated when using numpy arrays as + # input data. + fit_with_numpy = model.fit( + inputs, targets, epochs=1, batch_size=10 + ).history + eval_with_numpy = model.evaluate(inputs, targets, batch_size=10) + predict_with_numpy = model.predict(inputs, batch_size=10) + + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.batch(10, drop_remainder=True) + fit_with_ds = model.fit(dataset, epochs=1).history + eval_with_ds = model.evaluate(dataset) + predict_dataset = tf.data.Dataset.from_tensor_slices(inputs) + predict_dataset = predict_dataset.batch(10, drop_remainder=True) + predict_with_ds = model.predict(predict_dataset) + self.assertAllClose( + fit_with_numpy, fit_with_ds, atol=1e-4, rtol=1e-4 + ) + self.assertAllClose( + eval_with_numpy, eval_with_ds, atol=1e-4, rtol=1e-4 + ) + self.assertAllClose( + predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4 + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_predict_on_dataset_with_unknown_cardinality_without_steps( + self, distribution, mode + ): + + if mode == "graph" and backend.is_tpu_strategy(distribution): + self.skipTest("partial batch not supported with TPU in graph mode.") + + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + model = get_model() + loss = "mse" + metrics = ["mae", keras.metrics.CategoricalAccuracy()] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((20, 3), dtype=np.float32) + # steps/steps_per_epoch are calculated when using numpy arrays as + # input data. + predict_with_numpy = model.predict(inputs, batch_size=10) + + predict_dataset = convert_numpy_to_dataset_with_unknown_cardinality( + inputs + ) + + self.assertEqual( + keras.backend.get_value( + tf.data.experimental.cardinality(predict_dataset) + ), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + + predict_with_ds = model.predict(predict_dataset) + self.assertAllClose( + predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4 + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_on_dataset_with_unknown_cardinality_without_steps( + self, distribution, mode + ): + # TODO(b/155867206): Investigate why this test occasionally segfaults on + # TPU in eager mode. 
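test_fit_with_dictionary_in_the_dataset_b135161171 above leans on the add_loss pattern: the loss is computed inside the graph from the model's own tensors, so compile() takes only an optimizer and fit() needs no targets. Reduced to its essentials (a sketch, not the exact test model):

    import numpy as np
    import tensorflow as tf

    inputs = tf.keras.Input(shape=(4,), name="img")
    pred = tf.keras.layers.Dense(1)(inputs)
    model = tf.keras.Model(inputs, pred)
    # The loss is a tensor built from the outputs themselves.
    model.add_loss(tf.reduce_mean(tf.square(pred)))
    model.compile("adam")  # no loss argument
    model.fit(np.ones((10, 4), np.float32), verbose=0)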
+ if mode == "eager" and backend.is_tpu_strategy(distribution): + self.skipTest("caused segfault with TPU in eager mode.") + + if mode == "graph" and backend.is_tpu_strategy(distribution): + self.skipTest("partial batch not supported with TPU in graph mode.") + + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + model = get_model() + loss = "mse" + metrics = ["mae", keras.metrics.CategoricalAccuracy()] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((100, 3), dtype=np.float32) + targets = np.zeros((100, 4), dtype=np.float32) + # steps/steps_per_epoch are calculated when using numpy arrays as + # input data. + fit_with_numpy = model.fit( + inputs, targets, epochs=1, batch_size=10 + ).history + fit_with_numpy_multiple_epochs = model.fit( + inputs, targets, epochs=2, batch_size=10 + ).history + eval_with_numpy = model.evaluate(inputs, targets, batch_size=10) + predict_with_numpy = model.predict(inputs, batch_size=10) + + dataset = convert_numpy_to_dataset_with_unknown_cardinality( + inputs, targets + ) + predict_dataset = convert_numpy_to_dataset_with_unknown_cardinality( + inputs + ) + + self.assertEqual( + keras.backend.get_value( + tf.data.experimental.cardinality(dataset) + ), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + self.assertEqual( + keras.backend.get_value( + tf.data.experimental.cardinality(predict_dataset) + ), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + + eval_with_ds = model.evaluate(dataset) + predict_with_ds = model.predict(predict_dataset) + self.assertAllClose( + eval_with_numpy, eval_with_ds, atol=1e-4, rtol=1e-4 + ) + self.assertAllClose( + predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4 + ) + + fit_with_ds = model.fit(dataset, epochs=1).history + fit_with_ds_multiple_epochs = model.fit(dataset, epochs=2).history + self.assertAllClose( + fit_with_numpy, fit_with_ds, atol=1e-4, rtol=1e-4 + ) + self.assertAllClose( + fit_with_numpy_multiple_epochs, + fit_with_ds_multiple_epochs, + atol=1e-4, + rtol=1e-4, + ) + + @tf.__internal__.distribute.combinations.generate( + tpu_strategy_combinations_graph_only() + ) + def test_on_dataset_with_unknown_cardinality(self, distribution): + with self.cached_session(): + with distribution.scope(): + model = get_model() + loss = "mse" + metrics = ["mae", keras.metrics.CategoricalAccuracy()] + model.compile( + tf.compat.v1.train.GradientDescentOptimizer(0.001), + loss, + metrics=metrics, + ) + + inputs = np.zeros((1000, 3), dtype=np.float32) + targets = np.zeros((1000, 4), dtype=np.float32) + # steps/steps_per_epoch are calculated when using numpy arrays as + # input data. 
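convert_numpy_to_dataset_with_unknown_cardinality, defined near the top of the file, hides the dataset size behind a no-op filter, since tf.data cannot statically count filtered elements. The mechanics in isolation:

    import numpy as np
    import tensorflow as tf

    ds = tf.data.Dataset.from_tensor_slices(np.zeros((100, 3), np.float32))
    ds = ds.filter(lambda x: True).batch(10)  # filter defeats static counting
    assert (tf.data.experimental.cardinality(ds)
            == tf.data.experimental.UNKNOWN_CARDINALITY)
    # evaluate/predict can simply iterate to exhaustion, but fit() needs an
    # epoch boundary, hence the "Number of steps could not be inferred"
    # error asserted below unless steps or steps_per_epoch is supplied.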
+ eval_with_numpy = model.evaluate(inputs, targets, batch_size=10) + predict_with_numpy = model.predict(inputs, batch_size=10) + + dataset = convert_numpy_to_dataset_with_unknown_cardinality( + inputs, targets + ) + predict_dataset = convert_numpy_to_dataset_with_unknown_cardinality( + inputs + ) + + self.assertEqual( + keras.backend.get_value( + tf.data.experimental.cardinality(dataset) + ), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + self.assertEqual( + keras.backend.get_value( + tf.data.experimental.cardinality(predict_dataset) + ), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + + eval_with_ds = model.evaluate(dataset, steps=100) + predict_with_ds = model.predict(predict_dataset, steps=100) + self.assertAllClose( + eval_with_numpy, eval_with_ds, atol=1e-4, rtol=1e-4 + ) + self.assertAllClose( + predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4 + ) + + with self.assertRaisesRegex( + ValueError, "Number of steps could not be inferred" + ): + model.fit(dataset, epochs=1) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_fit_eval_and_predict_methods_on_dataset(self, distribution): + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + model = get_model() + loss = "mse" + metrics = ["mae", keras.metrics.CategoricalAccuracy()] + model.compile(optimizer, loss, metrics=metrics) + + dataset = get_dataset(distribution) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset, steps=2, verbose=1) + model.predict(get_predict_dataset(distribution), steps=2) + + @tf.__internal__.distribute.combinations.generate( + strategy_and_optimizer_combinations() + ) + def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer): + with self.cached_session(): + + with distribution.scope(): + + model = get_model() + loss = "mse" + model.compile(optimizer(), loss) + + dataset = get_dataset(distribution) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset, steps=2, verbose=1) + model.predict(get_predict_dataset(distribution), steps=2) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.one_device_strategy, + ], + mode=["graph", "eager"], + ) + ) + def test_dataset_wrong_input_shape(self, distribution, mode): + if mode == "graph": + self.skipTest( + "TODO(b/120943676, b/120957836): Re-enable for graph once the " + "validation code is restored." 
+ ) + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(learning_rate=0.001) + model = get_model() + loss = "mse" + model.compile(optimizer, loss) + + # Wrong input shape + inputs = np.zeros((10, 5), dtype=np.float32) + targets = np.zeros((10, 4), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + with self.assertRaisesRegex(ValueError, "is incompatible with"): + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu # noqa: E501 + ], + mode=["graph", "eager"], + ) + ) + def test_dataset_external_batch_input_validation(self, distribution): + with self.cached_session(): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(learning_rate=0.001) + model = get_model() + loss = "mse" + model.compile(optimizer, loss) + + # Batching is done outside tf.data's `batch` + inputs = np.zeros((100, 10, 3), dtype=np.float32) + targets = np.zeros((100, 10, 4), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, # noqa: E501 + ], + mode=["graph", "eager"], + ) + ) + def test_learning_phase_value(self, distribution): + # TODO(anjalisridhar): Modify this test to use Lambdas since we can + # compare meaningful values. Currently we don't pass the learning phase + # if the Lambda layer uses the learning phase. + with self.cached_session(): + with distribution.scope(): + x = keras.layers.Input(shape=(1,), name="input") + y = keras.layers.Dense(1, kernel_initializer="ones")(x) + z = keras.layers.Dropout(0.9999)(y) + model = keras.Model(x, z) + initial_weights = model.get_weights() + + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.005) + loss = "mse" + metrics = ["acc"] + model.compile(optimizer, loss, metrics=metrics) + + batch_size = 8 + if isinstance( + distribution, + ( + tf.distribute.MirroredStrategy, + tf.compat.v1.distribute.MirroredStrategy, + ), + ): + # MirroredStrategy uses global batch size. + batch_size = 8 * distribution.num_replicas_in_sync + + inputs = np.ones((10, 1), dtype=np.float32) + targets = np.ones((10, 1), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat().batch(batch_size) + hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1) + self.assertAlmostEqual(hist.history["acc"][0], 0, 0) + + with distribution.scope(): + model.set_weights(initial_weights) + # TODO(psv/anjalisridhar): Enable these lines after we fix + # b/117431185. 
evaluate_output = model.evaluate(dataset, steps=20) + # self.assertAlmostEqual(evaluate_output[1], 1, 0) + + inputs = np.ones((10, 1), dtype=np.float32) + predict_dataset = tf.data.Dataset.from_tensor_slices(inputs) + + predict_dataset = predict_dataset.repeat().batch(batch_size) + output = model.predict(predict_dataset, steps=10) + # `predict` runs for 10 steps + ref_output = np.ones((160, 1), dtype=np.float32) + self.assertArrayNear(output, ref_output, 1e-1) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def testOptimizerWithCallbacks(self, distribution): + with self.cached_session(): + with distribution.scope(): + model = get_model() + optimizer = gradient_descent_keras.SGD(0.01) + loss = "mse" + model.compile(optimizer, loss) + + dataset = get_dataset(distribution) + + def schedule(_): + return 0.001 + + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + callbacks=[keras.callbacks.LearningRateScheduler(schedule)], + ) + self.assertAllClose( + 0.001, keras.backend.get_value(model.optimizer.lr) + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + tpu_strategy_combinations_graph_only(), + tf.__internal__.test.combinations.combine(batch_size=[4, 6]), + ) + ) + def test_evaluate_with_dataset_with_partial_batch( + self, distribution, batch_size + ): + with self.cached_session(): + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) + loss = "mse" + metrics = ["mae", keras.metrics.CategoricalAccuracy()] + + with distribution.scope(): + model_with_ds_strategy = get_model() + model_with_ds_strategy.compile(optimizer, loss, metrics=metrics) + + cpu_model = get_model() + cpu_model.compile(optimizer, loss, metrics=metrics) + + x = np.random.random((10, 3)).astype("float32") + y = np.random.random((10, 4)).astype("float32") + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + + # As sample size is 10, we make the last batch a partial batch. + cpu_model.set_weights(model_with_ds_strategy.get_weights()) + dataset_with_partial_batch = dataset.batch(batch_size) + + # We don't compare the loss as loss is currently not computed as + # metric in Keras, the loss value is inaccurate for last partial + # batch due to more weights for the last batch samples. + steps = np.ceil(10.0 / batch_size) + self.assertAllClose( + model_with_ds_strategy.evaluate( + dataset_with_partial_batch, steps=steps + )[1:], + cpu_model.evaluate(dataset_with_partial_batch, steps=steps)[1:], + atol=1e-5, + rtol=1e-5, + ) + self.assertAllClose( + model_with_ds_strategy.evaluate(dataset_with_partial_batch)[1:], + cpu_model.evaluate(dataset_with_partial_batch)[1:], + atol=1e-5, + rtol=1e-5, + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + tpu_strategy_combinations_graph_only() + ) + ) + def test_predict_with_dataset_with_partial_batch(self, distribution): + with self.cached_session(): + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) + loss = "mse" + + with distribution.scope(): + model_with_ds_strategy = get_model() + model_with_ds_strategy.compile(optimizer, loss) + + cpu_model = get_model() + cpu_model.compile(optimizer, loss) + + inputs = np.random.random((10, 3)).astype(np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs)) + + # As sample size is 10, we batch by 4 so that the last batch is + # a partial batch. 
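test_learning_phase_value above hinges on the learning phase: Dropout is active during fit but an identity during evaluate/predict, which is why a Dropout(0.9999) model still predicts all ones. In isolation (the partial-batch predict test resumes right below):

    import tensorflow as tf

    layer = tf.keras.layers.Dropout(0.9999)
    x = tf.ones((4, 1))
    print(layer(x, training=True))   # almost surely all zeros (survivors rescaled)
    print(layer(x, training=False))  # identity: all ones, as predict() sees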
+ dataset_with_partial_batch = dataset.batch(4) + cpu_model.set_weights(model_with_ds_strategy.get_weights()) + + self.assertAllClose( + model_with_ds_strategy.predict( + dataset_with_partial_batch, steps=3 + ), + cpu_model.predict(dataset_with_partial_batch, steps=3), + atol=1e-5, + rtol=1e-5, + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + tpu_strategy_combinations_graph_only() + ) + ) + def test_predict_multi_output_model_with_dataset_with_partial_batch( + self, distribution + ): + with self.cached_session(): + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) + loss = "mse" + + with distribution.scope(): + model_with_ds_strategy = ( + simple_multi_inputs_multi_outputs_model() + ) + model_with_ds_strategy.compile(optimizer, loss) + + cpu_model = simple_multi_inputs_multi_outputs_model() + cpu_model.compile(optimizer, loss) + + input_data, _ = get_multi_inputs_multi_outputs_data() + input_dict = { + "input_a": input_data["input_a"], + "input_b": input_data["input_b"], + } + + dataset = tf.data.Dataset.from_tensor_slices(input_dict) + + # As sample size is 200, we batch by 18 using 12 steps per epoch so + # that the last batch is a partial batch. + dataset_with_partial_batch = dataset.batch(18) + cpu_model.set_weights(model_with_ds_strategy.get_weights()) + + self.assertAllClose( + model_with_ds_strategy.predict( + dataset_with_partial_batch, steps=12 + ), + cpu_model.predict(dataset_with_partial_batch, steps=12), + atol=1e-4, + rtol=1e-4, + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations_minus_default() + ) + def test_match_model_input_matches_with_dataset_tensors(self, distribution): + def _create_model_input_output_tensors(): + input_a = keras.layers.Input( + shape=(16,), name="z_input_sorted_last" + ) + input_b = keras.layers.Input( + shape=(32,), name="a_input_sorted_first" + ) + intermediate_a = keras.layers.Dense(10)(input_a) + intermediate_b = keras.layers.Dense(10)(input_b) + merged = keras.layers.Add()([intermediate_a, intermediate_b]) + output = keras.layers.Dense(2)(merged) + return input_a, input_b, output + + input_dict = { + "z_input_sorted_last": np.random.rand(32, 16).astype(np.float32), + "a_input_sorted_first": np.random.rand(32, 32).astype(np.float32), + } + target = np.ones((32, 2), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((input_dict, target)) + dataset = dataset.batch(4, drop_remainder=True) + + with self.cached_session(): + with distribution.scope(): + input_a, input_b, output = _create_model_input_output_tensors() + # `input_a`, whose input name comes last in + # alphanumeric order, is the first input of the model's input + # layers. If tensors from `input_dict` were blindly flattened and + # passed to model inputs in that order, the `input_a` input + # layer would be matched with the tensor + # `a_input_sorted_first`, resulting in a shape mismatch.
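The mismatch scenario described in the comment above comes from how nested structures flatten: tf.nest emits dict values in sorted-key order, not in model-input order, so blind flattening would pair the wrong tensors (the test's two model constructions follow below):

    import tensorflow as tf

    # Keys sort alphabetically, so "a_input_sorted_first" flattens ahead of
    # "z_input_sorted_last" regardless of the order the model declares.
    print(tf.nest.flatten(
        {"z_input_sorted_last": 1, "a_input_sorted_first": 2}))  # [2, 1]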
+ model_with_array_input = keras.models.Model( + inputs=[input_a, input_b], outputs=output + ) + model_with_array_input.compile("sgd", "mse") + model_weights = model_with_array_input.get_weights() + model_with_array_input_fit = model_with_array_input.fit( + dataset, steps_per_epoch=1, epochs=1 + ).history + + input_a, input_b, output = _create_model_input_output_tensors() + model_with_dict_input = keras.models.Model( + inputs={ + "z_input_sorted_last": input_a, + "a_input_sorted_first": input_b, + }, + outputs=output, + ) + model_with_dict_input.compile("sgd", "mse") + model_with_dict_input.set_weights(model_weights) + model_with_dict_input_fit = model_with_dict_input.fit( + dataset, steps_per_epoch=1, epochs=1 + ).history + self.assertAllClose( + model_with_dict_input_fit, + model_with_array_input_fit, + atol=1e-4, + rtol=1e-4, + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategies_minus_tpu, mode=["graph", "eager"] + ) + + tf.__internal__.test.combinations.combine( + distribution=multi_worker_mirrored_strategies, mode=["eager"] + ) + ) + def test_dataset_with_sample_weights(self, distribution): + with self.cached_session(), distribution.scope(): + model = get_sample_weights_model() + optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=0.001) + loss = "mse" + model.compile(optimizer, loss) + + inputs = np.array([[0], [1], [2], [3]], np.float32) + targets = np.array([[2], [4], [6], [8]], np.float32) + sample_weights = np.array([0.25, 0.5, 0.75, 1], np.float32) + ds = tf.data.Dataset.from_tensor_slices( + (inputs, targets, sample_weights) + ).batch(2) + result = model.evaluate(ds, verbose=1) + + # The per sample loss is multiplied by the corresponding sample + # weight. The average of these weighted losses is the return value + # of the `evaluate` call. For example, in the test above the average + # weighted loss is calculated in the following manner: + # batch_1 = (((2-0)^2) * 0.25 + ((4-1)^2) * 0.5) / 2 = 5.5 / 2 = + # 2.75 + # batch_2 = (((6-2)^2 * 0.75) + ((8-3)^2 * 1)) / 2 = 37 / 2 = 18.5 + # final result = (batch_1 + batch_2) / 2 = 10.625. + # The first time we divide by number of input samples and the second + # time we divide by number of steps/batches that the loss is + # aggregated over. 
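The weighted-loss arithmetic in the comment above checks out numerically; a quick NumPy verification (assuming, as the comment implies, that the model's prediction for each input equals the input itself):

import numpy as np

preds = np.array([0.0, 1.0, 2.0, 3.0])          # model outputs
targets = np.array([2.0, 4.0, 6.0, 8.0])
weights = np.array([0.25, 0.5, 0.75, 1.0])

losses = (targets - preds) ** 2 * weights        # weighted per-sample MSE
batch_means = [losses[:2].mean(), losses[2:].mean()]   # [2.75, 18.5]
assert np.isclose(np.mean(batch_means), 10.625)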
+ self.assertAllClose(result, 10.625) + + # We now test without passing sample_weights: + # batch_1 = (((2-0)^2) + ((4-1)^2)) / 2 = 13 / 2 = 6.5 + # batch_2 = (((6-2)^2) + ((8-3)^2)) / 2 = 41 / 2 = 20.5 + # final result = (batch_1 + batch_2) / 2 = 27 / 2 = 13.5 + ds = tf.data.Dataset.from_tensor_slices((inputs, targets)).batch(2) + result = model.evaluate(ds, verbose=1) + self.assertAllClose(result, 13.5) + + +class TestDistributionStrategyWithDatasetsFile( + tf.test.TestCase, parameterized.TestCase +): + def setUp(self): + super().setUp() + self.input_file_name = os.path.join( + self.get_temp_dir(), "input.tfrecord" + ) + inputs = np.zeros((20, 3), dtype=np.float32) + input_dataset = tf.data.Dataset.from_tensor_slices(inputs) + input_dataset = input_dataset.map(tf.io.serialize_tensor) + writer = tf.data.experimental.TFRecordWriter(self.input_file_name) + writer.write(input_dataset) + + # TODO(wxinyi): add a multi-worker test for TPU + @tf.__internal__.distribute.combinations.generate( + multi_worker_strategy_combinations_eager_only() + ) + def test_predict_on_dataset_shard_options_file_multi_worker_mirrored( + self, distribution, mode + ): + # This test verifies that we successfully switch the auto_shard_policy + # of an input dataset inside model.predict with + # MultiWorkerMirroredStrategy to AutoShardPolicy.DATA. Since there is + # only one input file for multiple workers, AutoShardPolicy.AUTO or + # AutoShardPolicy.FILE will lead to an error. However, since we switch + # to AutoShardPolicy.DATA in model.predict, no error is raised. + del mode + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + model = get_model() + loss = "mse" + model.compile(optimizer, loss) + + dataset = tf.data.TFRecordDataset(self.input_file_name) + dataset = dataset.map(lambda x: tf.io.parse_tensor(x, tf.float32)) + + dummy_op = lambda inp: True + + dataset = dataset.filter(dummy_op).batch(8, drop_remainder=True) + + options = tf.data.Options() + options.experimental_distribute.auto_shard_policy = ( + tf.data.experimental.AutoShardPolicy.FILE + ) + dataset = dataset.with_options(options) + + model.predict(dataset, steps=1) - def __init__(self): - super().__init__() - self.train_begin_batches = [] - self.train_end_batches = [] - self.test_begin_batches = [] - self.test_end_batches = [] - self.predict_begin_batches = [] - self.predict_end_batches = [] - - def on_train_batch_begin(self, batch, logs=None): - self.train_begin_batches.append(batch) - - def on_train_batch_end(self, batch, logs=None): - self.train_end_batches.append(batch) - - def on_test_batch_begin(self, batch, logs=None): - self.test_begin_batches.append(batch) - - def on_test_batch_end(self, batch, logs=None): - self.test_end_batches.append(batch) - - def on_predict_batch_begin(self, batch, logs=None): - self.predict_begin_batches.append(batch) - - def on_predict_batch_end(self, batch, logs=None): - self.predict_end_batches.append(batch) - - -class TestDistributionStrategyWithNumpyArrays(tf.test.TestCase, - parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_calculating_input_params_no_steps_no_batch_size(self, distribution): - # Calculate the per_replica_batch_size scaling factor for strategies - # that use per_core_batch_size - replica_scale_factor = 1.0 - if not distributed_training_utils.global_batch_size_supported(distribution): - replica_scale_factor = distribution.num_replicas_in_sync - - with self.cached_session(): - # Default
global batch size 32 for input with 64 samples run in 2 steps - steps, batch_size = distributed_training_utils_v1.get_input_params( - distribution, 64, steps=None, batch_size=None) - self.assertEqual(batch_size, 32 // replica_scale_factor) - self.assertEqual(steps, 2) - - # Computed global batch size 20 is lower than 32 if we pass less samples. - steps, batch_size = distributed_training_utils_v1.get_input_params( - distribution, 20, steps=None, batch_size=None) - self.assertEqual(batch_size, 20 // replica_scale_factor) - self.assertEqual(steps, 1) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_calculating_input_params_with_steps_no_batch_size( - self, distribution): - # Calculate the per_replica_batch_size scaling factor for strategies - # that use per_core_batch_size - replica_scale_factor = 1.0 - if not distributed_training_utils.global_batch_size_supported(distribution): - replica_scale_factor = distribution.num_replicas_in_sync - - with self.cached_session(): - # Computed global batch size is correct for number of specified 1 step - steps, batch_size = distributed_training_utils_v1.get_input_params( - distribution, 64, steps=1, batch_size=None) - self.assertEqual(batch_size, 64 // replica_scale_factor) - self.assertEqual(steps, 1) - - # Computed global batch size is correct for number of specified 2 steps - steps, batch_size = distributed_training_utils_v1.get_input_params( - distribution, 64, steps=2, batch_size=None) - self.assertEqual(batch_size, 32 // replica_scale_factor) - self.assertEqual(steps, 2) - - # All samples can not be consumed in specified number of steps - with self.assertRaisesRegex(ValueError, 'not divisible by steps'): - distributed_training_utils_v1.get_input_params( - distribution, 63, steps=2, batch_size=None) - - # This cases is different for different strategies due to the - # difference in supported batch size being global or per-replica. 
- if replica_scale_factor == 1: - # Computed global batch size is correct even if not sharadable - steps, batch_size = distributed_training_utils_v1.get_input_params( - distribution, 63, steps=3, batch_size=None) - self.assertEqual(batch_size, 21) - self.assertEqual(steps, 3) - else: - # Computed global batch size can not be sharded across replicas - with self.assertRaisesRegex( - ValueError, 'could not be sharded evenly ' - 'across the sync replicas'): - distributed_training_utils_v1.get_input_params( - distribution, 63, steps=1, batch_size=None) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_calculating_input_params_no_steps_with_batch_size( - self, distribution): - # Calculate the per_replica_batch_size scaling factor for strategies - # that use per_core_batch_size - replica_scale_factor = 1.0 - if not distributed_training_utils.global_batch_size_supported(distribution): - replica_scale_factor = distribution.num_replicas_in_sync - - with self.cached_session(): - # Computed steps is correct for specified batch size - steps, batch_size = distributed_training_utils_v1.get_input_params( - distribution, 64, steps=None, batch_size=16) - self.assertEqual(batch_size, 16) - self.assertEqual(steps, 4 // replica_scale_factor) - - # Computed steps is correct for specified batch size - steps, batch_size = distributed_training_utils_v1.get_input_params( - distribution, 64, steps=None, batch_size=32) - self.assertEqual(batch_size, 32) - self.assertEqual(steps, 2 // replica_scale_factor) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_calculating_input_params_with_steps_with_batch_size( - self, distribution): - with self.cached_session(): - # No change to steps and batch size if both specified and feasible - steps, batch_size = distributed_training_utils_v1.get_input_params( - distribution, 64, steps=5, batch_size=3) - self.assertEqual(batch_size, 3) - self.assertEqual(steps, 5) - - # Number of samples is less than global batch size * steps - with self.assertRaisesRegex(ValueError, 'less than samples required'): - distributed_training_utils_v1.get_input_params( - distribution, 64, steps=10, batch_size=13) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_calling_model_with_numpy_arrays(self, distribution): - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.001) - model = get_model() - loss = 'mse' - metrics = ['mae'] - model.compile( - optimizer, - loss, - metrics=metrics) - - inputs = np.zeros((64, 3), dtype=np.float32) - targets = np.zeros((64, 4), dtype=np.float32) - - # Call fit with validation data - model.fit( - inputs, - targets, - epochs=1, - batch_size=2, - verbose=0, - validation_data=(inputs, targets)) - - # TODO(anjalisridhar): We need tests for when the batch size and steps - # are smaller and results in a 0 batch_size and steps value. 
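The inference rules these (now removed) tests assert can be condensed into a small sketch. This is an illustrative re-implementation of the documented single-replica behavior, ignoring the per-replica scale factor; it is not the real distributed_training_utils_v1.get_input_params:

import numpy as np

def infer_input_params(num_samples, steps=None, batch_size=None):
    # Illustrative only; default global batch size of 32, capped at the
    # number of samples, mirroring the cases asserted above.
    if steps is None and batch_size is None:
        batch_size = min(32, num_samples)
        steps = int(np.ceil(num_samples / batch_size))
    elif batch_size is None:
        if num_samples % steps:
            raise ValueError("not divisible by steps")
        batch_size = num_samples // steps
    elif steps is None:
        steps = int(np.ceil(num_samples / batch_size))
    return steps, batch_size

assert infer_input_params(64) == (2, 32)
assert infer_input_params(20) == (1, 20)
assert infer_input_params(64, steps=2) == (2, 32)
assert infer_input_params(64, batch_size=16) == (4, 16)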
- model.evaluate(inputs, targets) - model.evaluate(inputs, targets, batch_size=8) - - model.predict(inputs) - model.predict(inputs, batch_size=8) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_calling_model_with_mixed_precision(self, distribution): - if isinstance(distribution, - (tf.compat.v1.distribute.experimental.ParameterServerStrategy, - tf.distribute.experimental.ParameterServerStrategy, - tf.distribute.experimental.CentralStorageStrategy, - tf.compat.v1.distribute.experimental.CentralStorageStrategy)): - self.skipTest('b/152097775') - if backend.is_tpu_strategy(distribution): - policy_name = 'mixed_bfloat16' - else: - policy_name = 'mixed_float16' - with self.cached_session(), \ - distribution.scope(), \ - policy.policy_scope(policy_name): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.001) - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - y = keras.layers.Activation('softmax', dtype='float32')(y) - model = keras.Model(x, y) - loss = 'mse' - metrics = ['mae'] - model.compile( - optimizer, - loss, - metrics=metrics) - - # We need to pass float32 since TPUs do not support float64, even though - # these arrays will immediately be casted to bfloat16 on TPUs. We also - # cannot pass bfloat16, as Numpy does not support it. - inputs = np.zeros((64, 3), dtype='float32') - targets = np.zeros((64, 4), dtype='float32') - - model.fit( - inputs, - targets, - epochs=1, - batch_size=2, - verbose=0, - validation_data=(inputs, targets)) - - model.evaluate(inputs, targets) - model.evaluate(inputs, targets, batch_size=8) - - model.predict(inputs) - model.predict(inputs, batch_size=8) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_operator_overload_mixed_precision(self, distribution): - # Regression test that tests a fixed bug does not reoccur. Adding an - # AutoCastVariable to a tensor on a TPU, where the variable was the LHS of - # the '+' operator, used to cause the gradient w.r.t. the variable to be - # None. 
- if isinstance(distribution, - (tf.compat.v1.distribute.experimental.ParameterServerStrategy, - tf.distribute.experimental.ParameterServerStrategy, - tf.distribute.experimental.CentralStorageStrategy, - tf.compat.v1.distribute.experimental.CentralStorageStrategy)): - self.skipTest('b/152097775') - - if backend.is_tpu_strategy(distribution): - policy_name = 'mixed_bfloat16' - else: - policy_name = 'mixed_float16' - - class MyLayer(keras.layers.Layer): - - def build(self, _): - self.v1 = self.add_weight('v', ()) - self.v2 = self.add_weight('v', ()) - - def call(self, inp): - inp += self.v1 - return self.v2 + inp - - with self.cached_session(), distribution.scope(): - layer = MyLayer(dtype=policy_name) - def run_fn(): - x = np.array([1.]) - with tf.GradientTape() as tape: - y = layer(x) - grad_v1, grad_v2 = tape.gradient(y, [layer.v1, layer.v2]) - return grad_v1, grad_v2 - if tf.executing_eagerly(): - run_fn = tf.function(run_fn) - - grad_v1, grad_v2 = distribution.run(run_fn) - self.assertIsNotNone(grad_v1) - self.assertIsNotNone(grad_v2) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[tf.__internal__.distribute.combinations.one_device_strategy], - mode=['graph', 'eager'])) - def test_optimizer_in_cross_replica_context_raises_error(self, distribution): - - with self.cached_session(), distribution.scope(): - model = keras.models.Sequential([keras.layers.Dense(1)]) - x = np.array([[1.]]) - with tf.GradientTape() as tape: - y = model(x) - gradients = tape.gradient(y, model.trainable_variables) - optimizer = gradient_descent_keras.SGD() - - with self.assertRaisesRegex(RuntimeError, - 'cannot be called in cross-replica context'): - optimizer.apply_gradients(zip(gradients, model.trainable_variables)) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_calling_model_with_nested_numpy_arrays(self, distribution): - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(learning_rate=0.001) - model = multi_input_output_model() - loss = 'mse' - model.compile( - optimizer, - loss) - - input_a_np = np.asarray(np.random.random((64, 3)), dtype=np.float32) - input_b_np = np.asarray(np.random.random((64, 5)), dtype=np.float32) - inputs = [input_a_np, input_b_np] - - output_d_np = np.asarray(np.random.random((64, 7)), dtype=np.float32) - output_e_np = np.asarray(np.random.random((64, 7)), dtype=np.float32) - targets = [output_d_np, output_e_np] - - # Call fit with validation data - model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0) - - # TODO(anjalisridhar): We need tests for when the batch size and steps are - # smaller and results in a 0 batch_size and steps value. 
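The cross-replica error asserted above exists because gradient application must happen once per replica; a minimal sketch of the legal pattern, wrapping the update in strategy.run:

import numpy as np
import tensorflow as tf
from tensorflow import keras

strategy = tf.distribute.OneDeviceStrategy("/cpu:0")
with strategy.scope():
    model = keras.Sequential([keras.layers.Dense(1)])
    optimizer = keras.optimizers.SGD()

def step_fn(x):
    with tf.GradientTape() as tape:
        y = model(x)
    grads = tape.gradient(y, model.trainable_variables)
    # Inside `strategy.run` we are in replica context, so this is allowed.
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

strategy.run(step_fn, args=(np.array([[1.0]], np.float32),))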
- model.evaluate(inputs, targets) - model.evaluate(inputs, targets, batch_size=8) - - model.predict(inputs) - model.predict(inputs, batch_size=8) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategies_minus_tpu, mode=['graph', 'eager']) + - tf.__internal__.test.combinations.combine( - distribution=multi_worker_mirrored_strategies, mode=['eager'])) - def test_numpy_with_sample_weights(self, distribution): - with self.cached_session(), distribution.scope(): - model = get_sample_weights_model() - optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=0.001) - loss = 'mse' - model.compile( - optimizer, - loss) - - inputs = np.array([[0], [1], [2], [3]], np.float32) - targets = np.array([[2], [4], [6], [8]], np.float32) - sample_weights = np.array([0.25, 0.5, 0.75, 1], np.float32) - - result = model.evaluate( - inputs, - targets, - batch_size=2, - sample_weight=sample_weights, - verbose=1) - - # The per sample loss is multiplied by the corresponding sample weight. - # The average of these weighted losses is the return value of the - # `evaluate` call. For example, in the test above the average weighted - # loss is calculated in the following manner: - - # batch_1 = (((2-0)^2) * 0.25 + ((4-1)^2) * 0.5) / 2 = 5.5 / 2 = 2.75 - # batch_2 = (((6-2)^2 * 0.75) + ((8-3)^2 * 1)) / 2 = 37 / 2 = 18.5 - # final result = (batch_1 + batch_2) / 2 = 10.625. - # The first time we divide by number of input samples and the second time - # we divide by number of steps/batches that the loss is aggregated over. - self.assertAllClose(result, 10.625) - - # We now test without passing sample_weights: - # batch_1 = ((2-0)^2) + ((4-1)^2) / 2 = 13 / 2 = 6.5 - # batch_2 = ((6-2)^2) + ((8-3)^2) / 2 = 41 / 2 = 20.5 - # final result = (batch_1 + batch_2) / 2 = 27 / 2 = 13.5 - result = model.evaluate(inputs, targets, batch_size=2, verbose=1) - self.assertAllClose(result, 13.5) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_flatten_predict_outputs(self, distribution): - with self.cached_session(): - with distribution.scope(): - model = multi_input_output_model() - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(learning_rate=0.001) - loss = 'mse' - model.compile( - optimizer, - loss) - - # We take 6 input samples with each input having a dimension of 3 or 5. - input_a_np = np.asarray(np.random.random((6, 3)), dtype=np.float32) - input_b_np = np.asarray(np.random.random((6, 5)), dtype=np.float32) - inputs = [input_a_np, input_b_np] - - outs = model.predict(inputs) - # `predict` a list that is equal in length to the number of model outputs. - # In this test our model has two outputs and each element of `outs` - # corresponds to all the samples of one of the model outputs. - self.assertLen(outs, 2) - # Each of the output samples have a dimension of 7. We should process all - # the available input samples(6). 
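As the removed comment notes, `predict` on a multi-output model returns one array per output, each covering all input samples; a tiny functional-API sketch:

import numpy as np
from tensorflow import keras

inp = keras.Input(shape=(3,))
model = keras.Model(
    inp, [keras.layers.Dense(7)(inp), keras.layers.Dense(7)(inp)]
)

outs = model.predict(np.zeros((6, 3), np.float32))
assert len(outs) == 2            # one entry per model output
assert outs[0].shape == (6, 7)   # all 6 samples are processed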
- self.assertAllEqual([6, 7], outs[0].shape) - self.assertAllEqual([6, 7], outs[1].shape) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(tpu_strategy_combinations_graph_only(), - tf.__internal__.test.combinations.combine(batch_size=[4, 6]))) - def test_evaluate_with_partial_batch(self, distribution, batch_size): - with self.cached_session(): - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - - with distribution.scope(): - model_with_ds_strategy = get_model() - model_with_ds_strategy.compile(optimizer, loss, metrics=metrics) - - cpu_model = get_model() - cpu_model.compile(optimizer, loss, metrics=metrics) - - x = np.random.random((10, 3)).astype('float32') - y = np.random.random((10, 4)).astype('float32') - - # As sample size is 10, we batch by 4 so that the last batch is - # a partial batch. Also `evaluate()` using numpy array as inputs without - # distribution strategy uses entire sample as a single batch. As so, - # we remove parameters `batch_size` and `steps`. - cpu_model.set_weights(model_with_ds_strategy.get_weights()) - evaluate_ground_truth = cpu_model.evaluate(x, y) - - # We don't compare the loss as loss is currently not computed as metric - # in Keras, the loss value is inaccurate for last partial batch due to - # more weights for the last batch samples. - steps = np.ceil(10.0 / batch_size) - self.assertAllClose( - model_with_ds_strategy.evaluate( - x, y, batch_size=batch_size, steps=steps)[1:], - evaluate_ground_truth[1:], - atol=1e-5, - rtol=1e-5) - # Test that `steps` is inferred correctly when final partial batch exists. - self.assertAllClose( - model_with_ds_strategy.evaluate(x, y, batch_size=batch_size)[1:], - evaluate_ground_truth[1:], - atol=1e-5, - rtol=1e-5) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - tpu_strategy_combinations_graph_only())) - def test_predict_with_partial_batch(self, distribution): - with self.cached_session(): - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - loss = 'mse' - - with distribution.scope(): - model_with_ds_strategy = get_model() - model_with_ds_strategy.compile( - optimizer, - loss) - - cpu_model = get_model() - cpu_model.compile(optimizer, loss) - - inputs = np.random.random((10, 3)).astype(np.float32) - - # As sample size is 10, we batch by 4 so that the last batch is - # a partial batch. Also `predict()` using numpy array as inputs without - # distribution strategy uses entire sample as a single batch. As so, - # we remove parameters `batch_size` and `steps`. - cpu_model.set_weights(model_with_ds_strategy.get_weights()) - predict_ground_truth = cpu_model.predict(inputs) - self.assertAllClose( - model_with_ds_strategy.predict(inputs, batch_size=4, steps=3), - predict_ground_truth, - atol=1e-5, - rtol=1e-5) - # Test that `steps` is inferred correctly when final partial batch exists. 
- self.assertAllClose( - model_with_ds_strategy.predict(inputs, batch_size=4), - predict_ground_truth, - atol=1e-5, - rtol=1e-5) - - @tf.__internal__.distribute.combinations.generate(tpu_strategy_combinations_graph_only()) - def test_no_target_model(self, distribution): - with self.cached_session(): - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - - class MyLayer(keras.layers.Layer): - - def call(self, inputs, training=None): - self.add_loss(tf.reduce_sum(inputs), inputs=True) - return inputs - - with distribution.scope(): - model = keras.models.Sequential() - model.add( - keras.layers.Dense(16, activation='relu', input_shape=_INPUT_SIZE)) - model.add(MyLayer()) - model.add(keras.layers.Dense(_NUM_CLASS, activation='softmax')) - - model.compile(optimizer) - inputs = np.zeros((20, 10), np.float32) - - model.fit(inputs, epochs=1, steps_per_epoch=2) - model.predict(inputs, steps=1) - model.evaluate(inputs, steps=1) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - tpu_strategy_combinations_graph_only())) - def test_predict_multi_output_model_with_partial_batch( - self, distribution): - with self.cached_session(): - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - loss = 'mse' - - with distribution.scope(): - model_with_ds_strategy = simple_multi_inputs_multi_outputs_model() - model_with_ds_strategy.compile( - optimizer, - loss) - - cpu_model = simple_multi_inputs_multi_outputs_model() - cpu_model.compile(optimizer, loss) - - input_data, _ = get_multi_inputs_multi_outputs_data() - input_dict = { - 'input_a': input_data['input_a'], - 'input_b': input_data['input_b'], - } - - # As sample size is 200, we batch by 18 so that the last batch is - # a partial batch. Also `fit()` using numpy array as inputs without - # distribution strategy uses entire sample as a single batch. As so, - # we remove parameters `batch_size` and `steps`. - cpu_model.set_weights(model_with_ds_strategy.get_weights()) - self.assertAllClose( - model_with_ds_strategy.predict(input_dict, batch_size=18, steps=12), - cpu_model.predict(input_dict), - atol=1e-4, - rtol=1e-4) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_gradients_are_none(self, distribution): - - if not tf.executing_eagerly(): - self.skipTest('None gradients are not supported in graph mode') - - class DenseWithExtraWeight(keras.layers.Dense): - - def build(self, input_shape): - # Gradients w.r.t. 
extra_weights are None - self.extra_weight_1 = self.add_weight('extra_weight_1', shape=(), - initializer='ones') - super().build(input_shape) - self.extra_weight_2 = self.add_weight('extra_weight_2', shape=(), - initializer='ones') - - with distribution.scope(): - model = keras.Sequential([DenseWithExtraWeight(4, input_shape=(4,))]) - model.compile('adam', 'mse') - - inputs = np.random.normal(size=(64, 4)) - targets = np.random.normal(size=(64, 4)) - old_kernel = model.get_weights()[1] - model.fit(inputs, targets) - new_kernel = model.get_weights()[1] - self.assertNotAllEqual(old_kernel, new_kernel) - - -class TestDistributionStrategyWithDatasets(tf.test.TestCase, - parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_calling_model_on_same_dataset(self, distribution): - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.001) - model = get_model() - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - model.compile( - optimizer, - loss, - metrics=metrics) - - dataset = get_dataset(distribution) - - # Call fit with validation data - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - validation_data=dataset, - validation_steps=2) - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - validation_data=dataset, - validation_steps=2) - model.predict(get_predict_dataset(distribution), steps=2) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_model_interleaved_eval_same_as_direct_eval( - self, distribution): - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - user_controlled_model = get_model() - user_controlled_model.compile( - optimizer_fn(0.001), - loss='mse', - metrics=['mae', keras.metrics.CategoricalAccuracy()]) - - interleaved_model = get_model() - interleaved_model.set_weights(user_controlled_model.get_weights()) - interleaved_model.compile( - optimizer_fn(0.001), - loss='mse', - metrics=['mae', keras.metrics.CategoricalAccuracy()]) - - dataset = get_dataset(distribution) - - # Call fit with validation interleaved - interleaved_output = interleaved_model.fit( - dataset, - epochs=2, - steps_per_epoch=2, - verbose=1, - validation_data=dataset, - validation_steps=2, - shuffle=False) - - # Manually control the validation running after each epoch. 
- user_controlled_output = [] - for _ in range(2): - user_controlled_model.fit( - dataset, epochs=1, steps_per_epoch=2, verbose=1, shuffle=False) - user_controlled_output.append( - user_controlled_model.evaluate(dataset, steps=2)) - - self.assertEqual(interleaved_output.history['val_loss'], - [x[0] for x in user_controlled_output]) - val_mean_absolute_error = interleaved_output.history.get( - 'val_mean_absolute_error') - if not val_mean_absolute_error: - # The name of the metric changed in TF2.0 - val_mean_absolute_error = interleaved_output.history['val_mae'] - self.assertEqual(val_mean_absolute_error, - [x[1] for x in user_controlled_output]) - self.assertEqual(interleaved_output.history['val_categorical_accuracy'], - [x[2] for x in user_controlled_output]) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_fit_with_tuple_and_dict_dataset_inputs(self, distribution): - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(learning_rate=0.001) - model = multi_input_output_model() - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - model.compile( - optimizer, - loss, - metrics=metrics) - - input_a_np = np.random.random((10, 3)).astype('float32') - input_b_np = np.random.random((10, 5)).astype('float32') - output_d_np = np.random.random((10, 7)).astype('float32') - output_e_np = np.random.random((10, 7)).astype('float32') - - # Test with tuples - dataset_tuple = tf.data.Dataset.from_tensor_slices( - ((input_a_np, input_b_np), (output_d_np, output_e_np))) - dataset_tuple = dataset_tuple.repeat(100) - dataset_tuple = dataset_tuple.batch(10) - - model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1) - # Test with dict - dataset_dict = tf.data.Dataset.from_tensor_slices(({ - 'input_a': input_a_np, - 'input_b': input_b_np - }, (output_d_np, output_e_np))) - dataset_dict = dataset_dict.repeat(100) - dataset_dict = dataset_dict.batch(10) - - model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_fit_with_dictionary_in_the_dataset_b135161171( - self, distribution): +class TestRegularizerLoss(tf.test.TestCase, parameterized.TestCase): + class IdentityRegularizer(keras.regularizers.Regularizer): + def __call__(self, x): + return tf.identity(x) + + class AddLayer(keras.layers.Layer): + def build(self, _): + self.v = self.add_weight( + "v", + (), + initializer="ones", + regularizer=TestRegularizerLoss.IdentityRegularizer(), + ) + + def call(self, inputs): + return inputs + self.v + + @staticmethod + def loss_fn(_, y_pred): + return tf.reduce_mean(y_pred) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + all_strategy_combinations_minus_default() + ) + ) + def test_regularizer_loss(self, distribution): + batch_size = 2 + if not distributed_training_utils.global_batch_size_supported( + distribution + ): + batch_size //= distribution.num_replicas_in_sync + + # Given an input x, which is always 1, and variable v, this model + # computes Loss=x+v+regularizer_loss, where regularizer_loss=v and + # the variable is initialized to 1. Therefore, this model computes + # Loss=1+2v, and so the gradient dLoss/dv = 2. This gradient of 2 is + # averaged over all examples in a batch and then multiplied by the + # learning rate of 1. 
As a result, the model update for one batch + should subtract 2 from v, resulting in v being -1. If the + regularizer loss is not scaled correctly by the number of replicas, + the variable value will be incorrect when the number of replicas + # is greater than 1. For example, it will be -2 if there are 2 replicas. + with distribution.scope(): + x = keras.layers.Input(shape=(1,), batch_size=batch_size) + y = TestRegularizerLoss.AddLayer()(x) + model = keras.models.Model(inputs=x, outputs=y) + opt = gradient_descent_keras.SGD(1.0) + model.compile(opt, loss=TestRegularizerLoss.loss_fn) + model.fit( + x=np.array([[1.0], [1.0]], dtype=np.float32), + y=np.array([[1.0], [1.0]], dtype=np.float32), + batch_size=batch_size, + ) + v = model.get_weights()[0] + self.assertEqual(-1.0, v) - if backend.is_tpu_strategy(distribution): - self.skipTest('b/142805125') - - def custom_loss(predict, label, weight): - bce = keras.losses.binary_crossentropy(label, predict) - return tf.reduce_mean(bce * weight) - - with self.cached_session(): - with distribution.scope(): - input_img = keras.layers.Input([64, 64, 3], name='img') - input_lbl = keras.layers.Input([64, 64, 1], name='lbl') - input_weight = keras.layers.Input([64, 64], name='weight') - predict = keras.layers.Conv2D(2, [1, 1], padding='same')(input_img) - loss_lambda = keras.layers.Lambda( - lambda x: custom_loss(*x), name='my_loss') - my_loss = loss_lambda([predict, input_lbl, input_weight]) - model = keras.models.Model( - inputs=[input_img, input_lbl, input_weight], - outputs=[predict, my_loss]) - model.add_loss(model.get_layer('my_loss').output) - model.compile( - optimizer='adam') - - if tf.executing_eagerly(): - - def map_fn(img, lbl, weight): - inputs = {'img': img, 'lbl': lbl, 'weight': weight} - return (inputs,) - else: - - def map_fn(img, lbl, weight): - inputs = {'img': img, 'lbl': lbl, 'weight': weight} - return inputs, {} - - fake_imgs = np.ones([50, 64, 64, 3], dtype=np.float32) - fake_lbls = np.ones([50, 64, 64, 1], dtype=np.float32) - fake_weights = np.ones([50, 64, 64], dtype=np.float32) - - data = tf.data.Dataset.from_tensor_slices( - (fake_imgs, fake_lbls, fake_weights)).map(map_fn).batch(10) - - model.fit(data) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_fit_eval_and_predict_methods_on_dataset_without_steps( - self, distribution): - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.001) - model = get_model() - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - model.compile( - optimizer, - loss, - metrics=metrics) - - inputs = np.zeros((1000, 3), dtype=np.float32) - targets = np.zeros((1000, 4), dtype=np.float32) - # steps/steps_per_epoch are calculated when using numpy arrays as - # input data.
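The regularizer-scaling arithmetic described above (Loss = 1 + 2v, so dLoss/dv = 2, and one SGD step with learning rate 1 moves v from 1 to -1) can be verified without any strategy; a single-replica sketch:

import tensorflow as tf

v = tf.Variable(1.0)
with tf.GradientTape() as tape:
    loss = 1.0 + v + v         # data loss (x + v, with x = 1) plus regularizer (v)
grad = tape.gradient(loss, v)  # dLoss/dv = 2
v.assign_sub(1.0 * grad)       # SGD, learning rate 1
assert v.numpy() == -1.0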
- fit_with_numpy = model.fit( - inputs, targets, epochs=1, batch_size=10).history - eval_with_numpy = model.evaluate(inputs, targets, batch_size=10) - predict_with_numpy = model.predict(inputs, batch_size=10) - - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.batch(10, drop_remainder=True) - fit_with_ds = model.fit(dataset, epochs=1).history - eval_with_ds = model.evaluate(dataset) - predict_dataset = tf.data.Dataset.from_tensor_slices(inputs) - predict_dataset = predict_dataset.batch(10, drop_remainder=True) - predict_with_ds = model.predict(predict_dataset) - self.assertAllClose(fit_with_numpy, fit_with_ds, atol=1e-4, rtol=1e-4) - self.assertAllClose(eval_with_numpy, eval_with_ds, atol=1e-4, rtol=1e-4) - self.assertAllClose( - predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_predict_on_dataset_with_unknown_cardinality_without_steps( - self, distribution, mode): - - if mode == 'graph' and backend.is_tpu_strategy(distribution): - self.skipTest('partial batch not supported with TPU in graph mode.') - - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.001) - model = get_model() - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - model.compile(optimizer, loss, metrics=metrics) - - inputs = np.zeros((20, 3), dtype=np.float32) - # steps/steps_per_epoch are calculated when using numpy arrays as - # input data. - predict_with_numpy = model.predict(inputs, batch_size=10) - - predict_dataset = convert_numpy_to_dataset_with_unknown_cardinality( - inputs) - - self.assertEqual( - keras.backend.get_value(tf.data.experimental.cardinality(predict_dataset)), - tf.data.experimental.UNKNOWN_CARDINALITY) - - predict_with_ds = model.predict(predict_dataset) - self.assertAllClose( - predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_on_dataset_with_unknown_cardinality_without_steps( - self, distribution, mode): - # TODO(b/155867206): Investigate why this test occasionally segfaults on TPU - # in eager mode. - if mode == 'eager' and backend.is_tpu_strategy(distribution): - self.skipTest('caused segfault with TPU in eager mode.') - - if mode == 'graph' and backend.is_tpu_strategy(distribution): - self.skipTest('partial batch not supported with TPU in graph mode.') - - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.001) - model = get_model() - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - model.compile( - optimizer, - loss, - metrics=metrics) - - inputs = np.zeros((100, 3), dtype=np.float32) - targets = np.zeros((100, 4), dtype=np.float32) - # steps/steps_per_epoch are calculated when using numpy arrays as - # input data. 
- fit_with_numpy = model.fit( - inputs, targets, epochs=1, batch_size=10).history - fit_with_numpy_multiple_epochs = model.fit( - inputs, targets, epochs=2, batch_size=10).history - eval_with_numpy = model.evaluate(inputs, targets, batch_size=10) - predict_with_numpy = model.predict(inputs, batch_size=10) - - dataset = convert_numpy_to_dataset_with_unknown_cardinality( - inputs, targets) - predict_dataset = convert_numpy_to_dataset_with_unknown_cardinality( - inputs) - - self.assertEqual( - keras.backend.get_value(tf.data.experimental.cardinality(dataset)), - tf.data.experimental.UNKNOWN_CARDINALITY) - self.assertEqual( - keras.backend.get_value(tf.data.experimental.cardinality(predict_dataset)), - tf.data.experimental.UNKNOWN_CARDINALITY) - - eval_with_ds = model.evaluate(dataset) - predict_with_ds = model.predict(predict_dataset) - self.assertAllClose(eval_with_numpy, eval_with_ds, atol=1e-4, rtol=1e-4) - self.assertAllClose( - predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4) - - fit_with_ds = model.fit(dataset, epochs=1).history - fit_with_ds_multiple_epochs = model.fit(dataset, epochs=2).history - self.assertAllClose(fit_with_numpy, fit_with_ds, atol=1e-4, rtol=1e-4) - self.assertAllClose( - fit_with_numpy_multiple_epochs, - fit_with_ds_multiple_epochs, - atol=1e-4, - rtol=1e-4) - - @tf.__internal__.distribute.combinations.generate(tpu_strategy_combinations_graph_only()) - def test_on_dataset_with_unknown_cardinality(self, distribution): - with self.cached_session(): - with distribution.scope(): - model = get_model() - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - model.compile( - tf.compat.v1.train.GradientDescentOptimizer(0.001), - loss, - metrics=metrics) - - inputs = np.zeros((1000, 3), dtype=np.float32) - targets = np.zeros((1000, 4), dtype=np.float32) - # steps/steps_per_epoch are calculated when using numpy arrays as - # input data. 
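The UNKNOWN_CARDINALITY condition these tests construct comes from transformations such as filter, whose output size tf.data cannot know statically; a minimal sketch:

import numpy as np
import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices(np.zeros((100, 3), np.float32))
ds = ds.batch(10)
assert tf.data.experimental.cardinality(ds).numpy() == 10

# `filter` hides the size even when the predicate keeps every element.
ds = ds.filter(lambda x: True)
card = tf.data.experimental.cardinality(ds)
assert card == tf.data.experimental.UNKNOWN_CARDINALITY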
- eval_with_numpy = model.evaluate(inputs, targets, batch_size=10) - predict_with_numpy = model.predict(inputs, batch_size=10) - - dataset = convert_numpy_to_dataset_with_unknown_cardinality( - inputs, targets) - predict_dataset = convert_numpy_to_dataset_with_unknown_cardinality( - inputs) - - self.assertEqual( - keras.backend.get_value(tf.data.experimental.cardinality(dataset)), - tf.data.experimental.UNKNOWN_CARDINALITY) - self.assertEqual( - keras.backend.get_value(tf.data.experimental.cardinality(predict_dataset)), - tf.data.experimental.UNKNOWN_CARDINALITY) - - eval_with_ds = model.evaluate(dataset, steps=100) - predict_with_ds = model.predict(predict_dataset, steps=100) - self.assertAllClose(eval_with_numpy, eval_with_ds, atol=1e-4, rtol=1e-4) - self.assertAllClose( - predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4) - - with self.assertRaisesRegex(ValueError, - 'Number of steps could not be inferred'): - model.fit(dataset, epochs=1) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_fit_eval_and_predict_methods_on_dataset( - self, distribution): - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.001) - model = get_model() - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - model.compile( - optimizer, - loss, - metrics=metrics) - dataset = get_dataset(distribution) +@test_utils.run_all_without_tensor_float_32( + "Uses Dense layers, which call matmul" +) +class TestDistributionStrategyWithKerasModels( + tf.test.TestCase, parameterized.TestCase +): + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_distribution_strategy_on_sequential_model(self, distribution): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(learning_rate=0.001) + model = simple_sequential_model() + loss = "mse" + model.compile(optimizer, loss) + + inputs = np.zeros((20, 10), np.float32) + targets = np.zeros((20, 2), np.float32) + + model.fit(inputs, targets, epochs=1, batch_size=10) + model.predict(inputs, batch_size=10) + model.evaluate(inputs, targets, batch_size=10) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations() + ) + def test_distribution_strategy_on_functional_model(self, distribution): + with distribution.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(learning_rate=0.001) + model = get_model() + loss = "mse" + model.compile(optimizer, loss) + + inputs = np.zeros((64, 3), dtype=np.float32) + targets = np.zeros((64, 4), dtype=np.float32) + + model.fit(inputs, targets, epochs=1) + model.predict(inputs) + model.evaluate(inputs, targets) - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) - model.evaluate(dataset, steps=2, verbose=1) - model.predict(get_predict_dataset(distribution), steps=2) + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_distributed_dataset(self, distribution): + with distribution.scope(): + + class CBCounter(keras.callbacks.Callback): + def __init__(self): + self.epochs = 0 + self.train_batches = 0 + self.test_batches = 0 + + def on_epoch_end(self, batch, logs=None): + self.epochs += 1 + + def on_train_batch_end(self, batch, logs=None): + self.train_batches += 1 + + def on_test_batch_end(self, batch, logs=None): + self.test_batches += 1 + + model = 
keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + cb_counter = CBCounter() + + x, y = np.ones((100, 10)), np.ones((100, 1)) + ds = tf.data.Dataset.from_tensor_slices((x, y)) + ds = ds.batch(10).repeat(2) + ds = distribution.experimental_distribute_dataset(ds) + + val_ds = tf.data.Dataset.from_tensor_slices((x, y)) + val_ds = val_ds.batch(20) + val_ds = distribution.experimental_distribute_dataset(val_ds) + + model.fit( + ds, + steps_per_epoch=10, + validation_data=val_ds, + validation_steps=5, + epochs=2, + callbacks=[cb_counter], + ) + + self.assertEqual(cb_counter.train_batches, 20) + self.assertEqual(cb_counter.test_batches, 10) + self.assertEqual(cb_counter.epochs, 2) + + # Check for `steps_per_epoch`. + if distribution.num_replicas_in_sync > 1: + with self.assertRaisesRegex( + ValueError, "distributed dataset, you must specify" + ): + model.fit(ds, epochs=2) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_distributed_datasets_from_function(self, distribution): + with distribution.scope(): + + class CBCounter(keras.callbacks.Callback): + def __init__(self): + self.epochs = 0 + self.train_batches = 0 + self.test_batches = 0 + + def on_epoch_end(self, batch, logs=None): + self.epochs += 1 + + def on_train_batch_end(self, batch, logs=None): + self.train_batches += 1 + + def on_test_batch_end(self, batch, logs=None): + self.test_batches += 1 + + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + cb_counter = CBCounter() + + def make_dataset(_): + x, y = np.ones((100, 10)), np.ones((100, 1)) + ds = tf.data.Dataset.from_tensor_slices((x, y)) + ds = ds.batch(5).repeat() + return ds + + ds = distribution.distribute_datasets_from_function(make_dataset) + val_ds = distribution.distribute_datasets_from_function( + make_dataset + ) + + model.fit( + ds, + steps_per_epoch=10, + validation_data=val_ds, + validation_steps=5, + epochs=2, + callbacks=[cb_counter], + ) + + self.assertEqual(cb_counter.train_batches, 20) + self.assertEqual(cb_counter.test_batches, 10) + self.assertEqual(cb_counter.epochs, 2) + + # Check for `steps_per_epoch`. 
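Background for the `steps_per_epoch` check below: once a dataset has gone through experimental_distribute_dataset, Keras can no longer infer a per-epoch length, so fit() needs an explicit steps_per_epoch whenever there is more than one replica; a minimal sketch (assuming a machine where MirroredStrategy finds at least one device):

import numpy as np
import tensorflow as tf
from tensorflow import keras

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = keras.Sequential([keras.layers.Dense(1)])
    model.compile("sgd", "mse")

ds = tf.data.Dataset.from_tensor_slices(
    (np.ones((100, 10)), np.ones((100, 1)))
).batch(10)
dist_ds = strategy.experimental_distribute_dataset(ds)

# Omitting steps_per_epoch here raises a ValueError with >1 replica.
model.fit(dist_ds, steps_per_epoch=10, epochs=1, verbose=0)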
+ if distribution.num_replicas_in_sync > 1: + with self.assertRaisesRegex( + ValueError, "distributed dataset, you must specify" + ): + model.fit(ds, epochs=2) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_host_training_loop(self, distribution): + if isinstance(distribution, tf.distribute.MultiWorkerMirroredStrategy): + self.skipTest("b/172032817") + with distribution.scope(): + inputs = keras.Input((10, 10, 3)) + x = keras.layers.Conv2D(3, kernel_size=3)(inputs) + x = keras.layers.Flatten()(x) + outputs = keras.layers.Dense(1)(x) + model = keras.Model(inputs, outputs) + + model.compile("sgd", "mse", steps_per_execution=10) + + bc = BatchCountingCB() + x, y = np.ones((100, 10, 10, 3)), np.ones((100, 1)) + model.fit(x, y, batch_size=2, epochs=1, callbacks=[bc]) + self.assertEqual(bc.train_begin_batches, [0, 10, 20, 30, 40]) + self.assertEqual(bc.train_end_batches, [9, 19, 29, 39, 49]) + + model.evaluate(x, y, batch_size=2, callbacks=[bc]) + self.assertEqual(bc.test_begin_batches, [0, 10, 20, 30, 40]) + self.assertEqual(bc.test_end_batches, [9, 19, 29, 39, 49]) + + model.predict(x, batch_size=2, callbacks=[bc]) + self.assertEqual(bc.predict_begin_batches, [0, 10, 20, 30, 40]) + self.assertEqual(bc.predict_end_batches, [9, 19, 29, 39, 49]) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_host_training_loop_last_partial_execution(self, distribution): + if isinstance(distribution, tf.distribute.MultiWorkerMirroredStrategy): + self.skipTest("b/172032817") + with distribution.scope(): + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs, outputs) + + model.compile("sgd", "mse", steps_per_execution=20) + + bc = BatchCountingCB() + x, y = np.ones((100, 10)), np.ones((100, 1)) + model.fit(x, y, batch_size=2, epochs=1, callbacks=[bc]) + self.assertEqual(bc.train_begin_batches, [0, 20, 40]) + self.assertEqual(bc.train_end_batches, [19, 39, 49]) + + model.evaluate(x, y, batch_size=2, callbacks=[bc]) + self.assertEqual(bc.test_begin_batches, [0, 20, 40]) + self.assertEqual(bc.test_end_batches, [19, 39, 49]) + + model.predict(x, batch_size=2, callbacks=[bc]) + self.assertEqual(bc.predict_begin_batches, [0, 20, 40]) + self.assertEqual(bc.predict_end_batches, [19, 39, 49]) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_host_training_loop_dataset_unknown_size(self, distribution): + if isinstance(distribution, tf.distribute.MultiWorkerMirroredStrategy): + self.skipTest("b/172032817") + with distribution.scope(): + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs, outputs) + + model.compile("sgd", "mse", steps_per_execution=20) - @tf.__internal__.distribute.combinations.generate(strategy_and_optimizer_combinations()) - def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer): - with self.cached_session(): + x, y = np.ones((100, 10)), np.ones((100, 1)) + ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) + ds = ds.filter(lambda *args, **kwargs: True) # Makes the size UNKNOWN. 
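These host-training-loop tests hinge on compile(..., steps_per_execution=N), which packs N steps into a single tf.function call, so per-batch callbacks fire only at execution boundaries; a minimal sketch of the cadence asserted above:

import numpy as np
from tensorflow import keras

class Recorder(keras.callbacks.Callback):
    def __init__(self):
        super().__init__()
        self.begins = []

    def on_train_batch_begin(self, batch, logs=None):
        self.begins.append(batch)

model = keras.Sequential([keras.layers.Dense(1, input_shape=(10,))])
model.compile("sgd", "mse", steps_per_execution=10)

rec = Recorder()
x, y = np.ones((100, 10)), np.ones((100, 1))
model.fit(x, y, batch_size=2, epochs=1, verbose=0, callbacks=[rec])
# 50 batches run 10 at a time -> begin fires at 0, 10, 20, 30, 40.
assert rec.begins == [0, 10, 20, 30, 40]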
+ bc = BatchCountingCB() + + with self.assertRaisesRegex(ValueError, "steps_per_execution"): + model.fit(ds, epochs=2, callbacks=[bc]) + + train_ds = ds.repeat(2) + model.fit(train_ds, steps_per_epoch=50, epochs=2, callbacks=[bc]) + self.assertEqual(bc.train_begin_batches, [0, 20, 40, 0, 20, 40]) + self.assertEqual(bc.train_end_batches, [19, 39, 49, 19, 39, 49]) + + with self.assertRaisesRegex(ValueError, "steps_per_execution"): + model.evaluate(ds, callbacks=[bc]) + + test_ds = ds.repeat(2) + model.evaluate(test_ds, steps=50, callbacks=[bc]) + self.assertEqual(bc.test_begin_batches, [0, 20, 40]) + self.assertEqual(bc.test_end_batches, [19, 39, 49]) + + predict_ds = ds.repeat(2) + model.predict(predict_ds, steps=50, callbacks=[bc]) + self.assertEqual(bc.predict_begin_batches, [0, 20, 40]) + self.assertEqual(bc.predict_end_batches, [19, 39, 49]) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_host_training_loop_truncate_to_epoch(self, distribution): + if isinstance(distribution, tf.distribute.MultiWorkerMirroredStrategy): + self.skipTest("b/172032817") + with distribution.scope(): + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs, outputs) + + model.compile("sgd", "mse", steps_per_execution=500) - with distribution.scope(): + x, y = np.ones((100, 10)), np.ones((100, 1)) + bc = BatchCountingCB() + model.fit(x, y, batch_size=2, epochs=2, callbacks=[bc]) + self.assertEqual(bc.train_begin_batches, [0, 0]) + self.assertEqual(bc.train_end_batches, [49, 49]) + + x, y = np.ones((50, 10)), np.ones((50, 1)) + model.evaluate(x, y, batch_size=2, callbacks=[bc]) + self.assertEqual(bc.test_begin_batches, [0]) + self.assertEqual(bc.test_end_batches, [24]) + + x = np.ones((50, 10)) + model.predict(x, batch_size=2, callbacks=[bc]) + self.assertEqual(bc.predict_begin_batches, [0]) + self.assertEqual(bc.predict_end_batches, [24]) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_gradient_clipping(self, distribution): + class MyLayer(keras.layers.Layer): + def build(self, _): + self.v1 = tf.Variable(1.0) + self.v2 = tf.Variable(1.0) + + def call(self, x): + return 3 * self.v1 - 3 * self.v2 + + x, y = np.ones((10, 1)), np.ones((10, 1)) + + with distribution.scope(): + layer = MyLayer() + model = keras.Sequential([layer]) + optimizer = gradient_descent_keras.SGD( + 1.0, clipnorm=2.0, clipvalue=2.0 + ) + model.compile(optimizer, "mae") + + if isinstance( + distribution, + ( + tf.distribute.experimental.CentralStorageStrategy, + tf.compat.v1.distribute.experimental.CentralStorageStrategy, + ), + ): + with self.assertRaisesRegex(ValueError, "not supported"): + model.fit(x, y, batch_size=10, epochs=1) + else: + model.fit(x, y, batch_size=10, epochs=1) + self.assertAllClose(self.evaluate(layer.v1), 3.0) + self.assertAllClose(self.evaluate(layer.v2), -1.0) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_custom_gradient_transformation(self, distribution): + if isinstance( + distribution, + ( + tf.distribute.experimental.CentralStorageStrategy, + tf.compat.v1.distribute.experimental.CentralStorageStrategy, + ), + ): + self.skipTest("Not supported with `CentralStorageStrategy`") + + class MyLayer(keras.layers.Layer): + def 
build(self, _): + self.v1 = tf.Variable(1.0) + self.v2 = tf.Variable(-1.0) + + def call(self, x): + return x + self.v1 + self.v2 + + def custom_transform(grads_and_vars): + # Always set gradients to 1. + return [(tf.ones_like(g), v) for g, v in grads_and_vars] + + x, y = np.ones((10, 1)), np.ones((10, 1)) + + with distribution.scope(): + layer = MyLayer() + model = keras.Sequential([layer]) + optimizer = gradient_descent_keras.SGD( + 1.0, gradient_transformers=[custom_transform] + ) + model.compile(optimizer, "mae") - model = get_model() - loss = 'mse' - model.compile( - optimizer(), - loss) - - dataset = get_dataset(distribution) - - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) - model.evaluate(dataset, steps=2, verbose=1) - model.predict(get_predict_dataset(distribution), steps=2) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.one_device_strategy - ], - mode=['graph', 'eager'])) - def test_dataset_wrong_input_shape(self, distribution, mode): - if mode == 'graph': - self.skipTest( - 'TODO(b/120943676, b/120957836): Re-enable for graph once the ' - 'validation code is restored.') - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(learning_rate=0.001) - model = get_model() - loss = 'mse' - model.compile( - optimizer, - loss) - - # Wrong input shape - inputs = np.zeros((10, 5), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - with self.assertRaisesRegex(ValueError, 'is incompatible with'): - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu - ], - mode=['graph', 'eager'])) - def test_dataset_external_batch_input_validation( - self, distribution): - with self.cached_session(): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(learning_rate=0.001) - model = get_model() - loss = 'mse' - model.compile( - optimizer, - loss) - - # Batching is done outside tf.data's `batch` - inputs = np.zeros((100, 10, 3), dtype=np.float32) - targets = np.zeros((100, 10, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus - ], - mode=['graph', 'eager'])) - def test_learning_phase_value(self, distribution): - # TODO(anjalisridhar): Modify this test to use Lambdas since we can compare - # meaningful values. Currently we don't pass the learning phase if the - # Lambda layer uses the learning phase. 
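The expected values in test_gradient_clipping follow from clipping a magnitude-3 gradient down to 2: the layer outputs 3*v1 - 3*v2 = 0, the MAE target is 1, so dL/dv1 = -3 and dL/dv2 = 3; clipnorm=2.0/clipvalue=2.0 cap both at magnitude 2, and one SGD step at learning rate 1 then moves v1 to 3.0 and v2 to -1.0. A standalone check of that arithmetic:

import tensorflow as tf

v1, v2 = tf.Variable(1.0), tf.Variable(1.0)
opt = tf.keras.optimizers.SGD(1.0, clipnorm=2.0, clipvalue=2.0)

with tf.GradientTape() as tape:
    y_pred = 3.0 * v1 - 3.0 * v2           # = 0
    loss = tf.abs(y_pred - 1.0)            # MAE against target 1
grads = tape.gradient(loss, [v1, v2])      # [-3.0, 3.0]
opt.apply_gradients(zip(grads, [v1, v2]))  # clipped to [-2.0, 2.0]
assert (v1.numpy(), v2.numpy()) == (3.0, -1.0)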
- with self.cached_session(): - with distribution.scope(): - x = keras.layers.Input(shape=(1,), name='input') - y = keras.layers.Dense(1, kernel_initializer='ones')(x) - z = keras.layers.Dropout(0.9999)(y) - model = keras.Model(x, z) - initial_weights = model.get_weights() - - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.005) - loss = 'mse' - metrics = ['acc'] - model.compile( - optimizer, - loss, - metrics=metrics) - - batch_size = 8 - if isinstance(distribution, (tf.distribute.MirroredStrategy, - tf.compat.v1.distribute.MirroredStrategy)): - # MirroredStrategy uses global batch size. - batch_size = 8 * distribution.num_replicas_in_sync - - inputs = np.ones((10, 1), dtype=np.float32) - targets = np.ones((10, 1), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat().batch(batch_size) - hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1) - self.assertAlmostEqual(hist.history['acc'][0], 0, 0) - - with distribution.scope(): - model.set_weights(initial_weights) - # TODO(psv/anjalisridhar): Enable these lines after we fix b/117431185. - # evaluate_output = model.evaluate(dataset, steps=20) - # self.assertAlmostEqual(evaluate_output[1], 1, 0) - - inputs = np.ones((10, 1), dtype=np.float32) - predict_dataset = tf.data.Dataset.from_tensor_slices(inputs) - - predict_dataset = predict_dataset.repeat().batch(batch_size) - output = model.predict(predict_dataset, steps=10) - # `predict` runs for 10 steps - ref_output = np.ones((160, 1), dtype=np.float32) - self.assertArrayNear(output, ref_output, 1e-1) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def testOptimizerWithCallbacks(self, distribution): - with self.cached_session(): - with distribution.scope(): - model = get_model() - optimizer = gradient_descent_keras.SGD(0.01) - loss = 'mse' + model.fit(x, y, batch_size=10, epochs=1) + self.assertAllClose(self.evaluate(layer.v1), 0.0) + self.assertAllClose(self.evaluate(layer.v2), -2.0) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + all_strategy_combinations_minus_default() + ) + ) + def test_distribution_strategy_one_dimensional(self, distribution): + with distribution.scope(): + inp = keras.layers.Input(shape=(10,)) + out = keras.layers.Dense(3, activation="softmax")(inp) + model = keras.Model(inputs=[inp], outputs=[out]) + model.compile( + optimizer="rmsprop", + loss="sparse_categorical_crossentropy", + metrics=["sparse_categorical_accuracy"], + ) + + x = np.random.random((64, 10)).astype("float32") + y = np.random.randint(3, size=64) + + model.fit(x, y, epochs=1, steps_per_epoch=2) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, # noqa: E501 + ], + mode=["graph", "eager"], + reduction=[ + losses_utils.ReductionV2.AUTO, + losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + losses_utils.ReductionV2.SUM, + ], + ) + ) + def test_distribution_strategy_with_loss_reduction_types( + self, distribution, reduction + ): + np.random.seed(_RANDOM_SEED) + + def _get_model(): + inputs = keras.Input((10,)) + x1 = keras.layers.Dense(10, kernel_initializer="zeros")(inputs) + x2 = keras.layers.Dense(10, kernel_initializer="zeros")(x1) + outputs = keras.layers.Dense(1, kernel_initializer="zeros")(x2) + 
model = keras.Model(inputs, outputs) + return model + + x = np.random.random((64, 10)) + y = np.random.random((64, 1)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + dataset = dataset.batch(32) + + model = _get_model() model.compile( - optimizer, - loss) - - dataset = get_dataset(distribution) - - def schedule(_): - return 0.001 - - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - callbacks=[keras.callbacks.LearningRateScheduler(schedule)]) - self.assertAllClose(0.001, keras.backend.get_value(model.optimizer.lr)) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(tpu_strategy_combinations_graph_only(), - tf.__internal__.test.combinations.combine(batch_size=[4, 6]))) - def test_evaluate_with_dataset_with_partial_batch(self, distribution, - batch_size): - with self.cached_session(): - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - - with distribution.scope(): - model_with_ds_strategy = get_model() - model_with_ds_strategy.compile(optimizer, loss, metrics=metrics) - - cpu_model = get_model() - cpu_model.compile(optimizer, loss, metrics=metrics) - - x = np.random.random((10, 3)).astype('float32') - y = np.random.random((10, 4)).astype('float32') - dataset = tf.data.Dataset.from_tensor_slices((x, y)) - - # As sample size is 10, we make the last batch a partial batch. - cpu_model.set_weights(model_with_ds_strategy.get_weights()) - dataset_with_partial_batch = dataset.batch(batch_size) - - # We don't compare the loss, as loss is currently not computed as a - # metric in Keras; the loss value is inaccurate for the last partial - # batch because its samples carry more weight. - steps = np.ceil(10.0 / batch_size) - self.assertAllClose( - model_with_ds_strategy.evaluate( - dataset_with_partial_batch, steps=steps)[1:], - cpu_model.evaluate(dataset_with_partial_batch, steps=steps)[1:], - atol=1e-5, - rtol=1e-5) - self.assertAllClose( - model_with_ds_strategy.evaluate(dataset_with_partial_batch)[1:], - cpu_model.evaluate(dataset_with_partial_batch)[1:], - atol=1e-5, - rtol=1e-5) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - tpu_strategy_combinations_graph_only())) - def test_predict_with_dataset_with_partial_batch( - self, distribution): - with self.cached_session(): - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - loss = 'mse' - - with distribution.scope(): - model_with_ds_strategy = get_model() - model_with_ds_strategy.compile( - optimizer, - loss) - - cpu_model = get_model() - cpu_model.compile(optimizer, loss) - - inputs = np.random.random((10, 3)).astype(np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs)) - - # As sample size is 10, we batch by 4 so that the last batch is - # a partial batch.
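# Sketch of the partial-batch arithmetic these tests use (illustrative,
# not from the diff): with 10 samples, `batch(4)` yields batches of
# 4, 4 and 2, so evaluation/prediction needs ceil(10 / 4) = 3 steps.
import tensorflow as tf

sizes = [int(b.shape[0]) for b in tf.data.Dataset.range(10).batch(4)]
assert sizes == [4, 4, 2]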
- dataset_with_partial_batch = dataset.batch(4) - cpu_model.set_weights(model_with_ds_strategy.get_weights()) - - self.assertAllClose( - model_with_ds_strategy.predict(dataset_with_partial_batch, steps=3), - cpu_model.predict(dataset_with_partial_batch, steps=3), - atol=1e-5, - rtol=1e-5) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - tpu_strategy_combinations_graph_only())) - def test_predict_multi_output_model_with_dataset_with_partial_batch( - self, distribution): - with self.cached_session(): - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - loss = 'mse' - - with distribution.scope(): - model_with_ds_strategy = simple_multi_inputs_multi_outputs_model() - model_with_ds_strategy.compile( - optimizer, - loss) - - cpu_model = simple_multi_inputs_multi_outputs_model() - cpu_model.compile(optimizer, loss) - - input_data, _ = get_multi_inputs_multi_outputs_data() - input_dict = { - 'input_a': input_data['input_a'], - 'input_b': input_data['input_b'], - } - - dataset = tf.data.Dataset.from_tensor_slices(input_dict) - - # As sample size is 200, we batch by 18 using 12 steps per epoch so - # that the last batch is a partial batch. - dataset_with_partial_batch = dataset.batch(18) - cpu_model.set_weights(model_with_ds_strategy.get_weights()) - - self.assertAllClose( - model_with_ds_strategy.predict(dataset_with_partial_batch, steps=12), - cpu_model.predict(dataset_with_partial_batch, steps=12), - atol=1e-4, - rtol=1e-4) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations_minus_default()) - def test_match_model_input_matches_with_dataset_tensors(self, distribution): - - def _create_model_input_output_tensors(): - input_a = keras.layers.Input(shape=(16,), name='z_input_sorted_last') - input_b = keras.layers.Input(shape=(32,), name='a_input_sorted_first') - intermediate_a = keras.layers.Dense(10)(input_a) - intermediate_b = keras.layers.Dense(10)(input_b) - merged = keras.layers.Add()([intermediate_a, intermediate_b]) - output = keras.layers.Dense(2)(merged) - return input_a, input_b, output - - input_dict = { - 'z_input_sorted_last': np.random.rand(32, 16).astype(np.float32), - 'a_input_sorted_first': np.random.rand(32, 32).astype(np.float32) - } - target = np.ones((32, 2), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((input_dict, target)) - dataset = dataset.batch(4, drop_remainder=True) - - with self.cached_session(): - with distribution.scope(): - input_a, input_b, output = _create_model_input_output_tensors() - # `input_a`, whose input name comes last in alphanumeric - # order, is the first of the model's input layers. If tensors - # from `input_dict` are blindly flattened and passed to the model - # inputs incorrectly, the `input_a` input layer - # would match tensor `a_input_sorted_first`, causing a - # shape mismatch.
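# Sketch of the ordering pitfall described above (illustrative only):
# tf.nest flattens dicts in sorted-key order, so blindly zipping the
# flattened tensors with model inputs that are in construction order
# would pair `z_input_sorted_last` data with the wrong input layer.
import tensorflow as tf

flat = tf.nest.flatten({"z_input_sorted_last": 16, "a_input_sorted_first": 32})
assert flat == [32, 16]  # sorted-key order, not insertion order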
- model_with_array_input = keras.models.Model( - inputs=[input_a, input_b], outputs=output) - model_with_array_input.compile('sgd', 'mse') - model_weights = model_with_array_input.get_weights() - model_with_array_input_fit = model_with_array_input.fit( - dataset, steps_per_epoch=1, epochs=1).history - - input_a, input_b, output = _create_model_input_output_tensors() - model_with_dict_input = keras.models.Model( - inputs={ - 'z_input_sorted_last': input_a, - 'a_input_sorted_first': input_b, - }, - outputs=output) - model_with_dict_input.compile('sgd', 'mse') - model_with_dict_input.set_weights(model_weights) - model_with_dict_input_fit = model_with_dict_input.fit( - dataset, steps_per_epoch=1, epochs=1).history - self.assertAllClose( - model_with_dict_input_fit, - model_with_array_input_fit, - atol=1e-4, - rtol=1e-4) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategies_minus_tpu, mode=['graph', 'eager']) + - tf.__internal__.test.combinations.combine( - distribution=multi_worker_mirrored_strategies, mode=['eager'])) - def test_dataset_with_sample_weights(self, distribution): - with self.cached_session(), distribution.scope(): - model = get_sample_weights_model() - optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=0.001) - loss = 'mse' - model.compile( - optimizer, - loss) - - inputs = np.array([[0], [1], [2], [3]], np.float32) - targets = np.array([[2], [4], [6], [8]], np.float32) - sample_weights = np.array([0.25, 0.5, 0.75, 1], np.float32) - ds = tf.data.Dataset.from_tensor_slices( - (inputs, targets, sample_weights)).batch(2) - result = model.evaluate(ds, verbose=1) - - # The per sample loss is multiplied by the corresponding sample weight. - # The average of these weighted losses is the return value of the - # `evaluate` call. For example, in the test above the average weighted - # loss is calculated in the following manner: - # batch_1 = (((2-0)^2) * 0.25 + ((4-1)^2) * 0.5) / 2 = 5.5 / 2 = 2.75 - # batch_2 = (((6-2)^2 * 0.75) + ((8-3)^2 * 1)) / 2 = 37 / 2 = 18.5 - # final result = (batch_1 + batch_2) / 2 = 10.625. - # The first time we divide by number of input samples and the second time - # we divide by number of steps/batches that the loss is aggregated over. 
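# The weighted-loss arithmetic spelled out above, reproduced with plain
# NumPy (a sketch, independent of the test code):
import numpy as np

inputs = np.array([0.0, 1.0, 2.0, 3.0])
targets = np.array([2.0, 4.0, 6.0, 8.0])
weights = np.array([0.25, 0.5, 0.75, 1.0])
weighted_sq_err = weights * (targets - inputs) ** 2         # per-sample terms
per_batch = weighted_sq_err.reshape(2, 2).sum(axis=1) / 2   # [2.75, 18.5]
assert np.isclose(per_batch.mean(), 10.625)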
- self.assertAllClose(result, 10.625) - - # We now test without passing sample_weights: - # batch_1 = (((2-0)^2) + ((4-1)^2)) / 2 = 13 / 2 = 6.5 - # batch_2 = (((6-2)^2) + ((8-3)^2)) / 2 = 41 / 2 = 20.5 - # final result = (batch_1 + batch_2) / 2 = 27 / 2 = 13.5 - ds = tf.data.Dataset.from_tensor_slices((inputs, targets)).batch(2) - result = model.evaluate(ds, verbose=1) - self.assertAllClose(result, 13.5) - - -class TestDistributionStrategyWithDatasetsFile(tf.test.TestCase, - parameterized.TestCase): - - def setUp(self): - super().setUp() - self.input_file_name = os.path.join(self.get_temp_dir(), 'input.tfrecord') - inputs = np.zeros((20, 3), dtype=np.float32) - input_dataset = tf.data.Dataset.from_tensor_slices(inputs) - input_dataset = input_dataset.map(tf.io.serialize_tensor) - writer = tf.data.experimental.TFRecordWriter(self.input_file_name) - writer.write(input_dataset) - - # TODO(wxinyi): add a multi-worker test for TPU - @tf.__internal__.distribute.combinations.generate(multi_worker_strategy_combinations_eager_only()) - def test_predict_on_dataset_shard_options_file_multi_worker_mirrored( - self, distribution, mode): - # This test verifies that we successfully switch the auto_shard_policy of an - # input dataset inside model.predict with MultiWorkerMirroredStrategy to - # AutoShardPolicy.DATA. Since there is only one input file for multiple - # workers, AutoShardPolicy.AUTO or AutoShardPolicy.FILE will lead to an - # error. However, since we switch to AutoShardPolicy.DATA in model.predict, - # no error is raised. - del mode - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(0.001) - model = get_model() - loss = 'mse' - model.compile(optimizer, loss) - - dataset = tf.data.TFRecordDataset(self.input_file_name) - dataset = dataset.map(lambda x: tf.io.parse_tensor(x, tf.float32)) - - dummy_op = lambda inp: True - - dataset = dataset.filter(dummy_op).batch(8, drop_remainder=True) - - options = tf.data.Options() - options.experimental_distribute.auto_shard_policy = \ - tf.data.experimental.AutoShardPolicy.FILE - dataset = dataset.with_options(options) - - model.predict(dataset, steps=1) - - -class TestRegularizerLoss(tf.test.TestCase, parameterized.TestCase): - - class IdentityRegularizer(keras.regularizers.Regularizer): - - def __call__(self, x): - return tf.identity(x) - - class AddLayer(keras.layers.Layer): - - def build(self, _): - self.v = self.add_weight( - 'v', (), - initializer='ones', - regularizer=TestRegularizerLoss.IdentityRegularizer()) - - def call(self, inputs): - return inputs + self.v - - @staticmethod - def loss_fn(_, y_pred): - return tf.reduce_mean(y_pred) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(all_strategy_combinations_minus_default())) - def test_regularizer_loss(self, distribution): - batch_size = 2 - if not distributed_training_utils.global_batch_size_supported(distribution): - batch_size //= distribution.num_replicas_in_sync - - # Given an input x, which is always 1, and variable v, this model computes - # Loss=x+v+regularizer_loss, where regularizer_loss=v and the variable is - # initialized to 1. Therefore, this model computes Loss=1+2v, and so the - # gradient dLoss/dv = 2. This gradient of 2 is averaged over all examples - # in a batch and then multiplied by the learning rate of 1. As a result, - # the model update for one batch should subtract 2 from v, resulting in v - # being -1.
If the regularizer loss is not scaled correctly by number of - # replicas, the variable value will be incorrect when number of replicas - # >1. For example, it will be -2 if num replicas = 2. - with distribution.scope(): - x = keras.layers.Input(shape=(1,), batch_size=batch_size) - y = TestRegularizerLoss.AddLayer()(x) - model = keras.models.Model(inputs=x, outputs=y) - opt = gradient_descent_keras.SGD(1.) - model.compile( - opt, - loss=TestRegularizerLoss.loss_fn) - model.fit( - x=np.array([[1.], [1.]], dtype=np.float32), - y=np.array([[1.], [1.]], dtype=np.float32), - batch_size=batch_size) - v = model.get_weights()[0] - self.assertEqual(-1.0, v) + "sgd", loss=keras.losses.MeanSquaredError(reduction=reduction) + ) + history = model.fit(dataset, steps_per_epoch=2, epochs=1, shuffle=False) + + with distribution.scope(): + ds_model = _get_model() + ds_model.compile( + "sgd", loss=keras.losses.MeanSquaredError(reduction=reduction) + ) + ds_history = ds_model.fit( + dataset, steps_per_epoch=2, epochs=1, shuffle=False + ) + self.assertArrayNear( + history.history["loss"], ds_history.history["loss"], 1e-5 + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + all_strategy_combinations_minus_default() + ) + ) + def test_distribution_strategy_with_symbolic_add_loss( + self, mode, distribution + ): + def _make_model_with_add_loss(): + inputs = keras.Input((10,)) + x1 = keras.layers.Dense(10, kernel_initializer="zeros")(inputs) + x2 = keras.layers.Dense(10, kernel_initializer="zeros")(x1) + outputs = keras.layers.Dense(1, kernel_initializer="zeros")(x2) + model = keras.Model(inputs, outputs) + model.add_loss(tf.reduce_mean(x1)) + model.add_loss(tf.reduce_mean(outputs)) + return model + + x = np.ones((64, 10)).astype("float32") + + model = _make_model_with_add_loss() + model.compile("sgd") + history = model.fit(x, epochs=1) + + with distribution.scope(): + ds_model = _make_model_with_add_loss() + ds_model.compile("sgd") + ds_history = ds_model.fit(x, epochs=1) + + self.assertAllClose(history.history, ds_history.history) + + # TODO(omalleyt): Investigate flakiness and re-enable.
+ @tf.__internal__.distribute.combinations.generate( + all_strategy_minus_default_and_tpu_combinations() + ) + def DISABLED_test_distribution_strategy_with_callable_add_loss( + self, distribution + ): + def _make_model(): + inputs = keras.Input((10,)) + x1 = keras.layers.Dense(10, kernel_initializer="zeros")(inputs) + x2 = keras.layers.Dense(10, kernel_initializer="zeros")(x1) + d = keras.layers.Dense(1, kernel_initializer="zeros") + outputs = d(x2) + model = keras.Model(inputs, outputs) + model.add_loss(lambda: 100.0 * tf.reduce_mean(d.kernel)) + return model + + x = np.ones((64, 10)).astype("float32") + y = np.ones((64, 1)).astype("float32") + + model = _make_model() + self.assertLen(model.losses, 1) + + model.compile("sgd", "mse") + history = model.fit(x, y, steps_per_epoch=2, epochs=1) + + with distribution.scope(): + ds_model = _make_model() + self.assertLen(ds_model.losses, 1) + ds_model.compile("sgd", "mse") + ds_history = ds_model.fit(x, y, steps_per_epoch=2, epochs=1) + + self.assertAllClose(history.history, ds_history.history) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + all_strategy_minus_default_and_tpu_combinations() + ) + ) + def test_distribution_strategy_with_add_metric_in_call(self, distribution): + class Bias(keras.layers.Layer): + def build(self, input_shape): + self.bias = self.add_weight( + name="bias", initializer="zeros", shape=() + ) + + def call(self, inputs): + self.add_metric( + tf.reduce_mean(inputs), name="bias", aggregation="mean" + ) + return inputs + self.bias + + def _make_model_with_add_metric(): + inputs = keras.Input((10,)) + x1 = keras.layers.Dense(10, kernel_initializer="zeros")(inputs) + x2 = Bias()(x1) + outputs = keras.layers.Dense(1, kernel_initializer="zeros")(x2) + model = keras.Model(inputs, outputs) + return model + + x = np.ones((64, 10)).astype("float32") + y = np.ones((64, 1)).astype("float32") + + model = _make_model_with_add_metric() + self.assertLen(model.metrics, 1) + + model.compile("sgd", "mse") + history = model.fit( + x, y, validation_data=(x, y), validation_steps=2, epochs=2 + ) + + with distribution.scope(): + ds_model = _make_model_with_add_metric() + self.assertLen(ds_model.metrics, 1) + ds_model.compile("sgd", "mse") + ds_history = ds_model.fit( + x, y, validation_data=(x, y), validation_steps=2, epochs=2 + ) + # includes stateful loss metric in eager. 
+ metrics_len = 2 if tf.executing_eagerly() else 1 + self.assertLen(ds_model.metrics, metrics_len) + + self.assertAllClose(history.history, ds_history.history) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.one_device_strategy, + tf.__internal__.distribute.combinations.one_device_strategy_gpu, + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, # noqa: E501 + ], + mode=["eager"], + ) + ) + def test_distribution_strategy_with_add_metric_object(self, distribution): + class Bias(keras.layers.Layer): + def build(self, input_shape): + self.bias = self.add_weight( + name="bias", initializer="zeros", shape=() + ) + self.mean = keras.metrics.Mean(name="mean") + + def call(self, inputs): + self.add_metric(self.mean(inputs)) + return inputs + self.bias + + def _make_model_with_add_metric_object(): + inputs = keras.Input((10,)) + x1 = keras.layers.Dense(10, kernel_initializer="zeros")(inputs) + x2 = Bias()(x1) + outputs = keras.layers.Dense(1, kernel_initializer="zeros")(x2) + model = keras.Model(inputs, outputs) + return model + + x = np.ones((64, 10)).astype("float32") + y = np.ones((64, 1)).astype("float32") + + model = _make_model_with_add_metric_object() + self.assertLen(model.metrics, 1) + + model.compile("sgd", "mse") + history = model.fit( + x, y, validation_data=(x, y), validation_steps=2, epochs=2 + ) + + with distribution.scope(): + ds_model = _make_model_with_add_metric_object() + self.assertLen(ds_model.metrics, 1) + ds_model.compile("sgd", "mse") + ds_history = ds_model.fit( + x, y, validation_data=(x, y), validation_steps=2, epochs=2 + ) + # includes stateful loss metric in eager. + metrics_len = 2 if tf.executing_eagerly() else 1 + self.assertLen(ds_model.metrics, metrics_len) + + self.assertAllClose(history.history, ds_history.history) + + @tf.__internal__.distribute.combinations.generate( + # TODO(phillypham): Why does validation_steps > 1 not work on TPUs? + tf.__internal__.test.combinations.times( + all_strategy_minus_default_and_tpu_combinations() + ) + ) + def test_distribution_strategy_with_add_metric_outside_call( + self, distribution + ): + def _make_model_with_add_metric(): + inputs = keras.Input((10,)) + x1 = keras.layers.Dense(10, kernel_initializer="zeros")(inputs) + outputs = keras.layers.Dense(1, kernel_initializer="zeros")(x1) + model = keras.Model(inputs, outputs) + model.add_metric( + tf.reduce_mean(x1), name="mid_mean", aggregation="mean" + ) + return model + + x = np.ones((64, 10)).astype("float32") + y = np.ones((64, 1)).astype("float32") + + model = _make_model_with_add_metric() + self.assertLen(model.metrics, 1) + + model.compile("sgd", "mse") + history = model.fit( + x, y, validation_data=(x, y), validation_steps=2, epochs=2 + ) + + with distribution.scope(): + ds_model = _make_model_with_add_metric() + self.assertLen(ds_model.metrics, 1) + ds_model.compile("sgd", "mse") + ds_history = ds_model.fit( + x, y, validation_data=(x, y), validation_steps=2, epochs=2 + ) + # includes stateful loss metric in eager. 
+ metrics_len = 2 if tf.executing_eagerly() else 1 + self.assertLen(ds_model.metrics, metrics_len) + + self.assertAllClose(history.history, ds_history.history) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategies_minus_tpu + + multi_worker_mirrored_strategies, + mode=["eager"], + ) + ) + def test_sparse_tensor_outputs(self, distribution): + class ToSparse(keras.layers.Layer): + """Create a sparse tensor based on a given dense tensor.""" + + def call(self, inputs): + indices = tf.where(tf.not_equal(inputs, 0)) + values = tf.gather_nd(inputs, indices) + shape = tf.shape(inputs, out_type="int64") + return tf.SparseTensor(indices, values, dense_shape=shape) + + model = keras.Sequential([ToSparse()]) + + # Define some input data with additional padding. + input_data = np.array([[1, 0, 0], [2, 3, 0]]) + output = model.predict(input_data, batch_size=2) + + expected_indices = np.array([[0, 0], [1, 0], [1, 1]]) + expected_values = np.array([1, 2, 3]) + expected_dense_shape = np.array([2, 3]) + + self.assertAllEqual(output.indices, expected_indices) + self.assertAllEqual(output.values, expected_values) + self.assertAllEqual(output.dense_shape, expected_dense_shape) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategies_minus_tpu + + multi_worker_mirrored_strategies, + mode=["eager"], + ) + ) + def test_ragged_tensor_outputs(self, distribution): + class ToRagged(keras.layers.Layer): + """Create a ragged tensor based on a given dense tensor.""" + + def __init__(self, padding, ragged_rank=1, **kwargs): + super().__init__(**kwargs) + self._padding = padding + self._ragged_rank = ragged_rank + + def call(self, inputs): + return tf.RaggedTensor.from_tensor( + inputs, padding=self._padding, ragged_rank=self._ragged_rank + ) + + model = keras.Sequential([ToRagged(padding=0)]) + + # Define some input data with additional padding. + input_data = np.array([[1, 0, 0], [2, 3, 0]]) + output = model.predict(input_data, batch_size=2) + + expected_values = [[1], [2, 3]] + self.assertAllEqual(expected_values, output) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategies_minus_default_minus_tpu + + tpu_strategies + + multi_worker_mirrored_strategies, + mode=["eager"], + ) + ) + def test_correctness_of_add_loss_with_merge_call(self, distribution): + batch_size = 32 + + def _get_model(): + inputs = keras.layers.Input(shape=(1,)) + labels = keras.layers.Input(shape=(1,)) + x = keras.layers.Dense(10, activation="relu")(inputs) + y = keras.layers.Dense(1)(x) + model = keras.models.Model([inputs, labels], y) + model.add_loss(keras.losses.mean_squared_error(labels, y)) + return model + + def _get_data(): + x_train = np.random.rand(64, 1) + y_train = 3 * x_train + x_train = x_train.astype("float32") + y_train = y_train.astype("float32") + dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) + dataset = dataset.batch(batch_size) + return dataset + + with distribution.scope(): + model = _get_model() + optimizer = gradient_descent_keras.SGD(0.2) + + @tf.function + def train_step(dist_inputs): + def step_fn(inputs): + with tf.GradientTape() as tape: + logits = model(inputs) + + # Invoke a merge_call() + tf.distribute.get_replica_context().merge_call( + lambda d: None + ) + + # Verify that there is only one loss on the model. 
+ assert len(model.losses) == 1 + loss_from_model = ( + tf.reduce_sum(model.losses) * 1.0 / batch_size + ) + + # Compute loss in this loop. + loss = keras.losses.mean_squared_error( + inputs[1], logits + ) + loss = tf.nn.compute_average_loss( + loss, global_batch_size=batch_size + ) + + # Verify that the loss computed in this loop is + # equivalent to the loss from the model that was added + # via add_loss. + tf.compat.v1.assert_equal(loss, loss_from_model) + + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients( + zip(grads, model.trainable_variables) + ) + return loss + + per_replica_losses = distribution.run( + step_fn, args=(dist_inputs,) + ) + return distribution.reduce( + tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None + ) + + dataset = distribution.experimental_distribute_dataset(_get_data()) + for _ in range(2): + for x in dataset: + train_step(x) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine(mode=["graph", "eager"]) + ) + def test_unimplemented_parameter_server_strategy(self): + cluster_spec = multi_worker_testing_utils.create_in_process_cluster( + num_workers=3, num_ps=2 + ) + cluster_resolver = SimpleClusterResolver( + cluster_spec=tf.train.ClusterSpec(cluster_spec), + task_type="worker", + task_id=1, + num_accelerators={"GPU": 0}, + ) + distribution = ( + tf.compat.v1.distribute.experimental.ParameterServerStrategy( + cluster_resolver + ) + ) + + self.assertIsInstance( + distribution, + tf.compat.v1.distribute.experimental.ParameterServerStrategy, + ) - -@test_utils.run_all_without_tensor_float_32( - 'Uses Dense layers, which call matmul') -class TestDistributionStrategyWithKerasModels(tf.test.TestCase, - parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_distribution_strategy_on_sequential_model( - self, distribution): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(learning_rate=0.001) - model = simple_sequential_model() - loss = 'mse' - model.compile( - optimizer, - loss) - - inputs = np.zeros((20, 10), np.float32) - targets = np.zeros((20, 2), np.float32) - - model.fit(inputs, targets, epochs=1, batch_size=10) - model.predict(inputs, batch_size=10) - model.evaluate(inputs, targets, batch_size=10) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations()) - def test_distribution_strategy_on_functional_model( - self, distribution): - with distribution.scope(): - optimizer_fn = gradient_descent_keras.SGD - optimizer = optimizer_fn(learning_rate=0.001) - model = get_model() - loss = 'mse' - model.compile( - optimizer, - loss) - - inputs = np.zeros((64, 3), dtype=np.float32) - targets = np.zeros((64, 4), dtype=np.float32) - - model.fit(inputs, targets, epochs=1) - model.predict(inputs) - model.evaluate(inputs, targets) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_distributed_dataset(self, distribution): - with distribution.scope(): - - class CBCounter(keras.callbacks.Callback): - - def __init__(self): - self.epochs = 0 - self.train_batches = 0 - self.test_batches = 0 - - def on_epoch_end(self, batch, logs=None): - self.epochs += 1 - - def on_train_batch_end(self, batch, logs=None): - self.train_batches += 1 - - def on_test_batch_end(self, batch, logs=None): - self.test_batches += 1 - - model = keras.Sequential([keras.layers.Dense(1)]) - 
model.compile('sgd', 'mse') - cb_counter = CBCounter() - - x, y = np.ones((100, 10)), np.ones((100, 1)) - ds = tf.data.Dataset.from_tensor_slices((x, y)) - ds = ds.batch(10).repeat(2) - ds = distribution.experimental_distribute_dataset(ds) - - val_ds = tf.data.Dataset.from_tensor_slices((x, y)) - val_ds = val_ds.batch(20) - val_ds = distribution.experimental_distribute_dataset(val_ds) - - model.fit( - ds, - steps_per_epoch=10, - validation_data=val_ds, - validation_steps=5, - epochs=2, - callbacks=[cb_counter]) - - self.assertEqual(cb_counter.train_batches, 20) - self.assertEqual(cb_counter.test_batches, 10) - self.assertEqual(cb_counter.epochs, 2) - - # Check for `steps_per_epoch`. - if distribution.num_replicas_in_sync > 1: - with self.assertRaisesRegex(ValueError, - 'distributed dataset, you must specify'): - model.fit(ds, epochs=2) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_distributed_datasets_from_function(self, distribution): - with distribution.scope(): - - class CBCounter(keras.callbacks.Callback): - - def __init__(self): - self.epochs = 0 - self.train_batches = 0 - self.test_batches = 0 - - def on_epoch_end(self, batch, logs=None): - self.epochs += 1 - - def on_train_batch_end(self, batch, logs=None): - self.train_batches += 1 - - def on_test_batch_end(self, batch, logs=None): - self.test_batches += 1 - - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') - cb_counter = CBCounter() - - def make_dataset(_): - x, y = np.ones((100, 10)), np.ones((100, 1)) - ds = tf.data.Dataset.from_tensor_slices((x, y)) - ds = ds.batch(5).repeat() - return ds - - ds = distribution.distribute_datasets_from_function(make_dataset) - val_ds = distribution.distribute_datasets_from_function(make_dataset) - - model.fit( - ds, - steps_per_epoch=10, - validation_data=val_ds, - validation_steps=5, - epochs=2, - callbacks=[cb_counter]) - - self.assertEqual(cb_counter.train_batches, 20) - self.assertEqual(cb_counter.test_batches, 10) - self.assertEqual(cb_counter.epochs, 2) - - # Check for `steps_per_epoch`. 
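# Why the explicit-steps check below exists (a sketch, independent of the
# test harness): once a dataset has been wrapped by
# `experimental_distribute_dataset`, it is no longer a `tf.data.Dataset`,
# so Keras cannot infer its length and `fit` must be told how many steps
# make up an epoch.
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy(["/cpu:0"])
dist_ds = strategy.experimental_distribute_dataset(
    tf.data.Dataset.range(8).batch(4)
)
assert not isinstance(dist_ds, tf.data.Dataset)  # hence explicit steps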
- if distribution.num_replicas_in_sync > 1: - with self.assertRaisesRegex(ValueError, - 'distributed dataset, you must specify'): - model.fit(ds, epochs=2) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_host_training_loop(self, distribution): - if isinstance(distribution, - tf.distribute.MultiWorkerMirroredStrategy): - self.skipTest('b/172032817') - with distribution.scope(): - inputs = keras.Input((10, 10, 3)) - x = keras.layers.Conv2D(3, kernel_size=3)(inputs) - x = keras.layers.Flatten()(x) - outputs = keras.layers.Dense(1)(x) - model = keras.Model(inputs, outputs) - - model.compile('sgd', 'mse', steps_per_execution=10) - - bc = BatchCountingCB() - x, y = np.ones((100, 10, 10, 3)), np.ones((100, 1)) - model.fit(x, y, batch_size=2, epochs=1, callbacks=[bc]) - self.assertEqual(bc.train_begin_batches, [0, 10, 20, 30, 40]) - self.assertEqual(bc.train_end_batches, [9, 19, 29, 39, 49]) - - model.evaluate(x, y, batch_size=2, callbacks=[bc]) - self.assertEqual(bc.test_begin_batches, [0, 10, 20, 30, 40]) - self.assertEqual(bc.test_end_batches, [9, 19, 29, 39, 49]) - - model.predict(x, batch_size=2, callbacks=[bc]) - self.assertEqual(bc.predict_begin_batches, [0, 10, 20, 30, 40]) - self.assertEqual(bc.predict_end_batches, [9, 19, 29, 39, 49]) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_host_training_loop_last_partial_execution(self, distribution): - if isinstance(distribution, - tf.distribute.MultiWorkerMirroredStrategy): - self.skipTest('b/172032817') - with distribution.scope(): - inputs = keras.Input(10) - outputs = keras.layers.Dense(1)(inputs) - model = keras.Model(inputs, outputs) - - model.compile('sgd', 'mse', steps_per_execution=20) - - bc = BatchCountingCB() - x, y = np.ones((100, 10)), np.ones((100, 1)) - model.fit(x, y, batch_size=2, epochs=1, callbacks=[bc]) - self.assertEqual(bc.train_begin_batches, [0, 20, 40]) - self.assertEqual(bc.train_end_batches, [19, 39, 49]) - - model.evaluate(x, y, batch_size=2, callbacks=[bc]) - self.assertEqual(bc.test_begin_batches, [0, 20, 40]) - self.assertEqual(bc.test_end_batches, [19, 39, 49]) - - model.predict(x, batch_size=2, callbacks=[bc]) - self.assertEqual(bc.predict_begin_batches, [0, 20, 40]) - self.assertEqual(bc.predict_end_batches, [19, 39, 49]) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_host_training_loop_dataset_unknown_size(self, distribution): - if isinstance(distribution, - tf.distribute.MultiWorkerMirroredStrategy): - self.skipTest('b/172032817') - with distribution.scope(): - inputs = keras.Input(10) - outputs = keras.layers.Dense(1)(inputs) - model = keras.Model(inputs, outputs) - - model.compile('sgd', 'mse', steps_per_execution=20) - - x, y = np.ones((100, 10)), np.ones((100, 1)) - ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) - ds = ds.filter(lambda *args, **kwargs: True) # Makes the size UNKNOWN. 
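# Sketch of why the `filter` above hides the dataset size: tf.data cannot
# know how many elements survive an arbitrary predicate, so the cardinality
# becomes UNKNOWN and Keras can no longer size the epoch on its own.
import tensorflow as tf

ds = tf.data.Dataset.range(100).batch(2)
assert int(ds.cardinality()) == 50
assert ds.filter(lambda x: True).cardinality() == tf.data.UNKNOWN_CARDINALITY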
- bc = BatchCountingCB() - - with self.assertRaisesRegex(ValueError, 'steps_per_execution'): - model.fit(ds, epochs=2, callbacks=[bc]) - - train_ds = ds.repeat(2) - model.fit(train_ds, steps_per_epoch=50, epochs=2, callbacks=[bc]) - self.assertEqual(bc.train_begin_batches, [0, 20, 40, 0, 20, 40]) - self.assertEqual(bc.train_end_batches, [19, 39, 49, 19, 39, 49]) - - with self.assertRaisesRegex(ValueError, 'steps_per_execution'): - model.evaluate(ds, callbacks=[bc]) - - test_ds = ds.repeat(2) - model.evaluate(test_ds, steps=50, callbacks=[bc]) - self.assertEqual(bc.test_begin_batches, [0, 20, 40]) - self.assertEqual(bc.test_end_batches, [19, 39, 49]) - - predict_ds = ds.repeat(2) - model.predict(predict_ds, steps=50, callbacks=[bc]) - self.assertEqual(bc.predict_begin_batches, [0, 20, 40]) - self.assertEqual(bc.predict_end_batches, [19, 39, 49]) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_host_training_loop_truncate_to_epoch(self, distribution): - if isinstance(distribution, - tf.distribute.MultiWorkerMirroredStrategy): - self.skipTest('b/172032817') - with distribution.scope(): - inputs = keras.Input(10) - outputs = keras.layers.Dense(1)(inputs) - model = keras.Model(inputs, outputs) - - model.compile('sgd', 'mse', steps_per_execution=500) - - x, y = np.ones((100, 10)), np.ones((100, 1)) - bc = BatchCountingCB() - model.fit(x, y, batch_size=2, epochs=2, callbacks=[bc]) - self.assertEqual(bc.train_begin_batches, [0, 0]) - self.assertEqual(bc.train_end_batches, [49, 49]) - - x, y = np.ones((50, 10)), np.ones((50, 1)) - model.evaluate(x, y, batch_size=2, callbacks=[bc]) - self.assertEqual(bc.test_begin_batches, [0]) - self.assertEqual(bc.test_end_batches, [24]) - - x = np.ones((50, 10)) - model.predict(x, batch_size=2, callbacks=[bc]) - self.assertEqual(bc.predict_begin_batches, [0]) - self.assertEqual(bc.predict_end_batches, [24]) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_gradient_clipping(self, distribution): - - class MyLayer(keras.layers.Layer): - - def build(self, _): - self.v1 = tf.Variable(1.) - self.v2 = tf.Variable(1.) - - def call(self, x): - return 3 * self.v1 - 3 * self.v2 - - x, y = np.ones((10, 1)), np.ones((10, 1)) - - with distribution.scope(): - layer = MyLayer() - model = keras.Sequential([layer]) - optimizer = gradient_descent_keras.SGD(1., clipnorm=2., clipvalue=2.) - model.compile(optimizer, 'mae') - - if isinstance(distribution, - (tf.distribute.experimental.CentralStorageStrategy, - tf.compat.v1.distribute.experimental.CentralStorageStrategy)): - with self.assertRaisesRegex(ValueError, 'not supported'): - model.fit(x, y, batch_size=10, epochs=1) - else: - model.fit(x, y, batch_size=10, epochs=1) - self.assertAllClose(self.evaluate(layer.v1), 3.) - self.assertAllClose(self.evaluate(layer.v2), -1.) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_custom_gradient_transformation(self, distribution): - if isinstance(distribution, - (tf.distribute.experimental.CentralStorageStrategy, - tf.compat.v1.distribute.experimental.CentralStorageStrategy)): - self.skipTest('Not supported with `CentralStorageStrategy`') - - class MyLayer(keras.layers.Layer): - - def build(self, _): - self.v1 = tf.Variable(1.) - self.v2 = tf.Variable(-1.) 
- - def call(self, x): - return x + self.v1 + self.v2 - - def custom_transform(grads_and_vars): - # Always set gradients to 1. - return [(tf.ones_like(g), v) for g, v in grads_and_vars] - - x, y = np.ones((10, 1)), np.ones((10, 1)) - - with distribution.scope(): - layer = MyLayer() - model = keras.Sequential([layer]) - optimizer = gradient_descent_keras.SGD( - 1., gradient_transformers=[custom_transform]) - model.compile(optimizer, 'mae') - - model.fit(x, y, batch_size=10, epochs=1) - self.assertAllClose(self.evaluate(layer.v1), 0.) - self.assertAllClose(self.evaluate(layer.v2), -2.) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - all_strategy_combinations_minus_default())) - def test_distribution_strategy_one_dimensional(self, distribution): - with distribution.scope(): - inp = keras.layers.Input(shape=(10,)) - out = keras.layers.Dense(3, activation='softmax')(inp) - model = keras.Model(inputs=[inp], outputs=[out]) - model.compile( - optimizer='rmsprop', - loss='sparse_categorical_crossentropy', - metrics=['sparse_categorical_accuracy']) - - x = np.random.random((64, 10)).astype('float32') - y = np.random.randint(3, size=64) - - model.fit(x, y, epochs=1, steps_per_epoch=2) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus - ], - mode=['graph', 'eager'], - reduction=[ - losses_utils.ReductionV2.AUTO, - losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, - losses_utils.ReductionV2.SUM - ])) - def test_distribution_strategy_with_loss_reduction_types( - self, distribution, reduction): - np.random.seed(_RANDOM_SEED) - - def _get_model(): - inputs = keras.Input((10,)) - x1 = keras.layers.Dense(10, kernel_initializer='zeros')(inputs) - x2 = keras.layers.Dense(10, kernel_initializer='zeros')(x1) - outputs = keras.layers.Dense(1, kernel_initializer='zeros')(x2) - model = keras.Model(inputs, outputs) - return model - - x = np.random.random((64, 10)) - y = np.random.random((64, 1)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)) - dataset = dataset.batch(32) - - model = _get_model() - model.compile( - 'sgd', loss=keras.losses.MeanSquaredError(reduction=reduction)) - history = model.fit(dataset, steps_per_epoch=2, epochs=1, shuffle=False) - - with distribution.scope(): - ds_model = _get_model() - ds_model.compile( - 'sgd', - loss=keras.losses.MeanSquaredError(reduction=reduction)) - ds_history = ds_model.fit( - dataset, steps_per_epoch=2, epochs=1, shuffle=False) - self.assertArrayNear(history.history['loss'], ds_history.history['loss'], - 1e-5) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - all_strategy_combinations_minus_default())) - def test_distribution_strategy_with_symbolic_add_loss( - self, mode, distribution): - - def _make_model_with_add_loss(): - inputs = keras.Input((10,)) - x1 = keras.layers.Dense(10, kernel_initializer='zeros')(inputs) - x2 = keras.layers.Dense(10, kernel_initializer='zeros')(x1) - outputs = keras.layers.Dense(1, kernel_initializer='zeros')(x2) - model = keras.Model(inputs, outputs) - model.add_loss(tf.reduce_mean(x1)) - model.add_loss(tf.reduce_mean(outputs)) - return model - - x = np.ones((64, 10)).astype('float32') - - model = _make_model_with_add_loss() - model.compile('sgd') - history = model.fit(x, epochs=1) - - with distribution.scope(): - 
ds_model = _make_model_with_add_loss() - ds_model.compile( - 'sgd') - ds_history = ds_model.fit(x, epochs=1) - - self.assertAllClose(history.history, ds_history.history) - - # TODO(omalleyt): Investigate flakiness and re-enable. - @tf.__internal__.distribute.combinations.generate(all_strategy_minus_default_and_tpu_combinations()) - def DISABLED_test_distribution_strategy_with_callable_add_loss( - self, distribution): - - def _make_model(): - inputs = keras.Input((10,)) - x1 = keras.layers.Dense(10, kernel_initializer='zeros')(inputs) - x2 = keras.layers.Dense(10, kernel_initializer='zeros')(x1) - d = keras.layers.Dense(1, kernel_initializer='zeros') - outputs = d(x2) - model = keras.Model(inputs, outputs) - model.add_loss(lambda: 100. * tf.reduce_mean(d.kernel)) - return model - - x = np.ones((64, 10)).astype('float32') - y = np.ones((64, 1)).astype('float32') - - model = _make_model() - self.assertLen(model.losses, 1) - - model.compile('sgd', 'mse') - history = model.fit(x, y, steps_per_epoch=2, epochs=1) - - with distribution.scope(): - ds_model = _make_model() - self.assertLen(ds_model.losses, 1) - ds_model.compile('sgd', 'mse') - ds_history = ds_model.fit(x, y, steps_per_epoch=2, epochs=1) - - self.assertAllClose(history.history, ds_history.history) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - all_strategy_minus_default_and_tpu_combinations())) - def test_distribution_strategy_with_add_metric_in_call( - self, distribution): - - class Bias(keras.layers.Layer): - - def build(self, input_shape): - self.bias = self.add_weight(name='bias', initializer='zeros', shape=()) - - def call(self, inputs): - self.add_metric( - tf.reduce_mean(inputs), name='bias', aggregation='mean') - return inputs + self.bias - - def _make_model_with_add_metric(): - inputs = keras.Input((10,)) - x1 = keras.layers.Dense(10, kernel_initializer='zeros')(inputs) - x2 = Bias()(x1) - outputs = keras.layers.Dense(1, kernel_initializer='zeros')(x2) - model = keras.Model(inputs, outputs) - return model - - x = np.ones((64, 10)).astype('float32') - y = np.ones((64, 1)).astype('float32') - - model = _make_model_with_add_metric() - self.assertLen(model.metrics, 1) - - model.compile('sgd', 'mse') - history = model.fit( - x, y, validation_data=(x, y), validation_steps=2, epochs=2) - - with distribution.scope(): - ds_model = _make_model_with_add_metric() - self.assertLen(ds_model.metrics, 1) - ds_model.compile( - 'sgd', - 'mse') - ds_history = ds_model.fit( - x, y, validation_data=(x, y), validation_steps=2, epochs=2) - # includes stateful loss metric in eager. 
- metrics_len = 2 if tf.executing_eagerly() else 1 - self.assertLen(ds_model.metrics, metrics_len) - - self.assertAllClose(history.history, ds_history.history) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.one_device_strategy, - tf.__internal__.distribute.combinations.one_device_strategy_gpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus - ], - mode=['eager'])) - def test_distribution_strategy_with_add_metric_object( - self, distribution): - - class Bias(keras.layers.Layer): - - def build(self, input_shape): - self.bias = self.add_weight(name='bias', initializer='zeros', shape=()) - self.mean = keras.metrics.Mean(name='mean') - - def call(self, inputs): - self.add_metric(self.mean(inputs)) - return inputs + self.bias - - def _make_model_with_add_metric_object(): - inputs = keras.Input((10,)) - x1 = keras.layers.Dense(10, kernel_initializer='zeros')(inputs) - x2 = Bias()(x1) - outputs = keras.layers.Dense(1, kernel_initializer='zeros')(x2) - model = keras.Model(inputs, outputs) - return model - - x = np.ones((64, 10)).astype('float32') - y = np.ones((64, 1)).astype('float32') - - model = _make_model_with_add_metric_object() - self.assertLen(model.metrics, 1) - - model.compile('sgd', 'mse') - history = model.fit( - x, y, validation_data=(x, y), validation_steps=2, epochs=2) - - with distribution.scope(): - ds_model = _make_model_with_add_metric_object() - self.assertLen(ds_model.metrics, 1) - ds_model.compile( - 'sgd', - 'mse') - ds_history = ds_model.fit( - x, y, validation_data=(x, y), validation_steps=2, epochs=2) - # includes stateful loss metric in eager. - metrics_len = 2 if tf.executing_eagerly() else 1 - self.assertLen(ds_model.metrics, metrics_len) - - self.assertAllClose(history.history, ds_history.history) - - @tf.__internal__.distribute.combinations.generate( - # TODO(phillypham): Why does validation_steps > 1 not work on TPUs? - tf.__internal__.test.combinations.times( - all_strategy_minus_default_and_tpu_combinations())) - def test_distribution_strategy_with_add_metric_outside_call( - self, distribution): - - def _make_model_with_add_metric(): - inputs = keras.Input((10,)) - x1 = keras.layers.Dense(10, kernel_initializer='zeros')(inputs) - outputs = keras.layers.Dense(1, kernel_initializer='zeros')(x1) - model = keras.Model(inputs, outputs) - model.add_metric( - tf.reduce_mean(x1), name='mid_mean', aggregation='mean') - return model - - x = np.ones((64, 10)).astype('float32') - y = np.ones((64, 1)).astype('float32') - - model = _make_model_with_add_metric() - self.assertLen(model.metrics, 1) - - model.compile('sgd', 'mse') - history = model.fit( - x, y, validation_data=(x, y), validation_steps=2, epochs=2) - - with distribution.scope(): - ds_model = _make_model_with_add_metric() - self.assertLen(ds_model.metrics, 1) - ds_model.compile( - 'sgd', - 'mse') - ds_history = ds_model.fit( - x, y, validation_data=(x, y), validation_steps=2, epochs=2) - # includes stateful loss metric in eager. 
- metrics_len = 2 if tf.executing_eagerly() else 1 - self.assertLen(ds_model.metrics, metrics_len) - - self.assertAllClose(history.history, ds_history.history) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategies_minus_tpu + multi_worker_mirrored_strategies, - mode=['eager'])) - def test_sparse_tensor_outputs(self, distribution): - - class ToSparse(keras.layers.Layer): - """Create a sparse tensor based on a given dense tensor.""" - - def call(self, inputs): - indices = tf.where(tf.not_equal(inputs, 0)) - values = tf.gather_nd(inputs, indices) - shape = tf.shape(inputs, out_type='int64') - return tf.SparseTensor(indices, values, dense_shape=shape) - - model = keras.Sequential([ToSparse()]) - - # Define some input data with additional padding. - input_data = np.array([[1, 0, 0], [2, 3, 0]]) - output = model.predict(input_data, batch_size=2) - - expected_indices = np.array([[0, 0], [1, 0], [1, 1]]) - expected_values = np.array([1, 2, 3]) - expected_dense_shape = np.array([2, 3]) - - self.assertAllEqual(output.indices, expected_indices) - self.assertAllEqual(output.values, expected_values) - self.assertAllEqual(output.dense_shape, expected_dense_shape) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategies_minus_tpu + multi_worker_mirrored_strategies, - mode=['eager'])) - def test_ragged_tensor_outputs(self, distribution): - - class ToRagged(keras.layers.Layer): - """Create a ragged tensor based on a given dense tensor.""" - - def __init__(self, padding, ragged_rank=1, **kwargs): - super().__init__(**kwargs) - self._padding = padding - self._ragged_rank = ragged_rank - - def call(self, inputs): - return tf.RaggedTensor.from_tensor( - inputs, padding=self._padding, ragged_rank=self._ragged_rank) - - model = keras.Sequential([ToRagged(padding=0)]) - - # Define some input data with additional padding. - input_data = np.array([[1, 0, 0], [2, 3, 0]]) - output = model.predict(input_data, batch_size=2) - - expected_values = [[1], [2, 3]] - self.assertAllEqual(expected_values, output) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategies_minus_default_minus_tpu + tpu_strategies + - multi_worker_mirrored_strategies, - mode=['eager'])) - def test_correctness_of_add_loss_with_merge_call(self, distribution): - batch_size = 32 - - def _get_model(): - inputs = keras.layers.Input(shape=(1,)) - labels = keras.layers.Input(shape=(1,)) - x = keras.layers.Dense(10, activation='relu')(inputs) - y = keras.layers.Dense(1)(x) - model = keras.models.Model([inputs, labels], y) - model.add_loss(keras.losses.mean_squared_error(labels, y)) - return model - - def _get_data(): - x_train = np.random.rand(64, 1) - y_train = 3 * x_train - x_train = x_train.astype('float32') - y_train = y_train.astype('float32') - dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - dataset = dataset.batch(batch_size) - return dataset - - with distribution.scope(): - model = _get_model() - optimizer = gradient_descent_keras.SGD(0.2) - - @tf.function - def train_step(dist_inputs): - - def step_fn(inputs): - with tf.GradientTape() as tape: - logits = model(inputs) - - # Invoke a merge_call() - tf.distribute.get_replica_context().merge_call( - lambda d: None) - - # Verify that there is only one loss on the model. 
- assert len(model.losses) == 1 - loss_from_model = tf.reduce_sum( - model.losses) * 1.0 / batch_size - - # Compute loss in this loop. - loss = keras.losses.mean_squared_error(inputs[1], logits) - loss = tf.nn.compute_average_loss(loss, global_batch_size=batch_size) - - # Verify that the loss computed in this loop is equivalent to the - # loss from the model that was added via add_loss. - tf.compat.v1.assert_equal(loss, loss_from_model) - - grads = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(zip(grads, model.trainable_variables)) - return loss - - per_replica_losses = distribution.run(step_fn, args=(dist_inputs,)) - return distribution.reduce( - tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None) - - dataset = distribution.experimental_distribute_dataset(_get_data()) - for _ in range(2): - for x in dataset: - train_step(x) - - @tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(mode=['graph', 'eager'])) - def test_unimplemented_parameter_server_strategy(self): - cluster_spec = multi_worker_testing_utils.create_in_process_cluster( - num_workers=3, num_ps=2) - cluster_resolver = SimpleClusterResolver( - cluster_spec=tf.train.ClusterSpec(cluster_spec), - task_type='worker', - task_id=1, - num_accelerators={'GPU': 0}) - distribution = tf.compat.v1.distribute.experimental.ParameterServerStrategy( - cluster_resolver) - - self.assertIsInstance(distribution, - tf.compat.v1.distribute.experimental.ParameterServerStrategy) - - with self.assertRaisesRegex(NotImplementedError, - 'ParameterServerStrategy*'): - with distribution.scope(): - model = simple_sequential_model() - optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=0.001) - loss = 'mse' - model.compile(optimizer, loss) + with self.assertRaisesRegex( + NotImplementedError, "ParameterServerStrategy*" + ): + with distribution.scope(): + model = simple_sequential_model() + optimizer = tf.compat.v1.train.RMSPropOptimizer( + learning_rate=0.001 + ) + loss = "mse" + model.compile(optimizer, loss) # Models to exercise inserting ancillary layers with add_loss and add_metric. def _functional_with_add_loss_and_metric(input_shape, num_classes, l1, l2): - inputs = keras.Input(input_shape, name='images') - x = keras.layers.Conv2D(32, kernel_size=5, activation='relu')(inputs) - x = keras.layers.MaxPooling2D(pool_size=2)(x) - x = keras.layers.Conv2D(64, kernel_size=5, activation='relu')(x) - x = keras.layers.MaxPooling2D(pool_size=2)(x) - # Apply L2 regularization to embedding. Use a mix of TensorFlow ops and layers - # to exercise all code paths. - x = keras.layers.Flatten(name='embedding')(x) - l2_loss = tf.reduce_mean(tf.reduce_sum(tf.square(x), -1)) - # Apply L1 regularization to next layer. - x = keras.layers.Dense(1024, activation='relu', name='sparse_embedding')(x) - l1_loss = keras.layers.Lambda( - lambda x: tf.reduce_mean(tf.reduce_sum(x, -1)), - name='l1_loss')( - x) - outputs = keras.layers.Dense(num_classes, name='logits')(x) - model = keras.Model(inputs=inputs, outputs=outputs) - # Weight regularization terms. 
- model.add_loss(keras.layers.Lambda(lambda x: x * l2)(l2_loss)) - model.add_metric(l2_loss, aggregation='mean', name='l2_loss') - model.add_loss(l1_loss * l1) - model.add_metric(l1_loss, aggregation='mean', name='l1_loss') - return model + inputs = keras.Input(input_shape, name="images") + x = keras.layers.Conv2D(32, kernel_size=5, activation="relu")(inputs) + x = keras.layers.MaxPooling2D(pool_size=2)(x) + x = keras.layers.Conv2D(64, kernel_size=5, activation="relu")(x) + x = keras.layers.MaxPooling2D(pool_size=2)(x) + # Apply L2 regularization to embedding. Use a mix of TensorFlow ops and + # layers to exercise all code paths. + x = keras.layers.Flatten(name="embedding")(x) + l2_loss = tf.reduce_mean(tf.reduce_sum(tf.square(x), -1)) + # Apply L1 regularization to next layer. + x = keras.layers.Dense(1024, activation="relu", name="sparse_embedding")(x) + l1_loss = keras.layers.Lambda( + lambda x: tf.reduce_mean(tf.reduce_sum(x, -1)), name="l1_loss" + )(x) + outputs = keras.layers.Dense(num_classes, name="logits")(x) + model = keras.Model(inputs=inputs, outputs=outputs) + # Weight regularization terms. + model.add_loss(keras.layers.Lambda(lambda x: x * l2)(l2_loss)) + model.add_metric(l2_loss, aggregation="mean", name="l2_loss") + model.add_loss(l1_loss * l1) + model.add_metric(l1_loss, aggregation="mean", name="l1_loss") + return model def _sequential_with_add_loss_and_metric(input_shape, num_classes, l1, l2): - model = keras.Sequential([ - keras.layers.Conv2D( - 32, kernel_size=5, activation='relu', input_shape=input_shape), - keras.layers.MaxPooling2D(pool_size=2), - keras.layers.Conv2D(64, kernel_size=5, activation='relu'), - keras.layers.MaxPooling2D(pool_size=2), - keras.layers.Flatten(name='embedding'), - keras.layers.Dense(1024, activation='relu', name='sparse_embedding'), - keras.layers.Dense(num_classes, name='logits'), - ]) - # Extract layer outputs, add regularization terms, and rescale the metric. - # Use a mix of TensorFlow ops and layers to exercise all code paths. - x = model.get_layer('sparse_embedding').get_output_at(-1) - l1_loss = l1 * tf.reduce_mean(tf.reduce_sum(x, -1)) - model.add_loss(l1_loss) - model.add_metric( - keras.layers.Lambda(lambda x: tf.divide(x, l1))(l1_loss), - aggregation='mean', - name='l1_loss') - x = model.get_layer('embedding').get_output_at(-1) - l2_loss = keras.layers.Lambda( - lambda x: l2 * tf.reduce_mean(tf.reduce_sum(x * x, -1)), - name='l2_loss')( - x) - model.add_loss(l2_loss) - model.add_metric(l2_loss / l2, aggregation='mean', name='l2_loss') - return model + model = keras.Sequential( + [ + keras.layers.Conv2D( + 32, kernel_size=5, activation="relu", input_shape=input_shape + ), + keras.layers.MaxPooling2D(pool_size=2), + keras.layers.Conv2D(64, kernel_size=5, activation="relu"), + keras.layers.MaxPooling2D(pool_size=2), + keras.layers.Flatten(name="embedding"), + keras.layers.Dense( + 1024, activation="relu", name="sparse_embedding" + ), + keras.layers.Dense(num_classes, name="logits"), + ] + ) + # Extract layer outputs, add regularization terms, and rescale the metric. + # Use a mix of TensorFlow ops and layers to exercise all code paths. 
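# A minimal sketch (names illustrative, not from this diff) of the
# `get_output_at(-1)` pattern used below: it retrieves the symbolic output
# of a layer's most recent invocation, so regularization terms can tap
# intermediate activations after the model has been built.
import tensorflow as tf
from tensorflow import keras

m = keras.Sequential([
    keras.layers.Dense(4, name="hidden", input_shape=(3,)),
    keras.layers.Dense(1, name="out"),
])
hidden = m.get_layer("hidden").get_output_at(-1)
m.add_loss(0.01 * tf.reduce_mean(tf.reduce_sum(tf.square(hidden), -1)))
assert len(m.losses) == 1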
+ x = model.get_layer("sparse_embedding").get_output_at(-1) + l1_loss = l1 * tf.reduce_mean(tf.reduce_sum(x, -1)) + model.add_loss(l1_loss) + model.add_metric( + keras.layers.Lambda(lambda x: tf.divide(x, l1))(l1_loss), + aggregation="mean", + name="l1_loss", + ) + x = model.get_layer("embedding").get_output_at(-1) + l2_loss = keras.layers.Lambda( + lambda x: l2 * tf.reduce_mean(tf.reduce_sum(x * x, -1)), name="l2_loss" + )(x) + model.add_loss(l2_loss) + model.add_metric(l2_loss / l2, aggregation="mean", name="l2_loss") + return model def _functional_with_layer_reuse(input_shape, num_classes, l1, l2): - base_model = keras.Sequential([ - keras.layers.Conv2D( - 32, kernel_size=5, activation='relu', input_shape=input_shape), - keras.layers.MaxPooling2D(pool_size=2), - keras.layers.Conv2D(64, kernel_size=5, activation='relu'), - keras.layers.MaxPooling2D(pool_size=2), - keras.layers.Flatten(), - keras.layers.Dense(1024, activation='relu'), - keras.layers.Dense(num_classes, name='logits'), - ]) - inputs = keras.Input(input_shape, name='images') - logits = base_model(inputs) - model = keras.Model(inputs=inputs, outputs=logits) - # Reuse sequential layer and create new nodes. - zero_logits = base_model(tf.zeros_like(inputs)) - one_logits = base_model(tf.ones_like(inputs)) - # L2 loss. - l2_loss = tf.reduce_mean( - tf.reduce_sum(tf.square(logits - zero_logits), -1)) - model.add_loss(l2_loss * l2) - model.add_metric(l2_loss, aggregation='mean', name='l2_loss') - # L1 loss. - l1_loss = tf.reduce_mean( - tf.reduce_sum(tf.abs(logits - one_logits), -1)) - model.add_loss(l1_loss * l1) - model.add_metric(l1_loss, aggregation='mean', name='l1_loss') - return model + base_model = keras.Sequential( + [ + keras.layers.Conv2D( + 32, kernel_size=5, activation="relu", input_shape=input_shape + ), + keras.layers.MaxPooling2D(pool_size=2), + keras.layers.Conv2D(64, kernel_size=5, activation="relu"), + keras.layers.MaxPooling2D(pool_size=2), + keras.layers.Flatten(), + keras.layers.Dense(1024, activation="relu"), + keras.layers.Dense(num_classes, name="logits"), + ] + ) + inputs = keras.Input(input_shape, name="images") + logits = base_model(inputs) + model = keras.Model(inputs=inputs, outputs=logits) + # Reuse sequential layer and create new nodes. + zero_logits = base_model(tf.zeros_like(inputs)) + one_logits = base_model(tf.ones_like(inputs)) + # L2 loss. + l2_loss = tf.reduce_mean(tf.reduce_sum(tf.square(logits - zero_logits), -1)) + model.add_loss(l2_loss * l2) + model.add_metric(l2_loss, aggregation="mean", name="l2_loss") + # L1 loss. + l1_loss = tf.reduce_mean(tf.reduce_sum(tf.abs(logits - one_logits), -1)) + model.add_loss(l1_loss * l1) + model.add_metric(l1_loss, aggregation="mean", name="l1_loss") + return model class TestDistributionStrategyWithMultipleAddLossAndMetricCalls( - tf.test.TestCase, parameterized.TestCase): - """Tests complex models with multiple add loss and metric calls.""" - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - all_strategy_combinations_minus_default(), - tf.__internal__.test.combinations.combine( - model_fn=[ - _functional_with_add_loss_and_metric, - _sequential_with_add_loss_and_metric, - _functional_with_layer_reuse, - ], - l1=[0.01], - l2=[0.1]))) - def test_fit_and_evaluate(self, distribution, model_fn, l1, l2): - # Make fake MNIST-like image data. 
- np.random.seed(_RANDOM_SEED) - dataset = tf.data.Dataset.from_tensor_slices( - (np.random.uniform(size=(64, 28, 28, 1)).astype(np.float32), - np.random.randint(0, 10, size=(64,)))) - dataset = dataset.shuffle(64).batch( - 8 * distribution.num_replicas_in_sync, drop_remainder=True) - # Make model with distribution strategy and initialize with dataset shape. - input_shape = tf.data.experimental.get_structure(dataset)[0].shape[1:] - with distribution.scope(): - model = model_fn(input_shape, 10, l1, l2) - model.compile( - optimizer=keras.optimizers.adam_v2.Adam(1e-4), - loss=keras.losses.SparseCategoricalCrossentropy( - from_logits=True, - reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE), - metrics=[ - keras.metrics.SparseCategoricalAccuracy(), - keras.metrics.SparseCategoricalCrossentropy(from_logits=True), - ]) - # Non-eager training doesn't support steps_per_epoch=None. - for unused_epoch in range(2): - model.fit(dataset) - results = dict(zip(model.metrics_names, model.evaluate(dataset))) - # Sanity checks. - self.assertBetween(results['sparse_categorical_accuracy'], 0.02, 1.) - self.assertGreater(results['l2_loss'], 0.) - self.assertGreater(results['l1_loss'], 0.) - # Assert correctness of the loss calculation and updating of metrics. - self.assertNear( - results['l1_loss'] * l1 + results['l2_loss'] * l2 + - results['sparse_categorical_crossentropy'], results['loss'], 1e-6) + tf.test.TestCase, parameterized.TestCase +): + """Tests complex models with multiple add loss and metric calls.""" + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + all_strategy_combinations_minus_default(), + tf.__internal__.test.combinations.combine( + model_fn=[ + _functional_with_add_loss_and_metric, + _sequential_with_add_loss_and_metric, + _functional_with_layer_reuse, + ], + l1=[0.01], + l2=[0.1], + ), + ) + ) + def test_fit_and_evaluate(self, distribution, model_fn, l1, l2): + # Make fake MNIST-like image data. + np.random.seed(_RANDOM_SEED) + dataset = tf.data.Dataset.from_tensor_slices( + ( + np.random.uniform(size=(64, 28, 28, 1)).astype(np.float32), + np.random.randint(0, 10, size=(64,)), + ) + ) + dataset = dataset.shuffle(64).batch( + 8 * distribution.num_replicas_in_sync, drop_remainder=True + ) + # Make model with distribution strategy and initialize with dataset + # shape. + input_shape = tf.data.experimental.get_structure(dataset)[0].shape[1:] + with distribution.scope(): + model = model_fn(input_shape, 10, l1, l2) + model.compile( + optimizer=keras.optimizers.adam_legacy.Adam(1e-4), + loss=keras.losses.SparseCategoricalCrossentropy( + from_logits=True, + reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + ), + metrics=[ + keras.metrics.SparseCategoricalAccuracy(), + keras.metrics.SparseCategoricalCrossentropy( + from_logits=True + ), + ], + ) + # Non-eager training doesn't support steps_per_epoch=None. + for unused_epoch in range(2): + model.fit(dataset) + results = dict(zip(model.metrics_names, model.evaluate(dataset))) + # Sanity checks. + self.assertBetween(results["sparse_categorical_accuracy"], 0.02, 1.0) + self.assertGreater(results["l2_loss"], 0.0) + self.assertGreater(results["l1_loss"], 0.0) + # Assert correctness of the loss calculation and updating of metrics. + self.assertNear( + results["l1_loss"] * l1 + + results["l2_loss"] * l2 + + results["sparse_categorical_crossentropy"], + results["loss"], + 1e-6, + ) class DeterministicModel(keras.Model): - """Deterministic Model that always outputs the same initial result. 
+ """Deterministic Model that always outputs the same initial result. - It verifies the `call` method is run inside the same distribution - strategy that the model was initially passed. - """ + It verifies the `call` method is run inside the same distribution + strategy that the model was initially passed. + """ - def __init__(self, strategy): - super().__init__() - self.x = None - self.strategy = strategy + def __init__(self, strategy): + super().__init__() + self.x = None + self.strategy = strategy - def build(self, input_shape): - self.x = tf.Variable(tf.ones(shape=())) + def build(self, input_shape): + self.x = tf.Variable(tf.ones(shape=())) - def call(self, inputs, training=None, mask=None): - active_strategy = tf.distribute.get_strategy() - if active_strategy is not self.strategy: - raise ValueError('Model must execute call w/ the original strategy') - return self.x * inputs + def call(self, inputs, training=None, mask=None): + active_strategy = tf.distribute.get_strategy() + if active_strategy is not self.strategy: + raise ValueError("Model must execute call w/ the original strategy") + return self.x * inputs class TestModelCapturesStrategy(tf.test.TestCase, parameterized.TestCase): - """Tests that model creation captures the strategy.""" - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_fit_and_evaluate(self, distribution): - dataset = tf.data.Dataset.from_tensor_slices( - (tf.ones(shape=(64,)), tf.ones(shape=(64,)))) - dataset = dataset.batch(8 * distribution.num_replicas_in_sync) - # Make model with distribution strategy - with distribution.scope(): - model = DeterministicModel(distribution) - optimizer = keras.optimizers.adam_v2.Adam(1e-4) - - # Compile & evaluate the model outside of the distribution strategy scope - model.compile( - optimizer=optimizer, - loss=keras.losses.MeanSquaredError(), - metrics=['binary_accuracy']) - - # Call `optimizer.iterations` out of strategy scope. - self.assertEqual(model.optimizer.iterations.numpy(), 0) - - # Non-eager training doesn't support steps_per_epoch=None. 
- for unused_epoch in range(2): - model.fit(dataset) - - results = model.evaluate(dataset) - results = dict(zip(model.metrics_names, results)) - - # Check that the metrics have a result we expect - self.assertEqual(results['binary_accuracy'], 1.0) - self.assertAllClose(results['loss'], 0.0) - - # Assert that all metric/optimizer/model variables were made in the - # distribution strategy (Test that compile uses the captured - # distribution strategy) - metric_vars = tf.nest.flatten( - [metric.variables for metric in model.metrics]) - for var in metric_vars: - self.assertTrue(distribution.extended.variable_created_in_scope(var)) - for var in model.optimizer._weights: - self.assertTrue(distribution.extended.variable_created_in_scope(var)) - for var in model.variables: - self.assertTrue(distribution.extended.variable_created_in_scope(var)) - - # Make sure the metric must be created in the same scope as the model: - # This shouldn't raise any validation errors - with distribution.scope(): - metric = keras.metrics.BinaryAccuracy() - model.compile( - optimizer=optimizer, - loss=keras.losses.MeanSquaredError(), - metrics=[metric]) - - # This should raise an error because the metric is constructed - # outside of the scope, and not by compile - if tf.distribute.has_strategy(): - with self.assertRaisesRegex(ValueError, 'All metrics must be created in'): + """Tests that model creation captures the strategy.""" + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_fit_and_evaluate(self, distribution): + dataset = tf.data.Dataset.from_tensor_slices( + (tf.ones(shape=(64,)), tf.ones(shape=(64,))) + ) + dataset = dataset.batch(8 * distribution.num_replicas_in_sync) + # Make model with distribution strategy + with distribution.scope(): + model = DeterministicModel(distribution) + optimizer = keras.optimizers.adam_legacy.Adam(1e-4) + + # Compile & evaluate the model outside of the distribution strategy + # scope + model.compile( + optimizer=optimizer, + loss=keras.losses.MeanSquaredError(), + metrics=["binary_accuracy"], + ) + + # Call `optimizer.iterations` out of strategy scope. + self.assertEqual(model.optimizer.iterations.numpy(), 0) + + # Non-eager training doesn't support steps_per_epoch=None. 
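# The scope assertions further down rely on
# tf.distribute.StrategyExtended.variable_created_in_scope. A minimal
# illustration of its behavior (the strategy choice is again illustrative):
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    inside = tf.Variable(1.0)  # created under the strategy
outside = tf.Variable(1.0)  # created under the default strategy
assert strategy.extended.variable_created_in_scope(inside)
assert not strategy.extended.variable_created_in_scope(outside)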
+ for unused_epoch in range(2): + model.fit(dataset) + + results = model.evaluate(dataset) + results = dict(zip(model.metrics_names, results)) + + # Check that the metrics have a result we expect + self.assertEqual(results["binary_accuracy"], 1.0) + self.assertAllClose(results["loss"], 0.0) + + # Assert that all metric/optimizer/model variables were made in the + # distribution strategy (Test that compile uses the captured + # distribution strategy) + metric_vars = tf.nest.flatten( + [metric.variables for metric in model.metrics] + ) + for var in metric_vars: + self.assertTrue( + distribution.extended.variable_created_in_scope(var) + ) + for var in model.optimizer._weights: + self.assertTrue( + distribution.extended.variable_created_in_scope(var) + ) + for var in model.variables: + self.assertTrue( + distribution.extended.variable_created_in_scope(var) + ) + + # Make sure the metric must be created in the same scope as the model: + # This shouldn't raise any validation errors + with distribution.scope(): + metric = keras.metrics.BinaryAccuracy() model.compile( - optimizer=keras.optimizers.adam_v2.Adam(1e-4), + optimizer=optimizer, loss=keras.losses.MeanSquaredError(), - metrics=[keras.metrics.BinaryAccuracy()]) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=tf.__internal__.distribute.combinations.mirrored_strategy_with_one_cpu, - mode=['eager'])) - def test_optimizer(self, distribution): - temp_dir = os.path.join(self.get_temp_dir(), 'ckpt') - - def create_model(): - model = keras.models.Sequential([ - keras.layers.Dense(1), - ]) - model.compile(optimizer='adam', loss='mse') - model.build([None, 1]) # create weights. - self.assertEmpty(model.optimizer.weights) - return model - - model = create_model() - x = y = tf.ones(shape=(1, 1)) - model.fit(x=x, y=y, batch_size=1) - model.save_weights(temp_dir) - - with distribution.scope(): - model = create_model() - model.load_weights(temp_dir) - self.assertNotEmpty(model.optimizer.weights) - self.assertTrue( - distributed_training_utils.is_distributed_variable( - model.optimizer.weights[0])) - - with distribution.scope(): - model = create_model() - # create/restore slot variables outside of scope is fine. - model.load_weights(temp_dir) - self.assertNotEmpty(model.optimizer.weights) - self.assertTrue( - distributed_training_utils.is_distributed_variable( - model.optimizer.weights[0])) - - -if __name__ == '__main__': - base_layer_utils.enable_v2_dtype_behavior() - tf.__internal__.distribute.multi_process_runner.test_main() + metrics=[metric], + ) + + # This should raise an error because the metric is constructed + # outside of the scope, and not by compile + if tf.distribute.has_strategy(): + with self.assertRaisesRegex( + ValueError, "All metrics must be created in" + ): + model.compile( + optimizer=keras.optimizers.adam_v2.Adam(1e-4), + loss=keras.losses.MeanSquaredError(), + metrics=[keras.metrics.BinaryAccuracy()], + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=tf.__internal__.distribute.combinations.mirrored_strategy_with_one_cpu, # noqa: E501 + mode=["eager"], + ) + ) + def test_optimizer(self, distribution): + temp_dir = os.path.join(self.get_temp_dir(), "ckpt") + + def create_model(): + model = keras.models.Sequential( + [ + keras.layers.Dense(1), + ] + ) + model.compile(optimizer=keras.optimizers.Adam(), loss="mse") + model.build([None, 1]) # create weights. 
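# Note: at this point in create_model() the layer weights exist, but the
# optimizer's slot variables do not; they are created by the first training
# step, or restored from the checkpoint by load_weights() further below.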
+ return model + + model = create_model() + x = y = tf.ones(shape=(1, 1)) + model.fit(x=x, y=y, batch_size=1) + model.save_weights(temp_dir) + + with distribution.scope(): + model = create_model() + model.load_weights(temp_dir) + if isinstance(model.optimizer, optimizer_base.Optimizer): + model.optimizer.build(model.trainable_variables) + variables = model.optimizer.variables + else: + variables = model.optimizer.variables() + self.assertNotEmpty(variables) + self.assertTrue( + distributed_training_utils.is_distributed_variable(variables[0]) + ) + + with distribution.scope(): + model = create_model() + # create/restore slot variables outside of scope is fine. + model.load_weights(temp_dir) + if isinstance(model.optimizer, optimizer_base.Optimizer): + # V3 optimizer has to restore variables in scope. + return + # From this point on, the optimizer must be a V2 optimizer. + self.assertNotEmpty(model.optimizer.variables()) + self.assertTrue( + distributed_training_utils.is_distributed_variable( + model.optimizer.variables()[0] + ) + ) + + +if __name__ == "__main__": + base_layer_utils.enable_v2_dtype_behavior() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/distributed_file_utils.py b/keras/distribute/distributed_file_utils.py index 52de006e8b5b..fec668cfaa59 100644 --- a/keras/distribute/distributed_file_utils.py +++ b/keras/distribute/distributed_file_utils.py @@ -44,102 +44,137 @@ Experimental. API is subject to change. """ +import os + +import requests import tensorflow.compat.v2 as tf -import os +GCP_METADATA_HEADER = {"Metadata-Flavor": "Google"} +_GCE_METADATA_URL_ENV_VARIABLE = "GCE_METADATA_IP" def _get_base_dirpath(strategy): - task_id = strategy.extended._task_id # pylint: disable=protected-access - return 'workertemp_' + str(task_id) + task_id = strategy.extended._task_id + return "workertemp_" + str(task_id) def _is_temp_dir(dirpath, strategy): - return dirpath.endswith(_get_base_dirpath(strategy)) + return dirpath.endswith(_get_base_dirpath(strategy)) def _get_temp_dir(dirpath, strategy): - if _is_temp_dir(dirpath, strategy): - temp_dir = dirpath - else: - temp_dir = os.path.join(dirpath, _get_base_dirpath(strategy)) - tf.io.gfile.makedirs(temp_dir) - return temp_dir + if _is_temp_dir(dirpath, strategy): + temp_dir = dirpath + else: + temp_dir = os.path.join(dirpath, _get_base_dirpath(strategy)) + tf.io.gfile.makedirs(temp_dir) + return temp_dir def write_dirpath(dirpath, strategy): - """Returns the writing dir that should be used to save file distributedly. - - `dirpath` would be created if it doesn't exist. - - Args: - dirpath: Original dirpath that would be used without distribution. - strategy: The tf.distribute strategy object currently used. - - Returns: - The writing dir path that should be used to save with distribution. - """ - if strategy is None: - # Infer strategy from `distribution_strategy_context` if not given. - strategy = tf.distribute.get_strategy() - if strategy is None: - # If strategy is still not available, this is not in distributed training. - # Fallback to original dirpath. - return dirpath - if not strategy.extended._in_multi_worker_mode(): # pylint: disable=protected-access - return dirpath - if strategy.extended.should_checkpoint: - return dirpath - # If this worker is not chief and hence should not save file, save it to a - # temporary directory to be removed later. - return _get_temp_dir(dirpath, strategy) + """Returns the writing dir that should be used to save file distributedly. 
+ + `dirpath` would be created if it doesn't exist. + + Args: + dirpath: Original dirpath that would be used without distribution. + strategy: The tf.distribute strategy object currently used. + + Returns: + The writing dir path that should be used to save with distribution. + """ + if strategy is None: + # Infer strategy from `tf.distribute` if not given. + strategy = tf.distribute.get_strategy() + if strategy is None: + # If strategy is still not available, this is not in distributed + # training. Fallback to original dirpath. + return dirpath + if not strategy.extended._in_multi_worker_mode(): + return dirpath + if strategy.extended.should_checkpoint: + return dirpath + # If this worker is not chief and hence should not save file, save it to a + # temporary directory to be removed later. + return _get_temp_dir(dirpath, strategy) def remove_temp_dirpath(dirpath, strategy): - """Removes the temp path after writing is finished. - - Args: - dirpath: Original dirpath that would be used without distribution. - strategy: The tf.distribute strategy object currently used. - """ - if strategy is None: - # Infer strategy from `distribution_strategy_context` if not given. - strategy = tf.distribute.get_strategy() - if strategy is None: - # If strategy is still not available, this is not in distributed training. - # Fallback to no-op. - return - # TODO(anjalisridhar): Consider removing the check for multi worker mode since - # it is redundant when used with the should_checkpoint property. - if (strategy.extended._in_multi_worker_mode() and # pylint: disable=protected-access - not strategy.extended.should_checkpoint): - # If this worker is not chief and hence should not save file, remove - # the temporary directory. - tf.compat.v1.gfile.DeleteRecursively(_get_temp_dir(dirpath, strategy)) + """Removes the temp path after writing is finished. + + Args: + dirpath: Original dirpath that would be used without distribution. + strategy: The tf.distribute strategy object currently used. + """ + if strategy is None: + # Infer strategy from `tf.distribute` if not given. + strategy = tf.distribute.get_strategy() + if strategy is None: + # If strategy is still not available, this is not in distributed + # training. Fallback to no-op. + return + # TODO(anjalisridhar): Consider removing the check for multi worker mode + # since it is redundant when used with the should_checkpoint property. + if ( + strategy.extended._in_multi_worker_mode() + and not strategy.extended.should_checkpoint + ): + # If this worker is not chief and hence should not save file, remove + # the temporary directory. + tf.compat.v1.gfile.DeleteRecursively(_get_temp_dir(dirpath, strategy)) def write_filepath(filepath, strategy): - """Returns the writing file path to be used to save file distributedly. + """Returns the writing file path to be used to save file distributedly. - Directory to contain `filepath` would be created if it doesn't exist. + Directory to contain `filepath` would be created if it doesn't exist. - Args: - filepath: Original filepath that would be used without distribution. - strategy: The tf.distribute strategy object currently used. + Args: + filepath: Original filepath that would be used without distribution. + strategy: The tf.distribute strategy object currently used. - Returns: - The writing filepath that should be used to save file with distribution. 
- """ - dirpath = os.path.dirname(filepath) - base = os.path.basename(filepath) - return os.path.join(write_dirpath(dirpath, strategy), base) + Returns: + The writing filepath that should be used to save file with distribution. + """ + dirpath = os.path.dirname(filepath) + base = os.path.basename(filepath) + return os.path.join(write_dirpath(dirpath, strategy), base) def remove_temp_dir_with_filepath(filepath, strategy): - """Removes the temp path for file after writing is finished. - - Args: - filepath: Original filepath that would be used without distribution. - strategy: The tf.distribute strategy object currently used. - """ - remove_temp_dirpath(os.path.dirname(filepath), strategy) + """Removes the temp path for file after writing is finished. + + Args: + filepath: Original filepath that would be used without distribution. + strategy: The tf.distribute strategy object currently used. + """ + remove_temp_dirpath(os.path.dirname(filepath), strategy) + + +def _on_gcp(): + """Detect whether the current running environment is on GCP.""" + gce_metadata_endpoint = "http://" + os.environ.get( + _GCE_METADATA_URL_ENV_VARIABLE, "metadata.google.internal" + ) + + try: + # Timeout in 5 seconds, in case the test environment has connectivity + # issue. There is not default timeout, which means it might block + # forever. + response = requests.get( + f"{gce_metadata_endpoint}/computeMetadata/v1/{'instance/hostname'}", + headers=GCP_METADATA_HEADER, + timeout=5, + ) + return response.status_code + except requests.exceptions.RequestException: + return False + + +def support_on_demand_checkpoint_callback(strategy): + if _on_gcp() and isinstance( + strategy, tf.distribute.MultiWorkerMirroredStrategy + ): + return True + + return False diff --git a/keras/distribute/distributed_file_utils_test.py b/keras/distribute/distributed_file_utils_test.py index ddd7f0485bd0..0260b45c13c5 100644 --- a/keras/distribute/distributed_file_utils_test.py +++ b/keras/distribute/distributed_file_utils_test.py @@ -14,119 +14,121 @@ # ============================================================================== """Tests for distributed_file_utils.""" -import tensorflow.compat.v2 as tf - import os +import tensorflow.compat.v2 as tf + from keras.distribute import distributed_file_utils class DistributedFileUtilsTest(tf.test.TestCase): - - class MockedExtended: - pass - - class MockedChiefStrategy: - - def __init__(self): - self.extended = DistributedFileUtilsTest.MockedExtended() - self.extended._in_multi_worker_mode = lambda: True - self.extended.should_checkpoint = True - - class MockedWorkerStrategy: - - def __init__(self): - self.extended = DistributedFileUtilsTest.MockedExtended() - self.extended._in_multi_worker_mode = lambda: True - self.extended.should_checkpoint = False - self.extended._task_id = 3 - - class MockedSingleWorkerStrategy: - - def __init__(self): - self.extended = DistributedFileUtilsTest.MockedExtended() - self.extended._in_multi_worker_mode = lambda: False - - def _write_dummy_file(self, file_to_write): - with open(file_to_write, 'w') as f: - f.write('foo bar') - - def testChiefWriteDirAndFilePath(self): - dirpath = self.get_temp_dir() - filepath = os.path.join(dirpath, 'foo.bar') - strategy = DistributedFileUtilsTest.MockedChiefStrategy() - self.assertEqual( - distributed_file_utils.write_filepath(filepath, strategy), filepath) - self.assertEqual( - distributed_file_utils.write_dirpath(dirpath, strategy), dirpath) - - def testWorkerWriteDirAndFilePath(self): - dirpath = self.get_temp_dir() - filepath = 
os.path.join(dirpath, 'foo.bar') - strategy = DistributedFileUtilsTest.MockedWorkerStrategy() - self.assertEqual( - distributed_file_utils.write_filepath(filepath, strategy), - os.path.join(dirpath, 'workertemp_3', 'foo.bar')) - self.assertEqual( - distributed_file_utils.write_dirpath(dirpath, strategy), - os.path.join(dirpath, 'workertemp_3')) - - def testChiefDoesNotRemoveDirAndFilePath(self): - temp_dir = self.get_temp_dir() - strategy = DistributedFileUtilsTest.MockedChiefStrategy() - dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) - file_to_write = os.path.join(dir_to_write, 'tmp') - self.assertFalse(os.path.exists(file_to_write)) - self._write_dummy_file(file_to_write) - self.assertTrue(os.path.exists(file_to_write)) - distributed_file_utils.remove_temp_dir_with_filepath( - file_to_write, strategy) - self.assertTrue(os.path.exists(file_to_write)) - - def testWorkerDoesRemoveFilePath(self): - temp_dir = self.get_temp_dir() - strategy = DistributedFileUtilsTest.MockedWorkerStrategy() - dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) - file_to_write = os.path.join(dir_to_write, 'tmp') - self.assertFalse(os.path.exists(file_to_write)) - self._write_dummy_file(file_to_write) - self.assertTrue(os.path.exists(file_to_write)) - distributed_file_utils.remove_temp_dir_with_filepath( - file_to_write, strategy) - self.assertFalse(os.path.exists(file_to_write)) - - def testWorkerDoesRemoveDirPath(self): - temp_dir = self.get_temp_dir() - strategy = DistributedFileUtilsTest.MockedWorkerStrategy() - dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) - file_to_write = os.path.join(dir_to_write, 'tmp') - self.assertFalse(os.path.exists(file_to_write)) - self._write_dummy_file(file_to_write) - self.assertTrue(os.path.exists(file_to_write)) - distributed_file_utils.remove_temp_dirpath(temp_dir, strategy) - self.assertFalse(os.path.exists(file_to_write)) - self.assertFalse(os.path.exists(os.path.dirname(file_to_write))) - - def testMultipleRemoveOrigDirPathIsFine(self): - temp_dir = self.get_temp_dir() - strategy = DistributedFileUtilsTest.MockedWorkerStrategy() - dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) - file_to_write = os.path.join(dir_to_write, 'tmp') - self._write_dummy_file(file_to_write) - distributed_file_utils.remove_temp_dirpath(temp_dir, strategy) - distributed_file_utils.remove_temp_dirpath(temp_dir, strategy) - distributed_file_utils.remove_temp_dirpath(temp_dir, strategy) - - def testMultipleRemoveDirToWritePathIsFine(self): - temp_dir = self.get_temp_dir() - strategy = DistributedFileUtilsTest.MockedWorkerStrategy() - dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) - file_to_write = os.path.join(dir_to_write, 'tmp') - self._write_dummy_file(file_to_write) - distributed_file_utils.remove_temp_dirpath(dir_to_write, strategy) - distributed_file_utils.remove_temp_dirpath(dir_to_write, strategy) - distributed_file_utils.remove_temp_dirpath(dir_to_write, strategy) - - -if __name__ == '__main__': - tf.test.main() + class MockedExtended: + pass + + class MockedChiefStrategy: + def __init__(self): + self.extended = DistributedFileUtilsTest.MockedExtended() + self.extended._in_multi_worker_mode = lambda: True + self.extended.should_checkpoint = True + + class MockedWorkerStrategy: + def __init__(self): + self.extended = DistributedFileUtilsTest.MockedExtended() + self.extended._in_multi_worker_mode = lambda: True + self.extended.should_checkpoint = False + 
self.extended._task_id = 3 + + class MockedSingleWorkerStrategy: + def __init__(self): + self.extended = DistributedFileUtilsTest.MockedExtended() + self.extended._in_multi_worker_mode = lambda: False + + def _write_dummy_file(self, file_to_write): + with open(file_to_write, "w") as f: + f.write("foo bar") + + def testChiefWriteDirAndFilePath(self): + dirpath = self.get_temp_dir() + filepath = os.path.join(dirpath, "foo.bar") + strategy = DistributedFileUtilsTest.MockedChiefStrategy() + self.assertEqual( + distributed_file_utils.write_filepath(filepath, strategy), filepath + ) + self.assertEqual( + distributed_file_utils.write_dirpath(dirpath, strategy), dirpath + ) + + def testWorkerWriteDirAndFilePath(self): + dirpath = self.get_temp_dir() + filepath = os.path.join(dirpath, "foo.bar") + strategy = DistributedFileUtilsTest.MockedWorkerStrategy() + self.assertEqual( + distributed_file_utils.write_filepath(filepath, strategy), + os.path.join(dirpath, "workertemp_3", "foo.bar"), + ) + self.assertEqual( + distributed_file_utils.write_dirpath(dirpath, strategy), + os.path.join(dirpath, "workertemp_3"), + ) + + def testChiefDoesNotRemoveDirAndFilePath(self): + temp_dir = self.get_temp_dir() + strategy = DistributedFileUtilsTest.MockedChiefStrategy() + dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) + file_to_write = os.path.join(dir_to_write, "tmp") + self.assertFalse(os.path.exists(file_to_write)) + self._write_dummy_file(file_to_write) + self.assertTrue(os.path.exists(file_to_write)) + distributed_file_utils.remove_temp_dir_with_filepath( + file_to_write, strategy + ) + self.assertTrue(os.path.exists(file_to_write)) + + def testWorkerDoesRemoveFilePath(self): + temp_dir = self.get_temp_dir() + strategy = DistributedFileUtilsTest.MockedWorkerStrategy() + dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) + file_to_write = os.path.join(dir_to_write, "tmp") + self.assertFalse(os.path.exists(file_to_write)) + self._write_dummy_file(file_to_write) + self.assertTrue(os.path.exists(file_to_write)) + distributed_file_utils.remove_temp_dir_with_filepath( + file_to_write, strategy + ) + self.assertFalse(os.path.exists(file_to_write)) + + def testWorkerDoesRemoveDirPath(self): + temp_dir = self.get_temp_dir() + strategy = DistributedFileUtilsTest.MockedWorkerStrategy() + dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) + file_to_write = os.path.join(dir_to_write, "tmp") + self.assertFalse(os.path.exists(file_to_write)) + self._write_dummy_file(file_to_write) + self.assertTrue(os.path.exists(file_to_write)) + distributed_file_utils.remove_temp_dirpath(temp_dir, strategy) + self.assertFalse(os.path.exists(file_to_write)) + self.assertFalse(os.path.exists(os.path.dirname(file_to_write))) + + def testMultipleRemoveOrigDirPathIsFine(self): + temp_dir = self.get_temp_dir() + strategy = DistributedFileUtilsTest.MockedWorkerStrategy() + dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) + file_to_write = os.path.join(dir_to_write, "tmp") + self._write_dummy_file(file_to_write) + distributed_file_utils.remove_temp_dirpath(temp_dir, strategy) + distributed_file_utils.remove_temp_dirpath(temp_dir, strategy) + distributed_file_utils.remove_temp_dirpath(temp_dir, strategy) + + def testMultipleRemoveDirToWritePathIsFine(self): + temp_dir = self.get_temp_dir() + strategy = DistributedFileUtilsTest.MockedWorkerStrategy() + dir_to_write = distributed_file_utils.write_dirpath(temp_dir, strategy) + file_to_write = 
os.path.join(dir_to_write, "tmp") + self._write_dummy_file(file_to_write) + distributed_file_utils.remove_temp_dirpath(dir_to_write, strategy) + distributed_file_utils.remove_temp_dirpath(dir_to_write, strategy) + distributed_file_utils.remove_temp_dirpath(dir_to_write, strategy) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/distribute/distributed_training_utils.py b/keras/distribute/distributed_training_utils.py index 876f83c7142b..61edf4f5193d 100644 --- a/keras/distribute/distributed_training_utils.py +++ b/keras/distribute/distributed_training_utils.py @@ -14,10 +14,12 @@ # ============================================================================== """Utilities related to distributed training.""" -from absl import flags -from keras import backend +import contextlib import tensorflow.compat.v2 as tf +from absl import flags + +from keras import backend FLAGS = flags.FLAGS @@ -26,92 +28,115 @@ # core MirroredStrategy only. Remove this check when contrib MirroredStrategy is # no longer needed. def global_batch_size_supported(distribution_strategy): - return distribution_strategy.extended._global_batch_size # pylint: disable=protected-access + return distribution_strategy.extended._global_batch_size def call_replica_local_fn(fn, *args, **kwargs): - """Call a function that uses replica-local variables. - - This function correctly handles calling `fn` in a cross-replica - context. - - Args: - fn: The function to call. - *args: Positional arguments to the `fn`. - **kwargs: Keyword argument to `fn`. - - Returns: - The result of calling `fn`. - """ - # TODO(b/132666209): Remove this function when we support assign_* - # for replica-local variables. - strategy = None - if 'strategy' in kwargs: - strategy = kwargs.pop('strategy') - else: - if tf.distribute.has_strategy(): - strategy = tf.distribute.get_strategy() - - # TODO(b/120571621): TPUStrategy does not implement replica-local variables. - is_tpu = backend.is_tpu_strategy(strategy) - if ((not is_tpu) and strategy and tf.distribute.in_cross_replica_context()): - with strategy.scope(): - return strategy.extended.call_for_each_replica(fn, args, kwargs) - return fn(*args, **kwargs) + """Call a function that uses replica-local variables. + + This function correctly handles calling `fn` in a cross-replica + context. + + Args: + fn: The function to call. + *args: Positional arguments to the `fn`. + **kwargs: Keyword argument to `fn`. + + Returns: + The result of calling `fn`. + """ + # TODO(b/132666209): Remove this function when we support assign_* + # for replica-local variables. + strategy = None + if "strategy" in kwargs: + strategy = kwargs.pop("strategy") + else: + if tf.distribute.has_strategy(): + strategy = tf.distribute.get_strategy() + + # TODO(b/120571621): TPUStrategy does not implement replica-local variables. + is_tpu = backend.is_tpu_strategy(strategy) + if (not is_tpu) and strategy and tf.distribute.in_cross_replica_context(): + with strategy.scope(): + return strategy.extended.call_for_each_replica(fn, args, kwargs) + return fn(*args, **kwargs) def is_distributed_variable(v): - """Returns whether `v` is a distributed variable.""" - return (isinstance(v, tf.distribute.DistributedValues) and - isinstance(v, tf.Variable)) + """Returns whether `v` is a distributed variable.""" + return isinstance(v, tf.distribute.DistributedValues) and isinstance( + v, tf.Variable + ) def get_strategy(): - """Creates a `tf.distribute.Strategy` object from flags. 
- - Example usage: - - ```python - strategy = utils.get_strategy() - with strategy.scope(): - model = tf.keras.Sequential([tf.keras.layers.Dense(10)]) - - model.compile(...) - train_ds, test_ds = ... - model.fit(train_ds, validation_data=test_ds, epochs=10) - ``` - - Returns: - `tf.distribute.Strategy` instance. - """ - cls = FLAGS.keras_distribute_strategy_class - accepted_strats = { - 'tpu', 'multi_worker_mirrored', 'mirrored', - 'parameter_server', 'one_device'} - if cls == 'tpu': - tpu_addr = FLAGS.keras_distribute_strategy_tpu_addr - if not tpu_addr: - raise ValueError( - 'When using a TPU strategy, you must set the flag ' - '`keras_distribute_strategy_tpu_addr` (TPU address).') - cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( - tpu=tpu_addr) - tf.config.experimental_connect_to_cluster(cluster_resolver) - tf.tpu.experimental.initialize_tpu_system(cluster_resolver) - strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver) - elif cls == 'multi_worker_mirrored': - strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() - elif cls == 'mirrored': - strategy = tf.distribute.MirroredStrategy() - elif cls == 'parameter_server': - cluster_resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver() - strategy = tf.distribute.experimental.ParameterServerStrategy( - cluster_resolver) - elif cls == 'one_device': - strategy = tf.distribute.OneDeviceStrategy('/gpu:0') - else: - raise ValueError( - 'Unknown distribution strategy flag. Received: ' - f'keras_distribute_strategy_class={cls}. ' - f'It should be one of {accepted_strats}') - return strategy + """Creates a `tf.distribute.Strategy` object from flags. + + Example usage: + + ```python + strategy = utils.get_strategy() + with strategy.scope(): + model = tf.keras.Sequential([tf.keras.layers.Dense(10)]) + + model.compile(...) + train_ds, test_ds = ... + model.fit(train_ds, validation_data=test_ds, epochs=10) + ``` + + Returns: + `tf.distribute.Strategy` instance. + """ + cls = FLAGS.keras_distribute_strategy_class + accepted_strats = { + "tpu", + "multi_worker_mirrored", + "mirrored", + "parameter_server", + "one_device", + } + if cls == "tpu": + tpu_addr = FLAGS.keras_distribute_strategy_tpu_addr + if not tpu_addr: + raise ValueError( + "When using a TPU strategy, you must set the flag " + "`keras_distribute_strategy_tpu_addr` (TPU address)." + ) + cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( + tpu=tpu_addr + ) + tf.config.experimental_connect_to_cluster(cluster_resolver) + tf.tpu.experimental.initialize_tpu_system(cluster_resolver) + strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver) + elif cls == "multi_worker_mirrored": + strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() + elif cls == "mirrored": + strategy = tf.distribute.MirroredStrategy() + elif cls == "parameter_server": + cluster_resolver = ( + tf.distribute.cluster_resolver.TFConfigClusterResolver() + ) + strategy = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver + ) + elif cls == "one_device": + strategy = tf.distribute.OneDeviceStrategy("/gpu:0") + else: + raise ValueError( + "Unknown distribution strategy flag. Received: " + f"keras_distribute_strategy_class={cls}. 
" + f"It should be one of {accepted_strats}" + ) + return strategy + + +def maybe_preemption_handler_scope(model): + + if getattr(model, "_preemption_handler", None): + preemption_checkpoint_scope = ( + model._preemption_handler.watch_preemption_scope() + ) + else: + preemption_checkpoint_scope = contextlib.nullcontext() + + return preemption_checkpoint_scope diff --git a/keras/distribute/distributed_training_utils_test.py b/keras/distribute/distributed_training_utils_test.py index 54e5124be30f..690cade75923 100644 --- a/keras/distribute/distributed_training_utils_test.py +++ b/keras/distribute/distributed_training_utils_test.py @@ -18,37 +18,39 @@ from keras import callbacks from keras.distribute import distributed_training_utils_v1 -from keras.optimizers.optimizer_v2 import adam +from keras.optimizers.legacy import adam class DistributedTrainingUtilsTest(tf.test.TestCase): + def test_validate_callbacks_predefined_callbacks(self): + supported_predefined_callbacks = [ + callbacks.TensorBoard(), + callbacks.CSVLogger(filename="./log.csv"), + callbacks.EarlyStopping(), + callbacks.ModelCheckpoint(filepath="./checkpoint"), + callbacks.TerminateOnNaN(), + callbacks.ProgbarLogger(), + callbacks.History(), + callbacks.RemoteMonitor(), + ] - def test_validate_callbacks_predefined_callbacks(self): - supported_predefined_callbacks = [ - callbacks.TensorBoard(), - callbacks.CSVLogger(filename='./log.csv'), - callbacks.EarlyStopping(), - callbacks.ModelCheckpoint(filepath='./checkpoint'), - callbacks.TerminateOnNaN(), - callbacks.ProgbarLogger(), - callbacks.History(), - callbacks.RemoteMonitor() - ] - - distributed_training_utils_v1.validate_callbacks( - supported_predefined_callbacks, adam.Adam()) - - unsupported_predefined_callbacks = [ - callbacks.ReduceLROnPlateau(), - callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001) - ] - - for callback in unsupported_predefined_callbacks: - with self.assertRaisesRegex(ValueError, - 'You must specify a Keras Optimizer V2'): distributed_training_utils_v1.validate_callbacks( - [callback], tf.compat.v1.train.AdamOptimizer()) + supported_predefined_callbacks, adam.Adam() + ) + unsupported_predefined_callbacks = [ + callbacks.ReduceLROnPlateau(), + callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001), + ] -if __name__ == '__main__': - tf.test.main() + for callback in unsupported_predefined_callbacks: + with self.assertRaisesRegex( + ValueError, "You must specify a Keras Optimizer V2" + ): + distributed_training_utils_v1.validate_callbacks( + [callback], tf.compat.v1.train.AdamOptimizer() + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/distribute/distributed_training_utils_v1.py b/keras/distribute/distributed_training_utils_v1.py index 1155e3d14398..8b19235f41ff 100644 --- a/keras/distribute/distributed_training_utils_v1.py +++ b/keras/distribute/distributed_training_utils_v1.py @@ -14,12 +14,11 @@ # ============================================================================== """Utilities related to distributed training.""" -import tensorflow.compat.v2 as tf -# pylint:disable=protected-access - import functools import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import callbacks from keras import metrics as metrics_module @@ -27,1107 +26,1240 @@ from keras.distribute import distribute_coordinator_utils as dc from keras.distribute import distributed_training_utils as dist_utils from keras.engine import training_utils_v1 -from keras.optimizers.optimizer_v2 import optimizer_v2 +from 
keras.optimizers.legacy import optimizer_v2 from keras.utils import tf_contextlib from keras.utils.mode_keys import ModeKeys + +# isort: off from tensorflow.python.platform import tf_logging as logging def set_weights(distribution_strategy, dist_model, weights): - """Sets the weights of the replicated models. + """Sets the weights of the replicated models. + + The weights of the replicated models are set to the weights of the original + model. The weights of the replicated model are Mirrored variables and hence + we need to use the `update` call within a DistributionStrategy scope. + + Args: + distribution_strategy: DistributionStrategy used to distribute training + and validation. + dist_model: The replicated models on the different devices. + weights: The weights of the original model. + """ + assign_ops = [] + for layer in dist_model.layers: + num_param = len(layer.weights) + layer_weights = weights[:num_param] + for sw, w in zip(layer.weights, layer_weights): + if tf.compat.v1.executing_eagerly_outside_functions(): + sw.assign(w) + else: + assign_ops.append(distribution_strategy.unwrap(sw.assign(w))) + weights = weights[num_param:] + + if not tf.compat.v1.executing_eagerly_outside_functions(): + backend.get_session(assign_ops).run(assign_ops) + + +def unwrap_values( + distribution_strategy, + grouped_inputs, + grouped_outputs, + grouped_updates=None, + grouped_session_args=None, + with_loss_tensor=False, +): + """Unwrap the list of values contained in the PerReplica parameters. + + This function calls `flatten_per_replica_values` to parse each of the input + parameters into a list of values on the different devices. If we set + `with_loss_tensor` to be True, we also call `reduce` on the list of losses + on the different devices to give us one loss tensor. + + Args: + distribution_strategy: DistributionStrategy used to distribute training + and validation. + grouped_inputs: PerReplica inputs returned from the train or test function + that we ran on each device. + grouped_outputs: PerReplica outputs returned from the train or test + function that we ran on each device. + grouped_updates: PerReplica updates returned from the train or test + function that we ran on each device. + grouped_session_args: PerReplica session args returned from the train or + test function that we ran on each device. + with_loss_tensor: Boolean that indicates if we need to add the reduced + loss tensor as one of the outputs. + + Returns: + Values of each of the PerReplica parameters. + + """ + # Unwrap per device values returned from each model's train function. + # This will be used to construct the main train function. + all_inputs = flatten_per_replica_values( + distribution_strategy, grouped_inputs + ) + all_outputs = unwrap_outputs( + distribution_strategy, grouped_outputs, with_loss_tensor + ) + + if grouped_updates: + all_updates = flatten_per_replica_values( + distribution_strategy, grouped_updates + ) + else: + all_updates = None - The weights of the replicated models are set to the weights of the original - model. The weights of the replicated model are Mirrored variables and hence - we need to use the `update` call within a DistributionStrategy scope. + all_session_args = {} + if grouped_session_args: + grouped_feed_dict = grouped_session_args.get("feed_dict") + if grouped_feed_dict: + all_session_args["feed_dict"] = flatten_per_replica_values( + distribution_strategy, grouped_feed_dict + ) - Args: - distribution_strategy: DistributionStrategy used to distribute training - and validation. 
- dist_model: The replicated models on the different devices. - weights: The weights of the original model. - """ - assign_ops = [] - for layer in dist_model.layers: - num_param = len(layer.weights) - layer_weights = weights[:num_param] - for sw, w in zip(layer.weights, layer_weights): - if tf.compat.v1.executing_eagerly_outside_functions(): - sw.assign(w) - else: - assign_ops.append(distribution_strategy.unwrap(sw.assign(w))) - weights = weights[num_param:] - - if not tf.compat.v1.executing_eagerly_outside_functions(): - backend.get_session(assign_ops).run(assign_ops) - - -def unwrap_values(distribution_strategy, grouped_inputs, grouped_outputs, - grouped_updates=None, grouped_session_args=None, - with_loss_tensor=False): - """Unwrap the list of values contained in the PerReplica parameters. - - This function calls `flatten_per_replica_values` to parse each of the input - parameters into a list of values on the different devices. If we set - `with_loss_tensor` to be True, we also call `reduce` on the list of losses on - the different devices to give us one loss tensor. - - Args: - distribution_strategy: DistributionStrategy used to distribute training and - validation. - grouped_inputs: PerReplica inputs returned from the train or test function - that we ran on each device. - grouped_outputs: PerReplica outputs returned from the train or test function - that we ran on each device. - grouped_updates: PerReplica updates returned from the train or test function - that we ran on each device. - grouped_session_args: PerReplica session args returned from the train or - test function that we ran on each device. - with_loss_tensor: Boolean that indicates if we need to add the reduced loss - tensor as one of the outputs. - - Returns: - Values of each of the PerReplica parameters. - - """ - # Unwrap per device values returned from each model's train function. - # This will be used to construct the main train function. - all_inputs = flatten_per_replica_values(distribution_strategy, - grouped_inputs) - all_outputs = unwrap_outputs(distribution_strategy, grouped_outputs, - with_loss_tensor) - - if grouped_updates: - all_updates = flatten_per_replica_values(distribution_strategy, - grouped_updates) - else: - all_updates = None - - all_session_args = {} - if grouped_session_args: - grouped_feed_dict = grouped_session_args.get('feed_dict') - if grouped_feed_dict: - all_session_args['feed_dict'] = flatten_per_replica_values( - distribution_strategy, grouped_feed_dict) - - grouped_fetches = grouped_session_args.get('fetches') - if grouped_fetches: - all_session_args['fetches'] = flatten_per_replica_values( - distribution_strategy, grouped_fetches) - - # TODO(priyag): Return only non empty/None values - return all_inputs, all_outputs, all_updates, all_session_args + grouped_fetches = grouped_session_args.get("fetches") + if grouped_fetches: + all_session_args["fetches"] = flatten_per_replica_values( + distribution_strategy, grouped_fetches + ) + + # TODO(priyag): Return only non empty/None values + return all_inputs, all_outputs, all_updates, all_session_args def unwrap_output_dict(strategy, grouped_outputs, mode): - """Unwrap the list of outputs contained in the PerReplica parameters.""" - if mode == ModeKeys.PREDICT: - return flatten_per_replica_values(strategy, grouped_outputs) - - # In the case of fit/eval, the grouped_outputs is a dict, whereas in predict, - # the output is as same structure as model output. 
They need to be treated - # differently - total_loss = strategy.reduce(tf.distribute.ReduceOp.SUM, - grouped_outputs['total_loss'][0], axis=None) - output_losses = flatten_per_replica_values(strategy, - grouped_outputs['output_losses']) - metrics = flatten_per_replica_values(strategy, - grouped_outputs['metrics']) - batch_size = strategy.reduce(tf.distribute.ReduceOp.SUM, - grouped_outputs['batch_size'], axis=None) - if (backend.is_tpu_strategy(strategy) and - tf.compat.v1.executing_eagerly_outside_functions()): - # Choose 1 value per replica in the TPU case since all replicas produce the - # same output. - # We only do this in eager mode for now since this function is used in - # both graph and eager mode and in the graph case we currently don't use - # experimental_run so would need to be removed when we converge the graph - # code path as well. - output_losses = output_losses[::strategy.num_replicas_in_sync] - metrics = metrics[::strategy.num_replicas_in_sync] - return {'total_loss': [total_loss], - 'output_losses': output_losses, - 'metrics': metrics, - 'batch_size': batch_size} - - -def unwrap_outputs(distribution_strategy, grouped_outputs, - with_loss_tensor=False): - """Unwrap the list of outputs contained in the PerReplica parameters. - - This function calls `flatten_per_replica_values` to parse each of the input - parameters into a list of outputs on the different devices. If we set - `with_loss_tensor` to be True, we also call `reduce` on the list of losses on - the different devices to give us one loss tensor. - - Args: - distribution_strategy: DistributionStrategy used to distribute training and - validation. - grouped_outputs: PerReplica outputs returned from the train or test function - that we ran on each device. - with_loss_tensor: Boolean that indicates if we need to add the reduced loss - tensor as one of the outputs. - - Returns: - Values of each of the PerReplica outputs. - - """ - if not with_loss_tensor: - return flatten_per_replica_values(distribution_strategy, - grouped_outputs) - - if not isinstance(grouped_outputs, list): - grouped_outputs = [grouped_outputs] - # reduce loss tensor before adding it to the list of fetches - loss = distribution_strategy.reduce(tf.distribute.ReduceOp.SUM, - grouped_outputs[0], axis=None) - all_outputs = flatten_per_replica_values(distribution_strategy, - grouped_outputs[1:]) - if (backend.is_tpu_strategy(distribution_strategy) and - tf.compat.v1.executing_eagerly_outside_functions()): - # Choose 1 value per replica in the TPU case since all replicas produce the - # same output. - # We only do this in eager mode for now since this function is used in - # both graph and eager mode and in the graph case we currently don't use - # experimental_run so would need to be removed when we converge the graph - # code path as well. - all_outputs = all_outputs[::distribution_strategy.num_replicas_in_sync] - return [loss] + all_outputs + """Unwrap the list of outputs contained in the PerReplica parameters.""" + if mode == ModeKeys.PREDICT: + return flatten_per_replica_values(strategy, grouped_outputs) + + # In the case of fit/eval, the grouped_outputs is a dict, whereas in + # predict, the output is as same structure as model output. 
They need to be + # treated differently + total_loss = strategy.reduce( + tf.distribute.ReduceOp.SUM, grouped_outputs["total_loss"][0], axis=None + ) + output_losses = flatten_per_replica_values( + strategy, grouped_outputs["output_losses"] + ) + metrics = flatten_per_replica_values(strategy, grouped_outputs["metrics"]) + batch_size = strategy.reduce( + tf.distribute.ReduceOp.SUM, grouped_outputs["batch_size"], axis=None + ) + if ( + backend.is_tpu_strategy(strategy) + and tf.compat.v1.executing_eagerly_outside_functions() + ): + # Choose 1 value per replica in the TPU case since all replicas produce + # the same output. + # We only do this in eager mode for now since this function is used in + # both graph and eager mode and in the graph case we currently don't use + # experimental_run so would need to be removed when we converge the + # graph code path as well. + output_losses = output_losses[:: strategy.num_replicas_in_sync] + metrics = metrics[:: strategy.num_replicas_in_sync] + return { + "total_loss": [total_loss], + "output_losses": output_losses, + "metrics": metrics, + "batch_size": batch_size, + } + + +def unwrap_outputs( + distribution_strategy, grouped_outputs, with_loss_tensor=False +): + """Unwrap the list of outputs contained in the PerReplica parameters. + + This function calls `flatten_per_replica_values` to parse each of the input + parameters into a list of outputs on the different devices. If we set + `with_loss_tensor` to be True, we also call `reduce` on the list of losses + on the different devices to give us one loss tensor. + + Args: + distribution_strategy: DistributionStrategy used to distribute training + and validation. + grouped_outputs: PerReplica outputs returned from the train or test + function that we ran on each device. + with_loss_tensor: Boolean that indicates if we need to add the reduced + loss tensor as one of the outputs. + + Returns: + Values of each of the PerReplica outputs. + + """ + if not with_loss_tensor: + return flatten_per_replica_values( + distribution_strategy, grouped_outputs + ) + + if not isinstance(grouped_outputs, list): + grouped_outputs = [grouped_outputs] + # reduce loss tensor before adding it to the list of fetches + loss = distribution_strategy.reduce( + tf.distribute.ReduceOp.SUM, grouped_outputs[0], axis=None + ) + all_outputs = flatten_per_replica_values( + distribution_strategy, grouped_outputs[1:] + ) + if ( + backend.is_tpu_strategy(distribution_strategy) + and tf.compat.v1.executing_eagerly_outside_functions() + ): + # Choose 1 value per replica in the TPU case since all replicas produce + # the same output. + # We only do this in eager mode for now since this function is used in + # both graph and eager mode and in the graph case we currently don't use + # experimental_run so would need to be removed when we converge the + # graph code path as well. + all_outputs = all_outputs[:: distribution_strategy.num_replicas_in_sync] + return [loss] + all_outputs def flatten_per_replica_values(distribution_strategy, per_replica_values): - """Unwraps and flattens a nest of PerReplica parameters. + """Unwraps and flattens a nest of PerReplica parameters. + + PerReplica values have one value associated with each device. Each entry in + the PerReplica dict has a device `key` and the corresponding value on the + device as the `value`. In this function we take a PerReplica value or a list + of PerReplica values and return all the values in the PerReplica dict. - PerReplica values have one value associated with each device. 
Each entry in - the PerReplica dict has a device `key` and the corresponding value on the - device as the `value`. In this function we take a PerReplica value or a list - of PerReplica values and return all the values in the PerReplica dict. + Args: + distribution_strategy: DistributionStrategy used to distribute training + and validation. + per_replica_values: List of PerReplica object or a single PerReplica + object. - Args: - distribution_strategy: DistributionStrategy used to distribute training and - validation. - per_replica_values: List of PerReplica object or a single PerReplica object. + Returns: + List of values of all the PerReplica objects. - Returns: - List of values of all the PerReplica objects. + """ - """ - # pylint: disable=g-complex-comprehension - # This function takes a PerReplica object or a list of PerReplica objects and - # returns all the values associated with it. - return [e for flattened in tf.nest.flatten(per_replica_values) - for e in distribution_strategy.unwrap(flattened)] + # This function takes a PerReplica object or a list of PerReplica objects + # and returns all the values associated with it. + return [ + e + for flattened in tf.nest.flatten(per_replica_values) + for e in distribution_strategy.unwrap(flattened) + ] def validate_callbacks(input_callbacks, optimizer): - """Validate whether given callbacks are supported by DistributionStrategy. - - Args: - input_callbacks: List of callbacks passed by the user to fit. - optimizer: Optimizer instance used to train the model. - - Raises: - ValueError: If `LearningRateScheduler` or `ReduceLROnPlateau` is one of the - callbacks passed. - ValueError: If `write_grads` is one of the parameters passed as part of the - TensorBoard callback. - """ - if input_callbacks: - for callback in input_callbacks: - if isinstance(callback, (callbacks.LearningRateScheduler, - callbacks.ReduceLROnPlateau)): - - if not isinstance(optimizer, optimizer_v2.OptimizerV2): - raise ValueError('You must specify a Keras Optimizer V2 when using ' - '%s callback with DistributionStrategy.' % callback) - - # If users want to use the TensorBoard callback they cannot use certain - # features of the callback that involve accessing model attributes and - # running ops. - if isinstance(callback, callbacks.TensorBoard): - if getattr(callback, 'write_grads', False): - logging.warning( - UserWarning( - '`write_grads` in the TensorBoard callback is not supported ' - 'when using DistributionStrategy. Setting `write_grads` ' - 'to `False`.')) - callback.write_grads = False - - -def validate_distributed_dataset_inputs(distribution_strategy, x, y, - sample_weights=None): - """Validate all the components of a DistributedValue Dataset input. - - Args: - distribution_strategy: The current DistributionStrategy used to call - `fit`/`evaluate`. - x: Input Dataset DistributedValue object. For example, when we use - `MirroredStrategy` this is a PerReplica object with a tensor for each - device set in the dict. x can also be a tuple or dict. The keys of the - dict should match the names of the input layers of the model. - y: Target Dataset DistributedValue object. For example, when we use - `MirroredStrategy` this is a PerReplica object with a tensor for each - device set in the dict. y can also be a tuple or dict. The keys of the - dict should match the names of the output layers of the model. - sample_weights: Sample weights Dataset DistributedValue object. 
For example, - when we use `MirroredStrategy` this is a PerReplica object with a tensor - for each device set in the dict. - - Returns: - The unwrapped values list of the x and y DistributedValues inputs. - - Raises: - ValueError: If x and y do not have support for being evaluated as tensors. - or if x and y contain elements that are not tensors or if x and y - contain elements that have a shape or dtype mismatch. - """ - # If the input and target used to call the model are not dataset tensors, - # we need to raise an error. When using a DistributionStrategy, the input - # and targets to a model should be from a `tf.data.Dataset`. - - # If each element of x and y are not tensors, we cannot standardize and - # validate the input and targets. - x_values_list = validate_per_replica_inputs(distribution_strategy, x) - - if y is not None: - y_values_list = validate_per_replica_inputs(distribution_strategy, y) - else: - y_values_list = None - - if sample_weights is not None: - sample_weights_list = validate_per_replica_inputs(distribution_strategy, - sample_weights) - else: - sample_weights_list = None - - # Return the unwrapped values to avoid calling `unwrap` a second time. - return x_values_list, y_values_list, sample_weights_list + """Validate whether given callbacks are supported by DistributionStrategy. + + Args: + input_callbacks: List of callbacks passed by the user to fit. + optimizer: Optimizer instance used to train the model. + + Raises: + ValueError: If `LearningRateScheduler` or `ReduceLROnPlateau` is one of + the callbacks passed. + ValueError: If `write_grads` is one of the parameters passed as part of + the TensorBoard callback. + """ + if input_callbacks: + for callback in input_callbacks: + if isinstance( + callback, + (callbacks.LearningRateScheduler, callbacks.ReduceLROnPlateau), + ): + + if not isinstance(optimizer, optimizer_v2.OptimizerV2): + raise ValueError( + "You must specify a Keras Optimizer V2 when using " + "%s callback with DistributionStrategy." % callback + ) + + # If users want to use the TensorBoard callback they cannot use + # certain features of the callback that involve accessing model + # attributes and running ops. + if isinstance(callback, callbacks.TensorBoard): + if getattr(callback, "write_grads", False): + logging.warning( + UserWarning( + "`write_grads` in the TensorBoard callback is not " + "supported when using DistributionStrategy. " + "Setting `write_grads` to `False`." + ) + ) + callback.write_grads = False + + +def validate_distributed_dataset_inputs( + distribution_strategy, x, y, sample_weights=None +): + """Validate all the components of a DistributedValue Dataset input. + + Args: + distribution_strategy: The current DistributionStrategy used to call + `fit`/`evaluate`. + x: Input Dataset DistributedValue object. For example, when we use + `MirroredStrategy` this is a PerReplica object with a tensor for each + device set in the dict. x can also be a tuple or dict. The keys of the + dict should match the names of the input layers of the model. + y: Target Dataset DistributedValue object. For example, when we use + `MirroredStrategy` this is a PerReplica object with a tensor for each + device set in the dict. y can also be a tuple or dict. The keys of the + dict should match the names of the output layers of the model. + sample_weights: Sample weights Dataset DistributedValue object. For + example, when we use `MirroredStrategy` this is a PerReplica object + with a tensor for each device set in the dict. 
+
+
+def validate_distributed_dataset_inputs(
+    distribution_strategy, x, y, sample_weights=None
+):
+    """Validate all the components of a DistributedValue Dataset input.
+
+    Args:
+        distribution_strategy: The current DistributionStrategy used to call
+            `fit`/`evaluate`.
+        x: Input Dataset DistributedValue object. For example, when we use
+            `MirroredStrategy` this is a PerReplica object with a tensor for each
+            device set in the dict. x can also be a tuple or dict. The keys of the
+            dict should match the names of the input layers of the model.
+        y: Target Dataset DistributedValue object. For example, when we use
+            `MirroredStrategy` this is a PerReplica object with a tensor for each
+            device set in the dict. y can also be a tuple or dict. The keys of the
+            dict should match the names of the output layers of the model.
+        sample_weights: Sample weights Dataset DistributedValue object. For
+            example, when we use `MirroredStrategy` this is a PerReplica object
+            with a tensor for each device set in the dict.
+
+    Returns:
+        The unwrapped values list of the x and y DistributedValues inputs.
+
+    Raises:
+        ValueError: If x and y do not have support for being evaluated as
+            tensors, or if x and y contain elements that are not tensors, or if
+            x and y contain elements that have a shape or dtype mismatch.
+    """
+    # If the input and target used to call the model are not dataset tensors,
+    # we need to raise an error. When using a DistributionStrategy, the input
+    # and targets to a model should be from a `tf.data.Dataset`.
+
+    # If each element of x and y are not tensors, we cannot standardize and
+    # validate the input and targets.
+    x_values_list = validate_per_replica_inputs(distribution_strategy, x)
+
+    if y is not None:
+        y_values_list = validate_per_replica_inputs(distribution_strategy, y)
+    else:
+        y_values_list = None
+
+    if sample_weights is not None:
+        sample_weights_list = validate_per_replica_inputs(
+            distribution_strategy, sample_weights
+        )
+    else:
+        sample_weights_list = None
+
+    # Return the unwrapped values to avoid calling `unwrap` a second time.
+    return x_values_list, y_values_list, sample_weights_list


 def validate_per_replica_inputs(distribution_strategy, x):
-  """Validates PerReplica dataset input list.
-
-  Args:
-    distribution_strategy: The current DistributionStrategy used to call
-      `fit`, `evaluate` and `predict`.
-    x: A list of PerReplica objects that represent the input or
-      target values.
-
-  Returns:
-    List containing the first element of each of the PerReplica objects in
-    the input list.
-
-  Raises:
-    ValueError: If any of the objects in the `per_replica_list` is not a tensor.
-
-  """
-  # Convert the inputs and targets into a list of PerReplica objects.
-  per_replica_list = tf.nest.flatten(x)
-  x_values_list = []
-  for x in per_replica_list:
-    # At this point x should contain only tensors.
-    x_values = distribution_strategy.unwrap(x)
-    for value in x_values:
-      if not tf.is_tensor(value):
-        raise ValueError('Dataset input to the model should be tensors instead '
-                         'they are of type {}'.format(type(value)))
-
-    if not tf.executing_eagerly():
-      # Validate that the shape and dtype of all the elements in x are the same.
-      validate_all_tensor_shapes(x, x_values)
-      validate_all_tensor_types(x, x_values)
-
-    x_values_list.append(x_values[0])
-  return x_values_list
+    """Validates PerReplica dataset input list.
+
+    Args:
+        distribution_strategy: The current DistributionStrategy used to call
+            `fit`, `evaluate` and `predict`.
+        x: A list of PerReplica objects that represent the input or
+            target values.
+
+    Returns:
+        List containing the first element of each of the PerReplica objects in
+        the input list.
+
+    Raises:
+        ValueError: If any of the objects in the `per_replica_list` is not a
+            tensor.
+
+    """
+    # Convert the inputs and targets into a list of PerReplica objects.
+    per_replica_list = tf.nest.flatten(x)
+    x_values_list = []
+    for x in per_replica_list:
+        # At this point x should contain only tensors.
+        x_values = distribution_strategy.unwrap(x)
+        for value in x_values:
+            if not tf.is_tensor(value):
+                raise ValueError(
+                    "Dataset input to the model should be tensors; instead "
+                    "they are of type {}".format(type(value))
+                )
+
+        if not tf.executing_eagerly():
+            # Validate that the shape and dtype of all the elements in x are
+            # the same.
+            validate_all_tensor_shapes(x, x_values)
+            validate_all_tensor_types(x, x_values)
+
+        x_values_list.append(x_values[0])
+    return x_values_list
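A standalone sketch of the consistency checks these helpers perform, assuming plain `tf.Tensor` values standing in for the unwrapped per-replica components:

import tensorflow as tf

def check_replica_values(values):
    # Mirrors validate_all_tensor_types/validate_all_tensor_shapes: every
    # replica must see the same dtype and the same static shape.
    dtype, shape = values[0].dtype, values[0].shape.as_list()
    for v in values[1:]:
        if v.dtype != dtype:
            raise ValueError(f"dtype mismatch: {v.dtype} vs {dtype}")
        if v.shape.as_list() != shape:
            raise ValueError(f"shape mismatch: {v.shape} vs {shape}")

check_replica_values([tf.zeros([4, 2]), tf.zeros([4, 2])])  # passes
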


 def validate_all_tensor_types(x, x_values):
-  x_dtype = x_values[0].dtype
-  for i in range(1, len(x_values)):
-    if x_dtype != x_values[i].dtype:
-      raise ValueError('Input tensor dtypes do not match for distributed tensor'
-                       ' inputs {}'.format(x))
+    x_dtype = x_values[0].dtype
+    for i in range(1, len(x_values)):
+        if x_dtype != x_values[i].dtype:
+            raise ValueError(
+                "Input tensor dtypes do not match for distributed tensor"
+                " inputs {}".format(x)
+            )


 def validate_all_tensor_shapes(x, x_values):
-  # Validate that the shape of all the elements in x have the same shape
-  x_shape = x_values[0].shape.as_list()
-  for i in range(1, len(x_values)):
-    if x_shape != x_values[i].shape.as_list():
-      raise ValueError('Input tensor shapes do not match for distributed tensor'
-                       ' inputs {}'.format(x))
+    # Validate that the shape of all the elements in x have the same shape
+    x_shape = x_values[0].shape.as_list()
+    for i in range(1, len(x_values)):
+        if x_shape != x_values[i].shape.as_list():
+            raise ValueError(
+                "Input tensor shapes do not match for distributed tensor"
+                " inputs {}".format(x)
+            )


 def _wait_for_variable_initialization(session):
-  """Utility to wait for variables to be initialized."""
-  all_variables = backend._get_variables(backend.get_graph())  # pylint: disable=protected-access
-  candidate_vars = []
-  for v in all_variables:
-    if not getattr(v, '_keras_initialized', False):
-      candidate_vars.append(v)
-
-  if not candidate_vars:
-    return
-
-  while True:
-    is_initialized = session.run(
-        [tf.compat.v1.is_variable_initialized(v) for v in candidate_vars])
-    uninitialized_vars = []
-    for flag, v in zip(is_initialized, candidate_vars):
-      if not flag:
-        uninitialized_vars.append(v)
-      v._keras_initialized = True  # pylint: disable=protected-access
-    if not uninitialized_vars:
-      break
+    """Utility to wait for variables to be initialized."""
+    all_variables = backend._get_variables(backend.get_graph())
+    candidate_vars = []
+    for v in all_variables:
+        if not getattr(v, "_keras_initialized", False):
+            candidate_vars.append(v)
+
+    if not candidate_vars:
+        return
+
+    while True:
+        is_initialized = session.run(
+            [tf.compat.v1.is_variable_initialized(v) for v in candidate_vars]
+        )
+        uninitialized_vars = []
+        for flag, v in zip(is_initialized, candidate_vars):
+            if not flag:
+                uninitialized_vars.append(v)
+            v._keras_initialized = True
+        if not uninitialized_vars:
+            break


 def init_restore_or_wait_for_variables():
-  """Initialize or restore variables or wait for variables to be initialized."""
-  backend._initialize_variables(backend._get_session())  # pylint: disable=protected-access
+    """Initialize or restore variables or wait for variables to be
+    initialized."""
+    backend._initialize_variables(backend._get_session())


 def validate_inputs(x, y):
-  """Validate inputs when using DistributionStrategy.
-
-  Args:
-    x: Model Inputs.
-    y: Model Targets.
-
-  Raises:
-    ValueError: if input is not a Dataset or a numpy array(when we use
-      MirroredStrategy).
-  """
-  if (isinstance(x, tf.compat.v1.data.Iterator) or
-      isinstance(y, tf.compat.v1.data.Iterator)):
-    raise ValueError('`DistributionStrategy` does not support inputs of type '
                     'Iterator. You must pass a `tf.data.Dataset` object or a '
-                     'numpy array as input.')
+    """Validate inputs when using DistributionStrategy.
+
+    Args:
+        x: Model Inputs.
+        y: Model Targets.
+
+    Raises:
+        ValueError: if input is not a Dataset or a numpy array (when we use
+            MirroredStrategy).
+    """
+    if isinstance(x, tf.compat.v1.data.Iterator) or isinstance(
+        y, tf.compat.v1.data.Iterator
+    ):
+        raise ValueError(
+            "`DistributionStrategy` does not support inputs of type "
+            "Iterator. You must pass a `tf.data.Dataset` object or a "
+            "numpy array as input."
+        )
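For context on the guard above, a minimal sketch of the accepted and rejected input kinds (the toy data is an illustrative assumption):

import numpy as np
import tensorflow as tf

x = np.random.random((32, 4)).astype("float32")
y = np.random.random((32, 1)).astype("float32")

# Accepted by the check above: numpy arrays, or a tf.data.Dataset built from
# them. A v1-style Iterator would be rejected with the ValueError shown.
dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(8)

def validate(x, y):
    for v in (x, y):
        if isinstance(v, tf.compat.v1.data.Iterator):
            raise ValueError("pass a tf.data.Dataset or numpy array instead")

validate(dataset, None)  # passes
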


 def is_dataset_shape_fully_defined(dataset):
-  """Returns whether a dataset contains a final partial batch."""
-  shapes = tf.nest.flatten(tf.compat.v1.data.get_output_shapes(dataset))
-  unknown_shapes = [s for s in shapes if not s.is_fully_defined()]
-  return not unknown_shapes
-
-
-def process_batch_and_step_size(strategy,
-                                inputs,
-                                batch_size,
-                                steps_per_epoch,
-                                mode,
-                                validation_split=0.):
-  """Process the batch size and step size based on input and dist strategy."""
-  first_x_value = tf.nest.flatten(inputs)[0]
-  if isinstance(first_x_value, np.ndarray):
-    num_samples = first_x_value.shape[0]
-    if validation_split and 0. < validation_split < 1.:
-      num_samples = int(num_samples * (1 - validation_split))
-    # Until support for partial batch is implemented across all
-    # functions and distribution strategy, we pass `mode` to selectively
-    # relax the constraint to consume all the training samples.
-    steps_per_epoch, batch_size = get_input_params(
-        strategy, num_samples, steps_per_epoch, batch_size, mode=mode)
-  return batch_size, steps_per_epoch
-
-
-def get_input_params(distribution_strategy,
-                     num_samples,
-                     steps,
-                     batch_size,
-                     mode=None):
-  """Calculate the number of batches and steps/steps_per_epoch.
-
-  Args:
-    distribution_strategy: The DistributionStrategy used to compile the model.
-    num_samples: The number of samples from which we determine the batch size
-      and steps.
-    steps: The specified number of steps.
-    batch_size: The specified batch_size.
-    mode: ModeKey representing whether input will be used for training,
-      evaluation, or prediction. This is used to relax the constraints on
-      consuming all the training samples to keep compatibility till we support
-      partial batches. If none, then partial batches are not allowed.
-
-  Returns:
-    steps: The steps or steps_per_epoch argument depending on if a user is
-      calling `fit`, `evaluate` or `predict`. If the is_training flag is set
-      we don't require the number of samples to be used completely.
-    batch_size: The batch size to be used in model iterations.
-
-  Raises:
-    ValueError: If the number of batches or steps evaluates to 0.
-
-  """
-  # TODO(b/118776054): Use global batch size for Keras/DS support.
-  # Currently this is only supported in TPUStrategy and CoreMirroredStrategy.
-  use_per_replica_batch = not dist_utils.global_batch_size_supported(
-      distribution_strategy)
-
-  # TODO(b/128995245): In eager mode, uneven batch sizes are allowed except for
-  # `fit()` on TPUStrategy.
-  # In graph mode, the zero batch case in batch norm is not handled due to
-  # XLA-GPU regression. Uneven batch sizes are not allowed except
-  # for `test()` and `predict()` on TPUStrategy.
-  if tf.executing_eagerly():
-    allow_partial_batch = (
-        mode != ModeKeys.TRAIN or
-        not backend.is_tpu_strategy(distribution_strategy))
-  else:
-    allow_partial_batch = (
-        mode == ModeKeys.TRAIN or
-        ((mode == ModeKeys.PREDICT or mode == ModeKeys.TEST) and
-         backend.is_tpu_strategy(distribution_strategy)))
-
-  if steps is None:
-    if batch_size is None:
-      # If neither the batch size or number of steps are set. We choose the
-      # global batch size as the minimum of number of samples and 32. 32 is
-      # chosen to provide backward compatibility.
-      global_batch_size = min(num_samples, 32)
+    """Returns whether a dataset contains a final partial batch."""
+    shapes = tf.nest.flatten(tf.compat.v1.data.get_output_shapes(dataset))
+    unknown_shapes = [s for s in shapes if not s.is_fully_defined()]
+    return not unknown_shapes
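A quick illustration of when a dataset's shapes are fully defined (the toy dataset is an assumption for illustration):

import tensorflow as tf

dataset = tf.data.Dataset.range(10).batch(3)
# 10 is not divisible by 3, so the final batch is partial and the static
# batch dimension is unknown.
print(tf.compat.v1.data.get_output_shapes(dataset))  # TensorShape([None])

# drop_remainder=True makes every batch shape fully defined.
dataset = tf.data.Dataset.range(10).batch(3, drop_remainder=True)
print(tf.compat.v1.data.get_output_shapes(dataset))  # TensorShape([3])
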
+
+
+def process_batch_and_step_size(
+    strategy, inputs, batch_size, steps_per_epoch, mode, validation_split=0.0
+):
+    """Process the batch size and step size based on input and dist strategy."""
+    first_x_value = tf.nest.flatten(inputs)[0]
+    if isinstance(first_x_value, np.ndarray):
+        num_samples = first_x_value.shape[0]
+        if validation_split and 0.0 < validation_split < 1.0:
+            num_samples = int(num_samples * (1 - validation_split))
+        # Until support for partial batch is implemented across all
+        # functions and distribution strategy, we pass `mode` to selectively
+        # relax the constraint to consume all the training samples.
+        steps_per_epoch, batch_size = get_input_params(
+            strategy, num_samples, steps_per_epoch, batch_size, mode=mode
+        )
+    return batch_size, steps_per_epoch
+
+
+def get_input_params(
+    distribution_strategy, num_samples, steps, batch_size, mode=None
+):
+    """Calculate the number of batches and steps/steps_per_epoch.
+
+    Args:
+        distribution_strategy: The DistributionStrategy used to compile the model.
+        num_samples: The number of samples from which we determine the batch size
+            and steps.
+        steps: The specified number of steps.
+        batch_size: The specified batch_size.
+        mode: ModeKey representing whether input will be used for training,
+            evaluation, or prediction. This is used to relax the constraints on
+            consuming all the training samples to keep compatibility till we
+            support partial batches. If None, then partial batches are not
+            allowed.
+
+    Returns:
+        steps: The steps or steps_per_epoch argument depending on whether a user
+            is calling `fit`, `evaluate` or `predict`. If the is_training flag
+            is set we don't require the number of samples to be used completely.
+        batch_size: The batch size to be used in model iterations.
+
+    Raises:
+        ValueError: If the number of batches or steps evaluates to 0.
+
+    """
+    # TODO(b/118776054): Use global batch size for Keras/DS support.
+    # Currently this is only supported in TPUStrategy and CoreMirroredStrategy.
+    use_per_replica_batch = not dist_utils.global_batch_size_supported(
+        distribution_strategy
+    )
+
+    # TODO(b/128995245): In eager mode, uneven batch sizes are allowed except
+    # for `fit()` on TPUStrategy.
+    # In graph mode, the zero batch case in batch norm is not handled due to
+    # XLA-GPU regression. Uneven batch sizes are not allowed except
+    # for `test()` and `predict()` on TPUStrategy.
+    if tf.executing_eagerly():
+        allow_partial_batch = (
+            mode != ModeKeys.TRAIN
+            or not backend.is_tpu_strategy(distribution_strategy)
+        )
     else:
-      # If the user provided the batch size we need to handle the case
-      # between different strategies that use the global/per-replica batch size
-      global_batch_size = batch_size
-      if use_per_replica_batch:
-        global_batch_size *= distribution_strategy.num_replicas_in_sync
-    if allow_partial_batch:
-      steps = np.ceil(num_samples / global_batch_size).astype(int)
+        allow_partial_batch = mode == ModeKeys.TRAIN or (
+            (mode == ModeKeys.PREDICT or mode == ModeKeys.TEST)
+            and backend.is_tpu_strategy(distribution_strategy)
+        )
+
+    if steps is None:
+        if batch_size is None:
+            # If neither the batch size nor the number of steps is set, we
+            # choose the global batch size as the minimum of the number of
+            # samples and 32. 32 is chosen to provide backward compatibility.
+            global_batch_size = min(num_samples, 32)
+        else:
+            # If the user provided the batch size we need to handle the case
+            # between different strategies that use the global/per-replica
+            # batch size
+            global_batch_size = batch_size
+            if use_per_replica_batch:
+                global_batch_size *= distribution_strategy.num_replicas_in_sync
+        if allow_partial_batch:
+            steps = np.ceil(num_samples / global_batch_size).astype(int)
+        else:
+            if num_samples % global_batch_size:
+                raise ValueError(
+                    "The number of samples %s is not divisible by "
+                    "batch size %s." % (num_samples, global_batch_size)
+                )
+            steps = num_samples // global_batch_size
     else:
-    if batch_size is None:
-      # We calculate the batch size based on the number of steps specified
-      if num_samples % steps:
-        raise ValueError('The number of samples %s is not divisible by '
-                         'steps %s. Please change the number of steps to a '
-                         'value that can consume all the samples' % (
-                             num_samples, steps))
-      global_batch_size = num_samples // steps
+        if batch_size is None:
+            # We calculate the batch size based on the number of steps specified
+            if num_samples % steps:
+                raise ValueError(
+                    "The number of samples %s is not divisible by "
+                    "steps %s. Please change the number of steps to a "
+                    "value that can consume all the samples"
+                    % (num_samples, steps)
+                )
+            global_batch_size = num_samples // steps
+        else:
+            # If the user provided the batch size we need to handle the case
+            # between different strategies that use the global/per-replica
+            # batch size
+            global_batch_size = batch_size
+            if use_per_replica_batch:
+                global_batch_size *= distribution_strategy.num_replicas_in_sync
+
+        min_num_samples = global_batch_size * steps
+        if allow_partial_batch:
+            min_num_samples = (
+                global_batch_size * (steps - 1) + 1 if steps > 1 else 0
+            )
+
+        if num_samples < min_num_samples:
+            raise ValueError(
+                "Number of samples %s is less than samples required "
+                "for specified batch_size %s and steps %s"
+                % (num_samples, global_batch_size, steps)
+            )
+
+    # We need to return the per replica or global batch size based on the
+    # strategy
+    if use_per_replica_batch:
+        if global_batch_size % distribution_strategy.num_replicas_in_sync:
+            raise ValueError(
+                "The batch size (%s) could not be sharded evenly across the "
+                "sync replicas (%s) in the distribution strategy."
+                % (
+                    global_batch_size,
+                    distribution_strategy.num_replicas_in_sync,
+                )
+            )
+        batch_size = (
+            global_batch_size // distribution_strategy.num_replicas_in_sync
+        )
     else:
-      # If the user provided the batch size we need to handle the case
-      # between different strategies that use the global/per-replica batch size
-      global_batch_size = batch_size
-      if use_per_replica_batch:
-        global_batch_size *= distribution_strategy.num_replicas_in_sync
-
-    min_num_samples = global_batch_size * steps
-    if allow_partial_batch:
-      min_num_samples = global_batch_size * (steps-1) + 1 if steps > 1 else 0
-
-    if num_samples < min_num_samples:
-      raise ValueError('Number of samples %s is less than samples required '
-                       'for specified batch_size %s and steps %s' % (
-                           num_samples, global_batch_size, steps))
-
-  # We need to return the per replica or global batch size based on the strategy
-  if use_per_replica_batch:
-    if global_batch_size % distribution_strategy.num_replicas_in_sync:
-      raise ValueError(
-          'The batch size (%s) could not be sharded evenly across the sync '
-          'replicas (%s) in the distribution strategy.' % (
-              global_batch_size, distribution_strategy.num_replicas_in_sync))
-    batch_size = global_batch_size // distribution_strategy.num_replicas_in_sync
-  else:
-    batch_size = global_batch_size
-
-  return steps, batch_size
+        batch_size = global_batch_size
+
+    return steps, batch_size
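A worked example of the arithmetic above, assuming a strategy that uses per-replica batch sizes with four replicas in sync (the numbers are illustrative):

import numpy as np

num_samples, batch_size, num_replicas = 1000, 8, 4

# Per-replica batch sizes are scaled up to a global batch size first.
global_batch_size = batch_size * num_replicas  # 32

# With partial batches allowed, steps rounds up; otherwise 1000 % 32 != 0
# would trigger the divisibility ValueError above.
steps = int(np.ceil(num_samples / global_batch_size))  # 32
print(global_batch_size, steps)
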


 def get_batch_dimension(iterator):
-  shapes = tf.nest.flatten(tf.compat.v1.data.get_output_shapes(iterator))
-  # Take the batch size from the first element, as it should be the same for
-  # all.
-  dims = shapes[0].dims
-  return dims[0] if dims else None
+    shapes = tf.nest.flatten(tf.compat.v1.data.get_output_shapes(iterator))
+    # Take the batch size from the first element, as it should be the same for
+    # all.
+    dims = shapes[0].dims
+    return dims[0] if dims else None


 def get_iterator(dataset, distribution_strategy):
-  with distribution_strategy.scope():
-    iterator = distribution_strategy.make_dataset_iterator(dataset)
-    initialize_iterator(iterator, distribution_strategy)
-  return iterator
+    with distribution_strategy.scope():
+        iterator = distribution_strategy.make_dataset_iterator(dataset)
+        initialize_iterator(iterator, distribution_strategy)
+    return iterator


 def initialize_iterator(iterator, distribution_strategy):
-  with distribution_strategy.scope():
-    init_op = tf.group(iterator.initializer)
-    if not tf.executing_eagerly():
-      backend.get_session((init_op,)).run(init_op)
+    with distribution_strategy.scope():
+        init_op = tf.group(iterator.initializer)
+        if not tf.executing_eagerly():
+            backend.get_session((init_op,)).run(init_op)


 def _get_input_from_iterator(iterator, model):
-  """Get elements from the iterator and verify the input shape and type."""
-  next_element = iterator.get_next()
-
-  # `len(nest.flatten(x))` is going to not count empty elements such as {}.
-  # len(nest.flatten([[0,1,2], {}])) is 3 and not 4. The `next_element` is
-  # going to get flattened in `_prepare_feed_values` to work around that. Empty
-  # elements are going to get filtered out as part of the flattening.
-  if len(tf.nest.flatten(next_element)) == len(model.inputs):
-    x = next_element
-    y = None
-    sample_weights = None
-  elif len(tf.nest.flatten(next_element)) == (len(model.inputs) +
-                                              len(model.outputs)):
-    x, y = next_element
-    sample_weights = None
-  else:
-    x, y, sample_weights = next_element
-
-  # Validate that all the elements in x and y are of the same type and shape.
-  validate_distributed_dataset_inputs(
-      model._distribution_strategy, x, y, sample_weights)
-  return x, y, sample_weights
+    """Get elements from the iterator and verify the input shape and type."""
+    next_element = iterator.get_next()
+
+    # `len(nest.flatten(x))` is going to not count empty elements such as {}.
+    # len(nest.flatten([[0,1,2], {}])) is 3 and not 4. The `next_element` is
+    # going to get flattened in `_prepare_feed_values` to work around that.
+    # Empty elements are going to get filtered out as part of the flattening.
+    if len(tf.nest.flatten(next_element)) == len(model.inputs):
+        x = next_element
+        y = None
+        sample_weights = None
+    elif len(tf.nest.flatten(next_element)) == (
+        len(model.inputs) + len(model.outputs)
+    ):
+        x, y = next_element
+        sample_weights = None
+    else:
+        x, y, sample_weights = next_element
+
+    # Validate that all the elements in x and y are of the same type and shape.
+    validate_distributed_dataset_inputs(
+        model._distribution_strategy, x, y, sample_weights
+    )
+    return x, y, sample_weights
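The flattening quirk mentioned in the comment above is easy to verify directly:

import tensorflow as tf

# Empty structures such as {} contribute nothing to the flattened element
# count, which is exactly what the comment in the code relies on.
print(len(tf.nest.flatten([[0, 1, 2], {}])))  # 3, not 4

# _get_input_from_iterator compares this count against len(model.inputs) and
# len(model.inputs) + len(model.outputs) to decide whether the dataset
# element is x, (x, y), or (x, y, sample_weights).
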
+ """ + strategy = model._distribution_strategy + inputs, targets, sample_weights = _get_input_from_iterator(inputs, model) + if backend.is_tpu_strategy(strategy): + if sample_weights is not None: + raise ValueError("TPUStrategy does not support sample weights.") + + # When the inputs are dict, then we want to flatten it in the same order as + # the input layers, such that the data are fed into the input layers in the + # correct order. + if isinstance(inputs, dict): + inputs = [inputs[key] for key in model._feed_input_names] + if is_distributing_by_cloning(model): + inputs = flatten_per_replica_values(strategy, inputs) + targets = flatten_per_replica_values(strategy, targets) + # Expand 1-dimensional inputs. + # TODO(b/124535720): Remove once this standarize data logic is shared + # with main flow. + inputs, targets = tf.nest.map_structure( + training_utils_v1.standardize_single_array, (inputs, targets) + ) + else: + inputs = training_utils_v1.ModelInputs(inputs).as_list() + + if mode == ModeKeys.PREDICT: + sample_weights = [] + targets = [] + elif sample_weights is not None and is_distributing_by_cloning(model): + if tf.executing_eagerly() and not model._compile_distribution: + raise NotImplementedError( + "`sample_weight` is not supported when using " + "tf.distribute.Strategy in eager mode and " + "cloning=True." + ) + sample_weights = flatten_per_replica_values(strategy, sample_weights) + + ins = [inputs, targets, sample_weights] + return tuple(ins) def is_distributing_by_cloning(model): - """Decide whether this model is going to be distributed via cloning. + """Decide whether this model is going to be distributed via cloning. - We are going to distribute the model by cloning in graph mode. + We are going to distribute the model by cloning in graph mode. - Args: - model: Keras model to distribute. + Args: + model: Keras model to distribute. - Returns: - True if the `model` is going to be distributed using cloning and False - otherwise. - """ - if (backend.is_tpu_strategy(model._distribution_strategy) and - tf.executing_eagerly): # b/137580852 - return False - elif tf.compat.v1.executing_eagerly_outside_functions(): - return bool(model._compile_distribution) - return True + Returns: + True if the `model` is going to be distributed using cloning and False + otherwise. + """ + if ( + backend.is_tpu_strategy(model._distribution_strategy) + and tf.executing_eagerly + ): # b/137580852 + return False + elif tf.compat.v1.executing_eagerly_outside_functions(): + return bool(model._compile_distribution) + return True def _custom_compile_for_predict(model): - """Custom compile for TPU predict mode.""" - if not model.built: - # Model is not compilable because it does not know its number of inputs - # and outputs, nor their shapes and names. We will compile after the first - # time the model gets called on training data. - return - model._is_compiled = True - model.total_loss = None - model.train_function = None - model.test_function = None - model.predict_function = None + """Custom compile for TPU predict mode.""" + if not model.built: + # Model is not compilable because it does not know its number of inputs + # and outputs, nor their shapes and names. We will compile after the + # first time the model gets called on training data. + return + model._is_compiled = True + model.total_loss = None + model.train_function = None + model.test_function = None + model.predict_function = None def _build_network_on_replica(model, mode, inputs=None, targets=None): - """Build an updated model on replicas. 


 def is_distributing_by_cloning(model):
-  """Decide whether this model is going to be distributed via cloning.
+    """Decide whether this model is going to be distributed via cloning.

-  We are going to distribute the model by cloning in graph mode.
+    We are going to distribute the model by cloning in graph mode.

-  Args:
-    model: Keras model to distribute.
+    Args:
+        model: Keras model to distribute.

-  Returns:
-    True if the `model` is going to be distributed using cloning and False
-    otherwise.
-  """
-  if (backend.is_tpu_strategy(model._distribution_strategy) and
-      tf.executing_eagerly):  # b/137580852
-    return False
-  elif tf.compat.v1.executing_eagerly_outside_functions():
-    return bool(model._compile_distribution)
-  return True
+    Returns:
+        True if the `model` is going to be distributed using cloning and False
+        otherwise.
+    """
+    if (
+        backend.is_tpu_strategy(model._distribution_strategy)
+        and tf.executing_eagerly
+    ):  # b/137580852
+        return False
+    elif tf.compat.v1.executing_eagerly_outside_functions():
+        return bool(model._compile_distribution)
+    return True


 def _custom_compile_for_predict(model):
-  """Custom compile for TPU predict mode."""
-  if not model.built:
-    # Model is not compilable because it does not know its number of inputs
-    # and outputs, nor their shapes and names. We will compile after the first
-    # time the model gets called on training data.
-    return
-  model._is_compiled = True
-  model.total_loss = None
-  model.train_function = None
-  model.test_function = None
-  model.predict_function = None
+    """Custom compile for TPU predict mode."""
+    if not model.built:
+        # Model is not compilable because it does not know its number of inputs
+        # and outputs, nor their shapes and names. We will compile after the
+        # first time the model gets called on training data.
+        return
+    model._is_compiled = True
+    model.total_loss = None
+    model.train_function = None
+    model.test_function = None
+    model.predict_function = None


 def _build_network_on_replica(model, mode, inputs=None, targets=None):
-  """Build an updated model on replicas.
-
-  We create a new Keras model while sharing the variables from the old graph.
-  Building a new sub-graph is required since the original keras model creates
-  placeholders for the input and the output that are not accessible till we
-  call iterator.get_next() inside the step_fn for `fit`/`evaluate`/`predict`.
-
-  The sharing of weights and layers between the old and the new model guarantee
-  that we're using Strategy variables and any updates on either model are
-  reflected correctly in callbacks and loop iterations.
-
-  We need to make sure we share the optimizers between the old and the new model
-  as well so that optimizer state is not lost if the user is running fit
-  multiple times.
-
-  Args:
-    model: Model to be replicated across Replicas
-    mode: Which of fit/eval/predict is building the distributed network
-    inputs: Input variables to be passed to the model
-    targets: Target tensor to be passed to model.compile
-
-  Returns:
-    A new model with shared layers with the old model.
-  """
-  # Need to do imports here since we run into a circular dependency error.
-  from keras import models  # pylint: disable=g-import-not-at-top
-  from keras.engine import sequential  # pylint: disable=g-import-not-at-top
-
-  # We rely on the internal methods to avoid having share_weights weights in the
-  # public API.
-  if isinstance(model, sequential.Sequential):
-    updated_model = models._clone_sequential_model(
-        model, input_tensors=inputs, layer_fn=models.share_weights)
-  else:
-    updated_model = models._clone_functional_model(
-        model, input_tensors=inputs, layer_fn=models.share_weights)
-    # Callable losses added directly to a functional Model need to be added
-    # here.
-    updated_model._callable_losses = model._callable_losses
-
-  # Recast all low precision outputs back to float32 since we only casted
-  # the inputs to bfloat16 and not targets. This is done so that we can preserve
-  # precision when calculating the loss value.
-  def _upcast_low_precision_outputs(output):
-    if output.dtype == tf.bfloat16:
-      return tf.cast(output, tf.float32)
+    """Build an updated model on replicas.
+
+    We create a new Keras model while sharing the variables from the old graph.
+    Building a new sub-graph is required since the original keras model creates
+    placeholders for the input and the output that are not accessible till we
+    call iterator.get_next() inside the step_fn for `fit`/`evaluate`/`predict`.
+
+    The sharing of weights and layers between the old and the new model
+    guarantees that we're using Strategy variables and any updates on either
+    model are reflected correctly in callbacks and loop iterations.
+
+    We need to make sure we share the optimizers between the old and the new
+    model as well so that optimizer state is not lost if the user is running fit
+    multiple times.
+
+    Args:
+        model: Model to be replicated across Replicas
+        mode: Which of fit/eval/predict is building the distributed network
+        inputs: Input variables to be passed to the model
+        targets: Target tensor to be passed to model.compile
+
+    Returns:
+        A new model with shared layers with the old model.
+    """
+    # Need to do imports here since we run into a circular dependency error.
+    from keras import models
+    from keras.engine import sequential
+
+    # We rely on the internal methods to avoid having share_weights weights in
+    # the public API.
+    if isinstance(model, sequential.Sequential):
+        updated_model = models._clone_sequential_model(
+            model, input_tensors=inputs, layer_fn=models.share_weights
+        )
+    else:
+        updated_model = models._clone_functional_model(
+            model, input_tensors=inputs, layer_fn=models.share_weights
+        )
+        # Callable losses added directly to a functional Model need to be added
+        # here.
+        updated_model._callable_losses = model._callable_losses
+
+    # Recast all low precision outputs back to float32 since we only cast the
+    # inputs to bfloat16 and not targets. This is done so that we can preserve
+    # precision when calculating the loss value.
+    def _upcast_low_precision_outputs(output):
+        if output.dtype == tf.bfloat16:
+            return tf.cast(output, tf.float32)
+        else:
+            return output
+
+    updated_model.outputs = [
+        _upcast_low_precision_outputs(o) for o in updated_model.outputs
+    ]
+
+    if isinstance(targets, tuple):
+        targets = tf.nest.flatten(targets)
+
+    if mode == ModeKeys.PREDICT and inputs is not None:  # TPU predict case
+        _custom_compile_for_predict(updated_model)
     else:
-      return output
-  updated_model.outputs = [_upcast_low_precision_outputs(o)
-                           for o in updated_model.outputs]
-
-  if isinstance(targets, tuple):
-    targets = tf.nest.flatten(targets)
-
-  if mode == ModeKeys.PREDICT and inputs is not None:  # TPU predict case
-    _custom_compile_for_predict(updated_model)
-  else:
-    updated_model.compile(
-        model.optimizer,
-        model.loss,
-        metrics=metrics_module.clone_metrics(model._compile_metrics),
-        loss_weights=model.loss_weights,
-        sample_weight_mode=model.sample_weight_mode,
-        weighted_metrics=metrics_module.clone_metrics(
-            model._compile_weighted_metrics),
-        target_tensors=targets)
-  return updated_model
-
-
-def _build_distributed_network(model, strategy, mode, inputs=None,
-                               targets=None):
-  """Create a cloned model on each replica."""
-  with backend.get_graph().as_default(), strategy.scope():
-    distributed_model = strategy.extended.call_for_each_replica(
-        _build_network_on_replica,
-        args=(model, mode, inputs, targets))
-    set_distributed_model(model, mode, distributed_model)
+        updated_model.compile(
+            model.optimizer,
+            model.loss,
+            metrics=metrics_module.clone_metrics(model._compile_metrics),
+            loss_weights=model.loss_weights,
+            sample_weight_mode=model.sample_weight_mode,
+            weighted_metrics=metrics_module.clone_metrics(
+                model._compile_weighted_metrics
+            ),
+            target_tensors=targets,
+        )
+    return updated_model


+def _build_distributed_network(
+    model, strategy, mode, inputs=None, targets=None
+):
+    """Create a cloned model on each replica."""
+    with backend.get_graph().as_default(), strategy.scope():
+        distributed_model = strategy.extended.call_for_each_replica(
+            _build_network_on_replica, args=(model, mode, inputs, targets)
+        )
+        set_distributed_model(model, mode, distributed_model)
This is done so that we can preserve - # precision when calculating the loss value. - def _upcast_low_precision_outputs(output): - if output.dtype == tf.bfloat16: - return tf.cast(output, tf.float32) + """Clone and build the given keras_model.""" + # We need to set the import here since we run into a circular dependency + # error. + from keras import models + + cloned_model = models.clone_model(model, input_tensors=inputs) + + # Compile and build model. + if isinstance(model.optimizer, optimizers.TFOptimizer): + optimizer = model.optimizer else: - return output - cloned_model.outputs = [_upcast_low_precision_outputs(o) - for o in cloned_model.outputs] - - if isinstance(targets, tuple): - targets = tf.nest.flatten(targets) - if mode == ModeKeys.PREDICT and inputs is not None: # TPU predict case - _custom_compile_for_predict(cloned_model) - else: - cloned_model.compile( - optimizer, - model.loss, - metrics=metrics_module.clone_metrics(model._compile_metrics), - loss_weights=model.loss_weights, - sample_weight_mode=model.sample_weight_mode, - weighted_metrics=metrics_module.clone_metrics( - model._compile_weighted_metrics), - target_tensors=targets) - return cloned_model + optimizer_config = model.optimizer.get_config() + optimizer = model.optimizer.__class__.from_config(optimizer_config) + + # Recast all low precision outputs back to float32 since we only casted + # the inputs to bfloat16 and not targets. This is done so that we can + # preserve precision when calculating the loss value. + def _upcast_low_precision_outputs(output): + if output.dtype == tf.bfloat16: + return tf.cast(output, tf.float32) + else: + return output + + cloned_model.outputs = [ + _upcast_low_precision_outputs(o) for o in cloned_model.outputs + ] + + if isinstance(targets, tuple): + targets = tf.nest.flatten(targets) + if mode == ModeKeys.PREDICT and inputs is not None: # TPU predict case + _custom_compile_for_predict(cloned_model) + else: + cloned_model.compile( + optimizer, + model.loss, + metrics=metrics_module.clone_metrics(model._compile_metrics), + loss_weights=model.loss_weights, + sample_weight_mode=model.sample_weight_mode, + weighted_metrics=metrics_module.clone_metrics( + model._compile_weighted_metrics + ), + target_tensors=targets, + ) + return cloned_model def clone_model_on_replicas(model, strategy, mode, inputs=None, targets=None): - """Create a cloned model on each replica.""" - with backend.get_graph().as_default(), strategy.scope(): - distributed_model = strategy.extended.call_for_each_replica( - _clone_and_build_model, args=(model, mode, inputs, targets)) - set_distributed_model(model, mode, distributed_model) - if mode == ModeKeys.TRAIN: - model._make_callback_model(distributed_model) + """Create a cloned model on each replica.""" + with backend.get_graph().as_default(), strategy.scope(): + distributed_model = strategy.extended.call_for_each_replica( + _clone_and_build_model, args=(model, mode, inputs, targets) + ) + set_distributed_model(model, mode, distributed_model) + if mode == ModeKeys.TRAIN: + model._make_callback_model(distributed_model) def _make_execution_function(model, mode): - """Makes or reuses function to run one step of distributed model execution.""" - if is_distributing_by_cloning(model): - return _make_execution_function_with_cloning(model, mode) + """Makes or reuses function to run one step of distributed model + execution.""" + if is_distributing_by_cloning(model): + return _make_execution_function_with_cloning(model, mode) - distributed_function = 


 def clone_model_on_replicas(model, strategy, mode, inputs=None, targets=None):
-  """Create a cloned model on each replica."""
-  with backend.get_graph().as_default(), strategy.scope():
-    distributed_model = strategy.extended.call_for_each_replica(
-        _clone_and_build_model, args=(model, mode, inputs, targets))
-    set_distributed_model(model, mode, distributed_model)
-  if mode == ModeKeys.TRAIN:
-    model._make_callback_model(distributed_model)
+    """Create a cloned model on each replica."""
+    with backend.get_graph().as_default(), strategy.scope():
+        distributed_model = strategy.extended.call_for_each_replica(
+            _clone_and_build_model, args=(model, mode, inputs, targets)
+        )
+        set_distributed_model(model, mode, distributed_model)
+    if mode == ModeKeys.TRAIN:
+        model._make_callback_model(distributed_model)


 def _make_execution_function(model, mode):
-  """Makes or reuses function to run one step of distributed model execution."""
-  if is_distributing_by_cloning(model):
-    return _make_execution_function_with_cloning(model, mode)
+    """Makes or reuses function to run one step of distributed model
+    execution."""
+    if is_distributing_by_cloning(model):
+        return _make_execution_function_with_cloning(model, mode)

-  distributed_function = get_distributed_function(model, mode)
-  if distributed_function:
-    return distributed_function
+    distributed_function = get_distributed_function(model, mode)
+    if distributed_function:
+        return distributed_function

-  distribution_function = _make_execution_function_without_cloning(model, mode)
-  set_distributed_function(model, mode, distribution_function)
-  return distribution_function
+    distribution_function = _make_execution_function_without_cloning(
+        model, mode
+    )
+    set_distributed_function(model, mode, distribution_function)
+    return distribution_function


 def _make_execution_function_without_cloning(model, mode):
-  """Creates a function to run one step of distributed model execution."""
-  strategy = model._distribution_strategy
-
-  with strategy.scope():
-    per_replica_function = _make_replica_execution_function(model, mode)
-
-    def distributed_function(input_fn):
-      """A single step of the distributed execution across replicas."""
-      x, y, sample_weights = input_fn()
-      # Call `Model.{train,test,predict}_on_batch` on every replica passing
-      # PerReplicas as arguments. On every replica inside this call, each
-      # PerReplica object will return the value for that replica. The outputs
-      # are PerReplicas too.
-      outputs = strategy.run(per_replica_function, args=(x, y, sample_weights))
-      # Out of PerReplica outputs reduce or pick values to return.
-      all_outputs = unwrap_outputs(
-          strategy, outputs, with_loss_tensor=(mode != ModeKeys.PREDICT))
-      return all_outputs
-
-    if not model.run_eagerly:
-      distributed_function = tf.function(distributed_function)
-      def execution_function(input_fn):
-        # `numpy` translates Tensors to values in Eager mode.
-        return [out.numpy() for out in distributed_function(input_fn)]
-    else:
-      execution_function = distributed_function
-
-    return execution_function
+    """Creates a function to run one step of distributed model execution."""
+    strategy = model._distribution_strategy
+
+    with strategy.scope():
+        per_replica_function = _make_replica_execution_function(model, mode)
+
+        def distributed_function(input_fn):
+            """A single step of the distributed execution across replicas."""
+            x, y, sample_weights = input_fn()
+            # Call `Model.{train,test,predict}_on_batch` on every replica
+            # passing PerReplicas as arguments. On every replica inside this
+            # call, each PerReplica object will return the value for that
+            # replica. The outputs are PerReplicas too.
+            outputs = strategy.run(
+                per_replica_function, args=(x, y, sample_weights)
+            )
+            # Out of PerReplica outputs reduce or pick values to return.
+            all_outputs = unwrap_outputs(
+                strategy, outputs, with_loss_tensor=(mode != ModeKeys.PREDICT)
+            )
+            return all_outputs

+        if not model.run_eagerly:
+            distributed_function = tf.function(distributed_function)
+
+            def execution_function(input_fn):
+                # `numpy` translates Tensors to values in Eager mode.
+                return [out.numpy() for out in distributed_function(input_fn)]
+
+        else:
+            execution_function = distributed_function
+
+        return execution_function
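The step function above follows the standard `tf.function` + `strategy.run` pattern. A self-contained sketch of that pattern with public APIs (the single-CPU strategy and toy step are illustrative assumptions):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy(["CPU:0"])

@tf.function  # graph-compile the distributed step, as the code above does
def distributed_step(batch):
    def step_fn(x):
        return tf.reduce_mean(x)

    per_replica = strategy.run(step_fn, args=(batch,))
    # Reduce or pick per-replica values to return, mirroring unwrap_outputs.
    return strategy.reduce(
        tf.distribute.ReduceOp.MEAN, per_replica, axis=None
    )

print(distributed_step(tf.ones([8, 2])).numpy())  # 1.0
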


 def _make_replica_execution_function(model, mode):
-  """A single step of the distributed execution on a replica."""
-  if mode == ModeKeys.TRAIN:
-    func = model.train_on_batch
-  elif mode == ModeKeys.TEST:
-    func = model.test_on_batch
-  else:
+    """A single step of the distributed execution on a replica."""
+    if mode == ModeKeys.TRAIN:
+        func = model.train_on_batch
+    elif mode == ModeKeys.TEST:
+        func = model.test_on_batch
+    else:

-    def predict_on_batch(x, y=None, sample_weights=None):
-      del y, sample_weights
-      return model.predict_on_batch(x)
+        def predict_on_batch(x, y=None, sample_weights=None):
+            del y, sample_weights
+            return model.predict_on_batch(x)

-    func = predict_on_batch
+        func = predict_on_batch

-  if mode != ModeKeys.PREDICT:
-    # `reset_metrics` is set to False to maintain stateful metrics across
-    # batch-level calls.
-    func = functools.partial(func, reset_metrics=False)
+    if mode != ModeKeys.PREDICT:
+        # `reset_metrics` is set to False to maintain stateful metrics across
+        # batch-level calls.
+        func = functools.partial(func, reset_metrics=False)

-  return func
+    return func


 def _make_replicated_models_with_cloning(model, mode):
-  """Build models on each replica."""
-  strategy = model._distribution_strategy
+    """Build models on each replica."""
+    strategy = model._distribution_strategy

-  # If distributed_model is not built, create one for `mode`.
-  if model._compile_distribution:
-    clone_model_on_replicas(model, strategy, mode)
-  else:
-    _build_distributed_network(model, strategy, mode)
+    # If distributed_model is not built, create one for `mode`.
+    if model._compile_distribution:
+        clone_model_on_replicas(model, strategy, mode)
+    else:
+        _build_distributed_network(model, strategy, mode)


 def _make_execution_function_with_cloning(model, mode):
-  """Clones or re-uses models to run one step of distributed model execution."""
-  # TODO(b/134069401): Create a cache for the distributed model and exec
-  # function that incorporates additional attributes to be part of the cache key
-  # than just the mode.
-  # If distributed model for a particular `mode` is already built, use the
-  # `_distribution_function` on that distributed model.
-  # If you have updated the sample_weight_mode on the model, then you will need
-  # to recompile metrics and recreate the execution function. This is indicated
-  # by the `_recompile_exec_function` property.
-  if (distributed_model and hasattr(distributed_model, '_distribution_function')
-      and not (hasattr(distributed_model, '_recompile_exec_function') and
-               distributed_model._recompile_exec_function)):
-    return distributed_model._distributed_function
-
-  if not distributed_model:
-    _make_replicated_models_with_cloning(model, mode)
+    """Clones or re-uses models to run one step of distributed model
+    execution."""
     distributed_model = get_distributed_model(model, mode)
-  assert distributed_model
+    # TODO(b/134069401): Create a cache for the distributed model and exec
+    # function that incorporates additional attributes to be part of the cache
+    # key than just the mode.
+    # If distributed model for a particular `mode` is already built, use the
+    # `_distribution_function` on that distributed model.
+    # If you have updated the sample_weight_mode on the model, then you will
+    # need to recompile metrics and recreate the execution function. This is
+    # indicated by the `_recompile_exec_function` property.
+    if (
+        distributed_model
+        and hasattr(distributed_model, "_distribution_function")
+        and not (
+            hasattr(distributed_model, "_recompile_exec_function")
+            and distributed_model._recompile_exec_function
+        )
+    ):
+        return distributed_model._distributed_function

-  # Also create an execution function on that distributed model.
-  if tf.executing_eagerly():
-    distributed_function = _make_eager_execution_function(model, mode)
-  else:
-    distributed_function = _make_graph_execution_function(model, mode)
+    if not distributed_model:
+        _make_replicated_models_with_cloning(model, mode)
+        distributed_model = get_distributed_model(model, mode)
+    assert distributed_model

-  # We cache the distributed execution function on the model since creating
-  # distributed models and execution functions are expensive.
-  distributed_model._distributed_function = distributed_function
-  distributed_model._recompile_exec_function = False
-  return distributed_function
+    # Also create an execution function on that distributed model.
+    if tf.executing_eagerly():
+        distributed_function = _make_eager_execution_function(model, mode)
+    else:
+        distributed_function = _make_graph_execution_function(model, mode)
+
+    # We cache the distributed execution function on the model since creating
+    # distributed models and execution functions are expensive.
+    distributed_model._distributed_function = distributed_function
+    distributed_model._recompile_exec_function = False
+    return distributed_function


 def _make_graph_execution_function(model, mode):
-  """Makes function to run one step of distributed model in graph mode."""
-
-  def _per_replica_function(model):
-    f = model._make_execution_function(mode)
-    return (f.inputs, f.outputs, f.updates_op, f.session_kwargs)
-
-  strategy = model._distribution_strategy
-  with strategy.scope():
-    # Create train ops on each of the devices when we call
-    # `_per_replica_fit_function`.
-    (grouped_inputs, grouped_outputs, grouped_updates,
-     grouped_session_args) = strategy.extended.call_for_each_replica(
-         _per_replica_function, args=(get_distributed_model(model, mode),))
-
-    # Initialize the variables in the replicated model. This is necessary for
-    # multi-worker training because on some workers, initialization is not
-    # needed. This method does initialization or waiting for initialization
-    # according to the context object of distribute coordinator.
-    init_restore_or_wait_for_variables()
-
-    # Unwrap all the per device values returned from `call_for_each_replica`.
-    # Unwrapping per device values gives you a list of values that can be
-    # used to construct a new train function that is composed of update ops on
-    # all the devices over which the model is distributed.
-    (all_inputs, all_outputs, all_updates, all_session_args) = unwrap_values(
-        strategy,
-        grouped_inputs,
-        grouped_outputs,
-        grouped_updates,
-        grouped_session_args,
-        with_loss_tensor=(mode != ModeKeys.PREDICT))
-
-    return backend.function(
-        all_inputs,
-        all_outputs,
-        updates=all_updates,
-        name='distributed_{}_function'.format(mode),
-        **all_session_args)
+    """Makes function to run one step of distributed model in graph mode."""
+
+    def _per_replica_function(model):
+        f = model._make_execution_function(mode)
+        return (f.inputs, f.outputs, f.updates_op, f.session_kwargs)
+
+    strategy = model._distribution_strategy
+    with strategy.scope():
+        # Create train ops on each of the devices when we call
+        # `_per_replica_fit_function`.
+        (
+            grouped_inputs,
+            grouped_outputs,
+            grouped_updates,
+            grouped_session_args,
+        ) = strategy.extended.call_for_each_replica(
+            _per_replica_function, args=(get_distributed_model(model, mode),)
+        )
+
+        # Initialize the variables in the replicated model. This is necessary
+        # for multi-worker training because on some workers, initialization is
+        # not needed. This method does initialization or waiting for
+        # initialization according to the context object of distribute
+        # coordinator.
+        init_restore_or_wait_for_variables()
+
+        # Unwrap all the per device values returned from
+        # `call_for_each_replica`. Unwrapping per device values gives you a
+        # list of values that can be used to construct a new train function that
+        # is composed of update ops on all the devices over which the model is
+        # distributed.
+        (
+            all_inputs,
+            all_outputs,
+            all_updates,
+            all_session_args,
+        ) = unwrap_values(
+            strategy,
+            grouped_inputs,
+            grouped_outputs,
+            grouped_updates,
+            grouped_session_args,
+            with_loss_tensor=(mode != ModeKeys.PREDICT),
+        )
+
+        return backend.function(
+            all_inputs,
+            all_outputs,
+            updates=all_updates,
+            name=f"distributed_{mode}_function",
+            **all_session_args,
+        )


 def _make_eager_execution_function(model, mode):
-  """Makes function to run one step of distributed model eager execution."""
-  def _per_replica_function(model):
-    f = model._make_execution_function(mode)
-    return (f.inputs, f.outputs)
-
-  # NOTE(priyag): Try creating a new FuncGraph within DS scope instead of using
-  # the global one.
-  strategy = model._distribution_strategy
-  global_graph = backend.get_graph()
-
-  with global_graph.as_default(), strategy.scope():
-    # First we gather the relevant portions of the model across all replicas.
-    # `backend._scratch_graph(global_graph)` signals to Keras that it should not
-    # lift to a separate graph when creating the per-replica functions.
+    """Makes function to run one step of distributed model eager execution."""
+
+    def _per_replica_function(model):
+        f = model._make_execution_function(mode)
+        return (f.inputs, f.outputs)
+
+    # NOTE(priyag): Try creating a new FuncGraph within DS scope instead of
+    # using the global one.
+    strategy = model._distribution_strategy
+    global_graph = backend.get_graph()
+
+    with global_graph.as_default(), strategy.scope():
+        # First we gather the relevant portions of the model across all
+        # replicas. `backend._scratch_graph(global_graph)` signals to Keras
+        # that it should not lift to a separate graph when creating the
+        # per-replica functions.
+        with backend._scratch_graph(global_graph):
+            # Create train ops on each of the devices when we call
+            # `_per_replica_fit_function`.
+            grouped = strategy.extended.call_for_each_replica(
+                _per_replica_function,
+                args=(get_distributed_model(model, mode),),
+            )
+            grouped_inputs, grouped_outputs = grouped
+
+            # Unwrap all the per device values returned from
+            # `call_for_each_replica`. Unwrapping per device values gives you a
+            # list of values that can be used to construct a new train function
+            # that is composed of inputs/outputs on all the devices over which
+            # the model is distributed.
+            (all_inputs, all_outputs, _, _) = unwrap_values(
+                strategy,
+                grouped_inputs,
+                grouped_outputs,
+                with_loss_tensor=(mode != ModeKeys.PREDICT),
+            )
+
+        # Finally, a joint Keras function is created; this one will be created
+        # in a separate FuncGraph.
+        return backend.function(
+            all_inputs,
+            all_outputs,
+            name=f"eager_distributed_{mode}_function",
+        )


 def _copy_weights_to_distributed_model(original_model, mode):
-  """Copies weights from original model to distributed models."""
-  strategy = original_model._distribution_strategy
-  distributed_model = get_distributed_model(original_model, mode)
-  if strategy:
-    # Copy the weights from the original model to each of the replicated
-    # models.
-    orig_model_weights = original_model.get_weights()
-    first_model = strategy.unwrap(distributed_model)[0]
-    set_weights(strategy, first_model, orig_model_weights)
+    """Copies weights from original model to distributed models."""
+    strategy = original_model._distribution_strategy
+    distributed_model = get_distributed_model(original_model, mode)
+    if strategy:
+        # Copy the weights from the original model to each of the replicated
+        # models.
+        orig_model_weights = original_model.get_weights()
+        first_model = strategy.unwrap(distributed_model)[0]
+        set_weights(strategy, first_model, orig_model_weights)


 def _copy_weights_to_original_model(model, mode):
-  """Copies weights from first distributed model back to original model."""
-  if model._distribution_strategy and mode == ModeKeys.TRAIN:
-    distributed_model = get_distributed_model(model, mode)
-    updated_weights = model._distribution_strategy.unwrap(
-        distributed_model)[0].get_weights()
-    model.set_weights(updated_weights)
+    """Copies weights from first distributed model back to original model."""
+    if model._distribution_strategy and mode == ModeKeys.TRAIN:
+        distributed_model = get_distributed_model(model, mode)
+        updated_weights = model._distribution_strategy.unwrap(
+            distributed_model
+        )[0].get_weights()
+        model.set_weights(updated_weights)


 def _per_replica_aggregate_batch(strategy, batch_outs, model, mode):
-  """Aggregates the per-replica batch-level outputs from a distributed step."""
-  if strategy is not None and mode == ModeKeys.PREDICT:
-    total_batch_outs = []
-    for i in range(len(model.outputs)):
-      num_replicas = strategy.num_replicas_in_sync
-      nested_outs = batch_outs[i * num_replicas:i * num_replicas + num_replicas]
-      total_batch_outs.append(
-          concat_along_batch_dimension(tf.nest.flatten(nested_outs)))
-    return total_batch_outs
-  return batch_outs
+    """Aggregates the per-replica batch-level outputs from a distributed
+    step."""
+    if strategy is not None and mode == ModeKeys.PREDICT:
+        total_batch_outs = []
+        for i in range(len(model.outputs)):
+            num_replicas = strategy.num_replicas_in_sync
+            nested_outs = batch_outs[
+                i * num_replicas : i * num_replicas + num_replicas
+            ]
+            total_batch_outs.append(
+                concat_along_batch_dimension(tf.nest.flatten(nested_outs))
+            )
+        return total_batch_outs
+    return batch_outs
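The indexing above assumes per-replica outputs arrive interleaved per model output. A numpy-only sketch of that layout (two replicas and two outputs are illustrative assumptions):

import numpy as np

num_replicas, model_outputs = 2, ["out0", "out1"]
# Layout assumed by the slicing above:
# [out0_replica0, out0_replica1, out1_replica0, out1_replica1]
batch_outs = [np.ones([4, 1]) * i for i in range(4)]

total = []
for i in range(len(model_outputs)):
    nested = batch_outs[i * num_replicas : (i + 1) * num_replicas]
    total.append(np.concatenate(nested))  # stitch the batch dimension back
print([t.shape for t in total])  # [(8, 1), (8, 1)]
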


 def _reset_metrics(model):
-  if model._distribution_strategy:
-    for mode in [ModeKeys.TRAIN, ModeKeys.TEST, ModeKeys.PREDICT]:
-      distributed_model = get_distributed_model(model, mode)
-      if distributed_model:
-        first_model = model._distribution_strategy.unwrap(distributed_model)[0]
-        first_model.reset_metrics()
+    if model._distribution_strategy:
+        for mode in [ModeKeys.TRAIN, ModeKeys.TEST, ModeKeys.PREDICT]:
+            distributed_model = get_distributed_model(model, mode)
+            if distributed_model:
+                first_model = model._distribution_strategy.unwrap(
+                    distributed_model
+                )[0]
+                first_model.reset_metrics()


 def get_distributed_model(model, mode):
-  key = _generate_cache_key(mode)
-  return model._distributed_model_cache.get(key, None)
+    key = _generate_cache_key(mode)
+    return model._distributed_model_cache.get(key, None)


 def set_distributed_model(model, mode, distributed_model):
-  key = _generate_cache_key(mode)
-  model._distributed_model_cache[key] = distributed_model
+    key = _generate_cache_key(mode)
+    model._distributed_model_cache[key] = distributed_model


 def get_distributed_function(model, mode):
-  key = _generate_cache_key(mode)
-  return model._distributed_function_cache.get(key, None)
+    key = _generate_cache_key(mode)
+    return model._distributed_function_cache.get(key, None)


 def set_distributed_function(model, mode, distributed_function):
-  key = _generate_cache_key(mode)
-  model._distributed_function_cache[key] = distributed_function
+    key = _generate_cache_key(mode)
+    model._distributed_function_cache[key] = distributed_function


 def _generate_cache_key(mode):
-  key = hash(mode)
-  return key
+    key = hash(mode)
+    return key


 @tf_contextlib.contextmanager
 def distributed_scope(strategy, learning_phase):
-  with strategy.scope(), backend.learning_phase_scope(learning_phase):
-    yield
+    with strategy.scope(), backend.learning_phase_scope(learning_phase):
+        yield


 def is_current_worker_chief():
-  return dc.get_current_worker_context().is_chief
+    return dc.get_current_worker_context().is_chief


 def filter_distributed_callbacks(callbacks_list, model):
-  """Filter Callbacks based on the worker context when running multi-worker.
-
-  Args:
-    callbacks_list: A list of `Callback` instances.
-    model: Keras model instance.
-
-  Returns:
-    The list of `Callback` instances that should be run on this worker.
-  """
-
-  if not model._in_multi_worker_mode():
-    raise ValueError(
-        'filter_distributed_callbacks() should only be called when Keras '
-        'is in multi worker mode.')
-
-  callbacks_list = callbacks_list or []
-  if not [
-      c for c in callbacks_list if isinstance(c, callbacks.ModelCheckpoint)
-  ]:
-    # TODO(rchao): Consider providing a ModelCheckpoint here if the user
-    # fails to (possibly with tempfile directory).
-    logging.warning('ModelCheckpoint callback is not provided. '
-                    'Workers will need to restart training if any fails.')
-
-  if callbacks_list is None or is_current_worker_chief():
-    return callbacks_list
-
-  # Some Callbacks should only run on the chief worker.
-  return [
-      callback for callback in callbacks_list if not callback._chief_worker_only
-  ]  # pylint: disable=protected-access
+    """Filter Callbacks based on the worker context when running multi-worker.
+
+    Args:
+        callbacks_list: A list of `Callback` instances.
+        model: Keras model instance.
+
+    Returns:
+        The list of `Callback` instances that should be run on this worker.
+    """
+
+    if not model._in_multi_worker_mode():
+        raise ValueError(
+            "filter_distributed_callbacks() should only be called when Keras "
+            "is in multi worker mode."
+        )
+
+    callbacks_list = callbacks_list or []
+    if not [
+        c for c in callbacks_list if isinstance(c, callbacks.ModelCheckpoint)
+    ]:
+        # TODO(rchao): Consider providing a ModelCheckpoint here if the user
+        # fails to (possibly with tempfile directory).
+        logging.warning(
+            "ModelCheckpoint callback is not provided. "
+            "Workers will need to restart training if any fails."
+        )
+
+    if callbacks_list is None or is_current_worker_chief():
+        return callbacks_list
+
+    # Some Callbacks should only run on the chief worker.
+    return [
+        callback
+        for callback in callbacks_list
+        if not callback._chief_worker_only
+    ]
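A pure-Python sketch of the chief-worker filtering above (the toy classes and the `_chief_worker_only` flag values are assumptions for illustration; in Keras the flag is a private attribute on `Callback` subclasses):

class Callback:
    _chief_worker_only = False

class Checkpoint(Callback):
    _chief_worker_only = True  # e.g. only the chief should write checkpoints

def filter_for_worker(callbacks_list, is_chief):
    # Mirrors the filtering above: non-chief workers drop chief-only callbacks.
    if is_chief:
        return callbacks_list
    return [c for c in callbacks_list if not c._chief_worker_only]

print(len(filter_for_worker([Callback(), Checkpoint()], is_chief=False)))  # 1
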
+ """ + + if not model._in_multi_worker_mode(): + raise ValueError( + "filter_distributed_callbacks() should only be called when Keras " + "is in multi worker mode." + ) + + callbacks_list = callbacks_list or [] + if not [ + c for c in callbacks_list if isinstance(c, callbacks.ModelCheckpoint) + ]: + # TODO(rchao): Consider providing a ModelCheckpoint here if the user + # fails to (possibly with tempfile directory). + logging.warning( + "ModelCheckpoint callback is not provided. " + "Workers will need to restart training if any fails." + ) + + if callbacks_list is None or is_current_worker_chief(): + return callbacks_list + + # Some Callbacks should only run on the chief worker. + return [ + callback + for callback in callbacks_list + if not callback._chief_worker_only + ] def _update_sample_weight_modes(model, mode, sample_weights): - """Update sample_weight_mode of the distributed model.""" - if is_distributing_by_cloning(model): - distributed_model = get_distributed_model(model, mode) - if not distributed_model: - _make_replicated_models_with_cloning(model, mode) - distributed_model = get_distributed_model(model, mode) - distributed_model._recompile_exec_function = any( - [e.sample_weights_mismatch() for e in model._training_endpoints]) - - if sample_weights: - distributed_models = flatten_per_replica_values( - model._distribution_strategy, distributed_model) - # sample_weights is a tuple of 1 list where the number of elements in the - # list is equal to the number of replicas in sync. - sample_weights = sample_weights[0] - if sample_weights and None not in sample_weights: - for m, sw in zip(distributed_models, sample_weights): - m._update_sample_weight_modes(sample_weights=[sw]) + """Update sample_weight_mode of the distributed model.""" + if is_distributing_by_cloning(model): + distributed_model = get_distributed_model(model, mode) + if not distributed_model: + _make_replicated_models_with_cloning(model, mode) + distributed_model = get_distributed_model(model, mode) + distributed_model._recompile_exec_function = any( + [e.sample_weights_mismatch() for e in model._training_endpoints] + ) + + if sample_weights: + distributed_models = flatten_per_replica_values( + model._distribution_strategy, distributed_model + ) + # sample_weights is a tuple of 1 list where the number of elements + # in the list is equal to the number of replicas in sync. 
+ sample_weights = sample_weights[0] + if sample_weights and None not in sample_weights: + for m, sw in zip(distributed_models, sample_weights): + m._update_sample_weight_modes(sample_weights=[sw]) def concat_along_batch_dimension(outputs): - """Concats prediction outputs along the batch dimension.""" - if isinstance(outputs[0], tf.SparseTensor): - return tf.sparse.concat(axis=0, sp_inputs=outputs) - if isinstance(outputs[0], tf.RaggedTensor): - return tf.concat(outputs, axis=0) - return np.concatenate(outputs) + """Concats prediction outputs along the batch dimension.""" + if isinstance(outputs[0], tf.SparseTensor): + return tf.sparse.concat(axis=0, sp_inputs=outputs) + if isinstance(outputs[0], tf.RaggedTensor): + return tf.concat(outputs, axis=0) + return np.concatenate(outputs) diff --git a/keras/distribute/keras_correctness_test_base.py b/keras/distribute/keras_correctness_test_base.py index 1f131128a234..1e5501654ecd 100644 --- a/keras/distribute/keras_correctness_test_base.py +++ b/keras/distribute/keras_correctness_test_base.py @@ -14,16 +14,18 @@ # ============================================================================== """Correctness tests for tf.keras using DistributionStrategy.""" -import tensorflow.compat.v2 as tf - import functools -from absl.testing import parameterized + import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras from keras.distribute import distributed_training_utils from keras.distribute.strategy_combinations import all_strategies -from keras.distribute.strategy_combinations import multi_worker_mirrored_strategies +from keras.distribute.strategy_combinations import ( + multi_worker_mirrored_strategies, +) from keras.distribute.strategy_combinations import strategies_minus_tpu from keras.mixed_precision import policy from keras.utils import data_utils @@ -37,583 +39,672 @@ def eager_mode_test_configuration(): - return tf.__internal__.test.combinations.combine( - mode='eager', use_numpy=[True, False], use_validation_data=[True, False]) + return tf.__internal__.test.combinations.combine( + mode="eager", use_numpy=[True, False], use_validation_data=[True, False] + ) def graph_mode_test_configuration(): - return tf.__internal__.test.combinations.combine( - mode='graph', use_numpy=[True, False], use_validation_data=[True, False]) + return tf.__internal__.test.combinations.combine( + mode="graph", use_numpy=[True, False], use_validation_data=[True, False] + ) def all_strategy_and_input_config_combinations(): - return (tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(distribution=all_strategies), - eager_mode_test_configuration() + graph_mode_test_configuration())) + return tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine(distribution=all_strategies), + eager_mode_test_configuration() + graph_mode_test_configuration(), + ) def all_strategy_and_input_config_combinations_eager(): - return (tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(distribution=all_strategies), - eager_mode_test_configuration())) + return tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine(distribution=all_strategies), + eager_mode_test_configuration(), + ) def strategy_minus_tpu_and_input_config_combinations_eager(): - return (tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(distribution=strategies_minus_tpu), - eager_mode_test_configuration())) + return 
tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + distribution=strategies_minus_tpu + ), + eager_mode_test_configuration(), + ) def strategies_for_embedding_models(): - """Returns distribution strategies to test for embedding models. + """Returns distribution strategies to test for embedding models. - Since embedding models take longer to train, we disregard DefaultStrategy - in order to prevent testing timeouts. - """ + Since embedding models take longer to train, we disregard DefaultStrategy + in order to prevent testing timeouts. + """ - return [ - s for s in all_strategies if s.required_tpu or s.required_gpus or - s is tf.__internal__.distribute.combinations.one_device_strategy - ] + return [ + s + for s in all_strategies + if s.required_tpu + or s.required_gpus + or s is tf.__internal__.distribute.combinations.one_device_strategy + ] def test_combinations_for_embedding_model(): - # TODO(sourabhbajaj): Enable tests for eager mode - eager_mode_strategies = [ - s for s in strategies_for_embedding_models() if not s.required_tpu - ] - - return (tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine( - distribution=strategies_for_embedding_models()), - (graph_mode_test_configuration())) + tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine( - distribution=eager_mode_strategies), - (eager_mode_test_configuration()))) + # TODO(sourabhbajaj): Enable tests for eager mode + eager_mode_strategies = [ + s for s in strategies_for_embedding_models() if not s.required_tpu + ] + + return tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + distribution=strategies_for_embedding_models() + ), + (graph_mode_test_configuration()), + ) + tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + distribution=eager_mode_strategies + ), + (eager_mode_test_configuration()), + ) def test_combinations_with_tpu_strategies_graph(): - tpu_strategies = [ - tf.__internal__.distribute.combinations.tpu_strategy, - ] + tpu_strategies = [ + tf.__internal__.distribute.combinations.tpu_strategy, + ] - return (tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(distribution=tpu_strategies), - graph_mode_test_configuration())) + return tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine(distribution=tpu_strategies), + graph_mode_test_configuration(), + ) def multi_worker_mirrored_eager(): - return tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(distribution=multi_worker_mirrored_strategies), - eager_mode_test_configuration()) + return tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + distribution=multi_worker_mirrored_strategies + ), + eager_mode_test_configuration(), + ) def multi_worker_mirrored_eager_and_graph(): - return tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(distribution=multi_worker_mirrored_strategies), - eager_mode_test_configuration() + graph_mode_test_configuration()) + return tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + distribution=multi_worker_mirrored_strategies + ), + eager_mode_test_configuration() + graph_mode_test_configuration(), + ) class MaybeDistributionScope: - """Provides a context allowing no distribution strategy.""" + """Provides a context allowing no distribution strategy.""" - def __init__(self, 
distribution): - self._distribution = distribution - self._scope = None + def __init__(self, distribution): + self._distribution = distribution + self._scope = None - def __enter__(self): - if self._distribution: - self._scope = self._distribution.scope() - self._scope.__enter__() + def __enter__(self): + if self._distribution: + self._scope = self._distribution.scope() + self._scope.__enter__() - def __exit__(self, exc_type, value, traceback): - if self._distribution: - self._scope.__exit__(exc_type, value, traceback) - self._scope = None + def __exit__(self, exc_type, value, traceback): + if self._distribution: + self._scope.__exit__(exc_type, value, traceback) + self._scope = None def batch_wrapper(dataset, batch_size, repeat=None): - if repeat: - dataset = dataset.repeat(repeat) - return dataset.batch(batch_size) + if repeat: + dataset = dataset.repeat(repeat) + return dataset.batch(batch_size) def get_batch_size(global_batch_size, distribution): - batch_size = global_batch_size - # TODO(b/118776054): Use global batch size for Keras/DS support. - use_per_core_batch_size = ( - distribution and - not distributed_training_utils.global_batch_size_supported(distribution)) - if use_per_core_batch_size: - batch_size //= distribution.num_replicas_in_sync - return batch_size + batch_size = global_batch_size + # TODO(b/118776054): Use global batch size for Keras/DS support. + use_per_core_batch_size = ( + distribution + and not distributed_training_utils.global_batch_size_supported( + distribution + ) + ) + if use_per_core_batch_size: + batch_size //= distribution.num_replicas_in_sync + return batch_size def get_data_size(data): - """Gets the size of data in list, tuple, dict, or a numpy array.""" - assert isinstance(data, (np.ndarray, list, dict, tuple)) + """Gets the size of data in list, tuple, dict, or a numpy array.""" + assert isinstance(data, (np.ndarray, list, dict, tuple)) - if isinstance(data, np.ndarray): - return len(data) + if isinstance(data, np.ndarray): + return len(data) - if isinstance(data, (list, tuple)): - return len(data[0]) + if isinstance(data, (list, tuple)): + return len(data[0]) - return len(data.values()) + return len(data.values()) def get_shapes(data): - shapes = None - if all(hasattr(x, 'shape') for x in tf.nest.flatten(data)): - shapes = tf.nest.map_structure(lambda x: x.shape, data) - return shapes - - -def get_correctness_test_inputs(use_numpy, use_validation_data, - with_distribution, x_train, y_train, x_eval, - y_eval, x_predict, training_epochs): - """Generates the inputs for correctness check when enable Keras with DS.""" - global_batch_size = _GLOBAL_BATCH_SIZE - batch_size = get_batch_size(global_batch_size, with_distribution) - - if use_numpy: - training_inputs = { - 'batch_size': batch_size, - 'x': x_train, - 'y': y_train, - 'epochs': training_epochs, - 'shuffle': False, - } - - if use_validation_data: - eval_inputs = None - training_inputs['validation_data'] = (x_eval, y_eval) + shapes = None + if all(hasattr(x, "shape") for x in tf.nest.flatten(data)): + shapes = tf.nest.map_structure(lambda x: x.shape, data) + return shapes + + +def get_correctness_test_inputs( + use_numpy, + use_validation_data, + with_distribution, + x_train, + y_train, + x_eval, + y_eval, + x_predict, + training_epochs, +): + """Generates the inputs for a correctness check of Keras with DS.""" + global_batch_size = _GLOBAL_BATCH_SIZE + batch_size = get_batch_size(global_batch_size, with_distribution) + + if use_numpy: + training_inputs = { + "batch_size": batch_size, + "x":
x_train, + "y": y_train, + "epochs": training_epochs, + "shuffle": False, + } + + if use_validation_data: + eval_inputs = None + training_inputs["validation_data"] = (x_eval, y_eval) + else: + eval_inputs = { + "batch_size": batch_size, + "x": x_eval, + "y": y_eval, + } + predict_inputs = {"x": x_predict} else: - eval_inputs = { - 'batch_size': batch_size, - 'x': x_eval, - 'y': y_eval, - } - predict_inputs = {'x': x_predict} - else: - training_data_size = get_data_size(x_train) - # For dataset inputs, we do not pass batch_size to - # keras.fit/evaluate/predict. The batch size is part of the dataset. - train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - x = batch_wrapper(train_dataset, batch_size, repeat=training_epochs) - - steps_per_epoch = int(np.ceil(1.0 * training_data_size / global_batch_size)) - training_inputs = { - 'batch_size': None, - 'x': x, - 'y': None, - 'epochs': training_epochs, - 'shuffle': False, - 'steps_per_epoch': steps_per_epoch - } - if use_validation_data: - eval_inputs = None # Remove the eval_inputs - eval_dataset = tf.data.Dataset.from_tensor_slices((x_eval, y_eval)) - x = batch_wrapper(eval_dataset, batch_size) - training_inputs['validation_data'] = x - training_inputs['validation_steps'] = 5 + training_data_size = get_data_size(x_train) + # For dataset inputs, we do not pass batch_size to + # keras.fit/evaluate/predict. The batch size is part of the dataset. + train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) + x = batch_wrapper(train_dataset, batch_size, repeat=training_epochs) + + steps_per_epoch = int( + np.ceil(1.0 * training_data_size / global_batch_size) + ) + training_inputs = { + "batch_size": None, + "x": x, + "y": None, + "epochs": training_epochs, + "shuffle": False, + "steps_per_epoch": steps_per_epoch, + } + if use_validation_data: + eval_inputs = None # Remove the eval_inputs + eval_dataset = tf.data.Dataset.from_tensor_slices((x_eval, y_eval)) + x = batch_wrapper(eval_dataset, batch_size) + training_inputs["validation_data"] = x + training_inputs["validation_steps"] = 5 + else: + eval_dataset = tf.data.Dataset.from_tensor_slices((x_eval, y_eval)) + x = batch_wrapper(eval_dataset, batch_size) + eval_steps = int( + np.ceil(1.0 * get_data_size(x_eval) / global_batch_size) + ) + eval_inputs = { + "batch_size": None, + "x": x, + "y": None, + "steps": eval_steps, + } + + predict_batch_size = get_batch_size( + get_data_size(x_predict), with_distribution + ) + predict_dataset = tf.data.Dataset.from_tensor_slices(x_predict) + predict_dataset = batch_wrapper(predict_dataset, predict_batch_size) + predict_inputs = { + "steps": 1, + "x": predict_dataset, + } + + return training_inputs, eval_inputs, predict_inputs + + +def fit_eval_and_predict( + initial_weights, + input_fn, + model_fn, + distribution=None, + is_stateful_model=False, +): + """Generates results for fit/predict/evaluate for given model.""" + training_inputs, eval_inputs, predict_inputs = input_fn() + model = model_fn( + initial_weights=initial_weights, + distribution=distribution, + input_shapes=get_shapes(training_inputs["x"]), + ) + + result = {} + result["training_history_1"] = model.fit(**training_inputs).history + + if eval_inputs is not None: + result["eval_result_1"] = model.evaluate(**eval_inputs) + + result["weights_1"] = model.get_weights() + + if predict_inputs is not None: + # Check correctness of the result of predict() invoked + # multiple times -- as for stateful models, result of + # predict may differ for each batch. 
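+ # A stateless model returns the same output on every call; stateful + # ones (e.g. stateful RNNs) carry state across batches, hence the + # three consecutive predict() calls recorded for stateful models.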
+ predict_length = 1 + if is_stateful_model: + predict_length = 3 + for i in range(predict_length): + result_key = f"predict_result_{i}" + result[result_key] = model.predict(**predict_inputs) + + # Train and eval again to mimic user's flow. + + result["training_history_2"] = model.fit(**training_inputs).history + + if eval_inputs is not None: + result["eval_result_2"] = model.evaluate(**eval_inputs) + + result["weights_2"] = model.get_weights() + + return result + + +def compare_results( + results_with_ds, + results_without_ds, + distribution, + testcase, + partial_last_batch=None, +): + """Compares results of model compiled with/without distribution strategy.""" + if policy.global_policy().compute_dtype in ("float16", "bfloat16"): + default_tolerance = 1e-2 + relaxed_tolerance = 1e-2 + elif partial_last_batch == "train_and_eval": + # We relax the tolerance a lot in the partial last batch case as + # 1. the examples in uneven batches may have different weights when + # applying the gradients in the distributed case. + # 2. TF Keras and TF Keras DS have different ways to handle the case + # when training with epochs > 1 with numpy inputs. In TF Keras, + # every epoch may have a partial batch. While in TF Keras DS, as we + # convert numpy inputs into a dataset, it will do a repeat() first + # and calculate steps_per_epoch, so it will at most have one + # partial batch. This makes even the 1-CPU result differ. + default_tolerance = 1e-3 + relaxed_tolerance = 1e-3 else:
- - result['training_history_2'] = model.fit(**training_inputs).history - - if eval_inputs is not None: - result['eval_result_2'] = model.evaluate(**eval_inputs) - - result['weights_2'] = model.get_weights() - - return result - - -def compare_results(results_with_ds, - results_without_ds, - distribution, - testcase, - partial_last_batch=None): - """Compares results of model compiled with/without distribution strategy.""" - if policy.global_policy().compute_dtype in ('float16', 'bfloat16'): - default_tolerance = 1e-2 - relaxed_tolerance = 1e-2 - elif partial_last_batch == 'train_and_eval': - # We relax the tolerance a lot in the partial last batch case as - # 1. the examples in uneven batches may have different weights when - # applying the gradients in the distributed case. - # 2. TF Keras and TF Keras DS have different ways to handle the case when - # training with epochs > 1 with numpy inputs. In TF Keras, every epoch - # may have a partial batch. While in TF Keras DS, as we convert - # numpy inputs into dataset, it will do a repeat() first and calculate - # steps_per_epoch, so it will at most have one partial batch. This - # makes the 1-CPU result even different. - default_tolerance = 1e-3 - relaxed_tolerance = 1e-3 - else: - default_tolerance = 4e-5 - relaxed_tolerance = 1e-4 - - def _get_compare_result_tolerance(key): - """Returns tolerance to compare results.""" - # See b/119257215 for more details. DS test run on GPU could have larger - # variance then test on CPU. - if (tf.test.is_gpu_available() and - key.startswith(('weights_1', 'weights_2', 'predict_result'))): - return relaxed_tolerance - - return default_tolerance - - for key in sorted(results_with_ds.keys()): - if (key.startswith('training_history') and - isinstance(distribution, - (tf.distribute.experimental.TPUStrategy, tf.compat.v1.distribute.experimental.TPUStrategy)) and - distribution.extended.steps_per_run > 1): - # TODO(b/119894254): Enable this test for all cases once the - # underlying bug is fixed. - continue - - tolerance = _get_compare_result_tolerance(key) - - # We don't compare the loss as loss is currently not computed as metric - # in Keras, the loss value is inaccurate for last partial batch due to - # more weights for the last batch samples. - if partial_last_batch is not None: - if key.startswith('eval_result'): - results_with_ds[key] = results_with_ds[key][1:] - results_without_ds[key] = results_without_ds[key][1:] - if key.startswith('training_history'): - results_with_ds[key]['val_loss'] = 0 - results_without_ds[key]['val_loss'] = 0 - - testcase.assertAllClose( - results_with_ds[key], - results_without_ds[key], - atol=tolerance, - rtol=tolerance, - msg='Fail to assert {}.'.format(key)) + default_tolerance = 4e-5 + relaxed_tolerance = 1e-4 + + def _get_compare_result_tolerance(key): + """Returns tolerance to compare results.""" + # See b/119257215 for more details. DS tests run on GPU could have + # larger variance than tests on CPU. + if tf.test.is_gpu_available() and key.startswith( + ("weights_1", "weights_2", "predict_result") + ): + return relaxed_tolerance + + return default_tolerance + + for key in sorted(results_with_ds.keys()): + if ( + key.startswith("training_history") + and isinstance( + distribution, + ( + tf.distribute.experimental.TPUStrategy, + tf.compat.v1.distribute.experimental.TPUStrategy, + ), + ) + and distribution.extended.steps_per_run > 1 + ): + # TODO(b/119894254): Enable this test for all cases once the + # underlying bug is fixed.
+ continue + + tolerance = _get_compare_result_tolerance(key) + + # We don't compare the loss, as loss is currently not computed as a + # metric in Keras; the loss value is inaccurate for the last partial + # batch due to more weights for the last batch samples. + if partial_last_batch is not None: + if key.startswith("eval_result"): + results_with_ds[key] = results_with_ds[key][1:] + results_without_ds[key] = results_without_ds[key][1:] + if key.startswith("training_history"): + results_with_ds[key]["val_loss"] = 0 + results_without_ds[key]["val_loss"] = 0 + + testcase.assertAllClose( + results_with_ds[key], + results_without_ds[key], + atol=tolerance, + rtol=tolerance, + msg=f"Failed to assert {key}.", + ) def should_skip_tpu_with_eager(distribution): - return (tf.executing_eagerly() and - isinstance(distribution, - (tf.distribute.experimental.TPUStrategy, tf.compat.v1.distribute.experimental.TPUStrategy))) + return tf.executing_eagerly() and isinstance( + distribution, + ( + tf.distribute.experimental.TPUStrategy, + tf.compat.v1.distribute.experimental.TPUStrategy, + ), + ) class LearningRateBatchScheduler(keras.callbacks.Callback): - """Scheduler that dynamically sets the learning rate of model.""" - - def __init__(self, update_freq=None): - self._update_freq = update_freq - - def on_batch_begin(self, batch, logs=None): - if self._update_freq and batch % self._update_freq != 0: - return - - # To avoid divergence, limit the value range. - lr = 0.001 * (batch % 10) - keras.backend.set_value(self.model.optimizer.lr, lr) - - -class TestDistributionStrategyCorrectnessBase(tf.test.TestCase, - parameterized.TestCase): - """Model agnostic testing infra to test correctness of Keras models.""" - - def set_up_test_config(self, - use_numpy=False, - use_validation_data=False, - with_batch_norm=None): - self.use_numpy = use_numpy - self.use_validation_data = use_validation_data - self.with_batch_norm = with_batch_norm - - keras.backend.set_image_data_format('channels_last') - np.random.seed(_RANDOM_SEED) - tf.compat.v1.set_random_seed(_RANDOM_SEED) - - def get_data(self): - num_samples = 10000 - x_train = np.random.randint(0, 2, num_samples) - x_train = np.reshape(x_train, (num_samples, 1)) - y_train = x_train - return (x_train.astype('float32'), y_train.astype('float32'), None) + """Scheduler that dynamically sets the learning rate of the model.""" + + def __init__(self, update_freq=None): + self._update_freq = update_freq + + def on_batch_begin(self, batch, logs=None): + if self._update_freq and batch % self._update_freq != 0: + return + + # To avoid divergence, limit the value range.
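+ # (The learning rate cycles through 0.000-0.009, keyed off the + # batch index.)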
+ lr = 0.001 * (batch % 10) + keras.backend.set_value(self.model.optimizer.lr, lr) + + +class TestDistributionStrategyCorrectnessBase( + tf.test.TestCase, parameterized.TestCase +): + """Model-agnostic testing infra to test correctness of Keras models.""" + + def set_up_test_config( + self, use_numpy=False, use_validation_data=False, with_batch_norm=None + ): + self.use_numpy = use_numpy + self.use_validation_data = use_validation_data + self.with_batch_norm = with_batch_norm + + keras.backend.set_image_data_format("channels_last") + np.random.seed(_RANDOM_SEED) + tf.compat.v1.set_random_seed(_RANDOM_SEED) + + def get_data(self): + num_samples = 10000 + x_train = np.random.randint(0, 2, num_samples) + x_train = np.reshape(x_train, (num_samples, 1)) + y_train = x_train + return (x_train.astype("float32"), y_train.astype("float32"), None) + + def get_data_with_partial_last_batch(self): + raise NotImplementedError + + def get_data_with_partial_last_batch_eval(self): + raise NotImplementedError + + def get_input_for_correctness_test(self, **kwargs): + """Generates inputs that are dictionaries. + + We only provide a default implementation of this method here. If you + need a more customized way of providing input to your model, overwrite + this method. + + Args: + **kwargs: keyword arguments about how to create the input + dictionaries + + Returns: + Three dictionaries representing the input for fit(), evaluate() and + predict() + """ + + return get_correctness_test_inputs(**kwargs) + + def get_model(self, distribution=None, input_shapes=None): + raise NotImplementedError + + def run_correctness_test( + self, + distribution, + use_numpy, + use_validation_data, + with_batch_norm=None, + is_stateful_model=False, + partial_last_batch=None, + training_epochs=2, + ): + with self.cached_session(): + self.set_up_test_config( + use_numpy, use_validation_data, with_batch_norm + ) + + if partial_last_batch == "eval": + ( + x_train, + y_train, + x_eval, + y_eval, + x_predict, + ) = self.get_data_with_partial_last_batch_eval() + elif partial_last_batch == "train_and_eval": + ( + x_train, + y_train, + x_eval, + y_eval, + x_predict, + ) = self.get_data_with_partial_last_batch() + else: + x_train, y_train, x_predict = self.get_data() + x_eval = x_train + y_eval = y_train + + # The model is built once and the initial weights are saved. + # This is used to initialize the model for both the distribution and + # non-distribution runs.
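+ # Both runs therefore start from identical parameters, so any + # divergence between them is attributable to the distribution + # strategy rather than to random initialization.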
+ model = self.get_model(input_shapes=get_shapes(x_train)) + initial_weights = model.get_weights() + + ds_input_fn = functools.partial( + self.get_input_for_correctness_test, + use_numpy=use_numpy, + use_validation_data=use_validation_data, + with_distribution=distribution, + x_train=x_train, + y_train=y_train, + x_eval=x_eval, + y_eval=y_eval, + x_predict=x_predict, + training_epochs=training_epochs, + ) + + nods_input_fn = functools.partial( + self.get_input_for_correctness_test, + use_numpy=use_numpy, + use_validation_data=use_validation_data, + with_distribution=None, + x_train=x_train, + y_train=y_train, + x_eval=x_eval, + y_eval=y_eval, + x_predict=x_predict, + training_epochs=training_epochs, + ) + + results_with_ds = fit_eval_and_predict( + initial_weights, + input_fn=ds_input_fn, + model_fn=self.get_model, + distribution=distribution, + is_stateful_model=is_stateful_model, + ) + results_without_ds = fit_eval_and_predict( + initial_weights, + input_fn=nods_input_fn, + model_fn=self.get_model, + distribution=None, + is_stateful_model=is_stateful_model, + ) + + # First, a special case: for multi-replica distributed training, + # batch norm is not aggregated globally, so it is expected to have + # different weights. + if ( + self.with_batch_norm == "regular" + and distribution.num_replicas_in_sync > 1 + ): + with self.assertRaises(AssertionError): + compare_results( + results_with_ds, + results_without_ds, + distribution, + testcase=self, + partial_last_batch=partial_last_batch, + ) + else: + compare_results( + results_with_ds, + results_without_ds, + distribution, + testcase=self, + partial_last_batch=partial_last_batch, + ) - def get_data_with_partial_last_batch(self): - raise NotImplementedError + def get_input_for_dynamic_lr_test(self, **kwargs): + """Generates inputs that are dictionaries. - def get_data_with_partial_last_batch_eval(self): - raise NotImplementedError + We only provide a default implementation of this method here. If you + need a more customized way of providing input to your model, overwrite + this method. - def get_input_for_correctness_test(self, **kwargs): - """Generates inputs that are dictionaries. + Args: + **kwargs: keyword arguments about how to create the input + dictionaries - We only provide a default implementation of this method here. If you need - more customized way of providing input to your model, overwrite this method. + Returns: + Three dictionaries representing the input for fit(), evaluate() and + predict() + """ - Args: - **kwargs: key word arguments about how to create the input dictionaries + training_input = kwargs + return training_input, None, None - Returns: - Three dictionaries representing the input for fit(), evaluate() and - predict() - """ + def run_dynamic_lr_test(self, distribution): + with self.cached_session(): + self.set_up_test_config() - return get_correctness_test_inputs(**kwargs) + x_train, y_train, _ = self.get_data() + model = self.get_model(input_shapes=get_shapes(x_train)) + initial_weights = model.get_weights() + update_freq = None - def get_model(self, + if ( + isinstance( + distribution, + tf.compat.v1.distribute.experimental.TPUStrategy, + ) + and distribution.extended.steps_per_run > 1 + ): + # For TPUStrategy with steps_per_run > 1, the callback is not + # invoked every step. So, to compare the CPU/TPU, we let the CPU + # behave the same as the TPU.
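+ # That is, the learning rate only changes once every + # `steps_per_run` batches on both sides.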
+ update_freq = distribution.extended.steps_per_run + + training_epochs = 2 + global_batch_size = 64 + + ds_batch_size = get_batch_size(global_batch_size, distribution) + nods_batch_size = get_batch_size(global_batch_size, None) + + ds_input_fn = functools.partial( + self.get_input_for_dynamic_lr_test, + x=x_train, + y=y_train, + batch_size=ds_batch_size, + shuffle=False, + epochs=training_epochs, + callbacks=[LearningRateBatchScheduler(update_freq)], + validation_data=(x_train, y_train), + ) + + nods_input_fn = functools.partial( + self.get_input_for_dynamic_lr_test, + x=x_train, + y=y_train, + batch_size=nods_batch_size, + shuffle=False, + epochs=training_epochs, + callbacks=[LearningRateBatchScheduler(update_freq)], + validation_data=(x_train, y_train), + ) + + results_with_ds = fit_eval_and_predict( + initial_weights, + input_fn=ds_input_fn, + model_fn=self.get_model, + distribution=distribution, + ) + results_without_ds = fit_eval_and_predict( + initial_weights, + input_fn=nods_input_fn, + model_fn=self.get_model, distribution=None, - input_shapes=None): - raise NotImplementedError - - def run_correctness_test(self, - distribution, - use_numpy, - use_validation_data, - with_batch_norm=None, - is_stateful_model=False, - partial_last_batch=None, - training_epochs=2): - with self.cached_session(): - self.set_up_test_config(use_numpy, use_validation_data, with_batch_norm) - - if partial_last_batch == 'eval': - x_train, y_train, x_eval, y_eval, x_predict = ( - self.get_data_with_partial_last_batch_eval()) - elif partial_last_batch == 'train_and_eval': - x_train, y_train, x_eval, y_eval, x_predict = ( - self.get_data_with_partial_last_batch()) - else: - x_train, y_train, x_predict = self.get_data() - x_eval = x_train - y_eval = y_train - - # The model is built once and the initial weights are saved. - # This is used to initialize the model for both the distribution and - # non-distribution run. - model = self.get_model( - input_shapes=get_shapes(x_train)) - initial_weights = model.get_weights() - - ds_input_fn = functools.partial( - self.get_input_for_correctness_test, - use_numpy=use_numpy, - use_validation_data=use_validation_data, - with_distribution=distribution, - x_train=x_train, - y_train=y_train, - x_eval=x_eval, - y_eval=y_eval, - x_predict=x_predict, - training_epochs=training_epochs) - - nods_input_fn = functools.partial( - self.get_input_for_correctness_test, - use_numpy=use_numpy, - use_validation_data=use_validation_data, - with_distribution=None, - x_train=x_train, - y_train=y_train, - x_eval=x_eval, - y_eval=y_eval, - x_predict=x_predict, - training_epochs=training_epochs) - - results_with_ds = fit_eval_and_predict( - initial_weights, - input_fn=ds_input_fn, - model_fn=self.get_model, - distribution=distribution, - is_stateful_model=is_stateful_model) - results_without_ds = fit_eval_and_predict( - initial_weights, - input_fn=nods_input_fn, - model_fn=self.get_model, - distribution=None, - is_stateful_model=is_stateful_model) - - # First, special case, for multi-replica distributed training, batch - # norm is not aggregated globally. So it is expected to have different - # weights. 
- if (self.with_batch_norm == 'regular' and - distribution.num_replicas_in_sync > 1): - with self.assertRaises(AssertionError): - compare_results( - results_with_ds, - results_without_ds, - distribution, - testcase=self, - partial_last_batch=partial_last_batch) - else: - compare_results( - results_with_ds, - results_without_ds, - distribution, - testcase=self, - partial_last_batch=partial_last_batch) - - def get_input_for_dynamic_lr_test(self, **kwargs): - """Generates inputs that are dictionaries. - - We only provide a default implementation of this method here. If you need - more customized way of providing input to your model, overwrite this method. - - Args: - **kwargs: key word arguments about how to create the input dictionaries - - Returns: - Three dictionaries representing the input for fit(), evaluate() and - predict() - """ - - training_input = kwargs - return training_input, None, None - - def run_dynamic_lr_test(self, - distribution): - with self.cached_session(): - self.set_up_test_config() - - x_train, y_train, _ = self.get_data() - model = self.get_model( - input_shapes=get_shapes(x_train)) - initial_weights = model.get_weights() - update_freq = None - - if (isinstance(distribution, tf.compat.v1.distribute.experimental.TPUStrategy) and - distribution.extended.steps_per_run > 1): - # For TPUStrategy with steps_per_run > 1, the callback is not invoked - # every step. So, to compare the CPU/TPU, we let the CPU to behave the - # same as TPU. - update_freq = distribution.extended.steps_per_run - - training_epochs = 2 - global_batch_size = 64 - - ds_batch_size = get_batch_size(global_batch_size, distribution) - nods_batch_size = get_batch_size(global_batch_size, None) - - ds_input_fn = functools.partial( - self.get_input_for_dynamic_lr_test, - x=x_train, - y=y_train, - batch_size=ds_batch_size, - shuffle=False, - epochs=training_epochs, - callbacks=[LearningRateBatchScheduler(update_freq)], - validation_data=(x_train, y_train)) - - nods_input_fn = functools.partial( - self.get_input_for_dynamic_lr_test, - x=x_train, - y=y_train, - batch_size=nods_batch_size, - shuffle=False, - epochs=training_epochs, - callbacks=[LearningRateBatchScheduler(update_freq)], - validation_data=(x_train, y_train)) - - results_with_ds = fit_eval_and_predict( - initial_weights, - input_fn=ds_input_fn, - model_fn=self.get_model, - distribution=distribution) - results_without_ds = fit_eval_and_predict( - initial_weights, - input_fn=nods_input_fn, - model_fn=self.get_model, - distribution=None) - compare_results( - results_with_ds, results_without_ds, distribution, testcase=self) + ) + compare_results( + results_with_ds, results_without_ds, distribution, testcase=self + ) class TestDistributionStrategyEmbeddingModelCorrectnessBase( - TestDistributionStrategyCorrectnessBase): - """Base class to test correctness of Keras models with embedding layers.""" - - def get_data(self, - count=(_GLOBAL_BATCH_SIZE * _EVAL_STEPS), - min_words=5, - max_words=10, - max_word_id=19, - num_classes=2): - distribution = [] - for _ in range(num_classes): - dist = np.abs(np.random.randn(max_word_id)) - dist /= np.sum(dist) - distribution.append(dist) - - features = [] - labels = [] - for _ in range(count): - label = np.random.randint(0, num_classes, size=1)[0] - num_words = np.random.randint(min_words, max_words, size=1)[0] - word_ids = np.random.choice( - max_word_id, size=num_words, replace=True, p=distribution[label]) - word_ids = word_ids - labels.append(label) - features.append(word_ids) - - features = 
data_utils.pad_sequences( - features, maxlen=max_words) - x_train = np.asarray(features, dtype=np.float32) - y_train = np.asarray(labels, dtype=np.int32).reshape((count, 1)) - x_predict = x_train[:_GLOBAL_BATCH_SIZE] - return x_train, y_train, x_predict - - -if __name__ == '__main__': - tf.test.main() + TestDistributionStrategyCorrectnessBase +): + """Base class to test correctness of Keras models with embedding layers.""" + + def get_data( + self, + count=(_GLOBAL_BATCH_SIZE * _EVAL_STEPS), + min_words=5, + max_words=10, + max_word_id=19, + num_classes=2, + ): + distribution = [] + for _ in range(num_classes): + dist = np.abs(np.random.randn(max_word_id)) + dist /= np.sum(dist) + distribution.append(dist) + + features = [] + labels = [] + for _ in range(count): + label = np.random.randint(0, num_classes, size=1)[0] + num_words = np.random.randint(min_words, max_words, size=1)[0] + word_ids = np.random.choice( + max_word_id, size=num_words, replace=True, p=distribution[label] + ) + word_ids = word_ids + labels.append(label) + features.append(word_ids) + + features = data_utils.pad_sequences(features, maxlen=max_words) + x_train = np.asarray(features, dtype=np.float32) + y_train = np.asarray(labels, dtype=np.int32).reshape((count, 1)) + x_predict = x_train[:_GLOBAL_BATCH_SIZE] + return x_train, y_train, x_predict + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/distribute/keras_dnn_correctness_test.py b/keras/distribute/keras_dnn_correctness_test.py index d4d1602cfc56..9577957a236c 100644 --- a/keras/distribute/keras_dnn_correctness_test.py +++ b/keras/distribute/keras_dnn_correctness_test.py @@ -14,312 +14,361 @@ # ============================================================================== """Correctness tests for tf.keras DNN model using DistributionStrategy.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf import keras from keras import backend -from keras.testing_infra import test_utils from keras.distribute import keras_correctness_test_base from keras.distribute import strategy_combinations -from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_keras +from keras.optimizers.legacy import gradient_descent as gradient_descent_keras +from keras.testing_infra import test_utils def all_strategy_combinations_with_eager_and_graph_modes(): - return (tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.all_strategies, - mode=['graph', 'eager']) + tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.multi_worker_mirrored_strategies, - mode='eager')) + return tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.all_strategies, + mode=["graph", "eager"], + ) + tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.multi_worker_mirrored_strategies, + mode="eager", + ) def all_strategy_combinations_with_graph_mode(): - return (tf.__internal__.test.combinations.combine( - distribution=keras_correctness_test_base.all_strategies, - mode=['graph'])) + return tf.__internal__.test.combinations.combine( + distribution=keras_correctness_test_base.all_strategies, mode=["graph"] + ) def is_default_strategy(strategy): - with strategy.scope(): - return not tf.distribute.has_strategy() + with strategy.scope(): + return not tf.distribute.has_strategy() @test_utils.run_all_without_tensor_float_32( - 'Uses Dense layers, which call matmul') + "Uses Dense layers, which call matmul" +) class 
TestDistributionStrategyDnnCorrectness( - keras_correctness_test_base.TestDistributionStrategyCorrectnessBase): - - def get_model(self, - initial_weights=None, - distribution=None, - input_shapes=None): - with keras_correctness_test_base.MaybeDistributionScope(distribution): - # We add few non-linear layers to make it non-trivial. - model = keras.Sequential() - model.add(keras.layers.Dense(10, activation='relu', input_shape=(1,))) - model.add( - keras.layers.Dense( - 10, - activation='relu', - kernel_regularizer=keras.regularizers.l2(1e-4))) - model.add(keras.layers.Dense(10, activation='relu')) - model.add(keras.layers.Dense(1)) - - if initial_weights: - model.set_weights(initial_weights) - - model.compile( - loss=keras.losses.mean_squared_error, - optimizer=gradient_descent_keras.SGD(0.05), - metrics=['mse']) - return model - - def get_data(self): - x_train = np.random.rand(9984, 1).astype('float32') - y_train = 3 * x_train - x_predict = np.array([[1.], [2.], [3.], [4.]], dtype=np.float32) - return x_train, y_train, x_predict - - def get_data_with_partial_last_batch(self): - x_train = np.random.rand(10000, 1).astype('float32') - y_train = 3 * x_train - x_eval = np.random.rand(10000, 1).astype('float32') - y_eval = 3 * x_eval - x_predict = np.array([[1.], [2.], [3.], [4.]], dtype=np.float32) - return x_train, y_train, x_eval, y_eval, x_predict - - def get_data_with_partial_last_batch_eval(self): - x_train = np.random.rand(9984, 1).astype('float32') - y_train = 3 * x_train - x_eval = np.random.rand(10000, 1).astype('float32') - y_eval = 3 * x_eval - x_predict = np.array([[1.], [2.], [3.], [4.]], dtype=np.float32) - return x_train, y_train, x_eval, y_eval, x_predict - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.all_strategy_and_input_config_combinations() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_dnn_correctness(self, distribution, use_numpy, use_validation_data): - self.run_correctness_test(distribution, use_numpy, use_validation_data) - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base - .test_combinations_with_tpu_strategies_graph() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_dnn_correctness_with_partial_last_batch_eval(self, distribution, - use_numpy, - use_validation_data): - self.run_correctness_test( - distribution, use_numpy, use_validation_data, partial_last_batch='eval') - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base - .strategy_minus_tpu_and_input_config_combinations_eager() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_dnn_correctness_with_partial_last_batch(self, distribution, - use_numpy, - use_validation_data): - distribution.extended.experimental_enable_get_next_as_optional = True - self.run_correctness_test( - distribution, - use_numpy, - use_validation_data, - partial_last_batch='train_and_eval', - training_epochs=1) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations_with_graph_mode()) - def test_dnn_with_dynamic_learning_rate(self, distribution): - self.run_dynamic_lr_test(distribution) + keras_correctness_test_base.TestDistributionStrategyCorrectnessBase +): + def get_model( + self, initial_weights=None, distribution=None, input_shapes=None + ): + with keras_correctness_test_base.MaybeDistributionScope(distribution): + # We add a few non-linear layers to make it non-trivial.
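+ # (The target data below is linear, y = 3x, so a purely linear + # stack would make the fit trivial.)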
+ model = keras.Sequential() + model.add( + keras.layers.Dense(10, activation="relu", input_shape=(1,)) + ) + model.add( + keras.layers.Dense( + 10, + activation="relu", + kernel_regularizer=keras.regularizers.l2(1e-4), + ) + ) + model.add(keras.layers.Dense(10, activation="relu")) + model.add(keras.layers.Dense(1)) + + if initial_weights: + model.set_weights(initial_weights) + + model.compile( + loss=keras.losses.mean_squared_error, + optimizer=gradient_descent_keras.SGD(0.05), + metrics=["mse"], + ) + return model + + def get_data(self): + x_train = np.random.rand(9984, 1).astype("float32") + y_train = 3 * x_train + x_predict = np.array([[1.0], [2.0], [3.0], [4.0]], dtype=np.float32) + return x_train, y_train, x_predict + + def get_data_with_partial_last_batch(self): + x_train = np.random.rand(10000, 1).astype("float32") + y_train = 3 * x_train + x_eval = np.random.rand(10000, 1).astype("float32") + y_eval = 3 * x_eval + x_predict = np.array([[1.0], [2.0], [3.0], [4.0]], dtype=np.float32) + return x_train, y_train, x_eval, y_eval, x_predict + + def get_data_with_partial_last_batch_eval(self): + x_train = np.random.rand(9984, 1).astype("float32") + y_train = 3 * x_train + x_eval = np.random.rand(10000, 1).astype("float32") + y_eval = 3 * x_eval + x_predict = np.array([[1.0], [2.0], [3.0], [4.0]], dtype=np.float32) + return x_train, y_train, x_eval, y_eval, x_predict + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.all_strategy_and_input_config_combinations() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_dnn_correctness( + self, distribution, use_numpy, use_validation_data + ): + self.run_correctness_test(distribution, use_numpy, use_validation_data) + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.test_combinations_with_tpu_strategies_graph() # noqa: E501 + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_dnn_correctness_with_partial_last_batch_eval( + self, distribution, use_numpy, use_validation_data + ): + self.run_correctness_test( + distribution, + use_numpy, + use_validation_data, + partial_last_batch="eval", + ) + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.strategy_minus_tpu_and_input_config_combinations_eager() # noqa: E501 + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_dnn_correctness_with_partial_last_batch( + self, distribution, use_numpy, use_validation_data + ): + distribution.extended.experimental_enable_get_next_as_optional = True + self.run_correctness_test( + distribution, + use_numpy, + use_validation_data, + partial_last_batch="train_and_eval", + training_epochs=1, + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations_with_graph_mode() + ) + def test_dnn_with_dynamic_learning_rate(self, distribution): + self.run_dynamic_lr_test(distribution) class TestDistributionStrategyDnnMetricCorrectness( - keras_correctness_test_base.TestDistributionStrategyCorrectnessBase): - - def get_model(self, - distribution=None, - input_shapes=None): - with distribution.scope(): - model = keras.Sequential() - model.add( - keras.layers.Dense(1, input_shape=(1,), kernel_initializer='ones')) - model.compile( - loss=keras.losses.mean_squared_error, - optimizer=gradient_descent_keras.SGD(0.05), - metrics=[keras.metrics.BinaryAccuracy()]) - return model - - def run_metric_correctness_test(self, distribution): - with self.cached_session(): - self.set_up_test_config() - - 
x_train, y_train, _ = self.get_data() - model = self.get_model( - distribution=distribution) - - batch_size = 64 - batch_size = ( - keras_correctness_test_base.get_batch_size(batch_size, distribution)) - train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - train_dataset = ( - keras_correctness_test_base.batch_wrapper(train_dataset, batch_size)) - - history = model.fit(x=train_dataset, epochs=2, steps_per_epoch=10) - self.assertEqual(history.history['binary_accuracy'], [1.0, 1.0]) - - @tf.__internal__.distribute.combinations.generate( - all_strategy_combinations_with_eager_and_graph_modes()) - def test_simple_dnn_metric_correctness(self, distribution): - self.run_metric_correctness_test(distribution) + keras_correctness_test_base.TestDistributionStrategyCorrectnessBase +): + def get_model(self, distribution=None, input_shapes=None): + with distribution.scope(): + model = keras.Sequential() + model.add( + keras.layers.Dense( + 1, input_shape=(1,), kernel_initializer="ones" + ) + ) + model.compile( + loss=keras.losses.mean_squared_error, + optimizer=gradient_descent_keras.SGD(0.05), + metrics=[keras.metrics.BinaryAccuracy()], + ) + return model + + def run_metric_correctness_test(self, distribution): + with self.cached_session(): + self.set_up_test_config() + + x_train, y_train, _ = self.get_data() + model = self.get_model(distribution=distribution) + + batch_size = 64 + batch_size = keras_correctness_test_base.get_batch_size( + batch_size, distribution + ) + train_dataset = tf.data.Dataset.from_tensor_slices( + (x_train, y_train) + ) + train_dataset = keras_correctness_test_base.batch_wrapper( + train_dataset, batch_size + ) + + history = model.fit(x=train_dataset, epochs=2, steps_per_epoch=10) + self.assertEqual(history.history["binary_accuracy"], [1.0, 1.0]) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations_with_eager_and_graph_modes() + ) + def test_simple_dnn_metric_correctness(self, distribution): + self.run_metric_correctness_test(distribution) class TestDistributionStrategyDnnMetricEvalCorrectness( - keras_correctness_test_base.TestDistributionStrategyCorrectnessBase): - - def get_model(self, - distribution=None, - input_shapes=None): - with distribution.scope(): - model = keras.Sequential() - model.add( - keras.layers.Dense( - 3, activation='relu', input_dim=4, kernel_initializer='ones')) - model.add( - keras.layers.Dense( - 1, activation='sigmoid', kernel_initializer='ones')) - model.compile( - loss='mae', - metrics=['accuracy', keras.metrics.BinaryAccuracy()], - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.001)) - return model - - def run_eval_metrics_correctness_test(self, distribution): - with self.cached_session(): - self.set_up_test_config() - - model = self.get_model( - distribution=distribution) - - # verify correctness of stateful and stateless metrics. - x = np.ones((100, 4)).astype('float32') - y = np.ones((100, 1)).astype('float32') - dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat() - dataset = keras_correctness_test_base.batch_wrapper(dataset, 4) - outs = model.evaluate(dataset, steps=10) - self.assertEqual(outs[1], 1.) - self.assertEqual(outs[2], 1.) - - y = np.zeros((100, 1)).astype('float32') - dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat() - dataset = keras_correctness_test_base.batch_wrapper(dataset, 4) - outs = model.evaluate(dataset, steps=10) - self.assertEqual(outs[1], 0.) - self.assertEqual(outs[2], 0.) 
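As an aside (not part of the diff): a minimal standalone sketch of the pattern the eval-metrics test above exercises, with the DistributionStrategy harness stripped away. The layer sizes, initializers, loss, and data mirror the test; the optimizer and everything else here are illustrative only.

import numpy as np

import keras

# With all-ones kernels, an all-ones input yields relu(4) = 4 on each of
# the three hidden units, then sigmoid(12) ~= 1.0, so every prediction
# rounds to 1.
model = keras.Sequential(
    [
        keras.layers.Dense(
            3, activation="relu", input_dim=4, kernel_initializer="ones"
        ),
        keras.layers.Dense(
            1, activation="sigmoid", kernel_initializer="ones"
        ),
    ]
)
model.compile(
    loss="mae",
    metrics=["accuracy", keras.metrics.BinaryAccuracy()],
    optimizer="sgd",
)

x = np.ones((100, 4), dtype="float32")
# evaluate() returns [loss, accuracy, binary_accuracy]; the stateless
# string metric and the stateful metric object should agree: 1.0 against
# all-ones labels and 0.0 against all-zeros labels.
print(model.evaluate(x, np.ones((100, 1), dtype="float32"), verbose=0))
print(model.evaluate(x, np.zeros((100, 1), dtype="float32"), verbose=0))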
- - @tf.__internal__.distribute.combinations.generate( - all_strategy_combinations_with_eager_and_graph_modes()) - def test_identity_model_metric_eval_correctness(self, distribution): - self.run_eval_metrics_correctness_test(distribution) + keras_correctness_test_base.TestDistributionStrategyCorrectnessBase +): + def get_model(self, distribution=None, input_shapes=None): + with distribution.scope(): + model = keras.Sequential() + model.add( + keras.layers.Dense( + 3, activation="relu", input_dim=4, kernel_initializer="ones" + ) + ) + model.add( + keras.layers.Dense( + 1, activation="sigmoid", kernel_initializer="ones" + ) + ) + model.compile( + loss="mae", + metrics=["accuracy", keras.metrics.BinaryAccuracy()], + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.001), + ) + return model + + def run_eval_metrics_correctness_test(self, distribution): + with self.cached_session(): + self.set_up_test_config() + + model = self.get_model(distribution=distribution) + + # verify correctness of stateful and stateless metrics. + x = np.ones((100, 4)).astype("float32") + y = np.ones((100, 1)).astype("float32") + dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat() + dataset = keras_correctness_test_base.batch_wrapper(dataset, 4) + outs = model.evaluate(dataset, steps=10) + self.assertEqual(outs[1], 1.0) + self.assertEqual(outs[2], 1.0) + + y = np.zeros((100, 1)).astype("float32") + dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat() + dataset = keras_correctness_test_base.batch_wrapper(dataset, 4) + outs = model.evaluate(dataset, steps=10) + self.assertEqual(outs[1], 0.0) + self.assertEqual(outs[2], 0.0) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations_with_eager_and_graph_modes() + ) + def test_identity_model_metric_eval_correctness(self, distribution): + self.run_eval_metrics_correctness_test(distribution) class SubclassedModel(keras.Model): - - def __init__(self, initial_weights, input_shapes): - super().__init__() - self.dense1 = keras.layers.Dense(10, activation='relu', input_shape=(1,)) - self.dense2 = keras.layers.Dense( - 10, activation='relu', kernel_regularizer=keras.regularizers.l2(1e-4)) - self.dense3 = keras.layers.Dense(10, activation='relu') - self.dense4 = keras.layers.Dense(1) - if input_shapes: - self.build(input_shapes) - else: - # This covers cases when the input is DatasetV1Adapter. - self.build((None, 1)) - if initial_weights: - self.set_weights(initial_weights) - - def call(self, inputs): - x = self.dense1(inputs) - x = self.dense2(x) - x = self.dense3(x) - return self.dense4(x) + def __init__(self, initial_weights, input_shapes): + super().__init__() + self.dense1 = keras.layers.Dense( + 10, activation="relu", input_shape=(1,) + ) + self.dense2 = keras.layers.Dense( + 10, + activation="relu", + kernel_regularizer=keras.regularizers.l2(1e-4), + ) + self.dense3 = keras.layers.Dense(10, activation="relu") + self.dense4 = keras.layers.Dense(1) + if input_shapes: + self.build(input_shapes) + else: + # This covers cases when the input is DatasetV1Adapter. 
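+ # whose element shape may not be known at build time; (None, 1) + # matches the single-feature inputs used throughout these tests.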
+ self.build((None, 1)) + if initial_weights: + self.set_weights(initial_weights) + + def call(self, inputs): + x = self.dense1(inputs) + x = self.dense2(x) + x = self.dense3(x) + return self.dense4(x) @test_utils.run_all_without_tensor_float_32( - 'Uses Dense layers, which call matmul') + "Uses Dense layers, which call matmul" +) class TestDistributionStrategyDnnCorrectnessWithSubclassedModel( - TestDistributionStrategyDnnCorrectness): - - def get_model(self, - initial_weights=None, - distribution=None, - input_shapes=None): - with keras_correctness_test_base.MaybeDistributionScope(distribution): - model = SubclassedModel(initial_weights, input_shapes) - - model.compile( - loss=keras.losses.mean_squared_error, - optimizer=gradient_descent_keras.SGD(0.05), - metrics=['mse']) - return model - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.all_strategy_and_input_config_combinations() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_dnn_correctness(self, distribution, use_numpy, use_validation_data): - if (tf.executing_eagerly()) or is_default_strategy(distribution): - self.run_correctness_test(distribution, use_numpy, use_validation_data) - elif (backend.is_tpu_strategy(distribution) - and not tf.executing_eagerly()): - with self.assertRaisesRegex( - ValueError, - 'Expected `model` argument to be a functional `Model` instance, ' - 'but got a subclassed model instead.'): - self.run_correctness_test(distribution, use_numpy, use_validation_data) - else: - with self.assertRaisesRegex( - ValueError, - 'We currently do not support distribution strategy with a ' - '`Sequential` model that is created without `input_shape`/' - '`input_dim` set in its first layer or a subclassed model.'): - self.run_correctness_test(distribution, use_numpy, use_validation_data) - - @tf.__internal__.distribute.combinations.generate(all_strategy_combinations_with_graph_mode()) - def test_dnn_with_dynamic_learning_rate(self, distribution): - if ((tf.executing_eagerly() - and not backend.is_tpu_strategy(distribution)) - or is_default_strategy(distribution)): - self.run_dynamic_lr_test(distribution) - elif backend.is_tpu_strategy(distribution): - with self.assertRaisesRegex( - ValueError, - 'Expected `model` argument to be a functional `Model` instance, ' - 'but got a subclassed model instead.'): - self.run_dynamic_lr_test(distribution) - else: - with self.assertRaisesRegex( - ValueError, - 'We currently do not support distribution strategy with a ' - '`Sequential` model that is created without `input_shape`/' - '`input_dim` set in its first layer or a subclassed model.'): - self.run_dynamic_lr_test(distribution) - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.test_combinations_with_tpu_strategies_graph()) - def test_dnn_correctness_with_partial_last_batch_eval(self, distribution, - use_numpy, - use_validation_data): - with self.assertRaisesRegex( - ValueError, - 'Expected `model` argument to be a functional `Model` instance, ' - 'but got a subclassed model instead.'): - self.run_correctness_test( - distribution, - use_numpy, - use_validation_data, - partial_last_batch='eval') - - -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() + TestDistributionStrategyDnnCorrectness +): + def get_model( + self, initial_weights=None, distribution=None, input_shapes=None + ): + with keras_correctness_test_base.MaybeDistributionScope(distribution): + model = SubclassedModel(initial_weights, 
input_shapes) + + model.compile( + loss=keras.losses.mean_squared_error, + optimizer=gradient_descent_keras.SGD(0.05), + metrics=["mse"], + ) + return model + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.all_strategy_and_input_config_combinations() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_dnn_correctness( + self, distribution, use_numpy, use_validation_data + ): + if (tf.executing_eagerly()) or is_default_strategy(distribution): + self.run_correctness_test( + distribution, use_numpy, use_validation_data + ) + elif ( + backend.is_tpu_strategy(distribution) and not tf.executing_eagerly() + ): + with self.assertRaisesRegex( + ValueError, + "Expected `model` argument to be a functional `Model` " + "instance, but got a subclassed model instead.", + ): + self.run_correctness_test( + distribution, use_numpy, use_validation_data + ) + else: + with self.assertRaisesRegex( + ValueError, + "We currently do not support distribution strategy with a " + "`Sequential` model that is created without `input_shape`/" + "`input_dim` set in its first layer or a subclassed model.", + ): + self.run_correctness_test( + distribution, use_numpy, use_validation_data + ) + + @tf.__internal__.distribute.combinations.generate( + all_strategy_combinations_with_graph_mode() + ) + def test_dnn_with_dynamic_learning_rate(self, distribution): + if ( + tf.executing_eagerly() and not backend.is_tpu_strategy(distribution) + ) or is_default_strategy(distribution): + self.run_dynamic_lr_test(distribution) + elif backend.is_tpu_strategy(distribution): + with self.assertRaisesRegex( + ValueError, + "Expected `model` argument to be a functional `Model` " + "instance, but got a subclassed model instead.", + ): + self.run_dynamic_lr_test(distribution) + else: + with self.assertRaisesRegex( + ValueError, + "We currently do not support distribution strategy with a " + "`Sequential` model that is created without `input_shape`/" + "`input_dim` set in its first layer or a subclassed model.", + ): + self.run_dynamic_lr_test(distribution) + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.test_combinations_with_tpu_strategies_graph() # noqa: E501 + ) + def test_dnn_correctness_with_partial_last_batch_eval( + self, distribution, use_numpy, use_validation_data + ): + with self.assertRaisesRegex( + ValueError, + "Expected `model` argument to be a functional `Model` instance, " + "but got a subclassed model instead.", + ): + self.run_correctness_test( + distribution, + use_numpy, + use_validation_data, + partial_last_batch="eval", + ) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/keras_embedding_model_correctness_test.py b/keras/distribute/keras_embedding_model_correctness_test.py index a5c041e75429..f126c41609a1 100644 --- a/keras/distribute/keras_embedding_model_correctness_test.py +++ b/keras/distribute/keras_embedding_model_correctness_test.py @@ -14,142 +14,162 @@ # ============================================================================== """Correctness test for tf.keras Embedding models using DistributionStrategy.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.distribute import keras_correctness_test_base -from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_keras +from keras.optimizers.legacy import gradient_descent as gradient_descent_keras class 
DistributionStrategyEmbeddingModelCorrectnessTest( - keras_correctness_test_base - .TestDistributionStrategyEmbeddingModelCorrectnessBase): - - def get_model(self, - max_words=10, - initial_weights=None, - distribution=None, - input_shapes=None): - del input_shapes - with keras_correctness_test_base.MaybeDistributionScope(distribution): - word_ids = keras.layers.Input( - shape=(max_words,), dtype=np.int32, name='words') - word_embed = keras.layers.Embedding(input_dim=20, output_dim=10)(word_ids) - if self.use_distributed_dense: - word_embed = keras.layers.TimeDistributed(keras.layers.Dense(4))( - word_embed) - avg = keras.layers.GlobalAveragePooling1D()(word_embed) - preds = keras.layers.Dense(2, activation='softmax')(avg) - model = keras.Model(inputs=[word_ids], outputs=[preds]) - - if initial_weights: - model.set_weights(initial_weights) - - model.compile( - optimizer=gradient_descent_keras.SGD(learning_rate=0.1), - loss='sparse_categorical_crossentropy', - metrics=['sparse_categorical_accuracy']) - return model - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.test_combinations_for_embedding_model() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_embedding_model_correctness(self, distribution, use_numpy, - use_validation_data): - - self.use_distributed_dense = False - self.run_correctness_test(distribution, use_numpy, use_validation_data) - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.test_combinations_for_embedding_model() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_embedding_time_distributed_model_correctness( - self, distribution, use_numpy, use_validation_data): - self.use_distributed_dense = True - self.run_correctness_test(distribution, use_numpy, use_validation_data) + keras_correctness_test_base.TestDistributionStrategyEmbeddingModelCorrectnessBase # noqa: E501 +): + def get_model( + self, + max_words=10, + initial_weights=None, + distribution=None, + input_shapes=None, + ): + del input_shapes + with keras_correctness_test_base.MaybeDistributionScope(distribution): + word_ids = keras.layers.Input( + shape=(max_words,), dtype=np.int32, name="words" + ) + word_embed = keras.layers.Embedding(input_dim=20, output_dim=10)( + word_ids + ) + if self.use_distributed_dense: + word_embed = keras.layers.TimeDistributed( + keras.layers.Dense(4) + )(word_embed) + avg = keras.layers.GlobalAveragePooling1D()(word_embed) + preds = keras.layers.Dense(2, activation="softmax")(avg) + model = keras.Model(inputs=[word_ids], outputs=[preds]) + + if initial_weights: + model.set_weights(initial_weights) + + model.compile( + optimizer=gradient_descent_keras.SGD(learning_rate=0.1), + loss="sparse_categorical_crossentropy", + metrics=["sparse_categorical_accuracy"], + ) + return model + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.test_combinations_for_embedding_model() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_embedding_model_correctness( + self, distribution, use_numpy, use_validation_data + ): + + self.use_distributed_dense = False + self.run_correctness_test(distribution, use_numpy, use_validation_data) + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.test_combinations_for_embedding_model() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_embedding_time_distributed_model_correctness( + self, distribution, use_numpy, use_validation_data + ): 
+ self.use_distributed_dense = True + self.run_correctness_test(distribution, use_numpy, use_validation_data) class DistributionStrategySiameseEmbeddingModelCorrectnessTest( - keras_correctness_test_base - .TestDistributionStrategyEmbeddingModelCorrectnessBase): - - def get_model(self, - max_words=10, - initial_weights=None, - distribution=None, - input_shapes=None): - del input_shapes - with keras_correctness_test_base.MaybeDistributionScope(distribution): - word_ids_a = keras.layers.Input( - shape=(max_words,), dtype=np.int32, name='words_a') - word_ids_b = keras.layers.Input( - shape=(max_words,), dtype=np.int32, name='words_b') - - def submodel(embedding, word_ids): - word_embed = embedding(word_ids) - rep = keras.layers.GlobalAveragePooling1D()(word_embed) - return keras.Model(inputs=[word_ids], outputs=[rep]) - - word_embed = keras.layers.Embedding( - input_dim=20, - output_dim=10, - input_length=max_words, - embeddings_initializer=keras.initializers.RandomUniform(0, 1)) - - a_rep = submodel(word_embed, word_ids_a).outputs[0] - b_rep = submodel(word_embed, word_ids_b).outputs[0] - sim = keras.layers.Dot(axes=1, normalize=True)([a_rep, b_rep]) - - model = keras.Model(inputs=[word_ids_a, word_ids_b], outputs=[sim]) - - if initial_weights: - model.set_weights(initial_weights) - - # TODO(b/130808953): Switch back to the V1 optimizer after global_step - # is made mirrored. - model.compile( - optimizer=gradient_descent_keras.SGD(learning_rate=0.1), - loss='mse', - metrics=['mse']) - return model - - def get_data(self, - count=(keras_correctness_test_base._GLOBAL_BATCH_SIZE * - keras_correctness_test_base._EVAL_STEPS), - min_words=5, - max_words=10, - max_word_id=19, - num_classes=2): - features_a, labels_a, _ = ( - super().get_data(count, min_words, max_words, max_word_id, - num_classes)) - - features_b, labels_b, _ = ( - super().get_data(count, min_words, max_words, max_word_id, - num_classes)) - - y_train = np.zeros((count, 1), dtype=np.float32) - y_train[labels_a == labels_b] = 1.0 - y_train[labels_a != labels_b] = -1.0 - # TODO(b/123360757): Add tests for using list as inputs for multi-input - # models. 
- x_train = { - 'words_a': features_a, - 'words_b': features_b, - } - x_predict = x_train - - return x_train, y_train, x_predict - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.test_combinations_for_embedding_model() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_siamese_embedding_model_correctness(self, distribution, use_numpy, - use_validation_data): - self.run_correctness_test(distribution, use_numpy, use_validation_data) - - -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() + keras_correctness_test_base.TestDistributionStrategyEmbeddingModelCorrectnessBase # noqa: E501 +): + def get_model( + self, + max_words=10, + initial_weights=None, + distribution=None, + input_shapes=None, + ): + del input_shapes + with keras_correctness_test_base.MaybeDistributionScope(distribution): + word_ids_a = keras.layers.Input( + shape=(max_words,), dtype=np.int32, name="words_a" + ) + word_ids_b = keras.layers.Input( + shape=(max_words,), dtype=np.int32, name="words_b" + ) + + def submodel(embedding, word_ids): + word_embed = embedding(word_ids) + rep = keras.layers.GlobalAveragePooling1D()(word_embed) + return keras.Model(inputs=[word_ids], outputs=[rep]) + + word_embed = keras.layers.Embedding( + input_dim=20, + output_dim=10, + input_length=max_words, + embeddings_initializer=keras.initializers.RandomUniform(0, 1), + ) + + a_rep = submodel(word_embed, word_ids_a).outputs[0] + b_rep = submodel(word_embed, word_ids_b).outputs[0] + sim = keras.layers.Dot(axes=1, normalize=True)([a_rep, b_rep]) + + model = keras.Model(inputs=[word_ids_a, word_ids_b], outputs=[sim]) + + if initial_weights: + model.set_weights(initial_weights) + + # TODO(b/130808953): Switch back to the V1 optimizer after + # global_step is made mirrored. + model.compile( + optimizer=gradient_descent_keras.SGD(learning_rate=0.1), + loss="mse", + metrics=["mse"], + ) + return model + + def get_data( + self, + count=( + keras_correctness_test_base._GLOBAL_BATCH_SIZE + * keras_correctness_test_base._EVAL_STEPS + ), + min_words=5, + max_words=10, + max_word_id=19, + num_classes=2, + ): + features_a, labels_a, _ = super().get_data( + count, min_words, max_words, max_word_id, num_classes + ) + + features_b, labels_b, _ = super().get_data( + count, min_words, max_words, max_word_id, num_classes + ) + + y_train = np.zeros((count, 1), dtype=np.float32) + y_train[labels_a == labels_b] = 1.0 + y_train[labels_a != labels_b] = -1.0 + # TODO(b/123360757): Add tests for using list as inputs for multi-input + # models. 
+ x_train = { + "words_a": features_a, + "words_b": features_b, + } + x_predict = x_train + + return x_train, y_train, x_predict + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.test_combinations_for_embedding_model() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_siamese_embedding_model_correctness( + self, distribution, use_numpy, use_validation_data + ): + self.run_correctness_test(distribution, use_numpy, use_validation_data) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/keras_image_model_correctness_test.py b/keras/distribute/keras_image_model_correctness_test.py index dee432912102..687c180aa3f5 100644 --- a/keras/distribute/keras_image_model_correctness_test.py +++ b/keras/distribute/keras_image_model_correctness_test.py @@ -14,150 +14,169 @@ # ============================================================================== """Correctness tests for tf.keras CNN models using DistributionStrategy.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np import keras -from keras.testing_infra import test_utils from keras.distribute import keras_correctness_test_base -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import gradient_descent +from keras.testing_infra import test_utils @test_utils.run_all_without_tensor_float_32( - 'Uses Dense layers, which call matmul. Even if Dense layers run in ' - 'float64, the test sometimes fails with TensorFloat-32 enabled for unknown ' - 'reasons') + "Uses Dense layers, which call matmul. Even if Dense layers run in " + "float64, the test sometimes fails with TensorFloat-32 enabled for unknown " + "reasons" +) +@test_utils.run_v2_only() class DistributionStrategyCnnCorrectnessTest( - keras_correctness_test_base.TestDistributionStrategyCorrectnessBase): - - def get_model(self, - initial_weights=None, - distribution=None, - input_shapes=None): - del input_shapes - with keras_correctness_test_base.MaybeDistributionScope(distribution): - image = keras.layers.Input(shape=(28, 28, 3), name='image') - c1 = keras.layers.Conv2D( - name='conv1', - filters=16, - kernel_size=(3, 3), - strides=(4, 4), - kernel_regularizer=keras.regularizers.l2(1e-4))( - image) - if self.with_batch_norm == 'regular': - c1 = keras.layers.BatchNormalization(name='bn1')(c1) - elif self.with_batch_norm == 'sync': - # Test with parallel batch norms to verify all-reduce works OK. 
- bn1 = keras.layers.SyncBatchNormalization(name='bn1')(c1) - bn2 = keras.layers.SyncBatchNormalization(name='bn2')(c1) - c1 = keras.layers.Add()([bn1, bn2]) - c1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(c1) - logits = keras.layers.Dense( - 10, activation='softmax', name='pred')( - keras.layers.Flatten()(c1)) - model = keras.Model(inputs=[image], outputs=[logits]) - - if initial_weights: - model.set_weights(initial_weights) - - model.compile( - optimizer=gradient_descent.SGD(learning_rate=0.1), - loss='sparse_categorical_crossentropy', - metrics=['sparse_categorical_accuracy']) - - return model - - def _get_data(self, count, shape=(28, 28, 3), num_classes=10): - centers = np.random.randn(num_classes, *shape) - - features = [] - labels = [] - for _ in range(count): - label = np.random.randint(0, num_classes, size=1)[0] - offset = np.random.normal(loc=0, scale=0.1, size=np.prod(shape)) - offset = offset.reshape(shape) - labels.append(label) - features.append(centers[label] + offset) - - x = np.asarray(features, dtype=np.float32) - y = np.asarray(labels, dtype=np.float32).reshape((count, 1)) - return x, y - - def get_data(self): - x_train, y_train = self._get_data( - count=keras_correctness_test_base._GLOBAL_BATCH_SIZE * - keras_correctness_test_base._EVAL_STEPS) - x_predict = x_train - return x_train, y_train, x_predict - - def get_data_with_partial_last_batch_eval(self): - x_train, y_train = self._get_data(count=1280) - x_eval, y_eval = self._get_data(count=1000) - return x_train, y_train, x_eval, y_eval, x_eval - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.all_strategy_and_input_config_combinations() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_cnn_correctness(self, distribution, use_numpy, use_validation_data): - if (distribution == - tf.__internal__.distribute.combinations.central_storage_strategy_with_gpu_and_cpu): - self.skipTest('b/183958183') - self.run_correctness_test(distribution, use_numpy, use_validation_data) - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.all_strategy_and_input_config_combinations() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_cnn_with_batch_norm_correctness(self, distribution, use_numpy, - use_validation_data): - self.run_correctness_test( - distribution, - use_numpy, - use_validation_data, - with_batch_norm='regular') - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.all_strategy_and_input_config_combinations() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_cnn_with_sync_batch_norm_correctness(self, distribution, use_numpy, - use_validation_data): - if not tf.executing_eagerly(): - self.skipTest('SyncBatchNorm is not enabled in graph mode.') - - self.run_correctness_test( - distribution, - use_numpy, - use_validation_data, - with_batch_norm='sync') - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base - .all_strategy_and_input_config_combinations_eager() + - keras_correctness_test_base.multi_worker_mirrored_eager() + - keras_correctness_test_base.test_combinations_with_tpu_strategies_graph()) - def test_cnn_correctness_with_partial_last_batch_eval(self, distribution, - use_numpy, - use_validation_data): - self.run_correctness_test( - distribution, - use_numpy, - use_validation_data, - partial_last_batch=True, - training_epochs=1) - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base. 
- all_strategy_and_input_config_combinations_eager() + - keras_correctness_test_base.multi_worker_mirrored_eager() + - keras_correctness_test_base.test_combinations_with_tpu_strategies_graph()) - def test_cnn_with_batch_norm_correctness_and_partial_last_batch_eval( - self, distribution, use_numpy, use_validation_data): - self.run_correctness_test( - distribution, - use_numpy, - use_validation_data, - with_batch_norm='regular', - partial_last_batch=True) - - -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() + keras_correctness_test_base.TestDistributionStrategyCorrectnessBase +): + def get_model( + self, initial_weights=None, distribution=None, input_shapes=None + ): + del input_shapes + with keras_correctness_test_base.MaybeDistributionScope(distribution): + image = keras.layers.Input(shape=(28, 28, 3), name="image") + c1 = keras.layers.Conv2D( + name="conv1", + filters=16, + kernel_size=(3, 3), + strides=(4, 4), + kernel_regularizer=keras.regularizers.l2(1e-4), + )(image) + if self.with_batch_norm == "regular": + c1 = keras.layers.BatchNormalization(name="bn1")(c1) + elif self.with_batch_norm == "sync": + # Test with parallel batch norms to verify all-reduce works OK. + bn1 = keras.layers.BatchNormalization( + name="bn1", synchronized=True + )(c1) + bn2 = keras.layers.BatchNormalization( + name="bn2", synchronized=True + )(c1) + c1 = keras.layers.Add()([bn1, bn2]) + c1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(c1) + logits = keras.layers.Dense(10, activation="softmax", name="pred")( + keras.layers.Flatten()(c1) + ) + model = keras.Model(inputs=[image], outputs=[logits]) + + if initial_weights: + model.set_weights(initial_weights) + + model.compile( + optimizer=gradient_descent.SGD(learning_rate=0.1), + loss="sparse_categorical_crossentropy", + metrics=["sparse_categorical_accuracy"], + ) + + return model + + def _get_data(self, count, shape=(28, 28, 3), num_classes=10): + centers = np.random.randn(num_classes, *shape) + + features = [] + labels = [] + for _ in range(count): + label = np.random.randint(0, num_classes, size=1)[0] + offset = np.random.normal(loc=0, scale=0.1, size=np.prod(shape)) + offset = offset.reshape(shape) + labels.append(label) + features.append(centers[label] + offset) + + x = np.asarray(features, dtype=np.float32) + y = np.asarray(labels, dtype=np.float32).reshape((count, 1)) + return x, y + + def get_data(self): + x_train, y_train = self._get_data( + count=keras_correctness_test_base._GLOBAL_BATCH_SIZE + * keras_correctness_test_base._EVAL_STEPS + ) + x_predict = x_train + return x_train, y_train, x_predict + + def get_data_with_partial_last_batch_eval(self): + x_train, y_train = self._get_data(count=1280) + x_eval, y_eval = self._get_data(count=1000) + return x_train, y_train, x_eval, y_eval, x_eval + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.all_strategy_and_input_config_combinations() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_cnn_correctness( + self, distribution, use_numpy, use_validation_data + ): + if ( + distribution + == tf.__internal__.distribute.combinations.central_storage_strategy_with_gpu_and_cpu # noqa: E501 + ): + self.skipTest("b/183958183") + self.run_correctness_test(distribution, use_numpy, use_validation_data) + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.all_strategy_and_input_config_combinations() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def 
test_cnn_with_batch_norm_correctness( + self, distribution, use_numpy, use_validation_data + ): + self.run_correctness_test( + distribution, + use_numpy, + use_validation_data, + with_batch_norm="regular", + ) + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.all_strategy_and_input_config_combinations() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_cnn_with_sync_batch_norm_correctness( + self, distribution, use_numpy, use_validation_data + ): + if not tf.executing_eagerly(): + self.skipTest( + "BatchNorm with `synchronized` is not enabled in graph mode." + ) + self.run_correctness_test( + distribution, use_numpy, use_validation_data, with_batch_norm="sync" + ) + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.all_strategy_and_input_config_combinations_eager() # noqa: E501 + + keras_correctness_test_base.multi_worker_mirrored_eager() + + keras_correctness_test_base.test_combinations_with_tpu_strategies_graph() # noqa: E501 + ) + def test_cnn_correctness_with_partial_last_batch_eval( + self, distribution, use_numpy, use_validation_data + ): + self.run_correctness_test( + distribution, + use_numpy, + use_validation_data, + partial_last_batch=True, + training_epochs=1, + ) + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.all_strategy_and_input_config_combinations_eager() # noqa: E501 + + keras_correctness_test_base.multi_worker_mirrored_eager() + + keras_correctness_test_base.test_combinations_with_tpu_strategies_graph() # noqa: E501 + ) + def test_cnn_with_batch_norm_correctness_and_partial_last_batch_eval( + self, distribution, use_numpy, use_validation_data + ): + self.run_correctness_test( + distribution, + use_numpy, + use_validation_data, + with_batch_norm="regular", + partial_last_batch=True, + ) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/keras_metrics_test.py b/keras/distribute/keras_metrics_test.py index adf45640571c..a0f79e4181ef 100644 --- a/keras/distribute/keras_metrics_test.py +++ b/keras/distribute/keras_metrics_test.py @@ -14,251 +14,294 @@ # ============================================================================== """Tests for Keras metrics.""" +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras import metrics from keras.engine import base_layer -import tensorflow.compat.v2 as tf combinations = tf.__internal__.distribute.combinations def _labeled_dataset_fn(): - # First four batches of x: labels, predictions -> (labels == predictions) - # 0: 0, 0 -> True; 1: 1, 1 -> True; 2: 2, 2 -> True; 3: 3, 0 -> False - # 4: 4, 1 -> False; 5: 0, 2 -> False; 6: 1, 0 -> False; 7: 2, 1 -> False - # 8: 3, 2 -> False; 9: 4, 0 -> False; 10: 0, 1 -> False; 11: 1, 2 -> False - # 12: 2, 0 -> False; 13: 3, 1 -> False; 14: 4, 2 -> False; 15: 0, 0 -> True - return tf.data.Dataset.range(1000).map( - lambda x: {"labels": x % 5, "predictions": x % 3}).batch( - 4, drop_remainder=True) + # First four batches of x: labels, predictions -> (labels == predictions) + # 0: 0, 0 -> True; 1: 1, 1 -> True; 2: 2, 2 -> True; 3: 3, 0 -> False + # 4: 4, 1 -> False; 5: 0, 2 -> False; 6: 1, 0 -> False; 7: 2, 1 -> False + # 8: 3, 2 -> False; 9: 4, 0 -> False; 10: 0, 1 -> False; 11: 1, 2 -> False + # 12: 2, 0 -> False; 13: 3, 1 -> False; 14: 4, 2 -> False; 15: 0, 0 -> True + return ( + tf.data.Dataset.range(1000) + .map(lambda x: {"labels": x % 5, "predictions": x % 3}) 
+ .batch(4, drop_remainder=True) + ) def _boolean_dataset_fn(): - # First four batches of labels, predictions: {TP, FP, TN, FN} - # with a threshold of 0.5: - # T, T -> TP; F, T -> FP; T, F -> FN - # F, F -> TN; T, T -> TP; F, T -> FP - # T, F -> FN; F, F -> TN; T, T -> TP - # F, T -> FP; T, F -> FN; F, F -> TN - return tf.data.Dataset.from_tensor_slices({ - "labels": [True, False, True, False], - "predictions": [True, True, False, False]}).repeat().batch( - 3, drop_remainder=True) + # First four batches of labels, predictions: {TP, FP, TN, FN} + # with a threshold of 0.5: + # T, T -> TP; F, T -> FP; T, F -> FN + # F, F -> TN; T, T -> TP; F, T -> FP + # T, F -> FN; F, F -> TN; T, T -> TP + # F, T -> FP; T, F -> FN; F, F -> TN + return ( + tf.data.Dataset.from_tensor_slices( + { + "labels": [True, False, True, False], + "predictions": [True, True, False, False], + } + ) + .repeat() + .batch(3, drop_remainder=True) + ) def _threshold_dataset_fn(): - # First four batches of labels, predictions: {TP, FP, TN, FN} - # with a threshold of 0.5: - # True, 1.0 -> TP; False, .75 -> FP; True, .25 -> FN - # False, 0.0 -> TN; True, 1.0 -> TP; False, .75 -> FP - # True, .25 -> FN; False, 0.0 -> TN; True, 1.0 -> TP - # False, .75 -> FP; True, .25 -> FN; False, 0.0 -> TN - return tf.data.Dataset.from_tensor_slices({ - "labels": [True, False, True, False], - "predictions": [1.0, 0.75, 0.25, 0.]}).repeat().batch( - 3, drop_remainder=True) + # First four batches of labels, predictions: {TP, FP, TN, FN} + # with a threshold of 0.5: + # True, 1.0 -> TP; False, .75 -> FP; True, .25 -> FN + # False, 0.0 -> TN; True, 1.0 -> TP; False, .75 -> FP + # True, .25 -> FN; False, 0.0 -> TN; True, 1.0 -> TP + # False, .75 -> FP; True, .25 -> FN; False, 0.0 -> TN + return ( + tf.data.Dataset.from_tensor_slices( + { + "labels": [True, False, True, False], + "predictions": [1.0, 0.75, 0.25, 0.0], + } + ) + .repeat() + .batch(3, drop_remainder=True) + ) def _regression_dataset_fn(): - return tf.data.Dataset.from_tensor_slices({ - "labels": [1., .5, 1., 0.], - "predictions": [1., .75, .25, 0.]}).repeat() + return tf.data.Dataset.from_tensor_slices( + {"labels": [1.0, 0.5, 1.0, 0.0], "predictions": [1.0, 0.75, 0.25, 0.0]} + ).repeat() def all_combinations(): - return tf.__internal__.test.combinations.combine( - distribution=[ - combinations.default_strategy, combinations.one_device_strategy, - combinations.mirrored_strategy_with_gpu_and_cpu, - combinations.mirrored_strategy_with_two_gpus - ], - mode=["graph", "eager"]) + return tf.__internal__.test.combinations.combine( + distribution=[ + combinations.default_strategy, + combinations.one_device_strategy, + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.mirrored_strategy_with_two_gpus, + ], + mode=["graph", "eager"], + ) def tpu_combinations(): - return tf.__internal__.test.combinations.combine( - distribution=[ - combinations.tpu_strategy, - ], mode=["graph"]) + return tf.__internal__.test.combinations.combine( + distribution=[ + combinations.tpu_strategy, + ], + mode=["graph"], + ) class KerasMetricsTest(tf.test.TestCase, parameterized.TestCase): - - def _test_metric(self, distribution, dataset_fn, metric_init_fn, expected_fn): - with tf.Graph().as_default(), distribution.scope(): - metric = metric_init_fn() - - iterator = distribution.make_input_fn_iterator(lambda _: dataset_fn()) - updates = distribution.experimental_local_results( - distribution.run(metric, args=(iterator.get_next(),))) - batches_per_update = distribution.num_replicas_in_sync - - 
self.evaluate(iterator.initializer) - self.evaluate([v.initializer for v in metric.variables]) - - batches_consumed = 0 - for i in range(4): - batches_consumed += batches_per_update - self.evaluate(updates) - self.assertAllClose(expected_fn(batches_consumed), - self.evaluate(metric.result()), - 0.001, - msg="After update #" + str(i+1)) - if batches_consumed >= 4: # Consume 4 input batches in total. - break - - @combinations.generate(all_combinations() + tpu_combinations()) - def testMean(self, distribution): - def _dataset_fn(): - return tf.data.Dataset.range(1000).map(tf.compat.v1.to_float).batch( - 4, drop_remainder=True) - - def _expected_fn(num_batches): - # Mean(0..3) = 1.5, Mean(0..7) = 3.5, Mean(0..11) = 5.5, etc. - return num_batches * 2 - 0.5 - - self._test_metric(distribution, _dataset_fn, metrics.Mean, _expected_fn) - - @combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - combinations.mirrored_strategy_with_one_cpu, - combinations.mirrored_strategy_with_gpu_and_cpu, - combinations.mirrored_strategy_with_two_gpus, - combinations.tpu_strategy_packed_var, - combinations.parameter_server_strategy_1worker_2ps_cpu, - combinations.parameter_server_strategy_1worker_2ps_1gpu, - ], - mode=["eager"], - jit_compile=[False]) + tf.__internal__.test.combinations.combine( - distribution=[combinations.mirrored_strategy_with_two_gpus], - mode=["eager"], - jit_compile=[True])) - def testAddMetric(self, distribution, jit_compile): - if not tf.__internal__.tf2.enabled(): - self.skipTest("Skip test since tf2 is not enabled. Pass " - " --test_env=TF2_BEHAVIOR=1 to enable tf2 behavior.") - - class MetricLayer(base_layer.Layer): - - def __init__(self): - super().__init__(name="metric_layer") - self.sum = metrics.Sum(name="sum") - # Using aggregation for jit_compile results in failure. Thus only set - # aggregation for PS Strategy for multi-gpu tests. - if isinstance(distribution, - tf.distribute.experimental.ParameterServerStrategy): - self.sum_var = tf.Variable( - 1.0, aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) + def _test_metric( + self, distribution, dataset_fn, metric_init_fn, expected_fn + ): + with tf.Graph().as_default(), distribution.scope(): + metric = metric_init_fn() + + iterator = distribution.make_input_fn_iterator( + lambda _: dataset_fn() + ) + updates = distribution.experimental_local_results( + distribution.run(metric, args=(iterator.get_next(),)) + ) + batches_per_update = distribution.num_replicas_in_sync + + self.evaluate(iterator.initializer) + self.evaluate([v.initializer for v in metric.variables]) + + batches_consumed = 0 + for i in range(4): + batches_consumed += batches_per_update + self.evaluate(updates) + self.assertAllClose( + expected_fn(batches_consumed), + self.evaluate(metric.result()), + 0.001, + msg="After update #" + str(i + 1), + ) + if batches_consumed >= 4: # Consume 4 input batches in total. + break + + @combinations.generate(all_combinations() + tpu_combinations()) + def testMean(self, distribution): + def _dataset_fn(): + return ( + tf.data.Dataset.range(1000) + .map(tf.compat.v1.to_float) + .batch(4, drop_remainder=True) + ) + + def _expected_fn(num_batches): + # Mean(0..3) = 1.5, Mean(0..7) = 3.5, Mean(0..11) = 5.5, etc. 
+ return num_batches * 2 - 0.5 + + self._test_metric(distribution, _dataset_fn, metrics.Mean, _expected_fn) + + @combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + combinations.mirrored_strategy_with_one_cpu, + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.mirrored_strategy_with_two_gpus, + combinations.tpu_strategy_packed_var, + combinations.parameter_server_strategy_1worker_2ps_cpu, + combinations.parameter_server_strategy_1worker_2ps_1gpu, + ], + mode=["eager"], + jit_compile=[False], + ) + + tf.__internal__.test.combinations.combine( + distribution=[combinations.mirrored_strategy_with_two_gpus], + mode=["eager"], + jit_compile=[True], + ) + ) + def testAddMetric(self, distribution, jit_compile): + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "Skip test since tf2 is not enabled. Pass " + " --test_env=TF2_BEHAVIOR=1 to enable tf2 behavior." + ) + + class MetricLayer(base_layer.Layer): + def __init__(self): + super().__init__(name="metric_layer") + self.sum = metrics.Sum(name="sum") + # Using aggregation for jit_compile results in failure. Thus + # only set aggregation for PS Strategy for multi-gpu tests. + if isinstance( + distribution, + tf.distribute.experimental.ParameterServerStrategy, + ): + self.sum_var = tf.Variable( + 1.0, + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA, + ) + else: + self.sum_var = tf.Variable(1.0) + + def call(self, inputs): + self.add_metric(self.sum(inputs)) + self.add_metric( + tf.reduce_mean(inputs), name="mean", aggregation="mean" + ) + self.sum_var.assign(self.sum.result()) + return inputs + + with distribution.scope(): + layer = MetricLayer() + + def func(): + return layer(tf.ones(())) + + if jit_compile: + func = tf.function(jit_compile=True)(func) + + @tf.function + def run(): + return distribution.run(func) + + if distribution._should_use_with_coordinator: + coord = tf.distribute.experimental.coordinator.ClusterCoordinator( + distribution + ) + coord.schedule(run) + coord.join() else: - self.sum_var = tf.Variable(1.0) - - def call(self, inputs): - self.add_metric(self.sum(inputs)) - self.add_metric( - tf.reduce_mean(inputs), name="mean", aggregation="mean") - self.sum_var.assign(self.sum.result()) - return inputs - - with distribution.scope(): - layer = MetricLayer() - - def func(): - return layer(tf.ones(())) - - if jit_compile: - func = tf.function(jit_compile=True)(func) - - @tf.function - def run(): - return distribution.run(func) - - if distribution._should_use_with_coordinator: - coord = tf.distribute.experimental.coordinator.ClusterCoordinator( - distribution) - coord.schedule(run) - coord.join() - else: - run() - - self.assertEqual(layer.metrics[0].result().numpy(), - 1.0 * distribution.num_replicas_in_sync) - self.assertEqual(layer.metrics[1].result().numpy(), 1.0) - self.assertEqual(layer.sum_var.read_value().numpy(), - 1.0 * distribution.num_replicas_in_sync) - - @combinations.generate(all_combinations()) - def test_precision(self, distribution): - # True positive is 2, false positive 1, precision is 2/3 = 0.6666667 - label_prediction = ([0, 1, 1, 1], [1, 0, 1, 1]) - with distribution.scope(): - precision = metrics.Precision() - self.evaluate([v.initializer for v in precision.variables]) - updates = distribution.run(precision, args=label_prediction) - self.evaluate(updates) - self.assertAllClose(precision.result(), 0.6666667) - - @combinations.generate(all_combinations()) - def test_recall(self, distribution): - # True positive is 2, false negative 1, precision is 2/3 
= 0.6666667 - label_prediction = ([0, 1, 1, 1], [1, 0, 1, 1]) - with distribution.scope(): - recall = metrics.Recall() - self.evaluate([v.initializer for v in recall.variables]) - updates = distribution.run(recall, args=label_prediction) - self.evaluate(updates) - self.assertAllClose(recall.result(), 0.6666667) - - @combinations.generate(all_combinations()) - def test_SensitivityAtSpecificity(self, distribution): - label_prediction = ([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) - with distribution.scope(): - metric = metrics.SensitivityAtSpecificity(0.5) - self.evaluate([v.initializer for v in metric.variables]) - updates = distribution.run(metric, args=label_prediction) - self.evaluate(updates) - self.assertAllClose(metric.result(), 0.5) - - @combinations.generate(all_combinations()) - def test_SpecificityAtSensitivity(self, distribution): - label_prediction = ([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) - with distribution.scope(): - metric = metrics.SpecificityAtSensitivity(0.5) - self.evaluate([v.initializer for v in metric.variables]) - updates = distribution.run(metric, args=label_prediction) - self.evaluate(updates) - self.assertAllClose(metric.result(), 0.66666667) - - @combinations.generate(all_combinations()) - def test_PrecisionAtRecall(self, distribution): - label_prediction = ([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) - with distribution.scope(): - metric = metrics.PrecisionAtRecall(0.5) - self.evaluate([v.initializer for v in metric.variables]) - updates = distribution.run(metric, args=label_prediction) - self.evaluate(updates) - self.assertAllClose(metric.result(), 0.5) - - @combinations.generate(all_combinations()) - def test_RecallAtPrecision(self, distribution): - label_prediction = ([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) - with distribution.scope(): - metric = metrics.RecallAtPrecision(0.8) - self.evaluate([v.initializer for v in metric.variables]) - updates = distribution.run(metric, args=label_prediction) - self.evaluate(updates) - self.assertAllClose(metric.result(), 0.5) - - @combinations.generate(all_combinations()) - def test_auc(self, distribution): - label_prediction = ([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) - with distribution.scope(): - metric = metrics.AUC(num_thresholds=3) - self.evaluate([v.initializer for v in metric.variables]) - updates = distribution.run(metric, args=label_prediction) - self.evaluate(updates) - self.assertAllClose(metric.result(), 0.75) + run() + + self.assertEqual( + layer.metrics[0].result().numpy(), + 1.0 * distribution.num_replicas_in_sync, + ) + self.assertEqual(layer.metrics[1].result().numpy(), 1.0) + self.assertEqual( + layer.sum_var.read_value().numpy(), + 1.0 * distribution.num_replicas_in_sync, + ) + + @combinations.generate(all_combinations()) + def test_precision(self, distribution): + # True positive is 2, false positive 1, precision is 2/3 = 0.6666667 + label_prediction = ([0, 1, 1, 1], [1, 0, 1, 1]) + with distribution.scope(): + precision = metrics.Precision() + self.evaluate([v.initializer for v in precision.variables]) + updates = distribution.run(precision, args=label_prediction) + self.evaluate(updates) + self.assertAllClose(precision.result(), 0.6666667) + + @combinations.generate(all_combinations()) + def test_recall(self, distribution): + # True positive is 2, false negative 1, recall is 2/3 = 0.6666667 + label_prediction = ([0, 1, 1, 1], [1, 0, 1, 1]) + with distribution.scope(): + recall = metrics.Recall() + self.evaluate([v.initializer for v in recall.variables]) + updates = distribution.run(recall, args=label_prediction)
+ self.evaluate(updates) + self.assertAllClose(recall.result(), 0.6666667) + + @combinations.generate(all_combinations()) + def test_SensitivityAtSpecificity(self, distribution): + label_prediction = ([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) + with distribution.scope(): + metric = metrics.SensitivityAtSpecificity(0.5) + self.evaluate([v.initializer for v in metric.variables]) + updates = distribution.run(metric, args=label_prediction) + self.evaluate(updates) + self.assertAllClose(metric.result(), 0.5) + + @combinations.generate(all_combinations()) + def test_SpecificityAtSensitivity(self, distribution): + label_prediction = ([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) + with distribution.scope(): + metric = metrics.SpecificityAtSensitivity(0.5) + self.evaluate([v.initializer for v in metric.variables]) + updates = distribution.run(metric, args=label_prediction) + self.evaluate(updates) + self.assertAllClose(metric.result(), 0.66666667) + + @combinations.generate(all_combinations()) + def test_PrecisionAtRecall(self, distribution): + label_prediction = ([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) + with distribution.scope(): + metric = metrics.PrecisionAtRecall(0.5) + self.evaluate([v.initializer for v in metric.variables]) + updates = distribution.run(metric, args=label_prediction) + self.evaluate(updates) + self.assertAllClose(metric.result(), 0.5) + + @combinations.generate(all_combinations()) + def test_RecallAtPrecision(self, distribution): + label_prediction = ([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) + with distribution.scope(): + metric = metrics.RecallAtPrecision(0.8) + self.evaluate([v.initializer for v in metric.variables]) + updates = distribution.run(metric, args=label_prediction) + self.evaluate(updates) + self.assertAllClose(metric.result(), 0.5) + + @combinations.generate(all_combinations()) + def test_auc(self, distribution): + label_prediction = ([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) + with distribution.scope(): + metric = metrics.AUC(num_thresholds=3) + self.evaluate([v.initializer for v in metric.variables]) + updates = distribution.run(metric, args=label_prediction) + self.evaluate(updates) + self.assertAllClose(metric.result(), 0.75) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/keras_models_test.py b/keras/distribute/keras_models_test.py index c4a9683954b7..4cc9e9c35c1a 100644 --- a/keras/distribute/keras_models_test.py +++ b/keras/distribute/keras_models_test.py @@ -14,43 +14,45 @@ # ============================================================================== """Tests for Keras high level APIs, e.g. 
fit, evaluate and predict.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras from keras.distribute.strategy_combinations import all_strategies class KerasModelsTest(tf.test.TestCase, parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=all_strategies, mode=["eager"])) - def test_lstm_model_with_dynamic_batch(self, distribution): - input_data = np.random.random([1, 32, 64, 64, 3]) - input_shape = tuple(input_data.shape[1:]) - - def build_model(): - model = keras.models.Sequential() - model.add( - keras.layers.ConvLSTM2D( - 4, - kernel_size=(4, 4), - activation="sigmoid", - padding="same", - input_shape=input_shape)) - model.add(keras.layers.GlobalMaxPooling2D()) - model.add(keras.layers.Dense(2, activation="sigmoid")) - return model - - with distribution.scope(): - model = build_model() - model.compile(loss="binary_crossentropy", optimizer="adam") - result = model.predict(input_data) - self.assertEqual(result.shape, (1, 2)) + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=all_strategies, mode=["eager"] + ) + ) + def test_lstm_model_with_dynamic_batch(self, distribution): + input_data = np.random.random([1, 32, 64, 64, 3]) + input_shape = tuple(input_data.shape[1:]) + + def build_model(): + model = keras.models.Sequential() + model.add( + keras.layers.ConvLSTM2D( + 4, + kernel_size=(4, 4), + activation="sigmoid", + padding="same", + input_shape=input_shape, + ) + ) + model.add(keras.layers.GlobalMaxPooling2D()) + model.add(keras.layers.Dense(2, activation="sigmoid")) + return model + + with distribution.scope(): + model = build_model() + model.compile(loss="binary_crossentropy", optimizer="adam") + result = model.predict(input_data) + self.assertEqual(result.shape, (1, 2)) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/distribute/keras_optimizer_v2_test.py b/keras/distribute/keras_optimizer_v2_test.py index b7dc18c66139..1b4c6150af2c 100644 --- a/keras/distribute/keras_optimizer_v2_test.py +++ b/keras/distribute/keras_optimizer_v2_test.py @@ -14,119 +14,123 @@ # ============================================================================== """Tests that show that DistributionStrategy works with optimizer v2.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras -from keras.optimizers.optimizer_v2 import adam -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import adam +from keras.optimizers.legacy import gradient_descent def get_model(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) - return model + x = keras.layers.Input(shape=(3,), name="input") + y = keras.layers.Dense(4, name="dense")(x) + model = keras.Model(x, y) + return model class MirroredStrategyOptimizerV2Test(tf.test.TestCase, parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, - ], - mode=['graph', 'eager'])) - def testKerasOptimizerWithUnequalInput(self, distribution): - with distribution.scope(): - var = tf.Variable( - 2.0, name='var', aggregation=tf.VariableAggregation.SUM) - optimizer = 
adam.Adam(learning_rate=0.01, beta_1=0.2, beta_2=0.2) - all_vars = [] - - def model_fn(): - - def loss_fn(): - replica_id = _replica_id() - return tf.cast(replica_id + 1, dtype=tf.float32) * 0.5 * var - - train_op = optimizer.minimize(loss_fn, var_list=[var]) - - return train_op, optimizer - - def train_fn(): - train_op, optimizer = distribution.extended.call_for_each_replica( - model_fn) - if not all_vars: - all_vars.append(var) - all_vars.append(optimizer.get_slot(var, 'm')) - all_vars.append(optimizer.get_slot(var, 'v')) - return distribution.group(train_op) - - if not tf.executing_eagerly(): - with self.cached_session() as sess: - train_fn = sess.make_callable(train_fn()) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # first step. - train_fn() - # var(1) = var(0) - lr * m(1) * sqrt(1 - beta2) / sqrt(v(1)) / (1 - beta1) - # = 2.0 - 0.01 * 1.2 * sqrt(0.8) / sqrt(1.8) / 0.8 - self.assertAllClose(1.99, self.evaluate(all_vars[0])) - # m(1) = beta1 * m(0) + (1-beta1) * grad = 0.2 * 0 + 0.8 * (1 + 2) / 2 - self.assertAllClose(1.2, self.evaluate(all_vars[1])) - # v(1) = beta2 * v(0) + (1-beta2) * grad^2 = 0.2 * 0 + 0.8 * 2.25 - self.assertAllClose(1.8, self.evaluate(all_vars[2])) - - # second step. - train_fn() - # var(1) = var(0) - lr * 2 = 1.98 - self.assertAllClose(1.98, self.evaluate(all_vars[0])) - # m(2) = beta1 * m(1) + (1-beta1) * grad = 0.2 * 1.2 + 0.8 * 1.5 - self.assertAllClose(1.44, self.evaluate(all_vars[1])) - # v(2) = beta2 * v(1) + (1-beta2) * grad^2 = 0.2 * 1.8 + 0.8 * 2.25 - self.assertAllClose(2.16, self.evaluate(all_vars[2])) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, - ], - mode=['graph', 'eager'])) - def testOptimizerWithKerasModelAndNumpyArrays(self, distribution): - with self.cached_session(): - with distribution.scope(): - model = get_model() - optimizer = gradient_descent.SGD(0.001) - loss = 'mse' - metrics = ['mae'] - model.compile( - optimizer, - loss, - metrics=metrics) - - inputs = np.zeros((64, 3), dtype=np.float32) - targets = np.zeros((64, 4), dtype=np.float32) - - model.fit( - inputs, - targets, - epochs=1, - batch_size=2, - verbose=0, - validation_data=(inputs, targets)) - model.evaluate(inputs, targets) - model.predict(inputs) + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, # noqa: E501 + ], + mode=["graph", "eager"], + ) + ) + def testKerasOptimizerWithUnequalInput(self, distribution): + with distribution.scope(): + var = tf.Variable( + 2.0, name="var", aggregation=tf.VariableAggregation.SUM + ) + optimizer = adam.Adam(learning_rate=0.01, beta_1=0.2, beta_2=0.2) + all_vars = [] + + def model_fn(): + def loss_fn(): + replica_id = _replica_id() + return tf.cast(replica_id + 1, dtype=tf.float32) * 0.5 * var + + train_op = optimizer.minimize(loss_fn, var_list=[var]) + + return train_op, optimizer + + def train_fn(): + ( + train_op, + optimizer, + ) = distribution.extended.call_for_each_replica(model_fn) + if not all_vars: + all_vars.append(var) + all_vars.append(optimizer.get_slot(var, "m")) + all_vars.append(optimizer.get_slot(var, "v")) + return distribution.group(train_op) + + if not tf.executing_eagerly(): + with self.cached_session() as sess: + train_fn = sess.make_callable(train_fn()) + 
self.evaluate(tf.compat.v1.global_variables_initializer()) + + # first step. + train_fn() + # var(1) = var(0) - lr * m(1) * sqrt(1 - beta2) / sqrt(v(1)) / (1 - + # beta1) + # = 2.0 - 0.01 * 1.2 * sqrt(0.8) / sqrt(1.8) / 0.8 + self.assertAllClose(1.99, self.evaluate(all_vars[0])) + # m(1) = beta1 * m(0) + (1-beta1) * grad = 0.2 * 0 + 0.8 * (1 + 2) / + # 2 + self.assertAllClose(1.2, self.evaluate(all_vars[1])) + # v(1) = beta2 * v(0) + (1-beta2) * grad^2 = 0.2 * 0 + 0.8 * 2.25 + self.assertAllClose(1.8, self.evaluate(all_vars[2])) + + # second step. + train_fn() + # var(2) = var(0) - lr * 2 = 1.98 + self.assertAllClose(1.98, self.evaluate(all_vars[0])) + # m(2) = beta1 * m(1) + (1-beta1) * grad = 0.2 * 1.2 + 0.8 * 1.5 + self.assertAllClose(1.44, self.evaluate(all_vars[1])) + # v(2) = beta2 * v(1) + (1-beta2) * grad^2 = 0.2 * 1.8 + 0.8 * 2.25 + self.assertAllClose(2.16, self.evaluate(all_vars[2])) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, # noqa: E501 + ], + mode=["graph", "eager"], + ) + ) + def testOptimizerWithKerasModelAndNumpyArrays(self, distribution): + with self.cached_session(): + with distribution.scope(): + model = get_model() + optimizer = gradient_descent.SGD(0.001) + loss = "mse" + metrics = ["mae"] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((64, 3), dtype=np.float32) + targets = np.zeros((64, 4), dtype=np.float32) + + model.fit( + inputs, + targets, + epochs=1, + batch_size=2, + verbose=0, + validation_data=(inputs, targets), + ) + model.evaluate(inputs, targets) + model.predict(inputs) def _replica_id(): - replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group - if not isinstance(replica_id, tf.Tensor): - replica_id = tf.constant(replica_id) - return replica_id + replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group + if not isinstance(replica_id, tf.Tensor): + replica_id = tf.constant(replica_id) + return replica_id -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/distribute/keras_premade_models_test.py b/keras/distribute/keras_premade_models_test.py index ace71a5ac697..e4badc570524 100644 --- a/keras/distribute/keras_premade_models_test.py +++ b/keras/distribute/keras_premade_models_test.py @@ -14,44 +14,40 @@ # ============================================================================== """Tests for keras premade models using tf.distribute.Strategy.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized from keras.engine import sequential from keras.layers import core -from keras.optimizers.optimizer_v2 import adagrad -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import adagrad +from keras.optimizers.legacy import gradient_descent from keras.premade_models import linear from keras.premade_models import wide_deep from keras.utils import dataset_creator -import numpy as np -import tensorflow.compat.v2 as tf def strategy_combinations_eager_data_fn(): - return tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.default_strategy, - tf.__internal__.distribute.combinations.one_device_strategy, - tf.__internal__.distribute.combinations.one_device_strategy_gpu, - tf.__internal__.distribute.combinations - .mirrored_strategy_with_gpu_and_cpu, -
tf.__internal__.distribute.combinations - .mirrored_strategy_with_two_gpus, - tf.__internal__.distribute.combinations - .mirrored_strategy_with_two_gpus_no_merge_call, - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu, - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x2_gpu, - tf.__internal__.distribute.combinations - .parameter_server_strategy_1worker_2ps_cpu, - tf.__internal__.distribute.combinations - .parameter_server_strategy_1worker_2ps_1gpu, - # NOTE: TPUStrategy not tested because the models in this test are - # sparse and do not work with TPUs. - ], - use_dataset_creator=[True, False], - mode=['eager'], - data_fn=['numpy', 'dataset']) + return tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.default_strategy, + tf.__internal__.distribute.combinations.one_device_strategy, + tf.__internal__.distribute.combinations.one_device_strategy_gpu, + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus_no_merge_call, # noqa: E501 + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, # noqa: E501 + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x2_gpu, # noqa: E501 + tf.__internal__.distribute.combinations.parameter_server_strategy_1worker_2ps_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.parameter_server_strategy_1worker_2ps_1gpu, # noqa: E501 + # NOTE: TPUStrategy not tested because the models in this test are + # sparse and do not work with TPUs. + ], + use_dataset_creator=[True, False], + mode=["eager"], + data_fn=["numpy", "dataset"], + ) INPUT_SIZE = 64 @@ -59,96 +55,116 @@ def strategy_combinations_eager_data_fn(): def get_numpy(): - inputs = np.random.uniform( - low=-5., high=5., size=(INPUT_SIZE, 2)).astype(np.float32) - output = .3 * inputs[:, 0] + .2 * inputs[:, 1] - return inputs, output + inputs = np.random.uniform(low=-5.0, high=5.0, size=(INPUT_SIZE, 2)).astype( + np.float32 + ) + output = 0.3 * inputs[:, 0] + 0.2 * inputs[:, 1] + return inputs, output def get_dataset(input_context=None, batch_size=None): - inputs, output = get_numpy() - dataset = tf.data.Dataset.from_tensor_slices((inputs, output)) - if input_context: - dataset = dataset.shard(input_context.num_input_pipelines, - input_context.input_pipeline_id) - if batch_size is None: - batch_size = BATCH_SIZE + inputs, output = get_numpy() + dataset = tf.data.Dataset.from_tensor_slices((inputs, output)) + if input_context: + dataset = dataset.shard( + input_context.num_input_pipelines, input_context.input_pipeline_id + ) + if batch_size is None: + batch_size = BATCH_SIZE - dataset = dataset.batch(batch_size).repeat(200) - return dataset + dataset = dataset.batch(batch_size).repeat(200) + return dataset # A `dataset_fn` is required for `Model.fit` to work across all strategies. 
def dataset_fn(input_context): - batch_size = input_context.get_per_replica_batch_size( - global_batch_size=BATCH_SIZE) - return get_dataset(input_context, batch_size) + batch_size = input_context.get_per_replica_batch_size( + global_batch_size=BATCH_SIZE + ) + return get_dataset(input_context, batch_size) class KerasPremadeModelsTest(tf.test.TestCase, parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - strategy_combinations_eager_data_fn()) - def test_linear_model(self, distribution, use_dataset_creator, data_fn): - if ((not use_dataset_creator) and isinstance( - distribution, tf.distribute.experimental.ParameterServerStrategy)): - self.skipTest( - 'Parameter Server strategy requires dataset creator to be used in ' - 'model.fit.') - if (not tf.__internal__.tf2.enabled() and use_dataset_creator - and isinstance(distribution, - tf.distribute.experimental.ParameterServerStrategy)): - self.skipTest( - 'Parameter Server strategy with dataset creator needs to be run when ' - 'eager execution is enabled.') - with distribution.scope(): - model = linear.LinearModel() - opt = gradient_descent.SGD(learning_rate=0.1) - model.compile(opt, 'mse') - if use_dataset_creator: - x = dataset_creator.DatasetCreator(dataset_fn) - hist = model.fit(x, epochs=3, steps_per_epoch=INPUT_SIZE) - else: - if data_fn == 'numpy': - inputs, output = get_numpy() - hist = model.fit(inputs, output, epochs=3) - else: - hist = model.fit(get_dataset(), epochs=3) - self.assertLess(hist.history['loss'][2], 0.2) - - @tf.__internal__.distribute.combinations.generate( - strategy_combinations_eager_data_fn()) - def test_wide_deep_model(self, distribution, use_dataset_creator, data_fn): - if ((not use_dataset_creator) and isinstance( - distribution, tf.distribute.experimental.ParameterServerStrategy)): - self.skipTest( - 'Parameter Server strategy requires dataset creator to be used in ' - 'model.fit.') - if (not tf.__internal__.tf2.enabled() and use_dataset_creator - and isinstance(distribution, - tf.distribute.experimental.ParameterServerStrategy)): - self.skipTest( - 'Parameter Server strategy with dataset creator needs to be run when ' - 'eager execution is enabled.') - with distribution.scope(): - linear_model = linear.LinearModel(units=1) - dnn_model = sequential.Sequential([core.Dense(units=1)]) - wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) - linear_opt = gradient_descent.SGD(learning_rate=0.05) - dnn_opt = adagrad.Adagrad(learning_rate=0.1) - wide_deep_model.compile(optimizer=[linear_opt, dnn_opt], loss='mse') - - if use_dataset_creator: - x = dataset_creator.DatasetCreator(dataset_fn) - hist = wide_deep_model.fit(x, epochs=3, steps_per_epoch=INPUT_SIZE) - else: - if data_fn == 'numpy': - inputs, output = get_numpy() - hist = wide_deep_model.fit(inputs, output, epochs=3) - else: - hist = wide_deep_model.fit(get_dataset(), epochs=3) - self.assertLess(hist.history['loss'][2], 0.2) - - -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() + @tf.__internal__.distribute.combinations.generate( + strategy_combinations_eager_data_fn() + ) + def test_linear_model(self, distribution, use_dataset_creator, data_fn): + if (not use_dataset_creator) and isinstance( + distribution, tf.distribute.experimental.ParameterServerStrategy + ): + self.skipTest( + "Parameter Server strategy requires dataset creator to be used " + "in model.fit." 
+ ) + if ( + not tf.__internal__.tf2.enabled() + and use_dataset_creator + and isinstance( + distribution, tf.distribute.experimental.ParameterServerStrategy + ) + ): + self.skipTest( + "Parameter Server strategy with dataset creator needs to be " + "run when eager execution is enabled." + ) + with distribution.scope(): + model = linear.LinearModel() + opt = gradient_descent.SGD(learning_rate=0.1) + model.compile(opt, "mse") + if use_dataset_creator: + x = dataset_creator.DatasetCreator(dataset_fn) + hist = model.fit(x, epochs=3, steps_per_epoch=INPUT_SIZE) + else: + if data_fn == "numpy": + inputs, output = get_numpy() + hist = model.fit(inputs, output, epochs=3) + else: + hist = model.fit(get_dataset(), epochs=3) + self.assertLess(hist.history["loss"][2], 0.2) + + @tf.__internal__.distribute.combinations.generate( + strategy_combinations_eager_data_fn() + ) + def test_wide_deep_model(self, distribution, use_dataset_creator, data_fn): + if (not use_dataset_creator) and isinstance( + distribution, tf.distribute.experimental.ParameterServerStrategy + ): + self.skipTest( + "Parameter Server strategy requires dataset creator to be used " + "in model.fit." + ) + if ( + not tf.__internal__.tf2.enabled() + and use_dataset_creator + and isinstance( + distribution, tf.distribute.experimental.ParameterServerStrategy + ) + ): + self.skipTest( + "Parameter Server strategy with dataset creator needs to be " + "run when eager execution is enabled." + ) + with distribution.scope(): + linear_model = linear.LinearModel(units=1) + dnn_model = sequential.Sequential([core.Dense(units=1)]) + wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) + linear_opt = gradient_descent.SGD(learning_rate=0.05) + dnn_opt = adagrad.Adagrad(learning_rate=0.1) + wide_deep_model.compile(optimizer=[linear_opt, dnn_opt], loss="mse") + + if use_dataset_creator: + x = dataset_creator.DatasetCreator(dataset_fn) + hist = wide_deep_model.fit( + x, epochs=3, steps_per_epoch=INPUT_SIZE + ) + else: + if data_fn == "numpy": + inputs, output = get_numpy() + hist = wide_deep_model.fit(inputs, output, epochs=3) + else: + hist = wide_deep_model.fit(get_dataset(), epochs=3) + self.assertLess(hist.history["loss"][2], 0.2) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/keras_rnn_model_correctness_test.py b/keras/distribute/keras_rnn_model_correctness_test.py index 18c468b7039d..74bf17077d36 100644 --- a/keras/distribute/keras_rnn_model_correctness_test.py +++ b/keras/distribute/keras_rnn_model_correctness_test.py @@ -14,119 +14,147 @@ # ============================================================================== """Correctness tests for tf.keras RNN models using DistributionStrategy.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf import keras -from keras.testing_infra import test_utils from keras.distribute import keras_correctness_test_base from keras.layers.rnn import gru from keras.layers.rnn import gru_v1 from keras.layers.rnn import lstm from keras.layers.rnn import lstm_v1 from keras.mixed_precision import policy -from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_keras +from keras.optimizers.legacy import gradient_descent as gradient_descent_keras +from keras.testing_infra import test_utils class _DistributionStrategyRnnModelCorrectnessTest( - keras_correctness_test_base - .TestDistributionStrategyEmbeddingModelCorrectnessBase): - - def _get_layer_class(self): - raise 
NotImplementedError - - def get_model(self, - max_words=10, - initial_weights=None, - distribution=None, - input_shapes=None): - del input_shapes - rnn_cls = self._get_layer_class() - - with keras_correctness_test_base.MaybeDistributionScope(distribution): - word_ids = keras.layers.Input( - shape=(max_words,), dtype=np.int32, name='words') - word_embed = keras.layers.Embedding(input_dim=20, output_dim=10)(word_ids) - rnn_embed = rnn_cls(units=4, return_sequences=False)(word_embed) - - dense_output = keras.layers.Dense(2)(rnn_embed) - preds = keras.layers.Softmax(dtype='float32')(dense_output) - model = keras.Model(inputs=[word_ids], outputs=[preds]) - - if initial_weights: - model.set_weights(initial_weights) - - optimizer_fn = gradient_descent_keras.SGD - - model.compile( - optimizer=optimizer_fn(learning_rate=0.1), - loss='sparse_categorical_crossentropy', - metrics=['sparse_categorical_accuracy']) - return model + keras_correctness_test_base.TestDistributionStrategyEmbeddingModelCorrectnessBase # noqa: E501 +): + def _get_layer_class(self): + raise NotImplementedError + + def get_model( + self, + max_words=10, + initial_weights=None, + distribution=None, + input_shapes=None, + ): + del input_shapes + rnn_cls = self._get_layer_class() + + with keras_correctness_test_base.MaybeDistributionScope(distribution): + word_ids = keras.layers.Input( + shape=(max_words,), dtype=np.int32, name="words" + ) + word_embed = keras.layers.Embedding(input_dim=20, output_dim=10)( + word_ids + ) + rnn_embed = rnn_cls(units=4, return_sequences=False)(word_embed) + + dense_output = keras.layers.Dense(2)(rnn_embed) + preds = keras.layers.Softmax(dtype="float32")(dense_output) + model = keras.Model(inputs=[word_ids], outputs=[preds]) + + if initial_weights: + model.set_weights(initial_weights) + + optimizer_fn = gradient_descent_keras.SGD + + model.compile( + optimizer=optimizer_fn(learning_rate=0.1), + loss="sparse_categorical_crossentropy", + metrics=["sparse_categorical_accuracy"], + ) + return model @test_utils.run_all_without_tensor_float_32( - 'Uses Dense layers, which call matmul') + "Uses Dense layers, which call matmul" +) class DistributionStrategyGruModelCorrectnessTest( - _DistributionStrategyRnnModelCorrectnessTest): - - def _get_layer_class(self): - if tf.__internal__.tf2.enabled(): - if not tf.executing_eagerly(): - self.skipTest("GRU v2 and legacy graph mode don't work together.") - return gru.GRU - else: - return gru_v1.GRU - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.test_combinations_for_embedding_model() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_gru_model_correctness(self, distribution, use_numpy, - use_validation_data): - self.run_correctness_test(distribution, use_numpy, use_validation_data) + _DistributionStrategyRnnModelCorrectnessTest +): + def _get_layer_class(self): + if tf.__internal__.tf2.enabled(): + if not tf.executing_eagerly(): + self.skipTest( + "GRU v2 and legacy graph mode don't work together." 
+ ) + return gru.GRU + else: + return gru_v1.GRU + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.test_combinations_for_embedding_model() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_gru_model_correctness( + self, distribution, use_numpy, use_validation_data + ): + self.run_correctness_test(distribution, use_numpy, use_validation_data) @test_utils.run_all_without_tensor_float_32( - 'Uses Dense layers, which call matmul') + "Uses Dense layers, which call matmul" +) class DistributionStrategyLstmModelCorrectnessTest( - _DistributionStrategyRnnModelCorrectnessTest): - - def _get_layer_class(self): - if tf.__internal__.tf2.enabled(): - if not tf.executing_eagerly(): - self.skipTest("LSTM v2 and legacy graph mode don't work together.") - return lstm.LSTM - else: - return lstm_v1.LSTM - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.test_combinations_for_embedding_model() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - def test_lstm_model_correctness(self, distribution, use_numpy, - use_validation_data): - self.run_correctness_test(distribution, use_numpy, use_validation_data) - - @tf.__internal__.distribute.combinations.generate( - keras_correctness_test_base.test_combinations_for_embedding_model() + - keras_correctness_test_base.multi_worker_mirrored_eager()) - @test_utils.enable_v2_dtype_behavior - def test_lstm_model_correctness_mixed_precision(self, distribution, use_numpy, - use_validation_data): - if isinstance(distribution, - (tf.distribute.experimental.CentralStorageStrategy, - tf.compat.v1.distribute.experimental.CentralStorageStrategy)): - self.skipTest('CentralStorageStrategy is not supported by ' - 'mixed precision.') - if isinstance(distribution, - (tf.distribute.experimental.TPUStrategy, tf.compat.v1.distribute.experimental.TPUStrategy)): - policy_name = 'mixed_bfloat16' - else: - policy_name = 'mixed_float16' - - with policy.policy_scope(policy_name): - self.run_correctness_test(distribution, use_numpy, use_validation_data) - - -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() + _DistributionStrategyRnnModelCorrectnessTest +): + def _get_layer_class(self): + if tf.__internal__.tf2.enabled(): + if not tf.executing_eagerly(): + self.skipTest( + "LSTM v2 and legacy graph mode don't work together." + ) + return lstm.LSTM + else: + return lstm_v1.LSTM + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.test_combinations_for_embedding_model() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + def test_lstm_model_correctness( + self, distribution, use_numpy, use_validation_data + ): + self.run_correctness_test(distribution, use_numpy, use_validation_data) + + @tf.__internal__.distribute.combinations.generate( + keras_correctness_test_base.test_combinations_for_embedding_model() + + keras_correctness_test_base.multi_worker_mirrored_eager() + ) + @test_utils.enable_v2_dtype_behavior + def test_lstm_model_correctness_mixed_precision( + self, distribution, use_numpy, use_validation_data + ): + if isinstance( + distribution, + ( + tf.distribute.experimental.CentralStorageStrategy, + tf.compat.v1.distribute.experimental.CentralStorageStrategy, + ), + ): + self.skipTest( + "CentralStorageStrategy is not supported by mixed precision." 
+ ) + if isinstance( + distribution, + ( + tf.distribute.experimental.TPUStrategy, + tf.compat.v1.distribute.experimental.TPUStrategy, + ), + ): + policy_name = "mixed_bfloat16" + else: + policy_name = "mixed_float16" + + with policy.policy_scope(policy_name): + self.run_correctness_test( + distribution, use_numpy, use_validation_data + ) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/keras_save_load_test.py b/keras/distribute/keras_save_load_test.py index 7b35bd613cc8..b72be7171d8f 100644 --- a/keras/distribute/keras_save_load_test.py +++ b/keras/distribute/keras_save_load_test.py @@ -14,59 +14,80 @@ # ============================================================================== """Tests for saving and loading using keras save/load APIs with DS.""" +import tensorflow.compat.v2 as tf + from keras.distribute import saved_model_test_base as test_base -from keras.saving import save +from keras.saving.legacy import save from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf @test_utils.run_all_without_tensor_float_32( - 'Uses Dense layers, which call matmul') + "Uses Dense layers, which call matmul" +) class KerasSaveLoadTest(test_base.TestSavedModelBase): + def setUp(self): + self._root_dir = "keras_save_load" + super().setUp() - def setUp(self): - self._root_dir = 'keras_save_load' - super().setUp() - - def _save_model(self, model, saved_dir): - model.save(saved_dir, save_format='tf') + def _save_model(self, model, saved_dir): + model.save(saved_dir, save_format="tf") - def _load_and_run_model(self, - distribution, - saved_dir, - predict_dataset, - output_name='output_1'): - restored_keras_model = save.load_model(saved_dir) - return restored_keras_model.predict( - predict_dataset, steps=test_base.PREDICT_STEPS) + def _load_and_run_model( + self, distribution, saved_dir, predict_dataset, output_name="output_1" + ): + restored_keras_model = save.load_model(saved_dir) + return restored_keras_model.predict( + predict_dataset, steps=test_base.PREDICT_STEPS + ) - @tf.__internal__.distribute.combinations.generate(test_base.simple_models_with_strategies()) - def test_save_no_strategy_restore_strategy(self, model_and_input, - distribution): - self.run_test_save_no_strategy_restore_strategy( - model_and_input, distribution) + @tf.__internal__.distribute.combinations.generate( + test_base.simple_models_with_strategies() + ) + def test_save_no_strategy_restore_strategy( + self, model_and_input, distribution + ): + self.run_test_save_no_strategy_restore_strategy( + model_and_input, distribution + ) - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(test_base.simple_models_with_strategies(), - tf.__internal__.test.combinations.combine(save_in_scope=[True, False]))) - def test_save_strategy_restore_no_strategy(self, model_and_input, - distribution, save_in_scope): - self.run_test_save_strategy_restore_no_strategy( - model_and_input, distribution, save_in_scope) + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + test_base.simple_models_with_strategies(), + tf.__internal__.test.combinations.combine( + save_in_scope=[True, False] + ), + ) + ) + def test_save_strategy_restore_no_strategy( + self, model_and_input, distribution, save_in_scope + ): + self.run_test_save_strategy_restore_no_strategy( + model_and_input, distribution, save_in_scope + ) - @tf.__internal__.distribute.combinations.generate( - 
tf.__internal__.test.combinations.times(test_base.simple_models_with_strategy_pairs(), - tf.__internal__.test.combinations.combine(save_in_scope=[True, False]))) - def test_save_strategy_restore_strategy(self, model_and_input, - distribution_for_saving, - distribution_for_restoring, - save_in_scope): - self.run_test_save_strategy_restore_strategy(model_and_input, - distribution_for_saving, - distribution_for_restoring, - save_in_scope) + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + test_base.simple_models_with_strategy_pairs(), + tf.__internal__.test.combinations.combine( + save_in_scope=[True, False] + ), + ) + ) + def test_save_strategy_restore_strategy( + self, + model_and_input, + distribution_for_saving, + distribution_for_restoring, + save_in_scope, + ): + self.run_test_save_strategy_restore_strategy( + model_and_input, + distribution_for_saving, + distribution_for_restoring, + save_in_scope, + ) -if __name__ == '__main__': - tf.compat.v1.enable_eager_execution() - tf.test.main() +if __name__ == "__main__": + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/distribute/keras_stateful_lstm_model_correctness_test.py b/keras/distribute/keras_stateful_lstm_model_correctness_test.py index c0e28d41c70f..631643c645c9 100644 --- a/keras/distribute/keras_stateful_lstm_model_correctness_test.py +++ b/keras/distribute/keras_stateful_lstm_model_correctness_test.py @@ -14,93 +14,103 @@ # ============================================================================== """Tests for stateful tf.keras LSTM models using DistributionStrategy.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.distribute import keras_correctness_test_base -from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_keras +from keras.optimizers.legacy import gradient_descent as gradient_descent_keras def strategies_for_stateful_embedding_model(): - """Returns TPUStrategy with single core device assignment.""" + """Returns TPUStrategy with single core device assignment.""" - return [ - tf.__internal__.distribute.combinations.tpu_strategy_one_core, - ] + return [ + tf.__internal__.distribute.combinations.tpu_strategy_one_core, + ] def test_combinations_for_stateful_embedding_model(): - return (tf.__internal__.test.combinations.combine( - distribution=strategies_for_stateful_embedding_model(), - mode='graph', - use_numpy=False, - use_validation_data=False)) + return tf.__internal__.test.combinations.combine( + distribution=strategies_for_stateful_embedding_model(), + mode="graph", + use_numpy=False, + use_validation_data=False, + ) class DistributionStrategyStatefulLstmModelCorrectnessTest( - keras_correctness_test_base - .TestDistributionStrategyEmbeddingModelCorrectnessBase): - - def get_model(self, - max_words=10, - initial_weights=None, - distribution=None, - input_shapes=None): - del input_shapes - batch_size = keras_correctness_test_base._GLOBAL_BATCH_SIZE - - with keras_correctness_test_base.MaybeDistributionScope(distribution): - word_ids = keras.layers.Input( - shape=(max_words,), - batch_size=batch_size, - dtype=np.int32, - name='words') - word_embed = keras.layers.Embedding(input_dim=20, output_dim=10)(word_ids) - lstm_embed = keras.layers.LSTM( - units=4, return_sequences=False, stateful=True)( - word_embed) - - preds = keras.layers.Dense(2, activation='softmax')(lstm_embed) - model = keras.Model(inputs=[word_ids], outputs=[preds]) - - if initial_weights: 
- model.set_weights(initial_weights) - - optimizer_fn = gradient_descent_keras.SGD - - model.compile( - optimizer=optimizer_fn(learning_rate=0.1), - loss='sparse_categorical_crossentropy', - metrics=['sparse_categorical_accuracy']) - return model - - # TODO(jhseu): Disabled to fix b/130808953. Need to investigate why it - # doesn't work and enable for DistributionStrategy more generally. - @tf.__internal__.distribute.combinations.generate(test_combinations_for_stateful_embedding_model()) - def disabled_test_stateful_lstm_model_correctness( - self, distribution, use_numpy, use_validation_data): - self.run_correctness_test( - distribution, - use_numpy, - use_validation_data, - is_stateful_model=True) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - keras_correctness_test_base - .test_combinations_with_tpu_strategies_graph())) - def test_incorrectly_use_multiple_cores_for_stateful_lstm_model( - self, distribution, use_numpy, use_validation_data): - with self.assertRaisesRegex( - ValueError, 'not yet supported with tf.distribute.Strategy'): - self.run_correctness_test( - distribution, - use_numpy, - use_validation_data, - is_stateful_model=True) - - -if __name__ == '__main__': - tf.test.main() + keras_correctness_test_base.TestDistributionStrategyEmbeddingModelCorrectnessBase # noqa: E501 +): + def get_model( + self, + max_words=10, + initial_weights=None, + distribution=None, + input_shapes=None, + ): + del input_shapes + batch_size = keras_correctness_test_base._GLOBAL_BATCH_SIZE + + with keras_correctness_test_base.MaybeDistributionScope(distribution): + word_ids = keras.layers.Input( + shape=(max_words,), + batch_size=batch_size, + dtype=np.int32, + name="words", + ) + word_embed = keras.layers.Embedding(input_dim=20, output_dim=10)( + word_ids + ) + lstm_embed = keras.layers.LSTM( + units=4, return_sequences=False, stateful=True + )(word_embed) + + preds = keras.layers.Dense(2, activation="softmax")(lstm_embed) + model = keras.Model(inputs=[word_ids], outputs=[preds]) + + if initial_weights: + model.set_weights(initial_weights) + + optimizer_fn = gradient_descent_keras.SGD + + model.compile( + optimizer=optimizer_fn(learning_rate=0.1), + loss="sparse_categorical_crossentropy", + metrics=["sparse_categorical_accuracy"], + ) + return model + + # TODO(jhseu): Disabled to fix b/130808953. Need to investigate why it + # doesn't work and enable for DistributionStrategy more generally. 
+    @tf.__internal__.distribute.combinations.generate(
+        test_combinations_for_stateful_embedding_model()
+    )
+    def disabled_test_stateful_lstm_model_correctness(
+        self, distribution, use_numpy, use_validation_data
+    ):
+        self.run_correctness_test(
+            distribution, use_numpy, use_validation_data, is_stateful_model=True
+        )
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.times(
+            keras_correctness_test_base.test_combinations_with_tpu_strategies_graph()  # noqa: E501
+        )
+    )
+    def test_incorrectly_use_multiple_cores_for_stateful_lstm_model(
+        self, distribution, use_numpy, use_validation_data
+    ):
+        with self.assertRaisesRegex(
+            ValueError, "not yet supported with tf.distribute.Strategy"
+        ):
+            self.run_correctness_test(
+                distribution,
+                use_numpy,
+                use_validation_data,
+                is_stateful_model=True,
+            )
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/keras/distribute/keras_utils_test.py b/keras/distribute/keras_utils_test.py
index d33299f0bd9e..8925801ea4dc 100644
--- a/keras/distribute/keras_utils_test.py
+++ b/keras/distribute/keras_utils_test.py
@@ -12,15 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for tf.keras models with callbacks, checkpointing with dist strategy."""
-
-import tensorflow.compat.v2 as tf
+"""Tests for tf.keras models with callbacks, checkpointing with a
+distribution strategy."""
 
 import collections
 import tempfile
 
-from absl.testing import parameterized
 import numpy as np
+import tensorflow.compat.v2 as tf
+from absl.testing import parameterized
 
 import keras
 from keras import losses
@@ -30,594 +30,668 @@
 
 
 class Counter(keras.callbacks.Callback):
-  """Counts the number of times each callback method was run.
-
-  Attributes:
-    method_counts: dict. Contains the counts of time each callback method was
-      run.
- """ - - def __init__(self): - self.method_counts = collections.defaultdict(int) - methods_to_count = [ - 'on_batch_begin', 'on_batch_end', 'on_epoch_begin', 'on_epoch_end', - 'on_predict_batch_begin', 'on_predict_batch_end', 'on_predict_begin', - 'on_predict_end', 'on_test_batch_begin', 'on_test_batch_end', - 'on_test_begin', 'on_test_end', 'on_train_batch_begin', - 'on_train_batch_end', 'on_train_begin', 'on_train_end' - ] - for method_name in methods_to_count: - setattr(self, method_name, - self.wrap_with_counts(method_name, getattr(self, method_name))) - - def wrap_with_counts(self, method_name, method): - - def _call_and_count(*args, **kwargs): - self.method_counts[method_name] += 1 - return method(*args, **kwargs) - - return _call_and_count - - -class TestDistributionStrategyWithCallbacks(tf.test.TestCase, - parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - keras_test_lib.all_strategy_combinations())) - def test_callbacks_in_fit(self, distribution): - with distribution.scope(): - model = keras_test_lib.get_model() - model.compile( - optimizer='sgd', - loss='mse', - metrics=['mae']) - - dataset = keras_test_lib.get_dataset(distribution) - counter = Counter() - - epochs = 2 - steps_per_epoch = 5 - validation_steps = 3 - - model.fit( - dataset, - epochs=epochs, - steps_per_epoch=steps_per_epoch, - verbose=0, - validation_data=dataset, - validation_steps=validation_steps, - callbacks=[counter]) - - if (isinstance(distribution, tf.compat.v1.distribute.experimental.TPUStrategy) and - not tf.executing_eagerly()): - # TPU Strategy can have multi step training, from extended.steps_per_run - # if steps_per_run = 1, then num_batch_call_per_epoch = steps_per_epoch - steps_per_run = distribution.extended.steps_per_run - num_batch_call_per_epoch = steps_per_epoch // steps_per_run - if steps_per_epoch % steps_per_run: - num_batch_call_per_epoch += 1 - else: - num_batch_call_per_epoch = steps_per_epoch - - self.assertDictEqual( - counter.method_counts, { - 'on_batch_begin': epochs * num_batch_call_per_epoch, - 'on_batch_end': epochs * num_batch_call_per_epoch, - 'on_epoch_begin': epochs, - 'on_epoch_end': epochs, - 'on_test_batch_begin': epochs * validation_steps, - 'on_test_batch_end': epochs * validation_steps, - 'on_test_begin': epochs, - 'on_test_end': epochs, - 'on_train_batch_begin': epochs * num_batch_call_per_epoch, - 'on_train_batch_end': epochs * num_batch_call_per_epoch, - 'on_train_begin': 1, - 'on_train_end': 1 - }) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - keras_test_lib.all_strategy_combinations())) - def test_callbacks_in_eval(self, distribution): - with distribution.scope(): - model = keras_test_lib.get_model() - model.compile( - optimizer='sgd', - loss='mse', - metrics=['mae']) - - dataset = keras_test_lib.get_dataset(distribution) - counter = Counter() - - model.evaluate(dataset, steps=5, callbacks=[counter]) - - self.assertDictEqual( - counter.method_counts, { - 'on_test_batch_begin': 5, - 'on_test_batch_end': 5, - 'on_test_begin': 1, - 'on_test_end': 1 - }) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - keras_test_lib.all_strategy_combinations())) - def test_callbacks_in_predict(self, distribution): - with distribution.scope(): - model = keras_test_lib.get_model() - model.compile( - optimizer='sgd', - loss='mse', - metrics=['mae']) - - dataset = keras_test_lib.get_dataset(distribution) - counter 
= Counter() - - model.predict( - keras_test_lib.get_predict_dataset(dataset), - steps=5, - callbacks=[counter]) - - self.assertDictEqual( - counter.method_counts, { - 'on_predict_batch_begin': 5, - 'on_predict_batch_end': 5, - 'on_predict_begin': 1, - 'on_predict_end': 1 - }) - - -class TestDistributionStrategyErrorCases(tf.test.TestCase, parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations. - mirrored_strategy_with_gpu_and_cpu, - ], - mode=['graph'])) - def test_validating_dataset_input_tensors_with_shape_mismatch( - self, distribution): - with self.cached_session(): - @tf.function - def run(): - ctx = tf.distribute.get_replica_context() - if ctx.replica_id_in_sync_group.device.endswith('GPU:0'): - return tf.constant([[1, 2]]) - else: - return tf.constant([[1, 2], [1, 2]]) - - x = distribution.run(run) - - # Removed device and input tensor shape details from the error message - # since the order of the device and the corresponding input tensor shape - # is not deterministic over different runs. - with self.assertRaisesRegex( - ValueError, 'Input tensor shapes do not match for ' - 'distributed tensor inputs ' - 'PerReplica:.+'): + """Counts the number of times each callback method was run. + + Attributes: + method_counts: dict. Contains the counts of time each callback method was + run. + """ + + def __init__(self): + self.method_counts = collections.defaultdict(int) + methods_to_count = [ + "on_batch_begin", + "on_batch_end", + "on_epoch_begin", + "on_epoch_end", + "on_predict_batch_begin", + "on_predict_batch_end", + "on_predict_begin", + "on_predict_end", + "on_test_batch_begin", + "on_test_batch_end", + "on_test_begin", + "on_test_end", + "on_train_batch_begin", + "on_train_batch_end", + "on_train_begin", + "on_train_end", + ] + for method_name in methods_to_count: + setattr( + self, + method_name, + self.wrap_with_counts(method_name, getattr(self, method_name)), + ) + + def wrap_with_counts(self, method_name, method): + def _call_and_count(*args, **kwargs): + self.method_counts[method_name] += 1 + return method(*args, **kwargs) + + return _call_and_count + + +class TestDistributionStrategyWithCallbacks( + tf.test.TestCase, parameterized.TestCase +): + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + keras_test_lib.all_strategy_combinations() + ) + ) + def test_callbacks_in_fit(self, distribution): with distribution.scope(): - distributed_training_utils_v1.validate_distributed_dataset_inputs( - distribution, x, None) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations - .mirrored_strategy_with_gpu_and_cpu, - ], - mode=['graph', 'eager'])) - def test_validating_dataset_input_tensors_with_dtype_mismatch( - self, distribution): - with self.cached_session(): - - @tf.function - def run(): - ctx = tf.distribute.get_replica_context() - if ctx.replica_id_in_sync_group.device.endswith('GPU:0'): - return tf.constant([[1, 2]], dtype=tf.int32) - else: - return tf.constant([[1, 2]], dtype=tf.float64) + model = keras_test_lib.get_model() + model.compile(optimizer="sgd", loss="mse", metrics=["mae"]) - x = distribution.run(run) + dataset = keras_test_lib.get_dataset(distribution) + counter = Counter() - # Removed device and input tensor dtype details from the error message - # since the order of the device and the 
corresponding input tensor dtype - # is not deterministic over different runs. - with self.assertRaisesRegex( - ValueError, 'Input tensor dtypes do not match for ' - 'distributed tensor inputs ' - 'PerReplica:.+'): - with distribution.scope(): - distributed_training_utils_v1.validate_distributed_dataset_inputs( - distribution, x, None) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=['graph', 'eager'])) - def test_unsupported_features(self, distribution, mode): - with self.cached_session(): - with distribution.scope(): - model = keras_test_lib.get_model() - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - loss = 'mse' - metrics = ['mae'] - model.compile( - optimizer, - loss, - metrics=metrics) - - dataset = keras_test_lib.get_dataset(distribution) - # Test with validation split - with self.assertRaises(ValueError): - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - validation_split=0.5, - validation_steps=2) + epochs = 2 + steps_per_epoch = 5 + validation_steps = 3 - # Test with sample weight. - sample_weight = np.random.random((10,)) - with self.assertRaises(ValueError): model.fit( dataset, - epochs=1, - steps_per_epoch=2, + epochs=epochs, + steps_per_epoch=steps_per_epoch, verbose=0, - sample_weight=sample_weight) - - # Test with not specifying the `steps` argument for dataset with infinite - # cardinality. - dataset = dataset.repeat() - with self.assertRaises(ValueError): - model.fit(dataset, epochs=1, verbose=0) - with self.assertRaises(ValueError): - model.evaluate(dataset, verbose=0) - - with self.assertRaises(ValueError): - model.predict(dataset, verbose=0) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.one_device_strategy, - ], - mode=['graph', 'eager'])) - def test_distribution_strategy_on_subclassed_model( - self, distribution): - with distribution.scope(): - - class _SimpleMLP(keras.Model): - - def __init__(self, num_labels): - super().__init__() - self.dense = keras.layers.Dense(num_labels) - - def call(self, inputs): - return self.dense(inputs) - - model = _SimpleMLP(3) - - if not tf.executing_eagerly(): - with self.assertRaisesRegex( - ValueError, - 'We currently do not support distribution strategy with a ' - '`Sequential` model that is created without `input_shape`/' - '`input_dim` set in its first layer or a subclassed model.'): - model.compile( - 'sgd') - else: - model.compile( - 'sgd') - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.one_device_strategy, - ], - mode=['graph', 'eager'])) - def test_distribution_strategy_on_deferred_sequential_model( - self, distribution): - with distribution.scope(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(16, activation='relu')) - model.add(keras.layers.Dense(3, activation='softmax')) - - if tf.executing_eagerly(): - model.compile( - 'sgd') - else: - with self.assertRaisesRegex( - ValueError, - 'We currently do not support distribution strategy with a ' - '`Sequential` model that is created without ' - '`input_shape`/`input_dim` set in its first 
layer or ' - 'a subclassed model.'): - model.compile( - 'sgd') - - @tf.__internal__.distribute.combinations.generate( - keras_test_lib.all_strategy_combinations_minus_default()) - def test_standalone_loss_without_loss_reduction(self, distribution): - with distribution.scope(): - loss_object = losses.MeanSquaredError() - - with self.assertRaisesRegex( - ValueError, 'Please use `tf.keras.losses.Reduction.SUM` or ' - '`tf.keras.losses.Reduction.NONE`'): - y = np.asarray([1, 0]) - loss_object(y, y) - - -class TestDistributionStrategyWithLossMasking(tf.test.TestCase, - parameterized.TestCase): - - # TODO(priyag): Enable all strategies for this test. Currently it does not - # work for TPU due to some invalid datatype. - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=['graph', 'eager'], - optimizer=optimizer_combinations - .gradient_descent_optimizer_keras_v2_fn - )) - def test_masking(self, distribution, optimizer): - with self.cached_session(): - np.random.seed(1337) - x = np.array([[[1], [1]], [[0], [0]]]) - with distribution.scope(): - model = keras.models.Sequential() - model.add(keras.layers.Masking(mask_value=0, input_shape=(2, 1))) - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(1, kernel_initializer='one'))) - model.compile( - loss='mse', - optimizer=optimizer()) - y = np.array([[[1], [1]], [[1], [1]]]) - dataset = tf.data.Dataset.from_tensor_slices((x, y)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - hist = model.fit(x=dataset, epochs=1, steps_per_epoch=2) - self.assertEqual(hist.history['loss'][0], 0) - - -class TestDistributionStrategyWithNormalizationLayer(tf.test.TestCase, - parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - keras_test_lib.all_strategy_combinations(), - tf.__internal__.test.combinations.combine( - fused=[True, False], - optimizer=optimizer_combinations - .gradient_descent_optimizer_keras_v2_fn))) - def test_batchnorm_correctness(self, distribution, fused, optimizer): - with self.cached_session(): - with distribution.scope(): - model = keras.models.Sequential() - norm = keras.layers.BatchNormalization( - input_shape=( - 10, - 20, - 30, - ), momentum=0.8, fused=fused) - model.add(norm) - model.compile( - loss='mse', - optimizer=optimizer()) - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 20, 30)) - x = x.astype('float32') - dataset = tf.data.Dataset.from_tensor_slices((x, x)) - dataset = dataset.repeat(100) - dataset = keras_test_lib.batch_wrapper(dataset, 32, distribution) - - predict_dataset = tf.data.Dataset.from_tensor_slices(x) - predict_dataset = predict_dataset.repeat(100) - predict_dataset = keras_test_lib.batch_wrapper(predict_dataset, 32, - distribution) - - model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10) - out = model.predict(predict_dataset, steps=2) - out -= keras.backend.eval(norm.beta) - out /= keras.backend.eval(norm.gamma) - np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1) - np.testing.assert_allclose(out.std(), 1.0, atol=1e-1) - -# TODO(b/146181571): Enable this for all distribution strategies once -# DistributedVariable.assign() returns a variable for MirroredStrategy. 
- @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - keras_test_lib.tpu_strategy_combinations(), - tf.__internal__.test.combinations.combine( - optimizer=optimizer_combinations - .gradient_descent_optimizer_keras_v2_fn))) - def test_batchnorm_correctness_with_renorm(self, distribution, optimizer): - with self.cached_session(): - with distribution.scope(): - model = keras.models.Sequential() - norm = keras.layers.BatchNormalization( - input_shape=( - 10, - 20, - 30, - ), momentum=0.8, fused=False, renorm=True) - model.add(norm) - model.compile( - loss='mse', - optimizer=optimizer()) - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 20, 30)) - x = x.astype('float32') - dataset = tf.data.Dataset.from_tensor_slices((x, x)) - dataset = dataset.repeat(100) - dataset = keras_test_lib.batch_wrapper(dataset, 32, distribution) - - predict_dataset = tf.data.Dataset.from_tensor_slices(x) - predict_dataset = predict_dataset.repeat(100) - predict_dataset = keras_test_lib.batch_wrapper(predict_dataset, 32, - distribution) - - model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10) - out = model.predict(predict_dataset, steps=2) - out -= keras.backend.eval(norm.beta) - out /= keras.backend.eval(norm.gamma) - np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1) - np.testing.assert_allclose(out.std(), 1.0, atol=1e-1) - - -class TestDistributionStrategySaveLoadWeights(tf.test.TestCase, - parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - keras_test_lib.all_strategy_combinations_minus_default(), - tf.__internal__.test.combinations.combine( - optimizer=optimizer_combinations.rmsprop_optimizer_keras_v2_fn))) - def test_save_load_h5(self, distribution, optimizer): - with self.cached_session(): - dataset = keras_test_lib.get_dataset(distribution) - with distribution.scope(): - model = keras_test_lib.get_model() - model.compile( - optimizer(), - 'mse') - model.fit(dataset, epochs=1, steps_per_epoch=1) - - weights_file = tempfile.mktemp('.h5') - model.save_weights(weights_file) - - model_2 = keras_test_lib.get_model() - model_2.compile( - optimizer(), - 'mse') - model_2.load_weights(weights_file) - model_2.predict( - keras_test_lib.get_predict_dataset(distribution), steps=2) - model_2.fit(dataset, epochs=1, steps_per_epoch=1) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - keras_test_lib.all_strategy_combinations_minus_default(), - tf.__internal__.test.combinations.combine( - optimizer=optimizer_combinations.rmsprop_optimizer_keras_v2_fn))) - def test_save_load_trackable(self, distribution, optimizer): - # TODO(b/123533246): Enable the test for TPU once bug is fixed - if (isinstance(distribution, - (tf.distribute.experimental.TPUStrategy, tf.compat.v1.distribute.experimental.TPUStrategy)) and - distribution.extended.steps_per_run > 1): - self.skipTest('MultiStep TPU Strategy deadlocks with optimizer restore.') - with self.cached_session(): - dataset = keras_test_lib.get_dataset(distribution) - with distribution.scope(): - model = keras_test_lib.get_model() - model.compile( - optimizer(), - 'mse') - model.fit(dataset, epochs=1, steps_per_epoch=1) - - weights_file = tempfile.mktemp() - model.save_weights(weights_file) - - model_2 = keras_test_lib.get_model() - model_2.compile( - optimizer(), - 'mse') - model_2.load_weights(weights_file) - model_2.predict( - 
keras_test_lib.get_predict_dataset(distribution), steps=2)
-      model_2.fit(dataset, epochs=1, steps_per_epoch=1)
-
-
-class TestDistributionStrategyValidation(tf.test.TestCase, parameterized.TestCase):
-
-  @tf.__internal__.distribute.combinations.generate(
-      tf.__internal__.test.combinations.times(
-          keras_test_lib.all_strategy_combinations_minus_default()))
-  def test_layer_outside_scope(self, distribution):
-    with self.cached_session():
-      with self.assertRaisesRegex(
-          ValueError, 'was not created in the distribution strategy'):
-        x = keras.layers.Input(shape=(3,), name='input')
-        y = keras.layers.Dense(4, name='dense')(x)
+    """Counts the number of times each callback method was run.
+
+    Attributes:
+        method_counts: dict. Contains the counts of times each callback
+            method was run.
+    """
+
+    def __init__(self):
+        self.method_counts = collections.defaultdict(int)
+        methods_to_count = [
+            "on_batch_begin",
+            "on_batch_end",
+            "on_epoch_begin",
+            "on_epoch_end",
+            "on_predict_batch_begin",
+            "on_predict_batch_end",
+            "on_predict_begin",
+            "on_predict_end",
+            "on_test_batch_begin",
+            "on_test_batch_end",
+            "on_test_begin",
+            "on_test_end",
+            "on_train_batch_begin",
+            "on_train_batch_end",
+            "on_train_begin",
+            "on_train_end",
+        ]
+        for method_name in methods_to_count:
+            setattr(
+                self,
+                method_name,
+                self.wrap_with_counts(method_name, getattr(self, method_name)),
+            )
+
+    def wrap_with_counts(self, method_name, method):
+        def _call_and_count(*args, **kwargs):
+            self.method_counts[method_name] += 1
+            return method(*args, **kwargs)
+
+        return _call_and_count
+
+
+class TestDistributionStrategyWithCallbacks(
+    tf.test.TestCase, parameterized.TestCase
+):
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.times(
+            keras_test_lib.all_strategy_combinations()
+        )
+    )
+    def test_callbacks_in_fit(self, distribution):
         with distribution.scope():
-      model = keras.Model(x, y)
-      optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
-      model.compile(
-          optimizer,
-          loss,
-          metrics=metrics)
+            model = keras_test_lib.get_model()
+            model.compile(optimizer="sgd", loss="mse", metrics=["mae"])
+
+        dataset = keras_test_lib.get_dataset(distribution)
+        counter = Counter()
+
+        epochs = 2
+        steps_per_epoch = 5
+        validation_steps = 3
+
+        model.fit(
+            dataset,
+            epochs=epochs,
+            steps_per_epoch=steps_per_epoch,
+            verbose=0,
+            validation_data=dataset,
+            validation_steps=validation_steps,
+            callbacks=[counter],
+        )
+
+        if (
+            isinstance(
+                distribution, tf.compat.v1.distribute.experimental.TPUStrategy
+            )
+            and not tf.executing_eagerly()
+        ):
+            # TPU Strategy can have multi-step training from
+            # extended.steps_per_run. If steps_per_run = 1, then
+            # num_batch_call_per_epoch = steps_per_epoch.
+            steps_per_run = distribution.extended.steps_per_run
+            num_batch_call_per_epoch = steps_per_epoch // steps_per_run
+            if steps_per_epoch % steps_per_run:
+                num_batch_call_per_epoch += 1
+        else:
+            num_batch_call_per_epoch = steps_per_epoch
+
+        self.assertDictEqual(
+            counter.method_counts,
+            {
+                "on_batch_begin": epochs * num_batch_call_per_epoch,
+                "on_batch_end": epochs * num_batch_call_per_epoch,
+                "on_epoch_begin": epochs,
+                "on_epoch_end": epochs,
+                "on_test_batch_begin": epochs * validation_steps,
+                "on_test_batch_end": epochs * validation_steps,
+                "on_test_begin": epochs,
+                "on_test_end": epochs,
+                "on_train_batch_begin": epochs * num_batch_call_per_epoch,
+                "on_train_batch_end": epochs * num_batch_call_per_epoch,
+                "on_train_begin": 1,
+                "on_train_end": 1,
+            },
+        )
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.times(
+            keras_test_lib.all_strategy_combinations()
+        )
+    )
+    def test_callbacks_in_eval(self, distribution):
+        with distribution.scope():
+            model = keras_test_lib.get_model()
+            model.compile(optimizer="sgd", loss="mse", metrics=["mae"])
+
+        dataset = keras_test_lib.get_dataset(distribution)
+        counter = Counter()
+
+        model.evaluate(dataset, steps=5, callbacks=[counter])
+
+        self.assertDictEqual(
+            counter.method_counts,
+            {
+                "on_test_batch_begin": 5,
+                "on_test_batch_end": 5,
+                "on_test_begin": 1,
+                "on_test_end": 1,
+            },
+        )
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.times(
+            keras_test_lib.all_strategy_combinations()
+        )
+    )
+    def test_callbacks_in_predict(self, distribution):
+        with distribution.scope():
+            model = keras_test_lib.get_model()
+            model.compile(optimizer="sgd", loss="mse", metrics=["mae"])
+
+        dataset = keras_test_lib.get_dataset(distribution)
+        counter = Counter()
+
+        model.predict(
+            keras_test_lib.get_predict_dataset(dataset),
+            steps=5,
+            callbacks=[counter],
+        )
+
+        self.assertDictEqual(
+            counter.method_counts,
+            {
+                "on_predict_batch_begin": 5,
+                "on_predict_batch_end": 5,
+                "on_predict_begin": 1,
+                "on_predict_end": 1,
+            },
+        )
+
+
+class TestDistributionStrategyErrorCases(
+    tf.test.TestCase, parameterized.TestCase
+):
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            distribution=[
+                tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,  # noqa: E501
+            ],
+            mode=["graph"],
+        )
+    )
+    def test_validating_dataset_input_tensors_with_shape_mismatch(
+        self, distribution
+    ):
+        with self.cached_session():
+
+
@tf.function + def run(): + ctx = tf.distribute.get_replica_context() + if ctx.replica_id_in_sync_group.device.endswith("GPU:0"): + return tf.constant([[1, 2]]) + else: + return tf.constant([[1, 2], [1, 2]]) + + x = distribution.run(run) + + # Removed device and input tensor shape details from the error + # message since the order of the device and the corresponding input + # tensor shape is not deterministic over different runs. + with self.assertRaisesRegex( + ValueError, + "Input tensor shapes do not match for " + "distributed tensor inputs " + "PerReplica:.+", + ): + with distribution.scope(): + distributed_training_utils_v1.validate_distributed_dataset_inputs( # noqa: E501 + distribution, x, None + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + ], + mode=["graph", "eager"], + ) + ) + def test_validating_dataset_input_tensors_with_dtype_mismatch( + self, distribution + ): + with self.cached_session(): + + @tf.function + def run(): + ctx = tf.distribute.get_replica_context() + if ctx.replica_id_in_sync_group.device.endswith("GPU:0"): + return tf.constant([[1, 2]], dtype=tf.int32) + else: + return tf.constant([[1, 2]], dtype=tf.float64) + + x = distribution.run(run) + + # Removed device and input tensor dtype details from the error + # message since the order of the device and the corresponding input + # tensor dtype is not deterministic over different runs. + with self.assertRaisesRegex( + ValueError, + "Input tensor dtypes do not match for " + "distributed tensor inputs " + "PerReplica:.+", + ): + with distribution.scope(): + distributed_training_utils_v1.validate_distributed_dataset_inputs( # noqa: E501 + distribution, x, None + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + ], + mode=["graph", "eager"], + ) + ) + def test_unsupported_features(self, distribution, mode): + with self.cached_session(): + with distribution.scope(): + model = keras_test_lib.get_model() + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) + loss = "mse" + metrics = ["mae"] + model.compile(optimizer, loss, metrics=metrics) + + dataset = keras_test_lib.get_dataset(distribution) + # Test with validation split + with self.assertRaises(ValueError): + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_split=0.5, + validation_steps=2, + ) + + # Test with sample weight. + sample_weight = np.random.random((10,)) + with self.assertRaises(ValueError): + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + sample_weight=sample_weight, + ) + + # Test with not specifying the `steps` argument for dataset with + # infinite cardinality. 
+ dataset = dataset.repeat() + with self.assertRaises(ValueError): + model.fit(dataset, epochs=1, verbose=0) + with self.assertRaises(ValueError): + model.evaluate(dataset, verbose=0) + + with self.assertRaises(ValueError): + model.predict(dataset, verbose=0) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.one_device_strategy, + ], + mode=["graph", "eager"], + ) + ) + def test_distribution_strategy_on_subclassed_model(self, distribution): + with distribution.scope(): + + class _SimpleMLP(keras.Model): + def __init__(self, num_labels): + super().__init__() + self.dense = keras.layers.Dense(num_labels) + + def call(self, inputs): + return self.dense(inputs) + + model = _SimpleMLP(3) + + if not tf.executing_eagerly(): + with self.assertRaisesRegex( + ValueError, + "We currently do not support distribution strategy with a " + "`Sequential` model that is created without `input_shape`/" + "`input_dim` set in its first layer or a subclassed model.", + ): + model.compile("sgd") + else: + model.compile("sgd") + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.one_device_strategy, + ], + mode=["graph", "eager"], + ) + ) + def test_distribution_strategy_on_deferred_sequential_model( + self, distribution + ): with distribution.scope(): - model = keras.Model(x, y) - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - model.compile( - optimizer, - loss, - metrics=metrics) - - @tf.__internal__.distribute.combinations.generate( - keras_test_lib.all_strategy_combinations_minus_default()) - def test_model_outside_scope(self, distribution): - with self.cached_session(): - with self.assertRaisesRegex( - ValueError, 'was not created in the distribution strategy'): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = keras.models.Sequential() + model.add(keras.layers.Dense(16, activation="relu")) + model.add(keras.layers.Dense(3, activation="softmax")) + + if tf.executing_eagerly(): + model.compile("sgd") + else: + with self.assertRaisesRegex( + ValueError, + "We currently do not support distribution strategy with a " + "`Sequential` model that is created without " + "`input_shape`/`input_dim` set in its first layer or " + "a subclassed model.", + ): + model.compile("sgd") + + @tf.__internal__.distribute.combinations.generate( + keras_test_lib.all_strategy_combinations_minus_default() + ) + def test_standalone_loss_without_loss_reduction(self, distribution): with distribution.scope(): - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.001) - loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - model.compile(optimizer, loss, metrics=metrics) - - -class TestDistributionStrategyWithStaticShapes(tf.test.TestCase, - parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=['graph', 'eager'])) - def test_input_batch_size_not_divisible_by_num_replicas(self, 
distribution): - with distribution.scope(): - with self.assertRaisesRegex( - ValueError, r'The `batch_size` argument \(5\) must be divisible by ' - r'the number of replicas \(2\)'): - keras.layers.Input(shape=(3,), batch_size=5, name='input') - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=['graph', 'eager'])) - def test_static_input_batch_size(self, distribution): - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10, drop_remainder=True) - - with distribution.scope(): - x = keras.layers.Input(shape=(3,), batch_size=10, name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) - model.compile(optimizer='sgd', loss='mse', metrics=['mae']) - - model.fit(dataset, epochs=1, steps_per_epoch=5) - model.evaluate(dataset, steps=5) - model.predict(dataset) - - -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() + loss_object = losses.MeanSquaredError() + + with self.assertRaisesRegex( + ValueError, + "Please use `tf.keras.losses.Reduction.SUM` or " + "`tf.keras.losses.Reduction.NONE`", + ): + y = np.asarray([1, 0]) + loss_object(y, y) + + +class TestDistributionStrategyWithLossMasking( + tf.test.TestCase, parameterized.TestCase +): + + # TODO(priyag): Enable all strategies for this test. Currently it does not + # work for TPU due to some invalid datatype. + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + ], + mode=["graph", "eager"], + optimizer=optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, # noqa: E501 + ) + ) + def test_masking(self, distribution, optimizer): + with self.cached_session(): + np.random.seed(1337) + x = np.array([[[1], [1]], [[0], [0]]]) + with distribution.scope(): + model = keras.models.Sequential() + model.add( + keras.layers.Masking(mask_value=0, input_shape=(2, 1)) + ) + model.add( + keras.layers.TimeDistributed( + keras.layers.Dense(1, kernel_initializer="one") + ) + ) + model.compile(loss="mse", optimizer=optimizer()) + y = np.array([[[1], [1]], [[1], [1]]]) + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + hist = model.fit(x=dataset, epochs=1, steps_per_epoch=2) + self.assertEqual(hist.history["loss"][0], 0) + + +class TestDistributionStrategyWithNormalizationLayer( + tf.test.TestCase, parameterized.TestCase +): + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + keras_test_lib.all_strategy_combinations(), + tf.__internal__.test.combinations.combine( + fused=[True, False], + optimizer=optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, # noqa: E501 + ), + ) + ) + def test_batchnorm_correctness(self, distribution, fused, optimizer): + with self.cached_session(): + with distribution.scope(): + model = keras.models.Sequential() + norm = keras.layers.BatchNormalization( + input_shape=( + 10, + 20, + 30, + ), + momentum=0.8, + fused=fused, + ) + model.add(norm) + model.compile(loss="mse", optimizer=optimizer()) + + # centered on 5.0, variance 10.0 + x = np.random.normal(loc=5.0, scale=10.0, 
size=(1000, 10, 20, 30)) + x = x.astype("float32") + dataset = tf.data.Dataset.from_tensor_slices((x, x)) + dataset = dataset.repeat(100) + dataset = keras_test_lib.batch_wrapper(dataset, 32, distribution) + + predict_dataset = tf.data.Dataset.from_tensor_slices(x) + predict_dataset = predict_dataset.repeat(100) + predict_dataset = keras_test_lib.batch_wrapper( + predict_dataset, 32, distribution + ) + + model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10) + out = model.predict(predict_dataset, steps=2) + out -= keras.backend.eval(norm.beta) + out /= keras.backend.eval(norm.gamma) + np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1) + np.testing.assert_allclose(out.std(), 1.0, atol=1e-1) + + # TODO(b/146181571): Enable this for all distribution strategies once + # DistributedVariable.assign() returns a variable for MirroredStrategy. + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + keras_test_lib.tpu_strategy_combinations(), + tf.__internal__.test.combinations.combine( + optimizer=optimizer_combinations.gradient_descent_optimizer_keras_v2_fn # noqa: E501 + ), + ) + ) + def test_batchnorm_correctness_with_renorm(self, distribution, optimizer): + with self.cached_session(): + with distribution.scope(): + model = keras.models.Sequential() + norm = keras.layers.BatchNormalization( + input_shape=( + 10, + 20, + 30, + ), + momentum=0.8, + fused=False, + renorm=True, + ) + model.add(norm) + model.compile(loss="mse", optimizer=optimizer()) + + # centered on 5.0, variance 10.0 + x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 20, 30)) + x = x.astype("float32") + dataset = tf.data.Dataset.from_tensor_slices((x, x)) + dataset = dataset.repeat(100) + dataset = keras_test_lib.batch_wrapper(dataset, 32, distribution) + + predict_dataset = tf.data.Dataset.from_tensor_slices(x) + predict_dataset = predict_dataset.repeat(100) + predict_dataset = keras_test_lib.batch_wrapper( + predict_dataset, 32, distribution + ) + + model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10) + out = model.predict(predict_dataset, steps=2) + out -= keras.backend.eval(norm.beta) + out /= keras.backend.eval(norm.gamma) + np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1) + np.testing.assert_allclose(out.std(), 1.0, atol=1e-1) + + +class TestDistributionStrategySaveLoadWeights( + tf.test.TestCase, parameterized.TestCase +): + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + keras_test_lib.all_strategy_combinations_minus_default(), + tf.__internal__.test.combinations.combine( + optimizer=optimizer_combinations.rmsprop_optimizer_keras_v2_fn + ), + ) + ) + def test_save_load_h5(self, distribution, optimizer): + with self.cached_session(): + dataset = keras_test_lib.get_dataset(distribution) + with distribution.scope(): + model = keras_test_lib.get_model() + model.compile(optimizer(), "mse") + model.fit(dataset, epochs=1, steps_per_epoch=1) + + weights_file = tempfile.mktemp(".h5") + model.save_weights(weights_file) + + model_2 = keras_test_lib.get_model() + model_2.compile(optimizer(), "mse") + model_2.load_weights(weights_file) + model_2.predict( + keras_test_lib.get_predict_dataset(distribution), steps=2 + ) + model_2.fit(dataset, epochs=1, steps_per_epoch=1) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + keras_test_lib.all_strategy_combinations_minus_default(), + tf.__internal__.test.combinations.combine( + 
optimizer=optimizer_combinations.rmsprop_optimizer_keras_v2_fn
+            ),
+        )
+    )
+    def test_save_load_trackable(self, distribution, optimizer):
+        # TODO(b/123533246): Enable the test for TPU once bug is fixed
+        if (
+            isinstance(
+                distribution,
+                (
+                    tf.distribute.experimental.TPUStrategy,
+                    tf.compat.v1.distribute.experimental.TPUStrategy,
+                ),
+            )
+            and distribution.extended.steps_per_run > 1
+        ):
+            self.skipTest(
+                "MultiStep TPU Strategy deadlocks with optimizer restore."
+            )
+        with self.cached_session():
+            dataset = keras_test_lib.get_dataset(distribution)
+            with distribution.scope():
+                model = keras_test_lib.get_model()
+                model.compile(optimizer(), "mse")
+                model.fit(dataset, epochs=1, steps_per_epoch=1)
+
+                weights_file = tempfile.mktemp()
+                model.save_weights(weights_file)
+
+                model_2 = keras_test_lib.get_model()
+                model_2.compile(optimizer(), "mse")
+                model_2.load_weights(weights_file)
+                model_2.predict(
+                    keras_test_lib.get_predict_dataset(distribution), steps=2
+                )
+                model_2.fit(dataset, epochs=1, steps_per_epoch=1)
+
+
+class TestDistributionStrategyValidation(
+    tf.test.TestCase, parameterized.TestCase
+):
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.times(
+            keras_test_lib.all_strategy_combinations_minus_default()
+        )
+    )
+    def test_layer_outside_scope(self, distribution):
+        with self.cached_session():
+            with self.assertRaisesRegex(
+                ValueError, "was not created in the distribution strategy"
+            ):
+                x = keras.layers.Input(shape=(3,), name="input")
+                y = keras.layers.Dense(4, name="dense")(x)
+                with distribution.scope():
+                    model = keras.Model(x, y)
+                    optimizer = tf.compat.v1.train.GradientDescentOptimizer(
+                        0.001
+                    )
+                    loss = "mse"
+                    metrics = ["mae", keras.metrics.CategoricalAccuracy()]
+                    model.compile(optimizer, loss, metrics=metrics)
+
+    @tf.__internal__.distribute.combinations.generate(
+        keras_test_lib.all_strategy_combinations_minus_default()
+    )
+    def test_model_outside_scope(self, distribution):
+        with self.cached_session():
+            with self.assertRaisesRegex(
+                ValueError, "was not created in the distribution strategy"
+            ):
+                x = keras.layers.Input(shape=(3,), name="input")
+                y = keras.layers.Dense(4, name="dense")(x)
+                model = keras.Model(x, y)
+                with distribution.scope():
+                    optimizer = tf.compat.v1.train.GradientDescentOptimizer(
+                        0.001
+                    )
+                    loss = "mse"
+                    metrics = ["mae", keras.metrics.CategoricalAccuracy()]
+                    model.compile(optimizer, loss, metrics=metrics)
+
+
+class TestDistributionStrategyWithStaticShapes(
+    tf.test.TestCase, parameterized.TestCase
+):
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            distribution=[
+                tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,  # noqa: E501
+            ],
+            mode=["graph", "eager"],
+        )
+    )
+    def test_input_batch_size_not_divisible_by_num_replicas(
+        self, distribution
+    ):
+        with distribution.scope():
+            with self.assertRaisesRegex(
+                ValueError,
+                r"The `batch_size` argument \(5\) must be divisible by "
+                r"the number of replicas \(2\)",
+            ):
+                keras.layers.Input(shape=(3,), batch_size=5, name="input")
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            distribution=[
+                tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,  # noqa: E501
+            ],
+            mode=["graph", "eager"],
+        )
+    )
+    def test_static_input_batch_size(self, distribution):
+        inputs = np.zeros((10, 3), dtype=np.float32)
+        targets = np.zeros((10, 4), dtype=np.float32)
+        dataset =
tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10, drop_remainder=True) + + with distribution.scope(): + x = keras.layers.Input(shape=(3,), batch_size=10, name="input") + y = keras.layers.Dense(4, name="dense")(x) + model = keras.Model(x, y) + model.compile(optimizer="sgd", loss="mse", metrics=["mae"]) + + model.fit(dataset, epochs=1, steps_per_epoch=5) + model.evaluate(dataset, steps=5) + model.predict(dataset) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/minimize_loss_test.py b/keras/distribute/minimize_loss_test.py index 414fe8ae4d59..14168b003fdc 100644 --- a/keras/distribute/minimize_loss_test.py +++ b/keras/distribute/minimize_loss_test.py @@ -15,522 +15,685 @@ """Tests for running legacy optimizer code with DistributionStrategy.""" +import numpy +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras.distribute import optimizer_combinations from keras.distribute.test_example import batchnorm_example from keras.distribute.test_example import minimize_loss_example from keras.layers import core -from keras.optimizers.optimizer_v2 import optimizer_v2 -import numpy -import tensorflow.compat.v2 as tf - +from keras.optimizers.legacy import optimizer_v2 VAR_MAP_V1 = { "GradientDescent": ("dense/kernel", "dense/bias"), - "Adagrad": ("dense/kernel/Adagrad", "dense/kernel", "dense/bias/Adagrad", - "dense/bias"), - "Ftrl": ("dense/kernel/Ftrl", "dense/kernel", "dense/bias/Ftrl", - "dense/bias", "dense/kernel/Ftrl_1", "dense/bias/Ftrl_1"), - "RMSProp": ("dense/kernel", "dense/bias/RMSProp", "dense/bias/RMSProp_1", - "dense/bias", "dense/kernel/RMSProp_1", "dense/kernel/RMSProp") + "Adagrad": ( + "dense/kernel/Adagrad", + "dense/kernel", + "dense/bias/Adagrad", + "dense/bias", + ), + "Ftrl": ( + "dense/kernel/Ftrl", + "dense/kernel", + "dense/bias/Ftrl", + "dense/bias", + "dense/kernel/Ftrl_1", + "dense/bias/Ftrl_1", + ), + "RMSProp": ( + "dense/kernel", + "dense/bias/RMSProp", + "dense/bias/RMSProp_1", + "dense/bias", + "dense/kernel/RMSProp_1", + "dense/kernel/RMSProp", + ), } VAR_MAP_V2 = { - "SGD": ("dense/bias", "SGD/learning_rate", "SGD/decay", "SGD/iter", - "dense/kernel", "SGD/momentum"), - "Adagrad": - ("Adagrad/iter", "dense/bias", "dense/kernel", "Adagrad/learning_rate", - "Adagrad/decay", "Adagrad/dense/kernel/accumulator", - "Adagrad/dense/bias/accumulator") + "SGD": ( + "dense/bias", + "SGD/learning_rate", + "SGD/decay", + "SGD/iter", + "dense/kernel", + "SGD/momentum", + ), + "Adagrad": ( + "Adagrad/iter", + "dense/bias", + "dense/kernel", + "Adagrad/learning_rate", + "Adagrad/decay", + "Adagrad/dense/kernel/accumulator", + "Adagrad/dense/bias/accumulator", + ), } class MinimizeLossStepTest(tf.test.TestCase, parameterized.TestCase): - - def _get_iterator(self, strategy, input_fn): - iterator = strategy.make_input_fn_iterator(lambda _: input_fn()) - self.evaluate(iterator.initializer) - return iterator - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - optimizer_combinations.distributions_and_v1_optimizers(), - tf.__internal__.test.combinations.combine(mode=["graph"], use_callable_loss=[True, False]) - + tf.__internal__.test.combinations.combine(mode=["eager"], use_callable_loss=[True])) + - tf.__internal__.test.combinations.times( - optimizer_combinations.distributions_and_v2_optimizers(), - tf.__internal__.test.combinations.combine( - mode=["graph", "eager"], 
use_callable_loss=[True])) + - tf.__internal__.test.combinations.combine( - distribution=[tf.__internal__.distribute.combinations.tpu_strategy], - optimizer_fn=optimizer_combinations.optimizers_v2, - mode=["graph"], - use_callable_loss=[True]) + tf.__internal__.test.combinations.combine( - distribution=[tf.__internal__.distribute.combinations.tpu_strategy], - optimizer_fn=optimizer_combinations.optimizers_v1, - mode=["graph"], - use_callable_loss=[True, False])) - def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss): - with distribution.scope(): - optimizer = optimizer_fn() - model_fn, dataset_fn, layer = minimize_loss_example( - optimizer, use_bias=True, use_callable_loss=use_callable_loss) - - def step_fn(ctx, inputs): - del ctx # Unused - return distribution.group( - distribution.extended.call_for_each_replica( - model_fn, args=(inputs,))) - - iterator = self._get_iterator(distribution, dataset_fn) - - def run_step(): - return distribution.extended.experimental_run_steps_on_iterator( - step_fn, iterator, iterations=2).run_op - - if not tf.executing_eagerly(): - with self.cached_session() as sess: - run_step = sess.make_callable(run_step()) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - weights, biases = [], [] - for _ in range(5): - run_step() - weights.append(self.evaluate(layer.kernel)) - biases.append(self.evaluate(layer.bias)) - - error = abs(numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) - is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) - self.assertTrue(is_not_increasing) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - optimizer_combinations.distributions_and_v1_optimizers(), - tf.__internal__.test.combinations.combine(mode=["graph"], use_callable_loss=[True, False]) - + tf.__internal__.test.combinations.combine(mode=["eager"], use_callable_loss=[True])) + - tf.__internal__.test.combinations.times( - optimizer_combinations.distributions_and_v2_optimizers(), - tf.__internal__.test.combinations.combine( - mode=["graph", "eager"], use_callable_loss=[True]))) - def testTrainNetworkByCallForEachReplica(self, distribution, optimizer_fn, - use_callable_loss): - with distribution.scope(): - optimizer = optimizer_fn() - model_fn, dataset_fn, layer = minimize_loss_example( - optimizer, use_bias=True, use_callable_loss=use_callable_loss) - - iterator = self._get_iterator(distribution, dataset_fn) - - def run_step(): - return distribution.group( - distribution.extended.call_for_each_replica( - model_fn, args=(iterator.get_next(),))) - - if not tf.executing_eagerly(): - with self.cached_session() as sess: - run_step = sess.make_callable(run_step()) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - weights, biases = [], [] - for _ in range(10): - run_step() - - weights.append(self.evaluate(layer.kernel)) - biases.append(self.evaluate(layer.bias)) - - error = abs(numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) - is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) - self.assertTrue(is_not_increasing) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - optimizer_combinations.distributions_and_v1_and_v2_optimizers(), - tf.__internal__.test.combinations.combine(mode=["graph", "eager"])) + tf.__internal__.test.combinations.combine( - distribution=[tf.__internal__.distribute.combinations.tpu_strategy], - optimizer_fn=optimizer_combinations.optimizers_v1_and_v2, - mode=["graph"])) - def 
testOptimizerInsideModelFn(self, distribution, optimizer_fn): - if (not tf.executing_eagerly() and - tf.compat.v1.control_flow_v2_enabled()): - self.skipTest("b/138751864") - created_variables = [] - trainable_variables = [] - - def appending_creator(next_creator, **kwargs): - v = next_creator(**kwargs) - # Skip the StateVar created in the tf.random.Generator, which is used by - # keras initializers. - if "StateVar" in v.name: - return v - created_variables.append(v.name) - if "trainable" in kwargs and kwargs["trainable"]: - trainable_variables.append(v.name) - return v - - # Creator scope needs to be set before it's used inside - # `distribution.scope`. - with tf.variable_creator_scope( - appending_creator), distribution.scope(): - optimizer = optimizer_fn() - model_fn, dataset_fn, _ = minimize_loss_example( - optimizer, use_bias=True, use_callable_loss=True) - - def step_fn(ctx, inputs): - del ctx # Unused - return distribution.group( - distribution.extended.call_for_each_replica( - model_fn, args=(inputs,))) - - iterator = self._get_iterator(distribution, dataset_fn) - - def run_step(): - return distribution.extended.experimental_run_steps_on_iterator( - step_fn, iterator, iterations=1).run_op - - if not tf.executing_eagerly(): - with self.cached_session() as sess: - run_step = sess.make_callable(run_step()) - self.evaluate(tf.compat.v1.global_variables_initializer()) - run_step() - - def get_expected_variables(num_parameter_devices): - name = optimizer._name - - if isinstance(optimizer, optimizer_v2.OptimizerV2): - variables = VAR_MAP_V2[name] - else: - variables = VAR_MAP_V1[name] - - extended_variables = [ - v + "/replica_{}".format(replica) - for v in variables - for replica in range(1, num_parameter_devices) - ] - variables = list(variables) + extended_variables - return set(v + ":0" for v in variables) - - self.assertEqual( - get_expected_variables(len(distribution.extended.parameter_devices)), - set(created_variables)) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(momentum=[0.8, 0.9, 0.99], renorm=[False, True]), - tf.__internal__.test.combinations.times( - optimizer_combinations.distributions_and_v1_and_v2_optimizers(), - tf.__internal__.test.combinations.combine( - mode=["graph", "eager"], - # TODO(isaprykin): Allow False here. Currently subsequent - # replicas will re-execute UPDATE_OPS of previous replicas. 
- update_ops_in_cross_replica_mode=[True])) + - tf.__internal__.test.combinations.combine( - distribution=[tf.__internal__.distribute.combinations.tpu_strategy], - optimizer_fn=optimizer_combinations.optimizers_v1_and_v2, - mode=["graph"], - update_ops_in_cross_replica_mode=[False]))) - def testTrainNetworkWithBatchNorm(self, distribution, optimizer_fn, momentum, - renorm, update_ops_in_cross_replica_mode): - """Verifies that moving mean updates are reduced across replicas.""" - with distribution.scope(): - num_replicas = distribution.num_replicas_in_sync - model_fn, dataset_fn, batchnorm = batchnorm_example( - optimizer_fn, - batch_per_epoch=num_replicas, - momentum=momentum, - renorm=renorm, - update_ops_in_replica_mode=not update_ops_in_cross_replica_mode) - - def step_fn(ctx, inputs): - del ctx # Unused - fetches = distribution.experimental_local_results( - distribution.extended.call_for_each_replica( - model_fn, args=(inputs,))) - if update_ops_in_cross_replica_mode: - fetches += tuple(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)) - return tf.group(fetches) - - iterator = self._get_iterator(distribution, dataset_fn) - - def run_step(): - return distribution.extended.experimental_run_steps_on_iterator( - step_fn, iterator, iterations=1).run_op - - if not tf.executing_eagerly(): - with self.cached_session() as sess: - run_step = sess.make_callable(run_step()) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - expected_moving_means = [0.] * 8 - - def averaged_batch_mean(i): - # Each batch has shape [16, 8] where the ith element in jth list is - # (8 * j + i + replica_id * 100). So the batch mean in each replica is - # (60 + i + replica_id * 100). So here comes its batch mean over all - # replicas: - return 60. + i + (num_replicas - 1.) / 2. * 100. - - for _ in range(10): - run_step() - moving_means = self.evaluate(batchnorm.moving_mean) - - # We make sure that the moving_mean is updated as if the sample mean is - # calculated over all replicas. 
- for i, expected_moving_mean in enumerate(expected_moving_means): - expected_moving_means[i] -= (( - expected_moving_mean - averaged_batch_mean(i)) * (1.0 - momentum)) - self.assertNear(expected_moving_means[i], moving_means[i], 0.0001) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(loss_reduction=[ - tf.compat.v1.losses.Reduction.SUM, tf.compat.v1.losses.Reduction.MEAN, - tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE, - tf.compat.v1.losses.Reduction.SUM_OVER_NONZERO_WEIGHTS - ]), - tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(distribution=[ - tf.__internal__.distribute.combinations.one_device_strategy, - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, - tf.__internal__.distribute.combinations - .mirrored_strategy_with_two_gpus_no_merge_call, - ]), - tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(optimizer_fn=optimizer_combinations - .gradient_descent_optimizer_v1_fn), - tf.__internal__.test.combinations.combine( - mode=["graph"], use_callable_loss=[True, False]) + - tf.__internal__.test.combinations.combine( - mode=["eager"], use_callable_loss=[True])) + - tf.__internal__.test.combinations.times( - tf.__internal__.test.combinations.combine(optimizer_fn=optimizer_combinations - .gradient_descent_optimizer_keras_v2_fn), - tf.__internal__.test.combinations.combine( - mode=["graph", "eager"], use_callable_loss=[True]))) + - tf.__internal__.test.combinations.combine( - distribution=[tf.__internal__.distribute.combinations.tpu_strategy], - optimizer_fn=optimizer_combinations - .gradient_descent_optimizer_v1_fn, - mode=["graph"], - use_callable_loss=[True, False]) + tf.__internal__.test.combinations.combine( - distribution=[tf.__internal__.distribute.combinations.tpu_strategy], - optimizer_fn=optimizer_combinations - .gradient_descent_optimizer_keras_v2_fn, - mode=["graph"], - use_callable_loss=[True]))) - def testMeanVsSum(self, distribution, optimizer_fn, loss_reduction, - use_callable_loss): - with distribution.scope(): - all_vars = [] - - def model_fn(inputs): - x, y = inputs - w = tf.compat.v1.get_variable("w", initializer=[[2.]]) - all_vars.append(w) - - def loss_fn(): - # Use fixed initialization to make the steps deterministic. 
- predict = tf.matmul(x, w) - loss = tf.compat.v1.losses.mean_squared_error( - y, predict, reduction=loss_reduction) - if loss_reduction == tf.compat.v1.losses.Reduction.SUM: - return loss - return loss / distribution.num_replicas_in_sync - - optimizer = optimizer_fn() # GradientDescent with 0.2 learning rate - - if isinstance(optimizer, optimizer_v2.OptimizerV2): - return optimizer.minimize(loss_fn, [w]) - else: - if use_callable_loss: - return optimizer.minimize(loss_fn) - else: - return optimizer.minimize(loss_fn()) - - def dataset_fn(): - features = tf.data.Dataset.from_tensors([[2.], [7.]]) - labels = tf.data.Dataset.from_tensors([[6.], [21.]]) - return tf.data.Dataset.zip((features, labels)).repeat() - - def step_fn(ctx, inputs): - del ctx # Unused - return distribution.group( - distribution.extended.call_for_each_replica( - model_fn, args=(inputs,))) - - iterator = self._get_iterator(distribution, dataset_fn) - - def run_step(): - return distribution.extended.experimental_run_steps_on_iterator( - step_fn, iterator, iterations=1).run_op - - if not tf.executing_eagerly(): - with self.cached_session() as sess: - run_step = sess.make_callable(run_step()) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - run_step() - - v = all_vars[0] - self.assertTrue(all(v is vi for vi in all_vars[1:])) - weight = numpy.squeeze(self.evaluate(v)) - # Our model is: - # predict = x * w - # loss = (predict - y)^2 - # dloss/dpredict = 2*(predict - y) - # dloss/dw = 2 * x^T @ (predict - y) - # For our batch size of 2, assuming sum loss reduction: - # x = [2, 7] - # y = [6, 21] - # w_initial = 2 - # predict = [4, 14] - # predict - y = [-2, -7] - # dloss/dw = 2 <[2, 7], [-2, -7]> = - 2(4 + 49) = -106 - # So unreplicated the update to w with lr=0.001 is -0.2 * -106 = 0.106 - # with sum loss reduction, or 0.053 with mean. - if loss_reduction == tf.compat.v1.losses.Reduction.SUM: - # Note that the "distribution.num_replicas_in_sync" factor will go away - # once we split the input across replicas, instead of pulling a complete - # batch of input per replica. - self.assertNear(weight, 2 + 0.106 * distribution.num_replicas_in_sync, - 0.0001) - else: - # One of the mean loss reductions. - self.assertNear(weight, 2 + 0.053, 0.0001) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times( - optimizer_combinations.distributions_and_v1_and_v2_optimizers(), - tf.__internal__.test.combinations.combine(mode=["graph", "eager"]), - tf.__internal__.test.combinations.combine(is_tpu=[False])) + tf.__internal__.test.combinations.combine( - distribution=[tf.__internal__.distribute.combinations.tpu_strategy], - optimizer_fn=optimizer_combinations.optimizers_v1_and_v2, - mode=["graph"], - is_tpu=[True])) - def testRunStepsWithOutputContext(self, distribution, optimizer_fn, is_tpu): - with distribution.scope(): - def dataset_fn(): - dataset = tf.data.Dataset.from_tensors([[1.]]).repeat() - # TODO(priyag): batch with drop_remainder=True causes shapes to be - # fully defined for TPU. Remove this when XLA supports dynamic shapes. - return dataset.batch(batch_size=1, drop_remainder=True) - - optimizer = optimizer_fn() - layer = core.Dense(1, use_bias=True) - - key1 = "foo" - value1 = "bar" - - def model_fn(output_context, x): - """A very simple model written by the user.""" - def loss_fn(): - y = tf.reshape(layer(x), []) - tf.constant(1.) 
- return y * y - - if isinstance(optimizer, optimizer_v2.OptimizerV2): - train_op = optimizer.minimize( - loss_fn, lambda: layer.trainable_variables) + def _get_iterator(self, strategy, input_fn): + iterator = strategy.make_input_fn_iterator(lambda _: input_fn()) + self.evaluate(iterator.initializer) + return iterator + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + optimizer_combinations.distributions_and_v1_optimizers(), + tf.__internal__.test.combinations.combine( + mode=["graph"], use_callable_loss=[True, False] + ) + + tf.__internal__.test.combinations.combine( + mode=["eager"], use_callable_loss=[True] + ), + ) + + tf.__internal__.test.combinations.times( + optimizer_combinations.distributions_and_v2_optimizers(), + tf.__internal__.test.combinations.combine( + mode=["graph", "eager"], use_callable_loss=[True] + ), + ) + + tf.__internal__.test.combinations.combine( + distribution=[tf.__internal__.distribute.combinations.tpu_strategy], + optimizer_fn=optimizer_combinations.optimizers_v2, + mode=["graph"], + use_callable_loss=[True], + ) + + tf.__internal__.test.combinations.combine( + distribution=[tf.__internal__.distribute.combinations.tpu_strategy], + optimizer_fn=optimizer_combinations.optimizers_v1, + mode=["graph"], + use_callable_loss=[True, False], + ) + ) + def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss): + with distribution.scope(): + optimizer = optimizer_fn() + model_fn, dataset_fn, layer = minimize_loss_example( + optimizer, use_bias=True, use_callable_loss=use_callable_loss + ) + + def step_fn(ctx, inputs): + del ctx # Unused + return distribution.group( + distribution.extended.call_for_each_replica( + model_fn, args=(inputs,) + ) + ) + + iterator = self._get_iterator(distribution, dataset_fn) + + def run_step(): + return distribution.extended.experimental_run_steps_on_iterator( + step_fn, iterator, iterations=2 + ).run_op + + if not tf.executing_eagerly(): + with self.cached_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + weights, biases = [], [] + for _ in range(5): + run_step() + weights.append(self.evaluate(layer.kernel)) + biases.append(self.evaluate(layer.bias)) + + error = abs( + numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1 + ) + is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) + self.assertTrue(is_not_increasing) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + optimizer_combinations.distributions_and_v1_optimizers(), + tf.__internal__.test.combinations.combine( + mode=["graph"], use_callable_loss=[True, False] + ) + + tf.__internal__.test.combinations.combine( + mode=["eager"], use_callable_loss=[True] + ), + ) + + tf.__internal__.test.combinations.times( + optimizer_combinations.distributions_and_v2_optimizers(), + tf.__internal__.test.combinations.combine( + mode=["graph", "eager"], use_callable_loss=[True] + ), + ) + ) + def testTrainNetworkByCallForEachReplica( + self, distribution, optimizer_fn, use_callable_loss + ): + with distribution.scope(): + optimizer = optimizer_fn() + model_fn, dataset_fn, layer = minimize_loss_example( + optimizer, use_bias=True, use_callable_loss=use_callable_loss + ) + + iterator = self._get_iterator(distribution, dataset_fn) + + def run_step(): + return distribution.group( + distribution.extended.call_for_each_replica( + model_fn, args=(iterator.get_next(),) + ) + ) + + if not 
tf.executing_eagerly(): + with self.cached_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + weights, biases = [], [] + for _ in range(10): + run_step() + + weights.append(self.evaluate(layer.kernel)) + biases.append(self.evaluate(layer.bias)) + + error = abs( + numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1 + ) + is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) + self.assertTrue(is_not_increasing) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + optimizer_combinations.distributions_and_v1_and_v2_optimizers(), + tf.__internal__.test.combinations.combine(mode=["graph", "eager"]), + ) + + tf.__internal__.test.combinations.combine( + distribution=[tf.__internal__.distribute.combinations.tpu_strategy], + optimizer_fn=optimizer_combinations.optimizers_v1_and_v2, + mode=["graph"], + ) + ) + def testOptimizerInsideModelFn(self, distribution, optimizer_fn): + if ( + not tf.executing_eagerly() + and tf.compat.v1.control_flow_v2_enabled() + ): + self.skipTest("b/138751864") + created_variables = [] + trainable_variables = [] + + def appending_creator(next_creator, **kwargs): + v = next_creator(**kwargs) + # Skip the StateVar created in the tf.random.Generator, which is + # used by keras initializers. + if "StateVar" in v.name: + return v + created_variables.append(v.name) + if "trainable" in kwargs and kwargs["trainable"]: + trainable_variables.append(v.name) + return v + + # Creator scope needs to be set before it's used inside + # `distribution.scope`. + with tf.variable_creator_scope(appending_creator), distribution.scope(): + optimizer = optimizer_fn() + model_fn, dataset_fn, _ = minimize_loss_example( + optimizer, use_bias=True, use_callable_loss=True + ) + + def step_fn(ctx, inputs): + del ctx # Unused + return distribution.group( + distribution.extended.call_for_each_replica( + model_fn, args=(inputs,) + ) + ) + + iterator = self._get_iterator(distribution, dataset_fn) + + def run_step(): + return distribution.extended.experimental_run_steps_on_iterator( + step_fn, iterator, iterations=1 + ).run_op + + if not tf.executing_eagerly(): + with self.cached_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(tf.compat.v1.global_variables_initializer()) + run_step() + + def get_expected_variables(num_parameter_devices): + name = optimizer._name + + if isinstance(optimizer, optimizer_v2.OptimizerV2): + variables = VAR_MAP_V2[name] + else: + variables = VAR_MAP_V1[name] + + extended_variables = [ + v + f"/replica_{replica}" + for v in variables + for replica in range(1, num_parameter_devices) + ] + variables = list(variables) + extended_variables + return set(v + ":0" for v in variables) + + self.assertEqual( + get_expected_variables( + len(distribution.extended.parameter_devices) + ), + set(created_variables), + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + momentum=[0.8, 0.9, 0.99], renorm=[False, True] + ), + tf.__internal__.test.combinations.times( + optimizer_combinations.distributions_and_v1_and_v2_optimizers(), + tf.__internal__.test.combinations.combine( + mode=["graph", "eager"], + # TODO(isaprykin): Allow False here. Currently subsequent + # replicas will re-execute UPDATE_OPS of previous replicas. 
+ update_ops_in_cross_replica_mode=[True], + ), + ) + + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.tpu_strategy + ], + optimizer_fn=optimizer_combinations.optimizers_v1_and_v2, + mode=["graph"], + update_ops_in_cross_replica_mode=[False], + ), + ) + ) + def testTrainNetworkWithBatchNorm( + self, + distribution, + optimizer_fn, + momentum, + renorm, + update_ops_in_cross_replica_mode, + ): + """Verifies that moving mean updates are reduced across replicas.""" + with distribution.scope(): + num_replicas = distribution.num_replicas_in_sync + model_fn, dataset_fn, batchnorm = batchnorm_example( + optimizer_fn, + batch_per_epoch=num_replicas, + momentum=momentum, + renorm=renorm, + update_ops_in_replica_mode=not update_ops_in_cross_replica_mode, + ) + + def step_fn(ctx, inputs): + del ctx # Unused + fetches = distribution.experimental_local_results( + distribution.extended.call_for_each_replica( + model_fn, args=(inputs,) + ) + ) + if update_ops_in_cross_replica_mode: + fetches += tuple( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.UPDATE_OPS + ) + ) + return tf.group(fetches) + + iterator = self._get_iterator(distribution, dataset_fn) + + def run_step(): + return distribution.extended.experimental_run_steps_on_iterator( + step_fn, iterator, iterations=1 + ).run_op + + if not tf.executing_eagerly(): + with self.cached_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + expected_moving_means = [0.0] * 8 + + def averaged_batch_mean(i): + # Each batch has shape [16, 8] where the ith element in jth list + # is (8 * j + i + replica_id * 100). So the batch mean in each + # replica is (60 + i + replica_id * 100). So here comes its + # batch mean over all replicas: + return 60.0 + i + (num_replicas - 1.0) / 2.0 * 100.0 + + for _ in range(10): + run_step() + moving_means = self.evaluate(batchnorm.moving_mean) + + # We make sure that the moving_mean is updated as if the sample + # mean is calculated over all replicas. 
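To make the batch-mean arithmetic in the comments above concrete before the verification loop that follows, here is a minimal standalone numpy sketch; the replica count, momentum, and feature index are illustrative values, not taken from the test:

import numpy as np

num_replicas = 2
momentum = 0.9
i = 3  # feature index in [0, 8)

# Per-replica batch of shape [16, 8]: element (j, col) is
# 8 * j + col + 100 * replica_id, as in the comment above.
batches = [
    np.array([[8 * j + col + 100 * r for col in range(8)] for j in range(16)])
    for r in range(num_replicas)
]

# Batch mean of feature i in replica r: mean_j(8 * j) = 60, so 60 + i + 100*r.
per_replica_means = [b[:, i].mean() for b in batches]
assert per_replica_means == [60.0 + i, 60.0 + i + 100.0]

# Averaged over all replicas: 60 + i + (num_replicas - 1) / 2 * 100.
avg = float(np.mean(per_replica_means))
assert avg == 60.0 + i + (num_replicas - 1.0) / 2.0 * 100.0

# One moving-mean update, matching the loop below:
moving_mean = 0.0
moving_mean -= (moving_mean - avg) * (1.0 - momentum)
print(moving_mean)  # 0.1 * avg = 11.3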
+ for i, expected_moving_mean in enumerate(expected_moving_means): + expected_moving_means[i] -= ( + expected_moving_mean - averaged_batch_mean(i) + ) * (1.0 - momentum) + self.assertNear( + expected_moving_means[i], moving_means[i], 0.0001 + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + loss_reduction=[ + tf.compat.v1.losses.Reduction.SUM, + tf.compat.v1.losses.Reduction.MEAN, + tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE, + tf.compat.v1.losses.Reduction.SUM_OVER_NONZERO_WEIGHTS, + ] + ), + tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.one_device_strategy, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, # noqa: E501 + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus_no_merge_call, # noqa: E501 + ] + ), + tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + optimizer_fn=optimizer_combinations.gradient_descent_optimizer_v1_fn # noqa: E501 + ), + tf.__internal__.test.combinations.combine( + mode=["graph"], use_callable_loss=[True, False] + ) + + tf.__internal__.test.combinations.combine( + mode=["eager"], use_callable_loss=[True] + ), + ) + + tf.__internal__.test.combinations.times( + tf.__internal__.test.combinations.combine( + optimizer_fn=optimizer_combinations.gradient_descent_optimizer_keras_v2_fn # noqa: E501 + ), + tf.__internal__.test.combinations.combine( + mode=["graph", "eager"], use_callable_loss=[True] + ), + ), + ) + + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.tpu_strategy + ], + optimizer_fn=optimizer_combinations.gradient_descent_optimizer_v1_fn, # noqa: E501 + mode=["graph"], + use_callable_loss=[True, False], + ) + + tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.tpu_strategy + ], + optimizer_fn=optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, # noqa: E501 + mode=["graph"], + use_callable_loss=[True], + ), + ) + ) + def testMeanVsSum( + self, distribution, optimizer_fn, loss_reduction, use_callable_loss + ): + with distribution.scope(): + all_vars = [] + + def model_fn(inputs): + x, y = inputs + w = tf.compat.v1.get_variable("w", initializer=[[2.0]]) + all_vars.append(w) + + def loss_fn(): + # Use fixed initialization to make the steps deterministic. 
+ predict = tf.matmul(x, w) + loss = tf.compat.v1.losses.mean_squared_error( + y, predict, reduction=loss_reduction + ) + if loss_reduction == tf.compat.v1.losses.Reduction.SUM: + return loss + return loss / distribution.num_replicas_in_sync + + optimizer = ( + optimizer_fn() + ) # GradientDescent with 0.001 learning rate + + if isinstance(optimizer, optimizer_v2.OptimizerV2): + return optimizer.minimize(loss_fn, [w]) + else: + if use_callable_loss: + return optimizer.minimize(loss_fn) + else: + return optimizer.minimize(loss_fn()) + + def dataset_fn(): + features = tf.data.Dataset.from_tensors([[2.0], [7.0]]) + labels = tf.data.Dataset.from_tensors([[6.0], [21.0]]) + return tf.data.Dataset.zip((features, labels)).repeat() + + def step_fn(ctx, inputs): + del ctx # Unused + return distribution.group( + distribution.extended.call_for_each_replica( + model_fn, args=(inputs,) + ) + ) + + iterator = self._get_iterator(distribution, dataset_fn) + + def run_step(): + return distribution.extended.experimental_run_steps_on_iterator( + step_fn, iterator, iterations=1 + ).run_op + + if not tf.executing_eagerly(): + with self.cached_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + run_step() + + v = all_vars[0] + self.assertTrue(all(v is vi for vi in all_vars[1:])) + weight = numpy.squeeze(self.evaluate(v)) + # Our model is: + # predict = x * w + # loss = (predict - y)^2 + # dloss/dpredict = 2*(predict - y) + # dloss/dw = 2 * x^T @ (predict - y) + # For our batch size of 2, assuming sum loss reduction: + # x = [2, 7] + # y = [6, 21] + # w_initial = 2 + # predict = [4, 14] + # predict - y = [-2, -7] + # dloss/dw = 2 <[2, 7], [-2, -7]> = - 2(4 + 49) = -106 + # So unreplicated the update to w with lr=0.001 is -0.001 * -106 = + # 0.106 with sum loss reduction, or 0.053 with mean. + if loss_reduction == tf.compat.v1.losses.Reduction.SUM: + # Note that the "distribution.num_replicas_in_sync" factor will + # go away once we split the input across replicas, instead of + # pulling a complete batch of input per replica. + self.assertNear( + weight, + 2 + 0.106 * distribution.num_replicas_in_sync, + 0.0001, + ) + else: + # One of the mean loss reductions. + self.assertNear(weight, 2 + 0.053, 0.0001) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + optimizer_combinations.distributions_and_v1_and_v2_optimizers(), + tf.__internal__.test.combinations.combine(mode=["graph", "eager"]), + tf.__internal__.test.combinations.combine(is_tpu=[False]), + ) + + tf.__internal__.test.combinations.combine( + distribution=[tf.__internal__.distribute.combinations.tpu_strategy], + optimizer_fn=optimizer_combinations.optimizers_v1_and_v2, + mode=["graph"], + is_tpu=[True], + ) + ) + def testRunStepsWithOutputContext(self, distribution, optimizer_fn, is_tpu): + with distribution.scope(): + + def dataset_fn(): + dataset = tf.data.Dataset.from_tensors([[1.0]]).repeat() + # TODO(priyag): batch with drop_remainder=True causes shapes to + # be fully defined for TPU. Remove this when XLA supports + # dynamic shapes. 
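Stepping back to testMeanVsSum for a moment: the hand derivation in its comments can be checked numerically. A standalone sketch in plain eager TensorFlow, with no distribution strategy involved (illustrative only, not part of the patch):

import tensorflow.compat.v2 as tf

tf.compat.v1.enable_v2_behavior()

x = tf.constant([[2.0], [7.0]])
y = tf.constant([[6.0], [21.0]])
w = tf.Variable([[2.0]])
with tf.GradientTape() as tape:
    predict = tf.matmul(x, w)  # [[4.], [14.]]
    loss = tf.reduce_sum((predict - y) ** 2)  # sum reduction: 4 + 49 = 53
grad = tape.gradient(loss, w)
print(grad.numpy())  # [[-106.]] == 2 * (2 * -2 + 7 * -7)
w.assign_sub(0.001 * grad)  # one gradient-descent step with lr=0.001
print(w.numpy())  # [[2.106]]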
+ return dataset.batch(batch_size=1, drop_remainder=True) + + optimizer = optimizer_fn() + layer = core.Dense(1, use_bias=True) + + key1 = "foo" + value1 = "bar" + + def model_fn(output_context, x): + """A very simple model written by the user.""" + + def loss_fn(): + y = tf.reshape(layer(x), []) - tf.constant(1.0) + return y * y + + if isinstance(optimizer, optimizer_v2.OptimizerV2): + train_op = optimizer.minimize( + loss_fn, lambda: layer.trainable_variables + ) + else: + train_op = optimizer.minimize(loss_fn) + loss = loss_fn() + output_context.set_last_step_output( + name="replica_loss_reduced", + output=loss, + reduce_op=tf.distribute.ReduceOp.MEAN, + ) + output_context.set_non_tensor_output(key1, value1) + return (train_op, loss) + + def step_fn(output_context, inputs): + (train_op, loss) = distribution.extended.call_for_each_replica( + model_fn, args=(output_context, inputs) + ) + output_context.set_last_step_output( + name="cross_replica_loss_reduced", + output=loss, + reduce_op=tf.distribute.ReduceOp.MEAN, + ) + output_context.set_last_step_output( + name="cross_replica_loss_not_reduced", output=loss + ) + return distribution.group(train_op) + + iterator = self._get_iterator(distribution, dataset_fn) + + def run_step(): + initial_loss = lambda: tf.constant(1e7) + # Initial values corresponding to reduced losses are just single + # tensors. But for non reduced losses, we need to have initial + # values that are of the same structure as non reduced losses. + # In MirroredStrategy, this will be a list of losses, in + # TPUStrategy it will be single tensor. Using + # `call_for_each_replica` followed by + # `experimental_local_results` gives us the desired initial + # value structure. + not_reduced = distribution.experimental_local_results( + distribution.extended.call_for_each_replica(initial_loss) + ) + initial_loop_values = { + "replica_loss_reduced": initial_loss(), + "cross_replica_loss_reduced": initial_loss(), + "cross_replica_loss_not_reduced": not_reduced, + } + ctx = distribution.extended.experimental_run_steps_on_iterator( + step_fn, + iterator, + iterations=2, + initial_loop_values=initial_loop_values, + ) + + self.assertEqual({key1: (value1,)}, ctx.non_tensor_outputs) + self._verify_loss_output( + initial_loss(), + loss_output=ctx.last_step_outputs["replica_loss_reduced"], + reduced=True, + distribution=distribution, + ) + self._verify_loss_output( + initial_loss(), + loss_output=ctx.last_step_outputs[ + "cross_replica_loss_reduced" + ], + reduced=True, + distribution=distribution, + ) + self._verify_loss_output( + initial_loss(), + loss_output=ctx.last_step_outputs[ + "cross_replica_loss_not_reduced" + ], + reduced=False, + distribution=distribution, + ) + return ( + ctx.run_op, + ctx.last_step_outputs["replica_loss_reduced"], + ) + + if not tf.executing_eagerly(): + with self.cached_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + weights, biases = [], [] + for _ in range(5): + run_step() + weights.append(self.evaluate(layer.kernel)) + biases.append(self.evaluate(layer.bias)) + + error = abs( + numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1 + ) + error_is_not_increasing = all( + y <= x for x, y in zip(error, error[1:]) + ) + self.assertTrue(error_is_not_increasing) + + def _verify_loss_output( + self, initial_loss, loss_output, reduced, distribution + ): + if not reduced: + self.assertLen( + distribution.experimental_local_results(loss_output), + 
distribution.num_replicas_in_sync, + ) + loss_tensor = distribution.reduce( + tf.distribute.ReduceOp.MEAN, loss_output, axis=None + ) else: - train_op = optimizer.minimize(loss_fn) - loss = loss_fn() - output_context.set_last_step_output( - name="replica_loss_reduced", - output=loss, - reduce_op=tf.distribute.ReduceOp.MEAN) - output_context.set_non_tensor_output(key1, value1) - return (train_op, loss) - - def step_fn(output_context, inputs): - (train_op, loss) = distribution.extended.call_for_each_replica( - model_fn, args=(output_context, inputs)) - output_context.set_last_step_output( - name="cross_replica_loss_reduced", - output=loss, - reduce_op=tf.distribute.ReduceOp.MEAN) - output_context.set_last_step_output( - name="cross_replica_loss_not_reduced", - output=loss) - return distribution.group(train_op) - - iterator = self._get_iterator(distribution, dataset_fn) - - def run_step(): - initial_loss = lambda: tf.constant(1e7) - # Initial values corresponding to reduced losses are just single - # tensors. But for non reduced losses, we need to have initial - # values that are of the same structure as non reduced losses. In - # MirroredStrategy, this will be a list of losses, in TPUStrategy - # it will be single tensor. Using `call_for_each_replica` followed - # by `experimental_local_results` gives us the desired initial - # value structure. - not_reduced = distribution.experimental_local_results( - distribution.extended.call_for_each_replica(initial_loss)) - initial_loop_values = { - "replica_loss_reduced": initial_loss(), - "cross_replica_loss_reduced": initial_loss(), - "cross_replica_loss_not_reduced": not_reduced, - } - ctx = distribution.extended.experimental_run_steps_on_iterator( - step_fn, iterator, iterations=2, - initial_loop_values=initial_loop_values) - - self.assertEqual({key1: (value1,)}, ctx.non_tensor_outputs) - self._verify_loss_output( - initial_loss(), - loss_output=ctx.last_step_outputs["replica_loss_reduced"], - reduced=True, distribution=distribution) - self._verify_loss_output( - initial_loss(), - loss_output=ctx.last_step_outputs["cross_replica_loss_reduced"], - reduced=True, distribution=distribution) - self._verify_loss_output( - initial_loss(), - loss_output=ctx.last_step_outputs["cross_replica_loss_not_reduced"], - reduced=False, distribution=distribution) - return (ctx.run_op, ctx.last_step_outputs["replica_loss_reduced"]) - - if not tf.executing_eagerly(): - with self.cached_session() as sess: - run_step = sess.make_callable(run_step()) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - weights, biases = [], [] - for _ in range(5): - run_step() - weights.append(self.evaluate(layer.kernel)) - biases.append(self.evaluate(layer.bias)) - - error = abs( - numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) - error_is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) - self.assertTrue(error_is_not_increasing) - - def _verify_loss_output(self, initial_loss, loss_output, reduced, - distribution): - if not reduced: - self.assertLen(distribution.experimental_local_results(loss_output), - distribution.num_replicas_in_sync) - loss_tensor = distribution.reduce(tf.distribute.ReduceOp.MEAN, loss_output, - axis=None) - else: - unwrapped_output = distribution.experimental_local_results(loss_output) - self.assertLen(unwrapped_output, 1) - loss_tensor = unwrapped_output[0] - self.assertEqual(initial_loss.dtype, loss_tensor.dtype) - self.assertEqual(initial_loss.shape, loss_tensor.shape) - - @tf.__internal__.distribute.combinations.generate( 
- optimizer_combinations.distributions_and_v2_optimizers()) - def test_empty_var_list(self, distribution, optimizer_fn): - opt = optimizer_fn() - with distribution.scope(): - - def run_fn(): - opt.minimize(lambda: tf.constant(1.), []) - opt.apply_gradients([]) - - distribution.run(run_fn) + unwrapped_output = distribution.experimental_local_results( + loss_output + ) + self.assertLen(unwrapped_output, 1) + loss_tensor = unwrapped_output[0] + self.assertEqual(initial_loss.dtype, loss_tensor.dtype) + self.assertEqual(initial_loss.shape, loss_tensor.shape) + + @tf.__internal__.distribute.combinations.generate( + optimizer_combinations.distributions_and_v2_optimizers() + ) + def test_empty_var_list(self, distribution, optimizer_fn): + opt = optimizer_fn() + with distribution.scope(): + + def run_fn(): + opt.minimize(lambda: tf.constant(1.0), []) + opt.apply_gradients([]) + + distribution.run(run_fn) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/distribute/mirrored_strategy_test.py b/keras/distribute/mirrored_strategy_test.py index 47e4105e5c87..2f482f5ccbed 100644 --- a/keras/distribute/mirrored_strategy_test.py +++ b/keras/distribute/mirrored_strategy_test.py @@ -14,119 +14,135 @@ # ============================================================================== """Tests for MirroredStrategy.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras -from tensorflow.python.eager import backprop from keras.engine import training as keras_training from keras.layers import core as keras_core -from keras.optimizers.optimizer_v2 import rmsprop +from keras.optimizers.legacy import rmsprop from keras.utils import kpl_test_utils -from tensorflow.python.training import optimizer as optimizer_lib + +# isort: off +from tensorflow.python.eager import backprop +from tensorflow.python.training import ( + optimizer as optimizer_lib, +) class MiniModel(keras_training.Model): - """Minimal model for mnist. + """Minimal model for mnist. - Useful for testing and debugging on slow TPU simulators. - """ + Useful for testing and debugging on slow TPU simulators. 
+ """ - def __init__(self): - super().__init__(name="") - self.fc = keras_core.Dense(1, name="fc", kernel_initializer="ones", - bias_initializer="ones") + def __init__(self): + super().__init__(name="") + self.fc = keras_core.Dense( + 1, name="fc", kernel_initializer="ones", bias_initializer="ones" + ) - def call(self, inputs, training=True): - inputs = tf.ones([1, 10]) - return self.fc(inputs) + def call(self, inputs, training=True): + inputs = tf.ones([1, 10]) + return self.fc(inputs) @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( distribution=[ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 ], - mode=["eager"])) + mode=["eager"], + ) +) class MirroredStrategyDefunTest(tf.test.TestCase, parameterized.TestCase): - - def testTrain(self, distribution): - with distribution.scope(): - mock_model = MiniModel() - mock_model.call = tf.function(mock_model.call) - - def loss_fn(ctx): - del ctx - return mock_model(tf.ones([1, 10])) - - gradients_fn = backprop.implicit_grad(loss_fn) - gradients_fn = optimizer_lib.get_filtered_grad_fn(gradients_fn) - grads_and_vars = distribution.extended.call_for_each_replica( - gradients_fn, args=(None,)) - - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.25) - update_ops = optimizer._distributed_apply(distribution, grads_and_vars) # pylint: disable=protected-access - - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(update_ops) - - updated_var_values = self.evaluate(mock_model.variables) - # All variables start at 1.0 and get two updates of 0.25. - self.assertAllEqual(0.5 * np.ones([10, 1]), updated_var_values[0]) - self.assertAllEqual([0.5], updated_var_values[1]) - - def testTrainAndServeWithKPL(self, distribution): - use_adapt = False - test_utils_obj = kpl_test_utils.DistributeKplTestUtils() - with distribution.scope(): - feature_mapper, label_mapper = test_utils_obj.define_kpls_for_training( - use_adapt) - model = test_utils_obj.define_model() - optimizer = rmsprop.RMSprop(learning_rate=0.1) - accuracy = keras.metrics.Accuracy() - - def dataset_fn(_): - return test_utils_obj.dataset_fn(feature_mapper, label_mapper) - - @tf.function - def train_step(iterator): - """The step function for one training step.""" - - def step_fn(inputs): - """The computation to run on each replica(GPU).""" - features, labels = inputs - with tf.GradientTape() as tape: - pred = model(features, training=True) - loss = keras.losses.binary_crossentropy(labels, pred) - loss = tf.nn.compute_average_loss(loss) - grads = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(list(zip(grads, model.trainable_variables))) - - actual_pred = tf.cast(tf.greater(pred, 0.5), tf.int64) - accuracy.update_state(labels, actual_pred) - - distribution.run(step_fn, args=(next(iterator),)) - - distributed_dataset = distribution.distribute_datasets_from_function( - dataset_fn) - distributed_iterator = iter(distributed_dataset) - num_epochs = 4 - num_steps = 7 - for _ in range(num_epochs): - accuracy.reset_state() - for _ in range(num_steps): - train_step(distributed_iterator) - - self.assertGreater(accuracy.result().numpy(), 0.5) - self.assertEqual(optimizer.iterations.numpy(), num_epochs * num_steps) - - # Test save/load/serving the trained model. 
- test_utils_obj.test_save_load_serving_model( - model, feature_mapper, test_utils_obj.define_reverse_lookup_layer()) + def testTrain(self, distribution): + with distribution.scope(): + mock_model = MiniModel() + mock_model.call = tf.function(mock_model.call) + + def loss_fn(ctx): + del ctx + return mock_model(tf.ones([1, 10])) + + gradients_fn = backprop.implicit_grad(loss_fn) + gradients_fn = optimizer_lib.get_filtered_grad_fn(gradients_fn) + grads_and_vars = distribution.extended.call_for_each_replica( + gradients_fn, args=(None,) + ) + + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.25) + update_ops = optimizer._distributed_apply( + distribution, grads_and_vars + ) + + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(update_ops) + + updated_var_values = self.evaluate(mock_model.variables) + # All variables start at 1.0 and get two updates of 0.25. + self.assertAllEqual(0.5 * np.ones([10, 1]), updated_var_values[0]) + self.assertAllEqual([0.5], updated_var_values[1]) + + def testTrainAndServeWithKPL(self, distribution): + use_adapt = False + test_utils_obj = kpl_test_utils.DistributeKplTestUtils() + with distribution.scope(): + ( + feature_mapper, + label_mapper, + ) = test_utils_obj.define_kpls_for_training(use_adapt) + model = test_utils_obj.define_model() + optimizer = rmsprop.RMSprop(learning_rate=0.1) + accuracy = keras.metrics.Accuracy() + + def dataset_fn(_): + return test_utils_obj.dataset_fn(feature_mapper, label_mapper) + + @tf.function + def train_step(iterator): + """The step function for one training step.""" + + def step_fn(inputs): + """The computation to run on each replica(GPU).""" + features, labels = inputs + with tf.GradientTape() as tape: + pred = model(features, training=True) + loss = keras.losses.binary_crossentropy(labels, pred) + loss = tf.nn.compute_average_loss(loss) + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients( + list(zip(grads, model.trainable_variables)) + ) + + actual_pred = tf.cast(tf.greater(pred, 0.5), tf.int64) + accuracy.update_state(labels, actual_pred) + + distribution.run(step_fn, args=(next(iterator),)) + + distributed_dataset = ( + distribution.distribute_datasets_from_function(dataset_fn) + ) + distributed_iterator = iter(distributed_dataset) + num_epochs = 4 + num_steps = 7 + for _ in range(num_epochs): + accuracy.reset_state() + for _ in range(num_steps): + train_step(distributed_iterator) + + self.assertGreater(accuracy.result().numpy(), 0.5) + self.assertEqual( + optimizer.iterations.numpy(), num_epochs * num_steps + ) + + # Test save/load/serving the trained model. + test_utils_obj.test_save_load_serving_model( + model, feature_mapper, test_utils_obj.define_reverse_lookup_layer() + ) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/distribute/mirrored_variable_test.py b/keras/distribute/mirrored_variable_test.py index 9f247031d209..fc7cdb566f61 100644 --- a/keras/distribute/mirrored_variable_test.py +++ b/keras/distribute/mirrored_variable_test.py @@ -15,96 +15,115 @@ """Test MirroredVariable in MirroredStrategy and MultiWorkerMirroredStrategy.""" import tensorflow.compat.v2 as tf + from keras.distribute import distributed_training_utils from keras.layers import core def _mimic_two_cpus(): - try: - cpus = tf.config.list_physical_devices("CPU") - except tf.errors.NotFoundError: - # Testing device not available. Skip the test. 
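The _mimic_two_cpus helper being reformatted here splits a single physical CPU into two logical devices so a two-replica strategy can run on one host. A standalone sketch of the same idea using the public MirroredStrategy constructor (device names and the variable are illustrative):

import tensorflow as tf

cpus = tf.config.list_physical_devices("CPU")
tf.config.set_logical_device_configuration(
    cpus[0],
    [
        tf.config.LogicalDeviceConfiguration(),
        tf.config.LogicalDeviceConfiguration(),
    ],
)
# MirroredStrategy also accepts an explicit device list, unlike the private
# MultiWorkerMirroredStrategy._from_local_devices used by the test.
strategy = tf.distribute.MirroredStrategy(["/device:CPU:0", "/device:CPU:1"])
with strategy.scope():
    v = tf.Variable(1.0)  # a MirroredVariable with one copy per logical CPU
print(strategy.num_replicas_in_sync)  # 2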
- return False - - tf.config.set_logical_device_configuration(cpus[0], [ - tf.config.LogicalDeviceConfiguration(), - tf.config.LogicalDeviceConfiguration(), - ]) - return True + try: + cpus = tf.config.list_physical_devices("CPU") + except tf.errors.NotFoundError: + # Testing device not available. Skip the test. + return False + + tf.config.set_logical_device_configuration( + cpus[0], + [ + tf.config.LogicalDeviceConfiguration(), + tf.config.LogicalDeviceConfiguration(), + ], + ) + return True def get_strategy_with_mimicing_cpus(): - if not _mimic_two_cpus(): - return None - return (tf.distribute.MultiWorkerMirroredStrategy - ._from_local_devices(("/device:CPU:0", "/device:CPU:1"))) + if not _mimic_two_cpus(): + return None + return tf.distribute.MultiWorkerMirroredStrategy._from_local_devices( + ("/device:CPU:0", "/device:CPU:1") + ) @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( distribution=list( - filter(None.__ne__, [ - tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, - get_strategy_with_mimicing_cpus() - ])), - mode=["graph", "eager"])) + filter( + None.__ne__, + [ + tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, # noqa: E501 + get_strategy_with_mimicing_cpus(), + ], + ) + ), + mode=["graph", "eager"], + ) +) class MirroredVariableCreationTest(tf.test.TestCase): - """Base class that tests mirrored variable creator. - - Currently it assumes all strategy objects have two replicas. - """ - - @classmethod - def setUpClass(cls): - _mimic_two_cpus() - - def assertAllDifferent(self, objs): - for i in range(len(objs)): - for j in range(len(objs)): - if i == j: - continue - self.assertIsNot(objs[i], objs[j]) - - def _is_mirrored(self, val): - if distributed_training_utils.is_distributed_variable(val): - if val._policy: # pylint: disable=protected-access - return val._policy._is_mirrored() # pylint: disable=protected-access - # Since `Mirrored` is a private symbol in tf.distribute, we're checking - # with `DistributedValues` as an approximation. - return isinstance(val, tf.distribute.DistributedValues) - - def testWithLayers(self, distribution): - - def model_fn(features): - - layer1 = core.Dense(1) - layer1(features) - layer2 = core.Dense(1) - layer2(features) - # We rely on names and orders to make sure replica references the same - # MirroredVariable. Uniquifying names may involve global states, - # merge_call switches threads so we need to test things work after - # merge_call. - tf.distribute.get_replica_context().merge_call(lambda _: _) - layer3 = core.Dense(1) - layer3(features) - return [(layer1.kernel, layer1.bias), (layer2.kernel, layer2.bias), - (layer3.kernel, layer3.bias)] - - iterator = distribution.make_input_fn_iterator( - lambda _: tf.data.Dataset.from_tensors([[1.]]).repeat(10)) - self.evaluate(iterator.initializer) - features = iterator.get_next() - - with distribution.scope(): - result = distribution.extended.call_for_each_replica( - model_fn, args=(features,)) - for kernel, bias in result: - self.assertTrue(self._is_mirrored(kernel)) - self.assertAllDifferent(distribution.experimental_local_results(kernel)) - self.assertTrue(self._is_mirrored(bias)) - self.assertAllDifferent(distribution.experimental_local_results(kernel)) + """Base class that tests mirrored variable creator. + + Currently it assumes all strategy objects have two replicas. 
+ """ + + @classmethod + def setUpClass(cls): + _mimic_two_cpus() + + def assertAllDifferent(self, objs): + for i in range(len(objs)): + for j in range(len(objs)): + if i == j: + continue + self.assertIsNot(objs[i], objs[j]) + + def _is_mirrored(self, val): + if distributed_training_utils.is_distributed_variable(val): + if val._policy: + return val._policy._is_mirrored() + # Since `Mirrored` is a private symbol in tf.distribute, we're checking + # with `DistributedValues` as an approximation. + return isinstance(val, tf.distribute.DistributedValues) + + def testWithLayers(self, distribution): + def model_fn(features): + + layer1 = core.Dense(1) + layer1(features) + layer2 = core.Dense(1) + layer2(features) + # We rely on names and orders to make sure replica references the + # same MirroredVariable. Uniquifying names may involve global + # states, merge_call switches threads so we need to test things work + # after merge_call. + tf.distribute.get_replica_context().merge_call(lambda _: _) + layer3 = core.Dense(1) + layer3(features) + return [ + (layer1.kernel, layer1.bias), + (layer2.kernel, layer2.bias), + (layer3.kernel, layer3.bias), + ] + + iterator = distribution.make_input_fn_iterator( + lambda _: tf.data.Dataset.from_tensors([[1.0]]).repeat(10) + ) + self.evaluate(iterator.initializer) + features = iterator.get_next() + + with distribution.scope(): + result = distribution.extended.call_for_each_replica( + model_fn, args=(features,) + ) + for kernel, bias in result: + self.assertTrue(self._is_mirrored(kernel)) + self.assertAllDifferent( + distribution.experimental_local_results(kernel) + ) + self.assertTrue(self._is_mirrored(bias)) + self.assertAllDifferent( + distribution.experimental_local_results(kernel) + ) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/distribute/model_checkpoint_test.py b/keras/distribute/model_checkpoint_test.py new file mode 100644 index 000000000000..a2d75cc5d0ab --- /dev/null +++ b/keras/distribute/model_checkpoint_test.py @@ -0,0 +1,60 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests of ModelCheckpoint callback.""" + +import os +import sys + +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras import callbacks +from keras.distribute import multi_worker_testing_utils + + +class ModelCheckpointTest(tf.test.TestCase, parameterized.TestCase): + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], + file_format=["h5", "tf"], + save_weights_only=[True, False], + ) + ) + def testCheckpointExists(self, file_format, save_weights_only): + train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset(64, 2) + model = multi_worker_testing_utils.get_mnist_model((28, 28, 1)) + saving_dir = self.get_temp_dir() + saving_filepath = os.path.join(saving_dir, "checkpoint." 
+ file_format) + callbacks_list = [ + callbacks.ModelCheckpoint( + filepath=saving_filepath, save_weights_only=save_weights_only + ) + ] + self.assertFalse(tf.io.gfile.exists(saving_filepath)) + model.fit( + x=train_ds, epochs=2, steps_per_epoch=2, callbacks=callbacks_list + ) + tf_saved_model_exists = tf.io.gfile.exists(saving_filepath) + tf_weights_only_checkpoint_exists = tf.io.gfile.exists( + saving_filepath + ".index" + ) + self.assertTrue( + tf_saved_model_exists or tf_weights_only_checkpoint_exists + ) + + +if __name__ == "__main__": + with tf.compat.v1.test.mock.patch.object(sys, "exit", os._exit): + tf.test.main() diff --git a/keras/distribute/model_collection_base.py b/keras/distribute/model_collection_base.py index 75e0d4ccdf1d..16dea694b528 100644 --- a/keras/distribute/model_collection_base.py +++ b/keras/distribute/model_collection_base.py @@ -16,27 +16,27 @@ class ModelAndInput: - """Base class to provide model and its corresponding inputs.""" + """Base class to provide model and its corresponding inputs.""" - def get_model(self): - """Returns a compiled keras model object, together with output name. + def get_model(self): + """Returns a compiled keras model object, together with output name. - Returns: - model: a keras model object - output_name: a string for the name of the output layer - """ - raise NotImplementedError("must be implemented in descendants") + Returns: + model: a keras model object + output_name: a string for the name of the output layer + """ + raise NotImplementedError("must be implemented in descendants") - def get_data(self): - """Returns data for training and predicting. + def get_data(self): + """Returns data for training and predicting. - Returns: - x_train: data used for training - y_train: label used for training - x_predict: data used for predicting - """ - raise NotImplementedError("must be implemented in descendants") + Returns: + x_train: data used for training + y_train: label used for training + x_predict: data used for predicting + """ + raise NotImplementedError("must be implemented in descendants") - def get_batch_size(self): - """Returns the batch_size used by the model.""" - raise NotImplementedError("must be implemented in descendants") + def get_batch_size(self): + """Returns the batch_size used by the model.""" + raise NotImplementedError("must be implemented in descendants") diff --git a/keras/distribute/model_combinations.py b/keras/distribute/model_combinations.py index f4f5602b2719..0349cad552eb 100644 --- a/keras/distribute/model_combinations.py +++ b/keras/distribute/model_combinations.py @@ -15,16 +15,21 @@ """Strategy and optimizer combinations for combinations.combine().""" import tensorflow.compat.v2 as tf + from keras.distribute import simple_models simple_functional_model = tf.__internal__.test.combinations.NamedObject( - "SimpleFunctionalModel", simple_models.SimpleFunctionalModel()) + "SimpleFunctionalModel", simple_models.SimpleFunctionalModel() +) simple_sequential_model = tf.__internal__.test.combinations.NamedObject( - "SimpleSequentialModel", simple_models.SimpleSequentialModel()) + "SimpleSequentialModel", simple_models.SimpleSequentialModel() +) simple_subclass_model = tf.__internal__.test.combinations.NamedObject( - "SimpleSubclassModel", simple_models.SimpleSubclassModel()) + "SimpleSubclassModel", simple_models.SimpleSubclassModel() +) simple_tfmodule_model = tf.__internal__.test.combinations.NamedObject( - "SimpleTFModuleModel", simple_models.SimpleTFModuleModel()) + "SimpleTFModuleModel", 
simple_models.SimpleTFModuleModel()
+)
diff --git a/keras/distribute/multi_worker_callback_tf2_test.py b/keras/distribute/multi_worker_callback_tf2_test.py
index 24cc90076b5e..69043d6bd824 100644
--- a/keras/distribute/multi_worker_callback_tf2_test.py
+++ b/keras/distribute/multi_worker_callback_tf2_test.py
@@ -14,390 +14,464 @@
 # ==============================================================================
 """Tests for Keras callbacks in multi-worker training with TF2."""

-import tensorflow.compat.v2 as tf
-
 import json
 import os

+import tensorflow.compat.v2 as tf
 from absl.testing import parameterized
+
 from keras import callbacks
 from keras.distribute import distributed_file_utils
 from keras.distribute import multi_worker_testing_utils


 def checkpoint_exists(filepath):
-  """Returns whether the checkpoint `filepath` refers to exists."""
-  if filepath.endswith('.h5'):
-    return tf.io.gfile.exists(filepath)
-  tf_saved_model_exists = tf.io.gfile.exists(filepath)
-  tf_weights_only_checkpoint_exists = tf.io.gfile.exists(
-      filepath + '.index')
-  return tf_saved_model_exists or tf_weights_only_checkpoint_exists
+    """Returns whether the checkpoint that `filepath` refers to exists."""
+    if filepath.endswith(".h5"):
+        return tf.io.gfile.exists(filepath)
+    tf_saved_model_exists = tf.io.gfile.exists(filepath)
+    tf_weights_only_checkpoint_exists = tf.io.gfile.exists(filepath + ".index")
+    return tf_saved_model_exists or tf_weights_only_checkpoint_exists


 def _model_setup(test_obj, file_format):
-  """Set up a MNIST Keras model for testing purposes.
-
-  This function builds a MNIST Keras model and returns relevant information
-  for testing.
-
-  Args:
-    test_obj: The `TestCase` testing object.
-    file_format: File format for checkpoints. 'tf' or 'h5'.
-
-  Returns:
-    A tuple of (model, saving_filepath, train_ds, steps) where train_ds is
-    the training dataset.
-  """
-  batch_size = 64
-  steps = 2
-  with tf.distribute.MultiWorkerMirroredStrategy().scope():
-    # TODO(b/142509827): In rare cases this errors out at C++ level with the
-    # "Connect failed" error message.
-    train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset(
-        batch_size, steps)
-    model = multi_worker_testing_utils.get_mnist_model((28, 28, 1))
-  # Pass saving_filepath from the parent thread to ensure every worker has the
-  # same filepath to save.
-  saving_filepath = os.path.join(test_obj.get_temp_dir(),
-                                 'checkpoint.' + file_format)
-  return model, saving_filepath, train_ds, steps
+    """Set up an MNIST Keras model for testing purposes.
+
+    This function builds an MNIST Keras model and returns relevant information
+    for testing.
+
+    Args:
+      test_obj: The `TestCase` testing object.
+      file_format: File format for checkpoints. 'tf' or 'h5'.
+
+    Returns:
+      A tuple of (model, saving_filepath, train_ds, steps) where train_ds is
+      the training dataset.
+    """
+    batch_size = 64
+    steps = 2
+    with tf.distribute.MultiWorkerMirroredStrategy().scope():
+        # TODO(b/142509827): In rare cases this errors out at C++ level with the
+        # "Connect failed" error message.
+        train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset(
+            batch_size, steps
+        )
+        model = multi_worker_testing_utils.get_mnist_model((28, 28, 1))
+    # Pass saving_filepath from the parent thread to ensure every worker has
+    # the same filepath to save to.
+    saving_filepath = os.path.join(
+        test_obj.get_temp_dir(), "checkpoint."
+ file_format + ) + return model, saving_filepath, train_ds, steps def get_tf_config_task(): - return json.loads(os.environ['TF_CONFIG'])['task'] + return json.loads(os.environ["TF_CONFIG"])["task"] def get_tf_config_cluster_spec(): - return json.loads(os.environ['TF_CONFIG'])['cluster'] + return json.loads(os.environ["TF_CONFIG"])["cluster"] def get_task_type(): - return get_tf_config_task()['type'] + return get_tf_config_task()["type"] def get_task_index(): - return get_tf_config_task()['index'] + return get_tf_config_task()["index"] def is_chief(): - return ('chief' not in get_tf_config_cluster_spec() and - get_task_type() == 'worker' and get_task_index() == 0) + return ( + "chief" not in get_tf_config_cluster_spec() + and get_task_type() == "worker" + and get_task_index() == 0 + ) class KerasCallbackMultiProcessTest(parameterized.TestCase, tf.test.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], - file_format=['h5', 'tf'], - save_weights_only=[True, False])) - def test_model_checkpoint_saves_on_chief_but_not_otherwise( - self, file_format, mode, save_weights_only): - - def proc_model_checkpoint_saves_on_chief_but_not_otherwise( - test_obj, file_format): - - model, saving_filepath, train_ds, steps = _model_setup( - test_obj, file_format) - num_epoch = 2 - extension = os.path.splitext(saving_filepath)[1] - - # Incorporate type/index information and thread id in saving_filepath to - # ensure every worker has a unique path. Note that in normal use case the - # saving_filepath will be the same for all workers, but we use different - # ones here just to test out chief saves checkpoint but non-chief doesn't. - task_config = get_tf_config_task() - saving_filepath = os.path.join( - test_obj.get_temp_dir(), 'checkpoint_%s_%d%s' % - (task_config['type'], task_config['index'], extension)) - - # The saving_filepath shouldn't exist at the beginning (as it's unique). - test_obj.assertFalse(checkpoint_exists(saving_filepath)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - validation_data=train_ds, - validation_steps=steps, - callbacks=[ - callbacks.ModelCheckpoint( - filepath=saving_filepath, save_weights_only=save_weights_only) - ]) - - # If it's chief, the model should be saved; if not, the model shouldn't. - test_obj.assertEqual(checkpoint_exists(saving_filepath), is_chief()) - - # If it's chief, the model should be saved (`write_filepath` should - # simply return `saving_filepath`); if not, i.e. for non-chief workers, - # the temporary path generated by `write_filepath` should no longer - # contain the checkpoint that has been deleted. - test_obj.assertEqual( - checkpoint_exists( - distributed_file_utils.write_filepath( - saving_filepath, model._distribution_strategy)), is_chief()) - - tf.__internal__.distribute.multi_process_runner.run( - proc_model_checkpoint_saves_on_chief_but_not_otherwise, - cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec(num_workers=2), - args=(self, file_format)) - - @tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(mode=['eager'])) - def test_model_checkpoint_works_with_same_file_path(self, mode): - - def proc_model_checkpoint_works_with_same_file_path( - test_obj, saving_filepath): - model, _, train_ds, steps = _model_setup(test_obj, file_format='') - num_epoch = 2 - - # The saving_filepath shouldn't exist at the beginning (as it's unique). 
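# For reference, a minimal sketch of the TF_CONFIG payload that
# `get_tf_config_task()` and `is_chief()` above parse (the host:port values
# are placeholders, not part of this patch):
#   {"cluster": {"worker": ["localhost:12345", "localhost:23456"]},
#    "task": {"type": "worker", "index": 0}}
# When the cluster spec has no dedicated "chief" job, worker 0 acts as the
# chief, which is exactly the condition `is_chief()` checks.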
- test_obj.assertFalse(tf.io.gfile.exists(saving_filepath)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[callbacks.ModelCheckpoint(filepath=saving_filepath)]) - - test_obj.assertTrue(tf.io.gfile.exists(saving_filepath)) - - saving_filepath = os.path.join(self.get_temp_dir(), 'checkpoint') - - tf.__internal__.distribute.multi_process_runner.run( - proc_model_checkpoint_works_with_same_file_path, - cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec(num_workers=2), - args=(self, saving_filepath)) - - @tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(mode=['eager'])) - def test_backupandrestore_checkpoint_works_with_interruption(self, mode): - - class InterruptingCallback(callbacks.Callback): - - def on_epoch_begin(self, epoch, logs=None): - if epoch == 2: - raise RuntimeError('Interrupting!') - - class AssertCallback(callbacks.Callback): - - def on_epoch_begin(self, epoch, logs=None): - # the interruption happened on epoch 2 as specified in - # InterruptingCallback, so the initial epoch after restart will begin - # at 2. - assert epoch > 1 - - def proc_model_checkpoint_works_with_same_file_path(test_obj, - saving_filepath): - model, _, train_ds, steps = _model_setup(test_obj, file_format='') - num_epoch = 4 - - # The saving_filepath shouldn't exist at the beginning (as it's unique). - test_obj.assertFalse(tf.io.gfile.exists(saving_filepath)) - bar_dir = os.path.join(os.path.dirname(saving_filepath), 'backup') - - try: - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[ - callbacks.ModelCheckpoint(filepath=saving_filepath), - callbacks.BackupAndRestore(backup_dir=bar_dir), - InterruptingCallback() - ]) - except RuntimeError as e: - if 'Interrupting!' not in str(e): - raise - - tf.__internal__.distribute.multi_process_runner.get_barrier().wait() - backup_filepath = os.path.join(bar_dir, 'chief', 'checkpoint') - test_obj.assertTrue(tf.io.gfile.exists(backup_filepath)) - test_obj.assertTrue(tf.io.gfile.exists(saving_filepath)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[ - callbacks.ModelCheckpoint(filepath=saving_filepath), - callbacks.BackupAndRestore(backup_dir=bar_dir), - AssertCallback() - ]) - tf.__internal__.distribute.multi_process_runner.get_barrier().wait() - test_obj.assertFalse(tf.io.gfile.exists(backup_filepath)) - test_obj.assertTrue(tf.io.gfile.exists(saving_filepath)) - - saving_filepath = os.path.join(self.get_temp_dir(), 'checkpoint') - - tf.__internal__.distribute.multi_process_runner.run( - proc_model_checkpoint_works_with_same_file_path, - cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec(num_workers=2), - args=(self, saving_filepath)) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine(mode=['eager'])) - def test_profiler_saves_on_both_chief_and_non_chief(self, mode): - - def proc_profiler_saves_on_both_chief_and_non_chief(test_obj): - model, _, train_ds, steps = _model_setup(test_obj, file_format='') - num_epoch = 2 - - task_config = get_tf_config_task() - saving_filepath = os.path.join( - test_obj.get_temp_dir(), - 'logfile_%s_%d' % (task_config['type'], task_config['index'])) - - # The saving_filepath shouldn't exist at the beginning (as it's unique). 
- test_obj.assertFalse(tf.io.gfile.exists(saving_filepath)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[ - callbacks.TensorBoard( - log_dir=saving_filepath, profile_batch=[2, 4]) - ]) - - # Profiler dir should be created on both chief and non-chief node - profiler_dir_path = os.path.join(saving_filepath, 'plugins', 'profile') - test_obj.assertTrue(tf.io.gfile.exists(profiler_dir_path)) - - tf.__internal__.distribute.multi_process_runner.run( - proc_profiler_saves_on_both_chief_and_non_chief, - cluster_spec= - tf.__internal__.distribute.multi_process_runner.create_cluster_spec( - num_workers=2), - args=(self,)) - - @tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(mode=['eager'])) - def test_tensorboard_saves_on_chief_but_not_otherwise(self, mode): - - def proc_tensorboard_saves_on_chief_but_not_otherwise(test_obj): - model, _, train_ds, steps = _model_setup(test_obj, file_format='') - num_epoch = 2 - - # Incorporate type/index information and thread id in saving_filepath to - # ensure every worker has a unique path. Note that in normal use case the - # saving_filepath will be the same for all workers, but we use different - # ones here just to test out chief saves summaries but non-chief doesn't. - task_config = get_tf_config_task() - saving_filepath = os.path.join( - test_obj.get_temp_dir(), - 'logfile_%s_%d' % (task_config['type'], task_config['index'])) - - # The saving_filepath shouldn't exist at the beginning (as it's unique). - test_obj.assertFalse(tf.io.gfile.exists(saving_filepath)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - # disabling profiler by setting profile_batch to zero - callbacks=[ - callbacks.TensorBoard(log_dir=saving_filepath, profile_batch=0) - ]) - - # If it's chief, the summaries should be saved in the filepath; if not, - # the directory should be empty (although created). Using - # `file_io.list_directory()` since the directory may be created at this - # point. - test_obj.assertEqual( - bool(tf.io.gfile.listdir(saving_filepath)), is_chief()) - - tf.__internal__.distribute.multi_process_runner.run( - proc_tensorboard_saves_on_chief_but_not_otherwise, - cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec(num_workers=2), - args=(self,)) - - @tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(mode=['eager'])) - def test_tensorboard_can_still_save_to_temp_even_if_it_exists(self, mode): - - def proc_tensorboard_can_still_save_to_temp_even_if_it_exists(test_obj): - model, _, train_ds, steps = _model_setup(test_obj, file_format='') - num_epoch = 2 - - saving_filepath = os.path.join( - test_obj.get_temp_dir(), - 'logfile_%s' % (get_tf_config_task()['type'])) - - saving_filepath_for_temp = os.path.join(saving_filepath, 'workertemp_1') - os.mkdir(saving_filepath) - os.mkdir(saving_filepath_for_temp) - - # Verifies that even if `saving_filepath_for_temp` exists, tensorboard - # can still save to temporary directory. 
- test_obj.assertTrue(tf.io.gfile.exists(saving_filepath_for_temp)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[callbacks.TensorBoard(log_dir=saving_filepath)]) - - tf.__internal__.distribute.multi_process_runner.run( - proc_tensorboard_can_still_save_to_temp_even_if_it_exists, - cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec(num_workers=2), - args=(self,)) - - @tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(mode=['eager'])) - def test_tensorboard_works_with_same_file_path(self, mode): - - def proc_tensorboard_works_with_same_file_path(test_obj, saving_filepath): - model, _, train_ds, steps = _model_setup(test_obj, file_format='') - num_epoch = 2 - - # The saving_filepath shouldn't exist at the beginning (as it's unique). - test_obj.assertFalse(tf.io.gfile.exists(saving_filepath)) - - tf.__internal__.distribute.multi_process_runner.get_barrier().wait() - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[callbacks.TensorBoard(log_dir=saving_filepath)]) - - tf.__internal__.distribute.multi_process_runner.get_barrier().wait() - - test_obj.assertTrue(tf.io.gfile.listdir(saving_filepath)) - - saving_filepath = os.path.join(self.get_temp_dir(), 'logfile') - - tf.__internal__.distribute.multi_process_runner.run( - proc_tensorboard_works_with_same_file_path, - cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec(num_workers=2), - args=(self, saving_filepath)) - - @tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(mode=['eager'])) - def test_early_stopping(self, mode): - - def proc_early_stopping(test_obj): - - class EpochCounterCallback(callbacks.Callback): - - def on_epoch_begin(self, epoch, logs): - self.last_epoch = epoch - - model, _, train_ds, steps = _model_setup(test_obj, file_format='') - epoch_counter_cbk = EpochCounterCallback() - cbks = [ - callbacks.EarlyStopping( - monitor='loss', min_delta=0.05, patience=1, verbose=1), - epoch_counter_cbk - ] - - # Empirically, it is expected that `model.fit()` terminates around the - # 22th epoch. Asserting that it should have been stopped before the 50th - # epoch to avoid flakiness and be more predictable. - model.fit(x=train_ds, epochs=100, steps_per_epoch=steps, callbacks=cbks) - test_obj.assertLess(epoch_counter_cbk.last_epoch, 50) - - tf.__internal__.distribute.multi_process_runner.run( - proc_early_stopping, - cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec(num_workers=2), - args=(self,)) - - -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], + file_format=["h5", "tf"], + save_weights_only=[True, False], + ) + ) + def test_model_checkpoint_saves_on_chief_but_not_otherwise( + self, file_format, mode, save_weights_only + ): + def proc_model_checkpoint_saves_on_chief_but_not_otherwise( + test_obj, file_format + ): + + model, saving_filepath, train_ds, steps = _model_setup( + test_obj, file_format + ) + num_epoch = 2 + extension = os.path.splitext(saving_filepath)[1] + + # Incorporate type/index information and thread id in + # saving_filepath to ensure every worker has a unique path. 
Note + # that in normal use case the saving_filepath will be the same for + # all workers, but we use different ones here just to test out chief + # saves checkpoint but non-chief doesn't. + task_config = get_tf_config_task() + saving_filepath = os.path.join( + test_obj.get_temp_dir(), + "checkpoint_%s_%d%s" + % (task_config["type"], task_config["index"], extension), + ) + + # The saving_filepath shouldn't exist at the beginning (as it's + # unique). + test_obj.assertFalse(checkpoint_exists(saving_filepath)) + + model.fit( + x=train_ds, + epochs=num_epoch, + steps_per_epoch=steps, + validation_data=train_ds, + validation_steps=steps, + callbacks=[ + callbacks.ModelCheckpoint( + filepath=saving_filepath, + save_weights_only=save_weights_only, + ) + ], + ) + + # If it's chief, the model should be saved; if not, the model + # shouldn't. + test_obj.assertEqual(checkpoint_exists(saving_filepath), is_chief()) + + # If it's chief, the model should be saved (`write_filepath` should + # simply return `saving_filepath`); if not, i.e. for non-chief + # workers, the temporary path generated by `write_filepath` should + # no longer contain the checkpoint that has been deleted. + test_obj.assertEqual( + checkpoint_exists( + distributed_file_utils.write_filepath( + saving_filepath, model._distribution_strategy + ) + ), + is_chief(), + ) + + tf.__internal__.distribute.multi_process_runner.run( + proc_model_checkpoint_saves_on_chief_but_not_otherwise, + cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec( # noqa: E501 + num_workers=2 + ), + args=(self, file_format), + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine(mode=["eager"]) + ) + def test_model_checkpoint_works_with_same_file_path(self, mode): + def proc_model_checkpoint_works_with_same_file_path( + test_obj, saving_filepath + ): + model, _, train_ds, steps = _model_setup(test_obj, file_format="") + num_epoch = 2 + + # The saving_filepath shouldn't exist at the beginning (as it's + # unique). + test_obj.assertFalse(tf.io.gfile.exists(saving_filepath)) + + model.fit( + x=train_ds, + epochs=num_epoch, + steps_per_epoch=steps, + callbacks=[callbacks.ModelCheckpoint(filepath=saving_filepath)], + ) + + test_obj.assertTrue(tf.io.gfile.exists(saving_filepath)) + + saving_filepath = os.path.join(self.get_temp_dir(), "checkpoint") + + tf.__internal__.distribute.multi_process_runner.run( + proc_model_checkpoint_works_with_same_file_path, + cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec( # noqa: E501 + num_workers=2 + ), + args=(self, saving_filepath), + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine(mode=["eager"]) + ) + def test_backupandrestore_checkpoint_works_with_interruption(self, mode): + class InterruptingCallback(callbacks.Callback): + def on_epoch_begin(self, epoch, logs=None): + if epoch == 2: + raise RuntimeError("Interrupting!") + + class AssertCallback(callbacks.Callback): + def on_epoch_begin(self, epoch, logs=None): + # the interruption happened on epoch 2 as specified in + # InterruptingCallback, so the initial epoch after restart will + # begin at 2. + assert epoch > 1 + + def proc_model_checkpoint_works_with_same_file_path( + test_obj, saving_filepath + ): + model, _, train_ds, steps = _model_setup(test_obj, file_format="") + num_epoch = 4 + + # The saving_filepath shouldn't exist at the beginning (as it's + # unique). 
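# The fault-tolerance pattern exercised below, as a sketch (not part of
# this patch): `BackupAndRestore` writes training state under `backup_dir`
# at each epoch end, so re-issuing the same `fit()` call after an
# interruption resumes from the last completed epoch, e.g.
#   model.fit(train_ds, epochs=num_epoch,
#             callbacks=[callbacks.BackupAndRestore(backup_dir=bar_dir)])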
+ test_obj.assertFalse(tf.io.gfile.exists(saving_filepath)) + bar_dir = os.path.join(os.path.dirname(saving_filepath), "backup") + + try: + model.fit( + x=train_ds, + epochs=num_epoch, + steps_per_epoch=steps, + callbacks=[ + callbacks.ModelCheckpoint(filepath=saving_filepath), + callbacks.BackupAndRestore(backup_dir=bar_dir), + InterruptingCallback(), + ], + ) + except RuntimeError as e: + if "Interrupting!" not in str(e): + raise + + tf.__internal__.distribute.multi_process_runner.get_barrier().wait() + backup_filepath = os.path.join(bar_dir, "chief", "checkpoint") + test_obj.assertTrue(tf.io.gfile.exists(backup_filepath)) + test_obj.assertTrue(tf.io.gfile.exists(saving_filepath)) + + model.fit( + x=train_ds, + epochs=num_epoch, + steps_per_epoch=steps, + callbacks=[ + callbacks.ModelCheckpoint(filepath=saving_filepath), + callbacks.BackupAndRestore(backup_dir=bar_dir), + AssertCallback(), + ], + ) + tf.__internal__.distribute.multi_process_runner.get_barrier().wait() + test_obj.assertFalse(tf.io.gfile.exists(backup_filepath)) + test_obj.assertTrue(tf.io.gfile.exists(saving_filepath)) + + saving_filepath = os.path.join(self.get_temp_dir(), "checkpoint") + + tf.__internal__.distribute.multi_process_runner.run( + proc_model_checkpoint_works_with_same_file_path, + cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec( # noqa: E501 + num_workers=2 + ), + args=(self, saving_filepath), + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine(mode=["eager"]) + ) + def test_profiler_saves_on_both_chief_and_non_chief(self, mode): + def proc_profiler_saves_on_both_chief_and_non_chief(test_obj): + model, _, train_ds, steps = _model_setup(test_obj, file_format="") + num_epoch = 2 + + task_config = get_tf_config_task() + saving_filepath = os.path.join( + test_obj.get_temp_dir(), + "logfile_%s_%d" % (task_config["type"], task_config["index"]), + ) + + # The saving_filepath shouldn't exist at the beginning (as it's + # unique). + test_obj.assertFalse(tf.io.gfile.exists(saving_filepath)) + + model.fit( + x=train_ds, + epochs=num_epoch, + steps_per_epoch=steps, + callbacks=[ + callbacks.TensorBoard( + log_dir=saving_filepath, profile_batch=[2, 4] + ) + ], + ) + + # Profiler dir should be created on both chief and non-chief node + profiler_dir_path = os.path.join( + saving_filepath, "plugins", "profile" + ) + test_obj.assertTrue(tf.io.gfile.exists(profiler_dir_path)) + + tf.__internal__.distribute.multi_process_runner.run( + proc_profiler_saves_on_both_chief_and_non_chief, + cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec( # noqa: E501 + num_workers=2 + ), + args=(self,), + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine(mode=["eager"]) + ) + def test_tensorboard_saves_on_chief_but_not_otherwise(self, mode): + def proc_tensorboard_saves_on_chief_but_not_otherwise(test_obj): + model, _, train_ds, steps = _model_setup(test_obj, file_format="") + num_epoch = 2 + + # Incorporate type/index information and thread id in + # saving_filepath to ensure every worker has a unique path. Note + # that in normal use case the saving_filepath will be the same for + # all workers, but we use different ones here just to test out chief + # saves summaries but non-chief doesn't. 
+ task_config = get_tf_config_task() + saving_filepath = os.path.join( + test_obj.get_temp_dir(), + "logfile_%s_%d" % (task_config["type"], task_config["index"]), + ) + + # The saving_filepath shouldn't exist at the beginning (as it's + # unique). + test_obj.assertFalse(tf.io.gfile.exists(saving_filepath)) + + model.fit( + x=train_ds, + epochs=num_epoch, + steps_per_epoch=steps, + # disabling profiler by setting profile_batch to zero + callbacks=[ + callbacks.TensorBoard( + log_dir=saving_filepath, profile_batch=0 + ) + ], + ) + + # If it's chief, the summaries should be saved in the filepath; if + # not, the directory should be empty (although created). Using + # `file_io.list_directory()` since the directory may be created at + # this point. + test_obj.assertEqual( + bool(tf.io.gfile.listdir(saving_filepath)), is_chief() + ) + + tf.__internal__.distribute.multi_process_runner.run( + proc_tensorboard_saves_on_chief_but_not_otherwise, + cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec( # noqa: E501 + num_workers=2 + ), + args=(self,), + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine(mode=["eager"]) + ) + def test_tensorboard_can_still_save_to_temp_even_if_it_exists(self, mode): + def proc_tensorboard_can_still_save_to_temp_even_if_it_exists(test_obj): + model, _, train_ds, steps = _model_setup(test_obj, file_format="") + num_epoch = 2 + + saving_filepath = os.path.join( + test_obj.get_temp_dir(), + f"logfile_{get_tf_config_task()['type']}", + ) + + saving_filepath_for_temp = os.path.join( + saving_filepath, "workertemp_1" + ) + os.mkdir(saving_filepath) + os.mkdir(saving_filepath_for_temp) + + # Verifies that even if `saving_filepath_for_temp` exists, + # tensorboard can still save to temporary directory. + test_obj.assertTrue(tf.io.gfile.exists(saving_filepath_for_temp)) + + model.fit( + x=train_ds, + epochs=num_epoch, + steps_per_epoch=steps, + callbacks=[callbacks.TensorBoard(log_dir=saving_filepath)], + ) + + tf.__internal__.distribute.multi_process_runner.run( + proc_tensorboard_can_still_save_to_temp_even_if_it_exists, + cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec( # noqa: E501 + num_workers=2 + ), + args=(self,), + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine(mode=["eager"]) + ) + def test_tensorboard_works_with_same_file_path(self, mode): + def proc_tensorboard_works_with_same_file_path( + test_obj, saving_filepath + ): + model, _, train_ds, steps = _model_setup(test_obj, file_format="") + num_epoch = 2 + + # The saving_filepath shouldn't exist at the beginning (as it's + # unique). 
+            test_obj.assertFalse(tf.io.gfile.exists(saving_filepath))
+
+            tf.__internal__.distribute.multi_process_runner.get_barrier().wait()
+
+            model.fit(
+                x=train_ds,
+                epochs=num_epoch,
+                steps_per_epoch=steps,
+                callbacks=[callbacks.TensorBoard(log_dir=saving_filepath)],
+            )
+
+            tf.__internal__.distribute.multi_process_runner.get_barrier().wait()
+
+            test_obj.assertTrue(tf.io.gfile.listdir(saving_filepath))
+
+        saving_filepath = os.path.join(self.get_temp_dir(), "logfile")
+
+        tf.__internal__.distribute.multi_process_runner.run(
+            proc_tensorboard_works_with_same_file_path,
+            cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec(  # noqa: E501
+                num_workers=2
+            ),
+            args=(self, saving_filepath),
+        )
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(mode=["eager"])
+    )
+    def test_early_stopping(self, mode):
+        def proc_early_stopping(test_obj):
+            class EpochCounterCallback(callbacks.Callback):
+                def on_epoch_begin(self, epoch, logs):
+                    self.last_epoch = epoch
+
+            model, _, train_ds, steps = _model_setup(test_obj, file_format="")
+            epoch_counter_cbk = EpochCounterCallback()
+            cbks = [
+                callbacks.EarlyStopping(
+                    monitor="loss", min_delta=0.05, patience=1, verbose=1
+                ),
+                epoch_counter_cbk,
+            ]
+
+            # Empirically, it is expected that `model.fit()` terminates around
+            # the 22nd epoch. Asserting that it stops before the 50th epoch
+            # avoids flakiness and makes the test more predictable.
+            model.fit(
+                x=train_ds, epochs=100, steps_per_epoch=steps, callbacks=cbks
+            )
+            test_obj.assertLess(epoch_counter_cbk.last_epoch, 50)
+
+        tf.__internal__.distribute.multi_process_runner.run(
+            proc_early_stopping,
+            cluster_spec=tf.__internal__.distribute.multi_process_runner.create_cluster_spec(  # noqa: E501
+                num_workers=2
+            ),
+            args=(self,),
+        )
+
+
+if __name__ == "__main__":
+    tf.__internal__.distribute.multi_process_runner.test_main()
diff --git a/keras/distribute/multi_worker_test.py b/keras/distribute/multi_worker_test.py
index ae74ba22af5f..243b6b54737c 100644
--- a/keras/distribute/multi_worker_test.py
+++ b/keras/distribute/multi_worker_test.py
@@ -14,8 +14,6 @@
 # ==============================================================================
 """Test multi-worker Keras."""

-import tensorflow.compat.v2 as tf
-
 import collections
 import copy
 import functools
@@ -24,261 +22,409 @@
 import sys
 import threading

+import tensorflow.compat.v2 as tf
 from absl.testing import parameterized
-
 import keras
 from keras import backend
 from keras import callbacks
 from keras import metrics as metrics_module
 from keras import models
-from keras.optimizers import optimizer_v1
 from keras.distribute import multi_worker_testing_utils
-from keras.optimizers.optimizer_v2 import rmsprop
+from keras.optimizers import optimizer_v1
+from keras.optimizers.legacy import rmsprop
 from keras.utils import kpl_test_utils
-
-
 def _clone_and_build_model(model, strategy):
-  # The new "original" model in worker 0.
-  with strategy.scope():
-    cloned_model = models.clone_model(model)
-
-  # Compile and build model.
-  if isinstance(model.optimizer, optimizer_v1.TFOptimizer):
-    optimizer = model.optimizer
-    # TODO(yuefengz): figure out why the optimizer here is still a
-    # TFOptimizer.
- while isinstance(optimizer, optimizer_v1.TFOptimizer): - optimizer = optimizer.optimizer - optimizer = copy.deepcopy(optimizer) - else: - optimizer_config = model.optimizer.get_config() - optimizer = type(model.optimizer).from_config(optimizer_config) - - cloned_model.compile( - optimizer, - model.loss, - metrics=metrics_module.clone_metrics(model._compile_metrics), - loss_weights=model.loss_weights, - sample_weight_mode=model.sample_weight_mode, - weighted_metrics=metrics_module.clone_metrics( - model._compile_weighted_metrics)) - return cloned_model + # The new "original" model in worker 0. + with strategy.scope(): + cloned_model = models.clone_model(model) + + # Compile and build model. + if isinstance(model.optimizer, optimizer_v1.TFOptimizer): + optimizer = model.optimizer + # TODO(yuefengz): figure out why the optimizer here is still a + # TFOptimizer. + while isinstance(optimizer, optimizer_v1.TFOptimizer): + optimizer = optimizer.optimizer + optimizer = copy.deepcopy(optimizer) + else: + optimizer_config = model.optimizer.get_config() + optimizer = type(model.optimizer).from_config(optimizer_config) + + cloned_model.compile( + optimizer, + model.loss, + metrics=metrics_module.clone_metrics(model._compile_metrics), + loss_weights=model.loss_weights, + sample_weight_mode=model.sample_weight_mode, + weighted_metrics=metrics_module.clone_metrics( + model._compile_weighted_metrics + ), + ) + return cloned_model # TODO(b/123918215): Possibly merge this Callback with keras_test.Counter. class MultiWorkerVerificationCallback(callbacks.Callback): - """MultiWorkerVerificationCallback verifies the callbacks in multi-worker scheme. - - This Callback is intended to be used for verifying the callback is indeed - called the correct number of times in various task types. - - Attributes: - _task_dict: A nested dictionary storing the number of times a callback has - been called in specific task type, task index, and method name. - Look up structure is - task_name -> task_id -> tracking_method_name -> invoke_count - For example, a _task_dict of - { - 'ps': { - 0: { - 'on_epoch_begin': 2 - }, - 1: { - 'on_epoch_begin': 2 - } - }, - 'worker': { - 0: { - 'on_epoch_begin': 2 - }, - 1: { - 'on_epoch_begin': 2 - } - } - } - indicates the ps task has 'on_epoch_begin' called twice on each - of the two indices, and likewise for worker task. - """ - - # TODO(rchao): Add other method calls to verify. - METHODS_TO_VERIFY = ['on_epoch_begin'] - - def __init__(self, num_epoch, num_worker): - """Initialize a MultiWorkerVerificationCallback. - - Args: - num_epoch: Number of epochs this Callback is expected to be called for. - num_worker: Number of workers this Callback is expected to be called from. + """MultiWorkerVerificationCallback verifies the callbacks in multi-worker + scheme. + + This Callback is intended to be used for verifying the callback is indeed + called the correct number of times in various task types. + + Attributes: + _task_dict: A nested dictionary storing the number of times a callback has + been called in specific task type, task index, and method + name. Look up structure is + task_name -> task_id -> tracking_method_name -> invoke_count + For example, a _task_dict of + { + 'ps': { + 0: { + 'on_epoch_begin': 2 + }, + 1: { + 'on_epoch_begin': 2 + } + }, + 'worker': { + 0: { + 'on_epoch_begin': 2 + }, + 1: { + 'on_epoch_begin': 2 + } + } + } + indicates the ps task has 'on_epoch_begin' called twice on + each of the two indices, and likewise for worker task. 
""" - super().__init__() - self._num_epoch = num_epoch - self._num_worker = num_worker - self._task_dict = { - key: collections.defaultdict(lambda: collections.defaultdict(int)) - for key in ['ps', 'worker', 'chief'] - } - self._lock = threading.Lock() - self._is_between_graph = None - self.wrap_methods(self.METHODS_TO_VERIFY) - - @property - def is_between_graph(self): - return self._is_between_graph - - @is_between_graph.setter - def is_between_graph(self, is_between_graph): - self._is_between_graph = is_between_graph - - def wrap_methods(self, method_names): - """Wrap methods so that the counts of calls are tracked. - - Args: - method_names: A list of names of methods to track calls. - """ - for method_name in method_names: - method = getattr(self, method_name) - - def wrapped_method(method_to_wrap, name, *arg, **kwargs): - # Use lock to ensure += operation is thread-safe. - with self._lock: - task_config = json.loads(os.environ['TF_CONFIG'])['task'] - self._task_dict[task_config['type']][task_config['index']][name] += 1 - method_to_wrap(*arg, **kwargs) - - setattr(self, method_name, - functools.partial(wrapped_method, method, method_name)) - - def verify(self, test_case): - method_count_dict = { - method_name: self._num_epoch for method_name in self.METHODS_TO_VERIFY - } - assert self._is_between_graph is not None - if self._is_between_graph: - # TODO(b/124171024): In between-graph replication, by default only the - # chief calls callback. Fix this test to cover that, as well as the rare - # cases where all workers call. - worker_call_count = { - i: method_count_dict for i in range(0, self._num_worker) - } - else: - # If in-graph, only the first worker calls callback methods. - worker_call_count = {0: method_count_dict} - chief_call_count = {0: method_count_dict} - task_config = json.loads(os.environ['TF_CONFIG'])['task']['type'] - test_case.assertDictEqual( - self._task_dict, - { - # PS' callback is not supposed to be called. - 'ps': {}, - # Worker or chief should only be called on worker/chief. 
- 'worker': worker_call_count if task_config == 'worker' else {}, - 'chief': chief_call_count if task_config == 'chief' else {} - }) - - -class KerasMultiWorkerTestIndependentWorker(tf.test.TestCase, - parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], - strategy=[ - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu, - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, - ])) - def testSimpleModelIndependentWorkerSync(self, strategy): - verification_callback = MultiWorkerVerificationCallback( - num_epoch=2, - num_worker=len( - json.loads(os.environ['TF_CONFIG'])['cluster']['worker'])) - verification_callback.is_between_graph = \ - strategy.extended.experimental_between_graph - batch_size = 64 - steps = 2 - train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset( - batch_size, steps) - with strategy.scope(): - model = multi_worker_testing_utils.get_mnist_model((28, 28, 1)) - orig_loss, _ = model.evaluate(train_ds, steps=steps) - history = model.fit( - x=train_ds, - epochs=2, - steps_per_epoch=steps, - callbacks=[verification_callback]) - self.assertIsInstance(history, keras.callbacks.History) - trained_loss, _ = model.evaluate(train_ds, steps=steps) - self.assertLess(trained_loss, orig_loss) - - verification_callback.verify(self) - - -class KPLMultiWorkerTest(tf.test.TestCase, - parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], - use_adapt=[False], # TODO(b/180742437): Add tests for using adapt. - strategy=[ - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, - # TODO(b/183956672): Re-enable - # strategy_combinations.multi_worker_mirrored_2x2_gpu, - ])) - def testTrainAndServeWithKPL(self, use_adapt, strategy): - test_utils_obj = kpl_test_utils.DistributeKplTestUtils() - with strategy.scope(): - feature_mapper, label_mapper = test_utils_obj.define_kpls_for_training( - use_adapt) - model = test_utils_obj.define_model() - optimizer = rmsprop.RMSprop(learning_rate=0.1) - accuracy = keras.metrics.Accuracy() - - def dataset_fn(_): - return test_utils_obj.dataset_fn(feature_mapper, label_mapper) - - @tf.function - def train_step(iterator): - """The step function for one training step.""" - - def step_fn(inputs): - """The computation to run on each worker.""" - features, labels = inputs - with tf.GradientTape() as tape: - pred = model(features, training=True) - loss = keras.losses.binary_crossentropy(labels, pred) - loss = tf.nn.compute_average_loss(loss) - grads = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(list(zip(grads, model.trainable_variables))) - - actual_pred = tf.cast(tf.greater(pred, 0.5), tf.int64) - accuracy.update_state(labels, actual_pred) - - strategy.run(step_fn, args=(next(iterator),)) - - distributed_dataset = strategy.distribute_datasets_from_function( - dataset_fn) - distributed_iterator = iter(distributed_dataset) - num_epochs = 4 - num_steps = 7 - for _ in range(num_epochs): - accuracy.reset_state() - for _ in range(num_steps): - train_step(distributed_iterator) - - self.assertGreater(accuracy.result().numpy(), 0.5) - self.assertEqual(optimizer.iterations.numpy(), num_epochs * num_steps) - - # Test save/load/serving the trained model. 
- test_utils_obj.test_save_load_serving_model( - model, feature_mapper, test_utils_obj.define_reverse_lookup_layer()) - - -if __name__ == '__main__': - # Enable manual variable initialization to make sure variables are initialized - # by `init_restore_or_wait_for_variables`. - backend.manual_variable_initialization(True) - with tf.compat.v1.test.mock.patch.object(sys, 'exit', os._exit): - tf.__internal__.distribute.multi_process_runner.test_main() + + # TODO(rchao): Add other method calls to verify. + METHODS_TO_VERIFY = ["on_epoch_begin"] + + def __init__(self, num_epoch, num_worker): + """Initialize a MultiWorkerVerificationCallback. + + Args: + num_epoch: Number of epochs this Callback is expected to be called + for. + num_worker: Number of workers this Callback is expected to be called + from. + """ + super().__init__() + self._num_epoch = num_epoch + self._num_worker = num_worker + self._task_dict = { + key: collections.defaultdict(lambda: collections.defaultdict(int)) + for key in ["ps", "worker", "chief"] + } + self._lock = threading.Lock() + self._is_between_graph = None + self.wrap_methods(self.METHODS_TO_VERIFY) + + @property + def is_between_graph(self): + return self._is_between_graph + + @is_between_graph.setter + def is_between_graph(self, is_between_graph): + self._is_between_graph = is_between_graph + + def wrap_methods(self, method_names): + """Wrap methods so that the counts of calls are tracked. + + Args: + method_names: A list of names of methods to track calls. + """ + for method_name in method_names: + method = getattr(self, method_name) + + def wrapped_method(method_to_wrap, name, *arg, **kwargs): + # Use lock to ensure += operation is thread-safe. + with self._lock: + task_config = json.loads(os.environ["TF_CONFIG"])["task"] + self._task_dict[task_config["type"]][task_config["index"]][ + name + ] += 1 + method_to_wrap(*arg, **kwargs) + + setattr( + self, + method_name, + functools.partial(wrapped_method, method, method_name), + ) + + def verify(self, test_case): + method_count_dict = { + method_name: self._num_epoch + for method_name in self.METHODS_TO_VERIFY + } + assert self._is_between_graph is not None + if self._is_between_graph: + # TODO(b/124171024): In between-graph replication, by default only + # the chief calls callback. Fix this test to cover that, as well as + # the rare cases where all workers call. + worker_call_count = { + i: method_count_dict for i in range(0, self._num_worker) + } + else: + # If in-graph, only the first worker calls callback methods. + worker_call_count = {0: method_count_dict} + chief_call_count = {0: method_count_dict} + task_config = json.loads(os.environ["TF_CONFIG"])["task"]["type"] + test_case.assertDictEqual( + self._task_dict, + { + # PS' callback is not supposed to be called. + "ps": {}, + # Worker or chief should only be called on worker/chief. 
+ "worker": worker_call_count if task_config == "worker" else {}, + "chief": chief_call_count if task_config == "chief" else {}, + }, + ) + + +class KerasMultiWorkerTestIndependentWorker( + tf.test.TestCase, parameterized.TestCase +): + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], + strategy=[ + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, # noqa: E501 + ], + ) + ) + def testSimpleModelIndependentWorkerSync(self, strategy): + verification_callback = MultiWorkerVerificationCallback( + num_epoch=2, + num_worker=len( + json.loads(os.environ["TF_CONFIG"])["cluster"]["worker"] + ), + ) + verification_callback.is_between_graph = ( + strategy.extended.experimental_between_graph + ) + batch_size = 64 + steps = 2 + train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset( + batch_size, steps + ) + with strategy.scope(): + model = multi_worker_testing_utils.get_mnist_model((28, 28, 1)) + orig_loss, _ = model.evaluate(train_ds, steps=steps) + history = model.fit( + x=train_ds, + epochs=2, + steps_per_epoch=steps, + callbacks=[verification_callback], + ) + self.assertIsInstance(history, keras.callbacks.History) + trained_loss, _ = model.evaluate(train_ds, steps=steps) + self.assertLess(trained_loss, orig_loss) + + verification_callback.verify(self) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], + strategy=[ + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, # noqa: E501 + ], + ) + ) + def test_distribution_reduction_method_auto_default_train_step( + self, strategy + ): + BATCH = 4 + EPOCHS = 1 + STEPS = 2 + + # Dataset's targets are [0, 1, 2, 3, 4, 5, 6, 7]: + train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset( + BATCH, STEPS, target_values="increasing" + ) + + # A model that always outputs `sum(inputs*0) + 1 = 1` + with strategy.scope(): + inputs = keras.Input(shape=(28, 28, 1)) + x = keras.layers.Flatten()(inputs) + x = keras.layers.Dense( + 1, kernel_initializer="zeros", bias_initializer="ones" + )(x) + model = keras.Model(inputs=inputs, outputs=x) + model.trainable = False + # model.distribute_reduction_method = 'auto' + + model.compile( + loss=keras.losses.MeanAbsoluteError( + reduction=keras.losses.losses_utils.ReductionV2.NONE + ), + optimizer=multi_worker_testing_utils.gradient_descent.SGD( + learning_rate=0.001 + ), + metrics=["mse"], + ) + + # For every output x_i = 1, and increasing target values in [0, 8): + # loss_i = |i-1| + # loss = (|0-1| + |1-1| + |2-1| + ... 
|7-1|) / (BATCH*STEPS)
+        #      = (1+0+1+2+3+4+5+6) / 8 = 2.75
+        orig_loss, _ = model.evaluate(train_ds, steps=STEPS)
+        self.assertEqual(2.75, orig_loss)
+
+        history = model.fit(train_ds, epochs=EPOCHS, steps_per_epoch=STEPS)
+        self.assertAllClose(history.history["loss"], [2.75] * EPOCHS)
+
+        trained_loss, _ = model.evaluate(train_ds, steps=STEPS)
+        self.assertEqual(2.75, trained_loss)
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            mode=["eager"],
+            strategy=[
+                tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu,  # noqa: E501
+                tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu,  # noqa: E501
+            ],
+        )
+    )
+    def test_distribution_reduction_method_auto_custom_train_step(
+        self, strategy
+    ):
+        BATCH = 4
+        EPOCHS = 1
+        STEPS = 2
+
+        # Dataset's targets are [0, 1, 2, 3, 4, 5, 6, 7]:
+        train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset(
+            BATCH, STEPS, target_values="increasing"
+        )
+
+        # A model that has loss=sum(targets) / BATCH:
+        class MyModel(keras.Model):
+            def train_step(self, data):
+                _, y = data
+                loss_value = tf.cast(y, tf.float32)
+                loss_value = tf.nn.compute_average_loss(
+                    loss_value, global_batch_size=BATCH
+                )
+                return {"loss": loss_value}
+
+            def test_step(self, data):
+                _, y = data
+                loss_value = tf.cast(y, tf.float32)
+                loss_value = tf.nn.compute_average_loss(
+                    loss_value, global_batch_size=BATCH
+                )
+                return {"loss": loss_value}
+
+        with strategy.scope():
+            inputs = keras.Input(shape=(28, 28, 1))
+            x = keras.layers.Flatten()(inputs)
+            x = keras.layers.Dense(
+                1, kernel_initializer="ones", bias_initializer="ones"
+            )(x)
+            model = MyModel(inputs=inputs, outputs=x)
+            # model.distribute_reduction_method = 'auto'
+
+        model.compile(
+            optimizer=multi_worker_testing_utils.gradient_descent.SGD(
+                learning_rate=0.001
+            ),
+        )
+
+        # For epochs=1 steps=2 replicas=2 batch=4, and increasing target vals,
+        # loss_e0_s0_r0 = [0+1]/BATCH = 1/4
+        # loss_e0_s0_r1 = [2+3]/BATCH = 5/4
+        # loss_e0_s0 = 1/4 + 5/4 = 1.5
+        # loss_e0_s1_r0 = [4+5]/BATCH = 9/4
+        # loss_e0_s1_r1 = [6+7]/BATCH = 13/4
+        # loss_e0_s1 = 9/4 + 13/4 = 5.5
+        # loss_e0 = last([1.5, 5.5])
+        history = model.fit(train_ds, epochs=EPOCHS, steps_per_epoch=STEPS)
+        self.assertAllClose([5.5], history.history["loss"])
+
+        eval_output = model.evaluate(train_ds, steps=STEPS)
+        self.assertAllClose(5.5, eval_output)
+
+
+class KPLMultiWorkerTest(tf.test.TestCase, parameterized.TestCase):
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            mode=["eager"],
+            use_adapt=[False],  # TODO(b/180742437): Add tests for using adapt.
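# A quick sanity check of the loss value asserted in the custom train_step
# test above (comment-only sketch, not part of this patch): with targets
# 0..7, BATCH=4, and the two per-replica losses summed at each step,
#   step 0: (0+1)/4 + (2+3)/4 = 1.5
#   step 1: (4+5)/4 + (6+7)/4 = 5.5
# and `fit()` reports the last step's value, hence history == [5.5].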
+ strategy=[ + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, # noqa: E501 + # TODO(b/183956672): Re-enable + # strategy_combinations.multi_worker_mirrored_2x2_gpu, + ], + ) + ) + def testTrainAndServeWithKPL(self, use_adapt, strategy): + test_utils_obj = kpl_test_utils.DistributeKplTestUtils() + with strategy.scope(): + ( + feature_mapper, + label_mapper, + ) = test_utils_obj.define_kpls_for_training(use_adapt) + model = test_utils_obj.define_model() + optimizer = rmsprop.RMSprop(learning_rate=0.1) + accuracy = keras.metrics.Accuracy() + + def dataset_fn(_): + return test_utils_obj.dataset_fn(feature_mapper, label_mapper) + + @tf.function + def train_step(iterator): + """The step function for one training step.""" + + def step_fn(inputs): + """The computation to run on each worker.""" + features, labels = inputs + with tf.GradientTape() as tape: + pred = model(features, training=True) + loss = keras.losses.binary_crossentropy(labels, pred) + loss = tf.nn.compute_average_loss(loss) + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients( + list(zip(grads, model.trainable_variables)) + ) + + actual_pred = tf.cast(tf.greater(pred, 0.5), tf.int64) + accuracy.update_state(labels, actual_pred) + + strategy.run(step_fn, args=(next(iterator),)) + + distributed_dataset = strategy.distribute_datasets_from_function( + dataset_fn + ) + distributed_iterator = iter(distributed_dataset) + num_epochs = 4 + num_steps = 7 + for _ in range(num_epochs): + accuracy.reset_state() + for _ in range(num_steps): + train_step(distributed_iterator) + + self.assertGreater(accuracy.result().numpy(), 0.5) + self.assertEqual( + optimizer.iterations.numpy(), num_epochs * num_steps + ) + + # Test save/load/serving the trained model. + test_utils_obj.test_save_load_serving_model( + model, feature_mapper, test_utils_obj.define_reverse_lookup_layer() + ) + + +if __name__ == "__main__": + # Enable manual variable initialization to make sure variables are + # initialized by `init_restore_or_wait_for_variables`. 
+ backend.manual_variable_initialization(True) + with tf.compat.v1.test.mock.patch.object(sys, "exit", os._exit): + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/multi_worker_testing_utils.py b/keras/distribute/multi_worker_testing_utils.py index e9b4e319a509..c0fd9d19d969 100644 --- a/keras/distribute/multi_worker_testing_utils.py +++ b/keras/distribute/multi_worker_testing_utils.py @@ -14,224 +14,259 @@ # ============================================================================== """Utilities for testing multi-worker distribution strategies with Keras.""" -import tensorflow.compat.v2 as tf - import threading import unittest + +import tensorflow.compat.v2 as tf + import keras -from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver -from keras.optimizers.optimizer_v2 import gradient_descent -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training.server_lib import ClusterSpec +from keras.optimizers.legacy import gradient_descent +# isort: off +from tensorflow.python.distribute.cluster_resolver import ( + SimpleClusterResolver, +) +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training.server_lib import ( + ClusterSpec, +) _portpicker_import_error = None try: - import portpicker # pylint: disable=g-import-not-at-top -except (ImportError, ModuleNotFoundError) as _error: # pylint: disable=invalid-name - _portpicker_import_error = _error - portpicker = None + import portpicker +except ( + ImportError, + ModuleNotFoundError, +) as _error: + _portpicker_import_error = _error + portpicker = None ASSIGNED_PORTS = set() lock = threading.Lock() -def mnist_synthetic_dataset(batch_size, steps_per_epoch): - """Generate synthetic MNIST dataset for testing.""" - # train dataset - x_train = tf.ones([batch_size * steps_per_epoch, 28, 28, 1], - dtype=tf.float32) - y_train = tf.ones([batch_size * steps_per_epoch, 1], - dtype=tf.int32) - train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - train_ds = train_ds.repeat() - # train_ds = train_ds.shuffle(100) - train_ds = train_ds.batch(64, drop_remainder=True) +def mnist_synthetic_dataset( + batch_size, steps_per_epoch, target_values="constant" +): + """Generate synthetic MNIST dataset for testing.""" + # train dataset + x_train = tf.ones( + [batch_size * steps_per_epoch, 28, 28, 1], dtype=tf.float32 + ) + if target_values == "constant": + y_train = tf.ones([batch_size * steps_per_epoch, 1], dtype=tf.int32) + elif target_values == "increasing": + y_train = tf.reshape( + tf.range(batch_size * steps_per_epoch, dtype=tf.int32), (-1, 1) + ) + else: + raise ValueError( + 'Unknown value for `target_values` "' + + str(target_values) + + '". Valid options are "constant" and "increasing".' 
+ ) + + train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)) + train_ds = train_ds.repeat() + # train_ds = train_ds.shuffle(100) + train_ds = train_ds.batch(batch_size, drop_remainder=True) - # eval dataset - x_test = tf.random.uniform([10000, 28, 28, 1], dtype=tf.float32) - y_test = tf.random.uniform([10000, 1], - minval=0, - maxval=9, - dtype=tf.int32) - eval_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)) - eval_ds = eval_ds.batch(64, drop_remainder=True) + # eval dataset + x_test = tf.random.uniform([10000, 28, 28, 1], dtype=tf.float32) + y_test = tf.random.uniform([10000, 1], minval=0, maxval=9, dtype=tf.int32) + eval_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)) + eval_ds = eval_ds.batch(batch_size, drop_remainder=True) - return train_ds, eval_ds + return train_ds, eval_ds def get_mnist_model(input_shape): - """Define a deterministically-initialized CNN model for MNIST testing.""" - inputs = keras.Input(shape=input_shape) - x = keras.layers.Conv2D( - 32, - kernel_size=(3, 3), - activation="relu", - kernel_initializer=keras.initializers.TruncatedNormal(seed=99))(inputs) - x = keras.layers.BatchNormalization()(x) - x = keras.layers.Flatten()(x) + keras.layers.Flatten()(x) - x = keras.layers.Dense( - 10, - activation="softmax", - kernel_initializer=keras.initializers.TruncatedNormal(seed=99))(x) - model = keras.Model(inputs=inputs, outputs=x) - - # TODO(yuefengz): optimizer with slot variables doesn't work because of - # optimizer's bug. - # TODO(yuefengz): we should not allow non-v2 optimizer. - model.compile( - loss=keras.losses.sparse_categorical_crossentropy, - optimizer=gradient_descent.SGD(learning_rate=0.001), - metrics=["accuracy"]) - return model + """Define a deterministically-initialized CNN model for MNIST testing.""" + inputs = keras.Input(shape=input_shape) + x = keras.layers.Conv2D( + 32, + kernel_size=(3, 3), + activation="relu", + kernel_initializer=keras.initializers.TruncatedNormal(seed=99), + )(inputs) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Flatten()(x) + keras.layers.Flatten()(x) + x = keras.layers.Dense( + 10, + activation="softmax", + kernel_initializer=keras.initializers.TruncatedNormal(seed=99), + )(x) + model = keras.Model(inputs=inputs, outputs=x) + + # TODO(yuefengz): optimizer with slot variables doesn't work because of + # optimizer's bug. + # TODO(yuefengz): we should not allow non-v2 optimizer. 
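# Usage sketch for the `target_values` switch added to
# `mnist_synthetic_dataset` above (not part of this patch):
#   train_ds, _ = mnist_synthetic_dataset(4, 2)  # all-ones labels
#   train_ds, _ = mnist_synthetic_dataset(
#       4, 2, target_values="increasing")  # labels 0..7, one per example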
+ model.compile( + loss=keras.losses.sparse_categorical_crossentropy, + optimizer=gradient_descent.SGD(learning_rate=0.001), + metrics=["accuracy"], + ) + return model def make_parameter_server_cluster(num_workers, num_ps): - cluster_def = create_in_process_cluster( - num_workers=num_workers, num_ps=num_ps, rpc_layer="grpc") - return SimpleClusterResolver(ClusterSpec(cluster_def), rpc_layer="grpc") + cluster_def = create_in_process_cluster( + num_workers=num_workers, num_ps=num_ps, rpc_layer="grpc" + ) + return SimpleClusterResolver(ClusterSpec(cluster_def), rpc_layer="grpc") def pick_unused_port(): - """Returns an unused and unassigned local port.""" - if _portpicker_import_error: - raise _portpicker_import_error # pylint: disable=raising-bad-type - - global ASSIGNED_PORTS - with lock: - while True: - try: - port = portpicker.pick_unused_port() - except portpicker.NoFreePortFoundError: - raise unittest.SkipTest("Flakes in portpicker library do not represent " - "TensorFlow errors.") - if port > 10000 and port not in ASSIGNED_PORTS: - ASSIGNED_PORTS.add(port) - logging.info("Using local port %r", port) - return port - - -def _create_cluster(num_workers, - num_ps, - has_chief=False, - has_eval=False, - protocol="grpc", - worker_config=None, - ps_config=None, - eval_config=None, - worker_name="worker", - ps_name="ps", - chief_name="chief"): - """Creates and starts local servers and returns the cluster_spec dict.""" - if _portpicker_import_error: - raise _portpicker_import_error # pylint: disable=raising-bad-type - worker_ports = [pick_unused_port() for _ in range(num_workers)] - ps_ports = [pick_unused_port() for _ in range(num_ps)] - - cluster_dict = {} - if num_workers > 0: - cluster_dict[worker_name] = ["localhost:%s" % port for port in worker_ports] - if num_ps > 0: - cluster_dict[ps_name] = ["localhost:%s" % port for port in ps_ports] - if has_eval: - cluster_dict["evaluator"] = ["localhost:%s" % pick_unused_port()] - if has_chief: - cluster_dict[chief_name] = ["localhost:%s" % pick_unused_port()] - - cs = tf.train.ClusterSpec(cluster_dict) - - for i in range(num_workers): - tf.distribute.Server( - cs, - job_name=worker_name, - protocol=protocol, - task_index=i, - config=worker_config, - start=True) - - for i in range(num_ps): - tf.distribute.Server( - cs, - job_name=ps_name, - protocol=protocol, - task_index=i, - config=ps_config, - start=True) - - if has_chief: - tf.distribute.Server( - cs, - job_name=chief_name, - protocol=protocol, - task_index=0, - config=worker_config, - start=True) - - if has_eval: - tf.distribute.Server( - cs, - job_name="evaluator", - protocol=protocol, - task_index=0, - config=eval_config, - start=True) - - return cluster_dict - - -def create_in_process_cluster(num_workers, - num_ps, - has_chief=False, - has_eval=False, - rpc_layer="grpc"): - """Create an in-process cluster that consists of only standard server.""" - # Leave some memory for cuda runtime. - gpu_mem_frac = 0.7 / (num_workers + int(has_chief) + int(has_eval)) - worker_config = tf.compat.v1.ConfigProto() - worker_config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_frac - - # The cluster may hang if workers don't have enough inter_op threads. See - # b/172296720 for more details. - if worker_config.inter_op_parallelism_threads < num_workers + 1: - worker_config.inter_op_parallelism_threads = num_workers + 1 - - # Enable collective ops which has no impact on non-collective ops. 
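# Usage sketch (not part of this patch): the resolver returned by
# `make_parameter_server_cluster` above is meant to seed a parameter server
# strategy in tests, along the lines of
#   resolver = make_parameter_server_cluster(num_workers=2, num_ps=1)
#   strategy = tf.distribute.experimental.ParameterServerStrategy(resolver)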
- if has_chief: - worker_config.experimental.collective_group_leader = ( - "/job:chief/replica:0/task:0") - else: - worker_config.experimental.collective_group_leader = ( - "/job:worker/replica:0/task:0") - - ps_config = tf.compat.v1.ConfigProto() - ps_config.device_count["GPU"] = 0 - - eval_config = tf.compat.v1.ConfigProto() - eval_config.experimental.collective_group_leader = "" - - # Create in-process servers. Once an in-process tensorflow server is created, - # there is no way to terminate it. So we create one cluster per test process. - # We could've started the server in another process, we could then kill that - # process to terminate the server. The reasons why we don"t want multiple - # processes are - # 1) it is more difficult to manage these processes; - # 2) there is something global in CUDA such that if we initialize CUDA in the - # parent process, the child process cannot initialize it again and thus cannot - # use GPUs (https://stackoverflow.com/questions/22950047). - cluster = None - try: - cluster = _create_cluster( - num_workers, - num_ps=num_ps, - has_chief=has_chief, - has_eval=has_eval, - worker_config=worker_config, - ps_config=ps_config, - eval_config=eval_config, - protocol=rpc_layer) - except tf.errors.UnknownError as e: - if "Could not start gRPC server" in e.message: - raise unittest.SkipTest("Cannot start std servers.") + """Returns an unused and unassigned local port.""" + if _portpicker_import_error: + raise _portpicker_import_error + + global ASSIGNED_PORTS + with lock: + while True: + try: + port = portpicker.pick_unused_port() + except portpicker.NoFreePortFoundError: + raise unittest.SkipTest( + "Flakes in portpicker library do not represent " + "TensorFlow errors." + ) + if port > 10000 and port not in ASSIGNED_PORTS: + ASSIGNED_PORTS.add(port) + logging.info("Using local port %r", port) + return port + + +def _create_cluster( + num_workers, + num_ps, + has_chief=False, + has_eval=False, + protocol="grpc", + worker_config=None, + ps_config=None, + eval_config=None, + worker_name="worker", + ps_name="ps", + chief_name="chief", +): + """Creates and starts local servers and returns the cluster_spec dict.""" + if _portpicker_import_error: + raise _portpicker_import_error + worker_ports = [pick_unused_port() for _ in range(num_workers)] + ps_ports = [pick_unused_port() for _ in range(num_ps)] + + cluster_dict = {} + if num_workers > 0: + cluster_dict[worker_name] = [ + f"localhost:{port}" for port in worker_ports + ] + if num_ps > 0: + cluster_dict[ps_name] = [f"localhost:{port}" for port in ps_ports] + if has_eval: + cluster_dict["evaluator"] = [f"localhost:{pick_unused_port()}"] + if has_chief: + cluster_dict[chief_name] = [f"localhost:{pick_unused_port()}"] + + cs = tf.train.ClusterSpec(cluster_dict) + + for i in range(num_workers): + tf.distribute.Server( + cs, + job_name=worker_name, + protocol=protocol, + task_index=i, + config=worker_config, + start=True, + ) + + for i in range(num_ps): + tf.distribute.Server( + cs, + job_name=ps_name, + protocol=protocol, + task_index=i, + config=ps_config, + start=True, + ) + + if has_chief: + tf.distribute.Server( + cs, + job_name=chief_name, + protocol=protocol, + task_index=0, + config=worker_config, + start=True, + ) + + if has_eval: + tf.distribute.Server( + cs, + job_name="evaluator", + protocol=protocol, + task_index=0, + config=eval_config, + start=True, + ) + + return cluster_dict + + +def create_in_process_cluster( + num_workers, num_ps, has_chief=False, has_eval=False, rpc_layer="grpc" +): + """Create an 
in-process cluster that consists of only standard servers."""
+    # Leave some memory for the CUDA runtime.
+    gpu_mem_frac = 0.7 / (num_workers + int(has_chief) + int(has_eval))
+    worker_config = tf.compat.v1.ConfigProto()
+    worker_config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_frac
+
+    # The cluster may hang if workers don't have enough inter_op threads. See
+    # b/172296720 for more details.
+    if worker_config.inter_op_parallelism_threads < num_workers + 1:
+        worker_config.inter_op_parallelism_threads = num_workers + 1
+
+    # Enable collective ops which has no impact on non-collective ops.
+    if has_chief:
+        worker_config.experimental.collective_group_leader = (
+            "/job:chief/replica:0/task:0"
+        )
     else:
-        raise
-    return cluster
+        worker_config.experimental.collective_group_leader = (
+            "/job:worker/replica:0/task:0"
+        )
+
+    ps_config = tf.compat.v1.ConfigProto()
+    ps_config.device_count["GPU"] = 0
+
+    eval_config = tf.compat.v1.ConfigProto()
+    eval_config.experimental.collective_group_leader = ""
+
+    # Create in-process servers. Once an in-process TensorFlow server is
+    # created, there is no way to terminate it. So we create one cluster per
+    # test process. We could have started the server in another process and
+    # then killed that process to terminate the server. The reasons why we
+    # don't want multiple processes are
+    # 1) it is more difficult to manage these processes;
+    # 2) there is something global in CUDA such that if we initialize CUDA in
+    # the parent process, the child process cannot initialize it again and thus
+    # cannot use GPUs (https://stackoverflow.com/questions/22950047).
+    cluster = None
+    try:
+        cluster = _create_cluster(
+            num_workers,
+            num_ps=num_ps,
+            has_chief=has_chief,
+            has_eval=has_eval,
+            worker_config=worker_config,
+            ps_config=ps_config,
+            eval_config=eval_config,
+            protocol=rpc_layer,
+        )
+    except tf.errors.UnknownError as e:
+        if "Could not start gRPC server" in e.message:
+            raise unittest.SkipTest("Cannot start std servers.")
+        else:
+            raise
+    return cluster
diff --git a/keras/distribute/optimizer_combinations.py b/keras/distribute/optimizer_combinations.py
index 8a585a00dea4..9df667080acd 100644
--- a/keras/distribute/optimizer_combinations.py
+++ b/keras/distribute/optimizer_combinations.py
@@ -14,97 +14,123 @@
 # ==============================================================================
 """Strategy and optimizer combinations for combinations.combine()."""
-from keras.optimizers.optimizer_experimental import adam as adam_experimental
-from keras.optimizers.optimizer_v2 import adadelta as adadelta_keras_v2
-from keras.optimizers.optimizer_v2 import adagrad as adagrad_keras_v2
-from keras.optimizers.optimizer_v2 import adam as adam_keras_v2
-from keras.optimizers.optimizer_v2 import adamax as adamax_keras_v2
-from keras.optimizers.optimizer_v2 import ftrl as ftrl_keras_v2
-from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_keras_v2
-from keras.optimizers.optimizer_v2 import nadam as nadam_keras_v2
-from keras.optimizers.optimizer_v2 import rmsprop as rmsprop_keras_v2
 import tensorflow.compat.v2 as tf
+from keras.optimizers import adam as adam_experimental
+from keras.optimizers.legacy import adadelta as adadelta_keras_v2
+from keras.optimizers.legacy import adagrad as adagrad_keras_v2
+from keras.optimizers.legacy import adam as adam_keras_v2
+from keras.optimizers.legacy import adamax as adamax_keras_v2
+from keras.optimizers.legacy import ftrl as ftrl_keras_v2
+from keras.optimizers.legacy import (
+
gradient_descent as gradient_descent_keras_v2, +) +from keras.optimizers.legacy import nadam as nadam_keras_v2 +from keras.optimizers.legacy import rmsprop as rmsprop_keras_v2 -gradient_descent_optimizer_v1_fn = tf.__internal__.test.combinations.NamedObject( - "GradientDescentV1", - lambda: tf.compat.v1.train.GradientDescentOptimizer(0.001)) +gradient_descent_optimizer_v1_fn = ( + tf.__internal__.test.combinations.NamedObject( + "GradientDescentV1", + lambda: tf.compat.v1.train.GradientDescentOptimizer(0.001), + ) +) adagrad_optimizer_v1_fn = tf.__internal__.test.combinations.NamedObject( - "AdagradV1", lambda: tf.compat.v1.train.AdagradOptimizer(0.001)) + "AdagradV1", lambda: tf.compat.v1.train.AdagradOptimizer(0.001) +) adam_optimizer_v1_fn = tf.__internal__.test.combinations.NamedObject( - "AdamV1", lambda: tf.compat.v1.train.AdamOptimizer(0.001, epsilon=1)) + "AdamV1", lambda: tf.compat.v1.train.AdamOptimizer(0.001, epsilon=1) +) ftrl_optimizer_v1_fn = tf.__internal__.test.combinations.NamedObject( - "FtrlV1", lambda: tf.compat.v1.train.FtrlOptimizer(0.001)) + "FtrlV1", lambda: tf.compat.v1.train.FtrlOptimizer(0.001) +) rmsprop_optimizer_v1_fn = tf.__internal__.test.combinations.NamedObject( - "RmsPropV1", lambda: tf.compat.v1.train.RMSPropOptimizer(0.001)) + "RmsPropV1", lambda: tf.compat.v1.train.RMSPropOptimizer(0.001) +) # TODO(shiningsun): consider adding the other v1 optimizers optimizers_v1 = [ - gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn, - ftrl_optimizer_v1_fn, rmsprop_optimizer_v1_fn + gradient_descent_optimizer_v1_fn, + adagrad_optimizer_v1_fn, + ftrl_optimizer_v1_fn, + rmsprop_optimizer_v1_fn, ] adadelta_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( - "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001)) + "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001) +) adagrad_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( - "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001)) + "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001) +) adam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( - "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0)) + "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0) +) adam_experimental_fn = tf.__internal__.test.combinations.NamedObject( - "AdamExperimental", lambda: adam_experimental.Adam(0.001)) + "AdamExperimental", lambda: adam_experimental.Adam(0.001) +) adamax_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( - "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0)) + "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0) +) nadam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( - "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0)) + "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0) +) ftrl_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( - "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001)) -gradient_descent_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( - "GradientDescentKerasV2", lambda: gradient_descent_keras_v2.SGD(0.001)) + "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001) +) +gradient_descent_optimizer_keras_v2_fn = ( + tf.__internal__.test.combinations.NamedObject( + "GradientDescentKerasV2", lambda: gradient_descent_keras_v2.SGD(0.001) + ) +) rmsprop_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( - "RmsPropKerasV2", lambda: 
rmsprop_keras_v2.RMSprop(0.001))
+    "RmsPropKerasV2", lambda: rmsprop_keras_v2.RMSprop(0.001)
+)
 
 # TODO(shiningsun): consider adding the other v2 optimizers
 optimizers_v2 = [
-    gradient_descent_optimizer_keras_v2_fn, adagrad_optimizer_keras_v2_fn
+    gradient_descent_optimizer_keras_v2_fn,
+    adagrad_optimizer_keras_v2_fn,
 ]
 
 optimizers_v1_and_v2 = optimizers_v1 + optimizers_v2
 
 
 def distributions_and_v1_optimizers():
-  """A common set of combination with DistributionStrategies and Optimizers."""
-  return tf.__internal__.test.combinations.combine(
-      distribution=[
-          tf.__internal__.distribute.combinations.one_device_strategy,
-          tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,
-          tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus,
-          tf.__internal__.distribute.combinations
-          .mirrored_strategy_with_two_gpus_no_merge_call,
-      ],
-      optimizer_fn=optimizers_v1)
+    """A common set of combinations with DistributionStrategies and
+    Optimizers."""
+    return tf.__internal__.test.combinations.combine(
+        distribution=[
+            tf.__internal__.distribute.combinations.one_device_strategy,
+            tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,  # noqa: E501
+            tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus,  # noqa: E501
+            tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus_no_merge_call,  # noqa: E501
+        ],
+        optimizer_fn=optimizers_v1,
+    )
 
 
 def distributions_and_v2_optimizers():
-  """A common set of combination with DistributionStrategies and Optimizers."""
-  return tf.__internal__.test.combinations.combine(
-      distribution=[
-          tf.__internal__.distribute.combinations.one_device_strategy,
-          tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,
-          tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus,
-          tf.__internal__.distribute.combinations
-          .mirrored_strategy_with_two_gpus_no_merge_call,
-      ],
-      optimizer_fn=optimizers_v2)
+    """A common set of combinations with DistributionStrategies and
+    Optimizers."""
+    return tf.__internal__.test.combinations.combine(
+        distribution=[
+            tf.__internal__.distribute.combinations.one_device_strategy,
+            tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,  # noqa: E501
+            tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus,  # noqa: E501
+            tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus_no_merge_call,  # noqa: E501
+        ],
+        optimizer_fn=optimizers_v2,
+    )
 
 
 def distributions_and_v1_and_v2_optimizers():
-  """A common set of combination with DistributionStrategies and Optimizers."""
-  return tf.__internal__.test.combinations.combine(
-      distribution=[
-          tf.__internal__.distribute.combinations.one_device_strategy,
-          tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,
-          tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus,
-          tf.__internal__.distribute.combinations
-          .mirrored_strategy_with_two_gpus_no_merge_call,
-      ],
-      optimizer_fn=optimizers_v1_and_v2)
+    """A common set of combinations with DistributionStrategies and
+    Optimizers."""
+    return tf.__internal__.test.combinations.combine(
+        distribution=[
+            tf.__internal__.distribute.combinations.one_device_strategy,
+            tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,  # noqa: E501
+            tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus,  # noqa: E501
+            tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus_no_merge_call,  # noqa: E501
+        ],
+
optimizer_fn=optimizers_v1_and_v2, + ) diff --git a/keras/distribute/parameter_server_evaluation_test.py b/keras/distribute/parameter_server_evaluation_test.py index d1e67ea01705..647d35d85a2a 100644 --- a/keras/distribute/parameter_server_evaluation_test.py +++ b/keras/distribute/parameter_server_evaluation_test.py @@ -16,161 +16,180 @@ import time +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf -from tensorflow.python.distribute import multi_worker_test_base -from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver +# isort: off +from tensorflow.python.distribute import ( + multi_worker_test_base, +) +from tensorflow.python.distribute.cluster_resolver import ( + SimpleClusterResolver, +) from tensorflow.python.ops import resource_variable_ops # TODO(yuefengz): move the following implementation to Keras core. class MeanMetricSpec(tf.TypeSpec): + def __init__(self, config, weights): + self._config = config + self._weights = weights - def __init__(self, config, weights): - self._config = config - self._weights = weights - - def _serialize(self): - return (self._config, self._weights) + def _serialize(self): + return (self._config, self._weights) - @property - def value_type(self): - return MeanMetricAsCompositeTensor + @property + def value_type(self): + return MeanMetricAsCompositeTensor - @property - def _component_specs(self): - return self._weights + @property + def _component_specs(self): + return self._weights - def _to_components(self, value): - return value.weights + def _to_components(self, value): + return value.weights - def _from_components(self, weights): - counter = [0] + def _from_components(self, weights): + counter = [0] - def fetch_variable(next_creator, **kwargs): - del next_creator, kwargs - # TODO(yuefengz): verify the var creation order matches the weights - # property - var = weights[counter[0]] - counter[0] += 1 - return var + def fetch_variable(next_creator, **kwargs): + del next_creator, kwargs + # TODO(yuefengz): verify the var creation order matches the weights + # property + var = weights[counter[0]] + counter[0] += 1 + return var - with tf.variable_creator_scope(fetch_variable): - ret = MeanMetricAsCompositeTensor.from_config(self._config) - assert len(weights) == len(ret.weights) - return ret + with tf.variable_creator_scope(fetch_variable): + ret = MeanMetricAsCompositeTensor.from_config(self._config) + assert len(weights) == len(ret.weights) + return ret -class MeanMetricAsCompositeTensor(keras.metrics.Mean, - tf.__internal__.CompositeTensor): +class MeanMetricAsCompositeTensor( + keras.metrics.Mean, tf.__internal__.CompositeTensor +): + def element_spec(self): + raise NotImplementedError("element_spec not implemented") - def element_spec(self): - raise NotImplementedError("element_spec not implemented") - - @property - def _type_spec(self): - weight_specs = [ - resource_variable_ops.VariableSpec.from_value(w) for w in self.weights] - return MeanMetricSpec(self.get_config(), weight_specs) + @property + def _type_spec(self): + weight_specs = [ + resource_variable_ops.VariableSpec.from_value(w) + for w in self.weights + ] + return MeanMetricSpec(self.get_config(), weight_specs) @test_utils.run_v2_only class EvaluationTest(tf.test.TestCase): - - @classmethod - def setUpClass(cls): - super(EvaluationTest, cls).setUpClass() - cls._cluster = multi_worker_test_base.create_multi_process_cluster( - num_workers=3, num_ps=2, rpc_layer="grpc") - cls._cluster_def = 
cls._cluster.cluster_resolver.cluster_spec().as_dict() - cluster_resolver = SimpleClusterResolver( - tf.train.ClusterSpec(cls._cluster_def), rpc_layer="grpc") - - cls.strategy = tf.distribute.experimental.ParameterServerStrategy( - cluster_resolver) - cls.cluster_coord = tf.distribute.experimental.coordinator.ClusterCoordinator(cls.strategy) - - @classmethod - def tearDownClass(cls): - cls._cluster.stop() - cls._cluster = None - super(EvaluationTest, cls).tearDownClass() - - def testPassMetricToTfFunction(self): - metric1 = MeanMetricAsCompositeTensor() - metric2 = MeanMetricAsCompositeTensor() - - self.assertEqual(metric1.result(), 0.0) - self.assertEqual(metric2.result(), 0.0) - - tf.nest.assert_same_structure( - metric1, metric2._type_spec, expand_composites=True) - tf.nest.assert_same_structure( - metric1._type_spec, metric2, expand_composites=True) - - @tf.function - def func(m): - m.update_state([1.0, 2.0]) - - func(metric1) - self.assertEqual(metric1.result(), 1.5) - self.assertEqual(metric2.result(), 0.0) - - concrete_f = func.get_concrete_function(metric1._type_spec) - concrete_f(metric2) - self.assertEqual(metric1.result(), 1.5) - self.assertEqual(metric2.result(), 1.5) - - def testModelEvaluatePrototype(self): - - def metric_fn(): - return MeanMetricAsCompositeTensor() - - # TODO(yuefengz): make _create_per_worker_resources public and get rid of - # the type_spec hack. - per_worker_metric = self.cluster_coord._create_per_worker_resources( - metric_fn) - - metric_on_coordinator = metric_fn() - - for metric_remote_value in per_worker_metric._values: - metric_remote_value._type_spec = metric_on_coordinator._type_spec - - def dataset_fn(): - return tf.data.Dataset.range(1024) - - # TODO(yuefengz): integrate it into model.evaluate. - - @tf.function - def eval_fn(total_shard, shard_id, metric): - metric.reset_states() - dataset_shard = dataset_fn().shard(total_shard, shard_id) - for i in dataset_shard: - metric.update_state(i) - - # TODO(yuefengz): we should return the internal state of the metric and - # then use the combiner API. 
- return metric.result() - - total_shards = 128 - result_remote_values = [] - for i in range(total_shards): - result_remote_values.append( - self.cluster_coord.schedule( - eval_fn, args=(total_shards, i, per_worker_metric))) - - self._cluster.kill_task("worker", 0) - self._cluster.kill_task("worker", 1) - time.sleep(1) - self._cluster.start_task("worker", 0) - self._cluster.start_task("worker", 1) - - results = [r.fetch() for r in result_remote_values] - result = sum(results) / len(results) - self.assertEqual(result, 511.5) + @classmethod + def setUpClass(cls): + super(EvaluationTest, cls).setUpClass() + cls._cluster = multi_worker_test_base.create_multi_process_cluster( + num_workers=3, num_ps=2, rpc_layer="grpc" + ) + cls._cluster_def = ( + cls._cluster.cluster_resolver.cluster_spec().as_dict() + ) + cluster_resolver = SimpleClusterResolver( + tf.train.ClusterSpec(cls._cluster_def), rpc_layer="grpc" + ) + + cls.strategy = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver + ) + cls.cluster_coord = ( + tf.distribute.experimental.coordinator.ClusterCoordinator( + cls.strategy + ) + ) + + @classmethod + def tearDownClass(cls): + cls._cluster.stop() + cls._cluster = None + super(EvaluationTest, cls).tearDownClass() + + def testPassMetricToTfFunction(self): + metric1 = MeanMetricAsCompositeTensor() + metric2 = MeanMetricAsCompositeTensor() + + self.assertEqual(metric1.result(), 0.0) + self.assertEqual(metric2.result(), 0.0) + + tf.nest.assert_same_structure( + metric1, metric2._type_spec, expand_composites=True + ) + tf.nest.assert_same_structure( + metric1._type_spec, metric2, expand_composites=True + ) + + @tf.function + def func(m): + m.update_state([1.0, 2.0]) + + func(metric1) + self.assertEqual(metric1.result(), 1.5) + self.assertEqual(metric2.result(), 0.0) + + concrete_f = func.get_concrete_function(metric1._type_spec) + concrete_f(metric2) + self.assertEqual(metric1.result(), 1.5) + self.assertEqual(metric2.result(), 1.5) + + def testModelEvaluatePrototype(self): + def metric_fn(): + return MeanMetricAsCompositeTensor() + + # TODO(yuefengz): make _create_per_worker_resources public and get rid + # of the type_spec hack. + per_worker_metric = self.cluster_coord._create_per_worker_resources( + metric_fn + ) + + metric_on_coordinator = metric_fn() + + for metric_remote_value in per_worker_metric._values: + metric_remote_value._type_spec = metric_on_coordinator._type_spec + + def dataset_fn(): + return tf.data.Dataset.range(1024) + + # TODO(yuefengz): integrate it into model.evaluate. + + @tf.function + def eval_fn(total_shard, shard_id, metric): + metric.reset_states() + dataset_shard = dataset_fn().shard(total_shard, shard_id) + for i in dataset_shard: + metric.update_state(i) + + # TODO(yuefengz): we should return the internal state of the metric + # and then use the combiner API. 
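+            # Returning the scalar result is exact in this test only because
+            # range(1024) splits into 128 equal shards of 8 elements each, so
+            # the mean of the per-shard means equals the global mean.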
+ return metric.result() + + total_shards = 128 + result_remote_values = [] + for i in range(total_shards): + result_remote_values.append( + self.cluster_coord.schedule( + eval_fn, args=(total_shards, i, per_worker_metric) + ) + ) + + self._cluster.kill_task("worker", 0) + self._cluster.kill_task("worker", 1) + time.sleep(1) + self._cluster.start_task("worker", 0) + self._cluster.start_task("worker", 1) + + results = [r.fetch() for r in result_remote_values] + result = sum(results) / len(results) + self.assertEqual(result, 511.5) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/parameter_server_exact_evaluation_test.py b/keras/distribute/parameter_server_exact_evaluation_test.py new file mode 100644 index 000000000000..097fbdffdba3 --- /dev/null +++ b/keras/distribute/parameter_server_exact_evaluation_test.py @@ -0,0 +1,484 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for evaluation using Keras model and ParameterServerStrategy.""" +import threading +import time + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized +from tensorflow.python.platform import tf_logging as logging + +import keras +from keras.metrics import base_metric +from keras.testing_infra import test_utils +from keras.utils import dataset_creator +from keras.utils import tf_utils + +# isort: off +from tensorflow.python.distribute import ( + multi_worker_test_base, +) +from tensorflow.python.distribute.cluster_resolver import ( + SimpleClusterResolver, +) + + +def _aggregate_results(coordinator_metrics, results): + for result in results: + for metric in coordinator_metrics: + if metric.name == "loss": + continue + assert metric.name in result.keys() + metric_result = result[metric.name] + assert len(metric_result) == len(metric.weights) + for weight, val in zip(metric.weights, metric_result): + weight.assign_add(val) + return coordinator_metrics + + +def make_binary_dataset_fn(num_examples, num_data_shards, batch_size): + def dataset_fn(input_context=None): + del input_context + x = np.arange(num_examples) + + def make_batch_with_n_true(n): + return np.concatenate((np.ones(n), np.zeros(batch_size - n))) + + y = np.zeros(num_examples) + batch_idxs = np.arange(num_examples // batch_size) + for shard_idx in range(num_data_shards): + num_correct = shard_idx + # Dataset.shard uses mod sharding, so each shard consists of the + # batches whose index mod (num_data_shards) = shard_idx + batch_idxs_for_shard = np.where( + np.mod(batch_idxs, num_data_shards) == shard_idx + )[0] + for batch_idx in batch_idxs_for_shard: + # Select the individual data elements for this batch + batch_range = range( + batch_idx * batch_size, (batch_idx + 1) * batch_size + ) + num_for_batch = min(num_correct, batch_size) + 
y[batch_range] = make_batch_with_n_true(num_for_batch) + num_correct -= num_for_batch + + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + + dataset = dataset.batch(batch_size) + return dataset + + return dataset_fn + + +def make_multiclass_dataset_fn( + num_examples, num_data_shards, batch_size, n_classes +): + def dataset_fn(input_context=None): + del input_context + x = np.arange(num_examples) + y = np.mod(np.arange(num_examples), n_classes) + y[y == 0] = 1 + y = tf.convert_to_tensor(y, dtype=tf.int64) + weights = np.random.uniform(size=num_examples) + dataset = tf.data.Dataset.from_tensor_slices((x, y, weights)).batch( + batch_size + ) + return dataset + + return dataset_fn + + +@test_utils.run_v2_only +class ExactEvaluationTest(tf.test.TestCase, parameterized.TestCase): + def setUp(self): + super(ExactEvaluationTest, self).setUp() + self._cluster = multi_worker_test_base.create_multi_process_cluster( + num_workers=5, num_ps=1, rpc_layer="grpc" + ) + self._cluster_def = ( + self._cluster.cluster_resolver.cluster_spec().as_dict() + ) + cluster_resolver = SimpleClusterResolver( + tf.train.ClusterSpec(self._cluster_def), rpc_layer="grpc" + ) + + self.strategy = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver + ) + self.cluster_coord = ( + tf.distribute.experimental.coordinator.ClusterCoordinator( + self.strategy + ) + ) + + def tearDown(self): + super(ExactEvaluationTest, self).tearDown() + self._cluster.stop() + self._cluster = None + + def testDistributedMetrics(self): + coordinator_metrics = [ + keras.metrics.AUC(), + keras.metrics.MeanAbsoluteError(), + ] + + def dataset_fn(): + y_true = np.concatenate((np.zeros(512), np.ones(512))) + y_pred = np.concatenate( + (np.linspace(0, 1, 512), np.linspace(0, 1, 512)) + ) + return tf.data.Dataset.from_tensor_slices((y_true, y_pred)).batch(1) + + @tf.function + def eval_shard_fn(total_shard, shard_id, worker_dataset): + with tf_utils.with_metric_local_vars_scope(): + worker_metrics = [] + for coord_metric in coordinator_metrics: + worker_metrics.append( + base_metric.clone_metric(coord_metric) + ) + + dataset_shard = worker_dataset.shard(total_shard, shard_id) + + for value in dataset_shard: + for worker_metric in worker_metrics: + worker_metric.update_state(*value) + + return { + metric.name: metric.weights for metric in worker_metrics + } + + per_worker_dataset = self.cluster_coord.create_per_worker_dataset( + dataset_fn() + ) + # Trigger dataset creation on workers without creating an iterator + built_dataset = per_worker_dataset.build() + + # needs to be a tf.constant so it doesn't get re-traced each time + # needs to be int64 because that's what Dataset.shard expects + total_shards = tf.constant(100, dtype=tf.int64) + + result_remote_values = [] + logging.info("Scheduling eval closures") + for i in tf.range(total_shards): + result_remote_values.append( + self.cluster_coord.schedule( + eval_shard_fn, + args=(total_shards, i, built_dataset), + ) + ) + + logging.info("Killing 2 workers") + self._cluster.kill_task("worker", 0) + self._cluster.kill_task("worker", 1) + time.sleep(1) + self._cluster.start_task("worker", 0) + self._cluster.start_task("worker", 1) + + self.cluster_coord.join() + results = [r.fetch() for r in result_remote_values] + coordinator_metrics = _aggregate_results(coordinator_metrics, results) + + expected_results = {"auc": 0.5, "mean_absolute_error": 0.5} + for metric in coordinator_metrics: + self.assertAlmostEqual( + metric.result().numpy(), expected_results[metric.name], places=5 + ) + + def 
testModelAddMetricErrors(self):
+        class MyModel(keras.Model):
+            def call(self, x):
+                self.add_metric(
+                    tf.cast(x >= 0, tf.float32),
+                    aggregation="sum",
+                    name="num_positive",
+                )
+                return tf.cast(tf.add(x, 1), tf.float32)
+
+        dataset = tf.data.Dataset.zip(
+            (tf.data.Dataset.range(-5, 5), tf.data.Dataset.range(-4, 6))
+        ).batch(1)
+        with self.strategy.scope():
+            model = MyModel()
+            model.compile(
+                metrics=[keras.metrics.Accuracy()],
+                loss="binary_crossentropy",
+                pss_evaluation_shards="auto",
+            )
+
+        # run a single train step to compile metrics
+        model.fit(dataset, steps_per_epoch=1)
+        with self.assertRaises(ValueError):
+            model.evaluate(dataset, return_dict=True)
+
+    def testModelInfiniteDatasetErrors(self):
+        dataset = tf.data.Dataset.range(10).repeat()
+        with self.strategy.scope():
+            model = keras.Model()
+            model.compile(pss_evaluation_shards="auto")
+        with self.assertRaisesRegex(
+            ValueError,
+            "When performing exact evaluation, the dataset must "
+            r"be finite. Make sure not to call `repeat\(\)` on your "
+            "dataset.",
+        ):
+            model.evaluate(dataset)
+
+    def testTrainingWithVariablesCreatedInFunction(self):
+        # When metrics are specified via string, they are instantiated in a
+        # tf.function in the first pass of the model when update_state is
+        # called. This use case should not be affected by exact visitation
+        # guarantee support.
+
+        class MyModel(keras.Model):
+            @tf.function
+            def worker_fn(self, y_true, y_pred):
+                self.compiled_metrics.update_state(y_true, y_pred)
+
+        with self.strategy.scope():
+            model = MyModel()
+            model.compile(metrics=["accuracy"])
+
+        y_true_0 = tf.convert_to_tensor([[0.0], [0.0], [0.0]])
+        y_pred_0 = tf.convert_to_tensor([[0.0], [0.0], [1.0]])
+        self.cluster_coord.schedule(model.worker_fn, args=(y_true_0, y_pred_0))
+
+        y_true_1 = tf.convert_to_tensor([[0.0], [0.0], [0.0]])
+        y_pred_1 = tf.convert_to_tensor([[0.0], [1.0], [1.0]])
+        self.cluster_coord.schedule(model.worker_fn, args=(y_true_1, y_pred_1))
+
+        self.cluster_coord.join()
+        for metric in model.compiled_metrics.metrics:
+            self.assertAlmostEqual(metric.result().numpy(), 0.5)
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            input_type=["dataset", "dataset_creator", "distributed_dataset"],
+            eval_in_model_fit=[True, False],
+            use_auto=[True, False],
+            custom_metric=[True, False],
+        )
+    )
+    def testDistributedModelEvaluation(
+        self, input_type, eval_in_model_fit, use_auto, custom_metric
+    ):
+        # Define dataset by batch size, number of shards, and batches per shard
+        batch_size = 16
+        num_data_shards = 32
+        batches_per_shard = 4
+        num_examples = batch_size * num_data_shards * batches_per_shard
+
+        # Input dataset x: just the sequence of numbers up to the dataset size
+        # Input dataset y: defined such that each shard has index equal to the
+        # number of y_i's == True in that shard
+        expected_acc = sum(range(num_data_shards)) / num_examples
+
+        # The predictions y_pred from this dummy model are fixed to True. This
+        # way we can control the expected accuracy by just modifying y.
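+        # Worked example with smaller, hypothetical numbers: with
+        # num_data_shards=4, batches_per_shard=1, and batch_size=16, shard k
+        # holds exactly k positive labels, so an always-True predictor gets
+        # accuracy (0 + 1 + 2 + 3) / 64 = 0.09375.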
+ class BinaryModel(keras.Model): + def __call__(self, x, training=False): + return tf.cast(x >= 0, tf.float32) + + class CustomAccuracy(keras.metrics.Metric): + def __init__(self, name="custom_acc", dtype=None): + super().__init__(name, dtype) + self.total = self.add_weight("total", initializer="zeros") + self.count = self.add_weight("count", initializer="zeros") + + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = tf.cast(y_true, tf.float32) + y_pred = tf.cast(y_pred, tf.float32) + matches = tf.cast(tf.equal(y_true, y_pred), tf.float32) + count = tf.reduce_sum(matches) + self.count.assign_add(count) + total = tf.cast(tf.size(y_true), tf.float32) + self.total.assign_add(total) + + def result(self): + return self.count / self.total + + def reset_state(self): + self.total.assign(0) + self.count.assign(0) + + def build_metric(): + metric = ( + CustomAccuracy() if custom_metric else keras.metrics.Accuracy() + ) + return metric + + dataset_fn = make_binary_dataset_fn( + num_examples, num_data_shards, batch_size + ) + + loss = "mae" + + logging.info("Local evaluation (exact)") + model = BinaryModel() + model.compile(metrics=[build_metric()], loss=loss) + ground_truth_evaluation = model.evaluate(dataset_fn()) + logging.info( + "Result local evaluation (exact): %s", ground_truth_evaluation + ) + self.assertAlmostEqual(ground_truth_evaluation[1], expected_acc) + # Since outputs are always 0 or 1, MAE loss should == 1 - accuracy + self.assertAlmostEqual(ground_truth_evaluation[0], 1 - expected_acc) + + logging.info("Distributed evaluation (exact)") + if use_auto: + num_shards = "auto" + else: + num_shards = 5 * self.strategy._extended._num_workers + + with self.strategy.scope(): + model = BinaryModel() + model.compile( + metrics=[build_metric()], + loss=loss, + pss_evaluation_shards=num_shards, + ) + + if input_type == "dataset": + train_dataset = dataset_fn() + val_dataset = dataset_fn() + elif input_type == "dataset_creator": + train_dataset = dataset_creator.DatasetCreator(dataset_fn) + val_dataset = dataset_creator.DatasetCreator(dataset_fn) + elif input_type == "distributed_dataset": + train_dataset = self.strategy.experimental_distribute_dataset( + dataset_fn() + ) + val_dataset = self.strategy.experimental_distribute_dataset( + dataset_fn() + ) + + metric_name = "custom_acc" if custom_metric else "accuracy" + expected_results = {metric_name: expected_acc, "loss": 1 - expected_acc} + + def kill_and_revive_in_thread(wait_secs=0.1): + def _kill_and_revive_fn(): + time.sleep(wait_secs) + logging.info("Killing 2 workers") + self._cluster.kill_task("worker", 0) + self._cluster.kill_task("worker", 1) + time.sleep(1) + self._cluster.start_task("worker", 0) + self._cluster.start_task("worker", 1) + + restart_thread = threading.Thread(target=_kill_and_revive_fn) + restart_thread.start() + return restart_thread + + eval_results = {} + if eval_in_model_fit: + kill_and_revive_in_thread() + history = model.fit( + train_dataset, + steps_per_epoch=1, + validation_data=val_dataset, + ) + logging.info( + "History: params (%r), history (%r)", + history.params, + history.history, + ) + eval_results = { + metric.split("val_")[1]: val[-1] + for metric, val in history.history.items() + if metric.startswith("val_") + } + else: + # run a single train step to compile metrics + model.fit(train_dataset, steps_per_epoch=1) + kill_and_revive_in_thread() + eval_results = model.evaluate(val_dataset, return_dict=True) + eval_results = { + metric: val.numpy() for metric, val in eval_results.items() + } 
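+        # Whether evaluation ran inside fit() or standalone, the sharded
+        # results should match the exact single-process numbers computed
+        # above, even though two workers are killed and revived mid-run.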
+ for metric, val in eval_results.items(): + self.assertIn(metric, expected_results) + self.assertAlmostEqual(val, expected_results[metric], places=5) + + def testDistributedMulticlassWeightedEvaluation(self): + n_classes = 5 + + # Define dataset by batch size, number of shards, and batches per shard + batch_size = n_classes * 2 + num_data_shards = 32 + batches_per_shard = 4 + num_examples = batch_size * num_data_shards * batches_per_shard + expected_acc = 4 / 5 + + class MulticlassModel(keras.Model): + def __call__(self, x, training=False): + # e.g. x = 6 -> y_pred = [0, 1, 0, 0, 0] + return tf.squeeze( + tf.one_hot( + indices=[tf.math.floormod(x, n_classes)], + depth=n_classes, + ) + ) + + dataset_fn = make_multiclass_dataset_fn( + num_examples, num_data_shards, batch_size, n_classes + ) + + model = MulticlassModel() + model.compile( + metrics=[ + keras.metrics.SparseCategoricalAccuracy(), + keras.metrics.SparseCategoricalCrossentropy(), + ], + weighted_metrics=[keras.metrics.SparseCategoricalCrossentropy()], + loss="sparse_categorical_crossentropy", + ) + eval_dataset = dataset_fn() + ground_truth_evaluation = model.evaluate(eval_dataset, return_dict=True) + self.assertAlmostEqual( + ground_truth_evaluation["sparse_categorical_accuracy"], expected_acc + ) + + with self.strategy.scope(): + model = MulticlassModel() + model.compile( + metrics=[ + keras.metrics.SparseCategoricalAccuracy(), + keras.metrics.SparseCategoricalCrossentropy(), + ], + weighted_metrics=[ + keras.metrics.SparseCategoricalCrossentropy() + ], + loss="sparse_categorical_crossentropy", + pss_evaluation_shards=num_data_shards, + ) + + # run a single train step to compile metrics + train_dataset = dataset_fn() + model.fit(train_dataset, steps_per_epoch=1) + + eval_results = model.evaluate(eval_dataset, return_dict=True) + eval_results = { + metric: val.numpy() for metric, val in eval_results.items() + } + for metric, val in eval_results.items(): + self.assertIn(metric, ground_truth_evaluation) + self.assertAlmostEqual( + val, ground_truth_evaluation[metric], places=4 + ) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/distribute/saved_model_mixed_api_test.py b/keras/distribute/saved_model_mixed_api_test.py index fb901ca3a9ca..0aaeed7c1143 100644 --- a/keras/distribute/saved_model_mixed_api_test.py +++ b/keras/distribute/saved_model_mixed_api_test.py @@ -20,61 +20,81 @@ tf.saved_model.save(). 
""" +import tensorflow.compat.v2 as tf + from keras.distribute import saved_model_test_base as test_base -from keras.saving import save +from keras.saving.legacy import save from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf -_DEFAULT_FUNCTION_KEY = 'serving_default' +_DEFAULT_FUNCTION_KEY = "serving_default" @test_utils.run_all_without_tensor_float_32( - 'Uses Dense layers, which call matmul') + "Uses Dense layers, which call matmul" +) class SavedModelSaveAndLoadTest(test_base.TestSavedModelBase): + def setUp(self): + self._root_dir = "saved_model_save_load" + super().setUp() - def setUp(self): - self._root_dir = 'saved_model_save_load' - super().setUp() - - def _save_model(self, model, saved_dir): - save.save_model(model, saved_dir, save_format='tf') + def _save_model(self, model, saved_dir): + save.save_model(model, saved_dir, save_format="tf") - def _load_and_run_model(self, - distribution, - saved_dir, - predict_dataset, - output_name='output_1'): - return test_base.load_and_run_with_saved_model_api(distribution, saved_dir, - predict_dataset, - output_name) + def _load_and_run_model( + self, distribution, saved_dir, predict_dataset, output_name="output_1" + ): + return test_base.load_and_run_with_saved_model_api( + distribution, saved_dir, predict_dataset, output_name + ) - @tf.__internal__.distribute.combinations.generate(test_base.simple_models_with_strategies()) - def test_save_no_strategy_restore_strategy(self, model_and_input, - distribution): - self.run_test_save_no_strategy_restore_strategy( - model_and_input, distribution) + @tf.__internal__.distribute.combinations.generate( + test_base.simple_models_with_strategies() + ) + def test_save_no_strategy_restore_strategy( + self, model_and_input, distribution + ): + self.run_test_save_no_strategy_restore_strategy( + model_and_input, distribution + ) - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(test_base.simple_models_with_strategies(), - tf.__internal__.test.combinations.combine(save_in_scope=[True, False]))) - def test_save_strategy_restore_no_strategy(self, model_and_input, - distribution, save_in_scope): - self.run_test_save_strategy_restore_no_strategy( - model_and_input, distribution, save_in_scope) + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + test_base.simple_models_with_strategies(), + tf.__internal__.test.combinations.combine( + save_in_scope=[True, False] + ), + ) + ) + def test_save_strategy_restore_no_strategy( + self, model_and_input, distribution, save_in_scope + ): + self.run_test_save_strategy_restore_no_strategy( + model_and_input, distribution, save_in_scope + ) - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(test_base.simple_models_with_strategy_pairs(), - tf.__internal__.test.combinations.combine(save_in_scope=[True, False]))) - def test_save_strategy_restore_strategy(self, model_and_input, - distribution_for_saving, - distribution_for_restoring, - save_in_scope): - self.run_test_save_strategy_restore_strategy(model_and_input, - distribution_for_saving, - distribution_for_restoring, - save_in_scope) + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + test_base.simple_models_with_strategy_pairs(), + tf.__internal__.test.combinations.combine( + save_in_scope=[True, False] + ), + ) + ) + def test_save_strategy_restore_strategy( + self, + model_and_input, + distribution_for_saving, + 
distribution_for_restoring, + save_in_scope, + ): + self.run_test_save_strategy_restore_strategy( + model_and_input, + distribution_for_saving, + distribution_for_restoring, + save_in_scope, + ) -if __name__ == '__main__': - tf.compat.v1.enable_eager_execution() - tf.test.main() +if __name__ == "__main__": + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/distribute/saved_model_save_load_test.py b/keras/distribute/saved_model_save_load_test.py index da91996aa17c..2ca75d238a83 100644 --- a/keras/distribute/saved_model_save_load_test.py +++ b/keras/distribute/saved_model_save_load_test.py @@ -14,161 +14,214 @@ # ============================================================================== """Tests for saving and loading using tf's saved_model APIs with DS.""" +import os + import tensorflow.compat.v2 as tf -import os -from keras.testing_infra import test_utils from keras.distribute import model_combinations from keras.distribute import saved_model_test_base as test_base +from keras.testing_infra import test_utils @test_utils.run_v2_only @test_utils.run_all_without_tensor_float_32( - 'Uses Dense layers, which call matmul') + "Uses Dense layers, which call matmul" +) class SavedModelKerasModelTest(test_base.TestSavedModelBase): - - def setUp(self): - self._root_dir = 'saved_model_save_load' - super().setUp() - - def _save_model(self, model, saved_dir): - tf.saved_model.save(model, saved_dir) - - def _load_and_run_model(self, - distribution, - saved_dir, - predict_dataset, - output_name='output_1'): - return test_base.load_and_run_with_saved_model_api(distribution, saved_dir, - predict_dataset, - output_name) - - @tf.__internal__.distribute.combinations.generate(test_base.simple_models_with_strategies()) - def test_save_no_strategy_restore_strategy(self, model_and_input, - distribution): - self.run_test_save_no_strategy_restore_strategy( - model_and_input, distribution) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(test_base.simple_models_with_strategies(), - tf.__internal__.test.combinations.combine(save_in_scope=[True, False]))) - def test_save_strategy_restore_no_strategy(self, model_and_input, - distribution, save_in_scope): - self.run_test_save_strategy_restore_no_strategy( - model_and_input, distribution, save_in_scope) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(test_base.simple_models_with_strategy_pairs(), - tf.__internal__.test.combinations.combine(save_in_scope=[True, False]))) - def test_save_strategy_restore_strategy(self, model_and_input, - distribution_for_saving, - distribution_for_restoring, - save_in_scope): - self.run_test_save_strategy_restore_strategy(model_and_input, - distribution_for_saving, - distribution_for_restoring, - save_in_scope) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(test_base.simple_models_with_strategies(), - tf.__internal__.test.combinations.combine(save_in_scope=[True, False]))) - def test_no_variable_device_placement(self, model_and_input, distribution, - save_in_scope): - saved_dir = self.run_test_save_strategy(model_and_input, distribution, - save_in_scope) - func = tf.saved_model.load(saved_dir) - concrete_function = func.signatures[test_base._DEFAULT_FUNCTION_KEY] - for f in concrete_function.graph.as_graph_def().library.function: - for n in f.node_def: - if n.op == 'ReadVariableOp': - self.assertEmpty(n.device) + def setUp(self): + self._root_dir = 
"saved_model_save_load" + super().setUp() + + def _save_model(self, model, saved_dir): + tf.saved_model.save(model, saved_dir) + + def _load_and_run_model( + self, distribution, saved_dir, predict_dataset, output_name="output_1" + ): + return test_base.load_and_run_with_saved_model_api( + distribution, saved_dir, predict_dataset, output_name + ) + + @tf.__internal__.distribute.combinations.generate( + test_base.simple_models_with_strategies() + ) + def test_save_no_strategy_restore_strategy( + self, model_and_input, distribution + ): + self.run_test_save_no_strategy_restore_strategy( + model_and_input, distribution + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + test_base.simple_models_with_strategies(), + tf.__internal__.test.combinations.combine( + save_in_scope=[True, False] + ), + ) + ) + def test_save_strategy_restore_no_strategy( + self, model_and_input, distribution, save_in_scope + ): + self.run_test_save_strategy_restore_no_strategy( + model_and_input, distribution, save_in_scope + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + test_base.simple_models_with_strategy_pairs(), + tf.__internal__.test.combinations.combine( + save_in_scope=[True, False] + ), + ) + ) + def test_save_strategy_restore_strategy( + self, + model_and_input, + distribution_for_saving, + distribution_for_restoring, + save_in_scope, + ): + self.run_test_save_strategy_restore_strategy( + model_and_input, + distribution_for_saving, + distribution_for_restoring, + save_in_scope, + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + test_base.simple_models_with_strategies(), + tf.__internal__.test.combinations.combine( + save_in_scope=[True, False] + ), + ) + ) + def test_no_variable_device_placement( + self, model_and_input, distribution, save_in_scope + ): + saved_dir = self.run_test_save_strategy( + model_and_input, distribution, save_in_scope + ) + func = tf.saved_model.load(saved_dir) + concrete_function = func.signatures[test_base._DEFAULT_FUNCTION_KEY] + for f in concrete_function.graph.as_graph_def().library.function: + for n in f.node_def: + if n.op == "ReadVariableOp": + self.assertEmpty(n.device) @test_utils.run_v2_only class SavedModelTFModuleTest(test_base.TestSavedModelBase): - - def setUp(self): - self._root_dir = 'saved_model_save_load' - super().setUp() - - def _train_model(self, model, x_train, y_train, batch_size): - pass - - def _predict_with_model(self, distribution, model, predict_dataset): - if distribution: - dist_predict_dataset = distribution.experimental_distribute_dataset( - predict_dataset) - per_replica_predict_data = next(iter(dist_predict_dataset)) - result = distribution.run(model, args=(per_replica_predict_data,)) - # Convert the per_replica value to a list, then concatenate them - reduced = distribution.experimental_local_results(result) - concat = tf.concat(reduced, 0) - return concat - else: - return model(next(iter(predict_dataset))) - - def _save_model(self, model, saved_dir): - call = model.__call__.get_concrete_function(tf.TensorSpec(None)) - tf.saved_model.save(model, saved_dir, signatures=call) - - def _load_and_run_model(self, - distribution, - saved_dir, - predict_dataset, - output_name='output_1'): - del output_name - model = tf.saved_model.load(saved_dir) - return self._predict_with_model(distribution, model, predict_dataset) - - 
@tf.__internal__.distribute.combinations.generate(test_base.tfmodule_models_with_strategies()) - def test_save_no_strategy_restore_strategy(self, model_and_input, - distribution): - self.run_test_save_no_strategy_restore_strategy( - model_and_input, distribution) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(test_base.tfmodule_models_with_strategies(), - tf.__internal__.test.combinations.combine(save_in_scope=[True, False]))) - def test_save_strategy_restore_no_strategy( - self, model_and_input, distribution, save_in_scope): - self.run_test_save_strategy_restore_no_strategy( - model_and_input, distribution, save_in_scope) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.times(test_base.tfmodule_models_with_strategy_pairs(), - tf.__internal__.test.combinations.combine(save_in_scope=[True, False]))) - def test_save_strategy_restore_strategy(self, model_and_input, - distribution_for_saving, - distribution_for_restoring, - save_in_scope): - self.run_test_save_strategy_restore_strategy(model_and_input, - distribution_for_saving, - distribution_for_restoring, - save_in_scope) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - model_and_input=[model_combinations.simple_tfmodule_model], - distribution=test_base.strategies + - [tf.__internal__.distribute.combinations.cloud_tpu_strategy])) - def test_save_load_io_device(self, model_and_input, distribution): - saved_dir = os.path.join(self.get_temp_dir(), 'io_device') - with distribution.scope(): - model = model_and_input.get_model() - x_train, y_train, _ = model_and_input.get_data() - batch_size = model_and_input.get_batch_size() - self._train_model(model, x_train, y_train, batch_size) - call = model.__call__.get_concrete_function(tf.TensorSpec(None)) - save_options = tf.saved_model.SaveOptions( - experimental_io_device='/job:localhost') - tf.saved_model.save(model, saved_dir, signatures=call, options=save_options) - load_options = tf.saved_model.LoadOptions( - experimental_io_device='/job:localhost') - # Check that the model can be loaded and training continued without error. 
- with distribution.scope(): - loaded_model = tf.saved_model.load(saved_dir, options=load_options) - self._train_model(loaded_model, x_train, y_train, batch_size) - - -if __name__ == '__main__': - tf.test.main() + def setUp(self): + self._root_dir = "saved_model_save_load" + super().setUp() + + def _train_model(self, model, x_train, y_train, batch_size): + pass + + def _predict_with_model(self, distribution, model, predict_dataset): + if distribution: + dist_predict_dataset = distribution.experimental_distribute_dataset( + predict_dataset + ) + per_replica_predict_data = next(iter(dist_predict_dataset)) + result = distribution.run(model, args=(per_replica_predict_data,)) + # Convert the per_replica value to a list, then concatenate them + reduced = distribution.experimental_local_results(result) + concat = tf.concat(reduced, 0) + return concat + else: + return model(next(iter(predict_dataset))) + + def _save_model(self, model, saved_dir): + call = model.__call__.get_concrete_function(tf.TensorSpec(None)) + tf.saved_model.save(model, saved_dir, signatures=call) + + def _load_and_run_model( + self, distribution, saved_dir, predict_dataset, output_name="output_1" + ): + del output_name + model = tf.saved_model.load(saved_dir) + return self._predict_with_model(distribution, model, predict_dataset) + + @tf.__internal__.distribute.combinations.generate( + test_base.tfmodule_models_with_strategies() + ) + def test_save_no_strategy_restore_strategy( + self, model_and_input, distribution + ): + self.run_test_save_no_strategy_restore_strategy( + model_and_input, distribution + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + test_base.tfmodule_models_with_strategies(), + tf.__internal__.test.combinations.combine( + save_in_scope=[True, False] + ), + ) + ) + def test_save_strategy_restore_no_strategy( + self, model_and_input, distribution, save_in_scope + ): + self.run_test_save_strategy_restore_no_strategy( + model_and_input, distribution, save_in_scope + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.times( + test_base.tfmodule_models_with_strategy_pairs(), + tf.__internal__.test.combinations.combine( + save_in_scope=[True, False] + ), + ) + ) + def test_save_strategy_restore_strategy( + self, + model_and_input, + distribution_for_saving, + distribution_for_restoring, + save_in_scope, + ): + self.run_test_save_strategy_restore_strategy( + model_and_input, + distribution_for_saving, + distribution_for_restoring, + save_in_scope, + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + model_and_input=[model_combinations.simple_tfmodule_model], + distribution=test_base.strategies + + [tf.__internal__.distribute.combinations.cloud_tpu_strategy], + ) + ) + def test_save_load_io_device(self, model_and_input, distribution): + saved_dir = os.path.join(self.get_temp_dir(), "io_device") + with distribution.scope(): + model = model_and_input.get_model() + x_train, y_train, _ = model_and_input.get_data() + batch_size = model_and_input.get_batch_size() + self._train_model(model, x_train, y_train, batch_size) + call = model.__call__.get_concrete_function(tf.TensorSpec(None)) + save_options = tf.saved_model.SaveOptions( + experimental_io_device="/job:localhost" + ) + tf.saved_model.save( + model, saved_dir, signatures=call, options=save_options + ) + load_options = tf.saved_model.LoadOptions( + experimental_io_device="/job:localhost" + ) + # Check that the model can 
be loaded and training continued without + # error. + with distribution.scope(): + loaded_model = tf.saved_model.load(saved_dir, options=load_options) + self._train_model(loaded_model, x_train, y_train, batch_size) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/distribute/saved_model_test_base.py b/keras/distribute/saved_model_test_base.py index 576a6d836021..09e8e5aff184 100644 --- a/keras/distribute/saved_model_test_base.py +++ b/keras/distribute/saved_model_test_base.py @@ -16,14 +16,14 @@ import os -from absl.testing import parameterized -from keras.distribute import model_combinations import numpy as np - import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.distribute import model_combinations _RANDOM_SEED = 1337 -_DEFAULT_FUNCTION_KEY = 'serving_default' +_DEFAULT_FUNCTION_KEY = "serving_default" _TOLERANCE = 1e-30 # TPU uses bfloat16 for computation in hardware underlying, so it has less @@ -49,219 +49,239 @@ tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, tf.__internal__.distribute.combinations.tpu_strategy, tf.__internal__.distribute.combinations.tpu_strategy_packed_var, - tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, + tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus, # noqa: E501 ] def simple_models_with_strategies(): - return tf.__internal__.test.combinations.combine( - model_and_input=simple_models, - distribution=strategies, - mode=['eager']) + return tf.__internal__.test.combinations.combine( + model_and_input=simple_models, distribution=strategies, mode=["eager"] + ) def simple_models_with_strategy_pairs(): - return tf.__internal__.test.combinations.combine( - model_and_input=simple_models, - distribution_for_saving=strategies, - distribution_for_restoring=strategies, - mode=['eager']) + return tf.__internal__.test.combinations.combine( + model_and_input=simple_models, + distribution_for_saving=strategies, + distribution_for_restoring=strategies, + mode=["eager"], + ) def tfmodule_models_with_strategies(): - return tf.__internal__.test.combinations.combine( - model_and_input=[model_combinations.simple_tfmodule_model], - distribution=strategies, - mode=['eager']) + return tf.__internal__.test.combinations.combine( + model_and_input=[model_combinations.simple_tfmodule_model], + distribution=strategies, + mode=["eager"], + ) def tfmodule_models_with_strategy_pairs(): - return tf.__internal__.test.combinations.combine( - model_and_input=[model_combinations.simple_tfmodule_model], - distribution_for_saving=strategies, - distribution_for_restoring=strategies, - mode=['eager']) - - -def load_and_run_with_saved_model_api(distribution, saved_dir, predict_dataset, - output_name): - """Loads a saved_model using tf.saved_model API, and runs it.""" - func = tf.saved_model.load(saved_dir) - if distribution: - dist_predict_dataset = distribution.experimental_distribute_dataset( - predict_dataset) - per_replica_predict_data = next(iter(dist_predict_dataset)) - result = distribution.run( - func.signatures[_DEFAULT_FUNCTION_KEY], - args=(per_replica_predict_data,)) - result = result[output_name] - - # Convert the per_replica value to a list, then concatenate them - reduced = distribution.experimental_local_results(result) - concat = tf.concat(reduced, 0) - return concat - else: - result = func.signatures[_DEFAULT_FUNCTION_KEY](next(iter(predict_dataset))) - return result[output_name] - - -class TestSavedModelBase(tf.test.TestCase, 
parameterized.TestCase): - """Base class for testing saving/loading with DS.""" - - def setUp(self): - np.random.seed(_RANDOM_SEED) - tf.compat.v1.set_random_seed(_RANDOM_SEED) - self._root_dir = 'base' - super().setUp() - - def _save_model(self, model, saved_dir): - """Save the given model to the given saved_dir. - - This method needs to be implemented by the subclasses. - - Args: - model: a keras model object to save. - saved_dir: a string representing the path to save the keras model - """ - raise NotImplementedError('must be implemented in descendants') - - def _load_and_run_model(self, - distribution, - saved_dir, - predict_dataset, - output_name='output_1'): - """Load the model and run 1 step of predict with it. - - This method must be implemented by the subclasses. - - Args: - distribution: the distribution strategy used to load the model. None if no - distribution strategy is used - saved_dir: the string representing the path where the model is saved. - predict_dataset: the data used to do the predict on the model for - cross_replica context. - output_name: the string representing the name of the output layer of the - model. - """ - - raise NotImplementedError('must be implemented in descendants') - - def _train_model(self, model, x_train, y_train, batch_size): - training_dataset = tf.data.Dataset.from_tensor_slices( - (x_train, y_train)) - training_dataset = training_dataset.repeat() - training_dataset = training_dataset.batch(batch_size) - - # Train the model for 1 epoch - model.fit(x=training_dataset, epochs=1, steps_per_epoch=100) - - def _predict_with_model(self, distribution, model, predict_dataset): - return model.predict(predict_dataset, steps=PREDICT_STEPS) - - def _get_predict_dataset(self, x_predict, batch_size): - predict_dataset = tf.data.Dataset.from_tensor_slices(x_predict) - predict_dataset = predict_dataset.repeat() - predict_dataset = predict_dataset.batch(batch_size) - return predict_dataset - - def run_test_save_no_strategy_restore_strategy(self, model_and_input, - distribution): - """Save a model without DS, and restore it with DS.""" - - saved_dir = os.path.join(self.get_temp_dir(), '0') - - model = model_and_input.get_model() - x_train, y_train, x_predict = model_and_input.get_data() - batch_size = model_and_input.get_batch_size() - predict_dataset = self._get_predict_dataset(x_predict, batch_size) - - self._train_model(model, x_train, y_train, batch_size) - result_before_save = self._predict_with_model(None, model, predict_dataset) - - self._save_model(model, saved_dir) - - with distribution.scope(): - result_after_save = self._load_and_run_model( - distribution=distribution, - saved_dir=saved_dir, - predict_dataset=predict_dataset) - - self.assertAllClose(result_before_save, result_after_save) - - def run_test_save_strategy_restore_no_strategy(self, model_and_input, - distribution, save_in_scope): - """Save a model with DS, and restore it without DS.""" - - saved_dir = os.path.join(self.get_temp_dir(), '1') - - with distribution.scope(): - model = model_and_input.get_model() - x_train, y_train, x_predict = model_and_input.get_data() - batch_size = model_and_input.get_batch_size() - - self._train_model(model, x_train, y_train, batch_size) - predict_dataset = self._get_predict_dataset(x_predict, batch_size) - result_before_save = self._predict_with_model( - distribution, model, predict_dataset) - - if save_in_scope: - with distribution.scope(): - self._save_model(model, saved_dir) - else: - self._save_model(model, saved_dir) - - load_result = 
self._load_and_run_model( - distribution=None, - saved_dir=saved_dir, - predict_dataset=predict_dataset) - - self.assertAllClose(result_before_save, load_result) - - def run_test_save_strategy_restore_strategy(self, model_and_input, - distribution_for_saving, - distribution_for_restoring, - save_in_scope): - """Save a model with DS, and restore it with potentially different DS.""" - saved_dir = os.path.join(self.get_temp_dir(), '2') - - with distribution_for_saving.scope(): - model = model_and_input.get_model() - x_train, y_train, x_predict = model_and_input.get_data() - batch_size = model_and_input.get_batch_size() - - self._train_model(model, x_train, y_train, batch_size) - predict_dataset = self._get_predict_dataset(x_predict, batch_size) - result_before_save = self._predict_with_model( - distribution_for_saving, model, predict_dataset) - - if save_in_scope: - with distribution_for_saving.scope(): - self._save_model(model, saved_dir) + return tf.__internal__.test.combinations.combine( + model_and_input=[model_combinations.simple_tfmodule_model], + distribution_for_saving=strategies, + distribution_for_restoring=strategies, + mode=["eager"], + ) + + +def load_and_run_with_saved_model_api( + distribution, saved_dir, predict_dataset, output_name +): + """Loads a saved_model using tf.saved_model API, and runs it.""" + func = tf.saved_model.load(saved_dir) + if distribution: + dist_predict_dataset = distribution.experimental_distribute_dataset( + predict_dataset + ) + per_replica_predict_data = next(iter(dist_predict_dataset)) + result = distribution.run( + func.signatures[_DEFAULT_FUNCTION_KEY], + args=(per_replica_predict_data,), + ) + result = result[output_name] + + # Convert the per_replica value to a list, then concatenate them + reduced = distribution.experimental_local_results(result) + concat = tf.concat(reduced, 0) + return concat else: - self._save_model(model, saved_dir) - - with distribution_for_restoring.scope(): + result = func.signatures[_DEFAULT_FUNCTION_KEY]( + next(iter(predict_dataset)) + ) + return result[output_name] - load_result = self._load_and_run_model( - distribution=distribution_for_restoring, - saved_dir=saved_dir, - predict_dataset=predict_dataset) - self.assertAllClose(result_before_save, load_result) - - def run_test_save_strategy(self, model_and_input, - distribution, save_in_scope): - """Save a model with DS.""" - saved_dir = os.path.join(self.get_temp_dir(), '3') - with distribution.scope(): - model = model_and_input.get_model() - x_train, y_train, _ = model_and_input.get_data() - batch_size = model_and_input.get_batch_size() - self._train_model(model, x_train, y_train, batch_size) +class TestSavedModelBase(tf.test.TestCase, parameterized.TestCase): + """Base class for testing saving/loading with DS.""" + + def setUp(self): + np.random.seed(_RANDOM_SEED) + tf.compat.v1.set_random_seed(_RANDOM_SEED) + self._root_dir = "base" + super().setUp() + + def _save_model(self, model, saved_dir): + """Save the given model to the given saved_dir. + + This method needs to be implemented by the subclasses. + + Args: + model: a keras model object to save. + saved_dir: a string representing the path to save the keras model + """ + raise NotImplementedError("must be implemented in descendants") + + def _load_and_run_model( + self, distribution, saved_dir, predict_dataset, output_name="output_1" + ): + """Load the model and run 1 step of predict with it. + + This method must be implemented by the subclasses. 
+ + Args: + distribution: the distribution strategy used to load the model. None + if no distribution strategy is used + saved_dir: the string representing the path where the model is saved. + predict_dataset: the data used to do the predict on the model for + cross_replica context. + output_name: the string representing the name of the output layer of + the model. + """ + + raise NotImplementedError("must be implemented in descendants") + + def _train_model(self, model, x_train, y_train, batch_size): + training_dataset = tf.data.Dataset.from_tensor_slices( + (x_train, y_train) + ) + training_dataset = training_dataset.repeat() + training_dataset = training_dataset.batch(batch_size) + + # Train the model for 1 epoch + model.fit(x=training_dataset, epochs=1, steps_per_epoch=100) + + def _predict_with_model(self, distribution, model, predict_dataset): + return model.predict(predict_dataset, steps=PREDICT_STEPS) + + def _get_predict_dataset(self, x_predict, batch_size): + predict_dataset = tf.data.Dataset.from_tensor_slices(x_predict) + predict_dataset = predict_dataset.repeat() + predict_dataset = predict_dataset.batch(batch_size) + return predict_dataset + + def run_test_save_no_strategy_restore_strategy( + self, model_and_input, distribution + ): + """Save a model without DS, and restore it with DS.""" + + saved_dir = os.path.join(self.get_temp_dir(), "0") + + model = model_and_input.get_model() + x_train, y_train, x_predict = model_and_input.get_data() + batch_size = model_and_input.get_batch_size() + predict_dataset = self._get_predict_dataset(x_predict, batch_size) + + self._train_model(model, x_train, y_train, batch_size) + result_before_save = self._predict_with_model( + None, model, predict_dataset + ) - if save_in_scope: - with distribution.scope(): self._save_model(model, saved_dir) - else: - self._save_model(model, saved_dir) - return saved_dir + + with distribution.scope(): + result_after_save = self._load_and_run_model( + distribution=distribution, + saved_dir=saved_dir, + predict_dataset=predict_dataset, + ) + + self.assertAllClose(result_before_save, result_after_save) + + def run_test_save_strategy_restore_no_strategy( + self, model_and_input, distribution, save_in_scope + ): + """Save a model with DS, and restore it without DS.""" + + saved_dir = os.path.join(self.get_temp_dir(), "1") + + with distribution.scope(): + model = model_and_input.get_model() + x_train, y_train, x_predict = model_and_input.get_data() + batch_size = model_and_input.get_batch_size() + + self._train_model(model, x_train, y_train, batch_size) + predict_dataset = self._get_predict_dataset(x_predict, batch_size) + result_before_save = self._predict_with_model( + distribution, model, predict_dataset + ) + + if save_in_scope: + with distribution.scope(): + self._save_model(model, saved_dir) + else: + self._save_model(model, saved_dir) + + load_result = self._load_and_run_model( + distribution=None, + saved_dir=saved_dir, + predict_dataset=predict_dataset, + ) + + self.assertAllClose(result_before_save, load_result) + + def run_test_save_strategy_restore_strategy( + self, + model_and_input, + distribution_for_saving, + distribution_for_restoring, + save_in_scope, + ): + """Save a model with DS, and restore it with potentially different + DS.""" + saved_dir = os.path.join(self.get_temp_dir(), "2") + + with distribution_for_saving.scope(): + model = model_and_input.get_model() + x_train, y_train, x_predict = model_and_input.get_data() + batch_size = model_and_input.get_batch_size() + + 
self._train_model(model, x_train, y_train, batch_size) + predict_dataset = self._get_predict_dataset(x_predict, batch_size) + result_before_save = self._predict_with_model( + distribution_for_saving, model, predict_dataset + ) + + if save_in_scope: + with distribution_for_saving.scope(): + self._save_model(model, saved_dir) + else: + self._save_model(model, saved_dir) + + with distribution_for_restoring.scope(): + + load_result = self._load_and_run_model( + distribution=distribution_for_restoring, + saved_dir=saved_dir, + predict_dataset=predict_dataset, + ) + + self.assertAllClose(result_before_save, load_result) + + def run_test_save_strategy( + self, model_and_input, distribution, save_in_scope + ): + """Save a model with DS.""" + saved_dir = os.path.join(self.get_temp_dir(), "3") + with distribution.scope(): + model = model_and_input.get_model() + x_train, y_train, _ = model_and_input.get_data() + batch_size = model_and_input.get_batch_size() + self._train_model(model, x_train, y_train, batch_size) + + if save_in_scope: + with distribution.scope(): + self._save_model(model, saved_dir) + else: + self._save_model(model, saved_dir) + return saved_dir diff --git a/keras/distribute/sharded_variable_test.py b/keras/distribute/sharded_variable_test.py index 7b9b8eda6cd1..acd1e6fd3bf6 100644 --- a/keras/distribute/sharded_variable_test.py +++ b/keras/distribute/sharded_variable_test.py @@ -14,406 +14,458 @@ # ============================================================================== """Tests for ClusterCoordinator and Keras models.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized import keras from keras.distribute import multi_worker_testing_utils from keras.distribute import strategy_combinations from keras.engine import base_layer -import numpy as np -import tensorflow.compat.v2 as tf class ShardedVariableTest(tf.test.TestCase, parameterized.TestCase): - - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.strategy = tf.distribute.experimental.ParameterServerStrategy( - multi_worker_testing_utils.make_parameter_server_cluster(3, 2), - variable_partitioner=tf.distribute.experimental.partitioners - .FixedShardsPartitioner(2)) - - def assert_list_all_equal(self, list1, list2): - """Used in lieu of `assertAllEqual`. - - This is used to replace standard `assertAllEqual` for the cases where - `list1` and `list2` contain `AggregatingVariable`. Lists with - `AggregatingVariable` are not convertible to numpy array via `np.array` - calls as numpy would raise `ValueError: setting an array element with a - sequence.` - - Args: - list1: The first list to compare equality. - list2: The second list to compare equality. 
- """ - for lhs, rhs in zip(list1, list2): - self.assertEqual(lhs, rhs) - - def test_keras_layer_setattr(self): - - class Layer(base_layer.Layer): - - def __init__(self): - super().__init__() - self.w = tf.Variable([0, 1]) - self.b = tf.Variable([2, 3], trainable=False) - - with self.strategy.scope(): - layer = Layer() - - self.assertLen(layer.trainable_weights, 2) - self.assertEqual(layer.trainable_weights[0], [0]) - self.assertEqual(layer.trainable_weights[1], [1]) - self.assertLen(layer.non_trainable_weights, 2) - self.assertEqual(layer.non_trainable_weights[0], [2]) - self.assertEqual(layer.non_trainable_weights[1], [3]) - self.assert_list_all_equal( - layer.weights, layer.trainable_weights + layer.non_trainable_weights) - self.assert_list_all_equal(layer.trainable_weights, - layer.trainable_variables) - self.assert_list_all_equal(layer.weights, layer.variables) - - checkpoint_deps = set(layer._trackable_children().values()) - self.assertEqual(checkpoint_deps, set([layer.w, layer.b])) - - def test_keras_layer_add_weight(self): - - class Layer(base_layer.Layer): - - def __init__(self): - super().__init__() - self.w = self.add_weight( - shape=(2,), - initializer=lambda shape, dtype: tf.constant([0., 1.],), - trainable=True) - self.b = self.add_weight( - shape=(2,), - initializer=lambda shape, dtype: tf.constant([2., 3.]), - trainable=False) - - with self.strategy.scope(): - layer = Layer() - - self.assertLen(layer.trainable_weights, 2) - self.assertEqual(layer.trainable_weights[0], [0.]) - self.assertEqual(layer.trainable_weights[1], [1.]) - self.assertLen(layer.non_trainable_weights, 2) - self.assertEqual(layer.non_trainable_weights[0], [2.]) - self.assertEqual(layer.non_trainable_weights[1], [3.]) - self.assert_list_all_equal( - layer.weights, layer.trainable_weights + layer.non_trainable_weights) - self.assert_list_all_equal(layer.trainable_weights, - layer.trainable_variables) - self.assert_list_all_equal(layer.weights, layer.variables) - - checkpoint_deps = set(layer._trackable_children().values()) - self.assertEqual(checkpoint_deps, set([layer.w, layer.b])) - - def test_keras_metrics(self): - with self.strategy.scope(): - fp = keras.metrics.FalsePositives(thresholds=[0.2, 0.5, 0.7, 0.8]) - auc = keras.metrics.AUC(num_thresholds=10) - - @tf.function - def update(): - fp.update_state([0., 1., 0., 0.], [0., 0., 0.3, 0.9]) - auc.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) - - @tf.function - def reset(): - fp.reset_state() - auc.reset_state() - - update() - self.assertEqual(auc.result(), 0.75) - self.assertAllEqual(fp.result(), [2., 1., 1., 1.]) - reset() - self.assertEqual(auc.result(), 0.0) - self.assertAllEqual(fp.result(), [0., 0., 0., 0.]) - - self.assertTrue(hasattr(auc.true_positives, 'variables')) - self.assertTrue(hasattr(fp.accumulator, 'variables')) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - shard_config=[[2, 2], [2, 3], [3, 2], [2, 1], [1, 1], [1, 2], [1, 3]], - model_type=['dense', 'embedding'], - )) - def test_saved_model_combined(self, shard_config, model_type): - """Test saving and loading models with various fixed numbers of shards. - - Args: - shard_config: The number of shards to use per variable before and after - loading. For example, [1, 3] means to create and save the model with 1 - shard (i.e., no variable partitioning), and load it into 3 shards per - variable. - model_type: Either 'dense' or 'embedding', which simple model to test. 
- """ - - def create_embedding_model(): - inputs = keras.layers.Input(shape=(6,)) - embedding = keras.layers.Embedding(output_dim=2, input_dim=6) - outputs = embedding(inputs) - model = keras.Model(inputs, outputs) - model.compile(optimizer='adam', loss='mean_squared_error') - return model - - def create_dense_model(): - inputs = keras.layers.Input(shape=(6,)) - outputs = keras.layers.Dense(6)(inputs) - model = keras.Model(inputs, outputs) - model.compile(optimizer='adam', loss='mean_squared_error') - return model - - # Maybe create new strategy with different number of shards - if shard_config[0] > 2: - strategy = tf.distribute.experimental.ParameterServerStrategy( - multi_worker_testing_utils.make_parameter_server_cluster(3, 3), - variable_partitioner=tf.distribute.experimental.partitioners - .FixedShardsPartitioner(shard_config[0])) - elif shard_config[0] == 2: - strategy = self.strategy - else: - # Just one shard, so use default strategy - strategy = tf.distribute.get_strategy() - - x = tf.cast(tf.expand_dims(tf.range(6), 0), tf.float32) - with strategy.scope(): - model = ( - create_dense_model() - if model_type == 'dense' else create_embedding_model()) - expect = model(x) - - # Dense layers have two variables (kernel and bias), embedding layers have 1 - n_expected_variables = shard_config[0] * (2 if model_type == 'dense' else 1) - self.assertLen(model.variables, n_expected_variables) - model_weights = [v.numpy() for v in model.variables] - - saved_dir = self.get_temp_dir() - model.save(saved_dir) - - if shard_config[1] > 2: - strategy2 = tf.distribute.experimental.ParameterServerStrategy( - multi_worker_testing_utils.make_parameter_server_cluster(3, 3), - variable_partitioner=tf.distribute.experimental.partitioners - .FixedShardsPartitioner(shard_config[1])) - elif shard_config[1] == 2: - strategy2 = self.strategy - else: - # Just one shard, so use default strategy - strategy2 = tf.distribute.get_strategy() - - with strategy2.scope(): - loaded_model = keras.models.load_model(saved_dir) - got = loaded_model(x) - - self.assertAllClose(got, expect) - n_expected_variables = shard_config[1] * (2 - if model_type == 'dense' else 1) - self.assertLen(loaded_model.variables, n_expected_variables) - loaded_model_weights = [v.numpy() for v in loaded_model.variables] - self.assertAllClose( - np.concatenate([w.flatten() for w in model_weights]), - np.concatenate([w.flatten() for w in loaded_model_weights])) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - distribution=strategy_combinations.strategies_minus_tpu, - model_type=['dense', 'embedding'], - )) - def test_saved_model_load_non_pss(self, model_type, distribution): - - def create_embedding_model(): - inputs = keras.layers.Input(shape=(6,)) - embedding = keras.layers.Embedding(output_dim=2, input_dim=6) - outputs = embedding(inputs) - model = keras.Model(inputs, outputs) - model.compile(optimizer='adam', loss='mean_squared_error') - return model - - def create_dense_model(): - inputs = keras.layers.Input(shape=(6,)) - outputs = keras.layers.Dense(6)(inputs) - model = keras.Model(inputs, outputs) - model.compile(optimizer='adam', loss='mean_squared_error') - return model - - x = tf.cast(tf.expand_dims(tf.range(6), 0), tf.float32) - with self.strategy.scope(): - model = ( - create_dense_model() - if model_type == 'dense' else create_embedding_model()) - expect = model(x) - - model_weights = [v.numpy() for v in model.variables] - - saved_dir = self.get_temp_dir() - model.save(saved_dir) - - with 
distribution.scope(): - loaded_model = keras.models.load_model(saved_dir) - got = loaded_model(x) - - self.assertAllClose(got, expect) - n_expected_variables = 2 if model_type == 'dense' else 1 - self.assertLen(loaded_model.variables, n_expected_variables) - loaded_model_weights = [v.numpy() for v in loaded_model.variables] - self.assertAllClose( - np.concatenate([w.flatten() for w in model_weights]), - np.concatenate([w.flatten() for w in loaded_model_weights])) - - def test_slot_variable_checkpointing(self): - - with self.strategy.scope(): - # Set a name so the ShardedVariable is well-named for slot var keying - var = tf.Variable([1., 2., 3., 4., 5., 6.], name='test') - - opt = keras.optimizers.optimizer_v2.adam.Adam() - - # Run once to trigger apply_gradients to populate optimizer slot variables. - def train_step(): - with tf.GradientTape() as tape: - loss = sum(var) - opt.minimize(loss, var.variables, tape=tape) - - self.strategy.run(train_step) - - # Check that we can call get_slot using each slot, before and after - # Checkpointing, and get the same results - pre_ckpt_slots = [] - for slot in opt.get_slot_names(): - pre_ckpt_slots.extend([v.numpy() for v in opt.get_slot(var, slot)]) - - ckpt = tf.train.Checkpoint(var=var, opt=opt) - - # Assert that checkpoint has slots for each shard and the ShardedVariable - self.assertLen(ckpt.opt._slots, 3) - for var_name in ckpt.opt._slots.keys(): - self.assertLen(ckpt.opt._slots[var_name], 2) - self.assertEqual(ckpt.opt._slots[var_name].keys(), {'m', 'v'}) - if hasattr(ckpt.opt._slots[var_name]['m'], 'variables'): - self.assertLen(ckpt.opt._slots[var_name]['m'].variables, 2) - self.assertLen(ckpt.opt._slots[var_name]['v'].variables, 2) - - saved_dir = self.get_temp_dir() - ckpt_prefix = f'{saved_dir}/ckpt' - ckpt.save(ckpt_prefix) - - # Run once more to alter slot variables and ensure checkpoint restores - # the earlier values. - self.strategy.run(train_step) - - changed_ckpt_slots = [] - for slot in opt.get_slot_names(): - changed_ckpt_slots.extend([v.numpy() for v in opt.get_slot(var, slot)]) - self.assertNotAllClose(pre_ckpt_slots, changed_ckpt_slots) - - ckpt.restore(tf.train.latest_checkpoint(saved_dir)) - - post_ckpt_slots = [] - for slot in opt.get_slot_names(): - post_ckpt_slots.extend([v.numpy() for v in opt.get_slot(var, slot)]) - - self.assertAllClose(pre_ckpt_slots, post_ckpt_slots) - - def test_slot_variable_checkpoint_load_with_diff_shards(self): - - with self.strategy.scope(): - # Set a name so the ShardedVariable is well-named for slot var keying - var = tf.Variable([1., 2., 3., 4., 5., 6.], name='test') - - opt = keras.optimizers.optimizer_v2.adam.Adam() - - # Run once to trigger apply_gradients to populate optimizer slot variables. 
- def train_step(): - with tf.GradientTape() as tape: - loss = sum(var) - opt.minimize(loss, var.variables, tape=tape) - - self.strategy.run(train_step) - - # Check that we can call get_slot using each slot, before and after - # Checkpointing, and get the same results - pre_ckpt_slots = [] - for slot in opt.get_slot_names(): - pre_ckpt_slots.extend( - tf.concat(list(opt.get_slot(var, slot)), axis=0).numpy()) - - ckpt = tf.train.Checkpoint(var=var, opt=opt) - saved_dir = self.get_temp_dir() - ckpt_prefix = f'{saved_dir}/ckpt' - ckpt.save(ckpt_prefix) - - # Create new strategy with different number of shards - strategy2 = tf.distribute.experimental.ParameterServerStrategy( - multi_worker_testing_utils.make_parameter_server_cluster(3, 2), - variable_partitioner=tf.distribute.experimental.partitioners - .FixedShardsPartitioner(3)) - - # Create new variable with different values, to be overwritten by ckpt. - with strategy2.scope(): - var = tf.Variable([0., 1., 2., 3., 4., 5.], name='test') - - opt = keras.optimizers.optimizer_v2.adam.Adam() - # Run once to trigger apply_gradients to populate optimizer slot variables. - strategy2.run(train_step) - - new_ckpt = tf.train.Checkpoint(var=var, opt=opt) - new_ckpt.restore(tf.train.latest_checkpoint(saved_dir)) - post_ckpt_slots = [] - for slot in new_ckpt.opt.get_slot_names(): - post_ckpt_slots.extend( - tf.concat(list(new_ckpt.opt.get_slot(var, slot)), axis=0).numpy()) - self.assertAllClose(pre_ckpt_slots, post_ckpt_slots) + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.strategy = tf.distribute.experimental.ParameterServerStrategy( + multi_worker_testing_utils.make_parameter_server_cluster(3, 2), + variable_partitioner=tf.distribute.experimental.partitioners.FixedShardsPartitioner( # noqa: E501 + 2 + ), + ) + + def assert_list_all_equal(self, list1, list2): + """Used in lieu of `assertAllEqual`. + + This is used to replace standard `assertAllEqual` for the cases where + `list1` and `list2` contain `AggregatingVariable`. Lists with + `AggregatingVariable` are not convertible to numpy array via `np.array` + calls as numpy would raise `ValueError: setting an array element with a + sequence.` + + Args: + list1: The first list to compare equality. + list2: The second list to compare equality. 
+ """ + for lhs, rhs in zip(list1, list2): + self.assertEqual(lhs, rhs) + + def test_keras_layer_setattr(self): + class Layer(base_layer.Layer): + def __init__(self): + super().__init__() + self.w = tf.Variable([0, 1]) + self.b = tf.Variable([2, 3], trainable=False) + + with self.strategy.scope(): + layer = Layer() + + self.assertLen(layer.trainable_weights, 2) + self.assertEqual(layer.trainable_weights[0], [0]) + self.assertEqual(layer.trainable_weights[1], [1]) + self.assertLen(layer.non_trainable_weights, 2) + self.assertEqual(layer.non_trainable_weights[0], [2]) + self.assertEqual(layer.non_trainable_weights[1], [3]) + self.assert_list_all_equal( + layer.weights, layer.trainable_weights + layer.non_trainable_weights + ) + self.assert_list_all_equal( + layer.trainable_weights, layer.trainable_variables + ) + self.assert_list_all_equal(layer.weights, layer.variables) + + checkpoint_deps = set(layer._trackable_children().values()) + self.assertEqual(checkpoint_deps, set([layer.w, layer.b])) + + def test_keras_layer_add_weight(self): + class Layer(base_layer.Layer): + def __init__(self): + super().__init__() + self.w = self.add_weight( + shape=(2,), + initializer=lambda shape, dtype: tf.constant( + [0.0, 1.0], + ), + trainable=True, + ) + self.b = self.add_weight( + shape=(2,), + initializer=lambda shape, dtype: tf.constant([2.0, 3.0]), + trainable=False, + ) + + with self.strategy.scope(): + layer = Layer() + + self.assertLen(layer.trainable_weights, 2) + self.assertEqual(layer.trainable_weights[0], [0.0]) + self.assertEqual(layer.trainable_weights[1], [1.0]) + self.assertLen(layer.non_trainable_weights, 2) + self.assertEqual(layer.non_trainable_weights[0], [2.0]) + self.assertEqual(layer.non_trainable_weights[1], [3.0]) + self.assert_list_all_equal( + layer.weights, layer.trainable_weights + layer.non_trainable_weights + ) + self.assert_list_all_equal( + layer.trainable_weights, layer.trainable_variables + ) + self.assert_list_all_equal(layer.weights, layer.variables) + + checkpoint_deps = set(layer._trackable_children().values()) + self.assertEqual(checkpoint_deps, set([layer.w, layer.b])) + + def test_keras_metrics(self): + with self.strategy.scope(): + fp = keras.metrics.FalsePositives(thresholds=[0.2, 0.5, 0.7, 0.8]) + auc = keras.metrics.AUC(num_thresholds=10) + + @tf.function + def update(): + fp.update_state([0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 0.3, 0.9]) + auc.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) + + @tf.function + def reset(): + fp.reset_state() + auc.reset_state() + + update() + self.assertEqual(auc.result(), 0.75) + self.assertAllEqual(fp.result(), [2.0, 1.0, 1.0, 1.0]) + reset() + self.assertEqual(auc.result(), 0.0) + self.assertAllEqual(fp.result(), [0.0, 0.0, 0.0, 0.0]) + + self.assertTrue(hasattr(auc.true_positives, "variables")) + self.assertTrue(hasattr(fp.accumulator, "variables")) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + shard_config=[ + [2, 2], + [2, 3], + [3, 2], + [2, 1], + [1, 1], + [1, 2], + [1, 3], + ], + model_type=["dense", "embedding"], + ) + ) + def test_saved_model_combined(self, shard_config, model_type): + """Test saving and loading models with various fixed numbers of shards. + + Args: + shard_config: The number of shards to use per variable before and + after loading. For example, [1, 3] means to create and save the + model with 1 shard (i.e., no variable partitioning), and load it + into 3 shards per variable. + model_type: Either 'dense' or 'embedding', which simple model to test. 
+ """ + + def create_embedding_model(): + inputs = keras.layers.Input(shape=(6,)) + embedding = keras.layers.Embedding(output_dim=2, input_dim=6) + outputs = embedding(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer="adam", loss="mean_squared_error") + return model + + def create_dense_model(): + inputs = keras.layers.Input(shape=(6,)) + outputs = keras.layers.Dense(6)(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer="adam", loss="mean_squared_error") + return model + + # Maybe create new strategy with different number of shards + if shard_config[0] > 2: + strategy = tf.distribute.experimental.ParameterServerStrategy( + multi_worker_testing_utils.make_parameter_server_cluster(3, 3), + variable_partitioner=tf.distribute.experimental.partitioners.FixedShardsPartitioner( # noqa: E501 + shard_config[0] + ), + ) + elif shard_config[0] == 2: + strategy = self.strategy + else: + # Just one shard, so use default strategy + strategy = tf.distribute.get_strategy() + + x = tf.cast(tf.expand_dims(tf.range(6), 0), tf.float32) + with strategy.scope(): + model = ( + create_dense_model() + if model_type == "dense" + else create_embedding_model() + ) + expect = model(x) + + # Dense layers have two variables (kernel and bias), embedding layers + # have 1 + n_expected_variables = shard_config[0] * ( + 2 if model_type == "dense" else 1 + ) + self.assertLen(model.variables, n_expected_variables) + model_weights = [v.numpy() for v in model.variables] + + saved_dir = self.get_temp_dir() + model.save(saved_dir) + + if shard_config[1] > 2: + strategy2 = tf.distribute.experimental.ParameterServerStrategy( + multi_worker_testing_utils.make_parameter_server_cluster(3, 3), + variable_partitioner=tf.distribute.experimental.partitioners.FixedShardsPartitioner( # noqa: E501 + shard_config[1] + ), + ) + elif shard_config[1] == 2: + strategy2 = self.strategy + else: + # Just one shard, so use default strategy + strategy2 = tf.distribute.get_strategy() + + with strategy2.scope(): + loaded_model = keras.models.load_model(saved_dir) + got = loaded_model(x) + + self.assertAllClose(got, expect) + n_expected_variables = shard_config[1] * ( + 2 if model_type == "dense" else 1 + ) + self.assertLen(loaded_model.variables, n_expected_variables) + loaded_model_weights = [v.numpy() for v in loaded_model.variables] + self.assertAllClose( + np.concatenate([w.flatten() for w in model_weights]), + np.concatenate([w.flatten() for w in loaded_model_weights]), + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=strategy_combinations.strategies_minus_tpu, + model_type=["dense", "embedding"], + ) + ) + def test_saved_model_load_non_pss(self, model_type, distribution): + def create_embedding_model(): + inputs = keras.layers.Input(shape=(6,)) + embedding = keras.layers.Embedding(output_dim=2, input_dim=6) + outputs = embedding(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer="adam", loss="mean_squared_error") + return model + + def create_dense_model(): + inputs = keras.layers.Input(shape=(6,)) + outputs = keras.layers.Dense(6)(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer="adam", loss="mean_squared_error") + return model + + x = tf.cast(tf.expand_dims(tf.range(6), 0), tf.float32) + with self.strategy.scope(): + model = ( + create_dense_model() + if model_type == "dense" + else create_embedding_model() + ) + expect = model(x) + + model_weights = [v.numpy() for v in model.variables] 
+ + saved_dir = self.get_temp_dir() + model.save(saved_dir) + + with distribution.scope(): + loaded_model = keras.models.load_model(saved_dir) + got = loaded_model(x) + + self.assertAllClose(got, expect) + n_expected_variables = 2 if model_type == "dense" else 1 + self.assertLen(loaded_model.variables, n_expected_variables) + loaded_model_weights = [v.numpy() for v in loaded_model.variables] + self.assertAllClose( + np.concatenate([w.flatten() for w in model_weights]), + np.concatenate([w.flatten() for w in loaded_model_weights]), + ) + + def test_slot_variable_checkpointing(self): + + with self.strategy.scope(): + # Set a name so the ShardedVariable is well-named for slot var + # keying + var = tf.Variable([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="test") + + opt = keras.optimizers.legacy.adam.Adam() + + # Run once to trigger apply_gradients to populate optimizer slot + # variables. + def train_step(): + with tf.GradientTape() as tape: + loss = sum(var) + opt.minimize(loss, var.variables, tape=tape) + + self.strategy.run(train_step) + + # Check that we can call get_slot using each slot, before and after + # Checkpointing, and get the same results + pre_ckpt_slots = [] + for slot in opt.get_slot_names(): + pre_ckpt_slots.extend([v.numpy() for v in opt.get_slot(var, slot)]) + + ckpt = tf.train.Checkpoint(var=var, opt=opt) + + # Assert that checkpoint has slots for each shard and the + # ShardedVariable + self.assertLen(ckpt.opt._slots, 3) + for var_name in ckpt.opt._slots.keys(): + self.assertLen(ckpt.opt._slots[var_name], 2) + self.assertEqual(ckpt.opt._slots[var_name].keys(), {"m", "v"}) + if hasattr(ckpt.opt._slots[var_name]["m"], "variables"): + self.assertLen(ckpt.opt._slots[var_name]["m"].variables, 2) + self.assertLen(ckpt.opt._slots[var_name]["v"].variables, 2) + + saved_dir = self.get_temp_dir() + ckpt_prefix = f"{saved_dir}/ckpt" + ckpt.save(ckpt_prefix) + + # Run once more to alter slot variables and ensure checkpoint restores + # the earlier values. + self.strategy.run(train_step) + + changed_ckpt_slots = [] + for slot in opt.get_slot_names(): + changed_ckpt_slots.extend( + [v.numpy() for v in opt.get_slot(var, slot)] + ) + self.assertNotAllClose(pre_ckpt_slots, changed_ckpt_slots) + + ckpt.restore(tf.train.latest_checkpoint(saved_dir)) + + post_ckpt_slots = [] + for slot in opt.get_slot_names(): + post_ckpt_slots.extend([v.numpy() for v in opt.get_slot(var, slot)]) + + self.assertAllClose(pre_ckpt_slots, post_ckpt_slots) + + def test_slot_variable_checkpoint_load_with_diff_shards(self): + + with self.strategy.scope(): + # Set a name so the ShardedVariable is well-named for slot var + # keying + var = tf.Variable([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="test") + + opt = keras.optimizers.legacy.adam.Adam() + + # Run once to trigger apply_gradients to populate optimizer slot + # variables. 
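For background on what the next step populates: the legacy Adam optimizer lazily creates two slot accumulators ("m" and "v") per trainable variable on the first `apply_gradients`/`minimize` call, which is why the test must run one training step before it can inspect slots. A standalone sketch in plain TensorFlow, without any distribution strategy (assumes the `tf.keras.optimizers.legacy` endpoint is available):

```python
import tensorflow as tf

v = tf.Variable([1.0, 2.0, 3.0], name="w")
opt = tf.keras.optimizers.legacy.Adam()

with tf.GradientTape() as tape:
    loss = tf.reduce_sum(v)
opt.minimize(loss, [v], tape=tape)  # first step creates the slots

print(opt.get_slot_names())        # ['m', 'v']
print(opt.get_slot(v, "m").shape)  # (3,): one accumulator entry per element
```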
+ def train_step(): + with tf.GradientTape() as tape: + loss = sum(var) + opt.minimize(loss, var.variables, tape=tape) + + self.strategy.run(train_step) + + # Check that we can call get_slot using each slot, before and after + # Checkpointing, and get the same results + pre_ckpt_slots = [] + for slot in opt.get_slot_names(): + pre_ckpt_slots.extend( + tf.concat(list(opt.get_slot(var, slot)), axis=0).numpy() + ) + + ckpt = tf.train.Checkpoint(var=var, opt=opt) + saved_dir = self.get_temp_dir() + ckpt_prefix = f"{saved_dir}/ckpt" + ckpt.save(ckpt_prefix) + + # Create new strategy with different number of shards + strategy2 = tf.distribute.experimental.ParameterServerStrategy( + multi_worker_testing_utils.make_parameter_server_cluster(3, 2), + variable_partitioner=tf.distribute.experimental.partitioners.FixedShardsPartitioner( # noqa: E501 + 3 + ), + ) + + # Create new variable with different values, to be overwritten by ckpt. + with strategy2.scope(): + var = tf.Variable([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], name="test") + + opt = keras.optimizers.legacy.adam.Adam() + # Run once to trigger apply_gradients to populate optimizer slot + # variables. + strategy2.run(train_step) + + new_ckpt = tf.train.Checkpoint(var=var, opt=opt) + new_ckpt.restore(tf.train.latest_checkpoint(saved_dir)) + post_ckpt_slots = [] + for slot in new_ckpt.opt.get_slot_names(): + post_ckpt_slots.extend( + tf.concat( + list(new_ckpt.opt.get_slot(var, slot)), axis=0 + ).numpy() + ) + self.assertAllClose(pre_ckpt_slots, post_ckpt_slots) class ShardedVariableMixedPartitioningTest(tf.test.TestCase): - - def test_saved_model_min_size_partitioner(self): - - # set min_shard_bytes such that Dense kernel is split into 2 and bias into 1 - partitioner = tf.distribute.experimental.partitioners.MinSizePartitioner( - min_shard_bytes=(6 * 6 * 4) // 2, max_shards=2) - - cluster_resolver = multi_worker_testing_utils.make_parameter_server_cluster( - 3, 2) - strategy = tf.distribute.experimental.ParameterServerStrategy( - cluster_resolver, variable_partitioner=partitioner) - - def create_dense_model(): - inputs = keras.layers.Input(shape=(6,)) - outputs = keras.layers.Dense(6)(inputs) - model = keras.Model(inputs, outputs) - model.compile(optimizer='adam', loss='mean_squared_error') - return model - - x = tf.cast(tf.expand_dims(tf.range(6), 0), tf.float32) - with strategy.scope(): - model = create_dense_model() - expect = model(x) - - # 2 kernel variables, 1 bias - self.assertLen(model.variables, 3) - - saved_dir = self.get_temp_dir() - model.save(saved_dir) - - # set min_shard_bytes such that Dense kernel is split into 3 and bias into 1 - partitioner2 = tf.distribute.experimental.partitioners.MinSizePartitioner( - min_shard_bytes=(6 * 6 * 4) // 3, max_shards=3) - strategy2 = tf.distribute.experimental.ParameterServerStrategy( - cluster_resolver, variable_partitioner=partitioner2) - - with strategy2.scope(): - loaded_model = keras.models.load_model(saved_dir) - got = loaded_model(x) - - self.assertAllClose(got, expect) - # 3 kernel variables, 1 bias - self.assertLen(loaded_model.variables, 4) - - -if __name__ == '__main__': - tf.compat.v1.enable_v2_behavior() - tf.test.main() + def test_saved_model_min_size_partitioner(self): + + # set min_shard_bytes such that Dense kernel is split into 2 and bias + # into 1 + partitioner = ( + tf.distribute.experimental.partitioners.MinSizePartitioner( + min_shard_bytes=(6 * 6 * 4) // 2, max_shards=2 + ) + ) + + cluster_resolver = ( + multi_worker_testing_utils.make_parameter_server_cluster(3, 2) + ) + 
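To make the `min_shard_bytes` comment above concrete: a `Dense(6)` layer built on 6-feature input holds a 6x6 float32 kernel (144 bytes) and a 6-element bias (24 bytes). With `min_shard_bytes = 144 // 2 = 72`, the kernel can split into two 72-byte shards while the bias is too small to split at all. A rough arithmetic sketch (the `num_shards` helper is an illustrative approximation of the partitioner's sizing rule, not its real implementation):

```python
kernel_bytes = 6 * 6 * 4  # 36 float32 weights -> 144 bytes
bias_bytes = 6 * 4        # 6 float32 biases   -> 24 bytes
min_shard_bytes = (6 * 6 * 4) // 2  # 72, as in the test above
max_shards = 2

def num_shards(size_bytes):
    # Largest shard count that keeps every shard >= min_shard_bytes,
    # capped at max_shards and never below 1.
    return max(1, min(max_shards, size_bytes // min_shard_bytes))

assert num_shards(kernel_bytes) == 2  # kernel -> 2 shards
assert num_shards(bias_bytes) == 1    # bias   -> 1 shard
# Hence the assertion below that the model exposes 3 variables in total.
```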
strategy = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver, variable_partitioner=partitioner + ) + + def create_dense_model(): + inputs = keras.layers.Input(shape=(6,)) + outputs = keras.layers.Dense(6)(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer="adam", loss="mean_squared_error") + return model + + x = tf.cast(tf.expand_dims(tf.range(6), 0), tf.float32) + with strategy.scope(): + model = create_dense_model() + expect = model(x) + + # 2 kernel variables, 1 bias + self.assertLen(model.variables, 3) + + saved_dir = self.get_temp_dir() + model.save(saved_dir) + + # set min_shard_bytes such that Dense kernel is split into 3 and bias + # into 1 + partitioner2 = ( + tf.distribute.experimental.partitioners.MinSizePartitioner( + min_shard_bytes=(6 * 6 * 4) // 3, max_shards=3 + ) + ) + strategy2 = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver, variable_partitioner=partitioner2 + ) + + with strategy2.scope(): + loaded_model = keras.models.load_model(saved_dir) + got = loaded_model(x) + + self.assertAllClose(got, expect) + # 3 kernel variables, 1 bias + self.assertLen(loaded_model.variables, 4) + + +if __name__ == "__main__": + tf.compat.v1.enable_v2_behavior() + tf.test.main() diff --git a/keras/distribute/sidecar_evaluator.py b/keras/distribute/sidecar_evaluator.py deleted file mode 100644 index 0e9cfe56c21f..000000000000 --- a/keras/distribute/sidecar_evaluator.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Python module for evaluation loop.""" - -import tensorflow.compat.v2 as tf -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.util import deprecation -from tensorflow.python.util.tf_export import keras_export - -_PRINT_EVAL_STEP_EVERY_SEC = 60.0 -_ITERATIONS_UNINITIALIZED = -1 -_CHECKPOINT_TIMEOUT_SEC = 30 - - -def list_checkpoint_attributes(ckpt_dir_or_file): - """Lists all the attributes in a checkpoint. - - Checkpoint keys are paths in a checkpoint graph, and attribute is the first - element in the path. e.g. with a checkpoint key - "optimizer/iter/.ATTRIBUTES/VARIABLE_VALUE", optimizer is the attribute. The - attribute is also used to save/restore a variable in a checkpoint, - e.g. tf.train.Checkpoint(optimizer=optimizer, model=model). - - Args: - ckpt_dir_or_file: Directory with checkpoints file or path to checkpoint. - - Returns: - Set of attributes in a checkpoint. - """ - reader = tf.train.load_checkpoint(ckpt_dir_or_file) - variable_map = reader.get_variable_to_shape_map() - return {name.split('/')[0] for name in variable_map.keys()} - - -@keras_export('keras.utils.SidecarEvaluator', v1=[]) -class SidecarEvaluator: - """A class designed for a dedicated evaluator task. - - `SidecarEvaluator` is expected to be run in a process on a separate machine - from the training cluster. 
It is meant for the purpose of a dedicated - evaluator, evaluating the metric results of a training cluster which has one - or more workers performing the training, and saving checkpoints. - - The `SidecarEvaluator` API is compatible with both Custom Training Loop (CTL), - and Keras `Model.fit` to be used in the training cluster. Using the model - (with compiled metrics) provided at `__init__`, `SidecarEvaluator` repeatedly - performs evaluation "epochs" when it finds a checkpoint that has not yet been - used. Depending on the `steps` argument, an eval epoch is evaluation over all - eval data, or up to certain number of steps (batches). See examples below for - how the training program should save the checkpoints in order to be recognized - by `SidecarEvaluator`. - - Since under the hood, `SidecarEvaluator` uses `model.evaluate` for evaluation, - it also supports arbitrary Keras callbacks. That is, if one or more callbacks - are provided, their `on_test_batch_begin` and `on_test_batch_end` methods are - called at the start and end of a batch, and their `on_test_begin` and - `on_test_end` are called at the start and end of an evaluation epoch. Note - that `SidecarEvaluator` may skip some checkpoints because it always picks up - the latest checkpoint available, and during an evaluation epoch, multiple - checkpoints can be produced from the training side. - - Example: - ```python - model = tf.keras.models.Sequential(...) - model.compile(metrics=tf.keras.metrics.SparseCategoricalAccuracy( - name="eval_metrics")) - data = tf.data.Dataset.from_tensor_slices(...) - - tf.keras.SidecarEvaluator( - model=model, - data=data, - checkpoint_dir='/tmp/checkpoint_dir', # dir for training-saved checkpoint - steps=None, # Eval until dataset is exhausted - max_evaluations=None, # The evaluation needs to be stopped manually - callbacks=[tf.keras.callbacks.TensorBoard(log_dir='/tmp/log_dir')] - ).start() - ``` - - `SidecarEvaluator.start` writes a series of summary - files which can be visualized by tensorboard (which provides a webpage link): - - ```bash - $ tensorboard --logdir=/tmp/log_dir/validation - ... - TensorBoard 2.4.0a0 at http://host:port (Press CTRL+C to quit) - ``` - - If the training cluster uses a CTL, the `checkpoint_dir` should contain - checkpoints that track both `model` and `optimizer`, to fulfill - `SidecarEvaluator`'s expectation. This can be done by a - `tf.train.Checkpoint` and a `tf.train.CheckpointManager`: - - ```python - checkpoint_dir = ... # Same `checkpoint_dir` supplied to `SidecarEvaluator`. - checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer) - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, checkpoint_dir=..., max_to_keep=...) - checkpoint_manager.save() - ``` - - If the training cluster uses Keras `Model.fit` API, a - `tf.keras.callbacks.ModelCheckpoint` should be used, with - `save_weights_only=True`, and the `filepath` should have 'ckpt-{epoch}' - appended: - - ```python - checkpoint_dir = ... # Same `checkpoint_dir` supplied to `SidecarEvaluator`. - model_checkpoint = tf.keras.callbacks.ModelCheckpoint( - filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'), - save_weights_only=True) - model.fit(dataset, epochs, callbacks=[model_checkpoint]) - ``` - """ - - def __init__(self, - model, - data, - checkpoint_dir, - steps=None, - max_evaluations=None, - callbacks=None): - """Initializes an `SidecarEvaluator` object. - - Args: - model: Model to use for evaluation. 
The model object used here should be a - `tf.keras.Model`, and should be the same as the one that is used in - training, where `tf.keras.Model`s are checkpointed. The model should - have one or more metrics compiled before using `SidecarEvaluator`. - data: The input data for evaluation. `SidecarEvaluator` supports all data - types that Keras `model.evaluate` supports as the input data `x`, such - as a `tf.data.Dataset`. - checkpoint_dir: Directory where checkpoint files are saved. - steps: Number of steps to perform evaluation for, when evaluating a single - checkpoint file. If `None`, evaluation continues until the dataset is - exhausted. For repeated evaluation dataset, user must specify `steps` to - avoid infinite evaluation loop. - max_evaluations: Maximum number of the checkpoint file to be evaluated, - for `SidecarEvaluator` to know when to stop. The evaluator will stop - after it evaluates a checkpoint filepath ending with - '-'. If using - `tf.train.CheckpointManager.save` for saving checkpoints, the kth saved - checkpoint has the filepath suffix '-' (k=1 for the first - saved), and if checkpoints are saved every epoch after training, the - filepath saved at the kth epoch would end with '-. Thus, - if training runs for n epochs, and the evaluator should end after the - training finishes, use n for this parameter. Note that this is not - necessarily equal to the number of total evaluations, since some - checkpoints may be skipped if evaluation is slower than checkpoint - creation. If `None`, `SidecarEvaluator` will evaluate indefinitely, and - the user must terminate evaluator program themselves. - callbacks: List of `keras.callbacks.Callback` instances to apply during - evaluation. See [callbacks](/api_docs/python/tf/keras/callbacks). - """ - self.model = model - self.data = data - self.checkpoint_dir = checkpoint_dir - self._iterations = tf.Variable( - name='iterations', - initial_value=_ITERATIONS_UNINITIALIZED, - dtype=tf.int64) - self.max_evaluations = max_evaluations - self.steps = steps - self.callbacks = callbacks or [] - - def _timeout_fn(self): - logging.info( - f'No checkpoints appear to be found after {_CHECKPOINT_TIMEOUT_SEC} ' - 'seconds. Please check if you are properly using a ' - '`tf.train.Checkpoint/CheckpointManager` or ' - '`tf.keras.callbacks.ModelCheckpoint(save_weights_only=True)` to save ' - 'checkpoints by the training. See ' - '`tf.keras.SidecarEvaluator` doc for recommended flows ' - 'of saving checkpoints.') - return False - - def start(self): - """Starts the evaluation loop.""" - optimizer_checkpoint = tf.train.Checkpoint(iter=self._iterations) - checkpoint = tf.train.Checkpoint( - model=self.model, optimizer=optimizer_checkpoint) - - for latest_checkpoint in tf.train.checkpoints_iterator( - self.checkpoint_dir, - timeout=_CHECKPOINT_TIMEOUT_SEC, - timeout_fn=self._timeout_fn): - try: - # `expect_partial` because the checkpoint can have other `Trackable`s - # such as `optimizer`. - checkpoint.restore(latest_checkpoint).expect_partial() - checkpoint_attributes = list_checkpoint_attributes(latest_checkpoint) - # The checkpoint should contain model and optimizer for SidecarEvaluator - # to work. But the model weights saved by ModelCheckpoint callback does - # not contain model as an attribute. To make SidecarEvaluator compatibly - # work in this case, use model.load_weights to load the model's weights, - # while self._iterations is still restored by checkpoint variable. 
- if 'model' not in checkpoint_attributes: - self.model.load_weights(latest_checkpoint) - # The model checkpoint might not include optimizer in cases, e.g. - # using a custom training loop. Directly assign the iterations - # property to be used in callbacks. - if self.model.optimizer: - self.model.optimizer.iterations.assign(self._iterations) - except (tf.errors.OpError,) as e: - # A couple errors can happen here with the coordinator racing to write - # checkpoint: - # 1) OpError: open failed for <file path>: No such file or directory - # 2) NotFoundError (subclass of OpError): Unsuccessful - # TensorSliceReader constructor. - # TODO(rchao): Remove this except block once b/150954027 is resolved. - logging.info( - 'SidecarEvaluator encountered an error when loading the checkpoint ' - f'at {latest_checkpoint}. Retrying. ' - f'Error: {e.__class__.__name__}: {e}') - continue - - if self._iterations.numpy() == _ITERATIONS_UNINITIALIZED: - raise RuntimeError( - 'Variable `iterations` cannot be loaded from the ' - f'checkpoint file at {self.checkpoint_dir}. ' - 'Please ensure `iterations` is ' - 'included in the checkpoint saved during training.') - - logging.info( - 'Evaluation starts: Model weights loaded from latest ' - f'checkpoint file {latest_checkpoint}') - - self.model.evaluate( - self.data, steps=self.steps, callbacks=self.callbacks, verbose=2) - - return_metrics = {} - for metric in self.model.metrics: - result = metric.result() - if isinstance(result, dict): - return_metrics.update(result) - else: - return_metrics[metric.name] = result - - logging.info( - 'End of evaluation. Metrics: %s', ' '.join([ - '{}={}'.format(name, value.numpy()) - for name, value in return_metrics.items() - ])) - - if (self.max_evaluations and - (self.max_evaluations <= int(latest_checkpoint.split('-')[-1]))): - # Exit the loop because we have evaluated the final checkpoint file. - logging.info('Last checkpoint evaluated. SidecarEvaluator stops.') - return - - -@keras_export('keras.experimental.SidecarEvaluator', v1=[]) -@deprecation.deprecated_endpoints('keras.experimental.SidecarEvaluator') -class SidecarEvaluatorExperimental(SidecarEvaluator): - """Deprecated. Please use `tf.keras.utils.SidecarEvaluator` instead. - - Caution: `tf.keras.experimental.SidecarEvaluator` endpoint is - deprecated and will be removed in a future release. Please use - `tf.keras.utils.SidecarEvaluator`. - """ - - def __init__(self, *args, **kwargs): - logging.warning( - '`tf.keras.experimental.SidecarEvaluator` endpoint is ' - 'deprecated and will be removed in a future release. Please use ' - '`tf.keras.utils.SidecarEvaluator`.') - super().__init__(*args, **kwargs) diff --git a/keras/distribute/sidecar_evaluator_test.py b/keras/distribute/sidecar_evaluator_test.py deleted file mode 100644 index 0d5b54dbd419..000000000000 --- a/keras/distribute/sidecar_evaluator_test.py +++ /dev/null @@ -1,305 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-# ============================================================================== -"""Test covering sidecar_evaluator.py.""" - -import enum -import os -import threading -import time - -from absl.testing import parameterized -import keras -from keras.distribute import sidecar_evaluator as sidecar_evaluator_lib -from keras.optimizers.optimizer_v2 import gradient_descent -from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.platform import tf_logging as logging - -_BATCH_SIZE = 32 - - -class TestModel(keras.Model): - - def __init__(self): - super().__init__(name='test_model') - self.dense = keras.layers.Dense(10) - - def call(self, inputs): - return self.dense(inputs) - - -class DictMetric(keras.metrics.MeanSquaredError): - - def result(self): - res = super().result() - return {'mean_squared_error_1': res, 'mean_squared_error_2': res} - - -class ModelType(enum.Enum): - SEQUENTIAL = 'sequential' - SUBCLASS = 'subclass' - - -def _test_model_builder(model_type: ModelType, compile_model, build_model): - if model_type == ModelType.SEQUENTIAL: - model = keras.Sequential([keras.layers.Dense(10)]) - elif model_type == ModelType.SUBCLASS: - model = TestModel() - - if compile_model: - model.compile( - gradient_descent.SGD(), - loss='mse', - metrics=[keras.metrics.CategoricalAccuracy(), - DictMetric()]) - if build_model: - model.build((None, 32)) - - return model - - -@test_utils.run_v2_only -class SidecarEvaluatorTest(tf.test.TestCase, parameterized.TestCase): - - def assertSummaryEventsWritten(self, log_dir): - # Asserts summary files do get written when log_dir is provided. - summary_files = tf.io.gfile.listdir(log_dir) - self.assertNotEmpty( - summary_files, 'Summary should have been written and ' - 'log_dir should not be empty.') - - # Asserts the content of the summary file. - event_pb_written = False - event_tags = [] - for summary_file in summary_files: - for event_pb in tf.compat.v1.train.summary_iterator( - os.path.join(log_dir, summary_file)): - if event_pb.step > 0: - self.assertEqual(event_pb.step, 32) - event_tags.append(event_pb.summary.value[0].tag) - event_pb_written = True - self.assertCountEqual(event_tags, [ - 'evaluation_categorical_accuracy_vs_iterations', - 'evaluation_loss_vs_iterations', - 'evaluation_mean_squared_error_1_vs_iterations', - 'evaluation_mean_squared_error_2_vs_iterations', - ]) - - # Verifying at least one non-zeroth step is written to summary. - self.assertTrue(event_pb_written) - - def assertModelsSameVariables(self, model_a, model_b): - # Check both have the same number of variables. - self.assertEqual(len(model_a.variables), len(model_b.variables)) - - # Check variable values to be equal. 
- for var_a, var_b in zip(model_a.variables, model_b.variables): - self.assertAllEqual(var_a.numpy(), var_b.numpy()) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], model_type=[ModelType.SEQUENTIAL, - ModelType.SUBCLASS])) - def testIterationsNotSavedWillRaiseError(self, model_type): - model = _test_model_builder( - model_type=model_type, compile_model=False, build_model=True) - - checkpoint_dir = self.get_temp_dir() - checkpoint = tf.train.Checkpoint(model=model) - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, checkpoint_dir, max_to_keep=2) - checkpoint_manager.save() - - sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator( - model, data=None, checkpoint_dir=checkpoint_dir) - with self.assertRaisesRegex( - RuntimeError, '`iterations` cannot be loaded ' - 'from the checkpoint file.'): - sidecar_evaluator.start() - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], model_type=[ModelType.SEQUENTIAL, - ModelType.SUBCLASS])) - def testModelNotBuiltRaiseError(self, model_type): - model = _test_model_builder( - model_type=model_type, compile_model=False, build_model=False) - - checkpoint_dir = self.get_temp_dir() - checkpoint = tf.train.Checkpoint(model=model) - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, checkpoint_dir, max_to_keep=2) - checkpoint_manager.save() - - sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator( - model, data=None, checkpoint_dir=checkpoint_dir) - with self.assertRaisesRegex(AssertionError, 'Nothing to load.'): - sidecar_evaluator.start() - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], - model_type=[ModelType.SEQUENTIAL, ModelType.SUBCLASS], - build_model=[True, False])) - def testSidecarEvaluatorOutputsSummary(self, model_type, build_model): - # Create a model with synthetic data, and fit for one epoch. - model = _test_model_builder( - model_type=model_type, compile_model=True, build_model=False) - data = np.random.random((1000, 32)) - labels = np.random.random((1000, 10)) - dataset = tf.data.Dataset.from_tensor_slices((data, labels)) - dataset = dataset.batch(32) - model.fit(dataset, epochs=1) - - # Save a checkpoint. - checkpoint_dir = os.path.join(self.get_temp_dir(), 'ckpt') - log_dir = os.path.join(self.get_temp_dir(), 'summary') - logging.info('checkpoint_dir = %s, log_dir = %s', checkpoint_dir, log_dir) - checkpoint = tf.train.Checkpoint( - model=model, optimizer=model.optimizer) - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, checkpoint_dir, max_to_keep=2) - logging.info('Checkpoint manager saved to: %s', checkpoint_manager.save()) - self.assertNotEmpty( - tf.io.gfile.listdir(checkpoint_dir), - 'Checkpoint should have been written and ' - 'checkpoint_dir should not be empty.') - - # Create a new model used for evaluation. - eval_model = _test_model_builder( - model_type=model_type, compile_model=True, build_model=build_model) - # Have a sidecar_evaluator evaluate once. - sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator( - eval_model, - data=dataset, - checkpoint_dir=checkpoint_dir, - max_evaluations=1, - callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)]) - sidecar_evaluator.start() - # Eval model has been restored to the same state as the original model, so - # their weights should match. If not, restoration of the model didn't - # work. 
- self.assertModelsSameVariables(model, eval_model) - - self.assertSummaryEventsWritten(os.path.join(log_dir, 'validation')) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], - model_type=[ModelType.SEQUENTIAL, ModelType.SUBCLASS], - build_model=[True, False])) - def testSidecarEvaluatorOutputsSummarySavedWithCallback( - self, model_type, build_model): - checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints') - log_dir = os.path.join(self.get_temp_dir(), 'summary') - # Create a model with synthetic data, and fit for one epoch. - model = _test_model_builder( - model_type=model_type, compile_model=True, build_model=False) - data = np.random.random((1000, 32)) - labels = np.random.random((1000, 10)) - dataset = tf.data.Dataset.from_tensor_slices((data, labels)) - dataset = dataset.batch(_BATCH_SIZE) - save_callback = keras.callbacks.ModelCheckpoint( - filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'), - save_weights_only=True) - model.fit(dataset, epochs=1, callbacks=[save_callback]) - self.assertNotEmpty( - tf.io.gfile.listdir(checkpoint_dir), - 'Checkpoint should have been written and ' - 'checkpoint_dir should not be empty.') - - # Create a new model used for evaluation. - eval_model = _test_model_builder( - model_type=model_type, compile_model=True, build_model=build_model) - # Have an sidecar_evaluator evaluate once. - sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator( - eval_model, - data=dataset, - checkpoint_dir=checkpoint_dir, - max_evaluations=1, - callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)]) - with self.assertLogs() as cm: - sidecar_evaluator.start() - - metrics_logging = [ - line for line in cm.output if 'End of evaluation' in line - ] - self.assertLen(metrics_logging, 1) - expected_logged_metrics = [ - 'loss', 'categorical_accuracy', 'mean_squared_error_1', - 'mean_squared_error_2' - ] - for metric_name in expected_logged_metrics: - self.assertRegex(metrics_logging[0], f'{metric_name}=') - - # Eval model has been restored to the same state as the original model, so - # their weights should match. If not, restoration of the model didn't - # work. - self.assertModelsSameVariables(model, eval_model) - - # check the iterations is restored. - self.assertEqual(sidecar_evaluator._iterations.numpy(), _BATCH_SIZE) - - self.assertSummaryEventsWritten(os.path.join(log_dir, 'validation')) - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], - model_type=[ModelType.SEQUENTIAL, ModelType.SUBCLASS], - build_model=[True, False])) - def testTimeoutFunction(self, model_type, build_model): - checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints') - # Create a model with synthetic data, and fit for one epoch. - data = np.random.random((1000, 32)) - labels = np.random.random((1000, 10)) - dataset = tf.data.Dataset.from_tensor_slices((data, labels)) - dataset = dataset.batch(_BATCH_SIZE) - - # Create a new model used for evaluation. - eval_model = _test_model_builder( - model_type=model_type, compile_model=True, build_model=build_model) - # Have an sidecar_evaluator evaluate once. 
- sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator( - eval_model, - data=dataset, - checkpoint_dir=checkpoint_dir, - max_evaluations=1) - with self.assertLogs() as cm: - threading.Thread(target=sidecar_evaluator.start, daemon=True).start() - time.sleep(50) - - metrics_logging = [ - l for l in cm.output if 'No checkpoints appear to be found' in l - ] - self.assertGreaterEqual(len(metrics_logging), 1) - - def testExperimentalDeprecatedMessage(self): - - warning_messages = [] - - def warning(msg): - warning_messages.append(msg) - - with tf.compat.v1.test.mock.patch.object(logging, 'warning', warning): - sidecar_evaluator_lib.SidecarEvaluatorExperimental(None, None, None) - - warning_msg = ('`tf.keras.experimental.SidecarEvaluator` ' - 'endpoint is deprecated') - self.assertIn(warning_msg, '\n'.join(warning_messages)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/distribute/simple_models.py b/keras/distribute/simple_models.py index e9f751fc87d7..0b5384e12f85 100644 --- a/keras/distribute/simple_models.py +++ b/keras/distribute/simple_models.py @@ -14,126 +14,115 @@ # ============================================================================== """A simple functional keras model with one layer.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.distribute import model_collection_base -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import gradient_descent _BATCH_SIZE = 10 def _get_data_for_simple_models(): - x_train = tf.constant(np.random.rand(1000, 3), dtype=tf.float32) - y_train = tf.constant(np.random.rand(1000, 5), dtype=tf.float32) - x_predict = tf.constant( - np.random.rand(1000, 3), dtype=tf.float32) + x_train = tf.constant(np.random.rand(1000, 3), dtype=tf.float32) + y_train = tf.constant(np.random.rand(1000, 5), dtype=tf.float32) + x_predict = tf.constant(np.random.rand(1000, 3), dtype=tf.float32) - return x_train, y_train, x_predict + return x_train, y_train, x_predict class SimpleFunctionalModel(model_collection_base.ModelAndInput): - """A simple functional model and its inputs.""" + """A simple functional model and its inputs.""" - def get_model(self, **kwargs): - output_name = 'output_1' + def get_model(self, **kwargs): + output_name = "output_1" - x = keras.layers.Input(shape=(3,), dtype=tf.float32) - y = keras.layers.Dense(5, dtype=tf.float32, name=output_name)(x) + x = keras.layers.Input(shape=(3,), dtype=tf.float32) + y = keras.layers.Dense(5, dtype=tf.float32, name=output_name)(x) - model = keras.Model(inputs=x, outputs=y) - optimizer = gradient_descent.SGD(learning_rate=0.001) - model.compile( - loss='mse', - metrics=['mae'], - optimizer=optimizer) + model = keras.Model(inputs=x, outputs=y) + optimizer = gradient_descent.SGD(learning_rate=0.001) + model.compile(loss="mse", metrics=["mae"], optimizer=optimizer) - return model + return model - def get_data(self): - return _get_data_for_simple_models() + def get_data(self): + return _get_data_for_simple_models() - def get_batch_size(self): - return _BATCH_SIZE + def get_batch_size(self): + return _BATCH_SIZE class SimpleSequentialModel(model_collection_base.ModelAndInput): - """A simple sequential model and its inputs.""" + """A simple sequential model and its inputs.""" - def get_model(self, **kwargs): - output_name = 'output_1' + def get_model(self, **kwargs): + output_name = "output_1" - model = keras.Sequential() - y = keras.layers.Dense( - 5, dtype=tf.float32, name=output_name, 
input_dim=3) - model.add(y) - optimizer = gradient_descent.SGD(learning_rate=0.001) - model.compile( - loss='mse', - metrics=['mae'], - optimizer=optimizer) + model = keras.Sequential() + y = keras.layers.Dense( + 5, dtype=tf.float32, name=output_name, input_dim=3 + ) + model.add(y) + optimizer = gradient_descent.SGD(learning_rate=0.001) + model.compile(loss="mse", metrics=["mae"], optimizer=optimizer) - return model + return model - def get_data(self): - return _get_data_for_simple_models() + def get_data(self): + return _get_data_for_simple_models() - def get_batch_size(self): - return _BATCH_SIZE + def get_batch_size(self): + return _BATCH_SIZE class _SimpleModel(keras.Model): + def __init__(self): + super().__init__() + self._dense_layer = keras.layers.Dense(5, dtype=tf.float32) - def __init__(self): - super().__init__() - self._dense_layer = keras.layers.Dense(5, dtype=tf.float32) - - def call(self, inputs): - return self._dense_layer(inputs) + def call(self, inputs): + return self._dense_layer(inputs) class SimpleSubclassModel(model_collection_base.ModelAndInput): - """A simple subclass model and its data.""" + """A simple subclass model and its data.""" - def get_model(self, **kwargs): - model = _SimpleModel() - optimizer = gradient_descent.SGD(learning_rate=0.001) - model.compile( - loss='mse', - metrics=['mae'], - cloning=False, - optimizer=optimizer) + def get_model(self, **kwargs): + model = _SimpleModel() + optimizer = gradient_descent.SGD(learning_rate=0.001) + model.compile( + loss="mse", metrics=["mae"], cloning=False, optimizer=optimizer + ) - return model + return model - def get_data(self): - return _get_data_for_simple_models() + def get_data(self): + return _get_data_for_simple_models() - def get_batch_size(self): - return _BATCH_SIZE + def get_batch_size(self): + return _BATCH_SIZE class _SimpleModule(tf.Module): + def __init__(self): + self.v = tf.Variable(3.0) - def __init__(self): - self.v = tf.Variable(3.0) - - @tf.function - def __call__(self, x): - return self.v * x + @tf.function + def __call__(self, x): + return self.v * x class SimpleTFModuleModel(model_collection_base.ModelAndInput): - """A simple model based on tf.Module and its data.""" + """A simple model based on tf.Module and its data.""" - def get_model(self, **kwargs): - model = _SimpleModule() - return model + def get_model(self, **kwargs): + model = _SimpleModule() + return model - def get_data(self): - return _get_data_for_simple_models() + def get_data(self): + return _get_data_for_simple_models() - def get_batch_size(self): - return _BATCH_SIZE + def get_batch_size(self): + return _BATCH_SIZE diff --git a/keras/distribute/strategy_combinations.py b/keras/distribute/strategy_combinations.py index 5b38b9a24aa0..8261e2386ce7 100644 --- a/keras/distribute/strategy_combinations.py +++ b/keras/distribute/strategy_combinations.py @@ -16,7 +16,6 @@ import tensorflow.compat.v2 as tf - multidevice_strategies = [ tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, @@ -26,7 +25,7 @@ multiworker_strategies = [ tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu, tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x2_gpu + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x2_gpu, ] strategies_minus_default_minus_tpu = [ @@ -34,7 +33,7 @@ tf.__internal__.distribute.combinations.one_device_strategy_gpu, 
tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, - tf.__internal__.distribute.combinations.central_storage_strategy_with_gpu_and_cpu + tf.__internal__.distribute.combinations.central_storage_strategy_with_gpu_and_cpu, # noqa: E501 ] strategies_minus_tpu = [ @@ -43,13 +42,13 @@ tf.__internal__.distribute.combinations.one_device_strategy_gpu, tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu, tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus, - tf.__internal__.distribute.combinations.central_storage_strategy_with_gpu_and_cpu + tf.__internal__.distribute.combinations.central_storage_strategy_with_gpu_and_cpu, # noqa: E501 ] multi_worker_mirrored_strategies = [ tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_cpu, tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu, - tf.__internal__.distribute.combinations.multi_worker_mirrored_2x2_gpu + tf.__internal__.distribute.combinations.multi_worker_mirrored_2x2_gpu, ] tpu_strategies = [ @@ -57,13 +56,13 @@ ] parameter_server_strategies_single_worker = [ - tf.__internal__.distribute.combinations.parameter_server_strategy_1worker_2ps_cpu, - tf.__internal__.distribute.combinations.parameter_server_strategy_1worker_2ps_1gpu, + tf.__internal__.distribute.combinations.parameter_server_strategy_1worker_2ps_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.parameter_server_strategy_1worker_2ps_1gpu, # noqa: E501 ] parameter_server_strategies_multi_worker = [ - tf.__internal__.distribute.combinations.parameter_server_strategy_3worker_2ps_cpu, - tf.__internal__.distribute.combinations.parameter_server_strategy_3worker_2ps_1gpu, + tf.__internal__.distribute.combinations.parameter_server_strategy_3worker_2ps_cpu, # noqa: E501 + tf.__internal__.distribute.combinations.parameter_server_strategy_3worker_2ps_1gpu, # noqa: E501 ] all_strategies = strategies_minus_tpu + tpu_strategies diff --git a/keras/distribute/test_example.py b/keras/distribute/test_example.py index 5d6e5981d2ef..aa216592b781 100644 --- a/keras/distribute/test_example.py +++ b/keras/distribute/test_example.py @@ -14,78 +14,95 @@ # ============================================================================== """A simple network to use in tests and examples.""" +import tensorflow.compat.v2 as tf + from keras.legacy_tf_layers import core from keras.legacy_tf_layers import normalization -from keras.optimizers.optimizer_v2 import optimizer_v2 - -import tensorflow.compat.v2 as tf +from keras.optimizers.legacy import optimizer_v2 def minimize_loss_example(optimizer, use_bias=False, use_callable_loss=True): - """Example of non-distribution-aware legacy code.""" - - def dataset_fn(): - dataset = tf.data.Dataset.from_tensors([[1.]]).repeat() - # TODO(isaprykin): batch with drop_remainder causes shapes to be - # fully defined for TPU. Remove this when XLA supports dynamic shapes. - return dataset.batch(1, drop_remainder=True) - - layer = core.Dense(1, use_bias=use_bias) - - def model_fn(x): - """A very simple model written by the user.""" - - def loss_fn(): - y = tf.reshape(layer(x), []) - tf.constant(1.) 
- return y * y - - if isinstance(optimizer, optimizer_v2.OptimizerV2): - return optimizer.minimize(loss_fn, lambda: layer.trainable_variables) - elif use_callable_loss: - return optimizer.minimize(loss_fn) - else: - return optimizer.minimize(loss_fn()) - - return model_fn, dataset_fn, layer - - -def batchnorm_example(optimizer_fn, - batch_per_epoch=1, - momentum=0.9, - renorm=False, - update_ops_in_replica_mode=False): - """Example of non-distribution-aware legacy code with batch normalization.""" - - def dataset_fn(): - # input shape is [16, 8], input values are increasing in both dimensions. - return tf.data.Dataset.from_tensor_slices( - [[[float(x * 8 + y + z * 100) - for y in range(8)] - for x in range(16)] - for z in range(batch_per_epoch)]).repeat() - - optimizer = optimizer_fn() - batchnorm = normalization.BatchNormalization( - renorm=renorm, momentum=momentum, fused=False) - layer = core.Dense(1, use_bias=False) - - def model_fn(x): - """A model that uses batchnorm.""" - - def loss_fn(): - y = batchnorm(x, training=True) - with tf.control_dependencies( - tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) - if update_ops_in_replica_mode else []): - loss = tf.reduce_mean( - tf.reduce_sum(layer(y)) - tf.constant(1.)) - # `x` and `y` will be fetched by the gradient computation, but not `loss`. - return loss - - if isinstance(optimizer, optimizer_v2.OptimizerV2): - return optimizer.minimize(loss_fn, lambda: layer.trainable_variables) - - # Callable loss. - return optimizer.minimize(loss_fn) - - return model_fn, dataset_fn, batchnorm + """Example of non-distribution-aware legacy code.""" + + def dataset_fn(): + dataset = tf.data.Dataset.from_tensors([[1.0]]).repeat() + # TODO(isaprykin): batch with drop_remainder causes shapes to be + # fully defined for TPU. Remove this when XLA supports dynamic shapes. + return dataset.batch(1, drop_remainder=True) + + layer = core.Dense(1, use_bias=use_bias) + + def model_fn(x): + """A very simple model written by the user.""" + + def loss_fn(): + y = tf.reshape(layer(x), []) - tf.constant(1.0) + return y * y + + if isinstance(optimizer, optimizer_v2.OptimizerV2): + return optimizer.minimize( + loss_fn, lambda: layer.trainable_variables + ) + elif use_callable_loss: + return optimizer.minimize(loss_fn) + else: + return optimizer.minimize(loss_fn()) + + return model_fn, dataset_fn, layer + + +def batchnorm_example( + optimizer_fn, + batch_per_epoch=1, + momentum=0.9, + renorm=False, + update_ops_in_replica_mode=False, +): + """Example of non-distribution-aware legacy code with batch + normalization.""" + + def dataset_fn(): + # input shape is [16, 8], input values are increasing in both + # dimensions. + return tf.data.Dataset.from_tensor_slices( + [ + [ + [float(x * 8 + y + z * 100) for y in range(8)] + for x in range(16) + ] + for z in range(batch_per_epoch) + ] + ).repeat() + + optimizer = optimizer_fn() + batchnorm = normalization.BatchNormalization( + renorm=renorm, momentum=momentum, fused=False + ) + layer = core.Dense(1, use_bias=False) + + def model_fn(x): + """A model that uses batchnorm.""" + + def loss_fn(): + y = batchnorm(x, training=True) + with tf.control_dependencies( + tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) + if update_ops_in_replica_mode + else [] + ): + loss = tf.reduce_mean( + tf.reduce_sum(layer(y)) - tf.constant(1.0) + ) + # `x` and `y` will be fetched by the gradient computation, but not + # `loss`. 
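+            # The `control_dependencies` wrapper above makes the legacy
+            # UPDATE_OPS collection (batchnorm moving-statistics updates) run
+            # before the loss is computed when `update_ops_in_replica_mode`
+            # is set.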
+ return loss + + if isinstance(optimizer, optimizer_v2.OptimizerV2): + return optimizer.minimize( + loss_fn, lambda: layer.trainable_variables + ) + + # Callable loss. + return optimizer.minimize(loss_fn) + + return model_fn, dataset_fn, batchnorm diff --git a/keras/distribute/tpu_strategy_test_utils.py b/keras/distribute/tpu_strategy_test_utils.py index 8a167fbb40bb..f94c3d3cf2ea 100644 --- a/keras/distribute/tpu_strategy_test_utils.py +++ b/keras/distribute/tpu_strategy_test_utils.py @@ -15,7 +15,6 @@ """Utility functions for tests using TPUStrategy.""" import tensorflow.compat.v2 as tf - from absl import flags FLAGS = flags.FLAGS @@ -25,16 +24,16 @@ def get_tpu_cluster_resolver(): - resolver = tf.distribute.cluster_resolver.TPUClusterResolver( - tpu=FLAGS.tpu, - zone=FLAGS.zone, - project=FLAGS.project, - ) - return resolver + resolver = tf.distribute.cluster_resolver.TPUClusterResolver( + tpu=FLAGS.tpu, + zone=FLAGS.zone, + project=FLAGS.project, + ) + return resolver def get_tpu_strategy(): - resolver = get_tpu_cluster_resolver() - tf.config.experimental_connect_to_cluster(resolver) - tf.tpu.experimental.initialize_tpu_system(resolver) - return tf.distribute.experimental.TPUStrategy(resolver) + resolver = get_tpu_cluster_resolver() + tf.config.experimental_connect_to_cluster(resolver) + tf.tpu.experimental.initialize_tpu_system(resolver) + return tf.distribute.experimental.TPUStrategy(resolver) diff --git a/keras/distribute/worker_training_state.py b/keras/distribute/worker_training_state.py index ff550dae11a1..335feedc8174 100644 --- a/keras/distribute/worker_training_state.py +++ b/keras/distribute/worker_training_state.py @@ -14,126 +14,212 @@ # ============================================================================== """Training state management.""" +import os + import tensorflow.compat.v2 as tf -import os from keras import backend from keras.distribute import distributed_file_utils from keras.utils import mode_keys -# Constant for `tf.keras.Model` attribute to store the epoch at which the most -# recently saved checkpoint was saved. -CKPT_SAVED_EPOCH = '_ckpt_saved_epoch' +# isort: off +from keras.distribute.distributed_file_utils import ( + support_on_demand_checkpoint_callback, +) # noqa: E501 -CKPT_SAVED_EPOCH_UNUSED_VALUE = -1 +MAX_CHECKPOINT_TO_KEEP = 1 -class WorkerTrainingState: - """Training state management class. - - This class provides apis for backing up and restoring the training state. - This allows model and epoch information to be saved periodically and restore - for fault-tolerance, also known as preemption-recovery purpose. - """ - - def __init__(self, model, checkpoint_dir): - self._model = model - - # The epoch at which the checkpoint is saved. Used for fault-tolerance. - # GPU device only has int64 dtype registered VarHandleOp. - self._ckpt_saved_epoch = tf.Variable( - initial_value=tf.constant( - CKPT_SAVED_EPOCH_UNUSED_VALUE, dtype=tf.int64), - name='ckpt_saved_epoch') - - # Variable initialization. - backend.set_value(self._ckpt_saved_epoch, CKPT_SAVED_EPOCH_UNUSED_VALUE) - - # _ckpt_saved_epoch gets tracked and is included in the checkpoint file - # when backing up. - checkpoint = tf.train.Checkpoint( - model=self._model, ckpt_saved_epoch=self._ckpt_saved_epoch, - train_counter=self._model._train_counter) - - # If this is single-worker training, checkpoint_dir are the same for - # write_checkpoint_manager and read_checkpoint_manager. 
-    #
-    # If this is multi-worker training, and this worker should not
-    # save checkpoint, we replace the write_checkpoint_manager's checkpoint_dir
-    # with a temp filepath, so it writes to a file that will be removed at the
-    # end of back_up() call. This is necessary because the SyncOnReadVariable
-    # needs to be synced across all the workers in order to be read, and all
-    # workers need to perform `save()`.
-    # But all workers should restore from the same checkpoint_dir as passed in
-    # read_checkpoint_manager.
-    self.read_checkpoint_manager = tf.train.CheckpointManager(
-        checkpoint,
-        directory=os.path.join(checkpoint_dir, 'chief'),
-        max_to_keep=1)
-    write_checkpoint_dir = distributed_file_utils.write_dirpath(
-        checkpoint_dir, self._model.distribute_strategy)
-    if self._model.distribute_strategy.extended.should_checkpoint:
-      self.write_checkpoint_manager = self.read_checkpoint_manager
-    else:
-      self.write_checkpoint_manager = tf.train.CheckpointManager(
-          checkpoint, directory=write_checkpoint_dir, max_to_keep=1)
-
-  def back_up(self, epoch):
-    """Back up the current state of training into a checkpoint file.
-
-    Args:
-      epoch: The current epoch information to be saved.
-    """
-    backend.set_value(self._ckpt_saved_epoch, epoch)
-    # Save the model plus CKPT_SAVED_EPOCH variable.
-    if self.write_checkpoint_manager.save():
-      distributed_file_utils.remove_temp_dirpath(
-          self.write_checkpoint_manager.directory,
-          self._model.distribute_strategy)
-
-  def restore(self):
-    """Restore the training state from the backed up checkpoint file.
-
-    Returns:
-      True if the training state is successfully restored. False if the training
-      state doesn't need to be restored, or error occurred so it can't.
-    """
-    self.read_checkpoint_manager.restore_or_initialize()

-  def delete_backup(self):
-    """Delete the backup directories.
+class WorkerTrainingState:
+    """Training state management class.

-    Delete the backup directories which should not exist after `fit()`
-    successfully finishes.
-    """
-    if self.write_checkpoint_manager is self.read_checkpoint_manager:
-      try:
-        tf.io.gfile.rmtree(self.write_checkpoint_manager.directory)
-      except tf.errors.NotFoundError:
-        pass
-
-  def maybe_load_initial_epoch_from_ckpt(self, initial_epoch, mode):
-    """Maybe load initial epoch from ckpt considering possible worker recovery.
-
-    When `_ckpt_saved_epoch` attribute exists and is not
-    `CKPT_SAVED_EPOCH_UNUSED_VALUE`, this is under multi-worker training setting
-    and indicates the worker is recovering from previous failure. In this case,
-    infer `initial_epoch` from `self._ckpt_saved_epoch` to continue previous
-    unfinished training from certain epoch.
-
-    Args:
-      initial_epoch: The original initial_epoch user passes in in `fit()`.
-      mode: The mode for running `model.fit()`.
-
-    Returns:
-      If the training is recovering from previous failure under multi-worker
-      training setting, return the epoch the training is supposed to continue
-      at. Otherwise, return the `initial_epoch` the user passes in.
+    This class provides APIs for backing up and restoring the training state.
+    It allows model, epoch, and batch information to be saved periodically
+    and restored for fault tolerance (also known as preemption recovery).
     """
-    epoch = backend.eval(self._ckpt_saved_epoch)
-    if mode == mode_keys.ModeKeys.TRAIN and epoch >= 0:
-      # The most recently saved epoch is one epoch prior to the epoch it
-      # failed at, so return the value of 'self._ckpt_saved_epoch' plus one.
-      return epoch + 1
-    return initial_epoch
+    # Constant for `tf.keras.Model` attribute to store the epoch and batch
+    # at which the most recently saved checkpoint was saved.
+    CKPT_SAVED_EPOCH_UNUSED_VALUE = -1
+
+    CKPT_SAVED_BATCH_UNUSED_VALUE = -1
+
+    def __init__(
+        self,
+        model,
+        checkpoint_dir,
+        save_freq="epoch",
+        save_before_preemption_arg=None,
+    ):
+        self._enable_save_before_preemption = save_before_preemption_arg and (
+            support_on_demand_checkpoint_callback(model.distribute_strategy)
+        )
+        self._model = model
+
+        self._save_freq = save_freq
+        # The batch and epoch at which the checkpoint is saved. Used for
+        # fault tolerance. GPU devices only have an int64-dtype VarHandleOp
+        # registered.
+        self._ckpt_saved_epoch = tf.Variable(
+            initial_value=tf.constant(
+                self.CKPT_SAVED_EPOCH_UNUSED_VALUE, dtype=tf.int64
+            ),
+            name="ckpt_saved_epoch",
+        )
+        self._ckpt_saved_batch = tf.Variable(
+            initial_value=tf.constant(
+                self.CKPT_SAVED_BATCH_UNUSED_VALUE, dtype=tf.int64
+            ),
+            name="ckpt_saved_batch",
+        )
+        # Variable initialization.
+        backend.set_value(
+            self._ckpt_saved_epoch, self.CKPT_SAVED_EPOCH_UNUSED_VALUE
+        )
+        backend.set_value(
+            self._ckpt_saved_batch, self.CKPT_SAVED_BATCH_UNUSED_VALUE
+        )
+        # _ckpt_saved_epoch and _ckpt_saved_batch get tracked and are included
+        # in the checkpoint file when backing up.
+        checkpoint = tf.train.Checkpoint(
+            model=self._model,
+            ckpt_saved_epoch=self._ckpt_saved_epoch,
+            ckpt_saved_batch=self._ckpt_saved_batch,
+            train_counter=self._model._train_counter,
+        )
+
+        # If this is single-worker training, the checkpoint_dir is the same
+        # for write_checkpoint_manager and read_checkpoint_manager.
+        #
+        # If this is multi-worker training, and this worker should not save
+        # checkpoint, we replace the write_checkpoint_manager's checkpoint_dir
+        # with a temp filepath, so it writes to a file that will be removed at
+        # the end of back_up() call. This is necessary because the
+        # SyncOnReadVariable needs to be synced across all the workers in order
+        # to be read, and all workers need to perform `save()`. But all workers
+        # should restore from the same checkpoint_dir as passed in
+        # read_checkpoint_manager.
+        self.read_checkpoint_manager = tf.train.CheckpointManager(
+            checkpoint,
+            directory=os.path.join(checkpoint_dir, "chief"),
+            max_to_keep=MAX_CHECKPOINT_TO_KEEP,
+        )
+        write_checkpoint_dir = distributed_file_utils.write_dirpath(
+            checkpoint_dir, self._model.distribute_strategy
+        )
+        if self._model.distribute_strategy.extended.should_checkpoint:
+            self.write_checkpoint_manager = self.read_checkpoint_manager
+        else:
+            self.write_checkpoint_manager = tf.train.CheckpointManager(
+                checkpoint,
+                directory=write_checkpoint_dir,
+                max_to_keep=MAX_CHECKPOINT_TO_KEEP,
+            )
+
+        if self._enable_save_before_preemption:
+            self.preemption_handler = (
+                tf.distribute.experimental.PreemptionCheckpointHandler(
+                    self._model.distribute_strategy.cluster_resolver,
+                    self.write_checkpoint_manager,
+                )
+            )
+            self.preemption_handler._read_checkpoint_manager = (
+                self.read_checkpoint_manager
+            )
+            self._model._preemption_handler = self.preemption_handler
+
+    def back_up(self, epoch, batch=0):
+        """Back up the current state of training into a checkpoint file.
+
+        Args:
+            epoch: The current epoch information to be saved.
+            batch: The current batch (step) information to be saved.
+        """
+        # Save the model plus CKPT_SAVED_EPOCH and CKPT_SAVED_BATCH variables.
+        if self.write_checkpoint_manager.save():
+            distributed_file_utils.remove_temp_dirpath(
+                self.write_checkpoint_manager.directory,
+                self._model.distribute_strategy,
+            )
+
+    def backup_if_preempted(self):
+        if self._enable_save_before_preemption:
+            self.preemption_handler._run_counter += 1
+            self.preemption_handler._check_preemption_and_maybe_checkpoint()
+
+    def restore(self):
+        """Restore the training state from the backed up checkpoint file.
+
+        Returns:
+            True if the training state is successfully restored. False if the
+            training state doesn't need to be restored, or error occurred so it
+            can't.
+        """
+        # When creating the PreemptionCheckpointHandler object, we have already
+        # restored the checkpoint.
+        if not self._enable_save_before_preemption:
+            self.read_checkpoint_manager.restore_or_initialize()
+
+    def delete_backup(self):
+        """Delete the backup directories.
+
+        Delete the backup directories which should not exist after `fit()`
+        successfully finishes.
+        """
+        if self.write_checkpoint_manager is self.read_checkpoint_manager:
+            try:
+                tf.io.gfile.rmtree(self.write_checkpoint_manager.directory)
+            except tf.errors.NotFoundError:
+                pass
+
+    def maybe_load_initial_counters_from_ckpt(
+        self, steps_per_epoch, initial_epoch, mode
+    ):
+        """Maybe load initial epoch and step from checkpoint for recovery.
+
+        When the `_ckpt_saved_epoch` attribute exists and is not
+        `CKPT_SAVED_EPOCH_UNUSED_VALUE`, this is under a multi-worker training
+        setting and indicates the worker is recovering from a previous
+        failure. In this case, infer `initial_epoch` from
+        `self._ckpt_saved_epoch` to continue the previous unfinished training
+        from a certain epoch.
+
+        Args:
+            steps_per_epoch: The number of steps per epoch.
+            initial_epoch: The original initial_epoch the user passed in to
+                `fit()`.
+            mode: The mode for running `model.fit()`.
+
+        Returns:
+            If the training is recovering from a previous failure under a
+            multi-worker training setting, return the `(epoch, step)` pair at
+            which training should continue. Otherwise, return the
+            `(initial_epoch, initial_step)` the user passed in.
+        """
+
+        initial_step = 0
+        epoch = backend.eval(self._ckpt_saved_epoch)
+        batch = backend.eval(self._ckpt_saved_batch)
+        if mode == mode_keys.ModeKeys.TRAIN:
+            # For batch-level saving.
+            if self._enable_save_before_preemption or isinstance(
+                self._save_freq, int
+            ):
+                if batch >= 0:
+                    # If the checkpoint was last saved at the last batch of
+                    # the epoch, return the next epoch number and batch=0.
+                    if batch == steps_per_epoch - 1:
+                        initial_epoch = epoch + 1
+                        initial_step = 0
+                    else:
+                        # If the checkpoint was not saved at the last batch
+                        # of the epoch, return the same epoch and the next
+                        # batch number.
+                        initial_epoch = epoch
+                        initial_step = batch + 1
+            else:
+                if epoch >= 0:
+                    # The most recently saved epoch is one epoch prior to the
+                    # epoch it failed at, so return the value of
+                    # 'self._ckpt_saved_epoch' plus one.
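+                    # For example, a checkpoint holding epoch == 2 resumes
+                    # training from initial_epoch == 3.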
+ initial_epoch = epoch + 1 + + return (initial_epoch, initial_step) diff --git a/keras/distribute/worker_training_state_test.py b/keras/distribute/worker_training_state_test.py index b63f0525f043..c2d3cde468d2 100644 --- a/keras/distribute/worker_training_state_test.py +++ b/keras/distribute/worker_training_state_test.py @@ -14,40 +14,40 @@ # ============================================================================== """Tests of `worker_training_state.py` utilities.""" -import tensorflow.compat.v2 as tf - import os import sys +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras import callbacks from keras.distribute import multi_worker_testing_utils -class ModelCheckpointTest(tf.test.TestCase, parameterized.TestCase): - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], - file_format=['h5', 'tf'], - save_weights_only=[True, False])) - def testCheckpointExists(self, file_format, save_weights_only): - train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset(64, 2) - model = multi_worker_testing_utils.get_mnist_model((28, 28, 1)) - saving_dir = self.get_temp_dir() - saving_filepath = os.path.join(saving_dir, 'checkpoint.' + file_format) - callbacks_list = [ - callbacks.ModelCheckpoint( - filepath=saving_filepath, save_weights_only=save_weights_only) - ] - self.assertFalse(tf.io.gfile.exists(saving_filepath)) - model.fit(x=train_ds, epochs=2, steps_per_epoch=2, callbacks=callbacks_list) - tf_saved_model_exists = tf.io.gfile.exists(saving_filepath) - tf_weights_only_checkpoint_exists = tf.io.gfile.exists(saving_filepath + - '.index') - self.assertTrue(tf_saved_model_exists or tf_weights_only_checkpoint_exists) - - -if __name__ == '__main__': - with tf.compat.v1.test.mock.patch.object(sys, 'exit', os._exit): - tf.test.main() +class WorkerTrainingStateTest(tf.test.TestCase, parameterized.TestCase): + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine(mode=["eager"]) + ) + def testCheckpointExists(self): + train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset(64, 2) + model = multi_worker_testing_utils.get_mnist_model((28, 28, 1)) + saving_dir = self.get_temp_dir() + callbacks_list = [ + callbacks.BackupAndRestore( + backup_dir=saving_dir, delete_checkpoint=False + ) + ] + self.assertLen(tf.io.gfile.glob(os.path.join(saving_dir, "*")), 0) + model.fit( + x=train_ds, epochs=2, steps_per_epoch=2, callbacks=callbacks_list + ) + # By default worker_training_state only keeps the results from one + # checkpoint. Even though the test is expected to checkpoint twice, it + # only keeps the checkpoint files from the second checkpoint. + checkpoint_path = os.path.join(saving_dir, "chief", "ckpt-2.index") + self.assertLen(tf.io.gfile.glob(checkpoint_path), 1) + + +if __name__ == "__main__": + with tf.compat.v1.test.mock.patch.object(sys, "exit", os._exit): + tf.test.main() diff --git a/keras/dtensor/BUILD b/keras/dtensor/BUILD index 65b9d509b295..79716c1a3c4a 100644 --- a/keras/dtensor/BUILD +++ b/keras/dtensor/BUILD @@ -2,11 +2,22 @@ # Since DTensor is not a public API yet, all the DTensor related change # can't be exposed to public yet. 
+# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") +# copybara:uncomment_begin(google-only) +# load( +# "//third_party/tensorflow/dtensor:build_defs.bzl", +# "dtensor_test", +# ) +# copybara:uncomment_end + package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", + "//learning/brain/distribute/experimental/auto_distribute:__pkg__", + "//learning/brain/distribute/python:__subpackages__", "//learning/brain/experimental/dtensor/models:__subpackages__", ], licenses = ["notice"], @@ -15,6 +26,9 @@ package( py_library( name = "dtensor", srcs = ["__init__.py"], + deps = [ + "//:expect_tensorflow_installed", + ], ) tf_py_test( @@ -65,12 +79,13 @@ tf_py_test( deps = [ ":dtensor", ":layout_map", + ":test_util", "//:expect_numpy_installed", "//:expect_tensorflow_installed", "//keras:backend", "//keras/layers", + "//keras/models", "//keras/utils:tf_utils", - "//learning/brain/experimental/dtensor/tests:test_util", ], ) @@ -105,42 +120,45 @@ tf_py_test( ], ) -tf_py_test( - name = "mnist_model_test", - srcs = ["mnist_model_test.py"], - tags = [ - "requires-net:external", - ], - deps = [ - ":integration_test_utils", - ":optimizers", - ":test_util", - "//:expect_numpy_installed", - "//:expect_tensorflow_installed", - "//keras/utils:tf_utils", - ], -) - -py_library( - name = "optimizers", - srcs = ["optimizers.py"], - deps = [ - ":dtensor", - "//:expect_tensorflow_installed", - "//keras/optimizers/optimizer_experimental:optimizer", - "//keras/optimizers/schedules:learning_rate_schedule", - ], -) +# copybara:uncomment_begin(google-only) +# dtensor_test( +# name = "mnist_model_test", +# srcs = ["mnist_model_test.py"], +# env = { +# "CUDA_MODULE_LOADING": "LAZY", +# "TF_GPU_ALLOCATOR": "cuda_malloc_async", +# }, +# tags = [ +# "no_oss", +# "requires-net:external", +# ], +# deps = [ +# ":dtensor", +# ":integration_test_utils", +# ":layout_map", +# ":test_util", +# "//keras:backend", +# "//keras/optimizers", +# "//keras/utils:tf_utils", +# "//:expect_numpy_installed", +# "//:expect_tensorflow_installed", +# ], +# ) +# copybara:uncomment_end tf_py_test( name = "optimizers_test", srcs = ["optimizers_test.py"], deps = [ ":dtensor", - ":optimizers", + ":layout_map", ":test_util", "//:expect_numpy_installed", "//:expect_tensorflow_installed", + "//keras:losses", + "//keras/layers", + "//keras/models", + "//keras/optimizers", ], ) @@ -184,3 +202,44 @@ py_library( "//:expect_tensorflow_installed", ], ) + +tf_py_test( + name = "save_load_test", + srcs = ["save_load_test.py"], + deps = [ + ":dtensor", + ":layout_map", + ":test_util", + "//keras", + "//keras:backend", + "//keras/layers", + "//keras/models", + "//keras/utils:tf_utils", + ], +) + +# copybara:uncomment_begin(google-only) +# dtensor_test( +# name = "strategy_integration_test", +# srcs = ["strategy_integration_test.py"], +# shard_count = { +# "CPU": 2, +# "GPU": 4, +# "TPU": 2, +# }, +# tags = ["no_oss"], +# deps = [ +# ":integration_test_utils", +# ":test_util", +# "//:expect_absl_installed", +# "//keras:backend", +# "//keras/mixed_precision:mixed_precision_experimental", +# "//keras/optimizers", +# "//keras/utils:tf_utils", +# "//:expect_numpy_installed", +# "//:expect_tensorflow_installed", +# "//third_party/tensorflow/dtensor/python/tests:test_util", +# "//third_party/tensorflow/python/distribute/experimental:mirrored_strategy", +# ], +# ) +# copybara:uncomment_end diff --git a/keras/dtensor/__init__.py b/keras/dtensor/__init__.py index 
ec4357740cc4..59a004592af3 100644 --- a/keras/dtensor/__init__.py +++ b/keras/dtensor/__init__.py @@ -14,13 +14,5 @@ # ============================================================================== """Keras' DTensor library.""" -_DTENSOR_API_ENABLED = True - -# Conditional import the dtensor API, since it is currently broken in OSS. -if _DTENSOR_API_ENABLED: - from tensorflow.compat.v2.experimental import dtensor as dtensor_api # pylint: disable=g-import-not-at-top -else: - # Leave it with a placeholder, so that the import line from other python file - # will not break. - dtensor_api = None +from tensorflow.compat.v2.experimental import dtensor as dtensor_api diff --git a/keras/dtensor/initializers_test.py b/keras/dtensor/initializers_test.py index d2c47f8bca81..11d97fca2895 100644 --- a/keras/dtensor/initializers_test.py +++ b/keras/dtensor/initializers_test.py @@ -14,142 +14,149 @@ # ============================================================================== """Tests for initializers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras import backend from keras import initializers from keras.dtensor import dtensor_api as dtensor from keras.dtensor import test_util from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf class InitializersTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + global_ids = test_util.create_device_ids_array((2, 2)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": dtensor.Mesh( + ["X", "Y"], + global_ids, + local_device_ids, + test_util.create_device_list((2, 2), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) - def setUp(self): - super().setUp() - global_ids = test_util.create_device_ids_array((2, 2)) - local_device_ids = np.ravel(global_ids).tolist() - mesh_dict = { - 'CPU': - dtensor.Mesh(['X', 'Y'], global_ids, local_device_ids, - test_util.create_device_list((2, 2), 'CPU')) - } - self.mesh = self.configTestMesh(mesh_dict) - - @parameterized.named_parameters( - ('Zeros', initializers.Zeros, {}), - ('Ones', initializers.Ones, {}), - ('Constant', initializers.Constant, {'value': 3.}), - # TODO(b/222160686): Add Identity after after we have SPMD support for - # tf.MatrixDiagV3 - # ('Identity', initializers.Identity, {}), - ) - def test_static_value_initializer(self, initializer_cls, init_args): - layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], self.mesh) - shape = (4, 4) - initializer = initializer_cls(**init_args) - value = initializer(shape=shape, layout=layout) - normal_tensor_value = initializer(shape=shape) + @parameterized.named_parameters( + ("Zeros", initializers.Zeros, {}), + ("Ones", initializers.Ones, {}), + ("Constant", initializers.Constant, {"value": 3.0}), + # TODO(b/222160686): Add Identity after after we have SPMD support for + # tf.MatrixDiagV3 + # ('Identity', initializers.Identity, {}), + ) + def test_static_value_initializer(self, initializer_cls, init_args): + layout = dtensor.Layout( + [dtensor.UNSHARDED, dtensor.UNSHARDED], self.mesh + ) + shape = (4, 4) + initializer = initializer_cls(**init_args) + value = initializer(shape=shape, layout=layout) + normal_tensor_value = initializer(shape=shape) - self.assertEqual(value.shape, shape) - fetched_layout = dtensor.fetch_layout(value) - self.assertEqual(layout, fetched_layout) + self.assertEqual(value.shape, shape) + fetched_layout = dtensor.fetch_layout(value) + self.assertEqual(layout, fetched_layout) - 
self.assertAllClose(value, normal_tensor_value) + self.assertAllClose(value, normal_tensor_value) - @parameterized.named_parameters( - ('RandomUniform', initializers.RandomUniform, {}), - ('RandomUniform_seeded', initializers.RandomUniform, {'seed': 1}), - ('RandomNormal', initializers.RandomNormal, {}), - ('RandomNormal_seeded', initializers.RandomNormal, {'seed': 1}), - ('TruncatedNormal', initializers.TruncatedNormal, {}), - ('TruncatedNormal_seeded', initializers.TruncatedNormal, {'seed': 1}), - ('Orthogonal', initializers.Orthogonal, {}), - ('Orthogonal_seeded', initializers.Orthogonal, {'seed': 1}), - ('VarianceScaling', initializers.VarianceScaling, {}), - ('VarianceScaling_seeded', initializers.VarianceScaling, {'seed': 1}), - ('GlorotUniform', initializers.GlorotUniform, {}), - ('GlorotUniform_seeded', initializers.GlorotUniform, {'seed': 1}), - ('GlorotNormal', initializers.GlorotNormal, {}), - ('GlorotNormal_seeded', initializers.GlorotNormal, {'seed': 1}), - ('LecunNormal', initializers.LecunNormal, {}), - ('LecunNormal_seeded', initializers.LecunNormal, {'seed': 1}), - ('LecunUniform', initializers.LecunUniform, {}), - ('LecunUniform_seeded', initializers.LecunUniform, {'seed': 1}), - ('HeNormal', initializers.HeNormal, {}), - ('HeNormal_seeded', initializers.HeNormal, {'seed': 1}), - ('HeUniform', initializers.HeUniform, {}), - ('HeUniform_seeded', initializers.HeUniform, {'seed': 1}), - ) - def test_random_value_initializer(self, initializer_cls, init_args): - layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], self.mesh) - shape = (4, 4) - initializer = initializer_cls(**init_args) - # Make sure to raise error when keras global seed is not set. - with self.assertRaisesRegex(ValueError, 'set the global seed'): - initializer(shape=shape, layout=layout) + @parameterized.named_parameters( + ("RandomUniform", initializers.RandomUniform, {}), + ("RandomUniform_seeded", initializers.RandomUniform, {"seed": 1}), + ("RandomNormal", initializers.RandomNormal, {}), + ("RandomNormal_seeded", initializers.RandomNormal, {"seed": 1}), + ("TruncatedNormal", initializers.TruncatedNormal, {}), + ("TruncatedNormal_seeded", initializers.TruncatedNormal, {"seed": 1}), + ("Orthogonal", initializers.Orthogonal, {}), + ("Orthogonal_seeded", initializers.Orthogonal, {"seed": 1}), + ("VarianceScaling", initializers.VarianceScaling, {}), + ("VarianceScaling_seeded", initializers.VarianceScaling, {"seed": 1}), + ("GlorotUniform", initializers.GlorotUniform, {}), + ("GlorotUniform_seeded", initializers.GlorotUniform, {"seed": 1}), + ("GlorotNormal", initializers.GlorotNormal, {}), + ("GlorotNormal_seeded", initializers.GlorotNormal, {"seed": 1}), + ("LecunNormal", initializers.LecunNormal, {}), + ("LecunNormal_seeded", initializers.LecunNormal, {"seed": 1}), + ("LecunUniform", initializers.LecunUniform, {}), + ("LecunUniform_seeded", initializers.LecunUniform, {"seed": 1}), + ("HeNormal", initializers.HeNormal, {}), + ("HeNormal_seeded", initializers.HeNormal, {"seed": 1}), + ("HeUniform", initializers.HeUniform, {}), + ("HeUniform_seeded", initializers.HeUniform, {"seed": 1}), + ) + def test_random_value_initializer(self, initializer_cls, init_args): + layout = dtensor.Layout( + [dtensor.UNSHARDED, dtensor.UNSHARDED], self.mesh + ) + shape = (4, 4) + initializer = initializer_cls(**init_args) + # Make sure to raise error when keras global seed is not set. 
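+        # (DTensor initializers draw from a seeded, deterministic generator
+        # so that every device in the mesh produces identical replicated
+        # values; without a global seed this cannot be guaranteed.)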
+ with self.assertRaisesRegex(ValueError, "set the global seed"): + initializer(shape=shape, layout=layout) - try: - tf_utils.set_random_seed(1337) - value = initializer(shape=shape, layout=layout) - self.assertEqual(value.shape, shape) - fetched_layout = dtensor.fetch_layout(value) - self.assertEqual(layout, fetched_layout) + try: + tf_utils.set_random_seed(1337) + value = initializer(shape=shape, layout=layout) + self.assertEqual(value.shape, shape) + fetched_layout = dtensor.fetch_layout(value) + self.assertEqual(layout, fetched_layout) - # Make sure when same seed is set again, the new initializer should - # generate same result - tf_utils.set_random_seed(1337) - initializer = initializer_cls(**init_args) - new_value = initializer(shape=shape, layout=layout) - self.assertAllClose(value, new_value) - finally: - # Unset the keras global generator so that it doesn't affect other tests - # that need to verify the existence of global generator. - backend._SEED_GENERATOR.generator = None + # Make sure when same seed is set again, the new initializer should + # generate same result + tf_utils.set_random_seed(1337) + initializer = initializer_cls(**init_args) + new_value = initializer(shape=shape, layout=layout) + self.assertAllClose(value, new_value) + finally: + # Unset the keras global generator so that it doesn't affect other + # tests that need to verify the existence of global generator. + backend._SEED_GENERATOR.generator = None - @parameterized.named_parameters( - ('zeros', 'zeros', initializers.Zeros), - ('Zeros', 'Zeros', initializers.Zeros), - ('ones', 'ones', initializers.Ones), - ('Ones', 'Ones', initializers.Ones), - ('constant', 'constant', initializers.Constant), - ('Constant', 'Constant', initializers.Constant), - ('random_uniform', 'random_uniform', initializers.RandomUniform), - ('RandomUniform', 'RandomUniform', initializers.RandomUniform), - ('random_normal', 'random_normal', initializers.RandomNormal), - ('RandomNormal', 'RandomNormal', initializers.RandomNormal), - ('truncated_normal', 'truncated_normal', initializers.TruncatedNormal), - ('TruncatedNormal', 'TruncatedNormal', initializers.TruncatedNormal), - ('Identity', 'Identity', initializers.Identity), - ('identity', 'identity', initializers.Identity), - ('Orthogonal', 'Orthogonal', initializers.Orthogonal), - ('orthogonal', 'orthogonal', initializers.Orthogonal), - ('variance_scaling', 'variance_scaling', initializers.VarianceScaling), - ('VarianceScaling', 'VarianceScaling', initializers.VarianceScaling), - ('glorot_uniform', 'glorot_uniform', initializers.GlorotUniform), - ('GlorotUniform', 'GlorotUniform', initializers.GlorotUniform), - ('glorot_normal', 'glorot_normal', initializers.GlorotNormal), - ('GlorotNormal', 'GlorotNormal', initializers.GlorotNormal), - ('lecun_normal', 'lecun_normal', initializers.LecunNormal), - ('LecunNormal', 'LecunNormal', initializers.LecunNormal), - ('lecun_uniform', 'lecun_uniform', initializers.LecunUniform), - ('LecunUniform', 'LecunUniform', initializers.LecunUniform), - ('he_normal', 'he_normal', initializers.HeNormal), - ('HeNormal', 'HeNormal', initializers.HeNormal), - ('he_uniform', 'he_uniform', initializers.HeUniform), - ('HeUniform', 'HeUniform', initializers.HeUniform), - ) - def test_serialization_deserialization(self, cls_name, expected_cls): - initializer = initializers.get(cls_name) - self.assertIsInstance(initializer, expected_cls) + @parameterized.named_parameters( + ("zeros", "zeros", initializers.Zeros), + ("Zeros", "Zeros", initializers.Zeros), + ("ones", 
"ones", initializers.Ones), + ("Ones", "Ones", initializers.Ones), + ("constant", "constant", initializers.Constant), + ("Constant", "Constant", initializers.Constant), + ("random_uniform", "random_uniform", initializers.RandomUniform), + ("RandomUniform", "RandomUniform", initializers.RandomUniform), + ("random_normal", "random_normal", initializers.RandomNormal), + ("RandomNormal", "RandomNormal", initializers.RandomNormal), + ("truncated_normal", "truncated_normal", initializers.TruncatedNormal), + ("TruncatedNormal", "TruncatedNormal", initializers.TruncatedNormal), + ("Identity", "Identity", initializers.Identity), + ("identity", "identity", initializers.Identity), + ("Orthogonal", "Orthogonal", initializers.Orthogonal), + ("orthogonal", "orthogonal", initializers.Orthogonal), + ("variance_scaling", "variance_scaling", initializers.VarianceScaling), + ("VarianceScaling", "VarianceScaling", initializers.VarianceScaling), + ("glorot_uniform", "glorot_uniform", initializers.GlorotUniform), + ("GlorotUniform", "GlorotUniform", initializers.GlorotUniform), + ("glorot_normal", "glorot_normal", initializers.GlorotNormal), + ("GlorotNormal", "GlorotNormal", initializers.GlorotNormal), + ("lecun_normal", "lecun_normal", initializers.LecunNormal), + ("LecunNormal", "LecunNormal", initializers.LecunNormal), + ("lecun_uniform", "lecun_uniform", initializers.LecunUniform), + ("LecunUniform", "LecunUniform", initializers.LecunUniform), + ("he_normal", "he_normal", initializers.HeNormal), + ("HeNormal", "HeNormal", initializers.HeNormal), + ("he_uniform", "he_uniform", initializers.HeUniform), + ("HeUniform", "HeUniform", initializers.HeUniform), + ) + def test_serialization_deserialization(self, cls_name, expected_cls): + initializer = initializers.get(cls_name) + self.assertIsInstance(initializer, expected_cls) - config = initializers.serialize(initializer) - recreated = initializers.deserialize(config) + config = initializers.serialize(initializer) + recreated = initializers.deserialize(config) - self.assertIsInstance(recreated, expected_cls) - self.assertEqual(config, initializers.serialize(recreated)) + self.assertIsInstance(recreated, expected_cls) + self.assertEqual(config, initializers.serialize(recreated)) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/dtensor/integration_test_utils.py b/keras/dtensor/integration_test_utils.py index e8a69e8d8df4..3db7cc00d428 100644 --- a/keras/dtensor/integration_test_utils.py +++ b/keras/dtensor/integration_test_utils.py @@ -20,7 +20,10 @@ """ +import numpy as np +import tensorflow.compat.v2 as tf from absl import logging + from keras import layers from keras import losses from keras import models @@ -28,119 +31,136 @@ from keras.dtensor import dtensor_api as dtensor from keras.dtensor import layout_map as layout_map_lib from keras.utils import np_utils -import numpy as np -import tensorflow.compat.v2 as tf - -# pylint: disable=missing-function-docstring NUM_CLASS = 10 # MNIST has 10 digits def get_model_with_layout_map(layout_map): - """Builds a Sequential CNN model to recognize MNIST digits. + """Builds a Sequential CNN model to recognize MNIST digits. + + Args: + layout_map: dict of string name -> Layout, for weights creation. + + Returns: + a CNN Keras model used for MNIST + """ - Args: - layout_map: dict of string name -> Layout, for weights creation. + with layout_map_lib.layout_map_scope(layout_map): + # Define a CNN model to recognize MNIST digits. 
+ return get_model() - Returns: - a CNN Keras model used for MNIST - """ - with layout_map_lib.layout_map_scope(layout_map): - # Define a CNN model to recognize MNIST digits. +def get_model(): + """Builds a Sequential CNN model to recognize MNIST digits.""" model = models.Sequential() model.add( layers.Conv2D( 32, - name='conv2d_1', + name="conv2d_1", kernel_size=(3, 3), - activation='relu', - input_shape=(28, 28, 1), # channel last gray scale input - )) - model.add(layers.Conv2D( - 64, - name='conv2d_2', - kernel_size=(3, 3), - activation='relu', - )) + activation="relu", + input_shape=(28, 28, 1), # channel last gray scale input + ) + ) + model.add( + layers.Conv2D( + 64, + name="conv2d_2", + kernel_size=(3, 3), + activation="relu", + ) + ) model.add(layers.MaxPooling2D(pool_size=(2, 2))) model.add(layers.Dropout(0.25)) model.add(layers.Flatten()) - model.add(layers.Dense( - 128, - name='dense_1', - activation='relu', - )) + model.add( + layers.Dense( + 128, + name="dense_1", + activation="relu", + ) + ) model.add(layers.Dropout(0.5)) - model.add(layers.Dense( - NUM_CLASS, - name='dense_2', - activation='softmax', - )) + model.add( + layers.Dense( + NUM_CLASS, + name="dense_2", + activation="softmax", + ) + ) return model def get_all_replicated_layout_map(mesh): - layout_map = layout_map_lib.LayoutMap(mesh=mesh) + layout_map = layout_map_lib.LayoutMap(mesh=mesh) - layout_4d = dtensor.Layout.replicated(mesh, rank=4) - layout_2d = dtensor.Layout.replicated(mesh, rank=2) - layout_1d = dtensor.Layout.replicated(mesh, rank=1) + layout_4d = dtensor.Layout.replicated(mesh, rank=4) + layout_2d = dtensor.Layout.replicated(mesh, rank=2) + layout_1d = dtensor.Layout.replicated(mesh, rank=1) - layout_map['conv2d.*kernel'] = layout_4d - layout_map['conv2d.*bias'] = layout_1d - layout_map['dense.*kernel'] = layout_2d - layout_map['dense.*bias'] = layout_1d + layout_map["conv2d.*kernel"] = layout_4d + layout_map["conv2d.*bias"] = layout_1d + layout_map["dense.*kernel"] = layout_2d + layout_map["dense.*bias"] = layout_1d - return layout_map + return layout_map def get_mnist_datasets(num_class, batch_size): - (x_train, y_train), (x_test, y_test) = mnist.load_data() + (x_train, y_train), (x_test, y_test) = mnist.load_data() - x_train = np.expand_dims(x_train, axis=-1).astype('float32') - x_test = np.expand_dims(x_test, axis=-1).astype('float32') - x_train /= 255 # normalize to 0~1 - x_test /= 255 + x_train = np.expand_dims(x_train, axis=-1).astype("float32") + x_test = np.expand_dims(x_test, axis=-1).astype("float32") + x_train /= 255 # normalize to 0~1 + x_test /= 255 - y_train = np_utils.to_categorical(y_train, num_class) - y_test = np_utils.to_categorical(y_test, num_class) + y_train = np_utils.to_categorical(y_train, num_class) + y_test = np_utils.to_categorical(y_test, num_class) - train_ds = tf.data.Dataset.from_tensor_slices( - (x_train, y_train)).repeat().batch(batch_size, drop_remainder=True) - eval_ds = tf.data.Dataset.from_tensor_slices( - (x_test, y_test)).repeat().batch(batch_size, drop_remainder=True) + train_ds = ( + tf.data.Dataset.from_tensor_slices((x_train, y_train)) + .repeat() + .batch(batch_size, drop_remainder=True) + ) + eval_ds = ( + tf.data.Dataset.from_tensor_slices((x_test, y_test)) + .repeat() + .batch(batch_size, drop_remainder=True) + ) - return train_ds, eval_ds + return train_ds, eval_ds def train_mnist_model_batch_sharded( - model, optimizer, mesh, num_epochs, steps_per_epoch, global_batch_size): + model, optimizer, mesh, num_epochs, steps_per_epoch, global_batch_size +): 
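+    # This helper trains `model` by manually sharding each global batch:
+    # images and labels are split across the mesh's local devices and packed
+    # into DTensors with batch-sharded layouts via `dtensor.pack`.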
- dataset, _ = get_mnist_datasets(NUM_CLASS, global_batch_size) + dataset, _ = get_mnist_datasets(NUM_CLASS, global_batch_size) - input_image_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=4) - input_label_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2) - loss_obj = losses.CategoricalCrossentropy() + input_image_layout = dtensor.Layout.batch_sharded(mesh, "batch", rank=4) + input_label_layout = dtensor.Layout.batch_sharded(mesh, "batch", rank=2) + loss_obj = losses.CategoricalCrossentropy() - num_local_devices = mesh.num_local_devices() - iterator = iter(dataset) - train_losses = [] - for epoch in range(num_epochs): - total_loss = 0.00 - for _ in range(steps_per_epoch): - images, labels = next(iterator) - images = tf.split(images, num_local_devices) - labels = tf.split(labels, num_local_devices) - d_images = dtensor.pack(images, input_image_layout) - d_labels = dtensor.pack(labels, input_label_layout) - total_loss += train_step(model, d_images, d_labels, loss_obj, optimizer) + num_local_devices = mesh.num_local_devices() + iterator = iter(dataset) + train_losses = [] + for epoch in range(num_epochs): + total_loss = 0.00 + for _ in range(steps_per_epoch): + images, labels = next(iterator) + images = tf.split(images, num_local_devices) + labels = tf.split(labels, num_local_devices) + d_images = dtensor.pack(images, input_image_layout) + d_labels = dtensor.pack(labels, input_label_layout) + total_loss += train_step( + model, d_images, d_labels, loss_obj, optimizer + ) - train_loss = tf.reduce_mean(total_loss / steps_per_epoch) + train_loss = tf.reduce_mean(total_loss / steps_per_epoch) - logging.info('Epoch %d, Loss: %f', epoch, train_loss) - train_losses.append(train_loss) - return train_losses + logging.info("Epoch %d, Loss: %f", epoch, train_loss) + train_losses.append(train_loss) + return train_losses # Change to use model.fit when dataset has the correct layout info populated @@ -148,12 +168,10 @@ def train_mnist_model_batch_sharded( @tf.function def train_step(model, feature, label, loss_obj, optimizer): - with tf.GradientTape() as tape: - predict = model(feature, training=True) - loss = loss_obj(label, predict) - - gradients = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(zip(gradients, model.trainable_variables)) - return loss - + with tf.GradientTape() as tape: + predict = model(feature, training=True) + loss = loss_obj(label, predict) + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + return loss diff --git a/keras/dtensor/layers_test.py b/keras/dtensor/layers_test.py index 11b83f6a557c..5efc2b7a8f26 100644 --- a/keras/dtensor/layers_test.py +++ b/keras/dtensor/layers_test.py @@ -14,87 +14,142 @@ # ============================================================================== """Tests for layers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras import backend from keras import layers from keras.dtensor import dtensor_api as dtensor from keras.dtensor import test_util from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf class LayersTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + backend.enable_tf_random_generator() + tf_utils.set_random_seed(1337) + global_ids = test_util.create_device_ids_array((2, 2)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": dtensor.Mesh( + ["X", "Y"], + global_ids, + 
local_device_ids, + test_util.create_device_list((2, 2), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) - def setUp(self): - super().setUp() - backend.enable_tf_random_generator() - tf_utils.set_random_seed(1337) - global_ids = test_util.create_device_ids_array((2, 2)) - local_device_ids = np.ravel(global_ids).tolist() - mesh_dict = { - 'CPU': - dtensor.Mesh(['X', 'Y'], global_ids, - local_device_ids, - test_util.create_device_list((2, 2), 'CPU')) - } - self.mesh = self.configTestMesh(mesh_dict) + @parameterized.named_parameters( + ( + "dense", + layers.Dense, + {"units": 4}, + {"kernel": 2, "bias": 1}, + [10, 8], + ), + # TODO(b/224861663): Enable this test. + # ('embedding', layers.Embedding, {'input_dim': 100, 'output_dim': 32}, + # {'embeddings': 2}, [10,], np.int32), + ( + "conv1d", + layers.Conv1D, + {"filters": 4, "kernel_size": 3}, + {"kernel": 3, "bias": 1}, + [10, 28, 3], + ), + ( + "conv1d_transpose", + layers.Conv1DTranspose, + {"filters": 4, "kernel_size": 3}, + {"kernel": 3, "bias": 1}, + [10, 28, 3], + ), + ( + "conv2d", + layers.Conv2D, + {"filters": 4, "kernel_size": (3, 3)}, + {"kernel": 4, "bias": 1}, + [10, 28, 28, 3], + ), + ( + "conv2d_transpose", + layers.Conv2DTranspose, + {"filters": 4, "kernel_size": (3, 3)}, + {"kernel": 4, "bias": 1}, + [10, 28, 28, 3], + ), + ( + "conv3d", + layers.Conv3D, + {"filters": 4, "kernel_size": (3, 3, 3)}, + {"kernel": 5, "bias": 1}, + [10, 28, 28, 28, 3], + ), + # TODO(b/224862394): Add support for tf.Conv3DBackpropInputV2 + # ('conv3dtranspose', layers.Conv3DTranspose, + # {'filters': 4, 'kernel_size': (3, 3, 3)}, + # {'kernel': 5, 'bias': 1}, [10, 28, 28, 28, 3]), + ( + "batch_norm", + layers.BatchNormalization, + {"fused": False}, + {"beta": 1, "gamma": 1, "moving_mean": 1, "moving_variance": 1}, + [10, 28, 28, 3], + ), + ( + "layer_norm", + layers.LayerNormalization, + {"dtype": tf.float64}, + {"beta": 1, "gamma": 1}, + [10, 28, 28, 3], + ), + ) + def test_layer( + self, + layer_cls, + init_args, + variable_settings, + input_shape, + input_dtype=np.float32, + ): + args_with_layout = init_args.copy() + for variable_name, variable_rank in variable_settings.items(): + args_with_layout[ + variable_name + "_layout" + ] = dtensor.Layout.replicated(self.mesh, variable_rank) - @parameterized.named_parameters( - ('dense', layers.Dense, {'units': 4}, {'kernel': 2, 'bias': 1}, [10, 8]), - # TODO(b/224861663): Enable this test. 
- # ('embedding', layers.Embedding, {'input_dim': 100, 'output_dim': 32}, - # {'embeddings': 2}, [10,], np.int32), - ('conv1d', layers.Conv1D, {'filters': 4, 'kernel_size': 3}, - {'kernel': 3, 'bias': 1}, [10, 28, 3]), - ('conv1d_transpose', layers.Conv1DTranspose, - {'filters': 4, 'kernel_size': 3}, {'kernel': 3, 'bias': 1}, [10, 28, 3]), - ('conv2d', layers.Conv2D, {'filters': 4, 'kernel_size': (3, 3)}, - {'kernel': 4, 'bias': 1}, [10, 28, 28, 3]), - ('conv2d_transpose', layers.Conv2DTranspose, - {'filters': 4, 'kernel_size': (3, 3)}, - {'kernel': 4, 'bias': 1}, [10, 28, 28, 3]), - ('conv3d', layers.Conv3D, {'filters': 4, 'kernel_size': (3, 3, 3)}, - {'kernel': 5, 'bias': 1}, [10, 28, 28, 28, 3]), - # TODO(b/224862394): Add support for tf.Conv3DBackpropInputV2 - # ('conv3dtranspose', layers.Conv3DTranspose, - # {'filters': 4, 'kernel_size': (3, 3, 3)}, - # {'kernel': 5, 'bias': 1}, [10, 28, 28, 28, 3]), - ('batch_norm', layers.BatchNormalization, {'fused': False}, - {'beta': 1, 'gamma': 1, 'moving_mean': 1, 'moving_variance': 1}, - [10, 28, 28, 3]), - ('layer_norm', layers.LayerNormalization, {'dtype': tf.float64}, - {'beta': 1, 'gamma': 1}, [10, 28, 28, 3]) - ) - def test_layer(self, layer_cls, init_args, variable_settings, input_shape, - input_dtype=np.float32): - args_with_layout = init_args.copy() - for variable_name, variable_rank in variable_settings.items(): - args_with_layout[variable_name + '_layout'] = dtensor.Layout.replicated( - self.mesh, variable_rank) + layer = layer_cls(**args_with_layout) + # inputs = np.random.random(input_shape) + inputs = np.random.randn(*input_shape).astype(input_dtype) + d_inputs = dtensor.copy_to_mesh( + inputs, dtensor.Layout.replicated(self.mesh, len(input_shape)) + ) + d_output = layer(d_inputs) - layer = layer_cls(**args_with_layout) - # inputs = np.random.random(input_shape) - inputs = np.random.randn(*input_shape).astype(input_dtype) - d_inputs = dtensor.copy_to_mesh( - inputs, dtensor.Layout.replicated(self.mesh, len(input_shape))) - d_output = layer(d_inputs) + for variable_name, variable_rank in variable_settings.items(): + self.assertIsInstance( + getattr(layer, variable_name), dtensor.DVariable + ) - for variable_name, variable_rank in variable_settings.items(): - self.assertIsInstance(getattr(layer, variable_name), dtensor.DVariable) + expected_layout = dtensor.Layout.replicated( + self.mesh, d_output.shape.rank + ) + self.assertEqual(dtensor.fetch_layout(d_output), expected_layout) - expected_layout = dtensor.Layout.replicated(self.mesh, d_output.shape.rank) - self.assertEqual(dtensor.fetch_layout(d_output), expected_layout) + # Make sure to produce same output when layout is not used + tf_utils.set_random_seed(1337) + layer_2 = layer_cls(**init_args) + output = layer_2(inputs) + self.assertAllClose(d_output, output) - # Make sure to produce same output when layout is not used - tf_utils.set_random_seed(1337) - layer_2 = layer_cls(**init_args) - output = layer_2(inputs) - self.assertAllClose(d_output, output) + for variable_name, variable_rank in variable_settings.items(): + self.assertNotIsInstance( + getattr(layer_2, variable_name), dtensor.DVariable + ) - for variable_name, variable_rank in variable_settings.items(): - self.assertNotIsInstance(getattr(layer_2, variable_name), - dtensor.DVariable) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/dtensor/layout_map.py b/keras/dtensor/layout_map.py index 8b23ab79ac49..c7fd3407d533 100644 --- a/keras/dtensor/layout_map.py +++ 
b/keras/dtensor/layout_map.py @@ -19,433 +19,579 @@ import re import threading +import tensorflow.compat.v2 as tf + from keras.dtensor import dtensor_api as dtensor from keras.dtensor import lazy_variable from keras.dtensor import utils from keras.engine import base_layer +# isort: off from tensorflow.python.util.tf_export import keras_export -# pylint: disable=missing-class-docstring # We will skip the path for certain attributes when mapping the layout, e.g. # model._self_tracked_trackables, or layer._trainable_weights/ # _non_trainable_weights, etc. Those attributes are usually served as a cache, # and the actual variable should be in somewhere else. -_KERAS_ATTRIBUTES_TO_SKIP = ['_self_tracked_trackables', '_trainable_weights', - '_non_trainable_weights', - '_captured_weight_regularizer'] +_KERAS_ATTRIBUTES_TO_SKIP = [ + "_self_tracked_trackables", + "_trainable_weights", + "_non_trainable_weights", + "_captured_weight_regularizer", +] _LAYOUT_MAP = threading.local() def get_current_layout_map(): - return getattr(_LAYOUT_MAP, 'layout_map', None) + return getattr(_LAYOUT_MAP, "layout_map", None) -@keras_export('keras.dtensor.experimental.LayoutMap', v1=[]) +@keras_export("keras.dtensor.experimental.LayoutMap", v1=[]) class LayoutMap(collections.abc.MutableMapping): - """A dict-like object that maps string to `Layout` instances. - - `LayoutMap` uses a string as key and a `Layout` as value. There is a behavior - difference between a normal Python dict and this class. The string key will be - treated as a regex when retrieving the value. See the docstring of - `get` for more details. - - See below for a usage example. You can define the naming schema - of the `Layout`, and then retrieve the corresponding `Layout` instance. - - To use the `LayoutMap` with a `Model`, please see the docstring of - `tf.keras.dtensor.experimental.layout_map_scope`. - - ```python - map = LayoutMap(mesh=None) - map['.*dense.*kernel'] = layout_2d - map['.*dense.*bias'] = layout_1d - map['.*conv2d.*kernel'] = layout_4d - map['.*conv2d.*bias'] = layout_1d - - layout_1 = map['dense_1.kernel'] # layout_1 == layout_2d - layout_2 = map['dense_1.bias'] # layout_2 == layout_1d - layout_3 = map['dense_2.kernel'] # layout_3 == layout_2d - layout_4 = map['dense_2.bias'] # layout_4 == layout_1d - layout_5 = map['my_model/conv2d_123/kernel'] # layout_5 == layout_4d - layout_6 = map['my_model/conv2d_123/bias'] # layout_6 == layout_1d - ``` - - Args: - mesh: An optional `Mesh` that can be used to create all replicated - layout as default when there isn't a layout found based on the input - string query. - """ - - def __init__(self, mesh=None): - self._layout_map = collections.OrderedDict() - self._default_mesh = mesh - - def __getitem__(self, key): - """Retrieve the corresponding layout by the string key. - - When there isn't an exact match, all the existing keys in the layout map - will be treated as a regex and map against the input key again. The first - match will be returned, based on the key insertion order. Return None if - there isn't any match found. + """A dict-like object that maps string to `Layout` instances. + + `LayoutMap` uses a string as key and a `Layout` as value. There is a + behavior difference between a normal Python dict and this class. The string + key will be treated as a regex when retrieving the value. See the docstring + of `get` for more details. + + See below for a usage example. You can define the naming schema + of the `Layout`, and then retrieve the corresponding `Layout` instance. 
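The regex fallback in this lookup is worth seeing in isolation. Below is a minimal stand-alone sketch of the rule (assuming only the Python standard library; `SimpleLayoutMap` and the plain strings standing in for `Layout` values are hypothetical), mirroring the exact-match-then-regex behavior of `__getitem__` further down in this file:

```python
import collections
import re


class SimpleLayoutMap:
    """Hypothetical stand-in that mirrors LayoutMap's lookup order."""

    def __init__(self):
        # Insertion order matters: on a regex lookup, the first
        # inserted key that matches wins.
        self._layout_map = collections.OrderedDict()

    def __setitem__(self, key, layout):
        if key in self._layout_map:
            raise ValueError(f"{key} already exists in the map.")
        self._layout_map[key] = layout

    def __getitem__(self, key):
        # An exact match takes priority over regex matching.
        if key in self._layout_map:
            return self._layout_map[key]
        for k, layout in self._layout_map.items():
            if re.match(k, key):
                return layout
        return None


layout_map = SimpleLayoutMap()
layout_map["d1.kernel"] = "layout_2d"  # exact key
layout_map[".*bias"] = "layout_1d"  # regex fallback

assert layout_map["d1.kernel"] == "layout_2d"  # exact hit
assert layout_map["d2.bias"] == "layout_1d"  # regex hit
assert layout_map["d2.kernel"] is None  # no rule matches
```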
+
+    To use the `LayoutMap` with a `Model`, please see the docstring of
+    `tf.keras.dtensor.experimental.layout_map_scope`.
+
+    ```python
+    map = LayoutMap(mesh=None)
+    map['.*dense.*kernel'] = layout_2d
+    map['.*dense.*bias'] = layout_1d
+    map['.*conv2d.*kernel'] = layout_4d
+    map['.*conv2d.*bias'] = layout_1d
+
+    layout_1 = map['dense_1.kernel']  # layout_1 == layout_2d
+    layout_2 = map['dense_1.bias']  # layout_2 == layout_1d
+    layout_3 = map['dense_2.kernel']  # layout_3 == layout_2d
+    layout_4 = map['dense_2.bias']  # layout_4 == layout_1d
+    layout_5 = map['my_model/conv2d_123/kernel']  # layout_5 == layout_4d
+    layout_6 = map['my_model/conv2d_123/bias']  # layout_6 == layout_1d
+    ```

     Args:
-      key: the string key as the query for the layout.
-
-    Returns:
-      Corresponding layout based on the query.
+        mesh: An optional `Mesh` that can be used to create all replicated
+            layout as default when there isn't a layout found based on the
+            input string query.
     """
-    if key in self._layout_map:
-      return self._layout_map[key]
-
-    for k in self._layout_map:
-      if re.match(k, key):
-        return self._layout_map[k]
-    return None
-
-  def __setitem__(self, key, layout):
-    if key in self._layout_map:
-      raise ValueError(f'{key} already exist in the LayoutMap with '
-                       f'value {self._layout_map[key]}. Please make sure to '
-                       'not use duplicated keys.')
-    if not isinstance(layout, dtensor.Layout):
-      raise ValueError(f'{layout} should be a dtensor.Layout type, '
-                       f'got {type(layout)}')
-
-    self._layout_map[key] = layout
-
-  def __delitem__(self, key):
-    # let the dict to handle the key missing error
-    return self._layout_map.pop(key)
-
-  def __len__(self):
-    return len(self._layout_map)
-
-  def __iter__(self):
-    return iter(self._layout_map)
-
-  def get_default_mesh(self):
-    """Return the default `Mesh` set at instance creation.
-
-    The `Mesh` can be used to create default replicated `Layout` when there
-    isn't a match of the input string query.
-    """
-    return self._default_mesh
+    def __init__(self, mesh=None):
+        self._layout_map = collections.OrderedDict()
+        self._default_mesh = mesh
+
+    def __getitem__(self, key):
+        """Retrieve the corresponding layout by the string key.
+
+        When there isn't an exact match, all the existing keys in the layout
+        map are each treated as a regex and matched against the input key.
+        The first match is returned, based on the key insertion order.
+        Returns None if no match is found.
+
+        Args:
+            key: the string key as the query for the layout.
+
+        Returns:
+            Corresponding layout based on the query.
+        """
+        if key in self._layout_map:
+            return self._layout_map[key]
+
+        for k in self._layout_map:
+            if re.match(k, key):
+                return self._layout_map[k]
+        return None
+
+    def __setitem__(self, key, layout):
+        if key in self._layout_map:
+            raise ValueError(
+                f"{key} already exists in the LayoutMap with "
+                f"value {self._layout_map[key]}. Please make sure to "
+                "not use duplicated keys."
+            )
+        if not isinstance(layout, dtensor.Layout):
+            raise ValueError(
+                f"{layout} should be a dtensor.Layout type, got {type(layout)}"
+            )
+
+        self._layout_map[key] = layout
+
+    def __delitem__(self, key):
+        # let the dict handle the missing key error
+        return self._layout_map.pop(key)
+
+    def __len__(self):
+        return len(self._layout_map)
+
+    def __iter__(self):
+        return iter(self._layout_map)
+
+    def get_default_mesh(self):
+        """Return the default `Mesh` set at instance creation.
+
+        The `Mesh` can be used to create default replicated `Layout` when there
+        isn't a match of the input string query.
+        """
+        return self._default_mesh
+
+    def scope(self):
+        """Apply layout to all `tf.Variable` instances created under the scope.
+
+        All `tf.Variable` instances created under this scope
+        will be lazily initialized first. Once they are attached as the model
+        or layer attributes, and there is a stable layout mapping for them,
+        the variables will be reinitialized into a
+        `tf.experimental.dtensor.DVariable` with the corresponding layout.
+
+        Note that the layout mapping will use object/attribute names as the
+        keys to map the variable to the layout.
+
+        For subclassed models, the full object/attribute name is used as the
+        key. For Functional/Sequential models, we use `layer.name` as
+        the key for the layer, followed by the attribute name. Keras ensures
+        name uniqueness among the layers within a Functional/Sequential model.
+
+        See the following examples that show variable object names
+        for different Keras model types:
+
+        ```python
+        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
+        layout_map['d1.kernel'] = layout_1
+        layout_map['d1.bias'] = layout_2
+        layout_map['d2.kernel'] = layout_3
+        layout_map['d2.bias'] = layout_4
+
+        ## Subclassed model
+        class SubclassModel(tf.keras.Model):
+
+            def __init__(self, name=None):
+                super().__init__(name=name)
+                self.d1 = tf.keras.layers.Dense(1000)
+                self.d2 = tf.keras.layers.Dense(1000)
+
+            def call(self, inputs):
+                x = self.d1(inputs)
+                return self.d2(x)
+
+        with layout_map.scope():
+            model = SubclassModel()
+        inputs = tf.zeros((10, 10))
+        results = model(inputs)
+
+        model.d1.kernel.layout == layout_1
+        model.d1.bias.layout == layout_2
+        model.d2.kernel.layout == layout_3
+        model.d2.bias.layout == layout_4
+
+        ## Functional model
+        with layout_map.scope():
+            inputs = tf.keras.Input((10,), batch_size=10)
+            x = tf.keras.layers.Dense(20, name='d1')(inputs)
+            output = tf.keras.layers.Dense(30, name='d2')(x)
+
+            model = tf.keras.Model(inputs, output)
+
+        d1 = model.layers[1]
+        d2 = model.layers[2]
+
+        d1.kernel.layout == layout_1
+        d1.bias.layout == layout_2
+        d2.kernel.layout == layout_3
+        d2.bias.layout == layout_4
+
+        ## Sequential model
+        with layout_map.scope():
+            model = tf.keras.Sequential([
+                tf.keras.layers.Dense(20, name='d1', input_shape=(10,)),
+                tf.keras.layers.Dense(30, name='d2')
+            ])
+
+        d1 = model.layers[0]
+        d2 = model.layers[1]
+
+        d1.kernel.layout == layout_1
+        d1.bias.layout == layout_2
+        d2.kernel.layout == layout_3
+        d2.bias.layout == layout_4
+        ```
+
+        Returns:
+            A context that will lazily initialize all `tf.Variable` objects
+            within the model, with their attributed layouts.
+        """
+        return layout_map_scope(self)


 LayoutMap.get.__doc__ = LayoutMap.__getitem__.__doc__


-@keras_export('keras.dtensor.experimental.layout_map_scope', v1=[])
 @contextlib.contextmanager
 def layout_map_scope(layout_map):
-  """Apply the layout to all the tf.Variables created under the scope.
-
-  Create a scope that all the tf.Variable created under this scope
-  will be lazily inited, and initialized later on with proper layout when the
-  object path in the model is stable/finalized.
-
-  Note that the layout mapping will use the object/attribute names as the key
-  to map the variable against the layout.
-
-  For subclassed models, the full object/attribute name is used as the key.
-  For Functional/Sequential models, since the layers within the model do not get
-  assigned to a meaningful attribute, we use `layer.name` as the key
-  for the layer, followed by the attribute name. 
Keras ensures - name uniqueness among the layers in all Functional/Sequential models. - - See the following examples that show the variable object names - for different Keras model types: - - ```python - layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) - layout_map['d1.kernel'] = layout_1 - layout_map['d1.bias'] = layout_2 - layout_map['d2.kernel'] = layout_3 - layout_map['d2.bias'] = layout_4 - - ## Subclassed model - class SubclassModel(tf.keras.Model): - - def __init__(self, name=None): - super().__init__(name=name) - self.d1 = tf.keras.layers.Dense(1000) - self.d2 = tf.keras.layers.Dense(1000) - - def call(self, inputs): - x = self.d1(inputs) - return self.d2(x) - - with layout_map_scope(layout_map): - model = SubclassModel() - # Triggering the creation of weights within or outside of the scope works - inputs = tf.zeros((10, 10)) - results = model(inputs) - - model.d1.kernel.layout == layout_1 - model.d1.bias.layout == layout_2 - model.d2.kernel.layout == layout_3 - model.d2.bias.layout == layout_4 - - ## Functional model - with layout_map_scope(layout_map): - inputs = tf.keras.Input((10,), batch_size=10) - x = tf.keras.layers.Dense(20, name='d1')(inputs) - output = tf.keras.layers.Dense(30, name='d2')(x) - - model = tf.keras.Model(inputs, output) - - d1 = model.layers[1] - d2 = model.layers[2] - - d1.kernel.layout == layout_1 - d1.bias.layout == layout_2 - d1.kernel.layout == layout_3 - d1.bias.layout == layout_4 - - ## Sequential model - with layout_map_scope(layout_map): - model = tf.keras.Sequential([ - tf.keras.layers.Dense(20, name='d1', input_shape=(10,)), - tf.keras.layers.Dense(30, name='d2') - ]) - - d1 = model.layers[0] - d2 = model.layers[1] - - d1.kernel.layout == layout_1 - d1.bias.layout == layout_2 - d1.kernel.layout == layout_3 - d1.bias.layout == layout_4 - ``` - - Args: - layout_map: a LayoutMap which contains the variable_object_path (string) -> - Layout. When a layout is not found for the variable, a default all - replicated layout will be created for the variable. - - Yields: - A context that will lazily initialize all `tf.Variable` objects - within the model, with their attributed layouts. - """ - previous_layout_map = get_current_layout_map() - global _LAYOUT_MAP - _LAYOUT_MAP.layout_map = layout_map - - with lazy_variable.lazy_init_scope(): - try: - yield - finally: - _LAYOUT_MAP.layout_map = previous_layout_map + """Apply the layout to all the tf.Variables created under the scope. + + Create a scope that all the tf.Variable created under this scope + will be lazily inited, and initialized later on with proper layout when the + object path in the model is stable/finalized. + + Note that the layout mapping will use the object/attribute names as the key + to map the variable against the layout. + + For subclassed models, the full object/attribute name is used as the key. + For Functional/Sequential models, since the layers within the model do not + get assigned to a meaningful attribute, we use `layer.name` as the key for + the layer, followed by the attribute name. Keras ensures name uniqueness + among the layers in all Functional/Sequential models. 
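The key construction described above is plain string joining over the flattened attribute path, with the layer name prepended for Functional/Sequential models. A minimal stand-alone sketch (the `object_path_for` helper and the example `path` tuples are hypothetical; the skip list is copied from `_KERAS_ATTRIBUTES_TO_SKIP` earlier in this file):

```python
# Paths are tuples of attribute names and list indices; for example, the
# variable cached at model.d1._trainable_weights[0] flattens to
# ('d1', '_trainable_weights', 0).
_KERAS_ATTRIBUTES_TO_SKIP = [
    "_self_tracked_trackables",
    "_trainable_weights",
    "_non_trainable_weights",
    "_captured_weight_regularizer",
]


def object_path_for(path, layer_name=None):
    """Build the layout query key for a flattened attribute path."""
    if any(attr in path for attr in _KERAS_ATTRIBUTES_TO_SKIP):
        return None  # cached references are skipped, not mapped
    key = ".".join(str(item) for item in path)
    # Functional/Sequential models prepend the (unique) layer name.
    return f"{layer_name}.{key}" if layer_name else key


# Subclassed model: the full attribute path is the key.
assert object_path_for(("d1", "kernel")) == "d1.kernel"
# Functional/Sequential model: layer.name plus the attribute name.
assert object_path_for(("kernel",), layer_name="d1") == "d1.kernel"
# Cache attributes like _trainable_weights are never mapped directly.
assert object_path_for(("d1", "_trainable_weights", 0)) is None
```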
+
+    See the following examples that show the variable object names
+    for different Keras model types:
+
+    ```python
+    layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
+    layout_map['d1.kernel'] = layout_1
+    layout_map['d1.bias'] = layout_2
+    layout_map['d2.kernel'] = layout_3
+    layout_map['d2.bias'] = layout_4
+
+    ## Subclassed model
+    class SubclassModel(tf.keras.Model):
+
+        def __init__(self, name=None):
+            super().__init__(name=name)
+            self.d1 = tf.keras.layers.Dense(1000)
+            self.d2 = tf.keras.layers.Dense(1000)
+
+        def call(self, inputs):
+            x = self.d1(inputs)
+            return self.d2(x)
+
+    with layout_map_scope(layout_map):
+        model = SubclassModel()
+        # Triggering the creation of weights within or outside of the scope works
+    inputs = tf.zeros((10, 10))
+    results = model(inputs)
+
+    model.d1.kernel.layout == layout_1
+    model.d1.bias.layout == layout_2
+    model.d2.kernel.layout == layout_3
+    model.d2.bias.layout == layout_4
+
+    ## Functional model
+    with layout_map_scope(layout_map):
+        inputs = tf.keras.Input((10,), batch_size=10)
+        x = tf.keras.layers.Dense(20, name='d1')(inputs)
+        output = tf.keras.layers.Dense(30, name='d2')(x)
+
+        model = tf.keras.Model(inputs, output)
+
+    d1 = model.layers[1]
+    d2 = model.layers[2]
+
+    d1.kernel.layout == layout_1
+    d1.bias.layout == layout_2
+    d2.kernel.layout == layout_3
+    d2.bias.layout == layout_4
+
+    ## Sequential model
+    with layout_map_scope(layout_map):
+        model = tf.keras.Sequential([
+            tf.keras.layers.Dense(20, name='d1', input_shape=(10,)),
+            tf.keras.layers.Dense(30, name='d2')
+        ])
+
+    d1 = model.layers[0]
+    d2 = model.layers[1]
+
+    d1.kernel.layout == layout_1
+    d1.bias.layout == layout_2
+    d2.kernel.layout == layout_3
+    d2.bias.layout == layout_4
+    ```
+
+    Args:
+        layout_map: a LayoutMap which contains the variable_object_path
+            (string) -> Layout. When a layout is not found for the variable,
+            a default all-replicated layout will be created for the variable.

-def _map_subclass_model_variable(model, layout_map):
-  """Map/Replace LazyInitVariable for subclass model."""
-  lazy_init_variable_to_tf_variable_map = {}
-
-  # Note that the model._flatten is a method from tf.Module, and it returns
-  # duplicated items (since some of the items have different paths).
-  for path, variable in model._flatten(predicate=_is_lazy_init_variable,  # pylint: disable=protected-access
-                                       with_path=True):
-    # Note that path is a tuple that contains string and ints, eg:
-    # ('d1', '_trainable_weights', 0) maps to model.d1._trainable_weights[0]
-    if [a for a in _KERAS_ATTRIBUTES_TO_SKIP if a in path]:
-      continue
-    # Convert all the ints to string and join with .
-    object_path = '.'.join([str(item) for item in path])
-
-    new_variable = _create_dvariable(layout_map, object_path, variable)
-    _set_object_by_path(model, path, new_variable)
-    lazy_init_variable_to_tf_variable_map[id(variable)] = new_variable
-
-  for layer in model._flatten(  # pylint: disable=protected-access
-      predicate=lambda o: isinstance(o, base_layer.Layer)):
-    _config_dvariable_regularization(
-        layer, lazy_init_variable_to_tf_variable_map)
-  # After we replaced all the variables, we want to make sure all the cached
-  # attributes are having the new variable, rather than old LazyInitVariable.
- for path, variable in model._flatten(predicate=_is_lazy_init_variable, # pylint: disable=protected-access - with_path=True): - tf_variable = lazy_init_variable_to_tf_variable_map[id(variable)] - _set_object_by_path(model, path, tf_variable) - - _init_state_variable_for_rng(model, layout_map) - return model + Yields: + A context that will lazily initialize all `tf.Variable` objects + within the model, with their attributed layouts. + """ + previous_layout_map = get_current_layout_map() + global _LAYOUT_MAP + _LAYOUT_MAP.layout_map = layout_map + with lazy_variable.lazy_init_scope(): + try: + yield + finally: + _LAYOUT_MAP.layout_map = previous_layout_map -def _map_functional_model_variable(model, layout_map): - """Map/Replace LazyInitVariable for functional/sequential model.""" - lazy_init_variable_to_tf_variable_map = {} - - for layer in model.layers: - # Note that layer name is unique among the functional/sequential model - # when the layer name is not provided, Keras will auto generate a layer - # name based on the class name. - layer_name = layer.name - for path, variable in layer._flatten(predicate=_is_lazy_init_variable, # pylint: disable=protected-access - with_path=True): - # Note that path is a tuple that contains string and ints, eg: - # ('d1', '_trainable_weights', 0) maps to model.d1._trainable_weights[0] - if [a for a in _KERAS_ATTRIBUTES_TO_SKIP if a in path]: - continue - # Convert all the ints to string and join with . - object_path = '.'.join([str(item) for item in path]) - # Also attach the layer name - object_path = layer_name + '.' + object_path - - new_variable = _create_dvariable(layout_map, object_path, variable) - _set_object_by_path(layer, path, new_variable) - lazy_init_variable_to_tf_variable_map[id(variable)] = new_variable - - _config_dvariable_regularization( - layer, lazy_init_variable_to_tf_variable_map) +def _map_subclass_model_variable(model, layout_map): + """Map/Replace LazyInitVariable for subclass model.""" + lazy_init_variable_to_tf_variable_map = {} + + # Note that the model._flatten is a method from tf.Module, and it returns + # duplicated items (since some of the items have different paths). + for path, variable in model._flatten( + predicate=_is_lazy_init_variable, + with_path=True, + ): + # Note that path is a tuple that contains string and ints, eg: + # ('d1', '_trainable_weights', 0) maps to model.d1._trainable_weights[0] + if [a for a in _KERAS_ATTRIBUTES_TO_SKIP if a in path]: + continue + # Convert all the ints to string and join with . + object_path = ".".join([str(item) for item in path]) + + new_variable = _create_dvariable(layout_map, object_path, variable) + _set_object_by_path(model, path, new_variable) + lazy_init_variable_to_tf_variable_map[id(variable)] = new_variable + + for layer in model._flatten( + predicate=lambda o: isinstance(o, base_layer.Layer) + ): + _config_dvariable_regularization( + layer, lazy_init_variable_to_tf_variable_map + ) # After we replaced all the variables, we want to make sure all the cached # attributes are having the new variable, rather than old LazyInitVariable. 
- for path, variable in layer._flatten(predicate=_is_lazy_init_variable, # pylint: disable=protected-access - with_path=True): - tf_variable = lazy_init_variable_to_tf_variable_map[id(variable)] - _set_object_by_path(layer, path, tf_variable) + for path, variable in model._flatten( + predicate=_is_lazy_init_variable, + with_path=True, + ): + tf_variable = lazy_init_variable_to_tf_variable_map[id(variable)] + _set_object_by_path(model, path, tf_variable) + + _init_state_variable_for_rng(model, layout_map) + _update_trackable_reference(model, lazy_init_variable_to_tf_variable_map) + return model - _init_state_variable_for_rng(model, layout_map) - return model + +def _map_functional_model_variable(model, layout_map): + """Map/Replace LazyInitVariable for functional/sequential model.""" + lazy_init_variable_to_tf_variable_map = {} + + for layer in model.layers: + # Note that layer name is unique among the functional/sequential model + # when the layer name is not provided, Keras will auto generate a layer + # name based on the class name. + layer_name = layer.name + for path, variable in layer._flatten( + predicate=_is_lazy_init_variable, + with_path=True, + ): + # Note that path is a tuple that contains string and ints, eg: + # ('d1', '_trainable_weights', 0) maps to + # model.d1._trainable_weights[0] + if [a for a in _KERAS_ATTRIBUTES_TO_SKIP if a in path]: + continue + # Convert all the ints to string and join with . + object_path = ".".join([str(item) for item in path]) + # Also attach the layer name + object_path = layer_name + "." + object_path + + new_variable = _create_dvariable(layout_map, object_path, variable) + _set_object_by_path(layer, path, new_variable) + lazy_init_variable_to_tf_variable_map[id(variable)] = new_variable + + _config_dvariable_regularization( + layer, lazy_init_variable_to_tf_variable_map + ) + + # After we replaced all the variables, we want to make sure all the + # cached attributes are having the new variable, rather than old + # LazyInitVariable. + for path, variable in layer._flatten( + predicate=_is_lazy_init_variable, + with_path=True, + ): + tf_variable = lazy_init_variable_to_tf_variable_map[id(variable)] + _set_object_by_path(layer, path, tf_variable) + + _init_state_variable_for_rng(model, layout_map) + _update_trackable_reference(model, lazy_init_variable_to_tf_variable_map) + return model def _init_state_variable_for_rng(model, layout_map): - """Init the state variable in tf.ranodm.Generator. - - Since the BaseRandomLayer in keras explicitly untrack the tf.random.Generator, - the variable in it will stay as LazyInitVariable, which cause runtime error if - we don't replace them with proper DVariable. Since user usually are not - aware the existance of those variable, we will just give them replicated - layout since they are tiny. - - Args: - model: the model whose layers will be checked to find the BaseRandomLayers. - layout_map: used to get the default mesh information to create DVariable. - """ - # pylint: disable=protected-access - for l in model._flatten( - predicate=lambda o: isinstance(o, base_layer.BaseRandomLayer)): - keras_generator = l._random_generator - if keras_generator._built and keras_generator._generator is None: - raise ValueError( - 'Keras is expected to use tf.random.Generator when using DTensor API.' 
-          'Please call '
-          '`tf.keras.backend.experimental.enable_tf_random_generator` at the '
-          'beginning of your program.')
-    if hasattr(keras_generator, '_generator') and _is_lazy_init_variable(
-        keras_generator._generator._state_var):
-      # Replace it with DVariable
-      keras_generator._generator._state_var = _create_dvariable(
-          layout_map, '', keras_generator._generator._state_var)
-    else:
-      # When the keras_generator is not built yet. Call the init function with
-      # DTensor device to init all the variable with default replicated layout.
-      with dtensor.run_on(layout_map.get_default_mesh()):
-        keras_generator._maybe_init()
+    """Init the state variable in tf.random.Generator.
+
+    Since the BaseRandomLayer in keras explicitly untracks the
+    tf.random.Generator, the variable in it will stay as a LazyInitVariable,
+    which causes a runtime error if we don't replace it with a proper
+    DVariable. Since users are usually not aware of the existence of those
+    variables, we will just give them a replicated layout since they are tiny.
+
+    Args:
+        model: the model whose layers will be checked to find the
+            BaseRandomLayers.
+        layout_map: used to get the default mesh information to create
+            DVariable.
+    """
+
+    for l in model._flatten(
+        predicate=lambda o: isinstance(o, base_layer.BaseRandomLayer)
+    ):
+        keras_generator = l._random_generator
+        if keras_generator._built and keras_generator._generator is None:
+            raise ValueError(
+                "Keras is expected to use tf.random.Generator when using "
+                "DTensor API. Please call "
+                "`tf.keras.backend.experimental.enable_tf_random_generator` at "
+                "the beginning of your program."
+            )
+        if hasattr(keras_generator, "_generator") and _is_lazy_init_variable(
+            keras_generator._generator._state_var
+        ):
+            # Replace it with DVariable
+            keras_generator._generator._state_var = _create_dvariable(
+                layout_map, "", keras_generator._generator._state_var
+            )
+        else:
+            # When the keras_generator is not built yet, call the init
+            # function with the DTensor device to init all the variables
+            # with the default replicated layout.
+            with dtensor.default_mesh(layout_map.get_default_mesh()):
+                keras_generator._maybe_init()


 def _config_dvariable_regularization(
-    layer, lazy_init_variable_to_tf_variable_map):
-  """Update the weights regularizer for newly created `DVariable`.
-
-  The weight regularization usually happens when `layer.add_weight()` is called,
-  at which point the library will first create a `LazyInitVariable`, and then
-  replace it with a `DVariable`. We will defer the creation of those losses,
-  until the DVariable is created.
-
-  See `layer._captured_weight_regularizer` for more details.
-
-  Args:
-    layer: the layer instance for DVariable regularization config.
-    lazy_init_variable_to_tf_variable_map: the dict between LazyInitVariable ID
-      and newly created DVariable.
-  """
-  # pylint: disable=protected-access
-  for (name, variable, regualarizer) in layer._captured_weight_regularizer:
-    if not _is_lazy_init_variable(variable):
-      raise ValueError('Expect the regularization loss are created from '
-                       f'LazyInitVariable, got {variable}')
-    d_variable = lazy_init_variable_to_tf_variable_map[id(variable)]
-    layer._handle_weight_regularization(name, d_variable, regualarizer)
-    # After that, we should cleanup `layer._captured_weight_regularizer`
-    layer._captured_weight_regularizer = []
+    layer, lazy_init_variable_to_tf_variable_map
+):
+    """Update the weights regularizer for newly created `DVariable`.
+
+    The weight regularization usually happens when `layer.add_weight()` is
+    called, at which point the library will first create a `LazyInitVariable`,
+    and then replace it with a `DVariable`. We will defer the creation of
+    those losses until the DVariable is created.
+
+    See `layer._captured_weight_regularizer` for more details.
+
+    Args:
+        layer: the layer instance for DVariable regularization config.
+        lazy_init_variable_to_tf_variable_map: the dict between
+            LazyInitVariable ID and newly created DVariable.
+    """
+
+    for name, variable, regularizer in layer._captured_weight_regularizer:
+        if not _is_lazy_init_variable(variable):
+            raise ValueError(
+                "Expected the regularization loss to be created from "
+                f"LazyInitVariable, got {variable}"
+            )
+        d_variable = lazy_init_variable_to_tf_variable_map[id(variable)]
+        layer._handle_weight_regularization(name, d_variable, regularizer)
+    # After that, we should clean up `layer._captured_weight_regularizer`
+    layer._captured_weight_regularizer = []


 def _create_dvariable(layout_map, object_path, variable):
-  """Create a new variable instead of using the LazyInitVariable.
-
-  We choose to do this since even the LazyInitVariable might behavior like
-  a normal tf.Variable/DVariable, it is not future proof for any new changes
-  to variable class. It will also fail the instance type check in python,
-  which could affect user's code when they do any filtering based on type to
-  find any variables.
-
-  Args:
-    layout_map: a LayoutMap which contains the variable_object_path (string) ->
-      Layout.
-    object_path: string, the object attribute path for the variable.
-    variable: LazyInitVariable which will be replaced by the newly created
-      tf.Variable.
-  Returns:
-    A new tf.Variable with correct layout information.
-  """
-  # TODO(b/228209108): Revisit this in future and see if we can just reuse the
-  # LazyInitVariable rather than creating a new tf.Variable instance.
-  layout = layout_map[object_path]
-  if layout is None:
-    variable_rank = variable.shape.rank
-    layout = dtensor.Layout.replicated(
-        mesh=layout_map.get_default_mesh(),
-        rank=variable_rank)
-  init_val = variable._initial_value  # pylint: disable=protected-access
-  if callable(init_val):
-    with lazy_variable.disable_init_variable_creator():
-      init_val = utils.call_with_layout(init_val, layout)
-  else:
-    # The init value is probably already created as a tensor, we will just copy
-    # it to mesh and give it a proper layout.
-    init_val = dtensor.copy_to_mesh(init_val, layout)
-  # Use the original variable name for new DVariable creation. TF was adding
-  # ":0" suffix to it.
-  variable_name = variable.name
-  if variable_name.endswith(':0'):
-    variable_name = variable_name[:-2]
-  new_variable = dtensor.DVariable(init_val,
-                                   trainable=variable.trainable,
-                                   name=variable_name)
-  return new_variable
+    """Create a new variable instead of using the LazyInitVariable.

+    We choose to do this because, even though the LazyInitVariable might
+    behave like a normal tf.Variable/DVariable, it is not future-proof for
+    any new changes to the variable class. It will also fail the instance
+    type check in python, which could affect user's code when they do any
+    filtering based on type to find any variables.

-def _set_object_by_path(object_to_set, path, value):
-  """Set the attribute of instance to the object.
-
-  Args:
-    object_to_set: the instance whose attribute should be set.
-    path: the tuple/list of string and ints, representing the attribute names.
-      Int means that the attribute to set is a item a list.
-    value: the value of the attribute.
-  """
-
-  for i, attr_name in enumerate(path):
-    if i == len(path) - 1:
-      # We found the actual attribute to set
-      if isinstance(attr_name, int):
-        # This means we are trying to set an element in the array, make sure the
-        # instance is array like object.
-        object_to_set[attr_name] = value
-      else:
-        setattr(object_to_set, attr_name, value)
+    Args:
+        layout_map: a LayoutMap which contains the variable_object_path
+            (string) -> Layout.
+        object_path: string, the object attribute path for the variable.
+        variable: LazyInitVariable which will be replaced by the newly created
+            tf.Variable.
+    Returns:
+        A new tf.Variable with correct layout information.
+    """
+    # TODO(b/228209108): Revisit this in the future and see if we can just
+    # reuse the LazyInitVariable rather than creating a new tf.Variable
+    # instance.
+    layout = layout_map[object_path]
+    if layout is None:
+        variable_rank = variable.shape.rank
+        layout = dtensor.Layout.replicated(
+            mesh=layout_map.get_default_mesh(), rank=variable_rank
+        )
+    init_val = variable._initial_value
+    if callable(init_val):
+        with lazy_variable.disable_init_variable_creator():
+            init_val = utils.call_with_layout(init_val, layout)
     else:
-      if isinstance(attr_name, int):
-        object_to_set = object_to_set[attr_name]
-      else:
-        object_to_set = getattr(object_to_set, attr_name)
+        # The init value is probably already created as a tensor, we will just
+        # copy it to mesh and give it a proper layout.
+        init_val = dtensor.copy_to_mesh(init_val, layout)
+    # Use the original variable name for new DVariable creation. TF was adding
+    # ":0" suffix to it.
+    variable_name = variable.name
+    if variable_name.endswith(":0"):
+        variable_name = variable_name[:-2]
+    new_variable = dtensor.DVariable(
+        init_val, trainable=variable.trainable, name=variable_name
+    )
+    return new_variable
+
+
+def _set_object_by_path(object_to_set, path, value):
+    """Set the attribute of the instance at the given path to the value.
+
+    Args:
+        object_to_set: the instance whose attribute should be set.
+        path: the tuple/list of strings and ints, representing the attribute
+            names. An int means that the attribute to set is an item in a
+            list.
+        value: the value of the attribute.
+    """
+
+    for i, attr_name in enumerate(path):
+        if i == len(path) - 1:
+            # We found the actual attribute to set
+            if isinstance(attr_name, int):
+                # This means we are trying to set an element in the array;
+                # make sure the instance is an array-like object.
+                object_to_set[attr_name] = value
+            else:
+                setattr(object_to_set, attr_name, value)
+        else:
+            if isinstance(attr_name, int):
+                object_to_set = object_to_set[attr_name]
+            else:
+                object_to_set = getattr(object_to_set, attr_name)
+
+
+# TODO(b/228209108): Revisit this after we can reinit LazyInitVariable.
+def _update_trackable_reference(model, lazy_init_variable_to_tf_variable_map):
+    """Update the trackable object references for the model.
+
+    Note that this method is only needed because of a corner case for model
+    checkpointing, where it could accidentally catch a LazyInitVariable in a
+    checkpoint dependency that is not visible to the model attribute graph
+    itself.
+
+    Args:
+        model: the keras model instance whose checkpoint dependencies will be
+            examined.
+        lazy_init_variable_to_tf_variable_map: the dict between
+            LazyInitVariable ID and newly created DVariable.
+    """
+    # See b/234621758 for more details.
+ object_graph = tf.__internal__.tracking.ObjectGraphView(model) + trackables, _ = object_graph.breadth_first_traversal() + for trackable in trackables: + for ref_name, ref in trackable._trackable_children().items(): + if _is_lazy_init_variable(ref): + # Replacing the LazyVariable with DVariable. + trackable._track_trackable( + lazy_init_variable_to_tf_variable_map[id(ref)], + ref_name, + overwrite=True, + ) def _is_lazy_init_variable(obj): - return isinstance(obj, lazy_variable.LazyInitVariable) + return isinstance(obj, lazy_variable.LazyInitVariable) diff --git a/keras/dtensor/layout_map_test.py b/keras/dtensor/layout_map_test.py index 5d0860c5cb9e..7df61a78d475 100644 --- a/keras/dtensor/layout_map_test.py +++ b/keras/dtensor/layout_map_test.py @@ -14,329 +14,399 @@ # ============================================================================== """Tests for layout_map.""" +import os +import shutil + +import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import layers +from keras import models from keras.dtensor import dtensor_api as dtensor from keras.dtensor import layout_map as layout_map_lib +from keras.dtensor import test_util from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf - -# TODO(scottzhu): Fix the layout map test with keras/dtensor/test_util -from keras.dtensor.tests import test_util class LayoutMapTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + backend.enable_tf_random_generator() + tf_utils.set_random_seed(1337) + global_ids = test_util.create_device_ids_array((2, 2)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": dtensor.Mesh( + ["X", "Y"], + global_ids, + local_device_ids, + test_util.create_device_list((2, 2), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) + self.layout_2d = dtensor.Layout.replicated(self.mesh, rank=2) + self.layout_1d = dtensor.Layout.replicated(self.mesh, rank=1) - def setUp(self): - super().setUp() - backend.enable_tf_random_generator() - tf_utils.set_random_seed(1337) - global_ids = test_util.create_device_ids_array((2, 2)) - local_device_ids = np.ravel(global_ids).tolist() - mesh_dict = { - 'CPU': - dtensor.Mesh(['X', 'Y'], global_ids, - local_device_ids, - test_util.create_device_list((2, 2), 'CPU')) - } - self.mesh = self.configTestMesh(mesh_dict) - self.layout_2d = dtensor.Layout.replicated(self.mesh, rank=2) - self.layout_1d = dtensor.Layout.replicated(self.mesh, rank=1) - - self.sharded_2d = dtensor.Layout.batch_sharded(self.mesh, 'X', rank=2) - self.sharded_1d = dtensor.Layout.batch_sharded(self.mesh, 'X', rank=1) + self.sharded_2d = dtensor.Layout.batch_sharded(self.mesh, "X", rank=2) + self.sharded_1d = dtensor.Layout.batch_sharded(self.mesh, "X", rank=1) - def test_add(self): - layout_map = layout_map_lib.LayoutMap() + def test_add(self): + layout_map = layout_map_lib.LayoutMap() - layout_map['dense/kernel'] = self.layout_2d - layout_map['dense/bias'] = self.layout_1d + layout_map["dense/kernel"] = self.layout_2d + layout_map["dense/bias"] = self.layout_1d - # Make there are two items in the map, and we access them via the - # underlying container at layout_map._layout_map - self.assertLen(layout_map._layout_map, 2) - self.assertEqual(layout_map._layout_map['dense/kernel'], self.layout_2d) - self.assertEqual(layout_map._layout_map['dense/bias'], self.layout_1d) + # Make there are two items in the map, and we access them via the + # underlying container at layout_map._layout_map + 
self.assertLen(layout_map._layout_map, 2) + self.assertEqual(layout_map._layout_map["dense/kernel"], self.layout_2d) + self.assertEqual(layout_map._layout_map["dense/bias"], self.layout_1d) - with self.assertRaisesRegex(ValueError, 'dense/kernel already exist'): - layout_map['dense/kernel'] = self.layout_1d + with self.assertRaisesRegex(ValueError, "dense/kernel already exist"): + layout_map["dense/kernel"] = self.layout_1d - with self.assertRaisesRegex(ValueError, 'should be a dtensor.Layout'): - layout_map['conv.kernel'] = [1, 2, 3] + with self.assertRaisesRegex(ValueError, "should be a dtensor.Layout"): + layout_map["conv.kernel"] = [1, 2, 3] - def test_get(self): - layout_map = layout_map_lib.LayoutMap() + def test_get(self): + layout_map = layout_map_lib.LayoutMap() - layout_map['dense/kernel'] = self.sharded_2d - layout_map['dense/bias'] = self.sharded_1d + layout_map["dense/kernel"] = self.sharded_2d + layout_map["dense/bias"] = self.sharded_1d - layout_map['dense.*kernel'] = self.layout_2d - layout_map['dense.*bias'] = self.layout_1d + layout_map["dense.*kernel"] = self.layout_2d + layout_map["dense.*bias"] = self.layout_1d - layout_map['.*bias'] = self.sharded_1d + layout_map[".*bias"] = self.sharded_1d - self.assertEqual(layout_map['dense/kernel'], self.sharded_2d) - self.assertEqual(layout_map['dense/bias'], self.sharded_1d) + self.assertEqual(layout_map["dense/kernel"], self.sharded_2d) + self.assertEqual(layout_map["dense/bias"], self.sharded_1d) - # Map against the wildcard bias rule for dense, and based on the order of - # insertion, it will not use .*bias. - self.assertEqual(layout_map['dense_2/kernel'], self.layout_2d) - self.assertEqual(layout_map['dense_2/bias'], self.layout_1d) + # Map against the wildcard bias rule for dense, and based on the order + # of insertion, it will not use .*bias. 
+ self.assertEqual(layout_map["dense_2/kernel"], self.layout_2d) + self.assertEqual(layout_map["dense_2/bias"], self.layout_1d) - self.assertIsNone(layout_map['conv2d/kernel']) - self.assertEqual(layout_map['conv2d/bias'], self.sharded_1d) + self.assertIsNone(layout_map["conv2d/kernel"]) + self.assertEqual(layout_map["conv2d/bias"], self.sharded_1d) - def test_delete(self): - layout_map = layout_map_lib.LayoutMap() + def test_delete(self): + layout_map = layout_map_lib.LayoutMap() - layout_map['dense/kernel'] = self.layout_2d - layout_map['dense/bias'] = self.layout_1d + layout_map["dense/kernel"] = self.layout_2d + layout_map["dense/bias"] = self.layout_1d - self.assertEqual(layout_map.pop('dense/kernel'), self.layout_2d) - # Make sure to match against the exact string, not the regex - with self.assertRaises(KeyError): - layout_map.pop('.*bias') + self.assertEqual(layout_map.pop("dense/kernel"), self.layout_2d) + # Make sure to match against the exact string, not the regex + with self.assertRaises(KeyError): + layout_map.pop(".*bias") - # Make sure del also works - del layout_map['dense/bias'] + # Make sure del also works + del layout_map["dense/bias"] - self.assertEmpty(layout_map._layout_map) + self.assertEmpty(layout_map._layout_map) - def test_len(self): - layout_map = layout_map_lib.LayoutMap() - self.assertEmpty(layout_map) + def test_len(self): + layout_map = layout_map_lib.LayoutMap() + self.assertEmpty(layout_map) - layout_map['dense/kernel'] = self.layout_2d - layout_map['dense/bias'] = self.layout_1d + layout_map["dense/kernel"] = self.layout_2d + layout_map["dense/bias"] = self.layout_1d - self.assertLen(layout_map, 2) + self.assertLen(layout_map, 2) - def test_iter(self): - layout_map = layout_map_lib.LayoutMap() + def test_iter(self): + layout_map = layout_map_lib.LayoutMap() - layout_map['dense/kernel'] = self.layout_2d - layout_map['dense/bias'] = self.layout_1d + layout_map["dense/kernel"] = self.layout_2d + layout_map["dense/bias"] = self.layout_1d - # Make sure the items are ordered based on the insertion order. - self.assertEqual(list(layout_map.keys()), ['dense/kernel', 'dense/bias']) + # Make sure the items are ordered based on the insertion order. + self.assertEqual( + list(layout_map.keys()), ["dense/kernel", "dense/bias"] + ) - keys = [] - values = [] - for k, v in layout_map.items(): - keys.append(k) - values.append(v) + keys = [] + values = [] + for k, v in layout_map.items(): + keys.append(k) + values.append(v) - self.assertEqual(keys, ['dense/kernel', 'dense/bias']) - self.assertEqual(values, [self.layout_2d, self.layout_1d]) + self.assertEqual(keys, ["dense/kernel", "dense/bias"]) + self.assertEqual(values, [self.layout_2d, self.layout_1d]) # Class used for testing. 
-class SubclassModel(tf.keras.Model): +class SubclassModel(models.Model): + def __init__(self, name=None): + super().__init__(name=name) + self.d1 = layers.Dense(1000) + self.d2 = layers.Dense(1000) + self.dropout = layers.Dropout(0.1) - def __init__(self, name=None): - super().__init__(name=name) - self.d1 = layers.Dense(1000) - self.d2 = layers.Dense(1000) - self.dropout = layers.Dropout(0.1) + def call(self, inputs, training=None): + x = self.d1(inputs) + x = self.dropout(x, training=training) + return self.d2(x) - def call(self, inputs, training=None): - x = self.d1(inputs) - x = self.dropout(x, training=training) - return self.d2(x) +class SubclassLayer(layers.Layer): + def __init__(self, unit): + super().__init__() + self.unit = unit -class ObjectPathMappingTest(test_util.DTensorBaseTest): + def build(self, input_shape): + weight_shape = (input_shape[-1], self.unit) + # Note that the variable name is "kernel", but assigned to "_weight" + # This will cause the checkpoint to record 2 dependencies. + self._weight = self.add_weight(shape=weight_shape, name="kernel") - def setUp(self): - super().setUp() - backend.enable_tf_random_generator() - tf_utils.set_random_seed(1337) - global_ids = test_util.create_device_ids_array((2, 2)) - local_device_ids = np.ravel(global_ids).tolist() - mesh_dict = { - 'CPU': - dtensor.Mesh(['X', 'Y'], global_ids, - local_device_ids, - test_util.create_device_list((2, 2), 'CPU')) - } - self.mesh = self.configTestMesh(mesh_dict) - self.layout_2d = dtensor.Layout.replicated(self.mesh, rank=2) - self.layout_1d = dtensor.Layout.replicated(self.mesh, rank=1) - - self.sharded_2d = dtensor.Layout.batch_sharded(self.mesh, 'X', rank=2) - self.sharded_1d = dtensor.Layout.batch_sharded(self.mesh, 'X', rank=1) - - def test_init_subclass_model_variable_with_layout(self): - layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) - layout_map['d1.kernel'] = self.layout_2d - layout_map['d1.bias'] = self.layout_1d - layout_map['d2.kernel'] = self.layout_2d - layout_map['d2.bias'] = self.layout_1d - - with layout_map_lib.layout_map_scope(layout_map): - model = SubclassModel(name='model') - - # Init the model with eager tensor, make sure the model weights have correct - # layout, as well as produce correct result. - inputs = tf.zeros((10, 10)) - inputs = dtensor.copy_to_mesh(inputs, layout=self.layout_2d) - result = model(inputs) - self.assertAllClose(result, tf.zeros((10, 1000))) - d1 = model.d1 - d2 = model.d2 - self.assertEqual(d1.kernel.layout, self.layout_2d) - self.assertEqual(d1.bias.layout, self.layout_1d) - self.assertEqual(d2.kernel.layout, self.layout_2d) - self.assertEqual(d2.bias.layout, self.layout_1d) - - # Also make sure we repopulate the cached attributes like - # layer._trainable_weights - self.assertIs(d1.kernel, d1._trainable_weights[0]) - self.assertIs(d1.bias, d1._trainable_weights[1]) - self.assertIs(d2.kernel, d2._trainable_weights[0]) - self.assertIs(d2.bias, d2._trainable_weights[1]) - - result = model(inputs, training=True) - self.assertAllClose(result, tf.zeros((10, 1000), layout=self.layout_2d)) - - def test_init_functional_model_variable_with_layout(self): - # Note that the functional model is using layers name + attribute name - # the layer name are unique among the functional model, and when the layer - # doesn't have a name, keras will give it a unique name based on the layer - # class. 
- layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) - layout_map['d1.kernel'] = self.layout_2d - layout_map['d1.bias'] = self.layout_1d - layout_map['d2.kernel'] = self.layout_2d - layout_map['d2.bias'] = self.layout_1d - - with layout_map_lib.layout_map_scope(layout_map): - inputs = tf.keras.Input((10,), batch_size=10) - x = layers.Dense(20, name='d1')(inputs) - x = layers.Dropout(0.1)(x) - output = layers.Dense(30, name='d2')(x) - - model = tf.keras.Model(inputs, output) - - # It includes input layer as well. - self.assertLen(model.layers, 4) - d1 = model.layers[1] - d2 = model.layers[3] - - self.assertEqual(d1.kernel.layout, self.layout_2d) - self.assertEqual(d1.bias.layout, self.layout_1d) - self.assertEqual(d2.kernel.layout, self.layout_2d) - self.assertEqual(d2.bias.layout, self.layout_1d) - - # Also make sure we repopulate the cached attributes like - # layer._trainable_weights - self.assertIs(d1.kernel, d1._trainable_weights[0]) - self.assertIs(d1.bias, d1._trainable_weights[1]) - self.assertIs(d2.kernel, d2._trainable_weights[0]) - self.assertIs(d2.bias, d2._trainable_weights[1]) - - inputs = tf.zeros((10, 10)) - inputs = dtensor.copy_to_mesh(inputs, layout=self.layout_2d) - result = model(inputs, training=True) - expected_result = tf.zeros((10, 30)) - expected_result = dtensor.copy_to_mesh( - expected_result, layout=self.layout_2d) - self.assertAllClose(result, expected_result) - - def test_init_sequential_model_variable_with_layout(self): - # Note that the sequential model is using layers name + attribute name - # the layer name are unique among the functional model, and when the layer - # doesn't have a name, keras will give it a unique name based on the layer - # class. - layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) - layout_map['d1.kernel'] = self.layout_2d - layout_map['d1.bias'] = self.layout_1d - layout_map['d2.kernel'] = self.layout_2d - layout_map['d2.bias'] = self.layout_1d - - with layout_map_lib.layout_map_scope(layout_map): - model = tf.keras.Sequential([ - layers.Dense(20, name='d1', input_shape=(10,)), - layers.Dropout(0.1), - layers.Dense(30, name='d2') - ]) - - self.assertLen(model.layers, 3) - d1 = model.layers[0] - d2 = model.layers[2] - - self.assertEqual(d1.kernel.layout, self.layout_2d) - self.assertEqual(d1.bias.layout, self.layout_1d) - self.assertEqual(d2.kernel.layout, self.layout_2d) - self.assertEqual(d2.bias.layout, self.layout_1d) - - # Also make sure we repopulate the cached attributes like - # layer._trainable_weights - self.assertIs(d1.kernel, d1._trainable_weights[0]) - self.assertIs(d1.bias, d1._trainable_weights[1]) - self.assertIs(d2.kernel, d2._trainable_weights[0]) - self.assertIs(d2.bias, d2._trainable_weights[1]) - - inputs = tf.zeros((10, 10)) - inputs = dtensor.copy_to_mesh(inputs, layout=self.layout_2d) - result = model(inputs, training=True) - expected_result = tf.zeros((10, 30)) - expected_result = dtensor.copy_to_mesh( - expected_result, layout=self.layout_2d) - self.assertAllClose(result, expected_result) - - def test_init_model_with_empty_layout_map(self): - # Create empty layout map, which means all the weights just default to - # all replicated. 
- layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) - with layout_map_lib.layout_map_scope(layout_map): - model = tf.keras.Sequential([ - layers.Dense(20, name='d1', input_shape=(10,)), - layers.Dropout(0.1), - layers.Dense(30, name='d2') - ]) - - self.assertLen(model.layers, 3) - d1 = model.layers[0] - d2 = model.layers[2] - - self.assertEqual(d1.kernel.layout, self.layout_2d) - self.assertEqual(d1.bias.layout, self.layout_1d) - self.assertEqual(d2.kernel.layout, self.layout_2d) - self.assertEqual(d2.bias.layout, self.layout_1d) - - def test_weight_regularization(self): - layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) - with layout_map_lib.layout_map_scope(layout_map): - model = tf.keras.Sequential([ - layers.Dense(20, name='d1', input_shape=(10,), - kernel_initializer='ones', - kernel_regularizer='l2'), - layers.Dropout(0.1), - layers.Dense(30, name='d2', kernel_initializer='ones', - kernel_regularizer='l2') - ]) - - self.assertLen(model.losses, 2) - # kernel shape [10, 20] with all "1", timed by 0.01 from l2 - self.assertAllClose(model.losses[0], 2.0) - # kernel shape [20, 30] with all "1", timed by 0.01 from l2 - self.assertAllClose(model.losses[1], 6.0) - - def test_dvariable_name(self): - layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) - with layout_map_lib.layout_map_scope(layout_map): - model = tf.keras.Sequential([ - layers.Dense(20, name='d1', input_shape=(10,)), - layers.Dropout(0.1), - layers.Dense(30, name='d2') - ]) - - self.assertLen(model.layers, 3) - self.assertEqual(model.layers[0].kernel.name, 'd1/kernel:0') - self.assertEqual(model.layers[0].bias.name, 'd1/bias:0') - - -if __name__ == '__main__': - tf.test.main() + def call(self, inputs): + return tf.matmul(inputs, self._weight) + + +class ObjectPathMappingTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + backend.enable_tf_random_generator() + tf_utils.set_random_seed(1337) + global_ids = test_util.create_device_ids_array((2, 2)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": dtensor.Mesh( + ["X", "Y"], + global_ids, + local_device_ids, + test_util.create_device_list((2, 2), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) + self.layout_2d = dtensor.Layout.replicated(self.mesh, rank=2) + self.layout_1d = dtensor.Layout.replicated(self.mesh, rank=1) + + self.sharded_2d = dtensor.Layout.batch_sharded(self.mesh, "X", rank=2) + self.sharded_1d = dtensor.Layout.batch_sharded(self.mesh, "X", rank=1) + + def test_init_subclass_model_variable_with_layout(self): + layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) + layout_map["d1.kernel"] = self.layout_2d + layout_map["d1.bias"] = self.layout_1d + layout_map["d2.kernel"] = self.layout_2d + layout_map["d2.bias"] = self.layout_1d + + with layout_map.scope(): + model = SubclassModel(name="model") + + # Init the model with eager tensor, make sure the model weights have + # correct layout, as well as produce correct result. 
+        inputs = tf.zeros((10, 10))
+        inputs = dtensor.copy_to_mesh(inputs, layout=self.layout_2d)
+        result = model(inputs)
+        self.assertAllClose(result, tf.zeros((10, 1000)))
+        d1 = model.d1
+        d2 = model.d2
+        self.assertEqual(d1.kernel.layout, self.layout_2d)
+        self.assertEqual(d1.bias.layout, self.layout_1d)
+        self.assertEqual(d2.kernel.layout, self.layout_2d)
+        self.assertEqual(d2.bias.layout, self.layout_1d)
+
+        # Also make sure we repopulate the cached attributes like
+        # layer._trainable_weights
+        self.assertIs(d1.kernel, d1._trainable_weights[0])
+        self.assertIs(d1.bias, d1._trainable_weights[1])
+        self.assertIs(d2.kernel, d2._trainable_weights[0])
+        self.assertIs(d2.bias, d2._trainable_weights[1])
+
+        result = model(inputs, training=True)
+        self.assertAllClose(
+            result,
+            tf.experimental.dtensor.copy_to_mesh(
+                tf.zeros((10, 1000)), self.layout_2d
+            ),
+        )
+
+    def test_init_functional_model_variable_with_layout(self):
+        # Note that the functional model uses "layer_name.attribute_name" as
+        # the layout map key. Layer names are unique within a functional
+        # model, and when a layer doesn't have a name, Keras will give it a
+        # unique name based on the layer class.
+        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
+        layout_map["d1.kernel"] = self.layout_2d
+        layout_map["d1.bias"] = self.layout_1d
+        layout_map["d2.kernel"] = self.layout_2d
+        layout_map["d2.bias"] = self.layout_1d
+
+        with layout_map.scope():
+            inputs = layers.Input((10,), batch_size=10)
+            x = layers.Dense(20, name="d1")(inputs)
+            x = layers.Dropout(0.1)(x)
+            output = layers.Dense(30, name="d2")(x)
+
+            model = models.Model(inputs, output)
+
+        # It includes the input layer as well.
+        self.assertLen(model.layers, 4)
+        d1 = model.layers[1]
+        d2 = model.layers[3]
+
+        self.assertEqual(d1.kernel.layout, self.layout_2d)
+        self.assertEqual(d1.bias.layout, self.layout_1d)
+        self.assertEqual(d2.kernel.layout, self.layout_2d)
+        self.assertEqual(d2.bias.layout, self.layout_1d)
+
+        # Also make sure we repopulate the cached attributes like
+        # layer._trainable_weights
+        self.assertIs(d1.kernel, d1._trainable_weights[0])
+        self.assertIs(d1.bias, d1._trainable_weights[1])
+        self.assertIs(d2.kernel, d2._trainable_weights[0])
+        self.assertIs(d2.bias, d2._trainable_weights[1])
+
+        inputs = tf.zeros((10, 10))
+        inputs = dtensor.copy_to_mesh(inputs, layout=self.layout_2d)
+        result = model(inputs, training=True)
+        expected_result = tf.zeros((10, 30))
+        expected_result = dtensor.copy_to_mesh(
+            expected_result, layout=self.layout_2d
+        )
+        self.assertAllClose(result, expected_result)
+
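# --------------------------------------------------------------------------
# Editor's aside, not part of the patch: a minimal runnable sketch of the
# key matching that the layout-map tests in this file rely on. ToyLayoutMap
# is a hypothetical stand-in for layout_map_lib.LayoutMap and only shows how
# string keys such as "d1.kernel" are treated as regexes and matched against
# variable paths such as "model/d1/kernel".
import re


class ToyLayoutMap:
    def __init__(self):
        self._layout_map = {}  # insertion-ordered {pattern: layout}

    def __setitem__(self, pattern, layout):
        self._layout_map[pattern] = layout

    def __getitem__(self, path):
        # Exact match first, then fall back to a regex search; a miss means
        # the variable keeps the default (fully replicated) layout.
        if path in self._layout_map:
            return self._layout_map[path]
        for pattern, layout in self._layout_map.items():
            if re.search(pattern, path):
                return layout
        return None


toy_map = ToyLayoutMap()
toy_map["d1.kernel"] = "layout_2d"
toy_map["d1.bias"] = "layout_1d"
assert toy_map["model/d1/kernel"] == "layout_2d"  # "." also matches "/"
assert toy_map["model/d3/kernel"] is None  # unmapped -> stays replicated
# --------------------------------------------------------------------------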
+    def test_init_sequential_model_variable_with_layout(self):
+        # Note that the sequential model also uses
+        # "layer_name.attribute_name" as the layout map key. Layer names are
+        # unique within the model, and when a layer doesn't have a name,
+        # Keras will give it a unique name based on the layer class.
+        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
+        layout_map["d1.kernel"] = self.layout_2d
+        layout_map["d1.bias"] = self.layout_1d
+        layout_map["d2.kernel"] = self.layout_2d
+        layout_map["d2.bias"] = self.layout_1d
+
+        with layout_map.scope():
+            model = models.Sequential(
+                [
+                    layers.Dense(20, name="d1", input_shape=(10,)),
+                    layers.Dropout(0.1),
+                    layers.Dense(30, name="d2"),
+                ]
+            )
+
+        self.assertLen(model.layers, 3)
+        d1 = model.layers[0]
+        d2 = model.layers[2]
+
+        self.assertEqual(d1.kernel.layout, self.layout_2d)
+        self.assertEqual(d1.bias.layout, self.layout_1d)
+        self.assertEqual(d2.kernel.layout, self.layout_2d)
+        self.assertEqual(d2.bias.layout, self.layout_1d)
+
+        # Also make sure we repopulate the cached attributes like
+        # layer._trainable_weights
+        self.assertIs(d1.kernel, d1._trainable_weights[0])
+        self.assertIs(d1.bias, d1._trainable_weights[1])
+        self.assertIs(d2.kernel, d2._trainable_weights[0])
+        self.assertIs(d2.bias, d2._trainable_weights[1])
+
+        inputs = tf.zeros((10, 10))
+        inputs = dtensor.copy_to_mesh(inputs, layout=self.layout_2d)
+        result = model(inputs, training=True)
+        expected_result = tf.zeros((10, 30))
+        expected_result = dtensor.copy_to_mesh(
+            expected_result, layout=self.layout_2d
+        )
+        self.assertAllClose(result, expected_result)
+
+    def test_init_model_with_empty_layout_map(self):
+        # Create an empty layout map, which means all the weights just
+        # default to fully replicated.
+        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
+        with layout_map.scope():
+            model = models.Sequential(
+                [
+                    layers.Dense(20, name="d1", input_shape=(10,)),
+                    layers.Dropout(0.1),
+                    layers.Dense(30, name="d2"),
+                ]
+            )
+
+        self.assertLen(model.layers, 3)
+        d1 = model.layers[0]
+        d2 = model.layers[2]
+
+        self.assertEqual(d1.kernel.layout, self.layout_2d)
+        self.assertEqual(d1.bias.layout, self.layout_1d)
+        self.assertEqual(d2.kernel.layout, self.layout_2d)
+        self.assertEqual(d2.bias.layout, self.layout_1d)
+
+    def test_weight_regularization(self):
+        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
+        with layout_map.scope():
+            model = models.Sequential(
+                [
+                    layers.Dense(
+                        20,
+                        name="d1",
+                        input_shape=(10,),
+                        kernel_initializer="ones",
+                        kernel_regularizer="l2",
+                    ),
+                    layers.Dropout(0.1),
+                    layers.Dense(
+                        30,
+                        name="d2",
+                        kernel_initializer="ones",
+                        kernel_regularizer="l2",
+                    ),
+                ]
+            )
+
+        self.assertLen(model.losses, 2)
+        # kernel shape [10, 20] with all "1", multiplied by 0.01 from l2
+        self.assertAllClose(model.losses[0], 2.0)
+        # kernel shape [20, 30] with all "1", multiplied by 0.01 from l2
+        self.assertAllClose(model.losses[1], 6.0)
+
+    def test_dvariable_name(self):
+        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
+        with layout_map.scope():
+            model = models.Sequential(
+                [
+                    layers.Dense(20, name="d1", input_shape=(10,)),
+                    layers.Dropout(0.1),
+                    layers.Dense(30, name="d2"),
+                ]
+            )
+
+        self.assertLen(model.layers, 3)
+        self.assertEqual(model.layers[0].kernel.name, "d1/kernel:0")
+        self.assertEqual(model.layers[0].bias.name, "d1/bias:0")
+
+    @tf.compat.v1.test.mock.patch.dict(
+        "os.environ", {"DTENSOR_ENABLE_CHECKPOINT_V2": "True"}
+    )
+    def test_checkpoint(self):
+        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
+        with layout_map.scope():
+            model = models.Sequential(
+                [
+                    layers.Dense(20, name="d1", input_shape=(10,)),
+                    SubclassLayer(10),
+                ]
+            )
+        cpt = tf.train.Checkpoint(root=model)
+        options = tf.train.CheckpointOptions(
+            experimental_io_device=dtensor.device_name()
+        )
+        tmpdir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree,
tmpdir, ignore_errors=True) + + saved_path = cpt.save( + os.path.join(tmpdir, "checkpoint"), + options=options, + ) + + cpt.restore(saved_path, options=options) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/dtensor/lazy_variable.py b/keras/dtensor/lazy_variable.py index b69e96dd2bff..1bf9887137e4 100644 --- a/keras/dtensor/lazy_variable.py +++ b/keras/dtensor/lazy_variable.py @@ -16,204 +16,244 @@ import threading - +# isort: off from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope -from tensorflow.python.training.tracking import base as trackable +from tensorflow.python.trackable import base as trackable from tensorflow.python.util import compat from tensorflow.python.util import tf_contextlib - _DISABLE_LAZY_VARIABLE_INIT = threading.local() def _infer_shape_dtype_and_create_handle(initial_value, shape, dtype, name): - """Infer shape and dtype from initial_value and create a variable handle.""" - with ops.name_scope(name, "Variable", skip_on_eager=False) as name: - handle_name = ops.name_from_scope_name(name) - unique_id = "%s_%d" % (handle_name, ops.uid()) - - # Use attr_scope and device(None) to simulate the behavior of - # colocate_with when the variable we want to colocate with doesn't - # yet exist. - device_context_manager = ops.NullContextmanager - attr = attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue( - s=[compat.as_bytes("loc:@%s" % handle_name)])) - with ops.get_default_graph()._attr_scope({"_class": attr}): # pylint: disable=protected-access - with ops.name_scope("Initializer"), device_context_manager(None): - if not callable(initial_value): - if isinstance(initial_value, trackable.CheckpointInitialValue): - raise NotImplementedError( - "CheckpointInitialValue is not supported to be the initial " - "value of a lazy variable.") - initial_value = ops.convert_to_tensor( - initial_value, name="initial_value", dtype=dtype) - assert not callable(initial_value) - - assert initial_value.shape.is_compatible_with(shape) - dtype = dtype or initial_value.dtype.base_dtype - shape = shape or initial_value.shape - - assert dtype - assert shape - handle = resource_variable_ops._variable_handle_from_shape_and_dtype( # pylint: disable=protected-access - shape=shape, - dtype=dtype, - shared_name=None, # Never shared - name=name, - graph_mode=False, - initial_value=None) - # initial_value=initial_value if not callable(initial_value) else None) - return initial_value, shape, dtype, handle, handle_name, unique_id + """Infer shape and dtype from initial_value and create a variable handle.""" + with ops.name_scope(name, "Variable", skip_on_eager=False) as name: + handle_name = ops.name_from_scope_name(name) + unique_id = "%s_%d" % (handle_name, ops.uid()) + + # Use attr_scope and device(None) to simulate the behavior of + # colocate_with when the variable we want to colocate with doesn't + # yet exist. 
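# --------------------------------------------------------------------------
# Editor's aside, not part of the patch: what this helper is building
# toward, sketched with the same generated ops the module imports. A
# resource handle can exist before any value is assigned to it; reading it
# then fails, which is exactly the "lazy" property LazyInitVariable relies
# on. The exact error class (FailedPrecondition vs. NotFound) can vary by
# TF version, so the sketch catches the common base class.
import tensorflow as tf
from tensorflow.python.ops import gen_resource_variable_ops

handle = gen_resource_variable_ops.var_handle_op(
    dtype=tf.float32, shape=[2, 2], shared_name="editor_demo"
)
try:
    gen_resource_variable_ops.read_variable_op(handle, dtype=tf.float32)
except tf.errors.OpError:
    pass  # reading an unassigned handle raises
gen_resource_variable_ops.assign_variable_op(handle, tf.zeros([2, 2]))
value = gen_resource_variable_ops.read_variable_op(handle, dtype=tf.float32)
# --------------------------------------------------------------------------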
+ device_context_manager = ops.NullContextmanager + attr = attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue( + s=[compat.as_bytes(f"loc:@{handle_name}")] + ) + ) + with ops.get_default_graph()._attr_scope({"_class": attr}): + with ops.name_scope("Initializer"), device_context_manager(None): + if not callable(initial_value): + if isinstance( + initial_value, trackable.CheckpointInitialValue + ): + raise NotImplementedError( + "CheckpointInitialValue is not supported to be the " + "initial value of a lazy variable." + ) + initial_value = ops.convert_to_tensor( + initial_value, name="initial_value", dtype=dtype + ) + assert not callable(initial_value) + + assert initial_value.shape.is_compatible_with(shape) + dtype = dtype or initial_value.dtype.base_dtype + shape = shape or initial_value.shape + + assert dtype + assert shape + handle = ( + resource_variable_ops._variable_handle_from_shape_and_dtype( + shape=shape, + dtype=dtype, + shared_name=None, # Never shared + name=name, + graph_mode=False, + initial_value=None, + ) + ) + # initial_value=initial_value if not callable(initial_value) else + # None) + return initial_value, shape, dtype, handle, handle_name, unique_id class LazyInitVariable(resource_variable_ops.BaseResourceVariable): - """Lazily initialized variables. - - The major use case for this class is to serve as a memory efficient - alternative for tf.Variable. The resource handle of this class is point to - nothing, which mean it will raise error when its value is fetched in a eager - context. Having said that, it will perform like a normal tf.Variable when - using with graph tensor, like KerasTensor produced from tf.keras.Input. - """ - - def __init__( - self, - initial_value=None, - trainable=None, - collections=None, - validate_shape=True, # pylint: disable=unused-argument - caching_device=None, - name=None, - dtype=None, - variable_def=None, - import_scope=None, - constraint=None, - distribute_strategy=None, - synchronization=None, - aggregation=None, - shape=None, - **kwargs): - assert context.executing_eagerly() # To simplify the logic - assert variable_def is None # Not supported yet. - assert caching_device is None # Not supported yet - - if initial_value is None: - raise ValueError("The `initial_value` arg to `tf.Variable` must " - "be specified except when you are not providing a " - "`variable_def`. You provided neither.") - - if isinstance(initial_value, ops.Tensor) and hasattr( - initial_value, "graph") and initial_value.graph.building_function: - raise ValueError(f"Argument `initial_value` ({initial_value}) could not " - "be lifted out of a `tf.function`. " - f"(Tried to create variable with name='{name}'). " - "To avoid this error, when constructing `tf.Variable`s " - "inside of `tf.function` you can create the " - "`initial_value` tensor in a " - "`tf.init_scope` or pass a callable `initial_value` " - "(e.g., `tf.Variable(lambda : " - "tf.truncated_normal([10, 40]))`). " - "Please file a feature request if this " - "restriction inconveniences you.") - - if constraint is not None and not callable(constraint): - raise ValueError(f"Argument `constraint` must be None or a callable. " - f"a callable. 
Got a {type(constraint)}: {constraint}")
-
-    self._name = name
-    (initial_value, shape, dtype, handle, handle_name,
-     unique_id) = _infer_shape_dtype_and_create_handle(initial_value, shape,
-                                                       dtype, name)
-
-    super().__init__(
-        distribute_strategy=distribute_strategy,
-        initial_value=initial_value,
-        shape=shape,
-        dtype=dtype,
-        name=name,
-        unique_id=unique_id,
-        handle_name=handle_name,
-        constraint=constraint,
-        handle=handle,
-        graph_element=None,
-        trainable=trainable,
-        synchronization=synchronization,
-        aggregation=aggregation,
-        in_graph_mode=False)
-
-  # TODO(scottzhu): This method and create_and_initialize might be removed if
-  # we decide to just use the tf.Variable to replace this class.
-  def initialize(self):
-    with ops.name_scope(self._name, "Variable", skip_on_eager=False) as name:
-      with ops.colocate_with(self._handle), ops.name_scope("Initializer"):
+    """Lazily initialized variables.
+
+    The major use case for this class is to serve as a memory-efficient
+    alternative to tf.Variable. The resource handle of this class points to
+    nothing, which means it will raise an error when its value is fetched in
+    an eager context. Having said that, it will behave like a normal
+    tf.Variable when used with graph tensors, like the KerasTensor produced
+    from tf.keras.Input.
+    """
+
+    def __init__(
+        self,
+        initial_value=None,
+        trainable=None,
+        collections=None,
+        validate_shape=True,
+        caching_device=None,
+        name=None,
+        dtype=None,
+        variable_def=None,
+        import_scope=None,
+        constraint=None,
+        distribute_strategy=None,
+        synchronization=None,
+        aggregation=None,
+        shape=None,
+        **kwargs,
+    ):
+        assert context.executing_eagerly()  # To simplify the logic
+        assert variable_def is None  # Not supported yet.
+        assert caching_device is None  # Not supported yet
+
+        if initial_value is None:
+            raise ValueError(
+                "The `initial_value` arg to `tf.Variable` must "
+                "be specified except when you are not providing a "
+                "`variable_def`. You provided neither."
+            )
+
+        if (
+            isinstance(initial_value, tensor.Tensor)
+            and hasattr(initial_value, "graph")
+            and initial_value.graph.building_function
+        ):
+            raise ValueError(
+                f"Argument `initial_value` ({initial_value}) could not "
+                "be lifted out of a `tf.function`. "
+                f"(Tried to create variable with name='{name}'). "
+                "To avoid this error, when constructing `tf.Variable`s "
+                "inside of `tf.function` you can create the "
+                "`initial_value` tensor in a "
+                "`tf.init_scope` or pass a callable `initial_value` "
+                "(e.g., `tf.Variable(lambda : "
+                "tf.truncated_normal([10, 40]))`). "
+                "Please file a feature request if this "
+                "restriction inconveniences you."
+            )
+
+        if constraint is not None and not callable(constraint):
+            raise ValueError(
+                "Argument `constraint` must be None or a callable. "
+                f"Got a {type(constraint)}: {constraint}"
+            )
+
+        self._name = name
+        (
+            initial_value,
+            shape,
+            dtype,
+            handle,
+            handle_name,
+            unique_id,
+        ) = _infer_shape_dtype_and_create_handle(
+            initial_value, shape, dtype, name
+        )
+
+        super().__init__(
+            distribute_strategy=distribute_strategy,
+            initial_value=initial_value,
+            shape=shape,
+            dtype=dtype,
+            name=name,
+            unique_id=unique_id,
+            handle_name=handle_name,
+            constraint=constraint,
+            handle=handle,
+            graph_element=None,
+            trainable=trainable,
+            synchronization=synchronization,
+            aggregation=aggregation,
+            in_graph_mode=False,
+        )
+
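# --------------------------------------------------------------------------
# Editor's aside, not part of the patch: the creator-scope hook this module
# builds on, shown with the public API. A variable creator intercepts
# tf.Variable construction; _lazy_init_variable_creator at the bottom of
# this file uses the same hook to substitute LazyInitVariable.
import tensorflow as tf


def logging_creator(next_creator, **kwargs):
    # Inspect or rewrite kwargs here, then delegate to the default creator.
    print("creating variable:", kwargs.get("name"))
    return next_creator(**kwargs)


with tf.variable_creator_scope(logging_creator):
    v = tf.Variable(tf.zeros([2, 2]), name="demo")
# --------------------------------------------------------------------------
+    # TODO(scottzhu): This method and create_and_initialize might be removed if
+    # we decide to just use the tf.Variable to replace this class.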
+ def initialize(self): + with ops.name_scope(self._name, "Variable", skip_on_eager=False): + with ops.colocate_with(self._handle), ops.name_scope("Initializer"): + if callable(self._initial_value): + initial_value = self._initial_value() + else: + initial_value = self._initial_value + + if not initial_value.shape.is_compatible_with(self._shape): + raise ValueError( + "In this `tf.Variable` creation, the initial value's " + f"shape ({initial_value.shape}) is not compatible with " + "the explicitly supplied `shape` " + f"argument ({self._shape})." + ) + assert self._dtype is initial_value.dtype.base_dtype + gen_resource_variable_ops.assign_variable_op( + self._handle, initial_value + ) + + def create_and_initialize(self): if callable(self._initial_value): - initial_value = self._initial_value() - else: - initial_value = self._initial_value - - if not initial_value.shape.is_compatible_with(self._shape): - raise ValueError( - f"In this `tf.Variable` creation, the initial value's shape " - f"({initial_value.shape}) is not compatible with " - f"the explicitly supplied `shape` argument ({self._shape}).") - assert self._dtype is initial_value.dtype.base_dtype - gen_resource_variable_ops.assign_variable_op(self._handle, initial_value) - - def create_and_initialize(self): - if callable(self._initial_value): - initial_value = self._initial_value() - - with ops.device(initial_value.device): - (initial_value, shape, dtype, handle, handle_name, - unique_id) = _infer_shape_dtype_and_create_handle( - initial_value, self._shape, self._dtype, self._name) - self.initialize() - - super().__init__( - trainable=self._trainable, - shape=shape, - dtype=dtype, - handle=handle, - synchronization=self._synchronization, - constraint=self._constraint, - aggregation=self._aggregation, - distribute_strategy=self._distribute_strategy, - name=self._name, - unique_id=unique_id, - handle_name=handle_name, - graph_element=None, - initial_value=initial_value, - initializer_op=None, - is_initialized_op=None, - cached_value=None, - caching_device=None) + initial_value = self._initial_value() + + with ops.device(initial_value.device): + ( + initial_value, + shape, + dtype, + handle, + handle_name, + unique_id, + ) = _infer_shape_dtype_and_create_handle( + initial_value, self._shape, self._dtype, self._name + ) + self.initialize() + + super().__init__( + trainable=self._trainable, + shape=shape, + dtype=dtype, + handle=handle, + synchronization=self._synchronization, + constraint=self._constraint, + aggregation=self._aggregation, + distribute_strategy=self._distribute_strategy, + name=self._name, + unique_id=unique_id, + handle_name=handle_name, + graph_element=None, + initial_value=initial_value, + initializer_op=None, + is_initialized_op=None, + cached_value=None, + caching_device=None, + ) def _lazy_init_variable_creator(next_creator, **kwargs): - if getattr(_DISABLE_LAZY_VARIABLE_INIT, "disabled", False): - return next_creator(**kwargs) - else: - return LazyInitVariable(**kwargs) + if getattr(_DISABLE_LAZY_VARIABLE_INIT, "disabled", False): + return next_creator(**kwargs) + else: + return LazyInitVariable(**kwargs) @tf_contextlib.contextmanager def lazy_init_scope(): - with variable_scope.variable_creator_scope(_lazy_init_variable_creator): - yield + with variable_scope.variable_creator_scope(_lazy_init_variable_creator): + yield @tf_contextlib.contextmanager def disable_init_variable_creator(): - try: - global _DISABLE_LAZY_VARIABLE_INIT - existing_value = getattr(_DISABLE_LAZY_VARIABLE_INIT, "disabled", False) - 
_DISABLE_LAZY_VARIABLE_INIT.disabled = True - yield - finally: - _DISABLE_LAZY_VARIABLE_INIT.disabled = existing_value + try: + global _DISABLE_LAZY_VARIABLE_INIT + existing_value = getattr(_DISABLE_LAZY_VARIABLE_INIT, "disabled", False) + _DISABLE_LAZY_VARIABLE_INIT.disabled = True + yield + finally: + _DISABLE_LAZY_VARIABLE_INIT.disabled = existing_value diff --git a/keras/dtensor/metrics_test.py b/keras/dtensor/metrics_test.py index 4be1afcd92db..ddad4077ef95 100644 --- a/keras/dtensor/metrics_test.py +++ b/keras/dtensor/metrics_test.py @@ -14,78 +14,81 @@ # ============================================================================== """Tests for metrics.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras import metrics from keras.dtensor import dtensor_api as dtensor from keras.dtensor import test_util from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf class MetricsTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + global_ids = test_util.create_device_ids_array((2, 2)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": dtensor.Mesh( + ["X", "Y"], + global_ids, + local_device_ids, + test_util.create_device_list((2, 2), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) + tf_utils.set_random_seed(1337) - def setUp(self): - super().setUp() - global_ids = test_util.create_device_ids_array((2, 2)) - local_device_ids = np.ravel(global_ids).tolist() - mesh_dict = { - 'CPU': - dtensor.Mesh(['X', 'Y'], global_ids, local_device_ids, - test_util.create_device_list((2, 2), 'CPU')) - } - self.mesh = self.configTestMesh(mesh_dict) - tf_utils.set_random_seed(1337) - - @parameterized.parameters( - (metrics.Accuracy, {}), - (metrics.AUC, {}), - (metrics.BinaryAccuracy, {}), - (metrics.BinaryCrossentropy, {}), - (metrics.BinaryIoU, {}), - (metrics.CategoricalAccuracy, {}), - (metrics.CategoricalCrossentropy, {}), - (metrics.CategoricalHinge, {}), - (metrics.CosineSimilarity, {}), - (metrics.FalseNegatives, {}), - (metrics.FalsePositives, {}), - (metrics.Hinge, {}), - (metrics.IoU, {'num_classes': 3, 'target_class_ids': [1]}), - (metrics.KLDivergence, {}), - (metrics.LogCoshError, {}), - (metrics.Mean, {}), - (metrics.MeanAbsoluteError, {}), - (metrics.MeanAbsolutePercentageError, {}), - (metrics.MeanIoU, {'num_classes': 3}), - (metrics.MeanRelativeError, {'normalizer': [1, 3, 2, 3]}), - (metrics.MeanSquaredError, {}), - (metrics.MeanSquaredLogarithmicError, {}), - (metrics.OneHotIoU, {'num_classes': 3, 'target_class_ids': [1]}), - (metrics.OneHotMeanIoU, {'num_classes': 3}), - (metrics.Poisson, {}), - (metrics.Precision, {}), - (metrics.PrecisionAtRecall, {'recall': 0.5}), - (metrics.Recall, {}), - (metrics.RecallAtPrecision, {'precision': 0.5}), - (metrics.RootMeanSquaredError, {}), - (metrics.SensitivityAtSpecificity, {'specificity': 0.5}), - (metrics.SparseCategoricalAccuracy, {}), - (metrics.SparseCategoricalCrossentropy, {}), - (metrics.SparseTopKCategoricalAccuracy, {}), - (metrics.SpecificityAtSensitivity, {'sensitivity': 0.5}), - (metrics.SquaredHinge, {}), - (metrics.Sum, {}), - (metrics.TopKCategoricalAccuracy, {}), - (metrics.TrueNegatives, {}), - (metrics.TruePositives, {}), - ) - def test_metric_layout(self, metric_cls, init_args): - metric = metric_cls(**init_args, mesh=self.mesh) + @parameterized.parameters( + (metrics.Accuracy, {}), + (metrics.AUC, {}), + (metrics.BinaryAccuracy, {}), + (metrics.BinaryCrossentropy, {}), + 
(metrics.BinaryIoU, {}), + (metrics.CategoricalAccuracy, {}), + (metrics.CategoricalCrossentropy, {}), + (metrics.CategoricalHinge, {}), + (metrics.CosineSimilarity, {}), + (metrics.FalseNegatives, {}), + (metrics.FalsePositives, {}), + (metrics.Hinge, {}), + (metrics.IoU, {"num_classes": 3, "target_class_ids": [1]}), + (metrics.KLDivergence, {}), + (metrics.LogCoshError, {}), + (metrics.Mean, {}), + (metrics.MeanAbsoluteError, {}), + (metrics.MeanAbsolutePercentageError, {}), + (metrics.MeanIoU, {"num_classes": 3}), + (metrics.MeanRelativeError, {"normalizer": [1, 3, 2, 3]}), + (metrics.MeanSquaredError, {}), + (metrics.MeanSquaredLogarithmicError, {}), + (metrics.OneHotIoU, {"num_classes": 3, "target_class_ids": [1]}), + (metrics.OneHotMeanIoU, {"num_classes": 3}), + (metrics.Poisson, {}), + (metrics.Precision, {}), + (metrics.PrecisionAtRecall, {"recall": 0.5}), + (metrics.Recall, {}), + (metrics.RecallAtPrecision, {"precision": 0.5}), + (metrics.RootMeanSquaredError, {}), + (metrics.SensitivityAtSpecificity, {"specificity": 0.5}), + (metrics.SparseCategoricalAccuracy, {}), + (metrics.SparseCategoricalCrossentropy, {}), + (metrics.SparseTopKCategoricalAccuracy, {}), + (metrics.SpecificityAtSensitivity, {"sensitivity": 0.5}), + (metrics.SquaredHinge, {}), + (metrics.Sum, {}), + (metrics.TopKCategoricalAccuracy, {}), + (metrics.TrueNegatives, {}), + (metrics.TruePositives, {}), + ) + def test_metric_layout(self, metric_cls, init_args): + metric = metric_cls(**init_args, mesh=self.mesh) - for weight in metric.non_trainable_weights: - self.assertIsInstance(weight, dtensor.DVariable) - self.assertTrue(weight.layout.is_fully_replicated()) + for weight in metric.non_trainable_weights: + self.assertIsInstance(weight, dtensor.DVariable) + self.assertTrue(weight.layout.is_fully_replicated()) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/dtensor/mnist_model_test.py b/keras/dtensor/mnist_model_test.py index 800dcdfae76d..13cd15d5a4ae 100644 --- a/keras/dtensor/mnist_model_test.py +++ b/keras/dtensor/mnist_model_test.py @@ -14,70 +14,99 @@ # ============================================================================== """E2E Tests for mnist_model.""" +import numpy as np +import tensorflow.compat.v2 as tf +from tensorflow.compat.v2.experimental import dtensor + from keras import backend -from keras.dtensor import dtensor_api as dtensor from keras.dtensor import integration_test_utils -from keras.dtensor import optimizers as optimizer_lib +from keras.dtensor import layout_map as layout_map_lib from keras.dtensor import test_util +from keras.optimizers import adam from keras.utils import tf_utils -import tensorflow.compat.v2 as tf - - -from tensorflow.dtensor.python import mesh_util -from tensorflow.dtensor.python import tpu_util - - class MnistTest(test_util.DTensorBaseTest): - - def test_mnist_training_cpu(self): - devices = tf.config.list_physical_devices('CPU') - tf.config.set_logical_device_configuration( - devices[0], [tf.config.LogicalDeviceConfiguration(),] * 8) - - mesh = mesh_util.create_mesh( - devices=['CPU:%d' % i for i in range(8)], mesh_dims=[('batch', 8)]) - - backend.enable_tf_random_generator() - # Needed by keras initializers. 
- tf_utils.set_random_seed(1337) - - model = integration_test_utils.get_model_with_layout_map( - integration_test_utils.get_all_replicated_layout_map(mesh)) - - optimizer = optimizer_lib.Adam(learning_rate=0.001, mesh=mesh) - optimizer.build(model.trainable_variables) - - train_losses = integration_test_utils.train_mnist_model_batch_sharded( - model, optimizer, mesh, num_epochs=3, steps_per_epoch=100, - global_batch_size=64) - # Make sure the losses are decreasing - self.assertEqual(train_losses, sorted(train_losses, reverse=True)) - - def DISABLED_test_mnist_training_tpu(self): - # TODO(scottzhu): Enable TPU test once the dtensor_test rule is migrated out - # of learning/brain - tpu_util.dtensor_initialize_tpu_system() - total_tpu_device_count = dtensor.num_global_devices('TPU') - mesh_shape = [total_tpu_device_count] - mesh = tpu_util.create_tpu_mesh(['batch'], mesh_shape, 'tpu_mesh') - - # Needed by keras initializers. - tf_utils.set_random_seed(1337) - - model = integration_test_utils.get_model_with_layout_map( - integration_test_utils.get_all_replicated_layout_map(mesh)) - - optimizer = optimizer_lib.Adam(learning_rate=0.001, mesh=mesh) - optimizer.build(model.trainable_variables) - - train_losses = integration_test_utils.train_mnist_model_batch_sharded( - model, optimizer, mesh, num_epochs=3, steps_per_epoch=100, - global_batch_size=64) - # Make sure the losses are decreasing - self.assertEqual(train_losses, sorted(train_losses, reverse=True)) - - -if __name__ == '__main__': - tf.test.main() + def setUp(self): + super().setUp() + backend.enable_tf_random_generator() + tf_utils.set_random_seed(1337) + global_ids = test_util.create_device_ids_array((2,)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + device: tf.experimental.dtensor.Mesh( + ["batch"], + global_ids, + local_device_ids, + test_util.create_device_list((2,), device), + ) + for device in ("CPU", "GPU", "TPU") + } + self.mesh = self.configTestMesh(mesh_dict) + + def test_mnist_training(self): + layout_map = layout_map_lib.LayoutMap(self.mesh) + with layout_map.scope(): + model = integration_test_utils.get_model() + + optimizer = adam.Adam(learning_rate=0.001, mesh=self.mesh) + optimizer.build(model.trainable_variables) + + train_losses = integration_test_utils.train_mnist_model_batch_sharded( + model, + optimizer, + self.mesh, + num_epochs=3, + steps_per_epoch=20, + global_batch_size=64, + ) + # Make sure the losses are decreasing + self.assertEqual(train_losses, sorted(train_losses, reverse=True)) + + def test_model_fit(self): + layout_map = layout_map_lib.LayoutMap(self.mesh) + with layout_map.scope(): + model = integration_test_utils.get_model() + + optimizer = adam.Adam(learning_rate=0.001, mesh=self.mesh) + + global_batch_size = 64 + model.compile( + loss="CategoricalCrossentropy", optimizer=optimizer, metrics="acc" + ) + train_ds, eval_ds = integration_test_utils.get_mnist_datasets( + integration_test_utils.NUM_CLASS, global_batch_size + ) + + def distribute_ds(dataset): + dataset = dataset.unbatch() + + def _create_batch_layout(tensor_spec): + rank = len(tensor_spec.shape) + 1 + return dtensor.Layout.batch_sharded( + self.mesh, batch_dim="batch", rank=rank + ) + + layouts = tf.nest.map_structure( + _create_batch_layout, dataset.element_spec + ) + + return dtensor.DTensorDataset( + dataset=dataset, + mesh=self.mesh, + layouts=layouts, + global_batch_size=global_batch_size, + dataset_already_batched=False, + batch_dim="batch", + prefetch=None, + tf_data_service_config=None, + ) + + train_ds = 
distribute_ds(train_ds) + eval_ds = distribute_ds(eval_ds) + model.fit(train_ds, steps_per_epoch=10) + model.evaluate(eval_ds, steps=10) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/dtensor/optimizers.py b/keras/dtensor/optimizers.py deleted file mode 100644 index d94d243dd4d6..000000000000 --- a/keras/dtensor/optimizers.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""DTensor specific Keras optimizers.""" - -from keras.dtensor import dtensor_api as dtensor -from keras.optimizers.optimizer_experimental import adadelta -from keras.optimizers.optimizer_experimental import adagrad -from keras.optimizers.optimizer_experimental import adam -from keras.optimizers.optimizer_experimental import optimizer as optimizer_lib -from keras.optimizers.optimizer_experimental import rmsprop -from keras.optimizers.optimizer_experimental import sgd -from keras.optimizers.schedules import learning_rate_schedule - -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export -from tensorflow.tools.docs import doc_controls - - -# pylint: disable=protected-access,missing-class-docstring -class Optimizer(optimizer_lib._BaseOptimizer): - """DTensor specific optimizers. - - The major changes for this class is that all the variable init logic will be - mesh/layout aware. - - """ - # Note that we didn't subclass optimizer_lib.Optimizer since it contains the - # extra logic of handling distribution strategy, which we don't need for - # DTensor - - def __init__(self, name, mesh=None): - """Create a new Optimizer. - - Args: - name: String. The name of the optimizer, which will appear in all the - state variables created by this optimizer. - mesh: dtensor.Mesh. The optional Mesh which will be used to create - the states. Note that usually the state variable will use the layout - from the corresponding model variables. This mesh only used for global - variables like globle steps, learning rate, etc. - """ - # TODO(scottzhu): Skip the gradients_clip_option and ema_option for now, and - # will cover them in future if really needed. - # TODO(scottzhu): We might want to make mesh to be required in future. - self._mesh = mesh - super().__init__(name=name) - - def _create_iteration_variable(self): - init_val = tf.constant(0, dtype=tf.int64) - if self._mesh: - init_val = dtensor.copy_to_mesh( - init_val, dtensor.Layout.replicated(self._mesh, rank=0)) - with tf.init_scope(): - # Lift the variable creation to init scope to avoid environment issue. - self._iterations = dtensor.DVariable(init_val, name='iteration') - - ################## Override methods from keras.Optimizer ################ - def add_variable_from_reference(self, - model_variable, - variable_name, - initial_value=None): - """Create an optimizer variable from model variable. 
- - Create an optimizer variable based on the information of model variable. - For example, in SGD optimizer momemtum, for each model variable, a - corresponding momemtum variable is created of the same shape and dtype. - - Args: - model_variable: The corresponding model variable to the optimizer variable - to be created. - variable_name: The name prefix of the optimizer variable to be created. - The create variables name will follow the pattern - `{variable_name}/{model_variable.name}`, e.g., `momemtum/dense_1`. - initial_value: The initial value of the optimizer variable, if None, the - value will be default to 0. - - Returns: - An optimizer variable. - """ - if initial_value is None: - # Use tf.zeros_like which will propagate the layout information from the - # model weights if any. - initial_value = tf.zeros_like(model_variable) - elif isinstance(initial_value, tf.Tensor): - initial_value = dtensor.copy_to_mesh( - initial_value, - dtensor.Layout.replicated(self._mesh, rank=initial_value.shape.rank)) - return dtensor.DVariable( - initial_value=initial_value, - name=f'{variable_name}/{model_variable._shared_name}', - dtype=model_variable.dtype, - trainable=False) - - @doc_controls.do_not_generate_docs - def aggregate_gradients(self, grads_and_vars): - # Hide the aggregate_gradients from Optimizer.aggregate_gradients - raise NotImplementedError( - 'Dtensor doesn\'t need to manually aggregate gradients') - - def _var_key(self, variable): - """Get a unique identifier of the given variable.""" - return optimizer_lib._BaseOptimizer._var_key(self, variable) - - def apply_gradients(self, grads_and_vars): - """Apply gradients to variables. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - - Returns: - None - - Raises: - TypeError: If `grads_and_vars` is malformed. - """ - # Explicitly call the _BaseOptimizer to avoid any chance of using - # Optimizers.apply_gradients which contains distribution strategy logic. - optimizer_lib._BaseOptimizer.apply_gradients(self, grads_and_vars) - - def _internal_apply_gradients(self, grads_and_vars): - """Helper function of apply gradients. - - This is required for separating out distributed training logic. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - """ - # Explicitly call the _BaseOptimizer to avoid any chance of using - # Optimizers.apply_gradients which contains distribution strategy logic. - optimizer_lib._BaseOptimizer._internal_apply_gradients(self, grads_and_vars) - - def _overwrite_model_variables_with_average_value_helper(self, var_list): - """Helper function to _overwrite_model_variables_with_average_value.""" - (optimizer_lib._BaseOptimizer. - _overwrite_model_variables_with_average_value_helper(self, var_list)) - - def _build_learning_rate(self, learning_rate): - if isinstance(learning_rate, learning_rate_schedule.LearningRateSchedule): - # Create a variable to hold the current learning rate. - # Note that the init value `learning_rate(self.iterations)` should have - # the correct layout information from self.iterations. 
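# --------------------------------------------------------------------------
# Editor's aside, not part of the removed file: the layout propagation
# described in the docstrings above, sketched with the public DTensor API.
# Assumes a `mesh` built as in the tests in this diff; make_slot and
# make_scalar_state are hypothetical helpers, not Keras APIs.
import tensorflow as tf
from tensorflow.experimental import dtensor


def make_slot(model_variable):
    # tf.zeros_like on a DTensor-backed variable yields a DTensor with the
    # same layout, so optimizer state (e.g. Adam's `m`) is sharded exactly
    # like the weight it mirrors.
    return dtensor.DVariable(tf.zeros_like(model_variable), trainable=False)


def make_scalar_state(value, mesh):
    # Scalar state (iterations, learning rate) is replicated on the mesh.
    init = dtensor.copy_to_mesh(
        tf.constant(value), dtensor.Layout.replicated(mesh, rank=0)
    )
    return dtensor.DVariable(init)
# --------------------------------------------------------------------------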
- self._current_learning_rate = dtensor.DVariable( - learning_rate(self.iterations), - name='learning_rate', - dtype=tf.float32) - return learning_rate - init_val = tf.constant(learning_rate, dtype=tf.float32) - if self._mesh: - init_val = dtensor.copy_to_mesh( - init_val, dtensor.Layout.replicated(self._mesh, rank=0)) - return dtensor.DVariable(init_val, name='learning_rate') - - -@keras_export('keras.dtensor.experimental.optimizers.Adadelta', v1=[]) -class Adadelta(Optimizer, adadelta.Adadelta): - - def __init__(self, - learning_rate=0.001, - rho=0.95, - epsilon=1e-7, - gradients_clip_option=None, - ema_option=None, - name='Adadelta', - mesh=None): - # Skip the adam.Adadelta.__init__ and only call the Optimizer.__init__ - # this is to skip the keras.Optimizer.__init__, which contains the logic - # of distribution strategy. Same for all the optimizers subclasses. - Optimizer.__init__(self, name=name, mesh=mesh) - self._learning_rate = self._build_learning_rate(learning_rate) - self.rho = rho - self.epsilon = epsilon - - -@keras_export('keras.dtensor.experimental.optimizers.Adagrad', v1=[]) -class Adagrad(Optimizer, adagrad.Adagrad): - - def __init__(self, - learning_rate=0.001, - initial_accumulator_value=0.1, - epsilon=1e-7, - gradients_clip_option=None, - ema_option=None, - name='Adagrad', - mesh=None): - Optimizer.__init__(self, name=name, mesh=mesh) - self._learning_rate = self._build_learning_rate(learning_rate) - self.initial_accumulator_value = initial_accumulator_value - self.epsilon = epsilon - - -@keras_export('keras.dtensor.experimental.optimizers.Adam', v1=[]) -class Adam(Optimizer, adam.Adam): - - def __init__(self, - learning_rate=0.001, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-7, - amsgrad=False, - gradients_clip_option=None, - ema_option=None, - name='Adam', - mesh=None): - Optimizer.__init__(self, name=name, mesh=mesh) - self._learning_rate = self._build_learning_rate(learning_rate) - self.beta_1 = beta_1 - self.beta_2 = beta_2 - self.epsilon = epsilon - self.amsgrad = amsgrad - - -@keras_export('keras.dtensor.experimental.optimizers.RMSprop', v1=[]) -class RMSprop(Optimizer, rmsprop.RMSprop): - - def __init__(self, - learning_rate=0.001, - rho=0.9, - momentum=0.0, - epsilon=1e-7, - centered=False, - gradients_clip_option=None, - ema_option=None, - jit_compile=False, - name='RMSprop', - mesh=None): - Optimizer.__init__(self, name=name, mesh=mesh) - self._learning_rate = self._build_learning_rate(learning_rate) - self.rho = rho - self.momentum = momentum - self.epsilon = epsilon - self.centered = centered - - -@keras_export('keras.dtensor.experimental.optimizers.SGD', v1=[]) -class SGD(Optimizer, sgd.SGD): - - def __init__(self, - learning_rate=0.01, - momentum=0.0, - nesterov=False, - amsgrad=False, - gradients_clip_option=None, - ema_option=None, - jit_compile=False, - name='SGD', - mesh=None): - Optimizer.__init__(self, name=name, mesh=mesh) - self._learning_rate = self._build_learning_rate(learning_rate) - self.momentum = momentum - self.nesterov = nesterov - if isinstance(momentum, (int, float)) and (momentum < 0 or momentum > 1): - raise ValueError('`momentum` must be between [0, 1].') - - -Adadelta.__doc__ = Optimizer.__doc__ + adadelta.Adadelta.__doc__ -Adagrad.__doc__ = Optimizer.__doc__ + adagrad.Adagrad.__doc__ -Adam.__doc__ = Optimizer.__doc__ + adam.Adam.__doc__ -RMSprop.__doc__ = Optimizer.__doc__ + rmsprop.RMSprop.__doc__ -SGD.__doc__ = Optimizer.__doc__ + sgd.SGD.__doc__ diff --git a/keras/dtensor/optimizers_test.py b/keras/dtensor/optimizers_test.py 
index bfaf076225d2..356d2d2965e7 100644 --- a/keras/dtensor/optimizers_test.py +++ b/keras/dtensor/optimizers_test.py @@ -14,92 +14,240 @@ # ============================================================================== """Tests for initializers.""" +import os + +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + +from keras import backend +from keras import layers +from keras import losses +from keras import models from keras.dtensor import dtensor_api as dtensor -from keras.dtensor import optimizers +from keras.dtensor import layout_map from keras.dtensor import test_util -import numpy as np -import tensorflow.compat.v2 as tf +from keras.optimizers import adadelta +from keras.optimizers import adagrad +from keras.optimizers import adam +from keras.optimizers import adamw +from keras.optimizers import rmsprop +from keras.optimizers import sgd class OptimizersTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + + global_ids = test_util.create_device_ids_array((2, 2)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": dtensor.Mesh( + ["X", "Y"], + global_ids, + local_device_ids, + test_util.create_device_list((2, 2), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) + + def test_add_variable_from_reference(self): + optimizer = adam.Adam(mesh=self.mesh) + variable_init_value = tf.ones([4, 4], dtype=tf.float32) + variable_init_value = dtensor.copy_to_mesh( + variable_init_value, + layout=dtensor.Layout.replicated(self.mesh, rank=2), + ) + model_variable = dtensor.DVariable( + variable_init_value, trainable=True, name="tmp" + ) + state_variable = optimizer.add_variable_from_reference( + model_variable, "test" + ) + self.assertEqual(state_variable._shared_name, "test/tmp") + self.assertAllClose(self.evaluate(state_variable), tf.zeros([4, 4])) + # Make sure the variable contains the correct layout info + self.assertEqual(state_variable.layout, model_variable.layout) + + def test_build_index_dict(self): + optimizer = adam.Adam(mesh=self.mesh) + variable_init_value = tf.ones(shape=(), dtype=tf.float32) + variable_init_value = dtensor.copy_to_mesh( + variable_init_value, + layout=dtensor.Layout.replicated(self.mesh, rank=0), + ) + var_list = [ + dtensor.DVariable(variable_init_value, name=f"var{i}") + for i in range(10) + ] + optimizer._build_index_dict(var_list) + self.assertEqual( + optimizer._index_dict[optimizer._var_key(var_list[7])], 7 + ) + + def test_aggregate_gradients_noop(self): + optimizer = adam.Adam(mesh=self.mesh) + + variable_init_value = tf.ones(shape=(), dtype=tf.float32) + model_variable = dtensor.DVariable( + variable_init_value, + trainable=True, + layout=dtensor.Layout.replicated(self.mesh, rank=0), + ) + grads = tf.ones_like(variable_init_value) + + grad_and_var = zip([grads], [model_variable]) + + result = optimizer.aggregate_gradients(grad_and_var) + self.assertEqual(result, grad_and_var) + + @parameterized.named_parameters( + ( + "Adadelta", + adadelta.Adadelta, + {}, + [ + "Adadelta/accumulated_grad/Variable", + "Adadelta/accumulated_delta_var/Variable", + "iteration", + ], + ), + ( + "Adam", + adam.Adam, + {"amsgrad": True}, + [ + "Adam/m/Variable", + "Adam/v/Variable", + "Adam/vhat/Variable", + "iteration", + ], + ), + ( + "AdamW", + adamw.AdamW, + {"amsgrad": True}, + [ + "AdamW/m/Variable", + "AdamW/v/Variable", + "AdamW/vhat/Variable", + "iteration", + ], + ), + ( + "Adagrad", + adagrad.Adagrad, + {}, + ["Adagrad/accumulator/Variable", "iteration"], + ), + ( + 
"RMSprop", + rmsprop.RMSprop, + {"momentum": 0.1, "centered": True}, + [ + "RMSprop/velocity/Variable", + "RMSprop/momentum/Variable", + "RMSprop/average_gradient/Variable", + "iteration", + ], + ), + ( + "SGD", + sgd.SGD, + {"momentum": 0.1}, + ["SGD/m/Variable", "iteration"], + ), + ) + def test_apply_gradients( + self, optimizer_cls, init_args, expect_variable_names + ): + optimizer = optimizer_cls(mesh=self.mesh, **init_args) + + self.assertEqual(self.evaluate(optimizer.iterations), 0) + self.assertEqual( + optimizer.iterations.layout, + dtensor.Layout.replicated(self.mesh, rank=0), + ) + + variable_init_value = tf.ones([4, 4], dtype=tf.float32) + variable_init_value = dtensor.copy_to_mesh( + variable_init_value, + layout=dtensor.Layout.replicated(self.mesh, rank=2), + ) + model_variable = dtensor.DVariable(variable_init_value, trainable=True) + + grads = tf.ones_like(variable_init_value) + optimizer.apply_gradients(zip([grads], [model_variable])) + optimizer_variables = optimizer.variables + + self.assertEqual(self.evaluate(optimizer.iterations), 1) + + all_names = [var._shared_name for var in optimizer_variables] + self.assertCountEqual(all_names, expect_variable_names) + + def test_embedding_lookup_backward_path(self): + # See b/265441685 for more context. + backend.enable_tf_random_generator() + os.environ[ + "DTENSOR_ENABLE_REPLICATED_SPMD_AS_DEFAULT_TF.RESOURCESCATTERADD" + ] = "1" + # Build a small functional model with embedding layer, it contains + # tf.gather ops which will trigger the _deduplicate_sparse_grad() code + # path. tf.unique op will have a shape mismatch issue for dtensor. + batch_size = 16 + seq_length = 10 + vocab_size = 100 + output_size = 8 + + def produce_data(): + inputs = tf.random.uniform( + maxval=vocab_size, + shape=(batch_size, seq_length), + dtype=tf.int32, + ) + label = tf.random.uniform( + maxval=output_size, shape=(batch_size,), dtype=tf.int32 + ) + inputs = dtensor.copy_to_mesh( + inputs, layout=dtensor.Layout.replicated(self.mesh, rank=2) + ) + inputs = dtensor.relayout( + inputs, dtensor.Layout.batch_sharded(self.mesh, "X", 2) + ) + label = dtensor.copy_to_mesh( + label, layout=dtensor.Layout.replicated(self.mesh, rank=1) + ) + label = dtensor.relayout( + label, dtensor.Layout.batch_sharded(self.mesh, "X", 1) + ) + return inputs, label + + with layout_map.LayoutMap(self.mesh).scope(): + inputs = layers.Input(shape=(seq_length,)) + x = layers.Embedding(vocab_size, 64)(inputs) + x = layers.GlobalAveragePooling1D()(x) + preds = layers.Dense(output_size, activation="softmax")(x) + model = models.Model(inputs, preds) + + optimizer = adam.Adam(mesh=self.mesh) + + @tf.function + def train_func(model, inputs, label, optimizer): + with tf.GradientTape() as tape: + output = model(inputs) + loss = losses.sparse_categorical_crossentropy(label, output) + optimizer.minimize(loss, model.variables, tape) + return loss + + # The error only happens across the batch, where the value of + # tf.unique are different. + input1, label1 = produce_data() + train_func(model, input1, label1, optimizer) + input2, label2 = produce_data() + train_func(model, input2, label2, optimizer) + # Assert nothing here, and expect the train_func can run properly with + # different inputs. 
+ - def setUp(self): - super().setUp() - global_ids = test_util.create_device_ids_array((2, 2)) - local_device_ids = np.ravel(global_ids).tolist() - mesh_dict = { - 'CPU': - dtensor.Mesh(['X', 'Y'], global_ids, - local_device_ids, - test_util.create_device_list((2, 2), 'CPU')) - } - self.mesh = self.configTestMesh(mesh_dict) - - def test_add_variable_from_reference(self): - optimizer = optimizers.Adam(mesh=self.mesh) - variable_init_value = tf.ones([4, 4], dtype=tf.float32) - variable_init_value = dtensor.copy_to_mesh( - variable_init_value, - layout=dtensor.Layout.replicated(self.mesh, rank=2)) - model_variable = dtensor.DVariable(variable_init_value, - trainable=True, - name='tmp') - state_variable = optimizer.add_variable_from_reference( - model_variable, 'test') - self.assertEqual(state_variable._shared_name, 'test/tmp') - self.assertAllClose(self.evaluate(state_variable), tf.zeros([4, 4])) - # Make sure the variable contains the correct layout info - self.assertEqual(state_variable.layout, model_variable.layout) - - def test_build_index_dict(self): - optimizer = optimizers.Adam(mesh=self.mesh) - variable_init_value = tf.ones(shape=(), dtype=tf.float32) - variable_init_value = dtensor.copy_to_mesh( - variable_init_value, - layout=dtensor.Layout.replicated(self.mesh, rank=0)) - var_list = [dtensor.DVariable(variable_init_value, name=f'var{i}') - for i in range(10)] - optimizer._build_index_dict(var_list) - self.assertEqual(optimizer._index_dict[optimizer._var_key(var_list[7])], 7) - - @parameterized.named_parameters( - ('Adadelta', optimizers.Adadelta, {}, - ['Adadelta/accumulated_grad/Variable', - 'Adadelta/accumulated_delta_var/Variable']), - ('Adam', optimizers.Adam, {'amsgrad': True}, - ['Adam/m/Variable', 'Adam/v/Variable', 'Adam/vhat/Variable']), - ('Adagrad', optimizers.Adagrad, {}, ['Adagrad/accumulator/Variable']), - ('RMSprop', optimizers.RMSprop, {'momentum': 0.1, 'centered': True}, - ['RMSprop/velocity/Variable', 'RMSprop/momentum/Variable', - 'RMSprop/average_gradient/Variable']), - ('SGD', optimizers.SGD, {'momentum': 0.1}, ['SGD/m/Variable']) - ) - def test_apply_gradients(self, optimizer_cls, init_args, - expect_variable_names): - optimizer = optimizer_cls(mesh=self.mesh, **init_args) - - self.assertEqual(self.evaluate(optimizer.iterations), 0) - self.assertEqual(optimizer.iterations.layout, - dtensor.Layout.replicated(self.mesh, rank=0)) - - variable_init_value = tf.ones([4, 4], dtype=tf.float32) - variable_init_value = dtensor.copy_to_mesh( - variable_init_value, - layout=dtensor.Layout.replicated(self.mesh, rank=2)) - model_variable = dtensor.DVariable(variable_init_value, - trainable=True) - - grads = tf.ones_like(variable_init_value) - optimizer.apply_gradients(zip([grads], [model_variable])) - optimizer_variables = optimizer.variables - - self.assertEqual(self.evaluate(optimizer.iterations), 1) - - all_names = [var._shared_name for var in optimizer_variables] - expect_variable_names.extend(['iteration', 'learning_rate']) - self.assertCountEqual(all_names, expect_variable_names) - - -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/dtensor/save_load_test.py b/keras/dtensor/save_load_test.py new file mode 100644 index 000000000000..e188c9ee4761 --- /dev/null +++ b/keras/dtensor/save_load_test.py @@ -0,0 +1,116 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for keras model save/load.""" + +import numpy as np +import tensorflow.compat.v2 as tf + +from keras import backend +from keras import layers +from keras import models +from keras.dtensor import dtensor_api as dtensor +from keras.dtensor import layout_map as layout_map_lib +from keras.dtensor import test_util +from keras.utils import tf_utils + + +def _create_test_model(): + model = models.Sequential() + model.add( + layers.Conv2D( + 32, + name="conv2d_1", + kernel_size=(3, 3), + activation="relu", + input_shape=(28, 28, 1), # channel last gray scale input + ) + ) + model.add( + layers.Conv2D( + 64, + name="conv2d_2", + kernel_size=(3, 3), + activation="relu", + ) + ) + return model + + +class SaveLoadTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + backend.enable_tf_random_generator() + tf_utils.set_random_seed(1337) + global_ids = test_util.create_device_ids_array((2, 2)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": dtensor.Mesh( + ["X", "Y"], + global_ids, + local_device_ids, + test_util.create_device_list((2, 2), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) + + def test_save_h5_weights_for_dtensor_model(self): + layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) + with layout_map_lib.layout_map_scope(layout_map): + dtensor_model = _create_test_model() + + self.assertNotEmpty(dtensor_model.weights) + for w in dtensor_model.weights: + # Make sure the weights are DVariable + self.assertIsNotNone(w.layout) + + save_file = self.create_tempfile("dtensor_model.h5") + dtensor_model.save_weights(save_file) + + # Make sure the weights can be load back to a normal keras model. + normal_model = _create_test_model() + normal_model.load_weights(save_file) + + for ( + w1, + w2, + ) in zip(normal_model.weights, dtensor_model.weights): + self.assertAllClose(w1.numpy(), w2.numpy()) + self.assertIsNone(getattr(w1, "layout", None)) + + def test_load_h5_weights_for_dtensor_model(self): + normal_model = _create_test_model() + + save_file = self.create_tempfile("normal_model.h5") + normal_model.save_weights(save_file) + + layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) + with layout_map_lib.layout_map_scope(layout_map): + dtensor_model = _create_test_model() + + self.assertNotEmpty(dtensor_model.weights) + for w in dtensor_model.weights: + self.assertIsNotNone(w.layout) + + dtensor_model.load_weights(save_file) + + for ( + w1, + w2, + ) in zip(normal_model.weights, dtensor_model.weights): + self.assertAllClose(w1.numpy(), w2.numpy()) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/dtensor/strategy_integration_test.py b/keras/dtensor/strategy_integration_test.py new file mode 100644 index 000000000000..0f5d660b4cd2 --- /dev/null +++ b/keras/dtensor/strategy_integration_test.py @@ -0,0 +1,118 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for DTensor based strategy training.""" + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras import backend +from keras import mixed_precision +from keras.dtensor import integration_test_utils +from keras.optimizers import adam +from keras.utils import tf_utils + +# isort: off +# Import the MirroredStrategy that is backed by DTensor +# It is not a public API yet, so we do a private symbol import for now. +from tensorflow.python.distribute.experimental import ( + mirrored_strategy as dtensor_mirrored_strategy, +) +from tensorflow.dtensor.python.tests import test_util + + +class TrainingTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + backend.enable_tf_random_generator() + tf_utils.set_random_seed(1337) + global_ids = test_util.create_device_ids_array((2,)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + device: tf.experimental.dtensor.Mesh( + ["batch"], + global_ids, + local_device_ids, + test_util.create_device_list((2,), device), + ) + for device in ("CPU", "GPU", "TPU") + } + self.mesh = self.configTestMesh(mesh_dict) + + def tearDown(self): + super().tearDown() + # clean up the mixed precision setting if any. + mixed_precision.set_global_policy("float32") + + @parameterized.product( + run_eagerly=[True, False], + jit_compile=[True, False], + optimizer_creator=[lambda: adam.Adam(), lambda: "adam"], + enable_mixed_precision=[True, False], + ) + def test_model_fit( + self, + run_eagerly, + jit_compile, + optimizer_creator, + enable_mixed_precision, + ): + if run_eagerly and jit_compile: + self.skipTest("run_eagerly can't run with jit_compile") + if enable_mixed_precision and self.mesh.device_type() != "GPU": + self.skipTest("Only run mixed_precision on GPU for performance") + + if enable_mixed_precision: + mixed_precision.set_global_policy("mixed_float16") + dtensor_strategy = dtensor_mirrored_strategy.MirroredStrategy( + mesh=self.mesh + ) + # Make fake MNIST-like image data. 
+ batch_size = 64 + dataset = tf.data.Dataset.from_tensor_slices( + ( + np.random.uniform(size=(batch_size, 28, 28, 1)).astype( + np.float32 + ), + np.random.randint(0, 10, size=(batch_size,)), + ) + ) + dataset = dataset.shuffle(64).repeat().batch(64, drop_remainder=True) + + with dtensor_strategy.scope(): + model = integration_test_utils.get_model() + optimizer = optimizer_creator() + + model.compile( + loss="SparseCategoricalCrossentropy", + optimizer=optimizer, + metrics="acc", + run_eagerly=run_eagerly, + jit_compile=jit_compile, + ) + model.fit(dataset, steps_per_epoch=10) + + prediction = model.predict( + np.random.uniform(size=(batch_size, 28, 28, 1)).astype(np.float32) + ) + self.assertEqual(prediction.shape, (batch_size, 10)) + if enable_mixed_precision: + self.assertEqual(prediction.dtype, tf.float16) + else: + self.assertEqual(prediction.dtype, tf.float32) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/dtensor/test_util.py b/keras/dtensor/test_util.py index 7d2019df670b..44e2b7f709ce 100644 --- a/keras/dtensor/test_util.py +++ b/keras/dtensor/test_util.py @@ -14,120 +14,137 @@ # ============================================================================== """Keras utilities for DTensor unit test.""" -from absl.testing import parameterized import numpy as np - import tensorflow.compat.v2 as tf +from absl.testing import parameterized - +# isort: off from tensorflow.dtensor.python import api as dtensor_api from tensorflow.python.eager import context - _DEFAULT_GPU_MEMORY_LIMIT = 200 # MB class DTensorBaseTest(tf.test.TestCase, parameterized.TestCase): - """Provides comparison helper for dtensor vs local results.""" - - @classmethod - def setUpClass(cls): - super(DTensorBaseTest, cls).setUpClass() - - def tearDown(self): - super().tearDown() - # Make sure all async ops finish. - context.async_wait() - - # TODO(hthu): Remove the reset once we fixed the CopyToMesh with - # DefaultMesh placement issue. - reset_dtensor() - - @staticmethod - def configTestMesh(device_type_mesh_map): # pylint: disable=invalid-name - """Configs corresponding mesh given test context. - - If runs on a CPU mesh, set virtual device on CPU. - If runs on a GPU mesh, sets virtual device on GPU with proper memory limits. - if runs on a TPU mesh, initializes TPU system. - - Args: - device_type_mesh_map: A dictionary containing device_type -> mesh mapping. - - Returns: - A properly configured mesh for use in test. - """ - reset_context() - - def get_mesh(device_type): - mesh = device_type_mesh_map.get(device_type, None) - if mesh is None: - raise ValueError('Requires a %s mesh to run test on %s.' % - (device_type, device_type)) - return mesh - - mesh = None - if tf.config.list_physical_devices('GPU'): - mesh = get_mesh('GPU') - reset_logical_devices('GPU', np.prod(mesh.shape())) - else: - mesh = get_mesh('CPU') - reset_logical_devices('CPU', np.prod(mesh.shape())) - - context.ensure_initialized() - return mesh + """Provides comparison helper for dtensor vs local results.""" + + @classmethod + def setUpClass(cls): + super(DTensorBaseTest, cls).setUpClass() + + def tearDown(self): + super().tearDown() + # Make sure all async ops finish. + context.async_wait() + + # TODO(hthu): Remove the reset once we fixed the CopyToMesh with + # DefaultMesh placement issue. + reset_dtensor() + + @staticmethod + def configTestMesh(device_type_mesh_map): + """Configs corresponding mesh given test context. + + If runs on a CPU mesh, set virtual device on CPU. 
+ If runs on a GPU mesh, sets virtual device on GPU with proper memory + limits. + If runs on a TPU mesh, initializes TPU system. + + Args: + device_type_mesh_map: A dictionary containing device_type -> mesh + mapping. + + Returns: + A properly configured mesh for use in test. + """ + reset_context() + + def get_mesh(device_type): + mesh = device_type_mesh_map.get(device_type, None) + if mesh is None: + dt = device_type + raise ValueError(f"Requires a {dt} mesh to run test on {dt}.") + return mesh + + mesh = None + if tf.config.list_physical_devices("GPU"): + mesh = get_mesh("GPU") + reset_logical_devices("GPU", np.prod(mesh.shape())) + else: + mesh = get_mesh("CPU") + reset_logical_devices("CPU", np.prod(mesh.shape())) + + context.ensure_initialized() + return mesh def create_device_array(shape, device_type): - device_count = np.prod(shape) - return np.asarray([ - tf.DeviceSpec( # pylint: disable=g-complex-comprehension - job='localhost/replica:0/task:0', - device_type=device_type, - device_index=i) for i in range(device_count) - ]).reshape(shape) + device_count = np.prod(shape) + return np.asarray( + [ + tf.DeviceSpec( + job="localhost/replica:0/task:0", + device_type=device_type, + device_index=i, + ) + for i in range(device_count) + ] + ).reshape(shape) def create_device_list(shape, device_type): - devices = create_device_array(shape, device_type) - return np.ravel(devices).tolist() + devices = create_device_array(shape, device_type) + return np.ravel(devices).tolist() def create_device_ids_array(shape): - device_count = np.prod(shape) - return np.arange(device_count).reshape(shape) + device_count = np.prod(shape) + return np.arange(device_count).reshape(shape) def reset_context(): - context._reset_context() # pylint: disable=protected-access + context._reset_context() def reset_logical_devices(device_type, count): - """Resets logical devices for CPU/GPU. - - Logical devices can only be instantiated once on a particular context. For - now, context re-use is triggering some function duplication errors, so we - reset the context on each call. - - Args: - device_type: The device_type to reset. - count: numbers of virtual device to reset to. - """ - reset_context() - devices = tf.config.list_physical_devices(device_type) - if device_type.upper() == 'CPU': - tf.config.set_logical_device_configuration(devices[0], [ - tf.config.LogicalDeviceConfiguration(), - ] * count) - elif device_type.upper() == 'GPU': - tf.config.set_logical_device_configuration(devices[0], [ - tf.config.LogicalDeviceConfiguration( - memory_limit=_DEFAULT_GPU_MEMORY_LIMIT), - ] * count) - else: - raise ValueError('resetting logical device for non-supported device type : ' - '%s' % device_type) + """Resets logical devices for CPU/GPU. + + Logical devices can only be instantiated once on a particular context. For + now, context re-use is triggering some function duplication errors, so we + reset the context on each call. + + Args: + device_type: The device_type to reset. + count: number of virtual devices to reset to.
+ """ + if device_type.upper() not in ["CPU", "GPU"]: + raise ValueError( + "resetting logical device for non-supported device type: " + f"{device_type}" + ) + reset_context() + + cpus = tf.config.list_physical_devices("CPU") + if device_type.upper() == "GPU": + gpus = tf.config.list_physical_devices(device_type) + tf.config.set_logical_device_configuration( + gpus[0], + [ + tf.config.LogicalDeviceConfiguration( + memory_limit=_DEFAULT_GPU_MEMORY_LIMIT + ), + ] + * count, + ) + # Always config CPU mesh as the host mesh for DTensor + tf.config.set_logical_device_configuration( + cpus[0], + [ + tf.config.LogicalDeviceConfiguration(), + ] + * count, + ) def reset_dtensor(): - dtensor_api._reset() # pylint: disable=protected-access + dtensor_api._reset() diff --git a/keras/dtensor/utils.py b/keras/dtensor/utils.py index 378560af8cec..234ffe13cbf6 100644 --- a/keras/dtensor/utils.py +++ b/keras/dtensor/utils.py @@ -16,9 +16,9 @@ import inspect -from keras.dtensor import dtensor_api as dtensor import tensorflow.compat.v2 as tf +from keras.dtensor import dtensor_api as dtensor # All the variable names in the default keras layers. We will use those to map # against the args in the __init__ method to find corresponding layout args. @@ -39,125 +39,148 @@ def allow_initializer_layout(init_method): - """A decorator for injecting layout information to layer.__init__. - - Layout will be a new param for any of the weights for all the keras layers. - Adding the param to all the __init__ method will be a big/duplicated work. - - This decorator is design to reduce and code duplication and make it easy to - add/remove the dtensor feature if needed. - - Sample usage: - ```python - class Dense(tf.keras.layer.Layer): - - @allow_initializer_layout - def __init__(self, units, - kernel_initializer='zeros', - bias_initializer='zeros', - **kwargs): - super().__init__(**kwargs) - - d = Dense(units=8, kernel_layout=layout1, bias_layout=layout2) - d.kernel_layout == layout1 - d.bias_layout == layout2 - ``` - - By adding this annotation, it will: - - 1. Filter out the kwargs based on some keywords, eg if the 'kernel_initialzer' - appears in method signature, then it will try to pop the 'kernel_layout' if - it presents. Same for "bias" and "recurrent_kernel", etc. This will make - sure the layout related param is not passed to `BaseLayer.__init__`, which - will raise error about unexpect keyword args. - 2. Set the self.kernel/bias_layout attribute after the `__init__` method is - called. Keras framework will use those fields to create weights down the - stream. - - Args: - init_method: the `__init__` method of the Keras layer to annotate. - - Returns: - the annotated __init__ method. - """ - - def _wrap_function(layer_instance, *args, **kwargs): - signature = inspect.signature(init_method) - layout_args = {} - # Check args like 'kernel_initializer' and pop the 'kernel_layout' if it - # presents. 
- for variable_name in KERAS_VARIABLE_NAMES: - if variable_name + "_initializer" in signature.parameters: - layout = kwargs.pop(variable_name + "_layout", None) - if layout: - layout_args[variable_name + "_layout"] = layout - - init_method(layer_instance, *args, **kwargs) - - # Inject the layout parameter after the invocation of __init__() - for layout_param_name, layout in layout_args.items(): - setattr(layer_instance, layout_param_name, layout) - - # return decorated - return tf.__internal__.decorator.make_decorator( - target=init_method, decorator_func=_wrap_function) + """A decorator for injecting layout information to layer.__init__. + + Layout will be a new param for any of the weights for all the keras layers. + Adding the param to every __init__ method would be a lot of duplicated work. + + This decorator is designed to reduce code duplication and make it easy to + add/remove the dtensor feature if needed. + + Sample usage: + ```python + class Dense(tf.keras.layers.Layer): + + @allow_initializer_layout + def __init__(self, units, + kernel_initializer='zeros', + bias_initializer='zeros', + **kwargs): + super().__init__(**kwargs) + + d = Dense(units=8, kernel_layout=layout1, bias_layout=layout2) + d.kernel_layout == layout1 + d.bias_layout == layout2 + ``` + + By adding this annotation, it will: + + 1. Filter out the kwargs based on some keywords, e.g. if + 'kernel_initializer' appears in the method signature, then it will try to + pop the 'kernel_layout' if it is present. Same for "bias" and + "recurrent_kernel", etc. This will make sure the layout-related param is + not passed to `BaseLayer.__init__`, which would raise an error about + unexpected keyword args. + 2. Set the self.kernel/bias_layout attribute after the `__init__` method is + called. The Keras framework will use those fields to create weights + downstream. + + Args: + init_method: the `__init__` method of the Keras layer to annotate. + + Returns: + the annotated __init__ method. + """ + + def _wrap_function(layer_instance, *args, **kwargs): + signature = inspect.signature(init_method) + layout_args = {} + # Check args like 'kernel_initializer' and pop the 'kernel_layout' if + # it is present. + for variable_name in KERAS_VARIABLE_NAMES: + if variable_name + "_initializer" in signature.parameters: + layout = kwargs.pop(variable_name + "_layout", None) + if layout: + layout_args[variable_name + "_layout"] = layout + + init_method(layer_instance, *args, **kwargs) + + # Inject the layout parameter after the invocation of __init__() + for layout_param_name, layout in layout_args.items(): + setattr(layer_instance, layout_param_name, layout) + + # return decorated + return tf.__internal__.decorator.make_decorator( + target=init_method, decorator_func=_wrap_function + ) def inject_mesh(init_method): - """Inject DTensor mesh information to an object. + """Inject DTensor mesh information to an object. + + This is useful for Keras objects like `Metric` and `Optimizer` which need + a DTensor mesh to create the weights, but don't want to change the current + public API interface. - This is useful for keras object like `Metric` and `Optimizer` which need - DTensor mesh to create the weights, but doesn't want to change the current - public API interface. + This is for temporary usage and eventually the mesh/layout information will + be public arguments in the `__init__` method.
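Both `allow_initializer_layout` above and `inject_mesh` below follow the same wrap-and-pop shape. A condensed, self-contained sketch of that pattern (the `pop_and_inject` helper and `MyDense` class here are hypothetical; the real decorators additionally inspect the wrapped signature, go through `tf.__internal__.decorator.make_decorator`, and `inject_mesh` sets `_mesh` *before* calling `__init__`):

```python
import functools


def pop_and_inject(*keys):
    """Sketch only: pop special kwargs, run __init__, then attach them."""

    def decorator(init_method):
        @functools.wraps(init_method)
        def _wrap_function(instance, *args, **kwargs):
            # Pop the special kwargs so they never reach the wrapped
            # __init__, which would reject them as unexpected arguments.
            popped = {k: kwargs.pop(k) for k in keys if k in kwargs}
            init_method(instance, *args, **kwargs)
            # Attach the popped values as attributes after __init__ runs.
            for key, value in popped.items():
                setattr(instance, key, value)

        return _wrap_function

    return decorator


class MyDense:
    @pop_and_inject("kernel_layout", "bias_layout")
    def __init__(self, units):
        self.units = units


d = MyDense(8, kernel_layout="layout1")
assert d.kernel_layout == "layout1"
```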
- This is for temporary usage and eventually the mesh/layout information will be - public arguments in the `__init__` method + Sample usage: + ```python + class Accuracy(tf.keras.metrics.Metric): - Sample usage: - ```python - class Accuracy(tf.keras.metrics.Metric): + @inject_mesh + def __init__(self, name='accuracy', dtype=None): + super().__init__(name=name, dtype=dtype) - @inject_mesh - def __init__(self, name='accuracy', dtype=None): - super().__init__(**kwargs) + acc = Accuracy(mesh=mesh) + assert acc._mesh == mesh + ``` - acc = Accuracy(mesh=mesh) - assert acc._mesh == mesh - ``` + Args: + init_method: the `__init__` method of the Keras class to annotate. - Args: - init_method: the `__init__` method of the Keras class to annotate. + Returns: + the annotated __init__ method. + """ - Returns: - the annotated __init__ method. - """ - def _wrap_function(instance, *args, **kwargs): - mesh = kwargs.pop("mesh", None) - # Note that the injection of _mesh need to happen before the invocation of - # __init__, since the class might need the mesh to create weights in the - # __init__. - if mesh is not None: - instance._mesh = mesh # pylint: disable=protected-access - init_method(instance, *args, **kwargs) + def _wrap_function(instance, *args, **kwargs): + mesh = kwargs.pop("mesh", None) + # Note that the injection of _mesh needs to happen before the invocation + # of __init__, since the class might need the mesh to create weights in + # the __init__. + if mesh is not None: + instance._mesh = mesh + init_method(instance, *args, **kwargs) - return tf.__internal__.decorator.make_decorator( - target=init_method, decorator_func=_wrap_function) + return tf.__internal__.decorator.make_decorator( + target=init_method, decorator_func=_wrap_function + ) def call_with_layout(fn, layout, *args, **kwargs): - """Invoke the function with inputs and relayout the result. - - Args: - fn: the function to invoke. - layout: if not None, the output of the fn will be relayout with this. - *args: positional arguments to be called with fn. - **kwargs: keyword arguments to be called with fn. - - Returns: - The output of fn, with potential relayout with the layout specified. - """ - if layout: - with dtensor.run_on(layout.mesh): - result = fn(*args, **kwargs) - return dtensor.relayout(result, layout) - return fn(*args, **kwargs) + """Invoke the function with inputs and relayout the result. + + Args: + fn: the function to invoke. + layout: if not None, the output of fn will be relaid out with this. + *args: positional arguments to be called with fn. + **kwargs: keyword arguments to be called with fn. + + Returns: + The output of fn, potentially relaid out with the layout specified. + """ + if layout: + with dtensor.default_mesh(layout.mesh): + result = fn(*args, **kwargs) + return dtensor.relayout(result, layout) + return fn(*args, **kwargs) + + +def running_with_dtensor_strategy(): + """Check whether running with a `Strategy` that is backed by DTensor. + + In DTensor-based training, all the tensors are in the global context, which + is different from the local context. Some Keras components need to + behave differently, e.g. BatchNormalization and SyncBatchNormalization, as + well as optimizers. + + This check will help those layers branch the logic and keep the correct + behavior across the different contexts. + """ + if not tf.distribute.has_strategy(): + return False + strategy = tf.distribute.get_strategy() + # TODO(scottzhu): Finalize the strategy API to check if a strategy is backed + # by DTensor.
+ return getattr(strategy, "_mesh", None) is not None diff --git a/keras/dtensor/utils_test.py b/keras/dtensor/utils_test.py index 98851163a72a..407ecf149abc 100644 --- a/keras/dtensor/utils_test.py +++ b/keras/dtensor/utils_test.py @@ -14,63 +14,83 @@ # ============================================================================== """Tests for utils.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras import layers from keras.dtensor import dtensor_api as dtensor from keras.dtensor import test_util from keras.dtensor import utils -import numpy as np -import tensorflow.compat.v2 as tf - class UtilsTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + global_ids = test_util.create_device_ids_array((2, 2)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": dtensor.Mesh( + ["X", "Y"], + global_ids, + local_device_ids, + test_util.create_device_list((2, 2), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) + self.layout = dtensor.Layout.replicated(self.mesh, rank=1) - def setUp(self): - super().setUp() - global_ids = test_util.create_device_ids_array((2, 2)) - local_device_ids = np.ravel(global_ids).tolist() - mesh_dict = { - 'CPU': - dtensor.Mesh(['X', 'Y'], global_ids, - local_device_ids, - test_util.create_device_list((2, 2), 'CPU')) - } - self.mesh = self.configTestMesh(mesh_dict) - self.layout = dtensor.Layout.replicated(self.mesh, rank=1) - - @parameterized.named_parameters( - ('Dense', layers.Dense, {'units': 4}, ['kernel_layout', 'bias_layout']), - ('Conv2D', layers.Conv2D, {'filters': 2, 'kernel_size': 3}, - ['kernel_layout', 'bias_layout']), - ('BatchNorm', layers.BatchNormalization, {}, - ['beta_layout', 'gamma_layout', 'moving_mean_layout', - 'moving_variance_layout']), - ('Embedding', layers.Embedding, {'input_dim': 100, 'output_dim': 20}, - ['embeddings_layout']), - (' PReLU', layers. PReLU, {}, ['alpha_layout']), - ('SeparableConv2D', layers.SeparableConv2D, - {'filters': 2, 'kernel_size': 3}, - ['depthwise_layout', 'pointwise_layout', 'bias_layout']), - # TODO(scottzhu): Probably add more coverage for all the layers. - ) - def test_all_layout_decorator(self, layer_cls, init_args, layout_args): + @parameterized.named_parameters( + ("Dense", layers.Dense, {"units": 4}, ["kernel_layout", "bias_layout"]), + ( + "Conv2D", + layers.Conv2D, + {"filters": 2, "kernel_size": 3}, + ["kernel_layout", "bias_layout"], + ), + ( + "BatchNorm", + layers.BatchNormalization, + {}, + [ + "beta_layout", + "gamma_layout", + "moving_mean_layout", + "moving_variance_layout", + ], + ), + ( + "Embedding", + layers.Embedding, + {"input_dim": 100, "output_dim": 20}, + ["embeddings_layout"], + ), + (" PReLU", layers.PReLU, {}, ["alpha_layout"]), + ( + "SeparableConv2D", + layers.SeparableConv2D, + {"filters": 2, "kernel_size": 3}, + ["depthwise_layout", "pointwise_layout", "bias_layout"], + ), + # TODO(scottzhu): Probably add more coverage for all the layers. + ) + def test_all_layout_decorator(self, layer_cls, init_args, layout_args): - layer_cls.__init__ = utils.allow_initializer_layout(layer_cls.__init__) + layer_cls.__init__ = utils.allow_initializer_layout(layer_cls.__init__) - # Make sure we don't set the layout attribute if the init kwargs is not - # provided. - layer = layer_cls(**init_args) - for layout_arg in layout_args: - self.assertFalse(hasattr(layer, layout_arg)) + # Make sure we don't set the layout attribute if the init kwargs is not + # provided. 
+ layer = layer_cls(**init_args) + for layout_arg in layout_args: + self.assertFalse(hasattr(layer, layout_arg)) - layout_kwargs = {k: self.layout for k in layout_args} - init_args.update(layout_kwargs) - layer = layer_cls(**init_args) + layout_kwargs = {k: self.layout for k in layout_args} + init_args.update(layout_kwargs) + layer = layer_cls(**init_args) - for layout_arg in layout_args: - self.assertEqual(getattr(layer, layout_arg), self.layout) + for layout_arg in layout_args: + self.assertEqual(getattr(layer, layout_arg), self.layout) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/BUILD b/keras/engine/BUILD index 90d067461a7d..a2c40e878106 100644 --- a/keras/engine/BUILD +++ b/keras/engine/BUILD @@ -1,6 +1,8 @@ # Description: # Contains the Keras engine API (internal TensorFlow version). +# Placeholder: load unaliased py_library + # buildifier: disable=same-origin-load load("@org_keras//keras:keras.bzl", "tf_py_test") @@ -8,12 +10,9 @@ load("@org_keras//keras:keras.bzl", "tf_py_test") load("@org_keras//keras:keras.bzl", "cuda_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], # TODO(scottzhu): Remove non-keras deps from TF. - default_visibility = [ - "//keras:friends", - "//third_party/tensorflow/python:__pkg__", - "//third_party/tensorflow/python/feature_column:__pkg__", - ], + default_visibility = ["//keras:friends"], licenses = ["notice"], ) @@ -59,6 +58,7 @@ py_library( "//keras/distribute", "//keras/distribute:distribute_coordinator_utils", "//keras/dtensor:layout_map", + "//keras/export:export_lib", "//keras/initializers", "//keras/metrics", "//keras/mixed_precision:autocast_variable", @@ -66,10 +66,10 @@ py_library( "//keras/mixed_precision:policy", "//keras/optimizers", "//keras/saving", - "//keras/saving/experimental", "//keras/utils:engine_utils", "//keras/utils:metrics_utils", "//keras/utils:mode_keys", + "//keras/utils:steps_per_execution_tuning", "//keras/utils:tf_utils", "//keras/utils:version_utils", ], @@ -153,6 +153,7 @@ py_library( srcs_version = "PY3", deps = [ "//:expect_tensorflow_installed", + "//keras/distribute", "//keras/utils:dataset_creator", "//keras/utils:engine_utils", "//keras/utils:tf_utils", @@ -382,6 +383,7 @@ tf_py_test( "//keras:losses", "//keras/layers", "//keras/metrics", + "//keras/mixed_precision:policy", "//keras/testing_infra:test_combinations", "//keras/testing_infra:test_utils", "//keras/utils:data_utils", @@ -461,7 +463,7 @@ tf_py_test( "//keras:losses", "//keras/layers", "//keras/metrics", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/testing_infra:test_combinations", "//keras/testing_infra:test_utils", "//keras/utils:data_utils", @@ -627,7 +629,7 @@ tf_py_test( "//keras/layers", "//keras/legacy_tf_layers:core", "//keras/mixed_precision:policy", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/testing_infra:test_combinations", "//keras/testing_infra:test_utils", "//keras/utils:tf_utils", @@ -657,6 +659,7 @@ tf_py_test( size = "medium", srcs = ["sequential_test.py"], python_version = "PY3", + shard_count = 4, tags = [ "nomac", # TODO(mihaimaruseac): b/127695564 ], diff --git a/keras/engine/base_layer.py b/keras/engine/base_layer.py index 647e8cd9cf51..4e4039631ba5 100644 --- a/keras/engine/base_layer.py +++ b/keras/engine/base_layer.py @@ -12,12 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the 
License. # ============================================================================== -# pylint: disable=protected-access -# pylint: disable=g-classes-have-attributes -# pylint: disable=g-bad-import-order -"""Contains the base Layer class, from which all layers inherit.""" -import tensorflow.compat.v2 as tf + +"""Contains the base Layer class, from which all layers inherit.""" import collections import contextlib @@ -29,8 +26,8 @@ import weakref import numpy as np +import tensorflow.compat.v2 as tf -from google.protobuf import json_format from keras import backend from keras import constraints from keras import initializers @@ -41,9 +38,9 @@ from keras.engine import keras_tensor from keras.engine import node as node_module from keras.mixed_precision import autocast_variable -from keras.mixed_precision import loss_scale_optimizer from keras.mixed_precision import policy -from keras.saving.saved_model import layer_serialization +from keras.saving import serialization_lib +from keras.saving.legacy.saved_model import layer_serialization from keras.utils import generic_utils from keras.utils import layer_utils from keras.utils import object_identity @@ -51,36 +48,45 @@ from keras.utils import tf_utils from keras.utils import traceback_utils from keras.utils import version_utils + # A module that only depends on `keras.layers` import these from here. -from keras.utils.generic_utils import to_snake_case # pylint: disable=unused-import -from keras.utils.tf_utils import is_tensor_or_tensor_list # pylint: disable=unused-import +from keras.utils.generic_utils import to_snake_case # noqa: F401 +from keras.utils.tf_utils import is_tensor_or_tensor_list # noqa: F401 + +# isort: off +from google.protobuf import json_format from tensorflow.python.platform import tf_logging -from tensorflow.python.util.tf_export import get_canonical_name_for_symbol +from tensorflow.python.util.tf_export import ( + get_canonical_name_for_symbol, +) from tensorflow.python.util.tf_export import keras_export from tensorflow.tools.docs import doc_controls -# pylint: disable=g-inconsistent-quotes + metrics_mod = generic_utils.LazyLoader( - "metrics_mod", globals(), - "keras.metrics") -# pylint: enable=g-inconsistent-quotes + "metrics_mod", globals(), "keras.metrics" +) + # Prefix that is added to the TF op layer names. -_TF_OP_LAYER_NAME_PREFIX = 'tf_op_layer_' +_TF_OP_LAYER_NAME_PREFIX = "tf_op_layer_" # TODO(mdan): Should we have a single generic type for types that can be passed # to tf.cast? 
-_AUTOCAST_TYPES = (tf.Tensor, tf.SparseTensor, - tf.RaggedTensor) +_AUTOCAST_TYPES = (tf.Tensor, tf.SparseTensor, tf.RaggedTensor) keras_layers_gauge = tf.__internal__.monitoring.BoolGauge( - '/tensorflow/api/keras/layers', 'keras layers usage', 'method') + "/tensorflow/api/keras/layers", "keras layers usage", "method" +) keras_models_gauge = tf.__internal__.monitoring.BoolGauge( - '/tensorflow/api/keras/models', 'keras model usage', 'method') + "/tensorflow/api/keras/models", "keras model usage", "method" +) keras_api_gauge = tf.__internal__.monitoring.BoolGauge( - '/tensorflow/api/keras', 'keras api usage', 'method') + "/tensorflow/api/keras", "keras api usage", "method" +) keras_premade_model_gauge = tf.__internal__.monitoring.BoolGauge( - '/tensorflow/api/keras/premade_models', 'premade keras model usage', 'type') + "/tensorflow/api/keras/premade_models", "premade keras model usage", "type" +) _is_name_scope_on_model_declaration_enabled = False @@ -89,3262 +95,3753 @@ @contextlib.contextmanager def _name_scope_unnester(full_name_scope): - """Helper to get relative name scope from fully specified nested name scopes. - - Args: - full_name_scope: full(absolute) name scope path. - - Yields: - Relative name scope path from the parent `_name_scope_unnester` context - manager. - - Example: - ``` - with _name_scope_unnester('a') as name1: # name1 == 'a' - with _name_scope_unnester('a/b') as name2: # name2 == 'b' - with _name_scope_unnester('a/b/c') as name3: # name3 == 'c' - pass - ``` - """ - if not getattr(_name_scope_unnester_stack, 'value', None): - _name_scope_unnester_stack.value = [''] - - _name_scope_unnester_stack.value.append(full_name_scope) - - try: - full_name_scope = _name_scope_unnester_stack.value[-1] - outer_name_scope = _name_scope_unnester_stack.value[-2] - relative_name_scope = full_name_scope.lstrip(outer_name_scope) - relative_name_scope = relative_name_scope.lstrip('/') - yield relative_name_scope - finally: - _name_scope_unnester_stack.value.pop() - - -@keras_export('keras.layers.Layer') -class Layer(tf.Module, version_utils.LayerVersionSelector): - """This is the class from which all layers inherit. - - A layer is a callable object that takes as input one or more tensors and - that outputs one or more tensors. It involves *computation*, defined - in the `call()` method, and a *state* (weight variables). State can be - created in various places, at the convenience of the subclass implementer: - - * in `__init__()`; - * in the optional `build()` method, which is invoked by the first - `__call__()` to the layer, and supplies the shape(s) of the input(s), - which may not have been known at initialization time; - * in the first invocation of `call()`, with some caveats discussed - below. - - Users will just instantiate a layer and then treat it as a callable. - - Args: - trainable: Boolean, whether the layer's variables should be trainable. - name: String name of the layer. - dtype: The dtype of the layer's computations and weights. Can also be a - `tf.keras.mixed_precision.Policy`, which allows the computation and weight - dtype to differ. Default of `None` means to use - `tf.keras.mixed_precision.global_policy()`, which is a float32 policy - unless set to different value. - dynamic: Set this to `True` if your layer should only be run eagerly, and - should not be used to generate a static computation graph. - This would be the case for a Tree-RNN or a recursive network, - for example, or generally for any layer that manipulates tensors - using Python control flow. 
If `False`, we assume that the layer can - safely be used to generate a static computation graph. - - Attributes: - name: The name of the layer (string). - dtype: The dtype of the layer's weights. - variable_dtype: Alias of `dtype`. - compute_dtype: The dtype of the layer's computations. Layers automatically - cast inputs to this dtype which causes the computations and output to also - be in this dtype. When mixed precision is used with a - `tf.keras.mixed_precision.Policy`, this will be different than - `variable_dtype`. - dtype_policy: The layer's dtype policy. See the - `tf.keras.mixed_precision.Policy` documentation for details. - trainable_weights: List of variables to be included in backprop. - non_trainable_weights: List of variables that should not be - included in backprop. - weights: The concatenation of the lists trainable_weights and - non_trainable_weights (in this order). - trainable: Whether the layer should be trained (boolean), i.e. whether - its potentially-trainable weights should be returned as part of - `layer.trainable_weights`. - input_spec: Optional (list of) `InputSpec` object(s) specifying the - constraints on inputs that can be accepted by the layer. - - We recommend that descendants of `Layer` implement the following methods: - - * `__init__()`: Defines custom layer attributes, and creates layer weights - that do not depend on input shapes, using `add_weight()`, or other state. - * `build(self, input_shape)`: This method can be used to create weights that - depend on the shape(s) of the input(s), using `add_weight()`, or other - state. `__call__()` will automatically build the layer (if it has not been - built yet) by calling `build()`. - * `call(self, inputs, *args, **kwargs)`: Called in `__call__` after making - sure `build()` has been called. `call()` performs the logic of applying the - layer to the `inputs`. The first invocation may additionally create state - that could not be conveniently created in `build()`; see its docstring - for details. - Two reserved keyword arguments you can optionally use in `call()` are: - - `training` (boolean, whether the call is in inference mode or training - mode). See more details in [the layer/model subclassing guide]( - https://www.tensorflow.org/guide/keras/custom_layers_and_models#privileged_training_argument_in_the_call_method) - - `mask` (boolean tensor encoding masked timesteps in the input, used - in RNN layers). See more details in [the layer/model subclassing guide]( - https://www.tensorflow.org/guide/keras/custom_layers_and_models#privileged_mask_argument_in_the_call_method) - A typical signature for this method is `call(self, inputs)`, and user could - optionally add `training` and `mask` if the layer need them. `*args` and - `**kwargs` is only useful for future extension when more input parameters - are planned to be added. - * `get_config(self)`: Returns a dictionary containing the configuration used - to initialize this layer. If the keys differ from the arguments - in `__init__`, then override `from_config(self)` as well. - This method is used when saving - the layer or a model that contains this layer. - - Examples: - - Here's a basic example: a layer with two variables, `w` and `b`, - that returns `y = w . x + b`. - It shows how to implement `build()` and `call()`. - Variables set as attributes of a layer are tracked as weights - of the layers (in `layer.weights`). 
- - ```python - class SimpleDense(Layer): - - def __init__(self, units=32): - super(SimpleDense, self).__init__() - self.units = units - - def build(self, input_shape): # Create the state of the layer (weights) - w_init = tf.random_normal_initializer() - self.w = tf.Variable( - initial_value=w_init(shape=(input_shape[-1], self.units), - dtype='float32'), - trainable=True) - b_init = tf.zeros_initializer() - self.b = tf.Variable( - initial_value=b_init(shape=(self.units,), dtype='float32'), - trainable=True) - - def call(self, inputs): # Defines the computation from inputs to outputs - return tf.matmul(inputs, self.w) + self.b - - # Instantiates the layer. - linear_layer = SimpleDense(4) - - # This will also call `build(input_shape)` and create the weights. - y = linear_layer(tf.ones((2, 2))) - assert len(linear_layer.weights) == 2 - - # These weights are trainable, so they're listed in `trainable_weights`: - assert len(linear_layer.trainable_weights) == 2 - ``` - - Note that the method `add_weight()` offers a shortcut to create weights: - - ```python - class SimpleDense(Layer): - - def __init__(self, units=32): - super(SimpleDense, self).__init__() - self.units = units - - def build(self, input_shape): - self.w = self.add_weight(shape=(input_shape[-1], self.units), - initializer='random_normal', - trainable=True) - self.b = self.add_weight(shape=(self.units,), - initializer='random_normal', - trainable=True) - - def call(self, inputs): - return tf.matmul(inputs, self.w) + self.b - ``` - - Besides trainable weights, updated via backpropagation during training, - layers can also have non-trainable weights. These weights are meant to - be updated manually during `call()`. Here's a example layer that computes - the running sum of its inputs: - - ```python - class ComputeSum(Layer): - - def __init__(self, input_dim): - super(ComputeSum, self).__init__() - # Create a non-trainable weight. - self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), - trainable=False) - - def call(self, inputs): - self.total.assign_add(tf.reduce_sum(inputs, axis=0)) - return self.total - - my_sum = ComputeSum(2) - x = tf.ones((2, 2)) - - y = my_sum(x) - print(y.numpy()) # [2. 2.] - - y = my_sum(x) - print(y.numpy()) # [4. 4.] - - assert my_sum.weights == [my_sum.total] - assert my_sum.non_trainable_weights == [my_sum.total] - assert my_sum.trainable_weights == [] - ``` - - For more information about creating layers, see the guide - [Making new Layers and Models via subclassing]( - https://www.tensorflow.org/guide/keras/custom_layers_and_models) - """ - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, - trainable=True, - name=None, - dtype=None, - dynamic=False, - **kwargs): - self._instrument_layer_creation() - - # These properties should be set by the user via keyword arguments. - # note that 'dtype', 'input_shape' and 'batch_input_shape' - # are only applicable to input layers: do not pass these keywords - # to non-input layers. - allowed_kwargs = { - 'input_dim', - 'input_shape', - 'batch_input_shape', - 'batch_size', - 'weights', - 'activity_regularizer', - 'autocast', - 'implementation', - } - # Validate optional keyword arguments. - generic_utils.validate_kwargs(kwargs, allowed_kwargs) - - # Mutable properties - # Indicates whether the layer's weights are updated during training - # and whether the layer's updates are run during training. 
- if not (isinstance(trainable, bool) or - (isinstance(trainable, (tf.Tensor, tf.Variable)) and - trainable.dtype is tf.bool)): - raise TypeError( - 'Expected `trainable` argument to be a boolean, ' - f'but got: {trainable}') - self._trainable = trainable - # A stateful layer is a layer whose updates are run during inference too, - # for instance stateful RNNs. - self._stateful = False - # Indicates whether `build` needs to be called upon layer call, to create - # the layer's weights. (Note that the first call() may also create weights, - # independent of build().) - self.built = False - # Provides information about which inputs are compatible with the layer. - self._input_spec = None - - # SavedModel-related attributes. - # Record the build input shape for loading purposes. - # TODO(kathywu): Move this to Layer._set_save_spec once cl/290121460 is - # submitted. - self._build_input_shape = None - self._saved_model_inputs_spec = None - self._saved_model_arg_spec = None - - # `Layer.compute_mask` will be called at the end of `Layer.__call__` if - # `Layer.compute_mask` is overridden, or if the `Layer` subclass sets - # `self.supports_masking=True`. - self._supports_masking = not generic_utils.is_default(self.compute_mask) - - self._init_set_name(name) - self._activity_regularizer = regularizers.get( - kwargs.pop('activity_regularizer', None)) - self._maybe_create_attribute('_trainable_weights', []) - self._maybe_create_attribute('_non_trainable_weights', []) - self._updates = [] - # Object to store all thread local layer properties. - self._thread_local = threading.local() - # A list of zero-argument lambdas which return Tensors, used for variable - # regularizers. - self._callable_losses = [] - # A list of symbolic Tensors containing activity regularizers and losses - # manually added through `add_loss` in graph-building mode. - self._losses = [] - # A list of metric instances corresponding to the symbolic metric tensors - # added using the `add_metric` API. - self._metrics = [] - # Ensures the same metric is not added multiple times in `MirroredStrategy`. - self._metrics_lock = threading.Lock() - - # Note that models also have a dtype policy, as they are layers. For - # functional models, the policy is only used in Model.compile, which wraps - # the optimizer with a LossScaleOptimizer if the policy name is - # "mixed_float16". Subclassed models additionally use the policy's compute - # and variable dtypes, as like any ordinary layer. - self._set_dtype_policy(dtype) - # Boolean indicating whether the layer automatically casts its inputs to the - # layer's compute_dtype. - self._autocast = kwargs.get('autocast', - base_layer_utils.v2_dtype_behavior_enabled()) - - # Tracks `TrackableDataStructure`s, `Module`s, and `Layer`s. - # Ordered by when the object was assigned as an attr. - # Entries are unique. - self._maybe_create_attribute('_self_tracked_trackables', []) - - # These lists will be filled via successive calls - # to self._add_inbound_node(). - # Used in symbolic mode only, only in conjunction with graph-networks - self._inbound_nodes_value = [] - self._outbound_nodes_value = [] - - self._init_call_fn_args() - - # Whether the `call` method can be used to build a TF graph without issues. - # This attribute has no effect if the model is created using the Functional - # API. Instead, `model.dynamic` is determined based on the internal layers. 
- if not isinstance(dynamic, bool): - raise TypeError( - f'Expected `dynamic` argument to be a boolean, but got: {dynamic}') - self._dynamic = dynamic - - # Manage input shape information if passed. - if 'input_dim' in kwargs and 'input_shape' not in kwargs: - # Backwards compatibility: alias 'input_dim' to 'input_shape'. - kwargs['input_shape'] = (kwargs['input_dim'],) - if 'input_shape' in kwargs or 'batch_input_shape' in kwargs: - # In this case we will later create an input layer - # to insert before the current layer - if 'batch_input_shape' in kwargs: - batch_input_shape = tuple(kwargs['batch_input_shape']) - elif 'input_shape' in kwargs: - if 'batch_size' in kwargs: - batch_size = kwargs['batch_size'] - else: - batch_size = None - batch_input_shape = (batch_size,) + tuple(kwargs['input_shape']) - self._batch_input_shape = batch_input_shape - - # Manage initial weight values if passed. - self._initial_weights = kwargs.get('weights', None) - - # Whether the layer will track any layers that is set as attribute on itself - # as sub-layers, the weights from the sub-layers will be included in the - # parent layer's variables() as well. - # Default to True, which means auto tracking is turned on. Certain subclass - # might want to turn it off, like Sequential model. - self._auto_track_sub_layers = True - - # For backwards compat reasons, most built-in layers do not guarantee - # That they will 100% preserve the structure of input args when saving - # / loading configs. E.g. they may un-nest an arg that is - # a list with one element. - self._preserve_input_structure_in_config = False - - # Save outer name scope at layer declaration so that it is preserved at - # the actual layer construction. - self._name_scope_on_declaration = tf.get_current_name_scope() - - # Save the temp regularization losses created in the DTensor use case. - # When DTensor is enable, we will first create LazyInitVariable and then - # DVariable with proper layout afterward. For the weights regularization - # loss, we have to create against the DVariable as well. - self._captured_weight_regularizer = [] - - @tf.__internal__.tracking.no_automatic_dependency_tracking - @generic_utils.default - def build(self, input_shape): - """Creates the variables of the layer (optional, for subclass implementers). - - This is a method that implementers of subclasses of `Layer` or `Model` - can override if they need a state-creation step in-between - layer instantiation and layer call. It is invoked automatically before - the first execution of `call()`. - - This is typically used to create the weights of `Layer` subclasses - (at the discretion of the subclass implementer). - - Args: - input_shape: Instance of `TensorShape`, or list of instances of - `TensorShape` if the layer expects a list of inputs - (one instance per input). - """ - self._build_input_shape = input_shape - self.built = True - - @doc_controls.for_subclass_implementers - def call(self, inputs, *args, **kwargs): # pylint: disable=unused-argument - """This is where the layer's logic lives. - - The `call()` method may not create state (except in its first invocation, - wrapping the creation of variables or other resources in `tf.init_scope()`). - It is recommended to create state in `__init__()`, or the `build()` method - that is called automatically before `call()` executes the first time. - - Args: - inputs: Input tensor, or dict/list/tuple of input tensors. - The first positional `inputs` argument is subject to special rules: - - `inputs` must be explicitly passed. 
A layer cannot have zero - arguments, and `inputs` cannot be provided via the default value - of a keyword argument. - - NumPy array or Python scalar values in `inputs` get cast as tensors. - - Keras mask metadata is only collected from `inputs`. - - Layers are built (`build(input_shape)` method) - using shape info from `inputs` only. - - `input_spec` compatibility is only checked against `inputs`. - - Mixed precision input casting is only applied to `inputs`. - If a layer has tensor arguments in `*args` or `**kwargs`, their - casting behavior in mixed precision should be handled manually. - - The SavedModel input specification is generated using `inputs` only. - - Integration with various ecosystem packages like TFMOT, TFLite, - TF.js, etc is only supported for `inputs` and not for tensors in - positional and keyword arguments. - *args: Additional positional arguments. May contain tensors, although - this is not recommended, for the reasons above. - **kwargs: Additional keyword arguments. May contain tensors, although - this is not recommended, for the reasons above. - The following optional keyword arguments are reserved: - - `training`: Boolean scalar tensor of Python boolean indicating - whether the `call` is meant for training or inference. - - `mask`: Boolean input mask. If the layer's `call()` method takes a - `mask` argument, its default value will be set to the mask generated - for `inputs` by the previous layer (if `input` did come from a layer - that generated a corresponding mask, i.e. if it came from a Keras - layer with masking support). - - Returns: - A tensor or list/tuple of tensors. - """ - return inputs - - @doc_controls.for_subclass_implementers - def add_weight(self, - name=None, - shape=None, - dtype=None, - initializer=None, - regularizer=None, - trainable=None, - constraint=None, - use_resource=None, - synchronization=tf.VariableSynchronization.AUTO, - aggregation=tf.VariableAggregation.NONE, - **kwargs): - """Adds a new variable to the layer. - - Args: - name: Variable name. - shape: Variable shape. Defaults to scalar if unspecified. - dtype: The type of the variable. Defaults to `self.dtype`. - initializer: Initializer instance (callable). - regularizer: Regularizer instance (callable). - trainable: Boolean, whether the variable should be part of the layer's - "trainable_variables" (e.g. variables, biases) - or "non_trainable_variables" (e.g. BatchNorm mean and variance). - Note that `trainable` cannot be `True` if `synchronization` - is set to `ON_READ`. - constraint: Constraint instance (callable). - use_resource: Whether to use a `ResourceVariable` or not. - See [this guide](https://www.tensorflow.org/guide/migrate/tf1_vs_tf2#resourcevariables_instead_of_referencevariables) # pylint: disable=line-too-long - for more information. - synchronization: Indicates when a distributed a variable will be - aggregated. Accepted values are constants defined in the class - `tf.VariableSynchronization`. By default the synchronization is set to - `AUTO` and the current `DistributionStrategy` chooses - when to synchronize. If `synchronization` is set to `ON_READ`, - `trainable` must not be set to `True`. - aggregation: Indicates how a distributed variable will be aggregated. - Accepted values are constants defined in the class - `tf.VariableAggregation`. - **kwargs: Additional keyword arguments. Accepted values are `getter`, - `collections`, `experimental_autocast` and `caching_device`. - - Returns: - The variable created. 
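For reference, a typical call into this API from a custom layer's `build()` might look like the following sketch (the layer name, weight names, and shapes are illustrative; note that, per the docstring above, `trainable=True` cannot be combined with `synchronization=ON_READ`):

```python
import tensorflow as tf


class ScaledShift(tf.keras.layers.Layer):
    def build(self, input_shape):
        # Trainable per-feature scale, float32 by default.
        self.scale = self.add_weight(
            name="scale",
            shape=(input_shape[-1],),
            initializer="ones",
            trainable=True,
        )
        # Non-trainable call counter, updated manually inside call().
        self.calls = self.add_weight(
            name="calls",
            shape=(),
            dtype=tf.int64,
            initializer="zeros",
            trainable=False,
        )

    def call(self, inputs):
        self.calls.assign_add(1)
        return inputs * self.scale


layer = ScaledShift()
out = layer(tf.ones((2, 3)))
assert len(layer.trainable_weights) == 1
assert len(layer.non_trainable_weights) == 1
```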
- - Raises: - ValueError: When giving unsupported dtype and no initializer or when - trainable has been set to True with synchronization set as `ON_READ`. - """ - if shape is None: - shape = () - kwargs.pop('partitioner', None) # Ignored. - # Validate optional keyword arguments. - for kwarg in kwargs: - if kwarg not in ['collections', 'experimental_autocast', - 'caching_device', 'getter', 'layout']: - raise TypeError('Unknown keyword argument:', kwarg) - collections_arg = kwargs.pop('collections', None) - # 'experimental_autocast' can be set to False by the caller to indicate an - # AutoCastVariable should never be created. - autocast = kwargs.pop('experimental_autocast', True) - # See the docstring for tf.Variable about the details for caching_device. - caching_device = kwargs.pop('caching_device', None) - - layout = kwargs.pop('layout', None) - # Specially handling of auto layout fetch, based on the variable name and - # attribute name. For built-in keras layers, usually the variable name, eg - # 'kernel', will match with a 'kernel_layout' attribute name on the - # instance. We will try to do this auto fetch if layout is not explicitly - # specified. This is mainly a quick workaround for not applying too many - # interface change to built-in layers, until DTensor is a public API. - # Also see dtensor.utils.allow_initializer_layout for more details. - # TODO(scottzhu): Remove this once dtensor is public to end user. - if not layout and name: - layout = getattr(self, name + '_layout', None) - - if dtype is None: - dtype = self.dtype or backend.floatx() - dtype = tf.as_dtype(dtype) - if self._dtype_policy.variable_dtype is None: - # The policy is "_infer", so we infer the policy from the variable dtype. - self._set_dtype_policy(policy.Policy(dtype.base_dtype.name)) - initializer = initializers.get(initializer) - regularizer = regularizers.get(regularizer) - constraint = constraints.get(constraint) - - if synchronization == tf.VariableSynchronization.ON_READ: - if trainable: - raise ValueError( - 'Synchronization value can be set to ' - 'VariableSynchronization.ON_READ only for non-trainable variables. ' - 'You have specified trainable=True and ' - 'synchronization=VariableSynchronization.ON_READ.') - else: - # Set trainable to be false when variable is to be synced on read. - trainable = False - elif trainable is None: - trainable = True - - # Initialize variable when no initializer provided - if initializer is None: - # If dtype is DT_FLOAT, provide a uniform unit scaling initializer - if dtype.is_floating: - initializer = initializers.get('glorot_uniform') - # If dtype is DT_INT/DT_UINT, provide a default value `zero` - # If dtype is DT_BOOL, provide a default value `FALSE` - elif dtype.is_integer or dtype.is_unsigned or dtype.is_bool: - initializer = initializers.get('zeros') - # NOTES:Do we need to support for handling DT_STRING and DT_COMPLEX here? - elif 'getter' not in kwargs: - # When `getter` is specified, it's possibly fine for `initializer` to be - # None since it's up to the custom `getter` to raise error in case it - # indeed needs `initializer`. - raise ValueError(f'An initializer for variable {name} of type ' - f'{dtype.base_dtype} is required for layer ' - f'{self.name}. Received: {initializer}.') - - getter = kwargs.pop('getter', base_layer_utils.make_variable) - if (autocast and - self._dtype_policy.compute_dtype != self._dtype_policy.variable_dtype - and dtype.is_floating): - old_getter = getter - # Wrap variable constructor to return an AutoCastVariable. 
- def getter(*args, **kwargs): # pylint: disable=function-redefined - variable = old_getter(*args, **kwargs) - return autocast_variable.create_autocast_variable(variable) - # Also the caching_device does not work with the mixed precision API, - # disable it if it is specified. - # TODO(b/142020079): Re-enable it once the bug is fixed. - if caching_device is not None: - tf_logging.warning( - '`caching_device` does not work with mixed precision API. Ignoring ' - 'user specified `caching_device`.') - caching_device = None - if layout: - getter = functools.partial(getter, layout=layout) - - variable = self._add_variable_with_custom_getter( - name=name, - shape=shape, - # TODO(allenl): a `make_variable` equivalent should be added as a - # `Trackable` method. - getter=getter, - # Manage errors in Layer rather than Trackable. - overwrite=True, - initializer=initializer, - dtype=dtype, - constraint=constraint, - trainable=trainable, - use_resource=use_resource, - collections=collections_arg, - synchronization=synchronization, - aggregation=aggregation, - caching_device=caching_device) - if regularizer is not None: - # TODO(fchollet): in the future, this should be handled at the - # level of variable creation, and weight regularization losses - # should be variable attributes. - name_in_scope = variable.name[:variable.name.find(':')] - self._handle_weight_regularization(name_in_scope, - variable, - regularizer) - if base_layer_utils.is_split_variable(variable): - for v in variable: - backend.track_variable(v) - if trainable: - self._trainable_weights.append(v) - else: - self._non_trainable_weights.append(v) - else: - backend.track_variable(variable) - if trainable: - self._trainable_weights.append(variable) - else: - self._non_trainable_weights.append(variable) - return variable - - @generic_utils.default - def get_config(self): - """Returns the config of the layer. - - A layer config is a Python dictionary (serializable) - containing the configuration of a layer. - The same layer can be reinstantiated later - (without its trained weights) from this configuration. - - The config of a layer does not include connectivity - information, nor the layer class name. These are handled - by `Network` (one layer of abstraction above). - - Note that `get_config()` does not guarantee to return a fresh copy of dict - every time it is called. The callers should make a copy of the returned dict - if they want to modify it. - - Returns: - Python dictionary. - """ - all_args = tf_inspect.getfullargspec(self.__init__).args - config = { - 'name': self.name, - 'trainable': self.trainable, - } - if hasattr(self, '_batch_input_shape'): - config['batch_input_shape'] = self._batch_input_shape - config['dtype'] = policy.serialize(self._dtype_policy) - if hasattr(self, 'dynamic'): - # Only include `dynamic` in the `config` if it is `True` - if self.dynamic: - config['dynamic'] = self.dynamic - elif 'dynamic' in all_args: - all_args.remove('dynamic') - expected_args = config.keys() - # Finds all arguments in the `__init__` that are not in the config: - extra_args = [arg for arg in all_args if arg not in expected_args] - # Check that either the only argument in the `__init__` is `self`, - # or that `get_config` has been overridden: - if len(extra_args) > 1 and hasattr(self.get_config, '_is_default'): - raise NotImplementedError(textwrap.dedent(f""" - Layer {self.__class__.__name__} has arguments {extra_args} - in `__init__` and therefore must override `get_config()`. 
- - Example: - - class CustomLayer(keras.layers.Layer): - def __init__(self, arg1, arg2): - super().__init__() - self.arg1 = arg1 - self.arg2 = arg2 - - def get_config(self): - config = super().get_config() - config.update({{ - "arg1": self.arg1, - "arg2": self.arg2, - }}) - return config""")) - - return config - - @classmethod - def from_config(cls, config): - """Creates a layer from its config. - - This method is the reverse of `get_config`, - capable of instantiating the same layer from the config - dictionary. It does not handle layer connectivity - (handled by Network), nor weights (handled by `set_weights`). + """Helper to get relative name scope from fully-speced nested name scopes. Args: - config: A Python dictionary, typically the - output of get_config. - - Returns: - A layer instance. - """ - return cls(**config) - - def compute_output_shape(self, input_shape): - """Computes the output shape of the layer. - - This method will cause the layer's state to be built, if that has not - happened before. This requires that the layer will later be used with - inputs that match the input shape provided here. - - Args: - input_shape: Shape tuple (tuple of integers) - or list of shape tuples (one per output tensor of the layer). - Shape tuples can include None for free dimensions, - instead of an integer. - - Returns: - An input shape tuple. - """ - if tf.executing_eagerly(): - # In this case we build the model first in order to do shape inference. - # This is acceptable because the framework only calls - # `compute_output_shape` on shape values that the layer would later be - # built for. It would however cause issues in case a user attempts to - # use `compute_output_shape` manually with shapes that are incompatible - # with the shape the Layer will be called on (these users will have to - # implement `compute_output_shape` themselves). - self._maybe_build(input_shape) - graph_name = str(self.name) + '_scratch_graph' - with tf.__internal__.FuncGraph(graph_name).as_default(): - input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False) - def _make_placeholder_like(shape): - ph = backend.placeholder(shape=shape, dtype=self.dtype) - ph._keras_mask = None - return ph - inputs = tf.nest.map_structure(_make_placeholder_like, input_shape) - try: - outputs = self(inputs, training=False) - except TypeError as e: - raise NotImplementedError( - 'We could not automatically infer the static shape of the ' - 'layer\'s output. Please implement the ' - '`compute_output_shape` method on your layer (%s).' % - self.__class__.__name__) from e - return tf.nest.map_structure(lambda t: t.shape, outputs) - raise NotImplementedError( - 'Please run in eager mode or implement the `compute_output_shape` ' - 'method on your layer (%s).' % self.__class__.__name__) - - @doc_controls.for_subclass_implementers - def compute_output_signature(self, input_signature): - """Compute the output tensor signature of the layer based on the inputs. - - Unlike a TensorShape object, a TensorSpec object contains both shape - and dtype information for a tensor. This method allows layers to provide - output dtype information if it is different from the input dtype. - For any layer that doesn't implement this function, - the framework will fall back to use `compute_output_shape`, and will - assume that the output dtype matches the input dtype. - - Args: - input_signature: Single TensorSpec or nested structure of TensorSpec - objects, describing a candidate input for the layer. 
- - Returns: - Single TensorSpec or nested structure of TensorSpec objects, describing - how the layer would transform the provided input. - - Raises: - TypeError: If input_signature contains a non-TensorSpec object. - """ - def check_type_return_shape(s): - if not isinstance(s, tf.TensorSpec): - raise TypeError('Only TensorSpec signature types are supported. ' - f'Received: {s}.') - return s.shape - input_shape = tf.nest.map_structure( - check_type_return_shape, input_signature) - output_shape = self.compute_output_shape(input_shape) - dtype = self._compute_dtype - if dtype is None: - input_dtypes = [s.dtype for s in tf.nest.flatten(input_signature)] - # Default behavior when self.dtype is None, is to use the first input's - # dtype. - dtype = input_dtypes[0] - return tf.nest.map_structure( - lambda s: tf.TensorSpec(dtype=dtype, shape=s), - output_shape) - - @generic_utils.default - def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument - """Computes an output mask tensor. - - Args: - inputs: Tensor or list of tensors. - mask: Tensor or list of tensors. - - Returns: - None or a tensor (or list of tensors, - one per output tensor of the layer). - """ - if not self._supports_masking: - if any(m is not None for m in tf.nest.flatten(mask)): - raise TypeError('Layer ' + self.name + ' does not support masking, ' - 'but was passed an input_mask: ' + str(mask)) - # masking not explicitly supported: return None as mask. - return None - # if masking is explicitly supported, by default - # carry over the input mask - return mask - - @traceback_utils.filter_traceback - def __call__(self, *args, **kwargs): - """Wraps `call`, applying pre- and post-processing steps. - - Args: - *args: Positional arguments to be passed to `self.call`. - **kwargs: Keyword arguments to be passed to `self.call`. - - Returns: - Output tensor(s). - - Note: - - The following optional keyword arguments are reserved for specific uses: - * `training`: Boolean scalar tensor of Python boolean indicating - whether the `call` is meant for training or inference. - * `mask`: Boolean input mask. - - If the layer's `call` method takes a `mask` argument (as some Keras - layers do), its default value will be set to the mask generated - for `inputs` by the previous layer (if `input` did come from - a layer that generated a corresponding mask, i.e. if it came from - a Keras layer with masking support. - - If the layer is not built, the method will call `build`. - - Raises: - ValueError: if the layer's `call` method returns None (an invalid value). - RuntimeError: if `super().__init__()` was not called in the constructor. - """ - if not hasattr(self, '_thread_local'): - raise RuntimeError( - 'You must call `super().__init__()` in the layer constructor.') - - # `inputs` (the first arg in the method spec) is special cased in - # layer call due to historical reasons. - # This special casing currently takes the form of: - # - 'inputs' must be explicitly passed. A layer cannot have zero arguments, - # and inputs cannot have been provided via the default value of a kwarg. - # - numpy/scalar values in `inputs` get converted to tensors - # - implicit masks / mask metadata are only collected from 'inputs` - # - Layers are built using shape info from 'inputs' only - # - input_spec compatibility is only checked against `inputs` - # - mixed precision casting (autocast) is only applied to `inputs`, - # not to any other argument. 
-    inputs, args, kwargs = self._call_spec.split_out_first_arg(args, kwargs)
-    input_list = tf.nest.flatten(inputs)
-
-    # Functional Model construction mode is invoked when `Layer`s are called
-    # on symbolic `KerasTensor`s, i.e.:
-    # >> inputs = tf.keras.Input(10)
-    # >> outputs = MyLayer()(inputs)  # Functional construction mode.
-    # >> model = tf.keras.Model(inputs, outputs)
-    if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
-      return self._functional_construction_call(inputs, args, kwargs,
-                                                input_list)
-
-    # Maintains info about the `Layer.call` stack.
-    call_context = base_layer_utils.call_context()
-
-    # Accept NumPy and scalar inputs by converting to Tensors.
-    if any(isinstance(x, (
-        tf.Tensor, np.ndarray, float, int)) for x in input_list):
-      inputs = tf.nest.map_structure(_convert_numpy_or_python_types, inputs)
-      input_list = tf.nest.flatten(inputs)
-
-    # Handle `mask` propagation from previous layer to current layer. Masks
-    # can be propagated explicitly via the `mask` argument, or implicitly by
-    # setting the `_keras_mask` attribute on the inputs to a Layer. Masks
-    # passed explicitly take priority.
-    input_masks, mask_is_implicit = self._get_input_masks(
-        inputs, input_list, args, kwargs)
-    if self._expects_mask_arg and mask_is_implicit:
-      kwargs['mask'] = input_masks
-
-    # Training mode for `Layer.call` is set via (in order of priority):
-    # (1) The `training` argument passed to this `Layer.call`, if it is
-    #     not None
-    # (2) The training mode of an outer `Layer.call`.
-    # (3) The default mode set by `tf.keras.backend.set_learning_phase`
-    #     (if set)
-    # (4) Any non-None default value for `training` specified in the call
-    #     signature
-    # (5) False (treating the layer as if it's in inference)
-    args, kwargs, training_mode = self._set_training_mode(
-        args, kwargs, call_context)
-
-    # Losses are cleared for all sublayers on the outermost `Layer.call`.
-    # Losses are not cleared on inner `Layer.call`s, because sublayers can
-    # be called multiple times.
-    if not call_context.in_call:
-      self._clear_losses()
-
-    eager = tf.executing_eagerly()
-    with call_context.enter(
-        layer=self,
-        inputs=inputs,
-        build_graph=not eager,
-        training=training_mode):
-
-      input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
-
-      if eager:
-        call_fn = self.call
-        name_scope = self._name
-      else:
-        name_scope = self._get_unnested_name_scope()
-        call_fn = self._autographed_call()
-      call_fn = traceback_utils.inject_argument_info_in_traceback(
-          call_fn,
-          object_name=f'layer "{self.name}" (type {self.__class__.__name__})')
-      with contextlib.ExitStack() as namescope_stack:
-        if _is_name_scope_on_model_declaration_enabled:
-          namescope_stack.enter_context(_name_scope_unnester(
-              self._name_scope_on_declaration))
-        namescope_stack.enter_context(tf.name_scope(name_scope))
-
-        if not self.built:
-          self._maybe_build(inputs)
-
-        if self._autocast:
-          inputs = self._maybe_cast_inputs(inputs, input_list)
-
-        with autocast_variable.enable_auto_cast_variables(
-            self._compute_dtype_object):
-          outputs = call_fn(inputs, *args, **kwargs)
-
-        if self._activity_regularizer:
-          self._handle_activity_regularization(inputs, outputs)
-        if self._supports_masking:
-          self._set_mask_metadata(inputs, outputs, input_masks, not eager)
-        if self._saved_model_inputs_spec is None:
-          self._set_save_spec(inputs, args, kwargs)
+      full_name_scope: full (absolute) name scope path.

-        return outputs
+
+    Yields:
+      Relative name scope path from the parent `_name_scope_unnester` context
+      manager.
-
-  def _get_unnested_name_scope(self):
-    if _is_name_scope_on_model_declaration_enabled:
-      with _name_scope_unnester(self._name_scope_on_declaration
-                               ) as relative_name_scope_on_declaration:
-        # To avoid `tf.name_scope` autoincrement, use absolute path.
-        relative_name_scope = filter(
-            None,
-            [tf.get_current_name_scope(), relative_name_scope_on_declaration])
-        current_name_scope = '/'.join(relative_name_scope) + '/'
-        if current_name_scope == '/':
-          current_name_scope = self._name_scope_on_declaration
-        with tf.name_scope(current_name_scope):
-          name_scope = self._name_scope()  # Avoid autoincrementing.  # pylint: disable=not-callable
-    else:
-      name_scope = self._name_scope()
-
-    return name_scope
-
-  @property
-  def dtype(self):
-    """The dtype of the layer weights.
-
-    This is equivalent to `Layer.dtype_policy.variable_dtype`. Unless
-    mixed precision is used, this is the same as `Layer.compute_dtype`, the
-    dtype of the layer's computations.
+    Example:
+    ```
+    with _name_scope_unnester('a') as name1:  # name1 == 'a'
+        with _name_scope_unnester('a/b') as name2:  # name2 == 'b'
+            with _name_scope_unnester('a/b/c') as name3:  # name3 == 'c'
+                pass
+    ```
     """
-    return self._dtype_policy.variable_dtype
-
-  @property
-  def name(self):
-    """Name of the layer (string), set in the constructor."""
-    return self._name
+    if not getattr(_name_scope_unnester_stack, "value", None):
+        _name_scope_unnester_stack.value = [""]
-
-  @property
-  def supports_masking(self):
-    """Whether this layer supports computing a mask using `compute_mask`."""
-    return self._supports_masking
+    _name_scope_unnester_stack.value.append(full_name_scope)
-
-  @supports_masking.setter
-  def supports_masking(self, value):
-    self._supports_masking = value
+    try:
+        full_name_scope = _name_scope_unnester_stack.value[-1]
+        outer_name_scope = _name_scope_unnester_stack.value[-2]
+        # `outer_name_scope` is always a prefix of `full_name_scope`, so strip
+        # it by length. (`str.lstrip` would treat its argument as a set of
+        # characters rather than a prefix and could over-strip, e.g. for
+        # `_name_scope_unnester('a/b/ba')` nested inside
+        # `_name_scope_unnester('a/b')`.)
+        relative_name_scope = full_name_scope[len(outer_name_scope):]
+        relative_name_scope = relative_name_scope.lstrip("/")
+        yield relative_name_scope
+    finally:
+        _name_scope_unnester_stack.value.pop()
-
-  @property
-  def dynamic(self):
-    """Whether the layer is dynamic (eager-only); set in the constructor."""
-    return any(layer._dynamic for layer in self._flatten_layers())
-
-  @property
-  @doc_controls.do_not_doc_inheritable
-  def stateful(self):
-    return any(layer._stateful for layer in self._flatten_layers())
+
+@keras_export("keras.layers.Layer")
+class Layer(tf.Module, version_utils.LayerVersionSelector):
+    """This is the class from which all layers inherit.
-
-  @stateful.setter
-  def stateful(self, value):
-    self._stateful = value
+
+    A layer is a callable object that takes as input one or more tensors and
+    that outputs one or more tensors. It involves *computation*, defined
+    in the `call()` method, and a *state* (weight variables). State can be
+    created in various places, at the convenience of the subclass implementer:
-
-  @property
-  def trainable(self):
-    return self._trainable
+
+    * in `__init__()`;
+    * in the optional `build()` method, which is invoked by the first
+      `__call__()` to the layer, and supplies the shape(s) of the input(s),
+      which may not have been known at initialization time;
+    * in the first invocation of `call()`, with some caveats discussed
+      below.
-
-  @trainable.setter
-  def trainable(self, value):
-    """Sets trainable attribute for the layer and its sublayers.
+
+    Layers are recursively composable: If you assign a Layer instance as an
+    attribute of another Layer, the outer layer will start tracking the weights
+    created by the inner layer.
Nested layers should be instantiated in the + `__init__()` method. - When this value is changed during training (e.g. with a - `tf.keras.callbacks.Callback`) you need to call the parent - `tf.keras.Model.make_train_function` with `force=True` in order to recompile - the training graph. + Users will just instantiate a layer and then treat it as a callable. Args: - value: Boolean with the desired state for the layer's trainable attribute. - """ - for layer in self._flatten_layers(): - layer._trainable = value - - @property - def activity_regularizer(self): - """Optional regularizer function for the output of this layer.""" - return self._activity_regularizer - - @activity_regularizer.setter - def activity_regularizer(self, regularizer): - """Optional regularizer function for the output of this layer.""" - self._activity_regularizer = regularizer - - @property - def input_spec(self): - """`InputSpec` instance(s) describing the input format for this layer. - - When you create a layer subclass, you can set `self.input_spec` to enable - the layer to run input compatibility checks when it is called. - Consider a `Conv2D` layer: it can only be called on a single input tensor - of rank 4. As such, you can set, in `__init__()`: - - ```python - self.input_spec = tf.keras.layers.InputSpec(ndim=4) - ``` - - Now, if you try to call the layer on an input that isn't rank 4 - (for instance, an input of shape `(2,)`, it will raise a nicely-formatted - error: - - ``` - ValueError: Input 0 of layer conv2d is incompatible with the layer: - expected ndim=4, found ndim=1. Full shape received: [2] - ``` - - Input checks that can be specified via `input_spec` include: - - Structure (e.g. a single input, a list of 2 inputs, etc) - - Shape - - Rank (ndim) - - Dtype - - For more information, see `tf.keras.layers.InputSpec`. - - Returns: - A `tf.keras.layers.InputSpec` instance, or nested structure thereof. - """ - return self._input_spec - - @input_spec.setter - # Must be decorated to prevent tracking, since the input_spec can be nested - # InputSpec objects. - @tf.__internal__.tracking.no_automatic_dependency_tracking - def input_spec(self, value): - for v in tf.nest.flatten(value): - if v is not None and not isinstance(v, input_spec.InputSpec): - raise TypeError('Layer input_spec must be an instance of InputSpec. ' - 'Got: {}'.format(v)) - self._input_spec = value - - @property - def trainable_weights(self): - """List of all trainable weights tracked by this layer. - - Trainable weights are updated via gradient descent during training. - - Returns: - A list of trainable variables. - """ - if self.trainable: - children_weights = self._gather_children_attribute('trainable_variables') - return self._dedup_weights(self._trainable_weights + children_weights) - else: - return [] - - @property - def non_trainable_weights(self): - """List of all non-trainable weights tracked by this layer. - - Non-trainable weights are *not* updated during training. They are expected - to be updated manually in `call()`. - - Returns: - A list of non-trainable variables. 
- """ - if self.trainable: - children_weights = self._gather_children_attribute( - 'non_trainable_variables') - non_trainable_weights = self._non_trainable_weights + children_weights - else: - children_weights = self._gather_children_attribute('variables') - non_trainable_weights = ( - self._trainable_weights + self._non_trainable_weights + - children_weights) - return self._dedup_weights(non_trainable_weights) - - @property - def weights(self): - """Returns the list of all layer variables/weights. - - Returns: - A list of variables. - """ - return self.trainable_weights + self.non_trainable_weights - - @property - @doc_controls.do_not_generate_docs - def updates(self): - warnings.warn( - '`layer.updates` will be removed in a future version. ' - 'This property should not be used in TensorFlow 2.0, ' - 'as `updates` are applied automatically.', - stacklevel=2) - return [] - - @property - def losses(self): - """List of losses added using the `add_loss()` API. - - Variable regularization tensors are created when this property is accessed, - so it is eager safe: accessing `losses` under a `tf.GradientTape` will - propagate gradients back to the corresponding variables. + trainable: Boolean, whether the layer's variables should be trainable. + name: String name of the layer. + dtype: The dtype of the layer's computations and weights. Can also be a + `tf.keras.mixed_precision.Policy`, which allows the computation and + weight dtype to differ. Default of `None` means to use + `tf.keras.mixed_precision.global_policy()`, which is a float32 policy + unless set to different value. + dynamic: Set this to `True` if your layer should only be run eagerly, and + should not be used to generate a static computation graph. + This would be the case for a Tree-RNN or a recursive network, + for example, or generally for any layer that manipulates tensors + using Python control flow. If `False`, we assume that the layer can + safely be used to generate a static computation graph. + + Attributes: + name: The name of the layer (string). + dtype: The dtype of the layer's weights. + variable_dtype: Alias of `dtype`. + compute_dtype: The dtype of the layer's computations. Layers automatically + cast inputs to this dtype which causes the computations and output to + also be in this dtype. When mixed precision is used with a + `tf.keras.mixed_precision.Policy`, this will be different than + `variable_dtype`. + dtype_policy: The layer's dtype policy. See the + `tf.keras.mixed_precision.Policy` documentation for details. + trainable_weights: List of variables to be included in backprop. + non_trainable_weights: List of variables that should not be + included in backprop. + weights: The concatenation of the lists trainable_weights and + non_trainable_weights (in this order). + trainable: Whether the layer should be trained (boolean), i.e. whether + its potentially-trainable weights should be returned as part of + `layer.trainable_weights`. + input_spec: Optional (list of) `InputSpec` object(s) specifying the + constraints on inputs that can be accepted by the layer. + + We recommend that descendants of `Layer` implement the following methods: + + * `__init__()`: Defines custom layer attributes, and creates layer weights + that do not depend on input shapes, using `add_weight()`, or other state. + * `build(self, input_shape)`: This method can be used to create weights that + depend on the shape(s) of the input(s), using `add_weight()`, or other + state. 
`__call__()` will automatically build the layer (if it has not been
+      built yet) by calling `build()`.
+    * `call(self, inputs, *args, **kwargs)`: Called in `__call__` after making
+      sure `build()` has been called. `call()` performs the logic of applying
+      the layer to the `inputs`. The first invocation may additionally create
+      state that could not be conveniently created in `build()`; see its
+      docstring for details.
+      Two reserved keyword arguments you can optionally use in `call()` are:
+      - `training` (boolean, whether the call is in inference mode or training
+        mode). See more details in [the layer/model subclassing guide](
+        https://www.tensorflow.org/guide/keras/custom_layers_and_models#privileged_training_argument_in_the_call_method)
+      - `mask` (boolean tensor encoding masked timesteps in the input, used
+        in RNN layers). See more details in
+        [the layer/model subclassing guide](
+        https://www.tensorflow.org/guide/keras/custom_layers_and_models#privileged_mask_argument_in_the_call_method)
+      A typical signature for this method is `call(self, inputs)`, and users
+      can optionally add `training` and `mask` if the layer needs them.
+      `*args` and `**kwargs` are only useful for future extension, when more
+      input parameters are planned to be added.
+    * `get_config(self)`: Returns a dictionary containing the configuration
+      used to initialize this layer. If the keys differ from the arguments
+      in `__init__()`, then override `from_config()` as well.
+      This method is used when saving
+      the layer or a model that contains this layer.

     Examples:

-    >>> class MyLayer(tf.keras.layers.Layer):
-    ...   def call(self, inputs):
-    ...     self.add_loss(tf.abs(tf.reduce_mean(inputs)))
-    ...     return inputs
-    >>> l = MyLayer()
-    >>> l(np.ones((10, 1)))
-    >>> l.losses
-    [1.0]
-
-    >>> inputs = tf.keras.Input(shape=(10,))
-    >>> x = tf.keras.layers.Dense(10)(inputs)
-    >>> outputs = tf.keras.layers.Dense(1)(x)
-    >>> model = tf.keras.Model(inputs, outputs)
-    >>> # Activity regularization.
-    >>> len(model.losses)
-    0
-    >>> model.add_loss(tf.abs(tf.reduce_mean(x)))
-    >>> len(model.losses)
-    1
-
-    >>> inputs = tf.keras.Input(shape=(10,))
-    >>> d = tf.keras.layers.Dense(10, kernel_initializer='ones')
-    >>> x = d(inputs)
-    >>> outputs = tf.keras.layers.Dense(1)(x)
-    >>> model = tf.keras.Model(inputs, outputs)
-    >>> # Weight regularization.
-    >>> model.add_loss(lambda: tf.reduce_mean(d.kernel))
-    >>> model.losses
-    []
-
-    Returns:
-      A list of tensors.
-    """
-    collected_losses = []
-    for layer in self._flatten_layers():
-      # If any eager losses are present, we assume the model to be part of an
-      # eager training loop (either a custom one or the one used when
-      # `run_eagerly=True`) and so we always return just the eager losses.
-      if layer._eager_losses:
-        # Filter placeholder losses that may have been added by revived
-        # layers (see base_layer_utils for details).
-        if (layer._eager_losses[0] is
-            not base_layer_utils.REVIVED_LOSS_PLACEHOLDER):
-          collected_losses.extend(layer._eager_losses)
-      else:
-        collected_losses.extend(layer._losses)
-      for regularizer in layer._callable_losses:
-        loss_tensor = regularizer()
-        if loss_tensor is not None:
-          collected_losses.append(loss_tensor)
-    return collected_losses
-
-  def add_loss(self, losses, **kwargs):
-    """Add loss tensor(s), potentially dependent on layer inputs.
-
-    Some losses (for instance, activity regularization losses) may be dependent
-    on the inputs passed when calling a layer.
Hence, when reusing the same - layer on different inputs `a` and `b`, some entries in `layer.losses` may - be dependent on `a` and some on `b`. This method automatically keeps track - of dependencies. - - This method can be used inside a subclassed layer or model's `call` - function, in which case `losses` should be a Tensor or list of Tensors. - - Example: - - ```python - class MyLayer(tf.keras.layers.Layer): - def call(self, inputs): - self.add_loss(tf.abs(tf.reduce_mean(inputs))) - return inputs - ``` - - This method can also be called directly on a Functional Model during - construction. In this case, any loss Tensors passed to this Model must - be symbolic and be able to be traced back to the model's `Input`s. These - losses become part of the model's topology and are tracked in `get_config`. - - Example: + Here's a basic example: a layer with two variables, `w` and `b`, + that returns `y = w . x + b`. + It shows how to implement `build()` and `call()`. + Variables set as attributes of a layer are tracked as weights + of the layers (in `layer.weights`). ```python - inputs = tf.keras.Input(shape=(10,)) - x = tf.keras.layers.Dense(10)(inputs) - outputs = tf.keras.layers.Dense(1)(x) - model = tf.keras.Model(inputs, outputs) - # Activity regularization. - model.add_loss(tf.abs(tf.reduce_mean(x))) + class SimpleDense(Layer): + + def __init__(self, units=32): + super(SimpleDense, self).__init__() + self.units = units + + def build(self, input_shape): # Create the state of the layer (weights) + w_init = tf.random_normal_initializer() + self.w = tf.Variable( + initial_value=w_init(shape=(input_shape[-1], self.units), + dtype='float32'), + trainable=True) + b_init = tf.zeros_initializer() + self.b = tf.Variable( + initial_value=b_init(shape=(self.units,), dtype='float32'), + trainable=True) + + def call(self, inputs): # Defines the computation from inputs to outputs + return tf.matmul(inputs, self.w) + self.b + + # Instantiates the layer. + linear_layer = SimpleDense(4) + + # This will also call `build(input_shape)` and create the weights. + y = linear_layer(tf.ones((2, 2))) + assert len(linear_layer.weights) == 2 + + # These weights are trainable, so they're listed in `trainable_weights`: + assert len(linear_layer.trainable_weights) == 2 ``` - If this is not the case for your loss (if, for example, your loss references - a `Variable` of one of the model's layers), you can wrap your loss in a - zero-argument lambda. These losses are not tracked as part of the model's - topology since they can't be serialized. - - Example: + Note that the method `add_weight()` offers a shortcut to create weights: ```python - inputs = tf.keras.Input(shape=(10,)) - d = tf.keras.layers.Dense(10) - x = d(inputs) - outputs = tf.keras.layers.Dense(1)(x) - model = tf.keras.Model(inputs, outputs) - # Weight regularization. - model.add_loss(lambda: tf.reduce_mean(d.kernel)) - ``` + class SimpleDense(Layer): - Args: - losses: Loss tensor, or list/tuple of tensors. Rather than tensors, losses - may also be zero-argument callables which create a loss tensor. - **kwargs: Used for backwards compatibility only. - """ - kwargs.pop('inputs', None) - if kwargs: - raise TypeError('Unknown keyword arguments: %s' % (kwargs.keys(),)) - - def _tag_callable(loss): - """Tags callable loss tensor as `_unconditional_loss`.""" - if callable(loss): - # We run the loss without autocasting, as regularizers are often - # numerically unstable in float16. 
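A compact sketch of the accepted forms the `add_loss()` docstring above describes, i.e. an input-dependent loss created inside `call()` versus an input-independent loss wrapped in a zero-argument callable (not part of the patch; the layer name and coefficients are invented):

```python
import tensorflow as tf

class ActivityRegularized(tf.keras.layers.Layer):
    def call(self, inputs):
        # Input-dependent loss: created inside `call()`, re-created on
        # every forward pass.
        self.add_loss(0.01 * tf.reduce_sum(tf.square(inputs)))
        return inputs

inputs = tf.keras.Input(shape=(4,))
dense = tf.keras.layers.Dense(2)
outputs = dense(ActivityRegularized()(inputs))
model = tf.keras.Model(inputs, outputs)

# Input-independent loss referencing a variable: wrap it in a
# zero-argument lambda so it is evaluated lazily, as the docstring advises.
model.add_loss(lambda: 0.01 * tf.reduce_sum(tf.square(dense.kernel)))
print(len(model.losses))  # 2
```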
- with autocast_variable.enable_auto_cast_variables(None): - loss = loss() - if loss is None: - return None # Will be filtered out when computing the .losses property - if not tf.is_tensor(loss): - loss = tf.convert_to_tensor( - loss, dtype=backend.floatx()) - loss._unconditional_loss = True # pylint: disable=protected-access - return loss - - losses = tf.nest.flatten(losses) - - callable_losses = [] - eager_losses = [] - symbolic_losses = [] - for loss in losses: - if callable(loss): - callable_losses.append(functools.partial(_tag_callable, loss)) - continue - if loss is None: - continue - if not tf.is_tensor(loss) and not isinstance( - loss, keras_tensor.KerasTensor): - loss = tf.convert_to_tensor( - loss, dtype=backend.floatx()) - # TF Functions should take the eager path. - if ((tf_utils.is_symbolic_tensor(loss) or - isinstance(loss, keras_tensor.KerasTensor)) and - not base_layer_utils.is_in_tf_function()): - symbolic_losses.append(loss) - elif tf.is_tensor(loss): - eager_losses.append(loss) - - self._callable_losses.extend(callable_losses) - - in_call_context = base_layer_utils.call_context().in_call - if eager_losses and not in_call_context: - raise ValueError( - 'Expected a symbolic Tensors or a callable for the loss value. ' - 'Please wrap your loss computation in a zero argument `lambda`.') - - self._eager_losses.extend(eager_losses) - - for symbolic_loss in symbolic_losses: - if getattr(self, '_is_graph_network', False): - self._graph_network_add_loss(symbolic_loss) - else: - # Possible a loss was added in a Layer's `build`. - self._losses.append(symbolic_loss) - - @property - def metrics(self): - """List of metrics added using the `add_metric()` API. + def __init__(self, units=32): + super(SimpleDense, self).__init__() + self.units = units - Example: - - >>> input = tf.keras.layers.Input(shape=(3,)) - >>> d = tf.keras.layers.Dense(2) - >>> output = d(input) - >>> d.add_metric(tf.reduce_max(output), name='max') - >>> d.add_metric(tf.reduce_min(output), name='min') - >>> [m.name for m in d.metrics] - ['max', 'min'] - - Returns: - A list of `Metric` objects. - """ - collected_metrics = [] - for layer in self._flatten_layers(): - if not hasattr(layer, '_metrics_lock'): - continue - with layer._metrics_lock: - collected_metrics.extend(layer._metrics) - return collected_metrics - - def add_metric(self, value, name=None, **kwargs): - """Adds metric tensor to the layer. - - This method can be used inside the `call()` method of a subclassed layer - or model. - - ```python - class MyMetricLayer(tf.keras.layers.Layer): - def __init__(self): - super(MyMetricLayer, self).__init__(name='my_metric_layer') - self.mean = tf.keras.metrics.Mean(name='metric_1') + def build(self, input_shape): + self.w = self.add_weight(shape=(input_shape[-1], self.units), + initializer='random_normal', + trainable=True) + self.b = self.add_weight(shape=(self.units,), + initializer='random_normal', + trainable=True) def call(self, inputs): - self.add_metric(self.mean(inputs)) - self.add_metric(tf.reduce_sum(inputs), name='metric_2') - return inputs + return tf.matmul(inputs, self.w) + self.b ``` - This method can also be called directly on a Functional Model during - construction. In this case, any tensor passed to this Model must - be symbolic and be able to be traced back to the model's `Input`s. These - metrics become part of the model's topology and are tracked when you - save the model via `save()`. 
+    Besides trainable weights, updated via backpropagation during training,
+    layers can also have non-trainable weights. These weights are meant to
+    be updated manually during `call()`. Here's an example layer that computes
+    the running sum of its inputs:

     ```python
-    inputs = tf.keras.Input(shape=(10,))
-    x = tf.keras.layers.Dense(10)(inputs)
-    outputs = tf.keras.layers.Dense(1)(x)
-    model = tf.keras.Model(inputs, outputs)
-    model.add_metric(math_ops.reduce_sum(x), name='metric_1')
-    ```
-
-    Note: Calling `add_metric()` with the result of a metric object on a
-    Functional Model, as shown in the example below, is not supported. This is
-    because we cannot trace the metric result tensor back to the model's
-    inputs.
-
-    ```python
-    inputs = tf.keras.Input(shape=(10,))
-    x = tf.keras.layers.Dense(10)(inputs)
-    outputs = tf.keras.layers.Dense(1)(x)
-    model = tf.keras.Model(inputs, outputs)
-    model.add_metric(tf.keras.metrics.Mean()(x), name='metric_1')
-    ```
+    class ComputeSum(Layer):

-    Args:
-      value: Metric tensor.
-      name: String metric name.
-      **kwargs: Additional keyword arguments for backward compatibility.
-        Accepted values:
-        `aggregation` - When the `value` tensor provided is not the result of
-          calling a `keras.Metric` instance, it will be aggregated by default
-          using a `keras.Metric.Mean`.
-    """
-    kwargs_keys = list(kwargs.keys())
-    if (len(kwargs_keys) > 1 or
-        (len(kwargs_keys) == 1 and kwargs_keys[0] != 'aggregation')):
-      raise TypeError(f'Unknown keyword arguments: {kwargs.keys()}. '
-                      'Expected `aggregation`.')
-
-    from_metric_obj = hasattr(value, '_metric_obj')
-    is_symbolic = isinstance(value, keras_tensor.KerasTensor)
-    in_call_context = base_layer_utils.call_context().in_call
-
-    if name is None and not from_metric_obj:
-      # Eg. `self.add_metric(math_ops.reduce_sum(x))`
-      # In eager mode, we use the metric name to look up a metric. Without a
-      # name, a new Mean metric wrapper will be created on every model/layer
-      # call. So, we raise an error when no name is provided.
-      # We will do the same for symbolic mode for consistency, although a
-      # name will be generated if no name is provided.
-
-      # We will not raise this error in the following use case for the sake
-      # of consistency, as the name is provided in the metric constructor.
-      # mean = metrics.Mean(name='my_metric')
-      # model.add_metric(mean(outputs))
-      raise ValueError('Please provide a name for your metric like '
-                       '`self.add_metric(tf.reduce_sum(inputs), '
-                       'name=\'mean_activation\')`')
-    elif from_metric_obj:
-      name = value._metric_obj.name
-
-    if not in_call_context and not is_symbolic:
-      raise ValueError('Expected a symbolic Tensor for the metric value, '
-                       'received: ' + str(value))
-
-    # If a metric was added in a Layer's `call` or `build`.
-    if in_call_context or not getattr(self, '_is_graph_network', False):
-      # TF Function path should take the eager path.
-
-      # If the given metric is available in the `metrics` list we just update
-      # state on it, otherwise we create a new metric instance and
-      # add it to the `metrics` list.
-      metric_obj = getattr(value, '_metric_obj', None)
-      # Tensors that come from a Metric object already updated the Metric
-      # state.
- should_update_state = not metric_obj - name = metric_obj.name if metric_obj else name - - with self._metrics_lock: - match = self._get_existing_metric(name) - if match: - metric_obj = match - elif metric_obj: - self._metrics.append(metric_obj) - else: - # Build the metric object with the value's dtype if it defines one - metric_obj = metrics_mod.Mean( - name=name, dtype=getattr(value, 'dtype', None)) - self._metrics.append(metric_obj) - - if should_update_state: - metric_obj(value) - else: - if from_metric_obj: - raise ValueError('Using the result of calling a `Metric` object ' - 'when calling `add_metric` on a Functional ' - 'Model is not supported. Please pass the ' - 'Tensor to monitor directly.') - - # Insert layers into the Keras Graph Network. - aggregation = None if from_metric_obj else 'mean' - self._graph_network_add_metric(value, aggregation, name) - - @doc_controls.do_not_doc_inheritable - def add_update(self, updates): - """Add update op(s), potentially dependent on layer inputs. - - Weight updates (for instance, the updates of the moving mean and variance - in a BatchNormalization layer) may be dependent on the inputs passed - when calling a layer. Hence, when reusing the same layer on - different inputs `a` and `b`, some entries in `layer.updates` may be - dependent on `a` and some on `b`. This method automatically keeps track - of dependencies. - - This call is ignored when eager execution is enabled (in that case, variable - updates are run on the fly and thus do not need to be tracked for later - execution). + def __init__(self, input_dim): + super(ComputeSum, self).__init__() + # Create a non-trainable weight. + self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), + trainable=False) - Args: - updates: Update op, or list/tuple of update ops, or zero-arg callable - that returns an update op. A zero-arg callable should be passed in - order to disable running the updates by setting `trainable=False` - on this Layer, when executing in Eager mode. - """ - call_context = base_layer_utils.call_context() - # No need to run updates during Functional API construction. - if call_context.in_keras_graph: - return - - # Callable updates are disabled by setting `trainable=False`. - if not call_context.frozen: - for update in tf.nest.flatten(updates): - if callable(update): - update() # pylint: disable=not-callable - - def set_weights(self, weights): - """Sets the weights of the layer, from NumPy arrays. - - The weights of a layer represent the state of the layer. This function - sets the weight values from numpy arrays. The weight values should be - passed in the order they are created by the layer. Note that the layer's - weights must be instantiated before calling this function, by calling - the layer. - - For example, a `Dense` layer returns a list of two values: the kernel matrix - and the bias vector. These can be used to set the weights of another - `Dense` layer: - - >>> layer_a = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(1.)) - >>> a_out = layer_a(tf.convert_to_tensor([[1., 2., 3.]])) - >>> layer_a.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - >>> layer_b = tf.keras.layers.Dense(1, - ... 
kernel_initializer=tf.constant_initializer(2.)) - >>> b_out = layer_b(tf.convert_to_tensor([[10., 20., 30.]])) - >>> layer_b.get_weights() - [array([[2.], - [2.], - [2.]], dtype=float32), array([0.], dtype=float32)] - >>> layer_b.set_weights(layer_a.get_weights()) - >>> layer_b.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - - Args: - weights: a list of NumPy arrays. The number - of arrays and their shape must match - number of the dimensions of the weights - of the layer (i.e. it should match the - output of `get_weights`). - - Raises: - ValueError: If the provided weights list does not match the - layer's specifications. - """ - params = self.weights - - expected_num_weights = 0 - for param in params: - if isinstance(param, base_layer_utils.TrackableWeightHandler): - expected_num_weights += param.num_tensors - else: - expected_num_weights += 1 - - if expected_num_weights != len(weights): - raise ValueError( - 'You called `set_weights(weights)` on layer "%s" ' - 'with a weight list of length %s, but the layer was ' - 'expecting %s weights. Provided weights: %s...' % - (self.name, len(weights), expected_num_weights, str(weights)[:50])) - - weight_index = 0 - weight_value_tuples = [] - for param in params: - if isinstance(param, base_layer_utils.TrackableWeightHandler): - num_tensors = param.num_tensors - tensors = weights[weight_index:weight_index + num_tensors] - param.set_weights(tensors) - weight_index += num_tensors - else: - weight = weights[weight_index] - weight_shape = weight.shape if hasattr(weight, 'shape') else () - ref_shape = param.shape - if not ref_shape.is_compatible_with(weight_shape): - raise ValueError( - f'Layer {self.name} weight shape {ref_shape} ' - 'is not compatible with provided weight ' - f'shape {weight_shape}.') - weight_value_tuples.append((param, weight)) - weight_index += 1 - - backend.batch_set_value(weight_value_tuples) - - # Perform any layer defined finalization of the layer state. - for layer in self._flatten_layers(): - layer.finalize_state() - - def get_weights(self): - """Returns the current weights of the layer, as NumPy arrays. - - The weights of a layer represent the state of the layer. This function - returns both trainable and non-trainable weight values associated with this - layer as a list of NumPy arrays, which can in turn be used to load state - into similarly parameterized layers. - - For example, a `Dense` layer returns a list of two values: the kernel matrix - and the bias vector. These can be used to set the weights of another - `Dense` layer: - - >>> layer_a = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(1.)) - >>> a_out = layer_a(tf.convert_to_tensor([[1., 2., 3.]])) - >>> layer_a.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - >>> layer_b = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(2.)) - >>> b_out = layer_b(tf.convert_to_tensor([[10., 20., 30.]])) - >>> layer_b.get_weights() - [array([[2.], - [2.], - [2.]], dtype=float32), array([0.], dtype=float32)] - >>> layer_b.set_weights(layer_a.get_weights()) - >>> layer_b.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - - Returns: - Weights values as a list of NumPy arrays. 
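The zero-argument-callable pattern recommended by the `add_update` docstring a few hunks above (so the update can be skipped when the layer is frozen) is easiest to see in a sketch. This is illustrative only and not part of the patch; `RunningMax` is an invented layer:

```python
import tensorflow as tf

class RunningMax(tf.keras.layers.Layer):
    def build(self, input_shape):
        self.maximum = self.add_weight(
            name="maximum", shape=(), trainable=False,
            initializer=tf.keras.initializers.Constant(float("-inf")))

    def call(self, inputs):
        # A zero-argument callable: executed during the call when the layer
        # is not frozen, skipped entirely when `trainable=False`.
        self.add_update(
            lambda: self.maximum.assign(
                tf.maximum(self.maximum, tf.reduce_max(inputs))))
        return inputs

layer = RunningMax()
_ = layer(tf.constant([1.0, 5.0, 3.0]))
print(float(layer.maximum))  # 5.0
```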
- """ - weights = self.weights - output_weights = [] - for weight in weights: - if isinstance(weight, base_layer_utils.TrackableWeightHandler): - output_weights.extend(weight.get_tensors()) - else: - output_weights.append(weight) - return backend.batch_get_value(output_weights) - - @doc_controls.do_not_generate_docs - def finalize_state(self): - """Finalizes the layers state after updating layer weights. - - This function can be subclassed in a layer and will be called after updating - a layer weights. It can be overridden to finalize any additional layer state - after a weight update. - - This function will be called after weights of a layer have been restored - from a loaded model. - """ - pass - - @doc_controls.do_not_doc_inheritable - def get_input_mask_at(self, node_index): - """Retrieves the input mask tensor(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A mask tensor - (or list of tensors if the layer has multiple inputs). - """ - inputs = self.get_input_at(node_index) - if isinstance(inputs, list): - return [getattr(x, '_keras_mask', None) for x in inputs] - else: - return getattr(inputs, '_keras_mask', None) - - @doc_controls.do_not_doc_inheritable - def get_output_mask_at(self, node_index): - """Retrieves the output mask tensor(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A mask tensor - (or list of tensors if the layer has multiple outputs). - """ - output = self.get_output_at(node_index) - if isinstance(output, list): - return [getattr(x, '_keras_mask', None) for x in output] - else: - return getattr(output, '_keras_mask', None) - - @property - @doc_controls.do_not_doc_inheritable - def input_mask(self): - """Retrieves the input mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - Returns: - Input mask tensor (potentially None) or list of input - mask tensors. - - Raises: - AttributeError: if the layer is connected to - more than one incoming layers. - """ - inputs = self.input - if isinstance(inputs, list): - return [getattr(x, '_keras_mask', None) for x in inputs] - else: - return getattr(inputs, '_keras_mask', None) - - @property - @doc_controls.do_not_doc_inheritable - def output_mask(self): - """Retrieves the output mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - Returns: - Output mask tensor (potentially None) or list of output - mask tensors. - - Raises: - AttributeError: if the layer is connected to - more than one incoming layers. - """ - output = self.output - if isinstance(output, list): - return [getattr(x, '_keras_mask', None) for x in output] - else: - return getattr(output, '_keras_mask', None) - - @doc_controls.do_not_doc_inheritable - def get_input_shape_at(self, node_index): - """Retrieves the input shape(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A shape tuple - (or list of shape tuples if the layer has multiple inputs). 
- - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'input_shapes', - 'input shape') - - @doc_controls.do_not_doc_inheritable - def get_output_shape_at(self, node_index): - """Retrieves the output shape(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A shape tuple - (or list of shape tuples if the layer has multiple outputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'output_shapes', - 'output shape') - - @doc_controls.do_not_doc_inheritable - def get_input_at(self, node_index): - """Retrieves the input tensor(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first input node of the layer. - - Returns: - A tensor (or list of tensors if the layer has multiple inputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'input_tensors', - 'input') - - @doc_controls.do_not_doc_inheritable - def get_output_at(self, node_index): - """Retrieves the output tensor(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first output node of the layer. - - Returns: - A tensor (or list of tensors if the layer has multiple outputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'output_tensors', - 'output') - - @property - def input(self): - """Retrieves the input tensor(s) of a layer. - - Only applicable if the layer has exactly one input, - i.e. if it is connected to one incoming layer. - - Returns: - Input tensor or list of input tensors. - - Raises: - RuntimeError: If called in Eager mode. - AttributeError: If no inbound nodes are found. - """ - if not self._inbound_nodes: - raise AttributeError('Layer ' + self.name + - ' is not connected, no input to return.') - return self._get_node_attribute_at_index(0, 'input_tensors', 'input') - - @property - def output(self): - """Retrieves the output tensor(s) of a layer. - - Only applicable if the layer has exactly one output, - i.e. if it is connected to one incoming layer. - - Returns: - Output tensor or list of output tensors. - - Raises: - AttributeError: if the layer is connected to more than one incoming - layers. - RuntimeError: if called in Eager mode. - """ - if not self._inbound_nodes: - raise AttributeError('Layer ' + self.name + ' has no inbound nodes.') - return self._get_node_attribute_at_index(0, 'output_tensors', 'output') - - @property - @doc_controls.do_not_doc_inheritable - def input_shape(self): - """Retrieves the input shape(s) of a layer. - - Only applicable if the layer has exactly one input, - i.e. if it is connected to one incoming layer, or if all inputs - have the same shape. - - Returns: - Input shape, as an integer shape tuple - (or list of shape tuples, one tuple per input tensor). - - Raises: - AttributeError: if the layer has no defined input_shape. - RuntimeError: if called in Eager mode. - """ - if not self._inbound_nodes: - raise AttributeError(f'The layer "{self.name}" has never been called ' - 'and thus has no defined input shape. 
Note that the ' - '`input_shape` property is only available for ' - 'Functional and Sequential models.') - all_input_shapes = set( - [str(node.input_shapes) for node in self._inbound_nodes]) - if len(all_input_shapes) == 1: - return self._inbound_nodes[0].input_shapes - else: - raise AttributeError('The layer "' + str(self.name) + - '" has multiple inbound nodes, ' - 'with different input shapes. Hence ' - 'the notion of "input shape" is ' - 'ill-defined for the layer. ' - 'Use `get_input_shape_at(node_index)` ' - 'instead.') - - def count_params(self): - """Count the total number of scalars composing the weights. - - Returns: - An integer count. - - Raises: - ValueError: if the layer isn't yet built - (in which case its weights aren't yet defined). - """ - if not self.built: - if getattr(self, '_is_graph_network', False): - with tf_utils.maybe_init_scope(self): - self._maybe_build(self.inputs) - else: - raise ValueError('You tried to call `count_params` ' - f'on layer {self.name}' - ', but the layer isn\'t built. ' - 'You can build it manually via: ' - f'`{self.name}.build(batch_input_shape)`.') - return layer_utils.count_params(self.weights) - - @property - @doc_controls.do_not_doc_inheritable - def output_shape(self): - """Retrieves the output shape(s) of a layer. - - Only applicable if the layer has one output, - or if all outputs have the same shape. - - Returns: - Output shape, as an integer shape tuple - (or list of shape tuples, one tuple per output tensor). - - Raises: - AttributeError: if the layer has no defined output shape. - RuntimeError: if called in Eager mode. - """ - if not self._inbound_nodes: - raise AttributeError(f'The layer "{self.name}" has never been called ' - 'and thus has no defined output shape.') - all_output_shapes = set( - [str(node.output_shapes) for node in self._inbound_nodes]) - if len(all_output_shapes) == 1: - return self._inbound_nodes[0].output_shapes - else: - raise AttributeError('The layer "%s"' - ' has multiple inbound nodes, ' - 'with different output shapes. Hence ' - 'the notion of "output shape" is ' - 'ill-defined for the layer. ' - 'Use `get_output_shape_at(node_index)` ' - 'instead.' % self.name) - - @property - def dtype_policy(self): - """The dtype policy associated with this layer. - - This is an instance of a `tf.keras.mixed_precision.Policy`. - """ - return self._dtype_policy + def call(self, inputs): + self.total.assign_add(tf.reduce_sum(inputs, axis=0)) + return self.total - @property - def compute_dtype(self): - """The dtype of the layer's computations. + my_sum = ComputeSum(2) + x = tf.ones((2, 2)) - This is equivalent to `Layer.dtype_policy.compute_dtype`. Unless - mixed precision is used, this is the same as `Layer.dtype`, the dtype of - the weights. + y = my_sum(x) + print(y.numpy()) # [2. 2.] - Layers automatically cast their inputs to the compute dtype, which causes - computations and the output to be in the compute dtype as well. This is done - by the base Layer class in `Layer.__call__`, so you do not have to insert - these casts if implementing your own layer. + y = my_sum(x) + print(y.numpy()) # [4. 4.] - Layers often perform certain internal computations in higher precision when - `compute_dtype` is float16 or bfloat16 for numeric stability. The output - will still typically be float16 or bfloat16 in such cases. + assert my_sum.weights == [my_sum.total] + assert my_sum.non_trainable_weights == [my_sum.total] + assert my_sum.trainable_weights == [] + ``` - Returns: - The layer's compute dtype. 
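A small sketch of the variable-dtype versus compute-dtype split documented above, assuming the standard `mixed_float16` policy (illustrative, not part of the patch):

```python
import tensorflow as tf

# With a mixed policy, variables stay in float32 while computations
# (and therefore outputs) run in float16.
layer = tf.keras.layers.Dense(
    2, dtype=tf.keras.mixed_precision.Policy("mixed_float16"))
y = layer(tf.ones((1, 4)))
print(layer.kernel.dtype)  # float32  (variable_dtype)
print(y.dtype)             # float16  (compute_dtype)
```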
+ For more information about creating layers, see the guide + [Making new Layers and Models via subclassing]( + https://www.tensorflow.org/guide/keras/custom_layers_and_models) """ - return self._dtype_policy.compute_dtype - - @property - def variable_dtype(self): - """Alias of `Layer.dtype`, the dtype of the weights.""" - return self.dtype - @property - @doc_controls.do_not_doc_inheritable - def inbound_nodes(self): - """Return Functional API nodes upstream of this layer.""" - return self._inbound_nodes - - @property - @doc_controls.do_not_doc_inheritable - def outbound_nodes(self): - """Return Functional API nodes downstream of this layer.""" - return self._outbound_nodes - - ############################################################################## - # Methods & attributes below are public aliases of other methods. # - ############################################################################## + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__( + self, trainable=True, name=None, dtype=None, dynamic=False, **kwargs + ): + self._instrument_layer_creation() + + # These properties should be set by the user via keyword arguments. + # note that 'dtype', 'input_shape' and 'batch_input_shape' + # are only applicable to input layers: do not pass these keywords + # to non-input layers. + allowed_kwargs = { + "input_dim", + "input_shape", + "batch_input_shape", + "batch_size", + "weights", + "activity_regularizer", + "autocast", + "implementation", + } + # Validate optional keyword arguments. + generic_utils.validate_kwargs(kwargs, allowed_kwargs) + + # Mutable properties + # Indicates whether the layer's weights are updated during training + # and whether the layer's updates are run during training. + if not ( + isinstance(trainable, bool) + or ( + isinstance(trainable, (tf.Tensor, tf.Variable)) + and trainable.dtype is tf.bool + ) + ): + raise TypeError( + "Expected `trainable` argument to be a boolean, " + f"but got: {trainable}" + ) + self._trainable = trainable + # A stateful layer is a layer whose updates are run during inference + # too, for instance stateful RNNs. + self._stateful = False + # Indicates whether `build` needs to be called upon layer call, to + # create the layer's weights. (Note that the first call() may also + # create weights, independent of build().) + self.built = False + # Provides information about which inputs are compatible with the layer. + self._input_spec = None + + # SavedModel-related attributes. + # Record the build input shape for loading purposes. + # TODO(kathywu): Move this to Layer._set_save_spec once cl/290121460 is + # submitted. + self._build_input_shape = None + self._saved_model_inputs_spec = None + self._saved_model_arg_spec = None + + # `Layer.compute_mask` will be called at the end of `Layer.__call__` if + # `Layer.compute_mask` is overridden, or if the `Layer` subclass sets + # `self.supports_masking=True`. + self._supports_masking = not generic_utils.is_default(self.compute_mask) + + self._init_set_name(name) + self._activity_regularizer = regularizers.get( + kwargs.pop("activity_regularizer", None) + ) + self._maybe_create_attribute("_trainable_weights", []) + self._maybe_create_attribute("_non_trainable_weights", []) + self._updates = [] + # Object to store all thread local layer properties. + self._thread_local = threading.local() + # A list of zero-argument lambdas which return Tensors, used for + # variable regularizers. 
+        self._callable_losses = []
+        # A list of symbolic Tensors containing activity regularizers and
+        # losses manually added through `add_loss` in graph-building mode.
+        self._losses = []
+        # A list of metric instances corresponding to the symbolic metric
+        # tensors added using the `add_metric` API.
+        self._metrics = []
+        # Ensures the same metric is not added multiple times in
+        # `MirroredStrategy`.
+        self._metrics_lock = threading.Lock()
+
+        # Note that models also have a dtype policy, as they are layers. For
+        # functional models, the policy is only used in Model.compile, which
+        # wraps the optimizer with a LossScaleOptimizer if the policy name is
+        # "mixed_float16". Subclassed models additionally use the policy's
+        # compute and variable dtypes, like any ordinary layer.
+        self._set_dtype_policy(dtype)
+        # Boolean indicating whether the layer automatically casts its inputs
+        # to the layer's compute_dtype.
+        self._autocast = kwargs.get(
+            "autocast", base_layer_utils.v2_dtype_behavior_enabled()
+        )
+
+        # Tracks `TrackableDataStructure`s, `Module`s, and `Layer`s.
+        # Ordered by when the object was assigned as an attr.
+        # Entries are unique.
+        self._maybe_create_attribute("_self_tracked_trackables", [])
+
+        # These lists will be filled via successive calls
+        # to self._add_inbound_node().
+        # Used in symbolic mode only, only in conjunction with graph-networks.
+        self._inbound_nodes_value = []
+        self._outbound_nodes_value = []
+
+        self._init_call_fn_args()
+
+        # Whether the `call` method can be used to build a TF graph without
+        # issues. This attribute has no effect if the model is created using
+        # the Functional API. Instead, `model.dynamic` is determined based on
+        # the internal layers.
+        if not isinstance(dynamic, bool):
+            raise TypeError(
+                "Expected `dynamic` argument to be a boolean, "
+                f"but got: {dynamic}"
+            )
+        self._dynamic = dynamic
+
+        # Manage input shape information if passed.
+        if "input_dim" in kwargs and "input_shape" not in kwargs:
+            # Backwards compatibility: alias 'input_dim' to 'input_shape'.
+            kwargs["input_shape"] = (kwargs["input_dim"],)
+        if "input_shape" in kwargs or "batch_input_shape" in kwargs:
+            # In this case we will later create an input layer
+            # to insert before the current layer.
+            if "batch_input_shape" in kwargs:
+                batch_input_shape = tuple(kwargs["batch_input_shape"])
+            elif "input_shape" in kwargs:
+                if "batch_size" in kwargs:
+                    batch_size = kwargs["batch_size"]
+                else:
+                    batch_size = None
+                batch_input_shape = (batch_size,) + tuple(kwargs["input_shape"])
+            self._batch_input_shape = batch_input_shape
+
+        # Manage initial weight values if passed.
+        self._initial_weights = kwargs.get("weights", None)
+
+        # Whether the layer will track any layers that are set as attributes
+        # on itself as sub-layers; the weights from the sub-layers will be
+        # included in the parent layer's variables() as well. Defaults to
+        # `True`, which means auto tracking is turned on. Certain subclasses
+        # might want to turn it off, like the Sequential model.
+        self._auto_track_sub_layers = True
+
+        # For backwards compat reasons, most built-in layers do not guarantee
+        # that they will 100% preserve the structure of input args when saving
+        # / loading configs. E.g. they may un-nest an arg that is
+        # a list with one element.
+        self._preserve_input_structure_in_config = False
+
+        # Save outer name scope at layer declaration so that it is preserved
+        # at the actual layer construction.
+        self._name_scope_on_declaration = tf.get_current_name_scope()
+
+        # Save the temp regularization losses created in the DTensor use
+        # case. When DTensor is enabled, we will first create a
+        # LazyInitVariable and then a DVariable with the proper layout
+        # afterward. The weight regularization loss has to be created
+        # against the DVariable as well.
+        self._captured_weight_regularizer = []
+
+    @tf.__internal__.tracking.no_automatic_dependency_tracking
+    @generic_utils.default
+    def build(self, input_shape):
+        """Creates the variables of the layer (for subclass implementers).
+
+        This is a method that implementers of subclasses of `Layer` or `Model`
+        can override if they need a state-creation step in-between
+        layer instantiation and layer call. It is invoked automatically before
+        the first execution of `call()`.
+
+        This is typically used to create the weights of `Layer` subclasses
+        (at the discretion of the subclass implementer).
+
+        Args:
+          input_shape: Instance of `TensorShape`, or list of instances of
+            `TensorShape` if the layer expects a list of inputs
+            (one instance per input).
+        """
+        self._build_input_shape = input_shape
+        self.built = True
+
+    @doc_controls.for_subclass_implementers
+    def call(self, inputs, *args, **kwargs):
+        """This is where the layer's logic lives.
+
+        The `call()` method may not create state (except in its first
+        invocation, wrapping the creation of variables or other resources in
+        `tf.init_scope()`). It is recommended to create state, including
+        `tf.Variable` instances and nested `Layer` instances,
+        in `__init__()`, or in the `build()` method that is
+        called automatically before `call()` executes for the first time.
+
+        Args:
+          inputs: Input tensor, or dict/list/tuple of input tensors.
+            The first positional `inputs` argument is subject to special
+            rules:
+            - `inputs` must be explicitly passed. A layer cannot have zero
+              arguments, and `inputs` cannot be provided via the default value
+              of a keyword argument.
+            - NumPy array or Python scalar values in `inputs` get cast as
+              tensors.
+            - Keras mask metadata is only collected from `inputs`.
+            - Layers are built (`build(input_shape)` method)
+              using shape info from `inputs` only.
+            - `input_spec` compatibility is only checked against `inputs`.
+            - Mixed precision input casting is only applied to `inputs`.
+              If a layer has tensor arguments in `*args` or `**kwargs`, their
+              casting behavior in mixed precision should be handled manually.
+            - The SavedModel input specification is generated using `inputs`
+              only.
+            - Integration with various ecosystem packages like TFMOT, TFLite,
+              TF.js, etc. is only supported for `inputs` and not for tensors
+              in positional and keyword arguments.
+          *args: Additional positional arguments. May contain tensors,
+            although this is not recommended, for the reasons above.
+          **kwargs: Additional keyword arguments. May contain tensors,
+            although this is not recommended, for the reasons above.
+            The following optional keyword arguments are reserved:
+            - `training`: Boolean scalar tensor or Python boolean indicating
+              whether the `call` is meant for training or inference.
+            - `mask`: Boolean input mask. If the layer's `call()` method takes
+              a `mask` argument, its default value will be set to the mask
+              generated for `inputs` by the previous layer (if `inputs` did
+              come from a layer that generated a corresponding mask, i.e. if
+              it came from a Keras layer with masking support).
+
+        Returns:
+          A tensor or list/tuple of tensors.
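A minimal sketch of the reserved `training` keyword described in the `call()` docstring above (not part of the patch; the layer name and noise level are invented):

```python
import tensorflow as tf

class NoiseWhenTraining(tf.keras.layers.Layer):
    # Uses the reserved `training` keyword: noise is added only in
    # training mode, and the argument is resolved by `__call__` when
    # the caller does not pass it explicitly.
    def call(self, inputs, training=None):
        if training:
            return inputs + tf.random.normal(tf.shape(inputs), stddev=0.1)
        return inputs

layer = NoiseWhenTraining()
x = tf.zeros((2, 3))
assert bool(tf.reduce_all(layer(x, training=False) == 0.0))
```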
+ """ + return inputs - @property - @doc_controls.do_not_generate_docs - def variables(self): - """Returns the list of all layer variables/weights. + @doc_controls.for_subclass_implementers + def add_weight( + self, + name=None, + shape=None, + dtype=None, + initializer=None, + regularizer=None, + trainable=None, + constraint=None, + use_resource=None, + synchronization=tf.VariableSynchronization.AUTO, + aggregation=tf.VariableAggregation.NONE, + **kwargs, + ): + """Adds a new variable to the layer. + + Args: + name: Variable name. + shape: Variable shape. Defaults to scalar if unspecified. + dtype: The type of the variable. Defaults to `self.dtype`. + initializer: Initializer instance (callable). + regularizer: Regularizer instance (callable). + trainable: Boolean, whether the variable should be part of the layer's + "trainable_variables" (e.g. variables, biases) + or "non_trainable_variables" (e.g. BatchNorm mean and variance). + Note that `trainable` cannot be `True` if `synchronization` + is set to `ON_READ`. + constraint: Constraint instance (callable). + use_resource: Whether to use a `ResourceVariable` or not. + See [this guide]( + https://www.tensorflow.org/guide/migrate/tf1_vs_tf2#resourcevariables_instead_of_referencevariables) + for more information. + synchronization: Indicates when a distributed a variable will be + aggregated. Accepted values are constants defined in the class + `tf.VariableSynchronization`. By default the synchronization is set + to `AUTO` and the current `DistributionStrategy` chooses when to + synchronize. If `synchronization` is set to `ON_READ`, `trainable` + must not be set to `True`. + aggregation: Indicates how a distributed variable will be aggregated. + Accepted values are constants defined in the class + `tf.VariableAggregation`. + **kwargs: Additional keyword arguments. Accepted values are `getter`, + `collections`, `experimental_autocast` and `caching_device`. + + Returns: + The variable created. + + Raises: + ValueError: When giving unsupported dtype and no initializer or when + trainable has been set to True with synchronization set as + `ON_READ`. + """ + if shape is None: + shape = () + kwargs.pop("partitioner", None) # Ignored. + # Validate optional keyword arguments. + for kwarg in kwargs: + if kwarg not in [ + "collections", + "experimental_autocast", + "caching_device", + "getter", + "layout", + "experimental_enable_variable_lifting", + ]: + raise TypeError("Unknown keyword argument:", kwarg) + collections_arg = kwargs.pop("collections", None) + # 'experimental_autocast' can be set to False by the caller to indicate + # an AutoCastVariable should never be created. + autocast = kwargs.pop("experimental_autocast", True) + # See the docstring for tf.Variable about the details for + # caching_device. + caching_device = kwargs.pop("caching_device", None) + + layout = kwargs.pop("layout", None) + # Specially handling of auto layout fetch, based on the variable name + # and attribute name. For built-in keras layers, usually the variable + # name, eg 'kernel', will match with a 'kernel_layout' attribute name on + # the instance. We will try to do this auto fetch if layout is not + # explicitly specified. This is mainly a quick workaround for not + # applying too many interface change to built-in layers, until DTensor + # is a public API. Also see dtensor.utils.allow_initializer_layout for + # more details. + # TODO(scottzhu): Remove this once dtensor is public to end user. 
+ if not layout and name: + layout = getattr(self, name + "_layout", None) + + if dtype is None: + dtype = self.dtype or backend.floatx() + dtype = tf.as_dtype(dtype) + if self._dtype_policy.variable_dtype is None: + # The policy is "_infer", so we infer the policy from the variable + # dtype. + self._set_dtype_policy(policy.Policy(dtype.base_dtype.name)) + initializer = initializers.get(initializer) + regularizer = regularizers.get(regularizer) + constraint = constraints.get(constraint) + + if synchronization == tf.VariableSynchronization.ON_READ: + if trainable: + raise ValueError( + "Synchronization value can be set to " + "VariableSynchronization.ON_READ only for non-trainable " + "variables. You have specified trainable=True and " + "synchronization=VariableSynchronization.ON_READ." + ) + else: + # Set trainable to False when the variable is to be synced on + # read. + trainable = False + elif trainable is None: + trainable = True + + # Initialize the variable when no initializer is provided. + if initializer is None: + # If dtype is DT_FLOAT, provide a uniform unit scaling initializer + if dtype.is_floating: + initializer = initializers.get("glorot_uniform") + # If dtype is DT_INT/DT_UINT, provide a default value `zero` + # If dtype is DT_BOOL, provide a default value `FALSE` + elif dtype.is_integer or dtype.is_unsigned or dtype.is_bool: + initializer = initializers.get("zeros") + # NOTE: Do we need to support handling DT_STRING and DT_COMPLEX + # here? + elif "getter" not in kwargs: + # When `getter` is specified, it's possibly fine for + # `initializer` to be None since it's up to the custom `getter` + # to raise an error in case it indeed needs `initializer`. + raise ValueError( + f"An initializer for variable {name} of type " + f"{dtype.base_dtype} is required for layer " + f"{self.name}. Received: {initializer}." + ) + + getter = kwargs.pop("getter", base_layer_utils.make_variable) + if ( + autocast + and self._dtype_policy.compute_dtype + != self._dtype_policy.variable_dtype + and dtype.is_floating + ): + old_getter = getter + + # Wrap the variable constructor to return an AutoCastVariable. + def getter(*args, **kwargs): + variable = old_getter(*args, **kwargs) + return autocast_variable.create_autocast_variable(variable) + + # Also, the caching_device does not work with the mixed precision + # API; disable it if it is specified. + # TODO(b/142020079): Re-enable it once the bug is fixed. + if caching_device is not None: + tf_logging.warning( + "`caching_device` does not work with mixed precision API. " + "Ignoring user specified `caching_device`." + ) + caching_device = None + if layout: + getter = functools.partial(getter, layout=layout) + + variable = self._add_variable_with_custom_getter( + name=name, + shape=shape, + # TODO(allenl): a `make_variable` equivalent should be added as a + # `Trackable` method. + getter=getter, + # Manage errors in Layer rather than Trackable. + overwrite=True, + initializer=initializer, + dtype=dtype, + constraint=constraint, + trainable=trainable, + use_resource=use_resource, + collections=collections_arg, + synchronization=synchronization, + aggregation=aggregation, + caching_device=caching_device, + ) + if regularizer is not None: + # TODO(fchollet): in the future, this should be handled at the + # level of variable creation, and weight regularization losses + # should be variable attributes.
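The `add_weight()` defaults traced through above (glorot_uniform for floats, zeros for integer/bool dtypes, `trainable` forced to `False` under `ON_READ`) can be seen in a short sketch; the `Scaler` layer and its weight names are illustrative, not part of the diff:

```python
import tensorflow as tf

class Scaler(tf.keras.layers.Layer):
    def build(self, input_shape):
        # Trainable by default; the l2 penalty is created lazily and
        # surfaces through `self.losses`.
        self.scale = self.add_weight(
            name="scale", shape=(), initializer="ones",
            regularizer=tf.keras.regularizers.l2(0.01),
        )
        # ON_READ variables must be non-trainable; passing trainable=True
        # here would raise the ValueError shown above.
        self.calls = self.add_weight(
            name="calls", shape=(), dtype=tf.int64, initializer="zeros",
            synchronization=tf.VariableSynchronization.ON_READ,
            aggregation=tf.VariableAggregation.SUM,
        )

    def call(self, inputs):
        self.calls.assign_add(1)
        return inputs * self.scale

layer = Scaler()
_ = layer(tf.ones((2, 3)))
print(len(layer.losses), int(layer.calls))  # 1 1
```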
+ name_in_scope = variable.name[: variable.name.find(":")] + self._handle_weight_regularization( + name_in_scope, variable, regularizer + ) + if base_layer_utils.is_split_variable(variable): + for v in variable: + backend.track_variable(v) + if trainable: + self._trainable_weights.append(v) + else: + self._non_trainable_weights.append(v) + else: + backend.track_variable(variable) + if trainable: + self._trainable_weights.append(variable) + else: + self._non_trainable_weights.append(variable) + return variable + + def __new__(cls, *args, **kwargs): + # Generate a config to be returned by default by `get_config()`. + arg_names = tf_inspect.getfullargspec(cls.__init__).args + kwargs.update(dict(zip(arg_names[1 : len(args) + 1], args))) + instance = super(Layer, cls).__new__(cls, *args, **kwargs) + # For safety, we only rely on auto-configs for a small set of + # serializable types. + supported_types = (str, int, float, bool, type(None)) + try: + flat_arg_values = tf.nest.flatten(kwargs) + auto_get_config = True + for value in flat_arg_values: + if not isinstance(value, supported_types): + auto_get_config = False + break + except TypeError: + auto_get_config = False + try: + instance._auto_get_config = auto_get_config + if auto_get_config: + instance._auto_config = serialization_lib.Config(**kwargs) + except RecursionError: + # Setting an instance attribute in __new__ has the potential + # to trigger an infinite recursion if a subclass overrides + # `__setattr__` in an unsafe way. + pass + return instance + + @generic_utils.default + def get_config(self): + """Returns the config of the layer. + + A layer config is a Python dictionary (serializable) + containing the configuration of a layer. + The same layer can be reinstantiated later + (without its trained weights) from this configuration. + + The config of a layer does not include connectivity + information, nor the layer class name. These are handled + by `Network` (one layer of abstraction above). + + Note that `get_config()` does not guarantee to return a fresh copy of + the dict every time it is called. Callers should make a copy of the + returned dict if they want to modify it. + + Returns: + Python dictionary. + """ + config = { + "name": self.name, + "trainable": self.trainable, + } + config["dtype"] = policy.serialize(self._dtype_policy) + if hasattr(self, "_batch_input_shape"): + config["batch_input_shape"] = self._batch_input_shape + + if not generic_utils.is_default(self.get_config): + # In this case the subclass implements get_config() + return config + + # In this case the subclass doesn't implement get_config(): + # Let's see if we can autogenerate it. + if getattr(self, "_auto_get_config", False): + xtra_args = set(config.keys()) + config.update(self._auto_config.config) + # Remove args not explicitly supported + argspec = tf_inspect.getfullargspec(self.__init__) + if argspec.varkw != "kwargs": + for key in xtra_args - xtra_args.intersection(argspec.args[1:]): + config.pop(key, None) + return config + else: + raise NotImplementedError( + textwrap.dedent( + f""" + Layer {self.__class__.__name__} was created by passing + non-serializable argument values in `__init__()`, + and therefore the layer must override `get_config()` in + order to be serializable. Please implement `get_config()`.
+ + Example: + + class CustomLayer(keras.layers.Layer): + def __init__(self, arg1, arg2, **kwargs): + super().__init__(**kwargs) + self.arg1 = arg1 + self.arg2 = arg2 + + def get_config(self): + config = super().get_config() + config.update({{ + "arg1": self.arg1, + "arg2": self.arg2, + }}) + return config""" + ) + ) + + @classmethod + def from_config(cls, config): + """Creates a layer from its config. + + This method is the reverse of `get_config`, + capable of instantiating the same layer from the config + dictionary. It does not handle layer connectivity + (handled by Network), nor weights (handled by `set_weights`). + + Args: + config: A Python dictionary, typically the + output of get_config. + + Returns: + A layer instance. + """ + try: + return cls(**config) + except Exception as e: + raise TypeError( + f"Error when deserializing class '{cls.__name__}' using " + f"config={config}.\n\nException encountered: {e}" + ) + + def compute_output_shape(self, input_shape): + """Computes the output shape of the layer. + + This method will cause the layer's state to be built, if that has not + happened before. This requires that the layer will later be used with + inputs that match the input shape provided here. + + Args: + input_shape: Shape tuple (tuple of integers) or `tf.TensorShape`, + or structure of shape tuples / `tf.TensorShape` instances + (one per output tensor of the layer). + Shape tuples can include None for free dimensions, + instead of an integer. + + Returns: + A `tf.TensorShape` instance + or structure of `tf.TensorShape` instances. + """ + if tf.executing_eagerly(): + # In this case we build the model first in order to do shape + # inference. This is acceptable because the framework only calls + # `compute_output_shape` on shape values that the layer would later + # be built for. It would however cause issues in case a user + # attempts to use `compute_output_shape` manually with shapes that + # are incompatible with the shape the Layer will be called on (these + # users will have to implement `compute_output_shape` themselves). + self._maybe_build(input_shape) + graph_name = str(self.name) + "_scratch_graph" + with tf.__internal__.FuncGraph(graph_name).as_default(): + input_shape = tf_utils.convert_shapes( + input_shape, to_tuples=False + ) + + def _make_placeholder_like(shape): + ph = backend.placeholder(shape=shape, dtype=self.dtype) + ph._keras_mask = None + return ph + + inputs = tf.nest.map_structure( + _make_placeholder_like, input_shape + ) + try: + outputs = self(inputs, training=False) + except TypeError as e: + raise NotImplementedError( + "We could not automatically infer the static shape of " + "the layer's output. Please implement the " + "`compute_output_shape` method on your layer (%s)." + % self.__class__.__name__ + ) from e + return tf.nest.map_structure(lambda t: t.shape, outputs) + raise NotImplementedError( + "Please run in eager mode or implement the `compute_output_shape` " + "method on your layer (%s)." % self.__class__.__name__ + ) + + @doc_controls.for_subclass_implementers + def compute_output_signature(self, input_signature): + """Compute the output tensor signature of the layer based on the inputs. + + Unlike a TensorShape object, a TensorSpec object contains both shape + and dtype information for a tensor. This method allows layers to provide + output dtype information if it is different from the input dtype. 
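A quick sketch of the two paths `compute_output_shape` can take: the automatic eager path (scratch `FuncGraph` plus placeholders, as implemented above) for a layer that does not override it, and an explicit override for a layer whose output shape cannot be inferred statically. The `TimesTwo` and `Halve` layers are hypothetical, not part of the diff:

```python
import tensorflow as tf

class TimesTwo(tf.keras.layers.Layer):
    def call(self, inputs):
        return inputs * 2.0

# No override: the base implementation builds the layer in a scratch
# FuncGraph, calls it on placeholders, and reads off the static shape.
print(TimesTwo().compute_output_shape((None, 4)))  # (None, 4)

class Halve(tf.keras.layers.Layer):
    def call(self, inputs):
        return inputs[:, : tf.shape(inputs)[1] // 2]

    def compute_output_shape(self, input_shape):
        # Explicit override: static inference cannot recover this
        # data-dependent slice, so we spell out the shape math.
        batch, features = tf.TensorShape(input_shape).as_list()
        return tf.TensorShape(
            [batch, None if features is None else features // 2]
        )

print(Halve().compute_output_shape((None, 10)))  # (None, 5)
```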
+ For any layer that doesn't implement this function, + the framework will fall back to using `compute_output_shape`, and will + assume that the output dtype matches the input dtype. + + Args: + input_signature: Single TensorSpec or nested structure of TensorSpec + objects, describing a candidate input for the layer. + + Returns: + Single TensorSpec or nested structure of TensorSpec objects, + describing how the layer would transform the provided input. + + Raises: + TypeError: If input_signature contains a non-TensorSpec object. + """ + + def check_type_return_shape(s): + if not isinstance(s, tf.TensorSpec): + raise TypeError( + "Only TensorSpec signature types are supported. " + f"Received: {s}." + ) + return s.shape + + input_shape = tf.nest.map_structure( + check_type_return_shape, input_signature + ) + output_shape = self.compute_output_shape(input_shape) - Alias of `self.weights`. + try: + dtype = self.output.dtype + except AttributeError: + dtype = self._compute_dtype + + if dtype is None: + input_dtypes = [s.dtype for s in tf.nest.flatten(input_signature)] + # Default behavior when self.dtype is None, is to use the first + # input's dtype. + dtype = input_dtypes[0] + return tf.nest.map_structure( + lambda s: tf.TensorSpec(dtype=dtype, shape=s), output_shape + ) + + @generic_utils.default + def compute_mask(self, inputs, mask=None): + """Computes an output mask tensor. + + Args: + inputs: Tensor or list of tensors. + mask: Tensor or list of tensors. + + Returns: + None or a tensor (or list of tensors, + one per output tensor of the layer). + """ + if not self._supports_masking: + if any(m is not None for m in tf.nest.flatten(mask)): + raise TypeError( + "Layer " + self.name + " does not support masking, " + "but was passed an input_mask: " + str(mask) + ) + # masking not explicitly supported: return None as mask. + return None + # if masking is explicitly supported, by default + # carry over the input mask + return mask + + @traceback_utils.filter_traceback + def __call__(self, *args, **kwargs): + """Wraps `call`, applying pre- and post-processing steps. + + Args: + *args: Positional arguments to be passed to `self.call`. + **kwargs: Keyword arguments to be passed to `self.call`. + + Returns: + Output tensor(s). + + Note: + - The following optional keyword arguments are reserved for specific + uses: + * `training`: Boolean scalar tensor or Python boolean indicating + whether the `call` is meant for training or inference. + * `mask`: Boolean input mask. + - If the layer's `call` method takes a `mask` argument (as some Keras + layers do), its default value will be set to the mask generated + for `inputs` by the previous layer (if `inputs` did come from + a layer that generated a corresponding mask, i.e. if it came from + a Keras layer with masking support). + - If the layer is not built, the method will call `build`. + + Raises: + ValueError: if the layer's `call` method returns None (an invalid + value). + RuntimeError: if `super().__init__()` was not called in the + constructor. + """ + if not hasattr(self, "_thread_local"): + raise RuntimeError( + "You must call `super().__init__()` in the layer constructor." + ) + + # `inputs` (the first arg in the method spec) is special-cased in + # layer call due to historical reasons. + # This special casing currently takes the form of: + # - `inputs` must be explicitly passed. A layer cannot have zero + # arguments, and `inputs` cannot have been provided via the default + # value of a kwarg.
# - numpy/scalar values in `inputs` get converted to tensors + # - implicit masks / mask metadata are only collected from `inputs` + # - Layers are built using shape info from `inputs` only + # - input_spec compatibility is only checked against `inputs` + # - mixed precision casting (autocast) is only applied to `inputs`, + # not to any other argument. + inputs, args, kwargs = self._call_spec.split_out_first_arg(args, kwargs) + input_list = tf.nest.flatten(inputs) + + # Functional Model construction mode is invoked when `Layer`s are called + # on symbolic `KerasTensor`s, i.e.: + # >> inputs = tf.keras.Input(10) + # >> outputs = MyLayer()(inputs) # Functional construction mode. + # >> model = tf.keras.Model(inputs, outputs) + if _in_functional_construction_mode( + self, inputs, args, kwargs, input_list + ): + return self._functional_construction_call( + inputs, args, kwargs, input_list + ) + + # Maintains info about the `Layer.call` stack. + call_context = base_layer_utils.call_context() + + # Accept NumPy and scalar inputs by converting to Tensors. + if any( + isinstance(x, (tf.Tensor, np.ndarray, float, int)) + for x in input_list + ): + inputs = tf.nest.map_structure( + _convert_numpy_or_python_types, inputs + ) + input_list = tf.nest.flatten(inputs) + + # Handle `mask` propagation from previous layer to current layer. Masks + # can be propagated explicitly via the `mask` argument, or implicitly + # via setting the `_keras_mask` attribute on the inputs to a Layer. + # Masks passed explicitly take priority. + input_masks, mask_is_implicit = self._get_input_masks( + inputs, input_list, args, kwargs + ) + if self._expects_mask_arg and mask_is_implicit: + kwargs["mask"] = input_masks + + # Training mode for `Layer.call` is set via (in order of priority): + # (1) The `training` argument passed to this `Layer.call`, if it is not + # None + # (2) The training mode of an outer `Layer.call`. + # (3) The default mode set by `tf.keras.backend.set_learning_phase` (if + # set) + # (4) Any non-None default value for `training` specified in the call + # signature + # (5) False (treating the layer as if it's in inference) + args, kwargs, training_mode = self._set_training_mode( + args, kwargs, call_context + ) + + # Losses are cleared for all sublayers on the outermost `Layer.call`. + # Losses are not cleared on inner `Layer.call`s, because sublayers can + # be called multiple times.
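The five-level training-mode resolution listed above can be observed directly; a small sketch (not part of the diff) using the built-in `Dropout` layer:

```python
import tensorflow as tf

drop = tf.keras.layers.Dropout(0.5)
x = tf.ones((4, 10))

# (1) An explicit `training` argument has the highest priority.
y = drop(x, training=True)   # Dropout is active; some entries are zeroed.

# (5) With no explicit argument, no outer call, and no learning phase
# set, the layer falls back to inference mode (training=False).
y = drop(x)
print(bool(tf.reduce_all(y == x)))  # True: dropout was a no-op
```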
+ if not call_context.in_call: + self._clear_losses() + + eager = tf.executing_eagerly() + with call_context.enter( + layer=self, + inputs=inputs, + build_graph=not eager, + training=training_mode, + ): + input_spec.assert_input_compatibility( + self.input_spec, inputs, self.name + ) + + if eager: + call_fn = self.call + name_scope = self._name + else: + name_scope = self._get_unnested_name_scope() + call_fn = self._autographed_call() + + call_fn = traceback_utils.inject_argument_info_in_traceback( + call_fn, + object_name=( + f"layer '{self.name}' (type {self.__class__.__name__})" + ), + ) + with contextlib.ExitStack() as namescope_stack: + if _is_name_scope_on_model_declaration_enabled: + namescope_stack.enter_context( + _name_scope_unnester(self._name_scope_on_declaration) + ) + namescope_stack.enter_context(tf.name_scope(name_scope)) + + if not self.built: + self._maybe_build(inputs) + + if self._autocast: + inputs = self._maybe_cast_inputs(inputs, input_list) + + with autocast_variable.enable_auto_cast_variables( + self._compute_dtype_object + ): + outputs = call_fn(inputs, *args, **kwargs) + + if self._activity_regularizer: + self._handle_activity_regularization(inputs, outputs) + if self._supports_masking: + self._set_mask_metadata( + inputs, outputs, input_masks, not eager + ) + if self._saved_model_inputs_spec is None: + self._set_save_spec(inputs, args, kwargs) + + return outputs + + def _get_unnested_name_scope(self): + if _is_name_scope_on_model_declaration_enabled: + with _name_scope_unnester( + self._name_scope_on_declaration + ) as relative_name_scope_on_declaration: + # To avoid `tf.name_scope` autoincrement, use absolute path. + relative_name_scope = filter( + None, + [ + tf.get_current_name_scope(), + relative_name_scope_on_declaration, + ], + ) + current_name_scope = "/".join(relative_name_scope) + "/" + if current_name_scope == "/": + current_name_scope = self._name_scope_on_declaration + with tf.name_scope(current_name_scope): + name_scope = self._name_scope() # Avoid autoincrementing. + else: + name_scope = self._name_scope() + + return name_scope + + @property + def dtype(self): + """The dtype of the layer weights. + + This is equivalent to `Layer.dtype_policy.variable_dtype`. Unless + mixed precision is used, this is the same as `Layer.compute_dtype`, the + dtype of the layer's computations. + """ + return self._dtype_policy.variable_dtype + + @property + def name(self): + """Name of the layer (string), set in the constructor.""" + return self._name + + @property + def supports_masking(self): + """Whether this layer supports computing a mask using `compute_mask`.""" + return self._supports_masking + + @supports_masking.setter + def supports_masking(self, value): + self._supports_masking = value + + @property + def dynamic(self): + """Whether the layer is dynamic (eager-only); set in the constructor.""" + return any(layer._dynamic for layer in self._flatten_layers()) + + @property + @doc_controls.do_not_doc_inheritable + def stateful(self): + return any(layer._stateful for layer in self._flatten_layers()) + + @stateful.setter + def stateful(self, value): + self._stateful = value + + @property + def trainable(self): + return self._trainable + + @trainable.setter + def trainable(self, value): + """Sets trainable attribute for the layer and its sublayers. + + When this value is changed during training (e.g. 
with a + `tf.keras.callbacks.Callback`) you need to call the parent + `tf.keras.Model.make_train_function` with `force=True` in order to + recompile the training graph. + + Args: + value: Boolean with the desired state for the layer's trainable + attribute. + """ + for layer in self._flatten_layers(): + layer._trainable = value + + @property + def activity_regularizer(self): + """Optional regularizer function for the output of this layer.""" + return self._activity_regularizer + + @activity_regularizer.setter + def activity_regularizer(self, regularizer): + """Optional regularizer function for the output of this layer.""" + self._activity_regularizer = regularizer + + @property + def input_spec(self): + """`InputSpec` instance(s) describing the input format for this layer. + + When you create a layer subclass, you can set `self.input_spec` to + enable the layer to run input compatibility checks when it is called. + Consider a `Conv2D` layer: it can only be called on a single input + tensor of rank 4. As such, you can set, in `__init__()`: + + ```python + self.input_spec = tf.keras.layers.InputSpec(ndim=4) + ``` + + Now, if you try to call the layer on an input that isn't rank 4 + (for instance, an input of shape `(2,)`), it will raise a + nicely-formatted error: + + ``` + ValueError: Input 0 of layer conv2d is incompatible with the layer: + expected ndim=4, found ndim=1. Full shape received: [2] + ``` + + Input checks that can be specified via `input_spec` include: + - Structure (e.g. a single input, a list of 2 inputs, etc.) + - Shape + - Rank (ndim) + - Dtype + + For more information, see `tf.keras.layers.InputSpec`. + + Returns: + A `tf.keras.layers.InputSpec` instance, or nested structure thereof. + """ + return self._input_spec + + @input_spec.setter + # Must be decorated to prevent tracking, since the input_spec can be nested + # InputSpec objects. + @tf.__internal__.tracking.no_automatic_dependency_tracking + def input_spec(self, value): + for v in tf.nest.flatten(value): + if v is not None and not isinstance(v, input_spec.InputSpec): + raise TypeError( + "Layer input_spec must be an instance of InputSpec. " + "Got: {}".format(v) + ) + self._input_spec = value + + @property + def trainable_weights(self): + """List of all trainable weights tracked by this layer. + + Trainable weights are updated via gradient descent during training. + + Returns: + A list of trainable variables. + """ + self._update_trackables() + if self.trainable: + children_weights = self._gather_children_attribute( + "trainable_variables" + ) + return self._dedup_weights( + self._trainable_weights + children_weights + ) + else: + return [] + + @property + def non_trainable_weights(self): + """List of all non-trainable weights tracked by this layer. + + Non-trainable weights are *not* updated during training. They are + expected to be updated manually in `call()`. + + Returns: + A list of non-trainable variables. + """ + self._update_trackables() + if self.trainable: + children_weights = self._gather_children_attribute( + "non_trainable_variables" + ) + non_trainable_weights = ( + self._non_trainable_weights + children_weights + ) + else: + children_weights = self._gather_children_attribute("variables") + non_trainable_weights = ( + self._trainable_weights + + self._non_trainable_weights + + children_weights + ) + return self._dedup_weights(non_trainable_weights) + + @property + def weights(self): + """Returns the list of all layer variables/weights. + + Returns: + A list of variables.
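Note in the `non_trainable_weights` logic above that freezing a layer moves *all* of its weights into the non-trainable bucket; a minimal sketch (not part of the diff):

```python
import tensorflow as tf

layer = tf.keras.layers.Dense(2)
layer.build((None, 4))  # Creates kernel and bias.
print(len(layer.trainable_weights), len(layer.non_trainable_weights))  # 2 0

layer.trainable = False  # Freeze: applies to the layer and its sublayers.
print(len(layer.trainable_weights), len(layer.non_trainable_weights))  # 0 2
```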
+ """ + return self.trainable_weights + self.non_trainable_weights + + @property + @doc_controls.do_not_generate_docs + def updates(self): + warnings.warn( + "`layer.updates` will be removed in a future version. " + "This property should not be used in TensorFlow 2.0, " + "as `updates` are applied automatically.", + stacklevel=2, + ) + return [] + + @property + def losses(self): + """List of losses added using the `add_loss()` API. + + Variable regularization tensors are created when this property is + accessed, so it is eager safe: accessing `losses` under a + `tf.GradientTape` will propagate gradients back to the corresponding + variables. + + Examples: + + >>> class MyLayer(tf.keras.layers.Layer): + ... def call(self, inputs): + ... self.add_loss(tf.abs(tf.reduce_mean(inputs))) + ... return inputs + >>> l = MyLayer() + >>> l(np.ones((10, 1))) + >>> l.losses + [1.0] + + >>> inputs = tf.keras.Input(shape=(10,)) + >>> x = tf.keras.layers.Dense(10)(inputs) + >>> outputs = tf.keras.layers.Dense(1)(x) + >>> model = tf.keras.Model(inputs, outputs) + >>> # Activity regularization. + >>> len(model.losses) + 0 + >>> model.add_loss(tf.abs(tf.reduce_mean(x))) + >>> len(model.losses) + 1 + + >>> inputs = tf.keras.Input(shape=(10,)) + >>> d = tf.keras.layers.Dense(10, kernel_initializer='ones') + >>> x = d(inputs) + >>> outputs = tf.keras.layers.Dense(1)(x) + >>> model = tf.keras.Model(inputs, outputs) + >>> # Weight regularization. + >>> model.add_loss(lambda: tf.reduce_mean(d.kernel)) + >>> model.losses + [] + + Returns: + A list of tensors. + """ + collected_losses = [] + for layer in self._flatten_layers(): + # If any eager losses are present, we assume the model to be part of + # an eager training loop (either a custom one or the one used when + # `run_eagerly=True`) and so we always return just the eager losses. + if layer._eager_losses: + # Filter placeholder losses that may have been added by revived + # layers. (see base_layer_utils for details). + if ( + layer._eager_losses[0] + is not base_layer_utils.REVIVED_LOSS_PLACEHOLDER + ): + collected_losses.extend(layer._eager_losses) + else: + collected_losses.extend(layer._losses) + for regularizer in layer._callable_losses: + loss_tensor = regularizer() + if loss_tensor is not None: + collected_losses.append(loss_tensor) + return collected_losses + + def add_loss(self, losses, **kwargs): + """Add loss tensor(s), potentially dependent on layer inputs. + + Some losses (for instance, activity regularization losses) may be + dependent on the inputs passed when calling a layer. Hence, when reusing + the same layer on different inputs `a` and `b`, some entries in + `layer.losses` may be dependent on `a` and some on `b`. This method + automatically keeps track of dependencies. + + This method can be used inside a subclassed layer or model's `call` + function, in which case `losses` should be a Tensor or list of Tensors. + + Example: + + ```python + class MyLayer(tf.keras.layers.Layer): + def call(self, inputs): + self.add_loss(tf.abs(tf.reduce_mean(inputs))) + return inputs + ``` + + The same code works in distributed training: the input to `add_loss()` + is treated like a regularization loss and averaged across replicas + by the training loop (both built-in `Model.fit()` and compliant custom + training loops). + + The `add_loss` method can also be called directly on a Functional Model + during construction. In this case, any loss Tensors passed to this Model + must be symbolic and be able to be traced back to the model's `Input`s. 
+ These losses become part of the model's topology and are tracked in + `get_config`. + + Example: + + ```python + inputs = tf.keras.Input(shape=(10,)) + x = tf.keras.layers.Dense(10)(inputs) + outputs = tf.keras.layers.Dense(1)(x) + model = tf.keras.Model(inputs, outputs) + # Activity regularization. + model.add_loss(tf.abs(tf.reduce_mean(x))) + ``` + + If this is not the case for your loss (if, for example, your loss + references a `Variable` of one of the model's layers), you can wrap your + loss in a zero-argument lambda. These losses are not tracked as part of + the model's topology since they can't be serialized. + + Example: + + ```python + inputs = tf.keras.Input(shape=(10,)) + d = tf.keras.layers.Dense(10) + x = d(inputs) + outputs = tf.keras.layers.Dense(1)(x) + model = tf.keras.Model(inputs, outputs) + # Weight regularization. + model.add_loss(lambda: tf.reduce_mean(d.kernel)) + ``` + + Args: + losses: Loss tensor, or list/tuple of tensors. Rather than tensors, + losses may also be zero-argument callables which create a loss + tensor. + **kwargs: Used for backwards compatibility only. + """ + kwargs.pop("inputs", None) + if kwargs: + raise TypeError(f"Unknown keyword arguments: {kwargs.keys()}") + + def _tag_callable(loss): + """Tags callable loss tensor as `_unconditional_loss`.""" + if callable(loss): + # We run the loss without autocasting, as regularizers are often + # numerically unstable in float16. + with autocast_variable.enable_auto_cast_variables(None): + loss = loss() + if loss is None: + # Will be filtered out when computing the .losses property + return None + if not tf.is_tensor(loss): + loss = tf.convert_to_tensor(loss, dtype=backend.floatx()) + loss._unconditional_loss = True + return loss + + losses = tf.nest.flatten(losses) + + callable_losses = [] + eager_losses = [] + symbolic_losses = [] + for loss in losses: + if callable(loss): + callable_losses.append(functools.partial(_tag_callable, loss)) + continue + if loss is None: + continue + if not tf.is_tensor(loss) and not isinstance( + loss, keras_tensor.KerasTensor + ): + loss = tf.convert_to_tensor(loss, dtype=backend.floatx()) + # TF Functions should take the eager path. + if ( + tf_utils.is_symbolic_tensor(loss) + or isinstance(loss, keras_tensor.KerasTensor) + ) and not base_layer_utils.is_in_tf_function(): + symbolic_losses.append(loss) + elif tf.is_tensor(loss): + eager_losses.append(loss) + + self._callable_losses.extend(callable_losses) + + in_call_context = base_layer_utils.call_context().in_call + if eager_losses and not in_call_context: + raise ValueError( + "Expected a symbolic Tensor or a callable for the loss value. " + "Please wrap your loss computation in a zero-argument `lambda`." + ) + + self._eager_losses.extend(eager_losses) + + for symbolic_loss in symbolic_losses: + if getattr(self, "_is_graph_network", False): + self._graph_network_add_loss(symbolic_loss) + else: + # Possibly a loss was added in a Layer's `build`. + self._losses.append(symbolic_loss) + + @property + def metrics(self): + """List of metrics attached to the layer. + + Returns: + A list of `Metric` objects. + """ + collected_metrics = [] + for layer in self._flatten_layers(): + if not hasattr(layer, "_metrics_lock"): + continue + with layer._metrics_lock: + collected_metrics.extend(layer._metrics) + return collected_metrics + + @doc_controls.do_not_generate_docs + def add_metric(self, value, name=None, **kwargs): + """Adds a metric tensor to the layer.
+ + This method can be used inside the `call()` method of a subclassed layer + or model. + + ```python + class MyMetricLayer(tf.keras.layers.Layer): + def __init__(self): + super(MyMetricLayer, self).__init__(name='my_metric_layer') + self.mean = tf.keras.metrics.Mean(name='metric_1') + + def call(self, inputs): + self.add_metric(self.mean(inputs)) + self.add_metric(tf.reduce_sum(inputs), name='metric_2') + return inputs + ``` + + This method can also be called directly on a Functional Model during + construction. In this case, any tensor passed to this Model must + be symbolic and be able to be traced back to the model's `Input`s. These + metrics become part of the model's topology and are tracked when you + save the model via `save()`. + + ```python + inputs = tf.keras.Input(shape=(10,)) + x = tf.keras.layers.Dense(10)(inputs) + outputs = tf.keras.layers.Dense(1)(x) + model = tf.keras.Model(inputs, outputs) + model.add_metric(math_ops.reduce_sum(x), name='metric_1') + ``` + + Note: Calling `add_metric()` with the result of a metric object on a + Functional Model, as shown in the example below, is not supported. This + is because we cannot trace the metric result tensor back to the model's + inputs. + + ```python + inputs = tf.keras.Input(shape=(10,)) + x = tf.keras.layers.Dense(10)(inputs) + outputs = tf.keras.layers.Dense(1)(x) + model = tf.keras.Model(inputs, outputs) + model.add_metric(tf.keras.metrics.Mean()(x), name='metric_1') + ``` + + Args: + value: Metric tensor. + name: String metric name. + **kwargs: Additional keyword arguments for backward compatibility. + Accepted values: + `aggregation` - When the `value` tensor provided is not the result + of calling a `keras.Metric` instance, it will be aggregated by + default using a `keras.metrics.Mean`. + """ + kwargs_keys = list(kwargs.keys()) + if len(kwargs_keys) > 1 or ( + len(kwargs_keys) == 1 and kwargs_keys[0] != "aggregation" + ): + raise TypeError( + f"Unknown keyword arguments: {kwargs.keys()}. " + "Expected `aggregation`." + ) + + from_metric_obj = hasattr(value, "_metric_obj") + is_symbolic = isinstance(value, keras_tensor.KerasTensor) + in_call_context = base_layer_utils.call_context().in_call + + if name is None and not from_metric_obj: + # E.g. `self.add_metric(math_ops.reduce_sum(x))`. In eager mode, we + # use the metric name to look up a metric. Without a name, a new + # Mean metric wrapper will be created on every model/layer call. So, + # we raise an error when no name is provided. We will do the same + # for symbolic mode for consistency although a name will be + # generated if no name is provided. + + # We will not raise this error in the following use case for the + # sake of consistency, as the name is provided in the metric + # constructor. + # mean = metrics.Mean(name='my_metric') + # model.add_metric(mean(outputs)) + raise ValueError( + "Please provide a name for your metric like " + "`self.add_metric(tf.reduce_sum(inputs), " + "name='mean_activation')`" + ) + elif from_metric_obj: + name = value._metric_obj.name + + if not in_call_context and not is_symbolic: + raise ValueError( + "Expected a symbolic Tensor for the metric value, received: " + + str(value) + ) + + # If a metric was added in a Layer's `call` or `build`. + if in_call_context or not getattr(self, "_is_graph_network", False): + # TF Function path should take the eager path. + + # If the given metric is available in the `metrics` list, we just + # update state on it, otherwise we create a new metric instance and + # add it to the `metrics` list.
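The lookup-by-name behavior described in that comment is why the name requirement exists: on each call, the named `Mean` wrapper is found and updated rather than recreated. A brief sketch (not part of the diff; `ActivationStats` is illustrative):

```python
import tensorflow as tf

class ActivationStats(tf.keras.layers.Layer):
    def call(self, inputs):
        # An unnamed, non-Metric tensor would raise ValueError here; the
        # name lets the implicit Mean wrapper be looked up on later calls.
        self.add_metric(tf.reduce_mean(inputs), name="mean_activation")
        return inputs

layer = ActivationStats()
_ = layer(tf.ones((2, 3)))
print([m.name for m in layer.metrics])    # ['mean_activation']
print(layer.metrics[0].result().numpy())  # 1.0
```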
+ metric_obj = getattr(value, "_metric_obj", None) + # Tensors that come from a Metric object already updated the Metric + # state. + should_update_state = not metric_obj + name = metric_obj.name if metric_obj else name + + with self._metrics_lock: + match = self._get_existing_metric(name) + if match: + metric_obj = match + elif metric_obj: + self._metrics.append(metric_obj) + else: + # Build the metric object with the value's dtype if it + # defines one + metric_obj = metrics_mod.Mean( + name=name, dtype=getattr(value, "dtype", None) + ) + self._metrics.append(metric_obj) + + if should_update_state: + metric_obj(value) + else: + if from_metric_obj: + raise ValueError( + "Using the result of calling a `Metric` object " + "when calling `add_metric` on a Functional " + "Model is not supported. Please pass the " + "Tensor to monitor directly." + ) + + # Insert layers into the Keras Graph Network. + aggregation = None if from_metric_obj else "mean" + self._graph_network_add_metric(value, aggregation, name) + + @doc_controls.do_not_doc_inheritable + def add_update(self, updates): + """Add update op(s), potentially dependent on layer inputs. + + Weight updates (for instance, the updates of the moving mean and + variance in a BatchNormalization layer) may be dependent on the inputs + passed when calling a layer. Hence, when reusing the same layer on + different inputs `a` and `b`, some entries in `layer.updates` may be + dependent on `a` and some on `b`. This method automatically keeps track + of dependencies. + + This call is ignored when eager execution is enabled (in that case, + variable updates are run on the fly and thus do not need to be tracked + for later execution). + + Args: + updates: Update op, or list/tuple of update ops, or zero-arg callable + that returns an update op. A zero-arg callable should be passed in + order to disable running the updates by setting `trainable=False` + on this Layer, when executing in Eager mode. + """ + call_context = base_layer_utils.call_context() + # No need to run updates during Functional API construction. + if call_context.in_keras_graph: + return + + # Callable updates are disabled by setting `trainable=False`. + if not call_context.frozen: + for update in tf.nest.flatten(updates): + if callable(update): + update() + + def set_weights(self, weights): + """Sets the weights of the layer, from NumPy arrays. + + The weights of a layer represent the state of the layer. This function + sets the weight values from numpy arrays. The weight values should be + passed in the order they are created by the layer. Note that the layer's + weights must be instantiated before calling this function, by calling + the layer. + + For example, a `Dense` layer returns a list of two values: the kernel + matrix and the bias vector. These can be used to set the weights of + another `Dense` layer: + + >>> layer_a = tf.keras.layers.Dense(1, + ... kernel_initializer=tf.constant_initializer(1.)) + >>> a_out = layer_a(tf.convert_to_tensor([[1., 2., 3.]])) + >>> layer_a.get_weights() + [array([[1.], + [1.], + [1.]], dtype=float32), array([0.], dtype=float32)] + >>> layer_b = tf.keras.layers.Dense(1, + ... 
kernel_initializer=tf.constant_initializer(2.)) + >>> b_out = layer_b(tf.convert_to_tensor([[10., 20., 30.]])) + >>> layer_b.get_weights() + [array([[2.], + [2.], + [2.]], dtype=float32), array([0.], dtype=float32)] + >>> layer_b.set_weights(layer_a.get_weights()) + >>> layer_b.get_weights() + [array([[1.], + [1.], + [1.]], dtype=float32), array([0.], dtype=float32)] + + Args: + weights: a list of NumPy arrays. The number + of arrays and their shape must match + number of the dimensions of the weights + of the layer (i.e. it should match the + output of `get_weights`). + + Raises: + ValueError: If the provided weights list does not match the + layer's specifications. + """ + params = self.weights + + expected_num_weights = 0 + for param in params: + if isinstance(param, base_layer_utils.TrackableWeightHandler): + expected_num_weights += param.num_tensors + else: + expected_num_weights += 1 + + if expected_num_weights != len(weights): + raise ValueError( + 'You called `set_weights(weights)` on layer "%s" ' + "with a weight list of length %s, but the layer was " + "expecting %s weights. Provided weights: %s..." + % ( + self.name, + len(weights), + expected_num_weights, + str(weights)[:50], + ) + ) + + weight_index = 0 + weight_value_tuples = [] + for param in params: + if isinstance(param, base_layer_utils.TrackableWeightHandler): + num_tensors = param.num_tensors + tensors = weights[weight_index : weight_index + num_tensors] + param.set_weights(tensors) + weight_index += num_tensors + else: + weight = weights[weight_index] + weight_shape = weight.shape if hasattr(weight, "shape") else () + ref_shape = param.shape + if not ref_shape.is_compatible_with(weight_shape): + raise ValueError( + f"Layer {self.name} weight shape {ref_shape} " + "is not compatible with provided weight " + f"shape {weight_shape}." + ) + weight_value_tuples.append((param, weight)) + weight_index += 1 + + backend.batch_set_value(weight_value_tuples) + + # Perform any layer defined finalization of the layer state. + for layer in self._flatten_layers(): + layer.finalize_state() + + def get_weights(self): + """Returns the current weights of the layer, as NumPy arrays. + + The weights of a layer represent the state of the layer. This function + returns both trainable and non-trainable weight values associated with + this layer as a list of NumPy arrays, which can in turn be used to load + state into similarly parameterized layers. + + For example, a `Dense` layer returns a list of two values: the kernel + matrix and the bias vector. These can be used to set the weights of + another `Dense` layer: + + >>> layer_a = tf.keras.layers.Dense(1, + ... kernel_initializer=tf.constant_initializer(1.)) + >>> a_out = layer_a(tf.convert_to_tensor([[1., 2., 3.]])) + >>> layer_a.get_weights() + [array([[1.], + [1.], + [1.]], dtype=float32), array([0.], dtype=float32)] + >>> layer_b = tf.keras.layers.Dense(1, + ... kernel_initializer=tf.constant_initializer(2.)) + >>> b_out = layer_b(tf.convert_to_tensor([[10., 20., 30.]])) + >>> layer_b.get_weights() + [array([[2.], + [2.], + [2.]], dtype=float32), array([0.], dtype=float32)] + >>> layer_b.set_weights(layer_a.get_weights()) + >>> layer_b.get_weights() + [array([[1.], + [1.], + [1.]], dtype=float32), array([0.], dtype=float32)] + + Returns: + Weights values as a list of NumPy arrays. 
+ """ + weights = self.weights + output_weights = [] + for weight in weights: + if isinstance(weight, base_layer_utils.TrackableWeightHandler): + output_weights.extend(weight.get_tensors()) + else: + output_weights.append(weight) + return backend.batch_get_value(output_weights) + + @doc_controls.do_not_generate_docs + def finalize_state(self): + """Finalizes the layers state after updating layer weights. + + This function can be subclassed in a layer and will be called after + updating a layer weights. It can be overridden to finalize any + additional layer state after a weight update. + + This function will be called after weights of a layer have been restored + from a loaded model. + """ + pass - Note: This will not track the weights of nested `tf.Modules` that are not - themselves Keras layers. + @doc_controls.do_not_doc_inheritable + def get_input_mask_at(self, node_index): + """Retrieves the input mask tensor(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A mask tensor + (or list of tensors if the layer has multiple inputs). + """ + inputs = self.get_input_at(node_index) + if isinstance(inputs, list): + return [getattr(x, "_keras_mask", None) for x in inputs] + else: + return getattr(inputs, "_keras_mask", None) + + @doc_controls.do_not_doc_inheritable + def get_output_mask_at(self, node_index): + """Retrieves the output mask tensor(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A mask tensor + (or list of tensors if the layer has multiple outputs). + """ + output = self.get_output_at(node_index) + if isinstance(output, list): + return [getattr(x, "_keras_mask", None) for x in output] + else: + return getattr(output, "_keras_mask", None) + + @property + @doc_controls.do_not_doc_inheritable + def input_mask(self): + """Retrieves the input mask tensor(s) of a layer. + + Only applicable if the layer has exactly one inbound node, + i.e. if it is connected to one incoming layer. + + Returns: + Input mask tensor (potentially None) or list of input + mask tensors. + + Raises: + AttributeError: if the layer is connected to + more than one incoming layers. + """ + inputs = self.input + if isinstance(inputs, list): + return [getattr(x, "_keras_mask", None) for x in inputs] + else: + return getattr(inputs, "_keras_mask", None) + + @property + @doc_controls.do_not_doc_inheritable + def output_mask(self): + """Retrieves the output mask tensor(s) of a layer. + + Only applicable if the layer has exactly one inbound node, + i.e. if it is connected to one incoming layer. + + Returns: + Output mask tensor (potentially None) or list of output + mask tensors. + + Raises: + AttributeError: if the layer is connected to + more than one incoming layers. + """ + output = self.output + if isinstance(output, list): + return [getattr(x, "_keras_mask", None) for x in output] + else: + return getattr(output, "_keras_mask", None) + + @doc_controls.do_not_doc_inheritable + def get_input_shape_at(self, node_index): + """Retrieves the input shape(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. 
+ + Returns: + A shape tuple + (or list of shape tuples if the layer has multiple inputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index( + node_index, "input_shapes", "input shape" + ) + + @doc_controls.do_not_doc_inheritable + def get_output_shape_at(self, node_index): + """Retrieves the output shape(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A shape tuple + (or list of shape tuples if the layer has multiple outputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index( + node_index, "output_shapes", "output shape" + ) + + @doc_controls.do_not_doc_inheritable + def get_input_at(self, node_index): + """Retrieves the input tensor(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first input node of the layer. + + Returns: + A tensor (or list of tensors if the layer has multiple inputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index( + node_index, "input_tensors", "input" + ) + + @doc_controls.do_not_doc_inheritable + def get_output_at(self, node_index): + """Retrieves the output tensor(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first output node of the layer. + + Returns: + A tensor (or list of tensors if the layer has multiple outputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index( + node_index, "output_tensors", "output" + ) + + @property + def input(self): + """Retrieves the input tensor(s) of a layer. + + Only applicable if the layer has exactly one input, + i.e. if it is connected to one incoming layer. + + Returns: + Input tensor or list of input tensors. + + Raises: + RuntimeError: If called in Eager mode. + AttributeError: If no inbound nodes are found. + """ + if not self._inbound_nodes: + raise AttributeError( + "Layer " + self.name + " is not connected, no input to return." + ) + return self._get_node_attribute_at_index(0, "input_tensors", "input") + + @property + def output(self): + """Retrieves the output tensor(s) of a layer. + + Only applicable if the layer has exactly one output, + i.e. if it is connected to one incoming layer. + + Returns: + Output tensor or list of output tensors. + + Raises: + AttributeError: if the layer is connected to more than one incoming + layers. + RuntimeError: if called in Eager mode. + """ + if not self._inbound_nodes: + raise AttributeError( + "Layer " + self.name + " has no inbound nodes." + ) + return self._get_node_attribute_at_index(0, "output_tensors", "output") + + @property + @doc_controls.do_not_doc_inheritable + def input_shape(self): + """Retrieves the input shape(s) of a layer. + + Only applicable if the layer has exactly one input, + i.e. if it is connected to one incoming layer, or if all inputs + have the same shape. + + Returns: + Input shape, as an integer shape tuple + (or list of shape tuples, one tuple per input tensor). + + Raises: + AttributeError: if the layer has no defined input_shape. + RuntimeError: if called in Eager mode. 
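The single-node properties (`input`, `output`, `input_shape`) versus the node-indexed accessors (`get_input_at`, `get_input_shape_at`, ...) documented above are easiest to contrast on a small functional model; a sketch, not part of the diff:

```python
import tensorflow as tf

inputs = tf.keras.Input(shape=(10,))
x = tf.keras.layers.Dense(5, name="hidden")(inputs)
outputs = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs, outputs)

hidden = model.get_layer("hidden")
print(hidden.input.shape)   # (None, 10)
print(hidden.output.shape)  # (None, 5)
print(hidden.input_shape)   # (None, 10): one inbound node, so well-defined

# A layer reused on inputs of different shapes has multiple inbound
# nodes; `input_shape` raises there, so use the node-indexed accessors.
shared = tf.keras.layers.Activation("relu")
shared(tf.keras.Input(shape=(4,)))
shared(tf.keras.Input(shape=(6,)))
print(shared.get_input_shape_at(0))  # (None, 4)
print(shared.get_input_shape_at(1))  # (None, 6)
```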
+ """ + if not self._inbound_nodes: + raise AttributeError( + f'The layer "{self.name}" has never been called ' + "and thus has no defined input shape. Note that the " + "`input_shape` property is only available for " + "Functional and Sequential models." + ) + all_input_shapes = set( + [str(node.input_shapes) for node in self._inbound_nodes] + ) + if len(all_input_shapes) == 1: + return self._inbound_nodes[0].input_shapes + else: + raise AttributeError( + 'The layer "' + + str(self.name) + + '" has multiple inbound nodes, ' + "with different input shapes. Hence " + 'the notion of "input shape" is ' + "ill-defined for the layer. " + "Use `get_input_shape_at(node_index)` " + "instead." + ) + + def count_params(self): + """Count the total number of scalars composing the weights. + + Returns: + An integer count. + + Raises: + ValueError: if the layer isn't yet built + (in which case its weights aren't yet defined). + """ + if not self.built: + if getattr(self, "_is_graph_network", False): + with tf_utils.maybe_init_scope(self): + self._maybe_build(self.inputs) + else: + raise ValueError( + "You tried to call `count_params` " + f"on layer {self.name}" + ", but the layer isn't built. " + "You can build it manually via: " + f"`{self.name}.build(batch_input_shape)`." + ) + return layer_utils.count_params(self.weights) + + @property + @doc_controls.do_not_doc_inheritable + def output_shape(self): + """Retrieves the output shape(s) of a layer. + + Only applicable if the layer has one output, + or if all outputs have the same shape. + + Returns: + Output shape, as an integer shape tuple + (or list of shape tuples, one tuple per output tensor). + + Raises: + AttributeError: if the layer has no defined output shape. + RuntimeError: if called in Eager mode. + """ + if not self._inbound_nodes: + raise AttributeError( + f'The layer "{self.name}" has never been called ' + "and thus has no defined output shape." + ) + all_output_shapes = set( + [str(node.output_shapes) for node in self._inbound_nodes] + ) + if len(all_output_shapes) == 1: + return self._inbound_nodes[0].output_shapes + else: + raise AttributeError( + 'The layer "%s"' + " has multiple inbound nodes, " + "with different output shapes. Hence " + 'the notion of "output shape" is ' + "ill-defined for the layer. " + "Use `get_output_shape_at(node_index)` " + "instead." % self.name + ) + + @property + def dtype_policy(self): + """The dtype policy associated with this layer. + + This is an instance of a `tf.keras.mixed_precision.Policy`. + """ + return self._dtype_policy + + @property + def compute_dtype(self): + """The dtype of the layer's computations. + + This is equivalent to `Layer.dtype_policy.compute_dtype`. Unless + mixed precision is used, this is the same as `Layer.dtype`, the dtype of + the weights. + + Layers automatically cast their inputs to the compute dtype, which + causes computations and the output to be in the compute dtype as well. + This is done by the base Layer class in `Layer.__call__`, so you do not + have to insert these casts if implementing your own layer. + + Layers often perform certain internal computations in higher precision + when `compute_dtype` is float16 or bfloat16 for numeric stability. The + output will still typically be float16 or bfloat16 in such cases. + + Returns: + The layer's compute dtype. 
+ """ + return self._dtype_policy.compute_dtype + + @property + def variable_dtype(self): + """Alias of `Layer.dtype`, the dtype of the weights.""" + return self.dtype + + @property + @doc_controls.do_not_doc_inheritable + def inbound_nodes(self): + """Return Functional API nodes upstream of this layer.""" + return self._inbound_nodes + + @property + @doc_controls.do_not_doc_inheritable + def outbound_nodes(self): + """Return Functional API nodes downstream of this layer.""" + return self._outbound_nodes + + ############################################################################ + # Methods & attributes below are public aliases of other methods. # + ############################################################################ + + @property + @doc_controls.do_not_generate_docs + def variables(self): + """Returns the list of all layer variables/weights. + + Alias of `self.weights`. + + Note: This will not track the weights of nested `tf.Modules` that are + not themselves Keras layers. + + Returns: + A list of variables. + """ + return self.weights + + @property + @doc_controls.do_not_generate_docs + def trainable_variables(self): + return self.trainable_weights + + @property + @doc_controls.do_not_generate_docs + def non_trainable_variables(self): + return self.non_trainable_weights + + @doc_controls.do_not_doc_inheritable + def add_variable(self, *args, **kwargs): + """Deprecated, do NOT use! Alias for `add_weight`.""" + warnings.warn( + "`layer.add_variable` is deprecated and " + "will be removed in a future version. " + "Please use the `layer.add_weight()` method instead.", + stacklevel=2, + ) + return self.add_weight(*args, **kwargs) + + def get_build_config(self): + """Returns a dictionary with the layer's input shape. + + This method returns a config dict that can be used by + `build_from_config(config)` to create all states (e.g. Variables and + Lookup tables) needed by the layer. + + By default, the config only contains the input shape that the layer + was built with. If you're writing a custom layer that creates state in + an unusual way, you should override this method to make sure this state + is already created when Keras attempts to load its value upon model + loading. + + Returns: + A dict containing the input shape associated with the layer. + """ + if self._build_input_shape is not None: + + def convert_tensorshapes(x): + if isinstance(x, tf.TensorShape) and x._dims: + return tuple(x.as_list()) + return x + + return { + "input_shape": tf.nest.map_structure( + convert_tensorshapes, self._build_input_shape + ) + } + + def build_from_config(self, config): + """Builds the layer's states with the supplied config dict. + + By default, this method calls the `build(config["input_shape"])` method, + which creates weights based on the layer's input shape in the supplied + config. If your config contains other information needed to load the + layer's state, you should override this method. + + Args: + config: Dict containing the input shape associated with this layer. + """ + input_shape = config["input_shape"] + if input_shape is not None: + self.build(input_shape) + + ############################################################################ + # Methods & attributes below are all private and only used by the framework. + ############################################################################ + + # See tf.Module for the usage of this property. + # The key for _obj_reference_counts_dict is a Trackable, which could be a + # variable or layer etc. 
tf.Module._flatten will fail to flatten the key + # since it is trying to convert Trackable to a string. This attribute can be + # ignored even after the fix of nest lib, since the trackable object should + # already been available as individual attributes. + # _obj_reference_counts_dict just contains a copy of them. + _TF_MODULE_IGNORED_PROPERTIES = frozenset( + itertools.chain( + ("_obj_reference_counts_dict",), + tf.Module._TF_MODULE_IGNORED_PROPERTIES, + ) + ) + + # When loading from a SavedModel, Layers typically can be revived into a + # generic Layer wrapper. Sometimes, however, layers may implement methods + # that go beyond this wrapper, as in the case of PreprocessingLayers' + # `adapt` method. When this is the case, layer implementers can override + # must_restore_from_config to return True; layers with this property must + # be restored into their actual objects (and will fail if the object is + # not available to the restoration code). + _must_restore_from_config = False + + def _get_cell_name(self): + canonical_name = get_canonical_name_for_symbol( + self.__class__, api_name="keras", add_prefix_to_v1_names=True + ) + if canonical_name is not None: + return f"tf.{canonical_name}" + return self.__class__.__module__ + "." + self.__class__.__name__ + + def _instrument_layer_creation(self): + self._instrumented_keras_api = False + self._instrumented_keras_layer_class = False + self._instrumented_keras_model_class = False + if not getattr(self, "_disable_keras_instrumentation", False): + keras_api_gauge.get_cell("layer").set(True) + self._instrumented_keras_api = True + if getattr(self, "_is_model_for_instrumentation", False): + keras_models_gauge.get_cell(self._get_cell_name()).set(True) + self._instrumented_keras_model_class = True + else: + keras_layers_gauge.get_cell(self._get_cell_name()).set(True) + self._instrumented_keras_layer_class = True + else: + # This is a legacy layer that has disabled instrumentation + # as a native keras object. We still instrument this as + # legacy usage. + keras_api_gauge.get_cell("legacy_layer").set(True) + + @doc_controls.for_subclass_implementers + def _add_trackable(self, trackable_object, trainable): + """Adds a Trackable object to this layer's state. + + Args: + trackable_object: The tf.tracking.Trackable object to add. + trainable: Boolean, whether the variable should be part of the layer's + "trainable_variables" (e.g. variables, biases) or + "non_trainable_variables" (e.g. BatchNorm mean and variance). + + Returns: + The TrackableWeightHandler used to track this object. + """ + if isinstance( + trackable_object, base_layer_utils.TrackableWeightHandler + ): + handler = trackable_object + else: + handler = base_layer_utils.TrackableWeightHandler(trackable_object) + if trainable: + self._trainable_weights.append(handler) + else: + self._non_trainable_weights.append(handler) + return handler + + def _clear_losses(self): + """Used every step in eager to reset losses.""" + # Set to thread local directly to avoid Layer.__setattr__ overhead. + if not getattr( + self, "_self_tracked_trackables", None + ): # Fast path for single Layer. + self._thread_local._eager_losses = [] + else: + for layer in self._flatten_layers(): + layer._thread_local._eager_losses = [] + + def _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs): + if self.dynamic: + # We will use static shape inference to return symbolic tensors + # matching the specifications of the layer outputs. 
+ # Since `self.dynamic` is True, we will never attempt to + # run the underlying TF graph (which is disconnected). + # TODO(fchollet): consider py_func as an alternative, which + # would enable us to run the underlying graph if needed. + input_signature = tf.nest.map_structure( + lambda x: tf.TensorSpec(shape=x.shape, dtype=x.dtype), inputs + ) + output_signature = self.compute_output_signature(input_signature) + return tf.nest.map_structure( + keras_tensor.KerasTensor, output_signature + ) + else: + return self._infer_output_signature( + inputs, args, kwargs, input_masks + ) - Returns: - A list of variables. - """ - return self.weights - - @property - @doc_controls.do_not_generate_docs - def trainable_variables(self): - return self.trainable_weights - - @property - @doc_controls.do_not_generate_docs - def non_trainable_variables(self): - return self.non_trainable_weights - - @doc_controls.do_not_doc_inheritable - def add_variable(self, *args, **kwargs): - """Deprecated, do NOT use! Alias for `add_weight`.""" - warnings.warn( - '`layer.add_variable` is deprecated and ' - 'will be removed in a future version. ' - 'Please use the `layer.add_weight()` method instead.', - stacklevel=2) - return self.add_weight(*args, **kwargs) - - ############################################################################## - # Methods & attributes below are all private and only used by the framework. # - ############################################################################## - - # See tf.Module for the usage of this property. - # The key for _obj_reference_counts_dict is a Trackable, which could be a - # variable or layer etc. tf.Module._flatten will fail to flatten the key - # since it is trying to convert Trackable to a string. This attribute can be - # ignored even after the fix of nest lib, since the trackable object should - # already been available as individual attributes. _obj_reference_counts_dict - # just contains a copy of them. - _TF_MODULE_IGNORED_PROPERTIES = frozenset(itertools.chain( - ('_obj_reference_counts_dict',), - tf.Module._TF_MODULE_IGNORED_PROPERTIES - )) - - # When loading from a SavedModel, Layers typically can be revived into a - # generic Layer wrapper. Sometimes, however, layers may implement methods - # that go beyond this wrapper, as in the case of PreprocessingLayers' - # `adapt` method. When this is the case, layer implementers can override - # must_restore_from_config to return True; layers with this property must - # be restored into their actual objects (and will fail if the object is - # not available to the restoration code). - _must_restore_from_config = False - - def _get_cell_name(self): - canonical_name = get_canonical_name_for_symbol( - self.__class__, api_name='keras', add_prefix_to_v1_names=True) - if canonical_name is not None: - return 'tf.{}'.format(canonical_name) - return self.__class__.__module__ + '.' 
+ self.__class__.__name__ - - def _instrument_layer_creation(self): - self._instrumented_keras_api = False - self._instrumented_keras_layer_class = False - self._instrumented_keras_model_class = False - if not getattr(self, '_disable_keras_instrumentation', False): - keras_api_gauge.get_cell('layer').set(True) - self._instrumented_keras_api = True - if getattr(self, '_is_model_for_instrumentation', False): - keras_models_gauge.get_cell(self._get_cell_name()).set(True) - self._instrumented_keras_model_class = True - else: - keras_layers_gauge.get_cell(self._get_cell_name()).set(True) - self._instrumented_keras_layer_class = True - else: - # This is a legacy layer that has disabled instrumentation - # as a native keras object. We still instrument this as - # legacy usage. - keras_api_gauge.get_cell('legacy_layer').set(True) - - @doc_controls.for_subclass_implementers - def _add_trackable(self, trackable_object, trainable): - """Adds a Trackable object to this layer's state. + def _infer_output_signature(self, inputs, args, kwargs, input_masks): + """Call the layer on input KerasTensors, returns output KerasTensors.""" - Args: - trackable_object: The tf.tracking.Trackable object to add. - trainable: Boolean, whether the variable should be part of the layer's - "trainable_variables" (e.g. variables, biases) or - "non_trainable_variables" (e.g. BatchNorm mean and variance). + keras_tensor_inputs = inputs + call_fn = self.call + # Wrapping `call` function in autograph to allow for dynamic control + # flow and control dependencies in call. We are limiting this to + # subclassed layers as autograph is strictly needed only for + # subclassed layers and models. + # tf_convert will respect the value of autograph setting in the + # enclosing tf.function, if any. + if base_layer_utils.is_subclassed( + self + ) and not base_layer_utils.from_saved_model(self): + call_fn = tf.__internal__.autograph.tf_convert( + self.call, tf.__internal__.autograph.control_status_ctx() + ) + + call_fn = traceback_utils.inject_argument_info_in_traceback( + call_fn, + object_name=f'layer "{self.name}" (type {self.__class__.__name__})', + ) + + # We enter a scratch graph and build placeholder inputs inside of it + # that match the input args. + # We then call the layer inside of the scratch graph to identify the + # output signatures, then we build KerasTensors corresponding to those + # outputs. + scratch_graph = tf.__internal__.FuncGraph( + str(self.name) + "_scratch_graph" + ) + with scratch_graph.as_default(): + inputs = tf.nest.map_structure( + keras_tensor.keras_tensor_to_placeholder, inputs + ) + args = tf.nest.map_structure( + keras_tensor.keras_tensor_to_placeholder, args + ) + kwargs = tf.nest.map_structure( + keras_tensor.keras_tensor_to_placeholder, kwargs + ) + input_masks = tf.nest.map_structure( + keras_tensor.keras_tensor_to_placeholder, input_masks + ) + + with backend.name_scope(self._name_scope()): + with autocast_variable.enable_auto_cast_variables( + self._compute_dtype_object + ): + # Build layer if applicable (if the `build` method has been + # overridden). + # TODO(kaftan): do we maybe_build here, or have we already + # done it? 
+ self._maybe_build(inputs) + inputs = self._maybe_cast_inputs(inputs) + outputs = call_fn(inputs, *args, **kwargs) + + self._handle_activity_regularization(inputs, outputs) + self._set_mask_metadata( + inputs, outputs, input_masks, build_graph=False + ) + outputs = tf.nest.map_structure( + keras_tensor.keras_tensor_from_tensor, outputs + ) + + self._set_save_spec(keras_tensor_inputs, args, kwargs) + if hasattr(self, "_set_inputs") and not self.inputs: + # TODO(kaftan): figure out if we need to do this at all + # Subclassed network: explicitly set metadata normally set by + # a call to self._set_inputs(). + self._set_inputs(inputs, outputs) + del scratch_graph + return outputs - Returns: - The TrackableWeightHandler used to track this object. - """ - if isinstance(trackable_object, base_layer_utils.TrackableWeightHandler): - handler = trackable_object - else: - handler = base_layer_utils.TrackableWeightHandler(trackable_object) - if trainable: - self._trainable_weights.append(handler) - else: - self._non_trainable_weights.append(handler) - return handler - - def _clear_losses(self): - """Used every step in eager to reset losses.""" - # Set to thread local directly to avoid Layer.__setattr__ overhead. - if not getattr(self, '_self_tracked_trackables', - None): # Fast path for single Layer. - self._thread_local._eager_losses = [] - else: - for layer in self._flatten_layers(): - layer._thread_local._eager_losses = [] - - def _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs): - if self.dynamic: - # We will use static shape inference to return symbolic tensors - # matching the specifications of the layer outputs. - # Since `self.dynamic` is True, we will never attempt to - # run the underlying TF graph (which is disconnected). - # TODO(fchollet): consider py_func as an alternative, which - # would enable us to run the underlying graph if needed. - input_signature = tf.nest.map_structure( - lambda x: tf.TensorSpec(shape=x.shape, dtype=x.dtype), - inputs) - output_signature = self.compute_output_signature(input_signature) - return tf.nest.map_structure(keras_tensor.KerasTensor, output_signature) - else: - return self._infer_output_signature(inputs, args, kwargs, input_masks) - - def _infer_output_signature(self, inputs, args, kwargs, input_masks): - """Call the layer on input KerasTensors and returns output KerasTensors.""" - - keras_tensor_inputs = inputs - call_fn = self.call - # Wrapping `call` function in autograph to allow for dynamic control - # flow and control dependencies in call. We are limiting this to - # subclassed layers as autograph is strictly needed only for - # subclassed layers and models. - # tf_convert will respect the value of autograph setting in the - # enclosing tf.function, if any. - if (base_layer_utils.is_subclassed(self) and - not base_layer_utils.from_saved_model(self)): - call_fn = tf.__internal__.autograph.tf_convert( - self.call, tf.__internal__.autograph.control_status_ctx()) - - call_fn = traceback_utils.inject_argument_info_in_traceback( - call_fn, - object_name=f'layer "{self.name}" (type {self.__class__.__name__})') - - # We enter a scratch graph and build placeholder inputs inside of it that - # match the input args. - # We then call the layer inside of the scratch graph to identify the - # output signatures, then we build KerasTensors corresponding to those - # outputs. 
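A usage-level view of the tracing implemented in `_infer_output_signature` above, with public `tf.keras` calls only:

```python
import tensorflow as tf

# Calling a layer on a symbolic KerasTensor routes through
# `_infer_output_signature`: the layer is built, `call` is traced once
# on placeholders in a scratch FuncGraph, and new KerasTensors are
# returned to continue Functional API construction.
x = tf.keras.Input(shape=(16,))      # a KerasTensor, not a tf.Tensor
y = tf.keras.layers.Dense(4)(x)      # traced inside a scratch graph
model = tf.keras.Model(x, y)
print(type(y).__name__)              # KerasTensor
```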
- scratch_graph = tf.__internal__.FuncGraph(str(self.name) + '_scratch_graph') - with scratch_graph.as_default(): - inputs = tf.nest.map_structure( - keras_tensor.keras_tensor_to_placeholder, inputs) - args = tf.nest.map_structure( - keras_tensor.keras_tensor_to_placeholder, args) - kwargs = tf.nest.map_structure( - keras_tensor.keras_tensor_to_placeholder, kwargs) - input_masks = tf.nest.map_structure( - keras_tensor.keras_tensor_to_placeholder, input_masks) - - with backend.name_scope(self._name_scope()): # pylint: disable=not-callable - with autocast_variable.enable_auto_cast_variables( - self._compute_dtype_object): - # Build layer if applicable (if the `build` method has been - # overridden). - # TODO(kaftan): do we maybe_build here, or have we already done it? - self._maybe_build(inputs) - inputs = self._maybe_cast_inputs(inputs) - outputs = call_fn(inputs, *args, **kwargs) - - self._handle_activity_regularization(inputs, outputs) - self._set_mask_metadata(inputs, outputs, input_masks, - build_graph=False) - outputs = tf.nest.map_structure( - keras_tensor.keras_tensor_from_tensor, outputs) - - self._set_save_spec(keras_tensor_inputs, args, kwargs) - if hasattr(self, '_set_inputs') and not self.inputs: - # TODO(kaftan): figure out if we need to do this at all - # Subclassed network: explicitly set metadata normally set by - # a call to self._set_inputs(). - self._set_inputs(inputs, outputs) - del scratch_graph - return outputs - - def _functional_construction_call(self, inputs, args, kwargs, input_list): - call_context = base_layer_utils.call_context() - - # Accept NumPy and scalar inputs by converting to Tensors. - if any(isinstance(x, ( - tf.Tensor, np.ndarray, float, int)) for x in input_list): - - def _convert_non_tensor(x): - # Don't call `ops.convert_to_tensor` on all `inputs` because - # `SparseTensors` can't be converted to `Tensor`. - if isinstance(x, (tf.Tensor, np.ndarray, float, int)): - return tf.convert_to_tensor(x) - return x - - inputs = tf.nest.map_structure(_convert_non_tensor, inputs) - input_list = tf.nest.flatten(inputs) - - # Handle `mask` propagation from previous layer to current layer. Masks can - # be propagated explicitly via the `mask` argument, or implicitly via - # setting the `_keras_mask` attribute on the inputs to a Layer. Masks passed - # explicitly take priority. - mask_arg_passed_by_framework = False - input_masks, mask_is_implicit = self._get_input_masks( - inputs, input_list, args, kwargs) - if self._expects_mask_arg and mask_is_implicit: - kwargs['mask'] = input_masks - mask_arg_passed_by_framework = True - - # If `training` argument is None or not explicitly passed, - # propagate `training` value from this layer's calling layer. - training_value = None - training_arg_passed_by_framework = False - # Priority 1: `training` was explicitly passed a non-None value. - if self._call_spec.arg_was_passed('training', args, kwargs): - training_value = self._call_spec.get_arg_value('training', args, kwargs) - if not self._expects_training_arg: - kwargs.pop('training') - - if training_value is None: - # Priority 2: `training` was passed to a parent layer. - if call_context.training is not None: - training_value = call_context.training - # Priority 3: `learning_phase()` has been set. - elif backend.global_learning_phase_is_set(): - training_value = backend.learning_phase() - # Force the training_value to be bool type which matches to the contract - # for layer/model call args. 
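The training-mode priorities spelled out above can be observed from user code. A sketch; the `Reporter` layer is hypothetical:

```python
import tensorflow as tf

class Reporter(tf.keras.layers.Layer):
    """Hypothetical layer that prints the `training` value it resolves."""

    def call(self, inputs, training=None):
        # Priority 1: `training=...` passed directly to this call.
        # Priority 2: `training` from an enclosing layer/model call.
        # Priority 3: the global learning phase, if set.
        # Priority 4: the default in this `call` signature.
        print("resolved training:", training)  # also fires once at trace time
        return inputs

inp = tf.keras.Input(shape=(2,))
model = tf.keras.Model(inp, Reporter()(inp))
model(tf.zeros((1, 2)), training=True)  # inner layer sees True (case 2)
model(tf.zeros((1, 2)))                 # resolved from the default (case 4)
```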
- if tf.is_tensor(training_value): - training_value = tf.cast(training_value, tf.bool) + def _functional_construction_call(self, inputs, args, kwargs, input_list): + call_context = base_layer_utils.call_context() + + # Accept NumPy and scalar inputs by converting to Tensors. + if any( + isinstance(x, (tf.Tensor, np.ndarray, float, int)) + for x in input_list + ): + + def _convert_non_tensor(x): + # Don't call `ops.convert_to_tensor` on all `inputs` because + # `SparseTensors` can't be converted to `Tensor`. + if isinstance(x, (tf.Tensor, np.ndarray, float, int)): + return tf.convert_to_tensor(x) + return x + + inputs = tf.nest.map_structure(_convert_non_tensor, inputs) + input_list = tf.nest.flatten(inputs) + + # Handle `mask` propagation from previous layer to current layer. Masks + # can be propagated explicitly via the `mask` argument, or implicitly + # via setting the `_keras_mask` attribute on the inputs to a Layer. + # Masks passed explicitly take priority. + mask_arg_passed_by_framework = False + input_masks, mask_is_implicit = self._get_input_masks( + inputs, input_list, args, kwargs + ) + if self._expects_mask_arg and mask_is_implicit: + kwargs["mask"] = input_masks + mask_arg_passed_by_framework = True + + # If `training` argument is None or not explicitly passed, + # propagate `training` value from this layer's calling layer. + training_value = None + training_arg_passed_by_framework = False + # Priority 1: `training` was explicitly passed a non-None value. + if self._call_spec.arg_was_passed("training", args, kwargs): + training_value = self._call_spec.get_arg_value( + "training", args, kwargs + ) + if not self._expects_training_arg: + kwargs.pop("training") + + if training_value is None: + # Priority 2: `training` was passed to a parent layer. + if call_context.training is not None: + training_value = call_context.training + # Priority 3: `learning_phase()` has been set. + elif backend.global_learning_phase_is_set(): + training_value = backend.learning_phase() + # Force the training_value to be bool type which matches to the + # contract for layer/model call args. + if tf.is_tensor(training_value): + training_value = tf.cast(training_value, tf.bool) + else: + training_value = bool(training_value) + # Priority 4: trace layer with the default training argument + # specified in the `call` signature (or in inference mode if the + # `call` signature specifies no non-None default). + else: + training_value = self._call_spec.default_training_arg + # In cases (2), (3), (4) the training argument is passed + # automatically by the framework, and will not be hard-coded into + # the model. + if self._expects_training_arg: + args, kwargs = self._call_spec.set_arg_value( + "training", training_value, args, kwargs + ) + training_arg_passed_by_framework = True + + with call_context.enter( + layer=self, inputs=inputs, build_graph=True, training=training_value + ): + # Check input assumptions set after layer building, e.g. input + # shape. + try: + outputs = self._keras_tensor_symbolic_call( + inputs, input_masks, args, kwargs + ) + except TypeError as e: + if "DictWrapper" in str(e): + raise TypeError( + f"{self} could not be deserialized properly. Please" + " ensure that components that are Python object" + " instances (layers, models, etc.) returned by" + " `get_config()` are explicitly deserialized in the" + " model's `from_config()` method." 
+ ) from e + else: + raise e + + if outputs is None: + raise ValueError( + "A layer's `call` method should return a " + "Tensor or a list of Tensors, not None " + "(layer: " + self.name + ")." + ) + if training_arg_passed_by_framework: + args, kwargs = self._call_spec.set_arg_value( + "training", None, args, kwargs, pop_kwarg_if_none=True + ) + if mask_arg_passed_by_framework: + kwargs.pop("mask") + # Node connectivity does not special-case the first argument. + outputs = self._set_connectivity_metadata( + (inputs,) + args, kwargs, outputs + ) + return outputs + + def _set_training_mode(self, args, kwargs, call_context): + training_mode = None + if self._expects_training_arg: + # (1) `training` was passed to this `Layer.call`. + if self._call_spec.arg_was_passed("training", args, kwargs): + training_mode = self._call_spec.get_arg_value( + "training", args, kwargs + ) + # If no `training` arg was passed, or `None` was explicitly passed, + # the framework will decide what the training mode is. + if training_mode is None: + call_ctx_training = call_context.training + # (2) `training` mode is inferred from an outer `Layer.call`. + if call_ctx_training is not None: + training_mode = call_ctx_training + # (3) User set `tf.keras.backend.set_learning_phase`. + elif backend.global_learning_phase_is_set(): + training_mode = backend.learning_phase() + # Ensure value is a `bool` or `tf.bool`. + if isinstance(training_mode, bool): + pass + elif tf.is_tensor(training_mode): + training_mode = tf.cast(training_mode, tf.bool) + else: + training_mode = bool(training_mode) + # (4) We default to using `call`'s default value for `training`, + # or treating the layer as if it is in inference if no non-None + # default is specified in the `call` signature. + else: + training_mode = self._call_spec.default_training_arg + + # For cases (2), (3), (4) the `training` arg is passed by framework. + args, kwargs = self._call_spec.set_arg_value( + "training", training_mode, args, kwargs + ) else: - training_value = bool(training_value) - # Priority 4: trace layer with the default training argument specified - # in the `call` signature (or in inference mode if the `call` signature - # specifies no non-None default). - else: - training_value = self._call_spec.default_training_arg - # In cases (2), (3), (4) the training argument is passed automatically - # by the framework, and will not be hard-coded into the model. - if self._expects_training_arg: - args, kwargs = self._call_spec.set_arg_value('training', training_value, - args, kwargs) - training_arg_passed_by_framework = True - - with call_context.enter( - layer=self, inputs=inputs, build_graph=True, training=training_value): - # Check input assumptions set after layer building, e.g. input shape. - outputs = self._keras_tensor_symbolic_call( - inputs, input_masks, args, kwargs) - - if outputs is None: - raise ValueError('A layer\'s `call` method should return a ' - 'Tensor or a list of Tensors, not None ' - '(layer: ' + self.name + ').') - if training_arg_passed_by_framework: - args, kwargs = self._call_spec.set_arg_value( - 'training', None, args, kwargs, pop_kwarg_if_none=True) - if mask_arg_passed_by_framework: - kwargs.pop('mask') - # Node connectivity does not special-case the first argument. - outputs = self._set_connectivity_metadata((inputs,) + args, kwargs, - outputs) - return outputs - - def _set_training_mode(self, args, kwargs, call_context): - training_mode = None - if self._expects_training_arg: - # (1) `training` was passed to this `Layer.call`.
- if self._call_spec.arg_was_passed('training', args, kwargs): - training_mode = self._call_spec.get_arg_value('training', args, kwargs) - # If no `training` arg was passed, or `None` was explicitly passed, - # the framework will make a decision about the training mode is. - if training_mode is None: - call_ctx_training = call_context.training - # (2) `training` mode is inferred from an outer `Layer.call`. - if call_ctx_training is not None: - training_mode = call_ctx_training - # (3) User set `tf.keras.backend.set_learning_phase`. - elif backend.global_learning_phase_is_set(): - training_mode = backend.learning_phase() - # Ensure value is a `bool` or `tf.bool`. - if isinstance(training_mode, bool): - pass - elif tf.is_tensor(training_mode): - training_mode = tf.cast(training_mode, tf.bool) - else: - training_mode = bool(training_mode) - # (4) We default to using `call`'s default value for `training`, - # or treating the layer as if it is in inference if no non-None default - # is specified in the `call` signature. + if "training" in kwargs: + # `training` was passed to this `Layer` but is not needed for + # `Layer.call`. It will set the default mode for inner + # `Layer.call`s. + training_mode = kwargs.pop("training") + else: + # Grab the current `training` mode from any outer `Layer.call`. + training_mode = call_context.training + + return args, kwargs, training_mode + + def _autographed_call(self): + # Wrapping `call` function in autograph to allow for dynamic control + # flow and control dependencies in call. We are limiting this to + # subclassed layers as autograph is strictly needed only for + # subclassed layers and models. + # tf_convert will respect the value of autograph setting in the + # enclosing tf.function, if any. + if base_layer_utils.is_subclassed( + self + ) and not base_layer_utils.from_saved_model(self): + return tf.__internal__.autograph.tf_convert( + self.call, tf.__internal__.autograph.control_status_ctx() + ) + else: + return self.call + + @property + def _inbound_nodes(self): + return self._inbound_nodes_value + + @_inbound_nodes.setter + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _inbound_nodes(self, value): + self._inbound_nodes_value = value + + @property + def _outbound_nodes(self): + return self._outbound_nodes_value + + @_outbound_nodes.setter + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _outbound_nodes(self, value): + self._outbound_nodes_value = value + + def _set_dtype_policy(self, dtype): + """Sets self._dtype_policy.""" + self._dtype_policy = policy.get_policy(dtype) + + # Performance optimization: cache the compute dtype as a Dtype object or + # None, so that str to Dtype conversion doesn't happen in + # Layer.__call__. + # TODO(b/157486353): Investigate returning DTypes in Policy. + if self._dtype_policy.compute_dtype: + self._compute_dtype_object = tf.as_dtype( + self._dtype_policy.compute_dtype + ) + else: + self._compute_dtype_object = None + + @property + def _compute_dtype(self): + """Deprecated alias of `compute_dtype`.""" + return self._dtype_policy.compute_dtype + + def _maybe_cast_inputs(self, inputs, input_list=None): + """Maybe casts the inputs to the compute dtype. + + If self._compute_dtype is floating-point, and self_autocast is True, + floating-point inputs are casted to self._compute_dtype. + + Args: + inputs: Input tensor, or structure of input tensors. + input_list: Flat list of input tensors. 
+ + Returns: + `inputs`, but tensors may have been casted to self._compute_dtype + """ + if not input_list: + input_list = tf.nest.flatten(inputs) + + compute_dtype_object = self._compute_dtype_object + should_autocast = ( + self._autocast + and compute_dtype_object + and compute_dtype_object.is_floating + ) + + if should_autocast and any( + map(self._should_cast_single_input, input_list) + ): + # Only perform expensive `nest` operation when needed. + return tf.nest.map_structure(self._cast_single_input, inputs) else: - training_mode = self._call_spec.default_training_arg - - # For case (2), (3), (4) `training` arg is passed by framework. - args, kwargs = self._call_spec.set_arg_value('training', training_mode, - args, kwargs) - else: - if 'training' in kwargs: - # `training` was passed to this `Layer` but is not needed for - # `Layer.call`. It will set the default mode for inner `Layer.call`s. - training_mode = kwargs.pop('training') - else: - # Grab the current `training` mode from any outer `Layer.call`. - training_mode = call_context.training - - return args, kwargs, training_mode - - def _autographed_call(self): - # Wrapping `call` function in autograph to allow for dynamic control - # flow and control dependencies in call. We are limiting this to - # subclassed layers as autograph is strictly needed only for - # subclassed layers and models. - # tf_convert will respect the value of autograph setting in the - # enclosing tf.function, if any. - if (base_layer_utils.is_subclassed(self) and - not base_layer_utils.from_saved_model(self)): - return tf.__internal__.autograph.tf_convert( - self.call, tf.__internal__.autograph.control_status_ctx()) - else: - return self.call - - @property - def _inbound_nodes(self): - return self._inbound_nodes_value - - @_inbound_nodes.setter - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _inbound_nodes(self, value): - self._inbound_nodes_value = value - - @property - def _outbound_nodes(self): - return self._outbound_nodes_value - - @_outbound_nodes.setter - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _outbound_nodes(self, value): - self._outbound_nodes_value = value - - def _set_dtype_policy(self, dtype): - """Sets self._dtype_policy.""" - if isinstance(dtype, policy.Policy): - self._dtype_policy = dtype - elif isinstance(dtype, dict): - self._dtype_policy = policy.deserialize(dtype) - elif isinstance(dtype, str) and dtype in ('mixed_float16', - 'mixed_bfloat16'): - # The isinstance check is required since np.dtype raises an error if - # compared to a non-dtype string. - self._dtype_policy = policy.Policy(dtype) - elif dtype: - self._dtype_policy = policy.Policy(tf.as_dtype(dtype).name) - else: - self._dtype_policy = policy.global_policy() - if (self._dtype_policy.name == 'mixed_float16' and - not loss_scale_optimizer.strategy_supports_loss_scaling()): - # Although only loss scaling doesn't support certain strategies, to avoid - # confusion, we disallow the 'mixed_float16' policy with unsupported - # strategies. This is because 'mixed_float16' requires loss scaling for - # numeric stability. - strategy = tf.distribute.get_strategy() - raise ValueError('Mixed precision is not supported with the ' - 'tf.distribute.Strategy: %s. Either stop using mixed ' - 'precision by removing the use of the "%s" policy or ' - 'use a different Strategy, e.g. a MirroredStrategy.' 
% - (strategy.__class__.__name__, self._dtype_policy.name)) - - # Performance optimization: cache the compute dtype as a Dtype object or - # None, so that str to Dtype conversion doesn't happen in Layer.__call__. - # TODO(b/157486353): Investigate returning DTypes in Policy. - if self._dtype_policy.compute_dtype: - self._compute_dtype_object = tf.as_dtype( - self._dtype_policy.compute_dtype) - else: - self._compute_dtype_object = None - - @property - def _compute_dtype(self): - """Deprecated alias of `compute_dtype`.""" - return self._dtype_policy.compute_dtype - - def _maybe_cast_inputs(self, inputs, input_list=None): - """Maybe casts the inputs to the compute dtype. - - If self._compute_dtype is floating-point, and self_autocast is True, - floating-point inputs are casted to self._compute_dtype. + return inputs + + def _should_cast_single_input(self, x): + if isinstance(x, _AUTOCAST_TYPES): + return ( + self._compute_dtype_object + and x.dtype != self._compute_dtype_object + and x.dtype.is_floating + ) + return False + + def _cast_single_input(self, x): + """Cast a single Tensor or TensorSpec to the compute dtype.""" + if self._should_cast_single_input(x): + return tf.cast(x, self._compute_dtype_object) + else: + return x + + # _dtype used to be an attribute set in the constructor. We still expose it + # because some clients still use it. + # TODO(reedwm): Deprecate, then remove the _dtype property. + @property + def _dtype(self): + # This is equivalent to returning self.dtype . We do not return + # self.dtype as it would cause infinite recursion in a few subclasses, + # which override "dtype" to return self._dtype. + return self._dtype_policy.variable_dtype + + @_dtype.setter + def _dtype(self, value): + value = tf.as_dtype(value).name + self._set_dtype_policy(policy.Policy(value)) + + def _name_scope(self): + if not tf.__internal__.tf2.enabled(): + return self.name + name_scope = self.name + current_name_scope = tf.__internal__.get_name_scope() + if current_name_scope: + name_scope = current_name_scope + "/" + name_scope + if name_scope: + # Note that the trailing `/` prevents autogenerated + # numerical suffixes to get appended. It will also fully reset + # nested name scope (i.e. the outer name scope has no effect). + name_scope += "/" + return name_scope + + def _init_set_name(self, name, zero_based=True): + if name is None: + self._name = backend.unique_object_name( + generic_utils.to_snake_case(self.__class__.__name__), + zero_based=zero_based, + ) + elif isinstance(name, str): + backend.observe_object_name(name) + self._name = name + else: + raise TypeError( + f"Expected `name` argument to be a string, but got: {name}" + ) + + def _get_existing_metric(self, name=None): + match = [m for m in self._metrics if m.name == name] + if not match: + return + if len(match) > 1: + raise ValueError( + "Please provide different names for the metrics you have " + 'added. 
We found {} metrics with the name: "{}"'.format( + len(match), name + ) + ) + return match[0] + + def _handle_weight_regularization(self, name, variable, regularizer): + """Create lambdas which compute regularization losses.""" + + def _loss_for_variable(v): + """Creates a regularization loss `Tensor` for variable `v`.""" + with backend.name_scope(name + "/Regularizer"): + regularization = regularizer(v) + return regularization + + if base_layer_utils.is_split_variable(variable): + for v in variable: + self.add_loss(functools.partial(_loss_for_variable, v)) + elif isinstance(variable, lazy_variable.LazyInitVariable): + self._captured_weight_regularizer.append( + (name, variable, regularizer) + ) + else: + self.add_loss(functools.partial(_loss_for_variable, variable)) - Args: - inputs: Input tensor, or structure of input tensors. - input_list: Flat list of input tensors. + def _handle_activity_regularization(self, inputs, outputs): + # Apply activity regularization. + # Note that it should be applied every time the layer creates a new + # output, since it is output-specific. + if self._activity_regularizer: + output_list = tf.nest.flatten(outputs) + with backend.name_scope("ActivityRegularizer"): + for output in output_list: + activity_loss = tf.convert_to_tensor( + self._activity_regularizer(output) + ) + batch_size = tf.cast( + tf.shape(output)[0], activity_loss.dtype + ) + # Make activity regularization strength batch-agnostic. + mean_activity_loss = tf.math.divide_no_nan( + activity_loss, batch_size + ) + self.add_loss(mean_activity_loss) + + def _set_mask_metadata(self, inputs, outputs, previous_mask, build_graph): + # Many `Layer`s don't need to call `compute_mask`. + # This method is optimized to do as little work as needed for the common + # case. + if not self._supports_masking: + return + + flat_outputs = tf.nest.flatten(outputs) + + mask_already_computed = getattr( + self, "_compute_output_and_mask_jointly", False + ) or all( + getattr(x, "_keras_mask", None) is not None for x in flat_outputs + ) + if mask_already_computed: + if build_graph: + self._set_mask_keras_history_checked(flat_outputs) + return + + output_masks = self.compute_mask(inputs, previous_mask) + if output_masks is None: + return + + flat_masks = tf.nest.flatten(output_masks) + for tensor, mask in zip(flat_outputs, flat_masks): + try: + tensor._keras_mask = mask + except AttributeError: + # C Type such as np.ndarray. + pass + + if build_graph: + self._set_mask_keras_history_checked(flat_outputs) + + def _set_mask_keras_history_checked(self, flat_outputs): + for output in flat_outputs: + if getattr(output, "_keras_mask", None) is not None: + # Do not track masks for `TensorFlowOpLayer` construction. + output._keras_mask._keras_history_checked = True + + def _get_input_masks(self, inputs, input_list, args, kwargs): + if not self._supports_masking and not self._expects_mask_arg: + # Input masks only need to be retrieved if they are needed for + # `call` or `compute_mask`. + input_masks = None + implicit_mask = False + elif self._call_spec.arg_was_passed("mask", args, kwargs): + input_masks = self._call_spec.get_arg_value("mask", args, kwargs) + implicit_mask = False + else: + input_masks = [getattr(t, "_keras_mask", None) for t in input_list] + if all(mask is None for mask in input_masks): + input_masks = None + implicit_mask = False + else: + # Only do expensive `nest` op when masking is actually being + # used. 
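The implicit-mask plumbing handled by `_get_input_masks` pairs with `compute_mask` on the producing layer. A sketch with public APIs; `MaskedSum` is illustrative:

```python
import tensorflow as tf

class MaskedSum(tf.keras.layers.Layer):
    """Illustrative layer: consumes an implicit mask, stops its propagation."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True  # opt in to receiving masks

    def call(self, inputs, mask=None):
        if mask is not None:
            inputs = inputs * tf.cast(mask[..., None], inputs.dtype)
        return tf.reduce_sum(inputs, axis=1)

    def compute_mask(self, inputs, mask=None):
        return None  # the time axis is reduced away, so no mask remains

x = tf.keras.Input(shape=(None,), dtype=tf.int64)
h = tf.keras.layers.Embedding(100, 8, mask_zero=True)(x)  # sets `_keras_mask`
y = MaskedSum()(h)  # the mask arrives implicitly, no explicit `mask=` needed
```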
+ input_masks = tf.nest.pack_sequence_as(inputs, input_masks) + implicit_mask = True + return input_masks, implicit_mask + + def _set_connectivity_metadata(self, args, kwargs, outputs): + # If the layer returns tensors from its inputs unmodified, + # we copy them to avoid loss of KerasHistory metadata. + flat_outputs = tf.nest.flatten(outputs) + flat_inputs = tf.nest.flatten((args, kwargs)) + input_ids_set = {id(i) for i in flat_inputs} + outputs_copy = [] + for x in flat_outputs: + if id(x) in input_ids_set: + with backend.name_scope(self.name): + x = tf.identity(x) + outputs_copy.append(x) + outputs = tf.nest.pack_sequence_as(outputs, outputs_copy) + + # Create node, Node wires itself to inbound and outbound layers. The + # Node constructor actually updates this layer's self._inbound_nodes, + # sets _keras_history on the outputs, and adds itself to the + # `_outbound_nodes` of the layers that produced the inputs to this layer + # call. + node_module.Node( + self, call_args=args, call_kwargs=kwargs, outputs=outputs + ) + return outputs - Returns: - `inputs`, but tensors may have been casted to self._compute_dtype - """ - if not input_list: - input_list = tf.nest.flatten(inputs) - - compute_dtype_object = self._compute_dtype_object - should_autocast = ( - self._autocast and compute_dtype_object and - compute_dtype_object.is_floating) - - if (should_autocast and - any(map(self._should_cast_single_input, input_list))): - # Only perform expensive `nest` operation when needed. - return tf.nest.map_structure(self._cast_single_input, inputs) - else: - return inputs - - def _should_cast_single_input(self, x): - if isinstance(x, _AUTOCAST_TYPES): - return (self._compute_dtype_object and - x.dtype != self._compute_dtype_object and x.dtype.is_floating) - return False - - def _cast_single_input(self, x): - """Cast a single Tensor or TensorSpec to the compute dtype.""" - if self._should_cast_single_input(x): - return tf.cast(x, self._compute_dtype_object) - else: - return x - - # _dtype used to be an attribute set in the constructor. We still expose it - # because some clients still use it. - # TODO(reedwm): Deprecate, then remove the _dtype property. - @property - def _dtype(self): - # This is equivalent to returning self.dtype . We do not return self.dtype - # as it would cause infinite recursion in a few subclasses, which override - # "dtype" to return self._dtype. - return self._dtype_policy.variable_dtype - - @_dtype.setter - def _dtype(self, value): - value = tf.as_dtype(value).name - self._set_dtype_policy(policy.Policy(value)) - - def _name_scope(self): # pylint: disable=method-hidden - if not tf.__internal__.tf2.enabled(): - return self.name - name_scope = self.name - current_name_scope = tf.__internal__.get_name_scope() - if current_name_scope: - name_scope = current_name_scope + '/' + name_scope - if name_scope: - # Note that the trailing `/` prevents autogenerated - # numerical suffixes to get appended. It will also fully reset - # nested name scope (i.e. the outer name scope has no effect). 
- name_scope += '/' - return name_scope - - def _init_set_name(self, name, zero_based=True): - if name is None: - self._name = backend.unique_object_name( - generic_utils.to_snake_case(self.__class__.__name__), - zero_based=zero_based) - elif isinstance(name, str): - backend.observe_object_name(name) - self._name = name - else: - raise TypeError( - f'Expected `name` argument to be a string, but got: {name}') - - def _get_existing_metric(self, name=None): - match = [m for m in self._metrics if m.name == name] - if not match: - return - if len(match) > 1: - raise ValueError( - 'Please provide different names for the metrics you have added. ' - 'We found {} metrics with the name: "{}"'.format(len(match), name)) - return match[0] - - def _handle_weight_regularization(self, name, variable, regularizer): - """Create lambdas which compute regularization losses.""" - - def _loss_for_variable(v): - """Creates a regularization loss `Tensor` for variable `v`.""" - with backend.name_scope(name + '/Regularizer'): - regularization = regularizer(v) - return regularization - - if base_layer_utils.is_split_variable(variable): - for v in variable: - self.add_loss(functools.partial(_loss_for_variable, v)) - elif isinstance(variable, lazy_variable.LazyInitVariable): - self._captured_weight_regularizer.append((name, variable, regularizer)) - else: - self.add_loss(functools.partial(_loss_for_variable, variable)) - - def _handle_activity_regularization(self, inputs, outputs): - # Apply activity regularization. - # Note that it should be applied every time the layer creates a new - # output, since it is output-specific. - if self._activity_regularizer: - output_list = tf.nest.flatten(outputs) - with backend.name_scope('ActivityRegularizer'): - for output in output_list: - activity_loss = tf.convert_to_tensor( - self._activity_regularizer(output)) - batch_size = tf.cast( - tf.shape(output)[0], activity_loss.dtype) - # Make activity regularization strength batch-agnostic. - mean_activity_loss = activity_loss / batch_size - self.add_loss(mean_activity_loss) - - def _set_mask_metadata(self, inputs, outputs, previous_mask, build_graph): - # Many `Layer`s don't need to call `compute_mask`. - # This method is optimized to do as little work as needed for the common - # case. - if not self._supports_masking: - return - - flat_outputs = tf.nest.flatten(outputs) - - mask_already_computed = ( - getattr(self, '_compute_output_and_mask_jointly', False) or - all(getattr(x, '_keras_mask', None) is not None for x in flat_outputs)) - if mask_already_computed: - if build_graph: - self._set_mask_keras_history_checked(flat_outputs) - return - - output_masks = self.compute_mask(inputs, previous_mask) - if output_masks is None: - return - - flat_masks = tf.nest.flatten(output_masks) - for tensor, mask in zip(flat_outputs, flat_masks): - try: - tensor._keras_mask = mask - except AttributeError: - # C Type such as np.ndarray. - pass + def _get_node_attribute_at_index(self, node_index, attr, attr_name): + """Private utility to retrieves an attribute (e.g. inputs) from a node. + + This is used to implement the methods: + - get_input_shape_at + - get_output_shape_at + - get_input_at + etc... + + Args: + node_index: Integer index of the node from which + to retrieve the attribute. + attr: Exact node attribute name. + attr_name: Human-readable attribute name, for error messages. + + Returns: + The layer's attribute `attr` at the node of index `node_index`. 
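The trailing-slash behavior of `_name_scope` noted above is visible in the names of the weights a layer creates; a short sketch:

```python
import tensorflow as tf

# The trailing "/" returned by `_name_scope` makes the scope absolute and
# suffix-free, so weights land under the exact layer name:
d = tf.keras.layers.Dense(1, name="my_dense")
d(tf.zeros((1, 3)))  # builds the weights inside the layer's name scope
print([v.name for v in d.weights])
# ['my_dense/kernel:0', 'my_dense/bias:0']
```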
+ + Raises: + RuntimeError: If the layer has no inbound nodes, or if called in + Eager mode. + ValueError: If the index provided does not match any node. + """ + if not self._inbound_nodes: + raise RuntimeError( + f"The layer {self.name} has never been called " + f"and thus has no defined {attr_name}." + ) + if not len(self._inbound_nodes) > node_index: + raise ValueError( + f"Asked to get {attr_name} at node " + f"{node_index}, but the layer has only " + f"{len(self._inbound_nodes)} inbound nodes." + ) + values = getattr(self._inbound_nodes[node_index], attr) + if isinstance(values, list) and len(values) == 1: + return values[0] + else: + return values - if build_graph: - self._set_mask_keras_history_checked(flat_outputs) - - def _set_mask_keras_history_checked(self, flat_outputs): - for output in flat_outputs: - if getattr(output, '_keras_mask', None) is not None: - # Do not track masks for `TensorFlowOpLayer` construction. - output._keras_mask._keras_history_checked = True - - def _get_input_masks(self, inputs, input_list, args, kwargs): - if not self._supports_masking and not self._expects_mask_arg: - # Input masks only need to be retrieved if they are needed for `call` - # or `compute_mask`. - input_masks = None - implicit_mask = False - elif self._call_spec.arg_was_passed('mask', args, kwargs): - input_masks = self._call_spec.get_arg_value('mask', args, kwargs) - implicit_mask = False - else: - input_masks = [getattr(t, '_keras_mask', None) for t in input_list] - if all(mask is None for mask in input_masks): - input_masks = None - implicit_mask = False - else: - # Only do expensive `nest` op when masking is actually being used. - input_masks = tf.nest.pack_sequence_as(inputs, input_masks) - implicit_mask = True - return input_masks, implicit_mask - - def _set_connectivity_metadata(self, args, kwargs, outputs): - # If the layer returns tensors from its inputs unmodified, - # we copy them to avoid loss of KerasHistory metadata. - flat_outputs = tf.nest.flatten(outputs) - flat_inputs = tf.nest.flatten((args, kwargs)) - input_ids_set = {id(i) for i in flat_inputs} - outputs_copy = [] - for x in flat_outputs: - if id(x) in input_ids_set: - with backend.name_scope(self.name): - x = tf.identity(x) - outputs_copy.append(x) - outputs = tf.nest.pack_sequence_as(outputs, outputs_copy) - - # Create node, Node wires itself to inbound and outbound layers. - # The Node constructor actually updates this layer's self._inbound_nodes, - # sets _keras_history on the outputs, and adds itself to the - # `_outbound_nodes` of the layers that produced the inputs to this - # layer call. - node_module.Node(self, call_args=args, call_kwargs=kwargs, outputs=outputs) - return outputs - - def _get_node_attribute_at_index(self, node_index, attr, attr_name): - """Private utility to retrieves an attribute (e.g. inputs) from a node. - - This is used to implement the methods: - - get_input_shape_at - - get_output_shape_at - - get_input_at - etc... + def _maybe_build(self, inputs): + # Check input assumptions set before layer building, e.g. input rank. + if not self.built: + input_spec.assert_input_compatibility( + self.input_spec, inputs, self.name + ) + input_list = tf.nest.flatten(inputs) + if input_list and self._dtype_policy.compute_dtype is None: + try: + dtype = input_list[0].dtype.base_dtype.name + except AttributeError: + pass + else: + self._set_dtype_policy(policy.Policy(dtype)) + input_shapes = None + # Converts Tensors / CompositeTensors to TensorShapes. 
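Before `build` runs, `_maybe_build` checks the inputs against `self.input_spec`; a sketch of a layer relying on that check (`Rank2Only` is hypothetical):

```python
import tensorflow as tf

class Rank2Only(tf.keras.layers.Layer):
    """Sketch: `input_spec` is enforced before `build` is called."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.input_spec = tf.keras.layers.InputSpec(ndim=2)

    def build(self, input_shape):
        self.scale = self.add_weight("scale", shape=(input_shape[-1],))
        super().build(input_shape)

    def call(self, inputs):
        return inputs * self.scale

Rank2Only()(tf.zeros((2, 3)))        # ok: rank-2 input
# Rank2Only()(tf.zeros((2, 3, 4)))   # raises: InputSpec expects ndim=2
```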
+ if any(hasattr(x, "shape") for x in input_list): + input_shapes = tf_utils.get_shapes(inputs) + else: + # Converts input shape to TensorShapes. + try: + input_shapes = tf_utils.convert_shapes( + inputs, to_tuples=False + ) + except ValueError: + pass + # Only call `build` if the user has manually overridden the build + # method. + if not hasattr(self.build, "_is_default"): + # Any setup work performed only once should happen in an + # `init_scope` to avoid creating symbolic Tensors that will + # later pollute any eager operations. + with tf_utils.maybe_init_scope(self): + self.build(input_shapes) + # We must set also ensure that the layer is marked as built, and the + # build shape is stored since user defined build functions may not + # be calling `super.build()` + Layer.build(self, input_shapes) + + # Optionally load weight values specified at layer instantiation. + if self._initial_weights is not None: + with tf.init_scope(): + # Using `init_scope` since we want variable assignment in + # `set_weights` to be treated like variable initialization. + self.set_weights(self._initial_weights) + self._initial_weights = None + + def _get_trainable_state(self): + """Get the `trainable` state of each sublayer. + + Returns: + A dict mapping all sublayers to their `trainable` value. + """ + trainable_state = weakref.WeakKeyDictionary() + for layer in self._flatten_layers(): + trainable_state[layer] = layer.trainable + return trainable_state + + def _set_trainable_state(self, trainable_state): + """Set `trainable` state for each sublayer.""" + for layer in self._flatten_layers(): + if layer in trainable_state: + layer.trainable = trainable_state[layer] + + @property + def _obj_reference_counts(self): + """A dict counting the number of attributes referencing an object.""" + self._maybe_create_attribute( + "_obj_reference_counts_dict", + object_identity.ObjectIdentityDictionary(), + ) + return self._obj_reference_counts_dict + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _maybe_create_attribute(self, name, default_value): + """Create attribute (with the default value) if it hasn't been created. + + This is useful for fields that is used for tracking purpose, + _trainable_weights, or _layers. Note that user could create a layer + subclass and assign an internal field before invoking the + Layer.__init__(), the __setattr__() need to create the tracking fields + and __init__() need to not override them. + + Args: + name: String, the name of the attribute. + default_value: Object, the default value of the attribute. + """ + if not hasattr(self, name): + self.__setattr__(name, default_value) + + def __delattr__(self, name): + # For any super.__delattr__() call, we will directly use the + # implementation in Trackable and skip the behavior in AutoTrackable. + # The Layer was originally use Trackable as base class, the change of + # using Module as base class forced us to have AutoTrackable in the + # class hierarchy. + # + # TODO(b/180760306) Keeping the status quo of skipping _delattr__ and + # __setattr__ in AutoTrackable may be unsustainable. + existing_value = getattr(self, name, None) + + # If this value is replacing an existing object assigned to an + # attribute, we should clean it out to avoid leaking memory. First we + # check if there are other attributes referencing it. 
+ reference_counts = self._obj_reference_counts + if existing_value not in reference_counts: + super(tf.__internal__.tracking.AutoTrackable, self).__delattr__( + name + ) + return + + reference_count = reference_counts[existing_value] + if reference_count > 1: + # There are other remaining references. We can't remove this object + # from _layers etc. + reference_counts[existing_value] = reference_count - 1 + super(tf.__internal__.tracking.AutoTrackable, self).__delattr__( + name + ) + return + else: + # This is the last remaining reference. + del reference_counts[existing_value] + + super(tf.__internal__.tracking.AutoTrackable, self).__delattr__(name) + + if isinstance(existing_value, Layer) or base_layer_utils.has_weights( + existing_value + ): + super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( + "_self_tracked_trackables", + [ + l + for l in self._self_tracked_trackables + if l is not existing_value + ], + ) + if isinstance(existing_value, tf.Variable): + super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( + "_trainable_weights", + [w for w in self._trainable_weights if w is not existing_value], + ) + super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( + "_non_trainable_weights", + [ + w + for w in self._non_trainable_weights + if w is not existing_value + ], + ) + + def __setattr__(self, name, value): + if ( + name == "_self_setattr_tracking" + or not getattr(self, "_self_setattr_tracking", True) + # Exclude @property.setters from tracking + or hasattr(self.__class__, name) + ): + try: + super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( + name, value + ) + except AttributeError: + raise AttributeError( + ( + 'Can\'t set the attribute "{}", likely because it ' + "conflicts with an existing read-only @property of the " + "object. Please choose a different name." + ).format(name) + ) + return + + # Wraps data structures in `Trackable`, unwraps `NoDependency` objects. + value = tf.__internal__.tracking.sticky_attribute_assignment( + trackable=self, value=value, name=name + ) + + reference_counts = self._obj_reference_counts + reference_counts[value] = reference_counts.get(value, 0) + 1 + + # When replacing an existing tf.Variable with a new one, we want to + # check its existing position in the + # self._trainable/non_trainable_variable, so that we can put it back to + # the original position. + if isinstance(value, tf.Variable) and isinstance( + getattr(self, name, None), tf.Variable + ): + existing_variable = getattr(self, name) + + def _get_variable_from_list(var_list, var): + # helper function to get the tf.variable from the list + # the default list.index() use == for comparison, which will + # cause issue for eager tensor. + for i in range(len(var_list)): + if var_list[i] is var: + return i + return None + + if existing_variable.trainable: + self._maybe_create_attribute("_trainable_weights", []) + position = _get_variable_from_list( + self._trainable_weights, existing_variable + ) + else: + self._maybe_create_attribute("_non_trainable_variable", []) + position = _get_variable_from_list( + self._non_trainable_variable, existing_variable + ) + else: + position = None - Args: - node_index: Integer index of the node from which - to retrieve the attribute. - attr: Exact node attribute name. - attr_name: Human-readable attribute name, for error messages. - - Returns: - The layer's attribute `attr` at the node of index `node_index`. - - Raises: - RuntimeError: If the layer has no inbound nodes, or if called in Eager - mode. 
- ValueError: If the index provided does not match any node. - """ - if not self._inbound_nodes: - raise RuntimeError(f'The layer {self.name} has never been called ' - f'and thus has no defined {attr_name}.') - if not len(self._inbound_nodes) > node_index: - raise ValueError(f'Asked to get {attr_name} at node ' - f'{node_index}, but the layer has only ' - f'{len(self._inbound_nodes)} inbound nodes.') - values = getattr(self._inbound_nodes[node_index], attr) - if isinstance(values, list) and len(values) == 1: - return values[0] - else: - return values - - def _maybe_build(self, inputs): - # Check input assumptions set before layer building, e.g. input rank. - if not self.built: - input_spec.assert_input_compatibility( - self.input_spec, inputs, self.name) - input_list = tf.nest.flatten(inputs) - if input_list and self._dtype_policy.compute_dtype is None: + # Clean out the old attribute, which clears _layers and + # _trainable_weights if necessary. try: - dtype = input_list[0].dtype.base_dtype.name + self.__delattr__(name) except AttributeError: - pass - else: - self._set_dtype_policy(policy.Policy(dtype)) - input_shapes = None - # Converts Tensors / CompositeTensors to TensorShapes. - if any(hasattr(x, 'shape') for x in input_list): - input_shapes = tf_utils.get_shapes(inputs) - else: - # Converts input shape to TensorShapes. - try: - input_shapes = tf_utils.convert_shapes(inputs, to_tuples=False) - except ValueError: - pass - # Only call `build` if the user has manually overridden the build method. - if not hasattr(self.build, '_is_default'): - # Any setup work performed only once should happen in an `init_scope` - # to avoid creating symbolic Tensors that will later pollute any eager - # operations. - with tf_utils.maybe_init_scope(self): - self.build(input_shapes) # pylint:disable=not-callable - # We must set also ensure that the layer is marked as built, and the build - # shape is stored since user defined build functions may not be calling - # `super.build()` - Layer.build(self, input_shapes) - - # Optionally load weight values specified at layer instantiation. - if self._initial_weights is not None: - with tf.init_scope(): - # Using `init_scope` since we want variable assignment in - # `set_weights` to be treated like variable initialization. - self.set_weights(self._initial_weights) - self._initial_weights = None - - def _get_trainable_state(self): - """Get the `trainable` state of each sublayer. - - Returns: - A dict mapping all sublayers to their `trainable` value. - """ - trainable_state = weakref.WeakKeyDictionary() - for layer in self._flatten_layers(): - trainable_state[layer] = layer.trainable - return trainable_state - - def _set_trainable_state(self, trainable_state): - """Set `trainable` state for each sublayer.""" - for layer in self._flatten_layers(): - if layer in trainable_state: - layer.trainable = trainable_state[layer] - - @property - def _obj_reference_counts(self): - """A dictionary counting the number of attributes referencing an object.""" - self._maybe_create_attribute('_obj_reference_counts_dict', - object_identity.ObjectIdentityDictionary()) - return self._obj_reference_counts_dict - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _maybe_create_attribute(self, name, default_value): - """Create the attribute with the default value if it hasn't been created. - - This is useful for fields that is used for tracking purpose, - _trainable_weights, or _layers. 
Note that user could create a layer subclass - and assign an internal field before invoking the Layer.__init__(), the - __setattr__() need to create the tracking fields and __init__() need to not - override them. + pass - Args: - name: String, the name of the attribute. - default_value: Object, the default value of the attribute. - """ - if not hasattr(self, name): - self.__setattr__(name, default_value) - - def __delattr__(self, name): - # For any super.__delattr__() call, we will directly use the implementation - # in Trackable and skip the behavior in AutoTrackable. The Layer was - # originally use Trackable as base class, the change of using Module as base - # class forced us to have AutoTrackable in the class hierarchy. - # - # TODO(b/180760306) Keeping the status quo of skipping _delattr__ and - # __setattr__ in AutoTrackable may be unsustainable. - existing_value = getattr(self, name, None) - - # If this value is replacing an existing object assigned to an attribute, we - # should clean it out to avoid leaking memory. First we check if there are - # other attributes referencing it. - reference_counts = self._obj_reference_counts - if existing_value not in reference_counts: - super(tf.__internal__.tracking.AutoTrackable, self).__delattr__(name) # pylint: disable=bad-super-call - return - - reference_count = reference_counts[existing_value] - if reference_count > 1: - # There are other remaining references. We can't remove this object from - # _layers etc. - reference_counts[existing_value] = reference_count - 1 - super(tf.__internal__.tracking.AutoTrackable, self).__delattr__(name) # pylint: disable=bad-super-call - return - else: - # This is the last remaining reference. - del reference_counts[existing_value] - - super(tf.__internal__.tracking.AutoTrackable, self).__delattr__(name) # pylint: disable=bad-super-call - - if (isinstance(existing_value, Layer) - or base_layer_utils.has_weights(existing_value)): - super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( # pylint: disable=bad-super-call - '_self_tracked_trackables', - [l for l in self._self_tracked_trackables if l is not existing_value]) - if isinstance(existing_value, tf.Variable): - super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( # pylint: disable=bad-super-call - '_trainable_weights', - [w for w in self._trainable_weights if w is not existing_value]) - super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( # pylint: disable=bad-super-call - '_non_trainable_weights', - [w for w in self._non_trainable_weights if w is not existing_value]) - - def __setattr__(self, name, value): - if (name == '_self_setattr_tracking' or - not getattr(self, '_self_setattr_tracking', True) or - # Exclude @property.setters from tracking - hasattr(self.__class__, name)): - try: + # Keep track of metric instance created in subclassed layer. + for val in tf.nest.flatten(value): + if isinstance(val, metrics_mod.Metric) and hasattr( + self, "_metrics" + ): + self._metrics.append(val) + + # Append value to self._self_tracked_trackables if relevant + if getattr(self, "_auto_track_sub_layers", True) and ( + isinstance(value, tf.Module) or base_layer_utils.has_weights(value) + ): + self._maybe_create_attribute("_self_tracked_trackables", []) + # We need to check object identity to avoid de-duplicating empty + # container types which compare equal. 
+ if not any( + (layer is value for layer in self._self_tracked_trackables) + ): + self._self_tracked_trackables.append(value) + if hasattr(value, "_use_resource_variables"): + # Legacy layers (V1 tf.layers) must always use + # resource variables. + value._use_resource_variables = True + + # Append value to list of trainable / non-trainable weights if relevant + # TODO(b/125122625): This won't pick up on any variables added to a + # list/dict after creation. + self._track_variables(value, position=position) + + # TODO(b/180760306) Skip the auto trackable from tf.Module to keep + # status quo. See the comment at __delattr__. super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( - name, value) # pylint: disable=bad-super-call - except AttributeError: - raise AttributeError( - ('Can\'t set the attribute "{}", likely because it conflicts with ' - 'an existing read-only @property of the object. Please choose a ' - 'different name.').format(name)) - return - - # Wraps data structures in `Trackable`, unwraps `NoDependency` objects. - value = tf.__internal__.tracking.sticky_attribute_assignment( - trackable=self, value=value, name=name) - - reference_counts = self._obj_reference_counts - reference_counts[value] = reference_counts.get(value, 0) + 1 - - # Clean out the old attribute, which clears _layers and _trainable_weights - # if necessary. - try: - self.__delattr__(name) - except AttributeError: - pass - - # Keep track of metric instance created in subclassed layer. - for val in tf.nest.flatten(value): - if isinstance(val, metrics_mod.Metric) and hasattr(self, '_metrics'): - self._metrics.append(val) - - # Append value to self._self_tracked_trackables if relevant - if (getattr(self, '_auto_track_sub_layers', True) and - (isinstance(value, tf.Module) or - base_layer_utils.has_weights(value))): - self._maybe_create_attribute('_self_tracked_trackables', []) - # We need to check object identity to avoid de-duplicating empty - # container types which compare equal. - if not any((layer is value for layer in self._self_tracked_trackables)): - self._self_tracked_trackables.append(value) - if hasattr(value, '_use_resource_variables'): - # Legacy layers (V1 tf.layers) must always use - # resource variables. - value._use_resource_variables = True - - # Append value to list of trainable / non-trainable weights if relevant - # TODO(b/125122625): This won't pick up on any variables added to a - # list/dict after creation. - for val in tf.nest.flatten(value, expand_composites=True): - if not isinstance(val, tf.Variable): - continue - - # Users may add extra weights/variables - # simply by assigning them to attributes (invalid for graph networks) - self._maybe_create_attribute('_trainable_weights', []) - self._maybe_create_attribute('_non_trainable_weights', []) - if val.trainable: - if any(val is w for w in self._trainable_weights): - continue - self._trainable_weights.append(val) - else: - if any(val is w for w in self._non_trainable_weights): - continue - self._non_trainable_weights.append(val) - - backend.track_variable(val) - - # TODO(b/180760306) Skip the auto trackable from tf.Module to keep status - # quo. See the comment at __delattr__. 
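Attribute assignment is also how sublayers and bare `tf.Variable`s get picked up into `_self_tracked_trackables` and the weight lists; a sketch (`Tracked` is illustrative):

```python
import tensorflow as tf

class Tracked(tf.keras.layers.Layer):
    """Sketch: `__setattr__` auto-tracks sublayers and variables."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.inner = tf.keras.layers.Dense(2)  # tracked as a sublayer
        self.v = tf.Variable(3.0)              # tracked as a trainable weight
        self.note = "plain Python value, not tracked"

    def call(self, inputs):
        return self.inner(inputs) + self.v

t = Tracked()
t(tf.zeros((1, 4)))    # builds `inner`
print(len(t.weights))  # 3: Dense kernel, Dense bias, and `v`
```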
- super(tf.__internal__.tracking.AutoTrackable, self).__setattr__(name, value) # pylint: disable=bad-super-call - - def _gather_children_attribute(self, attribute): - assert attribute in { - 'variables', 'trainable_variables', 'non_trainable_variables' - } - if hasattr(self, '_self_tracked_trackables'): - nested_layers = self._flatten_modules(include_self=False, recursive=False) - return list( - itertools.chain.from_iterable( - getattr(layer, attribute) for layer in nested_layers)) - return [] - - def _flatten_layers(self, recursive=True, include_self=True): - for m in self._flatten_modules( - recursive=recursive, include_self=include_self): - if isinstance(m, Layer): - yield m - - def _flatten_modules(self, recursive=True, include_self=True): - """Flattens `tf.Module` instances (excluding `Metrics`). - - Args: - recursive: Whether to recursively flatten through submodules. - include_self: Whether to include this `Layer` instance. + name, value + ) + + def _update_trackables(self): + """Track variables added to lists/dicts after creation""" + for trackable_obj in self._self_tracked_trackables: + if isinstance( + trackable_obj, tf.__internal__.tracking.TrackableDataStructure + ): + self._track_variables(trackable_obj) + + def _track_variables(self, value, position=None): + """Tracks `Variable`s including `Variable`s in `CompositeTensor`s.""" + for val in tf.nest.flatten(value): + if isinstance(val, tf.Variable): + self._track_variable(val, position=position) + elif tf_utils.is_extension_type(val): + # Manually expand extension types to track resource variables. + nested_vals = tf_utils.type_spec_from_value(val)._to_components( + val + ) + self._track_variables(nested_vals, position=position) + + def _track_variable(self, val, position=None): + """Tracks the given `tf.Variable`.""" + # Users may add extra weights/variables simply by assigning them to + # attributes (invalid for graph networks) + self._maybe_create_attribute("_trainable_weights", []) + self._maybe_create_attribute("_non_trainable_weights", []) + if val.trainable: + if any(val is w for w in self._trainable_weights): + return + if position is None: + self._trainable_weights.append(val) + else: + self._trainable_weights.insert(position, val) + else: + if any(val is w for w in self._non_trainable_weights): + return + if position is None: + self._non_trainable_weights.append(val) + else: + self._non_trainable_weights.insert(position, val) + backend.track_variable(val) + + def _gather_children_attribute(self, attribute): + assert attribute in { + "variables", + "trainable_variables", + "non_trainable_variables", + } + if hasattr(self, "_self_tracked_trackables"): + nested_layers = self._flatten_modules( + include_self=False, recursive=False + ) + return list( + itertools.chain.from_iterable( + getattr(layer, attribute) for layer in nested_layers + ) + ) + return [] + + def _flatten_layers(self, recursive=True, include_self=True): + for m in self._flatten_modules( + recursive=recursive, include_self=include_self + ): + if isinstance(m, Layer): + yield m + + def _flatten_modules(self, recursive=True, include_self=True): + """Flattens `tf.Module` instances (excluding `Metrics`). + + Args: + recursive: Whether to recursively flatten through submodules. + include_self: Whether to include this `Layer` instance. + + Yields: + `tf.Module` instance tracked by this `Layer`. + """ + if include_self: + yield self + + # Only instantiate set and deque if needed. 
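The `_track_variables` helper above leans on `tf.nest.flatten` to discover variables inside nested containers before handing each one to `_track_variable`. A standalone sketch of that flatten-and-filter idea (the `find_variables` helper is hypothetical, named here only for illustration):

```python
import tensorflow as tf


def find_variables(value):
    """Collect `tf.Variable`s from an arbitrarily nested structure,
    split by trainability -- the flatten-and-filter idea that
    `_track_variables` applies to newly assigned attributes."""
    trainable, non_trainable = [], []
    for val in tf.nest.flatten(value):
        if isinstance(val, tf.Variable):
            (trainable if val.trainable else non_trainable).append(val)
    return trainable, non_trainable


value = {"w": tf.Variable(1.0), "stats": [tf.Variable(0.0, trainable=False), 3]}
train_vars, frozen_vars = find_variables(value)
assert len(train_vars) == 1 and len(frozen_vars) == 1
```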
+ trackables = getattr(self, "_self_tracked_trackables", None) + if trackables: + seen_object_ids = set() + deque = collections.deque(trackables) + while deque: + trackable_obj = deque.popleft() + trackable_id = id(trackable_obj) + if trackable_id in seen_object_ids: + continue + seen_object_ids.add(trackable_id) + + # Metrics are not considered part of the Layer's topology. + if isinstance(trackable_obj, tf.Module) and not isinstance( + trackable_obj, metrics_mod.Metric + ): + yield trackable_obj + # Introspect recursively through sublayers. + if recursive: + subtrackables = getattr( + trackable_obj, "_self_tracked_trackables", None + ) + if subtrackables: + deque.extendleft(reversed(subtrackables)) + elif isinstance( + trackable_obj, + tf.__internal__.tracking.TrackableDataStructure, + ): + # Data structures are introspected even with + # `recursive=False`. + tracked_values = trackable_obj._values + if tracked_values: + deque.extendleft(reversed(tracked_values)) + + # This is a hack so that the is_layer (within + # training/trackable/layer_utils.py) check doesn't get the weights attr. + # TODO(b/110718070): Remove when fixed. + def _is_layer(self): + return True + + def _init_call_fn_args(self, expects_training_arg=None): + self._call_spec = layer_utils.CallFunctionSpec( + tf_inspect.getfullargspec(self.call) + ) + if expects_training_arg is not None: + self._call_spec.expects_training_arg = expects_training_arg + + @property + def _expects_training_arg(self): + """Whether the call function uses 'training' as a parameter.""" + return self._call_spec.expects_training_arg + + @property + def _expects_mask_arg(self): + return self._call_spec.expects_mask_arg + + @property + def _eager_losses(self): + # A list of loss values containing activity regularizers and losses + # manually added through `add_loss` during eager execution. It is + # cleared after every batch. Because we plan on eventually allowing the + # same model instance to be trained alternately in eager mode and + # graph mode, we need to keep track of eager losses and symbolic + # losses via separate attributes. + if not hasattr(self._thread_local, "_eager_losses"): + self._thread_local._eager_losses = [] + return self._thread_local._eager_losses + + @_eager_losses.setter + def _eager_losses(self, losses): + self._thread_local._eager_losses = losses + + def _dedup_weights(self, weights): + """Dedupe weights while maintaining order as much as possible.""" + output, seen_ids = [], set() + for w in weights: + if id(w) not in seen_ids: + output.append(w) + # Track the Variable's identity to avoid __eq__ issues. + seen_ids.add(id(w)) + return output + + # SavedModel properties. Please see keras/saving/saved_model for details. + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _set_save_spec(self, inputs, args=None, kwargs=None): + """Defines the save spec so that serialization can trace layer calls. + + The TensorSpecs of the call function `inputs`, `args`, and `kwargs` are + saved into a tuple of `([inputs] + args, kwargs)`. + + Args: + inputs: possibly nested inputs passed into the call function. + args: a list of positional arguments passed into call. + kwargs: a dictionary of keyword arguments passed into call. + """ + if self._saved_model_inputs_spec is not None: + return # Already set. + + inputs_spec = tf.nest.map_structure(tf_utils.get_tensor_spec, inputs) + args_spec = tf.nest.map_structure(tf_utils.get_tensor_spec, args or []) + kwargs_spec = {} + # Filter out non-tensor arguments from kwargs.
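The `_flatten_modules` generator above pairs a deque with a set of `id()`s, the same identity-based bookkeeping `_dedup_weights` uses, since `tf.Variable` overloads `__eq__` elementwise and is unusable for set membership. A generic sketch of the pattern (the `flatten_tree` helper is ours, not from this patch):

```python
import collections


def flatten_tree(root, children_fn, recursive=True):
    """Pre-order traversal with identity-based cycle protection,
    mirroring the deque/seen-ids pattern of `_flatten_modules`."""
    yield root
    to_visit = collections.deque(children_fn(root))
    seen_object_ids = set()
    while to_visit:
        node = to_visit.popleft()
        if id(node) in seen_object_ids:
            continue
        seen_object_ids.add(id(node))
        yield node
        if recursive:
            # extendleft reverses its argument, so reverse first to keep
            # children in their original order at the front of the queue.
            to_visit.extendleft(reversed(list(children_fn(node))))
```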
+ for key, kwarg in kwargs.items(): + flat_kwarg = tf.nest.flatten(kwarg) + flat_specs = [tf_utils.get_tensor_spec(x) for x in flat_kwarg] + if any(s is None for s in flat_specs): + continue + kwargs_spec[key] = tf.nest.pack_sequence_as(kwarg, flat_specs) + + self._saved_model_inputs_spec = inputs_spec + self._saved_model_arg_spec = ( + [inputs_spec] + list(args_spec), + kwargs_spec, + ) + + def _get_save_spec(self, dynamic_batch=True, inputs_only=True): + if self._saved_model_inputs_spec is None: + return None + + spec = tf.nest.map_structure( + lambda t: tf_utils.get_tensor_spec(t, dynamic_batch=dynamic_batch), + self._saved_model_arg_spec, + ) + return spec[0][0] if inputs_only else spec + + @property + def _trackable_saved_model_saver(self): + return layer_serialization.LayerSavedModelSaver(self) + + @property + def _object_identifier(self): + return self._trackable_saved_model_saver.object_identifier + + @property + def _tracking_metadata(self): + """Info about this layer to be saved into the SavedModel.""" + return self._trackable_saved_model_saver.tracking_metadata + + def _trackable_children(self, save_type="checkpoint", **kwargs): + if save_type == "savedmodel": + cache = kwargs["cache"] + # TODO(b/213628533): This must be called before super() to ensure + # that any input shape changes are applied before getting the config + # of the model. + children = self._trackable_saved_model_saver.trackable_children( + cache + ) + else: + children = {} + children.update(super()._trackable_children(save_type, **kwargs)) + return children + + @property + def _use_input_spec_as_call_signature(self): + # Whether input spec can be used as the call signature when tracing the + # Layer for SavedModel. By default, this is set to `True` for layers + # exported from the Keras library, because the layers more rigidly + # define the `input_specs` property (many custom layers only set the + # `ndims`) + return ( + get_canonical_name_for_symbol(type(self), api_name="keras") + is not None + ) + + def __getstate__(self): + # Override to support `copy.deepcopy` and pickling. + # Thread-local objects cannot be copied in Python 3, so pop these. + # Thread-local objects are used to cache losses in MirroredStrategy, and + # so shouldn't be copied. + state = self.__dict__.copy() + state.pop("_thread_local", None) + state.pop("_metrics_lock", None) + return state + + def __setstate__(self, state): + state["_thread_local"] = threading.local() + state["_metrics_lock"] = threading.Lock() + # Bypass Trackable logic as `__dict__` already contains this info. + object.__setattr__(self, "__dict__", state) + + def save_own_variables(self, store): + """Saves the state of the layer. + + You can override this method to take full control of how the state of + the layer is saved upon calling `model.save()`. + + Args: + store: Dict where the state of the model will be saved. + """ + all_vars = self._trainable_weights + self._non_trainable_weights + for i, v in enumerate(all_vars): + store[f"{i}"] = v.numpy() + + def load_own_variables(self, store): + """Loads the state of the layer. + + You can override this method to take full control of how the state of + the layer is loaded upon calling `keras.models.load_model()`. + + Args: + store: Dict from which the state of the model will be loaded. 
+ """ + self._update_trackables() + all_vars = self._trainable_weights + self._non_trainable_weights + if len(store.keys()) != len(all_vars): + raise ValueError( + f"Layer '{self.name}' expected {len(all_vars)} variables, " + "but received " + f"{len(store.keys())} variables during loading. " + f"Expected: {[v.name for v in all_vars]}" + ) + for i, v in enumerate(all_vars): + # TODO(rchao): check shapes and raise errors. + v.assign(store[f"{i}"]) - Yields: - `tf.Module` instance tracked by this `Layer`. - """ - if include_self: - yield self - - # Only instantiate set and deque if needed. - trackables = getattr(self, '_self_tracked_trackables', None) - if trackables: - seen_object_ids = set() - deque = collections.deque(trackables) - while deque: - trackable_obj = deque.popleft() - trackable_id = id(trackable_obj) - if trackable_id in seen_object_ids: - continue - seen_object_ids.add(trackable_id) - - # Metrics are not considered part of the Layer's topology. - if (isinstance(trackable_obj, tf.Module) and - not isinstance(trackable_obj, metrics_mod.Metric)): - yield trackable_obj - # Introspect recursively through sublayers. - if recursive: - subtrackables = getattr(trackable_obj, '_self_tracked_trackables', - None) - if subtrackables: - deque.extendleft(reversed(subtrackables)) - elif isinstance(trackable_obj, - tf.__internal__.tracking.TrackableDataStructure): - # Data structures are introspected even with `recursive=False`. - tracked_values = trackable_obj._values - if tracked_values: - deque.extendleft(reversed(tracked_values)) - - # This is a hack so that the is_layer (within - # training/trackable/layer_utils.py) check doesn't get the weights attr. - # TODO(b/110718070): Remove when fixed. - def _is_layer(self): - return True - - def _init_call_fn_args(self, expects_training_arg=None): - self._call_spec = layer_utils.CallFunctionSpec( - tf_inspect.getfullargspec(self.call)) - if expects_training_arg is not None: - self._call_spec.expects_training_arg = expects_training_arg - - @property - def _expects_training_arg(self): - """Whether the call function uses 'training' as a parameter.""" - return self._call_spec.expects_training_arg - - @property - def _expects_mask_arg(self): - return self._call_spec.expects_mask_arg - - @property - def _eager_losses(self): - # A list of loss values containing activity regularizers and losses - # manually added through `add_loss` during eager execution. It is cleared - # after every batch. - # Because we plan on eventually allowing a same model instance to be trained - # in eager mode or graph mode alternatively, we need to keep track of - # eager losses and symbolic losses via separate attributes. - if not hasattr(self._thread_local, '_eager_losses'): - self._thread_local._eager_losses = [] - return self._thread_local._eager_losses - - @_eager_losses.setter - def _eager_losses(self, losses): - self._thread_local._eager_losses = losses - - def _dedup_weights(self, weights): - """Dedupe weights while maintaining order as much as possible.""" - output, seen_ids = [], set() - for w in weights: - if id(w) not in seen_ids: - output.append(w) - # Track the Variable's identity to avoid __eq__ issues. - seen_ids.add(id(w)) - - return output - - # SavedModel properties. Please see keras/saving/saved_model for details. - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _set_save_spec(self, inputs, args=None, kwargs=None): - """Defines the save spec so that serialization is able to trace layer call. 
- - The TensorSpecs of the call function `inputs`, `args`, and `kwargs` are - saved into a tuple of `([inputs] + args, kwargs)`. - Args: - inputs: possibly nested inputs passed into the call function. - args: a list of positional arguments passed into call. - kwargs: a dictionary of keyword arguments passed into call. +class TensorFlowOpLayer(Layer): + """Wraps a TensorFlow Operation in a Layer. + + This class is used internally by the Functional API. When a user + uses a raw TensorFlow Operation on symbolic tensors originating + from an `Input` Layer, the resultant operation will be wrapped + with this Layer object in order to make the operation compatible + with the Keras API. + + This Layer will create a new, identical operation (except for inputs + and outputs) every time it is called. If `run_eagerly` is `True`, + the op creation and calculation will happen inside an Eager function. + + Instances of this Layer are created when `autolambda` is called, which + is whenever a Layer's `__call__` encounters symbolic inputs that do + not have Keras metadata, or when a Network's `__init__` encounters + outputs that do not have Keras metadata. + + Attributes: + node_def: String, the serialized NodeDef of the Op this layer will wrap. + name: String, the name of the Layer. + constants: Dict of NumPy arrays, the values of any Tensors needed for this + Operation that do not originate from a Keras `Input` Layer. Since all + placeholders must come from Keras `Input` Layers, these Tensors must be + treated as constant in the Functional API. + trainable: Bool, whether this Layer is trainable. Currently Variables are + not supported, and so this parameter has no effect. + dtype: The default dtype of this Layer. Inherited from `Layer` and has no + effect on this class, however is used in `get_config`. """ - if self._saved_model_inputs_spec is not None: - return # Already set. - - inputs_spec = tf.nest.map_structure(tf_utils.get_tensor_spec, inputs) - args_spec = tf.nest.map_structure(tf_utils.get_tensor_spec, args or []) - kwargs_spec = {} - # Filter out non-tensor arguments from kwargs. - for key, kwarg in kwargs.items(): - flat_kwarg = tf.nest.flatten(kwarg) - flat_specs = [tf_utils.get_tensor_spec(x) for x in flat_kwarg] - if any(s is None for s in flat_specs): - continue - kwargs_spec[key] = tf.nest.pack_sequence_as(kwarg, flat_specs) - - self._saved_model_inputs_spec = inputs_spec - self._saved_model_arg_spec = ([inputs_spec] + list(args_spec), kwargs_spec) - - def _get_save_spec(self, dynamic_batch=True, inputs_only=True): - if self._saved_model_inputs_spec is None: - return None - - spec = tf.nest.map_structure( - lambda t: tf_utils.get_tensor_spec(t, dynamic_batch=dynamic_batch), - self._saved_model_arg_spec) - return spec[0][0] if inputs_only else spec - - @property - def _trackable_saved_model_saver(self): - return layer_serialization.LayerSavedModelSaver(self) - - @property - def _object_identifier(self): - return self._trackable_saved_model_saver.object_identifier - - @property - def _tracking_metadata(self): - """Info about this layer to be saved into the SavedModel.""" - return self._trackable_saved_model_saver.tracking_metadata - - def _trackable_children(self, save_type='checkpoint', **kwargs): - if save_type == 'savedmodel': - cache = kwargs['cache'] - # TODO(b/213628533): This must be called before super() to ensure - # that any input shape changes are applied before getting the config of - # the model. 
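For context on the autolambda path described in the `TensorFlowOpLayer` docstring, here is roughly what triggers it from user code; depending on the Keras version the wrapper class is `TensorFlowOpLayer` or its successor `TFOpLambda`, so treat the printed layer names as indicative:

```python
import tensorflow as tf

inputs = tf.keras.Input(shape=(3,))
x = tf.abs(inputs)  # raw TF op applied to a symbolic KerasTensor
outputs = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs, outputs)

# The abs op was auto-wrapped into an op layer so the functional graph
# can replay it; it shows up alongside regular layers.
print([layer.name for layer in model.layers])
```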
- children = self._trackable_saved_model_saver.trackable_children(cache) - else: - children = {} - children.update(super()._trackable_children(save_type, **kwargs)) - return children - - @property - def _use_input_spec_as_call_signature(self): - # Whether input spec can be used as the call signature when tracing the - # Layer for SavedModel. By default, this is set to `True` for layers - # exported from the Keras library, because the layers more rigidly define - # the `input_specs` property (many custom layers only set the `ndims`) - return get_canonical_name_for_symbol(type(self), - api_name='keras') is not None - - def __getstate__(self): - # Override to support `copy.deepcopy` and pickling. - # Thread-local objects cannot be copied in Python 3, so pop these. - # Thread-local objects are used to cache losses in MirroredStrategy, and - # so shouldn't be copied. - state = self.__dict__.copy() - state.pop('_thread_local', None) - state.pop('_metrics_lock', None) - return state - - def __setstate__(self, state): - state['_thread_local'] = threading.local() - state['_metrics_lock'] = threading.Lock() - # Bypass Trackable logic as `__dict__` already contains this info. - object.__setattr__(self, '__dict__', state) + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__( + self, node_def, name, constants=None, trainable=True, dtype=None + ): + # Pass autocast=False, as if inputs are cast, input types might not + # match Operation type. + super(TensorFlowOpLayer, self).__init__( + name=_TF_OP_LAYER_NAME_PREFIX + name, + trainable=trainable, + dtype=dtype, + autocast=False, + ) + if isinstance(node_def, dict): + self.node_def = json_format.ParseDict( + node_def, tf.compat.v1.NodeDef() + ) + else: + if not isinstance(node_def, bytes): + node_def = node_def.encode("utf-8") + self.node_def = tf.compat.v1.NodeDef.FromString(node_def) + # JSON serialization stringifies keys which are integer input indices. + self.constants = ( + {int(index): constant for index, constant in constants.items()} + if constants is not None + else {} + ) + # Layer uses original op unless it is called on new inputs. + # This means `built` is not set in `__call__`. + self.built = True + + # Do not individually trace TensorflowOpLayers in the SavedModel. + self._must_restore_from_config = True -class TensorFlowOpLayer(Layer): - """Wraps a TensorFlow Operation in a Layer. - - This class is used internally by the Functional API. When a user - uses a raw TensorFlow Operation on symbolic tensors originating - from an `Input` Layer, the resultant operation will be wrapped - with this Layer object in order to make the operation compatible - with the Keras API. - - This Layer will create a new, identical operation (except for inputs - and outputs) every time it is called. If `run_eagerly` is `True`, - the op creation and calculation will happen inside an Eager function. - - Instances of this Layer are created when `autolambda` is called, which - is whenever a Layer's `__call__` encounters symbolic inputs that do - not have Keras metadata, or when a Network's `__init__` encounters - outputs that do not have Keras metadata. - - Attributes: - node_def: String, the serialized NodeDef of the Op this layer will wrap. - name: String, the name of the Layer. - constants: Dict of NumPy arrays, the values of any Tensors needed for this - Operation that do not originate from a Keras `Input` Layer. Since all - placeholders must come from Keras `Input` Layers, these Tensors must be - treated as constant in the Functional API. 
- trainable: Bool, whether this Layer is trainable. Currently Variables are - not supported, and so this parameter has no effect. - dtype: The default dtype of this Layer. Inherited from `Layer` and has no - effect on this class, however is used in `get_config`. - """ - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, - node_def, - name, - constants=None, - trainable=True, - dtype=None): - # Pass autocast=False, as if inputs are cast, input types might not match - # Operation type. - super(TensorFlowOpLayer, self).__init__( - name=_TF_OP_LAYER_NAME_PREFIX + name, trainable=trainable, dtype=dtype, - autocast=False) - if isinstance(node_def, dict): - self.node_def = json_format.ParseDict(node_def, tf.compat.v1.NodeDef()) - else: - if not isinstance(node_def, bytes): - node_def = node_def.encode('utf-8') - self.node_def = tf.compat.v1.NodeDef.FromString(node_def) - # JSON serialization stringifies keys which are integer input indices. - self.constants = ({ - int(index): constant for index, constant in constants.items() - } if constants is not None else {}) - # Layer uses original op unless it is called on new inputs. - # This means `built` is not set in `__call__`. - self.built = True - - # Do not individually trace TensorflowOpLayers in the SavedModel. - self._must_restore_from_config = True - - def call(self, inputs): - if tf.executing_eagerly(): - return self._defun_call(inputs) - return self._make_op(inputs) - - def _make_node_def(self, graph): - node_def = tf.compat.v1.NodeDef() - node_def.CopyFrom(self.node_def) - # Used in TPUReplicateContext to indicate whether this node has been cloned - # and to not add TPU attributes. - node_def.attr['_cloned'].b = True - node_def.name = graph.unique_name(node_def.name) - return node_def - - def _make_op(self, inputs): - inputs = tf.nest.flatten(inputs) - graph = inputs[0].graph - node_def = self._make_node_def(graph) - with graph.as_default(): - for index, constant in self.constants.items(): - # Recreate constant in graph to add distribution context. - value = tf.get_static_value(constant) - if value is not None: - constant = tf.constant(value, name=node_def.input[index]) - inputs.insert(index, constant) - # TODO(b/183990973): We should drop or consolidate these private api calls - # for adding an op to the graph and recording its gradient. - c_op = tf.__internal__.create_c_op(graph, node_def, inputs, control_inputs=[]) - op = graph._create_op_from_tf_operation(c_op) - op._control_flow_post_processing() - - # Record the gradient because custom-made ops don't go through the - # code-gen'd eager call path - op_type = tf.compat.as_str(op.op_def.name) - attr_names = [tf.compat.as_str(attr.name) for attr in op.op_def.attr] - attrs = [] - for attr_name in attr_names: - attrs.append(attr_name) - attrs.append(op.get_attr(attr_name)) - attrs = tuple(attrs) - tf.__internal__.record_gradient(op_type, op.inputs, attrs, op.outputs) - - if len(op.outputs) == 1: - return op.outputs[0] - return op.outputs - - @tf.function - def _defun_call(self, inputs): - """Wraps the op creation method in an Eager function for `run_eagerly`.""" - return self._make_op(inputs) - - def get_config(self): - config = super(TensorFlowOpLayer, self).get_config() - config.update({ - # `__init__` prefixes the name. Revert to the constructor argument. 
- 'name': config['name'][len(_TF_OP_LAYER_NAME_PREFIX):], - 'node_def': json_format.MessageToDict(self.node_def), - 'constants': { - i: backend.get_value(c) for i, c in self.constants.items() - } - }) - return config + def call(self, inputs): + if tf.executing_eagerly(): + return self._defun_call(inputs) + return self._make_op(inputs) + + def _make_node_def(self, graph): + node_def = tf.compat.v1.NodeDef() + node_def.CopyFrom(self.node_def) + # Used in TPUReplicateContext to indicate whether this node has been + # cloned and to not add TPU attributes. + node_def.attr["_cloned"].b = True + node_def.name = graph.unique_name(node_def.name) + return node_def + + def _make_op(self, inputs): + inputs = tf.nest.flatten(inputs) + graph = inputs[0].graph + node_def = self._make_node_def(graph) + with graph.as_default(): + for index, constant in self.constants.items(): + # Recreate constant in graph to add distribution context. + value = tf.get_static_value(constant) + if value is not None: + if isinstance(value, dict): + value = serialization_lib.deserialize_keras_object( + value + ) + constant = tf.constant(value, name=node_def.input[index]) + inputs.insert(index, constant) + # TODO(b/183990973): We should drop or consolidate these private api + # calls for adding an op to the graph and recording its gradient. + c_op = tf.__internal__.create_c_op( + graph, node_def, inputs, control_inputs=[] + ) + op = graph._create_op_from_tf_operation(c_op) + op._control_flow_post_processing() + + # Record the gradient because custom-made ops don't go through the + # code-gen'd eager call path + op_type = tf.compat.as_str(op.op_def.name) + attr_names = [ + tf.compat.as_str(attr.name) for attr in op.op_def.attr + ] + attrs = [] + for attr_name in attr_names: + attrs.append(attr_name) + attrs.append(op.get_attr(attr_name)) + attrs = tuple(attrs) + tf.__internal__.record_gradient( + op_type, op.inputs, attrs, op.outputs + ) + + if len(op.outputs) == 1: + return op.outputs[0] + return op.outputs + + @tf.function + def _defun_call(self, inputs): + """Wraps op creation method in an Eager function for `run_eagerly`.""" + return self._make_op(inputs) + + def get_config(self): + config = super(TensorFlowOpLayer, self).get_config() + config.update( + { + # `__init__` prefixes the name. Revert to the constructor + # argument. + "name": config["name"][len(_TF_OP_LAYER_NAME_PREFIX) :], + "node_def": json_format.MessageToDict(self.node_def), + "constants": { + i: backend.get_value(c) for i, c in self.constants.items() + }, + } + ) + return config class AddLoss(Layer): - """Adds its inputs as a loss. + """Adds its inputs as a loss. - Attributes: - unconditional: Whether or not the loss should be conditioned on the inputs. - """ + Attributes: + unconditional: Whether or not the loss should be conditioned on the + inputs. + """ - def __init__(self, unconditional, **kwargs): - # Pass autocast=False, as there is no reason to cast loss to a different - # dtype. - kwargs['autocast'] = False - super(AddLoss, self).__init__(**kwargs) - self.unconditional = unconditional + def __init__(self, unconditional, **kwargs): + # Pass autocast=False, as there is no reason to cast loss to a different + # dtype. 
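`AddLoss` simply funnels symbolic inputs into `Layer.add_loss`, so the end-user equivalent is a custom layer calling `add_loss` directly. A brief sketch (the `ActivityPenalty` layer is hypothetical):

```python
import tensorflow as tf


class ActivityPenalty(tf.keras.layers.Layer):
    """Hypothetical layer adding an L2 activity penalty via add_loss."""

    def __init__(self, rate=1e-4, **kwargs):
        super().__init__(**kwargs)
        self.rate = rate

    def call(self, inputs):
        # A loss that depends on `inputs` is conditional, matching
        # AddLoss with unconditional=False.
        self.add_loss(self.rate * tf.reduce_sum(tf.square(inputs)))
        return inputs


layer = ActivityPenalty()
layer(tf.ones((2, 4)))
print(layer.losses)  # [<tf.Tensor ...>]
```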
+ kwargs["autocast"] = False + super(AddLoss, self).__init__(**kwargs) + self.unconditional = unconditional - def call(self, inputs): - self.add_loss(inputs, inputs=(not self.unconditional)) - return inputs + def call(self, inputs): + self.add_loss(inputs, inputs=(not self.unconditional)) + return inputs - def get_config(self): - config = super(AddLoss, self).get_config() - config.update({'unconditional': self.unconditional}) - return config + def get_config(self): + config = super(AddLoss, self).get_config() + config.update({"unconditional": self.unconditional}) + return config class AddMetric(Layer): - """Adds its inputs as a metric. + """Adds its inputs as a metric. - Attributes: - aggregation: 'mean' or None. How the inputs should be aggregated. - metric_name: The name to use for this metric. - """ + Attributes: + aggregation: 'mean' or None. How the inputs should be aggregated. + metric_name: The name to use for this metric. + """ - def __init__(self, aggregation=None, metric_name=None, **kwargs): - super(AddMetric, self).__init__(**kwargs) - self.aggregation = aggregation - self.metric_name = metric_name + def __init__(self, aggregation=None, metric_name=None, **kwargs): + super(AddMetric, self).__init__(**kwargs) + self.aggregation = aggregation + self.metric_name = metric_name - def call(self, inputs): - self.add_metric(inputs, aggregation=self.aggregation, name=self.metric_name) - return inputs + def call(self, inputs): + self.add_metric( + inputs, aggregation=self.aggregation, name=self.metric_name + ) + return inputs - def get_config(self): - config = super(AddMetric, self).get_config() - config.update({ - 'aggregation': self.aggregation, - 'metric_name': self.metric_name - }) - return config + def get_config(self): + config = super(AddMetric, self).get_config() + config.update( + {"aggregation": self.aggregation, "metric_name": self.metric_name} + ) + return config -def _in_functional_construction_mode(layer, inputs, args, kwargs, input_list): # pylint: disable=unused-argument - """Check the arguments to see if we are constructing a functional model.""" - # We are constructing a functional model if any of the inputs - # are KerasTensors - return any( - isinstance(tensor, keras_tensor.KerasTensor) - for tensor in tf.nest.flatten([inputs, args, kwargs])) +def _in_functional_construction_mode(layer, inputs, args, kwargs, input_list): + """Check the arguments to see if we are constructing a functional model.""" + # We are constructing a functional model if any of the inputs + # are KerasTensors + return any( + isinstance(tensor, keras_tensor.KerasTensor) + for tensor in tf.nest.flatten([inputs, args, kwargs]) + ) def _convert_numpy_or_python_types(x): - if isinstance(x, (tf.Tensor, np.ndarray, float, int)): - return tf.convert_to_tensor(x) - return x + if isinstance(x, (tf.Tensor, np.ndarray, float, int)): + return tf.convert_to_tensor(x) + return x -@keras_export( - 'keras.__internal__.apply_name_scope_on_model_declaration', v1=[]) +@keras_export("keras.__internal__.apply_name_scope_on_model_declaration", v1=[]) def _apply_name_scope_on_model_declaration(enable): - """Apply `with tf.name_scope(...)` on model declaration. + """Apply `with tf.name_scope(...)` on model declaration. 
- ```python - tf.keras.__internal__.apply_name_scope_on_model_declaration(True) + ```python + tf.keras.__internal__.apply_name_scope_on_model_declaration(True) - inputs = input_layer.Input((3,)) - with tf.name_scope('MyScope'): - outputs = layers.Dense(10, name='MyDense')(inputs) - model = tf.keras.Model(inputs, outputs) + inputs = input_layer.Input((3,)) + with tf.name_scope('MyScope'): + outputs = layers.Dense(10, name='MyDense')(inputs) + model = tf.keras.Model(inputs, outputs) - # with `tf.keras.__internal__.apply_name_scope_on_model_declaration(True)`, - # The name of the dense layer is "model/MyScope/MyDense/*", and without, - # "model/MyDense/*" - ``` + # with `tf.keras.__internal__.apply_name_scope_on_model_declaration(True)`, + # the name of the dense layer is "model/MyScope/MyDense/*", and without, + # "model/MyDense/*" + ``` - Args: - enable: Enables if `True`, disables if `False`. - """ - if not isinstance(enable, bool): - raise TypeError( - '`enable` argument must be `True` or `False`, got {}'.format(enable)) + Args: + enable: Enables if `True`, disables if `False`. + """ + if not isinstance(enable, bool): + raise TypeError( + f"`enable` argument must be `True` or `False`, got {enable}" + ) - global _is_name_scope_on_model_declaration_enabled - _is_name_scope_on_model_declaration_enabled = enable + global _is_name_scope_on_model_declaration_enabled + _is_name_scope_on_model_declaration_enabled = enable -@keras_export('keras.__internal__.layers.BaseRandomLayer') +@keras_export("keras.__internal__.layers.BaseRandomLayer") class BaseRandomLayer(Layer): - """A layer handle the random number creation and savemodel behavior.""" - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, seed=None, force_generator=False, **kwargs): - """Initialize the BaseRandomLayer. - - Note that the constructor is annotated with - @no_automatic_dependency_tracking. This is to skip the auto - tracking of self._random_generator instance, which is an AutoTrackable. - The backend.RandomGenerator could contain a tf.random.Generator instance - which will have tf.Variable as the internal state. We want to avoid saving - that state into model.weights and checkpoints for backward compatibility - reason. In the meantime, we still need to make them visible to SavedModel - when it is tracing the tf.function for the `call()`. - See _list_extra_dependencies_for_serialization below for more details. + """A layer that handles random number creation and SavedModel behavior.""" + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__( + self, seed=None, force_generator=False, rng_type=None, **kwargs + ): + """Initialize the BaseRandomLayer. + + Note that the constructor is annotated with + @no_automatic_dependency_tracking. This is to skip the auto + tracking of the self._random_generator instance, which is an AutoTrackable. + The backend.RandomGenerator could contain a tf.random.Generator instance + which will have tf.Variable as the internal state. We want to avoid + saving that state into model.weights and checkpoints for backward + compatibility reasons. In the meantime, we still need to make them + visible to SavedModel when it is tracing the tf.function for the + `call()`. + See _list_extra_dependencies_for_serialization below for more details. + + Args: + seed: optional integer, used to create RandomGenerator. + force_generator: boolean, default to False, whether to force the + RandomGenerator to use the code branch of tf.random.Generator.
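The `rng_type` choices introduced below map onto TensorFlow's two RNG families. A quick illustration with the underlying public APIs (this sketches the stateful/stateless distinction, not `backend.RandomGenerator` internals):

```python
import tensorflow as tf

# "stateful": a tf.random.Generator keeps its state in a tf.Variable,
# so consecutive draws differ -- this is the state BaseRandomLayer
# deliberately keeps out of model.weights and checkpoints.
gen = tf.random.Generator.from_seed(42)
a = gen.normal(shape=(2,))
b = gen.normal(shape=(2,))  # differs from `a`; the state advanced

# "stateless": the output is a pure function of the seed, so repeated
# calls with the same seed are identical.
c = tf.random.stateless_normal(shape=(2,), seed=[4, 2])
d = tf.random.stateless_normal(shape=(2,), seed=[4, 2])  # equals `c`
```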
+ rng_type: string, the rng type that will be passed to backend + RandomGenerator. `None` will allow RandomGenerator to choose + types by itself. Valid values are "stateful", "stateless", + "legacy_stateful". Defaults to `None`. + **kwargs: other keyword arguments that will be passed to the parent + class. + """ + super().__init__(**kwargs) + self._random_generator = backend.RandomGenerator( + seed, force_generator=force_generator, rng_type=rng_type + ) - Args: - seed: optional integer, used to create RandomGenerator. - force_generator: boolean, default to False, whether to force the - RandomGenerator to use the code branch of tf.random.Generator. - **kwargs: other keyword arguments that will be passed to the parent class - """ - super().__init__(**kwargs) - self._random_generator = backend.RandomGenerator( - seed, force_generator=force_generator) - # Eagerly init the generator to avoid any issue like b/206821407 - self._random_generator._maybe_init() - - def _trackable_children(self, save_type='checkpoint', **kwargs): - if save_type == 'savedmodel': - cache = kwargs['cache'] - # TODO(b/213628533): This must be called before super() to ensure - # that any input shape changes are applied before getting the config of - # the model. - children = self._trackable_saved_model_saver.trackable_children(cache) - # This method exposes the self._random_generator to SavedModel only - # (not layer.weights and checkpoint). - children['_random_generator'] = self._random_generator - else: - children = {} - children.update(super()._trackable_children(save_type, **kwargs)) - return children + def build(self, input_shape): + super().build(input_shape) + self._random_generator._maybe_init() + + def _trackable_children(self, save_type="checkpoint", **kwargs): + if save_type == "savedmodel": + cache = kwargs["cache"] + # TODO(b/213628533): This must be called before super() to ensure + # that any input shape changes are applied before getting the config + # of the model. + children = self._trackable_saved_model_saver.trackable_children( + cache + ) + # This method exposes the self._random_generator to SavedModel only + # (not layer.weights and checkpoint). + children["_random_generator"] = self._random_generator + else: + children = {} + children.update(super()._trackable_children(save_type, **kwargs)) + return children + + def _lookup_dependency(self, name, cached_dependencies=None): + # When loading from a Keras SavedModel, make sure that the loader + # can find the random generator, otherwise the loader will assume that + # it does not exist, and will try to create a new generator. + if name == "_random_generator": + return self._random_generator + elif cached_dependencies is not None: + return cached_dependencies.get(name) + else: + return super()._lookup_dependency(name) diff --git a/keras/engine/base_layer_test.py b/keras/engine/base_layer_test.py index 7182da8fa36a..0389ea5126c1 100644 --- a/keras/engine/base_layer_test.py +++ b/keras/engine/base_layer_test.py @@ -13,1950 +13,2071 @@
# ============================================================================== """Tests for TensorFlow 2.0 layer behavior.""" -# pylint: disable=g-bad-import-order -import tensorflow.compat.v2 as tf - import copy import os import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend -from keras.testing_infra import test_combinations from keras import layers from keras import regularizers -from keras.testing_infra import test_utils from keras.engine import base_layer from keras.engine import input_layer from keras.engine import sequential from keras.engine import training as training_lib from keras.legacy_tf_layers import core as legacy_core -from keras.optimizers.optimizer_v2 import rmsprop +from keras.optimizers.legacy import rmsprop +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils from keras.utils import control_flow_util class DynamicLayer(base_layer.Layer): + def __init__(self, dynamic=False, **kwargs): + super().__init__(dynamic=dynamic, **kwargs) - def __init__(self, dynamic=False, **kwargs): - super().__init__(dynamic=dynamic, **kwargs) - - def call(self, inputs): - samples = tf.TensorArray( - dtype=tf.float32, size=tf.shape(inputs)[0]) - for idx, sample in enumerate(inputs): - samples = samples.write(idx, tf.square(sample)) - return samples.stack() + def call(self, inputs): + samples = tf.TensorArray(dtype=tf.float32, size=tf.shape(inputs)[0]) + for idx, sample in enumerate(inputs): + samples = samples.write(idx, tf.square(sample)) + return samples.stack() - def compute_output_shape(self, input_shape): - return input_shape + def compute_output_shape(self, input_shape): + return input_shape class InvalidLayer(base_layer.Layer): - - def call(self, inputs): - raise ValueError('You did something wrong!') + def call(self, inputs): + raise ValueError("You did something wrong!") @test_utils.run_v2_only class BaseLayerTest(test_combinations.TestCase): - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_layer_instrumentation(self): - layer = layers.Add() - self.assertTrue(layer._instrumented_keras_api) - self.assertTrue(layer._instrumented_keras_layer_class) - self.assertFalse(layer._instrumented_keras_model_class) - self.assertTrue(base_layer.keras_api_gauge.get_cell('tf.keras.layers.Add')) - - # Verify this was not instrumented as a legacy layer - self.assertFalse( - base_layer.keras_api_gauge.get_cell('legacy_layer').value()) - base_layer.keras_api_gauge.get_cell('tf.keras.layers.Add').set(False) - - @test_combinations.generate(test_combinations.keras_model_type_combinations()) - def test_dynamic_layer(self): - model = test_utils.get_model_from_layers([DynamicLayer(dynamic=True)], - input_shape=(3,)) - self.assertEqual(model.dynamic, True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - self.assertEqual(model.run_eagerly, True) - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - @test_combinations.generate(test_combinations.keras_model_type_combinations()) - def test_dynamic_layer_error(self): - # Functional Models hit the `dyanamic=True` error during construction. - # Subclass Models should just throw the original autograph error during - # execution. 
- raised_error = False - try: - model = test_utils.get_model_from_layers([DynamicLayer()], - input_shape=(3,)) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - except tf.errors.OperatorNotAllowedInGraphError as e: - if 'iterating over `tf.Tensor`' in str(e): - raised_error = True - elif 'Iterating over a symbolic `tf.Tensor`' in str(e): - raised_error = True - except TypeError as e: - if 'attempting to use Python control flow' in str(e): - raised_error = True - elif 'Attempting to use Python control flow' in str(e): - raised_error = True - self.assertTrue(raised_error) - - @test_combinations.generate(test_combinations.keras_model_type_combinations()) - def test_dynamic_layer_error_running_in_graph_mode(self): - with tf.compat.v1.get_default_graph().as_default(): - model = test_utils.get_model_from_layers([DynamicLayer(dynamic=True)], - input_shape=(3,)) - self.assertEqual(model.dynamic, True) - # But then you cannot run the model since you're in a graph scope. - with self.assertRaisesRegex(ValueError, - 'You must enable eager execution'): - model.compile(rmsprop.RMSprop(0.001), loss='mse') - - def test_manual_compute_output_shape(self): - - class BuildCounter(base_layer.Layer): - - def __init__(self, *args, **kwargs): # pylint: disable=redefined-outer-name - super().__init__(*args, **kwargs) - self.build_counter = 0 - - def build(self, input_shape): - self.build_counter += 1 - self.build_shape = input_shape - - def call(self, inputs): - return inputs - - layer = BuildCounter(dtype=tf.float64) - output_shape = layer.compute_output_shape((None, 10)) - self.assertEqual(layer.build_counter, 1) - self.assertEqual(layer.build_shape.as_list(), [None, 10]) - self.assertEqual(output_shape.as_list(), [None, 10]) - output_signature = layer.compute_output_signature( - tf.TensorSpec(dtype=tf.float64, shape=[None, 10])) - self.assertEqual(layer.build_counter, 1) - self.assertEqual(layer.build_shape.as_list(), [None, 10]) - self.assertEqual(output_signature.dtype, tf.float64) - self.assertEqual(output_signature.shape.as_list(), [None, 10]) - layer(np.ones((5, 10))) - self.assertEqual(layer.build_counter, 1) - self.assertEqual(layer.build_shape.as_list(), [None, 10]) - - def test_dynamic_layer_with_deferred_sequential_model(self): - model = sequential.Sequential([DynamicLayer(dynamic=True), layers.Dense(3)]) - self.assertEqual(model.dynamic, True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - self.assertEqual(model.run_eagerly, True) - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - def test_nested_dynamic_layers_in_eager_mode(self): - inputs = input_layer.Input((3,)) - outputs = DynamicLayer(dynamic=True)(inputs) - inner_model = training_lib.Model(inputs, outputs) - self.assertEqual(inner_model.dynamic, True) - - inputs = input_layer.Input((3,)) - x = DynamicLayer(dynamic=True)(inputs) - outputs = inner_model(x) - - model = training_lib.Model(inputs, outputs) - self.assertEqual(model.dynamic, True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - self.assertEqual(model.run_eagerly, True) - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - def test_dynamic_subclassed_model_no_shape_inference(self): - - class MyModel(training_lib.Model): - - def __init__(self): - super().__init__(dynamic=True) - self.layer1 = layers.Dense(3) - self.layer2 = layers.Dense(3) - - def call(self, inputs): - if tf.reduce_sum(inputs) > 0: - return self.layer1(inputs) - else: - return 
self.layer2(inputs) - - model = MyModel() - self.assertEqual(model.dynamic, True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - self.assertEqual(model.run_eagerly, True) - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - self.assertEqual(model.outputs, None) - - def test_dynamic_subclassed_model_with_shape_inference(self): - - class MyModel(training_lib.Model): - - def __init__(self): - super().__init__(dynamic=True) - self.layer1 = layers.Dense(3) - self.layer2 = layers.Dense(3) - - def call(self, inputs): - if tf.reduce_sum(inputs) > 0: - return self.layer1(inputs) - else: - return self.layer2(inputs) - - def compute_output_shape(self, input_shape): - return tuple(input_shape[:-1].as_list()) + (3,) - - model = MyModel() - self.assertEqual(model.dynamic, True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - x, y = np.random.random((2, 3)), np.random.random((2, 3)) - model.train_on_batch(x, y) - outputs = model(x) - self.assertEqual(outputs.shape.as_list(), [2, 3]) - - def test_deepcopy(self): - bias_reg = lambda x: 1e-3 * tf.reduce_sum(x) - layer = layers.Conv2D(32, (3, 3), bias_regularizer=bias_reg) - # Call the Layer on data to generate regularize losses. - layer(tf.ones((1, 10, 10, 3))) - self.assertLen(layer.losses, 1) - new_layer = copy.deepcopy(layer) - self.assertEqual(new_layer.bias_regularizer, bias_reg) - self.assertEqual(layer.get_config(), new_layer.get_config()) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_invalid_forward_pass(self): - inputs = input_layer.Input((3,)) - with self.assertRaisesRegex(ValueError, 'You did something wrong!'): - _ = InvalidLayer()(inputs) - - def test_no_legacy_model(self): - inputs = input_layer.Input((1,)) - legacy_dense_0 = legacy_core.Dense(1, name='legacy_dense_0') - legacy_dense_1 = legacy_core.Dense(1, name='legacy_dense_1') - - layer = legacy_dense_0(inputs) - layer = layers.Dense(1)(layer) - layer = legacy_dense_1(layer) - - expected_regex = (r'The following are legacy tf\.layers\.Layers:\n ' - '{}\n {}'.format(legacy_dense_0, legacy_dense_1)) - - with self.assertRaisesRegex(TypeError, expected_regex): - _ = training_lib.Model(inputs=[inputs], outputs=[layer]) - - model = training_lib.Model(inputs=[inputs], outputs=[inputs]) - with self.assertRaisesRegex(TypeError, expected_regex): - model._insert_layers([legacy_dense_0, legacy_dense_1]) - - def test_no_legacy_sequential(self): - layer = [layers.Dense(1), legacy_core.Dense(1, name='legacy_dense_0')] - - expected_regex = r'legacy tf\.layers\.Layers:\n {}'.format(layer[1]) - with self.assertRaisesRegex(TypeError, expected_regex): - _ = sequential.Sequential(layer) - - with self.assertRaisesRegex(TypeError, expected_regex): - _ = sequential.Sequential([input_layer.Input(shape=(4,))] + layer) - - model = sequential.Sequential() - with self.assertRaisesRegex(TypeError, expected_regex): - for l in layer: - model.add(l) - - @test_combinations.generate( - test_combinations.times( - test_combinations.keras_model_type_combinations(), - test_combinations.combine(mode=['graph', 'eager']))) - def test_build_with_numpy_data(self): - model_layers = [ - layers.Dense(3, activation='relu', kernel_initializer='ones'), - layers.Dense(1, activation='sigmoid', kernel_initializer='ones') - ] - model = test_utils.get_model_from_layers(model_layers, input_shape=(4,)) - model(np.zeros((2, 4), dtype='float32')) - self.assertTrue(model.built) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 
'eager'])) - def test_default_add_weight(self): - - class TestLayer(base_layer.Layer): - - def __init__(self): - super().__init__() - self.default_weight = self.add_weight() - self.weight_without_name = self.add_weight(shape=(3, 4)) - self.regularized_weight_without_name = self.add_weight( - shape=(3, 4), regularizer='l2') - - layer = TestLayer() - self.assertEqual(layer.default_weight.shape.as_list(), []) - self.assertEqual(layer.weight_without_name.shape.as_list(), [3, 4]) - self.assertEqual(layer.default_weight.dtype.name, 'float32') - self.assertEqual(layer.weight_without_name.dtype.name, 'float32') - self.assertEqual(len(layer.losses), 1) - if not tf.executing_eagerly(): - # Cannot access tensor.name in eager execution. - self.assertIn('Variable_2/Regularizer', layer.losses[0].name) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_add_weight_by_getter(self): - layer = base_layer.Layer() - variable = tf.Variable('abc') - added = layer.add_weight( - dtype=tf.string, getter=lambda *_, **__: variable) - self.assertIs(variable, added) - - @test_combinations.generate( - test_combinations.keras_mode_combinations(mode=['eager'])) - def test_learning_phase_freezing_for_layers(self): - - class LearningPhaseLayer(base_layer.Layer): - - def call(self, inputs): - return backend.in_train_phase(lambda: tf.ones_like(inputs), - lambda: tf.zeros_like(inputs)) - - def get_learning_phase_value(): - model = sequential.Sequential([LearningPhaseLayer(input_shape=(1,))]) - model._run_eagerly = test_utils.should_run_eagerly() - return np.sum(model(np.ones((1, 1)))) - - self.assertEqual(get_learning_phase_value(), 0) - - # Test scope. - with backend.learning_phase_scope(1): - self.assertEqual(get_learning_phase_value(), 1) - - # The effects of the scope end after exiting it. - self.assertEqual(get_learning_phase_value(), 0) - - # Test setting. 
- backend.set_learning_phase(1) - self.assertEqual(get_learning_phase_value(), 1) - backend.set_learning_phase(0) - self.assertEqual(get_learning_phase_value(), 0) - - # Cannot be enabled with `run_eagerly=True`, see b/123904578 - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_layer_can_return_variable(self): - - class ComputeSum(base_layer.Layer): - - def __init__(self): - super().__init__() - self.total = tf.Variable( - initial_value=tf.zeros((1, 1)), trainable=False) + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_layer_instrumentation(self): + layer = layers.Add() + self.assertTrue(layer._instrumented_keras_api) + self.assertTrue(layer._instrumented_keras_layer_class) + self.assertFalse(layer._instrumented_keras_model_class) + self.assertTrue( + base_layer.keras_api_gauge.get_cell("tf.keras.layers.Add") + ) + + # Verify this was not instrumented as a legacy layer + self.assertFalse( + base_layer.keras_api_gauge.get_cell("legacy_layer").value() + ) + base_layer.keras_api_gauge.get_cell("tf.keras.layers.Add").set(False) + + @test_combinations.generate( + test_combinations.keras_model_type_combinations() + ) + def test_dynamic_layer(self): + model = test_utils.get_model_from_layers( + [DynamicLayer(dynamic=True)], input_shape=(3,) + ) + self.assertEqual(model.dynamic, True) + model.compile(rmsprop.RMSprop(0.001), loss="mse") + self.assertEqual(model.run_eagerly, True) + model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) + + @test_combinations.generate( + test_combinations.keras_model_type_combinations() + ) + def test_dynamic_layer_error(self): + # Functional Models hit the `dynamic=True` error during construction. + # Subclass Models should just throw the original autograph error during + # execution. + raised_error = False + try: + model = test_utils.get_model_from_layers( + [DynamicLayer()], input_shape=(3,) + ) + model.compile(rmsprop.RMSprop(0.001), loss="mse") + model.train_on_batch( + np.random.random((2, 3)), np.random.random((2, 3)) + ) + except tf.errors.OperatorNotAllowedInGraphError as e: + if "iterating over `tf.Tensor`" in str(e): + raised_error = True + elif "Iterating over a symbolic `tf.Tensor`" in str(e): + raised_error = True + except TypeError as e: + if "attempting to use Python control flow" in str(e): + raised_error = True + elif "Attempting to use Python control flow" in str(e): + raised_error = True + self.assertTrue(raised_error) + + @test_combinations.generate( + test_combinations.keras_model_type_combinations() + ) + def test_dynamic_layer_error_running_in_graph_mode(self): + with tf.compat.v1.get_default_graph().as_default(): + model = test_utils.get_model_from_layers( + [DynamicLayer(dynamic=True)], input_shape=(3,) + ) + self.assertEqual(model.dynamic, True) + # But then you cannot run the model since you're in a graph scope.
+ with self.assertRaisesRegex( + ValueError, "You must enable eager execution" + ): + model.compile(rmsprop.RMSprop(0.001), loss="mse") + + def test_manual_compute_output_shape(self): + class BuildCounter(base_layer.Layer): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.build_counter = 0 + + def build(self, input_shape): + self.build_counter += 1 + self.build_shape = input_shape + + def call(self, inputs): + return inputs + + layer = BuildCounter(dtype=tf.float64) + output_shape = layer.compute_output_shape((None, 10)) + self.assertEqual(layer.build_counter, 1) + self.assertEqual(layer.build_shape.as_list(), [None, 10]) + self.assertEqual(output_shape.as_list(), [None, 10]) + output_signature = layer.compute_output_signature( + tf.TensorSpec(dtype=tf.float64, shape=[None, 10]) + ) + self.assertEqual(layer.build_counter, 1) + self.assertEqual(layer.build_shape.as_list(), [None, 10]) + self.assertEqual(output_signature.dtype, tf.float64) + self.assertEqual(output_signature.shape.as_list(), [None, 10]) + layer(np.ones((5, 10))) + self.assertEqual(layer.build_counter, 1) + self.assertEqual(layer.build_shape.as_list(), [None, 10]) + + def test_dynamic_layer_with_deferred_sequential_model(self): + model = sequential.Sequential( + [DynamicLayer(dynamic=True), layers.Dense(3)] + ) + self.assertEqual(model.dynamic, True) + model.compile(rmsprop.RMSprop(0.001), loss="mse") + self.assertEqual(model.run_eagerly, True) + model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) + + def test_nested_dynamic_layers_in_eager_mode(self): + inputs = input_layer.Input((3,)) + outputs = DynamicLayer(dynamic=True)(inputs) + inner_model = training_lib.Model(inputs, outputs) + self.assertEqual(inner_model.dynamic, True) + + inputs = input_layer.Input((3,)) + x = DynamicLayer(dynamic=True)(inputs) + outputs = inner_model(x) + + model = training_lib.Model(inputs, outputs) + self.assertEqual(model.dynamic, True) + model.compile(rmsprop.RMSprop(0.001), loss="mse") + self.assertEqual(model.run_eagerly, True) + model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) + + def test_dynamic_subclassed_model_no_shape_inference(self): + class MyModel(training_lib.Model): + def __init__(self): + super().__init__(dynamic=True) + self.layer1 = layers.Dense(3) + self.layer2 = layers.Dense(3) + + def call(self, inputs): + if tf.reduce_sum(inputs) > 0: + return self.layer1(inputs) + else: + return self.layer2(inputs) + + model = MyModel() + self.assertEqual(model.dynamic, True) + model.compile(rmsprop.RMSprop(0.001), loss="mse") + self.assertEqual(model.run_eagerly, True) + model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) + self.assertEqual(model.outputs, None) + + def test_dynamic_subclassed_model_with_shape_inference(self): + class MyModel(training_lib.Model): + def __init__(self): + super().__init__(dynamic=True) + self.layer1 = layers.Dense(3) + self.layer2 = layers.Dense(3) + + def call(self, inputs): + if tf.reduce_sum(inputs) > 0: + return self.layer1(inputs) + else: + return self.layer2(inputs) + + def compute_output_shape(self, input_shape): + return tuple(input_shape[:-1].as_list()) + (3,) + + model = MyModel() + self.assertEqual(model.dynamic, True) + model.compile(rmsprop.RMSprop(0.001), loss="mse") + x, y = np.random.random((2, 3)), np.random.random((2, 3)) + model.train_on_batch(x, y) + outputs = model(x) + self.assertEqual(outputs.shape.as_list(), [2, 3]) + + def test_deepcopy(self): + bias_reg = lambda x: 1e-3 * 
tf.reduce_sum(x) + layer = layers.Conv2D(32, (3, 3), bias_regularizer=bias_reg) + # Call the Layer on data to generate regularizer losses. + layer(tf.ones((1, 10, 10, 3))) + self.assertLen(layer.losses, 1) + new_layer = copy.deepcopy(layer) + self.assertEqual(new_layer.bias_regularizer, bias_reg) + self.assertEqual(layer.get_config(), new_layer.get_config()) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_invalid_forward_pass(self): + inputs = input_layer.Input((3,)) + with self.assertRaisesRegex(ValueError, "You did something wrong!"): + _ = InvalidLayer()(inputs) + + def test_no_legacy_model(self): + inputs = input_layer.Input((1,)) + legacy_dense_0 = legacy_core.Dense(1, name="legacy_dense_0") + legacy_dense_1 = legacy_core.Dense(1, name="legacy_dense_1") + + layer = legacy_dense_0(inputs) + layer = layers.Dense(1)(layer) + layer = legacy_dense_1(layer) + + expected_regex = ( + r"The following are legacy tf\.layers\.Layers:\n " + "{}\n {}".format(legacy_dense_0, legacy_dense_1) + ) + + with self.assertRaisesRegex(TypeError, expected_regex): + _ = training_lib.Model(inputs=[inputs], outputs=[layer]) + + model = training_lib.Model(inputs=[inputs], outputs=[inputs]) + with self.assertRaisesRegex(TypeError, expected_regex): + model._insert_layers([legacy_dense_0, legacy_dense_1]) + + def test_no_legacy_sequential(self): + layer = [layers.Dense(1), legacy_core.Dense(1, name="legacy_dense_0")] + + expected_regex = r"legacy tf\.layers\.Layers:\n {}".format(layer[1]) + with self.assertRaisesRegex(TypeError, expected_regex): + _ = sequential.Sequential(layer) + + with self.assertRaisesRegex(TypeError, expected_regex): + _ = sequential.Sequential([input_layer.Input(shape=(4,))] + layer) + + model = sequential.Sequential() + with self.assertRaisesRegex(TypeError, expected_regex): + for l in layer: + model.add(l) + + @test_combinations.generate( + test_combinations.times( + test_combinations.keras_model_type_combinations(), + test_combinations.combine(mode=["graph", "eager"]), + ) + ) + def test_build_with_numpy_data(self): + model_layers = [ + layers.Dense(3, activation="relu", kernel_initializer="ones"), + layers.Dense(1, activation="sigmoid", kernel_initializer="ones"), + ] + model = test_utils.get_model_from_layers(model_layers, input_shape=(4,)) + model(np.zeros((2, 4), dtype="float32")) + self.assertTrue(model.built) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_default_add_weight(self): + class TestLayer(base_layer.Layer): + def __init__(self): + super().__init__() + self.default_weight = self.add_weight() + self.weight_without_name = self.add_weight(shape=(3, 4)) + self.regularized_weight_without_name = self.add_weight( + shape=(3, 4), regularizer="l2" + ) + + layer = TestLayer() + self.assertEqual(layer.default_weight.shape.as_list(), []) + self.assertEqual(layer.weight_without_name.shape.as_list(), [3, 4]) + self.assertEqual(layer.default_weight.dtype.name, "float32") + self.assertEqual(layer.weight_without_name.dtype.name, "float32") + self.assertEqual(len(layer.losses), 1) if not tf.executing_eagerly():
argument in a defuned `call`.""" - - @tf.function - def call(self, inputs, training=None): - if training is None: - training = backend.learning_phase() - return control_flow_util.smart_cond( - training, lambda: tf.ones_like(inputs), - lambda: tf.zeros_like(inputs)) - - return TrainingLayer() - - # b/124459427: can't test with `run_eagerly=True` for now. - @test_combinations.generate( - test_combinations.times( - test_combinations.keras_mode_combinations(), - test_combinations.keras_model_type_combinations())) - def test_training_arg_in_defun(self): - layer = self._get_layer_with_training_arg() - model = test_utils.get_model_from_layers([layer], input_shape=(1,)) - model.compile(rmsprop.RMSprop(0.), - loss='mae') - history = model.fit(np.zeros((1, 1)), np.zeros((1, 1))) - self.assertEqual(history.history['loss'][0], 1.) - loss = model.evaluate(np.zeros((1, 1)), np.zeros((1, 1))) - self.assertEqual(loss, 0.) - - # Test that the argument injection performed in `call` is not active - # when the argument is passed explicitly. - layer = self._get_layer_with_training_arg() - inputs = input_layer.Input(shape=(1,)) - # Pass `training` by name - outputs = layer(inputs, training=False) - model = training_lib.Model(inputs, outputs) - model.compile(rmsprop.RMSprop(0.), - loss='mae') - history = model.fit(np.zeros((1, 1)), np.zeros((1, 1))) - self.assertEqual(history.history['loss'][0], 0.) - - @test_combinations.generate( - test_combinations.times( - test_combinations.keras_mode_combinations(), - test_combinations.keras_model_type_combinations())) - def test_raw_variable_assignment(self): - - class RawVariableLayer(base_layer.Layer): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - # Test variables in nested structure. - self.var_list = [tf.Variable(1.), {'a': tf.Variable(2.)}] - - def call(self, inputs): - return inputs * self.var_list[0] * self.var_list[1]['a'] - - model = test_utils.get_model_from_layers([RawVariableLayer()], - input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 10)), np.ones((10, 10)) - # Checks that variables get initialized. - model.fit(x, y, batch_size=2, epochs=2) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_composite_variable_assignment(self): - - class Spec(tf.TypeSpec): - - value_type = property(lambda self: CompositeVariable) - - def _component_specs(self): - pass - - def _serialize(self): - pass - - def _to_components(self, value): - return value._variables - - def _from_components(self, variable_list): - return CompositeVariable(variable_list) - - class CompositeVariable(tf.__internal__.CompositeTensor): - - def __init__(self, variable_list): - self._variables = variable_list - - @property - def _type_spec(self): - return Spec() - - class CompositeVariableLayer(base_layer.Layer): - - def __init__(self): - super().__init__() - self.composite_var = CompositeVariable( - [tf.Variable(1.), - tf.Variable(2.)]) - - layer = CompositeVariableLayer() - self.assertLen(layer.weights, 2) - self.assertIsInstance(layer.weights[0], tf.Variable) - self.assertIsInstance(layer.weights[1], tf.Variable) - self.assertEqual(self.evaluate(layer.weights[0]), 1.) - self.assertEqual(self.evaluate(layer.weights[1]), 2.) 
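# A minimal standalone sketch of the container-based variable tracking that
# test_raw_variable_assignment and test_composite_variable_assignment
# exercise: tf.Variables nested in plain Python lists/dicts assigned to a
# layer attribute are auto-tracked and surface in `weights`. Assumes the
# public tf.keras.layers.Layer behaves like the internal base_layer.Layer
# used in these tests.
import tensorflow as tf


class ScaleLayer(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()
        # Both nested variables are discovered by attribute tracking.
        self.scales = [tf.Variable(2.0), {"a": tf.Variable(3.0)}]

    def call(self, inputs):
        return inputs * self.scales[0] * self.scales[1]["a"]


layer = ScaleLayer()
assert len(layer.trainable_weights) == 2
assert float(layer(tf.ones((1, 1)))) == 6.0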
- - def test_exception_if_trainable_not_boolean(self): - base_layer.Layer(trainable=True) - base_layer.Layer(trainable=tf.constant(True)) - base_layer.Layer(trainable=tf.Variable(tf.constant(True))) - with self.assertRaisesRegex( - TypeError, 'Expected `trainable` argument to be a boolean'): - base_layer.Layer(trainable=0) - - def test_exception_if_dynamic_not_boolean(self): - base_layer.Layer(dynamic=True) - with self.assertRaisesRegex(TypeError, - 'Expected `dynamic` argument to be a boolean'): - base_layer.Layer(dynamic=0) - - def test_exception_if_name_not_string_or_none(self): - base_layer.Layer(name=None) - base_layer.Layer(name='layer_name') - with self.assertRaisesRegex(TypeError, - 'Expected `name` argument to be a string'): - base_layer.Layer(name=0) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_layer_names(self): - inputs = input_layer.Input(shape=[2]) - add1 = inputs + inputs - add2 = layers.Add()([inputs, inputs]) - add3 = inputs + inputs - add4 = layers.Add()([inputs, inputs]) - model = training_lib.Model(inputs=[inputs], - outputs=[add1, add2, add3, add4]) - actual_names = [l.name for l in model.layers] - graph_names = [ - 'input_1', 'tf_op_layer_add', 'add', 'tf_op_layer_add_2', 'add_1' - ] - eager_names = [ - 'input_1', 'tf.__operators__.add', 'add', 'tf.__operators__.add_1', - 'add_1' - ] - for actual, eager, graph in zip(actual_names, graph_names, eager_names): - self.assertIn(actual, {eager, graph}) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_layer_names_after_loading(self): - backend.clear_session() - # Mimic loading a model that already contained add layers with - # name = 'add_1' and 'tf.__operators__.add' - layers.Add(name='add_1') - layers.Add(name='tf.__operators__.add') - - inputs = input_layer.Input(shape=[2]) - add1 = inputs + inputs - add2 = layers.Add()([inputs, inputs]) - add3 = inputs + inputs - add4 = layers.Add()([inputs, inputs]) - model = training_lib.Model( - inputs=[inputs], outputs=[add1, add2, add3, add4]) - actual_names = [l.name for l in model.layers] - # The generated op layer names should have avoided layer names seen in - # the loaded model. 
(This avoiance should not apply to non-op-layers) - expected_names = [ - 'input_1', 'tf.__operators__.add_1', - 'add', 'tf.__operators__.add_2', 'add_1' - ] - self.assertAllEqual(actual_names, expected_names) - - def test_add_trainable_weight_on_frozen_layer(self): - - class TestLayer(base_layer.Layer): - - def build(self, input_shape): - self.w = self.add_weight(shape=(), trainable=True) - - def call(self, inputs): - return self.w * inputs - - layer = TestLayer() - layer.trainable = False - layer.build(None) - layer.trainable = True - self.assertListEqual(layer.trainable_weights, [layer.w]) - - @test_combinations.generate( - test_combinations.times( - test_combinations.keras_mode_combinations(), - test_combinations.keras_model_type_combinations())) - def test_passing_initial_weights_values(self): - kernel_value = np.random.random((10, 2)) - layer_with_weights = layers.Dense(2, use_bias=False, weights=[kernel_value]) - - model = test_utils.get_model_from_layers([layer_with_weights], - input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - inputs = np.random.random((3, 10)) - out = model.predict(inputs) - self.assertAllClose(model.layers[-1].get_weights()[0], kernel_value) - self.assertAllClose(out, np.dot(inputs, kernel_value)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_set_weights_and_get_weights(self): - layer = layers.Dense(2) - layer.build((None, 10)) - kernel = np.random.random((10, 2)) - bias = np.random.random((2,)) - layer.set_weights([kernel, bias]) - weights = layer.get_weights() - self.assertEqual(len(weights), 2) - self.assertAllClose(weights[0], kernel) - self.assertAllClose(weights[1], bias) - with self.assertRaisesRegex(ValueError, - 'but the layer was expecting 2 weights'): - layer.set_weights([1, 2, 3]) - with self.assertRaisesRegex(ValueError, - 'not compatible with provided weight shape'): - layer.set_weights([kernel.T, bias]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_set_weights_accepts_output_of_get_weights(self): - layer = layers.Layer() - layer.add_weight(name='scalar_float', shape=(), dtype=tf.float32) - layer.add_weight(name='scalar_string', shape=(), dtype=tf.string, - initializer=lambda *a, **k: 'abc') - layer.add_weight(name='vector_float', shape=(3,), dtype=tf.float32) - layer.add_weight(name='vector_string', shape=(2,), dtype=tf.string, - initializer=lambda *a, **k: 2 * ['abc']) - layer.set_weights(layer.get_weights()) - - def test_get_config_error(self): - - class MyLayer(base_layer.Layer): - - def __init__(self, my_kwarg='default', **kwargs): - super().__init__(**kwargs) - self.my_kwarg = my_kwarg - - # `__init__` includes kwargs but `get_config` is not overridden, so - # an error should be thrown: - with self.assertRaisesRegex(NotImplementedError, 'Layer MyLayer has'): - MyLayer('custom').get_config() - - class MyLayerNew(base_layer.Layer): - - def __init__(self, my_kwarg='default', **kwargs): - super().__init__(**kwargs) - self.my_kwarg = my_kwarg - - def get_config(self): - config = super().get_config() - config['my_kwarg'] = self.my_kwarg - return config - - # Test to make sure that error is not raised if the method call is - # from an overridden `get_config`: - self.assertEqual(MyLayerNew('custom').get_config()['my_kwarg'], 'custom') - - class MyLayerNew2(base_layer.Layer): - - def __init__(self, name='MyLayerName', dtype=None, **kwargs): # pylint:disable=redefined-outer-name - 
super().__init__(name=name, dtype=dtype, **kwargs) - - # Check that if the kwargs in `__init__` are base layer constructor - # arguments, no error is thrown: - self.assertEqual(MyLayerNew2(name='New').get_config()['name'], 'New') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_count_params(self): - dense = layers.Dense(16) - dense.build((None, 4)) - self.assertEqual(dense.count_params(), 16 * 4 + 16) - - dense = layers.Dense(16) - with self.assertRaisesRegex(ValueError, 'call `count_params`'): - dense.count_params() - - model = sequential.Sequential(layers.Dense(16)) - with self.assertRaisesRegex(ValueError, 'call `count_params`'): - model.count_params() - - dense = layers.Dense(16, input_dim=4) - model = sequential.Sequential(dense) - self.assertEqual(model.count_params(), 16 * 4 + 16) - - def test_super_not_called(self): - - class CustomLayerNotCallingSuper(base_layer.Layer): - - def __init__(self): - pass - - layer = CustomLayerNotCallingSuper() - with self.assertRaisesRegex(RuntimeError, 'You must call `super()'): - layer(np.random.random((10, 2))) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_first_arg_not_called_inputs(self): - x, y = tf.ones((10, 1)), tf.ones((10, 1)) - - class ArgLayer(base_layer.Layer): - - def call(self, x, y): - return x + y - - layer = ArgLayer() - out = self.evaluate(layer(x=x, y=y)) - self.assertAllClose(out, 2 * np.ones((10, 1))) - - class KwargLayer(base_layer.Layer): - - def call(self, x=None, y=None): - return x + y - - layer = KwargLayer() - out = self.evaluate(layer(x=x, y=y)) - self.assertAllClose(out, 2 * np.ones((10, 1))) - - with self.assertRaisesRegex(ValueError, 'must always be passed'): - layer(y=y) - - class TFFunctionLayer(base_layer.Layer): - - @tf.function - def call(self, x, y=None): - if y is None: - return x - return x + y - - layer = TFFunctionLayer() - out = self.evaluate(layer(x=x, y=y)) - self.assertAllClose(out, 2 * np.ones((10, 1))) - - def test_build_input_shape(self): - - class CustomLayer(base_layer.Layer): - - def build(self, input_shape): - self.add_weight('w', shape=input_shape[1:]) - super().build(input_shape) - - layer = CustomLayer() - self.assertFalse(layer.built) - - layer.build([None, 1, 2, 3]) - self.assertTrue(layer.built) - self.assertEqual([None, 1, 2, 3], layer._build_input_shape) - - layer = CustomLayer() - layer(input_layer.Input((3,))) - self.assertTrue(layer.built) - self.assertEqual([None, 3], layer._build_input_shape.as_list()) - - def test_build_input_shape_list_with_none(self): - - class CustomLayer(base_layer.Layer): - - def build(self, input_shape): - super().build(input_shape) - self.build_shape = input_shape - - def call(self, inputs): - return inputs[0] - - layer = CustomLayer() - layer([tf.constant([1.0]), None, tf.constant([2.0])]) - self.assertEqual(layer.build_shape, [[1], None, [1]]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_layer_input_shape_raises_error(self): - layer = layers.Dense(3) - with self.assertRaisesRegex(AttributeError, 'no defined input shape'): - _ = layer.input_shape - - layer(tf.ones((10, 1))) - with self.assertRaisesRegex(AttributeError, 'no defined input shape'): - _ = layer.input_shape - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_custom_layer_training_arg(self): - class CustomLayerNoTrainingArg(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - 
self._nested_layer = nested_layer or tf.identity - - def call(self, inputs): - return self._nested_layer(inputs) - - class CustomLayerDefaultTrainingMissing(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - self._nested_layer = nested_layer or tf.identity - - def call(self, inputs, training): - if training: - return self._nested_layer(inputs) - else: - return self._nested_layer(inputs) * 0.5 - - class CustomLayerDefaultTrainingNone(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - self._nested_layer = nested_layer or tf.identity - - def call(self, inputs, training=None): - if training: - return self._nested_layer(inputs) - else: - return self._nested_layer(inputs) * 0.5 - - class CustomLayerDefaultTrainingFalse(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - self._nested_layer = nested_layer or tf.identity - - def call(self, inputs, training=False): - if training: - return self._nested_layer(inputs) - else: - return self._nested_layer(inputs) * 0.5 - - class CustomLayerDefaultTrainingTrue(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - self._nested_layer = nested_layer or tf.identity - - def call(self, inputs, training=True): - if training: - return self._nested_layer(inputs) - else: - return self._nested_layer(inputs) * 0.5 - - self._test_custom_layer_training_arg( - CustomLayerNoTrainingArg=CustomLayerNoTrainingArg, - CustomLayerDefaultTrainingMissing=CustomLayerDefaultTrainingMissing, - CustomLayerDefaultTrainingNone=CustomLayerDefaultTrainingNone, - CustomLayerDefaultTrainingFalse=CustomLayerDefaultTrainingFalse, - CustomLayerDefaultTrainingTrue=CustomLayerDefaultTrainingTrue) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_custom_layer_training_arg_kwargonly(self): - class CustomLayerNoTrainingArg(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - self._nested_layer = nested_layer or tf.identity - - def call(self, inputs): - return self._nested_layer(inputs) - - class CustomLayerDefaultTrainingMissing(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - self._nested_layer = nested_layer or tf.identity - - def call(self, inputs, *, training): - if training: - return self._nested_layer(inputs) - else: - return self._nested_layer(inputs) * 0.5 - - class CustomLayerDefaultTrainingNone(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - self._nested_layer = nested_layer or tf.identity - - def call(self, inputs, *, training=None): - if training: - return self._nested_layer(inputs) - else: - return self._nested_layer(inputs) * 0.5 - - class CustomLayerDefaultTrainingFalse(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - self._nested_layer = nested_layer or tf.identity - - def call(self, inputs, *, training=False): - if training: - return self._nested_layer(inputs) - else: - return self._nested_layer(inputs) * 0.5 - - class CustomLayerDefaultTrainingTrue(base_layer.Layer): - - def __init__(self, nested_layer=None): - super().__init__() - self._nested_layer = nested_layer or tf.identity - - def call(self, inputs, *, training=True): - if training: - return self._nested_layer(inputs) - else: - return self._nested_layer(inputs) * 0.5 - - self._test_custom_layer_training_arg( - CustomLayerNoTrainingArg=CustomLayerNoTrainingArg, - 
CustomLayerDefaultTrainingMissing=CustomLayerDefaultTrainingMissing, - CustomLayerDefaultTrainingNone=CustomLayerDefaultTrainingNone, - CustomLayerDefaultTrainingFalse=CustomLayerDefaultTrainingFalse, - CustomLayerDefaultTrainingTrue=CustomLayerDefaultTrainingTrue) - - def _test_custom_layer_training_arg(self, - # pylint: disable=invalid-name - CustomLayerNoTrainingArg, - CustomLayerDefaultTrainingMissing, - CustomLayerDefaultTrainingNone, - CustomLayerDefaultTrainingFalse, - CustomLayerDefaultTrainingTrue, - # pylint: enable=invalid-name - ): - x = tf.ones(shape=(1, 1)) - - # If the layer signature doesn't specify a default training arg, - # run it in inference mode when to training arg is passed - # to __call__ - layer = CustomLayerDefaultTrainingMissing() - self.assertAllEqual(layer(x), x * 0.5) - self.assertAllEqual(layer(x, training=False), x * 0.5) - self.assertAllEqual(layer(x, training=True), x) - - # If the layer signature specifies `False` as the default training arg, - # run it in inference mode when no training arg is passed - # to __call__ - layer = CustomLayerDefaultTrainingFalse() - self.assertAllEqual(layer(x), x * 0.5) - self.assertAllEqual(layer(x, training=False), x * 0.5) - self.assertAllEqual(layer(x, training=True), x) - - # If the layer signature specifies `True` as the default training arg, - # explicitly run it in training mode when no training arg is passed - # to __call__ - layer = CustomLayerDefaultTrainingTrue() - self.assertAllEqual(layer(x), x) - self.assertAllEqual(layer(x, training=False), x * 0.5) - self.assertAllEqual(layer(x, training=True), x) - - # Outer layers/models should set the training context implicitly for all - # nested layers, respecting whatever mode the outer layer was run with. - layer = CustomLayerDefaultTrainingTrue(CustomLayerDefaultTrainingFalse()) - # No outer value passed: use local defaults - self.assertAllEqual(layer(x), x) # Use outer default True - # Outer value passed: override local defaults - self.assertAllEqual(layer(x, training=False), x * 0.25) - self.assertAllEqual(layer(x, training=True), x) - - layer = CustomLayerDefaultTrainingFalse(CustomLayerDefaultTrainingTrue()) - # No outer value passed: use local defaults - self.assertAllEqual(layer(x), x * 0.25) # Use outer default False - # Outer value passed: override local defaults - self.assertAllEqual(layer(x, training=False), x * 0.25) - self.assertAllEqual(layer(x, training=True), x) - - # If the outer layer `call` doesn't take a training argument at all, - # it'll set the nested scope as None when no training arg is passed in. - # If a training arg is passed in it won't use it directly in `call`, but - # it will set the nested training mode. 
- layer = CustomLayerNoTrainingArg(CustomLayerDefaultTrainingTrue()) - self.assertAllEqual(layer(x), x) # Use local default True - self.assertAllEqual(layer(x, training=False), x * 0.5) - self.assertAllEqual(layer(x, training=True), x) - - layer = CustomLayerDefaultTrainingNone(CustomLayerDefaultTrainingTrue()) - self.assertAllEqual(layer(x), x * 0.5) # Nested use local default True - self.assertAllEqual(layer(x, training=False), x * 0.25) - self.assertAllEqual(layer(x, training=True), x) - - def test_activity_regularizer_string(self): - - class MyLayer(base_layer.Layer): - pass - - layer = MyLayer(activity_regularizer='l2') - self.assertIsInstance(layer.activity_regularizer, regularizers.L2) - - def test_tf_module_tracking(self): - - class MyModule(tf.Module): - - def __init__(self): - super().__init__() - self.v1 = tf.Variable(1., trainable=True, name='v1') - self.v2 = tf.Variable(2., trainable=False, name='v2') - - def __call__(self, x): - return x * self.v1 * self.v2 - - class MyLayer(base_layer.Layer): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.my_modules = {} - self.my_modules['a'] = MyModule() - - def call(self, x): - return self.my_modules['a'](x) - - layer = MyLayer() - self.assertLen(layer.variables, 2) - self.assertLen(layer.trainable_variables, 1) - self.assertLen(layer.non_trainable_variables, 1) - - layer.trainable = False - self.assertLen(layer.variables, 2) - self.assertLen(layer.trainable_variables, 0) - self.assertLen(layer.non_trainable_variables, 2) - - class MyModel(training_lib.Model): - - def __init__(self): - super().__init__() - self.my_modules = [] - self.my_modules.append(MyModule()) - - def call(self, x): - return self.my_modules[0](x) - - model = MyModel() - self.assertLen(model.variables, 2) - self.assertLen(model.trainable_variables, 1) - self.assertLen(model.non_trainable_variables, 1) - - model.trainable = False - self.assertLen(model.variables, 2) - self.assertLen(model.trainable_variables, 0) - self.assertLen(model.non_trainable_variables, 2) + # Cannot access tensor.name in eager execution. + self.assertIn("Variable_2/Regularizer", layer.losses[0].name) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_add_weight_by_getter(self): + layer = base_layer.Layer() + variable = tf.Variable("abc") + added = layer.add_weight( + dtype=tf.string, getter=lambda *_, **__: variable + ) + self.assertIs(variable, added) + + def test_variable_resetting(self): + dense = layers.Dense(1) + dense.build([8, 2]) + + self.assertIs(dense.trainable_variables[0], dense.kernel) + self.assertIs(dense.trainable_variables[1], dense.bias) + + # When we reset a variable to another instance, make sure the ordering + # of the variables in `trainable_variables` doesn't change. + # This is important for h5 saving/loading.
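# A sketch of why the ordering guarantee above matters: get_weights() and
# set_weights() (and the HDF5 checkpoints built on them) pair arrays to
# variables purely by position, so the [kernel, bias] order must survive
# attribute reassignment. Uses the public tf.keras API, assumed equivalent
# to the internal layers.Dense in the test.
import numpy as np
import tensorflow as tf

dense = tf.keras.layers.Dense(1)
dense.build((None, 2))
# Position 0 must be the kernel and position 1 the bias; if the order
# changed, the arrays below would be matched to the wrong variables.
dense.set_weights([np.ones((2, 1), "float32"), np.zeros((1,), "float32")])
assert dense.trainable_variables[0] is dense.kernel
assert dense.trainable_variables[1] is dense.bias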
+ dense.bias = tf.Variable(initial_value=tf.zeros(shape=(1,))) + dense.kernel = tf.Variable(initial_value=tf.zeros(shape=(2, 1))) + + self.assertIs(dense.trainable_variables[0], dense.kernel) + self.assertIs(dense.trainable_variables[1], dense.bias) + + @test_combinations.generate( + test_combinations.keras_mode_combinations(mode=["eager"]) + ) + def test_learning_phase_freezing_for_layers(self): + class LearningPhaseLayer(base_layer.Layer): + def call(self, inputs): + return backend.in_train_phase( + lambda: tf.ones_like(inputs), lambda: tf.zeros_like(inputs) + ) + + def get_learning_phase_value(): + model = sequential.Sequential( + [LearningPhaseLayer(input_shape=(1,))] + ) + model._run_eagerly = test_utils.should_run_eagerly() + return np.sum(model(np.ones((1, 1)))) + + self.assertEqual(get_learning_phase_value(), 0) + + # Test scope. + with backend.learning_phase_scope(1): + self.assertEqual(get_learning_phase_value(), 1) + + # The effects of the scope end after exiting it. + self.assertEqual(get_learning_phase_value(), 0) + + # Test setting. + backend.set_learning_phase(1) + self.assertEqual(get_learning_phase_value(), 1) + backend.set_learning_phase(0) + self.assertEqual(get_learning_phase_value(), 0) + + # Cannot be enabled with `run_eagerly=True`, see b/123904578 + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_layer_can_return_variable(self): + class ComputeSum(base_layer.Layer): + def __init__(self): + super().__init__() + self.total = tf.Variable( + initial_value=tf.zeros((1, 1)), trainable=False + ) + if not tf.executing_eagerly(): + backend.get_session().run(self.total.initializer) + + def call(self, inputs): + self.total.assign_add(inputs) + return self.total + + inputs = input_layer.Input(shape=(1,)) + model = training_lib.Model(inputs, ComputeSum()(inputs)) + model.predict(np.ones((1, 1))) + + def _get_layer_with_training_arg(self): + class TrainingLayer(base_layer.Layer): + """A layer with a `training` argument in a defuned `call`.""" + + @tf.function + def call(self, inputs, training=None): + if training is None: + training = backend.learning_phase() + return control_flow_util.smart_cond( + training, + lambda: tf.ones_like(inputs), + lambda: tf.zeros_like(inputs), + ) + + return TrainingLayer() + + # b/124459427: can't test with `run_eagerly=True` for now. + @test_combinations.generate( + test_combinations.times( + test_combinations.keras_mode_combinations(), + test_combinations.keras_model_type_combinations(), + ) + ) + def test_training_arg_in_defun(self): + layer = self._get_layer_with_training_arg() + model = test_utils.get_model_from_layers([layer], input_shape=(1,)) + model.compile(rmsprop.RMSprop(0.0), loss="mae") + history = model.fit(np.zeros((1, 1)), np.zeros((1, 1))) + self.assertEqual(history.history["loss"][0], 1.0) + loss = model.evaluate(np.zeros((1, 1)), np.zeros((1, 1))) + self.assertEqual(loss, 0.0) + + # Test that the argument injection performed in `call` is not active + # when the argument is passed explicitly. 
+ layer = self._get_layer_with_training_arg() + inputs = input_layer.Input(shape=(1,)) + # Pass `training` by name + outputs = layer(inputs, training=False) + model = training_lib.Model(inputs, outputs) + model.compile(rmsprop.RMSprop(0.0), loss="mae") + history = model.fit(np.zeros((1, 1)), np.zeros((1, 1))) + self.assertEqual(history.history["loss"][0], 0.0) + + @test_combinations.generate( + test_combinations.times( + test_combinations.keras_mode_combinations(), + test_combinations.keras_model_type_combinations(), + ) + ) + def test_raw_variable_assignment(self): + class RawVariableLayer(base_layer.Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + # Test variables in nested structure. + self.var_list = [tf.Variable(1.0), {"a": tf.Variable(2.0)}] + + def call(self, inputs): + return inputs * self.var_list[0] * self.var_list[1]["a"] + + model = test_utils.get_model_from_layers( + [RawVariableLayer()], input_shape=(10,) + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + x, y = np.ones((10, 10)), np.ones((10, 10)) + # Checks that variables get initialized. + model.fit(x, y, batch_size=2, epochs=2) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_composite_variable_assignment(self): + class Spec(tf.TypeSpec): + + value_type = property(lambda self: CompositeVariable) + + def _component_specs(self): + pass + + def _serialize(self): + pass + + def _to_components(self, value): + return value._variables + + def _from_components(self, variable_list): + return CompositeVariable(variable_list) + + class CompositeVariable(tf.__internal__.CompositeTensor): + def __init__(self, variable_list): + self._variables = variable_list + + @property + def _type_spec(self): + return Spec() + + class CompositeVariableLayer(base_layer.Layer): + def __init__(self): + super().__init__() + self.composite_var = CompositeVariable( + [tf.Variable(1.0), tf.Variable(2.0)] + ) + + layer = CompositeVariableLayer() + self.assertLen(layer.weights, 2) + self.assertIsInstance(layer.weights[0], tf.Variable) + self.assertIsInstance(layer.weights[1], tf.Variable) + self.assertEqual(self.evaluate(layer.weights[0]), 1.0) + self.assertEqual(self.evaluate(layer.weights[1]), 2.0) + + def test_exception_if_trainable_not_boolean(self): + base_layer.Layer(trainable=True) + base_layer.Layer(trainable=tf.constant(True)) + base_layer.Layer(trainable=tf.Variable(tf.constant(True))) + with self.assertRaisesRegex( + TypeError, "Expected `trainable` argument to be a boolean" + ): + base_layer.Layer(trainable=0) + + def test_exception_if_dynamic_not_boolean(self): + base_layer.Layer(dynamic=True) + with self.assertRaisesRegex( + TypeError, "Expected `dynamic` argument to be a boolean" + ): + base_layer.Layer(dynamic=0) + + def test_exception_if_name_not_string_or_none(self): + base_layer.Layer(name=None) + base_layer.Layer(name="layer_name") + with self.assertRaisesRegex( + TypeError, "Expected `name` argument to be a string" + ): + base_layer.Layer(name=0) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_layer_names(self): + inputs = input_layer.Input(shape=[2]) + add1 = inputs + inputs + add2 = layers.Add()([inputs, inputs]) + add3 = inputs + inputs + add4 = layers.Add()([inputs, inputs]) + model = training_lib.Model( + inputs=[inputs], outputs=[add1, add2, add3, add4] + ) + actual_names = [l.name for l in model.layers] + graph_names = [ + "input_1", + "tf_op_layer_add", + "add", + "tf_op_layer_add_2", + 
"add_1", + ] + eager_names = [ + "input_1", + "tf.__operators__.add", + "add", + "tf.__operators__.add_1", + "add_1", + ] + for actual, eager, graph in zip(actual_names, graph_names, eager_names): + self.assertIn(actual, {eager, graph}) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_layer_names_after_loading(self): + backend.clear_session() + # Mimic loading a model that already contained add layers with + # name = 'add_1' and 'tf.__operators__.add' + layers.Add(name="add_1") + layers.Add(name="tf.__operators__.add") + + inputs = input_layer.Input(shape=[2]) + add1 = inputs + inputs + add2 = layers.Add()([inputs, inputs]) + add3 = inputs + inputs + add4 = layers.Add()([inputs, inputs]) + model = training_lib.Model( + inputs=[inputs], outputs=[add1, add2, add3, add4] + ) + actual_names = [l.name for l in model.layers] + # The generated op layer names should have avoided layer names seen in + # the loaded model. (This avoiance should not apply to non-op-layers) + expected_names = [ + "input_1", + "tf.__operators__.add_1", + "add", + "tf.__operators__.add_2", + "add_1", + ] + self.assertAllEqual(actual_names, expected_names) + + def test_add_trainable_weight_on_frozen_layer(self): + class TestLayer(base_layer.Layer): + def build(self, input_shape): + self.w = self.add_weight(shape=(), trainable=True) + + def call(self, inputs): + return self.w * inputs + + layer = TestLayer() + layer.trainable = False + layer.build(None) + layer.trainable = True + self.assertListEqual(layer.trainable_weights, [layer.w]) + + @test_combinations.generate( + test_combinations.times( + test_combinations.keras_mode_combinations(), + test_combinations.keras_model_type_combinations(), + ) + ) + def test_passing_initial_weights_values(self): + kernel_value = np.random.random((10, 2)) + layer_with_weights = layers.Dense( + 2, use_bias=False, weights=[kernel_value] + ) + + model = test_utils.get_model_from_layers( + [layer_with_weights], input_shape=(10,) + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + inputs = np.random.random((3, 10)) + out = model.predict(inputs) + self.assertAllClose(model.layers[-1].get_weights()[0], kernel_value) + self.assertAllClose(out, np.dot(inputs, kernel_value)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_set_weights_and_get_weights(self): + layer = layers.Dense(2) + layer.build((None, 10)) + kernel = np.random.random((10, 2)) + bias = np.random.random((2,)) + layer.set_weights([kernel, bias]) + weights = layer.get_weights() + self.assertEqual(len(weights), 2) + self.assertAllClose(weights[0], kernel) + self.assertAllClose(weights[1], bias) + with self.assertRaisesRegex( + ValueError, "but the layer was expecting 2 weights" + ): + layer.set_weights([1, 2, 3]) + with self.assertRaisesRegex( + ValueError, "not compatible with provided weight shape" + ): + layer.set_weights([kernel.T, bias]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_set_weights_accepts_output_of_get_weights(self): + layer = layers.Layer() + layer.add_weight(name="scalar_float", shape=(), dtype=tf.float32) + layer.add_weight( + name="scalar_string", + shape=(), + dtype=tf.string, + initializer=lambda *a, **k: "abc", + ) + layer.add_weight(name="vector_float", shape=(3,), dtype=tf.float32) + layer.add_weight( + name="vector_string", + shape=(2,), + dtype=tf.string, + initializer=lambda *a, **k: 2 * ["abc"], + ) + 
layer.set_weights(layer.get_weights()) + + def test_get_config_error(self): + class MyLayer(base_layer.Layer): + def __init__(self, my_kwarg="default", **kwargs): + super().__init__(**kwargs) + self.my_kwarg = my_kwarg + + # `__init__` includes kwargs but `get_config` is not overridden, so + # an error should be thrown: + with self.assertRaisesRegex( + NotImplementedError, "Layer MyLayer was created by" + ): + # We pass bytes because it's non-serializable and thus + # will not be handled by the auto-get_config + MyLayer(b"custom").get_config() + + class MyLayerNew(base_layer.Layer): + def __init__(self, my_kwarg="default", **kwargs): + super().__init__(**kwargs) + self.my_kwarg = my_kwarg + + def get_config(self): + config = super().get_config() + config["my_kwarg"] = self.my_kwarg + return config + + # Test to make sure that error is not raised if the method call is + # from an overridden `get_config`: + self.assertEqual( + MyLayerNew("custom").get_config()["my_kwarg"], "custom" + ) + + class MyLayerNew2(base_layer.Layer): + def __init__(self, name="MyLayerName", dtype=None, **kwargs): + super().__init__(name=name, dtype=dtype, **kwargs) + + # Check that if the kwargs in `__init__` are base layer constructor + # arguments, no error is thrown: + self.assertEqual(MyLayerNew2(name="New").get_config()["name"], "New") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_count_params(self): + dense = layers.Dense(16) + dense.build((None, 4)) + self.assertEqual(dense.count_params(), 16 * 4 + 16) + + dense = layers.Dense(16) + with self.assertRaisesRegex(ValueError, "call `count_params`"): + dense.count_params() + + model = sequential.Sequential(layers.Dense(16)) + with self.assertRaisesRegex(ValueError, "call `count_params`"): + model.count_params() + + dense = layers.Dense(16, input_dim=4) + model = sequential.Sequential(dense) + self.assertEqual(model.count_params(), 16 * 4 + 16) + + def test_super_not_called(self): + class CustomLayerNotCallingSuper(base_layer.Layer): + def __init__(self): + pass + + layer = CustomLayerNotCallingSuper() + with self.assertRaisesRegex(RuntimeError, "You must call `super()"): + layer(np.random.random((10, 2))) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_first_arg_not_called_inputs(self): + x, y = tf.ones((10, 1)), tf.ones((10, 1)) + + class ArgLayer(base_layer.Layer): + def call(self, x, y): + return x + y + + layer = ArgLayer() + out = self.evaluate(layer(x=x, y=y)) + self.assertAllClose(out, 2 * np.ones((10, 1))) + + class KwargLayer(base_layer.Layer): + def call(self, x=None, y=None): + return x + y + + layer = KwargLayer() + out = self.evaluate(layer(x=x, y=y)) + self.assertAllClose(out, 2 * np.ones((10, 1))) + + with self.assertRaisesRegex(ValueError, "must always be passed"): + layer(y=y) + + class TFFunctionLayer(base_layer.Layer): + @tf.function + def call(self, x, y=None): + if y is None: + return x + return x + y + + layer = TFFunctionLayer() + out = self.evaluate(layer(x=x, y=y)) + self.assertAllClose(out, 2 * np.ones((10, 1))) + + def test_build_input_shape(self): + class CustomLayer(base_layer.Layer): + def build(self, input_shape): + self.add_weight("w", shape=input_shape[1:]) + super().build(input_shape) + + layer = CustomLayer() + self.assertFalse(layer.built) + + layer.build([None, 1, 2, 3]) + self.assertTrue(layer.built) + self.assertEqual([None, 1, 2, 3], layer._build_input_shape) + + layer = CustomLayer() + layer(input_layer.Input((3,))) + 
self.assertTrue(layer.built) + self.assertEqual([None, 3], layer._build_input_shape.as_list()) + + def test_build_input_shape_list_with_none(self): + class CustomLayer(base_layer.Layer): + def build(self, input_shape): + super().build(input_shape) + self.build_shape = input_shape + + def call(self, inputs): + return inputs[0] + + layer = CustomLayer() + layer([tf.constant([1.0]), None, tf.constant([2.0])]) + self.assertEqual(layer.build_shape, [[1], None, [1]]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_layer_input_shape_raises_error(self): + layer = layers.Dense(3) + with self.assertRaisesRegex(AttributeError, "no defined input shape"): + _ = layer.input_shape + + layer(tf.ones((10, 1))) + with self.assertRaisesRegex(AttributeError, "no defined input shape"): + _ = layer.input_shape + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_custom_layer_training_arg(self): + class CustomLayerNoTrainingArg(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs): + return self._nested_layer(inputs) + + class CustomLayerDefaultTrainingMissing(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs, training): + if training: + return self._nested_layer(inputs) + else: + return self._nested_layer(inputs) * 0.5 + + class CustomLayerDefaultTrainingNone(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs, training=None): + if training: + return self._nested_layer(inputs) + else: + return self._nested_layer(inputs) * 0.5 + + class CustomLayerDefaultTrainingFalse(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs, training=False): + if training: + return self._nested_layer(inputs) + else: + return self._nested_layer(inputs) * 0.5 + + class CustomLayerDefaultTrainingTrue(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs, training=True): + if training: + return self._nested_layer(inputs) + else: + return self._nested_layer(inputs) * 0.5 + + self._test_custom_layer_training_arg( + CustomLayerNoTrainingArg=CustomLayerNoTrainingArg, + CustomLayerDefaultTrainingMissing=CustomLayerDefaultTrainingMissing, + CustomLayerDefaultTrainingNone=CustomLayerDefaultTrainingNone, + CustomLayerDefaultTrainingFalse=CustomLayerDefaultTrainingFalse, + CustomLayerDefaultTrainingTrue=CustomLayerDefaultTrainingTrue, + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_custom_layer_training_arg_kwargonly(self): + class CustomLayerNoTrainingArg(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs): + return self._nested_layer(inputs) + + class CustomLayerDefaultTrainingMissing(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs, *, training): + if training: + return self._nested_layer(inputs) + else: + return self._nested_layer(inputs) * 0.5 + + class 
CustomLayerDefaultTrainingNone(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs, *, training=None): + if training: + return self._nested_layer(inputs) + else: + return self._nested_layer(inputs) * 0.5 + + class CustomLayerDefaultTrainingFalse(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs, *, training=False): + if training: + return self._nested_layer(inputs) + else: + return self._nested_layer(inputs) * 0.5 + + class CustomLayerDefaultTrainingTrue(base_layer.Layer): + def __init__(self, nested_layer=None): + super().__init__() + self._nested_layer = nested_layer or tf.identity + + def call(self, inputs, *, training=True): + if training: + return self._nested_layer(inputs) + else: + return self._nested_layer(inputs) * 0.5 + + self._test_custom_layer_training_arg( + CustomLayerNoTrainingArg=CustomLayerNoTrainingArg, + CustomLayerDefaultTrainingMissing=CustomLayerDefaultTrainingMissing, + CustomLayerDefaultTrainingNone=CustomLayerDefaultTrainingNone, + CustomLayerDefaultTrainingFalse=CustomLayerDefaultTrainingFalse, + CustomLayerDefaultTrainingTrue=CustomLayerDefaultTrainingTrue, + ) + + def _test_custom_layer_training_arg( + self, + CustomLayerNoTrainingArg, + CustomLayerDefaultTrainingMissing, + CustomLayerDefaultTrainingNone, + CustomLayerDefaultTrainingFalse, + CustomLayerDefaultTrainingTrue, + ): + x = tf.ones(shape=(1, 1)) + + # If the layer signature doesn't specify a default training arg, + # run it in inference mode when no training arg is passed + # to __call__ + layer = CustomLayerDefaultTrainingMissing() + self.assertAllEqual(layer(x), x * 0.5) + self.assertAllEqual(layer(x, training=False), x * 0.5) + self.assertAllEqual(layer(x, training=True), x) + + # If the layer signature specifies `False` as the default training arg, + # run it in inference mode when no training arg is passed + # to __call__ + layer = CustomLayerDefaultTrainingFalse() + self.assertAllEqual(layer(x), x * 0.5) + self.assertAllEqual(layer(x, training=False), x * 0.5) + self.assertAllEqual(layer(x, training=True), x) + + # If the layer signature specifies `True` as the default training arg, + # explicitly run it in training mode when no training arg is passed + # to __call__ + layer = CustomLayerDefaultTrainingTrue() + self.assertAllEqual(layer(x), x) + self.assertAllEqual(layer(x, training=False), x * 0.5) + self.assertAllEqual(layer(x, training=True), x) + + # Outer layers/models should set the training context implicitly for all + # nested layers, respecting whatever mode the outer layer was run with.
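# A standalone sketch of the implicit propagation asserted below: the outer
# layer's resolved `training` value becomes the default for nested calls,
# overriding the inner layer's own default, so two 0.5 scalings compose to
# the 0.25 the assertions expect. Assumes the public tf.keras.layers.Layer
# matches the internal base_layer.Layer here.
import tensorflow as tf


class Halver(tf.keras.layers.Layer):
    def call(self, inputs, training=False):
        # Halve the input in inference mode, pass through in training mode.
        return inputs if training else inputs * 0.5


class Outer(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()
        self.inner = Halver()

    def call(self, inputs, training=True):
        x = inputs if training else inputs * 0.5
        # No explicit `training` here: the nested layer inherits the mode
        # the outer layer was called with, not its own default of False.
        return self.inner(x)


x = tf.ones((1, 1))
outer = Outer()
assert float(outer(x)) == 1.0  # outer default True propagates inward
assert float(outer(x, training=False)) == 0.25  # 0.5 * 0.5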
+ layer = CustomLayerDefaultTrainingTrue( + CustomLayerDefaultTrainingFalse() + ) + # No outer value passed: use local defaults + self.assertAllEqual(layer(x), x) # Use outer default True + # Outer value passed: override local defaults + self.assertAllEqual(layer(x, training=False), x * 0.25) + self.assertAllEqual(layer(x, training=True), x) + + layer = CustomLayerDefaultTrainingFalse( + CustomLayerDefaultTrainingTrue() + ) + # No outer value passed: use local defaults + self.assertAllEqual(layer(x), x * 0.25) # Use outer default False + # Outer value passed: override local defaults + self.assertAllEqual(layer(x, training=False), x * 0.25) + self.assertAllEqual(layer(x, training=True), x) + + # If the outer layer `call` doesn't take a training argument at all, + # it'll set the nested scope as None when no training arg is passed in. + # If a training arg is passed in it won't use it directly in `call`, but + # it will set the nested training mode. + layer = CustomLayerNoTrainingArg(CustomLayerDefaultTrainingTrue()) + self.assertAllEqual(layer(x), x) # Use local default True + self.assertAllEqual(layer(x, training=False), x * 0.5) + self.assertAllEqual(layer(x, training=True), x) + + layer = CustomLayerDefaultTrainingNone(CustomLayerDefaultTrainingTrue()) + self.assertAllEqual(layer(x), x * 0.5) # Nested use local default True + self.assertAllEqual(layer(x, training=False), x * 0.25) + self.assertAllEqual(layer(x, training=True), x) + + def test_activity_regularizer_string(self): + class MyLayer(base_layer.Layer): + pass + + layer = MyLayer(activity_regularizer="l2") + self.assertIsInstance(layer.activity_regularizer, regularizers.L2) + + def test_tf_module_tracking(self): + class MyModule(tf.Module): + def __init__(self): + super().__init__() + self.v1 = tf.Variable(1.0, trainable=True, name="v1") + self.v2 = tf.Variable(2.0, trainable=False, name="v2") + + def __call__(self, x): + return x * self.v1 * self.v2 + + class MyLayer(base_layer.Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.my_modules = {} + self.my_modules["a"] = MyModule() + + def call(self, x): + return self.my_modules["a"](x) + + layer = MyLayer() + self.assertLen(layer.variables, 2) + self.assertLen(layer.trainable_variables, 1) + self.assertLen(layer.non_trainable_variables, 1) + + layer.trainable = False + self.assertLen(layer.variables, 2) + self.assertLen(layer.trainable_variables, 0) + self.assertLen(layer.non_trainable_variables, 2) + + class MyModel(training_lib.Model): + def __init__(self): + super().__init__() + self.my_modules = [] + self.my_modules.append(MyModule()) + + def call(self, x): + return self.my_modules[0](x) + + model = MyModel() + self.assertLen(model.variables, 2) + self.assertLen(model.trainable_variables, 1) + self.assertLen(model.non_trainable_variables, 1) + + model.trainable = False + self.assertLen(model.variables, 2) + self.assertLen(model.trainable_variables, 0) + self.assertLen(model.non_trainable_variables, 2) + + def test_tf_tracking_lists(self): + class MyLayer(base_layer.Layer): + def __init__(self, num_weights): + super().__init__() + self.num_weights = num_weights + + def build(self, input_shape): + super().build(input_shape) + self.my_weights = [] + w_init = tf.random_normal_initializer() + for i in range(self.num_weights): + self.my_weights.append( + tf.Variable( + name=f"w_{i}", + initial_value=w_init( + shape=(input_shape[1], input_shape[1]), + dtype="float32", + ), + trainable=True, + ) + ) + + def call(self, x): + for w in self.my_weights: + x = 
tf.matmul(x, w) + return x + + layer = MyLayer(3) + layer(tf.constant([[1.0, 1.0, 1.0, 1.0]])) + self.assertLen(layer.variables, 3) + self.assertLen(layer.trainable_variables, 3) + self.assertLen(layer.non_trainable_variables, 0) + + layer.trainable = False + self.assertLen(layer.variables, 3) + self.assertLen(layer.trainable_variables, 0) + self.assertLen(layer.non_trainable_variables, 3) + + def test_auto_get_config(self): + class MyLayer(base_layer.Layer): + def __init__(self, var1, var2, var3=None, **kwargs): + super().__init__(**kwargs) + + layer = MyLayer("a", 2, var3=True, name="mylayer") + config = layer.get_config() + self.assertLen(config, 6) + self.assertEqual(config["var1"], "a") + self.assertEqual(config["var2"], 2) + self.assertEqual(config["var3"], True) + self.assertEqual(config["name"], "mylayer") + self.assertEqual(config["trainable"], True) + self.assertEqual(config["dtype"], "float32") + layer = MyLayer.from_config(config) + self.assertDictEqual(layer.get_config(), config) + + layer = MyLayer("a", 2, var3=tf.nn.relu) + with self.assertRaises(NotImplementedError): + config = layer.get_config() @test_utils.run_v2_only class SymbolicSupportTest(test_combinations.TestCase): - - def test_using_symbolic_tensors_with_tf_ops(self): - # Single-input. - x = input_layer.Input((3,)) - tf.square(x) - - # Multi-inputs. - x1, x2 = input_layer.Input((3,)), input_layer.Input((3,)) - tf.concat([x1, x2], axis=1) - - # Mixing Keras symbolic tensors and graph tensors from the same graph works. - with backend.get_graph().as_default(): - x1 = input_layer.Input((3,)) - x2 = input_layer.Input((3,)) - tf.matmul(x1, x2) - - # Creating same op type (matmul) multiple times in the Keras graph works. - x1 = input_layer.Input((3,)) - x2 = input_layer.Input((3,)) - tf.matmul(x1, x2) - - def test_mixing_eager_and_graph_tensors(self): - with tf.Graph().as_default(): - x1 = tf.ones((3, 3)) - x2 = tf.ones((3, 3)) - with self.assertRaises(TypeError): - tf.matmul(x1, x2) - - def test_mixing_numpy_arrays_and_graph_tensors(self): - with tf.Graph().as_default(): - x1 = tf.ones((3, 3)) - x2 = np.ones((3, 3), dtype='float32') - with self.assertRaises(TypeError): - tf.matmul(x1, x2) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_mixing_keras_symbolic_tensors_and_eager_tensors(self): - x1 = input_layer.Input((3,)) - x2 = tf.ones((3, 3)) - y = tf.matmul(x1, x2) - - fn = backend.function(inputs=[x1], outputs=[y]) - x_val = np.random.random((3, 3)) - y_val = np.ones((3, 3)) - self.assertAllClose(fn([x_val])[0], - np.matmul(x_val, y_val), - atol=1e-5) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_mixing_keras_symbolic_tensors_and_numpy_arrays(self): - x1 = input_layer.Input((3,)) - x2 = np.ones((3, 3), dtype='float32') - y = tf.matmul(x1, x2) - - fn = backend.function(inputs=[x1], outputs=[y]) - x_val = np.random.random((3, 3)) - y_val = np.ones((3, 3)) - self.assertAllClose(fn([x_val])[0], - np.matmul(x_val, y_val), - atol=1e-5) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_reraising_exception(self): - # When layer is not dynamic, we have some pattern matching during exception - # handling to detect when the user is trying to use python control flow. - # When an exception is thrown but the pattern doesn't match, we want to - # preserve the originating stack trace. An early implementation of this - # logic lost the stack trace. We test the correct behavior here. 
- - class TypeErrorLayer(base_layer.Layer): - - def call(self, inputs): - def easily_identifiable_name(): - raise TypeError('Non-matching TypeError message.') - easily_identifiable_name() - - inputs = input_layer.Input((3,)) - - try: - _ = TypeErrorLayer()(inputs) - except TypeError as e: - self.assertIn('easily_identifiable_name', str(e)) # pylint: disable=g-assert-in-except - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_summaries_in_tf_function(self): - if not tf.executing_eagerly(): - return - - class MyLayer(base_layer.Layer): - - def call(self, inputs): - tf.summary.scalar('mean', tf.reduce_mean(inputs)) - return inputs - - tmp_dir = self.get_temp_dir() - writer = tf.summary.create_file_writer(tmp_dir) - with writer.as_default(step=1), tf.summary.record_if(True): - my_layer = MyLayer() - x = tf.ones((10, 10)) - - def my_fn(x): - return my_layer(x) - - _ = my_fn(x) - - event_file = tf.compat.v1.gfile.Glob(os.path.join(tmp_dir, 'events*')) - self.assertLen(event_file, 1) - event_file = event_file[0] - tags = set() - for e in tf.compat.v1.train.summary_iterator(event_file): - for val in e.summary.value: - tags.add(val.tag) - self.assertEqual(set(['my_layer/mean']), tags) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_error_when_passing_non_tensor(self): - # layers that have an `input_spec` will raise an error when called on - # non-tensors. This covers all built-in layers. - layer = layers.Dense(3) - x = object() - with self.assertRaisesRegex(TypeError, r'should be tensors'): - layer(x) + def test_using_symbolic_tensors_with_tf_ops(self): + # Single-input. + x = input_layer.Input((3,)) + tf.square(x) + + # Multi-inputs. + x1, x2 = input_layer.Input((3,)), input_layer.Input((3,)) + tf.concat([x1, x2], axis=1) + + # Mixing Keras symbolic tensors and graph tensors from the same graph + # works. + with backend.get_graph().as_default(): + x1 = input_layer.Input((3,)) + x2 = input_layer.Input((3,)) + tf.matmul(x1, x2) + + # Creating same op type (matmul) multiple times in the Keras graph + # works. 
+ x1 = input_layer.Input((3,)) + x2 = input_layer.Input((3,)) + tf.matmul(x1, x2) + + def test_mixing_eager_and_graph_tensors(self): + with tf.Graph().as_default(): + x1 = tf.ones((3, 3)) + x2 = tf.ones((3, 3)) + with self.assertRaises(TypeError): + tf.matmul(x1, x2) + + def test_mixing_numpy_arrays_and_graph_tensors(self): + with tf.Graph().as_default(): + x1 = tf.ones((3, 3)) + x2 = np.ones((3, 3), dtype="float32") + with self.assertRaises(TypeError): + tf.matmul(x1, x2) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_mixing_keras_symbolic_tensors_and_eager_tensors(self): + x1 = input_layer.Input((3,)) + x2 = tf.ones((3, 3)) + y = tf.matmul(x1, x2) + + fn = backend.function(inputs=[x1], outputs=[y]) + x_val = np.random.random((3, 3)) + y_val = np.ones((3, 3)) + self.assertAllClose(fn([x_val])[0], np.matmul(x_val, y_val), atol=1e-5) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_mixing_keras_symbolic_tensors_and_numpy_arrays(self): + x1 = input_layer.Input((3,)) + x2 = np.ones((3, 3), dtype="float32") + y = tf.matmul(x1, x2) + + fn = backend.function(inputs=[x1], outputs=[y]) + x_val = np.random.random((3, 3)) + y_val = np.ones((3, 3)) + self.assertAllClose(fn([x_val])[0], np.matmul(x_val, y_val), atol=1e-5) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_reraising_exception(self): + # When layer is not dynamic, we have some pattern matching during + # exception handling to detect when the user is trying to use python + # control flow. When an exception is thrown but the pattern doesn't + # match, we want to preserve the originating stack trace. An early + # implementation of this logic lost the stack trace. We test the correct + # behavior here. + + class TypeErrorLayer(base_layer.Layer): + def call(self, inputs): + def easily_identifiable_name(): + raise TypeError("Non-matching TypeError message.") + + easily_identifiable_name() + + inputs = input_layer.Input((3,)) + + try: + _ = TypeErrorLayer()(inputs) + except TypeError as e: + self.assertIn("easily_identifiable_name", str(e)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_summaries_in_tf_function(self): + if not tf.executing_eagerly(): + return + + class MyLayer(base_layer.Layer): + def call(self, inputs): + tf.summary.scalar("mean", tf.reduce_mean(inputs)) + return inputs + + tmp_dir = self.get_temp_dir() + writer = tf.summary.create_file_writer(tmp_dir) + with writer.as_default(step=1), tf.summary.record_if(True): + my_layer = MyLayer() + x = tf.ones((10, 10)) + + def my_fn(x): + return my_layer(x) + + _ = my_fn(x) + + event_file = tf.compat.v1.gfile.Glob(os.path.join(tmp_dir, "events*")) + self.assertLen(event_file, 1) + event_file = event_file[0] + tags = set() + for e in tf.compat.v1.train.summary_iterator(event_file): + for val in e.summary.value: + tags.add(val.tag) + self.assertEqual(set(["my_layer/mean"]), tags) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_error_when_passing_non_tensor(self): + # layers that have an `input_spec` will raise an error when called on + # non-tensors. This covers all built-in layers. 
+ layer = layers.Dense(3) + x = object() + with self.assertRaisesRegex(TypeError, r"should be tensors"): + layer(x) @test_utils.run_v2_only -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class NestedTrackingTest(tf.test.TestCase): - - def test_nested_layer_variable_tracking(self): - # Test that variables from nested sublayers are - # being tracked by subclassed layers. - - class MyLayer(base_layer.Layer): - - def __init__(self): - super().__init__() - self.dense1 = layers.Dense(1) - self.dense2 = layers.BatchNormalization() - - def build(self, input_shape): - self.v1 = self.add_weight('v1', shape=input_shape[1:].as_list()) - self.v2 = tf.Variable( - name='v2', - initial_value=np.zeros(input_shape[1:].as_list(), dtype='float32'), - trainable=False) - - def call(self, inputs): - x = self.dense1(inputs) + self.dense2(inputs) - return x + self.v1 + self.v2 - - layer = MyLayer() - inputs = input_layer.Input((1,)) - _ = layer(inputs) - - self.assertEqual(len(layer.weights), 8) - self.assertEqual(len(layer.trainable_weights), 5) - self.assertEqual(len(layer.non_trainable_weights), 3) - - layer.dense1.trainable = False - self.assertEqual(len(layer.weights), 8) - self.assertEqual(len(layer.trainable_weights), 3) - self.assertEqual(len(layer.non_trainable_weights), 5) - - layer.trainable = False - self.assertEqual(len(layer.weights), 8) - self.assertEqual(len(layer.trainable_weights), 0) - self.assertEqual(len(layer.non_trainable_weights), 8) - self.assertEqual( - {id(v) for v in [layer.dense1, layer.dense2, layer.v1, layer.v2]}, - {id(v) for v in layer._trackable_children().values()}) - - def test_nested_layer_updates_losses_tracking(self): - # Test that updates and losses from nested sublayers are - # being tracked by subclassed layers. - - class UpdateAndLossLayer(base_layer.Layer): - - def build(self, _): - self.v1 = self.add_weight('v1', shape=()) - - def call(self, inputs): - self.add_loss(tf.reduce_sum(inputs)) - self.add_update(tf.compat.v1.assign_add(self.v1, 1)) - return inputs + 1 - - class MyLayer(base_layer.Layer): - - def build(self, _): - self.v1 = self.add_weight('v1', shape=()) - - def __init__(self): - super().__init__() - self.ul1 = UpdateAndLossLayer() - self.ul2 = UpdateAndLossLayer() - - def call(self, inputs): - self.add_loss(tf.reduce_sum(inputs)) - self.add_update(tf.compat.v1.assign_add(self.v1, 1)) - x = self.ul1(inputs) - return self.ul2(x) - - layer = MyLayer() - - if tf.executing_eagerly(): - inputs = tf.ones((3, 1)) - _ = layer(inputs) - self.assertEqual(len(layer.losses), 3) - else: - inputs = input_layer.Input((1,)) - _ = layer(inputs) - self.assertEqual(len(layer.losses), 3) - self.assertEqual(len(layer.updates), 3) - - def test_attribute_reassignment(self): - l = base_layer.Layer() - l.a = base_layer.Layer() - l.a = [] - l.a = tf.Variable(1.) - l.a = base_layer.Layer() - last_assignment = base_layer.Layer() - l.a = last_assignment - l.b = tf.Variable(1.) 
- del l.b - l.c = base_layer.Layer() - del l.c - l.d = last_assignment - del l.d - sublayers = list(l._flatten_layers(include_self=False, recursive=False)) - self.assertEqual([last_assignment], sublayers) - self.assertEqual([], l.trainable_weights) - self.assertEqual([], l.non_trainable_weights) - self.assertEqual([], l.weights) - del l.a - self.assertEqual([], l._self_tracked_trackables) - - def test_layer_class_not_tracked_as_sublayer(self): - # See https://github.com/tensorflow/tensorflow/issues/27431 for details. - - class LayerWithClassAttribute(base_layer.Layer): - - def __init__(self): - super().__init__() - self.layer_fn = layers.Dense - - layer = LayerWithClassAttribute() - self.assertEmpty(layer.variables) - self.assertEmpty(layer.submodules) - - def test_layer_call_fn_args(self): - - class NonDefunLayer(base_layer.Layer): - - def call(self, inputs, a, mask, b=None, training=None): - return inputs - - class DefunLayer(base_layer.Layer): - - @tf.function - def call(self, x, mask, a, training=None, b=None): - return x - - nondefun_layer = NonDefunLayer() - self.assertEqual(nondefun_layer._call_spec.arg_names, - ['inputs', 'a', 'mask', 'b', 'training']) - defun_layer = DefunLayer() - self.assertEqual(defun_layer._call_spec.arg_names, - ['x', 'mask', 'a', 'training', 'b']) - - def test_sequential_model(self): - model = sequential.Sequential( - [layers.Dense(10, input_shape=(10,)), - layers.Dense(5)]) - self.assertLen(model.layers, 2) - self.assertLen(model.weights, 4) - - # Make sure a subclass model also works when it is called 'Sequential'. - class Sequential(training_lib.Model): - - def __init__(self): - super().__init__() - self.dense_layers = [layers.Dense(10), layers.Dense(5)] - - def call(self, inputs): - x = inputs - for d in self.dense_layers: - x = d(x) - return x - - s = Sequential() - self.assertLen(s.layers, 2) - self.assertLen(s.weights, 0) - - s(input_layer.Input((10,))) - self.assertLen(s.weights, 4) + def test_nested_layer_variable_tracking(self): + # Test that variables from nested sublayers are + # being tracked by subclassed layers. + + class MyLayer(base_layer.Layer): + def __init__(self): + super().__init__() + self.dense1 = layers.Dense(1) + self.dense2 = layers.BatchNormalization() + + def build(self, input_shape): + self.v1 = self.add_weight("v1", shape=input_shape[1:].as_list()) + self.v2 = tf.Variable( + name="v2", + initial_value=np.zeros( + input_shape[1:].as_list(), dtype="float32" + ), + trainable=False, + ) + + def call(self, inputs): + x = self.dense1(inputs) + self.dense2(inputs) + return x + self.v1 + self.v2 + + layer = MyLayer() + inputs = input_layer.Input((1,)) + _ = layer(inputs) + + self.assertEqual(len(layer.weights), 8) + self.assertEqual(len(layer.trainable_weights), 5) + self.assertEqual(len(layer.non_trainable_weights), 3) + + layer.dense1.trainable = False + self.assertEqual(len(layer.weights), 8) + self.assertEqual(len(layer.trainable_weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 5) + + layer.trainable = False + self.assertEqual(len(layer.weights), 8) + self.assertEqual(len(layer.trainable_weights), 0) + self.assertEqual(len(layer.non_trainable_weights), 8) + self.assertEqual( + {id(v) for v in [layer.dense1, layer.dense2, layer.v1, layer.v2]}, + {id(v) for v in layer._trackable_children().values()}, + ) + + def test_nested_layer_updates_losses_tracking(self): + # Test that updates and losses from nested sublayers are + # being tracked by subclassed layers. 
+ + class UpdateAndLossLayer(base_layer.Layer): + def build(self, _): + self.v1 = self.add_weight("v1", shape=()) + + def call(self, inputs): + self.add_loss(tf.reduce_sum(inputs)) + self.add_update(tf.compat.v1.assign_add(self.v1, 1)) + return inputs + 1 + + class MyLayer(base_layer.Layer): + def build(self, _): + self.v1 = self.add_weight("v1", shape=()) + + def __init__(self): + super().__init__() + self.ul1 = UpdateAndLossLayer() + self.ul2 = UpdateAndLossLayer() + + def call(self, inputs): + self.add_loss(tf.reduce_sum(inputs)) + self.add_update(tf.compat.v1.assign_add(self.v1, 1)) + x = self.ul1(inputs) + return self.ul2(x) + + layer = MyLayer() + + if tf.executing_eagerly(): + inputs = tf.ones((3, 1)) + _ = layer(inputs) + self.assertEqual(len(layer.losses), 3) + else: + inputs = input_layer.Input((1,)) + _ = layer(inputs) + self.assertEqual(len(layer.losses), 3) + self.assertEqual(len(layer.updates), 3) + + def test_attribute_reassignment(self): + l = base_layer.Layer() + l.a = base_layer.Layer() + l.a = [] + l.a = tf.Variable(1.0) + l.a = base_layer.Layer() + last_assignment = base_layer.Layer() + l.a = last_assignment + l.b = tf.Variable(1.0) + del l.b + l.c = base_layer.Layer() + del l.c + l.d = last_assignment + del l.d + sublayers = list(l._flatten_layers(include_self=False, recursive=False)) + self.assertEqual([last_assignment], sublayers) + self.assertEqual([], l.trainable_weights) + self.assertEqual([], l.non_trainable_weights) + self.assertEqual([], l.weights) + del l.a + self.assertEqual([], l._self_tracked_trackables) + + def test_layer_class_not_tracked_as_sublayer(self): + # See https://github.com/tensorflow/tensorflow/issues/27431 for details. + + class LayerWithClassAttribute(base_layer.Layer): + def __init__(self): + super().__init__() + self.layer_fn = layers.Dense + + layer = LayerWithClassAttribute() + self.assertEmpty(layer.variables) + self.assertEmpty(layer.submodules) + + def test_layer_call_fn_args(self): + class NonDefunLayer(base_layer.Layer): + def call(self, inputs, a, mask, b=None, training=None): + return inputs + + class DefunLayer(base_layer.Layer): + @tf.function + def call(self, x, mask, a, training=None, b=None): + return x + + nondefun_layer = NonDefunLayer() + self.assertEqual( + nondefun_layer._call_spec.arg_names, + ["inputs", "a", "mask", "b", "training"], + ) + defun_layer = DefunLayer() + self.assertEqual( + defun_layer._call_spec.arg_names, + ["x", "mask", "a", "training", "b"], + ) + + def test_sequential_model(self): + model = sequential.Sequential( + [layers.Dense(10, input_shape=(10,)), layers.Dense(5)] + ) + self.assertLen(model.layers, 2) + self.assertLen(model.weights, 4) + + # Make sure a subclass model also works when it is called 'Sequential'. 
+ class Sequential(training_lib.Model): + def __init__(self): + super().__init__() + self.dense_layers = [layers.Dense(10), layers.Dense(5)] + + def call(self, inputs): + x = inputs + for d in self.dense_layers: + x = d(x) + return x + + s = Sequential() + self.assertLen(s.layers, 2) + self.assertLen(s.weights, 0) + + s(input_layer.Input((10,))) + self.assertLen(s.weights, 4) @test_utils.run_v2_only -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class NameScopingTest(test_combinations.TestCase): - - def test_name_scope_layer(self): - x = backend.placeholder(shape=(10, 10)) - layer = layers.Dense(10, name='MyName') - layer(x) - self.assertEqual(layer.bias.name, 'MyName/bias:0') - self.assertEqual(layer.kernel.name, 'MyName/kernel:0') - - def test_name_scope_functional_api(self): - inputs = input_layer.Input((3,)) - layer = layers.Dense(10, name='MyName') - _ = layer(inputs) - self.assertEqual(layer.bias.name, 'MyName/bias:0') - self.assertEqual(layer.kernel.name, 'MyName/kernel:0') - - def test_name_scope_functional_api_nested(self): - - class NestedLayer(base_layer.Layer): - - def __init__(self, name='OuterName'): - super().__init__(name=name) - self.dense = layers.Dense(10, name='InnerName') - - def call(self, inputs): - return self.dense(inputs) - - inputs = input_layer.Input((3,)) - layer = NestedLayer() - _ = layer(inputs) - self.assertEqual(layer.dense.bias.name, 'OuterName/InnerName/bias:0') - self.assertEqual(layer.dense.kernel.name, 'OuterName/InnerName/kernel:0') - - def test_name_scope_sublayer(self): - - class NameScopeTracker(base_layer.Layer): - - def call(self, inputs): - self.active_name_scope = tf.__internal__.get_name_scope() - return inputs - - x = backend.placeholder(shape=(10, 10)) - sublayer = NameScopeTracker(name='Sublayer') - layer = layers.Dense(10, activation=sublayer, name='MyName2') - layer(x) - self.assertEqual(layer.bias.name, 'MyName2/bias:0') - self.assertEqual(layer.kernel.name, 'MyName2/kernel:0') - self.assertEqual(sublayer.active_name_scope, 'MyName2/Sublayer') - - def test_name_scope_tf_tensor(self): - x = tf.convert_to_tensor(np.ones((10, 10))) - layer = layers.Dense( - 10, activation=layers.ReLU(name='MyAct'), name='MyName3') - layer(x) - self.assertEqual(layer.bias.name, 'MyName3/bias:0') - self.assertEqual(layer.kernel.name, 'MyName3/kernel:0') - - @test_utils.run_v2_only - def test_apply_name_scope_on_model_declaration(self): - if not tf.executing_eagerly(): - self.skipTest('`apply_name_scope_on_model_declaration` API is supported' - ' only for V2 eager') - - base_layer._apply_name_scope_on_model_declaration(True) - - inputs = input_layer.Input((3,)) - x = layers.Dense(10, name='Dense1')(inputs) - with tf.name_scope('outer'): - x = layers.Dense(10, name='Dense2')(x) - with tf.name_scope('inner'): - x = layers.Dense(10, name='Dense3')(x) - x = layers.Dense(10, name='Dense4')(x) - outputs = layers.Dense(10, name='Dense5')(x) - - model = training_lib.Model(inputs, outputs) - node_names = self._get_model_node_names(model, np.random.random((1, 3)), - 'call_scope') - self.assertListEqual(node_names, [ - 'call_scope/Const', - 'call_scope/model/Cast', - 'call_scope/model/Dense1/MatMul/ReadVariableOp/resource', - 'call_scope/model/Dense1/MatMul/ReadVariableOp', - 'call_scope/model/Dense1/MatMul', - 'call_scope/model/Dense1/BiasAdd/ReadVariableOp/resource', - 'call_scope/model/Dense1/BiasAdd/ReadVariableOp', - 'call_scope/model/Dense1/BiasAdd', 
- 'call_scope/model/outer/Dense2/MatMul/ReadVariableOp/resource', - 'call_scope/model/outer/Dense2/MatMul/ReadVariableOp', - 'call_scope/model/outer/Dense2/MatMul', - 'call_scope/model/outer/Dense2/BiasAdd/ReadVariableOp/resource', - 'call_scope/model/outer/Dense2/BiasAdd/ReadVariableOp', - 'call_scope/model/outer/Dense2/BiasAdd', - 'call_scope/model/outer/inner/Dense3/MatMul/ReadVariableOp/resource', - 'call_scope/model/outer/inner/Dense3/MatMul/ReadVariableOp', - 'call_scope/model/outer/inner/Dense3/MatMul', - 'call_scope/model/outer/inner/Dense3/BiasAdd/ReadVariableOp/resource', - 'call_scope/model/outer/inner/Dense3/BiasAdd/ReadVariableOp', - 'call_scope/model/outer/inner/Dense3/BiasAdd', - 'call_scope/model/outer/Dense4/MatMul/ReadVariableOp/resource', - 'call_scope/model/outer/Dense4/MatMul/ReadVariableOp', - 'call_scope/model/outer/Dense4/MatMul', - 'call_scope/model/outer/Dense4/BiasAdd/ReadVariableOp/resource', - 'call_scope/model/outer/Dense4/BiasAdd/ReadVariableOp', - 'call_scope/model/outer/Dense4/BiasAdd', - 'call_scope/model/Dense5/MatMul/ReadVariableOp/resource', - 'call_scope/model/Dense5/MatMul/ReadVariableOp', - 'call_scope/model/Dense5/MatMul', - 'call_scope/model/Dense5/BiasAdd/ReadVariableOp/resource', - 'call_scope/model/Dense5/BiasAdd/ReadVariableOp', - 'call_scope/model/Dense5/BiasAdd', - 'Identity', - 'NoOp' - ]) - base_layer._apply_name_scope_on_model_declaration(False) - - @test_utils.run_v2_only - def test_apply_name_scope_on_nested_layer_model_declaration(self): - if not tf.executing_eagerly(): - self.skipTest('`apply_name_scope_on_model_declaration` API is supported' - ' only for V2 eager') - - base_layer._apply_name_scope_on_model_declaration(True) - - class ThreeDenses(layers.Layer): - - def __init__(self, name='ThreeDenses', **kwargs): - super().__init__(name=name, **kwargs) - self.inner_dense_1 = layers.Dense(10, name='NestedDense1') - with tf.name_scope('inner1/inner2'): - self.inner_dense_2 = layers.Dense(20, name='NestedDense2') - self.inner_dense_3 = layers.Dense(30, name='NestedDense3') - - def call(self, x): - x = self.inner_dense_1(x) - x = self.inner_dense_2(x) - x = self.inner_dense_3(x) - return x - - inputs = input_layer.Input((3,)) - with tf.name_scope('outer'): - x = ThreeDenses()(inputs) - outputs = layers.Dense(10, name='OuterDense')(x) - - model = training_lib.Model(inputs, outputs) - node_names = self._get_model_node_names(model, np.random.random((1, 3)), - 'call_scope') - - self.assertListEqual(node_names, [ - 'call_scope/Const', 'call_scope/model/Cast', - 'call_scope/model/outer/ThreeDenses/NestedDense1/MatMul/ReadVariableOp/resource', - 'call_scope/model/outer/ThreeDenses/NestedDense1/MatMul/ReadVariableOp', - 'call_scope/model/outer/ThreeDenses/NestedDense1/MatMul', - 'call_scope/model/outer/ThreeDenses/NestedDense1/BiasAdd/ReadVariableOp/resource', - 'call_scope/model/outer/ThreeDenses/NestedDense1/BiasAdd/ReadVariableOp', - 'call_scope/model/outer/ThreeDenses/NestedDense1/BiasAdd', - 'call_scope/model/outer/ThreeDenses/inner1/inner2/NestedDense2/MatMul/ReadVariableOp/resource', - 'call_scope/model/outer/ThreeDenses/inner1/inner2/NestedDense2/MatMul/ReadVariableOp', - 'call_scope/model/outer/ThreeDenses/inner1/inner2/NestedDense2/MatMul', - 'call_scope/model/outer/ThreeDenses/inner1/inner2/NestedDense2/BiasAdd/ReadVariableOp/resource', - 'call_scope/model/outer/ThreeDenses/inner1/inner2/NestedDense2/BiasAdd/ReadVariableOp', - 'call_scope/model/outer/ThreeDenses/inner1/inner2/NestedDense2/BiasAdd', - 
'call_scope/model/outer/ThreeDenses/NestedDense3/MatMul/ReadVariableOp/resource', - 'call_scope/model/outer/ThreeDenses/NestedDense3/MatMul/ReadVariableOp', - 'call_scope/model/outer/ThreeDenses/NestedDense3/MatMul', - 'call_scope/model/outer/ThreeDenses/NestedDense3/BiasAdd/ReadVariableOp/resource', - 'call_scope/model/outer/ThreeDenses/NestedDense3/BiasAdd/ReadVariableOp', - 'call_scope/model/outer/ThreeDenses/NestedDense3/BiasAdd', - 'call_scope/model/OuterDense/MatMul/ReadVariableOp/resource', - 'call_scope/model/OuterDense/MatMul/ReadVariableOp', - 'call_scope/model/OuterDense/MatMul', - 'call_scope/model/OuterDense/BiasAdd/ReadVariableOp/resource', - 'call_scope/model/OuterDense/BiasAdd/ReadVariableOp', - 'call_scope/model/OuterDense/BiasAdd', 'Identity', 'NoOp' - ]) - base_layer._apply_name_scope_on_model_declaration(False) - - def _get_model_node_names(self, model, inputs, call_name_scope): - """Returns a list of model's node names.""" - - @tf.function() - def wrapper(): - with tf.name_scope(call_name_scope): - return model(inputs) - - return [ - node.name - for node in wrapper.get_concrete_function().graph.as_graph_def().node - ] + def test_name_scope_layer(self): + x = backend.placeholder(shape=(10, 10)) + layer = layers.Dense(10, name="MyName") + layer(x) + self.assertEqual(layer.bias.name, "MyName/bias:0") + self.assertEqual(layer.kernel.name, "MyName/kernel:0") + + def test_name_scope_functional_api(self): + inputs = input_layer.Input((3,)) + layer = layers.Dense(10, name="MyName") + _ = layer(inputs) + self.assertEqual(layer.bias.name, "MyName/bias:0") + self.assertEqual(layer.kernel.name, "MyName/kernel:0") + + def test_name_scope_functional_api_nested(self): + class NestedLayer(base_layer.Layer): + def __init__(self, name="OuterName"): + super().__init__(name=name) + self.dense = layers.Dense(10, name="InnerName") + + def call(self, inputs): + return self.dense(inputs) + + inputs = input_layer.Input((3,)) + layer = NestedLayer() + _ = layer(inputs) + self.assertEqual(layer.dense.bias.name, "OuterName/InnerName/bias:0") + self.assertEqual( + layer.dense.kernel.name, "OuterName/InnerName/kernel:0" + ) + + def test_name_scope_sublayer(self): + class NameScopeTracker(base_layer.Layer): + def call(self, inputs): + self.active_name_scope = tf.__internal__.get_name_scope() + return inputs + + x = backend.placeholder(shape=(10, 10)) + sublayer = NameScopeTracker(name="Sublayer") + layer = layers.Dense(10, activation=sublayer, name="MyName2") + layer(x) + self.assertEqual(layer.bias.name, "MyName2/bias:0") + self.assertEqual(layer.kernel.name, "MyName2/kernel:0") + self.assertEqual(sublayer.active_name_scope, "MyName2/Sublayer") + + def test_name_scope_tf_tensor(self): + x = tf.convert_to_tensor(np.ones((10, 10))) + layer = layers.Dense( + 10, activation=layers.ReLU(name="MyAct"), name="MyName3" + ) + layer(x) + self.assertEqual(layer.bias.name, "MyName3/bias:0") + self.assertEqual(layer.kernel.name, "MyName3/kernel:0") + + @test_utils.run_v2_only + def test_apply_name_scope_on_model_declaration(self): + if not tf.executing_eagerly(): + self.skipTest( + "`apply_name_scope_on_model_declaration` API is supported" + " only for V2 eager" + ) + + base_layer._apply_name_scope_on_model_declaration(True) + + inputs = input_layer.Input((3,)) + x = layers.Dense(10, name="Dense1")(inputs) + with tf.name_scope("outer"): + x = layers.Dense(10, name="Dense2")(x) + with tf.name_scope("inner"): + x = layers.Dense(10, name="Dense3")(x) + x = layers.Dense(10, name="Dense4")(x) + outputs = 
layers.Dense(10, name="Dense5")(x) + + model = training_lib.Model(inputs, outputs) + node_names = self._get_model_node_names( + model, np.random.random((1, 3)), "call_scope" + ) + self.assertListEqual( + node_names, + [ + "call_scope/Const", + "call_scope/model/Cast", + "call_scope/model/Dense1/MatMul/ReadVariableOp/resource", + "call_scope/model/Dense1/MatMul/ReadVariableOp", + "call_scope/model/Dense1/MatMul", + "call_scope/model/Dense1/BiasAdd/ReadVariableOp/resource", + "call_scope/model/Dense1/BiasAdd/ReadVariableOp", + "call_scope/model/Dense1/BiasAdd", + "call_scope/model/outer/Dense2/MatMul/ReadVariableOp/resource", + "call_scope/model/outer/Dense2/MatMul/ReadVariableOp", + "call_scope/model/outer/Dense2/MatMul", + "call_scope/model/outer/Dense2/BiasAdd/ReadVariableOp/resource", + "call_scope/model/outer/Dense2/BiasAdd/ReadVariableOp", + "call_scope/model/outer/Dense2/BiasAdd", + "call_scope/model/outer/inner/Dense3/MatMul/ReadVariableOp/" + "resource", + "call_scope/model/outer/inner/Dense3/MatMul/ReadVariableOp", + "call_scope/model/outer/inner/Dense3/MatMul", + "call_scope/model/outer/inner/Dense3/BiasAdd/ReadVariableOp/" + "resource", + "call_scope/model/outer/inner/Dense3/BiasAdd/ReadVariableOp", + "call_scope/model/outer/inner/Dense3/BiasAdd", + "call_scope/model/outer/Dense4/MatMul/ReadVariableOp/resource", + "call_scope/model/outer/Dense4/MatMul/ReadVariableOp", + "call_scope/model/outer/Dense4/MatMul", + "call_scope/model/outer/Dense4/BiasAdd/ReadVariableOp/resource", + "call_scope/model/outer/Dense4/BiasAdd/ReadVariableOp", + "call_scope/model/outer/Dense4/BiasAdd", + "call_scope/model/Dense5/MatMul/ReadVariableOp/resource", + "call_scope/model/Dense5/MatMul/ReadVariableOp", + "call_scope/model/Dense5/MatMul", + "call_scope/model/Dense5/BiasAdd/ReadVariableOp/resource", + "call_scope/model/Dense5/BiasAdd/ReadVariableOp", + "call_scope/model/Dense5/BiasAdd", + "Identity", + "NoOp", + ], + ) + base_layer._apply_name_scope_on_model_declaration(False) + + @test_utils.run_v2_only + def test_apply_name_scope_on_nested_layer_model_declaration(self): + if not tf.executing_eagerly(): + self.skipTest( + "`apply_name_scope_on_model_declaration` API is supported" + " only for V2 eager" + ) + + base_layer._apply_name_scope_on_model_declaration(True) + + class ThreeDenses(layers.Layer): + def __init__(self, name="ThreeDenses", **kwargs): + super().__init__(name=name, **kwargs) + self.inner_dense_1 = layers.Dense(10, name="NestedDense1") + with tf.name_scope("inner1/inner2"): + self.inner_dense_2 = layers.Dense(20, name="NestedDense2") + self.inner_dense_3 = layers.Dense(30, name="NestedDense3") + + def call(self, x): + x = self.inner_dense_1(x) + x = self.inner_dense_2(x) + x = self.inner_dense_3(x) + return x + + inputs = input_layer.Input((3,)) + with tf.name_scope("outer"): + x = ThreeDenses()(inputs) + outputs = layers.Dense(10, name="OuterDense")(x) + + model = training_lib.Model(inputs, outputs) + node_names = self._get_model_node_names( + model, np.random.random((1, 3)), "call_scope" + ) + + self.assertListEqual( + node_names, + [ + "call_scope/Const", + "call_scope/model/Cast", + "call_scope/model/outer/ThreeDenses/NestedDense1/MatMul/" + "ReadVariableOp/resource", + "call_scope/model/outer/ThreeDenses/NestedDense1/MatMul/" + "ReadVariableOp", + "call_scope/model/outer/ThreeDenses/NestedDense1/MatMul", + "call_scope/model/outer/ThreeDenses/NestedDense1/BiasAdd/" + "ReadVariableOp/resource", + "call_scope/model/outer/ThreeDenses/NestedDense1/BiasAdd/" + "ReadVariableOp", + 
"call_scope/model/outer/ThreeDenses/NestedDense1/BiasAdd", + "call_scope/model/outer/ThreeDenses/inner1/inner2/" + "NestedDense2/MatMul/ReadVariableOp/resource", + "call_scope/model/outer/ThreeDenses/inner1/inner2/" + "NestedDense2/MatMul/ReadVariableOp", + "call_scope/model/outer/ThreeDenses/inner1/inner2/" + "NestedDense2/MatMul", + "call_scope/model/outer/ThreeDenses/inner1/inner2/" + "NestedDense2/BiasAdd/ReadVariableOp/resource", + "call_scope/model/outer/ThreeDenses/inner1/inner2/" + "NestedDense2/BiasAdd/ReadVariableOp", + "call_scope/model/outer/ThreeDenses/inner1/inner2/" + "NestedDense2/BiasAdd", + "call_scope/model/outer/ThreeDenses/NestedDense3/" + "MatMul/ReadVariableOp/resource", + "call_scope/model/outer/ThreeDenses/NestedDense3/" + "MatMul/ReadVariableOp", + "call_scope/model/outer/ThreeDenses/NestedDense3/MatMul", + "call_scope/model/outer/ThreeDenses/NestedDense3/" + "BiasAdd/ReadVariableOp/resource", + "call_scope/model/outer/ThreeDenses/NestedDense3/" + "BiasAdd/ReadVariableOp", + "call_scope/model/outer/ThreeDenses/NestedDense3/BiasAdd", + "call_scope/model/OuterDense/MatMul/ReadVariableOp/resource", + "call_scope/model/OuterDense/MatMul/ReadVariableOp", + "call_scope/model/OuterDense/MatMul", + "call_scope/model/OuterDense/BiasAdd/ReadVariableOp/resource", + "call_scope/model/OuterDense/BiasAdd/ReadVariableOp", + "call_scope/model/OuterDense/BiasAdd", + "Identity", + "NoOp", + ], + ) + base_layer._apply_name_scope_on_model_declaration(False) + + def _get_model_node_names(self, model, inputs, call_name_scope): + """Returns a list of model's node names.""" + + @tf.function() + def wrapper(): + with tf.name_scope(call_name_scope): + return model(inputs) + + return [ + node.name + for node in wrapper.get_concrete_function() + .graph.as_graph_def() + .node + ] @test_utils.run_v2_only @test_combinations.generate( - test_combinations.keras_mode_combinations(mode=['eager'])) + test_combinations.keras_mode_combinations(mode=["eager"]) +) class AutographControlFlowTest(test_combinations.TestCase): - - def test_disabling_in_context_is_matched(self): - - test_obj = self - - class MyLayer(base_layer.Layer): - - def call(self, inputs, training=None): - with test_obj.assertRaisesRegex(TypeError, 'Tensor.*as.*bool'): - if tf.constant(False): - return inputs * 1. - return inputs * 0. - - @tf.function(autograph=False) - def test_fn(): - return MyLayer()(tf.constant([[1., 2., 3.]])) - - test_fn() - - def test_if_training_pattern_output(self): - - class MyLayer(base_layer.Layer): - - def call(self, inputs, training=None): - if training: - return inputs * 1. - return inputs * 0. - - inputs = input_layer.Input((3,)) - outputs = MyLayer()(inputs) - model = training_lib.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - train_loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(train_loss, 0.) - test_loss = model.test_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(test_loss, 1.) - - def test_if_training_pattern_loss(self): - - class MyLayer(base_layer.Layer): - - def call(self, inputs, training=None): - if training: - loss = tf.reduce_sum(inputs) - else: - loss = 0. 
- self.add_loss(loss) - return inputs - - inputs = input_layer.Input((3,)) - outputs = MyLayer()(inputs) - model = training_lib.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - train_loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(train_loss, 2 * 3) - test_loss = model.test_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(test_loss, 0) - - def test_if_training_pattern_metric(self): - - class MyLayer(base_layer.Layer): - - def call(self, inputs, training=None): - if training: - metric = tf.reduce_sum(inputs) - else: - metric = 0. - self.add_metric(metric, name='my_metric', aggregation='mean') - return inputs - - inputs = input_layer.Input((3,)) - outputs = MyLayer()(inputs) - model = training_lib.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - for _ in range(3): - _, train_metric = model.train_on_batch(np.ones((2, 3)), - np.ones((2, 3))) - - self.assertEqual(train_metric, 2 * 3) - _, test_metric = model.test_on_batch(np.ones((2, 3)), - np.ones((2, 3))) - self.assertEqual(test_metric, 0) - - def test_if_training_pattern_update(self): - - class MyLayer(base_layer.Layer): - - def build(self, input_shape): - self.counter = self.add_weight( - shape=(), trainable=False, initializer='zeros') - - def call(self, inputs, training=None): - if training: - increment = 1. - else: - increment = 0. - self.counter.assign_add(increment) - return inputs - - inputs = input_layer.Input((3,)) - layer = MyLayer() - outputs = layer(inputs) - model = training_lib.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(backend.get_value(layer.counter), 1.) 
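These autograph tests exercise the `training`-conditional pattern in `Layer.call`: Keras runs `call` through AutoGraph, so a plain Python `if training:` also works when `training` arrives as a symbolic tensor. A minimal standalone sketch of the pattern (illustrative only, not part of this patch; the layer name and values are assumptions):

import numpy as np
import tensorflow as tf

class ZeroInTraining(tf.keras.layers.Layer):
    """Passes inputs through at inference time, zeroes them in training."""

    def call(self, inputs, training=None):
        # AutoGraph converts this Python conditional, so it also works
        # when `training` is a symbolic tensor inside `fit()`.
        if training:
            return inputs * 0.0
        return inputs

inputs = tf.keras.Input((3,))
outputs = ZeroInTraining()(inputs)
model = tf.keras.Model(inputs, outputs)
model.compile("sgd", "mse")
# Targets equal the inputs, so zeroing in training gives MSE 1.0, while
# evaluation (training=False) passes inputs through and gives MSE 0.0.
train_loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3)))
test_loss = model.test_on_batch(np.ones((2, 3)), np.ones((2, 3)))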
- - def test_conditional_losses_in_call(self): - - class MyLayer(base_layer.Layer): - - def __init__(self): - super().__init__(dynamic=test_utils.should_run_eagerly()) - - def call(self, inputs, training=None): - if training: - self.add_loss(tf.reduce_sum(inputs)) - return inputs - - def compute_output_shape(self, input_shape): - return input_shape - - inputs = input_layer.Input((3,)) - layer = MyLayer() - outputs = layer(inputs) - model = training_lib.Model(inputs, outputs) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(loss, 2 * 3) - - def test_conditional_callable_losses(self): - model = sequential.Sequential([ - layers.Dense( - 1, kernel_regularizer=regularizers.l2(1e-4), input_shape=(1,)) - ]) - model._run_eagerly = test_utils.should_run_eagerly() - - def assert_graph(t): - if not tf.executing_eagerly(): - self.assertEqual(t.graph, tf.compat.v1.get_default_graph()) - - @tf.function - def get_losses(t): - if t < 0: - return tf.reduce_sum(model.losses) * t - else: - return tf.reduce_sum(model.losses) - - assert_graph(get_losses(tf.constant(2.))) - assert_graph(get_losses(tf.constant(0.5))) - - def test_conditional_metrics_in_call(self): - - class MyLayer(base_layer.Layer): - - def __init__(self): - super().__init__(dynamic=test_utils.should_run_eagerly()) - - def call(self, inputs, training=None): - if training: - self.add_metric(tf.reduce_sum(inputs), - name='sum', - aggregation='mean') - return inputs - - def compute_output_shape(self, input_shape): - return input_shape - - inputs = input_layer.Input((3,)) - layer = MyLayer() - outputs = layer(inputs) - model = training_lib.Model(inputs, outputs) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - history = model.fit(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(history.history['sum'][-1], 2 * 3) - - def test_conditional_activity_regularizer_in_call(self): - - class TestModel(training_lib.Model): - - def __init__(self): - super().__init__( - name='test_model', dynamic=test_utils.should_run_eagerly()) - self.layer = layers.Dense(2, activity_regularizer='l2') - - def call(self, x, training=None): - if tf.greater(tf.reduce_sum(x), 0.0): - return self.layer(x) + def test_disabling_in_context_is_matched(self): + + test_obj = self + + class MyLayer(base_layer.Layer): + def call(self, inputs, training=None): + with test_obj.assertRaisesRegex(TypeError, "Tensor.*as.*bool"): + if tf.constant(False): + return inputs * 1.0 + return inputs * 0.0 + + @tf.function(autograph=False) + def test_fn(): + return MyLayer()(tf.constant([[1.0, 2.0, 3.0]])) + + test_fn() + + def test_if_training_pattern_output(self): + class MyLayer(base_layer.Layer): + def call(self, inputs, training=None): + if training: + return inputs * 1.0 + return inputs * 0.0 + + inputs = input_layer.Input((3,)) + outputs = MyLayer()(inputs) + model = training_lib.Model(inputs, outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + train_loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) + self.assertEqual(train_loss, 0.0) + test_loss = model.test_on_batch(np.ones((2, 3)), np.ones((2, 3))) + self.assertEqual(test_loss, 1.0) + + def test_if_training_pattern_loss(self): + class MyLayer(base_layer.Layer): + def call(self, inputs, training=None): + if training: + loss = tf.reduce_sum(inputs) + else: + loss = 0.0 + self.add_loss(loss) + return inputs + + inputs = input_layer.Input((3,)) + outputs = 
MyLayer()(inputs) + model = training_lib.Model(inputs, outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + train_loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) + self.assertEqual(train_loss, 2 * 3) + test_loss = model.test_on_batch(np.ones((2, 3)), np.ones((2, 3))) + self.assertEqual(test_loss, 0) + + def test_if_training_pattern_metric(self): + class MyLayer(base_layer.Layer): + def call(self, inputs, training=None): + if training: + metric = tf.reduce_sum(inputs) + else: + metric = 0.0 + self.add_metric(metric, name="my_metric", aggregation="mean") + return inputs + + inputs = input_layer.Input((3,)) + outputs = MyLayer()(inputs) + model = training_lib.Model(inputs, outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + for _ in range(3): + _, train_metric = model.train_on_batch( + np.ones((2, 3)), np.ones((2, 3)) + ) + + self.assertEqual(train_metric, 2 * 3) + _, test_metric = model.test_on_batch( + np.ones((2, 3)), np.ones((2, 3)) + ) + self.assertEqual(test_metric, 0) + + def test_if_training_pattern_update(self): + class MyLayer(base_layer.Layer): + def build(self, input_shape): + self.counter = self.add_weight( + shape=(), trainable=False, initializer="zeros" + ) + + def call(self, inputs, training=None): + if training: + increment = 1.0 + else: + increment = 0.0 + self.counter.assign_add(increment) + return inputs + + inputs = input_layer.Input((3,)) + layer = MyLayer() + outputs = layer(inputs) + model = training_lib.Model(inputs, outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) + self.assertEqual(backend.get_value(layer.counter), 1.0) + + def test_conditional_losses_in_call(self): + class MyLayer(base_layer.Layer): + def __init__(self): + super().__init__(dynamic=test_utils.should_run_eagerly()) + + def call(self, inputs, training=None): + if training: + self.add_loss(tf.reduce_sum(inputs)) + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + inputs = input_layer.Input((3,)) + layer = MyLayer() + outputs = layer(inputs) + model = training_lib.Model(inputs, outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) + self.assertEqual(loss, 2 * 3) + + def test_conditional_callable_losses(self): + model = sequential.Sequential( + [ + layers.Dense( + 1, + kernel_regularizer=regularizers.l2(1e-4), + input_shape=(1,), + ) + ] + ) + model._run_eagerly = test_utils.should_run_eagerly() + + def assert_graph(t): + if not tf.executing_eagerly(): + self.assertEqual(t.graph, tf.compat.v1.get_default_graph()) + + @tf.function + def get_losses(t): + if t < 0: + return tf.reduce_sum(model.losses) * t + else: + return tf.reduce_sum(model.losses) + + assert_graph(get_losses(tf.constant(2.0))) + assert_graph(get_losses(tf.constant(0.5))) + + def test_conditional_metrics_in_call(self): + class MyLayer(base_layer.Layer): + def __init__(self): + super().__init__(dynamic=test_utils.should_run_eagerly()) + + def call(self, inputs, training=None): + if training: + self.add_metric( + tf.reduce_sum(inputs), name="sum", aggregation="mean" + ) + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + inputs = input_layer.Input((3,)) + layer = MyLayer() + outputs = layer(inputs) + model = training_lib.Model(inputs, outputs) + model.compile("sgd", "mse", 
run_eagerly=test_utils.should_run_eagerly()) + history = model.fit(np.ones((2, 3)), np.ones((2, 3))) + self.assertEqual(history.history["sum"][-1], 2 * 3) + + def test_conditional_activity_regularizer_in_call(self): + class TestModel(training_lib.Model): + def __init__(self): + super().__init__( + name="test_model", dynamic=test_utils.should_run_eagerly() + ) + self.layer = layers.Dense(2, activity_regularizer="l2") + + def call(self, x, training=None): + if tf.greater(tf.reduce_sum(x), 0.0): + return self.layer(x) + else: + return self.layer(x) + + model = TestModel() + model.compile( + loss="mse", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones(shape=(10, 1)) + y = np.ones(shape=(10, 2)) + + if test_utils.should_run_eagerly(): + model.fit(x, y, epochs=2, batch_size=5) else: - return self.layer(x) - - model = TestModel() - model.compile( - loss='mse', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones(shape=(10, 1)) - y = np.ones(shape=(10, 2)) - - if test_utils.should_run_eagerly(): - model.fit(x, y, epochs=2, batch_size=5) - else: - with self.assertRaisesRegex(ValueError, 'ActivityRegularizer'): - model.fit(x, y, epochs=2, batch_size=5) - - def test_conditional_activity_regularizer_with_wrappers_in_call(self): - - class TestModel(training_lib.Model): - - def __init__(self): - super().__init__( - name='test_model', dynamic=test_utils.should_run_eagerly()) - self.layer = layers.TimeDistributed( - layers.Dense(2, activity_regularizer='l2'), input_shape=(3, 4)) - - def call(self, x, training=None): - if tf.greater(tf.reduce_sum(x), 0.0): - return self.layer(x) + with self.assertRaisesRegex(ValueError, "ActivityRegularizer"): + model.fit(x, y, epochs=2, batch_size=5) + + def test_conditional_activity_regularizer_with_wrappers_in_call(self): + class TestModel(training_lib.Model): + def __init__(self): + super().__init__( + name="test_model", dynamic=test_utils.should_run_eagerly() + ) + self.layer = layers.TimeDistributed( + layers.Dense(2, activity_regularizer="l2"), + input_shape=(3, 4), + ) + + def call(self, x, training=None): + if tf.greater(tf.reduce_sum(x), 0.0): + return self.layer(x) + else: + return self.layer(x) + + model = TestModel() + model.compile( + loss="mse", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones(shape=(10, 3, 4)) + y = np.ones(shape=(10, 3, 2)) + + if test_utils.should_run_eagerly(): + model.fit(x, y, epochs=2, batch_size=5) else: - return self.layer(x) - - model = TestModel() - model.compile( - loss='mse', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones(shape=(10, 3, 4)) - y = np.ones(shape=(10, 3, 2)) - - if test_utils.should_run_eagerly(): - model.fit(x, y, epochs=2, batch_size=5) - else: - with self.assertRaisesRegex(ValueError, 'ActivityRegularizer'): - model.fit(x, y, epochs=2, batch_size=5) + with self.assertRaisesRegex(ValueError, "ActivityRegularizer"): + model.fit(x, y, epochs=2, batch_size=5) class AddLayer(base_layer.Layer): - """A layer which adds its input to a variable. + """A layer which adds its input to a variable. 
- Useful for testing a layer with a variable - """ + Useful for testing a layer with a variable + """ - def build(self, _): - self.v = self.add_weight('v', (), initializer='ones') - self.built = True + def build(self, _): + self.v = self.add_weight("v", (), initializer="ones") + self.built = True - def call(self, inputs): - return inputs + self.v + def call(self, inputs): + return inputs + self.v class IdentityLayer(base_layer.Layer): - """A layer that returns its input. + """A layer that returns its input. - Useful for testing a layer without a variable. - """ + Useful for testing a layer without a variable. + """ - def call(self, inputs): - return inputs + def call(self, inputs): + return inputs @test_utils.run_v2_only -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class DTypeTest(test_combinations.TestCase): - - def _const(self, dtype): - return tf.constant(1, dtype=dtype) - - @test_utils.enable_v2_dtype_behavior - def test_dtype_defaults_to_floatx(self): - layer = AddLayer() - self.assertEqual(layer.dtype, 'float32') - layer(self._const('float64')) - self.assertEqual(layer.dtype, 'float32') # dtype should not change - - try: - backend.set_floatx('float64') - layer = AddLayer() - self.assertEqual(layer.dtype, 'float64') - finally: - backend.set_floatx('float32') - - @test_utils.enable_v2_dtype_behavior - def test_passing_dtype_to_constructor(self): - layer = IdentityLayer(dtype='float64') - layer(self._const('float32')) - self.assertEqual(layer.dtype, 'float64') - - layer = IdentityLayer(dtype='int32') - layer(self._const('float32')) - self.assertEqual(layer.dtype, 'int32') - - layer = IdentityLayer(dtype=tf.float64) - layer(self._const('float32')) - self.assertEqual(layer.dtype, 'float64') - - @test_utils.enable_v2_dtype_behavior - def input_cast_to_dtype(self): - layer = AddLayer() - - # Input should be cast to layer.dtype, so output should also be layer.dtype - self.assertEqual(layer(self._const('float64')).dtype, 'float32') - - layer = AddLayer(dtype='float64') - self.assertEqual(layer(self._const('float32')).dtype, 'float64') - - # Test inputs are not casted if layer.dtype is not floating-point - layer = IdentityLayer(dtype='int32') - self.assertEqual(layer(self._const('float64')).dtype, 'float64') - - # Test inputs are not casted if the inputs are not floating-point - layer = IdentityLayer(dtype='float32') - self.assertEqual(layer(self._const('int32')).dtype, 'int32') - - # Test Numpy arrays are casted - layer = IdentityLayer(dtype='float64') - self.assertEqual(layer(np.array(1, dtype='float32')).dtype, 'float64') - - # Test Python floats are casted - layer = IdentityLayer(dtype='float64') - self.assertEqual(layer(1.).dtype, 'float64') - - @test_utils.enable_v2_dtype_behavior - def multiple_inputs_cast_to_dtype(self): - - class MultiIdentityLayer(base_layer.Layer): - - def call(self, inputs): - return [tf.identity(x) for x in inputs] - - # Testing layer with default dtype of float32 - layer = MultiIdentityLayer() - x, y = layer([self._const('float16'), self._const('float32')]) - self.assertEqual(x.dtype, 'float32') - self.assertEqual(y.dtype, 'float32') - - # Test passing dtype to the constructor - layer = MultiIdentityLayer(dtype='float64') - x, y = layer([self._const('float16'), self._const('float32')]) - self.assertEqual(x.dtype, 'float64') - self.assertEqual(y.dtype, 'float64') - - # Test several non-floating point types - layer = 
MultiIdentityLayer(dtype='float64') - x, y, z, w = layer([self._const('float16'), self._const('bool'), - self._const('float64'), self._constant('complex64')]) - self.assertEqual(x.dtype, 'float64') - self.assertEqual(y.dtype, 'bool') - self.assertEqual(z.dtype, 'float64') - self.assertEqual(w.dtype, 'complex64') - - @test_utils.enable_v2_dtype_behavior - def test_extra_args_and_kwargs_not_casted(self): - - class IdentityLayerWithArgs(base_layer.Layer): - - def call(self, inputs, *args, **kwargs): - kwargs.pop('training', None) - return tf.nest.flatten([inputs, args, kwargs]) - - layer = IdentityLayerWithArgs(dtype='float64') - x, y, z = layer(self._const('float16'), self._const('float16'), - kwarg=self._const('float16')) - self.assertEqual(x.dtype, 'float64') - self.assertEqual(y.dtype, 'float16') - self.assertEqual(z.dtype, 'float16') - - @test_utils.enable_v2_dtype_behavior - def test_layer_without_autocast(self): - - class IdentityLayerWithoutAutocast(IdentityLayer): - - def __init__(self, *args, **kwargs): - kwargs['autocast'] = False - super().__init__(*args, **kwargs) - - layer = IdentityLayerWithoutAutocast(dtype='float64') - self.assertEqual(layer(self._const('float32')).dtype, 'float32') - - @test_utils.enable_v2_dtype_behavior - def test_compute_output_signature(self): - - class IdentityLayerWithOutputShape(IdentityLayer): - - def compute_output_shape(self, input_shape): - return input_shape - - layer = IdentityLayerWithOutputShape(dtype='float64') - output_signature = layer.compute_output_signature( - tf.TensorSpec(shape=(), dtype='float32')) - self.assertEqual(output_signature.shape, ()) - self.assertEqual(output_signature.dtype, 'float64') - - @test_utils.enable_v2_dtype_behavior - def test_composite_tensors_input_casting(self): - sparse = tf.SparseTensor( - indices=tf.constant([[0, 1], [2, 3]], dtype='int64'), - values=tf.constant([0., 1.], dtype='float32'), - dense_shape=tf.constant([4, 4], dtype='int64')) - ragged = tf.RaggedTensor.from_row_splits( - values=tf.constant([1., 2., 3.], dtype='float32'), - row_splits=tf.constant([0, 2, 2, 3], dtype='int64')) - - layer = IdentityLayer(dtype='float16') - - for x in sparse, ragged: - self.assertEqual(x.dtype, 'float32') - y = layer(x) - self.assertEqual(y.dtype, 'float16') - self.assertEqual(type(x), type(y)) - - @test_utils.enable_v2_dtype_behavior - def test_passing_non_tensor(self): - layer = IdentityLayer() - x = object() - y = layer(x) # Layer should not cast 'x', as it's not a tensor - self.assertIs(x, y) - - @test_utils.disable_v2_dtype_behavior - def test_v1_behavior(self): - # Test dtype defaults to None and inferred from input - layer = IdentityLayer() - self.assertIsNone(layer.dtype) - layer(self._const('float64')) - self.assertEqual(layer.dtype, 'float64') - - # Test layer does not cast to dtype - self.assertEqual(layer(self._const('float32')).dtype, 'float32') - - -if __name__ == '__main__': - tf.test.main() + def _const(self, dtype): + return tf.constant(1, dtype=dtype) + + @test_utils.enable_v2_dtype_behavior + def test_dtype_defaults_to_floatx(self): + layer = AddLayer() + self.assertEqual(layer.dtype, "float32") + layer(self._const("float64")) + self.assertEqual(layer.dtype, "float32") # dtype should not change + + try: + backend.set_floatx("float64") + layer = AddLayer() + self.assertEqual(layer.dtype, "float64") + finally: + backend.set_floatx("float32") + + @test_utils.enable_v2_dtype_behavior + def test_passing_dtype_to_constructor(self): + layer = IdentityLayer(dtype="float64") + layer(self._const("float32")) 
+ self.assertEqual(layer.dtype, "float64") + + layer = IdentityLayer(dtype="int32") + layer(self._const("float32")) + self.assertEqual(layer.dtype, "int32") + + layer = IdentityLayer(dtype=tf.float64) + layer(self._const("float32")) + self.assertEqual(layer.dtype, "float64") + + @test_utils.enable_v2_dtype_behavior + def input_cast_to_dtype(self): + layer = AddLayer() + + # Input should be cast to layer.dtype, so output should also be + # layer.dtype + self.assertEqual(layer(self._const("float64")).dtype, "float32") + + layer = AddLayer(dtype="float64") + self.assertEqual(layer(self._const("float32")).dtype, "float64") + + # Test inputs are not casted if layer.dtype is not floating-point + layer = IdentityLayer(dtype="int32") + self.assertEqual(layer(self._const("float64")).dtype, "float64") + + # Test inputs are not casted if the inputs are not floating-point + layer = IdentityLayer(dtype="float32") + self.assertEqual(layer(self._const("int32")).dtype, "int32") + + # Test Numpy arrays are casted + layer = IdentityLayer(dtype="float64") + self.assertEqual(layer(np.array(1, dtype="float32")).dtype, "float64") + + # Test Python floats are casted + layer = IdentityLayer(dtype="float64") + self.assertEqual(layer(1.0).dtype, "float64") + + @test_utils.enable_v2_dtype_behavior + def multiple_inputs_cast_to_dtype(self): + class MultiIdentityLayer(base_layer.Layer): + def call(self, inputs): + return [tf.identity(x) for x in inputs] + + # Testing layer with default dtype of float32 + layer = MultiIdentityLayer() + x, y = layer([self._const("float16"), self._const("float32")]) + self.assertEqual(x.dtype, "float32") + self.assertEqual(y.dtype, "float32") + + # Test passing dtype to the constructor + layer = MultiIdentityLayer(dtype="float64") + x, y = layer([self._const("float16"), self._const("float32")]) + self.assertEqual(x.dtype, "float64") + self.assertEqual(y.dtype, "float64") + + # Test several non-floating point types + layer = MultiIdentityLayer(dtype="float64") + x, y, z, w = layer( + [ + self._const("float16"), + self._const("bool"), + self._const("float64"), + self._const("complex64"), + ] + ) + self.assertEqual(x.dtype, "float64") + self.assertEqual(y.dtype, "bool") + self.assertEqual(z.dtype, "float64") + self.assertEqual(w.dtype, "complex64") + + @test_utils.enable_v2_dtype_behavior + def test_extra_args_and_kwargs_not_casted(self): + class IdentityLayerWithArgs(base_layer.Layer): + def call(self, inputs, *args, **kwargs): + kwargs.pop("training", None) + return tf.nest.flatten([inputs, args, kwargs]) + + layer = IdentityLayerWithArgs(dtype="float64") + x, y, z = layer( + self._const("float16"), + self._const("float16"), + kwarg=self._const("float16"), + ) + self.assertEqual(x.dtype, "float64") + self.assertEqual(y.dtype, "float16") + self.assertEqual(z.dtype, "float16") + + @test_utils.enable_v2_dtype_behavior + def test_layer_without_autocast(self): + class IdentityLayerWithoutAutocast(IdentityLayer): + def __init__(self, *args, **kwargs): + kwargs["autocast"] = False + super().__init__(*args, **kwargs) + + layer = IdentityLayerWithoutAutocast(dtype="float64") + self.assertEqual(layer(self._const("float32")).dtype, "float32") + + @test_utils.enable_v2_dtype_behavior + def test_compute_output_signature(self): + class IdentityLayerWithOutputShape(IdentityLayer): + def compute_output_shape(self, input_shape): + return input_shape + + layer = IdentityLayerWithOutputShape(dtype="float64") + output_signature = layer.compute_output_signature( + tf.TensorSpec(shape=(), dtype="float32") +
) + self.assertEqual(output_signature.shape, ()) + self.assertEqual(output_signature.dtype, "float64") + + @test_utils.enable_v2_dtype_behavior + def test_composite_tensors_input_casting(self): + sparse = tf.SparseTensor( + indices=tf.constant([[0, 1], [2, 3]], dtype="int64"), + values=tf.constant([0.0, 1.0], dtype="float32"), + dense_shape=tf.constant([4, 4], dtype="int64"), + ) + ragged = tf.RaggedTensor.from_row_splits( + values=tf.constant([1.0, 2.0, 3.0], dtype="float32"), + row_splits=tf.constant([0, 2, 2, 3], dtype="int64"), + ) + + layer = IdentityLayer(dtype="float16") + + for x in sparse, ragged: + self.assertEqual(x.dtype, "float32") + y = layer(x) + self.assertEqual(y.dtype, "float16") + self.assertEqual(type(x), type(y)) + + @test_utils.enable_v2_dtype_behavior + def test_passing_non_tensor(self): + layer = IdentityLayer() + x = object() + y = layer(x) # Layer should not cast 'x', as it's not a tensor + self.assertIs(x, y) + + @test_utils.disable_v2_dtype_behavior + def test_v1_behavior(self): + # Test dtype defaults to None and inferred from input + layer = IdentityLayer() + self.assertIsNone(layer.dtype) + layer(self._const("float64")) + self.assertEqual(layer.dtype, "float64") + + # Test layer does not cast to dtype + self.assertEqual(layer(self._const("float32")).dtype, "float32") + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/base_layer_utils.py b/keras/engine/base_layer_utils.py index 8234e105bfc8..8e3de3d4df2e 100644 --- a/keras/engine/base_layer_utils.py +++ b/keras/engine/base_layer_utils.py @@ -14,188 +14,202 @@ # ============================================================================== """Contains private utilities used mainly by the base Layer class.""" -import tensorflow.compat.v2 as tf - import functools import threading + +import tensorflow.compat.v1 as tf1 +import tensorflow.compat.v2 as tf + from keras import backend from keras.dtensor import dtensor_api as dtensor from keras.utils import control_flow_util from keras.utils import tf_inspect from keras.utils import tf_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export _call_context = threading.local() def create_mean_metric(value, name=None): - # import keras will import base_layer and then this module, and metric relies - # on base_layer, which result into a cyclic dependency. - from keras import metrics as metrics_module # pylint: disable=g-import-not-at-top - metric_obj = metrics_module.Mean(name=name, dtype=value.dtype) - return metric_obj, metric_obj(value) - - -def make_variable(name, - shape=None, - dtype=tf.float32, - initializer=None, - trainable=None, - caching_device=None, - validate_shape=True, - constraint=None, - use_resource=None, - collections=None, - synchronization=tf.VariableSynchronization.AUTO, - aggregation=tf.VariableAggregation.NONE, - partitioner=None, # pylint: disable=unused-argument - layout=None): - """Temporary util to create a variable (relies on `variable_scope.variable`). - - Some reuse-related technicalities prevent us from using - `variable_scope.get_variable()` directly, so we use a subcomponent - that has fewer constraints (`variable_scope.variable()`). - - In the longer term, it seems like a similar "default variable creator" method - should exist in `Trackable` instead. When this happens, we can get - rid of this temporary solution. - - TODO(fchollet): remove this method when no longer needed. - - Args: - name: Variable name. - shape: Variable shape. - dtype: The type of the variable. 
Defaults to `self.dtype` or `float32`. - initializer: Initializer instance (callable). - trainable: Whether the variable should be part of the layer's - "trainable_variables" (e.g. variables, biases) - or "non_trainable_variables" (e.g. BatchNorm mean, stddev). - Note, if the current variable scope is marked as non-trainable - then this parameter is ignored and any added variables are also - marked as non-trainable. `trainable` defaults to `True` unless - `synchronization` is set to `ON_READ`. - caching_device: Passed to `tf.Variable`. - validate_shape: Passed to `tf.Variable`. - constraint: Constraint instance (callable). - use_resource: Whether to use a `ResourceVariable`. - collections: List of graph collections keys. The new variable is added to - these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. - synchronization: Indicates when a distributed a variable will be - aggregated. Accepted values are constants defined in the class - `tf.VariableSynchronization`. By default the synchronization is set to - `AUTO` and the current `DistributionStrategy` chooses - when to synchronize. If `synchronization` is set to `ON_READ`, - `trainable` must not be set to `True`. - aggregation: Indicates how a distributed variable will be aggregated. - Accepted values are constants defined in the class - `tf.VariableAggregation`. - partitioner: Not handled at this time. - layout: the optional DTensor layout, used for creating DVariable. - - Returns: - Variable instance. - """ - initializing_from_value = False - if initializer is not None and not callable(initializer): - initializing_from_value = True - - if initializing_from_value: - init_val = initializer - variable_dtype = None - else: - # Instantiate initializer if provided initializer is a type object. - if tf_inspect.isclass(initializer): - initializer = initializer() - if layout: - init_val = functools.partial(initializer, shape, dtype=dtype, - layout=layout) + # import keras will import base_layer and then this module, and metric + # relies on base_layer, which results in a cyclic dependency. + from keras import metrics as metrics_module + + metric_obj = metrics_module.Mean(name=name, dtype=value.dtype) + return metric_obj, metric_obj(value) + + +def infer_init_val_and_dtype(initializer, dtype, shape, layout=None): + if initializer is not None and not callable(initializer): + init_val = initializer + variable_dtype = None else: - init_val = functools.partial(initializer, shape, dtype=dtype) - variable_dtype = dtype.base_dtype - - variable_shape = tf.TensorShape(shape) - - if use_resource is None: - use_resource = True - - if layout is None: - # In theory, in `use_resource` is True and `collections` is empty - # (that is to say, in TF2), we can use tf.Variable. - # However, this breaks legacy (Estimator) checkpoints because - # it changes variable names. Remove this when V1 is fully deprecated.
- return tf.compat.v1.Variable( - initial_value=init_val, - name=name, - trainable=trainable, - caching_device=caching_device, - dtype=variable_dtype, - validate_shape=validate_shape, - constraint=constraint, - use_resource=use_resource, - collections=collections, - synchronization=synchronization, - aggregation=aggregation, - shape=variable_shape if variable_shape else None) - else: - return dtensor.DVariable( - initial_value=init_val, - name=name, - trainable=trainable, - caching_device=caching_device, - dtype=variable_dtype, - validate_shape=validate_shape, - constraint=constraint, - collections=collections, - synchronization=synchronization, - aggregation=aggregation, - shape=variable_shape if variable_shape else None) + # Instantiate initializer if provided initializer is a type object. + if tf_inspect.isclass(initializer): + initializer = initializer() + if layout: + init_val = functools.partial( + initializer, shape, dtype=dtype, layout=layout + ) + else: + init_val = functools.partial(initializer, shape, dtype=dtype) + variable_dtype = dtype.base_dtype + return init_val, variable_dtype + + +def make_variable( + name, + shape=None, + dtype=tf.float32, + initializer=None, + trainable=None, + caching_device=None, + validate_shape=True, + constraint=None, + use_resource=None, + collections=None, + synchronization=tf.VariableSynchronization.AUTO, + aggregation=tf.VariableAggregation.NONE, + partitioner=None, + layout=None, + experimental_enable_variable_lifting=True, +): + """Util to create a variable (relies on `variable_scope.variable`). + + Some reuse-related technicalities prevent us from using + `variable_scope.get_variable()` directly, so we use a subcomponent + that has fewer constraints (`variable_scope.variable()`). + + In the longer term, it seems like a similar "default variable creator" + method should exist in `Trackable` instead. When this happens, we can get + rid of this temporary solution. + + TODO(fchollet): remove this method when no longer needed. + + Args: + name: Variable name. + shape: Variable shape. + dtype: The type of the variable. Defaults to `self.dtype` or `float32`. + initializer: Initializer instance (callable). + trainable: Whether the variable should be part of the layer's + "trainable_variables" (e.g. variables, biases) + or "non_trainable_variables" (e.g. BatchNorm mean, stddev). + Note, if the current variable scope is marked as non-trainable + then this parameter is ignored and any added variables are also + marked as non-trainable. `trainable` becomes `True` unless + `synchronization` is set to `ON_READ`. Defaults to `None`. + caching_device: Passed to `tf.Variable`. + validate_shape: Passed to `tf.Variable`. + constraint: Constraint instance (callable). + use_resource: Whether to use a `ResourceVariable`. + collections: List of graph collections keys. The new variable is added to + these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. + synchronization: Indicates when a distributed variable will be + aggregated. Accepted values are constants defined in the class + `tf.VariableSynchronization`. By default the synchronization is set to + `AUTO` and the current `DistributionStrategy` chooses + when to synchronize. If `synchronization` is set to `ON_READ`, + `trainable` must not be set to `True`. + aggregation: Indicates how a distributed variable will be aggregated. + Accepted values are constants defined in the class + `tf.VariableAggregation`. + partitioner: Not handled at this time.
+ layout: the optional DTensor layout, used for creating DVariable. + + Returns: + Variable instance. + """ + init_val, variable_dtype = infer_init_val_and_dtype( + initializer, dtype, shape, layout + ) + variable_shape = tf.TensorShape(shape) + + if use_resource is None: + use_resource = True + + if layout is None: + # In theory, if `use_resource` is True and `collections` is empty + # (that is to say, in TF2), we can use tf.Variable. + # However, this breaks legacy (Estimator) checkpoints because + # it changes variable names. Remove this when V1 is fully deprecated. + return tf1.Variable( + initial_value=init_val, + name=name, + trainable=trainable, + caching_device=caching_device, + dtype=variable_dtype, + validate_shape=validate_shape, + constraint=constraint, + use_resource=use_resource, + collections=collections, + synchronization=synchronization, + aggregation=aggregation, + shape=variable_shape if variable_shape else None, + experimental_enable_variable_lifting=experimental_enable_variable_lifting, # noqa: E501 + ) + else: + return dtensor.DVariable( + initial_value=init_val, + name=name, + trainable=trainable, + caching_device=caching_device, + dtype=variable_dtype, + validate_shape=validate_shape, + constraint=constraint, + collections=collections, + synchronization=synchronization, + aggregation=aggregation, + shape=variable_shape if variable_shape else None, + ) def collect_previous_mask(input_tensors): - """Retrieves the output mask(s) of the previous node. + """Retrieves the output mask(s) of the previous node. - Args: - input_tensors: An arbitrary structure of Tensors. + Args: + input_tensors: An arbitrary structure of Tensors. - Returns: - A mask tensor or list of mask tensors. - """ + Returns: + A mask tensor or list of mask tensors. + """ - def _collect_previous_mask(x): - return getattr(x, '_keras_mask', None) + def _collect_previous_mask(x): + return getattr(x, "_keras_mask", None) - return tf.nest.map_structure(_collect_previous_mask, input_tensors) + return tf.nest.map_structure(_collect_previous_mask, input_tensors) def have_all_keras_metadata(tensors): - return all(hasattr(x, '_keras_history') for x in tf.nest.flatten(tensors)) + return all(hasattr(x, "_keras_history") for x in tf.nest.flatten(tensors)) def generate_placeholders_from_shape(shape): - return tf.compat.v1.placeholder(shape=shape, dtype=backend.floatx()) + return tf1.placeholder(shape=shape, dtype=backend.floatx()) def create_keras_history(tensors): - """Wraps TensorFlow Operations for compatibility with the Functional API. + """Wraps TensorFlow Operations for compatibility with the Functional API. - This method checks to see if a Tensor in `tensors` is missing Keras metadata - and has its origin in a Keras `Input` Layer. If so, this method will replace - the raw TensorFlow Operations that created this tensor with - `TensorFlowOpLayer` instances that create identical operations. + This method checks to see if a Tensor in `tensors` is missing Keras metadata + and has its origin in a Keras `Input` Layer. If so, this method will replace + the raw TensorFlow Operations that created this tensor with + `TensorFlowOpLayer` instances that create identical operations. - Any Tensors not originating from a Keras `Input` Layer will be treated as - constants when constructing `TensorFlowOpLayer` instances. + Any Tensors not originating from a Keras `Input` Layer will be treated as + constants when constructing `TensorFlowOpLayer` instances.
- Args: - tensors: A structure of Tensors, some of which come from raw TensorFlow - operations and need to have Keras metadata assigned to them. + Args: + tensors: A structure of Tensors, some of which come from raw TensorFlow + operations and need to have Keras metadata assigned to them. - Returns: - created_layers: List. The `TensorFlowOpLayer` instances created to wrap - the raw Tensorflow operations. - """ - _, created_layers = _create_keras_history_helper(tensors, set(), []) - return created_layers + Returns: + created_layers: List. The `TensorFlowOpLayer` instances created to wrap + the raw Tensorflow operations. + """ + _, created_layers = _create_keras_history_helper(tensors, set(), []) + return created_layers # Unsafe Internal attribute. @@ -212,232 +226,245 @@ def create_keras_history(tensors): def _create_keras_history_helper(tensors, processed_ops, created_layers): - """Helper method for `create_keras_history`. - - Args: - tensors: A structure of Tensors for which to create Keras metadata. - processed_ops: Set. TensorFlow operations that have already been wrapped in - `TensorFlowOpLayer` instances. - created_layers: List. The `TensorFlowOpLayer` instances created. - - Returns: - Tuple. First element is the updated set of TensorFlow Operations that - have been wrapped in `TensorFlowOpLayer` instances. Second element is - a list of the `TensorFlowOpLayer` instances created. - """ - if tf.compat.v1.executing_eagerly_outside_functions(): - raise ValueError( - '`create_keras_history` should only be called if eager is disabled!') - # Import of `base_layer` needed in order to create `TensorFlowOpLayer`. - # Cannot be imported at top because of circular dependencies. - # TODO(omalleyt): Resolve circular dependency. - from keras.engine import base_layer # pylint: disable=g-import-not-at-top - tensor_list = tf.nest.flatten(tensors) - sparse_ops = [] - ragged_tensors = [] - for tensor in tensor_list: - if getattr(tensor, '_keras_history', None) is not None: - continue - if isinstance( - tensor, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)): - sparse_ops.append(tensor.op) - continue - if tf_utils.is_ragged(tensor): - # Ragged tensors don't have an op property - ragged_tensors.append(tensor) - continue - op = tensor.op # The Op that created this Tensor. - if op not in processed_ops: - # Recursively set `_keras_history`. - op_inputs = list(op.inputs) - constants = {} - layer_inputs = [] - for i, op_input in enumerate(op_inputs): - if uses_keras_history(op_input): - layer_inputs.append(op_input) - else: - # Treat any value not originating from a `keras.Input` as - # a constant. Variables cannot be supported. - ds_with_session = ( - tf.distribute.in_cross_replica_context() and - not tf.compat.v1.executing_eagerly_outside_functions()) - using_xla = control_flow_util.GraphOrParentsInXlaContext( - tf.compat.v1.get_default_graph()) - if ds_with_session or using_xla or _UNSAFE_GRAPH_OP_LAYER_CREATION: - # In Legacy Graph mode, evaluating here makes Session be - # configured improperly. The downside of this is that saving - # via `get_config` breaks, but SavedModel still works. 
- constants[i] = op_input - else: - with tf.init_scope(): - constants[i] = backend.function([], op_input)([]) - layer_inputs = unnest_if_single_tensor(layer_inputs) - processed_ops, created_layers = _create_keras_history_helper( - layer_inputs, processed_ops, created_layers) - name = op.name - node_def = op.node_def.SerializeToString() - op_layer = base_layer.TensorFlowOpLayer( - node_def, constants=constants, name=name) - created_layers.append(op_layer) - op_layer._set_connectivity_metadata( # pylint: disable=protected-access - args=(layer_inputs,), - kwargs={}, - outputs=op.outputs) - processed_ops.update([op]) - if sparse_ops or ragged_tensors: - lambda_example = """ + """Helper method for `create_keras_history`. + + Args: + tensors: A structure of Tensors for which to create Keras metadata. + processed_ops: Set. TensorFlow operations that have already been wrapped + in `TensorFlowOpLayer` instances. + created_layers: List. The `TensorFlowOpLayer` instances created. + + Returns: + Tuple. First element is the updated set of TensorFlow Operations that + have been wrapped in `TensorFlowOpLayer` instances. Second element is + a list of the `TensorFlowOpLayer` instances created. + """ + if tf1.executing_eagerly_outside_functions(): + raise ValueError( + "`create_keras_history` should only be called if eager is disabled!" + ) + # Import of `base_layer` needed in order to create `TensorFlowOpLayer`. + # Cannot be imported at top because of circular dependencies. + # TODO(omalleyt): Resolve circular dependency. + from keras.engine import base_layer + + tensor_list = tf.nest.flatten(tensors) + sparse_ops = [] + ragged_tensors = [] + for tensor in tensor_list: + if getattr(tensor, "_keras_history", None) is not None: + continue + if isinstance(tensor, (tf.SparseTensor, tf1.SparseTensorValue)): + sparse_ops.append(tensor.op) + continue + if tf_utils.is_ragged(tensor): + # Ragged tensors don't have an op property + ragged_tensors.append(tensor) + continue + op = tensor.op # The Op that created this Tensor. + if op not in processed_ops: + # Recursively set `_keras_history`. + op_inputs = list(op.inputs) + constants = {} + layer_inputs = [] + for i, op_input in enumerate(op_inputs): + if uses_keras_history(op_input): + layer_inputs.append(op_input) + else: + # Treat any value not originating from a `keras.Input` as + # a constant. Variables cannot be supported. + ds_with_session = ( + tf.distribute.in_cross_replica_context() + and not tf1.executing_eagerly_outside_functions() + ) + using_xla = control_flow_util.GraphOrParentsInXlaContext( + tf1.get_default_graph() + ) + if ( + ds_with_session + or using_xla + or _UNSAFE_GRAPH_OP_LAYER_CREATION + ): + # In Legacy Graph mode, evaluating here makes Session be + # configured improperly. The downside of this is that + # saving via `get_config` breaks, but SavedModel still + # works. 
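+ # In these cases the symbolic tensor itself is therefore + # recorded as the "constant", instead of being eagerly + # evaluated via `backend.function` in the `tf.init_scope` + # branch below.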
+ constants[i] = op_input + else: + with tf.init_scope(): + constants[i] = backend.function([], op_input)([]) + layer_inputs = unnest_if_single_tensor(layer_inputs) + processed_ops, created_layers = _create_keras_history_helper( + layer_inputs, processed_ops, created_layers + ) + name = op.name + node_def = op.node_def.SerializeToString() + op_layer = base_layer.TensorFlowOpLayer( + node_def, constants=constants, name=name + ) + created_layers.append(op_layer) + op_layer._set_connectivity_metadata( + args=(layer_inputs,), kwargs={}, outputs=op.outputs + ) + processed_ops.update([op]) + if sparse_ops or ragged_tensors: + lambda_example = """ weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) output = tf.keras.layers.Lambda(weights_mult)(input) """ - raise ValueError( - 'Tensorflow ops that generate ragged or sparse tensor ' - 'outputs are currently not supported by Keras automatic ' - 'op wrapping. Please wrap these ops in a Lambda layer: ' - '\n\n```\n{example}\n```\n' - 'Sparse ops encountered: {sparse_ops}\n' - 'Ragged tensors encountered: {ragged_tensors}\n'.format( - example=lambda_example, - sparse_ops=str(sparse_ops), - ragged_tensors=str(ragged_tensors))) - return processed_ops, created_layers + raise ValueError( + "Tensorflow ops that generate ragged or sparse tensor " + "outputs are currently not supported by Keras automatic " + "op wrapping. Please wrap these ops in a Lambda layer: " + "\n\n```\n{example}\n```\n" + "Sparse ops encountered: {sparse_ops}\n" + "Ragged tensors encountered: {ragged_tensors}\n".format( + example=lambda_example, + sparse_ops=str(sparse_ops), + ragged_tensors=str(ragged_tensors), + ) + ) + return processed_ops, created_layers def unnest_if_single_tensor(input_tensors): - # Preserve compatibility with older configs - flat_input_tensors = tf.nest.flatten(input_tensors) - # If this is a single element but not a dict, unwrap. If this is a dict, - # assume the first layer expects a dict (as is the case with a - # DenseFeatures layer); pass through. - if not isinstance(input_tensors, dict) and len(flat_input_tensors) == 1: - input_tensors = flat_input_tensors[0] - return input_tensors + # Preserve compatibility with older configs + flat_input_tensors = tf.nest.flatten(input_tensors) + # If this is a single element but not a dict, unwrap. If this is a dict, + # assume the first layer expects a dict (as is the case with a + # DenseFeatures layer); pass through. + if not isinstance(input_tensors, dict) and len(flat_input_tensors) == 1: + input_tensors = flat_input_tensors[0] + return input_tensors def needs_keras_history(tensors, ignore_call_context=False): - """Check if any Tensors need to be wrapped in TensorFlowOpLayers. - - This will never return True inside a sublayer, because sublayers - do not need to create Keras History. Otherwise, this returns True - if one or more of `tensors` originates from a `keras.Input` and - does not have `_keras_history` set. - - Args: - tensors: An arbitrary nested structure of Tensors. - ignore_call_context: Whether to ignore the check of if currently - outside of a `call` context. This is `True` when creating - KerasHistory inside `Node`, where we always know that Tensors - are being used with the Functional API. - - Returns: - Bool, whether at least one Tensor needs to be wrapped. 
- """ - input_tensors = tf.nest.flatten(tensors) - if call_context().in_call and not ignore_call_context: - return False - if all( - getattr(tensor, '_keras_history', None) is not None - for tensor in input_tensors): - # KerasHistory already set. - return False - return uses_keras_history(tensors) + """Check if any Tensors need to be wrapped in TensorFlowOpLayers. + + This will never return True inside a sublayer, because sublayers + do not need to create Keras History. Otherwise, this returns True + if one or more of `tensors` originates from a `keras.Input` and + does not have `_keras_history` set. + + Args: + tensors: An arbitrary nested structure of Tensors. + ignore_call_context: Whether to ignore the check of if currently + outside of a `call` context. This is `True` when creating + KerasHistory inside `Node`, where we always know that Tensors + are being used with the Functional API. + + Returns: + Bool, whether at least one Tensor needs to be wrapped. + """ + input_tensors = tf.nest.flatten(tensors) + if call_context().in_call and not ignore_call_context: + return False + if all( + getattr(tensor, "_keras_history", None) is not None + for tensor in input_tensors + ): + # KerasHistory already set. + return False + return uses_keras_history(tensors) def is_in_keras_graph(): - """Returns if currently executing inside of a Keras graph.""" - return call_context().in_keras_graph + """Returns if currently executing inside of a Keras graph.""" + return call_context().in_keras_graph def is_in_eager_or_tf_function(): - """Returns if in eager mode or inside of a tf.function.""" - return tf.executing_eagerly() or is_in_tf_function() + """Returns if in eager mode or inside of a tf.function.""" + return tf.executing_eagerly() or is_in_tf_function() def is_in_tf_function(): - """Returns if inside of a tf.function.""" - # Check if running in V1 graph mode. - if not tf.compat.v1.executing_eagerly_outside_functions(): - return False - if not tf.inside_function(): - return False - # Check if inside Keras FuncGraph. - if is_in_keras_graph(): - return False - # Check for a v1 `wrap_function` FuncGraph. - graph = tf.compat.v1.get_default_graph() - if (getattr(graph, 'name', False) and - graph.name.startswith('wrapped_function')): - return False - return True + """Returns if inside of a tf.function.""" + # Check if running in V1 graph mode. + if not tf1.executing_eagerly_outside_functions(): + return False + if not tf.inside_function(): + return False + # Check if inside Keras FuncGraph. + if is_in_keras_graph(): + return False + # Check for a v1 `wrap_function` FuncGraph. + graph = tf1.get_default_graph() + if getattr(graph, "name", False) and graph.name.startswith( + "wrapped_function" + ): + return False + return True def uses_keras_history(tensors): - """Check if at least one Tensor originates from a `keras.Input`. + """Check if at least one Tensor originates from a `keras.Input`. - This is `True` if at least one Tensor has its origin in a `keras.Input`. - Any Tensor that originates from a `keras.Input` will have a dependency - Tensor with a `_keras_history` attribute attached. Tensors that have - already been checked to not originate from a `keras.Input` - are marked as `_keras_history_checked`. + This is `True` if at least one Tensor has its origin in a `keras.Input`. + Any Tensor that originates from a `keras.Input` will have a dependency + Tensor with a `_keras_history` attribute attached. 
Tensors that have + already been checked to not originate from a `keras.Input` + are marked as `_keras_history_checked`. - Args: - tensors: An arbitrary nested structure of Tensors. + Args: + tensors: An arbitrary nested structure of Tensors. - Returns: - Bool, whether at least one Tensor originates from a `keras.Input`. - """ - checked_tensors = set() - tensors_to_check = tf.nest.flatten(tensors) + Returns: + Bool, whether at least one Tensor originates from a `keras.Input`. + """ + checked_tensors = set() + tensors_to_check = tf.nest.flatten(tensors) - while tensors_to_check: - new_tensors_to_check = [] - for tensor in tensors_to_check: - if id(tensor) in checked_tensors: - continue + while tensors_to_check: + new_tensors_to_check = [] + for tensor in tensors_to_check: + if id(tensor) in checked_tensors: + continue - checked_tensors.add(id(tensor)) + checked_tensors.add(id(tensor)) - if getattr(tensor, '_keras_history_checked', None) is not None: - continue - if getattr(tensor, '_keras_history', None) is not None: - return True + if getattr(tensor, "_keras_history_checked", None) is not None: + continue + if getattr(tensor, "_keras_history", None) is not None: + return True - try: - new_tensors_to_check.extend(tensor.op.inputs) - except AttributeError: - # In case `tensor` is a Variable created in an Eager context. - pass + try: + new_tensors_to_check.extend(tensor.op.inputs) + except AttributeError: + # In case `tensor` is a Variable created in an Eager context. + pass - tensors_to_check = new_tensors_to_check + tensors_to_check = new_tensors_to_check - # Mark that these Tensors have been checked once for `_keras_history`, - # and should not be checked again for performance reasons. - mark_checked(tensors) - return False + # Mark that these Tensors have been checked once for `_keras_history`, + # and should not be checked again for performance reasons. + mark_checked(tensors) + return False def mark_checked(tensors): - """Marks that these Tensors should not be tracked. + """Marks that these Tensors should not be tracked. - This prevents Layers from attempting to create TensorFlowOpLayers - for these Tensors. + This prevents Layers from attempting to create TensorFlowOpLayers + for these Tensors. - Args: - tensors: An arbitrary structure of Tensors. - """ + Args: + tensors: An arbitrary structure of Tensors. + """ - def _mark_checked(tensor): - tensor._keras_history_checked = True # pylint: disable=protected-access + def _mark_checked(tensor): + tensor._keras_history_checked = True - tf.nest.map_structure(_mark_checked, tensors) + tf.nest.map_structure(_mark_checked, tensors) def call_context(): - """Returns currently active `CallContext`.""" - call_ctx = getattr(_call_context, 'call_context', None) - if call_ctx is None: - call_ctx = CallContext() - _call_context.call_context = call_ctx - return call_ctx + """Returns currently active `CallContext`.""" + call_ctx = getattr(_call_context, "call_context", None) + if call_ctx is None: + call_ctx = CallContext() + _call_context.call_context = call_ctx + return call_ctx # Inject the call_context function to keras_deps to remove the dependency @@ -446,167 +473,174 @@ def call_context(): class CallContext: - """Keeps track of properties currently inside a Layer/Model's `call`. - - Attributes: - in_call: Whether currently inside the `call` of a Layer. - layer: The `Layer` whose `call` is currently active. - inputs: The inputs to the currently active `Layer`. - build_graph: Whether currently inside a Graph or FuncGraph. 
- training: Whether currently executing in training or inference mode. - saving: Whether currently saving to SavedModel. - frozen: Whether currently executing inside a `Layer` with `trainable` set to - `False`. - in_keras_graph: Whether executing inside the Keras Graph. - """ - - def __init__(self): - # Handle `in_call` separately as it is the most-read attr and reading it is - # on the hot path. - self.in_call = False - self._state = { - 'layer': None, - 'inputs': None, - 'build_graph': False, - 'training': None, - 'saving': None - } - # TODO(b/150169018): This logic can be replaced after the Functional API - # refactor. - self._in_keras_graph = False - - def enter(self, layer, inputs, build_graph, training, saving=None): - """Push a Layer and its inputs and state onto the current call context. + """Keeps track of properties currently inside a Layer/Model's `call`. - Args: + Attributes: + in_call: Whether currently inside the `call` of a Layer. layer: The `Layer` whose `call` is currently active. inputs: The inputs to the currently active `Layer`. build_graph: Whether currently inside a Graph or FuncGraph. training: Whether currently executing in training or inference mode. saving: Whether currently saving to SavedModel. - - Returns: - Context manager. + frozen: Whether currently executing inside a `Layer` with `trainable` set + to `False`. + in_keras_graph: Whether executing inside the Keras Graph. """ - state = { - 'layer': layer, - 'inputs': inputs, - 'build_graph': build_graph, - 'training': training, - 'saving': saving - } - return CallContextManager(self, state) - - @property - def layer(self): - return self._state['layer'] - - @property - def inputs(self): - return self._state['inputs'] - - @property - def build_graph(self): - return self._state['build_graph'] - - @property - def training(self): - return self._state['training'] - - @property - def saving(self): - return self._state['saving'] - - @property - def frozen(self): - layer = self._state['layer'] - if not layer: - return False - return not layer.trainable - - @property - def in_keras_graph(self): - # Returns True even if in a subgraph of the Keras graph, such as those - # created by control flow ops. - if tf.executing_eagerly(): - return False - return (self._in_keras_graph or - getattr(backend.get_graph(), 'name', None) == 'keras_graph') + + def __init__(self): + # Handle `in_call` separately as it is the most-read attr and reading it + # is on the hot path. + self.in_call = False + self._state = { + "layer": None, + "inputs": None, + "build_graph": False, + "training": None, + "saving": None, + } + # TODO(b/150169018): This logic can be replaced after the Functional API + # refactor. + self._in_keras_graph = False + + def enter(self, layer, inputs, build_graph, training, saving=None): + """Push a Layer and its inputs and state onto the current call context. + + Args: + layer: The `Layer` whose `call` is currently active. + inputs: The inputs to the currently active `Layer`. + build_graph: Whether currently inside a Graph or FuncGraph. + training: Whether currently executing in training or inference mode. + saving: Whether currently saving to SavedModel. + + Returns: + Context manager. 
+ """ + state = { + "layer": layer, + "inputs": inputs, + "build_graph": build_graph, + "training": training, + "saving": saving, + } + return CallContextManager(self, state) + + @property + def layer(self): + return self._state["layer"] + + @property + def inputs(self): + return self._state["inputs"] + + @property + def build_graph(self): + return self._state["build_graph"] + + @property + def training(self): + return self._state["training"] + + @property + def saving(self): + return self._state["saving"] + + @property + def frozen(self): + layer = self._state["layer"] + if not layer: + return False + return not layer.trainable + + @property + def in_keras_graph(self): + # Returns True even if in a subgraph of the Keras graph, such as those + # created by control flow ops. + if tf.executing_eagerly(): + return False + return ( + self._in_keras_graph + or getattr(backend.get_graph(), "name", None) == "keras_graph" + ) class CallContextManager: - """Context manager for `CallContext`.""" + """Context manager for `CallContext`.""" - def __init__(self, call_ctx, state): - self._call_ctx = call_ctx - self._state = state - self._build_graph = state['build_graph'] + def __init__(self, call_ctx, state): + self._call_ctx = call_ctx + self._state = state + self._build_graph = state["build_graph"] - def __enter__(self): - call_ctx = self._call_ctx - self._prev_in_call = call_ctx.in_call - self._prev_state = call_ctx._state + def __enter__(self): + call_ctx = self._call_ctx + self._prev_in_call = call_ctx.in_call + self._prev_state = call_ctx._state - call_ctx.in_call = True - call_ctx._state = self._state + call_ctx.in_call = True + call_ctx._state = self._state - # TODO(b/150169018): This logic can be removed after the Functional API - # refactor. - if self._build_graph: - self._prev_in_keras_graph = call_ctx._in_keras_graph - call_ctx._in_keras_graph = ( - call_ctx._in_keras_graph or - getattr(backend.get_graph(), 'name', None) == 'keras_graph') + # TODO(b/150169018): This logic can be removed after the Functional API + # refactor. 
+ if self._build_graph: + self._prev_in_keras_graph = call_ctx._in_keras_graph + call_ctx._in_keras_graph = ( + call_ctx._in_keras_graph + or getattr(backend.get_graph(), "name", None) == "keras_graph" + ) - def __exit__(self, *exc_info): - call_ctx = self._call_ctx - call_ctx.in_call = self._prev_in_call - call_ctx._state = self._prev_state + def __exit__(self, *exc_info): + call_ctx = self._call_ctx + call_ctx.in_call = self._prev_in_call + call_ctx._state = self._prev_state - if self._build_graph: - call_ctx._in_keras_graph = self._prev_in_keras_graph + if self._build_graph: + call_ctx._in_keras_graph = self._prev_in_keras_graph def training_arg_passed_to_call(argspec, args, kwargs): - """Returns whether a user passed the `training` argument in `__call__`.""" - # `argspec.args` starts with ['self', 'inputs'] - full_args = dict(zip(argspec.args[2:], args)) - full_args.update(kwargs) - return 'training' in full_args and full_args['training'] is not None + """Returns whether a user passed the `training` argument in `__call__`.""" + # `argspec.args` starts with ['self', 'inputs'] + full_args = dict(zip(argspec.args[2:], args)) + full_args.update(kwargs) + return "training" in full_args and full_args["training"] is not None def is_subclassed(layer): - """Returns True if the object is a subclassed layer or subclassed model.""" - return (layer.__module__.find('keras.engine') == -1 and - layer.__module__.find('keras.layers') == -1) + """Returns True if the object is a subclassed layer or subclassed model.""" + return ( + layer.__module__.find("keras.engine") == -1 + and layer.__module__.find("keras.layers") == -1 + ) def from_saved_model(layer): - """Returns whether the layer is loaded from a SavedModel.""" - return layer.__module__.find('keras.saving.saved_model') != -1 - - -def check_graph_consistency(tensor=None, method='add_loss', force_raise=False): - """Checks that tensors passed to `add_*` method match the Keras graph. - - When one of the `add_*` method is called inside a V2 conditional branch, - the underlying tensor gets created in a FuncGraph managed by control_flow_v2. - We need to raise clear error messages in such cases. - - Args: - tensor: Tensor to check, or `False` if it is known that an error - should be raised. - method: Caller method, one of {'add_metric', 'add_loss', 'add_update'}. - force_raise: If an error should be raised regardless of `tensor`. - - Raises: - RuntimeError: In case of an out-of-graph tensor. - """ - if (force_raise or - (tf.compat.v1.executing_eagerly_outside_functions() and - hasattr(tensor, 'graph') and tensor.graph.is_control_flow_graph)): - if method == 'activity_regularizer': - bad_example = """ + """Returns whether the layer is loaded from a SavedModel.""" + return layer.__module__.find("keras.saving.legacy.saved_model") != -1 + + +def check_graph_consistency(tensor=None, method="add_loss", force_raise=False): + """Checks that tensors passed to an `add_*` method match the Keras graph. + + When one of the `add_*` methods is called inside a V2 conditional branch, the + underlying tensor gets created in a FuncGraph managed by control_flow_v2. + We need to raise clear error messages in such cases. + + Args: + tensor: Tensor to check, or `False` if it is known that an error + should be raised. + method: Caller method, one of {'add_metric', 'add_loss', 'add_update'}. + force_raise: If an error should be raised regardless of `tensor`. + + Raises: + RuntimeError: In case of an out-of-graph tensor.
+ """ + if force_raise or ( + tf1.executing_eagerly_outside_functions() + and hasattr(tensor, "graph") + and tensor.graph.is_control_flow_graph + ): + if method == "activity_regularizer": + bad_example = """ class TestModel(tf.keras.Model): def __init__(self): @@ -619,7 +653,7 @@ def call(self, x, training=None): else: return self.dense(x) """ - correct_example = """ + correct_example = """ class TestModel(tf.keras.Model): def __init__(self): @@ -629,28 +663,29 @@ def __init__(self): def call(self, x, training=None): return self.dense(x) """ - raise RuntimeError( - 'You are using a layer with `activity_regularizer` in a control flow ' - 'branch, e.g.:\n{bad_example}\nThis is currently not supported. ' - 'Please move your call to the layer with `activity_regularizer` out ' - 'of the control flow branch, e.g.:\n{correct_example}\n' - 'You can also resolve this by marking your outer model/layer dynamic' - ' (eager-only) by passing `dynamic=True` to the layer constructor. ' - 'Any kind of control flow is supported with dynamic layers. ' - 'Note that using `dynamic=True` requires you to implement static ' - 'shape inference in the `compute_output_shape(input_shape)` ' - 'method.'.format( - bad_example=bad_example, correct_example=correct_example)) - - if method == 'add_metric': - bad_example = """ + raise RuntimeError( + "You are using a layer with `activity_regularizer` in a " + f"control flow branch, e.g.:\n{bad_example}\nThis is currently " + "not supported. Please move your call to the layer with " + "`activity_regularizer` out of the control flow branch, " + f"e.g.:\n{correct_example}\nYou can also resolve this by " + "marking your outer model/layer dynamic (eager-only) by " + "passing `dynamic=True` to the layer constructor. Any kind of " + "control flow is supported with dynamic layers. Note that " + "using `dynamic=True` requires you to implement static shape " + "inference in the `compute_output_shape(input_shape)` " + "method." + ) + + if method == "add_metric": + bad_example = """ def call(self, inputs, training=None): if training: metric = compute_metric(inputs) self.add_metric(metric, name='my_metric', aggregation='mean') return inputs """ - correct_example = """ + correct_example = """ def call(self, inputs, training=None): if training: metric = compute_metric(inputs) @@ -659,15 +694,15 @@ def call(self, inputs, training=None): self.add_metric(metric, name='my_metric', aggregation='mean') return inputs """ - elif method == 'add_loss': - bad_example = """ + elif method == "add_loss": + bad_example = """ def call(self, inputs, training=None): if training: loss = compute_loss(inputs) self.add_loss(loss) return inputs """ - correct_example = """ + correct_example = """ def call(self, inputs, training=None): if training: loss = compute_loss(inputs) @@ -676,14 +711,14 @@ def call(self, inputs, training=None): self.add_loss(loss) return inputs """ - else: - bad_example = """ + else: + bad_example = """ def call(self, inputs, training=None): if training: self.add_update(self.w.assign_add(1)) return inputs """ - correct_example = """ + correct_example = """ def call(self, inputs, training=None): if training: increment = 1 @@ -692,207 +727,225 @@ def call(self, inputs, training=None): self.add_update(self.w.assign_add(increment)) return inputs """ - raise RuntimeError( - 'You are using the method `{method}` in a control flow branch ' - 'in your layer, e.g.:\n{bad_example}\n' - 'This is not currently supported. 
' - 'Please move your call to {method} out of the control flow branch, ' - 'e.g.:\n{correct_example}\n' - 'You can also resolve this by marking your layer ' - 'as dynamic (eager-only) by passing ' - '`dynamic=True` to the layer constructor. ' - 'Any kind of control flow is supported with dynamic layers. ' - 'Note that using `dynamic=True` requires you ' - 'to implement static shape inference ' - 'in the `compute_output_shape(input_shape)` method.'.format( - method=method, - bad_example=bad_example, - correct_example=correct_example)) + raise RuntimeError( + "You are using the method `{method}` in a control flow branch " + "in your layer, e.g.:\n{bad_example}\n" + "This is not currently supported. " + "Please move your call to {method} out of the control flow branch, " + "e.g.:\n{correct_example}\n" + "You can also resolve this by marking your layer " + "as dynamic (eager-only) by passing " + "`dynamic=True` to the layer constructor. " + "Any kind of control flow is supported with dynamic layers. " + "Note that using `dynamic=True` requires you " + "to implement static shape inference " + "in the `compute_output_shape(input_shape)` method.".format( + method=method, + bad_example=bad_example, + correct_example=correct_example, + ) + ) def mark_as_return(outputs, acd): - """Marks `outputs` as the return values for automatic control deps.""" + """Marks `outputs` as the return values for automatic control deps.""" - def _mark_as_return(tensor): - """Marks `tensor` as the return value for automatic control deps.""" - if not tf.is_tensor(tensor): - return tensor + def _mark_as_return(tensor): + """Marks `tensor` as the return value for automatic control deps.""" + if not tf.is_tensor(tensor): + return tensor - # pylint: disable=protected-access - return_tensor = acd.mark_as_return(tensor) - if getattr(tensor, '_keras_mask', None) is not None: - return_tensor._keras_mask = acd.mark_as_return(tensor._keras_mask) - else: - return_tensor._keras_mask = None + return_tensor = acd.mark_as_return(tensor) + if getattr(tensor, "_keras_mask", None) is not None: + return_tensor._keras_mask = acd.mark_as_return(tensor._keras_mask) + else: + return_tensor._keras_mask = None - # Handle TensorFlow Probability attached metadata. - # TODO(b/132076537): Remove this once TFP uses `CompositeTensor`. - if getattr(tensor, '_tfp_distribution', None) is not None: - return_tensor._tfp_distribution = tensor._tfp_distribution + # Handle TensorFlow Probability attached metadata. + # TODO(b/132076537): Remove this once TFP uses `CompositeTensor`. + if getattr(tensor, "_tfp_distribution", None) is not None: + return_tensor._tfp_distribution = tensor._tfp_distribution - return return_tensor - # pylint: enable=protected-access + return return_tensor - return tf.nest.map_structure(_mark_as_return, outputs) + return tf.nest.map_structure(_mark_as_return, outputs) V2_DTYPE_BEHAVIOR = None -@keras_export(v1=['keras.layers.enable_v2_dtype_behavior']) +@keras_export(v1=["keras.layers.enable_v2_dtype_behavior"]) def enable_v2_dtype_behavior(): - """Enable the V2 dtype behavior for Keras layers. - - By default, the V2 dtype behavior is enabled in TensorFlow 2, so this function - is only useful if `tf.compat.v1.disable_v2_behavior` has been called. Since - mixed precision requires V2 dtype behavior to be enabled, this function allows - you to use mixed precision in Keras layers if `disable_v2_behavior` has been - called. - - When enabled, the dtype of Keras layers defaults to floatx (which is typically - float32) instead of None. 
In addition, layers will automatically cast - floating-point inputs to the layer's dtype. - - >>> x = tf.ones((4, 4, 4, 4), dtype='float64') - >>> layer = tf.keras.layers.Conv2D(filters=4, kernel_size=2) - >>> print(layer.dtype) # float32 since V2 dtype behavior is enabled - float32 - >>> y = layer(x) # Layer casts inputs since V2 dtype behavior is enabled - >>> print(y.dtype.name) - float32 - - A layer author can opt-out their layer from the automatic input casting by - passing `autocast=False` to the base Layer's constructor. This disables the - autocasting part of the V2 behavior for that layer, but not the defaulting to - floatx part of the V2 behavior. - - When a global `tf.keras.mixed_precision.Policy` is set, a Keras layer's dtype - will default to the global policy instead of floatx. Layers will automatically - cast inputs to the policy's compute_dtype. - """ - global V2_DTYPE_BEHAVIOR - V2_DTYPE_BEHAVIOR = True - - -@keras_export(v1=['keras.layers.disable_v2_dtype_behavior']) + """Enable the V2 dtype behavior for Keras layers. + + By default, the V2 dtype behavior is enabled in TensorFlow 2, so this + function is only useful if `tf.compat.v1.disable_v2_behavior` has been + called. Since mixed precision requires V2 dtype behavior to be enabled, this + function allows you to use mixed precision in Keras layers if + `disable_v2_behavior` has been called. + + When enabled, the dtype of Keras layers defaults to floatx (which is + typically float32) instead of None. In addition, layers will automatically + cast floating-point inputs to the layer's dtype. + + >>> x = tf.ones((4, 4, 4, 4), dtype='float64') + >>> layer = tf.keras.layers.Conv2D(filters=4, kernel_size=2) + >>> print(layer.dtype) # float32 since V2 dtype behavior is enabled + float32 + >>> y = layer(x) # Layer casts inputs since V2 dtype behavior is enabled + >>> print(y.dtype.name) + float32 + + A layer author can opt their layer out of the automatic input casting by + passing `autocast=False` to the base Layer's constructor. This disables the + autocasting part of the V2 behavior for that layer, but not the defaulting + to floatx part of the V2 behavior. + + When a global `tf.keras.mixed_precision.Policy` is set, a Keras layer's + dtype will default to the global policy instead of floatx. Layers will + automatically cast inputs to the policy's compute_dtype. + """ + global V2_DTYPE_BEHAVIOR + V2_DTYPE_BEHAVIOR = True + + +@keras_export(v1=["keras.layers.disable_v2_dtype_behavior"]) def disable_v2_dtype_behavior(): - """Disables the V2 dtype behavior for Keras layers. + """Disables the V2 dtype behavior for Keras layers. - See `tf.compat.v1.keras.layers.enable_v2_dtype_behavior`. - """ - global V2_DTYPE_BEHAVIOR - V2_DTYPE_BEHAVIOR = False + See `tf.compat.v1.keras.layers.enable_v2_dtype_behavior`. + """ + global V2_DTYPE_BEHAVIOR + V2_DTYPE_BEHAVIOR = False def v2_dtype_behavior_enabled(): - """Returns True if the V2 dtype behavior is enabled.""" - if V2_DTYPE_BEHAVIOR is None: - return tf.__internal__.tf2.enabled() - return V2_DTYPE_BEHAVIOR + """Returns True if the V2 dtype behavior is enabled.""" + if V2_DTYPE_BEHAVIOR is None: + return tf.__internal__.tf2.enabled() + return V2_DTYPE_BEHAVIOR class TrackableWeightHandler: - """Keras wrapper for handling tracking.Trackable object saving and restoring. - - This class handles Trackables in both V1 and V2 modes, ensuring that they can - be saved and restored with the correct data and without adding additional ops - on every save.
- - Attributes: - trackable: The trackable to wrap. - num_tensors: The number of tensors that this trackable requires for saving. - """ - - def __init__(self, trackable): - if not isinstance(trackable, tf.__internal__.tracking.Trackable): - raise ValueError(f'{trackable} is not a Trackable object.') - self._trackable = trackable - self._distribute_strategy = tf.distribute.get_strategy() - - saveables = tf.__internal__.tracking.saveable_objects_from_trackable( - trackable).values() - # 'Saveables' won't exist when we're passed a legacy TF1 table like - # a StaticHashTable. - if not saveables: - self._num_tensors = 0 - self._setter = lambda weights: None - self._getter = lambda: [] - - elif len(saveables) == 1: - saveable = list(saveables)[0] - - if tf.compat.v1.executing_eagerly_outside_functions(): - # If we're in eager mode, we need to defer calling the Trackable's - # saveable() callable until data export time. - # However, it is safe to call the saveable as many times as we want, so - # we will call it now to figure out how many tensors this Trackable will - # produce. - self._saveable = saveable - self._num_tensors = len(self._saveable().specs) - self._setter = lambda weights: self._saveable().restore(weights, None) - self._getter = lambda: [spec.tensor for spec in self._saveable().specs] - else: - # If we're in Graph mode, we need to evaluate the Saveable only once and - # cache the resulting restore graph. Failing to do this will result in - # new assignment ops being added to the graph each time set_weights() is - # called. - self._placeholder_tensors = [] - self._saveable = saveable() - self._num_tensors = len(self._saveable.specs) - for spec in self._saveable.specs: - tensor = spec.tensor - self._placeholder_tensors.append( - tf.compat.v1.placeholder(tensor.dtype, tensor.shape)) - self._assign_op = self._saveable.restore(self._placeholder_tensors, - None) - self._setter = self._set_weights_v1 - self._getter = lambda: [spec.tensor for spec in self._saveable.specs] - else: - raise ValueError( - 'Only Trackables with one Saveable are supported. The Trackable ' - f'{trackable} has {len(saveables)} Saveables.') - - @property - def num_tensors(self): - return self._num_tensors + """Keras wrapper for handling Trackable object saving and restoring. - def set_weights(self, weights): - if len(weights) != self._num_tensors: - raise ValueError( - f'Weight handler for trackable {self._trackable} received ' - 'an incorrect number of weights: ' - f'expected {self._num_tensors} weights, got {len(weights)} weights.') - self._setter(weights) + This class handles Trackables in both V1 and V2 modes, ensuring that they + can be saved and restored with the correct data and without adding + additional ops on every save. - def get_tensors(self): - return self._getter() + Attributes: + trackable: The trackable to wrap. + num_tensors: The number of tensors that this trackable requires for + saving. 
+ """ - def _set_weights_v1(self, weights): - feed_dict = {} - for idx, tensor in enumerate(weights): - feed_dict[self._placeholder_tensors[idx]] = tensor - backend.get_session().run(self._assign_op, feed_dict) + def __init__(self, trackable): + if not isinstance(trackable, tf.__internal__.tracking.Trackable): + raise ValueError(f"{trackable} is not a Trackable object.") + self._trackable = trackable + self._distribute_strategy = tf.distribute.get_strategy() + + saveables = tf.__internal__.tracking.saveable_objects_from_trackable( + trackable + ).values() + # 'Saveables' won't exist when we're passed a legacy TF1 table like + # a StaticHashTable. + if not saveables: + self._num_tensors = 0 + self._setter = lambda weights: None + self._getter = lambda: [] + + elif len(saveables) == 1: + saveable = list(saveables)[0] + + if tf1.executing_eagerly_outside_functions(): + # If we're in eager mode, we need to defer calling the + # Trackable's saveable() callable until data export time. + # However, it is safe to call the saveable as many times as we + # want, so we will call it now to figure out how many tensors + # this Trackable will produce. + self._saveable = saveable + self._num_tensors = len(self._saveable().specs) + self._setter = lambda weights: self._saveable().restore( + weights, None + ) + self._getter = lambda: [ + spec.tensor for spec in self._saveable().specs + ] + else: + # If we're in Graph mode, we need to evaluate the Saveable only + # once and cache the resulting restore graph. Failing to do this + # will result in new assignment ops being added to the graph + # each time set_weights() is called. + self._placeholder_tensors = [] + self._saveable = saveable() + self._num_tensors = len(self._saveable.specs) + for spec in self._saveable.specs: + tensor = spec.tensor + self._placeholder_tensors.append( + tf1.placeholder(tensor.dtype, tensor.shape) + ) + self._assign_op = self._saveable.restore( + self._placeholder_tensors, None + ) + self._setter = self._set_weights_v1 + self._getter = lambda: [ + spec.tensor for spec in self._saveable.specs + ] + else: + raise ValueError( + "Only Trackables with one Saveable are supported. " + f"The Trackable {trackable} has {len(saveables)} Saveables." + ) + + @property + def num_tensors(self): + return self._num_tensors + + def set_weights(self, weights): + if len(weights) != self._num_tensors: + raise ValueError( + f"Weight handler for trackable {self._trackable} received " + "an incorrect number of weights: " + f"expected {self._num_tensors} weights, " + f"got {len(weights)} weights." + ) + self._setter(weights) + + def get_tensors(self): + return self._getter() + + def _set_weights_v1(self, weights): + feed_dict = {} + for idx, tensor in enumerate(weights): + feed_dict[self._placeholder_tensors[idx]] = tensor + backend.get_session().run(self._assign_op, feed_dict) def no_ragged_support(inputs, layer_name): - input_list = tf.nest.flatten(inputs) - if any(isinstance(x, tf.RaggedTensor) for x in input_list): - raise ValueError( - f'Layer {layer_name} does not support RaggedTensors as input. ' - f'Inputs received: {inputs}. You can try converting your ' - 'input to a dense (uniform) tensor.') + input_list = tf.nest.flatten(inputs) + if any(isinstance(x, tf.RaggedTensor) for x in input_list): + raise ValueError( + f"Layer {layer_name} does not support RaggedTensors as input. " + f"Inputs received: {inputs}. You can try converting your " + "input to a dense (uniform) tensor." 
+ ) def is_split_variable(v): - """Returns True if `v` is either a PartionedVariable or a ShardedVariable.""" - return hasattr(v, '_variable_list') or hasattr(v, '_variables') + """Returns True if `v` is a PartitionedVariable or a ShardedVariable.""" + return not {clz.__name__ for clz in v.__class__.__mro__}.isdisjoint( + {"PartitionedVariable", "ShardedVariable"} + ) def has_weights(obj): - obj_type = type(obj) - return (hasattr(obj_type, 'trainable_weights') and - hasattr(obj_type, 'non_trainable_weights') and - not isinstance(obj, type)) + obj_type = type(obj) + return ( + hasattr(obj_type, "trainable_weights") + and hasattr(obj_type, "non_trainable_weights") + and not isinstance(obj, type) + ) # TODO(kathywu): This is a temporary hack. When a network of layers is revived @@ -902,4 +955,5 @@ def has_weights(obj): # whenever eager losses are added to one layer, add eager losses to all # child layers. This causes `.losses` to only return eager losses. REVIVED_LOSS_PLACEHOLDER = ( - 'This layer\'s losses have been added to the parent layer.') + "This layer's losses have been added to the parent layer." +) diff --git a/keras/engine/base_layer_utils_test.py b/keras/engine/base_layer_utils_test.py index ed3c73a6c8ce..67a4d2d5db22 100644 --- a/keras/engine/base_layer_utils_test.py +++ b/keras/engine/base_layer_utils_test.py @@ -14,97 +14,95 @@ # ============================================================================== import numpy as np - import tensorflow.compat.v2 as tf import keras from keras import backend -from keras.testing_infra import test_combinations from keras.engine import base_layer_utils +from keras.testing_infra import test_combinations -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class TrackableWeightHandlerTest(test_combinations.TestCase): - - def get_table_handler(self): - # Note: There is some repetition in these tests' setup. However, Tensorflow - # does not play nicely with a separate setUp() call (causing errors related - # to graph building), so we have to use a called setup instead of a setUp() - # call. - table = tf.lookup.experimental.MutableHashTable( - key_dtype=tf.string, value_dtype=tf.int32, default_value=0) - return base_layer_utils.TrackableWeightHandler(table) - - def test_get_num_tensors(self): - table_handler = self.get_table_handler() - self.assertEqual(2, table_handler.num_tensors) - - def test_get_and_set_weights(self): - table_handler = self.get_table_handler() - - table_data = {b'a': 1, b'b': 2, b'c': 3} - table_handler.set_weights( - [list(table_data.keys()), - list(table_data.values())]) - weights = backend.batch_get_value(table_handler.get_tensors()) - weight_data = {key: value for key, value in zip(weights[0], weights[1])} - self.assertDictEqual(table_data, weight_data) - - def test_get_and_set_weights_does_not_add_ops(self): - table_handler = self.get_table_handler() - table_data = {b'a': 1, b'b': 2, b'c': 3} - table_handler.set_weights( - [list(table_data.keys()), - list(table_data.values())]) - _ = backend.batch_get_value(table_handler.get_tensors()) - backend.get_session().graph.finalize() - table_handler.set_weights( - [list(table_data.keys()), - list(table_data.values())]) - _ = backend.batch_get_value(table_handler.get_tensors()) - - -@test_combinations.generate(test_combinations.combine(mode=['eager'])) + def get_table_handler(self): + # Note: There is some repetition in these tests' setup. 
However, + # Tensorflow does not play nicely with a separate setUp() call (causing + # errors related to graph building), so we have to use a called setup + # instead of a setUp() call. + table = tf.lookup.experimental.MutableHashTable( + key_dtype=tf.string, value_dtype=tf.int32, default_value=0 + ) + return base_layer_utils.TrackableWeightHandler(table) + + def test_get_num_tensors(self): + table_handler = self.get_table_handler() + self.assertEqual(2, table_handler.num_tensors) + + def test_get_and_set_weights(self): + table_handler = self.get_table_handler() + + table_data = {b"a": 1, b"b": 2, b"c": 3} + table_handler.set_weights( + [list(table_data.keys()), list(table_data.values())] + ) + weights = backend.batch_get_value(table_handler.get_tensors()) + weight_data = {key: value for key, value in zip(weights[0], weights[1])} + self.assertDictEqual(table_data, weight_data) + + def test_get_and_set_weights_does_not_add_ops(self): + table_handler = self.get_table_handler() + table_data = {b"a": 1, b"b": 2, b"c": 3} + table_handler.set_weights( + [list(table_data.keys()), list(table_data.values())] + ) + _ = backend.batch_get_value(table_handler.get_tensors()) + backend.get_session().graph.finalize() + table_handler.set_weights( + [list(table_data.keys()), list(table_data.values())] + ) + _ = backend.batch_get_value(table_handler.get_tensors()) + + +@test_combinations.generate(test_combinations.combine(mode=["eager"])) class OpLayerTest(test_combinations.TestCase): - - def test_tensor_op_layer(self): - int_values = keras.Input(shape=(2,), dtype=tf.int32) - float_values = tf.cast(int_values, tf.float32) - model = keras.Model(int_values, float_values) - model.compile(loss='mse') - - input_data = np.array([[1, 2], [3, 4]], dtype=np.int32) - expected = [[1.0, 2.0], [3.0, 4.0]] - output = model.predict(input_data) - self.assertAllClose(expected, output) - - def test_ragged_op_layer_keras_tensors(self): - int_values = keras.Input(shape=(None,), dtype=tf.int32, ragged=True) - float_values = tf.cast(int_values, tf.float32) - model = keras.Model(int_values, float_values) - model.compile(loss='mse') - - input_data = tf.ragged.constant( - [[1, 2], [3, 4]], dtype=np.int32) - expected = [[1.0, 2.0], [3.0, 4.0]] - output = model.predict(input_data) - self.assertIsInstance(output, tf.RaggedTensor) - self.assertAllClose(expected, output) - - def test_sparse_op_layer_keras_tensors(self): - int_values = keras.Input(shape=(None,), dtype=tf.int32, sparse=True) - float_values = tf.cast(int_values, tf.float32) - _ = keras.Model(int_values, float_values) - model = keras.Model(int_values, float_values) - model.compile(loss='mse') - - input_data = tf.sparse.from_dense( - np.array([[1, 2], [3, 4]], dtype=np.int32)) - expected = [[1.0, 2.0], [3.0, 4.0]] - output = model.predict(input_data) - self.assertIsInstance(output, tf.SparseTensor) - self.assertAllClose(expected, tf.sparse.to_dense(output)) - - -if __name__ == '__main__': - tf.test.main() + def test_tensor_op_layer(self): + int_values = keras.Input(shape=(2,), dtype=tf.int32) + float_values = tf.cast(int_values, tf.float32) + model = keras.Model(int_values, float_values) + model.compile(loss="mse") + + input_data = np.array([[1, 2], [3, 4]], dtype=np.int32) + expected = [[1.0, 2.0], [3.0, 4.0]] + output = model.predict(input_data) + self.assertAllClose(expected, output) + + def test_ragged_op_layer_keras_tensors(self): + int_values = keras.Input(shape=(None,), dtype=tf.int32, ragged=True) + float_values = tf.cast(int_values, tf.float32) + model = 
keras.Model(int_values, float_values) + model.compile(loss="mse") + + input_data = tf.ragged.constant([[1, 2], [3, 4]], dtype=np.int32) + expected = [[1.0, 2.0], [3.0, 4.0]] + output = model.predict(input_data) + self.assertIsInstance(output, tf.RaggedTensor) + self.assertAllClose(expected, output) + + def test_sparse_op_layer_keras_tensors(self): + int_values = keras.Input(shape=(None,), dtype=tf.int32, sparse=True) + float_values = tf.cast(int_values, tf.float32) + _ = keras.Model(int_values, float_values) + model = keras.Model(int_values, float_values) + model.compile(loss="mse") + + input_data = tf.sparse.from_dense( + np.array([[1, 2], [3, 4]], dtype=np.int32) + ) + expected = [[1.0, 2.0], [3.0, 4.0]] + output = model.predict(input_data) + self.assertIsInstance(output, tf.SparseTensor) + self.assertAllClose(expected, tf.sparse.to_dense(output)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/base_layer_v1.py b/keras/engine/base_layer_v1.py index 1e2d281d2e92..e54211473268 100644 --- a/keras/engine/base_layer_v1.py +++ b/keras/engine/base_layer_v1.py @@ -12,17 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access -# pylint: disable=g-bad-import-order -"""Contains the base Layer class, from which all layers inherit.""" -import tensorflow.compat.v2 as tf + +"""Contains the base Layer class, from which all layers inherit.""" import functools import itertools import threading import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import constraints from keras import initializers @@ -33,2211 +33,2437 @@ from keras.mixed_precision import autocast_variable from keras.mixed_precision import loss_scale_optimizer from keras.mixed_precision import policy -from keras.saving.saved_model import layer_serialization +from keras.saving.legacy.saved_model import layer_serialization from keras.utils import generic_utils from keras.utils import layer_utils from keras.utils import object_identity from keras.utils import tf_inspect from keras.utils import tf_utils + # A module that only depends on `keras.layers` import these from here. -from keras.utils.generic_utils import to_snake_case # pylint: disable=unused-import -from keras.utils.tf_utils import is_tensor_or_tensor_list # pylint: disable=unused-import +from keras.utils.generic_utils import to_snake_case # noqa: F401 +from keras.utils.tf_utils import is_tensor_or_tensor_list # noqa: F401 + +# isort: off from tensorflow.python.platform import tf_logging from tensorflow.tools.docs import doc_controls -# pylint: disable=g-classes-have-attributes class Layer(base_layer.Layer): - """Base layer class. - - This is the class from which all layers inherit. - - A layer is a class implementing common neural networks operations, such - as convolution, batch norm, etc. These operations require managing weights, - losses, updates, and inter-layer connectivity. - - Users will just instantiate a layer and then treat it as a callable. - - We recommend that descendants of `Layer` implement the following methods: - - * `__init__()`: Save configuration in member variables - * `build()`: Called once from `__call__`, when we know the shapes of inputs - and `dtype`. 
Should have the calls to `add_weight()`, and then - call the super's `build()` (which sets `self.built = True`, which is - nice in case the user wants to call `build()` manually before the - first `__call__`). - * `call()`: Called in `__call__` after making sure `build()` has been called - once. Should actually perform the logic of applying the layer to the - input tensors (which should be passed in as the first argument). - - Args: - trainable: Boolean, whether the layer's variables should be trainable. - name: String name of the layer. - dtype: The dtype of the layer's computations and weights (default of - `None` means use `tf.keras.backend.floatx` in TensorFlow 2, or the type - of the first input in TensorFlow 1). - dynamic: Set this to `True` if your layer should only be run eagerly, and - should not be used to generate a static computation graph. - This would be the case for a Tree-RNN or a recursive network, - for example, or generally for any layer that manipulates tensors - using Python control flow. If `False`, we assume that the layer can - safely be used to generate a static computation graph. - - Attributes: - name: The name of the layer (string). - dtype: The dtype of the layer's computations and weights. If mixed - precision is used with a `tf.keras.mixed_precision.Policy`, this is - instead just the dtype of the layer's weights, as the computations are - done in a different dtype. - updates: List of update ops of this layer. - losses: List of losses added by this layer. - trainable_weights: List of variables to be included in backprop. - non_trainable_weights: List of variables that should not be - included in backprop. - weights: The concatenation of the lists trainable_weights and - non_trainable_weights (in this order). - trainable: Whether the layer should be trained (boolean). - input_spec: Optional (list of) `InputSpec` object(s) specifying the - constraints on inputs that can be accepted by the layer. - - Each layer has a dtype, which is typically the dtype of the layer's - computations and variables. A layer's dtype can be queried via the - `Layer.dtype` property. The dtype is specified with the `dtype` constructor - argument. In TensorFlow 2, the dtype defaults to `tf.keras.backend.floatx()` - if no dtype is passed. `floatx()` itself defaults to "float32". Additionally, - layers will cast their inputs to the layer's dtype in TensorFlow 2. When mixed - precision is used, layers may have different computation and variable dtypes. - See `tf.keras.mixed_precision.Policy` for details on layer dtypes. - """ - - # See tf.Module for the usage of this property. - # The key for _obj_reference_counts_dict is a Trackable, which could be a - # variable or layer etc. tf.Module._flatten will fail to flatten the key - # since it is trying to convert Trackable to a string. This attribute can be - # ignored even after the fix of nest lib, since the trackable object should - # already been available as individual attributes. _obj_reference_counts_dict - # just contains a copy of them. - _TF_MODULE_IGNORED_PROPERTIES = frozenset(itertools.chain( - ('_obj_reference_counts_dict',), - tf.Module._TF_MODULE_IGNORED_PROPERTIES - )) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, trainable=True, name=None, dtype=None, dynamic=False, - **kwargs): - self._instrument_layer_creation() - - # These properties should be set by the user via keyword arguments. 
- # note that 'dtype', 'input_shape' and 'batch_input_shape' - # are only applicable to input layers: do not pass these keywords - # to non-input layers. - allowed_kwargs = { - 'input_dim', 'input_shape', 'batch_input_shape', 'batch_size', - 'weights', 'activity_regularizer', 'autocast', 'implementation' - } - # Validate optional keyword arguments. - generic_utils.validate_kwargs(kwargs, allowed_kwargs) - - # Mutable properties - # Indicates whether the layer's weights are updated during training - # and whether the layer's updates are run during training. - self._trainable = trainable - # A stateful layer is a layer whose updates are run during inference too, - # for instance stateful RNNs. - self._stateful = False - # Indicates whether `build` needs to be called upon layer call, to create - # the layer's weights. - self.built = False - self._build_input_shape = None - # Provides information about which inputs are compatible with the layer. - self._input_spec = None - self.supports_masking = False - - self._init_set_name(name) - self._activity_regularizer = regularizers.get( - kwargs.pop('activity_regularizer', None)) - self._maybe_create_attribute('_trainable_weights', []) - self._maybe_create_attribute('_non_trainable_weights', []) - self._updates = [] - # Object to store all thread local layer properties. - self._thread_local = threading.local() - # A list of zero-argument lambdas which return Tensors, used for variable - # regularizers. - self._callable_losses = [] - # A list of symbolic Tensors containing activity regularizers and losses - # manually added through `add_loss` in graph-building mode. - self._losses = [] - # A list of metric instances corresponding to the symbolic metric tensors - # added using the `add_metric` API. - self._metrics = [] - - # Note that models also have a dtype policy, as they are layers. For - # functional models, the policy is only used in Model.compile, which wraps - # the optimizer with a LossScaleOptimizer if the policy name is - # "mixed_float16". Subclassed models additionally use the policy's compute - # and variable dtypes, as like any ordinary layer. - self._set_dtype_policy(dtype) - # Boolean indicating whether the layer automatically casts its inputs to the - # layer's compute_dtype. - self._autocast = kwargs.get('autocast', - base_layer_utils.v2_dtype_behavior_enabled()) - - # Dependencies tracked via attribute assignment. - # All layers in order of horizontal graph traversal. - # Entries are unique. For models includes input and output layers. - self._maybe_create_attribute('_self_tracked_trackables', []) - - # These lists will be filled via successive calls - # to self._add_inbound_node(). - # Used in symbolic mode only, only in conjunction with graph-networks - self._inbound_nodes_value = [] - self._outbound_nodes_value = [] - - self._init_call_fn_args() - - # Whether the `call` method can be used to build a TF graph without issues. - # This attribute has no effect if the model is created using the Functional - # API. Instead, `model.dynamic` is determined based on the internal layers. - self._dynamic = dynamic - - # Manage input shape information if passed. - if 'input_dim' in kwargs and 'input_shape' not in kwargs: - # Backwards compatibility: alias 'input_dim' to 'input_shape'. 
-      kwargs['input_shape'] = (kwargs['input_dim'],)
-    if 'input_shape' in kwargs or 'batch_input_shape' in kwargs:
-      # In this case we will later create an input layer
-      # to insert before the current layer
-      if 'batch_input_shape' in kwargs:
-        batch_input_shape = tuple(kwargs['batch_input_shape'])
-      elif 'input_shape' in kwargs:
-        if 'batch_size' in kwargs:
-          batch_size = kwargs['batch_size']
-        else:
-          batch_size = None
-        batch_input_shape = (batch_size,) + tuple(kwargs['input_shape'])
-      self._batch_input_shape = batch_input_shape
-
-    # Manage initial weight values if passed.
-    self._initial_weights = kwargs.get('weights', None)
-
-    # Whether the layer will track any layers that is set as attribute on itself
-    # as sub-layers, the weights from the sub-layers will be included in the
-    # parent layer's variables() as well.
-    # Default to True, which means auto tracking is turned on. Certain subclass
-    # might want to turn it off, like Sequential model.
-    self._auto_track_sub_layers = True
+    """Base layer class.
 
-    # Mark this layer as having been originally built as a tf1 layer/model
-    self._originally_built_as_v1 = True
+    This is the class from which all layers inherit.
 
-    # For backwards compat reasons, most built-in layers do not guarantee
-    # That they will 100% preserve the structure of input args when saving
-    # / loading configs. E.g. they may un-nest an arg that is
-    # a list with one element.
-    self._preserve_input_structure_in_config = False
+    A layer is a class implementing common neural network operations, such
+    as convolution, batch norm, etc. These operations require managing weights,
+    losses, updates, and inter-layer connectivity.
 
-  @tf.__internal__.tracking.no_automatic_dependency_tracking
-  @generic_utils.default
-  def build(self, input_shape):
-    """Creates the variables of the layer (optional, for subclass implementers).
+    Users will just instantiate a layer and then treat it as a callable.
 
-    This is a method that implementers of subclasses of `Layer` or `Model`
-    can override if they need a state-creation step in-between
-    layer instantiation and layer call.
+    We recommend that descendants of `Layer` implement the following methods:
 
-    This is typically used to create the weights of `Layer` subclasses.
+    * `__init__()`: Save configuration in member variables
+    * `build()`: Called once from `__call__`, when we know the shapes of inputs
+      and `dtype`. Should have the calls to `add_weight()`, and then
+      call the super's `build()` (which sets `self.built = True`, which is
+      nice in case the user wants to call `build()` manually before the
+      first `__call__`).
+    * `call()`: Called in `__call__` after making sure `build()` has been called
+      once. Should actually perform the logic of applying the layer to the
+      input tensors (which should be passed in as the first argument).
 
     Args:
-      input_shape: Instance of `TensorShape`, or list of instances of
-        `TensorShape` if the layer expects a list of inputs
-        (one instance per input).
+      trainable: Boolean, whether the layer's variables should be trainable.
+      name: String name of the layer.
+      dtype: The dtype of the layer's computations and weights (default of
+        `None` means use `tf.keras.backend.floatx` in TensorFlow 2, or the type
+        of the first input in TensorFlow 1).
+      dynamic: Set this to `True` if your layer should only be run eagerly, and
+        should not be used to generate a static computation graph.
+ This would be the case for a Tree-RNN or a recursive network, + for example, or generally for any layer that manipulates tensors + using Python control flow. If `False`, we assume that the layer can + safely be used to generate a static computation graph. + + Attributes: + name: The name of the layer (string). + dtype: The dtype of the layer's computations and weights. If mixed + precision is used with a `tf.keras.mixed_precision.Policy`, this is + instead just the dtype of the layer's weights, as the computations are + done in a different dtype. + updates: List of update ops of this layer. + losses: List of losses added by this layer. + trainable_weights: List of variables to be included in backprop. + non_trainable_weights: List of variables that should not be + included in backprop. + weights: The concatenation of the lists trainable_weights and + non_trainable_weights (in this order). + trainable: Whether the layer should be trained (boolean). + input_spec: Optional (list of) `InputSpec` object(s) specifying the + constraints on inputs that can be accepted by the layer. + + Each layer has a dtype, which is typically the dtype of the layer's + computations and variables. A layer's dtype can be queried via the + `Layer.dtype` property. The dtype is specified with the `dtype` constructor + argument. In TensorFlow 2, the dtype defaults to `tf.keras.backend.floatx()` + if no dtype is passed. `floatx()` itself defaults to "float32". + Additionally, layers will cast their inputs to the layer's dtype in + TensorFlow 2. When mixed precision is used, layers may have different + computation and variable dtypes. See `tf.keras.mixed_precision.Policy` for + details on layer dtypes. """ - if not hasattr(self.build, '_is_default'): - self._build_input_shape = input_shape - self.built = True - @doc_controls.for_subclass_implementers - def call(self, inputs, **kwargs): # pylint: disable=unused-argument - """This is where the layer's logic lives. - - Args: - inputs: Input tensor, or list/tuple of input tensors. - **kwargs: Additional keyword arguments. - - Returns: - A tensor or list/tuple of tensors. - """ - return inputs - - @doc_controls.for_subclass_implementers - def _add_trackable(self, trackable_object, trainable): - """Adds a Trackable object to this layer's state. - - Args: - trackable_object: The tf.tracking.Trackable object to add. - trainable: Boolean, whether the variable should be part of the layer's - "trainable_variables" (e.g. variables, biases) or - "non_trainable_variables" (e.g. BatchNorm mean and variance). - - Returns: - The TrackableWeightHandler used to track this object. - """ - if isinstance(trackable_object, base_layer_utils.TrackableWeightHandler): - handler = trackable_object - else: - handler = base_layer_utils.TrackableWeightHandler(trackable_object) - if trainable: - self._trainable_weights.append(handler) - else: - self._non_trainable_weights.append(handler) - return handler - - @doc_controls.for_subclass_implementers - def add_weight(self, - name=None, - shape=None, - dtype=None, - initializer=None, - regularizer=None, - trainable=None, - constraint=None, - partitioner=None, - use_resource=None, - synchronization=tf.VariableSynchronization.AUTO, - aggregation=tf.compat.v1.VariableAggregation.NONE, - **kwargs): - """Adds a new variable to the layer. + # See tf.Module for the usage of this property. The key for + # _obj_reference_counts_dict is a Trackable, which could be a variable or + # layer etc. 
tf.Module._flatten will fail to flatten the key since it is
+    # trying to convert Trackable to a string. This attribute can be ignored
+    # even after the fix of nest lib, since the trackable object should already
+    # be available as individual attributes. _obj_reference_counts_dict just
+    # contains a copy of them.
+    _TF_MODULE_IGNORED_PROPERTIES = frozenset(
+        itertools.chain(
+            ("_obj_reference_counts_dict",),
+            tf.Module._TF_MODULE_IGNORED_PROPERTIES,
+        )
+    )
+
+    @tf.__internal__.tracking.no_automatic_dependency_tracking
+    def __init__(
+        self, trainable=True, name=None, dtype=None, dynamic=False, **kwargs
+    ):
+        self._instrument_layer_creation()
+
+        # These properties should be set by the user via keyword arguments.
+        # Note that 'dtype', 'input_shape' and 'batch_input_shape'
+        # are only applicable to input layers: do not pass these keywords
+        # to non-input layers.
+        allowed_kwargs = {
+            "input_dim",
+            "input_shape",
+            "batch_input_shape",
+            "batch_size",
+            "weights",
+            "activity_regularizer",
+            "autocast",
+            "implementation",
+        }
+        # Validate optional keyword arguments.
+        generic_utils.validate_kwargs(kwargs, allowed_kwargs)
+
+        # Mutable properties
+        # Indicates whether the layer's weights are updated during training
+        # and whether the layer's updates are run during training.
+        self._trainable = trainable
+        # A stateful layer is a layer whose updates are run during inference
+        # too, for instance stateful RNNs.
+        self._stateful = False
+        # Indicates whether `build` needs to be called upon layer call, to
+        # create the layer's weights.
+        self.built = False
+        self._build_input_shape = None
+        # Provides information about which inputs are compatible with the layer.
+        self._input_spec = None
+        self.supports_masking = False
+
+        self._init_set_name(name)
+        self._activity_regularizer = regularizers.get(
+            kwargs.pop("activity_regularizer", None)
+        )
+        self._maybe_create_attribute("_trainable_weights", [])
+        self._maybe_create_attribute("_non_trainable_weights", [])
+        self._updates = []
+        # Object to store all thread local layer properties.
+        self._thread_local = threading.local()
+        # A list of zero-argument lambdas which return Tensors, used for
+        # variable regularizers.
+        self._callable_losses = []
+        # A list of symbolic Tensors containing activity regularizers and losses
+        # manually added through `add_loss` in graph-building mode.
+        self._losses = []
+        # A list of metric instances corresponding to the symbolic metric
+        # tensors added using the `add_metric` API.
+        self._metrics = []
+
+        # Note that models also have a dtype policy, as they are layers. For
+        # functional models, the policy is only used in Model.compile, which
+        # wraps the optimizer with a LossScaleOptimizer if the policy name is
+        # "mixed_float16". Subclassed models additionally use the policy's
+        # compute and variable dtypes, like any ordinary layer.
+        self._set_dtype_policy(dtype)
+        # Boolean indicating whether the layer automatically casts its inputs to
+        # the layer's compute_dtype.
+        self._autocast = kwargs.get(
+            "autocast", base_layer_utils.v2_dtype_behavior_enabled()
+        )
+
+        # Dependencies tracked via attribute assignment.
+        # All layers in order of horizontal graph traversal.
+        # Entries are unique. For models, this includes input and output layers.
+        self._maybe_create_attribute("_self_tracked_trackables", [])
+
+        # These lists will be filled via successive calls
+        # to self._add_inbound_node().
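The `generic_utils.validate_kwargs` call earlier in this `__init__` is a plain allow-list check; it is what surfaces as a `TypeError` when a misspelled keyword reaches a layer constructor. A standalone sketch of the same pattern (illustrative only — the real Keras helper may differ in details):

```python
def validate_kwargs(
    kwargs, allowed_kwargs, error_message="Keyword argument not understood:"
):
    """Reject any keyword argument outside the allow-list."""
    for kwarg in kwargs:
        if kwarg not in allowed_kwargs:
            raise TypeError(error_message, kwarg)

validate_kwargs({"input_shape": (4,)}, {"input_shape", "batch_size"})  # ok
# validate_kwargs({"input_shpae": (4,)}, {"input_shape"})  # TypeError (typo)
```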
+        # Used in symbolic mode only, only in conjunction with graph-networks
+        self._inbound_nodes_value = []
+        self._outbound_nodes_value = []
+
+        self._init_call_fn_args()
+
+        # Whether the `call` method can be used to build a TF graph without
+        # issues. This attribute has no effect if the model is created using
+        # the Functional API. Instead, `model.dynamic` is determined based on
+        # the internal layers.
+        self._dynamic = dynamic
+
+        # Manage input shape information if passed.
+        if "input_dim" in kwargs and "input_shape" not in kwargs:
+            # Backwards compatibility: alias 'input_dim' to 'input_shape'.
+            kwargs["input_shape"] = (kwargs["input_dim"],)
+        if "input_shape" in kwargs or "batch_input_shape" in kwargs:
+            # In this case we will later create an input layer
+            # to insert before the current layer
+            if "batch_input_shape" in kwargs:
+                batch_input_shape = tuple(kwargs["batch_input_shape"])
+            elif "input_shape" in kwargs:
+                if "batch_size" in kwargs:
+                    batch_size = kwargs["batch_size"]
+                else:
+                    batch_size = None
+                batch_input_shape = (batch_size,) + tuple(kwargs["input_shape"])
+            self._batch_input_shape = batch_input_shape
+
+        # Manage initial weight values if passed.
+        self._initial_weights = kwargs.get("weights", None)
+
+        # Whether the layer will track any layers that are set as attributes on
+        # itself as sub-layers; the weights from the sub-layers will be included
+        # in the parent layer's variables() as well. Defaults to `True`, which
+        # means auto tracking is turned on. Certain subclasses might want to turn
+        # it off, like the Sequential model.
+        self._auto_track_sub_layers = True
+
+        # Mark this layer as having been originally built as a tf1 layer/model
+        self._originally_built_as_v1 = True
+
+        # For backward compat reasons, most built-in layers do not guarantee
+        # that they will 100% preserve the structure of input args when saving
+        # / loading configs. E.g. they may un-nest an arg that is
+        # a list with one element.
+        self._preserve_input_structure_in_config = False
+
+    @tf.__internal__.tracking.no_automatic_dependency_tracking
+    @generic_utils.default
+    def build(self, input_shape):
+        """Creates the variables of the layer (for subclass implementers).
+
+        This is a method that implementers of subclasses of `Layer` or `Model`
+        can override if they need a state-creation step in-between
+        layer instantiation and layer call.
+
+        This is typically used to create the weights of `Layer` subclasses.
+
+        Args:
+            input_shape: Instance of `TensorShape`, or list of instances of
+                `TensorShape` if the layer expects a list of inputs
+                (one instance per input).
+        """
+        if not hasattr(self.build, "_is_default"):
+            self._build_input_shape = input_shape
+        self.built = True
+
+    @doc_controls.for_subclass_implementers
+    def call(self, inputs, **kwargs):
+        """This is where the layer's logic lives.
+
+        Args:
+            inputs: Input tensor, or list/tuple of input tensors.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            A tensor or list/tuple of tensors.
+        """
+        return inputs
-    Args:
-      name: Variable name.
-      shape: Variable shape. Defaults to scalar if unspecified.
-      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
-      initializer: Initializer instance (callable).
-      regularizer: Regularizer instance (callable).
-      trainable: Boolean, whether the variable should be part of the layer's
-        "trainable_variables" (e.g. variables, biases)
-        or "non_trainable_variables" (e.g. BatchNorm mean and variance).
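The shape bookkeeping in the `__init__` tail above ('input_dim' aliasing plus `_batch_input_shape`) reduces to a few lines. A standalone sketch with a hypothetical helper name, mirroring the logic above:

```python
def resolve_batch_input_shape(kwargs):
    # Mirrors the Layer.__init__ bookkeeping above (sketch, not the real API).
    if "input_dim" in kwargs and "input_shape" not in kwargs:
        # Backwards compatibility: 'input_dim' is an alias for 'input_shape'.
        kwargs["input_shape"] = (kwargs["input_dim"],)
    if "batch_input_shape" in kwargs:
        return tuple(kwargs["batch_input_shape"])
    if "input_shape" in kwargs:
        batch_size = kwargs.get("batch_size")  # None when not given
        return (batch_size,) + tuple(kwargs["input_shape"])
    return None

print(resolve_batch_input_shape({"input_dim": 16}))                  # (None, 16)
print(resolve_batch_input_shape({"input_shape": (16,), "batch_size": 32}))  # (32, 16)
```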
- Note that `trainable` cannot be `True` if `synchronization` - is set to `ON_READ`. - constraint: Constraint instance (callable). - partitioner: Partitioner to be passed to the `Trackable` API. - use_resource: Whether to use `ResourceVariable`. - synchronization: Indicates when a distributed a variable will be - aggregated. Accepted values are constants defined in the class - `tf.VariableSynchronization`. By default the synchronization is set to - `AUTO` and the current `DistributionStrategy` chooses - when to synchronize. If `synchronization` is set to `ON_READ`, - `trainable` must not be set to `True`. - aggregation: Indicates how a distributed variable will be aggregated. - Accepted values are constants defined in the class - `tf.VariableAggregation`. - **kwargs: Additional keyword arguments. Accepted values are `getter`, - `collections`, `experimental_autocast` and `caching_device`. - - Returns: - The created variable. Usually either a `Variable` or `ResourceVariable` - instance. If `partitioner` is not `None`, a `PartitionedVariable` - instance is returned. - - Raises: - RuntimeError: If called with partitioned variable regularization and - eager execution is enabled. - ValueError: When giving unsupported dtype and no initializer or when - trainable has been set to True with synchronization set as `ON_READ`. - """ - if shape is None: - shape = () - # Validate optional keyword arguments. - for kwarg in kwargs: - if kwarg not in ['getter', 'collections', 'experimental_autocast', - 'caching_device']: - raise TypeError('Unknown keyword argument:', kwarg) - has_custom_getter = 'getter' in kwargs - getter = kwargs.pop('getter', base_layer_utils.make_variable) - collections_arg = kwargs.pop('collections', None) - # 'experimental_autocast' can be set to False by the caller to indicate an - # AutoCastVariable should never be created. - autocast = kwargs.pop('experimental_autocast', True) - # See the docstring for tf.Variable about the details for caching_device. - caching_device = kwargs.pop('caching_device', None) - - if dtype is None: - dtype = self.dtype or backend.floatx() - dtype = tf.as_dtype(dtype) - if self._dtype_policy.variable_dtype is None: - # The policy is "_infer", so we infer the policy from the variable dtype. - self._set_dtype_policy(policy.Policy(dtype.base_dtype.name)) - initializer = initializers.get(initializer) - regularizer = regularizers.get(regularizer) - constraint = constraints.get(constraint) - - if synchronization == tf.VariableSynchronization.ON_READ: - if trainable: - raise ValueError( - 'Synchronization value can be set to ' - 'VariableSynchronization.ON_READ only for non-trainable variables. ' - 'You have specified trainable=True and ' - 'synchronization=VariableSynchronization.ON_READ.') - else: - # Set trainable to be false when variable is to be synced on read. - trainable = False - elif trainable is None: - trainable = True - - # Initialize variable when no initializer provided - if initializer is None: - # If dtype is DT_FLOAT, provide a uniform unit scaling initializer - if dtype.is_floating: - initializer = initializers.get('glorot_uniform') - # If dtype is DT_INT/DT_UINT, provide a default value `zero` - # If dtype is DT_BOOL, provide a default value `FALSE` - elif dtype.is_integer or dtype.is_unsigned or dtype.is_bool: - initializer = tf.compat.v1.zeros_initializer() - # NOTES:Do we need to support for handling DT_STRING and DT_COMPLEX here? 
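The `add_weight` contract spelled out in the docstring above (unchanged by the reformatting below) includes the rule that `synchronization=ON_READ` forces `trainable=False`. A hedged sketch of the typical ON_READ use case — a per-replica accumulator — with a hypothetical `SumTracker` layer, assuming eager TF 2.x:

```python
import tensorflow as tf

class SumTracker(tf.keras.layers.Layer):
    def build(self, input_shape):
        # ON_READ variables must be non-trainable; under tf.distribute each
        # replica keeps its own copy, aggregated (here: summed) when read.
        self.total = self.add_weight(
            name="total",
            shape=(),
            initializer="zeros",
            trainable=False,
            synchronization=tf.VariableSynchronization.ON_READ,
            aggregation=tf.VariableAggregation.SUM,
        )
        super().build(input_shape)

    def call(self, inputs):
        self.total.assign_add(tf.reduce_sum(inputs))
        return inputs

layer = SumTracker()
layer(tf.ones((2, 3)))
print(layer.total.numpy())  # 6.0
```

Passing `trainable=True` together with `ON_READ` would raise the `ValueError` shown in the code below.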
- elif not has_custom_getter: - # When `getter` is specified, it's possibly fine for `initializer` to be - # None since it's up to the custom `getter` to raise error in case it - # indeed needs `initializer`. - raise ValueError('An initializer for variable %s of type %s is required' - ' for layer %s' % (name, dtype.base_dtype, self.name)) - - if (autocast and - self._dtype_policy.compute_dtype != self._dtype_policy.variable_dtype - and dtype.is_floating): - # Wrap 'getter' with a version that returns an AutoCastVariable. - old_getter = getter - def getter(*args, **kwargs): # pylint: disable=function-redefined - variable = old_getter(*args, **kwargs) - return autocast_variable.create_autocast_variable(variable) - # Also the caching_device does not work with the mixed precision API, - # disable it if it is specified. - # TODO(b/142020079): Re-enable it once the bug is fixed. - if caching_device is not None: - tf_logging.warning( - '`caching_device` does not work with mixed precision API. Ignoring ' - 'user specified `caching_device`.') - caching_device = None - - variable = self._add_variable_with_custom_getter( - name=name, - shape=shape, - # TODO(allenl): a `make_variable` equivalent should be added as a - # `Trackable` method. - getter=getter, - # Manage errors in Layer rather than Trackable. - overwrite=True, - initializer=initializer, - dtype=dtype, - constraint=constraint, - trainable=trainable, - partitioner=partitioner, - use_resource=use_resource, - collections=collections_arg, - synchronization=synchronization, - aggregation=aggregation, - caching_device=caching_device) - if regularizer is not None: - # TODO(fchollet): in the future, this should be handled at the - # level of variable creation, and weight regularization losses - # should be variable attributes. - name_in_scope = variable.name[:variable.name.find(':')] - self._handle_weight_regularization(name_in_scope, - variable, - regularizer) - if base_layer_utils.is_split_variable(variable): - for v in variable: - backend.track_variable(v) + @doc_controls.for_subclass_implementers + def _add_trackable(self, trackable_object, trainable): + """Adds a Trackable object to this layer's state. + + Args: + trackable_object: The tf.tracking.Trackable object to add. + trainable: Boolean, whether the variable should be part of the layer's + "trainable_variables" (e.g. variables, biases) or + "non_trainable_variables" (e.g. BatchNorm mean and variance). + + Returns: + The TrackableWeightHandler used to track this object. + """ + if isinstance( + trackable_object, base_layer_utils.TrackableWeightHandler + ): + handler = trackable_object + else: + handler = base_layer_utils.TrackableWeightHandler(trackable_object) if trainable: - self._trainable_weights.append(v) + self._trainable_weights.append(handler) else: - self._non_trainable_weights.append(v) - else: - backend.track_variable(variable) - if trainable: - self._trainable_weights.append(variable) - else: - self._non_trainable_weights.append(variable) - return variable - - @generic_utils.default - def get_config(self): - """Returns the config of the layer. - - A layer config is a Python dictionary (serializable) - containing the configuration of a layer. - The same layer can be reinstantiated later - (without its trained weights) from this configuration. - - The config of a layer does not include connectivity - information, nor the layer class name. These are handled - by `Network` (one layer of abstraction above). - - Returns: - Python dictionary. 
- """ - all_args = tf_inspect.getfullargspec(self.__init__).args - config = {'name': self.name, 'trainable': self.trainable} - if hasattr(self, '_batch_input_shape'): - config['batch_input_shape'] = self._batch_input_shape - config['dtype'] = policy.serialize(self._dtype_policy) - if hasattr(self, 'dynamic'): - # Only include `dynamic` in the `config` if it is `True` - if self.dynamic: - config['dynamic'] = self.dynamic - elif 'dynamic' in all_args: - all_args.remove('dynamic') - expected_args = config.keys() - # Finds all arguments in the `__init__` that are not in the config: - extra_args = [arg for arg in all_args if arg not in expected_args] - # Check that either the only argument in the `__init__` is `self`, - # or that `get_config` has been overridden: - if len(extra_args) > 1 and hasattr(self.get_config, '_is_default'): - raise NotImplementedError('Layers with arguments in `__init__` must ' - 'override `get_config`.') - return config - - @classmethod - def from_config(cls, config): - """Creates a layer from its config. - - This method is the reverse of `get_config`, - capable of instantiating the same layer from the config - dictionary. It does not handle layer connectivity - (handled by Network), nor weights (handled by `set_weights`). - - Args: - config: A Python dictionary, typically the - output of get_config. - - Returns: - A layer instance. - """ - return cls(**config) - - def compute_output_shape(self, input_shape): - """Computes the output shape of the layer. - - If the layer has not been built, this method will call `build` on the - layer. This assumes that the layer will later be used with inputs that - match the input shape provided here. - - Args: - input_shape: Shape tuple (tuple of integers) - or list of shape tuples (one per output tensor of the layer). - Shape tuples can include None for free dimensions, - instead of an integer. - - Returns: - An input shape tuple. - """ - if tf.executing_eagerly(): - # In this case we build the model first in order to do shape inference. - # This is acceptable because the framework only calls - # `compute_output_shape` on shape values that the layer would later be - # built for. It would however cause issues in case a user attempts to - # use `compute_output_shape` manually with shapes that are incompatible - # with the shape the Layer will be called on (these users will have to - # implement `compute_output_shape` themselves). - self._maybe_build(input_shape) - with tf.compat.v1.get_default_graph().as_default(): - graph = tf.__internal__.FuncGraph('graph') - with graph.as_default(): - input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False) - inputs = tf.nest.map_structure( - base_layer_utils.generate_placeholders_from_shape, input_shape) - try: - outputs = self(inputs, training=False) - except TypeError as e: + self._non_trainable_weights.append(handler) + return handler + + @doc_controls.for_subclass_implementers + def add_weight( + self, + name=None, + shape=None, + dtype=None, + initializer=None, + regularizer=None, + trainable=None, + constraint=None, + partitioner=None, + use_resource=None, + synchronization=tf.VariableSynchronization.AUTO, + aggregation=tf.compat.v1.VariableAggregation.NONE, + **kwargs, + ): + """Adds a new variable to the layer. + + Args: + name: Variable name. + shape: Variable shape. Defaults to scalar if unspecified. + dtype: The type of the variable. Defaults to `self.dtype` or + `float32`. + initializer: Initializer instance (callable). + regularizer: Regularizer instance (callable). 
+ trainable: Boolean, whether the variable should be part of the layer's + "trainable_variables" (e.g. variables, biases) + or "non_trainable_variables" (e.g. BatchNorm mean and variance). + Note that `trainable` cannot be `True` if `synchronization` + is set to `ON_READ`. + constraint: Constraint instance (callable). + partitioner: Partitioner to be passed to the `Trackable` API. + use_resource: Whether to use `ResourceVariable`. + synchronization: Indicates when a distributed variable will be + aggregated. Accepted values are constants defined in the class + `tf.VariableSynchronization`. By default the synchronization is set + to `AUTO` and the current `DistributionStrategy` chooses when to + synchronize. If `synchronization` is set to `ON_READ`, `trainable` + must not be set to `True`. + aggregation: Indicates how a distributed variable will be aggregated. + Accepted values are constants defined in the class + `tf.VariableAggregation`. + **kwargs: Additional keyword arguments. Accepted values are `getter`, + `collections`, `experimental_autocast` and `caching_device`. + + Returns: + The created variable. Usually either a `Variable` or + `ResourceVariable` instance. If `partitioner` is not `None`, a + `PartitionedVariable` instance is returned. + + Raises: + RuntimeError: If called with partitioned variable regularization and + eager execution is enabled. + ValueError: When giving unsupported dtype and no initializer or when + trainable has been set to True with synchronization set as + `ON_READ`. + """ + if shape is None: + shape = () + # Validate optional keyword arguments. + for kwarg in kwargs: + if kwarg not in [ + "getter", + "collections", + "experimental_autocast", + "caching_device", + ]: + raise TypeError("Unknown keyword argument:", kwarg) + has_custom_getter = "getter" in kwargs + getter = kwargs.pop("getter", base_layer_utils.make_variable) + collections_arg = kwargs.pop("collections", None) + # 'experimental_autocast' can be set to False by the caller to indicate + # an AutoCastVariable should never be created. + autocast = kwargs.pop("experimental_autocast", True) + # See the docstring for tf.Variable about the details for + # caching_device. + caching_device = kwargs.pop("caching_device", None) + + if dtype is None: + dtype = self.dtype or backend.floatx() + dtype = tf.as_dtype(dtype) + if self._dtype_policy.variable_dtype is None: + # The policy is "_infer", so we infer the policy from the variable + # dtype. + self._set_dtype_policy(policy.Policy(dtype.base_dtype.name)) + initializer = initializers.get(initializer) + regularizer = regularizers.get(regularizer) + constraint = constraints.get(constraint) + + if synchronization == tf.VariableSynchronization.ON_READ: + if trainable: + raise ValueError( + "Synchronization value can be set to " + "VariableSynchronization.ON_READ only for non-trainable " + "variables. You have specified trainable=True and " + "synchronization=VariableSynchronization.ON_READ." + ) + else: + # Set trainable to be false when the variable is to be synced on + # read. 
+                trainable = False
+        elif trainable is None:
+            trainable = True
+
+        # Initialize variable when no initializer provided
+        if initializer is None:
+            # If dtype is DT_FLOAT, provide a uniform unit scaling initializer
+            if dtype.is_floating:
+                initializer = initializers.get("glorot_uniform")
+            # If dtype is DT_INT/DT_UINT, provide a default value `zero`
+            # If dtype is DT_BOOL, provide a default value `FALSE`
+            elif dtype.is_integer or dtype.is_unsigned or dtype.is_bool:
+                initializer = tf.compat.v1.zeros_initializer()
+            # NOTE: Do we need to support handling DT_STRING and DT_COMPLEX
+            # here?
+            elif not has_custom_getter:
+                # When `getter` is specified, it's possibly fine for
+                # `initializer` to be None since it's up to the custom `getter`
+                # to raise an error in case it indeed needs `initializer`.
+                raise ValueError(
+                    "An initializer for variable %s of type %s is required"
+                    " for layer %s" % (name, dtype.base_dtype, self.name)
+                )
+
+        if (
+            autocast
+            and self._dtype_policy.compute_dtype
+            != self._dtype_policy.variable_dtype
+            and dtype.is_floating
+        ):
+            # Wrap 'getter' with a version that returns an AutoCastVariable.
+            old_getter = getter
+
+            def getter(*args, **kwargs):
+                variable = old_getter(*args, **kwargs)
+                return autocast_variable.create_autocast_variable(variable)
+
+            # Also, the caching_device does not work with the mixed precision
+            # API; disable it if it is specified.
+            # TODO(b/142020079): Re-enable it once the bug is fixed.
+            if caching_device is not None:
+                tf_logging.warning(
+                    "`caching_device` does not work with mixed precision API. "
+                    "Ignoring user specified `caching_device`."
+                )
+                caching_device = None
+
+        variable = self._add_variable_with_custom_getter(
+            name=name,
+            shape=shape,
+            # TODO(allenl): a `make_variable` equivalent should be added as a
+            # `Trackable` method.
+            getter=getter,
+            # Manage errors in Layer rather than Trackable.
+            overwrite=True,
+            initializer=initializer,
+            dtype=dtype,
+            constraint=constraint,
+            trainable=trainable,
+            partitioner=partitioner,
+            use_resource=use_resource,
+            collections=collections_arg,
+            synchronization=synchronization,
+            aggregation=aggregation,
+            caching_device=caching_device,
+        )
+        if regularizer is not None:
+            # TODO(fchollet): in the future, this should be handled at the
+            # level of variable creation, and weight regularization losses
+            # should be variable attributes.
+            name_in_scope = variable.name[: variable.name.find(":")]
+            self._handle_weight_regularization(
+                name_in_scope, variable, regularizer
+            )
+        if base_layer_utils.is_split_variable(variable):
+            for v in variable:
+                backend.track_variable(v)
+                if trainable:
+                    self._trainable_weights.append(v)
+                else:
+                    self._non_trainable_weights.append(v)
+        else:
+            backend.track_variable(variable)
+            if trainable:
+                self._trainable_weights.append(variable)
+            else:
+                self._non_trainable_weights.append(variable)
+        return variable
+
+    @generic_utils.default
+    def get_config(self):
+        """Returns the config of the layer.
+
+        A layer config is a Python dictionary (serializable)
+        containing the configuration of a layer.
+        The same layer can be reinstantiated later
+        (without its trained weights) from this configuration.
+
+        The config of a layer does not include connectivity
+        information, nor the layer class name. These are handled
+        by `Network` (one layer of abstraction above).
+
+        Returns:
+            Python dictionary.
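The `AutoCastVariable` wrapping above is what makes mixed precision work at the variable level: weights stay in the variable dtype, and reads inside `call` are cast to the compute dtype. A small sketch of the observable behavior (assumes TF ≥ 2.4, where `tf.keras.mixed_precision.Policy` is public API):

```python
import tensorflow as tf

# Layers built under this policy keep float32 variables but compute in
# float16; add_weight wraps the kernel in an AutoCastVariable.
policy = tf.keras.mixed_precision.Policy("mixed_float16")
dense = tf.keras.layers.Dense(8, dtype=policy)
dense.build((None, 4))

print(dense.dtype)                   # "float32" (the variable dtype)
print(dense.compute_dtype)           # "float16" (dtype used inside call)
print(dense(tf.ones((2, 4))).dtype)  # tf.float16
```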
+ """ + all_args = tf_inspect.getfullargspec(self.__init__).args + config = {"name": self.name, "trainable": self.trainable} + if hasattr(self, "_batch_input_shape"): + config["batch_input_shape"] = self._batch_input_shape + config["dtype"] = policy.serialize(self._dtype_policy) + if hasattr(self, "dynamic"): + # Only include `dynamic` in the `config` if it is `True` + if self.dynamic: + config["dynamic"] = self.dynamic + elif "dynamic" in all_args: + all_args.remove("dynamic") + expected_args = config.keys() + # Finds all arguments in the `__init__` that are not in the config: + extra_args = [arg for arg in all_args if arg not in expected_args] + # Check that either the only argument in the `__init__` is `self`, + # or that `get_config` has been overridden: + if len(extra_args) > 1 and hasattr(self.get_config, "_is_default"): raise NotImplementedError( - 'We could not automatically infer the static shape of the ' - 'layer\'s output. Please implement the ' - '`compute_output_shape` method on your layer (%s).' % - self.__class__.__name__) from e - return tf.nest.map_structure(lambda t: t.shape, outputs) - raise NotImplementedError - - @doc_controls.for_subclass_implementers - def compute_output_signature(self, input_signature): - """Compute the output tensor signature of the layer based on the inputs. - - Unlike a TensorShape object, a TensorSpec object contains both shape - and dtype information for a tensor. This method allows layers to provide - output dtype information if it is different from the input dtype. - For any layer that doesn't implement this function, - the framework will fall back to use `compute_output_shape`, and will - assume that the output dtype matches the input dtype. - - Args: - input_signature: Single TensorSpec or nested structure of TensorSpec - objects, describing a candidate input for the layer. - - Returns: - Single TensorSpec or nested structure of TensorSpec objects, describing - how the layer would transform the provided input. - - Raises: - TypeError: If input_signature contains a non-TensorSpec object. - """ - def check_type_return_shape(s): - if not isinstance(s, tf.TensorSpec): - raise TypeError('Only TensorSpec signature types are supported, ' - 'but saw signature entry: {}.'.format(s)) - return s.shape - input_shape = tf.nest.map_structure(check_type_return_shape, input_signature) - output_shape = self.compute_output_shape(input_shape) - dtype = self._compute_dtype - if dtype is None: - input_dtypes = [s.dtype for s in tf.nest.flatten(input_signature)] - # Default behavior when self.dtype is None, is to use the first input's - # dtype. - dtype = input_dtypes[0] - return tf.nest.map_structure( - lambda s: tf.TensorSpec(dtype=dtype, shape=s), - output_shape) - - @generic_utils.default - def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument - """Computes an output mask tensor. + "Layers with arguments in `__init__` must " + "override `get_config`." + ) + return config + + @classmethod + def from_config(cls, config): + """Creates a layer from its config. + + This method is the reverse of `get_config`, + capable of instantiating the same layer from the config + dictionary. It does not handle layer connectivity + (handled by Network), nor weights (handled by `set_weights`). + + Args: + config: A Python dictionary, typically the + output of get_config. + + Returns: + A layer instance. + """ + return cls(**config) + + def compute_output_shape(self, input_shape): + """Computes the output shape of the layer. 
+ + If the layer has not been built, this method will call `build` on the + layer. This assumes that the layer will later be used with inputs that + match the input shape provided here. + + Args: + input_shape: Shape tuple (tuple of integers) + or list of shape tuples (one per output tensor of the layer). + Shape tuples can include None for free dimensions, + instead of an integer. + + Returns: + An input shape tuple. + """ + if tf.executing_eagerly(): + # In this case we build the model first in order to do shape + # inference. This is acceptable because the framework only calls + # `compute_output_shape` on shape values that the layer would later + # be built for. It would however cause issues in case a user + # attempts to use `compute_output_shape` manually with shapes that + # are incompatible with the shape the Layer will be called on (these + # users will have to implement `compute_output_shape` themselves). + self._maybe_build(input_shape) + with tf.compat.v1.get_default_graph().as_default(): + graph = tf.__internal__.FuncGraph("graph") + with graph.as_default(): + input_shape = tf_utils.convert_shapes( + input_shape, to_tuples=False + ) + inputs = tf.nest.map_structure( + base_layer_utils.generate_placeholders_from_shape, + input_shape, + ) + try: + outputs = self(inputs, training=False) + except TypeError as e: + raise NotImplementedError( + "We could not automatically infer the static " + "shape of the layer's output. Please implement the " + "`compute_output_shape` method on your layer (%s)." + % self.__class__.__name__ + ) from e + return tf.nest.map_structure(lambda t: t.shape, outputs) + raise NotImplementedError + + @doc_controls.for_subclass_implementers + def compute_output_signature(self, input_signature): + """Compute the output tensor signature of the layer based on the inputs. + + Unlike a TensorShape object, a TensorSpec object contains both shape + and dtype information for a tensor. This method allows layers to provide + output dtype information if it is different from the input dtype. + For any layer that doesn't implement this function, + the framework will fall back to use `compute_output_shape`, and will + assume that the output dtype matches the input dtype. + + Args: + input_signature: Single TensorSpec or nested structure of TensorSpec + objects, describing a candidate input for the layer. + + Returns: + Single TensorSpec or nested structure of TensorSpec objects, + describing how the layer would transform the provided input. + + Raises: + TypeError: If input_signature contains a non-TensorSpec object. + """ + + def check_type_return_shape(s): + if not isinstance(s, tf.TensorSpec): + raise TypeError( + "Only TensorSpec signature types are supported, " + "but saw signature entry: {}.".format(s) + ) + return s.shape + + input_shape = tf.nest.map_structure( + check_type_return_shape, input_signature + ) + output_shape = self.compute_output_shape(input_shape) + dtype = self._compute_dtype + if dtype is None: + input_dtypes = [s.dtype for s in tf.nest.flatten(input_signature)] + # Default behavior when self.dtype is None, is to use the first + # input's dtype. + dtype = input_dtypes[0] + return tf.nest.map_structure( + lambda s: tf.TensorSpec(dtype=dtype, shape=s), output_shape + ) + + @generic_utils.default + def compute_mask(self, inputs, mask=None): + """Computes an output mask tensor. + + Args: + inputs: Tensor or list of tensors. + mask: Tensor or list of tensors. + + Returns: + None or a tensor (or list of tensors, + one per output tensor of the layer). 
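The eager branch of `compute_output_shape` above infers shapes by actually building the layer and tracing a call; as the error message notes, layers for which that tracing fails should implement the method themselves. A sketch with a hypothetical `Concat2x` layer:

```python
import tensorflow as tf

class Concat2x(tf.keras.layers.Layer):
    """Concatenates the input with itself along the last axis."""

    def call(self, inputs):
        return tf.concat([inputs, inputs], axis=-1)

    def compute_output_shape(self, input_shape):
        # Static shape inference without running the layer.
        input_shape = tf.TensorShape(input_shape).as_list()
        return tf.TensorShape(input_shape[:-1] + [input_shape[-1] * 2])

print(Concat2x().compute_output_shape((None, 3)))  # (None, 6)
```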
+ """ + if not self.supports_masking: + if any(m is not None for m in tf.nest.flatten(mask)): + raise TypeError( + "Layer " + self.name + " does not support masking, " + "but was passed an input_mask: " + str(mask) + ) + # masking not explicitly supported: return None as mask. + return None + # if masking is explicitly supported, by default + # carry over the input mask + return mask + + def __call__(self, *args, **kwargs): + """Wraps `call`, applying pre- and post-processing steps. + + Args: + *args: Positional arguments to be passed to `self.call`. + **kwargs: Keyword arguments to be passed to `self.call`. + + Returns: + Output tensor(s). + + Note: + - The following optional keyword arguments are reserved for specific + uses: + * `training`: Boolean scalar tensor of Python boolean indicating + whether the `call` is meant for training or inference. + * `mask`: Boolean input mask. + - If the layer's `call` method takes a `mask` argument (as some Keras + layers do), its default value will be set to the mask generated + for `inputs` by the previous layer (if `input` did come from + a layer that generated a corresponding mask, i.e. if it came from + a Keras layer with masking support. + + Raises: + ValueError: if the layer's `call` method returns None (an invalid + value). + RuntimeError: if `super().__init__()` was not called in the + constructor. + """ + self._assert_built_as_v1() + + if not hasattr(self, "_thread_local"): + raise RuntimeError( + "You must call `super().__init__()` in the layer constructor." + ) + + # Grab the first positional or keyword argument. + if args: + inputs = args[0] + args = args[1:] + elif self._call_spec.arg_names[0] in kwargs: + inputs = kwargs.pop(self._call_spec.arg_names[0]) + else: + raise ValueError( + "The first argument to `Layer.call` must always be passed." + ) + + call_context = base_layer_utils.call_context() + input_list = tf.nest.flatten(inputs) + + # We will attempt to build a TF graph if & only if all inputs are + # symbolic. This is always the case in graph mode. It can also be the + # case in eager mode when all inputs can be traced back to + # `keras.Input()` (when building models using the functional API). + build_graph = tf_utils.are_all_symbolic_tensors(input_list) + + # Accept NumPy and scalar inputs by converting to Tensors. + if any(isinstance(x, (np.ndarray, float, int)) for x in input_list): + + def _convert_non_tensor(x): + # Don't call `ops.convert_to_tensor` on all `inputs` because + # `SparseTensors` can't be converted to `Tensor`. + if isinstance(x, (np.ndarray, float, int)): + return tf.convert_to_tensor(x) + return x + + inputs = tf.nest.map_structure(_convert_non_tensor, inputs) + input_list = tf.nest.flatten(inputs) + + # Handle `mask` propagation from previous layer to current layer. Masks + # can be propagated explicitly via the `mask` argument, or implicitly + # via setting the `_keras_mask` attribute on the inputs to a Layer. + # Masks passed explicitly take priority. + mask_arg_passed_by_framework = False + input_masks = self._collect_input_masks(inputs, args, kwargs) + if ( + self._expects_mask_arg + and input_masks is not None + and not self._call_spec.arg_was_passed("mask", args, kwargs) + ): + mask_arg_passed_by_framework = True + kwargs["mask"] = input_masks + + # If `training` argument is None or not explicitly passed, + # propagate `training` value from this layer's calling layer. + training_value = None + training_arg_passed_by_framework = False + # Priority 1: `training` was explicitly passed. 
+ if self._call_spec.arg_was_passed("training", args, kwargs): + training_value = self._call_spec.get_arg_value( + "training", args, kwargs + ) + if not self._expects_training_arg: + kwargs.pop("training") + + if training_value is None: + # Priority 2: `training` was passed to a parent layer. + if call_context.training is not None: + training_value = call_context.training + # Priority 3a: `learning_phase()` has been set. + elif backend.global_learning_phase_is_set(): + training_value = backend.learning_phase() + # Priority 3b: Pass the `learning_phase()` if in the Keras + # FuncGraph. + elif build_graph: + with backend.get_graph().as_default(): + if base_layer_utils.is_in_keras_graph(): + training_value = backend.learning_phase() + + if self._expects_training_arg and training_value is not None: + # Force the training_value to be bool type which matches to the + # contract for layer/model call args. + if tf.is_tensor(training_value): + training_value = tf.cast(training_value, tf.bool) + else: + training_value = bool(training_value) + args, kwargs = self._call_spec.set_arg_value( + "training", training_value, args, kwargs + ) + training_arg_passed_by_framework = True + + # Only create Keras history if at least one tensor originates from a + # `keras.Input`. Otherwise this Layer may be being used outside the + # Keras framework. + if build_graph and base_layer_utils.needs_keras_history(inputs): + base_layer_utils.create_keras_history(inputs) + + with call_context.enter(self, inputs, build_graph, training_value): + # Check input assumptions set after layer building, e.g. input + # shape. + if build_graph: + # Symbolic execution on symbolic tensors. We will attempt to + # build the corresponding TF subgraph inside + # `backend.get_graph()` + input_spec.assert_input_compatibility( + self.input_spec, inputs, self.name + ) + graph = backend.get_graph() + with graph.as_default(), backend.name_scope(self._name_scope()): + # Build layer if applicable (if the `build` method has been + # overridden). + self._maybe_build(inputs) + cast_inputs = self._maybe_cast_inputs(inputs) + + # Wrapping `call` function in autograph to allow for dynamic + # control flow and control dependencies in call. We are + # limiting this to subclassed layers as autograph is + # strictly needed only for subclassed layers and models. + # tf_convert will respect the value of autograph setting in + # the enclosing tf.function, if any. + if base_layer_utils.is_subclassed( + self + ) and not base_layer_utils.from_saved_model(self): + call_fn = tf.__internal__.autograph.tf_convert( + self.call, + tf.__internal__.autograph.control_status_ctx(), + ) + else: + call_fn = self.call + + if not self.dynamic: + try: + with autocast_variable.enable_auto_cast_variables( + self._compute_dtype_object + ): + outputs = call_fn(cast_inputs, *args, **kwargs) + + except tf.errors.OperatorNotAllowedInGraphError as e: + raise TypeError( + "You are attempting to use Python control " + "flow in a layer that was not declared to be " + "dynamic. Pass `dynamic=True` to the class " + 'constructor.\nEncountered error:\n"""\n' + + str(e) + + '\n"""' + ) + else: + # We will use static shape inference to return symbolic + # tensors matching the specifications of the layer + # outputs. Since `self.dynamic` is True, we will never + # attempt to run the underlying TF graph (which is + # disconnected). + # TODO(fchollet): consider py_func as an alternative, + # which would enable us to run the underlying graph if + # needed. 
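Given the `training` priority chain above, a user-defined layer normally just declares `training=None` and lets the framework fill it in from the calling context. A sketch with a hypothetical `NoisyLayer`, assuming eager TF 2.x:

```python
import tensorflow as tf

class NoisyLayer(tf.keras.layers.Layer):
    """Adds noise only in training mode."""

    def call(self, inputs, training=None):
        # When `training` isn't passed explicitly, the framework resolves it
        # (explicit arg, then parent layer, then the learning phase).
        if training:
            return inputs + tf.random.normal(tf.shape(inputs), stddev=0.1)
        return inputs

layer = NoisyLayer()
x = tf.zeros((2, 3))
print(tf.reduce_all(layer(x, training=False) == 0).numpy())  # True
print(tf.reduce_all(layer(x, training=True) == 0).numpy())   # False
```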
+                        outputs = self._symbolic_call(inputs)
+
+                    if outputs is None:
+                        raise ValueError(
+                            "A layer's `call` method should return a "
+                            "Tensor or a list of Tensors, not None "
+                            "(layer: " + self.name + ")."
+                        )
+                    if base_layer_utils.have_all_keras_metadata(inputs):
+                        if training_arg_passed_by_framework:
+                            args, kwargs = self._call_spec.set_arg_value(
+                                "training",
+                                None,
+                                args,
+                                kwargs,
+                                pop_kwarg_if_none=True,
+                            )
+                        if mask_arg_passed_by_framework:
+                            kwargs.pop("mask")
+                        outputs = self._set_connectivity_metadata(
+                            (inputs,) + args, kwargs, outputs
+                        )
+                    self._handle_activity_regularization(inputs, outputs)
+                    self._set_mask_metadata(inputs, outputs, input_masks)
+                    if hasattr(self, "_set_inputs") and not self.inputs:
+                        # Subclassed network: explicitly set metadata normally
+                        # set by a call to self._set_inputs().
+                        # TODO(b/120997007): This should be done in Eager as
+                        # well, but causes garbage collection issues because of
+                        # the placeholders created on the default Keras graph.
+                        self._set_save_spec(inputs, args, kwargs)
+                        self._set_inputs(inputs, outputs)
+            else:
+                # Eager execution on data tensors.
+                with backend.name_scope(self._name_scope()):
+                    self._maybe_build(inputs)
+                    cast_inputs = self._maybe_cast_inputs(inputs)
+                    with autocast_variable.enable_auto_cast_variables(
+                        self._compute_dtype_object
+                    ):
+                        outputs = self.call(cast_inputs, *args, **kwargs)
+                    self._handle_activity_regularization(inputs, outputs)
+                    self._set_mask_metadata(inputs, outputs, input_masks)
+
+        return outputs
+
+    def _assert_built_as_v1(self):
+        if not hasattr(self, "_originally_built_as_v1"):
+            raise ValueError(
+                "Your Layer or Model is in an invalid state. "
+                "This can happen for the following cases:\n "
+                "1. You might be interleaving estimator/non-estimator models "
+                "or interleaving models/layers made in "
+                "tf.compat.v1.Graph.as_default() with models/layers created "
+                "outside of it. "
+                "Converting a model to an estimator (via model_to_estimator) "
+                "invalidates all models/layers made before the conversion "
+                "(even if they were not the model converted to an estimator). "
+                "Similarly, making a layer or a model inside "
+                "a tf.compat.v1.Graph invalidates all layers/models you "
+                "previously made outside of the graph.\n"
+                "2. You might be using a custom keras layer implementation "
+                "with custom __init__ which didn't call super().__init__. "
+                "Please check the implementation of %s and its bases."
+ % (type(self),) + ) + + @property + def dtype(self): + return self._dtype_policy.variable_dtype + + @property + def name(self): + return self._name + + @property + def dynamic(self): + return any(layer._dynamic for layer in self._flatten_layers()) + + @property + @doc_controls.do_not_generate_docs + def stateful(self): + return any(layer._stateful for layer in self._flatten_layers()) + + @stateful.setter + def stateful(self, value): + self._stateful = value + + @property + def trainable(self): + return self._trainable + + @trainable.setter + def trainable(self, value): + self._trainable = value + for layer in getattr(self, "_self_tracked_trackables", []): + layer.trainable = value + + @property + def activity_regularizer(self): + """Optional regularizer function for the output of this layer.""" + return self._activity_regularizer + + @activity_regularizer.setter + def activity_regularizer(self, regularizer): + """Optional regularizer function for the output of this layer.""" + self._activity_regularizer = regularizer + + @property + def input_spec(self): + return self._input_spec + + @input_spec.setter + # Must be decorated to prevent tracking, since the input_spec can be nested + # InputSpec objects. + @tf.__internal__.tracking.no_automatic_dependency_tracking + def input_spec(self, value): + for v in tf.nest.flatten(value): + if v is not None and "InputSpec" not in v.__class__.__name__: + raise TypeError( + "Layer input_spec must be an instance of InputSpec. " + "Got: {}".format(v) + ) + self._input_spec = value + + @property + def updates(self): + collected_updates = [] + all_layers = self._flatten_layers() + with backend.get_graph().as_default(): + for layer in all_layers: + if not layer.trainable and not layer.stateful: + continue + for u in layer._updates: + if callable(u): + try: + u = u() + except ValueError as e: + if "InaccessibleTensorError" in type(e).__name__: + # For one specific case of error we try to raise + # a more meaningful error message about the + # graph if we can. This error is an internal TF + # symbol that is not publicly exposed, so we + # check the name directly rather than using a + # direct import. + base_layer_utils.check_graph_consistency( + method="add_update", force_raise=True + ) + # check_graph_consistency may not always raise. + raise + base_layer_utils.check_graph_consistency( + u, method="add_update" + ) + collected_updates.append(u) + return collected_updates + + @property + def losses(self): + """Losses which are associated with this `Layer`. + + Variable regularization tensors are created when this property is + accessed, so it is eager safe: accessing `losses` under a + `tf.GradientTape` will propagate gradients back to the corresponding + variables. + + Returns: + A list of tensors. + """ + collected_losses = [] + all_layers = self._flatten_layers() + for layer in all_layers: + # If any eager losses are present, we assume the model to be part of + # an eager training loop (either a custom one or the one used when + # `run_eagerly=True`) and so we always return just the eager losses. + collected_losses.extend(layer._losses) + for regularizer in layer._callable_losses: + loss_tensor = regularizer() + if loss_tensor is not None: + collected_losses.append(loss_tensor) + return collected_losses + + @doc_controls.for_subclass_implementers + def add_loss(self, losses, inputs=None): + """Add loss tensor(s), potentially dependent on layer inputs. 
+
+        Some losses (for instance, activity regularization losses) may be
+        dependent on the inputs passed when calling a layer. Hence, when reusing
+        the same layer on different inputs `a` and `b`, some entries in
+        `layer.losses` may be dependent on `a` and some on `b`. This method
+        automatically keeps track of dependencies.
+
+        This method can be used inside a subclassed layer or model's `call`
+        function, in which case `losses` should be a Tensor or list of Tensors.
+
+        Example:
+
+        ```python
+        class MyLayer(tf.keras.layers.Layer):
+            def call(self, inputs):
+                self.add_loss(tf.abs(tf.reduce_mean(inputs)), inputs=True)
+                return inputs
+        ```
+
+        This method can also be called directly on a Functional Model during
+        construction. In this case, any loss Tensors passed to this Model must
+        be symbolic and be able to be traced back to the model's `Input`s. These
+        losses become part of the model's topology and are tracked in
+        `get_config`.
+
+        Example:
+
+        ```python
+        inputs = tf.keras.Input(shape=(10,))
+        x = tf.keras.layers.Dense(10)(inputs)
+        outputs = tf.keras.layers.Dense(1)(x)
+        model = tf.keras.Model(inputs, outputs)
+        # Activity regularization.
+        model.add_loss(tf.abs(tf.reduce_mean(x)))
+        ```
+
+        If this is not the case for your loss (if, for example, your loss
+        references a `Variable` of one of the model's layers), you can wrap your
+        loss in a zero-argument lambda. These losses are not tracked as part of
+        the model's topology since they can't be serialized.
+
+        Example:
+
+        ```python
+        inputs = tf.keras.Input(shape=(10,))
+        d = tf.keras.layers.Dense(10)
+        x = d(inputs)
+        outputs = tf.keras.layers.Dense(1)(x)
+        model = tf.keras.Model(inputs, outputs)
+        # Weight regularization.
+        model.add_loss(lambda: tf.reduce_mean(d.kernel))
+        ```
+
+        Args:
+            losses: Loss tensor, or list/tuple of tensors. Rather than tensors,
+                losses may also be zero-argument callables which create a loss
+                tensor.
+            inputs: Ignored when executing eagerly. If anything other than None is
+                passed, it signals the losses are conditional on some of the layer's
+                inputs, and thus they should only be run where these inputs are
+                available. This is the case for activity regularization losses, for
+                instance. If `None` is passed, the losses are assumed
+                to be unconditional, and will apply across all dataflows of the
+                layer (e.g. weight regularization losses).
+        """
+
+        def _tag_unconditional(loss):
+            """Process the loss and tag it by setting ._unconditional_loss."""
+            if callable(loss):
+                # We run the loss without autocasting, as regularizers are often
+                # numerically unstable in float16.
+                with autocast_variable.enable_auto_cast_variables(None):
+                    loss = loss()
+            if loss is None:
+                # Will be filtered out when computing the .losses property
+                return None
+            if not tf.is_tensor(loss):
+                loss = tf.convert_to_tensor(loss, dtype=backend.floatx())
+            loss._unconditional_loss = inputs is None
+            return loss
+
+        losses = tf.nest.flatten(losses)
+
+        callable_losses = []
+        symbolic_losses = []
+        for loss in losses:
+            if callable(loss):
+                callable_losses.append(
+                    functools.partial(_tag_unconditional, loss)
+                )
+                continue
+            if loss is None:
+                continue
+            if not tf.is_tensor(loss):
+                loss = tf.convert_to_tensor(loss, dtype=backend.floatx())
+            # TF Functions should take the eager path.
+ if ( + tf_utils.is_symbolic_tensor(loss) + and not base_layer_utils.is_in_tf_function() + ): + symbolic_losses.append(_tag_unconditional(loss)) + base_layer_utils.check_graph_consistency( + loss, method="add_loss" + ) - Args: - inputs: Tensor or list of tensors. - mask: Tensor or list of tensors. + self._callable_losses.extend(callable_losses) - Returns: - None or a tensor (or list of tensors, - one per output tensor of the layer). - """ - if not self.supports_masking: - if any(m is not None for m in tf.nest.flatten(mask)): - raise TypeError('Layer ' + self.name + ' does not support masking, ' - 'but was passed an input_mask: ' + str(mask)) - # masking not explicitly supported: return None as mask. - return None - # if masking is explicitly supported, by default - # carry over the input mask - return mask - - def __call__(self, *args, **kwargs): - """Wraps `call`, applying pre- and post-processing steps. + in_call_context = base_layer_utils.call_context().in_call - Args: - *args: Positional arguments to be passed to `self.call`. - **kwargs: Keyword arguments to be passed to `self.call`. - - Returns: - Output tensor(s). - - Note: - - The following optional keyword arguments are reserved for specific uses: - * `training`: Boolean scalar tensor of Python boolean indicating - whether the `call` is meant for training or inference. - * `mask`: Boolean input mask. - - If the layer's `call` method takes a `mask` argument (as some Keras - layers do), its default value will be set to the mask generated - for `inputs` by the previous layer (if `input` did come from - a layer that generated a corresponding mask, i.e. if it came from - a Keras layer with masking support. - - Raises: - ValueError: if the layer's `call` method returns None (an invalid value). - RuntimeError: if `super().__init__()` was not called in the constructor. - """ - self._assert_built_as_v1() - - if not hasattr(self, '_thread_local'): - raise RuntimeError( - 'You must call `super().__init__()` in the layer constructor.') - - # Grab the first positional or keyword argument. - if args: - inputs = args[0] - args = args[1:] - elif self._call_spec.arg_names[0] in kwargs: - inputs = kwargs.pop(self._call_spec.arg_names[0]) - else: - raise ValueError( - 'The first argument to `Layer.call` must always be passed.') - - call_context = base_layer_utils.call_context() - input_list = tf.nest.flatten(inputs) - - # We will attempt to build a TF graph if & only if all inputs are symbolic. - # This is always the case in graph mode. It can also be the case in eager - # mode when all inputs can be traced back to `keras.Input()` (when building - # models using the functional API). - build_graph = tf_utils.are_all_symbolic_tensors(input_list) - - # Accept NumPy and scalar inputs by converting to Tensors. - if any(isinstance(x, (np.ndarray, float, int)) for x in input_list): - def _convert_non_tensor(x): - # Don't call `ops.convert_to_tensor` on all `inputs` because - # `SparseTensors` can't be converted to `Tensor`. - if isinstance(x, (np.ndarray, float, int)): - return tf.convert_to_tensor(x) - return x - inputs = tf.nest.map_structure(_convert_non_tensor, inputs) - input_list = tf.nest.flatten(inputs) - - # Handle `mask` propagation from previous layer to current layer. Masks can - # be propagated explicitly via the `mask` argument, or implicitly via - # setting the `_keras_mask` attribute on the inputs to a Layer. Masks passed - # explicitly take priority. 
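The mask plumbing described above — an explicit `mask` kwarg that the framework fills in when a layer's `call` declares one — looks like this from the user's side. A sketch with a hypothetical `MaskedMean` layer, assuming the TF 2.x functional API:

```python
import tensorflow as tf

class MaskedMean(tf.keras.layers.Layer):
    """Mean over the time axis, ignoring masked steps."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True  # opt in to mask propagation

    def call(self, inputs, mask=None):
        # `mask` is filled in by the framework from the previous layer's
        # compute_mask output (here: Embedding with mask_zero=True).
        if mask is None:
            return tf.reduce_mean(inputs, axis=1)
        mask = tf.cast(mask, inputs.dtype)[:, :, tf.newaxis]
        return tf.reduce_sum(inputs * mask, axis=1) / tf.reduce_sum(mask, axis=1)

tokens = tf.keras.Input(shape=(None,), dtype="int32")
embedded = tf.keras.layers.Embedding(100, 8, mask_zero=True)(tokens)
pooled = MaskedMean()(embedded)   # mask arrives without being passed by hand
model = tf.keras.Model(tokens, pooled)
```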
- mask_arg_passed_by_framework = False - input_masks = self._collect_input_masks(inputs, args, kwargs) - if (self._expects_mask_arg and input_masks is not None and - not self._call_spec.arg_was_passed('mask', args, kwargs)): - mask_arg_passed_by_framework = True - kwargs['mask'] = input_masks - - # If `training` argument is None or not explicitly passed, - # propagate `training` value from this layer's calling layer. - training_value = None - training_arg_passed_by_framework = False - # Priority 1: `training` was explicitly passed. - if self._call_spec.arg_was_passed('training', args, kwargs): - training_value = self._call_spec.get_arg_value('training', args, kwargs) - if not self._expects_training_arg: - kwargs.pop('training') - - if training_value is None: - # Priority 2: `training` was passed to a parent layer. - if call_context.training is not None: - training_value = call_context.training - # Priority 3a: `learning_phase()` has been set. - elif backend.global_learning_phase_is_set(): - training_value = backend.learning_phase() - # Priority 3b: Pass the `learning_phase()` if in the Keras FuncGraph. - elif build_graph: - with backend.get_graph().as_default(): - if base_layer_utils.is_in_keras_graph(): - training_value = backend.learning_phase() - - if self._expects_training_arg and training_value is not None: - # Force the training_value to be bool type which matches to the contract - # for layer/model call args. - if tf.is_tensor(training_value): - training_value = tf.cast(training_value, tf.bool) + if in_call_context: + for symbolic_loss in symbolic_losses: + self._losses.append(symbolic_loss) else: - training_value = bool(training_value) - args, kwargs = self._call_spec.set_arg_value('training', training_value, - args, kwargs) - training_arg_passed_by_framework = True - - # Only create Keras history if at least one tensor originates from a - # `keras.Input`. Otherwise this Layer may be being used outside the Keras - # framework. - if build_graph and base_layer_utils.needs_keras_history(inputs): - base_layer_utils.create_keras_history(inputs) - - with call_context.enter(self, inputs, build_graph, training_value): - # Check input assumptions set after layer building, e.g. input shape. - if build_graph: - # Symbolic execution on symbolic tensors. We will attempt to build - # the corresponding TF subgraph inside `backend.get_graph()` - input_spec.assert_input_compatibility(self.input_spec, inputs, - self.name) - graph = backend.get_graph() - with graph.as_default(), backend.name_scope(self._name_scope()): # pylint: disable=not-callable - # Build layer if applicable (if the `build` method has been - # overridden). - self._maybe_build(inputs) - cast_inputs = self._maybe_cast_inputs(inputs) - - # Wrapping `call` function in autograph to allow for dynamic control - # flow and control dependencies in call. We are limiting this to - # subclassed layers as autograph is strictly needed only for - # subclassed layers and models. - # tf_convert will respect the value of autograph setting in the - # enclosing tf.function, if any. 
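The removed v1 `__call__` logic above resolves `training` by priority: an explicitly passed argument wins, then the value propagated from an enclosing layer's call, then the global learning phase. In user-facing terms, a sketch (the `Wrapper` layer is a hypothetical illustration, not from this diff):

```python
import tensorflow as tf

class Wrapper(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()
        self.drop = tf.keras.layers.Dropout(0.5)

    def call(self, inputs, training=None):
        # No explicit `training` is passed to the sublayer here, so it
        # inherits the value from this call's context (priority 2).
        return self.drop(inputs)

wrapper = Wrapper()
x = tf.ones((4, 8))

y = wrapper(x, training=True)   # priority 1: explicit argument
z = wrapper(x, training=False)  # dropout disabled throughout
# Priority 3 (legacy): tf.keras.backend.set_learning_phase(1) supplies
# a default when neither of the above is set.
```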
- if (base_layer_utils.is_subclassed(self) and - not base_layer_utils.from_saved_model(self)): - call_fn = tf.__internal__.autograph.tf_convert( - self.call, tf.__internal__.autograph.control_status_ctx()) - else: - call_fn = self.call - - if not self.dynamic: - try: - with autocast_variable.enable_auto_cast_variables( - self._compute_dtype_object): - outputs = call_fn(cast_inputs, *args, **kwargs) - - except tf.errors.OperatorNotAllowedInGraphError as e: - raise TypeError('You are attempting to use Python control ' - 'flow in a layer that was not declared to be ' - 'dynamic. Pass `dynamic=True` to the class ' - 'constructor.\nEncountered error:\n"""\n' + - str(e) + '\n"""') - else: - # We will use static shape inference to return symbolic tensors - # matching the specifications of the layer outputs. - # Since `self.dynamic` is True, we will never attempt to - # run the underlying TF graph (which is disconnected). - # TODO(fchollet): consider py_func as an alternative, which - # would enable us to run the underlying graph if needed. - outputs = self._symbolic_call(inputs) - - if outputs is None: - raise ValueError('A layer\'s `call` method should return a ' - 'Tensor or a list of Tensors, not None ' - '(layer: ' + self.name + ').') - if base_layer_utils.have_all_keras_metadata(inputs): - if training_arg_passed_by_framework: - args, kwargs = self._call_spec.set_arg_value( - 'training', None, args, kwargs, pop_kwarg_if_none=True) - if mask_arg_passed_by_framework: - kwargs.pop('mask') - outputs = self._set_connectivity_metadata((inputs,) + args, kwargs, - outputs) - self._handle_activity_regularization(inputs, outputs) - self._set_mask_metadata(inputs, outputs, input_masks) - if hasattr(self, '_set_inputs') and not self.inputs: - # Subclassed network: explicitly set metadata normally set by - # a call to self._set_inputs(). - # TODO(b/120997007): This should be done in Eager as well, but - # causes garbage collection issues because of the placeholders - # created on the default Keras graph. - self._set_save_spec(inputs, args, kwargs) - self._set_inputs(inputs, outputs) - else: - # Eager execution on data tensors. - with backend.name_scope(self._name_scope()): # pylint: disable=not-callable - self._maybe_build(inputs) - cast_inputs = self._maybe_cast_inputs(inputs) - with autocast_variable.enable_auto_cast_variables( - self._compute_dtype_object): - outputs = self.call(cast_inputs, *args, **kwargs) - self._handle_activity_regularization(inputs, outputs) - self._set_mask_metadata(inputs, outputs, input_masks) - - return outputs - - def _assert_built_as_v1(self): - if not hasattr(self, '_originally_built_as_v1'): - raise ValueError( - 'Your Layer or Model is in an invalid state. ' - 'This can happen for the following cases:\n ' - '1. You might be interleaving estimator/non-estimator models or ' - 'interleaving models/layers made in tf.compat.v1.Graph.as_default() ' - 'with models/layers created outside of it. ' - 'Converting a model to an estimator (via model_to_estimator) ' - 'invalidates all models/layers made before the conversion (even ' - 'if they were not the model converted to an estimator). ' - 'Similarly, making a layer or a model inside a ' - 'a tf.compat.v1.Graph invalidates all layers/models you previously ' - 'made outside of the graph.\n' - '2. You might be using a custom keras layer implementation with ' - ' custom __init__ which didn\'t call super().__init__. ' - ' Please check the implementation of %s and its bases.' 
% - (type(self),)) - - @property - def dtype(self): - return self._dtype_policy.variable_dtype - - @property - def name(self): - return self._name - - @property - def dynamic(self): - return any(layer._dynamic for layer in self._flatten_layers()) - - @property - @doc_controls.do_not_generate_docs - def stateful(self): - return any(layer._stateful for layer in self._flatten_layers()) - - @stateful.setter - def stateful(self, value): - self._stateful = value - - @property - def trainable(self): - return self._trainable - - @trainable.setter - def trainable(self, value): - self._trainable = value - for layer in getattr(self, '_self_tracked_trackables', []): - layer.trainable = value - - @property - def activity_regularizer(self): - """Optional regularizer function for the output of this layer.""" - return self._activity_regularizer - - @activity_regularizer.setter - def activity_regularizer(self, regularizer): - """Optional regularizer function for the output of this layer.""" - self._activity_regularizer = regularizer - - @property - def input_spec(self): - return self._input_spec - - @input_spec.setter - # Must be decorated to prevent tracking, since the input_spec can be nested - # InputSpec objects. - @tf.__internal__.tracking.no_automatic_dependency_tracking - def input_spec(self, value): - for v in tf.nest.flatten(value): - if v is not None and not isinstance(v, input_spec.InputSpec): - raise TypeError('Layer input_spec must be an instance of InputSpec. ' - 'Got: {}'.format(v)) - self._input_spec = value - - @property - def updates(self): - collected_updates = [] - all_layers = self._flatten_layers() - with backend.get_graph().as_default(): - for layer in all_layers: - if not layer.trainable and not layer.stateful: - continue - for u in layer._updates: - if callable(u): + for symbolic_loss in symbolic_losses: + if getattr(self, "_is_graph_network", False): + self._graph_network_add_loss(symbolic_loss) + else: + # Possible a loss was added in a Layer's `build`. + self._losses.append(symbolic_loss) + + @property + def metrics(self): + collected_metrics = [] + for layer in self._flatten_layers(): + collected_metrics.extend(layer._metrics) + return collected_metrics + + @doc_controls.for_subclass_implementers + def add_metric(self, value, aggregation=None, name=None): + """Adds metric tensor to the layer. + + Args: + value: Metric tensor. + aggregation: Sample-wise metric reduction function. If + `aggregation=None`, it indicates that the metric tensor provided has + been aggregated already. eg, `bin_acc = BinaryAccuracy(name='acc')` + followed by `model.add_metric(bin_acc(y_true, y_pred))`. If + aggregation='mean', the given metric tensor will be sample-wise + reduced using `mean` function. eg, + `model.add_metric(tf.reduce_sum(outputs), name='output_mean', + aggregation='mean')`. + name: String metric name. + + Raises: + ValueError: If `aggregation` is anything other than None or `mean`. + """ + if aggregation is not None and aggregation != "mean": + raise ValueError( + "We currently support only `mean` sample-wise metric " + "aggregation. You provided aggregation=`%s`" % aggregation + ) + + from_metric_obj = hasattr(value, "_metric_obj") + is_symbolic = tf_utils.is_symbolic_tensor(value) + in_call_context = base_layer_utils.call_context().in_call + + if name is None and not from_metric_obj: + # Eg. `self.add_metric(math_ops.reduce_sum(x), aggregation='mean')` + # In eager mode, we use metric name to lookup a metric. 
Without a
+            # name, a new Mean metric wrapper will be created on every
+            # model/layer call. So, we raise an error when no name is
+            # provided. We will do the same for symbolic mode for consistency
+            # although a name will be generated if no name is provided.
+
+            # We will not raise this error in the following use case for the
+            # sake of consistency, as the name is provided in the metric
+            # constructor.
+            # mean = metrics.Mean(name='my_metric')
+            # model.add_metric(mean(outputs))
+            raise ValueError(
+                "Please provide a name for your metric like "
+                "`self.add_metric(tf.reduce_sum(inputs), "
+                "name='mean_activation', aggregation='mean')`"
+            )
+        elif from_metric_obj:
+            name = value._metric_obj.name
+
+        if in_call_context:
+            # TF Function path should take the eager path.
+            self._symbolic_add_metric(value, aggregation, name)
+        else:
+            if not is_symbolic:
+                raise ValueError(
+                    "Expected a symbolic Tensor for the metric value, "
+                    "received: " + str(value)
+                )
+
+            # Possibly a metric was added in a Layer's `build`.
+            if not getattr(self, "_is_graph_network", False):
+                with backend.get_graph().as_default():
+                    self._symbolic_add_metric(value, aggregation, name)
+                return
+
+            if from_metric_obj:
+                raise ValueError(
+                    "Using the result of calling a `Metric` object "
+                    "when calling `add_metric` on a Functional "
+                    "Model is not supported. Please pass the "
+                    "Tensor to monitor directly."
+                )
+
+            # Insert layers into the Keras Graph Network.
+            self._graph_network_add_metric(value, aggregation, name)
+
+    @doc_controls.for_subclass_implementers
+    def add_update(self, updates):
+        """Add update op(s), potentially dependent on layer inputs.
+
+        Weight updates (for instance, the updates of the moving mean and
+        variance in a BatchNormalization layer) may be dependent on the inputs
+        passed when calling a layer. Hence, when reusing the same layer on
+        different inputs `a` and `b`, some entries in `layer.updates` may be
+        dependent on `a` and some on `b`. This method automatically keeps
+        track of dependencies.
+
+        The `get_updates_for` method allows retrieving the updates relevant
+        to a specific set of inputs.
+
+        This call is ignored when eager execution is enabled (in that case,
+        variable updates are run on the fly and thus do not need to be tracked
+        for later execution).
+
+        Args:
+          updates: Update op, or list/tuple of update ops, or zero-arg
+            callable that returns an update op. A zero-arg callable should be
+            passed in order to disable running the updates by setting
+            `trainable=False` on this Layer, when executing in Eager mode.
+        """
+        call_context = base_layer_utils.call_context()
+
+        if (
+            tf.distribute.has_strategy()
+            and tf.distribute.in_cross_replica_context()
+            # When saving the model, the distribution strategy context should
+            # be ignored, following the default path for adding updates.
+            and not call_context.saving
+        ):
+            # Updates don't need to be run in a cross-replica context.
+            return
+
+        updates = generic_utils.to_list(updates)
+
+        if call_context.in_call:
+            relevant_inputs = call_context.inputs
+        else:
+            inbound_nodes = getattr(self, "_inbound_nodes", [])
+            relevant_inputs = [node.input_tensors for node in inbound_nodes]
+
+        def process_update(x):
+            """Standardize update ops.
+
+            Args:
+              x: Tensor, op, or callable.
+
+            Returns:
+              An update op.
+ """ + if callable(x): + update = lambda: process_update(x()) + return update() + elif isinstance(x, tf.Operation): + update = x + elif hasattr(x, "op"): + update = x.op + else: + update = tf.convert_to_tensor(x) + + reachable = tf_utils.get_reachable_from_inputs( + relevant_inputs, [update] + ) + update._unconditional_update = update not in reachable + return update + + updates = [process_update(x) for x in updates] + self._updates.extend(updates) + + def set_weights(self, weights): + """Sets the weights of the layer, from Numpy arrays. + + The weights of a layer represent the state of the layer. This function + sets the weight values from numpy arrays. The weight values should be + passed in the order they are created by the layer. Note that the layer's + weights must be instantiated before calling this function by calling + the layer. + + For example, a Dense layer returns a list of two values-- per-output + weights and the bias value. These can be used to set the weights of + another Dense layer: + + >>> a = tf.keras.layers.Dense(1, + ... kernel_initializer=tf.constant_initializer(1.)) + >>> a_out = a(tf.convert_to_tensor([[1., 2., 3.]])) + >>> a.get_weights() + [array([[1.], + [1.], + [1.]], dtype=float32), array([0.], dtype=float32)] + >>> b = tf.keras.layers.Dense(1, + ... kernel_initializer=tf.constant_initializer(2.)) + >>> b_out = b(tf.convert_to_tensor([[10., 20., 30.]])) + >>> b.get_weights() + [array([[2.], + [2.], + [2.]], dtype=float32), array([0.], dtype=float32)] + >>> b.set_weights(a.get_weights()) + >>> b.get_weights() + [array([[1.], + [1.], + [1.]], dtype=float32), array([0.], dtype=float32)] + + Args: + weights: a list of Numpy arrays. The number + of arrays and their shape must match + number of the dimensions of the weights + of the layer (i.e. it should match the + output of `get_weights`). + + Raises: + ValueError: If the provided weights list does not match the + layer's specifications. + """ + params = self.weights + + expected_num_weights = 0 + for param in params: + if isinstance(param, base_layer_utils.TrackableWeightHandler): + expected_num_weights += param.num_tensors + else: + expected_num_weights += 1 + + if expected_num_weights != len(weights): + raise ValueError( + 'You called `set_weights(weights)` on layer "%s" ' + "with a weight list of length %s, but the layer was " + "expecting %s weights. Provided weights: %s..." + % ( + self.name, + len(weights), + expected_num_weights, + str(weights)[:50], + ) + ) + + weight_index = 0 + weight_value_tuples = [] + for param in params: + if isinstance(param, base_layer_utils.TrackableWeightHandler): + num_tensors = param.num_tensors + tensors = weights[weight_index : weight_index + num_tensors] + param.set_weights(tensors) + weight_index += num_tensors + else: + weight = weights[weight_index] + weight_shape = weight.shape if hasattr(weight, "shape") else () + ref_shape = param.shape + if not ref_shape.is_compatible_with(weight_shape): + raise ValueError( + "Layer weight shape %s not compatible with provided " + "weight shape %s" % (ref_shape, weight_shape) + ) + weight_value_tuples.append((param, weight)) + weight_index += 1 + + backend.batch_set_value(weight_value_tuples) + + def get_weights(self): + """Returns the current weights of the layer. + + The weights of a layer represent the state of the layer. This function + returns both trainable and non-trainable weight values associated with + this layer as a list of Numpy arrays, which can in turn be used to load + state into similarly parameterized layers. 
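Read together, `add_metric` and `add_update` above are typically exercised from a subclassed layer's `call`. A sketch under the v1 contract documented above (raw tensors need `aggregation='mean'`, and eager execution ignores `add_update`); the `Probe` layer is a hypothetical illustration, not part of this patch:

```python
import tensorflow as tf

class Probe(tf.keras.layers.Layer):
    def build(self, input_shape):
        self.calls = self.add_weight(
            "calls", shape=(), trainable=False, initializer="zeros"
        )

    def call(self, inputs):
        # Raw tensor + aggregation='mean': Keras wraps it in a Mean metric.
        self.add_metric(
            tf.reduce_sum(inputs), name="input_sum", aggregation="mean"
        )
        # Zero-arg callable, so the framework can skip the update when
        # trainable=False; in eager mode the call is ignored entirely.
        self.add_update(lambda: self.calls.assign_add(1.0))
        return inputs

probe = Probe()
_ = probe(tf.ones((2, 3)))
print([m.name for m in probe.metrics])  # ['input_sum']
```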
+ + For example, a Dense layer returns a list of two values-- per-output + weights and the bias value. These can be used to set the weights of + another Dense layer: + + >>> a = tf.keras.layers.Dense(1, + ... kernel_initializer=tf.constant_initializer(1.)) + >>> a_out = a(tf.convert_to_tensor([[1., 2., 3.]])) + >>> a.get_weights() + [array([[1.], + [1.], + [1.]], dtype=float32), array([0.], dtype=float32)] + >>> b = tf.keras.layers.Dense(1, + ... kernel_initializer=tf.constant_initializer(2.)) + >>> b_out = b(tf.convert_to_tensor([[10., 20., 30.]])) + >>> b.get_weights() + [array([[2.], + [2.], + [2.]], dtype=float32), array([0.], dtype=float32)] + >>> b.set_weights(a.get_weights()) + >>> b.get_weights() + [array([[1.], + [1.], + [1.]], dtype=float32), array([0.], dtype=float32)] + + Returns: + Weights values as a list of numpy arrays. + """ + weights = self.weights + output_weights = [] + for weight in weights: + if isinstance(weight, base_layer_utils.TrackableWeightHandler): + output_weights.extend(weight.get_tensors()) + else: + output_weights.append(weight) + return backend.batch_get_value(output_weights) + + def get_updates_for(self, inputs): + """Retrieves updates relevant to a specific set of inputs. + + Args: + inputs: Input tensor or list/tuple of input tensors. + + Returns: + List of update ops of the layer that depend on `inputs`. + """ + if inputs is None: + # Requesting unconditional updates. + return [u for u in self.updates if u._unconditional_update] + + # Requesting input-conditional updates. + updates = [u for u in self.updates if not u._unconditional_update] + inputs = tf.nest.flatten(inputs) + reachable = tf_utils.get_reachable_from_inputs(inputs, updates) + return [u for u in updates if u in reachable] + + def get_losses_for(self, inputs): + """Retrieves losses relevant to a specific set of inputs. + + Args: + inputs: Input tensor or list/tuple of input tensors. + + Returns: + List of loss tensors of the layer that depend on `inputs`. + """ + if inputs is None: + # Requesting unconditional losses. + return [l for l in self.losses if l._unconditional_loss] + + # Requesting input-conditional losses. + losses = [l for l in self.losses if not l._unconditional_loss] + inputs = tf.nest.flatten(inputs) + reachable = tf_utils.get_reachable_from_inputs(inputs, losses) + return [l for l in losses if l in reachable] + + def get_input_mask_at(self, node_index): + """Retrieves the input mask tensor(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A mask tensor + (or list of tensors if the layer has multiple inputs). + """ + inputs = self.get_input_at(node_index) + if isinstance(inputs, list): + return [getattr(x, "_keras_mask", None) for x in inputs] + else: + return getattr(inputs, "_keras_mask", None) + + def get_output_mask_at(self, node_index): + """Retrieves the output mask tensor(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A mask tensor + (or list of tensors if the layer has multiple outputs). 
+ """ + output = self.get_output_at(node_index) + if isinstance(output, list): + return [getattr(x, "_keras_mask", None) for x in output] + else: + return getattr(output, "_keras_mask", None) + + @property + def input_mask(self): + """Retrieves the input mask tensor(s) of a layer. + + Only applicable if the layer has exactly one inbound node, + i.e. if it is connected to one incoming layer. + + Returns: + Input mask tensor (potentially None) or list of input + mask tensors. + + Raises: + AttributeError: if the layer is connected to + more than one incoming layers. + """ + inputs = self.input + if isinstance(inputs, list): + return [getattr(x, "_keras_mask", None) for x in inputs] + else: + return getattr(inputs, "_keras_mask", None) + + @property + def output_mask(self): + """Retrieves the output mask tensor(s) of a layer. + + Only applicable if the layer has exactly one inbound node, + i.e. if it is connected to one incoming layer. + + Returns: + Output mask tensor (potentially None) or list of output + mask tensors. + + Raises: + AttributeError: if the layer is connected to + more than one incoming layers. + """ + output = self.output + if isinstance(output, list): + return [getattr(x, "_keras_mask", None) for x in output] + else: + return getattr(output, "_keras_mask", None) + + def get_input_shape_at(self, node_index): + """Retrieves the input shape(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A shape tuple + (or list of shape tuples if the layer has multiple inputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index( + node_index, "input_shapes", "input shape" + ) + + def get_output_shape_at(self, node_index): + """Retrieves the output shape(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A shape tuple + (or list of shape tuples if the layer has multiple outputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index( + node_index, "output_shapes", "output shape" + ) + + def get_input_at(self, node_index): + """Retrieves the input tensor(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first input node of the layer. + + Returns: + A tensor (or list of tensors if the layer has multiple inputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index( + node_index, "input_tensors", "input" + ) + + def get_output_at(self, node_index): + """Retrieves the output tensor(s) of a layer at a given node. + + Args: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first output node of the layer. + + Returns: + A tensor (or list of tensors if the layer has multiple outputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index( + node_index, "output_tensors", "output" + ) + + @property + def input(self): + """Retrieves the input tensor(s) of a layer. + + Only applicable if the layer has exactly one input, + i.e. if it is connected to one incoming layer. 
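The node-indexed getters above are keyed to call order: each functional-API call of a layer creates one inbound node. A sketch, assuming the standard functional API (illustrative only):

```python
import tensorflow as tf

shared = tf.keras.layers.Dense(4)
a = tf.keras.Input(shape=(8,))
b = tf.keras.Input(shape=(8,))
ya = shared(a)  # creates inbound node 0
yb = shared(b)  # creates inbound node 1

print(shared.get_input_shape_at(0))   # (None, 8)
print(shared.get_output_shape_at(1))  # (None, 4)
# The single-node conveniences (`shared.input`, `shared.input_shape`,
# `shared.input_mask`, ...) are only well-defined while the layer has
# exactly one inbound node; with two nodes, use the *_at variants.
```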
+ + Returns: + Input tensor or list of input tensors. + + Raises: + RuntimeError: If called in Eager mode. + AttributeError: If no inbound nodes are found. + """ + if not self._inbound_nodes: + raise AttributeError( + "Layer " + self.name + " is not connected, no input to return." + ) + return self._get_node_attribute_at_index(0, "input_tensors", "input") + + @property + def output(self): + """Retrieves the output tensor(s) of a layer. + + Only applicable if the layer has exactly one output, + i.e. if it is connected to one incoming layer. + + Returns: + Output tensor or list of output tensors. + + Raises: + AttributeError: if the layer is connected to more than one incoming + layers. + RuntimeError: if called in Eager mode. + """ + if not self._inbound_nodes: + raise AttributeError( + "Layer " + self.name + " has no inbound nodes." + ) + return self._get_node_attribute_at_index(0, "output_tensors", "output") + + @property + def input_shape(self): + """Retrieves the input shape(s) of a layer. + + Only applicable if the layer has exactly one input, + i.e. if it is connected to one incoming layer, or if all inputs + have the same shape. + + Returns: + Input shape, as an integer shape tuple + (or list of shape tuples, one tuple per input tensor). + + Raises: + AttributeError: if the layer has no defined input_shape. + RuntimeError: if called in Eager mode. + """ + if not self._inbound_nodes: + raise AttributeError( + f'The layer "{self.name}" has never been called ' + "and thus has no defined input shape. Note that the " + "`input_shape` property is only available for " + "Functional and Sequential models." + ) + all_input_shapes = set( + [str(node.input_shapes) for node in self._inbound_nodes] + ) + if len(all_input_shapes) == 1: + return self._inbound_nodes[0].input_shapes + else: + raise AttributeError( + 'The layer "' + str(self.name) + " has multiple inbound nodes, " + "with different input shapes. Hence " + 'the notion of "input shape" is ' + "ill-defined for the layer. " + "Use `get_input_shape_at(node_index)` " + "instead." + ) + + def count_params(self): + """Count the total number of scalars composing the weights. + + Returns: + An integer count. + + Raises: + ValueError: if the layer isn't yet built + (in which case its weights aren't yet defined). + """ + if not self.built: + if getattr(self, "_is_graph_network", False): + with tf_utils.maybe_init_scope(self): + self._maybe_build(self.inputs) + else: + raise ValueError( + "You tried to call `count_params` on " + + self.name + + ", but the layer isn't built. " + "You can build it manually via: `" + + self.name + + ".build(batch_input_shape)`." + ) + return layer_utils.count_params(self.weights) + + @property + def output_shape(self): + """Retrieves the output shape(s) of a layer. + + Only applicable if the layer has one output, + or if all outputs have the same shape. + + Returns: + Output shape, as an integer shape tuple + (or list of shape tuples, one tuple per output tensor). + + Raises: + AttributeError: if the layer has no defined output shape. + RuntimeError: if called in Eager mode. + """ + if not self._inbound_nodes: + raise AttributeError( + "The layer has never been called " + "and thus has no defined output shape." + ) + all_output_shapes = set( + [str(node.output_shapes) for node in self._inbound_nodes] + ) + if len(all_output_shapes) == 1: + return self._inbound_nodes[0].output_shapes + else: + raise AttributeError( + 'The layer "%s"' + " has multiple inbound nodes, " + "with different output shapes. 
Hence " + 'the notion of "output shape" is ' + "ill-defined for the layer. " + "Use `get_output_shape_at(node_index)` " + "instead." % self.name + ) + + @property + @doc_controls.do_not_doc_inheritable + def inbound_nodes(self): + """Deprecated, do NOT use! Only for external Keras compatibility .""" + return self._inbound_nodes + + @property + @doc_controls.do_not_doc_inheritable + def outbound_nodes(self): + """Deprecated, do NOT use! Only for external Keras compatibility .""" + return self._outbound_nodes + + ########################################################################### + # Methods & attributes below are public aliases of other methods. # + ########################################################################### + + @property + def variables(self): + """Returns the list of all layer variables/weights. + + Alias of `self.weights`. + + Returns: + A list of variables. + """ + return self.weights + + @property + def trainable_variables(self): + return self.trainable_weights + + @property + def non_trainable_variables(self): + return self.non_trainable_weights + + ############################################################################ + # Methods & attributes below are all private and only used by the framework. + ############################################################################ + + @property + def _inbound_nodes(self): + return self._inbound_nodes_value + + @_inbound_nodes.setter + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _inbound_nodes(self, value): + self._inbound_nodes_value = value + + @property + def _outbound_nodes(self): + return self._outbound_nodes_value + + @_outbound_nodes.setter + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _outbound_nodes(self, value): + self._outbound_nodes_value = value + + def _set_dtype_policy(self, dtype): + """Sets self._dtype_policy.""" + if isinstance(dtype, policy.Policy): + self._dtype_policy = dtype + elif isinstance(dtype, dict): + self._dtype_policy = policy.deserialize(dtype) + elif isinstance(dtype, str) and dtype in ( + "mixed_float16", + "mixed_bfloat16", + ): + # The isinstance check is required since np.dtype raises an error if + # compared to a non-dtype string. + self._dtype_policy = policy.Policy(dtype) + elif dtype: + self._dtype_policy = policy.Policy(tf.as_dtype(dtype).name) + else: + self._dtype_policy = policy.global_policy() + if ( + self._dtype_policy.name == "mixed_float16" + and not loss_scale_optimizer.strategy_supports_loss_scaling() + ): + # Although only loss scaling doesn't support certain strategies, to + # avoid confusion, we disallow the 'mixed_float16' policy with + # unsupported strategies. This is because 'mixed_float16' requires + # loss scaling for numeric stability. + strategy = tf.distribute.get_strategy() + raise ValueError( + "Mixed precision is not supported with the " + "tf.distribute.Strategy: %s. Either stop using mixed " + 'precision by removing the use of the "%s" policy or ' + "use a different Strategy, e.g. a MirroredStrategy." + % (strategy.__class__.__name__, self._dtype_policy.name) + ) + + # Performance optimization: cache the compute dtype as a Dtype object or + # None, so that str to Dtype conversion doesn't happen in + # Layer.__call__. + if self._dtype_policy.compute_dtype: + self._compute_dtype_object = tf.as_dtype( + self._dtype_policy.compute_dtype + ) + else: + self._compute_dtype_object = None + + # TODO(reedwm): Expose this property? + @property + def _compute_dtype(self): + """The layer's compute dtype. 
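The coercions in `_set_dtype_policy` above mean a string dtype, a serialized dict, or a `Policy` object all resolve to a dtype policy. A sketch, assuming the public `tf.keras.mixed_precision` API (TF 2.4+):

```python
import tensorflow as tf

# String dtype or Policy object: both end up as a dtype policy.
d64 = tf.keras.layers.Dense(4, dtype="float64")
mp = tf.keras.layers.Dense(
    4, dtype=tf.keras.mixed_precision.Policy("mixed_float16")
)

print(d64.dtype)  # 'float64' (variable dtype == compute dtype)
# Under mixed_float16, variables stay float32 for numeric stability
# while computation runs in float16:
print(mp.dtype, mp.compute_dtype)  # 'float32' 'float16'
```

This float32-variable/float16-compute split is also why the code above rejects `mixed_float16` under distribution strategies that cannot do loss scaling.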
+
+        Unless mixed-precision is used, this is the same as `Layer.dtype`.
+
+        If `self._autocast` is True, layers will cast floating-point inputs
+        to this.
+
+        Returns:
+          The layer's compute dtype.
+        """
+        return self._dtype_policy.compute_dtype
+
+    def _maybe_cast_inputs(self, inputs):
+        """Maybe casts the inputs to the compute dtype.
+
+        If `self._compute_dtype` is floating-point, and `self._autocast` is
+        True, floating-point inputs are cast to `self._compute_dtype`.
+
+        Args:
+          inputs: Input tensor, or structure of input tensors.
+
+        Returns:
+          `inputs`, but tensors may have been cast to `self._compute_dtype`.
+        """
+        compute_dtype = self._compute_dtype
+        if (
+            self._autocast
+            and compute_dtype
+            and tf.as_dtype(compute_dtype).is_floating
+        ):
+
+            def f(x):
+                """Cast a single Tensor or TensorSpec to the compute dtype."""
+                cast_types = (tf.Tensor, tf.SparseTensor, tf.RaggedTensor)
+                if (
+                    isinstance(x, cast_types)
+                    and x.dtype.is_floating
+                    and x.dtype.base_dtype.name != compute_dtype
+                ):
+                    return tf.cast(x, compute_dtype)
+                elif isinstance(x, tf.TensorSpec) and x.dtype.is_floating:
+                    # Inputs may be TensorSpecs when this function is called
+                    # from model._set_inputs.
+                    return tf.TensorSpec(x.shape, compute_dtype, x.name)
+                else:
+                    return x
+
+            return tf.nest.map_structure(f, inputs)
+        else:
+            return inputs
+
+    # _dtype used to be an attribute set in the constructor. We still expose
+    # it because some clients still use it.
+    # TODO(reedwm): Deprecate, then remove the _dtype property.
+    @property
+    def _dtype(self):
+        # This is equivalent to returning self.dtype. We do not return
+        # self.dtype as it would cause infinite recursion in a few subclasses,
+        # which override "dtype" to return self._dtype.
+        return self._dtype_policy.variable_dtype
+
+    @_dtype.setter
+    def _dtype(self, value):
+        value = tf.as_dtype(value).name
+        self._set_dtype_policy(policy.Policy(value))
+
+    def _name_scope(self):
+        return self.name
+
+    def _init_set_name(self, name, zero_based=True):
+        if not name:
+            self._name = backend.unique_object_name(
+                generic_utils.to_snake_case(self.__class__.__name__),
+                zero_based=zero_based,
+            )
+        else:
+            self._name = name
+
+    def _get_existing_metric(self, name=None):
+        match = [m for m in self._metrics if m.name == name]
+        if not match:
+            return
+        if len(match) > 1:
+            raise ValueError(
+                "Please provide different names for the metrics you have "
+                'added. We found {} metrics with the name: "{}"'.format(
+                    len(match), name
+                )
+            )
+        return match[0]
+
+    def _symbolic_add_metric(self, value, aggregation=None, name=None):
+        base_layer_utils.check_graph_consistency(value, method="add_metric")
+        match = self._get_existing_metric(name)
+        if aggregation is None:
+            # Iterate over the metrics and check if the given metric exists
+            # already. This can happen when a metric instance is created in
+            # subclassed model layer `__init__` and we have tracked that
+            # instance already in model.__setattr__.
+            if match:
+                result_tensor = value
+                metric_obj = match
+            elif hasattr(value, "_metric_obj"):
+                # We track the instance using the metadata on the result
+                # tensor.
+                result_tensor = value
+                metric_obj = result_tensor._metric_obj
+                self._metrics.append(metric_obj)
+            else:
+                raise ValueError(
+                    "We do not support adding an aggregated metric result "
+                    "tensor that is not the output of a "
+                    "`tf.keras.metrics.Metric` metric instance. Without "
+                    "having access to the metric instance we cannot reset the "
+                    "state of a metric after every epoch during training. 
You " + "can create a `tf.keras.metrics.Metric` instance and pass " + "the result here or pass an un-aggregated result with " + "`aggregation` parameter set as `mean`. For example: " + "`self.add_metric(tf.reduce_sum(inputs), " + "name='mean_activation', aggregation='mean')` " + ) + else: + # If a non-aggregated tensor is given as input (ie. `aggregation` is + # explicitly set to `mean`), we wrap the tensor in `Mean` metric. + if match: + result_tensor = match(value) + metric_obj = match + else: + metric_obj, result_tensor = base_layer_utils.create_mean_metric( + value, name + ) + self._metrics.append(metric_obj) + + def _handle_weight_regularization(self, name, variable, regularizer): + """Create lambdas which compute regularization losses.""" + + def _loss_for_variable(v): + """Creates a regularization loss `Tensor` for variable `v`.""" + with backend.name_scope(name + "/Regularizer"): + regularization = regularizer(v) + return regularization + + if base_layer_utils.is_split_variable(variable): + for v in variable: + self.add_loss(functools.partial(_loss_for_variable, v)) + else: + self.add_loss(functools.partial(_loss_for_variable, variable)) + + def _handle_activity_regularization(self, inputs, outputs): + # Apply activity regularization. + # Note that it should be applied every time the layer creates a new + # output, since it is output-specific. + if self._activity_regularizer: + output_list = tf.nest.flatten(outputs) + with backend.name_scope("ActivityRegularizer"): + for output in output_list: + activity_loss = tf.convert_to_tensor( + self._activity_regularizer(output) + ) + batch_size = tf.cast( + tf.compat.v1.shape(output)[0], activity_loss.dtype + ) + # Make activity regularization strength batch-agnostic. + mean_activity_loss = activity_loss / batch_size + base_layer_utils.check_graph_consistency( + mean_activity_loss, method="activity_regularizer" + ) + self.add_loss(mean_activity_loss, inputs=inputs) + + def _set_mask_metadata(self, inputs, outputs, previous_mask): + flat_outputs = tf.nest.flatten(outputs) + + mask_already_computed = getattr( + self, "_compute_output_and_mask_jointly", False + ) or all( + getattr(x, "_keras_mask", None) is not None for x in flat_outputs + ) + + # Only compute the mask if the Layer explicitly supports masking or has + # overridden `compute_mask`. + should_compute_mask = hasattr(self, "compute_mask") and ( + self.supports_masking + or not getattr(self.compute_mask, "_is_default", False) + ) + + if mask_already_computed: + flat_masks = [getattr(x, "_keras_mask", None) for x in flat_outputs] + elif not should_compute_mask: + flat_masks = [None for _ in flat_outputs] + else: + output_masks = self.compute_mask(inputs, previous_mask) + # `compute_mask` can return a single `None` even when a Layer + # has multiple outputs. + if output_masks is None: + flat_masks = [None for _ in flat_outputs] + else: + flat_masks = tf.nest.flatten(output_masks) + + for output, mask in zip(flat_outputs, flat_masks): try: - u = u() - except ValueError as e: - if 'InaccessibleTensorError' in type(e).__name__: - # For one specific case of error we try to raise - # a more meaningful error message about the graph if we can. - # This error is an internal TF symbol that is not - # publicly exposed, so we check the name directly rather - # than using a direct import. - base_layer_utils.check_graph_consistency( - method='add_update', force_raise=True) - raise # check_graph_consistency may not always raise. 
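Note how `_handle_activity_regularization` above divides the regularizer's output by the batch size, making the recorded loss batch-size-agnostic. A small numeric sketch (the `activity_loss` helper is hypothetical, for illustration only):

```python
import tensorflow as tf

def activity_loss(batch_size):
    layer = tf.keras.layers.Dense(
        1,
        kernel_initializer="ones",
        activity_regularizer=tf.keras.regularizers.l2(1.0),
    )
    layer(tf.ones((batch_size, 3)))
    # One activity-regularization loss was recorded for this call.
    return float(layer.losses[0])

# Each sample's output is 3., so the raw l2 penalty is batch_size * 9.;
# the division by batch size makes both calls report 9.0.
print(activity_loss(2), activity_loss(8))
```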
- base_layer_utils.check_graph_consistency(u, method='add_update') - collected_updates.append(u) - return collected_updates - - @property - def losses(self): - """Losses which are associated with this `Layer`. - - Variable regularization tensors are created when this property is accessed, - so it is eager safe: accessing `losses` under a `tf.GradientTape` will - propagate gradients back to the corresponding variables. - - Returns: - A list of tensors. - """ - collected_losses = [] - all_layers = self._flatten_layers() - for layer in all_layers: - # If any eager losses are present, we assume the model to be part of an - # eager training loop (either a custom one or the one used when - # `run_eagerly=True`) and so we always return just the eager losses. - collected_losses.extend(layer._losses) - for regularizer in layer._callable_losses: - loss_tensor = regularizer() - if loss_tensor is not None: - collected_losses.append(loss_tensor) - return collected_losses - - @doc_controls.for_subclass_implementers - def add_loss(self, losses, inputs=None): - """Add loss tensor(s), potentially dependent on layer inputs. - - Some losses (for instance, activity regularization losses) may be dependent - on the inputs passed when calling a layer. Hence, when reusing the same - layer on different inputs `a` and `b`, some entries in `layer.losses` may - be dependent on `a` and some on `b`. This method automatically keeps track - of dependencies. - - This method can be used inside a subclassed layer or model's `call` - function, in which case `losses` should be a Tensor or list of Tensors. - - Example: - - ```python - class MyLayer(tf.keras.layers.Layer): - def call(inputs, self): - self.add_loss(tf.abs(tf.reduce_mean(inputs)), inputs=True) - return inputs - ``` - - This method can also be called directly on a Functional Model during - construction. In this case, any loss Tensors passed to this Model must - be symbolic and be able to be traced back to the model's `Input`s. These - losses become part of the model's topology and are tracked in `get_config`. - - Example: - - ```python - inputs = tf.keras.Input(shape=(10,)) - x = tf.keras.layers.Dense(10)(inputs) - outputs = tf.keras.layers.Dense(1)(x) - model = tf.keras.Model(inputs, outputs) - # Activity regularization. - model.add_loss(tf.abs(tf.reduce_mean(x))) - ``` - - If this is not the case for your loss (if, for example, your loss references - a `Variable` of one of the model's layers), you can wrap your loss in a - zero-argument lambda. These losses are not tracked as part of the model's - topology since they can't be serialized. - - Example: - - ```python - inputs = tf.keras.Input(shape=(10,)) - x = tf.keras.layers.Dense(10)(inputs) - outputs = tf.keras.layers.Dense(1)(x) - model = tf.keras.Model(inputs, outputs) - # Weight regularization. - model.add_loss(lambda: tf.reduce_mean(x.kernel)) - ``` - - Args: - losses: Loss tensor, or list/tuple of tensors. Rather than tensors, losses - may also be zero-argument callables which create a loss tensor. - inputs: Ignored when executing eagerly. If anything other than None is - passed, it signals the losses are conditional on some of the layer's - inputs, and thus they should only be run where these inputs are - available. This is the case for activity regularization losses, for - instance. If `None` is passed, the losses are assumed - to be unconditional, and will apply across all dataflows of the layer - (e.g. weight regularization losses). 
- """ - def _tag_unconditional(loss): - """Process the loss and tag it by setting loss._unconditional_loss.""" - if callable(loss): - # We run the loss without autocasting, as regularizers are often - # numerically unstable in float16. - with autocast_variable.enable_auto_cast_variables(None): - loss = loss() - if loss is None: - return None # Will be filtered out when computing the .losses property - if not tf.is_tensor(loss): - loss = tf.convert_to_tensor( - loss, dtype=backend.floatx()) - loss._unconditional_loss = (inputs is None) # pylint: disable=protected-access - return loss - - losses = tf.nest.flatten(losses) - - callable_losses = [] - symbolic_losses = [] - for loss in losses: - if callable(loss): - callable_losses.append(functools.partial(_tag_unconditional, loss)) - continue - if loss is None: - continue - if not tf.is_tensor(loss): - loss = tf.convert_to_tensor( - loss, dtype=backend.floatx()) - # TF Functions should take the eager path. - if (tf_utils.is_symbolic_tensor(loss) and - not base_layer_utils.is_in_tf_function()): - symbolic_losses.append(_tag_unconditional(loss)) - base_layer_utils.check_graph_consistency(loss, method='add_loss') - - self._callable_losses.extend(callable_losses) - - in_call_context = base_layer_utils.call_context().in_call - - if in_call_context: - for symbolic_loss in symbolic_losses: - self._losses.append(symbolic_loss) - else: - for symbolic_loss in symbolic_losses: - if getattr(self, '_is_graph_network', False): - self._graph_network_add_loss(symbolic_loss) + output._keras_mask = mask + except AttributeError: + # C Type such as np.ndarray. + pass + + if tf_utils.are_all_symbolic_tensors(flat_outputs): + for output in flat_outputs: + if getattr(output, "_keras_mask", None) is not None: + # Do not track masks for `TensorFlowOpLayer` construction. + output._keras_mask._keras_history_checked = True + + def _collect_input_masks(self, inputs, args, kwargs): + """Checks if mask argument was passed, else gathers mask from inputs.""" + if self._call_spec.arg_was_passed("mask", args, kwargs): + return self._call_spec.get_arg_value("mask", args, kwargs) + + if not self._should_compute_mask: + return None + + input_masks = tf.nest.map_structure( + lambda t: getattr(t, "_keras_mask", None), inputs + ) + if generic_utils.is_all_none(input_masks): + return None + return input_masks + + def _get_node_attribute_at_index(self, node_index, attr, attr_name): + """Private utility to retrieves an attribute (e.g. inputs) from a node. + + This is used to implement the methods: + - get_input_shape_at + - get_output_shape_at + - get_input_at + etc... + + Args: + node_index: Integer index of the node from which + to retrieve the attribute. + attr: Exact node attribute name. + attr_name: Human-readable attribute name, for error messages. + + Returns: + The layer's attribute `attr` at the node of index `node_index`. + + Raises: + RuntimeError: If the layer has no inbound nodes, or if called in + Eager mode. + ValueError: If the index provided does not match any node. + """ + if not self._inbound_nodes: + raise RuntimeError( + "The layer has never been called and thus has no defined " + + attr_name + + "." + ) + if not len(self._inbound_nodes) > node_index: + raise ValueError( + "Asked to get " + + attr_name + + " at node " + + str(node_index) + + ", but the layer has only " + + str(len(self._inbound_nodes)) + + " inbound nodes." 
+ ) + values = getattr(self._inbound_nodes[node_index], attr) + if isinstance(values, list) and len(values) == 1: + return values[0] else: - # Possible a loss was added in a Layer's `build`. - self._losses.append(symbolic_loss) - - @property - def metrics(self): - collected_metrics = [] - for layer in self._flatten_layers(): - collected_metrics.extend(layer._metrics) - return collected_metrics - - @doc_controls.for_subclass_implementers - def add_metric(self, value, aggregation=None, name=None): - """Adds metric tensor to the layer. - - Args: - value: Metric tensor. - aggregation: Sample-wise metric reduction function. If `aggregation=None`, - it indicates that the metric tensor provided has been aggregated - already. eg, `bin_acc = BinaryAccuracy(name='acc')` followed by - `model.add_metric(bin_acc(y_true, y_pred))`. If aggregation='mean', the - given metric tensor will be sample-wise reduced using `mean` function. - eg, `model.add_metric(tf.reduce_sum(outputs), name='output_mean', - aggregation='mean')`. - name: String metric name. - - Raises: - ValueError: If `aggregation` is anything other than None or `mean`. - """ - if aggregation is not None and aggregation != 'mean': - raise ValueError( - 'We currently support only `mean` sample-wise metric aggregation. ' - 'You provided aggregation=`%s`' % aggregation) - - from_metric_obj = hasattr(value, '_metric_obj') - is_symbolic = tf_utils.is_symbolic_tensor(value) - in_call_context = base_layer_utils.call_context().in_call - - if name is None and not from_metric_obj: - # Eg. `self.add_metric(math_ops.reduce_sum(x), aggregation='mean')` - # In eager mode, we use metric name to lookup a metric. Without a name, - # a new Mean metric wrapper will be created on every model/layer call. - # So, we raise an error when no name is provided. - # We will do the same for symbolic mode for consistency although a name - # will be generated if no name is provided. - - # We will not raise this error in the foll use case for the sake of - # consistency as name in provided in the metric constructor. - # mean = metrics.Mean(name='my_metric') - # model.add_metric(mean(outputs)) - raise ValueError('Please provide a name for your metric like ' - '`self.add_metric(tf.reduce_sum(inputs), ' - 'name=\'mean_activation\', aggregation=\'mean\')`') - elif from_metric_obj: - name = value._metric_obj.name - - if in_call_context: - # TF Function path should take the eager path. - self._symbolic_add_metric(value, aggregation, name) - else: - if not is_symbolic: - raise ValueError('Expected a symbolic Tensor for the metric value, ' - 'received: ' + str(value)) - - # Possible a metric was added in a Layer's `build`. - if not getattr(self, '_is_graph_network', False): - with backend.get_graph().as_default(): - self._symbolic_add_metric(value, aggregation, name) - return - - if from_metric_obj: - raise ValueError('Using the result of calling a `Metric` object ' - 'when calling `add_metric` on a Functional ' - 'Model is not supported. Please pass the ' - 'Tensor to monitor directly.') - - # Insert layers into the Keras Graph Network. - self._graph_network_add_metric(value, aggregation, name) - - @doc_controls.for_subclass_implementers - def add_update(self, updates): - """Add update op(s), potentially dependent on layer inputs. - - Weight updates (for instance, the updates of the moving mean and variance - in a BatchNormalization layer) may be dependent on the inputs passed - when calling a layer. 
Hence, when reusing the same layer on - different inputs `a` and `b`, some entries in `layer.updates` may be - dependent on `a` and some on `b`. This method automatically keeps track - of dependencies. - - The `get_updates_for` method allows to retrieve the updates relevant to a - specific set of inputs. - - This call is ignored when eager execution is enabled (in that case, variable - updates are run on the fly and thus do not need to be tracked for later - execution). - - Args: - updates: Update op, or list/tuple of update ops, or zero-arg callable - that returns an update op. A zero-arg callable should be passed in - order to disable running the updates by setting `trainable=False` - on this Layer, when executing in Eager mode. - """ - call_context = base_layer_utils.call_context() - - if (tf.distribute.has_strategy() and - tf.distribute.in_cross_replica_context() and - # When saving the model, the distribution strategy context should be - # ignored, following the default path for adding updates. - not call_context.saving): - # Updates don't need to be run in a cross-replica context. - return - - updates = generic_utils.to_list(updates) - - if call_context.in_call: - relevant_inputs = call_context.inputs - else: - inbound_nodes = getattr(self, '_inbound_nodes', []) - relevant_inputs = [node.input_tensors for node in inbound_nodes] - - def process_update(x): - """Standardize update ops. - - Args: - x: Tensor, op, or callable. - - Returns: - An update op. - """ - if callable(x): - update = lambda: process_update(x()) - return update() - elif isinstance(x, tf.Operation): - update = x - elif hasattr(x, 'op'): - update = x.op - else: - update = tf.convert_to_tensor(x) - - reachable = tf_utils.get_reachable_from_inputs(relevant_inputs, [update]) - update._unconditional_update = update not in reachable - return update - - updates = [process_update(x) for x in updates] - self._updates.extend(updates) - - def set_weights(self, weights): - """Sets the weights of the layer, from Numpy arrays. - - The weights of a layer represent the state of the layer. This function - sets the weight values from numpy arrays. The weight values should be - passed in the order they are created by the layer. Note that the layer's - weights must be instantiated before calling this function by calling - the layer. - - For example, a Dense layer returns a list of two values-- per-output - weights and the bias value. These can be used to set the weights of another - Dense layer: - - >>> a = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(1.)) - >>> a_out = a(tf.convert_to_tensor([[1., 2., 3.]])) - >>> a.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - >>> b = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(2.)) - >>> b_out = b(tf.convert_to_tensor([[10., 20., 30.]])) - >>> b.get_weights() - [array([[2.], - [2.], - [2.]], dtype=float32), array([0.], dtype=float32)] - >>> b.set_weights(a.get_weights()) - >>> b.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - - Args: - weights: a list of Numpy arrays. The number - of arrays and their shape must match - number of the dimensions of the weights - of the layer (i.e. it should match the - output of `get_weights`). - - Raises: - ValueError: If the provided weights list does not match the - layer's specifications. 
- """ - params = self.weights - - expected_num_weights = 0 - for param in params: - if isinstance(param, base_layer_utils.TrackableWeightHandler): - expected_num_weights += param.num_tensors - else: - expected_num_weights += 1 - - if expected_num_weights != len(weights): - raise ValueError( - 'You called `set_weights(weights)` on layer "%s" ' - 'with a weight list of length %s, but the layer was ' - 'expecting %s weights. Provided weights: %s...' % - (self.name, len(weights), expected_num_weights, str(weights)[:50])) - - weight_index = 0 - weight_value_tuples = [] - for param in params: - if isinstance(param, base_layer_utils.TrackableWeightHandler): - num_tensors = param.num_tensors - tensors = weights[weight_index:weight_index + num_tensors] - param.set_weights(tensors) - weight_index += num_tensors - else: - weight = weights[weight_index] - weight_shape = weight.shape if hasattr(weight, 'shape') else () - ref_shape = param.shape - if not ref_shape.is_compatible_with(weight_shape): - raise ValueError( - 'Layer weight shape %s not compatible with provided weight ' - 'shape %s' % (ref_shape, weight_shape)) - weight_value_tuples.append((param, weight)) - weight_index += 1 - - backend.batch_set_value(weight_value_tuples) - - def get_weights(self): - """Returns the current weights of the layer. - - The weights of a layer represent the state of the layer. This function - returns both trainable and non-trainable weight values associated with this - layer as a list of Numpy arrays, which can in turn be used to load state - into similarly parameterized layers. - - For example, a Dense layer returns a list of two values-- per-output - weights and the bias value. These can be used to set the weights of another - Dense layer: - - >>> a = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(1.)) - >>> a_out = a(tf.convert_to_tensor([[1., 2., 3.]])) - >>> a.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - >>> b = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(2.)) - >>> b_out = b(tf.convert_to_tensor([[10., 20., 30.]])) - >>> b.get_weights() - [array([[2.], - [2.], - [2.]], dtype=float32), array([0.], dtype=float32)] - >>> b.set_weights(a.get_weights()) - >>> b.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - - Returns: - Weights values as a list of numpy arrays. - """ - weights = self.weights - output_weights = [] - for weight in weights: - if isinstance(weight, base_layer_utils.TrackableWeightHandler): - output_weights.extend(weight.get_tensors()) - else: - output_weights.append(weight) - return backend.batch_get_value(output_weights) - - def get_updates_for(self, inputs): - """Retrieves updates relevant to a specific set of inputs. - - Args: - inputs: Input tensor or list/tuple of input tensors. - - Returns: - List of update ops of the layer that depend on `inputs`. - """ - if inputs is None: - # Requesting unconditional updates. - return [u for u in self.updates if u._unconditional_update] - - # Requesting input-conditional updates. - updates = [u for u in self.updates if not u._unconditional_update] - inputs = tf.nest.flatten(inputs) - reachable = tf_utils.get_reachable_from_inputs(inputs, updates) - return [u for u in updates if u in reachable] - - def get_losses_for(self, inputs): - """Retrieves losses relevant to a specific set of inputs. - - Args: - inputs: Input tensor or list/tuple of input tensors. 
- - Returns: - List of loss tensors of the layer that depend on `inputs`. - """ - if inputs is None: - # Requesting unconditional losses. - return [l for l in self.losses if l._unconditional_loss] - - # Requesting input-conditional losses. - losses = [l for l in self.losses if not l._unconditional_loss] - inputs = tf.nest.flatten(inputs) - reachable = tf_utils.get_reachable_from_inputs(inputs, losses) - return [l for l in losses if l in reachable] - - def get_input_mask_at(self, node_index): - """Retrieves the input mask tensor(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A mask tensor - (or list of tensors if the layer has multiple inputs). - """ - inputs = self.get_input_at(node_index) - if isinstance(inputs, list): - return [getattr(x, '_keras_mask', None) for x in inputs] - else: - return getattr(inputs, '_keras_mask', None) - - def get_output_mask_at(self, node_index): - """Retrieves the output mask tensor(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A mask tensor - (or list of tensors if the layer has multiple outputs). - """ - output = self.get_output_at(node_index) - if isinstance(output, list): - return [getattr(x, '_keras_mask', None) for x in output] - else: - return getattr(output, '_keras_mask', None) - - @property - def input_mask(self): - """Retrieves the input mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - Returns: - Input mask tensor (potentially None) or list of input - mask tensors. - - Raises: - AttributeError: if the layer is connected to - more than one incoming layers. - """ - inputs = self.input - if isinstance(inputs, list): - return [getattr(x, '_keras_mask', None) for x in inputs] - else: - return getattr(inputs, '_keras_mask', None) - - @property - def output_mask(self): - """Retrieves the output mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - Returns: - Output mask tensor (potentially None) or list of output - mask tensors. - - Raises: - AttributeError: if the layer is connected to - more than one incoming layers. - """ - output = self.output - if isinstance(output, list): - return [getattr(x, '_keras_mask', None) for x in output] - else: - return getattr(output, '_keras_mask', None) - - def get_input_shape_at(self, node_index): - """Retrieves the input shape(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A shape tuple - (or list of shape tuples if the layer has multiple inputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'input_shapes', - 'input shape') - - def get_output_shape_at(self, node_index): - """Retrieves the output shape(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. 
- - Returns: - A shape tuple - (or list of shape tuples if the layer has multiple outputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'output_shapes', - 'output shape') - - def get_input_at(self, node_index): - """Retrieves the input tensor(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first input node of the layer. - - Returns: - A tensor (or list of tensors if the layer has multiple inputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'input_tensors', - 'input') - - def get_output_at(self, node_index): - """Retrieves the output tensor(s) of a layer at a given node. - - Args: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first output node of the layer. - - Returns: - A tensor (or list of tensors if the layer has multiple outputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'output_tensors', - 'output') - - @property - def input(self): - """Retrieves the input tensor(s) of a layer. - - Only applicable if the layer has exactly one input, - i.e. if it is connected to one incoming layer. - - Returns: - Input tensor or list of input tensors. - - Raises: - RuntimeError: If called in Eager mode. - AttributeError: If no inbound nodes are found. - """ - if not self._inbound_nodes: - raise AttributeError('Layer ' + self.name + - ' is not connected, no input to return.') - return self._get_node_attribute_at_index(0, 'input_tensors', 'input') - - @property - def output(self): - """Retrieves the output tensor(s) of a layer. - - Only applicable if the layer has exactly one output, - i.e. if it is connected to one incoming layer. - - Returns: - Output tensor or list of output tensors. - - Raises: - AttributeError: if the layer is connected to more than one incoming - layers. - RuntimeError: if called in Eager mode. - """ - if not self._inbound_nodes: - raise AttributeError('Layer ' + self.name + ' has no inbound nodes.') - return self._get_node_attribute_at_index(0, 'output_tensors', 'output') - - @property - def input_shape(self): - """Retrieves the input shape(s) of a layer. - - Only applicable if the layer has exactly one input, - i.e. if it is connected to one incoming layer, or if all inputs - have the same shape. - - Returns: - Input shape, as an integer shape tuple - (or list of shape tuples, one tuple per input tensor). - - Raises: - AttributeError: if the layer has no defined input_shape. - RuntimeError: if called in Eager mode. - """ - if not self._inbound_nodes: - raise AttributeError(f'The layer "{self.name}" has never been called ' - 'and thus has no defined input shape. Note that the ' - '`input_shape` property is only available for ' - 'Functional and Sequential models.') - all_input_shapes = set( - [str(node.input_shapes) for node in self._inbound_nodes]) - if len(all_input_shapes) == 1: - return self._inbound_nodes[0].input_shapes - else: - raise AttributeError('The layer "' + str(self.name) + - ' has multiple inbound nodes, ' - 'with different input shapes. Hence ' - 'the notion of "input shape" is ' - 'ill-defined for the layer. ' - 'Use `get_input_shape_at(node_index)` ' - 'instead.') - - def count_params(self): - """Count the total number of scalars composing the weights. 
- - Returns: - An integer count. - - Raises: - ValueError: if the layer isn't yet built - (in which case its weights aren't yet defined). - """ - if not self.built: - if getattr(self, '_is_graph_network', False): - with tf_utils.maybe_init_scope(self): - self._maybe_build(self.inputs) - else: - raise ValueError('You tried to call `count_params` on ' + self.name + - ', but the layer isn\'t built. ' - 'You can build it manually via: `' + self.name + - '.build(batch_input_shape)`.') - return layer_utils.count_params(self.weights) - - @property - def output_shape(self): - """Retrieves the output shape(s) of a layer. - - Only applicable if the layer has one output, - or if all outputs have the same shape. - - Returns: - Output shape, as an integer shape tuple - (or list of shape tuples, one tuple per output tensor). - - Raises: - AttributeError: if the layer has no defined output shape. - RuntimeError: if called in Eager mode. - """ - if not self._inbound_nodes: - raise AttributeError('The layer has never been called ' - 'and thus has no defined output shape.') - all_output_shapes = set( - [str(node.output_shapes) for node in self._inbound_nodes]) - if len(all_output_shapes) == 1: - return self._inbound_nodes[0].output_shapes - else: - raise AttributeError('The layer "%s"' - ' has multiple inbound nodes, ' - 'with different output shapes. Hence ' - 'the notion of "output shape" is ' - 'ill-defined for the layer. ' - 'Use `get_output_shape_at(node_index)` ' - 'instead.' % self.name) - - @property - @doc_controls.do_not_doc_inheritable - def inbound_nodes(self): - """Deprecated, do NOT use! Only for compatibility with external Keras.""" - return self._inbound_nodes - - @property - @doc_controls.do_not_doc_inheritable - def outbound_nodes(self): - """Deprecated, do NOT use! Only for compatibility with external Keras.""" - return self._outbound_nodes - - ############################################################################## - # Methods & attributes below are public aliases of other methods. # - ############################################################################## - - @property - def variables(self): - """Returns the list of all layer variables/weights. - - Alias of `self.weights`. - - Returns: - A list of variables. - """ - return self.weights - - @property - def trainable_variables(self): - return self.trainable_weights - - @property - def non_trainable_variables(self): - return self.non_trainable_weights - - ############################################################################## - # Methods & attributes below are all private and only used by the framework. 
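To make the weight accessors above concrete, here is a minimal standalone sketch (assuming TensorFlow 2.x; the layer and shapes are illustrative, not part of this patch):

import tensorflow as tf

layer = tf.keras.layers.Dense(4)
layer.build((None, 3))  # creates a (3, 4) kernel and a (4,) bias

# `variables` is a documented alias of `weights`.
assert layer.variables == layer.weights

# `count_params()` sums scalar elements across all weights: 3*4 + 4 = 16.
assert layer.count_params() == 16

# `get_weights()`/`set_weights()` round-trip layer state as NumPy arrays.
state = layer.get_weights()
layer.set_weights(state)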
# - ############################################################################## - - @property - def _inbound_nodes(self): - return self._inbound_nodes_value - - @_inbound_nodes.setter - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _inbound_nodes(self, value): - self._inbound_nodes_value = value - - @property - def _outbound_nodes(self): - return self._outbound_nodes_value - - @_outbound_nodes.setter - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _outbound_nodes(self, value): - self._outbound_nodes_value = value - - def _set_dtype_policy(self, dtype): - """Sets self._dtype_policy.""" - if isinstance(dtype, policy.Policy): - self._dtype_policy = dtype - elif isinstance(dtype, dict): - self._dtype_policy = policy.deserialize(dtype) - elif isinstance(dtype, str) and dtype in ('mixed_float16', - 'mixed_bfloat16'): - # The isinstance check is required since np.dtype raises an error if - # compared to a non-dtype string. - self._dtype_policy = policy.Policy(dtype) - elif dtype: - self._dtype_policy = policy.Policy(tf.as_dtype(dtype).name) - else: - self._dtype_policy = policy.global_policy() - if (self._dtype_policy.name == 'mixed_float16' and - not loss_scale_optimizer.strategy_supports_loss_scaling()): - # Although only loss scaling doesn't support certain strategies, to avoid - # confusion, we disallow the 'mixed_float16' policy with unsupported - # strategies. This is because 'mixed_float16' requires loss scaling for - # numeric stability. - strategy = tf.distribute.get_strategy() - raise ValueError('Mixed precision is not supported with the ' - 'tf.distribute.Strategy: %s. Either stop using mixed ' - 'precision by removing the use of the "%s" policy or ' - 'use a different Strategy, e.g. a MirroredStrategy.' % - (strategy.__class__.__name__, self._dtype_policy.name)) - - # Performance optimization: cache the compute dtype as a Dtype object or - # None, so that str to Dtype conversion doesn't happen in Layer.__call__. - if self._dtype_policy.compute_dtype: - self._compute_dtype_object = tf.as_dtype( - self._dtype_policy.compute_dtype) - else: - self._compute_dtype_object = None - - # TODO(reedwm): Expose this property? - @property - def _compute_dtype(self): - """The layer's compute dtype. - - Unless mixed-precision is used, this is the same as `Layer.dtype`. - - If self._autocast is True, layer's will cast floating-point inputs to this. - - Returns: - The layer's compute dtype. - """ - return self._dtype_policy.compute_dtype - - def _maybe_cast_inputs(self, inputs): - """Maybe casts the inputs to the compute dtype. - - If self._compute_dtype is floating-point, and self_autocast is True, - floating-point inputs are casted to self._compute_dtype. - - Args: - inputs: Input tensor, or structure of input tensors. - - Returns: - `inputs`, but tensors may have been casted to self._compute_dtype - """ - compute_dtype = self._compute_dtype - if (self._autocast and compute_dtype and - tf.as_dtype(compute_dtype).is_floating): - def f(x): - """Cast a single Tensor or TensorSpec to the compute dtype.""" - cast_types = (tf.Tensor, tf.SparseTensor, - tf.RaggedTensor) - if (isinstance(x, cast_types) and x.dtype.is_floating and - x.dtype.base_dtype.name != compute_dtype): - return tf.cast(x, compute_dtype) - elif isinstance(x, tf.TensorSpec) and x.dtype.is_floating: - # Inputs may be TensorSpecs when this function is called from - # model._set_inputs. 
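As a sketch of the dtype-policy behavior that `_set_dtype_policy` and `_maybe_cast_inputs` implement (observable effects only; assumes TF 2.x with the mixed-precision API available):

import tensorflow as tf

# Under a mixed policy, variables are kept in float32 while computation
# (and autocasting of floating-point inputs) uses float16.
layer = tf.keras.layers.Dense(2, dtype="mixed_float16")
print(layer.dtype_policy.compute_dtype)   # float16
print(layer.dtype_policy.variable_dtype)  # float32

y = layer(tf.ones((1, 3)))
print(y.dtype)  # float16: float inputs were cast to the compute dtype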
- return tf.TensorSpec(x.shape, compute_dtype, x.name) + return values + + def _maybe_build(self, inputs): + # Check input assumptions set before layer building, e.g. input rank. + if not self.built: + input_spec.assert_input_compatibility( + self.input_spec, inputs, self.name + ) + input_list = tf.nest.flatten(inputs) + if input_list and self._dtype_policy.compute_dtype is None: + try: + dtype = input_list[0].dtype.base_dtype.name + except AttributeError: + pass + else: + self._set_dtype_policy(policy.Policy(dtype)) + input_shapes = None + if all(hasattr(x, "shape") for x in input_list): + input_shapes = tf.nest.map_structure(lambda x: x.shape, inputs) + # Only call `build` if the user has manually overridden the build + # method. + if not hasattr(self.build, "_is_default"): + # Any setup work performed only once should happen in an + # `init_scope` to avoid creating symbolic Tensors that will + # later pollute any eager operations. + with tf_utils.maybe_init_scope(self): + self.build(input_shapes) + # We must set also ensure that the layer is marked as built, and the + # build shape is stored since user defined build functions may not + # be calling `super.build()` + Layer.build(self, input_shapes) + + # Optionally load weight values specified at layer instantiation. + if self._initial_weights is not None: + self.set_weights(self._initial_weights) + self._initial_weights = None + + def _symbolic_call(self, inputs): + input_shapes = tf.nest.map_structure(lambda x: x.shape, inputs) + output_shapes = self.compute_output_shape(input_shapes) + + def _make_placeholder_like(shape): + ph = backend.placeholder(shape=shape, dtype=self.dtype) + ph._keras_mask = None + return ph + + return tf.nest.map_structure(_make_placeholder_like, output_shapes) + + def _get_trainable_state(self): + """Get the `trainable` state of each sublayer. + + Returns: + A dict mapping all sublayers to their `trainable` value. + """ + layers = self._flatten_layers(include_self=False, recursive=False) + trainable_state = {self: self.trainable} + for l in layers: + trainable_state.update(l._get_trainable_state()) + return trainable_state + + def _set_trainable_state(self, trainable_state): + """Set `trainable` state for each sublayer.""" + if self in trainable_state: + self.trainable = trainable_state[self] + layers = self._flatten_layers(include_self=False, recursive=False) + for l in layers: + if l in trainable_state: + l._set_trainable_state(trainable_state) + + @property + def _obj_reference_counts(self): + """A dict counting the number of attributes referencing an object.""" + self._maybe_create_attribute( + "_obj_reference_counts_dict", + object_identity.ObjectIdentityDictionary(), + ) + return self._obj_reference_counts_dict + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _maybe_create_attribute(self, name, default_value): + """Create attribute (with the default value) if it hasn't been created. + + This is useful for fields that is used for tracking purpose, + _trainable_weights, or _layers. Note that user could create a layer + subclass and assign an internal field before invoking the + Layer.__init__(), the __setattr__() need to create the tracking fields + and __init__() need to not override them. + + Args: + name: String, the name of the attribute. + default_value: Object, the default value of the attribute. 
+ """ + if not hasattr(self, name): + self.__setattr__(name, default_value) + + def __delattr__(self, name): + # For any super.__delattr__() call, we will directly use the + # implementation in Trackable and skip the behavior in AutoTrackable. + # The Layer was originally use Trackable as base class, the change of + # using Module as base class forced us to have AutoTrackable in the + # class hierarchy. + # + # TODO(b/180760306) Keeping the status quo of skipping _delattr__ and + # __setattr__ in AutoTrackable may be unsustainable. + existing_value = getattr(self, name, None) + + # If this value is replacing an existing object assigned to an + # attribute, we should clean it out to avoid leaking memory. First we + # check if there are other attributes referencing it. + reference_counts = self._obj_reference_counts + if existing_value not in reference_counts: + super(tf.__internal__.tracking.AutoTrackable, self).__delattr__( + name + ) + return + + reference_count = reference_counts[existing_value] + if reference_count > 1: + # There are other remaining references. We can't remove this object + # from _layers etc. + reference_counts[existing_value] = reference_count - 1 + super(tf.__internal__.tracking.AutoTrackable, self).__delattr__( + name + ) + return else: - return x - return tf.nest.map_structure(f, inputs) - else: - return inputs - - # _dtype used to be an attribute set in the constructor. We still expose it - # because some clients still use it. - # TODO(reedwm): Deprecate, then remove the _dtype property. - @property - def _dtype(self): - # This is equivalent to returning self.dtype . We do not return self.dtype - # as it would cause infinite recursion in a few subclasses, which override - # "dtype" to return self._dtype. - return self._dtype_policy.variable_dtype - - @_dtype.setter - def _dtype(self, value): - value = tf.as_dtype(value).name - self._set_dtype_policy(policy.Policy(value)) - - def _name_scope(self): # pylint: disable=method-hidden - return self.name - - def _init_set_name(self, name, zero_based=True): - if not name: - self._name = backend.unique_object_name( - generic_utils.to_snake_case(self.__class__.__name__), - zero_based=zero_based) - else: - self._name = name - - def _get_existing_metric(self, name=None): - match = [m for m in self._metrics if m.name == name] - if not match: - return - if len(match) > 1: - raise ValueError( - 'Please provide different names for the metrics you have added. ' - 'We found {} metrics with the name: "{}"'.format(len(match), name)) - return match[0] - - def _symbolic_add_metric(self, value, aggregation=None, name=None): - base_layer_utils.check_graph_consistency(value, method='add_metric') - match = self._get_existing_metric(name) - if aggregation is None: - # Iterate over the metrics and check if the given metric exists already. - # This can happen when a metric instance is created in subclassed model - # layer `__init__` and we have tracked that instance already in - # model.__setattr__. - if match: - result_tensor = value - metric_obj = match - elif hasattr(value, '_metric_obj'): - # We track the instance using the metadata on the result tensor. - result_tensor = value - metric_obj = result_tensor._metric_obj - self._metrics.append(metric_obj) - else: - raise ValueError( - 'We do not support adding an aggregated metric result tensor that ' - 'is not the output of a `tf.keras.metrics.Metric` metric instance. 
' - 'Without having access to the metric instance we cannot reset the ' - 'state of a metric after every epoch during training. You can ' - 'create a `tf.keras.metrics.Metric` instance and pass the result ' - 'here or pass an un-aggregated result with `aggregation` parameter ' - 'set as `mean`. For example: `self.add_metric(tf.reduce_sum(inputs)' - ', name=\'mean_activation\', aggregation=\'mean\')`') - else: - # If a non-aggregated tensor is given as input (ie. `aggregation` is - # explicitly set to `mean`), we wrap the tensor in `Mean` metric. - if match: - result_tensor = match(value) - metric_obj = match - else: - metric_obj, result_tensor = base_layer_utils.create_mean_metric( - value, name) - self._metrics.append(metric_obj) - - def _handle_weight_regularization(self, name, variable, regularizer): - """Create lambdas which compute regularization losses.""" - - def _loss_for_variable(v): - """Creates a regularization loss `Tensor` for variable `v`.""" - with backend.name_scope(name + '/Regularizer'): - regularization = regularizer(v) - return regularization - - if base_layer_utils.is_split_variable(variable): - for v in variable: - self.add_loss(functools.partial(_loss_for_variable, v)) - else: - self.add_loss(functools.partial(_loss_for_variable, variable)) - - def _handle_activity_regularization(self, inputs, outputs): - # Apply activity regularization. - # Note that it should be applied every time the layer creates a new - # output, since it is output-specific. - if self._activity_regularizer: - output_list = tf.nest.flatten(outputs) - with backend.name_scope('ActivityRegularizer'): - for output in output_list: - activity_loss = tf.convert_to_tensor( - self._activity_regularizer(output)) - batch_size = tf.cast( - tf.compat.v1.shape(output)[0], activity_loss.dtype) - # Make activity regularization strength batch-agnostic. - mean_activity_loss = activity_loss / batch_size - base_layer_utils.check_graph_consistency( - mean_activity_loss, method='activity_regularizer') - self.add_loss(mean_activity_loss, inputs=inputs) - - def _set_mask_metadata(self, inputs, outputs, previous_mask): - flat_outputs = tf.nest.flatten(outputs) - - mask_already_computed = ( - getattr(self, '_compute_output_and_mask_jointly', False) or - all(getattr(x, '_keras_mask', None) is not None for x in flat_outputs)) - - # Only compute the mask if the Layer explicitly supports masking or has - # overridden `compute_mask`. - should_compute_mask = ( - hasattr(self, 'compute_mask') and - (self.supports_masking or - not getattr(self.compute_mask, '_is_default', False))) - - if mask_already_computed: - flat_masks = [getattr(x, '_keras_mask', None) for x in flat_outputs] - elif not should_compute_mask: - flat_masks = [None for _ in flat_outputs] - else: - output_masks = self.compute_mask(inputs, previous_mask) - # `compute_mask` can return a single `None` even when a Layer - # has multiple outputs. - if output_masks is None: - flat_masks = [None for _ in flat_outputs] - else: - flat_masks = tf.nest.flatten(output_masks) - - for output, mask in zip(flat_outputs, flat_masks): - try: - output._keras_mask = mask - except AttributeError: - # C Type such as np.ndarray. - pass - - if tf_utils.are_all_symbolic_tensors(flat_outputs): - for output in flat_outputs: - if getattr(output, '_keras_mask', None) is not None: - # Do not track masks for `TensorFlowOpLayer` construction. 
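The mask bookkeeping above is easiest to see from the outside. A small sketch (Embedding is simply a convenient mask-producing layer, not something this diff touches):

import tensorflow as tf

# Layers attach masks to their outputs as `_keras_mask`; downstream layers
# pick them up again via `_collect_input_masks`.
emb = tf.keras.layers.Embedding(input_dim=10, output_dim=4, mask_zero=True)
out = emb(tf.constant([[1, 2, 0, 0]]))
print(out._keras_mask.numpy())  # [[ True  True False False]]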
- output._keras_mask._keras_history_checked = True - - def _collect_input_masks(self, inputs, args, kwargs): - """Checks if `mask` argument was passed, else gathers mask from inputs.""" - if self._call_spec.arg_was_passed('mask', args, kwargs): - return self._call_spec.get_arg_value('mask', args, kwargs) - - if not self._should_compute_mask: - return None - - input_masks = tf.nest.map_structure( - lambda t: getattr(t, '_keras_mask', None), inputs) - if generic_utils.is_all_none(input_masks): - return None - return input_masks - - def _get_node_attribute_at_index(self, node_index, attr, attr_name): - """Private utility to retrieves an attribute (e.g. inputs) from a node. - - This is used to implement the methods: - - get_input_shape_at - - get_output_shape_at - - get_input_at - etc... - - Args: - node_index: Integer index of the node from which - to retrieve the attribute. - attr: Exact node attribute name. - attr_name: Human-readable attribute name, for error messages. - - Returns: - The layer's attribute `attr` at the node of index `node_index`. - - Raises: - RuntimeError: If the layer has no inbound nodes, or if called in Eager - mode. - ValueError: If the index provided does not match any node. - """ - if not self._inbound_nodes: - raise RuntimeError('The layer has never been called ' - 'and thus has no defined ' + attr_name + '.') - if not len(self._inbound_nodes) > node_index: - raise ValueError('Asked to get ' + attr_name + ' at node ' + - str(node_index) + ', but the layer has only ' + - str(len(self._inbound_nodes)) + ' inbound nodes.') - values = getattr(self._inbound_nodes[node_index], attr) - if isinstance(values, list) and len(values) == 1: - return values[0] - else: - return values - - def _maybe_build(self, inputs): - # Check input assumptions set before layer building, e.g. input rank. - if not self.built: - input_spec.assert_input_compatibility( - self.input_spec, inputs, self.name) - input_list = tf.nest.flatten(inputs) - if input_list and self._dtype_policy.compute_dtype is None: + # This is the last remaining reference. + del reference_counts[existing_value] + + super(tf.__internal__.tracking.AutoTrackable, self).__delattr__(name) + + if isinstance(existing_value, Layer) or base_layer_utils.has_weights( + existing_value + ): + super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( + "_self_tracked_trackables", + [ + l + for l in self._self_tracked_trackables + if l is not existing_value + ], + ) + if isinstance(existing_value, tf.Variable): + super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( + "_trainable_weights", + [w for w in self._trainable_weights if w is not existing_value], + ) + super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( + "_non_trainable_weights", + [ + w + for w in self._non_trainable_weights + if w is not existing_value + ], + ) + + def __setattr__(self, name, value): + if ( + name == "_self_setattr_tracking" + or not getattr(self, "_self_setattr_tracking", True) + # Exclude @property.setters from tracking + or hasattr(self.__class__, name) + ): + try: + super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( + name, value + ) + except AttributeError: + raise AttributeError( + ( + 'Can\'t set the attribute "{}", likely because it ' + "conflicts with an existing read-only @property of the " + "object. Please choose a different name." + ).format(name) + ) + return + + # Keep track of trackable objects, for the needs of + # `Network.save_weights`. 
+ value = tf.__internal__.tracking.sticky_attribute_assignment( + trackable=self, value=value, name=name + ) + + reference_counts = self._obj_reference_counts + reference_counts[value] = reference_counts.get(value, 0) + 1 + + # Clean out the old attribute, which clears _layers and + # _trainable_weights if necessary. try: - dtype = input_list[0].dtype.base_dtype.name + self.__delattr__(name) except AttributeError: - pass - else: - self._set_dtype_policy(policy.Policy(dtype)) - input_shapes = None - if all(hasattr(x, 'shape') for x in input_list): - input_shapes = tf.nest.map_structure(lambda x: x.shape, inputs) - # Only call `build` if the user has manually overridden the build method. - if not hasattr(self.build, '_is_default'): - # Any setup work performed only once should happen in an `init_scope` - # to avoid creating symbolic Tensors that will later pollute any eager - # operations. - with tf_utils.maybe_init_scope(self): - self.build(input_shapes) - # We must set also ensure that the layer is marked as built, and the build - # shape is stored since user defined build functions may not be calling - # `super.build()` - Layer.build(self, input_shapes) - - # Optionally load weight values specified at layer instantiation. - if self._initial_weights is not None: - self.set_weights(self._initial_weights) - self._initial_weights = None - - def _symbolic_call(self, inputs): - input_shapes = tf.nest.map_structure(lambda x: x.shape, inputs) - output_shapes = self.compute_output_shape(input_shapes) - - def _make_placeholder_like(shape): - ph = backend.placeholder(shape=shape, dtype=self.dtype) - ph._keras_mask = None - return ph - - return tf.nest.map_structure(_make_placeholder_like, output_shapes) - - def _get_trainable_state(self): - """Get the `trainable` state of each sublayer. - - Returns: - A dict mapping all sublayers to their `trainable` value. - """ - layers = self._flatten_layers(include_self=False, recursive=False) - trainable_state = {self: self.trainable} - for l in layers: - trainable_state.update(l._get_trainable_state()) - return trainable_state - - def _set_trainable_state(self, trainable_state): - """Set `trainable` state for each sublayer.""" - if self in trainable_state: - self.trainable = trainable_state[self] - layers = self._flatten_layers(include_self=False, recursive=False) - for l in layers: - if l in trainable_state: - l._set_trainable_state(trainable_state) - - @property - def _obj_reference_counts(self): - """A dictionary counting the number of attributes referencing an object.""" - self._maybe_create_attribute('_obj_reference_counts_dict', - object_identity.ObjectIdentityDictionary()) - return self._obj_reference_counts_dict - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _maybe_create_attribute(self, name, default_value): - """Create the attribute with the default value if it hasn't been created. - - This is useful for fields that is used for tracking purpose, - _trainable_weights, or _layers. Note that user could create a layer subclass - and assign an internal field before invoking the Layer.__init__(), the - __setattr__() need to create the tracking fields and __init__() need to not - override them. - - Args: - name: String, the name of the attribute. - default_value: Object, the default value of the attribute. 
- """ - if not hasattr(self, name): - self.__setattr__(name, default_value) - - def __delattr__(self, name): - # For any super.__delattr__() call, we will directly use the implementation - # in Trackable and skip the behavior in AutoTrackable. The Layer was - # originally use Trackable as base class, the change of using Module as base - # class forced us to have AutoTrackable in the class hierarchy. - # - # TODO(b/180760306) Keeping the status quo of skipping _delattr__ and - # __setattr__ in AutoTrackable may be unsustainable. - existing_value = getattr(self, name, None) - - # If this value is replacing an existing object assigned to an attribute, we - # should clean it out to avoid leaking memory. First we check if there are - # other attributes referencing it. - reference_counts = self._obj_reference_counts - if existing_value not in reference_counts: - super(tf.__internal__.tracking.AutoTrackable, self).__delattr__(name) # pylint: disable=bad-super-call - return - - reference_count = reference_counts[existing_value] - if reference_count > 1: - # There are other remaining references. We can't remove this object from - # _layers etc. - reference_counts[existing_value] = reference_count - 1 - super(tf.__internal__.tracking.AutoTrackable, self).__delattr__(name) # pylint: disable=bad-super-call - return - else: - # This is the last remaining reference. - del reference_counts[existing_value] - - super(tf.__internal__.tracking.AutoTrackable, self).__delattr__(name) # pylint: disable=bad-super-call - - if (isinstance(existing_value, Layer) - or base_layer_utils.has_weights(existing_value)): - super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( # pylint: disable=bad-super-call - '_self_tracked_trackables', - [l for l in self._self_tracked_trackables if l is not existing_value]) - if isinstance(existing_value, tf.Variable): - super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( # pylint: disable=bad-super-call - '_trainable_weights', - [w for w in self._trainable_weights if w is not existing_value]) - super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( # pylint: disable=bad-super-call - '_non_trainable_weights', - [w for w in self._non_trainable_weights if w is not existing_value]) - - def __setattr__(self, name, value): - if (name == '_self_setattr_tracking' or - not getattr(self, '_self_setattr_tracking', True) or - # Exclude @property.setters from tracking - hasattr(self.__class__, name)): - try: + pass + + # Keep track of metric instance created in subclassed layer. + from keras import metrics as metrics_module + + for val in tf.nest.flatten(value): + if isinstance(val, metrics_module.Metric) and hasattr( + self, "_metrics" + ): + self._metrics.append(val) + + # TODO(scottzhu): Need to track Module object as well for weight + # tracking. Be careful about metric if it becomes a Module in future. + # Append value to self._layers if relevant + if getattr(self, "_auto_track_sub_layers", True) and ( + isinstance(value, Layer) or base_layer_utils.has_weights(value) + ): + self._maybe_create_attribute("_self_tracked_trackables", []) + # We need to check object identity to avoid de-duplicating empty + # container types which compare equal. + if not any( + (layer is value for layer in self._self_tracked_trackables) + ): + self._self_tracked_trackables.append(value) + if hasattr(value, "_use_resource_variables"): + # Legacy layers (V1 tf.layers) must always use + # resource variables. 
+ value._use_resource_variables = True + + # Append value to list of trainable / non-trainable weights if relevant + # TODO(b/125122625): This won't pick up on any variables added to a + # list/dict after creation. + for val in tf.nest.flatten(value): + if not isinstance(val, tf.Variable): + continue + + # Users may add extra weights/variables simply by assigning them to + # attributes (invalid for graph networks) + self._maybe_create_attribute("_trainable_weights", []) + self._maybe_create_attribute("_non_trainable_weights", []) + if val.trainable: + if any(val is w for w in self._trainable_weights): + continue + self._trainable_weights.append(val) + else: + if any(val is w for w in self._non_trainable_weights): + continue + self._non_trainable_weights.append(val) + + backend.track_variable(val) + + # TODO(b/180760306) Skip the auto trackable from tf.Module to keep + # status quo. See the comment at __delattr__. super(tf.__internal__.tracking.AutoTrackable, self).__setattr__( - name, value) # pylint: disable=bad-super-call - except AttributeError: - raise AttributeError( - ('Can\'t set the attribute "{}", likely because it conflicts with ' - 'an existing read-only @property of the object. Please choose a ' - 'different name.').format(name)) - return - - # Keep track of trackable objects, for the needs of `Network.save_weights`. - value = tf.__internal__.tracking.sticky_attribute_assignment( - trackable=self, value=value, name=name) - - reference_counts = self._obj_reference_counts - reference_counts[value] = reference_counts.get(value, 0) + 1 - - # Clean out the old attribute, which clears _layers and _trainable_weights - # if necessary. - try: - self.__delattr__(name) - except AttributeError: - pass - - # Keep track of metric instance created in subclassed layer. - from keras import metrics as metrics_module # pylint: disable=g-import-not-at-top - for val in tf.nest.flatten(value): - if isinstance(val, metrics_module.Metric) and hasattr(self, '_metrics'): - self._metrics.append(val) - - # TODO(scottzhu): Need to track Module object as well for weight tracking. - # Be careful about metric if it becomes a Module in future. - # Append value to self._layers if relevant - if (getattr(self, '_auto_track_sub_layers', True) and - (isinstance(value, Layer) or base_layer_utils.has_weights(value))): - self._maybe_create_attribute('_self_tracked_trackables', []) - # We need to check object identity to avoid de-duplicating empty - # container types which compare equal. - if not any((layer is value for layer in self._self_tracked_trackables)): - self._self_tracked_trackables.append(value) - if hasattr(value, '_use_resource_variables'): - # Legacy layers (V1 tf.layers) must always use - # resource variables. - value._use_resource_variables = True - - # Append value to list of trainable / non-trainable weights if relevant - # TODO(b/125122625): This won't pick up on any variables added to a - # list/dict after creation. 
- for val in tf.nest.flatten(value): - if not isinstance(val, tf.Variable): - continue - - # Users may add extra weights/variables - # simply by assigning them to attributes (invalid for graph networks) - self._maybe_create_attribute('_trainable_weights', []) - self._maybe_create_attribute('_non_trainable_weights', []) - if val.trainable: - if any(val is w for w in self._trainable_weights): - continue - self._trainable_weights.append(val) - else: - if any(val is w for w in self._non_trainable_weights): - continue - self._non_trainable_weights.append(val) - - backend.track_variable(val) - - # TODO(b/180760306) Skip the auto trackable from tf.Module to keep status - # quo. See the comment at __delattr__. - super(tf.__internal__.tracking.AutoTrackable, self).__setattr__(name, value) # pylint: disable=bad-super-call - - # This is a hack so that the is_layer (within - # training/trackable/layer_utils.py) check doesn't get the weights attr. - # TODO(b/110718070): Remove when fixed. - def _is_layer(self): - return True - - @property - @layer_utils.cached_per_instance - def _should_compute_mask(self): - return ('mask' in self._call_spec.arg_names or - getattr(self, 'compute_mask', None) is not None) - - def _dedup_weights(self, weights): - """Dedupe weights while maintaining order as much as possible.""" - output, seen_ids = [], set() - for w in weights: - if id(w) not in seen_ids: - output.append(w) - # Track the Variable's identity to avoid __eq__ issues. - seen_ids.add(id(w)) - - return output - - # SavedModel properties. Please see keras/saving/saved_model for details. - - @property - def _trackable_saved_model_saver(self): - return layer_serialization.LayerSavedModelSaver(self) - - @property - def _object_identifier(self): - return self._trackable_saved_model_saver.object_identifier - - @property - def _tracking_metadata(self): - return self._trackable_saved_model_saver.tracking_metadata - - def _trackable_children(self, save_type='checkpoint', **kwargs): - if save_type == 'savedmodel': - cache = kwargs['cache'] - # TODO(b/213628533): This must be called before super() to ensure - # that any input shape changes are applied before getting the config of - # the model. - children = self._trackable_saved_model_saver.trackable_children(cache) - else: - children = {} - children.update(super()._trackable_children(save_type, **kwargs)) - return children - - def __getstate__(self): - # Override to support `copy.deepcopy` and pickling. - # Thread-local objects cannot be copied in Python 3, so pop these. - # Thread-local objects are used to cache losses in MirroredStrategy, and - # so shouldn't be copied. - state = self.__dict__.copy() - state.pop('_thread_local', None) - return state - - def __setstate__(self, state): - state['_thread_local'] = threading.local() - # Bypass Trackable logic as `__dict__` already contains this info. - object.__setattr__(self, '__dict__', state) + name, value + ) + + # This is a hack so that the is_layer (within + # training/trackable/layer_utils.py) check doesn't get the weights attr. + # TODO(b/110718070): Remove when fixed. 
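The net effect of this `__setattr__`/`__delattr__` machinery, in a minimal sketch (hypothetical subclass, assuming TF 2.x):

import tensorflow as tf

class Scaler(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()
        # Plain attribute assignment is intercepted by __setattr__, which
        # appends the variable to _trainable_weights and starts tracking it.
        self.scale = tf.Variable(2.0)

layer = Scaler()
assert len(layer.trainable_weights) == 1

# __delattr__ removes the variable from the tracked weight lists again.
del layer.scale
assert not layer.trainable_weights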
+ def _is_layer(self): + return True + + @property + @layer_utils.cached_per_instance + def _should_compute_mask(self): + return ( + "mask" in self._call_spec.arg_names + or getattr(self, "compute_mask", None) is not None + ) + + def _dedup_weights(self, weights): + """Dedupe weights while maintaining order as much as possible.""" + output, seen_ids = [], set() + for w in weights: + if id(w) not in seen_ids: + output.append(w) + # Track the Variable's identity to avoid __eq__ issues. + seen_ids.add(id(w)) + + return output + + # SavedModel properties. Please see keras/saving/saved_model for details. + + @property + def _trackable_saved_model_saver(self): + return layer_serialization.LayerSavedModelSaver(self) + + @property + def _object_identifier(self): + return self._trackable_saved_model_saver.object_identifier + + @property + def _tracking_metadata(self): + return self._trackable_saved_model_saver.tracking_metadata + + def _trackable_children(self, save_type="checkpoint", **kwargs): + if save_type == "savedmodel": + cache = kwargs["cache"] + # TODO(b/213628533): This must be called before super() to ensure + # that any input shape changes are applied before getting the config + # of the model. + children = self._trackable_saved_model_saver.trackable_children( + cache + ) + else: + children = {} + children.update(super()._trackable_children(save_type, **kwargs)) + return children + + def __getstate__(self): + # Override to support `copy.deepcopy` and pickling. + # Thread-local objects cannot be copied in Python 3, so pop these. + # Thread-local objects are used to cache losses in MirroredStrategy, and + # so shouldn't be copied. + state = self.__dict__.copy() + state.pop("_thread_local", None) + return state + + def __setstate__(self, state): + state["_thread_local"] = threading.local() + # Bypass Trackable logic as `__dict__` already contains this info. + object.__setattr__(self, "__dict__", state) diff --git a/keras/engine/base_preprocessing_layer.py b/keras/engine/base_preprocessing_layer.py index 0df5fec54506..bdd32405ee0f 100644 --- a/keras/engine/base_preprocessing_layer.py +++ b/keras/engine/base_preprocessing_layer.py @@ -16,286 +16,296 @@ import abc +import tensorflow.compat.v2 as tf + from keras.engine import data_adapter from keras.engine.base_layer import Layer from keras.utils import version_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.eager import context from tensorflow.python.util.tf_export import keras_export from tensorflow.tools.docs import doc_controls - keras_kpl_gauge = tf.__internal__.monitoring.BoolGauge( - '/tensorflow/api/keras/layers/preprocessing', - 'keras preprocessing layers usage', 'method') + "/tensorflow/api/keras/layers/preprocessing", + "keras preprocessing layers usage", + "method", +) -@keras_export('keras.layers.experimental.preprocessing.PreprocessingLayer') +@keras_export("keras.layers.experimental.preprocessing.PreprocessingLayer") class PreprocessingLayer(Layer, metaclass=abc.ABCMeta): - """Base class for Preprocessing Layers. - - **Don't use this class directly: it's an abstract base class!** You may - be looking for one of the many built-in - [preprocessing layers](https://keras.io/guides/preprocessing_layers/) - instead. - - Preprocessing layers are layers whose state gets computed before model - training starts. They do not get updated during training. - Most preprocessing layers implement an `adapt()` method for state computation. + """Base class for Preprocessing Layers. 
- The `PreprocessingLayer` class is the base class you would subclass to - implement your own preprocessing layers. - """ - _must_restore_from_config = True + **Don't use this class directly: it's an abstract base class!** You may + be looking for one of the many built-in + [preprocessing layers](https://keras.io/guides/preprocessing_layers/) + instead. - def __init__(self, **kwargs): - super().__init__(**kwargs) - self._is_compiled = False - self._is_adapted = False + Preprocessing layers are layers whose state gets computed before model + training starts. They do not get updated during training. Most + preprocessing layers implement an `adapt()` method for state computation. - # Sets `is_adapted=False` when `reset_state` is called. - self._reset_state_impl = self.reset_state - self.reset_state = self._reset_state_wrapper - - self._adapt_function = None - - @property - def is_adapted(self): - """Whether the layer has been fit to data already.""" - return self._is_adapted - - @doc_controls.do_not_generate_docs - def update_state(self, data): - """Accumulates statistics for the preprocessing layer. - - Arguments: - data: A mini-batch of inputs to the layer. + The `PreprocessingLayer` class is the base class you would subclass to + implement your own preprocessing layers. """ - raise NotImplementedError - - @doc_controls.do_not_generate_docs - def reset_state(self): # pylint: disable=method-hidden - """Resets the statistics of the preprocessing layer.""" - raise NotImplementedError - @doc_controls.do_not_generate_docs - def finalize_state(self): - """Finalize the statistics for the preprocessing layer. - - This method is called at the end of `adapt` or after restoring a serialized - preprocessing layer's state. This method handles any one-time operations - that should occur on the layer's state before `Layer.__call__`. - """ - pass - - @doc_controls.do_not_generate_docs - def make_adapt_function(self): - """Creates a function to execute one step of `adapt`. - - This method can be overridden to support custom adapt logic. - This method is called by `PreprocessingLayer.adapt`. - - Typically, this method directly controls `tf.function` settings, - and delegates the actual state update logic to - `PreprocessingLayer.update_state`. - - This function is cached the first time `PreprocessingLayer.adapt` - is called. The cache is cleared whenever `PreprocessingLayer.compile` - is called. - - Returns: - Function. The function created by this method should accept a - `tf.data.Iterator`, retrieve a batch, and update the state of the - layer. - """ - if self._adapt_function is not None: - return self._adapt_function - - def adapt_step(iterator): - data = next(iterator) - self._adapt_maybe_build(data) - self.update_state(data) - - if self._steps_per_execution.numpy().item() == 1: - adapt_fn = adapt_step - else: - - def adapt_fn(iterator): - for _ in tf.range(self._steps_per_execution): - adapt_step(iterator) - - if not self._run_eagerly: - adapt_fn = tf.function(adapt_fn) - - self._adapt_function = adapt_fn - return self._adapt_function - - def compile(self, run_eagerly=None, steps_per_execution=None): - """Configures the layer for `adapt`. - - Arguments: - run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s logic - will not be wrapped in a `tf.function`. Recommended to leave this as - `None` unless your `Model` cannot be run inside a `tf.function`. - steps_per_execution: Int. Defaults to 1. The number of batches to run - during each `tf.function` call. 
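A short sketch of the compile-then-adapt flow this method configures (Normalization stands in for any preprocessing layer; the numbers are arbitrary):

import numpy as np
import tensorflow as tf

norm = tf.keras.layers.Normalization(axis=None)
# Configure adapt() before running it: keep the update traced into a
# tf.function and process two batches per function call.
norm.compile(run_eagerly=False, steps_per_execution=2)
norm.adapt(np.arange(8, dtype="float32"), batch_size=2)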
Running multiple batches inside a - single `tf.function` call can greatly improve performance on TPUs or - small models with a large Python overhead. - """ - if steps_per_execution is None: - steps_per_execution = 1 - self._configure_steps_per_execution(steps_per_execution) - - if run_eagerly is None: - run_eagerly = self.dynamic - self._run_eagerly = run_eagerly - - self._is_compiled = True - - def adapt(self, data, batch_size=None, steps=None): - """Fits the state of the preprocessing layer to the data being passed. - - After calling `adapt` on a layer, a preprocessing layer's state will not - update during training. In order to make preprocessing layers efficient in - any distribution context, they are kept constant with respect to any - compiled `tf.Graph`s that call the layer. This does not affect the layer use - when adapting each layer only once, but if you adapt a layer multiple times - you will need to take care to re-compile any compiled functions as follows: - - * If you are adding a preprocessing layer to a `keras.Model`, you need to - call `model.compile` after each subsequent call to `adapt`. - * If you are calling a preprocessing layer inside `tf.data.Dataset.map`, - you should call `map` again on the input `tf.data.Dataset` after each - `adapt`. - * If you are using a `tf.function` directly which calls a preprocessing - layer, you need to call `tf.function` again on your callable after - each subsequent call to `adapt`. - - `tf.keras.Model` example with multiple adapts: - - >>> layer = tf.keras.layers.Normalization( - ... axis=None) - >>> layer.adapt([0, 2]) - >>> model = tf.keras.Sequential(layer) - >>> model.predict([0, 1, 2]) - array([-1., 0., 1.], dtype=float32) - >>> layer.adapt([-1, 1]) - >>> model.compile() # This is needed to re-compile model.predict! - >>> model.predict([0, 1, 2]) - array([0., 1., 2.], dtype=float32) - - `tf.data.Dataset` example with multiple adapts: - - >>> layer = tf.keras.layers.Normalization( - ... axis=None) - >>> layer.adapt([0, 2]) - >>> input_ds = tf.data.Dataset.range(3) - >>> normalized_ds = input_ds.map(layer) - >>> list(normalized_ds.as_numpy_iterator()) - [array([-1.], dtype=float32), - array([0.], dtype=float32), - array([1.], dtype=float32)] - >>> layer.adapt([-1, 1]) - >>> normalized_ds = input_ds.map(layer) # Re-map over the input dataset. - >>> list(normalized_ds.as_numpy_iterator()) - [array([0.], dtype=float32), - array([1.], dtype=float32), - array([2.], dtype=float32)] - - `adapt()` is meant only as a single machine utility to compute layer state. - To analyze a dataset that cannot fit on a single machine, see - [Tensorflow Transform](https://www.tensorflow.org/tfx/transform/get_started) - for a multi-machine, map-reduce solution. - - Arguments: - data: The data to train on. It can be passed either as a tf.data - Dataset, or as a numpy array. - batch_size: Integer or `None`. - Number of samples per state update. - If unspecified, `batch_size` will default to 32. - Do not specify the `batch_size` if your data is in the - form of datasets, generators, or `keras.utils.Sequence` instances - (since they generate batches). - steps: Integer or `None`. - Total number of steps (batches of samples) - When training with input tensors such as - TensorFlow data tensors, the default `None` is equal to - the number of samples in your dataset divided by - the batch size, or 1 if that cannot be determined. If x is a - `tf.data` dataset, and 'steps' is None, the epoch will run until - the input dataset is exhausted. 
When passing an infinitely - repeating dataset, you must specify the `steps` argument. This - argument is not supported with array inputs. - """ - _disallow_inside_tf_function('adapt') - if not version_utils.should_use_v2(): - raise RuntimeError('`adapt` is only supported in tensorflow v2.') # pylint: disable=g-doc-exception - if not self._is_compiled: - self.compile() # Compile with defaults. - if self.built: - self.reset_state() - data_handler = data_adapter.DataHandler( - data, - batch_size=batch_size, - steps_per_epoch=steps, - epochs=1, - steps_per_execution=self._steps_per_execution, - distribute=False) - self._adapt_function = self.make_adapt_function() - for _, iterator in data_handler.enumerate_epochs(): - with data_handler.catch_stop_iteration(): - for _ in data_handler.steps(): - self._adapt_function(iterator) - if data_handler.should_sync: - context.async_wait() - self.finalize_state() - self._is_adapted = True - - def _reset_state_wrapper(self): - """Calls `reset_state` and sets `adapted` to `False`.""" - self._reset_state_impl() - self._is_adapted = False - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _configure_steps_per_execution(self, steps_per_execution): - self._steps_per_execution = tf.Variable( - steps_per_execution, - dtype='int64', - aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) - - # TODO(omalleyt): Unify this logic with `Layer._maybe_build`. - def _adapt_maybe_build(self, data): - if not self.built: - try: - # If this is a Numpy array or tensor, we can get shape from .shape. - # If not, an attribute error will be thrown. - data_shape = data.shape - data_shape_nones = tuple([None] * len(data.shape)) - except AttributeError: - # The input has an unknown number of dimensions. - data_shape = None - data_shape_nones = None - - # TODO (b/159261555): move this to base layer build. - batch_input_shape = getattr(self, '_batch_input_shape', None) - if batch_input_shape is None: - # Set the number of dimensions. - self._batch_input_shape = data_shape_nones - self.build(data_shape) - self.built = True + _must_restore_from_config = True + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._is_compiled = False + self._is_adapted = False + + # Sets `is_adapted=False` when `reset_state` is called. + self._reset_state_impl = self.reset_state + self.reset_state = self._reset_state_wrapper + + self._adapt_function = None + + @property + def is_adapted(self): + """Whether the layer has been fit to data already.""" + return self._is_adapted + + @doc_controls.do_not_generate_docs + def update_state(self, data): + """Accumulates statistics for the preprocessing layer. + + Arguments: + data: A mini-batch of inputs to the layer. + """ + raise NotImplementedError + + @doc_controls.do_not_generate_docs + def reset_state(self): + """Resets the statistics of the preprocessing layer.""" + raise NotImplementedError + + @doc_controls.do_not_generate_docs + def finalize_state(self): + """Finalize the statistics for the preprocessing layer. + + This method is called at the end of `adapt` or after restoring a + serialized preprocessing layer's state. This method handles any one-time + operations that should occur on the layer's state before + `Layer.__call__`. + """ + pass + + @doc_controls.do_not_generate_docs + def make_adapt_function(self): + """Creates a function to execute one step of `adapt`. + + This method can be overridden to support custom adapt logic. + This method is called by `PreprocessingLayer.adapt`. 
+ + Typically, this method directly controls `tf.function` settings, + and delegates the actual state update logic to + `PreprocessingLayer.update_state`. + + This function is cached the first time `PreprocessingLayer.adapt` + is called. The cache is cleared whenever `PreprocessingLayer.compile` + is called. + + Returns: + Function. The function created by this method should accept a + `tf.data.Iterator`, retrieve a batch, and update the state of the + layer. + """ + if self._adapt_function is not None: + return self._adapt_function + + def adapt_step(iterator): + data = next(iterator) + self._adapt_maybe_build(data) + self.update_state(data) + + if self._steps_per_execution.numpy().item() == 1: + adapt_fn = adapt_step + else: + + def adapt_fn(iterator): + for _ in tf.range(self._steps_per_execution): + adapt_step(iterator) + + if not self._run_eagerly: + adapt_fn = tf.function(adapt_fn) + + self._adapt_function = adapt_fn + return self._adapt_function + + def compile(self, run_eagerly=None, steps_per_execution=None): + """Configures the layer for `adapt`. + + Arguments: + run_eagerly: Bool. If `True`, this `Model`'s + logic will not be wrapped in a `tf.function`. Recommended to leave + this as `None` unless your `Model` cannot be run inside a + `tf.function`. Defaults to `False`. + steps_per_execution: Int. The number of batches to run + during each `tf.function` call. Running multiple batches inside a + single `tf.function` call can greatly improve performance on TPUs or + small models with a large Python overhead. Defaults to `1`. + """ + if steps_per_execution is None: + steps_per_execution = 1 + self._configure_steps_per_execution(steps_per_execution) + + if run_eagerly is None: + run_eagerly = self.dynamic + self._run_eagerly = run_eagerly + + self._is_compiled = True + + def adapt(self, data, batch_size=None, steps=None): + """Fits the state of the preprocessing layer to the data being passed. + + After calling `adapt` on a layer, a preprocessing layer's state will not + update during training. In order to make preprocessing layers efficient + in any distribution context, they are kept constant with respect to any + compiled `tf.Graph`s that call the layer. This does not affect the layer + use when adapting each layer only once, but if you adapt a layer + multiple times you will need to take care to re-compile any compiled + functions as follows: + + * If you are adding a preprocessing layer to a `keras.Model`, you need + to call `model.compile` after each subsequent call to `adapt`. + * If you are calling a preprocessing layer inside + `tf.data.Dataset.map`, you should call `map` again on the input + `tf.data.Dataset` after each `adapt`. + * If you are using a `tf.function` directly which calls a preprocessing + layer, you need to call `tf.function` again on your callable after + each subsequent call to `adapt`. + + `tf.keras.Model` example with multiple adapts: + + >>> layer = tf.keras.layers.Normalization( + ... axis=None) + >>> layer.adapt([0, 2]) + >>> model = tf.keras.Sequential(layer) + >>> model.predict([0, 1, 2]) + array([-1., 0., 1.], dtype=float32) + >>> layer.adapt([-1, 1]) + >>> model.compile() # This is needed to re-compile model.predict! + >>> model.predict([0, 1, 2]) + array([0., 1., 2.], dtype=float32) + + `tf.data.Dataset` example with multiple adapts: + + >>> layer = tf.keras.layers.Normalization( + ... 
axis=None) + >>> layer.adapt([0, 2]) + >>> input_ds = tf.data.Dataset.range(3) + >>> normalized_ds = input_ds.map(layer) + >>> list(normalized_ds.as_numpy_iterator()) + [array([-1.], dtype=float32), + array([0.], dtype=float32), + array([1.], dtype=float32)] + >>> layer.adapt([-1, 1]) + >>> normalized_ds = input_ds.map(layer) # Re-map over the input dataset. + >>> list(normalized_ds.as_numpy_iterator()) + [array([0.], dtype=float32), + array([1.], dtype=float32), + array([2.], dtype=float32)] + + `adapt()` is meant only as a single machine utility to compute layer + state. To analyze a dataset that cannot fit on a single machine, see + [Tensorflow Transform]( + https://www.tensorflow.org/tfx/transform/get_started) + for a multi-machine, map-reduce solution. + + Arguments: + data: The data to train on. It can be passed either as a tf.data + Dataset, or as a numpy array. + batch_size: Integer or `None`. + Number of samples per state update. If unspecified, + `batch_size` will default to 32. Do not specify the + `batch_size` if your data is in the form of datasets, + generators, or `keras.utils.Sequence` instances (since they + generate batches). + steps: Integer or `None`. + Total number of steps (batches of samples) + When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of samples in your dataset divided by + the batch size, or 1 if that cannot be determined. If x is a + `tf.data` dataset, and 'steps' is None, the epoch will run until + the input dataset is exhausted. When passing an infinitely + repeating dataset, you must specify the `steps` argument. This + argument is not supported with array inputs. + """ + _disallow_inside_tf_function("adapt") + if not version_utils.should_use_v2(): + raise RuntimeError("`adapt` is only supported in tensorflow v2.") + if not self._is_compiled: + self.compile() # Compile with defaults. + if self.built: + self.reset_state() + data_handler = data_adapter.DataHandler( + data, + batch_size=batch_size, + steps_per_epoch=steps, + epochs=1, + steps_per_execution=self._steps_per_execution, + distribute=False, + ) + self._adapt_function = self.make_adapt_function() + for _, iterator in data_handler.enumerate_epochs(): + with data_handler.catch_stop_iteration(): + for _ in data_handler.steps(): + self._adapt_function(iterator) + if data_handler.should_sync: + context.async_wait() + self.finalize_state() + self._is_adapted = True + + def _reset_state_wrapper(self): + """Calls `reset_state` and sets `adapted` to `False`.""" + self._reset_state_impl() + self._is_adapted = False + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _configure_steps_per_execution(self, steps_per_execution): + self._steps_per_execution = tf.Variable( + steps_per_execution, + dtype="int64", + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA, + ) + + # TODO(omalleyt): Unify this logic with `Layer._maybe_build`. + def _adapt_maybe_build(self, data): + if not self.built: + try: + # If this is a Numpy array or tensor, we can get shape from + # .shape. If not, an attribute error will be thrown. + data_shape = data.shape + data_shape_nones = tuple([None] * len(data.shape)) + except AttributeError: + # The input has an unknown number of dimensions. + data_shape = None + data_shape_nones = None + + # TODO (b/159261555): move this to base layer build. + batch_input_shape = getattr(self, "_batch_input_shape", None) + if batch_input_shape is None: + # Set the number of dimensions. 
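And a sketch of the `is_adapted`/`reset_state` bookkeeping handled by `_reset_state_wrapper` (again using Normalization purely for illustration):

import numpy as np
import tensorflow as tf

norm = tf.keras.layers.Normalization(axis=None)
norm.adapt(np.array([0.0, 2.0]))
print(norm.is_adapted)  # True

# reset_state is wrapped at __init__ time, so calling it also flips
# is_adapted back to False.
norm.reset_state()
print(norm.is_adapted)  # False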
+ self._batch_input_shape = data_shape_nones + self.build(data_shape) + self.built = True def _disallow_inside_tf_function(method_name): - """Disallow calling a method inside a `tf.function`.""" - if tf.inside_function(): - error_msg = ( - 'Detected a call to `PreprocessingLayer.{method_name}` inside a ' - '`tf.function`. `PreprocessingLayer.{method_name} is a high-level ' - 'endpoint that manages its own `tf.function`. Please move the call ' - 'to `PreprocessingLayer.{method_name}` outside of all enclosing ' - '`tf.function`s. Note that you can call a `PreprocessingLayer` ' - 'directly on `Tensor`s inside a `tf.function` like: `layer(x)`, ' - 'or update its state like: `layer.update_state(x)`.').format( - method_name=method_name) - raise RuntimeError(error_msg) + """Disallow calling a method inside a `tf.function`.""" + if tf.inside_function(): + error_msg = ( + "Detected a call to `PreprocessingLayer.{method_name}` inside a " + "`tf.function`. `PreprocessingLayer.{method_name}` is a high-level " + "endpoint that manages its own `tf.function`. Please move the call " + "to `PreprocessingLayer.{method_name}` outside of all enclosing " + "`tf.function`s. Note that you can call a `PreprocessingLayer` " + "directly on `Tensor`s inside a `tf.function` like: `layer(x)`, " + "or update its state like: `layer.update_state(x)`." + ).format(method_name=method_name) + raise RuntimeError(error_msg) diff --git a/keras/engine/base_preprocessing_layer_test.py b/keras/engine/base_preprocessing_layer_test.py index f065c9325d38..af4344fd5ea6 100644 --- a/keras/engine/base_preprocessing_layer_test.py +++ b/keras/engine/base_preprocessing_layer_test.py @@ -16,223 +16,235 @@ import os +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.engine import base_preprocessing_layer from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf # Define a test-only implementation of BasePreprocessingLayer to validate # its correctness directly. class AddingPreprocessingLayer(base_preprocessing_layer.PreprocessingLayer): + def build(self, input_shape): + super().build(input_shape) + self.sum = tf.Variable(0.0, dtype=tf.float32) - def build(self, input_shape): - super().build(input_shape) - self.sum = tf.Variable(0., dtype=tf.float32) + def update_state(self, data): + self.sum.assign_add(tf.reduce_sum(tf.cast(data, tf.float32))) - def update_state(self, data): - self.sum.assign_add(tf.reduce_sum(tf.cast(data, tf.float32))) + def reset_state(self): + self.sum.assign(0.0) - def reset_state(self): # pylint: disable=method-hidden - self.sum.assign(0.) + def set_total(self, sum_value): + """This is an example of how a subclass would implement a direct setter. - def set_total(self, sum_value): - """This is an example of how a subclass would implement a direct setter. + Args: + sum_value: The total to set. + """ + self.sum.assign(sum_value) - Args: - sum_value: The total to set.
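The guard above hinges on `tf.inside_function()`; a minimal sketch of the same pattern for any helper that manages its own `tf.function` (the `fit_state` name is hypothetical):

    import tensorflow as tf

    def fit_state(data):
        # Refuse to be traced into an outer tf.function: this helper is
        # assumed to create and cache its own tf.function internally.
        if tf.inside_function():
            raise RuntimeError(
                "`fit_state` manages its own `tf.function`; call it eagerly."
            )
        return tf.reduce_sum(data)

    print(fit_state(tf.constant([1.0, 2.0])))  # Eager call: fine, prints 3.0.

    @tf.function
    def traced(x):
        return fit_state(x)

    # traced(tf.constant([1.0]))  # Would raise RuntimeError at trace time.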
- """ - self.sum.assign(sum_value) - - def call(self, inputs): - return inputs + self.sum + def call(self, inputs): + return inputs + self.sum @test_combinations.run_all_keras_modes(always_skip_v1=True) class PreprocessingLayerTest(test_combinations.TestCase): - - def test_adapt_bad_input_fails(self): - """Test that non-Dataset/Numpy inputs cause a reasonable error.""" - input_dataset = {"foo": 0} - - layer = AddingPreprocessingLayer() - if tf.executing_eagerly(): - with self.assertRaisesRegex(ValueError, "Failed to find data adapter"): + def test_adapt_bad_input_fails(self): + """Test that non-Dataset/Numpy inputs cause a reasonable error.""" + input_dataset = {"foo": 0} + + layer = AddingPreprocessingLayer() + if tf.executing_eagerly(): + with self.assertRaisesRegex( + ValueError, "Failed to find data adapter" + ): + layer.adapt(input_dataset) + else: + with self.assertRaisesRegex(ValueError, "requires a"): + layer.adapt(input_dataset) + + def test_adapt_infinite_dataset_fails(self): + """Test that preproc layers fail if an infinite dataset is passed.""" + input_dataset = tf.data.Dataset.from_tensor_slices( + np.array([[1], [2], [3], [4], [5], [0]]) + ).repeat() + + layer = AddingPreprocessingLayer() + if tf.executing_eagerly(): + with self.assertRaisesRegex(ValueError, "infinite dataset"): + layer.adapt(input_dataset) + else: + with self.assertRaisesRegex( + ValueError, ".*infinite number of elements.*" + ): + layer.adapt(input_dataset) + + def test_setter_update(self): + """Test the prototyped setter method.""" + input_data = keras.Input(shape=(1,)) + layer = AddingPreprocessingLayer() + output = layer(input_data) + model = keras.Model(input_data, output) + model._run_eagerly = test_utils.should_run_eagerly() + + layer.set_total(15) + + self.assertAllEqual([[16], [17], [18]], model.predict([1.0, 2.0, 3.0])) + + def test_pre_build_adapt_update_numpy(self): + """Test that preproc layers can adapt() before build() is called.""" + input_dataset = np.array([1, 2, 3, 4, 5]) + + layer = AddingPreprocessingLayer() layer.adapt(input_dataset) - else: - with self.assertRaisesRegex(ValueError, "requires a"): - layer.adapt(input_dataset) - - def test_adapt_infinite_dataset_fails(self): - """Test that preproc layers fail if an infinite dataset is passed.""" - input_dataset = tf.data.Dataset.from_tensor_slices( - np.array([[1], [2], [3], [4], [5], [0]])).repeat() - - layer = AddingPreprocessingLayer() - if tf.executing_eagerly(): - with self.assertRaisesRegex(ValueError, "infinite dataset"): - layer.adapt(input_dataset) - else: - with self.assertRaisesRegex(ValueError, - ".*infinite number of elements.*"): - layer.adapt(input_dataset) - - def test_setter_update(self): - """Test the prototyped setter method.""" - input_data = keras.Input(shape=(1,)) - layer = AddingPreprocessingLayer() - output = layer(input_data) - model = keras.Model(input_data, output) - model._run_eagerly = test_utils.should_run_eagerly() - - layer.set_total(15) - - self.assertAllEqual([[16], [17], [18]], model.predict([1., 2., 3.])) - - def test_pre_build_adapt_update_numpy(self): - """Test that preproc layers can adapt() before build() is called.""" - input_dataset = np.array([1, 2, 3, 4, 5]) - - layer = AddingPreprocessingLayer() - layer.adapt(input_dataset) - - input_data = keras.Input(shape=(1,)) - output = layer(input_data) - model = keras.Model(input_data, output) - model._run_eagerly = test_utils.should_run_eagerly() - self.assertAllEqual([[16], [17], [18]], model.predict([1., 2., 3.])) + input_data = 
keras.Input(shape=(1,)) + output = layer(input_data) + model = keras.Model(input_data, output) + model._run_eagerly = test_utils.should_run_eagerly() - def test_post_build_adapt_update_numpy(self): - """Test that preproc layers can adapt() after build() is called.""" - input_dataset = np.array([1, 2, 3, 4, 5]) + self.assertAllEqual([[16], [17], [18]], model.predict([1.0, 2.0, 3.0])) - input_data = keras.Input(shape=(1,)) - layer = AddingPreprocessingLayer() - output = layer(input_data) - model = keras.Model(input_data, output) - model._run_eagerly = test_utils.should_run_eagerly() + def test_post_build_adapt_update_numpy(self): + """Test that preproc layers can adapt() after build() is called.""" + input_dataset = np.array([1, 2, 3, 4, 5]) - layer.adapt(input_dataset) + input_data = keras.Input(shape=(1,)) + layer = AddingPreprocessingLayer() + output = layer(input_data) + model = keras.Model(input_data, output) + model._run_eagerly = test_utils.should_run_eagerly() - self.assertAllEqual([[16], [17], [18]], model.predict([1., 2., 3.])) - - def test_pre_build_adapt_update_dataset(self): - """Test that preproc layers can adapt() before build() is called.""" - input_dataset = tf.data.Dataset.from_tensor_slices( - np.array([[1], [2], [3], [4], [5], [0]])) - - layer = AddingPreprocessingLayer() - layer.adapt(input_dataset) - - input_data = keras.Input(shape=(1,)) - output = layer(input_data) - model = keras.Model(input_data, output) - model._run_eagerly = test_utils.should_run_eagerly() - - self.assertAllEqual([[16], [17], [18]], model.predict([1., 2., 3.])) - - def test_post_build_adapt_update_dataset(self): - """Test that preproc layers can adapt() after build() is called.""" - input_dataset = tf.data.Dataset.from_tensor_slices( - np.array([[1], [2], [3], [4], [5], [0]])) - - input_data = keras.Input(shape=(1,)) - layer = AddingPreprocessingLayer() - output = layer(input_data) - model = keras.Model(input_data, output) - model._run_eagerly = test_utils.should_run_eagerly() - - layer.adapt(input_dataset) - - self.assertAllEqual([[16], [17], [18]], model.predict([1., 2., 3.])) - - def test_weight_based_state_transfer(self): - """Test that preproc layers can transfer state via get/set weights..""" - - def get_model(): - input_data = keras.Input(shape=(1,)) - layer = AddingPreprocessingLayer() - output = layer(input_data) - model = keras.Model(input_data, output) - model._run_eagerly = test_utils.should_run_eagerly() - return (model, layer) - - input_dataset = np.array([1, 2, 3, 4, 5]) - model, layer = get_model() - layer.adapt(input_dataset) - self.assertAllEqual([[16], [17], [18]], model.predict([1., 2., 3.])) - - # Create a new model and verify it has no state carryover. - weights = model.get_weights() - model_2, _ = get_model() - self.assertAllEqual([[1], [2], [3]], model_2.predict([1., 2., 3.])) + layer.adapt(input_dataset) - # Transfer state from model to model_2 via get/set weights. 
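Outside the test harness, the get/set-weights transfer used here reduces to a few lines; a sketch with `Normalization` standing in for any adaptable layer (this assumes its adapted state is exposed as weights, which is what the test relies on):

    import numpy as np
    import tensorflow as tf

    src = tf.keras.layers.Normalization(axis=None)
    src.adapt(np.array([0.0, 2.0]))       # mean=1, variance=1.

    dst = tf.keras.layers.Normalization(axis=None)
    dst.adapt(np.array([10.0, 30.0]))     # Unrelated state, just to build it.
    dst.set_weights(src.get_weights())    # Copy the adapted state across.

    print(dst(np.array([1.0])))           # [0.], identical to src's output.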
- model_2.set_weights(weights) - self.assertAllEqual([[16], [17], [18]], model_2.predict([1., 2., 3.])) + self.assertAllEqual([[16], [17], [18]], model.predict([1.0, 2.0, 3.0])) - def test_loading_without_providing_class_fails(self): - input_data = keras.Input(shape=(1,)) - layer = AddingPreprocessingLayer() - output = layer(input_data) - model = keras.Model(input_data, output) + def test_pre_build_adapt_update_dataset(self): + """Test that preproc layers can adapt() before build() is called.""" + input_dataset = tf.data.Dataset.from_tensor_slices( + np.array([[1], [2], [3], [4], [5], [0]]) + ) - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.variables_initializer(model.variables)) + layer = AddingPreprocessingLayer() + layer.adapt(input_dataset) - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format="tf") + input_data = keras.Input(shape=(1,)) + output = layer(input_data) + model = keras.Model(input_data, output) + model._run_eagerly = test_utils.should_run_eagerly() - with self.assertRaisesRegex(ValueError, - "Unknown layer: AddingPreprocessingLayer"): - _ = keras.models.load_model(output_path) + self.assertAllEqual([[16], [17], [18]], model.predict([1.0, 2.0, 3.0])) - def test_adapt_sets_input_shape_rank(self): - """Check that `.adapt()` sets the `input_shape`'s rank.""" - # Shape: (3,1,2) - adapt_dataset = np.array([[[1., 2.]], [[3., 4.]], [[5., 6.]]], - dtype=np.float32) + def test_post_build_adapt_update_dataset(self): + """Test that preproc layers can adapt() after build() is called.""" + input_dataset = tf.data.Dataset.from_tensor_slices( + np.array([[1], [2], [3], [4], [5], [0]]) + ) - layer = AddingPreprocessingLayer() - layer.adapt(adapt_dataset) + input_data = keras.Input(shape=(1,)) + layer = AddingPreprocessingLayer() + output = layer(input_data) + model = keras.Model(input_data, output) + model._run_eagerly = test_utils.should_run_eagerly() - input_dataset = np.array([[[1., 2.], [3., 4.]], [[3., 4.], [5., 6.]]], - dtype=np.float32) - layer(input_dataset) + layer.adapt(input_dataset) - model = keras.Sequential([layer]) - self.assertTrue(model.built) - self.assertEqual(model.input_shape, (None, None, None)) + self.assertAllEqual([[16], [17], [18]], model.predict([1.0, 2.0, 3.0])) - def test_adapt_doesnt_overwrite_input_shape(self): - """Check that `.adapt()` doesn't change the `input_shape`.""" - # Shape: (3, 1, 2) - adapt_dataset = np.array([[[1., 2.]], [[3., 4.]], [[5., 6.]]], - dtype=np.float32) + def test_weight_based_state_transfer(self): + """Test that preproc layers can transfer state via get/set weights..""" - layer = AddingPreprocessingLayer(input_shape=[1, 2]) - layer.adapt(adapt_dataset) + def get_model(): + input_data = keras.Input(shape=(1,)) + layer = AddingPreprocessingLayer() + output = layer(input_data) + model = keras.Model(input_data, output) + model._run_eagerly = test_utils.should_run_eagerly() + return (model, layer) - model = keras.Sequential([layer]) - self.assertTrue(model.built) - self.assertEqual(model.input_shape, (None, 1, 2)) + input_dataset = np.array([1, 2, 3, 4, 5]) + model, layer = get_model() + layer.adapt(input_dataset) + self.assertAllEqual([[16], [17], [18]], model.predict([1.0, 2.0, 3.0])) + + # Create a new model and verify it has no state carryover. + weights = model.get_weights() + model_2, _ = get_model() + self.assertAllEqual([[1], [2], [3]], model_2.predict([1.0, 2.0, 3.0])) + + # Transfer state from model to model_2 via get/set weights. 
+ model_2.set_weights(weights) + self.assertAllEqual( + [[16], [17], [18]], model_2.predict([1.0, 2.0, 3.0]) + ) + + def test_loading_without_providing_class_fails(self): + input_data = keras.Input(shape=(1,)) + layer = AddingPreprocessingLayer() + output = layer(input_data) + model = keras.Model(input_data, output) + + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.variables_initializer(model.variables)) + + output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") + model.save(output_path, save_format="tf") + + with self.assertRaisesRegex( + ValueError, "Unknown layer: 'AddingPreprocessingLayer'" + ): + _ = keras.models.load_model(output_path) + + def test_adapt_sets_input_shape_rank(self): + """Check that `.adapt()` sets the `input_shape`'s rank.""" + # Shape: (3,1,2) + adapt_dataset = np.array( + [[[1.0, 2.0]], [[3.0, 4.0]], [[5.0, 6.0]]], dtype=np.float32 + ) + + layer = AddingPreprocessingLayer() + layer.adapt(adapt_dataset) + + input_dataset = np.array( + [[[1.0, 2.0], [3.0, 4.0]], [[3.0, 4.0], [5.0, 6.0]]], + dtype=np.float32, + ) + layer(input_dataset) + + model = keras.Sequential([layer]) + self.assertTrue(model.built) + self.assertEqual(model.input_shape, (None, None, None)) + + def test_adapt_doesnt_overwrite_input_shape(self): + """Check that `.adapt()` doesn't change the `input_shape`.""" + # Shape: (3, 1, 2) + adapt_dataset = np.array( + [[[1.0, 2.0]], [[3.0, 4.0]], [[5.0, 6.0]]], dtype=np.float32 + ) + + layer = AddingPreprocessingLayer(input_shape=[1, 2]) + layer.adapt(adapt_dataset) + + model = keras.Sequential([layer]) + self.assertTrue(model.built) + self.assertEqual(model.input_shape, (None, 1, 2)) class PreprocessingLayerV1Test(test_combinations.TestCase): + def test_adapt_fails(self): + """Test that calling adapt leads to a runtime error.""" + input_dataset = {"foo": 0} - def test_adapt_fails(self): - """Test that calling adapt leads to a runtime error.""" - input_dataset = {"foo": 0} - - with tf.Graph().as_default(): - layer = AddingPreprocessingLayer() - with self.assertRaisesRegex(RuntimeError, - "`adapt` is only supported in tensorflow v2"): - layer.adapt(input_dataset) + with tf.Graph().as_default(): + layer = AddingPreprocessingLayer() + with self.assertRaisesRegex( + RuntimeError, "`adapt` is only supported in tensorflow v2" + ): + layer.adapt(input_dataset) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/engine/compile_utils.py b/keras/engine/compile_utils.py index 3b487e15d388..5d443654ced9 100644 --- a/keras/engine/compile_utils.py +++ b/keras/engine/compile_utils.py @@ -12,805 +12,869 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=g-classes-have-attributes + """Utilities for `Model.compile`.""" import copy + +import tensorflow.compat.v2 as tf + from keras import losses as losses_mod from keras import metrics as metrics_mod -from keras.saving.experimental import saving_lib +from keras.saving import saving_lib from keras.utils import generic_utils from keras.utils import losses_utils from keras.utils import tf_utils -import tensorflow.compat.v2 as tf class Container: - """Base Container class.""" + """Base Container class.""" - def __init__(self, output_names=None): - self._output_names = output_names + def __init__(self, output_names=None, mesh=None): + self._output_names = output_names + # Used by DTensor layout map use case. 
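The v1 behaviour checked in this last test comes from the `version_utils.should_use_v2()` guard in `adapt`; sketched standalone, assuming `AddingPreprocessingLayer` as defined at the top of this file:

    import tensorflow.compat.v2 as tf

    layer = AddingPreprocessingLayer()
    with tf.Graph().as_default():
        try:
            layer.adapt({"foo": 0})
        except RuntimeError as err:
            print(err)  # `adapt` is only supported in tensorflow v2.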
Can be removed after DTensor + # based distribution strategy. + self._mesh = mesh - def build(self, y_pred): - if self._output_names is None: - # In Subclass API, output names like 'output_1' are used for - # `Metric` names. - self._output_names = create_pseudo_output_names(y_pred) + def build(self, y_pred): + if self._output_names is None: + # In Subclass API, output names like 'output_1' are used for + # `Metric` names. + self._output_names = create_pseudo_output_names(y_pred) - def _conform_to_outputs(self, outputs, struct): - """Convenience method to conform `struct` to `outputs` structure. + def _conform_to_outputs(self, outputs, struct): + """Convenience method to conform `struct` to `outputs` structure. - Mappings performed: + Mappings performed: - (1) Map a dict to a list of outputs, using the output names. - (2) Fill missing keys in a dict w/ `None`s. - (3) Map a single item to all outputs. + (1) Map a dict to a list of outputs, using the output names. + (2) Fill missing keys in a dict w/ `None`s. + (3) Map a single item to all outputs. - Args: - outputs: Model predictions. - struct: Arbitrary nested structure (e.g. of labels, sample_weights, - losses, or metrics). + Args: + outputs: Model predictions. + struct: Arbitrary nested structure (e.g. of labels, sample_weights, + losses, or metrics). - Returns: - Mapping of `struct` to `outputs` structure. - """ - struct = map_to_output_names(outputs, self._output_names, struct) - struct = map_missing_dict_keys(outputs, struct) - # Allow passing one object that applies to all outputs. - if not tf.nest.is_nested(struct) and tf.nest.is_nested(outputs): - struct = tf.nest.map_structure(lambda _: struct, outputs) - return struct + Returns: + Mapping of `struct` to `outputs` structure. + """ + struct = map_to_output_names(outputs, self._output_names, struct) + struct = map_missing_dict_keys(outputs, struct) + # Allow passing one object that applies to all outputs. + if not tf.nest.is_nested(struct) and tf.nest.is_nested(outputs): + struct = tf.nest.map_structure(lambda _: struct, outputs) + return struct - def _maybe_broadcast_to_outputs(self, outputs, objects): - """Determines if losses / metrics should be applied to all outputs. + def _maybe_broadcast_to_outputs(self, outputs, objects): + """Determines if losses / metrics should be applied to all outputs. - NOTE: This method should only be called for Metrics / Losses, not for - y_true / sample_weight. + NOTE: This method should only be called for Metrics / Losses, not for + y_true / sample_weight. - Args: - outputs: Model predictions. - objects: Arbitrary nested structure (e.g. of losses or metrics) + Args: + outputs: Model predictions. + objects: Arbitrary nested structure (e.g. of losses or metrics) - Returns: - Arbitrary nested structure of objects, maybe copied to each output. + Returns: + Arbitrary nested structure of objects, maybe copied to each output. - Applies a Loss / Metric to all outputs. - """ - if not self._should_broadcast(objects): - return objects + Applies a Loss / Metric to all outputs. + """ + if not self._should_broadcast(objects): + return objects - # When there is more than one Model output, this is needed to keep - # each Metric / Loss separate. When there is only one Model output, - # the user-supplied object should be used. - should_copy_objects = len(tf.nest.flatten(outputs)) > 1 + # When there is more than one Model output, this is needed to keep + # each Metric / Loss separate. When there is only one Model output, + # the user-supplied object should be used. 
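At the `Model.compile` level, the conform/broadcast rules above mean a single user-supplied loss serves every output, while a dict keyed by output name is mapped onto the output list; a short sketch (the two-output model is illustrative):

    import tensorflow as tf

    inp = tf.keras.Input(shape=(4,))
    out_a = tf.keras.layers.Dense(1, name="a")(inp)
    out_b = tf.keras.layers.Dense(1, name="b")(inp)
    model = tf.keras.Model(inp, [out_a, out_b])

    model.compile(optimizer="sgd", loss="mse")         # Broadcast to a and b.
    model.compile(optimizer="sgd", loss={"a": "mse"})  # "b" gets None, no loss.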
+ should_copy_objects = len(tf.nest.flatten(outputs)) > 1 - def _broadcast_fn(): - if should_copy_objects: - return tf.nest.map_structure(self._copy_object, objects) - return objects + def _broadcast_fn(): + if should_copy_objects: + return tf.nest.map_structure(self._copy_object, objects) + return objects - return tf.nest.map_structure(lambda _: _broadcast_fn(), outputs) + return tf.nest.map_structure(lambda _: _broadcast_fn(), outputs) - def _should_broadcast(self, objects): - raise NotImplementedError + def _should_broadcast(self, objects): + raise NotImplementedError - def _copy_object(self, obj): - raise NotImplementedError + def _copy_object(self, obj): + raise NotImplementedError class LossesContainer(Container): - """A container class for losses passed to `Model.compile()`. - - Args: - losses: Struct of loss function(s). See `Model.compile()` doc for more - information. - loss_weights: Weights of the losses contributions of different model - outputs. See `Model.compile()` doc for more information. - output_names: List of string. Per-output metric names. - total_loss_mean: A `keras.metrics.Mean` instance that is used to track the - mean of all losses (including compiled and regularization losses). - """ - - def __init__(self, - losses, - loss_weights=None, - output_names=None, - total_loss_mean=None): - super(LossesContainer, self).__init__(output_names=output_names) - - # Keep user-supplied values untouched for recompiling and serialization. - self._user_losses = losses - self._user_loss_weights = loss_weights - - self._losses = losses - self._loss_weights = loss_weights - self._per_output_metrics = None # Per-output losses become metrics. - - # Mean of the total loss. - self._total_loss_mean = total_loss_mean or metrics_mod.Mean(name='loss') - self._built = False - - def get_config(self): - # In case `self._losses` is a single string where we convert it to a list. 
- self._losses = tf.nest.flatten(self._losses) - return { - 'losses': [ - saving_lib.serialize_keras_object(obj) - for obj in self._losses - if obj is not None - ], - 'total_loss_mean': - saving_lib.serialize_keras_object(self._total_loss_mean) - } - - @classmethod - def from_config(cls, config): - """Returns the `LossesContainer` instance given the `config`.""" - deserialized_config = {} - for key, value in config.items(): - if isinstance(value, list): - deserialized_config[key] = [ - saving_lib.deserialize_keras_object(item) for item in value - ] - else: - deserialized_config[key] = saving_lib.deserialize_keras_object(value) - return cls(**deserialized_config) - - @property - def metrics(self): - """Per-output loss metrics.""" - if not self._built: - return [] - per_output_metrics = [ - metric_obj for metric_obj in tf.nest.flatten(self._per_output_metrics) - if metric_obj is not None - ] - return [self._total_loss_mean] + per_output_metrics - - def build(self, y_pred): - """One-time setup of loss objects.""" - super(LossesContainer, self).build(y_pred) - - self._losses = self._maybe_broadcast_to_outputs(y_pred, self._losses) - self._losses = self._conform_to_outputs(y_pred, self._losses) - self._losses = tf.nest.map_structure(self._get_loss_object, self._losses) - self._losses = tf.nest.flatten(self._losses) - - self._loss_weights = self._maybe_broadcast_to_outputs( - y_pred, self._loss_weights) - self._loss_weights = self._conform_to_outputs(y_pred, self._loss_weights) - self._loss_weights = tf.nest.flatten(self._loss_weights) - - self._create_metrics() - self._built = True - - @property - def built(self): - return self._built - - def _create_metrics(self): - """Creates per-output loss metrics, but only for multi-output Models.""" - if len(self._output_names) == 1: - self._per_output_metrics = [None] - else: - self._per_output_metrics = [] - for loss_obj, output_name in zip(self._losses, self._output_names): - if loss_obj is None: - self._per_output_metrics.append(None) - else: - self._per_output_metrics.append( - metrics_mod.Mean(output_name + '_loss')) - - def __call__(self, - y_true, - y_pred, - sample_weight=None, - regularization_losses=None): - """Computes the overall loss. - - Args: - y_true: An arbitrary structure of Tensors representing the ground truth. - y_pred: An arbitrary structure of Tensors representing a Model's outputs. - sample_weight: An arbitrary structure of Tensors representing the - per-sample loss weights. If one Tensor is passed, it is used for all - losses. If multiple Tensors are passed, the structure should match - `y_pred`. - regularization_losses: Additional losses to be added to the total loss. - - Returns: - The total loss as a `tf.Tensor`, or `None` if no loss results. - """ - y_true = self._conform_to_outputs(y_pred, y_true) - sample_weight = self._conform_to_outputs(y_pred, sample_weight) - - if not self._built: - self.build(y_pred) - - y_pred = tf.nest.flatten(y_pred) - y_true = tf.nest.flatten(y_true) - sample_weight = tf.nest.flatten(sample_weight) - - loss_values = [] # Used for gradient calculation. - total_loss_mean_values = [] # Used for loss metric calculation. - batch_dim = None - zip_args = (y_true, y_pred, sample_weight, self._losses, self._loss_weights, - self._per_output_metrics) - for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in zip(*zip_args): - if y_t is None or loss_obj is None: # Ok to have no loss for an output. 
- continue - - y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw) - sw = apply_mask(y_p, sw, get_mask(y_p)) - loss_value = loss_obj(y_t, y_p, sample_weight=sw) - - total_loss_mean_value = loss_value - # Correct for the `Mean` loss metrics counting each replica as a batch. - if loss_obj.reduction == losses_utils.ReductionV2.SUM: - total_loss_mean_value *= tf.distribute.get_strategy( - ).num_replicas_in_sync - - if batch_dim is None: - if tf_utils.is_ragged(y_t): - batch_dim = y_t.nrows() - else: - batch_dim = tf.shape(y_t)[0] - - if metric_obj is not None: - metric_obj.update_state(total_loss_mean_value, sample_weight=batch_dim) - - if loss_weight is not None: - loss_value *= loss_weight - total_loss_mean_value *= loss_weight - - if (loss_obj.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE or - loss_obj.reduction == losses_utils.ReductionV2.AUTO): - loss_value = losses_utils.scale_loss_for_distribution(loss_value) - - loss_values.append(loss_value) - total_loss_mean_values.append(total_loss_mean_value) - - if regularization_losses: - regularization_losses = losses_utils.cast_losses_to_common_dtype( - regularization_losses) - reg_loss = tf.add_n(regularization_losses) - total_loss_mean_values.append(reg_loss) - loss_values.append(losses_utils.scale_loss_for_distribution(reg_loss)) - - if loss_values: - total_loss_mean_values = losses_utils.cast_losses_to_common_dtype( - total_loss_mean_values) - total_total_loss_mean_value = tf.add_n(total_loss_mean_values) - self._total_loss_mean.update_state( - total_total_loss_mean_value, sample_weight=batch_dim) - - loss_values = losses_utils.cast_losses_to_common_dtype(loss_values) - total_loss = tf.add_n(loss_values) - return total_loss - else: - return None - - def reset_state(self): - """Resets the state of loss metrics.""" - if not self._built: - return - metrics = [self._total_loss_mean] + tf.nest.flatten( - self._per_output_metrics) - for metric_obj in metrics: - if metric_obj is not None: - metric_obj.reset_state() - - def _get_loss_object(self, loss): - """Returns a `Loss` object. - - Converts the user-supplied loss to a `Loss` object. Also allows - `SUM_OVER_BATCH_SIZE` reduction to be used for this loss. + """A container class for losses passed to `Model.compile()`. Args: - loss: A string, function, or `Loss` object. - - Returns: - A `Loss` object. + losses: Struct of loss function(s). See `Model.compile()` doc for more + information. + loss_weights: Weights of the losses contributions of different model + outputs. See `Model.compile()` doc for more information. + output_names: List of string. Per-output metric names. + total_loss_mean: A `keras.metrics.Mean` instance that is used to track the + mean of all losses (including compiled and regularization losses). """ - if loss is None: - return None # Ok to have no loss for an output. - - loss = losses_mod.get(loss) - if not isinstance(loss, losses_mod.Loss): - loss_name = get_custom_object_name(loss) - if loss_name is None: - raise ValueError( - f'Loss should be a callable, received: {loss}') - loss = losses_mod.LossFunctionWrapper(loss, name=loss_name) - loss._allow_sum_over_batch_size = True # pylint: disable=protected-access - return loss - - def _should_broadcast(self, obj): - return not tf.nest.is_nested(obj) - def _copy_object(self, obj): - return obj # Losses don't need to be copied. 
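To make the control flow above concrete, the container can be exercised directly; a sketch against the internal `keras.engine.compile_utils` module as laid out in this diff (internal API, subject to change):

    import tensorflow as tf
    from keras.engine import compile_utils

    container = compile_utils.LossesContainer(losses="mse", output_names=["out"])
    y_true = tf.constant([[1.0], [3.0]])
    y_pred = tf.constant([[1.0], [1.0]])

    total = container(y_true, y_pred)  # Builds on first call, then computes.
    print(float(total))                # 2.0: mean of per-sample errors 0 and 4.
    print([m.name for m in container.metrics])  # ['loss']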
+ def __init__( + self, + losses, + loss_weights=None, + output_names=None, + total_loss_mean=None, + mesh=None, + ): + super(LossesContainer, self).__init__( + output_names=output_names, mesh=mesh + ) + + # Keep user-supplied values untouched for recompiling and serialization. + self._user_losses = losses + self._user_loss_weights = loss_weights + + self._losses = losses + self._loss_weights = loss_weights + self._per_output_metrics = None # Per-output losses become metrics. + + # Mean of the total loss. + self._total_loss_mean = total_loss_mean or metrics_mod.Mean( + name="loss", mesh=self._mesh + ) + self._built = False + + def get_config(self): + # In case `self._losses` is a single string where we convert it to a + # list. + self._losses = tf.nest.flatten(self._losses) + return { + "losses": [ + saving_lib.serialize_keras_object(obj) + for obj in self._losses + if obj is not None + ], + "total_loss_mean": saving_lib.serialize_keras_object( + self._total_loss_mean + ), + } + + @classmethod + def from_config(cls, config): + """Returns the `LossesContainer` instance given the `config`.""" + deserialized_config = {} + for key, value in config.items(): + if isinstance(value, list): + deserialized_config[key] = [ + saving_lib.deserialize_keras_object(item) for item in value + ] + else: + deserialized_config[key] = saving_lib.deserialize_keras_object( + value + ) + return cls(**deserialized_config) + + @property + def metrics(self): + """Per-output loss metrics.""" + if not self._built: + return [] + per_output_metrics = [ + metric_obj + for metric_obj in tf.nest.flatten(self._per_output_metrics) + if metric_obj is not None + ] + return [self._total_loss_mean] + per_output_metrics + + def build(self, y_pred): + """One-time setup of loss objects.""" + super(LossesContainer, self).build(y_pred) + + self._losses = self._maybe_broadcast_to_outputs(y_pred, self._losses) + self._losses = self._conform_to_outputs(y_pred, self._losses) + self._losses = tf.nest.map_structure( + self._get_loss_object, self._losses + ) + self._losses = tf.nest.flatten(self._losses) + + self._loss_weights = self._maybe_broadcast_to_outputs( + y_pred, self._loss_weights + ) + self._loss_weights = self._conform_to_outputs( + y_pred, self._loss_weights + ) + self._loss_weights = tf.nest.flatten(self._loss_weights) + + self._create_metrics() + self._built = True + + @property + def built(self): + return self._built + + def _create_metrics(self): + """Creates per-output loss metrics, but only for multi-output Models.""" + if len(self._output_names) == 1: + self._per_output_metrics = [None] + else: + self._per_output_metrics = [] + for loss_obj, output_name in zip(self._losses, self._output_names): + if loss_obj is None: + self._per_output_metrics.append(None) + else: + self._per_output_metrics.append( + metrics_mod.Mean(output_name + "_loss", mesh=self._mesh) + ) + + def __call__( + self, y_true, y_pred, sample_weight=None, regularization_losses=None + ): + """Computes the overall loss. + + Args: + y_true: An arbitrary structure of Tensors representing the ground + truth. + y_pred: An arbitrary structure of Tensors representing a Model's + outputs. + sample_weight: An arbitrary structure of Tensors representing the + per-sample loss weights. If one Tensor is passed, it is used for all + losses. If multiple Tensors are passed, the structure should match + `y_pred`. + regularization_losses: Additional losses to be added to the total + loss. + + Returns: + The total loss as a `tf.Tensor`, or `None` if no loss results. 
+ """ + y_true = self._conform_to_outputs(y_pred, y_true) + sample_weight = self._conform_to_outputs(y_pred, sample_weight) + + if not self._built: + self.build(y_pred) + + y_pred = tf.nest.flatten(y_pred) + y_true = tf.nest.flatten(y_true) + sample_weight = tf.nest.flatten(sample_weight) + + loss_values = [] # Used for gradient calculation. + total_loss_mean_values = [] # Used for loss metric calculation. + batch_dim = None + zip_args = ( + y_true, + y_pred, + sample_weight, + self._losses, + self._loss_weights, + self._per_output_metrics, + ) + for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in zip(*zip_args): + if ( + y_t is None or loss_obj is None + ): # Ok to have no loss for an output. + continue + + y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw) + sw = losses_utils.apply_mask(y_p, sw, losses_utils.get_mask(y_p)) + loss_value = loss_obj(y_t, y_p, sample_weight=sw) + + total_loss_mean_value = loss_value + # Correct for the `Mean` loss metrics counting each replica as a + # batch. + if loss_obj.reduction == losses_utils.ReductionV2.SUM: + total_loss_mean_value *= ( + tf.distribute.get_strategy().num_replicas_in_sync + ) + + if batch_dim is None: + if tf_utils.is_ragged(y_t): + batch_dim = y_t.nrows() + else: + batch_dim = tf.shape(y_t)[0] + + if metric_obj is not None: + metric_obj.update_state( + total_loss_mean_value, sample_weight=batch_dim + ) + + if loss_weight is not None: + loss_value *= loss_weight + total_loss_mean_value *= loss_weight + + if ( + loss_obj.reduction + == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE + or loss_obj.reduction == losses_utils.ReductionV2.AUTO + ): + loss_value = losses_utils.scale_loss_for_distribution( + loss_value + ) + + loss_values.append(loss_value) + total_loss_mean_values.append(total_loss_mean_value) + + if regularization_losses: + regularization_losses = losses_utils.cast_losses_to_common_dtype( + regularization_losses + ) + reg_loss = tf.add_n(regularization_losses) + total_loss_mean_values.append(reg_loss) + loss_values.append( + losses_utils.scale_loss_for_distribution(reg_loss) + ) + + if loss_values: + total_loss_mean_values = losses_utils.cast_losses_to_common_dtype( + total_loss_mean_values + ) + total_total_loss_mean_value = tf.add_n(total_loss_mean_values) + self._total_loss_mean.update_state( + total_total_loss_mean_value, sample_weight=batch_dim + ) + + loss_values = losses_utils.cast_losses_to_common_dtype(loss_values) + total_loss = tf.add_n(loss_values) + return total_loss + else: + return None + + def reset_state(self): + """Resets the state of loss metrics.""" + if not self._built: + return + metrics = [self._total_loss_mean] + tf.nest.flatten( + self._per_output_metrics + ) + for metric_obj in metrics: + if metric_obj is not None: + metric_obj.reset_state() + + def _get_loss_object(self, loss): + """Returns a `Loss` object. + + Converts the user-supplied loss to a `Loss` object. Also allows + `SUM_OVER_BATCH_SIZE` reduction to be used for this loss. + + Args: + loss: A string, function, or `Loss` object. + + Returns: + A `Loss` object. + """ + if loss is None: + return None # Ok to have no loss for an output. 
+ + loss = losses_mod.get(loss) + if not isinstance(loss, losses_mod.Loss): + loss_name = get_custom_object_name(loss) + if loss_name is None: + raise ValueError(f"Loss should be a callable, received: {loss}") + loss = losses_mod.LossFunctionWrapper(loss, name=loss_name) + loss._allow_sum_over_batch_size = True + return loss + + def _should_broadcast(self, obj): + return not tf.nest.is_nested(obj) + + def _copy_object(self, obj): + return obj # Losses don't need to be copied. class MetricsContainer(Container): - """A container class for metrics passed to `Model.compile`.""" - - def __init__(self, metrics=None, weighted_metrics=None, output_names=None, - from_serialized=False): - """Initializes a container for metrics. - - Arguments: - metrics: see the `metrics` argument from `tf.keras.Model.compile`. - weighted_metrics: see the `weighted_metrics` argument from - `tf.keras.Model.compile`. - output_names: A list of strings of names of outputs for the model. - from_serialized: Whether the model being compiled is from a serialized - model. Used to avoid redundantly applying pre-processing renaming - steps. - """ - super(MetricsContainer, self).__init__(output_names=output_names) + """A container class for metrics passed to `Model.compile`.""" + + def __init__( + self, + metrics=None, + weighted_metrics=None, + output_names=None, + from_serialized=False, + mesh=None, + ): + """Initializes a container for metrics. + + Arguments: + metrics: see the `metrics` argument from `tf.keras.Model.compile`. + weighted_metrics: see the `weighted_metrics` argument from + `tf.keras.Model.compile`. + output_names: A list of strings of names of outputs for the model. + from_serialized: Whether the model being compiled is from a serialized + model. Used to avoid redundantly applying pre-processing renaming + steps. + """ + super(MetricsContainer, self).__init__( + output_names=output_names, mesh=mesh + ) + + self._check_duplicated_metrics(metrics, weighted_metrics) + # Keep user-supplied values untouched for recompiling and serialization. + self._user_metrics = metrics + self._user_weighted_metrics = weighted_metrics + + self._metrics = metrics + self._weighted_metrics = weighted_metrics + self._built = False + + self._from_serialized = from_serialized + + def _check_duplicated_metrics(self, metrics, weighted_metrics): + """Raise an error when user-provided metrics contain duplicates. + + Note that metrics are stateful containers; a shared metric instance + between model.metric and model.weighted_metric will cause the same + instance to be updated twice, and report a wrong value. + + Args: + metrics: User provided metrics list. + weighted_metrics: User provided weighted metrics list. + + Raises: + ValueError, when duplicated metric instances are discovered in the + user-provided metrics and weighted metrics. + """ + seen = set() + duplicated = [] + for x in tf.nest.flatten(metrics) + tf.nest.flatten(weighted_metrics): + # We only check Metric objects. String and function objects + # will be converted to unique Metric instances. + if not isinstance(x, metrics_mod.Metric): + continue + if x in seen: + duplicated.append(x) + seen.add(x) + + if duplicated: + raise ValueError( + "Found duplicated metrics object in the user provided " + "metrics and weighted metrics. This will cause the same " + "metric object to be updated multiple times, and report " + "wrong results. 
\n" + f"Duplicated items: {duplicated}" + ) + + @property + def metrics(self): + """All metrics in this container.""" + if not self._built: + return [] + return self._metrics_in_order + + @property + def unweighted_metrics(self): + """Metrics in the container that should not be passed sample_weight.""" + if not self._built: + return None + return tf.nest.flatten(self._metrics) + + @property + def weighted_metrics(self): + """Metrics in this container that should be passed `sample_weight`.""" + if not self._built: + return None + return tf.nest.flatten(self._weighted_metrics) + + def build(self, y_pred, y_true): + """One-time setup of metric objects.""" + super(MetricsContainer, self).build(y_pred) + + self._metrics = self._maybe_broadcast_to_outputs(y_pred, self._metrics) + self._metrics = self._conform_to_outputs(y_pred, self._metrics) + + self._weighted_metrics = self._maybe_broadcast_to_outputs( + y_pred, self._weighted_metrics + ) + self._weighted_metrics = self._conform_to_outputs( + y_pred, self._weighted_metrics + ) + + # Standardize on tuple since `tf.data` turns lists into `Tensor`s. + y_pred = tf.__internal__.nest.list_to_tuple(y_pred) + y_true = tf.__internal__.nest.list_to_tuple(y_true) + self._metrics = tf.__internal__.nest.list_to_tuple(self._metrics) + self._weighted_metrics = tf.__internal__.nest.list_to_tuple( + self._weighted_metrics + ) + + # Convert to `Metric` objects, potentially disambiguating based on + # output properties. + self._metrics = tf.__internal__.nest.map_structure_up_to( + y_pred, self._get_metric_objects, self._metrics, y_true, y_pred + ) + self._weighted_metrics = tf.__internal__.nest.map_structure_up_to( + y_pred, + self._get_metric_objects, + self._weighted_metrics, + y_true, + y_pred, + ) + + self._metrics = tf.__internal__.nest.flatten_up_to( + y_pred, self._metrics, check_types=False + ) + self._weighted_metrics = tf.__internal__.nest.flatten_up_to( + y_pred, self._weighted_metrics, check_types=False + ) + + # Assumes metrics, weighted_metrics have been flattened up to outputs. + # + # If we are loading a model that has been already serialized, we do not + # want to re-apply any pre-processing metric renaming steps. + if not self._from_serialized: + self._set_metric_names() + self._create_ordered_metrics() + self._built = True + + @property + def built(self): + return self._built + + def _set_metric_names(self): + """Sets unique metric names.""" + # For multi-output models, prepend the output name to the metric name. + # For weighted metrics, prepend "weighted_" if the name would be + # non-unique. + + metric_names = set() + is_multi_output = len(self._output_names) > 1 + zip_args = (self._output_names, self._metrics, self._weighted_metrics) + for output_name, output_metrics, weighted_output_metrics in zip( + *zip_args + ): + for m in output_metrics: + if m is None: + continue + if is_multi_output: + m._name = output_name + "_" + m._name + if m._name in metric_names: + raise ValueError( + f"Found two metrics with the same name: {m._name}. " + "All the metrics added to the model need to have " + "unique names." 
+ ) + metric_names.add(m._name) + + for wm in weighted_output_metrics: + if wm is None: + continue + if is_multi_output: + if output_name + "_" + wm._name in metric_names: + wm._name = output_name + "_weighted_" + wm._name + else: + wm._name = output_name + "_" + wm._name + elif wm._name in metric_names: + wm._name = "weighted_" + wm._name + + if wm._name in metric_names: + raise ValueError( + "Found two weighted metrics with the same name: " + f"{wm._name}. All the metrics added to the model need " + "to have unique names." + ) + metric_names.add(wm._name) + + def _create_ordered_metrics(self): + """Cache the flat order needed when returning metrics, for backwards compat.""" + self._metrics_in_order = [] + for output_metrics, output_weighted_metrics in zip( + self._metrics, self._weighted_metrics + ): + for m in tf.nest.flatten(output_metrics): + if m is not None: + self._metrics_in_order.append(m) + for wm in tf.nest.flatten(output_weighted_metrics): + if wm is not None: + self._metrics_in_order.append(wm) + + def update_state(self, y_true, y_pred, sample_weight=None): + """Updates the state of per-output metrics.""" + y_true = self._conform_to_outputs(y_pred, y_true) + sample_weight = self._conform_to_outputs(y_pred, sample_weight) + + if not self._built: + self.build(y_pred, y_true) + + y_pred = tf.nest.flatten(y_pred) + y_true = tf.nest.flatten(y_true) if y_true is not None else [] + sample_weight = tf.nest.flatten(sample_weight) + + zip_args = ( + y_true, + y_pred, + sample_weight, + self._metrics, + self._weighted_metrics, + ) + for y_t, y_p, sw, metric_objs, weighted_metric_objs in zip(*zip_args): + # Ok to have no metrics for an output. + if y_t is None or ( + all(m is None for m in metric_objs) + and all(wm is None for wm in weighted_metric_objs) + ): + continue + + y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw) + mask = losses_utils.get_mask(y_p) + sw = losses_utils.apply_mask(y_p, sw, mask) + + for metric_obj in metric_objs: + if metric_obj is None: + continue + metric_obj.update_state(y_t, y_p, sample_weight=mask) + + for weighted_metric_obj in weighted_metric_objs: + if weighted_metric_obj is None: + continue + weighted_metric_obj.update_state(y_t, y_p, sample_weight=sw) + + def reset_state(self): + """Resets the state of all `Metric`s in this container.""" + if self._built: + metrics = self._metrics_in_order + else: + # If the user supplied `Metric` objects directly, we should + # reset those. This could also contain `str`s or `function`s + # though. + metrics = tf.nest.flatten(self._user_metrics) + tf.nest.flatten( + self._user_weighted_metrics + ) + + for metric_obj in metrics: + if isinstance(metric_obj, metrics_mod.Metric): + metric_obj.reset_state() + + def _get_metric_objects(self, metrics, y_t, y_p): + """Convert user-supplied metrics to `Metric` objects.""" + metrics = tf.nest.flatten(metrics) + return [self._get_metric_object(m, y_t, y_p) for m in metrics] + + def _get_metric_object(self, metric, y_t, y_p): + """Converts user-supplied metric to a `Metric` object. + + Args: + metric: A string, function, or `Metric` object. + y_t: Sample of label. + y_p: Sample of output. + + Returns: + A `Metric` object. + """ + if metric is None: + return None # Ok to have no metric for an output. + + # Convenience feature for selecting between binary, categorical, + # and sparse categorical.
+ if str(metric).lower() not in ["accuracy", "acc", "crossentropy", "ce"]: + metric_obj = metrics_mod.get(metric) + else: + y_t_rank = len(y_t.shape.as_list()) + y_p_rank = len(y_p.shape.as_list()) + y_t_last_dim = y_t.shape.as_list()[-1] + y_p_last_dim = y_p.shape.as_list()[-1] + + is_binary = y_p_last_dim == 1 + is_sparse_categorical = ( + y_t_rank < y_p_rank or y_t_last_dim == 1 and y_p_last_dim > 1 + ) + + if str(metric).lower() in ["accuracy", "acc"]: + if is_binary: + metric_obj = metrics_mod.binary_accuracy + elif is_sparse_categorical: + metric_obj = metrics_mod.sparse_categorical_accuracy + else: + metric_obj = metrics_mod.categorical_accuracy + else: + if is_binary: + metric_obj = metrics_mod.binary_crossentropy + elif is_sparse_categorical: + metric_obj = metrics_mod.sparse_categorical_crossentropy + else: + metric_obj = metrics_mod.categorical_crossentropy + + if isinstance(metric_obj, losses_mod.Loss): + metric_obj._allow_sum_over_batch_size = True + + if not isinstance(metric_obj, metrics_mod.Metric): + if isinstance(metric, str): + metric_name = metric + else: + metric_name = get_custom_object_name(metric) + if metric_name is None: + raise ValueError( + f"Metric should be a callable, received: {metric}" + ) + + metric_obj = metrics_mod.MeanMetricWrapper( + metric_obj, name=metric_name, mesh=self._mesh + ) + return metric_obj + + def _should_broadcast(self, obj): + # e.g. 'mse'. + if not tf.nest.is_nested(obj): + return True + # e.g. ['mse'] or ['mse', 'mae']. + return isinstance(obj, (list, tuple)) and not any( + tf.nest.is_nested(o) for o in obj + ) + + def _copy_object(self, obj): + if isinstance(obj, metrics_mod.Metric): + return obj.__class__.from_config(obj.get_config()) + return obj # Can be a function or `None`. - self._check_duplicated_metrics(metrics, weighted_metrics) - # Keep user-supplied values untouched for recompiling and serialization. - self._user_metrics = metrics - self._user_weighted_metrics = weighted_metrics - self._metrics = metrics - self._weighted_metrics = weighted_metrics - self._built = False +def create_pseudo_output_names(outputs): + """Create pseudo output names for a subclassed Model.""" + return _create_pseudo_names(outputs, prefix="output_") - self._from_serialized = from_serialized - def _check_duplicated_metrics(self, metrics, weighted_metrics): - """Check and raise error when user provided metrics has any duplications. +def create_pseudo_input_names(inputs): + """Create pseudo input names for a subclassed Model.""" + return _create_pseudo_names(inputs, prefix="input_") - Note that metrics are stateful container, a shared metric instance between - model.metric and model.weighted_metric will make the same intance to be - udpated twice, and report wrong value. - Args: - metrics: User provided metrics list. - weighted_metrics: User provided weighted metrics list. +def _create_pseudo_names(tensors, prefix): + """Creates pseudo {input | output} names for subclassed Models. - Raises: - ValueError, when duplicated metrics instance discovered in user provided - metrics and weighted metrics. + Warning: this function should only be used to define default + names for `Metrics` and `SavedModel`. No other use cases should + rely on a `Model`'s input or output names. - """ - seen = set() - duplicated = [] - for x in tf.nest.flatten(metrics) + tf.nest.flatten(weighted_metrics): - # We only check metrics object. The string and function objects - # will be converted to unique Metric instance. - if not isinstance(x, metrics_mod.Metric): - continue - if x in seen: - duplicated.append(x) - seen.add(x) - - if duplicated: - raise ValueError('Found duplicated metrics object in the user provided ' - 'metrics and weighted metrics. 
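The shape-driven selection above can be restated in isolation; a plain-Python sketch of the decision rule (the function name is illustrative, not Keras API):

    def resolve_accuracy(y_t_shape, y_p_shape):
        # Mirrors _get_metric_object: binary if predictions have one unit,
        # sparse-categorical if labels look like integer indices.
        is_binary = y_p_shape[-1] == 1
        is_sparse = (len(y_t_shape) < len(y_p_shape)
                     or (y_t_shape[-1] == 1 and y_p_shape[-1] > 1))
        if is_binary:
            return "binary_accuracy"
        if is_sparse:
            return "sparse_categorical_accuracy"
        return "categorical_accuracy"

    print(resolve_accuracy((32, 1), (32, 1)))    # binary_accuracy
    print(resolve_accuracy((32, 1), (32, 10)))   # sparse_categorical_accuracy
    print(resolve_accuracy((32, 10), (32, 10)))  # categorical_accuracy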
This will cause the same ' - 'metric object to be updated multiple times, and report ' - 'wrong results. \n' - f'Duplicated items: {duplicated}') - - @property - def metrics(self): - """All metrics in this container.""" - if not self._built: - return [] - return self._metrics_in_order - - @property - def unweighted_metrics(self): - """Metrics in this container that should not be passed `sample_weight`.""" - if not self._built: - return None - return tf.nest.flatten(self._metrics) - - @property - def weighted_metrics(self): - """Metrics in this container that should be passed `sample_weight`.""" - if not self._built: - return None - return tf.nest.flatten(self._weighted_metrics) - - def build(self, y_pred, y_true): - """One-time setup of metric objects.""" - super(MetricsContainer, self).build(y_pred) - - self._metrics = self._maybe_broadcast_to_outputs(y_pred, self._metrics) - self._metrics = self._conform_to_outputs(y_pred, self._metrics) - - self._weighted_metrics = self._maybe_broadcast_to_outputs( - y_pred, self._weighted_metrics) - self._weighted_metrics = self._conform_to_outputs(y_pred, - self._weighted_metrics) - - # Standardize on tuple since `tf.data` turns lists into `Tensor`s. - y_pred = tf.__internal__.nest.list_to_tuple(y_pred) - y_true = tf.__internal__.nest.list_to_tuple(y_true) - self._metrics = tf.__internal__.nest.list_to_tuple(self._metrics) - self._weighted_metrics = tf.__internal__.nest.list_to_tuple( - self._weighted_metrics) - - # Convert to `Metric` objects, potentially disambiguating based on output - # properties. - self._metrics = tf.__internal__.nest.map_structure_up_to( - y_pred, - self._get_metric_objects, - self._metrics, - y_true, - y_pred) - self._weighted_metrics = tf.__internal__.nest.map_structure_up_to( - y_pred, - self._get_metric_objects, - self._weighted_metrics, - y_true, - y_pred) - - self._metrics = tf.__internal__.nest.flatten_up_to( - y_pred, self._metrics, check_types=False) - self._weighted_metrics = tf.__internal__.nest.flatten_up_to( - y_pred, self._weighted_metrics, check_types=False) - - # Assumes metrics, weighted_metrics have been flattened up to outputs. - # - # If we are loading a model that has been already serialized, we do not - # want to re-apply any pre-processing metric renaming steps. - if not self._from_serialized: - self._set_metric_names() - self._create_ordered_metrics() - self._built = True - - @property - def built(self): - return self._built - - def _set_metric_names(self): - """Sets unique metric names.""" - # For multi-output models, prepend the output name to the metric name. - # For weighted metrics, prepend "weighted_" if the name would be non-unique. - # pylint: disable=protected-access - metric_names = set() - is_multi_output = len(self._output_names) > 1 - zip_args = (self._output_names, self._metrics, self._weighted_metrics) - for output_name, output_metrics, weighted_output_metrics in zip(*zip_args): - for m in output_metrics: - if m is None: - continue - if is_multi_output: - m._name = output_name + '_' + m._name - if m._name in metric_names: - raise ValueError( - f'Found two metrics with the same name: {m._name}. 
' - 'All the metrics added to the model need to have unique names.') - metric_names.add(m._name) - - for wm in weighted_output_metrics: - if wm is None: - continue - if is_multi_output: - if output_name + '_' + wm._name in metric_names: - wm._name = output_name + '_weighted_' + wm._name - else: - wm._name = output_name + '_' + wm._name - elif wm._name in metric_names: - wm._name = 'weighted_' + wm._name - - if wm._name in metric_names: - raise ValueError( - f'Found two weighted metrics with the same name: {wm._name}.' - 'All the metrics added to the model need to have unique names.') - metric_names.add(wm._name) - # pylint: enable=protected-access - - def _create_ordered_metrics(self): - """Cache the flat order needed when returning metrics, for backwards compat.""" - self._metrics_in_order = [] - for output_metrics, output_weighted_metrics in zip(self._metrics, - self._weighted_metrics): - for m in tf.nest.flatten(output_metrics): - if m is not None: - self._metrics_in_order.append(m) - for wm in tf.nest.flatten(output_weighted_metrics): - if wm is not None: - self._metrics_in_order.append(wm) - - def update_state(self, y_true, y_pred, sample_weight=None): - """Updates the state of per-output metrics.""" - y_true = self._conform_to_outputs(y_pred, y_true) - sample_weight = self._conform_to_outputs(y_pred, sample_weight) - - if not self._built: - self.build(y_pred, y_true) - - y_pred = tf.nest.flatten(y_pred) - y_true = tf.nest.flatten(y_true) if y_true is not None else [] - sample_weight = tf.nest.flatten(sample_weight) - - zip_args = (y_true, y_pred, sample_weight, self._metrics, - self._weighted_metrics) - for y_t, y_p, sw, metric_objs, weighted_metric_objs in zip(*zip_args): - # Ok to have no metrics for an output. - if (y_t is None or (all(m is None for m in metric_objs) and - all(wm is None for wm in weighted_metric_objs))): - continue - - y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw) - mask = get_mask(y_p) - sw = apply_mask(y_p, sw, mask) - - for metric_obj in metric_objs: - if metric_obj is None: - continue - metric_obj.update_state(y_t, y_p, sample_weight=mask) - - for weighted_metric_obj in weighted_metric_objs: - if weighted_metric_obj is None: - continue - weighted_metric_obj.update_state(y_t, y_p, sample_weight=sw) - - def reset_state(self): - """Resets the state of all `Metric`s in this container.""" - if self._built: - metrics = self._metrics_in_order - else: - # If the user supplied `Metric` objects directly, we should - # reset those. This could also contain `str`s or `function`s - # though. - metrics = tf.nest.flatten(self._user_metrics) + tf.nest.flatten( - self._user_weighted_metrics) + Warning: this function should only be used to define default + names for `Metics` and `SavedModel`. No other use cases should + rely on a `Model`'s input or output names. + + Example with dict: - for metric_obj in metrics: - if isinstance(metric_obj, metrics_mod.Metric): - metric_obj.reset_state() + `{'a': [x1, x2], 'b': x3}` becomes: + `['a_1', 'a_2', 'b']` - def _get_metric_objects(self, metrics, y_t, y_p): - """Convert user-supplied metrics to `Metric` objects.""" - metrics = tf.nest.flatten(metrics) - return [self._get_metric_object(m, y_t, y_p) for m in metrics] + Example with list: - def _get_metric_object(self, metric, y_t, y_p): - """Converts user-supplied metric to a `Metric` object. + `[x, y]` becomes: + `['output_1', 'output_2']` Args: - metric: A string, function, or `Metric` object. - y_t: Sample of label. - y_p: Sample of output. + tensors: `Model`'s outputs or inputs. 
+ prefix: 'output_' for outputs, 'input_' for inputs. Returns: - A `Metric` object. + Flattened list of pseudo names. """ - if metric is None: - return None # Ok to have no metric for an output. - # Convenience feature for selecting b/t binary, categorical, - # and sparse categorical. - if str(metric).lower() not in ['accuracy', 'acc', 'crossentropy', 'ce']: - metric_obj = metrics_mod.get(metric) - else: - y_t_rank = len(y_t.shape.as_list()) - y_p_rank = len(y_p.shape.as_list()) - y_t_last_dim = y_t.shape.as_list()[-1] - y_p_last_dim = y_p.shape.as_list()[-1] - - is_binary = y_p_last_dim == 1 - is_sparse_categorical = ( - y_t_rank < y_p_rank or y_t_last_dim == 1 and y_p_last_dim > 1) - - if str(metric).lower() in ['accuracy', 'acc']: - if is_binary: - metric_obj = metrics_mod.binary_accuracy - elif is_sparse_categorical: - metric_obj = metrics_mod.sparse_categorical_accuracy - else: - metric_obj = metrics_mod.categorical_accuracy - else: - if is_binary: - metric_obj = metrics_mod.binary_crossentropy - elif is_sparse_categorical: - metric_obj = metrics_mod.sparse_categorical_crossentropy + def one_index(ele): + # Start with "output_1" instead of "output_0". + if isinstance(ele, int): + return ele + 1 + return ele + + flat_paths = list(tf.__internal__.nest.yield_flat_paths(tensors)) + flat_paths = tf.nest.map_structure(one_index, flat_paths) + names = [] + for path in flat_paths: + if not path: + name = prefix + "1" # Single output. else: - metric_obj = metrics_mod.categorical_crossentropy - - if isinstance(metric_obj, losses_mod.Loss): - metric_obj._allow_sum_over_batch_size = True # pylint: disable=protected-access - - if not isinstance(metric_obj, metrics_mod.Metric): - if isinstance(metric, str): - metric_name = metric - else: - metric_name = get_custom_object_name(metric) - if metric_name is None: - raise ValueError( - f'Metric should be a callable, received: {metric}') - - metric_obj = metrics_mod.MeanMetricWrapper(metric_obj, name=metric_name) - - return metric_obj - - def _should_broadcast(self, obj): - # e.g. 'mse'. - if not tf.nest.is_nested(obj): - return True - # e.g. ['mse'] or ['mse', 'mae']. - return (isinstance(obj, (list, tuple)) and - not any(tf.nest.is_nested(o) for o in obj)) - - def _copy_object(self, obj): - if isinstance(obj, metrics_mod.Metric): - return obj.__class__.from_config(obj.get_config()) - return obj # Can be a function or `None`. - - -def create_pseudo_output_names(outputs): - """Create pseudo output names for a subclassed Model.""" - return _create_pseudo_names(outputs, prefix='output_') - - -def create_pseudo_input_names(inputs): - """Create pseudo input names for a subclassed Model.""" - return _create_pseudo_names(inputs, prefix='input_') - - -def _create_pseudo_names(tensors, prefix): - """Creates pseudo {input | output} names for subclassed Models. + name = "_".join(str(p) for p in path) + if isinstance(path[0], int): + name = prefix + name + names.append(name) + return names - Warning: this function should only be used to define default - names for `Metics` and `SavedModel`. No other use cases should - rely on a `Model`'s input or output names. - Example with dict: - - `{'a': [x1, x2], 'b': x3}` becomes: - `['a_1', 'a_2', 'b']` - - Example with list: +def map_to_output_names(y_pred, output_names, struct): + """Maps a dict to a list using `output_names` as keys. - `[x, y]` becomes: - `['output_1', 'output_2']` + This is a convenience feature only. 
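Worked examples of the path flattening above, matching the docstring (internal module, shown for reference):

    from keras.engine import compile_utils

    # Dict: keys become prefixes, list positions become 1-based suffixes.
    print(compile_utils.create_pseudo_output_names({"a": [0, 1], "b": 2}))
    # ['a_1', 'a_2', 'b']

    # Flat list: positional names with the "output_" prefix.
    print(compile_utils.create_pseudo_output_names([0, 1]))
    # ['output_1', 'output_2']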
When a `Model`'s outputs
+    are a list, you can specify per-output losses and metrics as
+    a dict, where the keys are the output names. If you specify
+    per-output losses and metrics via the same structure as the
+    `Model`'s outputs (recommended), no mapping is performed.
 
-  Args:
-    tensors: `Model`'s outputs or inputs.
-    prefix: 'output_' for outputs, 'input_' for inputs.
+    For the Functional API, the output names are the names of the
+    last layer of each output. For the Subclass API, the output names
+    are determined by `create_pseudo_output_names` (for example:
+    `['output_1', 'output_2']` for a list of outputs).
 
-  Returns:
-    Flattened list of pseudo names.
-  """
+    This mapping preserves backwards compatibility for `compile` and
+    `fit`.
 
-  def one_index(ele):
-    # Start with "output_1" instead of "output_0".
-    if isinstance(ele, int):
-      return ele + 1
-    return ele
+    Args:
+      y_pred: Sample outputs of the Model, to determine if this convenience
+        feature should be applied (`struct` is returned unmodified if `y_pred`
+        isn't a flat list).
+      output_names: List. The names of the outputs of the Model.
+      struct: The structure to map.
 
-  flat_paths = list(tf.__internal__.nest.yield_flat_paths(tensors))
-  flat_paths = tf.nest.map_structure(one_index, flat_paths)
-  names = []
-  for path in flat_paths:
-    if not path:
-      name = prefix + '1'  # Single output.
+    Returns:
+        `struct` mapped to a list in same order as `output_names`.
+    """
+    single_output = not tf.nest.is_nested(y_pred)
+    outputs_are_flat_list = (
+        not single_output
+        and isinstance(y_pred, (list, tuple))
+        and not any(tf.nest.is_nested(y_p) for y_p in y_pred)
+    )
+
+    if (single_output or outputs_are_flat_list) and isinstance(struct, dict):
+        output_names = output_names or create_pseudo_output_names(y_pred)
+        struct = copy.copy(struct)
+        new_struct = [struct.pop(name, None) for name in output_names]
+        if struct:
+            raise ValueError(
+                "Found unexpected losses or metrics that do not correspond "
+                f"to any Model output: {struct.keys()}. "
+                f"Valid model output names: {output_names}. "
+                f"Received struct is: {struct}."
+            )
+        if len(new_struct) == 1:
+            return new_struct[0]
+        return new_struct
     else:
-      name = '_'.join(str(p) for p in path)
-      if isinstance(path[0], int):
-        name = prefix + name
-    names.append(name)
-  return names
+        return struct
-
-
-def map_to_output_names(y_pred, output_names, struct):
-  """Maps a dict to a list using `output_names` as keys.
-
-  This is a convenience feature only. When a `Model`'s outputs
-  are a list, you can specify per-output losses and metrics as
-  a dict, where the keys are the output names. If you specify
-  per-output losses and metrics via the same structure as the
-  `Model`'s outputs (recommended), no mapping is performed.
-
-  For the Functional API, the output names are the names of the
-  last layer of each output. For the Subclass API, the output names
-  are determined by `create_pseudo_output_names` (For example:
-  `['output_1', 'output_2']` for a list of outputs).
-
-  This mapping preserves backwards compatibility for `compile` and
-  `fit`.
-
-  Args:
-    y_pred: Sample outputs of the Model, to determine if this convenience
-      feature should be applied (`struct` is returned unmodified if `y_pred`
-      isn't a flat list).
-    output_names: List. The names of the outputs of the Model.
-    struct: The structure to map.
-
-  Returns:
-    `struct` mapped to a list in same order as `output_names`.
- """ - single_output = not tf.nest.is_nested(y_pred) - outputs_are_flat_list = (not single_output and - isinstance(y_pred, (list, tuple)) and - not any(tf.nest.is_nested(y_p) for y_p in y_pred)) - - if (single_output or outputs_are_flat_list) and isinstance(struct, dict): - output_names = output_names or create_pseudo_output_names(y_pred) +def map_missing_dict_keys(y_pred, struct): + """Replaces missing dict keys in `struct` with `None` placeholders.""" + if not isinstance(y_pred, dict) or not isinstance(struct, dict): + return struct struct = copy.copy(struct) - new_struct = [struct.pop(name, None) for name in output_names] - if struct: - raise ValueError( - 'Found unexpected losses or metrics that do not correspond ' - f'to any Model output: {struct.keys()}. ' - f'Valid mode output names: {output_names}. ' - f'Received struct is: {struct}.') - if len(new_struct) == 1: - return new_struct[0] - return new_struct - else: + for k in y_pred.keys(): + if k not in struct: + struct[k] = None return struct -def map_missing_dict_keys(y_pred, struct): - """Replaces missing dict keys in `struct` with `None` placeholders.""" - if not isinstance(y_pred, dict) or not isinstance(struct, dict): - return struct - struct = copy.copy(struct) - for k in y_pred.keys(): - if k not in struct: - struct[k] = None - return struct +def match_dtype_and_rank(y_t, y_p, sw): + """Match dtype and rank of predictions.""" + if y_t.shape.rank == 1 and y_p.shape.rank == 2: + y_t = tf.expand_dims(y_t, axis=-1) + if sw is not None: + if sw.shape.rank == 1 and y_p.shape.rank == 2: + sw = tf.expand_dims(sw, axis=-1) + # Dtype. + # This is required mainly for custom loss functions which do not take care + # casting dtypes. + if (y_t.dtype.is_floating and y_p.dtype.is_floating) or ( + y_t.dtype.is_integer and y_p.dtype.is_integer + ): + y_t = tf.cast(y_t, y_p.dtype) -def match_dtype_and_rank(y_t, y_p, sw): - """Match dtype and rank of predictions.""" - if y_t.shape.rank == 1 and y_p.shape.rank == 2: - y_t = tf.expand_dims(y_t, axis=-1) - if sw is not None: - if sw.shape.rank == 1 and y_p.shape.rank == 2: - sw = tf.expand_dims(sw, axis=-1) - - # Dtype. - # This is required mainly for custom loss functions which do not take care - # casting dtypes. - if ((y_t.dtype.is_floating and y_p.dtype.is_floating) or - (y_t.dtype.is_integer and y_p.dtype.is_integer)): - y_t = tf.cast(y_t, y_p.dtype) - - if sw is not None: - sw = tf.cast(sw, y_p.dtype) - return y_t, y_p, sw - - -def get_mask(y_p): - """Returns Keras mask from tensor.""" - return getattr(y_p, '_keras_mask', None) - - -def apply_mask(y_p, sw, mask): - """Applies any mask on predictions to sample weights.""" - if mask is not None: - mask = tf.cast(mask, y_p.dtype) if sw is not None: - mask, _, sw = ( - losses_utils.squeeze_or_expand_dimensions(mask, sample_weight=sw)) - sw *= mask - else: - sw = mask - return sw + sw = tf.cast(sw, y_p.dtype) + return y_t, y_p, sw def get_custom_object_name(obj): - """Returns the name to use for a custom loss or metric callable. - - Args: - obj: Custom loss of metric callable - - Returns: - Name to use, or `None` if the object was not recognized. - """ - if hasattr(obj, 'name'): # Accept `Loss` instance as `Metric`. - return obj.name - elif hasattr(obj, '__name__'): # Function. - return obj.__name__ - elif hasattr(obj, '__class__'): # Class instance. - return generic_utils.to_snake_case(obj.__class__.__name__) - else: # Unrecognized object. - return None + """Returns the name to use for a custom loss or metric callable. 
+
+    Args:
+      obj: Custom loss or metric callable
+
+    Returns:
+      Name to use, or `None` if the object was not recognized.
+    """
+    if hasattr(obj, "name"):  # Accept `Loss` instance as `Metric`.
+        return obj.name
+    elif hasattr(obj, "__name__"):  # Function.
+        return obj.__name__
+    elif hasattr(obj, "__class__"):  # Class instance.
+        return generic_utils.to_snake_case(obj.__class__.__name__)
+    else:  # Unrecognized object.
+        return None
diff --git a/keras/engine/compile_utils_test.py b/keras/engine/compile_utils_test.py
index e62a0a4bb117..557d6e2b4e23 100644
--- a/keras/engine/compile_utils_test.py
+++ b/keras/engine/compile_utils_test.py
@@ -15,833 +15,874 @@
 """Tests for compile utilities."""
 
 import tensorflow.compat.v2 as tf
+
 from keras import backend
-from keras.testing_infra import test_combinations
 from keras import losses as losses_mod
 from keras import metrics as metrics_mod
 from keras.engine import compile_utils
+from keras.testing_infra import test_combinations
 
 
 class LossesContainerTest(test_combinations.TestCase):
+    def test_single_loss(self):
+        loss_container = compile_utils.LossesContainer("mse")
+        y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5))
+        total_loss = loss_container(y_t, y_p)
-
-  def test_single_loss(self):
-    loss_container = compile_utils.LossesContainer('mse')
-    y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5))
-    total_loss = loss_container(y_t, y_p)
-
-    self.assertTrue(loss_container._built)
-    self.assertLen(loss_container._losses, 1)
-    self.assertIsInstance(total_loss, tf.Tensor)
-    self.assertEqual(total_loss.numpy(), 1.)
-    self.assertLen(loss_container.metrics, 1)
+        self.assertTrue(loss_container._built)
+        self.assertLen(loss_container._losses, 1)
+        self.assertIsInstance(total_loss, tf.Tensor)
+        self.assertEqual(total_loss.numpy(), 1.0)
+        self.assertLen(loss_container.metrics, 1)
 
-    loss_metric = loss_container.metrics[0]
-    self.assertEqual(loss_metric.name, 'loss')
-    self.assertEqual(loss_metric.result().numpy(), 1.)
+        loss_metric = loss_container.metrics[0]
+        self.assertEqual(loss_metric.name, "loss")
+        self.assertEqual(loss_metric.result().numpy(), 1.0)
 
-    loss_container.reset_state()
-    self.assertEqual(loss_metric.result().numpy(), 0.)
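+        # reset_state() zeroes every `Metric` tracked by the container, so
+        # the aggregated loss metric reads 0.0 again after this call.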
+ loss_container.reset_state() + self.assertEqual(loss_metric.result().numpy(), 0.0) - def test_loss_list(self): - loss_container = compile_utils.LossesContainer(['mse', 'mae'], [1, 0.5]) + def test_loss_list(self): + loss_container = compile_utils.LossesContainer(["mse", "mae"], [1, 0.5]) - y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] - y_p = [tf.ones((10, 1)), tf.ones((10, 1))] - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] + y_p = [tf.ones((10, 1)), tf.ones((10, 1))] + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - total_loss = loss_container(y_t, y_p, sample_weight=sw) + total_loss = loss_container(y_t, y_p, sample_weight=sw) - self.assertEqual(loss_container._output_names, ['output_1', 'output_2']) + self.assertEqual(loss_container._output_names, ["output_1", "output_2"]) - self.assertLen(loss_container._losses, 2) - self.assertEqual(total_loss.numpy(), 0.25) + self.assertLen(loss_container._losses, 2) + self.assertEqual(total_loss.numpy(), 0.25) - loss_metric = loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertEqual(loss_metric.result().numpy(), 0.25) + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertEqual(loss_metric.result().numpy(), 0.25) - output_1_metric = loss_container.metrics[1] - self.assertEqual(output_1_metric.name, 'output_1_loss') - self.assertEqual(output_1_metric.result().numpy(), 0) - - output_2_metric = loss_container.metrics[2] - self.assertEqual(output_2_metric.name, 'output_2_loss') - self.assertEqual(output_2_metric.result().numpy(), 0.5) - - loss_container.reset_state() - self.assertEqual(loss_metric.result().numpy(), 0) - self.assertEqual(output_1_metric.result().numpy(), 0) - self.assertEqual(output_2_metric.result().numpy(), 0) - - def test_loss_dict(self): - loss_container = compile_utils.LossesContainer( - { - 'out1': 'mse', - 'out2': 'mae' - }, { - 'out1': 1, - 'out2': 0.5 - }) - - y_t = {'out1': tf.ones((10, 1)), 'out2': tf.zeros((10, 1))} - y_p = {'out1': tf.ones((10, 1)), 'out2': tf.ones((10, 1))} - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - - total_loss = loss_container(y_t, y_p, sample_weight=sw) - - self.assertLen(loss_container._losses, 2) - self.assertIsInstance(total_loss, tf.Tensor) - self.assertEqual(total_loss.numpy(), 0.25) - self.assertLen(loss_container.metrics, 3) - - loss_metric = loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertEqual(loss_metric.result().numpy(), 0.25) - - out1_metric = loss_container.metrics[1] - self.assertEqual(out1_metric.name, 'out1_loss') - self.assertEqual(out1_metric.result().numpy(), 0) - - out2_metric = loss_container.metrics[2] - self.assertEqual(out2_metric.name, 'out2_loss') - self.assertEqual(out2_metric.result().numpy(), 0.5) - - loss_container.reset_state() - self.assertEqual(loss_metric.result().numpy(), 0) - self.assertEqual(out1_metric.result().numpy(), 0) - self.assertEqual(out2_metric.result().numpy(), 0) - - def test_loss_partial_dict_with_output_names(self): - loss_container = compile_utils.LossesContainer( - {'out2': 'mae'}, {'out2': 1.}, output_names=['out1', 'out2']) - - y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] - y_p = [tf.ones((10, 1)), tf.ones((10, 1))] - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - - total_loss = loss_container(y_t, y_p, sample_weight=sw) - - self.assertEqual(total_loss.numpy(), 0.5) - self.assertLen(loss_container.metrics, 2) - - loss_metric = 
loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertEqual(loss_metric.result().numpy(), 0.5) - - out2_metric = loss_container.metrics[1] - self.assertEqual(out2_metric.name, 'out2_loss') - self.assertEqual(out2_metric.result().numpy(), 0.5) - - def test_loss_dict_with_nones(self): - loss_container = compile_utils.LossesContainer({ - 'out1': None, - 'out2': 'mae' - }) - - y_t = {'out1': tf.ones((10, 1)), 'out2': tf.zeros((10, 1))} - y_p = {'out1': tf.ones((10, 1)), 'out2': tf.ones((10, 1))} - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - - total_loss = loss_container(y_t, y_p, sample_weight=sw) - - self.assertIsInstance(total_loss, tf.Tensor) - self.assertEqual(total_loss.numpy(), 0.5) - self.assertLen(loss_container.metrics, 2) - - loss_metric = loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertEqual(loss_metric.result().numpy(), 0.5) - - out2_metric = loss_container.metrics[1] - self.assertEqual(out2_metric.name, 'out2_loss') - self.assertEqual(out2_metric.result().numpy(), 0.5) - - def test_nested_structure(self): - loss_container = compile_utils.LossesContainer( - { - 'b': ['mse', None], - 'a': 'mae' - }, loss_weights={ - 'b': [0.5, 0], - 'a': 1 - }) - - y_t = { - 'b': [tf.ones((10, 1)), - tf.zeros((10, 1))], - 'a': tf.zeros((10, 1)) - } - y_p = { - 'b': [tf.zeros((10, 1)), - tf.zeros((10, 1))], - 'a': tf.ones((10, 1)) - } - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - - total_loss = loss_container(y_t, y_p, sample_weight=sw) - self.assertIsInstance(total_loss, tf.Tensor) - self.assertEqual(total_loss.numpy(), 0.75) - self.assertLen(loss_container.metrics, 3) - - loss_metric = loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertEqual(loss_metric.result().numpy(), 0.75) - - a_metric = loss_container.metrics[1] - self.assertEqual(a_metric.name, 'a_loss') - self.assertEqual(a_metric.result().numpy(), 0.5) - - b_1_metric = loss_container.metrics[2] - self.assertEqual(b_1_metric.name, 'b_1_loss') - self.assertEqual(b_1_metric.result().numpy(), 0.5) - - def test_no_input_mutation(self): - loss = {'a': 'mae'} - loss_container = compile_utils.LossesContainer(loss) - - y_t = {'a': tf.zeros((10, 1))} - y_p = {'a': tf.ones((10, 1)), 'b': tf.zeros((10, 1))} - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - - total_loss = loss_container(y_t, y_p, sample_weight=sw) - self.assertIsInstance(total_loss, tf.Tensor) - self.assertEqual(total_loss.numpy(), 0.5) - self.assertLen(loss, 1) - - def test_broadcast_single_loss(self): - loss_container = compile_utils.LossesContainer('mse') - - y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] - y_p = [tf.ones((10, 1)), tf.ones((10, 1))] - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - - total_loss = loss_container(y_t, y_p, sample_weight=sw) - self.assertEqual(total_loss.numpy(), 0.5) - self.assertLen(loss_container.metrics, 3) - - loss_metric = loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertEqual(loss_metric.result().numpy(), 0.5) - - output_1_metric = loss_container.metrics[1] - self.assertEqual(output_1_metric.name, 'output_1_loss') - self.assertEqual(output_1_metric.result().numpy(), 0.) 
- - output_2_metric = loss_container.metrics[2] - self.assertEqual(output_2_metric.name, 'output_2_loss') - self.assertEqual(output_2_metric.result().numpy(), 0.5) - - def test_missing_label_with_no_loss(self): - # It's ok to exclude a label if that label has no - # losses or metrics associated with it. - loss_container = compile_utils.LossesContainer({ - 'output1': 'mse', - 'output3': 'mae' - }) - - y_p = { - 'output1': tf.convert_to_tensor([[0], [1], [2]]), - 'output2': tf.convert_to_tensor([[3], [4], [5]]), - 'output3': tf.convert_to_tensor([[6], [7], [8]]) - } - y_t = { - 'output1': tf.convert_to_tensor([[1], [2], [3]]), - 'output3': tf.convert_to_tensor([[4], [5], [6]]) - } - - total_loss = loss_container(y_t, y_p) - self.assertEqual(total_loss.numpy(), 3.) - self.assertLen(loss_container.metrics, 3) - - loss_metric = loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertEqual(loss_metric.result().numpy(), 3.) - - output_1_metric = loss_container.metrics[1] - self.assertEqual(output_1_metric.name, 'output1_loss') - self.assertEqual(output_1_metric.result().numpy(), 1.) - - output_3_metric = loss_container.metrics[2] - self.assertEqual(output_3_metric.name, 'output3_loss') - self.assertEqual(output_3_metric.result().numpy(), 2.) - - def test_mismatched_dtypes(self): - y_t = tf.constant([1, 9, 2, -5], shape=(2, 2)) - y_p = tf.constant([4, 8, 12, 8], - shape=(2, 2), - dtype=tf.float32) - - def my_mae(labels, preds): - self.assertEqual(labels.dtype, tf.int32) - self.assertEqual(preds.dtype, tf.float32) - labels = tf.cast(labels, preds.dtype) - return backend.mean(tf.abs(preds - labels), axis=-1) - - loss_container = compile_utils.LossesContainer(my_mae) - total_loss = loss_container(y_t, y_p) - self.assertEqual(total_loss.dtype, tf.float32) - - def test_integer_dtypes(self): - y_t = tf.constant([1, 9, 2, -5], shape=(2, 2)) - y_p = tf.constant([4, 8, 12, 8], shape=(2, 2), dtype=tf.int64) - - def my_mae(labels, preds): - self.assertEqual(labels.dtype, tf.int64) - self.assertEqual(preds.dtype, tf.int64) - return backend.mean(tf.abs(preds - labels), axis=-1) - - loss_container = compile_utils.LossesContainer(my_mae) - total_loss = loss_container(y_t, y_p) - self.assertEqual(total_loss.dtype, tf.int64) - - def test_float_dtypes(self): - y_t = tf.constant([1, 9, 2, -5], - shape=(2, 2), - dtype=tf.float32) - y_p = tf.constant([4, 8, 12, 8], - shape=(2, 2), - dtype=tf.float64) - - def my_mae(labels, preds): - self.assertEqual(labels.dtype, tf.float64) - self.assertEqual(preds.dtype, tf.float64) - return backend.mean(tf.abs(preds - labels), axis=-1) - - loss_container = compile_utils.LossesContainer(my_mae) - total_loss = loss_container(y_t, y_p) - self.assertIsInstance(total_loss, tf.Tensor) - self.assertEqual(total_loss.dtype, tf.float64) - - def test_loss_masking(self): - loss_container = compile_utils.LossesContainer('mae') - y_p = tf.constant([[[1], [1]], [[0], [0]]], dtype=tf.float32) - y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) - y_p._keras_mask = tf.constant([[1, 0], [1, 0]], - dtype=tf.float32) - - total_loss = loss_container(y_t, y_p) - self.assertAlmostEqual(total_loss.numpy(), .25) # sum over batch size - - self.assertLen(loss_container.metrics, 1) - loss_metric = loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertAlmostEqual(loss_metric.result().numpy(), .25) - - def test_loss_sample_weight(self): - loss_container = compile_utils.LossesContainer('mae') - y_p = tf.constant([[[1], [1]], [[0], [0]]], 
dtype=tf.float32) - y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) - sw = tf.constant([[.2, .3], [.5, 0]], dtype=tf.float32) - - total_loss = loss_container(y_t, y_p, sample_weight=sw) - # (0 * .2 + 0 * .3 + 1 * .5 + 1 * 0) / 4 - self.assertAlmostEqual(total_loss.numpy(), .125) - - self.assertLen(loss_container.metrics, 1) - loss_metric = loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertAlmostEqual(loss_metric.result().numpy(), .125) - - def test_loss_masking_sample_weight(self): - loss_container = compile_utils.LossesContainer('mae') - y_p = tf.constant([[[1], [1]], [[0], [0]]], dtype=tf.float32) - y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) - sw = tf.constant([[.2, .3], [.5, 0]], dtype=tf.float32) - y_p._keras_mask = tf.constant([[1, 0], [1, 0]], - dtype=tf.float32) - - total_loss = loss_container(y_t, y_p, sample_weight=sw) - # (0 * .2 + 1 * .5) / 4 - self.assertAlmostEqual(total_loss.numpy(), .125) # sum over batch size - - self.assertLen(loss_container.metrics, 1) - loss_metric = loss_container.metrics[0] - self.assertEqual(loss_metric.name, 'loss') - self.assertAlmostEqual(loss_metric.result().numpy(), .125) - - def test_custom_loss_callables(self): - - def custom_loss_fn(y_true, y_pred): - return tf.reduce_sum(y_true - y_pred) - - class CustomLossClass: - - def __call__(self, y_true, y_pred): - return tf.reduce_sum(y_true - y_pred) - - loss_container = compile_utils.LossesContainer( - [custom_loss_fn, CustomLossClass()]) - y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5)) - loss_container(y_t, y_p) - - self.assertEqual(loss_container._losses[0].name, 'custom_loss_fn') - self.assertEqual(loss_container._losses[1].name, 'custom_loss_class') - - def test_ragged_tensor_output(self): - """Ensure that ragged tensors can be passed as targets and predictions.""" - - def custom_loss_fn(y_true, y_pred): - """MSE supports RaggedTensors directly.""" - return losses_mod.mse(y_true, y_pred) - - class CustomLossClass(losses_mod.Loss): - """User defined loss function must implement RaggedTensor support.""" - - def call(self, y_true, y_pred): - losses = tf.ragged.map_flat_values( - tf.math.squared_difference, y_true, y_pred) - return tf.reduce_mean(losses) - - loss_container = compile_utils.LossesContainer( - [custom_loss_fn, CustomLossClass()]) - - v_t = tf.constant([[3., 4.], [1., 2.], [3., 5.]]) - v_p = tf.constant([[3.1, 4.], [1., 2.], [3., 5.]]) - - y_t = tf.expand_dims( - tf.RaggedTensor.from_row_splits(v_t, [0, 2, 3]), 0) - y_p = tf.expand_dims( - tf.RaggedTensor.from_row_splits(v_p, [0, 2, 3]), 0) - total_loss = loss_container(y_t, y_p) - - self.assertIsInstance(total_loss, tf.Tensor) - self.assertEqual(loss_container._losses[0].name, 'custom_loss_fn') + output_1_metric = loss_container.metrics[1] + self.assertEqual(output_1_metric.name, "output_1_loss") + self.assertEqual(output_1_metric.result().numpy(), 0) + + output_2_metric = loss_container.metrics[2] + self.assertEqual(output_2_metric.name, "output_2_loss") + self.assertEqual(output_2_metric.result().numpy(), 0.5) + + loss_container.reset_state() + self.assertEqual(loss_metric.result().numpy(), 0) + self.assertEqual(output_1_metric.result().numpy(), 0) + self.assertEqual(output_2_metric.result().numpy(), 0) + + def test_loss_dict(self): + loss_container = compile_utils.LossesContainer( + {"out1": "mse", "out2": "mae"}, {"out1": 1, "out2": 0.5} + ) + + y_t = {"out1": tf.ones((10, 1)), "out2": tf.zeros((10, 1))} + y_p = {"out1": tf.ones((10, 1)), "out2": tf.ones((10, 1))} + 
sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + + total_loss = loss_container(y_t, y_p, sample_weight=sw) + + self.assertLen(loss_container._losses, 2) + self.assertIsInstance(total_loss, tf.Tensor) + self.assertEqual(total_loss.numpy(), 0.25) + self.assertLen(loss_container.metrics, 3) + + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertEqual(loss_metric.result().numpy(), 0.25) + + out1_metric = loss_container.metrics[1] + self.assertEqual(out1_metric.name, "out1_loss") + self.assertEqual(out1_metric.result().numpy(), 0) + + out2_metric = loss_container.metrics[2] + self.assertEqual(out2_metric.name, "out2_loss") + self.assertEqual(out2_metric.result().numpy(), 0.5) + + loss_container.reset_state() + self.assertEqual(loss_metric.result().numpy(), 0) + self.assertEqual(out1_metric.result().numpy(), 0) + self.assertEqual(out2_metric.result().numpy(), 0) + + def test_loss_partial_dict_with_output_names(self): + loss_container = compile_utils.LossesContainer( + {"out2": "mae"}, {"out2": 1.0}, output_names=["out1", "out2"] + ) + + y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] + y_p = [tf.ones((10, 1)), tf.ones((10, 1))] + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + + total_loss = loss_container(y_t, y_p, sample_weight=sw) + + self.assertEqual(total_loss.numpy(), 0.5) + self.assertLen(loss_container.metrics, 2) + + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertEqual(loss_metric.result().numpy(), 0.5) + + out2_metric = loss_container.metrics[1] + self.assertEqual(out2_metric.name, "out2_loss") + self.assertEqual(out2_metric.result().numpy(), 0.5) + + def test_loss_dict_with_nones(self): + loss_container = compile_utils.LossesContainer( + {"out1": None, "out2": "mae"} + ) + + y_t = {"out1": tf.ones((10, 1)), "out2": tf.zeros((10, 1))} + y_p = {"out1": tf.ones((10, 1)), "out2": tf.ones((10, 1))} + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + + total_loss = loss_container(y_t, y_p, sample_weight=sw) + + self.assertIsInstance(total_loss, tf.Tensor) + self.assertEqual(total_loss.numpy(), 0.5) + self.assertLen(loss_container.metrics, 2) + + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertEqual(loss_metric.result().numpy(), 0.5) + + out2_metric = loss_container.metrics[1] + self.assertEqual(out2_metric.name, "out2_loss") + self.assertEqual(out2_metric.result().numpy(), 0.5) + + def test_nested_structure(self): + loss_container = compile_utils.LossesContainer( + {"b": ["mse", None], "a": "mae"}, + loss_weights={"b": [0.5, 0], "a": 1}, + ) + + y_t = { + "b": [tf.ones((10, 1)), tf.zeros((10, 1))], + "a": tf.zeros((10, 1)), + } + y_p = { + "b": [tf.zeros((10, 1)), tf.zeros((10, 1))], + "a": tf.ones((10, 1)), + } + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + + total_loss = loss_container(y_t, y_p, sample_weight=sw) + self.assertIsInstance(total_loss, tf.Tensor) + self.assertEqual(total_loss.numpy(), 0.75) + self.assertLen(loss_container.metrics, 3) + + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertEqual(loss_metric.result().numpy(), 0.75) + + a_metric = loss_container.metrics[1] + self.assertEqual(a_metric.name, "a_loss") + self.assertEqual(a_metric.result().numpy(), 0.5) + + b_1_metric = loss_container.metrics[2] + self.assertEqual(b_1_metric.name, "b_1_loss") + self.assertEqual(b_1_metric.result().numpy(), 0.5) + + def test_no_input_mutation(self): 
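+        # `LossesContainer` must copy the user-supplied dict rather than
+        # mutate it, even though `y_p` carries an extra key ("b") that the
+        # loss dict never references.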
+ loss = {"a": "mae"} + loss_container = compile_utils.LossesContainer(loss) + + y_t = {"a": tf.zeros((10, 1))} + y_p = {"a": tf.ones((10, 1)), "b": tf.zeros((10, 1))} + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + + total_loss = loss_container(y_t, y_p, sample_weight=sw) + self.assertIsInstance(total_loss, tf.Tensor) + self.assertEqual(total_loss.numpy(), 0.5) + self.assertLen(loss, 1) + + def test_broadcast_single_loss(self): + loss_container = compile_utils.LossesContainer("mse") + + y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] + y_p = [tf.ones((10, 1)), tf.ones((10, 1))] + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + + total_loss = loss_container(y_t, y_p, sample_weight=sw) + self.assertEqual(total_loss.numpy(), 0.5) + self.assertLen(loss_container.metrics, 3) + + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertEqual(loss_metric.result().numpy(), 0.5) + + output_1_metric = loss_container.metrics[1] + self.assertEqual(output_1_metric.name, "output_1_loss") + self.assertEqual(output_1_metric.result().numpy(), 0.0) + + output_2_metric = loss_container.metrics[2] + self.assertEqual(output_2_metric.name, "output_2_loss") + self.assertEqual(output_2_metric.result().numpy(), 0.5) + + def test_missing_label_with_no_loss(self): + # It's ok to exclude a label if that label has no + # losses or metrics associated with it. + loss_container = compile_utils.LossesContainer( + {"output1": "mse", "output3": "mae"} + ) + + y_p = { + "output1": tf.convert_to_tensor([[0], [1], [2]]), + "output2": tf.convert_to_tensor([[3], [4], [5]]), + "output3": tf.convert_to_tensor([[6], [7], [8]]), + } + y_t = { + "output1": tf.convert_to_tensor([[1], [2], [3]]), + "output3": tf.convert_to_tensor([[4], [5], [6]]), + } + + total_loss = loss_container(y_t, y_p) + self.assertEqual(total_loss.numpy(), 3.0) + self.assertLen(loss_container.metrics, 3) + + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertEqual(loss_metric.result().numpy(), 3.0) + + output_1_metric = loss_container.metrics[1] + self.assertEqual(output_1_metric.name, "output1_loss") + self.assertEqual(output_1_metric.result().numpy(), 1.0) + + output_3_metric = loss_container.metrics[2] + self.assertEqual(output_3_metric.name, "output3_loss") + self.assertEqual(output_3_metric.result().numpy(), 2.0) + + def test_mismatched_dtypes(self): + y_t = tf.constant([1, 9, 2, -5], shape=(2, 2)) + y_p = tf.constant([4, 8, 12, 8], shape=(2, 2), dtype=tf.float32) + + def my_mae(labels, preds): + self.assertEqual(labels.dtype, tf.int32) + self.assertEqual(preds.dtype, tf.float32) + labels = tf.cast(labels, preds.dtype) + return backend.mean(tf.abs(preds - labels), axis=-1) + + loss_container = compile_utils.LossesContainer(my_mae) + total_loss = loss_container(y_t, y_p) + self.assertEqual(total_loss.dtype, tf.float32) + + def test_integer_dtypes(self): + y_t = tf.constant([1, 9, 2, -5], shape=(2, 2)) + y_p = tf.constant([4, 8, 12, 8], shape=(2, 2), dtype=tf.int64) + + def my_mae(labels, preds): + self.assertEqual(labels.dtype, tf.int64) + self.assertEqual(preds.dtype, tf.int64) + return backend.mean(tf.abs(preds - labels), axis=-1) + + loss_container = compile_utils.LossesContainer(my_mae) + total_loss = loss_container(y_t, y_p) + self.assertEqual(total_loss.dtype, tf.int64) + + def test_float_dtypes(self): + y_t = tf.constant([1, 9, 2, -5], shape=(2, 2), dtype=tf.float32) + y_p = tf.constant([4, 8, 12, 8], shape=(2, 2), dtype=tf.float64) + + def 
my_mae(labels, preds):
+            self.assertEqual(labels.dtype, tf.float64)
+            self.assertEqual(preds.dtype, tf.float64)
+            return backend.mean(tf.abs(preds - labels), axis=-1)
+
+        loss_container = compile_utils.LossesContainer(my_mae)
+        total_loss = loss_container(y_t, y_p)
+        self.assertIsInstance(total_loss, tf.Tensor)
+        self.assertEqual(total_loss.dtype, tf.float64)
+
+    @test_combinations.generate(
+        test_combinations.combine(
+            input_type=["dense", "masked", "ragged"],
+            reduction=["auto", "sum"],
+            use_sample_weights=[True, False],
+        ),
+    )
+    def test_loss_consistency(self, input_type, reduction, use_sample_weights):
+        y_p = tf.ragged.constant(
+            [[[1], [1], [1]], [[1], [1]]], dtype=tf.float32
+        )
+        y_t = tf.ragged.constant(
+            [[[1], [0], [0]], [[1], [1]]], dtype=tf.float32
+        )
+
+        if input_type == "masked":
+            mask = tf.ones_like(y_p).to_tensor()
+            y_p = y_p.to_tensor()
+            y_t = y_t.to_tensor()
+            y_p._keras_mask = mask
+        elif input_type == "dense":
+            y_p = y_p.to_tensor()
+            y_t = y_t.to_tensor()
+
+        if input_type == "dense":
+            count = 6
+        else:
+            count = 5
+
+        if use_sample_weights:
+            wrong = 4
+            maybe_sample_weight = {
+                "sample_weight": tf.constant([[2], [1]], dtype=tf.float32)
+            }
+        else:
+            wrong = 2
+            maybe_sample_weight = {}
+
+        expected = wrong
+        if reduction != "sum":
+            expected /= count
+
+        loss_obj = losses_mod.MeanAbsoluteError(reduction=reduction)
+
+        result = loss_obj(y_t, y_p, **maybe_sample_weight)
+        self.assertAlmostEqual(result.numpy(), expected)
+
+        container = compile_utils.LossesContainer(loss_obj)
+        container_result = container(y_t, y_p, **maybe_sample_weight)
+        self.assertAlmostEqual(container_result.numpy(), expected)
+
+    def test_loss_masking(self):
+        loss_container = compile_utils.LossesContainer("mae")
+        y_p = tf.constant([[[1], [1]], [[0], [0]]], dtype=tf.float32)
+        y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32)
+        # The reduction is "sum_over_batch_size", where "batch size" means
+        # the number of elements being summed, not the literal batch size:
+        # here, the number of valid (unmasked) elements. Since the mask keeps
+        # two valid items, the divisor is 2.
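+        # Masked MAE: the unmasked errors are |1 - 1| = 0 and |1 - 0| = 1,
+        # so the total loss is 1 / 2 = 0.5.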
+ y_p._keras_mask = tf.constant([[1, 0], [1, 0]], dtype=tf.float32) + + total_loss = loss_container(y_t, y_p) + self.assertAlmostEqual(total_loss.numpy(), 0.5) # sum over num valid + + self.assertLen(loss_container.metrics, 1) + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertAlmostEqual(loss_metric.result().numpy(), 0.5) + + def test_loss_sample_weight(self): + loss_container = compile_utils.LossesContainer("mae") + y_p = tf.constant([[[1], [1]], [[0], [0]]], dtype=tf.float32) + y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) + sw = tf.constant([[0.2, 0.3], [0.5, 0]], dtype=tf.float32) + + total_loss = loss_container(y_t, y_p, sample_weight=sw) + # (0 * .2 + 0 * .3 + 1 * .5 + 1 * 0) / 4 + self.assertAlmostEqual(total_loss.numpy(), 0.125) + + self.assertLen(loss_container.metrics, 1) + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertAlmostEqual(loss_metric.result().numpy(), 0.125) + + def test_loss_masking_sample_weight(self): + loss_container = compile_utils.LossesContainer("mae") + y_p = tf.constant([[[1], [1]], [[0], [0]]], dtype=tf.float32) + y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) + sw = tf.constant([[0.2, 0.3], [0.5, 0]], dtype=tf.float32) + y_p._keras_mask = tf.constant([[1, 0], [1, 0]], dtype=tf.float32) + + total_loss = loss_container(y_t, y_p, sample_weight=sw) + # (0 * .2 + 1 * .5) / 2 + self.assertAlmostEqual(total_loss.numpy(), 0.25) # sum over num valid + + self.assertLen(loss_container.metrics, 1) + loss_metric = loss_container.metrics[0] + self.assertEqual(loss_metric.name, "loss") + self.assertAlmostEqual(loss_metric.result().numpy(), 0.25) + + def test_custom_loss_callables(self): + def custom_loss_fn(y_true, y_pred): + return tf.reduce_sum(y_true - y_pred) + + class CustomLossClass: + def __call__(self, y_true, y_pred): + return tf.reduce_sum(y_true - y_pred) + + loss_container = compile_utils.LossesContainer( + [custom_loss_fn, CustomLossClass()] + ) + y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5)) + loss_container(y_t, y_p) + + self.assertEqual(loss_container._losses[0].name, "custom_loss_fn") + self.assertEqual(loss_container._losses[1].name, "custom_loss_class") + + def test_ragged_tensor_output(self): + """Ensure ragged tensors can be passed as targets and predictions.""" + + def custom_loss_fn(y_true, y_pred): + """MSE supports RaggedTensors directly.""" + return losses_mod.mse(y_true, y_pred) + + class CustomLossClass(losses_mod.Loss): + """User defined loss func must implement RaggedTensor support.""" + + def call(self, y_true, y_pred): + losses = tf.ragged.map_flat_values( + tf.math.squared_difference, y_true, y_pred + ) + return tf.reduce_mean(losses) + + loss_container = compile_utils.LossesContainer( + [custom_loss_fn, CustomLossClass()] + ) + + v_t = tf.constant([[3.0, 4.0], [1.0, 2.0], [3.0, 5.0]]) + v_p = tf.constant([[3.1, 4.0], [1.0, 2.0], [3.0, 5.0]]) + + y_t = tf.expand_dims(tf.RaggedTensor.from_row_splits(v_t, [0, 2, 3]), 0) + y_p = tf.expand_dims(tf.RaggedTensor.from_row_splits(v_p, [0, 2, 3]), 0) + total_loss = loss_container(y_t, y_p) + + self.assertIsInstance(total_loss, tf.Tensor) + self.assertEqual(loss_container._losses[0].name, "custom_loss_fn") class MetricsContainerTest(test_combinations.TestCase): - - def test_single_metric(self): - metric_container = compile_utils.MetricsContainer('mse') - y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5)) - metric_container.update_state(y_t, y_p) - - 
self.assertLen(metric_container.metrics, 1) - metric = metric_container.metrics[0] - self.assertEqual(metric.name, 'mse') - self.assertEqual(metric.result().numpy(), 1.) - - metric_container.reset_state() - self.assertEqual(metric.result().numpy(), 0.) - - def test_list_of_metrics_one_output(self): - metric_container = compile_utils.MetricsContainer(['mse', 'mae']) - y_t, y_p = 2 * tf.ones((10, 5)), tf.zeros((10, 5)) - metric_container.update_state(y_t, y_p) - self.assertLen(metric_container.metrics, 2) - - mse_metric = metric_container.metrics[0] - self.assertEqual(mse_metric.name, 'mse') - self.assertEqual(mse_metric.result().numpy(), 4.) - - mae_metric = metric_container.metrics[1] - self.assertEqual(mae_metric.name, 'mae') - self.assertEqual(mae_metric.result().numpy(), 2.) - - metric_container.reset_state() - self.assertEqual(mse_metric.result().numpy(), 0.) - self.assertEqual(mae_metric.result().numpy(), 0.) - - def test_list_of_metrics_list_of_outputs(self): - metric_container = compile_utils.MetricsContainer( - metrics=['mse', 'mae'], # Should broadcast to both outputs. - weighted_metrics=['accuracy']) # Should broadcast to both outputs. - - y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] - y_p = [tf.ones((10, 1)), 2 * tf.ones((10, 1))] - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - metric_container.update_state(y_t, y_p, sample_weight=sw) - self.assertLen(metric_container.metrics, 6) - - mse_metric = metric_container.metrics[0] - self.assertEqual(mse_metric.name, 'output_1_mse') - self.assertEqual(mse_metric.result().numpy(), 0.) - - mse_metric = metric_container.metrics[1] - self.assertEqual(mse_metric.name, 'output_1_mae') - self.assertEqual(mse_metric.result().numpy(), 0.) - - acc_metric_1 = metric_container.metrics[2] - self.assertEqual(acc_metric_1.name, 'output_1_accuracy') - self.assertEqual(acc_metric_1.result().numpy(), 1.) - self.assertEqual(acc_metric_1._fn, metrics_mod.binary_accuracy) - - mae_metric = metric_container.metrics[3] - self.assertEqual(mae_metric.name, 'output_2_mse') - self.assertEqual(mae_metric.result().numpy(), 4.) - - mae_metric = metric_container.metrics[4] - self.assertEqual(mae_metric.name, 'output_2_mae') - self.assertEqual(mae_metric.result().numpy(), 2.) - - acc_metric_2 = metric_container.metrics[5] - self.assertEqual(acc_metric_2.name, 'output_2_accuracy') - self.assertEqual(acc_metric_2.result().numpy(), 0.) 
- self.assertEqual(acc_metric_2._fn, metrics_mod.binary_accuracy) - - weighted_metrics = metric_container.weighted_metrics - self.assertLen(weighted_metrics, 2) - self.assertEqual(weighted_metrics[0].name, 'output_1_accuracy') - self.assertEqual(weighted_metrics[1].name, 'output_2_accuracy') - - unweighted_metrics = metric_container.unweighted_metrics - self.assertLen(unweighted_metrics, 4) - self.assertEqual(unweighted_metrics[0].name, 'output_1_mse') - self.assertEqual(unweighted_metrics[1].name, 'output_1_mae') - self.assertEqual(unweighted_metrics[2].name, 'output_2_mse') - self.assertEqual(unweighted_metrics[3].name, 'output_2_mae') - - def test_metric_dict(self): - metric_container = compile_utils.MetricsContainer( - metrics={ - 'out1': 'mse', - 'out2': 'mae' - }, - weighted_metrics={ - 'out1': 'mse', - 'out2': 'mae' - }) - - y_t = {'out1': tf.ones((10, 1)), 'out2': tf.zeros((10, 1))} - y_p = {'out1': tf.ones((10, 1)), 'out2': 2 * tf.ones((10, 1))} - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - metric_container.update_state(y_t, y_p, sample_weight=sw) - - mse_metric = metric_container.metrics[0] - self.assertEqual(mse_metric.name, 'out1_mse') - self.assertEqual(mse_metric.result().numpy(), 0.) - - weighted_mse_metric = metric_container.metrics[1] - self.assertEqual(weighted_mse_metric.name, 'out1_weighted_mse') - self.assertEqual(weighted_mse_metric.result().numpy(), 0.) - - mae_metric = metric_container.metrics[2] - self.assertEqual(mae_metric.name, 'out2_mae') - self.assertEqual(mae_metric.result().numpy(), 2.) - - weighted_mae_metric = metric_container.metrics[3] - self.assertEqual(weighted_mae_metric.name, 'out2_weighted_mae') - self.assertEqual(weighted_mae_metric.result().numpy(), 2.) - - metric_container.reset_state() - self.assertEqual(mse_metric.result().numpy(), 0.) - self.assertEqual(weighted_mse_metric.result().numpy(), 0.) - self.assertEqual(mae_metric.result().numpy(), 0.) - self.assertEqual(weighted_mae_metric.result().numpy(), 0.) - - def test_metric_partial_dict_with_output_names(self): - metric_container = compile_utils.MetricsContainer( - {'out2': 'mae'}, output_names=['out1', 'out2']) - - y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] - y_p = [tf.ones((10, 1)), tf.ones((10, 1))] - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - - metric_container.update_state(y_t, y_p, sample_weight=sw) - self.assertLen(metric_container.metrics, 1) - - mae_metric = metric_container.metrics[0] - self.assertEqual(mae_metric.name, 'out2_mae') - self.assertEqual(mae_metric.result().numpy(), 1.) - - def test_metric_partial_dict_with_nones(self): - metric_container = compile_utils.MetricsContainer({ - 'out1': None, - 'out2': 'mae' - }) - - y_t = {'out1': tf.ones((10, 1)), 'out2': tf.zeros((10, 1))} - y_p = {'out1': tf.ones((10, 1)), 'out2': tf.ones((10, 1))} - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - - metric_container.update_state(y_t, y_p, sample_weight=sw) - self.assertLen(metric_container.metrics, 1) - - mae_metric = metric_container.metrics[0] - self.assertEqual(mae_metric.name, 'out2_mae') - self.assertEqual(mae_metric.result().numpy(), 1.) 
- - def test_nested_structure(self): - metric_container = compile_utils.MetricsContainer( - metrics={ - 'b': ['mse', None], - 'a': 'mae' - }, - weighted_metrics={ - 'b': [None, None], - 'a': 'mse' - }) - - y_t = { - 'b': [2 * tf.ones((10, 1)), - tf.zeros((10, 1))], - 'a': tf.zeros((10, 1)) - } - y_p = { - 'b': [tf.zeros((10, 1)), - tf.zeros((10, 1))], - 'a': tf.ones((10, 1)) - } - sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - - metric_container.update_state(y_t, y_p, sample_weight=sw) - self.assertLen(metric_container.metrics, 3) - - a_mae_metric = metric_container.metrics[0] - self.assertEqual(a_mae_metric.name, 'a_mae') - self.assertEqual(a_mae_metric.result().numpy(), 1.) - - weighted_a_mae_metric = metric_container.metrics[1] - self.assertEqual(weighted_a_mae_metric.name, 'a_mse') - self.assertEqual(weighted_a_mae_metric.result().numpy(), 1.) - - b_1_mse_metric = metric_container.metrics[2] - self.assertEqual(b_1_mse_metric.name, 'b_1_mse') - self.assertEqual(b_1_mse_metric.result().numpy(), 4.) - - def test_no_input_mutation(self): - metric = {'a': 'mae'} - metric_container = compile_utils.MetricsContainer(metric) - - y_t = {'a': tf.zeros((10, 1))} - y_p = {'a': tf.ones((10, 1)), 'b': tf.zeros((10, 1))} - - metric_container.update_state(y_t, y_p) - self.assertLen(metric, 1) - mae_metric = metric_container.metrics[0] - self.assertEqual(mae_metric.result().numpy(), 1.) - - def test_crossentropy(self): - metric_container = compile_utils.MetricsContainer('crossentropy') - y_t, y_p = tf.ones((10, 1)), tf.ones((10, 1)) - metric_container.update_state(y_t, y_p) - self.assertEqual(metric_container.metrics[0]._fn, - metrics_mod.binary_crossentropy) - - metric_container = compile_utils.MetricsContainer('crossentropy') - y_t, y_p = tf.ones((10, 1)), tf.ones((10, 20)) - self.assertEqual(y_p.shape.as_list()[-1], 20) - metric_container.update_state(y_t, y_p) - self.assertEqual(metric_container.metrics[0]._fn, - metrics_mod.sparse_categorical_crossentropy) - - metric_container = compile_utils.MetricsContainer('crossentropy') - y_t, y_p = tf.ones((10, 20)), tf.ones((10, 20)) - metric_container.update_state(y_t, y_p) - self.assertEqual(metric_container.metrics[0]._fn, - metrics_mod.categorical_crossentropy) - - def test_accuracy(self): - metric_container = compile_utils.MetricsContainer('accuracy') - y_t, y_p = tf.ones((10, 1)), tf.ones((10, 1)) - metric_container.update_state(y_t, y_p) - self.assertEqual(metric_container.metrics[0]._fn, - metrics_mod.binary_accuracy) - - metric_container = compile_utils.MetricsContainer('Accuracy') - y_t, y_p = tf.ones((10, 1)), tf.ones((10, 1)) - metric_container.update_state(y_t, y_p) - self.assertEqual(metric_container.metrics[0]._fn, - metrics_mod.binary_accuracy) - - metric_container = compile_utils.MetricsContainer('accuracy') - y_t, y_p = tf.ones((10, 1)), tf.ones((10, 20)) - self.assertEqual(y_p.shape.as_list()[-1], 20) - metric_container.update_state(y_t, y_p) - self.assertEqual(metric_container.metrics[0]._fn, - metrics_mod.sparse_categorical_accuracy) - - metric_container = compile_utils.MetricsContainer('accuracy') - y_t, y_p = tf.ones((10, 20)), tf.ones((10, 20)) - metric_container.update_state(y_t, y_p) - self.assertEqual(metric_container.metrics[0]._fn, - metrics_mod.categorical_accuracy) - - def test_metric_weighting(self): - metric_container = compile_utils.MetricsContainer( - metrics=['mae'], weighted_metrics=['mae']) - - y_t = tf.convert_to_tensor([[0], [3], [0]]) - y_p = tf.convert_to_tensor([[0], [0], [0]]) - sw = 
tf.convert_to_tensor([[1], [0], [1]]) - - metric_container.update_state(y_t, y_p, sample_weight=sw) - self.assertLen(metric_container.metrics, 2) - - mae_metric = metric_container.metrics[0] - self.assertEqual(mae_metric.name, 'mae') - self.assertEqual(mae_metric.result().numpy(), 1.) - - weighted_mae_metric = metric_container.metrics[1] - self.assertEqual(weighted_mae_metric.name, 'weighted_mae') - self.assertEqual(weighted_mae_metric.result().numpy(), 0.) - - def test_broadcast_metrics_to_dict(self): - metric_container = compile_utils.MetricsContainer(metrics=['mae']) - - y_p = {'output': tf.convert_to_tensor([[0], [1], [2]])} - y_t = {'output': tf.convert_to_tensor([[1], [2], [3]])} - metric_container.update_state(y_t, y_p) - - mae_metric = metric_container.metrics[0] - self.assertEqual(mae_metric.name, 'mae') - self.assertEqual(mae_metric.result().numpy(), 1.) - - def test_broadcast_metrics_to_dict_with_output_names(self): - metric_container = compile_utils.MetricsContainer( - metrics=['mae'], output_names=['output']) - - y_p = tf.convert_to_tensor([[0], [1], [2]]) - y_t = {'output': tf.convert_to_tensor([[1], [2], [3]])} - metric_container.update_state(y_t, y_p) - - mae_metric = metric_container.metrics[0] - self.assertEqual(mae_metric.name, 'mae') - self.assertEqual(mae_metric.result().numpy(), 1.) - - def test_missing_label_with_no_metrics(self): - # It's ok to exclude a label if that label has no - # losses or metrics associated with it. - metric_container = compile_utils.MetricsContainer(metrics={ - 'output1': 'mae', - 'output3': 'mse' - }) - - y_p = { - 'output1': tf.convert_to_tensor([[0], [1], [2]]), - 'output2': tf.convert_to_tensor([[3], [4], [5]]), - 'output3': tf.convert_to_tensor([[6], [7], [8]]) - } - y_t = { - 'output1': tf.convert_to_tensor([[1], [2], [3]]), - 'output3': tf.convert_to_tensor([[4], [5], [6]]) - } - - metric_container.update_state(y_t, y_p) - self.assertLen(metric_container.metrics, 2) - - mae_metric = metric_container.metrics[0] - self.assertEqual(mae_metric.name, 'output1_mae') - self.assertEqual(mae_metric.result().numpy(), 1.) - - mse_metric = metric_container.metrics[1] - self.assertEqual(mse_metric.name, 'output3_mse') - self.assertEqual(mse_metric.result().numpy(), 4.) 
- - def test_metrics_masking(self): - metrics_container = compile_utils.MetricsContainer( - metrics=['mae'], weighted_metrics=['mse']) - y_p = tf.constant([[[1], [1]], [[0], [0]]], dtype=tf.float32) - y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) - y_p._keras_mask = tf.constant([[1, 1], [0, 0]], - dtype=tf.float32) - - metrics_container.update_state(y_t, y_p) - self.assertLen(metrics_container.metrics, 2) - - mae_metric = metrics_container.metrics[0] - self.assertEqual(mae_metric.name, 'mae') - self.assertAlmostEqual(mae_metric.result().numpy(), 0) - - weighted_mae_metric = metrics_container.metrics[1] - self.assertEqual(weighted_mae_metric.name, 'mse') - self.assertAlmostEqual(weighted_mae_metric.result().numpy(), 0) - - def test_metrics_sample_weight(self): - metrics_container = compile_utils.MetricsContainer( - metrics=['mae'], weighted_metrics=['mse']) - y_p = tf.constant([[[1], [1]], [[0], [1]]], dtype=tf.float32) - y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) - sw = tf.constant([[.2, .3], [.5, 0]], dtype=tf.float32) - - metrics_container.update_state(y_t, y_p, sample_weight=sw) - self.assertLen(metrics_container.metrics, 2) - - mae_metric = metrics_container.metrics[0] - self.assertEqual(mae_metric.name, 'mae') - self.assertAlmostEqual(mae_metric.result().numpy(), .25) # 1 / 4 - - weighted_mae_metric = metrics_container.metrics[1] - self.assertEqual(weighted_mae_metric.name, 'mse') - self.assertAlmostEqual(weighted_mae_metric.result().numpy(), .5) # .5 / 1 - - def test_metrics_masking_sample_weight(self): - metrics_container = compile_utils.MetricsContainer( - metrics=['mae'], weighted_metrics=['mse']) - y_p = tf.constant([[[1], [1]], [[0], [1]]], dtype=tf.float32) - y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) - sw = tf.constant([[.3, .2], [.2, .3]], dtype=tf.float32) - y_p._keras_mask = tf.constant([[1, 0], [1, 0]], - dtype=tf.float32) - - metrics_container.update_state(y_t, y_p, sample_weight=sw) - self.assertLen(metrics_container.metrics, 2) - - mae_metric = metrics_container.metrics[0] - self.assertEqual(mae_metric.name, 'mae') - self.assertAlmostEqual(mae_metric.result().numpy(), .5) # 1 / .5 - - weighted_mae_metric = metrics_container.metrics[1] - self.assertEqual(weighted_mae_metric.name, 'mse') - self.assertAlmostEqual(weighted_mae_metric.result().numpy(), .2 / .5) - - def test_loss_class_as_metric_with_distribution(self): - distribution = tf.distribute.OneDeviceStrategy('/device:CPU:0') - with distribution.scope(): - metric_container = compile_utils.MetricsContainer( - losses_mod.MeanSquaredError()) - y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5)) - metric_container.update_state(y_t, y_p) - - self.assertLen(metric_container.metrics, 1) - metric = metric_container.metrics[0] - self.assertEqual(metric.name, 'mean_squared_error') - self.assertEqual(metric.result().numpy(), 1.) 
- - def test_custom_metric_callables(self): - - def custom_metric_fn(y_true, y_pred): - return tf.reduce_sum(y_true - y_pred) - - class CustomMetricClass: - - def __call__(self, y_true, y_pred): - return tf.reduce_sum(y_true - y_pred) - - metric_container = compile_utils.MetricsContainer( - [custom_metric_fn, CustomMetricClass()]) - y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5)) - metric_container.update_state(y_t, y_p) - - self.assertEqual(metric_container.metrics[0].name, 'custom_metric_fn') - self.assertEqual(metric_container.metrics[1].name, 'custom_metric_class') - - def test_reset_state_existing_metric_before_built(self): - metric = metrics_mod.Mean() - metric.update_state([2.0, 4.0]) - self.assertEqual(metric.result().numpy(), 3.0) - - metric_container = compile_utils.MetricsContainer(metric) - metric_container.reset_state() - self.assertEqual(metric.result().numpy(), 0.0) - - def test_duplicated_metric_instance(self): - mean_obj = metrics_mod.Mean() - metric = mean_obj - with self.assertRaisesRegex(ValueError, 'Found duplicated metrics'): - compile_utils.MetricsContainer(metrics=metric, weighted_metrics=metric) - - # duplicated string should be fine - metric = 'acc' - compile_utils.MetricsContainer(metrics=metric, weighted_metrics=metric) - - # complicated structure - metric = [mean_obj, 'acc'] - weighted_metric = {'output1': mean_obj, 'output2': 'acc'} - with self.assertRaisesRegex(ValueError, 'Found duplicated metrics'): - compile_utils.MetricsContainer( - metrics=metric, weighted_metrics=weighted_metric) - - -if __name__ == '__main__': - tf.compat.v1.enable_eager_execution() - tf.test.main() + def test_single_metric(self): + metric_container = compile_utils.MetricsContainer("mse") + y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5)) + metric_container.update_state(y_t, y_p) + + self.assertLen(metric_container.metrics, 1) + metric = metric_container.metrics[0] + self.assertEqual(metric.name, "mse") + self.assertEqual(metric.result().numpy(), 1.0) + + metric_container.reset_state() + self.assertEqual(metric.result().numpy(), 0.0) + + def test_list_of_metrics_one_output(self): + metric_container = compile_utils.MetricsContainer(["mse", "mae"]) + y_t, y_p = 2 * tf.ones((10, 5)), tf.zeros((10, 5)) + metric_container.update_state(y_t, y_p) + self.assertLen(metric_container.metrics, 2) + + mse_metric = metric_container.metrics[0] + self.assertEqual(mse_metric.name, "mse") + self.assertEqual(mse_metric.result().numpy(), 4.0) + + mae_metric = metric_container.metrics[1] + self.assertEqual(mae_metric.name, "mae") + self.assertEqual(mae_metric.result().numpy(), 2.0) + + metric_container.reset_state() + self.assertEqual(mse_metric.result().numpy(), 0.0) + self.assertEqual(mae_metric.result().numpy(), 0.0) + + def test_list_of_metrics_list_of_outputs(self): + metric_container = compile_utils.MetricsContainer( + metrics=["mse", "mae"], # Should broadcast to both outputs. + weighted_metrics=["accuracy"], + ) # Should broadcast to both outputs. 
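+        # Each entry in a flat metric list is applied to every output; the
+        # per-output copies are disambiguated as "output_<N>_<metric>".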
+ + y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] + y_p = [tf.ones((10, 1)), 2 * tf.ones((10, 1))] + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + metric_container.update_state(y_t, y_p, sample_weight=sw) + self.assertLen(metric_container.metrics, 6) + + mse_metric = metric_container.metrics[0] + self.assertEqual(mse_metric.name, "output_1_mse") + self.assertEqual(mse_metric.result().numpy(), 0.0) + + mse_metric = metric_container.metrics[1] + self.assertEqual(mse_metric.name, "output_1_mae") + self.assertEqual(mse_metric.result().numpy(), 0.0) + + acc_metric_1 = metric_container.metrics[2] + self.assertEqual(acc_metric_1.name, "output_1_accuracy") + self.assertEqual(acc_metric_1.result().numpy(), 1.0) + self.assertEqual(acc_metric_1._fn, metrics_mod.binary_accuracy) + + mae_metric = metric_container.metrics[3] + self.assertEqual(mae_metric.name, "output_2_mse") + self.assertEqual(mae_metric.result().numpy(), 4.0) + + mae_metric = metric_container.metrics[4] + self.assertEqual(mae_metric.name, "output_2_mae") + self.assertEqual(mae_metric.result().numpy(), 2.0) + + acc_metric_2 = metric_container.metrics[5] + self.assertEqual(acc_metric_2.name, "output_2_accuracy") + self.assertEqual(acc_metric_2.result().numpy(), 0.0) + self.assertEqual(acc_metric_2._fn, metrics_mod.binary_accuracy) + + weighted_metrics = metric_container.weighted_metrics + self.assertLen(weighted_metrics, 2) + self.assertEqual(weighted_metrics[0].name, "output_1_accuracy") + self.assertEqual(weighted_metrics[1].name, "output_2_accuracy") + + unweighted_metrics = metric_container.unweighted_metrics + self.assertLen(unweighted_metrics, 4) + self.assertEqual(unweighted_metrics[0].name, "output_1_mse") + self.assertEqual(unweighted_metrics[1].name, "output_1_mae") + self.assertEqual(unweighted_metrics[2].name, "output_2_mse") + self.assertEqual(unweighted_metrics[3].name, "output_2_mae") + + def test_metric_dict(self): + metric_container = compile_utils.MetricsContainer( + metrics={"out1": "mse", "out2": "mae"}, + weighted_metrics={"out1": "mse", "out2": "mae"}, + ) + + y_t = {"out1": tf.ones((10, 1)), "out2": tf.zeros((10, 1))} + y_p = {"out1": tf.ones((10, 1)), "out2": 2 * tf.ones((10, 1))} + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + metric_container.update_state(y_t, y_p, sample_weight=sw) + + mse_metric = metric_container.metrics[0] + self.assertEqual(mse_metric.name, "out1_mse") + self.assertEqual(mse_metric.result().numpy(), 0.0) + + weighted_mse_metric = metric_container.metrics[1] + self.assertEqual(weighted_mse_metric.name, "out1_weighted_mse") + self.assertEqual(weighted_mse_metric.result().numpy(), 0.0) + + mae_metric = metric_container.metrics[2] + self.assertEqual(mae_metric.name, "out2_mae") + self.assertEqual(mae_metric.result().numpy(), 2.0) + + weighted_mae_metric = metric_container.metrics[3] + self.assertEqual(weighted_mae_metric.name, "out2_weighted_mae") + self.assertEqual(weighted_mae_metric.result().numpy(), 2.0) + + metric_container.reset_state() + self.assertEqual(mse_metric.result().numpy(), 0.0) + self.assertEqual(weighted_mse_metric.result().numpy(), 0.0) + self.assertEqual(mae_metric.result().numpy(), 0.0) + self.assertEqual(weighted_mae_metric.result().numpy(), 0.0) + + def test_metric_partial_dict_with_output_names(self): + metric_container = compile_utils.MetricsContainer( + {"out2": "mae"}, output_names=["out1", "out2"] + ) + + y_t = [tf.ones((10, 1)), tf.zeros((10, 1))] + y_p = [tf.ones((10, 1)), tf.ones((10, 1))] + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 
1, 1, 1, 1]) + + metric_container.update_state(y_t, y_p, sample_weight=sw) + self.assertLen(metric_container.metrics, 1) + + mae_metric = metric_container.metrics[0] + self.assertEqual(mae_metric.name, "out2_mae") + self.assertEqual(mae_metric.result().numpy(), 1.0) + + def test_metric_partial_dict_with_nones(self): + metric_container = compile_utils.MetricsContainer( + {"out1": None, "out2": "mae"} + ) + + y_t = {"out1": tf.ones((10, 1)), "out2": tf.zeros((10, 1))} + y_p = {"out1": tf.ones((10, 1)), "out2": tf.ones((10, 1))} + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + + metric_container.update_state(y_t, y_p, sample_weight=sw) + self.assertLen(metric_container.metrics, 1) + + mae_metric = metric_container.metrics[0] + self.assertEqual(mae_metric.name, "out2_mae") + self.assertEqual(mae_metric.result().numpy(), 1.0) + + def test_nested_structure(self): + metric_container = compile_utils.MetricsContainer( + metrics={"b": ["mse", None], "a": "mae"}, + weighted_metrics={"b": [None, None], "a": "mse"}, + ) + + y_t = { + "b": [2 * tf.ones((10, 1)), tf.zeros((10, 1))], + "a": tf.zeros((10, 1)), + } + y_p = { + "b": [tf.zeros((10, 1)), tf.zeros((10, 1))], + "a": tf.ones((10, 1)), + } + sw = tf.convert_to_tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + + metric_container.update_state(y_t, y_p, sample_weight=sw) + self.assertLen(metric_container.metrics, 3) + + a_mae_metric = metric_container.metrics[0] + self.assertEqual(a_mae_metric.name, "a_mae") + self.assertEqual(a_mae_metric.result().numpy(), 1.0) + + weighted_a_mae_metric = metric_container.metrics[1] + self.assertEqual(weighted_a_mae_metric.name, "a_mse") + self.assertEqual(weighted_a_mae_metric.result().numpy(), 1.0) + + b_1_mse_metric = metric_container.metrics[2] + self.assertEqual(b_1_mse_metric.name, "b_1_mse") + self.assertEqual(b_1_mse_metric.result().numpy(), 4.0) + + def test_no_input_mutation(self): + metric = {"a": "mae"} + metric_container = compile_utils.MetricsContainer(metric) + + y_t = {"a": tf.zeros((10, 1))} + y_p = {"a": tf.ones((10, 1)), "b": tf.zeros((10, 1))} + + metric_container.update_state(y_t, y_p) + self.assertLen(metric, 1) + mae_metric = metric_container.metrics[0] + self.assertEqual(mae_metric.result().numpy(), 1.0) + + def test_crossentropy(self): + metric_container = compile_utils.MetricsContainer("crossentropy") + y_t, y_p = tf.ones((10, 1)), tf.ones((10, 1)) + metric_container.update_state(y_t, y_p) + self.assertEqual( + metric_container.metrics[0]._fn, metrics_mod.binary_crossentropy + ) + + metric_container = compile_utils.MetricsContainer("crossentropy") + y_t, y_p = tf.ones((10, 1)), tf.ones((10, 20)) + self.assertEqual(y_p.shape.as_list()[-1], 20) + metric_container.update_state(y_t, y_p) + self.assertEqual( + metric_container.metrics[0]._fn, + metrics_mod.sparse_categorical_crossentropy, + ) + + metric_container = compile_utils.MetricsContainer("crossentropy") + y_t, y_p = tf.ones((10, 20)), tf.ones((10, 20)) + metric_container.update_state(y_t, y_p) + self.assertEqual( + metric_container.metrics[0]._fn, + metrics_mod.categorical_crossentropy, + ) + + def test_accuracy(self): + metric_container = compile_utils.MetricsContainer("accuracy") + y_t, y_p = tf.ones((10, 1)), tf.ones((10, 1)) + metric_container.update_state(y_t, y_p) + self.assertEqual( + metric_container.metrics[0]._fn, metrics_mod.binary_accuracy + ) + + metric_container = compile_utils.MetricsContainer("Accuracy") + y_t, y_p = tf.ones((10, 1)), tf.ones((10, 1)) + metric_container.update_state(y_t, y_p) + self.assertEqual( + 
metric_container.metrics[0]._fn, metrics_mod.binary_accuracy + ) + + metric_container = compile_utils.MetricsContainer("accuracy") + y_t, y_p = tf.ones((10, 1)), tf.ones((10, 20)) + self.assertEqual(y_p.shape.as_list()[-1], 20) + metric_container.update_state(y_t, y_p) + self.assertEqual( + metric_container.metrics[0]._fn, + metrics_mod.sparse_categorical_accuracy, + ) + + metric_container = compile_utils.MetricsContainer("accuracy") + y_t, y_p = tf.ones((10, 20)), tf.ones((10, 20)) + metric_container.update_state(y_t, y_p) + self.assertEqual( + metric_container.metrics[0]._fn, metrics_mod.categorical_accuracy + ) + + def test_metric_weighting(self): + metric_container = compile_utils.MetricsContainer( + metrics=["mae"], weighted_metrics=["mae"] + ) + + y_t = tf.convert_to_tensor([[0], [3], [0]]) + y_p = tf.convert_to_tensor([[0], [0], [0]]) + sw = tf.convert_to_tensor([[1], [0], [1]]) + + metric_container.update_state(y_t, y_p, sample_weight=sw) + self.assertLen(metric_container.metrics, 2) + + mae_metric = metric_container.metrics[0] + self.assertEqual(mae_metric.name, "mae") + self.assertEqual(mae_metric.result().numpy(), 1.0) + + weighted_mae_metric = metric_container.metrics[1] + self.assertEqual(weighted_mae_metric.name, "weighted_mae") + self.assertEqual(weighted_mae_metric.result().numpy(), 0.0) + + def test_broadcast_metrics_to_dict(self): + metric_container = compile_utils.MetricsContainer(metrics=["mae"]) + + y_p = {"output": tf.convert_to_tensor([[0], [1], [2]])} + y_t = {"output": tf.convert_to_tensor([[1], [2], [3]])} + metric_container.update_state(y_t, y_p) + + mae_metric = metric_container.metrics[0] + self.assertEqual(mae_metric.name, "mae") + self.assertEqual(mae_metric.result().numpy(), 1.0) + + def test_broadcast_metrics_to_dict_with_output_names(self): + metric_container = compile_utils.MetricsContainer( + metrics=["mae"], output_names=["output"] + ) + + y_p = tf.convert_to_tensor([[0], [1], [2]]) + y_t = {"output": tf.convert_to_tensor([[1], [2], [3]])} + metric_container.update_state(y_t, y_p) + + mae_metric = metric_container.metrics[0] + self.assertEqual(mae_metric.name, "mae") + self.assertEqual(mae_metric.result().numpy(), 1.0) + + def test_missing_label_with_no_metrics(self): + # It's ok to exclude a label if that label has no + # losses or metrics associated with it. 
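The `test_crossentropy` and `test_accuracy` cases above exercise the lazy resolution of string shorthands: `MetricsContainer` only commits to a concrete metric function once it has seen the shapes of `y_true` and `y_pred`. As a rough sketch of the rule those assertions pin down (using a hypothetical `pick_accuracy` helper, not the real Keras internals):

```python
# Sketch only: mirrors what the assertions above check, not the actual
# compile_utils implementation.
def pick_accuracy(y_true_last_dim, y_pred_last_dim):
    if y_pred_last_dim == 1:
        return "binary_accuracy"
    elif y_true_last_dim == 1:
        return "sparse_categorical_accuracy"
    else:
        return "categorical_accuracy"

assert pick_accuracy(1, 1) == "binary_accuracy"
assert pick_accuracy(1, 20) == "sparse_categorical_accuracy"
assert pick_accuracy(20, 20) == "categorical_accuracy"
```

The same shape-based dispatch applies to the "crossentropy" shorthand, as the earlier test shows.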
+ metric_container = compile_utils.MetricsContainer( + metrics={"output1": "mae", "output3": "mse"} + ) + + y_p = { + "output1": tf.convert_to_tensor([[0], [1], [2]]), + "output2": tf.convert_to_tensor([[3], [4], [5]]), + "output3": tf.convert_to_tensor([[6], [7], [8]]), + } + y_t = { + "output1": tf.convert_to_tensor([[1], [2], [3]]), + "output3": tf.convert_to_tensor([[4], [5], [6]]), + } + + metric_container.update_state(y_t, y_p) + self.assertLen(metric_container.metrics, 2) + + mae_metric = metric_container.metrics[0] + self.assertEqual(mae_metric.name, "output1_mae") + self.assertEqual(mae_metric.result().numpy(), 1.0) + + mse_metric = metric_container.metrics[1] + self.assertEqual(mse_metric.name, "output3_mse") + self.assertEqual(mse_metric.result().numpy(), 4.0) + + def test_metrics_masking(self): + metrics_container = compile_utils.MetricsContainer( + metrics=["mae"], weighted_metrics=["mse"] + ) + y_p = tf.constant([[[1], [1]], [[0], [0]]], dtype=tf.float32) + y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) + y_p._keras_mask = tf.constant([[1, 1], [0, 0]], dtype=tf.float32) + + metrics_container.update_state(y_t, y_p) + self.assertLen(metrics_container.metrics, 2) + + mae_metric = metrics_container.metrics[0] + self.assertEqual(mae_metric.name, "mae") + self.assertAlmostEqual(mae_metric.result().numpy(), 0) + + weighted_mae_metric = metrics_container.metrics[1] + self.assertEqual(weighted_mae_metric.name, "mse") + self.assertAlmostEqual(weighted_mae_metric.result().numpy(), 0) + + def test_metrics_sample_weight(self): + metrics_container = compile_utils.MetricsContainer( + metrics=["mae"], weighted_metrics=["mse"] + ) + y_p = tf.constant([[[1], [1]], [[0], [1]]], dtype=tf.float32) + y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) + sw = tf.constant([[0.2, 0.3], [0.5, 0]], dtype=tf.float32) + + metrics_container.update_state(y_t, y_p, sample_weight=sw) + self.assertLen(metrics_container.metrics, 2) + + mae_metric = metrics_container.metrics[0] + self.assertEqual(mae_metric.name, "mae") + self.assertAlmostEqual(mae_metric.result().numpy(), 0.25) # 1 / 4 + + weighted_mae_metric = metrics_container.metrics[1] + self.assertEqual(weighted_mae_metric.name, "mse") + self.assertAlmostEqual( + weighted_mae_metric.result().numpy(), 0.5 + ) # .5 / 1 + + def test_metrics_masking_sample_weight(self): + metrics_container = compile_utils.MetricsContainer( + metrics=["mae"], weighted_metrics=["mse"] + ) + y_p = tf.constant([[[1], [1]], [[0], [1]]], dtype=tf.float32) + y_t = tf.constant([[[1], [1]], [[1], [1]]], dtype=tf.float32) + sw = tf.constant([[0.3, 0.2], [0.2, 0.3]], dtype=tf.float32) + y_p._keras_mask = tf.constant([[1, 0], [1, 0]], dtype=tf.float32) + + metrics_container.update_state(y_t, y_p, sample_weight=sw) + self.assertLen(metrics_container.metrics, 2) + + mae_metric = metrics_container.metrics[0] + self.assertEqual(mae_metric.name, "mae") + self.assertAlmostEqual(mae_metric.result().numpy(), 0.5) # 1 / .5 + + weighted_mae_metric = metrics_container.metrics[1] + self.assertEqual(weighted_mae_metric.name, "mse") + self.assertAlmostEqual(weighted_mae_metric.result().numpy(), 0.2 / 0.5) + + def test_loss_class_as_metric_with_distribution(self): + distribution = tf.distribute.OneDeviceStrategy("/device:CPU:0") + with distribution.scope(): + metric_container = compile_utils.MetricsContainer( + losses_mod.MeanSquaredError() + ) + y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5)) + metric_container.update_state(y_t, y_p) + + self.assertLen(metric_container.metrics, 
1) + metric = metric_container.metrics[0] + self.assertEqual(metric.name, "mean_squared_error") + self.assertEqual(metric.result().numpy(), 1.0) + + def test_custom_metric_callables(self): + def custom_metric_fn(y_true, y_pred): + return tf.reduce_sum(y_true - y_pred) + + class CustomMetricClass: + def __call__(self, y_true, y_pred): + return tf.reduce_sum(y_true - y_pred) + + metric_container = compile_utils.MetricsContainer( + [custom_metric_fn, CustomMetricClass()] + ) + y_t, y_p = tf.ones((10, 5)), tf.zeros((10, 5)) + metric_container.update_state(y_t, y_p) + + self.assertEqual(metric_container.metrics[0].name, "custom_metric_fn") + self.assertEqual( + metric_container.metrics[1].name, "custom_metric_class" + ) + + def test_reset_state_existing_metric_before_built(self): + metric = metrics_mod.Mean() + metric.update_state([2.0, 4.0]) + self.assertEqual(metric.result().numpy(), 3.0) + + metric_container = compile_utils.MetricsContainer(metric) + metric_container.reset_state() + self.assertEqual(metric.result().numpy(), 0.0) + + def test_duplicated_metric_instance(self): + mean_obj = metrics_mod.Mean() + metric = mean_obj + with self.assertRaisesRegex(ValueError, "Found duplicated metrics"): + compile_utils.MetricsContainer( + metrics=metric, weighted_metrics=metric + ) + + # duplicated string should be fine + metric = "acc" + compile_utils.MetricsContainer(metrics=metric, weighted_metrics=metric) + + # complicated structure + metric = [mean_obj, "acc"] + weighted_metric = {"output1": mean_obj, "output2": "acc"} + with self.assertRaisesRegex(ValueError, "Found duplicated metrics"): + compile_utils.MetricsContainer( + metrics=metric, weighted_metrics=weighted_metric + ) + + +if __name__ == "__main__": + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/engine/control_flow_test.py b/keras/engine/control_flow_test.py index 6ac7586b03e0..161e05d24960 100644 --- a/keras/engine/control_flow_test.py +++ b/keras/engine/control_flow_test.py @@ -14,117 +14,118 @@ # ============================================================================== """Tests for dynamic control flow behavior with Keras.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras +from keras.engine import base_layer +from keras.optimizers.legacy import rmsprop from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.engine import base_layer -from keras.optimizers.optimizer_v2 import rmsprop class ControlFlowLayer1(base_layer.Layer): - """Layer with an `if` condition in call.""" + """Layer with an `if` condition in call.""" - def call(self, inputs): - if tf.reduce_sum(inputs) > 0: - return tf.sqrt(inputs) - else: - return tf.square(inputs) + def call(self, inputs): + if tf.reduce_sum(inputs) > 0: + return tf.sqrt(inputs) + else: + return tf.square(inputs) class ControlFlowLayer2(base_layer.Layer): - """Layer with a `for` loop in call.""" + """Layer with a `for` loop in call.""" - def call(self, inputs): - samples = tf.TensorArray( - dtype=tf.float32, size=tf.shape(inputs)[0]) - i = 0 - for sample in inputs: - samples = samples.write(i, tf.square(sample)) - i += 1 - return samples.stack() + def call(self, inputs): + samples = tf.TensorArray(dtype=tf.float32, size=tf.shape(inputs)[0]) + i = 0 + for sample in inputs: + samples = samples.write(i, tf.square(sample)) + i += 1 + return samples.stack() class NestedControlFlowLayer(base_layer.Layer): - """Layer nested with a control flow 
layer.""" + """Layer nested with a control flow layer.""" - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.layer = ControlFlowLayer1() + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.layer = ControlFlowLayer1() - def call(self, inputs): - return self.layer(inputs) + def call(self, inputs): + return self.layer(inputs) class ControlFlowModel(keras.Model): - """Model with an `if` condition in call.""" + """Model with an `if` condition in call.""" - def call(self, inputs): - if tf.reduce_sum(inputs) > 0: - return tf.sqrt(inputs) - else: - return tf.square(inputs) + def call(self, inputs): + if tf.reduce_sum(inputs) > 0: + return tf.sqrt(inputs) + else: + return tf.square(inputs) class NestedControlFlowModel(keras.Model): - """Model with an `if` condition in call using a control flow layer.""" + """Model with an `if` condition in call using a control flow layer.""" - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.layer = NestedControlFlowLayer() + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.layer = NestedControlFlowLayer() - def call(self, inputs): - inputs = self.layer(inputs) - if tf.reduce_sum(inputs) > 0: - return tf.sqrt(inputs) - else: - return tf.square(inputs) + def call(self, inputs): + inputs = self.layer(inputs) + if tf.reduce_sum(inputs) > 0: + return tf.sqrt(inputs) + else: + return tf.square(inputs) class FunctionControlFlowModel(keras.Model): - """Model with control flow where `call` is wrapped in function already.""" + """Model with control flow where `call` is wrapped in function already.""" - @tf.function - def call(self, inputs): - if tf.reduce_sum(inputs) > 0: - return tf.sqrt(inputs) - else: - return tf.square(inputs) + @tf.function + def call(self, inputs): + if tf.reduce_sum(inputs) > 0: + return tf.sqrt(inputs) + else: + return tf.square(inputs) @test_combinations.run_all_keras_modes class AutographWrapperTest(test_combinations.TestCase): - - @test_combinations.run_with_all_model_types - @parameterized.named_parameters(('with_if', ControlFlowLayer1), - ('with_for', ControlFlowLayer2), - ('nested', NestedControlFlowLayer)) - def test_control_flow_layer(self, layer_class): - model = test_utils.get_model_from_layers([layer_class()], - input_shape=(3,)) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - @parameterized.named_parameters( - ('with_if', ControlFlowModel), ('nested', NestedControlFlowModel), - ('wrapped_in_function', FunctionControlFlowModel)) - def test_control_flow_model(self, model_class): - model = model_class() - model.compile(rmsprop.RMSprop(0.001), loss='mse') - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - def test_control_flow_in_deferred_sequential_model(self): - model = keras.Sequential( - [ControlFlowLayer1(), - keras.layers.Dense(3), - ControlFlowLayer2()]) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - -if __name__ == '__main__': - tf.test.main() + @test_combinations.run_with_all_model_types + @parameterized.named_parameters( + ("with_if", ControlFlowLayer1), + ("with_for", ControlFlowLayer2), + ("nested", NestedControlFlowLayer), + ) + def test_control_flow_layer(self, layer_class): + model = test_utils.get_model_from_layers( + [layer_class()], input_shape=(3,) + ) + model.compile(rmsprop.RMSprop(0.001), loss="mse") + model.train_on_batch(np.random.random((2, 3)), 
np.random.random((2, 3))) + + @parameterized.named_parameters( + ("with_if", ControlFlowModel), + ("nested", NestedControlFlowModel), + ("wrapped_in_function", FunctionControlFlowModel), + ) + def test_control_flow_model(self, model_class): + model = model_class() + model.compile(rmsprop.RMSprop(0.001), loss="mse") + model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) + + def test_control_flow_in_deferred_sequential_model(self): + model = keras.Sequential( + [ControlFlowLayer1(), keras.layers.Dense(3), ControlFlowLayer2()] + ) + model.compile(rmsprop.RMSprop(0.001), loss="mse") + model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/correctness_test.py b/keras/engine/correctness_test.py index dd66f556e507..6b16e247cea9 100644 --- a/keras/engine/correctness_test.py +++ b/keras/engine/correctness_test.py @@ -14,10 +14,9 @@ # ============================================================================== """Tests for numerical correctness.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras from keras.testing_infra import test_combinations @@ -25,117 +24,118 @@ class MultiInputSubclassed(keras.Model): - """Subclassed Model that adds its inputs and then adds a bias.""" + """Subclassed Model that adds its inputs and then adds a bias.""" - def __init__(self): - super().__init__() - self.add = keras.layers.Add() - self.bias = test_utils.Bias() + def __init__(self): + super().__init__() + self.add = keras.layers.Add() + self.bias = test_utils.Bias() - def call(self, inputs): - added = self.add(inputs) - return self.bias(added) + def call(self, inputs): + added = self.add(inputs) + return self.bias(added) def multi_input_functional(): - """Functional Model that adds its inputs and then adds a bias.""" - input_1 = keras.Input(shape=(1,)) - input_2 = keras.Input(shape=(1,)) - input_3 = keras.Input(shape=(1,)) - added = keras.layers.Add()([input_1, input_2, input_3]) - output = test_utils.Bias()(added) - return keras.Model([input_1, input_2, input_3], output) + """Functional Model that adds its inputs and then adds a bias.""" + input_1 = keras.Input(shape=(1,)) + input_2 = keras.Input(shape=(1,)) + input_3 = keras.Input(shape=(1,)) + added = keras.layers.Add()([input_1, input_2, input_3]) + output = test_utils.Bias()(added) + return keras.Model([input_1, input_2, input_3], output) @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes class SimpleBiasTest(test_combinations.TestCase): + def _get_simple_bias_model(self): + model = test_utils.get_model_from_layers( + [test_utils.Bias()], input_shape=(1,) + ) + model.compile( + keras.optimizers.legacy.gradient_descent.SGD(0.1), + "mae", + run_eagerly=test_utils.should_run_eagerly(), + ) + return model - def _get_simple_bias_model(self): - model = test_utils.get_model_from_layers([test_utils.Bias()], - input_shape=(1,)) - model.compile( - keras.optimizers.optimizer_v2.gradient_descent.SGD(0.1), - 'mae', - run_eagerly=test_utils.should_run_eagerly()) - return model + def test_simple_bias_fit(self): + x = np.array([[0.0], [1.0], [2.0]]) + y = np.array([[0.5], [2.0], [3.5]]) + model = self._get_simple_bias_model() - def test_simple_bias_fit(self): - x = np.array([[0.], [1.], [2.]]) - y = np.array([[0.5], [2.], [3.5]]) - model = self._get_simple_bias_model() + history = model.fit(x, y, batch_size=3, epochs=5) + 
self.assertAllClose(history.history["loss"], [1.0, 0.9, 0.8, 0.7, 0.6]) - history = model.fit(x, y, batch_size=3, epochs=5) - self.assertAllClose(history.history['loss'], [1., 0.9, 0.8, 0.7, 0.6]) + def test_simple_bias_evaluate(self): + x = np.array([[0.0], [1.0], [2.0]]) + y = np.array([[1.0], [3.0], [5.0]]) + model = self._get_simple_bias_model() - def test_simple_bias_evaluate(self): - x = np.array([[0.], [1.], [2.]]) - y = np.array([[1.], [3.], [5.]]) - model = self._get_simple_bias_model() + loss = model.evaluate(x, y, batch_size=1) + self.assertAlmostEqual(loss, 2.0) - loss = model.evaluate(x, y, batch_size=1) - self.assertAlmostEqual(loss, 2.) + def test_simple_bias_predict(self): + x = np.array([[0.0], [1.0], [2.0]]) + model = self._get_simple_bias_model() - def test_simple_bias_predict(self): - x = np.array([[0.], [1.], [2.]]) - model = self._get_simple_bias_model() - - pred = model.predict(x, batch_size=1) - self.assertAllClose(x, pred) + pred = model.predict(x, batch_size=1) + self.assertAllClose(x, pred) @test_combinations.run_all_keras_modes class MultipleInputTest(test_combinations.TestCase): - - def _get_multiple_input_model(self, subclassed=True): - if subclassed: - model = MultiInputSubclassed() - else: - model = multi_input_functional() - model.compile( - keras.optimizers.optimizer_v2.gradient_descent.SGD(0.1), - 'mae', - run_eagerly=test_utils.should_run_eagerly()) - return model - - @parameterized.named_parameters(('subclassed', True), ('functional', False)) - def test_multiple_input_fit(self, subclassed): - x = [ - np.array([[1.], [2.], [3.]]), - np.array([[4.], [5.], [6.]]), - np.array([[7.], [8.], [9.]]) - ] - y = np.array([[12.5], [16.], [19.5]]) - - model = self._get_multiple_input_model(subclassed) - history = model.fit(x, y, batch_size=3, epochs=5) - self.assertAllClose(history.history['loss'], [1., 0.9, 0.8, 0.7, 0.6]) - - @parameterized.named_parameters(('subclassed', True), ('functional', False)) - def test_multiple_input_evaluate(self, subclassed): - x = [ - np.array([[1.], [2.], [3.]]), - np.array([[4.], [5.], [6.]]), - np.array([[7.], [8.], [9.]]) - ] - y = np.array([[13.], [17.], [21.]]) - - model = self._get_multiple_input_model(subclassed) - loss = model.evaluate(x, y, batch_size=3) - self.assertAlmostEqual(loss, 2.) 
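The expected loss histories in these bias tests follow from a one-line gradient argument: `test_utils.Bias` adds a single scalar `b` (initialized to zero), every residual `y - (x + b)` starts positive, so the MAE gradient with respect to `b` is exactly -1, and plain SGD with learning rate 0.1 raises `b` by 0.1 per epoch, shaving 0.1 off the loss each time. A quick NumPy check of that arithmetic (a sketch of the math, not the test itself):

```python
import numpy as np

# Data from test_simple_bias_fit: the residuals y - x are [0.5, 1.0, 1.5].
x = np.array([0.0, 1.0, 2.0])
y = np.array([0.5, 2.0, 3.5])
b, lr, losses = 0.0, 0.1, []
for _ in range(5):
    losses.append(np.mean(np.abs(y - (x + b))))  # MAE before the update
    b += lr  # d(MAE)/db = -1 while every residual stays positive
print(losses)  # ~= [1.0, 0.9, 0.8, 0.7, 0.6]
```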
- - @parameterized.named_parameters(('subclassed', True), ('functional', False)) - def test_multiple_input_predict(self, subclassed): - x = [ - np.array([[1.], [2.], [3.]]), - np.array([[4.], [5.], [6.]]), - np.array([[7.], [8.], [9.]]) - ] - - model = self._get_multiple_input_model(subclassed) - pred = model.predict(x, batch_size=1) - self.assertAllClose(pred, [[12.], [15.], [18.]]) - - -if __name__ == '__main__': - tf.test.main() + def _get_multiple_input_model(self, subclassed=True): + if subclassed: + model = MultiInputSubclassed() + else: + model = multi_input_functional() + model.compile( + keras.optimizers.legacy.gradient_descent.SGD(0.1), + "mae", + run_eagerly=test_utils.should_run_eagerly(), + ) + return model + + @parameterized.named_parameters(("subclassed", True), ("functional", False)) + def test_multiple_input_fit(self, subclassed): + x = [ + np.array([[1.0], [2.0], [3.0]]), + np.array([[4.0], [5.0], [6.0]]), + np.array([[7.0], [8.0], [9.0]]), + ] + y = np.array([[12.5], [16.0], [19.5]]) + + model = self._get_multiple_input_model(subclassed) + history = model.fit(x, y, batch_size=3, epochs=5) + self.assertAllClose(history.history["loss"], [1.0, 0.9, 0.8, 0.7, 0.6]) + + @parameterized.named_parameters(("subclassed", True), ("functional", False)) + def test_multiple_input_evaluate(self, subclassed): + x = [ + np.array([[1.0], [2.0], [3.0]]), + np.array([[4.0], [5.0], [6.0]]), + np.array([[7.0], [8.0], [9.0]]), + ] + y = np.array([[13.0], [17.0], [21.0]]) + + model = self._get_multiple_input_model(subclassed) + loss = model.evaluate(x, y, batch_size=3) + self.assertAlmostEqual(loss, 2.0) + + @parameterized.named_parameters(("subclassed", True), ("functional", False)) + def test_multiple_input_predict(self, subclassed): + x = [ + np.array([[1.0], [2.0], [3.0]]), + np.array([[4.0], [5.0], [6.0]]), + np.array([[7.0], [8.0], [9.0]]), + ] + + model = self._get_multiple_input_model(subclassed) + pred = model.predict(x, batch_size=1) + self.assertAllClose(pred, [[12.0], [15.0], [18.0]]) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/data_adapter.py b/keras/engine/data_adapter.py index 00f8c41e4ab9..517684e75590 100644 --- a/keras/engine/data_adapter.py +++ b/keras/engine/data_adapter.py @@ -14,8 +14,6 @@ # ============================================================================== """Adapter module that convert different input data objects into tf.dataset.""" -import tensorflow.compat.v2 as tf - import abc import contextlib import functools @@ -24,1711 +22,1985 @@ import random import numpy as np -from tensorflow.python.eager import context +import tensorflow.compat.v2 as tf + from keras import backend +from keras.distribute import distributed_training_utils from keras.engine import training_utils from keras.utils import data_utils from keras.utils import dataset_creator from keras.utils import tf_utils -from tensorflow.python.distribute.input_lib import DistributedDataset + +# isort: off +from tensorflow.python.distribute.input_lib import ( + DistributedDataset, +) +from tensorflow.python.eager import context from tensorflow.python.framework import type_spec from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export +from tensorflow.python.data.ops import ( + from_sparse_tensor_slices_op, +) +from tensorflow.python.data.ops import from_generator_op +from tensorflow.python.data.ops import range_op +from tensorflow.python.data.ops import from_tensors_op +from tensorflow.python.data.ops import 
from_tensor_slices_op try: - import pandas as pd # pylint: disable=g-import-not-at-top + import pandas as pd except ImportError: - pd = None + pd = None keras_data_adapter_gauge = tf.__internal__.monitoring.BoolGauge( - "/tensorflow/api/keras/data_adapters", "keras data adapter usage", "method") + "/tensorflow/api/keras/data_adapters", "keras data adapter usage", "method" +) class DataAdapter(object, metaclass=abc.ABCMeta): - """Base class for input data adapter. - - In TF 2.0, tf.data is the preferred API for user to feed in data. In order - to simplify the training code path, all the input data object will be - converted to `tf.data.Dataset` if possible. - - Note that since this class is mainly targeted for TF 2.0, it might have a lot - of assumptions under the hood, e.g. eager context by default, distribution - strategy, etc. In the meantime, some legacy feature support might be dropped, - eg, Iterator from dataset API in v1, etc. - - The sample usage of this class is like: - - ``` - x = tf.data.Dataset.range(100) - adapter_cls = [NumpyArrayDataAdapter, ..., DatasetAdapter] - applicable_adapters = [cls for cls in adapter_cls if cls.can_handle(x)] - if len(applicable_adapters) != 1: - raise ValueError("Expect only one adapter class to handle the input") - - dataset = applicable_adapters[0](x).get_dataset() - for data in dataset: - # training - ``` - """ - - @staticmethod - def can_handle(x, y=None): - """Whether the current DataAdapter could handle the input x and y. - - Structure wise, x and y can be single object, or list of objects if there - multiple input/output, or dictionary of objects when the input/output are - named. - - Args: - x: input features. - y: target labels. Note that y could be None in the case of prediction. - - Returns: - boolean - """ - raise NotImplementedError - - @abc.abstractmethod - def __init__(self, x, y=None, **kwargs): - """Create a DataAdapter based on data inputs. - - The caller must make sure to call `can_handle()` first before invoking this - method. Provide unsupported data type will result into unexpected behavior. - - Args: - x: input features. - y: target labels. Note that y could be None in the case of prediction. - **kwargs: Other keyword arguments for DataAdapter during the construction - of the tf.dataset.Dataset. For example: - - Numpy data might have `sample_weights` which will be used for - weighting the loss function during training. - - Numpy data might need to have `batch_size` parameter when constructing - the dataset and iterator. - - Certain input might need to be distribution strategy aware. When - `distribution_strategy` is passed, the created dataset need to respect - the strategy. - DataAdapter might choose to ignore any keyword argument if it doesn't - use it, or raise exception if any required argument is not provided. - """ - if not self.can_handle(x, y): - raise ValueError("{} Cannot handle input {}, {}".format( - self.__class__, x, y)) - - @abc.abstractmethod - def get_dataset(self): - """Get a dataset instance for the current DataAdapter. - - Note that the dataset returned does not repeat for epoch, so caller might - need to create new iterator for the same dataset at the beginning of the - epoch. This behavior might change in the future. - - Returns: - A `tf.data.Dataset`. Caller might use the dataset in different - context, e.g. iter(dataset) in eager to get the value directly, or in - graph mode, provide the iterator tensor to Keras model function. 
- """ - raise NotImplementedError - - @abc.abstractmethod - def get_size(self): - """Return the size (number of batches) for the dataset created. - - For certain type of the data input, the number of batches is known, eg for - Numpy data, the size is same as (number_of_element / batch_size). Whereas - for dataset or python generator, the size is unknown since it may or may not - have an end state. - - Returns: - int, the number of batches for the dataset, or None if it is unknown. The - caller could use this to control the loop of training, show progress bar, - or handle unexpected StopIteration error. - """ - raise NotImplementedError - - @abc.abstractmethod - def batch_size(self): - """Return the batch size of the dataset created. - - For certain type of the data input, the batch size is known, and even - required, like numpy array. Whereas for dataset, the batch is unknown - unless we take a peek. - - Returns: - int, the batch size of the dataset, or None if it is unknown. - """ - raise NotImplementedError - - def representative_batch_size(self): - """Return a representative size for batches in the dataset. - - This is not guaranteed to be the batch size for all batches in the - dataset. It just needs to be a rough approximation for batch sizes in - the dataset. - - Returns: - int, a representative size for batches found in the dataset, - or None if it is unknown. - """ - return self.batch_size() - - @abc.abstractmethod - def has_partial_batch(self): - """Whether the dataset has partial batch at the end.""" - raise NotImplementedError - - @abc.abstractmethod - def partial_batch_size(self): - """The size of the final partial batch for dataset. - - Will return None if has_partial_batch is False or batch_size is None. + """Base class for input data adapter. + + In TF 2.0, tf.data is the preferred API for user to feed in data. In order + to simplify the training code path, all the input data object will be + converted to `tf.data.Dataset` if possible. + + Note that since this class is mainly targeted for TF 2.0, it might have a + lot of assumptions under the hood, e.g. eager context by default, + distribution strategy, etc. In the meantime, some legacy feature support + might be dropped, eg, Iterator from dataset API in v1, etc. + + The sample usage of this class is like: + + ``` + x = tf.data.Dataset.range(100) + adapter_cls = [NumpyArrayDataAdapter, ..., DatasetAdapter] + applicable_adapters = [cls for cls in adapter_cls if cls.can_handle(x)] + if len(applicable_adapters) != 1: + raise ValueError("Expect only one adapter class to handle the input") + + dataset = applicable_adapters[0](x).get_dataset() + for data in dataset: + # training + ``` """ - raise NotImplementedError - @abc.abstractmethod - def should_recreate_iterator(self): - """Returns whether a new iterator should be created every epoch.""" - raise NotImplementedError - - def get_samples(self): - """Returns number of samples in the data, or `None`.""" - if not self.get_size() or not self.batch_size(): - return None - total_sample = self.get_size() * self.batch_size() - if self.has_partial_batch(): - total_sample -= (self.batch_size() - self.partial_batch_size()) - return total_sample - - def on_epoch_end(self): - """A hook called after each epoch.""" - pass + @staticmethod + def can_handle(x, y=None): + """Whether the current DataAdapter could handle the input x and y. 
+ + Structure-wise, x and y can each be a single object, a list of objects if + there are multiple inputs/outputs, or a dictionary of objects when the + inputs/outputs are named. + + Args: + x: input features. + y: target labels. Note that y could be None in the case of prediction. + + Returns: + boolean + """ + raise NotImplementedError + + @abc.abstractmethod + def __init__(self, x, y=None, **kwargs): + """Create a DataAdapter based on data inputs. + + The caller must make sure to call `can_handle()` first before invoking + this method. Providing an unsupported data type will result in + unexpected behavior. + + Args: + x: input features. + y: target labels. Note that y could be None in the case of prediction. + **kwargs: Other keyword arguments for DataAdapter during the + construction of the tf.data.Dataset. For example: + - Numpy data might have `sample_weights` which will be used for + weighting the loss function during training. + - Numpy data might need to have a `batch_size` parameter when + constructing the dataset and iterator. + - Certain input might need to be distribution strategy aware. When + `distribution_strategy` is passed, the created dataset needs to + respect the strategy. + DataAdapter might choose to ignore any keyword argument if it + doesn't use it, or raise an exception if any required argument is + not provided. + """ + if not self.can_handle(x, y): + raise ValueError(f"{self.__class__} Cannot handle input {x}, {y}") + + @abc.abstractmethod + def get_dataset(self): + """Get a dataset instance for the current DataAdapter. + + Note that the dataset returned does not repeat across epochs, so the + caller might need to create a new iterator for the same dataset at the + beginning of each epoch. This behavior might change in the future. + + Returns: + A `tf.data.Dataset`. The caller might use the dataset in different + contexts, e.g. iter(dataset) in eager mode to get the values directly, + or, in graph mode, providing the iterator tensor to the Keras model + function. + """ + raise NotImplementedError + + @abc.abstractmethod + def get_size(self): + """Return the size (number of batches) for the dataset created. + + For certain types of data input, the number of batches is known, e.g. + for Numpy data, the size is (number_of_elements / batch_size). + Whereas for a dataset or a Python generator, the size is unknown since + it may or may not have an end state. + + Returns: + int, the number of batches for the dataset, or None if it is unknown. + The caller can use this to control the training loop, show a progress + bar, or handle an unexpected StopIteration error. + """ + raise NotImplementedError + + @abc.abstractmethod + def batch_size(self): + """Return the batch size of the dataset created. + + For certain types of data input, the batch size is known, and even + required, e.g. for a Numpy array. Whereas for a dataset, the batch size + is unknown unless we take a peek. + + Returns: + int, the batch size of the dataset, or None if it is unknown. + """ + raise NotImplementedError + + def representative_batch_size(self): + """Return a representative size for batches in the dataset. + + This is not guaranteed to be the batch size for all batches in the + dataset. It just needs to be a rough approximation for batch sizes in + the dataset. + + Returns: + int, a representative size for batches found in the dataset, + or None if it is unknown.
+ """ + return self.batch_size() + + @abc.abstractmethod + def has_partial_batch(self): + """Whether the dataset has partial batch at the end.""" + raise NotImplementedError + + @abc.abstractmethod + def partial_batch_size(self): + """The size of the final partial batch for dataset. + + Will return None if has_partial_batch is False or batch_size is None. + """ + raise NotImplementedError + + @abc.abstractmethod + def should_recreate_iterator(self): + """Returns whether a new iterator should be created every epoch.""" + raise NotImplementedError + + def get_samples(self): + """Returns number of samples in the data, or `None`.""" + if not self.get_size() or not self.batch_size(): + return None + total_sample = self.get_size() * self.batch_size() + if self.has_partial_batch(): + total_sample -= self.batch_size() - self.partial_batch_size() + return total_sample + + def on_epoch_end(self): + """A hook called after each epoch.""" + pass class TensorLikeDataAdapter(DataAdapter): - """Adapter that handles Tensor-like objects, e.g. EagerTensor and NumPy.""" - - @staticmethod - def can_handle(x, y=None): - # TODO(kaftan): Check performance implications of using a flatten - # here for other types of inputs. - flat_inputs = tf.nest.flatten(x) - if y is not None: - flat_inputs += tf.nest.flatten(y) - - tensor_types = _get_tensor_types() - - def _is_tensor(v): - if isinstance(v, tensor_types): - return True - return False - - return all(_is_tensor(v) for v in flat_inputs) - - def __init__(self, - x, - y=None, - sample_weights=None, - sample_weight_modes=None, - batch_size=None, - epochs=1, - steps=None, - shuffle=False, - **kwargs): - super().__init__(x, y, **kwargs) - x, y, sample_weights = _process_tensorlike((x, y, sample_weights)) - sample_weight_modes = broadcast_sample_weight_modes( - sample_weights, sample_weight_modes) - - # If sample_weights are not specified for an output use 1.0 as weights. - (sample_weights, _, _) = training_utils.handle_partial_sample_weights( - y, sample_weights, sample_weight_modes, check_all_flat=True) - - inputs = pack_x_y_sample_weight(x, y, sample_weights) - - num_samples = set(int(i.shape[0]) for i in tf.nest.flatten(inputs)).pop() - _check_data_cardinality(inputs) - - # If batch_size is not passed but steps is, calculate from the input data. - # Default to 32 for backwards compat. - if not batch_size: - batch_size = int(math.ceil(num_samples / steps)) if steps else 32 - - self._size = int(math.ceil(num_samples / batch_size)) - self._batch_size = batch_size - - num_full_batches = int(num_samples // batch_size) - self._partial_batch_size = num_samples % batch_size - - if isinstance(shuffle, str): - shuffle = shuffle.lower() - - self._shuffle = shuffle - # Vectorized version of shuffle. - # This is a performance improvement over using `from_tensor_slices`. - # The indices of the data are shuffled and batched, and these indices - # are then zipped with the data and used to extract a batch of the data - # at each step. The performance improvements here come from: - # 1. vectorized batch using gather - # 2. parallelized map - # 3. pipelined permutation generation - # 4. optimized permutation batching - # 5. disabled static optimizations - - indices_dataset = tf.data.Dataset.range(1) - if shuffle != "batch": - indices_dataset = indices_dataset.repeat(epochs) - - def permutation(_): - # It turns out to be more performant to make a new set of indices rather - # than reusing the same range Tensor. (presumably because of buffer - # forwarding.) 
- indices = tf.range(num_samples, dtype=tf.int64) - if shuffle and shuffle != "batch": - indices = tf.random.shuffle(indices) - return indices - - # We prefetch a single element. Computing large permutations can take quite - # a while so we don't want to wait for prefetching over an epoch boundary to - # trigger the next permutation. On the other hand, too many simultaneous - # shuffles can contend on a hardware level and degrade all performance. - indices_dataset = indices_dataset.map(permutation).prefetch(1) - - def slice_batch_indices(indices): - """Convert a Tensor of indices into a dataset of batched indices. - - This step can be accomplished in several ways. The most natural is to - slice the Tensor in a Dataset map. (With a condition on the upper index to - handle the partial batch.) However it turns out that coercing the Tensor - into a shape which is divisible by the batch size (and handling the last - partial batch separately) allows for a much more favorable memory access - pattern and improved performance. - - Args: - indices: Tensor which determines the data order for an entire epoch. - - Returns: - A Dataset of batched indices. - """ - num_in_full_batch = num_full_batches * batch_size - first_k_indices = tf.slice(indices, [0], [num_in_full_batch]) - first_k_indices = tf.reshape( - first_k_indices, [num_full_batches, batch_size]) - - flat_dataset = tf.data.Dataset.from_tensor_slices(first_k_indices) - if self._partial_batch_size: - index_remainder = tf.data.Dataset.from_tensors(tf.slice( - indices, [num_in_full_batch], [self._partial_batch_size])) - flat_dataset = flat_dataset.concatenate(index_remainder) - - if shuffle == "batch": - # 1024 is a magic constant that has not been properly evaluated - flat_dataset = flat_dataset.shuffle(1024).repeat(epochs) - return flat_dataset - - indices_dataset = indices_dataset.flat_map(slice_batch_indices) - - dataset = self.slice_inputs(indices_dataset, inputs) - - if shuffle == "batch": - def shuffle_batch(*batch): - return tf.nest.map_structure(tf.random.shuffle, batch) - dataset = dataset.map(shuffle_batch) - - self._dataset = dataset - - def slice_inputs(self, indices_dataset, inputs): - """Slice inputs into a Dataset of batches. - - Given a Dataset of batch indices and the unsliced inputs, - this step slices the inputs in a parallelized fashion - and produces a dataset of input batches. - - Args: - indices_dataset: A Dataset of batched indices - inputs: A python data structure that contains the inputs, targets, - and possibly sample weights. - - Returns: - A Dataset of input batches matching the batch indices. - """ - dataset = tf.data.Dataset.zip(( - indices_dataset, - tf.data.Dataset.from_tensors(inputs).repeat() - )) + """Adapter that handles Tensor-like objects, e.g. EagerTensor and NumPy.""" - def grab_batch(i, data): - return tf.nest.map_structure(lambda d: tf.gather(d, i, axis=0), data) + @staticmethod + def can_handle(x, y=None): + # TODO(kaftan): Check performance implications of using a flatten + # here for other types of inputs. + flat_inputs = tf.nest.flatten(x) + if y is not None: + flat_inputs += tf.nest.flatten(y) - dataset = dataset.map( - grab_batch, num_parallel_calls=tf.data.AUTOTUNE) + tensor_types = _get_tensor_types() - # Default optimizations are disabled to avoid the overhead of (unnecessary) - # input pipeline graph serialization and deserialization - options = tf.data.Options() - options.experimental_optimization.apply_default_optimizations = False - if self._shuffle: - # See b/141490660 for more details. 
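The comment block above describes the vectorized shuffle: instead of `from_tensor_slices` over the data itself, the adapter shuffles and batches indices, zips them with the (repeated) full inputs, and gathers one batch per step. A toy re-creation of that zip-and-gather pattern, as a sketch under simplified assumptions rather than the adapter's actual code:

```python
import tensorflow as tf

features = tf.range(8, dtype=tf.float32)  # stand-in for the full inputs
# Two batches of four shuffled indices each.
indices = tf.data.Dataset.from_tensor_slices(
    tf.reshape(tf.random.shuffle(tf.range(8, dtype=tf.int64)), [2, 4])
)
data = tf.data.Dataset.from_tensors(features).repeat()
batches = tf.data.Dataset.zip((indices, data)).map(
    lambda i, d: tf.gather(d, i, axis=0),
    num_parallel_calls=tf.data.AUTOTUNE,
)
for batch in batches:
    print(batch.numpy())  # four elements of `features`, in shuffled order
```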
- options.experimental_external_state_policy = ( - tf.data.experimental.ExternalStatePolicy.IGNORE) - dataset = dataset.with_options(options) - return dataset + def _is_tensor(v): + if isinstance(v, tensor_types): + return True + return False - def get_dataset(self): - return self._dataset + return all(_is_tensor(v) for v in flat_inputs) - def get_size(self): - return self._size - - def batch_size(self): - return self._batch_size - - def has_partial_batch(self): - return self._partial_batch_size > 0 + def __init__( + self, + x, + y=None, + sample_weights=None, + sample_weight_modes=None, + batch_size=None, + epochs=1, + steps=None, + shuffle=False, + **kwargs, + ): + super().__init__(x, y, **kwargs) + x, y, sample_weights = _process_tensorlike((x, y, sample_weights)) + sample_weight_modes = broadcast_sample_weight_modes( + sample_weights, sample_weight_modes + ) + + # If sample_weights are not specified for an output use 1.0 as weights. + (sample_weights, _, _) = training_utils.handle_partial_sample_weights( + y, sample_weights, sample_weight_modes, check_all_flat=True + ) + + inputs = pack_x_y_sample_weight(x, y, sample_weights) + + num_samples = set( + int(i.shape[0]) for i in tf.nest.flatten(inputs) + ).pop() + _check_data_cardinality(inputs) + + # If batch_size is not passed but steps is, calculate from the input + # data. Defaults to `32` for backwards compatibility. + if not batch_size: + batch_size = int(math.ceil(num_samples / steps)) if steps else 32 + + self._size = int(math.ceil(num_samples / batch_size)) + self._batch_size = batch_size + + num_full_batches = int(num_samples // batch_size) + self._partial_batch_size = num_samples % batch_size + + if isinstance(shuffle, str): + shuffle = shuffle.lower() + + self._shuffle = shuffle + # Vectorized version of shuffle. + # This is a performance improvement over using `from_tensor_slices`. + # The indices of the data are shuffled and batched, and these indices + # are then zipped with the data and used to extract a batch of the data + # at each step. The performance improvements here come from: + # 1. vectorized batch using gather + # 2. parallelized map + # 3. pipelined permutation generation + # 4. optimized permutation batching + # 5. disabled static optimizations + + indices_dataset = tf.data.Dataset.range(1) + if shuffle != "batch": + indices_dataset = indices_dataset.repeat(epochs) + + def permutation(_): + # It turns out to be more performant to make a new set of indices + # rather than reusing the same range Tensor. (presumably because of + # buffer forwarding.) + indices = tf.range(num_samples, dtype=tf.int64) + if shuffle and shuffle != "batch": + indices = tf.random.shuffle(indices) + return indices + + # We prefetch a single element. Computing large permutations can take + # quite a while so we don't want to wait for prefetching over an epoch + # boundary to trigger the next permutation. On the other hand, too many + # simultaneous shuffles can contend on a hardware level and degrade all + # performance. + indices_dataset = indices_dataset.map(permutation).prefetch(1) + + def slice_batch_indices(indices): + """Convert a Tensor of indices into a dataset of batched indices. + + This step can be accomplished in several ways. The most natural is + to slice the Tensor in a Dataset map. (With a condition on the upper + index to handle the partial batch.) 
However it turns out that + coercing the Tensor into a shape which is divisible by the batch + size (and handling the last partial batch separately) allows for a + much more favorable memory access pattern and improved performance. + + Args: + indices: Tensor which determines the data order for an entire + epoch. + + Returns: + A Dataset of batched indices. + """ + num_in_full_batch = num_full_batches * batch_size + first_k_indices = tf.slice(indices, [0], [num_in_full_batch]) + first_k_indices = tf.reshape( + first_k_indices, [num_full_batches, batch_size] + ) + + flat_dataset = tf.data.Dataset.from_tensor_slices(first_k_indices) + if self._partial_batch_size: + index_remainder = tf.data.Dataset.from_tensors( + tf.slice( + indices, [num_in_full_batch], [self._partial_batch_size] + ) + ) + flat_dataset = flat_dataset.concatenate(index_remainder) + + if shuffle == "batch": + # 1024 is a magic constant that has not been properly evaluated + flat_dataset = flat_dataset.shuffle(1024).repeat(epochs) + return flat_dataset + + indices_dataset = indices_dataset.flat_map(slice_batch_indices) + + dataset = self.slice_inputs(indices_dataset, inputs) + + if shuffle == "batch": + + def shuffle_batch(*batch): + return tf.nest.map_structure(tf.random.shuffle, batch) + + dataset = dataset.map(shuffle_batch) + + options = tf.data.Options() + options.experimental_distribute.auto_shard_policy = ( + tf.data.experimental.AutoShardPolicy.DATA + ) + dataset = dataset.with_options(options) + + self._dataset = dataset.prefetch(tf.data.AUTOTUNE) + + def slice_inputs(self, indices_dataset, inputs): + """Slice inputs into a Dataset of batches. + + Given a Dataset of batch indices and the unsliced inputs, + this step slices the inputs in a parallelized fashion + and produces a dataset of input batches. + + Args: + indices_dataset: A Dataset of batched indices + inputs: A python data structure that contains the inputs, targets, + and possibly sample weights. + + Returns: + A Dataset of input batches matching the batch indices. + """ + dataset = tf.data.Dataset.zip( + (indices_dataset, tf.data.Dataset.from_tensors(inputs).repeat()) + ) + + def grab_batch(i, data): + return tf.nest.map_structure( + lambda d: tf.gather(d, i, axis=0), data + ) + + dataset = dataset.map(grab_batch, num_parallel_calls=tf.data.AUTOTUNE) + + # Default optimizations are disabled to avoid the overhead of + # (unnecessary) input pipeline graph serialization and deserialization + options = tf.data.Options() + options.experimental_optimization.apply_default_optimizations = False + if self._shuffle: + # See b/141490660 for more details. + options.experimental_external_state_policy = ( + tf.data.experimental.ExternalStatePolicy.IGNORE + ) + dataset = dataset.with_options(options) + return dataset + + def get_dataset(self): + return self._dataset + + def get_size(self): + return self._size - def partial_batch_size(self): - return self._partial_batch_size or None + def batch_size(self): + return self._batch_size + + def has_partial_batch(self): + return self._partial_batch_size > 0 + + def partial_batch_size(self): + return self._partial_batch_size or None - def should_recreate_iterator(self): - # An infinite dataset is always created here. - return False + def should_recreate_iterator(self): + # An infinite dataset is always created here. + return False class GenericArrayLikeDataAdapter(TensorLikeDataAdapter): - """Adapter that handles array-like data without forcing it into memory. 
- - This adapter handles array-like datasets that may be too big to fully - fit into memory. - - Specifically, this adapter handles any Python class which implements: - `__get_item__`, `__len__`, `shape`, and `dtype` with the same meanings - as Numpy, but it ignores any case where all the inputs are Tensors or Numpy - arrays (because that case is handled by the base TensorLikeDataAdapter). - - It ignores scipy sparse matrices and Composite Tensors because those are - handled by the CompositeTensorDataAdapter. - - It also does not handle lists/tuples of scalars, because those are handled - by the ListsOfScalarsDataAdapter. - """ - - @staticmethod - def can_handle(x, y=None): - flat_inputs = tf.nest.flatten(x) - if y is not None: - flat_inputs += tf.nest.flatten(y) - - def _is_array_like(v): - """Return True if v is a Tensor, array, or is array-like.""" - return ( - hasattr(v, "__getitem__") and - hasattr(v, "shape") and - hasattr(v, "dtype") and - hasattr(v, "__len__") - ) - - if (not TensorLikeDataAdapter.can_handle(x, y) and - not CompositeTensorDataAdapter.can_handle(x, y)): - return all(_is_array_like(v) for v in flat_inputs) - else: - return False - - def __init__(self, *args, **kwargs): - logging.warning( - "Keras is training/fitting/evaluating on array-like data. Keras may " - "not be optimized for this format, so if your input data format is " - "supported by TensorFlow I/O (https://github.com/tensorflow/io) we " - "recommend using that to load a Dataset instead.") - - super().__init__(*args, **kwargs) + """Adapter that handles array-like data without forcing it into memory. - def slice_inputs(self, indices_dataset, inputs): - """Slice inputs into a Dataset of batches. + This adapter handles array-like datasets that may be too big to fully + fit into memory. - Given a Dataset of batch indices and the unsliced inputs, - this step slices the inputs in a parallelized fashion - and produces a dataset of input batches. + Specifically, this adapter handles any Python class which implements: + `__get_item__`, `__len__`, `shape`, and `dtype` with the same meanings + as Numpy, but it ignores any case where all the inputs are Tensors or Numpy + arrays (because that case is handled by the base TensorLikeDataAdapter). - Args: - indices_dataset: A Dataset of batched indices - inputs: A python data structure that contains the inputs, targets, - and possibly sample weights. + It ignores scipy sparse matrices and Composite Tensors because those are + handled by the CompositeTensorDataAdapter. - Returns: - A Dataset of input batches matching the batch indices. + It also does not handle lists/tuples of scalars, because those are handled + by the ListsOfScalarsDataAdapter. """ - flat_inputs = tf.nest.flatten(inputs) - def dynamic_shape_like(t): - shape = list(t.shape) - shape[0] = None - return tuple(shape) - - flat_dtypes = [inp.dtype for inp in flat_inputs] - contiguous = True - if self._shuffle and self._shuffle != "batch": - contiguous = False - - def grab_batch(indices): - """Grab a batch of data from the inputs.""" - # This uses a py_function to avoid converting the array-like - # into a Tensor before slicing it, because converting the array-like - # to a Tensor may force it into memory.. 
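For orientation, the inputs this adapter targets are objects that quack like arrays but should not be eagerly converted to Tensors, which is why the slicing here goes through `tf.py_function`. Something like this hypothetical `LazyArray` (name and behavior invented purely for illustration) is the kind of input `can_handle` accepts:

```python
import numpy as np

class LazyArray:
    """Array-like that materializes only the rows actually requested."""

    def __init__(self, n_rows, n_cols=4):
        self.shape = (n_rows, n_cols)
        self.dtype = np.dtype(np.float32)

    def __len__(self):
        return self.shape[0]

    def __getitem__(self, idx):
        # Pretend each requested row is computed or loaded on demand,
        # rather than held in memory up front.
        rows = np.atleast_1d(idx)
        return np.ones((rows.size, self.shape[1]), dtype=self.dtype)
```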
- def py_method(ind): - def slice_array(data): - return training_utils.slice_arrays(data, ind.numpy(), - contiguous=contiguous) - return [slice_array(inp) for inp in flat_inputs] - - flat_out = tf.py_function(py_method, [indices], flat_dtypes) - for v, original_inp in zip(flat_out, flat_inputs): - v.set_shape(dynamic_shape_like(original_inp)) - return tf.nest.pack_sequence_as(inputs, flat_out) - - dataset = indices_dataset.map( - grab_batch, num_parallel_calls=tf.data.AUTOTUNE) - - return dataset + + @staticmethod + def can_handle(x, y=None): + flat_inputs = tf.nest.flatten(x) + if y is not None: + flat_inputs += tf.nest.flatten(y) + + def _is_array_like(v): + """Return True if v is a Tensor, array, or is array-like.""" + return ( + hasattr(v, "__getitem__") + and hasattr(v, "shape") + and hasattr(v, "dtype") + and hasattr(v, "__len__") + ) + + if not TensorLikeDataAdapter.can_handle( + x, y + ) and not CompositeTensorDataAdapter.can_handle(x, y): + return all(_is_array_like(v) for v in flat_inputs) + else: + return False + + def __init__(self, *args, **kwargs): + logging.warning( + "Keras is training/fitting/evaluating on array-like data. Keras " + "may not be optimized for this format, so if your input data " + "format is supported by TensorFlow I/O " + "(https://github.com/tensorflow/io) we recommend using that to " + "load a Dataset instead." + ) + + super().__init__(*args, **kwargs) + + def slice_inputs(self, indices_dataset, inputs): + """Slice inputs into a Dataset of batches. + + Given a Dataset of batch indices and the unsliced inputs, + this step slices the inputs in a parallelized fashion + and produces a dataset of input batches. + + Args: + indices_dataset: A Dataset of batched indices + inputs: A python data structure that contains the inputs, targets, + and possibly sample weights. + + Returns: + A Dataset of input batches matching the batch indices. + """ + flat_inputs = tf.nest.flatten(inputs) + + def dynamic_shape_like(t): + shape = list(t.shape) + shape[0] = None + return tuple(shape) + + flat_dtypes = [inp.dtype for inp in flat_inputs] + contiguous = True + if self._shuffle and self._shuffle != "batch": + contiguous = False + + def grab_batch(indices): + """Grab a batch of data from the inputs.""" + # This uses a py_function to avoid converting the array-like + # into a Tensor before slicing it, because converting the array-like + # to a Tensor may force it into memory.. 
+ def py_method(ind): + def slice_array(data): + return training_utils.slice_arrays( + data, ind.numpy(), contiguous=contiguous + ) + + return [slice_array(inp) for inp in flat_inputs] + + flat_out = tf.py_function(py_method, [indices], flat_dtypes) + for v, original_inp in zip(flat_out, flat_inputs): + v.set_shape(dynamic_shape_like(original_inp)) + return tf.nest.pack_sequence_as(inputs, flat_out) + + dataset = indices_dataset.map( + grab_batch, num_parallel_calls=tf.data.AUTOTUNE + ) + + return dataset class DatasetCreatorAdapter(DataAdapter): - """Adapter that handles dataset functions.""" - - def __init__(self, x, y, steps=None, distribution_strategy=None, **kwargs): - super().__init__(x, **kwargs) - - if not isinstance(x, dataset_creator.DatasetCreator): - raise TypeError("The input of a `DatasetCreatorAdapter` should be a " - "`DatasetCreator` but it received type {}.".format( - type(x))) - if steps is None: - raise ValueError("When using a " - "`tf.keras.utils.experimental.DatasetCreator`, " - "`steps_per_epoch`, `validation_steps` or `steps` " - "argument must be provided in `Model.fit`, " - "`Model.evaluate`, or `Model.predict`.") - self.dataset_creator = x - self.steps = steps - self.strategy = distribution_strategy - - @staticmethod - def can_handle(x, y=None): - if isinstance(x, dataset_creator.DatasetCreator): - assert y is None - return True - - def should_recreate_iterator(self): - # We expect users to shuffle the dataset in their `dataset_fn` supplied to - # `DatasetCreator`. Since that is a buffered shuffle, we intend to not reset - # the dataset so the batches that are not shuffled can still be pulled. - return False - - def get_size(self): - return None # To be inferred by `DataHandler`. - - def get_dataset(self): - return self.strategy.distribute_datasets_from_function( - self.dataset_creator, options=self.dataset_creator.input_options) - - def batch_size(self): - raise NotImplementedError() - - def has_partial_batch(self): - raise NotImplementedError() - - def partial_batch_size(self): - raise NotImplementedError() + """Adapter that handles dataset functions.""" + + def __init__(self, x, y, steps=None, distribution_strategy=None, **kwargs): + super().__init__(x, **kwargs) + + if not isinstance(x, dataset_creator.DatasetCreator): + raise TypeError( + "The input of a `DatasetCreatorAdapter` should be a " + "`DatasetCreator` but it received type {}.".format(type(x)) + ) + if steps is None: + if not kwargs.get("pss_evaluation_shards"): + raise ValueError( + "When using a " + "`tf.keras.utils.experimental.DatasetCreator`, " + "`steps_per_epoch`, `validation_steps`, `steps`, or " + "`pss_evaluation_shards` argument must be provided in " + "`Model.fit`, `Model.evaluate`, or `Model.predict`." + ) + self.dataset_creator = x + self.steps = steps + self.strategy = distribution_strategy + + @staticmethod + def can_handle(x, y=None): + if isinstance(x, dataset_creator.DatasetCreator): + assert y is None + return True + + def should_recreate_iterator(self): + # We expect users to shuffle the dataset in their `dataset_fn` supplied + # to `DatasetCreator`. Since that is a buffered shuffle, we intend to + # not reset the dataset so the batches that are not shuffled can still + # be pulled. + return False + + def get_size(self): + return None # To be inferred by `DataHandler`. 
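Because `get_size` above intentionally returns `None`, the step count must come from the caller, which is exactly what the `steps is None` check in the constructor enforces. A sketch of the calling convention, assuming a user-supplied `dataset_fn` (in practice `DatasetCreator` is used together with parameter-server distribution, so the `fit` call is shown commented out):

```python
import tensorflow as tf

def dataset_fn(input_context):
    # Hypothetical toy pipeline; a real dataset_fn would shard and
    # shuffle according to `input_context`.
    ds = tf.data.Dataset.from_tensor_slices(
        (tf.ones((8, 4)), tf.ones((8, 1)))
    )
    return ds.repeat().batch(2)

creator = tf.keras.utils.experimental.DatasetCreator(dataset_fn)
# Since no size can be inferred, a step count is mandatory:
# model.fit(creator, epochs=2, steps_per_epoch=4)
```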
+ + def get_dataset(self): + return self.strategy.distribute_datasets_from_function( + self.dataset_creator, options=self.dataset_creator.input_options + ) + + def batch_size(self): + raise NotImplementedError() + + def has_partial_batch(self): + raise NotImplementedError() + + def partial_batch_size(self): + raise NotImplementedError() class CompositeTensorDataAdapter(DataAdapter): - """Adapter that handles composite tensor.""" - - @staticmethod - def can_handle(x, y=None): - flat_inputs = tf.nest.flatten(x) - if y is not None: - flat_inputs += tf.nest.flatten(y) - - def _is_composite(v): - # Dataset/iterator/DistributedDataset inherits from CompositeTensor but - # should be handled by DatasetAdapter and GeneratorAdapter. - if (tf_utils.is_extension_type(v) and - not isinstance(v, - (tf.data.Dataset, tf.data.Iterator)) and - not _is_distributed_dataset(v)): - return True - # Support Scipy sparse tensors if scipy is installed - return _is_scipy_sparse(v) - - def _is_tensor_or_composite(v): - if isinstance(v, (tf.Tensor, np.ndarray)): + """Adapter that handles composite tensor.""" + + @staticmethod + def can_handle(x, y=None): + flat_inputs = tf.nest.flatten(x) + if y is not None: + flat_inputs += tf.nest.flatten(y) + + def _is_composite(v): + # Dataset/iterator/DistributedDataset inherits from CompositeTensor + # but should be handled by DatasetAdapter and GeneratorAdapter. + if ( + tf_utils.is_extension_type(v) + and not isinstance(v, (tf.data.Dataset, tf.data.Iterator)) + and not _is_distributed_dataset(v) + ): + return True + # Support Scipy sparse tensors if scipy is installed + return _is_scipy_sparse(v) + + def _is_tensor_or_composite(v): + if isinstance(v, (tf.Tensor, np.ndarray)): + return True + return _is_composite(v) + + return any(_is_composite(v) for v in flat_inputs) and all( + _is_tensor_or_composite(v) for v in flat_inputs + ) + + def __init__( + self, + x, + y=None, + sample_weights=None, + sample_weight_modes=None, + batch_size=None, + steps=None, + shuffle=False, + **kwargs, + ): + super().__init__(x, y, **kwargs) + x, y, sample_weights = _process_tensorlike((x, y, sample_weights)) + sample_weight_modes = broadcast_sample_weight_modes( + sample_weights, sample_weight_modes + ) + + # If sample_weights are not specified for an output use 1.0 as weights. + (sample_weights, _, _) = training_utils.handle_partial_sample_weights( + y, sample_weights, sample_weight_modes, check_all_flat=True + ) + + inputs = pack_x_y_sample_weight(x, y, sample_weights) + + dataset = tf.data.Dataset.from_tensor_slices(inputs) + num_samples = int(tf.nest.flatten(x)[0].shape[0]) + if shuffle: + dataset = dataset.shuffle(num_samples) + + # If batch_size is not passed but steps is, calculate from the input + # data. Defaults to `32` for backwards compatibility. 
+ if not batch_size: + batch_size = int(math.ceil(num_samples / steps)) if steps else 32 + + dataset = dataset.batch(batch_size) + self._size = int(math.ceil(num_samples / batch_size)) + self._batch_size = batch_size + self._has_partial_batch = self._size != (num_samples // batch_size) + + self._partial_batch_size = None + if self._has_partial_batch: + self._partial_batch_size = ( + num_samples - (self._size - 1) * self._batch_size + ) + + self._dataset = dataset.prefetch(tf.data.AUTOTUNE) + + def get_dataset(self): + return self._dataset + + def get_size(self): + return self._size + + def batch_size(self): + return self._batch_size + + def has_partial_batch(self): + return self._has_partial_batch + + def partial_batch_size(self): + return self._partial_batch_size + + def should_recreate_iterator(self): return True - return _is_composite(v) - - return (any(_is_composite(v) for v in flat_inputs) and - all(_is_tensor_or_composite(v) for v in flat_inputs)) - - def __init__(self, - x, - y=None, - sample_weights=None, - sample_weight_modes=None, - batch_size=None, - steps=None, - shuffle=False, - **kwargs): - super().__init__(x, y, **kwargs) - x, y, sample_weights = _process_tensorlike((x, y, sample_weights)) - sample_weight_modes = broadcast_sample_weight_modes( - sample_weights, sample_weight_modes) - - # If sample_weights are not specified for an output use 1.0 as weights. - (sample_weights, _, _) = training_utils.handle_partial_sample_weights( - y, sample_weights, sample_weight_modes, check_all_flat=True) - - inputs = pack_x_y_sample_weight(x, y, sample_weights) - - dataset = tf.data.Dataset.from_tensor_slices(inputs) - num_samples = int(tf.nest.flatten(x)[0].shape[0]) - if shuffle: - dataset = dataset.shuffle(num_samples) - - # If batch_size is not passed but steps is, calculate from the input data. - # Default to 32 for backwards compatibility. 
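The partial-batch bookkeeping above is easy to check by hand; a small sketch with made-up numbers (100 samples batched by 32):

```python
import math

num_samples, batch_size = 100, 32

size = int(math.ceil(num_samples / batch_size))  # 4 batches in total
# A partial batch exists exactly when ceil and floor division disagree.
has_partial_batch = size != (num_samples // batch_size)

partial_batch_size = None
if has_partial_batch:
    # Whatever remains after the (size - 1) full batches: 100 - 96 = 4.
    partial_batch_size = num_samples - (size - 1) * batch_size

assert (size, has_partial_batch, partial_batch_size) == (4, True, 4)
```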
- if not batch_size: - batch_size = int(math.ceil(num_samples / steps)) if steps else 32 - - dataset = dataset.batch(batch_size) - self._size = int(math.ceil(num_samples / batch_size)) - self._batch_size = batch_size - self._has_partial_batch = (self._size != (num_samples // batch_size)) - - self._partial_batch_size = None - if self._has_partial_batch: - self._partial_batch_size = ( - num_samples - (self._size - 1) * self._batch_size) - - self._dataset = dataset - - def get_dataset(self): - return self._dataset - - def get_size(self): - return self._size - - def batch_size(self): - return self._batch_size - - def has_partial_batch(self): - return self._has_partial_batch - - def partial_batch_size(self): - return self._partial_batch_size - - def should_recreate_iterator(self): - return True class ListsOfScalarsDataAdapter(DataAdapter): - """Adapter that handles lists of scalars and lists of lists of scalars.""" - - @staticmethod - def can_handle(x, y=None): - handles_x = ListsOfScalarsDataAdapter._is_list_of_scalars(x) - handles_y = True - if y is not None: - handles_y = ListsOfScalarsDataAdapter._is_list_of_scalars(y) - return handles_x and handles_y - - @staticmethod - def _is_list_of_scalars(inp): - if isinstance(inp, (float, int, str, bytes, bytearray)): - return True - if isinstance(inp, (list, tuple)) and inp: - return ListsOfScalarsDataAdapter._is_list_of_scalars(inp[0]) - return False - - def __init__(self, - x, - y=None, - sample_weights=None, - sample_weight_modes=None, - batch_size=None, - shuffle=False, - **kwargs): - super().__init__(x, y, **kwargs) - x = np.asarray(x) - if y is not None: - y = np.asarray(y) - if sample_weights is not None: - sample_weights = np.asarray(sample_weights) - sample_weight_modes = broadcast_sample_weight_modes( - sample_weights, sample_weight_modes) - - self._internal_adapter = TensorLikeDataAdapter( + """Adapter that handles lists of scalars and lists of lists of scalars.""" + + @staticmethod + def can_handle(x, y=None): + handles_x = ListsOfScalarsDataAdapter._is_list_of_scalars(x) + handles_y = True + if y is not None: + handles_y = ListsOfScalarsDataAdapter._is_list_of_scalars(y) + return handles_x and handles_y + + @staticmethod + def _is_list_of_scalars(inp): + if isinstance(inp, (float, int, str, bytes, bytearray)): + return True + if isinstance(inp, (list, tuple)) and inp: + return ListsOfScalarsDataAdapter._is_list_of_scalars(inp[0]) + return False + + def __init__( + self, x, - y=y, - sample_weights=sample_weights, - sample_weight_modes=sample_weight_modes, - batch_size=batch_size, - shuffle=shuffle, - **kwargs) - - def get_dataset(self): - return self._internal_adapter.get_dataset() - - def get_size(self): - return self._internal_adapter.get_size() - - def batch_size(self): - return self._internal_adapter.batch_size() - - def has_partial_batch(self): - return self._internal_adapter.has_partial_batch() - - def partial_batch_size(self): - return self._internal_adapter.partial_batch_size() - - def should_recreate_iterator(self): - return True + y=None, + sample_weights=None, + sample_weight_modes=None, + batch_size=None, + shuffle=False, + **kwargs, + ): + super().__init__(x, y, **kwargs) + x = np.asarray(x) + if y is not None: + y = np.asarray(y) + if sample_weights is not None: + sample_weights = np.asarray(sample_weights) + sample_weight_modes = broadcast_sample_weight_modes( + sample_weights, sample_weight_modes + ) + + self._internal_adapter = TensorLikeDataAdapter( + x, + y=y, + sample_weights=sample_weights, + 
sample_weight_modes=sample_weight_modes, + batch_size=batch_size, + shuffle=shuffle, + **kwargs, + ) + + def get_dataset(self): + return self._internal_adapter.get_dataset() + + def get_size(self): + return self._internal_adapter.get_size() + + def batch_size(self): + return self._internal_adapter.batch_size() + + def has_partial_batch(self): + return self._internal_adapter.has_partial_batch() + + def partial_batch_size(self): + return self._internal_adapter.partial_batch_size() + + def should_recreate_iterator(self): + return True class DatasetAdapter(DataAdapter): - """Adapter that handles `tf.data.Dataset`.""" - - @staticmethod - def can_handle(x, y=None): - return (isinstance(x, (tf.compat.v1.data.Dataset, tf.data.Dataset)) or - _is_distributed_dataset(x)) - - def __init__(self, - x, - y=None, - sample_weights=None, - steps=None, - **kwargs): - super().__init__(x, y, **kwargs) - # Note that the dataset instance is immutable, its fine to reuse the user - # provided dataset. - self._dataset = x - - # The user-provided steps. - self._user_steps = steps - - self._validate_args(y, sample_weights, steps) - - def get_dataset(self): - return self._dataset - - def get_size(self): - return # Inferred in `DataHandler`. - - def batch_size(self): - return None - - def has_partial_batch(self): - return False - - def partial_batch_size(self): - return None - - def should_recreate_iterator(self): - # Since DistributedDatasets have no cardinality, the user must provide - # all steps that need to be run, calling `.repeat()` as needed. - if _is_distributed_dataset(self._dataset): - return False - - # If user doesn't supply `steps`, or if they supply `steps` that - # exactly equals the size of the `Dataset`, create a new iterator - # each epoch. - return (self._user_steps is None or - tf.data.experimental.cardinality(self._dataset).numpy() == self._user_steps) - - def _validate_args(self, y, sample_weights, steps): - """Validates `__init__` arguments.""" - # Arguments that shouldn't be passed. - if not is_none_or_empty(y): - raise ValueError("`y` argument is not supported when using " - "dataset as input.") - if not is_none_or_empty(sample_weights): - raise ValueError("`sample_weight` argument is not supported when using " - "dataset as input.") - - if steps is None: - if _is_distributed_dataset(self._dataset): - raise ValueError("When providing a distributed dataset, you must " - "specify the number of steps to run.") - - size = tf.data.experimental.cardinality(self._dataset).numpy() - if size == tf.data.experimental.INFINITE_CARDINALITY and steps is None: - raise ValueError( - "When providing an infinite dataset, you must specify " - "the number of steps to run (if you did not intend to " - "create an infinite dataset, make sure to not call " - "`repeat()` on the dataset).") + """Adapter that handles `tf.data.Dataset`.""" + + @staticmethod + def can_handle(x, y=None): + return isinstance( + x, (tf.compat.v1.data.Dataset, tf.data.Dataset) + ) or _is_distributed_dataset(x) + + def __init__(self, x, y=None, sample_weights=None, steps=None, **kwargs): + super().__init__(x, y, **kwargs) + # Note that the dataset instance is immutable, it's fine to reuse the + # user-provided dataset. + self._dataset = x + + # The user-provided steps. + self._user_steps = steps + + self._validate_args( + y, sample_weights, steps, kwargs.get("pss_evaluation_shards") + ) + + def get_dataset(self): + return self._dataset + + def get_size(self): + return # Inferred in `DataHandler`.
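The `DatasetAdapter.should_recreate_iterator` rule here (recreate the iterator each epoch unless the user asked for fewer steps than the dataset holds) can be illustrated on a toy finite dataset; this is a standalone sketch of the logic, not the adapter itself:

```python
import tensorflow as tf

dataset = tf.data.Dataset.range(10).batch(2)
# A finite dataset reports an exact cardinality: 5 batches here.
cardinality = tf.data.experimental.cardinality(dataset).numpy()

def should_recreate_iterator(user_steps):
    # Fresh iterator per epoch when `steps` is unset or spans the whole
    # dataset; otherwise keep it so unread batches carry into the next
    # epoch.
    return user_steps is None or cardinality == user_steps

assert should_recreate_iterator(None)
assert should_recreate_iterator(5)
assert not should_recreate_iterator(2)  # partial epoch: reuse the iterator
```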
+ + def batch_size(self): + return None + + def has_partial_batch(self): + return False + + def partial_batch_size(self): + return None + + def should_recreate_iterator(self): + # Since DistributedDatasets have no cardinality, the user must provide + # all steps that need to be run, calling `.repeat()` as needed. + if _is_distributed_dataset(self._dataset): + return False + + # If user doesn't supply `steps`, or if they supply `steps` that + # exactly equals the size of the `Dataset`, create a new iterator + # each epoch. + return ( + self._user_steps is None + or tf.data.experimental.cardinality(self._dataset).numpy() + == self._user_steps + ) + + def _validate_args(self, y, sample_weights, steps, pss_evaluation_shards): + """Validates `__init__` arguments.""" + # Arguments that shouldn't be passed. + if not is_none_or_empty(y): + raise ValueError( + "`y` argument is not supported when using dataset as input." + ) + if not is_none_or_empty(sample_weights): + raise ValueError( + "`sample_weight` argument is not supported when using " + "dataset as input." + ) + + if steps is None: + if _is_distributed_dataset(self._dataset): + if not pss_evaluation_shards: + raise ValueError( + "When providing a distributed dataset, you must " + "specify the number of steps to run." + ) + else: + size = tf.data.experimental.cardinality(self._dataset).numpy() + if size == tf.data.experimental.INFINITE_CARDINALITY: + if pss_evaluation_shards: + raise ValueError( + "When performing exact evaluation, the dataset " + "must be finite. Make sure not to call `repeat()` " + "on your dataset." + ) + else: + raise ValueError( + "When providing an infinite dataset, you must " + "specify the number of steps to run (if you did " + "not intend to create an infinite dataset, make " + "sure to not call `repeat()` on the dataset)." + ) class GeneratorDataAdapter(DataAdapter): - """Adapter that handles python generators and iterators.""" + """Adapter that handles python generators and iterators.""" - @staticmethod - def can_handle(x, y=None): - return ((hasattr(x, "__next__") or hasattr(x, "next")) + @staticmethod + def can_handle(x, y=None): + return ( + (hasattr(x, "__next__") or hasattr(x, "next")) and hasattr(x, "__iter__") - and not isinstance(x, data_utils.Sequence)) - - def __init__(self, - x, - y=None, - sample_weights=None, - workers=1, - use_multiprocessing=False, - max_queue_size=10, - model=None, - **kwargs): - # Generators should never shuffle as exhausting the generator in order to - # shuffle the batches is inefficient. - kwargs.pop("shuffle", None) - - if not is_none_or_empty(y): - raise ValueError("`y` argument is not supported when using " - "python generator as input.") - if not is_none_or_empty(sample_weights): - raise ValueError("`sample_weight` argument is not supported when using " - "python generator as input.") - - super().__init__(x, y, **kwargs) - - # Since we have to know the dtype of the python generator when we build the - # dataset, we have to look at a batch to infer the structure. - peek, x = self._peek_and_restore(x) - peek = self._standardize_batch(peek) - peek = _process_tensorlike(peek) - - # Need to build the Model on concrete input shapes. - if model is not None and not model.built: - concrete_x, _, _ = unpack_x_y_sample_weight(peek) - try: - model.distribute_strategy.run( - lambda x: model(x, training=False), args=(concrete_x,)) - except NotImplementedError: - # The above call may fail if the model is a container-like class that - # does not implement its own forward pass (e.g. 
a GAN or VAE where the - # forward pass is handled by subcomponents). - # Such a model does not need to be built. - pass - - self._first_batch_size = int(tf.nest.flatten(peek)[0].shape[0]) - - def _get_tensor_spec(t): - # TODO(b/226395276): Remove _with_tensor_ranks_only usage. - return type_spec.type_spec_from_value(t)._with_tensor_ranks_only() # pylint: disable=protected-access - - output_signature = tf.nest.map_structure(_get_tensor_spec, peek) - - # Note that dataset API takes a callable that creates a generator object, - # rather than generator itself, which is why we define a function here. - generator_fn = self._handle_multiprocessing(x, workers, use_multiprocessing, - max_queue_size) - - def wrapped_generator(): - for data in generator_fn(): - yield self._standardize_batch(data) - - dataset = tf.data.Dataset.from_generator( - wrapped_generator, output_signature=output_signature) - - if workers == 1 and not use_multiprocessing: - dataset = dataset.prefetch(1) - - self._dataset = dataset - - def _standardize_batch(self, data): - """Standardizes a batch output by a generator.""" - # Removes `None`s. - x, y, sample_weight = unpack_x_y_sample_weight(data) - data = pack_x_y_sample_weight(x, y, sample_weight) - - data = tf.__internal__.nest.list_to_tuple(data) + and not isinstance(x, data_utils.Sequence) + ) - def _convert_dtype(t): - if (isinstance(t, np.ndarray) and issubclass(t.dtype.type, np.floating)): - return np.array(t, dtype=backend.floatx()) - return t - - data = tf.nest.map_structure(_convert_dtype, data) - return data - - @staticmethod - def _peek_and_restore(x): - peek = next(x) - return peek, itertools.chain([peek], x) - - def _handle_multiprocessing(self, x, workers, use_multiprocessing, - max_queue_size): - """Create a callable, possibly including an Enqueuer.""" - if workers > 1 or (workers > 0 and use_multiprocessing): - def generator_fn(): - enqueuer = data_utils.GeneratorEnqueuer( - x, use_multiprocessing=use_multiprocessing) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - return enqueuer.get() - else: - generator_fn = lambda: x - return generator_fn - - def get_dataset(self): - return self._dataset - - def get_size(self): - return None - - def batch_size(self): - return None - - def representative_batch_size(self): - return self._first_batch_size + def __init__( + self, + x, + y=None, + sample_weights=None, + workers=1, + use_multiprocessing=False, + max_queue_size=10, + model=None, + **kwargs, + ): + # Generators should never shuffle as exhausting the generator in order + # to shuffle the batches is inefficient. + kwargs.pop("shuffle", None) + + if not is_none_or_empty(y): + raise ValueError( + "`y` argument is not supported when using " + "python generator as input." + ) + if not is_none_or_empty(sample_weights): + raise ValueError( + "`sample_weight` argument is not supported when using " + "python generator as input." + ) + + super().__init__(x, y, **kwargs) + + # Since we have to know the dtype of the python generator when we build + # the dataset, we have to look at a batch to infer the structure. + peek, x = self._peek_and_restore(x) + peek = self._standardize_batch(peek) + peek = _process_tensorlike(peek) + + # Need to build the Model on concrete input shapes. 
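The `_peek_and_restore` trick used above (inspect one batch to infer structure and dtypes without consuming it) is just `itertools.chain`; a standalone sketch of the pattern:

```python
import itertools

def peek_and_restore(generator):
    # Pull one element to inspect dtypes/shapes, then splice it back in
    # front so the downstream consumer still sees the full stream.
    peek = next(generator)
    return peek, itertools.chain([peek], generator)

gen = iter(range(3))
first, gen = peek_and_restore(gen)
assert first == 0
assert list(gen) == [0, 1, 2]  # nothing was lost by peeking
```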
+ if model is not None and not model.built: + concrete_x, _, _ = unpack_x_y_sample_weight(peek) + try: + model.distribute_strategy.run( + lambda x: model(x, training=False), args=(concrete_x,) + ) + except NotImplementedError: + # The above call may fail if the model is a container-like class + # that does not implement its own forward pass (e.g. a GAN or + # VAE where the forward pass is handled by subcomponents). Such + # a model does not need to be built. + pass + + self._first_batch_size = int(tf.nest.flatten(peek)[0].shape[0]) + + def _get_tensor_spec(t): + # TODO(b/226395276): Remove _with_tensor_ranks_only usage. + return type_spec.type_spec_from_value(t)._with_tensor_ranks_only() + + output_signature = tf.nest.map_structure(_get_tensor_spec, peek) + + # Note that dataset API takes a callable that creates a generator + # object, rather than generator itself, which is why we define a + # function here. + generator_fn = self._handle_multiprocessing( + x, workers, use_multiprocessing, max_queue_size + ) + + def wrapped_generator(): + for data in generator_fn(): + yield self._standardize_batch(data) + + dataset = tf.data.Dataset.from_generator( + wrapped_generator, output_signature=output_signature + ) + + if workers == 1 and not use_multiprocessing: + dataset = dataset.prefetch(1) + + self._dataset = dataset.prefetch(tf.data.AUTOTUNE) + + def _standardize_batch(self, data): + """Standardizes a batch output by a generator.""" + # Removes `None`s. + x, y, sample_weight = unpack_x_y_sample_weight(data) + data = pack_x_y_sample_weight(x, y, sample_weight) + + data = tf.__internal__.nest.list_to_tuple(data) + + def _convert_dtype(t): + if isinstance(t, np.ndarray) and issubclass( + t.dtype.type, np.floating + ): + return np.array(t, dtype=backend.floatx()) + return t + + data = tf.nest.map_structure(_convert_dtype, data) + return data + + @staticmethod + def _peek_and_restore(x): + peek = next(x) + return peek, itertools.chain([peek], x) + + def _handle_multiprocessing( + self, x, workers, use_multiprocessing, max_queue_size + ): + """Create a callable, possibly including an Enqueuer.""" + if workers > 1 or (workers > 0 and use_multiprocessing): + + def generator_fn(): + enqueuer = data_utils.GeneratorEnqueuer( + x, use_multiprocessing=use_multiprocessing + ) + enqueuer.start(workers=workers, max_queue_size=max_queue_size) + return enqueuer.get() + + else: + generator_fn = lambda: x + return generator_fn + + def get_dataset(self): + return self._dataset + + def get_size(self): + return None + + def batch_size(self): + return None - def has_partial_batch(self): - return False + def representative_batch_size(self): + return self._first_batch_size - def partial_batch_size(self): - return + def has_partial_batch(self): + return False + + def partial_batch_size(self): + return - def should_recreate_iterator(self): - return False + def should_recreate_iterator(self): + return False class KerasSequenceAdapter(GeneratorDataAdapter): - """Adapter that handles `keras.utils.Sequence`.""" - - @staticmethod - def can_handle(x, y=None): - return isinstance(x, data_utils.Sequence) - - def __init__(self, - x, - y=None, - sample_weights=None, - shuffle=False, - workers=1, - use_multiprocessing=False, - max_queue_size=10, - model=None, - **kwargs): - if not is_none_or_empty(y): - raise ValueError("`y` argument is not supported when using " - "`keras.utils.Sequence` as input.") - if not is_none_or_empty(sample_weights): - raise ValueError("`sample_weight` argument is not supported when using " - 
"`keras.utils.Sequence` as input.") - - self._shuffle_sequence = shuffle - self._keras_sequence = x - self._enqueuer = None - super().__init__( - x, - shuffle=False, # Shuffle is handed in the _make_callable override. - workers=workers, - use_multiprocessing=use_multiprocessing, - max_queue_size=max_queue_size, - model=model, - **kwargs) - - @staticmethod - def _peek_and_restore(x): - return x[0], x - - def _handle_multiprocessing(self, x, workers, use_multiprocessing, - max_queue_size): - if workers > 1 or (workers > 0 and use_multiprocessing): - def generator_fn(): - self._enqueuer = data_utils.OrderedEnqueuer( - x, use_multiprocessing=use_multiprocessing, - shuffle=self._shuffle_sequence) - self._enqueuer.start(workers=workers, max_queue_size=max_queue_size) - return self._enqueuer.get() - else: - def generator_fn(): - order = range(len(x)) - if self._shuffle_sequence: - # Match the shuffle convention in OrderedEnqueuer. - order = list(order) - random.shuffle(order) - - for i in order: - yield x[i] + """Adapter that handles `keras.utils.Sequence`.""" - return generator_fn + @staticmethod + def can_handle(x, y=None): + return isinstance(x, data_utils.Sequence) - def get_size(self): - return len(self._keras_sequence) - - def should_recreate_iterator(self): - return True + def __init__( + self, + x, + y=None, + sample_weights=None, + shuffle=False, + workers=1, + use_multiprocessing=False, + max_queue_size=10, + model=None, + **kwargs, + ): + if not is_none_or_empty(y): + raise ValueError( + "`y` argument is not supported when using " + "`keras.utils.Sequence` as input." + ) + if not is_none_or_empty(sample_weights): + raise ValueError( + "`sample_weight` argument is not supported when using " + "`keras.utils.Sequence` as input." + ) + + self._shuffle_sequence = shuffle + self._keras_sequence = x + self._enqueuer = None + super().__init__( + x, + shuffle=False, # Shuffle is handed in the _make_callable override. + workers=workers, + use_multiprocessing=use_multiprocessing, + max_queue_size=max_queue_size, + model=model, + **kwargs, + ) + + @staticmethod + def _peek_and_restore(x): + return x[0], x + + def _handle_multiprocessing( + self, x, workers, use_multiprocessing, max_queue_size + ): + if workers > 1 or (workers > 0 and use_multiprocessing): + + def generator_fn(): + self._enqueuer = data_utils.OrderedEnqueuer( + x, + use_multiprocessing=use_multiprocessing, + shuffle=self._shuffle_sequence, + ) + self._enqueuer.start( + workers=workers, max_queue_size=max_queue_size + ) + return self._enqueuer.get() + + else: + + def generator_fn(): + order = range(len(x)) + if self._shuffle_sequence: + # Match the shuffle convention in OrderedEnqueuer. 
+ order = list(order) + random.shuffle(order) + + for i in order: + yield x[i] + + return generator_fn + + def get_size(self): + return len(self._keras_sequence) + + def should_recreate_iterator(self): + return True - def on_epoch_end(self): - if self._enqueuer: - self._enqueuer.stop() - self._keras_sequence.on_epoch_end() + def on_epoch_end(self): + if self._enqueuer: + self._enqueuer.stop() + self._keras_sequence.on_epoch_end() ALL_ADAPTER_CLS = [ - ListsOfScalarsDataAdapter, TensorLikeDataAdapter, - GenericArrayLikeDataAdapter, DatasetAdapter, GeneratorDataAdapter, - KerasSequenceAdapter, CompositeTensorDataAdapter, DatasetCreatorAdapter + ListsOfScalarsDataAdapter, + TensorLikeDataAdapter, + GenericArrayLikeDataAdapter, + DatasetAdapter, + GeneratorDataAdapter, + KerasSequenceAdapter, + CompositeTensorDataAdapter, + DatasetCreatorAdapter, +] + +UNSHARDABLE_DATASET_TYPES = [ + from_generator_op._GeneratorDataset, + range_op._RangeDataset, + from_sparse_tensor_slices_op._SparseTensorSliceDataset, + from_tensors_op._TensorDataset, + from_tensor_slices_op._TensorSliceDataset, ] def select_data_adapter(x, y): - """Selects a data adapter that can handle a given x and y.""" - adapter_cls = [cls for cls in ALL_ADAPTER_CLS if cls.can_handle(x, y)] - if not adapter_cls: - # TODO(scottzhu): This should be a less implementation-specific error. - raise ValueError( - "Failed to find data adapter that can handle " - "input: {}, {}".format( - _type_name(x), _type_name(y))) - elif len(adapter_cls) > 1: - raise RuntimeError( - "Data adapters should be mutually exclusive for " - "handling inputs. Found multiple adapters {} to handle " - "input: {}, {}".format( - adapter_cls, _type_name(x), _type_name(y))) - # Instrument the data adapter usage before returning it - keras_data_adapter_gauge.get_cell(adapter_cls[0].__name__).set(True) - return adapter_cls[0] + """Selects a data adapter that can handle a given x and y.""" + adapter_cls = [cls for cls in ALL_ADAPTER_CLS if cls.can_handle(x, y)] + if not adapter_cls: + # TODO(scottzhu): This should be a less implementation-specific error. + raise ValueError( + "Failed to find data adapter that can handle input: {}, {}".format( + _type_name(x), _type_name(y) + ) + ) + elif len(adapter_cls) > 1: + raise RuntimeError( + "Data adapters should be mutually exclusive for " + "handling inputs. 
Found multiple adapters {} to handle " + "input: {}, {}".format(adapter_cls, _type_name(x), _type_name(y)) + ) + # Instrument the data adapter usage before returning it + keras_data_adapter_gauge.get_cell(adapter_cls[0].__name__).set(True) + return adapter_cls[0] def _type_name(x): - """Generates a description of the type of an object.""" - if isinstance(x, dict): - key_types = set(_type_name(key) for key in x.keys()) - val_types = set(_type_name(key) for key in x.values()) - return "({} containing {} keys and {} values)".format( - type(x), key_types, val_types) - if isinstance(x, (list, tuple)): - types = set(_type_name(val) for val in x) - return "({} containing values of types {})".format( - type(x), types) - return str(type(x)) + """Generates a description of the type of an object.""" + if isinstance(x, dict): + key_types = set(_type_name(key) for key in x.keys()) + val_types = set(_type_name(key) for key in x.values()) + return f"({type(x)} containing {key_types} keys and {val_types} values)" + if isinstance(x, (list, tuple)): + types = set(_type_name(val) for val in x) + return f"({type(x)} containing values of types {types})" + return str(type(x)) def _process_tensorlike(inputs): - """Process tensor-like inputs. + """Process tensor-like inputs. - This function: + This function: - (1) Converts `Numpy` arrays to `Tensor`s. - (2) Converts `Scipy` sparse matrices to `SparseTensor`s. - (3) Converts `pandas.Series` to `Tensor`s - (4) Converts `list`s to `tuple`s (for `tf.data` support). + (1) Converts `Numpy` arrays to `Tensor`s. + (2) Converts `Scipy` sparse matrices to `SparseTensor`s. + (3) Converts `pandas.Series` to `Tensor`s + (4) Converts `list`s to `tuple`s (for `tf.data` support). - Args: - inputs: Structure of `Tensor`s, `NumPy` arrays, or tensor-like. + Args: + inputs: Structure of `Tensor`s, `NumPy` arrays, or tensor-like. - Returns: - Structure of `Tensor`s or tensor-like. - """ + Returns: + Structure of `Tensor`s or tensor-like. + """ - def _convert_single_tensor(x): - if _is_pandas_series(x): - x = np.expand_dims(x.to_numpy(), axis=-1) + def _convert_single_tensor(x): + if _is_pandas_series(x): + x = np.expand_dims(x.to_numpy(), axis=-1) - if isinstance(x, np.ndarray): - dtype = None - if issubclass(x.dtype.type, np.floating): - dtype = backend.floatx() - return tf.convert_to_tensor(x, dtype=dtype) - elif _is_scipy_sparse(x): - return _scipy_sparse_to_sparse_tensor(x) - return x + if isinstance(x, np.ndarray): + dtype = None + if issubclass(x.dtype.type, np.floating): + dtype = backend.floatx() + return tf.convert_to_tensor(x, dtype=dtype) + elif _is_scipy_sparse(x): + return _scipy_sparse_to_sparse_tensor(x) + return x - inputs = tf.nest.map_structure(_convert_single_tensor, inputs) - return tf.__internal__.nest.list_to_tuple(inputs) + inputs = tf.nest.map_structure(_convert_single_tensor, inputs) + return tf.__internal__.nest.list_to_tuple(inputs) def is_none_or_empty(inputs): - # util method to check if the input is a None or a empty list. - # the python "not" check will raise an error like below if the input is a - # numpy array - # "The truth value of an array with more than one element is ambiguous. - # Use a.any() or a.all()" - return inputs is None or not tf.nest.flatten(inputs) + # util method to check if the input is None or an empty list. + # the python "not" check will raise an error like below if the input is a + # numpy array + # "The truth value of an array with more than one element is ambiguous.
+ # Use a.any() or a.all()" + return inputs is None or not tf.nest.flatten(inputs) def broadcast_sample_weight_modes(target_structure, sample_weight_modes): - """Match sample_weight_modes structure with output structure.""" - if target_structure is None or not tf.nest.flatten(target_structure): + """Match sample_weight_modes structure with output structure.""" + if target_structure is None or not tf.nest.flatten(target_structure): + return sample_weight_modes + + if isinstance(sample_weight_modes, str): + if isinstance(target_structure, dict): + return {key: sample_weight_modes for key in target_structure.keys()} + return [sample_weight_modes for _ in target_structure] + + if sample_weight_modes: + try: + tf.nest.assert_same_structure( + training_utils.list_to_tuple(target_structure), + training_utils.list_to_tuple(sample_weight_modes), + ) + except (ValueError, TypeError): + target_str = str( + tf.nest.map_structure(lambda _: "...", target_structure) + ) + mode_str = str( + tf.nest.map_structure(lambda _: "...", sample_weight_modes) + ) + + # Attempt to coerce sample_weight_modes to the target structure. + # This implicitly depends on the fact that Model flattens outputs + # for its internal representation. + try: + sample_weight_modes = tf.nest.pack_sequence_as( + target_structure, tf.nest.flatten(sample_weight_modes) + ) + logging.warning( + "sample_weight modes were coerced from\n " + "{}\n to \n {}".format(target_str, mode_str) + ) + except (ValueError, TypeError): + raise ValueError( + "Unable to match target structure and sample_weight_modes " + "structure:\n {}\n to \n {}".format( + target_str, mode_str + ) + ) + return sample_weight_modes - if isinstance(sample_weight_modes, str): - if isinstance(target_structure, dict): - return {key: sample_weight_modes for key in target_structure.keys()} - return [sample_weight_modes for _ in target_structure] - if sample_weight_modes: - try: - tf.nest.assert_same_structure( - training_utils.list_to_tuple(target_structure), - training_utils.list_to_tuple(sample_weight_modes)) - except (ValueError, TypeError): - target_str = str(tf.nest.map_structure(lambda _: "...", target_structure)) - mode_str = str( - tf.nest.map_structure(lambda _: "...", sample_weight_modes)) - - # Attempt to coerce sample_weight_modes to the target structure. This - # implicitly depends on the fact that Model flattens outputs for its - # internal representation. - try: - sample_weight_modes = tf.nest.pack_sequence_as( - target_structure, tf.nest.flatten(sample_weight_modes)) +class DataHandler: + """Handles iterating over epoch-level `tf.data.Iterator` objects.""" + + def __init__( + self, + x, + y=None, + sample_weight=None, + batch_size=None, + steps_per_epoch=None, + initial_epoch=0, + epochs=1, + shuffle=False, + class_weight=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + model=None, + steps_per_execution=None, + distribute=True, + pss_evaluation_shards=0, + ): + """Initializes a `DataHandler`. + + Arguments: + x: See `Model.fit`. + y: See `Model.fit`. + sample_weight: See `Model.fit`. + batch_size: See `Model.fit`. + steps_per_epoch: See `Model.fit`. + initial_epoch: See `Model.fit`. + epochs: See `Model.fit`. + shuffle: See `Model.fit`. + class_weight: See `Model.fit`. + max_queue_size: See `Model.fit`. + workers: See `Model.fit`. + use_multiprocessing: See `Model.fit`. + model: The `Model` instance. Needed in order to correctly `build` the + `Model` using generator-like inputs (see `GeneratorDataAdapter`). 
+ steps_per_execution: See `Model.compile`. + distribute: Whether to distribute the `tf.dataset`. + `PreprocessingLayer.adapt` does not support distributed datasets, + `Model` should always set this to `True`. + pss_evaluation_shards: See `Model.fit`. + """ + + self._initial_epoch = initial_epoch + self._initial_step = 0 + self._epochs = epochs + self._insufficient_data = False + self._model = model + + if steps_per_epoch == 0: + raise ValueError( + "Unexpected value for `steps_per_epoch`. Received value is 0. " + "Please check the docstring for `model.fit()` for supported " + "values." + ) + + self._steps_per_epoch = steps_per_epoch + + # `steps_per_execution_value` is the cached initial value. + # `steps_per_execution` is mutable and may be changed by the DataAdapter + # to handle partial executions. + if steps_per_execution is None: + self._steps_per_execution = tf.Variable(1) + else: + self._steps_per_execution = steps_per_execution + + adapter_cls = select_data_adapter(x, y) + self._adapter = adapter_cls( + x, + y, + batch_size=batch_size, + steps=steps_per_epoch, + epochs=epochs - initial_epoch, + sample_weights=sample_weight, + shuffle=shuffle, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + distribution_strategy=tf.distribute.get_strategy(), + model=model, + pss_evaluation_shards=pss_evaluation_shards, + ) + + strategy = tf.distribute.get_strategy() + + self._current_step = 0 + self._step_increment = self._steps_per_execution.numpy().item() - 1 + self._insufficient_data = False + + self._configure_dataset_and_inferred_steps( + strategy, x, steps_per_epoch, class_weight, distribute + ) + + if self._inferred_steps == 0: + raise ValueError("Expected input data to be non-empty.") + + def _configure_dataset_and_inferred_steps( + self, strategy, x, steps_per_epoch, class_weight, distribute + ): + """Configure the `_dataset` and `_inferred_steps` attributes.""" + del x + dataset = self._adapter.get_dataset() + if class_weight: + dataset = dataset.map(_make_class_weight_map_fn(class_weight)) + self._inferred_steps = self._infer_steps(steps_per_epoch, dataset) + + # `PreprocessingLayer.adapt` does not currently support distributed + # datasets, so we pass `distribute=False` there. + if distribute and not _is_distributed_dataset(dataset): + dataset = strategy.experimental_distribute_dataset(dataset) + self._dataset = dataset + self._validate_data_handler() + + def enumerate_epochs(self): + """Yields `(epoch, tf.data.Iterator)`.""" + with self._truncate_execution_to_epoch(): + data_iterator = iter(self._dataset) + for epoch in range(self._initial_epoch, self._epochs): + if self._insufficient_data: # Set by `catch_stop_iteration`. 
+ break + if self._adapter.should_recreate_iterator(): + data_iterator = iter(self._dataset) + if not isinstance(self._dataset, DistributedDataset): + steps = self._infer_steps( + self._steps_per_epoch, self._dataset + ) + if steps is not None: + self._inferred_steps = steps + yield epoch, data_iterator + self._adapter.on_epoch_end() + + @contextlib.contextmanager + def _truncate_execution_to_epoch(self): + """Truncates steps per execution to at most one epoch.""" + should_truncate = ( + self._inferred_steps is not None + and self._steps_per_execution.numpy().item() > self._inferred_steps + ) + original_value = self._steps_per_execution.numpy().item() + try: + if should_truncate: + self._steps_per_execution.assign(self._inferred_steps) + yield + finally: + if should_truncate: + self._steps_per_execution.assign(original_value) + + def sync(self): + context.async_wait() + + @contextlib.contextmanager + def catch_stop_iteration(self): + """Catches errors when an iterator runs out of data.""" + with distributed_training_utils.maybe_preemption_handler_scope( + self._model + ): + try: + yield + self.sync() + except (StopIteration, tf.errors.OutOfRangeError): + if self._inferred_steps is None: + self._inferred_steps = self._current_step + else: + self._insufficient_data = True + total_epochs = self._epochs - self._initial_epoch + logging.warning( + "Your input ran out of data; interrupting training. " + "Make sure that your dataset or generator can generate " + "at least `steps_per_epoch * epochs` batches (in this " + "case, {} batches). You may need to use the repeat() " + "function when building your dataset.".format( + total_epochs * self._inferred_steps + ) + ) + + def steps(self): + """Yields steps for the current epoch.""" + self._current_step = self._initial_step + self._initial_step = 0 + # `self._inferred_steps` can be changed by `catch_stop_iteration`. + while ( + self._inferred_steps is None + or self._current_step < self._inferred_steps + ): + if self._insufficient_data: # Set by `catch_stop_iteration`. + break + original_spe = self._steps_per_execution.numpy().item() + can_run_full_execution = ( + original_spe == 1 + or self._inferred_steps is None + or self._inferred_steps - self._current_step >= original_spe + ) + + if can_run_full_execution: + self._step_increment = original_spe - 1 + yield self._current_step + self._current_step += original_spe + else: + # Last partial execution. + steps_remaining = self._inferred_steps - self._current_step + self._steps_per_execution.assign(steps_remaining) + self._step_increment = steps_remaining - 1 + yield self._current_step + self._current_step += steps_remaining + self._steps_per_execution.assign(original_spe) + + @property + def step_increment(self): + """The number to increment the step for `on_batch_end` methods.""" + return self._step_increment + + @property + def inferred_steps(self): + """The inferred steps per epoch of the created `Dataset`. + + This will be `None` in the case where: + + (1) A `Dataset` of unknown cardinality was passed to the `DataHandler`, + (2) `steps_per_epoch` was not provided, and + (3) The first epoch of iteration has not yet completed. + + Returns: + The inferred steps per epoch of the created `Dataset`. + """ + return self._inferred_steps + + @property + def should_sync(self): + # Catch OutOfRangeError for Datasets of unknown size. + # This blocks until the batch has finished executing. + # TODO(b/150292341): Allow multiple async steps here. 
+ return self._inferred_steps is None + + def _log_indefinite_training_warning(self): logging.warning( - "sample_weight modes were coerced from\n {}\n to \n {}" - .format(target_str, mode_str)) - except (ValueError, TypeError): - raise ValueError( - "Unable to match target structure and sample_weight_modes " - "structure:\n {}\n to \n {}".format(target_str, mode_str)) + "The training loop will run indefinitely since you have " + "set `steps_per_epoch=-1`. Please use batch-level " + "callbacks to save checkpoints or log training progress, " + "etc" + ) + + def _infer_steps(self, steps, dataset): + """Infers steps_per_epoch needed to loop through a dataset.""" + if steps == -1: + self._log_indefinite_training_warning() + return None + + if steps is not None: + return steps + + adapter_steps = self._adapter.get_size() + if adapter_steps is not None: + return adapter_steps + + # tf.distribute's `PerWorkerDataset` does not inherit from + # `tf.data.Dataset` and in those cases we give up on inferring steps. + if not isinstance(dataset, tf.data.Dataset): + return None + + size = tf.data.experimental.cardinality(dataset) + if size == tf.data.experimental.INFINITE_CARDINALITY and steps is None: + raise ValueError( + "When passing an infinitely repeating dataset, please specify " + "a `steps_per_epoch` value so that epoch level " + "callbacks continue to work. The value can be arbitrary, or a " + "number that you think correctly defines the size of an epoch. " + "Epoch-level callbacks will then be called at this interval." + ) + if size >= 0: + return size.numpy().item() + return None + + @property + def _samples(self): + return self._adapter.get_samples() + + def _validate_data_handler(self): + # TODO(b/152094471): Support this with DistIter.get_next_as_optional. + if ( + self._steps_per_execution.numpy().item() > 1 + and self._inferred_steps is None + ): + raise ValueError( + "Could not infer the size of the data. With " + "`steps_per_execution > 1`, you must specify the number of " + "steps to run." + ) - return sample_weight_modes +class _ClusterCoordinatorDataHandler(DataHandler): + """A `DataHandler` that is compatible with `ClusterCoordinator`.""" + + def __init__(self, x, y=None, **kwargs): + if not _is_distributed_dataset(x) and not isinstance( + x, (dataset_creator.DatasetCreator, tf.data.Dataset) + ): + x = self._convert_to_dataset_creator(x, y, **kwargs) + + super().__init__(x=x, **kwargs) + + def _convert_to_dataset_creator(self, x, y, **kwargs): + """Converts non-tf.data.Dataset to `DatasetCreator` instances.""" + + def _dataset_fn(input_context): + del input_context + data_adapter_cls = select_data_adapter(x, y) + return data_adapter_cls(x=x, y=y, **kwargs).get_dataset() + + # This check is needed because types like `tf.data.Dataset` don't work + # with PSS yet. So only apply this logic to the types we can support. + if isinstance(x, _get_tensor_types()) and isinstance( + y, _get_tensor_types() + ): + return dataset_creator.DatasetCreator(_dataset_fn) + else: + raise NotImplementedError( + "Only `tf.keras.utils.experimental.DatasetCreator`, " + "`tf.Tensor`, numpy arrays and pandas dataframes are " + "supported types at this time." 
+ ) + + def _configure_dataset_and_inferred_steps( + self, strategy, x, steps_per_epoch, class_weight, distribute + ): + if isinstance(x, dataset_creator.DatasetCreator): + + def per_worker_dataset_fn(): + + return strategy.distribute_datasets_from_function( + x, options=x.input_options + ) + + coordinator = self._model._cluster_coordinator + self._dataset = coordinator.create_per_worker_dataset( + per_worker_dataset_fn + ) + else: + assert distribute + if not _is_distributed_dataset(x): + x = strategy.experimental_distribute_dataset(x) + + coordinator = self._model._cluster_coordinator + self._dataset = coordinator.create_per_worker_dataset(x) + + if steps_per_epoch == -1: + self._inferred_steps = None + self._log_indefinite_training_warning() + else: + self._inferred_steps = steps_per_epoch + + def sync(self): + self._model._cluster_coordinator.join() + + +class _ClusterCoordinatorExactEvalDataHandler(_ClusterCoordinatorDataHandler): + def __init__(self, x, y=None, **kwargs): + super().__init__(x=x, **kwargs) + self._total_shards = kwargs.get("pss_evaluation_shards") + + def _warn_if_not_file_shardable(self, dataset): + # Traverse backwards to find source dataset and check if that is one of + # the unshardable types + # TODO(b/268521864): expand this to inspect dataset function graphs and + # use the auto-sharding logic rather than re-creating it here. + cur_dataset = dataset + while hasattr(cur_dataset, "_input_dataset"): + cur_dataset = cur_dataset._input_dataset + if type(cur_dataset) in UNSHARDABLE_DATASET_TYPES: + logging.warning( + "Found source dataset of type {}. This type is not " + "efficiently shardable, so exact evaluation may be " + "slower than inexact evaluation. Try converting to " + "a TFRecord or other file-based dataset if " + "performance is a concern.".format(type(cur_dataset)) + ) + + def _configure_dataset_and_inferred_steps( + self, strategy, x, steps_per_epoch, class_weight, distribute + ): + if isinstance(x, dataset_creator.DatasetCreator): + + def per_worker_dataset_fn(): + ddf = strategy.distribute_datasets_from_function( + x, options=x.input_options + ) + return ddf + + coordinator = self._model._cluster_coordinator + self._dataset = coordinator.create_per_worker_dataset( + per_worker_dataset_fn + ) + logging.info("dataset element spec: %r", self._dataset.element_spec) + self._dataset = self._dataset.build() + else: + # TODO(b/268226218): Support DistributedDataset input + if not _is_distributed_dataset(x): + self._warn_if_not_file_shardable(x) + x = strategy.experimental_distribute_dataset(x) + + coordinator = self._model._cluster_coordinator + self._dataset = coordinator.create_per_worker_dataset(x) + self._dataset = self._dataset.build() + + if steps_per_epoch == -1: + self._inferred_steps = None + self._log_indefinite_training_warning() + else: + self._inferred_steps = steps_per_epoch + + def enumerate_epochs(self): + """Yields `(epoch, dataset)`.""" + for epoch in range(self._initial_epoch, self._epochs): + yield epoch, self._dataset + self._adapter.on_epoch_end() + + def steps(self): + """Yields steps for the current epoch.""" + for step in range(self._total_shards): + yield step + + +@keras_export("keras.__internal__.utils.get_data_handler", v1=[]) +def get_data_handler(*args, **kwargs): + """Creates a `DataHandler`, providing standardized access to a `Dataset`. + + See `DataHandler` for the list and definition of the arguments. See the + implementation of `Model.fit()`, `evaluate()`, or `predict()` methods + for complete usage examples. 
As a rule of thumb, `get_data_handler()` accepts + the same inputs as the `x` argument of `Model.fit()`. + + Example: + + ```python + def step(iterator): + data = next(iterator) + # result <= Do something with data + return result + tf_step = tf.function(step, reduce_retracing=True) + + # Assume x is a tf.data Dataset. + data_handler = data_adapter.get_data_handler(x=x) + # Epoch iteration + for epo_idx, iterator in data_handler.enumerate_epochs(): + # Stop on dataset exhaustion. + with data_handler.catch_stop_iteration(): + for step in data_handler.steps(): # Step iteration + step_result = step(iterator) + ``` + + Args: + *args: Arguments passed to the `DataHandler` constructor. + **kwargs: Arguments passed to the `DataHandler` constructor. + + Returns: + A `DataHandler` object. If the model's cluster coordinator is set (e.g. the + model was defined under a parameter-server strategy), returns a + `_ClusterCoordinatorDataHandler`. -class DataHandler: - """Handles iterating over epoch-level `tf.data.Iterator` objects.""" - - def __init__(self, - x, - y=None, - sample_weight=None, - batch_size=None, - steps_per_epoch=None, - initial_epoch=0, - epochs=1, - shuffle=False, - class_weight=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - model=None, - steps_per_execution=None, - distribute=True): - """Initializes a `DataHandler`. - - Arguments: - x: See `Model.fit`. - y: See `Model.fit`. - sample_weight: See `Model.fit`. - batch_size: See `Model.fit`. - steps_per_epoch: See `Model.fit`. - initial_epoch: See `Model.fit`. - epochs: See `Model.fit`. - shuffle: See `Model.fit`. - class_weight: See `Model.fit`. - max_queue_size: See `Model.fit`. - workers: See `Model.fit`. - use_multiprocessing: See `Model.fit`. - model: The `Model` instance. Needed in order to correctly `build` the - `Model` using generator-like inputs (see `GeneratorDataAdapter`). - steps_per_execution: See `Model.compile`. - distribute: Whether to distribute the `tf.dataset`. - `PreprocessingLayer.adapt` does not support distributed datasets, - `Model` should always set this to `True`. """ + if getattr(kwargs["model"], "_cluster_coordinator", None): + if kwargs.get("pss_evaluation_shards"): + return _ClusterCoordinatorExactEvalDataHandler(*args, **kwargs) + return _ClusterCoordinatorDataHandler(*args, **kwargs) + return DataHandler(*args, **kwargs) - self._initial_epoch = initial_epoch - self._initial_step = 0 - self._epochs = epochs - self._insufficient_data = False - self._model = model - self._steps_per_epoch = steps_per_epoch +def _make_class_weight_map_fn(class_weight): + """Applies class weighting to a `Dataset`. - # `steps_per_execution_value` is the cached initial value. - # `steps_per_execution` is mutable and may be changed by the DataAdapter - # to handle partial executions. - if steps_per_execution is None: - self._steps_per_execution = tf.Variable(1) - else: - self._steps_per_execution = steps_per_execution + The `Dataset` is assumed to be in format `(x, y)` or `(x, y, sw)`, where + `y` must be a single `Tensor`.
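A hand-rolled sketch of the class-weight lookup that `_make_class_weight_map_fn` builds, for the simple case of rank-1 sparse integer labels (the real function also handles one-hot and higher-rank targets, and merges with an existing `sample_weight`); all data here is made up:

```python
import tensorflow as tf

class_weight = {0: 0.2, 1: 0.6, 2: 0.3}
# Weight table indexed by class id.
class_weight_tensor = tf.convert_to_tensor(
    [class_weight[c] for c in sorted(class_weight)]
)

def class_weights_map_fn(x, y):
    # Sparse integer labels index straight into the weight table.
    sw = tf.gather(class_weight_tensor, tf.cast(y, tf.int64))
    return x, y, sw

dataset = tf.data.Dataset.from_tensor_slices(
    (tf.ones((4, 8)), tf.constant([0, 2, 1, 1]))
).batch(2)
# Each element now yields (x, y, sample_weight) with weights
# 0.2, 0.3, 0.6, 0.6 for the four samples.
dataset = dataset.map(class_weights_map_fn)
```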
- adapter_cls = select_data_adapter(x, y) - self._adapter = adapter_cls( - x, - y, - batch_size=batch_size, - steps=steps_per_epoch, - epochs=epochs - initial_epoch, - sample_weights=sample_weight, - shuffle=shuffle, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - distribution_strategy=tf.distribute.get_strategy(), - model=model) - - strategy = tf.distribute.get_strategy() - - self._current_step = 0 - self._step_increment = self._steps_per_execution.numpy().item() - 1 - self._insufficient_data = False - - self._configure_dataset_and_inferred_steps(strategy, x, steps_per_epoch, - class_weight, distribute) - - def _configure_dataset_and_inferred_steps(self, strategy, x, steps_per_epoch, - class_weight, distribute): - """Configure the `_dataset` and `_inferred_steps` attributes.""" - del x - dataset = self._adapter.get_dataset() - if class_weight: - dataset = dataset.map(_make_class_weight_map_fn(class_weight)) - self._inferred_steps = self._infer_steps(steps_per_epoch, dataset) - - # `PreprocessingLayer.adapt` does not currently support distributed - # datasets, so we pass `distribute=False` there. - if distribute and not _is_distributed_dataset(dataset): - dataset = strategy.experimental_distribute_dataset(dataset) - self._dataset = dataset - self._validate_data_handler() - - def enumerate_epochs(self): - """Yields `(epoch, tf.data.Iterator)`.""" - with self._truncate_execution_to_epoch(): - data_iterator = iter(self._dataset) - for epoch in range(self._initial_epoch, self._epochs): - if self._insufficient_data: # Set by `catch_stop_iteration`. - break - if self._adapter.should_recreate_iterator(): - data_iterator = iter(self._dataset) - if not isinstance(self._dataset, DistributedDataset): - steps = self._infer_steps(self._steps_per_epoch, self._dataset) - if steps is not None: - self._inferred_steps = steps - yield epoch, data_iterator - self._adapter.on_epoch_end() - - @contextlib.contextmanager - def _truncate_execution_to_epoch(self): - """Truncates steps per execution to at most one epoch.""" - should_truncate = ( - self._inferred_steps is not None and - self._steps_per_execution.numpy().item() > self._inferred_steps) - original_value = self._steps_per_execution.numpy().item() - try: - if should_truncate: - self._steps_per_execution.assign(self._inferred_steps) - yield - finally: - if should_truncate: - self._steps_per_execution.assign(original_value) - - def sync(self): - context.async_wait() - - @contextlib.contextmanager - def catch_stop_iteration(self): - """Catches errors when an iterator runs out of data.""" - try: - yield - self.sync() - except (StopIteration, tf.errors.OutOfRangeError): - if self._inferred_steps is None: - self._inferred_steps = self._current_step - else: - self._insufficient_data = True - total_epochs = self._epochs - self._initial_epoch - logging.warning( - "Your input ran out of data; interrupting training. " - "Make sure that your dataset or generator can generate at " - "least `steps_per_epoch * epochs` batches (in this case, " - "{} batches). You may need to use the repeat() function " - "when building your dataset.".format(total_epochs * - self._inferred_steps)) - - def steps(self): - """Yields steps for the current epoch.""" - self._current_step = self._initial_step - # `self._inferred_steps` can be changed by `catch_stop_iteration`. - while (self._inferred_steps is None or - self._current_step < self._inferred_steps): - if self._insufficient_data: # Set by `catch_stop_iteration`. 
- break - original_spe = self._steps_per_execution.numpy().item() - can_run_full_execution = ( - original_spe == 1 or - self._inferred_steps is None or - self._inferred_steps - self._current_step >= - original_spe) - - if can_run_full_execution: - self._step_increment = original_spe - 1 - yield self._current_step - self._current_step += original_spe - else: - # Last partial execution. - steps_remaining = self._inferred_steps - self._current_step - self._steps_per_execution.assign(steps_remaining) - self._step_increment = steps_remaining - 1 - yield self._current_step - self._current_step += steps_remaining - self._steps_per_execution.assign(original_spe) - - @property - def step_increment(self): - """The number to increment the step for `on_batch_end` methods.""" - return self._step_increment - - @property - def inferred_steps(self): - """The inferred steps per epoch of the created `Dataset`. - - This will be `None` in the case where: - - (1) A `Dataset` of unknown cardinality was passed to the `DataHandler`, and - (2) `steps_per_epoch` was not provided, and - (3) The first epoch of iteration has not yet completed. + Args: + class_weight: A map where the keys are integer class ids and values are + the class weights, e.g. `{0: 0.2, 1: 0.6, 2: 0.3}` Returns: - The inferred steps per epoch of the created `Dataset`. + A function that can be used with `tf.data.Dataset.map` to apply class + weighting. """ - return self._inferred_steps - - @property - def should_sync(self): - # Catch OutOfRangeError for Datasets of unknown size. - # This blocks until the batch has finished executing. - # TODO(b/150292341): Allow multiple async steps here. - return self._inferred_steps is None - - def _log_indefinite_training_warning(self): - logging.warning("The training loop will run indefinitely since you have " - "set `steps_per_epoch=-1`. Please use batch-level " - "callbacks to save checkpoints or log training progress, " - "etc") - - def _infer_steps(self, steps, dataset): - """Infers steps_per_epoch needed to loop through a dataset.""" - if steps == -1: - self._log_indefinite_training_warning() - return None - - if steps is not None: - return steps - - adapter_steps = self._adapter.get_size() - if adapter_steps is not None: - return adapter_steps - - size = tf.data.experimental.cardinality(dataset) - if size == tf.data.experimental.INFINITE_CARDINALITY and steps is None: - raise ValueError( - "When passing an infinitely repeating dataset, please specify a " - "`steps_per_epoch` value so that epoch level " - "callbacks continue to work. The value can be arbitrary, or a number " - "that you think correctly defines the size of an epoch. " - "Epoch-level callbacks will then be called at this interval.") - if size >= 0: - return size.numpy().item() - return None - - @property - def _samples(self): - return self._adapter.get_samples() - - def _validate_data_handler(self): - # TODO(b/152094471): Support this with DistIter.get_next_as_optional. - if self._steps_per_execution.numpy().item( - ) > 1 and self._inferred_steps is None: - raise ValueError( - "Could not infer the size of the data. 
With " - "`steps_per_execution > 1`, you must specify the number of steps " - "to run.") - + class_ids = list(sorted(class_weight.keys())) + expected_class_ids = list(range(len(class_ids))) + if class_ids != expected_class_ids: + error_msg = ( + "Expected `class_weight` to be a dict with keys from 0 to one less " + "than the number of classes, found {}" + ).format(class_weight) + raise ValueError(error_msg) + + class_weight_tensor = tf.convert_to_tensor( + [class_weight[int(c)] for c in class_ids] + ) + + def _class_weights_map_fn(*data): + """Convert `class_weight` to `sample_weight`.""" + x, y, sw = unpack_x_y_sample_weight(data) + + if tf.nest.is_nested(y): + raise ValueError( + "`class_weight` is only supported for Models with a single " + "output." + ) + + if y.shape.rank >= 2: + y_classes = tf.__internal__.smart_cond.smart_cond( + backend.shape(y)[-1] > 1, + lambda: backend.argmax(y, axis=-1), + lambda: tf.cast(tf.round(tf.squeeze(y, axis=-1)), tf.int64), + ) + else: + # Special casing for rank 1, where we can guarantee sparse encoding. + y_classes = tf.cast(tf.round(y), tf.int64) + + cw = tf.gather(class_weight_tensor, y_classes) + if sw is not None: + cw = tf.cast(cw, sw.dtype) + # `class_weight` and `sample_weight` are multiplicative. + # If class_weight has more than 2 dimensions, we need to reshape + # sample_weight to make broadcasting possible for multiplication. + rank_delta = cw.shape.rank - sw.shape.rank + sw = tf.reshape(sw, sw.shape + [1] * rank_delta) + sw = sw * cw + else: + sw = cw + return x, y, sw + + return _class_weights_map_fn -class _ClusterCoordinatorDataHandler(DataHandler): - """A `DataHandler` that is compatible with `ClusterCoordinator`.""" - def __init__(self, x, y=None, **kwargs): - if (not _is_distributed_dataset(x) and - not isinstance(x, (dataset_creator.DatasetCreator, tf.data.Dataset))): - x = self._convert_to_dataset_creator(x, y, **kwargs) - - super().__init__(x=x, **kwargs) +def train_validation_split(arrays, validation_split): + """Split arrays into train and validation subsets in deterministic order. - def _convert_to_dataset_creator(self, x, y, **kwargs): - """Converts non-tf.data.Dataset to `DatasetCreator` instances.""" + The last part of data will become validation data. - def _dataset_fn(input_context): - del input_context - data_adapter_cls = select_data_adapter(x, y) - return data_adapter_cls(x=x, y=y, **kwargs).get_dataset() + Args: + arrays: Tensors to split. Allowed inputs are arbitrarily nested structures + of Tensors and NumPy arrays. + validation_split: Float between 0 and 1. The proportion of the dataset to + include in the validation split. The rest of the dataset will be + included in the training split. + Returns: + `(train_arrays, validation_arrays)` + """ - # This check is needed because types like `tf.data.Dataset` don't work with - # PSS yet. So only apply this logic to the types we can support. 
- if (isinstance(x, _get_tensor_types()) and - isinstance(y, _get_tensor_types())): - return dataset_creator.DatasetCreator(_dataset_fn) - else: - raise NotImplementedError( - "Only `tf.keras.utils.experimental.DatasetCreator`, `tf.Tensor`, " - "numpy arrays and pandas dataframes are supported types at this " - "time.") + def _can_split(t): + tensor_types = _get_tensor_types() + return isinstance(t, tensor_types) or t is None - def _configure_dataset_and_inferred_steps(self, strategy, x, steps_per_epoch, - class_weight, distribute): - if isinstance(x, dataset_creator.DatasetCreator): + flat_arrays = tf.nest.flatten(arrays) + unsplitable = [type(t) for t in flat_arrays if not _can_split(t)] + if unsplitable: + raise ValueError( + "`validation_split` is only supported for Tensors or NumPy " + "arrays, found following types in the input: {}".format(unsplitable) + ) - def per_worker_dataset_fn(): + if all(t is None for t in flat_arrays): + return arrays, arrays - return strategy.distribute_datasets_from_function( - x, options=x.input_options) + first_non_none = None + for t in flat_arrays: + if t is not None: + first_non_none = t + break - self._dataset = self._model._cluster_coordinator.create_per_worker_dataset( # pylint: disable=protected-access - per_worker_dataset_fn) - else: - assert distribute - if not _is_distributed_dataset(x): - x = strategy.experimental_distribute_dataset(x) + # Assumes all arrays have the same batch shape or are `None`. + batch_dim = int(first_non_none.shape[0]) + split_at = int(math.floor(batch_dim * (1.0 - validation_split))) - self._dataset = self._model._cluster_coordinator.create_per_worker_dataset( # pylint: disable=protected-access - x) + if split_at == 0 or split_at == batch_dim: + raise ValueError( + "Training data contains {batch_dim} samples, which is not " + "sufficient to split it into a validation and training set as " + "specified by `validation_split={validation_split}`. Either " + "provide more data, or a different value for the " + "`validation_split` argument.".format( + batch_dim=batch_dim, validation_split=validation_split + ) + ) + + def _split(t, start, end): + if t is None: + return t + return t[start:end] + + train_arrays = tf.nest.map_structure( + functools.partial(_split, start=0, end=split_at), arrays + ) + val_arrays = tf.nest.map_structure( + functools.partial(_split, start=split_at, end=batch_dim), arrays + ) + + return train_arrays, val_arrays - if steps_per_epoch == -1: - self._inferred_steps = None - self._log_indefinite_training_warning() - else: - self._inferred_steps = steps_per_epoch - def sync(self): - self._model._cluster_coordinator.join() # pylint: disable=protected-access +@keras_export("keras.utils.unpack_x_y_sample_weight", v1=[]) +def unpack_x_y_sample_weight(data): + """Unpacks user-provided data tuple. -@keras_export("keras.__internal__.utils.get_data_handler", v1=[]) -def get_data_handler(*args, **kwargs): - """Creates a `DataHandler`, providing standardized access to a `Dataset`. + This is a convenience utility to be used when overriding + `Model.train_step`, `Model.test_step`, or `Model.predict_step`. + This utility makes it easy to support data of the form `(x,)`, + `(x, y)`, or `(x, y, sample_weight)`. - See `DataHandler` for the list and definition of the arguments. See the - implementation of `Model.fit()`, `evaluate()`, or `predict()` methods - for complete usage examples. As a rule of tumb, `get_data_handler()` accepts - the same inputs as the `x` argument of `Model.fit()`. 
+ Standalone usage: - Example: + >>> features_batch = tf.ones((10, 5)) + >>> labels_batch = tf.zeros((10, 5)) + >>> data = (features_batch, labels_batch) + >>> # `y` and `sample_weight` will default to `None` if not provided. + >>> x, y, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data) + >>> sample_weight is None + True - ```python - def step(iterator): - data = next(iterator) - # result <= Do something with data - return result - tf_step = tf.function(step, reduce_retracing=True) + Example in overridden `Model.train_step`: - # Assume x is a tf.data Dataset. - data_handler = data_adapter.get_data_handler(x=x) - for epo_idx, iterator in data_handler.enumerate_epochs(): # Epoch iteration - with data_handler.catch_stop_iteration(): # Stop on dataset exhaustion. - for step in data_handler.steps(): # Step iteration - step_result = step(iterator) - ``` + ```python + class MyModel(tf.keras.Model): - Args: - *args: Arguments passed to the `DataHandler` constructor. - **kwargs: Arguments passed to the `DataHandler` constructor. + def train_step(self, data): + # If `sample_weight` is not provided, all samples will be weighted + # equally. + x, y, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data) - Returns: - A `DataHandler` object. If the model's cluster coordinate is set (e.g. the - model was defined under a parameter-server strategy), returns a - `_ClusterCoordinatorDataHandler`. + with tf.GradientTape() as tape: + y_pred = self(x, training=True) + loss = self.compiled_loss( + y, y_pred, sample_weight, regularization_losses=self.losses) + trainable_variables = self.trainable_variables + gradients = tape.gradient(loss, trainable_variables) + self.optimizer.apply_gradients(zip(gradients, trainable_variables)) - """ - if getattr(kwargs["model"], "_cluster_coordinator", None): - return _ClusterCoordinatorDataHandler(*args, **kwargs) - return DataHandler(*args, **kwargs) + self.compiled_metrics.update_state(y, y_pred, sample_weight) + return {m.name: m.result() for m in self.metrics} + ``` + Args: + data: A tuple of the form `(x,)`, `(x, y)`, or `(x, y, sample_weight)`. -def _make_class_weight_map_fn(class_weight): - """Applies class weighting to a `Dataset`. - - The `Dataset` is assumed to be in format `(x, y)` or `(x, y, sw)`, where - `y` must be a single `Tensor`. - - Args: - class_weight: A map where the keys are integer class ids and values are - the class weights, e.g. `{0: 0.2, 1: 0.6, 2: 0.3}` - - Returns: - A function that can be used with `tf.data.Dataset.map` to apply class - weighting. 
- """ - class_ids = list(sorted(class_weight.keys())) - expected_class_ids = list(range(len(class_ids))) - if class_ids != expected_class_ids: - error_msg = ( - "Expected `class_weight` to be a dict with keys from 0 to one less " - "than the number of classes, found {}").format(class_weight) - raise ValueError(error_msg) - - class_weight_tensor = tf.convert_to_tensor( - [class_weight[int(c)] for c in class_ids]) - - def _class_weights_map_fn(*data): - """Convert `class_weight` to `sample_weight`.""" - x, y, sw = unpack_x_y_sample_weight(data) - - if tf.nest.is_nested(y): - raise ValueError( - "`class_weight` is only supported for Models with a single output.") - - if y.shape.rank > 2: - raise ValueError("`class_weight` not supported for " - "3+ dimensional targets.") - - y_classes = tf.__internal__.smart_cond.smart_cond( - y.shape.rank == 2 and backend.shape(y)[1] > 1, - lambda: backend.argmax(y, axis=1), - lambda: tf.cast(backend.reshape(y, (-1,)), tf.int64)) - - cw = tf.gather(class_weight_tensor, y_classes) - if sw is not None: - cw = tf.cast(cw, sw.dtype) - # `class_weight` and `sample_weight` are multiplicative. - sw = sw * cw + Returns: + The unpacked tuple, with `None`s for `y` and `sample_weight` if they are + not provided. + """ + if isinstance(data, list): + data = tuple(data) + if not isinstance(data, tuple): + return (data, None, None) + elif len(data) == 1: + return (data[0], None, None) + elif len(data) == 2: + return (data[0], data[1], None) + elif len(data) == 3: + return (data[0], data[1], data[2]) else: - sw = cw - return x, y, sw + error_msg = ( + "Data is expected to be in format `x`, `(x,)`, `(x, y)`, " + "or `(x, y, sample_weight)`, found: {}" + ).format(data) + raise ValueError(error_msg) - return _class_weights_map_fn +@keras_export("keras.utils.pack_x_y_sample_weight", v1=[]) +def pack_x_y_sample_weight(x, y=None, sample_weight=None): + """Packs user-provided data into a tuple. -def train_validation_split(arrays, validation_split): - """Split arrays into train and validation subsets in deterministic order. - - The last part of data will become validation data. - - Args: - arrays: Tensors to split. Allowed inputs are arbitrarily nested structures - of Tensors and NumPy arrays. - validation_split: Float between 0 and 1. The proportion of the dataset to - include in the validation split. The rest of the dataset will be included - in the training split. - Returns: - `(train_arrays, validation_arrays)` - """ - - def _can_split(t): - tensor_types = _get_tensor_types() - return isinstance(t, tensor_types) or t is None - - flat_arrays = tf.nest.flatten(arrays) - unsplitable = [type(t) for t in flat_arrays if not _can_split(t)] - if unsplitable: - raise ValueError( - "`validation_split` is only supported for Tensors or NumPy " - "arrays, found following types in the input: {}".format(unsplitable)) - - if all(t is None for t in flat_arrays): - return arrays, arrays - - first_non_none = None - for t in flat_arrays: - if t is not None: - first_non_none = t - break - - # Assumes all arrays have the same batch shape or are `None`. - batch_dim = int(first_non_none.shape[0]) - split_at = int(math.floor(batch_dim * (1. - validation_split))) - - if split_at == 0 or split_at == batch_dim: - raise ValueError( - "Training data contains {batch_dim} samples, which is not sufficient " - "to split it into a validation and training set as specified by " - "`validation_split={validation_split}`. Either provide more data, or a " - "different value for the `validation_split` argument." 
.format( - batch_dim=batch_dim, validation_split=validation_split)) - - def _split(t, start, end): - if t is None: - return t - return t[start:end] - - train_arrays = tf.nest.map_structure( - functools.partial(_split, start=0, end=split_at), arrays) - val_arrays = tf.nest.map_structure( - functools.partial(_split, start=split_at, end=batch_dim), arrays) - - return train_arrays, val_arrays + This is a convenience utility for packing data into the tuple formats + that `Model.fit` uses. + Standalone usage: -@keras_export("keras.utils.unpack_x_y_sample_weight", v1=[]) -def unpack_x_y_sample_weight(data): - """Unpacks user-provided data tuple. - - This is a convenience utility to be used when overriding - `Model.train_step`, `Model.test_step`, or `Model.predict_step`. - This utility makes it easy to support data of the form `(x,)`, - `(x, y)`, or `(x, y, sample_weight)`. - - Standalone usage: - - >>> features_batch = tf.ones((10, 5)) - >>> labels_batch = tf.zeros((10, 5)) - >>> data = (features_batch, labels_batch) - >>> # `y` and `sample_weight` will default to `None` if not provided. - >>> x, y, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data) - >>> sample_weight is None - True - - Example in overridden `Model.train_step`: - - ```python - class MyModel(tf.keras.Model): - - def train_step(self, data): - # If `sample_weight` is not provided, all samples will be weighted - # equally. - x, y, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data) - - with tf.GradientTape() as tape: - y_pred = self(x, training=True) - loss = self.compiled_loss( - y, y_pred, sample_weight, regularization_losses=self.losses) - trainable_variables = self.trainable_variables - gradients = tape.gradient(loss, trainable_variables) - self.optimizer.apply_gradients(zip(gradients, trainable_variables)) - - self.compiled_metrics.update_state(y, y_pred, sample_weight) - return {m.name: m.result() for m in self.metrics} - ``` - - Args: - data: A tuple of the form `(x,)`, `(x, y)`, or `(x, y, sample_weight)`. - - Returns: - The unpacked tuple, with `None`s for `y` and `sample_weight` if they are not - provided. - """ - if isinstance(data, list): - data = tuple(data) - if not isinstance(data, tuple): - return (data, None, None) - elif len(data) == 1: - return (data[0], None, None) - elif len(data) == 2: - return (data[0], data[1], None) - elif len(data) == 3: - return (data[0], data[1], data[2]) - else: - error_msg = ("Data is expected to be in format `x`, `(x,)`, `(x, y)`, " - "or `(x, y, sample_weight)`, found: {}").format(data) - raise ValueError(error_msg) + >>> x = tf.ones((10, 1)) + >>> data = tf.keras.utils.pack_x_y_sample_weight(x) + >>> isinstance(data, tf.Tensor) + True + >>> y = tf.ones((10, 1)) + >>> data = tf.keras.utils.pack_x_y_sample_weight(x, y) + >>> isinstance(data, tuple) + True + >>> x, y = data + Args: + x: Features to pass to `Model`. + y: Ground-truth targets to pass to `Model`. + sample_weight: Sample weight for each element. -@keras_export("keras.utils.pack_x_y_sample_weight", v1=[]) -def pack_x_y_sample_weight(x, y=None, sample_weight=None): - """Packs user-provided data into a tuple. - - This is a convenience utility for packing data into the tuple formats - that `Model.fit` uses. 
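Editor's note: `train_validation_split` (rewrapped above) always carves the validation subset off the tail of the data, using `split_at = floor(batch_dim * (1 - validation_split))`. A quick usage sketch, assuming the module is importable as `keras.engine.data_adapter` (as the tests below do):

```python
import numpy as np
from keras.engine import data_adapter

x = np.arange(10).reshape(10, 1)
y = np.arange(10)

# The returned pair mirrors the input structure: (train, val), each a (x, y).
(x_train, y_train), (x_val, y_val) = data_adapter.train_validation_split(
    (x, y), validation_split=0.2
)
print(x_train.shape, x_val.shape)  # (8, 1) (2, 1)
print(y_val)                       # [8 9] -- deterministically, the tail
```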
- - Standalone usage: - - >>> x = tf.ones((10, 1)) - >>> data = tf.keras.utils.pack_x_y_sample_weight(x) - >>> isinstance(data, tf.Tensor) - True - >>> y = tf.ones((10, 1)) - >>> data = tf.keras.utils.pack_x_y_sample_weight(x, y) - >>> isinstance(data, tuple) - True - >>> x, y = data - - Args: - x: Features to pass to `Model`. - y: Ground-truth targets to pass to `Model`. - sample_weight: Sample weight for each element. - - Returns: - Tuple in the format used in `Model.fit`. - """ - if y is None: - # For single x-input, we do no tuple wrapping since in this case - # there is no ambiguity. This also makes NumPy and Dataset - # consistent in that the user does not have to wrap their Dataset - # data in an unnecessary tuple - if not tf.nest.is_nested(x): - return x + Returns: + Tuple in the format used in `Model.fit`. + """ + if y is None: + # For single x-input, we do no tuple wrapping since in this case + # there is no ambiguity. This also makes NumPy and Dataset + # consistent in that the user does not have to wrap their Dataset + # data in an unnecessary tuple. + if not isinstance(x, tuple or list): + return x + else: + return (x,) + elif sample_weight is None: + return (x, y) + else: + return (x, y, sample_weight) + + +def single_batch_iterator( + strategy, x, y=None, sample_weight=None, class_weight=None +): + """Creates a single-batch dataset.""" + x, y, sample_weight = _process_tensorlike((x, y, sample_weight)) + if y is None: + data = (x,) + elif sample_weight is None: + data = (x, y) else: - return (x,) - elif sample_weight is None: - return (x, y) - else: - return (x, y, sample_weight) - - -def single_batch_iterator(strategy, - x, - y=None, - sample_weight=None, - class_weight=None): - """Creates a single-batch dataset.""" - x, y, sample_weight = _process_tensorlike((x, y, sample_weight)) - if y is None: - data = (x,) - elif sample_weight is None: - data = (x, y) - else: - data = (x, y, sample_weight) - - _check_data_cardinality(data) - dataset = tf.data.Dataset.from_tensors(data) - if class_weight: - dataset = dataset.map(_make_class_weight_map_fn(class_weight)) - dataset = strategy.experimental_distribute_dataset(dataset) - return iter(dataset) + data = (x, y, sample_weight) + + _check_data_cardinality(data) + dataset = tf.data.Dataset.from_tensors(data) + if class_weight: + dataset = dataset.map(_make_class_weight_map_fn(class_weight)) + dataset = strategy.experimental_distribute_dataset(dataset) + return iter(dataset) def _check_data_cardinality(data): - num_samples = set(int(i.shape[0]) for i in tf.nest.flatten(data)) - if len(num_samples) > 1: - msg = "Data cardinality is ambiguous:\n" - for label, single_data in zip(["x", "y", "sample_weight"], data): - msg += " {} sizes: {}\n".format( - label, ", ".join(str(i.shape[0]) - for i in tf.nest.flatten(single_data))) - msg += "Make sure all arrays contain the same number of samples." - raise ValueError(msg) + num_samples = set(int(i.shape[0]) for i in tf.nest.flatten(data)) + if len(num_samples) > 1: + msg = "Data cardinality is ambiguous:\n" + for label, single_data in zip(["x", "y", "sample_weight"], data): + msg += " {} sizes: {}\n".format( + label, + ", ".join( + str(i.shape[0]) for i in tf.nest.flatten(single_data) + ), + ) + msg += "Make sure all arrays contain the same number of samples." 
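Editor's note: one subtlety in the `pack_x_y_sample_weight` body above is that `isinstance(x, tuple or list)` is equivalent to `isinstance(x, tuple)`, since `or` returns its first truthy operand; only tuple inputs are re-wrapped, while a bare list passes through as-is. A round-trip sketch of the documented pack/unpack behavior:

```python
import tensorflow as tf

x = tf.ones((10, 1))
y = tf.zeros((10, 1))
sw = tf.fill((10,), 0.5)

data = tf.keras.utils.pack_x_y_sample_weight(x, y, sw)  # -> (x, y, sw)
x2, y2, sw2 = tf.keras.utils.unpack_x_y_sample_weight(data)
assert x2 is x and y2 is y and sw2 is sw

# A bare feature tensor passes through unwrapped; unpack fills in `None`s.
x3, y3, sw3 = tf.keras.utils.unpack_x_y_sample_weight(
    tf.keras.utils.pack_x_y_sample_weight(x)
)
assert x3 is x and y3 is None and sw3 is None
```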
+ raise ValueError(msg) def _get_tensor_types(): - if pd is None: - return (tf.Tensor, np.ndarray) - else: - return (tf.Tensor, np.ndarray, pd.Series, pd.DataFrame) + if pd is None: + return (tf.Tensor, np.ndarray) + else: + return (tf.Tensor, np.ndarray, pd.Series, pd.DataFrame) def _is_scipy_sparse(x): - try: - from scipy.sparse import issparse # pylint: disable=g-import-not-at-top + try: + from scipy.sparse import issparse - return issparse(x) - except ImportError: - return False + return issparse(x) + except ImportError: + return False def _is_pandas_series(x): - if pd is None: - return False - else: - return isinstance(x, pd.Series) + if pd is None: + return False + else: + return isinstance(x, pd.Series) def _scipy_sparse_to_sparse_tensor(t): - """Converts a SciPy sparse matrix to a SparseTensor.""" - sparse_coo = t.tocoo() - row, col = sparse_coo.row, sparse_coo.col - data, shape = sparse_coo.data, sparse_coo.shape - if issubclass(data.dtype.type, np.floating): - data = data.astype(backend.floatx()) - indices = np.concatenate( - (np.expand_dims(row, axis=1), np.expand_dims(col, axis=1)), axis=1) - return tf.SparseTensor(indices, data, shape) + """Converts a SciPy sparse matrix to a SparseTensor.""" + sparse_coo = t.tocoo() + row, col = sparse_coo.row, sparse_coo.col + data, shape = sparse_coo.data, sparse_coo.shape + if issubclass(data.dtype.type, np.floating): + data = data.astype(backend.floatx()) + indices = np.concatenate( + (np.expand_dims(row, axis=1), np.expand_dims(col, axis=1)), axis=1 + ) + return tf.SparseTensor(indices, data, shape) def _is_distributed_dataset(ds): - return isinstance(ds, tf.distribute.DistributedDataset) + return isinstance( + ds, + ( + tf.distribute.DistributedDataset, + tf.experimental.dtensor.DTensorDataset, + ), + ) diff --git a/keras/engine/data_adapter_test.py b/keras/engine/data_adapter_test.py index f0aa594326dc..2a480b385b96 100644 --- a/keras/engine/data_adapter_test.py +++ b/keras/engine/data_adapter_test.py @@ -14,1303 +14,1566 @@ # ============================================================================== """DataAdapter tests.""" -import tensorflow.compat.v2 as tf - import math -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras +from keras.engine import data_adapter from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.engine import data_adapter from keras.utils import data_utils + +# isort: off from tensorflow.python.eager import context class DummyArrayLike: - """Dummy array-like object.""" + """Dummy array-like object.""" - def __init__(self, data): - self.data = data + def __init__(self, data): + self.data = data - def __len__(self): - return len(self.data) + def __len__(self): + return len(self.data) - def __getitem__(self, key): - return self.data[key] + def __getitem__(self, key): + return self.data[key] - @property - def shape(self): - return self.data.shape + @property + def shape(self): + return self.data.shape - @property - def dtype(self): - return self.data.dtype + @property + def dtype(self): + return self.data.dtype def fail_on_convert(x, **kwargs): - _ = x - _ = kwargs - raise TypeError('Cannot convert DummyArrayLike to a tensor') + _ = x + _ = kwargs + raise TypeError("Cannot convert DummyArrayLike to a tensor") + + tf.register_tensor_conversion_function(DummyArrayLike, fail_on_convert) class DataAdapterTestBase(test_combinations.TestCase): - - def setUp(self): - 
super().setUp() - self.batch_size = 5 - self.numpy_input = np.zeros((50, 10)) - self.numpy_target = np.ones(50) - self.tensor_input = tf.constant(2.0, shape=(50, 10)) - self.tensor_target = tf.ones((50,)) - self.arraylike_input = DummyArrayLike(self.numpy_input) - self.arraylike_target = DummyArrayLike(self.numpy_target) - self.dataset_input = tf.data.Dataset.from_tensor_slices( - (self.numpy_input, self.numpy_target)).shuffle(50).batch( - self.batch_size) - - def generator(): - while True: - yield (np.zeros((self.batch_size, 10)), np.ones(self.batch_size)) - self.generator_input = generator() - self.iterator_input = data_utils.threadsafe_generator(generator)() - self.sequence_input = TestSequence(batch_size=self.batch_size, - feature_shape=10) - self.text_input = [['abc']] - self.bytes_input = [[b'abc']] - self.model = keras.models.Sequential( - [keras.layers.Dense(8, input_shape=(10,), activation='softmax')]) + def setUp(self): + super().setUp() + self.batch_size = 5 + self.numpy_input = np.zeros((50, 10)) + self.numpy_target = np.ones(50) + self.tensor_input = tf.constant(2.0, shape=(50, 10)) + self.tensor_target = tf.ones((50,)) + self.arraylike_input = DummyArrayLike(self.numpy_input) + self.arraylike_target = DummyArrayLike(self.numpy_target) + self.dataset_input = ( + tf.data.Dataset.from_tensor_slices( + (self.numpy_input, self.numpy_target) + ) + .shuffle(50) + .batch(self.batch_size) + ) + + def generator(): + while True: + yield ( + np.zeros((self.batch_size, 10)), + np.ones(self.batch_size), + ) + + self.generator_input = generator() + self.iterator_input = data_utils.threadsafe_generator(generator)() + self.sequence_input = TestSequence( + batch_size=self.batch_size, feature_shape=10 + ) + self.text_input = [["abc"]] + self.bytes_input = [[b"abc"]] + self.model = keras.models.Sequential( + [keras.layers.Dense(8, input_shape=(10,), activation="softmax")] + ) class TestSequence(data_utils.Sequence): + def __init__(self, batch_size, feature_shape): + self.batch_size = batch_size + self.feature_shape = feature_shape - def __init__(self, batch_size, feature_shape): - self.batch_size = batch_size - self.feature_shape = feature_shape + def __getitem__(self, item): + return ( + np.zeros((self.batch_size, self.feature_shape)), + np.ones((self.batch_size,)), + ) - def __getitem__(self, item): - return (np.zeros((self.batch_size, self.feature_shape)), - np.ones((self.batch_size,))) - - def __len__(self): - return 10 + def __len__(self): + return 10 class TestSparseSequence(TestSequence): - - def __getitem__(self, item): - indices = [[row, self.feature_shape - 1] for row in range(self.batch_size)] - values = [1 for row in range(self.batch_size)] - st = tf.SparseTensor(indices, values, (self.batch_size, self.feature_shape)) - return (st, np.ones((self.batch_size,))) + def __getitem__(self, item): + indices = [ + [row, self.feature_shape - 1] for row in range(self.batch_size) + ] + values = [1 for row in range(self.batch_size)] + st = tf.SparseTensor( + indices, values, (self.batch_size, self.feature_shape) + ) + return (st, np.ones((self.batch_size,))) class TestRaggedSequence(TestSequence): - - def __getitem__(self, item): - values = np.random.randint(0, self.feature_shape, - (self.batch_size, 2)).reshape(-1) - row_lengths = np.full(self.batch_size, 2) - rt = tf.RaggedTensor.from_row_lengths(values, row_lengths) - return (rt, np.ones((self.batch_size,))) + def __getitem__(self, item): + values = np.random.randint( + 0, self.feature_shape, (self.batch_size, 2) + ).reshape(-1) + row_lengths 
= np.full(self.batch_size, 2) + rt = tf.RaggedTensor.from_row_lengths(values, row_lengths) + return (rt, np.ones((self.batch_size,))) class TestBatchSequence(data_utils.Sequence): - - def __init__(self, batch_size, feature_shape, epochs=2): - """Creates a keras.utils.Sequence with increasing batch_size. - - Args: - batch_size (Union[int, List[int]]): Can be a list containing two values: - start and end batch_size - feature_shape (int): Number of features in a sample - epochs (int, optional): Number of epochs - """ - self.batch_size = batch_size - self.feature_shape = feature_shape - - self._epochs = epochs - # we use `on_epoch_end` method to prepare data for the next epoch - # set current epoch to `-1`, so that `on_epoch_end` will increase it to `0` - self._current_epoch = -1 - # actual batch size will be set inside `on_epoch_end` - self._current_batch_size = 0 - - self.on_epoch_end() - - def __len__(self): - """Number of batches in the Sequence. - - Returns: int - The number of batches in the Sequence. - """ - # data was rebalanced, so need to recalculate number of examples - num_examples = 20 - batch_size = self._current_batch_size - return num_examples // batch_size + int( - num_examples % batch_size > - 0) # = math.ceil(num_examples / batch_size ) - - def __getitem__(self, index): - """Gets batch at position `index`. - - Arguments: - index (int): position of the batch in the Sequence. - Returns: Tuple[Any, Any] A batch (tuple of input data and target data). - """ - # return input and target data, as our target data is inside the input - # data return None for the target data - return (np.zeros((self._current_batch_size, self.feature_shape)), - np.ones((self._current_batch_size,))) - - def on_epoch_end(self): - """Updates the data after every epoch.""" - self._current_epoch += 1 - if self._current_epoch < self._epochs: - self._current_batch_size = self._linearly_increasing_batch_size() - - def _linearly_increasing_batch_size(self): - """Linearly increase batch size with every epoch. - - The idea comes from https://arxiv.org/abs/1711.00489. - - Returns: int - The batch size to use in this epoch. - """ - if not isinstance(self.batch_size, list): - return int(self.batch_size) - - if self._epochs > 1: - return int(self.batch_size[0] + self._current_epoch * - (self.batch_size[1] - self.batch_size[0]) / (self._epochs - 1)) - else: - return int(self.batch_size[0]) + def __init__(self, batch_size, feature_shape, epochs=2): + """Creates a keras.utils.Sequence with increasing batch_size. + + Args: + batch_size (Union[int, List[int]]): Can be a list containing two + values: start and end batch_size + feature_shape (int): Number of features in a sample + epochs (int, optional): Number of epochs + """ + self.batch_size = batch_size + self.feature_shape = feature_shape + + self._epochs = epochs + # we use `on_epoch_end` method to prepare data for the next epoch set + # current epoch to `-1`, so that `on_epoch_end` will increase it to `0` + self._current_epoch = -1 + # actual batch size will be set inside `on_epoch_end` + self._current_batch_size = 0 + + self.on_epoch_end() + + def __len__(self): + """Number of batches in the Sequence. + + Returns: int + The number of batches in the Sequence. 
+ """ + # data was rebalanced, so need to recalculate number of examples + num_examples = 20 + batch_size = self._current_batch_size + return num_examples // batch_size + int( + num_examples % batch_size > 0 + ) # = math.ceil(num_examples / batch_size ) + + def __getitem__(self, index): + """Gets batch at position `index`. + + Arguments: + index (int): position of the batch in the Sequence. + Returns: Tuple[Any, Any] A batch (tuple of input data and target data). + """ + # return input and target data, as our target data is inside the input + # data return None for the target data + return ( + np.zeros((self._current_batch_size, self.feature_shape)), + np.ones((self._current_batch_size,)), + ) + + def on_epoch_end(self): + """Updates the data after every epoch.""" + self._current_epoch += 1 + if self._current_epoch < self._epochs: + self._current_batch_size = self._linearly_increasing_batch_size() + + def _linearly_increasing_batch_size(self): + """Linearly increase batch size with every epoch. + + The idea comes from https://arxiv.org/abs/1711.00489. + + Returns: int + The batch size to use in this epoch. + """ + if not isinstance(self.batch_size, list): + return int(self.batch_size) + + if self._epochs > 1: + return int( + self.batch_size[0] + + self._current_epoch + * (self.batch_size[1] - self.batch_size[0]) + / (self._epochs - 1) + ) + else: + return int(self.batch_size[0]) class TensorLikeDataAdapterTest(DataAdapterTestBase): - - def setUp(self): - super().setUp() - self.adapter_cls = data_adapter.TensorLikeDataAdapter - - def test_can_handle_numpy(self): - self.assertTrue(self.adapter_cls.can_handle(self.numpy_input)) - self.assertTrue( - self.adapter_cls.can_handle(self.numpy_input, self.numpy_target)) - - self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) - self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) - self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) - self.assertFalse(self.adapter_cls.can_handle(self.text_input)) - self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) - - def test_size_numpy(self): - adapter = self.adapter_cls( - self.numpy_input, self.numpy_target, batch_size=5) - self.assertEqual(adapter.get_size(), 10) - self.assertFalse(adapter.has_partial_batch()) - - def test_batch_size_numpy(self): - adapter = self.adapter_cls( - self.numpy_input, self.numpy_target, batch_size=5) - self.assertEqual(adapter.batch_size(), 5) - - def test_partial_batch_numpy(self): - adapter = self.adapter_cls( - self.numpy_input, self.numpy_target, batch_size=4) - self.assertEqual(adapter.get_size(), 13) # 50/4 - self.assertTrue(adapter.has_partial_batch()) - self.assertEqual(adapter.partial_batch_size(), 2) - - def test_epochs(self): - num_epochs = 3 - adapter = self.adapter_cls( - self.numpy_input, self.numpy_target, batch_size=5, epochs=num_epochs) - ds_iter = iter(adapter.get_dataset()) - num_batches_per_epoch = self.numpy_input.shape[0] // 5 - for _ in range(num_batches_per_epoch * num_epochs): - next(ds_iter) - with self.assertRaises(StopIteration): - next(ds_iter) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training_numpy(self): - self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(self.numpy_input, self.numpy_target, batch_size=5) - - def test_can_handle_pandas(self): - try: - import pandas as pd # pylint: disable=g-import-not-at-top - except ImportError: - self.skipTest('Skipping test because pandas 
is not installed.') - self.assertTrue(self.adapter_cls.can_handle(pd.DataFrame(self.numpy_input))) - self.assertTrue( - self.adapter_cls.can_handle(pd.DataFrame(self.numpy_input)[0])) - self.assertTrue( - self.adapter_cls.can_handle( - pd.DataFrame(self.numpy_input), - pd.DataFrame(self.numpy_input)[0])) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training_pandas(self): - try: - import pandas as pd # pylint: disable=g-import-not-at-top - except ImportError: - self.skipTest('Skipping test because pandas is not installed.') - input_a = keras.Input(shape=(3,), name='input_a') - input_b = keras.Input(shape=(3,), name='input_b') - input_c = keras.Input(shape=(1,), name='input_b') - - x = keras.layers.Dense(4, name='dense_1')(input_a) - y = keras.layers.Dense(3, name='dense_2')(input_b) - z = keras.layers.Dense(1, name='dense_3')(input_c) - - model_1 = keras.Model(inputs=input_a, outputs=x) - model_2 = keras.Model(inputs=[input_a, input_b], outputs=[x, y]) - model_3 = keras.Model(inputs=input_c, outputs=z) - - model_1.compile(optimizer='rmsprop', loss='mse') - model_2.compile(optimizer='rmsprop', loss='mse') - model_3.compile(optimizer='rmsprop', loss='mse') - - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - input_a_df = pd.DataFrame(input_a_np) - input_b_df = pd.DataFrame(input_b_np) - - output_a_df = pd.DataFrame(np.random.random((10, 4))) - output_b_df = pd.DataFrame(np.random.random((10, 3))) - output_c_series = pd.DataFrame(np.random.random((10, 4)))[0] - - model_1.fit(input_a_df, - output_a_df) - model_2.fit([input_a_df, input_b_df], - [output_a_df, output_b_df]) - model_3.fit(input_a_df[[0]], - output_c_series) - model_1.fit([input_a_df], - [output_a_df]) - model_1.fit({'input_a': input_a_df}, - output_a_df) - model_2.fit({'input_a': input_a_df, 'input_b': input_b_df}, - [output_a_df, output_b_df]) - - model_1.evaluate(input_a_df, - output_a_df) - model_2.evaluate([input_a_df, input_b_df], - [output_a_df, output_b_df]) - model_3.evaluate(input_a_df[[0]], - output_c_series) - model_1.evaluate([input_a_df], - [output_a_df]) - model_1.evaluate({'input_a': input_a_df}, - output_a_df) - model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df}, - [output_a_df, output_b_df]) - - # Verify predicting on pandas vs numpy returns the same result - predict_1_pandas = model_1.predict(input_a_df) - predict_2_pandas = model_2.predict([input_a_df, input_b_df]) - predict_3_pandas = model_3.predict(input_a_df[[0]]) - predict_3_pandas_batch = model_3.predict_on_batch(input_a_df[0]) - - predict_1_numpy = model_1.predict(input_a_np) - predict_2_numpy = model_2.predict([input_a_np, input_b_np]) - predict_3_numpy = model_3.predict(np.asarray(input_a_df[0])) - - self.assertAllClose(predict_1_numpy, predict_1_pandas) - self.assertAllClose(predict_2_numpy, predict_2_pandas) - self.assertAllClose(predict_3_numpy, predict_3_pandas_batch) - self.assertAllClose(predict_3_numpy, predict_3_pandas) - - # Extra ways to pass in dataframes - model_1.predict([input_a_df]) - model_1.predict({'input_a': input_a_df}) - model_2.predict({'input_a': input_a_df, 'input_b': input_b_df}) - - def test_can_handle(self): - self.assertTrue(self.adapter_cls.can_handle(self.tensor_input)) - self.assertTrue( - self.adapter_cls.can_handle(self.tensor_input, self.tensor_target)) - - self.assertFalse(self.adapter_cls.can_handle(self.arraylike_input)) - self.assertFalse( - self.adapter_cls.can_handle(self.arraylike_input, - self.arraylike_target)) - 
self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) - self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) - self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) - self.assertFalse(self.adapter_cls.can_handle(self.text_input)) - self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training(self): - self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(self.tensor_input, self.tensor_target, batch_size=5) - - def test_size(self): - adapter = self.adapter_cls( - self.tensor_input, self.tensor_target, batch_size=5) - self.assertEqual(adapter.get_size(), 10) - self.assertFalse(adapter.has_partial_batch()) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_shuffle_correctness(self): - num_samples = 100 - batch_size = 32 - x = np.arange(num_samples) - np.random.seed(99) - adapter = self.adapter_cls( - x, y=None, batch_size=batch_size, shuffle=True, epochs=2) - - def _get_epoch(ds_iter): - ds_data = [] - for _ in range(int(math.ceil(num_samples / batch_size))): - ds_data.append(next(ds_iter).numpy()) - return np.concatenate(ds_data) - - ds_iter = iter(adapter.get_dataset()) - - # First epoch. - epoch_data = _get_epoch(ds_iter) - # Check that shuffling occurred. - self.assertNotAllClose(x, epoch_data) - # Check that each elements appears, and only once. - self.assertAllClose(x, np.sort(epoch_data)) - - # Second epoch. - second_epoch_data = _get_epoch(ds_iter) - # Check that shuffling occurred. - self.assertNotAllClose(x, second_epoch_data) - # Check that shuffling is different across epochs. - self.assertNotAllClose(epoch_data, second_epoch_data) - # Check that each elements appears, and only once. - self.assertAllClose(x, np.sort(second_epoch_data)) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_batch_shuffle_correctness(self): - num_samples = 100 - batch_size = 6 - x = np.arange(num_samples) - np.random.seed(99) - adapter = self.adapter_cls( - x, y=None, batch_size=batch_size, shuffle='batch', epochs=2) - - def _get_epoch_batches(ds_iter): - ds_data = [] - for _ in range(int(math.ceil(num_samples / batch_size))): - ds_data.append(next(ds_iter)[0].numpy()) - return ds_data - - ds_iter = iter(adapter.get_dataset()) - - # First epoch. - epoch_batch_data = _get_epoch_batches(ds_iter) - epoch_data = np.concatenate(epoch_batch_data) - - def _verify_batch(batch): - # Verify that a batch contains only contiguous data, and that it has - # been shuffled. - shuffled_batch = np.sort(batch) - self.assertNotAllClose(batch, shuffled_batch) - for i in range(1, len(batch)): - self.assertEqual(shuffled_batch[i-1] + 1, shuffled_batch[i]) - - # Assert that the data within each batch remains contiguous - for batch in epoch_batch_data: - _verify_batch(batch) - - # Check that individual batches are unshuffled - # Check that shuffling occurred. - self.assertNotAllClose(x, epoch_data) - # Check that each elements appears, and only once. - self.assertAllClose(x, np.sort(epoch_data)) - - # Second epoch. - second_epoch_batch_data = _get_epoch_batches(ds_iter) - second_epoch_data = np.concatenate(second_epoch_batch_data) - - # Assert that the data within each batch remains contiguous - for batch in second_epoch_batch_data: - _verify_batch(batch) - - # Check that shuffling occurred. 
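Editor's note: stepping back to `TestBatchSequence` defined earlier, its `_linearly_increasing_batch_size` interpolates between a start and end batch size across epochs (after https://arxiv.org/abs/1711.00489). A standalone sketch of that schedule:

```python
def linear_batch_size(start, end, epoch, epochs):
    # Linear interpolation from `start` (epoch 0) to `end` (last epoch).
    if epochs > 1:
        return int(start + epoch * (end - start) / (epochs - 1))
    return int(start)

print([linear_batch_size(5, 10, e, epochs=2) for e in range(2)])  # [5, 10]
```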
- self.assertNotAllClose(x, second_epoch_data) - # Check that shuffling is different across epochs. - self.assertNotAllClose(epoch_data, second_epoch_data) - # Check that each elements appears, and only once. - self.assertAllClose(x, np.sort(second_epoch_data)) - - @parameterized.named_parameters( - ('batch_size_5', 5, None, 5), - ('batch_size_50', 50, 4, 50), # Sanity check: batch_size takes precedence - ('steps_1', None, 1, 50), - ('steps_4', None, 4, 13), - ) - def test_batch_size(self, batch_size_in, steps, batch_size_out): - adapter = self.adapter_cls( - self.tensor_input, self.tensor_target, batch_size=batch_size_in, - steps=steps) - self.assertEqual(adapter.batch_size(), batch_size_out) - - @parameterized.named_parameters( - ('batch_size_5', 5, None, 10, 0), - ('batch_size_4', 4, None, 13, 2), - ('steps_1', None, 1, 1, 0), - ('steps_5', None, 5, 5, 0), - ('steps_4', None, 4, 4, 11), - ) - def test_partial_batch( - self, batch_size_in, steps, size, partial_batch_size): - adapter = self.adapter_cls( - self.tensor_input, self.tensor_target, batch_size=batch_size_in, - steps=steps) - self.assertEqual(adapter.get_size(), size) # 50/steps - self.assertEqual(adapter.has_partial_batch(), bool(partial_batch_size)) - self.assertEqual(adapter.partial_batch_size(), partial_batch_size or None) + def setUp(self): + super().setUp() + self.adapter_cls = data_adapter.TensorLikeDataAdapter + + def test_can_handle_numpy(self): + self.assertTrue(self.adapter_cls.can_handle(self.numpy_input)) + self.assertTrue( + self.adapter_cls.can_handle(self.numpy_input, self.numpy_target) + ) + + self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle(self.text_input)) + self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) + + def test_size_numpy(self): + adapter = self.adapter_cls( + self.numpy_input, self.numpy_target, batch_size=5 + ) + self.assertEqual(adapter.get_size(), 10) + self.assertFalse(adapter.has_partial_batch()) + + def test_batch_size_numpy(self): + adapter = self.adapter_cls( + self.numpy_input, self.numpy_target, batch_size=5 + ) + self.assertEqual(adapter.batch_size(), 5) + + def test_partial_batch_numpy(self): + adapter = self.adapter_cls( + self.numpy_input, self.numpy_target, batch_size=4 + ) + self.assertEqual(adapter.get_size(), 13) # 50/4 + self.assertTrue(adapter.has_partial_batch()) + self.assertEqual(adapter.partial_batch_size(), 2) + + def test_epochs(self): + num_epochs = 3 + adapter = self.adapter_cls( + self.numpy_input, self.numpy_target, batch_size=5, epochs=num_epochs + ) + ds_iter = iter(adapter.get_dataset()) + num_batches_per_epoch = self.numpy_input.shape[0] // 5 + for _ in range(num_batches_per_epoch * num_epochs): + next(ds_iter) + with self.assertRaises(StopIteration): + next(ds_iter) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training_numpy(self): + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit(self.numpy_input, self.numpy_target, batch_size=5) + + def test_can_handle_pandas(self): + try: + import pandas as pd + except ImportError: + self.skipTest("Skipping test because pandas is not installed.") + self.assertTrue( + self.adapter_cls.can_handle(pd.DataFrame(self.numpy_input)) + ) + self.assertTrue( + 
self.adapter_cls.can_handle(pd.DataFrame(self.numpy_input)[0]) + ) + self.assertTrue( + self.adapter_cls.can_handle( + pd.DataFrame(self.numpy_input), + pd.DataFrame(self.numpy_input)[0], + ) + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training_pandas(self): + try: + import pandas as pd + except ImportError: + self.skipTest("Skipping test because pandas is not installed.") + input_a = keras.Input(shape=(3,), name="input_a") + input_b = keras.Input(shape=(3,), name="input_b") + input_c = keras.Input(shape=(1,), name="input_b") + + x = keras.layers.Dense(4, name="dense_1")(input_a) + y = keras.layers.Dense(3, name="dense_2")(input_b) + z = keras.layers.Dense(1, name="dense_3")(input_c) + + model_1 = keras.Model(inputs=input_a, outputs=x) + model_2 = keras.Model(inputs=[input_a, input_b], outputs=[x, y]) + model_3 = keras.Model(inputs=input_c, outputs=z) + + model_1.compile(optimizer="rmsprop", loss="mse") + model_2.compile(optimizer="rmsprop", loss="mse") + model_3.compile(optimizer="rmsprop", loss="mse") + + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + input_a_df = pd.DataFrame(input_a_np) + input_b_df = pd.DataFrame(input_b_np) + + output_a_df = pd.DataFrame(np.random.random((10, 4))) + output_b_df = pd.DataFrame(np.random.random((10, 3))) + output_c_series = pd.DataFrame(np.random.random((10, 4)))[0] + + model_1.fit(input_a_df, output_a_df) + model_2.fit([input_a_df, input_b_df], [output_a_df, output_b_df]) + model_3.fit(input_a_df[[0]], output_c_series) + model_1.fit([input_a_df], [output_a_df]) + model_1.fit({"input_a": input_a_df}, output_a_df) + model_2.fit( + {"input_a": input_a_df, "input_b": input_b_df}, + [output_a_df, output_b_df], + ) + + model_1.evaluate(input_a_df, output_a_df) + model_2.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df]) + model_3.evaluate(input_a_df[[0]], output_c_series) + model_1.evaluate([input_a_df], [output_a_df]) + model_1.evaluate({"input_a": input_a_df}, output_a_df) + model_2.evaluate( + {"input_a": input_a_df, "input_b": input_b_df}, + [output_a_df, output_b_df], + ) + + # Verify predicting on pandas vs numpy returns the same result + predict_1_pandas = model_1.predict(input_a_df) + predict_2_pandas = model_2.predict([input_a_df, input_b_df]) + predict_3_pandas = model_3.predict(input_a_df[[0]]) + predict_3_pandas_batch = model_3.predict_on_batch(input_a_df[0]) + + predict_1_numpy = model_1.predict(input_a_np) + predict_2_numpy = model_2.predict([input_a_np, input_b_np]) + predict_3_numpy = model_3.predict(np.asarray(input_a_df[0])) + + self.assertAllClose(predict_1_numpy, predict_1_pandas) + self.assertAllClose(predict_2_numpy, predict_2_pandas) + self.assertAllClose(predict_3_numpy, predict_3_pandas_batch) + self.assertAllClose(predict_3_numpy, predict_3_pandas) + + # Extra ways to pass in dataframes + model_1.predict([input_a_df]) + model_1.predict({"input_a": input_a_df}) + model_2.predict({"input_a": input_a_df, "input_b": input_b_df}) + + def test_can_handle(self): + self.assertTrue(self.adapter_cls.can_handle(self.tensor_input)) + self.assertTrue( + self.adapter_cls.can_handle(self.tensor_input, self.tensor_target) + ) + + self.assertFalse(self.adapter_cls.can_handle(self.arraylike_input)) + self.assertFalse( + self.adapter_cls.can_handle( + self.arraylike_input, self.arraylike_target + ) + ) + self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + 
self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle(self.text_input)) + self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training(self): + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit(self.tensor_input, self.tensor_target, batch_size=5) + + def test_size(self): + adapter = self.adapter_cls( + self.tensor_input, self.tensor_target, batch_size=5 + ) + self.assertEqual(adapter.get_size(), 10) + self.assertFalse(adapter.has_partial_batch()) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_shuffle_correctness(self): + num_samples = 100 + batch_size = 32 + x = np.arange(num_samples) + np.random.seed(99) + adapter = self.adapter_cls( + x, y=None, batch_size=batch_size, shuffle=True, epochs=2 + ) + + def _get_epoch(ds_iter): + ds_data = [] + for _ in range(int(math.ceil(num_samples / batch_size))): + ds_data.append(next(ds_iter).numpy()) + return np.concatenate(ds_data) + + ds_iter = iter(adapter.get_dataset()) + + # First epoch. + epoch_data = _get_epoch(ds_iter) + # Check that shuffling occurred. + self.assertNotAllClose(x, epoch_data) + # Check that each elements appears, and only once. + self.assertAllClose(x, np.sort(epoch_data)) + + # Second epoch. + second_epoch_data = _get_epoch(ds_iter) + # Check that shuffling occurred. + self.assertNotAllClose(x, second_epoch_data) + # Check that shuffling is different across epochs. + self.assertNotAllClose(epoch_data, second_epoch_data) + # Check that each elements appears, and only once. + self.assertAllClose(x, np.sort(second_epoch_data)) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_batch_shuffle_correctness(self): + num_samples = 100 + batch_size = 6 + x = np.arange(num_samples) + np.random.seed(99) + adapter = self.adapter_cls( + x, y=None, batch_size=batch_size, shuffle="batch", epochs=2 + ) + + def _get_epoch_batches(ds_iter): + ds_data = [] + for _ in range(int(math.ceil(num_samples / batch_size))): + ds_data.append(next(ds_iter)[0].numpy()) + return ds_data + + ds_iter = iter(adapter.get_dataset()) + + # First epoch. + epoch_batch_data = _get_epoch_batches(ds_iter) + epoch_data = np.concatenate(epoch_batch_data) + + def _verify_batch(batch): + # Verify that a batch contains only contiguous data, and that it has + # been shuffled. + shuffled_batch = np.sort(batch) + self.assertNotAllClose(batch, shuffled_batch) + for i in range(1, len(batch)): + self.assertEqual(shuffled_batch[i - 1] + 1, shuffled_batch[i]) + + # Assert that the data within each batch remains contiguous + for batch in epoch_batch_data: + _verify_batch(batch) + + # Check that individual batches are unshuffled + # Check that shuffling occurred. + self.assertNotAllClose(x, epoch_data) + # Check that each elements appears, and only once. + self.assertAllClose(x, np.sort(epoch_data)) + + # Second epoch. + second_epoch_batch_data = _get_epoch_batches(ds_iter) + second_epoch_data = np.concatenate(second_epoch_batch_data) + + # Assert that the data within each batch remains contiguous + for batch in second_epoch_batch_data: + _verify_batch(batch) + + # Check that shuffling occurred. + self.assertNotAllClose(x, second_epoch_data) + # Check that shuffling is different across epochs. 
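Editor's note: the `shuffle="batch"` assertions in these tests encode the contract that every emitted batch holds a contiguous run of samples, shuffled within the batch, and that the overall epoch order differs from the input. A NumPy sketch of one permutation satisfying those assertions (not necessarily the exact permutation Keras draws), assuming `batch_size` divides `num_samples`:

```python
import numpy as np

num_samples, batch_size = 12, 3
index = np.arange(num_samples).reshape(-1, batch_size)  # contiguous blocks
np.random.shuffle(index)      # shuffle the order of the blocks...
for row in index:
    np.random.shuffle(row)    # ...and the order within each block
print(index.reshape(-1))      # e.g. [ 7  6  8  2  0  1 10 11  9  4  3  5]
```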
+ self.assertNotAllClose(epoch_data, second_epoch_data) + # Check that each elements appears, and only once. + self.assertAllClose(x, np.sort(second_epoch_data)) + + @parameterized.named_parameters( + ("batch_size_5", 5, None, 5), + ( + "batch_size_50", + 50, + 4, + 50, + ), # Sanity check: batch_size takes precedence + ("steps_1", None, 1, 50), + ("steps_4", None, 4, 13), + ) + def test_batch_size(self, batch_size_in, steps, batch_size_out): + adapter = self.adapter_cls( + self.tensor_input, + self.tensor_target, + batch_size=batch_size_in, + steps=steps, + ) + self.assertEqual(adapter.batch_size(), batch_size_out) + + @parameterized.named_parameters( + ("batch_size_5", 5, None, 10, 0), + ("batch_size_4", 4, None, 13, 2), + ("steps_1", None, 1, 1, 0), + ("steps_5", None, 5, 5, 0), + ("steps_4", None, 4, 4, 11), + ) + def test_partial_batch( + self, batch_size_in, steps, size, partial_batch_size + ): + adapter = self.adapter_cls( + self.tensor_input, + self.tensor_target, + batch_size=batch_size_in, + steps=steps, + ) + self.assertEqual(adapter.get_size(), size) # 50/steps + self.assertEqual(adapter.has_partial_batch(), bool(partial_batch_size)) + self.assertEqual( + adapter.partial_batch_size(), partial_batch_size or None + ) class IncreasingBatchSizeAdapterTest(test_combinations.TestCase): + def setUp(self): + super(IncreasingBatchSizeAdapterTest, self).setUp() + self.adapter_cls = data_adapter.KerasSequenceAdapter + + self.epochs = 2 + self.increasing_batch_size = [5, 10] + self.sequence_input = TestBatchSequence( + batch_size=self.increasing_batch_size, + feature_shape=10, + epochs=self.epochs, + ) + self.model = keras.models.Sequential( + [keras.layers.Dense(8, input_shape=(10,), activation="softmax")] + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training_with_test_batch_sequence(self): + """Ensures TestBatchSequence works as expected.""" + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + + # Check state before fit() + self.assertEqual(self.sequence_input._current_epoch, 0) + self.assertEqual(self.sequence_input._current_batch_size, 5) + + # Execute fit() + self.model.fit(self.sequence_input, epochs=self.epochs) + + # Check state after fit() + self.assertEqual(self.sequence_input._current_epoch, 2) + self.assertEqual(self.sequence_input._current_batch_size, 10) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training_with_increasing_batch_size(self): + """Ensures data_adapters DataHandler & DataAdapter work as expected.""" + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.stop_training = False + self.model.train_function = self.model.make_train_function() + + # Check state before fit() + self.assertEqual(self.sequence_input._current_epoch, 0) + self.assertEqual(self.sequence_input._current_batch_size, 5) + data_handler = data_adapter.get_data_handler( + self.sequence_input, + epochs=self.epochs, + model=self.model, + ) + self.assertEqual( + data_handler.inferred_steps, 4 + ) # 20 samples / 5 bs = 4 + + # Execute fit()-loop + for epoch, iterator in data_handler.enumerate_epochs(): + self.model.reset_metrics() + with data_handler.catch_stop_iteration(): + for step in data_handler.steps(): + with tf.profiler.experimental.Trace( + "train", + epoch_num=epoch, + step_num=step, + batch_size=self.sequence_input._current_batch_size, + _r=1, + ): + if 
data_handler.should_sync: + context.async_wait() + if self.model.stop_training: + break + + # Check state after fit() + self.assertEqual( + data_handler.inferred_steps, 2 + ) # 20 samples / 10 bs = 2 - def setUp(self): - super(IncreasingBatchSizeAdapterTest, self).setUp() - self.adapter_cls = data_adapter.KerasSequenceAdapter - self.epochs = 2 - self.increasing_batch_size = [5, 10] - self.sequence_input = TestBatchSequence( - batch_size=self.increasing_batch_size, - feature_shape=10, - epochs=self.epochs, +class GenericArrayLikeDataAdapterTest(DataAdapterTestBase): + def setUp(self): + super().setUp() + self.adapter_cls = data_adapter.GenericArrayLikeDataAdapter + + def test_can_handle_some_numpy(self): + self.assertTrue(self.adapter_cls.can_handle(self.arraylike_input)) + self.assertTrue( + self.adapter_cls.can_handle( + self.arraylike_input, self.arraylike_target + ) + ) + + # Because adapters are mutually exclusive, don't handle cases + # where all the data is numpy or an eagertensor + self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) + self.assertFalse( + self.adapter_cls.can_handle(self.numpy_input, self.numpy_target) + ) + self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) + self.assertFalse( + self.adapter_cls.can_handle(self.tensor_input, self.tensor_target) + ) + + # But do handle mixes that include generic arraylike data + self.assertTrue( + self.adapter_cls.can_handle(self.numpy_input, self.arraylike_target) + ) + self.assertTrue( + self.adapter_cls.can_handle(self.arraylike_input, self.numpy_target) + ) + self.assertTrue( + self.adapter_cls.can_handle( + self.arraylike_input, self.tensor_target + ) + ) + self.assertTrue( + self.adapter_cls.can_handle( + self.tensor_input, self.arraylike_target + ) + ) + + self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle(self.text_input)) + self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) + + def test_size(self): + adapter = self.adapter_cls( + self.arraylike_input, self.arraylike_target, batch_size=5 + ) + self.assertEqual(adapter.get_size(), 10) + self.assertFalse(adapter.has_partial_batch()) + + def test_epochs(self): + num_epochs = 3 + adapter = self.adapter_cls( + self.arraylike_input, + self.numpy_target, + batch_size=5, + epochs=num_epochs, + ) + ds_iter = iter(adapter.get_dataset()) + num_batches_per_epoch = self.numpy_input.shape[0] // 5 + for _ in range(num_batches_per_epoch * num_epochs): + next(ds_iter) + with self.assertRaises(StopIteration): + next(ds_iter) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training(self): + # First verify that DummyArrayLike can't be converted to a Tensor + with self.assertRaises(TypeError): + tf.convert_to_tensor(self.arraylike_input) + + # Then train on the array like. + # It should not be converted to a tensor directly (which would force it + # into memory), only the sliced data should be converted. 
+ self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit( + self.arraylike_input, self.arraylike_target, batch_size=5 + ) + self.model.fit( + self.arraylike_input, + self.arraylike_target, + shuffle=True, + batch_size=5, + ) + self.model.fit( + self.arraylike_input, + self.arraylike_target, + shuffle="batch", + batch_size=5, + ) + self.model.evaluate( + self.arraylike_input, self.arraylike_target, batch_size=5 + ) + self.model.predict(self.arraylike_input, batch_size=5) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training_numpy_target(self): + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit(self.arraylike_input, self.numpy_target, batch_size=5) + self.model.fit( + self.arraylike_input, self.numpy_target, shuffle=True, batch_size=5 + ) + self.model.fit( + self.arraylike_input, + self.numpy_target, + shuffle="batch", + batch_size=5, + ) + self.model.evaluate( + self.arraylike_input, self.numpy_target, batch_size=5 + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training_tensor_target(self): + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit(self.arraylike_input, self.tensor_target, batch_size=5) + self.model.fit( + self.arraylike_input, self.tensor_target, shuffle=True, batch_size=5 + ) + self.model.fit( + self.arraylike_input, + self.tensor_target, + shuffle="batch", + batch_size=5, + ) + self.model.evaluate( + self.arraylike_input, self.tensor_target, batch_size=5 + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_shuffle_correctness(self): + num_samples = 100 + batch_size = 32 + x = DummyArrayLike(np.arange(num_samples)) + np.random.seed(99) + adapter = self.adapter_cls( + x, y=None, batch_size=batch_size, shuffle=True, epochs=2 + ) + + def _get_epoch(ds_iter): + ds_data = [] + for _ in range(int(math.ceil(num_samples / batch_size))): + ds_data.append(next(ds_iter).numpy()) + return np.concatenate(ds_data) + + ds_iter = iter(adapter.get_dataset()) + + # First epoch. + epoch_data = _get_epoch(ds_iter) + # Check that shuffling occurred. + self.assertNotAllClose(x, epoch_data) + # Check that each elements appears, and only once. + self.assertAllClose(x, np.sort(epoch_data)) + + # Second epoch. + second_epoch_data = _get_epoch(ds_iter) + # Check that shuffling occurred. + self.assertNotAllClose(x, second_epoch_data) + # Check that shuffling is different across epochs. + self.assertNotAllClose(epoch_data, second_epoch_data) + # Check that each elements appears, and only once. + self.assertAllClose(x, np.sort(second_epoch_data)) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_batch_shuffle_correctness(self): + num_samples = 100 + batch_size = 6 + x = DummyArrayLike(np.arange(num_samples)) + np.random.seed(99) + adapter = self.adapter_cls( + x, y=None, batch_size=batch_size, shuffle="batch", epochs=2 + ) + + def _get_epoch_batches(ds_iter): + ds_data = [] + for _ in range(int(math.ceil(num_samples / batch_size))): + ds_data.append(next(ds_iter)[0].numpy()) + return ds_data + + ds_iter = iter(adapter.get_dataset()) + + # First epoch. 
+ epoch_batch_data = _get_epoch_batches(ds_iter) + epoch_data = np.concatenate(epoch_batch_data) + + def _verify_batch(batch): + # Verify that a batch contains only contiguous data, but that it has + # been shuffled. + shuffled_batch = np.sort(batch) + self.assertNotAllClose(batch, shuffled_batch) + for i in range(1, len(batch)): + self.assertEqual(shuffled_batch[i - 1] + 1, shuffled_batch[i]) + + # Assert that the data within each batch is shuffled contiguous data + for batch in epoch_batch_data: + _verify_batch(batch) + + # Check that the epoch as a whole was shuffled. + self.assertNotAllClose(x, epoch_data) + # Check that each element appears once and only once. + self.assertAllClose(x, np.sort(epoch_data)) + + # Second epoch. + second_epoch_batch_data = _get_epoch_batches(ds_iter) + second_epoch_data = np.concatenate(second_epoch_batch_data) + + # Assert that the data within each batch remains contiguous + for batch in second_epoch_batch_data: + _verify_batch(batch) + + # Check that shuffling occurred. + self.assertNotAllClose(x, second_epoch_data) + # Check that shuffling is different across epochs. + self.assertNotAllClose(epoch_data, second_epoch_data) + # Check that each element appears once and only once. + self.assertAllClose(x, np.sort(second_epoch_data)) + + @parameterized.named_parameters( + ("batch_size_5", 5, None, 5), + ( + "batch_size_50", + 50, + 4, + 50, + ), # Sanity check: batch_size takes precedence + ("steps_1", None, 1, 50), + ("steps_4", None, 4, 13), ) - self.model = keras.models.Sequential( - [keras.layers.Dense(8, input_shape=(10,), activation='softmax')]) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training_with_test_batch_sequence(self): - """Ensures TestBatchSequence works as expected.""" - self.model.compile( - loss='sparse_categorical_crossentropy', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - - # Check state before fit() - self.assertEqual(self.sequence_input._current_epoch, 0) - self.assertEqual(self.sequence_input._current_batch_size, 5) - - # Execute fit() - self.model.fit(self.sequence_input, epochs=self.epochs) - - # Check state after fit() - self.assertEqual(self.sequence_input._current_epoch, 2) - self.assertEqual(self.sequence_input._current_batch_size, 10) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training_with_increasing_batch_size(self): - """Ensures data_adapters DataHandler & DataAdapter work as expected.""" - self.model.compile( - loss='sparse_categorical_crossentropy', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.stop_training = False - self.model.train_function = self.model.make_train_function() - - # Check state before fit() - self.assertEqual(self.sequence_input._current_epoch, 0) - self.assertEqual(self.sequence_input._current_batch_size, 5) - data_handler = data_adapter.get_data_handler( - self.sequence_input, - epochs=self.epochs, - model=self.model, + def test_batch_size(self, batch_size_in, steps, batch_size_out): + adapter = self.adapter_cls( + self.arraylike_input, + self.arraylike_target, + batch_size=batch_size_in, + steps=steps, + ) + self.assertEqual(adapter.batch_size(), batch_size_out) + + @parameterized.named_parameters( + ("batch_size_5", 5, None, 10, 0), + ("batch_size_4", 4, None, 13, 2), + ("steps_1", None, 1, 1, 0), + ("steps_5", None, 5, 5, 0), + ("steps_4", None, 4, 4, 11), ) - self.assertEqual(data_handler.inferred_steps, 4) # 20 samples / 5 bs = 4 - - # Execute
fit()-loop - for epoch, iterator in data_handler.enumerate_epochs(): - self.model.reset_metrics() - with data_handler.catch_stop_iteration(): - for step in data_handler.steps(): - with tf.profiler.experimental.Trace( - 'train', - epoch_num=epoch, - step_num=step, - batch_size=self.sequence_input._current_batch_size, - _r=1, - ): - if data_handler.should_sync: - context.async_wait() - if self.model.stop_training: - break - - # Check state after fit() - self.assertEqual(data_handler.inferred_steps, 2) # 20 samples / 10 bs = 2 - - -class GenericArrayLikeDataAdapterTest(DataAdapterTestBase): - - def setUp(self): - super().setUp() - self.adapter_cls = data_adapter.GenericArrayLikeDataAdapter - - def test_can_handle_some_numpy(self): - self.assertTrue(self.adapter_cls.can_handle( - self.arraylike_input)) - self.assertTrue( - self.adapter_cls.can_handle(self.arraylike_input, - self.arraylike_target)) - - # Because adapters are mutually exclusive, don't handle cases - # where all the data is numpy or an eagertensor - self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) - self.assertFalse( - self.adapter_cls.can_handle(self.numpy_input, - self.numpy_target)) - self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) - self.assertFalse( - self.adapter_cls.can_handle(self.tensor_input, self.tensor_target)) - - # But do handle mixes that include generic arraylike data - self.assertTrue( - self.adapter_cls.can_handle(self.numpy_input, - self.arraylike_target)) - self.assertTrue( - self.adapter_cls.can_handle(self.arraylike_input, - self.numpy_target)) - self.assertTrue( - self.adapter_cls.can_handle(self.arraylike_input, - self.tensor_target)) - self.assertTrue( - self.adapter_cls.can_handle(self.tensor_input, - self.arraylike_target)) - - self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) - self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) - self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) - self.assertFalse(self.adapter_cls.can_handle(self.text_input)) - self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) - - def test_size(self): - adapter = self.adapter_cls( - self.arraylike_input, - self.arraylike_target, batch_size=5) - self.assertEqual(adapter.get_size(), 10) - self.assertFalse(adapter.has_partial_batch()) - - def test_epochs(self): - num_epochs = 3 - adapter = self.adapter_cls( - self.arraylike_input, - self.numpy_target, batch_size=5, epochs=num_epochs) - ds_iter = iter(adapter.get_dataset()) - num_batches_per_epoch = self.numpy_input.shape[0] // 5 - for _ in range(num_batches_per_epoch * num_epochs): - next(ds_iter) - with self.assertRaises(StopIteration): - next(ds_iter) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training(self): - # First verify that DummyArrayLike can't be converted to a Tensor - with self.assertRaises(TypeError): - tf.convert_to_tensor(self.arraylike_input) - - # Then train on the array like. - # It should not be converted to a tensor directly (which would force it into - # memory), only the sliced data should be converted. 
- self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(self.arraylike_input, - self.arraylike_target, batch_size=5) - self.model.fit(self.arraylike_input, - self.arraylike_target, - shuffle=True, batch_size=5) - self.model.fit(self.arraylike_input, - self.arraylike_target, - shuffle='batch', batch_size=5) - self.model.evaluate(self.arraylike_input, - self.arraylike_target, batch_size=5) - self.model.predict(self.arraylike_input, batch_size=5) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training_numpy_target(self): - self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(self.arraylike_input, - self.numpy_target, batch_size=5) - self.model.fit(self.arraylike_input, - self.numpy_target, shuffle=True, - batch_size=5) - self.model.fit(self.arraylike_input, - self.numpy_target, shuffle='batch', - batch_size=5) - self.model.evaluate(self.arraylike_input, - self.numpy_target, batch_size=5) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training_tensor_target(self): - self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(self.arraylike_input, - self.tensor_target, batch_size=5) - self.model.fit(self.arraylike_input, - self.tensor_target, shuffle=True, - batch_size=5) - self.model.fit(self.arraylike_input, - self.tensor_target, shuffle='batch', - batch_size=5) - self.model.evaluate(self.arraylike_input, - self.tensor_target, batch_size=5) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_shuffle_correctness(self): - num_samples = 100 - batch_size = 32 - x = DummyArrayLike(np.arange(num_samples)) - np.random.seed(99) - adapter = self.adapter_cls( - x, y=None, batch_size=batch_size, shuffle=True, epochs=2) - - def _get_epoch(ds_iter): - ds_data = [] - for _ in range(int(math.ceil(num_samples / batch_size))): - ds_data.append(next(ds_iter).numpy()) - return np.concatenate(ds_data) - - ds_iter = iter(adapter.get_dataset()) - - # First epoch. - epoch_data = _get_epoch(ds_iter) - # Check that shuffling occurred. - self.assertNotAllClose(x, epoch_data) - # Check that each elements appears, and only once. - self.assertAllClose(x, np.sort(epoch_data)) - - # Second epoch. - second_epoch_data = _get_epoch(ds_iter) - # Check that shuffling occurred. - self.assertNotAllClose(x, second_epoch_data) - # Check that shuffling is different across epochs. - self.assertNotAllClose(epoch_data, second_epoch_data) - # Check that each elements appears, and only once. - self.assertAllClose(x, np.sort(second_epoch_data)) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_batch_shuffle_correctness(self): - num_samples = 100 - batch_size = 6 - x = DummyArrayLike(np.arange(num_samples)) - np.random.seed(99) - adapter = self.adapter_cls( - x, y=None, batch_size=batch_size, shuffle='batch', epochs=2) - - def _get_epoch_batches(ds_iter): - ds_data = [] - for _ in range(int(math.ceil(num_samples / batch_size))): - ds_data.append(next(ds_iter)[0].numpy()) - return ds_data - - ds_iter = iter(adapter.get_dataset()) - - # First epoch. - epoch_batch_data = _get_epoch_batches(ds_iter) - epoch_data = np.concatenate(epoch_batch_data) - - def _verify_batch(batch): - # Verify that a batch contains only contiguous data, but that it has - # been shuffled. 
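The docstring above compresses the shuffle="batch" contract: every batch is a contiguous block of the source array whose internal order has been permuted, and the blocks themselves also appear in shuffled order over the epoch. The per-batch property as a standalone check (illustrative only, assuming 1-D np.arange-style data as in these tests):

import numpy as np

def is_shuffled_contiguous_block(batch):
    # A contiguous block sorts to consecutive values; a shuffled one
    # differs from its own sorted order.
    ordered = np.sort(batch)
    consecutive = bool((np.diff(ordered) == 1).all())
    return consecutive and not np.array_equal(batch, ordered)

assert is_shuffled_contiguous_block(np.array([12, 10, 13, 11]))
assert not is_shuffled_contiguous_block(np.array([3, 1, 9, 2]))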
- shuffled_batch = np.sort(batch) - self.assertNotAllClose(batch, shuffled_batch) - for i in range(1, len(batch)): - self.assertEqual(shuffled_batch[i-1] + 1, shuffled_batch[i]) - - # Assert that the data within each batch is shuffled contiguous data - for batch in epoch_batch_data: - _verify_batch(batch) - - # Check that individual batches are unshuffled - # Check that shuffling occurred. - self.assertNotAllClose(x, epoch_data) - # Check that each elements appears, and only once. - self.assertAllClose(x, np.sort(epoch_data)) - - # Second epoch. - second_epoch_batch_data = _get_epoch_batches(ds_iter) - second_epoch_data = np.concatenate(second_epoch_batch_data) - - # Assert that the data within each batch remains contiguous - for batch in second_epoch_batch_data: - _verify_batch(batch) - - # Check that shuffling occurred. - self.assertNotAllClose(x, second_epoch_data) - # Check that shuffling is different across epochs. - self.assertNotAllClose(epoch_data, second_epoch_data) - # Check that each elements appears, and only once. - self.assertAllClose(x, np.sort(second_epoch_data)) - - @parameterized.named_parameters( - ('batch_size_5', 5, None, 5), - ('batch_size_50', 50, 4, 50), # Sanity check: batch_size takes precedence - ('steps_1', None, 1, 50), - ('steps_4', None, 4, 13), - ) - def test_batch_size(self, batch_size_in, steps, batch_size_out): - adapter = self.adapter_cls( - self.arraylike_input, - self.arraylike_target, batch_size=batch_size_in, - steps=steps) - self.assertEqual(adapter.batch_size(), batch_size_out) - - @parameterized.named_parameters( - ('batch_size_5', 5, None, 10, 0), - ('batch_size_4', 4, None, 13, 2), - ('steps_1', None, 1, 1, 0), - ('steps_5', None, 5, 5, 0), - ('steps_4', None, 4, 4, 11), - ) - def test_partial_batch( - self, batch_size_in, steps, size, partial_batch_size): - adapter = self.adapter_cls( - self.arraylike_input, self.arraylike_target, - batch_size=batch_size_in, - steps=steps) - self.assertEqual(adapter.get_size(), size) # 50/steps - self.assertEqual(adapter.has_partial_batch(), bool(partial_batch_size)) - self.assertEqual(adapter.partial_batch_size(), partial_batch_size or None) + def test_partial_batch( + self, batch_size_in, steps, size, partial_batch_size + ): + adapter = self.adapter_cls( + self.arraylike_input, + self.arraylike_target, + batch_size=batch_size_in, + steps=steps, + ) + self.assertEqual(adapter.get_size(), size) # 50/steps + self.assertEqual(adapter.has_partial_batch(), bool(partial_batch_size)) + self.assertEqual( + adapter.partial_batch_size(), partial_batch_size or None + ) class DatasetAdapterTest(DataAdapterTestBase): - - def setUp(self): - super().setUp() - self.adapter_cls = data_adapter.DatasetAdapter - - def test_can_handle(self): - self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) - self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) - self.assertTrue(self.adapter_cls.can_handle(self.dataset_input)) - self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) - self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training(self): - dataset = self.adapter_cls(self.dataset_input).get_dataset() - self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(dataset) - - def test_size(self): - adapter = self.adapter_cls(self.dataset_input) - self.assertIsNone(adapter.get_size()) - - def test_batch_size(self): - adapter = 
self.adapter_cls(self.dataset_input) - self.assertIsNone(adapter.batch_size()) - - def test_partial_batch(self): - adapter = self.adapter_cls(self.dataset_input) - self.assertFalse(adapter.has_partial_batch()) - self.assertIsNone(adapter.partial_batch_size()) - - def test_invalid_targets_argument(self): - with self.assertRaisesRegex(ValueError, r'`y` argument is not supported'): - self.adapter_cls(self.dataset_input, y=self.dataset_input) - - def test_invalid_sample_weights_argument(self): - with self.assertRaisesRegex(ValueError, - r'`sample_weight` argument is not supported'): - self.adapter_cls(self.dataset_input, sample_weights=self.dataset_input) + def setUp(self): + super().setUp() + self.adapter_cls = data_adapter.DatasetAdapter + + def test_can_handle(self): + self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) + self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) + self.assertTrue(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training(self): + dataset = self.adapter_cls(self.dataset_input).get_dataset() + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit(dataset) + + def test_size(self): + adapter = self.adapter_cls(self.dataset_input) + self.assertIsNone(adapter.get_size()) + + def test_batch_size(self): + adapter = self.adapter_cls(self.dataset_input) + self.assertIsNone(adapter.batch_size()) + + def test_partial_batch(self): + adapter = self.adapter_cls(self.dataset_input) + self.assertFalse(adapter.has_partial_batch()) + self.assertIsNone(adapter.partial_batch_size()) + + def test_invalid_targets_argument(self): + with self.assertRaisesRegex( + ValueError, r"`y` argument is not supported" + ): + self.adapter_cls(self.dataset_input, y=self.dataset_input) + + def test_invalid_sample_weights_argument(self): + with self.assertRaisesRegex( + ValueError, r"`sample_weight` argument is not supported" + ): + self.adapter_cls( + self.dataset_input, sample_weights=self.dataset_input + ) class GeneratorDataAdapterTest(DataAdapterTestBase): - - def setUp(self): - super().setUp() - self.adapter_cls = data_adapter.GeneratorDataAdapter - - def test_can_handle(self): - self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) - self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) - self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) - self.assertTrue(self.adapter_cls.can_handle(self.generator_input)) - self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) - self.assertFalse(self.adapter_cls.can_handle(self.text_input)) - self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training(self): - self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(self.generator_input, steps_per_epoch=10) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - @test_utils.run_v2_only - @data_utils.dont_use_multiprocessing_pool - def test_with_multiprocessing_training(self): - self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(self.iterator_input, workers=1, 
use_multiprocessing=True, - max_queue_size=10, steps_per_epoch=10) - # Fit twice to ensure there isn't any duplication that prevent the worker - # from starting. - self.model.fit(self.iterator_input, workers=1, use_multiprocessing=True, - max_queue_size=10, steps_per_epoch=10) - - def test_size(self): - adapter = self.adapter_cls(self.generator_input) - self.assertIsNone(adapter.get_size()) - - def test_batch_size(self): - adapter = self.adapter_cls(self.generator_input) - self.assertEqual(adapter.batch_size(), None) - self.assertEqual(adapter.representative_batch_size(), 5) - - def test_partial_batch(self): - adapter = self.adapter_cls(self.generator_input) - self.assertFalse(adapter.has_partial_batch()) - self.assertIsNone(adapter.partial_batch_size()) - - def test_invalid_targets_argument(self): - with self.assertRaisesRegex(ValueError, r'`y` argument is not supported'): - self.adapter_cls(self.generator_input, y=self.generator_input) - - def test_invalid_sample_weights_argument(self): - with self.assertRaisesRegex(ValueError, - r'`sample_weight` argument is not supported'): - self.adapter_cls( - self.generator_input, sample_weights=self.generator_input) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_not_shuffled(self): - def generator(): - for i in range(10): - yield np.ones((1, 1)) * i - - adapter = self.adapter_cls(generator(), shuffle=True) - for i, data in enumerate(adapter.get_dataset()): - self.assertEqual(i, data[0].numpy().flatten()) - - def test_model_without_forward_pass(self): - - class MyModel(keras.Model): - - def train_step(self, data): - return {'loss': 0.} - - def test_step(self, data): - return {'loss': 0.} - - model = MyModel() - model.compile('rmsprop') - model.fit(self.generator_input, steps_per_epoch=5) - out = model.evaluate(self.generator_input, steps=5) - self.assertEqual(out, 0) + def setUp(self): + super().setUp() + self.adapter_cls = data_adapter.GeneratorDataAdapter + + def test_can_handle(self): + self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) + self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) + self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertTrue(self.adapter_cls.can_handle(self.generator_input)) + self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle(self.text_input)) + self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training(self): + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit(self.generator_input, steps_per_epoch=10) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @test_utils.run_v2_only + @data_utils.dont_use_multiprocessing_pool + def test_with_multiprocessing_training(self): + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit( + self.iterator_input, + workers=1, + use_multiprocessing=True, + max_queue_size=10, + steps_per_epoch=10, + ) + # Fit twice to ensure there isn't any duplication that prevent the + # worker from starting. 
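For reference, the user-facing shape of generator input is small. A minimal sketch (hypothetical model and data shapes; steps_per_epoch is mandatory because a plain Python generator exposes no length):

import numpy as np
from tensorflow import keras

def batch_generator():
    # Yields (inputs, targets) batches forever; fit() relies on
    # steps_per_epoch to delimit each epoch.
    while True:
        x = np.random.random((5, 10))
        y = np.random.randint(0, 2, size=(5,))
        yield x, y

model = keras.Sequential(
    [keras.layers.Dense(2, input_shape=(10,), activation="softmax")]
)
model.compile(loss="sparse_categorical_crossentropy", optimizer="sgd")
model.fit(batch_generator(), steps_per_epoch=10, epochs=2)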
+ self.model.fit( + self.iterator_input, + workers=1, + use_multiprocessing=True, + max_queue_size=10, + steps_per_epoch=10, + ) + + def test_size(self): + adapter = self.adapter_cls(self.generator_input) + self.assertIsNone(adapter.get_size()) + + def test_batch_size(self): + adapter = self.adapter_cls(self.generator_input) + self.assertEqual(adapter.batch_size(), None) + self.assertEqual(adapter.representative_batch_size(), 5) + + def test_partial_batch(self): + adapter = self.adapter_cls(self.generator_input) + self.assertFalse(adapter.has_partial_batch()) + self.assertIsNone(adapter.partial_batch_size()) + + def test_invalid_targets_argument(self): + with self.assertRaisesRegex( + ValueError, r"`y` argument is not supported" + ): + self.adapter_cls(self.generator_input, y=self.generator_input) + + def test_invalid_sample_weights_argument(self): + with self.assertRaisesRegex( + ValueError, r"`sample_weight` argument is not supported" + ): + self.adapter_cls( + self.generator_input, sample_weights=self.generator_input + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_not_shuffled(self): + def generator(): + for i in range(10): + yield np.ones((1, 1)) * i + + adapter = self.adapter_cls(generator(), shuffle=True) + for i, data in enumerate(adapter.get_dataset()): + self.assertEqual(i, data[0].numpy().flatten()) + + def test_model_without_forward_pass(self): + class MyModel(keras.Model): + def train_step(self, data): + return {"loss": 0.0} + + def test_step(self, data): + return {"loss": 0.0} + + model = MyModel() + model.compile("rmsprop") + model.fit(self.generator_input, steps_per_epoch=5) + out = model.evaluate(self.generator_input, steps=5) + self.assertEqual(out, 0) class KerasSequenceAdapterTest(DataAdapterTestBase): - - def setUp(self): - super().setUp() - self.adapter_cls = data_adapter.KerasSequenceAdapter - - def test_can_handle(self): - self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) - self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) - self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) - self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) - self.assertTrue(self.adapter_cls.can_handle(self.sequence_input)) - self.assertFalse(self.adapter_cls.can_handle(self.text_input)) - self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_training(self): - self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(self.sequence_input) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - @test_utils.run_v2_only - @data_utils.dont_use_multiprocessing_pool - def test_with_multiprocessing_training(self): - self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.model.fit(self.sequence_input, workers=1, use_multiprocessing=True, - max_queue_size=10, steps_per_epoch=10) - # Fit twice to ensure there isn't any duplication that prevent the worker - # from starting. 
- self.model.fit(self.sequence_input, workers=1, use_multiprocessing=True, - max_queue_size=10, steps_per_epoch=10) - - def test_size(self): - adapter = self.adapter_cls(self.sequence_input) - self.assertEqual(adapter.get_size(), 10) - - def test_batch_size(self): - adapter = self.adapter_cls(self.sequence_input) - self.assertEqual(adapter.batch_size(), None) - self.assertEqual(adapter.representative_batch_size(), 5) - - def test_partial_batch(self): - adapter = self.adapter_cls(self.sequence_input) - self.assertFalse(adapter.has_partial_batch()) - self.assertIsNone(adapter.partial_batch_size()) - - def test_invalid_targets_argument(self): - with self.assertRaisesRegex(ValueError, r'`y` argument is not supported'): - self.adapter_cls(self.sequence_input, y=self.sequence_input) - - def test_invalid_sample_weights_argument(self): - with self.assertRaisesRegex(ValueError, - r'`sample_weight` argument is not supported'): - self.adapter_cls(self.sequence_input, sample_weights=self.sequence_input) + def setUp(self): + super().setUp() + self.adapter_cls = data_adapter.KerasSequenceAdapter + + def test_can_handle(self): + self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) + self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) + self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + self.assertTrue(self.adapter_cls.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle(self.text_input)) + self.assertFalse(self.adapter_cls.can_handle(self.bytes_input)) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_training(self): + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit(self.sequence_input) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @test_utils.run_v2_only + @data_utils.dont_use_multiprocessing_pool + def test_with_multiprocessing_training(self): + self.model.compile( + loss="sparse_categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.model.fit( + self.sequence_input, + workers=1, + use_multiprocessing=True, + max_queue_size=10, + steps_per_epoch=10, + ) + # Fit twice to ensure there isn't any duplication that prevent the + # worker from starting. 
+ self.model.fit( + self.sequence_input, + workers=1, + use_multiprocessing=True, + max_queue_size=10, + steps_per_epoch=10, + ) + + def test_size(self): + adapter = self.adapter_cls(self.sequence_input) + self.assertEqual(adapter.get_size(), 10) + + def test_batch_size(self): + adapter = self.adapter_cls(self.sequence_input) + self.assertEqual(adapter.batch_size(), None) + self.assertEqual(adapter.representative_batch_size(), 5) + + def test_partial_batch(self): + adapter = self.adapter_cls(self.sequence_input) + self.assertFalse(adapter.has_partial_batch()) + self.assertIsNone(adapter.partial_batch_size()) + + def test_invalid_targets_argument(self): + with self.assertRaisesRegex( + ValueError, r"`y` argument is not supported" + ): + self.adapter_cls(self.sequence_input, y=self.sequence_input) + + def test_invalid_sample_weights_argument(self): + with self.assertRaisesRegex( + ValueError, r"`sample_weight` argument is not supported" + ): + self.adapter_cls( + self.sequence_input, sample_weights=self.sequence_input + ) class KerasSequenceAdapterSparseTest(KerasSequenceAdapterTest): - - def setUp(self): - super().setUp() - self.sequence_input = TestSparseSequence(self.batch_size, 10) + def setUp(self): + super().setUp() + self.sequence_input = TestSparseSequence(self.batch_size, 10) class KerasSequenceAdapterRaggedTest(KerasSequenceAdapterTest): + def setUp(self): + super().setUp() + self.sequence_input = TestRaggedSequence(self.batch_size, 10) - def setUp(self): - super().setUp() - self.sequence_input = TestRaggedSequence(self.batch_size, 10) - - self.model = keras.models.Sequential([ - keras.layers.Input(shape=(None,), ragged=True), - keras.layers.Embedding(10, 10), - keras.layers.Lambda(tf.reduce_mean, arguments=dict(axis=1)), - keras.layers.Dense(8, input_shape=(10,), activation='relu'), - ]) + self.model = keras.models.Sequential( + [ + keras.layers.Input(shape=(None,), ragged=True), + keras.layers.Embedding(10, 10), + keras.layers.Lambda(tf.reduce_mean, arguments=dict(axis=1)), + keras.layers.Dense(8, input_shape=(10,), activation="relu"), + ] + ) class DataHandlerTest(test_combinations.TestCase): - - def test_finite_dataset_with_steps_per_epoch(self): - data = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3]).batch(1) - # User can choose to only partially consume `Dataset`. - data_handler = data_adapter.DataHandler( - data, initial_epoch=0, epochs=2, steps_per_epoch=2) - self.assertEqual(data_handler.inferred_steps, 2) - self.assertFalse(data_handler._adapter.should_recreate_iterator()) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator).numpy()) - returned_data.append(epoch_data) - self.assertEqual(returned_data, [[0, 1], [2, 3]]) - - def test_finite_dataset_without_steps_per_epoch(self): - data = tf.data.Dataset.from_tensor_slices([0, 1, 2]).batch(1) - data_handler = data_adapter.DataHandler(data, initial_epoch=0, epochs=2) - self.assertEqual(data_handler.inferred_steps, 3) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator).numpy()) - returned_data.append(epoch_data) - self.assertEqual(returned_data, [[0, 1, 2], [0, 1, 2]]) - - def test_finite_dataset_with_steps_per_epoch_exact_size(self): - data = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3]).batch(1) - # If user specifies exact size of `Dataset` as `steps_per_epoch`, - # create a new iterator each epoch. 
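Recreating the iterator matters because a fully consumed tf.data iterator can only keep raising StopIteration. The enumerate_epochs/steps idiom these DataHandler tests drive is essentially the loop Model.fit runs internally; a condensed sketch (data_adapter is a private Keras module, so the import path is an assumption):

import tensorflow as tf
from keras.engine import data_adapter

data = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3]).batch(1)
handler = data_adapter.DataHandler(
    data, initial_epoch=0, epochs=2, steps_per_epoch=2
)
for epoch, iterator in handler.enumerate_epochs():
    with handler.catch_stop_iteration():
        for step in handler.steps():
            batch = next(iterator)  # one training step's worth of data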
- data_handler = data_adapter.DataHandler( - data, initial_epoch=0, epochs=2, steps_per_epoch=4) - self.assertTrue(data_handler._adapter.should_recreate_iterator()) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator).numpy()) - returned_data.append(epoch_data) - self.assertEqual(returned_data, [[0, 1, 2, 3], [0, 1, 2, 3]]) - - def test_infinite_dataset_with_steps_per_epoch(self): - data = tf.data.Dataset.from_tensor_slices([0, 1, 2]).batch(1).repeat() - data_handler = data_adapter.DataHandler( - data, initial_epoch=0, epochs=2, steps_per_epoch=3) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator).numpy()) - returned_data.append(epoch_data) - self.assertEqual(returned_data, [[0, 1, 2], [0, 1, 2]]) - - def test_unknown_cardinality_dataset_with_steps_per_epoch(self): - ds = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3, 4, 5, 6]) - filtered_ds = ds.filter(lambda x: x < 4) - self.assertEqual( - tf.data.experimental.cardinality(filtered_ds).numpy(), tf.data.experimental.UNKNOWN_CARDINALITY) - - # User can choose to only partially consume `Dataset`. - data_handler = data_adapter.DataHandler( - filtered_ds, initial_epoch=0, epochs=2, steps_per_epoch=2) - self.assertFalse(data_handler._adapter.should_recreate_iterator()) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator)) - returned_data.append(epoch_data) - returned_data = self.evaluate(returned_data) - self.assertEqual(returned_data, [[0, 1], [2, 3]]) - self.assertEqual(data_handler.inferred_steps, 2) - - def test_unknown_cardinality_dataset_without_steps_per_epoch(self): - ds = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3, 4, 5, 6]) - filtered_ds = ds.filter(lambda x: x < 4) - self.assertEqual( - tf.data.experimental.cardinality(filtered_ds).numpy(), tf.data.experimental.UNKNOWN_CARDINALITY) - - data_handler = data_adapter.DataHandler( - filtered_ds, initial_epoch=0, epochs=2) - self.assertEqual(data_handler.inferred_steps, None) - self.assertTrue(data_handler._adapter.should_recreate_iterator()) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - with data_handler.catch_stop_iteration(): - for _ in data_handler.steps(): - epoch_data.append(next(iterator)) - returned_data.append(epoch_data) - returned_data = self.evaluate(returned_data) - self.assertEqual(returned_data, [[0, 1, 2, 3], [0, 1, 2, 3]]) - self.assertEqual(data_handler.inferred_steps, 4) - - def test_insufficient_data(self): - ds = tf.data.Dataset.from_tensor_slices([0, 1]) - ds = ds.filter(lambda *args, **kwargs: True) - data_handler = data_adapter.DataHandler( - ds, initial_epoch=0, epochs=2, steps_per_epoch=3) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - with data_handler.catch_stop_iteration(): - epoch_data.append(next(iterator)) - returned_data.append(epoch_data) - returned_data = self.evaluate(returned_data) - self.assertTrue(data_handler._insufficient_data) - self.assertEqual(returned_data, [[0, 1]]) - - def test_numpy(self): - x = np.array([0, 1, 2]) - y = np.array([0, 2, 4]) - sw = np.array([0, 4, 8]) - data_handler = data_adapter.DataHandler( - x=x, y=y, sample_weight=sw, batch_size=1, epochs=2) - returned_data = [] - for 
_, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator)) - returned_data.append(epoch_data) - returned_data = self.evaluate(returned_data) - self.assertEqual(returned_data, - [[(0, 0, 0), (1, 2, 4), - (2, 4, 8)], [(0, 0, 0), (1, 2, 4), (2, 4, 8)]]) - - def test_generator(self): - - def generator(): - for _ in range(2): - for step in range(3): - yield (tf.convert_to_tensor([step]),) - - data_handler = data_adapter.DataHandler( - generator(), epochs=2, steps_per_epoch=3) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator)) - returned_data.append(epoch_data) - returned_data = self.evaluate(returned_data) - self.assertEqual(returned_data, [[([0],), ([1],), - ([2],)], [([0],), ([1],), ([2],)]]) - - def test_composite_tensor(self): - st = tf.SparseTensor( - indices=[[0, 0], [1, 0], [2, 0]], values=[0, 1, 2], dense_shape=[3, 1]) - data_handler = data_adapter.DataHandler(st, epochs=2, steps_per_epoch=3) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator)) - returned_data.append(epoch_data) - returned_data = self.evaluate( - tf.nest.map_structure(tf.sparse.to_dense, returned_data)) - self.assertEqual(returned_data, [[([0],), ([1],), - ([2],)], [([0],), ([1],), ([2],)]]) - - def test_iterator(self): - def generator(): - for _ in range(2): - for step in range(3): - yield (tf.convert_to_tensor([step]),) - - it = iter(tf.data.Dataset.from_generator( - generator, output_types=('float32',))) - data_handler = data_adapter.DataHandler(it, epochs=2, steps_per_epoch=3) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator)) - returned_data.append(epoch_data) - returned_data = self.evaluate(returned_data) - self.assertEqual(returned_data, [[([0],), ([1],), ([2],)], - [([0],), ([1],), ([2],)]]) - - def test_list_of_scalars(self): - data_handler = data_adapter.DataHandler([[0], [1], [2]], - epochs=2, - steps_per_epoch=3) - returned_data = [] - for _, iterator in data_handler.enumerate_epochs(): - epoch_data = [] - for _ in data_handler.steps(): - epoch_data.append(next(iterator)) - returned_data.append(epoch_data) - returned_data = self.evaluate(returned_data) - self.assertEqual(returned_data, [[([0],), ([1],), - ([2],)], [([0],), ([1],), ([2],)]]) - - def test_class_weight_user_errors(self): - with self.assertRaisesRegex(ValueError, 'to be a dict with keys'): - data_adapter.DataHandler( - x=[[0], [1], [2]], - y=[[2], [1], [0]], - batch_size=1, - sample_weight=[[1.], [2.], [4.]], - class_weight={ - 0: 0.5, - 1: 1., - 3: 1.5 # Skips class `2`. 
- }) - - with self.assertRaisesRegex(ValueError, 'with a single output'): - data_adapter.DataHandler( - x=np.ones((10, 1)), - y=[np.ones((10, 1)), np.zeros((10, 1))], - batch_size=2, - class_weight={ - 0: 0.5, - 1: 1., - 2: 1.5 - }) - - @parameterized.named_parameters(('numpy', True), ('dataset', False)) - def test_single_x_input_no_tuple_wrapping(self, use_numpy): - x = np.ones((10, 1)) - - if use_numpy: - batch_size = 2 - else: - x = tf.data.Dataset.from_tensor_slices(x).batch(2) - batch_size = None - - data_handler = data_adapter.DataHandler(x, batch_size=batch_size) - for _, iterator in data_handler.enumerate_epochs(): - for _ in data_handler.steps(): - # Check that single x input is not wrapped in a tuple. - self.assertIsInstance(next(iterator), tf.Tensor) + def test_finite_dataset_with_steps_per_epoch(self): + data = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3]).batch(1) + # User can choose to only partially consume `Dataset`. + data_handler = data_adapter.DataHandler( + data, initial_epoch=0, epochs=2, steps_per_epoch=2 + ) + self.assertEqual(data_handler.inferred_steps, 2) + self.assertFalse(data_handler._adapter.should_recreate_iterator()) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator).numpy()) + returned_data.append(epoch_data) + self.assertEqual(returned_data, [[0, 1], [2, 3]]) + + def test_finite_dataset_without_steps_per_epoch(self): + data = tf.data.Dataset.from_tensor_slices([0, 1, 2]).batch(1) + data_handler = data_adapter.DataHandler(data, initial_epoch=0, epochs=2) + self.assertEqual(data_handler.inferred_steps, 3) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator).numpy()) + returned_data.append(epoch_data) + self.assertEqual(returned_data, [[0, 1, 2], [0, 1, 2]]) + + def test_finite_dataset_with_steps_per_epoch_exact_size(self): + data = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3]).batch(1) + # If user specifies exact size of `Dataset` as `steps_per_epoch`, + # create a new iterator each epoch. + data_handler = data_adapter.DataHandler( + data, initial_epoch=0, epochs=2, steps_per_epoch=4 + ) + self.assertTrue(data_handler._adapter.should_recreate_iterator()) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator).numpy()) + returned_data.append(epoch_data) + self.assertEqual(returned_data, [[0, 1, 2, 3], [0, 1, 2, 3]]) + + def test_infinite_dataset_with_steps_per_epoch(self): + data = tf.data.Dataset.from_tensor_slices([0, 1, 2]).batch(1).repeat() + data_handler = data_adapter.DataHandler( + data, initial_epoch=0, epochs=2, steps_per_epoch=3 + ) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator).numpy()) + returned_data.append(epoch_data) + self.assertEqual(returned_data, [[0, 1, 2], [0, 1, 2]]) + + def test_unknown_cardinality_dataset_with_steps_per_epoch(self): + ds = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3, 4, 5, 6]) + filtered_ds = ds.filter(lambda x: x < 4) + self.assertEqual( + tf.data.experimental.cardinality(filtered_ds).numpy(), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + + # User can choose to only partially consume `Dataset`. 
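The partial-consumption behavior asserted below follows from not recreating the iterator between epochs; the same effect with a plain Python iterator, as a minimal analogy:

# Taking two items per "epoch" without recreating the iterator makes
# each epoch resume where the previous one stopped.
it = iter([0, 1, 2, 3])
epoch_1 = [next(it) for _ in range(2)]  # [0, 1]
epoch_2 = [next(it) for _ in range(2)]  # [2, 3]
assert epoch_1 == [0, 1] and epoch_2 == [2, 3]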
+ data_handler = data_adapter.DataHandler( + filtered_ds, initial_epoch=0, epochs=2, steps_per_epoch=2 + ) + self.assertFalse(data_handler._adapter.should_recreate_iterator()) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator)) + returned_data.append(epoch_data) + returned_data = self.evaluate(returned_data) + self.assertEqual(returned_data, [[0, 1], [2, 3]]) + self.assertEqual(data_handler.inferred_steps, 2) + + def test_unknown_cardinality_dataset_without_steps_per_epoch(self): + ds = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3, 4, 5, 6]) + filtered_ds = ds.filter(lambda x: x < 4) + self.assertEqual( + tf.data.experimental.cardinality(filtered_ds).numpy(), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + + data_handler = data_adapter.DataHandler( + filtered_ds, initial_epoch=0, epochs=2 + ) + self.assertEqual(data_handler.inferred_steps, None) + self.assertTrue(data_handler._adapter.should_recreate_iterator()) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + with data_handler.catch_stop_iteration(): + for _ in data_handler.steps(): + epoch_data.append(next(iterator)) + returned_data.append(epoch_data) + returned_data = self.evaluate(returned_data) + self.assertEqual(returned_data, [[0, 1, 2, 3], [0, 1, 2, 3]]) + self.assertEqual(data_handler.inferred_steps, 4) + + def test_insufficient_data(self): + ds = tf.data.Dataset.from_tensor_slices([0, 1]) + ds = ds.filter(lambda *args, **kwargs: True) + data_handler = data_adapter.DataHandler( + ds, initial_epoch=0, epochs=2, steps_per_epoch=3 + ) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + with data_handler.catch_stop_iteration(): + epoch_data.append(next(iterator)) + returned_data.append(epoch_data) + returned_data = self.evaluate(returned_data) + self.assertTrue(data_handler._insufficient_data) + self.assertEqual(returned_data, [[0, 1]]) + + def test_numpy(self): + x = np.array([0, 1, 2]) + y = np.array([0, 2, 4]) + sw = np.array([0, 4, 8]) + data_handler = data_adapter.DataHandler( + x=x, y=y, sample_weight=sw, batch_size=1, epochs=2 + ) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator)) + returned_data.append(epoch_data) + returned_data = self.evaluate(returned_data) + self.assertEqual( + returned_data, + [ + [(0, 0, 0), (1, 2, 4), (2, 4, 8)], + [(0, 0, 0), (1, 2, 4), (2, 4, 8)], + ], + ) + + def test_generator(self): + def generator(): + for _ in range(2): + for step in range(3): + yield (tf.convert_to_tensor([step]),) + + data_handler = data_adapter.DataHandler( + generator(), epochs=2, steps_per_epoch=3 + ) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator)) + returned_data.append(epoch_data) + returned_data = self.evaluate(returned_data) + self.assertEqual( + returned_data, [[([0],), ([1],), ([2],)], [([0],), ([1],), ([2],)]] + ) + + def test_composite_tensor(self): + st = tf.SparseTensor( + indices=[[0, 0], [1, 0], [2, 0]], + values=[0, 1, 2], + dense_shape=[3, 1], + ) + data_handler = data_adapter.DataHandler(st, epochs=2, steps_per_epoch=3) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + 
epoch_data.append(next(iterator)) + returned_data.append(epoch_data) + returned_data = self.evaluate( + tf.nest.map_structure(tf.sparse.to_dense, returned_data) + ) + self.assertEqual( + returned_data, [[([0],), ([1],), ([2],)], [([0],), ([1],), ([2],)]] + ) + + def test_iterator(self): + def generator(): + for _ in range(2): + for step in range(3): + yield (tf.convert_to_tensor([step]),) + + it = iter( + tf.data.Dataset.from_generator(generator, output_types=("float32",)) + ) + data_handler = data_adapter.DataHandler(it, epochs=2, steps_per_epoch=3) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator)) + returned_data.append(epoch_data) + returned_data = self.evaluate(returned_data) + self.assertEqual( + returned_data, [[([0],), ([1],), ([2],)], [([0],), ([1],), ([2],)]] + ) + + def test_list_of_scalars(self): + data_handler = data_adapter.DataHandler( + [[0], [1], [2]], epochs=2, steps_per_epoch=3 + ) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator)) + returned_data.append(epoch_data) + returned_data = self.evaluate(returned_data) + self.assertEqual( + returned_data, [[([0],), ([1],), ([2],)], [([0],), ([1],), ([2],)]] + ) + + def test_class_weight_user_errors(self): + with self.assertRaisesRegex(ValueError, "to be a dict with keys"): + data_adapter.DataHandler( + x=[[0], [1], [2]], + y=[[2], [1], [0]], + batch_size=1, + sample_weight=[[1.0], [2.0], [4.0]], + class_weight={0: 0.5, 1: 1.0, 3: 1.5}, # Skips class `2`. + ) + + with self.assertRaisesRegex(ValueError, "with a single output"): + data_adapter.DataHandler( + x=np.ones((10, 1)), + y=[np.ones((10, 1)), np.zeros((10, 1))], + batch_size=2, + class_weight={0: 0.5, 1: 1.0, 2: 1.5}, + ) + + @parameterized.named_parameters(("one_hot", True), ("sparse", False)) + def test_class_weights_applied(self, one_hot): + num_channels = 3 + num_classes = 5 + batch_size = 2 + image_width = 8 + + input_shape = (batch_size, image_width, image_width, num_channels) + output_shape = (batch_size, image_width, image_width) + + x = tf.random.uniform(input_shape) + sparse_y = tf.random.uniform( + output_shape, maxval=num_classes, dtype=tf.int32 + ) + + if one_hot: + y = tf.one_hot(sparse_y, num_classes) + else: + y = tf.expand_dims(sparse_y, axis=-1) + + # Class weight is equal to class number + 1 + class_weight = dict([(x, x + 1) for x in range(num_classes)]) + + sample_weight = np.array([1, 2]) + + data_handler = data_adapter.DataHandler( + x=x, + y=y, + class_weight=class_weight, + sample_weight=sample_weight, + batch_size=batch_size, + epochs=1, + ) + returned_data = [] + for _, iterator in data_handler.enumerate_epochs(): + epoch_data = [] + for _ in data_handler.steps(): + epoch_data.append(next(iterator)) + returned_data.append(epoch_data) + returned_data = self.evaluate(returned_data) + + # We had only 1 batch and 1 epoch, so we extract x, y, sample_weight + result_x, result_y, result_sample_weight = returned_data[0][0] + self.assertAllEqual(x, result_x) + self.assertAllEqual(y, result_y) + + # Because class weight = class + 1, resulting class weight = y + 1 + # Sample weight is 1 for the first sample, 2 for the second, + # so we double the expected sample weight for the second sample. 
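The same arithmetic in plain numpy, as a small worked example with hypothetical labels (independent of the test's random data):

import numpy as np

class_weight = {c: c + 1 for c in range(5)}
labels = np.array([[0, 2, 4], [1, 3, 0]])  # two samples
per_element = np.vectorize(class_weight.get)(labels)  # equals labels + 1
sample_weight = np.array([1, 2])
combined = per_element * sample_weight[:, None]
# The first sample keeps the bare class weights; the second sample's
# weights are doubled by its sample weight of 2.
assert (combined[0] == labels[0] + 1).all()
assert (combined[1] == 2 * (labels[1] + 1)).all()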
+ self.assertAllEqual(sparse_y[0] + 1, result_sample_weight[0]) + self.assertAllEqual(2 * (sparse_y[1] + 1), result_sample_weight[1]) + + @parameterized.named_parameters(("numpy", True), ("dataset", False)) + def test_single_x_input_no_tuple_wrapping(self, use_numpy): + x = np.ones((10, 1)) + + if use_numpy: + batch_size = 2 + else: + x = tf.data.Dataset.from_tensor_slices(x).batch(2) + batch_size = None + + data_handler = data_adapter.DataHandler(x, batch_size=batch_size) + for _, iterator in data_handler.enumerate_epochs(): + for _ in data_handler.steps(): + # Check that single x input is not wrapped in a tuple. + self.assertIsInstance(next(iterator), tf.Tensor) + + def test_error_if_zero_steps_per_epoch(self): + data = tf.data.Dataset.from_tensor_slices([0, 1, 2, 3]).batch(1) + + with self.assertRaisesRegex( + ValueError, + "Unexpected value for `steps_per_epoch`. Received value is 0.", + ): + data_adapter.DataHandler( + data, initial_epoch=0, epochs=2, steps_per_epoch=0 + ) + + def test_error_if_empty_array_input_data(self): + x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) + y = np.array([0, 1, 1, 0]) + idx = [] + + with self.assertRaisesWithLiteralMatch( + ValueError, + "Expected input data to be non-empty.", + ): + data_adapter.DataHandler(x[idx], y[idx]) + + def test_error_if_empty_dataset_input_data(self): + data = tf.data.Dataset.from_tensor_slices([]).batch(1) + + with self.assertRaisesWithLiteralMatch( + ValueError, + "Expected input data to be non-empty.", + ): + data_adapter.DataHandler(data) class TestValidationSplit(test_combinations.TestCase): - - @parameterized.named_parameters(('numpy_arrays', True), ('tensors', False)) - def test_validation_split_unshuffled(self, use_numpy): - if use_numpy: - x = np.array([0, 1, 2, 3, 4]) - y = np.array([0, 2, 4, 6, 8]) - sw = np.array([0, 4, 8, 12, 16]) - else: - x = tf.convert_to_tensor([0, 1, 2, 3, 4]) - y = tf.convert_to_tensor([0, 2, 4, 6, 8]) - sw = tf.convert_to_tensor([0, 4, 8, 12, 16]) - - (train_x, train_y, train_sw), (val_x, val_y, val_sw) = ( - data_adapter.train_validation_split((x, y, sw), validation_split=0.2)) - - if use_numpy: - train_x = tf.convert_to_tensor(train_x) - train_y = tf.convert_to_tensor(train_y) - train_sw = tf.convert_to_tensor(train_sw) - val_x = tf.convert_to_tensor(val_x) - val_y = tf.convert_to_tensor(val_y) - val_sw = tf.convert_to_tensor(val_sw) - - self.assertEqual(train_x.numpy().tolist(), [0, 1, 2, 3]) - self.assertEqual(train_y.numpy().tolist(), [0, 2, 4, 6]) - self.assertEqual(train_sw.numpy().tolist(), [0, 4, 8, 12]) - - self.assertEqual(val_x.numpy().tolist(), [4]) - self.assertEqual(val_y.numpy().tolist(), [8]) - self.assertEqual(val_sw.numpy().tolist(), [16]) - - def test_validation_split_user_error(self): - with self.assertRaisesRegex(ValueError, 'is only supported for Tensors'): - data_adapter.train_validation_split( - lambda: np.ones((10, 1)), validation_split=0.2) - - def test_validation_split_examples_too_few(self): - with self.assertRaisesRegex(ValueError, 'not sufficient to split it'): - data_adapter.train_validation_split( - np.ones((1, 10)), validation_split=0.2) - - def test_validation_split_none(self): - train_sw, val_sw = data_adapter.train_validation_split( - None, validation_split=0.2) - self.assertIsNone(train_sw) - self.assertIsNone(val_sw) - - (_, train_sw), (_, val_sw) = data_adapter.train_validation_split( - (np.ones((10, 1)), None), validation_split=0.2) - self.assertIsNone(train_sw) - self.assertIsNone(val_sw) + @parameterized.named_parameters(("numpy_arrays", True), 
("tensors", False)) + def test_validation_split_unshuffled(self, use_numpy): + if use_numpy: + x = np.array([0, 1, 2, 3, 4]) + y = np.array([0, 2, 4, 6, 8]) + sw = np.array([0, 4, 8, 12, 16]) + else: + x = tf.convert_to_tensor([0, 1, 2, 3, 4]) + y = tf.convert_to_tensor([0, 2, 4, 6, 8]) + sw = tf.convert_to_tensor([0, 4, 8, 12, 16]) + + (train_x, train_y, train_sw), ( + val_x, + val_y, + val_sw, + ) = data_adapter.train_validation_split( + (x, y, sw), validation_split=0.2 + ) + + if use_numpy: + train_x = tf.convert_to_tensor(train_x) + train_y = tf.convert_to_tensor(train_y) + train_sw = tf.convert_to_tensor(train_sw) + val_x = tf.convert_to_tensor(val_x) + val_y = tf.convert_to_tensor(val_y) + val_sw = tf.convert_to_tensor(val_sw) + + self.assertEqual(train_x.numpy().tolist(), [0, 1, 2, 3]) + self.assertEqual(train_y.numpy().tolist(), [0, 2, 4, 6]) + self.assertEqual(train_sw.numpy().tolist(), [0, 4, 8, 12]) + + self.assertEqual(val_x.numpy().tolist(), [4]) + self.assertEqual(val_y.numpy().tolist(), [8]) + self.assertEqual(val_sw.numpy().tolist(), [16]) + + def test_validation_split_user_error(self): + with self.assertRaisesRegex( + ValueError, "is only supported for Tensors" + ): + data_adapter.train_validation_split( + lambda: np.ones((10, 1)), validation_split=0.2 + ) + + def test_validation_split_examples_too_few(self): + with self.assertRaisesRegex(ValueError, "not sufficient to split it"): + data_adapter.train_validation_split( + np.ones((1, 10)), validation_split=0.2 + ) + + def test_validation_split_none(self): + train_sw, val_sw = data_adapter.train_validation_split( + None, validation_split=0.2 + ) + self.assertIsNone(train_sw) + self.assertIsNone(val_sw) + + (_, train_sw), (_, val_sw) = data_adapter.train_validation_split( + (np.ones((10, 1)), None), validation_split=0.2 + ) + self.assertIsNone(train_sw) + self.assertIsNone(val_sw) class ListsOfScalarsDataAdapterTest(DataAdapterTestBase): + def setUp(self): + super().setUp() + self.adapter_cls = data_adapter.ListsOfScalarsDataAdapter - def setUp(self): - super().setUp() - self.adapter_cls = data_adapter.ListsOfScalarsDataAdapter - - def test_can_list_inputs(self): - self.assertTrue(self.adapter_cls.can_handle(self.text_input)) - self.assertTrue(self.adapter_cls.can_handle(self.bytes_input)) + def test_can_list_inputs(self): + self.assertTrue(self.adapter_cls.can_handle(self.text_input)) + self.assertTrue(self.adapter_cls.can_handle(self.bytes_input)) - self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) - self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) - self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) - self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) - self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) - self.assertFalse(self.adapter_cls.can_handle([])) + self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) + self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) + self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle([])) class TestDataAdapterUtils(DataAdapterTestBase): - - def test_unpack_x_y_sample_weight_with_tuple_and_list(self): - tuple_version = data_adapter.unpack_x_y_sample_weight( - (self.tensor_input, self.tensor_target)) - list_version = data_adapter.unpack_x_y_sample_weight( - [self.tensor_input, self.tensor_target]) 
- self.assertEqual(tuple_version, list_version) - - -if __name__ == '__main__': - tf.compat.v1.enable_eager_execution() - tf.test.main() + def test_unpack_x_y_sample_weight_with_tuple_and_list(self): + tuple_version = data_adapter.unpack_x_y_sample_weight( + (self.tensor_input, self.tensor_target) + ) + list_version = data_adapter.unpack_x_y_sample_weight( + [self.tensor_input, self.tensor_target] + ) + self.assertEqual(tuple_version, list_version) + + def test_unpack_pack_dict(self): + # A dictionary can be unambiguously represented without a tuple. + x = {"key": self.tensor_input} + packed_x = data_adapter.pack_x_y_sample_weight(x) + self.assertEqual(packed_x, x) + unpacked_x, _, _ = data_adapter.unpack_x_y_sample_weight(x) + self.assertEqual(unpacked_x, x) + + +if __name__ == "__main__": + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/engine/deferred_sequential_test.py b/keras/engine/deferred_sequential_test.py index f2133adcae7f..8d72abbef0d6 100644 --- a/keras/engine/deferred_sequential_test.py +++ b/keras/engine/deferred_sequential_test.py @@ -14,204 +14,226 @@ # ============================================================================== """Tests specific to deferred-build `Sequential` models.""" -import tensorflow.compat.v2 as tf - import os import unittest + import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils try: - import h5py # pylint:disable=g-import-not-at-top + import h5py except ImportError: - h5py = None + h5py = None @test_utils.run_v2_only class TestDeferredSequential(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_build_behavior(self): - # Test graph network creation after __call__ - model = get_model() - model(np.random.random((2, 6))) - self.assertLen(model.weights, 4) - self.assertTrue(model._is_graph_network) - self.assertLen(model.inputs, 1) - self.assertLen(model.outputs, 1) - self.assertEqual(model.inputs[0].shape.as_list(), [2, 6]) - self.assertEqual(model.outputs[0].shape.as_list(), [2, 2]) - - # Test effect of new __call__ with a different shape - model(np.random.random((3, 6))) - self.assertLen(model.inputs, 1) - self.assertLen(model.outputs, 1) - self.assertEqual(model.inputs[0].shape.as_list(), [None, 6]) - self.assertEqual(model.outputs[0].shape.as_list(), [None, 2]) - model(np.random.random((4, 6))) - self.assertLen(model.inputs, 1) - self.assertLen(model.outputs, 1) - self.assertEqual(model.inputs[0].shape.as_list(), [None, 6]) - self.assertEqual(model.outputs[0].shape.as_list(), [None, 2]) - - # Test graph network creation after build - model = get_model() - model.build((None, 6)) - self.assertLen(model.weights, 4) - self.assertTrue(model._is_graph_network) - self.assertLen(model.inputs, 1) - self.assertLen(model.outputs, 1) - self.assertEqual(model.inputs[0].shape.as_list(), [None, 6]) - self.assertEqual(model.outputs[0].shape.as_list(), [None, 2]) - - # Test graph network creation after compile/fit - model = get_model() - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=[keras.metrics.CategoricalAccuracy()], - run_eagerly=test_utils.should_run_eagerly()) - model.fit(np.zeros((2, 6)), np.zeros((2, 2))) - self.assertLen(model.weights, 4) - self.assertTrue(model._is_graph_network) - self.assertLen(model.inputs, 1) - self.assertLen(model.outputs, 1) - # Inconsistency here: with eager `fit`, the model is built with shape - # (2, 6), but with graph 
function `fit`, it is built with shape `(None, 6)`. - # This is likely due to our assumption "the batch size should be dynamic" - # at the level of `Model`. TODO(fchollet): investigate and resolve. - self.assertEqual(model.inputs[0].shape.as_list()[-1], 6) - self.assertEqual(model.outputs[0].shape.as_list()[-1], 2) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_add_and_pop(self): - model = get_model() - model.build((None, 6)) - self.assertTrue(model.built) - self.assertTrue(model._is_graph_network) - self.assertLen(model.layers, 3) - self.assertLen(model.weights, 4) - model.pop() - self.assertTrue(model.built) - self.assertTrue(model._is_graph_network) - self.assertLen(model.layers, 2) - self.assertLen(model.weights, 2) - model.add(keras.layers.Dense(2)) - self.assertTrue(model.built) - self.assertTrue(model._is_graph_network) - self.assertLen(model.layers, 3) - self.assertLen(model.weights, 4) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_feature_extraction(self): - # This tests layer connectivity reset when rebuilding - model = get_model() - model(np.random.random((3, 6))) # First build - model(np.random.random((4, 6))) # Triggers a rebuild - # Classic feature extractor pattern - extractor = keras.Model(inputs=model.inputs, - outputs=[layer.output for layer in model.layers]) - # Check that inputs and outputs are connected - _ = extractor(np.random.random((4, 6))) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_saving_savedmodel(self): - model = get_model() - model(np.random.random((3, 6))) # Build model - - path = os.path.join(self.get_temp_dir(), 'model_path') - model.save(path) - new_model = keras.models.load_model(path) - model_layers = model._flatten_layers(include_self=True, recursive=False) - new_model_layers = new_model._flatten_layers( - include_self=True, recursive=False) - for layer1, layer2 in zip(model_layers, new_model_layers): - self.assertEqual(layer1.name, layer2.name) - for w1, w2 in zip(layer1.weights, layer2.weights): - self.assertAllClose(w1, w2) - - @unittest.skipIf(h5py is None, 'Test requires h5py') - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_saving_h5(self): - path = os.path.join(self.get_temp_dir(), 'model_path.h5') - model = get_model() - model(np.random.random((3, 6))) # Build model - - path = os.path.join(self.get_temp_dir(), 'model_path.h5') - model.save(path) - new_model = keras.models.load_model(path) - model_layers = model._flatten_layers(include_self=True, recursive=False) - new_model_layers = new_model._flatten_layers( - include_self=True, recursive=False) - for layer1, layer2 in zip(model_layers, new_model_layers): - self.assertEqual(layer1.name, layer2.name) - for w1, w2 in zip(layer1.weights, layer2.weights): - self.assertAllClose(w1, w2) - - @test_combinations.run_all_keras_modes - def test_shared_layer(self): - # This tests that preexisting layer connectivity is preserved - # when auto-building graph networks - shared_layer = keras.layers.Dense(2) - m1 = keras.Sequential([shared_layer]) - m1(np.random.random((3, 6))) - m2 = keras.Sequential([shared_layer]) - m2(np.random.random((3, 6))) - # Nesting case - shared_layer = keras.layers.Dense(2) - m1 = keras.Sequential([shared_layer]) - m2 = keras.Sequential([shared_layer, m1]) - m2(np.random.random((3, 2))) - - @test_combinations.run_all_keras_modes - def test_loss_layer(self): - class LossLayer(keras.layers.Layer): - - def call(self, inputs): - self.add_loss(tf.reduce_sum(inputs)) - return 
inputs - - # Test loss layer alone - model = keras.Sequential([LossLayer()]) - model.compile('rmsprop', run_eagerly=test_utils.should_run_eagerly()) - loss = model.train_on_batch(np.ones((2, 2))) - self.assertAllClose(loss, 4.) - model(np.random.random((4, 2))) # Triggers a rebuild - loss = model.train_on_batch(np.ones((1, 2))) - self.assertAllClose(loss, 2.) - - # Test loss layer combined with another layer - model = keras.Sequential([ - keras.layers.Dense(1, kernel_initializer='ones'), - LossLayer()]) - model.compile('rmsprop', run_eagerly=test_utils.should_run_eagerly()) - loss = model.train_on_batch(np.ones((2, 2))) - self.assertAllClose(loss, 4.) - model(np.random.random((4, 2))) # Triggers a rebuild - loss = model.train_on_batch(np.ones((1, 2))) - self.assertLess(loss, 2.) - - # Test loss layer combined with external loss - model = keras.Sequential([ - keras.layers.Dense(1, kernel_initializer='ones'), - LossLayer()]) - model.compile('rmsprop', 'mse', - run_eagerly=test_utils.should_run_eagerly()) - loss = model.train_on_batch(np.ones((2, 2)), np.ones((2, 2))) - model(np.random.random((4, 2))) # Triggers a rebuild - loss = model.train_on_batch(np.ones((1, 2)), np.ones((1, 2))) + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_build_behavior(self): + # Test graph network creation after __call__ + model = get_model() + model(np.random.random((2, 6))) + self.assertLen(model.weights, 4) + self.assertTrue(model._is_graph_network) + self.assertLen(model.inputs, 1) + self.assertLen(model.outputs, 1) + self.assertEqual(model.inputs[0].shape.as_list(), [2, 6]) + self.assertEqual(model.outputs[0].shape.as_list(), [2, 2]) + + # Test effect of new __call__ with a different shape + model(np.random.random((3, 6))) + self.assertLen(model.inputs, 1) + self.assertLen(model.outputs, 1) + self.assertEqual(model.inputs[0].shape.as_list(), [None, 6]) + self.assertEqual(model.outputs[0].shape.as_list(), [None, 2]) + model(np.random.random((4, 6))) + self.assertLen(model.inputs, 1) + self.assertLen(model.outputs, 1) + self.assertEqual(model.inputs[0].shape.as_list(), [None, 6]) + self.assertEqual(model.outputs[0].shape.as_list(), [None, 2]) + + # Test graph network creation after build + model = get_model() + model.build((None, 6)) + self.assertLen(model.weights, 4) + self.assertTrue(model._is_graph_network) + self.assertLen(model.inputs, 1) + self.assertLen(model.outputs, 1) + self.assertEqual(model.inputs[0].shape.as_list(), [None, 6]) + self.assertEqual(model.outputs[0].shape.as_list(), [None, 2]) + + # Test graph network creation after compile/fit + model = get_model() + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=[keras.metrics.CategoricalAccuracy()], + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(np.zeros((2, 6)), np.zeros((2, 2))) + self.assertLen(model.weights, 4) + self.assertTrue(model._is_graph_network) + self.assertLen(model.inputs, 1) + self.assertLen(model.outputs, 1) + # Inconsistency here: with eager `fit`, the model is built with shape + # (2, 6), but with graph function `fit`, it is built with shape `(None, + # 6)`. This is likely due to our assumption "the batch size should be + # dynamic" at the level of `Model`. TODO(fchollet): investigate and + # resolve. 
+ self.assertEqual(model.inputs[0].shape.as_list()[-1], 6) + self.assertEqual(model.outputs[0].shape.as_list()[-1], 2) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_add_and_pop(self): + model = get_model() + model.build((None, 6)) + self.assertTrue(model.built) + self.assertTrue(model._is_graph_network) + self.assertLen(model.layers, 3) + self.assertLen(model.weights, 4) + model.pop() + self.assertTrue(model.built) + self.assertTrue(model._is_graph_network) + self.assertLen(model.layers, 2) + self.assertLen(model.weights, 2) + model.add(keras.layers.Dense(2)) + self.assertTrue(model.built) + self.assertTrue(model._is_graph_network) + self.assertLen(model.layers, 3) + self.assertLen(model.weights, 4) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_feature_extraction(self): + # This tests layer connectivity reset when rebuilding + model = get_model() + model(np.random.random((3, 6))) # First build + model(np.random.random((4, 6))) # Triggers a rebuild + # Classic feature extractor pattern + extractor = keras.Model( + inputs=model.inputs, + outputs=[layer.output for layer in model.layers], + ) + # Check that inputs and outputs are connected + _ = extractor(np.random.random((4, 6))) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_saving_keras_v3(self): + model = get_model() + model(np.random.random((3, 6))) # Build model + + path = os.path.join(self.get_temp_dir(), "model_path.keras") + model.save(path) + new_model = keras.models.load_model(path) + model_layers = model._flatten_layers(include_self=True, recursive=False) + new_model_layers = new_model._flatten_layers( + include_self=True, recursive=False + ) + for layer1, layer2 in zip(model_layers, new_model_layers): + self.assertEqual(layer1.name, layer2.name) + for w1, w2 in zip(layer1.weights, layer2.weights): + self.assertAllClose(w1, w2) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_saving_savedmodel(self): + model = get_model() + model(np.random.random((3, 6))) # Build model + + path = os.path.join(self.get_temp_dir(), "model_path") + model.save(path) + new_model = keras.models.load_model(path) + model_layers = model._flatten_layers(include_self=True, recursive=False) + new_model_layers = new_model._flatten_layers( + include_self=True, recursive=False + ) + for layer1, layer2 in zip(model_layers, new_model_layers): + self.assertEqual(layer1.name, layer2.name) + for w1, w2 in zip(layer1.weights, layer2.weights): + self.assertAllClose(w1, w2) + + @unittest.skipIf(h5py is None, "Test requires h5py") + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_saving_h5(self): + path = os.path.join(self.get_temp_dir(), "model_path.h5") + model = get_model() + model(np.random.random((3, 6))) # Build model + + path = os.path.join(self.get_temp_dir(), "model_path.h5") + model.save(path) + new_model = keras.models.load_model(path) + model_layers = model._flatten_layers(include_self=True, recursive=False) + new_model_layers = new_model._flatten_layers( + include_self=True, recursive=False + ) + for layer1, layer2 in zip(model_layers, new_model_layers): + self.assertEqual(layer1.name, layer2.name) + for w1, w2 in zip(layer1.weights, layer2.weights): + self.assertAllClose(w1, w2) + + @test_combinations.run_all_keras_modes + def test_shared_layer(self): + # This tests that preexisting layer connectivity is preserved + # when auto-building graph networks + shared_layer = keras.layers.Dense(2) + m1 = 
keras.Sequential([shared_layer]) + m1(np.random.random((3, 6))) + m2 = keras.Sequential([shared_layer]) + m2(np.random.random((3, 6))) + # Nesting case + shared_layer = keras.layers.Dense(2) + m1 = keras.Sequential([shared_layer]) + m2 = keras.Sequential([shared_layer, m1]) + m2(np.random.random((3, 2))) + + @test_combinations.run_all_keras_modes + def test_loss_layer(self): + class LossLayer(keras.layers.Layer): + def call(self, inputs): + self.add_loss(tf.reduce_sum(inputs)) + return inputs + + # Test loss layer alone + model = keras.Sequential([LossLayer()]) + model.compile("rmsprop", run_eagerly=test_utils.should_run_eagerly()) + loss = model.train_on_batch(np.ones((2, 2))) + self.assertAllClose(loss, 4.0) + model(np.random.random((4, 2))) # Triggers a rebuild + loss = model.train_on_batch(np.ones((1, 2))) + self.assertAllClose(loss, 2.0) + + # Test loss layer combined with another layer + model = keras.Sequential( + [keras.layers.Dense(1, kernel_initializer="ones"), LossLayer()] + ) + model.compile("rmsprop", run_eagerly=test_utils.should_run_eagerly()) + loss = model.train_on_batch(np.ones((2, 2))) + self.assertAllClose(loss, 4.0) + model(np.random.random((4, 2))) # Triggers a rebuild + loss = model.train_on_batch(np.ones((1, 2))) + self.assertLess(loss, 2.0) + + # Test loss layer combined with external loss + model = keras.Sequential( + [keras.layers.Dense(1, kernel_initializer="ones"), LossLayer()] + ) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + loss = model.train_on_batch(np.ones((2, 2)), np.ones((2, 2))) + model(np.random.random((4, 2))) # Triggers a rebuild + loss = model.train_on_batch(np.ones((1, 2)), np.ones((1, 2))) def get_model(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, name='first_layer')) - model.add(keras.layers.Dropout(0.3, name='dp')) - model.add(keras.layers.Dense(2, name='last_layer')) - return model + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, name="first_layer")) + model.add(keras.layers.Dropout(0.3, name="dp")) + model.add(keras.layers.Dense(2, name="last_layer")) + return model -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/feature_columns_integration_test.py b/keras/engine/feature_columns_integration_test.py index e8e0d1dec186..427a8c70b696 100644 --- a/keras/engine/feature_columns_integration_test.py +++ b/keras/engine/feature_columns_integration_test.py @@ -14,286 +14,307 @@ # ============================================================================== """Tests specific to Feature Columns integration.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf import keras -from keras.testing_infra import test_combinations from keras import metrics as metrics_module -from keras.testing_infra import test_utils from keras.feature_column import dense_features as df +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils from keras.utils import np_utils class TestDNNModel(keras.models.Model): + def __init__(self, feature_columns, units, name=None, **kwargs): + super().__init__(name=name, **kwargs) + self._input_layer = df.DenseFeatures( + feature_columns, name="input_layer" + ) + self._dense_layer = keras.layers.Dense(units, name="dense_layer") - def __init__(self, feature_columns, units, name=None, **kwargs): - super().__init__(name=name, **kwargs) - self._input_layer = df.DenseFeatures(feature_columns, name='input_layer') - 
self._dense_layer = keras.layers.Dense(units, name='dense_layer') - - def call(self, features): - net = self._input_layer(features) - net = self._dense_layer(net) - return net + def call(self, features): + net = self._input_layer(features) + net = self._dense_layer(net) + return net class FeatureColumnsIntegrationTest(test_combinations.TestCase): - """Most Sequential model API tests are covered in `training_test.py`. - - """ - - @test_combinations.run_all_keras_modes - def test_sequential_model(self): - columns = [tf.feature_column.numeric_column('a')] - model = keras.models.Sequential([ - df.DenseFeatures(columns), - keras.layers.Dense(64, activation='relu'), - keras.layers.Dense(20, activation='softmax') - ]) - model.compile( - optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy'], - run_eagerly=test_utils.should_run_eagerly()) - - x = {'a': np.random.random((10, 1))} - y = np.random.randint(20, size=(10, 1)) - y = np_utils.to_categorical(y, num_classes=20) - model.fit(x, y, epochs=1, batch_size=5) - model.fit(x, y, epochs=1, batch_size=5) - model.evaluate(x, y, batch_size=5) - model.predict(x, batch_size=5) - - @test_combinations.run_all_keras_modes - def test_sequential_model_with_ds_input(self): - columns = [tf.feature_column.numeric_column('a')] - model = keras.models.Sequential([ - df.DenseFeatures(columns), - keras.layers.Dense(64, activation='relu'), - keras.layers.Dense(20, activation='softmax') - ]) - model.compile( - optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy'], - run_eagerly=test_utils.should_run_eagerly()) - - y = np.random.randint(20, size=(100, 1)) - y = np_utils.to_categorical(y, num_classes=20) - x = {'a': np.random.random((100, 1))} - ds1 = tf.data.Dataset.from_tensor_slices(x) - ds2 = tf.data.Dataset.from_tensor_slices(y) - ds = tf.data.Dataset.zip((ds1, ds2)).batch(5) - model.fit(ds, steps_per_epoch=1) - model.fit(ds, steps_per_epoch=1) - model.evaluate(ds, steps=1) - model.predict(ds, steps=1) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_sequential_model_with_crossed_column(self): - feature_columns = [] - age_buckets = tf.feature_column.bucketized_column( - tf.feature_column.numeric_column('age'), - boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) - feature_columns.append(age_buckets) - - # indicator cols - thal = tf.feature_column.categorical_column_with_vocabulary_list( - 'thal', ['fixed', 'normal', 'reversible']) - - crossed_feature = tf.feature_column.crossed_column([age_buckets, thal], - hash_bucket_size=1000) - crossed_feature = tf.feature_column.indicator_column(crossed_feature) - feature_columns.append(crossed_feature) - - feature_layer = df.DenseFeatures(feature_columns) - - model = keras.models.Sequential([ - feature_layer, - keras.layers.Dense(128, activation='relu'), - keras.layers.Dense(128, activation='relu'), - keras.layers.Dense(1, activation='sigmoid') - ]) - - age_data = np.random.randint(10, 100, size=100) - thal_data = np.random.choice(['fixed', 'normal', 'reversible'], size=100) - inp_x = {'age': age_data, 'thal': thal_data} - inp_y = np.random.randint(0, 1, size=100) - ds = tf.data.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5) - model.compile(optimizer='adam', - loss='binary_crossentropy', - metrics=['accuracy'],) - model.fit(ds, epochs=1) - model.fit(ds, epochs=1) - model.evaluate(ds) - model.predict(ds) - - @test_combinations.run_all_keras_modes - def test_subclassed_model_with_feature_columns(self): - col_a = tf.feature_column.numeric_column('a') 
- col_b = tf.feature_column.numeric_column('b') - - dnn_model = TestDNNModel([col_a, col_b], 20) - - dnn_model.compile( - optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy'], - run_eagerly=test_utils.should_run_eagerly()) - - x = {'a': np.random.random((10, 1)), 'b': np.random.random((10, 1))} - y = np.random.randint(20, size=(10, 1)) - y = np_utils.to_categorical(y, num_classes=20) - dnn_model.fit(x=x, y=y, epochs=1, batch_size=5) - dnn_model.fit(x=x, y=y, epochs=1, batch_size=5) - dnn_model.evaluate(x=x, y=y, batch_size=5) - dnn_model.predict(x=x, batch_size=5) - - @test_combinations.run_all_keras_modes - def test_subclassed_model_with_feature_columns_with_ds_input(self): - col_a = tf.feature_column.numeric_column('a') - col_b = tf.feature_column.numeric_column('b') - - dnn_model = TestDNNModel([col_a, col_b], 20) - - dnn_model.compile( - optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy'], - run_eagerly=test_utils.should_run_eagerly()) - - y = np.random.randint(20, size=(100, 1)) - y = np_utils.to_categorical(y, num_classes=20) - x = {'a': np.random.random((100, 1)), 'b': np.random.random((100, 1))} - ds1 = tf.data.Dataset.from_tensor_slices(x) - ds2 = tf.data.Dataset.from_tensor_slices(y) - ds = tf.data.Dataset.zip((ds1, ds2)).batch(5) - dnn_model.fit(ds, steps_per_epoch=1) - dnn_model.fit(ds, steps_per_epoch=1) - dnn_model.evaluate(ds, steps=1) - dnn_model.predict(ds, steps=1) - - # TODO(kaftan) seems to throw an error when enabled. - @test_combinations.run_all_keras_modes - def DISABLED_test_function_model_feature_layer_input(self): - col_a = tf.feature_column.numeric_column('a') - col_b = tf.feature_column.numeric_column('b') - - feature_layer = df.DenseFeatures([col_a, col_b], name='fc') - dense = keras.layers.Dense(4) - - # This seems problematic.... We probably need something for DenseFeatures - # the way Input is for InputLayer. - output = dense(feature_layer) - - model = keras.models.Model([feature_layer], [output]) - - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - model.compile( - optimizer, - loss, - metrics=[metrics_module.CategoricalAccuracy(), 'mae'], - loss_weights=loss_weights) - - data = ({'a': np.arange(10), 'b': np.arange(10)}, np.arange(10, 20)) - model.fit(*data, epochs=1) - - # TODO(kaftan) seems to throw an error when enabled. - @test_combinations.run_all_keras_modes - def DISABLED_test_function_model_multiple_feature_layer_inputs(self): - col_a = tf.feature_column.numeric_column('a') - col_b = tf.feature_column.numeric_column('b') - col_c = tf.feature_column.numeric_column('c') - - fc1 = df.DenseFeatures([col_a, col_b], name='fc1') - fc2 = df.DenseFeatures([col_b, col_c], name='fc2') - dense = keras.layers.Dense(4) - - # This seems problematic.... We probably need something for DenseFeatures - # the way Input is for InputLayer. 
- output = dense(fc1) + dense(fc2) - - model = keras.models.Model([fc1, fc2], [output]) - - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - model.compile( - optimizer, - loss, - metrics=[metrics_module.CategoricalAccuracy(), 'mae'], - loss_weights=loss_weights) - - data_list = ([{ - 'a': np.arange(10), - 'b': np.arange(10) - }, { - 'b': np.arange(10), - 'c': np.arange(10) - }], np.arange(10, 100)) - model.fit(*data_list, epochs=1) - - data_bloated_list = ([{ - 'a': np.arange(10), - 'b': np.arange(10), - 'c': np.arange(10) - }, { - 'a': np.arange(10), - 'b': np.arange(10), - 'c': np.arange(10) - }], np.arange(10, 100)) - model.fit(*data_bloated_list, epochs=1) - - data_dict = ({ - 'fc1': { - 'a': np.arange(10), - 'b': np.arange(10) - }, - 'fc2': { - 'b': np.arange(10), - 'c': np.arange(10) - } - }, np.arange(10, 100)) - model.fit(*data_dict, epochs=1) - - data_bloated_dict = ({ - 'fc1': { - 'a': np.arange(10), - 'b': np.arange(10), - 'c': np.arange(10) - }, - 'fc2': { - 'a': np.arange(10), - 'b': np.arange(10), - 'c': np.arange(10) + """Most Sequential model API tests are covered in `training_test.py`.""" + + @test_combinations.run_all_keras_modes + def test_sequential_model(self): + columns = [tf.feature_column.numeric_column("a")] + model = keras.models.Sequential( + [ + df.DenseFeatures(columns), + keras.layers.Dense(64, activation="relu"), + keras.layers.Dense(20, activation="softmax"), + ] + ) + model.compile( + optimizer="rmsprop", + loss="categorical_crossentropy", + metrics=["accuracy"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = {"a": np.random.random((10, 1))} + y = np.random.randint(20, size=(10, 1)) + y = np_utils.to_categorical(y, num_classes=20) + model.fit(x, y, epochs=1, batch_size=5) + model.fit(x, y, epochs=1, batch_size=5) + model.evaluate(x, y, batch_size=5) + model.predict(x, batch_size=5) + + @test_combinations.run_all_keras_modes + def test_sequential_model_with_ds_input(self): + columns = [tf.feature_column.numeric_column("a")] + model = keras.models.Sequential( + [ + df.DenseFeatures(columns), + keras.layers.Dense(64, activation="relu"), + keras.layers.Dense(20, activation="softmax"), + ] + ) + model.compile( + optimizer="rmsprop", + loss="categorical_crossentropy", + metrics=["accuracy"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + y = np.random.randint(20, size=(100, 1)) + y = np_utils.to_categorical(y, num_classes=20) + x = {"a": np.random.random((100, 1))} + ds1 = tf.data.Dataset.from_tensor_slices(x) + ds2 = tf.data.Dataset.from_tensor_slices(y) + ds = tf.data.Dataset.zip((ds1, ds2)).batch(5) + model.fit(ds, steps_per_epoch=1) + model.fit(ds, steps_per_epoch=1) + model.evaluate(ds, steps=1) + model.predict(ds, steps=1) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_sequential_model_with_crossed_column(self): + feature_columns = [] + age_buckets = tf.feature_column.bucketized_column( + tf.feature_column.numeric_column("age"), + boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65], + ) + feature_columns.append(age_buckets) + + # indicator cols + thal = tf.feature_column.categorical_column_with_vocabulary_list( + "thal", ["fixed", "normal", "reversible"] + ) + + crossed_feature = tf.feature_column.crossed_column( + [age_buckets, thal], hash_bucket_size=1000 + ) + crossed_feature = tf.feature_column.indicator_column(crossed_feature) + feature_columns.append(crossed_feature) + + feature_layer = df.DenseFeatures(feature_columns) + + model = keras.models.Sequential( + [ + feature_layer, + 
keras.layers.Dense(128, activation="relu"), + keras.layers.Dense(128, activation="relu"), + keras.layers.Dense(1, activation="sigmoid"), + ] + ) + + age_data = np.random.randint(10, 100, size=100) + thal_data = np.random.choice( + ["fixed", "normal", "reversible"], size=100 + ) + inp_x = {"age": age_data, "thal": thal_data} + inp_y = np.random.randint(0, 1, size=100) + ds = tf.data.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5) + model.compile( + optimizer="adam", + loss="binary_crossentropy", + metrics=["accuracy"], + ) + model.fit(ds, epochs=1) + model.fit(ds, epochs=1) + model.evaluate(ds) + model.predict(ds) + + @test_combinations.run_all_keras_modes + def test_subclassed_model_with_feature_columns(self): + col_a = tf.feature_column.numeric_column("a") + col_b = tf.feature_column.numeric_column("b") + + dnn_model = TestDNNModel([col_a, col_b], 20) + + dnn_model.compile( + optimizer="rmsprop", + loss="categorical_crossentropy", + metrics=["accuracy"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = {"a": np.random.random((10, 1)), "b": np.random.random((10, 1))} + y = np.random.randint(20, size=(10, 1)) + y = np_utils.to_categorical(y, num_classes=20) + dnn_model.fit(x=x, y=y, epochs=1, batch_size=5) + dnn_model.fit(x=x, y=y, epochs=1, batch_size=5) + dnn_model.evaluate(x=x, y=y, batch_size=5) + dnn_model.predict(x=x, batch_size=5) + + @test_combinations.run_all_keras_modes + def test_subclassed_model_with_feature_columns_with_ds_input(self): + col_a = tf.feature_column.numeric_column("a") + col_b = tf.feature_column.numeric_column("b") + + dnn_model = TestDNNModel([col_a, col_b], 20) + + dnn_model.compile( + optimizer="rmsprop", + loss="categorical_crossentropy", + metrics=["accuracy"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + y = np.random.randint(20, size=(100, 1)) + y = np_utils.to_categorical(y, num_classes=20) + x = {"a": np.random.random((100, 1)), "b": np.random.random((100, 1))} + ds1 = tf.data.Dataset.from_tensor_slices(x) + ds2 = tf.data.Dataset.from_tensor_slices(y) + ds = tf.data.Dataset.zip((ds1, ds2)).batch(5) + dnn_model.fit(ds, steps_per_epoch=1) + dnn_model.fit(ds, steps_per_epoch=1) + dnn_model.evaluate(ds, steps=1) + dnn_model.predict(ds, steps=1) + + # TODO(kaftan) seems to throw an error when enabled. + @test_combinations.run_all_keras_modes + def DISABLED_test_function_model_feature_layer_input(self): + col_a = tf.feature_column.numeric_column("a") + col_b = tf.feature_column.numeric_column("b") + + feature_layer = df.DenseFeatures([col_a, col_b], name="fc") + dense = keras.layers.Dense(4) + + # This seems problematic.... We probably need something for + # DenseFeatures the way Input is for InputLayer. + output = dense(feature_layer) + + model = keras.models.Model([feature_layer], [output]) + + optimizer = "rmsprop" + loss = "mse" + loss_weights = [1.0, 0.5] + model.compile( + optimizer, + loss, + metrics=[metrics_module.CategoricalAccuracy(), "mae"], + loss_weights=loss_weights, + ) + + data = ({"a": np.arange(10), "b": np.arange(10)}, np.arange(10, 20)) + model.fit(*data, epochs=1) + + # TODO(kaftan) seems to throw an error when enabled. 
+ @test_combinations.run_all_keras_modes + def DISABLED_test_function_model_multiple_feature_layer_inputs(self): + col_a = tf.feature_column.numeric_column("a") + col_b = tf.feature_column.numeric_column("b") + col_c = tf.feature_column.numeric_column("c") + + fc1 = df.DenseFeatures([col_a, col_b], name="fc1") + fc2 = df.DenseFeatures([col_b, col_c], name="fc2") + dense = keras.layers.Dense(4) + + # This seems problematic.... We probably need something for + # DenseFeatures the way Input is for InputLayer. + output = dense(fc1) + dense(fc2) + + model = keras.models.Model([fc1, fc2], [output]) + + optimizer = "rmsprop" + loss = "mse" + loss_weights = [1.0, 0.5] + model.compile( + optimizer, + loss, + metrics=[metrics_module.CategoricalAccuracy(), "mae"], + loss_weights=loss_weights, + ) + + data_list = ( + [ + {"a": np.arange(10), "b": np.arange(10)}, + {"b": np.arange(10), "c": np.arange(10)}, + ], + np.arange(10, 100), + ) + model.fit(*data_list, epochs=1) + + data_bloated_list = ( + [ + {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)}, + {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)}, + ], + np.arange(10, 100), + ) + model.fit(*data_bloated_list, epochs=1) + + data_dict = ( + { + "fc1": {"a": np.arange(10), "b": np.arange(10)}, + "fc2": {"b": np.arange(10), "c": np.arange(10)}, + }, + np.arange(10, 100), + ) + model.fit(*data_dict, epochs=1) + + data_bloated_dict = ( + { + "fc1": { + "a": np.arange(10), + "b": np.arange(10), + "c": np.arange(10), + }, + "fc2": { + "a": np.arange(10), + "b": np.arange(10), + "c": np.arange(10), + }, + }, + np.arange(10, 100), + ) + model.fit(*data_bloated_dict, epochs=1) + + @test_combinations.run_all_keras_modes + def test_string_input(self): + x = { + "age": np.random.random((1024, 1)), + "cabin": np.array(["a"] * 1024), } - }, np.arange(10, 100)) - model.fit(*data_bloated_dict, epochs=1) - - @test_combinations.run_all_keras_modes - def test_string_input(self): - x = {'age': np.random.random((1024, 1)), - 'cabin': np.array(['a'] * 1024)} - y = np.random.randint(2, size=(1024, 1)) - ds1 = tf.data.Dataset.from_tensor_slices(x) - ds2 = tf.data.Dataset.from_tensor_slices(y) - dataset = tf.data.Dataset.zip((ds1, ds2)).batch(4) - categorical_cols = [tf.feature_column.categorical_column_with_hash_bucket('cabin', 10)] - feature_cols = ([tf.feature_column.numeric_column('age')] - + [tf.feature_column.indicator_column(cc) for cc in categorical_cols]) - layers = [df.DenseFeatures(feature_cols), - keras.layers.Dense(128), - keras.layers.Dense(1)] - - model = keras.models.Sequential(layers) - model.compile(optimizer='sgd', - loss=keras.losses.BinaryCrossentropy()) - model.fit(dataset) - - -if __name__ == '__main__': - tf.test.main() + y = np.random.randint(2, size=(1024, 1)) + ds1 = tf.data.Dataset.from_tensor_slices(x) + ds2 = tf.data.Dataset.from_tensor_slices(y) + dataset = tf.data.Dataset.zip((ds1, ds2)).batch(4) + categorical_cols = [ + tf.feature_column.categorical_column_with_hash_bucket("cabin", 10) + ] + feature_cols = [tf.feature_column.numeric_column("age")] + [ + tf.feature_column.indicator_column(cc) for cc in categorical_cols + ] + layers = [ + df.DenseFeatures(feature_cols), + keras.layers.Dense(128), + keras.layers.Dense(1), + ] + + model = keras.models.Sequential(layers) + model.compile(optimizer="sgd", loss=keras.losses.BinaryCrossentropy()) + model.fit(dataset) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/functional.py b/keras/engine/functional.py index 727f90d3c4fc..1dd8ba006fe7 100644 --- 
a/keras/engine/functional.py
+++ b/keras/engine/functional.py
@@ -12,14 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
-# pylint: disable=protected-access
-"""A `Network` is way to compose layers: the topological form of a `Model`."""
+"""A `Network` is a way to compose layers: the topological form of a `Model`."""
import collections
import copy
import itertools
import warnings
+
+import tensorflow.compat.v2 as tf
+
from keras import backend
from keras.dtensor import layout_map as layout_map_lib
from keras.engine import base_layer
@@ -30,1448 +32,1672 @@ from keras.engine import node as node_module
from keras.engine import training as training_lib
from keras.engine import training_utils
-from keras.saving.saved_model import json_utils
-from keras.saving.saved_model import network_serialization
+from keras.saving import serialization_lib
+from keras.saving.legacy import serialization
+from keras.saving.legacy.saved_model import json_utils
+from keras.saving.legacy.saved_model import network_serialization
+from keras.saving.legacy.saved_model import utils as saved_model_utils
from keras.utils import generic_utils
from keras.utils import tf_inspect
from keras.utils import tf_utils
-import tensorflow.compat.v2 as tf
+
+# isort: off
from tensorflow.python.platform import tf_logging as logging
from tensorflow.tools.docs import doc_controls
-# pylint: disable=g-classes-have-attributes
class Functional(training_lib.Model):
- """A `Functional` model is a `Model` defined as a directed graph of layers.
-
- Three types of `Model` exist: subclassed `Model`, `Functional` model,
- and `Sequential` (a special case of `Functional`).
- In general, more Keras features are supported with `Functional`
- than with subclassed `Model`s, specifically:
-
- - Model cloning (`keras.models.clone`)
- - Serialization (`model.get_config()/from_config`, `model.to_json()`
- - Whole-model saving (`model.save()`)
-
- A `Functional` model can be instantiated by passing two arguments to
- `__init__`. The first argument is the `keras.Input` Tensors that represent
- the inputs to the model. The second argument specifies the output
- tensors that represent the outputs of this model. Both arguments can be a
- nested structure of tensors.
-
- Example:
-
- ```
- inputs = {'x1': keras.Input(shape=(10,)), 'x2': keras.Input(shape=(1,))}
- t = keras.layers.Dense(1, activation='relu')(inputs['x1'])
- outputs = keras.layers.Add()([t, inputs['x2'])
- model = keras.Model(inputs, outputs)
- ```
-
- A `Functional` model constructed using the Functional API can also include raw
- TensorFlow functions, with the exception of functions that create Variables
- or assign ops.
-
- Example:
-
- ```python
- inputs = keras.Input(shape=(10,))
- x = keras.layers.Dense(1)(inputs)
- outputs = tf.nn.relu(x)
- model = keras.Model(inputs, outputs)
- ```
-
- A new `Functional` model can also be created by using the
- intermediate tensors. This enables you to quickly extract sub-components
- of the model.
- - Example: - - ```python - inputs = keras.Input(shape=(None, None, 3)) - processed = keras.layers.RandomCrop(width=32, height=32)(inputs) - conv = keras.layers.Conv2D(filters=2, kernel_size=3)(processed) - pooling = keras.layers.GlobalAveragePooling2D()(conv) - feature = keras.layers.Dense(10)(pooling) - - full_model = keras.Model(inputs, feature) - backbone = keras.Model(processed, conv) - activations = keras.Model(conv, feature) - ``` - - Note that the `backbone` and `activations` models are not - created with `keras.Input` objects, but with the tensors that are originated - from `keras.Inputs` objects. Under the hood, the layers and weights will - be shared across these models, so that user can train the `full_model`, and - use `backbone` or `activations` to do feature extraction. - The inputs and outputs of the model can be nested structures of tensors as - well, and the created models are standard `Functional` model that support - all the existing API. - - Args: - inputs: List of input tensors (must be created via `tf.keras.Input()` or - originated from `tf.keras.Input()`). - outputs: List of output tensors. - name: String, optional. Name of the model. - trainable: Boolean, optional. If the model's variables should be trainable. - """ - - # See tf.Module for the usage of this property. - # The key of _layer_call_argspecs is a layer. tf.Module._flatten will fail to - # flatten the key since it is trying to convert Trackable/Layer to a string. - _TF_MODULE_IGNORED_PROPERTIES = frozenset(itertools.chain( - ('_layer_call_argspecs', '_compiled_trainable_state', - '_output_mask_cache', '_output_tensor_cache', '_output_shape_cache'), - training_lib.Model._TF_MODULE_IGNORED_PROPERTIES - )) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, inputs, outputs, name=None, trainable=True, - **kwargs): - # This is used by the Model class, since we have some logic to swap the - # class in the __new__ method, which will lead to __init__ get invoked - # twice. Using the skip_init to skip one of the invocation of __init__ to - # avoid any side effects - skip_init = kwargs.pop('skip_init', False) - if skip_init: - return - generic_utils.validate_kwargs(kwargs, {}) - super().__init__(name=name, trainable=trainable) - # Check if the inputs contain any intermediate `KerasTensor` (not created - # by tf.keras.Input()). In this case we need to clone the `Node` and - # `KerasTensor` objects to mimic rebuilding a new model from new inputs. - # This feature is only enabled in TF2 not in v1 graph mode. - if tf.compat.v1.executing_eagerly_outside_functions(): - if not all([functional_utils.is_input_keras_tensor(t) - for t in tf.nest.flatten(inputs)]): - inputs, outputs = functional_utils.clone_graph_nodes(inputs, outputs) - self._init_graph_network(inputs, outputs) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _init_graph_network(self, inputs, outputs): - # This method is needed for Sequential to reinitialize graph network when - # layer is added or removed. - - base_layer.keras_api_gauge.get_cell('Functional').set(True) - self._is_graph_network = True - - # Normalize and set self.inputs, self.outputs. 
- if isinstance(inputs, list) and len(tf.nest.flatten(inputs)) == 1:
- inputs = inputs[0]
- if isinstance(outputs, list) and len(tf.nest.flatten(outputs)) == 1:
- outputs = outputs[0]
- self._nested_inputs = inputs
- self._nested_outputs = outputs
- self.inputs = tf.nest.flatten(inputs)
- self.outputs = tf.nest.flatten(outputs)
-
- # Models constructed with a single Tensor or list of Tensors can
- # be called with a dict, where the keys of the dict are the names
- # of the `Input` objects. Extra keys are ignored with warning.
- if not tf.nest.is_nested(self._nested_inputs):
- self._enable_dict_to_input_mapping = True
- elif (isinstance(self._nested_inputs, (list, tuple)) and
- not any(tf.nest.is_nested(t) for t in self._nested_inputs)):
- self._enable_dict_to_input_mapping = True
- elif (isinstance(self._nested_inputs, dict) and
- not any(tf.nest.is_nested(t) for t in self._nested_inputs.values())):
- self._enable_dict_to_input_mapping = True
- else:
- self._enable_dict_to_input_mapping = False
+ """A `Functional` model is a `Model` defined as a directed graph of layers.
+
+ Three types of `Model` exist: subclassed `Model`, `Functional` model,
+ and `Sequential` (a special case of `Functional`).
+ In general, more Keras features are supported with `Functional`
+ than with subclassed `Model`s, specifically:
+
+ - Model cloning (`keras.models.clone`)
+ - Serialization (`model.get_config()/from_config`, `model.to_json()`)
+ - Whole-model saving (`model.save()`)
+
+ A `Functional` model can be instantiated by passing two arguments to
+ `__init__`. The first argument is the `keras.Input` Tensors that represent
+ the inputs to the model. The second argument specifies the output
+ tensors that represent the outputs of this model. Both arguments can be a
+ nested structure of tensors.
+
+ Example:
+
+ ```
+ inputs = {'x1': keras.Input(shape=(10,)), 'x2': keras.Input(shape=(1,))}
+ t = keras.layers.Dense(1, activation='relu')(inputs['x1'])
+ outputs = keras.layers.Add()([t, inputs['x2']])
+ model = keras.Model(inputs, outputs)
+ ```
+
+ A `Functional` model constructed using the Functional API can also include
+ raw TensorFlow functions, with the exception of functions that create
+ Variables or assign ops.
+
+ Example:
+
+ ```python
+ inputs = keras.Input(shape=(10,))
+ x = keras.layers.Dense(1)(inputs)
+ outputs = tf.nn.relu(x)
+ model = keras.Model(inputs, outputs)
+ ```
+
+ A new `Functional` model can also be created by using the
+ intermediate tensors. This enables you to quickly extract sub-components
+ of the model.
+
+ Example:
+
+ ```python
+ inputs = keras.Input(shape=(None, None, 3))
+ processed = keras.layers.RandomCrop(width=32, height=32)(inputs)
+ conv = keras.layers.Conv2D(filters=2, kernel_size=3)(processed)
+ pooling = keras.layers.GlobalAveragePooling2D()(conv)
+ feature = keras.layers.Dense(10)(pooling)
+
+ full_model = keras.Model(inputs, feature)
+ backbone = keras.Model(processed, conv)
+ activations = keras.Model(conv, feature)
+ ```
+
+ Note that the `backbone` and `activations` models are not
+ created with `keras.Input` objects, but with tensors that originate
+ from `keras.Input` objects. Under the hood, the layers and weights will
+ be shared across these models, so that the user can train the `full_model`,
+ and use `backbone` or `activations` to do feature extraction.
+ The inputs and outputs of the model can be nested structures of tensors as
+ well, and the created models are standard `Functional` models that support
+ all the existing APIs.
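As a hedged, self-contained illustration of the dict-input pattern this docstring describes (the layer names and shapes below are illustrative assumptions, not part of this change):

```python
import numpy as np
import keras

# Dict inputs: each key maps to a keras.Input; the same keys are used
# when calling the model with data.
inputs = {
    "x1": keras.Input(shape=(10,), name="x1"),
    "x2": keras.Input(shape=(1,), name="x2"),
}
t = keras.layers.Dense(1, activation="relu")(inputs["x1"])
outputs = keras.layers.Add()([t, inputs["x2"]])
model = keras.Model(inputs, outputs)

# The model can be called with a dict keyed by the Input names.
out = model({
    "x1": np.ones((2, 10), dtype="float32"),
    "x2": np.ones((2, 1), dtype="float32"),
})
print(out.shape)  # (2, 1)
```

Extra dict keys are ignored with a warning, which is what the `_enable_dict_to_input_mapping` logic further down implements.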
- if not tf.compat.v1.executing_eagerly_outside_functions(): - if any(not hasattr(tensor, '_keras_history') for tensor in self.outputs): - base_layer_utils.create_keras_history(self._nested_outputs) - - self._validate_graph_inputs_and_outputs() - - # A Network does not create weights of its own, thus it is already - # built. - self.built = True - self._build_input_shape = tf.nest.map_structure(lambda x: x.shape, inputs) - self._compute_output_and_mask_jointly = True - # `_expects_training_arg` is True since the `training` argument is always - # present in the signature of the `call` method of a graph network. - self._call_spec.expects_training_arg = True - self._call_spec.expects_mask_arg = True - # A graph network does not autocast inputs, as its layers will cast them - # instead. - self._autocast = False - - self._input_layers = [] - self._output_layers = [] - self._input_coordinates = [] - self._output_coordinates = [] - - # This is for performance optimization when calling the Network on new - # inputs. Every time the Network is called on a set on input tensors, - # we compute the output tensors, output masks and output shapes in one pass, - # then cache them here. When any of these outputs is queried later, we - # retrieve it from there instead of recomputing it. - self._output_mask_cache = {} - self._output_tensor_cache = {} - self._output_shape_cache = {} - - # Build self._output_layers: - for x in self.outputs: - layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access - self._output_layers.append(layer) - self._output_coordinates.append((layer, node_index, tensor_index)) - - # Build self._input_layers: - for x in self.inputs: - layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access - # It's supposed to be an input layer, so only one node - # and one tensor output. - assert node_index == 0 - assert tensor_index == 0 - self._input_layers.append(layer) - self._input_coordinates.append((layer, node_index, tensor_index)) - - # Keep track of the network's nodes and layers. - nodes, nodes_by_depth, layers, _ = _map_graph_network( - self.inputs, self.outputs) - self._network_nodes = nodes - self._nodes_by_depth = nodes_by_depth - self._self_tracked_trackables = layers - self._layer_call_argspecs = {} - for layer in self._self_tracked_trackables: - self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(layer.call) - - # Build self.input_names and self.output_names. - self._set_output_names() - self.input_names = [] - self._feed_input_names = [] - self._feed_inputs = [] - self._feed_input_shapes = [] - for layer in self._input_layers: - self.input_names.append(layer.name) - if layer.is_placeholder: - self._feed_input_names.append(layer.name) - # Use batch_input_shape here because non-eager composite tensors may not - # have a shape attribute that's meaningful (sparse, for instance, has - # a tensor that's non-constant and needs to be fed). This means that - # input layers that create placeholders will need to have the - # batch_input_shape attr to allow for input shape validation. - self._feed_input_shapes.append(layer._batch_input_shape) - self._feed_inputs.append(layer.input) - - self._compute_tensor_usage_count() - self._set_save_spec(self._nested_inputs) - tf_utils.assert_no_legacy_layers(self.layers) - - # Note that this method is used by both functional and sequential models, - # so we can't just have this method in functional.__init__, which will miss - # the coverage of sequential model. 
- if self._layout_map is not None: - layout_map_lib._map_functional_model_variable(self, self._layout_map) - - @property - def input(self): - """Retrieves the input tensor(s) of a layer. - - Only applicable if the layer has exactly one input, - i.e. if it is connected to one incoming layer. - - Returns: - Input tensor or list of input tensors. - - Raises: - RuntimeError: If called in Eager mode. - AttributeError: If no inbound nodes are found. + Args: + inputs: List of input tensors (must be created via `tf.keras.Input()` or + originated from `tf.keras.Input()`). + outputs: List of output tensors. + name: String, optional. Name of the model. + trainable: Boolean, optional. If the model's variables should be + trainable. """ - return self._nested_inputs - - @property - def input_shape(self): - """Retrieves the input shape(s) of a layer. - Only applicable if the layer has exactly one input, - i.e. if it is connected to one incoming layer, or if all inputs - have the same shape. + # See tf.Module for the usage of this property. + # The key of _layer_call_argspecs is a layer. tf.Module._flatten will fail + # to flatten the key since it is trying to convert Trackable/Layer to a + # string. + _TF_MODULE_IGNORED_PROPERTIES = frozenset( + itertools.chain( + ( + "_layer_call_argspecs", + "_output_mask_cache", + "_output_tensor_cache", + "_output_shape_cache", + ), + training_lib.Model._TF_MODULE_IGNORED_PROPERTIES, + ) + ) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__(self, inputs, outputs, name=None, trainable=True, **kwargs): + # This is used by the Model class, since we have some logic to swap the + # class in the __new__ method, which will lead to __init__ get invoked + # twice. Using the skip_init to skip one of the invocation of __init__ + # to avoid any side effects + skip_init = kwargs.pop("skip_init", False) + if skip_init: + return + generic_utils.validate_kwargs(kwargs, {}) + super().__init__(name=name, trainable=trainable) + # Check if the inputs contain any intermediate `KerasTensor` (not + # created by tf.keras.Input()). In this case we need to clone the `Node` + # and `KerasTensor` objects to mimic rebuilding a new model from new + # inputs. This feature is only enabled in TF2 not in v1 graph mode. + if tf.compat.v1.executing_eagerly_outside_functions(): + if not all( + [ + functional_utils.is_input_keras_tensor(t) + for t in tf.nest.flatten(inputs) + ] + ): + inputs, outputs = functional_utils.clone_graph_nodes( + inputs, outputs + ) + self._init_graph_network(inputs, outputs) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _init_graph_network(self, inputs, outputs): + # This method is needed for Sequential to reinitialize graph network + # when layer is added or removed. + + base_layer.keras_api_gauge.get_cell("Functional").set(True) + self._is_graph_network = True + + # Normalize and set self.inputs, self.outputs. + if isinstance(inputs, list) and len(tf.nest.flatten(inputs)) == 1: + inputs = inputs[0] + if isinstance(outputs, list) and len(tf.nest.flatten(outputs)) == 1: + outputs = outputs[0] + self._nested_inputs = inputs + self._nested_outputs = outputs + self.inputs = tf.nest.flatten(inputs) + self.outputs = tf.nest.flatten(outputs) + + # Models constructed with a single Tensor or list of Tensors can + # be called with a dict, where the keys of the dict are the names + # of the `Input` objects. Extra keys are ignored with warning. 
+ if not tf.nest.is_nested(self._nested_inputs):
+ self._enable_dict_to_input_mapping = True
+ elif isinstance(self._nested_inputs, (list, tuple)) and not any(
+ tf.nest.is_nested(t) for t in self._nested_inputs
+ ):
+ self._enable_dict_to_input_mapping = True
+ elif isinstance(self._nested_inputs, dict) and not any(
+ tf.nest.is_nested(t) for t in self._nested_inputs.values()
+ ):
+ self._enable_dict_to_input_mapping = True
+ else:
+ self._enable_dict_to_input_mapping = False
+
+ if not tf.compat.v1.executing_eagerly_outside_functions():
+ if any(
+ not hasattr(tensor, "_keras_history") for tensor in self.outputs
+ ):
+ base_layer_utils.create_keras_history(self._nested_outputs)
+
+ self._validate_graph_inputs_and_outputs()
+
+ # A Network does not create weights of its own, thus it is already
+ # built.
+ self.built = True
+ self._build_input_shape = tf.nest.map_structure(
+ lambda x: x.shape, inputs
+ )
+ self._compute_output_and_mask_jointly = True
+ # `_expects_training_arg` is True since the `training` argument is
+ # always present in the signature of the `call` method of a graph
+ # network.
+ self._call_spec.expects_training_arg = True
+ self._call_spec.expects_mask_arg = True
+ # A graph network does not autocast inputs, as its layers will cast them
+ # instead.
+ self._autocast = False
+
+ self._input_layers = []
+ self._output_layers = []
+ self._input_coordinates = []
+ self._output_coordinates = []
+
+ # This is for performance optimization when calling the Network on new
+ # inputs. Every time the Network is called on a set of input tensors, we
+ # compute the output tensors, output masks and output shapes in one
+ # pass, then cache them here. When any of these outputs is queried
+ # later, we retrieve it from there instead of recomputing it.
+ self._output_mask_cache = {}
+ self._output_tensor_cache = {}
+ self._output_shape_cache = {}
+
+ # Build self._output_layers:
+ for x in self.outputs:
+ (
+ layer,
+ node_index,
+ tensor_index,
+ ) = x._keras_history
+ self._output_layers.append(layer)
+ self._output_coordinates.append((layer, node_index, tensor_index))
+
+ # Build self._input_layers:
+ for x in self.inputs:
+ (
+ layer,
+ node_index,
+ tensor_index,
+ ) = x._keras_history
+ # It's supposed to be an input layer, so only one node
+ # and one tensor output.
+ assert node_index == 0
+ assert tensor_index == 0
+ self._input_layers.append(layer)
+ self._input_coordinates.append((layer, node_index, tensor_index))
+
+ # Keep track of the network's nodes and layers.
+ nodes, nodes_by_depth, layers, _ = _map_graph_network(
+ self.inputs, self.outputs
+ )
+ self._network_nodes = nodes
+ self._nodes_by_depth = nodes_by_depth
+ self._self_tracked_trackables = layers
+ self._layer_call_argspecs = {}
+ for layer in self._self_tracked_trackables:
+ self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(
+ layer.call
+ )
+
+ # Build self.input_names and self.output_names.
+ self._set_output_names()
+ self.input_names = []
+ self._feed_input_names = []
+ self._feed_inputs = []
+ self._feed_input_shapes = []
+ for layer in self._input_layers:
+ self.input_names.append(layer.name)
+ if layer.is_placeholder:
+ self._feed_input_names.append(layer.name)
+ # Use batch_input_shape here because non-eager composite tensors
+ # may not have a shape attribute that's meaningful (sparse, for
+ # instance, has a tensor that's non-constant and needs to be
+ # fed). This means that input layers that create placeholders
+ # will need to have the batch_input_shape attr to allow for
+ # input shape validation.
+ self._feed_input_shapes.append(layer._batch_input_shape)
+ self._feed_inputs.append(layer.input)
+
+ self._compute_tensor_usage_count()
+ self._set_save_spec(self._nested_inputs)
+ tf_utils.assert_no_legacy_layers(self.layers)
+
+ # Note that this method is used by both functional and sequential
+ # models, so we can't just have this method in functional.__init__,
+ # which will miss the coverage of sequential model.
+ if self._layout_map is not None:
+ layout_map_lib._map_functional_model_variable(
+ self, self._layout_map
+ )
+
+ @property
+ def input(self):
+ """Retrieves the input tensor(s) of a layer.
+
+ Only applicable if the layer has exactly one input,
+ i.e. if it is connected to one incoming layer.
+
+ Returns:
+ Input tensor or list of input tensors.
+
+ Raises:
+ RuntimeError: If called in Eager mode.
+ AttributeError: If no inbound nodes are found.
+ """
+ return self._nested_inputs
+
+ @property
+ def input_shape(self):
+ """Retrieves the input shape(s) of a layer.
+
+ Only applicable if the layer has exactly one input,
+ i.e. if it is connected to one incoming layer, or if all inputs
+ have the same shape.
+
+ Returns:
+ Input shape, as an integer shape tuple
+ (or list of shape tuples, one tuple per input tensor).
+
+ Raises:
+ AttributeError: if the layer has no defined input_shape.
+ RuntimeError: if called in Eager mode.
+ """
+ return tf.nest.map_structure(backend.int_shape, self.input)
+
+ @property
+ def input_spec(self):
+ if hasattr(self, "_manual_input_spec"):
+ return self._manual_input_spec
+ if isinstance(self._nested_inputs, (dict, list, tuple)) and len(
+ self._nested_inputs
+ ) != len(self.inputs):
+ # Case where we have a nested structure.
+ # In such a case we can't safely run any checks.
+ return None
+ if isinstance(self._nested_inputs, dict):
+ # Case where `_nested_inputs` is a plain dict of Inputs.
+ names = sorted(self._nested_inputs.keys())
+ return [
+ input_spec.InputSpec(
+ shape=shape_with_no_batch_size(self._nested_inputs[name]),
+ allow_last_axis_squeeze=True,
+ name=name,
+ )
+ for name in names
+ ]
+ else:
+ # Single input, or list / tuple of inputs.
+ # The data may be passed as a dict keyed by input name.
+ return [
+ input_spec.InputSpec(
+ shape=shape_with_no_batch_size(x),
+ allow_last_axis_squeeze=True,
+ name=x._keras_history.layer.name,
+ )
+ for x in self.inputs
+ ]
+
+ @input_spec.setter
+ def input_spec(self, value):
+ self._manual_input_spec = value
+
+ @property
+ def output(self):
+ """Retrieves the output tensor(s) of a layer.
+
+ Only applicable if the layer has exactly one output,
+ i.e. if it is connected to one incoming layer.
+
+ Returns:
+ Output tensor or list of output tensors.
+
+ Raises:
+ AttributeError: if the layer is connected to more than one incoming
+ layer.
+ RuntimeError: if called in Eager mode.
+ """
+ return self._nested_outputs
+
+ @property
+ def output_shape(self):
+ """Retrieves the output shape(s) of a layer.
+
+ Only applicable if the layer has one output,
+ or if all outputs have the same shape.
+
+ Returns:
+ Output shape, as an integer shape tuple
+ (or list of shape tuples, one tuple per output tensor).
+
+ Raises:
+ AttributeError: if the layer has no defined output shape.
+ RuntimeError: if called in Eager mode.
+ """ + return tf.nest.map_structure(backend.int_shape, self.output) + + def _set_output_names(self): + """Assigns unique names to the Network's outputs. + + Output layers with multiple output tensors would otherwise lead to + duplicate names in self.output_names. + """ + uniquified = [] + output_names = set() + prefix_count = {} + for layer in self._output_layers: + proposal = layer.name + while proposal in output_names: + existing_count = prefix_count.get(layer.name, 1) + proposal = f"{layer.name}_{existing_count}" + prefix_count[layer.name] = existing_count + 1 + output_names.add(proposal) + uniquified.append(proposal) + self.output_names = uniquified + + @property + def _layer_checkpoint_dependencies(self): + """Dictionary of layer dependencies to be included in the checkpoint.""" + weight_layer_index = 0 + + dependencies = collections.OrderedDict() + for layer_index, layer in enumerate(self.layers): + try: + if layer.weights: + # Keep a separate index for layers which have weights. This + # allows users to insert Layers without weights anywhere in + # the network without breaking checkpoints. + dependencies[ + "layer_with_weights-%d" % weight_layer_index + ] = layer + weight_layer_index += 1 + except ValueError: + # The layer might have weights, but may not be built yet. We + # just treat it as layer without weight. + pass + + # Even if it doesn't have weights, we should still track everything + # in case it has/will have Trackable dependencies. + dependencies["layer-%d" % layer_index] = layer + return dependencies + + def _trackable_children(self, save_type="checkpoint", **kwargs): + dependencies = self._layer_checkpoint_dependencies + dependencies.update(super()._trackable_children(save_type, **kwargs)) + return dependencies + + def _lookup_dependency(self, name, cached_dependencies=None): + if cached_dependencies: + return cached_dependencies.get(name) + # Fall back to slow lookup (`layer_checkpoint_dependencies` does a + # thorough check of all layer to see if they contain weights.) + layer_dependencies = self._layer_checkpoint_dependencies + if name in layer_dependencies: + return layer_dependencies[name] + return super()._lookup_dependency(name) + + def _handle_deferred_layer_dependencies(self, layers): + """Handles layer checkpoint dependencies that are added after init.""" + layer_checkpoint_dependencies = self._layer_checkpoint_dependencies + layer_to_name = {v: k for k, v in layer_checkpoint_dependencies.items()} + for layer in layers: + if layer in layer_to_name: + self._handle_deferred_dependencies( + name=layer_to_name[layer], trackable=layer + ) + + @property + def _should_compute_mask(self): + return True + + def compute_mask(self, inputs, mask): + # TODO(omalleyt): b/123540974 This function is not really safe to call + # by itself because it will duplicate any updates and losses in graph + # mode by `call`ing the Layers again. + output_tensors = self._run_internal_graph(inputs, mask=mask) + return tf.nest.map_structure( + lambda t: getattr(t, "_keras_mask", None), output_tensors + ) + + @doc_controls.do_not_doc_inheritable + def call(self, inputs, training=None, mask=None): + """Calls the model on new inputs. + + In this case `call` just reapplies + all ops in the graph to the new inputs + (e.g. build a new computational graph from the provided inputs). + + Args: + inputs: A tensor or list of tensors. + training: Boolean or boolean scalar tensor, indicating whether to + run the `Network` in training mode or inference mode. + mask: A mask or list of masks. 
A mask can be + either a tensor or None (no mask). + + Returns: + A tensor if there is a single output, or + a list of tensors if there are more than one outputs. + """ + return self._run_internal_graph(inputs, training=training, mask=mask) + + def compute_output_shape(self, input_shape): + # Convert any shapes in tuple format to TensorShapes. + input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False) + + if len(tf.nest.flatten(input_shape)) != len( + tf.nest.flatten(self._input_layers) + ): + raise ValueError( + f"Invalid `input_shape` argument {input_shape}: " + f"the model expects {len(self._input_layers)} " + "input tensors." + ) - Returns: - Input shape, as an integer shape tuple - (or list of shape tuples, one tuple per input tensor). + # Use the tuple of TensorShape as the cache key, since a tuple is hashable + # and can be used as a hash key. + try: + cache_key = tuple( + tf_utils.convert_shapes(input_shape, to_tuples=True) + ) + if cache_key in self._output_shape_cache: + # Cache hit. Return shapes as TensorShapes. + return self._output_shape_cache[cache_key] + except ValueError: + # In case there are unknown TensorShapes, e.g. for sparse tensor inputs, + # we skip the caching since the shape is unknown. + pass + + layers_to_output_shapes = {} + for layer, shape in zip( + self._input_layers, tf.nest.flatten(input_shape) + ): + # It's an input layer: then `compute_output_shape` is identity, + # and there is only one node and one tensor. + shape_key = layer.name + "_0_0" + layers_to_output_shapes[shape_key] = shape - Raises: - AttributeError: if the layer has no defined input_shape. - RuntimeError: if called in Eager mode. - """ - return tf.nest.map_structure(backend.int_shape, self.input) - - @property - def input_spec(self): - if hasattr(self, '_manual_input_spec'): - return self._manual_input_spec - if (isinstance(self._nested_inputs, (dict, list, tuple)) and - len(self._nested_inputs) != len(self.inputs)): - # Case where we have a nested structure. - # In such a case we can't safely run any checks. - return None - if isinstance(self._nested_inputs, dict): - # Case where `_nested_inputs` is a plain dict of Inputs. - names = sorted(self._nested_inputs.keys()) - return [input_spec.InputSpec( - shape=shape_with_no_batch_size(self._nested_inputs[name]), - allow_last_axis_squeeze=True, name=name) for name in names] - else: - # Single input, or list / tuple of inputs. - # The data may be passed as a dict keyed by input name. - return [input_spec.InputSpec( - shape=shape_with_no_batch_size(x), allow_last_axis_squeeze=True, - name=x._keras_history.layer.name) for x in self.inputs] + depth_keys = list(self._nodes_by_depth.keys()) + depth_keys.sort(reverse=True) + # Iterate over nodes, by depth level. + if len(depth_keys) > 1: + for depth in depth_keys: + nodes = self._nodes_by_depth[depth] + for node in nodes: + layer = node.layer + if layer in self._input_layers: + # We've already covered the input layers + # a few lines above. + continue + # Get the input shapes for the first argument of the node + layer_input_shapes = [] + layer_inputs = node.call_args[0] + for layer_input in tf.nest.flatten(layer_inputs): + kh = layer_input._keras_history + input_layer_key = kh.layer.name + "_%s_%s" % ( + kh.node_index, + kh.tensor_index, + ) + layer_input_shapes.append( + layers_to_output_shapes[input_layer_key] + ) + layer_input_shapes = tf.nest.pack_sequence_as( + layer_inputs, layer_input_shapes + ) + # Layers expect shapes to be tuples for + # `compute_output_shape`.
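`compute_output_shape` walks the graph symbolically, so shapes (including an unknown batch dimension) propagate without executing any ops, and each distinct input-shape tuple gets its own cache entry. A short usage sketch, assuming TF 2.x:

import tensorflow as tf

inp = tf.keras.Input(shape=(16,))
out = tf.keras.layers.Dense(4)(tf.keras.layers.Dense(8)(inp))
model = tf.keras.Model(inp, out)
print(model.compute_output_shape((None, 16)))  # (None, 4)
print(model.compute_output_shape((32, 16)))    # (32, 4); a separate cache entry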
+ layer_input_shapes = tf_utils.convert_shapes( + layer_input_shapes, to_tuples=True + ) + layer_output_shapes = layer.compute_output_shape( + layer_input_shapes + ) + # Convert back to TensorShapes. + layer_output_shapes = tf_utils.convert_shapes( + layer_output_shapes, to_tuples=False + ) + + node_index = layer._inbound_nodes.index(node) + for j, shape in enumerate( + tf.nest.flatten(layer_output_shapes) + ): + shape_key = layer.name + f"_{node_index}_{j}" + layers_to_output_shapes[shape_key] = shape + + # Read final output shapes from layers_to_output_shapes. + output_shapes = [] + for i in range(len(self._output_layers)): + layer, node_index, tensor_index = self._output_coordinates[i] + shape_key = layer.name + f"_{node_index}_{tensor_index}" + output_shapes.append(layers_to_output_shapes[shape_key]) + output_shapes = tf.nest.pack_sequence_as( + self._nested_outputs, output_shapes + ) + # Store in cache. + self._output_shape_cache[cache_key] = output_shapes + + # Return shapes as TensorShapes. + return output_shapes + + def _init_set_name(self, name, zero_based=True): + if not name: + cls_name = self.__class__.__name__ + if self.__class__ == Functional: + # Hide the functional class name from the user, since it's not a + # publicly visible class. Use "Model" instead. + cls_name = "Model" + self._name = backend.unique_object_name( + generic_utils.to_snake_case(cls_name), zero_based=zero_based + ) + else: + self._name = name - @input_spec.setter - def input_spec(self, value): - self._manual_input_spec = value + def _run_internal_graph(self, inputs, training=None, mask=None): + """Computes output tensors for new inputs. - @property - def output(self): - """Retrieves the output tensor(s) of a layer. + # Note: + - Can be run on non-Keras tensors. - Only applicable if the layer has exactly one output, - i.e. if it is connected to one incoming layer. + Args: + inputs: Tensor or nested structure of Tensors. + training: Boolean learning phase. + mask: (Optional) Tensor or nested structure of Tensors. - Returns: - Output tensor or list of output tensors. + Returns: + output_tensors + """ + inputs = self._flatten_to_reference_inputs(inputs) + if mask is None: + masks = [None] * len(inputs) + else: + masks = self._flatten_to_reference_inputs(mask) + for input_t, mask in zip(inputs, masks): + input_t._keras_mask = mask + + # Dictionary mapping reference tensors to computed tensors. + tensor_dict = {} + tensor_usage_count = self._tensor_usage_count + for x, y in zip(self.inputs, inputs): + y = self._conform_to_reference_input(y, ref_input=x) + x_id = str(id(x)) + tensor_dict[x_id] = [y] * tensor_usage_count[x_id] + + nodes_by_depth = self._nodes_by_depth + depth_keys = list(nodes_by_depth.keys()) + depth_keys.sort(reverse=True) + + for depth in depth_keys: + nodes = nodes_by_depth[depth] + for node in nodes: + if node.is_input: + continue # Input tensors already exist. + + if any(t_id not in tensor_dict for t_id in node.flat_input_ids): + continue # Node is not computable, try skipping. + + args, kwargs = node.map_arguments(tensor_dict) + outputs = node.layer(*args, **kwargs) + + # Update tensor_dict.
+ for x_id, y in zip( + node.flat_output_ids, tf.nest.flatten(outputs) + ): + tensor_dict[x_id] = [y] * tensor_usage_count[x_id] + + output_tensors = [] + for x in self.outputs: + x_id = str(id(x)) + assert x_id in tensor_dict, "Could not compute output " + str(x) + output_tensors.append(tensor_dict[x_id].pop()) + + return tf.nest.pack_sequence_as(self._nested_outputs, output_tensors) + + def _flatten_to_reference_inputs(self, tensors): + """Maps `tensors` to their respective `keras.Input`.""" + if self._enable_dict_to_input_mapping and isinstance(tensors, dict): + ref_inputs = self._nested_inputs + if not tf.nest.is_nested(ref_inputs): + ref_inputs = [self._nested_inputs] + if isinstance(ref_inputs, dict): + # In the case that the graph is constructed with dict input + # tensors, we will use the original dict key to map with the + # keys in the input data. Note that the model.inputs is using + # nest.flatten to process the input tensors, which means the + # dict input tensors are ordered by their keys. + ref_input_names = sorted(ref_inputs.keys()) + else: + ref_input_names = [ + inp._keras_history.layer.name for inp in ref_inputs + ] + + # Raise a warning if there is more input data than input + # tensors. + if len(tensors) > len(ref_input_names): + warnings.warn( + "Input dict contained keys {} which did not match any " + "model input. They will be ignored by the model.".format( + [n for n in tensors.keys() if n not in ref_input_names] + ), + stacklevel=2, + ) + + try: + # Flatten in the order `Input`s were passed during Model + # construction. + return [tensors[n] for n in ref_input_names] + except KeyError: + # TODO(b/151582614) + return tf.nest.flatten(tensors) + + # Otherwise both self.inputs and tensors will already be in same order. + return tf.nest.flatten(tensors) - Raises: - AttributeError: if the layer is connected to more than one incoming - layers. - RuntimeError: if called in Eager mode. - """ - return self._nested_outputs + def _conform_to_reference_input(self, tensor, ref_input): + """Set shape and dtype based on `keras.Input`s.""" + if isinstance(tensor, tf.Tensor): + # Allow (None,) and (None, 1) Tensors to be passed interchangeably. + # Use the shape specified by the `keras.Input`. + t_shape = tensor.shape + t_rank = t_shape.rank + ref_shape = ref_input.shape + ref_rank = ref_shape.rank + keras_history = getattr(tensor, "_keras_history", None) + if t_rank is not None and ref_rank is not None: + # Should squeeze last dimension. True if tensor is (BATCH, ..., + # 1) and reference is (BATCH, ...). + if t_rank == ref_rank + 1 and t_shape[-1] == 1: + tensor = tf.squeeze(tensor, axis=-1) + # Should expand last dimension. True if tensor is (BATCH, ...) + # and reference is (BATCH, ..., 1). + elif t_rank == ref_rank - 1 and ref_shape[-1] == 1: + tensor = tf.expand_dims(tensor, axis=-1) + if keras_history is not None: # Restore keras history. + tensor._keras_history = keras_history + + # Dtype casting. + tensor = tf.cast(tensor, dtype=ref_input.dtype) + elif tf_utils.is_extension_type(tensor): + # Dtype casting (If the extension type has a non-variant dtype and + # supports being cast). Only cast if necessary (since some + # extension types may not implement tf.cast).
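The rank adjustment in `_conform_to_reference_input` above is what lets `(batch, ..., 1)` and `(batch, ...)` tensors be passed interchangeably. A hedged sketch of the effect, assuming TF 2.x (the `allow_last_axis_squeeze` specs built earlier cooperate with this tolerance):

import tensorflow as tf

inp = tf.keras.Input(shape=(3,))
model = tf.keras.Model(inp, tf.keras.layers.Dense(1)(inp))
# A trailing axis of size 1 is squeezed away to match the (None, 3) reference.
print(model(tf.ones((2, 3, 1))).shape)  # (2, 1)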
+ tensor_dtype = getattr(tensor, "dtype", None) + ref_input_dtype = getattr(ref_input, "dtype", None) + if ( + ref_input_dtype is not None + and tensor_dtype is not None + and tensor_dtype != ref_input_dtype + and ref_input_dtype != tf.variant + ): + tensor = tf.cast(tensor, dtype=ref_input_dtype) + + return tensor + + @generic_utils.default + def get_config(self): + # Prepare base arguments + config = { + "name": self.name, + "trainable": self.trainable, + } - @property - def output_shape(self): - """Retrieves the output shape(s) of a layer. + if saved_model_utils.in_tf_saved_model_scope(): + # SavedModel special case: need to preserve legacy (potentially + # incorrect) behavior. + return copy.deepcopy(get_network_config(self, config=config)) + + # Check whether the class has a constructor compatible with a Functional + # model or if it has a custom constructor. + if has_functional_like_constructor(self.__class__): + # Only return a Functional config if the constructor is the same + # as that of a Functional model. This excludes subclassed Functional + # models with a custom __init__. + config = copy.deepcopy(get_network_config(self, config=config)) + else: + # Try to autogenerate config + xtra_args = set(config.keys()) + if getattr(self, "_auto_get_config", False): + config.update(self._auto_config.config) + # Remove args not explicitly supported + argspec = tf_inspect.getfullargspec(self.__init__) + if argspec.varkw != "kwargs": + for key in xtra_args - xtra_args.intersection(argspec.args[1:]): + config.pop(key, None) + return config + + def get_weight_paths(self): + result = {} + for layer in self.layers: + ( + descendants, + object_paths_dict, + ) = tf.__internal__.tracking.ObjectGraphView( + layer + ).breadth_first_traversal() + for descendant in descendants: + if isinstance(descendant, tf.Variable): + trackable_references = object_paths_dict[descendant] + object_path = ".".join( + [t.name for t in trackable_references] + ) + result[layer.name + "." + object_path] = descendant + return result + + def _validate_graph_inputs_and_outputs(self): + """Validates the inputs and outputs of a Graph Network.""" + # Check for redundancy in inputs. + if len({id(i) for i in self.inputs}) != len(self.inputs): + raise ValueError( + "The list of inputs passed to the model " + "contains the same input multiple times. " + "All inputs should only appear once. " + f"Received inputs={self.inputs}" + ) + + for x in self.inputs: + # Check that x has appropriate `_keras_history` metadata. + if not hasattr(x, "_keras_history"): + cls_name = self.__class__.__name__ + raise ValueError( + f"Input tensors to a {cls_name} model " + "must come from `tf.keras.Input`. " + f"Received inputs={x} (missing previous layer metadata)." + ) + # Check that x is an input tensor. + + layer = x._keras_history.layer + if len(layer._inbound_nodes) > 1 or ( + layer._inbound_nodes and not layer._inbound_nodes[0].is_input + ): + cls_name = self.__class__.__name__ + logging.warning( + f"{cls_name} model inputs must come from " + "`tf.keras.Input` (thus holding past layer metadata). " + "They cannot be the output of " + "a previous non-Input layer. " + "Here, a tensor specified as " + f'input to "{self.name}" was not an Input tensor, ' + f'it was generated by layer "{layer.name}".\n' + "Note that input tensors are " + "instantiated via `tensor = tf.keras.Input(shape)`.\n" + f"The tensor that caused the issue was: {x}" + ) + + # Check compatibility of batch sizes of Input Layers.
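`get_weight_paths` above flattens every variable reachable from each layer into a dict keyed by `<layer name>.<variable path>`. A small usage sketch, assuming a TF version (2.9+) where `Model.get_weight_paths` is available:

import tensorflow as tf

inp = tf.keras.Input(shape=(4,))
model = tf.keras.Model(inp, tf.keras.layers.Dense(2, name="head")(inp))
for path, variable in model.get_weight_paths().items():
    print(path, variable.shape)  # e.g. "head.kernel (4, 2)" and "head.bias (2,)"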
+ input_batch_sizes = set( + [ + training_utils.get_static_batch_size(x._keras_history.layer) + for x in self.inputs + ] + ) + input_batch_sizes.discard(None) + if len(input_batch_sizes) > 1: + logging.warning( + "Found incompatible static batch sizes among the " + f"inputs. Batch sizes: {sorted(input_batch_sizes)}" + ) + + for x in self.outputs: + if not hasattr(x, "_keras_history"): + cls_name = self.__class__.__name__ + raise ValueError( + f"Output tensors of a {cls_name} model must be " + "the output of a TensorFlow `Layer` " + f"(thus holding past layer metadata). Found: {x}" + ) + + def _insert_layers(self, layers, relevant_nodes=None): + """Inserts Layers into the Network after Network creation. + + This is only valid for Keras Graph Networks. Layers added via this + function will be included in the `call` computation and `get_config` of + this Network. They will not be added to the Network's outputs. + + Args: + layers: Arbitrary nested structure of Layers. Layers must be reachable + from one or more of the `keras.Input` Tensors that correspond to + this Network's inputs. + relevant_nodes: Nodes from the Layers that should be considered part + of this Network. If `None`, all Nodes will be considered part of + this Network. + + Raises: + ValueError: If the layers depend on `Input`s not found in this Model. + """ + layers = tf.nest.flatten(layers) + tf_utils.assert_no_legacy_layers(layers) + node_to_depth = {} + for depth, nodes in self._nodes_by_depth.items(): + node_to_depth.update({node: depth for node in nodes}) + # The nodes of these Layers that are relevant to this Network. If not + # provided, assume all Nodes are relevant + if not relevant_nodes: + relevant_nodes = tf.nest.flatten( + [layer._inbound_nodes for layer in layers] + ) + network_nodes = set(relevant_nodes + list(node_to_depth.keys())) + + def _get_min_depth(node): + """Gets the minimum depth at which node can be computed.""" + min_depth = 0 + for layer, node_id, _, _ in node.iterate_inbound(): + inbound_node = layer._inbound_nodes[node_id] + if inbound_node in node_to_depth: + min_depth = min(min_depth, node_to_depth[inbound_node]) + elif inbound_node not in network_nodes: + continue + else: + # Previous relevant nodes haven't been processed yet. + return None + # New node is one shallower than its shallowest input. + return min_depth - 1 + + # Insert nodes into `_nodes_by_depth` and other node attrs. + unprocessed_nodes = copy.copy(relevant_nodes) + i = 0 + while unprocessed_nodes: + i += 1 + # Do a sanity check. This can occur if `Input`s from outside this + # Model are being relied on. + if i > 10000: + raise ValueError( + "Layers could not be added due to missing dependencies." + ) + + node = unprocessed_nodes.pop(0) + depth = _get_min_depth(node) + if depth is None: # Defer until inbound nodes are processed. + unprocessed_nodes.append(node) + continue + node_key = _make_node_key( + node.layer.name, node.layer._inbound_nodes.index(node) + ) + if node_key not in self._network_nodes: + node_to_depth[node] = depth + self._network_nodes.add(node_key) + self._nodes_by_depth[depth].append(node) + + # Insert layers and update other layer attrs. 
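Note that the static batch-size check above only logs a warning; model construction still succeeds. A sketch of the triggering condition, assuming TF 2.x:

import tensorflow as tf

a = tf.keras.Input(batch_size=8, shape=(2,))
b = tf.keras.Input(batch_size=16, shape=(2,))
model = tf.keras.Model([a, b], tf.keras.layers.Concatenate()([a, b]))
# Logs: "Found incompatible static batch sizes among the inputs.
#        Batch sizes: [8, 16]"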
+ layer_set = set(self._self_tracked_trackables) + deferred_layers = [] + for layer in layers: + if layer not in layer_set: + self._self_tracked_trackables.append(layer) + deferred_layers.append(layer) + self._layer_call_argspecs[layer] = tf_inspect.getfullargspec( + layer.call + ) + layer_set.add(layer) + self._handle_deferred_layer_dependencies(deferred_layers) + + self._compute_tensor_usage_count() + + def _compute_tensor_usage_count(self): + """Compute the number of tensor usages for all the output tensors of layers. + + The computed tensor usage count is saved as `self._tensor_usage_count`. + This is later used for saving memory in eager computation by releasing + no-longer-needed tensors as early as possible. + """ + tensor_usage_count = collections.Counter() + available_tensors = set(str(id(tensor)) for tensor in self.inputs) + + depth_keys = list(self._nodes_by_depth.keys()) + depth_keys.sort(reverse=True) + depth_keys = depth_keys[1:] + + for depth in depth_keys: + for node in self._nodes_by_depth[depth]: + input_tensors = { + str(id(tensor)) + for tensor in tf.nest.flatten(node.keras_inputs) + } + if input_tensors.issubset(available_tensors): + for tensor in tf.nest.flatten(node.keras_inputs): + tensor_usage_count[str(id(tensor))] += 1 + + for output_tensor in tf.nest.flatten(node.outputs): + available_tensors.add(str(id(output_tensor))) + + for tensor in self.outputs: + tensor_usage_count[str(id(tensor))] += 1 - Only applicable if the layer has one output, - or if all outputs have the same shape. + self._tensor_usage_count = tensor_usage_count + + def _assert_weights_created(self): + # Override the implementation in Model. + # The Functional model should always have weights created already. + return + + def _graph_network_add_loss(self, symbolic_loss): + new_nodes, new_layers = _map_subgraph_network( + self.inputs, [symbolic_loss] + ) + # Losses must be keyed on inputs no matter what in order to be supported + # in DistributionStrategy. + add_loss_layer = base_layer.AddLoss( + unconditional=False, dtype=symbolic_loss.dtype + ) + add_loss_layer(symbolic_loss) + new_nodes.extend(add_loss_layer.inbound_nodes) + new_layers.append(add_loss_layer) + self._insert_layers(new_layers, new_nodes) + + def _graph_network_add_metric(self, value, aggregation, name): + new_nodes, new_layers = _map_subgraph_network(self.inputs, [value]) + add_metric_layer = base_layer.AddMetric( + aggregation, name, dtype=value.dtype + ) + add_metric_layer(value) + new_nodes.extend(add_metric_layer.inbound_nodes) + new_layers.append(add_metric_layer) + self._insert_layers(new_layers, new_nodes) + + @property + def _trackable_saved_model_saver(self): + return network_serialization.NetworkSavedModelSaver(self) + + def _get_save_spec(self, dynamic_batch=True, inputs_only=True): + if getattr(self, "_has_explicit_input_shape", True): + # Functional models and Sequential models that have an explicit + # input shape should use the batch size set by the input layer. + dynamic_batch = False + return super()._get_save_spec(dynamic_batch, inputs_only) - Returns: - Output shape, as an integer shape tuple - (or list of shape tuples, one tuple per output tensor). - Raises: - AttributeError: if the layer has no defined output shape. - RuntimeError: if called in Eager mode. - """ - return tf.nest.map_structure(backend.int_shape, self.output) +def _make_node_key(layer_name, node_index): + return layer_name + "_ib-" + str(node_index) - def _set_output_names(self): - """Assigns unique names to the Network's outputs.
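A standalone plain-Python rendering of the counting rule in `_compute_tensor_usage_count` (a toy graph with hypothetical names, not real Keras objects): each tensor is counted once per consuming node plus once per model output, so eager execution can release it after its final use.

import collections

# node -> tensors it consumes (toy data; "dense_out" is read twice by "add").
node_inputs = {"dense": ["x"], "add": ["dense_out", "dense_out"]}
usage = collections.Counter()
for consumed in node_inputs.values():
    for tensor in consumed:
        usage[tensor] += 1
usage["add_out"] += 1  # each model output is consumed once more by the caller
print(usage)  # Counter({'dense_out': 2, 'x': 1, 'add_out': 1})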
- Output layers with multiple output tensors would otherwise lead to duplicate - names in self.output_names. - """ - uniquified = [] - output_names = set() - prefix_count = {} - for layer in self._output_layers: - proposal = layer.name - while proposal in output_names: - existing_count = prefix_count.get(layer.name, 1) - proposal = '{}_{}'.format(layer.name, existing_count) - prefix_count[layer.name] = existing_count + 1 - output_names.add(proposal) - uniquified.append(proposal) - self.output_names = uniquified - - @property - def _layer_checkpoint_dependencies(self): - """Dictionary of layer dependencies to be included in the checkpoint.""" - weight_layer_index = 0 - - dependencies = collections.OrderedDict() - for layer_index, layer in enumerate(self.layers): - try: - if layer.weights: - # Keep a separate index for layers which have weights. This allows - # users to insert Layers without weights anywhere in the network - # without breaking checkpoints. - dependencies['layer_with_weights-%d' % weight_layer_index] = layer - weight_layer_index += 1 - except ValueError: - # The layer might have weights, but may not be built yet. We just treat - # it as layer without weight. - pass - - # Even if it doesn't have weights, we should still track everything in - # case it has/will have Trackable dependencies. - dependencies['layer-%d' % layer_index] = layer - return dependencies - - def _trackable_children(self, save_type='checkpoint', **kwargs): - dependencies = self._layer_checkpoint_dependencies - dependencies.update( - super()._trackable_children(save_type, **kwargs)) - return dependencies - - def _lookup_dependency(self, name): - layer_dependencies = self._layer_checkpoint_dependencies - if name in layer_dependencies: - return layer_dependencies[name] - return super()._lookup_dependency(name) - - def _handle_deferred_layer_dependencies(self, layers): - """Handles layer checkpoint dependencies that are added after init.""" - layer_checkpoint_dependencies = self._layer_checkpoint_dependencies - layer_to_name = {v: k for k, v in layer_checkpoint_dependencies.items()} - for layer in layers: - if layer in layer_to_name: - self._handle_deferred_dependencies(name=layer_to_name[layer], - trackable=layer) - - @property - def _should_compute_mask(self): - return True - - def compute_mask(self, inputs, mask): - # TODO(omalleyt): b/123540974 This function is not really safe to call - # by itself because it will duplicate any updates and losses in graph - # mode by `call`ing the Layers again. - output_tensors = self._run_internal_graph(inputs, mask=mask) - return tf.nest.map_structure(lambda t: getattr(t, '_keras_mask', None), - output_tensors) - - @doc_controls.do_not_doc_inheritable - def call(self, inputs, training=None, mask=None): - """Calls the model on new inputs. - - In this case `call` just reapplies - all ops in the graph to the new inputs - (e.g. build a new computational graph from the provided inputs). +def _map_graph_network(inputs, outputs): + """Validates a network's topology and gathers its layers and nodes. Args: - inputs: A tensor or list of tensors. - training: Boolean or boolean scalar tensor, indicating whether to run - the `Network` in training mode or inference mode. - mask: A mask or list of masks. A mask can be - either a tensor or None (no mask). + inputs: List of input tensors. + outputs: List of output tensors. Returns: - A tensor if there is a single output, or - a list of tensors if there are more than one outputs.
+ A tuple `(nodes, nodes_by_depth, layers, layers_by_depth)`. + - nodes: list of Node instances. + - nodes_by_depth: dict mapping ints (depth) to lists of node instances. + - layers: list of Layer instances. + - layers_by_depth: dict mapping ints (depth) to lists of layer instances. + + Raises: + ValueError: In case the network is not valid (e.g. disconnected graph). """ - return self._run_internal_graph( - inputs, training=training, mask=mask) - - def compute_output_shape(self, input_shape): - # Convert any shapes in tuple format to TensorShapes. - input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False) - - if (len(tf.nest.flatten(input_shape)) != - len(tf.nest.flatten(self._input_layers))): - raise ValueError(f'Invalid `input_shape` argument {input_shape}: ' - f'the model expects {len(self._input_layers)} ' - 'input tensors.') - - # Use the tuple of TensorShape as the cache key, since tuple is hashable - # and can be used as hash key. - try: - cache_key = tuple(tf_utils.convert_shapes(input_shape, to_tuples=True)) - if cache_key in self._output_shape_cache: - # Cache hit. Return shapes as TensorShapes. - return self._output_shape_cache[cache_key] - except ValueError: - # In case there are unknown TensorShape, eg for sparse tensor input, - # We skip the caching since the shape is unknown. - pass - - layers_to_output_shapes = {} - for layer, shape in zip(self._input_layers, tf.nest.flatten(input_shape)): - # It's an input layer: then `compute_output_shape` is identity, - # and there is only one node and one tensor.. - shape_key = layer.name + '_0_0' - layers_to_output_shapes[shape_key] = shape - - depth_keys = list(self._nodes_by_depth.keys()) + # "depth" is number of layers between output Node and the Node. + # Nodes are ordered from inputs -> outputs. + nodes_in_decreasing_depth, layer_indices = _build_map(outputs) + network_nodes = { + _make_node_key(node.layer.name, node.layer._inbound_nodes.index(node)) + for node in nodes_in_decreasing_depth + } + + nodes_depths = {} # dict {node: depth value} + layers_depths = {} # dict {layer: depth value} + + for node in reversed(nodes_in_decreasing_depth): + # If the depth is not set, the node has no outbound nodes (depth 0). + depth = nodes_depths.setdefault(node, 0) + + # Update the depth of the corresponding layer + previous_depth = layers_depths.get(node.layer, 0) + # If we've seen this layer before at a higher depth, + # we should use that depth instead of the node depth. + # This is necessary for shared layers that have inputs at different + # depth levels in the graph. + depth = max(depth, previous_depth) + layers_depths[node.layer] = depth + nodes_depths[node] = depth + + # Update the depth of inbound nodes. + # The "depth" of a node is the max of the depths + # of all nodes it is connected to + 1. + for node_dep in node.parent_nodes: + previous_depth = nodes_depths.get(node_dep, 0) + nodes_depths[node_dep] = max(depth + 1, previous_depth) + + # Handle inputs that are not connected to outputs. + # We do not error out here because the inputs may be used to compute losses + # and metrics. 
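The depth bookkeeping in `_map_graph_network` reduces to one backward pass: output nodes sit at depth 0 and each node pushes its parents to at least `depth + 1`. A plain-Python sketch over a hypothetical three-node chain (not real Node objects):

def compute_depths(nodes_inputs_to_outputs, parents):
    depths = {}
    for node in reversed(nodes_inputs_to_outputs):
        depth = depths.setdefault(node, 0)  # no consumers seen yet -> depth 0
        for parent in parents.get(node, []):
            depths[parent] = max(depth + 1, depths.get(parent, 0))
    return depths

# x -> dense -> out, in the inputs-to-outputs order that _build_map produces.
print(compute_depths(["x", "dense", "out"], {"out": ["dense"], "dense": ["x"]}))
# {'out': 0, 'dense': 1, 'x': 2}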
+ for input_t in inputs: + input_layer = input_t._keras_history[0] + if input_layer not in layers_depths: + layers_depths[input_layer] = 0 + layer_indices[input_layer] = -1 + nodes_depths[input_layer._inbound_nodes[0]] = 0 + network_nodes.add(_make_node_key(input_layer.name, 0)) + + # Build a dict {depth: list of nodes with this depth} + nodes_by_depth = collections.defaultdict(list) + for node, depth in nodes_depths.items(): + nodes_by_depth[depth].append(node) + + # Build a dict {depth: list of layers with this depth} + layers_by_depth = collections.defaultdict(list) + for layer, depth in layers_depths.items(): + layers_by_depth[depth].append(layer) + + # Get sorted list of layer depths. + depth_keys = list(layers_by_depth.keys()) depth_keys.sort(reverse=True) - # Iterate over nodes, by depth level. - if len(depth_keys) > 1: - for depth in depth_keys: - nodes = self._nodes_by_depth[depth] - for node in nodes: - layer = node.layer - if layer in self._input_layers: - # We've already covered the input layers - # a few lines above. - continue - # Get the input shapes for the first argument of the node - layer_input_shapes = [] - layer_inputs = node.call_args[0] - for layer_input in tf.nest.flatten(layer_inputs): - kh = layer_input._keras_history - input_layer_key = kh.layer.name + '_%s_%s' % (kh.node_index, - kh.tensor_index) - layer_input_shapes.append(layers_to_output_shapes[input_layer_key]) - layer_input_shapes = tf.nest.pack_sequence_as(layer_inputs, - layer_input_shapes) - # Layers expect shapes to be tuples for `compute_output_shape`. - layer_input_shapes = tf_utils.convert_shapes( - layer_input_shapes, to_tuples=True) - layer_output_shapes = layer.compute_output_shape(layer_input_shapes) - # Convert back to TensorShapes. - layer_output_shapes = tf_utils.convert_shapes( - layer_output_shapes, to_tuples=False) - - node_index = layer._inbound_nodes.index(node) # pylint: disable=protected-access - for j, shape in enumerate(tf.nest.flatten(layer_output_shapes)): - shape_key = layer.name + '_%s_%s' % (node_index, j) - layers_to_output_shapes[shape_key] = shape - - # Read final output shapes from layers_to_output_shapes. - output_shapes = [] - for i in range(len(self._output_layers)): - layer, node_index, tensor_index = self._output_coordinates[i] - shape_key = layer.name + '_%s_%s' % (node_index, tensor_index) - output_shapes.append(layers_to_output_shapes[shape_key]) - output_shapes = tf.nest.pack_sequence_as(self._nested_outputs, - output_shapes) - # Store in cache. - self._output_shape_cache[cache_key] = output_shapes - - # Return shapes as TensorShapes. - return output_shapes - - def _init_set_name(self, name, zero_based=True): - if not name: - cls_name = self.__class__.__name__ - if self.__class__ == Functional: - # Hide the functional class name from user, since its not a public - # visible class. Use "Model" instead, - cls_name = 'Model' - self._name = backend.unique_object_name( - generic_utils.to_snake_case(cls_name), - zero_based=zero_based) - else: - self._name = name - def _run_internal_graph(self, inputs, training=None, mask=None): - """Computes output tensors for new inputs. - - # Note: - - Can be run on non-Keras tensors. - - Args: - inputs: Tensor or nested structure of Tensors. - training: Boolean learning phase. - mask: (Optional) Tensor or nested structure of Tensors. + # Set self.layers ordered by depth. 
+ layers = [] + for depth in depth_keys: + layers_for_depth = layers_by_depth[depth] + # Network.layers needs to have a deterministic order: + # here we order them by traversal order. + layers_for_depth.sort(key=lambda x: layer_indices[x]) + layers.extend(layers_for_depth) - Returns: - output_tensors - """ - inputs = self._flatten_to_reference_inputs(inputs) - if mask is None: - masks = [None] * len(inputs) - else: - masks = self._flatten_to_reference_inputs(mask) - for input_t, mask in zip(inputs, masks): - input_t._keras_mask = mask - - # Dictionary mapping reference tensors to computed tensors. - tensor_dict = {} - tensor_usage_count = self._tensor_usage_count - for x, y in zip(self.inputs, inputs): - y = self._conform_to_reference_input(y, ref_input=x) - x_id = str(id(x)) - tensor_dict[x_id] = [y] * tensor_usage_count[x_id] - - nodes_by_depth = self._nodes_by_depth + # Get sorted list of node depths. depth_keys = list(nodes_by_depth.keys()) depth_keys.sort(reverse=True) - for depth in depth_keys: - nodes = nodes_by_depth[depth] - for node in nodes: - if node.is_input: - continue # Input tensors already exist. - - if any(t_id not in tensor_dict for t_id in node.flat_input_ids): - continue # Node is not computable, try skipping. + # Check that all tensors required are computable. + # computable_tensors: all tensors in the graph + # that can be computed from the inputs provided. + computable_tensors = set() + for x in inputs: + computable_tensors.add(id(x)) - args, kwargs = node.map_arguments(tensor_dict) - outputs = node.layer(*args, **kwargs) + layers_with_complete_input = [] # To provide a better error msg. + for depth in depth_keys: + for node in nodes_by_depth[depth]: + layer = node.layer + if layer and not node.is_input: + for x in tf.nest.flatten(node.keras_inputs): + if id(x) not in computable_tensors: + raise ValueError( + "Graph disconnected: cannot obtain value for " + f'tensor {x} at layer "{layer.name}". ' + "The following previous layers were accessed " + f"without issue: {layers_with_complete_input}" + ) + for x in tf.nest.flatten(node.outputs): + computable_tensors.add(id(x)) + layers_with_complete_input.append(layer.name) + + # Ensure name unicity, which will be crucial for serialization + # (since serialized nodes refer to layers by their name). + all_names = [layer.name for layer in layers] + for name in all_names: + if all_names.count(name) != 1: + raise ValueError( + f'The name "{name}" is used {all_names.count(name)} ' + "times in the model. All layer names should be unique." + ) + return network_nodes, nodes_by_depth, layers, layers_by_depth - # Update tensor_dict. - for x_id, y in zip(node.flat_output_ids, tf.nest.flatten(outputs)): - tensor_dict[x_id] = [y] * tensor_usage_count[x_id] - output_tensors = [] - for x in self.outputs: - x_id = str(id(x)) - assert x_id in tensor_dict, 'Could not compute output ' + str(x) - output_tensors.append(tensor_dict[x_id].pop()) - - return tf.nest.pack_sequence_as(self._nested_outputs, output_tensors) - - def _flatten_to_reference_inputs(self, tensors): - """Maps `tensors` to their respective `keras.Input`.""" - if self._enable_dict_to_input_mapping and isinstance(tensors, dict): - ref_inputs = self._nested_inputs - if not tf.nest.is_nested(ref_inputs): - ref_inputs = [self._nested_inputs] - if isinstance(ref_inputs, dict): - # In the case that the graph is constructed with dict input tensors, - # We will use the original dict key to map with the keys in the input - # data. 
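The computability check above is the source of the familiar "Graph disconnected" error. A minimal reproduction, assuming TF 2.x:

import tensorflow as tf

a = tf.keras.Input(shape=(3,))
b = tf.keras.Input(shape=(3,))
out = tf.keras.layers.Add()([a, b])
try:
    tf.keras.Model(inputs=a, outputs=out)  # `b` is not declared as an input
except ValueError as e:
    print(e)  # Graph disconnected: cannot obtain value for tensor ...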
Note that the model.inputs is using nest.flatten to process the - # input tensors, which means the dict input tensors are ordered by their - # keys. - ref_input_names = sorted(ref_inputs.keys()) - else: - ref_input_names = [inp._keras_history.layer.name for inp in ref_inputs] - - # Raise an warning if there are more input data comparing to input tensor - if len(tensors) > len(ref_input_names): - warnings.warn( - 'Input dict contained keys {} which did not match any model input. ' - 'They will be ignored by the model.'.format( - [n for n in tensors.keys() if n not in ref_input_names]), - stacklevel=2) - - try: - # Flatten in the order `Input`s were passed during Model construction. - return [tensors[n] for n in ref_input_names] - except KeyError: - # TODO(b/151582614) - return tf.nest.flatten(tensors) +def _build_map(outputs): + """This method topologically sorts nodes in order from inputs to outputs. - # Otherwise both self.inputs and tensors will already be in same order. - return tf.nest.flatten(tensors) - - def _conform_to_reference_input(self, tensor, ref_input): - """Set shape and dtype based on `keras.Input`s.""" - if isinstance(tensor, tf.Tensor): - # Allow (None,) and (None, 1) Tensors to be passed interchangeably. Use - # the shape specified by the `keras.Input`. - t_shape = tensor.shape - t_rank = t_shape.rank - ref_shape = ref_input.shape - ref_rank = ref_shape.rank - keras_history = getattr(tensor, '_keras_history', None) - if t_rank is not None and ref_rank is not None: - # Should squeeze last dimension. - # True if tensor is (BATCH, ..., 1) and reference is (BATCH, ...). - if (t_rank == ref_rank + 1 and t_shape[-1] == 1): - tensor = tf.squeeze(tensor, axis=-1) - # Should expand last_dimension. - # True if tensor is (BATCH, ...) and reference is (BATCH, ..., 1). - elif (t_rank == ref_rank - 1 and ref_shape[-1] == 1): - tensor = tf.expand_dims(tensor, axis=-1) - if keras_history is not None: # Restore keras history. - tensor._keras_history = keras_history - - # Add shape hints to Tensors that may have None shape dims but have shapes - # defined by the `keras.Input` (not applicable in eager mode). - if not tf.executing_eagerly(): - try: - tensor.set_shape(tensor.shape.merge_with(ref_input.shape)) - except ValueError: - logging.warning( - 'Model was constructed with shape {} for input {}, but it was ' - 'called on an input with incompatible shape {}.'.format( - ref_input.shape, ref_input, tensor.shape)) - - # Dtype casting. - tensor = tf.cast(tensor, dtype=ref_input.dtype) - elif tf_utils.is_extension_type(tensor): - # Dtype casting (If the extension type has a non-variant dtype and - # supports being cast). Only cast if necessary (since some extension - # types may not implement tf.cast). - tensor_dtype = getattr(tensor, 'dtype', None) - ref_input_dtype = getattr(ref_input, 'dtype', None) - if (ref_input_dtype is not None and tensor_dtype is not None and - tensor_dtype != ref_input_dtype and ref_input_dtype != tf.variant): - tensor = tf.cast(tensor, dtype=ref_input_dtype) - - return tensor - - def get_config(self): - return copy.deepcopy(get_network_config(self)) - - def _validate_graph_inputs_and_outputs(self): - """Validates the inputs and outputs of a Graph Network.""" - # Check for redundancy in inputs. - if len({id(i) for i in self.inputs}) != len(self.inputs): - raise ValueError('The list of inputs passed to the model ' - 'contains the same input multiple times. ' - 'All inputs should only appear once.' 
- f'Received inputs={self.inputs}') - - for x in self.inputs: - # Check that x has appropriate `_keras_history` metadata. - if not hasattr(x, '_keras_history'): - cls_name = self.__class__.__name__ - raise ValueError( - f'Input tensors to a {cls_name} model ' - 'must come from `tf.keras.Input`. ' - f'Received inputs={x} (missing previous layer metadata).') - # Check that x is an input tensor. - # pylint: disable=protected-access - layer = x._keras_history.layer - if len(layer._inbound_nodes) > 1 or ( - layer._inbound_nodes and not layer._inbound_nodes[0].is_input): - cls_name = self.__class__.__name__ - logging.warning(f'{cls_name} model inputs must come from ' - '`tf.keras.Input` (thus holding past layer metadata). ' - 'They cannot be the output of ' - 'a previous non-Input layer. ' - 'Here, a tensor specified as ' - f'input to "{self.name}" was not an Input tensor, ' - f'it was generated by layer "{layer.name}".\n' - 'Note that input tensors are ' - 'instantiated via `tensor = tf.keras.Input(shape)`.\n' - f'The tensor that caused the issue was: {x}') - - # Check compatibility of batch sizes of Input Layers. - input_batch_sizes = set([ - training_utils.get_static_batch_size(x._keras_history.layer) - for x in self.inputs]) - input_batch_sizes.discard(None) - if len(input_batch_sizes) > 1: - logging.warning('Found incompatible static batch sizes among the ' - f'inputs. Batch sizes: {sorted(input_batch_sizes)}') - - for x in self.outputs: - if not hasattr(x, '_keras_history'): - cls_name = self.__class__.__name__ - raise ValueError(f'Output tensors of a {cls_name} model must be ' - 'the output of a TensorFlow `Layer` ' - f'(thus holding past layer metadata). Found: {x}') - - def _insert_layers(self, layers, relevant_nodes=None): - """Inserts Layers into the Network after Network creation. - - This is only valid for Keras Graph Networks. Layers added via this function - will be included in the `call` computation and `get_config` of this Network. - They will not be added to the Network's outputs. + It uses a depth-first search to topologically sort nodes that appear in the + _keras_history connectivity metadata of `outputs`. Args: - layers: Arbitrary nested structure of Layers. Layers must be reachable - from one or more of the `keras.Input` Tensors that correspond to this - Network's inputs. - relevant_nodes: Nodes from the Layers that should be considered part of - this Network. If `None`, all Nodes will be considered part of this - Network. + outputs: the output tensors whose _keras_history metadata should be + walked. This may be an arbitrary nested structure. - Raises: - ValueError: If the layers depend on `Input`s not found in this Model. - """ - layers = tf.nest.flatten(layers) - tf_utils.assert_no_legacy_layers(layers) - node_to_depth = {} - for depth, nodes in self._nodes_by_depth.items(): - node_to_depth.update({node: depth for node in nodes}) - # The nodes of these Layers that are relevant to this Network. 
If not - # provided, assume all Nodes are relevant - if not relevant_nodes: - relevant_nodes = tf.nest.flatten( - [layer._inbound_nodes for layer in layers]) - network_nodes = set(relevant_nodes + list(node_to_depth.keys())) - - def _get_min_depth(node): - """Gets the minimum depth at which node can be computed.""" - min_depth = 0 - for layer, node_id, _, _ in node.iterate_inbound(): - inbound_node = layer._inbound_nodes[node_id] - if inbound_node in node_to_depth: - min_depth = min(min_depth, node_to_depth[inbound_node]) - elif inbound_node not in network_nodes: - continue - else: - # Previous relevant nodes haven't been processed yet. - return None - # New node is one shallower than its shallowest input. - return min_depth - 1 - - # Insert nodes into `_nodes_by_depth` and other node attrs. - unprocessed_nodes = copy.copy(relevant_nodes) - i = 0 - while unprocessed_nodes: - i += 1 - # Do a sanity check. This can occur if `Input`s from outside this Model - # are being relied on. - if i > 10000: - raise ValueError('Layers could not be added due to missing ' - 'dependencies.') - - node = unprocessed_nodes.pop(0) - depth = _get_min_depth(node) - if depth is None: # Defer until inbound nodes are processed. - unprocessed_nodes.append(node) - continue - node_key = _make_node_key(node.layer.name, - node.layer._inbound_nodes.index(node)) - if node_key not in self._network_nodes: - node_to_depth[node] = depth - self._network_nodes.add(node_key) - self._nodes_by_depth[depth].append(node) - - # Insert layers and update other layer attrs. - layer_set = set(self._self_tracked_trackables) - deferred_layers = [] - for layer in layers: - if layer not in layer_set: - self._self_tracked_trackables.append(layer) - deferred_layers.append(layer) - self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(layer.call) - layer_set.add(layer) - self._handle_deferred_layer_dependencies(deferred_layers) - - self._compute_tensor_usage_count() - - def _compute_tensor_usage_count(self): - """Compute the #. of tensor usages for all the output tensors of layers. - - The computed tensor usage count is saved as `self._tensor_usage_count`. This - is later used for saving memory in eager computation by releasing - no-longer-needed tensors as early as possible. + Returns: + A tuple like (ordered_nodes, layer_to_first_traversal_index) + ordered_nodes: list of nodes appearing in the keras history, topologically + sorted from original inputs to the `outputs`. + (If outputs have different sets of ancestors, the inputs to one output + may appear after a different output). + layer_to_first_traversal_index: + A dict mapping layer to the traversal index in the DFS where it is + seen. Note: if a layer is shared by several nodes, the dict will only + store the index corresponding to the *first* time the layer is seen.
""" - tensor_usage_count = collections.Counter() - available_tensors = set(str(id(tensor)) for tensor in self.inputs) - - depth_keys = list(self._nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - depth_keys = depth_keys[1:] - - for depth in depth_keys: - for node in self._nodes_by_depth[depth]: - input_tensors = { - str(id(tensor)) for tensor in tf.nest.flatten(node.keras_inputs) - } - if input_tensors.issubset(available_tensors): - for tensor in tf.nest.flatten(node.keras_inputs): - tensor_usage_count[str(id(tensor))] += 1 - - for output_tensor in tf.nest.flatten(node.outputs): - available_tensors.add(str(id(output_tensor))) - - for tensor in self.outputs: - tensor_usage_count[str(id(tensor))] += 1 - - self._tensor_usage_count = tensor_usage_count - - def _assert_weights_created(self): - # Override the implementation in Model. - # The Functional model should always have weight created already. - return - - def _graph_network_add_loss(self, symbolic_loss): - new_nodes, new_layers = _map_subgraph_network(self.inputs, [symbolic_loss]) - # Losses must be keyed on inputs no matter what in order to be supported in - # DistributionStrategy. - add_loss_layer = base_layer.AddLoss( - unconditional=False, dtype=symbolic_loss.dtype) - add_loss_layer(symbolic_loss) - new_nodes.extend(add_loss_layer.inbound_nodes) - new_layers.append(add_loss_layer) - self._insert_layers(new_layers, new_nodes) - - def _graph_network_add_metric(self, value, aggregation, name): - new_nodes, new_layers = _map_subgraph_network(self.inputs, [value]) - add_metric_layer = base_layer.AddMetric( - aggregation, name, dtype=value.dtype) - add_metric_layer(value) - new_nodes.extend(add_metric_layer.inbound_nodes) - new_layers.append(add_metric_layer) - self._insert_layers(new_layers, new_nodes) - - @property - def _trackable_saved_model_saver(self): - return network_serialization.NetworkSavedModelSaver(self) - - def _get_save_spec(self, dynamic_batch=True, inputs_only=True): - if getattr(self, '_has_explicit_input_shape', True): - # Functional models and Sequential models that have an explicit input - # shape should use the batch size set by the input layer. - dynamic_batch = False - return super()._get_save_spec(dynamic_batch, inputs_only) - - -def _make_node_key(layer_name, node_index): - return layer_name + '_ib-' + str(node_index) - - -def _map_graph_network(inputs, outputs): - """Validates a network's topology and gather its layers and nodes. - - Args: - inputs: List of input tensors. - outputs: List of outputs tensors. - - Returns: - A tuple `(nodes, nodes_by_depth, layers, layers_by_depth)`. - - nodes: list of Node instances. - - nodes_by_depth: dict mapping ints (depth) to lists of node instances. - - layers: list of Layer instances. - - layers_by_depth: dict mapping ints (depth) to lists of layer instances. - - Raises: - ValueError: In case the network is not valid (e.g. disconnected graph). - """ - # "depth" is number of layers between output Node and the Node. - # Nodes are ordered from inputs -> outputs. - nodes_in_decreasing_depth, layer_indices = _build_map(outputs) - network_nodes = { - _make_node_key(node.layer.name, node.layer._inbound_nodes.index(node)) - for node in nodes_in_decreasing_depth - } - - nodes_depths = {} # dict {node: depth value} - layers_depths = {} # dict {layer: depth value} - - for node in reversed(nodes_in_decreasing_depth): - # If the depth is not set, the node has no outbound nodes (depth 0). 
- depth = nodes_depths.setdefault(node, 0) - - # Update the depth of the corresponding layer - previous_depth = layers_depths.get(node.layer, 0) - # If we've seen this layer before at a higher depth, - # we should use that depth instead of the node depth. - # This is necessary for shared layers that have inputs at different - # depth levels in the graph. - depth = max(depth, previous_depth) - layers_depths[node.layer] = depth - nodes_depths[node] = depth - - # Update the depth of inbound nodes. - # The "depth" of a node is the max of the depths - # of all nodes it is connected to + 1. - for node_dep in node.parent_nodes: - previous_depth = nodes_depths.get(node_dep, 0) - nodes_depths[node_dep] = max(depth + 1, previous_depth) - - # Handle inputs that are not connected to outputs. - # We do not error out here because the inputs may be used to compute losses - # and metrics. - for input_t in inputs: - input_layer = input_t._keras_history[0] - if input_layer not in layers_depths: - layers_depths[input_layer] = 0 - layer_indices[input_layer] = -1 - nodes_depths[input_layer._inbound_nodes[0]] = 0 - network_nodes.add(_make_node_key(input_layer.name, 0)) - - # Build a dict {depth: list of nodes with this depth} - nodes_by_depth = collections.defaultdict(list) - for node, depth in nodes_depths.items(): - nodes_by_depth[depth].append(node) - - # Build a dict {depth: list of layers with this depth} - layers_by_depth = collections.defaultdict(list) - for layer, depth in layers_depths.items(): - layers_by_depth[depth].append(layer) - - # Get sorted list of layer depths. - depth_keys = list(layers_by_depth.keys()) - depth_keys.sort(reverse=True) - - # Set self.layers ordered by depth. - layers = [] - for depth in depth_keys: - layers_for_depth = layers_by_depth[depth] - # Network.layers needs to have a deterministic order: - # here we order them by traversal order. - layers_for_depth.sort(key=lambda x: layer_indices[x]) - layers.extend(layers_for_depth) - - # Get sorted list of node depths. - depth_keys = list(nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - - # Check that all tensors required are computable. - # computable_tensors: all tensors in the graph - # that can be computed from the inputs provided. - computable_tensors = set() - for x in inputs: - computable_tensors.add(id(x)) - - layers_with_complete_input = [] # To provide a better error msg. - for depth in depth_keys: - for node in nodes_by_depth[depth]: - layer = node.layer - if layer and not node.is_input: - for x in tf.nest.flatten(node.keras_inputs): - if id(x) not in computable_tensors: - raise ValueError( - f'Graph disconnected: cannot obtain value for tensor {x} ' - f'at layer "{layer.name}". The following previous layers ' - f'were accessed without issue: {layers_with_complete_input}') - for x in tf.nest.flatten(node.outputs): - computable_tensors.add(id(x)) - layers_with_complete_input.append(layer.name) - - # Ensure name unicity, which will be crucial for serialization - # (since serialized nodes refer to layers by their name). - all_names = [layer.name for layer in layers] - for name in all_names: - if all_names.count(name) != 1: - raise ValueError( - f'The name "{name}" is used {all_names.count(name)} ' - 'times in the model. All layer names should be unique.') - return network_nodes, nodes_by_depth, layers, layers_by_depth - - -def _build_map(outputs): - """This method topologically sorts nodes in order from inputs to outputs. 
- - It uses a depth-first search to topologically sort nodes that appear in the - _keras_history connectivity metadata of `outputs`. - - Args: - outputs: the output tensors whose _keras_history metadata should be walked. - This may be an arbitrary nested structure. - - Returns: - A tuple like (ordered_nodes, layer_to_first_traversal_index) - ordered_nodes: list of nodes appearing in the keras history, topologically - sorted from original inputs to the `outputs`. - (If outputs have different sets of ancestors, the inputs to one output - may appear after a different output). - layer_to_first_traversal_index: - A dict mapping layer to the traversal index in the DFS where it is - seen. Note: if a layer is shared by several nodes, the dict will only - store the index corresponding to the *first* time the layer seen. - """ - finished_nodes = set() - nodes_in_progress = set() - nodes_in_decreasing_depth = [] # nodes from inputs -> outputs. - layer_indices = {} # layer -> in traversal order. - for output in tf.nest.flatten(outputs): - _build_map_helper(output, finished_nodes, nodes_in_progress, - nodes_in_decreasing_depth, layer_indices) - return nodes_in_decreasing_depth, layer_indices - - -def _build_map_helper(tensor, finished_nodes, nodes_in_progress, - nodes_in_decreasing_depth, layer_indices): - """Recursive helper for `_build_map`.""" - layer, node_index, _ = tensor._keras_history # pylint: disable=protected-access - node = layer._inbound_nodes[node_index] # pylint: disable=protected-access - - # Don't repeat work for shared subgraphs - if node in finished_nodes: - return - - # Prevent cycles. - if node in nodes_in_progress: - raise ValueError(f'Tensor {tensor} from layer "{layer.name}" ' - 'is part of a cycle.') - - # Store the traversal order for layer sorting. - if layer not in layer_indices: - layer_indices[layer] = len(layer_indices) - - # Propagate to all previous tensors connected to this node. - nodes_in_progress.add(node) - if not node.is_input: - for tensor in node.keras_inputs: - _build_map_helper(tensor, finished_nodes, nodes_in_progress, - nodes_in_decreasing_depth, layer_indices) - - finished_nodes.add(node) - nodes_in_progress.remove(node) - nodes_in_decreasing_depth.append(node) + finished_nodes = set() + nodes_in_progress = set() + nodes_in_decreasing_depth = [] # nodes from inputs -> outputs. + layer_indices = {} # layer -> in traversal order. + for output in tf.nest.flatten(outputs): + _build_map_helper( + output, + finished_nodes, + nodes_in_progress, + nodes_in_decreasing_depth, + layer_indices, + ) + return nodes_in_decreasing_depth, layer_indices + + +def _build_map_helper( + tensor, + finished_nodes, + nodes_in_progress, + nodes_in_decreasing_depth, + layer_indices, +): + """Recursive helper for `_build_map`.""" + ( + layer, + node_index, + _, + ) = tensor._keras_history + node = layer._inbound_nodes[node_index] + + # Don't repeat work for shared subgraphs + if node in finished_nodes: + return + + # Prevent cycles. + if node in nodes_in_progress: + raise ValueError( + f'Tensor {tensor} from layer "{layer.name}" is part of a cycle.' + ) + + # Store the traversal order for layer sorting. + if layer not in layer_indices: + layer_indices[layer] = len(layer_indices) + + # Propagate to all previous tensors connected to this node. 
+ nodes_in_progress.add(node) + if not node.is_input: + for tensor in node.keras_inputs: + _build_map_helper( + tensor, + finished_nodes, + nodes_in_progress, + nodes_in_decreasing_depth, + layer_indices, + ) + + finished_nodes.add(node) + nodes_in_progress.remove(node) + nodes_in_decreasing_depth.append(node) def _map_subgraph_network(inputs, outputs): - """Returns the nodes and layers in the topology from `inputs` to `outputs`. + """Returns the nodes and layers in the topology from `inputs` to `outputs`. - Args: - inputs: List of input tensors. - outputs: List of output tensors. + Args: + inputs: List of input tensors. + outputs: List of output tensors. - Returns: - A tuple of List{Node] and List[Layer]. - """ - if not tf.compat.v1.executing_eagerly_outside_functions(): - base_layer_utils.create_keras_history(outputs) - # Keep only nodes and layers in the topology between inputs and outputs. - _, nodes_by_depth, layers, _ = _map_graph_network(inputs, outputs) - return tf.nest.flatten([nodes for nodes in nodes_by_depth.values()]), layers + Returns: + A tuple of List[Node] and List[Layer]. + """ + if not tf.compat.v1.executing_eagerly_outside_functions(): + base_layer_utils.create_keras_history(outputs) + # Keep only nodes and layers in the topology between inputs and outputs. + _, nodes_by_depth, layers, _ = _map_graph_network(inputs, outputs) + return tf.nest.flatten([nodes for nodes in nodes_by_depth.values()]), layers def _should_skip_first_node(layer): - """Returns True if the first layer node should not be saved or loaded.""" - # Networks that are constructed with an Input layer/shape start with a - # pre-existing node linking their input to output. This node is excluded from - # the network config. - if layer._self_tracked_trackables: - return (isinstance(layer, Functional) and + """Returns True if the first layer node should not be saved or loaded.""" + # Networks that are constructed with an Input layer/shape start with a + # pre-existing node linking their input to output. This node is excluded + # from the network config. + if not hasattr(layer, "_self_tracked_trackables"): + # Special case for serialization of Functional models without + # defined input shape argument. + return isinstance(layer, Functional) + if layer._self_tracked_trackables: + return ( + isinstance(layer, Functional) # Filter out Sequential models without an input shape. - isinstance(layer._self_tracked_trackables[0], - input_layer_module.InputLayer)) - else: - return isinstance(layer, Functional) + and isinstance( + layer._self_tracked_trackables[0], input_layer_module.InputLayer + ) + ) + else: + return isinstance(layer, Functional) def connect_ancillary_layers(model, created_layers): - """Adds layers that are not connected to the outputs to the model.""" - # Layers not connected to outputs, such as those added in `add_loss`. - ancillary_layers = [ - layer for layer in created_layers.values() if layer not in model.layers - ] - if ancillary_layers: - relevant_nodes = tf.nest.flatten([ - layer.inbound_nodes[1:] - if _should_skip_first_node(layer) else layer.inbound_nodes - for layer in created_layers.values() - ]) - model._insert_layers(ancillary_layers, relevant_nodes) - return model + """Adds layers that are not connected to the outputs to the model.""" + # Layers not connected to outputs, such as those added in `add_loss`.
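`_build_map_helper` is a textbook post-order depth-first search with cycle detection. A self-contained plain-Python rendering over a hypothetical `parents` mapping (not the real Node objects):

def topo_sort(outputs, parents):
    finished, in_progress, ordered = set(), set(), []

    def visit(node):
        if node in finished:
            return  # shared subgraph already handled
        if node in in_progress:
            raise ValueError(f"{node} is part of a cycle.")
        in_progress.add(node)
        for parent in parents.get(node, []):
            visit(parent)
        in_progress.remove(node)
        finished.add(node)
        ordered.append(node)  # post-order: parents land before the node

    for output in outputs:
        visit(output)
    return ordered

print(topo_sort(["out"], {"out": ["h"], "h": ["x"]}))  # ['x', 'h', 'out']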
+ ancillary_layers = [ + layer for layer in created_layers.values() if layer not in model.layers + ] + if ancillary_layers: + relevant_nodes = tf.nest.flatten( + [ + layer.inbound_nodes[1:] + if _should_skip_first_node(layer) + else layer.inbound_nodes + for layer in created_layers.values() + ] + ) + model._insert_layers(ancillary_layers, relevant_nodes) + return model def reconstruct_from_config(config, custom_objects=None, created_layers=None): - """Reconstructs graph from config object. - - Args: - config: Dictionary returned from Network.get_config() - custom_objects: Optional dictionary mapping names (strings) to custom - classes or functions to be considered during deserialization. - created_layers: Optional dictionary mapping names to Layer objects. Any - layer not in this dictionary will be created and added to the dict. - This function will add new nodes to all layers (excluding InputLayers), - instead of re-using pre-existing nodes in the layers. - - Returns: - Tuple of (input tensors, output tensors, dictionary of created layers) - """ - # Layer instances created during the graph reconstruction process. - created_layers = created_layers or collections.OrderedDict() - - # Maps input data (tuple of inbound layer name, node index) from the config - # to node indices in the newly generated model. The node indices may be - # different if the layers have already been called previously. - node_index_map = {} - node_count_by_layer = {} - - # Dictionary mapping layer instances to - # node data that specifies a layer call. - # It acts as a queue that maintains any unprocessed - # layer call until it becomes possible to process it - # (i.e. until the input tensors to the call all exist). - unprocessed_nodes = collections.defaultdict(list) - - def get_node_index(layer, config_node_index): - """Returns node index in layer (might differ from config_node_index).""" - if isinstance(layer, input_layer_module.InputLayer): - return 0 - return node_index_map.get((layer.name, config_node_index), None) - - def _deserialize_keras_tensors(kwargs, layer_map): - """Deserializes Keras Tensors passed to `call`..""" - - def _deserialize_keras_tensor(t): - """Deserializes a single Keras Tensor passed to `call`.""" - if isinstance(t, tf_utils.ListWrapper): - t = t.as_list() - layer_name = t[0] - node_index = t[1] - tensor_index = t[2] - - layer = layer_map[layer_name] - new_node_index = get_node_index(layer, node_index) - if new_node_index is None: - # The inbound node may not have been processed yet, - # (This can happen e.g. if it depends on a different set - # of inputs than those that have been processed already). - # raise an IndexError so that the current node puts itself - # back on the unprocessed queue. - # Caution: This may lead to infinite loops for malformed - # network configurations! (or when there is a bug in - # the network config loading code). - raise IndexError - node = layer._inbound_nodes[new_node_index] - return tf.nest.flatten(node.outputs)[tensor_index] - return t - - kwargs = tf_utils.convert_inner_node_data(kwargs, wrap=True) - return tf.nest.map_structure(_deserialize_keras_tensor, kwargs) - - def process_node(layer, node_data): - """Deserialize a node. + """Reconstructs graph from config object. Args: - layer: layer instance. - node_data: Nested structure of `ListWrapper`. + config: Dictionary returned from Network.get_config() + custom_objects: Optional dictionary mapping names (strings) to custom + classes or functions to be considered during deserialization. 
+ created_layers: Optional dictionary mapping names to Layer objects. Any + layer not in this dictionary will be created and added to the dict. + This function will add new nodes to all layers (excluding InputLayers), + instead of re-using pre-existing nodes in the layers. Returns: - Whether the node was processed (i.e. the layer was called on the inputs - specified by the node data) - - Raises: - ValueError: In case of improperly formatted `node_data`. + Tuple of (input tensors, output tensors, dictionary of created layers) """ + # Layer instances created during the graph reconstruction process. + created_layers = created_layers or collections.OrderedDict() + + # Maps input data (tuple of inbound layer name, node index) from the config + # to node indices in the newly generated model. The node indices may be + # different if the layers have already been called previously. + node_index_map = {} + node_count_by_layer = {} + + # Dictionary mapping layer instances to + # node data that specifies a layer call. + # It acts as a queue that maintains any unprocessed + # layer call until it becomes possible to process it + # (i.e. until the input tensors to the call all exist). + unprocessed_nodes = collections.defaultdict(list) + + def get_node_index(layer, config_node_index): + """Returns node index in layer (might differ from config_node_index).""" + if isinstance(layer, input_layer_module.InputLayer): + return 0 + return node_index_map.get((layer.name, config_node_index), None) + + def _deserialize_keras_tensors(kwargs, layer_map): + """Deserializes Keras Tensors passed to `call`.""" + + def _deserialize_keras_tensor(t): + """Deserializes a single Keras Tensor passed to `call`.""" + if isinstance(t, tf_utils.ListWrapper): + t = t.as_list() + layer_name = t[0] + node_index = t[1] + tensor_index = t[2] + + layer = layer_map[layer_name] + new_node_index = get_node_index(layer, node_index) + if new_node_index is None: + # The inbound node may not have been processed yet, + # (This can happen e.g. if it depends on a different set + # of inputs than those that have been processed already). + # raise an IndexError so that the current node puts itself + # back on the unprocessed queue. + # Caution: This may lead to infinite loops for malformed + # network configurations! (or when there is a bug in + # the network config loading code). + raise IndexError + node = layer._inbound_nodes[new_node_index] + return tf.nest.flatten(node.outputs)[tensor_index] + return t + + kwargs = tf_utils.convert_inner_node_data(kwargs, wrap=True) + return tf.nest.map_structure(_deserialize_keras_tensor, kwargs) + + def process_node(layer, node_data): + """Deserialize a node. + + Args: + layer: layer instance. + node_data: Nested structure of `ListWrapper`. + + Returns: + Whether the node was processed (i.e. the layer was called on the + inputs specified by the node data) + + Raises: + ValueError: In case of improperly formatted `node_data`.
+ """ + input_tensors = [] + for input_data in tf.nest.flatten(node_data): + input_data = input_data.as_list() + if len(input_data) == 3: + kwargs = {} + elif len(input_data) == 4: + kwargs = input_data[3] + try: + kwargs = _deserialize_keras_tensors(kwargs, created_layers) + except IndexError: + # Happens if keras tensors in kwargs are still unprocessed + return False + else: + raise ValueError("Improperly formatted model config.") + + if input_data[0] != node_module._CONSTANT_VALUE: + inbound_layer_name = input_data[0] + inbound_node_index = input_data[1] + inbound_tensor_index = input_data[2] + inbound_layer = created_layers[inbound_layer_name] + inbound_node_index = get_node_index( + inbound_layer, inbound_node_index + ) + + if inbound_node_index is None: + return False + inbound_node = inbound_layer._inbound_nodes[inbound_node_index] + input_tensors.append( + tf.nest.flatten(inbound_node.outputs)[inbound_tensor_index] + ) + else: + # We received a constant w/ no Keras history attached, + # which means it is a constant tensor input. + # Input is a constant value. + # Format = [_CONSTANT_VALUE, -1, const_val, kwargs] + assert input_data[1] == -1 + assert len(input_data) >= 3 + const_val = input_data[2] + if ( + isinstance(const_val, tuple) + and len(const_val) == 2 + and const_val[0] == node_module._COMPOSITE_TYPE + ): + # It is a composite tensor. + input_tensors.append(json_utils.decode(const_val[1])) + else: + input_tensors.append(const_val) + input_tensors = tf.nest.pack_sequence_as(node_data, input_tensors) + # Call layer on its inputs, thus creating the node + # and building the layer if needed. + if input_tensors is not None: + if ( + not hasattr(layer, "_preserve_input_structure_in_config") + or not layer._preserve_input_structure_in_config + ): + input_tensors = base_layer_utils.unnest_if_single_tensor( + input_tensors + ) + output_tensors = layer(input_tensors, **kwargs) + + # Update node index map. + output_index = tf.nest.flatten(output_tensors)[ + 0 + ]._keras_history.node_index + node_index_map[ + (layer.name, node_count_by_layer[layer]) + ] = output_index + node_count_by_layer[layer] += 1 + return True + + def process_layer(layer_data): + """Deserializes a layer, then call it on appropriate inputs. + + Args: + layer_data: layer config dict. + + Raises: + ValueError: In case of improperly formatted `layer_data` dict. + """ + layer_name = layer_data["name"] + + if layer_name in created_layers: + layer = created_layers[layer_name] + else: + # Instantiate layer. + from keras.layers import deserialize as deserialize_layer + + layer = deserialize_layer(layer_data, custom_objects=custom_objects) + created_layers[layer_name] = layer + + node_count_by_layer[layer] = int(_should_skip_first_node(layer)) + + # Gather layer inputs and convert to `ListWrapper` objects. + inbound_nodes_data = layer_data["inbound_nodes"] + inbound_nodes_data = tf_utils.convert_inner_node_data( + inbound_nodes_data, wrap=True + ) + for node_data in inbound_nodes_data: + # We don't process nodes (i.e. make layer calls) + # on the fly because the inbound node may not yet exist, + # in case of layer shared at different topological depths + # (e.g. a model such as A(B(A(B(x))))) + unprocessed_nodes[layer].append(node_data) + + # First, we create all layers and enqueue nodes to be processed + for layer_data in config["layers"]: + process_layer(layer_data) + # Then we process nodes in order of layer depth. 
+ # Nodes that cannot yet be processed (if the inbound node + # does not yet exist) are re-enqueued, and the process + # is repeated until all nodes are processed. + while unprocessed_nodes: + for layer_data in config["layers"]: + layer = created_layers[layer_data["name"]] + if layer in unprocessed_nodes: + layer_nodes = unprocessed_nodes.pop(layer) + while layer_nodes: + node_data = layer_nodes[0] + if process_node(layer, node_data): + layer_nodes.pop(0) + else: + # If a node can't be processed, stop processing the + # nodes of the current layer to maintain node ordering. + unprocessed_nodes[layer] = layer_nodes + break + input_tensors = [] - for input_data in tf.nest.flatten(node_data): - input_data = input_data.as_list() - if len(input_data) == 3: - kwargs = {} - elif len(input_data) == 4: - kwargs = input_data[3] - try: - kwargs = _deserialize_keras_tensors(kwargs, created_layers) - except IndexError: - # Happens if keras tensors in kwargs are still unprocessed - return False - else: - raise ValueError('Improperly formatted model config.') - - if input_data[0] != node_module._CONSTANT_VALUE: - inbound_layer_name = input_data[0] - inbound_node_index = input_data[1] - inbound_tensor_index = input_data[2] - inbound_layer = created_layers[inbound_layer_name] - inbound_node_index = get_node_index(inbound_layer, inbound_node_index) - - if inbound_node_index is None: - return False - inbound_node = inbound_layer._inbound_nodes[inbound_node_index] + output_tensors = [] + + input_layers = tf_utils.convert_inner_node_data( + config["input_layers"], wrap=True + ) + for layer_data in tf.nest.flatten(input_layers): + layer_name, node_index, tensor_index = layer_data.as_list() + assert layer_name in created_layers + layer = created_layers[layer_name] + node_index = get_node_index(layer, node_index) + layer_output_tensors = layer._inbound_nodes[node_index].output_tensors input_tensors.append( - tf.nest.flatten(inbound_node.outputs)[inbound_tensor_index]) - else: - # We received a constant w/ no Keras history attached, - # which means it is a constant tensor input. - # Input is a constant value. - # Format = [_CONSTANT_VALUE, -1, const_val, kwargs] - assert input_data[1] == -1 - assert len(input_data) >= 3 - const_val = input_data[2] - if (isinstance(const_val, tuple) and - len(const_val) == 2 and - const_val[0] == node_module._COMPOSITE_TYPE): - # It is a composite tensor. - input_tensors.append(json_utils.decode(const_val[1])) - else: - input_tensors.append(const_val) - input_tensors = tf.nest.pack_sequence_as(node_data, input_tensors) - # Call layer on its inputs, thus creating the node - # and building the layer if needed. - if input_tensors is not None: - if not layer._preserve_input_structure_in_config: - input_tensors = ( - base_layer_utils.unnest_if_single_tensor(input_tensors)) - output_tensors = layer(input_tensors, **kwargs) - - # Update node index map. - output_index = (tf.nest.flatten(output_tensors)[0]. - _keras_history.node_index) - node_index_map[(layer.name, node_count_by_layer[layer])] = output_index - node_count_by_layer[layer] += 1 - return True - - def process_layer(layer_data): - """Deserializes a layer, then call it on appropriate inputs. 
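The `[layer_name, node_index, tensor_index, kwargs]` entries unpacked by `process_node` above are exactly what a functional model's config stores under `inbound_nodes`, and the deferred queue exists because an entry may reference a node that has not been replayed yet. The format can be inspected through the public API (output indicative):

    import json
    import tensorflow as tf
    from tensorflow import keras

    inputs = keras.Input(shape=(4,), name="in")
    x = keras.layers.Dense(3, name="hidden")(inputs)
    model = keras.Model(inputs, keras.layers.Dense(1, name="out")(x))

    config = model.get_config()
    # Each entry is [inbound_layer_name, node_index, tensor_index, kwargs];
    # reconstruct_from_config() replays these entries as layer calls.
    print(json.dumps(config["layers"][2]["inbound_nodes"]))
    # -> [[["hidden", 0, 0, {}]]]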
+ tf.nest.flatten(layer_output_tensors)[tensor_index] + ) + + output_layers = tf_utils.convert_inner_node_data( + config["output_layers"], wrap=True + ) + for layer_data in tf.nest.flatten(output_layers): + layer_name, node_index, tensor_index = layer_data.as_list() + assert layer_name in created_layers + layer = created_layers[layer_name] + node_index = get_node_index(layer, node_index) + layer_output_tensors = layer._inbound_nodes[node_index].output_tensors + output_tensors.append( + tf.nest.flatten(layer_output_tensors)[tensor_index] + ) + + input_tensors = tf.nest.pack_sequence_as(input_layers, input_tensors) + output_tensors = tf.nest.pack_sequence_as(output_layers, output_tensors) + return input_tensors, output_tensors, created_layers + + +def get_network_config(network, serialize_layer_fn=None, config=None): + """Build the config, which consists of the node graph and serialized layers. Args: - layer_data: layer config dict. + network: A Network object. + serialize_layer_fn: Function used to serialize layers. + config: A dict to append more config entries into. If None, start with a + new dict for the config. - Raises: - ValueError: In case of improperly formatted `layer_data` dict. + Returns: + Config dictionary. """ - layer_name = layer_data['name'] - - if layer_name in created_layers: - layer = created_layers[layer_name] - else: - # Instantiate layer. - from keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top - - layer = deserialize_layer(layer_data, custom_objects=custom_objects) - created_layers[layer_name] = layer - - node_count_by_layer[layer] = int(_should_skip_first_node(layer)) - - # Gather layer inputs and convert to `ListWrapper` objects. - inbound_nodes_data = layer_data['inbound_nodes'] - inbound_nodes_data = tf_utils.convert_inner_node_data( - inbound_nodes_data, wrap=True) - for node_data in inbound_nodes_data: - # We don't process nodes (i.e. make layer calls) - # on the fly because the inbound node may not yet exist, - # in case of layer shared at different topological depths - # (e.g. a model such as A(B(A(B(x))))) - unprocessed_nodes[layer].append(node_data) - - # First, we create all layers and enqueue nodes to be processed - for layer_data in config['layers']: - process_layer(layer_data) - # Then we process nodes in order of layer depth. - # Nodes that cannot yet be processed (if the inbound node - # does not yet exist) are re-enqueued, and the process - # is repeated until all nodes are processed. - while unprocessed_nodes: - for layer_data in config['layers']: - layer = created_layers[layer_data['name']] - if layer in unprocessed_nodes: - layer_nodes = unprocessed_nodes.pop(layer) - while layer_nodes: - node_data = layer_nodes[0] - if process_node(layer, node_data): - layer_nodes.pop(0) - else: - # If a node can't be processed, stop processing the nodes of - # the current layer to maintain node ordering. 
- unprocessed_nodes[layer] = layer_nodes - break - - input_tensors = [] - output_tensors = [] - - input_layers = tf_utils.convert_inner_node_data( - config['input_layers'], wrap=True) - for layer_data in tf.nest.flatten(input_layers): - layer_name, node_index, tensor_index = layer_data.as_list() - assert layer_name in created_layers - layer = created_layers[layer_name] - node_index = get_node_index(layer, node_index) - layer_output_tensors = layer._inbound_nodes[node_index].output_tensors - input_tensors.append(tf.nest.flatten(layer_output_tensors)[tensor_index]) - - output_layers = tf_utils.convert_inner_node_data( - config['output_layers'], wrap=True) - for layer_data in tf.nest.flatten(output_layers): - layer_name, node_index, tensor_index = layer_data.as_list() - assert layer_name in created_layers - layer = created_layers[layer_name] - node_index = get_node_index(layer, node_index) - layer_output_tensors = layer._inbound_nodes[node_index].output_tensors - output_tensors.append(tf.nest.flatten(layer_output_tensors)[tensor_index]) - - input_tensors = tf.nest.pack_sequence_as(input_layers, input_tensors) - output_tensors = tf.nest.pack_sequence_as(output_layers, output_tensors) - return input_tensors, output_tensors, created_layers - - -def get_network_config(network, serialize_layer_fn=None): - """Builds the config, which consists of the node graph and serialized layers. - - Args: - network: A Network object. - serialize_layer_fn: Function used to serialize layers. - - Returns: - Config dictionary. - """ - serialize_layer_fn = ( - serialize_layer_fn or generic_utils.serialize_keras_object) - config = { - 'name': network.name, - } - node_conversion_map = {} - for layer in network.layers: - kept_nodes = 1 if _should_skip_first_node(layer) else 0 - for original_node_index, node in enumerate(layer._inbound_nodes): - node_key = _make_node_key(layer.name, original_node_index) - if node_key in network._network_nodes: - node_conversion_map[node_key] = kept_nodes - kept_nodes += 1 - layer_configs = [] - - with generic_utils.SharedObjectSavingScope(): - for layer in network.layers: # From the earliest layers on. - filtered_inbound_nodes = [] - for original_node_index, node in enumerate(layer._inbound_nodes): - node_key = _make_node_key(layer.name, original_node_index) - if node_key in network._network_nodes and not node.is_input: - # The node is relevant to the model: - # add to filtered_inbound_nodes. - node_data = node.serialize(_make_node_key, node_conversion_map) - filtered_inbound_nodes.append(node_data) - - layer_config = serialize_layer_fn(layer) - layer_config['name'] = layer.name - layer_config['inbound_nodes'] = filtered_inbound_nodes - layer_configs.append(layer_config) - config['layers'] = layer_configs - - # Gather info about inputs and outputs. - model_inputs = [] - for i in range(len(network._input_layers)): - layer, node_index, tensor_index = network._input_coordinates[i] - node_key = _make_node_key(layer.name, node_index) - if node_key not in network._network_nodes: - continue - new_node_index = node_conversion_map[node_key] - model_inputs.append( - tf_utils.ListWrapper([layer.name, new_node_index, tensor_index])) - model_inputs = tf.nest.pack_sequence_as(network._nested_inputs, model_inputs) - # Preserve external Keras compat for Models with single input. 
- if not tf.nest.is_nested(model_inputs): - model_inputs = [model_inputs] - model_inputs = tf_utils.convert_inner_node_data(model_inputs) - config['input_layers'] = model_inputs - - model_outputs = [] - for i in range(len(network._output_layers)): - layer, node_index, tensor_index = network._output_coordinates[i] - node_key = _make_node_key(layer.name, node_index) - if node_key not in network._network_nodes: - continue - new_node_index = node_conversion_map[node_key] - model_outputs.append( - tf_utils.ListWrapper([layer.name, new_node_index, tensor_index])) - model_outputs = tf.nest.pack_sequence_as(network._nested_outputs, model_outputs) - # Preserve external Keras compat for Models with single output. - if not tf.nest.is_nested(model_outputs): - model_outputs = [model_outputs] - model_outputs = tf_utils.convert_inner_node_data(model_outputs) - config['output_layers'] = model_outputs - return config + config = config or {} + serialize_obj_fn = serialization_lib.serialize_keras_object + set_layers_legacy = False + # To be removed after full affected g3 user migration to Keras V3 Saving. + if getattr(network, "use_legacy_config", False): + serialize_obj_fn = serialization.serialize_keras_object + set_layers_legacy = True + serialize_layer_fn = serialize_layer_fn or serialize_obj_fn + config["name"] = network.name + node_conversion_map = {} + for layer in network.layers: + kept_nodes = 1 if _should_skip_first_node(layer) else 0 + for original_node_index, node in enumerate(layer._inbound_nodes): + node_key = _make_node_key(layer.name, original_node_index) + if node_key in network._network_nodes: + node_conversion_map[node_key] = kept_nodes + kept_nodes += 1 + layer_configs = [] + + with serialization.SharedObjectSavingScope(): + for layer in network.layers: # From the earliest layers on. + filtered_inbound_nodes = [] + for original_node_index, node in enumerate(layer._inbound_nodes): + node_key = _make_node_key(layer.name, original_node_index) + if node_key in network._network_nodes and not node.is_input: + # The node is relevant to the model: + # add to filtered_inbound_nodes. + node_data = node.serialize( + _make_node_key, node_conversion_map + ) + filtered_inbound_nodes.append(node_data) + + if isinstance(layer, Functional) and set_layers_legacy: + layer.use_legacy_config = True + layer_config = serialize_layer_fn(layer) + layer_config["name"] = layer.name + layer_config["inbound_nodes"] = filtered_inbound_nodes + layer_configs.append(layer_config) + config["layers"] = layer_configs + + # Gather info about inputs and outputs. + model_inputs = [] + for i in range(len(network._input_layers)): + layer, node_index, tensor_index = network._input_coordinates[i] + node_key = _make_node_key(layer.name, node_index) + if node_key not in network._network_nodes: + continue + new_node_index = node_conversion_map[node_key] + model_inputs.append( + tf_utils.ListWrapper([layer.name, new_node_index, tensor_index]) + ) + model_inputs = tf.nest.pack_sequence_as( + network._nested_inputs, model_inputs + ) + # Preserve external Keras compat for Models with single input. 
+ if not tf.nest.is_nested(model_inputs): + model_inputs = [model_inputs] + model_inputs = tf_utils.convert_inner_node_data(model_inputs) + config["input_layers"] = model_inputs + + model_outputs = [] + for i in range(len(network._output_layers)): + layer, node_index, tensor_index = network._output_coordinates[i] + node_key = _make_node_key(layer.name, node_index) + if node_key not in network._network_nodes: + continue + new_node_index = node_conversion_map[node_key] + model_outputs.append( + tf_utils.ListWrapper([layer.name, new_node_index, tensor_index]) + ) + model_outputs = tf.nest.pack_sequence_as( + network._nested_outputs, model_outputs + ) + # Preserve external Keras compat for Models with single output. + if not tf.nest.is_nested(model_outputs): + model_outputs = [model_outputs] + model_outputs = tf_utils.convert_inner_node_data(model_outputs) + config["output_layers"] = model_outputs + return config def shape_with_no_batch_size(x): - if x.shape.rank is None: - return None - shape = x.shape.as_list() - if shape: - shape[0] = None - return shape + if x.shape.rank is None: + return None + shape = x.shape.as_list() + if shape: + shape[0] = None + return shape class ModuleWrapper(base_layer.Layer): - """Wrapper for `tf.Module`s to support the Functional and Sequential API.""" - - def __init__(self, module, method_name=None, **kwargs): - """Initializes the wrapper Layer for this module. - - Args: - module: The `tf.Module` instance to be wrapped. - method_name: (Optional) str. The name of the method to use as the forward - pass of the module. If not set, defaults to '__call__' if defined, or - 'call'. - **kwargs: Additional keywrod arguments. See `tf.keras.layers.Layer`. - - Raises: - ValueError: If `method` is not defined on `module`. - """ - super().__init__(**kwargs) - if method_name is None: - if hasattr(module, '__call__'): - method_name = '__call__' - elif hasattr(module, 'call'): - method_name = 'call' - if method_name is None or not hasattr(module, method_name): - raise ValueError('{} is not defined on object {}'.format( - method_name, module)) - - self._module = module - self._method_name = method_name - - # Check if module.__call__ has a `training` arg or accepts `**kwargs`. - method = getattr(module, method_name) - method_arg_spec = tf_inspect.getfullargspec(method) - self._call_spec.expects_training_arg = ('training' in method_arg_spec.args - or - method_arg_spec.varkw is not None) - self._call_spec.expects_mask_arg = ('mask' in method_arg_spec.args or - method_arg_spec.varkw is not None) - - def call(self, *args, **kwargs): - if 'training' in kwargs and not self._expects_training_arg: - kwargs.pop('training') - if 'mask' in kwargs and not self._expects_mask_arg: - kwargs.pop('mask') - return getattr(self._module, self._method_name)(*args, **kwargs) + """Wrapper for `tf.Module`s to support the Functional and Sequential API.""" + + def __init__(self, module, method_name=None, **kwargs): + """Initializes the wrapper Layer for this module. + + Args: + module: The `tf.Module` instance to be wrapped. + method_name: (Optional) str. The name of the method to use as the + forward pass of the module. If not set, becomes '__call__' if + defined, or 'call'. Defaults to `None`. + **kwargs: Additional keyword arguments. See `tf.keras.layers.Layer`. + + Raises: + ValueError: If `method` is not defined on `module`.
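`get_network_config` above and `reconstruct_from_config` earlier are the two halves of the functional-model round trip exposed publicly as `get_config`/`from_config`. A minimal round-trip check with the public API:

    import numpy as np
    import tensorflow as tf
    from tensorflow import keras

    inputs = keras.Input(shape=(4,))
    model = keras.Model(inputs, keras.layers.Dense(2)(inputs))

    # Serialize the node graph and layers, then rebuild and copy weights.
    clone = keras.Model.from_config(model.get_config())
    clone.set_weights(model.get_weights())

    x = np.ones((1, 4), dtype="float32")
    np.testing.assert_allclose(model(x).numpy(), clone(x).numpy())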
+ """ + super().__init__(**kwargs) + if method_name is None: + if hasattr(module, "__call__"): + method_name = "__call__" + elif hasattr(module, "call"): + method_name = "call" + if method_name is None or not hasattr(module, method_name): + raise ValueError(f"{method_name} is not defined on object {module}") + + self._module = module + self._method_name = method_name + + # Check if module.__call__ has a `training` arg or accepts `**kwargs`. + method = getattr(module, method_name) + method_arg_spec = tf_inspect.getfullargspec(method) + self._call_spec.expects_training_arg = ( + "training" in method_arg_spec.args + or method_arg_spec.varkw is not None + ) + self._call_spec.expects_mask_arg = ( + "mask" in method_arg_spec.args or method_arg_spec.varkw is not None + ) + + def call(self, *args, **kwargs): + if "training" in kwargs and not self._expects_training_arg: + kwargs.pop("training") + if "mask" in kwargs and not self._expects_mask_arg: + kwargs.pop("mask") + return getattr(self._module, self._method_name)(*args, **kwargs) + + +def has_functional_like_constructor(cls): + init_args = tf_inspect.getfullargspec(cls.__init__).args[1:] + functional_init_args = tf_inspect.getfullargspec(Functional.__init__).args[ + 1: + ] + if init_args == functional_init_args: + return True + return False diff --git a/keras/engine/functional_test.py b/keras/engine/functional_test.py index 6ae73b8948d0..302eae9d82bb 100644 --- a/keras/engine/functional_test.py +++ b/keras/engine/functional_test.py @@ -11,11 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#,============================================================================ +# ,============================================================================ """Tests for layer graphs construction & handling.""" import warnings +import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import layers from keras import losses @@ -25,2562 +28,2697 @@ from keras.engine import input_layer as input_layer_lib from keras.engine import sequential from keras.engine import training as training_lib +from keras.saving import object_registration +from keras.saving.legacy import save from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import layer_utils from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf - - +# isort: off +from tensorflow.python.checkpoint.checkpoint import ( + Checkpoint, +) from tensorflow.python.framework import extension_type -from tensorflow.python.training.tracking.util import Checkpoint - class NetworkConstructionTest(test_combinations.TestCase): - - def test_default_model_name(self): - inputs = input_layer_lib.Input(shape=(1,)) - outputs = layers.Dense(1, activation='relu')(inputs) - model = training_lib.Model(inputs=inputs, outputs=outputs) - self.assertEqual(model.name, 'model') - - model_2 = training_lib.Model(inputs=inputs, outputs=outputs) - self.assertEqual(model_2.name, 'model_1') - - model_3 = training_lib.Model(inputs=inputs, outputs=outputs) - self.assertEqual(model_3.name, 'model_2') - - def test_get_updates(self): - - class MyLayer(layers.Layer): - - def build(self, input_shape): - self.a = self.add_weight('a', - (1, 1), - 'float32', - trainable=False) - self.b = self.add_weight('b', - (1, 1), - 'float32', - trainable=False) - self.add_update(tf.compat.v1.assign_add( - 
self.a, [[1.]], name='unconditional_update')) - self.built = True - - def call(self, inputs): - self.add_update( - tf.compat.v1.assign_add(self.b, inputs, name='conditional_update')) - return inputs + 1 - - with tf.Graph().as_default(): - x1 = input_layer_lib.Input(shape=(1,)) - layer = MyLayer() - _ = layer(x1) - - self.assertEqual(len(layer.updates), 2) - - x2 = input_layer_lib.Input(shape=(1,)) - y2 = layer(x2) - - self.assertEqual(len(layer.updates), 3) - - network = functional.Functional(x2, y2) - self.assertEqual(len(network.updates), 3) - - x3 = input_layer_lib.Input(shape=(1,)) - _ = layer(x3) - self.assertEqual(len(network.updates), 4) - - x4 = input_layer_lib.Input(shape=(1,)) - _ = network(x4) - self.assertEqual(len(network.updates), 5) - - network.add_update(tf.compat.v1.assign_add(layer.a, [[1]])) - self.assertEqual(len(network.updates), 6) - - network.add_update(tf.compat.v1.assign_add(layer.b, x4)) - self.assertEqual(len(network.updates), 7) - - @test_combinations.generate(test_combinations.combine(mode=['graph'])) - def test_get_updates_bn(self): - x1 = input_layer_lib.Input(shape=(1,)) - layer = layers.BatchNormalization() - _ = layer(x1) - - self.assertEqual(len(layer.updates), 2) - - def test_get_layer(self): - # create a simple network - x = input_layer_lib.Input(shape=(32,)) - dense_a = layers.Dense(4, name='dense_a') - dense_b = layers.Dense(2, name='dense_b') - y = dense_b(dense_a(x)) - network = functional.Functional(x, y, name='dense_network') - - # test various get_layer by index - self.assertEqual(network.get_layer(index=1), dense_a) - - # test invalid get_layer by index - with self.assertRaisesRegex( - ValueError, 'Was asked to retrieve layer at index ' + str(3) + - ' but model only has ' + str(len(network.layers)) + ' layers.'): - network.get_layer(index=3) - - # test that only one between name and index is requested - with self.assertRaisesRegex(ValueError, - 'Provide only a layer name or a layer index'): - network.get_layer(index=1, name='dense_b') - - # test that a name or an index must be provided - with self.assertRaisesRegex(ValueError, - 'Provide either a layer name or layer index.'): - network.get_layer() - - # test various get_layer by name - self.assertEqual(network.get_layer(name='dense_a'), dense_a) - - # test invalid get_layer by name - with self.assertRaisesRegex(ValueError, 'No such layer: dense_c.'): - network.get_layer(name='dense_c') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testTopologicalAttributes(self): - # test layer attributes / methods related to cross-layer connectivity. 
- a = input_layer_lib.Input(shape=(32,), name='input_a') - b = input_layer_lib.Input(shape=(32,), name='input_b') - - # test input, output, input_shape, output_shape - test_layer = layers.Dense(16, name='test_layer') - a_test = test_layer(a) - self.assertIs(test_layer.input, a) - self.assertIs(test_layer.output, a_test) - self.assertEqual(test_layer.input_shape, (None, 32)) - self.assertEqual(test_layer.output_shape, (None, 16)) - - # test `get_*_at` methods - dense = layers.Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - - self.assertIs(dense.get_input_at(0), a) - self.assertIs(dense.get_input_at(1), b) - self.assertIs(dense.get_output_at(0), a_2) - self.assertIs(dense.get_output_at(1), b_2) - self.assertEqual(dense.get_input_shape_at(0), (None, 32)) - self.assertEqual(dense.get_input_shape_at(1), (None, 32)) - self.assertEqual(dense.get_output_shape_at(0), (None, 16)) - self.assertEqual(dense.get_output_shape_at(1), (None, 16)) - - # Test invalid value for attribute retrieval. - with self.assertRaises(ValueError): - dense.get_input_at(2) - with self.assertRaises(AttributeError): - new_dense = layers.Dense(16) - _ = new_dense.input - with self.assertRaises(AttributeError): - new_dense = layers.Dense(16) - _ = new_dense.output - with self.assertRaises(AttributeError): - new_dense = layers.Dense(16) - _ = new_dense.output_shape - with self.assertRaises(AttributeError): - new_dense = layers.Dense(16) - _ = new_dense.input_shape - with self.assertRaises(AttributeError): - new_dense = layers.Dense(16) - a = input_layer_lib.Input(shape=(3, 32)) - a = input_layer_lib.Input(shape=(5, 32)) - a_2 = dense(a) - b_2 = dense(b) - _ = new_dense.input_shape - with self.assertRaises(AttributeError): - new_dense = layers.Dense(16) - a = input_layer_lib.Input(shape=(3, 32)) - a = input_layer_lib.Input(shape=(5, 32)) - a_2 = dense(a) - b_2 = dense(b) - _ = new_dense.output_shape - - def _assertAllIs(self, a, b): - self.assertTrue(all(x is y for x, y in zip(a, b))) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testTopologicalAttributesMultiOutputLayer(self): - - class PowersLayer(layers.Layer): - - def call(self, inputs): - return [inputs**2, inputs**3] - - x = input_layer_lib.Input(shape=(32,)) - test_layer = PowersLayer() - p1, p2 = test_layer(x) # pylint: disable=not-callable - - self.assertIs(test_layer.input, x) - self._assertAllIs(test_layer.output, [p1, p2]) - self.assertEqual(test_layer.input_shape, (None, 32)) - self.assertEqual(test_layer.output_shape, [(None, 32), (None, 32)]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testTopologicalAttributesMultiInputLayer(self): - - class AddLayer(layers.Layer): - - def call(self, inputs): - assert len(inputs) == 2 - return inputs[0] + inputs[1] - - a = input_layer_lib.Input(shape=(32,)) - b = input_layer_lib.Input(shape=(32,)) - test_layer = AddLayer() - y = test_layer([a, b]) # pylint: disable=not-callable - - self._assertAllIs(test_layer.input, [a, b]) - self.assertIs(test_layer.output, y) - self.assertEqual(test_layer.input_shape, [(None, 32), (None, 32)]) - self.assertEqual(test_layer.output_shape, (None, 32)) - - def testBasicNetwork(self): - with tf.Graph().as_default(): - # minimum viable network - x = input_layer_lib.Input(shape=(32,)) - dense = layers.Dense(2) - y = dense(x) - network = functional.Functional(x, y, name='dense_network') - - # test basic attributes - self.assertEqual(network.name, 'dense_network') - 
self.assertEqual(len(network.layers), 2) # InputLayer + Dense - self.assertEqual(network.layers[1], dense) - self._assertAllIs(network.weights, dense.weights) - self._assertAllIs(network.trainable_weights, dense.trainable_weights) - self._assertAllIs(network.non_trainable_weights, - dense.non_trainable_weights) - - # test callability on Input - x_2 = input_layer_lib.Input(shape=(32,)) - y_2 = network(x_2) - self.assertEqual(y_2.shape.as_list(), [None, 2]) - - # test callability on regular tensor - x_2 = tf.compat.v1.placeholder(dtype='float32', shape=(None, 32)) - y_2 = network(x_2) - self.assertEqual(y_2.shape.as_list(), [None, 2]) - - # test network `trainable` attribute - network.trainable = False - self._assertAllIs(network.weights, dense.weights) - self.assertEqual(network.trainable_weights, []) - self._assertAllIs(network.non_trainable_weights, - dense.trainable_weights + dense.non_trainable_weights) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_trainable_weights(self): - a = layers.Input(shape=(2,)) - b = layers.Dense(1)(a) - model = training_lib.Model(a, b) - - weights = model.weights - self._assertAllIs(model.trainable_weights, weights) - self.assertListEqual(model.non_trainable_weights, []) - - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - self._assertAllIs(model.non_trainable_weights, weights) - - model.trainable = True - self._assertAllIs(model.trainable_weights, weights) - self.assertListEqual(model.non_trainable_weights, []) - - model.layers[1].trainable = False - self.assertListEqual(model.trainable_weights, []) - self._assertAllIs(model.non_trainable_weights, weights) - - # sequential model - model = sequential.Sequential() - model.add(layers.Dense(1, input_dim=2)) - weights = model.weights - - self._assertAllIs(model.trainable_weights, weights) - self.assertListEqual(model.non_trainable_weights, []) - - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - self._assertAllIs(model.non_trainable_weights, weights) - - model.trainable = True - self._assertAllIs(model.trainable_weights, weights) - self.assertListEqual(model.non_trainable_weights, []) - - model.layers[0].trainable = False - self.assertListEqual(model.trainable_weights, []) - self._assertAllIs(model.non_trainable_weights, weights) - - def test_layer_call_arguments(self): - with tf.Graph().as_default(): - # Test the ability to pass and serialize arguments to `call`. 
- inp = layers.Input(shape=(2,)) - x = layers.Dense(3)(inp) - x = layers.Dropout(0.5)(x, training=True) - model = training_lib.Model(inp, x) - # Would be `dropout/cond/Merge` by default - self.assertIn('dropout', model.output.op.name) - - # Test that argument is kept when applying the model - inp2 = layers.Input(shape=(2,)) - out2 = model(inp2) - self.assertIn('dropout', out2.op.name) - - # Test that argument is kept after loading a model - config = model.get_config() - model = training_lib.Model.from_config(config) - self.assertIn('dropout', model.output.op.name) - - def test_node_construction(self): - # test basics - a = layers.Input(shape=(32,), name='input_a') - b = layers.Input(shape=(32,), name='input_b') - - with self.assertRaises(ValueError): - _ = layers.Input(shape=(32,), batch_shape=(10, 32)) - with self.assertRaises(ValueError): - _ = layers.Input(shape=(32,), unknown_kwarg=None) - - self.assertListEqual(a.shape.as_list(), [None, 32]) - a_layer, a_node_index, a_tensor_index = a._keras_history - b_layer, _, _ = b._keras_history - self.assertEqual(len(a_layer._inbound_nodes), 1) - self.assertEqual(a_tensor_index, 0) - node = a_layer._inbound_nodes[a_node_index] - self.assertEqual(node.outbound_layer, a_layer) - - self.assertListEqual(node.inbound_layers, []) - self.assertListEqual(node.input_tensors, [a]) - self.assertListEqual(node.input_shapes, [(None, 32)]) - self.assertListEqual(node.output_tensors, [a]) - self.assertListEqual(node.output_shapes, [(None, 32)]) - - dense = layers.Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - - self.assertEqual(len(dense._inbound_nodes), 2) - self.assertEqual(len(dense._outbound_nodes), 0) - self.assertEqual(dense._inbound_nodes[0].inbound_layers, a_layer) - self.assertEqual(dense._inbound_nodes[0].outbound_layer, dense) - self.assertEqual(dense._inbound_nodes[1].inbound_layers, b_layer) - self.assertEqual(dense._inbound_nodes[1].outbound_layer, dense) - self.assertIs(dense._inbound_nodes[0].input_tensors, a) - self.assertIs(dense._inbound_nodes[1].input_tensors, b) - - # test layer properties - test_layer = layers.Dense(16, name='test_layer') - a_test = test_layer(a) - self.assertListEqual(test_layer.kernel.shape.as_list(), [32, 16]) - self.assertIs(test_layer.input, a) - self.assertIs(test_layer.output, a_test) - self.assertEqual(test_layer.input_shape, (None, 32)) - self.assertEqual(test_layer.output_shape, (None, 16)) - - self.assertIs(dense.get_input_at(0), a) - self.assertIs(dense.get_input_at(1), b) - self.assertIs(dense.get_output_at(0), a_2) - self.assertIs(dense.get_output_at(1), b_2) - self.assertEqual(dense.get_input_shape_at(0), (None, 32)) - self.assertEqual(dense.get_input_shape_at(1), (None, 32)) - self.assertEqual(dense.get_output_shape_at(0), (None, 16)) - self.assertEqual(dense.get_output_shape_at(1), (None, 16)) - self.assertEqual(dense.get_input_mask_at(0), None) - self.assertEqual(dense.get_input_mask_at(1), None) - self.assertEqual(dense.get_output_mask_at(0), None) - self.assertEqual(dense.get_output_mask_at(1), None) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_multi_input_layer(self): - with self.cached_session(): - # test multi-input layer - a = layers.Input(shape=(32,), name='input_a') - b = layers.Input(shape=(32,), name='input_b') - - dense = layers.Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - - merged = layers.concatenate([a_2, b_2], name='merge') - self.assertListEqual(merged.shape.as_list(), [None, 16 * 2]) - merge_layer, 
merge_node_index, merge_tensor_index = merged._keras_history - - self.assertEqual(merge_node_index, 0) - self.assertEqual(merge_tensor_index, 0) - - self.assertEqual(len(merge_layer._inbound_nodes), 1) - self.assertEqual(len(merge_layer._outbound_nodes), 0) - - self.assertEqual(len(merge_layer._inbound_nodes[0].input_tensors), 2) - self.assertEqual(len(merge_layer._inbound_nodes[0].inbound_layers), 2) - - c = layers.Dense(64, name='dense_2')(merged) - d = layers.Dense(5, name='dense_3')(c) - - model = training_lib.Model(inputs=[a, b], outputs=[c, d], name='model') - self.assertEqual(len(model.layers), 6) - output_shapes = model.compute_output_shape([(None, 32), (None, 32)]) - self.assertListEqual(output_shapes[0].as_list(), [None, 64]) - self.assertListEqual(output_shapes[1].as_list(), [None, 5]) - self.assertListEqual( - model.compute_mask([a, b], [None, None]), [None, None]) - - # we don't check names of first 2 layers (inputs) because - # ordering of same-level layers is not fixed - self.assertListEqual([l.name for l in model.layers][2:], - ['dense_1', 'merge', 'dense_2', 'dense_3']) - self.assertListEqual([l.name for l in model._input_layers], - ['input_a', 'input_b']) - self.assertListEqual([l.name for l in model._output_layers], - ['dense_2', 'dense_3']) - - # actually run model - fn = backend.function(model.inputs, model.outputs) - input_a_np = np.random.random((10, 32)) - input_b_np = np.random.random((10, 32)) - fn_outputs = fn([input_a_np, input_b_np]) - self.assertListEqual([x.shape for x in fn_outputs], [(10, 64), (10, 5)]) - - # test get_source_inputs - self._assertAllIs(layer_utils.get_source_inputs(c), [a, b]) - - # serialization / deserialization - json_config = model.to_json() - recreated_model = models.model_from_json(json_config) - recreated_model.compile('rmsprop', 'mse') - - self.assertListEqual([l.name for l in recreated_model.layers][2:], - ['dense_1', 'merge', 'dense_2', 'dense_3']) - self.assertListEqual([l.name for l in recreated_model._input_layers], - ['input_a', 'input_b']) - self.assertListEqual([l.name for l in recreated_model._output_layers], - ['dense_2', 'dense_3']) - - fn = backend.function(recreated_model.inputs, recreated_model.outputs) - input_a_np = np.random.random((10, 32)) - input_b_np = np.random.random((10, 32)) - fn_outputs = fn([input_a_np, input_b_np]) - self.assertListEqual([x.shape for x in fn_outputs], [(10, 64), (10, 5)]) - - def test_multi_output_layer_output_names(self): - inp = layers.Input(name='inp', shape=(None,), dtype=tf.float32) - - class _MultiOutput(layers.Layer): - - def call(self, x): - return x + 1., x + 2. 
- - out = _MultiOutput(name='out')(inp) - model = training_lib.Model(inp, out) - self.assertEqual(['out', 'out_1'], model.output_names) - self.assertAllClose([2., 3.], model(1.)) - - def test_recursion(self): - with tf.Graph().as_default(), self.cached_session(): - a = layers.Input(shape=(32,), name='input_a') - b = layers.Input(shape=(32,), name='input_b') - - dense = layers.Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - merged = layers.concatenate([a_2, b_2], name='merge') - c = layers.Dense(64, name='dense_2')(merged) - d = layers.Dense(5, name='dense_3')(c) - - model = training_lib.Model(inputs=[a, b], outputs=[c, d], name='model') - - e = layers.Input(shape=(32,), name='input_e') - f = layers.Input(shape=(32,), name='input_f') - self.assertEqual(len(model.inputs), 2) - g, h = model([e, f]) - self.assertEqual(len(model.inputs), 2) - self.assertEqual(g.name, 'model/dense_2/BiasAdd:0') - - self.assertListEqual(g.shape.as_list(), c.shape.as_list()) - self.assertListEqual(h.shape.as_list(), d.shape.as_list()) - - # test separate manipulation of different layer outputs - i = layers.Dense(7, name='dense_4')(h) - - final_model = training_lib.Model( - inputs=[e, f], outputs=[i, g], name='final') - self.assertEqual(len(final_model.inputs), 2) - self.assertEqual(len(final_model.outputs), 2) - self.assertEqual(len(final_model.layers), 4) - - # we don't check names of first 2 layers (inputs) because - # ordering of same-level layers is not fixed - self.assertListEqual([layer.name for layer in final_model.layers][2:], - ['model', 'dense_4']) - self.assertListEqual( - model.compute_mask([e, f], [None, None]), [None, None]) - self.assertListEqual( - final_model.compute_output_shape([(10, 32), (10, 32)]), [(10, 7), - (10, 64)]) - - # run recursive model - fn = backend.function(final_model.inputs, final_model.outputs) - input_a_np = np.random.random((10, 32)) - input_b_np = np.random.random((10, 32)) - fn_outputs = fn([input_a_np, input_b_np]) - self.assertListEqual([x.shape for x in fn_outputs], [(10, 7), (10, 64)]) - - # test serialization - model_config = final_model.get_config() - recreated_model = models.Model.from_config(model_config) - - fn = backend.function(recreated_model.inputs, recreated_model.outputs) - input_a_np = np.random.random((10, 32)) - input_b_np = np.random.random((10, 32)) - fn_outputs = fn([input_a_np, input_b_np]) - self.assertListEqual([x.shape for x in fn_outputs], [(10, 7), (10, 64)]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_multi_input_multi_output_recursion(self): - with self.cached_session(): - # test multi-input multi-output - a = layers.Input(shape=(32,), name='input_a') - b = layers.Input(shape=(32,), name='input_b') - - dense = layers.Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - merged = layers.concatenate([a_2, b_2], name='merge') - c = layers.Dense(64, name='dense_2')(merged) - d = layers.Dense(5, name='dense_3')(c) - - model = training_lib.Model(inputs=[a, b], outputs=[c, d], name='model') - - j = layers.Input(shape=(32,), name='input_j') - k = layers.Input(shape=(32,), name='input_k') - _, n = model([j, k]) - - o = layers.Input(shape=(32,), name='input_o') - p = layers.Input(shape=(32,), name='input_p') - q, _ = model([o, p]) - - self.assertListEqual(n.shape.as_list(), [None, 5]) - self.assertListEqual(q.shape.as_list(), [None, 64]) - s = layers.concatenate([n, q], name='merge_nq') - self.assertListEqual(s.shape.as_list(), [None, 64 + 5]) - - # test with single output as 
1-elem list - multi_io_model = training_lib.Model([j, k, o, p], [s]) - - fn = backend.function(multi_io_model.inputs, multi_io_model.outputs) - fn_outputs = fn([ - np.random.random((10, 32)), np.random.random((10, 32)), - np.random.random((10, 32)), np.random.random((10, 32)) - ]) - self.assertListEqual([x.shape for x in fn_outputs], [(10, 69)]) - - # test with single output as tensor - multi_io_model = training_lib.Model([j, k, o, p], s) - - fn = backend.function(multi_io_model.inputs, multi_io_model.outputs) - fn_outputs = fn([ - np.random.random((10, 32)), np.random.random((10, 32)), - np.random.random((10, 32)), np.random.random((10, 32)) - ]) - # note that the output of the function will still be a 1-elem list - self.assertListEqual([x.shape for x in fn_outputs], [(10, 69)]) - - # test serialization - model_config = multi_io_model.get_config() - recreated_model = models.Model.from_config(model_config) - - fn = backend.function(recreated_model.inputs, recreated_model.outputs) - fn_outputs = fn([ - np.random.random((10, 32)), np.random.random((10, 32)), - np.random.random((10, 32)), np.random.random((10, 32)) - ]) - # note that the output of the function will still be a 1-elem list - self.assertListEqual([x.shape for x in fn_outputs], [(10, 69)]) - - config = model.get_config() - models.Model.from_config(config) - - model.summary() - json_str = model.to_json() - models.model_from_json(json_str) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_invalid_graphs(self): - a = layers.Input(shape=(32,), name='input_a') - b = layers.Input(shape=(32,), name='input_b') - - dense = layers.Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - merged = layers.concatenate([a_2, b_2], name='merge') - c = layers.Dense(64, name='dense_2')(merged) - d = layers.Dense(5, name='dense_3')(c) - - model = training_lib.Model(inputs=[a, b], outputs=[c, d], name='model') - - # disconnected graph - j = layers.Input(shape=(32,), name='input_j') - k = layers.Input(shape=(32,), name='input_k') - m, n = model([j, k]) - with self.assertRaises(Exception): - training_lib.Model([j], [m, n]) - - # redundant outputs - j = layers.Input(shape=(32,), name='input_j') - k = layers.Input(shape=(32,), name='input_k') - m, n = model([j, k]) - - training_lib.Model([j, k], [m, n, n]) - - # redundant inputs - j = layers.Input(shape=(32,), name='input_j') - k = layers.Input(shape=(32,), name='input_k') - m, n = model([j, k]) - with self.assertRaises(Exception): - training_lib.Model([j, k, j], [m, n]) - - # i have not idea what I'm doing: garbage as inputs/outputs - j = layers.Input(shape=(32,), name='input_j') - k = layers.Input(shape=(32,), name='input_k') - m, n = model([j, k]) - with self.assertRaises(Exception): - training_lib.Model([j, k], [m, n, 0]) - - def test_raw_tf_compatibility(self): - with tf.Graph().as_default(): - # test calling layers/models on TF tensors - a = layers.Input(shape=(32,), name='input_a') - b = layers.Input(shape=(32,), name='input_b') - - dense = layers.Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - merged = layers.concatenate([a_2, b_2], name='merge') - c = layers.Dense(64, name='dense_2')(merged) - d = layers.Dense(5, name='dense_3')(c) - - model = training_lib.Model(inputs=[a, b], outputs=[c, d], name='model') - - j = layers.Input(shape=(32,), name='input_j') - k = layers.Input(shape=(32,), name='input_k') - self.assertEqual(len(model.inputs), 2) - m, n = model([j, k]) - self.assertEqual(len(model.inputs), 2) - tf_model = 
training_lib.Model([j, k], [m, n]) - - j_tf = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, 32)) - k_tf = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, 32)) - m_tf, n_tf = tf_model([j_tf, k_tf]) - self.assertListEqual(m_tf.shape.as_list(), [None, 64]) - self.assertListEqual(n_tf.shape.as_list(), [None, 5]) - - # test merge - layers.concatenate([j_tf, k_tf], axis=1) - layers.add([j_tf, k_tf]) - - # test tensor input - x = tf.compat.v1.placeholder(shape=(None, 2), dtype=tf.float32) - layers.InputLayer(input_tensor=x) - - x = layers.Input(tensor=x) - layers.Dense(2)(x) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_basic_masking(self): - a = layers.Input(shape=(10, 32), name='input_a') - b = layers.Masking()(a) - model = training_lib.Model(a, b) - self.assertEqual(model.output_mask.shape.as_list(), [None, 10]) - - def testMaskingSingleInput(self): - - class MaskedLayer(layers.Layer): - - def call(self, inputs, mask=None): - if mask is not None: - return inputs * mask - return inputs - - def compute_mask(self, inputs, mask=None): - return tf.ones_like(inputs) - - if tf.executing_eagerly(): - a = tf.constant([2] * 32) - mask = tf.constant([0, 1] * 16) - a._keras_mask = mask - b = MaskedLayer()(a) - self.assertTrue(hasattr(b, '_keras_mask')) - self.assertAllEqual( - self.evaluate(tf.ones_like(mask)), - self.evaluate(getattr(b, '_keras_mask'))) - self.assertAllEqual(self.evaluate(a * mask), self.evaluate(b)) - else: - x = input_layer_lib.Input(shape=(32,)) - y = MaskedLayer()(x) # pylint: disable=not-callable - network = functional.Functional(x, y) - - # test callability on Input - x_2 = input_layer_lib.Input(shape=(32,)) - y_2 = network(x_2) - self.assertEqual(y_2.shape.as_list(), [None, 32]) - - # test callability on regular tensor - x_2 = tf.compat.v1.placeholder(dtype='float32', shape=(None, 32)) - y_2 = network(x_2) - self.assertEqual(y_2.shape.as_list(), [None, 32]) - - def test_activity_regularization_with_model_composition(self): - - def reg(x): - return tf.reduce_sum(x) - - net_a_input = input_layer_lib.Input((2,)) - net_a = net_a_input - net_a = layers.Dense( - 2, kernel_initializer='ones', use_bias=False, activity_regularizer=reg)( - net_a) - model_a = training_lib.Model([net_a_input], [net_a]) - - net_b_input = input_layer_lib.Input((2,)) - net_b = model_a(net_b_input) - model_b = training_lib.Model([net_b_input], [net_b]) - - model_b.compile(optimizer='sgd', loss=None) - x = np.ones((1, 2)) - loss = model_b.evaluate(x) - self.assertEqual(loss, 4.) 
- - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_layer_sharing_at_heterogenous_depth(self): - x_val = np.random.random((10, 5)) - - x = input_layer_lib.Input(shape=(5,)) - a = layers.Dense(5, name='A') - b = layers.Dense(5, name='B') - output = a(b(a(b(x)))) - m = training_lib.Model(x, output) - m.run_eagerly = test_utils.should_run_eagerly() - - output_val = m.predict(x_val) - - config = m.get_config() - weights = m.get_weights() - - m2 = models.Model.from_config(config) - m2.set_weights(weights) - - output_val_2 = m2.predict(x_val) - self.assertAllClose(output_val, output_val_2, atol=1e-6) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_layer_sharing_at_heterogenous_depth_with_concat(self): - input_shape = (16, 9, 3) - input_layer = input_layer_lib.Input(shape=input_shape) - - a = layers.Dense(3, name='dense_A') - b = layers.Dense(3, name='dense_B') - c = layers.Dense(3, name='dense_C') - - x1 = b(a(input_layer)) - x2 = a(c(input_layer)) - output = layers.concatenate([x1, x2]) - - m = training_lib.Model(inputs=input_layer, outputs=output) - m.run_eagerly = test_utils.should_run_eagerly() - - x_val = np.random.random((10, 16, 9, 3)) - output_val = m.predict(x_val) - - config = m.get_config() - weights = m.get_weights() - - m2 = models.Model.from_config(config) - m2.set_weights(weights) - - output_val_2 = m2.predict(x_val) - self.assertAllClose(output_val, output_val_2, atol=1e-6) - - def test_layer_sharing_maintains_node_order(self): - # See https://github.com/keras-team/keras/issues/14838. - inp = input_layer_lib.Input(shape=[5], name='main_input') - - zeros = layers.Lambda(tf.zeros_like, name='generate_zeros')(inp) - ones = layers.Lambda(tf.ones_like, name='generate_ones')(inp) - - shared_layer = layers.Layer(name='shared') - - ones_result = shared_layer(ones) - zeros_result = shared_layer(zeros) - zeros_result = layers.Layer(name='blank')(zeros_result) - - m = training_lib.Model( - inputs=[inp], outputs=[zeros_result, ones_result]) - m2 = models.Model.from_config(m.get_config()) - self.assertAllClose( - m2.predict_on_batch(tf.zeros([1, 5])), - m.predict_on_batch(tf.zeros([1, 5]))) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_explicit_training_argument(self): - a = layers.Input(shape=(2,)) - b = layers.Dropout(0.5)(a) - base_model = training_lib.Model(a, b) - - a = layers.Input(shape=(2,)) - b = base_model(a, training=False) - model = training_lib.Model(a, b) - - x = np.ones((100, 2)) - y = np.ones((100, 2)) - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - loss = model.train_on_batch(x, y) - self.assertEqual(loss, 0) # In inference mode, output is equal to input. - - a = layers.Input(shape=(2,)) - b = base_model(a, training=True) - model = training_lib.Model(a, b) - preds = model.predict(x) - self.assertEqual(np.min(preds), 0.) # At least one unit was dropped. - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_mask_derived_from_keras_layer(self): - inputs = input_layer_lib.Input((5, 10)) - mask = input_layer_lib.Input((5,)) - outputs = layers.RNN(layers.LSTMCell(100))(inputs, mask=mask) - model = training_lib.Model([inputs, mask], outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[np.ones((10, 5, 10)), np.zeros((10, 5))], - y=np.zeros((10, 100)), - batch_size=2) - # All data is masked, returned values are 0's. 
- self.assertEqual(history.history['loss'][0], 0.0) - history = model.fit( - x=[np.ones((10, 5, 10)), np.ones((10, 5))], - y=np.zeros((10, 100)), - batch_size=2) - # Data is not masked, returned values are random. - self.assertGreater(history.history['loss'][0], 0.0) - - model = training_lib.Model.from_config(model.get_config()) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[np.ones((10, 5, 10)), np.zeros((10, 5))], - y=np.zeros((10, 100)), - batch_size=2) - # All data is masked, returned values are 0's. - self.assertEqual(history.history['loss'][0], 0.0) - history = model.fit( - x=[np.ones((10, 5, 10)), np.ones((10, 5))], - y=np.zeros((10, 100)), - batch_size=2) - # Data is not masked, returned values are random. - self.assertGreater(history.history['loss'][0], 0.0) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_call_arg_derived_from_keras_layer(self): - - class MyAdd(layers.Layer): - - def call(self, x1, x2): - return x1 + x2 - - input1 = input_layer_lib.Input(10) - input2 = input_layer_lib.Input(10) - outputs = MyAdd()(input1, input2) - model = training_lib.Model([input1, input2], outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], - y=10 * np.ones((10, 10)), - batch_size=2) - # Check that second input was correctly added to first. - self.assertEqual(history.history['loss'][0], 0.0) - - # Check serialization. - model = training_lib.Model.from_config( - model.get_config(), custom_objects={'MyAdd': MyAdd}) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], - y=10 * np.ones((10, 10)), - batch_size=2) - # Check that second input was correctly added to first. - self.assertEqual(history.history['loss'][0], 0.0) - - @test_combinations.generate( - test_combinations.keras_mode_combinations(mode='eager'),) - def test_only_some_in_first_arg_derived_from_keras_layer_keras_tensors(self): - # This functionality is unsupported in v1 graphs - - class MyAddAll(layers.Layer): - - def call(self, inputs): - x = inputs[0] - for inp in inputs[1:]: - if inp is not None: - x = x + inp - return x - - input1 = input_layer_lib.Input(10) - input2 = input_layer_lib.Input(10) - layer = MyAddAll() - outputs = layer([0.0, input1, None, input2, None]) - model = training_lib.Model([input1, input2], outputs) - self.assertIn(layer, model.layers) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], - y=10 * np.ones((10, 10)), - batch_size=2) - # Check that second input was correctly added to first. - self.assertEqual(history.history['loss'][0], 0.0) - - # Check serialization. - model = training_lib.Model.from_config( - model.get_config(), custom_objects={'MyAddAll': MyAddAll}) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], - y=10 * np.ones((10, 10)), - batch_size=2) - # Check that second input was correctly added to first. 
- self.assertEqual(history.history['loss'][0], 0.0) - - @test_combinations.generate( - test_combinations.times( - test_combinations.keras_mode_combinations(), - test_combinations.combine(share_already_used_layer=[True, False]))) - def test_call_kwarg_derived_from_keras_layer(self, share_already_used_layer): - - class MaybeAdd(layers.Layer): - - def call(self, x1, x2=None): - if x2 is not None: - return x1 + x2 - return x1 - - class IdentityLayer(layers.Layer): - - def call(self, x): - return x - - input1 = input_layer_lib.Input(10) - input2 = input_layer_lib.Input(10) - identity_layer = IdentityLayer() - - if share_already_used_layer: - # We have had model serialization/deserialization break in the past: - # when a layer was previously used to construct other functional models - # and had a non-empty list of inbound nodes before being used to define - # the model being serialized/deserialized. - # (The serialization/deserialization was not correctly adjusting - # the node_index serialization/deserialization). - # So, we explicitly test this case. - training_lib.Model([input1], identity_layer(input1)) - - outputs = MaybeAdd()(input1, x2=identity_layer(input2)) - model = training_lib.Model([input1, input2], outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], - y=10 * np.ones((10, 10)), - batch_size=2) - # Check that second input was correctly added to first. - self.assertEqual(history.history['loss'][0], 0.0) - - model = training_lib.Model.from_config( - model.get_config(), - custom_objects={ - 'MaybeAdd': MaybeAdd, - 'IdentityLayer': IdentityLayer - }) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], - y=10 * np.ones((10, 10)), - batch_size=2) - # Check that second input was correctly added to first. - self.assertEqual(history.history['loss'][0], 0.0) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_call_kwarg_dtype_serialization(self): - - class Double(layers.Layer): - - def call(self, x1, dtype=None): - return tf.cast(x1 + x1, dtype=dtype) - - input1 = input_layer_lib.Input(10) - outputs = Double()(input1, dtype=tf.float16) - model = training_lib.Model([input1], outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[3 * np.ones((10, 10))], - y=6 * np.ones((10, 10)), - batch_size=2) - # Check that input was correctly doubled. - self.assertEqual(history.history['loss'][0], 0.0) - - # Check the output dtype - self.assertEqual(model(tf.ones((3, 10))).dtype, tf.float16) - - model = training_lib.Model.from_config( - model.get_config(), custom_objects={'Double': Double}) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[3 * np.ones((10, 10))], - y=6 * np.ones((10, 10)), - batch_size=2) - # Check that input was correctly doubled. 
- self.assertEqual(history.history['loss'][0], 0.0) - - # Check the output dtype - self.assertEqual(model(tf.ones((3, 10))).dtype, tf.float16) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_call_kwarg_nonserializable(self): - - class Double(layers.Layer): - - def call(self, x1, kwarg=None): - return x1 + x1 - - class NonSerializable: - - def __init__(self, foo=None): - self.foo = foo - - input1 = input_layer_lib.Input(10) - outputs = Double()(input1, kwarg=NonSerializable()) - model = training_lib.Model([input1], outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[3 * np.ones((10, 10))], - y=6 * np.ones((10, 10)), - batch_size=2) - # Check that input was correctly doubled. - self.assertEqual(history.history['loss'][0], 0.0) - with self.assertRaisesRegex( - TypeError, 'Layer double was passed non-JSON-serializable arguments.'): - model.get_config() - - @test_combinations.generate( - test_combinations.times( - test_combinations.keras_mode_combinations(), - test_combinations.combine(share_already_used_layer=[True, False]))) - def test_call_kwarg_derived_from_keras_layer_and_first_arg_is_constant( - self, share_already_used_layer): - - class IdentityLayer(layers.Layer): - - def call(self, x): - return x - - class MaybeAdd(layers.Layer): - - def call(self, x1, x2=None): - if x2 is not None: - return x1 + x2 - return x1 - - input2 = input_layer_lib.Input(10) - identity_layer = IdentityLayer() - if share_already_used_layer: - # We have had model serialization/deserialization break in the past: - # when a layer was previously used to construct other functional models - # and had a non-empty list of inbound nodes before being used to define - # the model being serialized/deserialized. - # (The serialization/deserialization was not correctly adjusting - # the node_index serialization/deserialization). - # So, we explicitly test this case. - training_lib.Model([input2], identity_layer(input2)) - - outputs = MaybeAdd()(3., x2=identity_layer(input2)) - model = training_lib.Model([input2], outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=7 * np.ones((10, 10)), - y=10 * np.ones((10, 10)), - batch_size=2) - # Check that second input was correctly added to first. - self.assertEqual(history.history['loss'][0], 0.0) - - model = training_lib.Model.from_config( - model.get_config(), - custom_objects={ - 'MaybeAdd': MaybeAdd, - 'IdentityLayer': IdentityLayer - }) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=7 * np.ones((10, 10)), - y=10 * np.ones((10, 10)), - batch_size=2) - # Check that second input was correctly added to first. - self.assertEqual(history.history['loss'][0], 0.0) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_dont_cast_composite_unless_necessary(self): - if not tf.executing_eagerly(): - return # Creating Keras inputs from a type_spec only supported in eager. - - # TODO(edloper): Change this to tf.experimental.ExtensionTyep once - # it's been released. - class MyType(extension_type.ExtensionType): - # TODO(edloper) Remove _shape and _dtype once Keras has been switched - # to use .shape and .dtype instead. 
- value: tf.Tensor - _shape = property(lambda self: self.value.shape) - shape = property(lambda self: self.value.shape) - _dtype = property(lambda self: self.value.dtype) - dtype = property(lambda self: self.value.dtype) - - class Spec: - _shape = property(lambda self: self.value.shape) - shape = property(lambda self: self.value.shape) - _dtype = property(lambda self: self.value.dtype) - dtype = property(lambda self: self.value.dtype) - - my_spec = MyType.Spec(tf.TensorSpec([5], tf.float32)) - input1 = input_layer_lib.Input(type_spec=my_spec) - model = training_lib.Model([input1], input1) - model.compile(run_eagerly=test_utils.should_run_eagerly()) - model(MyType([1., 2., 3., 4., 5.])) # Does not require cast. - with self.assertRaises((ValueError, TypeError)): - model(MyType([1, 2, 3, 4, 5])) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_composite_call_kwarg_derived_from_keras_layer(self): - - # Create a test layer that accepts composite tensor inputs. - class MaybeAdd(layers.Layer): - - def call(self, x1, x2=None): - # We need to convert this to a tensor for loss calculations - - # losses don't play nicely with ragged tensors yet. - if x2 is not None: - return (x1 + x2).to_tensor(default_value=0) - return x1.to_tensor(default_value=0) - - input1 = input_layer_lib.Input((None,), ragged=True) - input2 = input_layer_lib.Input((None,), ragged=True) - outputs = MaybeAdd()(input1, x2=input2) - model = training_lib.Model([input1, input2], outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - input_data = [ - tf.ragged.constant([[3.0, 3.0], [3.0, 3.0], [3.0]]), - tf.ragged.constant([[7.0, 7.0], [7.0, 7.0], [7.0]]) - ] - expected_data = np.array([[10.0, 10.0], [10.0, 10.0], [10.0, 0.0]]) - - history = model.fit(x=input_data, y=expected_data) - # Check that second input was correctly added to first. - self.assertEqual(history.history['loss'][0], 0.0) - - model = training_lib.Model.from_config( - model.get_config(), custom_objects={'MaybeAdd': MaybeAdd}) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit(x=input_data, y=expected_data) - # Check that second input was correctly added to first. - self.assertEqual(history.history['loss'][0], 0.0) - - @test_combinations.generate( - test_combinations.keras_mode_combinations(mode='eager')) - def test_call_some_not_all_nested_in_first_arg_derived_from_keras_layer(self): - # This functionality is unsupported in v1 graphs - - class AddAll(layers.Layer): - - def call(self, x1_x2, x3): - x1, x2 = x1_x2 - out = x1 + x2 - if x3 is not None: - for t in x3.values(): - out += t - return out - - input1 = input_layer_lib.Input(10) - input2 = input_layer_lib.Input(10) - input3 = input_layer_lib.Input(10) - - layer = AddAll() - outputs = layer( - [input1, 4 * tf.ones((1, 10))], - x3={ - 'a': input2, - 'b': input3, - 'c': 5 * tf.ones((1, 10)) - }) - model = training_lib.Model([input1, input2, input3], outputs) - self.assertIn(layer, model.layers) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[np.ones((10, 10)), 2 * np.ones((10, 10)), 3 * np.ones((10, 10))], - y=15 * np.ones((10, 10)), - batch_size=2) - # Check that all inputs were correctly added. 
- self.assertEqual(history.history['loss'][0], 0.0) - - model = training_lib.Model.from_config( - model.get_config(), custom_objects={'AddAll': AddAll}) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[np.ones((10, 10)), 2 * np.ones((10, 10)), 3 * np.ones((10, 10))], - y=15 * np.ones((10, 10)), - batch_size=2) - # Check that all inputs were correctly added. - self.assertEqual(history.history['loss'][0], 0.0) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_call_nested_arg_derived_from_keras_layer(self): - - class AddAll(layers.Layer): - - def call(self, x1, x2, x3=None): - out = x1 + x2 - if x3 is not None: - for t in x3.values(): - out += t - return out - - input1 = input_layer_lib.Input(10) - input2 = input_layer_lib.Input(10) - input3 = input_layer_lib.Input(10) - outputs = AddAll()( - input1, - 4 * tf.ones((1, 10)), - x3={ - 'a': input2, - 'b': input3, - 'c': 5 * tf.ones((1, 10)) - }) - model = training_lib.Model([input1, input2, input3], outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[np.ones((10, 10)), 2 * np.ones((10, 10)), 3 * np.ones((10, 10))], - y=15 * np.ones((10, 10)), - batch_size=2) - # Check that all inputs were correctly added. - self.assertEqual(history.history['loss'][0], 0.0) - - model = training_lib.Model.from_config( - model.get_config(), custom_objects={'AddAll': AddAll}) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit( - x=[np.ones((10, 10)), 2 * np.ones((10, 10)), 3 * np.ones((10, 10))], - y=15 * np.ones((10, 10)), - batch_size=2) - # Check that all inputs were correctly added. - self.assertEqual(history.history['loss'][0], 0.0) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_multi_output_model_with_none_masking(self): - def func(x): - return [x * 0.2, x * 0.3] - - def output_shape(input_shape): - return [input_shape, input_shape] - - i = layers.Input(shape=(3, 2, 1)) - o = layers.Lambda(function=func, output_shape=output_shape)(i) - - self.assertEqual(backend.int_shape(o[0]), (None, 3, 2, 1)) - self.assertEqual(backend.int_shape(o[1]), (None, 3, 2, 1)) - - o = layers.add(o) - model = training_lib.Model(i, o) - model.run_eagerly = test_utils.should_run_eagerly() - - i2 = layers.Input(shape=(3, 2, 1)) - o2 = model(i2) - model2 = training_lib.Model(i2, o2) - model2.run_eagerly = test_utils.should_run_eagerly() - - x = np.random.random((4, 3, 2, 1)) - out = model2.predict(x) - assert out.shape == (4, 3, 2, 1) - self.assertAllClose(out, x * 0.2 + x * 0.3, atol=1e-4) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_constant_initializer_with_numpy(self): - initializer = tf.compat.v1.constant_initializer(np.ones((3, 2))) - model = sequential.Sequential() - model.add(layers.Dense(2, input_shape=(3,), kernel_initializer=initializer)) - model.add(layers.Dense(3)) - model.compile( - loss='mse', - optimizer='sgd', - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - - json_str = model.to_json() - models.model_from_json(json_str) - - def test_subclassed_error_if_init_not_called(self): - - class MyNetwork(training_lib.Model): - - def __init__(self): - self._foo = [layers.Dense(10), layers.Dense(10)] - - with self.assertRaisesRegex(RuntimeError, 'forgot to call'): - MyNetwork() - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - 
def test_int_input_shape(self): - inputs = input_layer_lib.Input(10) - self.assertEqual([None, 10], inputs.shape.as_list()) - - inputs_with_batch = input_layer_lib.Input(batch_size=20, shape=5) - self.assertEqual([20, 5], inputs_with_batch.shape.as_list()) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_model_initialization(self): - # Functional model - inputs = input_layer_lib.Input(shape=(32,)) - outputs = layers.Dense(4)(inputs) - - with self.assertRaisesRegex(TypeError, - 'Keyword argument not understood'): - model = training_lib.Model( - inputs, outputs, name='m', trainable=False, dtype='int64') - with self.assertRaisesRegex(TypeError, - 'Keyword argument not understood'): - model = training_lib.Model( - inputs, outputs, name='m', trainable=False, dynamic=False) - - model = training_lib.Model(inputs, outputs, name='m', trainable=False) - self.assertEqual('m', model.name) - self.assertFalse(model.trainable) - self.assertFalse(model.dynamic) - - class SubclassModel(training_lib.Model): - pass - # Subclassed model - model = SubclassModel( - name='subclassed', trainable=True, dtype='int64', dynamic=True) - self.assertEqual('subclassed', model.name) - self.assertTrue(model.dynamic) - self.assertTrue(model.trainable) - w = model.add_weight( - 'w', [], initializer=tf.compat.v1.constant_initializer(1)) - self.assertEqual(tf.int64, w.dtype) - - def test_disconnected_inputs(self): - input_tensor1 = input_layer_lib.Input(shape=[200], name='a') - input_tensor2 = input_layer_lib.Input(shape=[10], name='b') - output_tensor1 = layers.Dense(units=10)(input_tensor1) - - net = functional.Functional( - inputs=[input_tensor1, input_tensor2], outputs=[output_tensor1]) - net2 = functional.Functional.from_config(net.get_config()) - self.assertLen(net2.inputs, 2) - self.assertEqual('a', net2.layers[0].name) - self.assertEqual('b', net2.layers[1].name) - - @test_combinations.generate(test_combinations.keras_model_type_combinations()) - def test_dependency_tracking(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - model.trackable = Checkpoint() - self.assertIn('trackable', model._unconditional_dependency_names) - self.assertEqual(model.trackable, model._lookup_dependency('trackable')) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_model_construction_in_tf_function(self): - - d = {'model': None} - - @tf.function - def fn(x): - if d['model'] is None: - # Check that Functional can be built in a `tf.function`. 
+ def test_default_model_name(self): + inputs = input_layer_lib.Input(shape=(1,)) + outputs = layers.Dense(1, activation="relu")(inputs) + model = training_lib.Model(inputs=inputs, outputs=outputs) + self.assertEqual(model.name, "model") + + model_2 = training_lib.Model(inputs=inputs, outputs=outputs) + self.assertEqual(model_2.name, "model_1") + + model_3 = training_lib.Model(inputs=inputs, outputs=outputs) + self.assertEqual(model_3.name, "model_2") + + def test_get_updates(self): + class MyLayer(layers.Layer): + def build(self, input_shape): + self.a = self.add_weight( + "a", (1, 1), "float32", trainable=False + ) + self.b = self.add_weight( + "b", (1, 1), "float32", trainable=False + ) + self.add_update( + tf.compat.v1.assign_add( + self.a, [[1.0]], name="unconditional_update" + ) + ) + self.built = True + + def call(self, inputs): + self.add_update( + tf.compat.v1.assign_add( + self.b, inputs, name="conditional_update" + ) + ) + return inputs + 1 + + with tf.Graph().as_default(): + x1 = input_layer_lib.Input(shape=(1,)) + layer = MyLayer() + _ = layer(x1) + + self.assertEqual(len(layer.updates), 2) + + x2 = input_layer_lib.Input(shape=(1,)) + y2 = layer(x2) + + self.assertEqual(len(layer.updates), 3) + + network = functional.Functional(x2, y2) + self.assertEqual(len(network.updates), 3) + + x3 = input_layer_lib.Input(shape=(1,)) + _ = layer(x3) + self.assertEqual(len(network.updates), 4) + + x4 = input_layer_lib.Input(shape=(1,)) + _ = network(x4) + self.assertEqual(len(network.updates), 5) + + network.add_update(tf.compat.v1.assign_add(layer.a, [[1]])) + self.assertEqual(len(network.updates), 6) + + network.add_update(tf.compat.v1.assign_add(layer.b, x4)) + self.assertEqual(len(network.updates), 7) + + @test_combinations.generate(test_combinations.combine(mode=["graph"])) + def test_get_updates_bn(self): + x1 = input_layer_lib.Input(shape=(1,)) + layer = layers.BatchNormalization() + _ = layer(x1) + + self.assertEqual(len(layer.updates), 2) + + def test_get_layer(self): + # create a simple network + x = input_layer_lib.Input(shape=(32,)) + dense_a = layers.Dense(4, name="dense_a") + dense_b = layers.Dense(2, name="dense_b") + y = dense_b(dense_a(x)) + network = functional.Functional(x, y, name="dense_network") + + # test various get_layer by index + self.assertEqual(network.get_layer(index=1), dense_a) + + # test invalid get_layer by index + with self.assertRaisesRegex( + ValueError, + "Was asked to retrieve layer at index " + + str(3) + + " but model only has " + + str(len(network.layers)) + + " layers.", + ): + network.get_layer(index=3) + + # test that only one between name and index is requested + with self.assertRaisesRegex( + ValueError, "Provide only a layer name or a layer index" + ): + network.get_layer(index=1, name="dense_b") + + # test that a name or an index must be provided + with self.assertRaisesRegex( + ValueError, "Provide either a layer name or layer index." + ): + network.get_layer() + + # test various get_layer by name + self.assertEqual(network.get_layer(name="dense_a"), dense_a) + + # test invalid get_layer by name + with self.assertRaisesRegex(ValueError, "No such layer: dense_c."): + network.get_layer(name="dense_c") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testTopologicalAttributes(self): + # test layer attributes / methods related to cross-layer connectivity. 
+ a = input_layer_lib.Input(shape=(32,), name="input_a") + b = input_layer_lib.Input(shape=(32,), name="input_b") + + # test input, output, input_shape, output_shape + test_layer = layers.Dense(16, name="test_layer") + a_test = test_layer(a) + self.assertIs(test_layer.input, a) + self.assertIs(test_layer.output, a_test) + self.assertEqual(test_layer.input_shape, (None, 32)) + self.assertEqual(test_layer.output_shape, (None, 16)) + + # test `get_*_at` methods + dense = layers.Dense(16, name="dense_1") + a_2 = dense(a) + b_2 = dense(b) + + self.assertIs(dense.get_input_at(0), a) + self.assertIs(dense.get_input_at(1), b) + self.assertIs(dense.get_output_at(0), a_2) + self.assertIs(dense.get_output_at(1), b_2) + self.assertEqual(dense.get_input_shape_at(0), (None, 32)) + self.assertEqual(dense.get_input_shape_at(1), (None, 32)) + self.assertEqual(dense.get_output_shape_at(0), (None, 16)) + self.assertEqual(dense.get_output_shape_at(1), (None, 16)) + + # Test invalid value for attribute retrieval. + with self.assertRaises(ValueError): + dense.get_input_at(2) + with self.assertRaises(AttributeError): + new_dense = layers.Dense(16) + _ = new_dense.input + with self.assertRaises(AttributeError): + new_dense = layers.Dense(16) + _ = new_dense.output + with self.assertRaises(AttributeError): + new_dense = layers.Dense(16) + _ = new_dense.output_shape + with self.assertRaises(AttributeError): + new_dense = layers.Dense(16) + _ = new_dense.input_shape + with self.assertRaises(AttributeError): + new_dense = layers.Dense(16) + a = input_layer_lib.Input(shape=(3, 32)) + a = input_layer_lib.Input(shape=(5, 32)) + a_2 = dense(a) + b_2 = dense(b) + _ = new_dense.input_shape + with self.assertRaises(AttributeError): + new_dense = layers.Dense(16) + a = input_layer_lib.Input(shape=(3, 32)) + a = input_layer_lib.Input(shape=(5, 32)) + a_2 = dense(a) + b_2 = dense(b) + _ = new_dense.output_shape + + def _assertAllIs(self, a, b): + self.assertTrue(all(x is y for x, y in zip(a, b))) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testTopologicalAttributesMultiOutputLayer(self): + class PowersLayer(layers.Layer): + def call(self, inputs): + return [inputs**2, inputs**3] + + x = input_layer_lib.Input(shape=(32,)) + test_layer = PowersLayer() + p1, p2 = test_layer(x) + + self.assertIs(test_layer.input, x) + self._assertAllIs(test_layer.output, [p1, p2]) + self.assertEqual(test_layer.input_shape, (None, 32)) + self.assertEqual(test_layer.output_shape, [(None, 32), (None, 32)]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testTopologicalAttributesMultiInputLayer(self): + class AddLayer(layers.Layer): + def call(self, inputs): + assert len(inputs) == 2 + return inputs[0] + inputs[1] + + a = input_layer_lib.Input(shape=(32,)) + b = input_layer_lib.Input(shape=(32,)) + test_layer = AddLayer() + y = test_layer([a, b]) + + self._assertAllIs(test_layer.input, [a, b]) + self.assertIs(test_layer.output, y) + self.assertEqual(test_layer.input_shape, [(None, 32), (None, 32)]) + self.assertEqual(test_layer.output_shape, (None, 32)) + + def testBasicNetwork(self): + with tf.Graph().as_default(): + # minimum viable network + x = input_layer_lib.Input(shape=(32,)) + dense = layers.Dense(2) + y = dense(x) + network = functional.Functional(x, y, name="dense_network") + + # test basic attributes + self.assertEqual(network.name, "dense_network") + self.assertEqual(len(network.layers), 2) # InputLayer + Dense + 
self.assertEqual(network.layers[1], dense) + self._assertAllIs(network.weights, dense.weights) + self._assertAllIs( + network.trainable_weights, dense.trainable_weights + ) + self._assertAllIs( + network.non_trainable_weights, dense.non_trainable_weights + ) + + # test callability on Input + x_2 = input_layer_lib.Input(shape=(32,)) + y_2 = network(x_2) + self.assertEqual(y_2.shape.as_list(), [None, 2]) + + # test callability on regular tensor + x_2 = tf.compat.v1.placeholder(dtype="float32", shape=(None, 32)) + y_2 = network(x_2) + self.assertEqual(y_2.shape.as_list(), [None, 2]) + + # test network `trainable` attribute + network.trainable = False + self._assertAllIs(network.weights, dense.weights) + self.assertEqual(network.trainable_weights, []) + self._assertAllIs( + network.non_trainable_weights, + dense.trainable_weights + dense.non_trainable_weights, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_trainable_weights(self): + a = layers.Input(shape=(2,)) + b = layers.Dense(1)(a) + model = training_lib.Model(a, b) + + weights = model.weights + self._assertAllIs(model.trainable_weights, weights) + self.assertListEqual(model.non_trainable_weights, []) + + model.trainable = False + self.assertListEqual(model.trainable_weights, []) + self._assertAllIs(model.non_trainable_weights, weights) + + model.trainable = True + self._assertAllIs(model.trainable_weights, weights) + self.assertListEqual(model.non_trainable_weights, []) + + model.layers[1].trainable = False + self.assertListEqual(model.trainable_weights, []) + self._assertAllIs(model.non_trainable_weights, weights) + + # sequential model + model = sequential.Sequential() + model.add(layers.Dense(1, input_dim=2)) + weights = model.weights + + self._assertAllIs(model.trainable_weights, weights) + self.assertListEqual(model.non_trainable_weights, []) + + model.trainable = False + self.assertListEqual(model.trainable_weights, []) + self._assertAllIs(model.non_trainable_weights, weights) + + model.trainable = True + self._assertAllIs(model.trainable_weights, weights) + self.assertListEqual(model.non_trainable_weights, []) + + model.layers[0].trainable = False + self.assertListEqual(model.trainable_weights, []) + self._assertAllIs(model.non_trainable_weights, weights) + + def test_layer_call_arguments(self): + with tf.Graph().as_default(): + # Test the ability to pass and serialize arguments to `call`. 
+ inp = layers.Input(shape=(2,)) + x = layers.Dense(3)(inp) + x = layers.Dropout(0.5)(x, training=True) + model = training_lib.Model(inp, x) + # Would be `dropout/cond/Merge` by default + self.assertIn("dropout", model.output.op.name) + + # Test that argument is kept when applying the model + inp2 = layers.Input(shape=(2,)) + out2 = model(inp2) + self.assertIn("dropout", out2.op.name) + + # Test that argument is kept after loading a model + config = model.get_config() + model = training_lib.Model.from_config(config) + self.assertIn("dropout", model.output.op.name) + + def test_node_construction(self): + # test basics + a = layers.Input(shape=(32,), name="input_a") + b = layers.Input(shape=(32,), name="input_b") + + with self.assertRaises(ValueError): + _ = layers.Input(shape=(32,), batch_shape=(10, 32)) + with self.assertRaises(ValueError): + _ = layers.Input(shape=(32,), unknown_kwarg=None) + + self.assertListEqual(a.shape.as_list(), [None, 32]) + a_layer, a_node_index, a_tensor_index = a._keras_history + b_layer, _, _ = b._keras_history + self.assertEqual(len(a_layer._inbound_nodes), 1) + self.assertEqual(a_tensor_index, 0) + node = a_layer._inbound_nodes[a_node_index] + self.assertEqual(node.outbound_layer, a_layer) + + self.assertListEqual(node.inbound_layers, []) + self.assertListEqual(node.input_tensors, [a]) + self.assertListEqual(node.input_shapes, [(None, 32)]) + self.assertListEqual(node.output_tensors, [a]) + self.assertListEqual(node.output_shapes, [(None, 32)]) + + dense = layers.Dense(16, name="dense_1") + a_2 = dense(a) + b_2 = dense(b) + + self.assertEqual(len(dense._inbound_nodes), 2) + self.assertEqual(len(dense._outbound_nodes), 0) + self.assertEqual(dense._inbound_nodes[0].inbound_layers, a_layer) + self.assertEqual(dense._inbound_nodes[0].outbound_layer, dense) + self.assertEqual(dense._inbound_nodes[1].inbound_layers, b_layer) + self.assertEqual(dense._inbound_nodes[1].outbound_layer, dense) + self.assertIs(dense._inbound_nodes[0].input_tensors, a) + self.assertIs(dense._inbound_nodes[1].input_tensors, b) + + # test layer properties + test_layer = layers.Dense(16, name="test_layer") + a_test = test_layer(a) + self.assertListEqual(test_layer.kernel.shape.as_list(), [32, 16]) + self.assertIs(test_layer.input, a) + self.assertIs(test_layer.output, a_test) + self.assertEqual(test_layer.input_shape, (None, 32)) + self.assertEqual(test_layer.output_shape, (None, 16)) + + self.assertIs(dense.get_input_at(0), a) + self.assertIs(dense.get_input_at(1), b) + self.assertIs(dense.get_output_at(0), a_2) + self.assertIs(dense.get_output_at(1), b_2) + self.assertEqual(dense.get_input_shape_at(0), (None, 32)) + self.assertEqual(dense.get_input_shape_at(1), (None, 32)) + self.assertEqual(dense.get_output_shape_at(0), (None, 16)) + self.assertEqual(dense.get_output_shape_at(1), (None, 16)) + self.assertEqual(dense.get_input_mask_at(0), None) + self.assertEqual(dense.get_input_mask_at(1), None) + self.assertEqual(dense.get_output_mask_at(0), None) + self.assertEqual(dense.get_output_mask_at(1), None) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_multi_input_layer(self): + with self.cached_session(): + # test multi-input layer + a = layers.Input(shape=(32,), name="input_a") + b = layers.Input(shape=(32,), name="input_b") + + dense = layers.Dense(16, name="dense_1") + a_2 = dense(a) + b_2 = dense(b) + + merged = layers.concatenate([a_2, b_2], name="merge") + self.assertListEqual(merged.shape.as_list(), [None, 16 * 2]) + ( + 
merge_layer, + merge_node_index, + merge_tensor_index, + ) = merged._keras_history + + self.assertEqual(merge_node_index, 0) + self.assertEqual(merge_tensor_index, 0) + + self.assertEqual(len(merge_layer._inbound_nodes), 1) + self.assertEqual(len(merge_layer._outbound_nodes), 0) + + self.assertEqual( + len(merge_layer._inbound_nodes[0].input_tensors), 2 + ) + self.assertEqual( + len(merge_layer._inbound_nodes[0].inbound_layers), 2 + ) + + c = layers.Dense(64, name="dense_2")(merged) + d = layers.Dense(5, name="dense_3")(c) + + model = training_lib.Model( + inputs=[a, b], outputs=[c, d], name="model" + ) + self.assertEqual(len(model.layers), 6) + output_shapes = model.compute_output_shape([(None, 32), (None, 32)]) + self.assertListEqual(output_shapes[0].as_list(), [None, 64]) + self.assertListEqual(output_shapes[1].as_list(), [None, 5]) + self.assertListEqual( + model.compute_mask([a, b], [None, None]), [None, None] + ) + + # we don't check names of first 2 layers (inputs) because + # ordering of same-level layers is not fixed + self.assertListEqual( + [l.name for l in model.layers][2:], + ["dense_1", "merge", "dense_2", "dense_3"], + ) + self.assertListEqual( + [l.name for l in model._input_layers], ["input_a", "input_b"] + ) + self.assertListEqual( + [l.name for l in model._output_layers], ["dense_2", "dense_3"] + ) + + # actually run model + fn = backend.function(model.inputs, model.outputs) + input_a_np = np.random.random((10, 32)) + input_b_np = np.random.random((10, 32)) + fn_outputs = fn([input_a_np, input_b_np]) + self.assertListEqual( + [x.shape for x in fn_outputs], [(10, 64), (10, 5)] + ) + + # test get_source_inputs + self._assertAllIs(layer_utils.get_source_inputs(c), [a, b]) + + # serialization / deserialization + json_config = model.to_json() + recreated_model = models.model_from_json(json_config) + recreated_model.compile("rmsprop", "mse") + + self.assertListEqual( + [l.name for l in recreated_model.layers][2:], + ["dense_1", "merge", "dense_2", "dense_3"], + ) + self.assertListEqual( + [l.name for l in recreated_model._input_layers], + ["input_a", "input_b"], + ) + self.assertListEqual( + [l.name for l in recreated_model._output_layers], + ["dense_2", "dense_3"], + ) + + fn = backend.function( + recreated_model.inputs, recreated_model.outputs + ) + input_a_np = np.random.random((10, 32)) + input_b_np = np.random.random((10, 32)) + fn_outputs = fn([input_a_np, input_b_np]) + self.assertListEqual( + [x.shape for x in fn_outputs], [(10, 64), (10, 5)] + ) + + def test_multi_output_layer_output_names(self): + inp = layers.Input(name="inp", shape=(None,), dtype=tf.float32) + + class _MultiOutput(layers.Layer): + def call(self, x): + return x + 1.0, x + 2.0 + + out = _MultiOutput(name="out")(inp) + model = training_lib.Model(inp, out) + self.assertEqual(["out", "out_1"], model.output_names) + self.assertAllClose([2.0, 3.0], model(1.0)) + + def test_recursion(self): + with tf.Graph().as_default(), self.cached_session(): + a = layers.Input(shape=(32,), name="input_a") + b = layers.Input(shape=(32,), name="input_b") + + dense = layers.Dense(16, name="dense_1") + a_2 = dense(a) + b_2 = dense(b) + merged = layers.concatenate([a_2, b_2], name="merge") + c = layers.Dense(64, name="dense_2")(merged) + d = layers.Dense(5, name="dense_3")(c) + + model = training_lib.Model( + inputs=[a, b], outputs=[c, d], name="model" + ) + + e = layers.Input(shape=(32,), name="input_e") + f = layers.Input(shape=(32,), name="input_f") + self.assertEqual(len(model.inputs), 2) + g, h = model([e, f]) + 
self.assertEqual(len(model.inputs), 2) + self.assertEqual(g.name, "model/dense_2/BiasAdd:0") + + self.assertListEqual(g.shape.as_list(), c.shape.as_list()) + self.assertListEqual(h.shape.as_list(), d.shape.as_list()) + + # test separate manipulation of different layer outputs + i = layers.Dense(7, name="dense_4")(h) + + final_model = training_lib.Model( + inputs=[e, f], outputs=[i, g], name="final" + ) + self.assertEqual(len(final_model.inputs), 2) + self.assertEqual(len(final_model.outputs), 2) + self.assertEqual(len(final_model.layers), 4) + + # we don't check names of first 2 layers (inputs) because + # ordering of same-level layers is not fixed + self.assertListEqual( + [layer.name for layer in final_model.layers][2:], + ["model", "dense_4"], + ) + self.assertListEqual( + model.compute_mask([e, f], [None, None]), [None, None] + ) + self.assertListEqual( + final_model.compute_output_shape([(10, 32), (10, 32)]), + [(10, 7), (10, 64)], + ) + + # run recursive model + fn = backend.function(final_model.inputs, final_model.outputs) + input_a_np = np.random.random((10, 32)) + input_b_np = np.random.random((10, 32)) + fn_outputs = fn([input_a_np, input_b_np]) + self.assertListEqual( + [x.shape for x in fn_outputs], [(10, 7), (10, 64)] + ) + + # test serialization + model_config = final_model.get_config() + recreated_model = models.Model.from_config(model_config) + + fn = backend.function( + recreated_model.inputs, recreated_model.outputs + ) + input_a_np = np.random.random((10, 32)) + input_b_np = np.random.random((10, 32)) + fn_outputs = fn([input_a_np, input_b_np]) + self.assertListEqual( + [x.shape for x in fn_outputs], [(10, 7), (10, 64)] + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_multi_input_multi_output_recursion(self): + with self.cached_session(): + # test multi-input multi-output + a = layers.Input(shape=(32,), name="input_a") + b = layers.Input(shape=(32,), name="input_b") + + dense = layers.Dense(16, name="dense_1") + a_2 = dense(a) + b_2 = dense(b) + merged = layers.concatenate([a_2, b_2], name="merge") + c = layers.Dense(64, name="dense_2")(merged) + d = layers.Dense(5, name="dense_3")(c) + + model = training_lib.Model( + inputs=[a, b], outputs=[c, d], name="model" + ) + + j = layers.Input(shape=(32,), name="input_j") + k = layers.Input(shape=(32,), name="input_k") + _, n = model([j, k]) + + o = layers.Input(shape=(32,), name="input_o") + p = layers.Input(shape=(32,), name="input_p") + q, _ = model([o, p]) + + self.assertListEqual(n.shape.as_list(), [None, 5]) + self.assertListEqual(q.shape.as_list(), [None, 64]) + s = layers.concatenate([n, q], name="merge_nq") + self.assertListEqual(s.shape.as_list(), [None, 64 + 5]) + + # test with single output as 1-elem list + multi_io_model = training_lib.Model([j, k, o, p], [s]) + + fn = backend.function(multi_io_model.inputs, multi_io_model.outputs) + fn_outputs = fn( + [ + np.random.random((10, 32)), + np.random.random((10, 32)), + np.random.random((10, 32)), + np.random.random((10, 32)), + ] + ) + self.assertListEqual([x.shape for x in fn_outputs], [(10, 69)]) + + # test with single output as tensor + multi_io_model = training_lib.Model([j, k, o, p], s) + + fn = backend.function(multi_io_model.inputs, multi_io_model.outputs) + fn_outputs = fn( + [ + np.random.random((10, 32)), + np.random.random((10, 32)), + np.random.random((10, 32)), + np.random.random((10, 32)), + ] + ) + # note that the output of the function will still be a 1-elem list + self.assertListEqual([x.shape for 
x in fn_outputs], [(10, 69)]) + + # test serialization + model_config = multi_io_model.get_config() + recreated_model = models.Model.from_config(model_config) + + fn = backend.function( + recreated_model.inputs, recreated_model.outputs + ) + fn_outputs = fn( + [ + np.random.random((10, 32)), + np.random.random((10, 32)), + np.random.random((10, 32)), + np.random.random((10, 32)), + ] + ) + # note that the output of the function will still be a 1-elem list + self.assertListEqual([x.shape for x in fn_outputs], [(10, 69)]) + + config = model.get_config() + models.Model.from_config(config) + + model.summary() + json_str = model.to_json() + models.model_from_json(json_str) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_invalid_graphs(self): + a = layers.Input(shape=(32,), name="input_a") + b = layers.Input(shape=(32,), name="input_b") + + dense = layers.Dense(16, name="dense_1") + a_2 = dense(a) + b_2 = dense(b) + merged = layers.concatenate([a_2, b_2], name="merge") + c = layers.Dense(64, name="dense_2")(merged) + d = layers.Dense(5, name="dense_3")(c) + + model = training_lib.Model(inputs=[a, b], outputs=[c, d], name="model") + + # disconnected graph + j = layers.Input(shape=(32,), name="input_j") + k = layers.Input(shape=(32,), name="input_k") + m, n = model([j, k]) + with self.assertRaises(Exception): + training_lib.Model([j], [m, n]) + + # redundant outputs + j = layers.Input(shape=(32,), name="input_j") + k = layers.Input(shape=(32,), name="input_k") + m, n = model([j, k]) + + training_lib.Model([j, k], [m, n, n]) + + # redundant inputs + j = layers.Input(shape=(32,), name="input_j") + k = layers.Input(shape=(32,), name="input_k") + m, n = model([j, k]) + with self.assertRaises(Exception): + training_lib.Model([j, k, j], [m, n]) + + # I have no idea what I'm doing: garbage as inputs/outputs + j = layers.Input(shape=(32,), name="input_j") + k = layers.Input(shape=(32,), name="input_k") + m, n = model([j, k]) + with self.assertRaises(Exception): + training_lib.Model([j, k], [m, n, 0]) + + def test_raw_tf_compatibility(self): + with tf.Graph().as_default(): + # test calling layers/models on TF tensors + a = layers.Input(shape=(32,), name="input_a") + b = layers.Input(shape=(32,), name="input_b") + + dense = layers.Dense(16, name="dense_1") + a_2 = dense(a) + b_2 = dense(b) + merged = layers.concatenate([a_2, b_2], name="merge") + c = layers.Dense(64, name="dense_2")(merged) + d = layers.Dense(5, name="dense_3")(c) + + model = training_lib.Model( + inputs=[a, b], outputs=[c, d], name="model" + ) + + j = layers.Input(shape=(32,), name="input_j") + k = layers.Input(shape=(32,), name="input_k") + self.assertEqual(len(model.inputs), 2) + m, n = model([j, k]) + self.assertEqual(len(model.inputs), 2) + tf_model = training_lib.Model([j, k], [m, n]) + + j_tf = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, 32)) + k_tf = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, 32)) + m_tf, n_tf = tf_model([j_tf, k_tf]) + self.assertListEqual(m_tf.shape.as_list(), [None, 64]) + self.assertListEqual(n_tf.shape.as_list(), [None, 5]) + + # test merge + layers.concatenate([j_tf, k_tf], axis=1) + layers.add([j_tf, k_tf]) + + # test tensor input + x = tf.compat.v1.placeholder(shape=(None, 2), dtype=tf.float32) + layers.InputLayer(input_tensor=x) + + x = layers.Input(tensor=x) + layers.Dense(2)(x) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_basic_masking(self): + a = layers.Input(shape=(10,
32), name="input_a") + b = layers.Masking()(a) + model = training_lib.Model(a, b) + self.assertEqual(model.output_mask.shape.as_list(), [None, 10]) + + def testMaskingSingleInput(self): + class MaskedLayer(layers.Layer): + def call(self, inputs, mask=None): + if mask is not None: + return inputs * mask + return inputs + + def compute_mask(self, inputs, mask=None): + return tf.ones_like(inputs) + + if tf.executing_eagerly(): + a = tf.constant([2] * 32) + mask = tf.constant([0, 1] * 16) + a._keras_mask = mask + b = MaskedLayer()(a) + self.assertTrue(hasattr(b, "_keras_mask")) + self.assertAllEqual( + self.evaluate(tf.ones_like(mask)), + self.evaluate(getattr(b, "_keras_mask")), + ) + self.assertAllEqual(self.evaluate(a * mask), self.evaluate(b)) + else: + x = input_layer_lib.Input(shape=(32,)) + y = MaskedLayer()(x) + network = functional.Functional(x, y) + + # test callability on Input + x_2 = input_layer_lib.Input(shape=(32,)) + y_2 = network(x_2) + self.assertEqual(y_2.shape.as_list(), [None, 32]) + + # test callability on regular tensor + x_2 = tf.compat.v1.placeholder(dtype="float32", shape=(None, 32)) + y_2 = network(x_2) + self.assertEqual(y_2.shape.as_list(), [None, 32]) + + def test_activity_regularization_with_model_composition(self): + def reg(x): + return tf.reduce_sum(x) + + net_a_input = input_layer_lib.Input((2,)) + net_a = net_a_input + net_a = layers.Dense( + 2, + kernel_initializer="ones", + use_bias=False, + activity_regularizer=reg, + )(net_a) + model_a = training_lib.Model([net_a_input], [net_a]) + + net_b_input = input_layer_lib.Input((2,)) + net_b = model_a(net_b_input) + model_b = training_lib.Model([net_b_input], [net_b]) + + model_b.compile(optimizer="sgd", loss=None) + x = np.ones((1, 2)) + loss = model_b.evaluate(x) + self.assertEqual(loss, 4.0) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_layer_sharing_at_heterogenous_depth(self): + x_val = np.random.random((10, 5)) + + x = input_layer_lib.Input(shape=(5,)) + a = layers.Dense(5, name="A") + b = layers.Dense(5, name="B") + output = a(b(a(b(x)))) + m = training_lib.Model(x, output) + m.run_eagerly = test_utils.should_run_eagerly() + + output_val = m.predict(x_val) + + config = m.get_config() + weights = m.get_weights() + + m2 = models.Model.from_config(config) + m2.set_weights(weights) + + output_val_2 = m2.predict(x_val) + self.assertAllClose(output_val, output_val_2, atol=1e-6) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_layer_sharing_at_heterogenous_depth_with_concat(self): + input_shape = (16, 9, 3) + input_layer = input_layer_lib.Input(shape=input_shape) + + a = layers.Dense(3, name="dense_A") + b = layers.Dense(3, name="dense_B") + c = layers.Dense(3, name="dense_C") + + x1 = b(a(input_layer)) + x2 = a(c(input_layer)) + output = layers.concatenate([x1, x2]) + + m = training_lib.Model(inputs=input_layer, outputs=output) + m.run_eagerly = test_utils.should_run_eagerly() + + x_val = np.random.random((10, 16, 9, 3)) + output_val = m.predict(x_val) + + config = m.get_config() + weights = m.get_weights() + + m2 = models.Model.from_config(config) + m2.set_weights(weights) + + output_val_2 = m2.predict(x_val) + self.assertAllClose(output_val, output_val_2, atol=1e-6) + + def test_layer_sharing_maintains_node_order(self): + # See https://github.com/keras-team/keras/issues/14838. 
+ inp = input_layer_lib.Input(shape=[5], name="main_input") + + shared_layer = layers.Layer(name="shared") + + ones_result = shared_layer(tf.ones_like(inp)) + zeros_result = shared_layer(tf.zeros_like(inp)) + zeros_result = layers.Layer(name="blank")(zeros_result) + + m = training_lib.Model( + inputs=[inp], outputs=[zeros_result, ones_result] + ) + m2 = models.Model.from_config(m.get_config()) + self.assertAllClose( + m2.predict_on_batch(tf.zeros([1, 5])), + m.predict_on_batch(tf.zeros([1, 5])), + ) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_explicit_training_argument(self): + a = layers.Input(shape=(2,)) + b = layers.Dropout(0.5)(a) + base_model = training_lib.Model(a, b) + + a = layers.Input(shape=(2,)) + b = base_model(a, training=False) + model = training_lib.Model(a, b) + + x = np.ones((100, 2)) + y = np.ones((100, 2)) + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + loss = model.train_on_batch(x, y) + self.assertEqual( + loss, 0 + ) # In inference mode, output is equal to input. + + a = layers.Input(shape=(2,)) + b = base_model(a, training=True) + model = training_lib.Model(a, b) + preds = model.predict(x) + self.assertEqual(np.min(preds), 0.0) # At least one unit was dropped. + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_mask_derived_from_keras_layer(self): + inputs = input_layer_lib.Input((5, 10)) + mask = input_layer_lib.Input((5,)) + outputs = layers.RNN(layers.LSTMCell(100))(inputs, mask=mask) + model = training_lib.Model([inputs, mask], outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[np.ones((10, 5, 10)), np.zeros((10, 5))], + y=np.zeros((10, 100)), + batch_size=2, + ) + # All data is masked, returned values are 0's. + self.assertEqual(history.history["loss"][0], 0.0) + history = model.fit( + x=[np.ones((10, 5, 10)), np.ones((10, 5))], + y=np.zeros((10, 100)), + batch_size=2, + ) + # Data is not masked, returned values are random. + self.assertGreater(history.history["loss"][0], 0.0) + + model = training_lib.Model.from_config(model.get_config()) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[np.ones((10, 5, 10)), np.zeros((10, 5))], + y=np.zeros((10, 100)), + batch_size=2, + ) + # All data is masked, returned values are 0's. + self.assertEqual(history.history["loss"][0], 0.0) + history = model.fit( + x=[np.ones((10, 5, 10)), np.ones((10, 5))], + y=np.zeros((10, 100)), + batch_size=2, + ) + # Data is not masked, returned values are random. + self.assertGreater(history.history["loss"][0], 0.0) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_call_arg_derived_from_keras_layer(self): + class MyAdd(layers.Layer): + def call(self, x1, x2): + return x1 + x2 + + input1 = input_layer_lib.Input(10) + input2 = input_layer_lib.Input(10) + outputs = MyAdd()(input1, input2) + model = training_lib.Model([input1, input2], outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], + y=10 * np.ones((10, 10)), + batch_size=2, + ) + # Check that second input was correctly added to first. + self.assertEqual(history.history["loss"][0], 0.0) + + # Check serialization. 
+ model = training_lib.Model.from_config( + model.get_config(), custom_objects={"MyAdd": MyAdd} + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], + y=10 * np.ones((10, 10)), + batch_size=2, + ) + # Check that second input was correctly added to first. + self.assertEqual(history.history["loss"][0], 0.0) + + @test_combinations.generate( + test_combinations.keras_mode_combinations(mode="eager"), + ) + def test_only_some_in_first_arg_derived_from_keras_layer_keras_tensors( + self, + ): + # This functionality is unsupported in v1 graphs + + class MyAddAll(layers.Layer): + def call(self, inputs): + x = inputs[0] + for inp in inputs[1:]: + if inp is not None: + x = x + inp + return x + + input1 = input_layer_lib.Input(10) + input2 = input_layer_lib.Input(10) + layer = MyAddAll() + outputs = layer([0.0, input1, None, input2, None]) + model = training_lib.Model([input1, input2], outputs) + self.assertIn(layer, model.layers) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], + y=10 * np.ones((10, 10)), + batch_size=2, + ) + # Check that second input was correctly added to first. + self.assertEqual(history.history["loss"][0], 0.0) + + # Check serialization. + model = training_lib.Model.from_config( + model.get_config(), custom_objects={"MyAddAll": MyAddAll} + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], + y=10 * np.ones((10, 10)), + batch_size=2, + ) + # Check that second input was correctly added to first. + self.assertEqual(history.history["loss"][0], 0.0) + + @test_combinations.generate( + test_combinations.times( + test_combinations.keras_mode_combinations(), + test_combinations.combine(share_already_used_layer=[True, False]), + ) + ) + def test_call_kwarg_derived_from_keras_layer( + self, share_already_used_layer + ): + class MaybeAdd(layers.Layer): + def call(self, x1, x2=None): + if x2 is not None: + return x1 + x2 + return x1 + + class IdentityLayer(layers.Layer): + def call(self, x): + return x + + input1 = input_layer_lib.Input(10) + input2 = input_layer_lib.Input(10) + identity_layer = IdentityLayer() + + if share_already_used_layer: + # We have had model serialization/deserialization break in the past: + # when a layer was previously used to construct other functional + # models and had a non-empty list of inbound nodes before being used + # to define the model being serialized/deserialized. (The + # serialization/deserialization was not correctly adjusting the + # node_index serialization/deserialization). So, we explicitly test + # this case. + training_lib.Model([input1], identity_layer(input1)) + + outputs = MaybeAdd()(input1, x2=identity_layer(input2)) + model = training_lib.Model([input1, input2], outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], + y=10 * np.ones((10, 10)), + batch_size=2, + ) + # Check that second input was correctly added to first. 
+ self.assertEqual(history.history["loss"][0], 0.0) + + model = training_lib.Model.from_config( + model.get_config(), + custom_objects={ + "MaybeAdd": MaybeAdd, + "IdentityLayer": IdentityLayer, + }, + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], + y=10 * np.ones((10, 10)), + batch_size=2, + ) + # Check that second input was correctly added to first. + self.assertEqual(history.history["loss"][0], 0.0) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_call_kwarg_dtype_serialization(self): + class Double(layers.Layer): + def call(self, x1, dtype=None): + return tf.cast(x1 + x1, dtype=dtype) + + input1 = input_layer_lib.Input(10) + outputs = Double()(input1, dtype=tf.float16) + model = training_lib.Model([input1], outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10))], y=6 * np.ones((10, 10)), batch_size=2 + ) + # Check that input was correctly doubled. + self.assertEqual(history.history["loss"][0], 0.0) + + # Check the output dtype + self.assertEqual(model(tf.ones((3, 10))).dtype, tf.float16) + + model = training_lib.Model.from_config( + model.get_config(), custom_objects={"Double": Double} + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10))], y=6 * np.ones((10, 10)), batch_size=2 + ) + # Check that input was correctly doubled. + self.assertEqual(history.history["loss"][0], 0.0) + + # Check the output dtype + self.assertEqual(model(tf.ones((3, 10))).dtype, tf.float16) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_call_kwarg_nonserializable(self): + class Double(layers.Layer): + def call(self, x1, kwarg=None): + return x1 + x1 + + class NonSerializable: + def __init__(self, foo=None): + self.foo = foo + + input1 = input_layer_lib.Input(10) + outputs = Double()(input1, kwarg=NonSerializable()) + model = training_lib.Model([input1], outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10))], y=6 * np.ones((10, 10)), batch_size=2 + ) + # Check that input was correctly doubled. + self.assertEqual(history.history["loss"][0], 0.0) + with self.assertRaisesRegex( + TypeError, + "Layer double was passed non-JSON-serializable arguments.", + ): + model.get_config() + + @test_combinations.generate( + test_combinations.times( + test_combinations.keras_mode_combinations(), + test_combinations.combine(share_already_used_layer=[True, False]), + ) + ) + def test_call_kwarg_derived_from_keras_layer_and_first_arg_is_constant( + self, share_already_used_layer + ): + class IdentityLayer(layers.Layer): + def call(self, x): + return x + + class MaybeAdd(layers.Layer): + def call(self, x1, x2=None): + if x2 is not None: + return x1 + x2 + return x1 + + input2 = input_layer_lib.Input(10) + identity_layer = IdentityLayer() + if share_already_used_layer: + # We have had model serialization/deserialization break in the past: + # when a layer was previously used to construct other functional + # models and had a non-empty list of inbound nodes before being used + # to define the model being serialized/deserialized. (The + # serialization/deserialization was not correctly adjusting the + # node_index serialization/deserialization). So, we explicitly test + # this case. 
+ training_lib.Model([input2], identity_layer(input2)) + + outputs = MaybeAdd()(3.0, x2=identity_layer(input2)) + model = training_lib.Model([input2], outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=7 * np.ones((10, 10)), y=10 * np.ones((10, 10)), batch_size=2 + ) + # Check that second input was correctly added to first. + self.assertEqual(history.history["loss"][0], 0.0) + + model = training_lib.Model.from_config( + model.get_config(), + custom_objects={ + "MaybeAdd": MaybeAdd, + "IdentityLayer": IdentityLayer, + }, + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=7 * np.ones((10, 10)), y=10 * np.ones((10, 10)), batch_size=2 + ) + # Check that second input was correctly added to first. + self.assertEqual(history.history["loss"][0], 0.0) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_dont_cast_composite_unless_necessary(self): + if not tf.executing_eagerly(): + # Creating Keras inputs from a type_spec only supported in eager. + return + + # TODO(edloper): Change this to tf.experimental.ExtensionType once + # it's been released. + class MyType(extension_type.ExtensionType): + # TODO(edloper) Remove _shape and _dtype once Keras has been + # switched to use .shape and .dtype instead. + value: tf.Tensor + _shape = property(lambda self: self.value.shape) + shape = property(lambda self: self.value.shape) + _dtype = property(lambda self: self.value.dtype) + dtype = property(lambda self: self.value.dtype) + + class Spec: + _shape = property(lambda self: self.value.shape) + shape = property(lambda self: self.value.shape) + _dtype = property(lambda self: self.value.dtype) + dtype = property(lambda self: self.value.dtype) + + my_spec = MyType.Spec(tf.TensorSpec([5], tf.float32)) + input1 = input_layer_lib.Input(type_spec=my_spec) + model = training_lib.Model([input1], input1) + model.compile(run_eagerly=test_utils.should_run_eagerly()) + model(MyType([1.0, 2.0, 3.0, 4.0, 5.0])) # Does not require cast. + with self.assertRaises((ValueError, TypeError)): + model(MyType([1, 2, 3, 4, 5])) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_composite_call_kwarg_derived_from_keras_layer(self): + + # Create a test layer that accepts composite tensor inputs. + class MaybeAdd(layers.Layer): + def call(self, x1, x2=None): + # We need to convert this to a tensor for loss calculations - + # losses don't play nicely with ragged tensors yet. + if x2 is not None: + return (x1 + x2).to_tensor(default_value=0) + return x1.to_tensor(default_value=0) + + input1 = input_layer_lib.Input((None,), ragged=True) + input2 = input_layer_lib.Input((None,), ragged=True) + outputs = MaybeAdd()(input1, x2=input2) + model = training_lib.Model([input1, input2], outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + input_data = [ + tf.ragged.constant([[3.0, 3.0], [3.0, 3.0], [3.0]]), + tf.ragged.constant([[7.0, 7.0], [7.0, 7.0], [7.0]]), + ] + expected_data = np.array([[10.0, 10.0], [10.0, 10.0], [10.0, 0.0]]) + + history = model.fit(x=input_data, y=expected_data) + # Check that second input was correctly added to first.
+ self.assertEqual(history.history["loss"][0], 0.0) + + model = training_lib.Model.from_config( + model.get_config(), custom_objects={"MaybeAdd": MaybeAdd} + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit(x=input_data, y=expected_data) + # Check that second input was correctly added to first. + self.assertEqual(history.history["loss"][0], 0.0) + + @test_combinations.generate( + test_combinations.keras_mode_combinations(mode="eager") + ) + def test_call_some_not_all_nested_in_first_arg_derived_from_keras_layer( + self, + ): + # This functionality is unsupported in v1 graphs + + class AddAll(layers.Layer): + def call(self, x1_x2, x3): + x1, x2 = x1_x2 + out = x1 + x2 + if x3 is not None: + for t in x3.values(): + out += t + return out + + input1 = input_layer_lib.Input(10) + input2 = input_layer_lib.Input(10) + input3 = input_layer_lib.Input(10) + + layer = AddAll() + outputs = layer( + [input1, 4 * tf.ones((1, 10))], + x3={"a": input2, "b": input3, "c": 5 * tf.ones((1, 10))}, + ) + model = training_lib.Model([input1, input2, input3], outputs) + self.assertIn(layer, model.layers) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[np.ones((10, 10)), 2 * np.ones((10, 10)), 3 * np.ones((10, 10))], + y=15 * np.ones((10, 10)), + batch_size=2, + ) + # Check that all inputs were correctly added. + self.assertEqual(history.history["loss"][0], 0.0) + + model = training_lib.Model.from_config( + model.get_config(), custom_objects={"AddAll": AddAll} + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[np.ones((10, 10)), 2 * np.ones((10, 10)), 3 * np.ones((10, 10))], + y=15 * np.ones((10, 10)), + batch_size=2, + ) + # Check that all inputs were correctly added. + self.assertEqual(history.history["loss"][0], 0.0) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_call_nested_arg_derived_from_keras_layer(self): + class AddAll(layers.Layer): + def call(self, x1, x2, x3=None): + out = x1 + x2 + if x3 is not None: + for t in x3.values(): + out += t + return out + + input1 = input_layer_lib.Input(10) + input2 = input_layer_lib.Input(10) + input3 = input_layer_lib.Input(10) + outputs = AddAll()( + input1, + 4 * tf.ones((1, 10)), + x3={"a": input2, "b": input3, "c": 5 * tf.ones((1, 10))}, + ) + model = training_lib.Model([input1, input2, input3], outputs) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[np.ones((10, 10)), 2 * np.ones((10, 10)), 3 * np.ones((10, 10))], + y=15 * np.ones((10, 10)), + batch_size=2, + ) + # Check that all inputs were correctly added. + self.assertEqual(history.history["loss"][0], 0.0) + + model = training_lib.Model.from_config( + model.get_config(), custom_objects={"AddAll": AddAll} + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + history = model.fit( + x=[np.ones((10, 10)), 2 * np.ones((10, 10)), 3 * np.ones((10, 10))], + y=15 * np.ones((10, 10)), + batch_size=2, + ) + # Check that all inputs were correctly added. 
+ self.assertEqual(history.history["loss"][0], 0.0) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_multi_output_model_with_none_masking(self): + def func(x): + return [x * 0.2, x * 0.3] + + def output_shape(input_shape): + return [input_shape, input_shape] + + i = layers.Input(shape=(3, 2, 1)) + o = layers.Lambda(function=func, output_shape=output_shape)(i) + + self.assertEqual(backend.int_shape(o[0]), (None, 3, 2, 1)) + self.assertEqual(backend.int_shape(o[1]), (None, 3, 2, 1)) + + o = layers.add(o) + model = training_lib.Model(i, o) + model.run_eagerly = test_utils.should_run_eagerly() + + i2 = layers.Input(shape=(3, 2, 1)) + o2 = model(i2) + model2 = training_lib.Model(i2, o2) + model2.run_eagerly = test_utils.should_run_eagerly() + + x = np.random.random((4, 3, 2, 1)) + out = model2.predict(x) + assert out.shape == (4, 3, 2, 1) + self.assertAllClose(out, x * 0.2 + x * 0.3, atol=1e-4) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_constant_initializer_with_numpy(self): + initializer = tf.compat.v1.constant_initializer(np.ones((3, 2))) + model = sequential.Sequential() + model.add( + layers.Dense(2, input_shape=(3,), kernel_initializer=initializer) + ) + model.add(layers.Dense(3)) + model.compile( + loss="mse", + optimizer="sgd", + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + json_str = model.to_json() + models.model_from_json(json_str) + + def test_subclassed_error_if_init_not_called(self): + class MyNetwork(training_lib.Model): + def __init__(self): + self._foo = [layers.Dense(10), layers.Dense(10)] + + with self.assertRaisesRegex(RuntimeError, "forgot to call"): + MyNetwork() + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_int_input_shape(self): inputs = input_layer_lib.Input(10) - outputs = layers.Dense(1)(inputs) - model = functional.Functional(inputs, outputs) - d['model'] = model - else: - model = d['model'] - - return model(x) - - x = tf.ones((10, 10)) - y = fn(x) - self.assertEqual(y.shape.as_list(), [10, 1]) - - def test_save_spec(self): - """Tests that functional model generates the correct save spec.""" - - class MultiInputModel(training_lib.Model): - - def call(self, x, y): - return x - - inp = input_layer_lib.Input(shape=(1,)) - inp2 = input_layer_lib.Input(shape=(1,), batch_size=5, dtype=tf.int32) - out = MultiInputModel()(inp, inp2) - m = training_lib.Model(inputs={'x': inp, 'y': inp2}, outputs=out) - input_spec = m.save_spec(dynamic_batch=False)[0][0] - self.assertIn('x', input_spec) - self.assertIn('y', input_spec) - self.assertAllEqual([None, 1], input_spec['x'].shape.as_list()) - self.assertAllEqual(tf.float32, input_spec['x'].dtype) - self.assertAllEqual([5, 1], input_spec['y'].shape.as_list()) - self.assertAllEqual(tf.int32, input_spec['y'].dtype) + self.assertEqual([None, 10], inputs.shape.as_list()) + + inputs_with_batch = input_layer_lib.Input(batch_size=20, shape=5) + self.assertEqual([20, 5], inputs_with_batch.shape.as_list()) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_model_initialization(self): + # Functional model + inputs = input_layer_lib.Input(shape=(32,)) + outputs = layers.Dense(4)(inputs) + + with self.assertRaisesRegex( + TypeError, "Keyword argument not understood" + ): + model = training_lib.Model( + inputs, outputs, name="m", trainable=False, dtype="int64" + ) + with self.assertRaisesRegex( + TypeError, "Keyword argument not 
understood" + ): + model = training_lib.Model( + inputs, outputs, name="m", trainable=False, dynamic=False + ) + + model = training_lib.Model(inputs, outputs, name="m", trainable=False) + self.assertEqual("m", model.name) + self.assertFalse(model.trainable) + self.assertFalse(model.dynamic) + + class SubclassModel(training_lib.Model): + pass + + # Subclassed model + model = SubclassModel( + name="subclassed", trainable=True, dtype="int64", dynamic=True + ) + self.assertEqual("subclassed", model.name) + self.assertTrue(model.dynamic) + self.assertTrue(model.trainable) + w = model.add_weight( + "w", [], initializer=tf.compat.v1.constant_initializer(1) + ) + self.assertEqual(tf.int64, w.dtype) + + def test_disconnected_inputs(self): + input_tensor1 = input_layer_lib.Input(shape=[200], name="a") + input_tensor2 = input_layer_lib.Input(shape=[10], name="b") + output_tensor1 = layers.Dense(units=10)(input_tensor1) + + net = functional.Functional( + inputs=[input_tensor1, input_tensor2], outputs=[output_tensor1] + ) + net2 = functional.Functional.from_config(net.get_config()) + self.assertLen(net2.inputs, 2) + self.assertEqual("a", net2.layers[0].name) + self.assertEqual("b", net2.layers[1].name) + + @test_combinations.generate( + test_combinations.keras_model_type_combinations() + ) + def test_dependency_tracking(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + model.trackable = Checkpoint() + self.assertIn("trackable", model._unconditional_dependency_names) + self.assertEqual(model.trackable, model._lookup_dependency("trackable")) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_model_construction_in_tf_function(self): + + d = {"model": None} + + @tf.function + def fn(x): + if d["model"] is None: + # Check that Functional can be built in a `tf.function`. + inputs = input_layer_lib.Input(10) + outputs = layers.Dense(1)(inputs) + model = functional.Functional(inputs, outputs) + d["model"] = model + else: + model = d["model"] + + return model(x) + + x = tf.ones((10, 10)) + y = fn(x) + self.assertEqual(y.shape.as_list(), [10, 1]) + + def test_save_spec(self): + """Tests that functional model generates the correct save spec.""" + + class MultiInputModel(training_lib.Model): + def call(self, x, y): + return x + + inp = input_layer_lib.Input(shape=(1,)) + inp2 = input_layer_lib.Input(shape=(1,), batch_size=5, dtype=tf.int32) + out = MultiInputModel()(inp, inp2) + m = training_lib.Model(inputs={"x": inp, "y": inp2}, outputs=out) + input_spec = m.save_spec(dynamic_batch=False)[0][0] + self.assertIn("x", input_spec) + self.assertIn("y", input_spec) + self.assertAllEqual([None, 1], input_spec["x"].shape.as_list()) + self.assertAllEqual(tf.float32, input_spec["x"].dtype) + self.assertAllEqual([5, 1], input_spec["y"].shape.as_list()) + self.assertAllEqual(tf.int32, input_spec["y"].dtype) + + def test_layer_ordering_checkpoint_compatibility(self): + class MLPKeras(layers.Layer): + def __init__(self, name: str) -> None: + super(MLPKeras, self).__init__(name=name) + self.layer_1 = layers.Dense( + 10, activation="relu", name=f"{name}_dense_1" + ) + self.layer_2 = layers.Dense( + 10, activation="relu", name=f"{name}_dense_2" + ) + + def call(self, inputs: tf.Tensor) -> tf.Tensor: + return self.layer_2(self.layer_1(inputs)) + + mlp_keras_1 = MLPKeras("mlp_1") + mlp_keras_2 = MLPKeras("mlp_2") + + inputs = input_layer_lib.Input((5,)) + + # Make model which is the sum of two MLPs. 
+ outputs_1 = mlp_keras_1(inputs) + mlp_keras_2(inputs) + functional_model_1 = functional.Functional( + inputs=inputs, outputs=outputs_1 + ) + + ckpt_1 = Checkpoint(model=functional_model_1) + filepath = tf.io.gfile.join(self.get_temp_dir(), "model_1_ckpt") + ckpt_path = ckpt_1.save(filepath) + + # Swap order of MLPs. + outputs_2 = mlp_keras_2(inputs) + mlp_keras_1(inputs) + functional_model_2 = functional.Functional( + inputs=inputs, outputs=outputs_2 + ) + Checkpoint(model=functional_model_2).restore( + ckpt_path + ).assert_consumed() class DeferredModeTest(test_combinations.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testSimpleNetworkBuilding(self): - inputs = input_layer_lib.Input(shape=(32,)) - if tf.executing_eagerly(): - self.assertEqual(inputs.dtype.name, 'float32') - self.assertEqual(inputs.shape.as_list(), [None, 32]) - - x = layers.Dense(2)(inputs) - if tf.executing_eagerly(): - self.assertEqual(x.dtype.name, 'float32') - self.assertEqual(x.shape.as_list(), [None, 2]) - - outputs = layers.Dense(4)(x) - network = functional.Functional(inputs, outputs) - self.assertIsInstance(network, functional.Functional) - - if tf.executing_eagerly(): - # It should be possible to call such a network on EagerTensors. - inputs = tf.constant( - np.random.random((10, 32)).astype('float32')) - outputs = network(inputs) - self.assertEqual(outputs.shape.as_list(), [10, 4]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testMultiIONetworkBuilding(self): - input_a = input_layer_lib.Input(shape=(32,)) - input_b = input_layer_lib.Input(shape=(16,)) - a = layers.Dense(16)(input_a) - - class AddLayer(layers.Layer): - - def call(self, inputs): - return inputs[0] + inputs[1] - - c = AddLayer()([a, input_b]) # pylint: disable=not-callable - c = layers.Dense(2)(c) - - network = functional.Functional([input_a, input_b], [a, c]) - if tf.executing_eagerly(): - a_val = tf.constant( - np.random.random((10, 32)).astype('float32')) - b_val = tf.constant( - np.random.random((10, 16)).astype('float32')) - outputs = network([a_val, b_val]) - self.assertEqual(len(outputs), 2) - self.assertEqual(outputs[0].shape.as_list(), [10, 16]) - self.assertEqual(outputs[1].shape.as_list(), [10, 2]) + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testSimpleNetworkBuilding(self): + inputs = input_layer_lib.Input(shape=(32,)) + if tf.executing_eagerly(): + self.assertEqual(inputs.dtype.name, "float32") + self.assertEqual(inputs.shape.as_list(), [None, 32]) + + x = layers.Dense(2)(inputs) + if tf.executing_eagerly(): + self.assertEqual(x.dtype.name, "float32") + self.assertEqual(x.shape.as_list(), [None, 2]) + + outputs = layers.Dense(4)(x) + network = functional.Functional(inputs, outputs) + self.assertIsInstance(network, functional.Functional) + + if tf.executing_eagerly(): + # It should be possible to call such a network on EagerTensors. 
+ inputs = tf.constant(np.random.random((10, 32)).astype("float32")) + outputs = network(inputs) + self.assertEqual(outputs.shape.as_list(), [10, 4]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testMultiIONetworkBuilding(self): + input_a = input_layer_lib.Input(shape=(32,)) + input_b = input_layer_lib.Input(shape=(16,)) + a = layers.Dense(16)(input_a) + + class AddLayer(layers.Layer): + def call(self, inputs): + return inputs[0] + inputs[1] + + c = AddLayer()([a, input_b]) + c = layers.Dense(2)(c) + + network = functional.Functional([input_a, input_b], [a, c]) + if tf.executing_eagerly(): + a_val = tf.constant(np.random.random((10, 32)).astype("float32")) + b_val = tf.constant(np.random.random((10, 16)).astype("float32")) + outputs = network([a_val, b_val]) + self.assertEqual(len(outputs), 2) + self.assertEqual(outputs[0].shape.as_list(), [10, 16]) + self.assertEqual(outputs[1].shape.as_list(), [10, 2]) class DefaultShapeInferenceBehaviorTest(test_combinations.TestCase): - - def _testShapeInference(self, model, input_shape, expected_output_shape): - input_value = np.random.random(input_shape) - output_value = model.predict(input_value) - self.assertEqual(output_value.shape, expected_output_shape) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testSingleInputCase(self): - - class LayerWithOneInput(layers.Layer): - - def build(self, input_shape): - self.w = tf.ones(shape=(3, 4)) - - def call(self, inputs): - return backend.dot(inputs, self.w) - - inputs = input_layer_lib.Input(shape=(3,)) - layer = LayerWithOneInput() - - if tf.executing_eagerly(): - self.assertEqual( - layer.compute_output_shape((None, 3)).as_list(), [None, 4]) - # As a side-effect, compute_output_shape builds the layer. - self.assertTrue(layer.built) - # We can still query the layer's compute_output_shape with compatible - # input shapes. 
- self.assertEqual( - layer.compute_output_shape((6, 3)).as_list(), [6, 4]) - - outputs = layer(inputs) - model = training_lib.Model(inputs, outputs) - self._testShapeInference(model, (2, 3), (2, 4)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testMultiInputOutputCase(self): - - class MultiInputOutputLayer(layers.Layer): - - def build(self, input_shape): - self.w = tf.ones(shape=(3, 4)) - - def call(self, inputs): - a = backend.dot(inputs[0], self.w) - b = a + inputs[1] - return [a, b] - - input_a = input_layer_lib.Input(shape=(3,)) - input_b = input_layer_lib.Input(shape=(4,)) - output_a, output_b = MultiInputOutputLayer()([input_a, input_b]) - model = training_lib.Model([input_a, input_b], [output_a, output_b]) - output_a_val, output_b_val = model.predict( - [np.random.random((2, 3)), np.random.random((2, 4))]) - self.assertEqual(output_a_val.shape, (2, 4)) - self.assertEqual(output_b_val.shape, (2, 4)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testTrainingArgument(self): - - class LayerWithTrainingArg(layers.Layer): - - def build(self, input_shape): - self.w = tf.ones(shape=(3, 4)) - - def call(self, inputs, training): - return backend.dot(inputs, self.w) - - inputs = input_layer_lib.Input(shape=(3,)) - outputs = LayerWithTrainingArg()(inputs, training=False) - model = training_lib.Model(inputs, outputs) - self._testShapeInference(model, (2, 3), (2, 4)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testNoneInShape(self): - - class Model(training_lib.Model): - - def __init__(self): - super().__init__() - self.conv1 = layers.Conv2D(8, 3) - self.pool = layers.GlobalAveragePooling2D() - self.fc = layers.Dense(3) - - def call(self, x): - x = self.conv1(x) - x = self.pool(x) - x = self.fc(x) - return x - - model = Model() - model.build(tf.TensorShape((None, None, None, 1))) - self.assertTrue(model.built, 'Model should be built') - self.assertTrue(model.weights, - 'Model should have its weights created as it ' - 'has been built') - sample_input = tf.ones((1, 10, 10, 1)) - output = model(sample_input) - self.assertEqual(output.shape, (1, 3)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testNoneInShapeWithCompoundModel(self): - - class BasicBlock(training_lib.Model): - - def __init__(self): - super().__init__() - self.conv1 = layers.Conv2D(8, 3) - self.pool = layers.GlobalAveragePooling2D() - self.dense = layers.Dense(3) - - def call(self, x): - x = self.conv1(x) - x = self.pool(x) - x = self.dense(x) - return x - - class CompoundModel(training_lib.Model): - - def __init__(self): - super().__init__() - self.block = BasicBlock() - - def call(self, x): - x = self.block(x) # pylint: disable=not-callable - return x - - model = CompoundModel() - model.build(tf.TensorShape((None, None, None, 1))) - self.assertTrue(model.built, 'Model should be built') - self.assertTrue(model.weights, - 'Model should have its weights created as it ' - 'has been built') - sample_input = tf.ones((1, 10, 10, 1)) - output = model(sample_input) # pylint: disable=not-callable - self.assertEqual(output.shape, (1, 3)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testNoneInShapeWithFunctionalAPI(self): - - class BasicBlock(training_lib.Model): - # Inheriting from layers.Layer since we are calling this layer - # inside a model created using functional API. 
- - def __init__(self): - super().__init__() - self.conv1 = layers.Conv2D(8, 3) - - def call(self, x): - x = self.conv1(x) - return x - - input_layer = layers.Input(shape=(None, None, 1)) - x = BasicBlock()(input_layer) - x = layers.GlobalAveragePooling2D()(x) - output_layer = layers.Dense(3)(x) - - model = training_lib.Model(inputs=input_layer, outputs=output_layer) - - model.build(tf.TensorShape((None, None, None, 1))) - self.assertTrue(model.built, 'Model should be built') - self.assertTrue(model.weights, - 'Model should have its weights created as it ' - 'has been built') - sample_input = tf.ones((1, 10, 10, 1)) - output = model(sample_input) - self.assertEqual(output.shape, (1, 3)) - - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def test_sequential_as_downstream_of_masking_layer(self): - inputs = layers.Input(shape=(3, 4)) - x = layers.Masking(mask_value=0., input_shape=(3, 4))(inputs) - - s = sequential.Sequential() - s.add(layers.Dense(5, input_shape=(4,))) - - x = layers.TimeDistributed(s)(x) - model = training_lib.Model(inputs=inputs, outputs=x) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - model_input = np.random.randint( - low=1, high=5, size=(10, 3, 4)).astype('float32') - for i in range(4): - model_input[i, i:, :] = 0. - model.fit(model_input, - np.random.random((10, 3, 5)), epochs=1, batch_size=6) - - if not tf.executing_eagerly(): - # Note: this doesn't work in eager due to DeferredTensor/ops compatibility - # issue. - mask_outputs = [model.layers[1].compute_mask(model.layers[1].input)] - mask_outputs += [model.layers[2].compute_mask( - model.layers[2].input, mask_outputs[-1])] - func = backend.function([model.input], mask_outputs) - mask_outputs_val = func([model_input]) - self.assertAllClose(mask_outputs_val[0], np.any(model_input, axis=-1)) - self.assertAllClose(mask_outputs_val[1], np.any(model_input, axis=-1)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_external_keras_serialization_compat_input_layers(self): - inputs = input_layer_lib.Input(shape=(10,)) - outputs = layers.Dense(1)(inputs) - model = training_lib.Model(inputs, outputs) - config = model.get_config() - # Checks that single inputs and outputs are still saved as 1-element lists. - # Saving as 1-element lists or not is equivalent in TF Keras, but only the - # 1-element list format is supported in TF.js and keras-team/Keras. - self.assertLen(config['input_layers'], 1) - self.assertLen(config['output_layers'], 1) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_external_keras_serialization_compat_inbound_nodes(self): - # Check single Tensor input. - inputs = input_layer_lib.Input(shape=(10,), name='in') - outputs = layers.Dense(1)(inputs) - model = training_lib.Model(inputs, outputs) - config = model.get_config() - self.assertEqual(config['layers'][1]['inbound_nodes'], [[['in', 0, 0, {}]]]) - - # Check multiple Tensor input. 
- inputs1 = input_layer_lib.Input(shape=(10,), name='in1') - inputs2 = input_layer_lib.Input(shape=(10,), name='in2') - outputs = layers.Add()([inputs1, inputs2]) - model = training_lib.Model([inputs1, inputs2], outputs) - config = model.get_config() - self.assertEqual(config['layers'][2]['inbound_nodes'], - [[['in1', 0, 0, {}], ['in2', 0, 0, {}]]]) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_dict_inputs_tensors(self): - # Note that this test is running with v2 eager only, since the v1 - # will behave differently wrt to dict input for training. - inputs = { - 'sentence2': input_layer_lib.Input( - shape=(), name='a', dtype=tf.string), - 'sentence1': input_layer_lib.Input( - shape=(), name='b', dtype=tf.string), - } - strlen = layers.Lambda(tf.strings.length) - diff = layers.Subtract()( - [strlen(inputs['sentence1']), strlen(inputs['sentence2'])]) - diff = tf.cast(diff, tf.float32) - model = training_lib.Model(inputs, diff) - - extra_keys = { - 'sentence1': tf.constant(['brown fox', 'lazy dog']), - 'sentence2': tf.constant(['owl', 'cheeky cat']), - 'label': tf.constant([0, 1]), - } - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - model(extra_keys) - self.assertIn('ignored by the model', str(w[-1].message)) - - model.compile('sgd', 'mse') - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - model.fit(extra_keys, y=tf.constant([0, 1]), steps_per_epoch=1) - self.assertIn('ignored by the model', str(w[-1].message)) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - model.evaluate(extra_keys, tf.constant([0, 1])) - self.assertIn('ignored by the model', str(w[-1].message)) - - # Make sure the model inputs are sorted with the dict keys. - self.assertEqual(model.inputs[0]._keras_history.layer.name, 'b') - self.assertEqual(model.inputs[1]._keras_history.layer.name, 'a') + def _testShapeInference(self, model, input_shape, expected_output_shape): + input_value = np.random.random(input_shape) + output_value = model.predict(input_value) + self.assertEqual(output_value.shape, expected_output_shape) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testSingleInputCase(self): + class LayerWithOneInput(layers.Layer): + def build(self, input_shape): + self.w = tf.ones(shape=(3, 4)) + + def call(self, inputs): + return backend.dot(inputs, self.w) + + inputs = input_layer_lib.Input(shape=(3,)) + layer = LayerWithOneInput() + + if tf.executing_eagerly(): + self.assertEqual( + layer.compute_output_shape((None, 3)).as_list(), [None, 4] + ) + # As a side-effect, compute_output_shape builds the layer. + self.assertTrue(layer.built) + # We can still query the layer's compute_output_shape with + # compatible input shapes. 
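+ # Here the batch dimension differs from the earlier query, but the + # last dimension still matches the built kernel shape (3, 4).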
+ self.assertEqual( + layer.compute_output_shape((6, 3)).as_list(), [6, 4] + ) + + outputs = layer(inputs) + model = training_lib.Model(inputs, outputs) + self._testShapeInference(model, (2, 3), (2, 4)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testMultiInputOutputCase(self): + class MultiInputOutputLayer(layers.Layer): + def build(self, input_shape): + self.w = tf.ones(shape=(3, 4)) + + def call(self, inputs): + a = backend.dot(inputs[0], self.w) + b = a + inputs[1] + return [a, b] + + input_a = input_layer_lib.Input(shape=(3,)) + input_b = input_layer_lib.Input(shape=(4,)) + output_a, output_b = MultiInputOutputLayer()([input_a, input_b]) + model = training_lib.Model([input_a, input_b], [output_a, output_b]) + output_a_val, output_b_val = model.predict( + [np.random.random((2, 3)), np.random.random((2, 4))] + ) + self.assertEqual(output_a_val.shape, (2, 4)) + self.assertEqual(output_b_val.shape, (2, 4)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testTrainingArgument(self): + class LayerWithTrainingArg(layers.Layer): + def build(self, input_shape): + self.w = tf.ones(shape=(3, 4)) + + def call(self, inputs, training): + return backend.dot(inputs, self.w) + + inputs = input_layer_lib.Input(shape=(3,)) + outputs = LayerWithTrainingArg()(inputs, training=False) + model = training_lib.Model(inputs, outputs) + self._testShapeInference(model, (2, 3), (2, 4)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNoneInShape(self): + class Model(training_lib.Model): + def __init__(self): + super().__init__() + self.conv1 = layers.Conv2D(8, 3) + self.pool = layers.GlobalAveragePooling2D() + self.fc = layers.Dense(3) + + def call(self, x): + x = self.conv1(x) + x = self.pool(x) + x = self.fc(x) + return x + + model = Model() + model.build(tf.TensorShape((None, None, None, 1))) + self.assertTrue(model.built, "Model should be built") + self.assertTrue( + model.weights, + "Model should have its weights created as it has been built", + ) + sample_input = tf.ones((1, 10, 10, 1)) + output = model(sample_input) + self.assertEqual(output.shape, (1, 3)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNoneInShapeWithCompoundModel(self): + class BasicBlock(training_lib.Model): + def __init__(self): + super().__init__() + self.conv1 = layers.Conv2D(8, 3) + self.pool = layers.GlobalAveragePooling2D() + self.dense = layers.Dense(3) + + def call(self, x): + x = self.conv1(x) + x = self.pool(x) + x = self.dense(x) + return x + + class CompoundModel(training_lib.Model): + def __init__(self): + super().__init__() + self.block = BasicBlock() + + def call(self, x): + x = self.block(x) + return x + + model = CompoundModel() + model.build(tf.TensorShape((None, None, None, 1))) + self.assertTrue(model.built, "Model should be built") + self.assertTrue( + model.weights, + "Model should have its weights created as it has been built", + ) + sample_input = tf.ones((1, 10, 10, 1)) + output = model(sample_input) + self.assertEqual(output.shape, (1, 3)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNoneInShapeWithFunctionalAPI(self): + class BasicBlock(training_lib.Model): + # Inheriting from layers.Layer since we are calling this layer + # inside a model created using functional API. 
+ + def __init__(self): + super().__init__() + self.conv1 = layers.Conv2D(8, 3) + + def call(self, x): + x = self.conv1(x) + return x + + input_layer = layers.Input(shape=(None, None, 1)) + x = BasicBlock()(input_layer) + x = layers.GlobalAveragePooling2D()(x) + output_layer = layers.Dense(3)(x) + + model = training_lib.Model(inputs=input_layer, outputs=output_layer) + + model.build(tf.TensorShape((None, None, None, 1))) + self.assertTrue(model.built, "Model should be built") + self.assertTrue( + model.weights, + "Model should have its weights created as it has been built", + ) + sample_input = tf.ones((1, 10, 10, 1)) + output = model(sample_input) + self.assertEqual(output.shape, (1, 3)) + + @test_combinations.generate(test_combinations.keras_mode_combinations()) + def test_sequential_as_downstream_of_masking_layer(self): + inputs = layers.Input(shape=(3, 4)) + x = layers.Masking(mask_value=0.0, input_shape=(3, 4))(inputs) + + s = sequential.Sequential() + s.add(layers.Dense(5, input_shape=(4,))) + + x = layers.TimeDistributed(s)(x) + model = training_lib.Model(inputs=inputs, outputs=x) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + + model_input = np.random.randint(low=1, high=5, size=(10, 3, 4)).astype( + "float32" + ) + for i in range(4): + model_input[i, i:, :] = 0.0 + model.fit( + model_input, np.random.random((10, 3, 5)), epochs=1, batch_size=6 + ) + + if not tf.executing_eagerly(): + # Note: this doesn't work in eager due to DeferredTensor/ops + # compatibility issue. + mask_outputs = [model.layers[1].compute_mask(model.layers[1].input)] + mask_outputs += [ + model.layers[2].compute_mask( + model.layers[2].input, mask_outputs[-1] + ) + ] + func = backend.function([model.input], mask_outputs) + mask_outputs_val = func([model_input]) + self.assertAllClose( + mask_outputs_val[0], np.any(model_input, axis=-1) + ) + self.assertAllClose( + mask_outputs_val[1], np.any(model_input, axis=-1) + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_external_keras_serialization_compat_input_layers(self): + inputs = input_layer_lib.Input(shape=(10,)) + outputs = layers.Dense(1)(inputs) + model = training_lib.Model(inputs, outputs) + config = model.get_config() + # Checks that single inputs and outputs are still saved as 1-element + # lists. Saving as 1-element lists or not is equivalent in TF Keras, + # but only the 1-element list format is supported in TF.js and + # keras-team/Keras. 
+ self.assertLen(config["input_layers"], 1) + self.assertLen(config["output_layers"], 1) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + @test_utils.run_v2_only + def test_save_load_with_single_elem_list_inputs_saved_model(self): + class MyLayer(layers.Layer): + def __init__(self): + super().__init__() + self._preserve_input_structure_in_config = True + + def call(self, inputs): + return inputs[0] + + inputs = input_layer_lib.Input(shape=(3,)) + layer = MyLayer() + outputs = layer([inputs]) + + model = training_lib.Model(inputs=inputs, outputs=outputs) + model.save("/tmp/km2") + + save.load_model("/tmp/km2") + + @test_utils.run_v2_only + def test_save_load_with_single_elem_list_inputs_keras_v3(self): + @object_registration.register_keras_serializable() + class MyLayer(layers.Layer): + def __init__(self): + super().__init__() + self._preserve_input_structure_in_config = True + + def call(self, inputs): + return inputs[0] + + inputs = input_layer_lib.Input(shape=(3,)) + layer = MyLayer() + outputs = layer([inputs]) + + model = training_lib.Model(inputs=inputs, outputs=outputs) + model.save("/tmp/model.keras") + + models.load_model("/tmp/model.keras") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_external_keras_serialization_compat_inbound_nodes(self): + # Check single Tensor input. + inputs = input_layer_lib.Input(shape=(10,), name="in") + outputs = layers.Dense(1)(inputs) + model = training_lib.Model(inputs, outputs) + config = model.get_config() + self.assertEqual( + config["layers"][1]["inbound_nodes"], [[["in", 0, 0, {}]]] + ) + + # Check multiple Tensor input. + inputs1 = input_layer_lib.Input(shape=(10,), name="in1") + inputs2 = input_layer_lib.Input(shape=(10,), name="in2") + outputs = layers.Add()([inputs1, inputs2]) + model = training_lib.Model([inputs1, inputs2], outputs) + config = model.get_config() + self.assertEqual( + config["layers"][2]["inbound_nodes"], + [[["in1", 0, 0, {}], ["in2", 0, 0, {}]]], + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_dict_inputs_tensors(self): + # Note that this test is running with v2 eager only, since v1 + # behaves differently w.r.t. dict inputs for training. + inputs = { + "sentence2": input_layer_lib.Input( + shape=(), name="a", dtype=tf.string + ), + "sentence1": input_layer_lib.Input( + shape=(), name="b", dtype=tf.string + ), + } + strlen = layers.Lambda(tf.strings.length) + diff = layers.Subtract()( + [strlen(inputs["sentence1"]), strlen(inputs["sentence2"])] + ) + diff = tf.cast(diff, tf.float32) + model = training_lib.Model(inputs, diff) + + extra_keys = { + "sentence1": tf.constant(["brown fox", "lazy dog"]), + "sentence2": tf.constant(["owl", "cheeky cat"]), + "label": tf.constant([0, 1]), + } + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + model(extra_keys) + self.assertIn("ignored by the model", str(w[-1].message)) + + model.compile("sgd", "mse") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + model.fit(extra_keys, y=tf.constant([0, 1]), steps_per_epoch=1) + self.assertIn("ignored by the model", str(w[-1].message)) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + model.evaluate(extra_keys, tf.constant([0, 1])) + self.assertIn("ignored by the model", str(w[-1].message)) + + # Make sure the model inputs are sorted with the dict keys.
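+ # Sorted keys are ("sentence1", "sentence2"), which map to the input + # layers named "b" and "a" respectively.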
+ self.assertEqual(model.inputs[0]._keras_history.layer.name, "b") + self.assertEqual(model.inputs[1]._keras_history.layer.name, "a") class GraphUtilsTest(tf.test.TestCase): - - def testGetReachableFromInputs(self): - - with tf.Graph().as_default(), self.cached_session(): - pl_1 = tf.compat.v1.placeholder(shape=None, dtype='float32') - pl_2 = tf.compat.v1.placeholder(shape=None, dtype='float32') - pl_3 = tf.compat.v1.placeholder(shape=None, dtype='float32') - x_1 = pl_1 + pl_2 - x_2 = pl_2 * 2 - x_3 = pl_3 + 1 - x_4 = x_1 + x_2 - x_5 = x_3 * pl_1 - - self.assertEqual( - tf_utils.get_reachable_from_inputs([pl_1]), - {pl_1, x_1, x_4, x_5, x_1.op, x_4.op, x_5.op}) - self.assertEqual( - tf_utils.get_reachable_from_inputs([pl_1, pl_2]), - {pl_1, pl_2, x_1, x_2, x_4, x_5, x_1.op, x_2.op, x_4.op, x_5.op}) - self.assertEqual( - tf_utils.get_reachable_from_inputs([pl_3]), - {pl_3, x_3, x_5, x_3.op, x_5.op}) - self.assertEqual( - tf_utils.get_reachable_from_inputs([x_3]), {x_3, x_5, x_5.op}) + def testGetReachableFromInputs(self): + + with tf.Graph().as_default(), self.cached_session(): + pl_1 = tf.compat.v1.placeholder(shape=None, dtype="float32") + pl_2 = tf.compat.v1.placeholder(shape=None, dtype="float32") + pl_3 = tf.compat.v1.placeholder(shape=None, dtype="float32") + x_1 = pl_1 + pl_2 + x_2 = pl_2 * 2 + x_3 = pl_3 + 1 + x_4 = x_1 + x_2 + x_5 = x_3 * pl_1 + + self.assertEqual( + tf_utils.get_reachable_from_inputs([pl_1]), + {pl_1, x_1, x_4, x_5, x_1.op, x_4.op, x_5.op}, + ) + self.assertEqual( + tf_utils.get_reachable_from_inputs([pl_1, pl_2]), + { + pl_1, + pl_2, + x_1, + x_2, + x_4, + x_5, + x_1.op, + x_2.op, + x_4.op, + x_5.op, + }, + ) + self.assertEqual( + tf_utils.get_reachable_from_inputs([pl_3]), + {pl_3, x_3, x_5, x_3.op, x_5.op}, + ) + self.assertEqual( + tf_utils.get_reachable_from_inputs([x_3]), {x_3, x_5, x_5.op} + ) class NestedNetworkTest(test_combinations.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_nested_inputs_network(self): - inputs = { - 'x1': input_layer_lib.Input(shape=(1,)), - 'x2': input_layer_lib.Input(shape=(1,)) - } - outputs = layers.Add()([inputs['x1'], inputs['x2']]) - network = functional.Functional(inputs, outputs) - - network = functional.Functional.from_config(network.get_config()) - - result_tensor = network({ - 'x1': tf.ones((1, 1), 'float32'), - 'x2': tf.ones((1, 1), 'float32') - }) - result = self.evaluate(result_tensor) - self.assertAllEqual(result, [[2.]]) - - # TODO(b/122726584): Investigate why concrete batch is flaky in some builds. 
- output_shape = network.compute_output_shape({ - 'x1': (None, 1), - 'x2': (None, 1) - }) - self.assertListEqual(output_shape.as_list(), [None, 1]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_nested_outputs_network(self): - inputs = input_layer_lib.Input(shape=(1,)) - outputs = { - 'x+x': layers.Add()([inputs, inputs]), - 'x*x': layers.Multiply()([inputs, inputs]) - } - - network = functional.Functional(inputs, outputs) - - network = functional.Functional.from_config(network.get_config()) - - result_tensor = network(tf.ones((1, 1), 'float32')) - result = self.evaluate(result_tensor) - self.assertAllEqual(result['x+x'], [[2.]]) - self.assertAllEqual(result['x*x'], [[1.]]) - - output_shape = network.compute_output_shape((None, 1)) - self.assertListEqual(output_shape['x+x'].as_list(), [None, 1]) - self.assertListEqual(output_shape['x*x'].as_list(), [None, 1]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_nested_network_inside_network(self): - inner_inputs = { - 'x1': input_layer_lib.Input(shape=(1,)), - 'x2': input_layer_lib.Input(shape=(1,)) - } - inner_outputs = { - 'x1+x2': layers.Add()([inner_inputs['x1'], inner_inputs['x2']]), - 'x1*x2': layers.Multiply()([inner_inputs['x1'], inner_inputs['x2']]) - } - inner_network = functional.Functional( - inner_inputs, inner_outputs) - - inputs = [ - input_layer_lib.Input(shape=(1,)), - input_layer_lib.Input(shape=(1,)) - ] - middle = inner_network({'x1': inputs[0], 'x2': inputs[1]}) - outputs = layers.Add()([middle['x1+x2'], middle['x1*x2']]) - network = functional.Functional(inputs, outputs) - - network = functional.Functional.from_config(network.get_config()) - - # Computes: `(x1+x2) + (x1*x2)` - result_tensor = network( - [tf.ones((1, 1), 'float32'), - tf.ones((1, 1), 'float32')]) - result = self.evaluate(result_tensor) - self.assertAllEqual(result, [[3.]]) - - output_shape = network.compute_output_shape([(None, 1), (None, 1)]) - self.assertListEqual(output_shape.as_list(), [None, 1]) - - @test_combinations.generate(test_combinations.combine(mode=['graph'])) - def test_updates_with_direct_call(self): - inputs = input_layer_lib.Input(shape=(10,)) - x = layers.BatchNormalization()(inputs) - x = layers.Dense(10)(x) - model = training_lib.Model(inputs, x) - - ph = backend.placeholder(shape=(10, 10)) - model(ph) - - self.assertLen(model.updates, 4) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_dict_mapping_input(self): - - class ReturnFirst(layers.Layer): - - def call(self, inputs): - b, _ = inputs - return b - - # Checks that inputs are put in same order as the - # Model was constructed with. 
- b = input_layer_lib.Input(shape=(10,), name='b') - a = input_layer_lib.Input(shape=(10,), name='a') - outputs = ReturnFirst()([b, a]) - - b_val = tf.ones((10, 10)) - a_val = tf.zeros((10, 10)) - - model = training_lib.Model([b, a], outputs) - res = model({'a': a_val, 'b': b_val}) - self.assertAllClose(self.evaluate(res), self.evaluate(b_val)) - - reversed_model = training_lib.Model([a, b], outputs) - res = reversed_model({'a': a_val, 'b': b_val}) - self.assertAllClose(self.evaluate(res), self.evaluate(b_val)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_dict_mapping_single_input(self): - b = input_layer_lib.Input(shape=(1,), name='b') - outputs = b * 2 - model = training_lib.Model(b, outputs) - - b_val = tf.ones((1, 1)) - extra_val = tf.ones((1, 10)) - - inputs = {'a': extra_val, 'b': b_val} - res = model(inputs) - - # Check that 'b' was used and 'a' was ignored. - self.assertEqual(res.shape.as_list(), [1, 1]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_nested_dict_mapping(self): - a = input_layer_lib.Input(shape=(1,), dtype='int32', name='a') - b = input_layer_lib.Input(shape=(1,), dtype='int32', name='b') - c = input_layer_lib.Input(shape=(1,), dtype='int32', name='c') - d = input_layer_lib.Input(shape=(1,), dtype='int32', name='d') - inputs = {'a': (a, b), 'c': (c, d)} - outputs = 1000 * a + 100 * b + 10 * c + d - model = training_lib.Model(inputs, outputs) - - a_val = tf.ones((1, 1), dtype='int32') - b_val = 2 * tf.ones((1, 1), dtype='int32') - c_val = 3 * tf.ones((1, 1), dtype='int32') - d_val = 4 * tf.ones((1, 1), dtype='int32') - - inputs_val = {'a': (a_val, b_val), 'c': (c_val, d_val)} - res = model(inputs_val) - - # Check that inputs were flattened in the correct order. - self.assertFalse(model._enable_dict_to_input_mapping) - self.assertEqual(self.evaluate(res), [1234]) + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_nested_inputs_network(self): + inputs = { + "x1": input_layer_lib.Input(shape=(1,)), + "x2": input_layer_lib.Input(shape=(1,)), + } + outputs = layers.Add()([inputs["x1"], inputs["x2"]]) + network = functional.Functional(inputs, outputs) + + network = functional.Functional.from_config(network.get_config()) + + result_tensor = network( + {"x1": tf.ones((1, 1), "float32"), "x2": tf.ones((1, 1), "float32")} + ) + result = self.evaluate(result_tensor) + self.assertAllEqual(result, [[2.0]]) + + # TODO(b/122726584): Investigate why concrete batch is flaky in some + # builds. 
+ output_shape = network.compute_output_shape( + {"x1": (None, 1), "x2": (None, 1)} + ) + self.assertListEqual(output_shape.as_list(), [None, 1]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_nested_outputs_network(self): + inputs = input_layer_lib.Input(shape=(1,)) + outputs = { + "x+x": layers.Add()([inputs, inputs]), + "x*x": layers.Multiply()([inputs, inputs]), + } + + network = functional.Functional(inputs, outputs) + + network = functional.Functional.from_config(network.get_config()) + + result_tensor = network(tf.ones((1, 1), "float32")) + result = self.evaluate(result_tensor) + self.assertAllEqual(result["x+x"], [[2.0]]) + self.assertAllEqual(result["x*x"], [[1.0]]) + + output_shape = network.compute_output_shape((None, 1)) + self.assertListEqual(output_shape["x+x"].as_list(), [None, 1]) + self.assertListEqual(output_shape["x*x"].as_list(), [None, 1]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_nested_network_inside_network(self): + inner_inputs = { + "x1": input_layer_lib.Input(shape=(1,)), + "x2": input_layer_lib.Input(shape=(1,)), + } + inner_outputs = { + "x1+x2": layers.Add()([inner_inputs["x1"], inner_inputs["x2"]]), + "x1*x2": layers.Multiply()( + [inner_inputs["x1"], inner_inputs["x2"]] + ), + } + inner_network = functional.Functional(inner_inputs, inner_outputs) + + inputs = [ + input_layer_lib.Input(shape=(1,)), + input_layer_lib.Input(shape=(1,)), + ] + middle = inner_network({"x1": inputs[0], "x2": inputs[1]}) + outputs = layers.Add()([middle["x1+x2"], middle["x1*x2"]]) + network = functional.Functional(inputs, outputs) + + network = functional.Functional.from_config(network.get_config()) + + # Computes: `(x1+x2) + (x1*x2)` + result_tensor = network( + [tf.ones((1, 1), "float32"), tf.ones((1, 1), "float32")] + ) + result = self.evaluate(result_tensor) + self.assertAllEqual(result, [[3.0]]) + + output_shape = network.compute_output_shape([(None, 1), (None, 1)]) + self.assertListEqual(output_shape.as_list(), [None, 1]) + + @test_combinations.generate(test_combinations.combine(mode=["graph"])) + def test_updates_with_direct_call(self): + inputs = input_layer_lib.Input(shape=(10,)) + x = layers.BatchNormalization()(inputs) + x = layers.Dense(10)(x) + model = training_lib.Model(inputs, x) + + ph = backend.placeholder(shape=(10, 10)) + model(ph) + + self.assertLen(model.updates, 4) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_dict_mapping_input(self): + class ReturnFirst(layers.Layer): + def call(self, inputs): + b, _ = inputs + return b + + # Checks that inputs are put in same order as the + # Model was constructed with. 
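+ # The Model below is constructed with [b, a], so feeding a dict keyed + # by input name must still deliver b's value to the first position.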
+ b = input_layer_lib.Input(shape=(10,), name="b") + a = input_layer_lib.Input(shape=(10,), name="a") + outputs = ReturnFirst()([b, a]) + + b_val = tf.ones((10, 10)) + a_val = tf.zeros((10, 10)) + + model = training_lib.Model([b, a], outputs) + res = model({"a": a_val, "b": b_val}) + self.assertAllClose(self.evaluate(res), self.evaluate(b_val)) + + reversed_model = training_lib.Model([a, b], outputs) + res = reversed_model({"a": a_val, "b": b_val}) + self.assertAllClose(self.evaluate(res), self.evaluate(b_val)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_dict_mapping_single_input(self): + b = input_layer_lib.Input(shape=(1,), name="b") + outputs = b * 2 + model = training_lib.Model(b, outputs) + + b_val = tf.ones((1, 1)) + extra_val = tf.ones((1, 10)) + + inputs = {"a": extra_val, "b": b_val} + res = model(inputs) + + # Check that 'b' was used and 'a' was ignored. + self.assertEqual(res.shape.as_list(), [1, 1]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_nested_dict_mapping(self): + a = input_layer_lib.Input(shape=(1,), dtype="int32", name="a") + b = input_layer_lib.Input(shape=(1,), dtype="int32", name="b") + c = input_layer_lib.Input(shape=(1,), dtype="int32", name="c") + d = input_layer_lib.Input(shape=(1,), dtype="int32", name="d") + inputs = {"a": (a, b), "c": (c, d)} + outputs = 1000 * a + 100 * b + 10 * c + d + model = training_lib.Model(inputs, outputs) + + a_val = tf.ones((1, 1), dtype="int32") + b_val = 2 * tf.ones((1, 1), dtype="int32") + c_val = 3 * tf.ones((1, 1), dtype="int32") + d_val = 4 * tf.ones((1, 1), dtype="int32") + + inputs_val = {"a": (a_val, b_val), "c": (c_val, d_val)} + res = model(inputs_val) + + # Check that inputs were flattened in the correct order. + self.assertFalse(model._enable_dict_to_input_mapping) + self.assertEqual(self.evaluate(res), [1234]) @test_combinations.generate(test_combinations.keras_mode_combinations()) class AddLossTest(test_combinations.TestCase): - - def test_add_loss_outside_call_only_loss(self): - inputs = input_layer_lib.Input((10,)) - mid = layers.Dense(10)(inputs) - outputs = layers.Dense(1)(mid) - model = training_lib.Model(inputs, outputs) - model.add_loss(tf.reduce_mean(outputs)) - self.assertLen(model.losses, 1) - - initial_weights = model.get_weights() - - x = np.ones((10, 10)) - model.compile( - 'sgd', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, batch_size=2, epochs=1) - - model2 = model.from_config(model.get_config()) - model2.compile( - 'sgd', - run_eagerly=test_utils.should_run_eagerly()) - model2.set_weights(initial_weights) - model2.fit(x, batch_size=2, epochs=1) - - # The TFOpLayer and the AddLoss layer are serialized. 
- self.assertLen(model2.layers, 5) - self.assertAllClose(model.get_weights(), model2.get_weights()) - - def test_add_loss_outside_call_multiple_losses(self): - inputs = input_layer_lib.Input((10,)) - x1 = layers.Dense(10)(inputs) - x2 = layers.Dense(10)(x1) - outputs = layers.Dense(1)(x2) - model = training_lib.Model(inputs, outputs) - model.add_loss(tf.reduce_sum(x1 * x2)) - model.add_loss(tf.reduce_mean(outputs)) - self.assertLen(model.losses, 2) - - initial_weights = model.get_weights() - - x, y = np.ones((10, 10)), np.ones((10, 1)) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, batch_size=2, epochs=1) - - model2 = model.from_config(model.get_config()) - model2.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model2.set_weights(initial_weights) - model2.fit(x, y, batch_size=2, epochs=1) - - self.assertAllClose(model.get_weights(), model2.get_weights()) - - def test_add_loss_crossentropy_backtracking(self): - inputs = input_layer_lib.Input((2,)) - labels = input_layer_lib.Input((1,)) - outputs = layers.Dense(1, activation='sigmoid')(inputs) - model = functional.Functional([inputs, labels], outputs) - model.add_loss(losses.binary_crossentropy(labels, outputs)) - model.compile('adam') - x = np.random.random((2, 2)) - y = np.random.random((2, 1)) - model.fit([x, y]) - - inputs = input_layer_lib.Input((2,)) - labels = input_layer_lib.Input((2,)) - outputs = layers.Dense(2, activation='softmax')(inputs) - model = functional.Functional([inputs, labels], outputs) - model.add_loss(losses.categorical_crossentropy(labels, outputs)) - model.compile('adam') - x = np.random.random((2, 2)) - y = np.random.random((2, 2)) - model.fit([x, y]) - - inputs = input_layer_lib.Input((2,)) - labels = input_layer_lib.Input((1,), dtype='int32') - outputs = layers.Dense(2, activation='softmax')(inputs) - model = functional.Functional([inputs, labels], outputs) - model.add_loss(losses.sparse_categorical_crossentropy(labels, outputs)) - model.compile('adam') - x = np.random.random((2, 2)) - y = np.random.randint(0, 2, size=(2, 1)) - model.fit([x, y]) + def test_add_loss_outside_call_only_loss(self): + inputs = input_layer_lib.Input((10,)) + mid = layers.Dense(10)(inputs) + outputs = layers.Dense(1)(mid) + model = training_lib.Model(inputs, outputs) + model.add_loss(tf.reduce_mean(outputs)) + self.assertLen(model.losses, 1) + + initial_weights = model.get_weights() + + x = np.ones((10, 10)) + model.compile("sgd", run_eagerly=test_utils.should_run_eagerly()) + model.fit(x, batch_size=2, epochs=1) + + model2 = model.from_config(model.get_config()) + model2.compile("sgd", run_eagerly=test_utils.should_run_eagerly()) + model2.set_weights(initial_weights) + model2.fit(x, batch_size=2, epochs=1) + + # The TFOpLayer and the AddLoss layer are serialized. 
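+ # Five layers total: the input layer, the two Dense layers, the + # TFOpLayer wrapping tf.reduce_mean, and the AddLoss layer.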
+ self.assertLen(model2.layers, 5) + self.assertAllClose(model.get_weights(), model2.get_weights()) + + def test_add_loss_outside_call_multiple_losses(self): + inputs = input_layer_lib.Input((10,)) + x1 = layers.Dense(10)(inputs) + x2 = layers.Dense(10)(x1) + outputs = layers.Dense(1)(x2) + model = training_lib.Model(inputs, outputs) + model.add_loss(tf.reduce_sum(x1 * x2)) + model.add_loss(tf.reduce_mean(outputs)) + self.assertLen(model.losses, 2) + + initial_weights = model.get_weights() + + x, y = np.ones((10, 10)), np.ones((10, 1)) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + model.fit(x, y, batch_size=2, epochs=1) + + model2 = model.from_config(model.get_config()) + model2.compile( + "sgd", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + model2.set_weights(initial_weights) + model2.fit(x, y, batch_size=2, epochs=1) + + self.assertAllClose(model.get_weights(), model2.get_weights()) + + def test_add_loss_crossentropy_backtracking(self): + inputs = input_layer_lib.Input((2,)) + labels = input_layer_lib.Input((1,)) + outputs = layers.Dense(1, activation="sigmoid")(inputs) + model = functional.Functional([inputs, labels], outputs) + model.add_loss(losses.binary_crossentropy(labels, outputs)) + model.compile("adam") + x = np.random.random((2, 2)) + y = np.random.random((2, 1)) + model.fit([x, y]) + + inputs = input_layer_lib.Input((2,)) + labels = input_layer_lib.Input((2,)) + outputs = layers.Dense(2, activation="softmax")(inputs) + model = functional.Functional([inputs, labels], outputs) + model.add_loss(losses.categorical_crossentropy(labels, outputs)) + model.compile("adam") + x = np.random.random((2, 2)) + y = np.random.random((2, 2)) + model.fit([x, y]) + + inputs = input_layer_lib.Input((2,)) + labels = input_layer_lib.Input((1,), dtype="int32") + outputs = layers.Dense(2, activation="softmax")(inputs) + model = functional.Functional([inputs, labels], outputs) + model.add_loss(losses.sparse_categorical_crossentropy(labels, outputs)) + model.compile("adam") + x = np.random.random((2, 2)) + y = np.random.randint(0, 2, size=(2, 1)) + model.fit([x, y]) @test_combinations.generate(test_combinations.keras_mode_combinations()) class WeightAccessTest(test_combinations.TestCase): + def test_functional_model(self): + inputs = input_layer_lib.Input((10,)) + x1 = layers.Dense(10)(inputs) + x2 = layers.Dense(10)(x1) + outputs = layers.Dense(1)(x2) + model = training_lib.Model(inputs, outputs) - def test_functional_model(self): - inputs = input_layer_lib.Input((10,)) - x1 = layers.Dense(10)(inputs) - x2 = layers.Dense(10)(x1) - outputs = layers.Dense(1)(x2) - model = training_lib.Model(inputs, outputs) - - self.assertEqual(len(model.weights), 6) - - def test_sequential_model_with_input_shape(self): - x1 = layers.Dense(10, input_shape=(10,)) - x2 = layers.Dense(10) - x3 = layers.Dense(1) - model = sequential.Sequential([x1, x2, x3]) + self.assertEqual(len(model.weights), 6) - self.assertEqual(len(model.weights), 6) + def test_sequential_model_with_input_shape(self): + x1 = layers.Dense(10, input_shape=(10,)) + x2 = layers.Dense(10) + x3 = layers.Dense(1) + model = sequential.Sequential([x1, x2, x3]) - def test_sequential_model_without_input_shape(self): - x1 = layers.Dense(10) - x2 = layers.Dense(10) - x3 = layers.Dense(1) - model = sequential.Sequential([x1, x2, x3]) + self.assertEqual(len(model.weights), 6) - with self.assertRaisesRegex( - ValueError, 'Weights for model .* have not yet been created'): - _ = model.weights + def 
test_sequential_model_without_input_shape(self): + x1 = layers.Dense(10) + x2 = layers.Dense(10) + x3 = layers.Dense(1) + model = sequential.Sequential([x1, x2, x3]) - def test_subclass_model_with_build_method(self): + with self.assertRaisesRegex( + ValueError, "Weights for model .* have not yet been created" + ): + _ = model.weights - class SubclassModel(models.Model): + def test_subclass_model_with_build_method(self): + class SubclassModel(models.Model): + def build(self, input_shape): + self.w = self.add_weight( + shape=input_shape[-1], initializer="ones" + ) - def build(self, input_shape): - self.w = self.add_weight(shape=input_shape[-1], initializer='ones') + def call(self, inputs): + return inputs * self.w - def call(self, inputs): - return inputs * self.w + model = SubclassModel() - model = SubclassModel() + with self.assertRaisesRegex( + ValueError, "Weights for model .* have not yet been created" + ): + _ = model.weights - with self.assertRaisesRegex( - ValueError, 'Weights for model .* have not yet been created'): - _ = model.weights + model(input_layer_lib.Input((10,))) + self.assertEqual(len(model.weights), 1) - model(input_layer_lib.Input((10,))) - self.assertEqual(len(model.weights), 1) + def test_subclass_model_without_build_method(self): + class SubclassModel(models.Model): + def __init__(self): + super().__init__() + self.w = self.add_weight(shape=(), initializer="ones") - def test_subclass_model_without_build_method(self): + def call(self, inputs): + return inputs * self.w - class SubclassModel(models.Model): + model = SubclassModel() + self.assertEqual(len(model.weights), 1) - def __init__(self): - super().__init__() - self.w = self.add_weight(shape=(), initializer='ones') - def call(self, inputs): - return inputs * self.w - - model = SubclassModel() - self.assertEqual(len(model.weights), 1) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class DTypeTest(test_combinations.TestCase): + @test_utils.enable_v2_dtype_behavior + def test_graph_network_dtype(self): + inputs = input_layer_lib.Input((10,)) + outputs = layers.Dense(10)(inputs) + network = functional.Functional(inputs, outputs) + self.assertEqual(network.dtype, "float32") - @test_utils.enable_v2_dtype_behavior - def test_graph_network_dtype(self): - inputs = input_layer_lib.Input((10,)) - outputs = layers.Dense(10)(inputs) - network = functional.Functional(inputs, outputs) - self.assertEqual(network.dtype, 'float32') - - @test_utils.enable_v2_dtype_behavior - def test_subclassed_network_dtype(self): + @test_utils.enable_v2_dtype_behavior + def test_subclassed_network_dtype(self): + class IdentityNetwork(training_lib.Model): + def call(self, inputs): + return inputs - class IdentityNetwork(training_lib.Model): + network = IdentityNetwork() + self.assertEqual(network.dtype, "float32") + self.assertEqual(network(tf.constant(1, "float64")).dtype, "float32") - def call(self, inputs): - return inputs + network = IdentityNetwork(dtype="float16") + self.assertEqual(network.dtype, "float16") + self.assertEqual(network(tf.constant(1, "float64")).dtype, "float16") - network = IdentityNetwork() - self.assertEqual(network.dtype, 'float32') - self.assertEqual(network(tf.constant(1, 'float64')).dtype, 'float32') - - network = IdentityNetwork(dtype='float16') - self.assertEqual(network.dtype, 'float16') - self.assertEqual(network(tf.constant(1, 'float64')).dtype, 'float16') - - network = 
IdentityNetwork(autocast=False) - self.assertEqual(network.dtype, 'float32') - self.assertEqual(network(tf.constant(1, 'float64')).dtype, 'float64') + network = IdentityNetwork(autocast=False) + self.assertEqual(network.dtype, "float32") + self.assertEqual(network(tf.constant(1, "float64")).dtype, "float64") class AttrTrackingLayer(base_layer.Layer): - """Count how many times `dynamic` and `stateful` are called. + """Count how many times `dynamic` and `stateful` are called. + + These counts are used to test that the attribute cache behaves as expected. + """ - These counts are used to test that the attribute cache behaves as expected. - """ - def __init__(self, *args, **kwargs): - self.stateful_count = 0 - self.dynamic_count = 0 - super().__init__(*args, **kwargs) + def __init__(self, *args, **kwargs): + self.stateful_count = 0 + self.dynamic_count = 0 + super().__init__(*args, **kwargs) - @base_layer.Layer.stateful.getter - def stateful(self): - self.stateful_count += 1 - return super().stateful + @base_layer.Layer.stateful.getter + def stateful(self): + self.stateful_count += 1 + return super().stateful - @property - def dynamic(self): - self.dynamic_count += 1 - return super().dynamic + @property + def dynamic(self): + self.dynamic_count += 1 + return super().dynamic -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class CacheCorrectnessTest(test_combinations.TestCase): + def layer_and_network_test(self): + # Top level layer + network = functional.Functional() + + layer_0 = AttrTrackingLayer() + + sub_network = functional.Functional() + layer_1 = AttrTrackingLayer(dynamic=True) + layer_2 = AttrTrackingLayer() + sub_network.sub_layers = [layer_1, layer_2] + + network.sub_layer = layer_0 + + for _ in range(2): + self.assertEqual(network.dynamic, False) + self.assertEqual(network.stateful, False) + + # The second pass should be a cache hit. + self.assertEqual(layer_0.dynamic_count, 1) + self.assertEqual(layer_0.stateful_count, 1) + + # Mutations of the sub-layer should force recalculation of the network's + # stateful attribute. (mutations bubble up.) + layer_0.stateful = True + self.assertEqual(network.stateful, True) + self.assertEqual(layer_0.stateful_count, 2) + + layer_0.stateful = False + self.assertEqual(network.stateful, False) + self.assertEqual(layer_0.stateful_count, 3) + + # But changing stateful should not affect dynamic. + self.assertEqual(network.dynamic, False) + self.assertEqual(layer_0.dynamic_count, 1) + + network.sub_network = sub_network + + # Adding to the topology should invalidate the cache and reflect in the + # top level network. + self.assertEqual(network.dynamic, True) + self.assertEqual(layer_0.dynamic_count, 2) + self.assertEqual(layer_1.dynamic_count, 1) + + # Still dynamic, but we need to recompute. + sub_network.sub_layers.pop() + self.assertEqual(network.dynamic, True) + self.assertEqual(layer_0.dynamic_count, 3) + self.assertEqual(layer_1.dynamic_count, 2) + + # Now that we've removed the dynamic layer deep in the layer hierarchy, + # we need to make sure that that bubbles up through all the levels. + sub_network.sub_layers.pop() + self.assertEqual(network.dynamic, False) + self.assertEqual(layer_0.dynamic_count, 4) + self.assertEqual(layer_1.dynamic_count, 2) + + # Now check with a tracked dict. 
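+ # (Dict attributes are auto-tracked just like lists, so the same + # cache-invalidation behavior is expected.)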
+ sub_network.sub_layers = { + "layer_1": layer_1, + "layer_2": layer_2, + } + + self.assertEqual(network.dynamic, True) + self.assertEqual(layer_0.dynamic_count, 5) + self.assertEqual(layer_1.dynamic_count, 3) + + # In-place assignment should still invalidate the cache. + sub_network.sub_layers["layer_1"] = layer_1 + self.assertEqual(network.dynamic, True) + self.assertEqual(layer_0.dynamic_count, 6) + self.assertEqual(layer_1.dynamic_count, 4) + + sub_network.sub_layers["layer_1"] = None + for _ in range(2): + self.assertEqual(network.dynamic, False) + self.assertEqual(layer_0.dynamic_count, 7) + self.assertEqual(layer_1.dynamic_count, 4) + + layer_3 = AttrTrackingLayer() + layer_3.stateful = True + + sub_network.sub_layers = None + self.assertEqual(network.dynamic, False) + self.assertEqual(network.stateful, False) + + # Test duplicate layers. + sub_network.sub_layers = [layer_1, layer_1, layer_1, layer_3] + self.assertEqual(network.dynamic, True) + self.assertEqual(network.stateful, True) + + for _ in range(3): + sub_network.sub_layers.pop() + self.assertEqual(network.dynamic, True) + self.assertEqual(network.stateful, False) + + sub_network.sub_layers.pop() + self.assertEqual(network.dynamic, False) + self.assertEqual(network.stateful, False) + + def test_compute_output_shape_cache(self): + # See https://github.com/tensorflow/tensorflow/issues/32029. + x = input_layer_lib.Input(shape=(None, 32)) + dense = layers.Dense(2) + y = dense(x) + network = functional.Functional(x, y, name="dense_network") + + for i in range(999, 1024): + self.assertEqual( + network.compute_output_shape((1, i, 32)), (1, i, 2) + ) + + def test_2d_inputs_squeezed_to_1d(self): + input_1d = input_layer_lib.Input(shape=()) + outputs = input_1d * 2.0 + net = functional.Functional(input_1d, outputs) + + x = np.ones((10, 1)) + y = net(x) + self.assertEqual(y.shape.rank, 1) + + def test_1d_inputs_expanded_to_2d(self): + input_1d = input_layer_lib.Input(shape=(1,)) + outputs = input_1d * 2.0 + net = functional.Functional(input_1d, outputs) + + x = np.ones((10,)) + y = net(x) + self.assertEqual(y.shape.rank, 2) + + def test_training_passed_during_construction(self): + def _call(inputs, training): + if training is None: + return inputs * -1.0 + elif training: + return inputs + else: + return inputs * 0.0 + + class MyLayer(base_layer.Layer): + def call(self, inputs, training=True): + return _call(inputs, training) + + my_layer = MyLayer() + x = np.ones((1, 10)) + + # Hard-coded `true` value passed during construction is respected. + inputs = input_layer_lib.Input(10) + outputs = my_layer(inputs, training=True) + network = functional.Functional(inputs, outputs) + self.assertAllEqual(network(x, training=True), _call(x, True)) + self.assertAllEqual(network(x, training=False), _call(x, True)) + self.assertAllEqual(network(x), _call(x, True)) - def layer_and_network_test(self): - # Top level layer - network = functional.Functional() - - layer_0 = AttrTrackingLayer() - - sub_network = functional.Functional() - layer_1 = AttrTrackingLayer(dynamic=True) - layer_2 = AttrTrackingLayer() - sub_network.sub_layers = [layer_1, layer_2] - - network.sub_layer = layer_0 - - for _ in range(2): - self.assertEqual(network.dynamic, False) - self.assertEqual(network.stateful, False) - - # The second pass should be a cache hit. - self.assertEqual(layer_0.dynamic_count, 1) - self.assertEqual(layer_0.stateful_count, 1) - - # Mutations of the sub-layer should force recalculation of the network's - # stateful attribute. (mutations bubble up.) 
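For context on the `compute_output_shape` cache regression referenced in `test_compute_output_shape_cache` above (tensorflow/tensorflow#32029), here is a public-API sketch of the property the test pins down: each distinct input shape must yield its own output shape even though results are cached internally.

```python
# Sketch of the cached shape-inference behavior, public tf.keras API only.
import tensorflow as tf

x = tf.keras.Input(shape=(None, 32))
y = tf.keras.layers.Dense(2)(x)
net = tf.keras.Model(x, y, name="dense_network")

for i in (999, 1000, 1023):
    # A stale cache entry here would return the shape of a previous call.
    assert tuple(net.compute_output_shape((1, i, 32))) == (1, i, 2)
```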
- layer_0.stateful = True - self.assertEqual(network.stateful, True) - self.assertEqual(layer_0.stateful_count, 2) - - layer_0.stateful = False - self.assertEqual(network.stateful, False) - self.assertEqual(layer_0.stateful_count, 3) - - # But changing stateful should not affect dynamic. - self.assertEqual(network.dynamic, False) - self.assertEqual(layer_0.dynamic_count, 1) - - network.sub_network = sub_network - - # Adding to the topology should invalidate the cache and reflect in the top - # level network. - self.assertEqual(network.dynamic, True) - self.assertEqual(layer_0.dynamic_count, 2) - self.assertEqual(layer_1.dynamic_count, 1) - - # Still dynamic, but we need to recompute. - sub_network.sub_layers.pop() - self.assertEqual(network.dynamic, True) - self.assertEqual(layer_0.dynamic_count, 3) - self.assertEqual(layer_1.dynamic_count, 2) - - # Now that we've removed the dynamic layer deep in the layer hierarchy, we - # need to make sure that that bubbles up through all the levels. - sub_network.sub_layers.pop() - self.assertEqual(network.dynamic, False) - self.assertEqual(layer_0.dynamic_count, 4) - self.assertEqual(layer_1.dynamic_count, 2) - - # Now check with a tracked dict. - sub_network.sub_layers = { - "layer_1": layer_1, - "layer_2": layer_2, - } - - self.assertEqual(network.dynamic, True) - self.assertEqual(layer_0.dynamic_count, 5) - self.assertEqual(layer_1.dynamic_count, 3) - - # In-place assignment should still invalidate the cache. - sub_network.sub_layers["layer_1"] = layer_1 - self.assertEqual(network.dynamic, True) - self.assertEqual(layer_0.dynamic_count, 6) - self.assertEqual(layer_1.dynamic_count, 4) - - sub_network.sub_layers["layer_1"] = None - for _ in range(2): - self.assertEqual(network.dynamic, False) - self.assertEqual(layer_0.dynamic_count, 7) - self.assertEqual(layer_1.dynamic_count, 4) - - layer_3 = AttrTrackingLayer() - layer_3.stateful = True - - sub_network.sub_layers = None - self.assertEqual(network.dynamic, False) - self.assertEqual(network.stateful, False) - - # Test duplicate layers. - sub_network.sub_layers = [layer_1, layer_1, layer_1, layer_3] - self.assertEqual(network.dynamic, True) - self.assertEqual(network.stateful, True) - - for _ in range(3): - sub_network.sub_layers.pop() - self.assertEqual(network.dynamic, True) - self.assertEqual(network.stateful, False) - - sub_network.sub_layers.pop() - self.assertEqual(network.dynamic, False) - self.assertEqual(network.stateful, False) - - def test_compute_output_shape_cache(self): - # See https://github.com/tensorflow/tensorflow/issues/32029. - x = input_layer_lib.Input(shape=(None, 32)) - dense = layers.Dense(2) - y = dense(x) - network = functional.Functional(x, y, name='dense_network') - - for i in range(999, 1024): - self.assertEqual(network.compute_output_shape((1, i, 32)), (1, i, 2)) - - def test_2d_inputs_squeezed_to_1d(self): - input_1d = input_layer_lib.Input(shape=()) - outputs = input_1d * 2. - net = functional.Functional(input_1d, outputs) - - x = np.ones((10, 1)) - y = net(x) - self.assertEqual(y.shape.rank, 1) - - def test_1d_inputs_expanded_to_2d(self): - input_1d = input_layer_lib.Input(shape=(1,)) - outputs = input_1d * 2. 
- net = functional.Functional(input_1d, outputs) - - x = np.ones((10,)) - y = net(x) - self.assertEqual(y.shape.rank, 2) - - def test_training_passed_during_construction(self): - - def _call(inputs, training): - if training is None: - return inputs * -1.0 - elif training: - return inputs - else: - return inputs * 0.0 - - class MyLayer(base_layer.Layer): - - def call(self, inputs, training=True): - return _call(inputs, training) - - my_layer = MyLayer() - x = np.ones((1, 10)) - - # Hard-coded `true` value passed during construction is respected. - inputs = input_layer_lib.Input(10) - outputs = my_layer(inputs, training=True) - network = functional.Functional(inputs, outputs) - self.assertAllEqual(network(x, training=True), _call(x, True)) - self.assertAllEqual(network(x, training=False), _call(x, True)) - self.assertAllEqual(network(x), _call(x, True)) - - # Hard-coded `false` value passed during construction is respected. - inputs = input_layer_lib.Input(10) - outputs = my_layer(inputs, training=False) - network = functional.Functional(inputs, outputs) - self.assertAllEqual(network(x, training=True), _call(x, False)) - self.assertAllEqual(network(x, training=False), _call(x, False)) - self.assertAllEqual(network(x), _call(x, False)) - - if tf.executing_eagerly(): - # In v2, construction still works when no `training` is specified - # When no value passed during construction, it uses the local default. - inputs = input_layer_lib.Input(10) - outputs = my_layer(inputs) - network = functional.Functional(inputs, outputs) - self.assertAllEqual(network(x, training=True), _call(x, True)) - self.assertAllEqual(network(x, training=False), _call(x, False)) - self.assertAllEqual(network(x), _call(x, True)) # Use local default - - # `None` value passed positionally during construction is ignored at runtime - inputs = input_layer_lib.Input(10) - outputs = my_layer(inputs, None) - network = functional.Functional(inputs, outputs) - self.assertAllEqual(network(x, training=True), _call(x, True)) - self.assertAllEqual(network(x, training=False), _call(x, False)) - if tf.executing_eagerly(): - self.assertAllEqual(network(x), _call(x, True)) # Use local default - else: - # in v1 training would have defaulted to using the `None` inside the layer - # if training is not passed at runtime - self.assertAllEqual(network(x), _call(x, None)) - - # `None` value passed as kwarg during construction is ignored at runtime. - inputs = input_layer_lib.Input(10) - outputs = my_layer(inputs, training=None) - network = functional.Functional(inputs, outputs) - self.assertAllEqual(network(x, training=True), _call(x, True)) - self.assertAllEqual(network(x, training=False), _call(x, False)) - if tf.executing_eagerly(): - self.assertAllEqual(network(x), _call(x, True)) # Use local default - else: - # in v1 training would have defaulted to using the `None` inside the layer - # if training is not passed at runtime - self.assertAllEqual(network(x), _call(x, None)) + # Hard-coded `false` value passed during construction is respected. + inputs = input_layer_lib.Input(10) + outputs = my_layer(inputs, training=False) + network = functional.Functional(inputs, outputs) + self.assertAllEqual(network(x, training=True), _call(x, False)) + self.assertAllEqual(network(x, training=False), _call(x, False)) + self.assertAllEqual(network(x), _call(x, False)) + + if tf.executing_eagerly(): + # In v2, construction still works when no `training` is specified + # When no value passed during construction, it uses the local + # default. 
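The `training` semantics asserted by this test are subtle enough to restate with a standalone sketch (public tf.keras API; the layer name is illustrative): a concrete `training` value passed while tracing the functional graph is baked into the model, while `None` defers to the call-time value or the layer's local default.

```python
import numpy as np
import tensorflow as tf


class ScaleOnTrain(tf.keras.layers.Layer):  # illustrative layer
    def call(self, inputs, training=True):
        if training is None:
            return inputs * -1.0
        return inputs if training else inputs * 0.0


inputs = tf.keras.Input((10,))
# A hard-coded `training=True` at construction time is baked in...
outputs = ScaleOnTrain()(inputs, training=True)
model = tf.keras.Model(inputs, outputs)

x = np.ones((1, 10), "float32")
# ...so the runtime flag cannot override it.
assert np.allclose(model(x, training=False), x)
assert np.allclose(model(x), x)
```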
+ inputs = input_layer_lib.Input(10) + outputs = my_layer(inputs) + network = functional.Functional(inputs, outputs) + self.assertAllEqual(network(x, training=True), _call(x, True)) + self.assertAllEqual(network(x, training=False), _call(x, False)) + self.assertAllEqual(network(x), _call(x, True)) # Use local default + + # `None` value passed positionally during construction is ignored at + # runtime + inputs = input_layer_lib.Input(10) + outputs = my_layer(inputs, None) + network = functional.Functional(inputs, outputs) + self.assertAllEqual(network(x, training=True), _call(x, True)) + self.assertAllEqual(network(x, training=False), _call(x, False)) + if tf.executing_eagerly(): + self.assertAllEqual(network(x), _call(x, True)) # Use local default + else: + # in v1 training would have defaulted to using the `None` inside the + # layer if training is not passed at runtime + self.assertAllEqual(network(x), _call(x, None)) + + # `None` value passed as kwarg during construction is ignored at + # runtime. + inputs = input_layer_lib.Input(10) + outputs = my_layer(inputs, training=None) + network = functional.Functional(inputs, outputs) + self.assertAllEqual(network(x, training=True), _call(x, True)) + self.assertAllEqual(network(x, training=False), _call(x, False)) + if tf.executing_eagerly(): + self.assertAllEqual(network(x), _call(x, True)) # Use local default + else: + # in v1 training would have defaulted to using the `None` inside the + # layer if training is not passed at runtime + self.assertAllEqual(network(x), _call(x, None)) class InputsOutputsErrorTest(test_combinations.TestCase): - - @test_utils.enable_v2_dtype_behavior - def test_input_error(self): - inputs = input_layer_lib.Input((10,)) - outputs = layers.Dense(10)(inputs) - with self.assertRaisesRegex( - TypeError, "('Keyword argument not understood:', 'input')"): - models.Model(input=inputs, outputs=outputs) - - @test_utils.enable_v2_dtype_behavior - def test_output_error(self): - inputs = input_layer_lib.Input((10,)) - outputs = layers.Dense(10)(inputs) - with self.assertRaisesRegex( - TypeError, "('Keyword argument not understood:', 'output')"): - models.Model(inputs=inputs, output=outputs) - - def test_input_spec(self): - if not tf.executing_eagerly(): - return - inputs = input_layer_lib.Input((10,)) - outputs = layers.Dense(10)(inputs) - model = models.Model(inputs, outputs) - with self.assertRaisesRegex( - ValueError, r'.*expected shape=.*'): - model(np.zeros((3, 11))) - - def test_input_spec_list_of_inputs(self): - if not tf.executing_eagerly(): - return - input_1 = input_layer_lib.Input((10,), name='1') - input_2 = input_layer_lib.Input((5,), name='2') - x = layers.Concatenate()([input_1, input_2]) - outputs = layers.Dense(10)(x) - model = models.Model([input_1, input_2], outputs) - with self.assertRaisesRegex( - ValueError, r'.*expects 2 input.*'): - model(np.zeros((3, 10))) - with self.assertRaisesRegex( - ValueError, r'.*expects 2 input.*'): - model([np.zeros((3, 10)), np.zeros((3, 5)), np.zeros((3, 10))]) - with self.assertRaisesRegex( - ValueError, r'.*expected shape=.*'): - model([np.zeros((3, 10)), np.zeros((3, 6))]) - - # Test passing data via dict keyed by input name - with self.assertRaisesRegex( - ValueError, r'Missing data for input.*'): - model({'1': np.zeros((3, 10))}) - with self.assertRaisesRegex( - ValueError, r'.*expected shape=.*'): - model({'1': np.zeros((3, 10)), '2': np.zeros((3, 6))}) - - def test_input_spec_dict(self): - if not tf.executing_eagerly(): - return - input_1 = 
input_layer_lib.Input((10,)) - input_2 = input_layer_lib.Input((5,)) - x = layers.Concatenate()([input_1, input_2]) - outputs = layers.Dense(10)(x) - model = models.Model({'1': input_1, '2': input_2}, outputs) - with self.assertRaisesRegex( - ValueError, r'Missing data for input.*'): - model({'1': np.zeros((3, 10))}) - with self.assertRaisesRegex( - ValueError, r'.*expected shape=.*'): - model({'1': np.zeros((3, 10)), '2': np.zeros((3, 6))}) + @test_utils.enable_v2_dtype_behavior + def test_input_error(self): + inputs = input_layer_lib.Input((10,)) + outputs = layers.Dense(10)(inputs) + with self.assertRaisesRegex( + TypeError, "('Keyword argument not understood:', 'input')" + ): + models.Model(input=inputs, outputs=outputs) + + @test_utils.enable_v2_dtype_behavior + def test_output_error(self): + inputs = input_layer_lib.Input((10,)) + outputs = layers.Dense(10)(inputs) + with self.assertRaisesRegex( + TypeError, "('Keyword argument not understood:', 'output')" + ): + models.Model(inputs=inputs, output=outputs) + + def test_input_spec(self): + if not tf.executing_eagerly(): + return + inputs = input_layer_lib.Input((10,)) + outputs = layers.Dense(10)(inputs) + model = models.Model(inputs, outputs) + with self.assertRaisesRegex(ValueError, r".*expected shape=.*"): + model(np.zeros((3, 11))) + + def test_input_spec_list_of_inputs(self): + if not tf.executing_eagerly(): + return + input_1 = input_layer_lib.Input((10,), name="1") + input_2 = input_layer_lib.Input((5,), name="2") + x = layers.Concatenate()([input_1, input_2]) + outputs = layers.Dense(10)(x) + model = models.Model([input_1, input_2], outputs) + with self.assertRaisesRegex(ValueError, r".*expects 2 input.*"): + model(np.zeros((3, 10))) + with self.assertRaisesRegex(ValueError, r".*expects 2 input.*"): + model([np.zeros((3, 10)), np.zeros((3, 5)), np.zeros((3, 10))]) + with self.assertRaisesRegex(ValueError, r".*expected shape=.*"): + model([np.zeros((3, 10)), np.zeros((3, 6))]) + + # Test passing data via dict keyed by input name + with self.assertRaisesRegex(ValueError, r"Missing data for input.*"): + model({"1": np.zeros((3, 10))}) + with self.assertRaisesRegex(ValueError, r".*expected shape=.*"): + model({"1": np.zeros((3, 10)), "2": np.zeros((3, 6))}) + + def test_input_spec_dict(self): + if not tf.executing_eagerly(): + return + input_1 = input_layer_lib.Input((10,)) + input_2 = input_layer_lib.Input((5,)) + x = layers.Concatenate()([input_1, input_2]) + outputs = layers.Dense(10)(x) + model = models.Model({"1": input_1, "2": input_2}, outputs) + with self.assertRaisesRegex(ValueError, r"Missing data for input.*"): + model({"1": np.zeros((3, 10))}) + with self.assertRaisesRegex(ValueError, r".*expected shape=.*"): + model({"1": np.zeros((3, 10)), "2": np.zeros((3, 6))}) class FunctionalSubclassModel(training_lib.Model): - - def __init__(self, *args, **kwargs): - self.foo = {'foo': 'bar'} # Make sure users can assign dict attributes - my_input = input_layer_lib.Input(shape=(16,)) - dense = layers.Dense(32, activation='relu') - output = dense(my_input) - outputs = {'output': output} - super().__init__(inputs=[my_input], outputs=outputs, *args, **kwargs) + def __init__(self, *args, **kwargs): + self.foo = {"foo": "bar"} # Make sure users can assign dict attributes + my_input = input_layer_lib.Input(shape=(16,)) + dense = layers.Dense(32, activation="relu") + output = dense(my_input) + outputs = {"output": output} + super().__init__(inputs=[my_input], outputs=outputs, *args, **kwargs) class MixinClass: + def __init__(self, foo, 
**kwargs): + self._foo = foo + super().__init__(**kwargs) - def __init__(self, foo, **kwargs): - self._foo = foo - super().__init__(**kwargs) - - def get_foo(self): - return self._foo + def get_foo(self): + return self._foo class SubclassedModel(training_lib.Model): + def __init__(self, bar, **kwargs): + self._bar = bar + super().__init__(**kwargs) - def __init__(self, bar, **kwargs): - self._bar = bar - super().__init__(**kwargs) - - def get_bar(self): - return self._bar + def get_bar(self): + return self._bar class MultipleInheritanceModelTest(test_combinations.TestCase): - - def testFunctionalSubclass(self): - m = FunctionalSubclassModel() - # Some smoke test for the weights and output shape of the model - self.assertLen(m.weights, 2) - self.assertEqual(m.outputs[0].shape.as_list(), [None, 32]) - - def testFunctionalSubclassPreMixin(self): - class MixedFunctionalSubclassModel(MixinClass, FunctionalSubclassModel): - pass - - m = MixedFunctionalSubclassModel(foo='123') - self.assertTrue(m._is_graph_network) - self.assertLen(m.weights, 2) - self.assertEqual(m.outputs[0].shape.as_list(), [None, 32]) - self.assertEqual(m.get_foo(), '123') - - def testFunctionalSubclassPostMixin(self): - # Make sure the the mixin class is also init correct when the order changed. - - class MixedFunctionalSubclassModel(FunctionalSubclassModel, MixinClass): - pass - - m = MixedFunctionalSubclassModel(foo='123') - self.assertTrue(m._is_graph_network) - self.assertLen(m.weights, 2) - self.assertEqual(m.outputs[0].shape.as_list(), [None, 32]) - self.assertEqual(m.get_foo(), '123') - - def testSubclassModelPreMixin(self): - class MixedSubclassModel(MixinClass, SubclassedModel): - pass - - m = MixedSubclassModel(foo='123', bar='456') - self.assertFalse(m._is_graph_network) - self.assertEqual(m.get_foo(), '123') - self.assertEqual(m.get_bar(), '456') - - -if __name__ == '__main__': - tf.test.main() + def testFunctionalSubclass(self): + m = FunctionalSubclassModel() + # Some smoke test for the weights and output shape of the model + self.assertLen(m.weights, 2) + self.assertEqual(m.outputs[0].shape.as_list(), [None, 32]) + + def testFunctionalSubclassPreMixin(self): + class MixedFunctionalSubclassModel(MixinClass, FunctionalSubclassModel): + pass + + m = MixedFunctionalSubclassModel(foo="123") + self.assertTrue(m._is_graph_network) + self.assertLen(m.weights, 2) + self.assertEqual(m.outputs[0].shape.as_list(), [None, 32]) + self.assertEqual(m.get_foo(), "123") + + def testFunctionalSubclassPostMixin(self): + # Make sure the mixin class is also initialized correctly when the + # order is changed.
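Before the reformatted test below, it is worth noting the pattern these mixin tests depend on: `MixinClass` and `SubclassedModel` use cooperative multiple inheritance, where each `__init__` consumes its own keyword arguments and forwards the rest, so construction works for either MRO order. A minimal standalone sketch (illustrative names):

```python
class LabelMixin:  # stands in for MixinClass
    def __init__(self, label, **kwargs):
        self._label = label
        super().__init__(**kwargs)  # keep forwarding along the MRO


class Base:  # stands in for the Model base class
    def __init__(self, **kwargs):
        super().__init__(**kwargs)


class MixinFirst(LabelMixin, Base):
    pass


class MixinLast(Base, LabelMixin):
    pass


assert MixinFirst(label="123")._label == "123"
assert MixinLast(label="456")._label == "456"
```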
+ + class MixedFunctionalSubclassModel(FunctionalSubclassModel, MixinClass): + pass + + m = MixedFunctionalSubclassModel(foo="123") + self.assertTrue(m._is_graph_network) + self.assertLen(m.weights, 2) + self.assertEqual(m.outputs[0].shape.as_list(), [None, 32]) + self.assertEqual(m.get_foo(), "123") + + def testSubclassModelPreMixin(self): + class MixedSubclassModel(MixinClass, SubclassedModel): + pass + + m = MixedSubclassModel(foo="123", bar="456") + self.assertFalse(m._is_graph_network) + self.assertEqual(m.get_foo(), "123") + self.assertEqual(m.get_bar(), "456") + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/functional_utils.py b/keras/engine/functional_utils.py index bd4e2e77eafa..bfc4acc4104a 100644 --- a/keras/engine/functional_utils.py +++ b/keras/engine/functional_utils.py @@ -14,235 +14,247 @@ # ============================================================================== """Utilities for keras functional model.""" +import tensorflow.compat.v2 as tf + from keras import backend from keras.engine import input_layer as input_layer_module from keras.engine import keras_tensor from keras.engine import node as node_module -import tensorflow.compat.v2 as tf - _KERAS_TENSOR_TYPE_CHECK_ERROR_MSG = ( - 'Found unexpected instance while processing input tensors for keras ' - 'functional model. Expecting KerasTensor which is from tf.keras.Input() ' - 'or output from keras layer call(). Got: {}') + "Found unexpected instance while processing input tensors for keras " + "functional model. Expecting KerasTensor which is from tf.keras.Input() " + "or output from keras layer call(). Got: {}" +) def is_input_keras_tensor(tensor): - """Check if tensor is directly generated from `tf.keras.Input`. + """Check if tensor is directly generated from `tf.keras.Input`. - This check is useful when constructing the functional model, since we will - need to clone Nodes and KerasTensors if the model is building from non input - tensor. + This check is useful when constructing the functional model, since we will + need to clone Nodes and KerasTensors if the model is building from non input + tensor. - Args: - tensor: A `KerasTensor` as inputs to the functional model. + Args: + tensor: A `KerasTensor` as inputs to the functional model. - Returns: - bool. Whether the tensor is directly generated from `tf.keras.Input`. + Returns: + bool. Whether the tensor is directly generated from `tf.keras.Input`. - Raises: - ValueError: if the tensor is not a KerasTensor instance. - """ - if not node_module.is_keras_tensor(tensor): - raise ValueError(_KERAS_TENSOR_TYPE_CHECK_ERROR_MSG.format(tensor)) - return tensor.node.is_input + Raises: + ValueError: if the tensor is not a KerasTensor instance. + """ + if not node_module.is_keras_tensor(tensor): + raise ValueError(_KERAS_TENSOR_TYPE_CHECK_ERROR_MSG.format(tensor)) + return tensor.node.is_input def find_nodes_by_inputs_and_outputs(inputs, outputs): - """Fetch all Nodes in the graph defined by "inputs" and "outputs". - - This method is used to find and then clone Nodes when creating a new - sub-model from an existing functional model. - - Args: - inputs: A nested structure of KerasTensor to use as model inputs. - outputs: A nested structure of KerasTensor to use as model outputs. - - Returns: - A list of Nodes that are connected to the inputs and outputs. - - Raises: - ValueError: when inputs and outputs are disconnected or in case of - unexpected objects in the inputs/outputs. 
- """ - # We walk the graph bottom up, starting from output nodes, and keep tracing - # the upstream node, until we find all the inputs nodes. We don't use top - # down search here since we don't know whether a certain node is in the graph - # between inputs and outputs, e.g. a functional graph could have multiple - # outputs, and the user could choose a subset of them to build the model. - # The bottom up approach will ensure all the nodes we visit are actually - # in use. If we reach the top and didn't find the nodes in the `inputs`, - # that's an error, since the user didn't specify the correct inputs. - start_keras_tensors = tf.nest.flatten(outputs) - end_keras_tensors = tf.nest.flatten(inputs) - - for t in start_keras_tensors + end_keras_tensors: - if not node_module.is_keras_tensor(t): - raise ValueError(_KERAS_TENSOR_TYPE_CHECK_ERROR_MSG.format(t)) - end_ids = set([id(kt) for kt in end_keras_tensors]) - # Track all the end tensors we found so far, if we didn't reach all the - # user-specified keras inputs after we finish the search, then that's an - # error since the inputs are disconnected from the outputs. - end_ids_found = set() - - nodes_to_visit = [] - nodes_in_graph = [] - node_id_visited = set() - for t in start_keras_tensors: - nodes_to_visit.append(t.node) - - while nodes_to_visit: - node = nodes_to_visit.pop(0) - if id(node) in node_id_visited: - continue - node_id_visited.add(id(node)) - nodes_in_graph.append(node) - # Any input keras_tensor that produce the current node. - for kt in node.keras_inputs: - if id(kt) in end_ids: - # We found the inputs of the model, stop tracing upstream nodes - end_ids_found.add(id(kt)) - continue - - inbound_node = kt.node - # In case this is the tf.keras.Input node, we have reached the end of the - # tracing of upstream nodes. Any further tracing will just be an - # infinite loop. we should raise an error here since we didn't find the - # input in the user-specified inputs. - if inbound_node.is_input: - raise ValueError('Found input tensor cannot be reached given provided ' - 'output tensors. Please make sure the tensor {} is ' - 'included in the model inputs when building ' - 'functional model.'.format(kt)) - nodes_to_visit.append(inbound_node) - - # Do a final check and make sure we have reached all the user-specified inputs - if end_ids != end_ids_found: - unvisited_inputs = [kt for kt in end_keras_tensors - if id(kt) not in end_ids_found] - raise ValueError('Found unvisited input tensors that are disconnected from ' - 'the outputs: {}'.format(unvisited_inputs)) - return nodes_in_graph + """Fetch all Nodes in the graph defined by "inputs" and "outputs". + + This method is used to find and then clone Nodes when creating a new + sub-model from an existing functional model. + + Args: + inputs: A nested structure of KerasTensor to use as model inputs. + outputs: A nested structure of KerasTensor to use as model outputs. + + Returns: + A list of Nodes that are connected to the inputs and outputs. + + Raises: + ValueError: when inputs and outputs are disconnected or in case of + unexpected objects in the inputs/outputs. + """ + # We walk the graph bottom up, starting from output nodes, and keep tracing + # the upstream node, until we find all the inputs nodes. We don't use top + # down search here since we don't know whether a certain node is in the + # graph between inputs and outputs, e.g. a functional graph could have + # multiple outputs, and the user could choose a subset of them to build the + # model. 
The bottom up approach will ensure all the nodes we visit are + # actually in use. If we reach the top and didn't find the nodes in the + # `inputs`, that's an error, since the user didn't specify the correct + # inputs. + start_keras_tensors = tf.nest.flatten(outputs) + end_keras_tensors = tf.nest.flatten(inputs) + + for t in start_keras_tensors + end_keras_tensors: + if not node_module.is_keras_tensor(t): + raise ValueError(_KERAS_TENSOR_TYPE_CHECK_ERROR_MSG.format(t)) + end_ids = set([id(kt) for kt in end_keras_tensors]) + # Track all the end tensors we found so far, if we didn't reach all the + # user-specified keras inputs after we finish the search, then that's an + # error since the inputs are disconnected from the outputs. + end_ids_found = set() + + nodes_to_visit = [] + nodes_in_graph = [] + node_id_visited = set() + for t in start_keras_tensors: + nodes_to_visit.append(t.node) + + while nodes_to_visit: + node = nodes_to_visit.pop(0) + if id(node) in node_id_visited: + continue + node_id_visited.add(id(node)) + nodes_in_graph.append(node) + # Any input keras_tensor that produce the current node. + for kt in node.keras_inputs: + if id(kt) in end_ids: + # We found the inputs of the model, stop tracing upstream nodes + end_ids_found.add(id(kt)) + continue + + inbound_node = kt.node + # In case this is the tf.keras.Input node, we have reached the end + # of the tracing of upstream nodes. Any further tracing will just be + # an infinite loop. we should raise an error here since we didn't + # find the input in the user-specified inputs. + if inbound_node.is_input: + raise ValueError( + "Found input tensor cannot be reached given provided " + "output tensors. Please make sure the tensor {} is " + "included in the model inputs when building " + "functional model.".format(kt) + ) + nodes_to_visit.append(inbound_node) + + # Do a final check and make sure we have reached all the user-specified + # inputs + if end_ids != end_ids_found: + unvisited_inputs = [ + kt for kt in end_keras_tensors if id(kt) not in end_ids_found + ] + raise ValueError( + "Found unvisited input tensors that are disconnected from " + "the outputs: {}".format(unvisited_inputs) + ) + return nodes_in_graph def clone_graph_nodes(inputs, outputs): - """Clone the `Node` between the inputs and output tensors. - - This function is used to create a new functional model from any intermediate - keras tensors. The clone of the nodes mimic the behavior of reconstructing the - functional graph network by re-executing all the __call__ methods. The cloned - nodes will be appended to the layers. - - Note that a new tf.keras.Inputs will be created for any items in the `inputs` - - Args: - inputs: A nested structure of keras_tensors. - outputs: A nested structure of keras_tensors. - - Returns: - A pair of inputs and outputs, with cloned keras_tensors. They can be used to - create a new functional model. - """ - nodes_to_clone = find_nodes_by_inputs_and_outputs(inputs, outputs) - cloned_inputs = [] - cloned_outputs = [] - # We not only need to create copies of Nodes (mimic the calls), also need to - # clone keras_tensors to avoid the override of _keras_history attached on the - # keras_tensor. The following dict is used to track any keras tensor we cloned - # The key is the string ID of the original keras tensor, and value is the - # cloned keras_tensor instance. - kt_id_mapping = {} - - for kt_input in tf.nest.flatten(inputs): - if kt_input.node.is_input: - # For any existing keras_tensor from tf.keras.Input, we leave them as is. 
- cloned_inputs.append(kt_input) - kt_id_mapping[id(kt_input)] = kt_input - else: - # We need to create a new tf.keras.Input for any intermediate keras_tensor - cpy = _clone_keras_tensor(kt_input) - cloned_input = input_layer_module.Input(tensor=cpy) - cloned_inputs.append(cloned_input) - kt_id_mapping[id(kt_input)] = cloned_input - cloned_inputs = tf.nest.pack_sequence_as(inputs, cloned_inputs) - - for kt_output in tf.nest.flatten(outputs): - cpy = _clone_keras_tensor(kt_output) - # We reuse the _keras_history here, which contains the old information. It - # is used in the Node constructor to check if the tensor "is_keras_tensor()" - # The history will be override by the Node constructor anyway for the - # corresponding layer output anyway. - cpy._keras_history = kt_output._keras_history # pylint: disable=protected-access - cloned_outputs.append(cpy) - kt_id_mapping[id(kt_output)] = cpy - cloned_outputs = tf.nest.pack_sequence_as(outputs, cloned_outputs) - - for node in nodes_to_clone: - # Clone any keras_tensors to avoid override of _keras_history - # Or reuse an existing keras_tensor if it has already been cloned. - output_copy = clone_keras_tensors(node.output_tensors, kt_id_mapping) - call_args_copy = clone_keras_tensors(node.call_args, kt_id_mapping) - call_kwargs_copy = clone_keras_tensors(node.call_kwargs, kt_id_mapping) - # Creating new nodes based on the existing node information. - # Node wires itself to inbound and outbound layers. - # The Node constructor actually updates this layer's self._inbound_nodes, - # sets _keras_history on the outputs, and adds itself to the - # `_outbound_nodes` of the layers that produced the inputs to this - # layer call. - node_module.Node(node.layer, - call_args=call_args_copy, - call_kwargs=call_kwargs_copy, - outputs=output_copy) - return cloned_inputs, cloned_outputs + """Clone the `Node` between the inputs and output tensors. + + This function is used to create a new functional model from any intermediate + keras tensors. Cloning the nodes mimics the behavior of reconstructing the + functional graph network by re-executing all the __call__ methods. The + cloned nodes will be appended to the layers. + + Note that a new tf.keras.Input will be created for any item in `inputs`. + + Args: + inputs: A nested structure of keras_tensors. + outputs: A nested structure of keras_tensors. + + Returns: + A pair of inputs and outputs, with cloned keras_tensors. They can be used + to create a new functional model. + """ + nodes_to_clone = find_nodes_by_inputs_and_outputs(inputs, outputs) + cloned_inputs = [] + cloned_outputs = [] + # We not only need to create copies of Nodes (to mimic the calls), but + # also need to clone the keras_tensors to avoid overriding the + # _keras_history attached to them. The following dict tracks every keras + # tensor we have cloned. The key is the ID of the original keras tensor + # and the value is the cloned keras_tensor instance. + kt_id_mapping = {} + + for kt_input in tf.nest.flatten(inputs): + if kt_input.node.is_input: + # For any existing keras_tensor from tf.keras.Input, we leave them + # as is.
+ cloned_inputs.append(kt_input) + kt_id_mapping[id(kt_input)] = kt_input + else: + # We need to create a new tf.keras.Input for any intermediate + # keras_tensor + cpy = _clone_keras_tensor(kt_input) + cloned_input = input_layer_module.Input(tensor=cpy) + cloned_inputs.append(cloned_input) + kt_id_mapping[id(kt_input)] = cloned_input + cloned_inputs = tf.nest.pack_sequence_as(inputs, cloned_inputs) + + for kt_output in tf.nest.flatten(outputs): + cpy = _clone_keras_tensor(kt_output) + # We reuse the _keras_history here, which contains the old information. + # It is used in the Node constructor to check if the tensor + # "is_keras_tensor()". The history will be overridden by the Node + # constructor for the corresponding layer output anyway. + cpy._keras_history = kt_output._keras_history + cloned_outputs.append(cpy) + kt_id_mapping[id(kt_output)] = cpy + cloned_outputs = tf.nest.pack_sequence_as(outputs, cloned_outputs) + + for node in nodes_to_clone: + # Clone any keras_tensors to avoid overriding _keras_history, or reuse + # an existing keras_tensor if it has already been cloned. + output_copy = clone_keras_tensors(node.output_tensors, kt_id_mapping) + call_args_copy = clone_keras_tensors(node.call_args, kt_id_mapping) + call_kwargs_copy = clone_keras_tensors(node.call_kwargs, kt_id_mapping) + # Creating new nodes based on the existing node information. Node wires + # itself to inbound and outbound layers. The Node constructor actually + # updates this layer's self._inbound_nodes, sets _keras_history on the + # outputs, and adds itself to the `_outbound_nodes` of the layers that + # produced the inputs to this layer call. + node_module.Node( + node.layer, + call_args=call_args_copy, + call_kwargs=call_kwargs_copy, + outputs=output_copy, + ) + return cloned_inputs, cloned_outputs def clone_keras_tensors(args, keras_tensor_mapping): - """Clone the keras tensors from the inputs. - - For any KerasTensor instance in the `args`, a new copy of KerasTensor will - be created if it has not been cloned yet (by checking the - `keras_tensor_mapping`). For any other types, the instance will be unchanged. - This function is useful for cloning the Nodes since KerasTensor can't be - reused across the models. - - Args: - args: A nested structure of objects, which could contain KerasTensor. - keras_tensor_mapping: A dict contains the ID of original KerasTensor, and - the cloned KerasTensor instance. The dict will be updated with newly - copied KerasTensor instances within this method. - Returns: - Same structure as inputs, with KerasTensor cloned. - """ - result = [] - for obj in tf.nest.flatten(args): - if node_module.is_keras_tensor(obj): - if id(obj) in keras_tensor_mapping: - cpy = keras_tensor_mapping[id(obj)] - else: - # Create copy of keras_tensor if we haven't done it before - cpy = _clone_keras_tensor(obj) - cpy._keras_history = obj._keras_history # pylint: disable=protected-access - keras_tensor_mapping[id(obj)] = cpy - result.append(cpy) - else: - result.append(obj) - return tf.nest.pack_sequence_as(args, result) + """Clone the keras tensors from the inputs. + + For any KerasTensor instance in the `args`, a new copy of KerasTensor will + be created if it has not been cloned yet (by checking the + `keras_tensor_mapping`). For any other types, the instance will be + unchanged. This function is useful for cloning the Nodes since KerasTensor + can't be reused across models. + + Args: + args: A nested structure of objects, which could contain KerasTensor.
+ keras_tensor_mapping: A dict contains the ID of original KerasTensor, and + the cloned KerasTensor instance. The dict will be updated with newly + copied KerasTensor instances within this method. + Returns: + Same structure as inputs, with KerasTensor cloned. + """ + result = [] + for obj in tf.nest.flatten(args): + if node_module.is_keras_tensor(obj): + if id(obj) in keras_tensor_mapping: + cpy = keras_tensor_mapping[id(obj)] + else: + # Create copy of keras_tensor if we haven't done it before + cpy = _clone_keras_tensor(obj) + cpy._keras_history = obj._keras_history + keras_tensor_mapping[id(obj)] = cpy + result.append(cpy) + else: + result.append(obj) + return tf.nest.pack_sequence_as(args, result) def _clone_keras_tensor(kt): - """Create an identical keras_tensor based on the input. - - We use keras_tensor_to_placeholder and keras_tensor_from_tensor to make sure - inferred shape are not lost during the copy. - - Args: - kt: the input KerasTensor. - - Returns: - An identical copy of the input KerasTensor. - """ - # Create a scratch graph since we don't intend to use the placeholders. - with backend._scratch_graph() as scratch_graph: # pylint: disable=protected-access - with scratch_graph.as_default(): - placeholder = keras_tensor.keras_tensor_to_placeholder(kt) - return keras_tensor.keras_tensor_from_tensor(placeholder) + """Create an identical keras_tensor based on the input. + + We use keras_tensor_to_placeholder and keras_tensor_from_tensor to make sure + inferred shape are not lost during the copy. + + Args: + kt: the input KerasTensor. + + Returns: + An identical copy of the input KerasTensor. + """ + # Create a scratch graph since we don't intend to use the placeholders. + with backend._scratch_graph() as scratch_graph: + with scratch_graph.as_default(): + placeholder = keras_tensor.keras_tensor_to_placeholder(kt) + return keras_tensor.keras_tensor_from_tensor(placeholder) diff --git a/keras/engine/functional_utils_test.py b/keras/engine/functional_utils_test.py index aeb6dc163d9f..3d5be79a157c 100644 --- a/keras/engine/functional_utils_test.py +++ b/keras/engine/functional_utils_test.py @@ -11,200 +11,257 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
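Taken together, `find_nodes_by_inputs_and_outputs`, `clone_graph_nodes`, and `clone_keras_tensors` are what let `Model` accept intermediate tensors. A short public-API sketch of the effect (the test file below exercises this in depth):

```python
import tensorflow as tf

inputs = tf.keras.Input(shape=(8,))
x = tf.keras.layers.Dense(32)(inputs)  # intermediate KerasTensor
y = tf.keras.layers.Dense(16)(x)

# `x` is not a `tf.keras.Input`, so Keras walks the graph bottom-up from
# `y`, clones the Nodes it visits, and creates a fresh Input for `x`.
sub_model = tf.keras.Model(x, y)
assert len(sub_model.layers) == 2  # one InputLayer plus the second Dense
```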
-#,============================================================================ +# ,============================================================================ """Tests for functional_utils.""" import collections import os +import numpy as np +import tensorflow.compat.v2 as tf + from keras import layers from keras import models from keras.engine import functional_utils from keras.engine import input_layer as input_layer_lib from keras.testing_infra import test_combinations -import numpy as np -import tensorflow.compat.v2 as tf - @test_combinations.run_all_keras_modes(always_skip_v1=True) class FunctionalModelSlideTest(test_combinations.TestCase): - - def test_find_nodes_by_inputs_and_outputs(self): - inputs = input_layer_lib.Input((10,)) - unconnected_inputs = input_layer_lib.Input((10,)) - x = layers.Dense(8)(inputs) - y = layers.Dense(6)(x) - output = layers.Dense(4)(y) - - nodes_in_graph = functional_utils.find_nodes_by_inputs_and_outputs( - x, output) - self.assertLen(nodes_in_graph, 2) - expected_nodes = [output.node, y.node] - self.assertCountEqual(nodes_in_graph, expected_nodes) - - # Make sure we raise error if we specify invalid input/output pair - with self.assertRaisesRegex( - ValueError, 'Found input tensor cannot be reached'): - functional_utils.find_nodes_by_inputs_and_outputs(output, x) - - with self.assertRaisesRegex( - ValueError, 'Found input tensor cannot be reached'): - functional_utils.find_nodes_by_inputs_and_outputs(unconnected_inputs, - output) - - with self.assertRaisesRegex( - ValueError, 'Found unvisited input tensors that are disconnected'): - functional_utils.find_nodes_by_inputs_and_outputs( - [inputs, unconnected_inputs], output) - - def test_find_nodes_by_inputs_and_outputs_with_complicated_network(self): - input1 = input_layer_lib.Input((10,)) - input2 = input_layer_lib.Input((10,)) - input3 = input_layer_lib.Input((10,)) - unconnected_input = input_layer_lib.Input((10,)) - - dense1 = layers.Dense(4, name='dense1') - dense2 = layers.Dense(4, name='dense2') - # dense1 are shared between input1 and input2 - a = dense1(input1) - b = dense1(input2) - - c = layers.Add()([a, b]) - d = dense2(input3) - e = layers.Add()([c, d]) - # There are 5 nodes (invoke of __call__) in the graph. 
- - nodes = functional_utils.find_nodes_by_inputs_and_outputs(input1, a) - self.assertCountEqual(nodes, [a.node]) - - nodes = functional_utils.find_nodes_by_inputs_and_outputs(input2, b) - self.assertCountEqual(nodes, [b.node]) - - nodes = functional_utils.find_nodes_by_inputs_and_outputs([input2, input1], - c) - # This should contains 2 dense call and 1 add - self.assertCountEqual(nodes, [a.node, b.node, c.node]) - - # Missing input3 - with self.assertRaisesRegex( - ValueError, 'Found input tensor cannot be reached'): - functional_utils.find_nodes_by_inputs_and_outputs([input1, input2], e) - - nodes = functional_utils.find_nodes_by_inputs_and_outputs( - [input1, input2, input3], e) - self.assertCountEqual(nodes, [a.node, b.node, c.node, d.node, e.node]) - - # Make sure we can create from intermediate tensors - nodes = functional_utils.find_nodes_by_inputs_and_outputs([a, b, input3], e) - self.assertCountEqual(nodes, [c.node, d.node, e.node]) - # Also make sure we can add intermediate outputs - nodes = functional_utils.find_nodes_by_inputs_and_outputs([a, b, input3], - [d, e]) - self.assertCountEqual(nodes, [c.node, d.node, e.node]) - - # input1 and 2 are not needed for computing d - with self.assertRaisesRegex( - ValueError, 'Found unvisited input tensors that are disconnected'): - functional_utils.find_nodes_by_inputs_and_outputs( - [input1, input2, input3], d) - - with self.assertRaisesRegex( - ValueError, 'Found unvisited input tensors that are disconnected'): - functional_utils.find_nodes_by_inputs_and_outputs( - [a, b, input3, unconnected_input], [e, d, c]) - - def test_build_model_from_intermediate_tensor(self): - batch_size = 4 - inputs = input_layer_lib.Input(shape=(8,)) - layer1 = layers.Dense(32) - layer2 = layers.Dense(16) - x = layer1(inputs) - y = layer2(x) - model = models.Model(x, y) - # Make sure a new node is attached to layer2, which mimic y = layer2(x) - self.assertLen(layer2.inbound_nodes, 2) - - self.assertIsInstance(model, models.Model) - # The model only contains 1 dense layer and 1 input layer. - self.assertLen(model.layers, 2) - self.assertIs(model.layers[1], layer2) - - model.compile('rmsprop', 'mse') - model.fit(np.random.randn(batch_size, 32), np.random.randn(batch_size, 16)) - # Test for model saving - output_path = os.path.join(self.get_temp_dir(), 'tf_keras_saved_model') - model.save(output_path, save_format='tf') - loaded_model = models.load_model(output_path) - self.assertEqual(model.summary(), loaded_model.summary()) - - # Also make sure the original inputs and y can still be used to build model - new_model = models.Model(inputs, y) - # Make sure no new node is attached to layer2 - self.assertLen(layer2.inbound_nodes, 2) - - self.assertLen(new_model.layers, 3) - self.assertIs(new_model.layers[1], layer1) - self.assertIs(new_model.layers[2], layer2) - - def test_build_model_from_intermediate_tensor_with_complicated_model(self): - # The topology is like below: - # input1 -> dense1 -> a - # + -> c - + --> d - + --> output - # input2 -> dense1 -> b -------^ ^ - # input3 -> dense2 -> e -----------------| - batch_size = 8 - input1 = input_layer_lib.Input((2,)) - input2 = input_layer_lib.Input((2,)) - input3 = input_layer_lib.Input((8,)) - - dense1 = layers.Dense(8, name='dense1') - dense2 = layers.Dense(8, name='dense2') - - # dense1 are shared between input1 and input2 - a = dense1(input1) - b = dense1(input2) - - c = layers.Add()([a, b]) - # d has a residual connection from b. 
- d = layers.Add()([b, c]) - e = dense2(input3) - output = layers.Add()([d, e]) - - # We skip the input2 here and use b instead. - model = models.Model([input1, b, input3], output) - # Make sure we have 8 layers, 3 for inputs, 2 for dense and 3 for Add. - # Note that dense1 is still in use by input1. - self.assertLen(model.layers, 8) - # Since the layers are not ordered, let's check class of the layers to make - # sure it match the expectation. - class_count = collections.Counter([l.__class__ for l in model.layers]) - self.assertEqual(class_count[input_layer_lib.InputLayer], 3) - self.assertEqual(class_count[layers.Dense], 2) - self.assertEqual(class_count[layers.Add], 3) - - model.compile('rmsprop', 'mse') - model.fit([np.random.randn(batch_size, 2), - np.random.randn(batch_size, 8), # The shape of b is (batch, 8) - np.random.randn(batch_size, 8)], - np.random.randn(batch_size, 8)) - output_path = os.path.join(self.get_temp_dir(), 'tf_keras_saved_model') - model.save(output_path, save_format='tf') - loaded_model = models.load_model(output_path) - self.assertEqual(model.summary(), loaded_model.summary()) - - model2 = models.Model([a, b], d) - # 2 input layers and 2 Add layer. - self.assertLen(model2.layers, 4) - class_count = collections.Counter([l.__class__ for l in model2.layers]) - self.assertEqual(class_count[input_layer_lib.InputLayer], 2) - self.assertEqual(class_count[layers.Add], 2) - - model2.compile('rmsprop', 'mse') - model2.fit([np.random.randn(batch_size, 8), - np.random.randn(batch_size, 8)], - np.random.randn(batch_size, 8)) - - -if __name__ == '__main__': - tf.test.main() + def test_find_nodes_by_inputs_and_outputs(self): + inputs = input_layer_lib.Input((10,)) + unconnected_inputs = input_layer_lib.Input((10,)) + x = layers.Dense(8)(inputs) + y = layers.Dense(6)(x) + output = layers.Dense(4)(y) + + nodes_in_graph = functional_utils.find_nodes_by_inputs_and_outputs( + x, output + ) + self.assertLen(nodes_in_graph, 2) + expected_nodes = [output.node, y.node] + self.assertCountEqual(nodes_in_graph, expected_nodes) + + # Make sure we raise error if we specify invalid input/output pair + with self.assertRaisesRegex( + ValueError, "Found input tensor cannot be reached" + ): + functional_utils.find_nodes_by_inputs_and_outputs(output, x) + + with self.assertRaisesRegex( + ValueError, "Found input tensor cannot be reached" + ): + functional_utils.find_nodes_by_inputs_and_outputs( + unconnected_inputs, output + ) + + with self.assertRaisesRegex( + ValueError, "Found unvisited input tensors that are disconnected" + ): + functional_utils.find_nodes_by_inputs_and_outputs( + [inputs, unconnected_inputs], output + ) + + def test_find_nodes_by_inputs_and_outputs_with_complicated_network(self): + input1 = input_layer_lib.Input((10,)) + input2 = input_layer_lib.Input((10,)) + input3 = input_layer_lib.Input((10,)) + unconnected_input = input_layer_lib.Input((10,)) + + dense1 = layers.Dense(4, name="dense1") + dense2 = layers.Dense(4, name="dense2") + # dense1 are shared between input1 and input2 + a = dense1(input1) + b = dense1(input2) + + c = layers.Add()([a, b]) + d = dense2(input3) + e = layers.Add()([c, d]) + # There are 5 nodes (invoke of __call__) in the graph. 
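The node count in the comment above follows from one `Node` being recorded per layer `__call__`; sharing `dense1` across two inputs therefore contributes two of the five nodes. A quick standalone check (using the same public `inbound_nodes` attribute these tests rely on):

```python
import tensorflow as tf

shared = tf.keras.layers.Dense(4, name="dense1")
a = shared(tf.keras.Input((10,)))
b = shared(tf.keras.Input((10,)))
assert len(shared.inbound_nodes) == 2  # one Node per __call__
```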
+ + nodes = functional_utils.find_nodes_by_inputs_and_outputs(input1, a) + self.assertCountEqual(nodes, [a.node]) + + nodes = functional_utils.find_nodes_by_inputs_and_outputs(input2, b) + self.assertCountEqual(nodes, [b.node]) + + nodes = functional_utils.find_nodes_by_inputs_and_outputs( + [input2, input1], c + ) + # This should contain 2 dense calls and 1 add + self.assertCountEqual(nodes, [a.node, b.node, c.node]) + + # Missing input3 + with self.assertRaisesRegex( + ValueError, "Found input tensor cannot be reached" + ): + functional_utils.find_nodes_by_inputs_and_outputs( + [input1, input2], e + ) + + nodes = functional_utils.find_nodes_by_inputs_and_outputs( + [input1, input2, input3], e + ) + self.assertCountEqual(nodes, [a.node, b.node, c.node, d.node, e.node]) + + # Make sure we can create from intermediate tensors + nodes = functional_utils.find_nodes_by_inputs_and_outputs( + [a, b, input3], e + ) + self.assertCountEqual(nodes, [c.node, d.node, e.node]) + # Also make sure we can add intermediate outputs + nodes = functional_utils.find_nodes_by_inputs_and_outputs( + [a, b, input3], [d, e] + ) + self.assertCountEqual(nodes, [c.node, d.node, e.node]) + + # input1 and 2 are not needed for computing d + with self.assertRaisesRegex( + ValueError, "Found unvisited input tensors that are disconnected" + ): + functional_utils.find_nodes_by_inputs_and_outputs( + [input1, input2, input3], d + ) + + with self.assertRaisesRegex( + ValueError, "Found unvisited input tensors that are disconnected" + ): + functional_utils.find_nodes_by_inputs_and_outputs( + [a, b, input3, unconnected_input], [e, d, c] + ) + + def test_build_model_from_intermediate_tensor(self): + batch_size = 4 + inputs = input_layer_lib.Input(shape=(8,)) + layer1 = layers.Dense(32) + layer2 = layers.Dense(16) + x = layer1(inputs) + y = layer2(x) + model = models.Model(x, y) + # Make sure a new node is attached to layer2, which mimics y = layer2(x) + self.assertLen(layer2.inbound_nodes, 2) + + self.assertIsInstance(model, models.Model) + # The model only contains 1 dense layer and 1 input layer. + self.assertLen(model.layers, 2) + self.assertIs(model.layers[1], layer2) + + model.compile("rmsprop", "mse") + model.fit( + np.random.randn(batch_size, 32), np.random.randn(batch_size, 16) + ) + + # Also make sure the original inputs and y can still be used to build + # a model + new_model = models.Model(inputs, y) + # Make sure no new node is attached to layer2 + self.assertLen(layer2.inbound_nodes, 2) + + self.assertLen(new_model.layers, 3) + self.assertIs(new_model.layers[1], layer1) + self.assertIs(new_model.layers[2], layer2) + + # Test for model saving + with self.subTest("savedmodel"): + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_model" + ) + model.save(output_path, save_format="tf") + loaded_model = models.load_model(output_path) + self.assertEqual(model.summary(), loaded_model.summary()) + + with self.subTest("keras_v3"): + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "TF2 must be enabled to use the new `.keras` saving."
+ ) + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_v3_model.keras" + ) + model.save(output_path, save_format="keras_v3") + loaded_model = models.load_model(output_path) + self.assertEqual(model.summary(), loaded_model.summary()) + + def test_build_model_from_intermediate_tensor_with_complicated_model(self): + # The topology is like below: + # input1 -> dense1 -> a + # + -> c - + --> d - + --> output + # input2 -> dense1 -> b -------^ ^ + # input3 -> dense2 -> e -----------------| + batch_size = 8 + input1 = input_layer_lib.Input((2,)) + input2 = input_layer_lib.Input((2,)) + input3 = input_layer_lib.Input((8,)) + + dense1 = layers.Dense(8, name="dense1") + dense2 = layers.Dense(8, name="dense2") + + # dense1 are shared between input1 and input2 + a = dense1(input1) + b = dense1(input2) + + c = layers.Add()([a, b]) + # d has a residual connection from b. + d = layers.Add()([b, c]) + e = dense2(input3) + output = layers.Add()([d, e]) + + # We skip the input2 here and use b instead. + model = models.Model([input1, b, input3], output) + # Make sure we have 8 layers, 3 for inputs, 2 for dense and 3 for Add. + # Note that dense1 is still in use by input1. + self.assertLen(model.layers, 8) + # Since the layers are not ordered, let's check class of the layers to + # make sure it match the expectation. + class_count = collections.Counter([l.__class__ for l in model.layers]) + self.assertEqual(class_count[input_layer_lib.InputLayer], 3) + self.assertEqual(class_count[layers.Dense], 2) + self.assertEqual(class_count[layers.Add], 3) + + model.compile("rmsprop", "mse") + model.fit( + [ + np.random.randn(batch_size, 2), + np.random.randn(batch_size, 8), # The shape of b is (batch, 8) + np.random.randn(batch_size, 8), + ], + np.random.randn(batch_size, 8), + ) + + model2 = models.Model([a, b], d) + # 2 input layers and 2 Add layer. + self.assertLen(model2.layers, 4) + class_count = collections.Counter([l.__class__ for l in model2.layers]) + self.assertEqual(class_count[input_layer_lib.InputLayer], 2) + self.assertEqual(class_count[layers.Add], 2) + + model2.compile("rmsprop", "mse") + model2.fit( + [np.random.randn(batch_size, 8), np.random.randn(batch_size, 8)], + np.random.randn(batch_size, 8), + ) + + with self.subTest("savedmodel"): + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_model" + ) + model.save(output_path, save_format="tf") + loaded_model = models.load_model(output_path) + self.assertEqual(model.summary(), loaded_model.summary()) + + with self.subTest("keras_v3"): + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "TF2 must be enabled to use the new `.keras` saving." + ) + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_v3_model.keras" + ) + model.save(output_path, save_format="keras_v3") + loaded_model = models.load_model(output_path) + self.assertEqual(model.summary(), loaded_model.summary()) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/input_layer.py b/keras/engine/input_layer.py index fd0e196d443d..b4f57818fb3d 100644 --- a/keras/engine/input_layer.py +++ b/keras/engine/input_layer.py @@ -12,252 +12,289 @@ # See the License for the specific language governing permissions and # limitations under the License. 
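The two `subTest` blocks above share one round-trip pattern; a condensed sketch using the same public save formats (`keras_v3` requires TF2, per the `skipTest` guard; the toy model is illustrative):

```python
import os
import tempfile

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])
tmp = tempfile.mkdtemp()

# SavedModel format and the newer `.keras` archive format, as exercised
# by the subtests above.
model.save(os.path.join(tmp, "saved_model"), save_format="tf")
model.save(os.path.join(tmp, "model.keras"), save_format="keras_v3")

reloaded = tf.keras.models.load_model(os.path.join(tmp, "model.keras"))
assert reloaded.layers[0].units == 4
```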
# ============================================================================== -# pylint: disable=protected-access + """Input layer code (`Input` and `InputLayer`).""" import tensorflow.compat.v2 as tf + from keras import backend from keras.distribute import distributed_training_utils from keras.engine import base_layer from keras.engine import keras_tensor from keras.engine import node as node_module -from keras.saving.saved_model import layer_serialization +from keras.saving import serialization_lib +from keras.saving.legacy.saved_model import layer_serialization from keras.utils import tf_utils from keras.utils import traceback_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export def _assert_other_arg_none(arg_name, arg): - if arg is not None: - raise ValueError('When `type_spec` is not None, all other args ' - 'except `name` must be None, ' - 'but %s is not None.' % arg_name) + if arg is not None: + raise ValueError( + "When `type_spec` is not None, all other args " + "except `name` must be None, " + "but %s is not None." % arg_name + ) -@keras_export('keras.layers.InputLayer') +@keras_export("keras.layers.InputLayer") class InputLayer(base_layer.Layer): - """Layer to be used as an entry point into a Network (a graph of layers). - - It can either wrap an existing tensor (pass an `input_tensor` argument) - or create a placeholder tensor (pass arguments `input_shape`, and - optionally, `dtype`). - - It is generally recommend to use the Keras Functional model via `Input`, - (which creates an `InputLayer`) without directly using `InputLayer`. - - When using `InputLayer` with the Keras Sequential model, it can be skipped by - moving the `input_shape` parameter to the first layer after the `InputLayer`. - - This class can create placeholders for `tf.Tensors`, `tf.SparseTensors`, and - `tf.RaggedTensors` by choosing `sparse=True` or `ragged=True`. Note that - `sparse` and `ragged` can't be configured to `True` at the same time. - Usage: - - ```python - # With explicit InputLayer. - model = tf.keras.Sequential([ - tf.keras.layers.InputLayer(input_shape=(4,)), - tf.keras.layers.Dense(8)]) - model.compile(tf.optimizers.RMSprop(0.001), loss='mse') - model.fit(np.zeros((10, 4)), - np.ones((10, 8))) - - # Without InputLayer and let the first layer to have the input_shape. - # Keras will add a input for the model behind the scene. - model = tf.keras.Sequential([ - tf.keras.layers.Dense(8, input_shape=(4,))]) - model.compile(tf.optimizers.RMSprop(0.001), loss='mse') - model.fit(np.zeros((10, 4)), - np.ones((10, 8))) - ``` - - Args: - input_shape: Shape tuple (not including the batch axis), or `TensorShape` - instance (not including the batch axis). - batch_size: Optional input batch size (integer or `None`). - dtype: Optional datatype of the input. When not provided, the Keras - default `float` type will be used. - input_tensor: Optional tensor to use as layer input. If set, the layer - will use the `tf.TypeSpec` of this tensor rather - than creating a new placeholder tensor. - sparse: Boolean, whether the placeholder created is meant to be sparse. - Default to `False`. - ragged: Boolean, whether the placeholder created is meant to be ragged. - In this case, values of `None` in the `shape` argument represent - ragged dimensions. For more information about `tf.RaggedTensor`, see - [this guide](https://www.tensorflow.org/guide/ragged_tensor). - Default to `False`. - type_spec: A `tf.TypeSpec` object to create Input from. This `tf.TypeSpec` - represents the entire batch. 
When provided, all other args except - name must be `None`. - name: Optional name of the layer (string). - """ - - @traceback_utils.filter_traceback - def __init__(self, - input_shape=None, - batch_size=None, - dtype=None, - input_tensor=None, - sparse=None, - name=None, - ragged=None, - type_spec=None, - **kwargs): - self._init_input_shape = input_shape - self._init_batch_size = batch_size - self._init_dtype = dtype - self._init_sparse = sparse - self._init_ragged = ragged - self._init_type_spec = type_spec - - strategy = tf.distribute.get_strategy() - if strategy and batch_size is not None and \ - distributed_training_utils.global_batch_size_supported(strategy): - if batch_size % strategy.num_replicas_in_sync != 0: - raise ValueError('The `batch_size` argument ({}) must be divisible by ' - 'the number of replicas ({})'.format( - batch_size, strategy.num_replicas_in_sync)) - batch_size = batch_size // strategy.num_replicas_in_sync - - if 'batch_input_shape' in kwargs: - batch_input_shape = kwargs.pop('batch_input_shape') - if input_shape and batch_input_shape: - raise ValueError('Only provide the input_shape OR ' - 'batch_input_shape argument to ' - 'InputLayer, not both at the same time.') - # Set the input shape and batch size from the batch_input_shape. - # Note that batch_input_shape can be None (unknown rank) or [] (scalar), - # in which case the batch size must be None. - if batch_input_shape: - batch_size = batch_input_shape[0] - input_shape = batch_input_shape[1:] - if kwargs: - raise ValueError(f'Unrecognized keyword arguments: {list(kwargs.keys())}') - - if sparse and ragged: - raise ValueError( - 'Cannot set both sparse and ragged to True in a Keras input.') - - if not name: - prefix = 'input' - name = prefix + '_' + str(backend.get_uid(prefix)) - - if not dtype: - if input_tensor is None: - dtype = backend.floatx() - else: - dtype = backend.dtype(input_tensor) - elif input_tensor is not None and input_tensor.dtype != dtype: - raise ValueError( - '`input_tensor.dtype` differs from `dtype`. Received: ' - f'input_tensor.dtype={input_tensor.dtype} ' - f'but expected dtype={dtype}') - super().__init__(dtype=dtype, name=name) - self.built = True - self.sparse = True if sparse else False - self.ragged = True if ragged else False - self.batch_size = batch_size - self.supports_masking = True - - if isinstance(input_shape, tf.TensorShape): - input_shape = tuple(input_shape.as_list()) - elif isinstance(input_shape, int): - input_shape = (input_shape,) - - if type_spec is not None: - args_that_must_be_none = [ - ('(input_)shape', self._init_input_shape), - ('batch_size', self._init_batch_size), - ('dtype', self._init_dtype), - ('input_tensor', input_tensor), - ('sparse', self._init_sparse), - ('ragged', self._init_ragged), - ] - for arg_name, arg in args_that_must_be_none: - _assert_other_arg_none(arg_name, arg) - if not tf.compat.v1.executing_eagerly_outside_functions(): - raise ValueError('Creating Keras inputs from a type_spec is only ' - 'supported when eager execution is enabled.') - input_tensor = keras_tensor.keras_tensor_from_type_spec(type_spec) - if isinstance(input_tensor, keras_tensor.SparseKerasTensor): - self.sparse = True - if isinstance(input_tensor, keras_tensor.RaggedKerasTensor): - self.ragged = True - self.is_placeholder = True - try: - self._batch_input_shape = tuple(input_tensor.shape.as_list()) - except ValueError: - # If the shape cannot be represented as a tuple (e.g. 
unknown rank) - self._batch_input_shape = None - elif input_tensor is None: - if input_shape is not None: - batch_input_shape = (batch_size,) + tuple(input_shape) - else: - batch_input_shape = None - graph = backend.get_graph() - with graph.as_default(): - input_tensor = backend.placeholder( - shape=batch_input_shape, - dtype=dtype, - name=self.name, - sparse=sparse, - ragged=ragged) - - self.is_placeholder = True - self._batch_input_shape = batch_input_shape - else: - if tf.compat.v1.executing_eagerly_outside_functions(): - if not isinstance(input_tensor, keras_tensor.KerasTensor): - input_tensor = keras_tensor.keras_tensor_from_tensor(input_tensor) - else: - if not tf_utils.is_symbolic_tensor(input_tensor): - raise ValueError('You should not pass an EagerTensor to `Input`. ' - 'For example, instead of creating an ' - '`InputLayer`, you should instantiate your model ' - 'and directly call it on your input.') - self.is_placeholder = False - try: - self._batch_input_shape = tuple(input_tensor.shape.as_list()) - except ValueError: - # If the shape cannot be represented as a tuple (e.g. unknown rank) - self._batch_input_shape = None - # Create an input node. - input_tensor._keras_mask = None - node_module.Node(layer=self, outputs=input_tensor) - - # Store type spec - if isinstance(input_tensor, keras_tensor.KerasTensor) or ( - tf_utils.is_extension_type(input_tensor)): - self._type_spec = input_tensor._type_spec # pylint: disable=protected-access - else: - self._type_spec = tf.TensorSpec( - shape=input_tensor.shape, dtype=input_tensor.dtype, name=self.name) - - def get_config(self): - if self._init_type_spec is not None: - config = { - 'name': self.name, - 'type_spec': self._init_type_spec - } - else: - config = { - 'batch_input_shape': self._batch_input_shape, - 'dtype': self.dtype, - 'sparse': self.sparse, - 'ragged': self.ragged, - 'name': self.name, - } - return config - - @property - def _trackable_saved_model_saver(self): - return layer_serialization.InputLayerSavedModelSaver(self) - - -@keras_export('keras.Input', 'keras.layers.Input') + """Layer to be used as an entry point into a Network (a graph of layers). + + It can either wrap an existing tensor (pass an `input_tensor` argument) + or create a placeholder tensor (pass arguments `input_shape`, and + optionally, `dtype`). + + It is generally recommended to use the Keras Functional model via `Input`, + (which creates an `InputLayer`) without directly using `InputLayer`. + + When using `InputLayer` with the Keras Sequential model, it can be skipped + by moving the `input_shape` parameter to the first layer after the + `InputLayer`. + + This class can create placeholders for `tf.Tensors`, `tf.SparseTensors`, and + `tf.RaggedTensors` by choosing `sparse=True` or `ragged=True`. Note that + `sparse` and `ragged` can't be configured to `True` at the same time. + Usage: + + ```python + # With explicit InputLayer. + model = tf.keras.Sequential([ + tf.keras.layers.InputLayer(input_shape=(4,)), + tf.keras.layers.Dense(8)]) + model.compile(tf.keras.optimizers.RMSprop(0.001), loss='mse') + model.fit(np.zeros((10, 4)), + np.ones((10, 8))) + + # Without InputLayer, let the first layer have the input_shape. + # Keras will add an input for the model behind the scenes.
+ model = tf.keras.Sequential([ + tf.keras.layers.Dense(8, input_shape=(4,))]) + model.compile(tf.keras.optimizers.RMSprop(0.001), loss='mse') + model.fit(np.zeros((10, 4)), + np.ones((10, 8))) + ``` + + Args: + input_shape: Shape tuple (not including the batch axis), or + `TensorShape` instance (not including the batch axis). + batch_size: Optional input batch size (integer or `None`). + dtype: Optional datatype of the input. When not provided, the Keras + default `float` type will be used. + input_tensor: Optional tensor to use as layer input. If set, the layer + will use the `tf.TypeSpec` of this tensor rather + than creating a new placeholder tensor. + sparse: Boolean, whether the placeholder created is meant to be sparse. + Defaults to `False`. + ragged: Boolean, whether the placeholder created is meant to be ragged. + In this case, values of `None` in the `shape` argument represent + ragged dimensions. For more information about `tf.RaggedTensor`, see + [this guide](https://www.tensorflow.org/guide/ragged_tensor). + Defaults to `False`. + type_spec: A `tf.TypeSpec` object to create Input from. This + `tf.TypeSpec` represents the entire batch. When provided, all other + args except name must be `None`. + name: Optional name of the layer (string). + """ + + @traceback_utils.filter_traceback + def __init__( + self, + input_shape=None, + batch_size=None, + dtype=None, + input_tensor=None, + sparse=None, + name=None, + ragged=None, + type_spec=None, + **kwargs, + ): + self._init_input_shape = input_shape + self._init_batch_size = batch_size + self._init_dtype = dtype + self._init_sparse = sparse + self._init_ragged = ragged + self._init_type_spec = type_spec + + strategy = tf.distribute.get_strategy() + if ( + strategy + and batch_size is not None + and distributed_training_utils.global_batch_size_supported(strategy) + ): + if batch_size % strategy.num_replicas_in_sync != 0: + raise ValueError( + "The `batch_size` argument ({}) must be divisible by " + "the number of replicas ({})".format( + batch_size, strategy.num_replicas_in_sync + ) + ) + batch_size = batch_size // strategy.num_replicas_in_sync + + if "batch_input_shape" in kwargs: + batch_input_shape = kwargs.pop("batch_input_shape") + if input_shape and batch_input_shape: + raise ValueError( + "Only provide the input_shape OR " + "batch_input_shape argument to " + "InputLayer, not both at the same time." + ) + # Set the input shape and batch size from the batch_input_shape. + # Note that batch_input_shape can be None (unknown rank) or [] + # (scalar), in which case the batch size must be None. + if batch_input_shape: + batch_size = batch_input_shape[0] + input_shape = batch_input_shape[1:] + if kwargs: + raise ValueError( + f"Unrecognized keyword arguments: {list(kwargs.keys())}" + ) + + if sparse and ragged: + raise ValueError( + "Cannot set both sparse and ragged to True in a Keras input." + ) + + if not name: + prefix = "input" + name = prefix + "_" + str(backend.get_uid(prefix)) + + if not dtype: + if input_tensor is None: + dtype = backend.floatx() + else: + dtype = backend.dtype(input_tensor) + elif input_tensor is not None and input_tensor.dtype != dtype: + raise ValueError( + "`input_tensor.dtype` differs from `dtype`. 
Received: " + f"input_tensor.dtype={input_tensor.dtype} " + f"but expected dtype={dtype}" + ) + super().__init__(dtype=dtype, name=name) + self.built = True + self.sparse = True if sparse else False + self.ragged = True if ragged else False + self.batch_size = batch_size + self.supports_masking = True + + if isinstance(input_shape, tf.TensorShape): + input_shape = tuple(input_shape.as_list()) + elif isinstance(input_shape, int): + input_shape = (input_shape,) + + if type_spec is not None: + args_that_must_be_none = [ + ("(input_)shape", self._init_input_shape), + ("batch_size", self._init_batch_size), + ("dtype", self._init_dtype), + ("input_tensor", input_tensor), + ("sparse", self._init_sparse), + ("ragged", self._init_ragged), + ] + for arg_name, arg in args_that_must_be_none: + _assert_other_arg_none(arg_name, arg) + if not tf.compat.v1.executing_eagerly_outside_functions(): + raise ValueError( + "Creating Keras inputs from a type_spec is only " + "supported when eager execution is enabled." + ) + # Needed for type_spec deserialization since TypeSpec objects + # are not Keras-native (not automatically deserialized). + if isinstance(type_spec, dict): + type_spec = serialization_lib.deserialize_keras_object( + type_spec + ) + input_tensor = keras_tensor.keras_tensor_from_type_spec(type_spec) + if isinstance(input_tensor, keras_tensor.SparseKerasTensor): + self.sparse = True + if isinstance(input_tensor, keras_tensor.RaggedKerasTensor): + self.ragged = True + self.is_placeholder = True + try: + self._batch_input_shape = tuple(input_tensor.shape.as_list()) + except ValueError: + # If the shape cannot be represented as a tuple (e.g. unknown + # rank) + self._batch_input_shape = None + elif input_tensor is None: + if input_shape is not None: + batch_input_shape = (batch_size,) + tuple(input_shape) + else: + batch_input_shape = None + graph = backend.get_graph() + with graph.as_default(): + input_tensor = backend.placeholder( + shape=batch_input_shape, + dtype=dtype, + name=self.name, + sparse=sparse, + ragged=ragged, + ) + + self.is_placeholder = True + self._batch_input_shape = batch_input_shape + else: + if tf.compat.v1.executing_eagerly_outside_functions(): + if not isinstance(input_tensor, keras_tensor.KerasTensor): + input_tensor = keras_tensor.keras_tensor_from_tensor( + input_tensor + ) + else: + if not tf_utils.is_symbolic_tensor(input_tensor): + raise ValueError( + "You should not pass an EagerTensor to `Input`. " + "For example, instead of creating an " + "`InputLayer`, you should instantiate your model " + "and directly call it on your input." + ) + self.is_placeholder = False + try: + self._batch_input_shape = tuple(input_tensor.shape.as_list()) + except ValueError: + # If the shape cannot be represented as a tuple (e.g. unknown + # rank) + self._batch_input_shape = None + # Create an input node. 
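+ # `Node` wires this layer into the functional graph and stamps + # `_keras_history` on the tensor, which is how downstream layers + # trace their inputs back to this `InputLayer`.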
+ input_tensor._keras_mask = None + node_module.Node(layer=self, outputs=input_tensor) + + # Store type spec + if isinstance(input_tensor, keras_tensor.KerasTensor) or ( + tf_utils.is_extension_type(input_tensor) + ): + self._type_spec = input_tensor._type_spec + else: + self._type_spec = tf.TensorSpec( + shape=input_tensor.shape, + dtype=input_tensor.dtype, + name=self.name, + ) + + def get_config(self): + if self._init_type_spec is not None: + config = {"name": self.name, "type_spec": self._init_type_spec} + else: + config = { + "batch_input_shape": self._batch_input_shape, + "dtype": self.dtype, + "sparse": self.sparse, + "ragged": self.ragged, + "name": self.name, + } + return config + + @property + def _trackable_saved_model_saver(self): + return layer_serialization.InputLayerSavedModelSaver(self) + + +@keras_export("keras.Input", "keras.layers.Input") @traceback_utils.filter_traceback -def Input( # pylint: disable=invalid-name +def Input( shape=None, batch_size=None, name=None, @@ -266,131 +303,161 @@ def Input( # pylint: disable=invalid-name tensor=None, ragged=None, type_spec=None, - **kwargs): - """`Input()` is used to instantiate a Keras tensor. - - A Keras tensor is a symbolic tensor-like object, - which we augment with certain attributes that allow us to build a Keras model - just by knowing the inputs and outputs of the model. - - For instance, if `a`, `b` and `c` are Keras tensors, - it becomes possible to do: - `model = Model(input=[a, b], output=c)` - - Args: - shape: A shape tuple (integers), not including the batch size. - For instance, `shape=(32,)` indicates that the expected input - will be batches of 32-dimensional vectors. Elements of this tuple - can be None; 'None' elements represent dimensions where the shape is - not known. - batch_size: optional static batch size (integer). - name: An optional name string for the layer. - Should be unique in a model (do not reuse the same name twice). - It will be autogenerated if it isn't provided. - dtype: The data type expected by the input, as a string - (`float32`, `float64`, `int32`...) - sparse: A boolean specifying whether the placeholder to be created is - sparse. Only one of 'ragged' and 'sparse' can be True. Note that, - if `sparse` is False, sparse tensors can still be passed into the - input - they will be densified with a default value of 0. - tensor: Optional existing tensor to wrap into the `Input` layer. - If set, the layer will use the `tf.TypeSpec` of this tensor rather - than creating a new placeholder tensor. - ragged: A boolean specifying whether the placeholder to be created is - ragged. Only one of 'ragged' and 'sparse' can be True. In this case, - values of 'None' in the 'shape' argument represent ragged dimensions. - For more information about RaggedTensors, see - [this guide](https://www.tensorflow.org/guide/ragged_tensors). - type_spec: A `tf.TypeSpec` object to create the input placeholder from. - When provided, all other args except name must be None. - **kwargs: deprecated arguments support. Supports `batch_shape` and - `batch_input_shape`. - - Returns: - A `tensor`. - - Example: - - ```python - # this is a logistic regression in Keras - x = Input(shape=(32,)) - y = Dense(16, activation='softmax')(x) - model = Model(x, y) - ``` - - Note that even if eager execution is enabled, - `Input` produces a symbolic tensor-like object (i.e. a placeholder). 
- This symbolic tensor-like object can be used with lower-level - TensorFlow ops that take tensors as inputs, as such: - - ```python - x = Input(shape=(32,)) - y = tf.square(x) # This op will be treated like a layer - model = Model(x, y) - ``` - - (This behavior does not work for higher-order TensorFlow APIs such as - control flow and being directly watched by a `tf.GradientTape`). - - However, the resulting model will not track any variables that were - used as inputs to TensorFlow ops. All variable usages must happen within - Keras layers to make sure they will be tracked by the model's weights. - - The Keras Input can also create a placeholder from an arbitrary `tf.TypeSpec`, - e.g: - - ```python - x = Input(type_spec=tf.RaggedTensorSpec(shape=[None, None], - dtype=tf.float32, ragged_rank=1)) - y = x.values - model = Model(x, y) - ``` - When passing an arbitrary `tf.TypeSpec`, it must represent the signature of an - entire batch instead of just one example. - - Raises: - ValueError: If both `sparse` and `ragged` are provided. - ValueError: If both `shape` and (`batch_input_shape` or `batch_shape`) are - provided. - ValueError: If `shape`, `tensor` and `type_spec` are None. - ValueError: If arguments besides `type_spec` are non-None while `type_spec` - is passed. - ValueError: if any unrecognized parameters are provided. - """ - if sparse and ragged: - raise ValueError( - 'Cannot set both `sparse` and `ragged` to `True` in a Keras `Input`.') - - input_layer_config = {'name': name, 'dtype': dtype, 'sparse': sparse, - 'ragged': ragged, 'input_tensor': tensor, - 'type_spec': type_spec} - - batch_input_shape = kwargs.pop('batch_input_shape', - kwargs.pop('batch_shape', None)) - if shape is not None and batch_input_shape is not None: - raise ValueError('Only provide the `shape` OR `batch_input_shape` argument ' - 'to Input, not both at the same time.') - if (batch_input_shape is None and shape is None and tensor is None - and type_spec is None): - raise ValueError('Please provide to Input a `shape` ' - 'or a `tensor` or a `type_spec` argument. Note that ' - '`shape` does not include the batch ' - 'dimension.') - if kwargs: - raise ValueError(f'Unrecognized keyword arguments: {list(kwargs.keys())}') - - if batch_input_shape: - shape = batch_input_shape[1:] - input_layer_config.update({'batch_input_shape': batch_input_shape}) - else: - input_layer_config.update( - {'batch_size': batch_size, 'input_shape': shape}) - input_layer = InputLayer(**input_layer_config) - - # Return tensor including `_keras_history`. - # Note that in this case train_output and test_output are the same pointer. - outputs = input_layer._inbound_nodes[0].outputs - if isinstance(outputs, list) and len(outputs) == 1: - return outputs[0] - else: - return outputs + **kwargs, +): + """`Input()` is used to instantiate a Keras tensor. + + A Keras tensor is a symbolic tensor-like object, which we augment with + certain attributes that allow us to build a Keras model just by knowing the + inputs and outputs of the model. + + For instance, if `a`, `b` and `c` are Keras tensors, + it becomes possible to do: + `model = Model(input=[a, b], output=c)` + + Args: + shape: A shape tuple (integers), not including the batch size. + For instance, `shape=(32,)` indicates that the expected input + will be batches of 32-dimensional vectors. Elements of this tuple + can be None; 'None' elements represent dimensions where the shape is + not known. + batch_size: optional static batch size (integer). + name: An optional name string for the layer. 
+ Should be unique in a model (do not reuse the same name twice). + It will be autogenerated if it isn't provided. + dtype: The data type expected by the input, as a string + (`float32`, `float64`, `int32`...) + sparse: A boolean specifying whether the placeholder to be created is + sparse. Only one of 'ragged' and 'sparse' can be True. Note that, + if `sparse` is False, sparse tensors can still be passed into the + input - they will be densified with a default value of 0. + tensor: Optional existing tensor to wrap into the `Input` layer. + If set, the layer will use the `tf.TypeSpec` of this tensor rather + than creating a new placeholder tensor. + ragged: A boolean specifying whether the placeholder to be created is + ragged. Only one of 'ragged' and 'sparse' can be True. In this case, + values of 'None' in the 'shape' argument represent ragged + dimensions. For more information about RaggedTensors, see + [this guide](https://www.tensorflow.org/guide/ragged_tensor). + type_spec: A `tf.TypeSpec` object to create the input placeholder from. + When provided, all other args except name must be None. + **kwargs: deprecated arguments support. Supports `batch_shape` and + `batch_input_shape`. + + Returns: + A `tensor`. + + Example: + + ```python + # this is a logistic regression in Keras + x = Input(shape=(32,)) + y = Dense(16, activation='softmax')(x) + model = Model(x, y) + ``` + + Note that even if eager execution is enabled, + `Input` produces a symbolic tensor-like object (i.e. a placeholder). + This symbolic tensor-like object can be used with lower-level + TensorFlow ops that take tensors as inputs, as such: + + ```python + x = Input(shape=(32,)) + y = tf.square(x) # This op will be treated like a layer + model = Model(x, y) + ``` + + (This behavior does not work for higher-order TensorFlow APIs such as + control flow and being directly watched by a `tf.GradientTape`). + + However, the resulting model will not track any variables that were + used as inputs to TensorFlow ops. All variable usages must happen within + Keras layers to make sure they will be tracked by the model's weights. + + The Keras Input can also create a placeholder from an arbitrary + `tf.TypeSpec`, e.g: + + ```python + x = Input(type_spec=tf.RaggedTensorSpec(shape=[None, None], + dtype=tf.float32, ragged_rank=1)) + y = x.values + model = Model(x, y) + ``` + When passing an arbitrary `tf.TypeSpec`, it must represent the signature of + an entire batch instead of just one example. + + Raises: + ValueError: If both `sparse` and `ragged` are provided. + ValueError: If both `shape` and (`batch_input_shape` or `batch_shape`) are + provided. + ValueError: If `shape`, `tensor` and `type_spec` are None. + ValueError: If arguments besides `type_spec` are non-None while + `type_spec` is passed. + ValueError: if any unrecognized parameters are provided. + """ + if sparse and ragged: + raise ValueError( + "Cannot set both `sparse` and `ragged` to `True` in a " + "Keras `Input`." 
+ ) + + has_spec_name = ( + name is None and type_spec is not None and hasattr(type_spec, "name") + ) + + if has_spec_name: + name = type_spec.name + + input_layer_config = { + "name": name, + "dtype": dtype, + "sparse": sparse, + "ragged": ragged, + "input_tensor": tensor, + "type_spec": type_spec, + } + + batch_input_shape = kwargs.pop( + "batch_input_shape", kwargs.pop("batch_shape", None) + ) + if shape is not None and batch_input_shape is not None: + raise ValueError( + "Only provide the `shape` OR `batch_input_shape` argument " + "to Input, not both at the same time." + ) + if ( + batch_input_shape is None + and shape is None + and tensor is None + and type_spec is None + ): + raise ValueError( + "Please provide to Input a `shape` " + "or a `tensor` or a `type_spec` argument. Note that " + "`shape` does not include the batch " + "dimension." + ) + if kwargs: + raise ValueError( + f"Unrecognized keyword arguments: {list(kwargs.keys())}" + ) + + if batch_input_shape: + shape = batch_input_shape[1:] + input_layer_config.update({"batch_input_shape": batch_input_shape}) + else: + input_layer_config.update( + {"batch_size": batch_size, "input_shape": shape} + ) + input_layer = InputLayer(**input_layer_config) + + # Return tensor including `_keras_history`. + # Note that in this case train_output and test_output are the same pointer. + outputs = input_layer._inbound_nodes[0].outputs + if isinstance(outputs, list) and len(outputs) == 1: + output = outputs[0] + else: + output = outputs + if has_spec_name and hasattr(output, "_name"): + output._name = input_layer.name + return output diff --git a/keras/engine/input_layer_test.py b/keras/engine/input_layer_test.py index 142119fb3ee1..636d6aa4faee 100644 --- a/keras/engine/input_layer_test.py +++ b/keras/engine/input_layer_test.py @@ -11,359 +11,456 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#,============================================================================ +# ,============================================================================ """Tests for InputLayer construction.""" + import tensorflow.compat.v2 as tf -from tensorflow.python.framework import type_spec + +from keras import Sequential from keras import backend -from keras.testing_infra import test_combinations +from keras import models from keras.engine import functional from keras.engine import input_layer as input_layer_lib +from keras.layers import Dense from keras.layers import core -from keras.saving import model_config - - -class TwoTensors(tf.__internal__.CompositeTensor): - """A simple value type to test TypeSpec. - - Contains two tensors (x, y) and a string (color). The color value is a - stand-in for any extra type metadata we might need to store. +from keras.saving.legacy import model_config +from keras.saving.serialization_lib import SafeModeScope +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils - This value type contains no single dtype. 
- """ +# isort: off +from tensorflow.python.framework import type_spec +from tensorflow.python.framework import type_spec_registry - def __init__(self, x, y, color='red', assign_variant_dtype=False): - assert isinstance(color, str) - self.x = tf.convert_to_tensor(x) - self.y = tf.convert_to_tensor(y) - self.color = color - self.shape = tf.TensorShape(None) - self._shape = tf.TensorShape(None) - if assign_variant_dtype: - self.dtype = tf.variant - self._assign_variant_dtype = assign_variant_dtype - def _type_spec(self): - return TwoTensorsSpecNoOneDtype( - self.x.shape, self.x.dtype, self.y.shape, - self.y.dtype, color=self.color, - assign_variant_dtype=self._assign_variant_dtype) +class TwoTensors(tf.__internal__.CompositeTensor): + """A simple value type to test TypeSpec. + + Contains two tensors (x, y) and a string (color). The color value is a + stand-in for any extra type metadata we might need to store. + + This value type contains no single dtype. + """ + + def __init__(self, x, y, color="red", assign_variant_dtype=False): + assert isinstance(color, str) + self.x = tf.convert_to_tensor(x) + self.y = tf.convert_to_tensor(y) + self.color = color + self.shape = tf.TensorShape(None) + self._shape = tf.TensorShape(None) + if assign_variant_dtype: + self.dtype = tf.variant + self._assign_variant_dtype = assign_variant_dtype + + def _type_spec(self): + return TwoTensorsSpecNoOneDtype( + self.x.shape, + self.x.dtype, + self.y.shape, + self.y.dtype, + color=self.color, + assign_variant_dtype=self._assign_variant_dtype, + ) def as_shape(shape): - """Converts the given object to a TensorShape.""" - if isinstance(shape, tf.TensorShape): - return shape - else: - return tf.TensorShape(shape) + """Converts the given object to a TensorShape.""" + if isinstance(shape, tf.TensorShape): + return shape + else: + return tf.TensorShape(shape) -@type_spec.register('tf.TwoTensorsSpec') +@type_spec_registry.register("tf.TwoTensorsSpec") class TwoTensorsSpecNoOneDtype(tf.TypeSpec): - """A TypeSpec for the TwoTensors value type.""" - - def __init__( - self, x_shape, x_dtype, y_shape, y_dtype, color='red', - assign_variant_dtype=False): - self.x_shape = as_shape(x_shape) - self.x_dtype = tf.as_dtype(x_dtype) - self.y_shape = as_shape(y_shape) - self.y_dtype = tf.as_dtype(y_dtype) - self.color = color - self.shape = tf.TensorShape(None) - self._shape = tf.TensorShape(None) - if assign_variant_dtype: - self.dtype = tf.variant - self._assign_variant_dtype = assign_variant_dtype + """A TypeSpec for the TwoTensors value type.""" + + def __init__( + self, + x_shape, + x_dtype, + y_shape, + y_dtype, + color="red", + assign_variant_dtype=False, + ): + self.x_shape = as_shape(x_shape) + self.x_dtype = tf.as_dtype(x_dtype) + self.y_shape = as_shape(y_shape) + self.y_dtype = tf.as_dtype(y_dtype) + self.color = color + self.shape = tf.TensorShape(None) + self._shape = tf.TensorShape(None) + if assign_variant_dtype: + self.dtype = tf.variant + self._assign_variant_dtype = assign_variant_dtype + + value_type = property(lambda self: TwoTensors) + + @property + def _component_specs(self): + return ( + tf.TensorSpec(self.x_shape, self.x_dtype), + tf.TensorSpec(self.y_shape, self.y_dtype), + ) + + def _to_components(self, value): + return (value.x, value.y) + + def _from_components(self, components): + x, y = components + return TwoTensors(x, y, self.color) + + def _serialize(self): + return ( + self.x_shape, + self.x_dtype, + self.y_shape, + self.y_dtype, + self.color, + ) + + @classmethod + def from_value(cls, value): + return 
cls( + value.x.shape, + value.x.dtype, + value.y.shape, + value.y.dtype, + value.color, + ) - value_type = property(lambda self: TwoTensors) - @property - def _component_specs(self): - return (tf.TensorSpec(self.x_shape, self.x_dtype), - tf.TensorSpec(self.y_shape, self.y_dtype)) +type_spec.register_type_spec_from_value_converter( + TwoTensors, TwoTensorsSpecNoOneDtype.from_value +) - def _to_components(self, value): - return (value.x, value.y) - def _from_components(self, components): - x, y = components - return TwoTensors(x, y, self.color) +class InputLayerTest(test_combinations.TestCase): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasicOutputShapeNoBatchSize(self): + # Create a Keras Input + x = input_layer_lib.Input(shape=(32,), name="input_a") + self.assertAllEqual(x.shape.as_list(), [None, 32]) - def _serialize(self): - return (self.x_shape, self.x_dtype, self.y_shape, self.y_dtype, self.color) + # Verify you can construct and use a model w/ this input + model = functional.Functional(x, x * 2.0) + self.assertAllEqual(model(tf.ones((3, 32))), tf.ones((3, 32)) * 2.0) - @classmethod - def from_value(cls, value): - return cls(value.x.shape, value.x.dtype, value.y.shape, value.y.dtype, - value.color) + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasicOutputShapeWithBatchSize(self): + # Create a Keras Input + x = input_layer_lib.Input(batch_size=6, shape=(32,), name="input_b") + self.assertAllEqual(x.shape.as_list(), [6, 32]) + # Verify you can construct and use a model w/ this input + model = functional.Functional(x, x * 2.0) + self.assertAllEqual(model(tf.ones(x.shape)), tf.ones(x.shape) * 2.0) -type_spec.register_type_spec_from_value_converter( - TwoTensors, TwoTensorsSpecNoOneDtype.from_value) + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testBasicOutputShapeNoBatchSizeInTFFunction(self): + model = None + @tf.function + def run_model(inp): + nonlocal model + if not model: + # Create a Keras Input + x = input_layer_lib.Input(shape=(8,), name="input_a") + self.assertAllEqual(x.shape.as_list(), [None, 8]) -class InputLayerTest(test_combinations.TestCase): + # Verify you can construct and use a model w/ this input + model = functional.Functional(x, x * 2.0) + return model(inp) - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testBasicOutputShapeNoBatchSize(self): - # Create a Keras Input - x = input_layer_lib.Input(shape=(32,), name='input_a') - self.assertAllEqual(x.shape.as_list(), [None, 32]) - - # Verify you can construct and use a model w/ this input - model = functional.Functional(x, x * 2.0) - self.assertAllEqual(model(tf.ones((3, 32))), - tf.ones((3, 32)) * 2.0) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testBasicOutputShapeWithBatchSize(self): - # Create a Keras Input - x = input_layer_lib.Input(batch_size=6, shape=(32,), name='input_b') - self.assertAllEqual(x.shape.as_list(), [6, 32]) - - # Verify you can construct and use a model w/ this input - model = functional.Functional(x, x * 2.0) - self.assertAllEqual(model(tf.ones(x.shape)), - tf.ones(x.shape) * 2.0) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testBasicOutputShapeNoBatchSizeInTFFunction(self): - model = None - @tf.function - def run_model(inp): - nonlocal model - if not model: - # Create a Keras Input - x = input_layer_lib.Input(shape=(8,), 
name='input_a') - self.assertAllEqual(x.shape.as_list(), [None, 8]) + self.assertAllEqual(run_model(tf.ones((10, 8))), tf.ones((10, 8)) * 2.0) - # Verify you can construct and use a model w/ this input + @test_combinations.run_all_keras_modes + def testBasicOutputShapeWithBatchSizeAndNoneDimensionsPlaceholder(self): + x = input_layer_lib.Input((2, 3), batch_size=4, dtype=tf.float32) model = functional.Functional(x, x * 2.0) - return model(inp) - - self.assertAllEqual(run_model(tf.ones((10, 8))), - tf.ones((10, 8)) * 2.0) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInputTensorArg(self): - # Create a Keras Input - x = input_layer_lib.Input(tensor=tf.zeros((7, 32))) - self.assertAllEqual(x.shape.as_list(), [7, 32]) - - # Verify you can construct and use a model w/ this input - model = functional.Functional(x, x * 2.0) - self.assertAllEqual(model(tf.ones(x.shape)), - tf.ones(x.shape) * 2.0) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testInputTensorArgInTFFunction(self): - # We use a mutable model container instead of a model python variable, - # because python 2.7 does not have `nonlocal` - model_container = {} - - @tf.function - def run_model(inp): - if not model_container: + output = model(backend.placeholder(shape=[None, None, 3])) + # batch size and dimension defined in Input should not be applied + self.assertAllEqual(output.shape.as_list(), [None, None, 3]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInputTensorArg(self): # Create a Keras Input - x = input_layer_lib.Input(tensor=tf.zeros((10, 16))) - self.assertAllEqual(x.shape.as_list(), [10, 16]) + x = input_layer_lib.Input(tensor=tf.zeros((7, 32))) + self.assertAllEqual(x.shape.as_list(), [7, 32]) # Verify you can construct and use a model w/ this input - model_container['model'] = functional.Functional(x, x * 3.0) - return model_container['model'](inp) - - self.assertAllEqual(run_model(tf.ones((10, 16))), - tf.ones((10, 16)) * 3.0) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testCompositeInputTensorArg(self): - # Create a Keras Input - rt = tf.RaggedTensor.from_row_splits( - values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) - x = input_layer_lib.Input(tensor=rt) - - # Verify you can construct and use a model w/ this input - model = functional.Functional(x, x * 2) - - # And that the model works - rt = tf.RaggedTensor.from_row_splits( - values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) - self.assertAllEqual(model(rt), rt * 2) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testCompositeInputTensorArgInTFFunction(self): - # We use a mutable model container instead of a model python variable, - # because python 2.7 does not have `nonlocal` - model_container = {} - - @tf.function - def run_model(inp): - if not model_container: + model = functional.Functional(x, x * 2.0) + self.assertAllEqual(model(tf.ones(x.shape)), tf.ones(x.shape) * 2.0) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testInputTensorArgInTFFunction(self): + # We use a mutable model container instead of a model python variable, + # because python 2.7 does not have `nonlocal` + model_container = {} + + @tf.function + def run_model(inp): + if not model_container: + # Create a Keras Input + x = input_layer_lib.Input(tensor=tf.zeros((10, 16))) + self.assertAllEqual(x.shape.as_list(), [10, 16]) + + # 
Verify you can construct and use a model w/ this input + model_container["model"] = functional.Functional(x, x * 3.0) + return model_container["model"](inp) + + self.assertAllEqual( + run_model(tf.ones((10, 16))), tf.ones((10, 16)) * 3.0 + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testCompositeInputTensorArg(self): # Create a Keras Input rt = tf.RaggedTensor.from_row_splits( - values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) + values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8] + ) x = input_layer_lib.Input(tensor=rt) # Verify you can construct and use a model w/ this input - model_container['model'] = functional.Functional(x, x * 3) - return model_container['model'](inp) - - # And verify the model works - rt = tf.RaggedTensor.from_row_splits( - values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) - self.assertAllEqual(run_model(rt), rt * 3) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testNoMixingArgsWithTypeSpecArg(self): - with self.assertRaisesRegexp( - ValueError, 'all other args except `name` must be None'): - input_layer_lib.Input( - shape=(4, 7), - type_spec=tf.TensorSpec((2, 7, 32), tf.float32)) - with self.assertRaisesRegexp( - ValueError, 'all other args except `name` must be None'): - input_layer_lib.Input( - batch_size=4, - type_spec=tf.TensorSpec((7, 32), tf.float32)) - with self.assertRaisesRegexp( - ValueError, 'all other args except `name` must be None'): - input_layer_lib.Input( - dtype=tf.int64, - type_spec=tf.TensorSpec((7, 32), tf.float32)) - with self.assertRaisesRegexp( - ValueError, 'all other args except `name` must be None'): - input_layer_lib.Input( - sparse=True, - type_spec=tf.TensorSpec((7, 32), tf.float32)) - with self.assertRaisesRegexp( - ValueError, 'all other args except `name` must be None'): - input_layer_lib.Input( - ragged=True, - type_spec=tf.TensorSpec((7, 32), tf.float32)) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testTypeSpecArg(self): - # Create a Keras Input - x = input_layer_lib.Input( - type_spec=tf.TensorSpec((7, 32), tf.float32)) - self.assertAllEqual(x.shape.as_list(), [7, 32]) - - # Verify you can construct and use a model w/ this input - model = functional.Functional(x, x * 2.0) - self.assertAllEqual(model(tf.ones(x.shape)), - tf.ones(x.shape) * 2.0) - - # Test serialization / deserialization - model = functional.Functional.from_config(model.get_config()) - self.assertAllEqual(model(tf.ones(x.shape)), - tf.ones(x.shape) * 2.0) - - model = model_config.model_from_json(model.to_json()) - self.assertAllEqual(model(tf.ones(x.shape)), - tf.ones(x.shape) * 2.0) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testTypeSpecArgInTFFunction(self): - # We use a mutable model container instead of a model python variable, - # because python 2.7 does not have `nonlocal` - model_container = {} - - @tf.function - def run_model(inp): - if not model_container: + model = functional.Functional(x, x * 2) + + # And that the model works + rt = tf.RaggedTensor.from_row_splits( + values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8] + ) + self.assertAllEqual(model(rt), rt * 2) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testCompositeInputTensorArgInTFFunction(self): + # We use a mutable model container instead of a model python variable, + # because python 2.7 does not have `nonlocal` + model_container = {} + + @tf.function + def 
run_model(inp): + if not model_container: + # Create a Keras Input + rt = tf.RaggedTensor.from_row_splits( + values=[3, 1, 4, 1, 5, 9, 2, 6], + row_splits=[0, 4, 4, 7, 8, 8], + ) + x = input_layer_lib.Input(tensor=rt) + + # Verify you can construct and use a model w/ this input + model_container["model"] = functional.Functional(x, x * 3) + return model_container["model"](inp) + + # And verify the model works + rt = tf.RaggedTensor.from_row_splits( + values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8] + ) + self.assertAllEqual(run_model(rt), rt * 3) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testNoMixingArgsWithTypeSpecArg(self): + with self.assertRaisesRegexp( + ValueError, "all other args except `name` must be None" + ): + input_layer_lib.Input( + shape=(4, 7), type_spec=tf.TensorSpec((2, 7, 32), tf.float32) + ) + with self.assertRaisesRegexp( + ValueError, "all other args except `name` must be None" + ): + input_layer_lib.Input( + batch_size=4, type_spec=tf.TensorSpec((7, 32), tf.float32) + ) + with self.assertRaisesRegexp( + ValueError, "all other args except `name` must be None" + ): + input_layer_lib.Input( + dtype=tf.int64, type_spec=tf.TensorSpec((7, 32), tf.float32) + ) + with self.assertRaisesRegexp( + ValueError, "all other args except `name` must be None" + ): + input_layer_lib.Input( + sparse=True, type_spec=tf.TensorSpec((7, 32), tf.float32) + ) + with self.assertRaisesRegexp( + ValueError, "all other args except `name` must be None" + ): + input_layer_lib.Input( + ragged=True, type_spec=tf.TensorSpec((7, 32), tf.float32) + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testTypeSpecArg(self): # Create a Keras Input - x = input_layer_lib.Input( - type_spec=tf.TensorSpec((10, 16), tf.float32)) - self.assertAllEqual(x.shape.as_list(), [10, 16]) + x = input_layer_lib.Input(type_spec=tf.TensorSpec((7, 32), tf.float32)) + self.assertAllEqual(x.shape.as_list(), [7, 32]) # Verify you can construct and use a model w/ this input - model_container['model'] = functional.Functional(x, x * 3.0) - return model_container['model'](inp) - - self.assertAllEqual(run_model(tf.ones((10, 16))), - tf.ones((10, 16)) * 3.0) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testCompositeTypeSpecArg(self): - # Create a Keras Input - rt = tf.RaggedTensor.from_row_splits( - values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) - x = input_layer_lib.Input(type_spec=rt._type_spec) - - # Verify you can construct and use a model w/ this input - model = functional.Functional(x, x * 2) - - # And that the model works - rt = tf.RaggedTensor.from_row_splits( - values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) - self.assertAllEqual(model(rt), rt * 2) - - # Test serialization / deserialization - model = functional.Functional.from_config(model.get_config()) - self.assertAllEqual(model(rt), rt * 2) - model = model_config.model_from_json(model.to_json()) - self.assertAllEqual(model(rt), rt * 2) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testCompositeTypeSpecArgInTFFunction(self): - # We use a mutable model container instead of a model pysthon variable, - # because python 2.7 does not have `nonlocal` - model_container = {} - - @tf.function - def run_model(inp): - if not model_container: + model = functional.Functional(x, x * 2.0) + self.assertAllEqual(model(tf.ones(x.shape)), tf.ones(x.shape) * 2.0) + + # Test serialization / deserialization 
+ model = functional.Functional.from_config(model.get_config()) + self.assertAllEqual(model(tf.ones(x.shape)), tf.ones(x.shape) * 2.0) + + model = model_config.model_from_json(model.to_json()) + self.assertAllEqual(model(tf.ones(x.shape)), tf.ones(x.shape) * 2.0) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testTypeSpecArgInTFFunction(self): + # We use a mutable model container instead of a model python variable, + # because python 2.7 does not have `nonlocal` + model_container = {} + + @tf.function + def run_model(inp): + if not model_container: + # Create a Keras Input + x = input_layer_lib.Input( + type_spec=tf.TensorSpec((10, 16), tf.float32) + ) + self.assertAllEqual(x.shape.as_list(), [10, 16]) + + # Verify you can construct and use a model w/ this input + model_container["model"] = functional.Functional(x, x * 3.0) + return model_container["model"](inp) + + self.assertAllEqual( + run_model(tf.ones((10, 16))), tf.ones((10, 16)) * 3.0 + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testCompositeTypeSpecArg(self): # Create a Keras Input rt = tf.RaggedTensor.from_row_splits( - values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) + values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8] + ) x = input_layer_lib.Input(type_spec=rt._type_spec) # Verify you can construct and use a model w/ this input - model_container['model'] = functional.Functional(x, x * 3) - return model_container['model'](inp) - - # And verify the model works - rt = tf.RaggedTensor.from_row_splits( - values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) - self.assertAllEqual(run_model(rt), rt * 3) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testCompositeTypeSpecArgWithoutDtype(self): - for assign_variant_dtype in [False, True]: - # Create a Keras Input - spec = TwoTensorsSpecNoOneDtype( - (1, 2, 3), tf.float32, (1, 2, 3), tf.int64, - assign_variant_dtype=assign_variant_dtype) - x = input_layer_lib.Input(type_spec=spec) - - def lambda_fn(tensors): - return (tf.cast(tensors.x, tf.float64) - + tf.cast(tensors.y, tf.float64)) - # Verify you can construct and use a model w/ this input - model = functional.Functional(x, core.Lambda(lambda_fn)(x)) - - # And that the model works - two_tensors = TwoTensors(tf.ones((1, 2, 3)) * 2.0, - tf.ones(1, 2, 3)) - self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors)) - - # Test serialization / deserialization - model = functional.Functional.from_config(model.get_config()) - self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors)) - model = model_config.model_from_json(model.to_json()) - self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors)) - - def test_serialize_with_unknown_rank(self): - inp = backend.placeholder(shape=None, dtype=tf.string) - x = input_layer_lib.InputLayer(input_tensor=inp, dtype=tf.string) - loaded = input_layer_lib.InputLayer.from_config(x.get_config()) - self.assertIsNone(loaded._batch_input_shape) - - -if __name__ == '__main__': - tf.test.main() + model = functional.Functional(x, x * 2) + + # And that the model works + rt = tf.RaggedTensor.from_row_splits( + values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8] + ) + self.assertAllEqual(model(rt), rt * 2) + + # Test serialization / deserialization + model = functional.Functional.from_config(model.get_config()) + self.assertAllEqual(model(rt), rt * 2) + model = model_config.model_from_json(model.to_json()) + self.assertAllEqual(model(rt), rt * 
2) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testCompositeTypeSpecArgInTFFunction(self): + # We use a mutable model container instead of a model python variable, + # because python 2.7 does not have `nonlocal` + model_container = {} + + @tf.function + def run_model(inp): + if not model_container: + # Create a Keras Input + rt = tf.RaggedTensor.from_row_splits( + values=[3, 1, 4, 1, 5, 9, 2, 6], + row_splits=[0, 4, 4, 7, 8, 8], + ) + x = input_layer_lib.Input(type_spec=rt._type_spec) + + # Verify you can construct and use a model w/ this input + model_container["model"] = functional.Functional(x, x * 3) + return model_container["model"](inp) + + # And verify the model works + rt = tf.RaggedTensor.from_row_splits( + values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8] + ) + self.assertAllEqual(run_model(rt), rt * 3) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testCompositeTypeSpecArgWithoutDtype(self): + for assign_variant_dtype in [False, True]: + # Create a Keras Input + spec = TwoTensorsSpecNoOneDtype( + (1, 2, 3), + tf.float32, + (1, 2, 3), + tf.int64, + assign_variant_dtype=assign_variant_dtype, + ) + x = input_layer_lib.Input(type_spec=spec) + + def lambda_fn(tensors): + return tf.cast(tensors.x, tf.float64) + tf.cast( + tensors.y, tf.float64 + ) + + # Verify you can construct and use a model w/ this input + model = functional.Functional(x, core.Lambda(lambda_fn)(x)) + + # And that the model works + two_tensors = TwoTensors(tf.ones((1, 2, 3)) * 2.0, tf.ones(1, 2, 3)) + self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors)) + + # Test serialization / deserialization + with SafeModeScope(safe_mode=False): + model = functional.Functional.from_config(model.get_config()) + self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors)) + model = model_config.model_from_json(model.to_json()) + self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors)) + + def test_serialize_with_unknown_rank(self): + inp = backend.placeholder(shape=None, dtype=tf.string) + x = input_layer_lib.InputLayer(input_tensor=inp, dtype=tf.string) + loaded = input_layer_lib.InputLayer.from_config(x.get_config()) + self.assertIsNone(loaded._batch_input_shape) + + @test_utils.run_v2_only + def test_typespec_naming_propagation(self): + type_spec = tf.TensorSpec(name="test", shape=(None, None, 2)) + input1 = input_layer_lib.Input(type_spec=type_spec) + self.assertEqual(input1.name, "test") + + @test_utils.run_v2_only + def test_save_input_naming(self): + x = input_layer_lib.Input(shape=(10,), name="features") + y = Dense(1)(x) + model = functional.Functional(x, y) + self.assertEqual(model.layers[0].name, "features") + save_path = self.get_temp_dir() + "/basic_model.keras" + model.save(save_path) + reloaded_model = models.load_model(save_path) + self.assertEqual(reloaded_model.layers[0].name, "features") + + @test_utils.run_v2_only + def test_export_input_naming(self): + model = Sequential( + layers=[ + input_layer_lib.Input(shape=(8,), name="features"), + Dense(1), + ] + ) + x = tf.random.normal((8, 8)) + model(x) + + export_path = self.get_temp_dir() + "test_model" + model.export(export_path) + reloaded_artifact = tf.saved_model.load(export_path) + self.assertEqual( + reloaded_artifact.signatures._signatures["serve"]._arg_keywords[-1], + "features", + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/input_spec.py b/keras/engine/input_spec.py index 354b0b7e0f46..1e18c83cd0df 100644 ---
a/keras/engine/input_spec.py +++ b/keras/engine/input_spec.py @@ -12,269 +12,305 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access -# pylint: disable=g-classes-have-attributes + + """Contains the InputSpec class.""" import tensorflow.compat.v2 as tf + from keras import backend + +# isort: off from tensorflow.python.util.tf_export import keras_export from tensorflow.python.util.tf_export import tf_export -@keras_export('keras.layers.InputSpec', - v1=['keras.layers.InputSpec', - 'keras.__internal__.legacy.layers.InputSpec']) -@tf_export(v1=['layers.InputSpec']) +@keras_export( + "keras.layers.InputSpec", + v1=["keras.layers.InputSpec", "keras.__internal__.legacy.layers.InputSpec"], +) +@tf_export(v1=["layers.InputSpec"]) class InputSpec: - """Specifies the rank, dtype and shape of every input to a layer. - - Layers can expose (if appropriate) an `input_spec` attribute: - an instance of `InputSpec`, or a nested structure of `InputSpec` instances - (one per input tensor). These objects enable the layer to run input - compatibility checks for input structure, input rank, input shape, and - input dtype. - - A None entry in a shape is compatible with any dimension, - a None shape is compatible with any shape. - - Args: - dtype: Expected DataType of the input. - shape: Shape tuple, expected shape of the input - (may include None for unchecked axes). Includes the batch size. - ndim: Integer, expected rank of the input. - max_ndim: Integer, maximum rank of the input. - min_ndim: Integer, minimum rank of the input. - axes: Dictionary mapping integer axes to - a specific dimension value. - allow_last_axis_squeeze: If True, then allow inputs of rank N+1 as long - as the last axis of the input is 1, as well as inputs of rank N-1 - as long as the last axis of the spec is 1. - name: Expected key corresponding to this input when passing data as - a dictionary. - - Example: - - ```python - class MyLayer(Layer): - def __init__(self): - super(MyLayer, self).__init__() - # The layer will accept inputs with shape (?, 28, 28) & (?, 28, 28, 1) - # and raise an appropriate error message otherwise. - self.input_spec = InputSpec( - shape=(None, 28, 28, 1), - allow_last_axis_squeeze=True) - ``` - """ - - def __init__(self, - dtype=None, - shape=None, - ndim=None, - max_ndim=None, - min_ndim=None, - axes=None, - allow_last_axis_squeeze=False, - name=None): - self.dtype = tf.as_dtype(dtype).name if dtype is not None else None - shape = tf.TensorShape(shape) - if shape.rank is None: - shape = None - else: - shape = tuple(shape.as_list()) - if shape is not None: - self.ndim = len(shape) - self.shape = shape - else: - self.ndim = ndim - self.shape = None - self.max_ndim = max_ndim - self.min_ndim = min_ndim - self.name = name - self.allow_last_axis_squeeze = allow_last_axis_squeeze - try: - axes = axes or {} - self.axes = {int(k): axes[k] for k in axes} - except (ValueError, TypeError): - raise TypeError('Argument `axes` must be a dict with integer keys. 
' - f'Received: axes={axes}') - - if self.axes and (self.ndim is not None or self.max_ndim is not None): - max_dim = (self.ndim if self.ndim else self.max_ndim) - 1 - max_axis = max(self.axes) - if max_axis > max_dim: - raise ValueError('Axis {} is greater than the maximum allowed value: {}' - .format(max_axis, max_dim)) - - def __repr__(self): - spec = [('dtype=' + str(self.dtype)) if self.dtype else '', - ('shape=' + str(self.shape)) if self.shape else '', - ('ndim=' + str(self.ndim)) if self.ndim else '', - ('max_ndim=' + str(self.max_ndim)) if self.max_ndim else '', - ('min_ndim=' + str(self.min_ndim)) if self.min_ndim else '', - ('axes=' + str(self.axes)) if self.axes else ''] - return 'InputSpec(%s)' % ', '.join(x for x in spec if x) - - def get_config(self): - return { - 'dtype': self.dtype, - 'shape': self.shape, - 'ndim': self.ndim, - 'max_ndim': self.max_ndim, - 'min_ndim': self.min_ndim, - 'axes': self.axes} - - @classmethod - def from_config(cls, config): - return cls(**config) + """Specifies the rank, dtype and shape of every input to a layer. + + Layers can expose (if appropriate) an `input_spec` attribute: + an instance of `InputSpec`, or a nested structure of `InputSpec` instances + (one per input tensor). These objects enable the layer to run input + compatibility checks for input structure, input rank, input shape, and + input dtype. + + A None entry in a shape is compatible with any dimension, + a None shape is compatible with any shape. + + Args: + dtype: Expected DataType of the input. + shape: Shape tuple, expected shape of the input + (may include None for unchecked axes). Includes the batch size. + ndim: Integer, expected rank of the input. + max_ndim: Integer, maximum rank of the input. + min_ndim: Integer, minimum rank of the input. + axes: Dictionary mapping integer axes to + a specific dimension value. + allow_last_axis_squeeze: If True, then allow inputs of rank N+1 as long + as the last axis of the input is 1, as well as inputs of rank N-1 + as long as the last axis of the spec is 1. + name: Expected key corresponding to this input when passing data as + a dictionary. + + Example: + + ```python + class MyLayer(Layer): + def __init__(self): + super(MyLayer, self).__init__() + # The layer will accept inputs with + # shape (?, 28, 28) & (?, 28, 28, 1) + # and raise an appropriate error message otherwise. + self.input_spec = InputSpec( + shape=(None, 28, 28, 1), + allow_last_axis_squeeze=True) + ``` + """ + + def __init__( + self, + dtype=None, + shape=None, + ndim=None, + max_ndim=None, + min_ndim=None, + axes=None, + allow_last_axis_squeeze=False, + name=None, + ): + self.dtype = tf.as_dtype(dtype).name if dtype is not None else None + shape = tf.TensorShape(shape) + if shape.rank is None: + shape = None + else: + shape = tuple(shape.as_list()) + if shape is not None: + self.ndim = len(shape) + self.shape = shape + else: + self.ndim = ndim + self.shape = None + self.max_ndim = max_ndim + self.min_ndim = min_ndim + self.name = name + self.allow_last_axis_squeeze = allow_last_axis_squeeze + try: + axes = axes or {} + self.axes = {int(k): axes[k] for k in axes} + except (ValueError, TypeError): + raise TypeError( + "Argument `axes` must be a dict with integer keys. 
" + f"Received: axes={axes}" + ) + + if self.axes and (self.ndim is not None or self.max_ndim is not None): + max_dim = (self.ndim if self.ndim else self.max_ndim) - 1 + max_axis = max(self.axes) + if max_axis > max_dim: + raise ValueError( + "Axis {} is greater than the maximum " + "allowed value: {}".format(max_axis, max_dim) + ) + + def __repr__(self): + spec = [ + ("dtype=" + str(self.dtype)) if self.dtype else "", + ("shape=" + str(self.shape)) if self.shape else "", + ("ndim=" + str(self.ndim)) if self.ndim else "", + ("max_ndim=" + str(self.max_ndim)) if self.max_ndim else "", + ("min_ndim=" + str(self.min_ndim)) if self.min_ndim else "", + ("axes=" + str(self.axes)) if self.axes else "", + ] + return f"InputSpec({', '.join(x for x in spec if x)})" + + def get_config(self): + return { + "dtype": self.dtype, + "shape": self.shape, + "ndim": self.ndim, + "max_ndim": self.max_ndim, + "min_ndim": self.min_ndim, + "axes": self.axes, + } + + @classmethod + def from_config(cls, config): + return cls(**config) def to_tensor_shape(spec): - """Returns a tf.TensorShape object that matches the shape specifications. + """Returns a tf.TensorShape object that matches the shape specifications. - If the InputSpec's shape or ndim is defined, this method will return a fully - or partially-known shape. Otherwise, the returned TensorShape is None. + If the InputSpec's shape or ndim is defined, this method will return a fully + or partially-known shape. Otherwise, the returned TensorShape is None. - Args: - spec: an InputSpec object. + Args: + spec: an InputSpec object. - Returns: - a tf.TensorShape object - """ - if spec.ndim is None and spec.shape is None: - return tf.TensorShape(None) - elif spec.shape is not None: - return tf.TensorShape(spec.shape) - else: - shape = [None] * spec.ndim - for a in spec.axes: - shape[a] = spec.axes[a] # Assume that axes is defined - return tf.TensorShape(shape) + Returns: + a tf.TensorShape object + """ + if spec.ndim is None and spec.shape is None: + return tf.TensorShape(None) + elif spec.shape is not None: + return tf.TensorShape(spec.shape) + else: + shape = [None] * spec.ndim + for a in spec.axes: + shape[a] = spec.axes[a] # Assume that axes is defined + return tf.TensorShape(shape) def assert_input_compatibility(input_spec, inputs, layer_name): - """Checks compatibility between the layer and provided inputs. - - This checks that the tensor(s) `inputs` verify the input assumptions - of a layer (if any). If not, a clear and actional exception gets raised. - - Args: - input_spec: An InputSpec instance, list of InputSpec instances, a nested - structure of InputSpec instances, or None. - inputs: Input tensor, list of input tensors, or a nested structure of - input tensors. - layer_name: String, name of the layer (for error message formatting). - - Raises: - ValueError: in case of mismatch between - the provided inputs and the expectations of the layer. - """ - if not input_spec: - return - - input_spec = tf.nest.flatten(input_spec) - if isinstance(inputs, dict): - # Flatten `inputs` by reference order if input spec names are provided - names = [spec.name for spec in input_spec] - if all(names): - list_inputs = [] - for name in names: - if name not in inputs: - raise ValueError(f'Missing data for input "{name}". ' - 'You passed a data dictionary with keys ' - f'{list(inputs.keys())}. 
' - f'Expected the following keys: {names}') - list_inputs.append(inputs[name]) - inputs = list_inputs - - inputs = tf.nest.flatten(inputs) - for x in inputs: - # Having a shape/dtype is the only commonality of the various tensor-like - # objects that may be passed. The most common kind of invalid type we are - # guarding for is a Layer instance (Functional API), which does not - # have a `shape` attribute. - if not hasattr(x, 'shape'): - raise TypeError(f'Inputs to a layer should be tensors. Got: {x}') - - if len(inputs) != len(input_spec): - raise ValueError(f'Layer "{layer_name}" expects {len(input_spec)} input(s),' - f' but it received {len(inputs)} input tensors. ' - f'Inputs received: {inputs}') - for input_index, (x, spec) in enumerate(zip(inputs, input_spec)): - if spec is None: - continue - - shape = tf.TensorShape(x.shape) - if shape.rank is None: - return - # Check ndim. - if spec.ndim is not None and not spec.allow_last_axis_squeeze: - ndim = shape.rank - if ndim != spec.ndim: - raise ValueError(f'Input {input_index} of layer "{layer_name}" ' - 'is incompatible with the layer: ' - f'expected ndim={spec.ndim}, found ndim={ndim}. ' - f'Full shape received: {tuple(shape)}') - if spec.max_ndim is not None: - ndim = x.shape.rank - if ndim is not None and ndim > spec.max_ndim: - raise ValueError(f'Input {input_index} of layer "{layer_name}" ' - 'is incompatible with the layer: ' - f'expected max_ndim={spec.max_ndim}, ' - f'found ndim={ndim}') - if spec.min_ndim is not None: - ndim = x.shape.rank - if ndim is not None and ndim < spec.min_ndim: - raise ValueError(f'Input {input_index} of layer "{layer_name}" ' - 'is incompatible with the layer: ' - f'expected min_ndim={spec.min_ndim}, ' - f'found ndim={ndim}. ' - f'Full shape received: {tuple(shape)}') - # Check dtype. - if spec.dtype is not None: - if x.dtype.name != spec.dtype: - raise ValueError(f'Input {input_index} of layer "{layer_name}" ' - 'is incompatible with the layer: ' - f'expected dtype={spec.dtype}, ' - f'found dtype={x.dtype}') - - # Check specific shape axes. - shape_as_list = shape.as_list() - if spec.axes: - for axis, value in spec.axes.items(): - if hasattr(value, 'value'): - value = value.value - if value is not None and shape_as_list[int(axis)] not in {value, None}: - raise ValueError( - f'Input {input_index} of layer "{layer_name}" is ' - f'incompatible with the layer: expected axis {axis} ' - f'of input shape to have value {value}, ' - f'but received input with shape {display_shape(x.shape)}') - # Check shape. - if spec.shape is not None and shape.rank is not None: - spec_shape = spec.shape - if spec.allow_last_axis_squeeze: - if shape_as_list and shape_as_list[-1] == 1: - shape_as_list = shape_as_list[:-1] - if spec_shape and spec_shape[-1] == 1: - spec_shape = spec_shape[:-1] - for spec_dim, dim in zip(spec_shape, shape_as_list): - if spec_dim is not None and dim is not None: - if spec_dim != dim: - raise ValueError(f'Input {input_index} of layer "{layer_name}" is ' - 'incompatible with the layer: ' - f'expected shape={spec.shape}, ' - f'found shape={display_shape(x.shape)}') + """Checks compatibility between the layer and provided inputs. + + This checks that the tensor(s) `inputs` verify the input assumptions + of a layer (if any). If not, a clear and actional exception gets raised. + + Args: + input_spec: An InputSpec instance, list of InputSpec instances, a nested + structure of InputSpec instances, or None. + inputs: Input tensor, list of input tensors, or a nested structure of + input tensors. 
+ layer_name: String, name of the layer (for error message formatting). + + Raises: + ValueError: in case of mismatch between + the provided inputs and the expectations of the layer. + """ + if not input_spec: + return + + input_spec = tf.nest.flatten(input_spec) + if isinstance(inputs, dict): + # Flatten `inputs` by reference order if input spec names are provided + names = [spec.name for spec in input_spec] + if all(names): + list_inputs = [] + for name in names: + if name not in inputs: + raise ValueError( + f'Missing data for input "{name}". ' + "You passed a data dictionary with keys " + f"{list(inputs.keys())}. " + f"Expected the following keys: {names}" + ) + list_inputs.append(inputs[name]) + inputs = list_inputs + + inputs = tf.nest.flatten(inputs) + for x in inputs: + # Having a shape/dtype is the only commonality of the various + # tensor-like objects that may be passed. The most common kind of + # invalid type we are guarding for is a Layer instance (Functional API), + # which does not have a `shape` attribute. + if not hasattr(x, "shape"): + raise TypeError( + f"Inputs to a layer should be tensors. Got '{x}' " + f"(of type {type(x)}) as input for layer '{layer_name}'." + ) + + if len(inputs) != len(input_spec): + raise ValueError( + f'Layer "{layer_name}" expects {len(input_spec)} input(s),' + f" but it received {len(inputs)} input tensors. " + f"Inputs received: {inputs}" + ) + for input_index, (x, spec) in enumerate(zip(inputs, input_spec)): + if spec is None: + continue + + shape = tf.TensorShape(x.shape) + if shape.rank is None: + return + # Check ndim. + if spec.ndim is not None and not spec.allow_last_axis_squeeze: + ndim = shape.rank + if ndim != spec.ndim: + raise ValueError( + f'Input {input_index} of layer "{layer_name}" ' + "is incompatible with the layer: " + f"expected ndim={spec.ndim}, found ndim={ndim}. " + f"Full shape received: {tuple(shape)}" + ) + if spec.max_ndim is not None: + ndim = x.shape.rank + if ndim is not None and ndim > spec.max_ndim: + raise ValueError( + f'Input {input_index} of layer "{layer_name}" ' + "is incompatible with the layer: " + f"expected max_ndim={spec.max_ndim}, " + f"found ndim={ndim}" + ) + if spec.min_ndim is not None: + ndim = x.shape.rank + if ndim is not None and ndim < spec.min_ndim: + raise ValueError( + f'Input {input_index} of layer "{layer_name}" ' + "is incompatible with the layer: " + f"expected min_ndim={spec.min_ndim}, " + f"found ndim={ndim}. " + f"Full shape received: {tuple(shape)}" + ) + # Check dtype. + if spec.dtype is not None: + if x.dtype.name != spec.dtype: + raise ValueError( + f'Input {input_index} of layer "{layer_name}" ' + "is incompatible with the layer: " + f"expected dtype={spec.dtype}, " + f"found dtype={x.dtype}" + ) + + # Check specific shape axes. + shape_as_list = shape.as_list() + if spec.axes: + for axis, value in spec.axes.items(): + if hasattr(value, "value"): + value = value.value + if value is not None and shape_as_list[int(axis)] not in { + value, + None, + }: + raise ValueError( + f'Input {input_index} of layer "{layer_name}" is ' + f"incompatible with the layer: expected axis {axis} " + f"of input shape to have value {value}, " + "but received input with " + f"shape {display_shape(x.shape)}" + ) + # Check shape. 
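(Editor's note: the ndim/dtype/axes checks above are what fire when a layer's `input_spec` rejects an input. A minimal sketch of how they surface to users; the `PairwiseDot` layer and its spec are hypothetical, not part of this diff:)

```python
import tensorflow as tf
from tensorflow.keras import layers


class PairwiseDot(layers.Layer):
    """Hypothetical layer: requires rank-2 inputs with a last axis of 4."""

    def __init__(self):
        super().__init__()
        self.input_spec = layers.InputSpec(ndim=2, axes={-1: 4})

    def call(self, inputs):
        return tf.matmul(inputs, inputs, transpose_b=True)


layer = PairwiseDot()
layer(tf.zeros((2, 4)))  # passes: ndim == 2 and axis -1 has value 4
# layer(tf.zeros((2, 3))) would raise ValueError: "... expected axis -1
# of input shape to have value 4 ..."
```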
+ if spec.shape is not None and shape.rank is not None: + spec_shape = spec.shape + if spec.allow_last_axis_squeeze: + if shape_as_list and shape_as_list[-1] == 1: + shape_as_list = shape_as_list[:-1] + if spec_shape and spec_shape[-1] == 1: + spec_shape = spec_shape[:-1] + for spec_dim, dim in zip(spec_shape, shape_as_list): + if spec_dim is not None and dim is not None: + if spec_dim != dim: + raise ValueError( + f'Input {input_index} of layer "{layer_name}" is ' + "incompatible with the layer: " + f"expected shape={spec.shape}, " + f"found shape={display_shape(x.shape)}" + ) def display_shape(shape): - return str(tuple(shape.as_list())) + return str(tuple(shape.as_list())) def to_tensor_spec(input_spec, default_dtype=None): - """Converts a Keras InputSpec object to a TensorSpec.""" - default_dtype = default_dtype or backend.floatx() - if isinstance(input_spec, InputSpec): - dtype = input_spec.dtype or default_dtype - return tf.TensorSpec(to_tensor_shape(input_spec), dtype) - return tf.TensorSpec(None, default_dtype) + """Converts a Keras InputSpec object to a TensorSpec.""" + default_dtype = default_dtype or backend.floatx() + if isinstance(input_spec, InputSpec): + dtype = input_spec.dtype or default_dtype + return tf.TensorSpec(to_tensor_shape(input_spec), dtype) + return tf.TensorSpec(None, default_dtype) diff --git a/keras/engine/input_spec_test.py b/keras/engine/input_spec_test.py index 2fb54f39bd2a..95f295ff5309 100644 --- a/keras/engine/input_spec_test.py +++ b/keras/engine/input_spec_test.py @@ -24,44 +24,46 @@ class InputSpecTest(tf.test.TestCase): - - def test_axes_initialization(self): - input_spec.InputSpec(shape=[1, None, 2, 3], axes={3: 5, '2': 2}) - with self.assertRaisesRegex(ValueError, 'Axis 4 is greater than'): - input_spec.InputSpec(shape=[1, None, 2, 3], axes={4: 5}) - with self.assertRaisesRegex(TypeError, 'Argument `axes` must be a dict'): - input_spec.InputSpec(shape=[1, None, 2, 3], axes={'string': 5}) + def test_axes_initialization(self): + input_spec.InputSpec(shape=[1, None, 2, 3], axes={3: 5, "2": 2}) + with self.assertRaisesRegex(ValueError, "Axis 4 is greater than"): + input_spec.InputSpec(shape=[1, None, 2, 3], axes={4: 5}) + with self.assertRaisesRegex( + TypeError, "Argument `axes` must be a dict" + ): + input_spec.InputSpec(shape=[1, None, 2, 3], axes={"string": 5}) class InputSpecToTensorShapeTest(tf.test.TestCase): - - def test_defined_shape(self): - spec = input_spec.InputSpec(shape=[1, None, 2, 3]) - self.assertAllEqual( - [1, None, 2, 3], input_spec.to_tensor_shape(spec).as_list()) - - def test_defined_ndims(self): - spec = input_spec.InputSpec(ndim=5) - self.assertAllEqual( - [None] * 5, input_spec.to_tensor_shape(spec).as_list()) - - spec = input_spec.InputSpec(ndim=0) - self.assertAllEqual( - [], input_spec.to_tensor_shape(spec).as_list()) - - spec = input_spec.InputSpec(ndim=3, axes={1: 3, -1: 2}) - self.assertAllEqual( - [None, 3, 2], input_spec.to_tensor_shape(spec).as_list()) - - def test_undefined_shapes(self): - spec = input_spec.InputSpec(max_ndim=5) - with self.assertRaisesRegex(ValueError, 'unknown TensorShape'): - input_spec.to_tensor_shape(spec).as_list() - - spec = input_spec.InputSpec(min_ndim=5, max_ndim=5) - with self.assertRaisesRegex(ValueError, 'unknown TensorShape'): - input_spec.to_tensor_shape(spec).as_list() - - -if __name__ == '__main__': - tf.test.main() + def test_defined_shape(self): + spec = input_spec.InputSpec(shape=[1, None, 2, 3]) + self.assertAllEqual( + [1, None, 2, 3], input_spec.to_tensor_shape(spec).as_list() 
+ ) + + def test_defined_ndims(self): + spec = input_spec.InputSpec(ndim=5) + self.assertAllEqual( + [None] * 5, input_spec.to_tensor_shape(spec).as_list() + ) + + spec = input_spec.InputSpec(ndim=0) + self.assertAllEqual([], input_spec.to_tensor_shape(spec).as_list()) + + spec = input_spec.InputSpec(ndim=3, axes={1: 3, -1: 2}) + self.assertAllEqual( + [None, 3, 2], input_spec.to_tensor_shape(spec).as_list() + ) + + def test_undefined_shapes(self): + spec = input_spec.InputSpec(max_ndim=5) + with self.assertRaisesRegex(ValueError, "unknown TensorShape"): + input_spec.to_tensor_shape(spec).as_list() + + spec = input_spec.InputSpec(min_ndim=5, max_ndim=5) + with self.assertRaisesRegex(ValueError, "unknown TensorShape"): + input_spec.to_tensor_shape(spec).as_list() + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/keras_tensor.py b/keras/engine/keras_tensor.py index 7b225bb92def..cc04cc26c25b 100644 --- a/keras/engine/keras_tensor.py +++ b/keras/engine/keras_tensor.py @@ -14,12 +14,13 @@ # ============================================================================== """Keras Input Tensor used to track functional API Topology.""" -from keras.utils import object_identity import tensorflow.compat.v2 as tf -from tensorflow.python.data.util import structure +from keras.utils import object_identity -# pylint: disable=g-classes-have-attributes +# isort: off +from tensorflow.python.data.util import structure +from tensorflow.python.util.tf_export import keras_export # Tensorflow tensors have a maximum rank of 254 @@ -29,463 +30,516 @@ _MAX_TENSOR_RANK = 254 +@keras_export("keras.__internal__.KerasTensor", v1=[]) class KerasTensor: - """A representation of a Keras in/output during Functional API construction. - - `KerasTensor`s are tensor-like objects that represent the symbolic inputs - and outputs of Keras layers during Functional model construction. They are - comprised of the `tf.TypeSpec` of the (Composite)Tensor that will be - consumed/produced in the corresponding location of the Functional model. - - KerasTensors are intended as a private API, so users should never need to - directly instantiate `KerasTensor`s. - - **Building Functional Models with KerasTensors** - `tf.keras.Input` produces `KerasTensor`s that represent the symbolic inputs - to your model. - - Passing a `KerasTensor` to a `tf.keras.Layer` `__call__` lets the layer know - that you are building a Functional model. The layer __call__ will - infer the output signature and return `KerasTensor`s with `tf.TypeSpec`s - corresponding to the symbolic outputs of that layer call. These output - `KerasTensor`s will have all of the internal KerasHistory metadata attached - to them that Keras needs to construct a Functional Model. - - Currently, layers infer the output signature by: - * creating a scratch `FuncGraph` - * making placeholders in the scratch graph that match the input typespecs - * Calling `layer.call` on these placeholders - * extracting the signatures of the outputs before clearing the scratch graph - - (Note: names assigned to KerasTensors by this process are not guaranteed to - be unique, and are subject to implementation details). - - `tf.nest` methods are used to insure all of the inputs/output data - structures get maintained, with elements swapped between KerasTensors and - placeholders. - - In rare cases (such as when directly manipulating shapes using Keras layers), - the layer may be able to partially infer the value of the output in addition - to just inferring the signature. 
- When this happens, the returned KerasTensor will also contain the inferred - value information. Follow-on layers can use this information. - during their own output signature inference. - E.g. if one layer produces a symbolic `KerasTensor` that the next layer uses - as the shape of its outputs, partially knowing the value helps infer the - output shape. - - **Automatically converting TF APIs to layers**: - If you passing a `KerasTensor` to a TF API that supports dispatching, - Keras will automatically turn that API call into a lambda - layer in the Functional model, and return KerasTensors representing the - symbolic outputs. - - Most TF APIs that take only tensors as input and produce output tensors - will support dispatching. - - Calling a `tf.function` does not support dispatching, so you cannot pass - `KerasTensor`s as inputs to a `tf.function`. - - Higher-order APIs that take methods which produce tensors (e.g. `tf.while`, - `tf.map_fn`, `tf.cond`) also do not currently support dispatching. So, you - cannot directly pass KerasTensors as inputs to these APIs either. If you - want to use these APIs inside of a Functional model, you must put them inside - of a custom layer. - - Args: - type_spec: The `tf.TypeSpec` for the symbolic input created by - `tf.keras.Input`, or symbolically inferred for the output - during a symbolic layer `__call__`. - inferred_value: (Optional) a non-symbolic static value, possibly partially - specified, that could be symbolically inferred for the outputs during - a symbolic layer `__call__`. This will generally only happen when - grabbing and manipulating `tf.int32` shapes directly as tensors. - Statically inferring values in this way and storing them in the - KerasTensor allows follow-on layers to infer output signatures - more effectively. (e.g. when using a symbolic shape tensor to later - construct a tensor with that shape). - name: (optional) string name for this KerasTensor. Names automatically - generated by symbolic layer `__call__`s are not guaranteed to be unique, - and are subject to implementation details. 
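(Editor's note: the dispatching behavior this docstring describes is observable from ordinary Functional-model code. A rough sketch, assuming standalone `tf.keras` on TF 2.x:)

```python
import tensorflow as tf

inputs = tf.keras.Input(shape=(4,))      # a symbolic KerasTensor
x = tf.math.square(inputs)               # dispatched: wrapped as an op layer
outputs = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs, outputs)  # tf.math.square appears as a layer
model.summary()
```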
- """ - - def __init__(self, type_spec, inferred_value=None, name=None): - """Constructs a KerasTensor.""" - if not isinstance(type_spec, tf.TypeSpec): - raise ValueError('KerasTensors must be constructed with a `tf.TypeSpec`.') - - self._type_spec = type_spec - self._inferred_value = inferred_value - self._name = name - - if not isinstance(type_spec, structure.NoneTensorSpec): - if not hasattr(type_spec, 'shape'): - raise ValueError( - 'KerasTensor only supports TypeSpecs that have a shape field; got ' - f'{type(type_spec).__qualname__}, which does not have a shape.') - if not isinstance(type_spec.shape, tf.TensorShape): - raise TypeError( - "KerasTensor requires that wrapped TypeSpec's shape is a " - f'TensorShape; got TypeSpec {type(type_spec).__qualname__}, whose ' - 'shape field has unexpected type ' - f'{type(type_spec.dtype).__qualname__}.') - - @property - def type_spec(self): - """Returns the `tf.TypeSpec` symbolically inferred for this Keras output.""" - return self._type_spec - - @property - def shape(self): - """Returns the `TensorShape` symbolically inferred for this Keras output.""" - return self._type_spec.shape - - @classmethod - def from_tensor(cls, tensor): - """Convert a traced (composite)tensor to a representative KerasTensor.""" - if isinstance(tensor, tf.Tensor): - name = getattr(tensor, 'name', None) - type_spec = tf.type_spec_from_value(tensor) - inferred_value = None - if (type_spec.dtype == tf.int32 and type_spec.shape.rank is not None - and type_spec.shape.rank < 2): - # If this tensor might be representing shape information, - # (dtype=int32, rank of 0 or 1, not too large to represent a shape) - # we attempt to capture any value information tensorflow's - # shape handling can extract from the current scratch graph. - # - # Even though keras layers each trace in their own scratch - # graph, this shape value info extraction allows us to capture - # a sizable and useful subset of the C++ shape value inference TF can do - # if all tf ops appear in the same graph when using shape ops. 
- # - # Examples of things this cannot infer concrete dimensions for - # that the full single-graph C++ shape inference sometimes can are: - # * cases where the shape tensor is cast out of int32 before being - # manipulated w/ floating point numbers then converted back - # * cases where int32 tensors w/ rank >= 2 are manipulated before being - # used as a shape tensor - # * cases where int32 tensors too large to represent shapes are - # manipulated to a smaller size before being used as a shape tensor - inferred_value = tf.ones(shape=tensor).shape - if inferred_value.dims: - inferred_value = inferred_value.as_list() - if len(inferred_value) > _MAX_TENSOR_RANK: - inferred_value = None - else: - inferred_value = None - - return KerasTensor(type_spec, inferred_value=inferred_value, name=name) - else: - # Fallback to the generic arbitrary-typespec KerasTensor - name = getattr(tensor, 'name', None) - type_spec = tf.type_spec_from_value(tensor) - return cls(type_spec, name=name) - - @classmethod - def from_type_spec(cls, type_spec, name=None): - return cls(type_spec=type_spec, name=name) - - def _to_placeholder(self): - """Convert this KerasTensor to a placeholder in a graph.""" - # If there is an inferred value for this tensor, inject the inferred value - if self._inferred_value is not None: - # If we suspect this KerasTensor might be representing a shape tensor, - # and we were able to extract value information with TensorFlow's shape - # handling when making the KerasTensor, we construct the placeholder by - # re-injecting the inferred value information into the graph. We - # do this injection through the shape of a placeholder, because that - # allows us to specify partially-unspecified shape values. - # - # See the comment on value extraction inside `from_tensor` for more info. - inferred_value = tf.shape( - tf.compat.v1.placeholder( - shape=self._inferred_value, dtype=tf.int32)) - if self.type_spec.shape.rank == 0: - # `tf.shape` always returns a rank-1, we may need to turn it back to a - # scalar. - inferred_value = inferred_value[0] - return inferred_value - - # Use the generic conversion from typespec to a placeholder. - def component_to_placeholder(component): - return tf.compat.v1.placeholder(component.dtype, component.shape) - - return tf.nest.map_structure( - component_to_placeholder, self.type_spec, expand_composites=True) - - def get_shape(self): - return self.shape - - def __len__(self): - raise TypeError('Keras symbolic inputs/outputs do not ' - 'implement `__len__`. You may be ' - 'trying to pass Keras symbolic inputs/outputs ' - 'to a TF API that does not register dispatching, ' - 'preventing Keras from automatically ' - 'converting the API call to a lambda layer ' - 'in the Functional Model. This error will also get raised ' - 'if you try asserting a symbolic input/output directly.') - - @property - def op(self): - raise TypeError('Keras symbolic inputs/outputs do not ' - 'implement `op`. You may be ' - 'trying to pass Keras symbolic inputs/outputs ' - 'to a TF API that does not register dispatching, ' - 'preventing Keras from automatically ' - 'converting the API call to a lambda layer ' - 'in the Functional Model.') - - def __hash__(self): - raise TypeError(f'Tensors are unhashable (this tensor: {self}). ' - 'Instead, use tensor.ref() as the key.') - - # Note: This enables the KerasTensor's overloaded "right" binary - # operators to run when the left operand is an ndarray, because it - # accords the Tensor class higher priority than an ndarray, or a - # numpy matrix. 
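(Editor's note: the `__array_priority__` setting discussed in this comment block has a concrete effect when an ndarray is the left operand. A minimal sketch:)

```python
import numpy as np
import tensorflow as tf

inputs = tf.keras.Input(shape=(3,))
# NumPy sees __array_priority__ = 100 on the right operand and defers to
# KerasTensor.__radd__ rather than coercing the KerasTensor to an ndarray:
x = np.ones((3,), dtype="float32") + inputs  # still a symbolic KerasTensor
```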
-  # In the future explore changing this to using numpy's __numpy_ufunc__
-  # mechanism, which allows more control over how Tensors interact
-  # with ndarrays.
-  __array_priority__ = 100
-
-  def __array__(self, dtype=None):
-    raise TypeError(
-        f'You are passing {self}, an intermediate Keras symbolic input/output, '
-        'to a TF API that does not allow registering custom dispatchers, such '
-        'as `tf.cond`, `tf.function`, gradient tapes, or `tf.map_fn`. '
-        'Keras Functional model construction only supports '
-        'TF API calls that *do* support dispatching, such as `tf.math.add` or '
-        '`tf.reshape`. '
-        'Other APIs cannot be called directly on symbolic Keras'
-        'inputs/outputs. You can work around '
-        'this limitation by putting the operation in a custom Keras layer '
-        '`call` and calling that layer '
-        'on this symbolic input/output.')
-
-  @property
-  def is_tensor_like(self):
-    return True
-
-  def set_shape(self, shape):
-    """Updates the shape of this KerasTensor. Mimics `tf.Tensor.set_shape()`."""
-    if not isinstance(shape, tf.TensorShape):
-      shape = tf.TensorShape(shape)
-    if not self.shape.is_compatible_with(shape):
-      raise ValueError(
-          f"Keras symbolic input/output's shape {self.shape} is not "
-          f"compatible with supplied shape {shape}.")
-    else:
-      shape = self.shape.merge_with(shape)
-    self._type_spec = type_spec_with_shape(self._type_spec, shape)
-
-  def __str__(self):
-    symbolic_description = ''
-    inferred_value_string = ''
-    name_string = ''
-
-    if hasattr(self, '_keras_history'):
-      layer = self._keras_history.layer
-      symbolic_description = (
-          ', description="created by layer \'%s\'"' % (layer.name,))
-    if self._inferred_value is not None:
-      inferred_value_string = (
-          ', inferred_value=%s' % self._inferred_value)
-    if self.name is not None:
-      name_string = ', name=\'%s\'' % self._name
-    return 'KerasTensor(type_spec=%s%s%s%s)' % (
-        self.type_spec, inferred_value_string,
-        name_string, symbolic_description)
-
-  def __repr__(self):
-    symbolic_description = ''
-    inferred_value_string = ''
-    if isinstance(self.type_spec, tf.TensorSpec):
-      type_spec_string = 'shape=%s dtype=%s' % (self.shape, self.dtype.name)
-    else:
-      type_spec_string = 'type_spec=%s' % self.type_spec
-
-    if hasattr(self, '_keras_history'):
-      layer = self._keras_history.layer
-      symbolic_description = ' (created by layer \'%s\')' % (layer.name,)
-    if self._inferred_value is not None:
-      inferred_value_string = (
-          ' inferred_value=%s' % self._inferred_value)
-    return '<KerasTensor: %s%s%s>' % (
-        type_spec_string, inferred_value_string, symbolic_description)
-
-  @property
-  def dtype(self):
-    """Returns the `dtype` symbolically inferred for this Keras output."""
-    type_spec = self._type_spec
-    if not hasattr(type_spec, 'dtype'):
-      raise AttributeError(
-          f'KerasTensor wraps TypeSpec {type(type_spec).__qualname__}, '
-          'which does not have a dtype.')
-    if not isinstance(type_spec.dtype, tf.DType):
-      raise TypeError(
-          "KerasTensor requires that wrapped TypeSpec's dtype is a DType; got "
-          f'TypeSpec {type(type_spec).__qualname__}, whose dtype field has '
-          f'unexpected type {type(type_spec.dtype).__qualname__}.')
-    return type_spec.dtype
-
-  def ref(self):
-    """Returns a hashable reference object to this KerasTensor.
-
-    The primary use case for this API is to put KerasTensors in a
-    set/dictionary. We can't put tensors in a set/dictionary as
-    `tensor.__hash__()` is not available and tensor equality (`==`) is supposed
-    to produce a tensor representing if the two inputs are equal.
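(Editor's note: since `__hash__` raises, `ref()` is the supported way to key a set or dict by a KerasTensor, as the docstring above says. Sketch:)

```python
import tensorflow as tf

a = tf.keras.Input(shape=(2,))
b = tf.keras.Input(shape=(2,))
registry = {a.ref(): "first", b.ref(): "second"}  # hashable wrappers
assert registry[a.ref()] == "first"
```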
- - See the documentation of `tf.Tensor.ref()` for more info. - """ - return object_identity.Reference(self) - - @property - def node(self): - """Find the corresponding `Node` that produce this keras_tensor. - - During functional model construction, Keras will attach `KerasHistory` to - keras tensor to track the connectivity between calls of layers. Return - None if there isn't any KerasHistory attached to this tensor. - """ - if hasattr(self, '_keras_history'): - layer, node_index, _ = self._keras_history - return layer.inbound_nodes[node_index] - return None - - def __iter__(self): - shape = None - if self.shape.ndims is not None: - shape = [dim.value for dim in self.shape.dims] - - if shape is None: - raise TypeError('Cannot iterate over a Tensor with unknown shape.') - if not shape: - raise TypeError('Cannot iterate over a scalar.') - if shape[0] is None: - raise TypeError( - 'Cannot iterate over a Tensor with unknown first dimension.') - return _KerasTensorIterator(self, shape[0]) - - @property - def name(self): - """Returns the (non-unique, optional) name of this symbolic Keras value.""" - return self._name - - @classmethod - def _overload_all_operators(cls, tensor_class): # pylint: disable=invalid-name - """Register overloads for all operators.""" - for operator in tf.Tensor.OVERLOADABLE_OPERATORS: - cls._overload_operator(tensor_class, operator) - - # We include `experimental_ref` for versions of TensorFlow that - # still include the deprecated method in Tensors. - if hasattr(tensor_class, 'experimental_ref'): - cls._overload_operator(tensor_class, 'experimental_ref') - - @classmethod - def _overload_operator(cls, tensor_class, operator): # pylint: disable=invalid-name - """Overload an operator with the same implementation as a base Tensor class. - - We pull the operator out of the class dynamically to avoid ordering issues. + """A representation of a Keras in/output during Functional API construction. + + `KerasTensor`s are tensor-like objects that represent the symbolic inputs + and outputs of Keras layers during Functional model construction. They are + comprised of the `tf.TypeSpec` of the (Composite)Tensor that will be + consumed/produced in the corresponding location of the Functional model. + + KerasTensors are intended as a private API, so users should never need to + directly instantiate `KerasTensor`s. + + **Building Functional Models with KerasTensors** + `tf.keras.Input` produces `KerasTensor`s that represent the symbolic inputs + to your model. + + Passing a `KerasTensor` to a `tf.keras.Layer` `__call__` lets the layer know + that you are building a Functional model. The layer __call__ will + infer the output signature and return `KerasTensor`s with `tf.TypeSpec`s + corresponding to the symbolic outputs of that layer call. These output + `KerasTensor`s will have all of the internal KerasHistory metadata attached + to them that Keras needs to construct a Functional Model. + + Currently, layers infer the output signature by: + * creating a scratch `FuncGraph` + * making placeholders in the scratch graph that match the input typespecs + * Calling `layer.call` on these placeholders + * extracting the signatures of the outputs before clearing the scratch + graph + + (Note: names assigned to KerasTensors by this process are not guaranteed to + be unique, and are subject to implementation details). + + `tf.nest` methods are used to insure all of the inputs/output data + structures get maintained, with elements swapped between KerasTensors and + placeholders. 
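(Editor's note: the `tf.nest` round-tripping mentioned at the end of this paragraph looks roughly like this from the outside; a sketch, not code from this diff:)

```python
import tensorflow as tf

inputs = {"a": tf.keras.Input(shape=(2,)), "b": tf.keras.Input(shape=(3,))}
flat = tf.nest.flatten(inputs)                    # [KerasTensor, KerasTensor]
rebuilt = tf.nest.pack_sequence_as(inputs, flat)  # dict structure preserved
```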
+
+    In rare cases (such as when directly manipulating shapes using Keras
+    layers), the layer may be able to partially infer the value of the output
+    in addition to just inferring the signature.
+    When this happens, the returned KerasTensor will also contain the inferred
+    value information. Follow-on layers can use this information
+    during their own output signature inference.
+    E.g. if one layer produces a symbolic `KerasTensor` that the next layer
+    uses as the shape of its outputs, partially knowing the value helps infer
+    the output shape.
+
+    **Automatically converting TF APIs to layers**:
+    If you pass a `KerasTensor` to a TF API that supports dispatching,
+    Keras will automatically turn that API call into a lambda
+    layer in the Functional model, and return KerasTensors representing the
+    symbolic outputs.
+
+    Most TF APIs that take only tensors as input and produce output tensors
+    will support dispatching.
+
+    Calling a `tf.function` does not support dispatching, so you cannot pass
+    `KerasTensor`s as inputs to a `tf.function`.
+
+    Higher-order APIs that take methods which produce tensors (e.g. `tf.while`,
+    `tf.map_fn`, `tf.cond`) also do not currently support dispatching. So, you
+    cannot directly pass KerasTensors as inputs to these APIs either. If you
+    want to use these APIs inside of a Functional model, you must put them
+    inside of a custom layer.

     Args:
-      tensor_class: The (Composite)Tensor to get the method from.
-      operator: string. The operator name.
+      type_spec: The `tf.TypeSpec` for the symbolic input created by
+        `tf.keras.Input`, or symbolically inferred for the output
+        during a symbolic layer `__call__`.
+      inferred_value: (Optional) a non-symbolic static value, possibly partially
+        specified, that could be symbolically inferred for the outputs during
+        a symbolic layer `__call__`. This will generally only happen when
+        grabbing and manipulating `tf.int32` shapes directly as tensors.
+        Statically inferring values in this way and storing them in the
+        KerasTensor allows follow-on layers to infer output signatures
+        more effectively. (e.g. when using a symbolic shape tensor to later
+        construct a tensor with that shape).
+      name: (optional) string name for this KerasTensor. Names automatically
+        generated by symbolic layer `__call__`s are not guaranteed to be unique,
+        and are subject to implementation details.
     """
-    tensor_oper = getattr(tensor_class, operator)
-    # Compatibility with Python 2:
-    # Python 2 unbound methods have type checks for the first arg,
-    # so we need to extract the underlying function
-    tensor_oper = getattr(tensor_oper, '__func__', tensor_oper)
-
-    setattr(cls, operator, tensor_oper)
+    def __init__(self, type_spec, inferred_value=None, name=None):
+        """Constructs a KerasTensor."""
+        if not isinstance(type_spec, tf.TypeSpec):
+            raise ValueError(
+                "KerasTensors must be constructed with a `tf.TypeSpec`."
+            )
+
+        self._type_spec = type_spec
+        self._inferred_value = inferred_value
+        self._name = name
+
+        if not isinstance(type_spec, structure.NoneTensorSpec):
+            if not hasattr(type_spec, "shape"):
+                raise ValueError(
+                    "KerasTensor only supports TypeSpecs that have a shape "
+                    f"field; got {type(type_spec).__qualname__}, "
+                    "which does not have a shape."
+                )
+            if not isinstance(type_spec.shape, tf.TensorShape):
+                raise TypeError(
+                    "KerasTensor requires that wrapped TypeSpec's shape is a "
+                    f"TensorShape; got TypeSpec {type(type_spec).__qualname__}"
+                    ", whose shape field has unexpected type "
+                    f"{type(type_spec.shape).__qualname__}."
+ ) + + @property + def type_spec(self): + """Returns the `tf.TypeSpec` symbolically inferred for Keras output.""" + return self._type_spec + + @property + def shape(self): + """Returns the `TensorShape` symbolically inferred for Keras output.""" + return self._type_spec.shape + + @classmethod + def from_tensor(cls, tensor): + """Convert a traced (composite)tensor to a representative + KerasTensor.""" + if isinstance(tensor, tf.Tensor): + name = getattr(tensor, "name", None) + type_spec = tf.type_spec_from_value(tensor) + inferred_value = None + if ( + type_spec.dtype == tf.int32 + and type_spec.shape.rank is not None + and type_spec.shape.rank < 2 + ): + # If this tensor might be representing shape information, + # (dtype=int32, rank of 0 or 1, not too large to represent a + # shape) we attempt to capture any value information + # tensorflow's shape handling can extract from the current + # scratch graph. + # + # Even though keras layers each trace in their own scratch + # graph, this shape value info extraction allows us to capture a + # sizable and useful subset of the C++ shape value inference TF + # can do if all tf ops appear in the same graph when using shape + # ops. + # + # Examples of things this cannot infer concrete dimensions for + # that the full single-graph C++ shape inference sometimes can + # are: + # * cases where the shape tensor is cast out of int32 before + # being manipulated w/ floating point numbers then converted + # back + # * cases where int32 tensors w/ rank >= 2 are manipulated + # before being used as a shape tensor + # * cases where int32 tensors too large to represent shapes are + # manipulated to a smaller size before being used as a shape + # tensor + inferred_value = tf.ones(shape=tensor).shape + if inferred_value.dims: + inferred_value = inferred_value.as_list() + if len(inferred_value) > _MAX_TENSOR_RANK: + inferred_value = None + else: + inferred_value = None + + return KerasTensor( + type_spec, inferred_value=inferred_value, name=name + ) + else: + # Fallback to the generic arbitrary-typespec KerasTensor + name = getattr(tensor, "name", None) + type_spec = tf.type_spec_from_value(tensor) + return cls(type_spec, name=name) + + @classmethod + def from_type_spec(cls, type_spec, name=None): + return cls(type_spec=type_spec, name=name) + + def _to_placeholder(self): + """Convert this KerasTensor to a placeholder in a graph.""" + # If there is an inferred value for this tensor, inject the inferred + # value + if self._inferred_value is not None: + # If we suspect this KerasTensor might be representing a shape + # tensor, and we were able to extract value information with + # TensorFlow's shape handling when making the KerasTensor, we + # construct the placeholder by re-injecting the inferred value + # information into the graph. We do this injection through the shape + # of a placeholder, because that allows us to specify + # partially-unspecified shape values. + # + # See the comment on value extraction inside `from_tensor` for more + # info. + inferred_value = tf.shape( + tf.compat.v1.placeholder( + shape=self._inferred_value, dtype=tf.int32 + ) + ) + if self.type_spec.shape.rank == 0: + # `tf.shape` always returns a rank-1, we may need to turn it + # back to a scalar. + inferred_value = inferred_value[0] + return inferred_value + + # Use the generic conversion from typespec to a placeholder. 
+        def component_to_placeholder(component):
+            return tf.compat.v1.placeholder(component.dtype, component.shape)
+
+        return tf.nest.map_structure(
+            component_to_placeholder, self.type_spec, expand_composites=True
+        )
+
+    def get_shape(self):
+        return self.shape
+
+    def __len__(self):
+        raise TypeError(
+            "Keras symbolic inputs/outputs do not "
+            "implement `__len__`. You may be "
+            "trying to pass Keras symbolic inputs/outputs "
+            "to a TF API that does not register dispatching, "
+            "preventing Keras from automatically "
+            "converting the API call to a lambda layer "
+            "in the Functional Model. This error will also get raised "
+            "if you try asserting a symbolic input/output directly."
+        )
+
+    @property
+    def op(self):
+        raise TypeError(
+            "Keras symbolic inputs/outputs do not "
+            "implement `op`. You may be "
+            "trying to pass Keras symbolic inputs/outputs "
+            "to a TF API that does not register dispatching, "
+            "preventing Keras from automatically "
+            "converting the API call to a lambda layer "
+            "in the Functional Model."
+        )
+
+    def __hash__(self):
+        raise TypeError(
+            f"Tensors are unhashable (this tensor: {self}). "
+            "Instead, use tensor.ref() as the key."
+        )
+
+    # Note: This enables the KerasTensor's overloaded "right" binary
+    # operators to run when the left operand is an ndarray, because it
+    # accords the Tensor class higher priority than an ndarray, or a
+    # numpy matrix.
+    # In the future explore changing this to using numpy's __numpy_ufunc__
+    # mechanism, which allows more control over how Tensors interact
+    # with ndarrays.
+    __array_priority__ = 100
+
+    def __array__(self, dtype=None):
+        raise TypeError(
+            f"You are passing {self}, an intermediate Keras symbolic "
+            "input/output, to a TF API that does not allow registering custom "
+            "dispatchers, such as `tf.cond`, `tf.function`, gradient tapes, "
+            "or `tf.map_fn`. Keras Functional model construction only supports "
+            "TF API calls that *do* support dispatching, such as `tf.math.add` "
+            "or `tf.reshape`. "
+            "Other APIs cannot be called directly on symbolic Keras "
+            "inputs/outputs. You can work around "
+            "this limitation by putting the operation in a custom Keras layer "
+            "`call` and calling that layer "
+            "on this symbolic input/output."
+        )
+
+    @property
+    def is_tensor_like(self):
+        return True
+
+    def set_shape(self, shape):
+        """Updates the shape of this KerasTensor. Mimics
+        `tf.Tensor.set_shape()`."""
+        if not isinstance(shape, tf.TensorShape):
+            shape = tf.TensorShape(shape)
+        if not self.shape.is_compatible_with(shape):
+            raise ValueError(
+                f"Keras symbolic input/output's shape {self.shape} is not "
+                f"compatible with supplied shape {shape}."
+            )
+        else:
+            shape = self.shape.merge_with(shape)
+        self._type_spec = type_spec_with_shape(self._type_spec, shape)
+
+    def __str__(self):
+        symbolic_description = ""
+        inferred_value_string = ""
+        name_string = ""
+
+        if hasattr(self, "_keras_history"):
+            layer = self._keras_history.layer
+            symbolic_description = ", description=\"created by layer '%s'\"" % (
+                layer.name,
+            )
+        if self._inferred_value is not None:
+            inferred_value_string = f", inferred_value={self._inferred_value}"
+        if self.name is not None:
+            name_string = f", name='{self._name}'"
+        return "KerasTensor(type_spec=%s%s%s%s)" % (
+            self.type_spec,
+            inferred_value_string,
+            name_string,
+            symbolic_description,
+        )
+
+    def __repr__(self):
+        symbolic_description = ""
+        inferred_value_string = ""
+        if isinstance(self.type_spec, tf.TensorSpec):
+            type_spec_string = f"shape={self.shape} dtype={self.dtype.name}"
+        else:
+            type_spec_string = f"type_spec={self.type_spec}"
+
+        if hasattr(self, "_keras_history"):
+            layer = self._keras_history.layer
+            symbolic_description = f" (created by layer '{layer.name}')"
+        if self._inferred_value is not None:
+            inferred_value_string = f" inferred_value={self._inferred_value}"
+        return "<KerasTensor: %s%s%s>" % (
+            type_spec_string,
+            inferred_value_string,
+            symbolic_description,
+        )
+
+    @property
+    def dtype(self):
+        """Returns the `dtype` symbolically inferred for this Keras output."""
+        type_spec = self._type_spec
+        if not hasattr(type_spec, "dtype"):
+            raise AttributeError(
+                f"KerasTensor wraps TypeSpec {type(type_spec).__qualname__}, "
+                "which does not have a dtype."
+            )
+        if not isinstance(type_spec.dtype, tf.DType):
+            raise TypeError(
+                "KerasTensor requires that wrapped TypeSpec's dtype is a "
+                f"DType; got TypeSpec {type(type_spec).__qualname__}, whose "
+                "dtype field has unexpected type "
+                f"{type(type_spec.dtype).__qualname__}."
+            )
+        return type_spec.dtype
+
+    def ref(self):
+        """Returns a hashable reference object to this KerasTensor.
+
+        The primary use case for this API is to put KerasTensors in a
+        set/dictionary. We can't put tensors in a set/dictionary as
+        `tensor.__hash__()` is not available and tensor equality (`==`) is
+        supposed to produce a tensor representing if the two inputs are equal.
+
+        See the documentation of `tf.Tensor.ref()` for more info.
+        """
+        return object_identity.Reference(self)
+
+    @property
+    def node(self):
+        """Finds the corresponding `Node` that produces this keras_tensor.
+
+        During functional model construction, Keras will attach `KerasHistory`
+        to keras tensor to track the connectivity between calls of layers.
+        Return None if there isn't any KerasHistory attached to this tensor.
+        """
+        if hasattr(self, "_keras_history"):
+            layer, node_index, _ = self._keras_history
+            return layer.inbound_nodes[node_index]
+        return None
+
+    def __iter__(self):
+        shape = None
+        if self.shape.ndims is not None:
+            shape = [dim.value for dim in self.shape.dims]
+
+        if shape is None:
+            raise TypeError("Cannot iterate over a Tensor with unknown shape.")
+        if not shape:
+            raise TypeError("Cannot iterate over a scalar.")
+        if shape[0] is None:
+            raise TypeError(
+                "Cannot iterate over a Tensor with unknown first dimension."
+ ) + return _KerasTensorIterator(self, shape[0]) + + @property + def name(self): + """Returns the (non-unique, optional) name of this symbolic Keras + value.""" + return self._name + + @classmethod + def _overload_all_operators(cls, tensor_class): + """Register overloads for all operators.""" + for operator in tf.Tensor.OVERLOADABLE_OPERATORS: + cls._overload_operator(tensor_class, operator) + + # We include `experimental_ref` for versions of TensorFlow that + # still include the deprecated method in Tensors. + if hasattr(tensor_class, "experimental_ref"): + cls._overload_operator(tensor_class, "experimental_ref") + + @classmethod + def _overload_operator(cls, tensor_class, operator): + """Overload operator with the same implementation as the Tensor class. + + We pull the operator out of the class dynamically to avoid ordering + issues. + + Args: + tensor_class: The (Composite)Tensor to get the method from. + operator: string. The operator name. + """ + tensor_oper = getattr(tensor_class, operator) + + # Compatibility with Python 2: + # Python 2 unbound methods have type checks for the first arg, + # so we need to extract the underlying function + tensor_oper = getattr(tensor_oper, "__func__", tensor_oper) + + setattr(cls, operator, tensor_oper) + + +KerasTensor._overload_all_operators(tf.Tensor) + + +@keras_export("keras.__internal__.SparseKerasTensor", v1=[]) +class SparseKerasTensor(KerasTensor): + """A specialized KerasTensor representation for `tf.sparse.SparseTensor`s. + Specifically, it specializes the conversion to a placeholder in order + to maintain dense shape information. + """ -KerasTensor._overload_all_operators(tf.Tensor) # pylint: disable=protected-access + def _to_placeholder(self): + spec = self.type_spec + # nest.map_structure loses dense shape information for sparse tensors. + # So, we special-case sparse placeholder creation. + # This only preserves shape information for top-level sparse tensors; + # not for sparse tensors that are nested inside another composite + # tensor. + return tf.compat.v1.sparse_placeholder( + dtype=spec.dtype, shape=spec.shape + ) -class SparseKerasTensor(KerasTensor): - """A specialized KerasTensor representation for `tf.sparse.SparseTensor`s. - Specifically, it specializes the conversion to a placeholder in order - to maintain dense shape information. - """ +@keras_export("keras.__internal__.RaggedKerasTensor", v1=[]) +class RaggedKerasTensor(KerasTensor): + """A specialized KerasTensor representation for `tf.RaggedTensor`s. - def _to_placeholder(self): - spec = self.type_spec + Specifically, it: - # nest.map_structure loses dense shape information for sparse tensors. - # So, we special-case sparse placeholder creation. - # This only preserves shape information for top-level sparse tensors; - # not for sparse tensors that are nested inside another composite - # tensor. - return tf.compat.v1.sparse_placeholder(dtype=spec.dtype, shape=spec.shape) + 1. Specializes the conversion to a placeholder in order + to maintain shape information for non-ragged dimensions. + 2. Overloads the KerasTensor's operators with the RaggedTensor versions + when they don't match the `tf.Tensor` versions + 3. Exposes some of the instance method/attribute that are unique to + the RaggedTensor API (such as ragged_rank). 
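(Editor's note: the `RaggedKerasTensor` behaviors listed in this docstring are reachable through `tf.keras.Input(..., ragged=True)`. A sketch:)

```python
import tensorflow as tf

x = tf.keras.Input(shape=(None,), ragged=True)  # a RaggedKerasTensor
print(x.ragged_rank)  # 1, via the property exposed above
y = x * 2             # uses the RaggedTensor __mul__ overload registered below
```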
+ """ + def _to_placeholder(self): + ragged_spec = self.type_spec + if ragged_spec.ragged_rank == 0 or ragged_spec.shape.rank is None: + return super()._to_placeholder() + + flat_shape = ragged_spec.shape[ragged_spec.ragged_rank :] + result = tf.compat.v1.placeholder(ragged_spec.dtype, flat_shape) + + known_num_splits = [] + prod = 1 + for axis_size in ragged_spec.shape: + if prod is not None: + if axis_size is None or ( + getattr(axis_size, "value", True) is None + ): + prod = None + else: + prod = prod * axis_size + known_num_splits.append(prod) + + for axis in range(ragged_spec.ragged_rank, 0, -1): + axis_size = ragged_spec.shape[axis] + if axis_size is None or (getattr(axis_size, "value", True) is None): + num_splits = known_num_splits[axis - 1] + if num_splits is not None: + num_splits = num_splits + 1 + splits = tf.compat.v1.placeholder( + ragged_spec.row_splits_dtype, [num_splits] + ) + result = tf.RaggedTensor.from_row_splits( + result, splits, validate=False + ) + else: + rowlen = tf.constant(axis_size, ragged_spec.row_splits_dtype) + result = tf.RaggedTensor.from_uniform_row_length( + result, rowlen, validate=False + ) + return result + + @property + def ragged_rank(self): + return self.type_spec.ragged_rank -class RaggedKerasTensor(KerasTensor): - """A specialized KerasTensor representation for `tf.RaggedTensor`s. - - Specifically, it: - - 1. Specializes the conversion to a placeholder in order - to maintain shape information for non-ragged dimensions. - 2. Overloads the KerasTensor's operators with the RaggedTensor versions - when they don't match the `tf.Tensor` versions - 3. Exposes some of the instance method/attribute that are unique to - the RaggedTensor API (such as ragged_rank). - """ - - def _to_placeholder(self): - ragged_spec = self.type_spec - if ragged_spec.ragged_rank == 0 or ragged_spec.shape.rank is None: - return super()._to_placeholder() - - flat_shape = ragged_spec.shape[ragged_spec.ragged_rank:] - result = tf.compat.v1.placeholder(ragged_spec.dtype, flat_shape) - - known_num_splits = [] - prod = 1 - for axis_size in ragged_spec.shape: - if prod is not None: - if axis_size is None or ( - getattr(axis_size, 'value', True) is None): - prod = None - else: - prod = prod * axis_size - known_num_splits.append(prod) - - for axis in range(ragged_spec.ragged_rank, 0, -1): - axis_size = ragged_spec.shape[axis] - if axis_size is None or (getattr(axis_size, 'value', True) is None): - num_splits = known_num_splits[axis-1] - if num_splits is not None: - num_splits = num_splits + 1 - splits = tf.compat.v1.placeholder( - ragged_spec.row_splits_dtype, [num_splits]) - result = tf.RaggedTensor.from_row_splits( - result, splits, validate=False) - else: - rowlen = tf.constant(axis_size, ragged_spec.row_splits_dtype) - result = tf.RaggedTensor.from_uniform_row_length( - result, rowlen, validate=False) - return result - - @property - def ragged_rank(self): - return self.type_spec.ragged_rank # Overload slicing -RaggedKerasTensor._overload_operator(tf.RaggedTensor, '__getitem__') # pylint: disable=protected-access +RaggedKerasTensor._overload_operator(tf.RaggedTensor, "__getitem__") # Overload math ops -RaggedKerasTensor._overload_operator(tf.RaggedTensor, '__add__') # pylint: disable=protected-access -RaggedKerasTensor._overload_operator(tf.RaggedTensor, '__radd__') # pylint: disable=protected-access -RaggedKerasTensor._overload_operator(tf.RaggedTensor, '__mul__') # pylint: disable=protected-access -RaggedKerasTensor._overload_operator(tf.RaggedTensor, '__rmul__') # pylint: 
disable=protected-access +RaggedKerasTensor._overload_operator(tf.RaggedTensor, "__add__") +RaggedKerasTensor._overload_operator(tf.RaggedTensor, "__radd__") +RaggedKerasTensor._overload_operator(tf.RaggedTensor, "__mul__") +RaggedKerasTensor._overload_operator(tf.RaggedTensor, "__rmul__") # TODO(b/161487382): @@ -495,27 +549,27 @@ def ragged_rank(self): # This is needed to not break Tensorflow probability # while they finish migrating to composite tensors. class UserRegisteredSpec(tf.TypeSpec): - """TypeSpec to represent user-registered symbolic objects.""" + """TypeSpec to represent user-registered symbolic objects.""" - def __init__(self, shape, dtype): - self.shape = shape - self._dtype = dtype - self.dtype = dtype + def __init__(self, shape, dtype): + self.shape = shape + self._dtype = dtype + self.dtype = dtype - def _component_specs(self): - raise NotImplementedError + def _component_specs(self): + raise NotImplementedError - def _from_components(self, components): - raise NotImplementedError + def _from_components(self, components): + raise NotImplementedError - def _serialize(self): - raise NotImplementedError + def _serialize(self): + raise NotImplementedError - def _to_components(self, value): - raise NotImplementedError + def _to_components(self, value): + raise NotImplementedError - def value_type(self): - raise NotImplementedError + def value_type(self): + raise NotImplementedError # TODO(b/161487382): @@ -525,46 +579,49 @@ def value_type(self): # This is needed to not break Tensorflow probability # while they finish migrating to composite tensors. class UserRegisteredTypeKerasTensor(KerasTensor): - """KerasTensor that represents legacy register_symbolic_tensor_type.""" + """KerasTensor that represents legacy register_symbolic_tensor_type.""" - def __init__(self, user_registered_symbolic_object): - x = user_registered_symbolic_object - self._user_registered_symbolic_object = x - type_spec = UserRegisteredSpec(x.shape, x.dtype) - name = getattr(x, 'name', None) + def __init__(self, user_registered_symbolic_object): + x = user_registered_symbolic_object + self._user_registered_symbolic_object = x + type_spec = UserRegisteredSpec(x.shape, x.dtype) + name = getattr(x, "name", None) - super().__init__(type_spec, name) + super().__init__(type_spec, name) - @classmethod - def from_tensor(cls, tensor): - return cls(tensor) + @classmethod + def from_tensor(cls, tensor): + return cls(tensor) - @classmethod - def from_type_spec(cls, type_spec, name=None): - raise NotImplementedError('You cannot instantiate a KerasTensor ' - 'directly from TypeSpec: %s' % type_spec) + @classmethod + def from_type_spec(cls, type_spec, name=None): + raise NotImplementedError( + "You cannot instantiate a KerasTensor directly from TypeSpec: %s" + % type_spec + ) - def _to_placeholder(self): - return self._user_registered_symbolic_object + def _to_placeholder(self): + return self._user_registered_symbolic_object class _KerasTensorIterator: - """Iterates over the leading dim of a KerasTensor. Performs 0 error checks.""" + """Iterates over the leading dim of a KerasTensor. 
Performs 0 error + checks.""" - def __init__(self, tensor, dim0): - self._tensor = tensor - self._index = 0 - self._limit = dim0 + def __init__(self, tensor, dim0): + self._tensor = tensor + self._index = 0 + self._limit = dim0 - def __iter__(self): - return self + def __iter__(self): + return self - def __next__(self): - if self._index == self._limit: - raise StopIteration - result = self._tensor[self._index] - self._index += 1 - return result + def __next__(self): + if self._index == self._limit: + raise StopIteration + result = self._tensor[self._index] + self._index += 1 + return result # Specify the mappings of tensor class to KerasTensor class. @@ -579,76 +636,83 @@ def __next__(self): (tf.Tensor, KerasTensor), (tf.SparseTensor, SparseKerasTensor), (tf.RaggedTensor, RaggedKerasTensor), - (object, KerasTensor) + (object, KerasTensor), ] def register_keras_tensor_specialization(cls, keras_tensor_subclass): - """Register a specialized KerasTensor subclass for a Tensor type.""" - # We always leave (object, KerasTensor) at the end as a generic fallback - keras_tensor_classes.insert(-1, (cls, keras_tensor_subclass)) + """Register a specialized KerasTensor subclass for a Tensor type.""" + # We always leave (object, KerasTensor) at the end as a generic fallback + keras_tensor_classes.insert(-1, (cls, keras_tensor_subclass)) def keras_tensor_to_placeholder(x): - """Construct a graph placeholder to represent a KerasTensor when tracing.""" - if isinstance(x, KerasTensor): - return x._to_placeholder() # pylint: disable=protected-access - else: - return x + """Construct a graph placeholder to represent a KerasTensor when tracing.""" + if isinstance(x, KerasTensor): + return x._to_placeholder() + else: + return x def keras_tensor_from_tensor(tensor): - """Convert a traced (composite)tensor to a representative KerasTensor.""" - # Create a specialized KerasTensor that supports instance methods, - # operators, and additional value inference if possible - keras_tensor_cls = None - for tensor_type, cls in keras_tensor_classes: - if isinstance(tensor, tensor_type): - keras_tensor_cls = cls - break + """Convert a traced (composite)tensor to a representative KerasTensor.""" + # Create a specialized KerasTensor that supports instance methods, + # operators, and additional value inference if possible + keras_tensor_cls = None + for tensor_type, cls in keras_tensor_classes: + if isinstance(tensor, tensor_type): + keras_tensor_cls = cls + break - out = keras_tensor_cls.from_tensor(tensor) + out = keras_tensor_cls.from_tensor(tensor) - if hasattr(tensor, '_keras_mask'): - out._keras_mask = keras_tensor_from_tensor(tensor._keras_mask) # pylint: disable=protected-access - return out + if getattr(tensor, "_keras_mask", None) is not None: + out._keras_mask = keras_tensor_from_tensor(tensor._keras_mask) + return out def keras_tensor_from_type_spec(type_spec, name=None): - """Convert a TypeSpec to a representative KerasTensor.""" - # Create a specialized KerasTensor that supports instance methods, - # operators, and additional value inference if possible - keras_tensor_cls = None - value_type = type_spec.value_type - for tensor_type, cls in keras_tensor_classes: - if issubclass(value_type, tensor_type): - keras_tensor_cls = cls - break + """Convert a TypeSpec to a representative KerasTensor.""" + # Create a specialized KerasTensor that supports instance methods, + # operators, and additional value inference if possible + keras_tensor_cls = None + value_type = type_spec.value_type + for tensor_type, cls in 
keras_tensor_classes: + if issubclass(value_type, tensor_type): + keras_tensor_cls = cls + break - return keras_tensor_cls.from_type_spec(type_spec, name=name) + return keras_tensor_cls.from_type_spec(type_spec, name=name) def type_spec_with_shape(spec, shape): - """Returns a copy of TypeSpec `spec` with its shape set to `shape`.""" - if isinstance(spec, tf.TensorSpec): - # pylint: disable=protected-access - # TODO(b/203201161) Figure out why mutation is needed here, and remove it. - # (TensorSpec objects should be immutable; and we should not be modifying - # private fields.) - shape = tf.TensorShape(shape) - spec._shape = shape - return spec - elif isinstance(spec, tf.RaggedTensorSpec): - return tf.RaggedTensorSpec(shape, spec.dtype, spec.ragged_rank, - spec.row_splits_dtype, - spec.flat_values_spec) - elif isinstance(spec, tf.SparseTensorSpec): - return tf.SparseTensorSpec(shape, spec.dtype) - elif hasattr(spec, 'with_shape'): - # TODO(edloper): Consider adding .with_shape method to TensorSpec, - # RaggedTensorSpec, and SparseTensorSpec. - return spec.with_shape(shape) - else: - # TODO(edloper): Consider moving this check to the KerasTensor constructor. - raise ValueError('Keras requires TypeSpec to have a `with_shape` method ' - 'that returns a copy of `self` with an updated shape.') + """Returns a copy of TypeSpec `spec` with its shape set to `shape`.""" + if isinstance(spec, tf.TensorSpec): + + # TODO(b/203201161) Figure out why mutation is needed here, and remove + # it. (TensorSpec objects should be immutable; and we should not be + # modifying private fields.) + shape = tf.TensorShape(shape) + spec._shape = shape + return spec + elif isinstance(spec, tf.RaggedTensorSpec): + return tf.RaggedTensorSpec( + shape, + spec.dtype, + spec.ragged_rank, + spec.row_splits_dtype, + spec.flat_values_spec, + ) + elif isinstance(spec, tf.SparseTensorSpec): + return tf.SparseTensorSpec(shape, spec.dtype) + elif hasattr(spec, "with_shape"): + # TODO(edloper): Consider adding .with_shape method to TensorSpec, + # RaggedTensorSpec, and SparseTensorSpec. + return spec.with_shape(shape) + else: + # TODO(edloper): Consider moving this check to the KerasTensor + # constructor. + raise ValueError( + "Keras requires TypeSpec to have a `with_shape` method " + "that returns a copy of `self` with an updated shape." + ) diff --git a/keras/engine/keras_tensor_test.py b/keras/engine/keras_tensor_test.py index bd0b4f271454..6f08689c7ebf 100644 --- a/keras/engine/keras_tensor_test.py +++ b/keras/engine/keras_tensor_test.py @@ -13,211 +13,265 @@ # limitations under the License. 
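For orientation, a minimal sketch (not part of the patch) of how the helpers above are meant to be used, assuming TF 2.x and the module layout in this diff:

import tensorflow.compat.v2 as tf

from keras.engine import keras_tensor

# Build a symbolic KerasTensor straight from a TypeSpec; the most specific
# entry in `keras_tensor_classes` wins, with (object, KerasTensor) kept at
# the end as the generic fallback.
kt = keras_tensor.keras_tensor_from_type_spec(tf.TensorSpec([None, 3], tf.float32))

# `type_spec_with_shape` is the helper behind `KerasTensor.set_shape` (cf.
# `test_set_shape_error` below, which triggers its error message); for dense
# specs it currently mutates the spec in place, per the TODO(b/203201161).
spec = keras_tensor.type_spec_with_shape(tf.TensorSpec([8, None], tf.int32), [8, 3])
assert spec == tf.TensorSpec([8, 3], tf.int32)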
# ============================================================================== """InputSpec tests.""" -# pylint: disable=g-bad-import-order -import tensorflow.compat.v2 as tf +import tensorflow.compat.v2 as tf from absl.testing import parameterized -from keras.testing_infra import test_combinations + from keras import layers -from keras.testing_infra import test_utils from keras.engine import keras_tensor from keras.engine import training +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils class CustomTypeSpec(tf.TypeSpec): - """Stubbed-out custom type spec, for testing.""" + """Stubbed-out custom type spec, for testing.""" - def __init__(self, shape, dtype): - self.shape = tf.TensorShape(shape) - self.dtype = tf.dtypes.as_dtype(dtype) + def __init__(self, shape, dtype): + self.shape = tf.TensorShape(shape) + self.dtype = tf.dtypes.as_dtype(dtype) - # Stub implementations for all the TypeSpec methods: - value_type = None - _to_components = lambda self, value: None - _from_components = lambda self, components: None - _component_specs = property(lambda self: None) - _serialize = lambda self: (self.shape, self.dtype) + # Stub implementations for all the TypeSpec methods: + value_type = None + _to_components = lambda self, value: None + _from_components = lambda self, components: None + _component_specs = property(lambda self: None) + _serialize = lambda self: (self.shape, self.dtype) class CustomTypeSpec2(CustomTypeSpec): - """Adds a with_shape method to CustomTypeSpec.""" + """Adds a with_shape method to CustomTypeSpec.""" - def with_shape(self, new_shape): - return CustomTypeSpec2(new_shape, self.dtype) + def with_shape(self, new_shape): + return CustomTypeSpec2(new_shape, self.dtype) @test_utils.run_v2_only class KerasTensorTest(test_combinations.TestCase): + def test_repr_and_string(self): + kt = keras_tensor.KerasTensor( + type_spec=tf.TensorSpec(shape=(1, 2, 3), dtype=tf.float32) + ) + expected_str = ( + "KerasTensor(type_spec=TensorSpec(shape=(1, 2, 3), " + "dtype=tf.float32, name=None))" + ) + expected_repr = "" + self.assertEqual(expected_str, str(kt)) + self.assertEqual(expected_repr, repr(kt)) + + kt = keras_tensor.KerasTensor( + type_spec=tf.TensorSpec(shape=(2,), dtype=tf.int32), + inferred_value=[2, 3], + ) + expected_str = ( + "KerasTensor(type_spec=TensorSpec(shape=(2,), " + "dtype=tf.int32, name=None), inferred_value=[2, 3])" + ) + expected_repr = ( + "" + ) + self.assertEqual(expected_str, str(kt)) + self.assertEqual(expected_repr, repr(kt)) + + kt = keras_tensor.KerasTensor( + type_spec=tf.SparseTensorSpec(shape=(1, 2, 3), dtype=tf.float32) + ) + expected_str = ( + "KerasTensor(type_spec=SparseTensorSpec(" + "TensorShape([1, 2, 3]), tf.float32))" + ) + expected_repr = ( + "" + ) + self.assertEqual(expected_str, str(kt)) + self.assertEqual(expected_repr, repr(kt)) + + inp = layers.Input(shape=(3, 5)) + kt = layers.Dense(10)(inp) + expected_str = ( + "KerasTensor(type_spec=TensorSpec(shape=(None, 3, 10), " + "dtype=tf.float32, name=None), name='dense/BiasAdd:0', " + "description=\"created by layer 'dense'\")" + ) + expected_repr = ( + "" + ) + self.assertEqual(expected_str, str(kt)) + self.assertEqual(expected_repr, repr(kt)) + + kt = tf.reshape(kt, shape=(3, 5, 2)) + expected_str = ( + "KerasTensor(type_spec=TensorSpec(shape=(3, 5, 2), " + "dtype=tf.float32, name=None), name='tf.reshape/Reshape:0', " + "description=\"created by layer 'tf.reshape'\")" + ) + expected_repr = ( + "" + ) + self.assertEqual(expected_str, str(kt)) + 
self.assertEqual(expected_repr, repr(kt)) + + kts = tf.unstack(kt) + for i in range(3): + expected_str = ( + "KerasTensor(type_spec=TensorSpec(shape=(5, 2), " + "dtype=tf.float32, name=None), name='tf.unstack/unstack:%s', " + "description=\"created by layer 'tf.unstack'\")" % (i,) + ) + expected_repr = ( + "" + ) + self.assertEqual(expected_str, str(kts[i])) + self.assertEqual(expected_repr, repr(kts[i])) + + @parameterized.parameters( + {"property_name": "values"}, + {"property_name": "indices"}, + {"property_name": "dense_shape"}, + ) + def test_sparse_instance_property(self, property_name): + inp = layers.Input(shape=[3], sparse=True) + out = getattr(inp, property_name) + model = training.Model(inp, out) + + x = tf.SparseTensor( + [[0, 0], [0, 1], [1, 1], [1, 2]], [1, 2, 3, 4], [2, 3] + ) + expected_property = getattr(x, property_name) + self.assertAllEqual(model(x), expected_property) + + # Test that it works with serialization and deserialization as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected_property) + + @parameterized.parameters( + [ + (tf.TensorSpec([2, 3], tf.int32), [2, 3]), + (tf.RaggedTensorSpec([2, None]), [2, None]), + (tf.SparseTensorSpec([8]), [8]), + (CustomTypeSpec([3, 8], tf.int32), [3, 8]), + ] + ) + def test_shape(self, spec, expected_shape): + kt = keras_tensor.KerasTensor(spec) + self.assertEqual(kt.shape.as_list(), expected_shape) + + @parameterized.parameters( + [ + (tf.TensorSpec([8, 3], tf.int32), [8, 3], [8, 3]), + (tf.TensorSpec([None, 3], tf.int32), [8, 3], [8, 3]), + (tf.TensorSpec([8, 3], tf.int32), [None, 3], [8, 3]), + (tf.TensorSpec(None, tf.int32), [8, 3], [8, 3]), + (tf.TensorSpec(None, tf.int32), [8, None], [8, None]), + (tf.TensorSpec(None, tf.int32), None, None), + (tf.RaggedTensorSpec([2, None, None]), [2, None, 5], [2, None, 5]), + (tf.SparseTensorSpec([8]), [8], [8]), + (CustomTypeSpec2([3, None], tf.int32), [3, 8], [3, 8]), + ] + ) + def test_set_shape(self, spec, new_shape, expected_shape): + kt = keras_tensor.KerasTensor(spec) + kt.set_shape(new_shape) + if expected_shape is None: + self.assertIsNone(kt.type_spec.shape.rank) + else: + self.assertEqual(kt.type_spec.shape.as_list(), expected_shape) + self.assertTrue(kt.type_spec.is_compatible_with(spec)) + + @parameterized.parameters( + [ + (layers.Input(shape=[3, 4], batch_size=7), tf.reshape), + (layers.Input(shape=[3, 4], ragged=True, batch_size=7), tf.reshape), + ( + layers.Input(shape=[3, 4], sparse=True, batch_size=7), + tf.sparse.reshape, + ), + ] + ) + def test_reshape(self, inp, reshape_op): + out = reshape_op(inp, shape=[7, 4, 3]) + self.assertEqual(out.type_spec.shape.as_list(), [7, 4, 3]) + + def test_set_shape_error(self): + spec = CustomTypeSpec([3, None], tf.int32) + kt = keras_tensor.KerasTensor(spec) + with self.assertRaisesRegex( + ValueError, "Keras requires TypeSpec to have a `with_shape` method" + ): + kt.set_shape([3, 3]) + + def test_set_shape_equals_expected_shape(self): + # Tests b/203201161: DenseSpec has both a _shape and a _shape_tuple + # field, and we need to be sure both get updated. + kt = keras_tensor.KerasTensor(tf.TensorSpec([8, None], tf.int32)) + kt.set_shape([8, 3]) + self.assertEqual(kt.type_spec, tf.TensorSpec([8, 3], tf.int32)) + + def test_type_spec_with_shape_equals_expected_shape(self): + # Tests b/203201161: DenseSpec has both a _shape and a _shape_tuple + # field, and we need to be sure both get updated. 
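+ # For dense specs, `type_spec_with_shape` currently mutates the spec's
+ # private `_shape` and returns the same object (see the
+ # TODO(b/203201161) in keras_tensor.py), so `spec1` below ends up
+ # updated in place as well.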
+ spec1 = tf.TensorSpec([8, None], tf.int32) + spec2 = keras_tensor.type_spec_with_shape(spec1, [8, 3]) + expected = tf.TensorSpec([8, 3], tf.int32) + self.assertEqual(spec2, expected) + + def test_missing_shape_error(self): + spec = CustomTypeSpec(None, tf.int32) + del spec.shape + with self.assertRaisesRegex( + ValueError, + "KerasTensor only supports TypeSpecs that have a shape field; .*", + ): + keras_tensor.KerasTensor(spec) + + def test_wrong_shape_type_error(self): + spec = CustomTypeSpec(None, tf.int32) + spec.shape = "foo" + with self.assertRaisesRegex( + TypeError, + "KerasTensor requires that wrapped TypeSpec's shape is a " + "TensorShape; .*", + ): + keras_tensor.KerasTensor(spec) + + def test_missing_dtype_error(self): + spec = CustomTypeSpec(None, tf.int32) + del spec.dtype + kt = keras_tensor.KerasTensor(spec) + with self.assertRaisesRegex( + AttributeError, + "KerasTensor wraps TypeSpec .* which does not have a dtype.", + ): + kt.dtype + + def test_wrong_dtype_type_error(self): + spec = CustomTypeSpec(None, tf.int32) + spec.dtype = "foo" + kt = keras_tensor.KerasTensor(spec) + with self.assertRaisesRegex( + TypeError, + "KerasTensor requires that wrapped TypeSpec's dtype is a DType; .*", + ): + kt.dtype + + def test_from_tensor_mask_tensor_is_none(self): + tensor = tf.constant([1.0]) + kt = keras_tensor.keras_tensor_from_tensor(tensor) + self.assertIsNone(getattr(kt, "_keras_mask", None)) - def test_repr_and_string(self): - kt = keras_tensor.KerasTensor( - type_spec=tf.TensorSpec(shape=(1, 2, 3), dtype=tf.float32)) - expected_str = ("KerasTensor(type_spec=TensorSpec(shape=(1, 2, 3), " - "dtype=tf.float32, name=None))") - expected_repr = "" - self.assertEqual(expected_str, str(kt)) - self.assertEqual(expected_repr, repr(kt)) - - kt = keras_tensor.KerasTensor( - type_spec=tf.TensorSpec(shape=(2,), dtype=tf.int32), - inferred_value=[2, 3]) - expected_str = ("KerasTensor(type_spec=TensorSpec(shape=(2,), " - "dtype=tf.int32, name=None), inferred_value=[2, 3])") - expected_repr = ( - "") - self.assertEqual(expected_str, str(kt)) - self.assertEqual(expected_repr, repr(kt)) - - kt = keras_tensor.KerasTensor( - type_spec=tf.SparseTensorSpec( - shape=(1, 2, 3), dtype=tf.float32)) - expected_str = ("KerasTensor(type_spec=SparseTensorSpec(" - "TensorShape([1, 2, 3]), tf.float32))") - expected_repr = ( - "") - self.assertEqual(expected_str, str(kt)) - self.assertEqual(expected_repr, repr(kt)) - - inp = layers.Input(shape=(3, 5)) - kt = layers.Dense(10)(inp) - expected_str = ( - "KerasTensor(type_spec=TensorSpec(shape=(None, 3, 10), " - "dtype=tf.float32, name=None), name='dense/BiasAdd:0', " - "description=\"created by layer 'dense'\")") - expected_repr = ( - "") - self.assertEqual(expected_str, str(kt)) - self.assertEqual(expected_repr, repr(kt)) - - kt = tf.reshape(kt, shape=(3, 5, 2)) - expected_str = ( - "KerasTensor(type_spec=TensorSpec(shape=(3, 5, 2), dtype=tf.float32, " - "name=None), name='tf.reshape/Reshape:0', description=\"created " - "by layer 'tf.reshape'\")") - expected_repr = ("") - self.assertEqual(expected_str, str(kt)) - self.assertEqual(expected_repr, repr(kt)) - - kts = tf.unstack(kt) - for i in range(3): - expected_str = ( - "KerasTensor(type_spec=TensorSpec(shape=(5, 2), dtype=tf.float32, " - "name=None), name='tf.unstack/unstack:%s', description=\"created " - "by layer 'tf.unstack'\")" % (i,)) - expected_repr = ("") - self.assertEqual(expected_str, str(kts[i])) - self.assertEqual(expected_repr, repr(kts[i])) - - @parameterized.parameters( - {"property_name": 
"values"}, - {"property_name": "indices"}, - {"property_name": "dense_shape"}, - ) - def test_sparse_instance_property(self, property_name): - inp = layers.Input(shape=[3], sparse=True) - out = getattr(inp, property_name) - model = training.Model(inp, out) - - x = tf.SparseTensor([[0, 0], [0, 1], [1, 1], [1, 2]], [1, 2, 3, 4], [2, 3]) - expected_property = getattr(x, property_name) - self.assertAllEqual(model(x), expected_property) - - # Test that it works with serialization and deserialization as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected_property) - - @parameterized.parameters([ - (tf.TensorSpec([2, 3], tf.int32), [2, 3]), - (tf.RaggedTensorSpec([2, None]), [2, None]), - (tf.SparseTensorSpec([8]), [8]), - (CustomTypeSpec([3, 8], tf.int32), [3, 8]), - ]) - def test_shape(self, spec, expected_shape): - kt = keras_tensor.KerasTensor(spec) - self.assertEqual(kt.shape.as_list(), expected_shape) - - @parameterized.parameters([ - (tf.TensorSpec([8, 3], tf.int32), [8, 3], [8, 3]), - (tf.TensorSpec([None, 3], tf.int32), [8, 3], [8, 3]), - (tf.TensorSpec([8, 3], tf.int32), [None, 3], [8, 3]), - (tf.TensorSpec(None, tf.int32), [8, 3], [8, 3]), - (tf.TensorSpec(None, tf.int32), [8, None], [8, None]), - (tf.TensorSpec(None, tf.int32), None, None), - (tf.RaggedTensorSpec([2, None, None]), [2, None, 5], [2, None, 5]), - (tf.SparseTensorSpec([8]), [8], [8]), - (CustomTypeSpec2([3, None], tf.int32), [3, 8], [3, 8]), - ]) - def test_set_shape(self, spec, new_shape, expected_shape): - kt = keras_tensor.KerasTensor(spec) - kt.set_shape(new_shape) - if expected_shape is None: - self.assertIsNone(kt.type_spec.shape.rank) - else: - self.assertEqual(kt.type_spec.shape.as_list(), expected_shape) - self.assertTrue(kt.type_spec.is_compatible_with(spec)) - - def test_set_shape_error(self): - spec = CustomTypeSpec([3, None], tf.int32) - kt = keras_tensor.KerasTensor(spec) - with self.assertRaisesRegex( - ValueError, "Keras requires TypeSpec to have a `with_shape` method"): - kt.set_shape([3, 3]) - - def test_set_shape_equals_expected_shape(self): - # Tests b/203201161: DenseSpec has both a _shape and a _shape_tuple field, - # and we need to be sure both get updated. - kt = keras_tensor.KerasTensor(tf.TensorSpec([8, None], tf.int32)) - kt.set_shape([8, 3]) - self.assertEqual(kt.type_spec, tf.TensorSpec([8, 3], tf.int32)) - - def test_type_spec_with_shape_equals_expected_shape(self): - # Tests b/203201161: DenseSpec has both a _shape and a _shape_tuple field, - # and we need to be sure both get updated. 
- spec1 = tf.TensorSpec([8, None], tf.int32) - spec2 = keras_tensor.type_spec_with_shape(spec1, [8, 3]) - expected = tf.TensorSpec([8, 3], tf.int32) - self.assertEqual(spec2, expected) - - def test_missing_shape_error(self): - spec = CustomTypeSpec(None, tf.int32) - del spec.shape - with self.assertRaisesRegex( - ValueError, - "KerasTensor only supports TypeSpecs that have a shape field; .*"): - keras_tensor.KerasTensor(spec) - - def test_wrong_shape_type_error(self): - spec = CustomTypeSpec(None, tf.int32) - spec.shape = "foo" - with self.assertRaisesRegex( - TypeError, "KerasTensor requires that wrapped TypeSpec's shape is a " - "TensorShape; .*"): - keras_tensor.KerasTensor(spec) - - def test_missing_dtype_error(self): - spec = CustomTypeSpec(None, tf.int32) - del spec.dtype - kt = keras_tensor.KerasTensor(spec) - with self.assertRaisesRegex( - AttributeError, - "KerasTensor wraps TypeSpec .* which does not have a dtype."): - kt.dtype # pylint: disable=pointless-statement - - def test_wrong_dtype_type_error(self): - spec = CustomTypeSpec(None, tf.int32) - spec.dtype = "foo" - kt = keras_tensor.KerasTensor(spec) - with self.assertRaisesRegex( - TypeError, - "KerasTensor requires that wrapped TypeSpec's dtype is a DType; .*"): - kt.dtype # pylint: disable=pointless-statement + def test_from_tensor_mask_tensor_is_not_none(self): + tensor = tf.constant([1.0]) + tensor._keras_mask = tf.constant([1.0]) + kt = keras_tensor.keras_tensor_from_tensor(tensor) + self.assertIsInstance(kt._keras_mask, keras_tensor.KerasTensor) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/engine/node.py b/keras/engine/node.py index 2647f44d614a..946b9fce32b2 100644 --- a/keras/engine/node.py +++ b/keras/engine/node.py @@ -12,306 +12,333 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access -# pylint: disable=g-classes-have-attributes -"""Contains the `Node` class.""" -import tensorflow.compat.v2 as tf + +"""Contains the `Node` class.""" import collections import copy import json + import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras.engine import base_layer_utils -from keras.saving.saved_model import json_utils +from keras.saving.legacy.saved_model import json_utils from keras.utils import tf_utils -_CONSTANT_VALUE = '_CONSTANT_VALUE' +_CONSTANT_VALUE = "_CONSTANT_VALUE" # Using dict to avoid conflict with constant string tensor. -_COMPOSITE_TYPE = {'_TYPE': 'COMPOSITE'} +_COMPOSITE_TYPE = {"_TYPE": "COMPOSITE"} class Node: - """A `Node` describes a layer `__call__()` event. - - A Functional model is a DAG with `Node` instances as nodes, and `KerasTensor` - instances as edges. Nodes aren't `Layer` instances, because a single layer - could be called multiple times, which would result in graph cycles. - - A `__call__()` event involves input tensors (and other input arguments), - the layer that was called, and the resulting output tensors. - A `Node` will include all this information. - - Since a single `Layer` could be called multiple times, the `Node` instances - are stored on layers as a list. Each time a layer is called - a node is added to `layer._inbound_nodes`. Each time the output of a layer is - used by another layer, a node is added to `layer._outbound_nodes`. 
- - Every `KerasTensor` instance has a `KerasHistory` object attached, - which tracks the `Node` that records the `__call__()` event that created - the tensor. By recursively walking through `Node` instances - via the `KerasHistory` metadata of `KerasTensor` instances, once can - retrieve the entire DAG of a Functional model. - - Args: - layer: The layer that was called in the `Layer.__call__()` - event that this node represents. - call_args: The positional arguments the layer was called with. - call_kwargs: The keyword arguments the layer was called with. - outputs: The output tensors of the `Layer.__call__()` - """ - - def __init__(self, - layer, - call_args=None, - call_kwargs=None, - outputs=None): - call_args = [] if call_args is None else call_args - call_kwargs = {} if call_kwargs is None else call_kwargs - outputs = [] if outputs is None else outputs - - self.layer = layer - self.is_input = not call_args and not call_kwargs - - # These arguments are user-provided. Copy the structures here so that - # future user modifications do not affect the node's metadata. - # We copy using map_structure rather than python's shallow or deep copy, - # because the args can be data structures (so shallow copy is - # insufficient), but individual values might not support copy.copy - # or be too expensive to deep copy. - call_args = tf.nest.map_structure(lambda t: t, call_args) - call_kwargs = tf.nest.map_structure(lambda t: t, call_kwargs) - self.outputs = tf.nest.map_structure(lambda t: t, outputs) - self.call_args = call_args - self.call_kwargs = call_kwargs - - # Cached for performance. - self._flat_arguments = tf.nest.flatten((self.call_args, self.call_kwargs)) - # Used to avoid expensive `nest` operations in the most common case. - self._single_positional_tensor_passed = (not self.call_kwargs and len( - self.call_args) == 1 and tf.is_tensor(self.call_args[0])) - - if not tf.compat.v1.executing_eagerly_outside_functions(): - # Create TensorFlowOpLayers if needed (in TF1) - for obj in self._flat_arguments: - if (isinstance(obj, tf.Tensor) and - base_layer_utils.needs_keras_history( - obj, ignore_call_context=True)): - base_layer_utils.create_keras_history(obj) - - self._keras_inputs = [] - self._keras_inputs_ids_and_indices = [] - for i, ele in enumerate(self._flat_arguments): - if is_keras_tensor(ele): - self._keras_inputs.append(ele) - kt_id = str(id(ele)) - kt_index = i - self._keras_inputs_ids_and_indices.append((kt_id, kt_index)) - - # Wire up Node to Layers. - self.layer._inbound_nodes.append(self) - for kt in self.keras_inputs: - inbound_layer = kt._keras_history.layer - if inbound_layer is not None: # `None` for `Input` tensors. - inbound_layer._outbound_nodes.append(self) - - # Set metadata on outputs. - node_index = len(self.layer._inbound_nodes) - 1 - for i, tensor in enumerate(tf.nest.flatten(outputs)): - tensor._keras_history = KerasHistory( - layer=layer, node_index=node_index, tensor_index=i) - - # Cached for performance. 
- self.flat_input_ids = [str(id(t)) for t in self._keras_inputs] - self.flat_output_ids = [str(id(t)) for t in tf.nest.flatten(self.outputs)] - - @property - def keras_inputs(self): - """Tensors input to this node that can be traced back to a `keras.Input`.""" - return self._keras_inputs - - @property - def parent_nodes(self): - """Returns all the `Node`s whose output this node immediately depends on.""" - node_deps = [] - for kt in self.keras_inputs: - layer = kt._keras_history.layer - node_index = kt._keras_history.node_index - if layer is not None: # `None` for `Input` tensors. - node_deps.append(layer._inbound_nodes[node_index]) - return node_deps - - def iterate_inbound(self): - """Yields tuples representing the data inbound from other nodes. - - Yields: - tuples like: (inbound_layer, node_index, tensor_index, tensor). + """A `Node` describes a layer `__call__()` event. + + A Functional model is a DAG with `Node` instances as nodes, and + `KerasTensor` instances as edges. Nodes aren't `Layer` instances, because a + single layer could be called multiple times, which would result in graph + cycles. + + A `__call__()` event involves input tensors (and other input arguments), + the layer that was called, and the resulting output tensors. + A `Node` will include all this information. + + Since a single `Layer` could be called multiple times, the `Node` instances + are stored on layers as a list. Each time a layer is called, a node is added + to `layer._inbound_nodes`. Each time the output of a layer is used by + another layer, a node is added to `layer._outbound_nodes`. + + Every `KerasTensor` instance has a `KerasHistory` object attached, + which tracks the `Node` that records the `__call__()` event that created + the tensor. By recursively walking through `Node` instances + via the `KerasHistory` metadata of `KerasTensor` instances, one can + retrieve the entire DAG of a Functional model. + + Args: + layer: The layer that was called in the `Layer.__call__()` + event that this node represents. + call_args: The positional arguments the layer was called with. + call_kwargs: The keyword arguments the layer was called with. + outputs: The output tensors of the `Layer.__call__()` """ - for kt in self.keras_inputs: - keras_history = kt._keras_history - layer = keras_history.layer - node_index = keras_history.node_index - tensor_index = keras_history.tensor_index - yield layer, node_index, tensor_index, kt - - def map_arguments(self, tensor_dict): - """Maps Keras Tensors to computed Tensors using `tensor_dict`.""" - if self._single_positional_tensor_passed: - # Performance optimization for most common case. - kt_id, _ = self._keras_inputs_ids_and_indices[0] - return (tensor_dict[kt_id].pop(),), {} - else: - flat_arguments = copy.copy(self._flat_arguments) - for kt_id, kt_index in self._keras_inputs_ids_and_indices: - flat_arguments[kt_index] = tensor_dict[kt_id].pop() - - args, kwargs = tf.nest.pack_sequence_as((self.call_args, self.call_kwargs), - flat_arguments) - return args, kwargs - - def serialize(self, make_node_key, node_conversion_map): - """Serializes `Node` for Functional API's `get_config`.""" - # Serialization still special-cases first argument. - args, kwargs = self.call_args, self.call_kwargs - inputs, args, kwargs = self.layer._call_spec.split_out_first_arg( - args, kwargs) - - # Treat everything other than first argument as a kwarg.
- arguments = dict(zip(self.layer._call_spec.arg_names[1:], args)) - arguments.update(kwargs) - kwargs = arguments - - def _serialize_keras_tensor(t): - """Serializes a single Tensor passed to `call`.""" - if hasattr(t, '_keras_history'): - kh = t._keras_history - node_index = kh.node_index - node_key = make_node_key(kh.layer.name, node_index) - new_node_index = node_conversion_map.get(node_key, 0) - return [kh.layer.name, new_node_index, kh.tensor_index] - - if isinstance(t, np.ndarray): - return t.tolist() - - if isinstance(t, tf.Tensor): - return backend.get_value(t).tolist() - - # Not using json_utils to serialize both constant Tensor and constant - # CompositeTensor for saving format backward compatibility. - if isinstance(t, tf.__internal__.CompositeTensor): - return (_COMPOSITE_TYPE, json_utils.Encoder().encode(t)) - - return t - - kwargs = tf.nest.map_structure(_serialize_keras_tensor, kwargs) - try: - json.dumps(kwargs, default=json_utils.get_json_type) - except TypeError: - kwarg_types = tf.nest.map_structure(type, kwargs) - raise TypeError('Layer ' + self.layer.name + - ' was passed non-JSON-serializable arguments. ' + - 'Arguments had types: ' + - str(kwarg_types) + '. They cannot be serialized out ' - 'when saving the model.') - - # `kwargs` is added to each Tensor in the first arg. This should be - # changed in a future version of the serialization format. - def serialize_first_arg_tensor(t): - if is_keras_tensor(t): - kh = t._keras_history - node_index = kh.node_index - node_key = make_node_key(kh.layer.name, node_index) - new_node_index = node_conversion_map.get(node_key, 0) - data = [kh.layer.name, new_node_index, kh.tensor_index, kwargs] - else: - # If an element in the first call argument did not originate as a - # keras tensor and is a constant value, we save it using the format - # ['_CONSTANT_VALUE', -1, serialized_tensor_or_python_constant] - # (potentially including serialized kwargs in an optional 4th argument). - data = [_CONSTANT_VALUE, -1, _serialize_keras_tensor(t), kwargs] - return tf_utils.ListWrapper(data) - - data = tf.nest.map_structure(serialize_first_arg_tensor, inputs) - if (not tf.nest.is_nested(data) and - not self.layer._preserve_input_structure_in_config): - data = [data] - data = tf_utils.convert_inner_node_data(data) - return data - - ############################################################# - # Properties for Backwards compatibility. - # These only check the first input argument - # As nodes are internal, they may be removed in the future. - ############################################################# - - @property - def input_tensors(self): - if self.is_input: - return [self.outputs] # Used in `Layer.input`. - return self.call_args[0] - - @property - def output_tensors(self): - if self.is_input: - return [self.outputs] # Used in `Layer.input`. 
- return self.outputs - - @property - def input_shapes(self): - input_shapes = tf.nest.map_structure(backend.int_shape, self.input_tensors) - if len(input_shapes) == 1 and not self.is_input: - return input_shapes[0] - return input_shapes - - @property - def output_shapes(self): - return tf.nest.map_structure(backend.int_shape, self.output_tensors) - - @property - def outbound_layer(self): - return self.layer - - @property - def inbound_layers(self): - """Return all layers that feed into the current node.""" - if self.is_input: - return [] - tensor_call_args = [x for x in self._flat_arguments - if tf.is_tensor(x) and hasattr(x, '_keras_history')] - inbound_layers = tf.nest.map_structure(lambda t: t._keras_history.layer, - tensor_call_args) - if len(inbound_layers) == 1: - return inbound_layers[0] - return inbound_layers + + def __init__(self, layer, call_args=None, call_kwargs=None, outputs=None): + call_args = [] if call_args is None else call_args + call_kwargs = {} if call_kwargs is None else call_kwargs + outputs = [] if outputs is None else outputs + + self.layer = layer + self.is_input = not call_args and not call_kwargs + + # These arguments are user-provided. Copy the structures here so that + # future user modifications do not affect the node's metadata. + # We copy using map_structure rather than python's shallow or deep copy, + # because the args can be data structures (so shallow copy is + # insufficient), but individual values might not support copy.copy + # or be too expensive to deep copy. + call_args = tf.nest.map_structure(lambda t: t, call_args) + call_kwargs = tf.nest.map_structure(lambda t: t, call_kwargs) + self.outputs = tf.nest.map_structure(lambda t: t, outputs) + self.call_args = call_args + self.call_kwargs = call_kwargs + + # Cached for performance. + self._flat_arguments = tf.nest.flatten( + (self.call_args, self.call_kwargs) + ) + # Used to avoid expensive `nest` operations in the most common case. + self._single_positional_tensor_passed = ( + not self.call_kwargs + and len(self.call_args) == 1 + and tf.is_tensor(self.call_args[0]) + ) + + if not tf.compat.v1.executing_eagerly_outside_functions(): + # Create TensorFlowOpLayers if needed (in TF1) + for obj in self._flat_arguments: + if isinstance( + obj, tf.Tensor + ) and base_layer_utils.needs_keras_history( + obj, ignore_call_context=True + ): + base_layer_utils.create_keras_history(obj) + + self._keras_inputs = [] + self._keras_inputs_ids_and_indices = [] + for i, ele in enumerate(self._flat_arguments): + if is_keras_tensor(ele): + self._keras_inputs.append(ele) + kt_id = str(id(ele)) + kt_index = i + self._keras_inputs_ids_and_indices.append((kt_id, kt_index)) + + # Wire up Node to Layers. + self.layer._inbound_nodes.append(self) + for kt in self.keras_inputs: + inbound_layer = kt._keras_history.layer + if inbound_layer is not None: # `None` for `Input` tensors. + inbound_layer._outbound_nodes.append(self) + + # Set metadata on outputs. + node_index = len(self.layer._inbound_nodes) - 1 + for i, tensor in enumerate(tf.nest.flatten(outputs)): + tensor._keras_history = KerasHistory( + layer=layer, node_index=node_index, tensor_index=i + ) + + # Cached for performance. 
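+ # These `str(id(...))` keys mirror `_keras_inputs_ids_and_indices`
+ # above: during functional-model execution, computed tensors are
+ # stored and fetched in a `tensor_dict` keyed by the object identity
+ # of the corresponding KerasTensors (see `map_arguments` below).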
+ self.flat_input_ids = [str(id(t)) for t in self._keras_inputs] + self.flat_output_ids = [ + str(id(t)) for t in tf.nest.flatten(self.outputs) + ] + + @property + def keras_inputs(self): + """Tensors input to this node that can be traced back to a + `keras.Input`.""" + return self._keras_inputs + + @property + def parent_nodes(self): + """Returns all the `Node`s whose output this node immediately depends + on.""" + node_deps = [] + for kt in self.keras_inputs: + layer = kt._keras_history.layer + node_index = kt._keras_history.node_index + if layer is not None: # `None` for `Input` tensors. + node_deps.append(layer._inbound_nodes[node_index]) + return node_deps + + def iterate_inbound(self): + """Yields tuples representing the data inbound from other nodes. + + Yields: + tuples like: (inbound_layer, node_index, tensor_index, tensor). + """ + for kt in self.keras_inputs: + keras_history = kt._keras_history + layer = keras_history.layer + node_index = keras_history.node_index + tensor_index = keras_history.tensor_index + yield layer, node_index, tensor_index, kt + + def map_arguments(self, tensor_dict): + """Maps Keras Tensors to computed Tensors using `tensor_dict`.""" + if self._single_positional_tensor_passed: + # Performance optimization for most common case. + kt_id, _ = self._keras_inputs_ids_and_indices[0] + return (tensor_dict[kt_id].pop(),), {} + else: + flat_arguments = copy.copy(self._flat_arguments) + for kt_id, kt_index in self._keras_inputs_ids_and_indices: + flat_arguments[kt_index] = tensor_dict[kt_id].pop() + + args, kwargs = tf.nest.pack_sequence_as( + (self.call_args, self.call_kwargs), flat_arguments + ) + return args, kwargs + + def serialize(self, make_node_key, node_conversion_map): + """Serializes `Node` for Functional API's `get_config`.""" + # Serialization still special-cases first argument. + args, kwargs = self.call_args, self.call_kwargs + inputs, args, kwargs = self.layer._call_spec.split_out_first_arg( + args, kwargs + ) + + # Treat everything other than first argument as a kwarg. + arguments = dict(zip(self.layer._call_spec.arg_names[1:], args)) + arguments.update(kwargs) + kwargs = arguments + + def _serialize_keras_tensor(t): + """Serializes a single Tensor passed to `call`.""" + if hasattr(t, "_keras_history"): + kh = t._keras_history + node_index = kh.node_index + node_key = make_node_key(kh.layer.name, node_index) + new_node_index = node_conversion_map.get(node_key, 0) + return [kh.layer.name, new_node_index, kh.tensor_index] + + if isinstance(t, np.ndarray): + return t.tolist() + + if isinstance(t, tf.Tensor): + return backend.get_value(t).tolist() + + # Not using json_utils to serialize both constant Tensor and + # constant CompositeTensor for saving format backward compatibility. + if isinstance(t, tf.__internal__.CompositeTensor): + return (_COMPOSITE_TYPE, json_utils.Encoder().encode(t)) + + return t + + kwargs = tf.nest.map_structure(_serialize_keras_tensor, kwargs) + try: + json.dumps(kwargs, default=json_utils.get_json_type) + except TypeError: + kwarg_types = tf.nest.map_structure(type, kwargs) + raise TypeError( + "Layer " + + self.layer.name + + " was passed non-JSON-serializable arguments. " + + "Arguments had types: " + + str(kwarg_types) + + ". They cannot be serialized out when saving the model." + ) + + # `kwargs` is added to each Tensor in the first arg. This should be + # changed in a future version of the serialization format. 
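+ # Illustratively, for a layer called once on a single `keras.Input`,
+ # each first-arg entry serializes to
+ # [inbound_layer_name, node_index, tensor_index, kwargs],
+ # e.g. ["input_1", 0, 0, {}], while a constant first argument becomes
+ # ["_CONSTANT_VALUE", -1, serialized_value, kwargs].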
+ def serialize_first_arg_tensor(t): + if is_keras_tensor(t): + kh = t._keras_history + node_index = kh.node_index + node_key = make_node_key(kh.layer.name, node_index) + new_node_index = node_conversion_map.get(node_key, 0) + data = [kh.layer.name, new_node_index, kh.tensor_index, kwargs] + else: + # If an element in the first call argument did not originate as + # a keras tensor and is a constant value, we save it using the + # format ['_CONSTANT_VALUE', -1, + # serialized_tensor_or_python_constant] (potentially including + # serialized kwargs in an optional 4th argument). + data = [_CONSTANT_VALUE, -1, _serialize_keras_tensor(t), kwargs] + return tf_utils.ListWrapper(data) + + data = tf.nest.map_structure(serialize_first_arg_tensor, inputs) + if ( + not tf.nest.is_nested(data) + and not self.layer._preserve_input_structure_in_config + ): + data = [data] + data = tf_utils.convert_inner_node_data(data) + return data + + ############################################################# + # Properties for Backwards compatibility. + # These only check the first input argument + # As nodes are internal, they may be removed in the future. + ############################################################# + + @property + def input_tensors(self): + if self.is_input: + return [self.outputs] # Used in `Layer.input`. + return self.call_args[0] + + @property + def output_tensors(self): + if self.is_input: + return [self.outputs] # Used in `Layer.input`. + return self.outputs + + @property + def input_shapes(self): + input_shapes = tf.nest.map_structure( + backend.int_shape, self.input_tensors + ) + if len(input_shapes) == 1 and not self.is_input: + return input_shapes[0] + return input_shapes + + @property + def output_shapes(self): + return tf.nest.map_structure(backend.int_shape, self.output_tensors) + + @property + def outbound_layer(self): + return self.layer + + @property + def inbound_layers(self): + """Return all layers that feed into the current node.""" + if self.is_input: + return [] + tensor_call_args = [ + x + for x in self._flat_arguments + if tf.is_tensor(x) and hasattr(x, "_keras_history") + ] + inbound_layers = tf.nest.map_structure( + lambda t: t._keras_history.layer, tensor_call_args + ) + if len(inbound_layers) == 1: + return inbound_layers[0] + return inbound_layers class KerasHistory( - collections.namedtuple('KerasHistory', - ['layer', 'node_index', 'tensor_index'])): - """Tracks the Layer call that created a Tensor, for Keras Graph Networks. - - During construction of Keras Graph Networks, this metadata is added to - each Tensor produced as the output of a Layer, starting with an - `InputLayer`. This allows Keras to track how each Tensor was produced, and - this information is later retraced by the `keras.engine.Network` class to - reconstruct the Keras Graph Network. - - Attributes: - layer: The Layer that produced the Tensor. - node_index: The specific call to the Layer that produced this Tensor. Layers - can be called multiple times in order to share weights. A new node is - created every time a Layer is called. The corresponding node that - represents the call event that produced the Tensor can be found at - `layer._inbound_nodes[node_index]`. - tensor_index: The output index for this Tensor. Always zero if the Layer - that produced this Tensor only has one output. Nested structures of - Tensors are deterministically assigned an index via `nest.flatten`. - """ - # Added to maintain memory and performance characteristics of `namedtuple` - # while subclassing. 
- __slots__ = () + collections.namedtuple( + "KerasHistory", ["layer", "node_index", "tensor_index"] + ) +): + """Tracks the Layer call that created a Tensor, for Keras Graph Networks. + + During construction of Keras Graph Networks, this metadata is added to + each Tensor produced as the output of a Layer, starting with an + `InputLayer`. This allows Keras to track how each Tensor was produced, and + this information is later retraced by the `keras.engine.Network` class to + reconstruct the Keras Graph Network. + + Attributes: + layer: The Layer that produced the Tensor. + node_index: The specific call to the Layer that produced this Tensor. + Layers can be called multiple times in order to share weights. A new + node is created every time a Layer is called. The corresponding node + that represents the call event that produced the Tensor can be found at + `layer._inbound_nodes[node_index]`. + tensor_index: The output index for this Tensor. Always zero if the Layer + that produced this Tensor only has one output. Nested structures of + Tensors are deterministically assigned an index via `nest.flatten`. + """ + + # Added to maintain memory and performance characteristics of `namedtuple` + # while subclassing. + __slots__ = () def is_keras_tensor(obj): - return hasattr(obj, '_keras_history') + return hasattr(obj, "_keras_history") diff --git a/keras/engine/node_test.py b/keras/engine/node_test.py index 4f2c30590433..5fa822e30131 100644 --- a/keras/engine/node_test.py +++ b/keras/engine/node_test.py @@ -11,148 +11,162 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#,============================================================================ +# ,============================================================================ """Tests for layer graphs construction & handling.""" +import tensorflow.compat.v2 as tf + from keras.engine import base_layer from keras.engine import node as node_module from keras.testing_infra import test_combinations -import tensorflow.compat.v2 as tf class DummyTensor(tf.__internal__.types.Tensor): + def __init__(self, shape=None): + self._shape = shape - def __init__(self, shape=None): - self._shape = shape - - @property - def shape(self): - return self._shape + @property + def shape(self): + return self._shape class DummyLayer(base_layer.Layer): - pass + pass class NetworkConstructionTest(test_combinations.TestCase): - - def test_chained_node_construction(self): - # test basics - a = DummyTensor(shape=(None, 32)) - b = DummyTensor(shape=(None, 32)) - - a_layer = DummyLayer() - node = node_module.Node(a_layer, outputs=a) - self.assertEqual(node.outbound_layer, a_layer) - - self.assertTrue(node.is_input) - self.assertListEqual(node.inbound_layers, []) - self.assertListEqual(node.input_tensors, [a]) - self.assertListEqual(node.input_shapes, [(None, 32)]) - self.assertListEqual(node.output_tensors, [a]) - self.assertListEqual(node.output_shapes, [(None, 32)]) - - b_layer = DummyLayer() - node_module.Node(b_layer, outputs=b) - - dense = DummyLayer() - a_2 = DummyTensor() - node_a = node_module.Node(layer=dense, call_args=(a,), outputs=a_2) - b_2 = DummyTensor() - node_b = node_module.Node(layer=dense, call_args=(b,), outputs=b_2) - - # test the node attributes - self.assertFalse(node_a.is_input) - self.assertFalse(node_b.is_input) - self.assertEqual(node_a.call_args, (a,)) - self.assertEqual(node_a.call_kwargs, {}) - 
self.assertEqual(node_a.outputs, a_2) - - # Test the layer wiring - self.assertLen(dense._inbound_nodes, 2) - self.assertLen(dense._outbound_nodes, 0) - self.assertEqual(dense._inbound_nodes, [node_a, node_b]) - self.assertEqual(dense._inbound_nodes[0].inbound_layers, a_layer) - self.assertEqual(dense._inbound_nodes[0].outbound_layer, dense) - self.assertEqual(dense._inbound_nodes[1].inbound_layers, b_layer) - self.assertEqual(dense._inbound_nodes[1].outbound_layer, dense) - self.assertIs(dense._inbound_nodes[0].input_tensors, a) - self.assertIs(dense._inbound_nodes[1].input_tensors, b) - - def test_multi_input_node(self): - # test multi-input layer - a = DummyTensor() - b = DummyTensor() - - dense = DummyLayer() - a_2 = DummyTensor() - node_module.Node(layer=dense, call_args=(a,), outputs=a_2) - b_2 = DummyTensor() - node_module.Node(layer=dense, call_args=(b,), outputs=b_2) - - concat_layer = DummyLayer() - merged = DummyTensor() - node_module.Node(layer=concat_layer, call_args=([a_2, b_2],), - outputs=merged) - - merge_layer, merge_node_index, merge_tensor_index = merged._keras_history - - self.assertEqual(merge_node_index, 0) - self.assertEqual(merge_tensor_index, 0) - - self.assertLen(merge_layer._inbound_nodes, 1) - self.assertLen(merge_layer._outbound_nodes, 0) - - self.assertLen(merge_layer._inbound_nodes[0].input_tensors, 2) - self.assertEqual(merge_layer._inbound_nodes[0].input_tensors, [a_2, b_2]) - self.assertLen(merge_layer._inbound_nodes[0].inbound_layers, 2) - - def test_arg_and_kwarg_mix(self): - input_layer = DummyLayer() - input_layer_2 = DummyLayer() - a = DummyTensor() - node_a = node_module.Node(layer=input_layer, outputs=a) - b = DummyTensor() - node_b = node_module.Node(layer=input_layer_2, outputs=b) - - arg_2 = DummyTensor() - arg_3 = DummyTensor() - node_c = node_module.Node(layer=input_layer, outputs=arg_3) - - kwarg_x = DummyTensor() - kwarg_y = DummyTensor() - node_d = node_module.Node(layer=input_layer, outputs=kwarg_y) - - merge_layer = DummyLayer() - merged = DummyTensor() - node = node_module.Node(layer=merge_layer, - call_args=([a, b], arg_2, arg_3), - call_kwargs={'x': kwarg_x, 'y': kwarg_y}, - outputs=merged) - - merge_layer, merge_node_index, merge_tensor_index = merged._keras_history - - # Check the saved call args/kwargs - self.assertEqual(([a, b], arg_2, arg_3), node.call_args) - self.assertEqual({'x': kwarg_x, 'y': kwarg_y}, node.call_kwargs) - - # Only the inputs that were produced by input nodes should appear in - # keras_tensors - self.assertEqual({a, b, arg_3, kwarg_y}, set(node.keras_inputs)) - self.assertEqual(set(node.parent_nodes), {node_a, node_b, node_c, node_d}) - - # Check the layer wirings - self.assertEqual(merge_node_index, 0) - self.assertEqual(merge_tensor_index, 0) - self.assertLen(merge_layer._inbound_nodes, 1) - self.assertLen(merge_layer._outbound_nodes, 0) - self.assertLen(input_layer._outbound_nodes, 3) - self.assertLen(input_layer_2._outbound_nodes, 1) - - self.assertLen(merge_layer._inbound_nodes[0].input_tensors, 2) - self.assertEqual(merge_layer._inbound_nodes[0].input_tensors, [a, b]) - self.assertLen(merge_layer._inbound_nodes[0].inbound_layers, 4) - - -if __name__ == '__main__': - tf.test.main() + def test_chained_node_construction(self): + # test basics + a = DummyTensor(shape=(None, 32)) + b = DummyTensor(shape=(None, 32)) + + a_layer = DummyLayer() + node = node_module.Node(a_layer, outputs=a) + self.assertEqual(node.outbound_layer, a_layer) + + self.assertTrue(node.is_input) + 
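+ # `is_input` is True because the node was created with no call args
+ # or kwargs (cf. `Node.__init__` above); such input nodes report
+ # their outputs as their own input tensors, which the assertions
+ # below rely on.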
self.assertListEqual(node.inbound_layers, []) + self.assertListEqual(node.input_tensors, [a]) + self.assertListEqual(node.input_shapes, [(None, 32)]) + self.assertListEqual(node.output_tensors, [a]) + self.assertListEqual(node.output_shapes, [(None, 32)]) + + b_layer = DummyLayer() + node_module.Node(b_layer, outputs=b) + + dense = DummyLayer() + a_2 = DummyTensor() + node_a = node_module.Node(layer=dense, call_args=(a,), outputs=a_2) + b_2 = DummyTensor() + node_b = node_module.Node(layer=dense, call_args=(b,), outputs=b_2) + + # test the node attributes + self.assertFalse(node_a.is_input) + self.assertFalse(node_b.is_input) + self.assertEqual(node_a.call_args, (a,)) + self.assertEqual(node_a.call_kwargs, {}) + self.assertEqual(node_a.outputs, a_2) + + # Test the layer wiring + self.assertLen(dense._inbound_nodes, 2) + self.assertLen(dense._outbound_nodes, 0) + self.assertEqual(dense._inbound_nodes, [node_a, node_b]) + self.assertEqual(dense._inbound_nodes[0].inbound_layers, a_layer) + self.assertEqual(dense._inbound_nodes[0].outbound_layer, dense) + self.assertEqual(dense._inbound_nodes[1].inbound_layers, b_layer) + self.assertEqual(dense._inbound_nodes[1].outbound_layer, dense) + self.assertIs(dense._inbound_nodes[0].input_tensors, a) + self.assertIs(dense._inbound_nodes[1].input_tensors, b) + + def test_multi_input_node(self): + # test multi-input layer + a = DummyTensor() + b = DummyTensor() + + dense = DummyLayer() + a_2 = DummyTensor() + node_module.Node(layer=dense, call_args=(a,), outputs=a_2) + b_2 = DummyTensor() + node_module.Node(layer=dense, call_args=(b,), outputs=b_2) + + concat_layer = DummyLayer() + merged = DummyTensor() + node_module.Node( + layer=concat_layer, call_args=([a_2, b_2],), outputs=merged + ) + + ( + merge_layer, + merge_node_index, + merge_tensor_index, + ) = merged._keras_history + + self.assertEqual(merge_node_index, 0) + self.assertEqual(merge_tensor_index, 0) + + self.assertLen(merge_layer._inbound_nodes, 1) + self.assertLen(merge_layer._outbound_nodes, 0) + + self.assertLen(merge_layer._inbound_nodes[0].input_tensors, 2) + self.assertEqual( + merge_layer._inbound_nodes[0].input_tensors, [a_2, b_2] + ) + self.assertLen(merge_layer._inbound_nodes[0].inbound_layers, 2) + + def test_arg_and_kwarg_mix(self): + input_layer = DummyLayer() + input_layer_2 = DummyLayer() + a = DummyTensor() + node_a = node_module.Node(layer=input_layer, outputs=a) + b = DummyTensor() + node_b = node_module.Node(layer=input_layer_2, outputs=b) + + arg_2 = DummyTensor() + arg_3 = DummyTensor() + node_c = node_module.Node(layer=input_layer, outputs=arg_3) + + kwarg_x = DummyTensor() + kwarg_y = DummyTensor() + node_d = node_module.Node(layer=input_layer, outputs=kwarg_y) + + merge_layer = DummyLayer() + merged = DummyTensor() + node = node_module.Node( + layer=merge_layer, + call_args=([a, b], arg_2, arg_3), + call_kwargs={"x": kwarg_x, "y": kwarg_y}, + outputs=merged, + ) + + ( + merge_layer, + merge_node_index, + merge_tensor_index, + ) = merged._keras_history + + # Check the saved call args/kwargs + self.assertEqual(([a, b], arg_2, arg_3), node.call_args) + self.assertEqual({"x": kwarg_x, "y": kwarg_y}, node.call_kwargs) + + # Only the inputs that were produced by input nodes should appear in + # keras_tensors + self.assertEqual({a, b, arg_3, kwarg_y}, set(node.keras_inputs)) + self.assertEqual( + set(node.parent_nodes), {node_a, node_b, node_c, node_d} + ) + + # Check the layer wirings + self.assertEqual(merge_node_index, 0) + self.assertEqual(merge_tensor_index, 0) + 
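+ # (The merge node appends itself to a producer's `_outbound_nodes`
+ # once per consumed tensor, so `input_layer`, which produced `a`,
+ # `arg_3`, and `kwarg_y`, is expected to have three outbound entries
+ # below.)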
self.assertLen(merge_layer._inbound_nodes, 1) + self.assertLen(merge_layer._outbound_nodes, 0) + self.assertLen(input_layer._outbound_nodes, 3) + self.assertLen(input_layer_2._outbound_nodes, 1) + + self.assertLen(merge_layer._inbound_nodes[0].input_tensors, 2) + self.assertEqual(merge_layer._inbound_nodes[0].input_tensors, [a, b]) + self.assertLen(merge_layer._inbound_nodes[0].inbound_layers, 4) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/partial_batch_padding_handler.py b/keras/engine/partial_batch_padding_handler.py index 998526f6c1c5..a67fa70de6d1 100644 --- a/keras/engine/partial_batch_padding_handler.py +++ b/keras/engine/partial_batch_padding_handler.py @@ -14,92 +14,101 @@ # ============================================================================== """Utility object to handler partial batches for TPUStrategy.""" +import numpy as np import tensorflow.compat.v2 as tf -# pylint: disable=protected-access -import numpy as np from keras import backend class PartialBatchPaddingHandler: - """A container that holds info about partial batches for `predict()`.""" - - def __init__(self, output_shape): - self.padded_batch_size = 0 - self.padding_mask = tf.zeros(0) - self.output_shape = output_shape - - def get_real_batch_size(self, dataset_batch): - """Returns the number of elements in a potentially partial batch.""" - if isinstance(dataset_batch, (tuple, list)): - dataset_batch = dataset_batch[0] - - assert tf.nest.flatten(dataset_batch) - - def _find_any_tensor(batch_features): - tensors = [ - x for x in tf.nest.flatten(batch_features) if tf.is_tensor(x) - ] - if not tensors: - raise ValueError('Cannot find any Tensor in features dict.') - return tensors[0] - - return backend.cast(backend.shape(_find_any_tensor(dataset_batch))[0], - dtype='int64') - - def update_mask(self, padding_mask, dataset_batch): - """Calculate and cache the amount of padding required for a batch.""" - original_batch_size = self.get_real_batch_size(dataset_batch) - missing_count = self.padded_batch_size - original_batch_size - mask = backend.concatenate([tf.ones(original_batch_size), - tf.zeros(missing_count)], axis=0) - return backend.concatenate([padding_mask, mask], axis=0) - - def pad_batch(self, *dataset_batch_elements): - """Pads out the batch dimension of a tensor to the complete batch size.""" - def _pad(batch): - """Helper function to pad nested data within each batch elements.""" - padded_dict_batch = {} - if isinstance(batch, dict): - for key, value in batch.items(): - padded_dict_batch[key] = _pad(value) - return padded_dict_batch - - rank = len(batch.shape) - assert rank > 0 - missing_count = (self.padded_batch_size - - self.get_real_batch_size(batch)) - padding = backend.stack([[0, missing_count]] + [[0, 0]] * (rank - 1)) - return tf.pad(batch, padding, 'constant') - - if len(dataset_batch_elements) == 1: - return _pad(dataset_batch_elements[0]) - - batch_elements = [] - for batch_element in dataset_batch_elements: - batch_elements.append(_pad(batch_element)) - return tuple(batch_elements) - - def apply_mask(self, prediction_result): - """Removes prediction output that corresponds to padded input.""" - padding_mask = backend.get_value(self.padding_mask) - assert len(padding_mask.shape) == 1 - - if len(self.output_shape) == 1: - prediction = np.take(prediction_result, - np.nonzero( - padding_mask[:len(prediction_result)]), - axis=0) - if prediction.shape[0] == 1: - prediction = np.squeeze(prediction, axis=0) - return prediction - - else: - predictions = [] - for i in 
range(len(self.output_shape)): - prediction = prediction_result[i] - prediction = np.take(prediction, np.nonzero( - padding_mask[:len(prediction)]), axis=0) - predictions.append(np.squeeze(prediction)) - - return predictions + """A container that holds info about partial batches for `predict()`.""" + + def __init__(self, output_shape): + self.padded_batch_size = 0 + self.padding_mask = tf.zeros(0) + self.output_shape = output_shape + + def get_real_batch_size(self, dataset_batch): + """Returns the number of elements in a potentially partial batch.""" + if isinstance(dataset_batch, (tuple, list)): + dataset_batch = dataset_batch[0] + + assert tf.nest.flatten(dataset_batch) + + def _find_any_tensor(batch_features): + tensors = [ + x for x in tf.nest.flatten(batch_features) if tf.is_tensor(x) + ] + if not tensors: + raise ValueError("Cannot find any Tensor in features dict.") + return tensors[0] + + return backend.cast( + backend.shape(_find_any_tensor(dataset_batch))[0], dtype="int64" + ) + + def update_mask(self, padding_mask, dataset_batch): + """Calculate and cache the amount of padding required for a batch.""" + original_batch_size = self.get_real_batch_size(dataset_batch) + missing_count = self.padded_batch_size - original_batch_size + mask = backend.concatenate( + [tf.ones(original_batch_size), tf.zeros(missing_count)], axis=0 + ) + return backend.concatenate([padding_mask, mask], axis=0) + + def pad_batch(self, *dataset_batch_elements): + """Pads the batch dimension of a tensor to the complete batch size.""" + + def _pad(batch): + """Helper function to pad nested data within each batch elements.""" + padded_dict_batch = {} + if isinstance(batch, dict): + for key, value in batch.items(): + padded_dict_batch[key] = _pad(value) + return padded_dict_batch + + rank = len(batch.shape) + assert rank > 0 + missing_count = self.padded_batch_size - self.get_real_batch_size( + batch + ) + padding = backend.stack( + [[0, missing_count]] + [[0, 0]] * (rank - 1) + ) + return tf.pad(batch, padding, "constant") + + if len(dataset_batch_elements) == 1: + return _pad(dataset_batch_elements[0]) + + batch_elements = [] + for batch_element in dataset_batch_elements: + batch_elements.append(_pad(batch_element)) + return tuple(batch_elements) + + def apply_mask(self, prediction_result): + """Removes prediction output that corresponds to padded input.""" + padding_mask = backend.get_value(self.padding_mask) + assert len(padding_mask.shape) == 1 + + if len(self.output_shape) == 1: + prediction = np.take( + prediction_result, + np.nonzero(padding_mask[: len(prediction_result)]), + axis=0, + ) + if prediction.shape[0] == 1: + prediction = np.squeeze(prediction, axis=0) + return prediction + + else: + predictions = [] + for i in range(len(self.output_shape)): + prediction = prediction_result[i] + prediction = np.take( + prediction, + np.nonzero(padding_mask[: len(prediction)]), + axis=0, + ) + predictions.append(np.squeeze(prediction)) + + return predictions diff --git a/keras/engine/ragged_keras_tensor_test.py b/keras/engine/ragged_keras_tensor_test.py index c31908b05c47..cad4e02e281b 100644 --- a/keras/engine/ragged_keras_tensor_test.py +++ b/keras/engine/ragged_keras_tensor_test.py @@ -14,365 +14,357 @@ # ============================================================================== """RaggedKerasTensor tests.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np -from keras.testing_infra import test_combinations + from keras import layers -from 
keras.testing_infra import test_utils from keras.engine import training +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils @test_utils.run_v2_only class RaggedKerasTensorTest(test_combinations.TestCase): - - @parameterized.parameters( - {'batch_size': None, 'shape': (None, 5), 'ragged_rank': 1}, - {'batch_size': None, 'shape': (None, 3, 5), 'ragged_rank': 1}, - {'batch_size': None, 'shape': (5, None), 'ragged_rank': 2}, - {'batch_size': None, 'shape': (3, 5, None), 'ragged_rank': 3}, - {'batch_size': None, 'shape': (None, 3, 5, None), 'ragged_rank': 4}, - {'batch_size': None, 'shape': (2, 3, None, 4, 5, None), 'ragged_rank': 6}, - {'batch_size': 8, 'shape': (None, 5), 'ragged_rank': 1}, - {'batch_size': 9, 'shape': (None, 3, 5), 'ragged_rank': 1}, - {'batch_size': 1, 'shape': (5, None), 'ragged_rank': 2}, - {'batch_size': 4, 'shape': (3, 5, None), 'ragged_rank': 3}, - {'batch_size': 7, 'shape': (None, 3, 5, None), 'ragged_rank': 4}, - {'batch_size': 12, 'shape': (2, 3, None, 4, 5, None), 'ragged_rank': 6}, - ) - def test_to_placeholder(self, shape, batch_size, ragged_rank): - inp = layers.Input(shape=shape, batch_size=batch_size, ragged=True) - self.assertEqual(inp.ragged_rank, ragged_rank) - self.assertAllEqual(inp.shape, [batch_size] + list(shape)) - with tf.__internal__.FuncGraph('test').as_default(): - placeholder = inp._to_placeholder() - self.assertEqual(placeholder.ragged_rank, ragged_rank) - self.assertAllEqual(placeholder.shape, [batch_size] + list(shape)) - - def test_add(self): - inp = layers.Input(shape=[None], ragged=True) - out = inp + inp - model = training.Model(inp, out) - - x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) - self.assertAllEqual(model(x), x + x) - - def test_mul(self): - inp = layers.Input(shape=[None], ragged=True) - out = inp * inp - model = training.Model(inp, out) - - x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) - self.assertAllEqual(model(x), x * x) - - def test_sub(self): - inp = layers.Input(shape=[None], ragged=True) - out = inp - inp - model = training.Model(inp, out) - - x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) - self.assertAllEqual(model(x), x - x) - - def test_div(self): - inp = layers.Input(shape=[None], ragged=True) - out = inp / inp - model = training.Model(inp, out) - - x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) - self.assertAllEqual(model(x), x / x) - - def test_getitem(self): - # Test slicing / getitem - inp = layers.Input(shape=(None, 2), ragged=True) - out = inp[:, :2] - model = training.Model(inp, out) - - x = tf.RaggedTensor.from_row_lengths( - tf.cast(np.random.randn(6, 2), dtype=tf.float32), [3, 1, 2]) - expected = x[:, :2] - - self.assertAllEqual(model(x), expected) - - # Test that models w/ slicing are correctly serialized/deserialized - config = model.get_config() - model = training.Model.from_config(config) - - self.assertAllEqual(model(x), expected) - - @parameterized.parameters( - {'property_name': 'values'}, - {'property_name': 'flat_values'}, - {'property_name': 'row_splits'}, - {'property_name': 'nested_row_splits'}, - ) - def test_instance_property(self, property_name): - inp = layers.Input(shape=[None], ragged=True) - out = getattr(inp, property_name) - model = training.Model(inp, out) - - x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) - expected_property = getattr(x, property_name) - self.assertAllEqual(model(x), expected_property) - - # Test that it works with serialization and deserialization as well - model_config = model.get_config() - model2 = 
training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected_property) - - @parameterized.parameters( - {'name': 'value_rowids'}, - {'name': 'nested_value_rowids'}, - {'name': 'nrows'}, - {'name': 'row_starts'}, - {'name': 'row_limits'}, - {'name': 'row_lengths'}, - {'name': 'nested_row_lengths'}, - {'name': 'bounding_shape'}, - { - 'name': 'with_values', - 'args': [[1, 2, 3, 4, 5, 6]] - }, - { - 'name': 'with_flat_values', - 'kwargs': { - 'new_values': [1, 2, 3, 4, 5, 6] - } - }, - { - 'name': 'with_row_splits_dtype', - 'kwargs': { - 'dtype': tf.int32 - } - }, - { - 'name': 'merge_dims', - 'args': [0], - 'kwargs': { - 'inner_axis': 1 - } - }, - {'name': 'to_tensor'}, - {'name': 'to_sparse'}, - ) - def test_instance_method(self, name, args=None, kwargs=None): - if not args: - args = [] - if not kwargs: - kwargs = {} - - inp = layers.Input(shape=[None], ragged=True) - out = getattr(inp, name)(*args, **kwargs) - model = training.Model(inp, out) - - x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) - expected_property = getattr(x, name)(*args, **kwargs) - # We expand composites before checking equality because - # assertAllEqual otherwise wouldn't work for SparseTensor outputs - for a, b in zip(tf.nest.flatten(model(x), expand_composites=True), - tf.nest.flatten(expected_property, expand_composites=True)): - self.assertAllEqual(a, b) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - for a, b in zip(tf.nest.flatten(model2(x), expand_composites=True), - tf.nest.flatten(expected_property, expand_composites=True)): - self.assertAllEqual(a, b) + @parameterized.parameters( + {"batch_size": None, "shape": (None, 5), "ragged_rank": 1}, + {"batch_size": None, "shape": (None, 3, 5), "ragged_rank": 1}, + {"batch_size": None, "shape": (5, None), "ragged_rank": 2}, + {"batch_size": None, "shape": (3, 5, None), "ragged_rank": 3}, + {"batch_size": None, "shape": (None, 3, 5, None), "ragged_rank": 4}, + { + "batch_size": None, + "shape": (2, 3, None, 4, 5, None), + "ragged_rank": 6, + }, + {"batch_size": 8, "shape": (None, 5), "ragged_rank": 1}, + {"batch_size": 9, "shape": (None, 3, 5), "ragged_rank": 1}, + {"batch_size": 1, "shape": (5, None), "ragged_rank": 2}, + {"batch_size": 4, "shape": (3, 5, None), "ragged_rank": 3}, + {"batch_size": 7, "shape": (None, 3, 5, None), "ragged_rank": 4}, + {"batch_size": 12, "shape": (2, 3, None, 4, 5, None), "ragged_rank": 6}, + ) + def test_to_placeholder(self, shape, batch_size, ragged_rank): + inp = layers.Input(shape=shape, batch_size=batch_size, ragged=True) + self.assertEqual(inp.ragged_rank, ragged_rank) + self.assertAllEqual(inp.shape, [batch_size] + list(shape)) + with tf.__internal__.FuncGraph("test").as_default(): + placeholder = inp._to_placeholder() + self.assertEqual(placeholder.ragged_rank, ragged_rank) + self.assertAllEqual(placeholder.shape, [batch_size] + list(shape)) + + def test_add(self): + inp = layers.Input(shape=[None], ragged=True) + out = inp + inp + model = training.Model(inp, out) + + x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) + self.assertAllEqual(model(x), x + x) + + def test_mul(self): + inp = layers.Input(shape=[None], ragged=True) + out = inp * inp + model = training.Model(inp, out) + + x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) + self.assertAllEqual(model(x), x * x) + + def test_sub(self): + inp = layers.Input(shape=[None], ragged=True) + out = inp - inp + model = training.Model(inp, out) + + x = 
tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) + self.assertAllEqual(model(x), x - x) + + def test_div(self): + inp = layers.Input(shape=[None], ragged=True) + out = inp / inp + model = training.Model(inp, out) + + x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) + self.assertAllEqual(model(x), x / x) + + def test_getitem(self): + # Test slicing / getitem + inp = layers.Input(shape=(None, 2), ragged=True) + out = inp[:, :2] + model = training.Model(inp, out) + + x = tf.RaggedTensor.from_row_lengths( + tf.cast(np.random.randn(6, 2), dtype=tf.float32), [3, 1, 2] + ) + expected = x[:, :2] + + self.assertAllEqual(model(x), expected) + + # Test that models w/ slicing are correctly serialized/deserialized + config = model.get_config() + model = training.Model.from_config(config) + + self.assertAllEqual(model(x), expected) + + @parameterized.parameters( + {"property_name": "values"}, + {"property_name": "flat_values"}, + {"property_name": "row_splits"}, + {"property_name": "nested_row_splits"}, + ) + def test_instance_property(self, property_name): + inp = layers.Input(shape=[None], ragged=True) + out = getattr(inp, property_name) + model = training.Model(inp, out) + + x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) + expected_property = getattr(x, property_name) + self.assertAllEqual(model(x), expected_property) + + # Test that it works with serialization and deserialization as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected_property) + + @parameterized.parameters( + {"name": "value_rowids"}, + {"name": "nested_value_rowids"}, + {"name": "nrows"}, + {"name": "row_starts"}, + {"name": "row_limits"}, + {"name": "row_lengths"}, + {"name": "nested_row_lengths"}, + {"name": "bounding_shape"}, + {"name": "with_values", "args": [[1, 2, 3, 4, 5, 6]]}, + { + "name": "with_flat_values", + "kwargs": {"new_values": [1, 2, 3, 4, 5, 6]}, + }, + {"name": "with_row_splits_dtype", "kwargs": {"dtype": tf.int32}}, + {"name": "merge_dims", "args": [0], "kwargs": {"inner_axis": 1}}, + {"name": "to_tensor"}, + {"name": "to_sparse"}, + ) + def test_instance_method(self, name, args=None, kwargs=None): + if not args: + args = [] + if not kwargs: + kwargs = {} + + inp = layers.Input(shape=[None], ragged=True) + out = getattr(inp, name)(*args, **kwargs) + model = training.Model(inp, out) + + x = tf.ragged.constant([[3, 4], [1, 2], [3, 5]]) + expected_property = getattr(x, name)(*args, **kwargs) + # We expand composites before checking equality because + # assertAllEqual otherwise wouldn't work for SparseTensor outputs + for a, b in zip( + tf.nest.flatten(model(x), expand_composites=True), + tf.nest.flatten(expected_property, expand_composites=True), + ): + self.assertAllEqual(a, b) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + for a, b in zip( + tf.nest.flatten(model2(x), expand_composites=True), + tf.nest.flatten(expected_property, expand_composites=True), + ): + self.assertAllEqual(a, b) @test_utils.run_v2_only class RaggedTensorClassMethodAsLayerTest(test_combinations.TestCase): - - def test_from_value_rowids(self): - inp = layers.Input(shape=[None]) - out = tf.RaggedTensor.from_value_rowids( - inp, value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], nrows=5) - model = training.Model(inp, out) - - x = tf.constant([3, 1, 4, 1, 5, 9, 2, 6]) - expected = tf.RaggedTensor.from_value_rowids( - x, value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], nrows=5) - 
self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_row_splits(self): - inp = layers.Input(shape=[None]) - out = tf.RaggedTensor.from_row_splits( - inp, row_splits=[0, 4, 4, 7, 8, 8]) - model = training.Model(inp, out) - - x = tf.constant([3, 1, 4, 1, 5, 9, 2, 6]) - expected = tf.RaggedTensor.from_row_splits( - x, row_splits=[0, 4, 4, 7, 8, 8]) - self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_row_lengths(self): - inp = layers.Input(shape=[None]) - out = tf.RaggedTensor.from_row_lengths( - inp, row_lengths=[4, 0, 3, 1, 0]) - model = training.Model(inp, out) - - x = tf.constant([3, 1, 4, 1, 5, 9, 2, 6]) - expected = tf.RaggedTensor.from_row_lengths( - x, row_lengths=[4, 0, 3, 1, 0]) - self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_row_starts(self): - inp = layers.Input(shape=[None]) - out = tf.RaggedTensor.from_row_starts( - inp, row_starts=[0, 4, 4, 7, 8]) - model = training.Model(inp, out) - - x = tf.constant([3, 1, 4, 1, 5, 9, 2, 6]) - expected = tf.RaggedTensor.from_row_starts( - x, row_starts=[0, 4, 4, 7, 8]) - self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_row_limits(self): - row_limits = tf.constant([2, 2, 5, 6, 7], tf.int64) - - inp = layers.Input(shape=[None], dtype=tf.string) - out = tf.RaggedTensor.from_row_limits( - inp, row_limits, validate=False) - model = training.Model(inp, out) - - x = tf.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g']) - expected = tf.RaggedTensor.from_row_limits( - x, row_limits, validate=False) - self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_uniform_row_length(self): - inp = layers.Input(shape=[None]) - out = tf.RaggedTensor.from_uniform_row_length(inp, 2, 8) - model = training.Model(inp, out) - - x = tf.constant( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]) - expected = tf.RaggedTensor.from_uniform_row_length(x, 2, 8) - self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_nested_value_row_ids(self): - nested_value_rowids = [ - tf.constant([0, 0, 1, 3, 3], tf.int64), - tf.constant([0, 0, 2, 2, 2, 3, 4], tf.int64) - ] - inp = layers.Input(shape=[None], dtype=tf.string) - out = tf.RaggedTensor.from_nested_value_rowids( - inp, nested_value_rowids) - model = training.Model(inp, out) - - x = tf.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g']) - expected = tf.RaggedTensor.from_nested_value_rowids( - x, nested_value_rowids) - 
self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_nested_row_splits(self): - nested_row_splits = [ - tf.constant([0, 2, 3, 3, 5], tf.int64), - tf.constant([0, 2, 2, 5, 6, 7], tf.int64) - ] - inp = layers.Input(shape=[None], dtype=tf.string) - out = tf.RaggedTensor.from_nested_row_splits( - inp, nested_row_splits) - model = training.Model(inp, out) - - x = tf.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g']) - expected = tf.RaggedTensor.from_nested_row_splits( - x, nested_row_splits) - self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_nested_row_lengths(self): - nested_row_lengths = [ - tf.constant([2, 1, 0, 2], tf.int64), - tf.constant([2, 0, 3, 1, 1], tf.int64) - ] - inp = layers.Input(shape=[None], dtype=tf.string) - out = tf.RaggedTensor.from_nested_row_lengths( - inp, nested_row_lengths) - model = training.Model(inp, out) - - x = tf.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g']) - expected = tf.RaggedTensor.from_nested_row_lengths( - x, nested_row_lengths) - self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_tensor(self): - inp = layers.Input(shape=[None], ragged=False) - out = tf.RaggedTensor.from_tensor(inp) - model = training.Model(inp, out) - - x = tf.constant([[3., 4.], [1., 2.], [3., 5.]]) - expected = tf.RaggedTensor.from_tensor(x) - self.assertAllEqual(model(x), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(x), expected) - - def test_from_sparse(self): - inp = layers.Input(shape=[None], sparse=True, dtype=tf.string) - out = tf.RaggedTensor.from_sparse(inp) - model = training.Model(inp, out) - - indices = [[0, 0], [1, 0], [1, 1], [2, 0]] - values = [b'a', b'b', b'c', b'd'] - shape = [4, 5] - sp_value = tf.SparseTensor(indices, values, shape) - - expected = tf.RaggedTensor.from_sparse(sp_value) - self.assertAllEqual(model(sp_value), expected) - - # Test that the model can serialize and deserialize as well - model_config = model.get_config() - model2 = training.Model.from_config(model_config) - self.assertAllEqual(model2(sp_value), expected) - - -if __name__ == '__main__': - tf.test.main() + def test_from_value_rowids(self): + inp = layers.Input(shape=[None]) + out = tf.RaggedTensor.from_value_rowids( + inp, value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], nrows=5 + ) + model = training.Model(inp, out) + + x = tf.constant([3, 1, 4, 1, 5, 9, 2, 6]) + expected = tf.RaggedTensor.from_value_rowids( + x, value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], nrows=5 + ) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_row_splits(self): + inp = layers.Input(shape=[None]) + out = tf.RaggedTensor.from_row_splits( + inp, row_splits=[0, 4, 4, 7, 8, 8] + ) + model = 
training.Model(inp, out) + + x = tf.constant([3, 1, 4, 1, 5, 9, 2, 6]) + expected = tf.RaggedTensor.from_row_splits( + x, row_splits=[0, 4, 4, 7, 8, 8] + ) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_row_lengths(self): + inp = layers.Input(shape=[None]) + out = tf.RaggedTensor.from_row_lengths(inp, row_lengths=[4, 0, 3, 1, 0]) + model = training.Model(inp, out) + + x = tf.constant([3, 1, 4, 1, 5, 9, 2, 6]) + expected = tf.RaggedTensor.from_row_lengths( + x, row_lengths=[4, 0, 3, 1, 0] + ) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_row_starts(self): + inp = layers.Input(shape=[None]) + out = tf.RaggedTensor.from_row_starts(inp, row_starts=[0, 4, 4, 7, 8]) + model = training.Model(inp, out) + + x = tf.constant([3, 1, 4, 1, 5, 9, 2, 6]) + expected = tf.RaggedTensor.from_row_starts( + x, row_starts=[0, 4, 4, 7, 8] + ) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_row_limits(self): + row_limits = tf.constant([2, 2, 5, 6, 7], tf.int64) + + inp = layers.Input(shape=[None], dtype=tf.string) + out = tf.RaggedTensor.from_row_limits(inp, row_limits, validate=False) + model = training.Model(inp, out) + + x = tf.constant(["a", "b", "c", "d", "e", "f", "g"]) + expected = tf.RaggedTensor.from_row_limits( + x, row_limits, validate=False + ) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_uniform_row_length(self): + inp = layers.Input(shape=[None]) + out = tf.RaggedTensor.from_uniform_row_length(inp, 2, 8) + model = training.Model(inp, out) + + x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]) + expected = tf.RaggedTensor.from_uniform_row_length(x, 2, 8) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_nested_value_row_ids(self): + nested_value_rowids = [ + tf.constant([0, 0, 1, 3, 3], tf.int64), + tf.constant([0, 0, 2, 2, 2, 3, 4], tf.int64), + ] + inp = layers.Input(shape=[None], dtype=tf.string) + out = tf.RaggedTensor.from_nested_value_rowids(inp, nested_value_rowids) + model = training.Model(inp, out) + + x = tf.constant(["a", "b", "c", "d", "e", "f", "g"]) + expected = tf.RaggedTensor.from_nested_value_rowids( + x, nested_value_rowids + ) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_nested_row_splits(self): + nested_row_splits = [ + tf.constant([0, 2, 3, 3, 5], tf.int64), + tf.constant([0, 2, 2, 5, 6, 7], tf.int64), + ] + inp = 
layers.Input(shape=[None], dtype=tf.string) + out = tf.RaggedTensor.from_nested_row_splits(inp, nested_row_splits) + model = training.Model(inp, out) + + x = tf.constant(["a", "b", "c", "d", "e", "f", "g"]) + expected = tf.RaggedTensor.from_nested_row_splits(x, nested_row_splits) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_nested_row_lengths(self): + nested_row_lengths = [ + tf.constant([2, 1, 0, 2], tf.int64), + tf.constant([2, 0, 3, 1, 1], tf.int64), + ] + inp = layers.Input(shape=[None], dtype=tf.string) + out = tf.RaggedTensor.from_nested_row_lengths(inp, nested_row_lengths) + model = training.Model(inp, out) + + x = tf.constant(["a", "b", "c", "d", "e", "f", "g"]) + expected = tf.RaggedTensor.from_nested_row_lengths( + x, nested_row_lengths + ) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_tensor(self): + inp = layers.Input(shape=[None], ragged=False) + out = tf.RaggedTensor.from_tensor(inp) + model = training.Model(inp, out) + + x = tf.constant([[3.0, 4.0], [1.0, 2.0], [3.0, 5.0]]) + expected = tf.RaggedTensor.from_tensor(x) + self.assertAllEqual(model(x), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(x), expected) + + def test_from_sparse(self): + inp = layers.Input(shape=[None], sparse=True, dtype=tf.string) + out = tf.RaggedTensor.from_sparse(inp) + model = training.Model(inp, out) + + indices = [[0, 0], [1, 0], [1, 1], [2, 0]] + values = [b"a", b"b", b"c", b"d"] + shape = [4, 5] + sp_value = tf.SparseTensor(indices, values, shape) + + expected = tf.RaggedTensor.from_sparse(sp_value) + self.assertAllEqual(model(sp_value), expected) + + # Test that the model can serialize and deserialize as well + model_config = model.get_config() + model2 = training.Model.from_config(model_config) + self.assertAllEqual(model2(sp_value), expected) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/saving.py b/keras/engine/saving.py index fdddf130cee5..f72fe1c22165 100644 --- a/keras/engine/saving.py +++ b/keras/engine/saving.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access + """Model saving utilities. Everything has been moved to keras/saving/. This file will be deleted soon. """ -from keras.saving import * # pylint: disable=wildcard-import +from keras.saving import * # noqa: F401,F403 diff --git a/keras/engine/sequential.py b/keras/engine/sequential.py index 6fc7208efb96..137926b97c84 100644 --- a/keras/engine/sequential.py +++ b/keras/engine/sequential.py @@ -12,503 +12,541 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -# pylint: disable=protected-access + """Home of the `Sequential` model.""" +import copy + import tensorflow.compat.v2 as tf -import copy from keras import layers as layer_module from keras.engine import base_layer from keras.engine import functional from keras.engine import input_layer +from keras.engine import training from keras.engine import training_utils -from keras.saving.saved_model import model_serialization +from keras.saving import serialization_lib +from keras.saving.legacy import serialization as legacy_serialization +from keras.saving.legacy.saved_model import model_serialization from keras.utils import generic_utils from keras.utils import layer_utils from keras.utils import tf_inspect from keras.utils import tf_utils from keras.utils import traceback_utils -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export -SINGLE_LAYER_OUTPUT_ERROR_MSG = ('All layers in a Sequential model should have ' - 'a single output tensor. For multi-output ' - 'layers, use the functional API.') +SINGLE_LAYER_OUTPUT_ERROR_MSG = ( + "All layers in a Sequential model should have " + "a single output tensor. For multi-output " + "layers, use the functional API." +) -@keras_export('keras.Sequential', 'keras.models.Sequential') +@keras_export("keras.Sequential", "keras.models.Sequential") class Sequential(functional.Functional): - """`Sequential` groups a linear stack of layers into a `tf.keras.Model`. - - `Sequential` provides training and inference features on this model. - - Examples: - - ```python - # Optionally, the first layer can receive an `input_shape` argument: - model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(8, input_shape=(16,))) - # Afterwards, we do automatic shape inference: - model.add(tf.keras.layers.Dense(4)) - - # This is identical to the following: - model = tf.keras.Sequential() - model.add(tf.keras.Input(shape=(16,))) - model.add(tf.keras.layers.Dense(8)) - - # Note that you can also omit the `input_shape` argument. - # In that case the model doesn't have any weights until the first call - # to a training/evaluation method (since it isn't yet built): - model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(8)) - model.add(tf.keras.layers.Dense(4)) - # model.weights not created yet - - # Whereas if you specify the input shape, the model gets built - # continuously as you are adding layers: - model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(8, input_shape=(16,))) - model.add(tf.keras.layers.Dense(4)) - len(model.weights) - # Returns "4" - - # When using the delayed-build pattern (no input shape specified), you can - # choose to manually build your model by calling - # `build(batch_input_shape)`: - model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(8)) - model.add(tf.keras.layers.Dense(4)) - model.build((None, 16)) - len(model.weights) - # Returns "4" - - # Note that when using the delayed-build pattern (no input shape specified), - # the model gets built the first time you call `fit`, `eval`, or `predict`, - # or the first time you call the model on some input data. 
- model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(8)) - model.add(tf.keras.layers.Dense(1)) - model.compile(optimizer='sgd', loss='mse') - # This builds the model for the first time: - model.fit(x, y, batch_size=32, epochs=10) - ``` - """ - - @tf.__internal__.tracking.no_automatic_dependency_tracking - @traceback_utils.filter_traceback - def __init__(self, layers=None, name=None): - """Creates a `Sequential` model instance. - - Args: - layers: Optional list of layers to add to the model. - name: Optional name for the model. - """ - # Skip the init in FunctionalModel since model doesn't have input/output yet - super(functional.Functional, self).__init__( # pylint: disable=bad-super-call - name=name, autocast=False) - base_layer.keras_api_gauge.get_cell('Sequential').set(True) - self.supports_masking = True - self._compute_output_and_mask_jointly = True - self._auto_track_sub_layers = False - self._inferred_input_shape = None - self._has_explicit_input_shape = False - self._input_dtype = None - self._layer_call_argspecs = {} - self._created_nodes = set() - # Flag that indicate whether the sequential network topology has been - # created. It is false when there isn't any layer, or the layers don't - # have an input shape. - self._graph_initialized = False - - # Unfortunately some Sequential models using custom layers or FeatureColumn - # layers have multiple inputs. This is fundamentally incompatible with - # most of the Sequential API, and we have to disable a number of features - # for such models. - self._use_legacy_deferred_behavior = False - - # Add to the model any layers passed to the constructor. - if layers: - if not isinstance(layers, (list, tuple)): - layers = [layers] - for layer in layers: - self.add(layer) - - @property - def layers(self): - # Historically, `sequential.layers` only returns layers that were added - # via `add`, and omits the auto-generated `InputLayer` that comes at the - # bottom of the stack. - # `Trackable` manages the `_layers` attributes and does filtering - # over it. - layers = super().layers - if layers and isinstance(layers[0], input_layer.InputLayer): - return layers[1:] - return layers[:] - - @tf.__internal__.tracking.no_automatic_dependency_tracking - @traceback_utils.filter_traceback - def add(self, layer): - """Adds a layer instance on top of the layer stack. - - Args: - layer: layer instance. - - Raises: - TypeError: If `layer` is not a layer instance. - ValueError: In case the `layer` argument does not - know its input shape. - ValueError: In case the `layer` argument has - multiple output tensors, or is already connected - somewhere else (forbidden in `Sequential` models). + """`Sequential` groups a linear stack of layers into a `tf.keras.Model`. + + `Sequential` provides training and inference features on this model. + + Examples: + + ```python + model = tf.keras.Sequential() + model.add(tf.keras.Input(shape=(16,))) + model.add(tf.keras.layers.Dense(8)) + + # Note that you can also omit the initial `Input`. 
+ # In that case the model doesn't have any weights until the first call + # to a training/evaluation method (since it isn't yet built): + model = tf.keras.Sequential() + model.add(tf.keras.layers.Dense(8)) + model.add(tf.keras.layers.Dense(4)) + # model.weights not created yet + + # Whereas if you specify an `Input`, the model gets built + # continuously as you are adding layers: + model = tf.keras.Sequential() + model.add(tf.keras.Input(shape=(16,))) + model.add(tf.keras.layers.Dense(4)) + len(model.weights) + # Returns "2" + + # When using the delayed-build pattern (no input shape specified), you can + # choose to manually build your model by calling + # `build(batch_input_shape)`: + model = tf.keras.Sequential() + model.add(tf.keras.layers.Dense(8)) + model.add(tf.keras.layers.Dense(4)) + model.build((None, 16)) + len(model.weights) + # Returns "4" + + # Note that when using the delayed-build pattern (no input shape specified), + # the model gets built the first time you call `fit`, `eval`, or `predict`, + # or the first time you call the model on some input data. + model = tf.keras.Sequential() + model.add(tf.keras.layers.Dense(8)) + model.add(tf.keras.layers.Dense(1)) + model.compile(optimizer='sgd', loss='mse') + # This builds the model for the first time: + model.fit(x, y, batch_size=32, epochs=10) + ``` """ - # If we are passed a Keras tensor created by keras.Input(), we can extract - # the input layer from its keras history and use that without any loss of - # generality. - if hasattr(layer, '_keras_history'): - origin_layer = layer._keras_history[0] - if isinstance(origin_layer, input_layer.InputLayer): - layer = origin_layer - - if isinstance(layer, tf.Module): - if not isinstance(layer, base_layer.Layer): - layer = functional.ModuleWrapper(layer) - else: - raise TypeError('The added layer must be an instance of class Layer. ' - f'Received: layer={layer} of type {type(layer)}.') - - tf_utils.assert_no_legacy_layers([layer]) - if not self._is_layer_name_unique(layer): - raise ValueError( - 'All layers added to a Sequential model ' - f'should have unique names. Name "{layer.name}" is already the name ' - 'of a layer in this model. Update the `name` argument ' - 'to pass a unique name.') - - self.built = False - set_inputs = False - self._maybe_create_attribute('_self_tracked_trackables', []) - if not self._self_tracked_trackables: - if isinstance(layer, input_layer.InputLayer): - # Case where the user passes an Input or InputLayer layer via `add`. - set_inputs = True - else: - batch_shape, dtype = training_utils.get_input_shape_and_dtype(layer) - if batch_shape: - # Instantiate an input layer. - x = input_layer.Input( - batch_shape=batch_shape, dtype=dtype, name=layer.name + '_input') - # This will build the current layer - # and create the node connecting the current layer - # to the input layer we just created. - layer(x) - set_inputs = True - - if set_inputs: - outputs = tf.nest.flatten(layer._inbound_nodes[-1].outputs) - if len(outputs) != 1: - raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) - self.outputs = outputs - self.inputs = layer_utils.get_source_inputs(self.outputs[0]) - self.built = True - self._has_explicit_input_shape = True - - elif self.outputs: - # If the model is being built continuously on top of an input layer: - # refresh its output. 
- output_tensor = layer(self.outputs[0]) - if len(tf.nest.flatten(output_tensor)) != 1: - raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) - self.outputs = [output_tensor] - self.built = True - - if set_inputs or self._graph_initialized: - self._init_graph_network(self.inputs, self.outputs) - self._graph_initialized = True - else: - self._self_tracked_trackables.append(layer) - self._handle_deferred_layer_dependencies([layer]) - - self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(layer.call) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - @traceback_utils.filter_traceback - def pop(self): - """Removes the last layer in the model. - - Raises: - TypeError: if there are no layers in the model. - """ - if not self.layers: - raise TypeError('There are no layers in the model.') - - layer = self._self_tracked_trackables.pop() - self._layer_call_argspecs.pop(layer) - if not self.layers: - self.outputs = None - self.inputs = None - self.built = False - self._inferred_input_shape = None - self._has_explicit_input_shape = False - self._graph_initialized = False - elif self._graph_initialized: - self.layers[-1]._outbound_nodes = [] - self.outputs = [self.layers[-1].output] - self._init_graph_network(self.inputs, self.outputs) - self.built = True - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _build_graph_network_for_inferred_shape(self, - input_shape, - input_dtype=None): - if input_shape is None or not self.layers: - return - if not tf.__internal__.tf2.enabled() or not tf.compat.v1.executing_eagerly_outside_functions(): - # This behavior is disabled in V1 or when eager execution is disabled. - return - if (not self._has_explicit_input_shape and - not self._use_legacy_deferred_behavior): - # Determine whether the input shape is novel, i.e. whether the model - # should be rebuilt. - input_shape = tuple(input_shape) - if self._inferred_input_shape is None: - new_shape = input_shape - else: - new_shape = relax_input_shape(self._inferred_input_shape, input_shape) - if (new_shape is not None and new_shape != self._inferred_input_shape): - # A novel shape has been received: we need to rebuild the model. - # In case we are inside a graph function, we step out of it. - with tf.init_scope(): - inputs = input_layer.Input( - batch_shape=new_shape, - dtype=input_dtype, - name=self.layers[0].name + '_input') - layer_input = inputs - created_nodes = set() - for layer in self.layers: - # Clear nodes previously created via this method. This prevents - # node accumulation and ensures that e.g. `layer.output` is - # always connected to `model.inputs` - # (this is important e.g. for the feature extraction use case). - # We don't just do `layer._inbound_nodes = []` in order - # not to break shared layers added to Sequential models (which is - # technically illegal as per the `add()` docstring, - # but wasn't previously disabled). - clear_previously_created_nodes(layer, self._created_nodes) - try: - # Create Functional API connection by calling the current layer - layer_output = layer(layer_input) - except: # pylint:disable=bare-except - # Functional API calls may fail for a number of reasons: - # 1) The layer may be buggy. In this case it will be easier for - # the user to debug if we fail on the first call on concrete data, - # instead of our own call on a symbolic input. - # 2) The layer is dynamic (graph-incompatible) and hasn't - # overridden `compute_output_shape`. In this case, it is - # impossible to build a graph network. 
- # 3) The layer is otherwise incompatible with the Functional API - # (e.g. this is the case for some probabilistic layers that rely - # on hacks and that do not return tensors). - # In all these cases, we should avoid creating a graph network - # (or we simply can't). - self._use_legacy_deferred_behavior = True - return - if len(tf.nest.flatten(layer_output)) != 1: - raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) - # Keep track of nodes just created above - track_nodes_created_by_last_call(layer, created_nodes) - layer_input = layer_output - outputs = layer_output - self._created_nodes = created_nodes - try: - # Initialize a graph Network. This call will never fail for - # a stack of valid Keras layers. - # However some users have layers that are fundamentally incompatible - # with the Functional API, which do not return tensors. In this - # case, we fall back to the legacy deferred behavior. - # TODO(fchollet): consider raising here, as we should not be - # supporting such layers. - self._init_graph_network(inputs, outputs) - self._graph_initialized = True - except: # pylint:disable=bare-except - self._use_legacy_deferred_behavior = True - self._inferred_input_shape = new_shape - - @generic_utils.default - def build(self, input_shape=None): - if self._graph_initialized: - self._init_graph_network(self.inputs, self.outputs) - else: - if input_shape is None: - raise ValueError('You must provide an `input_shape` argument.') - self._build_graph_network_for_inferred_shape(input_shape) - if not self.built: - input_shape = tuple(input_shape) - self._build_input_shape = input_shape - super().build(input_shape) - self.built = True - - def call(self, inputs, training=None, mask=None): # pylint: disable=redefined-outer-name - # If applicable, update the static input shape of the model. - if not self._has_explicit_input_shape: - if not tf.is_tensor(inputs) and not isinstance( - inputs, tf.Tensor): - # This is a Sequential with multiple inputs. This is technically an - # invalid use case of Sequential, but we tolerate it for backwards - # compatibility. - self._use_legacy_deferred_behavior = True - self._build_input_shape = tf.nest.map_structure( - _get_shape_tuple, inputs) - if tf.__internal__.tf2.enabled(): - logging.warning('Layers in a Sequential model should only have a ' - f'single input tensor. Received: inputs={inputs}. ' - 'Consider rewriting this model with the Functional ' - 'API.') - else: - self._build_graph_network_for_inferred_shape(inputs.shape, inputs.dtype) - - if self._graph_initialized: - if not self.built: - self._init_graph_network(self.inputs, self.outputs) - return super().call(inputs, training=training, mask=mask) - - outputs = inputs # handle the corner case where self.layers is empty - for layer in self.layers: - # During each iteration, `inputs` are the inputs to `layer`, and `outputs` - # are the outputs of `layer` applied to `inputs`. At the end of each - # iteration `inputs` is set to `outputs` to prepare for the next layer. - kwargs = {} - argspec = self._layer_call_argspecs[layer].args - if 'mask' in argspec: - kwargs['mask'] = mask - if 'training' in argspec: - kwargs['training'] = training - - outputs = layer(inputs, **kwargs) - - if len(tf.nest.flatten(outputs)) != 1: - raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) - # `outputs` will be the inputs to the next layer. 
- inputs = outputs - mask = getattr(outputs, '_keras_mask', None) - return outputs - - def compute_output_shape(self, input_shape): - shape = input_shape - for layer in self.layers: - shape = layer.compute_output_shape(shape) - return shape - - def compute_mask(self, inputs, mask): - # TODO(omalleyt): b/123540974 This function is not really safe to call - # by itself because it will duplicate any updates and losses in graph - # mode by `call`ing the Layers again. - outputs = self.call(inputs, mask=mask) # pylint: disable=unexpected-keyword-arg - return getattr(outputs, '_keras_mask', None) - - def get_config(self): - layer_configs = [] - for layer in super().layers: - # `super().layers` include the InputLayer if available (it is filtered out - # of `self.layers`). Note that `self._self_tracked_trackables` is managed - # by the tracking infrastructure and should not be used. - layer_configs.append(generic_utils.serialize_keras_object(layer)) - config = { - 'name': self.name, - 'layers': copy.deepcopy(layer_configs) - } - if not self._is_graph_network and self._build_input_shape is not None: - config['build_input_shape'] = self._build_input_shape - return config - - @classmethod - def from_config(cls, config, custom_objects=None): - if 'name' in config: - name = config['name'] - build_input_shape = config.get('build_input_shape') - layer_configs = config['layers'] - else: - name = None - build_input_shape = None - layer_configs = config - model = cls(name=name) - for layer_config in layer_configs: - layer = layer_module.deserialize(layer_config, - custom_objects=custom_objects) - model.add(layer) - if (not model.inputs and build_input_shape and - isinstance(build_input_shape, (tuple, list))): - model.build(build_input_shape) - return model - - @property - def input_spec(self): - if hasattr(self, '_manual_input_spec'): - return self._manual_input_spec - if self._has_explicit_input_shape: - return super().input_spec - return None - - @input_spec.setter - def input_spec(self, value): - self._manual_input_spec = value - - @property - def _trackable_saved_model_saver(self): - return model_serialization.SequentialSavedModelSaver(self) - def _is_layer_name_unique(self, layer): - for ref_layer in self.layers: - if layer.name == ref_layer.name and ref_layer is not layer: - return False - return True + @tf.__internal__.tracking.no_automatic_dependency_tracking + @traceback_utils.filter_traceback + def __init__(self, layers=None, name=None): + """Creates a `Sequential` model instance. + + Args: + layers: Optional list of layers to add to the model. + name: Optional name for the model. + """ + # Skip the init in FunctionalModel since the model doesn't have + # input/output yet + super(functional.Functional, self).__init__(name=name, autocast=False) + base_layer.keras_api_gauge.get_cell("Sequential").set(True) + self.supports_masking = True + self._compute_output_and_mask_jointly = True + self._auto_track_sub_layers = False + self._inferred_input_shape = None + self._has_explicit_input_shape = False + self._input_dtype = None + self._layer_call_argspecs = {} + self._created_nodes = set() + # Flag that indicates whether the sequential network topology has been + # created. It is false when there isn't any layer, or the layers don't + # have an input shape. + self._graph_initialized = False + + # Unfortunately some Sequential models using custom layers or + # FeatureColumn layers have multiple inputs.
This is fundamentally + # incompatible with most of the Sequential API, and we have to disable a + # number of features for such models. + self._use_legacy_deferred_behavior = False + + # Add to the model any layers passed to the constructor. + if layers: + if not isinstance(layers, (list, tuple)): + layers = [layers] + for layer in layers: + self.add(layer) + + @property + def layers(self): + # Historically, `sequential.layers` only returns layers that were added + # via `add`, and omits the auto-generated `InputLayer` that comes at the + # bottom of the stack. + # `Trackable` manages the `_layers` attributes and does filtering + # over it. + layers = super().layers + if layers and isinstance(layers[0], input_layer.InputLayer): + return layers[1:] + return layers[:] + + @tf.__internal__.tracking.no_automatic_dependency_tracking + @traceback_utils.filter_traceback + def add(self, layer): + """Adds a layer instance on top of the layer stack. + + Args: + layer: layer instance. + + Raises: + TypeError: If `layer` is not a layer instance. + ValueError: In case the `layer` argument does not + know its input shape. + ValueError: In case the `layer` argument has + multiple output tensors, or is already connected + somewhere else (forbidden in `Sequential` models). + """ + # If we are passed a Keras tensor created by keras.Input(), we can + # extract the input layer from its keras history and use that without + # any loss of generality. + if hasattr(layer, "_keras_history"): + origin_layer = layer._keras_history[0] + if isinstance(origin_layer, input_layer.InputLayer): + layer = origin_layer + + if isinstance(layer, tf.Module): + if not isinstance(layer, base_layer.Layer): + layer = functional.ModuleWrapper(layer) + else: + raise TypeError( + "The added layer must be an instance of class Layer. " + f"Received: layer={layer} of type {type(layer)}." + ) + + tf_utils.assert_no_legacy_layers([layer]) + if not self._is_layer_name_unique(layer): + raise ValueError( + "All layers added to a Sequential model " + f'should have unique names. Name "{layer.name}" is already ' + "the name of a layer in this model. Update the `name` argument " + "to pass a unique name." + ) + + self.built = False + set_inputs = False + self._maybe_create_attribute("_self_tracked_trackables", []) + if not self._self_tracked_trackables: + if isinstance(layer, input_layer.InputLayer): + # Case where the user passes an Input or InputLayer layer via + # `add`. + set_inputs = True + else: + batch_shape, dtype = training_utils.get_input_shape_and_dtype( + layer + ) + if batch_shape: + # Instantiate an input layer. + x = input_layer.Input( + batch_shape=batch_shape, + dtype=dtype, + name=layer.name + "_input", + ) + # This will build the current layer + # and create the node connecting the current layer + # to the input layer we just created. + layer(x) + set_inputs = True + + if set_inputs: + outputs = tf.nest.flatten(layer._inbound_nodes[-1].outputs) + if len(outputs) != 1: + raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) + self.outputs = outputs + self.inputs = layer_utils.get_source_inputs(self.outputs[0]) + self.built = True + self._has_explicit_input_shape = True + + elif self.outputs: + # If the model is being built continuously on top of an input layer: + # refresh its output.
+ output_tensor = layer(self.outputs[0]) + if len(tf.nest.flatten(output_tensor)) != 1: + raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) + self.outputs = [output_tensor] + self.built = True + + if set_inputs or self._graph_initialized: + self._init_graph_network(self.inputs, self.outputs) + self._graph_initialized = True + else: + self._self_tracked_trackables.append(layer) + self._handle_deferred_layer_dependencies([layer]) + + self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(layer.call) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + @traceback_utils.filter_traceback + def pop(self): + """Removes the last layer in the model. + + Raises: + TypeError: if there are no layers in the model. + """ + if not self.layers: + raise TypeError("There are no layers in the model.") + + layer = self._self_tracked_trackables.pop() + self._layer_call_argspecs.pop(layer) + if not self.layers: + self.outputs = None + self.inputs = None + self.built = False + self._inferred_input_shape = None + self._has_explicit_input_shape = False + self._graph_initialized = False + elif self._graph_initialized: + self.layers[-1]._outbound_nodes = [] + self.outputs = [self.layers[-1].output] + self._init_graph_network(self.inputs, self.outputs) + self.built = True + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _build_graph_network_for_inferred_shape( + self, input_shape, input_dtype=None + ): + if input_shape is None or not self.layers: + return + if ( + not tf.__internal__.tf2.enabled() + or not tf.compat.v1.executing_eagerly_outside_functions() + ): + # This behavior is disabled in V1 or when eager execution is + # disabled. + return + if ( + not self._has_explicit_input_shape + and not self._use_legacy_deferred_behavior + ): + # Determine whether the input shape is novel, i.e. whether the model + # should be rebuilt. + input_shape = tuple(input_shape) + if self._inferred_input_shape is None: + new_shape = input_shape + else: + new_shape = relax_input_shape( + self._inferred_input_shape, input_shape + ) + if ( + new_shape is not None + and new_shape != self._inferred_input_shape + ): + # A novel shape has been received: we need to rebuild the model. + # In case we are inside a graph function, we step out of it. + with tf.init_scope(): + inputs = input_layer.Input( + batch_shape=new_shape, + dtype=input_dtype, + name=self.layers[0].name + "_input", + ) + layer_input = inputs + created_nodes = set() + for layer in self.layers: + # Clear nodes previously created via this method. This + # prevents node accumulation and ensures that e.g. + # `layer.output` is always connected to `model.inputs` + # (this is important e.g. for the feature extraction use + # case). We don't just do `layer._inbound_nodes = []` + # in order not to break shared layers added to + # Sequential models (which is technically illegal as per + # the `add()` docstring, but wasn't previously + # disabled). + clear_previously_created_nodes( + layer, self._created_nodes + ) + try: + # Create Functional API connection by calling the + # current layer + layer_output = layer(layer_input) + except: # noqa: E722 + # Functional API calls may fail for a number of + # reasons: 1) The layer may be buggy. In this case + # it will be easier for the user to debug if we fail + # on the first call on concrete data, instead of our + # own call on a symbolic input. 2) The layer is + # dynamic (graph-incompatible) and hasn't overridden + # `compute_output_shape`. In this case, it is + # impossible to build a graph network. 
3) The layer + # is otherwise incompatible with the Functional API + # (e.g. this is the case for some probabilistic + # layers that rely on hacks and that do not return + # tensors). In all these cases, we should avoid + # creating a graph network (or we simply can't). + self._use_legacy_deferred_behavior = True + return + if len(tf.nest.flatten(layer_output)) != 1: + raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) + # Keep track of nodes just created above + track_nodes_created_by_last_call(layer, created_nodes) + layer_input = layer_output + outputs = layer_output + self._created_nodes = created_nodes + try: + # Initialize a graph Network. This call will never fail + # for a stack of valid Keras layers. However some users + # have layers that are fundamentally incompatible with + # the Functional API, which do not return tensors. In + # this case, we fall back to the legacy deferred + # behavior. + # TODO(fchollet): consider raising here, as we should + # not be supporting such layers. + self._init_graph_network(inputs, outputs) + self._graph_initialized = True + except: # noqa: E722 + self._use_legacy_deferred_behavior = True + self._inferred_input_shape = new_shape + + @generic_utils.default + def build(self, input_shape=None): + if self._graph_initialized: + self._init_graph_network(self.inputs, self.outputs) + else: + if input_shape is None: + raise ValueError("You must provide an `input_shape` argument.") + self._build_graph_network_for_inferred_shape(input_shape) + if not self.built: + input_shape = tuple(input_shape) + self._build_input_shape = input_shape + super().build(input_shape) + self.built = True - def _assert_weights_created(self): - if self._graph_initialized: - return - # When the graph has not been initialized, use the Model's implementation to - # to check if the weights has been created. - super(functional.Functional, self)._assert_weights_created() # pylint: disable=bad-super-call + def call(self, inputs, training=None, mask=None): + # If applicable, update the static input shape of the model. + if not self._has_explicit_input_shape: + if not tf.is_tensor(inputs) and not isinstance(inputs, tf.Tensor): + # This is a Sequential with multiple inputs. This is technically + # an invalid use case of Sequential, but we tolerate it for + # backwards compatibility. + self._use_legacy_deferred_behavior = True + self._build_input_shape = tf.nest.map_structure( + _get_shape_tuple, inputs + ) + else: + self._build_graph_network_for_inferred_shape( + inputs.shape, inputs.dtype + ) + + if self._graph_initialized: + if not self.built: + self._init_graph_network(self.inputs, self.outputs) + return super().call(inputs, training=training, mask=mask) + + outputs = inputs # handle the corner case where self.layers is empty + for layer in self.layers: + # During each iteration, `inputs` are the inputs to `layer`, and + # `outputs` are the outputs of `layer` applied to `inputs`. At the + # end of each iteration `inputs` is set to `outputs` to prepare for + # the next layer. 
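A note on the loop body that follows: `call()` only forwards `training` and `mask` to a layer whose `call` signature actually accepts them, using the argspecs cached in `_layer_call_argspecs` at `add()` time. Below is a minimal standalone sketch of that dispatch pattern; `PlainLayer`, `DropoutLike`, and `forward` are hypothetical names used for illustration, not Keras code:

```python
import inspect


class PlainLayer:
    """A stand-in layer whose call() accepts no extra keyword arguments."""

    def call(self, inputs):
        return inputs


class DropoutLike:
    """A stand-in layer whose call() opts in to the `training` flag."""

    def call(self, inputs, training=None):
        return [x * 0.5 for x in inputs] if training else inputs


def forward(layers, inputs, training=None, mask=None):
    outputs = inputs
    for layer in layers:
        # Inspect the signature (Keras caches this per layer) and forward
        # only the keyword arguments the layer can accept.
        argspec = inspect.getfullargspec(layer.call).args
        kwargs = {}
        if "mask" in argspec:
            kwargs["mask"] = mask
        if "training" in argspec:
            kwargs["training"] = training
        outputs = layer.call(inputs, **kwargs)
        inputs = outputs  # this layer's outputs feed the next layer
    return outputs


print(forward([PlainLayer(), DropoutLike()], [2.0, 4.0], training=True))
# [1.0, 2.0]
```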
+ kwargs = {} + argspec = self._layer_call_argspecs[layer].args + if "mask" in argspec: + kwargs["mask"] = mask + if "training" in argspec: + kwargs["training"] = training + + outputs = layer(inputs, **kwargs) + + inputs = outputs + + def _get_mask_from_keras_tensor(kt): + return getattr(kt, "_keras_mask", None) + + mask = tf.nest.map_structure(_get_mask_from_keras_tensor, outputs) + return outputs + + def compute_output_shape(self, input_shape): + shape = input_shape + for layer in self.layers: + shape = layer.compute_output_shape(shape) + return shape + + def compute_mask(self, inputs, mask): + # TODO(omalleyt): b/123540974 This function is not really safe to call + # by itself because it will duplicate any updates and losses in graph + # mode by `call`ing the Layers again. + outputs = self.call(inputs, mask=mask) + return getattr(outputs, "_keras_mask", None) + + def get_config(self): + layer_configs = [] + serialize_obj_fn = serialization_lib.serialize_keras_object + if getattr(self, "use_legacy_config", None): + serialize_obj_fn = legacy_serialization.serialize_keras_object + for layer in super().layers: + # `super().layers` includes the InputLayer if available (it is + # filtered out of `self.layers`). Note that + # `self._self_tracked_trackables` is managed by the tracking + # infrastructure and should not be used. + layer_configs.append(serialize_obj_fn(layer)) + config = training.Model.get_config(self) + config["name"] = self.name + config["layers"] = copy.deepcopy(layer_configs) + if not self._is_graph_network and self._build_input_shape is not None: + config["build_input_shape"] = self._build_input_shape + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + if "name" in config: + name = config["name"] + build_input_shape = config.get("build_input_shape") + layer_configs = config["layers"] + else: + name = None + build_input_shape = None + layer_configs = config + model = cls(name=name) + for layer_config in layer_configs: + use_legacy_format = "module" not in layer_config + layer = layer_module.deserialize( + layer_config, + custom_objects=custom_objects, + use_legacy_format=use_legacy_format, + ) + model.add(layer) + + if ( + not model.inputs + and build_input_shape + and isinstance(build_input_shape, (tuple, list)) + ): + model.build(build_input_shape) + + return model + + @property + def input_spec(self): + if hasattr(self, "_manual_input_spec"): + return self._manual_input_spec + if self._has_explicit_input_shape: + return super().input_spec + return None + + @input_spec.setter + def input_spec(self, value): + self._manual_input_spec = value + + @property + def _trackable_saved_model_saver(self): + return model_serialization.SequentialSavedModelSaver(self) + + def _is_layer_name_unique(self, layer): + for ref_layer in self.layers: + if layer.name == ref_layer.name and ref_layer is not layer: + return False + return True + + def _assert_weights_created(self): + if self._graph_initialized: + return + # When the graph has not been initialized, use the Model's + # implementation to check if the weights have been created.
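An aside on the `get_config`/`from_config` pair above: `get_config` now starts from `training.Model.get_config` and serializes each layer (optionally via the legacy serializer), while `from_config` accepts either the full config dict or, on the legacy path, a bare list of layer configs. A minimal round-trip sketch, assuming a working TF2 installation; the layer name `d1` is arbitrary:

```python
import tensorflow as tf

model = tf.keras.Sequential(
    [tf.keras.Input(shape=(16,)), tf.keras.layers.Dense(4, name="d1")]
)
config = model.get_config()  # dict carrying "name" and "layers" entries
clone = tf.keras.Sequential.from_config(config)

# The InputLayer is filtered out of `.layers`, so only the Dense remains.
assert [layer.name for layer in clone.layers] == ["d1"]
```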
+ super(functional.Functional, self)._assert_weights_created() def _get_shape_tuple(t): - if hasattr(t, 'shape'): - shape = t.shape - if isinstance(shape, tuple): - return shape - if shape.rank is not None: - return tuple(shape.as_list()) + if hasattr(t, "shape"): + shape = t.shape + if isinstance(shape, tuple): + return shape + if shape.rank is not None: + return tuple(shape.as_list()) + return None return None - return None def relax_input_shape(shape_1, shape_2): - if shape_1 is None or shape_2 is None: - return None - if len(shape_1) != len(shape_2): - return None - return tuple(None if d1 != d2 else d1 for d1, d2 in zip(shape_1, shape_2)) + if shape_1 is None or shape_2 is None: + return None + if len(shape_1) != len(shape_2): + return None + return tuple(None if d1 != d2 else d1 for d1, d2 in zip(shape_1, shape_2)) def clear_previously_created_nodes(layer, created_nodes): - """Remove nodes from `created_nodes` from the layer's inbound_nodes.""" - for node in layer._inbound_nodes: - prev_layers = node.inbound_layers - for prev_layer in tf.nest.flatten(prev_layers): - prev_layer._outbound_nodes = [ - n for n in prev_layer._outbound_nodes - if n not in created_nodes] - layer._inbound_nodes = [ - n for n in layer._inbound_nodes if n not in created_nodes] + """Remove nodes from `created_nodes` from the layer's inbound_nodes.""" + for node in layer._inbound_nodes: + prev_layers = node.inbound_layers + for prev_layer in tf.nest.flatten(prev_layers): + prev_layer._outbound_nodes = [ + n for n in prev_layer._outbound_nodes if n not in created_nodes + ] + layer._inbound_nodes = [ + n for n in layer._inbound_nodes if n not in created_nodes + ] def track_nodes_created_by_last_call(layer, created_nodes): - """Adds to `created_nodes` the nodes created by the last call to `layer`.""" - if not layer._inbound_nodes: - return - created_nodes.add(layer._inbound_nodes[-1]) - prev_layers = layer._inbound_nodes[-1].inbound_layers - for prev_layer in tf.nest.flatten(prev_layers): - if prev_layer._outbound_nodes: - created_nodes.add(prev_layer._outbound_nodes[-1]) + """Adds to `created_nodes` the nodes created by the last call to `layer`.""" + if not layer._inbound_nodes: + return + created_nodes.add(layer._inbound_nodes[-1]) + prev_layers = layer._inbound_nodes[-1].inbound_layers + for prev_layer in tf.nest.flatten(prev_layers): + if prev_layer._outbound_nodes: + created_nodes.add(prev_layer._outbound_nodes[-1]) diff --git a/keras/engine/sequential_test.py b/keras/engine/sequential_test.py index 11b22397da44..54097e71b42b 100644 --- a/keras/engine/sequential_test.py +++ b/keras/engine/sequential_test.py @@ -14,555 +14,638 @@ # ============================================================================== """Tests specific to `Sequential` model.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras -from tensorflow.python.framework import test_util as tf_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) + class TestSequential(test_combinations.TestCase): - """Most Sequential model API tests are covered in `training_test.py`. 
- """ - - @test_combinations.run_all_keras_modes - def test_basic_methods(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(1, input_dim=2)) - model.add(keras.layers.Dropout(0.3, name='dp')) - model.add(keras.layers.Dense(2, kernel_regularizer='l2', - kernel_constraint='max_norm')) - self.assertEqual(len(model.layers), 3) - self.assertEqual(len(model.weights), 2 * 2) - self.assertEqual(model.get_layer(name='dp').name, 'dp') - - @test_combinations.run_all_keras_modes - def test_input_defined_first_layer(self): - model = keras.models.Sequential() - model.add(keras.Input(shape=(2,), name='input_layer')) - model.add(keras.layers.Dense(1)) - model.add(keras.layers.Dropout(0.3, name='dp')) - model.add(keras.layers.Dense(2, kernel_regularizer='l2', - kernel_constraint='max_norm')) - self.assertLen(model.layers, 3) - self.assertLen(model.weights, 2 * 2) - self.assertEqual(model.get_layer(name='dp').name, 'dp') - - @test_combinations.run_all_keras_modes - def test_single_layer_in_init(self): - model = keras.models.Sequential(keras.layers.Dense(1)) - self.assertLen(model.layers, 1) - - @test_combinations.run_all_keras_modes - def test_sequential_pop(self): - num_hidden = 5 - input_dim = 3 - batch_size = 5 - num_classes = 2 - - model = test_utils.get_small_sequential_mlp( - num_hidden, num_classes, input_dim) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - x = np.random.random((batch_size, input_dim)) - y = np.random.random((batch_size, num_classes)) - model.fit(x, y, epochs=1) - model.pop() - self.assertEqual(len(model.layers), 1) - self.assertEqual(model.output_shape, (None, num_hidden)) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - y = np.random.random((batch_size, num_hidden)) - model.fit(x, y, epochs=1) - - # Test popping single-layer model - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) - model.pop() - self.assertEqual(model.layers, []) - self.assertEqual(model.outputs, None) - - # Invalid use case - model = keras.models.Sequential() - with self.assertRaises(TypeError): - model.pop() - - @test_combinations.run_all_keras_modes - def test_sequential_deferred_build_with_np_arrays(self): - num_hidden = 5 - input_dim = 3 - batch_size = 5 - num_classes = 2 - - model = test_utils.get_small_sequential_mlp(num_hidden, num_classes) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=[keras.metrics.CategoricalAccuracy()], - run_eagerly=test_utils.should_run_eagerly()) - self.assertEqual(len(model.layers), 2) - with self.assertRaisesRegex( - ValueError, 'Weights for model .* have not yet been created'): - len(model.weights) - self.assertFalse(model.built) - - x = np.random.random((batch_size, input_dim)) - y = np.random.random((batch_size, num_classes)) - model.fit(x, y, epochs=1) - self.assertTrue(model.built) - self.assertEqual(len(model.weights), 2 * 2) - - @test_combinations.run_all_keras_modes - def test_sequential_deferred_build_with_dataset_iterators(self): - num_hidden = 5 - input_dim = 3 - num_classes = 2 - num_samples = 50 - steps_per_epoch = 10 - - model = test_utils.get_small_sequential_mlp(num_hidden, num_classes) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=[keras.metrics.CategoricalAccuracy()], - run_eagerly=test_utils.should_run_eagerly()) - self.assertEqual(len(model.layers), 2) - with self.assertRaisesRegex( - ValueError, 'Weights for model .* have not yet been created'): 
- len(model.weights) - self.assertFalse(model.built) - - x = tf.ones((num_samples, input_dim)) - y = tf.zeros((num_samples, num_classes)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - model.fit(dataset, epochs=1, steps_per_epoch=steps_per_epoch) - self.assertTrue(model.built) - self.assertEqual(len(model.weights), 2 * 2) - - # TODO(kaftan) This test fails w/ run_with_all_keras_modes. File ticket - @parameterized.parameters((True,), (False,)) - def test_training_and_eval_methods_on_symbolic_tensors(self, deferred): - with tf.Graph().as_default(), self.cached_session(): - - def get_model(): - if deferred: - model = test_utils.get_small_sequential_mlp(10, 4) - else: - model = test_utils.get_small_sequential_mlp(10, 4, input_dim=3) + """Most Sequential model API tests are covered in `training_test.py`.""" + + @test_combinations.run_all_keras_modes + def test_basic_methods(self): + model = keras.models.Sequential() + model.add(keras.layers.Dense(1, input_dim=2)) + model.add(keras.layers.Dropout(0.3, name="dp")) + model.add( + keras.layers.Dense( + 2, kernel_regularizer="l2", kernel_constraint="max_norm" + ) + ) + self.assertEqual(len(model.layers), 3) + self.assertEqual(len(model.weights), 2 * 2) + self.assertEqual(model.get_layer(name="dp").name, "dp") + + @test_combinations.run_all_keras_modes + def test_input_defined_first_layer(self): + model = keras.models.Sequential() + model.add(keras.Input(shape=(2,), name="input_layer")) + model.add(keras.layers.Dense(1)) + model.add(keras.layers.Dropout(0.3, name="dp")) + model.add( + keras.layers.Dense( + 2, kernel_regularizer="l2", kernel_constraint="max_norm" + ) + ) + self.assertLen(model.layers, 3) + self.assertLen(model.weights, 2 * 2) + self.assertEqual(model.get_layer(name="dp").name, "dp") + + @test_combinations.run_all_keras_modes + def test_single_layer_in_init(self): + model = keras.models.Sequential(keras.layers.Dense(1)) + self.assertLen(model.layers, 1) + + @test_combinations.run_all_keras_modes + def test_sequential_pop(self): + num_hidden = 5 + input_dim = 3 + batch_size = 5 + num_classes = 2 + + model = test_utils.get_small_sequential_mlp( + num_hidden, num_classes, input_dim + ) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + x = np.random.random((batch_size, input_dim)) + y = np.random.random((batch_size, num_classes)) + model.fit(x, y, epochs=1) + model.pop() + self.assertEqual(len(model.layers), 1) + self.assertEqual(model.output_shape, (None, num_hidden)) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + y = np.random.random((batch_size, num_hidden)) + model.fit(x, y, epochs=1) + + # Test popping single-layer model + model = keras.models.Sequential() + model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) + model.pop() + self.assertEqual(model.layers, []) + self.assertEqual(model.outputs, None) + + # Invalid use case + model = keras.models.Sequential() + with self.assertRaises(TypeError): + model.pop() + + @test_combinations.run_all_keras_modes + def test_sequential_deferred_build_with_np_arrays(self): + num_hidden = 5 + input_dim = 3 + batch_size = 5 + num_classes = 2 + + model = test_utils.get_small_sequential_mlp(num_hidden, num_classes) + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=[keras.metrics.CategoricalAccuracy()], + run_eagerly=test_utils.should_run_eagerly(), + ) + self.assertEqual(len(model.layers), 2) 
+ with self.assertRaisesRegex( + ValueError, "Weights for model .* have not yet been created" + ): + len(model.weights) + self.assertFalse(model.built) + + x = np.random.random((batch_size, input_dim)) + y = np.random.random((batch_size, num_classes)) + model.fit(x, y, epochs=1) + self.assertTrue(model.built) + self.assertEqual(len(model.weights), 2 * 2) + + @test_combinations.run_all_keras_modes + def test_sequential_deferred_build_with_dataset_iterators(self): + num_hidden = 5 + input_dim = 3 + num_classes = 2 + num_samples = 50 + steps_per_epoch = 10 + + model = test_utils.get_small_sequential_mlp(num_hidden, num_classes) + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=[keras.metrics.CategoricalAccuracy()], + run_eagerly=test_utils.should_run_eagerly(), + ) + self.assertEqual(len(model.layers), 2) + with self.assertRaisesRegex( + ValueError, "Weights for model .* have not yet been created" + ): + len(model.weights) + self.assertFalse(model.built) + + x = tf.ones((num_samples, input_dim)) + y = tf.zeros((num_samples, num_classes)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + model.fit(dataset, epochs=1, steps_per_epoch=steps_per_epoch) + self.assertTrue(model.built) + self.assertEqual(len(model.weights), 2 * 2) + + # TODO(kaftan) This test fails w/ run_with_all_keras_modes. File ticket + @parameterized.parameters((True,), (False,)) + def test_training_and_eval_methods_on_symbolic_tensors(self, deferred): + with tf.Graph().as_default(), self.cached_session(): + + def get_model(): + if deferred: + model = test_utils.get_small_sequential_mlp(10, 4) + else: + model = test_utils.get_small_sequential_mlp( + 10, 4, input_dim=3 + ) + model.compile( + optimizer="rmsprop", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + return model + + inputs = keras.backend.zeros(shape=(10, 3)) + targets = keras.backend.zeros(shape=(10, 4)) + + model = get_model() + model.fit(inputs, targets, epochs=10, steps_per_epoch=30) + + model = get_model() + model.evaluate(inputs, targets, steps=2, verbose=0) + + model = get_model() + model.predict(inputs, steps=2) + + model = get_model() + model.train_on_batch(inputs, targets) + + model = get_model() + model.test_on_batch(inputs, targets) + + model = get_model() + model.fit( + inputs, + targets, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_data=(inputs, targets), + validation_steps=2, + ) + + @test_combinations.run_all_keras_modes + def test_invalid_use_cases(self): + # Added objects must be layer instances + with self.assertRaises(TypeError): + model = keras.models.Sequential() + model.add(None) + + @test_combinations.run_all_keras_modes + def test_nested_sequential_trainability(self): + input_dim = 20 + num_units = 10 + num_classes = 2 + + inner_model = keras.models.Sequential() + inner_model.add(keras.layers.Dense(num_units, input_shape=(input_dim,))) + + model = keras.models.Sequential() + model.add(inner_model) + model.add(keras.layers.Dense(num_classes)) + + self.assertEqual(len(model.layers), 2) + + self.assertEqual(len(model.trainable_weights), 4) + inner_model.trainable = False + self.assertEqual(len(model.trainable_weights), 2) + inner_model.trainable = True + self.assertEqual(len(model.trainable_weights), 4) + + @test_combinations.run_all_keras_modes + def test_sequential_update_disabling(self): + val_a = np.random.random((10, 4)) + val_out = np.random.random((10, 4)) + + model = keras.models.Sequential() + 
model.add(keras.layers.BatchNormalization(input_shape=(4,))) + + model.trainable = False + model.compile("sgd", "mse") + + x1 = model.predict(val_a) + model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + self.assertAllClose(x1, x2, atol=1e-7) + + model.trainable = True + model.compile("sgd", "mse") + + model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + assert np.abs(np.sum(x1 - x2)) > 1e-5 + + @test_combinations.run_all_keras_modes + def test_sequential_deferred_build_serialization(self): + num_hidden = 5 + input_dim = 3 + batch_size = 5 + num_classes = 2 + + model = test_utils.get_small_sequential_mlp(num_hidden, num_classes) model.compile( - optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy']) - return model - - inputs = keras.backend.zeros(shape=(10, 3)) - targets = keras.backend.zeros(shape=(10, 4)) - - model = get_model() - model.fit(inputs, targets, epochs=10, steps_per_epoch=30) - - model = get_model() - model.evaluate(inputs, targets, steps=2, verbose=0) - - model = get_model() - model.predict(inputs, steps=2) - - model = get_model() - model.train_on_batch(inputs, targets) - - model = get_model() - model.test_on_batch(inputs, targets) - - model = get_model() - model.fit( - inputs, - targets, - epochs=1, - steps_per_epoch=2, - verbose=0, - validation_data=(inputs, targets), - validation_steps=2) - - @test_combinations.run_all_keras_modes - def test_invalid_use_cases(self): - # Added objects must be layer instances - with self.assertRaises(TypeError): - model = keras.models.Sequential() - model.add(None) - - @test_combinations.run_all_keras_modes - def test_nested_sequential_trainability(self): - input_dim = 20 - num_units = 10 - num_classes = 2 - - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(num_units, input_shape=(input_dim,))) - - model = keras.models.Sequential() - model.add(inner_model) - model.add(keras.layers.Dense(num_classes)) - - self.assertEqual(len(model.layers), 2) - - self.assertEqual(len(model.trainable_weights), 4) - inner_model.trainable = False - self.assertEqual(len(model.trainable_weights), 2) - inner_model.trainable = True - self.assertEqual(len(model.trainable_weights), 4) - - @test_combinations.run_all_keras_modes - def test_sequential_update_disabling(self): - val_a = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - model = keras.models.Sequential() - model.add(keras.layers.BatchNormalization(input_shape=(4,))) - - model.trainable = False - model.compile('sgd', 'mse') - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - self.assertAllClose(x1, x2, atol=1e-7) - - model.trainable = True - model.compile('sgd', 'mse') - - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert np.abs(np.sum(x1 - x2)) > 1e-5 - - @test_combinations.run_all_keras_modes - def test_sequential_deferred_build_serialization(self): - num_hidden = 5 - input_dim = 3 - batch_size = 5 - num_classes = 2 - - model = test_utils.get_small_sequential_mlp(num_hidden, num_classes) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=[keras.metrics.CategoricalAccuracy()], - run_eagerly=test_utils.should_run_eagerly()) - self.assertFalse(model.built) - - x = np.random.random((batch_size, input_dim)) - y = np.random.random((batch_size, num_classes)) - model.train_on_batch(x, y) - self.assertTrue(model.built) - - config = model.get_config() - new_model = keras.models.Sequential.from_config(config) - new_model.compile( - 
loss='mse', - optimizer='rmsprop', - metrics=[keras.metrics.CategoricalAccuracy()], - run_eagerly=test_utils.should_run_eagerly()) - x = np.random.random((batch_size, input_dim)) - y = np.random.random((batch_size, num_classes)) - new_model.train_on_batch(x, y) - self.assertEqual(len(new_model.layers), 2) - self.assertEqual(len(new_model.weights), 4) - - @test_combinations.run_all_keras_modes - def test_sequential_shape_inference_deferred(self): - model = test_utils.get_small_sequential_mlp(4, 5) - output_shape = model.compute_output_shape((None, 7)) - self.assertEqual(tuple(output_shape.as_list()), (None, 5)) - - @test_combinations.run_all_keras_modes - def test_sequential_build_deferred(self): - model = test_utils.get_small_sequential_mlp(4, 5) - - model.build((None, 10)) - self.assertTrue(model.built) - self.assertEqual(len(model.weights), 4) - - # Test with nested model - model = test_utils.get_small_sequential_mlp(4, 3) - inner_model = test_utils.get_small_sequential_mlp(4, 5) - model.add(inner_model) - - model.build((None, 10)) - self.assertTrue(model.built) - self.assertEqual(len(model.weights), 8) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_sequential_deferred_manual_build(self): - model = test_utils.get_small_sequential_mlp(4, 5) - self.assertFalse(model.built) - model(tf.zeros([1, 2])) - self.assertTrue(model.built) - model.compile( - 'rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((1, 2)), np.zeros((1, 5))) - - @test_combinations.run_all_keras_modes - def test_sequential_nesting(self): - model = test_utils.get_small_sequential_mlp(4, 3) - inner_model = test_utils.get_small_sequential_mlp(4, 5) - model.add(inner_model) - - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - x = np.random.random((2, 6)) - y = np.random.random((2, 5)) - model.fit(x, y, epochs=1) - - @tf_test_utils.run_v1_only('Behavior changed in V2.') - def test_variable_names_deferred(self): - model = keras.models.Sequential([keras.layers.Dense(3)]) - model.add(keras.layers.Dense(2)) - model(tf.ones([2, 4])) - # Note that for regular sequential models (wrapping graph network), - # the layers' weights are built - # without the model name as prefix (because the Functional API __call__ - # reset the name scope). This is fixable, but it would be - # backwards incompatible. 
- self.assertEqual( - ['sequential/dense/kernel:0', 'sequential/dense/bias:0', - 'sequential/dense_1/kernel:0', 'sequential/dense_1/bias:0'], - [v.name for v in model.variables]) - - @test_combinations.run_all_keras_modes - def test_input_assumptions_propagation(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(1)) - if tf.executing_eagerly(): - with self.assertRaisesRegex(ValueError, - 'expected min_ndim=2, found ndim=0'): - model(1.0) - - @test_combinations.run_all_keras_modes - def test_string_input(self): - seq = keras.Sequential([ - keras.layers.InputLayer(input_shape=(1,), dtype=tf.string), - keras.layers.Lambda(lambda x: x[0]) - ]) - seq.run_eagerly = test_utils.should_run_eagerly() - preds = seq.predict([['tensorflow eager']]) - self.assertEqual(preds.shape, (1,)) - - @test_combinations.run_all_keras_modes - def test_multi_output_layer_not_accepted(self): - - class MultiOutputLayer(keras.layers.Layer): - - def call(self, inputs): - return inputs, inputs - - with self.assertRaisesRegex(ValueError, - 'should have a single output tensor'): - keras.Sequential([MultiOutputLayer(input_shape=(3,))]) - - with self.assertRaisesRegex(ValueError, - 'should have a single output tensor'): - keras.Sequential([ - keras.layers.Dense(1, input_shape=(3,)), - MultiOutputLayer()]) - - # Should also raise error in a deferred build mode - with self.assertRaisesRegex(ValueError, - 'should have a single output tensor'): - keras.Sequential([MultiOutputLayer()])(np.zeros((10, 10))) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_layer_add_after_compile_deferred(self): - model = keras.Sequential([keras.layers.Dense(3)]) - self.assertFalse(model.built) - - model.compile('adam', loss='mse') - model.fit(np.random.random((1, 3)), np.random.random((1, 3))) - self.assertTrue(model.built) - - model.add(keras.layers.Dense(3)) - - model.compile('adam', loss='mse') - model.fit(np.random.random((1, 3)), np.random.random((1, 3))) - self.assertTrue(model.built) - - def test_sequential_layer_tracking(self): - """Test that Sequential only tracks layers added in init or `.add`.""" - layer = keras.layers.Dense(1) - model = keras.Sequential([layer]) - self.assertEqual( - list(model._flatten_layers(include_self=False, recursive=False))[-1], - layer) - - model.a = [keras.layers.Dense(3)] # should not be added to the layers list. - self.assertEqual( - list(model._flatten_layers(include_self=False, recursive=False))[-1], - layer) - - layer2 = keras.layers.Dense(2) - model.add(layer2) - self.assertEqual( - list(model._flatten_layers(include_self=False, recursive=False))[-1], - layer2) - - model.a = [keras.layers.Dense(3)] # should not be added to the layers list. 
- self.assertEqual( - list(model._flatten_layers(include_self=False, recursive=False))[-1], - layer2) - - model.pop() - self.assertEqual( - list(model._flatten_layers(include_self=False, recursive=False))[-1], - layer) - - def test_config_preserves_input_layer(self): - model = keras.Sequential([ - keras.Input((None,), name='my_embedding_input', dtype='int32'), - keras.layers.Embedding(32, 32), - keras.layers.Dense(3), - ]) - config = model.get_config() - new_model = keras.Sequential.from_config(config) - self.assertTrue(new_model.built) - layers = list( - new_model._flatten_layers(include_self=False, recursive=False)) - self.assertEqual(layers[0].dtype, 'int32') - self.assertEqual(layers[0].name, 'my_embedding_input') - - def test_name_unicity(self): - model = keras.Sequential() - model.add(keras.layers.Dense(3, name='specific_name')) - with self.assertRaisesRegex(ValueError, 'should have unique names'): - model.add(keras.layers.Dense(3, name='specific_name')) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_tf_module_call(self): - - class MyModule(tf.Module): - - def __init__(self): - self.v = tf.Variable(2.) - - def __call__(self, x): - return self.v * x - - model = keras.Sequential() - model.add(MyModule()) - model.compile('sgd', 'mse') - x, y = np.ones((10, 1)), np.ones((10, 1)) - model.fit(x, y, batch_size=2) - self.assertLen(model.trainable_variables, 1) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_tf_module_training(self): - - class MyModule(tf.Module): - - def __init__(self): - self.v = tf.Variable(2.) - - def call(self, x, training=None): - # training should be set by Sequential. - assert training is not None - return self.v * x - - model = keras.Sequential() - model.add(MyModule()) - model.compile('sgd', 'mse') - x, y = np.ones((10, 1)), np.ones((10, 1)) - model.fit(x, y, batch_size=2) - self.assertLen(model.trainable_variables, 1) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_tf_module_error(self): - - class MyModule(tf.Module): - - def __init__(self): - self.v = tf.Variable(2.) 
- - model = keras.Sequential() - with self.assertRaisesRegex(ValueError, 'is not defined'): - model.add(MyModule()) + loss="mse", + optimizer="rmsprop", + metrics=[keras.metrics.CategoricalAccuracy()], + run_eagerly=test_utils.should_run_eagerly(), + ) + self.assertFalse(model.built) + + x = np.random.random((batch_size, input_dim)) + y = np.random.random((batch_size, num_classes)) + model.train_on_batch(x, y) + self.assertTrue(model.built) + + config = model.get_config() + new_model = keras.models.Sequential.from_config(config) + new_model.compile( + loss="mse", + optimizer="rmsprop", + metrics=[keras.metrics.CategoricalAccuracy()], + run_eagerly=test_utils.should_run_eagerly(), + ) + x = np.random.random((batch_size, input_dim)) + y = np.random.random((batch_size, num_classes)) + new_model.train_on_batch(x, y) + self.assertEqual(len(new_model.layers), 2) + self.assertEqual(len(new_model.weights), 4) + + @test_combinations.run_all_keras_modes + def test_sequential_shape_inference_deferred(self): + model = test_utils.get_small_sequential_mlp(4, 5) + output_shape = model.compute_output_shape((None, 7)) + self.assertEqual(tuple(output_shape.as_list()), (None, 5)) + + @test_combinations.run_all_keras_modes + def test_sequential_build_deferred(self): + model = test_utils.get_small_sequential_mlp(4, 5) + + model.build((None, 10)) + self.assertTrue(model.built) + self.assertEqual(len(model.weights), 4) + + # Test with nested model + model = test_utils.get_small_sequential_mlp(4, 3) + inner_model = test_utils.get_small_sequential_mlp(4, 5) + model.add(inner_model) + + model.build((None, 10)) + self.assertTrue(model.built) + self.assertEqual(len(model.weights), 8) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_sequential_deferred_manual_build(self): + model = test_utils.get_small_sequential_mlp(4, 5) + self.assertFalse(model.built) + model(tf.zeros([1, 2])) + self.assertTrue(model.built) + model.compile( + "rmsprop", loss="mse", run_eagerly=test_utils.should_run_eagerly() + ) + model.train_on_batch(np.zeros((1, 2)), np.zeros((1, 5))) + @test_combinations.run_all_keras_modes + def test_sequential_nesting(self): + model = test_utils.get_small_sequential_mlp(4, 3) + inner_model = test_utils.get_small_sequential_mlp(4, 5) + model.add(inner_model) -class TestSequentialEagerIntegration(test_combinations.TestCase): + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + x = np.random.random((2, 6)) + y = np.random.random((2, 5)) + model.fit(x, y, epochs=1) + + @tf_test_utils.run_v1_only("Behavior changed in V2.") + def test_variable_names_deferred(self): + model = keras.models.Sequential([keras.layers.Dense(3)]) + model.add(keras.layers.Dense(2)) + model(tf.ones([2, 4])) + # Note that for regular sequential models (wrapping graph network), + # the layers' weights are built + # without the model name as prefix (because the Functional API __call__ + # reset the name scope). This is fixable, but it would be + # backwards incompatible. 
+ self.assertEqual( + [ + "sequential/dense/kernel:0", + "sequential/dense/bias:0", + "sequential/dense_1/kernel:0", + "sequential/dense_1/bias:0", + ], + [v.name for v in model.variables], + ) + + @test_combinations.run_all_keras_modes + def test_input_assumptions_propagation(self): + model = keras.models.Sequential() + model.add(keras.layers.Dense(1)) + if tf.executing_eagerly(): + with self.assertRaisesRegex( + ValueError, "expected min_ndim=2, found ndim=0" + ): + model(1.0) + + @test_combinations.run_all_keras_modes + def test_string_input(self): + seq = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(1,), dtype=tf.string), + keras.layers.Lambda(lambda x: x[0]), + ] + ) + seq.run_eagerly = test_utils.should_run_eagerly() + preds = seq.predict([["tensorflow eager"]]) + self.assertEqual(preds.shape, (1,)) + + @test_combinations.run_all_keras_modes + def test_multi_output_layer_not_accepted(self): + class MultiOutputLayer(keras.layers.Layer): + def call(self, inputs): + return inputs, inputs + + with self.assertRaisesRegex( + ValueError, "should have a single output tensor" + ): + keras.Sequential([MultiOutputLayer(input_shape=(3,))]) + + with self.assertRaisesRegex( + ValueError, "should have a single output tensor" + ): + keras.Sequential( + [keras.layers.Dense(1, input_shape=(3,)), MultiOutputLayer()] + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_layer_add_after_compile_deferred(self): + model = keras.Sequential([keras.layers.Dense(3)]) + self.assertFalse(model.built) + + model.compile("adam", loss="mse") + model.fit(np.random.random((1, 3)), np.random.random((1, 3))) + self.assertTrue(model.built) + + model.add(keras.layers.Dense(3)) + + model.compile("adam", loss="mse") + model.fit(np.random.random((1, 3)), np.random.random((1, 3))) + self.assertTrue(model.built) + + def test_sequential_layer_tracking(self): + """Test that Sequential only tracks layers added in init or `.add`.""" + layer = keras.layers.Dense(1) + model = keras.Sequential([layer]) + self.assertEqual( + list(model._flatten_layers(include_self=False, recursive=False))[ + -1 + ], + layer, + ) + + model.a = [ + keras.layers.Dense(3) + ] # should not be added to the layers list. + self.assertEqual( + list(model._flatten_layers(include_self=False, recursive=False))[ + -1 + ], + layer, + ) + + layer2 = keras.layers.Dense(2) + model.add(layer2) + self.assertEqual( + list(model._flatten_layers(include_self=False, recursive=False))[ + -1 + ], + layer2, + ) + + model.a = [ + keras.layers.Dense(3) + ] # should not be added to the layers list. 
+ self.assertEqual( + list(model._flatten_layers(include_self=False, recursive=False))[ + -1 + ], + layer2, + ) + + model.pop() + self.assertEqual( + list(model._flatten_layers(include_self=False, recursive=False))[ + -1 + ], + layer, + ) + + def test_config_preserves_input_layer(self): + model = keras.Sequential( + [ + keras.Input((None,), name="my_embedding_input", dtype="int32"), + keras.layers.Embedding(32, 32), + keras.layers.Dense(3), + ] + ) + config = model.get_config() + new_model = keras.Sequential.from_config(config) + self.assertTrue(new_model.built) + layers = list( + new_model._flatten_layers(include_self=False, recursive=False) + ) + self.assertEqual(layers[0].dtype, "int32") + self.assertEqual(layers[0].name, "my_embedding_input") + + def test_name_unicity(self): + model = keras.Sequential() + model.add(keras.layers.Dense(3, name="specific_name")) + with self.assertRaisesRegex(ValueError, "should have unique names"): + model.add(keras.layers.Dense(3, name="specific_name")) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_tf_module_call(self): + class MyModule(tf.Module): + def __init__(self): + self.v = tf.Variable(2.0) + + def __call__(self, x): + return self.v * x + + model = keras.Sequential() + model.add(MyModule()) + model.compile("sgd", "mse") + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, batch_size=2) + self.assertLen(model.trainable_variables, 1) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_tf_module_training(self): + class MyModule(tf.Module): + def __init__(self): + self.v = tf.Variable(2.0) + + def call(self, x, training=None): + # training should be set by Sequential. + assert training is not None + return self.v * x + + model = keras.Sequential() + model.add(MyModule()) + model.compile("sgd", "mse") + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, batch_size=2) + self.assertLen(model.trainable_variables, 1) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_tf_module_error(self): + class MyModule(tf.Module): + def __init__(self): + self.v = tf.Variable(2.0) + + model = keras.Sequential() + with self.assertRaisesRegex(ValueError, "is not defined"): + model.add(MyModule()) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_multi_inputs_outputs(self): + model = keras.Sequential( + [ + ImageAugmentLayer(), + ImageAugmentLayer(), + ] + ) + + image_inputs = tf.ones((2, 512, 512, 3)) + label_inputs = tf.ones((2, 2)) + + output = model({"images": image_inputs, "labels": label_inputs}) + self.assertAllClose(output["images"], image_inputs) + self.assertAllClose(output["labels"], label_inputs) + + model.compile(loss="mse") + model.fit( + x={"images": image_inputs, "labels": label_inputs}, + y={"images": image_inputs, "labels": label_inputs}, + steps_per_epoch=1, + ) + self.assertIsNone(model.inputs) + self.assertIsNone(model.outputs) + + # Use the same model with image input only + model({"images": image_inputs}) + model.fit( + x={"images": image_inputs}, + y={"images": image_inputs}, + steps_per_epoch=1, + ) + + model(image_inputs) + model.fit(x=image_inputs, y=image_inputs, steps_per_epoch=1) - @test_combinations.run_all_keras_modes - def test_defun_on_call(self): - # Check that one can subclass Sequential and place the `call` in a `defun`. 
- class MySequential(keras.Sequential): +class TestSequentialEagerIntegration(test_combinations.TestCase): + @test_combinations.run_all_keras_modes + def test_defun_on_call(self): + # Check that one can subclass Sequential and place the `call` in a + # `defun`. - def __init__(self, name=None): - super().__init__(name=name) - self.call = tf.function(self.call) + class MySequential(keras.Sequential): + def __init__(self, name=None): + super().__init__(name=name) + self.call = tf.function(self.call) - model = MySequential() - model.add(keras.layers.Dense(4, activation='relu')) - model.add(keras.layers.Dense(5, activation='softmax')) + model = MySequential() + model.add(keras.layers.Dense(4, activation="relu")) + model.add(keras.layers.Dense(5, activation="softmax")) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.random.random((2, 6)) + y = np.random.random((2, 5)) + model.fit(x, y, epochs=1) + + @test_combinations.run_all_keras_modes + def test_build_before_fit(self): + # Fix for b/112433577 + model = test_utils.get_small_sequential_mlp(4, 5) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) - x = np.random.random((2, 6)) - y = np.random.random((2, 5)) - model.fit(x, y, epochs=1) + model.build((None, 6)) - @test_combinations.run_all_keras_modes - def test_build_before_fit(self): - # Fix for b/112433577 - model = test_utils.get_small_sequential_mlp(4, 5) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) + x = np.random.random((2, 6)) + y = np.random.random((2, 5)) + model.fit(x, y, epochs=1) - model.build((None, 6)) + @test_combinations.run_all_keras_modes + def test_build_empty_network(self): + x = np.random.random((2, 6)) + y = np.random.random((2, 5)) + model = keras.Sequential() - x = np.random.random((2, 6)) - y = np.random.random((2, 5)) - model.fit(x, y, epochs=1) + # Make sure an empty sequential model can still work with build(). + model.build((None, 6)) + self.assertTrue(model.built) - @test_combinations.run_all_keras_modes - def test_build_empty_network(self): - x = np.random.random((2, 6)) - y = np.random.random((2, 5)) - model = keras.Sequential() + model.add(keras.layers.Dense(5, input_shape=(6,))) - # Make sure an empty sequential model can still work with build(). 
- model.build((None, 6)) - self.assertTrue(model.built) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y) - model.add(keras.layers.Dense(5, input_shape=(6,))) + model.pop() + self.assertFalse(model.built) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y) + model.build((None, 6)) + self.assertTrue(model.built) - model.pop() - self.assertFalse(model.built) - model.build((None, 6)) - self.assertTrue(model.built) +class ImageAugmentLayer(keras.layers.Layer): + def call(self, inputs): + return inputs -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/training.py b/keras/engine/training.py index 510d8c2d5fb5..f8b2dbcfa2a3 100644 --- a/keras/engine/training.py +++ b/keras/engine/training.py @@ -17,13 +17,22 @@ import copy import itertools import json -import os import warnings import weakref +import numpy as np +import tensorflow.compat.v2 as tf +from tensorflow.python.distribute import distribute_utils +from tensorflow.python.distribute import input_ops +from tensorflow.python.eager import context +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.tf_export import keras_export +from tensorflow.tools.docs import doc_controls + from keras import backend from keras import callbacks as callbacks_module from keras import optimizers +from keras.dtensor import dtensor_api from keras.dtensor import layout_map as layout_map_lib from keras.engine import base_layer from keras.engine import base_layer_utils @@ -31,3559 +40,4439 @@ from keras.engine import data_adapter from keras.engine import input_layer as input_layer_module from keras.engine import training_utils +from keras.metrics import base_metric from keras.mixed_precision import loss_scale_optimizer as lso +from keras.optimizers import optimizer from keras.optimizers import optimizer_v1 -from keras.optimizers.optimizer_experimental import optimizer as optimizer_experimental -from keras.saving import hdf5_format from keras.saving import pickle_utils -from keras.saving import save -from keras.saving import saving_utils -from keras.saving.experimental import saving_lib -from keras.saving.saved_model import json_utils -from keras.saving.saved_model import model_serialization +from keras.saving import saving_api +from keras.saving import saving_lib +from keras.saving import serialization_lib +from keras.saving.legacy import serialization +from keras.saving.legacy.saved_model import json_utils +from keras.saving.legacy.saved_model import model_serialization from keras.utils import generic_utils from keras.utils import io_utils from keras.utils import layer_utils +from keras.utils import steps_per_execution_tuning +from keras.utils import tf_inspect from keras.utils import tf_utils from keras.utils import traceback_utils from keras.utils import version_utils from keras.utils.mode_keys import ModeKeys -import numpy as np -import tensorflow.compat.v2 as tf - -from tensorflow.python.eager import context -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.util.tf_export import keras_export -from tensorflow.tools.docs import doc_controls - -# pylint: disable=g-import-not-at-top try: - import h5py + import h5py except ImportError: - h5py = None -# pylint: enable=g-import-not-at-top + h5py = None -@keras_export('keras.Model', 'keras.models.Model') +@keras_export("keras.Model", 
"keras.models.Model") class Model(base_layer.Layer, version_utils.ModelVersionSelector): - """`Model` groups layers into an object with training and inference features. - - Args: - inputs: The input(s) of the model: a `keras.Input` object or list of - `keras.Input` objects. - outputs: The output(s) of the model. See Functional API example below. - name: String, the name of the model. - - There are two ways to instantiate a `Model`: - - 1 - With the "Functional API", where you start from `Input`, - you chain layer calls to specify the model's forward pass, - and finally you create your model from inputs and outputs: - - ```python - import tensorflow as tf - - inputs = tf.keras.Input(shape=(3,)) - x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs) - outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x) - model = tf.keras.Model(inputs=inputs, outputs=outputs) - ``` - - Note: Only dicts, lists, and tuples of input tensors are supported. Nested - inputs are not supported (e.g. lists of list or dicts of dict). - - A new Functional API model can also be created by using the - intermediate tensors. This enables you to quickly extract sub-components - of the model. - - Example: - - ```python - inputs = keras.Input(shape=(None, None, 3)) - processed = keras.layers.RandomCrop(width=32, height=32)(inputs) - conv = keras.layers.Conv2D(filters=2, kernel_size=3)(processed) - pooling = keras.layers.GlobalAveragePooling2D()(conv) - feature = keras.layers.Dense(10)(pooling) - - full_model = keras.Model(inputs, feature) - backbone = keras.Model(processed, conv) - activations = keras.Model(conv, feature) - ``` - - Note that the `backbone` and `activations` models are not - created with `keras.Input` objects, but with the tensors that are originated - from `keras.Inputs` objects. Under the hood, the layers and weights will - be shared across these models, so that user can train the `full_model`, and - use `backbone` or `activations` to do feature extraction. - The inputs and outputs of the model can be nested structures of tensors as - well, and the created models are standard Functional API models that support - all the existing APIs. - - 2 - By subclassing the `Model` class: in that case, you should define your - layers in `__init__()` and you should implement the model's forward pass - in `call()`. - - ```python - import tensorflow as tf - - class MyModel(tf.keras.Model): - - def __init__(self): - super().__init__() - self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu) - self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax) - - def call(self, inputs): - x = self.dense1(inputs) - return self.dense2(x) - - model = MyModel() - ``` - - If you subclass `Model`, you can optionally have - a `training` argument (boolean) in `call()`, which you can use to specify - a different behavior in training and inference: - - ```python - import tensorflow as tf - - class MyModel(tf.keras.Model): - - def __init__(self): - super().__init__() - self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu) - self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax) - self.dropout = tf.keras.layers.Dropout(0.5) - - def call(self, inputs, training=False): - x = self.dense1(inputs) - if training: - x = self.dropout(x, training=training) - return self.dense2(x) - - model = MyModel() - ``` - - Once the model is created, you can config the model with losses and metrics - with `model.compile()`, train the model with `model.fit()`, or use the model - to do prediction with `model.predict()`. 
- """ - _TF_MODULE_IGNORED_PROPERTIES = frozenset( - itertools.chain(('_train_counter', '_test_counter', '_predict_counter', - '_steps_per_execution'), - base_layer.Layer._TF_MODULE_IGNORED_PROPERTIES)) # pylint: disable=protected-access - _SCALAR_UPRANKING_ON = False - - def __new__(cls, *args, **kwargs): - # Signature detection - if is_functional_model_init_params(args, kwargs) and cls == Model: - # Functional model - from keras.engine import functional # pylint: disable=g-import-not-at-top - return functional.Functional(skip_init=True, *args, **kwargs) - else: - return super(Model, cls).__new__(cls, *args, **kwargs) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - @traceback_utils.filter_traceback - def __init__(self, *args, **kwargs): - self._is_model_for_instrumentation = True - base_layer.keras_api_gauge.get_cell('model').set(True) - - # Special case for Subclassed Functional Model, which we couldn't detect - # when __new__ is called. We only realize it is a functional model when it - # calls super.__init__ with input and output tensor. - from keras.engine import functional # pylint: disable=g-import-not-at-top - if (is_functional_model_init_params(args, kwargs) and - not isinstance(self, functional.Functional)): - # Filter the kwargs for multiple inheritance. - supported_kwargs = ['inputs', 'outputs', 'name', 'trainable', 'skip_init'] - model_kwargs = {k: kwargs[k] for k in kwargs if k in supported_kwargs} - other_kwargs = {k: kwargs[k] for k in kwargs if k not in supported_kwargs} - inject_functional_model_class(self.__class__) - functional.Functional.__init__(self, *args, **model_kwargs) - - # In case there is any multiple inheritance here, we need to call the - # __init__ for any class that appears after the Functional class. - clz_to_init = [] - found_functional_class = False - for clz in self.__class__.__bases__: - if issubclass(clz, functional.Functional): - found_functional_class = True - continue - if found_functional_class: - clz_to_init.append(clz) - - if clz_to_init: - for clz in clz_to_init: - clz.__init__(self, *args, **other_kwargs) - elif other_kwargs: - # In case there are unused kwargs, we should raise an error to user, in - # case they have a typo in the param name. - raise TypeError( - 'The following keyword arguments passed to `Model` aren\'t ' - 'supported: {}.'.format(other_kwargs)) - return - - base_layer.keras_api_gauge.get_cell('Model subclass').set(True) - # The following are implemented as property functions: - # self.trainable_weights - # self.non_trainable_weights - # `inputs` / `outputs` will only appear in kwargs if either are misspelled. - generic_utils.validate_kwargs(kwargs, { - 'trainable', 'dtype', 'dynamic', 'name', 'autocast', 'inputs', 'outputs' - }) - super().__init__(**kwargs) - # By default, Model is a subclass model, which is not in graph network. - self._is_graph_network = False - - self.inputs = None - self.outputs = None - self.input_names = None - self.output_names = None - # stop_training is used by callback to stop training when error happens - self.stop_training = False - self.history = None - # These objects are used in the default `Model.compile`. They are not - # guaranteed to be set after `Model.compile` is called, as users can - # override compile with custom logic. - self.compiled_loss = None - self.compiled_metrics = None - - # This is True for Sequential networks and Functional networks. - self._compute_output_and_mask_jointly = False - - # Don't reset compilation if already done. 
This may occur if calling - # `__init__` (or `_init_graph_network`) on an already-compiled model - # such as a Sequential model. Sequential models may need to rebuild - # themselves after compilation. - self._maybe_create_attribute('_is_compiled', False) - self._maybe_create_attribute('optimizer', None) - - # Model must be created under scope of DistStrat it will be trained with. - if tf.distribute.has_strategy(): - self._distribution_strategy = tf.distribute.get_strategy() - else: - self._distribution_strategy = None - - self._cluster_coordinator = None - - # Defaults to value of `tf.config.experimental_functions_run_eagerly`. - self._run_eagerly = None - # Initialize cache attrs. - self._reset_compile_cache() - - # Fault-tolerance handler. Set in `ModelCheckpoint`. - self._training_state = None - self._saved_model_inputs_spec = None - self._saved_model_arg_spec = None - self._checkpoint = tf.train.Checkpoint(root=weakref.ref(self)) - - self._steps_per_execution = None - - self._init_batch_counters() - self._base_model_initialized = True - - # `jit_compile` starts off with None as default and gets overwritten by the - # value specified in `Model.compile`, and this is effective for `fit`, - # `evaluate`, and `predict`. - self._jit_compile = None - - self._layout_map = layout_map_lib.get_current_layout_map() - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _init_batch_counters(self): - # Untracked Variables, used to keep track of mini-batches seen in `fit`, - # `evaluate`, and `predict`. - agg = tf.VariableAggregation.ONLY_FIRST_REPLICA - self._train_counter = tf.Variable(0, dtype='int64', aggregation=agg) - self._test_counter = tf.Variable(0, dtype='int64', aggregation=agg) - self._predict_counter = tf.Variable( - 0, dtype='int64', aggregation=agg) - - def __setattr__(self, name, value): - if not getattr(self, '_self_setattr_tracking', True): - super().__setattr__(name, value) - return - - if all( - isinstance(v, (base_layer.Layer, tf.Variable)) or - base_layer_utils.has_weights(v) for v in tf.nest.flatten(value)): - try: - self._base_model_initialized - except AttributeError: - raise RuntimeError( - 'It looks like you are subclassing `Model` and you ' - 'forgot to call `super().__init__()`.' - ' Always start with this line.') - - super().__setattr__(name, value) - - def __reduce__(self): - if self.built: - return (pickle_utils.deserialize_model_from_bytecode, - pickle_utils.serialize_model_as_bytecode(self)) - else: - # SavedModel (and hence serialize_model_as_bytecode) only support - # built models, but if the model is not built, - # it may be possible to serialize as a plain Python object, - # as long as the constituent parts (layers, optimizers, losses, etc.) - # can be serialized as plain Python objects. - # Thus we call up the superclass hierarchy to get an implementation of - # __reduce__ that can pickle this Model as a plain Python object. 
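The comment above describes the two pickling paths: a built model round-trips through its SavedModel bytecode, while an unbuilt one falls back to plain Python-object pickling. A sketch of the user-facing behavior, assuming a TF/Keras version with this pickle support:

```python
import copy
import pickle

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(3,))])
model(np.zeros((1, 3), dtype="float32"))  # build the model first

restored = pickle.loads(pickle.dumps(model))  # SavedModel-bytecode path
clone = copy.deepcopy(model)                  # uses the same machinery
print(restored.built, clone.built)            # True True
```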
- return super().__reduce__() - - def __deepcopy__(self, memo): - if self.built: - new = pickle_utils.deserialize_model_from_bytecode( - *pickle_utils.serialize_model_as_bytecode(self)) - memo[id(self)] = new - else: - # See comment in __reduce__ for explanation - deserializer, serialized, *rest = super().__reduce__() - new = deserializer(*serialized) - memo[id(self)] = new - if rest: - state = copy.deepcopy(rest[0], memo=memo) - new.__setstate__(state) - return new - - def __copy__(self): - return self.__deepcopy__({}) - - @generic_utils.default - def build(self, input_shape): - """Builds the model based on input shapes received. - - This is to be used for subclassed models, which do not know at instantiation - time what their inputs look like. - - This method only exists for users who want to call `model.build()` in a - standalone way (as a substitute for calling the model on real data to - build it). It will never be called by the framework (and thus it will - never throw unexpected errors in an unrelated workflow). + """A model grouping layers into an object with training/inference features. Args: - input_shape: Single tuple, `TensorShape` instance, or list/dict of shapes, - where shapes are tuples, integers, or `TensorShape` instances. + inputs: The input(s) of the model: a `keras.Input` object or a + combination of `keras.Input` objects in a dict, list or tuple. + outputs: The output(s) of the model: a tensor that originated from + `keras.Input` objects or a combination of such tensors in a dict, + list or tuple. See Functional API example below. + name: String, the name of the model. - Raises: - ValueError: - 1. In case of invalid user-provided data (not of type tuple, - list, `TensorShape`, or dict). - 2. If the model requires call arguments that are agnostic - to the input shapes (positional or keyword arg in call signature). - 3. If not all layers were properly built. - 4. If float type inputs are not supported within the layers. - - In each of these cases, the user should build their model by calling it - on real tensor data. - """ - if self._is_graph_network: - super().build(input_shape) - return - - if input_shape is None: - raise ValueError('Input shape must be defined when calling `build()` on ' - 'a `Model` subclass.') - valid_types = (tuple, list, tf.TensorShape, dict) - if not isinstance(input_shape, valid_types): - raise ValueError('Specified input shape is not one of the valid types. ' - 'Please specify a batch input shape of type tuple or ' - 'list of input shapes. User provided ' - 'input type: {}.'.format(type(input_shape))) - - if input_shape and not self.inputs: - # We create placeholders for the `None`s in the shape and build the model - # in a Graph. Since tf.Variable is compatible with both eager execution - # and graph building, the variables created after building the model in - # a Graph are still valid when executing eagerly. 
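The `build()` body in this hunk creates placeholders for the `None` dimensions and traces `call()` in a graph, so variables exist before any real data is seen. A minimal sketch of the standalone-`build()` workflow this enables for a subclassed model:

```python
import tensorflow as tf

class TwoLayer(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.d1 = tf.keras.layers.Dense(4)
        self.d2 = tf.keras.layers.Dense(1)

    def call(self, inputs):
        return self.d2(self.d1(inputs))

model = TwoLayer()
model.build((None, 8))  # `None` is the batch dim; a placeholder is generated
print(model.built, len(model.weights))  # True 4
```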
- if tf.executing_eagerly(): - graph = tf.__internal__.FuncGraph('build_graph') - else: - graph = backend.get_graph() - with graph.as_default(): - if (isinstance(input_shape, list) and - all(d is None or isinstance(d, int) for d in input_shape)): - input_shape = tuple(input_shape) - if isinstance(input_shape, list): - x = [base_layer_utils.generate_placeholders_from_shape(shape) - for shape in input_shape] - elif isinstance(input_shape, dict): - x = { - k: base_layer_utils.generate_placeholders_from_shape(shape) - for k, shape in input_shape.items() - } - else: - x = base_layer_utils.generate_placeholders_from_shape(input_shape) - - kwargs = {} - call_signature = self._call_spec.full_argspec - call_args = call_signature.args - # Exclude `self`, `inputs`, and any argument with a default value. - if len(call_args) > 2: - if call_signature.defaults: - call_args = call_args[2:-len(call_signature.defaults)] - else: - call_args = call_args[2:] - for arg in call_args: - if arg == 'training': - # Case where `training` is a positional arg with no default. - kwargs['training'] = False - else: - # Has invalid call signature with unknown positional arguments. - raise ValueError( - 'Currently, you cannot build your model if it has ' - 'positional or keyword arguments that are not ' - 'inputs to the model, but are required for its ' - '`call()` method. Instead, in order to instantiate ' - 'and build your model, `call()` your model on real ' - 'tensor data with all expected call arguments. The argument ' - 'for `call()` can be a single list/tuple that contains ' - 'multiple inputs.') - elif len(call_args) < 2: - # Signature without `inputs`. - raise ValueError( - 'You can only call `build()` on a model if its `call()` ' - 'method accepts an `inputs` argument.') - try: - self.call(x, **kwargs) - except (tf.errors.InvalidArgumentError, TypeError) as e: - raise ValueError('You cannot build your model by calling `build` ' - 'if your layers do not support float type inputs. ' - 'Instead, in order to instantiate and build your ' - 'model, call your model on real tensor data (of ' - 'the correct dtype).\n\nThe actual error from ' - f'`call` is: {e}.') - super().build(input_shape) - - @traceback_utils.filter_traceback - def __call__(self, *args, **kwargs): - if self._layout_map is not None and not self.built: - # Note that this method is only overridden for DTensor and layout - # injection purpose. - # Capture the inputs and create graph input as replacement for model - # to initialize its weights first. - copied_args = copy.copy(args) - copied_kwargs = copy.copy(kwargs) - - inputs, copied_args, copied_kwargs = self._call_spec.split_out_first_arg( - copied_args, copied_kwargs) - - def _convert_to_graph_inputs(x): - if isinstance(x, (tf.Tensor, np.ndarray, float, int)): - x = tf.convert_to_tensor(x) - return input_layer_module.Input(x.shape) - - # TODO(scottzhu): maybe better handle mask and training flag. - inputs = tf.nest.map_structure(_convert_to_graph_inputs, inputs) - copied_args = tf.nest.map_structure(_convert_to_graph_inputs, copied_args) - copied_kwargs = tf.nest.map_structure( - _convert_to_graph_inputs, copied_kwargs) - - # pylint: disable=g-import-not-at-top - with layout_map_lib.layout_map_scope(self._layout_map): - # We ignore the result here. 
- super().__call__(inputs, *copied_args, **copied_kwargs) - - layout_map_lib._map_subclass_model_variable(self, self._layout_map) - - return super().__call__(*args, **kwargs) - - @doc_controls.doc_in_current_and_subclasses - def call(self, inputs, training=None, mask=None): - """Calls the model on new inputs and returns the outputs as tensors. - - In this case `call()` just reapplies - all ops in the graph to the new inputs - (e.g. build a new computational graph from the provided inputs). - - Note: This method should not be called directly. It is only meant to be - overridden when subclassing `tf.keras.Model`. - To call a model on an input, always use the `__call__()` method, - i.e. `model(inputs)`, which relies on the underlying `call()` method. + There are two ways to instantiate a `Model`: - Args: - inputs: Input tensor, or dict/list/tuple of input tensors. - training: Boolean or boolean scalar tensor, indicating whether to run - the `Network` in training mode or inference mode. - mask: A mask or list of masks. A mask can be either a boolean tensor or - None (no mask). For more details, check the guide - [here](https://www.tensorflow.org/guide/keras/masking_and_padding). - - Returns: - A tensor if there is a single output, or - a list of tensors if there are more than one outputs. - """ - raise NotImplementedError('Unimplemented `tf.keras.Model.call()`: if you ' - 'intend to create a `Model` with the Functional ' - 'API, please provide `inputs` and `outputs` ' - 'arguments. Otherwise, subclass `Model` with an ' - 'overridden `call()` method.') - - @traceback_utils.filter_traceback - def compile(self, - optimizer='rmsprop', - loss=None, - metrics=None, - loss_weights=None, - weighted_metrics=None, - run_eagerly=None, - steps_per_execution=None, - jit_compile=None, - **kwargs): - """Configures the model for training. - - Example: + 1 - With the "Functional API", where you start from `Input`, + you chain layer calls to specify the model's forward pass, + and finally you create your model from inputs and outputs: ```python - model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), - loss=tf.keras.losses.BinaryCrossentropy(), - metrics=[tf.keras.metrics.BinaryAccuracy(), - tf.keras.metrics.FalseNegatives()]) - ``` + import tensorflow as tf - Args: - optimizer: String (name of optimizer) or optimizer instance. See - `tf.keras.optimizers`. - loss: Loss function. May be a string (name of loss function), or - a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss - function is any callable with the signature `loss = fn(y_true, - y_pred)`, where `y_true` are the ground truth values, and - `y_pred` are the model's predictions. - `y_true` should have shape - `(batch_size, d0, .. dN)` (except in the case of - sparse loss functions such as - sparse categorical crossentropy which expects integer arrays of shape - `(batch_size, d0, .. dN-1)`). - `y_pred` should have shape `(batch_size, d0, .. dN)`. - The loss function should return a float tensor. - If a custom `Loss` instance is - used and reduction is set to `None`, return value has shape - `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss - values; otherwise, it is a scalar. If the model has multiple outputs, - you can use a different loss on each output by passing a dictionary - or a list of losses. The loss value that will be minimized by the - model will then be the sum of all individual losses, unless - `loss_weights` is specified. 
- metrics: List of metrics to be evaluated by the model during training - and testing. Each of this can be a string (name of a built-in - function), function or a `tf.keras.metrics.Metric` instance. See - `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A - function is any callable with the signature `result = fn(y_true, - y_pred)`. To specify different metrics for different outputs of a - multi-output model, you could also pass a dictionary, such as - `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`. - You can also pass a list to specify a metric or a list of metrics - for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]` - or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the - strings 'accuracy' or 'acc', we convert this to one of - `tf.keras.metrics.BinaryAccuracy`, - `tf.keras.metrics.CategoricalAccuracy`, - `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss - function used and the model output shape. We do a similar - conversion for the strings 'crossentropy' and 'ce' as well. - The metrics passed here are evaluated without sample weighting; if you - would like sample weighting to apply, you can specify your - metrics via the `weighted_metrics` argument instead. - loss_weights: Optional list or dictionary specifying scalar coefficients - (Python floats) to weight the loss contributions of different model - outputs. The loss value that will be minimized by the model will then - be the *weighted sum* of all individual losses, weighted by the - `loss_weights` coefficients. - If a list, it is expected to have a 1:1 mapping to the model's - outputs. If a dict, it is expected to map output names (strings) - to scalar coefficients. - weighted_metrics: List of metrics to be evaluated and weighted by - `sample_weight` or `class_weight` during training and testing. - run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s - logic will not be wrapped in a `tf.function`. Recommended to leave - this as `None` unless your `Model` cannot be run inside a - `tf.function`. `run_eagerly=True` is not supported when using - `tf.distribute.experimental.ParameterServerStrategy`. - steps_per_execution: Int. Defaults to 1. The number of batches to run - during each `tf.function` call. Running multiple batches inside a - single `tf.function` call can greatly improve performance on TPUs or - small models with a large Python overhead. At most, one full epoch - will be run each execution. If a number larger than the size of the - epoch is passed, the execution will be truncated to the size of the - epoch. Note that if `steps_per_execution` is set to `N`, - `Callback.on_batch_begin` and `Callback.on_batch_end` methods will - only be called every `N` batches (i.e. before/after each `tf.function` - execution). - jit_compile: If `True`, compile the model training step with XLA. - [XLA](https://www.tensorflow.org/xla) is an optimizing compiler for - machine learning. - `jit_compile` is not enabled for by default. - This option cannot be enabled with `run_eagerly=True`. - Note that `jit_compile=True` - may not necessarily work for all models. - For more information on supported operations please refer to the - [XLA documentation](https://www.tensorflow.org/xla). - Also refer to - [known XLA issues](https://www.tensorflow.org/xla/known_issues) for - more details. - **kwargs: Arguments supported for backwards compatibility only. 
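The argument descriptions above cover per-output losses, `loss_weights`, and `steps_per_execution`. A short sketch tying them together for a two-output functional model (the output names `a`/`b` are arbitrary):

```python
import tensorflow as tf

inputs = tf.keras.Input(shape=(3,))
out_a = tf.keras.layers.Dense(1, name="a")(inputs)
out_b = tf.keras.layers.Dense(4, name="b", activation="softmax")(inputs)
model = tf.keras.Model(inputs, [out_a, out_b])

model.compile(
    optimizer="rmsprop",
    loss={"a": "mse", "b": "categorical_crossentropy"},  # per-output losses
    loss_weights={"a": 1.0, "b": 0.2},  # minimized value is the weighted sum
    metrics={"a": ["mae"], "b": ["accuracy"]},
    steps_per_execution=4,  # run 4 batches per tf.function call
)
```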
- """ - base_layer.keras_api_gauge.get_cell('compile').set(True) - with self.distribute_strategy.scope(): - if 'experimental_steps_per_execution' in kwargs: - logging.warning('The argument `steps_per_execution` is no longer ' - 'experimental. Pass `steps_per_execution` instead of ' - '`experimental_steps_per_execution`.') - if not steps_per_execution: - steps_per_execution = kwargs.pop('experimental_steps_per_execution') - - # When compiling from an already-serialized model, we do not want to - # reapply some processing steps (e.g. metric renaming for multi-output - # models, which have prefixes added for each corresponding output name). - from_serialized = kwargs.pop('from_serialized', False) - - self._validate_compile(optimizer, metrics, **kwargs) - self._run_eagerly = run_eagerly - - self.optimizer = self._get_optimizer(optimizer) - if isinstance(loss, compile_utils.LossesContainer): - self.compiled_loss = loss - else: - self.compiled_loss = compile_utils.LossesContainer( - loss, loss_weights, output_names=self.output_names) - self.compiled_metrics = compile_utils.MetricsContainer( - metrics, weighted_metrics, output_names=self.output_names, - from_serialized=from_serialized) - - self._configure_steps_per_execution(steps_per_execution or 1) - - # Initializes attrs that are reset each time `compile` is called. - self._reset_compile_cache() - self._is_compiled = True - self.loss = loss or {} - if (self._run_eagerly or self.dynamic) and jit_compile: - raise ValueError( - 'You cannot enable `run_eagerly` and `jit_compile` ' - 'at the same time.') - else: - self._jit_compile = jit_compile - - def _get_optimizer(self, optimizer): - """Wraps `optimizer` in `LossScaleOptimizer` if necessary.""" - def _get_single_optimizer(opt): - opt = optimizers.get(opt) - if (self.dtype_policy.name == 'mixed_float16' and - not isinstance(opt, lso.LossScaleOptimizer)): - # Loss scaling is necessary with mixed_float16 for models to converge to - # the same accuracy as with float32. - opt = lso.LossScaleOptimizer(opt) - return opt - - return tf.nest.map_structure(_get_single_optimizer, optimizer) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _reset_compile_cache(self): - self.train_function = None - self.test_function = None - self.predict_function = None - # Used to cache the `tf.function`'ed `train_function` to be logged in - # TensorBoard, since the original `train_function` is not necessarily - # a `tf.function` (e.g., with ParameterServerStrategy, the `train_function` - # is a scheduling of the actual training function to a remote worker). - self.train_tf_function = None - - # Used to cache `trainable` attr of `Layer`s for `fit`. - self._compiled_trainable_state = self._get_trainable_state() - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _configure_steps_per_execution(self, steps_per_execution): - self._steps_per_execution = tf.Variable( - steps_per_execution, - dtype='int64', - aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) - - @property - def _should_compute_mask(self): - return False + inputs = tf.keras.Input(shape=(3,)) + x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs) + outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x) + model = tf.keras.Model(inputs=inputs, outputs=outputs) + ``` - @property - def metrics(self): - """Returns the model's metrics added using `compile()`, `add_metric()` APIs. - - Note: Metrics passed to `compile()` are available only after a `keras.Model` - has been trained/evaluated on actual data. 
-
-    Examples:
-
-    >>> inputs = tf.keras.layers.Input(shape=(3,))
-    >>> outputs = tf.keras.layers.Dense(2)(inputs)
-    >>> model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
-    >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae"])
-    >>> [m.name for m in model.metrics]
-    []
-
-    >>> x = np.random.random((2, 3))
-    >>> y = np.random.randint(0, 2, (2, 2))
-    >>> model.fit(x, y)
-    >>> [m.name for m in model.metrics]
-    ['loss', 'mae']
-
-    >>> inputs = tf.keras.layers.Input(shape=(3,))
-    >>> d = tf.keras.layers.Dense(2, name='out')
-    >>> output_1 = d(inputs)
-    >>> output_2 = d(inputs)
-    >>> model = tf.keras.models.Model(
-    ...    inputs=inputs, outputs=[output_1, output_2])
-    >>> model.add_metric(
-    ...    tf.reduce_sum(output_2), name='mean', aggregation='mean')
-    >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae", "acc"])
-    >>> model.fit(x, (y, y))
-    >>> [m.name for m in model.metrics]
-    ['loss', 'out_loss', 'out_1_loss', 'out_mae', 'out_acc', 'out_1_mae',
-    'out_1_acc', 'mean']
+    Note: Only dicts, lists, and tuples of input tensors are supported. Nested
+    inputs are not supported (e.g. lists of lists or dicts of dicts).
-    """
-    metrics = []
-    if self._is_compiled:
-      # TODO(omalleyt): Track `LossesContainer` and `MetricsContainer` objects
-      # so that attr names are not load-bearing.
-      if self.compiled_loss is not None:
-        metrics += self.compiled_loss.metrics
-      if self.compiled_metrics is not None:
-        metrics += self.compiled_metrics.metrics
-
-    for l in self._flatten_layers():
-      metrics.extend(l._metrics)  # pylint: disable=protected-access
-    return metrics
-
-  @property
-  def metrics_names(self):
-    """Returns the model's display labels for all outputs.
-
-    Note: `metrics_names` are available only after a `keras.Model` has been
-    trained/evaluated on actual data.
-
-    Examples:
-
-    >>> inputs = tf.keras.layers.Input(shape=(3,))
-    >>> outputs = tf.keras.layers.Dense(2)(inputs)
-    >>> model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
-    >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae"])
-    >>> model.metrics_names
-    []
-
-    >>> x = np.random.random((2, 3))
-    >>> y = np.random.randint(0, 2, (2, 2))
-    >>> model.fit(x, y)
-    >>> model.metrics_names
-    ['loss', 'mae']
-
-    >>> inputs = tf.keras.layers.Input(shape=(3,))
-    >>> d = tf.keras.layers.Dense(2, name='out')
-    >>> output_1 = d(inputs)
-    >>> output_2 = d(inputs)
-    >>> model = tf.keras.models.Model(
-    ...    inputs=inputs, outputs=[output_1, output_2])
-    >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae", "acc"])
-    >>> model.fit(x, (y, y))
-    >>> model.metrics_names
-    ['loss', 'out_loss', 'out_1_loss', 'out_mae', 'out_acc', 'out_1_mae',
-    'out_1_acc']
+    A new Functional API model can also be created by using the
+    intermediate tensors. This enables you to quickly extract sub-components
+    of the model.
-    """
-
-    # This property includes all output names including `loss` and per-output
-    # losses for backward compatibility.
-    return [m.name for m in self.metrics]
+    Example:
-
-  @property
-  def distribute_strategy(self):
-    """The `tf.distribute.Strategy` this model was created under."""
-    return self._distribution_strategy or tf.distribute.get_strategy()
+    ```python
+    inputs = keras.Input(shape=(None, None, 3))
+    processed = keras.layers.RandomCrop(width=32, height=32)(inputs)
+    conv = keras.layers.Conv2D(filters=2, kernel_size=3)(processed)
+    pooling = keras.layers.GlobalAveragePooling2D()(conv)
+    feature = keras.layers.Dense(10)(pooling)
+
+    full_model = keras.Model(inputs, feature)
+    backbone = keras.Model(processed, conv)
+    activations = keras.Model(conv, feature)
+    ```
-  @property
-  def run_eagerly(self):
-    """Settable attribute indicating whether the model should run eagerly.
+    Note that the `backbone` and `activations` models are not
+    created with `keras.Input` objects, but with the tensors that originate
+    from `keras.Input` objects. Under the hood, the layers and weights will
+    be shared across these models, so that users can train the `full_model`,
+    and use `backbone` or `activations` for feature extraction.
+    The inputs and outputs of the model can be nested structures of tensors
+    as well, and the created models are standard Functional API models that
+    support all the existing APIs.
-    Running eagerly means that your model will be run step by step,
-    like Python code. Your model might run slower, but it should become easier
-    for you to debug it by stepping into individual layer calls.
+    2 - By subclassing the `Model` class: in that case, you should define your
+    layers in `__init__()` and you should implement the model's forward pass
+    in `call()`.
-    By default, we will attempt to compile your model to a static graph to
-    deliver the best execution performance.
+    ```python
+    import tensorflow as tf
-    Returns:
-      Boolean, whether the model should run eagerly.
-    """
-    if self.dynamic and self._run_eagerly is False:  # pylint:disable=g-bool-id-comparison
-      # TODO(fchollet): consider using py_func to enable this.
-      raise ValueError('Your model contains layers that can only be '
-                       'successfully run in eager execution (layers '
-                       'constructed with `dynamic=True`). '
-                       'You cannot set `run_eagerly=False`.')
-
-    if self._cluster_coordinator and self._run_eagerly:
-      raise ValueError('When using `Model` with `ParameterServerStrategy`, '
-                       '`run_eagerly` is not supported.')
-
-    # Run eagerly logic, by priority:
-    # (1) Dynamic models must be run eagerly.
-    # (2) Explicitly setting run_eagerly causes a Model to be run eagerly.
-    # (3) Not explicitly setting run_eagerly defaults to TF's global setting.
-    return (self.dynamic or self._run_eagerly or
-            (tf.config.functions_run_eagerly() and
-             self._run_eagerly is None))
-
-  @run_eagerly.setter
-  def run_eagerly(self, value):
-    self._run_eagerly = value
-
-  def _validate_target_and_loss(self, y, loss):
-    """Raises error if target or loss is not found.
-
-    This method verifies that the target and loss are properly populated
-    when applicable, or raises errors.
+    class MyModel(tf.keras.Model):
-    Args:
-      y: the target for training.
-      loss: the total loss tensor including loss added via `compile` and
-        `add_loss`.
-    """
+      def __init__(self):
+        super().__init__()
+        self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu)
+        self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)
-    # `self.loss` references the loss added via `compile` call. If users have
-    # provided such, the target must be provided; otherwise it's a user error.
- # Note that `self.loss` does not include losses added via `add_loss`, and it - # is a valid use when such loss from `add_loss` exists and target does not. - if self.loss and y is None: - raise ValueError( - 'Target data is missing. Your model was compiled with ' - f'loss={self.loss}, ' - 'and therefore expects target data to be provided in `fit()`.') - - # For training, there must be compiled loss or regularization loss to exist - # in order to apply the gradients. If one is not found, it means no loss - # was supplied via `compile` or `add_loss`. - elif loss is None: - raise ValueError( - 'No loss found. You may have forgotten to provide a `loss` argument ' - 'in the `compile()` method.') - - def train_step(self, data): - """The logic for one training step. - - This method can be overridden to support custom training logic. - For concrete examples of how to override this method see - [Customizing what happends in fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit). - This method is called by `Model.make_train_function`. - - This method should contain the mathematical logic for one step of training. - This typically includes the forward pass, loss calculation, backpropagation, - and metric updates. - - Configuration details for *how* this logic is run (e.g. `tf.function` and - `tf.distribute.Strategy` settings), should be left to - `Model.make_train_function`, which can also be overridden. + def call(self, inputs): + x = self.dense1(inputs) + return self.dense2(x) - Args: - data: A nested structure of `Tensor`s. + model = MyModel() + ``` - Returns: - A `dict` containing values that will be passed to - `tf.keras.callbacks.CallbackList.on_train_batch_end`. Typically, the - values of the `Model`'s metrics are returned. Example: - `{'loss': 0.2, 'accuracy': 0.7}`. - """ - x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) - # Run forward pass. - with tf.GradientTape() as tape: - y_pred = self(x, training=True) - loss = self.compute_loss(x, y, y_pred, sample_weight) - self._validate_target_and_loss(y, loss) - # Run backwards pass. - self.optimizer.minimize(loss, self.trainable_variables, tape=tape) - return self.compute_metrics(x, y, y_pred, sample_weight) - - def compute_loss(self, x=None, y=None, y_pred=None, sample_weight=None): - """Compute the total loss, validate it, and return it. - - Subclasses can optionally override this method to provide custom loss - computation logic. 
+    If you subclass `Model`, you can optionally have
+    a `training` argument (boolean) in `call()`, which you can use to specify
+    a different behavior in training and inference:
-    Example:
     ```python
-    class MyModel(tf.keras.Model):
+    import tensorflow as tf
-      def __init__(self, *args, **kwargs):
-        super(MyModel, self).__init__(*args, **kwargs)
-        self.loss_tracker = tf.keras.metrics.Mean(name='loss')
-
-      def compute_loss(self, x, y, y_pred, sample_weight):
-        loss = tf.reduce_mean(tf.math.squared_difference(y_pred, y))
-        loss += tf.add_n(self.losses)
-        self.loss_tracker.update_state(loss)
-        return loss
-
-      def reset_metrics(self):
-        self.loss_tracker.reset_states()
-
-      @property
-      def metrics(self):
-        return [self.loss_tracker]
+    class MyModel(tf.keras.Model):
-    tensors = tf.random.uniform((10, 10)), tf.random.uniform((10,))
-    dataset = tf.data.Dataset.from_tensor_slices(tensors).repeat().batch(1)
+      def __init__(self):
+        super().__init__()
+        self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu)
+        self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)
+        self.dropout = tf.keras.layers.Dropout(0.5)
-    inputs = tf.keras.layers.Input(shape=(10,), name='my_input')
-    outputs = tf.keras.layers.Dense(10)(inputs)
-    model = MyModel(inputs, outputs)
-    model.add_loss(tf.reduce_sum(outputs))
+      def call(self, inputs, training=False):
+        x = self.dense1(inputs)
+        if training:
+          x = self.dropout(x, training=training)
+        return self.dense2(x)
-    optimizer = tf.keras.optimizers.SGD()
-    model.compile(optimizer, loss='mse', steps_per_execution=10)
-    model.fit(dataset, epochs=2, steps_per_epoch=10)
-    print('My custom loss: ', model.loss_tracker.result().numpy())
+    model = MyModel()
     ```
-    Args:
-      x: Input data.
-      y: Target data.
-      y_pred: Predictions returned by the model (output of `model(x)`)
-      sample_weight: Sample weights for weighting the loss function.
-
-    Returns:
-      The total loss as a `tf.Tensor`, or `None` if no loss results (which is
-      the case when called by `Model.test_step`).
+    Once the model is created, you can configure the model with losses and
+    metrics with `model.compile()`, train the model with `model.fit()`, or
+    use the model to make predictions with `model.predict()`.
     """
-    del x  # The default implementation does not use `x`.
-    return self.compiled_loss(
-        y, y_pred, sample_weight, regularization_losses=self.losses)
-
-  def compute_metrics(self, x, y, y_pred, sample_weight):
-    """Update metric states and collect all metrics to be returned.
+    _TF_MODULE_IGNORED_PROPERTIES = frozenset(
+        itertools.chain(
+            (
+                "_train_counter",
+                "_test_counter",
+                "_predict_counter",
+                "_steps_per_execution",
+                "_compiled_trainable_state",
+            ),
+            base_layer.Layer._TF_MODULE_IGNORED_PROPERTIES,
+        )
+    )
+    _SCALAR_UPRANKING_ON = False
-
-    Subclasses can optionally override this method to provide custom metric
-    updating and collection logic.
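The class docstring above closes by pointing at the `compile()` / `fit()` / `predict()` workflow. As a minimal end-to-end sketch of that workflow (illustrative only, not part of this patch: it reuses the subclassed `MyModel` with the `training`-aware `call()` from the example above, and the data shapes and hyperparameters are assumptions):

```python
import numpy as np
import tensorflow as tf

model = MyModel()  # the subclassed example defined in the docstring above

# Configure training: optimizer, loss, and metrics.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

# Train on random placeholder data. The feature width (8) is arbitrary;
# dense1 builds against it. Labels are ints in [0, 5) to match the
# 5-unit softmax head.
x = np.random.random((32, 8)).astype("float32")
y = np.random.randint(0, 5, size=(32,))
model.fit(x, y, epochs=2, batch_size=8)

# Run inference: one row of 5 probabilities per sample.
preds = model.predict(x)
print(preds.shape)  # (32, 5)
```

Because `call()` declares a `training` argument, `fit()` runs the dropout branch (`training=True`) while `predict()` skips it (`training=False`).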
+ def __new__(cls, *args, **kwargs): + # Signature detection + if is_functional_model_init_params(args, kwargs) and cls == Model: + # Functional model + from keras.engine import functional - Example: - ```python - class MyModel(tf.keras.Sequential): + return functional.Functional(skip_init=True, *args, **kwargs) + else: + return super(Model, cls).__new__(cls, *args, **kwargs) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + @traceback_utils.filter_traceback + def __init__(self, *args, **kwargs): + self._is_model_for_instrumentation = True + base_layer.keras_api_gauge.get_cell("model").set(True) + + # Special case for Subclassed Functional Model, which we couldn't detect + # when __new__ is called. We only realize it is a functional model when + # it calls super.__init__ with input and output tensor. + from keras.engine import functional + + if is_functional_model_init_params(args, kwargs) and not isinstance( + self, functional.Functional + ): + # Filter the kwargs for multiple inheritance. + supported_kwargs = [ + "inputs", + "outputs", + "name", + "trainable", + "skip_init", + ] + model_kwargs = { + k: kwargs[k] for k in kwargs if k in supported_kwargs + } + other_kwargs = { + k: kwargs[k] for k in kwargs if k not in supported_kwargs + } + inject_functional_model_class(self.__class__) + functional.Functional.__init__(self, *args, **model_kwargs) + + # In case there is any multiple inheritance here, we need to call + # the __init__ for any class that appears after the Functional + # class. + clz_to_init = [] + found_functional_class = False + for clz in self.__class__.__bases__: + if issubclass(clz, functional.Functional): + found_functional_class = True + continue + if found_functional_class: + clz_to_init.append(clz) + + if clz_to_init: + for clz in clz_to_init: + clz.__init__(self, *args, **other_kwargs) + elif other_kwargs: + # In case there are unused kwargs, we should raise an error to + # user, in case they have a typo in the param name. + raise TypeError( + "The following keyword arguments passed to `Model` aren't " + "supported: {}.".format(other_kwargs) + ) + return + + base_layer.keras_api_gauge.get_cell("Model subclass").set(True) + # The following are implemented as property functions: + # self.trainable_weights + # self.non_trainable_weights + # `inputs` / `outputs` will only appear in kwargs if either are + # misspelled. + generic_utils.validate_kwargs( + kwargs, + { + "trainable", + "dtype", + "dynamic", + "name", + "autocast", + "inputs", + "outputs", + }, + ) + super().__init__(**kwargs) + # By default, Model is a subclass model, which is not in graph network. + self._is_graph_network = False + + self.inputs = None + self.outputs = None + self.input_names = None + self.output_names = None + # stop_training is used by callback to stop training when error happens + self.stop_training = False + self.history = None + # These objects are used in the default `Model.compile`. They are not + # guaranteed to be set after `Model.compile` is called, as users can + # override compile with custom logic. + self.compiled_loss = None + self.compiled_metrics = None + + # This is True for Sequential networks and Functional networks. + self._compute_output_and_mask_jointly = False + + # Don't reset compilation if already done. This may occur if calling + # `__init__` (or `_init_graph_network`) on an already-compiled model + # such as a Sequential model. Sequential models may need to rebuild + # themselves after compilation. 
+ self._maybe_create_attribute("_is_compiled", False) + self._maybe_create_attribute("optimizer", None) + + # Model must be created under scope of DistStrat it will be trained + # with. + if tf.distribute.has_strategy(): + self._distribution_strategy = tf.distribute.get_strategy() + else: + self._distribution_strategy = None + self._distribute_reduction_method = None + + self._cluster_coordinator = None + + # Defaults to value of `tf.config.experimental_functions_run_eagerly`. + self._run_eagerly = None + # Initialize cache attrs. + self._reset_compile_cache() + + # Fault-tolerance handler. Set in `ModelCheckpoint`. + self._training_state = None + self._saved_model_inputs_spec = None + self._saved_model_arg_spec = None + self._checkpoint = tf.train.Checkpoint(root=weakref.ref(self)) + + self._steps_per_execution = None + self._steps_per_execution_tuner = None + self._autotune_steps_per_execution = False + + self._layout_map = layout_map_lib.get_current_layout_map() + + self._init_batch_counters() + self._base_model_initialized = True + + # `jit_compile` starts off with None as default and gets overwritten by + # the value specified in `Model.compile`, and this is effective for + # `fit`, `evaluate`, and `predict`. + self._jit_compile = None + + def _create_counter_variable(self, init_value): + """Helper function for counter variable creation. + + For the DTensor use case with layout map, since the variable are not + tracked by model, they can't be visited by the layout map, and need to + be properly initialized as DVariable. + """ + # This function should be removed after we move to the strategy based + # implementation for DTensor. + if self._layout_map is None: + agg = tf.VariableAggregation.ONLY_FIRST_REPLICA + return tf.Variable(init_value, dtype="int64", aggregation=agg) + else: + layout = dtensor_api.Layout.replicated( + mesh=self._layout_map.get_default_mesh(), rank=0 + ) + return dtensor_api.DVariable( + init_value, dtype="int64", layout=layout + ) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _init_batch_counters(self): + # Untracked Variables, used to keep track of mini-batches seen in `fit`, + # `evaluate`, and `predict`. + if not tf.inside_function(): + # Creating variables inside tf.function is not allowed, hence + # these would otherwise prevent users from creating Keras layers + # inside tf.function. + # These variables are not connected to outputs so they have no + # effect on graph generation anyway. + + self._train_counter = self._create_counter_variable(0) + self._test_counter = self._create_counter_variable(0) + self._predict_counter = self._create_counter_variable(0) + + def __setattr__(self, name, value): + if not getattr(self, "_self_setattr_tracking", True): + super().__setattr__(name, value) + return + + if all( + isinstance(v, (base_layer.Layer, tf.Variable)) + or base_layer_utils.has_weights(v) + for v in tf.nest.flatten(value) + ): + try: + self._base_model_initialized + except AttributeError: + raise RuntimeError( + "It looks like you are subclassing `Model` and you " + "forgot to call `super().__init__()`." + " Always start with this line." 
+ ) + + super().__setattr__(name, value) + + def __reduce__(self): + if self.built: + return ( + pickle_utils.deserialize_model_from_bytecode, + (pickle_utils.serialize_model_as_bytecode(self),), + ) + else: + # SavedModel (and hence serialize_model_as_bytecode) only support + # built models, but if the model is not built, + # it may be possible to serialize as a plain Python object, + # as long as the constituent parts (layers, optimizers, losses, + # etc.) can be serialized as plain Python objects. Thus we call up + # the superclass hierarchy to get an implementation of __reduce__ + # that can pickle this Model as a plain Python object. + return super().__reduce__() + + def __deepcopy__(self, memo): + if self.built: + new = pickle_utils.deserialize_model_from_bytecode( + pickle_utils.serialize_model_as_bytecode(self) + ) + memo[id(self)] = new + else: + # See comment in __reduce__ for explanation + deserializer, serialized, *rest = super().__reduce__() + new = deserializer(*serialized) + memo[id(self)] = new + if rest: + state = copy.deepcopy(rest[0], memo=memo) + new.__setstate__(state) + return new + + def __copy__(self): + return self.__deepcopy__({}) + + @generic_utils.default + def build(self, input_shape): + """Builds the model based on input shapes received. + + This is to be used for subclassed models, which do not know at + instantiation time what their inputs look like. + + This method only exists for users who want to call `model.build()` in a + standalone way (as a substitute for calling the model on real data to + build it). It will never be called by the framework (and thus it will + never throw unexpected errors in an unrelated workflow). + + Args: + input_shape: Single tuple, `TensorShape` instance, or list/dict of + shapes, where shapes are tuples, integers, or `TensorShape` + instances. + + Raises: + ValueError: + 1. In case of invalid user-provided data (not of type tuple, + list, `TensorShape`, or dict). + 2. If the model requires call arguments that are agnostic + to the input shapes (positional or keyword arg in call + signature). + 3. If not all layers were properly built. + 4. If float type inputs are not supported within the layers. + + In each of these cases, the user should build their model by calling + it on real tensor data. + """ + if self._is_graph_network: + super().build(input_shape) + return + + if input_shape is None: + raise ValueError( + "Input shape must be defined when calling `build()` on " + "a `Model` subclass." + ) + valid_types = (tuple, list, tf.TensorShape, dict) + if not isinstance(input_shape, valid_types): + raise ValueError( + "Specified input shape is not one of the valid types. " + "Please specify a batch input shape of type tuple or " + "list of input shapes. User provided " + "input type: {}.".format(type(input_shape)) + ) + + if input_shape and not self.inputs: + # We create placeholders for the `None`s in the shape and build the + # model in a Graph. Since tf.Variable is compatible with both eager + # execution and graph building, the variables created after building + # the model in a Graph are still valid when executing eagerly. 
+ if tf.executing_eagerly(): + graph = tf.__internal__.FuncGraph("build_graph") + else: + graph = backend.get_graph() + with graph.as_default(): + if isinstance(input_shape, list) and all( + d is None or isinstance(d, int) for d in input_shape + ): + input_shape = tuple(input_shape) + if isinstance(input_shape, list): + x = [ + base_layer_utils.generate_placeholders_from_shape(shape) + for shape in input_shape + ] + elif isinstance(input_shape, dict): + x = { + k: base_layer_utils.generate_placeholders_from_shape( + shape + ) + for k, shape in input_shape.items() + } + else: + x = base_layer_utils.generate_placeholders_from_shape( + input_shape + ) + + kwargs = {} + call_signature = self._call_spec.full_argspec + call_args = call_signature.args + # Exclude `self`, `inputs`, and any argument with a default + # value. + if len(call_args) > 2: + if call_signature.defaults: + call_args = call_args[2 : -len(call_signature.defaults)] + else: + call_args = call_args[2:] + for arg in call_args: + if arg == "training": + # Case where `training` is a positional arg with no + # default. + kwargs["training"] = False + else: + # Has invalid call signature with unknown positional + # arguments. + raise ValueError( + "Currently, you cannot build your model if it " + "has positional or keyword arguments that are " + "not inputs to the model, but are required for " + "its `call()` method. Instead, in order to " + "instantiate and build your model, `call()` " + "your model on real tensor data with all " + "expected call arguments. The argument " + "for `call()` can be a single list/tuple that " + "contains multiple inputs." + ) + elif len(call_args) < 2: + # Signature without `inputs`. + raise ValueError( + "You can only call `build()` on a model if its " + "`call()` method accepts an `inputs` argument." + ) + try: + self.call(x, **kwargs) + except (tf.errors.InvalidArgumentError, TypeError) as e: + raise ValueError( + "You cannot build your model by calling `build` " + "if your layers do not support float type inputs. " + "Instead, in order to instantiate and build your " + "model, call your model on real tensor data (of " + "the correct dtype).\n\nThe actual error from " + f"`call` is: {e}." + ) + super().build(input_shape) + + @traceback_utils.filter_traceback + def __call__(self, *args, **kwargs): + if self._layout_map is not None and not self.built: + # Note that this method is only overridden for DTensor and layout + # injection purpose. + # Capture the inputs and create graph input as replacement for model + # to initialize its weights first. + copied_args = copy.copy(args) + copied_kwargs = copy.copy(kwargs) + + ( + inputs, + copied_args, + copied_kwargs, + ) = self._call_spec.split_out_first_arg(copied_args, copied_kwargs) + + def _convert_to_graph_inputs(x): + if isinstance(x, (tf.Tensor, np.ndarray, float, int)): + x = tf.convert_to_tensor(x) + return input_layer_module.Input(x.shape) + + # TODO(scottzhu): maybe better handle mask and training flag. + inputs = tf.nest.map_structure(_convert_to_graph_inputs, inputs) + copied_args = tf.nest.map_structure( + _convert_to_graph_inputs, copied_args + ) + copied_kwargs = tf.nest.map_structure( + _convert_to_graph_inputs, copied_kwargs + ) + + with layout_map_lib.layout_map_scope(self._layout_map): + # We ignore the result here. 
+            super().__call__(inputs, *copied_args, **copied_kwargs)
+
+            layout_map_lib._map_subclass_model_variable(self, self._layout_map)
+
+        return super().__call__(*args, **kwargs)
+
+    @doc_controls.doc_in_current_and_subclasses
+    def call(self, inputs, training=None, mask=None):
+        """Calls the model on new inputs and returns the outputs as tensors.
+
+        In this case `call()` just reapplies
+        all ops in the graph to the new inputs
+        (e.g. build a new computational graph from the provided inputs).
+
+        Note: This method should not be called directly. It is only meant to be
+        overridden when subclassing `tf.keras.Model`.
+        To call a model on an input, always use the `__call__()` method,
+        i.e. `model(inputs)`, which relies on the underlying `call()` method.
+
+        Args:
+            inputs: Input tensor, or dict/list/tuple of input tensors.
+            training: Boolean or boolean scalar tensor, indicating whether to
+                run the `Network` in training mode or inference mode.
+            mask: A mask or list of masks. A mask can be either a boolean tensor
+                or None (no mask). For more details, check the guide
+                [here](https://www.tensorflow.org/guide/keras/masking_and_padding).
+
+        Returns:
+            A tensor if there is a single output, or
+            a list of tensors if there are more than one outputs.
+        """
+        raise NotImplementedError(
+            "Unimplemented `tf.keras.Model.call()`: if you "
+            "intend to create a `Model` with the Functional "
+            "API, please provide `inputs` and `outputs` "
+            "arguments. Otherwise, subclass `Model` with an "
+            "overridden `call()` method."
+        )
+
+    @traceback_utils.filter_traceback
+    def compile(
+        self,
+        optimizer="rmsprop",
+        loss=None,
+        metrics=None,
+        loss_weights=None,
+        weighted_metrics=None,
+        run_eagerly=None,
+        steps_per_execution=None,
+        jit_compile=None,
+        pss_evaluation_shards=0,
+        **kwargs,
+    ):
+        """Configures the model for training.
+
+        Example:
+
+        ```python
+        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
+                      loss=tf.keras.losses.BinaryCrossentropy(),
+                      metrics=[tf.keras.metrics.BinaryAccuracy(),
+                               tf.keras.metrics.FalseNegatives()])
+        ```
+
+        Args:
+            optimizer: String (name of optimizer) or optimizer instance. See
+                `tf.keras.optimizers`.
+            loss: Loss function. May be a string (name of loss function), or
+                a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss
+                function is any callable with the signature `loss = fn(y_true,
+                y_pred)`, where `y_true` are the ground truth values, and
+                `y_pred` are the model's predictions.
+                `y_true` should have shape
+                `(batch_size, d0, .. dN)` (except in the case of
+                sparse loss functions such as
+                sparse categorical crossentropy which expects integer arrays of
+                shape `(batch_size, d0, .. dN-1)`).
+                `y_pred` should have shape `(batch_size, d0, .. dN)`.
+                The loss function should return a float tensor.
+                If a custom `Loss` instance is
+                used and reduction is set to `None`, return value has shape
+                `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss
+                values; otherwise, it is a scalar. If the model has multiple
+                outputs, you can use a different loss on each output by passing a
+                dictionary or a list of losses. The loss value that will be
+                minimized by the model will then be the sum of all individual
+                losses, unless `loss_weights` is specified.
+            metrics: List of metrics to be evaluated by the model during
+                training and testing. Each of these can be a string (name of a
+                built-in function), function or a `tf.keras.metrics.Metric`
+                instance. See `tf.keras.metrics`. Typically you will use
+                `metrics=['accuracy']`.
+                A function is any callable with the signature `result = fn(y_true,
+                y_pred)`. To specify different metrics for different outputs of a
+                multi-output model, you could also pass a dictionary, such as
+                `metrics={'output_a':'accuracy', 'output_b':['accuracy', 'mse']}`.
+                You can also pass a list to specify a metric or a list of metrics
+                for each output, such as
+                `metrics=[['accuracy'], ['accuracy', 'mse']]`
+                or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the
+                strings 'accuracy' or 'acc', we convert this to one of
+                `tf.keras.metrics.BinaryAccuracy`,
+                `tf.keras.metrics.CategoricalAccuracy`,
+                `tf.keras.metrics.SparseCategoricalAccuracy` based on the shapes
+                of the targets and of the model output. We do a similar
+                conversion for the strings 'crossentropy' and 'ce' as well.
+                The metrics passed here are evaluated without sample weighting; if
+                you would like sample weighting to apply, you can specify your
+                metrics via the `weighted_metrics` argument instead.
+            loss_weights: Optional list or dictionary specifying scalar
+                coefficients (Python floats) to weight the loss contributions of
+                different model outputs. The loss value that will be minimized by
+                the model will then be the *weighted sum* of all individual
+                losses, weighted by the `loss_weights` coefficients. If a list,
+                it is expected to have a 1:1 mapping to the model's outputs. If a
+                dict, it is expected to map output names (strings) to scalar
+                coefficients.
+            weighted_metrics: List of metrics to be evaluated and weighted by
+                `sample_weight` or `class_weight` during training and testing.
+            run_eagerly: Bool. If `True`, this `Model`'s logic will not be
+                wrapped in a `tf.function`. Recommended to leave this as `None`
+                unless your `Model` cannot be run inside a `tf.function`.
+                `run_eagerly=True` is not supported when using
+                `tf.distribute.experimental.ParameterServerStrategy`. Defaults to
+                `False`.
+            steps_per_execution: Int or `'auto'`. The number of batches to
+                run during each `tf.function` call. If set to "auto", keras will
+                automatically tune `steps_per_execution` during runtime. Running
+                multiple batches inside a single `tf.function` call can greatly
+                improve performance on TPUs, when used with distributed strategies
+                such as `ParameterServerStrategy`, or with small models with a
+                large Python overhead. At most, one full epoch will be run each
+                execution. If a number larger than the size of the epoch is
+                passed, the execution will be truncated to the size of the epoch.
+                Note that if `steps_per_execution` is set to `N`,
+                `Callback.on_batch_begin` and `Callback.on_batch_end` methods will
+                only be called every `N` batches (i.e. before/after each
+                `tf.function` execution). Defaults to `1`.
+            jit_compile: If `True`, compile the model training step with XLA.
+                [XLA](https://www.tensorflow.org/xla) is an optimizing compiler
+                for machine learning.
+                `jit_compile` is not enabled by default.
+                Note that `jit_compile=True`
+                may not necessarily work for all models.
+                For more information on supported operations please refer to the
+                [XLA documentation](https://www.tensorflow.org/xla).
+                Also refer to
+                [known XLA issues](https://www.tensorflow.org/xla/known_issues)
+                for more details.
+            pss_evaluation_shards: Integer or 'auto'. Used for
+                `tf.distribute.ParameterServerStrategy` training only. This arg
+                sets the number of shards to split the dataset into, to enable an
+                exact visitation guarantee for evaluation, meaning the model will
+                be applied to each dataset element exactly once, even if workers
+                fail. The dataset must be sharded to ensure separate workers do
+                not process the same data. The number of shards should be at
+                least the number of workers for good performance. A value of
+                'auto' turns on exact evaluation and uses a heuristic for the
+                number of shards based on the number of workers. A value of 0
+                means that no visitation guarantee is provided. NOTE: Custom
+                implementations of `Model.test_step` will be ignored when doing
+                exact evaluation. Defaults to `0`.
+            **kwargs: Arguments supported for backwards compatibility only.
+        """
+        if jit_compile and not tf_utils.can_jit_compile(warn=True):
+            jit_compile = False
+        base_layer.keras_api_gauge.get_cell("compile").set(True)
+        self._compile_config = serialization_lib.Config(
+            optimizer=optimizer,
+            loss=loss,
+            metrics=metrics,
+            loss_weights=loss_weights,
+            weighted_metrics=weighted_metrics,
+            run_eagerly=run_eagerly,
+            steps_per_execution=steps_per_execution,
+            jit_compile=jit_compile,
+        )
+        with self.distribute_strategy.scope():
+            if "experimental_steps_per_execution" in kwargs:
+                logging.warning(
+                    "The argument `steps_per_execution` is no longer "
+                    "experimental. Pass `steps_per_execution` instead of "
+                    "`experimental_steps_per_execution`."
+                )
+                if not steps_per_execution:
+                    steps_per_execution = kwargs.pop(
+                        "experimental_steps_per_execution"
+                    )
+
+            # When compiling from an already-serialized model, we do not want to
+            # reapply some processing steps (e.g. metric renaming for
+            # multi-output models, which have prefixes added for each
+            # corresponding output name).
+            from_serialized = kwargs.pop("from_serialized", False)
+
+            self._validate_compile(optimizer, metrics, **kwargs)
+            self._run_eagerly = run_eagerly
+
+            self.optimizer = self._get_optimizer(optimizer)
+
+            mesh = None
+            if self._layout_map is not None:
+                mesh = self._layout_map.get_default_mesh()
+
+            if isinstance(loss, compile_utils.LossesContainer):
+                self.compiled_loss = loss
+            else:
+                self.compiled_loss = compile_utils.LossesContainer(
+                    loss,
+                    loss_weights,
+                    output_names=self.output_names,
+                    mesh=mesh,
+                )
+            self.compiled_metrics = compile_utils.MetricsContainer(
+                metrics,
+                weighted_metrics,
+                output_names=self.output_names,
+                from_serialized=from_serialized,
+                mesh=mesh,
+            )
+
+            if steps_per_execution == "auto":
+                if self._steps_per_execution is None:
+                    self._configure_steps_per_execution(1)
+                self._steps_per_execution_tuner = (
+                    steps_per_execution_tuning.StepsPerExecutionTuner(
+                        self.optimizer, self._steps_per_execution
+                    )
+                )
+                self._autotune_steps_per_execution = True
+            else:
+                self._configure_steps_per_execution(steps_per_execution or 1)
+
+            self._pss_evaluation_shards = self._infer_exact_eval_shards(
+                pss_evaluation_shards
+            )
+
+            # Initializes attrs that are reset each time `compile` is called.
+            self._reset_compile_cache()
+            self._is_compiled = True
+            self.loss = loss or {}
+            if (self._run_eagerly or self.dynamic) and jit_compile:
+                raise ValueError(
+                    "You cannot enable `run_eagerly` and `jit_compile` "
+                    "at the same time."
+ ) + else: + self._jit_compile = jit_compile + + def _get_optimizer(self, optimizer): + """Wraps `optimizer` in `LossScaleOptimizer` if necessary.""" + + def _get_single_optimizer(opt): + opt = optimizers.get(opt) + if self.dtype_policy.name == "mixed_float16" and not isinstance( + opt, lso.BaseLossScaleOptimizer + ): + # Loss scaling is necessary with mixed_float16 for models to + # converge to the same accuracy as with float32. + opt = lso.BaseLossScaleOptimizer(opt) + return opt + + return tf.nest.map_structure(_get_single_optimizer, optimizer) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _reset_compile_cache(self): + self.train_function = None + self.test_function = None + self.predict_function = None + # Used to cache the `tf.function`'ed `train_function` to be logged in + # TensorBoard, since the original `train_function` is not necessarily + # a `tf.function` (e.g., with ParameterServerStrategy, the + # `train_function` is a scheduling of the actual training function to a + # remote worker). + self.train_tf_function = None + + # Used to cache `trainable` attr of `Layer`s for `fit`. + self._compiled_trainable_state = self._get_trainable_state() + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _configure_steps_per_execution(self, steps_per_execution): + self._steps_per_execution = self._create_counter_variable( + steps_per_execution + ) + + @property + def _should_compute_mask(self): + return False + + @property + def metrics(self): + """Return metrics added using `compile()` or `add_metric()`. + + Note: Metrics passed to `compile()` are available only after a + `keras.Model` has been trained/evaluated on actual data. + + Examples: + + >>> inputs = tf.keras.layers.Input(shape=(3,)) + >>> outputs = tf.keras.layers.Dense(2)(inputs) + >>> model = tf.keras.models.Model(inputs=inputs, outputs=outputs) + >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae"]) + >>> [m.name for m in model.metrics] + [] + + >>> x = np.random.random((2, 3)) + >>> y = np.random.randint(0, 2, (2, 2)) + >>> model.fit(x, y) + >>> [m.name for m in model.metrics] + ['loss', 'mae'] + + >>> inputs = tf.keras.layers.Input(shape=(3,)) + >>> d = tf.keras.layers.Dense(2, name='out') + >>> output_1 = d(inputs) + >>> output_2 = d(inputs) + >>> model = tf.keras.models.Model( + ... inputs=inputs, outputs=[output_1, output_2]) + >>> model.add_metric( + ... tf.reduce_sum(output_2), name='mean', aggregation='mean') + >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae", "acc"]) + >>> model.fit(x, (y, y)) + >>> [m.name for m in model.metrics] + ['loss', 'out_loss', 'out_1_loss', 'out_mae', 'out_acc', 'out_1_mae', + 'out_1_acc', 'mean'] + + """ + metrics = [] + if self._is_compiled: + if self.compiled_loss is not None: + metrics += self.compiled_loss.metrics + if self.compiled_metrics is not None: + metrics += self.compiled_metrics.metrics + + for l in self._flatten_layers(): + metrics.extend(l._metrics) + return metrics + + @property + def metrics_names(self): + """Returns the model's display labels for all outputs. + + Note: `metrics_names` are available only after a `keras.Model` has been + trained/evaluated on actual data. 
+
+        Examples:
+
+        >>> inputs = tf.keras.layers.Input(shape=(3,))
+        >>> outputs = tf.keras.layers.Dense(2)(inputs)
+        >>> model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
+        >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae"])
+        >>> model.metrics_names
+        []
+
+        >>> x = np.random.random((2, 3))
+        >>> y = np.random.randint(0, 2, (2, 2))
+        >>> model.fit(x, y)
+        >>> model.metrics_names
+        ['loss', 'mae']
+
+        >>> inputs = tf.keras.layers.Input(shape=(3,))
+        >>> d = tf.keras.layers.Dense(2, name='out')
+        >>> output_1 = d(inputs)
+        >>> output_2 = d(inputs)
+        >>> model = tf.keras.models.Model(
+        ...    inputs=inputs, outputs=[output_1, output_2])
+        >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae", "acc"])
+        >>> model.fit(x, (y, y))
+        >>> model.metrics_names
+        ['loss', 'out_loss', 'out_1_loss', 'out_mae', 'out_acc', 'out_1_mae',
+        'out_1_acc']
+
+        """
+
+        # This property includes all output names including `loss` and
+        # per-output losses for backward compatibility.
+        return [m.name for m in self.metrics]
+
+    @property
+    def distribute_strategy(self):
+        """The `tf.distribute.Strategy` this model was created under."""
+        return self._distribution_strategy or tf.distribute.get_strategy()
+
+    @property
+    def run_eagerly(self):
+        """Settable attribute indicating whether the model should run eagerly.
+
+        Running eagerly means that your model will be run step by step,
+        like Python code. Your model might run slower, but it should become
+        easier for you to debug it by stepping into individual layer calls.
+
+        By default, we will attempt to compile your model to a static graph to
+        deliver the best execution performance.
+
+        Returns:
+            Boolean, whether the model should run eagerly.
+        """
+        if self.dynamic and self._run_eagerly == False:
+            # TODO(fchollet): consider using py_func to enable this.
+            raise ValueError(
+                "Your model contains layers that can only be "
+                "successfully run in eager execution (layers "
+                "constructed with `dynamic=True`). "
+                "You cannot set `run_eagerly=False`."
+            )
+
+        if self._cluster_coordinator and self._run_eagerly:
+            raise ValueError(
+                "When using `Model` with `ParameterServerStrategy`, "
+                "`run_eagerly` is not supported."
+            )
+
+        # Run eagerly logic, by priority:
+        # (1) Dynamic models must be run eagerly.
+        # (2) Explicitly setting run_eagerly causes a Model to be run eagerly.
+        # (3) Not explicitly setting run_eagerly defaults to TF's global
+        #     setting.
+        return (
+            self.dynamic
+            or self._run_eagerly
+            or (tf.config.functions_run_eagerly() and self._run_eagerly is None)
+        )
+
+    @run_eagerly.setter
+    def run_eagerly(self, value):
+        self._run_eagerly = value
+
+    @property
+    def autotune_steps_per_execution(self):
+        """Settable property to enable tuning for `steps_per_execution`."""
+        return self._autotune_steps_per_execution
+
+    @autotune_steps_per_execution.setter
+    def autotune_steps_per_execution(self, value):
+        self._autotune_steps_per_execution = value
+        if value and self._steps_per_execution_tuner is None:
+            if self._steps_per_execution is None:
+                self._configure_steps_per_execution(1)
+            self._steps_per_execution_tuner = (
+                steps_per_execution_tuning.StepsPerExecutionTuner(
+                    self.optimizer, self._steps_per_execution
+                )
+            )
+
+    @property
+    def steps_per_execution(self):
+        """Settable `steps_per_execution` variable.
Requires a compiled model.""" + return self._steps_per_execution + + @steps_per_execution.setter + def steps_per_execution(self, value): + if self._steps_per_execution is None: + self._configure_steps_per_execution(value) + else: + self._steps_per_execution.assign(value) + + @property + def jit_compile(self): + """Specify whether to compile the model with XLA. + + [XLA](https://www.tensorflow.org/xla) is an optimizing compiler + for machine learning. `jit_compile` is not enabled by default. + Note that `jit_compile=True` may not necessarily work for all models. + + For more information on supported operations please refer to the + [XLA documentation](https://www.tensorflow.org/xla). Also refer to + [known XLA issues](https://www.tensorflow.org/xla/known_issues) + for more details. + """ + return self._jit_compile + + @jit_compile.setter + def jit_compile(self, value): + # Function remains cached with previous jit_compile settings + if self._jit_compile == value: + # Avoid resetting compiler cache if possible if the value is the + # same + return + # Check if TensorFlow is compiled with XLA before setting the value + if value and not tf_utils.can_jit_compile(warn=True): + self._jit_compile = False + return + + self._jit_compile = value + # Setting `jit_compile` should invalidate previously cached functions. + self._reset_compile_cache() + + @property + def distribute_reduction_method(self): + """The method employed to reduce per-replica values during training. + + Unless specified, the value "auto" will be assumed, indicating that + the reduction strategy should be chosen based on the current + running environment. + See `reduce_per_replica` function for more details. + + """ + return self._distribute_reduction_method or "auto" + + @distribute_reduction_method.setter + def distribute_reduction_method(self, value): + self._distribute_reduction_method = value + + def _validate_target_and_loss(self, y, loss): + """Raises error if target or loss is not found. + + This method verifies that the target and loss are properly populated + when applicable, or raises errors. + + Args: + y: the target for training. + loss: the total loss tensor including loss added via `compile` and + `add_loss`. + """ + + # `self.loss` references the loss added via `compile` call. If users + # have provided such, the target must be provided; otherwise it's a user + # error. Note that `self.loss` does not include losses added via + # `add_loss`, and it is a valid use when such loss from `add_loss` + # exists and target does not. + if self.loss and y is None: + raise ValueError( + "Target data is missing. Your model was compiled with " + f"loss={self.loss}, " + "and therefore expects target data to be provided in `fit()`." + ) + + # For training, there must be compiled loss or regularization loss to + # exist in order to apply the gradients. If one is not found, it means + # no loss was supplied via `compile` or `add_loss`. + elif loss is None: + raise ValueError( + "No loss found. You may have forgotten to provide a `loss` " + "argument in the `compile()` method." + ) + + def train_step(self, data): + """The logic for one training step. + + This method can be overridden to support custom training logic. + For concrete examples of how to override this method see + [Customizing what happens in fit]( + https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit). + This method is called by `Model.make_train_function`. + + This method should contain the mathematical logic for one step of + training. 
This typically includes the forward pass, loss calculation, + backpropagation, and metric updates. + + Configuration details for *how* this logic is run (e.g. `tf.function` + and `tf.distribute.Strategy` settings), should be left to + `Model.make_train_function`, which can also be overridden. + + Args: + data: A nested structure of `Tensor`s. + + Returns: + A `dict` containing values that will be passed to + `tf.keras.callbacks.CallbackList.on_train_batch_end`. Typically, the + values of the `Model`'s metrics are returned. Example: + `{'loss': 0.2, 'accuracy': 0.7}`. + """ + x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) + # Run forward pass. + with tf.GradientTape() as tape: + y_pred = self(x, training=True) + loss = self.compute_loss(x, y, y_pred, sample_weight) + self._validate_target_and_loss(y, loss) + # Run backwards pass. + self.optimizer.minimize(loss, self.trainable_variables, tape=tape) + return self.compute_metrics(x, y, y_pred, sample_weight) + + def compute_loss(self, x=None, y=None, y_pred=None, sample_weight=None): + """Compute the total loss, validate it, and return it. + + Subclasses can optionally override this method to provide custom loss + computation logic. + + Example: + ```python + class MyModel(tf.keras.Model): + + def __init__(self, *args, **kwargs): + super(MyModel, self).__init__(*args, **kwargs) + self.loss_tracker = tf.keras.metrics.Mean(name='loss') + + def compute_loss(self, x, y, y_pred, sample_weight): + loss = tf.reduce_mean(tf.math.squared_difference(y_pred, y)) + loss += tf.add_n(self.losses) + self.loss_tracker.update_state(loss) + return loss + + def reset_metrics(self): + self.loss_tracker.reset_states() + + @property + def metrics(self): + return [self.loss_tracker] + + tensors = tf.random.uniform((10, 10)), tf.random.uniform((10,)) + dataset = tf.data.Dataset.from_tensor_slices(tensors).repeat().batch(1) + + inputs = tf.keras.layers.Input(shape=(10,), name='my_input') + outputs = tf.keras.layers.Dense(10)(inputs) + model = MyModel(inputs, outputs) + model.add_loss(tf.reduce_sum(outputs)) + + optimizer = tf.keras.optimizers.SGD() + model.compile(optimizer, loss='mse', steps_per_execution=10) + model.fit(dataset, epochs=2, steps_per_epoch=10) + print('My custom loss: ', model.loss_tracker.result().numpy()) + ``` + + Args: + x: Input data. + y: Target data. + y_pred: Predictions returned by the model (output of `model(x)`) + sample_weight: Sample weights for weighting the loss function. + + Returns: + The total loss as a `tf.Tensor`, or `None` if no loss results (which + is the case when called by `Model.test_step`). + """ + del x # The default implementation does not use `x`. + return self.compiled_loss( + y, y_pred, sample_weight, regularization_losses=self.losses + ) + + def compute_metrics(self, x, y, y_pred, sample_weight): + """Update metric states and collect all metrics to be returned. + + Subclasses can optionally override this method to provide custom metric + updating and collection logic. + + Example: + ```python + class MyModel(tf.keras.Sequential): + + def compute_metrics(self, x, y, y_pred, sample_weight): + + # This super call updates `self.compiled_metrics` and returns + # results for all metrics listed in `self.metrics`. + metric_results = super(MyModel, self).compute_metrics( + x, y, y_pred, sample_weight) + + # Note that `self.custom_metric` is not listed in `self.metrics`. 
+            self.custom_metric.update_state(x, y, y_pred, sample_weight)
+            metric_results['custom_metric_name'] = self.custom_metric.result()
+            return metric_results
+        ```
+
+        Args:
+            x: Input data.
+            y: Target data.
+            y_pred: Predictions returned by the model (output of `model.call(x)`)
+            sample_weight: Sample weights for weighting the loss function.
+
+        Returns:
+            A `dict` containing values that will be passed to
+            `tf.keras.callbacks.CallbackList.on_train_batch_end()`. Typically, the
+            values of the metrics listed in `self.metrics` are returned. Example:
+            `{'loss': 0.2, 'accuracy': 0.7}`.
+        """
+        del x  # The default implementation does not use `x`.
+        self.compiled_metrics.update_state(y, y_pred, sample_weight)
+        return self.get_metrics_result()
+
+    def get_metrics_result(self):
+        """Returns the model's metrics values as a dict.
+
+        If any of the metric results is a dict (containing multiple metrics),
+        each of them is added to the top-level dict returned by this method.
+
+        Returns:
+            A `dict` containing values of the metrics listed in `self.metrics`.
+            Example:
+            `{'loss': 0.2, 'accuracy': 0.7}`.
+        """
+        # Collect metrics to return
+        return_metrics = {}
+        for metric in self.metrics:
+            result = metric.result()
+            if isinstance(result, dict):
+                return_metrics.update(result)
+            else:
+                return_metrics[metric.name] = result
+        return return_metrics
+
+    def _validate_and_get_metrics_result(self, logs):
+        """Returns model metrics as a dict if the keys match with input logs.
+
+        When the training / evaluation is performed with asynchronous steps,
+        such as the case with `tf.distribute.ParameterServerStrategy`, the last
+        scheduled `train / test_step` may not give the latest metrics because
+        it is not guaranteed to be executed last. This method gets metrics from
+        the model directly instead of relying on the return from the last step
+        function.
+
+        It logs a warning if the metric results could not be overridden when
+        used with `tf.distribute.ParameterServerStrategy`.
+
+        When the user has custom train / test step functions, the metrics
+        returned may be different from `Model.metrics`. In those instances,
+        this function will be a no-op and return the logs.
+
+        Args:
+            logs: A `dict` of metrics returned by the train / test step function.
+
+        Returns:
+            A `dict` containing values of the metrics listed in `self.metrics`
+            when logs and model metrics keys match. Otherwise it returns input
+            `logs`.
+        """
+        PSS_WARN_MSG = "Could not get Model metric results. \
+        Using the results of last step function could lead to incorrect \
+        results when used with ParameterServerStrategy"
+        try:
+            metric_logs = self.get_metrics_result()
+        except TypeError:
+            if self._cluster_coordinator:
+                logging.warning(PSS_WARN_MSG)
+        else:
+            # Verify that train / test step logs passed and metric logs have
+            # matching keys. Could be different when using custom step functions
+            if isinstance(logs, dict) and set(logs.keys()) == set(
+                metric_logs.keys()
+            ):
+                logs = tf_utils.sync_to_numpy_or_python_type(metric_logs)
+            elif self._cluster_coordinator:
+                logging.warning(PSS_WARN_MSG)
+        return logs

-  def compute_metrics(self, x, y, y_pred, sample_weight):
+    def _aggregate_exact_metrics(self, logs):
+        # When doing exact evaluation, `logs` is a list of each data shard's
+        # metric variables, which will be used to update the metrics.
+        for shard_result in logs:
+            for metric in self.metrics:
+                if metric.name not in shard_result.keys():
+                    logging.log_first_n(
+                        logging.WARN,
+                        f"No matching result found for metric {metric.name}. 
" + "This metric's computed result may be incorrect.", + 3, + ) + continue + metric_result = shard_result[metric.name] + if len(metric_result) != len(metric.weights): + raise ValueError( + f"Expected {len(metric.weights)} variables in result " + f"for metric {metric.name}, but found " + f"{len(metric_result)}." + ) + for weight, val in zip(metric.weights, metric_result): + weight.assign_add(val) + return self.get_metrics_result() + + def make_train_function(self, force=False): + """Creates a function that executes one step of training. + + This method can be overridden to support custom training logic. + This method is called by `Model.fit` and `Model.train_on_batch`. + + Typically, this method directly controls `tf.function` and + `tf.distribute.Strategy` settings, and delegates the actual training + logic to `Model.train_step`. + + This function is cached the first time `Model.fit` or + `Model.train_on_batch` is called. The cache is cleared whenever + `Model.compile` is called. You can skip the cache and generate again the + function with `force=True`. + + Args: + force: Whether to regenerate the train function and skip the cached + function if available. + + Returns: + Function. The function created by this method should accept a + `tf.data.Iterator`, and return a `dict` containing values that will + be passed to `tf.keras.Callbacks.on_train_batch_end`, such as + `{'loss': 0.2, 'accuracy': 0.7}`. + """ + if self.train_function is not None and not force: + return self.train_function + + def step_function(model, iterator): + """Runs a single training step.""" + + def run_step(data): + outputs = model.train_step(data) + # Ensure counter is updated only if `train_step` succeeds. + with tf.control_dependencies(_minimum_control_deps(outputs)): + model._train_counter.assign_add(1) + return outputs + + if self.jit_compile: + run_step = tf.function( + run_step, jit_compile=True, reduce_retracing=True + ) + data = next(iterator) + outputs = model.distribute_strategy.run(run_step, args=(data,)) + outputs = reduce_per_replica( + outputs, + self.distribute_strategy, + reduction=self.distribute_reduction_method, + ) + return outputs + + # Special case if steps_per_execution is one. + if ( + self._steps_per_execution is None + or self._steps_per_execution.numpy().item() == 1 + and not self.autotune_steps_per_execution + ): + + def train_function(iterator): + """Runs a training execution with a single step.""" + return step_function(self, iterator) + + if not self.run_eagerly: + train_function = tf.function( + train_function, reduce_retracing=True + ) + self.train_tf_function = train_function + + if self._cluster_coordinator: + self.train_function = ( + lambda it: self._cluster_coordinator.schedule( + train_function, args=(it,) + ) + ) + else: + self.train_function = train_function + + # If we're using a coordinator, use the value of + # self._steps_per_execution at the time the function is + # called/scheduled, and not when it is actually executed. 
+ elif self._cluster_coordinator: + + def train_function(iterator, steps_per_execution): + """Runs a training execution with multiple steps.""" + for _ in tf.range(steps_per_execution): + outputs = step_function(self, iterator) + return outputs + + if not self.run_eagerly: + train_function = tf.function( + train_function, reduce_retracing=True + ) + self.train_tf_function = train_function + + self.train_function = lambda it: self._cluster_coordinator.schedule( + train_function, args=(it, self._steps_per_execution.value()) + ) + else: - # This super call updates `self.compiled_metrics` and returns results - # for all metrics listed in `self.metrics`. - metric_results = super(MyModel, self).compute_metrics( - x, y, y_pred, sample_weight) + def train_function(iterator): + """Runs a training execution with multiple steps.""" + for _ in tf.range(self._steps_per_execution): + outputs = step_function(self, iterator) + return outputs - # Note that `self.custom_metric` is not listed in `self.metrics`. - self.custom_metric.update_state(x, y, y_pred, sample_weight) - metric_results['custom_metric_name'] = self.custom_metric.result() - return metric_results - ``` + if not self.run_eagerly: + train_function = tf.function( + train_function, reduce_retracing=True + ) + self.train_tf_function = train_function + self.train_function = train_function - Args: - x: Input data. - y: Target data. - y_pred: Predictions returned by the model (output of `model.call(x)`) - sample_weight: Sample weights for weighting the loss function. + return self.train_function - Returns: - A `dict` containing values that will be passed to - `tf.keras.callbacks.CallbackList.on_train_batch_end()`. Typically, the - values of the metrics listed in `self.metrics` are returned. Example: - `{'loss': 0.2, 'accuracy': 0.7}`. - """ - del x # The default implementation does not use `x`. - self.compiled_metrics.update_state(y, y_pred, sample_weight) - # Collect metrics to return - return_metrics = {} - for metric in self.metrics: - result = metric.result() - if isinstance(result, dict): - return_metrics.update(result) - else: - return_metrics[metric.name] = result - return return_metrics - - def make_train_function(self, force=False): - """Creates a function that executes one step of training. - - This method can be overridden to support custom training logic. - This method is called by `Model.fit` and `Model.train_on_batch`. - - Typically, this method directly controls `tf.function` and - `tf.distribute.Strategy` settings, and delegates the actual training - logic to `Model.train_step`. - - This function is cached the first time `Model.fit` or - `Model.train_on_batch` is called. The cache is cleared whenever - `Model.compile` is called. You can skip the cache and generate again the - function with `force=True`. + @traceback_utils.filter_traceback + def fit( + self, + x=None, + y=None, + batch_size=None, + epochs=1, + verbose="auto", + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_batch_size=None, + validation_freq=1, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + ): + """Trains the model for a fixed number of epochs (dataset iterations). + + Args: + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). 
+ - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset. Should return a tuple + of either `(inputs, targets)` or + `(inputs, targets, sample_weights)`. + - A generator or `keras.utils.Sequence` returning `(inputs, + targets)` or `(inputs, targets, sample_weights)`. + - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a + callable that takes a single argument of type + `tf.distribute.InputContext`, and returns a `tf.data.Dataset`. + `DatasetCreator` should be used when users prefer to specify the + per-replica batching and sharding logic for the `Dataset`. + See `tf.keras.utils.experimental.DatasetCreator` doc for more + information. + A more detailed description of unpacking behavior for iterator + types (Dataset, generator, Sequence) is given below. If these + include `sample_weights` as a third component, note that sample + weighting applies to the `weighted_metrics` argument but not the + `metrics` argument in `compile()`. If using + `tf.distribute.experimental.ParameterServerStrategy`, only + `DatasetCreator` type is supported for `x`. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). If `x` is a dataset, generator, + or `keras.utils.Sequence` instance, `y` should + not be specified (since targets will be obtained from `x`). + batch_size: Integer or `None`. + Number of samples per gradient update. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of datasets, generators, or `keras.utils.Sequence` + instances (since they generate batches). + epochs: Integer. Number of epochs to train the model. + An epoch is an iteration over the entire `x` and `y` + data provided + (unless the `steps_per_epoch` flag is set to + something other than None). + Note that in conjunction with `initial_epoch`, + `epochs` is to be understood as "final epoch". + The model is not trained for a number of iterations + given by `epochs`, but merely until the epoch + of index `epochs` is reached. + verbose: 'auto', 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = one line per epoch. + 'auto' becomes 1 for most cases, but 2 when used with + `ParameterServerStrategy`. Note that the progress bar is not + particularly useful when logged to a file, so verbose=2 is + recommended when not running interactively (e.g., in a production + environment). Defaults to 'auto'. + callbacks: List of `keras.callbacks.Callback` instances. + List of callbacks to apply during training. + See `tf.keras.callbacks`. Note + `tf.keras.callbacks.ProgbarLogger` and + `tf.keras.callbacks.History` callbacks are created automatically + and need not be passed into `model.fit`. + `tf.keras.callbacks.ProgbarLogger` is created or not based on + the `verbose` argument to `model.fit`. + Callbacks with batch-level calls are currently unsupported with + `tf.distribute.experimental.ParameterServerStrategy`, and users + are advised to implement epoch-level calls instead with an + appropriate `steps_per_epoch` value. + validation_split: Float between 0 and 1. + Fraction of the training data to be used as validation data. + The model will set apart this fraction of the training data, + will not train on it, and will evaluate + the loss and any model metrics + on this data at the end of each epoch. 
+ The validation data is selected from the last samples + in the `x` and `y` data provided, before shuffling. This + argument is not supported when `x` is a dataset, generator or + `keras.utils.Sequence` instance. + If both `validation_data` and `validation_split` are provided, + `validation_data` will override `validation_split`. + `validation_split` is not yet supported with + `tf.distribute.experimental.ParameterServerStrategy`. + validation_data: Data on which to evaluate + the loss and any model metrics at the end of each epoch. + The model will not be trained on this data. Note + that the validation loss of data provided using + `validation_split` or `validation_data` is not affected by + regularization layers like noise and dropout. + `validation_data` will override `validation_split`. + `validation_data` could be: + - A tuple `(x_val, y_val)` of Numpy arrays or tensors. + - A tuple `(x_val, y_val, val_sample_weights)` of NumPy + arrays. + - A `tf.data.Dataset`. + - A Python generator or `keras.utils.Sequence` returning + `(inputs, targets)` or `(inputs, targets, sample_weights)`. + `validation_data` is not yet supported with + `tf.distribute.experimental.ParameterServerStrategy`. + shuffle: Boolean (whether to shuffle the training data + before each epoch) or str (for 'batch'). This argument is + ignored when `x` is a generator or a `tf.data.Dataset` object. + 'batch' is a special option for dealing + with the limitations of HDF5 data; it shuffles in batch-sized + chunks. Has no effect when `steps_per_epoch` is not `None`. + class_weight: Optional dictionary mapping class indices (integers) + to a weight (float) value, used for weighting the loss function + (during training only). + This can be useful to tell the model to + "pay more attention" to samples from + an under-represented class. When `class_weight` is specified + and targets have a rank of 2 or greater, either `y` must be + one-hot encoded, or an explicit final dimension of `1` must + be included for sparse class labels. + sample_weight: Optional Numpy array of weights for + the training samples, used for weighting the loss function + (during training only). You can either pass a flat (1D) + Numpy array with the same length as the input samples + (1:1 mapping between weights and samples), + or in the case of temporal data, + you can pass a 2D array with shape + `(samples, sequence_length)`, + to apply a different weight to every timestep of every sample. + This argument is not supported when `x` is a dataset, generator, + or `keras.utils.Sequence` instance; instead, provide the + sample_weights as the third element of `x`. + Note that sample weighting does not apply to metrics specified + via the `metrics` argument in `compile()`. To apply sample + weighting to your metrics, you can specify them via the + `weighted_metrics` in `compile()` instead. + initial_epoch: Integer. + Epoch at which to start training + (useful for resuming a previous training run). + steps_per_epoch: Integer or `None`. + Total number of steps (batches of samples) + before declaring one epoch finished and starting the + next epoch. When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of samples in your dataset divided by + the batch size, or 1 if that cannot be determined. If x is a + `tf.data` dataset, and 'steps_per_epoch' + is None, the epoch will run until the input dataset is + exhausted. 
When passing an infinitely repeating dataset, you + must specify the `steps_per_epoch` argument. If + `steps_per_epoch=-1` the training will run indefinitely with an + infinitely repeating dataset. This argument is not supported + with array inputs. + When using `tf.distribute.experimental.ParameterServerStrategy`: + * `steps_per_epoch=None` is not supported. + validation_steps: Only relevant if `validation_data` is provided and + is a `tf.data` dataset. Total number of steps (batches of + samples) to draw before stopping when performing validation + at the end of every epoch. If 'validation_steps' is None, + validation will run until the `validation_data` dataset is + exhausted. In the case of an infinitely repeated dataset, it + will run into an infinite loop. If 'validation_steps' is + specified and only part of the dataset will be consumed, the + evaluation will start from the beginning of the dataset at each + epoch. This ensures that the same validation samples are used + every time. + validation_batch_size: Integer or `None`. + Number of samples per validation batch. + If unspecified, will default to `batch_size`. + Do not specify the `validation_batch_size` if your data is in + the form of datasets, generators, or `keras.utils.Sequence` + instances (since they generate batches). + validation_freq: Only relevant if validation data is provided. + Integer or `collections.abc.Container` instance (e.g. list, tuple, + etc.). If an integer, specifies how many training epochs to run + before a new validation run is performed, e.g. `validation_freq=2` + runs validation every 2 epochs. If a Container, specifies the + epochs on which to run validation, e.g. + `validation_freq=[1, 2, 10]` runs validation at the end of the + 1st, 2nd, and 10th epochs. + max_queue_size: Integer. Used for generator or + `keras.utils.Sequence` input only. Maximum size for the generator + queue. If unspecified, `max_queue_size` will default to 10. + workers: Integer. Used for generator or `keras.utils.Sequence` input + only. Maximum number of processes to spin up + when using process-based threading. If unspecified, `workers` + will default to 1. + use_multiprocessing: Boolean. Used for generator or + `keras.utils.Sequence` input only. If `True`, use process-based + threading. If unspecified, `use_multiprocessing` will default to + `False`. Note that because this implementation relies on + multiprocessing, you should not pass non-pickleable arguments to + the generator as they can't be passed easily to children + processes. + + Unpacking behavior for iterator-like inputs: + A common pattern is to pass a tf.data.Dataset, generator, or + tf.keras.utils.Sequence to the `x` argument of fit, which will in fact + yield not only features (x) but optionally targets (y) and sample + weights. Keras requires that the output of such iterator-likes be + unambiguous. The iterator should return a tuple of length 1, 2, or 3, + where the optional second and third elements will be used for y and + sample_weight respectively. Any other type provided will be wrapped in + a length one tuple, effectively treating everything as 'x'. When + yielding dicts, they should still adhere to the top-level tuple + structure. + e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate + features, targets, and weights from the keys of a single dict. + A notable unsupported data type is the namedtuple. The reason is + that it behaves like both an ordered datatype (tuple) and a mapping + datatype (dict). 
So given a namedtuple of the form: + `namedtuple("example_tuple", ["y", "x"])` + it is ambiguous whether to reverse the order of the elements when + interpreting the value. Even worse is a tuple of the form: + `namedtuple("other_tuple", ["x", "y", "z"])` + where it is unclear if the tuple was intended to be unpacked into x, + y, and sample_weight or passed through as a single element to `x`. As + a result, the data processing code will simply raise a ValueError if it + encounters a namedtuple (along with instructions to remedy the + issue). + + Returns: + A `History` object. Its `History.history` attribute is + a record of training loss values and metrics values + at successive epochs, as well as validation loss values + and validation metrics values (if applicable). + + Raises: + RuntimeError: 1. If the model was never compiled, or + 2. if `model.fit` is wrapped in `tf.function`. + + ValueError: In case of mismatch between the provided input data + and what the model expects or when the input data is empty. + """ + base_layer.keras_api_gauge.get_cell("fit").set(True) + # Legacy graph support is contained in `training_v1.Model`. + version_utils.disallow_legacy_graph("Model", "fit") + self._assert_compile_was_called() + self._check_call_args("fit") + _disallow_inside_tf_function("fit") + + verbose = _get_verbosity(verbose, self.distribute_strategy) + + if validation_split and validation_data is None: + # Create the validation data using the training data. Only supported + # for `Tensor` and `NumPy` input. + ( + x, + y, + sample_weight, + ), validation_data = data_adapter.train_validation_split( + (x, y, sample_weight), validation_split=validation_split + ) + + if validation_data: + ( + val_x, + val_y, + val_sample_weight, + ) = data_adapter.unpack_x_y_sample_weight(validation_data) + + if self.distribute_strategy._should_use_with_coordinator: + self._cluster_coordinator = ( + tf.distribute.experimental.coordinator.ClusterCoordinator( + self.distribute_strategy + ) + ) + + with self.distribute_strategy.scope(), training_utils.RespectCompiledTrainableState( # noqa: E501 + self + ): + # Creates a `tf.data.Dataset` and handles batch and epoch iteration. + data_handler = data_adapter.get_data_handler( + x=x, + y=y, + sample_weight=sample_weight, + batch_size=batch_size, + steps_per_epoch=steps_per_epoch, + initial_epoch=initial_epoch, + epochs=epochs, + shuffle=shuffle, + class_weight=class_weight, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + model=self, + steps_per_execution=self._steps_per_execution, + ) + + # Container that configures and calls `tf.keras.Callback`s. + if not isinstance(callbacks, callbacks_module.CallbackList): + callbacks = callbacks_module.CallbackList( + callbacks, + add_history=True, + add_progbar=verbose != 0, + model=self, + verbose=verbose, + epochs=epochs, + steps=data_handler.inferred_steps, + ) + + self.stop_training = False + self.train_function = self.make_train_function() + self._train_counter.assign(0) + callbacks.on_train_begin() + training_logs = None + if self.autotune_steps_per_execution: + self._steps_per_execution_tuner.start() + # Handle fault-tolerance for multi-worker. + # TODO(omalleyt): Fix the ordering issues that mean this has to + # happen after `callbacks.on_train_begin`. 
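To make the unpacking rules described in the `fit` docstring concrete, here is a small sketch (hypothetical names and shapes) of the supported dict-inside-tuple pattern:

```python
import numpy as np
import tensorflow as tf

x0 = np.random.rand(8, 4).astype("float32")
x1 = np.random.rand(8, 4).astype("float32")
y = np.random.rand(8, 1).astype("float32")

# A 2-tuple is unpacked as (inputs, targets); the dict of named inputs
# stays inside the top-level tuple and maps onto the Input names below.
ds = tf.data.Dataset.from_tensor_slices(({"x0": x0, "x1": x1}, y)).batch(4)

in0 = tf.keras.Input(shape=(4,), name="x0")
in1 = tf.keras.Input(shape=(4,), name="x1")
out = tf.keras.layers.Dense(1)(tf.keras.layers.concatenate([in0, in1]))
model = tf.keras.Model([in0, in1], out)
model.compile(optimizer="sgd", loss="mse")
model.fit(ds, epochs=1, verbose=0)
```

Swapping the outer tuple for a namedtuple would trigger the ValueError discussed above, since the data-handling code cannot tell positional unpacking from a single mapping-like `x`.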
+ steps_per_epoch_inferred = ( + steps_per_epoch or data_handler.inferred_steps + ) + ( + data_handler._initial_epoch, + data_handler._initial_step, + ) = self._maybe_load_initial_counters_from_ckpt( + steps_per_epoch_inferred, initial_epoch + ) + logs = None + for epoch, iterator in data_handler.enumerate_epochs(): + self.reset_metrics() + callbacks.on_epoch_begin(epoch) + with data_handler.catch_stop_iteration(): + for step in data_handler.steps(): + with tf.profiler.experimental.Trace( + "train", + epoch_num=epoch, + step_num=step, + batch_size=batch_size, + _r=1, + ): + callbacks.on_train_batch_begin(step) + tmp_logs = self.train_function(iterator) + if data_handler.should_sync: + context.async_wait() + # No error, now safe to assign to logs. + logs = tmp_logs + end_step = step + data_handler.step_increment + callbacks.on_train_batch_end(end_step, logs) + if self.stop_training: + break + + logs = tf_utils.sync_to_numpy_or_python_type(logs) + if logs is None: + raise ValueError( + "Unexpected result of `train_function` " + "(Empty logs). This could be due to issues in the input " + "pipeline that resulted in an empty dataset. " + "Otherwise, please use " + "`Model.compile(..., run_eagerly=True)`, or " + "`tf.config.run_functions_eagerly(True)` for more " + "information on what went wrong, or file an " + "issue/bug to `tf.keras`." + ) + # Override with model metrics instead of last step logs + logs = self._validate_and_get_metrics_result(logs) + epoch_logs = copy.copy(logs) + + # Run validation. + if validation_data and self._should_eval( + epoch, validation_freq + ): + if self._pss_evaluation_shards: + self._disallow_exact_eval_with_add_metrics() + # Create data_handler for evaluation and cache it. + if getattr(self, "_eval_data_handler", None) is None: + self._eval_data_handler = data_adapter.get_data_handler( + x=val_x, + y=val_y, + sample_weight=val_sample_weight, + batch_size=validation_batch_size or batch_size, + steps_per_epoch=validation_steps, + initial_epoch=0, + epochs=1, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + model=self, + steps_per_execution=self._steps_per_execution, + pss_evaluation_shards=self._pss_evaluation_shards, + ) + val_logs = self.evaluate( + x=val_x, + y=val_y, + sample_weight=val_sample_weight, + batch_size=validation_batch_size or batch_size, + steps=validation_steps, + callbacks=callbacks, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + return_dict=True, + _use_cached_eval_dataset=True, + ) + val_logs = { + "val_" + name: val for name, val in val_logs.items() + } + epoch_logs.update(val_logs) + + callbacks.on_epoch_end(epoch, epoch_logs) + training_logs = epoch_logs + if self.stop_training: + break + + if isinstance(self.optimizer, optimizer.Optimizer) and epochs > 0: + self.optimizer.finalize_variable_values( + self.trainable_variables + ) + + # If eval data_handler exists, delete it after all epochs are done. + if getattr(self, "_eval_data_handler", None) is not None: + del self._eval_data_handler + if self.autotune_steps_per_execution: + self._steps_per_execution_tuner.stop() + callbacks.on_train_end(logs=training_logs) + return self.history + + def test_step(self, data): + """The logic for one evaluation step. + + This method can be overridden to support custom evaluation logic. + This method is called by `Model.make_test_function`. + + This function should contain the mathematical logic for one step of + evaluation. 
+ This typically includes the forward pass, loss calculation, and metrics + updates. + + Configuration details for *how* this logic is run (e.g. `tf.function` + and `tf.distribute.Strategy` settings) should be left to + `Model.make_test_function`, which can also be overridden. + + Args: + data: A nested structure of `Tensor`s. + + Returns: + A `dict` containing values that will be passed to + `tf.keras.callbacks.CallbackList.on_test_batch_end`. Typically, the + values of the `Model`'s metrics are returned. + """ + x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) + + y_pred = self(x, training=False) + # Updates stateful loss metrics. + self.compute_loss(x, y, y_pred, sample_weight) + return self.compute_metrics(x, y, y_pred, sample_weight) + + def _make_test_function_exact(self): + if getattr(self, "_shard_test_function", None): + return self._shard_test_function + + def step_function(batch): + def run_step(data): + # TODO(b/272050910): Use sample_weight for weighted metrics. + x, y, sample_weight = data_adapter.unpack_x_y_sample_weight( + data + ) + y_pred = self(x, training=False) + return x, y, y_pred, sample_weight + + if self._jit_compile: + run_step = tf.function( + run_step, jit_compile=True, reduce_retracing=True + ) + + outputs = self.distribute_strategy.run(run_step, args=(batch,)) + outputs = reduce_per_replica( + outputs, + self.distribute_strategy, + reduction=self.distribute_reduction_method, + ) + return outputs + + def shard_test_function(dataset, total_shards, shard_idx): + # Copy loss and metric variables to the worker and work with them + # locally. This ensures each shard function is atomic: if a worker + # is preempted, the intermediate progress is discarded and that + # shard is retried. This in turn guarantees exactly-once visitation. + local_unweighted_metrics, local_weighted_metrics = [], [] + with tf_utils.with_metric_local_vars_scope(): + # TODO(jmullenbach): implement and use a clone for + # `MetricsContainer` and use its `update_state` method directly. 
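A custom override of the default `test_step` shown above follows the same shape. A minimal sketch (hypothetical subclass; `unpack_x_y_sample_weight` is the public `tf.keras.utils` helper, while the built-in version routes through `compute_loss`/`compute_metrics` instead):

```python
import tensorflow as tf

class MyModel(tf.keras.Model):
    def test_step(self, data):
        x, y, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data)
        y_pred = self(x, training=False)
        # Update the stateful loss metric(s), then the compiled metrics.
        self.compiled_loss(y, y_pred, sample_weight)
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
        return self.get_metrics_result()
```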
+ for metric in self.compiled_metrics.unweighted_metrics: + if metric is not None: + local_unweighted_metrics.append( + base_metric.clone_metric(metric) + ) + for metric in self.compiled_metrics.weighted_metrics: + if metric is not None: + local_weighted_metrics.append( + base_metric.clone_metric(metric) + ) + local_loss = compile_utils.LossesContainer.from_config( + self.compiled_loss.get_config() + ) + + dataset = input_ops.auto_shard_dataset( + dataset, total_shards, shard_idx + ) + iterator = iter(dataset) + with distribute_utils.cache_variable_reads(): + for batch in iterator: + x, y, y_pred, sample_weight = step_function(batch) + for weighted_metric in local_weighted_metrics: + weighted_metric.update_state(y, y_pred, sample_weight) + for unweighted_metric in local_unweighted_metrics: + unweighted_metric.update_state(y, y_pred) + local_loss(y, y_pred, sample_weight) + local_metrics = ( + local_unweighted_metrics + + local_weighted_metrics + + local_loss.metrics + ) + outputs = {metric.name: metric.weights for metric in local_metrics} + with tf.control_dependencies(_minimum_control_deps(outputs)): + self._test_counter.assign_add(1) + return outputs + + if not self.run_eagerly: + shard_test_function = tf.function( + shard_test_function, reduce_retracing=True + ) + + self._shard_test_function = ( + lambda *args: self._cluster_coordinator.schedule( + shard_test_function, + args=args, + ) + ) + return self._shard_test_function + + def make_test_function(self, force=False): + """Creates a function that executes one step of evaluation. + + This method can be overridden to support custom evaluation logic. + This method is called by `Model.evaluate` and `Model.test_on_batch`. + + Typically, this method directly controls `tf.function` and + `tf.distribute.Strategy` settings, and delegates the actual evaluation + logic to `Model.test_step`. + + This function is cached the first time `Model.evaluate` or + `Model.test_on_batch` is called. The cache is cleared whenever + `Model.compile` is called. You can skip the cache and regenerate the + function with `force=True`. + + Args: + force: Whether to regenerate the test function and skip the cached + function if available. + + Returns: + Function. The function created by this method should accept a + `tf.data.Iterator`, and return a `dict` containing values that will + be passed to `tf.keras.Callbacks.on_test_batch_end`. + """ + if self.test_function is not None and not force: + return self.test_function + + def step_function(model, iterator): + """Runs a single evaluation step.""" + + def run_step(data): + outputs = model.test_step(data) + # Ensure counter is updated only if `test_step` succeeds. + with tf.control_dependencies(_minimum_control_deps(outputs)): + model._test_counter.assign_add(1) + return outputs + + if self.jit_compile: + run_step = tf.function( + run_step, jit_compile=True, reduce_retracing=True + ) + + data = next(iterator) + outputs = model.distribute_strategy.run(run_step, args=(data,)) + outputs = reduce_per_replica( + outputs, + self.distribute_strategy, + reduction=self.distribute_reduction_method, + ) + return outputs + + # Special case if steps_per_execution is one. 
+ if ( + self._steps_per_execution is None + or self._steps_per_execution.numpy().item() == 1 + and not self.autotune_steps_per_execution + ): + + def test_function(iterator): + """Runs a test execution with a single step.""" + return step_function(self, iterator) + + if not self.run_eagerly: + test_function = tf.function( + test_function, reduce_retracing=True + ) + + if self._cluster_coordinator: + self.test_function = ( + lambda it: self._cluster_coordinator.schedule( + test_function, args=(it,) + ) + ) + else: + self.test_function = test_function + + # If we're using a coordinator, use the value of + # self._steps_per_execution at the time the function is + # called/scheduled, and not when it is actually executed. + elif self._cluster_coordinator: + + def test_function(iterator, steps_per_execution): + """Runs a test execution with multiple steps.""" + for _ in tf.range(steps_per_execution): + outputs = step_function(self, iterator) + return outputs + + if not self.run_eagerly: + test_function = tf.function( + test_function, reduce_retracing=True + ) + + self.test_function = lambda it: self._cluster_coordinator.schedule( + test_function, args=(it, self._steps_per_execution.value()) + ) + else: - Args: - force: Whether to regenerate the train function and skip the cached - function if available. + def test_function(iterator): + """Runs a test execution with multiple steps.""" + for _ in tf.range(self._steps_per_execution): + outputs = step_function(self, iterator) + return outputs - Returns: - Function. The function created by this method should accept a - `tf.data.Iterator`, and return a `dict` containing values that will - be passed to `tf.keras.Callbacks.on_train_batch_end`, such as - `{'loss': 0.2, 'accuracy': 0.7}`. - """ - if self.train_function is not None and not force: - return self.train_function - - def step_function(model, iterator): - """Runs a single training step.""" - - def run_step(data): - outputs = model.train_step(data) - # Ensure counter is updated only if `train_step` succeeds. - with tf.control_dependencies(_minimum_control_deps(outputs)): - model._train_counter.assign_add(1) # pylint: disable=protected-access - return outputs - - if self._jit_compile: - run_step = tf.function( - run_step, jit_compile=True, reduce_retracing=True) - data = next(iterator) - outputs = model.distribute_strategy.run(run_step, args=(data,)) - outputs = reduce_per_replica( - outputs, self.distribute_strategy, reduction='first') - return outputs - - # Special case if steps_per_execution is one. - if (self._steps_per_execution is None or - self._steps_per_execution.numpy().item() == 1): - - def train_function(iterator): - """Runs a training execution with a single step.""" - return step_function(self, iterator) - - if not self.run_eagerly: - train_function = tf.function( - train_function, reduce_retracing=True) - self.train_tf_function = train_function - - if self._cluster_coordinator: - self.train_function = lambda it: self._cluster_coordinator.schedule( # pylint: disable=g-long-lambda - train_function, args=(it,)) - else: - self.train_function = train_function - - # If we're using a coordinator, use the value of self._steps_per_execution - # at the time the function is called/scheduled, and not when it is actually - # executed. 
- elif self._cluster_coordinator: - - def train_function(iterator, steps_per_execution): - """Runs a training execution with multiple steps.""" - for _ in tf.range(steps_per_execution): - outputs = step_function(self, iterator) - return outputs - - if not self.run_eagerly: - train_function = tf.function( - train_function, reduce_retracing=True) - self.train_tf_function = train_function - - self.train_function = lambda it: self._cluster_coordinator.schedule( # pylint: disable=g-long-lambda - train_function, - args=(it, self._steps_per_execution.value())) - else: + if not self.run_eagerly: + test_function = tf.function( + test_function, reduce_retracing=True + ) + self.test_function = test_function - def train_function(iterator): - """Runs a training execution with multiple steps.""" - for _ in tf.range(self._steps_per_execution): - outputs = step_function(self, iterator) - return outputs - - if not self.run_eagerly: - train_function = tf.function( - train_function, reduce_retracing=True) - self.train_tf_function = train_function - self.train_function = train_function - - return self.train_function - - @traceback_utils.filter_traceback - def fit(self, - x=None, - y=None, - batch_size=None, - epochs=1, - verbose='auto', - callbacks=None, - validation_split=0., - validation_data=None, - shuffle=True, - class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_batch_size=None, - validation_freq=1, - max_queue_size=10, - workers=1, - use_multiprocessing=False): - """Trains the model for a fixed number of epochs (iterations on a dataset). + return self.test_function - Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A dict mapping input names to the corresponding array/tensors, - if the model has named inputs. - - A `tf.data` dataset. Should return a tuple - of either `(inputs, targets)` or - `(inputs, targets, sample_weights)`. - - A generator or `keras.utils.Sequence` returning `(inputs, targets)` - or `(inputs, targets, sample_weights)`. - - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a - callable that takes a single argument of type - `tf.distribute.InputContext`, and returns a `tf.data.Dataset`. - `DatasetCreator` should be used when users prefer to specify the - per-replica batching and sharding logic for the `Dataset`. - See `tf.keras.utils.experimental.DatasetCreator` doc for more - information. - A more detailed description of unpacking behavior for iterator types - (Dataset, generator, Sequence) is given below. If these include - `sample_weights` as a third component, note that sample weighting - applies to the `weighted_metrics` argument but not the `metrics` - argument in `compile()`. If using - `tf.distribute.experimental.ParameterServerStrategy`, only - `DatasetCreator` type is supported for `x`. - y: Target data. Like the input data `x`, - it could be either Numpy array(s) or TensorFlow tensor(s). - It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). If `x` is a dataset, generator, - or `keras.utils.Sequence` instance, `y` should - not be specified (since targets will be obtained from `x`). - batch_size: Integer or `None`. - Number of samples per gradient update. - If unspecified, `batch_size` will default to 32. 
- Do not specify the `batch_size` if your data is in the - form of datasets, generators, or `keras.utils.Sequence` instances - (since they generate batches). - epochs: Integer. Number of epochs to train the model. - An epoch is an iteration over the entire `x` and `y` - data provided - (unless the `steps_per_epoch` flag is set to - something other than None). - Note that in conjunction with `initial_epoch`, - `epochs` is to be understood as "final epoch". - The model is not trained for a number of iterations - given by `epochs`, but merely until the epoch - of index `epochs` is reached. - verbose: 'auto', 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = progress bar, 2 = one line per epoch. - 'auto' defaults to 1 for most cases, but 2 when used with - `ParameterServerStrategy`. Note that the progress bar is not - particularly useful when logged to a file, so verbose=2 is - recommended when not running interactively (eg, in a production - environment). - callbacks: List of `keras.callbacks.Callback` instances. - List of callbacks to apply during training. - See `tf.keras.callbacks`. Note `tf.keras.callbacks.ProgbarLogger` - and `tf.keras.callbacks.History` callbacks are created automatically - and need not be passed into `model.fit`. - `tf.keras.callbacks.ProgbarLogger` is created or not based on - `verbose` argument to `model.fit`. - Callbacks with batch-level calls are currently unsupported with - `tf.distribute.experimental.ParameterServerStrategy`, and users are - advised to implement epoch-level calls instead with an appropriate - `steps_per_epoch` value. - validation_split: Float between 0 and 1. - Fraction of the training data to be used as validation data. - The model will set apart this fraction of the training data, - will not train on it, and will evaluate - the loss and any model metrics - on this data at the end of each epoch. - The validation data is selected from the last samples - in the `x` and `y` data provided, before shuffling. This argument is - not supported when `x` is a dataset, generator or - `keras.utils.Sequence` instance. - If both `validation_data` and `validation_split` are provided, - `validation_data` will override `validation_split`. - `validation_split` is not yet supported with - `tf.distribute.experimental.ParameterServerStrategy`. - validation_data: Data on which to evaluate - the loss and any model metrics at the end of each epoch. - The model will not be trained on this data. Thus, note the fact - that the validation loss of data provided using `validation_split` - or `validation_data` is not affected by regularization layers like - noise and dropout. - `validation_data` will override `validation_split`. - `validation_data` could be: - - A tuple `(x_val, y_val)` of Numpy arrays or tensors. - - A tuple `(x_val, y_val, val_sample_weights)` of NumPy arrays. - - A `tf.data.Dataset`. - - A Python generator or `keras.utils.Sequence` returning - `(inputs, targets)` or `(inputs, targets, sample_weights)`. - `validation_data` is not yet supported with - `tf.distribute.experimental.ParameterServerStrategy`. - shuffle: Boolean (whether to shuffle the training data - before each epoch) or str (for 'batch'). This argument is ignored - when `x` is a generator or an object of tf.data.Dataset. - 'batch' is a special option for dealing - with the limitations of HDF5 data; it shuffles in batch-sized - chunks. Has no effect when `steps_per_epoch` is not `None`. 
- class_weight: Optional dictionary mapping class indices (integers) - to a weight (float) value, used for weighting the loss function - (during training only). - This can be useful to tell the model to - "pay more attention" to samples from - an under-represented class. - sample_weight: Optional Numpy array of weights for - the training samples, used for weighting the loss function - (during training only). You can either pass a flat (1D) - Numpy array with the same length as the input samples - (1:1 mapping between weights and samples), - or in the case of temporal data, - you can pass a 2D array with shape - `(samples, sequence_length)`, - to apply a different weight to every timestep of every sample. This - argument is not supported when `x` is a dataset, generator, or - `keras.utils.Sequence` instance, instead provide the sample_weights - as the third element of `x`. - Note that sample weighting does not apply to metrics specified - via the `metrics` argument in `compile()`. To apply sample weighting - to your metrics, you can specify them via the `weighted_metrics` in - `compile()` instead. - initial_epoch: Integer. - Epoch at which to start training - (useful for resuming a previous training run). - steps_per_epoch: Integer or `None`. - Total number of steps (batches of samples) - before declaring one epoch finished and starting the - next epoch. When training with input tensors such as - TensorFlow data tensors, the default `None` is equal to - the number of samples in your dataset divided by - the batch size, or 1 if that cannot be determined. If x is a - `tf.data` dataset, and 'steps_per_epoch' - is None, the epoch will run until the input dataset is exhausted. - When passing an infinitely repeating dataset, you must specify the - `steps_per_epoch` argument. If `steps_per_epoch=-1` the training - will run indefinitely with an infinitely repeating dataset. - This argument is not supported with array inputs. - When using `tf.distribute.experimental.ParameterServerStrategy`: - * `steps_per_epoch=None` is not supported. - validation_steps: Only relevant if `validation_data` is provided and - is a `tf.data` dataset. Total number of steps (batches of - samples) to draw before stopping when performing validation - at the end of every epoch. If 'validation_steps' is None, validation - will run until the `validation_data` dataset is exhausted. In the - case of an infinitely repeated dataset, it will run into an - infinite loop. If 'validation_steps' is specified and only part of - the dataset will be consumed, the evaluation will start from the - beginning of the dataset at each epoch. This ensures that the same - validation samples are used every time. - validation_batch_size: Integer or `None`. - Number of samples per validation batch. - If unspecified, will default to `batch_size`. - Do not specify the `validation_batch_size` if your data is in the - form of datasets, generators, or `keras.utils.Sequence` instances - (since they generate batches). - validation_freq: Only relevant if validation data is provided. Integer - or `collections.abc.Container` instance (e.g. list, tuple, etc.). - If an integer, specifies how many training epochs to run before a - new validation run is performed, e.g. `validation_freq=2` runs - validation every 2 epochs. If a Container, specifies the epochs on - which to run validation, e.g. `validation_freq=[1, 2, 10]` runs - validation at the end of the 1st, 2nd, and 10th epochs. - max_queue_size: Integer. 
Used for generator or `keras.utils.Sequence` - input only. Maximum size for the generator queue. - If unspecified, `max_queue_size` will default to 10. - workers: Integer. Used for generator or `keras.utils.Sequence` input - only. Maximum number of processes to spin up - when using process-based threading. If unspecified, `workers` - will default to 1. - use_multiprocessing: Boolean. Used for generator or - `keras.utils.Sequence` input only. If `True`, use process-based - threading. If unspecified, `use_multiprocessing` will default to - `False`. Note that because this implementation relies on - multiprocessing, you should not pass non-picklable arguments to - the generator as they can't be passed easily to children processes. - - Unpacking behavior for iterator-like inputs: - A common pattern is to pass a tf.data.Dataset, generator, or - tf.keras.utils.Sequence to the `x` argument of fit, which will in fact - yield not only features (x) but optionally targets (y) and sample weights. - Keras requires that the output of such iterator-likes be unambiguous. The - iterator should return a tuple of length 1, 2, or 3, where the optional - second and third elements will be used for y and sample_weight - respectively. Any other type provided will be wrapped in a length one - tuple, effectively treating everything as 'x'. When yielding dicts, they - should still adhere to the top-level tuple structure. - e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate - features, targets, and weights from the keys of a single dict. - A notable unsupported data type is the namedtuple. The reason is that - it behaves like both an ordered datatype (tuple) and a mapping - datatype (dict). So given a namedtuple of the form: - `namedtuple("example_tuple", ["y", "x"])` - it is ambiguous whether to reverse the order of the elements when - interpreting the value. Even worse is a tuple of the form: - `namedtuple("other_tuple", ["x", "y", "z"])` - where it is unclear if the tuple was intended to be unpacked into x, y, - and sample_weight or passed through as a single element to `x`. As a - result the data processing code will simply raise a ValueError if it - encounters a namedtuple. (Along with instructions to remedy the issue.) + @traceback_utils.filter_traceback + def evaluate( + self, + x=None, + y=None, + batch_size=None, + verbose="auto", + sample_weight=None, + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + return_dict=False, + **kwargs, + ): + """Returns the loss value & metrics values for the model in test mode. + + Computation is done in batches (see the `batch_size` arg.) + + Args: + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset. Should return a tuple + of either `(inputs, targets)` or + `(inputs, targets, sample_weights)`. + - A generator or `keras.utils.Sequence` returning `(inputs, + targets)` or `(inputs, targets, sample_weights)`. + A more detailed description of unpacking behavior for iterator + types (Dataset, generator, Sequence) is given in the `Unpacking + behavior for iterator-like inputs` section of `Model.fit`. + y: Target data. Like the input data `x`, it could be either Numpy + array(s) or TensorFlow tensor(s). 
It should be consistent with `x` + (you cannot have Numpy inputs and tensor targets, or inversely). + If `x` is a dataset, generator or `keras.utils.Sequence` instance, + `y` should not be specified (since targets will be obtained from + the iterator/dataset). + batch_size: Integer or `None`. Number of samples per batch of + computation. If unspecified, `batch_size` will default to 32. Do + not specify the `batch_size` if your data is in the form of a + dataset, generators, or `keras.utils.Sequence` instances (since + they generate batches). + verbose: `"auto"`, 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = single line. + `"auto"` becomes 1 for most cases, and 2 when used with + `ParameterServerStrategy`. Note that the progress bar is not + particularly useful when logged to a file, so `verbose=2` is + recommended when not running interactively (e.g. in a production + environment). Defaults to 'auto'. + sample_weight: Optional Numpy array of weights for the test samples, + used for weighting the loss function. You can either pass a flat + (1D) Numpy array with the same length as the input samples + (1:1 mapping between weights and samples), or in the case of + temporal data, you can pass a 2D array with shape `(samples, + sequence_length)`, to apply a different weight to every + timestep of every sample. This argument is not supported when + `x` is a dataset; instead, pass sample weights as the third + element of `x`. + steps: Integer or `None`. Total number of steps (batches of samples) + before declaring the evaluation round finished. Ignored with the + default value of `None`. If x is a `tf.data` dataset and `steps` + is None, 'evaluate' will run until the dataset is exhausted. This + argument is not supported with array inputs. + callbacks: List of `keras.callbacks.Callback` instances. List of + callbacks to apply during evaluation. See + [callbacks](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks). + max_queue_size: Integer. Used for generator or + `keras.utils.Sequence` input only. Maximum size for the generator + queue. If unspecified, `max_queue_size` will default to 10. + workers: Integer. Used for generator or `keras.utils.Sequence` input + only. Maximum number of processes to spin up when using + process-based threading. If unspecified, `workers` will default to + 1. + use_multiprocessing: Boolean. Used for generator or + `keras.utils.Sequence` input only. If `True`, use process-based + threading. If unspecified, `use_multiprocessing` will default to + `False`. Note that because this implementation relies on + multiprocessing, you should not pass non-pickleable arguments to + the generator as they can't be passed easily to children + processes. + return_dict: If `True`, loss and metric results are returned as a + dict, with each key being the name of the metric. If `False`, they + are returned as a list. + **kwargs: Unused at this time. + + See the discussion of `Unpacking behavior for iterator-like inputs` for + `Model.fit`. + + Returns: + Scalar test loss (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + RuntimeError: If `model.evaluate` is wrapped in a `tf.function`. 
+ """ + base_layer.keras_api_gauge.get_cell("evaluate").set(True) + version_utils.disallow_legacy_graph("Model", "evaluate") + self._assert_compile_was_called() + self._check_call_args("evaluate") + self._check_sample_weight_warning(x, sample_weight) + _disallow_inside_tf_function("evaluate") + use_cached_eval_dataset = kwargs.pop("_use_cached_eval_dataset", False) + if kwargs: + raise TypeError(f"Invalid keyword arguments: {list(kwargs.keys())}") + + if self.distribute_strategy._should_use_with_coordinator: + self._cluster_coordinator = ( + tf.distribute.experimental.coordinator.ClusterCoordinator( + self.distribute_strategy + ) + ) + + verbose = _get_verbosity(verbose, self.distribute_strategy) + if self._pss_evaluation_shards: + self._disallow_exact_eval_with_add_metrics() + with self.distribute_strategy.scope(): + # Use cached evaluation data only when it's called in `Model.fit` + if ( + use_cached_eval_dataset + and getattr(self, "_eval_data_handler", None) is not None + ): + data_handler = self._eval_data_handler + else: + # Creates a `tf.data.Dataset` and handles batch and epoch + # iteration. + data_handler = data_adapter.get_data_handler( + x=x, + y=y, + sample_weight=sample_weight, + batch_size=batch_size, + steps_per_epoch=steps, + initial_epoch=0, + epochs=1, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + model=self, + steps_per_execution=self._steps_per_execution, + pss_evaluation_shards=self._pss_evaluation_shards, + ) + + # Container that configures and calls `tf.keras.Callback`s. + if not isinstance(callbacks, callbacks_module.CallbackList): + callbacks = callbacks_module.CallbackList( + callbacks, + add_history=True, + add_progbar=verbose != 0, + model=self, + verbose=verbose, + epochs=1, + steps=data_handler.inferred_steps, + ) + + # Initialize to prevent errors if 0 epochs are evaluated. + logs = {} + + test_function_runner = self._get_test_function_runner(callbacks) + self._test_counter.assign(0) + callbacks.on_test_begin() + if self.autotune_steps_per_execution: + self._steps_per_execution_tuner.start() + for ( + _, + dataset_or_iterator, + ) in data_handler.enumerate_epochs(): # Single epoch. + self.reset_metrics() + with data_handler.catch_stop_iteration(): + for step in data_handler.steps(): + with tf.profiler.experimental.Trace( + "test", step_num=step, _r=1 + ): + callbacks.on_test_batch_begin(step) + logs = test_function_runner.run_step( + dataset_or_iterator, + data_handler, + step, + self._pss_evaluation_shards, + ) + + logs = tf_utils.sync_to_numpy_or_python_type(logs) + # Override with model metrics instead of last step logs + if self._pss_evaluation_shards: + logs = self._aggregate_exact_metrics(logs) + else: + logs = self._validate_and_get_metrics_result(logs) + if self.autotune_steps_per_execution: + self._steps_per_execution_tuner.stop() + callbacks.on_test_end(logs=logs) - Returns: - A `History` object. Its `History.history` attribute is - a record of training loss values and metrics values - at successive epochs, as well as validation loss values - and validation metrics values (if applicable). 
+ if return_dict: + return logs + else: + return flatten_metrics_in_order(logs, self.metrics_names) + + def _disallow_exact_eval_with_add_metrics(self): + metrics_from_add_metric = [ + metric + for layer in self._flatten_layers() + for metric in layer._metrics + ] + compiled_metrics = self.compiled_metrics.metrics + if any( + [ + metric not in compiled_metrics + for metric in metrics_from_add_metric + ] + ): + raise ValueError( + "Detected that a metric was added to this model " + "via `Model.add_metric`. This is not currently " + "supported when using exact evaluation with " + "`tf.distribute.ParameterServerStrategy`." + ) + + def _infer_exact_eval_shards(self, pss_evaluation_shards): + if not self.distribute_strategy._should_use_with_coordinator: + return 0 + if pss_evaluation_shards == "auto": + # TODO(b/264265138) evaluate and improve this heuristic + return self.distribute_strategy._num_workers * 5 + return pss_evaluation_shards + + def _get_test_function_runner(self, callbacks): + if ( + self._pss_evaluation_shards + and self.distribute_strategy._should_use_with_coordinator + ): + self.test_function = self._make_test_function_exact() + test_function_runner = _ExactTestFunction( + self.test_function, callbacks + ) + else: + self.test_function = self.make_test_function() + test_function_runner = _TestFunction(self.test_function, callbacks) + return test_function_runner + + def predict_step(self, data): + """The logic for one inference step. + + This method can be overridden to support custom inference logic. + This method is called by `Model.make_predict_function`. + + This method should contain the mathematical logic for one step of + inference. This typically includes the forward pass. + + Configuration details for *how* this logic is run (e.g. `tf.function` + and `tf.distribute.Strategy` settings) should be left to + `Model.make_predict_function`, which can also be overridden. + + Args: + data: A nested structure of `Tensor`s. + + Returns: + The result of one inference step, typically the output of calling the + `Model` on data. + """ + x, _, _ = data_adapter.unpack_x_y_sample_weight(data) + return self(x, training=False) + + def make_predict_function(self, force=False): + """Creates a function that executes one step of inference. + + This method can be overridden to support custom inference logic. + This method is called by `Model.predict` and `Model.predict_on_batch`. + + Typically, this method directly controls `tf.function` and + `tf.distribute.Strategy` settings, and delegates the actual inference + logic to `Model.predict_step`. + + This function is cached the first time `Model.predict` or + `Model.predict_on_batch` is called. The cache is cleared whenever + `Model.compile` is called. You can skip the cache and regenerate the + function with `force=True`. + + Args: + force: Whether to regenerate the predict function and skip the cached + function if available. + + Returns: + Function. The function created by this method should accept a + `tf.data.Iterator`, and return the outputs of the `Model`. + """ + if self.predict_function is not None and not force: + return self.predict_function + + def step_function(model, iterator): + """Runs a single predict step.""" + + def run_step(data): + outputs = model.predict_step(data) + # Ensure counter is updated only if `predict_step` succeeds. 
+ with tf.control_dependencies(_minimum_control_deps(outputs)): + model._predict_counter.assign_add(1) + return outputs + + if self.jit_compile: + run_step = tf.function( + run_step, jit_compile=True, reduce_retracing=True + ) + + data = next(iterator) + outputs = model.distribute_strategy.run(run_step, args=(data,)) + outputs = reduce_per_replica( + outputs, self.distribute_strategy, reduction="concat" + ) + return outputs + + # Special case if steps_per_execution is one. + if ( + self._steps_per_execution is None + or self._steps_per_execution.numpy().item() == 1 + and not self.autotune_steps_per_execution + ): + + def predict_function(iterator): + """Runs a prediction execution with a single step.""" + return step_function(self, iterator) - Raises: - RuntimeError: 1. If the model was never compiled or, - 2. If `model.fit` is wrapped in `tf.function`. + else: - ValueError: In case of mismatch between the provided input data - and what the model expects or when the input data is empty. - """ - base_layer.keras_api_gauge.get_cell('fit').set(True) - # Legacy graph support is contained in `training_v1.Model`. - version_utils.disallow_legacy_graph('Model', 'fit') - self._assert_compile_was_called() - self._check_call_args('fit') - _disallow_inside_tf_function('fit') - - verbose = _get_verbosity(verbose, self.distribute_strategy) - - if validation_split and validation_data is None: - # Create the validation data using the training data. Only supported for - # `Tensor` and `NumPy` input. - (x, y, sample_weight), validation_data = ( - data_adapter.train_validation_split( - (x, y, sample_weight), validation_split=validation_split)) - - if validation_data: - val_x, val_y, val_sample_weight = ( - data_adapter.unpack_x_y_sample_weight(validation_data)) - - if self.distribute_strategy._should_use_with_coordinator: # pylint: disable=protected-access - self._cluster_coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator( - self.distribute_strategy) - - with self.distribute_strategy.scope(), \ - training_utils.RespectCompiledTrainableState(self): - # Creates a `tf.data.Dataset` and handles batch and epoch iteration. - data_handler = data_adapter.get_data_handler( - x=x, - y=y, - sample_weight=sample_weight, - batch_size=batch_size, - steps_per_epoch=steps_per_epoch, - initial_epoch=initial_epoch, - epochs=epochs, - shuffle=shuffle, - class_weight=class_weight, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - model=self, - steps_per_execution=self._steps_per_execution) - - # Container that configures and calls `tf.keras.Callback`s. - if not isinstance(callbacks, callbacks_module.CallbackList): - callbacks = callbacks_module.CallbackList( - callbacks, - add_history=True, - add_progbar=verbose != 0, - model=self, - verbose=verbose, - epochs=epochs, - steps=data_handler.inferred_steps) - - self.stop_training = False - self.train_function = self.make_train_function() - self._train_counter.assign(0) - callbacks.on_train_begin() - training_logs = None - # Handle fault-tolerance for multi-worker. - # TODO(omalleyt): Fix the ordering issues that mean this has to - # happen after `callbacks.on_train_begin`. 
- data_handler._initial_epoch = ( # pylint: disable=protected-access - self._maybe_load_initial_epoch_from_ckpt(initial_epoch)) - logs = None - for epoch, iterator in data_handler.enumerate_epochs(): - self.reset_metrics() - callbacks.on_epoch_begin(epoch) - with data_handler.catch_stop_iteration(): - data_handler._initial_step = self._maybe_load_initial_step_from_ckpt() # pylint: disable=protected-access - for step in data_handler.steps(): - with tf.profiler.experimental.Trace( - 'train', - epoch_num=epoch, - step_num=step, + def predict_function(iterator): + """Runs a prediction execution with multiple steps.""" + outputs = step_function(self, iterator) + for _ in tf.range(self._steps_per_execution - 1): + tf.autograph.experimental.set_loop_options( + shape_invariants=[ + ( + outputs, + tf.nest.map_structure( + lambda t: tf_utils.get_tensor_spec( + t, dynamic_batch=True + ).shape, + outputs, + ), + ) + ] + ) + step_outputs = step_function(self, iterator) + outputs = tf.nest.map_structure( + lambda t1, t2: concat([t1, t2]), outputs, step_outputs + ) + return outputs + + if not self.run_eagerly: + predict_function = tf.function( + predict_function, reduce_retracing=True + ) + self.predict_function = predict_function + + return self.predict_function + + @traceback_utils.filter_traceback + def predict( + self, + x, + batch_size=None, + verbose="auto", + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + ): + """Generates output predictions for the input samples. + + Computation is done in batches. This method is designed for batch + processing of large numbers of inputs. It is not intended for use inside + loops that iterate over your data and process small numbers of inputs + at a time. + + For small numbers of inputs that fit in one batch, + directly use `__call__()` for faster execution, e.g., + `model(x)`, or `model(x, training=False)` if you have layers such as + `tf.keras.layers.BatchNormalization` that behave differently during + inference. You may pair the individual model call with a `tf.function` + for additional performance inside your inner loop. + If you need access to numpy array values instead of tensors after your + model call, you can use `tensor.numpy()` to get the numpy array value of + an eager tensor. + + Also, note that test loss is not affected by + regularization layers like noise and dropout. + + Note: See [this FAQ entry]( + https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call) + for more details about the difference between `Model` methods + `predict()` and `__call__()`. + + Args: + x: Input samples. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A `tf.data` dataset. + - A generator or `keras.utils.Sequence` instance. + A more detailed description of unpacking behavior for iterator + types (Dataset, generator, Sequence) is given in the `Unpacking + behavior for iterator-like inputs` section of `Model.fit`. + batch_size: Integer or `None`. + Number of samples per batch. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of dataset, generators, or `keras.utils.Sequence` instances + (since they generate batches). + verbose: `"auto"`, 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = single line. 
+                `"auto"` becomes 1 for most cases, and 2 when used with
+                `ParameterServerStrategy`. Note that the progress bar is not
+                particularly useful when logged to a file, so `verbose=2` is
+                recommended when not running interactively (e.g. in a
+                production environment). Defaults to `"auto"`.
+            steps: Total number of steps (batches of samples)
+                before declaring the prediction round finished.
+                Ignored with the default value of `None`. If `x` is a `tf.data`
+                dataset and `steps` is `None`, `predict()` will
+                run until the input dataset is exhausted.
+            callbacks: List of `keras.callbacks.Callback` instances.
+                List of callbacks to apply during prediction.
+                See [callbacks](
+                https://www.tensorflow.org/api_docs/python/tf/keras/callbacks).
+            max_queue_size: Integer. Used for generator or
+                `keras.utils.Sequence` input only. Maximum size for the
+                generator queue. If unspecified, `max_queue_size` will default
+                to 10.
+            workers: Integer. Used for generator or `keras.utils.Sequence`
+                input only. Maximum number of processes to spin up when using
+                process-based threading. If unspecified, `workers` will default
+                to 1.
+            use_multiprocessing: Boolean. Used for generator or
+                `keras.utils.Sequence` input only. If `True`, use process-based
+                threading. If unspecified, `use_multiprocessing` will default to
+                `False`. Note that because this implementation relies on
+                multiprocessing, you should not pass non-pickleable arguments to
+                the generator as they can't be passed easily to children
+                processes.
+
+        See the discussion of `Unpacking behavior for iterator-like inputs` for
+        `Model.fit`. Note that Model.predict uses the same interpretation rules
+        as `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for
+        all three methods.
+
+        Returns:
+            Numpy array(s) of predictions.
+
+        Raises:
+            RuntimeError: If `model.predict` is wrapped in a `tf.function`.
+            ValueError: In case of mismatch between the provided
+                input data and the model's expectations,
+                or in case a stateful model receives a number of samples
+                that is not a multiple of the batch size.
+        """
+        base_layer.keras_api_gauge.get_cell("predict").set(True)
+        version_utils.disallow_legacy_graph("Model", "predict")
+        self._check_call_args("predict")
+        _disallow_inside_tf_function("predict")
+
+        # TODO(yashkatariya): Cache model on the coordinator for faster
+        # prediction. If running under PSS, then swap it with OneDeviceStrategy
+        # so that execution will run on the coordinator.
+        original_pss_strategy = None
+        if self.distribute_strategy._should_use_with_coordinator:
+            original_pss_strategy = self.distribute_strategy
+            self._distribution_strategy = None
+
+        # Cluster coordinator is set by `.fit()` and `.evaluate()` which is not
+        # needed in `.predict()` because all the predictions happen on the
+        # coordinator/locally.
+        if self._cluster_coordinator:
+            self._cluster_coordinator = None
+
+        verbose = _get_verbosity(verbose, self.distribute_strategy)
+        outputs = None
+        with self.distribute_strategy.scope():
+            # Creates a `tf.data.Dataset` and handles batch and epoch iteration.
+ dataset_types = (tf.compat.v1.data.Dataset, tf.data.Dataset) + if ( + self._in_multi_worker_mode() + or _is_tpu_multi_host(self.distribute_strategy) + ) and isinstance(x, dataset_types): + try: + options = tf.data.Options() + data_option = tf.data.experimental.AutoShardPolicy.DATA + options.experimental_distribute.auto_shard_policy = ( + data_option + ) + x = x.with_options(options) + except ValueError: + warnings.warn( + "Using Model.predict with MultiWorkerMirroredStrategy " + "or TPUStrategy and AutoShardPolicy.FILE might lead to " + "out-of-order result. Consider setting it to " + "AutoShardPolicy.DATA.", + stacklevel=2, + ) + + data_handler = data_adapter.get_data_handler( + x=x, batch_size=batch_size, - _r=1): - callbacks.on_train_batch_begin(step) - tmp_logs = self.train_function(iterator) - if data_handler.should_sync: - context.async_wait() - logs = tmp_logs # No error, now safe to assign to logs. - end_step = step + data_handler.step_increment - callbacks.on_train_batch_end(end_step, logs) - if self.stop_training: - break - - logs = tf_utils.sync_to_numpy_or_python_type(logs) - if logs is None: - raise ValueError('Unexpected result of `train_function` ' - '(Empty logs). Please use ' - '`Model.compile(..., run_eagerly=True)`, or ' - '`tf.config.run_functions_eagerly(True)` for more ' - 'information of where went wrong, or file a ' - 'issue/bug to `tf.keras`.') - epoch_logs = copy.copy(logs) - - # Run validation. - if validation_data and self._should_eval(epoch, validation_freq): - # Create data_handler for evaluation and cache it. - if getattr(self, '_eval_data_handler', None) is None: - self._eval_data_handler = data_adapter.get_data_handler( - x=val_x, - y=val_y, - sample_weight=val_sample_weight, - batch_size=validation_batch_size or batch_size, - steps_per_epoch=validation_steps, + steps_per_epoch=steps, initial_epoch=0, epochs=1, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing, model=self, - steps_per_execution=self._steps_per_execution) - val_logs = self.evaluate( - x=val_x, - y=val_y, - sample_weight=val_sample_weight, - batch_size=validation_batch_size or batch_size, - steps=validation_steps, - callbacks=callbacks, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - return_dict=True, - _use_cached_eval_dataset=True) - val_logs = {'val_' + name: val for name, val in val_logs.items()} - epoch_logs.update(val_logs) - - callbacks.on_epoch_end(epoch, epoch_logs) - training_logs = epoch_logs - if self.stop_training: - break - - if isinstance(self.optimizer, optimizer_experimental.Optimizer): - self.optimizer.finalize_variable_values(self.trainable_variables) - - # If eval data_handler exists, delete it after all epochs are done. - if getattr(self, '_eval_data_handler', None) is not None: - del self._eval_data_handler - callbacks.on_train_end(logs=training_logs) - return self.history - - def test_step(self, data): - """The logic for one evaluation step. - - This method can be overridden to support custom evaluation logic. - This method is called by `Model.make_test_function`. - - This function should contain the mathematical logic for one step of - evaluation. - This typically includes the forward pass, loss calculation, and metrics - updates. - - Configuration details for *how* this logic is run (e.g. `tf.function` and - `tf.distribute.Strategy` settings), should be left to - `Model.make_test_function`, which can also be overridden. - - Args: - data: A nested structure of `Tensor`s. 
- - Returns: - A `dict` containing values that will be passed to - `tf.keras.callbacks.CallbackList.on_train_batch_end`. Typically, the - values of the `Model`'s metrics are returned. - """ - x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) - - y_pred = self(x, training=False) - # Updates stateful loss metrics. - self.compute_loss(x, y, y_pred, sample_weight) - return self.compute_metrics(x, y, y_pred, sample_weight) - - def make_test_function(self, force=False): - """Creates a function that executes one step of evaluation. - - This method can be overridden to support custom evaluation logic. - This method is called by `Model.evaluate` and `Model.test_on_batch`. - - Typically, this method directly controls `tf.function` and - `tf.distribute.Strategy` settings, and delegates the actual evaluation - logic to `Model.test_step`. - - This function is cached the first time `Model.evaluate` or - `Model.test_on_batch` is called. The cache is cleared whenever - `Model.compile` is called. You can skip the cache and generate again the - function with `force=True`. - - Args: - force: Whether to regenerate the test function and skip the cached - function if available. - - Returns: - Function. The function created by this method should accept a - `tf.data.Iterator`, and return a `dict` containing values that will - be passed to `tf.keras.Callbacks.on_test_batch_end`. - """ - if self.test_function is not None and not force: - return self.test_function - - def step_function(model, iterator): - """Runs a single evaluation step.""" - - def run_step(data): - outputs = model.test_step(data) - # Ensure counter is updated only if `test_step` succeeds. - with tf.control_dependencies(_minimum_control_deps(outputs)): - model._test_counter.assign_add(1) # pylint: disable=protected-access - return outputs - - if self._jit_compile: - run_step = tf.function( - run_step, jit_compile=True, reduce_retracing=True) - - data = next(iterator) - outputs = model.distribute_strategy.run(run_step, args=(data,)) - outputs = reduce_per_replica( - outputs, self.distribute_strategy, reduction='first') - return outputs - - # Special case if steps_per_execution is one. - if (self._steps_per_execution is None or - self._steps_per_execution.numpy().item() == 1): - - def test_function(iterator): - """Runs a test execution with a single step.""" - return step_function(self, iterator) - - if not self.run_eagerly: - test_function = tf.function( - test_function, reduce_retracing=True) - - if self._cluster_coordinator: - self.test_function = lambda it: self._cluster_coordinator.schedule( # pylint: disable=g-long-lambda - test_function, args=(it,)) - else: - self.test_function = test_function - - # If we're using a coordinator, use the value of self._steps_per_execution - # at the time the function is called/scheduled, and not when it is actually - # executed. - elif self._cluster_coordinator: - - def test_function(iterator, steps_per_execution): - """Runs a test execution with multiple steps.""" - for _ in tf.range(steps_per_execution): - outputs = step_function(self, iterator) - return outputs - - if not self.run_eagerly: - test_function = tf.function( - test_function, reduce_retracing=True) - - self.test_function = lambda it: self._cluster_coordinator.schedule( # pylint: disable=g-long-lambda - test_function, - args=(it, self._steps_per_execution.value())) - else: + steps_per_execution=self._steps_per_execution, + ) + + # Container that configures and calls `tf.keras.Callback`s. 
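For context on the sharding tweak in the hunk above (switching a distributed input dataset to `AutoShardPolicy.DATA` so prediction results stay in order), here is a hedged, self-contained sketch of the same `tf.data` options call; the dataset is an assumed stand-in for the user-supplied `x`:

```python
import tensorflow as tf

# Toy dataset standing in for the user-supplied input.
dataset = tf.data.Dataset.from_tensor_slices(tf.ones((16, 3))).batch(4)

# Shard by elements rather than by files, as the warning above suggests;
# each worker then reads every n-th element and batch order is preserved.
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = (
    tf.data.experimental.AutoShardPolicy.DATA
)
dataset = dataset.with_options(options)
```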
+            if not isinstance(callbacks, callbacks_module.CallbackList):
+                callbacks = callbacks_module.CallbackList(
+                    callbacks,
+                    add_history=True,
+                    add_progbar=verbose != 0,
+                    model=self,
+                    verbose=verbose,
+                    epochs=1,
+                    steps=data_handler.inferred_steps,
+                )
+
+            self.predict_function = self.make_predict_function()
+            self._predict_counter.assign(0)
+            callbacks.on_predict_begin()
+            if self.autotune_steps_per_execution:
+                self._steps_per_execution_tuner.start()
+            batch_outputs = None
+            for _, iterator in data_handler.enumerate_epochs():  # Single epoch.
+                with data_handler.catch_stop_iteration():
+                    for step in data_handler.steps():
+                        callbacks.on_predict_batch_begin(step)
+                        tmp_batch_outputs = self.predict_function(iterator)
+                        if data_handler.should_sync:
+                            context.async_wait()
+                        batch_outputs = (
+                            tmp_batch_outputs  # No error, now safe to assign.
+                        )
+                        if outputs is None:
+                            outputs = tf.nest.map_structure(
+                                lambda batch_output: [batch_output],
+                                batch_outputs,
+                            )
+                        else:
+                            tf.__internal__.nest.map_structure_up_to(
+                                batch_outputs,
+                                lambda output, batch_output: output.append(
+                                    batch_output
+                                ),
+                                outputs,
+                                batch_outputs,
+                            )
+                        end_step = step + data_handler.step_increment
+                        callbacks.on_predict_batch_end(
+                            end_step, {"outputs": batch_outputs}
+                        )
+            if batch_outputs is None:
+                raise ValueError(
+                    "Unexpected result of `predict_function` "
+                    "(Empty batch_outputs). Please use "
+                    "`Model.compile(..., run_eagerly=True)`, or "
+                    "`tf.config.run_functions_eagerly(True)` for more "
+                    "information about where it went wrong, or file an "
+                    "issue/bug with `tf.keras`."
+                )
+            if self.autotune_steps_per_execution:
+                self._steps_per_execution_tuner.stop()
+            callbacks.on_predict_end()
+            all_outputs = tf.__internal__.nest.map_structure_up_to(
+                batch_outputs, potentially_ragged_concat, outputs
+            )
+
+        # If originally PSS strategy was used, then replace it back since
+        # predict is running under `OneDeviceStrategy` after the swap and once
+        # it's done we need to replace it back to PSS again.
+        if original_pss_strategy is not None:
+            self._distribution_strategy = original_pss_strategy
+
+        return tf_utils.sync_to_numpy_or_python_type(all_outputs)
+
+    def reset_metrics(self):
+        """Resets the state of all the metrics in the model.
+
+        Examples:
+
+        >>> inputs = tf.keras.layers.Input(shape=(3,))
+        >>> outputs = tf.keras.layers.Dense(2)(inputs)
+        >>> model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
+        >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae"])
+
+        >>> x = np.random.random((2, 3))
+        >>> y = np.random.randint(0, 2, (2, 2))
+        >>> _ = model.fit(x, y, verbose=0)
+        >>> assert all(float(m.result()) for m in model.metrics)
+
+        >>> model.reset_metrics()
+        >>> assert all(float(m.result()) == 0 for m in model.metrics)
+
+        """
+        for m in self.metrics:
+            m.reset_state()
+
+    def train_on_batch(
+        self,
+        x,
+        y=None,
+        sample_weight=None,
+        class_weight=None,
+        reset_metrics=True,
+        return_dict=False,
+    ):
+        """Runs a single gradient update on a single batch of data.
+
+        Args:
+            x: Input data. It could be:
+                - A Numpy array (or array-like), or a list of arrays
+                    (in case the model has multiple inputs).
+                - A TensorFlow tensor, or a list of tensors
+                    (in case the model has multiple inputs).
+                - A dict mapping input names to the corresponding array/tensors,
+                    if the model has named inputs.
+            y: Target data. Like the input data `x`, it could be either Numpy
+                array(s) or TensorFlow tensor(s).
+ sample_weight: Optional array of the same length as x, containing + weights to apply to the model's loss for each sample. In the case + of temporal data, you can pass a 2D array with shape (samples, + sequence_length), to apply a different weight to every timestep of + every sample. + class_weight: Optional dictionary mapping class indices (integers) + to a weight (float) to apply to the model's loss for the samples + from this class during training. This can be useful to tell the + model to "pay more attention" to samples from an under-represented + class. When `class_weight` is specified and targets have a rank of + 2 or greater, either `y` must be one-hot encoded, or an explicit + final dimension of `1` must be included for sparse class labels. + reset_metrics: If `True`, the metrics returned will be only for this + batch. If `False`, the metrics will be statefully accumulated + across batches. + return_dict: If `True`, loss and metric results are returned as a + dict, with each key being the name of the metric. If `False`, they + are returned as a list. + + Returns: + Scalar training loss + (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + RuntimeError: If `model.train_on_batch` is wrapped in a `tf.function`. + """ + self._assert_compile_was_called() + self._check_call_args("train_on_batch") + _disallow_inside_tf_function("train_on_batch") + if reset_metrics: + self.reset_metrics() + with self.distribute_strategy.scope(), training_utils.RespectCompiledTrainableState( # noqa: E501 + self + ): + iterator = data_adapter.single_batch_iterator( + self.distribute_strategy, x, y, sample_weight, class_weight + ) + self.train_function = self.make_train_function() + logs = self.train_function(iterator) - def test_function(iterator): - """Runs a test execution with multiple steps.""" - for _ in tf.range(self._steps_per_execution): - outputs = step_function(self, iterator) - return outputs - - if not self.run_eagerly: - test_function = tf.function( - test_function, reduce_retracing=True) - self.test_function = test_function - - return self.test_function - - @traceback_utils.filter_traceback - def evaluate(self, - x=None, - y=None, - batch_size=None, - verbose='auto', - sample_weight=None, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - return_dict=False, - **kwargs): - """Returns the loss value & metrics values for the model in test mode. - - Computation is done in batches (see the `batch_size` arg.) + logs = tf_utils.sync_to_numpy_or_python_type(logs) + if return_dict: + return logs + else: + return flatten_metrics_in_order(logs, self.metrics_names) - Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A dict mapping input names to the corresponding array/tensors, - if the model has named inputs. - - A `tf.data` dataset. Should return a tuple - of either `(inputs, targets)` or - `(inputs, targets, sample_weights)`. - - A generator or `keras.utils.Sequence` returning `(inputs, targets)` - or `(inputs, targets, sample_weights)`. 
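Since the `+` hunk above rewires `train_on_batch` around a single-batch iterator and the cached `train_function`, a short usage sketch may help; the model and data below are assumed examples, not part of this change:

```python
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(3,))])
model.compile(optimizer="sgd", loss="mse", metrics=["mae"])

x = np.random.random((32, 3))
y = np.random.random((32, 1))

# One gradient update on one batch; return_dict=True keys the results by
# metric name instead of ordering them by `model.metrics_names`.
logs = model.train_on_batch(x, y, return_dict=True)
print(logs)  # e.g. {'loss': 0.27, 'mae': 0.42}
```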
- A more detailed description of unpacking behavior for iterator types - (Dataset, generator, Sequence) is given in the `Unpacking behavior - for iterator-like inputs` section of `Model.fit`. - y: Target data. Like the input data `x`, it could be either Numpy - array(s) or TensorFlow tensor(s). It should be consistent with `x` - (you cannot have Numpy inputs and tensor targets, or inversely). If - `x` is a dataset, generator or `keras.utils.Sequence` instance, `y` - should not be specified (since targets will be obtained from the - iterator/dataset). - batch_size: Integer or `None`. Number of samples per batch of - computation. If unspecified, `batch_size` will default to 32. Do not - specify the `batch_size` if your data is in the form of a dataset, - generators, or `keras.utils.Sequence` instances (since they generate - batches). - verbose: `"auto"`, 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = progress bar, 2 = single line. - `"auto"` defaults to 1 for most cases, and to 2 when used with - `ParameterServerStrategy`. Note that the progress bar is not - particularly useful when logged to a file, so `verbose=2` is - recommended when not running interactively (e.g. in a production - environment). - sample_weight: Optional Numpy array of weights for the test samples, - used for weighting the loss function. You can either pass a flat (1D) - Numpy array with the same length as the input samples - (1:1 mapping between weights and samples), or in the case of - temporal data, you can pass a 2D array with shape `(samples, - sequence_length)`, to apply a different weight to every timestep - of every sample. This argument is not supported when `x` is a - dataset, instead pass sample weights as the third element of `x`. - steps: Integer or `None`. Total number of steps (batches of samples) - before declaring the evaluation round finished. Ignored with the - default value of `None`. If x is a `tf.data` dataset and `steps` is - None, 'evaluate' will run until the dataset is exhausted. This - argument is not supported with array inputs. - callbacks: List of `keras.callbacks.Callback` instances. List of - callbacks to apply during evaluation. See - [callbacks](/api_docs/python/tf/keras/callbacks). - max_queue_size: Integer. Used for generator or `keras.utils.Sequence` - input only. Maximum size for the generator queue. If unspecified, - `max_queue_size` will default to 10. - workers: Integer. Used for generator or `keras.utils.Sequence` input - only. Maximum number of processes to spin up when using process-based - threading. If unspecified, `workers` will default to 1. - use_multiprocessing: Boolean. Used for generator or - `keras.utils.Sequence` input only. If `True`, use process-based - threading. If unspecified, `use_multiprocessing` will default to - `False`. Note that because this implementation relies on - multiprocessing, you should not pass non-picklable arguments to the - generator as they can't be passed easily to children processes. - return_dict: If `True`, loss and metric results are returned as a dict, - with each key being the name of the metric. If `False`, they are - returned as a list. - **kwargs: Unused at this time. - - See the discussion of `Unpacking behavior for iterator-like inputs` for - `Model.fit`. + def test_on_batch( + self, + x, + y=None, + sample_weight=None, + reset_metrics=True, + return_dict=False, + ): + """Test the model on a single batch of samples. + + Args: + x: Input data. 
It could be: + - A Numpy array (or array-like), or a list of arrays (in case the + model has multiple inputs). + - A TensorFlow tensor, or a list of tensors (in case the model has + multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + y: Target data. Like the input data `x`, it could be either Numpy + array(s) or TensorFlow tensor(s). It should be consistent with `x` + (you cannot have Numpy inputs and tensor targets, or inversely). + sample_weight: Optional array of the same length as x, containing + weights to apply to the model's loss for each sample. In the case + of temporal data, you can pass a 2D array with shape (samples, + sequence_length), to apply a different weight to every timestep of + every sample. + reset_metrics: If `True`, the metrics returned will be only for this + batch. If `False`, the metrics will be statefully accumulated + across batches. + return_dict: If `True`, loss and metric results are returned as a + dict, with each key being the name of the metric. If `False`, they + are returned as a list. + + Returns: + Scalar test loss (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + RuntimeError: If `model.test_on_batch` is wrapped in a + `tf.function`. + """ + self._assert_compile_was_called() + self._check_call_args("test_on_batch") + _disallow_inside_tf_function("test_on_batch") + if reset_metrics: + self.reset_metrics() + with self.distribute_strategy.scope(): + iterator = data_adapter.single_batch_iterator( + self.distribute_strategy, x, y, sample_weight + ) + self.test_function = self.make_test_function() + logs = self.test_function(iterator) - Returns: - Scalar test loss (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. + logs = tf_utils.sync_to_numpy_or_python_type(logs) + if return_dict: + return logs + else: + return flatten_metrics_in_order(logs, self.metrics_names) + + def predict_on_batch(self, x): + """Returns predictions for a single batch of samples. + + Args: + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays (in case the + model has multiple inputs). + - A TensorFlow tensor, or a list of tensors (in case the model has + multiple inputs). + + Returns: + Numpy array(s) of predictions. + + Raises: + RuntimeError: If `model.predict_on_batch` is wrapped in a + `tf.function`. + """ + self._check_call_args("predict_on_batch") + _disallow_inside_tf_function("predict_on_batch") + with self.distribute_strategy.scope(): + iterator = data_adapter.single_batch_iterator( + self.distribute_strategy, x + ) + self.predict_function = self.make_predict_function() + outputs = self.predict_function(iterator) + return tf_utils.sync_to_numpy_or_python_type(outputs) + + @doc_controls.do_not_generate_docs + def fit_generator( + self, + generator, + steps_per_epoch=None, + epochs=1, + verbose=1, + callbacks=None, + validation_data=None, + validation_steps=None, + validation_freq=1, + class_weight=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + shuffle=True, + initial_epoch=0, + ): + """Fits the model on data yielded batch-by-batch by a Python generator. 
+ + DEPRECATED: + `Model.fit` now supports generators, so there is no longer any need to + use this endpoint. + """ + warnings.warn( + "`Model.fit_generator` is deprecated and " + "will be removed in a future version. " + "Please use `Model.fit`, which supports generators.", + stacklevel=2, + ) + return self.fit( + generator, + steps_per_epoch=steps_per_epoch, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + validation_freq=validation_freq, + class_weight=class_weight, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle, + initial_epoch=initial_epoch, + ) - Raises: - RuntimeError: If `model.evaluate` is wrapped in a `tf.function`. - """ - base_layer.keras_api_gauge.get_cell('evaluate').set(True) - version_utils.disallow_legacy_graph('Model', 'evaluate') - self._assert_compile_was_called() - self._check_call_args('evaluate') - self._check_sample_weight_warning(x, sample_weight) - _disallow_inside_tf_function('evaluate') - use_cached_eval_dataset = kwargs.pop('_use_cached_eval_dataset', False) - if kwargs: - raise TypeError(f'Invalid keyword arguments: {list(kwargs.keys())}') - - if self.distribute_strategy._should_use_with_coordinator: # pylint: disable=protected-access - self._cluster_coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator( - self.distribute_strategy) - - verbose = _get_verbosity(verbose, self.distribute_strategy) - with self.distribute_strategy.scope(): - # Use cached evaluation data only when it's called in `Model.fit` - if (use_cached_eval_dataset - and getattr(self, '_eval_data_handler', None) is not None): - data_handler = self._eval_data_handler - else: - # Creates a `tf.data.Dataset` and handles batch and epoch iteration. - data_handler = data_adapter.get_data_handler( - x=x, - y=y, - sample_weight=sample_weight, - batch_size=batch_size, - steps_per_epoch=steps, - initial_epoch=0, - epochs=1, + @doc_controls.do_not_generate_docs + def evaluate_generator( + self, + generator, + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + verbose=0, + ): + """Evaluates the model on a data generator. + + DEPRECATED: + `Model.evaluate` now supports generators, so there is no longer any + need to use this endpoint. + """ + warnings.warn( + "`Model.evaluate_generator` is deprecated and " + "will be removed in a future version. " + "Please use `Model.evaluate`, which supports generators.", + stacklevel=2, + ) + self._check_call_args("evaluate_generator") + + return self.evaluate( + generator, + steps=steps, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing, - model=self, - steps_per_execution=self._steps_per_execution) - - # Container that configures and calls `tf.keras.Callback`s. - if not isinstance(callbacks, callbacks_module.CallbackList): - callbacks = callbacks_module.CallbackList( - callbacks, - add_history=True, - add_progbar=verbose != 0, - model=self, verbose=verbose, - epochs=1, - steps=data_handler.inferred_steps) - - logs = {} - self.test_function = self.make_test_function() - self._test_counter.assign(0) - callbacks.on_test_begin() - for _, iterator in data_handler.enumerate_epochs(): # Single epoch. 
- self.reset_metrics() - with data_handler.catch_stop_iteration(): - for step in data_handler.steps(): - with tf.profiler.experimental.Trace('test', step_num=step, _r=1): - callbacks.on_test_batch_begin(step) - tmp_logs = self.test_function(iterator) - if data_handler.should_sync: - context.async_wait() - logs = tmp_logs # No error, now safe to assign to logs. - end_step = step + data_handler.step_increment - callbacks.on_test_batch_end(end_step, logs) - logs = tf_utils.sync_to_numpy_or_python_type(logs) - callbacks.on_test_end(logs=logs) - - if return_dict: - return logs - else: - return flatten_metrics_in_order(logs, self.metrics_names) - - def predict_step(self, data): - """The logic for one inference step. - - This method can be overridden to support custom inference logic. - This method is called by `Model.make_predict_function`. - - This method should contain the mathematical logic for one step of inference. - This typically includes the forward pass. + callbacks=callbacks, + ) - Configuration details for *how* this logic is run (e.g. `tf.function` and - `tf.distribute.Strategy` settings), should be left to - `Model.make_predict_function`, which can also be overridden. - - Args: - data: A nested structure of `Tensor`s. - - Returns: - The result of one inference step, typically the output of calling the - `Model` on data. - """ - x, _, _ = data_adapter.unpack_x_y_sample_weight(data) - return self(x, training=False) - - def make_predict_function(self, force=False): - """Creates a function that executes one step of inference. - - This method can be overridden to support custom inference logic. - This method is called by `Model.predict` and `Model.predict_on_batch`. - - Typically, this method directly controls `tf.function` and - `tf.distribute.Strategy` settings, and delegates the actual evaluation - logic to `Model.predict_step`. - - This function is cached the first time `Model.predict` or - `Model.predict_on_batch` is called. The cache is cleared whenever - `Model.compile` is called. You can skip the cache and generate again the - function with `force=True`. - - Args: - force: Whether to regenerate the predict function and skip the cached - function if available. - - Returns: - Function. The function created by this method should accept a - `tf.data.Iterator`, and return the outputs of the `Model`. - """ - if self.predict_function is not None and not force: - return self.predict_function - - def step_function(model, iterator): - """Runs a single evaluation step.""" - - def run_step(data): - outputs = model.predict_step(data) - # Ensure counter is updated only if `test_step` succeeds. - with tf.control_dependencies(_minimum_control_deps(outputs)): - model._predict_counter.assign_add(1) # pylint: disable=protected-access - return outputs - - if self._jit_compile: - run_step = tf.function( - run_step, jit_compile=True, reduce_retracing=True) - - data = next(iterator) - outputs = model.distribute_strategy.run(run_step, args=(data,)) - outputs = reduce_per_replica( - outputs, self.distribute_strategy, reduction='concat') - return outputs - - # Special case if steps_per_execution is one. 
- if (self._steps_per_execution is None or - self._steps_per_execution.numpy().item() == 1): - - def predict_function(iterator): - """Runs an evaluation execution with a single step.""" - return step_function(self, iterator) - - else: - - def predict_function(iterator): - """Runs an evaluation execution with multiple steps.""" - outputs = step_function(self, iterator) - for _ in tf.range(self._steps_per_execution - 1): - tf.autograph.experimental.set_loop_options(shape_invariants=[( - outputs, - tf.nest.map_structure( - lambda t: tf_utils.get_tensor_spec(t, dynamic_batch=True). - shape, outputs))]) - step_outputs = step_function(self, iterator) - outputs = tf.nest.map_structure(lambda t1, t2: concat([t1, t2]), - outputs, step_outputs) - return outputs - - if not self.run_eagerly: - predict_function = tf.function( - predict_function, reduce_retracing=True) - self.predict_function = predict_function - - return self.predict_function - - @traceback_utils.filter_traceback - def predict(self, - x, - batch_size=None, - verbose='auto', - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False): - """Generates output predictions for the input samples. - - Computation is done in batches. This method is designed for batch processing - of large numbers of inputs. It is not intended for use inside of loops - that iterate over your data and process small numbers of inputs at a time. - - For small numbers of inputs that fit in one batch, - directly use `__call__()` for faster execution, e.g., - `model(x)`, or `model(x, training=False)` if you have layers such as - `tf.keras.layers.BatchNormalization` that behave differently during - inference. You may pair the individual model call with a `tf.function` - for additional performance inside your inner loop. - If you need access to numpy array values instead of tensors after your - model call, you can use `tensor.numpy()` to get the numpy array value of - an eager tensor. - - Also, note the fact that test loss is not affected by - regularization layers like noise and dropout. - - Note: See [this FAQ entry]( - https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call) - for more details about the difference between `Model` methods `predict()` - and `__call__()`. - - Args: - x: Input samples. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A `tf.data` dataset. - - A generator or `keras.utils.Sequence` instance. - A more detailed description of unpacking behavior for iterator types - (Dataset, generator, Sequence) is given in the `Unpacking behavior - for iterator-like inputs` section of `Model.fit`. - batch_size: Integer or `None`. - Number of samples per batch. - If unspecified, `batch_size` will default to 32. - Do not specify the `batch_size` if your data is in the - form of dataset, generators, or `keras.utils.Sequence` instances - (since they generate batches). - verbose: `"auto"`, 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = progress bar, 2 = single line. - `"auto"` defaults to 1 for most cases, and to 2 when used with - `ParameterServerStrategy`. Note that the progress bar is not - particularly useful when logged to a file, so `verbose=2` is - recommended when not running interactively (e.g. in a production - environment). - steps: Total number of steps (batches of samples) - before declaring the prediction round finished. 
- Ignored with the default value of `None`. If x is a `tf.data` - dataset and `steps` is None, `predict()` will - run until the input dataset is exhausted. - callbacks: List of `keras.callbacks.Callback` instances. - List of callbacks to apply during prediction. - See [callbacks](/api_docs/python/tf/keras/callbacks). - max_queue_size: Integer. Used for generator or `keras.utils.Sequence` - input only. Maximum size for the generator queue. - If unspecified, `max_queue_size` will default to 10. - workers: Integer. Used for generator or `keras.utils.Sequence` input - only. Maximum number of processes to spin up when using - process-based threading. If unspecified, `workers` will default - to 1. - use_multiprocessing: Boolean. Used for generator or - `keras.utils.Sequence` input only. If `True`, use process-based - threading. If unspecified, `use_multiprocessing` will default to - `False`. Note that because this implementation relies on - multiprocessing, you should not pass non-picklable arguments to - the generator as they can't be passed easily to children processes. - - See the discussion of `Unpacking behavior for iterator-like inputs` for - `Model.fit`. Note that Model.predict uses the same interpretation rules as - `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all - three methods. - - Returns: - Numpy array(s) of predictions. - - Raises: - RuntimeError: If `model.predict` is wrapped in a `tf.function`. - ValueError: In case of mismatch between the provided - input data and the model's expectations, - or in case a stateful model receives a number of samples - that is not a multiple of the batch size. - """ - base_layer.keras_api_gauge.get_cell('predict').set(True) - version_utils.disallow_legacy_graph('Model', 'predict') - self._check_call_args('predict') - _disallow_inside_tf_function('predict') - - # TODO(yashkatariya): Cache model on the coordinator for faster prediction. - # If running under PSS, then swap it with OneDeviceStrategy so that - # execution will run on the coordinator. - original_pss_strategy = None - if self.distribute_strategy._should_use_with_coordinator: # pylint: disable=protected-access - original_pss_strategy = self.distribute_strategy - self._distribution_strategy = None - - # Cluster coordinator is set by `.fit()` and `.evaluate()` which is not - # needed in `.predict()` because all the predictions happen on the - # coordinator/locally. - if self._cluster_coordinator: - self._cluster_coordinator = None - - verbose = _get_verbosity(verbose, self.distribute_strategy) - outputs = None - with self.distribute_strategy.scope(): - # Creates a `tf.data.Dataset` and handles batch and epoch iteration. - dataset_types = (tf.compat.v1.data.Dataset, tf.data.Dataset) - if (self._in_multi_worker_mode() or _is_tpu_multi_host( - self.distribute_strategy)) and isinstance(x, dataset_types): - try: - options = tf.data.Options() - data_option = tf.data.experimental.AutoShardPolicy.DATA - options.experimental_distribute.auto_shard_policy = data_option - x = x.with_options(options) - except ValueError: - warnings.warn( - 'Using Model.predict with MultiWorkerMirroredStrategy or ' - 'TPUStrategy and AutoShardPolicy.FILE might lead to out-of-order ' - 'result. 
Consider setting it to AutoShardPolicy.DATA.', - stacklevel=2) - - data_handler = data_adapter.get_data_handler( - x=x, - batch_size=batch_size, - steps_per_epoch=steps, - initial_epoch=0, - epochs=1, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - model=self, - steps_per_execution=self._steps_per_execution) - - # Container that configures and calls `tf.keras.Callback`s. - if not isinstance(callbacks, callbacks_module.CallbackList): - callbacks = callbacks_module.CallbackList( - callbacks, - add_history=True, - add_progbar=verbose != 0, - model=self, + @doc_controls.do_not_generate_docs + def predict_generator( + self, + generator, + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + verbose=0, + ): + """Generates predictions for the input samples from a data generator. + + DEPRECATED: + `Model.predict` now supports generators, so there is no longer any + need to use this endpoint. + """ + warnings.warn( + "`Model.predict_generator` is deprecated and " + "will be removed in a future version. " + "Please use `Model.predict`, which supports generators.", + stacklevel=2, + ) + return self.predict( + generator, + steps=steps, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, verbose=verbose, - epochs=1, - steps=data_handler.inferred_steps) - - self.predict_function = self.make_predict_function() - self._predict_counter.assign(0) - callbacks.on_predict_begin() - batch_outputs = None - for _, iterator in data_handler.enumerate_epochs(): # Single epoch. - with data_handler.catch_stop_iteration(): - for step in data_handler.steps(): - callbacks.on_predict_batch_begin(step) - tmp_batch_outputs = self.predict_function(iterator) - if data_handler.should_sync: - context.async_wait() - batch_outputs = tmp_batch_outputs # No error, now safe to assign. - if outputs is None: - outputs = tf.nest.map_structure(lambda batch_output: [batch_output], - batch_outputs) - else: - tf.__internal__.nest.map_structure_up_to( - batch_outputs, - lambda output, batch_output: output.append(batch_output), - outputs, batch_outputs) - end_step = step + data_handler.step_increment - callbacks.on_predict_batch_end(end_step, {'outputs': batch_outputs}) - if batch_outputs is None: - raise ValueError('Unexpected result of `predict_function` ' - '(Empty batch_outputs). Please use ' - '`Model.compile(..., run_eagerly=True)`, or ' - '`tf.config.run_functions_eagerly(True)` for more ' - 'information of where went wrong, or file a ' - 'issue/bug to `tf.keras`.') - callbacks.on_predict_end() - all_outputs = tf.__internal__.nest.map_structure_up_to( - batch_outputs, potentially_ragged_concat, outputs) - - # If originally PSS strategy was used, then replace it back since predict - # is running under `OneDeviceStrategy` after the swap and once its done - # we need to replace it back to PSS again. - if original_pss_strategy is not None: - self._distribution_strategy = original_pss_strategy - - return tf_utils.sync_to_numpy_or_python_type(all_outputs) - - def reset_metrics(self): - """Resets the state of all the metrics in the model. 
- - Examples: - - >>> inputs = tf.keras.layers.Input(shape=(3,)) - >>> outputs = tf.keras.layers.Dense(2)(inputs) - >>> model = tf.keras.models.Model(inputs=inputs, outputs=outputs) - >>> model.compile(optimizer="Adam", loss="mse", metrics=["mae"]) - - >>> x = np.random.random((2, 3)) - >>> y = np.random.randint(0, 2, (2, 2)) - >>> _ = model.fit(x, y, verbose=0) - >>> assert all(float(m.result()) for m in model.metrics) - - >>> model.reset_metrics() - >>> assert all(float(m.result()) == 0 for m in model.metrics) - - """ - for m in self.metrics: - m.reset_state() - - def train_on_batch(self, - x, - y=None, - sample_weight=None, - class_weight=None, - reset_metrics=True, - return_dict=False): - """Runs a single gradient update on a single batch of data. - - Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A dict mapping input names to the corresponding array/tensors, - if the model has named inputs. - y: Target data. Like the input data `x`, it could be either Numpy - array(s) or TensorFlow tensor(s). - sample_weight: Optional array of the same length as x, containing - weights to apply to the model's loss for each sample. In the case of - temporal data, you can pass a 2D array with shape (samples, - sequence_length), to apply a different weight to every timestep of - every sample. - class_weight: Optional dictionary mapping class indices (integers) to a - weight (float) to apply to the model's loss for the samples from this - class during training. This can be useful to tell the model to "pay - more attention" to samples from an under-represented class. - reset_metrics: If `True`, the metrics returned will be only for this - batch. If `False`, the metrics will be statefully accumulated across - batches. - return_dict: If `True`, loss and metric results are returned as a dict, - with each key being the name of the metric. If `False`, they are - returned as a list. + callbacks=callbacks, + ) + + ###################################################################### + # Functions below are not training related. They are for model weights + # tracking, save/load, serialization, etc. + ###################################################################### + + @property + def trainable_weights(self): + self._assert_weights_created() + if not self._trainable: + return [] + trainable_variables = [] + for trackable_obj in self._self_tracked_trackables: + trainable_variables += trackable_obj.trainable_variables + trainable_variables += self._trainable_weights + return self._dedup_weights(trainable_variables) + + @property + def non_trainable_weights(self): + self._assert_weights_created() + non_trainable_variables = [] + for trackable_obj in self._self_tracked_trackables: + non_trainable_variables += trackable_obj.non_trainable_variables + + if not self._trainable: + # Return order is all trainable vars, then all non-trainable vars. 
+                trainable_variables = []
+                for trackable_obj in self._self_tracked_trackables:
+                    trainable_variables += trackable_obj.trainable_variables
+
+                non_trainable_variables = (
+                    trainable_variables
+                    + self._trainable_weights
+                    + non_trainable_variables
+                    + self._non_trainable_weights
+                )
+        else:
+            non_trainable_variables = (
+                non_trainable_variables + self._non_trainable_weights
+            )
+
+        return self._dedup_weights(non_trainable_variables)
+
+    def get_weights(self):
+        """Retrieves the weights of the model.
+
+        Returns:
+            A flat list of Numpy arrays.
+        """
+        with self.distribute_strategy.scope():
+            return super().get_weights()
+
+    @traceback_utils.filter_traceback
+    def save(self, filepath, overwrite=True, save_format=None, **kwargs):
+        """Saves a model as a TensorFlow SavedModel or HDF5 file.
+
+        See the [Serialization and Saving guide](
+        https://keras.io/guides/serialization_and_saving/) for details.
+
+        Args:
+            filepath: `str` or `pathlib.Path` object. Path where to save the
+                model.
+            overwrite: Whether we should overwrite any existing model at the
+                target location, or instead ask the user via an interactive
+                prompt.
+            save_format: Either `"keras"`, `"tf"`, `"h5"`,
+                indicating whether to save the model
+                in the native Keras format (`.keras`),
+                in the TensorFlow SavedModel format
+                (referred to as "SavedModel" below),
+                or in the legacy HDF5 format (`.h5`).
+                Defaults to `"tf"` in TF 2.X, and `"h5"` in TF 1.X.
+
+        SavedModel format arguments:
+            include_optimizer: Only applied to SavedModel and legacy HDF5
+                formats. If False, do not save the optimizer state.
+                Defaults to `True`.
+            signatures: Only applies to SavedModel format. Signatures to save
+                with the SavedModel. See the `signatures` argument in
+                `tf.saved_model.save` for details.
+            options: Only applies to SavedModel format.
+                `tf.saved_model.SaveOptions` object that specifies SavedModel
+                saving options.
+            save_traces: Only applies to SavedModel format. When enabled, the
+                SavedModel will store the function traces for each layer. This
+                can be disabled, so that only the configs of each layer are
+                stored. Defaults to `True`.
+                Disabling this will decrease serialization time
+                and reduce file size, but it requires that all custom
+                layers/models implement a `get_config()` method.
+
+        Example:
+
+        ```python
+        model = tf.keras.Sequential([
+            tf.keras.layers.Dense(5, input_shape=(3,)),
+            tf.keras.layers.Softmax()])
+        model.save("model.keras")
+        loaded_model = tf.keras.models.load_model("model.keras")
+        x = tf.random.uniform((10, 3))
+        assert np.allclose(model.predict(x), loaded_model.predict(x))
+        ```
+
+        Note that `model.save()` is an alias for `tf.keras.models.save_model()`.
+        """
+        saving_api.save_model(
+            self,
+            filepath=filepath,
+            overwrite=overwrite,
+            save_format=save_format,
+            **kwargs,
+        )
+
+    @traceback_utils.filter_traceback
+    def save_weights(
+        self, filepath, overwrite=True, save_format=None, options=None
+    ):
+        """Saves all layer weights.
+
+        Either saves in HDF5 or in TensorFlow format based on the `save_format`
+        argument.
+
+        When saving in HDF5 format, the weight file has:
+            - `layer_names` (attribute), a list of strings
+                (ordered names of model layers).
+            - For every layer, a `group` named `layer.name`
+                - For every such layer group, a group attribute `weight_names`,
+                    a list of strings
+                    (ordered names of weights tensor of the layer).
+                - For every weight in the layer, a dataset
+                    storing the weight value, named after the weight tensor.
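A minimal sketch of the HDF5 round trip described by the layout above; the file name and the one-layer model are assumptions for illustration:

```python
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(3,))])
# The ".h5" suffix selects the HDF5 layout described above over the
# TensorFlow checkpoint format.
model.save_weights("weights.h5")

# A model with the same topology can load the file layer by layer.
clone = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(3,))])
clone.load_weights("weights.h5")
```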
+
+        When saving in TensorFlow format, all objects referenced by the network
+        are saved in the same format as `tf.train.Checkpoint`, including any
+        `Layer` instances or `Optimizer` instances assigned to object
+        attributes. For networks constructed from inputs and outputs using
+        `tf.keras.Model(inputs, outputs)`, `Layer` instances used by the network
+        are tracked/saved automatically. For user-defined classes which inherit
+        from `tf.keras.Model`, `Layer` instances must be assigned to object
+        attributes, typically in the constructor. See the documentation of
+        `tf.train.Checkpoint` and `tf.keras.Model` for details.
+
+        While the formats are the same, do not mix `save_weights` and
+        `tf.train.Checkpoint`. Checkpoints saved by `Model.save_weights` should
+        be loaded using `Model.load_weights`. Checkpoints saved using
+        `tf.train.Checkpoint.save` should be restored using the corresponding
+        `tf.train.Checkpoint.restore`. Prefer `tf.train.Checkpoint` over
+        `save_weights` for training checkpoints.
+
+        The TensorFlow format matches objects and variables by starting at a
+        root object, `self` for `save_weights`, and greedily matching attribute
+        names. For `Model.save` this is the `Model`, and for `Checkpoint.save`
+        this is the `Checkpoint` even if the `Checkpoint` has a model attached.
+        This means saving a `tf.keras.Model` using `save_weights` and loading
+        into a `tf.train.Checkpoint` with a `Model` attached (or vice versa)
+        will not match the `Model`'s variables. See the
+        [guide to training checkpoints](
+        https://www.tensorflow.org/guide/checkpoint) for details on
+        the TensorFlow format.
+
+        Args:
+            filepath: String or PathLike, path to the file to save the weights
+                to. When saving in TensorFlow format, this is the prefix used
+                for checkpoint files (multiple files are generated). Note that
+                the '.h5' suffix causes weights to be saved in HDF5 format.
+            overwrite: Whether to silently overwrite any existing file at the
+                target location, or provide the user with a manual prompt.
+            save_format: Either 'tf' or 'h5'. A `filepath` ending in '.h5' or
+                '.keras' will default to HDF5 if `save_format` is `None`.
+                Otherwise, `None` becomes 'tf'. Defaults to `None`.
+            options: Optional `tf.train.CheckpointOptions` object that specifies
+                options for saving weights.
+
+        Raises:
+            ImportError: If `h5py` is not available when attempting to save in
+                HDF5 format.
+        """
+        saving_api.save_weights(
+            self,
+            filepath=filepath,
+            overwrite=overwrite,
+            save_format=save_format,
+            options=options,
+        )
+
+    @traceback_utils.filter_traceback
+    def load_weights(
+        self, filepath, skip_mismatch=False, by_name=False, options=None
+    ):
+        """Loads all layer weights from a saved file.
+
+        The saved file could be a SavedModel file, a `.keras` file (v3 saving
+        format), or a file created via `model.save_weights()`.
+
+        By default, weights are loaded based on the network's
+        topology. This means the architecture should be the same as when the
+        weights were saved. Note that layers that don't have weights are not
+        taken into account in the topological ordering, so adding or removing
+        layers is fine as long as they don't have weights.
+
+        **Partial weight loading**
+
+        If you have modified your model, for instance by adding a new layer
+        (with weights) or by changing the shape of the weights of a layer,
+        you can choose to ignore errors and continue loading
+        by setting `skip_mismatch=True`. In this case any layer with
+        mismatching weights will be skipped. A warning will be displayed
+        for each skipped layer.
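As a hedged sketch of the partial-loading behavior just described (the layer names, shapes, and file name below are illustrative assumptions):

```python
import tensorflow as tf

base = tf.keras.Sequential(
    [tf.keras.layers.Dense(4, input_shape=(3,), name="feature")]
)
base.save_weights("base.h5")

# Same layer name but a different output width: with skip_mismatch=True
# the mismatching layer is skipped with a warning instead of raising.
wider = tf.keras.Sequential(
    [tf.keras.layers.Dense(8, input_shape=(3,), name="feature")]
)
wider.load_weights("base.h5", by_name=True, skip_mismatch=True)
```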
+
+        **Weight loading by name**
+
+        If your weights are saved as a `.h5` file created
+        via `model.save_weights()`, you can use the argument `by_name=True`.
+
+        In this case, weights are loaded into layers only if they share
+        the same name. This is useful for fine-tuning or transfer-learning
+        models where some of the layers have changed.
+
+        Note that only topological loading (`by_name=False`) is supported when
+        loading weights from the `.keras` v3 format or from the TensorFlow
+        SavedModel format.
+
+        Args:
+            filepath: String, path to the weights file to load. For weight files
+                in TensorFlow format, this is the file prefix (the same as was
+                passed to `save_weights()`). This can also be a path to a
+                SavedModel or a `.keras` file (v3 saving format) saved
+                via `model.save()`.
+            skip_mismatch: Boolean, whether to skip loading of layers where
+                there is a mismatch in the number of weights, or a mismatch in
+                the shape of the weights.
+            by_name: Boolean, whether to load weights by name or by topological
+                order. Only topological loading is supported for weight files in
+                the `.keras` v3 format or in the TensorFlow SavedModel format.
+            options: Optional `tf.train.CheckpointOptions` object that specifies
+                options for loading weights (only valid for a SavedModel file).
+        """
+        return saving_api.load_weights(
+            self,
+            filepath=filepath,
+            by_name=by_name,
+            skip_mismatch=skip_mismatch,
+            options=options,
+        )
+
+    def _updated_config(self):
+        """Util shared between different serialization methods.
+
+        Returns:
+            Model config with Keras version information added.
+        """
+        from keras import __version__ as keras_version
+
+        config = self.get_config()
+        model_config = {
+            "class_name": self.__class__.__name__,
+            "config": config,
+            "keras_version": keras_version,
+            "backend": backend.backend(),
+        }
+        return model_config
+
+    @generic_utils.default
+    def get_config(self):
+        """Returns the config of the `Model`.
+
+        Config is a Python dictionary (serializable) containing the
+        configuration of an object, which in this case is a `Model`. This allows
+        the `Model` to be reinstantiated later (without its trained weights)
+        from this configuration.
+
+        Note that `get_config()` does not guarantee to return a fresh copy of
+        the dict every time it is called. The callers should make a copy of the
+        returned dict if they want to modify it.
+
+        Developers of subclassed `Model` are advised to override this method,
+        and continue to update the dict from `super(MyModel, self).get_config()`
+        to provide the proper configuration of this `Model`. The default config
+        will return config dict for init parameters if they are basic types.
+        Raises `NotImplementedError` in cases where a custom
+        `get_config()` implementation is required for the subclassed model.
+
+        Returns:
+            Python dictionary containing the configuration of this `Model`.
+        """
+        # If the subclass doesn't implement `get_config()`, parse from init
+        # args; otherwise default to an empty dict.
+        if generic_utils.is_default(self.get_config):
+            try:
+                config = base_layer.Layer.get_config(self)
+            except NotImplementedError:
+                config = {}
+                logging.warning(
+                    "Model's `__init__()` arguments contain non-serializable "
+                    "objects. Please implement a `get_config()` method in the "
+                    "subclassed Model for proper saving and loading. "
+                    "Defaulting to empty config."
+                )
+        else:
+            config = {}
+        return config
+
+    @classmethod
+    def from_config(cls, config, custom_objects=None):
+        # `from_config` assumes `cls` is either `Functional` or a child
+        # class of `Functional`.
+        # In the case that `cls` is meant to behave like a child
+        # class of `Functional` but only inherits from the `Model` class, we
+        # have to call `cls(...)` instead of `Functional.from_config`.
+        from keras.engine import functional
+
+        with serialization.SharedObjectLoadingScope():
+            functional_config_keys = [
+                "name",
+                "layers",
+                "input_layers",
+                "output_layers",
+            ]
+            is_functional_config = all(
+                key in config for key in functional_config_keys
+            )
+            argspec = tf_inspect.getfullargspec(cls.__init__)
+            functional_init_args = tf_inspect.getfullargspec(
+                functional.Functional.__init__
+            ).args[1:]
+            revivable_as_functional = (
+                cls in {functional.Functional, Model}
+                or argspec.args[1:] == functional_init_args
+                or (argspec.varargs == "args" and argspec.varkw == "kwargs")
+            )
+            if is_functional_config and revivable_as_functional:
+                # Revive Functional model
+                # (but not Functional subclasses with a custom __init__)
+                inputs, outputs, layers = functional.reconstruct_from_config(
+                    config, custom_objects
+                )
+                model = cls(
+                    inputs=inputs, outputs=outputs, name=config.get("name")
+                )
+                functional.connect_ancillary_layers(model, layers)
-    Returns:
-        Scalar training loss
-        (if the model has a single output and no metrics)
-        or list of scalars (if the model has multiple outputs
-        and/or metrics). The attribute `model.metrics_names` will give you
-        the display labels for the scalar outputs.
+            else:
+                # Either the model has a custom __init__, or the config
+                # does not contain all the information necessary to
+                # revive a Functional model. This happens when the user creates
+                # subclassed models where `get_config()` is returning
+                # insufficient information to be considered a Functional model.
+                # In this case, we fall back to provide all config into the
+                # constructor of the class.
+                try:
+                    model = cls(**config)
+                except TypeError as e:
+                    raise TypeError(
+                        "Unable to revive model from config. When overriding "
+                        "the `get_config()` method, make sure that the "
+                        "returned config contains all items used as arguments "
+                        f"in the constructor to {cls}, "
+                        "which is the default behavior. "
+                        "You can override this default behavior by defining a "
+                        "`from_config(cls, config)` class method to specify "
+                        "how to create an "
+                        f"instance of {cls.__name__} from its config.\n\n"
+                        f"Received config={config}\n\n"
+                        f"Error encountered during deserialization: {e}"
+                    )
+            return model
+
+    def to_json(self, **kwargs):
+        """Returns a JSON string containing the network configuration.
+
+        To load a network from a JSON save file, use
+        `keras.models.model_from_json(json_string, custom_objects={})`.
+
+        Args:
+            **kwargs: Additional keyword arguments to be passed to
+                `json.dumps()`.
+
+        Returns:
+            A JSON string.
+        """
+        model_config = self._updated_config()
+        return json.dumps(
+            model_config, default=json_utils.get_json_type, **kwargs
+        )
+
+    def to_yaml(self, **kwargs):
+        """Returns a yaml string containing the network configuration.
+
+        Note: Since TF 2.6, this method is no longer supported and will raise a
+        RuntimeError.
+
+        To load a network from a yaml save file, use
+        `keras.models.model_from_yaml(yaml_string, custom_objects={})`.
+
+        `custom_objects` should be a dictionary mapping
+        the names of custom losses / layers / etc to the corresponding
+        functions / classes.
+
+        Args:
+            **kwargs: Additional keyword arguments
+                to be passed to `yaml.dump()`.
+
+        Returns:
+            A YAML string.
+ + Raises: + RuntimeError: raised unconditionally, as this method poses a + security risk. + """ + raise RuntimeError( + "Method `model.to_yaml()` has been removed due to security risk of " + "arbitrary code execution. Please use `model.to_json()` instead." + ) + + def reset_states(self): + for layer in self.layers: + if hasattr(layer, "reset_states") and getattr( + layer, "stateful", False + ): + layer.reset_states() + + @property + @doc_controls.do_not_generate_docs + def state_updates(self): + """Deprecated, do NOT use! + + Returns the `updates` from all layers that are stateful. + + This is useful for separating training updates and + state updates, e.g. when we need to update a layer's internal state + during prediction. + + Returns: + A list of update ops. + """ + warnings.warn( + "`Model.state_updates` will be removed in a future version. " + "This property should not be used in TensorFlow 2.0, " + "as `updates` are applied automatically.", + stacklevel=2, + ) + state_updates = [] + for layer in self.layers: + if getattr(layer, "stateful", False): + if hasattr(layer, "updates"): + state_updates += layer.updates + return state_updates + + @property + def weights(self): + """Returns the list of all layer variables/weights. + + Note: This will not track the weights of nested `tf.Modules` that are + not themselves Keras layers. + + Returns: + A list of variables. + """ + return self._dedup_weights(self._undeduplicated_weights) + + @property + def _undeduplicated_weights(self): + """Returns the undeduplicated list of all layer variables/weights.""" + self._assert_weights_created() + weights = [] + for layer in self._self_tracked_trackables: + weights += layer.variables + weights += self._trainable_weights + self._non_trainable_weights + return weights + + def summary( + self, + line_length=None, + positions=None, + print_fn=None, + expand_nested=False, + show_trainable=False, + layer_range=None, + ): + """Prints a string summary of the network. + + Args: + line_length: Total length of printed lines + (e.g. set this to adapt the display to different + terminal window sizes). + positions: Relative or absolute positions of log elements + in each line. If not provided, becomes + `[0.3, 0.6, 0.7, 1.]`. Defaults to `None`. + print_fn: Print function to use. By default, prints to `stdout`. + If `stdout` doesn't work in your environment, pass `print` + instead. It will be called on each line of the summary. + You can set it to a custom function + in order to capture the string summary. + expand_nested: Whether to expand the nested models. + Defaults to `False`. + show_trainable: Whether to show if a layer is trainable. + Defaults to `False`. + layer_range: a list or tuple of 2 strings, the starting layer name + and ending layer name (both inclusive), indicating the range of + layers to be printed in the summary. It also accepts regex + patterns instead of exact names. In that case, the start + predicate will be the first element that matches + `layer_range[0]` and the end predicate will be the last element + that matches `layer_range[1]`. By default (`None`), all layers + of the model are considered. + + Raises: + ValueError: if `summary()` is called before the model is built. + """ + if not self.built: + raise ValueError( + "This model has not yet been built. " + "Build the model first by calling `build()` or by calling " + "the model on a batch of data."
+ + ) + layer_utils.print_summary( + self, + line_length=line_length, + positions=positions, + print_fn=print_fn, + expand_nested=expand_nested, + show_trainable=show_trainable, + layer_range=layer_range, + ) + + @property + def layers(self): + return list(self._flatten_layers(include_self=False, recursive=False)) + + @layers.setter + def layers(self, _): + raise AttributeError( + "`Model.layers` attribute is reserved and should not be used. " + "Please use another name." + ) + + def get_layer(self, name=None, index=None): + """Retrieves a layer based on either its name (unique) or index. + + Provide only one of `name` or `index`; passing both raises a + `ValueError`. Indices are based on order of horizontal graph + traversal (bottom-up). + + Args: + name: String, name of layer. + index: Integer, index of layer. + + Returns: + A layer instance. + """ + # TODO(fchollet): We could build a dictionary based on layer names + # since they are constant, but we have not done that yet. + if index is not None and name is not None: + raise ValueError( + "Provide only a layer name or a layer index. Received: " + f"index={index}, name={name}." + ) + + if index is not None: + if len(self.layers) <= index: + raise ValueError( + f"Was asked to retrieve layer at index {index}" + f" but model only has {len(self.layers)}" + " layers." + ) + else: + return self.layers[index] + + if name is not None: + for layer in self.layers: + if layer.name == name: + return layer + raise ValueError( + f"No such layer: {name}. Existing layers are: " + f"{list(layer.name for layer in self.layers)}." + ) + raise ValueError( + "Provide either a layer name or layer index at `get_layer`." + ) + + def get_weight_paths(self): + """Retrieve all the variables and their paths for the model. + + The variable path (string) is a stable key to identify a `tf.Variable` + instance owned by the model. It can be used to specify variable-specific + configurations (e.g. DTensor, quantization) from a global view. + + This method returns a dict with weight object paths as keys + and the corresponding `tf.Variable` instances as values. + + Note that if the model is a subclassed model and the weights haven't + been initialized, an empty dict will be returned. + + Returns: + A dict where keys are variable paths and values are `tf.Variable` + instances.
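A minimal sketch of the `get_layer()` lookup rules documented above; the layer names here are hypothetical:

```python
import tensorflow as tf

model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(4, name="hidden"),
        tf.keras.layers.Dense(1, name="out"),
    ]
)

# Lookup by name or by index returns the same layer objects.
assert model.get_layer(name="hidden") is model.layers[0]
assert model.get_layer(index=1).name == "out"

# Passing both `name` and `index` raises a ValueError.
try:
    model.get_layer(name="hidden", index=1)
except ValueError:
    pass
```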
+ + Example: + + ```python + class SubclassModel(tf.keras.Model): + + def __init__(self, name=None): + super().__init__(name=name) + self.d1 = tf.keras.layers.Dense(10) + self.d2 = tf.keras.layers.Dense(20) + + def call(self, inputs): + x = self.d1(inputs) + return self.d2(x) + + model = SubclassModel() + model(tf.zeros((10, 10))) + weight_paths = model.get_weight_paths() + # weight_paths: + # { + # 'd1.kernel': model.d1.kernel, + # 'd1.bias': model.d1.bias, + # 'd2.kernel': model.d2.kernel, + # 'd2.bias': model.d2.bias, + # } + + # Functional model + inputs = tf.keras.Input((10,), batch_size=10) + x = tf.keras.layers.Dense(20, name='d1')(inputs) + output = tf.keras.layers.Dense(30, name='d2')(x) + model = tf.keras.Model(inputs, output) + d1 = model.layers[1] + d2 = model.layers[2] + weight_paths = model.get_weight_paths() + # weight_paths: + # { + # 'd1.kernel': d1.kernel, + # 'd1.bias': d1.bias, + # 'd2.kernel': d2.kernel, + # 'd2.bias': d2.bias, + # } + ``` + """ + result = {} + ( + descendants, + object_paths_dict, + ) = tf.__internal__.tracking.ObjectGraphView( + self + ).breadth_first_traversal() + for descendant in descendants: + if isinstance(descendant, tf.Variable): + trackable_references = object_paths_dict[descendant] + object_path = ".".join([t.name for t in trackable_references]) + result[object_path] = descendant + return result + + def get_compile_config(self): + """Returns a serialized config with information for compiling the model. + + This method returns a config dictionary containing all the information + (optimizer, loss, metrics, etc.) with which the model was compiled. + + Returns: + A dict containing information for compiling the model. + """ + if self._is_compiled and hasattr(self, "_compile_config"): + return self._compile_config.serialize() + + def compile_from_config(self, config): + """Compiles the model with the information given in config. + + This method uses the information in the config (optimizer, loss, + metrics, etc.) to compile the model. + + Args: + config: Dict containing information for compiling the model. + """ + has_overridden_compile = self.__class__.compile != Model.compile + if has_overridden_compile: + logging.warning( + "`compile()` was not called as part of model loading " + "because the model's `compile()` method is custom. " + "All subclassed Models that have `compile()` " + "overridden should also override " + "`get_compile_config()` and `compile_from_config(config)`. " + "Alternatively, you can " + "call `compile()` manually after loading." + ) + return + config = saving_lib.deserialize_keras_object(config) + self.compile(**config) + if hasattr(self, "optimizer") and self.built: + # Create optimizer variables. + self.optimizer.build(self.trainable_variables) + + def export(self, filepath): + """Create a SavedModel artifact for inference (e.g. via TF-Serving). + + This method lets you export a model to a lightweight SavedModel artifact + that contains the model's forward pass only (its `call()` method) + and can be served via e.g. TF-Serving. The forward pass is registered + under the name `serve()` (see example below). + + The original code of the model (including any custom layers you may + have used) is *no longer* necessary to reload the artifact -- it is + entirely standalone. + + Args: + filepath: `str` or `pathlib.Path` object. Path where to save + the artifact. + + Example: + + ```python + # Create the artifact + model.export("path/to/location") + + # Later, in a different process / environment... 
+ reloaded_artifact = tf.saved_model.load("path/to/location") + predictions = reloaded_artifact.serve(input_data) + ``` + + If you would like to customize your serving endpoints, you can + use the lower-level `keras.export.ExportArchive` class. The `export()` + method relies on `ExportArchive` internally. + """ + from keras.export import export_lib + + export_lib.export_model(self, filepath) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _set_save_spec(self, inputs, args=None, kwargs=None): + """Defines the save spec so that serialization can trace `call()`. + + The TensorSpecs of the call function `inputs`, `args`, and `kwargs` are + saved into a tuple of `([inputs] + args, kwargs)`. The input + `TensorSpec` names are updated to match the built `input_names`. + + The specs can be retrieved with the `save_spec` property. + + Args: + inputs: possibly nested inputs passed into the call function. + args: a list of positional arguments passed into call. + kwargs: a dictionary of keyword arguments passed into call. + """ + if self._saved_model_inputs_spec is not None: + return # Already set. + args = args or [] + kwargs = kwargs or {} + + input_names = self.input_names + if not input_names: + input_names = compile_utils.create_pseudo_input_names(inputs) + + flat_inputs = tf.nest.flatten(inputs) + inputs_spec = [] + for name, tensor in zip(input_names, flat_inputs): + inputs_spec.append( + tf_utils.get_tensor_spec(tensor, dynamic_batch=False, name=name) + ) + inputs_spec = tf.nest.pack_sequence_as(inputs, inputs_spec) + super()._set_save_spec(inputs_spec, args, kwargs) + + # Store the input shapes + if ( + self.__class__.__name__ == "Sequential" + and self._build_input_shape is None + ): + self._build_input_shape = tf.nest.map_structure( + lambda x: None if x is None else x.shape, inputs_spec + ) + + def save_spec(self, dynamic_batch=True): + """Returns the `tf.TensorSpec` of call args as a tuple `(args, kwargs)`. + + This value is automatically defined after calling the model for the + first time. Afterwards, you can use it when exporting the model for + serving: + + ```python + model = tf.keras.Model(...) + + @tf.function + def serve(*args, **kwargs): + outputs = model(*args, **kwargs) + # Apply postprocessing steps, or add additional outputs. + ... + return outputs + + # arg_specs is `[tf.TensorSpec(...), ...]`. kwarg_specs, in this + # example, is an empty dict since functional models do not use keyword + # arguments. + arg_specs, kwarg_specs = model.save_spec() + + model.save(path, signatures={ + 'serving_default': serve.get_concrete_function(*arg_specs, + **kwarg_specs) + }) + ``` + + Args: + dynamic_batch: Whether to set the batch sizes of all the returned + `tf.TensorSpec` to `None`. (Note that when defining functional or + Sequential models with `tf.keras.Input([...], batch_size=X)`, the + batch size will always be preserved). Defaults to `True`. + Returns: + If the model inputs are defined, returns a tuple `(args, kwargs)`. All + elements in `args` and `kwargs` are `tf.TensorSpec`. + If the model inputs are not defined, returns `None`. + The model inputs are automatically set when calling the model, + `model.fit`, `model.evaluate` or `model.predict`. + """ + return self._get_save_spec(dynamic_batch, inputs_only=False) + + def _assert_weights_created(self): + """Asserts that all the weights for the model have been created. + + For a non-dynamic model, the weights must already be created after the + layer has been called. 
For a dynamic model, the exact list of weights + can never be known for certain since it may change at any time during + execution. + + We run this check right before accessing weights or getting the Numpy + value for the current weights. Otherwise, if the layer has never been + called, the user would just get an empty list, which is misleading. + + Raises: + ValueError: if the weights of the network have not yet been created. + """ + if self.dynamic: + return + + if ( + "build" in self.__class__.__dict__ + and self.__class__ != Model + and not self.built + ): + # For any model that has customized build() method but hasn't been + # invoked yet, this will cover both sequential and subclass model. + # Also make sure to exclude Model class itself which has build() + # defined. + raise ValueError( + f"Weights for model '{self.name}' have not yet been " + "created. " + "Weights are created when the model is first called on " + "inputs or `build()` is called with an `input_shape`." + ) + + def _check_call_args(self, method_name): + """Check that `call()` has only one positional arg.""" + # Always allow first arg, regardless of arg name. + fullargspec = self._call_spec.full_argspec + if fullargspec.defaults: + positional_args = fullargspec.args[: -len(fullargspec.defaults)] + else: + positional_args = fullargspec.args + if "training" in positional_args: + positional_args.remove("training") + + # self and first arg can be positional. + if len(positional_args) > 2: + extra_args = positional_args[2:] + raise ValueError( + f"Models passed to `{method_name}` can only have `training` " + "and the first argument in `call()` as positional arguments, " + f"found: {extra_args}." + ) + + def _validate_compile(self, optimizer, metrics, **kwargs): + """Performs validation checks for the default `compile()`.""" + if any( + isinstance(opt, optimizer_v1.Optimizer) + for opt in tf.nest.flatten(optimizer) + ): + raise ValueError( + f"`tf.compat.v1.keras` Optimizer ({optimizer}) is " + "not supported when eager execution is enabled. Use a " + "`tf.keras` Optimizer instead, or disable eager " + "execution." + ) + + kwargs.pop("cloning", None) # Legacy DistStrat argument, never used. + kwargs.pop("experimental_run_tf_function", None) # Always `True`. + distribute_arg = kwargs.pop("distribute", None) + if distribute_arg is not None: + raise ValueError( + "`distribute` argument in compile is not available in TF 2.0. " + "Please create the model under the `strategy.scope()`. " + f"Received: {distribute_arg}." + ) + target_tensor_arg = kwargs.pop("target_tensors", None) + if target_tensor_arg is not None: + raise ValueError( + "`target_tensors` argument is not supported when executing " + f"eagerly. Received: {target_tensor_arg}." + ) + invalid_kwargs = set(kwargs) - {"sample_weight_mode"} + if invalid_kwargs: + raise TypeError( + "Invalid keyword argument(s) in `compile()`: " + f"{(invalid_kwargs,)}. Valid keyword arguments include " + '"cloning", "experimental_run_tf_function", "distribute",' + ' "target_tensors", or "sample_weight_mode".' + ) + + # Model must be created and compiled with the same DistStrat. + if self.built and tf.distribute.has_strategy(): + strategy = tf.distribute.get_strategy() + for v in self.variables: + if not strategy.extended.variable_created_in_scope(v): + raise ValueError( + f"Variable ({v}) was not created in the distribution " + f"strategy scope of ({strategy}). It is most likely " + "because some layers, model, or optimizer was being " + "created outside the distribution strategy scope. 
Try " + "to make sure your code looks similar " + "to the following.\nwith strategy.scope():\n" + " model=_create_model()\n" + " model.compile(...)" + ) + + # Model metrics must be created in the same distribution strategy scope + # as the model. + strategy = self.distribute_strategy + for metric in tf.nest.flatten(metrics): + for v in getattr(metric, "variables", []): + if not strategy.extended.variable_created_in_scope(v): + raise ValueError( + f"Metric ({metric}) passed to `model.compile` was " + "created inside a different distribution strategy " + "scope than the model. All metrics must be created " + "in the same distribution strategy " + f"scope as the model (in this case {strategy}). " + "If you pass in a string identifier for a metric to " + "compile, the metric will automatically be created " + "in the correct distribution strategy scope." + ) + + # Model metrics must be created in the same distribution strategy scope + # as the model. + for opt in tf.nest.flatten(optimizer): + for v in getattr(opt, "_weights", []): + if not strategy.extended.variable_created_in_scope(v): + raise ValueError( + f"Optimizer ({optimizer}) passed to `model.compile` " + "was created inside a different distribution strategy " + "scope than the model. All optimizers must be created " + "in the same distribution strategy scope as the model " + f"(in this case {strategy}). If you pass in a string " + "identifier for an optimizer to compile, the optimizer " + "will automatically be created in the correct " + "distribution strategy scope." + ) + + def _maybe_load_initial_counters_from_ckpt( + self, steps_per_epoch, initial_epoch + ): + """Maybe load initial epoch from ckpt, considering worker recovery. + + Refer to tensorflow/python/keras/distribute/worker_training_state.py + for more information. + + Args: + steps_per_epoch: The number of step per epoch. + initial_epoch: The original initial_epoch user passes in `fit()`. + mode: The mode for running `model.fit()`. + + Returns: + If the training is recovering from previous failure under multi-worker + training setting, return the (epoch, step) the training is supposed to + continue at. Otherwise, return the `initial_epoch, initial_step` the + user passes in. + """ + initial_step = 0 + if self._training_state is not None: + return self._training_state.maybe_load_initial_counters_from_ckpt( + steps_per_epoch, initial_epoch, mode=ModeKeys.TRAIN + ) + return (initial_epoch, initial_step) + + def _assert_compile_was_called(self): + # Checks whether `compile` has been called. If it has been called, + # then the optimizer is set. This is different from whether the + # model is compiled + # (i.e. whether the model is built and its inputs/outputs are set). + if not self._is_compiled: + raise RuntimeError( + "You must compile your model before " + "training/testing. " + "Use `model.compile(optimizer, loss)`." + ) + + def _check_sample_weight_warning(self, x, sample_weight): + # Datasets can include sample weight, by returning a tuple with the + # structure of `(x, y, sample_weight)`. + sample_weight_present = sample_weight is not None or ( + isinstance(x, tf.data.Dataset) + and isinstance(x.element_spec, tuple) + and len(x.element_spec) == 3 + ) + + if ( + sample_weight_present + and self.compiled_metrics._user_weighted_metrics is None + ): + logging.warning( + "`evaluate()` received a value for `sample_weight`, but " + "`weighted_metrics` were not provided. Did you mean to pass " + "metrics to `weighted_metrics` in `compile()`? 
If this is " + "intentional you can pass `weighted_metrics=[]` to `compile()` " + "in order to silence this warning." + ) + + def _set_inputs(self, inputs, outputs=None, training=None): + """This method is for compat with Modelv1. Only inputs are needed + here.""" + self._set_save_spec(inputs) + + @property + def _trackable_saved_model_saver(self): + return model_serialization.ModelSavedModelSaver(self) + + def _trackable_children(self, save_type="checkpoint", **kwargs): + if save_type == "savedmodel": + # SavedModel needs to ignore the execution functions. + train_function = self.train_function + test_function = self.test_function + predict_function = self.predict_function + train_tf_function = self.train_tf_function + self.train_function = None + self.test_function = None + self.predict_function = None + self.train_tf_function = None + + children = super()._trackable_children(save_type, **kwargs) + + if save_type == "savedmodel": + self.train_function = train_function + self.test_function = test_function + self.predict_function = predict_function + self.train_tf_function = train_tf_function + + return children + + def _should_eval(self, epoch, validation_freq): + epoch = epoch + 1 # one-index the user-facing epoch. + if isinstance(validation_freq, int): + return epoch % validation_freq == 0 + elif isinstance(validation_freq, list): + return epoch in validation_freq + else: + raise ValueError( + "Expected `validation_freq` to be a list or int. " + f"Received: validation_freq={validation_freq} of the " + f"type {type(validation_freq)}." + ) + + ###################################################################### + # Functions below exist only as v1 / v2 compatibility shims. + ###################################################################### + + def _get_compile_args(self, user_metrics=True): + """Used for saving or cloning a Model. + + Args: + user_metrics: Whether to return user-supplied metrics or `Metric` + objects. If True, returns the user-supplied metrics. + Defaults to `True`. + + Returns: + Dictionary of arguments that were used when compiling the model. 
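As a standalone sketch of the `validation_freq` semantics implemented by `_should_eval()` above (the `should_eval` helper below is illustrative, not part of this diff):

```python
def should_eval(epoch, validation_freq):
    # Mirrors `Model._should_eval`: user-facing epochs are one-indexed.
    epoch = epoch + 1
    if isinstance(validation_freq, int):
        return epoch % validation_freq == 0
    return epoch in validation_freq  # assumes a list of epoch numbers

# Run validation every 2nd epoch, or only at epochs 1 and 5 (one-indexed).
assert [e for e in range(6) if should_eval(e, 2)] == [1, 3, 5]
assert [e for e in range(6) if should_eval(e, [1, 5])] == [0, 4]
```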
+ """ + self._assert_compile_was_called() + saved_metrics = self.compiled_metrics._user_metrics + saved_weighted_metrics = self.compiled_metrics._user_weighted_metrics + + if not user_metrics: + if saved_metrics is not None: + saved_metrics = self.compiled_metrics._metrics + if saved_weighted_metrics is not None: + saved_weighted_metrics = self.compiled_metrics._weighted_metrics + + compile_args = { + "optimizer": self.optimizer, + "loss": self.compiled_loss._user_losses, + "metrics": saved_metrics, + "weighted_metrics": saved_weighted_metrics, + "loss_weights": self.compiled_loss._user_loss_weights, + } + return compile_args + + def _get_callback_model(self): + return self + + def _in_multi_worker_mode(self): + return self.distribute_strategy.extended._in_multi_worker_mode() + + @property + def _compile_was_called(self): + return self._is_compiled + + def _save_experimental(self, filepath): + return saving_lib.save_model(self, filepath) + + +class _TestFunction: + def __init__(self, function, callbacks): + self._function = function + self._callbacks = callbacks + + def run_step(self, dataset_or_iterator, data_handler, step, unused_shards): + tmp_logs = self._function(dataset_or_iterator) + if data_handler.should_sync: + context.async_wait() + logs = tmp_logs + end_step = step + data_handler.step_increment + self._callbacks.on_test_batch_end(end_step, logs) + return logs - Raises: - RuntimeError: If `model.train_on_batch` is wrapped in a `tf.function`. - """ - self._assert_compile_was_called() - self._check_call_args('train_on_batch') - _disallow_inside_tf_function('train_on_batch') - if reset_metrics: - self.reset_metrics() - with self.distribute_strategy.scope(), \ - training_utils.RespectCompiledTrainableState(self): - iterator = data_adapter.single_batch_iterator(self.distribute_strategy, x, - y, sample_weight, - class_weight) - self.train_function = self.make_train_function() - logs = self.train_function(iterator) - - logs = tf_utils.sync_to_numpy_or_python_type(logs) - if return_dict: - return logs - else: - return flatten_metrics_in_order(logs, self.metrics_names) - def test_on_batch(self, - x, - y=None, - sample_weight=None, - reset_metrics=True, - return_dict=False): - """Test the model on a single batch of samples. +class _ExactTestFunction(_TestFunction): + def __init__(self, function, callbacks): + super().__init__(function, callbacks) + self._logs = [] + + def run_step(self, dataset_or_iterator, data_handler, step, shards): + tmp_logs = self._function( + dataset_or_iterator, + tf.constant(shards, dtype=tf.int64), + tf.constant(step, dtype=tf.int64), + ) + if data_handler.should_sync: + context.async_wait() + self._logs.append(tmp_logs) + return self._logs + + +def reduce_per_replica(values, strategy, reduction): + """Attempt to reduce the structure `values` to single values. + + Given `values` (a `tf.Tensor` or a `PerReplica` structure), + which represents the values across all the replicas, `reduce_per_replica` + attempts to "reduce" those values and returns the corresponding structure + that represents only single values. + + Currently, `reduce_per_replica` is only used for reducing the metric results + from `tf.distribute.Strategy.run()`. Depending on the underlying + `Strategy` implementation, `values` may be a `PerReplica` object, + which can be thought of as a collection of values across the replicas, + or a `tf.Tensor`, if the strategy has already conducted the reduction + for the downstream library. 
+ + There are five possible outcomes of reduction: + + 1) if `values` is a structure of simple `tf.Tensor`s, meaning that + reduction is not actually needed, `reduce_per_replica` returns the + structure as-is. + 2) else, if `reduction="auto"`, then the best reduction strategy is + chosen based on the current environment. This should only be used + for training cases (`fit()`). + 3) else, if `reduction="first"`, then `reduce_per_replica` + returns the values of the first replica. This is used in the case of + training and evaluation, where `values` is expected to hold the same + value across the replicas as a result of `Strategy`'s synchronization + across the replicas. + `reduce_per_replica` does not synchronize the values. + 4) else, if `reduction="sum"`, then `reduce_per_replica` returns the sum + of values for all replicas. This may be used in the custom training loop + case, where each replica contains different values which are not + synchronized. + 5) else, if `reduction="concat"`, then `reduce_per_replica` + returns the concatenation of the values across the replicas, along the + axis of dimension 0. This is used in the inference case (`predict()`). Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays (in case the - model has multiple inputs). - - A TensorFlow tensor, or a list of tensors (in case the model has - multiple inputs). - y: Target data. Like the input data `x`, it could be either Numpy - array(s) or TensorFlow tensor(s). It should be consistent with `x` - (you cannot have Numpy inputs and tensor targets, or inversely). - sample_weight: Optional array of the same length as x, containing - weights to apply to the model's loss for each sample. In the case of - temporal data, you can pass a 2D array with shape (samples, - sequence_length), to apply a different weight to every timestep of - every sample. - reset_metrics: If `True`, the metrics returned will be only for this - batch. If `False`, the metrics will be statefully accumulated across - batches. - return_dict: If `True`, loss and metric results are returned as a dict, - with each key being the name of the metric. If `False`, they are - returned as a list. + values: Structure of `PerReplica` objects or `tf.Tensor`s. `tf.Tensor`s + are returned as-is. + strategy: `tf.distribute.Strategy` object. + reduction: One of `"auto"`, `"first"`, `"concat"`, or `"sum"`. + `"auto"` will select `"first"` when used under a TPUStrategy, or + `"sum"` otherwise. Returns: - Scalar test loss (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. + Structure of `Tensor`s, representing the result of reduction. Raises: - RuntimeError: If `model.test_on_batch` is wrapped in a `tf.function`. + ValueError: if the reduction method is not supported.
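As a rough illustration of the three concrete reduction modes listed above, with plain tensors standing in for the local results of a `PerReplica` value (the real helper obtains these via `strategy.experimental_local_results()`):

```python
import tensorflow as tf

# Hypothetical per-replica results from two replicas.
replica_values = [tf.constant([1.0, 2.0]), tf.constant([3.0, 4.0])]

first = replica_values[0]                         # reduction="first"
summed = tf.add_n(replica_values)                 # reduction="sum" (elementwise here)
concatenated = tf.concat(replica_values, axis=0)  # reduction="concat"

print(first.numpy())         # [1. 2.]
print(summed.numpy())        # [4. 6.]
print(concatenated.numpy())  # [1. 2. 3. 4.]
```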
""" - self._assert_compile_was_called() - self._check_call_args('test_on_batch') - _disallow_inside_tf_function('test_on_batch') - if reset_metrics: - self.reset_metrics() - with self.distribute_strategy.scope(): - iterator = data_adapter.single_batch_iterator(self.distribute_strategy, x, - y, sample_weight) - self.test_function = self.make_test_function() - logs = self.test_function(iterator) - - logs = tf_utils.sync_to_numpy_or_python_type(logs) - if return_dict: - return logs - else: - return flatten_metrics_in_order(logs, self.metrics_names) - def predict_on_batch(self, x): - """Returns predictions for a single batch of samples. + if reduction == "auto": + reduction = "first" if backend.is_tpu_strategy(strategy) else "sum" + + def _reduce(v): + """Reduce a single `PerReplica` object.""" + if _collective_all_reduce_multi_worker(strategy): + if reduction == "concat": + return _multi_worker_concat(v, strategy) + elif reduction == "sum": + return strategy.reduce("SUM", v, axis=None) + + if _is_dtensor_per_replica_instance(v): + return _reduce_dtensor_per_replica(v, strategy, reduction) + elif not _is_per_replica_instance(v): + return v + elif reduction == "first": + return strategy.experimental_local_results(v)[0] + elif reduction == "concat": + if _is_tpu_multi_host(strategy): + return _tpu_multi_host_concat(v, strategy) + else: + return concat(strategy.experimental_local_results(v)) + elif reduction == "sum": + return tf.reduce_sum(strategy.experimental_local_results(v)) + else: + raise ValueError( + '`reduction` must be "first", "concat", "sum", or "auto". ' + f"Received: reduction={reduction}." + ) - Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays (in case the - model has multiple inputs). - - A TensorFlow tensor, or a list of tensors (in case the model has - multiple inputs). + return tf.nest.map_structure(_reduce, values) - Returns: - Numpy array(s) of predictions. - Raises: - RuntimeError: If `model.predict_on_batch` is wrapped in a `tf.function`. - """ - self._check_call_args('predict_on_batch') - _disallow_inside_tf_function('predict_on_batch') - with self.distribute_strategy.scope(): - iterator = data_adapter.single_batch_iterator(self.distribute_strategy, x) - self.predict_function = self.make_predict_function() - outputs = self.predict_function(iterator) - return tf_utils.sync_to_numpy_or_python_type(outputs) - - @doc_controls.do_not_generate_docs - def fit_generator(self, - generator, - steps_per_epoch=None, - epochs=1, - verbose=1, - callbacks=None, - validation_data=None, - validation_steps=None, - validation_freq=1, - class_weight=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - shuffle=True, - initial_epoch=0): - """Fits the model on data yielded batch-by-batch by a Python generator. - - DEPRECATED: - `Model.fit` now supports generators, so there is no longer any need to use - this endpoint. - """ - warnings.warn( - '`Model.fit_generator` is deprecated and ' - 'will be removed in a future version. 
' - 'Please use `Model.fit`, which supports generators.', - stacklevel=2) - return self.fit( - generator, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=validation_steps, - validation_freq=validation_freq, - class_weight=class_weight, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle, - initial_epoch=initial_epoch) - - @doc_controls.do_not_generate_docs - def evaluate_generator(self, - generator, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - verbose=0): - """Evaluates the model on a data generator. - - DEPRECATED: - `Model.evaluate` now supports generators, so there is no longer any need - to use this endpoint. - """ - warnings.warn( - '`Model.evaluate_generator` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `Model.evaluate`, which supports generators.', - stacklevel=2) - self._check_call_args('evaluate_generator') - - return self.evaluate( - generator, - steps=steps, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - verbose=verbose, - callbacks=callbacks) - - @doc_controls.do_not_generate_docs - def predict_generator(self, - generator, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - verbose=0): - """Generates predictions for the input samples from a data generator. - - DEPRECATED: - `Model.predict` now supports generators, so there is no longer any need - to use this endpoint. - """ - warnings.warn( - '`Model.predict_generator` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `Model.predict`, which supports generators.', - stacklevel=2) - return self.predict( - generator, - steps=steps, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - verbose=verbose, - callbacks=callbacks) - - ###################################################################### - # Functions below are not training related. They are for model weights - # tracking, save/load, serialization, etc. - ###################################################################### - - @property - def trainable_weights(self): - self._assert_weights_created() - if not self._trainable: - return [] - trainable_variables = [] - for trackable_obj in self._self_tracked_trackables: - trainable_variables += trackable_obj.trainable_variables - trainable_variables += self._trainable_weights - return self._dedup_weights(trainable_variables) - - @property - def non_trainable_weights(self): - self._assert_weights_created() - non_trainable_variables = [] - for trackable_obj in self._self_tracked_trackables: - non_trainable_variables += trackable_obj.non_trainable_variables - - if not self._trainable: - # Return order is all trainable vars, then all non-trainable vars. 
- trainable_variables = [] - for trackable_obj in self._self_tracked_trackables: - trainable_variables += trackable_obj.trainable_variables - - non_trainable_variables = ( - trainable_variables + self._trainable_weights + - non_trainable_variables + self._non_trainable_weights) +def concat(tensors, axis=0): + """Concats `tensor`s along `axis`.""" + if isinstance(tensors[0], tf.SparseTensor): + return tf.sparse.concat(axis=axis, sp_inputs=tensors) + elif _is_scalar(tensors[0]): + return tf.stack(tensors, axis=axis) else: - non_trainable_variables = ( - non_trainable_variables + self._non_trainable_weights) - - return self._dedup_weights(non_trainable_variables) - - def get_weights(self): - """Retrieves the weights of the model. - - Returns: - A flat list of Numpy arrays. - """ - with self.distribute_strategy.scope(): - return super().get_weights() - - @traceback_utils.filter_traceback - def save(self, - filepath, - overwrite=True, - include_optimizer=True, - save_format=None, - signatures=None, - options=None, - save_traces=True): - # pylint: disable=line-too-long - """Saves the model to Tensorflow SavedModel or a single HDF5 file. - - Please see `tf.keras.models.save_model` or the - [Serialization and Saving guide](https://keras.io/guides/serialization_and_saving/) - for details. - - Args: - filepath: String, PathLike, path to SavedModel or H5 file to save the - model. - overwrite: Whether to silently overwrite any existing file at the - target location, or provide the user with a manual prompt. - include_optimizer: If True, save optimizer's state together. - save_format: Either `'tf'` or `'h5'`, indicating whether to save the - model to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, - and 'h5' in TF 1.X. - signatures: Signatures to save with the SavedModel. Applicable to the - 'tf' format only. Please see the `signatures` argument in - `tf.saved_model.save` for details. - options: (only applies to SavedModel format) - `tf.saved_model.SaveOptions` object that specifies options for - saving to SavedModel. - save_traces: (only applies to SavedModel format) When enabled, the - SavedModel will store the function traces for each layer. This - can be disabled, so that only the configs of each layer are stored. - Defaults to `True`. Disabling this will decrease serialization time - and reduce file size, but it requires that all custom layers/models - implement a `get_config()` method. + return tf.concat(tensors, axis=axis) - Example: - ```python - from keras.models import load_model - - model.save('my_model.h5') # creates a HDF5 file 'my_model.h5' - del model # deletes the existing model - - # returns a compiled model - # identical to the previous one - model = load_model('my_model.h5') - ``` - """ - # pylint: enable=line-too-long - save.save_model(self, filepath, overwrite, include_optimizer, save_format, - signatures, options, save_traces) - - @traceback_utils.filter_traceback - def save_weights(self, - filepath, - overwrite=True, - save_format=None, - options=None): - """Saves all layer weights. - - Either saves in HDF5 or in TensorFlow format based on the `save_format` - argument. - - When saving in HDF5 format, the weight file has: - - `layer_names` (attribute), a list of strings - (ordered names of model layers). - - For every layer, a `group` named `layer.name` - - For every such layer group, a group attribute `weight_names`, - a list of strings - (ordered names of weights tensor of the layer). 
- - For every weight in the layer, a dataset - storing the weight value, named after the weight tensor. - - When saving in TensorFlow format, all objects referenced by the network are - saved in the same format as `tf.train.Checkpoint`, including any `Layer` - instances or `Optimizer` instances assigned to object attributes. For - networks constructed from inputs and outputs using `tf.keras.Model(inputs, - outputs)`, `Layer` instances used by the network are tracked/saved - automatically. For user-defined classes which inherit from `tf.keras.Model`, - `Layer` instances must be assigned to object attributes, typically in the - constructor. See the documentation of `tf.train.Checkpoint` and - `tf.keras.Model` for details. - - While the formats are the same, do not mix `save_weights` and - `tf.train.Checkpoint`. Checkpoints saved by `Model.save_weights` should be - loaded using `Model.load_weights`. Checkpoints saved using - `tf.train.Checkpoint.save` should be restored using the corresponding - `tf.train.Checkpoint.restore`. Prefer `tf.train.Checkpoint` over - `save_weights` for training checkpoints. - - The TensorFlow format matches objects and variables by starting at a root - object, `self` for `save_weights`, and greedily matching attribute - names. For `Model.save` this is the `Model`, and for `Checkpoint.save` this - is the `Checkpoint` even if the `Checkpoint` has a model attached. This - means saving a `tf.keras.Model` using `save_weights` and loading into a - `tf.train.Checkpoint` with a `Model` attached (or vice versa) will not match - the `Model`'s variables. See the - [guide to training checkpoints](https://www.tensorflow.org/guide/checkpoint) - for details on the TensorFlow format. - - Args: - filepath: String or PathLike, path to the file to save the weights to. - When saving in TensorFlow format, this is the prefix used for - checkpoint files (multiple files are generated). Note that the '.h5' - suffix causes weights to be saved in HDF5 format. - overwrite: Whether to silently overwrite any existing file at the - target location, or provide the user with a manual prompt. - save_format: Either 'tf' or 'h5'. A `filepath` ending in '.h5' or - '.keras' will default to HDF5 if `save_format` is `None`. Otherwise - `None` defaults to 'tf'. - options: Optional `tf.train.CheckpointOptions` object that specifies - options for saving weights. - - Raises: - ImportError: If `h5py` is not available when attempting to save in HDF5 - format. - """ - self._assert_weights_created() - filepath = io_utils.path_to_string(filepath) - filepath_is_h5 = saving_utils.is_hdf5_filepath(filepath) - if save_format is None: - if filepath_is_h5: - save_format = 'h5' - else: - save_format = 'tf' - else: - user_format = save_format.lower().strip() - if user_format in ('tensorflow', 'tf'): - save_format = 'tf' - elif user_format in ('hdf5', 'h5', 'keras'): - save_format = 'h5' - else: - raise ValueError( - f'Unknown format. Received: `save_format`={save_format}. Was ' - 'expecting one of {"tf", "h5"}.') - if save_format == 'tf' and filepath_is_h5: - raise ValueError( - 'save_weights got save_format="tf"/"tensorflow", but the ' - f'filepath ({filepath}) looks like an HDF5 file. ' - 'Omit the ".h5"/".keras" when saving in TensorFlow format.') - - if save_format == 'h5' and h5py is None: - raise ImportError( - '`save_weights` requires h5py when saving in hdf5, but h5py is not ' - 'available. 
Try installing h5py package.') - if save_format == 'tf': - check_filepath = filepath + '.index' - else: - check_filepath = filepath - # If file exists and should not be overwritten: - if not overwrite and os.path.isfile(check_filepath): - proceed = io_utils.ask_to_proceed_with_overwrite(check_filepath) - if not proceed: - return - if save_format == 'h5': - with h5py.File(filepath, 'w') as f: - hdf5_format.save_weights_to_hdf5_group(f, self) - else: - if not tf.executing_eagerly(): - # Call `get_session` to initialize any uninitialized variables. - backend.get_session() - self._checkpoint.write(filepath, options=options) - - # Record this checkpoint so it's visible from tf.train.latest_checkpoint. - tf.__internal__.train.update_checkpoint_state( - save_dir=os.path.dirname(filepath), - model_checkpoint_path=filepath, - save_relative_paths=True, - all_model_checkpoint_paths=[filepath]) - - @traceback_utils.filter_traceback - def load_weights(self, - filepath, - by_name=False, - skip_mismatch=False, - options=None): - """Loads all layer weights, either from a TensorFlow or an HDF5 weight file. - - If `by_name` is False weights are loaded based on the network's - topology. This means the architecture should be the same as when the weights - were saved. Note that layers that don't have weights are not taken into - account in the topological ordering, so adding or removing layers is fine as - long as they don't have weights. - - If `by_name` is True, weights are loaded into layers only if they share the - same name. This is useful for fine-tuning or transfer-learning models where - some of the layers have changed. - - Only topological loading (`by_name=False`) is supported when loading weights - from the TensorFlow format. Note that topological loading differs slightly - between TensorFlow and HDF5 formats for user-defined classes inheriting from - `tf.keras.Model`: HDF5 loads based on a flattened list of weights, while the - TensorFlow format loads based on the object-local names of attributes to - which layers are assigned in the `Model`'s constructor. +def potentially_ragged_concat(tensors): + """Concats `Tensor`s along their first dimension. Args: - filepath: String, path to the weights file to load. For weight files in - TensorFlow format, this is the file prefix (the same as was passed - to `save_weights`). This can also be a path to a SavedModel - saved from `model.save`. - by_name: Boolean, whether to load weights by name or by topological - order. Only topological loading is supported for weight files in - TensorFlow format. - skip_mismatch: Boolean, whether to skip loading of layers where there is - a mismatch in the number of weights, or a mismatch in the shape of - the weight (only valid when `by_name=True`). - options: Optional `tf.train.CheckpointOptions` object that specifies - options for loading weights. + tensors: List of `Tensor`s. Returns: - When loading a weight file in TensorFlow format, returns the same status - object as `tf.train.Checkpoint.restore`. When graph building, restore - ops are run automatically as soon as the network is built (on first call - for user-defined classes inheriting from `Model`, immediately if it is - already built). - - When loading weights in HDF5 format, returns `None`. - - Raises: - ImportError: If `h5py` is not available and the weight file is in HDF5 - format. - ValueError: If `skip_mismatch` is set to `True` when `by_name` is - `False`. 
+ Concatenation of the inputs along the first dimension -- of type `Tensor` + if all input shapes are compatible, or `RaggedTensor` if not. """ - if backend.is_tpu_strategy(self._distribution_strategy): - if (self._distribution_strategy.extended.steps_per_run > 1 and - (not saving_utils.is_hdf5_filepath(filepath))): - spr = self._distribution_strategy.extended.steps_per_run - raise ValueError('Load weights is not implemented with TPUStrategy ' - 'with `steps_per_run` greater than 1. The ' - f'`steps_per_run` is {spr}') - if skip_mismatch and not by_name: - raise ValueError( - 'When calling model.load_weights, skip_mismatch can only be set to ' - 'True when by_name is True.') - - filepath, save_format = _detect_save_format(filepath) - if save_format == 'tf': - status = self._checkpoint.read(filepath, options) - if by_name: - raise NotImplementedError( - 'Weights may only be loaded based on topology into Models when ' - 'loading TensorFlow-formatted weights (got by_name=True to ' - 'load_weights).') - if not tf.executing_eagerly(): - session = backend.get_session() - # Restore existing variables (if any) immediately, and set up a - # streaming restore for any variables created in the future. - tf.__internal__.tracking.streaming_restore(status=status, - session=session) - status.assert_nontrivial_match() - else: - status = None - if h5py is None: - raise ImportError( - '`load_weights` requires h5py package when loading weights from ' - 'HDF5. Try installing h5py.') - if not self._is_graph_network and not self.built: - raise ValueError( - 'Unable to load weights saved in HDF5 format into a subclassed ' - 'Model which has not created its variables yet. Call the Model ' - 'first, then load the weights.') - self._assert_weights_created() - with h5py.File(filepath, 'r') as f: - if 'layer_names' not in f.attrs and 'model_weights' in f: - f = f['model_weights'] - if by_name: - hdf5_format.load_weights_from_hdf5_group_by_name( - f, self, skip_mismatch) + if len(tensors) == 1: + return tensors[0] + if isinstance(tensors[0], tf.SparseTensor): + return tf.sparse.concat(axis=0, sp_inputs=tensors) + elif isinstance(tensors[0], tf.RaggedTensor): + return tf.concat(tensors, axis=0) + elif not tf.__internal__.tf2.enabled(): + return tf.concat(tensors, axis=0) + + non_batch_shapes = tf.stack([tf.shape(tensor)[1:] for tensor in tensors]) + constant_dims = tf.math.reduce_all( + non_batch_shapes == non_batch_shapes[:1], axis=0 + ) + if tf.math.reduce_all(constant_dims).numpy().item(): + # All non-batch dims are constant + if _is_scalar(tensors[0]): + return tf.stack(tensors, axis=0) else: - hdf5_format.load_weights_from_hdf5_group(f, self) - - # Perform any layer defined finalization of the layer state. - for layer in self.layers: - layer.finalize_state() - return status + return tf.concat(tensors, axis=0) - def _updated_config(self): - """Util shared between different serialization methods. - - Returns: - Model config with Keras version information added. - """ - from keras import __version__ as keras_version # pylint: disable=g-import-not-at-top - - config = self.get_config() - model_config = { - 'class_name': self.__class__.__name__, - 'config': config, - 'keras_version': keras_version, - 'backend': backend.backend() - } - return model_config - - def get_config(self): - """Returns the config of the `Model`. - - Config is a Python dictionary (serializable) containing the configuration of - an object, which in this case is a `Model`. 
This allows the `Model` to be - be reinstantiated later (without its trained weights) from this - configuration. - - Note that `get_config()` does not guarantee to return a fresh copy of dict - every time it is called. The callers should make a copy of the returned dict - if they want to modify it. - - Developers of subclassed `Model` are advised to override this method, and - continue to update the dict from `super(MyModel, self).get_config()` - to provide the proper configuration of this `Model`. The default config - is an empty dict. Optionally, raise `NotImplementedError` to allow Keras to - attempt a default serialization. - - Returns: - Python dictionary containing the configuration of this `Model`. - """ - - # Return an empty dict here because otherwise subclass model developers may - # see their model's `__init__()` be fed with unexpected keyword argument, if - # their `__init__()` takes no argument for example, and they don't override - # `from_config()`, which would use `cls(**config)` as a result. - config = {} - - if saving_lib._ENABLED: # pylint: disable=protected-access - if self.optimizer: - config['optimizer'] = saving_lib.serialize_keras_object(self.optimizer) - if self.compiled_loss: - config['loss'] = saving_lib.serialize_keras_object(self.compiled_loss) - if self.built: - config['input_shape'] = self._build_input_shape - - return config - - @classmethod - def from_config(cls, config, custom_objects=None): - # `from_config` assumes `cls` is either `Functional` or a child class of - # `Functional`. In the case that `cls` is meant to behave like a child class - # of `Functional` but only inherits from the `Model` class, we have to call - # `cls(...)` instead of `Functional.from_config`. - from keras.engine import functional # pylint: disable=g-import-not-at-top - with generic_utils.SharedObjectLoadingScope(): - functional_model_keys = [ - 'name', 'layers', 'input_layers', 'output_layers' - ] - if all(key in config for key in functional_model_keys): - inputs, outputs, layers = functional.reconstruct_from_config( - config, custom_objects) - model = cls(inputs=inputs, outputs=outputs, name=config.get('name')) - functional.connect_ancillary_layers(model, layers) - return model - - # The config does not contain all the information necessary to revive a - # Functional model. This happens when the user creates subclassed models - # where `get_config()` is returning insufficient information to be - # considered a Functional model. In this case, we fall back to provide - # all config into the constructor of the class. - optimizer, loss = None, None - - optimizer_dict = config.pop('optimizer', {}) - if optimizer_dict: - optimizer = saving_lib.deserialize_keras_object(optimizer_dict) - - loss_dict = config.pop('loss', {}) - if loss_dict: - loss = saving_lib.deserialize_keras_object(loss_dict) - - input_shape = config.pop('input_shape', {}) - - try: - model = cls(**config) - except TypeError as e: - raise TypeError('Unable to revive model from config. When overriding ' - 'the `get_config()`, make sure that the returned ' - 'config contains all items used as arguments in the ' - f'constructor to {cls}, which is the default behavior. ' - 'You can override this default behavior by defining a ' - '`from_config` method to specify how to create an ' - f'instance of {cls.__name__} from the config. 
\n\n' - f'Error encountered during deserialization:\n{e}') - - if saving_lib._ENABLED: # pylint: disable=protected-access - - if optimizer or loss: - model.compile(optimizer=optimizer, loss=loss) - - if input_shape: - model.build(input_shape) - - return model - - def to_json(self, **kwargs): - """Returns a JSON string containing the network configuration. - - To load a network from a JSON save file, use - `keras.models.model_from_json(json_string, custom_objects={})`. - - Args: - **kwargs: Additional keyword arguments to be passed to `json.dumps()`. - - Returns: - A JSON string. - """ - model_config = self._updated_config() - return json.dumps( - model_config, default=json_utils.get_json_type, **kwargs) - - def to_yaml(self, **kwargs): - """Returns a yaml string containing the network configuration. - - Note: Since TF 2.6, this method is no longer supported and will raise a - RuntimeError. - - To load a network from a yaml save file, use - `keras.models.model_from_yaml(yaml_string, custom_objects={})`. - - `custom_objects` should be a dictionary mapping - the names of custom losses / layers / etc to the corresponding - functions / classes. - - Args: - **kwargs: Additional keyword arguments - to be passed to `yaml.dump()`. - - Returns: - A YAML string. - - Raises: - RuntimeError: announces that the method poses a security risk - """ - raise RuntimeError( - 'Method `model.to_yaml()` has been removed due to security risk of ' - 'arbitrary code execution. Please use `model.to_json()` instead.' + # First, identify constant inner dimensions by finding the + # rightmost dimension that is not constant + constant_inner_dimensions = ( + constant_dims.numpy().tolist()[::-1].index(False) ) - - def reset_states(self): - for layer in self.layers: - if hasattr(layer, 'reset_states') and getattr(layer, 'stateful', False): - layer.reset_states() - - @property - @doc_controls.do_not_generate_docs - def state_updates(self): - """Deprecated, do NOT use! - - Returns the `updates` from all layers that are stateful. - - This is useful for separating training updates and - state updates, e.g. when we need to update a layer's internal state - during prediction. - - Returns: - A list of update ops. - """ - warnings.warn( - '`Model.state_updates` will be removed in a future version. ' - 'This property should not be used in TensorFlow 2.0, ' - 'as `updates` are applied automatically.', - stacklevel=2) - state_updates = [] - for layer in self.layers: - if getattr(layer, 'stateful', False): - if hasattr(layer, 'updates'): - state_updates += layer.updates - return state_updates - - @property - def weights(self): - """Returns the list of all layer variables/weights. - - Note: This will not track the weights of nested `tf.Modules` that are not - themselves Keras layers. - - Returns: - A list of variables. - """ - return self._dedup_weights(self._undeduplicated_weights) - - @property - def _undeduplicated_weights(self): - """Returns the undeduplicated list of all layer variables/weights.""" - self._assert_weights_created() - weights = [] - for layer in self._self_tracked_trackables: - weights += layer.variables - weights += (self._trainable_weights + self._non_trainable_weights) - return weights - - def summary(self, - line_length=None, - positions=None, - print_fn=None, - expand_nested=False, - show_trainable=False): - """Prints a string summary of the network. - - Args: - line_length: Total length of printed lines - (e.g. set this to adapt the display to different - terminal window sizes). 
- positions: Relative or absolute positions of log elements - in each line. If not provided, - defaults to `[.33, .55, .67, 1.]`. - print_fn: Print function to use. Defaults to `print`. - It will be called on each line of the summary. - You can set it to a custom function - in order to capture the string summary. - expand_nested: Whether to expand the nested models. - If not provided, defaults to `False`. - show_trainable: Whether to show if a layer is trainable. - If not provided, defaults to `False`. - - Raises: - ValueError: if `summary()` is called before the model is built. - """ - if not self.built: - raise ValueError( - 'This model has not yet been built. ' - 'Build the model first by calling `build()` or by calling ' - 'the model on a batch of data.') - layer_utils.print_summary( - self, - line_length=line_length, - positions=positions, - print_fn=print_fn, - expand_nested=expand_nested, - show_trainable=show_trainable) - - @property - def layers(self): - return list(self._flatten_layers(include_self=False, recursive=False)) - - @layers.setter - def layers(self, _): - raise AttributeError( - '`Model.layers` attribute is reserved and should not be used. ' - 'Please use another name.') - - def get_layer(self, name=None, index=None): - """Retrieves a layer based on either its name (unique) or index. - - If `name` and `index` are both provided, `index` will take precedence. - Indices are based on order of horizontal graph traversal (bottom-up). - - Args: - name: String, name of layer. - index: Integer, index of layer. - - Returns: - A layer instance. - """ - # TODO(fchollet): We could build a dictionary based on layer names - # since they are constant, but we have not done that yet. - if index is not None and name is not None: - raise ValueError('Provide only a layer name or a layer index. Received: ' - f'index={index}, name={name}.') - - if index is not None: - if len(self.layers) <= index: - raise ValueError(f'Was asked to retrieve layer at index {index}' - f' but model only has {len(self.layers)}' - ' layers.') - else: - return self.layers[index] - - if name is not None: - for layer in self.layers: - if layer.name == name: - return layer - raise ValueError(f'No such layer: {name}. Existing layers are: ' - f'{list(layer.name for layer in self.layers)}.') - raise ValueError('Provide either a layer name or layer index at ' - '`get_layer`.') - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _set_save_spec(self, inputs, args=None, kwargs=None): - """Defines the save spec so that serialization is able to trace model call. - - The TensorSpecs of the call function `inputs`, `args`, and `kwargs` are - saved into a tuple of `([inputs] + args, kwargs)`. The input `TensorSpec` - names are updated to match the built `input_names`. - - The specs can be retrieved with the `save_spec` property. - - Args: - inputs: possibly nested inputs passed into the call function. - args: a list of positional arguments passed into call. - kwargs: a dictionary of keyword arguments passed into call. - """ - if self._saved_model_inputs_spec is not None: - return # Already set. 
- args = args or [] - kwargs = kwargs or {} - - input_names = self.input_names - if not input_names: - input_names = compile_utils.create_pseudo_input_names(inputs) - - flat_inputs = tf.nest.flatten(inputs) - inputs_spec = [] - for name, tensor in zip(input_names, flat_inputs): - inputs_spec.append( - tf_utils.get_tensor_spec(tensor, dynamic_batch=False, name=name)) - inputs_spec = tf.nest.pack_sequence_as(inputs, inputs_spec) - super()._set_save_spec(inputs_spec, args, kwargs) - - # Store the input shapes - if (self.__class__.__name__ == 'Sequential' and - self._build_input_shape is None): - self._build_input_shape = tf.nest.map_structure( - lambda x: None if x is None else x.shape, inputs_spec) - - def save_spec(self, dynamic_batch=True): - """Returns the `tf.TensorSpec` of call inputs as a tuple `(args, kwargs)`. - - This value is automatically defined after calling the model for the first - time. Afterwards, you can use it when exporting the model for serving: - - ```python - model = tf.keras.Model(...) - - @tf.function - def serve(*args, **kwargs): - outputs = model(*args, **kwargs) - # Apply postprocessing steps, or add additional outputs. - ... - return outputs - - # arg_specs is `[tf.TensorSpec(...), ...]`. kwarg_specs, in this example, is - # an empty dict since functional models do not use keyword arguments. - arg_specs, kwarg_specs = model.save_spec() - - model.save(path, signatures={ - 'serving_default': serve.get_concrete_function(*arg_specs, **kwarg_specs) - }) - ``` - - Args: - dynamic_batch: Whether to set the batch sizes of all the returned - `tf.TensorSpec` to `None`. (Note that when defining functional or - Sequential models with `tf.keras.Input([...], batch_size=X)`, the - batch size will always be preserved). Defaults to `True`. - Returns: - If the model inputs are defined, returns a tuple `(args, kwargs)`. All - elements in `args` and `kwargs` are `tf.TensorSpec`. - If the model inputs are not defined, returns `None`. - The model inputs are automatically set when calling the model, - `model.fit`, `model.evaluate` or `model.predict`. - """ - return self._get_save_spec(dynamic_batch, inputs_only=False) - - def _assert_weights_created(self): - """Asserts that all the weights for the model have been created. - - For a non-dynamic model, the weights must already be created after the - layer has been called. For a dynamic model, the exact list of weights can - never be known for certain since it may change at any time during execution. - - We run this check right before accessing weights or getting the Numpy value - for the current weights. Otherwise, if the layer has never been called, - the user would just get an empty list, which is misleading. - - Raises: - ValueError: if the weights of the network have not yet been created. - """ - if self.dynamic: - return - - if ('build' in self.__class__.__dict__ and - self.__class__ != Model and - not self.built): - # For any model that has customized build() method but hasn't - # been invoked yet, this will cover both sequential and subclass model. - # Also make sure to exclude Model class itself which has build() defined. - raise ValueError(f'Weights for model {self.name} have not yet been ' - 'created. ' - 'Weights are created when the Model is first called on ' - 'inputs or `build()` is called with an `input_shape`.') - - def _check_call_args(self, method_name): - """Check that `call()` has only one positional arg.""" - # Always allow first arg, regardless of arg name. 
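A hedged sketch (not part of the diff) of the invariant `_assert_weights_created` checks: a subclassed model with a custom `build()` exposes no weights until it is built. The class name and shapes below are illustrative.

```python
from tensorflow import keras

class TinyModel(keras.Model):
    def build(self, input_shape):
        self.dense = keras.layers.Dense(1)
        self.dense.build(input_shape)  # create the kernel/bias variables
        super().build(input_shape)

    def call(self, x):
        return self.dense(x)

model = TinyModel()
# Reading `model.weights` here would raise a ValueError: the custom
# build() has not run yet, so no variables exist.
model.build(input_shape=(None, 3))
print(len(model.weights))  # 2: kernel and bias
```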
- fullargspec = self._call_spec.full_argspec - if fullargspec.defaults: - positional_args = fullargspec.args[:-len(fullargspec.defaults)] - else: - positional_args = fullargspec.args - if 'training' in positional_args: - positional_args.remove('training') - - # self and first arg can be positional. - if len(positional_args) > 2: - extra_args = positional_args[2:] - raise ValueError( - f'Models passed to `{method_name}` can only have `training` ' - 'and the first argument in `call()` as positional arguments, ' - f'found: {extra_args}.') - - def _validate_compile(self, optimizer, metrics, **kwargs): - """Performs validation checks for the default `compile()`.""" - if any( - isinstance(opt, optimizer_v1.Optimizer) - for opt in tf.nest.flatten(optimizer)): - raise ValueError( - f'`tf.compat.v1.keras` Optimizer ({optimizer}) is ' - 'not supported when eager execution is enabled. Use a ' - '`tf.keras` Optimizer instead, or disable eager ' - 'execution.') - - kwargs.pop('cloning', None) # Legacy DistStrat argument, never used. - kwargs.pop('experimental_run_tf_function', None) # Always `True`. - distribute_arg = kwargs.pop('distribute', None) - if distribute_arg is not None: - raise ValueError( - '`distribute` argument in compile is not available in TF 2.0. Please ' - 'create the model under the `strategy.scope()`. Received: ' - f'{distribute_arg}.') - target_tensor_arg = kwargs.pop('target_tensors', None) - if target_tensor_arg is not None: - raise ValueError( - '`target_tensors` argument is not supported when executing eagerly. ' - f'Received: {target_tensor_arg}.') - invalid_kwargs = set(kwargs) - {'sample_weight_mode'} - if invalid_kwargs: - raise TypeError('Invalid keyword argument(s) in `compile()`: ' - f'{(invalid_kwargs,)}. Valid keyword arguments include ' - '"cloning", "experimental_run_tf_function", "distribute",' - ' "target_tensors", or "sample_weight_mode".') - - # Model must be created and compiled with the same DistStrat. - if self.built and tf.distribute.has_strategy(): - strategy = tf.distribute.get_strategy() - for v in self.variables: - if not strategy.extended.variable_created_in_scope(v): - raise ValueError( - f'Variable ({v}) was not created in the distribution strategy ' - f'scope of ({strategy}). It is most likely because some ' - 'layers, model, or optimizer was being created outside the ' - 'distribution strategy scope. Try to make sure your code looks ' - 'similar to the following.\n' - 'with strategy.scope():\n' - ' model=_create_model()\n' - ' model.compile(...)') - - # Model metrics must be created in the same distribution strategy scope - # as the model. - strategy = self.distribute_strategy - for metric in tf.nest.flatten(metrics): - for v in getattr(metric, 'variables', []): - if not strategy.extended.variable_created_in_scope(v): - raise ValueError( - f'Metric ({metric}) passed to `model.compile` was created inside ' - 'a different distribution strategy scope than the model. All ' - 'metrics must be created in the same distribution strategy ' - f'scope as the model (in this case {strategy}). If you pass in a ' - 'string identifier for a metric to compile, the metric will ' - 'automatically be created in the correct distribution ' - 'strategy scope.' - ) - - # Model metrics must be created in the same distribution strategy scope - # as the model. 
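The error messages above prescribe a usage pattern; a minimal sketch of it (illustrative model, not from the diff):

```python
import tensorflow as tf
from tensorflow import keras

strategy = tf.distribute.MirroredStrategy()

# The model, its optimizer, and any metric variables must all be
# created inside the same strategy scope, or compile() raises.
with strategy.scope():
    model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
    model.compile(
        optimizer="sgd",
        loss="mse",
        metrics=[keras.metrics.MeanAbsoluteError()],
    )
```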
- for opt in tf.nest.flatten(optimizer): - for v in getattr(opt, '_weights', []): - if not strategy.extended.variable_created_in_scope(v): - raise ValueError( - f'Optimizer ({optimizer}) passed to `model.compile` was created ' - 'inside a different distribution strategy scope than the model. ' - 'All optimizers must be created in the same distribution ' - f'strategy scope as the model (in this case {strategy}). If you ' - 'pass in a string identifier for an optimizer to compile, the ' - 'optimizer will automatically be created in the correct ' - 'distribution strategy scope.' - ) - - def _maybe_load_initial_epoch_from_ckpt(self, initial_epoch): - """Maybe load initial epoch from ckpt considering possible worker recovery. - - Refer to tensorflow/python/keras/distribute/worker_training_state.py - for more information. - - Args: - initial_epoch: The original initial_epoch user passes in in `fit()`. - - Returns: - If the training is recovering from previous failure under multi-worker - training setting, return the epoch the training is supposed to continue - at. Otherwise, return the `initial_epoch` the user passes in. - """ - if self._training_state is not None: - return self._training_state.maybe_load_initial_epoch_from_ckpt( - initial_epoch, mode=ModeKeys.TRAIN) - - return initial_epoch - - def _maybe_load_initial_step_from_ckpt(self): - if getattr(self, '_callback_step', 0) > 0: - return self._callback_step.numpy() + 1 - - return 0 - - def _assert_compile_was_called(self): - # Checks whether `compile` has been called. If it has been called, - # then the optimizer is set. This is different from whether the - # model is compiled - # (i.e. whether the model is built and its inputs/outputs are set). - if not self._is_compiled: - raise RuntimeError('You must compile your model before ' - 'training/testing. ' - 'Use `model.compile(optimizer, loss)`.') - - def _check_sample_weight_warning(self, x, sample_weight): - # Datasets can include sample weight, by returning a tuple with the - # structure of `(x, y, sample_weight)`. - sample_weight_present = sample_weight is not None or ( - isinstance(x, tf.data.Dataset) and isinstance(x.element_spec, tuple) and - len(x.element_spec) == 3) - - # pylint: disable=protected-access - if (sample_weight_present and - self.compiled_metrics._user_weighted_metrics is None): - logging.warning( - '`evaluate()` received a value for `sample_weight`, but ' - '`weighted_metrics` were not provided. Did you mean to pass metrics ' - 'to `weighted_metrics` in `compile()`? If this is intentional ' - 'you can pass `weighted_metrics=[]` to `compile()` in order to ' - 'silence this warning.') - - def _set_inputs(self, inputs, outputs=None, training=None): - """This method is for compat with Modelv1. Only inputs are needed here.""" - self._set_save_spec(inputs) - - @property - def _trackable_saved_model_saver(self): - return model_serialization.ModelSavedModelSaver(self) - - def _trackable_children(self, save_type='checkpoint', **kwargs): - if save_type == 'savedmodel': - # SavedModel needs to ignore the execution functions. 
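A short sketch (not part of the diff) of the opt-out that `_check_sample_weight_warning` suggests; the data shapes are arbitrary.

```python
import numpy as np
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
# An explicit empty `weighted_metrics` list silences the warning that
# is otherwise logged when `sample_weight` is supplied.
model.compile(optimizer="sgd", loss="mse", weighted_metrics=[])

x = np.random.rand(8, 4)
y = np.random.rand(8, 1)
model.evaluate(x, y, sample_weight=np.ones(8), verbose=0)
```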
- train_function = self.train_function - test_function = self.test_function - predict_function = self.predict_function - train_tf_function = self.train_tf_function - self.train_function = None - self.test_function = None - self.predict_function = None - self.train_tf_function = None - - children = super()._trackable_children(save_type, **kwargs) - - if save_type == 'savedmodel': - self.train_function = train_function - self.test_function = test_function - self.predict_function = predict_function - self.train_tf_function = train_tf_function - - return children - - def _should_eval(self, epoch, validation_freq): - epoch = epoch + 1 # one-index the user-facing epoch. - if isinstance(validation_freq, int): - return epoch % validation_freq == 0 - elif isinstance(validation_freq, list): - return epoch in validation_freq + # If there are constant inner dimensions, define a constant inner shape + if constant_inner_dimensions == 0: + constant_inner_shape = None else: - raise ValueError('Expected `validation_freq` to be a list or int. ' - f'Received: validation_freq={validation_freq} of the ' - f'type {type(validation_freq)}.') - - ###################################################################### - # Functions below exist only as v1 / v2 compatibility shims. - ###################################################################### - - def _get_compile_args(self, user_metrics=True): - """Used for saving or cloning a Model. - - Args: - user_metrics: Whether to return user-supplied metrics or `Metric` objects. - Defaults to returning the user-supplied metrics. - - Returns: - Dictionary of arguments that were used when compiling the model. - """ - self._assert_compile_was_called() - # pylint: disable=protected-access - - saved_metrics = self.compiled_metrics._user_metrics - saved_weighted_metrics = self.compiled_metrics._user_weighted_metrics - - if not user_metrics: - if saved_metrics is not None: - saved_metrics = self.compiled_metrics._metrics - if saved_weighted_metrics is not None: - saved_weighted_metrics = self.compiled_metrics._weighted_metrics - - compile_args = { - 'optimizer': self.optimizer, - 'loss': self.compiled_loss._user_losses, - 'metrics': saved_metrics, - 'weighted_metrics': saved_weighted_metrics, - 'loss_weights': self.compiled_loss._user_loss_weights, - } - # pylint: enable=protected-access - return compile_args - - def _get_callback_model(self): - return self - - def _in_multi_worker_mode(self): - return self.distribute_strategy.extended._in_multi_worker_mode() # pylint: disable=protected-access - - @property - def _compile_was_called(self): - return self._is_compiled - - def _save_new(self, dirpath): - return saving_lib.save(self, dirpath) - - -def reduce_per_replica(values, strategy, reduction='first'): - """Reduce PerReplica objects. - - Args: - values: Structure of `PerReplica` objects or `Tensor`s. `Tensor`s are - returned as-is. - strategy: `tf.distribute.Strategy` object. - reduction: One of 'first', 'concat'. - - Returns: - Structure of `Tensor`s. 
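For context (not part of the diff), `_should_eval` above is what gives `validation_freq` its two accepted forms; a sketch with illustrative data:

```python
import numpy as np
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(2,))])
model.compile(optimizer="sgd", loss="mse")
x, y = np.random.rand(32, 2), np.random.rand(32, 1)

# An int validates every N epochs; a list names the exact (1-indexed)
# epochs on which to validate.
model.fit(x, y, validation_split=0.25, epochs=4, validation_freq=2, verbose=0)
model.fit(x, y, validation_split=0.25, epochs=10, validation_freq=[1, 2, 10], verbose=0)
```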
- """ - - def _reduce(v): - """Reduce a single `PerReplica` object.""" - if reduction == 'concat' and _collective_all_reduce_multi_worker(strategy): - return _multi_worker_concat(v, strategy) - if not _is_per_replica_instance(v): - return v - elif reduction == 'first': - return strategy.experimental_local_results(v)[0] - elif reduction == 'concat': - if _is_tpu_multi_host(strategy): - return _tpu_multi_host_concat(v, strategy) - else: - return concat(strategy.experimental_local_results(v)) + constant_inner_shape = tensors[0].shape[-constant_inner_dimensions:] + return tf.ragged.constant( + [tensor.numpy() for tensor in tensors], inner_shape=constant_inner_shape + ).merge_dims(0, 1) + + +def _reduce_dtensor_per_replica(value, strategy, reduction): + # Note that this function could happen in graph, so we can't just access + # the per-replica.values(), which will trigger unpack in graph and result + # into error. + # For now we will perform ops on dtensor instance directly on a global + # context. + dtensor = value._dtensor + if reduction == "first": + num_replica = strategy.num_replicas_in_sync + return tf.split(dtensor, num_replica, axis=0)[0] + elif reduction == "concat": + # Since dtensor is already in global context, the concat is a no-op + return dtensor + elif reduction == "sum": + return tf.reduce_sum(dtensor) else: - raise ValueError('`reduction` must be "first" or "concat". Received: ' - f'reduction={reduction}.') - - return tf.nest.map_structure(_reduce, values) - - -def concat(tensors, axis=0): - """Concats `tensor`s along `axis`.""" - if isinstance(tensors[0], tf.SparseTensor): - return tf.sparse.concat(axis=axis, sp_inputs=tensors) - return tf.concat(tensors, axis=axis) - - -def potentially_ragged_concat(tensors): - """Concats `Tensor`s along their first dimension. - - Args: - tensors: List of `Tensor`s. - - Returns: - Concatenation of the inputs along the first dimension -- of type `Tensor` - if all input shapes are compatible, or `RaggedTensor` if not. - """ - if len(tensors) == 1: - return tensors[0] - if isinstance(tensors[0], tf.SparseTensor): - return tf.sparse.concat(axis=0, sp_inputs=tensors) - elif isinstance(tensors[0], tf.RaggedTensor): - return tf.concat(tensors, axis=0) - elif not tf.__internal__.tf2.enabled(): - return tf.concat(tensors, axis=0) - - non_batch_shapes = tf.stack([tf.shape(tensor)[1:] for tensor in tensors]) - constant_dims = tf.math.reduce_all( - non_batch_shapes == non_batch_shapes[:1], axis=0) - if tf.math.reduce_all(constant_dims).numpy().item(): - # All non-batch dims are constant - return tf.concat(tensors, axis=0) - - # First, identify constant inner dimensions by finding the - # rightmost dimension that is not constant - constant_inner_dimensions = constant_dims.numpy().tolist()[::-1].index(False) - # If there are constant inner dimensions, define a constant inner shape - if constant_inner_dimensions == 0: - constant_inner_shape = None - else: - constant_inner_shape = tensors[0].shape[-constant_inner_dimensions:] - return tf.ragged.constant([tensor.numpy() for tensor in tensors], - inner_shape=constant_inner_shape).merge_dims(0, 1) + raise ValueError( + '`reduction` must be one of "first", "concat", "sum", or "auto". ' + f"Received: reduction={reduction}." 
+ ) def _get_verbosity(verbose, distribute_strategy): - """Find the right verbosity value for 'auto'.""" - if verbose == 1 and distribute_strategy._should_use_with_coordinator: # pylint: disable=protected-access - raise ValueError( - '`verbose=1` is not allowed with `ParameterServerStrategy` for ' - f'performance reasons. Received: verbose={verbose}') - if verbose == 'auto': - if (distribute_strategy._should_use_with_coordinator or # pylint: disable=protected-access - not io_utils.is_interactive_logging_enabled()): - # Default to epoch-level logging for PSStrategy or using absl logging. - return 2 - else: - return 1 # Default to batch-level logging otherwise. - return verbose + """Find the right verbosity value for 'auto'.""" + if verbose == 1 and distribute_strategy._should_use_with_coordinator: + raise ValueError( + "`verbose=1` is not allowed with `ParameterServerStrategy` for " + f"performance reasons. Received: verbose={verbose}" + ) + if verbose == "auto": + if ( + distribute_strategy._should_use_with_coordinator + or not io_utils.is_interactive_logging_enabled() + ): + # Defaults to epoch-level logging for PSStrategy or using absl + # logging. + return 2 + else: + return 1 # Defaults to batch-level logging otherwise. + return verbose def _is_tpu_multi_host(strategy): - return (backend.is_tpu_strategy(strategy) and - strategy.extended.num_hosts > 1) + return backend.is_tpu_strategy(strategy) and strategy.extended.num_hosts > 1 def _tpu_multi_host_concat(v, strategy): - """Correctly order TPU PerReplica objects.""" - replicas = strategy.experimental_local_results(v) - # When distributed datasets are created from Tensors / NumPy, - # TPUStrategy.experimental_distribute_dataset shards data in - # (Replica, Host) order, and TPUStrategy.experimental_local_results returns - # it in (Host, Replica) order. - # TODO(b/150317897): Figure out long-term plan here. - num_replicas_per_host = strategy.extended.num_replicas_per_host - ordered_replicas = [] - for replica_id in range(num_replicas_per_host): - ordered_replicas += replicas[replica_id::num_replicas_per_host] - return concat(ordered_replicas) + """Correctly order TPU PerReplica objects.""" + replicas = strategy.experimental_local_results(v) + # When distributed datasets are created from Tensors / NumPy, + # TPUStrategy.experimental_distribute_dataset shards data in + # (Replica, Host) order, and TPUStrategy.experimental_local_results returns + # it in (Host, Replica) order. + # TODO(b/150317897): Figure out long-term plan here. 
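A hedged illustration (not part of the diff) of the ragged fallback in `potentially_ragged_concat` above, built from the same public ops the function uses; the shapes are made up.

```python
import tensorflow as tf

# Two prediction batches whose middle dimension varies but whose
# innermost dimension (4) is constant.
a = tf.zeros([2, 3, 4])
b = tf.zeros([2, 5, 4])

# Mirrors the fallback above: wrap the batches in a RaggedTensor with
# the constant inner shape, then merge the two leading dimensions.
merged = tf.ragged.constant(
    [a.numpy(), b.numpy()], inner_shape=(4,)
).merge_dims(0, 1)
print(merged.shape)  # (4, None, 4)
```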
+ num_replicas_per_host = strategy.extended.num_replicas_per_host + ordered_replicas = [] + for replica_id in range(num_replicas_per_host): + ordered_replicas += replicas[replica_id::num_replicas_per_host] + return concat(ordered_replicas) def _collective_all_reduce_multi_worker(strategy): - return (isinstance(strategy, - tf.distribute.MultiWorkerMirroredStrategy) - ) and strategy.extended._in_multi_worker_mode() # pylint: disable=protected-access + return ( + isinstance(strategy, tf.distribute.MultiWorkerMirroredStrategy) + ) and strategy.extended._in_multi_worker_mode() # TODO(wxinyi): merge this with _tpu_multi_host_concat once we have all_gather # for all strategies def _multi_worker_concat(v, strategy): - """Order PerReplica objects for CollectiveAllReduceStrategy and concat.""" - replicas = strategy.gather(v, axis=0) - # v might not have the same shape on different replicas - if _is_per_replica_instance(v): - shapes = tf.concat([ - tf.expand_dims(tf.shape(single_value)[0], axis=0) - for single_value in v.values - ], axis=0) - all_shapes = strategy.gather(shapes, axis=0) - else: - # v is a tensor. This may happen when, say, we have 2x1 multi-worker. - all_shapes = strategy.gather( - tf.expand_dims(tf.shape(v)[0], axis=0), axis=0) - - replicas = tf.split( - replicas, - num_or_size_splits=all_shapes, - num=strategy.num_replicas_in_sync) - ordered_replicas = [] - num_replicas_per_worker = len(strategy.extended.worker_devices) - for replica_id in range(num_replicas_per_worker): - ordered_replicas += replicas[replica_id::num_replicas_per_worker] - return concat(ordered_replicas) + """Order PerReplica objects for CollectiveAllReduceStrategy and concat.""" + replicas = strategy.gather(v, axis=0) + # v might not have the same shape on different replicas + if _is_per_replica_instance(v): + shapes = tf.concat( + [ + tf.expand_dims(tf.shape(single_value)[0], axis=0) + for single_value in v.values + ], + axis=0, + ) + all_shapes = strategy.gather(shapes, axis=0) + else: + # v is a tensor. This may happen when, say, we have 2x1 multi-worker. + all_shapes = strategy.gather( + tf.expand_dims(tf.shape(v)[0], axis=0), axis=0 + ) + + replicas = tf.split( + replicas, + num_or_size_splits=all_shapes, + num=strategy.num_replicas_in_sync, + ) + ordered_replicas = [] + num_replicas_per_worker = len(strategy.extended.worker_devices) + for replica_id in range(num_replicas_per_worker): + ordered_replicas += replicas[replica_id::num_replicas_per_worker] + return concat(ordered_replicas) def _is_scalar(x): - return isinstance(x, (tf.Tensor, tf.Variable)) and x.shape.rank == 0 + return isinstance(x, (tf.Tensor, tf.Variable)) and x.shape.rank == 0 def _minimum_control_deps(outputs): - """Returns the minimum control dependencies to ensure step succeeded.""" - if tf.executing_eagerly(): - return [] # Control dependencies not needed. - outputs = tf.nest.flatten(outputs, expand_composites=True) - for out in outputs: - # Variables can't be control dependencies. - if not isinstance(out, tf.Variable): - return [out] # Return first Tensor or Op from outputs. - return [] # No viable Tensor or Op to use for control deps. + """Returns the minimum control dependencies to ensure step succeeded.""" + if tf.executing_eagerly(): + return [] # Control dependencies not needed. + outputs = tf.nest.flatten(outputs, expand_composites=True) + for out in outputs: + # Variables can't be control dependencies. + if not isinstance(out, tf.Variable): + return [out] # Return first Tensor or Op from outputs. 
+ return [] # No viable Tensor or Op to use for control deps. def _disallow_inside_tf_function(method_name): - if tf.inside_function(): - error_msg = ( - 'Detected a call to `Model.{method_name}` inside a `tf.function`. ' - '`Model.{method_name} is a high-level endpoint that manages its own ' - '`tf.function`. Please move the call to `Model.{method_name}` outside ' - 'of all enclosing `tf.function`s. Note that you can call a `Model` ' - 'directly on `Tensor`s inside a `tf.function` like: `model(x)`.' - ).format(method_name=method_name) - raise RuntimeError(error_msg) - - -def _detect_save_format(filepath): - """Returns path to weights file and save format.""" - - filepath = io_utils.path_to_string(filepath) - if saving_utils.is_hdf5_filepath(filepath): - return filepath, 'h5' - - # Filepath could be a TensorFlow checkpoint file prefix or SavedModel - # directory. It's possible for filepath to be both a prefix and directory. - # Prioritize checkpoint over SavedModel. - if _is_readable_tf_checkpoint(filepath): - save_format = 'tf' - elif tf.saved_model.contains_saved_model(filepath): - ckpt_path = os.path.join(filepath, tf.saved_model.VARIABLES_DIRECTORY, - tf.saved_model.VARIABLES_FILENAME) - if _is_readable_tf_checkpoint(ckpt_path): - filepath = ckpt_path - save_format = 'tf' - else: - raise ValueError('Unable to load weights. filepath {} appears to be a ' - 'SavedModel directory, but checkpoint either doesn\'t ' - 'exist, or is incorrectly formatted.'.format(filepath)) - else: - # Not a TensorFlow checkpoint. This filepath is likely an H5 file that - # doesn't have the hdf5/keras extensions. - save_format = 'h5' - return filepath, save_format - - -def _is_readable_tf_checkpoint(filepath): - try: - tf.compat.v1.train.NewCheckpointReader(filepath) - return True - except tf.errors.DataLossError: - # The checkpoint is not readable in TensorFlow format. - return False + if tf.inside_function(): + error_msg = ( + "Detected a call to `Model.{method_name}` inside a `tf.function`. " + "`Model.{method_name} is a high-level endpoint that manages its " + "own `tf.function`. Please move the call to `Model.{method_name}` " + "outside of all enclosing `tf.function`s. Note that you can call a " + "`Model` directly on `Tensor`s inside a `tf.function` like: " + "`model(x)`." 
+ ).format(method_name=method_name) + raise RuntimeError(error_msg) def flatten_metrics_in_order(logs, metrics_names): - """Turns the `logs` dict into a list as per key order of `metrics_names`.""" - results = [] - for name in metrics_names: - if name in logs: - results.append(logs[name]) - for key in sorted(logs.keys()): - if key not in metrics_names: - results.append(logs[key]) - if len(results) == 1: - return results[0] - return results + """Turns the `logs` dict into a list as per key order of `metrics_names`.""" + results = [] + for name in metrics_names: + if name in logs: + results.append(logs[name]) + for key in sorted(logs.keys()): + if key not in metrics_names: + results.append(logs[key]) + if len(results) == 1: + return results[0] + return results def _is_per_replica_instance(obj): - return (isinstance(obj, tf.distribute.DistributedValues) and - isinstance(obj, tf.__internal__.CompositeTensor)) + return isinstance(obj, tf.distribute.DistributedValues) and isinstance( + obj, tf.__internal__.CompositeTensor + ) -def disable_multi_worker(method): - """Decorator that disallows multi-worker use of `method`.""" +def _is_dtensor_per_replica_instance(obj): + # This is a temp check for DTensorDistributedValue, which is not public API + # yet. + # TODO(scottzhu): Move to more stable API when dtensor based strategy is + # ready. + return isinstance(obj, tf.distribute.DistributedValues) and hasattr( + obj, "_dtensor" + ) - def _method_wrapper(self, *args, **kwargs): - if self._in_multi_worker_mode(): # pylint: disable=protected-access - raise ValueError(f'{method.__name__} is not supported in multi-worker ' - 'mode. Please use a non-multi-worker ' - '`tf.distribute.Strategy` such as ' - '`tf.distribute.MirroredStrategy`.') - return method(self, *args, **kwargs) - return tf.__internal__.decorator.make_decorator( - target=method, decorator_func=_method_wrapper) +def disable_multi_worker(method): + """Decorator that disallows multi-worker use of `method`.""" + + def _method_wrapper(self, *args, **kwargs): + if self._in_multi_worker_mode(): + raise ValueError( + f"{method.__name__} is not supported in multi-worker " + "mode. Please use a non-multi-worker " + "`tf.distribute.Strategy` such as " + "`tf.distribute.MirroredStrategy`." + ) + return method(self, *args, **kwargs) + + return tf.__internal__.decorator.make_decorator( + target=method, decorator_func=_method_wrapper + ) def inject_functional_model_class(cls): - """Inject `Functional` into the hierarchy of this class if needed.""" - from keras.engine import functional # pylint: disable=g-import-not-at-top - from keras.engine import training_v1 # pylint: disable=g-import-not-at-top - if cls == Model or cls == training_v1.Model: - return functional.Functional - # In case there is any multiple inheritance, we stop injecting the - # class if keras model is not in its class hierarchy. - if cls == object: - return object - - cls.__bases__ = tuple(inject_functional_model_class(base) - for base in cls.__bases__) - # Trigger any `__new__` class swapping that needed to happen on `Functional` - # but did not because functional was not in the class hierarchy. - cls.__new__(cls) + """Inject `Functional` into the hierarchy of this class if needed.""" + from keras.engine import functional + from keras.engine import training_v1 + + if cls == Model or cls == training_v1.Model: + return functional.Functional + # In case there is any multiple inheritance, we stop injecting the + # class if keras model is not in its class hierarchy. 
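A minimal sketch (not part of the diff) of the boundary `_disallow_inside_tf_function` draws; the model and shapes are illustrative.

```python
import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])

@tf.function
def forward(x):
    # Calling the model directly inside a tf.function is allowed...
    return model(x)

forward(tf.zeros([2, 4]))
# ...whereas calling `model.predict(x)` inside `forward` would raise a
# RuntimeError, since `predict` manages its own tf.function.
```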
+ if cls == object: + return object + + cls.__bases__ = tuple( + inject_functional_model_class(base) for base in cls.__bases__ + ) + # Trigger any `__new__` class swapping that needed to happen on `Functional` + # but did not because functional was not in the class hierarchy. + cls.__new__(cls) - return cls + return cls def is_functional_model_init_params(args, kwargs): - return (len(args) == 2 or - len(args) == 1 and 'outputs' in kwargs or - 'inputs' in kwargs and 'outputs' in kwargs) + # Both inputs and outputs in args + if len(args) == 2: + return True + # Both inputs in args, outputs in kwargs + if len(args) == 1 and "outputs" in kwargs: + return True + # Both in kwargs + if "inputs" in kwargs and "outputs" in kwargs: + return True + return False diff --git a/keras/engine/training_arrays_test.py b/keras/engine/training_arrays_test.py index f94d6b46c79b..cf85bafc3a25 100644 --- a/keras/engine/training_arrays_test.py +++ b/keras/engine/training_arrays_test.py @@ -14,226 +14,255 @@ # ============================================================================== """Tests for model.fit calls with a Dataset object passed as validation_data.""" -import tensorflow.compat.v2 as tf - import io import sys from unittest import mock -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras -from tensorflow.python.framework import test_util as tf_test_utils from keras.engine import data_adapter +from keras.layers import core from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.layers import core from keras.utils import io_utils +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) + def _create_dataset(num_samples, batch_size): - input_data = np.random.rand(num_samples, 1) - expected_data = input_data * 3 - dataset = tf.data.Dataset.from_tensor_slices((input_data, expected_data)) - return dataset.shuffle(10 * batch_size).batch(batch_size) + input_data = np.random.rand(num_samples, 1) + expected_data = input_data * 3 + dataset = tf.data.Dataset.from_tensor_slices((input_data, expected_data)) + return dataset.shuffle(10 * batch_size).batch(batch_size) @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes(always_skip_v1=True) -class ValidationDatasetAndValidationSplit(test_combinations.TestCase, - parameterized.TestCase): - """Verifies when validation_data is provided validation_split is ignored. - - The validation_split arg can't be passed in v1 mode because - training_utils_v1.py:validate_dataset_input will raise a ValueError that - validation_split is not supported when input x is a dataset or a dataset - iterator. - """ - - @parameterized.named_parameters(("with_default_falsey_validation_split", 0.), - ("with_non_falsey_validation_split", 0.1)) - def test_ignore_validation_split_when_validation_dataset_is_present( - self, validation_split): - # Create a model that learns y=Mx. - layers = [core.Dense(1)] - model = test_utils.get_model_from_layers(layers, input_shape=(1,)) - model.compile(loss="mse", optimizer="adam", metrics=["mean_absolute_error"]) - - train_dataset = _create_dataset(num_samples=200, batch_size=10) - eval_dataset = _create_dataset(num_samples=50, batch_size=25) - - # Make sure model.fit doesn't raise an error because of the mocking alone. 
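For reference (not part of the diff), the three call shapes the rewritten predicate accepts, shown with illustrative tensors:

```python
from tensorflow import keras

inp = keras.Input(shape=(4,))
out = keras.layers.Dense(1)(inp)

# Each spelling is detected as a functional-model constructor call.
m1 = keras.Model(inp, out)                 # both positional
m2 = keras.Model(inp, outputs=out)         # inputs positional
m3 = keras.Model(inputs=inp, outputs=out)  # both keyword
```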
- mock_train_validation_split_return = ((train_dataset, None, None), - eval_dataset) - - with mock.patch.object( - data_adapter, - "train_validation_split", - return_value=mock_train_validation_split_return - ) as mock_train_validation_split: - model.fit( - x=train_dataset, - validation_split=validation_split, - validation_data=eval_dataset, - epochs=2) - mock_train_validation_split.assert_not_called() - - history = model.fit( - x=train_dataset, validation_data=eval_dataset, epochs=2) - evaluation = model.evaluate(x=eval_dataset) - - # See test_validation_dataset_with_no_step_arg for details. - self.assertAlmostEqual( - history.history["val_mean_absolute_error"][-1], - evaluation[-1], - places=5) +class ValidationDatasetAndValidationSplit( + test_combinations.TestCase, parameterized.TestCase +): + """Verifies when validation_data is provided validation_split is ignored. + + The validation_split arg can't be passed in v1 mode because + training_utils_v1.py:validate_dataset_input will raise a ValueError that + validation_split is not supported when input x is a dataset or a dataset + iterator. + """ + + @parameterized.named_parameters( + ("with_default_falsey_validation_split", 0.0), + ("with_non_falsey_validation_split", 0.1), + ) + def test_ignore_validation_split_when_validation_dataset_is_present( + self, validation_split + ): + # Create a model that learns y=Mx. + layers = [core.Dense(1)] + model = test_utils.get_model_from_layers(layers, input_shape=(1,)) + model.compile( + loss="mse", optimizer="adam", metrics=["mean_absolute_error"] + ) + + train_dataset = _create_dataset(num_samples=200, batch_size=10) + eval_dataset = _create_dataset(num_samples=50, batch_size=25) + + # Make sure model.fit doesn't raise an error because of the mocking + # alone. + mock_train_validation_split_return = ( + (train_dataset, None, None), + eval_dataset, + ) + + with mock.patch.object( + data_adapter, + "train_validation_split", + return_value=mock_train_validation_split_return, + ) as mock_train_validation_split: + model.fit( + x=train_dataset, + validation_split=validation_split, + validation_data=eval_dataset, + epochs=2, + ) + mock_train_validation_split.assert_not_called() + + history = model.fit( + x=train_dataset, validation_data=eval_dataset, epochs=2 + ) + evaluation = model.evaluate(x=eval_dataset) + + # See test_validation_dataset_with_no_step_arg for details. + self.assertAlmostEqual( + history.history["val_mean_absolute_error"][-1], + evaluation[-1], + places=5, + ) @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes class ValidationDatasetNoLimitTest(test_combinations.TestCase): - - def test_validation_dataset_with_no_step_arg(self): - # Create a model that learns y=Mx. - layers = [core.Dense(1)] - model = test_utils.get_model_from_layers(layers, input_shape=(1,)) - model.compile(loss="mse", optimizer="adam", metrics=["mean_absolute_error"]) - - train_dataset = _create_dataset(num_samples=200, batch_size=10) - eval_dataset = _create_dataset(num_samples=50, batch_size=25) - - history = model.fit(x=train_dataset, validation_data=eval_dataset, epochs=2) - evaluation = model.evaluate(x=eval_dataset) - - # If the fit call used the entire dataset, then the final val MAE error - # from the fit history should be equal to the final element in the output - # of evaluating the model on the same eval dataset. 
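A condensed sketch (not part of the diff) of the behavior this test pins down, with arbitrary synthetic data:

```python
import numpy as np
import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(1,))])
model.compile(loss="mse", optimizer="adam")

x = np.random.rand(200, 1)
train_ds = tf.data.Dataset.from_tensor_slices((x, x * 3)).batch(10)
eval_ds = tf.data.Dataset.from_tensor_slices((x[:50], x[:50] * 3)).batch(25)

# With `validation_data` present, `validation_split` is silently
# ignored instead of raising, even for dataset inputs.
model.fit(train_ds, validation_split=0.1, validation_data=eval_ds, epochs=1, verbose=0)
```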
- self.assertAlmostEqual(history.history["val_mean_absolute_error"][-1], - evaluation[-1], places=5) - - -class PrintTrainingInfoTest(test_combinations.TestCase, - parameterized.TestCase): - - @tf_test_utils.run_v1_only("Only relevant in graph mode.") - def test_print_info_with_datasets(self): - """Print training info should work with val datasets (b/133391839).""" - - model = keras.models.Sequential([keras.layers.Dense(1, input_shape=(1,))]) - model.compile(loss="mse", optimizer="sgd") - - dataset = tf.data.Dataset.from_tensors( - ([1.], [1.])).repeat(100).batch(10) - - val_dataset = tf.data.Dataset.from_tensors( - ([1.], [1.])).repeat(50).batch(10) - - mock_stdout = io.StringIO() - io_utils.enable_interactive_logging() - with tf.compat.v1.test.mock.patch.object(sys, "stdout", mock_stdout): - model.fit(dataset, epochs=2, validation_data=val_dataset) - - self.assertIn( - "Train on 10 steps, validate on 5 steps", mock_stdout.getvalue()) - - @parameterized.named_parameters( - ("with_validation", True), ("without_validation", False)) - @tf_test_utils.run_v1_only("Only relevant in graph mode.") - def test_print_info_with_numpy(self, do_validation): - """Print training info should work with val datasets (b/133391839).""" - - model = keras.models.Sequential([keras.layers.Dense(1, input_shape=(2,))]) - model.compile(loss="mse", optimizer="sgd") - - dataset = np.arange(200).reshape(100, 2) - - if do_validation: - val_data = (np.arange(100).reshape(50, 2), np.arange(50).reshape(50, 1)) - else: - val_data = None - - mock_stdout = io.StringIO() - with tf.compat.v1.test.mock.patch.object(sys, "stdout", mock_stdout): - model.fit(dataset, batch_size=10, epochs=2, validation_data=val_data) - - self.assertIn("Train on 100 samples", mock_stdout.getvalue()) - - if do_validation: - self.assertIn(", validate on 50 samples", mock_stdout.getvalue()) - - @test_combinations.run_all_keras_modes - def test_dict_float64_input(self): - - class MyModel(keras.Model): - - def __init__(self): - super().__init__(self) - self.dense1 = keras.layers.Dense(10, activation="relu") - self.dense2 = keras.layers.Dense(10, activation="relu") - self.concat = keras.layers.Concatenate() - self.dense3 = keras.layers.Dense(1, activation="sigmoid") - - def call(self, inputs): - d1 = self.dense1(inputs["one"]) - d2 = self.dense2(inputs["two"]) - concat = self.concat([d1, d2]) - return self.dense3(concat) - - model = MyModel() - model.compile( - loss="mae", - optimizer="adam", - run_eagerly=test_utils.should_run_eagerly()) - - model.fit( - x={ - "one": np.random.rand(100, 10, 1), - "two": np.random.rand(100, 10, 1) - }, - y=np.random.rand(100, 10, 1)) - - def test_dict_validation_input(self): - """Test case for GitHub issue 30122.""" - train_input_0 = np.random.rand(1000, 1) - train_input_1 = np.random.rand(1000, 1) - train_labels = np.random.rand(1000, 1) - val_input_0 = np.random.rand(1000, 1) - val_input_1 = np.random.rand(1000, 1) - val_labels = np.random.rand(1000, 1) - - input_0 = keras.Input(shape=(None,), name="input_0") - input_1 = keras.Input(shape=(None,), name="input_1") - - class my_model(keras.Model): - - def __init__(self): - super().__init__(self) - self.hidden_layer_0 = keras.layers.Dense(100, activation="relu") - self.hidden_layer_1 = keras.layers.Dense(100, activation="relu") - self.concat = keras.layers.Concatenate() - self.out_layer = keras.layers.Dense(1, activation="sigmoid") - - def call(self, inputs=[input_0, input_1]): - activation_0 = self.hidden_layer_0(inputs["input_0"]) - activation_1 = 
self.hidden_layer_1(inputs["input_1"]) - concat = self.concat([activation_0, activation_1]) - return self.out_layer(concat) - - model = my_model() - model.compile(loss="mae", optimizer="adam") - - model.fit( - x={ - "input_0": train_input_0, - "input_1": train_input_1 - }, - y=train_labels, - validation_data=({ - "input_0": val_input_0, - "input_1": val_input_1 - }, val_labels)) + def test_validation_dataset_with_no_step_arg(self): + # Create a model that learns y=Mx. + layers = [core.Dense(1)] + model = test_utils.get_model_from_layers(layers, input_shape=(1,)) + model.compile( + loss="mse", optimizer="adam", metrics=["mean_absolute_error"] + ) + + train_dataset = _create_dataset(num_samples=200, batch_size=10) + eval_dataset = _create_dataset(num_samples=50, batch_size=25) + + history = model.fit( + x=train_dataset, validation_data=eval_dataset, epochs=2 + ) + evaluation = model.evaluate(x=eval_dataset) + + # If the fit call used the entire dataset, then the final val MAE error + # from the fit history should be equal to the final element in the + # output of evaluating the model on the same eval dataset. + self.assertAlmostEqual( + history.history["val_mean_absolute_error"][-1], + evaluation[-1], + places=5, + ) + + +class PrintTrainingInfoTest(test_combinations.TestCase, parameterized.TestCase): + @tf_test_utils.run_v1_only("Only relevant in graph mode.") + def test_print_info_with_datasets(self): + """Print training info should work with val datasets (b/133391839).""" + + model = keras.models.Sequential( + [keras.layers.Dense(1, input_shape=(1,))] + ) + model.compile(loss="mse", optimizer="sgd") + + dataset = ( + tf.data.Dataset.from_tensors(([1.0], [1.0])).repeat(100).batch(10) + ) + + val_dataset = ( + tf.data.Dataset.from_tensors(([1.0], [1.0])).repeat(50).batch(10) + ) + + mock_stdout = io.StringIO() + io_utils.enable_interactive_logging() + with tf.compat.v1.test.mock.patch.object(sys, "stdout", mock_stdout): + model.fit(dataset, epochs=2, validation_data=val_dataset) + + self.assertIn( + "Train on 10 steps, validate on 5 steps", mock_stdout.getvalue() + ) + + @parameterized.named_parameters( + ("with_validation", True), ("without_validation", False) + ) + @tf_test_utils.run_v1_only("Only relevant in graph mode.") + def test_print_info_with_numpy(self, do_validation): + """Print training info should work with val datasets (b/133391839).""" + + model = keras.models.Sequential( + [keras.layers.Dense(1, input_shape=(2,))] + ) + model.compile(loss="mse", optimizer="sgd") + + dataset = np.arange(200).reshape(100, 2) + + if do_validation: + val_data = ( + np.arange(100).reshape(50, 2), + np.arange(50).reshape(50, 1), + ) + else: + val_data = None + + mock_stdout = io.StringIO() + with tf.compat.v1.test.mock.patch.object(sys, "stdout", mock_stdout): + model.fit( + dataset, batch_size=10, epochs=2, validation_data=val_data + ) + + self.assertIn("Train on 100 samples", mock_stdout.getvalue()) + + if do_validation: + self.assertIn(", validate on 50 samples", mock_stdout.getvalue()) + + @test_combinations.run_all_keras_modes + def test_dict_float64_input(self): + class MyModel(keras.Model): + def __init__(self): + super().__init__(self) + self.dense1 = keras.layers.Dense(10, activation="relu") + self.dense2 = keras.layers.Dense(10, activation="relu") + self.concat = keras.layers.Concatenate() + self.dense3 = keras.layers.Dense(1, activation="sigmoid") + + def call(self, inputs): + d1 = self.dense1(inputs["one"]) + d2 = self.dense2(inputs["two"]) + concat = self.concat([d1, d2]) + return 
self.dense3(concat) + + model = MyModel() + model.compile( + loss="mae", + optimizer="adam", + run_eagerly=test_utils.should_run_eagerly(), + ) + + model.fit( + x={ + "one": np.random.rand(100, 10, 1), + "two": np.random.rand(100, 10, 1), + }, + y=np.random.rand(100, 10, 1), + ) + + def test_dict_validation_input(self): + """Test case for GitHub issue 30122.""" + train_input_0 = np.random.rand(1000, 1) + train_input_1 = np.random.rand(1000, 1) + train_labels = np.random.rand(1000, 1) + val_input_0 = np.random.rand(1000, 1) + val_input_1 = np.random.rand(1000, 1) + val_labels = np.random.rand(1000, 1) + + input_0 = keras.Input(shape=(None,), name="input_0") + input_1 = keras.Input(shape=(None,), name="input_1") + + class my_model(keras.Model): + def __init__(self): + super().__init__(self) + self.hidden_layer_0 = keras.layers.Dense(100, activation="relu") + self.hidden_layer_1 = keras.layers.Dense(100, activation="relu") + self.concat = keras.layers.Concatenate() + self.out_layer = keras.layers.Dense(1, activation="sigmoid") + + def call(self, inputs=[input_0, input_1]): + activation_0 = self.hidden_layer_0(inputs["input_0"]) + activation_1 = self.hidden_layer_1(inputs["input_1"]) + concat = self.concat([activation_0, activation_1]) + return self.out_layer(concat) + + model = my_model() + model.compile(loss="mae", optimizer="adam") + + model.fit( + x={"input_0": train_input_0, "input_1": train_input_1}, + y=train_labels, + validation_data=( + {"input_0": val_input_0, "input_1": val_input_1}, + val_labels, + ), + ) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/engine/training_arrays_v1.py b/keras/engine/training_arrays_v1.py index 463511009263..a3920e2a1a6b 100644 --- a/keras/engine/training_arrays_v1.py +++ b/keras/engine/training_arrays_v1.py @@ -14,693 +14,795 @@ # ============================================================================== """Part of the Keras training engine related to plain array data.""" -import tensorflow.compat.v2 as tf -# pylint: disable=protected-access - import functools import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import callbacks as cbks from keras.distribute import distributed_training_utils_v1 from keras.engine import training_utils_v1 +from keras.utils import io_utils from keras.utils.generic_utils import make_batches from keras.utils.generic_utils import slice_arrays -from keras.utils import io_utils from keras.utils.mode_keys import ModeKeys + +# isort: off from tensorflow.python.platform import tf_logging as logging + try: - from scipy.sparse import issparse # pylint: disable=g-import-not-at-top + from scipy.sparse import issparse except ImportError: - issparse = None - - -def model_iteration(model, - inputs, - targets=None, - sample_weights=None, - batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - val_inputs=None, - val_targets=None, - val_sample_weights=None, - shuffle=True, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_freq=1, - mode=ModeKeys.TRAIN, - validation_in_fit=False, - prepared_feed_values_from_dataset=False, - steps_name='steps', - **kwargs): - """Loop function for arrays of data with modes TRAIN/TEST/PREDICT. - - Args: - model: Keras Model instance. - inputs: Either a list or dictionary of arrays, or a dataset instance. - targets: List/dictionary of input arrays. - sample_weights: Optional list of sample weight arrays. - batch_size: Integer batch size or None if unknown. 
- epochs: Number of times to iterate over the data - verbose: 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = progress bar, 2 = one line per epoch. - Note that the progress bar is not particularly useful when - logged to a file, so verbose=2 is recommended when not running - interactively (eg, in a production environment). - callbacks: List of callbacks to be called during training - val_inputs: Either a list or dictionary of arrays, or a dataset instance. - val_targets: List/dictionary of target arrays. - val_sample_weights: Optional list of sample weight arrays. - shuffle: Whether to shuffle the data at the beginning of each epoch - concatenation of list the display names of the outputs of `f` and the - list of display names of the outputs of `f_val`. - initial_epoch: Epoch at which to start training (useful for resuming a - previous training run) - steps_per_epoch: Total number of steps (batches of samples) before - declaring one epoch finished and starting the next epoch. Ignored with - the default value of `None`. - validation_steps: Number of steps to run validation for (only if doing - validation from data tensors). Ignored with the default value of - `None`. - validation_freq: Only relevant if validation data is provided. Integer or - `collections.abc.Container` instance (e.g. list, tuple, etc.). If an - integer, specifies how many training epochs to run before a new - validation run is performed, e.g. `validation_freq=2` runs - validation every 2 epochs. If a Container, specifies the epochs on - which to run validation, e.g. `validation_freq=[1, 2, 10]` runs - validation at the end of the 1st, 2nd, and 10th epochs. - mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. - validation_in_fit: if true, then this method is invoked from within - training iteration (for validation). In the case where `val_inputs` is - a dataset, this flag indicates that its iterator and feed values are - already created so should properly reuse resources. - prepared_feed_values_from_dataset: if True, `inputs` is a list of feed - tensors returned from `_prepare_feed_values` call on the validation - dataset, so do not call it again on `inputs`. Should only be used for - inline validation (i.e., only if `validation_in_fit` is also True). - steps_name: The string name of the steps argument, either `steps`, - `validation_steps`, or `steps_per_epoch`. Only used for error message - formatting. - **kwargs: Additional arguments for backwards compatibility. - - Returns: - - In TRAIN mode: `History` object. - - In TEST mode: Evaluation metrics. - - In PREDICT mode: Outputs of the Model called on inputs. - - Raises: - ValueError: in case of invalid arguments. - """ - # Backwards compatibility. - if 'steps' in kwargs: - steps_per_epoch = kwargs.pop('steps') - if kwargs: - raise TypeError('Unknown arguments: %s' % (kwargs,)) - - # In case we were passed a dataset, we extract symbolic tensors from it. - reset_dataset_after_each_epoch = False - input_iterator = None - is_dataset = isinstance(inputs, - (tf.compat.v1.data.Dataset, tf.data.Dataset)) - # TODO(fchollet): consider moving `steps_per_epoch` inference to - # _standardize_user_data and set reset_dataset_after_each_epoch as an - # attribute on the dataset instance. 
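As the docstring above notes, `verbose=2` suits non-interactive runs; a small sketch (not part of the diff) with throwaway data:

```python
import numpy as np
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(2,))])
model.compile(optimizer="sgd", loss="mse")
x, y = np.random.rand(100, 2), np.random.rand(100, 1)

# verbose=2 prints one line per epoch, keeping redirected logs readable
# where a progress bar (verbose=1) would produce noise.
model.fit(x, y, epochs=2, verbose=2)
```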
- if is_dataset: - if steps_per_epoch is None: - reset_dataset_after_each_epoch = True - steps_per_epoch = training_utils_v1.infer_steps_for_dataset( - model, inputs, steps_per_epoch, epochs=epochs, steps_name=steps_name) - input_iterator = _get_iterator(inputs, model._distribution_strategy) - - # Enter tf.distribute.Strategy scope. - if model._distribution_strategy: - scope = distributed_training_utils_v1.distributed_scope( - strategy=model._distribution_strategy, - learning_phase=(1 if mode == ModeKeys.TRAIN else 0)) - scope.__enter__() - - use_steps = is_dataset or steps_per_epoch is not None - do_validation = val_inputs is not None - - # Prepare input data. - inputs = input_iterator or inputs - if validation_in_fit and prepared_feed_values_from_dataset: - # When invoking validation in training loop, avoid creating iterator and - # list of feed values for the same validation dataset multiple times (which - # essentially would call `iterator.get_next()` that slows down execution and - # leads to OOM errors eventually. - ins = inputs - else: - ins = _prepare_feed_values(model, inputs, targets, sample_weights, mode) - # `ins` is a function when a distribute strategy is used in Eager mode. In - # that case `is_dataset` is True. The code branches that have requirements - # about the type of `ins` do not trigger in the distributed case. - - if not is_dataset: - num_samples_or_steps = _get_num_samples_or_steps(ins, batch_size, - steps_per_epoch) - else: - num_samples_or_steps = steps_per_epoch - - # Update sample_weight_mode of the model if sample_weights is specified by the - # user. We need to call this function after we have a handle on the inputs - # (both numpy arrays and datasets) in order to determine if the user has - # specified sample_weights. - _update_sample_weight_mode(model, mode, ins) - - # Get step function and loop type. As part of building the execution - # function we recompile the metrics based on the updated - # sample_weight_mode value. - f = _make_execution_function(model, mode) - - # Prepare validation data. Hold references to the iterator and the input list - # to properly reinitialize and reuse in multiple validation passes. - val_iterator = None - if isinstance(val_inputs, (tf.compat.v1.data.Dataset, tf.data.Dataset)): - if validation_steps is None: - # Because we pass an iterator feed instead of a Dataset to the eval - # model_iteration() call, it will not trigger the dataset-input path - # that determines the number of steps required. To avoid this issue, - # set validation_steps here if validation_steps is None. - validation_steps = training_utils_v1.infer_steps_for_dataset( - model, - val_inputs, - validation_steps, - epochs=epochs, - steps_name='validation_steps') - val_iterator = _get_iterator(val_inputs, model._distribution_strategy) - val_inputs = _prepare_feed_values( - model, val_iterator, val_targets, val_sample_weights, ModeKeys.TEST) - # Get num steps for printing. - val_samples_or_steps = validation_steps - else: - # Get num samples for printing. - val_samples_or_steps = val_inputs and tf.nest.flatten( - val_inputs)[0].shape[0] or None - - if mode == ModeKeys.TRAIN and verbose: - _print_train_info(num_samples_or_steps, val_samples_or_steps, is_dataset) - - # Configure callbacks. 
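A public-API approximation (not the helper itself, and not part of the diff) of the decision `infer_steps_for_dataset` makes when inferring `steps_per_epoch`:

```python
import tensorflow as tf

ds = tf.data.Dataset.range(100).batch(10)

# A finite, known cardinality yields a concrete step count; an unknown
# or infinite one (e.g. after `ds.repeat()`) leaves steps as None, so
# the loop runs until the dataset raises OutOfRangeError.
card = tf.data.experimental.cardinality(ds)
if (card == tf.data.experimental.UNKNOWN_CARDINALITY
        or card == tf.data.experimental.INFINITE_CARDINALITY):
    steps = None
else:
    steps = int(card)
print(steps)  # 10
```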
- count_mode = 'steps' if use_steps else 'samples' - callbacks = cbks.configure_callbacks( - callbacks, - model, - do_validation=do_validation, - batch_size=batch_size, - epochs=epochs, - steps_per_epoch=steps_per_epoch, - samples=num_samples_or_steps, - count_mode=count_mode, - verbose=verbose, - mode=mode) - - # Find beforehand arrays that need sparse-to-dense conversion. - if issparse is not None and not use_steps: - indices_for_conversion_to_dense = [] - feed = _get_model_feed(model, mode) - for i, (input_data, feed_tensor) in enumerate(zip(ins, feed)): - if issparse(input_data) and not backend.is_sparse(feed_tensor): - indices_for_conversion_to_dense.append(i) - - # Select aggregation method. - if mode == ModeKeys.PREDICT: - aggregator = training_utils_v1.OutputsAggregator( - use_steps, - num_samples=None if steps_per_epoch else num_samples_or_steps, - steps=steps_per_epoch) - else: - aggregator = training_utils_v1.MetricsAggregator( - use_steps, - num_samples=None if steps_per_epoch else num_samples_or_steps, - steps=steps_per_epoch) - - if model._compile_distribution: - distributed_training_utils_v1._copy_weights_to_distributed_model( - model, mode) - - callbacks.model.stop_training = False - callbacks._call_begin_hook(mode) - - initial_epoch = model._maybe_load_initial_epoch_from_ckpt(initial_epoch, mode) - - for epoch in range(initial_epoch, epochs): - if callbacks.model.stop_training: - break - - # Setup work for each epoch - epoch_logs = {} - if mode != ModeKeys.PREDICT: - # Collecting and resetting metrics has non-zero cost and will needlessly - # slow down model.predict. - model.reset_metrics() - if mode == ModeKeys.TRAIN: - callbacks.on_epoch_begin(epoch, epoch_logs) - - if use_steps: - # Step-wise loop. - if steps_per_epoch is None: - # Loop over dataset until `OutOfRangeError` is raised. - target_steps = np.inf - else: - # Loop over dataset for the specified number of steps. - target_steps = steps_per_epoch - - step = 0 - while step < target_steps: - batch_logs = {'batch': step, 'size': 1} - callbacks._call_batch_hook(mode, 'begin', step, batch_logs) - - # Get outputs. - try: - # `ins` can be callable in tf.distribute.Strategy + eager case. - if not callable(ins) or (model._distribution_strategy and - not distributed_training_utils_v1 - .is_distributing_by_cloning(model)): - actual_inputs = ins - else: - actual_inputs = ins() - batch_outs = f(actual_inputs) - except tf.errors.OutOfRangeError: - if is_dataset: - # The dataset passed by the user ran out of batches. - # Now we know the cardinality of the dataset. - # If steps_per_epoch was specified, then running out of data is - # unexpected, so we stop training and inform the user. - if steps_per_epoch: - callbacks.model.stop_training = True - logging.warning( - 'Your dataset ran out of data; interrupting training. ' - 'Make sure that your dataset can generate at least ' - '`%s * epochs` batches (in this case, %d batches). ' - 'You may need to use the repeat() function when ' - 'building your dataset.' - % (steps_name, steps_per_epoch * epochs)) - elif step > 0: - steps_per_epoch = step - aggregator.steps = steps_per_epoch - else: - # We ran out of batches while the user passed an iterator (legacy). - callbacks.model.stop_training = True - logging.warning( - 'Your dataset iterator ran out of data; ' - 'interrupting training. Make sure that your iterator ' - 'can generate at least `%s * epochs` ' - 'batches (in this case, %d batches). You may need to' - 'use the repeat() function when building your ' - 'dataset.' 
% (steps_name, steps_per_epoch * epochs)) - break - - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - - if model._distribution_strategy: - batch_outs = ( - distributed_training_utils_v1._per_replica_aggregate_batch( - model._distribution_strategy, batch_outs, model, mode)) - - # Aggregate results. - if step == 0: - aggregator.create(batch_outs) - aggregator.aggregate(batch_outs) - - # Callbacks batch end. - batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode) - callbacks._call_batch_hook(mode, 'end', step, batch_logs) - step += 1 - - if callbacks.model.stop_training: - break + issparse = None + + +def model_iteration( + model, + inputs, + targets=None, + sample_weights=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + val_inputs=None, + val_targets=None, + val_sample_weights=None, + shuffle=True, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_freq=1, + mode=ModeKeys.TRAIN, + validation_in_fit=False, + prepared_feed_values_from_dataset=False, + steps_name="steps", + **kwargs, +): + """Loop function for arrays of data with modes TRAIN/TEST/PREDICT. + + Args: + model: Keras Model instance. + inputs: Either a list or dictionary of arrays, or a dataset instance. + targets: List/dictionary of input arrays. + sample_weights: Optional list of sample weight arrays. + batch_size: Integer batch size or None if unknown. + epochs: Number of times to iterate over the data + verbose: 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = one line per epoch. + Note that the progress bar is not particularly useful when + logged to a file, so verbose=2 is recommended when not running + interactively (eg, in a production environment). + callbacks: List of callbacks to be called during training + val_inputs: Either a list or dictionary of arrays, or a dataset + instance. + val_targets: List/dictionary of target arrays. + val_sample_weights: Optional list of sample weight arrays. + shuffle: Whether to shuffle the data at the beginning of each epoch + concatenation of list the display names of the outputs of `f` and the + list of display names of the outputs of `f_val`. + initial_epoch: Epoch at which to start training (useful for resuming a + previous training run) + steps_per_epoch: Total number of steps (batches of samples) before + declaring one epoch finished and starting the next epoch. Ignored with + the default value of `None`. + validation_steps: Number of steps to run validation for (only if doing + validation from data tensors). Ignored with the default value of + `None`. + validation_freq: Only relevant if validation data is provided. Integer + or `collections.abc.Container` instance (e.g. list, tuple, etc.). If + an integer, specifies how many training epochs to run before a new + validation run is performed, e.g. `validation_freq=2` runs validation + every 2 epochs. If a Container, specifies the epochs on which to run + validation, e.g. `validation_freq=[1, 2, 10]` runs validation at the + end of the 1st, 2nd, and 10th epochs. + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. + validation_in_fit: if true, then this method is invoked from within + training iteration (for validation). In the case where `val_inputs` is + a dataset, this flag indicates that its iterator and feed values are + already created so should properly reuse resources. 
+ prepared_feed_values_from_dataset: if True, `inputs` is a list of feed
+ tensors returned from a `_prepare_feed_values` call on the validation
+ dataset, so do not call it again on `inputs`. Should only be used for
+ inline validation (i.e., only if `validation_in_fit` is also True).
+ steps_name: The string name of the steps argument, either `steps`,
+ `validation_steps`, or `steps_per_epoch`. Only used for error message
+ formatting.
+ **kwargs: Additional arguments for backwards compatibility.
+
+ Returns:
+ - In TRAIN mode: `History` object.
+ - In TEST mode: Evaluation metrics.
+ - In PREDICT mode: Outputs of the Model called on inputs.
+
+ Raises:
+ ValueError: in case of invalid arguments.
+ """
+ # Backwards compatibility.
+ if "steps" in kwargs:
+ steps_per_epoch = kwargs.pop("steps")
+ if kwargs:
+ raise TypeError(f"Unknown arguments: {kwargs}")
+
+ # In case we were passed a dataset, we extract symbolic tensors from it.
+ reset_dataset_after_each_epoch = False
+ input_iterator = None
+ is_dataset = isinstance(
+ inputs, (tf.compat.v1.data.Dataset, tf.data.Dataset)
+ )
+ # TODO(fchollet): consider moving `steps_per_epoch` inference to
+ # _standardize_user_data and set reset_dataset_after_each_epoch as an
+ # attribute on the dataset instance.
+ if is_dataset:
+ if steps_per_epoch is None:
+ reset_dataset_after_each_epoch = True
+ steps_per_epoch = training_utils_v1.infer_steps_for_dataset(
+ model,
+ inputs,
+ steps_per_epoch,
+ epochs=epochs,
+ steps_name=steps_name,
+ )
+ input_iterator = _get_iterator(inputs, model._distribution_strategy)
+
+ # Enter tf.distribute.Strategy scope.
+ if model._distribution_strategy:
+ scope = distributed_training_utils_v1.distributed_scope(
+ strategy=model._distribution_strategy,
+ learning_phase=(1 if mode == ModeKeys.TRAIN else 0),
+ )
+ scope.__enter__()
+
+ use_steps = is_dataset or steps_per_epoch is not None
+ do_validation = val_inputs is not None
+
+ # Prepare input data.
+ inputs = input_iterator or inputs
+ if validation_in_fit and prepared_feed_values_from_dataset:
+ # When invoking validation in the training loop, avoid creating the
+ # iterator and the list of feed values for the same validation
+ # dataset multiple times (which would essentially call
+ # `iterator.get_next()`, slowing down execution and eventually
+ # leading to OOM errors).
+ ins = inputs
 else:
- # Sample-wise loop.
- index_array = np.arange(num_samples_or_steps)
- if shuffle == 'batch':
- index_array = training_utils_v1.batch_shuffle(index_array, batch_size)
- elif shuffle:
- np.random.shuffle(index_array)
- batches = make_batches(num_samples_or_steps, batch_size)
- for batch_index, (batch_start, batch_end) in enumerate(batches):
- batch_ids = index_array[batch_start:batch_end]
- # Slice into a batch.
- if len(batches) == 1:
- # If we only have one batch, do not slice. This takes care of
- # composite tensors in non-Dataset modes; we currently don't support
- # slicing them.
- # TODO(b/133517906): Add slicing support.
- ins_batch = ins
- else:
- try:
- if ins and isinstance(ins[-1], int):
- # Do not slice the training phase flag.
- ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
- else:
- ins_batch = slice_arrays(ins, batch_ids)
- except TypeError:
- raise TypeError('TypeError while preparing batch. '
- 'If using HDF5 input data, '
- 'pass shuffle="batch".')
-
- # Sparse to dense conversion.
- if issparse is not None:
- for i in indices_for_conversion_to_dense:
- ins_batch[i] = ins_batch[i].toarray()
-
- # Callbacks batch_begin.
- batch_logs = {'batch': batch_index, 'size': len(batch_ids)} - callbacks._call_batch_hook(mode, 'begin', batch_index, batch_logs) - - # Get outputs. - batch_outs = f(ins_batch) - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - - # Aggregate results. - if batch_index == 0: - aggregator.create(batch_outs) - aggregator.aggregate(batch_outs, batch_start, batch_end) - - # Callbacks batch end. - batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode) - callbacks._call_batch_hook(mode, 'end', batch_index, batch_logs) - - if callbacks.model.stop_training: - break - - aggregator.finalize() - results = aggregator.results - epoch_logs = cbks.make_logs(model, epoch_logs, results, mode) - if len(results) == 1: - results = results[0] - - # Run the test loop every `validation_freq` epochs during training. - if (do_validation and - training_utils_v1.should_run_validation(validation_freq, epoch) and - not callbacks.model.stop_training): - - if model._compile_distribution: - # Since we create a new clone from the original model we need to copy - # the weights back to the original model before we can run validation. - distributed_training_utils_v1._copy_weights_to_original_model( - model, ModeKeys.TRAIN) - - val_results = model_iteration( - model, - val_inputs, - targets=val_targets, - sample_weights=val_sample_weights, - batch_size=batch_size, - steps_per_epoch=validation_steps, - callbacks=callbacks, - verbose=0, - mode=ModeKeys.TEST, - validation_in_fit=True, - prepared_feed_values_from_dataset=(val_iterator is not None), - steps_name='validation_steps') - if not isinstance(val_results, list): - val_results = [val_results] - epoch_logs = cbks.make_logs( - model, epoch_logs, val_results, mode, prefix='val_') - if val_iterator and epoch < epochs - 1: - _reinitialize_iterator(val_iterator, model._distribution_strategy) + ins = _prepare_feed_values(model, inputs, targets, sample_weights, mode) + # `ins` is a function when a distribute strategy is used in Eager mode. + # In that case `is_dataset` is True. The code branches that have + # requirements about the type of `ins` do not trigger in the distributed + # case. + + if not is_dataset: + num_samples_or_steps = _get_num_samples_or_steps( + ins, batch_size, steps_per_epoch + ) + else: + num_samples_or_steps = steps_per_epoch + + # Update sample_weight_mode of the model if sample_weights is specified by + # the user. We need to call this function after we have a handle on the + # inputs (both numpy arrays and datasets) in order to determine if the user + # has specified sample_weights. + _update_sample_weight_mode(model, mode, ins) + + # Get step function and loop type. As part of building the execution + # function we recompile the metrics based on the updated + # sample_weight_mode value. + f = _make_execution_function(model, mode) + + # Prepare validation data. Hold references to the iterator and the input + # list to properly reinitialize and reuse in multiple validation passes. + val_iterator = None + if isinstance(val_inputs, (tf.compat.v1.data.Dataset, tf.data.Dataset)): + if validation_steps is None: + # Because we pass an iterator feed instead of a Dataset to the eval + # model_iteration() call, it will not trigger the dataset-input path + # that determines the number of steps required. To avoid this issue, + # set validation_steps here if validation_steps is None. 
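The inference referenced in the comment above boils down to asking the dataset for its cardinality. A rough standalone sketch of that idea, assuming eager mode and only public `tf.data` APIs (the real `training_utils_v1.infer_steps_for_dataset` helper additionally handles distribution strategies and error reporting):

import tensorflow as tf

def infer_steps(dataset):
    """Return the number of batches in `dataset`, or None if not knowable."""
    cardinality = tf.data.experimental.cardinality(dataset)
    if cardinality == tf.data.experimental.UNKNOWN_CARDINALITY:
        return None  # e.g. after filter(): loop until OutOfRangeError.
    if cardinality == tf.data.experimental.INFINITE_CARDINALITY:
        return None  # e.g. after repeat(): an explicit steps value is needed.
    return int(cardinality.numpy())

ds = tf.data.Dataset.from_tensor_slices(list(range(100))).batch(10)
assert infer_steps(ds) == 10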
+ validation_steps = training_utils_v1.infer_steps_for_dataset( + model, + val_inputs, + validation_steps, + epochs=epochs, + steps_name="validation_steps", + ) + val_iterator = _get_iterator(val_inputs, model._distribution_strategy) + val_inputs = _prepare_feed_values( + model, val_iterator, val_targets, val_sample_weights, ModeKeys.TEST + ) + # Get num steps for printing. + val_samples_or_steps = validation_steps + else: + # Get num samples for printing. + val_samples_or_steps = ( + val_inputs and tf.nest.flatten(val_inputs)[0].shape[0] or None + ) + + if mode == ModeKeys.TRAIN and verbose: + _print_train_info( + num_samples_or_steps, val_samples_or_steps, is_dataset + ) + + # Configure callbacks. + count_mode = "steps" if use_steps else "samples" + callbacks = cbks.configure_callbacks( + callbacks, + model, + do_validation=do_validation, + batch_size=batch_size, + epochs=epochs, + steps_per_epoch=steps_per_epoch, + samples=num_samples_or_steps, + count_mode=count_mode, + verbose=verbose, + mode=mode, + ) + + # Find beforehand arrays that need sparse-to-dense conversion. + if issparse is not None and not use_steps: + indices_for_conversion_to_dense = [] + feed = _get_model_feed(model, mode) + for i, (input_data, feed_tensor) in enumerate(zip(ins, feed)): + if issparse(input_data) and not backend.is_sparse(feed_tensor): + indices_for_conversion_to_dense.append(i) + + # Select aggregation method. + if mode == ModeKeys.PREDICT: + aggregator = training_utils_v1.OutputsAggregator( + use_steps, + num_samples=None if steps_per_epoch else num_samples_or_steps, + steps=steps_per_epoch, + ) + else: + aggregator = training_utils_v1.MetricsAggregator( + use_steps, + num_samples=None if steps_per_epoch else num_samples_or_steps, + steps=steps_per_epoch, + ) - if mode == ModeKeys.TRAIN: - # Epochs only apply to `fit`. - callbacks.on_epoch_end(epoch, epoch_logs) + if model._compile_distribution: + distributed_training_utils_v1._copy_weights_to_distributed_model( + model, mode + ) - # Reinitialize dataset iterator for the next epoch. - if reset_dataset_after_each_epoch and epoch < epochs - 1: - _reinitialize_iterator(input_iterator, model._distribution_strategy) + callbacks.model.stop_training = False + callbacks._call_begin_hook(mode) - model._successful_loop_finish = True - callbacks._call_end_hook(mode) + initial_epoch = model._maybe_load_initial_epoch_from_ckpt( + initial_epoch, mode + ) - if model._distribution_strategy: - if model._compile_distribution: - # TODO(priyag, psv): Copy back metrics to the original model as well? - distributed_training_utils_v1._copy_weights_to_original_model(model, mode) - scope.__exit__(None, None, None) + for epoch in range(initial_epoch, epochs): + if callbacks.model.stop_training: + break + + # Setup work for each epoch + epoch_logs = {} + if mode != ModeKeys.PREDICT: + # Collecting and resetting metrics has non-zero cost and will + # needlessly slow down model.predict. + model.reset_metrics() + if mode == ModeKeys.TRAIN: + callbacks.on_epoch_begin(epoch, epoch_logs) + + if use_steps: + # Step-wise loop. + if steps_per_epoch is None: + # Loop over dataset until `OutOfRangeError` is raised. + target_steps = np.inf + else: + # Loop over dataset for the specified number of steps. + target_steps = steps_per_epoch + + step = 0 + while step < target_steps: + batch_logs = {"batch": step, "size": 1} + callbacks._call_batch_hook(mode, "begin", step, batch_logs) + + # Get outputs. + try: + # `ins` can be callable in tf.distribute.Strategy + eager + # case. 
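To make the callable-`ins` comment above concrete: under a distribution strategy in eager mode, the prepared feed values arrive as a zero-argument callable so that fresh per-replica values can be fetched on every step; otherwise `ins` is a concrete list. A minimal sketch of that contract with stand-in names (the real branch below also special-cases cloning-based distribution):

def run_step(step_fn, ins):
    # Resolve the feed values: invoke the provider if one was given,
    # otherwise use the concrete list as-is.
    actual_inputs = ins() if callable(ins) else ins
    return step_fn(actual_inputs)

assert run_step(sum, [1, 2, 3]) == 6        # concrete feed values
assert run_step(sum, lambda: [4, 5]) == 9   # callable provider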
+ if not callable(ins) or (
+ model._distribution_strategy
+ and not distributed_training_utils_v1.is_distributing_by_cloning( # noqa: E501
+ model
+ )
+ ):
+ actual_inputs = ins
+ else:
+ actual_inputs = ins()
+ batch_outs = f(actual_inputs)
+ except tf.errors.OutOfRangeError:
+ if is_dataset:
+ # The dataset passed by the user ran out of batches.
+ # Now we know the cardinality of the dataset. If
+ # steps_per_epoch was specified, then running out of
+ # data is unexpected, so we stop training and inform the
+ # user.
+ if steps_per_epoch:
+ callbacks.model.stop_training = True
+ logging.warning(
+ "Your dataset ran out of data; interrupting "
+ "training. Make sure that your dataset can "
+ "generate at least `%s * epochs` batches (in "
+ "this case, %d batches). You may need to use "
+ "the repeat() function when building your "
+ "dataset."
+ % (steps_name, steps_per_epoch * epochs)
+ )
+ elif step > 0:
+ steps_per_epoch = step
+ aggregator.steps = steps_per_epoch
+ else:
+ # We ran out of batches while the user passed an
+ # iterator (legacy).
+ callbacks.model.stop_training = True
+ logging.warning(
+ "Your dataset iterator ran out of data; "
+ "interrupting training. Make sure that your "
+ "iterator can generate at least `%s * epochs` "
+ "batches (in this case, %d batches). You may need "
+ "to use the repeat() function when building your "
+ "dataset." % (steps_name, steps_per_epoch * epochs)
+ )
+ break
+
+ if not isinstance(batch_outs, list):
+ batch_outs = [batch_outs]
+
+ if model._distribution_strategy:
+ batch_outs = distributed_training_utils_v1._per_replica_aggregate_batch( # noqa: E501
+ model._distribution_strategy, batch_outs, model, mode
+ )
+
+ # Aggregate results.
+ if step == 0:
+ aggregator.create(batch_outs)
+ aggregator.aggregate(batch_outs)
+
+ # Callbacks batch end.
+ batch_logs = cbks.make_logs(
+ model, batch_logs, batch_outs, mode
+ )
+ callbacks._call_batch_hook(mode, "end", step, batch_logs)
+ step += 1
+
+ if callbacks.model.stop_training:
+ break
+ else:
+ # Sample-wise loop.
+ index_array = np.arange(num_samples_or_steps)
+ if shuffle == "batch":
+ index_array = training_utils_v1.batch_shuffle(
+ index_array, batch_size
+ )
+ elif shuffle:
+ np.random.shuffle(index_array)
+ batches = make_batches(num_samples_or_steps, batch_size)
+ for batch_index, (batch_start, batch_end) in enumerate(batches):
+ batch_ids = index_array[batch_start:batch_end]
+ # Slice into a batch.
+ if len(batches) == 1:
+ # If we only have one batch, do not slice. This takes care
+ # of composite tensors in non-Dataset modes; we currently
+ # don't support slicing them.
+ # TODO(b/133517906): Add slicing support.
+ ins_batch = ins
+ else:
+ try:
+ if ins and isinstance(ins[-1], int):
+ # Do not slice the training phase flag.
+ ins_batch = slice_arrays(ins[:-1], batch_ids) + [
+ ins[-1]
+ ]
+ else:
+ ins_batch = slice_arrays(ins, batch_ids)
+ except TypeError:
+ raise TypeError(
+ "TypeError while preparing batch. "
+ "If using HDF5 input data, "
+ 'pass shuffle="batch".'
+ )
+
+ # Sparse to dense conversion.
+ if issparse is not None:
+ for i in indices_for_conversion_to_dense:
+ ins_batch[i] = ins_batch[i].toarray()
+
+ # Callbacks batch_begin.
+ batch_logs = {"batch": batch_index, "size": len(batch_ids)}
+ callbacks._call_batch_hook(
+ mode, "begin", batch_index, batch_logs
+ )
+
+ # Get outputs.
+ batch_outs = f(ins_batch)
+ if not isinstance(batch_outs, list):
+ batch_outs = [batch_outs]
+
+ # Aggregate results.
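The aggregation here follows a small create/aggregate/finalize protocol: `create` allocates result slots from the structure of the first batch, `aggregate` folds in each batch, and `finalize` produces the final values. A toy mean-style aggregator sketching that protocol (hypothetical class; the real `OutputsAggregator`/`MetricsAggregator` also handle stateful metrics, sample counts, and step mode):

class ToyMeanAggregator:
    """Averages each output position across batches."""

    def create(self, batch_outs):
        # Allocate one accumulator slot per output.
        self.results = [0.0] * len(batch_outs)
        self.num_batches = 0

    def aggregate(self, batch_outs):
        for i, out in enumerate(batch_outs):
            self.results[i] += out
        self.num_batches += 1

    def finalize(self):
        self.results = [r / self.num_batches for r in self.results]

agg = ToyMeanAggregator()
for step, batch_outs in enumerate([[1.0, 10.0], [3.0, 30.0]]):
    if step == 0:
        agg.create(batch_outs)
    agg.aggregate(batch_outs)
agg.finalize()
assert agg.results == [2.0, 20.0]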
+ if batch_index == 0:
+ aggregator.create(batch_outs)
+ aggregator.aggregate(batch_outs, batch_start, batch_end)
+
+ # Callbacks batch end.
+ batch_logs = cbks.make_logs(
+ model, batch_logs, batch_outs, mode
+ )
+ callbacks._call_batch_hook(mode, "end", batch_index, batch_logs)
+
+ if callbacks.model.stop_training:
+ break
+
+ aggregator.finalize()
+ results = aggregator.results
+ epoch_logs = cbks.make_logs(model, epoch_logs, results, mode)
+ if len(results) == 1:
+ results = results[0]
+
+ # Run the test loop every `validation_freq` epochs during training.
+ if (
+ do_validation
+ and training_utils_v1.should_run_validation(validation_freq, epoch)
+ and not callbacks.model.stop_training
+ ):
+
+ if model._compile_distribution:
+ # Since we create a new clone from the original model we need to
+ # copy the weights back to the original model before we can run
+ # validation.
+ distributed_training_utils_v1._copy_weights_to_original_model(
+ model, ModeKeys.TRAIN
+ )
+
+ val_results = model_iteration(
+ model,
+ val_inputs,
+ targets=val_targets,
+ sample_weights=val_sample_weights,
+ batch_size=batch_size,
+ steps_per_epoch=validation_steps,
+ callbacks=callbacks,
+ verbose=0,
+ mode=ModeKeys.TEST,
+ validation_in_fit=True,
+ prepared_feed_values_from_dataset=(val_iterator is not None),
+ steps_name="validation_steps",
+ )
+ if not isinstance(val_results, list):
+ val_results = [val_results]
+ epoch_logs = cbks.make_logs(
+ model, epoch_logs, val_results, mode, prefix="val_"
+ )
+ if val_iterator and epoch < epochs - 1:
+ _reinitialize_iterator(
+ val_iterator, model._distribution_strategy
+ )
+
+ if mode == ModeKeys.TRAIN:
+ # Epochs only apply to `fit`.
+ callbacks.on_epoch_end(epoch, epoch_logs)
+
+ # Reinitialize dataset iterator for the next epoch.
+ if reset_dataset_after_each_epoch and epoch < epochs - 1:
+ _reinitialize_iterator(input_iterator, model._distribution_strategy)
+
+ model._successful_loop_finish = True
+ callbacks._call_end_hook(mode)
+
+ if model._distribution_strategy:
+ if model._compile_distribution:
+ # TODO(priyag, psv): Copy back metrics to the original model as
+ # well?
+ distributed_training_utils_v1._copy_weights_to_original_model( + model, mode + ) + scope.__exit__(None, None, None) - if mode == ModeKeys.TRAIN: - return model.history - return results + if mode == ModeKeys.TRAIN: + return model.history + return results def _get_model_feed(model, mode): - if mode == ModeKeys.PREDICT: - feed = model._feed_inputs - else: - feed = ( - model._feed_inputs + model._feed_targets + model._feed_sample_weights) - return feed + if mode == ModeKeys.PREDICT: + feed = model._feed_inputs + else: + feed = ( + model._feed_inputs + + model._feed_targets + + model._feed_sample_weights + ) + return feed def _print_train_info(num_samples_or_steps, val_samples_or_steps, is_dataset): - increment = 'steps' if is_dataset else 'samples' - msg = 'Train on {0} {increment}'.format( - num_samples_or_steps, increment=increment) - if val_samples_or_steps: - msg += ', validate on {0} {increment}'.format( - val_samples_or_steps, increment=increment) - io_utils.print_msg(msg) + increment = "steps" if is_dataset else "samples" + msg = f"Train on {num_samples_or_steps} {increment}" + if val_samples_or_steps: + msg += f", validate on {val_samples_or_steps} {increment}" + io_utils.print_msg(msg) def _get_num_samples_or_steps(ins, batch_size, steps_per_epoch): - """Returns total number of samples (when training in batch mode) or steps.""" - if steps_per_epoch: - return steps_per_epoch - return training_utils_v1.check_num_samples(ins, batch_size, steps_per_epoch, - 'steps_per_epoch') + """Returns total number of samples when training in batch mode or steps.""" + if steps_per_epoch: + return steps_per_epoch + return training_utils_v1.check_num_samples( + ins, batch_size, steps_per_epoch, "steps_per_epoch" + ) def _prepare_feed_values(model, inputs, targets, sample_weights, mode): - """Prepare feed values to the model execution function. - - Args: - model: Model to prepare feed values for. - inputs: List or dict of model inputs. - targets: Optional list of model targets. - sample_weights: Optional list of sample weight arrays. - mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. - - Returns: - Feed values for the model in the given mode. - """ - if model._distribution_strategy: - if isinstance(inputs, (tf.compat.v1.data.Dataset, tf.data.Dataset)): - inputs = distributed_training_utils_v1.get_iterator( - inputs, model._distribution_strategy) - - def get_distributed_inputs(): - return distributed_training_utils_v1._prepare_feed_values( - model, inputs, targets, sample_weights, mode) - - # In the eager case, we want to call the input method per step, so return - # a lambda from here that can be called. Note that this is applicable only - # in Distribution Strategy case as it follows the same code path for both - # eager and graph modes. - # TODO(priyag,omalleyt): Either we should move the training DS with - # IteratorBase to use training_generator code path, or figure out how to - # set a symbolic Iterator out of a Dataset when in eager mode. - if tf.executing_eagerly(): - return get_distributed_inputs - else: - return get_distributed_inputs() + """Prepare feed values to the model execution function. - if isinstance(inputs, (tf.compat.v1.data.Dataset, tf.data.Dataset, - tf.compat.v1.data.Iterator)): - inputs, targets, sample_weights = model._standardize_user_data( - inputs, - extract_tensors_from_dataset=True) + Args: + model: Model to prepare feed values for. + inputs: List or dict of model inputs. + targets: Optional list of model targets. 
+ sample_weights: Optional list of sample weight arrays. + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. + + Returns: + Feed values for the model in the given mode. + """ + if model._distribution_strategy: + if isinstance(inputs, (tf.compat.v1.data.Dataset, tf.data.Dataset)): + inputs = distributed_training_utils_v1.get_iterator( + inputs, model._distribution_strategy + ) + + def get_distributed_inputs(): + return distributed_training_utils_v1._prepare_feed_values( + model, inputs, targets, sample_weights, mode + ) + + # In the eager case, we want to call the input method per step, so + # return a lambda from here that can be called. Note that this is + # applicable only in Distribution Strategy case as it follows the same + # code path for both eager and graph modes. + # TODO(priyag,omalleyt): Either we should move the training DS with + # IteratorBase to use training_generator code path, or figure out how to + # set a symbolic Iterator out of a Dataset when in eager mode. + if tf.executing_eagerly(): + return get_distributed_inputs + else: + return get_distributed_inputs() - inputs = training_utils_v1.ModelInputs(inputs).as_list() - targets = list(targets or []) - sample_weights = list(sample_weights or []) - ins = inputs + targets + sample_weights - if mode == ModeKeys.TRAIN and not isinstance( - backend.symbolic_learning_phase(), int): - ins += [True] # Add learning phase value. - return ins + if isinstance( + inputs, + ( + tf.compat.v1.data.Dataset, + tf.data.Dataset, + tf.compat.v1.data.Iterator, + ), + ): + inputs, targets, sample_weights = model._standardize_user_data( + inputs, extract_tensors_from_dataset=True + ) + + inputs = training_utils_v1.ModelInputs(inputs).as_list() + targets = list(targets or []) + sample_weights = list(sample_weights or []) + ins = inputs + targets + sample_weights + if mode == ModeKeys.TRAIN and not isinstance( + backend.symbolic_learning_phase(), int + ): + ins += [True] # Add learning phase value. + return ins def _get_iterator(inputs, distribution_strategy=None): - if distribution_strategy: - return distributed_training_utils_v1.get_iterator( - inputs, distribution_strategy) - return training_utils_v1.get_iterator(inputs) + if distribution_strategy: + return distributed_training_utils_v1.get_iterator( + inputs, distribution_strategy + ) + return training_utils_v1.get_iterator(inputs) def _reinitialize_iterator(iterator, distribution_strategy=None): - if distribution_strategy: - distributed_training_utils_v1.initialize_iterator( - iterator, distribution_strategy) - else: - training_utils_v1.initialize_iterator(iterator) + if distribution_strategy: + distributed_training_utils_v1.initialize_iterator( + iterator, distribution_strategy + ) + else: + training_utils_v1.initialize_iterator(iterator) def _make_execution_function(model, mode): - """Makes function to run one step of model execution.""" - if model._distribution_strategy: - return distributed_training_utils_v1._make_execution_function(model, mode) - return model._make_execution_function(mode) + """Makes function to run one step of model execution.""" + if model._distribution_strategy: + return distributed_training_utils_v1._make_execution_function( + model, mode + ) + return model._make_execution_function(mode) def _update_sample_weight_mode(model, mode, inputs): - """Updates the sample_weight_mode of a given model.""" - # Add a quick return to prevent us from calling model._feed_targets that - # accesses certain model properties that may not be set in the `PREDICT` mode. 
- if mode == ModeKeys.PREDICT: - return - - sample_weights = None - # `inputs` is the model's inputs + targets + sample_weights + - # learning phase placeholder if specified. To update the sample_weight_mode - # we need to determine if the user has passed sample weights as part of the - # input. - if not callable(inputs): - sample_weights = inputs[len(model._feed_inputs) + len(model._feed_targets):] - has_learning_phase_pl = (mode == ModeKeys.TRAIN and - not isinstance(backend.symbolic_learning_phase(), - int)) - if has_learning_phase_pl: - sample_weights = sample_weights[:-1] - model._update_sample_weight_modes(sample_weights=sample_weights) - - # Call the DistributionStrategy specific function to update the - # sample_weight_mode on the model. - if model._distribution_strategy: - distributed_training_utils_v1._update_sample_weight_modes(model, mode, - sample_weights) + """Updates the sample_weight_mode of a given model.""" + # Add a quick return to prevent us from calling model._feed_targets that + # accesses certain model properties that may not be set in the `PREDICT` + # mode. + if mode == ModeKeys.PREDICT: + return + + sample_weights = None + # `inputs` is the model's inputs + targets + sample_weights + + # learning phase placeholder if specified. To update the sample_weight_mode + # we need to determine if the user has passed sample weights as part of the + # input. + if not callable(inputs): + sample_weights = inputs[ + len(model._feed_inputs) + len(model._feed_targets) : + ] + has_learning_phase_pl = mode == ModeKeys.TRAIN and not isinstance( + backend.symbolic_learning_phase(), int + ) + if has_learning_phase_pl: + sample_weights = sample_weights[:-1] + model._update_sample_weight_modes(sample_weights=sample_weights) + + # Call the DistributionStrategy specific function to update the + # sample_weight_mode on the model. + if model._distribution_strategy: + distributed_training_utils_v1._update_sample_weight_modes( + model, mode, sample_weights + ) + # For backwards compatibility for internal users of these loops. fit_loop = functools.partial(model_iteration, mode=ModeKeys.TRAIN) test_loop = functools.partial( - model_iteration, mode=ModeKeys.TEST, shuffle=False) + model_iteration, mode=ModeKeys.TEST, shuffle=False +) predict_loop = functools.partial( - model_iteration, mode=ModeKeys.PREDICT, shuffle=False) + model_iteration, mode=ModeKeys.PREDICT, shuffle=False +) class ArrayLikeTrainingLoop(training_utils_v1.TrainingLoop): - """TrainingLoop that handle inputs like array. - - This is the default handler for most of the input data types, includes - symbolic tensors or Numpy array-like, Datasets and iterators in graph mode - (since they generate symbolic tensors). This Function is used to handle model - with `run_eagerly` = False. 
- """ - - def fit(self, - model, - x=None, - y=None, - batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - validation_split=0., - validation_data=None, - shuffle=True, - class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_freq=1, - **kwargs): - batch_size = model._validate_or_infer_batch_size(batch_size, - steps_per_epoch, x) - - x, y, sample_weights = model._standardize_user_data( - x, - y, - sample_weight=sample_weight, - class_weight=class_weight, - batch_size=batch_size, - check_steps=True, - steps_name='steps_per_epoch', - steps=steps_per_epoch, - validation_split=validation_split, - shuffle=shuffle) - - if validation_data: - val_x, val_y, val_sample_weights = model._prepare_validation_data( - validation_data, batch_size, validation_steps) - elif validation_split and 0. < validation_split < 1.: - (x, y, sample_weights, val_x, val_y, val_sample_weights - ) = training_utils_v1.split_training_and_validation_data( - x, y, sample_weights, validation_split) - else: - if validation_steps: - raise ValueError('`validation_steps` should not be specified if ' - '`validation_data` is None.') - val_x, val_y, val_sample_weights = None, None, None + """TrainingLoop that handle inputs like array. - return fit_loop( + This is the default handler for most of the input data types, includes + symbolic tensors or Numpy array-like, Datasets and iterators in graph mode + (since they generate symbolic tensors). This Function is used to handle + model with `run_eagerly` = False. + """ + + def fit( + self, model, - inputs=x, - targets=y, - sample_weights=sample_weights, - batch_size=batch_size, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - val_inputs=val_x, - val_targets=val_y, - val_sample_weights=val_sample_weights, - shuffle=shuffle, - initial_epoch=initial_epoch, - steps_per_epoch=steps_per_epoch, - validation_steps=validation_steps, - validation_freq=validation_freq, - steps_name='steps_per_epoch') - - def evaluate(self, - model, - x=None, - y=None, - batch_size=None, - verbose=1, - sample_weight=None, - steps=None, - callbacks=None, - **kwargs): - batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) - x, y, sample_weights = model._standardize_user_data( - x, - y, - sample_weight=sample_weight, - batch_size=batch_size, - check_steps=True, - steps_name='steps', - steps=steps) - return test_loop( + x=None, + y=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_freq=1, + **kwargs, + ): + batch_size = model._validate_or_infer_batch_size( + batch_size, steps_per_epoch, x + ) + + x, y, sample_weights = model._standardize_user_data( + x, + y, + sample_weight=sample_weight, + class_weight=class_weight, + batch_size=batch_size, + check_steps=True, + steps_name="steps_per_epoch", + steps=steps_per_epoch, + validation_split=validation_split, + shuffle=shuffle, + ) + + if validation_data: + val_x, val_y, val_sample_weights = model._prepare_validation_data( + validation_data, batch_size, validation_steps + ) + elif validation_split and 0.0 < validation_split < 1.0: + ( + x, + y, + sample_weights, + val_x, + val_y, + val_sample_weights, + ) = training_utils_v1.split_training_and_validation_data( + x, y, sample_weights, validation_split + ) + else: + if validation_steps: + raise ValueError( + 
"`validation_steps` should not be specified if " + "`validation_data` is None." + ) + val_x, val_y, val_sample_weights = None, None, None + + return fit_loop( + model, + inputs=x, + targets=y, + sample_weights=sample_weights, + batch_size=batch_size, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + val_inputs=val_x, + val_targets=val_y, + val_sample_weights=val_sample_weights, + shuffle=shuffle, + initial_epoch=initial_epoch, + steps_per_epoch=steps_per_epoch, + validation_steps=validation_steps, + validation_freq=validation_freq, + steps_name="steps_per_epoch", + ) + + def evaluate( + self, model, - inputs=x, - targets=y, - sample_weights=sample_weights, - batch_size=batch_size, - verbose=verbose, - steps=steps, - callbacks=callbacks) - - def predict(self, - model, - x, - batch_size=None, - verbose=0, - steps=None, - callbacks=None, - **kwargs): - batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) - x, _, _ = model._standardize_user_data( - x, check_steps=True, steps_name='steps', steps=steps) - return predict_loop( + x=None, + y=None, + batch_size=None, + verbose=1, + sample_weight=None, + steps=None, + callbacks=None, + **kwargs, + ): + batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) + x, y, sample_weights = model._standardize_user_data( + x, + y, + sample_weight=sample_weight, + batch_size=batch_size, + check_steps=True, + steps_name="steps", + steps=steps, + ) + return test_loop( + model, + inputs=x, + targets=y, + sample_weights=sample_weights, + batch_size=batch_size, + verbose=verbose, + steps=steps, + callbacks=callbacks, + ) + + def predict( + self, model, x, - batch_size=batch_size, - verbose=verbose, - steps=steps, - callbacks=callbacks) + batch_size=None, + verbose=0, + steps=None, + callbacks=None, + **kwargs, + ): + batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) + x, _, _ = model._standardize_user_data( + x, check_steps=True, steps_name="steps", steps=steps + ) + return predict_loop( + model, + x, + batch_size=batch_size, + verbose=verbose, + steps=steps, + callbacks=callbacks, + ) diff --git a/keras/engine/training_dataset_test.py b/keras/engine/training_dataset_test.py index 55335d95699f..07d5d839c72f 100644 --- a/keras/engine/training_dataset_test.py +++ b/keras/engine/training_dataset_test.py @@ -14,558 +14,621 @@ # ============================================================================== """Tests for training routines.""" -import tensorflow.compat.v2 as tf - import io import sys import numpy as np +import tensorflow.compat.v2 as tf import keras from keras import callbacks -from keras.testing_infra import test_combinations from keras import metrics as metrics_module +from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import io_utils + +# isort: off from tensorflow.python.platform import tf_logging as logging class BatchCounterCallback(callbacks.Callback): + def __init__(self): + self.batch_begin_count = 0 + self.batch_end_count = 0 - def __init__(self): - self.batch_begin_count = 0 - self.batch_end_count = 0 - - def on_batch_begin(self, *args, **kwargs): - self.batch_begin_count += 1 + def on_batch_begin(self, *args, **kwargs): + self.batch_begin_count += 1 - def on_batch_end(self, *args, **kwargs): - self.batch_end_count += 1 + def on_batch_end(self, *args, **kwargs): + self.batch_end_count += 1 class TestTrainingWithDataset(test_combinations.TestCase): - - @test_combinations.run_with_all_model_types - 
@test_combinations.run_all_keras_modes - def test_calling_model_on_same_dataset(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - optimizer = 'rmsprop' - loss = 'mse' - metrics = ['mae'] - model.compile( - optimizer, - loss, - metrics=metrics, - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((10, 3), np.float32) - targets = np.zeros((10, 4), np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - # Call fit with validation data - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - validation_data=dataset, - validation_steps=2) - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - validation_data=dataset, - validation_steps=2) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_training_and_eval_methods_on_dataset(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - optimizer = 'rmsprop' - loss = 'mse' - metrics = ['mae', metrics_module.CategoricalAccuracy()] - model.compile( - optimizer, - loss, - metrics=metrics, - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((10, 3), np.float32) - targets = np.zeros((10, 4), np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat() # Infinite dataset. - dataset = dataset.batch(10) - - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) - model.evaluate(dataset, steps=2, verbose=1) - model.predict(dataset, steps=2) - - # Test with validation data - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - validation_data=dataset, - validation_steps=2) - - # Test with validation split - with self.assertRaises(ValueError): - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - validation_split=0.5, - validation_steps=2) - - # Test with sample weight. - sample_weight = np.random.random((10,)) - with self.assertRaisesRegex( - ValueError, r'`sample_weight` argument is not supported .+dataset'): - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - sample_weight=sample_weight) - - with self.assertRaisesRegex( - ValueError, '(you should not specify a target)|' - '(`y` argument is not supported when using dataset as input.)'): - model.fit(dataset, dataset, epochs=1, steps_per_epoch=2, verbose=0) - - # With an infinite dataset, `steps_per_epoch`/`steps` argument is required. 
- with self.assertRaises(ValueError): - model.fit(dataset, epochs=1, verbose=0) - with self.assertRaises(ValueError): - model.evaluate(dataset, verbose=0) - with self.assertRaises(ValueError): - model.predict(dataset, verbose=0) - - @test_combinations.run_with_all_model_types(exclude_models='sequential') - @test_combinations.run_all_keras_modes - def test_training_and_eval_methods_on_multi_input_output_dataset(self): - input_a = keras.layers.Input(shape=(3,), name='input_1') - input_b = keras.layers.Input(shape=(3,), name='input_2') - dense = keras.layers.Dense(4, name='dense') - dropout = keras.layers.Dropout(0.5, name='dropout') - branch_a = [input_a, dense] - branch_b = [input_b, dense, dropout] - - model = test_utils.get_multi_io_model(branch_a, branch_b) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - input_a_np = np.random.random((10, 3)).astype(dtype=np.float32) - input_b_np = np.random.random((10, 3)).astype(dtype=np.float32) - output_d_np = np.random.random((10, 4)).astype(dtype=np.float32) - output_e_np = np.random.random((10, 4)).astype(dtype=np.float32) - - # Test with tuples - dataset_tuple = tf.data.Dataset.from_tensor_slices( - ((input_a_np, input_b_np), (output_d_np, output_e_np))) - dataset_tuple = dataset_tuple.repeat(100) - dataset_tuple = dataset_tuple.batch(10) - - model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1) - model.evaluate(dataset_tuple, steps=2, verbose=1) - - # Test with dict - input_dict = {'input_1': input_a_np, 'input_2': input_b_np} - if test_utils.get_model_type() == 'subclass': - output_dict = {'output_1': output_d_np, 'output_2': output_e_np} - else: - output_dict = {'dense': output_d_np, 'dropout': output_e_np} - - dataset_dict = tf.data.Dataset.from_tensor_slices( - (input_dict, output_dict)) - dataset_dict = dataset_dict.repeat(100) - dataset_dict = dataset_dict.batch(10) - - model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1) - model.evaluate(dataset_dict, steps=2, verbose=1) - - predict_dataset_dict = tf.data.Dataset.from_tensor_slices(input_dict) - predict_dataset_dict = predict_dataset_dict.repeat(100) - predict_dataset_dict = predict_dataset_dict.batch(10) - model.predict(predict_dataset_dict, steps=1) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_dataset_with_sample_weights(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - optimizer = 'rmsprop' - loss = 'mse' - metrics = ['mae', metrics_module.CategoricalAccuracy()] - model.compile( - optimizer, - loss, - metrics=metrics, - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((10, 3), np.float32) - targets = np.zeros((10, 4), np.float32) - sample_weights = np.ones((10), np.float32) - dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets, sample_weights)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) - model.evaluate(dataset, steps=2, verbose=1) - model.predict(dataset, steps=2) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_dataset_with_sample_weights_correctness(self): - x = keras.layers.Input(shape=(1,), name='input') - y = keras.layers.Dense( - 1, kernel_initializer='ones', bias_initializer='zeros', name='dense')( - x) - model = keras.Model(x, y) - optimizer = 'rmsprop' - loss = 'mse' - model.compile(optimizer, loss) - inputs = np.array([[0], [1], [2], [3]], np.float32) - targets = 
np.array([[2], [4], [6], [8]], np.float32) - sample_weights = np.array([0.25, 0.5, 0.75, 1], np.float32) - ds = tf.data.Dataset.from_tensor_slices( - (inputs, targets, sample_weights)).batch(2) - result = model.evaluate(ds, verbose=1) - # The per sample loss is multiplied by the corresponding sample weight. The - # average of these weighted losses is the return value of the `evaluate` - # call. For example, in the test above the average weighted loss is - # calculated in the following manner: - # ((2-0)^2) * 0.25 + ((4-1)^2) * 0.5 + ((6-2)^2 * 0.75) + ((8-3)^2 * 1) - # equals 42.5 / 4 = 10.625 - self.assertEqual(result, 10.625) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_dataset_with_sparse_labels(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - optimizer = 'rmsprop' - model.compile( - optimizer, - loss='sparse_categorical_crossentropy', - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.random.randint(0, 4, size=10, dtype=np.int32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) - - @test_combinations.run_all_keras_modes - def test_dataset_fit_correctness(self): - - class SumLayer(keras.layers.Layer): - - def build(self, _): - self.w = self.add_weight('w', ()) - - def call(self, inputs): - return keras.backend.sum(inputs, axis=1, keepdims=True) + self.w * 0 - - model = keras.Sequential([SumLayer(input_shape=(2,))]) - model.compile( - 'rmsprop', loss='mae', run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((40, 2), dtype=np.float32) - inputs[10:20, :] = 2 - inputs[20:30, :] = 1 - inputs[30:, :] = 4 - targets = np.zeros((40, 1), dtype=np.float32) - - # Test correctness with `steps_per_epoch`. - train_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - val_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - history = model.fit( - train_dataset, - epochs=2, - steps_per_epoch=2, - verbose=1, - validation_data=val_dataset, - validation_steps=2) - self.assertAllClose(history.history['loss'], - [inputs[:20].sum() / 20, inputs[20:].sum() / 20]) - # The validation dataset will be reset at the end of each validation run. - self.assertAllClose(history.history['val_loss'], - [inputs[:20].sum() / 20, inputs[:20].sum() / 20]) - - # Test correctness with dataset reset. - train_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - val_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - history = model.fit( - train_dataset, epochs=2, verbose=1, validation_data=val_dataset) - self.assertAllClose( - history.history['loss'], - [inputs.sum() / 40, inputs.sum() / 40]) - self.assertAllClose( - history.history['val_loss'], - [inputs.sum() / 40, inputs.sum() / 40]) - - def test_dataset_input_shape_validation(self): - with tf.compat.v1.get_default_graph().as_default(), self.cached_session(): - model = test_utils.get_small_functional_mlp(1, 4, input_dim=3) - model.compile(optimizer='rmsprop', loss='mse') - - # User forgets to batch the dataset - inputs = np.zeros((10, 3)) - targets = np.zeros((10, 4)) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - - with self.assertRaisesRegex( - ValueError, - r'expected (.*?) 
to have shape \(3,\) but got array with shape \(1,\)' - ): - model.train_on_batch(dataset) - - # Wrong input shape - inputs = np.zeros((10, 5)) - targets = np.zeros((10, 4)) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - with self.assertRaisesRegex(ValueError, - r'expected (.*?) to have shape \(3,\)'): - model.train_on_batch(dataset) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_finite_dataset_known_cardinality_no_steps_arg(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - model.compile( - 'rmsprop', 'mse', run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((100, 3), dtype=np.float32) - targets = np.random.randint(0, 4, size=100, dtype=np.int32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.batch(10) - - batch_counter = BatchCounterCallback() - history = model.fit(dataset, epochs=2, verbose=1, callbacks=[batch_counter]) - - self.assertLen(history.history['loss'], 2) - self.assertEqual(batch_counter.batch_end_count, 20) - model.evaluate(dataset) - out = model.predict(dataset) - self.assertEqual(out.shape[0], 100) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_finite_dataset_unknown_cardinality_no_steps_arg(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - model.compile( - 'rmsprop', 'mse', run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((100, 3), dtype=np.float32) - targets = np.random.randint(0, 4, size=100, dtype=np.int32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.filter(lambda x, y: True).batch(10) - self.assertEqual( - keras.backend.get_value(tf.data.experimental.cardinality(dataset)), - tf.data.experimental.UNKNOWN_CARDINALITY) - - batch_counter = BatchCounterCallback() - history = model.fit(dataset, epochs=2, verbose=1, callbacks=[batch_counter]) - - self.assertLen(history.history['loss'], 2) - self.assertEqual(batch_counter.batch_end_count, 20) - model.evaluate(dataset) - out = model.predict(dataset) - self.assertEqual(out.shape[0], 100) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_finite_dataset_unknown_cardinality_no_step_with_train_and_val(self): - - class CaptureStdout: - - def __enter__(self): - self._stdout = sys.stdout - string_io = io.StringIO() - sys.stdout = string_io - self._stringio = string_io - return self - - def __exit__(self, *args): - self.output = self._stringio.getvalue() - sys.stdout = self._stdout - - model = test_utils.get_small_mlp(1, 4, input_dim=3) - model.compile( - 'rmsprop', 'mse', run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((100, 3), dtype=np.float32) - targets = np.random.randint(0, 4, size=100, dtype=np.int32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.filter(lambda x, y: True).batch(10) - self.assertEqual( - keras.backend.get_value(tf.data.experimental.cardinality(dataset)), - tf.data.experimental.UNKNOWN_CARDINALITY) - - batch_counter = BatchCounterCallback() - io_utils.enable_interactive_logging() - with CaptureStdout() as capture: - history = model.fit( - dataset, - epochs=2, - callbacks=[batch_counter], - validation_data=dataset.take(3)) - - lines = capture.output.splitlines() - - self.assertIn('10/10', lines[-1]) - - self.assertLen(history.history['loss'], 2) - 
self.assertEqual(batch_counter.batch_begin_count, 21) - self.assertEqual(batch_counter.batch_end_count, 20) - model.evaluate(dataset) - out = model.predict(dataset) - self.assertEqual(out.shape[0], 100) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_finite_dataset_unknown_cardinality_out_of_data(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - model.compile( - 'rmsprop', 'mse', run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((100, 3), dtype=np.float32) - targets = np.random.randint(0, 4, size=100, dtype=np.int32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.filter(lambda x, y: True).batch(10) - self.assertEqual( - keras.backend.get_value(tf.data.experimental.cardinality(dataset)), - tf.data.experimental.UNKNOWN_CARDINALITY) - - batch_counter = BatchCounterCallback() - with tf.compat.v1.test.mock.patch.object(logging, 'warning') as mock_log: - # steps_per_epoch (200) is greater than the dataset size (100). As this is - # unexpected, training will stop and not make it to the second epoch. - history = model.fit( - dataset, - epochs=2, - verbose=1, - callbacks=[batch_counter], - steps_per_epoch=200) - self.assertIn('ran out of data; interrupting training.', - str(mock_log.call_args)) - self.assertIn( - 'can generate at least ' - '`steps_per_epoch * epochs` batches (in this case, 400 batches). ' - 'You may need to use the repeat() function when ' - 'building your dataset.', str(mock_log.call_args)) - - self.assertLen(history.history['loss'], 1) - self.assertEqual(batch_counter.batch_end_count, 10) - model.evaluate(dataset) - out = model.predict(dataset) - self.assertEqual(out.shape[0], 100) - - @test_combinations.run_all_keras_modes - def test_with_external_loss(self): - inp = keras.Input(shape=(4,), name='inp1') - out = keras.layers.Dense(2)(inp) - model = keras.Model(inp, out) - model.add_loss(tf.reduce_mean(out)) - model.compile('rmsprop') - x = np.ones((10, 4)) - - # dataset contains only features, no labels. - dataset = tf.data.Dataset.from_tensor_slices(x).repeat(10).batch(10) - model.fit(dataset) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_train_eval_with_steps(self): - # See b/142880049 for more details. - inp = keras.Input(shape=(4,), name='inp1') - out = keras.layers.Dense(2)(inp) - model = keras.Model(inp, out) - model.compile( - 'rmsprop', loss='mse', run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((100, 4), dtype=np.float32) - targets = np.random.randint(0, 2, size=100, dtype=np.int32) - training_ds = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).repeat().batch(10) - - # Create eval dataset with generator, so that dataset won't contain the - # overall size metadata. Without eval_steps, we expect to run through all - # the data in this dataset every epoch. - def gen(): - for _ in range(100): - yield (np.zeros(4, dtype=np.float32), - np.random.randint(0, 2, size=1, dtype=np.int32)) - - eval_ds = tf.data.Dataset.from_generator( - generator=gen, - output_types=('float64', 'int32'), - output_shapes=([4], [1])).batch(100) - batch_counter = BatchCounterCallback() - - model.fit( - training_ds, - steps_per_epoch=10, - epochs=10, - validation_data=eval_ds, - callbacks=[batch_counter]) - - # Expect 10 batch from training per epoch. 
- self.assertEqual(batch_counter.batch_end_count, 100) + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_calling_model_on_same_dataset(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + optimizer = "rmsprop" + loss = "mse" + metrics = ["mae"] + model.compile( + optimizer, + loss, + metrics=metrics, + run_eagerly=test_utils.should_run_eagerly(), + ) + + inputs = np.zeros((10, 3), np.float32) + targets = np.zeros((10, 4), np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + # Call fit with validation data + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_data=dataset, + validation_steps=2, + ) + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_data=dataset, + validation_steps=2, + ) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_training_and_eval_methods_on_dataset(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + optimizer = "rmsprop" + loss = "mse" + metrics = ["mae", metrics_module.CategoricalAccuracy()] + model.compile( + optimizer, + loss, + metrics=metrics, + run_eagerly=test_utils.should_run_eagerly(), + ) + + inputs = np.zeros((10, 3), np.float32) + targets = np.zeros((10, 4), np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat() # Infinite dataset. + dataset = dataset.batch(10) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset, steps=2, verbose=1) + model.predict(dataset, steps=2) + + # Test with validation data + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_data=dataset, + validation_steps=2, + ) + + # Test with validation split + with self.assertRaises(ValueError): + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_split=0.5, + validation_steps=2, + ) + + # Test with sample weight. + sample_weight = np.random.random((10,)) + with self.assertRaisesRegex( + ValueError, r"`sample_weight` argument is not supported .+dataset" + ): + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + sample_weight=sample_weight, + ) + + with self.assertRaisesRegex( + ValueError, + "(you should not specify a target)|" + "(`y` argument is not supported when using dataset as input.)", + ): + model.fit(dataset, dataset, epochs=1, steps_per_epoch=2, verbose=0) + + # With an infinite dataset, `steps_per_epoch`/`steps` argument is + # required. 
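As a quick standalone check of why the comment above holds (sketch, eager mode): `repeat()` without a count gives the dataset infinite cardinality, so no steps value can be inferred and an explicit argument is required:

import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices([0.0, 1.0]).repeat().batch(2)
assert (
    tf.data.experimental.cardinality(ds)
    == tf.data.experimental.INFINITE_CARDINALITY
)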
+ with self.assertRaises(ValueError): + model.fit(dataset, epochs=1, verbose=0) + with self.assertRaises(ValueError): + model.evaluate(dataset, verbose=0) + with self.assertRaises(ValueError): + model.predict(dataset, verbose=0) + + @test_combinations.run_with_all_model_types(exclude_models="sequential") + @test_combinations.run_all_keras_modes + def test_training_and_eval_methods_on_multi_input_output_dataset(self): + input_a = keras.layers.Input(shape=(3,), name="input_1") + input_b = keras.layers.Input(shape=(3,), name="input_2") + dense = keras.layers.Dense(4, name="dense") + dropout = keras.layers.Dropout(0.5, name="dropout") + branch_a = [input_a, dense] + branch_b = [input_b, dense, dropout] + + model = test_utils.get_multi_io_model(branch_a, branch_b) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + + input_a_np = np.random.random((10, 3)).astype(dtype=np.float32) + input_b_np = np.random.random((10, 3)).astype(dtype=np.float32) + output_d_np = np.random.random((10, 4)).astype(dtype=np.float32) + output_e_np = np.random.random((10, 4)).astype(dtype=np.float32) + + # Test with tuples + dataset_tuple = tf.data.Dataset.from_tensor_slices( + ((input_a_np, input_b_np), (output_d_np, output_e_np)) + ) + dataset_tuple = dataset_tuple.repeat(100) + dataset_tuple = dataset_tuple.batch(10) + + model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset_tuple, steps=2, verbose=1) + + # Test with dict + input_dict = {"input_1": input_a_np, "input_2": input_b_np} + if test_utils.get_model_type() == "subclass": + output_dict = {"output_1": output_d_np, "output_2": output_e_np} + else: + output_dict = {"dense": output_d_np, "dropout": output_e_np} + + dataset_dict = tf.data.Dataset.from_tensor_slices( + (input_dict, output_dict) + ) + dataset_dict = dataset_dict.repeat(100) + dataset_dict = dataset_dict.batch(10) + + model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset_dict, steps=2, verbose=1) + + predict_dataset_dict = tf.data.Dataset.from_tensor_slices(input_dict) + predict_dataset_dict = predict_dataset_dict.repeat(100) + predict_dataset_dict = predict_dataset_dict.batch(10) + model.predict(predict_dataset_dict, steps=1) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_dataset_with_sample_weights(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + optimizer = "rmsprop" + loss = "mse" + metrics = ["mae", metrics_module.CategoricalAccuracy()] + model.compile( + optimizer, + loss, + metrics=metrics, + run_eagerly=test_utils.should_run_eagerly(), + ) + + inputs = np.zeros((10, 3), np.float32) + targets = np.zeros((10, 4), np.float32) + sample_weights = np.ones((10), np.float32) + dataset = tf.data.Dataset.from_tensor_slices( + (inputs, targets, sample_weights) + ) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset, steps=2, verbose=1) + model.predict(dataset, steps=2) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_dataset_with_sample_weights_correctness(self): + x = keras.layers.Input(shape=(1,), name="input") + y = keras.layers.Dense( + 1, kernel_initializer="ones", bias_initializer="zeros", name="dense" + )(x) + model = keras.Model(x, y) + optimizer = "rmsprop" + loss = "mse" + model.compile(optimizer, loss) + inputs = np.array([[0], [1], [2], [3]], np.float32) + 
targets = np.array([[2], [4], [6], [8]], np.float32) + sample_weights = np.array([0.25, 0.5, 0.75, 1], np.float32) + ds = tf.data.Dataset.from_tensor_slices( + (inputs, targets, sample_weights) + ).batch(2) + result = model.evaluate(ds, verbose=1) + # The per sample loss is multiplied by the corresponding sample weight. + # The average of these weighted losses is the return value of the + # `evaluate` call. For example, in the test above the average weighted + # loss is calculated in the following manner: + # ((2-0)^2) * 0.25 + ((4-1)^2) * 0.5 + ((6-2)^2 * 0.75) + ((8-3)^2 * 1) + # equals 42.5 / 4 = 10.625 + self.assertEqual(result, 10.625) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_dataset_with_sparse_labels(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + optimizer = "rmsprop" + model.compile( + optimizer, + loss="sparse_categorical_crossentropy", + run_eagerly=test_utils.should_run_eagerly(), + ) + + inputs = np.zeros((10, 3), dtype=np.float32) + targets = np.random.randint(0, 4, size=10, dtype=np.int32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + + @test_combinations.run_all_keras_modes + def test_dataset_fit_correctness(self): + class SumLayer(keras.layers.Layer): + def build(self, _): + self.w = self.add_weight("w", ()) + + def call(self, inputs): + return ( + keras.backend.sum(inputs, axis=1, keepdims=True) + + self.w * 0 + ) + + model = keras.Sequential([SumLayer(input_shape=(2,))]) + model.compile( + "rmsprop", loss="mae", run_eagerly=test_utils.should_run_eagerly() + ) + + inputs = np.zeros((40, 2), dtype=np.float32) + inputs[10:20, :] = 2 + inputs[20:30, :] = 1 + inputs[30:, :] = 4 + targets = np.zeros((40, 1), dtype=np.float32) + + # Test correctness with `steps_per_epoch`. + train_dataset = tf.data.Dataset.from_tensor_slices( + (inputs, targets) + ).batch(10) + val_dataset = tf.data.Dataset.from_tensor_slices( + (inputs, targets) + ).batch(10) + history = model.fit( + train_dataset, + epochs=2, + steps_per_epoch=2, + verbose=1, + validation_data=val_dataset, + validation_steps=2, + ) + self.assertAllClose( + history.history["loss"], + [inputs[:20].sum() / 20, inputs[20:].sum() / 20], + ) + # The validation dataset will be reset at the end of each validation + # run. + self.assertAllClose( + history.history["val_loss"], + [inputs[:20].sum() / 20, inputs[:20].sum() / 20], + ) + + # Test correctness with dataset reset. 
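For reference, the 10.625 expectation asserted in the sample-weights test above can be re-derived with plain NumPy (standalone sketch mirroring the in-test comment's arithmetic; not part of this change):

import numpy as np

preds = np.array([0.0, 1.0, 2.0, 3.0])  # Dense(1), kernel=ones, bias=zeros
targets = np.array([2.0, 4.0, 6.0, 8.0])
weights = np.array([0.25, 0.5, 0.75, 1.0])

# Mean over the batch of per-sample squared errors scaled by their weights.
assert np.mean(weights * (targets - preds) ** 2) == 10.625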
+ train_dataset = tf.data.Dataset.from_tensor_slices( + (inputs, targets) + ).batch(10) + val_dataset = tf.data.Dataset.from_tensor_slices( + (inputs, targets) + ).batch(10) + history = model.fit( + train_dataset, epochs=2, verbose=1, validation_data=val_dataset + ) + self.assertAllClose( + history.history["loss"], [inputs.sum() / 40, inputs.sum() / 40] + ) + self.assertAllClose( + history.history["val_loss"], [inputs.sum() / 40, inputs.sum() / 40] + ) + + def test_dataset_input_shape_validation(self): + with tf.compat.v1.get_default_graph().as_default(), self.cached_session(): # noqa: E501 + model = test_utils.get_small_functional_mlp(1, 4, input_dim=3) + model.compile(optimizer="rmsprop", loss="mse") + + # User forgets to batch the dataset + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + + with self.assertRaisesRegex( + ValueError, + r"expected (.*?) to have shape \(3,\) " + r"but got array with shape \(1,\)", + ): + model.train_on_batch(dataset) + + # Wrong input shape + inputs = np.zeros((10, 5)) + targets = np.zeros((10, 4)) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + with self.assertRaisesRegex( + ValueError, r"expected (.*?) to have shape \(3,\)" + ): + model.train_on_batch(dataset) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_finite_dataset_known_cardinality_no_steps_arg(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + inputs = np.zeros((100, 3), dtype=np.float32) + targets = np.random.randint(0, 4, size=100, dtype=np.int32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.batch(10) + + batch_counter = BatchCounterCallback() + history = model.fit( + dataset, epochs=2, verbose=1, callbacks=[batch_counter] + ) + + self.assertLen(history.history["loss"], 2) + self.assertEqual(batch_counter.batch_end_count, 20) + model.evaluate(dataset) + out = model.predict(dataset) + self.assertEqual(out.shape[0], 100) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_finite_dataset_unknown_cardinality_no_steps_arg(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + inputs = np.zeros((100, 3), dtype=np.float32) + targets = np.random.randint(0, 4, size=100, dtype=np.int32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.filter(lambda x, y: True).batch(10) + self.assertEqual( + keras.backend.get_value(tf.data.experimental.cardinality(dataset)), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + + batch_counter = BatchCounterCallback() + history = model.fit( + dataset, epochs=2, verbose=1, callbacks=[batch_counter] + ) + + self.assertLen(history.history["loss"], 2) + self.assertEqual(batch_counter.batch_end_count, 20) + model.evaluate(dataset) + out = model.predict(dataset) + self.assertEqual(out.shape[0], 100) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_finite_dataset_unknown_cardinality_no_step_with_train_and_val( + self, + ): + class CaptureStdout: + def __enter__(self): + self._stdout = sys.stdout + string_io = io.StringIO() + sys.stdout = string_io + 
self._stringio = string_io + return self + + def __exit__(self, *args): + self.output = self._stringio.getvalue() + sys.stdout = self._stdout + + model = test_utils.get_small_mlp(1, 4, input_dim=3) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + inputs = np.zeros((100, 3), dtype=np.float32) + targets = np.random.randint(0, 4, size=100, dtype=np.int32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.filter(lambda x, y: True).batch(10) + self.assertEqual( + keras.backend.get_value(tf.data.experimental.cardinality(dataset)), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + + batch_counter = BatchCounterCallback() + io_utils.enable_interactive_logging() + with CaptureStdout() as capture: + history = model.fit( + dataset, + epochs=2, + callbacks=[batch_counter], + validation_data=dataset.take(3), + ) + + lines = capture.output.splitlines() + + self.assertIn("10/10", lines[-1]) + + self.assertLen(history.history["loss"], 2) + self.assertEqual(batch_counter.batch_begin_count, 21) + self.assertEqual(batch_counter.batch_end_count, 20) + model.evaluate(dataset) + out = model.predict(dataset) + self.assertEqual(out.shape[0], 100) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_finite_dataset_unknown_cardinality_out_of_data(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + inputs = np.zeros((100, 3), dtype=np.float32) + targets = np.random.randint(0, 4, size=100, dtype=np.int32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.filter(lambda x, y: True).batch(10) + self.assertEqual( + keras.backend.get_value(tf.data.experimental.cardinality(dataset)), + tf.data.experimental.UNKNOWN_CARDINALITY, + ) + + batch_counter = BatchCounterCallback() + with tf.compat.v1.test.mock.patch.object( + logging, "warning" + ) as mock_log: + # steps_per_epoch (200) is greater than the dataset size (100). As + # this is unexpected, training will stop and not make it to the + # second epoch. + history = model.fit( + dataset, + epochs=2, + verbose=1, + callbacks=[batch_counter], + steps_per_epoch=200, + ) + self.assertIn( + "ran out of data; interrupting training.", + str(mock_log.call_args), + ) + self.assertIn( + "can generate at least " + "`steps_per_epoch * epochs` batches (in this case, " + "400 batches). You may need to use the repeat() function when " + "building your dataset.", + str(mock_log.call_args), + ) + + self.assertLen(history.history["loss"], 1) + self.assertEqual(batch_counter.batch_end_count, 10) + model.evaluate(dataset) + out = model.predict(dataset) + self.assertEqual(out.shape[0], 100) + + @test_combinations.run_all_keras_modes + def test_with_external_loss(self): + inp = keras.Input(shape=(4,), name="inp1") + out = keras.layers.Dense(2)(inp) + model = keras.Model(inp, out) + model.add_loss(tf.reduce_mean(out)) + model.compile("rmsprop") + x = np.ones((10, 4)) + + # dataset contains only features, no labels. + dataset = tf.data.Dataset.from_tensor_slices(x).repeat(10).batch(10) + model.fit(dataset) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_train_eval_with_steps(self): + # See b/142880049 for more details. 
+        inp = keras.Input(shape=(4,), name="inp1")
+        out = keras.layers.Dense(2)(inp)
+        model = keras.Model(inp, out)
+        model.compile(
+            "rmsprop", loss="mse", run_eagerly=test_utils.should_run_eagerly()
+        )
+
+        inputs = np.zeros((100, 4), dtype=np.float32)
+        targets = np.random.randint(0, 2, size=100, dtype=np.int32)
+        training_ds = (
+            tf.data.Dataset.from_tensor_slices((inputs, targets))
+            .repeat()
+            .batch(10)
+        )
+
+        # Create the eval dataset with a generator, so that the dataset won't
+        # contain the overall size metadata. Without eval_steps, we expect to
+        # run through all the data in this dataset every epoch.
+        def gen():
+            for _ in range(100):
+                yield (
+                    np.zeros(4, dtype=np.float32),
+                    np.random.randint(0, 2, size=1, dtype=np.int32),
+                )
+
+        eval_ds = tf.data.Dataset.from_generator(
+            generator=gen,
+            output_types=("float64", "int32"),
+            output_shapes=([4], [1]),
+        ).batch(100)
+        batch_counter = BatchCounterCallback()
+
+        model.fit(
+            training_ds,
+            steps_per_epoch=10,
+            epochs=10,
+            validation_data=eval_ds,
+            callbacks=[batch_counter],
+        )
+
+        # Expect 10 batches from training per epoch.
+        self.assertEqual(batch_counter.batch_end_count, 100)
 
 
 class TestMetricsWithDatasets(test_combinations.TestCase):
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  def test_metrics_correctness_with_dataset(self):
-    layers = [
-        keras.layers.Dense(
-            8, activation='relu', input_dim=4, kernel_initializer='ones'),
-        keras.layers.Dense(1, activation='sigmoid', kernel_initializer='ones')
-    ]
-
-    model = test_utils.get_model_from_layers(layers, (4,))
-
-    model.compile(
-        loss='binary_crossentropy',
-        metrics=['accuracy', metrics_module.BinaryAccuracy()],
-        optimizer='rmsprop',
-        run_eagerly=test_utils.should_run_eagerly())
-
-    np.random.seed(123)
-    x = np.random.randint(10, size=(100, 4)).astype(np.float32)
-    y = np.random.randint(2, size=(100, 1)).astype(np.float32)
-    dataset = tf.data.Dataset.from_tensor_slices((x, y))
-    dataset = dataset.batch(10)
-    outs = model.evaluate(dataset, steps=10)
-    self.assertEqual(np.around(outs[1], decimals=1), 0.5)
-    self.assertEqual(np.around(outs[2], decimals=1), 0.5)
-
-    y = np.zeros((100, 1), dtype=np.float32)
-    dataset = tf.data.Dataset.from_tensor_slices((x, y))
-    dataset = dataset.repeat(100)
-    dataset = dataset.batch(10)
-    outs = model.evaluate(dataset, steps=10)
-    self.assertEqual(outs[1], 0.)
-    self.assertEqual(outs[2], 0.)
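A note on why the tests above go out of their way to hide dataset size: `tf.data` reports cardinality only when it can prove it, and both `filter()` and `from_generator` defeat that. A small standalone sketch (not part of the diff) of what the cardinality API returns in each case:

```python
import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices(list(range(100))).batch(10)
print(tf.data.experimental.cardinality(ds).numpy())  # 10

# filter() can drop an unknown number of elements, so cardinality becomes
# UNKNOWN even when the predicate keeps everything.
filtered = ds.filter(lambda x: True)
print(
    tf.data.experimental.cardinality(filtered)
    == tf.data.experimental.UNKNOWN_CARDINALITY
)  # True

# from_generator() carries no size metadata at all, matching the eval
# dataset built in the test above.
gen_ds = tf.data.Dataset.from_generator(
    lambda: iter(range(3)), output_types=tf.int32
)
print(
    tf.data.experimental.cardinality(gen_ds)
    == tf.data.experimental.UNKNOWN_CARDINALITY
)  # True
```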
- - -if __name__ == '__main__': - tf.test.main() + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_metrics_correctness_with_dataset(self): + layers = [ + keras.layers.Dense( + 8, activation="relu", input_dim=4, kernel_initializer="ones" + ), + keras.layers.Dense( + 1, activation="sigmoid", kernel_initializer="ones" + ), + ] + + model = test_utils.get_model_from_layers(layers, (4,)) + + model.compile( + loss="binary_crossentropy", + metrics=["accuracy", metrics_module.BinaryAccuracy()], + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + + np.random.seed(123) + x = np.random.randint(10, size=(100, 4)).astype(np.float32) + y = np.random.randint(2, size=(100, 1)).astype(np.float32) + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + dataset = dataset.batch(10) + outs = model.evaluate(dataset, steps=10) + self.assertEqual(np.around(outs[1], decimals=1), 0.5) + self.assertEqual(np.around(outs[2], decimals=1), 0.5) + + y = np.zeros((100, 1), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + outs = model.evaluate(dataset, steps=10) + self.assertEqual(outs[1], 0.0) + self.assertEqual(outs[2], 0.0) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/training_distributed_v1.py b/keras/engine/training_distributed_v1.py index 70e8cfaaecb3..dc600160d658 100644 --- a/keras/engine/training_distributed_v1.py +++ b/keras/engine/training_distributed_v1.py @@ -14,11 +14,9 @@ # ============================================================================== """Part of the Keras training engine related to distributed training.""" +import numpy as np import tensorflow.compat.v2 as tf -# pylint: disable=protected-access -import numpy as np -from tensorflow.python.distribute import input_lib from keras import backend from keras import callbacks as cbks from keras.distribute import distribute_coordinator_utils as dc @@ -28,761 +26,898 @@ from keras.engine import training_utils_v1 from keras.utils.generic_utils import Progbar from keras.utils.mode_keys import ModeKeys + +# isort: off +from tensorflow.python.distribute import input_lib from tensorflow.python.platform import tf_logging as logging def _per_replica_execution_function(model, mode): - exec_func = model._make_execution_function(mode) - return (exec_func.inputs, exec_func.outputs, exec_func.updates_op, - exec_func.session_kwargs) + exec_func = model._make_execution_function(mode) + return ( + exec_func.inputs, + exec_func.outputs, + exec_func.updates_op, + exec_func.session_kwargs, + ) def _build_model(strategy, model, mode, inputs, targets=None): - if model._compile_distribution: - dist_utils.clone_model_on_replicas( - model, strategy, mode, inputs=inputs, targets=targets) - else: - dist_utils._build_distributed_network(model, strategy, mode, inputs, - targets) + if model._compile_distribution: + dist_utils.clone_model_on_replicas( + model, strategy, mode, inputs=inputs, targets=targets + ) + else: + dist_utils._build_distributed_network( + model, strategy, mode, inputs, targets + ) def _make_train_step_fn(model, mode, strategy, output_labels): - """Create step fn. - - Args: - model: a Keras Model instance. - mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. - strategy: a `tf.distribute.Strategy` instance. - output_labels: the output labels for the step function. - - Returns: - A step function to run by `tf.distribute.Strategy`. 
- """ - - def _step_fn(ctx, inputs): - """A step fn that returns update ops.""" - if isinstance(inputs, (tuple, list)) and len(inputs) == 2: - inputs, targets = inputs - else: - targets = None - - # When input feature is a dictionary of tensors, dictionary is flattended - # to an array and passed as a model input. This results in input mismatch - # when model input layer names are not sorted in alphabetical order as - # `nest.flatten()`sorts dictionary elements by keys. As so, transform input - # tensors into an array and order it along `model._feed_input_names`. - if isinstance(inputs, dict): - inputs = [inputs[input_name] for input_name in model._feed_input_names] - - _build_model(strategy, model, mode, inputs, targets) - - (grouped_inputs, grouped_outputs, grouped_updates, - grouped_session_args) = strategy.extended.call_for_each_replica( - _per_replica_execution_function, - args=(dist_utils.get_distributed_model(model, mode), mode)) - (all_inputs, all_outputs, all_updates, - all_session_args) = dist_utils.unwrap_values(strategy, grouped_inputs, - grouped_outputs, - grouped_updates, - grouped_session_args) - combined_fn = backend.function( - all_inputs, - all_outputs, - updates=all_updates, - name='distributed_' + str(mode) + '_function', - **all_session_args) - - for label, output in zip(output_labels, combined_fn.outputs): - if label == 'loss': - reduce_op = tf.distribute.ReduceOp.SUM - else: - # We reduce all other metrics using mean for now. This is temporary - # workaround until new metrics are in place. - reduce_op = tf.distribute.ReduceOp.MEAN - ctx.set_last_step_output(label, output, reduce_op) - - # TODO(priyag, sourabhbajaj): Ignoring these things from the combined_fn: - # feed_dict, session kwargs, run options, run_metadata for now. These should - # be handled appropriately - return combined_fn.updates_op - - return _step_fn - - -def experimental_tpu_fit_loop(model, - dataset, - epochs=100, - verbose=1, - callbacks=None, - initial_epoch=0, - steps_per_epoch=None, - val_dataset=None, - validation_steps=None, - validation_freq=1): - """Fit loop for training with TPU tf.distribute.Strategy. - - Args: - model: Keras Model instance. - dataset: Dataset that returns inputs and targets - epochs: Number of times to iterate over the data - verbose: Integer, Verbosity mode, 0, 1 or 2 - callbacks: List of callbacks to be called during training - initial_epoch: Epoch at which to start training - (useful for resuming a previous training run) - steps_per_epoch: Total number of steps (batches of samples) - before declaring one epoch finished and starting the - next epoch. Ignored with the default value of `None`. - val_dataset: Dataset for validation data. - validation_steps: Number of steps to run validation for - (only if doing validation from data tensors). - Ignored with the default value of `None`. - validation_freq: Only relevant if validation data is provided. Integer or - `collections.abc.Container` instance (e.g. list, tuple, etc.). If an - integer, specifies how many training epochs to run before a new - validation run is performed, e.g. `validation_freq=2` runs - validation every 2 epochs. If a Container, specifies the epochs on - which to run validation, e.g. `validation_freq=[1, 2, 10]` runs - validation at the end of the 1st, 2nd, and 10th epochs. - - Returns: - Returns `None`. - - Raises: - ValueError: in case of invalid arguments. 
- """ - mode = ModeKeys.TRAIN - - current_strategy = model._distribution_strategy - iteration_value = min(steps_per_epoch, - current_strategy.extended.steps_per_run) - steps_per_run = backend.variable( - value=iteration_value, - dtype='int32', - name='steps_per_run') - - # TODO(fchollet): add support for `steps_per_epoch=None` in TPU loops. - iterator = dist_utils.get_iterator(dataset, current_strategy) - - scope = dist_utils.distributed_scope( - strategy=current_strategy, learning_phase=1) - scope.__enter__() - - out_labels = model.metrics_names or [] - - step_fn = _make_train_step_fn(model, ModeKeys.TRAIN, current_strategy, - out_labels) - - # Add initial dummy values for loss and other metric tensors. - initial_loop_values = {} - initial_loop_values['loss'] = tf.constant(1e7) - for m in model._get_training_eval_metrics(): - tensor = m.result() - initial_loop_values[m.name] = tf.zeros(tensor.shape, tensor.dtype) - - ctx = current_strategy.extended.experimental_run_steps_on_iterator( - step_fn, iterator, iterations=steps_per_run, - initial_loop_values=initial_loop_values) - train_op = ctx.run_op - output_tensors = ctx.last_step_outputs - - do_validation = bool(validation_steps) - - if model._compile_distribution: - dist_utils._copy_weights_to_distributed_model(model, mode) - - callbacks = cbks.configure_callbacks( - callbacks, - model, - do_validation=do_validation, - epochs=epochs, - steps_per_epoch=steps_per_epoch, - verbose=verbose, - count_mode='steps', - mode=mode) - - # Calculate the steps each time on the device. - steps_to_run = ([current_strategy.extended.steps_per_run] * - (steps_per_epoch // - current_strategy.extended.steps_per_run)) - if steps_per_epoch % current_strategy.extended.steps_per_run: - steps_to_run.append( - steps_per_epoch % current_strategy.extended.steps_per_run) - target_steps = len(steps_to_run) - - callbacks._call_begin_hook(mode) - - initial_epoch = model._maybe_load_initial_epoch_from_ckpt(initial_epoch, mode) - - for epoch in range(initial_epoch, epochs): + """Create step fn. + + Args: + model: a Keras Model instance. + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. + strategy: a `tf.distribute.Strategy` instance. + output_labels: the output labels for the step function. + + Returns: + A step function to run by `tf.distribute.Strategy`. + """ + + def _step_fn(ctx, inputs): + """A step fn that returns update ops.""" + if isinstance(inputs, (tuple, list)) and len(inputs) == 2: + inputs, targets = inputs + else: + targets = None + + # When input feature is a dictionary of tensors, dictionary is + # flattended to an array and passed as a model input. This results in + # input mismatch when model input layer names are not sorted in + # alphabetical order as `nest.flatten()`sorts dictionary elements by + # keys. As so, transform input tensors into an array and order it along + # `model._feed_input_names`. 
+        if isinstance(inputs, dict):
+            inputs = [
+                inputs[input_name] for input_name in model._feed_input_names
+            ]
+
+        _build_model(strategy, model, mode, inputs, targets)
+
+        (
+            grouped_inputs,
+            grouped_outputs,
+            grouped_updates,
+            grouped_session_args,
+        ) = strategy.extended.call_for_each_replica(
+            _per_replica_execution_function,
+            args=(dist_utils.get_distributed_model(model, mode), mode),
+        )
+        (
+            all_inputs,
+            all_outputs,
+            all_updates,
+            all_session_args,
+        ) = dist_utils.unwrap_values(
+            strategy,
+            grouped_inputs,
+            grouped_outputs,
+            grouped_updates,
+            grouped_session_args,
+        )
+        combined_fn = backend.function(
+            all_inputs,
+            all_outputs,
+            updates=all_updates,
+            name="distributed_" + str(mode) + "_function",
+            **all_session_args
+        )
+
+        for label, output in zip(output_labels, combined_fn.outputs):
+            if label == "loss":
+                reduce_op = tf.distribute.ReduceOp.SUM
+            else:
+                # We reduce all other metrics using mean for now. This is a
+                # temporary workaround until new metrics are in place.
+                reduce_op = tf.distribute.ReduceOp.MEAN
+            ctx.set_last_step_output(label, output, reduce_op)
+
+        # TODO(priyag, sourabhbajaj): Ignoring these things from the
+        # combined_fn: feed_dict, session kwargs, run options, run_metadata
+        # for now. These should be handled appropriately.
+        return combined_fn.updates_op
+
+    return _step_fn
+
+
+def experimental_tpu_fit_loop(
+    model,
+    dataset,
+    epochs=100,
+    verbose=1,
+    callbacks=None,
+    initial_epoch=0,
+    steps_per_epoch=None,
+    val_dataset=None,
+    validation_steps=None,
+    validation_freq=1,
+):
+    """Fit loop for training with TPU tf.distribute.Strategy.
+
+    Args:
+      model: Keras Model instance.
+      dataset: Dataset that returns inputs and targets
+      epochs: Number of times to iterate over the data
+      verbose: Integer, Verbosity mode, 0, 1 or 2
+      callbacks: List of callbacks to be called during training
+      initial_epoch: Epoch at which to start training
+        (useful for resuming a previous training run)
+      steps_per_epoch: Total number of steps (batches of samples)
+        before declaring one epoch finished and starting the
+        next epoch. Ignored with the default value of `None`.
+      val_dataset: Dataset for validation data.
+      validation_steps: Number of steps to run validation for
+        (only if doing validation from data tensors).
+        Ignored with the default value of `None`.
+      validation_freq: Only relevant if validation data is provided. Integer
+        or `collections.abc.Container` instance (e.g. list, tuple, etc.). If
+        an integer, specifies how many training epochs to run before a new
+        validation run is performed, e.g. `validation_freq=2` runs
+        validation every 2 epochs. If a Container, specifies the epochs on
+        which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
+        validation at the end of the 1st, 2nd, and 10th epochs.
+
+    Returns:
+      Returns `None`.
+
+    Raises:
+      ValueError: in case of invalid arguments.
+    """
+    mode = ModeKeys.TRAIN
+
+    current_strategy = model._distribution_strategy
+    iteration_value = min(
+        steps_per_epoch, current_strategy.extended.steps_per_run
+    )
+    steps_per_run = backend.variable(
+        value=iteration_value, dtype="int32", name="steps_per_run"
+    )
+
+    # TODO(fchollet): add support for `steps_per_epoch=None` in TPU loops.
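The fit loop below chunks `steps_per_epoch` into device-sized runs of `current_strategy.extended.steps_per_run` (the `steps_to_run` list). A quick standalone sketch of that arithmetic, with hypothetical numbers:

```python
# Hypothetical values: 10 steps per epoch, device runs 4 steps at a time.
steps_per_epoch = 10
steps_per_run = 4  # stand-in for current_strategy.extended.steps_per_run

steps_to_run = [steps_per_run] * (steps_per_epoch // steps_per_run)
if steps_per_epoch % steps_per_run:
    steps_to_run.append(steps_per_epoch % steps_per_run)

assert steps_to_run == [4, 4, 2]  # three device invocations per epoch
```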
+    iterator = dist_utils.get_iterator(dataset, current_strategy)
+
+    scope = dist_utils.distributed_scope(
+        strategy=current_strategy, learning_phase=1
+    )
+    scope.__enter__()
+
+    out_labels = model.metrics_names or []
+
+    step_fn = _make_train_step_fn(
+        model, ModeKeys.TRAIN, current_strategy, out_labels
+    )
+
+    # Add initial dummy values for loss and other metric tensors.
+    initial_loop_values = {}
+    initial_loop_values["loss"] = tf.constant(1e7)
+    for m in model._get_training_eval_metrics():
+        tensor = m.result()
+        initial_loop_values[m.name] = tf.zeros(tensor.shape, tensor.dtype)
+
+    ctx = current_strategy.extended.experimental_run_steps_on_iterator(
+        step_fn,
+        iterator,
+        iterations=steps_per_run,
+        initial_loop_values=initial_loop_values,
+    )
+    train_op = ctx.run_op
+    output_tensors = ctx.last_step_outputs
+
+    do_validation = bool(validation_steps)
+
+    if model._compile_distribution:
+        dist_utils._copy_weights_to_distributed_model(model, mode)
+
+    callbacks = cbks.configure_callbacks(
+        callbacks,
+        model,
+        do_validation=do_validation,
+        epochs=epochs,
+        steps_per_epoch=steps_per_epoch,
+        verbose=verbose,
+        count_mode="steps",
+        mode=mode,
+    )
+
+    # Calculate the steps each time on the device.
+    steps_to_run = [current_strategy.extended.steps_per_run] * (
+        steps_per_epoch // current_strategy.extended.steps_per_run
+    )
+    if steps_per_epoch % current_strategy.extended.steps_per_run:
+        steps_to_run.append(
+            steps_per_epoch % current_strategy.extended.steps_per_run
+        )
+    target_steps = len(steps_to_run)
+
+    callbacks._call_begin_hook(mode)
+
+    initial_epoch = model._maybe_load_initial_epoch_from_ckpt(
+        initial_epoch, mode
+    )
+
+    for epoch in range(initial_epoch, epochs):
+        dist_utils._reset_metrics(model)
+        callbacks.on_epoch_begin(epoch)
+        epoch_logs = {}
+        step_index = 0
+        prev_step_count = None
+        current_step = 0
+        while current_step < target_steps:
+            step_count = steps_to_run[current_step]
+            batch_logs = {
+                "batch": step_index,
+                "size": 1,
+                "num_steps": step_count,
+            }
+            callbacks._call_batch_hook(mode, "begin", step_index, batch_logs)
+            if prev_step_count is None or step_count != prev_step_count:
+                backend.get_session().run(steps_per_run.assign(step_count))
+                prev_step_count = step_count
+            try:
+                _, outputs = backend.batch_get_value([train_op, output_tensors])
+            except tf.errors.OutOfRangeError:
+                logging.warning(
+                    "Your dataset iterator ran out of data; "
+                    "interrupting training. Make sure that your dataset "
+                    "can generate at least `steps_per_epoch * epochs` "
+                    "batches (in this case, %d batches)."
+                    % (steps_per_epoch * epochs)
+                )
+                break
+
+            batch_logs.update(outputs)
+            callbacks._call_batch_hook(mode, "end", step_index, batch_logs)
+            step_index = step_index + step_count
+            current_step += 1
+
+        if callbacks.model.stop_training:
+            break
+
+        if do_validation and training_utils_v1.should_run_validation(
+            validation_freq, epoch
+        ):
+            logging.info("Running validation at fit epoch: %s", epoch)
+
+            if model._compile_distribution:
+                # Since we create a new clone from the original model we need to
+                # copy the weights back to the original model before we can run
+                # validation.
+                dist_utils._copy_weights_to_original_model(
+                    model, ModeKeys.TRAIN
+                )
+
+            val_outs = experimental_tpu_test_loop(
+                model,
+                val_dataset,
+                steps=validation_steps,
+                verbose=verbose,
+                callbacks=callbacks,
+            )
+            if not isinstance(val_outs, list):
+                val_outs = [val_outs]
+            # Same labels assumed.
+ for label, val_out in zip(out_labels, val_outs): + epoch_logs["val_" + label] = val_out + + callbacks.on_epoch_end(epoch, epoch_logs) + if callbacks.model.stop_training: + break + model._successful_loop_finish = True + callbacks._call_end_hook(mode) + + if model._compile_distribution: + # Copy the weights back from the replicated model to the original model. + dist_utils._copy_weights_to_original_model(model, ModeKeys.TRAIN) + scope.__exit__(None, None, None) + return model.history + + +def experimental_tpu_test_loop( + model, dataset, verbose=0, steps=None, callbacks=None +): + """Test loop for evaluating with TPU tf.distribute.Strategy. + + Args: + model: Keras Model instance. + dataset: Dataset for input data. + verbose: Integer, Verbosity mode 0 or 1. + steps: Total number of steps (batches of samples) + before declaring predictions finished. + Ignored with the default value of `None`. + callbacks: List of callbacks to be called during training + + Returns: + Scalar loss (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). The attribute `model.metrics_names` will give you + the display labels for the outputs. + """ + mode = ModeKeys.TEST + current_strategy = model._distribution_strategy + iterator = dist_utils.get_iterator(dataset, current_strategy) + + scope = dist_utils.distributed_scope( + strategy=current_strategy, learning_phase=0 + ) + scope.__enter__() + + out_labels = model.metrics_names + + def _test_step_fn(inputs): + """A fn that returns output of single test step.""" + if isinstance(inputs, (tuple, list)) and len(inputs) == 2: + inputs, targets = inputs + else: + targets = None + + ( + tf.distribute.get_replica_context().merge_call( + _build_model, args=(model, mode, inputs, targets) + ) + ) + + (_, outputs, updates, _) = _per_replica_execution_function( + dist_utils.get_distributed_model(model, mode), mode + ) + with tf.control_dependencies([updates]): + return [tf.identity(out) for out in outputs] + + test_input_data = iterator.get_next() + per_replica_outputs = current_strategy.run( + _test_step_fn, args=(test_input_data,) + ) + output_tensors = {} + for label, output in zip(out_labels, per_replica_outputs): + if label == "loss": + reduce_op = tf.distribute.ReduceOp.SUM + else: + # We reduce all other metrics using mean for now. This is temporary + # workaround until new metrics are in place. + reduce_op = tf.distribute.ReduceOp.MEAN + output_tensors[label] = current_strategy.reduce( + reduce_op, output, axis=None + ) + test_op = tf.group(list(output_tensors.values())) + + if verbose >= 1: + progbar = Progbar(target=steps) + + if model._compile_distribution: + dist_utils._copy_weights_to_distributed_model(model, mode) + dist_utils._reset_metrics(model) - callbacks.on_epoch_begin(epoch) - epoch_logs = {} - step_index = 0 - prev_step_count = None + + callbacks = cbks.configure_callbacks( + callbacks, + model, + do_validation=False, + epochs=1, + steps_per_epoch=steps, + verbose=verbose, + count_mode="steps", + mode=ModeKeys.TEST, + ) + callbacks._call_begin_hook(mode) + + outs = [0.0] * len(model.metrics_names) + if steps is not None: + target_steps = steps + else: + raise ValueError( + "Number of steps could not be inferred from the data, " + "please pass the steps argument." 
+        )
+
+    current_step = 0
     while current_step < target_steps:
-      step_count = steps_to_run[current_step]
-      batch_logs = {'batch': step_index, 'size': 1, 'num_steps': step_count}
-      callbacks._call_batch_hook(mode, 'begin', step_index, batch_logs)
-      if prev_step_count is None or step_count != prev_step_count:
-        backend.get_session().run(steps_per_run.assign(step_count))
-        prev_step_count = step_count
-      try:
-        _, outputs = backend.batch_get_value([train_op, output_tensors])
-      except tf.errors.OutOfRangeError:
-        logging.warning('Your dataset iterator ran out of data; '
-                        'interrupting training. Make sure that your dataset '
-                        'can generate at least `steps_per_epoch * epochs` '
-                        'batches (in this case, %d batches).' %
-                        steps_per_epoch * epochs)
-        break
-
-      batch_logs.update(outputs)
-      callbacks._call_batch_hook(mode, 'end', step_index, batch_logs)
-      step_index = step_index + step_count
-      current_step += 1
-
-    if callbacks.model.stop_training:
-      break
-
-    if (do_validation and
-        training_utils_v1.should_run_validation(validation_freq, epoch)):
-      logging.info('Running validation at fit epoch: %s', epoch)
-
-      if model._compile_distribution:
-        # Since we create a new clone from the original model we need to copy
-        # the weights back to the original model before we can run validation.
-        dist_utils._copy_weights_to_original_model(model, ModeKeys.TRAIN)
+        batch_logs = {"batch": current_step, "size": 1}
+        callbacks._call_batch_hook(mode, "begin", current_step, batch_logs)
+        try:
+            _, batch_outs = backend.batch_get_value([test_op, output_tensors])
+        except tf.errors.OutOfRangeError:
+            warning_msg = (
+                "Make sure that your dataset can generate at least "
+                "`steps` batches (in this case, {} batches).".format(steps)
+            )
+
+            logging.warning(
+                "Your dataset iterator ran out of data; "
+                "interrupting evaluation. " + warning_msg
+            )
+            target_steps = current_step
+            break
+        for i, label in enumerate(model.metrics_names):
+            if i == 0:
+                # Loss is a stateless metric.
+                outs[i] += batch_outs[label]
+            else:
+                # For all stateful metrics, the aggregation is handled by
+                # mirrored vars.
+                outs[i] = batch_outs[label]
+
+        batch_logs = callbacks.make_logs(model, batch_logs, outs, mode)
+        callbacks._call_batch_hook(mode, "end", current_step, batch_logs)
+        if verbose == 1:
+            progbar.update(current_step + 1)
+        current_step += 1
+
+    if verbose >= 1:
+        # Progress bar finishes at the end.
+        progbar.update(target_steps)
+    callbacks._call_end_hook(mode)
+
+    scope.__exit__(None, None, None)
+    if len(outs) > 0:
+        outs[0] /= target_steps
+
+    if len(outs) == 1:
+        return outs[0]
+    return outs
+
+
+def experimental_tpu_predict_loop(
+    model, dataset, verbose=0, steps=None, callbacks=None
+):
+    """Predict loop for predicting with TPU tf.distribute.Strategy.
+
+    Args:
+      model: Keras Model instance.
+      dataset: Dataset for input data.
+      verbose: Integer, Verbosity mode 0 or 1.
+      steps: Total number of steps (batches of samples)
+        before declaring `_predict_loop` finished.
+        Ignored with the default value of `None`.
+      callbacks: List of callbacks to be called during training
+
+    Returns:
+      Array of predictions (if the model has a single output)
+      or list of arrays of predictions
+      (if the model has multiple outputs).
+    """
+    mode = ModeKeys.PREDICT
+    dataset_fully_shaped = dist_utils.is_dataset_shape_fully_defined(dataset)
+    padding_handler = None
+    if not dataset_fully_shaped:
+        # TODO(hongjunchoi): Investigate whether operations from
+        # PartialBatchPaddingHandler are unnecessarily pruned out
+        # during graph optimization.
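The padding logic that follows re-batches with `drop_remainder=True`, and the reason is worth spelling out: only a drop-remainder batch has a statically known batch dimension. A standalone illustration (not part of the diff):

```python
import tensorflow as tf

# A 10-element dataset batched by 4 ends with a partial batch of 2, so the
# element spec has an unknown (None) batch dimension.
ds = tf.data.Dataset.range(10).batch(4)
print(ds.element_spec.shape)  # (None,)

# With drop_remainder=True every emitted batch has exactly 4 elements, so
# the batch dimension is statically known.
ds_static = tf.data.Dataset.range(10).batch(4, drop_remainder=True)
print(ds_static.element_spec.shape)  # (4,)
```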
+        padding_handler = padding_util.PartialBatchPaddingHandler(
+            model._feed_output_shapes
+        )
+        batch_size, _, prefetch_buffer = input_lib._get_dataset_attributes(
+            dataset
+        )
+        padding_handler.padded_batch_size = batch_size
+        padding_handler.padding_mask = dataset.reduce(
+            padding_handler.padding_mask, padding_handler.update_mask
+        )
+
+        dataset = dataset.map(padding_handler.pad_batch)
+        dataset = dataset.unbatch()
+        # At this point, it is guaranteed that the dataset does not
+        # have partial batches. Thus, we set `drop_remainder=True` to
+        # get static shape information about the elements in the dataset.
+        dataset = dataset.batch(batch_size, drop_remainder=True)
+
+        if prefetch_buffer is not None:
+            dataset = dataset.prefetch(prefetch_buffer)
+
+    current_strategy = model._distribution_strategy
+    iterator = dist_utils.get_iterator(dataset, current_strategy)
+
+    scope = dist_utils.distributed_scope(
+        strategy=current_strategy, learning_phase=0
+    )
+    scope.__enter__()
+
+    def _predict_step_fn(inputs):
+        """A fn that returns output of single prediction step."""
+
+        (
+            tf.distribute.get_replica_context().merge_call(
+                _build_model, args=(model, mode, inputs)
+            )
+        )
+
+        (_, outputs, updates, _) = _per_replica_execution_function(
+            dist_utils.get_distributed_model(model, mode), mode
+        )
+
+        with tf.control_dependencies([updates]):
+            return [tf.identity(out) for out in outputs]
+
+    # TODO(hongjunchoi): When a numpy array is passed as an input to
+    # `predict()`, use it directly to avoid accumulating unnecessary input
+    # pipeline ops.
+    predict_input_data = iterator.get_next()
+    per_replica_outputs = current_strategy.run(
+        _predict_step_fn, args=(predict_input_data,)
+    )
+    output_tensors = dist_utils.flatten_per_replica_values(
+        current_strategy, per_replica_outputs
+    )
+
+    if verbose >= 1:
+        progbar = Progbar(target=steps)
+
+    if model._compile_distribution:
+        dist_utils._copy_weights_to_distributed_model(model, mode)
+
+    dist_utils._reset_metrics(model)
-
- """ - mode = ModeKeys.TEST - current_strategy = model._distribution_strategy - iterator = dist_utils.get_iterator(dataset, current_strategy) - - scope = dist_utils.distributed_scope( - strategy=current_strategy, learning_phase=0) - scope.__enter__() - - out_labels = model.metrics_names - - def _test_step_fn(inputs): - """A fn that returns output of single test step.""" - if isinstance(inputs, (tuple, list)) and len(inputs) == 2: - inputs, targets = inputs + callbacks = cbks.configure_callbacks( + callbacks, + model, + do_validation=False, + epochs=1, + steps_per_epoch=steps, + verbose=verbose, + count_mode="steps", + mode=mode, + ) + callbacks._call_begin_hook(mode) + + # Since we do not know how many samples we will see, we cannot pre-allocate + # the returned Numpy arrays. Instead, we store one array per batch seen + # and concatenate them upon returning. + num_model_outputs = len(model.output_names) + unconcatenated_outs = [[] for _ in range(num_model_outputs)] + if steps is not None: + target_steps = steps else: - targets = None - - (tf.distribute.get_replica_context().merge_call( - _build_model, args=(model, mode, inputs, targets))) - - (_, outputs, updates, _) = _per_replica_execution_function( - dist_utils.get_distributed_model(model, mode), mode) - with tf.control_dependencies([updates]): - return [tf.identity(out) for out in outputs] - - test_input_data = iterator.get_next() - per_replica_outputs = current_strategy.run( - _test_step_fn, args=(test_input_data,)) - output_tensors = {} - for label, output in zip(out_labels, per_replica_outputs): - if label == 'loss': - reduce_op = tf.distribute.ReduceOp.SUM + raise ValueError( + "Number of steps could not be inferred from the data, " + "please pass the steps argument." + ) + + current_step = 0 + while current_step < target_steps: + batch_logs = {"batch": current_step, "size": 1} + callbacks._call_batch_hook(mode, "begin", current_step, batch_logs) + try: + predict_ops = tf.group(output_tensors) + _, batch_outs = backend.batch_get_value( + [predict_ops, output_tensors] + ) + + except tf.errors.OutOfRangeError: + warning_msg = ( + "Make sure that your dataset can generate at least " + "`steps` batches (in this case, {} batches).".format(steps) + ) + + logging.warning( + "Your dataset iterator ran out of data; " + "interrupting evaluation. " + warning_msg + ) + break + + # TODO(priyag): maybe need to unwrap the outputs first for + # MirroredStrategy. + for i in range(num_model_outputs): + output_start_index = i * current_strategy.num_replicas_in_sync + output_end_index = ( + output_start_index + current_strategy.num_replicas_in_sync + ) + single_model_output = batch_outs[ + output_start_index:output_end_index + ] + unconcatenated_outs[i].extend(single_model_output) + + batch_logs = callbacks.make_logs(model, batch_logs, batch_outs, mode) + callbacks._call_batch_hook(mode, "end", current_step, batch_logs) + if verbose == 1: + progbar.update(current_step + 1) + current_step += 1 + + if verbose >= 1: + # Progress bar finishes at the end. + progbar.update(current_step) + + callbacks._call_end_hook(mode) + + scope.__exit__(None, None, None) + + if len(unconcatenated_outs) == 1: + prediction_result = np.concatenate(unconcatenated_outs[0], axis=0) else: - # We reduce all other metrics using mean for now. This is temporary - # workaround until new metrics are in place. 
- reduce_op = tf.distribute.ReduceOp.MEAN - output_tensors[label] = current_strategy.reduce(reduce_op, output, - axis=None) - test_op = tf.group(list(output_tensors.values())) - - if verbose >= 1: - progbar = Progbar(target=steps) - - if model._compile_distribution: - dist_utils._copy_weights_to_distributed_model(model, mode) - - dist_utils._reset_metrics(model) - - callbacks = cbks.configure_callbacks( - callbacks, - model, - do_validation=False, - epochs=1, - steps_per_epoch=steps, - verbose=verbose, - count_mode='steps', - mode=ModeKeys.TEST) - callbacks._call_begin_hook(mode) - - outs = [0.] * len(model.metrics_names) - if steps is not None: - target_steps = steps - else: - raise ValueError('Number of steps could not be inferred from the data, ' - 'please pass the steps argument.') - - current_step = 0 - while current_step < target_steps: - batch_logs = {'batch': current_step, 'size': 1} - callbacks._call_batch_hook(mode, 'begin', current_step, batch_logs) - try: - _, batch_outs = backend.batch_get_value([test_op, output_tensors]) - except tf.errors.OutOfRangeError: - warning_msg = ( - 'Make sure that your dataset can generate at least ' - '`steps` batches (in this case, {} batches).'.format(steps)) - - logging.warning('Your dataset iterator ran out of data; ' - 'interrupting evaluation. ' + warning_msg) - target_steps = current_step - break - for i, label in enumerate(model.metrics_names): - if i == 0: - # Loss is stateless metrics. - outs[i] += batch_outs[label] - else: - # For all stateful metrics, the aggregation is handled by mirrored vars. - outs[i] = batch_outs[label] - - batch_logs = cbks.make_logs(model, batch_logs, outs, mode) - callbacks._call_batch_hook(mode, 'end', current_step, batch_logs) - if verbose == 1: - progbar.update(current_step + 1) - current_step += 1 - - if verbose >= 1: - # Progress bar finishes at the end. - progbar.update(target_steps) - callbacks._call_end_hook(mode) - - scope.__exit__(None, None, None) - if len(outs) >= 0: - outs[0] /= (target_steps) - - if len(outs) == 1: - return outs[0] - return outs - - -def experimental_tpu_predict_loop(model, - dataset, - verbose=0, - steps=None, - callbacks=None): - """Predict loop for predicting with TPU tf.distribute.Strategy. - - Args: - model: Keras Model instance. - dataset: Dataset for input data. - verbose: Integer, Verbosity mode 0 or 1. - steps: Total number of steps (batches of samples) - before declaring `_predict_loop` finished. - Ignored with the default value of `None`. - callbacks: List of callbacks to be called during training - - Returns: - Array of predictions (if the model has a single output) - or list of arrays of predictions - (if the model has multiple outputs). - """ - mode = ModeKeys.PREDICT - dataset_fully_shaped = dist_utils.is_dataset_shape_fully_defined(dataset) - padding_handler = None - if not dataset_fully_shaped: - # TODO(hongjunchoi): Investigate whether operations from - # PartialBatchPaddingHandler are unnecessarily pruned out - # during graph optimization. - padding_handler = padding_util.PartialBatchPaddingHandler( - model._feed_output_shapes) - batch_size, _, prefetch_buffer = input_lib._get_dataset_attributes(dataset) - padding_handler.padded_batch_size = batch_size - padding_handler.padding_mask = dataset.reduce(padding_handler.padding_mask, - padding_handler.update_mask) - - dataset = dataset.map(padding_handler.pad_batch) - dataset = dataset.unbatch() - # Upon this point, it is guaranteed that the dataset does not - # have partial batches. 
Thus, we set `drop_remainder=True` to - # get static shape information about the elements in the dataset. - dataset = dataset.batch(batch_size, drop_remainder=True) - - if prefetch_buffer is not None: - dataset = dataset.prefetch(prefetch_buffer) - - current_strategy = model._distribution_strategy - iterator = dist_utils.get_iterator(dataset, current_strategy) - - scope = dist_utils.distributed_scope( - strategy=current_strategy, learning_phase=0) - scope.__enter__() - - def _predict_step_fn(inputs): - """A fn that returns output of single prediction step.""" - - (tf.distribute.get_replica_context().merge_call( - _build_model, args=(model, mode, inputs))) - - (_, outputs, updates, _) = _per_replica_execution_function( - dist_utils.get_distributed_model(model, mode), mode) - - with tf.control_dependencies([updates]): - return [tf.identity(out) for out in outputs] - - # TODO(hongjunchoi): When numpy array is passed as an input to `predict()` - # use numpy arrays directly to avoid cumulating unnecessary input pipeline - # ops. - predict_input_data = iterator.get_next() - per_replica_outputs = current_strategy.run( - _predict_step_fn, args=(predict_input_data,)) - output_tensors = dist_utils.flatten_per_replica_values( - current_strategy, per_replica_outputs) - - if verbose >= 1: - progbar = Progbar(target=steps) - - if model._compile_distribution: - dist_utils._copy_weights_to_distributed_model(model, mode) - - dist_utils._reset_metrics(model) - - callbacks = cbks.configure_callbacks( - callbacks, - model, - do_validation=False, - epochs=1, - steps_per_epoch=steps, - verbose=verbose, - count_mode='steps', - mode=mode) - callbacks._call_begin_hook(mode) - - # Since we do not know how many samples we will see, we cannot pre-allocate - # the returned Numpy arrays. Instead, we store one array per batch seen - # and concatenate them upon returning. - num_model_outputs = len(model.output_names) - unconcatenated_outs = [[] for _ in range(num_model_outputs)] - if steps is not None: - target_steps = steps - else: - raise ValueError('Number of steps could not be inferred from the data, ' - 'please pass the steps argument.') - - current_step = 0 - while current_step < target_steps: - batch_logs = {'batch': current_step, 'size': 1} - callbacks._call_batch_hook(mode, 'begin', current_step, batch_logs) - try: - predict_ops = tf.group(output_tensors) - _, batch_outs = backend.batch_get_value([predict_ops, output_tensors]) - - except tf.errors.OutOfRangeError: - warning_msg = ( - 'Make sure that your dataset can generate at least ' - '`steps` batches (in this case, {} batches).'.format(steps)) - - logging.warning('Your dataset iterator ran out of data; ' - 'interrupting evaluation. ' + warning_msg) - break - - # TODO(priyag): maybe need to unwrap the outputs first for MirroredStrategy. - for i in range(num_model_outputs): - output_start_index = i * current_strategy.num_replicas_in_sync - output_end_index = ( - output_start_index + current_strategy.num_replicas_in_sync) - single_model_output = batch_outs[output_start_index:output_end_index] - unconcatenated_outs[i].extend(single_model_output) - - batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode) - callbacks._call_batch_hook(mode, 'end', current_step, batch_logs) - if verbose == 1: - progbar.update(current_step + 1) - current_step += 1 - - if verbose >= 1: - # Progress bar finishes at the end. 
- progbar.update(current_step) - - callbacks._call_end_hook(mode) - - scope.__exit__(None, None, None) - - if len(unconcatenated_outs) == 1: - prediction_result = np.concatenate(unconcatenated_outs[0], axis=0) - else: - prediction_result = [ - np.concatenate(out, axis=0) for out in unconcatenated_outs - ] - - if padding_handler: - prediction_result = padding_handler.apply_mask(prediction_result) - - return prediction_result + prediction_result = [ + np.concatenate(out, axis=0) for out in unconcatenated_outs + ] + + if padding_handler: + prediction_result = padding_handler.apply_mask(prediction_result) + + return prediction_result class DistributionSingleWorkerTrainingLoop(training_utils_v1.TrainingLoop): - """Training loop for distribution strategy with single worker.""" - - def fit(self, - model, - x=None, - y=None, - batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - validation_split=0., - validation_data=None, - shuffle=True, - class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_freq=1, - **kwargs): - """Fit loop for Distribution Strategies.""" - dist_utils.validate_callbacks(input_callbacks=callbacks, - optimizer=model.optimizer) - dist_utils.validate_inputs(x, y) - - batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size( - model._distribution_strategy, - x, - batch_size, - steps_per_epoch, - ModeKeys.TRAIN, - validation_split=validation_split) - batch_size = model._validate_or_infer_batch_size( - batch_size, steps_per_epoch, x) - dataset = model._distribution_standardize_user_data( - x, y, - sample_weight=sample_weight, - class_weight=class_weight, - batch_size=batch_size, - validation_split=validation_split, - shuffle=shuffle, - epochs=epochs) - if not dist_utils.is_distributing_by_cloning(model): - with model._distribution_strategy.scope(): - (dataset, _, _) = model._standardize_user_data( - dataset, + """Training loop for distribution strategy with single worker.""" + + def fit( + self, + model, + x=None, + y=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_freq=1, + **kwargs + ): + """Fit loop for Distribution Strategies.""" + dist_utils.validate_callbacks( + input_callbacks=callbacks, optimizer=model.optimizer + ) + dist_utils.validate_inputs(x, y) + + batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size( + model._distribution_strategy, + x, + batch_size, + steps_per_epoch, + ModeKeys.TRAIN, + validation_split=validation_split, + ) + batch_size = model._validate_or_infer_batch_size( + batch_size, steps_per_epoch, x + ) + dataset = model._distribution_standardize_user_data( + x, + y, sample_weight=sample_weight, class_weight=class_weight, batch_size=batch_size, validation_split=validation_split, - shuffle=shuffle) - - val_dataset = None - if validation_data: - val_x, val_y, val_sample_weights = ( - training_utils_v1.unpack_validation_data(validation_data)) - dist_utils.validate_inputs(val_x, val_y) - _, validation_steps = dist_utils.process_batch_and_step_size( - model._distribution_strategy, val_x, batch_size, validation_steps, - ModeKeys.TEST) - - val_dataset = model._distribution_standardize_user_data( - val_x, val_y, - sample_weight=val_sample_weights, - class_weight=None, - batch_size=batch_size, - validation_split=validation_split, - shuffle=shuffle, - 
allow_partial_batch=True) - elif validation_split: - raise ValueError('validation_split argument is not supported with ' - 'distribution strategies.') - - if backend.is_tpu_strategy(model._distribution_strategy): - steps_per_epoch = training_utils_v1.infer_steps_for_dataset( - model, dataset, steps_per_epoch, epochs, steps_name='steps_per_epoch') - if steps_per_epoch is None: - raise ValueError('Number of steps could not be inferred from the data, ' - 'please pass the steps_per_epoch argument.') - - if not tf.executing_eagerly(): - # Run TPU training in a custom loop in graph mode. - return experimental_tpu_fit_loop( + shuffle=shuffle, + epochs=epochs, + ) + if not dist_utils.is_distributing_by_cloning(model): + with model._distribution_strategy.scope(): + (dataset, _, _) = model._standardize_user_data( + dataset, + sample_weight=sample_weight, + class_weight=class_weight, + batch_size=batch_size, + validation_split=validation_split, + shuffle=shuffle, + ) + + val_dataset = None + if validation_data: + ( + val_x, + val_y, + val_sample_weights, + ) = training_utils_v1.unpack_validation_data(validation_data) + dist_utils.validate_inputs(val_x, val_y) + _, validation_steps = dist_utils.process_batch_and_step_size( + model._distribution_strategy, + val_x, + batch_size, + validation_steps, + ModeKeys.TEST, + ) + + val_dataset = model._distribution_standardize_user_data( + val_x, + val_y, + sample_weight=val_sample_weights, + class_weight=None, + batch_size=batch_size, + validation_split=validation_split, + shuffle=shuffle, + allow_partial_batch=True, + ) + elif validation_split: + raise ValueError( + "validation_split argument is not supported with " + "distribution strategies." + ) + + if backend.is_tpu_strategy(model._distribution_strategy): + steps_per_epoch = training_utils_v1.infer_steps_for_dataset( + model, + dataset, + steps_per_epoch, + epochs, + steps_name="steps_per_epoch", + ) + if steps_per_epoch is None: + raise ValueError( + "Number of steps could not be inferred from the data, " + "please pass the steps_per_epoch argument." + ) + + if not tf.executing_eagerly(): + # Run TPU training in a custom loop in graph mode. 
+ return experimental_tpu_fit_loop( + model, + dataset, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + val_dataset=val_dataset, + initial_epoch=initial_epoch, + steps_per_epoch=steps_per_epoch, + validation_steps=validation_steps, + validation_freq=validation_freq, + ) + + return training_arrays_v1.fit_loop( model, dataset, + batch_size=batch_size, epochs=epochs, verbose=verbose, callbacks=callbacks, - val_dataset=val_dataset, + val_inputs=val_dataset, + shuffle=shuffle, initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps, - validation_freq=validation_freq) + validation_freq=validation_freq, + steps_name="steps_per_epoch", + ) - return training_arrays_v1.fit_loop( + def evaluate( + self, model, - dataset, - batch_size=batch_size, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - val_inputs=val_dataset, - shuffle=shuffle, - initial_epoch=initial_epoch, - steps_per_epoch=steps_per_epoch, - validation_steps=validation_steps, - validation_freq=validation_freq, - steps_name='steps_per_epoch') - - def evaluate(self, - model, - x=None, - y=None, - batch_size=None, - verbose=1, - sample_weight=None, - steps=None, - callbacks=None, - **kwargs): - """Evaluate loop for Distribution Strategies.""" - dist_utils.validate_inputs(x, y) - batch_size, steps = dist_utils.process_batch_and_step_size( - model._distribution_strategy, x, batch_size, steps, ModeKeys.TEST) - batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) - dataset = model._distribution_standardize_user_data( - x, y, - sample_weight=sample_weight, - batch_size=batch_size, - allow_partial_batch=True) - - if backend.is_tpu_strategy(model._distribution_strategy): - steps = training_utils_v1.infer_steps_for_dataset( - model, dataset, steps, steps_name='steps') - if steps is None: - raise ValueError('Number of steps could not be inferred from the data, ' - 'please pass the steps argument.') - - if not tf.executing_eagerly(): - # Run TPU evaluation in a custom loop in graph mode. - return experimental_tpu_test_loop( - model, dataset, verbose=verbose, steps=steps, callbacks=callbacks) - - return training_arrays_v1.test_loop( + x=None, + y=None, + batch_size=None, + verbose=1, + sample_weight=None, + steps=None, + callbacks=None, + **kwargs + ): + """Evaluate loop for Distribution Strategies.""" + dist_utils.validate_inputs(x, y) + batch_size, steps = dist_utils.process_batch_and_step_size( + model._distribution_strategy, x, batch_size, steps, ModeKeys.TEST + ) + batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) + dataset = model._distribution_standardize_user_data( + x, + y, + sample_weight=sample_weight, + batch_size=batch_size, + allow_partial_batch=True, + ) + + if backend.is_tpu_strategy(model._distribution_strategy): + steps = training_utils_v1.infer_steps_for_dataset( + model, dataset, steps, steps_name="steps" + ) + if steps is None: + raise ValueError( + "Number of steps could not be inferred from the data, " + "please pass the steps argument." + ) + + if not tf.executing_eagerly(): + # Run TPU evaluation in a custom loop in graph mode. 
+ return experimental_tpu_test_loop( + model, + dataset, + verbose=verbose, + steps=steps, + callbacks=callbacks, + ) + + return training_arrays_v1.test_loop( + model, + inputs=dataset, + batch_size=batch_size, + verbose=verbose, + steps=steps, + callbacks=callbacks, + ) + + def predict( + self, model, - inputs=dataset, - batch_size=batch_size, - verbose=verbose, - steps=steps, - callbacks=callbacks) - - def predict(self, - model, - x, - batch_size=None, - verbose=0, - steps=None, - callbacks=None, - **kwargs): - """Predict loop for Distribution Strategies.""" - dist_utils.validate_inputs(x=x, y=None) - batch_size, steps = dist_utils.process_batch_and_step_size( - model._distribution_strategy, x, batch_size, steps, ModeKeys.PREDICT) - batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) - dataset = model._distribution_standardize_user_data( x, - batch_size=batch_size, - allow_partial_batch=True) - if backend.is_tpu_strategy(model._distribution_strategy): - steps = training_utils_v1.infer_steps_for_dataset( - model, dataset, steps, steps_name='steps') - if steps is None: - raise ValueError('Number of steps could not be inferred from the data, ' - 'please pass the steps argument.') - if not tf.executing_eagerly(): - return experimental_tpu_predict_loop( - model, dataset, verbose=verbose, steps=steps, callbacks=callbacks) - return training_arrays_v1.predict_loop( - model, - dataset, - batch_size=batch_size, - verbose=verbose, - steps=steps, - callbacks=callbacks) + batch_size=None, + verbose=0, + steps=None, + callbacks=None, + **kwargs + ): + """Predict loop for Distribution Strategies.""" + dist_utils.validate_inputs(x=x, y=None) + batch_size, steps = dist_utils.process_batch_and_step_size( + model._distribution_strategy, x, batch_size, steps, ModeKeys.PREDICT + ) + batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) + dataset = model._distribution_standardize_user_data( + x, batch_size=batch_size, allow_partial_batch=True + ) + if backend.is_tpu_strategy(model._distribution_strategy): + steps = training_utils_v1.infer_steps_for_dataset( + model, dataset, steps, steps_name="steps" + ) + if steps is None: + raise ValueError( + "Number of steps could not be inferred from the data, " + "please pass the steps argument." 
+                )
+        if not tf.executing_eagerly():
+            return experimental_tpu_predict_loop(
+                model,
+                dataset,
+                verbose=verbose,
+                steps=steps,
+                callbacks=callbacks,
+            )
+        return training_arrays_v1.predict_loop(
+            model,
+            dataset,
+            batch_size=batch_size,
+            verbose=verbose,
+            steps=steps,
+            callbacks=callbacks,
+        )
 
 
 def _train_with_multi_worker(method):
-  """Decorator that handles multi worker training with distribution strategy."""
+    """Decorator for multi-worker training with a distribution strategy."""
 
-  def wrapper(model, **kwargs):
-    def _worker_fn(_):
-      callbacks = kwargs.pop('callbacks', None)
-      filtered_callbacks = dist_utils.filter_distributed_callbacks(
-          callbacks, model)
-      kwargs['callbacks'] = filtered_callbacks
-      return method(model, **kwargs)
+    def wrapper(model, **kwargs):
+        def _worker_fn(_):
+            callbacks = kwargs.pop("callbacks", None)
+            filtered_callbacks = dist_utils.filter_distributed_callbacks(
+                callbacks, model
+            )
+            kwargs["callbacks"] = filtered_callbacks
+            return method(model, **kwargs)
 
-    return dc.run_distribute_coordinator(
-        _worker_fn,
-        model._distribution_strategy)
+        return dc.run_distribute_coordinator(
+            _worker_fn, model._distribution_strategy
+        )
 
-  return wrapper
+    return wrapper
 
 
 class DistributionMultiWorkerTrainingLoop(training_utils_v1.TrainingLoop):
-  """Training loop for distribution strategy with multiple worker."""
+    """Training loop for distribution strategy with multiple workers."""
 
-  def __init__(self, single_worker_loop):
-    self._single_worker_loop = single_worker_loop
+    def __init__(self, single_worker_loop):
+        self._single_worker_loop = single_worker_loop
 
-  def fit(self, *args, **kwargs):
-    return _train_with_multi_worker(self._single_worker_loop.fit)(
-        *args, **kwargs)
+    def fit(self, *args, **kwargs):
+        return _train_with_multi_worker(self._single_worker_loop.fit)(
+            *args, **kwargs
+        )
 
-  def evaluate(self, *args, **kwargs):
-    return _train_with_multi_worker(self._single_worker_loop.evaluate)(
-        *args, **kwargs)
+    def evaluate(self, *args, **kwargs):
+        return _train_with_multi_worker(self._single_worker_loop.evaluate)(
+            *args, **kwargs
+        )
 
-  def predict(self, *args, **kwargs):
-    # Currently predict is still using the single worker implementation.
-    return self._single_worker_loop.predict(*args, **kwargs)
+    def predict(self, *args, **kwargs):
+        # Currently predict is still using the single worker implementation.
+        return self._single_worker_loop.predict(*args, **kwargs)
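The `_train_with_multi_worker` decorator above re-invokes the wrapped single-worker loop inside `dc.run_distribute_coordinator`, after filtering out callbacks that should not run on every worker. A minimal, self-contained sketch of the same wrapper pattern, with `run_coordinator` and `keep_callback` as hypothetical stand-ins for the TF coordinator and the callback filter (not the Keras implementation itself):

def keep_callback(cb):
    # Hypothetical filter: keep a callback unless it is marked chief-only.
    return not getattr(cb, "_chief_worker_only", False)


def train_with_coordinator(method, run_coordinator, strategy):
    """Wrap a single-worker training method so each worker re-runs it."""

    def wrapper(model, **kwargs):
        def _worker_fn(_):
            # Per worker: drop callbacks that should only run on the chief.
            callbacks = kwargs.pop("callbacks", None) or []
            kwargs["callbacks"] = [cb for cb in callbacks if keep_callback(cb)]
            return method(model, **kwargs)

        return run_coordinator(_worker_fn, strategy)

    return wrapper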
diff --git a/keras/engine/training_eager_test.py b/keras/engine/training_eager_test.py
index 0b4ecd42c91d..317ca1f790dc 100644
--- a/keras/engine/training_eager_test.py
+++ b/keras/engine/training_eager_test.py
@@ -14,341 +14,404 @@
 # ==============================================================================
 """Tests for training routines."""
 
+import numpy as np
 import tensorflow.compat.v2 as tf
-
 from absl.testing import parameterized
-import numpy as np
 
 import keras
-from keras.testing_infra import test_combinations
 from keras import metrics as metrics_module
+from keras.optimizers.legacy import rmsprop
+from keras.testing_infra import test_combinations
 from keras.testing_infra import test_utils
-from keras.optimizers.optimizer_v2 import rmsprop
 
 
 class TrainingTest(test_combinations.TestCase):
-
-  @test_combinations.run_all_keras_modes(always_skip_v1=True)
-  def test_dynamic_model_has_trainable_weights(self):
-    if not tf.executing_eagerly():
-      # Only test Eager modes, as Graph mode is not relevant for dynamic models.
-      return
-
-    class DynamicModel(keras.Model):
-
-      def __init__(self):
-        super().__init__(dynamic=True)
-        self.dense = keras.layers.Dense(
-            1, kernel_initializer='zeros', bias_initializer='ones')
-
-      def call(self, inputs):
-        return self.dense(inputs)
-
-    model = DynamicModel()
-    model.compile(
-        'rmsprop', 'mae',
-        run_eagerly=True)
-    hist = model.fit(np.zeros((1, 1)), np.zeros((1, 1)))
-    self.assertEqual(hist.history['loss'][-1], 1)
-    self.assertEqual(len(model.trainable_weights), 2)
-    loss = model.train_on_batch(np.zeros((1, 1)), np.zeros((1, 1)))
-    # The loss must have been updated if the trainable weights are taken into
-    # account during tracking.
-    self.assertLess(loss, 1)
-
-  @test_combinations.run_with_all_model_types(exclude_models='sequential')
-  @test_combinations.run_all_keras_modes
-  def test_model_methods_with_eager_tensors_multi_io(self):
-    if not tf.executing_eagerly():
-      # Only test V2 Function and V2 Eager modes, as V1 Graph mode with
-      # symbolic tensors has different requirements.
-      return
-
-    input_a = keras.layers.Input(shape=(3,), name='input_a')
-    input_b = keras.layers.Input(shape=(3,), name='input_b')
-
-    dense = keras.layers.Dense(4, name='dense')
-    dropout = keras.layers.Dropout(0.5, name='dropout')
-
-    model = test_utils.get_multi_io_model(
-        [input_a, dense], [input_b, dense, dropout])
-
-    optimizer = rmsprop.RMSprop(learning_rate=0.001)
-    loss = 'mse'
-    loss_weights = [1., 0.5]
-    metrics = ['mae', metrics_module.CategoricalAccuracy()]
-    model.compile(
-        optimizer,
-        loss,
-        metrics=metrics,
-        loss_weights=loss_weights,
-        run_eagerly=test_utils.should_run_eagerly(),
-        sample_weight_mode=None)
-
-    input_a = tf.zeros(shape=(10, 3))
-    input_b = tf.zeros(shape=(10, 3))
-    target_a = tf.zeros(shape=(10, 4))
-    target_b = tf.zeros(shape=(10, 4))
-
-    model.fit(
-        [input_a, input_b], [target_a, target_b],
-        epochs=1,
-        batch_size=5,
-        verbose=0)
-    # Test: no shuffle.
-    model.fit(
-        [input_a, input_b], [target_a, target_b],
-        epochs=1,
-        batch_size=5,
-        verbose=0,
-        shuffle=False)
-    # Test: validation data.
-    model.fit([input_a, input_b], [target_a, target_b],
-              epochs=1, batch_size=2, verbose=0,
-              validation_data=([input_a, input_b], [target_a, target_b]))
-    model.train_on_batch([input_a, input_b], [target_a, target_b])
-    model.predict([input_a, input_b], batch_size=5)
-    model.evaluate([input_a, input_b], [target_a, target_b],
-                   batch_size=2, verbose=0)
-    model.test_on_batch([input_a, input_b], [target_a, target_b])
-
-    # Test: mix np and tensors.
-    input_b = np.zeros(shape=(10, 3)).astype('float32')
-    target_b = np.zeros(shape=(10, 4)).astype('float32')
-    model.fit(
-        [input_a, input_b], [target_a, target_b],
-        epochs=1,
-        batch_size=5,
-        verbose=0)
-    model.fit([input_a, input_b], [target_a, target_b],
-              epochs=1, batch_size=2, verbose=0,
-              validation_data=([input_a, input_b], [target_a, target_b]))
-    model.fit(
-        [input_a, input_b], [target_a, target_b],
-        epochs=1,
-        batch_size=5,
-        verbose=0,
-        shuffle=False)
-    model.train_on_batch([input_a, input_b], [target_a, target_b])
-    model.predict([input_a, input_b], batch_size=5)
-    model.evaluate([input_a, input_b], [target_a, target_b],
-                   batch_size=2, verbose=0)
-    model.test_on_batch([input_a, input_b], [target_a, target_b])
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  def test_model_methods_with_eager_tensors_single_io(self):
-    if not tf.executing_eagerly():
-      # Only test V2 Function and V2 Eager modes, as V1 Graph mode with
-      # symbolic tensors has different requirements.
-      return
-
-    model = test_utils.get_small_mlp(10, 4, 3)
-
-    optimizer = rmsprop.RMSprop(learning_rate=0.001)
-    loss = 'mse'
-    metrics = ['mae', metrics_module.CategoricalAccuracy()]
-    model.compile(
-        optimizer,
-        loss,
-        metrics=metrics,
-        run_eagerly=test_utils.should_run_eagerly())
-
-    inputs = tf.zeros(shape=(10, 3))
-    targets = tf.zeros(shape=(10, 4))
-
-    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=0)
-    model.fit(inputs, targets, epochs=1, batch_size=3, verbose=0, shuffle=False)
-    model.fit(inputs, targets, epochs=1, batch_size=4, verbose=0,
-              validation_data=(inputs, targets))
-    model.evaluate(inputs, targets, batch_size=2, verbose=0)
-    model.predict(inputs, batch_size=2)
-    model.train_on_batch(inputs, targets)
-    model.test_on_batch(inputs, targets)
-
-  @test_combinations.run_with_all_model_types
-  def test_model_fit_and_validation_with_missing_arg_errors(self):
-    model = test_utils.get_small_mlp(10, 4, 3)
-    model.compile(optimizer=rmsprop.RMSprop(learning_rate=0.001),
-                  loss='mse',
-                  run_eagerly=True)
-
-    x = tf.zeros(shape=(10, 3))
-    y = tf.zeros(shape=(10, 4))
-    dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat(10).batch(5)
-    validation_dataset = tf.data.Dataset.from_tensor_slices(
-        (x, y)).repeat().batch(5)  # Infinite dataset.
-
-    model.fit(dataset, epochs=1, verbose=0)
-
-    # Step argument is required for infinite datasets.
-    with self.assertRaises(ValueError):
-      model.fit(dataset, steps_per_epoch=2, epochs=1, verbose=0,
-                validation_data=validation_dataset)
-    with self.assertRaises(ValueError):
-      model.fit(dataset, steps_per_epoch=2, epochs=1, verbose=0,
-                validation_data=validation_dataset)
-
-  # TODO(b/120931266): Enable test on subclassed models after bug causing an
-  # extra dimension to be added to predict outputs is fixed.
-  @test_combinations.run_with_all_model_types(exclude_models='subclass')
-  def test_generator_methods(self):
-    model = test_utils.get_small_mlp(10, 4, 3)
-    optimizer = rmsprop.RMSprop(learning_rate=0.001)
-    model.compile(
-        optimizer,
-        loss='mse',
-        metrics=['mae', metrics_module.CategoricalAccuracy()],
-        run_eagerly=True)
-
-    x = np.random.random((10, 3))
-    y = np.random.random((10, 4))
-
-    def numpy_iterator():
-      while True:
-        yield x, y
-
-    model.fit_generator(numpy_iterator(), steps_per_epoch=3, epochs=1)
-    model.evaluate_generator(numpy_iterator(), steps=3)
-
-    def inference_numpy_iterator():
-      while True:
-        yield x
-
-    out = model.predict_generator(inference_numpy_iterator(), steps=3)
-    self.assertEqual(out.shape, (30, 4))
+    @test_combinations.run_all_keras_modes(always_skip_v1=True)
+    def test_dynamic_model_has_trainable_weights(self):
+        if not tf.executing_eagerly():
+            # Only test Eager modes, as Graph mode is not relevant for dynamic
+            # models.
+            return
+
+        class DynamicModel(keras.Model):
+            def __init__(self):
+                super().__init__(dynamic=True)
+                self.dense = keras.layers.Dense(
+                    1, kernel_initializer="zeros", bias_initializer="ones"
+                )
+
+            def call(self, inputs):
+                return self.dense(inputs)
+
+        model = DynamicModel()
+        model.compile("rmsprop", "mae", run_eagerly=True)
+        hist = model.fit(np.zeros((1, 1)), np.zeros((1, 1)))
+        self.assertEqual(hist.history["loss"][-1], 1)
+        self.assertEqual(len(model.trainable_weights), 2)
+        loss = model.train_on_batch(np.zeros((1, 1)), np.zeros((1, 1)))
+        # The loss must have been updated if the trainable weights are taken
+        # into account during tracking.
+        self.assertLess(loss, 1)
+
+    @test_combinations.run_with_all_model_types(exclude_models="sequential")
+    @test_combinations.run_all_keras_modes
+    def test_model_methods_with_eager_tensors_multi_io(self):
+        if not tf.executing_eagerly():
+            # Only test V2 Function and V2 Eager modes, as V1 Graph mode with
+            # symbolic tensors has different requirements.
+            return
+
+        input_a = keras.layers.Input(shape=(3,), name="input_a")
+        input_b = keras.layers.Input(shape=(3,), name="input_b")
+
+        dense = keras.layers.Dense(4, name="dense")
+        dropout = keras.layers.Dropout(0.5, name="dropout")
+
+        model = test_utils.get_multi_io_model(
+            [input_a, dense], [input_b, dense, dropout]
+        )
+
+        optimizer = rmsprop.RMSprop(learning_rate=0.001)
+        loss = "mse"
+        loss_weights = [1.0, 0.5]
+        metrics = ["mae", metrics_module.CategoricalAccuracy()]
+        model.compile(
+            optimizer,
+            loss,
+            metrics=metrics,
+            loss_weights=loss_weights,
+            run_eagerly=test_utils.should_run_eagerly(),
+            sample_weight_mode=None,
+        )
+
+        input_a = tf.zeros(shape=(10, 3))
+        input_b = tf.zeros(shape=(10, 3))
+        target_a = tf.zeros(shape=(10, 4))
+        target_b = tf.zeros(shape=(10, 4))
+
+        model.fit(
+            [input_a, input_b],
+            [target_a, target_b],
+            epochs=1,
+            batch_size=5,
+            verbose=0,
+        )
+        # Test: no shuffle.
+        model.fit(
+            [input_a, input_b],
+            [target_a, target_b],
+            epochs=1,
+            batch_size=5,
+            verbose=0,
+            shuffle=False,
+        )
+        # Test: validation data.
+        model.fit(
+            [input_a, input_b],
+            [target_a, target_b],
+            epochs=1,
+            batch_size=2,
+            verbose=0,
+            validation_data=([input_a, input_b], [target_a, target_b]),
+        )
+        model.train_on_batch([input_a, input_b], [target_a, target_b])
+        model.predict([input_a, input_b], batch_size=5)
+        model.evaluate(
+            [input_a, input_b], [target_a, target_b], batch_size=2, verbose=0
+        )
+        model.test_on_batch([input_a, input_b], [target_a, target_b])
+
+        # Test: mix np and tensors.
+        input_b = np.zeros(shape=(10, 3)).astype("float32")
+        target_b = np.zeros(shape=(10, 4)).astype("float32")
+        model.fit(
+            [input_a, input_b],
+            [target_a, target_b],
+            epochs=1,
+            batch_size=5,
+            verbose=0,
+        )
+        model.fit(
+            [input_a, input_b],
+            [target_a, target_b],
+            epochs=1,
+            batch_size=2,
+            verbose=0,
+            validation_data=([input_a, input_b], [target_a, target_b]),
+        )
+        model.fit(
+            [input_a, input_b],
+            [target_a, target_b],
+            epochs=1,
+            batch_size=5,
+            verbose=0,
+            shuffle=False,
+        )
+        model.train_on_batch([input_a, input_b], [target_a, target_b])
+        model.predict([input_a, input_b], batch_size=5)
+        model.evaluate(
+            [input_a, input_b], [target_a, target_b], batch_size=2, verbose=0
+        )
+        model.test_on_batch([input_a, input_b], [target_a, target_b])
+
+    @test_combinations.run_with_all_model_types
+    @test_combinations.run_all_keras_modes
+    def test_model_methods_with_eager_tensors_single_io(self):
+        if not tf.executing_eagerly():
+            # Only test V2 Function and V2 Eager modes, as V1 Graph mode with
+            # symbolic tensors has different requirements.
+            return
+
+        model = test_utils.get_small_mlp(10, 4, 3)
+
+        optimizer = rmsprop.RMSprop(learning_rate=0.001)
+        loss = "mse"
+        metrics = ["mae", metrics_module.CategoricalAccuracy()]
+        model.compile(
+            optimizer,
+            loss,
+            metrics=metrics,
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+
+        inputs = tf.zeros(shape=(10, 3))
+        targets = tf.zeros(shape=(10, 4))
+
+        model.fit(inputs, targets, epochs=1, batch_size=2, verbose=0)
+        model.fit(
+            inputs, targets, epochs=1, batch_size=3, verbose=0, shuffle=False
+        )
+        model.fit(
+            inputs,
+            targets,
+            epochs=1,
+            batch_size=4,
+            verbose=0,
+            validation_data=(inputs, targets),
+        )
+        model.evaluate(inputs, targets, batch_size=2, verbose=0)
+        model.predict(inputs, batch_size=2)
+        model.train_on_batch(inputs, targets)
+        model.test_on_batch(inputs, targets)
+
+    @test_combinations.run_with_all_model_types
+    def test_model_fit_and_validation_with_missing_arg_errors(self):
+        model = test_utils.get_small_mlp(10, 4, 3)
+        model.compile(
+            optimizer=rmsprop.RMSprop(learning_rate=0.001),
+            loss="mse",
+            run_eagerly=True,
+        )
+
+        x = tf.zeros(shape=(10, 3))
+        y = tf.zeros(shape=(10, 4))
+        dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat(10).batch(5)
+        validation_dataset = (
+            tf.data.Dataset.from_tensor_slices((x, y)).repeat().batch(5)
+        )  # Infinite dataset.
+
+        model.fit(dataset, epochs=1, verbose=0)
+
+        # Step argument is required for infinite datasets.
+        with self.assertRaises(ValueError):
+            model.fit(
+                dataset,
+                steps_per_epoch=2,
+                epochs=1,
+                verbose=0,
+                validation_data=validation_dataset,
+            )
+        with self.assertRaises(ValueError):
+            model.fit(
+                dataset,
+                steps_per_epoch=2,
+                epochs=1,
+                verbose=0,
+                validation_data=validation_dataset,
+            )
+
+    # TODO(b/120931266): Enable test on subclassed models after bug causing an
+    # extra dimension to be added to predict outputs is fixed.
+    @test_combinations.run_with_all_model_types(exclude_models="subclass")
+    def test_generator_methods(self):
+        model = test_utils.get_small_mlp(10, 4, 3)
+        optimizer = rmsprop.RMSprop(learning_rate=0.001)
+        model.compile(
+            optimizer,
+            loss="mse",
+            metrics=["mae", metrics_module.CategoricalAccuracy()],
+            run_eagerly=True,
+        )
+
+        x = np.random.random((10, 3))
+        y = np.random.random((10, 4))
+
+        def numpy_iterator():
+            while True:
+                yield x, y
+
+        model.fit_generator(numpy_iterator(), steps_per_epoch=3, epochs=1)
+        model.evaluate_generator(numpy_iterator(), steps=3)
+
+        def inference_numpy_iterator():
+            while True:
+                yield x
+
+        out = model.predict_generator(inference_numpy_iterator(), steps=3)
+        self.assertEqual(out.shape, (30, 4))
 
 
 class CorrectnessTest(test_combinations.TestCase):
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  @parameterized.named_parameters([
-      ('', dict()),
-      ('_clipvalue_inf', {'clipvalue': 999999}),
-      ('_clipnorm_inf', {'clipnorm': 999999}),
-  ])
-  def test_loss_correctness(self, optimizer_kwargs):
-    # Test that training loss is the same in eager and graph
-    # (by comparing it to a reference value in a deterministic case)
-    layers = [
-        keras.layers.Dense(3, activation='relu',
-                           kernel_initializer='ones'),
-        keras.layers.Dense(2, activation='softmax', kernel_initializer='ones')]
-    model = test_utils.get_model_from_layers(layers, input_shape=(4,))
-    model.compile(
-        loss='sparse_categorical_crossentropy',
-        optimizer=rmsprop.RMSprop(learning_rate=0.001, **optimizer_kwargs),
-        run_eagerly=test_utils.should_run_eagerly())
-    x = np.ones((100, 4))
-    np.random.seed(123)
-    y = np.random.randint(0, 1, size=(100, 1))
-    history = model.fit(x, y, epochs=1, batch_size=10)
-    self.assertAlmostEqual(history.history['loss'][-1], 0.5836, 4)
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  def test_loss_correctness_clipvalue_zero(self):
-    # Test that training loss is the same in eager and graph
-    # (by comparing it to a reference value in a deterministic case)
-    # And confirm that setting clipvalue to zero stops all training
-    layers = [
-        keras.layers.Dense(3, activation='relu',
-                           kernel_initializer='ones'),
-        keras.layers.Dense(2, activation='softmax', kernel_initializer='ones')]
-    model = test_utils.get_model_from_layers(layers, input_shape=(4,))
-    model.compile(
-        loss='sparse_categorical_crossentropy',
-        optimizer=rmsprop.RMSprop(learning_rate=0.001, clipvalue=0.0),
-        run_eagerly=test_utils.should_run_eagerly())
-    x = np.ones((100, 4))
-    np.random.seed(123)
-    y = np.random.randint(0, 1, size=(100, 1))
-    history = model.fit(x, y, epochs=3, batch_size=10)
-    self.assertAlmostEqual(history.history['loss'][-3], 0.6931, 4)
-    self.assertAlmostEqual(history.history['loss'][-2], 0.6931, 4)
-    self.assertAlmostEqual(history.history['loss'][-1], 0.6931, 4)
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  def test_loss_correctness_with_iterator(self):
-    # Test that training loss is the same in eager and graph
-    # (by comparing it to a reference value in a deterministic case)
-    layers = [
-        keras.layers.Dense(3, activation='relu',
-                           kernel_initializer='ones'),
-        keras.layers.Dense(2, activation='softmax', kernel_initializer='ones')]
-    model = test_utils.get_model_from_layers(layers, input_shape=(4,))
-    model.compile(
-        loss='sparse_categorical_crossentropy',
-        optimizer=rmsprop.RMSprop(learning_rate=0.001),
-        run_eagerly=test_utils.should_run_eagerly())
-    x = np.ones((100, 4), dtype=np.float32)
-    np.random.seed(123)
-    y = np.random.randint(0, 1, size=(100, 1))
-    dataset = tf.data.Dataset.from_tensor_slices((x, y))
-    dataset = dataset.repeat(100)
-    dataset = dataset.batch(10)
-    history = model.fit(dataset, epochs=1, steps_per_epoch=10)
-    self.assertAlmostEqual(history.history['loss'][-1], 0.5836, 4)
-
-  @parameterized.named_parameters([
-      ('_None', None, 0., 4.),
-      ('_False', False, 4., 4.),
-      ('_True', True, 0., 0.),
-  ])
-  def test_nested_model_learning_phase(self, training,
-                                       expected_training_loss,
-                                       expected_validation_loss):
-    """Tests that learning phase is correctly set in an intermediate layer."""
-
-    def _make_unregularized_model():
-      inputs = keras.Input((4,))
-      # Zero out activations when `training=True`.
-      x = keras.layers.Dropout(1. - 1. / (1 << 24))(inputs)
-      x = keras.layers.Dense(
-          10,
-          activation='relu',
-          trainable=False,
-          bias_initializer='zeros',
-          kernel_initializer='ones')(
-              x)  # Just sum together all the activations.
-      outputs = keras.layers.Dense(3)(x)
-      return keras.Model(inputs, outputs)
-
-    def _regularize_model(unregularized_model):
-      # Regularize the most recent activations of a post-dropout layer.
-      sample_activations = unregularized_model.get_layer(
-          index=-2).get_output_at(-1)
-      regularization_loss = keras.backend.mean(sample_activations)
-      unregularized_model.add_loss(regularization_loss)
-      unregularized_model.add_metric(
-          regularization_loss, aggregation='mean', name='regularization_loss')
-      inputs = keras.Input(unregularized_model.inputs[0].shape[1:])
-      logits = unregularized_model(inputs, training=training)
-      outputs = keras.activations.softmax(logits)
-      model = keras.Model(inputs, outputs)
-      return model
-
-    # Make and compile models.
-    model = _regularize_model(_make_unregularized_model())
-    model.compile('sgd', 'sparse_categorical_crossentropy')
-    # Prepare fake data.
-    x = np.ones((20, 4)).astype(np.float32)
-    y = np.random.randint(0, 3, size=(20,)).astype(np.int64)
-    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
-    results = model.evaluate(dataset)
-    evaluation_results = dict(zip(model.metrics_names, results))
-    # Rate of dropout depends on the learning phase.
-    self.assertEqual(evaluation_results['regularization_loss'],
-                     expected_validation_loss)
-    history = model.fit(dataset, epochs=2, validation_data=dataset).history
-    self.assertAllEqual(history['regularization_loss'],
-                        [expected_training_loss] * 2)
-    self.assertAllEqual(history['val_regularization_loss'],
-                        [expected_validation_loss] * 2)
-
-
-if __name__ == '__main__':
-  tf.compat.v1.enable_eager_execution()
-  tf.test.main()
+    @test_combinations.run_with_all_model_types
+    @test_combinations.run_all_keras_modes
+    @parameterized.named_parameters(
+        [
+            ("", dict()),
+            ("_clipvalue_inf", {"clipvalue": 999999}),
+            ("_clipnorm_inf", {"clipnorm": 999999}),
+        ]
+    )
+    def test_loss_correctness(self, optimizer_kwargs):
+        # Test that training loss is the same in eager and graph
+        # (by comparing it to a reference value in a deterministic case)
+        layers = [
+            keras.layers.Dense(3, activation="relu", kernel_initializer="ones"),
+            keras.layers.Dense(
+                2, activation="softmax", kernel_initializer="ones"
+            ),
+        ]
+        model = test_utils.get_model_from_layers(layers, input_shape=(4,))
+        model.compile(
+            loss="sparse_categorical_crossentropy",
+            optimizer=rmsprop.RMSprop(learning_rate=0.001, **optimizer_kwargs),
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+        x = np.ones((100, 4))
+        np.random.seed(123)
+        y = np.random.randint(0, 1, size=(100, 1))
+        history = model.fit(x, y, epochs=1, batch_size=10)
+        self.assertAlmostEqual(history.history["loss"][-1], 0.5836, 4)
+
+    @test_combinations.run_with_all_model_types
+    @test_combinations.run_all_keras_modes
+    def test_loss_correctness_clipvalue_zero(self):
+        # Test that training loss is the same in eager and graph
+        # (by comparing it to a reference value in a deterministic case)
+        # And confirm that setting clipvalue to zero stops all training
+        layers = [
+            keras.layers.Dense(3, activation="relu", kernel_initializer="ones"),
+            keras.layers.Dense(
+                2, activation="softmax", kernel_initializer="ones"
+            ),
+        ]
+        model = test_utils.get_model_from_layers(layers, input_shape=(4,))
+        model.compile(
+            loss="sparse_categorical_crossentropy",
+            optimizer=rmsprop.RMSprop(learning_rate=0.001, clipvalue=0.0),
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+        x = np.ones((100, 4))
+        np.random.seed(123)
+        y = np.random.randint(0, 1, size=(100, 1))
+        history = model.fit(x, y, epochs=3, batch_size=10)
+        self.assertAlmostEqual(history.history["loss"][-3], 0.6931, 4)
+        self.assertAlmostEqual(history.history["loss"][-2], 0.6931, 4)
+        self.assertAlmostEqual(history.history["loss"][-1], 0.6931, 4)
+
+    @test_combinations.run_with_all_model_types
+    @test_combinations.run_all_keras_modes
+    def test_loss_correctness_with_iterator(self):
+        # Test that training loss is the same in eager and graph
+        # (by comparing it to a reference value in a deterministic case)
+        layers = [
+            keras.layers.Dense(3, activation="relu", kernel_initializer="ones"),
+            keras.layers.Dense(
+                2, activation="softmax", kernel_initializer="ones"
+            ),
+        ]
+        model = test_utils.get_model_from_layers(layers, input_shape=(4,))
+        model.compile(
+            loss="sparse_categorical_crossentropy",
+            optimizer=rmsprop.RMSprop(learning_rate=0.001),
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+        x = np.ones((100, 4), dtype=np.float32)
+        np.random.seed(123)
+        y = np.random.randint(0, 1, size=(100, 1))
+        dataset = tf.data.Dataset.from_tensor_slices((x, y))
+        dataset = dataset.repeat(100)
+        dataset = dataset.batch(10)
+        history = model.fit(dataset, epochs=1, steps_per_epoch=10)
+        self.assertAlmostEqual(history.history["loss"][-1], 0.5836, 4)
+
+    @parameterized.named_parameters(
+        [
+            ("_None", None, 0.0, 4.0),
+            ("_False", False, 4.0, 4.0),
+            ("_True", True, 0.0, 0.0),
+        ]
+    )
+    def test_nested_model_learning_phase(
+        self, training, expected_training_loss, expected_validation_loss
+    ):
+        """Tests that learning phase is set in an intermediate layer."""
+
+        def _make_unregularized_model():
+            inputs = keras.Input((4,))
+            # Zero out activations when `training=True`.
+            x = keras.layers.Dropout(1.0 - 1.0 / (1 << 24))(inputs)
+            x = keras.layers.Dense(
+                10,
+                activation="relu",
+                trainable=False,
+                bias_initializer="zeros",
+                kernel_initializer="ones",
+            )(
+                x
+            )  # Just sum together all the activations.
+            outputs = keras.layers.Dense(3)(x)
+            return keras.Model(inputs, outputs)
+
+        def _regularize_model(unregularized_model):
+            # Regularize the most recent activations of a post-dropout layer.
+            sample_activations = unregularized_model.get_layer(
+                index=-2
+            ).get_output_at(-1)
+            regularization_loss = keras.backend.mean(sample_activations)
+            unregularized_model.add_loss(regularization_loss)
+            unregularized_model.add_metric(
+                regularization_loss,
+                aggregation="mean",
+                name="regularization_loss",
+            )
+            inputs = keras.Input(unregularized_model.inputs[0].shape[1:])
+            logits = unregularized_model(inputs, training=training)
+            outputs = keras.activations.softmax(logits)
+            model = keras.Model(inputs, outputs)
+            return model
+
+        # Make and compile models.
+        model = _regularize_model(_make_unregularized_model())
+        model.compile("sgd", "sparse_categorical_crossentropy")
+        # Prepare fake data.
+        x = np.ones((20, 4)).astype(np.float32)
+        y = np.random.randint(0, 3, size=(20,)).astype(np.int64)
+        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
+        results = model.evaluate(dataset)
+        evaluation_results = dict(zip(model.metrics_names, results))
+        # Rate of dropout depends on the learning phase.
+        self.assertEqual(
+            evaluation_results["regularization_loss"], expected_validation_loss
+        )
+        history = model.fit(dataset, epochs=2, validation_data=dataset).history
+        self.assertAllEqual(
+            history["regularization_loss"], [expected_training_loss] * 2
+        )
+        self.assertAllEqual(
+            history["val_regularization_loss"], [expected_validation_loss] * 2
+        )
+
+
+if __name__ == "__main__":
+    tf.compat.v1.enable_eager_execution()
+    tf.test.main()
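The expected values in `test_nested_model_learning_phase` above can be sanity-checked by hand: with `kernel_initializer="ones"` and all-ones inputs of shape (20, 4), every ReLU activation of the 10-unit dense layer is 4.0, so the mean-activation regularization loss is 4.0 when the near-1.0 dropout rate passes inputs through (inference) and 0.0 when it zeroes them out (training). A back-of-the-envelope check in plain NumPy (not part of the test file):

import numpy as np

x = np.ones((20, 4), dtype="float32")  # fake data used by the test
w = np.ones((4, 10), dtype="float32")  # kernel_initializer="ones"
acts = np.maximum(x @ w, 0.0)          # relu(dense(x)); every entry is 4.0
print(acts.mean())                     # 4.0 == expected_validation_loss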
diff --git a/keras/engine/training_eager_v1.py b/keras/engine/training_eager_v1.py
index 8d02110610c7..427b816f8478 100644
--- a/keras/engine/training_eager_v1.py
+++ b/keras/engine/training_eager_v1.py
@@ -14,351 +14,392 @@
 # ==============================================================================
 """Keras training and evaluation routines for eager execution."""
 
-import tensorflow.compat.v2 as tf
-# pylint: disable=protected-access
-
 import numpy as np
+import tensorflow.compat.v2 as tf
 
-from tensorflow.python.eager.backprop import GradientTape
 from keras import backend
 from keras.engine import training_utils
 from keras.engine import training_utils_v1
 from keras.mixed_precision import loss_scale_optimizer
 from keras.utils import losses_utils
+
+# isort: off
+from tensorflow.python.eager.backprop import GradientTape
 from tensorflow.python.platform import tf_logging as logging
 
 
 def _eager_loss_fn(outputs, targets, loss_fn, output_name):
-  with backend.name_scope(output_name + '_loss'):
-    loss = loss_fn(targets, outputs)
-    return loss
+    with backend.name_scope(output_name + "_loss"):
+        loss = loss_fn(targets, outputs)
+        return loss
 
 
 def _eager_metrics_fn(model, outputs, targets, sample_weights=None, masks=None):
-  """Calculates the metrics for each output of the given model.
-
-  Args:
-    model: The model on which metrics are being calculated.
-    outputs: The outputs of the given model.
-    targets: The predictions or targets of the given model.
-    sample_weights: Optional list of sample weights for each output.
-    masks: Optional list of masks for each output.
-
-  Returns:
-    Returns the metric results for each output of the model.
-  """
-  outputs = tf.nest.flatten(outputs)
-  targets = tf.nest.flatten(targets)
-  # Invoke all(weighted and unweighted) metrics.
-  metric_results = []
-  if targets:
-    # Insert None values corresponding to the targets that need to be skipped
-    # on the model.
-    if len(model._targets) != len(targets):
-      new_targets = [
-          None if t is None else targets.pop(0) for t in model._targets
-      ]
-      targets = new_targets
-
-    metric_results = model._handle_metrics(
-        outputs,
-        targets=targets,
-        sample_weights=sample_weights,
-        masks=masks,
-        return_weighted_and_unweighted_metrics=True,
-        skip_target_masks=model._prepare_skip_target_masks())
-
-  # Add metric results from the `add_metric` metrics.
-  metric_results.extend([
-      m.result()
-      for m in model.metrics
-      if m not in model._compile_metric_functions
-  ])
-  return metric_results
+    """Calculates the metrics for each output of the given model.
+
+    Args:
+        model: The model on which metrics are being calculated.
+        outputs: The outputs of the given model.
+        targets: The predictions or targets of the given model.
+        sample_weights: Optional list of sample weights for each output.
+        masks: Optional list of masks for each output.
+
+    Returns:
+        Returns the metric results for each output of the model.
+    """
+    outputs = tf.nest.flatten(outputs)
+    targets = tf.nest.flatten(targets)
+    # Invoke all (weighted and unweighted) metrics.
+    metric_results = []
+    if targets:
+        # Insert None values corresponding to the targets that need to be
+        # skipped on the model.
+        if len(model._targets) != len(targets):
+            new_targets = [
+                None if t is None else targets.pop(0) for t in model._targets
+            ]
+            targets = new_targets
+
+        metric_results = model._handle_metrics(
+            outputs,
+            targets=targets,
+            sample_weights=sample_weights,
+            masks=masks,
+            return_weighted_and_unweighted_metrics=True,
+            skip_target_masks=model._prepare_skip_target_masks(),
+        )
+
+    # Add metric results from the `add_metric` metrics.
+    metric_results.extend(
+        [
+            m.result()
+            for m in model.metrics
+            if m not in model._compile_metric_functions
+        ]
+    )
+    return metric_results
+
+
+def _model_loss(
+    model,
+    inputs,
+    targets,
+    output_loss_metrics=None,
+    sample_weights=None,
+    training=False,
+):
+    """Calculates the loss for a given model.
+
+    Args:
+        model: The model on which metrics are being calculated.
+        inputs: Either a dictionary of inputs to the model or a list of input
+            arrays.
+        targets: List of target arrays.
+        output_loss_metrics: List of metrics that are used to aggregate output
+            loss values.
+        sample_weights: Optional list of sample weight arrays.
+        training: Whether the model should be run in inference or training mode.
+
+    Returns:
+        Returns the model output, total loss, loss value calculated using the
+        specified loss function and masks for each output. The total loss
+        includes regularization losses and applies masking and sample weighting
+        to the loss value.
+    """
+    # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
+    # Used to keep track of the total loss value (stateless).
+    # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
+    #      loss_weight_2 * output_2_loss_fn(...) +
+    #      layer losses.
+    total_loss = 0
+    kwargs = {}
+    if model._expects_training_arg:
+        kwargs["training"] = training
+    if len(inputs) == 1 and not isinstance(inputs, dict):
+        inputs = inputs[0]
+
+    # Allow mixed `NumPy` and `EagerTensor` input here.
+    if any(
+        isinstance(input_t, (np.ndarray, float, int))
+        for input_t in tf.nest.flatten(inputs)
+    ):
+        inputs = tf.nest.map_structure(tf.convert_to_tensor, inputs)
+
+    outs = model(inputs, **kwargs)
+    outs = tf.nest.flatten(outs)
+
+    if targets:
+        targets = training_utils_v1.cast_if_floating_dtype_and_mismatch(
+            targets, outs
+        )
+    # TODO(sallymatson/psv): check if we should do same mismatch fix for weights
+    if sample_weights:
+        sample_weights = [
+            training_utils_v1.cast_if_floating_dtype(tf.convert_to_tensor(val))
+            if val is not None
+            else None
+            for val in sample_weights
+        ]
+
+    masks = [getattr(t, "_keras_mask", None) for t in outs]
+    targets = tf.nest.flatten(targets)
+
+    # Used to keep track of individual output losses.
+    output_losses = []
+
+    with backend.name_scope("loss"):
+        loss_fns = [
+            loss_fn for loss_fn in model.loss_functions if loss_fn is not None
+        ]
+        custom_losses = model.losses  # Regularization losses
+
+        if not loss_fns and not custom_losses:
+            if training:
+                raise ValueError(
+                    "The model cannot be trained "
+                    "because it has no loss to optimize."
+                )
+            else:
+                raise ValueError(
+                    "The model cannot be evaluated "
+                    "because it has no loss to compute."
+                )
+
+        for i, loss_fn in enumerate(loss_fns):
+            weights = sample_weights[i] if sample_weights else None
+            mask = masks[i]
+            with backend.name_scope(model.output_names[i] + "_loss"):
+                if mask is not None:
+                    mask = tf.cast(mask, outs[i].dtype)
+                    # Update weights with mask.
+                    if weights is None:
+                        weights = mask
+                    else:
+                        # Update dimensions of weights to match with mask if
+                        # possible.
+                        weights = tf.cast(weights, outs[i].dtype)
+                        (
+                            mask,
+                            _,
+                            weights,
+                        ) = losses_utils.squeeze_or_expand_dimensions(
+                            mask, sample_weight=weights
+                        )
+                        weights *= mask
+
+                if hasattr(loss_fn, "reduction"):
+                    per_sample_losses = loss_fn.call(targets[i], outs[i])
+                    weighted_losses = losses_utils.compute_weighted_loss(
+                        per_sample_losses,
+                        sample_weight=weights,
+                        reduction=losses_utils.ReductionV2.NONE,
+                    )
+                    loss_reduction = loss_fn.reduction
+
+                    # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE`
+                    # for all compile use cases.
+                    if loss_reduction == losses_utils.ReductionV2.AUTO:
+                        loss_reduction = (
+                            losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
+                        )
+
+                    # Compute the stateless loss value.
+                    output_loss = losses_utils.reduce_weighted_loss(
+                        weighted_losses, reduction=loss_reduction
+                    )
+                else:
+                    # Compute the stateless loss value for a custom loss class.
+                    # Here we assume that the class takes care of loss reduction
+                    # because if this class returns a vector value we cannot
+                    # differentiate between use case where a custom optimizer
+                    # expects a vector loss value vs unreduced per-sample loss
+                    # value.
+                    output_loss = loss_fn(
+                        targets[i], outs[i], sample_weight=weights
+                    )
+                    loss_reduction = (
+                        losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
+                    )
+
+                # If the number of outputs is 1 then we don't append the loss
+                # metric associated with each model output. When there are
+                # multiple outputs associated with a model, each output's loss
+                # is calculated and returned as part of the loss_metrics.
+                if len(model.outputs) > 1:
+                    # Keep track of the stateful output loss result.
+                    output_losses.append(output_loss_metrics[i](output_loss))
+
+                # Scale output loss for distribution. For custom losses we
+                # assume reduction was mean.
+                if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
+                    output_loss = losses_utils.scale_loss_for_distribution(
+                        output_loss
+                    )
+                total_loss += model._loss_weights_list[i] * output_loss
+
+        # Add regularization losses
+        if custom_losses:
+            total_loss += losses_utils.scale_loss_for_distribution(
+                tf.add_n(custom_losses)
+            )
+    return outs, total_loss, output_losses, masks
 
 
-def _model_loss(model,
+def _process_single_batch(
+    model,
+    inputs,
+    targets,
+    output_loss_metrics=None,
+    sample_weights=None,
+    training=False,
+):
+    """Calculate the loss and gradient for one input batch.
+
+    The model weights are updated if training is set to True.
+
+    Args:
+        model: Model whose loss has to be calculated.
+        inputs: List of input arrays.
+        targets: List of target arrays.
+        output_loss_metrics: List of metrics that are used to aggregate output
+            loss values.
+        sample_weights: Optional list of sample weight arrays.
+        training: The boolean represents if the weights of the model are
+            updated. 'fit' methods will set this to True while 'evaluate'
+            methods will set this to False.
+
+    Returns:
+        output of the model, total loss, the loss and the mask
+        associated with each output.
+
+    Raises:
+        ValueError: If the model has no loss to optimize.
+    """
+    with backend.eager_learning_phase_scope(
+        1 if training else 0
+    ), training_utils.RespectCompiledTrainableState(model):
+        with GradientTape() as tape:
+            outs, total_loss, output_losses, masks = _model_loss(
+                model,
                 inputs,
                 targets,
-                output_loss_metrics=None,
-                sample_weights=None,
-                training=False):
-  """Calculates the loss for a given model.
-
-  Args:
-    model: The model on which metrics are being calculated.
-    inputs: Either a dictionary of inputs to the model or a list of input
-      arrays.
-    targets: List of target arrays.
-    output_loss_metrics: List of metrics that are used to aggregated output
-      loss values.
-    sample_weights: Optional list of sample weight arrays.
-    training: Whether the model should be run in inference or training mode.
-
-  Returns:
-    Returns the model output, total loss, loss value calculated using the
-    specified loss function and masks for each output. The total loss includes
-    regularization losses and applies masking and sample weighting
-    to the loss value.
-  """
-  # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
-  # Used to keep track of the total loss value (stateless).
-  # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
-  #      loss_weight_2 * output_2_loss_fn(...) +
-  #      layer losses.
-  total_loss = 0
-  kwargs = {}
-  if model._expects_training_arg:
-    kwargs['training'] = training
-  if len(inputs) == 1 and not isinstance(inputs, dict):
-    inputs = inputs[0]
-
-  # Allow mixed `NumPy` and `EagerTensor` input here.
-  if any(
-      isinstance(input_t, (np.ndarray, float, int))
-      for input_t in tf.nest.flatten(inputs)):
-    inputs = tf.nest.map_structure(tf.convert_to_tensor, inputs)
-
-  outs = model(inputs, **kwargs)
-  outs = tf.nest.flatten(outs)
-
-  if targets:
-    targets = training_utils_v1.cast_if_floating_dtype_and_mismatch(
-        targets, outs)
-  # TODO(sallymatson/psv): check if we should do same mismatch fix for weights
-  if sample_weights:
-    sample_weights = [
-        training_utils_v1.cast_if_floating_dtype(
-            tf.convert_to_tensor(val))
-        if val is not None else None for val in sample_weights
-    ]
-
-  masks = [getattr(t, '_keras_mask', None) for t in outs]
-  targets = tf.nest.flatten(targets)
-
-  # Used to keep track of individual output losses.
-  output_losses = []
-
-  with backend.name_scope('loss'):
-    loss_fns = [
-        loss_fn for loss_fn in model.loss_functions if loss_fn is not None
-    ]
-    custom_losses = model.losses  # Regularization losses
-
-    if not loss_fns and not custom_losses:
-      if training:
-        raise ValueError('The model cannot be trained '
-                         'because it has no loss to optimize.')
-      else:
-        raise ValueError('The model cannot be evaluated '
-                         'because it has no loss to compute.')
-
-    for i, loss_fn in enumerate(loss_fns):
-      weights = sample_weights[i] if sample_weights else None
-      mask = masks[i]
-      with backend.name_scope(model.output_names[i] + '_loss'):
-        if mask is not None:
-          mask = tf.cast(mask, outs[i].dtype)
-          # Update weights with mask.
-          if weights is None:
-            weights = mask
-          else:
-            # Update dimensions of weights to match with mask if possible.
-            weights = tf.cast(weights, outs[i].dtype)
-            mask, _, weights = (
-                losses_utils.squeeze_or_expand_dimensions(
-                    mask, sample_weight=weights))
-            weights *= mask
-
-        if hasattr(loss_fn, 'reduction'):
-          per_sample_losses = loss_fn.call(targets[i], outs[i])
-          weighted_losses = losses_utils.compute_weighted_loss(
-              per_sample_losses,
-              sample_weight=weights,
-              reduction=losses_utils.ReductionV2.NONE)
-          loss_reduction = loss_fn.reduction
-
-          # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all
-          # compile use cases.
-          if loss_reduction == losses_utils.ReductionV2.AUTO:
-            loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
-
-          # Compute the stateless loss value.
-          output_loss = losses_utils.reduce_weighted_loss(
-              weighted_losses, reduction=loss_reduction)
-        else:
-          # Compute the stateless loss value for a custom loss class.
-          # Here we assume that the class takes care of loss reduction
-          # because if this class returns a vector value we cannot
-          # differentiate between use case where a custom optimizer
-          # expects a vector loss value vs unreduced per-sample loss value.
-          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)
-          loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
-
-        # If the number of outputs is 1 then we don't append the loss metric
-        # associated with each model output. When there are multiple outputs
-        # associated with a model, each output's loss is calculated and returned
-        # as part of the loss_metrics.
-        if len(model.outputs) > 1:
-          # Keep track of the stateful output loss result.
-          output_losses.append(output_loss_metrics[i](output_loss))
-
-        # Scale output loss for distribution. For custom losses we assume
-        # reduction was mean.
-        if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
-          output_loss = losses_utils.scale_loss_for_distribution(output_loss)
-        total_loss += model._loss_weights_list[i] * output_loss
-
-    # Add regularization losses
-    if custom_losses:
-      total_loss += losses_utils.scale_loss_for_distribution(
-          tf.add_n(custom_losses))
-  return outs, total_loss, output_losses, masks
-
-
-def _process_single_batch(model,
-                          inputs,
-                          targets,
-                          output_loss_metrics=None,
-                          sample_weights=None,
-                          training=False):
-  """Calculate the loss and gradient for one input batch.
-
-  The model weights are updated if training is set to True.
-
-  Args:
-    model: Model whose loss has to be calculated.
-    inputs: List of input arrays.
-    targets: List of target arrays.
-    output_loss_metrics: List of metrics that are used to aggregated output
-      loss values.
-    sample_weights: Optional list of sample weight arrays.
-    training: The boolean represents if the weights of the model are updated.
-      'fit' methods will set this to True while 'evaluate' methods will
-      set this to False.
-
-  Returns:
-    output of the model, total loss, the loss and the mask
-    associated with each output.
-
-  Raises:
-    ValueError: If the model has no loss to optimize.
-  """
-  with backend.eager_learning_phase_scope(1 if training else 0), \
-      training_utils.RespectCompiledTrainableState(model):
-    with GradientTape() as tape:
-      outs, total_loss, output_losses, masks = (
-          _model_loss(
-              model,
-              inputs,
-              targets,
-              output_loss_metrics=output_loss_metrics,
-              sample_weights=sample_weights,
-              training=training))
-      if isinstance(model.optimizer, loss_scale_optimizer.LossScaleOptimizer):
-        scaled_total_loss = model.optimizer.get_scaled_loss(total_loss)
-      else:
-        scaled_total_loss = total_loss
-    if training:
-      trainable_weights = model.trainable_weights
-      if trainable_weights:
-        # TODO(tanzheny) b/132690565: Provide mechanism for user to override
-        # model.train_on_batch.
-        if hasattr(model, '_backwards'):
-          model._backwards(tape, scaled_total_loss)
-        else:
-          grads = tape.gradient(scaled_total_loss, trainable_weights)
-          if isinstance(model.optimizer,
-                        loss_scale_optimizer.LossScaleOptimizer):
-            grads = model.optimizer.get_unscaled_gradients(grads)
-          model.optimizer.apply_gradients(zip(grads, trainable_weights))
-      else:
-        logging.warning('The list of trainable weights is empty. Make sure that'
-                        ' you are not setting model.trainable to False before '
-                        'compiling the model.')
-    return outs, total_loss, output_losses, masks
-
-
-def train_on_batch(model,
-                   inputs,
-                   targets,
-                   sample_weights=None,
-                   output_loss_metrics=None):
-  """Calculates the loss and gradient updates for one input batch.
-
-  Args:
-    model: Model whose loss has to be calculated.
-    inputs: Input batch data.
-    targets: Target batch data.
-    sample_weights: Sample weight batch data.
-    output_loss_metrics: List of metrics that are used to aggregated output
-      loss values.
-
-  Returns:
-    Dict with three items:
-      'total_loss': list with a single tensor for overall loss,
-      'output_losses': list of tensors for loss corresponding to each of the
-        model output. Could be a empty list when model has only one output.
-      'metrics': list of tensors for metric specified.
-  """
-  inputs = training_utils_v1.cast_to_model_input_dtypes(inputs, model)
-  outs, total_loss, output_losses, masks = (
-      _process_single_batch(
-          model,
-          inputs,
-          targets,
-          sample_weights=sample_weights,
-          training=True,
-          output_loss_metrics=output_loss_metrics))
-  if not isinstance(outs, list):
-    outs = [outs]
-  metrics_results = _eager_metrics_fn(
-      model, outs, targets, sample_weights=sample_weights, masks=masks)
-  total_loss = tf.nest.flatten(total_loss)
-  return {'total_loss': total_loss,
-          'output_losses': output_losses,
-          'metrics': metrics_results}
-
-
-def test_on_batch(model,
-                  inputs,
-                  targets,
-                  sample_weights=None,
-                  output_loss_metrics=None):
-  """Calculates the loss for one input batch.
-
-  Args:
-    model: Model whose loss has to be calculated.
-    inputs: Input batch data.
-    targets: Target batch data.
-    sample_weights: Sample weight batch data.
-    output_loss_metrics: List of metrics that are used to aggregated output
-      loss values.
-
-  Returns:
-    Dict with three items:
-      'total_loss': single tensor for overall loss,
-      'output_losses': list of tensors for loss corresponding to each of the
-        model output. Could be a empty list when model has only one output.
-      'metrics': list of tensors for metric specified.
-  """
-  inputs = training_utils_v1.cast_to_model_input_dtypes(inputs, model)
-
-  with backend.eager_learning_phase_scope(0):
-    outs, total_loss, output_losses, masks = (
-        _model_loss(
+                output_loss_metrics=output_loss_metrics,
+                sample_weights=sample_weights,
+                training=training,
+            )
+            if isinstance(
+                model.optimizer, loss_scale_optimizer.LossScaleOptimizer
+            ):
+                scaled_total_loss = model.optimizer.get_scaled_loss(total_loss)
+            else:
+                scaled_total_loss = total_loss
+        if training:
+            trainable_weights = model.trainable_weights
+            if trainable_weights:
+                # TODO(tanzheny) b/132690565: Provide mechanism for user to
+                # override model.train_on_batch.
+                if hasattr(model, "_backwards"):
+                    model._backwards(tape, scaled_total_loss)
+                else:
+                    grads = tape.gradient(scaled_total_loss, trainable_weights)
+                    if isinstance(
+                        model.optimizer, loss_scale_optimizer.LossScaleOptimizer
+                    ):
+                        grads = model.optimizer.get_unscaled_gradients(grads)
+                    model.optimizer.apply_gradients(
+                        zip(grads, trainable_weights)
+                    )
+            else:
+                logging.warning(
+                    "The list of trainable weights is empty. Make sure that"
+                    " you are not setting model.trainable to False before "
+                    "compiling the model."
+                )
+        return outs, total_loss, output_losses, masks
+
+
+def train_on_batch(
+    model, inputs, targets, sample_weights=None, output_loss_metrics=None
+):
+    """Calculates the loss and gradient updates for one input batch.
+
+    Args:
+        model: Model whose loss has to be calculated.
+        inputs: Input batch data.
+        targets: Target batch data.
+        sample_weights: Sample weight batch data.
+        output_loss_metrics: List of metrics that are used to aggregate output
+            loss values.
+
+    Returns:
+        Dict with three items:
+            'total_loss': list with a single tensor for overall loss,
+            'output_losses': list of tensors for loss corresponding to each of
+                the model outputs. Could be an empty list when the model has
+                only one output.
+            'metrics': list of tensors for the metrics specified.
+    """
+    inputs = training_utils_v1.cast_to_model_input_dtypes(inputs, model)
+    outs, total_loss, output_losses, masks = _process_single_batch(
+        model,
+        inputs,
+        targets,
+        sample_weights=sample_weights,
+        training=True,
+        output_loss_metrics=output_loss_metrics,
+    )
+    if not isinstance(outs, list):
+        outs = [outs]
+    metrics_results = _eager_metrics_fn(
+        model, outs, targets, sample_weights=sample_weights, masks=masks
+    )
+    total_loss = tf.nest.flatten(total_loss)
+    return {
+        "total_loss": total_loss,
+        "output_losses": output_losses,
+        "metrics": metrics_results,
+    }
+
+
+def test_on_batch(
+    model, inputs, targets, sample_weights=None, output_loss_metrics=None
+):
+    """Calculates the loss for one input batch.
+
+    Args:
+        model: Model whose loss has to be calculated.
+        inputs: Input batch data.
+        targets: Target batch data.
+        sample_weights: Sample weight batch data.
+        output_loss_metrics: List of metrics that are used to aggregate output
+            loss values.
+
+    Returns:
+        Dict with three items:
+            'total_loss': single tensor for overall loss,
+            'output_losses': list of tensors for loss corresponding to each of
+                the model outputs. Could be an empty list when the model has
+                only one output.
+            'metrics': list of tensors for the metrics specified.
+ """ + inputs = training_utils_v1.cast_to_model_input_dtypes(inputs, model) + + with backend.eager_learning_phase_scope(0): + outs, total_loss, output_losses, masks = _model_loss( model, inputs, targets, sample_weights=sample_weights, training=False, - output_loss_metrics=output_loss_metrics)) - if not isinstance(outs, list): - outs = [outs] - metrics_results = _eager_metrics_fn( - model, outs, targets, sample_weights=sample_weights, masks=masks) - total_loss = tf.nest.flatten(total_loss) - - return {'total_loss': total_loss, - 'output_losses': output_losses, - 'metrics': metrics_results} + output_loss_metrics=output_loss_metrics, + ) + if not isinstance(outs, list): + outs = [outs] + metrics_results = _eager_metrics_fn( + model, outs, targets, sample_weights=sample_weights, masks=masks + ) + total_loss = tf.nest.flatten(total_loss) + + return { + "total_loss": total_loss, + "output_losses": output_losses, + "metrics": metrics_results, + } diff --git a/keras/engine/training_generator_test.py b/keras/engine/training_generator_test.py index 3c64c36eaea5..70c32ca78d66 100644 --- a/keras/engine/training_generator_test.py +++ b/keras/engine/training_generator_test.py @@ -14,515 +14,594 @@ # ============================================================================== """Tests for training routines.""" -import tensorflow.compat.v2 as tf - import itertools -from absl.testing import parameterized import numpy as np -from keras.testing_infra import test_combinations +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + from keras import layers as layers_module from keras import losses from keras import metrics as metrics_module -from keras.testing_infra import test_utils from keras.engine import input_layer from keras.engine import training from keras.engine import training_generator_v1 -from keras.optimizers.optimizer_v2 import rmsprop +from keras.optimizers.legacy import rmsprop +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils from keras.utils import data_utils def custom_generator(mode=2): - batch_size = 10 - num_samples = 50 - arr_data = np.random.random((num_samples, 2)) - arr_labels = np.random.random((num_samples, 4)) - arr_weights = np.random.random((num_samples,)) - i = 0 - while True: - batch_index = i * batch_size % num_samples - i += 1 - start = batch_index - end = start + batch_size - x = arr_data[start: end] - y = arr_labels[start: end] - w = arr_weights[start: end] - if mode == 1: - yield x - elif mode == 2: - yield x, y - else: - yield x, y, w + batch_size = 10 + num_samples = 50 + arr_data = np.random.random((num_samples, 2)) + arr_labels = np.random.random((num_samples, 4)) + arr_weights = np.random.random((num_samples,)) + i = 0 + while True: + batch_index = i * batch_size % num_samples + i += 1 + start = batch_index + end = start + batch_size + x = arr_data[start:end] + y = arr_labels[start:end] + w = arr_weights[start:end] + if mode == 1: + yield x + elif mode == 2: + yield x, y + else: + yield x, y, w def custom_generator_changing_batch_size(mode=2): - batch_size = 10 - cur_batch_size = 11 - num_samples = 50 - arr_data = np.random.random((num_samples, 2)) - arr_labels = np.random.random((num_samples, 4)) - arr_weights = np.random.random((num_samples,)) - i = 0 - while True: - if cur_batch_size > 1: - cur_batch_size -= 1 - batch_index = i * batch_size % num_samples - i += 1 - start = batch_index - end = start + cur_batch_size - x = arr_data[start: end] - y = arr_labels[start: end] - w = arr_weights[start: 
diff --git a/keras/engine/training_generator_test.py b/keras/engine/training_generator_test.py
index 3c64c36eaea5..70c32ca78d66 100644
--- a/keras/engine/training_generator_test.py
+++ b/keras/engine/training_generator_test.py
@@ -14,515 +14,594 @@
 # ==============================================================================
 """Tests for training routines."""
 
-import tensorflow.compat.v2 as tf
-
 import itertools
-from absl.testing import parameterized
+
 import numpy as np
-from keras.testing_infra import test_combinations
+import tensorflow.compat.v2 as tf
+from absl.testing import parameterized
+
 from keras import layers as layers_module
 from keras import losses
 from keras import metrics as metrics_module
-from keras.testing_infra import test_utils
 from keras.engine import input_layer
 from keras.engine import training
 from keras.engine import training_generator_v1
-from keras.optimizers.optimizer_v2 import rmsprop
+from keras.optimizers.legacy import rmsprop
+from keras.testing_infra import test_combinations
+from keras.testing_infra import test_utils
 from keras.utils import data_utils
 
 
 def custom_generator(mode=2):
-  batch_size = 10
-  num_samples = 50
-  arr_data = np.random.random((num_samples, 2))
-  arr_labels = np.random.random((num_samples, 4))
-  arr_weights = np.random.random((num_samples,))
-  i = 0
-  while True:
-    batch_index = i * batch_size % num_samples
-    i += 1
-    start = batch_index
-    end = start + batch_size
-    x = arr_data[start: end]
-    y = arr_labels[start: end]
-    w = arr_weights[start: end]
-    if mode == 1:
-      yield x
-    elif mode == 2:
-      yield x, y
-    else:
-      yield x, y, w
+    batch_size = 10
+    num_samples = 50
+    arr_data = np.random.random((num_samples, 2))
+    arr_labels = np.random.random((num_samples, 4))
+    arr_weights = np.random.random((num_samples,))
+    i = 0
+    while True:
+        batch_index = i * batch_size % num_samples
+        i += 1
+        start = batch_index
+        end = start + batch_size
+        x = arr_data[start:end]
+        y = arr_labels[start:end]
+        w = arr_weights[start:end]
+        if mode == 1:
+            yield x
+        elif mode == 2:
+            yield x, y
+        else:
+            yield x, y, w
 
 
 def custom_generator_changing_batch_size(mode=2):
-  batch_size = 10
-  cur_batch_size = 11
-  num_samples = 50
-  arr_data = np.random.random((num_samples, 2))
-  arr_labels = np.random.random((num_samples, 4))
-  arr_weights = np.random.random((num_samples,))
-  i = 0
-  while True:
-    if cur_batch_size > 1:
-      cur_batch_size -= 1
-    batch_index = i * batch_size % num_samples
-    i += 1
-    start = batch_index
-    end = start + cur_batch_size
-    x = arr_data[start: end]
-    y = arr_labels[start: end]
-    w = arr_weights[start: end]
-    if mode == 1:
-      yield x
-    elif mode == 2:
-      yield x, y
-    else:
-      yield x, y, w
+    batch_size = 10
+    cur_batch_size = 11
+    num_samples = 50
+    arr_data = np.random.random((num_samples, 2))
+    arr_labels = np.random.random((num_samples, 4))
+    arr_weights = np.random.random((num_samples,))
+    i = 0
+    while True:
+        if cur_batch_size > 1:
+            cur_batch_size -= 1
+        batch_index = i * batch_size % num_samples
+        i += 1
+        start = batch_index
+        end = start + cur_batch_size
+        x = arr_data[start:end]
+        y = arr_labels[start:end]
+        w = arr_weights[start:end]
+        if mode == 1:
+            yield x
+        elif mode == 2:
+            yield x, y
+        else:
+            yield x, y, w
+
 
 custom_generator_threads = data_utils.threadsafe_generator(custom_generator)
 
 
 class TestGeneratorMethods(test_combinations.TestCase):
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  @data_utils.dont_use_multiprocessing_pool
-  def test_fit_generator_method(self):
-    model = test_utils.get_small_mlp(
-        num_hidden=3, num_classes=4, input_dim=2)
-    model.compile(
-        loss='mse',
-        optimizer=rmsprop.RMSprop(1e-3),
-        metrics=['mae', metrics_module.CategoricalAccuracy()])
-
-    model.fit_generator(custom_generator_threads(),
-                        steps_per_epoch=5,
-                        epochs=1,
-                        verbose=1,
-                        max_queue_size=10,
-                        workers=4,
-                        use_multiprocessing=True)
-    model.fit_generator(custom_generator(),
-                        steps_per_epoch=5,
-                        epochs=1,
-                        verbose=1,
-                        max_queue_size=10,
-                        use_multiprocessing=False)
-    model.fit_generator(custom_generator(),
-                        steps_per_epoch=5,
-                        epochs=1,
-                        verbose=1,
-                        max_queue_size=10,
-                        use_multiprocessing=False,
-                        validation_data=custom_generator(),
-                        validation_steps=10)
-    model.fit_generator(custom_generator(),
-                        steps_per_epoch=5,
-                        validation_data=custom_generator(),
-                        validation_steps=1,
-                        workers=0)
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  @data_utils.dont_use_multiprocessing_pool
-  def test_evaluate_generator_method(self):
-    model = test_utils.get_small_mlp(
-        num_hidden=3, num_classes=4, input_dim=2)
-    model.compile(
-        loss='mse',
-        optimizer=rmsprop.RMSprop(1e-3),
-        metrics=['mae', metrics_module.CategoricalAccuracy()],
-        run_eagerly=test_utils.should_run_eagerly())
-
-    model.evaluate_generator(custom_generator_threads(),
-                             steps=5,
-                             max_queue_size=10,
-                             workers=2,
-                             verbose=1,
-                             use_multiprocessing=True)
-    model.evaluate_generator(custom_generator(),
-                             steps=5,
-                             max_queue_size=10,
-                             use_multiprocessing=False)
-    model.evaluate_generator(custom_generator(),
-                             steps=5,
-                             max_queue_size=10,
-                             use_multiprocessing=False,
-                             workers=0)
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  @data_utils.dont_use_multiprocessing_pool
-  def test_predict_generator_method(self):
-    model = test_utils.get_small_mlp(
-        num_hidden=3, num_classes=4, input_dim=2)
-    model.run_eagerly = test_utils.should_run_eagerly()
-
-    model.predict_generator(custom_generator_threads(),
-                            steps=5,
-                            max_queue_size=10,
-                            workers=2,
-                            use_multiprocessing=True)
-    model.predict_generator(custom_generator(),
-                            steps=5,
-                            max_queue_size=10,
-                            use_multiprocessing=False)
-    model.predict_generator(custom_generator(),
-                            steps=5,
-                            max_queue_size=10,
-                            workers=0)
-    # Test generator with just inputs (no targets)
-    model.predict_generator(custom_generator_threads(mode=1),
-                            steps=5,
-                            max_queue_size=10,
-                            workers=2,
-                            use_multiprocessing=True)
-    model.predict_generator(custom_generator(mode=1),
-                            steps=5,
-                            max_queue_size=10,
-                            use_multiprocessing=False)
-    model.predict_generator(custom_generator(mode=1),
-                            steps=5,
-                            max_queue_size=10,
-                            workers=0)
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  def test_generator_methods_with_sample_weights(self):
-    model = test_utils.get_small_mlp(
-        num_hidden=3, num_classes=4, input_dim=2)
-    model.compile(
-        loss='mse',
-        optimizer=rmsprop.RMSprop(1e-3),
-        metrics=['mae', metrics_module.CategoricalAccuracy()],
-        run_eagerly=test_utils.should_run_eagerly())
-
-    model.fit_generator(custom_generator(mode=3),
-                        steps_per_epoch=5,
-                        epochs=1,
-                        verbose=1,
-                        max_queue_size=10,
-                        use_multiprocessing=False)
-    model.fit_generator(custom_generator(mode=3),
-                        steps_per_epoch=5,
-                        epochs=1,
-                        verbose=1,
-                        max_queue_size=10,
-                        use_multiprocessing=False,
-                        validation_data=custom_generator(mode=3),
-                        validation_steps=10)
-    model.predict_generator(custom_generator(mode=3),
-                            steps=5,
-                            max_queue_size=10,
-                            use_multiprocessing=False)
-    model.evaluate_generator(custom_generator(mode=3),
-                             steps=5,
-                             max_queue_size=10,
-                             use_multiprocessing=False)
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  def test_generator_methods_invalid_use_case(self):
-    def invalid_generator():
-      while 1:
-        yield (0, 0, 0, 0)
-
-    model = test_utils.get_small_mlp(
-        num_hidden=3, num_classes=4, input_dim=2)
-    model.compile(
-        loss='mse',
-        optimizer=rmsprop.RMSprop(1e-3),
-        run_eagerly=test_utils.should_run_eagerly())
-
-    with self.assertRaises(ValueError):
-      model.fit_generator(invalid_generator(),
-                          steps_per_epoch=5,
-                          epochs=1,
-                          verbose=1,
-                          max_queue_size=10,
-                          use_multiprocessing=False)
-    with self.assertRaises(ValueError):
-      model.fit_generator(custom_generator(),
-                          steps_per_epoch=5,
-                          epochs=1,
-                          verbose=1,
-                          max_queue_size=10,
-                          use_multiprocessing=False,
-                          validation_data=invalid_generator(),
-                          validation_steps=10)
-    with self.assertRaises(ValueError):
-      model.predict_generator(invalid_generator(),
-                              steps=5,
-                              max_queue_size=10,
-                              use_multiprocessing=False)
-    with self.assertRaises(ValueError):
-      model.evaluate_generator(invalid_generator(),
-                               steps=5,
-                               max_queue_size=10,
-                               use_multiprocessing=False)
-
-  @test_combinations.run_with_all_model_types
-  @test_combinations.run_all_keras_modes
-  def test_generator_input_to_fit_eval_predict(self):
-    val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
-
-    def ones_generator():
-      while True:
-        yield np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
-
-    model = test_utils.get_small_mlp(
-        num_hidden=10, num_classes=1, input_dim=10)
-
-    model.compile(
-        rmsprop.RMSprop(0.001),
-        'binary_crossentropy',
-        run_eagerly=test_utils.should_run_eagerly())
-    model.fit(
-        ones_generator(),
-        steps_per_epoch=2,
-        validation_data=val_data,
-        epochs=2)
-    model.evaluate(ones_generator(), steps=2)
-    model.predict(ones_generator(), steps=2)
-
-    # Test with a changing batch size
-    model = test_utils.get_small_mlp(
-        num_hidden=3, num_classes=4, input_dim=2)
-    model.compile(
-        loss='mse',
-        optimizer=rmsprop.RMSprop(1e-3),
-        metrics=['mae', metrics_module.CategoricalAccuracy()])
-    model.fit_generator(custom_generator_changing_batch_size(),
-                        steps_per_epoch=5,
-                        epochs=1,
-                        verbose=1,
-                        max_queue_size=10,
-                        use_multiprocessing=False)
-    model.fit_generator(custom_generator_changing_batch_size(),
-                        steps_per_epoch=5,
-                        epochs=1,
-                        verbose=1,
-                        max_queue_size=10,
-                        use_multiprocessing=False,
-                        validation_data=custom_generator_changing_batch_size(),
-                        validation_steps=10)
custom_generator_changing_batch_size(), - steps_per_epoch=5, - validation_data=custom_generator_changing_batch_size(), - validation_steps=10, - epochs=2) - model.evaluate(custom_generator_changing_batch_size(), steps=5) - model.predict(custom_generator_changing_batch_size(), steps=5) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - @data_utils.dont_use_multiprocessing_pool - def test_generator_dynamic_shapes(self): - - x = [ - 'I think juice is great', - 'unknown is the best language since slicedbread', 'a a a a a a a', - 'matmul', 'Yaks are also quite nice' - ] - y = [1, 0, 0, 1, 1] - - vocab = { - word: i + 1 for i, word in - enumerate( - sorted(set(itertools.chain(*[i.split() for i in x])))) - } - - def data_gen(batch_size=2): - np.random.seed(0) - data = list(zip(x, y)) * 10 - np.random.shuffle(data) - - def pack_and_pad(queue): - x = [[vocab[j] for j in i[0].split()] for i in queue] - pad_len = max(len(i) for i in x) - x = np.array([i + [0] * (pad_len - len(i)) for i in x]) - y = np.array([i[1] for i in queue]) - del queue[:] - return x, y[:, np.newaxis] - - queue = [] - for i, element in enumerate(data): - queue.append(element) - if not (i + 1) % batch_size: - yield pack_and_pad(queue) - - if queue: - # Last partial batch - yield pack_and_pad(queue) - - model = test_utils.get_model_from_layers([ - layers_module.Embedding(input_dim=len(vocab) + 1, output_dim=4), - layers_module.SimpleRNN(units=1), - layers_module.Activation('sigmoid') - ], input_shape=(None,)) - - model.compile(loss=losses.binary_crossentropy, optimizer='sgd') - model.fit(data_gen(), epochs=1, steps_per_epoch=5) + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @data_utils.dont_use_multiprocessing_pool + def test_fit_generator_method(self): + model = test_utils.get_small_mlp( + num_hidden=3, num_classes=4, input_dim=2 + ) + model.compile( + loss="mse", + optimizer=rmsprop.RMSprop(1e-3), + metrics=["mae", metrics_module.CategoricalAccuracy()], + ) + + model.fit_generator( + custom_generator_threads(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + workers=4, + use_multiprocessing=True, + ) + model.fit_generator( + custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + ) + model.fit_generator( + custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + validation_data=custom_generator(), + validation_steps=10, + ) + model.fit_generator( + custom_generator(), + steps_per_epoch=5, + validation_data=custom_generator(), + validation_steps=1, + workers=0, + ) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @data_utils.dont_use_multiprocessing_pool + def test_evaluate_generator_method(self): + model = test_utils.get_small_mlp( + num_hidden=3, num_classes=4, input_dim=2 + ) + model.compile( + loss="mse", + optimizer=rmsprop.RMSprop(1e-3), + metrics=["mae", metrics_module.CategoricalAccuracy()], + run_eagerly=test_utils.should_run_eagerly(), + ) + + model.evaluate_generator( + custom_generator_threads(), + steps=5, + max_queue_size=10, + workers=2, + verbose=1, + use_multiprocessing=True, + ) + model.evaluate_generator( + custom_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False, + ) + model.evaluate_generator( + custom_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False, + workers=0, + ) + + 
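# The `fit_generator`/`evaluate_generator`/`predict_generator` calls
# exercised above are the legacy endpoints; as
# `test_generator_input_to_fit_eval_predict` below shows, the same
# generators can be passed straight to `fit`/`evaluate`/`predict`. A
# minimal sketch reusing the helpers defined in this test file:
model = test_utils.get_small_mlp(num_hidden=3, num_classes=4, input_dim=2)
model.compile(loss="mse", optimizer=rmsprop.RMSprop(1e-3))
model.fit(custom_generator(), steps_per_epoch=5, epochs=1)  # (x, y) batches
model.evaluate(custom_generator(), steps=5)
model.predict(custom_generator(mode=1), steps=5)  # mode=1 yields inputs only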
@test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @data_utils.dont_use_multiprocessing_pool + def test_predict_generator_method(self): + model = test_utils.get_small_mlp( + num_hidden=3, num_classes=4, input_dim=2 + ) + model.run_eagerly = test_utils.should_run_eagerly() + + model.predict_generator( + custom_generator_threads(), + steps=5, + max_queue_size=10, + workers=2, + use_multiprocessing=True, + ) + model.predict_generator( + custom_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False, + ) + model.predict_generator( + custom_generator(), steps=5, max_queue_size=10, workers=0 + ) + # Test generator with just inputs (no targets) + model.predict_generator( + custom_generator_threads(mode=1), + steps=5, + max_queue_size=10, + workers=2, + use_multiprocessing=True, + ) + model.predict_generator( + custom_generator(mode=1), + steps=5, + max_queue_size=10, + use_multiprocessing=False, + ) + model.predict_generator( + custom_generator(mode=1), steps=5, max_queue_size=10, workers=0 + ) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_generator_methods_with_sample_weights(self): + model = test_utils.get_small_mlp( + num_hidden=3, num_classes=4, input_dim=2 + ) + model.compile( + loss="mse", + optimizer=rmsprop.RMSprop(1e-3), + metrics=["mae", metrics_module.CategoricalAccuracy()], + run_eagerly=test_utils.should_run_eagerly(), + ) + + model.fit_generator( + custom_generator(mode=3), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + ) + model.fit_generator( + custom_generator(mode=3), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + validation_data=custom_generator(mode=3), + validation_steps=10, + ) + model.predict_generator( + custom_generator(mode=3), + steps=5, + max_queue_size=10, + use_multiprocessing=False, + ) + model.evaluate_generator( + custom_generator(mode=3), + steps=5, + max_queue_size=10, + use_multiprocessing=False, + ) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_generator_methods_invalid_use_case(self): + def invalid_generator(): + while 1: + yield (0, 0, 0, 0) + + model = test_utils.get_small_mlp( + num_hidden=3, num_classes=4, input_dim=2 + ) + model.compile( + loss="mse", + optimizer=rmsprop.RMSprop(1e-3), + run_eagerly=test_utils.should_run_eagerly(), + ) + + with self.assertRaises(ValueError): + model.fit_generator( + invalid_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + ) + with self.assertRaises(ValueError): + model.fit_generator( + custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + validation_data=invalid_generator(), + validation_steps=10, + ) + with self.assertRaises(ValueError): + model.predict_generator( + invalid_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False, + ) + with self.assertRaises(ValueError): + model.evaluate_generator( + invalid_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False, + ) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_generator_input_to_fit_eval_predict(self): + val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + def ones_generator(): + while True: + yield np.ones([10, 10], np.float32), np.ones( + [10, 1], np.float32 + ) + + model = 
test_utils.get_small_mlp( + num_hidden=10, num_classes=1, input_dim=10 + ) + + model.compile( + rmsprop.RMSprop(0.001), + "binary_crossentropy", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit( + ones_generator(), + steps_per_epoch=2, + validation_data=val_data, + epochs=2, + ) + model.evaluate(ones_generator(), steps=2) + model.predict(ones_generator(), steps=2) + + # Test with a changing batch size + model = test_utils.get_small_mlp( + num_hidden=3, num_classes=4, input_dim=2 + ) + model.compile( + loss="mse", + optimizer=rmsprop.RMSprop(1e-3), + metrics=["mae", metrics_module.CategoricalAccuracy()], + ) + model.fit_generator( + custom_generator_changing_batch_size(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + ) + model.fit_generator( + custom_generator_changing_batch_size(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + validation_data=custom_generator_changing_batch_size(), + validation_steps=10, + ) + + model.fit( + custom_generator_changing_batch_size(), + steps_per_epoch=5, + validation_data=custom_generator_changing_batch_size(), + validation_steps=10, + epochs=2, + ) + model.evaluate(custom_generator_changing_batch_size(), steps=5) + model.predict(custom_generator_changing_batch_size(), steps=5) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @data_utils.dont_use_multiprocessing_pool + def test_generator_dynamic_shapes(self): + + x = [ + "I think juice is great", + "unknown is the best language since slicedbread", + "a a a a a a a", + "matmul", + "Yaks are also quite nice", + ] + y = [1, 0, 0, 1, 1] + + vocab = { + word: i + 1 + for i, word in enumerate( + sorted(set(itertools.chain(*[i.split() for i in x]))) + ) + } + + def data_gen(batch_size=2): + np.random.seed(0) + data = list(zip(x, y)) * 10 + np.random.shuffle(data) + + def pack_and_pad(queue): + x = [[vocab[j] for j in i[0].split()] for i in queue] + pad_len = max(len(i) for i in x) + x = np.array([i + [0] * (pad_len - len(i)) for i in x]) + y = np.array([i[1] for i in queue]) + del queue[:] + return x, y[:, np.newaxis] + + queue = [] + for i, element in enumerate(data): + queue.append(element) + if not (i + 1) % batch_size: + yield pack_and_pad(queue) + + if queue: + # Last partial batch + yield pack_and_pad(queue) + + model = test_utils.get_model_from_layers( + [ + layers_module.Embedding(input_dim=len(vocab) + 1, output_dim=4), + layers_module.SimpleRNN(units=1), + layers_module.Activation("sigmoid"), + ], + input_shape=(None,), + ) + + model.compile(loss=losses.binary_crossentropy, optimizer="sgd") + model.fit(data_gen(), epochs=1, steps_per_epoch=5) class TestGeneratorMethodsWithSequences(test_combinations.TestCase): - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - @data_utils.dont_use_multiprocessing_pool - def test_training_with_sequences(self): - - class DummySequence(data_utils.Sequence): - - def __getitem__(self, idx): - return np.zeros([10, 2]), np.ones([10, 4]) - - def __len__(self): - return 10 - - model = test_utils.get_small_mlp( - num_hidden=3, num_classes=4, input_dim=2) - model.compile(loss='mse', optimizer=rmsprop.RMSprop(1e-3)) - - model.fit_generator(DummySequence(), - steps_per_epoch=10, - validation_data=custom_generator(), - validation_steps=1, - max_queue_size=10, - workers=0, - use_multiprocessing=True) - model.fit_generator(DummySequence(), - steps_per_epoch=10, - 
validation_data=custom_generator(), - validation_steps=1, - max_queue_size=10, - workers=0, - use_multiprocessing=False) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - @data_utils.dont_use_multiprocessing_pool - def test_sequence_input_to_fit_eval_predict(self): - val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) - - class CustomSequence(data_utils.Sequence): - - def __getitem__(self, idx): - return np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) - - def __len__(self): - return 2 - - class CustomSequenceChangingBatchSize(data_utils.Sequence): - - def __getitem__(self, idx): - batch_size = 10 - idx - return (np.ones([batch_size, 10], np.float32), - np.ones([batch_size, 1], np.float32)) - - def __len__(self): - return 2 - - model = test_utils.get_small_mlp( - num_hidden=10, num_classes=1, input_dim=10) - - model.compile(rmsprop.RMSprop(0.001), 'binary_crossentropy') - model.fit(CustomSequence(), validation_data=val_data, epochs=2) - model.evaluate(CustomSequence()) - model.predict(CustomSequence()) - - with self.assertRaisesRegex(ValueError, '`y` argument is not supported'): - model.fit(CustomSequence(), y=np.ones([10, 1])) - - with self.assertRaisesRegex(ValueError, - '`sample_weight` argument is not supported'): - model.fit(CustomSequence(), sample_weight=np.ones([10, 1])) - - model.compile(rmsprop.RMSprop(0.001), 'binary_crossentropy') - model.fit(CustomSequenceChangingBatchSize(), - validation_data=val_data, epochs=2) - model.evaluate(CustomSequenceChangingBatchSize()) - model.predict(CustomSequenceChangingBatchSize()) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_sequence_on_epoch_end(self): - - class MySequence(data_utils.Sequence): - - def __init__(self): - self.epochs = 0 - - def __getitem__(self, idx): - return np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) - - def __len__(self): - return 2 - - def on_epoch_end(self): - self.epochs += 1 - - inputs = input_layer.Input(10) - outputs = layers_module.Dense(1)(inputs) - model = training.Model(inputs, outputs) - model.compile('sgd', 'mse') - my_seq = MySequence() - model.fit(my_seq, epochs=2) - self.assertEqual(my_seq.epochs, 2) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @data_utils.dont_use_multiprocessing_pool + def test_training_with_sequences(self): + class DummySequence(data_utils.Sequence): + def __getitem__(self, idx): + return np.zeros([10, 2]), np.ones([10, 4]) + + def __len__(self): + return 10 + + model = test_utils.get_small_mlp( + num_hidden=3, num_classes=4, input_dim=2 + ) + model.compile(loss="mse", optimizer=rmsprop.RMSprop(1e-3)) + + model.fit_generator( + DummySequence(), + steps_per_epoch=10, + validation_data=custom_generator(), + validation_steps=1, + max_queue_size=10, + workers=0, + use_multiprocessing=True, + ) + model.fit_generator( + DummySequence(), + steps_per_epoch=10, + validation_data=custom_generator(), + validation_steps=1, + max_queue_size=10, + workers=0, + use_multiprocessing=False, + ) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @data_utils.dont_use_multiprocessing_pool + def test_sequence_input_to_fit_eval_predict(self): + val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + class CustomSequence(data_utils.Sequence): + def __getitem__(self, idx): + return np.ones([10, 10], np.float32), 
np.ones( + [10, 1], np.float32 + ) + + def __len__(self): + return 2 + + class CustomSequenceChangingBatchSize(data_utils.Sequence): + def __getitem__(self, idx): + batch_size = 10 - idx + return ( + np.ones([batch_size, 10], np.float32), + np.ones([batch_size, 1], np.float32), + ) + + def __len__(self): + return 2 + + model = test_utils.get_small_mlp( + num_hidden=10, num_classes=1, input_dim=10 + ) + + model.compile(rmsprop.RMSprop(0.001), "binary_crossentropy") + model.fit(CustomSequence(), validation_data=val_data, epochs=2) + model.evaluate(CustomSequence()) + model.predict(CustomSequence()) + + with self.assertRaisesRegex( + ValueError, "`y` argument is not supported" + ): + model.fit(CustomSequence(), y=np.ones([10, 1])) + + with self.assertRaisesRegex( + ValueError, "`sample_weight` argument is not supported" + ): + model.fit(CustomSequence(), sample_weight=np.ones([10, 1])) + + model.compile(rmsprop.RMSprop(0.001), "binary_crossentropy") + model.fit( + CustomSequenceChangingBatchSize(), + validation_data=val_data, + epochs=2, + ) + model.evaluate(CustomSequenceChangingBatchSize()) + model.predict(CustomSequenceChangingBatchSize()) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_sequence_on_epoch_end(self): + class MySequence(data_utils.Sequence): + def __init__(self): + self.epochs = 0 + + def __getitem__(self, idx): + return np.ones([10, 10], np.float32), np.ones( + [10, 1], np.float32 + ) + + def __len__(self): + return 2 + + def on_epoch_end(self): + self.epochs += 1 + + inputs = input_layer.Input(10) + outputs = layers_module.Dense(1)(inputs) + model = training.Model(inputs, outputs) + model.compile("sgd", "mse") + my_seq = MySequence() + model.fit(my_seq, epochs=2) + self.assertEqual(my_seq.epochs, 2) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class TestConvertToGeneratorLike(tf.test.TestCase, parameterized.TestCase): - simple_inputs = (np.ones((10, 10)), np.ones((10, 1))) - nested_inputs = ((np.ones((10, 10)), np.ones((10, 20))), (np.ones((10, 1)), - np.ones((10, 3)))) - - def _make_dataset(self, inputs, batches): - return tf.data.Dataset.from_tensors(inputs).repeat(batches) - - def _make_iterator(self, inputs, batches): - return tf.compat.v1.data.make_one_shot_iterator( - self._make_dataset(inputs, batches)) - - def _make_generator(self, inputs, batches): - - def _gen(): - for _ in range(batches): - yield inputs - - return _gen() - - def _make_numpy(self, inputs, _): - return inputs - - @parameterized.named_parameters( - ('simple_dataset', _make_dataset, simple_inputs), - ('simple_iterator', _make_iterator, simple_inputs), - ('simple_generator', _make_generator, simple_inputs), - ('simple_numpy', _make_numpy, simple_inputs), - ('nested_dataset', _make_dataset, nested_inputs), - ('nested_iterator', _make_iterator, nested_inputs), - ('nested_generator', _make_generator, nested_inputs), - ('nested_numpy', _make_numpy, nested_inputs)) - def test_convert_to_generator_like(self, input_fn, inputs): - expected_batches = 5 - data = input_fn(self, inputs, expected_batches) - - # Dataset and Iterator not supported in Legacy Graph mode. 
- if (not tf.executing_eagerly() and - isinstance(data, (tf.data.Dataset, tf.compat.v1.data.Iterator))): - return - - generator, steps = training_generator_v1.convert_to_generator_like( - data, batch_size=2, steps_per_epoch=expected_batches) - self.assertEqual(steps, expected_batches) - - for _ in range(expected_batches): - outputs = next(generator) - tf.nest.assert_same_structure(outputs, inputs) - - -if __name__ == '__main__': - tf.test.main() + simple_inputs = (np.ones((10, 10)), np.ones((10, 1))) + nested_inputs = ( + (np.ones((10, 10)), np.ones((10, 20))), + (np.ones((10, 1)), np.ones((10, 3))), + ) + + def _make_dataset(self, inputs, batches): + return tf.data.Dataset.from_tensors(inputs).repeat(batches) + + def _make_iterator(self, inputs, batches): + return tf.compat.v1.data.make_one_shot_iterator( + self._make_dataset(inputs, batches) + ) + + def _make_generator(self, inputs, batches): + def _gen(): + for _ in range(batches): + yield inputs + + return _gen() + + def _make_numpy(self, inputs, _): + return inputs + + @parameterized.named_parameters( + ("simple_dataset", _make_dataset, simple_inputs), + ("simple_iterator", _make_iterator, simple_inputs), + ("simple_generator", _make_generator, simple_inputs), + ("simple_numpy", _make_numpy, simple_inputs), + ("nested_dataset", _make_dataset, nested_inputs), + ("nested_iterator", _make_iterator, nested_inputs), + ("nested_generator", _make_generator, nested_inputs), + ("nested_numpy", _make_numpy, nested_inputs), + ) + def test_convert_to_generator_like(self, input_fn, inputs): + expected_batches = 5 + data = input_fn(self, inputs, expected_batches) + + # Dataset and Iterator not supported in Legacy Graph mode. + if not tf.executing_eagerly() and isinstance( + data, (tf.data.Dataset, tf.compat.v1.data.Iterator) + ): + return + + generator, steps = training_generator_v1.convert_to_generator_like( + data, batch_size=2, steps_per_epoch=expected_batches + ) + self.assertEqual(steps, expected_batches) + + for _ in range(expected_batches): + outputs = next(generator) + tf.nest.assert_same_structure(outputs, inputs) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/training_generator_v1.py b/keras/engine/training_generator_v1.py index ae9e7ec6e457..4b82fad14d81 100644 --- a/keras/engine/training_generator_v1.py +++ b/keras/engine/training_generator_v1.py @@ -15,13 +15,12 @@ """Part of the Keras training engine related to Python generators of array data. """ -import tensorflow.compat.v2 as tf -# pylint: disable=protected-access - import functools import math import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import callbacks as cbks from keras.engine import training_utils @@ -29,796 +28,926 @@ from keras.utils import data_utils from keras.utils import generic_utils from keras.utils.mode_keys import ModeKeys + +# isort: off from tensorflow.python.platform import tf_logging as logging -def model_iteration(model, - data, - steps_per_epoch=None, - epochs=1, - verbose=1, - callbacks=None, - validation_data=None, - validation_steps=None, - validation_freq=1, - class_weight=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - shuffle=False, - initial_epoch=0, - mode=ModeKeys.TRAIN, - batch_size=None, - steps_name='steps', - **kwargs): - """Loop function for arrays of data with modes TRAIN/TEST/PREDICT. - - Args: - model: Keras Model instance. - data: Either a tuple of NumPy/Tensor inputs (i.e. 
`(x,)` or `(x, y)` or - `(x, y, sample_weights)`) or a generator or - `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset. - steps_per_epoch: Total number of steps (batches of samples) before - declaring one epoch finished and starting the next epoch. Ignored with - the default value of `None`. - epochs: Number of times to iterate over the data. - verbose: 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = progress bar, 2 = one line per epoch. - Note that the progress bar is not particularly useful when - logged to a file, so verbose=2 is recommended when not running - interactively (eg, in a production environment). - callbacks: List of callbacks to be called during training. - validation_data: Either a tuple of NumPy/Tensor inputs (i.e. `(x,)` or - `(x, y)` or `(x, y, sample_weights)`) or a generator or - `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset. - validation_steps: Total number of steps (batches of samples) before - declaring validation finished. - validation_freq: Only relevant if validation data is provided. Integer or - `collections.abc.Container` instance (e.g. list, tuple, etc.). If an - integer, specifies how many training epochs to run before a new - validation run is performed, e.g. `validation_freq=2` runs - validation every 2 epochs. If a Container, specifies the epochs on - which to run validation, e.g. `validation_freq=[1, 2, 10]` runs - validation at the end of the 1st, 2nd, and 10th epochs. - class_weight: Dictionary mapping class indices to a weight for the class. - max_queue_size: Integer. Maximum size for the generator queue. If - unspecified, `max_queue_size` will default to 10. - workers: Integer. Maximum number of processes to spin up when using - process-based threading. If unspecified, `workers` will default to 1. If - 0, will execute the generator on the main thread. - use_multiprocessing: Boolean. If `True`, use process-based threading. If - unspecified, `use_multiprocessing` will default to `False`. Note that - because this implementation relies on multiprocessing, you should not - pass non-picklable arguments to the generator as they can't be passed - easily to children processes. - shuffle: Boolean. Whether to shuffle the order of the batches at the - beginning of each epoch. Only used with instances of `Sequence` - (`keras.utils.Sequence`). Has no effect when `steps_per_epoch` is not - `None`. - initial_epoch: Epoch at which to start training (useful for resuming a - previous training run). - mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. - batch_size: Integer batch size or None if unknown. Will only be used if - `data` is in NumPy/Tensor format. - steps_name: The string name of the steps argument, either `steps`, - `validation_steps`, or `steps_per_epoch`. Only used for error message - formatting. - **kwargs: Additional arguments for backwards compatibility. `steps` is - accepted as an alias for `steps_per_epoch`. - - Returns: - - In TRAIN mode: `History` object. - - In TEST mode: Evaluation metrics. - - In PREDICT mode: Outputs of the Model called on inputs. - - Raises: - ValueError: in case of invalid arguments. - """ - if 'steps' in kwargs: - steps_per_epoch = kwargs['steps'] - - # Determine the number of steps per epoch and whether we should reset the - # dataset at the end of each epoch. 
- reset_dataset_after_each_epoch = False - original_dataset = None - is_dataset = isinstance(data, (tf.data.Dataset, tf.compat.v1.data.Dataset)) - if is_dataset: - original_dataset = data - if steps_per_epoch is None: - reset_dataset_after_each_epoch = True - steps_per_epoch = training_utils_v1.infer_steps_for_dataset( - model, data, steps_per_epoch, epochs=epochs, steps_name=steps_name) - - # Convert to a format that supports `next(generator)`. - generator, steps_per_epoch = convert_to_generator_like( - data, - steps_per_epoch=steps_per_epoch, - batch_size=batch_size, - epochs=epochs - initial_epoch, - shuffle=shuffle) - - do_validation = validation_data is not None - is_sequence = isinstance(generator, data_utils.Sequence) - _validate_arguments(is_sequence, is_dataset, use_multiprocessing, workers, - steps_per_epoch, validation_data, validation_steps, mode, - kwargs) - - batch_function = _make_execution_function( - model, mode, class_weight=class_weight) - - # Create the queue for the generator. - enqueuer = None - if not is_dataset: - generator, enqueuer = _make_enqueued_generator( - generator, - workers=workers, - use_multiprocessing=use_multiprocessing, - max_queue_size=max_queue_size, - shuffle=shuffle) - - num_samples_or_steps, use_steps = _get_num_samples_or_steps( - data, steps_per_epoch) - - count_mode = 'steps' if use_steps else 'samples' - callbacks = cbks.configure_callbacks( - callbacks, - model, - do_validation=do_validation, - epochs=epochs, - steps_per_epoch=steps_per_epoch, - batch_size=batch_size, - samples=num_samples_or_steps, - count_mode=count_mode, - verbose=verbose, - mode=mode) - - if mode == ModeKeys.PREDICT: - aggregator = training_utils_v1.OutputsAggregator( - True, steps=steps_per_epoch) - else: - aggregator = training_utils_v1.MetricsAggregator( - True, steps=steps_per_epoch) - - should_set_learning_phase = tf.executing_eagerly() and model.run_eagerly - if should_set_learning_phase: - learning_phase_scope = backend.eager_learning_phase_scope( - 1 if mode == ModeKeys.TRAIN else 0) - learning_phase_scope.__enter__() - - callbacks.model.stop_training = False - callbacks._call_begin_hook(mode) - - initial_epoch = model._maybe_load_initial_epoch_from_ckpt(initial_epoch, mode) - - for epoch in range(initial_epoch, epochs): - if callbacks.model.stop_training: - break - - # Setup work for each epoch. - model.reset_metrics() - epoch_logs = {} - if mode == ModeKeys.TRAIN: - callbacks.on_epoch_begin(epoch, epoch_logs) +def model_iteration( + model, + data, + steps_per_epoch=None, + epochs=1, + verbose=1, + callbacks=None, + validation_data=None, + validation_steps=None, + validation_freq=1, + class_weight=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + shuffle=False, + initial_epoch=0, + mode=ModeKeys.TRAIN, + batch_size=None, + steps_name="steps", + **kwargs, +): + """Loop function for arrays of data with modes TRAIN/TEST/PREDICT. + + Args: + model: Keras Model instance. + data: Either a tuple of NumPy/Tensor inputs (i.e. `(x,)` or `(x, y)` or + `(x, y, sample_weights)`) or a generator or + `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset. + steps_per_epoch: Total number of steps (batches of samples) before + declaring one epoch finished and starting the next epoch. Ignored with + the default value of `None`. + epochs: Number of times to iterate over the data. + verbose: 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = one line per epoch. 
+ Note that the progress bar is not particularly useful when + logged to a file, so verbose=2 is recommended when not running + interactively (e.g., in a production environment). + callbacks: List of callbacks to be called during training. + validation_data: Either a tuple of NumPy/Tensor inputs (i.e. `(x,)` or + `(x, y)` or `(x, y, sample_weights)`) or a generator or + `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset. + validation_steps: Total number of steps (batches of samples) before + declaring validation finished. + validation_freq: Only relevant if validation data is provided. Integer + or `collections.abc.Container` instance (e.g. list, tuple, etc.). If + an integer, specifies how many training epochs to run before a new + validation run is performed, e.g. `validation_freq=2` runs validation + every 2 epochs. If a Container, specifies the epochs on which to run + validation, e.g. `validation_freq=[1, 2, 10]` runs validation at the + end of the 1st, 2nd, and 10th epochs. + class_weight: Dictionary mapping class indices to a weight for the + class. + max_queue_size: Integer. Maximum size for the generator queue. If + unspecified, `max_queue_size` will default to 10. + workers: Integer. Maximum number of processes to spin up when using + process-based threading. If unspecified, `workers` will default to 1. + If 0, will execute the generator on the main thread. + use_multiprocessing: Boolean. If `True`, use process-based threading. If + unspecified, `use_multiprocessing` will default to `False`. Note that + because this implementation relies on multiprocessing, you should not + pass non-pickleable arguments to the generator as they can't be passed + easily to child processes. + shuffle: Boolean. Whether to shuffle the order of the batches at the + beginning of each epoch. Only used with instances of `Sequence` + (`keras.utils.Sequence`). Has no effect when `steps_per_epoch` is not + `None`. + initial_epoch: Epoch at which to start training (useful for resuming a + previous training run). + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. + batch_size: Integer batch size or None if unknown. Will only be used if + `data` is in NumPy/Tensor format. + steps_name: The string name of the steps argument, either `steps`, + `validation_steps`, or `steps_per_epoch`. Only used for error message + formatting. + **kwargs: Additional arguments for backwards compatibility. `steps` is + accepted as an alias for `steps_per_epoch`. + + Returns: + - In TRAIN mode: `History` object. + - In TEST mode: Evaluation metrics. + - In PREDICT mode: Outputs of the Model called on inputs. + + Raises: + ValueError: in case of invalid arguments. + """ + if "steps" in kwargs: + steps_per_epoch = kwargs["steps"] + + # Determine the number of steps per epoch and whether we should reset the + # dataset at the end of each epoch. + reset_dataset_after_each_epoch = False + original_dataset = None + is_dataset = isinstance(data, (tf.data.Dataset, tf.compat.v1.data.Dataset)) + if is_dataset: + original_dataset = data + if steps_per_epoch is None: + reset_dataset_after_each_epoch = True + steps_per_epoch = training_utils_v1.infer_steps_for_dataset( + model, + data, + steps_per_epoch, + epochs=epochs, + steps_name=steps_name, + ) + + # Convert to a format that supports `next(generator)`. 
+ generator, steps_per_epoch = convert_to_generator_like( + data, + steps_per_epoch=steps_per_epoch, + batch_size=batch_size, + epochs=epochs - initial_epoch, + shuffle=shuffle, + ) + + do_validation = validation_data is not None + is_sequence = isinstance(generator, data_utils.Sequence) + _validate_arguments( + is_sequence, + is_dataset, + use_multiprocessing, + workers, + steps_per_epoch, + validation_data, + validation_steps, + mode, + kwargs, + ) + + batch_function = _make_execution_function( + model, mode, class_weight=class_weight + ) + + # Create the queue for the generator. + enqueuer = None + if not is_dataset: + generator, enqueuer = _make_enqueued_generator( + generator, + workers=workers, + use_multiprocessing=use_multiprocessing, + max_queue_size=max_queue_size, + shuffle=shuffle, + ) + + num_samples_or_steps, use_steps = _get_num_samples_or_steps( + data, steps_per_epoch + ) + + count_mode = "steps" if use_steps else "samples" + callbacks = cbks.configure_callbacks( + callbacks, + model, + do_validation=do_validation, + epochs=epochs, + steps_per_epoch=steps_per_epoch, + batch_size=batch_size, + samples=num_samples_or_steps, + count_mode=count_mode, + verbose=verbose, + mode=mode, + ) - if steps_per_epoch is None: - # Loop over dataset until `OutOfRangeError` is raised. - target_steps = np.inf + if mode == ModeKeys.PREDICT: + aggregator = training_utils_v1.OutputsAggregator( + True, steps=steps_per_epoch + ) else: - # Loop over dataset for the specified number of steps. - target_steps = steps_per_epoch - - step = 0 - while step < target_steps: - batch_data = _get_next_batch(generator) - if batch_data is None: - if is_dataset: - # The dataset passed by the user ran out of batches. - # Now we know the cardinality of the dataset. - # If steps_per_epoch was specified, then running out of data is - # unexpected, so we stop training and inform the user. - if steps_per_epoch: - callbacks.model.stop_training = True - logging.warning( - 'Your dataset ran out of data; interrupting training. ' - 'Make sure that your dataset can generate at least ' - '`%s * epochs` batches (in this case, %d batches). ' - 'You may need to use the repeat() function when ' - 'building your dataset.' - % (steps_name, steps_per_epoch * epochs)) - elif step > 0: - steps_per_epoch = step - aggregator.steps = steps_per_epoch + aggregator = training_utils_v1.MetricsAggregator( + True, steps=steps_per_epoch + ) + + should_set_learning_phase = tf.executing_eagerly() and model.run_eagerly + if should_set_learning_phase: + learning_phase_scope = backend.eager_learning_phase_scope( + 1 if mode == ModeKeys.TRAIN else 0 + ) + learning_phase_scope.__enter__() + + callbacks.model.stop_training = False + callbacks._call_begin_hook(mode) + + initial_epoch = model._maybe_load_initial_epoch_from_ckpt( + initial_epoch, mode + ) + + for epoch in range(initial_epoch, epochs): + if callbacks.model.stop_training: + break + + # Setup work for each epoch. + model.reset_metrics() + epoch_logs = {} + if mode == ModeKeys.TRAIN: + callbacks.on_epoch_begin(epoch, epoch_logs) + + if steps_per_epoch is None: + # Loop over dataset until `OutOfRangeError` is raised. + target_steps = np.inf else: - # We ran out of batches while the user passed an iterator (legacy). - callbacks.model.stop_training = True - logging.warning( - 'Your dataset iterator ran out of data; ' - 'interrupting training. Make sure that your iterator ' - 'can generate at least `%s * epochs` ' - 'batches (in this case, %d batches). 
You may need to' - 'use the repeat() function when building your ' - 'dataset.' % (steps_name, steps_per_epoch * epochs)) - break - - # `batch_size` used for validation data if validation - # data is NumPy/EagerTensors. - batch_size = int(tf.nest.flatten(batch_data)[0].shape[0]) - - # Callbacks batch begin. - batch_logs = {'batch': step, 'size': batch_size} - callbacks._call_batch_hook(mode, 'begin', step, batch_logs) - - is_deferred = not model._is_compiled - batch_outs = batch_function(*batch_data) - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - - if step == 0: - aggregator.create(batch_outs) - - if is_deferred: - # Set callbacks params. We do this here when model is compiled only - # in the first iteration of this loop (deferred build scenario). - cbks.set_callback_parameters( - callbacks, - model, - do_validation=do_validation, - batch_size=batch_size, - epochs=epochs, - steps_per_epoch=steps_per_epoch, - samples=num_samples_or_steps, - verbose=verbose, - mode=mode) - - # Aggregate results. - aggregator.aggregate(batch_outs) - - # Callbacks batch end. - batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode) - callbacks._call_batch_hook(mode, 'end', step, batch_logs) - step += 1 - - if callbacks.model.stop_training: - break - - aggregator.finalize() - results = aggregator.results - epoch_logs = cbks.make_logs(model, epoch_logs, results, mode) - if len(results) == 1: - results = results[0] - - # Run the test loop every epoch during training. - if (do_validation and - training_utils_v1.should_run_validation(validation_freq, epoch) and - not callbacks.model.stop_training): - val_results = model_iteration( - model, - validation_data, - steps_per_epoch=validation_steps, - batch_size=batch_size, - class_weight=class_weight, - workers=workers, - use_multiprocessing=use_multiprocessing, - max_queue_size=max_queue_size, - callbacks=callbacks, - verbose=verbose, - mode=ModeKeys.TEST, - steps_name='validation_steps') - - if not isinstance(val_results, list): - val_results = [val_results] - epoch_logs = cbks.make_logs( - model, epoch_logs, val_results, mode, prefix='val_') + # Loop over dataset for the specified number of steps. + target_steps = steps_per_epoch + + step = 0 + while step < target_steps: + batch_data = _get_next_batch(generator) + if batch_data is None: + if is_dataset: + # The dataset passed by the user ran out of batches. Now we + # know the cardinality of the dataset. If steps_per_epoch + # was specified, then running out of data is unexpected, so + # we stop training and inform the user. + if steps_per_epoch: + callbacks.model.stop_training = True + logging.warning( + "Your dataset ran out of data; interrupting " + "training. Make sure that your dataset can " + "generate at least `%s * epochs` batches (in " + "this case, %d batches). You may need to use " + "the repeat() function when building your dataset." + % (steps_name, steps_per_epoch * epochs) + ) + elif step > 0: + steps_per_epoch = step + aggregator.steps = steps_per_epoch + else: + # We ran out of batches while the user passed an iterator + # (legacy). + callbacks.model.stop_training = True + logging.warning( + "Your dataset iterator ran out of data; " + "interrupting training. Make sure that your iterator " + "can generate at least `%s * epochs` " + "batches (in this case, %d batches). You may need to " + "use the repeat() function when building your " + "dataset." 
% (steps_name, steps_per_epoch * epochs) + ) + break + + # `batch_size` used for validation data if validation + # data is NumPy/EagerTensors. + batch_size = int(tf.nest.flatten(batch_data)[0].shape[0]) + + # Callbacks batch begin. + batch_logs = {"batch": step, "size": batch_size} + callbacks._call_batch_hook(mode, "begin", step, batch_logs) + + is_deferred = not model._is_compiled + batch_outs = batch_function(*batch_data) + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + + if step == 0: + aggregator.create(batch_outs) + + if is_deferred: + # Set callbacks params. We do this here when model is + # compiled only in the first iteration of this loop + # (deferred build scenario). + cbks.set_callback_parameters( + callbacks, + model, + do_validation=do_validation, + batch_size=batch_size, + epochs=epochs, + steps_per_epoch=steps_per_epoch, + samples=num_samples_or_steps, + verbose=verbose, + mode=mode, + ) + + # Aggregate results. + aggregator.aggregate(batch_outs) + + # Callbacks batch end. + batch_logs = cbks.make_logs( + model, batch_logs, batch_outs, mode + ) + callbacks._call_batch_hook(mode, "end", step, batch_logs) + step += 1 + + if callbacks.model.stop_training: + break + + aggregator.finalize() + results = aggregator.results + epoch_logs = cbks.make_logs(model, epoch_logs, results, mode) + if len(results) == 1: + results = results[0] + + # Run the test loop every epoch during training. + if ( + do_validation + and training_utils_v1.should_run_validation(validation_freq, epoch) + and not callbacks.model.stop_training + ): + val_results = model_iteration( + model, + validation_data, + steps_per_epoch=validation_steps, + batch_size=batch_size, + class_weight=class_weight, + workers=workers, + use_multiprocessing=use_multiprocessing, + max_queue_size=max_queue_size, + callbacks=callbacks, + verbose=verbose, + mode=ModeKeys.TEST, + steps_name="validation_steps", + ) + + if not isinstance(val_results, list): + val_results = [val_results] + epoch_logs = cbks.make_logs( + model, epoch_logs, val_results, mode, prefix="val_" + ) + + if mode == ModeKeys.TRAIN: + # Epochs only apply to `fit`. + callbacks.on_epoch_end(epoch, epoch_logs) + + # Recreate dataset iterator for the next epoch. + if reset_dataset_after_each_epoch and epoch < epochs - 1: + generator = tf.compat.v1.data.make_one_shot_iterator( + original_dataset + ) + + model._successful_loop_finish = True + callbacks._call_end_hook(mode) + + if enqueuer is not None: + enqueuer.stop() + + if should_set_learning_phase: + learning_phase_scope.__exit__(None, None, None) if mode == ModeKeys.TRAIN: - return model.history - return results + return model.history + return results # Maintain compatibility with the existing names. 
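# The aliases defined next pre-bind `mode` (and `shuffle` for the eval and
# predict variants) onto `model_iteration` via `functools.partial`, so
# `evaluate_generator(model, data, steps=5)` is the same call as
# `model_iteration(model, data, steps=5, mode=ModeKeys.TEST, shuffle=False)`.
# A self-contained illustration of the pattern (hypothetical names):
import functools

def run(data, mode="train", shuffle=True):
    return mode, shuffle, data

run_eval = functools.partial(run, mode="test", shuffle=False)
assert run_eval([1, 2]) == ("test", False, [1, 2])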
fit_generator = functools.partial(model_iteration, mode=ModeKeys.TRAIN) evaluate_generator = functools.partial( - model_iteration, mode=ModeKeys.TEST, shuffle=False) + model_iteration, mode=ModeKeys.TEST, shuffle=False +) predict_generator = functools.partial( - model_iteration, mode=ModeKeys.PREDICT, shuffle=False) + model_iteration, mode=ModeKeys.PREDICT, shuffle=False +) def _get_next_batch(generator): - """Retrieves the next batch of input data.""" - try: - generator_output = next(generator) - except (StopIteration, tf.errors.OutOfRangeError): - return None - - if not isinstance(generator_output, tuple): - # Always wrap in a tuple. - generator_output = (generator_output,) - if len(generator_output) not in [1, 2, 3]: - raise ValueError( - 'Output of generator should be a tuple of 1 or 2 or 3 ' - 'elements: (input,) or (input, target) or ' - '(input, target, sample_weights). Received {}'.format(generator_output)) - return generator_output - - -def _validate_arguments(is_sequence, is_dataset, use_multiprocessing, workers, - steps_per_epoch, validation_data, validation_steps, - mode, kwargs): - """Raises errors if arguments are invalid. - - Args: - is_sequence: Boolean, whether data is a `keras.utils.data_utils.Sequence` - instance. - is_dataset: Boolean, whether data is a dataset instance. - use_multiprocessing: Boolean. If `True`, use process-based threading. If - unspecified, `use_multiprocessing` will default to `False`. Note that - because this implementation relies on multiprocessing, you should not pass - non-picklable arguments to the generator as they can't be passed easily to - children processes. - workers: Integer. Maximum number of processes to spin up when using - process-based threading. If unspecified, `workers` will default to 1. If - 0, will execute the generator on the main thread. - steps_per_epoch: Total number of steps (batches of samples) before declaring - one epoch finished and starting the next epoch. Ignored with the default - value of `None`. - validation_data: Either a tuple of NumPy/Tensor inputs (i.e. `(x,)` or `(x, - y)` or `(x, y, sample_weights)`) or a generator or - `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset. - validation_steps: Total number of steps (batches of samples) before - declaring validation finished. - mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. - kwargs: Additional arguments for backwards compatibility. - - Raises: - ValueError: If `steps_per_epoch` or `validation_steps` are not passed - for data types that require them, or if unrecognized keyword - arguments are passed. - """ - if not is_sequence and use_multiprocessing and workers > 1: - logging.warning( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' 
- ' Please consider using the `keras.utils.Sequence`' - ' class.')) - - if steps_per_epoch is None and not is_dataset: - arg_name = 'steps_per_epoch' if mode == ModeKeys.TRAIN else 'steps' - raise ValueError('Please specify the number of steps via the ' - '`{}` argument.'.format(arg_name)) - - val_gen = ( - data_utils.is_generator_or_sequence(validation_data) or - isinstance(validation_data, tf.data.Iterator)) - if (val_gen and not isinstance(validation_data, data_utils.Sequence) and - not validation_steps): - raise ValueError('Please specify the `validation_steps` argument.') - - if any(k != 'steps' for k in kwargs): - raise ValueError('Invalid arguments passed: {}'.format( - [k for k in kwargs if k != 'steps'])) - - -def convert_to_generator_like(data, - batch_size=None, - steps_per_epoch=None, - epochs=1, - shuffle=False): - """Make a generator out of NumPy or EagerTensor inputs. - - Args: - data: Either a generator or `keras.utils.data_utils.Sequence` object or - `Dataset`, `Iterator`, or a {1,2,3}-tuple of NumPy arrays or EagerTensors. - If a tuple, the elements represent `(x, y, sample_weights)` and may be - `None` or `[None]`. - batch_size: Used when creating a generator out of tuples of NumPy arrays or - EagerTensors. - steps_per_epoch: Steps of the generator to run each epoch. If `None` the - number of steps will be read from the data (for - `keras.utils.data_utils.Sequence` types). - epochs: Total number of epochs to run. - shuffle: Whether the data should be shuffled. - - Returns: - - Generator, `keras.utils.data_utils.Sequence`, or `Iterator`. - - Raises: - - ValueError: If `batch_size` is not provided for NumPy or EagerTensor - inputs. - """ - if isinstance(data, tuple): - # Scrub `Nones` that might have been passed for `targets`, `sample_weights`. - data = tuple( - ele for ele in data if not all(e is None for e in tf.nest.flatten(ele))) - - if data_utils.is_generator_or_sequence(data) or isinstance( - data, tf.data.Iterator): - if isinstance(data, data_utils.Sequence): - if steps_per_epoch is None: - steps_per_epoch = len(data) - return data, steps_per_epoch - if isinstance(data, tf.data.Dataset): - return tf.compat.v1.data.make_one_shot_iterator(data), steps_per_epoch - - # Create generator from NumPy or EagerTensor Input. - num_samples = int(tf.nest.flatten(data)[0].shape[0]) - if batch_size is None: - raise ValueError( - 'When passing input data as arrays, do not specify ' - '`steps_per_epoch`/`steps` argument. 
Please use `batch_size` instead.') - steps_per_epoch = int(math.ceil(num_samples / batch_size)) - - def _gen(data): - """Makes a generator out of a structure of NumPy/EagerTensors.""" - index_array = np.arange(num_samples) - for _ in range(epochs): - if shuffle: - np.random.shuffle(index_array) - batches = generic_utils.make_batches(num_samples, batch_size) - for (batch_start, batch_end) in batches: - batch_ids = index_array[batch_start:batch_end] - flat_batch_data = training_utils.slice_arrays( - tf.nest.flatten(data), batch_ids, contiguous=(not shuffle)) - yield tf.nest.pack_sequence_as(data, flat_batch_data) - - return _gen(data), steps_per_epoch - - -def _make_enqueued_generator(generator, - workers=1, - use_multiprocessing=False, - max_queue_size=10, - shuffle=False): - """Create a buffered queue of next elements of the generator.""" - is_sequence = isinstance(generator, data_utils.Sequence) - enqueuer = None - if workers > 0: - if is_sequence: - enqueuer = data_utils.OrderedEnqueuer( - generator, use_multiprocessing=use_multiprocessing, shuffle=shuffle) - else: - enqueuer = data_utils.GeneratorEnqueuer( - generator, use_multiprocessing=use_multiprocessing) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - if is_sequence: - output_generator = data_utils.iter_sequence_infinite(generator) + """Retrieves the next batch of input data.""" + try: + generator_output = next(generator) + except (StopIteration, tf.errors.OutOfRangeError): + return None + + if not isinstance(generator_output, tuple): + # Always wrap in a tuple. + generator_output = (generator_output,) + if len(generator_output) not in [1, 2, 3]: + raise ValueError( + "Output of generator should be a tuple of 1 or 2 or 3 " + "elements: (input,) or (input, target) or " + "(input, target, sample_weights). Received {}".format( + generator_output + ) + ) + return generator_output + + +def _validate_arguments( + is_sequence, + is_dataset, + use_multiprocessing, + workers, + steps_per_epoch, + validation_data, + validation_steps, + mode, + kwargs, +): + """Raises errors if arguments are invalid. + + Args: + is_sequence: Boolean, whether data is a `keras.utils.data_utils.Sequence` + instance. + is_dataset: Boolean, whether data is a dataset instance. + use_multiprocessing: Boolean. If `True`, use process-based threading. If + unspecified, `use_multiprocessing` will default to `False`. Note that + because this implementation relies on multiprocessing, you should not + pass non-pickleable arguments to the generator as they can't be passed + easily to child processes. + workers: Integer. Maximum number of processes to spin up when using + process-based threading. If unspecified, `workers` will default to 1. If + 0, will execute the generator on the main thread. + steps_per_epoch: Total number of steps (batches of samples) before + declaring one epoch finished and starting the next epoch. Ignored with + the default value of `None`. + validation_data: Either a tuple of NumPy/Tensor inputs (i.e. `(x,)` or + `(x, y)` or `(x, y, sample_weights)`) or a generator or + `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset. + validation_steps: Total number of steps (batches of samples) before + declaring validation finished. + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. + kwargs: Additional arguments for backwards compatibility. 
+ + Raises: + ValueError: If `steps_per_epoch` or `validation_steps` are not passed + for data types that require them, or if unrecognized keyword + arguments are passed. + """ + if not is_sequence and use_multiprocessing and workers > 1: + logging.warning( + UserWarning( + "Using a generator with `use_multiprocessing=True`" + " and multiple workers may duplicate your data." + " Please consider using the `keras.utils.Sequence`" + " class." + ) + ) + + if steps_per_epoch is None and not is_dataset: + arg_name = "steps_per_epoch" if mode == ModeKeys.TRAIN else "steps" + raise ValueError( + f"Please specify the number of steps via the `{arg_name}` argument." + ) + + val_gen = data_utils.is_generator_or_sequence( + validation_data + ) or isinstance(validation_data, tf.data.Iterator) + if ( + val_gen + and not isinstance(validation_data, data_utils.Sequence) + and not validation_steps + ): + raise ValueError("Please specify the `validation_steps` argument.") + + if any(k != "steps" for k in kwargs): + raise ValueError( + f"Invalid arguments passed: {[k for k in kwargs if k != 'steps']}" + ) + + +def convert_to_generator_like( + data, batch_size=None, steps_per_epoch=None, epochs=1, shuffle=False +): + """Make a generator out of NumPy or EagerTensor inputs. + + Args: + data: Either a generator or `keras.utils.data_utils.Sequence` object or + `Dataset`, `Iterator`, or a {1,2,3}-tuple of NumPy arrays or + EagerTensors. If a tuple, the elements represent `(x, y, + sample_weights)` and may be `None` or `[None]`. + batch_size: Used when creating a generator out of tuples of NumPy arrays + or EagerTensors. + steps_per_epoch: Steps of the generator to run each epoch. If `None` the + number of steps will be read from the data (for + `keras.utils.data_utils.Sequence` types). + epochs: Total number of epochs to run. + shuffle: Whether the data should be shuffled. + + Returns: + - Generator, `keras.utils.data_utils.Sequence`, or `Iterator`. + + Raises: + - ValueError: If `batch_size` is not provided for NumPy or EagerTensor + inputs. + """ + if isinstance(data, tuple): + # Scrub `Nones` that might have been passed for `targets`, + # `sample_weights`. + data = tuple( + ele + for ele in data + if not all(e is None for e in tf.nest.flatten(ele)) + ) + + if data_utils.is_generator_or_sequence(data) or isinstance( + data, tf.data.Iterator + ): + if isinstance(data, data_utils.Sequence): + if steps_per_epoch is None: + steps_per_epoch = len(data) + return data, steps_per_epoch + if isinstance(data, tf.data.Dataset): + return tf.compat.v1.data.make_one_shot_iterator(data), steps_per_epoch + + # Create generator from NumPy or EagerTensor Input. + num_samples = int(tf.nest.flatten(data)[0].shape[0]) + if batch_size is None: + raise ValueError( + "When passing input data as arrays, do not specify " + "`steps_per_epoch`/`steps` argument. " + "Please use `batch_size` instead." 
+ ) + steps_per_epoch = int(math.ceil(num_samples / batch_size)) + + def _gen(data): + """Makes a generator out of a structure of NumPy/EagerTensors.""" + index_array = np.arange(num_samples) + for _ in range(epochs): + if shuffle: + np.random.shuffle(index_array) + batches = generic_utils.make_batches(num_samples, batch_size) + for batch_start, batch_end in batches: + batch_ids = index_array[batch_start:batch_end] + flat_batch_data = training_utils.slice_arrays( + tf.nest.flatten(data), batch_ids, contiguous=(not shuffle) + ) + yield tf.nest.pack_sequence_as(data, flat_batch_data) + + return _gen(data), steps_per_epoch + + +def _make_enqueued_generator( + generator, + workers=1, + use_multiprocessing=False, + max_queue_size=10, + shuffle=False, +): + """Create a buffered queue of next elements of the generator.""" + is_sequence = isinstance(generator, data_utils.Sequence) + enqueuer = None + if workers > 0: + if is_sequence: + enqueuer = data_utils.OrderedEnqueuer( + generator, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle, + ) + else: + enqueuer = data_utils.GeneratorEnqueuer( + generator, use_multiprocessing=use_multiprocessing + ) + enqueuer.start(workers=workers, max_queue_size=max_queue_size) + output_generator = enqueuer.get() else: - output_generator = generator - return output_generator, enqueuer + if is_sequence: + output_generator = data_utils.iter_sequence_infinite(generator) + else: + output_generator = generator + return output_generator, enqueuer def _make_execution_function(model, mode, class_weight=None): - """Makes function to run one step of model execution.""" - if mode == ModeKeys.TRAIN: - f = functools.partial(model.train_on_batch, class_weight=class_weight) - elif mode == ModeKeys.TEST: - f = model.test_on_batch - else: - # Match signature of other modes to allow - # 1, 2, or 3-tuples from generator - def predict_on_batch(x, y=None, sample_weights=None): # pylint: disable=unused-argument - return model.predict_on_batch(x) + """Makes function to run one step of model execution.""" + if mode == ModeKeys.TRAIN: + f = functools.partial(model.train_on_batch, class_weight=class_weight) + elif mode == ModeKeys.TEST: + f = model.test_on_batch + else: + # Match signature of other modes to allow + # 1, 2, or 3-tuples from generator + def predict_on_batch(x, y=None, sample_weights=None): + return model.predict_on_batch(x) - f = predict_on_batch + f = predict_on_batch - # Maintain stateful metrics across batch-level calls. - if mode != ModeKeys.PREDICT: - f = functools.partial(f, reset_metrics=False) + # Maintain stateful metrics across batch-level calls. + if mode != ModeKeys.PREDICT: + f = functools.partial(f, reset_metrics=False) - return f + return f def _get_num_samples_or_steps(data, steps_per_epoch): - """Returns number of samples or steps, and whether to use steps count mode.""" - flat_inputs = tf.nest.flatten(data) - if hasattr(flat_inputs[0], 'shape'): - return int(flat_inputs[0].shape[0]), False - return steps_per_epoch, True + """Returns number of samples or steps, and whether to use steps count + mode.""" + flat_inputs = tf.nest.flatten(data) + if hasattr(flat_inputs[0], "shape"): + return int(flat_inputs[0].shape[0]), False + return steps_per_epoch, True class GeneratorOrSequenceTrainingLoop(training_utils_v1.TrainingLoop): - """Generator-like. - - Input is Python generator, or Sequence object. 
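# Usage sketch for `convert_to_generator_like` above with array inputs
# (illustrative shapes; assumes the helper is imported from this module):
import math
import numpy as np

x = np.ones((10, 4))
y = np.ones((10, 1))
gen, steps = convert_to_generator_like((x, y), batch_size=2)
assert steps == math.ceil(10 / 2)  # ceil(num_samples / batch_size) == 5
x_batch, y_batch = next(gen)  # contiguous slice of 2 samples (no shuffle)
assert x_batch.shape == (2, 4) and y_batch.shape == (2, 1)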
- - The difference between this class and `GeneratorLikeTrainingFunction` is that - this class only handles inputs that with x, y and sample_weight fused into one - param. - """ - - def fit(self, - model, - x=None, - y=None, - batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - validation_split=0., - validation_data=None, - shuffle=True, - class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_freq=1, - max_queue_size=10, - workers=1, - use_multiprocessing=False): - model._validate_or_infer_batch_size(batch_size, steps_per_epoch, x) - training_utils_v1.check_generator_arguments( - y, sample_weight, validation_split=validation_split) - return fit_generator( + """Generator-like. + + Input is a Python generator or a Sequence object. + + The difference between this class and `GeneratorLikeTrainingFunction` is + that this class only handles inputs with x, y, and sample_weight fused + into one param. + """ + + def fit( + self, model, - x, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=validation_steps, - validation_freq=validation_freq, - class_weight=class_weight, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle, - initial_epoch=initial_epoch, - steps_name='steps_per_epoch') - - def evaluate(self, - model, - x=None, - y=None, - batch_size=None, - verbose=1, - sample_weight=None, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False): - model._validate_or_infer_batch_size(batch_size, steps, x) - training_utils_v1.check_generator_arguments(y, sample_weight) - return evaluate_generator( + x=None, + y=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_freq=1, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + ): + model._validate_or_infer_batch_size(batch_size, steps_per_epoch, x) + training_utils_v1.check_generator_arguments( + y, sample_weight, validation_split=validation_split + ) + return fit_generator( + model, + x, + steps_per_epoch=steps_per_epoch, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + validation_freq=validation_freq, + class_weight=class_weight, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle, + initial_epoch=initial_epoch, + steps_name="steps_per_epoch", + ) + + def evaluate( + self, model, - x, - steps=steps, - verbose=verbose, - callbacks=callbacks, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing) - - def predict(self, - model, - x, - batch_size=None, - verbose=0, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False): - model._validate_or_infer_batch_size(batch_size, steps, x) - return predict_generator( + x=None, + y=None, + batch_size=None, + verbose=1, + sample_weight=None, + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + ): + model._validate_or_infer_batch_size(batch_size, steps, x) + training_utils_v1.check_generator_arguments(y, sample_weight) + return evaluate_generator( + model, + x, + steps=steps, 
+ verbose=verbose, + callbacks=callbacks, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + ) + + def predict( + self, model, x, - steps=steps, - verbose=verbose, - callbacks=callbacks, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing) + batch_size=None, + verbose=0, + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + ): + model._validate_or_infer_batch_size(batch_size, steps, x) + return predict_generator( + model, + x, + steps=steps, + verbose=verbose, + callbacks=callbacks, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + ) class EagerDatasetOrIteratorTrainingLoop(training_utils_v1.TrainingLoop): - """A non-distributed Dataset or iterator in eager execution.""" - - def fit(self, - model, - x=None, - y=None, - batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - validation_split=0., - validation_data=None, - shuffle=True, - class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_freq=1, - **kwargs): - model._validate_or_infer_batch_size(batch_size, steps_per_epoch, x) - # Make sure that y, sample_weights, validation_split are not passed. - training_utils_v1.validate_dataset_input(x, y, sample_weight, - validation_split) - if (isinstance(x, (tf.compat.v1.data.Dataset, tf.data.Dataset)) and - shuffle): - training_utils_v1.verify_dataset_shuffled(x) - - return fit_generator( + """A non-distributed Dataset or iterator in eager execution.""" + + def fit( + self, + model, + x=None, + y=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_freq=1, + **kwargs, + ): + model._validate_or_infer_batch_size(batch_size, steps_per_epoch, x) + # Make sure that y, sample_weights, validation_split are not passed. + training_utils_v1.validate_dataset_input( + x, y, sample_weight, validation_split + ) + if ( + isinstance(x, (tf.compat.v1.data.Dataset, tf.data.Dataset)) + and shuffle + ): + training_utils_v1.verify_dataset_shuffled(x) + + return fit_generator( + model, + x, + steps_per_epoch=steps_per_epoch, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + validation_freq=validation_freq, + class_weight=class_weight, + workers=0, + shuffle=shuffle, + initial_epoch=initial_epoch, + steps_name="steps_per_epoch", + ) + + def evaluate( + self, + model, + x=None, + y=None, + batch_size=None, + verbose=1, + sample_weight=None, + steps=None, + callbacks=None, + **kwargs, + ): + model._validate_or_infer_batch_size(batch_size, steps, x) + # Make sure that y, sample_weights, validation_split are not passed. 
+ training_utils_v1.validate_dataset_input(x, y, sample_weight) + return evaluate_generator( + model, + x, + steps=steps, + verbose=verbose, + workers=0, + callbacks=callbacks, + ) + + def predict( + self, model, x, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=validation_steps, - validation_freq=validation_freq, - class_weight=class_weight, - workers=0, - shuffle=shuffle, - initial_epoch=initial_epoch, - steps_name='steps_per_epoch') - - def evaluate(self, - model, - x=None, - y=None, - batch_size=None, - verbose=1, - sample_weight=None, - steps=None, - callbacks=None, - **kwargs): - model._validate_or_infer_batch_size(batch_size, steps, x) - # Make sure that y, sample_weights, validation_split are not passed. - training_utils_v1.validate_dataset_input(x, y, sample_weight) - return evaluate_generator( - model, x, steps=steps, verbose=verbose, workers=0, callbacks=callbacks) - - def predict(self, - model, - x, - batch_size=None, - verbose=0, - steps=None, - callbacks=None, - **kwargs): - model._validate_or_infer_batch_size(batch_size, steps, x) - return predict_generator( - model, x, steps=steps, verbose=verbose, workers=0, callbacks=callbacks) + batch_size=None, + verbose=0, + steps=None, + callbacks=None, + **kwargs, + ): + model._validate_or_infer_batch_size(batch_size, steps, x) + return predict_generator( + model, + x, + steps=steps, + verbose=verbose, + workers=0, + callbacks=callbacks, + ) class GeneratorLikeTrainingLoop(training_utils_v1.TrainingLoop): - """TrainingLoop that handle inputs like python generator. - - This is the default handler for most of the input data types, includes - symbolic tensors or Numpy array-like, Datasets and iterators in graph mode - (since they generate symbolic tensors). This Function is used to handle model - with `run_eagerly` = True. - """ - - def fit(self, - model, - x=None, - y=None, - batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - validation_split=0., - validation_data=None, - shuffle=True, - class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_freq=1, - **kwargs): - batch_size = model._validate_or_infer_batch_size(batch_size, - steps_per_epoch, x) - x, y, sample_weights = model._standardize_user_data( - x, - y, - sample_weight=sample_weight, - class_weight=class_weight, - batch_size=batch_size, - check_steps=True, - steps_name='steps_per_epoch', - steps=steps_per_epoch, - validation_split=validation_split, - shuffle=shuffle) - - if validation_data: - validation_data = model._prepare_validation_data(validation_data, - batch_size, - validation_steps) - elif validation_split and 0. < validation_split < 1.: - (x, y, sample_weights, val_x, val_y, - val_sample_weights) = ( - training_utils_v1.split_training_and_validation_data( - x, y, sample_weights, validation_split)) - validation_data = (val_x, val_y, val_sample_weights) - else: - if validation_steps: - raise ValueError('`validation_steps` should not be specified if ' - '`validation_data` is None.') + """TrainingLoop that handles inputs like a Python generator.
- return fit_generator( - model, (x, y, sample_weights), - steps_per_epoch=steps_per_epoch, - batch_size=batch_size, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=validation_steps, - validation_freq=validation_freq, - workers=0, - shuffle=shuffle, - initial_epoch=initial_epoch, - steps_name='steps_per_epoch') - - def evaluate(self, - model, - x=None, - y=None, - batch_size=None, - verbose=1, - sample_weight=None, - steps=None, - callbacks=None, - **kwargs): - batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) - x, y, sample_weights = model._standardize_user_data( - x, - y, - sample_weight=sample_weight, - batch_size=batch_size, - check_steps=True, - steps_name='steps', - steps=steps) - return evaluate_generator( - model, (x, y, sample_weights), - steps=steps, - batch_size=batch_size, - verbose=verbose, - workers=0, - callbacks=callbacks) - - def predict(self, - model, - x, - batch_size=None, - verbose=0, - steps=None, - callbacks=None, - **kwargs): - batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) - x, _, _ = model._standardize_user_data( - x, check_steps=True, steps_name='steps', steps=steps) - return predict_generator( + This is the default handler for most of the input data types, including + symbolic tensors or Numpy array-likes, Datasets and iterators in graph mode + (since they generate symbolic tensors). This function is used to handle + models with `run_eagerly` = True. + """ + + def fit( + self, + model, + x=None, + y=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_freq=1, + **kwargs, + ): + batch_size = model._validate_or_infer_batch_size( + batch_size, steps_per_epoch, x + ) + x, y, sample_weights = model._standardize_user_data( + x, + y, + sample_weight=sample_weight, + class_weight=class_weight, + batch_size=batch_size, + check_steps=True, + steps_name="steps_per_epoch", + steps=steps_per_epoch, + validation_split=validation_split, + shuffle=shuffle, + ) + + if validation_data: + validation_data = model._prepare_validation_data( + validation_data, batch_size, validation_steps + ) + elif validation_split and 0.0 < validation_split < 1.0: + ( + x, + y, + sample_weights, + val_x, + val_y, + val_sample_weights, + ) = training_utils_v1.split_training_and_validation_data( + x, y, sample_weights, validation_split + ) + validation_data = (val_x, val_y, val_sample_weights) + else: + if validation_steps: + raise ValueError( + "`validation_steps` should not be specified if " + "`validation_data` is None."
+ ) + + return fit_generator( + model, + (x, y, sample_weights), + steps_per_epoch=steps_per_epoch, + batch_size=batch_size, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + validation_freq=validation_freq, + workers=0, + shuffle=shuffle, + initial_epoch=initial_epoch, + steps_name="steps_per_epoch", + ) + + def evaluate( + self, + model, + x=None, + y=None, + batch_size=None, + verbose=1, + sample_weight=None, + steps=None, + callbacks=None, + **kwargs, + ): + batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) + x, y, sample_weights = model._standardize_user_data( + x, + y, + sample_weight=sample_weight, + batch_size=batch_size, + check_steps=True, + steps_name="steps", + steps=steps, + ) + return evaluate_generator( + model, + (x, y, sample_weights), + steps=steps, + batch_size=batch_size, + verbose=verbose, + workers=0, + callbacks=callbacks, + ) + + def predict( + self, model, x, - steps=steps, - batch_size=batch_size, - verbose=verbose, - workers=0, - callbacks=callbacks) + batch_size=None, + verbose=0, + steps=None, + callbacks=None, + **kwargs, + ): + batch_size = model._validate_or_infer_batch_size(batch_size, steps, x) + x, _, _ = model._standardize_user_data( + x, check_steps=True, steps_name="steps", steps=steps + ) + return predict_generator( + model, + x, + steps=steps, + batch_size=batch_size, + verbose=verbose, + workers=0, + callbacks=callbacks, + ) diff --git a/keras/engine/training_gpu_test.py b/keras/engine/training_gpu_test.py index 0972670f9105..cfa3eb5b394c 100644 --- a/keras/engine/training_gpu_test.py +++ b/keras/engine/training_gpu_test.py @@ -14,113 +14,151 @@ # ============================================================================== """Tests for training routines.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np + from keras import backend -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils from keras.engine import input_layer from keras.engine import training from keras.layers.convolutional import Conv2D +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils class TrainingGPUTest(tf.test.TestCase, parameterized.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_model_with_crossentropy_losses_channels_first(self): - """Tests use of all crossentropy losses with `channels_first`. - - Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`, - and `binary_crossentropy`. - Verifies that evaluate gives the same result with either `channels_first` - or `channels_last` image_data_format. 
- """ - def prepare_simple_model(input_tensor, loss_name, target): - axis = 1 if backend.image_data_format() == 'channels_first' else -1 - loss = None - num_channels = None - activation = None - if loss_name == 'sparse_categorical_crossentropy': - loss = lambda y_true, y_pred: backend.sparse_categorical_crossentropy( # pylint: disable=g-long-lambda - y_true, y_pred, axis=axis) - num_channels = int(np.amax(target) + 1) - activation = 'softmax' - elif loss_name == 'categorical_crossentropy': - loss = lambda y_true, y_pred: backend.categorical_crossentropy( # pylint: disable=g-long-lambda - y_true, y_pred, axis=axis) - num_channels = target.shape[axis] - activation = 'softmax' - elif loss_name == 'binary_crossentropy': - loss = lambda y_true, y_pred: backend.binary_crossentropy( # pylint: disable=g-long-lambda, unnecessary-lambda - y_true, y_pred) - num_channels = target.shape[axis] - activation = 'sigmoid' - - predictions = Conv2D(num_channels, - 1, - activation=activation, - kernel_initializer='ones', - bias_initializer='ones')(input_tensor) - simple_model = training.Model(inputs=input_tensor, outputs=predictions) - simple_model.compile(optimizer='rmsprop', loss=loss) - return simple_model - - if tf.test.is_gpu_available(cuda_only=True): - with test_utils.use_gpu(): - losses_to_test = ['sparse_categorical_crossentropy', - 'categorical_crossentropy', 'binary_crossentropy'] - - data_channels_first = np.array([[[[8., 7.1, 0.], [4.5, 2.6, 0.55], - [0.9, 4.2, 11.2]]]], dtype=np.float32) - # Labels for testing 4-class sparse_categorical_crossentropy, 4-class - # categorical_crossentropy, and 2-class binary_crossentropy: - labels_channels_first = [np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32), # pylint: disable=line-too-long - np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 0]], - [[1, 0, 0], [0, 0, 1], [0, 1, 0]], - [[0, 0, 0], [1, 0, 0], [0, 0, 1]], - [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]], dtype=np.float32), # pylint: disable=line-too-long - np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 1]], - [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]], dtype=np.float32)] # pylint: disable=line-too-long - # Compute one loss for each loss function in the list `losses_to_test`: - loss_channels_last = [0., 0., 0.] - loss_channels_first = [0., 0., 0.] 
- - old_data_format = backend.image_data_format() - - # Evaluate a simple network with channels last, with all three loss - # functions: - backend.set_image_data_format('channels_last') - data = np.moveaxis(data_channels_first, 1, -1) - for index, loss_function in enumerate(losses_to_test): - labels = np.moveaxis(labels_channels_first[index], 1, -1) - inputs = input_layer.Input(shape=(3, 3, 1)) - model = prepare_simple_model(inputs, loss_function, labels) - loss_channels_last[index] = model.evaluate(x=data, y=labels, - batch_size=1, verbose=0) - - # Evaluate the same network with channels first, with all three loss - # functions: - backend.set_image_data_format('channels_first') - data = data_channels_first - for index, loss_function in enumerate(losses_to_test): - labels = labels_channels_first[index] - inputs = input_layer.Input(shape=(1, 3, 3)) - model = prepare_simple_model(inputs, loss_function, labels) - loss_channels_first[index] = model.evaluate(x=data, y=labels, - batch_size=1, verbose=0) - - backend.set_image_data_format(old_data_format) - - np.testing.assert_allclose( - loss_channels_first, - loss_channels_last, - rtol=1e-06, - err_msg='{}{}'.format('Computed different losses for ', - 'channels_first and channels_last')) - - -if __name__ == '__main__': - tf.test.main() + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_model_with_crossentropy_losses_channels_first(self): + """Tests use of all crossentropy losses with `channels_first`. + + Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`, + and `binary_crossentropy`. + Verifies that evaluate gives the same result with either + `channels_first` or `channels_last` image_data_format. + """ + + def prepare_simple_model(input_tensor, loss_name, target): + axis = 1 if backend.image_data_format() == "channels_first" else -1 + loss = None + num_channels = None + activation = None + if loss_name == "sparse_categorical_crossentropy": + loss = lambda y_true, y_pred: backend.sparse_categorical_crossentropy( # noqa: E501 + y_true, y_pred, axis=axis + ) + num_channels = int(np.amax(target) + 1) + activation = "softmax" + elif loss_name == "categorical_crossentropy": + loss = lambda y_true, y_pred: backend.categorical_crossentropy( + y_true, y_pred, axis=axis + ) + num_channels = target.shape[axis] + activation = "softmax" + elif loss_name == "binary_crossentropy": + loss = lambda y_true, y_pred: backend.binary_crossentropy( + y_true, y_pred + ) + num_channels = target.shape[axis] + activation = "sigmoid" + + predictions = Conv2D( + num_channels, + 1, + activation=activation, + kernel_initializer="ones", + bias_initializer="ones", + )(input_tensor) + simple_model = training.Model( + inputs=input_tensor, outputs=predictions + ) + simple_model.compile(optimizer="rmsprop", loss=loss) + return simple_model + + if tf.test.is_gpu_available(cuda_only=True): + with test_utils.use_gpu(): + losses_to_test = [ + "sparse_categorical_crossentropy", + "categorical_crossentropy", + "binary_crossentropy", + ] + + data_channels_first = np.array( + [[[[8.0, 7.1, 0.0], [4.5, 2.6, 0.55], [0.9, 4.2, 11.2]]]], + dtype=np.float32, + ) + # Labels for testing 4-class sparse_categorical_crossentropy, + # 4-class categorical_crossentropy, and 2-class + # binary_crossentropy: + labels_channels_first = [ + np.array( + [[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32 + ), + np.array( + [ + [ + [[0, 1, 0], [0, 1, 0], [0, 0, 0]], + [[1, 0, 0], [0, 0, 1], [0, 1, 0]], + [[0, 0, 0], [1, 0, 0], [0, 0, 1]], 
+ [[0, 0, 1], [0, 0, 0], [1, 0, 0]], + ] + ], + dtype=np.float32, + ), + np.array( + [ + [ + [[0, 1, 0], [0, 1, 0], [0, 0, 1]], + [[1, 0, 1], [1, 0, 1], [1, 1, 0]], + ] + ], + dtype=np.float32, + ), + ] + # Compute one loss for each loss function in the list + # `losses_to_test`: + loss_channels_last = [0.0, 0.0, 0.0] + loss_channels_first = [0.0, 0.0, 0.0] + + old_data_format = backend.image_data_format() + + # Evaluate a simple network with channels last, with all three + # loss functions: + backend.set_image_data_format("channels_last") + data = np.moveaxis(data_channels_first, 1, -1) + for index, loss_function in enumerate(losses_to_test): + labels = np.moveaxis(labels_channels_first[index], 1, -1) + inputs = input_layer.Input(shape=(3, 3, 1)) + model = prepare_simple_model(inputs, loss_function, labels) + loss_channels_last[index] = model.evaluate( + x=data, y=labels, batch_size=1, verbose=0 + ) + + # Evaluate the same network with channels first, with all three + # loss functions: + backend.set_image_data_format("channels_first") + data = data_channels_first + for index, loss_function in enumerate(losses_to_test): + labels = labels_channels_first[index] + inputs = input_layer.Input(shape=(1, 3, 3)) + model = prepare_simple_model(inputs, loss_function, labels) + loss_channels_first[index] = model.evaluate( + x=data, y=labels, batch_size=1, verbose=0 + ) + + backend.set_image_data_format(old_data_format) + + np.testing.assert_allclose( + loss_channels_first, + loss_channels_last, + rtol=1e-06, + err_msg="{}{}".format( + "Computed different losses for ", + "channels_first and channels_last", + ), + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/training_integration_test.py b/keras/engine/training_integration_test.py index f3516718ad12..8b6050c396bc 100644 --- a/keras/engine/training_integration_test.py +++ b/keras/engine/training_integration_test.py @@ -14,13 +14,12 @@ # ============================================================================== """End-to-end tests for a variety of small models.""" -import tensorflow.compat.v2 as tf - import collections import itertools -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras from keras.testing_infra import test_combinations @@ -28,8 +27,8 @@ def _conv2d_filter(**kwargs): - """Convolution with non-default strides and dilation rate is not supported.""" - return kwargs['strides'] <= 1 or kwargs['dilation_rate'] <= 1 + """Conv with non-default strides and dilation rate is not supported.""" + return kwargs["strides"] <= 1 or kwargs["dilation_rate"] <= 1 # Scheme: (layer_class, data_shape, fuzz_dims, constructor_args, filter_fn) @@ -51,147 +50,211 @@ def _conv2d_filter(**kwargs): # constructor args, and prevents generation of contradictory combinations. # A True return value indicates a valid test. 
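# For illustration only, a minimal hypothetical entry in the scheme above # (not one of the cases generated below) could look like: #     (keras.layers.ReLU, (4,), (False,), #      collections.OrderedDict([("max_value", [None, 1.0])]), None) # i.e. a 1-D input of width 4, no fuzzed dims, two constructor variants, # and no filter function, so every generated combination is considered valid.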
_LAYERS_TO_TEST = [ - (keras.layers.Dense, (1,), (False,), collections.OrderedDict([ - ('units', [1])]), None), - (keras.layers.Activation, (2, 2), (True, True), collections.OrderedDict([ - ('activation', ['relu'])]), None), - (keras.layers.Dropout, (16,), (False,), collections.OrderedDict([ - ('rate', [0.25])]), None), - (keras.layers.BatchNormalization, (8, 8, 3), (True, True, False), - collections.OrderedDict([ - ('axis', [3]), - ('center', [True, False]), - ('scale', [True, False]) - ]), None), - (keras.layers.Conv1D, (8, 8), (False, False), collections.OrderedDict([ - ('filters', [1]), - ('kernel_size', [1, 3]), - ('strides', [1, 2]), - ('padding', ['valid', 'same']), - ('use_bias', [True]), - ('kernel_regularizer', ['l2']), - ('data_format', ['channels_last']) - ]), None), - (keras.layers.Conv2D, (8, 8, 3), (True, True, False), - collections.OrderedDict([ - ('filters', [1]), - ('kernel_size', [1, 3]), - ('strides', [1, 2]), - ('padding', ['valid', 'same']), - ('use_bias', [True, False]), - ('kernel_regularizer', ['l2']), - ('dilation_rate', [1, 2]), - ('data_format', ['channels_last']) - ]), _conv2d_filter), - (keras.layers.LSTM, (4, 4), (False, False), collections.OrderedDict([ - ('units', [1]), - ('kernel_regularizer', ['l2']), - ('dropout', [0, 0.5]), - ('stateful', [True, False]), - ('unroll', [True, False]), - ('return_sequences', [True, False]) - ]), None), + ( + keras.layers.Dense, + (1,), + (False,), + collections.OrderedDict([("units", [1])]), + None, + ), + ( + keras.layers.Activation, + (2, 2), + (True, True), + collections.OrderedDict([("activation", ["relu"])]), + None, + ), + ( + keras.layers.Dropout, + (16,), + (False,), + collections.OrderedDict([("rate", [0.25])]), + None, + ), + ( + keras.layers.BatchNormalization, + (8, 8, 3), + (True, True, False), + collections.OrderedDict( + [("axis", [3]), ("center", [True, False]), ("scale", [True, False])] + ), + None, + ), + ( + keras.layers.Conv1D, + (8, 8), + (False, False), + collections.OrderedDict( + [ + ("filters", [1]), + ("kernel_size", [1, 3]), + ("strides", [1, 2]), + ("padding", ["valid", "same"]), + ("use_bias", [True]), + ("kernel_regularizer", ["l2"]), + ("data_format", ["channels_last"]), + ] + ), + None, + ), + ( + keras.layers.Conv2D, + (8, 8, 3), + (True, True, False), + collections.OrderedDict( + [ + ("filters", [1]), + ("kernel_size", [1, 3]), + ("strides", [1, 2]), + ("padding", ["valid", "same"]), + ("use_bias", [True, False]), + ("kernel_regularizer", ["l2"]), + ("dilation_rate", [1, 2]), + ("data_format", ["channels_last"]), + ] + ), + _conv2d_filter, + ), + ( + keras.layers.LSTM, + (4, 4), + (False, False), + collections.OrderedDict( + [ + ("units", [1]), + ("kernel_regularizer", ["l2"]), + ("dropout", [0, 0.5]), + ("stateful", [True, False]), + ("unroll", [True, False]), + ("return_sequences", [True, False]), + ] + ), + None, + ), ] def _gather_test_cases(): - cases = [] - for layer_type, inp_shape, fuzz_dims, arg_dict, filter_fn in _LAYERS_TO_TEST: - arg_combinations = [[(k, i) for i in v] for k, v in arg_dict.items()] # pylint: disable=g-complex-comprehension - for arguments in itertools.product(*arg_combinations): - layer_kwargs = {k: v for k, v in arguments} - if filter_fn is not None and not filter_fn(**layer_kwargs): - continue - - name = '_{}_{}'.format(layer_type.__name__, - '_'.join('{}_{}'.format(*i) for i in arguments)) - cases.append((name, layer_type, inp_shape, fuzz_dims, layer_kwargs)) - return cases + cases = [] + for ( + layer_type, + inp_shape, + fuzz_dims, + arg_dict, + filter_fn, 
+ ) in _LAYERS_TO_TEST: + arg_combinations = [[(k, i) for i in v] for k, v in arg_dict.items()] + for arguments in itertools.product(*arg_combinations): + layer_kwargs = {k: v for k, v in arguments} + if filter_fn is not None and not filter_fn(**layer_kwargs): + continue + + name = "_{}_{}".format( + layer_type.__name__, + "_".join("{}_{}".format(*i) for i in arguments), + ) + cases.append((name, layer_type, inp_shape, fuzz_dims, layer_kwargs)) + return cases OUTPUT_TEST_CASES = _gather_test_cases() class CoreLayerIntegrationTest(test_combinations.TestCase): - """Test that layers and models produce the correct tensor types.""" - - # In v1 graph there are only symbolic tensors. - @test_combinations.run_all_keras_modes(always_skip_v1=True) - @parameterized.named_parameters(*OUTPUT_TEST_CASES) - def test_layer_output_type(self, layer_to_test, input_shape, _, layer_kwargs): - layer = layer_to_test(**layer_kwargs) - - input_data = np.ones(shape=(2,) + input_shape, dtype=np.float32) - layer_result = layer(input_data) - - inp = keras.layers.Input(shape=input_shape, batch_size=2) - model = keras.models.Model(inp, layer_to_test(**layer_kwargs)(inp)) - model_result = model(input_data) - - for x in [layer_result, model_result]: - if not isinstance(x, tf.Tensor): - raise ValueError('Tensor or EagerTensor expected, got type {}' - .format(type(x))) - - if isinstance(x, tf.__internal__.EagerTensor) != tf.executing_eagerly(): - expected_type = (tf.__internal__.EagerTensor if tf.executing_eagerly() - else tf.Tensor) - raise ValueError('Expected type {}, got type {}' - .format(expected_type, type(x))) - - def _run_fit_eval_predict(self, layer_to_test, input_shape, data_shape, - layer_kwargs): - batch_size = 2 - run_eagerly = test_utils.should_run_eagerly() - - def map_fn(_): - x = keras.backend.random_uniform(shape=data_shape) - y = keras.backend.random_uniform(shape=(1,)) - return x, y - - dataset = tf.data.Dataset.range(4).map(map_fn).batch(batch_size) - - inp = keras.layers.Input(shape=input_shape, batch_size=batch_size) - layer = layer_to_test(**layer_kwargs)(inp) - - # Condense the output down to a single scalar. 
- layer = keras.layers.Flatten()(layer) - layer = keras.layers.Lambda( - lambda x: tf.reduce_mean(x, keepdims=True))(layer) - layer = keras.layers.Dense(1, activation=None)(layer) - model = keras.models.Model(inp, layer) - - model.compile(loss='mse', optimizer='sgd', run_eagerly=run_eagerly) - model.fit(dataset, verbose=2, epochs=2) - - model.compile(loss='mse', optimizer='sgd', run_eagerly=run_eagerly) - model.fit(dataset.repeat(2), verbose=2, epochs=2, steps_per_epoch=2) - - eval_dataset = tf.data.Dataset.range(4).map(map_fn).batch(batch_size) - model.evaluate(eval_dataset, verbose=2) - - def pred_map_fn(_): - return keras.backend.random_uniform(shape=data_shape) - - pred_dataset = tf.data.Dataset.range(4) - pred_dataset = pred_dataset.map(pred_map_fn).batch(batch_size) - model.predict(pred_dataset, verbose=2) - - @test_combinations.run_all_keras_modes(always_skip_v1=False) - @parameterized.named_parameters(*OUTPUT_TEST_CASES) - def test_model_loops(self, layer_to_test, input_shape, fuzz_dims, - layer_kwargs): - self._run_fit_eval_predict(layer_to_test, input_shape, - input_shape, layer_kwargs) - - if any(fuzz_dims): - fuzzed_shape = [] - for dim, should_fuzz in zip(input_shape, fuzz_dims): - fuzzed_shape.append(None if should_fuzz else dim) - - self._run_fit_eval_predict(layer_to_test, fuzzed_shape, - input_shape, layer_kwargs) - - -if __name__ == '__main__': - tf.test.main() + """Test that layers and models produce the correct tensor types.""" + + # In v1 graph there are only symbolic tensors. + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @parameterized.named_parameters(*OUTPUT_TEST_CASES) + def test_layer_output_type( + self, layer_to_test, input_shape, _, layer_kwargs + ): + layer = layer_to_test(**layer_kwargs) + + input_data = np.ones(shape=(2,) + input_shape, dtype=np.float32) + layer_result = layer(input_data) + + inp = keras.layers.Input(shape=input_shape, batch_size=2) + model = keras.models.Model(inp, layer_to_test(**layer_kwargs)(inp)) + model_result = model(input_data) + + for x in [layer_result, model_result]: + if not isinstance(x, tf.Tensor): + raise ValueError( + f"Tensor or EagerTensor expected, got type {type(x)}" + ) + + if ( + isinstance(x, tf.__internal__.EagerTensor) + != tf.executing_eagerly() + ): + expected_type = ( + tf.__internal__.EagerTensor + if tf.executing_eagerly() + else tf.Tensor + ) + raise ValueError( + f"Expected type {expected_type}, got type {type(x)}" + ) + + def _run_fit_eval_predict( + self, layer_to_test, input_shape, data_shape, layer_kwargs + ): + batch_size = 2 + run_eagerly = test_utils.should_run_eagerly() + + def map_fn(_): + x = keras.backend.random_uniform(shape=data_shape) + y = keras.backend.random_uniform(shape=(1,)) + return x, y + + dataset = tf.data.Dataset.range(4).map(map_fn).batch(batch_size) + + inp = keras.layers.Input(shape=input_shape, batch_size=batch_size) + layer = layer_to_test(**layer_kwargs)(inp) + + # Condense the output down to a single scalar. 
+ layer = keras.layers.Flatten()(layer) + layer = keras.layers.Lambda(lambda x: tf.reduce_mean(x, keepdims=True))( + layer + ) + layer = keras.layers.Dense(1, activation=None)(layer) + model = keras.models.Model(inp, layer) + + model.compile(loss="mse", optimizer="sgd", run_eagerly=run_eagerly) + model.fit(dataset, verbose=2, epochs=2) + + model.compile(loss="mse", optimizer="sgd", run_eagerly=run_eagerly) + model.fit(dataset.repeat(2), verbose=2, epochs=2, steps_per_epoch=2) + + eval_dataset = tf.data.Dataset.range(4).map(map_fn).batch(batch_size) + model.evaluate(eval_dataset, verbose=2) + + def pred_map_fn(_): + return keras.backend.random_uniform(shape=data_shape) + + pred_dataset = tf.data.Dataset.range(4) + pred_dataset = pred_dataset.map(pred_map_fn).batch(batch_size) + model.predict(pred_dataset, verbose=2) + + @test_combinations.run_all_keras_modes(always_skip_v1=False) + @parameterized.named_parameters(*OUTPUT_TEST_CASES) + def test_model_loops( + self, layer_to_test, input_shape, fuzz_dims, layer_kwargs + ): + self._run_fit_eval_predict( + layer_to_test, input_shape, input_shape, layer_kwargs + ) + + if any(fuzz_dims): + fuzzed_shape = [] + for dim, should_fuzz in zip(input_shape, fuzz_dims): + fuzzed_shape.append(None if should_fuzz else dim) + + self._run_fit_eval_predict( + layer_to_test, fuzzed_shape, input_shape, layer_kwargs + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/training_test.py b/keras/engine/training_test.py index 3227b076adb2..579367c3c24d 100644 --- a/keras/engine/training_test.py +++ b/keras/engine/training_test.py @@ -20,7 +20,10 @@ import sys import tempfile +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras import backend from keras import layers as layers_module @@ -32,4267 +35,5096 @@ from keras.engine import training as training_module from keras.engine import training_utils_v1 from keras.layers.preprocessing import string_lookup -from keras.optimizers import optimizer_v2 -from keras.optimizers.optimizer_experimental import sgd as sgd_experimental +from keras.mixed_precision import policy +from keras.optimizers import legacy as optimizer_legacy +from keras.optimizers import rmsprop +from keras.optimizers import sgd as sgd_experimental from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import data_utils from keras.utils import io_utils from keras.utils import np_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training.rmsprop import RMSPropOptimizer +from tensorflow.python.training.rmsprop import ( + RMSPropOptimizer, +) try: - import scipy.sparse as scipy_sparse # pylint: disable=g-import-not-at-top + import scipy.sparse as scipy_sparse except ImportError: - scipy_sparse = None + scipy_sparse = None class TrainingTest(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types - def test_model_instrumentation(self): - layers = [ - layers_module.Dense(10, dtype=np.float64), - layers_module.Dense(10, dtype=np.float64) - ] - model = test_utils.get_model_from_layers(layers, input_shape=(1,)) - - self.assertTrue(model._instrumented_keras_api) - 
self.assertTrue(model._instrumented_keras_model_class) - self.assertFalse(model._instrumented_keras_layer_class) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_fit_training_arg(self): - - class ReturnTraining(layers_module.Layer): - - def call(self, inputs, training): - if training: - return inputs + tf.constant([100], 'float32') - else: - return inputs + tf.constant([0], 'float32') - - model = sequential.Sequential([ReturnTraining()]) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - hist = model.fit(x=np.array([0.]), y=np.array([0.])) - self.assertAllClose(hist.history['loss'][0], 10000) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_fit_on_empty(self): - model = sequential.Sequential([layers_module.Dense(1)]) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - with self.assertRaisesRegex(ValueError, - 'Unexpected result of `train_function`.*'): - model.fit(x=np.array([]), y=np.array([])) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_compile_fit_with_jit_compile(self): - # Test with jit_compile = True - model = sequential.Sequential([layers_module.Dense(1)]) - model.compile( - 'sgd', loss='mse', run_eagerly=False, jit_compile=True) - x, y = np.ones((10, 1)), np.ones((10, 1)) - model.fit(x, y, epochs=2) - # Test fcompile fit for a RNN model - model = sequential.Sequential() - model.add( - layers_module.TimeDistributed( - layers_module.Embedding(5, 6, mask_zero=True), - input_shape=(None, None))) # N by t_1 by t_2 by 6 - model.add( - layers_module.TimeDistributed( - layers_module.SimpleRNN(7, return_sequences=True))) - model.add( - layers_module.TimeDistributed( - layers_module.SimpleRNN(8, return_sequences=False))) - model.add(layers_module.SimpleRNN(1, return_sequences=False)) - model.compile(optimizer='sgd', loss='mse', jit_compile=True) - model_input = np.random.randint( - low=1, high=5, size=(10, 3, 4), dtype='int32') - for i in range(4): - model_input[i, i:, i:] = 0 - model.fit(model_input, np.random.random((10, 1)), epochs=1, batch_size=10) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_compile_fit_evaluate_predict_with_mirrored_strategy(self): - # Test with jit_compile = True - strategy = tf.distribute.MirroredStrategy() - with strategy.scope(): - model = sequential.Sequential([layers_module.Dense(1)]) - model.compile('sgd', loss='mse', run_eagerly=False, jit_compile=True) - x, y = np.ones((10, 1)), np.ones((10, 1)) - model.fit(x, y, epochs=2) - model.evaluate(x, y) - model.predict(x) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_verify_xla_compile_with_jit_compile(self): - vocab_data = ['earth', 'wind', 'and', 'fire'] - input_array = np.array([['earth', 'wind', 'and', 'fire'], - ['fire', 'and', 'earth', 'michigan']]) - expected_output = np.array([[1, 2, 3, 4], [4, 3, 1, 0]]) - strategy = tf.distribute.MirroredStrategy() - with strategy.scope(): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup(vocabulary=vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - model.compile('sgd', loss='mse', run_eagerly=False, jit_compile=True) - # Added a string op unsupported by XLA compiler to make sure that an - # error is thrown, This ensures that the graph is indeed being compiled - # using XLA - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - 'Graph execution 
error'): - model.fit(input_array, expected_output, epochs=1) - model.predict(input_array) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_jit_compile_for_compile_evaluate_predict(self): - # Test with jit_compile = True for model.compile(), model.evaluate(), - # model.predict() - model = sequential.Sequential([layers_module.Dense(1)]) - self.assertIsNone(model._jit_compile) - model.compile('sgd', loss='mse', run_eagerly=False, jit_compile=True) - self.assertTrue(model._jit_compile) - x, y = np.ones((10, 1)), np.ones((10, 1)) - model.fit(x, y, epochs=2) - model.evaluate(x, y) - model.predict(x) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_fit_without_loss_at_compile(self): - model = sequential.Sequential([layers_module.Dense(1)]) - model.compile('sgd', run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 1)), np.ones((10, 1)) - with self.assertRaisesRegex(ValueError, 'No loss found..*'): - model.fit(x, y, epochs=2) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_fit_without_loss_at_compile_but_with_add_loss(self): - - class MyModel(sequential.Sequential): - - def call(self, x): - self.add_loss(tf.reduce_sum(x)) - return x - - model = MyModel([layers_module.Dense(1)]) - model.compile('sgd', run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 1)), np.ones((10, 1)) - model.fit(x, y, epochs=2) - - @test_combinations.run_all_keras_modes - def test_run_eagerly_setting(self): - model = sequential.Sequential([layers_module.Dense(1)]) - run_eagerly = test_utils.should_run_eagerly() - model.compile('sgd', 'mse', run_eagerly=run_eagerly) - self.assertEqual(model.run_eagerly, run_eagerly) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - @parameterized.named_parameters( - ('train_on_batch', 'train_on_batch'), - ('test_on_batch', 'test_on_batch'), - ('predict_on_batch', 'predict_on_batch'), - ('fit', 'fit'), - ('evaluate', 'evaluate'), - ('predict', 'predict'), - ) - def test_disallow_methods_inside_tf_function(self, method_name): - model = sequential.Sequential([layers_module.Dense(1)]) - run_eagerly = test_utils.should_run_eagerly() - model.compile('sgd', 'mse', run_eagerly=run_eagerly) - - @tf.function - def my_fn(): - getattr(model, method_name)(1) - - error_msg = 'inside a `tf.function`' - with self.assertRaisesRegex(RuntimeError, error_msg): - my_fn() - - @test_combinations.run_all_keras_modes - def test_fit_and_validate_learning_phase(self): - - class ReturnTraining(layers_module.Layer): - - def call(self, inputs): - return backend.in_train_phase(lambda: tf.ones_like(inputs), - lambda: tf.zeros_like(inputs)) - - model = sequential.Sequential([ReturnTraining(input_shape=(2,))]) - model.compile( - 'sgd', - loss='mae', - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.ones((40, 2), dtype=np.float32) - targets = np.ones((40, 1), dtype=np.float32) - - # Test correctness with `steps_per_epoch`. - train_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - val_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - history = model.fit( - train_dataset, epochs=2, verbose=1, validation_data=val_dataset) - - # The training loss should be 0.0 - self.assertAllClose(history.history['loss'][0], 0.0) - # The validation loss should be 1.0. 
- self.assertAllClose(history.history['val_loss'][0], 1.0) - - @test_combinations.run_all_keras_modes( - always_skip_v1=True) - def test_warn_on_evaluate(self): - i = layers_module.Input((1,)) - x = np.ones((100, 1)) - y = np.ones((100, 1)) - sample_weight = np.ones((100,)) - model = training_module.Model(i, i) - model.compile(loss='mse', metrics=['mse']) - - logging.set_verbosity(2) - with self.assertLogs(level=2) as logs: - model.evaluate(x, y, sample_weight=sample_weight) - self.assertTrue( - any('`evaluate()` received a value for `sample_weight`' in log - for log in logs.output)) - - @test_combinations.run_all_keras_modes( - always_skip_v1=True) - def test_sample_weight_warning_disable(self): - i = layers_module.Input((1,)) - x = np.ones((100, 1)) - y = np.ones((100, 1)) - sample_weight = np.ones((100,)) - model = training_module.Model(i, i) - model.compile(loss='mse', metrics=['mse'], weighted_metrics=[]) - - logging.set_verbosity(2) - with self.assertLogs(level=2) as logs: - model.evaluate(x, y, sample_weight=sample_weight) - self.assertFalse( - any('`evaluate()` received a value for `sample_weight`' in log - for log in logs.output)) - - @test_combinations.run_all_keras_modes( - always_skip_v1=True) - def test_warn_on_evaluate_with_tf_dataset(self): - i = layers_module.Input((1,)) - - x = tf.ones((100, 1), tf.float32) - y = tf.ones((100, 1), tf.float32) - sample_weight = tf.ones((100,), dtype=tf.float32) - val_dataset = tf.data.Dataset.from_tensor_slices( - (x, y, sample_weight)).batch(10) - model = training_module.Model(i, i) - model.compile(loss='mse', metrics=['mse']) - - logging.set_verbosity(2) - with self.assertLogs(level=2) as logs: - model.evaluate(val_dataset) - self.assertTrue( - any('`evaluate()` received a value for `sample_weight`' in log - for log in logs.output)) - - @test_combinations.run_all_keras_modes - def test_fit_and_validate_training_arg(self): - - class ReturnTraining(layers_module.Layer): - - def call(self, inputs, training=None): - return backend.in_train_phase( - lambda: tf.ones_like(inputs), - lambda: tf.zeros_like(inputs), - training=training) - - model = sequential.Sequential([ReturnTraining(input_shape=(2,))]) - model.compile( - 'sgd', - loss='mae', - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.ones((40, 2), dtype=np.float32) - targets = np.ones((40, 1), dtype=np.float32) - - # Test correctness with `steps_per_epoch`. - train_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - val_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - history = model.fit( - train_dataset, epochs=2, verbose=1, validation_data=val_dataset) - - # The training loss should be 0.0 - self.assertAllClose(history.history['loss'][0], 0.0) - # The validation loss should be 1.0. 
- self.assertAllClose(history.history['val_loss'][0], 1.0) - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types - def test_target_dtype_matches_output(self): - - def loss_fn(labels, preds): - self.assertEqual(labels.dtype, preds.dtype) - return labels - preds - - layers = [ - layers_module.Dense(10, dtype=np.float64), - layers_module.Dense(10, dtype=np.float64) - ] - model = test_utils.get_model_from_layers(layers, input_shape=(1,)) - inputs = np.ones(shape=(10, 1), dtype=np.float64) - targets = np.ones(shape=(10, 1), dtype=np.float64) - model.compile( - 'sgd', - loss=loss_fn, - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(inputs, targets) - model.test_on_batch(inputs, targets) - self.assertEqual(model.predict(inputs).dtype, np.float64) - - @test_combinations.run_all_keras_modes - def test_fit_and_validate_nested_training_arg(self): - - class NestedReturnTraining(layers_module.Layer): - - def call(self, inputs, training=None): - return backend.in_train_phase( - lambda: tf.ones_like(inputs), - lambda: tf.zeros_like(inputs), - training=training) - - class ReturnTraining(layers_module.Layer): - - def __init__(self, input_shape=None, **kwargs): - super().__init__(input_shape=input_shape, **kwargs) - self._nested_layer = None - - def build(self, input_shape): - self._nested_layer = NestedReturnTraining() - self.built = True - - def call(self, inputs): - return self._nested_layer(inputs) - - model = sequential.Sequential([ReturnTraining(input_shape=(2,))]) - model.compile( - 'sgd', - loss='mae', - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.ones((40, 2), dtype=np.float32) - targets = np.ones((40, 1), dtype=np.float32) - - # Test correctness with `steps_per_epoch`. - train_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - val_dataset = tf.data.Dataset.from_tensor_slices( - (inputs, targets)).batch(10) - history = model.fit( - train_dataset, epochs=2, verbose=1, validation_data=val_dataset) - - # The training loss should be 0.0 - self.assertAllClose(history.history['loss'][0], 0.0) - # The validation loss should be 1.0. 
- self.assertAllClose(history.history['val_loss'][0], 1.0) - - @test_combinations.run_with_all_model_types(exclude_models='sequential') - @test_combinations.run_all_keras_modes - def test_fit_on_arrays(self): - input_a = layers_module.Input(shape=(3,), name='input_a') - input_b = layers_module.Input(shape=(3,), name='input_b') - - dense = layers_module.Dense(4, name='dense') - dropout = layers_module.Dropout(0.5, name='dropout') - branch_a = [input_a, dense] - branch_b = [input_b, dense, dropout] - - model = test_utils.get_multi_io_model(branch_a, branch_b) - - optimizer = RMSPropOptimizer(learning_rate=0.001) - loss = 'mse' - loss_weights = [1., 0.5] - model.compile( - optimizer, - loss, - metrics=[metrics_module.CategoricalAccuracy(), 'mae'], - loss_weights=loss_weights, - run_eagerly=test_utils.should_run_eagerly()) - - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_d_np = np.random.random((10, 4)) - output_e_np = np.random.random((10, 4)) - - # Test fit at different verbosity - model.fit( - [input_a_np, input_b_np], [output_d_np, output_e_np], - epochs=1, - batch_size=5, - verbose=0) - model.fit( - [input_a_np, input_b_np], [output_d_np, output_e_np], - epochs=1, - batch_size=5, - verbose=1) - model.fit( - [input_a_np, input_b_np], [output_d_np, output_e_np], - epochs=2, - batch_size=5, - verbose=2) - model.train_on_batch([input_a_np, input_b_np], [output_d_np, output_e_np]) - - # Test with validation data - model.fit( - [input_a_np, input_b_np], [output_d_np, output_e_np], - validation_data=([input_a_np, input_b_np], [output_d_np, - output_e_np]), - epochs=1, - batch_size=5, - verbose=0) - model.fit( - [input_a_np, input_b_np], [output_d_np, output_e_np], - validation_data=([input_a_np, input_b_np], [output_d_np, - output_e_np]), - epochs=2, - batch_size=5, - verbose=1) - model.fit([input_a_np, input_b_np], [output_d_np, output_e_np], - validation_data=([input_a_np, - input_b_np], [output_d_np, output_e_np]), - epochs=2, - batch_size=5, - verbose=2) - model.fit([input_a_np, input_b_np], [output_d_np, output_e_np], - validation_data=[[input_a_np, input_b_np], - [output_d_np, output_e_np]], - epochs=2, - batch_size=5, - verbose=2) - # Test with validation split - model.fit( - [input_a_np, input_b_np], [output_d_np, output_e_np], - epochs=2, - batch_size=5, - verbose=0, - validation_split=0.2) - - if test_utils.get_model_type() == 'functional': - # Test with dictionary inputs - model.fit( - { - 'input_a': input_a_np, - 'input_b': input_b_np - }, { - 'dense': output_d_np, - 'dropout': output_e_np - }, - epochs=1, - batch_size=5, - verbose=0) - model.fit( - { - 'input_a': input_a_np, - 'input_b': input_b_np - }, { - 'dense': output_d_np, - 'dropout': output_e_np - }, - epochs=1, - batch_size=5, - verbose=1) - model.fit( - { - 'input_a': input_a_np, - 'input_b': input_b_np - }, { - 'dense': output_d_np, - 'dropout': output_e_np - }, - validation_data=({ - 'input_a': input_a_np, - 'input_b': input_b_np - }, { - 'dense': output_d_np, - 'dropout': output_e_np - }), - epochs=1, - batch_size=5, - verbose=0) - model.train_on_batch({ - 'input_a': input_a_np, - 'input_b': input_b_np - }, { - 'dense': output_d_np, - 'dropout': output_e_np - }) - - # Test with lists for loss, metrics - loss = ['mae', 'mse'] - model.compile( - optimizer, - loss, - metrics=[metrics_module.CategoricalAccuracy(), 'mae'], - run_eagerly=test_utils.should_run_eagerly()) - model.fit( - [input_a_np, input_b_np], [output_d_np, output_e_np], - epochs=1, - batch_size=5, - verbose=0) - 
- # Test with dictionaries for loss, metrics, loss weights - if test_utils.get_model_type() == 'functional': - loss = {'dense': 'mse', 'dropout': 'mae'} - loss_weights = {'dense': 1., 'dropout': 0.5} - metrics = { - 'dense': 'mse', - 'dropout': metrics_module.CategoricalAccuracy() - } - model.compile( - optimizer, - loss, - metrics=metrics, - loss_weights=loss_weights, - run_eagerly=test_utils.should_run_eagerly()) - model.fit( - [input_a_np, input_b_np], [output_d_np, output_e_np], - epochs=1, - batch_size=5, - verbose=0) - - # Build single-input model - x = layers_module.Input(shape=(3,), name='input_a') - y = layers_module.Dense(4)(x) - model = training_module.Model(x, y) - model.compile( - optimizer, - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - # This will work - model.fit([input_a_np], output_d_np, epochs=1) - - # Test model on a list of floats - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 4)) - - # Test execution on inputs that are lists of scalars. - # TF2 and TF1 have slightly different semantics: - if tf.executing_eagerly(): - # In TF2 to avoid any ambiguity when there are nested lists - # the entire input gets converted to a - # single numpy array (& it only works in the case of a single io model) - model.fit(np.ndarray.tolist(input_a_np), + @test_combinations.run_all_keras_modes + @test_combinations.run_with_all_model_types + def test_model_instrumentation(self): + layers = [ + layers_module.Dense(10, dtype=np.float64), + layers_module.Dense(10, dtype=np.float64), + ] + model = test_utils.get_model_from_layers(layers, input_shape=(1,)) + + self.assertTrue(model._instrumented_keras_api) + self.assertTrue(model._instrumented_keras_model_class) + self.assertFalse(model._instrumented_keras_layer_class) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_fit_training_arg(self): + class ReturnTraining(layers_module.Layer): + def call(self, inputs, training): + if training: + return inputs + tf.constant([100], "float32") + else: + return inputs + tf.constant([0], "float32") + + model = sequential.Sequential([ReturnTraining()]) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + hist = model.fit(x=np.array([0.0]), y=np.array([0.0])) + self.assertAllClose(hist.history["loss"][0], 10000) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_fit_on_empty(self): + model = sequential.Sequential([layers_module.Dense(1)]) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + with self.assertRaisesRegex( + ValueError, "Expected input data to be non-empty." 
+ ): + model.fit(x=np.array([]), y=np.array([])) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_compile_fit_with_jit_compile(self): + # Test with jit_compile = True + model = sequential.Sequential([layers_module.Dense(1)]) + model.compile("sgd", loss="mse", run_eagerly=False, jit_compile=True) + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) + # Test compile/fit for an RNN model + model = sequential.Sequential() + model.add( + layers_module.TimeDistributed( + layers_module.Embedding(5, 6, mask_zero=True), + input_shape=(None, None), + ) + ) # N by t_1 by t_2 by 6 + model.add( + layers_module.TimeDistributed( + layers_module.SimpleRNN(7, return_sequences=True) + ) + ) + model.add( + layers_module.TimeDistributed( + layers_module.SimpleRNN(8, return_sequences=False) + ) + ) + model.add(layers_module.SimpleRNN(1, return_sequences=False)) + model.compile(optimizer="sgd", loss="mse", jit_compile=True) + model_input = np.random.randint( + low=1, high=5, size=(10, 3, 4), dtype="int32" + ) + for i in range(4): + model_input[i, i:, i:] = 0 + model.fit( + model_input, np.random.random((10, 1)), epochs=1, batch_size=10 + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_compile_fit_evaluate_predict_with_mirrored_strategy(self): + # Test with jit_compile = True + strategy = tf.distribute.MirroredStrategy() + with strategy.scope(): + model = sequential.Sequential([layers_module.Dense(1)]) + model.compile("sgd", loss="mse", run_eagerly=False, jit_compile=True) + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) + model.evaluate(x, y) + model.predict(x) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_distribution_reduction_method_sum_default_train_step(self): + + strategy = tf.distribute.MirroredStrategy( + ["/cpu:1", "/cpu:2", "/cpu:3", "/cpu:4"] + ) + BATCH_SIZE = 10 + + # A model that always outputs `1`: + with strategy.scope(): + inputs = layers_module.Input(shape=(1,), name="my_input") + outputs = layers_module.Dense( + units=1, kernel_initializer="zeros", bias_initializer="ones" + )(inputs) + model = training_module.Model(inputs, outputs) + + model.trainable = False + model.compile(optimizer="sgd", loss="mean_absolute_error") + + # Data points are always equal to `2`: + x, y = 2 * np.ones((40, 1)), 2 * np.ones((40, 1)) + + # For every output x_i = 1, every target y_i = 2, + # loss_i = |1-2| = 1; and + # loss_total = sum([1, 1, ..., 1]) / BATCH_SIZE = 1.0 + history = model.fit(x, y, epochs=1, batch_size=BATCH_SIZE) + self.assertAllClose(history.history["loss"][-1], 1.0) + + eval_output = model.evaluate(x, y, batch_size=BATCH_SIZE) + self.assertAllClose(eval_output, 1.0) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_distribution_reduction_method_sum_custom_train_step(self): + + strategy = tf.distribute.MirroredStrategy( + ["/cpu:1", "/cpu:2", "/cpu:3", "/cpu:4"] + ) + BATCH_SIZE = 10 + + class MyModel(training_module.Model): + @staticmethod + def reduce_loss(loss_value, global_batch_size): + REDUCTION_AXES = range(1, backend.ndim(loss_value)) + loss_value = tf.reduce_mean(loss_value, axis=REDUCTION_AXES) + return tf.nn.compute_average_loss( + loss_value, global_batch_size=global_batch_size + ) + + def train_step(self, data): + loss_value = tf.ones_like(data[0]) + return { + "loss": MyModel.reduce_loss( + loss_value, global_batch_size=BATCH_SIZE + ) + } + + def test_step(self, data): + loss_value = tf.ones_like(data[0]) + return { + "metric":
MyModel.reduce_loss( + loss_value, global_batch_size=BATCH_SIZE + ) + } + + with strategy.scope(): + inputs = layers_module.Input(shape=(1,), name="my_input") + outputs = layers_module.Dense(1)(inputs) + model = MyModel(inputs, outputs) + + model.compile() + + x, y = np.ones((40, 1)), np.ones((40, 1)) + history = model.fit(x, y, epochs=2, batch_size=BATCH_SIZE) + self.assertAllClose(history.history["loss"][-1], 1.0) + + eval_output = model.evaluate(x, y, batch_size=BATCH_SIZE) + self.assertAllClose(eval_output, 1.0) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_verify_xla_compile_with_jit_compile(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = np.array([[1, 2, 3, 4], [4, 3, 1, 0]]) + strategy = tf.distribute.MirroredStrategy() + with strategy.scope(): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup(vocabulary=vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + model.compile( + "sgd", loss="mse", run_eagerly=False, jit_compile=True + ) + # Added a string op unsupported by the XLA compiler to make sure that an + # error is thrown. This ensures that the graph is indeed being + # compiled using XLA + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, "Graph execution error" + ): + model.fit(input_array, expected_output, epochs=1) + model.predict(input_array) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_jit_compile_for_compile_evaluate_predict(self): + # Test with jit_compile = True for model.compile(), model.evaluate(), + # model.predict() + model = sequential.Sequential([layers_module.Dense(1)]) + self.assertIsNone(model._jit_compile) + model.compile("sgd", loss="mse", run_eagerly=False, jit_compile=True) + self.assertTrue(model._jit_compile) + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) + model.evaluate(x, y) + model.predict(x) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_jit_compile_true_for_evaluate_predict_but_false_for_compile(self): + # Test with jit_compile = True for model.compile(), model.evaluate(), + # model.predict() + model = sequential.Sequential([layers_module.Dense(1)]) + self.assertIsNone(model._jit_compile) + self.assertIsNone(model.jit_compile) + model.compile("sgd", loss="mse") + model.jit_compile = True + self.assertTrue(model._jit_compile) + self.assertTrue(model.jit_compile) + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) + model.evaluate(x, y) + model.predict(x) + self.assertTrue(model._jit_compile) + self.assertTrue(model.jit_compile) + model.compile("sgd", loss="mse", jit_compile=False) + self.assertFalse(model._jit_compile) + self.assertFalse(model.jit_compile) + model.compile("sgd", loss="mse", jit_compile=True) + self.assertTrue(model._jit_compile) + self.assertTrue(model.jit_compile) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_predict_xla_compile_with_jit_compile_setter_false_then_true(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + strategy = tf.distribute.MirroredStrategy() + with strategy.scope(): + input_data = keras.Input(shape=(None,), dtype=tf.string) + # Added a string op unsupported by the XLA compiler to make sure that an + #
error is thrown, This ensures that the graph is indeed being + # compiled using XLA + layer = string_lookup.StringLookup(vocabulary=vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + # Compiled without jit_compile + model.predict(input_array) + model.jit_compile = True + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, "Graph execution error" + ): + model.predict(input_array) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_fit_without_loss_at_compile(self): + model = sequential.Sequential([layers_module.Dense(1)]) + model.compile("sgd", run_eagerly=test_utils.should_run_eagerly()) + x, y = np.ones((10, 1)), np.ones((10, 1)) + with self.assertRaisesRegex(ValueError, "No loss found..*"): + model.fit(x, y, epochs=2) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_fit_without_loss_at_compile_but_with_add_loss(self): + class MyModel(sequential.Sequential): + def call(self, x): + self.add_loss(tf.reduce_sum(x)) + return x + + model = MyModel([layers_module.Dense(1)]) + model.compile("sgd", run_eagerly=test_utils.should_run_eagerly()) + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) + + @test_combinations.run_all_keras_modes + def test_run_eagerly_setting(self): + model = sequential.Sequential([layers_module.Dense(1)]) + run_eagerly = test_utils.should_run_eagerly() + model.compile("sgd", "mse", run_eagerly=run_eagerly) + self.assertEqual(model.run_eagerly, run_eagerly) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @parameterized.named_parameters( + ("train_on_batch", "train_on_batch"), + ("test_on_batch", "test_on_batch"), + ("predict_on_batch", "predict_on_batch"), + ("fit", "fit"), + ("evaluate", "evaluate"), + ("predict", "predict"), + ) + def test_disallow_methods_inside_tf_function(self, method_name): + model = sequential.Sequential([layers_module.Dense(1)]) + run_eagerly = test_utils.should_run_eagerly() + model.compile("sgd", "mse", run_eagerly=run_eagerly) + + @tf.function + def my_fn(): + getattr(model, method_name)(1) + + error_msg = "inside a `tf.function`" + with self.assertRaisesRegex(RuntimeError, error_msg): + my_fn() + + @test_combinations.run_all_keras_modes + def test_fit_and_validate_learning_phase(self): + class ReturnTraining(layers_module.Layer): + def call(self, inputs): + return backend.in_train_phase( + lambda: tf.ones_like(inputs), lambda: tf.zeros_like(inputs) + ) + + model = sequential.Sequential([ReturnTraining(input_shape=(2,))]) + model.compile( + "sgd", loss="mae", run_eagerly=test_utils.should_run_eagerly() + ) + + inputs = np.ones((40, 2), dtype=np.float32) + targets = np.ones((40, 1), dtype=np.float32) + + # Test correctness with `steps_per_epoch`. + train_dataset = tf.data.Dataset.from_tensor_slices( + (inputs, targets) + ).batch(10) + val_dataset = tf.data.Dataset.from_tensor_slices( + (inputs, targets) + ).batch(10) + history = model.fit( + train_dataset, epochs=2, verbose=1, validation_data=val_dataset + ) + + # The training loss should be 0.0 + self.assertAllClose(history.history["loss"][0], 0.0) + # The validation loss should be 1.0. 
+        self.assertAllClose(history.history["val_loss"][0], 1.0)
+
+    @test_combinations.run_all_keras_modes(always_skip_v1=True)
+    def test_warn_on_evaluate(self):
+        i = layers_module.Input((1,))
+        x = np.ones((100, 1))
+        y = np.ones((100, 1))
+        sample_weight = np.ones((100,))
+        model = training_module.Model(i, i)
+        model.compile(loss="mse", metrics=["mse"])
+
+        logging.set_verbosity(2)
+        with self.assertLogs(level=2) as logs:
+            model.evaluate(x, y, sample_weight=sample_weight)
+        self.assertTrue(
+            any(
+                "`evaluate()` received a value for `sample_weight`" in log
+                for log in logs.output
+            )
+        )
+
+    @test_combinations.run_all_keras_modes(always_skip_v1=True)
+    def test_sample_weight_warning_disable(self):
+        i = layers_module.Input((1,))
+        x = np.ones((100, 1))
+        y = np.ones((100, 1))
+        sample_weight = np.ones((100,))
+        model = training_module.Model(i, i)
+        model.compile(loss="mse", metrics=["mse"], weighted_metrics=[])
+
+        logging.set_verbosity(2)
+        with self.assertLogs(level=2) as logs:
+            model.evaluate(x, y, sample_weight=sample_weight)
+        self.assertFalse(
+            any(
+                "`evaluate()` received a value for `sample_weight`" in log
+                for log in logs.output
+            )
+        )
+
+    @test_combinations.run_all_keras_modes(always_skip_v1=True)
+    def test_warn_on_evaluate_with_tf_dataset(self):
+        i = layers_module.Input((1,))
+
+        x = tf.ones((100, 1), tf.float32)
+        y = tf.ones((100, 1), tf.float32)
+        sample_weight = tf.ones((100,), dtype=tf.float32)
+        val_dataset = tf.data.Dataset.from_tensor_slices(
+            (x, y, sample_weight)
+        ).batch(10)
+        model = training_module.Model(i, i)
+        model.compile(loss="mse", metrics=["mse"])
+
+        logging.set_verbosity(2)
+        with self.assertLogs(level=2) as logs:
+            model.evaluate(val_dataset)
+        self.assertTrue(
+            any(
+                "`evaluate()` received a value for `sample_weight`" in log
+                for log in logs.output
+            )
+        )
+
+    @test_combinations.run_all_keras_modes
+    def test_fit_and_validate_training_arg(self):
+        class ReturnTraining(layers_module.Layer):
+            def call(self, inputs, training=None):
+                return backend.in_train_phase(
+                    lambda: tf.ones_like(inputs),
+                    lambda: tf.zeros_like(inputs),
+                    training=training,
+                )
+
+        model = sequential.Sequential([ReturnTraining(input_shape=(2,))])
+        model.compile(
+            "sgd", loss="mae", run_eagerly=test_utils.should_run_eagerly()
+        )
+
+        inputs = np.ones((40, 2), dtype=np.float32)
+        targets = np.ones((40, 1), dtype=np.float32)
+
+        # Test correctness with `steps_per_epoch`.
+        train_dataset = tf.data.Dataset.from_tensor_slices(
+            (inputs, targets)
+        ).batch(10)
+        val_dataset = tf.data.Dataset.from_tensor_slices(
+            (inputs, targets)
+        ).batch(10)
+        history = model.fit(
+            train_dataset, epochs=2, verbose=1, validation_data=val_dataset
+        )
+
+        # The training loss should be 0.0
+        self.assertAllClose(history.history["loss"][0], 0.0)
+        # The validation loss should be 1.0.
+        self.assertAllClose(history.history["val_loss"][0], 1.0)
+
+    @test_combinations.run_all_keras_modes
+    @test_combinations.run_with_all_model_types
+    def test_target_dtype_matches_output(self):
+        def loss_fn(labels, preds):
+            self.assertEqual(labels.dtype, preds.dtype)
+            return labels - preds
+
+        layers = [
+            layers_module.Dense(10, dtype=np.float64),
+            layers_module.Dense(10, dtype=np.float64),
+        ]
+        model = test_utils.get_model_from_layers(layers, input_shape=(1,))
+        inputs = np.ones(shape=(10, 1), dtype=np.float64)
+        targets = np.ones(shape=(10, 1), dtype=np.float64)
+        model.compile(
+            "sgd", loss=loss_fn, run_eagerly=test_utils.should_run_eagerly()
+        )
+        model.train_on_batch(inputs, targets)
+        model.test_on_batch(inputs, targets)
+        self.assertEqual(model.predict(inputs).dtype, np.float64)
+
+    @test_combinations.run_all_keras_modes
+    def test_fit_and_validate_nested_training_arg(self):
+        class NestedReturnTraining(layers_module.Layer):
+            def call(self, inputs, training=None):
+                return backend.in_train_phase(
+                    lambda: tf.ones_like(inputs),
+                    lambda: tf.zeros_like(inputs),
+                    training=training,
+                )
+
+        class ReturnTraining(layers_module.Layer):
+            def __init__(self, input_shape=None, **kwargs):
+                super().__init__(input_shape=input_shape, **kwargs)
+                self._nested_layer = None
+
+            def build(self, input_shape):
+                self._nested_layer = NestedReturnTraining()
+                self.built = True
+
+            def call(self, inputs):
+                return self._nested_layer(inputs)
+
+        model = sequential.Sequential([ReturnTraining(input_shape=(2,))])
+        model.compile(
+            "sgd", loss="mae", run_eagerly=test_utils.should_run_eagerly()
+        )
+
+        inputs = np.ones((40, 2), dtype=np.float32)
+        targets = np.ones((40, 1), dtype=np.float32)
+
+        # Test correctness with `steps_per_epoch`.
+        train_dataset = tf.data.Dataset.from_tensor_slices(
+            (inputs, targets)
+        ).batch(10)
+        val_dataset = tf.data.Dataset.from_tensor_slices(
+            (inputs, targets)
+        ).batch(10)
+        history = model.fit(
+            train_dataset, epochs=2, verbose=1, validation_data=val_dataset
+        )
+
+        # The training loss should be 0.0
+        self.assertAllClose(history.history["loss"][0], 0.0)
+        # The validation loss should be 1.0.
+ self.assertAllClose(history.history["val_loss"][0], 1.0) + + @test_combinations.run_with_all_model_types(exclude_models="sequential") + @test_combinations.run_all_keras_modes + def test_fit_on_arrays(self): + input_a = layers_module.Input(shape=(3,), name="input_a") + input_b = layers_module.Input(shape=(3,), name="input_b") + + dense = layers_module.Dense(4, name="dense") + dropout = layers_module.Dropout(0.5, name="dropout") + branch_a = [input_a, dense] + branch_b = [input_b, dense, dropout] + + model = test_utils.get_multi_io_model(branch_a, branch_b) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = "mse" + loss_weights = [1.0, 0.5] + model.compile( + optimizer, + loss, + metrics=[metrics_module.CategoricalAccuracy(), "mae"], + loss_weights=loss_weights, + run_eagerly=test_utils.should_run_eagerly(), + ) + + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + + output_d_np = np.random.random((10, 4)) + output_e_np = np.random.random((10, 4)) + + # Test fit at different verbosity + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + epochs=1, + batch_size=5, + verbose=0, + ) + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + epochs=1, + batch_size=5, + verbose=1, + ) + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + epochs=2, + batch_size=5, + verbose=2, + ) + model.train_on_batch( + [input_a_np, input_b_np], [output_d_np, output_e_np] + ) + + # Test with validation data + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + validation_data=( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + ), + epochs=1, + batch_size=5, + verbose=0, + ) + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + validation_data=( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + ), + epochs=2, + batch_size=5, + verbose=1, + ) + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + validation_data=( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + ), + epochs=2, + batch_size=5, + verbose=2, + ) + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + validation_data=[ + [input_a_np, input_b_np], + [output_d_np, output_e_np], + ], + epochs=2, + batch_size=5, + verbose=2, + ) + # Test with validation split + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + epochs=2, + batch_size=5, + verbose=0, + validation_split=0.2, + ) + + if test_utils.get_model_type() == "functional": + # Test with dictionary inputs + model.fit( + {"input_a": input_a_np, "input_b": input_b_np}, + {"dense": output_d_np, "dropout": output_e_np}, + epochs=1, + batch_size=5, + verbose=0, + ) + model.fit( + {"input_a": input_a_np, "input_b": input_b_np}, + {"dense": output_d_np, "dropout": output_e_np}, + epochs=1, + batch_size=5, + verbose=1, + ) + model.fit( + {"input_a": input_a_np, "input_b": input_b_np}, + {"dense": output_d_np, "dropout": output_e_np}, + validation_data=( + {"input_a": input_a_np, "input_b": input_b_np}, + {"dense": output_d_np, "dropout": output_e_np}, + ), + epochs=1, + batch_size=5, + verbose=0, + ) + model.train_on_batch( + {"input_a": input_a_np, "input_b": input_b_np}, + {"dense": output_d_np, "dropout": output_e_np}, + ) + + # Test with lists for loss, metrics + loss = ["mae", "mse"] + model.compile( + optimizer, + loss, + metrics=[metrics_module.CategoricalAccuracy(), "mae"], + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit( + [input_a_np, input_b_np], + [output_d_np, 
output_e_np], + epochs=1, + batch_size=5, + verbose=0, + ) + + # Test with dictionaries for loss, metrics, loss weights + if test_utils.get_model_type() == "functional": + loss = {"dense": "mse", "dropout": "mae"} + loss_weights = {"dense": 1.0, "dropout": 0.5} + metrics = { + "dense": "mse", + "dropout": metrics_module.CategoricalAccuracy(), + } + model.compile( + optimizer, + loss, + metrics=metrics, + loss_weights=loss_weights, + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + epochs=1, + batch_size=5, + verbose=0, + ) + + # Build single-input model + x = layers_module.Input(shape=(3,), name="input_a") + y = layers_module.Dense(4)(x) + model = training_module.Model(x, y) + model.compile( + optimizer, loss="mse", run_eagerly=test_utils.should_run_eagerly() + ) + # This will work + model.fit([input_a_np], output_d_np, epochs=1) + + # Test model on a list of floats + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 4)) + + # Test execution on inputs that are lists of scalars. + # TF2 and TF1 have slightly different semantics: + if tf.executing_eagerly(): + # In TF2 to avoid any ambiguity when there are nested lists + # the entire input gets converted to a + # single numpy array (& it only works in the case of a single io + # model) + model.fit( + np.ndarray.tolist(input_a_np), np.ndarray.tolist(input_b_np), epochs=2, batch_size=5, - verbose=2) - else: - # In TF1 there was logic to try disambiguating between the individual - # inputs when lists are nested. This allowed multi-io functional models - # to support lists of scalars as input, but it caused ambiguity issues - # for subclass models & made it trickier to pass multi-dimensional inputs - # as lists of scalars to single io models. This was an excessive amount - # of complexity for what boiled down to a convenience method we were - # mainly just using for writing tests. - model.fit([np.ndarray.tolist(input_a_np)], + verbose=2, + ) + else: + # In TF1 there was logic to try disambiguating between the + # individual inputs when lists are nested. This allowed multi-io + # functional models to support lists of scalars as input, but it + # caused ambiguity issues for subclass models & made it trickier to + # pass multi-dimensional inputs as lists of scalars to single io + # models. This was an excessive amount of complexity for what boiled + # down to a convenience method we were mainly just using for writing + # tests. 
+ model.fit( + [np.ndarray.tolist(input_a_np)], [np.ndarray.tolist(input_b_np)], epochs=2, batch_size=5, - verbose=2) - - @test_combinations.run_all_keras_modes - def test_evaluate_predict_on_arrays(self): - a = layers_module.Input(shape=(3,), name='input_a') - b = layers_module.Input(shape=(3,), name='input_b') - - dense = layers_module.Dense(4, name='dense') - c = dense(a) - d = dense(b) - e = layers_module.Dropout(0.5, name='dropout')(c) - - model = training_module.Model([a, b], [d, e]) - - optimizer = RMSPropOptimizer(learning_rate=0.001) - loss = 'mse' - loss_weights = [1., 0.5] - model.compile( - optimizer, - loss, - metrics=['mae', metrics_module.CategoricalAccuracy()], - loss_weights=loss_weights, - sample_weight_mode=None, - run_eagerly=test_utils.should_run_eagerly()) - - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_d_np = np.random.random((10, 4)) - output_e_np = np.random.random((10, 4)) - - # Test evaluate at different verbosity - out = model.evaluate( - [input_a_np, input_b_np], [output_d_np, output_e_np], - batch_size=5, - verbose=0) - self.assertEqual(len(out), 7) - out = model.evaluate( - [input_a_np, input_b_np], [output_d_np, output_e_np], - batch_size=5, - verbose=1) - self.assertEqual(len(out), 7) - out = model.evaluate( - [input_a_np, input_b_np], [output_d_np, output_e_np], - batch_size=5, - verbose=2) - self.assertEqual(len(out), 7) - out = model.test_on_batch([input_a_np, input_b_np], - [output_d_np, output_e_np]) - self.assertEqual(len(out), 7) - - # Test evaluate with dictionary inputs - model.evaluate( - { - 'input_a': input_a_np, - 'input_b': input_b_np - }, { - 'dense': output_d_np, - 'dropout': output_e_np - }, - batch_size=5, - verbose=0) - model.evaluate( - { - 'input_a': input_a_np, - 'input_b': input_b_np - }, { - 'dense': output_d_np, - 'dropout': output_e_np - }, - batch_size=5, - verbose=1) - - # Test predict - out = model.predict([input_a_np, input_b_np], batch_size=5) - self.assertEqual(len(out), 2) - out = model.predict({'input_a': input_a_np, 'input_b': input_b_np}) - self.assertEqual(len(out), 2) - out = model.predict_on_batch({ - 'input_a': input_a_np, - 'input_b': input_b_np - }) - self.assertEqual(len(out), 2) - - def _make_sequence_input_functions(self, input_type): - # train and test - xy_namedtuple = collections.namedtuple('xy_namedtuple', ['x', 'y']) - - # predict - x_namedtuple = collections.namedtuple('x_namedtuple', ['x']) - - if input_type == 'dataset': - dataset = tf.data.Dataset.range(16).map( - lambda _: tf.ones(shape=(1,))) - - xy_dataset = tf.data.Dataset.zip((dataset, dataset)).batch(4) - x_dataset = dataset.batch(4) - def xy_function(use_namedtuple): - return xy_dataset.map(xy_namedtuple) if use_namedtuple else xy_dataset - - def x_function(use_namedtuple): - return x_dataset.map(x_namedtuple) if use_namedtuple else x_dataset - - return xy_function, x_function - - elif input_type == 'generator': - def xy_generator(use_namedtuple): - x, y = np.ones((4, 1)), np.ones((4, 1)) - for _ in range(4): - if use_namedtuple: - yield xy_namedtuple(x, y) - else: - yield x, y - - def x_generator(use_namedtuple): - x = np.ones((4, 1)) - for _ in range(4): - if use_namedtuple: - yield x_namedtuple(x) - else: - yield x - - return xy_generator, x_generator - - elif input_type == 'sequence': - class XYSequence(data_utils.Sequence): - - def __init__(self, use_namedtuple): - self._use_namedtuple = use_namedtuple - super().__init__() - - def __getitem__(self, idx): - x, y = np.ones((4, 1)), np.ones((4, 1)) 
- if self._use_namedtuple: - return xy_namedtuple(x, y) - return x, y - - def __len__(self): - return 4 - - class XSequence(data_utils.Sequence): - - def __init__(self, use_namedtuple): - self._use_namedtuple = use_namedtuple - super().__init__() - - def __getitem__(self, idx): - x = np.ones((4, 1)) - if self._use_namedtuple: - return x_namedtuple(x) - return x - - def __len__(self): - return 4 - - return XYSequence, XSequence - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - @test_combinations.run_with_all_model_types - @parameterized.named_parameters( - ('dataset', 'dataset'), - ('generator', 'generator'), - ('sequence', 'sequence'), - ) - def test_sequence_input_types(self, input_type): - """Ensure that namedtuples and tuples are plumbed identically.""" - if not tf.executing_eagerly(): - self.skipTest('Improved checking is only present in data_adapter.') - - xy_function, x_function = self._make_sequence_input_functions(input_type) - fit_kwargs, evaluate_kwargs, predict_kwargs = {}, {}, {} - if input_type == 'generator': - fit_kwargs['steps_per_epoch'] = 4 - evaluate_kwargs['steps'] = 4 - predict_kwargs['steps'] = 4 - - model = test_utils.get_small_mlp(1, 1, 1) - model.compile( - loss='mse', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - - model.fit(xy_function(use_namedtuple=False), **fit_kwargs) - model.evaluate(xy_function(use_namedtuple=False), **evaluate_kwargs) - model.predict(x_function(use_namedtuple=False), **predict_kwargs) - - @test_combinations.run_all_keras_modes - def test_custom_mapping_in_config(self): - - class MyModel(training_module.Model): - - def call(self, inputs): - return inputs - - def get_config(self): - self.a = {} - return {'a': self.a} - - model = MyModel() - self.assertIn('{"a": {}}', model.to_json()) - - def test_training_on_sparse_data_with_dense_placeholders_v1(self): - with tf.Graph().as_default(): - if scipy_sparse is None: - return - - test_inputs = [ - scipy_sparse.random(6, 3, density=0.25).tocsr() for _ in range(2) - ] - test_outputs = [ - scipy_sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5) - ] - in1 = layers_module.Input(shape=(3,)) - in2 = layers_module.Input(shape=(3,)) - out1 = layers_module.Dropout(0.5, name='dropout')(in1) - out2 = layers_module.Dense(4, name='dense_1')(in2) - model = training_module.Model([in1, in2], [out1, out2]) - model.predict(test_inputs, batch_size=2) - optimizer = 'rmsprop' - model.compile( - optimizer, - 'mse', - metrics=['mae', metrics_module.CategoricalAccuracy()]) - model.fit(test_inputs, test_outputs, - epochs=1, batch_size=2, validation_split=0.5) - model.evaluate(test_inputs, test_outputs, batch_size=2) - - @test_combinations.run_all_keras_modes - def test_compile_with_sparse_placeholders(self): - inputs = layers_module.Input(shape=(10,), sparse=True) - weights = tf.Variable( - np.ones((10, 1)).astype(np.float32), name='weights') - weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) - output_layer = layers_module.Lambda(weights_mult)(inputs) - model = training_module.Model([inputs], output_layer) - model.compile( - loss='binary_crossentropy', - optimizer='adam', - metrics=['accuracy'], - run_eagerly=test_utils.should_run_eagerly()) - - @test_combinations.run_all_keras_modes - def test_that_trainable_disables_updates(self): - val_a = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - a = layers_module.Input(shape=(4,)) - layer = layers_module.BatchNormalization(input_shape=(4,)) - b = layer(a) - model = training_module.Model(a, 
b) - - model.trainable = False - if not tf.compat.v1.executing_eagerly_outside_functions(): - self.assertEmpty(model.updates) - - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - if not tf.compat.v1.executing_eagerly_outside_functions(): - self.assertEmpty(model.updates) - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - self.assertAllClose(x1, x2, atol=1e-7) - - model.trainable = True - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - if not tf.compat.v1.executing_eagerly_outside_functions(): - self.assertAllGreater(len(model.updates), 0) - - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert np.abs(np.sum(x1 - x2)) > 1e-5 - - layer.trainable = False - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - if not tf.compat.v1.executing_eagerly_outside_functions(): - self.assertEmpty(model.updates) - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - self.assertAllClose(x1, x2, atol=1e-7) - - def test_weight_deduplication_in_methods(self): - inp = layers_module.Input(shape=(1,)) - bn = layers_module.BatchNormalization() - d = layers_module.Dense(1) - - m0 = training_module.Model(inp, d(bn(inp))) - m1 = training_module.Model(inp, d(bn(inp))) - - x0 = m0(inp) - x1 = m1(inp) - x = layers_module.Add()([x0, x1]) - - model = training_module.Model(inp, x) - self.assertLen(model.trainable_weights, 4) - self.assertLen(model.non_trainable_weights, 2) - self.assertLen(model.weights, 6) - - @test_combinations.run_all_keras_modes - def test_weight_deduplication(self): - - class WatchingLayer(layers_module.Layer): - - def __init__(self, dense_to_track): - # This will cause the kernel and bias to be double counted, effectively - # doubling the learning rate if weights are not deduped. - self._kernel = dense_to_track.kernel - self._bias = dense_to_track.bias - super().__init__() - - inp = layers_module.Input(shape=(1,)) - dense_layer = layers_module.Dense(1) - dense_output = dense_layer(inp) # This will build the dense kernel - - # Deterministically set weights to make the test repeatable. - dense_layer.set_weights([np.ones((1, 1)), np.zeros((1,))]) - output = WatchingLayer(dense_layer)(dense_output) - - model = training_module.Model(inp, output) - - # 0.25 is the edge of the radius of convergence for the double apply case. - # At lr=0.24, the double apply case will very slowly descend while the - # correct case will drop very quickly. - model.compile( - loss='mse', - optimizer=optimizer_v2.gradient_descent.SGD(0.24), - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((64 * 2,)) - y = 4.5 * x - 3. - - history = model.fit(x, y, batch_size=64, epochs=2, verbose=2) - - # If the gradient apply is duplicated then the loss after 2 epochs will - # be ~0.15, compared to the correct answer of O(1e-7). - self.assertLess(history.history['loss'][-1], 1e-6) - - @test_combinations.run_all_keras_modes - def test_weight_shared_across_layers(self): - - class AddWeightLayer(layers_module.Layer): - - def __init__(self, trainable_var, non_trainable_var): - self.trainable_var = trainable_var - self.non_trainable_var = non_trainable_var - super().__init__() - - def call(self, inputs): - return inputs + self.trainable_var - - class LayerWithWeightSharedLayers(layers_module.Layer): - - def __init__(self): - super().__init__() - shared_trainable_var = tf.Variable(1.) 
- shared_non_trainable_var = tf.Variable( - 1., trainable=False) - self.layer1 = AddWeightLayer(shared_trainable_var, - shared_non_trainable_var) - self.layer2 = AddWeightLayer(shared_trainable_var, - shared_non_trainable_var) - - def call(self, inputs): - return self.layer2(self.layer1(inputs)) - - l = LayerWithWeightSharedLayers() - layers = list(l._flatten_layers(include_self=False, recursive=False)) - self.assertEqual(layers, [l.layer1, l.layer2]) - self.assertEqual(l.variables, - [l.layer1.trainable_var, l.layer1.non_trainable_var]) - self.assertEqual(l.trainable_variables, [l.layer1.trainable_var]) - self.assertEqual(l.non_trainable_variables, [l.layer1.non_trainable_var]) - self.assertLen(l.get_weights(), 2) - - @test_combinations.run_all_keras_modes - def test_weight_tracking_for_template(self): - def variable_scoped_function(trainable=True): - return tf.compat.v1.get_variable( - 'dummy', shape=[1], trainable=trainable, - initializer=tf.compat.v1.zeros_initializer()) - def nested_template(): - nested1 = tf.compat.v1.make_template('nested', variable_scoped_function) - nested2 = tf.compat.v1.make_template('nested', variable_scoped_function) - v1 = nested1() - v2 = nested2() - - # nested1 and nested2 should not share variables - self.assertIsNot(v1, v2) - - # Variables created by nested1 should be isolated from variables - # created by nested2. - self.assertEqual(1, len(nested1.variables)) - self.assertEqual(1, len(nested2.variables)) - self.assertIs(nested1.variables[0], v1) - self.assertIs(nested2.variables[0], v2) - self.assertEqual(1, len(nested1.trainable_variables)) - self.assertEqual(1, len(nested2.trainable_variables)) - self.assertIs(nested1.trainable_variables[0], v1) - self.assertIs(nested2.trainable_variables[0], v2) - self.assertEqual(len(nested1.non_trainable_variables), 0) - self.assertEqual(len(nested2.non_trainable_variables), 0) - return v1, v2 - - tmpl1 = tf.compat.v1.make_template('s1', nested_template) - tmpl2 = tf.compat.v1.make_template('s1', nested_template) - - v1, v2 = tmpl1() - v5, v6 = tmpl2() - - model = training_module.Model() - model.template = tmpl1 - self.assertEqual(2, len(model.variables)) - self.assertIs(model.variables[0], v1) - self.assertIs(model.variables[1], v2) - self.assertEqual(2, len(model.variables)) - self.assertIs(model.trainable_variables[0], v1) - self.assertIs(model.trainable_variables[1], v2) - self.assertEqual(len(model.non_trainable_variables), 0) - model.templates = [tmpl2] - for v, w in zip(model.variables, [v1, v2, v5, v6]): - self.assertIs(v, w) - for v, w in zip(model.trainable_variables, [v1, v2, v5, v6]): - self.assertIs(v, w) - self.assertEqual(len(model.non_trainable_variables), 0) - # Make sure losses, layers, and updates aren't broken by having a Template - # in the mix, which does not expose any updates or losses. 
- self.assertEqual([], model.layers) - self.assertEqual([], model.updates) - self.assertEqual([], model.losses) - self.assertEqual([], model.templates.layers) - self.assertEqual([], model.templates.updates) - self.assertEqual([], model.templates.losses) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_logs_passed_to_callbacks(self): - input_dim = 5 - num_classes = 1 - - class TestCallback(Callback): - - def __init__(self): - super().__init__() - self.epoch_end_logs = None - self.batch_end_logs = None - self.epoch_end_call_count = 0 - self.batch_end_call_count = 0 - - def on_epoch_end(self, epoch, logs=None): - self.epoch_end_logs = logs - self.epoch_end_call_count += 1 - - def on_batch_end(self, batch, logs=None): - self.batch_end_logs = logs - self.batch_end_call_count += 1 - - model = test_utils.get_small_sequential_mlp( - num_hidden=10, num_classes=num_classes, input_dim=input_dim) - model.compile( - loss='binary_crossentropy', - metrics=['acc'], - weighted_metrics=['mae'], - optimizer=RMSPropOptimizer(learning_rate=0.01), - run_eagerly=test_utils.should_run_eagerly()) - - np.random.seed(1337) - (x_train, y_train), (_, _) = test_utils.get_test_data( - train_samples=10, - test_samples=10, - input_shape=(input_dim,), - num_classes=num_classes) - - test_callback = TestCallback() - model.fit( - x_train, - y_train, - batch_size=2, - epochs=2, - verbose=0, - callbacks=[test_callback], - validation_data=(x_train, y_train)) - self.assertEqual(test_callback.batch_end_call_count, 10) - self.assertEqual(test_callback.epoch_end_call_count, 2) - - self.assertSetEqual( - set(test_callback.batch_end_logs.keys()), set(['acc', 'loss', 'mae'])) - self.assertSetEqual( - set(test_callback.epoch_end_logs.keys()), - set(['acc', 'loss', 'mae', 'val_acc', 'val_loss', 'val_mae'])) - - @test_combinations.run_all_keras_modes - def test_mismatched_output_shape_and_target_shape(self): - model = sequential.Sequential([ - layers_module.Dense(2, input_shape=(3, 4)), - layers_module.Dense(5), - ]) - model.compile( - RMSPropOptimizer(learning_rate=0.001), - loss='sparse_categorical_crossentropy', - run_eagerly=test_utils.should_run_eagerly()) - # Test with Numpy data - x_train = np.random.random((10, 3, 4)).astype(np.float32) - y_train = np.random.randint(0, 5, size=(10, 3)).astype(np.float32) - model.fit(x_train, y_train, batch_size=5, epochs=1) - - # Test with iterator - dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - dataset = dataset.repeat(10) - dataset = dataset.batch(10) - model.fit(dataset, epochs=1, steps_per_epoch=2) - - if tf.executing_eagerly(): - # Test with eager execution - model.compile(RMSPropOptimizer(learning_rate=0.001), - loss='sparse_categorical_crossentropy', - run_eagerly=True) - model.fit(x_train, y_train, batch_size=5, epochs=1) - - # Test with eager execution and iterator - model.fit(dataset, epochs=1, steps_per_epoch=2) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_losses_in_defun(self): - layer = layers_module.Dense(1, kernel_regularizer='l1') - layer(tf.ones([1, 10])) - - @tf.function - def get_losses(): - return layer.losses - - self.assertAllEqual( - self.evaluate(layer.losses), self.evaluate(get_losses())) - - @test_combinations.run_all_keras_modes - def test_logging(self): - mock_stdout = io.StringIO() - model = sequential.Sequential() - model.add(layers_module.Dense(10, activation='relu')) - model.add(layers_module.Dense(1, activation='sigmoid')) - model.compile( - RMSPropOptimizer(learning_rate=0.001), - 
loss='binary_crossentropy', - run_eagerly=test_utils.should_run_eagerly()) - io_utils.enable_interactive_logging() - with tf.compat.v1.test.mock.patch.object(sys, 'stdout', mock_stdout): - model.fit( - np.ones((10, 10), 'float32'), np.ones((10, 1), 'float32'), epochs=10) - self.assertTrue('Epoch 5/10' in mock_stdout.getvalue()) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_training_with_loss_instance(self): - a = layers_module.Input(shape=(3,), name='input_a') - b = layers_module.Input(shape=(3,), name='input_b') - - dense = layers_module.Dense(4, name='dense') - c = dense(a) - d = dense(b) - e = layers_module.Dropout(0.5, name='dropout')(c) - - model = training_module.Model([a, b], [d, e]) - loss_weights = [1., 0.5] - model.compile( - RMSPropOptimizer(learning_rate=0.001), - loss=losses.MeanSquaredError(), - metrics=[metrics_module.CategoricalAccuracy(), 'mae'], - loss_weights=loss_weights) - - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_d_np = np.random.random((10, 4)) - output_e_np = np.random.random((10, 4)) - - model.fit([input_a_np, input_b_np], [output_d_np, output_e_np], - epochs=1, - batch_size=5) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_static_batch_in_input_layer(self): - if tf.executing_eagerly(): - self.skipTest('Not inferred in eager.') - - class Counter(Callback): - - def __init__(self): - self.batches = 0 - - def on_batch_end(self, batch, logs=None): - self.batches += 1 - - x, y = np.ones((64, 10), 'float32'), np.ones((64, 1), 'float32') - - for batch_size, expected_batches in [(None, 2), (4, 16)]: - inputs = input_layer.Input(batch_size=batch_size, shape=(10,)) - outputs = layers_module.Dense(1, activation='sigmoid')(inputs) - model = training_module.Model(inputs, outputs) - - model.compile(optimizer_v2.adam.Adam(0.001), 'binary_crossentropy') - counter = Counter() - model.fit(x, y, callbacks=[counter]) - self.assertEqual(counter.batches, expected_batches) - - model = sequential.Sequential( - [layers_module.Dense(1, batch_input_shape=(batch_size, 10))]) - model.compile(optimizer_v2.adam.Adam(0.001), 'binary_crossentropy') - counter = Counter() - model.fit(x, y, callbacks=[counter]) - self.assertEqual(counter.batches, expected_batches) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_static_batch_in_input_layer_consistency_checks(self): - if tf.executing_eagerly(): - self.skipTest('Not inferred in eager.') - x, y = np.ones((64, 10), 'float32'), np.ones((64, 1), 'float32') - - inputs = input_layer.Input(batch_size=2, shape=(10,)) - outputs = layers_module.Dense(1, activation='sigmoid')(inputs) - model = training_module.Model(inputs, outputs) - model.compile(optimizer_v2.adam.Adam(0.001), 'binary_crossentropy') - with self.assertRaisesRegex(ValueError, - 'incompatible with the specified batch size'): - model.fit(x, y, batch_size=4) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_compatible_batch_size_functional_model(self): - - class MyLayer(layers_module.Layer): - - def call(self, inputs): - return tf.concat(inputs, axis=0) - - input1 = input_layer.Input(batch_size=2, shape=(10,)) - input2 = input_layer.Input(batch_size=3, shape=(10,)) - outputs = MyLayer()([input1, input2]) - with tf.compat.v1.test.mock.patch.object( - logging, 'warning') as mock_warn: - training_module.Model([input1, input2], outputs) - self.assertEqual( - 
mock_warn.call_args_list[0][0][0], - 'Found incompatible static batch sizes among the inputs. ' - 'Batch sizes: [2, 3]') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_calling_subclass_model_on_different_datasets(self): - - class SubclassedModel(training_module.Model): - - def call(self, inputs): - return inputs * 2 - - model = SubclassedModel() - dataset_one = tf.data.Dataset.from_tensor_slices([[0], [1]]).batch(2) - dataset_two = tf.data.Dataset.from_tensor_slices( - [[3], [4], [5], [6], [7], [8]]).batch(2) - self.assertAllEqual([[0], [2]], model.predict(dataset_one, steps=1)) - self.assertAllEqual([[6], [8], [10], [12]], - model.predict(dataset_two, steps=2)) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_training_on_sparse_categorical_crossentropy_loss_with_softmax(self): - np.random.seed(1337) - train_x = np.ones((100, 4)) - train_y = np.random.randint(0, 1, size=(100, 1)) - - reference_model = test_utils.get_small_sequential_mlp(16, 2, - input_dim=4) - reference_model.compile(loss='sparse_categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=True) - fixed_weights = reference_model.get_weights() - reference_model_loss = reference_model.train_on_batch(train_x, train_y) - - test_model = test_utils.get_small_sequential_mlp(16, 2, input_dim=4) - test_model.compile(loss='sparse_categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=False) - test_model.set_weights(fixed_weights) - test_model_loss = test_model.train_on_batch(train_x, train_y) - self.assertAlmostEqual(test_model_loss, reference_model_loss, places=4) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_training_on_categorical_crossentropy_loss_with_softmax(self): - np.random.seed(1337) - train_x = np.ones((100, 4)) - train_y = np_utils.to_categorical( - np.random.randint(0, 1, size=(100, 1)), 2) - - reference_model = test_utils.get_small_sequential_mlp(16, 2, - input_dim=4) - reference_model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=True) - fixed_weights = reference_model.get_weights() - reference_model_loss = reference_model.train_on_batch(train_x, train_y) - - test_model = test_utils.get_small_sequential_mlp(16, 2, input_dim=4) - test_model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=False) - test_model.set_weights(fixed_weights) - test_model_loss = test_model.train_on_batch(train_x, train_y) - self.assertAlmostEqual(test_model_loss, reference_model_loss, places=4) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_training_on_binary_crossentropy_loss(self): - train_x = np.ones((100, 4), dtype=np.float32) - train_y = np.ones((100, 1), dtype=np.float32) - reference_model = test_utils.get_small_sequential_mlp(16, 1, - input_dim=4) - reference_model.compile(loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=True) - fixed_weights = reference_model.get_weights() - reference_model_loss = reference_model.train_on_batch(train_x, train_y) - - test_model = test_utils.get_small_sequential_mlp(16, 1, input_dim=4) - test_model.compile(loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=False) - test_model.set_weights(fixed_weights) - test_model_loss = test_model.train_on_batch(train_x, train_y) - 
self.assertAlmostEqual(test_model_loss, reference_model_loss, places=4) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - @parameterized.named_parameters( - ('default', 1, 4), ('integer_two', 2, 2), ('integer_four', 4, 1), - ('simple_list', [1, 3, 4], 3), ('duplicated_list', [4, 2, 2], 2)) - def test_validation_freq(self, validation_freq, expected_runs): - x, y = np.ones((10, 10)), np.ones((10, 1)) - model = test_utils.get_small_mlp(2, 1, 10) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - class ValCounter(Callback): - - def __init__(self): - self.val_runs = 0 - - def on_test_begin(self, logs=None): - self.val_runs += 1 - - val_counter = ValCounter() - model.fit( - x, - y, - epochs=4, - validation_data=(x, y), - validation_freq=validation_freq, - callbacks=[val_counter]) - self.assertEqual(val_counter.val_runs, expected_runs) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_validation_steps_without_data(self): - if tf.executing_eagerly(): - self.skipTest('Check removed in new `fit`') - x, y = np.ones((10, 10)), np.ones((10, 1)) - model = test_utils.get_small_mlp(2, 1, 10) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - with self.assertRaisesRegex( - ValueError, '`validation_steps` should not be specified if ' - '`validation_data` is None.'): - model.fit(x, y, epochs=4, validation_data=None, validation_steps=3) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_layer_with_variable_output(self): - - class VariableOutputLayer(layers_module.Layer): - - def build(self, input_shape): - self.v = self.add_weight('output_var', shape=(2, 5), initializer='ones') - - def call(self, inputs): - return self.v - - model = test_utils.get_model_from_layers( - [VariableOutputLayer(), layers_module.Dense(1)], input_shape=(10,)) - # TODO(omalleyt): Make this work with `run_eagerly=True`. - model.compile('sgd', 'mse', run_eagerly=False) - model.fit(np.ones((10, 10)), np.ones((10, 1)), batch_size=2, epochs=5) - - self.assertLen(model.trainable_variables, 3) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - @test_utils.enable_v2_dtype_behavior - def test_model_dtype(self): - - class AssertTypeLayer(layers_module.Layer): - - def call(self, inputs): - assert inputs.dtype.name == self.dtype, ( - 'Input tensor has type %s which does not match assert type %s' % - (inputs.dtype.name, self.assert_type)) - return inputs + 1. 
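A minimal sketch (not part of the patch) of the `validation_freq` semantics that `test_validation_freq` above pins down, assuming the public `tf.keras` API in TF 2.x: an integer N runs validation every N epochs, while a list runs it only at the listed epoch numbers.

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(10,))])
model.compile("sgd", "mse")
x, y = np.ones((10, 10)), np.ones((10, 1))

# Integer form: validate every 2nd epoch -> validation runs after epochs 2, 4.
model.fit(x, y, epochs=4, validation_data=(x, y), validation_freq=2, verbose=0)

# List form: validate only at these epochs -> validation runs after 1, 3, 4.
model.fit(
    x, y, epochs=4, validation_data=(x, y), validation_freq=[1, 3, 4], verbose=0
)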
- - for dtype in ('float16', 'float32', 'float64'): - model = test_utils.get_model_from_layers( - [AssertTypeLayer(dtype=dtype)], input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((10, 10)) - y = np.ones((10, 10)) - model.fit(x, y) - model.test_on_batch(x, y) - model(x) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - @test_utils.enable_v2_dtype_behavior - def test_model_input_dtype(self): - model = test_utils.get_small_mlp(1, 10, 10) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - x = np.ones((10, 10)).astype(np.float64) - y = np.ones((10, 10)).astype(np.float64) - dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) - model.fit(dataset) - self.assertEqual(model._compute_dtype, 'float32') - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_subclassed_model_with_training_arg(self): - - class LayerWithTrainingArg(layers_module.Layer): - - def call(self, inputs, training=None): - self.training = training - return inputs - - class ModelWithTrainingArg(training_module.Model): - - def __init__(self): - super().__init__() - self.l1 = LayerWithTrainingArg() - - def call(self, inputs, training=None): - self.training = training - inputs = self.l1(inputs, training=training) - return inputs - - x = np.zeros((1, 2)) - model = ModelWithTrainingArg() - model.compile( - loss='mse', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, x, epochs=1) - - if tf.executing_eagerly(): - expected_training_arg = True - else: - expected_training_arg = backend.symbolic_learning_phase() - - self.assertIs(model.training, expected_training_arg) - self.assertIs(model.l1.training, expected_training_arg) - - @test_combinations.run_all_keras_modes - def test_error_when_model_is_not_compiled(self): - inputs = input_layer.Input(shape=(1,)) - outputs = layers_module.Dense(1)(inputs) - model = training_module.Model(inputs, outputs) - with self.assertRaisesRegex(RuntimeError, 'must compile your model'): - model.fit(np.ones((1, 1)), np.ones((1, 1))) - - class MyModel(training_module.Model): - - def call(self, x): - self.add_loss(tf.reduce_sum(x)) - return x - - model = MyModel() - with self.assertRaisesRegex(RuntimeError, 'must compile your model'): - model.fit(np.random.random((32, 1)), epochs=2) - - @test_combinations.run_all_keras_modes - @test_utils.enable_v2_dtype_behavior - def test_losses_of_different_dtypes(self): - inp = input_layer.Input(shape=(2,)) - out_1 = layers_module.Dense( - 2, dtype='float32', kernel_regularizer='l2')( - inp) - out_2 = layers_module.Dense( - 2, dtype='float16', kernel_regularizer='l2')( - inp) - model = training_module.Model(inp, [out_1, out_2]) - extra_loss = tf.reduce_sum(tf.cast(out_2, 'float64')) - model.add_loss(extra_loss) - model.compile('sgd', ['mse', 'mse'], - run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 2)), np.ones((10, 2)) - model.fit(x, [y, y]) - - @test_combinations.run_all_keras_modes - @test_utils.enable_v2_dtype_behavior - def test_losses_of_different_dtypes_with_subclassed_model(self): - - class MyModel(training_module.Model): - - def build(self, _): - self.dense = layers_module.Dense(2) - - def call(self, inputs): - self.add_loss(tf.cast(tf.nn.l2_loss(inputs), 'float64')) - return self.dense(inputs) - - model = MyModel(dtype='float32') - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 2)), np.ones((10, 2)) 
- model.fit(x, y) - - @test_combinations.run_all_keras_modes - @test_utils.enable_v2_dtype_behavior - def test_regularizer_of_different_dtype(self): - inp = input_layer.Input(shape=(2,)) - - def regularizer(weight): - return tf.cast(tf.nn.l2_loss(weight), 'float64') - - out = layers_module.Dense( - 2, dtype='float32', kernel_regularizer=regularizer)( - inp) - model = training_module.Model(inp, out) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 2)), np.ones((10, 2)) - model.fit(x, y) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_outputs_are_floats(self): - x, y = np.ones((10, 1)), np.ones((10, 1)) - model = sequential.Sequential([layers_module.Dense(1)]) - model.compile('sgd', 'mse', metrics=['accuracy'], - run_eagerly=test_utils.should_run_eagerly()) - - history = model.fit(x, y, epochs=2) - self.assertIsInstance(history.history['loss'][0], float) - self.assertIsInstance(history.history['accuracy'][0], float) - - loss, accuracy = model.train_on_batch(x, y) - self.assertIsInstance(loss, float) - self.assertIsInstance(accuracy, float) - - loss, accuracy = model.evaluate(x, y) - self.assertIsInstance(loss, float) - self.assertIsInstance(accuracy, float) - - loss, accuracy = model.test_on_batch(x, y) - self.assertIsInstance(loss, float) - self.assertIsInstance(accuracy, float) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_int_output(self): - x, y = np.ones((10, 1)), np.ones((10, 1)) - model = sequential.Sequential([layers_module.Dense(1)]) - - class MyMetric(metrics_module.Metric): - - def update_state(self, y_true, y_pred, sample_weight=None): - del y_true, y_pred, sample_weight - - def result(self): - return tf.constant(1, dtype='int64') - - model.compile('sgd', 'mse', metrics=[MyMetric()], - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit(x, y, epochs=2) - self.assertIsInstance(history.history['my_metric'][0], int) - - @test_combinations.run_all_keras_modes - def test_calling_aggregate_gradient(self): - - class _Optimizer(optimizer_v2.gradient_descent.SGD): - """Mock optimizer to check if _aggregate_gradient is called.""" - - _HAS_AGGREGATE_GRAD = True - - def __init__(self): - self.aggregate_gradients_called = False - super().__init__(name='MyOptimizer') - - def _aggregate_gradients(self, grads): - self.aggregate_gradients_called = True - return super()._aggregate_gradients(grads) - - mock_optimizer = _Optimizer() - - model = sequential.Sequential() - model.add(layers_module.Dense(10, activation='relu')) - - model.compile(mock_optimizer, 'mse', - run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 10)), np.ones((10, 10)) - model.fit(x, y) - self.assertEqual(model.optimizer.aggregate_gradients_called, True) - - class _OptimizerOverrideApplyGradients(_Optimizer): - """Override apply_gradients. - - To test the case where the optimizer does not define the - experimental_aggregate_gradients parameter. 
- """ - - _HAS_AGGREGATE_GRAD = False - - def apply_gradients(self, grads_and_vars, name=None): # pylint: disable=useless-super-delegation - return super().apply_gradients(grads_and_vars, name) - - mock_optimizer = _OptimizerOverrideApplyGradients() - model.compile(mock_optimizer, 'mse', - run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 10)), np.ones((10, 10)) - model.fit(x, y) - self.assertEqual(model.optimizer.aggregate_gradients_called, True) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_gradients_are_none(self): - - class DenseWithExtraWeight(layers_module.Dense): - - def build(self, input_shape): - # Gradients w.r.t. extra_weights are None - self.extra_weight_1 = self.add_weight('extra_weight_1', shape=(), - initializer='ones') - super().build(input_shape) - self.extra_weight_2 = self.add_weight('extra_weight_2', shape=(), - initializer='ones') - - model = sequential.Sequential([DenseWithExtraWeight(4, input_shape=(4,))]) - # Test clipping can handle None gradients - opt = optimizer_v2.adam.Adam(clipnorm=1.0, clipvalue=1.0) - model.compile(opt, 'mse', run_eagerly=test_utils.should_run_eagerly()) - inputs = np.random.normal(size=(64, 4)) - targets = np.random.normal(size=(64, 4)) - old_kernel = model.get_weights()[1] - model.fit(inputs, targets) - new_kernel = model.get_weights()[1] - self.assertNotAllEqual(old_kernel, new_kernel) - - @test_combinations.run_all_keras_modes - def test_layer_ordering(self): - - class MyLayer(layers_module.Layer): - pass - - class MyModel(training_module.Model): - - def __init__(self, name): - super().__init__(name=name) - - self.weight = tf.Variable(0, name=name) - - self.direct_sublayer = MyLayer(name='direct') - self.direct_sublayer.d = {'d': MyLayer(name='direct/dict')} - - self.dict_sublayer = {'d': MyLayer(name='dict')} - self.dict_sublayer['d'].direct = MyLayer(name='dict/direct') - - model = MyModel('model') - # All sublayers, including self and recursive sublayers. - self.assertEqual(['model', 'direct', 'direct/dict', 'dict', 'dict/direct'], - [l.name for l in model._flatten_layers()]) - # Only direct sublayers, including those in data structures. - self.assertEqual(['direct', 'dict'], [l.name for l in model.layers]) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_trainable_state_setting(self): - - class UpdateLayer(layers_module.Layer): - - def __init__(self): - super().__init__() - self.v = tf.Variable(0., trainable=False) - - def call(self, x): - self.add_update(lambda: self.v.assign_add(1.)) - return x * self.v - - layer = UpdateLayer() - model_with_updates = sequential.Sequential([layer]) - model_with_updates.compile( - 'sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - - layer.trainable = False - model_without_updates = sequential.Sequential([layer]) - model_without_updates.compile( - 'sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - - x, y = np.ones((10, 1)), np.ones((10, 1)) - - self.assertEqual(self.evaluate(layer.v), 0.) - model_with_updates.fit(x, y, batch_size=10) - # assign_add called. - self.assertEqual(self.evaluate(layer.v), 1.) - model_without_updates.fit(x, y, batch_size=10) - # assign_add not called. - self.assertEqual(self.evaluate(layer.v), 1.) 
- - @test_combinations.run_all_keras_modes( - always_skip_v1=True) - @parameterized.named_parameters( - ('numpy_array', 'numpy_array'), - ('dataset_array', 'dataset_array'), - ('dataset_dict', 'dataset_dict')) - def test_single_input_no_tuple_wrapping(self, input_type): - x = np.ones((10, 1)) - - if input_type == 'numpy_array': - batch_size = 3 - expected_data_type = tf.Tensor - elif input_type == 'dataset_array': - x = tf.data.Dataset.from_tensor_slices(x).batch(3) - batch_size = None - expected_data_type = tf.Tensor - else: - x = {'my_input': x} - x = tf.data.Dataset.from_tensor_slices(x).batch(3) - batch_size = None - expected_data_type = dict - - test_case = self - - class MyModel(training_module.Model): - - def train_step(self, data): - # No tuple wrapping for single x input and no targets. - test_case.assertIsInstance(data, expected_data_type) - return super().train_step(data) - - def test_step(self, data): - test_case.assertIsInstance(data, expected_data_type) - return super().test_step(data) - - def predict_step(self, data): - test_case.assertIsInstance(data, expected_data_type) - return super().predict_step(data) - - inputs = layers_module.Input(shape=(1,), name='my_input') - outputs = layers_module.Dense(1)(inputs) - model = MyModel(inputs, outputs) - model.add_loss(tf.reduce_sum(outputs)) - model.compile('sgd') - model.fit(x, batch_size=batch_size) - model.evaluate(x, batch_size=batch_size) - model.predict(x, batch_size=batch_size) - - @test_combinations.run_all_keras_modes( - always_skip_v1=True) - @parameterized.named_parameters( - ('custom_metrics', False, True), - ('compiled_metrics', True, False), - ('both_compiled_and_custom_metrics', True, True)) - def test_evaluate_with_custom_test_step( - self, use_compiled_metrics, use_custom_metrics): - - class MyModel(training_module.Model): - - def test_step(self, data): - x, y = data - pred = self(x) - metrics = {} - if use_compiled_metrics: - self.compiled_metrics.update_state(y, pred) - self.compiled_loss(y, pred) - for metric in self.metrics: - metrics[metric.name] = metric.result() - if use_custom_metrics: - custom_metrics = { - 'mean': tf.reduce_mean(pred), - 'sum': tf.reduce_sum(pred) - } - metrics.update(custom_metrics) - return metrics - - inputs = layers_module.Input((2,)) - outputs = layers_module.Dense(3)(inputs) - model = MyModel(inputs, outputs) - if use_compiled_metrics: - model.compile('adam', 'mse', metrics=['mae', 'mape'], - run_eagerly=test_utils.should_run_eagerly()) - else: - model.compile('adam', 'mse', - run_eagerly=test_utils.should_run_eagerly()) - x = np.random.random((4, 2)) - y = np.random.random((4, 3)) - results_list = model.evaluate(x, y) - results_dict = model.evaluate(x, y, return_dict=True) - self.assertLen(results_list, len(results_dict)) - if use_compiled_metrics and use_custom_metrics: - self.assertLen(results_list, 5) - self.assertEqual(results_list, - [results_dict['loss'], - results_dict['mae'], results_dict['mape'], - results_dict['mean'], results_dict['sum']]) - if use_compiled_metrics and not use_custom_metrics: - self.assertLen(results_list, 3) - self.assertEqual(results_list, - [results_dict['loss'], - results_dict['mae'], results_dict['mape']]) - if not use_compiled_metrics and use_custom_metrics: - self.assertLen(results_list, 2) - self.assertEqual(results_list, - [results_dict['mean'], results_dict['sum']]) - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types - def test_model_make_function(self): - layers = [ - layers_module.Dense(10, 
dtype=np.float64), - layers_module.Dense(10, dtype=np.float64) - ] - model = test_utils.get_model_from_layers(layers, input_shape=(1,)) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - - original_train_function = model.make_train_function() - self.assertIsNotNone(original_train_function) - self.assertEqual(model.make_train_function(), original_train_function) - # Check that we regenerate it without reusing the cached version. - self.assertNotEqual( - model.make_train_function(force=True), original_train_function) - - original_test_function = model.make_test_function() - self.assertIsNotNone(original_test_function) - self.assertEqual(model.make_test_function(), original_test_function) - # Check that we regenerate it without reusing the cached version. - self.assertNotEqual( - model.make_test_function(force=True), original_test_function) - - original_predict_function = model.make_predict_function() - self.assertIsNotNone(original_predict_function) - self.assertEqual(model.make_predict_function(), original_predict_function) - # Check that we regenerate it without reusing the cached version. - self.assertNotEqual( - model.make_predict_function(force=True), original_predict_function) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_custom_compute_metrics(self): - - class CustomMetric(metrics_module.Mean): - - def sq_diff_plus_x(self, x, y_true, y_pred): - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - sq_diff_plus_x = tf.add(x, tf.math.squared_difference(y_pred, y_true)) - return backend.mean(sq_diff_plus_x, axis=-1) - - def update_state(self, x, y_true, y_pred, sample_weight=None): - matches = self.sq_diff_plus_x(x, y_true, y_pred) - return super().update_state(matches) - - class MyModel(sequential.Sequential): - - def compute_metrics(self, x, y, y_pred, sample_weight): - metric_results = super().compute_metrics(x, y, y_pred, - sample_weight) - self.custom_metric.update_state(x, y, y_pred, sample_weight) - metric_results['custom_metric_name'] = self.custom_metric.result() - return metric_results - - tensors = tf.random.uniform((10, 10)), tf.random.uniform((10,)) - dataset = tf.data.Dataset.from_tensor_slices(tensors).repeat().batch(1) - model = MyModel([layers_module.Dense(10)]) - model.custom_metric = CustomMetric('my_metric') - initial_result = model.custom_metric.result() - optimizer = optimizer_v2.gradient_descent.SGD() - model.compile(optimizer, loss='mse', steps_per_execution=10) - model.fit(dataset, epochs=2, steps_per_epoch=10, verbose=2) - after_fit_result = model.custom_metric.result() - - self.assertEqual(self.evaluate(initial_result), 0.0) - self.assertNotEqual(self.evaluate(initial_result), - self.evaluate(after_fit_result)) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_custom_compute_loss(self): - - class MyModel(training_module.Model): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.loss_metric = metrics_module.Mean(name='loss') - - def compute_loss(self, x, y, y_pred, sample_weight): - loss = tf.reduce_mean(tf.math.squared_difference(y_pred, y)) - loss += tf.add_n(self.losses) - self.loss_metric.update_state(loss) - return loss - - def reset_metrics(self): - self.loss_metric.reset_states() - - @property - def metrics(self): - return [self.loss_metric] - - tensors = tf.random.uniform((10, 10)), tf.random.uniform((10,)) - dataset = tf.data.Dataset.from_tensor_slices(tensors).repeat().batch(1) - - inputs = 
layers_module.Input(shape=(10,), name='my_input') - outputs = layers_module.Dense(10)(inputs) - model = MyModel(inputs, outputs) - model.add_loss(tf.reduce_sum(outputs)) - - optimizer = optimizer_v2.gradient_descent.SGD() - model.compile(optimizer, loss='mse', steps_per_execution=10) - history = model.fit(dataset, epochs=2, steps_per_epoch=10) - self.assertLen(history.history['loss'], 2) - self.assertAllClose(history.history['loss'][1], model.loss_metric.result()) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_ema_overwrite(self): - - model = sequential.Sequential() - model.add(input_layer.Input(shape=(4,))) - model.add(layers_module.Dense(1, activation='relu')) - - tensors = tf.random.uniform((4, 4)), tf.random.uniform((4,)) - dataset = tf.data.Dataset.from_tensor_slices(tensors).repeat().batch(1) - - optimizer = sgd_experimental.SGD(use_ema=True, ema_momentum=1) - model.compile(optimizer, loss='mse', steps_per_execution=10) - initial_value = tf.Variable(model.trainable_variables[0]) - history = model.fit(dataset, epochs=2, steps_per_epoch=10) - self.assertLen(history.history['loss'], 2) - self.assertAllClose(initial_value, model.trainable_variables[0]) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_get_verbosity(self): - class MyStrategy(tf.distribute.Strategy): - - def __init__(self): - self._should_use_with_coordinator = True - with self.assertRaisesRegex(ValueError, '`verbose=1` is not allowed'): - training_module._get_verbosity(1, MyStrategy()) - - io_utils.enable_interactive_logging() - self.assertEqual(training_module._get_verbosity('auto', MyStrategy()), 2) - self.assertEqual(training_module._get_verbosity( - 'auto', tf.distribute.MirroredStrategy()), 1) - self.assertEqual(training_module._get_verbosity( - 2, tf.distribute.MirroredStrategy()), 2) - - io_utils.disable_interactive_logging() - self.assertEqual(training_module._get_verbosity( - 'auto', tf.distribute.MirroredStrategy()), 2) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_save_spec(self): - - class Model(training_module.Model): - - def call(self, arg_input_1, arg_input_2, keyword_input, training=None): - return 0 - - # Test subclassed model save specs. - model = Model() - model(tf.ones([1, 1]), tf.ones([2, 2]), keyword_input=tf.ones([3, 3]), - training=False) - spec = model.save_spec(dynamic_batch=False) - self.assertEqual(spec[0][0].shape.as_list(), [1, 1]) - self.assertEqual(spec[0][1].shape.as_list(), [2, 2]) - self.assertEqual(spec[1]['keyword_input'].shape.as_list(), [3, 3]) - spec = model.save_spec(dynamic_batch=True) - self.assertEqual(spec[0][0].shape.as_list(), [None, 1]) - - # Test functional model save specs. - input_1 = layers_module.Input((1,), batch_size=1) - input_2 = layers_module.Input((2,), batch_size=2) - input_3 = layers_module.Input((3,), batch_size=3) - output = model(input_1, input_2, keyword_input=input_3, training=True) - functional = training_module.Model([input_1, input_2, input_3], output) - # Functional models should ignore dynamic_batch if the input layers have a - # known batch size. 
- spec = functional.save_spec(dynamic_batch=True) - input_specs = spec[0][0] - self.assertEqual(input_specs[0].shape.as_list(), [1, 1]) - self.assertEqual(input_specs[1].shape.as_list(), [2, 2]) - self.assertEqual(input_specs[2].shape.as_list(), [3, 3]) - - -class TestExceptionsAndWarnings(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - @test_combinations.run_with_all_model_types - def test_fit_on_no_output(self): - inputs = layers_module.Input((3,)) - outputs = layers_module.Dense(2)(inputs) - model = training_module.Model(inputs, outputs) - model.compile('rmsprop', 'mse') - x = np.zeros((32, 3)) - with self.assertRaisesRegex(ValueError, 'Target data is missing..*'): - model.fit(x) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - @test_combinations.run_with_all_model_types - def test_fit_on_wrong_output_type(self): - inputs1 = layers_module.Input((3,), name='a') - inputs2 = layers_module.Input((3,), name='b') - x = layers_module.Concatenate()([inputs1, inputs2]) - outputs = layers_module.Dense(2, name='c')(x) - model = training_module.Model([inputs1, inputs2], outputs) - model.compile('rmsprop', 'mse') - x = np.zeros((32, 3)) - y = np.zeros((32, 2)) - with self.assertRaisesRegex(ValueError, 'Target data is missing..*'): - model.fit({'a': x, 'b': x, 'c': y}) - - @test_combinations.run_all_keras_modes - def test_compile_warning_for_loss_missing_output(self): - with self.cached_session(): - inp = layers_module.Input(shape=(16,), name='input_a') - out_1 = layers_module.Dense(8, name='dense_1')(inp) - out_2 = layers_module.Dense( - 3, activation='softmax', name='dense_2')( - out_1) - model = training_module.Model(inputs=[inp], outputs=[out_1, out_2]) - optimizer = RMSPropOptimizer(learning_rate=0.001) - - model.compile( - optimizer, - loss={ - 'dense_2': 'categorical_crossentropy', - }, - metrics={ - 'dense_2': 'categorical_accuracy', - 'dense_1': metrics_module.CategoricalAccuracy(), - }, - run_eagerly=test_utils.should_run_eagerly()) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_predict_error_with_empty_x(self): - inputs = layers_module.Input(shape=(2,)) - outputs = layers_module.Dense(4)(inputs) - model = training_module.Model(inputs=inputs, outputs=outputs) - model.compile(loss='mse') - - with self.assertRaisesRegex(ValueError, - 'Unexpected result of `predict_function`.*'): - model.predict(np.array([])) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - @parameterized.named_parameters( - ('dynamic', 0, False), - ('dynamic_multistep', 10, False), - ('static', 0, True), - ('static_multistep', 10, True), - ) - def test_predict_structured(self, spe, static_batch): - inputs = layers_module.Input(shape=(2,)) - outputs = layers_module.Dense(2)(inputs) - model = training_module.Model( - inputs=inputs, - outputs={'out': outputs}, + verbose=2, + ) + + @test_combinations.run_all_keras_modes + def test_evaluate_predict_on_arrays(self): + a = layers_module.Input(shape=(3,), name="input_a") + b = layers_module.Input(shape=(3,), name="input_b") + + dense = layers_module.Dense(4, name="dense") + c = dense(a) + d = dense(b) + e = layers_module.Dropout(0.5, name="dropout")(c) + + model = training_module.Model([a, b], [d, e]) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = "mse" + loss_weights = [1.0, 0.5] + model.compile( + optimizer, + loss, + metrics=["mae", metrics_module.CategoricalAccuracy()], + loss_weights=loss_weights, + sample_weight_mode=None, + 
run_eagerly=test_utils.should_run_eagerly(), + ) + + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + + output_d_np = np.random.random((10, 4)) + output_e_np = np.random.random((10, 4)) + + # Test evaluate at different verbosity + out = model.evaluate( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + batch_size=5, + verbose=0, + ) + self.assertEqual(len(out), 7) + out = model.evaluate( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + batch_size=5, + verbose=1, + ) + self.assertEqual(len(out), 7) + out = model.evaluate( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + batch_size=5, + verbose=2, + ) + self.assertEqual(len(out), 7) + out = model.test_on_batch( + [input_a_np, input_b_np], [output_d_np, output_e_np] + ) + self.assertEqual(len(out), 7) + + # Test evaluate with dictionary inputs + model.evaluate( + {"input_a": input_a_np, "input_b": input_b_np}, + {"dense": output_d_np, "dropout": output_e_np}, + batch_size=5, + verbose=0, + ) + model.evaluate( + {"input_a": input_a_np, "input_b": input_b_np}, + {"dense": output_d_np, "dropout": output_e_np}, + batch_size=5, + verbose=1, + ) + + # Test predict + out = model.predict([input_a_np, input_b_np], batch_size=5) + self.assertEqual(len(out), 2) + out = model.predict({"input_a": input_a_np, "input_b": input_b_np}) + self.assertEqual(len(out), 2) + out = model.predict_on_batch( + {"input_a": input_a_np, "input_b": input_b_np} + ) + self.assertEqual(len(out), 2) + + def _make_sequence_input_functions(self, input_type): + # train and test + xy_namedtuple = collections.namedtuple("xy_namedtuple", ["x", "y"]) + + # predict + x_namedtuple = collections.namedtuple("x_namedtuple", ["x"]) + + if input_type == "dataset": + dataset = tf.data.Dataset.range(16).map( + lambda _: tf.ones(shape=(1,)) + ) + + xy_dataset = tf.data.Dataset.zip((dataset, dataset)).batch(4) + x_dataset = dataset.batch(4) + + def xy_function(use_namedtuple): + return ( + xy_dataset.map(xy_namedtuple) + if use_namedtuple + else xy_dataset + ) + + def x_function(use_namedtuple): + return ( + x_dataset.map(x_namedtuple) if use_namedtuple else x_dataset + ) + + return xy_function, x_function + + elif input_type == "generator": + + def xy_generator(use_namedtuple): + x, y = np.ones((4, 1)), np.ones((4, 1)) + for _ in range(4): + if use_namedtuple: + yield xy_namedtuple(x, y) + else: + yield x, y + + def x_generator(use_namedtuple): + x = np.ones((4, 1)) + for _ in range(4): + if use_namedtuple: + yield x_namedtuple(x) + else: + yield x + + return xy_generator, x_generator + + elif input_type == "sequence": + + class XYSequence(data_utils.Sequence): + def __init__(self, use_namedtuple): + self._use_namedtuple = use_namedtuple + super().__init__() + + def __getitem__(self, idx): + x, y = np.ones((4, 1)), np.ones((4, 1)) + if self._use_namedtuple: + return xy_namedtuple(x, y) + return x, y + + def __len__(self): + return 4 + + class XSequence(data_utils.Sequence): + def __init__(self, use_namedtuple): + self._use_namedtuple = use_namedtuple + super().__init__() + + def __getitem__(self, idx): + x = np.ones((4, 1)) + if self._use_namedtuple: + return x_namedtuple(x) + return x + + def __len__(self): + return 4 + + return XYSequence, XSequence + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @test_combinations.run_with_all_model_types + @parameterized.named_parameters( + ("dataset", "dataset"), + ("generator", "generator"), + ("sequence", "sequence"), ) - model.compile( - loss='mse', - 
steps_per_execution=spe, - run_eagerly=test_utils.should_run_eagerly(), + def test_sequence_input_types(self, input_type): + """Ensure that namedtuples and tuples are plumbed identically.""" + if not tf.executing_eagerly(): + self.skipTest("Improved checking is only present in data_adapter.") + + xy_function, x_function = self._make_sequence_input_functions( + input_type + ) + fit_kwargs, evaluate_kwargs, predict_kwargs = {}, {}, {} + if input_type == "generator": + fit_kwargs["steps_per_epoch"] = 4 + evaluate_kwargs["steps"] = 4 + predict_kwargs["steps"] = 4 + + model = test_utils.get_small_mlp(1, 1, 1) + model.compile( + loss="mse", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + + model.fit(xy_function(use_namedtuple=False), **fit_kwargs) + model.evaluate(xy_function(use_namedtuple=False), **evaluate_kwargs) + model.predict(x_function(use_namedtuple=False), **predict_kwargs) + + @test_combinations.run_all_keras_modes + def test_custom_mapping_in_config(self): + class MyModel(training_module.Model): + def call(self, inputs): + return inputs + + def get_config(self): + self.a = {} + return {"a": self.a} + + model = MyModel() + self.assertIn('{"a": {}}', model.to_json()) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_get_config_default(self): + class MyModel(training_module.Model): + def __init__(self, units): + super().__init__() + self.units = units + + def call(self, inputs): + return inputs + + # Test default config with named args + model = MyModel(units=10) + config = model.get_config() + self.assertLen(config, 1) + self.assertEqual(config["units"], 10) + model = model.from_config(config) + self.assertDictEqual(model.get_config(), config) + + # Test default config with positional args + model = MyModel(10) + config = model.get_config() + self.assertLen(config, 1) + self.assertEqual(config["units"], 10) + model = model.from_config(config) + self.assertDictEqual(model.get_config(), config) + + # Test non-serializable + model = MyModel(units=np.int32(10)) + config = model.get_config() + self.assertNotIn("units", config) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_get_config_kwargs(self): + class MyModel(training_module.Model): + def __init__(self, units, **kwargs): + super().__init__() + self.units = units + + def call(self, inputs): + return inputs + + model = MyModel(10, extra=1) + config = model.get_config() + # config = {'name': 'my_model', 'trainable': True, 'dtype': 'float32', + # 'extra': 1, 'units': 10} + self.assertLen(config, 5) + self.assertEqual(config["units"], 10) + self.assertEqual(config["extra"], 1) + model = model.from_config(config) + self.assertDictEqual(model.get_config(), config) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_get_config_override(self): + class MyModel(training_module.Model): + def __init__(self, units): + super().__init__() + self.units = units + + def call(self, inputs): + return inputs + + def get_config(self): + config = {"units": int(self.units)} + config.update(super().get_config()) + return config + + model = MyModel(units=np.int32(10)) + config = model.get_config() + self.assertLen(config, 1) + self.assertEqual(config["units"], 10) + model = model.from_config(config) + self.assertDictEqual(model.get_config(), config) + + def test_training_on_sparse_data_with_dense_placeholders_v1(self): + with tf.Graph().as_default(): + if scipy_sparse is None: + return + + test_inputs = [ + scipy_sparse.random(6, 3, density=0.25).tocsr() + for _ in
range(2) + ] + test_outputs = [ + scipy_sparse.random(6, i, density=0.25).tocsr() + for i in range(3, 5) + ] + in1 = layers_module.Input(shape=(3,)) + in2 = layers_module.Input(shape=(3,)) + out1 = layers_module.Dropout(0.5, name="dropout")(in1) + out2 = layers_module.Dense(4, name="dense_1")(in2) + model = training_module.Model([in1, in2], [out1, out2]) + model.predict(test_inputs, batch_size=2) + optimizer = "rmsprop" + model.compile( + optimizer, + "mse", + metrics=["mae", metrics_module.CategoricalAccuracy()], + ) + model.fit( + test_inputs, + test_outputs, + epochs=1, + batch_size=2, + validation_split=0.5, + ) + model.evaluate(test_inputs, test_outputs, batch_size=2) + + @test_combinations.run_all_keras_modes + def test_compile_with_sparse_placeholders(self): + inputs = layers_module.Input(shape=(10,), sparse=True) + weights = tf.Variable( + np.ones((10, 1)).astype(np.float32), name="weights" + ) + weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) + output_layer = layers_module.Lambda(weights_mult)(inputs) + model = training_module.Model([inputs], output_layer) + model.compile( + loss="binary_crossentropy", + optimizer="adam", + metrics=["accuracy"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + @test_combinations.run_all_keras_modes + def test_that_trainable_disables_updates(self): + val_a = np.random.random((10, 4)) + val_out = np.random.random((10, 4)) + + a = layers_module.Input(shape=(4,)) + layer = layers_module.BatchNormalization(input_shape=(4,)) + b = layer(a) + model = training_module.Model(a, b) + + model.trainable = False + if not tf.compat.v1.executing_eagerly_outside_functions(): + self.assertEmpty(model.updates) + + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + if not tf.compat.v1.executing_eagerly_outside_functions(): + self.assertEmpty(model.updates) + + x1 = model.predict(val_a) + model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + self.assertAllClose(x1, x2, atol=1e-7) + + model.trainable = True + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + if not tf.compat.v1.executing_eagerly_outside_functions(): + self.assertAllGreater(len(model.updates), 0) + + model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + assert np.abs(np.sum(x1 - x2)) > 1e-5 + + layer.trainable = False + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + if not tf.compat.v1.executing_eagerly_outside_functions(): + self.assertEmpty(model.updates) + + x1 = model.predict(val_a) + model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + self.assertAllClose(x1, x2, atol=1e-7) + + def test_weight_deduplication_in_methods(self): + inp = layers_module.Input(shape=(1,)) + bn = layers_module.BatchNormalization() + d = layers_module.Dense(1) + + m0 = training_module.Model(inp, d(bn(inp))) + m1 = training_module.Model(inp, d(bn(inp))) + + x0 = m0(inp) + x1 = m1(inp) + x = layers_module.Add()([x0, x1]) + + model = training_module.Model(inp, x) + self.assertLen(model.trainable_weights, 4) + self.assertLen(model.non_trainable_weights, 2) + self.assertLen(model.weights, 6) + + @test_combinations.run_all_keras_modes + def test_weight_deduplication(self): + class WatchingLayer(layers_module.Layer): + def __init__(self, dense_to_track): + # This will cause the kernel and bias to be double counted, + # effectively doubling the learning rate if weights are not + # deduped. 
+ self._kernel = dense_to_track.kernel + self._bias = dense_to_track.bias + super().__init__() + + inp = layers_module.Input(shape=(1,)) + dense_layer = layers_module.Dense(1) + dense_output = dense_layer(inp) # This will build the dense kernel + + # Deterministically set weights to make the test repeatable. + dense_layer.set_weights([np.ones((1, 1)), np.zeros((1,))]) + output = WatchingLayer(dense_layer)(dense_output) + + model = training_module.Model(inp, output) + + # 0.25 is the edge of the radius of convergence for the double apply + # case. At lr=0.24, the double apply case will very slowly descend + # while the correct case will drop very quickly. + model.compile( + loss="mse", + optimizer=optimizer_legacy.gradient_descent.SGD(0.24), + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones((64 * 2,)) + y = 4.5 * x - 3.0 + + history = model.fit(x, y, batch_size=64, epochs=2, verbose=2) + + # If the gradient apply is duplicated then the loss after 2 epochs will + # be ~0.15, compared to the correct answer of O(1e-7). + self.assertLess(history.history["loss"][-1], 1e-6) + + @test_combinations.run_all_keras_modes + def test_weight_shared_across_layers(self): + class AddWeightLayer(layers_module.Layer): + def __init__(self, trainable_var, non_trainable_var): + self.trainable_var = trainable_var + self.non_trainable_var = non_trainable_var + super().__init__() + + def call(self, inputs): + return inputs + self.trainable_var + + class LayerWithWeightSharedLayers(layers_module.Layer): + def __init__(self): + super().__init__() + shared_trainable_var = tf.Variable(1.0) + shared_non_trainable_var = tf.Variable(1.0, trainable=False) + self.layer1 = AddWeightLayer( + shared_trainable_var, shared_non_trainable_var + ) + self.layer2 = AddWeightLayer( + shared_trainable_var, shared_non_trainable_var + ) + + def call(self, inputs): + return self.layer2(self.layer1(inputs)) + + l = LayerWithWeightSharedLayers() + layers = list(l._flatten_layers(include_self=False, recursive=False)) + self.assertEqual(layers, [l.layer1, l.layer2]) + self.assertEqual( + l.variables, [l.layer1.trainable_var, l.layer1.non_trainable_var] + ) + self.assertEqual(l.trainable_variables, [l.layer1.trainable_var]) + self.assertEqual( + l.non_trainable_variables, [l.layer1.non_trainable_var] + ) + self.assertLen(l.get_weights(), 2) + + @test_combinations.run_all_keras_modes + def test_weight_tracking_for_template(self): + def variable_scoped_function(trainable=True): + return tf.compat.v1.get_variable( + "dummy", + shape=[1], + trainable=trainable, + initializer=tf.compat.v1.zeros_initializer(), + ) + + def nested_template(): + nested1 = tf.compat.v1.make_template( + "nested", variable_scoped_function + ) + nested2 = tf.compat.v1.make_template( + "nested", variable_scoped_function + ) + v1 = nested1() + v2 = nested2() + + # nested1 and nested2 should not share variables + self.assertIsNot(v1, v2) + + # Variables created by nested1 should be isolated from variables + # created by nested2. 
+ self.assertEqual(1, len(nested1.variables)) + self.assertEqual(1, len(nested2.variables)) + self.assertIs(nested1.variables[0], v1) + self.assertIs(nested2.variables[0], v2) + self.assertEqual(1, len(nested1.trainable_variables)) + self.assertEqual(1, len(nested2.trainable_variables)) + self.assertIs(nested1.trainable_variables[0], v1) + self.assertIs(nested2.trainable_variables[0], v2) + self.assertEqual(len(nested1.non_trainable_variables), 0) + self.assertEqual(len(nested2.non_trainable_variables), 0) + return v1, v2 + + tmpl1 = tf.compat.v1.make_template("s1", nested_template) + tmpl2 = tf.compat.v1.make_template("s1", nested_template) + + v1, v2 = tmpl1() + v5, v6 = tmpl2() + + model = training_module.Model() + model.template = tmpl1 + self.assertEqual(2, len(model.variables)) + self.assertIs(model.variables[0], v1) + self.assertIs(model.variables[1], v2) + self.assertEqual(2, len(model.variables)) + self.assertIs(model.trainable_variables[0], v1) + self.assertIs(model.trainable_variables[1], v2) + self.assertEqual(len(model.non_trainable_variables), 0) + model.templates = [tmpl2] + for v, w in zip(model.variables, [v1, v2, v5, v6]): + self.assertIs(v, w) + for v, w in zip(model.trainable_variables, [v1, v2, v5, v6]): + self.assertIs(v, w) + self.assertEqual(len(model.non_trainable_variables), 0) + # Make sure losses, layers, and updates aren't broken by having a + # Template in the mix, which does not expose any updates or losses. + self.assertEqual([], model.layers) + self.assertEqual([], model.updates) + self.assertEqual([], model.losses) + self.assertEqual([], model.templates.layers) + self.assertEqual([], model.templates.updates) + self.assertEqual([], model.templates.losses) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_logs_passed_to_callbacks(self): + input_dim = 5 + num_classes = 1 + + class TestCallback(Callback): + def __init__(self): + super().__init__() + self.epoch_end_logs = None + self.batch_end_logs = None + self.epoch_end_call_count = 0 + self.batch_end_call_count = 0 + + def on_epoch_end(self, epoch, logs=None): + self.epoch_end_logs = logs + self.epoch_end_call_count += 1 + + def on_batch_end(self, batch, logs=None): + self.batch_end_logs = logs + self.batch_end_call_count += 1 + + model = test_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=num_classes, input_dim=input_dim + ) + model.compile( + loss="binary_crossentropy", + metrics=["acc"], + weighted_metrics=["mae"], + optimizer=RMSPropOptimizer(learning_rate=0.01), + run_eagerly=test_utils.should_run_eagerly(), + ) + + np.random.seed(1337) + (x_train, y_train), (_, _) = test_utils.get_test_data( + train_samples=10, + test_samples=10, + input_shape=(input_dim,), + num_classes=num_classes, + ) + + test_callback = TestCallback() + model.fit( + x_train, + y_train, + batch_size=2, + epochs=2, + verbose=0, + callbacks=[test_callback], + validation_data=(x_train, y_train), + ) + self.assertEqual(test_callback.batch_end_call_count, 10) + self.assertEqual(test_callback.epoch_end_call_count, 2) + + self.assertSetEqual( + set(test_callback.batch_end_logs.keys()), + set(["acc", "loss", "mae"]), + ) + self.assertSetEqual( + set(test_callback.epoch_end_logs.keys()), + set(["acc", "loss", "mae", "val_acc", "val_loss", "val_mae"]), + ) + + @test_combinations.run_all_keras_modes + def test_mismatched_output_shape_and_target_shape(self): + model = sequential.Sequential( + [ + layers_module.Dense(2, input_shape=(3, 4)), + layers_module.Dense(5), + ] + ) + model.compile( + 
RMSPropOptimizer(learning_rate=0.001), + loss="sparse_categorical_crossentropy", + run_eagerly=test_utils.should_run_eagerly(), + ) + # Test with Numpy data + x_train = np.random.random((10, 3, 4)).astype(np.float32) + y_train = np.random.randint(0, 5, size=(10, 3)).astype(np.float32) + model.fit(x_train, y_train, batch_size=5, epochs=1) + + # Test with iterator + dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) + dataset = dataset.repeat(10) + dataset = dataset.batch(10) + model.fit(dataset, epochs=1, steps_per_epoch=2) + + if tf.executing_eagerly(): + # Test with eager execution + model.compile( + RMSPropOptimizer(learning_rate=0.001), + loss="sparse_categorical_crossentropy", + run_eagerly=True, + ) + model.fit(x_train, y_train, batch_size=5, epochs=1) + + # Test with eager execution and iterator + model.fit(dataset, epochs=1, steps_per_epoch=2) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_losses_in_defun(self): + layer = layers_module.Dense(1, kernel_regularizer="l1") + layer(tf.ones([1, 10])) + + @tf.function + def get_losses(): + return layer.losses + + self.assertAllEqual( + self.evaluate(layer.losses), self.evaluate(get_losses()) + ) + + @test_combinations.run_all_keras_modes + def test_logging(self): + mock_stdout = io.StringIO() + model = sequential.Sequential() + model.add(layers_module.Dense(10, activation="relu")) + model.add(layers_module.Dense(1, activation="sigmoid")) + model.compile( + RMSPropOptimizer(learning_rate=0.001), + loss="binary_crossentropy", + run_eagerly=test_utils.should_run_eagerly(), + ) + io_utils.enable_interactive_logging() + with tf.compat.v1.test.mock.patch.object(sys, "stdout", mock_stdout): + model.fit( + np.ones((10, 10), "float32"), + np.ones((10, 1), "float32"), + epochs=10, + ) + self.assertTrue("Epoch 5/10" in mock_stdout.getvalue()) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) ) - xdata = np.random.uniform(size=(8, 2)).astype(np.float32) - dataset = tf.data.Dataset.from_tensor_slices((xdata, xdata)) - dataset = dataset.batch(8, drop_remainder=static_batch) - ret = model.predict(dataset, steps=1) - tf.nest.assert_same_structure(ret, {'out': ''}) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_on_batch_error_inconsistent_batch_size(self): - input_node1 = layers_module.Input(shape=(5,)) - input_node2 = layers_module.Input(shape=(5,)) - output_node = layers_module.Concatenate()([input_node1, input_node2]) - output_node = layers_module.Dense(4)(output_node) - model = training_module.Model([input_node1, input_node2], output_node) - model.compile(loss='mse') + def test_training_with_loss_instance(self): + a = layers_module.Input(shape=(3,), name="input_a") + b = layers_module.Input(shape=(3,), name="input_b") + + dense = layers_module.Dense(4, name="dense") + c = dense(a) + d = dense(b) + e = layers_module.Dropout(0.5, name="dropout")(c) + + model = training_module.Model([a, b], [d, e]) + loss_weights = [1.0, 0.5] + model.compile( + RMSPropOptimizer(learning_rate=0.001), + loss=losses.MeanSquaredError(), + metrics=[metrics_module.CategoricalAccuracy(), "mae"], + loss_weights=loss_weights, + ) + + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + + output_d_np = np.random.random((10, 4)) + output_e_np = np.random.random((10, 4)) + + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + epochs=1, + batch_size=5, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", 
"eager"]) + ) + def test_static_batch_in_input_layer(self): + if tf.executing_eagerly(): + self.skipTest("Not inferred in eager.") + + class Counter(Callback): + def __init__(self): + self.batches = 0 + + def on_batch_end(self, batch, logs=None): + self.batches += 1 + + x, y = np.ones((64, 10), "float32"), np.ones((64, 1), "float32") + + for batch_size, expected_batches in [(None, 2), (4, 16)]: + inputs = input_layer.Input(batch_size=batch_size, shape=(10,)) + outputs = layers_module.Dense(1, activation="sigmoid")(inputs) + model = training_module.Model(inputs, outputs) + + model.compile( + optimizer_legacy.adam.Adam(0.001), "binary_crossentropy" + ) + counter = Counter() + model.fit(x, y, callbacks=[counter]) + self.assertEqual(counter.batches, expected_batches) + + model = sequential.Sequential( + [layers_module.Dense(1, batch_input_shape=(batch_size, 10))] + ) + model.compile( + optimizer_legacy.adam.Adam(0.001), "binary_crossentropy" + ) + counter = Counter() + model.fit(x, y, callbacks=[counter]) + self.assertEqual(counter.batches, expected_batches) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_static_batch_in_input_layer_consistency_checks(self): + if tf.executing_eagerly(): + self.skipTest("Not inferred in eager.") + x, y = np.ones((64, 10), "float32"), np.ones((64, 1), "float32") + + inputs = input_layer.Input(batch_size=2, shape=(10,)) + outputs = layers_module.Dense(1, activation="sigmoid")(inputs) + model = training_module.Model(inputs, outputs) + model.compile(optimizer_legacy.adam.Adam(0.001), "binary_crossentropy") + with self.assertRaisesRegex( + ValueError, "incompatible with the specified batch size" + ): + model.fit(x, y, batch_size=4) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_compatible_batch_size_functional_model(self): + class MyLayer(layers_module.Layer): + def call(self, inputs): + return tf.concat(inputs, axis=0) + + input1 = input_layer.Input(batch_size=2, shape=(10,)) + input2 = input_layer.Input(batch_size=3, shape=(10,)) + outputs = MyLayer()([input1, input2]) + with tf.compat.v1.test.mock.patch.object( + logging, "warning" + ) as mock_warn: + training_module.Model([input1, input2], outputs) + self.assertEqual( + mock_warn.call_args_list[0][0][0], + "Found incompatible static batch sizes among the inputs. 
" + "Batch sizes: [2, 3]", + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_calling_subclass_model_on_different_datasets(self): + class SubclassedModel(training_module.Model): + def call(self, inputs): + return inputs * 2 + + model = SubclassedModel() + dataset_one = tf.data.Dataset.from_tensor_slices([[0], [1]]).batch(2) + dataset_two = tf.data.Dataset.from_tensor_slices( + [[3], [4], [5], [6], [7], [8]] + ).batch(2) + self.assertAllEqual([[0], [2]], model.predict(dataset_one, steps=1)) + self.assertAllEqual( + [[6], [8], [10], [12]], model.predict(dataset_two, steps=2) + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_training_on_sparse_categorical_crossentropy_loss_with_softmax( + self, + ): + np.random.seed(1337) + train_x = np.ones((100, 4)) + train_y = np.random.randint(0, 1, size=(100, 1)) + + reference_model = test_utils.get_small_sequential_mlp( + 16, 2, input_dim=4 + ) + reference_model.compile( + loss="sparse_categorical_crossentropy", + optimizer=RMSPropOptimizer(learning_rate=0.001), + run_eagerly=True, + ) + fixed_weights = reference_model.get_weights() + reference_model_loss = reference_model.train_on_batch(train_x, train_y) + + test_model = test_utils.get_small_sequential_mlp(16, 2, input_dim=4) + test_model.compile( + loss="sparse_categorical_crossentropy", + optimizer=RMSPropOptimizer(learning_rate=0.001), + run_eagerly=False, + ) + test_model.set_weights(fixed_weights) + test_model_loss = test_model.train_on_batch(train_x, train_y) + self.assertAlmostEqual(test_model_loss, reference_model_loss, places=4) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_training_on_categorical_crossentropy_loss_with_softmax(self): + np.random.seed(1337) + train_x = np.ones((100, 4)) + train_y = np_utils.to_categorical( + np.random.randint(0, 1, size=(100, 1)), 2 + ) + + reference_model = test_utils.get_small_sequential_mlp( + 16, 2, input_dim=4 + ) + reference_model.compile( + loss="categorical_crossentropy", + optimizer=rmsprop.RMSprop(learning_rate=0.001), + run_eagerly=True, + ) + fixed_weights = reference_model.get_weights() + reference_model_loss = reference_model.train_on_batch(train_x, train_y) + + test_model = test_utils.get_small_sequential_mlp(16, 2, input_dim=4) + test_model.compile( + loss="categorical_crossentropy", + optimizer=RMSPropOptimizer(learning_rate=0.001), + run_eagerly=False, + ) + test_model.set_weights(fixed_weights) + test_model_loss = test_model.train_on_batch(train_x, train_y) + self.assertAlmostEqual(test_model_loss, reference_model_loss, places=4) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_training_on_binary_crossentropy_loss(self): + train_x = np.ones((100, 4), dtype=np.float32) + train_y = np.ones((100, 1), dtype=np.float32) + reference_model = test_utils.get_small_sequential_mlp( + 16, 1, input_dim=4 + ) + reference_model.compile( + loss="binary_crossentropy", + optimizer=RMSPropOptimizer(learning_rate=0.001), + run_eagerly=True, + ) + fixed_weights = reference_model.get_weights() + reference_model_loss = reference_model.train_on_batch(train_x, train_y) + + test_model = test_utils.get_small_sequential_mlp(16, 1, input_dim=4) + test_model.compile( + loss="binary_crossentropy", + optimizer=RMSPropOptimizer(learning_rate=0.001), + run_eagerly=False, + ) + test_model.set_weights(fixed_weights) + test_model_loss = test_model.train_on_batch(train_x, train_y) + 
self.assertAlmostEqual(test_model_loss, reference_model_loss, places=4) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + ("default", 1, 4), + ("integer_two", 2, 2), + ("integer_four", 4, 1), + ("simple_list", [1, 3, 4], 3), + ("duplicated_list", [4, 2, 2], 2), + ) + def test_validation_freq(self, validation_freq, expected_runs): + x, y = np.ones((10, 10)), np.ones((10, 1)) + model = test_utils.get_small_mlp(2, 1, 10) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + + class ValCounter(Callback): + def __init__(self): + self.val_runs = 0 + + def on_test_begin(self, logs=None): + self.val_runs += 1 + + val_counter = ValCounter() + model.fit( + x, + y, + epochs=4, + validation_data=(x, y), + validation_freq=validation_freq, + callbacks=[val_counter], + ) + self.assertEqual(val_counter.val_runs, expected_runs) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_validation_steps_without_data(self): + if tf.executing_eagerly(): + self.skipTest("Check removed in new `fit`") + x, y = np.ones((10, 10)), np.ones((10, 1)) + model = test_utils.get_small_mlp(2, 1, 10) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + + with self.assertRaisesRegex( + ValueError, + "`validation_steps` should not be specified if " + "`validation_data` is None.", + ): + model.fit(x, y, epochs=4, validation_data=None, validation_steps=3) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_layer_with_variable_output(self): + class VariableOutputLayer(layers_module.Layer): + def build(self, input_shape): + self.v = self.add_weight( + "output_var", shape=(2, 5), initializer="ones" + ) + + def call(self, inputs): + return self.v + + model = test_utils.get_model_from_layers( + [VariableOutputLayer(), layers_module.Dense(1)], input_shape=(10,) + ) + # TODO(omalleyt): Make this work with `run_eagerly=True`. 
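+ # Per the TODO above, this test currently passes only in graph + # mode, so the model is compiled with run_eagerly=False.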
+ model.compile("sgd", "mse", run_eagerly=False) + model.fit(np.ones((10, 10)), np.ones((10, 1)), batch_size=2, epochs=5) + + self.assertLen(model.trainable_variables, 3) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @test_utils.enable_v2_dtype_behavior + def test_model_dtype(self): + class AssertTypeLayer(layers_module.Layer): + def call(self, inputs): + assert inputs.dtype.name == self.dtype, ( + "Input tensor has type %s which does not match assert " + "type %s" % (inputs.dtype.name, self.assert_type) + ) + return inputs + 1.0 + + for dtype in ("float16", "float32", "float64"): + model = test_utils.get_model_from_layers( + [AssertTypeLayer(dtype=dtype)], input_shape=(10,) + ) + model.compile( + "sgd", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + x = np.ones((10, 10)) + y = np.ones((10, 10)) + model.fit(x, y) + model.test_on_batch(x, y) + model(x) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @test_utils.enable_v2_dtype_behavior + def test_model_input_dtype(self): + model = test_utils.get_small_mlp(1, 10, 10) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + x = np.ones((10, 10)).astype(np.float64) + y = np.ones((10, 10)).astype(np.float64) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) + model.fit(dataset) + self.assertEqual(model._compute_dtype, "float32") + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_subclassed_model_with_training_arg(self): + class LayerWithTrainingArg(layers_module.Layer): + def call(self, inputs, training=None): + self.training = training + return inputs + + class ModelWithTrainingArg(training_module.Model): + def __init__(self): + super().__init__() + self.l1 = LayerWithTrainingArg() + + def call(self, inputs, training=None): + self.training = training + inputs = self.l1(inputs, training=training) + return inputs + + x = np.zeros((1, 2)) + model = ModelWithTrainingArg() + model.compile( + loss="mse", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, x, epochs=1) + + if tf.executing_eagerly(): + expected_training_arg = True + else: + expected_training_arg = backend.symbolic_learning_phase() + + self.assertIs(model.training, expected_training_arg) + self.assertIs(model.l1.training, expected_training_arg) + + @test_combinations.run_all_keras_modes + def test_error_when_model_is_not_compiled(self): + inputs = input_layer.Input(shape=(1,)) + outputs = layers_module.Dense(1)(inputs) + model = training_module.Model(inputs, outputs) + with self.assertRaisesRegex(RuntimeError, "must compile your model"): + model.fit(np.ones((1, 1)), np.ones((1, 1))) + + class MyModel(training_module.Model): + def call(self, x): + self.add_loss(tf.reduce_sum(x)) + return x + + model = MyModel() + with self.assertRaisesRegex(RuntimeError, "must compile your model"): + model.fit(np.random.random((32, 1)), epochs=2) + + @test_combinations.run_all_keras_modes + @test_utils.enable_v2_dtype_behavior + def test_losses_of_different_dtypes(self): + inp = input_layer.Input(shape=(2,)) + out_1 = layers_module.Dense( + 2, dtype="float32", kernel_regularizer="l2" + )(inp) + out_2 = layers_module.Dense( + 2, dtype="float16", kernel_regularizer="l2" + )(inp) + model = training_module.Model(inp, [out_1, out_2]) + extra_loss = tf.reduce_sum(tf.cast(out_2, "float64")) + model.add_loss(extra_loss) + model.compile( + "sgd", ["mse", "mse"], run_eagerly=test_utils.should_run_eagerly() + ) + x, y = 
np.ones((10, 2)), np.ones((10, 2)) + model.fit(x, [y, y]) + + @test_combinations.run_all_keras_modes + @test_utils.enable_v2_dtype_behavior + def test_losses_of_different_dtypes_with_subclassed_model(self): + class MyModel(training_module.Model): + def build(self, _): + self.dense = layers_module.Dense(2) + + def call(self, inputs): + self.add_loss(tf.cast(tf.nn.l2_loss(inputs), "float64")) + return self.dense(inputs) + + model = MyModel(dtype="float32") + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + x, y = np.ones((10, 2)), np.ones((10, 2)) + model.fit(x, y) + + @test_combinations.run_all_keras_modes + @test_utils.enable_v2_dtype_behavior + def test_regularizer_of_different_dtype(self): + inp = input_layer.Input(shape=(2,)) + + def regularizer(weight): + return tf.cast(tf.nn.l2_loss(weight), "float64") + + out = layers_module.Dense( + 2, dtype="float32", kernel_regularizer=regularizer + )(inp) + model = training_module.Model(inp, out) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + x, y = np.ones((10, 2)), np.ones((10, 2)) + model.fit(x, y) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_outputs_are_floats(self): + x, y = np.ones((10, 1)), np.ones((10, 1)) + model = sequential.Sequential([layers_module.Dense(1)]) + model.compile( + "sgd", + "mse", + metrics=["accuracy"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + history = model.fit(x, y, epochs=2) + self.assertIsInstance(history.history["loss"][0], float) + self.assertIsInstance(history.history["accuracy"][0], float) + + loss, accuracy = model.train_on_batch(x, y) + self.assertIsInstance(loss, float) + self.assertIsInstance(accuracy, float) + + loss, accuracy = model.evaluate(x, y) + self.assertIsInstance(loss, float) + self.assertIsInstance(accuracy, float) + + loss, accuracy = model.test_on_batch(x, y) + self.assertIsInstance(loss, float) + self.assertIsInstance(accuracy, float) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_int_output(self): + x, y = np.ones((10, 1)), np.ones((10, 1)) + model = sequential.Sequential([layers_module.Dense(1)]) + + class MyMetric(metrics_module.Metric): + def update_state(self, y_true, y_pred, sample_weight=None): + del y_true, y_pred, sample_weight + + def result(self): + return tf.constant(1, dtype="int64") + + model.compile( + "sgd", + "mse", + metrics=[MyMetric()], + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit(x, y, epochs=2) + self.assertIsInstance(history.history["my_metric"][0], int) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @test_utils.enable_v2_dtype_behavior + def test_mixed_precision(self): + x, y = np.ones((10, 1)), np.ones((10, 1)) + policy.set_global_policy("mixed_float16") + model = sequential.Sequential([layers_module.Dense(1)]) + optimizer = sgd_experimental.SGD() + model.compile( + optimizer, + "mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, epochs=2) + policy.set_global_policy("float32") + + @test_combinations.run_all_keras_modes + def test_calling_aggregate_gradient(self): + class _Optimizer(optimizer_legacy.gradient_descent.SGD): + """Mock optimizer to check if _aggregate_gradient is called.""" + + _HAS_AGGREGATE_GRAD = True + + def __init__(self): + self.aggregate_gradients_called = False + super().__init__(name="MyOptimizer") + + def _aggregate_gradients(self, grads): + self.aggregate_gradients_called = True + return super()._aggregate_gradients(grads) + + mock_optimizer = 
_Optimizer() + + model = sequential.Sequential() + model.add(layers_module.Dense(10, activation="relu")) + + model.compile( + mock_optimizer, "mse", run_eagerly=test_utils.should_run_eagerly() + ) + x, y = np.ones((10, 10)), np.ones((10, 10)) + model.fit(x, y) + self.assertEqual(model.optimizer.aggregate_gradients_called, True) + + class _OptimizerOverrideApplyGradients(_Optimizer): + """Override apply_gradients. + + To test the case where the optimizer does not define the + experimental_aggregate_gradients parameter. + """ + + _HAS_AGGREGATE_GRAD = False + + def apply_gradients(self, grads_and_vars, name=None): + return super().apply_gradients(grads_and_vars, name) + + mock_optimizer = _OptimizerOverrideApplyGradients() + model.compile( + mock_optimizer, "mse", run_eagerly=test_utils.should_run_eagerly() + ) + x, y = np.ones((10, 10)), np.ones((10, 10)) + model.fit(x, y) + self.assertEqual(model.optimizer.aggregate_gradients_called, True) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_gradients_are_none(self): + class DenseWithExtraWeight(layers_module.Dense): + def build(self, input_shape): + # Gradients w.r.t. extra_weights are None + self.extra_weight_1 = self.add_weight( + "extra_weight_1", shape=(), initializer="ones" + ) + super().build(input_shape) + self.extra_weight_2 = self.add_weight( + "extra_weight_2", shape=(), initializer="ones" + ) + + model = sequential.Sequential( + [DenseWithExtraWeight(4, input_shape=(4,))] + ) + # Test clipping can handle None gradients + opt = optimizer_legacy.adam.Adam(clipnorm=1.0, clipvalue=1.0) + model.compile(opt, "mse", run_eagerly=test_utils.should_run_eagerly()) + inputs = np.random.normal(size=(64, 4)) + targets = np.random.normal(size=(64, 4)) + old_kernel = model.get_weights()[1] + model.fit(inputs, targets) + new_kernel = model.get_weights()[1] + self.assertNotAllEqual(old_kernel, new_kernel) + + @test_combinations.run_all_keras_modes + def test_layer_ordering(self): + class MyLayer(layers_module.Layer): + pass + + class MyModel(training_module.Model): + def __init__(self, name): + super().__init__(name=name) + + self.weight = tf.Variable(0, name=name) + + self.direct_sublayer = MyLayer(name="direct") + self.direct_sublayer.d = {"d": MyLayer(name="direct/dict")} + + self.dict_sublayer = {"d": MyLayer(name="dict")} + self.dict_sublayer["d"].direct = MyLayer(name="dict/direct") + + model = MyModel("model") + # All sublayers, including self and recursive sublayers. + self.assertEqual( + ["model", "direct", "direct/dict", "dict", "dict/direct"], + [l.name for l in model._flatten_layers()], + ) + # Only direct sublayers, including those in data structures. 
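+ # The nested "direct/dict" and "dict/direct" layers are reachable + # only through _flatten_layers(), not through model.layers.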
+ self.assertEqual(["direct", "dict"], [l.name for l in model.layers]) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_trainable_state_setting(self): + class UpdateLayer(layers_module.Layer): + def __init__(self): + super().__init__() + self.v = tf.Variable(0.0, trainable=False) + + def call(self, x): + self.add_update(lambda: self.v.assign_add(1.0)) + return x * self.v + + layer = UpdateLayer() + model_with_updates = sequential.Sequential([layer]) + model_with_updates.compile( + "sgd", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + layer.trainable = False + model_without_updates = sequential.Sequential([layer]) + model_without_updates.compile( + "sgd", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + x, y = np.ones((10, 1)), np.ones((10, 1)) + + self.assertEqual(self.evaluate(layer.v), 0.0) + model_with_updates.fit(x, y, batch_size=10) + # assign_add called. + self.assertEqual(self.evaluate(layer.v), 1.0) + model_without_updates.fit(x, y, batch_size=10) + # assign_add not called. + self.assertEqual(self.evaluate(layer.v), 1.0) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @parameterized.named_parameters( + ("numpy_array", "numpy_array"), + ("dataset_array", "dataset_array"), + ("dataset_dict", "dataset_dict"), + ) + def test_single_input_no_tuple_wrapping(self, input_type): + x = np.ones((10, 1)) + + if input_type == "numpy_array": + batch_size = 3 + expected_data_type = tf.Tensor + elif input_type == "dataset_array": + x = tf.data.Dataset.from_tensor_slices(x).batch(3) + batch_size = None + expected_data_type = tf.Tensor + else: + x = {"my_input": x} + x = tf.data.Dataset.from_tensor_slices(x).batch(3) + batch_size = None + expected_data_type = dict + + test_case = self + + class MyModel(training_module.Model): + def train_step(self, data): + # No tuple wrapping for single x input and no targets. 
+ test_case.assertIsInstance(data, expected_data_type) + return super().train_step(data) + + def test_step(self, data): + test_case.assertIsInstance(data, expected_data_type) + return super().test_step(data) + + def predict_step(self, data): + test_case.assertIsInstance(data, expected_data_type) + return super().predict_step(data) + + inputs = layers_module.Input(shape=(1,), name="my_input") + outputs = layers_module.Dense(1)(inputs) + model = MyModel(inputs, outputs) + model.add_loss(tf.reduce_sum(outputs)) + model.compile("sgd") + model.fit(x, batch_size=batch_size) + model.evaluate(x, batch_size=batch_size) + model.predict(x, batch_size=batch_size) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @parameterized.named_parameters( + ("custom_metrics", False, True), + ("compiled_metrics", True, False), + ("both_compiled_and_custom_metrics", True, True), + ) + def test_evaluate_with_custom_test_step( + self, use_compiled_metrics, use_custom_metrics + ): + class MyModel(training_module.Model): + def test_step(self, data): + x, y = data + pred = self(x) + metrics = {} + if use_compiled_metrics: + self.compiled_metrics.update_state(y, pred) + self.compiled_loss(y, pred) + for metric in self.metrics: + metrics[metric.name] = metric.result() + if use_custom_metrics: + custom_metrics = { + "mean": tf.reduce_mean(pred), + "sum": tf.reduce_sum(pred), + } + metrics.update(custom_metrics) + return metrics + + inputs = layers_module.Input((2,)) + outputs = layers_module.Dense(3)(inputs) + model = MyModel(inputs, outputs) + if use_compiled_metrics: + model.compile( + "adam", + "mse", + metrics=["mae", "mape"], + run_eagerly=test_utils.should_run_eagerly(), + ) + else: + model.compile( + "adam", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + x = np.random.random((4, 2)) + y = np.random.random((4, 3)) + results_list = model.evaluate(x, y) + results_dict = model.evaluate(x, y, return_dict=True) + self.assertLen(results_list, len(results_dict)) + if use_compiled_metrics and use_custom_metrics: + self.assertLen(results_list, 5) + self.assertEqual( + results_list, + [ + results_dict["loss"], + results_dict["mae"], + results_dict["mape"], + results_dict["mean"], + results_dict["sum"], + ], + ) + if use_compiled_metrics and not use_custom_metrics: + self.assertLen(results_list, 3) + self.assertEqual( + results_list, + [ + results_dict["loss"], + results_dict["mae"], + results_dict["mape"], + ], + ) + if not use_compiled_metrics and use_custom_metrics: + self.assertLen(results_list, 2) + self.assertEqual( + results_list, [results_dict["mean"], results_dict["sum"]] + ) + + @test_combinations.run_all_keras_modes + @test_combinations.run_with_all_model_types + def test_model_make_function(self): + layers = [ + layers_module.Dense(10, dtype=np.float64), + layers_module.Dense(10, dtype=np.float64), + ] + model = test_utils.get_model_from_layers(layers, input_shape=(1,)) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + + original_train_function = model.make_train_function() + self.assertIsNotNone(original_train_function) + self.assertEqual(model.make_train_function(), original_train_function) + # Check that we regenerate it without reusing the cached version. 
+ self.assertNotEqual( + model.make_train_function(force=True), original_train_function + ) + + original_test_function = model.make_test_function() + self.assertIsNotNone(original_test_function) + self.assertEqual(model.make_test_function(), original_test_function) + # Check that we regenerate it without reusing the cached version. + self.assertNotEqual( + model.make_test_function(force=True), original_test_function + ) + + original_predict_function = model.make_predict_function() + self.assertIsNotNone(original_predict_function) + self.assertEqual( + model.make_predict_function(), original_predict_function + ) + # Check that we regenerate it without reusing the cached version. + self.assertNotEqual( + model.make_predict_function(force=True), original_predict_function + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_custom_compute_metrics(self): + class CustomMetric(metrics_module.Mean): + def sq_diff_plus_x(self, x, y_true, y_pred): + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + sq_diff_plus_x = tf.add( + x, tf.math.squared_difference(y_pred, y_true) + ) + return backend.mean(sq_diff_plus_x, axis=-1) + + def update_state(self, x, y_true, y_pred, sample_weight=None): + matches = self.sq_diff_plus_x(x, y_true, y_pred) + return super().update_state(matches) + + class MyModel(sequential.Sequential): + def compute_metrics(self, x, y, y_pred, sample_weight): + metric_results = super().compute_metrics( + x, y, y_pred, sample_weight + ) + self.custom_metric.update_state(x, y, y_pred, sample_weight) + metric_results[ + "custom_metric_name" + ] = self.custom_metric.result() + return metric_results + + tensors = tf.random.uniform((10, 10)), tf.random.uniform((10,)) + dataset = tf.data.Dataset.from_tensor_slices(tensors).repeat().batch(1) + model = MyModel([layers_module.Dense(10)]) + model.custom_metric = CustomMetric("my_metric") + initial_result = model.custom_metric.result() + optimizer = optimizer_legacy.gradient_descent.SGD() + model.compile(optimizer, loss="mse", steps_per_execution=10) + model.fit(dataset, epochs=2, steps_per_epoch=10, verbose=2) + after_fit_result = model.custom_metric.result() + + self.assertEqual(self.evaluate(initial_result), 0.0) + self.assertNotEqual( + self.evaluate(initial_result), self.evaluate(after_fit_result) + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_custom_compute_loss(self): + class MyModel(training_module.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.loss_metric = metrics_module.Mean(name="loss") + + def compute_loss(self, x, y, y_pred, sample_weight): + loss = tf.reduce_mean(tf.math.squared_difference(y_pred, y)) + loss += tf.add_n(self.losses) + self.loss_metric.update_state(loss) + return loss + + def reset_metrics(self): + self.loss_metric.reset_states() + + @property + def metrics(self): + return [self.loss_metric] + + tensors = tf.random.uniform((10, 10)), tf.random.uniform((10,)) + dataset = tf.data.Dataset.from_tensor_slices(tensors).repeat().batch(1) + + inputs = layers_module.Input(shape=(10,), name="my_input") + outputs = layers_module.Dense(10)(inputs) + model = MyModel(inputs, outputs) + model.add_loss(tf.reduce_sum(outputs)) + + optimizer = optimizer_legacy.gradient_descent.SGD() + model.compile(optimizer, loss="mse", steps_per_execution=10) + history = model.fit(dataset, epochs=2, steps_per_epoch=10) + self.assertLen(history.history["loss"], 2) + self.assertAllClose( + history.history["loss"][1], 
model.loss_metric.result() + ) - with self.assertRaisesRegex(ValueError, 'Data cardinality is ambiguous'): - model.train_on_batch([np.ones((10, 5)), np.ones((10, 5))], - np.ones((11, 4))) + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @parameterized.named_parameters( + ("mixed_float16", "mixed_float16"), ("float32", "float32") + ) + def test_ema_overwrite(self, test_policy): + if not tf.__internal__.tf2.enabled(): + self.skipTest("EMA optimizer is only available in TF2.") + policy.set_global_policy(test_policy) + model = sequential.Sequential() + model.add(input_layer.Input(shape=(4,))) + model.add(layers_module.Dense(1, activation="relu")) + + tensors = tf.random.uniform((4, 4)), tf.random.uniform((4,)) + dataset = tf.data.Dataset.from_tensor_slices(tensors).repeat().batch(1) + + optimizer = sgd_experimental.SGD(use_ema=True, ema_momentum=1) + model.compile(optimizer, loss="mse", steps_per_execution=10) + initial_value = tf.Variable(model.trainable_variables[0]) + history = model.fit(dataset, epochs=2, steps_per_epoch=10) + self.assertLen(history.history["loss"], 2) + self.assertAllClose(initial_value, model.trainable_variables[0]) + policy.set_global_policy("float32") + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_get_verbosity(self): + class MyStrategy(tf.distribute.Strategy): + def __init__(self): + self._should_use_with_coordinator = True + + with self.assertRaisesRegex(ValueError, "`verbose=1` is not allowed"): + training_module._get_verbosity(1, MyStrategy()) + + io_utils.enable_interactive_logging() + self.assertEqual( + training_module._get_verbosity("auto", MyStrategy()), 2 + ) + self.assertEqual( + training_module._get_verbosity( + "auto", tf.distribute.MirroredStrategy() + ), + 1, + ) + self.assertEqual( + training_module._get_verbosity(2, tf.distribute.MirroredStrategy()), + 2, + ) + + io_utils.disable_interactive_logging() + self.assertEqual( + training_module._get_verbosity( + "auto", tf.distribute.MirroredStrategy() + ), + 2, + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_save_spec(self): + class Model(training_module.Model): + def call( + self, arg_input_1, arg_input_2, keyword_input, training=None + ): + return 0 + + # Test subclassed model save specs. + model = Model() + model( + tf.ones([1, 1]), + tf.ones([2, 2]), + keyword_input=tf.ones([3, 3]), + training=False, + ) + spec = model.save_spec(dynamic_batch=False) + self.assertEqual(spec[0][0].shape.as_list(), [1, 1]) + self.assertEqual(spec[0][1].shape.as_list(), [2, 2]) + self.assertEqual(spec[1]["keyword_input"].shape.as_list(), [3, 3]) + spec = model.save_spec(dynamic_batch=True) + self.assertEqual(spec[0][0].shape.as_list(), [None, 1]) + + # Test functional model save specs. + input_1 = layers_module.Input((1,), batch_size=1) + input_2 = layers_module.Input((2,), batch_size=2) + input_3 = layers_module.Input((3,), batch_size=3) + output = model(input_1, input_2, keyword_input=input_3, training=True) + functional = training_module.Model([input_1, input_2, input_3], output) + # Functional models should ignore dynamic_batch if the input layers have + # a known batch size. 
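+ # dynamic_batch=True is requested below, but the returned specs + # keep the static batch sizes 1, 2, and 3.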
+ spec = functional.save_spec(dynamic_batch=True) + input_specs = spec[0][0] + self.assertEqual(input_specs[0].shape.as_list(), [1, 1]) + self.assertEqual(input_specs[1].shape.as_list(), [2, 2]) + self.assertEqual(input_specs[2].shape.as_list(), [3, 3]) + + +class TestAutotuneSPE(test_combinations.TestCase): + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_compile_fit_with_jit_compile(self): + # Test with jit_compile = True + model = sequential.Sequential([layers_module.Dense(1)]) + model.compile( + "sgd", + loss="mse", + run_eagerly=False, + jit_compile=True, + steps_per_execution="auto", + ) + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) + # Test compile and fit for an RNN model + model = sequential.Sequential() + model.add( + layers_module.TimeDistributed( + layers_module.Embedding(5, 6, mask_zero=True), + input_shape=(None, None), + ) + ) # N by t_1 by t_2 by 6 + model.add( + layers_module.TimeDistributed( + layers_module.SimpleRNN(7, return_sequences=True) + ) + ) + model.add( + layers_module.TimeDistributed( + layers_module.SimpleRNN(8, return_sequences=False) + ) + ) + model.add(layers_module.SimpleRNN(1, return_sequences=False)) + model.compile( + optimizer="sgd", + loss="mse", + jit_compile=True, + steps_per_execution="auto", + ) + model_input = np.random.randint( + low=1, high=5, size=(10, 3, 4), dtype="int32" + ) + for i in range(4): + model_input[i, i:, i:] = 0 + model.fit( + model_input, np.random.random((10, 1)), epochs=1, batch_size=10 + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_compile_fit_evaluate_predict_with_mirrored_strategy(self): + # Test with jit_compile = True + strategy = tf.distribute.MirroredStrategy() + with strategy.scope(): + model = sequential.Sequential([layers_module.Dense(1)]) + model.compile( + "sgd", + loss="mse", + run_eagerly=False, + jit_compile=True, + steps_per_execution="auto", + ) + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) + model.evaluate(x, y) + model.predict(x) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_spe_tune_compile_fit_then_false_predict(self): + strategy = tf.distribute.MirroredStrategy() + with strategy.scope(): + model = sequential.Sequential([layers_module.Dense(1)]) + model.compile( + "sgd", + loss="mse", + run_eagerly=False, + jit_compile=True, + steps_per_execution="auto", + ) + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) + model.evaluate(x, y) + model.autotune_steps_per_execution = False + model.predict(x) + assert model.autotune_steps_per_execution == False + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_spe_tune_set_after_compile(self): + model = sequential.Sequential([layers_module.Dense(1)]) + model.compile( + "sgd", + loss="mse", + run_eagerly=False, + jit_compile=True, + steps_per_execution=5, + ) + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) + assert model._steps_per_execution_tuner is None + model.autotune_steps_per_execution = True + model.fit(x, y, epochs=2) + assert model.steps_per_execution.numpy().item() == 5 + assert model._steps_per_execution_tuner + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_spe_tune_set_before_compile(self): + model = sequential.Sequential([layers_module.Dense(1)]) + model.steps_per_execution = 5 + model.compile( + "sgd", + loss="mse", + run_eagerly=False, + jit_compile=True, + steps_per_execution="auto", + ) + assert
model.steps_per_execution.numpy().item() == 5 + assert model._steps_per_execution_tuner + + x, y = np.ones((10, 1)), np.ones((10, 1)) + model.fit(x, y, epochs=2) - with self.assertRaisesRegex(ValueError, 'Data cardinality is ambiguous'): - model.test_on_batch([np.ones((10, 5)), np.ones((10, 5))], - np.ones((11, 4))) - with self.assertRaisesRegex(ValueError, 'Data cardinality is ambiguous'): - model.predict_on_batch([np.ones((10, 5)), np.ones((11, 5))]) +class TestExceptionsAndWarnings(test_combinations.TestCase): + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @test_combinations.run_with_all_model_types + def test_fit_on_no_output(self): + inputs = layers_module.Input((3,)) + outputs = layers_module.Dense(2)(inputs) + model = training_module.Model(inputs, outputs) + model.compile("rmsprop", "mse") + x = np.zeros((32, 3)) + with self.assertRaisesRegex(ValueError, "Target data is missing..*"): + model.fit(x) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @test_combinations.run_with_all_model_types + def test_fit_on_wrong_output_type(self): + inputs1 = layers_module.Input((3,), name="a") + inputs2 = layers_module.Input((3,), name="b") + x = layers_module.Concatenate()([inputs1, inputs2]) + outputs = layers_module.Dense(2, name="c")(x) + model = training_module.Model([inputs1, inputs2], outputs) + model.compile("rmsprop", "mse") + x = np.zeros((32, 3)) + y = np.zeros((32, 2)) + with self.assertRaisesRegex(ValueError, "Target data is missing..*"): + model.fit({"a": x, "b": x, "c": y}) + + @test_combinations.run_all_keras_modes + def test_compile_warning_for_loss_missing_output(self): + with self.cached_session(): + inp = layers_module.Input(shape=(16,), name="input_a") + out_1 = layers_module.Dense(8, name="dense_1")(inp) + out_2 = layers_module.Dense( + 3, activation="softmax", name="dense_2" + )(out_1) + model = training_module.Model(inputs=[inp], outputs=[out_1, out_2]) + optimizer = RMSPropOptimizer(learning_rate=0.001) + + model.compile( + optimizer, + loss={ + "dense_2": "categorical_crossentropy", + }, + metrics={ + "dense_2": "categorical_accuracy", + "dense_1": metrics_module.CategoricalAccuracy(), + }, + run_eagerly=test_utils.should_run_eagerly(), + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_predict_error_with_empty_x(self): + inputs = layers_module.Input(shape=(2,)) + outputs = layers_module.Dense(4)(inputs) + model = training_module.Model(inputs=inputs, outputs=outputs) + model.compile(loss="mse") + + with self.assertRaisesRegex( + ValueError, "Expected input data to be non-empty." 
+ ): + model.predict(np.array([])) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @parameterized.named_parameters( + ("dynamic", 0, False), + ("dynamic_multistep", 10, False), + ("static", 0, True), + ("static_multistep", 10, True), + ) + def test_predict_structured(self, spe, static_batch): + inputs = layers_module.Input(shape=(2,)) + outputs = layers_module.Dense(2)(inputs) + model = training_module.Model( + inputs=inputs, + outputs={"out": outputs}, + ) + model.compile( + loss="mse", + steps_per_execution=spe, + run_eagerly=test_utils.should_run_eagerly(), + ) + xdata = np.random.uniform(size=(8, 2)).astype(np.float32) + dataset = tf.data.Dataset.from_tensor_slices((xdata, xdata)) + dataset = dataset.batch(8, drop_remainder=static_batch) + ret = model.predict(dataset, steps=1) + tf.nest.assert_same_structure(ret, {"out": ""}) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_on_batch_error_inconsistent_batch_size(self): + input_node1 = layers_module.Input(shape=(5,)) + input_node2 = layers_module.Input(shape=(5,)) + output_node = layers_module.Concatenate()([input_node1, input_node2]) + output_node = layers_module.Dense(4)(output_node) + model = training_module.Model([input_node1, input_node2], output_node) + model.compile(loss="mse") + + with self.assertRaisesRegex( + ValueError, "Data cardinality is ambiguous" + ): + model.train_on_batch( + [np.ones((10, 5)), np.ones((10, 5))], np.ones((11, 4)) + ) + + with self.assertRaisesRegex( + ValueError, "Data cardinality is ambiguous" + ): + model.test_on_batch( + [np.ones((10, 5)), np.ones((10, 5))], np.ones((11, 4)) + ) + + with self.assertRaisesRegex( + ValueError, "Data cardinality is ambiguous" + ): + model.predict_on_batch([np.ones((10, 5)), np.ones((11, 5))]) class LossWeightingTest(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes - def test_class_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 10 - weighted_class = 3 - weight = .5 - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - learning_rate = 0.001 - - model = test_utils.get_small_sequential_mlp( - num_hidden=10, num_classes=num_classes, input_dim=input_dim) - model.compile( - loss='categorical_crossentropy', - metrics=['acc', metrics_module.CategoricalAccuracy()], - weighted_metrics=['mae', metrics_module.CategoricalAccuracy()], - optimizer=RMSPropOptimizer(learning_rate=learning_rate), - run_eagerly=test_utils.should_run_eagerly()) - - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_test = y_test.copy() - # convert class vectors to binary class matrices - y_train = np_utils.to_categorical(y_train, num_classes) - y_test = np_utils.to_categorical(y_test, num_classes) - test_ids = np.where(int_y_test == np.array(weighted_class))[0] - - class_weight = dict([(i, 1.) 
for i in range(num_classes)]) - class_weight[weighted_class] = weight - - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - class_weight=class_weight, - validation_data=(x_train, y_train)) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight, - validation_split=0.1) - - model.train_on_batch( - x_train[:batch_size], y_train[:batch_size], class_weight=class_weight) - ref_score = model.evaluate(x_test, y_test, verbose=0) # pylint: disable=unused-variable - score = model.evaluate( # pylint: disable=unused-variable - x_test[test_ids, :], y_test[test_ids, :], verbose=0) - # TODO(b/152990697): Fix the class weights test here. - # self.assertLess(score[0], ref_score[0]) - - @test_combinations.run_all_keras_modes - def test_temporal_sample_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 10 - weighted_class = 3 - weight = 10. - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - timesteps = 3 - learning_rate = 0.001 - - with self.cached_session(): - model = sequential.Sequential() - model.add( - layers_module.TimeDistributed( - layers_module.Dense(num_classes), - input_shape=(timesteps, input_dim))) - model.add(layers_module.Activation('softmax')) - - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_test = y_test.copy() - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = np_utils.to_categorical(y_train, num_classes) - y_test = np_utils.to_categorical(y_test, num_classes) - test_ids = np.where(int_y_test == np.array(weighted_class))[0] - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = weight - - temporal_x_train = np.reshape(x_train, (len(x_train), 1, - x_train.shape[1])) - temporal_x_train = np.repeat(temporal_x_train, timesteps, axis=1) - temporal_x_test = np.reshape(x_test, (len(x_test), 1, x_test.shape[1])) - temporal_x_test = np.repeat(temporal_x_test, timesteps, axis=1) - - temporal_y_train = np.reshape(y_train, (len(y_train), 1, - y_train.shape[1])) - temporal_y_train = np.repeat(temporal_y_train, timesteps, axis=1) - temporal_y_test = np.reshape(y_test, (len(y_test), 1, y_test.shape[1])) - temporal_y_test = np.repeat(temporal_y_test, timesteps, axis=1) - - temporal_sample_weight = np.reshape(sample_weight, (len(sample_weight), - 1)) - temporal_sample_weight = np.repeat( - temporal_sample_weight, timesteps, axis=1) - - model.compile( - RMSPropOptimizer(learning_rate=learning_rate), - loss='categorical_crossentropy', - metrics=['acc', metrics_module.CategoricalAccuracy()], - weighted_metrics=['mae', metrics_module.CategoricalAccuracy()], - sample_weight_mode='temporal', - run_eagerly=test_utils.should_run_eagerly()) - - model.fit( - temporal_x_train, - temporal_y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=temporal_sample_weight) - model.fit( - temporal_x_train, - temporal_y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=temporal_sample_weight, - validation_split=0.1) - - model.train_on_batch( - temporal_x_train[:batch_size], - temporal_y_train[:batch_size], - sample_weight=temporal_sample_weight[:batch_size]) - 
model.test_on_batch( - temporal_x_train[:batch_size], - temporal_y_train[:batch_size], - sample_weight=temporal_sample_weight[:batch_size]) - ref_score = model.evaluate(temporal_x_test, temporal_y_test, verbose=0) - if not tf.executing_eagerly(): - score = model.evaluate( - temporal_x_test[test_ids], temporal_y_test[test_ids], verbose=0) - self.assertLess(score[0], ref_score[0]) - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types(exclude_models='sequential') - def test_fit_with_incorrect_weights(self): - input_a = layers_module.Input(shape=(3,), name='input_a') - input_b = layers_module.Input(shape=(3,), name='input_b') - - dense = layers_module.Dense(2, name='output_1') - dropout = layers_module.Dropout(0.5, name='output_2') - branch_a = [input_a, dense] - branch_b = [input_b, dense, dropout] - - model = test_utils.get_multi_io_model(branch_a, branch_b) - model.compile( - optimizer='adam', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - x = np.random.random((10, 3)) - y = np.random.random((10, 2)) - - with self.assertRaises(ValueError): - model.fit([x, x], [y, y], epochs=1, sample_weight={'unknown': x}) - - with self.assertRaises(ValueError): - model.fit([x, x], [y, y], epochs=1, class_weight={'unknown': 1}) - - @test_combinations.run_all_keras_modes - def test_default_sample_weight(self): - """Verifies that fit works without having to set sample_weight.""" - num_classes = 5 - input_dim = 5 - timesteps = 3 - learning_rate = 0.001 - - with self.cached_session(): - model = sequential.Sequential() - model.add( - layers_module.TimeDistributed( - layers_module.Dense(num_classes), - input_shape=(timesteps, input_dim))) - - x = np.random.random((10, timesteps, input_dim)) - y = np.random.random((10, timesteps, num_classes)) - optimizer = RMSPropOptimizer(learning_rate=learning_rate) - - # sample_weight_mode is a list and mode value is None - model.compile( - optimizer, - loss='mse', - sample_weight_mode=[None], - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, batch_size=10) - - # sample_weight_mode is a list and mode value is `temporal` - model.compile( - optimizer, - loss='mse', - sample_weight_mode=['temporal'], - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, batch_size=10) - - # sample_weight_mode is a dict and mode value is None - model.compile( - optimizer, - loss='mse', - sample_weight_mode={'time_distributed': None}, - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, batch_size=10) - - # sample_weight_mode is a dict and mode value is `temporal` - model.compile( - optimizer, - loss='mse', - sample_weight_mode={'time_distributed': 'temporal'}, - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, batch_size=10) - - # sample_weight_mode is a not a list/dict and mode value is None - model.compile( - optimizer, - loss='mse', - sample_weight_mode=None, - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, batch_size=10) - - # sample_weight_mode is a not a list/dict and mode value is `temporal` - model.compile( - optimizer, - loss='mse', - sample_weight_mode='temporal', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, batch_size=10) - - def test_sample_weight_tensor(self): - """Tests that sample weight may be defined as a tensor in the graph.""" - with tf.compat.v1.get_default_graph().as_default(): - # Create a simple pass-through model - inputs = layers_module.Input(shape=1, name='input_layer') - 
model = training_module.Model(inputs=inputs, outputs=inputs) - model.compile( - loss='mean_absolute_error', - optimizer='adam') - - # Prepare sample weights iterator tensor - sample_weights = tf.constant( - [[0, .4, 1, 1], [2, .4, .3, 1]]) - dataset = tf.data.Dataset.from_tensor_slices(sample_weights) - sample_weights = tf.compat.v1.data.make_one_shot_iterator( - dataset).get_next() - sample_weights = training_utils_v1.standardize_sample_weights( - sample_weights, model.output_names) - - # Update model loss with sample weight tensor. - model._compile_weights_loss_and_weighted_metrics(sample_weights) - - feeds = {'input_layer:0': [[0], [0], [0], [0]], - 'input_layer_target:0': [[1], [1], [1], [1]]} - with self.cached_session() as sess: - self.assertAllClose( - (.4 + 1 + 1) / 4, sess.run(model.total_loss, feed_dict=feeds)) - self.assertAllClose( - (2+ .4 + .3 + 1) / 4, sess.run(model.total_loss, feed_dict=feeds)) + @test_combinations.run_all_keras_modes + def test_class_weights(self): + num_classes = 5 + batch_size = 5 + epochs = 10 + weighted_class = 3 + weight = 0.5 + train_samples = 1000 + test_samples = 1000 + input_dim = 5 + learning_rate = 0.001 + + model = test_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=num_classes, input_dim=input_dim + ) + model.compile( + loss="categorical_crossentropy", + metrics=["acc", metrics_module.CategoricalAccuracy()], + weighted_metrics=["mae", metrics_module.CategoricalAccuracy()], + optimizer=RMSPropOptimizer(learning_rate=learning_rate), + run_eagerly=test_utils.should_run_eagerly(), + ) + + np.random.seed(1337) + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes, + ) + int_y_test = y_test.copy() + # convert class vectors to binary class matrices + y_train = np_utils.to_categorical(y_train, num_classes) + y_test = np_utils.to_categorical(y_test, num_classes) + test_ids = np.where(int_y_test == np.array(weighted_class))[0] + + class_weight = dict([(i, 1.0) for i in range(num_classes)]) + class_weight[weighted_class] = weight + + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=epochs // 3, + verbose=0, + class_weight=class_weight, + validation_data=(x_train, y_train), + ) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=epochs // 2, + verbose=0, + class_weight=class_weight, + ) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=epochs // 2, + verbose=0, + class_weight=class_weight, + validation_split=0.1, + ) + + model.train_on_batch( + x_train[:batch_size], + y_train[:batch_size], + class_weight=class_weight, + ) + ref_score = model.evaluate(x_test, y_test, verbose=0) # noqa: F841 + score = model.evaluate( # noqa: F841 + x_test[test_ids, :], y_test[test_ids, :], verbose=0 + ) + # TODO(b/152990697): Fix the class weights test here. 
+ # self.assertLess(score[0], ref_score[0]) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_segmentation_class_weights(self): + num_channels = 3 + num_classes = 5 + batch_size = 2 + image_width = 8 + + input_shape = (batch_size, image_width, image_width, num_channels) + output_shape = (batch_size, image_width, image_width, num_classes) + + model = sequential.Sequential([layers_module.Conv2D(num_classes, 1)]) + + model.compile( + loss="categorical_crossentropy", + metrics=["acc", metrics_module.CategoricalAccuracy()], + weighted_metrics=["mae", metrics_module.CategoricalAccuracy()], + optimizer="adam", + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = tf.random.uniform(input_shape) + y = tf.random.uniform(output_shape, dtype=tf.int32, maxval=num_classes) + + # Class weights are just the class value + 1 + class_weight = dict([(i, i + 1) for i in range(num_classes)]) + + # This test simply asserts that the model can be compiled and fit + # can run without error. Verification that the class weights are + # applied correctly is performed in data_adapter_test. + model.fit(x, y, class_weight=class_weight, steps_per_epoch=1) + + sample_weight = np.array([x + 1 for x in range(batch_size)]) + model.fit( + x, + y, + class_weight=class_weight, + sample_weight=sample_weight, + steps_per_epoch=1, + ) + + @test_combinations.run_all_keras_modes + def test_temporal_sample_weights(self): + num_classes = 5 + batch_size = 5 + epochs = 10 + weighted_class = 3 + weight = 10.0 + train_samples = 1000 + test_samples = 1000 + input_dim = 5 + timesteps = 3 + learning_rate = 0.001 + + with self.cached_session(): + model = sequential.Sequential() + model.add( + layers_module.TimeDistributed( + layers_module.Dense(num_classes), + input_shape=(timesteps, input_dim), + ) + ) + model.add(layers_module.Activation("softmax")) + + np.random.seed(1337) + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes, + ) + int_y_test = y_test.copy() + int_y_train = y_train.copy() + # convert class vectors to binary class matrices + y_train = np_utils.to_categorical(y_train, num_classes) + y_test = np_utils.to_categorical(y_test, num_classes) + test_ids = np.where(int_y_test == np.array(weighted_class))[0] + + sample_weight = np.ones((y_train.shape[0])) + sample_weight[int_y_train == weighted_class] = weight + + temporal_x_train = np.reshape( + x_train, (len(x_train), 1, x_train.shape[1]) + ) + temporal_x_train = np.repeat(temporal_x_train, timesteps, axis=1) + temporal_x_test = np.reshape( + x_test, (len(x_test), 1, x_test.shape[1]) + ) + temporal_x_test = np.repeat(temporal_x_test, timesteps, axis=1) + + temporal_y_train = np.reshape( + y_train, (len(y_train), 1, y_train.shape[1]) + ) + temporal_y_train = np.repeat(temporal_y_train, timesteps, axis=1) + temporal_y_test = np.reshape( + y_test, (len(y_test), 1, y_test.shape[1]) + ) + temporal_y_test = np.repeat(temporal_y_test, timesteps, axis=1) + + temporal_sample_weight = np.reshape( + sample_weight, (len(sample_weight), 1) + ) + temporal_sample_weight = np.repeat( + temporal_sample_weight, timesteps, axis=1 + ) + + model.compile( + RMSPropOptimizer(learning_rate=learning_rate), + loss="categorical_crossentropy", + metrics=["acc", metrics_module.CategoricalAccuracy()], + weighted_metrics=["mae", metrics_module.CategoricalAccuracy()], + sample_weight_mode="temporal", + run_eagerly=test_utils.should_run_eagerly(), + ) + + 
model.fit( + temporal_x_train, + temporal_y_train, + batch_size=batch_size, + epochs=epochs // 3, + verbose=0, + sample_weight=temporal_sample_weight, + ) + model.fit( + temporal_x_train, + temporal_y_train, + batch_size=batch_size, + epochs=epochs // 3, + verbose=0, + sample_weight=temporal_sample_weight, + validation_split=0.1, + ) + + model.train_on_batch( + temporal_x_train[:batch_size], + temporal_y_train[:batch_size], + sample_weight=temporal_sample_weight[:batch_size], + ) + model.test_on_batch( + temporal_x_train[:batch_size], + temporal_y_train[:batch_size], + sample_weight=temporal_sample_weight[:batch_size], + ) + ref_score = model.evaluate( + temporal_x_test, temporal_y_test, verbose=0 + ) + if not tf.executing_eagerly(): + score = model.evaluate( + temporal_x_test[test_ids], + temporal_y_test[test_ids], + verbose=0, + ) + self.assertLess(score[0], ref_score[0]) + + @test_combinations.run_all_keras_modes + @test_combinations.run_with_all_model_types(exclude_models="sequential") + def test_fit_with_incorrect_weights(self): + input_a = layers_module.Input(shape=(3,), name="input_a") + input_b = layers_module.Input(shape=(3,), name="input_b") + + dense = layers_module.Dense(2, name="output_1") + dropout = layers_module.Dropout(0.5, name="output_2") + branch_a = [input_a, dense] + branch_b = [input_b, dense, dropout] + + model = test_utils.get_multi_io_model(branch_a, branch_b) + model.compile( + optimizer="adam", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + x = np.random.random((10, 3)) + y = np.random.random((10, 2)) + + with self.assertRaises(ValueError): + model.fit([x, x], [y, y], epochs=1, sample_weight={"unknown": x}) + + with self.assertRaises(ValueError): + model.fit([x, x], [y, y], epochs=1, class_weight={"unknown": 1}) + + @test_combinations.run_all_keras_modes + def test_default_sample_weight(self): + """Verifies that fit works without having to set sample_weight.""" + num_classes = 5 + input_dim = 5 + timesteps = 3 + learning_rate = 0.001 + + with self.cached_session(): + model = sequential.Sequential() + model.add( + layers_module.TimeDistributed( + layers_module.Dense(num_classes), + input_shape=(timesteps, input_dim), + ) + ) + + x = np.random.random((10, timesteps, input_dim)) + y = np.random.random((10, timesteps, num_classes)) + optimizer = RMSPropOptimizer(learning_rate=learning_rate) + + # sample_weight_mode is a list and mode value is None + model.compile( + optimizer, + loss="mse", + sample_weight_mode=[None], + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, epochs=1, batch_size=10) + + # sample_weight_mode is a list and mode value is `temporal` + model.compile( + optimizer, + loss="mse", + sample_weight_mode=["temporal"], + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, epochs=1, batch_size=10) + + # sample_weight_mode is a dict and mode value is None + model.compile( + optimizer, + loss="mse", + sample_weight_mode={"time_distributed": None}, + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, epochs=1, batch_size=10) + + # sample_weight_mode is a dict and mode value is `temporal` + model.compile( + optimizer, + loss="mse", + sample_weight_mode={"time_distributed": "temporal"}, + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, epochs=1, batch_size=10) + + # sample_weight_mode is not a list/dict and mode value is None + model.compile( + optimizer, + loss="mse", + sample_weight_mode=None, + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y,
epochs=1, batch_size=10) + + # sample_weight_mode is not a list/dict and mode value is + # `temporal` + model.compile( + optimizer, + loss="mse", + sample_weight_mode="temporal", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, epochs=1, batch_size=10) + + def test_sample_weight_tensor(self): + """Tests that sample weight may be defined as a tensor in the graph.""" + with tf.compat.v1.get_default_graph().as_default(): + # Create a simple pass-through model + inputs = layers_module.Input(shape=1, name="input_layer") + model = training_module.Model(inputs=inputs, outputs=inputs) + model.compile(loss="mean_absolute_error", optimizer="adam") + + # Prepare sample weights iterator tensor + sample_weights = tf.constant([[0, 0.4, 1, 1], [2, 0.4, 0.3, 1]]) + dataset = tf.data.Dataset.from_tensor_slices(sample_weights) + sample_weights = tf.compat.v1.data.make_one_shot_iterator( + dataset + ).get_next() + sample_weights = training_utils_v1.standardize_sample_weights( + sample_weights, model.output_names + ) + + # Update model loss with sample weight tensor. + model._compile_weights_loss_and_weighted_metrics(sample_weights) + + feeds = { + "input_layer:0": [[0], [0], [0], [0]], + "input_layer_target:0": [[1], [1], [1], [1]], + } + with self.cached_session() as sess: + self.assertAllClose( + (0.4 + 1 + 1) / 4, + sess.run(model.total_loss, feed_dict=feeds), + ) + self.assertAllClose( + (2 + 0.4 + 0.3 + 1) / 4, + sess.run(model.total_loss, feed_dict=feeds), + ) @test_combinations.run_all_keras_modes class MaskingTest(test_combinations.TestCase): - - def _get_model(self, input_shape=None): - layers = [ - layers_module.Masking(mask_value=0), - layers_module.TimeDistributed( - layers_module.Dense(1, kernel_initializer='one')) - ] - model = test_utils.get_model_from_layers(layers, input_shape) - model.compile( - loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=test_utils.should_run_eagerly()) - return model - - @test_combinations.run_with_all_model_types - def test_masking(self): - model = self._get_model(input_shape=(2, 1)) - x = np.array([[[1], [1]], [[0], [0]]]) - y = np.array([[[1], [1]], [[1], [1]]]) - loss = model.train_on_batch(x, y) - self.assertEqual(loss, 0) - - @test_combinations.run_with_all_model_types(exclude_models='functional') - def test_masking_deferred(self): - model = self._get_model() - x = np.array([[[1], [1]], [[0], [0]]]) - y = np.array([[[1], [1]], [[1], [1]]]) - loss = model.train_on_batch(x, y) - self.assertEqual(loss, 0) - - def test_mask_argument_in_layer(self): - # Test that the mask argument gets correctly passed to a layer in the - # functional API.
- - class CustomMaskedLayer(layers_module.Layer): - - def __init__(self): - super().__init__() - self.supports_masking = True - - def call(self, inputs, mask=None): - assert mask is not None - return inputs - - def compute_output_shape(self, input_shape): - return input_shape - - x = np.random.random((5, 3)) - inputs = layers_module.Input((3,)) - masked = layers_module.Masking(mask_value=0)(inputs) - outputs = CustomMaskedLayer()(masked) - - model = training_module.Model(inputs, outputs) - model.compile( - loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=test_utils.should_run_eagerly()) - y = np.random.random((5, 3)) - model.train_on_batch(x, y) + def _get_model(self, input_shape=None): + layers = [ + layers_module.Masking(mask_value=0), + layers_module.TimeDistributed( + layers_module.Dense(1, kernel_initializer="one") + ), + ] + model = test_utils.get_model_from_layers(layers, input_shape) + model.compile( + loss="mse", + optimizer=RMSPropOptimizer(learning_rate=0.001), + run_eagerly=test_utils.should_run_eagerly(), + ) + return model + + @test_combinations.run_with_all_model_types + def test_masking(self): + model = self._get_model(input_shape=(2, 1)) + x = np.array([[[1], [1]], [[0], [0]]]) + y = np.array([[[1], [1]], [[1], [1]]]) + loss = model.train_on_batch(x, y) + self.assertEqual(loss, 0) + + @test_combinations.run_with_all_model_types(exclude_models="functional") + def test_masking_deferred(self): + model = self._get_model() + x = np.array([[[1], [1]], [[0], [0]]]) + y = np.array([[[1], [1]], [[1], [1]]]) + loss = model.train_on_batch(x, y) + self.assertEqual(loss, 0) + + def test_mask_argument_in_layer(self): + # Test that the mask argument gets correctly passed to a layer in the + # functional API. + + class CustomMaskedLayer(layers_module.Layer): + def __init__(self): + super().__init__() + self.supports_masking = True + + def call(self, inputs, mask=None): + assert mask is not None + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + x = np.random.random((5, 3)) + inputs = layers_module.Input((3,)) + masked = layers_module.Masking(mask_value=0)(inputs) + outputs = CustomMaskedLayer()(masked) + + model = training_module.Model(inputs, outputs) + model.compile( + loss="mse", + optimizer=RMSPropOptimizer(learning_rate=0.001), + run_eagerly=test_utils.should_run_eagerly(), + ) + y = np.random.random((5, 3)) + model.train_on_batch(x, y) @test_combinations.run_all_keras_modes class TestDynamicTrainability(test_combinations.TestCase): - - def test_trainable_warning(self): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - - model = sequential.Sequential() - model.add(layers_module.Dense(2, input_dim=3)) - model.trainable = False - model.compile( - 'rmsprop', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.trainable = True - model.train_on_batch(x, y) - self.assertRaises(Warning) - - def test_trainable_argument(self): - with self.cached_session(): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - - model = sequential.Sequential() - model.add(layers_module.Dense(2, input_dim=3, trainable=False)) - model.compile( - 'rmsprop', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - out = model.predict(x) - model.train_on_batch(x, y) - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - # test with nesting - inputs = layers_module.Input(shape=(3,)) - output = model(inputs) - model = training_module.Model(inputs, output) - model.compile( - 'rmsprop', - 'mse', - 
run_eagerly=test_utils.should_run_eagerly()) - out = model.predict(x) - model.train_on_batch(x, y) - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - def test_layer_trainability_switch(self): - # with constructor argument, in Sequential - model = sequential.Sequential() - model.add(layers_module.Dense(2, trainable=False, input_dim=1)) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Sequential - model = sequential.Sequential() - layer = layers_module.Dense(2, input_dim=1) - model.add(layer) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # with constructor argument, in Model - x = layers_module.Input(shape=(1,)) - y = layers_module.Dense(2, trainable=False)(x) - model = training_module.Model(x, y) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Model - x = layers_module.Input(shape=(1,)) - layer = layers_module.Dense(2) - y = layer(x) - model = training_module.Model(x, y) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_model_trainability_switch(self): - # a non-trainable model has no trainable weights - x = layers_module.Input(shape=(1,)) - y = layers_module.Dense(2)(x) - model = training_module.Model(x, y) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # same for Sequential - model = sequential.Sequential() - model.add(layers_module.Dense(2, input_dim=1)) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_nested_model_trainability(self): - # a Sequential inside a Model - inner_model = sequential.Sequential() - inner_model.add(layers_module.Dense(2, input_dim=1)) - - x = layers_module.Input(shape=(1,)) - y = inner_model(x) - outer_model = training_module.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Sequential inside a Sequential - inner_model = sequential.Sequential() - inner_model.add(layers_module.Dense(2, input_dim=1)) - outer_model = sequential.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Model - x = layers_module.Input(shape=(1,)) - y = layers_module.Dense(2)(x) - inner_model = training_module.Model(x, y) - x = layers_module.Input(shape=(1,)) - y = inner_model(x) - outer_model = training_module.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Sequential - x = layers_module.Input(shape=(1,)) - y = layers_module.Dense(2)(x) - inner_model = training_module.Model(x, y) - outer_model = 
sequential.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - def test_gan_workflow(self): - shared_layer = layers_module.BatchNormalization() - - inputs1 = input_layer.Input(10) - outputs1 = shared_layer(inputs1) - model1 = training_module.Model(inputs1, outputs1) - shared_layer.trainable = False - model1.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - inputs2 = input_layer.Input(10) - outputs2 = shared_layer(inputs2) - model2 = training_module.Model(inputs2, outputs2) - shared_layer.trainable = True - model2.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - x, y = np.ones((10, 10)), np.ones((10, 10)) - - out1_0 = model1.predict_on_batch(x) - model1.train_on_batch(x, y) - out1_1 = model1.predict_on_batch(x) - self.assertAllClose(out1_0, out1_1) - - out2_0 = model2.predict_on_batch(x) - model2.train_on_batch(x, y) - out2_1 = model2.predict_on_batch(x) - self.assertNotAllClose(out2_0, out2_1) - - def test_toggle_value(self): - input_0 = layers_module.Input(shape=(1,)) - dense_0 = layers_module.Dense( - 1, kernel_initializer='ones', bias_initializer='ones') - dense_1 = layers_module.Dense( - 1, kernel_initializer='ones', bias_initializer='ones') - result = layers_module.Add()([dense_0(input_0), dense_1(input_0)]) - model = training_module.Model(input_0, result) - dense_0.trainable = False - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((10, 1)) - y = 5 * x + 2 - model.train_on_batch(x, y) - dense_0.trainable = True - model.train_on_batch(x, y) - kernel, bias = dense_0.get_weights() - self.assertAllEqual([kernel[0, 0], bias[0]], [1., 1.]) - - kernel, bias = dense_1.get_weights() - self.assertAllClose([kernel[0, 0], bias[0]], [1.1176, 1.1176]) + def test_trainable_warning(self): + x = np.random.random((5, 3)) + y = np.random.random((5, 2)) + + model = sequential.Sequential() + model.add(layers_module.Dense(2, input_dim=3)) + model.trainable = False + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + model.trainable = True + model.train_on_batch(x, y) + self.assertRaises(Warning) + + def test_trainable_argument(self): + with self.cached_session(): + x = np.random.random((5, 3)) + y = np.random.random((5, 2)) + + model = sequential.Sequential() + model.add(layers_module.Dense(2, input_dim=3, trainable=False)) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + out = model.predict(x) + model.train_on_batch(x, y) + out_2 = model.predict(x) + self.assertAllClose(out, out_2) + + # test with nesting + inputs = layers_module.Input(shape=(3,)) + output = model(inputs) + model = training_module.Model(inputs, output) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + out = model.predict(x) + model.train_on_batch(x, y) + out_2 = model.predict(x) + self.assertAllClose(out, out_2) + + def test_layer_trainability_switch(self): + # with constructor argument, in Sequential + model = sequential.Sequential() + model.add(layers_module.Dense(2, trainable=False, input_dim=1)) + self.assertListEqual(model.trainable_weights, []) + + # by setting the `trainable` argument, in Sequential + model = 
sequential.Sequential() + layer = layers_module.Dense(2, input_dim=1) + model.add(layer) + self.assertListEqual(model.trainable_weights, layer.trainable_weights) + layer.trainable = False + self.assertListEqual(model.trainable_weights, []) + + # with constructor argument, in Model + x = layers_module.Input(shape=(1,)) + y = layers_module.Dense(2, trainable=False)(x) + model = training_module.Model(x, y) + self.assertListEqual(model.trainable_weights, []) + + # by setting the `trainable` argument, in Model + x = layers_module.Input(shape=(1,)) + layer = layers_module.Dense(2) + y = layer(x) + model = training_module.Model(x, y) + self.assertListEqual(model.trainable_weights, layer.trainable_weights) + layer.trainable = False + self.assertListEqual(model.trainable_weights, []) + + def test_model_trainability_switch(self): + # a non-trainable model has no trainable weights + x = layers_module.Input(shape=(1,)) + y = layers_module.Dense(2)(x) + model = training_module.Model(x, y) + model.trainable = False + self.assertListEqual(model.trainable_weights, []) + + # same for Sequential + model = sequential.Sequential() + model.add(layers_module.Dense(2, input_dim=1)) + model.trainable = False + self.assertListEqual(model.trainable_weights, []) + + def test_nested_model_trainability(self): + # a Sequential inside a Model + inner_model = sequential.Sequential() + inner_model.add(layers_module.Dense(2, input_dim=1)) + + x = layers_module.Input(shape=(1,)) + y = inner_model(x) + outer_model = training_module.Model(x, y) + self.assertListEqual( + outer_model.trainable_weights, inner_model.trainable_weights + ) + inner_model.trainable = False + self.assertListEqual(outer_model.trainable_weights, []) + inner_model.trainable = True + inner_model.layers[-1].trainable = False + self.assertListEqual(outer_model.trainable_weights, []) + + # a Sequential inside a Sequential + inner_model = sequential.Sequential() + inner_model.add(layers_module.Dense(2, input_dim=1)) + outer_model = sequential.Sequential() + outer_model.add(inner_model) + self.assertListEqual( + outer_model.trainable_weights, inner_model.trainable_weights + ) + inner_model.trainable = False + self.assertListEqual(outer_model.trainable_weights, []) + inner_model.trainable = True + inner_model.layers[-1].trainable = False + self.assertListEqual(outer_model.trainable_weights, []) + + # a Model inside a Model + x = layers_module.Input(shape=(1,)) + y = layers_module.Dense(2)(x) + inner_model = training_module.Model(x, y) + x = layers_module.Input(shape=(1,)) + y = inner_model(x) + outer_model = training_module.Model(x, y) + self.assertListEqual( + outer_model.trainable_weights, inner_model.trainable_weights + ) + inner_model.trainable = False + self.assertListEqual(outer_model.trainable_weights, []) + inner_model.trainable = True + inner_model.layers[-1].trainable = False + self.assertListEqual(outer_model.trainable_weights, []) + + # a Model inside a Sequential + x = layers_module.Input(shape=(1,)) + y = layers_module.Dense(2)(x) + inner_model = training_module.Model(x, y) + outer_model = sequential.Sequential() + outer_model.add(inner_model) + self.assertListEqual( + outer_model.trainable_weights, inner_model.trainable_weights + ) + inner_model.trainable = False + self.assertListEqual(outer_model.trainable_weights, []) + inner_model.trainable = True + inner_model.layers[-1].trainable = False + self.assertListEqual(outer_model.trainable_weights, []) + + def test_gan_workflow(self): + shared_layer = layers_module.BatchNormalization() + + inputs1 = 
input_layer.Input(10) + outputs1 = shared_layer(inputs1) + model1 = training_module.Model(inputs1, outputs1) + shared_layer.trainable = False + model1.compile( + "sgd", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + inputs2 = input_layer.Input(10) + outputs2 = shared_layer(inputs2) + model2 = training_module.Model(inputs2, outputs2) + shared_layer.trainable = True + model2.compile( + "sgd", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + x, y = np.ones((10, 10)), np.ones((10, 10)) + + out1_0 = model1.predict_on_batch(x) + model1.train_on_batch(x, y) + out1_1 = model1.predict_on_batch(x) + self.assertAllClose(out1_0, out1_1) + + out2_0 = model2.predict_on_batch(x) + model2.train_on_batch(x, y) + out2_1 = model2.predict_on_batch(x) + self.assertNotAllClose(out2_0, out2_1) + + def test_toggle_value(self): + input_0 = layers_module.Input(shape=(1,)) + dense_0 = layers_module.Dense( + 1, kernel_initializer="ones", bias_initializer="ones" + ) + dense_1 = layers_module.Dense( + 1, kernel_initializer="ones", bias_initializer="ones" + ) + result = layers_module.Add()([dense_0(input_0), dense_1(input_0)]) + model = training_module.Model(input_0, result) + dense_0.trainable = False + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + + x = np.ones((10, 1)) + y = 5 * x + 2 + model.train_on_batch(x, y) + dense_0.trainable = True + model.train_on_batch(x, y) + kernel, bias = dense_0.get_weights() + self.assertAllEqual([kernel[0, 0], bias[0]], [1.0, 1.0]) + + kernel, bias = dense_1.get_weights() + self.assertAllClose([kernel[0, 0], bias[0]], [1.1176, 1.1176]) class TestTrainingWithDataTensors(test_combinations.TestCase): - - def test_training_and_eval_methods_on_symbolic_tensors_single_io(self): - with tf.Graph().as_default(): - x = layers_module.Input(shape=(3,), name='input') - y = layers_module.Dense(4, name='dense')(x) - model = training_module.Model(x, y) - - optimizer = RMSPropOptimizer(learning_rate=0.001) - loss = 'mse' - model.compile( - optimizer, - loss, - metrics=['mae', metrics_module.CategoricalAccuracy()]) - - inputs = backend.zeros(shape=(10, 3)) - targets = backend.zeros(shape=(10, 4)) - - model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0) - model.evaluate(inputs, targets, steps=2, verbose=0) - model.predict(inputs, steps=2) - model.train_on_batch(inputs, targets) - model.test_on_batch(inputs, targets) - model.fit(inputs, targets, - epochs=1, steps_per_epoch=2, verbose=0, - validation_data=(inputs, targets), validation_steps=2) - - # Test with dynamic shape - inputs = tf.compat.v1.placeholder_with_default( - np.zeros((2, 3)), shape=tf.TensorShape([None, 3])) - targets = tf.compat.v1.placeholder_with_default( - np.zeros((2, 4)), shape=tf.TensorShape([None, 4])) - self.assertEqual(inputs.shape.dims[0].value, None) - model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0) - model.evaluate(inputs, targets, steps=2, verbose=0) - model.predict(inputs, steps=2) - model.train_on_batch(inputs, targets) - model.test_on_batch(inputs, targets) - model.fit(inputs, targets, - epochs=1, steps_per_epoch=2, verbose=0, - validation_data=(inputs, targets), validation_steps=2) - - def test_training_and_eval_methods_on_symbolic_tensors_multi_io(self): - a = layers_module.Input(shape=(3,), name='input_a') - b = layers_module.Input(shape=(3,), name='input_b') - - dense = layers_module.Dense(4, name='dense') - c = dense(a) - d = dense(b) - e = layers_module.Dropout(0.5, name='dropout')(c) - - model = training_module.Model([a, b], [d, e]) 
- - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - model.compile( - optimizer, - loss, - metrics=['mae', metrics_module.CategoricalAccuracy()], - loss_weights=loss_weights) - - input_a_tf = tf.zeros(shape=(10, 3)) - input_b_tf = tf.zeros(shape=(10, 3)) - - output_d_tf = tf.zeros(shape=(10, 4)) - output_e_tf = tf.zeros(shape=(10, 4)) - - model.fit([input_a_tf, input_b_tf], [output_d_tf, output_e_tf], - epochs=1, - steps_per_epoch=2, - verbose=0) - model.train_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf]) - - # Test with dictionary inputs - model.fit({ - 'input_a': input_a_tf, - 'input_b': input_b_tf - }, { - 'dense': output_d_tf, - 'dropout': output_e_tf - }, - epochs=1, - steps_per_epoch=2, - verbose=0) - model.fit({ - 'input_a': input_a_tf, - 'input_b': input_b_tf - }, { - 'dense': output_d_tf, - 'dropout': output_e_tf - }, - validation_data=({ - 'input_a': input_a_tf, - 'input_b': input_b_tf - }, { - 'dense': output_d_tf, - 'dropout': output_e_tf - }), - epochs=1, - steps_per_epoch=2, - validation_steps=2, - verbose=0) - model.train_on_batch({ - 'input_a': input_a_tf, - 'input_b': input_b_tf - }, { - 'dense': output_d_tf, - 'dropout': output_e_tf - }) - - # Test with validation data - model.fit([input_a_tf, input_b_tf], [output_d_tf, output_e_tf], - validation_data=([input_a_tf, - input_b_tf], [output_d_tf, output_e_tf]), - epochs=1, - steps_per_epoch=2, - validation_steps=2, - verbose=0) - # Test evaluation / prediction methods - model.evaluate([input_a_tf, input_b_tf], [output_d_tf, output_e_tf], - steps=2, - verbose=0) - model.predict([input_a_tf, input_b_tf], steps=2) - model.test_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf]) - - @tf_test_utils.run_deprecated_v1 - def test_model_with_input_feed_tensor(self): - """We test building a model with a TF variable as input. - - We should be able to call fit, evaluate, predict, - by only passing them data for the placeholder inputs - in the model. 
- """ - with tf.Graph().as_default(), self.cached_session(): - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_a_np = np.random.random((10, 4)) - output_b_np = np.random.random((10, 3)) - - input_v = tf.Variable(input_a_np, dtype='float32') - self.evaluate(tf.compat.v1.variables_initializer([input_v])) - a = input_layer.Input(tensor=input_v) - b = input_layer.Input(shape=(3,), name='input_b') - - a_2 = layers_module.Dense(4, name='dense_1')(a) - dp = layers_module.Dropout(0.5, name='dropout') - b_2 = dp(b) - - model = training_module.Model([a, b], [a_2, b_2]) - model.summary() - - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - model.compile(optimizer, loss, metrics=['mean_squared_error'], + def test_training_and_eval_methods_on_symbolic_tensors_single_io(self): + with tf.Graph().as_default(): + x = layers_module.Input(shape=(3,), name="input") + y = layers_module.Dense(4, name="dense")(x) + model = training_module.Model(x, y) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = "mse" + model.compile( + optimizer, + loss, + metrics=["mae", metrics_module.CategoricalAccuracy()], + ) + + inputs = backend.zeros(shape=(10, 3)) + targets = backend.zeros(shape=(10, 4)) + + model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0) + model.evaluate(inputs, targets, steps=2, verbose=0) + model.predict(inputs, steps=2) + model.train_on_batch(inputs, targets) + model.test_on_batch(inputs, targets) + model.fit( + inputs, + targets, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_data=(inputs, targets), + validation_steps=2, + ) + + # Test with dynamic shape + inputs = tf.compat.v1.placeholder_with_default( + np.zeros((2, 3)), shape=tf.TensorShape([None, 3]) + ) + targets = tf.compat.v1.placeholder_with_default( + np.zeros((2, 4)), shape=tf.TensorShape([None, 4]) + ) + self.assertEqual(inputs.shape.dims[0].value, None) + model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0) + model.evaluate(inputs, targets, steps=2, verbose=0) + model.predict(inputs, steps=2) + model.train_on_batch(inputs, targets) + model.test_on_batch(inputs, targets) + model.fit( + inputs, + targets, + epochs=1, + steps_per_epoch=2, + verbose=0, + validation_data=(inputs, targets), + validation_steps=2, + ) + + def test_training_and_eval_methods_on_symbolic_tensors_multi_io(self): + a = layers_module.Input(shape=(3,), name="input_a") + b = layers_module.Input(shape=(3,), name="input_b") + + dense = layers_module.Dense(4, name="dense") + c = dense(a) + d = dense(b) + e = layers_module.Dropout(0.5, name="dropout")(c) + + model = training_module.Model([a, b], [d, e]) + + optimizer = "rmsprop" + loss = "mse" + loss_weights = [1.0, 0.5] + model.compile( + optimizer, + loss, + metrics=["mae", metrics_module.CategoricalAccuracy()], + loss_weights=loss_weights, + ) + + input_a_tf = tf.zeros(shape=(10, 3)) + input_b_tf = tf.zeros(shape=(10, 3)) + + output_d_tf = tf.zeros(shape=(10, 4)) + output_e_tf = tf.zeros(shape=(10, 4)) + + model.fit( + [input_a_tf, input_b_tf], + [output_d_tf, output_e_tf], + epochs=1, + steps_per_epoch=2, + verbose=0, + ) + model.train_on_batch( + [input_a_tf, input_b_tf], [output_d_tf, output_e_tf] + ) + + # Test with dictionary inputs + model.fit( + {"input_a": input_a_tf, "input_b": input_b_tf}, + {"dense": output_d_tf, "dropout": output_e_tf}, + epochs=1, + steps_per_epoch=2, + verbose=0, + ) + model.fit( + {"input_a": input_a_tf, "input_b": input_b_tf}, + {"dense": output_d_tf, "dropout": output_e_tf}, + 
validation_data=( + {"input_a": input_a_tf, "input_b": input_b_tf}, + {"dense": output_d_tf, "dropout": output_e_tf}, + ), + epochs=1, + steps_per_epoch=2, + validation_steps=2, + verbose=0, + ) + model.train_on_batch( + {"input_a": input_a_tf, "input_b": input_b_tf}, + {"dense": output_d_tf, "dropout": output_e_tf}, + ) + + # Test with validation data + model.fit( + [input_a_tf, input_b_tf], + [output_d_tf, output_e_tf], + validation_data=( + [input_a_tf, input_b_tf], + [output_d_tf, output_e_tf], + ), + epochs=1, + steps_per_epoch=2, + validation_steps=2, + verbose=0, + ) + # Test evaluation / prediction methods + model.evaluate( + [input_a_tf, input_b_tf], + [output_d_tf, output_e_tf], + steps=2, + verbose=0, + ) + model.predict([input_a_tf, input_b_tf], steps=2) + model.test_on_batch( + [input_a_tf, input_b_tf], [output_d_tf, output_e_tf] + ) + + @tf_test_utils.run_deprecated_v1 + def test_model_with_input_feed_tensor(self): + """We test building a model with a TF variable as input. + + We should be able to call fit, evaluate, predict, + by only passing them data for the placeholder inputs + in the model. + """ + with tf.Graph().as_default(), self.cached_session(): + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + + output_a_np = np.random.random((10, 4)) + output_b_np = np.random.random((10, 3)) + + input_v = tf.Variable(input_a_np, dtype="float32") + self.evaluate(tf.compat.v1.variables_initializer([input_v])) + a = input_layer.Input(tensor=input_v) + b = input_layer.Input(shape=(3,), name="input_b") + + a_2 = layers_module.Dense(4, name="dense_1")(a) + dp = layers_module.Dropout(0.5, name="dropout") + b_2 = dp(b) + + model = training_module.Model([a, b], [a_2, b_2]) + model.summary() + + optimizer = "rmsprop" + loss = "mse" + loss_weights = [1.0, 0.5] + model.compile( + optimizer, + loss, + metrics=["mean_squared_error"], + loss_weights=loss_weights, + sample_weight_mode=None, + ) + + # test train_on_batch + out = model.train_on_batch(input_b_np, [output_a_np, output_b_np]) + out = model.train_on_batch( + {"input_b": input_b_np}, [output_a_np, output_b_np] + ) + out = model.test_on_batch( + {"input_b": input_b_np}, [output_a_np, output_b_np] + ) + out = model.predict_on_batch({"input_b": input_b_np}) + + # test fit + out = model.fit( + {"input_b": input_b_np}, + [output_a_np, output_b_np], + epochs=1, + batch_size=10, + ) + out = model.fit( + input_b_np, [output_a_np, output_b_np], epochs=1, batch_size=10 + ) + + # test evaluate + out = model.evaluate( + {"input_b": input_b_np}, + [output_a_np, output_b_np], + batch_size=10, + ) + out = model.evaluate( + input_b_np, [output_a_np, output_b_np], batch_size=10 + ) + + # test predict + out = model.predict({"input_b": input_b_np}, batch_size=10) + out = model.predict(input_b_np, batch_size=10) + self.assertEqual(len(out), 2) + + # Now test a model with a single input + # i.e. we don't pass any data to fit the model. 
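+ # (The model's only input is fed by the `input_v` variable, so x may + # be None, [], or {}; fit/evaluate/predict are driven purely by the + # steps arguments below.)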
+ self.evaluate(tf.compat.v1.variables_initializer([input_v])) + a = input_layer.Input(tensor=input_v) + a_2 = layers_module.Dense(4, name="dense_1")(a) + a_2 = layers_module.Dropout(0.5, name="dropout")(a_2) + model = training_module.Model(a, a_2) + model.summary() + + optimizer = "rmsprop" + loss = "mse" + model.compile(optimizer, loss, metrics=["mean_squared_error"]) + + # test train_on_batch + out = model.train_on_batch(None, output_a_np) + out = model.train_on_batch(None, output_a_np) + out = model.test_on_batch(None, output_a_np) + out = model.predict_on_batch(None) + out = model.train_on_batch([], output_a_np) + out = model.train_on_batch({}, output_a_np) + + # test fit + _ = model.fit(None, output_a_np, epochs=1, steps_per_epoch=3) + _ = model.fit(None, output_a_np, epochs=1, steps_per_epoch=3) + + # test evaluate + _ = model.evaluate(None, output_a_np, steps=3) + _ = model.evaluate(None, output_a_np, steps=3) + + # test predict + out = model.predict(None, steps=3) + out = model.predict(None, steps=3) + self.assertEqual(out.shape, (10 * 3, 4)) + + # Same, without learning phase + # i.e. we don't pass any data to fit the model. + self.evaluate(tf.compat.v1.variables_initializer([input_v])) + a = input_layer.Input(tensor=input_v) + a_2 = layers_module.Dense(4, name="dense_1")(a) + model = training_module.Model(a, a_2) + model.summary() + + optimizer = "rmsprop" + loss = "mse" + model.compile(optimizer, loss, metrics=["mean_squared_error"]) + + # test train_on_batch + out = model.train_on_batch(None, output_a_np) + out = model.train_on_batch(None, output_a_np) + out = model.test_on_batch(None, output_a_np) + out = model.predict_on_batch(None) + out = model.train_on_batch([], output_a_np) + out = model.train_on_batch({}, output_a_np) + + # test fit + _ = model.fit(None, output_a_np, epochs=1, steps_per_epoch=10) + _ = model.fit(None, output_a_np, epochs=1, steps_per_epoch=10) + + # test evaluate + _ = model.evaluate(None, output_a_np, steps=10) + _ = model.evaluate(None, output_a_np, steps=10) + + # test predict + out = model.predict(None, steps=3) + out = model.predict(None, steps=3) + self.assertEqual(out.shape, (10 * 3, 4)) + + @test_combinations.run_all_keras_modes + def test_model_with_partial_loss(self): + with self.cached_session(): + a = input_layer.Input(shape=(3,), name="input_a") + a_2 = layers_module.Dense(4, name="dense_1")(a) + dp = layers_module.Dropout(0.5, name="dropout") + a_3 = dp(a_2) + model = training_module.Model(a, [a_2, a_3]) + + optimizer = "rmsprop" + loss = {"dropout": "mse"} + model.compile(optimizer, loss, metrics=["mae"]) + + input_a_np = np.random.random((10, 3)) + output_a_np = np.random.random((10, 4)) + + # test train_on_batch + _ = model.train_on_batch(input_a_np, output_a_np) + _ = model.test_on_batch(input_a_np, output_a_np) + # fit + _ = model.fit(input_a_np, output_a_np) + # evaluate + _ = model.evaluate(input_a_np, output_a_np) + + # Same without dropout. 
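+ # (Only the dense_2 output gets a loss here; dense_1 contributes just + # a mae metric, so training still runs with a partial loss mapping.)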
+ a = input_layer.Input(shape=(3,), name="input_a") + a_2 = layers_module.Dense(4, name="dense_1")(a) + a_3 = layers_module.Dense(4, name="dense_2")(a_2) + model = training_module.Model(a, [a_2, a_3]) + + optimizer = "rmsprop" + loss = {"dense_2": "mse"} + model.compile(optimizer, loss, metrics={"dense_1": "mae"}) + + # test train_on_batch + _ = model.train_on_batch(input_a_np, output_a_np) + _ = model.test_on_batch(input_a_np, output_a_np) + # fit + _ = model.fit(input_a_np, output_a_np) + # evaluate + _ = model.evaluate(input_a_np, output_a_np) + + def test_model_with_external_loss(self): + with tf.Graph().as_default(), self.cached_session(): + # None loss, only regularization loss. + a = input_layer.Input(shape=(3,), name="input_a") + a_2 = layers_module.Dense( + 4, + name="dense_1", + kernel_regularizer="l1", + bias_regularizer="l2", + )(a) + dp = layers_module.Dropout(0.5, name="dropout") + a_3 = dp(a_2) + + model = training_module.Model(a, [a_2, a_3]) + + optimizer = "rmsprop" + loss = None + model.compile(optimizer, loss, metrics=["mae"]) + + input_a_np = np.random.random((10, 3)) + + # test train_on_batch + out = model.train_on_batch(input_a_np, None) + out = model.test_on_batch(input_a_np, None) + # fit + out = model.fit(input_a_np, None) + # evaluate + out = model.evaluate(input_a_np, None) + + # No dropout, external loss. + a = input_layer.Input(shape=(3,), name="input_a") + a_2 = layers_module.Dense(4, name="dense_1")(a) + a_3 = layers_module.Dense(4, name="dense_2")(a) + + model = training_module.Model(a, [a_2, a_3]) + model.add_loss(backend.mean(a_3 + a_2)) + + optimizer = "rmsprop" + loss = None + model.compile(optimizer, loss, metrics=["mae"]) + + # test train_on_batch + out = model.train_on_batch(input_a_np, None) + out = model.test_on_batch(input_a_np, None) + # fit + out = model.fit(input_a_np, None) + # evaluate + out = model.evaluate(input_a_np, None) + + # Test model with no external data at all. + input_v = tf.Variable(input_a_np, dtype="float32") + self.evaluate(tf.compat.v1.variables_initializer([input_v])) + a = input_layer.Input(tensor=input_v) + a_2 = layers_module.Dense(4, name="dense_1")(a) + a_2 = layers_module.Dropout(0.5, name="dropout")(a_2) + model = training_module.Model(a, a_2) + model.add_loss(backend.mean(a_2)) + + model.compile( + optimizer="rmsprop", loss=None, metrics=["mean_squared_error"] + ) + + # test train_on_batch + out = model.train_on_batch(None, None) + out = model.test_on_batch(None, None) + out = model.predict_on_batch(None) + + # Test multi-output model with no external data at all. 
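+ # (As in the single-output case, the input is fed by `input_v` and + # the loss comes from add_loss, so train/test/predict below pass no + # data at all.)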
+ self.evaluate(tf.compat.v1.variables_initializer([input_v])) + a = input_layer.Input(tensor=input_v) + a_1 = layers_module.Dense(4, name="dense_1")(a) + a_2 = layers_module.Dropout(0.5, name="dropout")(a_1) + model = training_module.Model(a, [a_1, a_2]) + model.add_loss(backend.mean(a_2)) + + model.compile( + optimizer="rmsprop", loss=None, metrics=["mean_squared_error"] + ) + + # test train_on_batch + out = model.train_on_batch(None, None) + out = model.test_on_batch(None, None) + out = model.predict_on_batch(None) + + out = model.predict(None, steps=3) + self.assertEqual(len(out), 2) + self.assertEqual(out[0].shape, (10 * 3, 4)) + self.assertEqual(out[1].shape, (10 * 3, 4)) + + def test_target_tensors(self): + with tf.Graph().as_default(), self.cached_session(): + # single-output, as list + model = sequential.Sequential() + model.add(layers_module.Dense(4, input_shape=(4,), name="dense")) + input_val = np.random.random((10, 4)) + target_val = np.random.random((10, 4)) + target = backend.variable(target_val) + model.compile( + optimizer="rmsprop", loss="mse", target_tensors=[target] + ) + model.train_on_batch(input_val, None) + + # single-output, as single tensor + model.compile( + optimizer="rmsprop", loss="mse", target_tensors=target + ) + model.train_on_batch(input_val, None) + + # single-output, as dict + model.compile( + optimizer="rmsprop", + loss="mse", + target_tensors={"dense": target}, + ) + model.train_on_batch(input_val, None) + + # test invalid arguments + with self.assertRaises(TypeError): + model.compile( + optimizer="rmsprop", loss="mse", target_tensors=set() + ) + with self.assertRaises(ValueError): + model.compile( + optimizer="rmsprop", + loss="mse", + target_tensors=[target, target], + ) + with self.assertRaises(ValueError): + model.compile( + optimizer="rmsprop", + loss="mse", + target_tensors={"dense2": None}, + ) + with self.assertRaises(ValueError): + model.compile( + optimizer="rmsprop", loss="mse", target_tensors=[target] + ) + model.train_on_batch(input_val, target_val) + + # multi-output, as list + input_val = np.random.random((10, 4)) + target_val_a = np.random.random((10, 4)) + target_val_b = np.random.random((10, 4)) + target_a = backend.variable(target_val_a) + target_b = backend.variable(target_val_b) + + inputs = layers_module.Input(shape=(4,)) + output_a = layers_module.Dense(4, name="dense_a")(inputs) + output_b = layers_module.Dense(4, name="dense_b")(inputs) + model = training_module.Model(inputs, [output_a, output_b]) + model.compile( + optimizer="rmsprop", + loss="mse", + target_tensors=[target_a, target_b], + ) + model.train_on_batch(input_val, None) + + # multi-output, as dict + model.compile( + optimizer="rmsprop", + loss="mse", + target_tensors={"dense_a": target_a, "dense_b": target_b}, + ) + model.train_on_batch(input_val, None) + + # test with sample weights + model.compile( + optimizer="rmsprop", + loss="mse", + metrics=["mae", metrics_module.CategoricalAccuracy()], + target_tensors=[target_a, target_b], + ) + model.train_on_batch( + input_val, + None, + sample_weight={"dense_a": np.random.random((10,))}, + ) + + def test_model_custom_target_tensors(self): + with tf.Graph().as_default(), self.cached_session(): + a = input_layer.Input(shape=(3,), name="input_a") + b = input_layer.Input(shape=(3,), name="input_b") + + a_2 = layers_module.Dense(4, name="dense_1")(a) + dp = layers_module.Dropout(0.5, name="dropout") + b_2 = dp(b) + + y = backend.placeholder([10, 4], name="y") + y1 = backend.placeholder([10, 3], name="y1") + y2 = 
backend.placeholder([7, 5], name="y2") + model = training_module.Model([a, b], [a_2, b_2]) + + optimizer = "rmsprop" + loss = "mse" + loss_weights = [1.0, 0.5] + + # test list of target tensors + with self.assertRaises(ValueError): + model.compile( + optimizer, + loss, + metrics=[], loss_weights=loss_weights, - sample_weight_mode=None) - - # test train_on_batch - out = model.train_on_batch(input_b_np, - [output_a_np, output_b_np]) - out = model.train_on_batch({'input_b': input_b_np}, - [output_a_np, output_b_np]) - out = model.test_on_batch({'input_b': input_b_np}, - [output_a_np, output_b_np]) - out = model.predict_on_batch({'input_b': input_b_np}) - - # test fit - out = model.fit({'input_b': input_b_np}, - [output_a_np, output_b_np], epochs=1, batch_size=10) - out = model.fit(input_b_np, - [output_a_np, output_b_np], epochs=1, batch_size=10) - - # test evaluate - out = model.evaluate({'input_b': input_b_np}, - [output_a_np, output_b_np], batch_size=10) - out = model.evaluate(input_b_np, - [output_a_np, output_b_np], batch_size=10) - - # test predict - out = model.predict({'input_b': input_b_np}, batch_size=10) - out = model.predict(input_b_np, batch_size=10) - self.assertEqual(len(out), 2) - - # Now test a model with a single input - # i.e. we don't pass any data to fit the model. - self.evaluate(tf.compat.v1.variables_initializer([input_v])) - a = input_layer.Input(tensor=input_v) - a_2 = layers_module.Dense(4, name='dense_1')(a) - a_2 = layers_module.Dropout(0.5, name='dropout')(a_2) - model = training_module.Model(a, a_2) - model.summary() - - optimizer = 'rmsprop' - loss = 'mse' - model.compile(optimizer, loss, metrics=['mean_squared_error']) - - # test train_on_batch - out = model.train_on_batch(None, - output_a_np) - out = model.train_on_batch(None, - output_a_np) - out = model.test_on_batch(None, - output_a_np) - out = model.predict_on_batch(None) - out = model.train_on_batch([], - output_a_np) - out = model.train_on_batch({}, - output_a_np) - - # test fit - _ = model.fit(None, output_a_np, epochs=1, steps_per_epoch=3) - _ = model.fit(None, output_a_np, epochs=1, steps_per_epoch=3) - - # test evaluate - _ = model.evaluate(None, output_a_np, steps=3) - _ = model.evaluate(None, output_a_np, steps=3) - - # test predict - out = model.predict(None, steps=3) - out = model.predict(None, steps=3) - self.assertEqual(out.shape, (10 * 3, 4)) - - # Same, without learning phase - # i.e. we don't pass any data to fit the model. 
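[Editor's sketch] target_tensors, exercised here and in test_target_tensors above, is a graph-mode-only compile argument (note the tests wrap themselves in tf.Graph().as_default()): instead of letting Keras create target placeholders, you hand compile() an existing tensor and pass y=None when training. A sketch under that assumption, using the TF1 compatibility path:

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # target_tensors only exists on the graph-mode path

model = tf.keras.Sequential(
    [tf.keras.layers.Dense(4, input_shape=(4,), name="dense")]
)
target = tf.keras.backend.variable(np.random.random((10, 4)))

# Targets are read from `target`, so y is None in the train call.
model.compile(optimizer="rmsprop", loss="mse", target_tensors=[target])
model.train_on_batch(np.random.random((10, 4)), None)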
- self.evaluate(tf.compat.v1.variables_initializer([input_v])) - a = input_layer.Input(tensor=input_v) - a_2 = layers_module.Dense(4, name='dense_1')(a) - model = training_module.Model(a, a_2) - model.summary() - - optimizer = 'rmsprop' - loss = 'mse' - model.compile(optimizer, loss, metrics=['mean_squared_error']) - - # test train_on_batch - out = model.train_on_batch(None, - output_a_np) - out = model.train_on_batch(None, - output_a_np) - out = model.test_on_batch(None, - output_a_np) - out = model.predict_on_batch(None) - out = model.train_on_batch([], - output_a_np) - out = model.train_on_batch({}, - output_a_np) - - # test fit - _ = model.fit(None, output_a_np, epochs=1, steps_per_epoch=10) - _ = model.fit(None, output_a_np, epochs=1, steps_per_epoch=10) - - # test evaluate - _ = model.evaluate(None, output_a_np, steps=10) - _ = model.evaluate(None, output_a_np, steps=10) - - # test predict - out = model.predict(None, steps=3) - out = model.predict(None, steps=3) - self.assertEqual(out.shape, (10 * 3, 4)) - - @test_combinations.run_all_keras_modes - def test_model_with_partial_loss(self): - with self.cached_session(): - a = input_layer.Input(shape=(3,), name='input_a') - a_2 = layers_module.Dense(4, name='dense_1')(a) - dp = layers_module.Dropout(0.5, name='dropout') - a_3 = dp(a_2) - model = training_module.Model(a, [a_2, a_3]) - - optimizer = 'rmsprop' - loss = {'dropout': 'mse'} - model.compile(optimizer, loss, metrics=['mae']) - - input_a_np = np.random.random((10, 3)) - output_a_np = np.random.random((10, 4)) - - # test train_on_batch - _ = model.train_on_batch(input_a_np, output_a_np) - _ = model.test_on_batch(input_a_np, output_a_np) - # fit - _ = model.fit(input_a_np, output_a_np) - # evaluate - _ = model.evaluate(input_a_np, output_a_np) - - # Same without dropout. - a = input_layer.Input(shape=(3,), name='input_a') - a_2 = layers_module.Dense(4, name='dense_1')(a) - a_3 = layers_module.Dense(4, name='dense_2')(a_2) - model = training_module.Model(a, [a_2, a_3]) - - optimizer = 'rmsprop' - loss = {'dense_2': 'mse'} - model.compile(optimizer, loss, metrics={'dense_1': 'mae'}) - - # test train_on_batch - _ = model.train_on_batch(input_a_np, output_a_np) - _ = model.test_on_batch(input_a_np, output_a_np) - # fit - _ = model.fit(input_a_np, output_a_np) - # evaluate - _ = model.evaluate(input_a_np, output_a_np) - - def test_model_with_external_loss(self): - with tf.Graph().as_default(), self.cached_session(): - # None loss, only regularization loss. - a = input_layer.Input(shape=(3,), name='input_a') - a_2 = layers_module.Dense( - 4, name='dense_1', kernel_regularizer='l1', bias_regularizer='l2')( - a) - dp = layers_module.Dropout(0.5, name='dropout') - a_3 = dp(a_2) - - model = training_module.Model(a, [a_2, a_3]) - - optimizer = 'rmsprop' - loss = None - model.compile(optimizer, loss, metrics=['mae']) - - input_a_np = np.random.random((10, 3)) - - # test train_on_batch - out = model.train_on_batch(input_a_np, None) - out = model.test_on_batch(input_a_np, None) - # fit - out = model.fit(input_a_np, None) - # evaluate - out = model.evaluate(input_a_np, None) - - # No dropout, external loss. 
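[Editor's note] On the shape assertions in these tensor-fed tests: when the input is bound to a tensor (Input(tensor=input_v) over a 10-row variable), predict(None, steps=3) evaluates the graph once per step and concatenates the per-step outputs, which is where (10 * 3, 4) comes from:

# Why the tests assert out.shape == (10 * 3, 4).
rows_per_step = 10  # the bound input variable holds 10 rows
steps = 3           # predict(None, steps=3) runs the graph 3 times
width = 4           # Dense(4) output width
assert (rows_per_step * steps, width) == (30, 4)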
- a = input_layer.Input(shape=(3,), name='input_a') - a_2 = layers_module.Dense(4, name='dense_1')(a) - a_3 = layers_module.Dense(4, name='dense_2')(a) - - model = training_module.Model(a, [a_2, a_3]) - model.add_loss(backend.mean(a_3 + a_2)) - - optimizer = 'rmsprop' - loss = None - model.compile(optimizer, loss, metrics=['mae']) - - # test train_on_batch - out = model.train_on_batch(input_a_np, None) - out = model.test_on_batch(input_a_np, None) - # fit - out = model.fit(input_a_np, None) - # evaluate - out = model.evaluate(input_a_np, None) - - # Test model with no external data at all. - input_v = tf.Variable(input_a_np, dtype='float32') - self.evaluate(tf.compat.v1.variables_initializer([input_v])) - a = input_layer.Input(tensor=input_v) - a_2 = layers_module.Dense(4, name='dense_1')(a) - a_2 = layers_module.Dropout(0.5, name='dropout')(a_2) - model = training_module.Model(a, a_2) - model.add_loss(backend.mean(a_2)) - - model.compile(optimizer='rmsprop', - loss=None, - metrics=['mean_squared_error']) - - # test train_on_batch - out = model.train_on_batch(None, None) - out = model.test_on_batch(None, None) - out = model.predict_on_batch(None) - - # Test multi-output model with no external data at all. - self.evaluate(tf.compat.v1.variables_initializer([input_v])) - a = input_layer.Input(tensor=input_v) - a_1 = layers_module.Dense(4, name='dense_1')(a) - a_2 = layers_module.Dropout(0.5, name='dropout')(a_1) - model = training_module.Model(a, [a_1, a_2]) - model.add_loss(backend.mean(a_2)) - - model.compile(optimizer='rmsprop', - loss=None, - metrics=['mean_squared_error']) - - # test train_on_batch - out = model.train_on_batch(None, None) - out = model.test_on_batch(None, None) - out = model.predict_on_batch(None) - - out = model.predict(None, steps=3) - self.assertEqual(len(out), 2) - self.assertEqual(out[0].shape, (10 * 3, 4)) - self.assertEqual(out[1].shape, (10 * 3, 4)) - - def test_target_tensors(self): - with tf.Graph().as_default(), self.cached_session(): - # single-output, as list - model = sequential.Sequential() - model.add(layers_module.Dense(4, input_shape=(4,), name='dense')) - input_val = np.random.random((10, 4)) - target_val = np.random.random((10, 4)) - target = backend.variable(target_val) - model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target]) - model.train_on_batch(input_val, None) - - # single-output, as single tensor - model.compile(optimizer='rmsprop', loss='mse', target_tensors=target) - model.train_on_batch(input_val, None) - - # single-output, as dict - model.compile(optimizer='rmsprop', loss='mse', - target_tensors={'dense': target}) - model.train_on_batch(input_val, None) - - # test invalid arguments - with self.assertRaises(TypeError): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=set()) - with self.assertRaises(ValueError): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=[target, target]) - with self.assertRaises(ValueError): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors={'dense2': None}) - with self.assertRaises(ValueError): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=[target]) - model.train_on_batch(input_val, target_val) - - # multi-output, as list - input_val = np.random.random((10, 4)) - target_val_a = np.random.random((10, 4)) - target_val_b = np.random.random((10, 4)) - target_a = backend.variable(target_val_a) - target_b = backend.variable(target_val_b) - - inputs = layers_module.Input(shape=(4,)) - output_a = layers_module.Dense(4, 
name='dense_a')(inputs) - output_b = layers_module.Dense(4, name='dense_b')(inputs) - model = training_module.Model(inputs, [output_a, output_b]) - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=[target_a, target_b]) - model.train_on_batch(input_val, None) - - # multi-output, as dict - model.compile(optimizer='rmsprop', loss='mse', - target_tensors={'dense_a': target_a, - 'dense_b': target_b}) - model.train_on_batch(input_val, None) - - # test with sample weights - model.compile( - optimizer='rmsprop', - loss='mse', - metrics=['mae', metrics_module.CategoricalAccuracy()], - target_tensors=[target_a, target_b]) - model.train_on_batch(input_val, None, - sample_weight={'dense_a': np.random.random((10,))}) - - def test_model_custom_target_tensors(self): - with tf.Graph().as_default(), self.cached_session(): - a = input_layer.Input(shape=(3,), name='input_a') - b = input_layer.Input(shape=(3,), name='input_b') - - a_2 = layers_module.Dense(4, name='dense_1')(a) - dp = layers_module.Dropout(0.5, name='dropout') - b_2 = dp(b) - - y = backend.placeholder([10, 4], name='y') - y1 = backend.placeholder([10, 3], name='y1') - y2 = backend.placeholder([7, 5], name='y2') - model = training_module.Model([a, b], [a_2, b_2]) - - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - - # test list of target tensors - with self.assertRaises(ValueError): - model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, - sample_weight_mode=None, target_tensors=[y, y1, y2]) - model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, - sample_weight_mode=None, target_tensors=[y, y1]) - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_a_np = np.random.random((10, 4)) - output_b_np = np.random.random((10, 3)) - - _ = model.train_on_batch([input_a_np, input_b_np], - [output_a_np, output_b_np], { - 'dense_1': np.random.random((10,)), - 'dropout': np.random.random((10,)) - }) - # test dictionary of target_tensors - with self.assertRaises(ValueError): - model.compile(optimizer, loss, - metrics=[], - loss_weights=loss_weights, - sample_weight_mode=None, - target_tensors={'does_not_exist': y2}) - # test dictionary of target_tensors - model.compile(optimizer, loss, + sample_weight_mode=None, + target_tensors=[y, y1, y2], + ) + model.compile( + optimizer, + loss, + metrics=[], + loss_weights=loss_weights, + sample_weight_mode=None, + target_tensors=[y, y1], + ) + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + + output_a_np = np.random.random((10, 4)) + output_b_np = np.random.random((10, 3)) + + _ = model.train_on_batch( + [input_a_np, input_b_np], + [output_a_np, output_b_np], + { + "dense_1": np.random.random((10,)), + "dropout": np.random.random((10,)), + }, + ) + # test dictionary of target_tensors + with self.assertRaises(ValueError): + model.compile( + optimizer, + loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None, - target_tensors={'dense_1': y, 'dropout': y1}) - _ = model.train_on_batch([input_a_np, input_b_np], - [output_a_np, output_b_np], { - 'dense_1': np.random.random((10,)), - 'dropout': np.random.random((10,)) - }) - - # test with custom TF placeholder as target - pl_target_a = tf.compat.v1.placeholder('float32', shape=(None, 4)) - model.compile(optimizer='rmsprop', loss='mse', - target_tensors={'dense_1': pl_target_a}) - model.train_on_batch([input_a_np, input_b_np], - [output_a_np, output_b_np]) + target_tensors={"does_not_exist": y2}, + ) + # test dictionary of 
target_tensors + model.compile( + optimizer, + loss, + metrics=[], + loss_weights=loss_weights, + sample_weight_mode=None, + target_tensors={"dense_1": y, "dropout": y1}, + ) + _ = model.train_on_batch( + [input_a_np, input_b_np], + [output_a_np, output_b_np], + { + "dense_1": np.random.random((10,)), + "dropout": np.random.random((10,)), + }, + ) + + # test with custom TF placeholder as target + pl_target_a = tf.compat.v1.placeholder("float32", shape=(None, 4)) + model.compile( + optimizer="rmsprop", + loss="mse", + target_tensors={"dense_1": pl_target_a}, + ) + model.train_on_batch( + [input_a_np, input_b_np], [output_a_np, output_b_np] + ) class TestTrainingWithMetrics(test_combinations.TestCase): - """Training tests related to metrics.""" - - @test_combinations.run_all_keras_modes - def test_metrics_names(self): - a = layers_module.Input(shape=(3,), name='input_a') - b = layers_module.Input(shape=(3,), name='input_b') - - dense = layers_module.Dense(4, name='dense') - c = dense(a) - d = dense(b) - e = layers_module.Dropout(0.5, name='dropout')(c) - - model = training_module.Model([a, b], [d, e]) - - optimizer = RMSPropOptimizer(learning_rate=0.001) - metrics = ['mse', metrics_module.BinaryAccuracy()] - model.compile( - optimizer, - loss='mae', - metrics=metrics, - run_eagerly=test_utils.should_run_eagerly()) - - mse_metric = 'mse' if tf.executing_eagerly() else 'mean_squared_error' - reference_metric_names = [ - 'loss', 'dense_loss', 'dropout_loss', 'dense_' + mse_metric, - 'dense_binary_accuracy', 'dropout_' + mse_metric, - 'dropout_binary_accuracy' - ] - - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_d_np = np.random.random((10, 4)) - output_e_np = np.random.random((10, 4)) - - model.fit([input_a_np, input_b_np], [output_d_np, output_e_np], - epochs=1, - batch_size=5) - self.assertEqual(reference_metric_names, model.metrics_names) - - @test_combinations.run_all_keras_modes - def test_metric_state_reset_between_fit_and_evaluate(self): - model = sequential.Sequential() - model.add(layers_module.Dense(3, activation='relu', input_dim=4)) - model.add(layers_module.Dense(1, activation='sigmoid')) - acc_obj = metrics_module.BinaryAccuracy() - model.compile( - loss='mae', - metrics=[acc_obj], - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=test_utils.should_run_eagerly()) - - x_train = np.random.random((100, 4)) - y_train = np.random.random((100, 1)) - model.fit(x_train, y_train, batch_size=5, epochs=2) - self.assertEqual(self.evaluate(acc_obj.count), 100) - - x_test = np.random.random((10, 4)) - y_test = np.random.random((10, 1)) - model.evaluate(x_test, y_test, batch_size=5) - self.assertEqual(self.evaluate(acc_obj.count), 10) - - @test_combinations.run_all_keras_modes - def test_metric_state_reset_between_test_on_batch_and_evaluate(self): - model = sequential.Sequential() - model.add(layers_module.Dense(3, activation='relu', input_dim=4)) - model.add(layers_module.Dense(1, activation='sigmoid')) - acc_obj = metrics_module.BinaryAccuracy() - model.compile( - loss='mae', - metrics=[acc_obj], - optimizer=RMSPropOptimizer(learning_rate=0.001), - run_eagerly=test_utils.should_run_eagerly()) - - x_test = np.random.random((10, 4)) - y_test = np.random.random((10, 1)) - loss, acc = model.test_on_batch(x_test[:2], y_test[:2]) - loss_eval, acc_eval = model.evaluate(x_test, y_test) - loss_1, acc_1 = model.test_on_batch(x_test[:2], y_test[:2]) - loss_eval_1, acc_eval_1 = model.evaluate(x_test, y_test) - self.assertEqual(loss, loss_1) - 
self.assertEqual(acc, acc_1) - self.assertEqual(loss_eval, loss_eval_1) - self.assertEqual(acc_eval, acc_eval_1) - - @test_combinations.run_with_all_model_types(exclude_models=['sequential']) - @test_combinations.run_all_keras_modes - def test_metrics_valid_compile_input_formats(self): - inp_1 = layers_module.Input(shape=(1,), name='input_1') - inp_2 = layers_module.Input(shape=(1,), name='input_2') - x = layers_module.Dense(3, kernel_initializer='ones', trainable=False) - out_1 = layers_module.Dense( - 1, kernel_initializer='ones', name='output_1', trainable=False) - out_2 = layers_module.Dense( - 1, kernel_initializer='ones', name='output_2', trainable=False) - - branch_a = [inp_1, x, out_1] - branch_b = [inp_2, x, out_2] - model = test_utils.get_multi_io_model(branch_a, branch_b) - - # list of metrics. - model.compile( - optimizer='rmsprop', - loss='mse', - metrics=[metrics_module.MeanSquaredError()], - weighted_metrics=[metrics_module.MeanSquaredError()], - run_eagerly=test_utils.should_run_eagerly()) - - # list of list of metrics. - model.compile( - optimizer='rmsprop', - loss='mse', - metrics=[ - metrics_module.MeanSquaredError(), - [metrics_module.MeanSquaredError(), - metrics_module.Accuracy()] - ], - weighted_metrics=[ - metrics_module.MeanSquaredError(), - [metrics_module.MeanSquaredError(), - metrics_module.Accuracy()] - ], - run_eagerly=test_utils.should_run_eagerly()) - - # dict of metrics. - model.compile( - optimizer='rmsprop', - loss='mse', - metrics={ - 'output_1': - metrics_module.MeanSquaredError(), - 'output_2': [ + """Training tests related to metrics.""" + + @test_combinations.run_all_keras_modes + def test_metrics_names(self): + a = layers_module.Input(shape=(3,), name="input_a") + b = layers_module.Input(shape=(3,), name="input_b") + + dense = layers_module.Dense(4, name="dense") + c = dense(a) + d = dense(b) + e = layers_module.Dropout(0.5, name="dropout")(c) + + model = training_module.Model([a, b], [d, e]) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + metrics = ["mse", metrics_module.BinaryAccuracy()] + model.compile( + optimizer, + loss="mae", + metrics=metrics, + run_eagerly=test_utils.should_run_eagerly(), + ) + + mse_metric = "mse" if tf.executing_eagerly() else "mean_squared_error" + reference_metric_names = [ + "loss", + "dense_loss", + "dropout_loss", + "dense_" + mse_metric, + "dense_binary_accuracy", + "dropout_" + mse_metric, + "dropout_binary_accuracy", + ] + + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + + output_d_np = np.random.random((10, 4)) + output_e_np = np.random.random((10, 4)) + + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + epochs=1, + batch_size=5, + ) + self.assertEqual(reference_metric_names, model.metrics_names) + + @test_combinations.run_all_keras_modes + def test_metric_state_reset_between_fit_and_evaluate(self): + model = sequential.Sequential() + model.add(layers_module.Dense(3, activation="relu", input_dim=4)) + model.add(layers_module.Dense(1, activation="sigmoid")) + acc_obj = metrics_module.BinaryAccuracy() + model.compile( + loss="mae", + metrics=[acc_obj], + optimizer=RMSPropOptimizer(learning_rate=0.001), + run_eagerly=test_utils.should_run_eagerly(), + ) + + x_train = np.random.random((100, 4)) + y_train = np.random.random((100, 1)) + model.fit(x_train, y_train, batch_size=5, epochs=2) + self.assertEqual(self.evaluate(acc_obj.count), 100) + + x_test = np.random.random((10, 4)) + y_test = np.random.random((10, 1)) + model.evaluate(x_test, y_test, 
batch_size=5) + self.assertEqual(self.evaluate(acc_obj.count), 10) + + @test_combinations.run_all_keras_modes + def test_metric_state_reset_between_test_on_batch_and_evaluate(self): + model = sequential.Sequential() + model.add(layers_module.Dense(3, activation="relu", input_dim=4)) + model.add(layers_module.Dense(1, activation="sigmoid")) + acc_obj = metrics_module.BinaryAccuracy() + model.compile( + loss="mae", + metrics=[acc_obj], + optimizer=RMSPropOptimizer(learning_rate=0.001), + run_eagerly=test_utils.should_run_eagerly(), + ) + + x_test = np.random.random((10, 4)) + y_test = np.random.random((10, 1)) + loss, acc = model.test_on_batch(x_test[:2], y_test[:2]) + loss_eval, acc_eval = model.evaluate(x_test, y_test) + loss_1, acc_1 = model.test_on_batch(x_test[:2], y_test[:2]) + loss_eval_1, acc_eval_1 = model.evaluate(x_test, y_test) + self.assertEqual(loss, loss_1) + self.assertEqual(acc, acc_1) + self.assertEqual(loss_eval, loss_eval_1) + self.assertEqual(acc_eval, acc_eval_1) + + @test_combinations.run_with_all_model_types(exclude_models=["sequential"]) + @test_combinations.run_all_keras_modes + def test_metrics_valid_compile_input_formats(self): + inp_1 = layers_module.Input(shape=(1,), name="input_1") + inp_2 = layers_module.Input(shape=(1,), name="input_2") + x = layers_module.Dense(3, kernel_initializer="ones", trainable=False) + out_1 = layers_module.Dense( + 1, kernel_initializer="ones", name="output_1", trainable=False + ) + out_2 = layers_module.Dense( + 1, kernel_initializer="ones", name="output_2", trainable=False + ) + + branch_a = [inp_1, x, out_1] + branch_b = [inp_2, x, out_2] + model = test_utils.get_multi_io_model(branch_a, branch_b) + + # list of metrics. + model.compile( + optimizer="rmsprop", + loss="mse", + metrics=[metrics_module.MeanSquaredError()], + weighted_metrics=[metrics_module.MeanSquaredError()], + run_eagerly=test_utils.should_run_eagerly(), + ) + + # list of list of metrics. + model.compile( + optimizer="rmsprop", + loss="mse", + metrics=[ metrics_module.MeanSquaredError(), - metrics_module.Accuracy() + [metrics_module.MeanSquaredError(), metrics_module.Accuracy()], ], - }, - weighted_metrics={ - 'output_1': + weighted_metrics=[ metrics_module.MeanSquaredError(), - 'output_2': [ - metrics_module.MeanSquaredError(), - metrics_module.Accuracy() + [metrics_module.MeanSquaredError(), metrics_module.Accuracy()], ], - }, - run_eagerly=test_utils.should_run_eagerly()) - - @test_combinations.run_all_keras_modes - def test_metrics_masking(self): - np.random.seed(1337) - model = sequential.Sequential() - model.add(layers_module.Masking(mask_value=0, input_shape=(2, 1))) - model.add( - layers_module.TimeDistributed( - layers_module.Dense(1, kernel_initializer='ones'))) - model.compile( - RMSPropOptimizer(learning_rate=0.001), - loss='mse', - weighted_metrics=['accuracy'], - run_eagerly=test_utils.should_run_eagerly()) - - # verify that masking is applied. - x = np.array([[[1], [1]], [[1], [1]], [[0], [0]]]) - y = np.array([[[1], [1]], [[0], [1]], [[1], [1]]]) - scores = model.train_on_batch(x, y) - self.assertArrayNear(scores, [0.25, 0.75], 0.1) - - # verify that masking is combined with sample weights. 
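[Editor's sketch] test_metric_state_reset_between_fit_and_evaluate above pins down a subtle contract: a stateful metric object passed to compile() is reset at each epoch boundary and again by evaluate(), so its count variable reflects only the most recent pass over the data. A public-API sketch of the same behavior (eager mode assumed):

import numpy as np
import tensorflow as tf

acc = tf.keras.metrics.BinaryAccuracy()
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(4,))]
)
model.compile(optimizer="rmsprop", loss="mae", metrics=[acc])

x_train = np.random.random((100, 4))
y_train = np.random.random((100, 1))
model.fit(x_train, y_train, batch_size=5, epochs=2, verbose=0)
print(acc.count.numpy())  # 100: state covers the last epoch only

x_test = np.random.random((10, 4))
y_test = np.random.random((10, 1))
model.evaluate(x_test, y_test, batch_size=5, verbose=0)
print(acc.count.numpy())  # 10: evaluate() reset the state first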
- w = np.array([3, 2, 4]) - scores = model.train_on_batch(x, y, sample_weight=w) - self.assertArrayNear(scores, [0.3328, 0.8], 0.001) - - @test_combinations.run_all_keras_modes - def test_add_metric_with_tensor_on_model(self): - x = layers_module.Input(shape=(1,)) - y = layers_module.Dense(1, kernel_initializer='ones')(x) - model = training_module.Model(x, y) - model.add_metric( - tf.reduce_sum(y), name='metric_1', aggregation='mean') - - if tf.executing_eagerly(): - # This is not a use case in v1 graph mode. - mean_result = metrics_module.Mean()(y) - with self.assertRaisesRegex( - ValueError, 'Expected a symbolic Tensor for the metric value'): - model.add_metric(mean_result, name='metric_2') - else: - with self.assertRaisesRegex( - ValueError, 'Using the result of calling a `Metric` object '): - with backend.get_graph().as_default(): - model.add_metric(metrics_module.Mean(name='metric_2')(y)) - - model.compile( - 'sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.ones(shape=(10, 1)) - targets = np.ones(shape=(10, 1)) - history = model.fit( - inputs, - targets, - epochs=2, - batch_size=5, - validation_data=(inputs, targets)) - self.assertEqual(history.history['metric_1'][-1], 5) - self.assertEqual(history.history['val_metric_1'][-1], 5) - - eval_results = model.evaluate(inputs, targets, batch_size=5) - self.assertEqual(eval_results[-1], 5) - - model.predict(inputs, batch_size=5) - model.train_on_batch(inputs, targets) - model.test_on_batch(inputs, targets) - - @test_combinations.run_all_keras_modes - def test_add_metric_in_model_call(self): - - class TestModel(training_module.Model): - - def __init__(self): - super().__init__(name='test_model') - self.dense1 = layers_module.Dense(2, kernel_initializer='ones') - self.mean = metrics_module.Mean(name='metric_1') - - def call(self, x): - self.add_metric( - tf.reduce_sum(x), name='metric_2', aggregation='mean') - # Provide same name as in the instance created in __init__ - # for eager mode - self.add_metric(self.mean(x), name='metric_1') - return self.dense1(x) - - model = TestModel() - model.compile( - loss='mse', - optimizer=RMSPropOptimizer(0.01), - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones(shape=(10, 1)) - y = np.ones(shape=(10, 2)) - history = model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y)) - self.assertAlmostEqual(history.history['metric_1'][-1], 1, 0) - self.assertAlmostEqual(history.history['val_metric_1'][-1], 1, 0) - self.assertAlmostEqual(history.history['metric_2'][-1], 5, 0) - self.assertAlmostEqual(history.history['val_metric_2'][-1], 5, 0) - - eval_results = model.evaluate(x, y, batch_size=5) - self.assertAlmostEqual(eval_results[1], 1, 0) - self.assertAlmostEqual(eval_results[2], 5, 0) - - model.predict(x, batch_size=5) - model.train_on_batch(x, y) - model.test_on_batch(x, y) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_add_metric_in_layer_call(self): - - class TestLayer(layers_module.Layer): - - def build(self, input_shape): - self.a = self.add_weight( - 'a', (1, 1), initializer='ones', trainable=False) - self.built = True - - def call(self, inputs): - self.add_metric( - tf.reduce_sum(inputs), name='metric_1', aggregation='mean') - return inputs + 1 - - layers = [ - TestLayer(input_shape=(1,)), - layers_module.Dense(2, kernel_initializer='ones') - ] - model = test_utils.get_model_from_layers(layers, input_shape=(1,)) - model.compile( - loss='mse', - optimizer=RMSPropOptimizer(0.01), - 
run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones(shape=(10, 1)) - y = np.ones(shape=(10, 2)) - history = model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y)) - self.assertEqual(history.history['metric_1'][-1], 5) - self.assertAlmostEqual(history.history['val_metric_1'][-1], 5, 0) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_model_metrics_list(self): - - class LayerWithAddMetric(layers_module.Layer): - - def __init__(self): - super().__init__() - self.dense = layers_module.Dense(1, kernel_initializer='ones') - - def __call__(self, inputs): - outputs = self.dense(inputs) - self.add_metric( - tf.reduce_sum(outputs), name='metric_1', aggregation='mean') - return outputs - - class LayerWithNestedAddMetricLayer(layers_module.Layer): - - def __init__(self): - super().__init__() - self.layer = LayerWithAddMetric() - - def call(self, inputs): - outputs = self.layer(inputs) - self.add_metric( - tf.reduce_sum(outputs), name='metric_2', aggregation='mean') - return outputs - - x = layers_module.Input(shape=(1,)) - y = LayerWithNestedAddMetricLayer()(x) - - model = training_module.Model(x, y) - model.add_metric( - tf.reduce_sum(y), name='metric_3', aggregation='mean') - - if tf.executing_eagerly(): - # This is not a use case in v1 graph mode. - mean_result = metrics_module.Mean()(y) - with self.assertRaisesRegex( - ValueError, 'Expected a symbolic Tensor for the metric value'): - model.add_metric(mean_result, name='metric_4') - - else: - with self.assertRaisesRegex( - ValueError, 'Using the result of calling a `Metric` object '): - with backend.get_graph().as_default(): - model.add_metric(metrics_module.Mean(name='metric_4')(y)) - - model.compile( - 'sgd', - loss='mse', - metrics=[metrics_module.Accuracy('metric_4')], - run_eagerly=test_utils.should_run_eagerly()) - - model.fit(np.ones((10, 1)), np.ones((10, 1)), batch_size=10) - - # Verify that the metrics added using `compile` and `add_metric` API are - # included - self.assertEqual([m.name for m in model.metrics], - ['loss', 'metric_4', 'metric_2', 'metric_1', 'metric_3']) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_model_metrics_list_in_call(self): - - class TestModel(training_module.Model): - - def __init__(self): - super().__init__(name='test_model') - self.dense1 = layers_module.Dense(2, kernel_initializer='ones') - - def call(self, x): - self.add_metric( - tf.reduce_sum(x), name='metric_1', aggregation='mean') - return self.dense1(x) - - model = TestModel() - model.compile( - loss='mse', - optimizer=RMSPropOptimizer(0.01), - metrics=[metrics_module.Accuracy('acc')], - run_eagerly=test_utils.should_run_eagerly()) - x = np.ones(shape=(10, 1)) - y = np.ones(shape=(10, 2)) - model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y)) - - self.assertEqual([m.name for m in model.metrics], - ['loss', 'acc', 'metric_1']) - - @test_combinations.run_all_keras_modes - def test_multiple_add_metric_calls(self): - - class TestModel(training_module.Model): - - def __init__(self): - super().__init__(name='test_model') - self.dense1 = layers_module.Dense(2, kernel_initializer='ones') - self.mean1 = metrics_module.Mean(name='metric_1') - self.mean2 = metrics_module.Mean(name='metric_2') - - def call(self, x): - self.add_metric(self.mean2(x), name='metric_2') - self.add_metric(self.mean1(x), name='metric_1') - self.add_metric( - tf.reduce_sum(x), name='metric_3', aggregation='mean') - return self.dense1(x) - - model = TestModel() - self.assertListEqual([m.name for m in 
model.metrics], - ['metric_1', 'metric_2']) - model.compile( - loss='mse', - optimizer=RMSPropOptimizer(0.01), - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones(shape=(10, 1)) - y = np.ones(shape=(10, 2)) - history = model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y)) - self.assertAlmostEqual(history.history['metric_1'][-1], 1, 0) - self.assertAlmostEqual(history.history['metric_2'][-1], 1, 0) - self.assertAlmostEqual(history.history['metric_3'][-1], 5, 0) - - eval_results = model.evaluate(x, y, batch_size=5) - self.assertArrayNear(eval_results[1:4], [1, 1, 5], 0.1) - - model.predict(x, batch_size=5) - model.train_on_batch(x, y) - model.test_on_batch(x, y) - - @test_combinations.run_all_keras_modes - def test_multiple_add_metric_calls_layer(self): - - class TestLayer(layers_module.Layer): - - def __init__(self): - super().__init__(name='test_layer') - self.dense1 = layers_module.Dense(2, kernel_initializer='ones') - self.m1 = metrics_module.Mean(name='m_1') - self.m2 = [ - metrics_module.Mean(name='m_2'), - metrics_module.Mean(name='m_3') + run_eagerly=test_utils.should_run_eagerly(), + ) + + # dict of metrics. + model.compile( + optimizer="rmsprop", + loss="mse", + metrics={ + "output_1": metrics_module.MeanSquaredError(), + "output_2": [ + metrics_module.MeanSquaredError(), + metrics_module.Accuracy(), + ], + }, + weighted_metrics={ + "output_1": metrics_module.MeanSquaredError(), + "output_2": [ + metrics_module.MeanSquaredError(), + metrics_module.Accuracy(), + ], + }, + run_eagerly=test_utils.should_run_eagerly(), + ) + + @test_combinations.run_all_keras_modes + def test_metrics_masking(self): + np.random.seed(1337) + model = sequential.Sequential() + model.add(layers_module.Masking(mask_value=0, input_shape=(2, 1))) + model.add( + layers_module.TimeDistributed( + layers_module.Dense(1, kernel_initializer="ones") + ) + ) + model.compile( + RMSPropOptimizer(learning_rate=0.001), + loss="mse", + weighted_metrics=["accuracy"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + # verify that masking is applied. + x = np.array( + # third row is masked + [[[1], [1]], [[1], [1]], [[0], [0]]] + ) + y = np.array([[[1], [1]], [[0], [1]], [[1], [1]]]) + + scores = model.test_on_batch(x, y) + self.assertArrayNear(scores, [0.25, 0.75], 0.0001) + + # verify that masking is combined with sample weights. + w = np.array([3, 2, 4]) + scores = model.test_on_batch(x, y, sample_weight=w) + self.assertArrayNear(scores, [0.5, 0.8], 0.0001) + + scores = model.train_on_batch(x, y) + self.assertArrayNear(scores, [0.25, 0.75], 0.0001) + + scores = model.train_on_batch(x, y, sample_weight=w) + self.assertArrayNear(scores, [0.5 - 0.001037, 0.8], 0.0001) + + @test_combinations.run_all_keras_modes + def test_add_metric_with_tensor_on_model(self): + x = layers_module.Input(shape=(1,)) + y = layers_module.Dense(1, kernel_initializer="ones")(x) + model = training_module.Model(x, y) + model.add_metric(tf.reduce_sum(y), name="metric_1", aggregation="mean") + + if tf.executing_eagerly(): + # This is not a use case in v1 graph mode. 
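[Editor's note] The expected scores in test_metrics_masking above can be checked by hand. The ones-initialized TimeDistributed(Dense(1)) passes each timestep value straight through, and the third sample ([[0], [0]]) is masked out, so only two samples count. One consistent reading of the weighting (weights scale the per-sample values; the masked sample's weight is dropped) reproduces every asserted number, and the final train_on_batch expectation is ~0.001 lower only because the preceding train step has already nudged the kernel away from ones:

# Unweighted: sample 1 is perfect, sample 2 has squared errors [1, 0].
per_sample_mse = [0.0, 0.5]
loss = sum(per_sample_mse) / 2            # 0.25
per_sample_acc = [1.0, 0.5]               # sample 2: 1 of 2 timesteps right
weighted_acc = sum(per_sample_acc) / 2    # 0.75
assert (loss, weighted_acc) == (0.25, 0.75)

# With sample_weight w = [3, 2, 4]; the masked sample's weight is ignored.
w = [3.0, 2.0]
loss_w = (w[0] * 0.0 + w[1] * 0.5) / 2             # 0.5
acc_w = (w[0] * 1.0 + w[1] * 0.5) / (w[0] + w[1])  # 0.8
assert (loss_w, acc_w) == (0.5, 0.8)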
+ mean_result = metrics_module.Mean()(y) + with self.assertRaisesRegex( + ValueError, "Expected a symbolic Tensor for the metric value" + ): + model.add_metric(mean_result, name="metric_2") + else: + with self.assertRaisesRegex( + ValueError, "Using the result of calling a `Metric` object " + ): + with backend.get_graph().as_default(): + model.add_metric(metrics_module.Mean(name="metric_2")(y)) + + model.compile( + "sgd", loss="mse", run_eagerly=test_utils.should_run_eagerly() + ) + + inputs = np.ones(shape=(10, 1)) + targets = np.ones(shape=(10, 1)) + history = model.fit( + inputs, + targets, + epochs=2, + batch_size=5, + validation_data=(inputs, targets), + ) + self.assertEqual(history.history["metric_1"][-1], 5) + self.assertEqual(history.history["val_metric_1"][-1], 5) + + eval_results = model.evaluate(inputs, targets, batch_size=5) + self.assertEqual(eval_results[-1], 5) + + model.predict(inputs, batch_size=5) + model.train_on_batch(inputs, targets) + model.test_on_batch(inputs, targets) + + @test_combinations.run_all_keras_modes + def test_add_metric_in_model_call(self): + class TestModel(training_module.Model): + def __init__(self): + super().__init__(name="test_model") + self.dense1 = layers_module.Dense(2, kernel_initializer="ones") + self.mean = metrics_module.Mean(name="metric_1") + + def call(self, x): + self.add_metric( + tf.reduce_sum(x), name="metric_2", aggregation="mean" + ) + # Provide same name as in the instance created in __init__ + # for eager mode + self.add_metric(self.mean(x), name="metric_1") + return self.dense1(x) + + model = TestModel() + model.compile( + loss="mse", + optimizer=RMSPropOptimizer(0.01), + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones(shape=(10, 1)) + y = np.ones(shape=(10, 2)) + history = model.fit( + x, y, epochs=2, batch_size=5, validation_data=(x, y) + ) + self.assertAlmostEqual(history.history["metric_1"][-1], 1, 0) + self.assertAlmostEqual(history.history["val_metric_1"][-1], 1, 0) + self.assertAlmostEqual(history.history["metric_2"][-1], 5, 0) + self.assertAlmostEqual(history.history["val_metric_2"][-1], 5, 0) + + eval_results = model.evaluate(x, y, batch_size=5) + self.assertAlmostEqual(eval_results[1], 1, 0) + self.assertAlmostEqual(eval_results[2], 5, 0) + + model.predict(x, batch_size=5) + model.train_on_batch(x, y) + model.test_on_batch(x, y) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_add_metric_in_layer_call(self): + class TestLayer(layers_module.Layer): + def build(self, input_shape): + self.a = self.add_weight( + "a", (1, 1), initializer="ones", trainable=False + ) + self.built = True + + def call(self, inputs): + self.add_metric( + tf.reduce_sum(inputs), name="metric_1", aggregation="mean" + ) + return inputs + 1 + + layers = [ + TestLayer(input_shape=(1,)), + layers_module.Dense(2, kernel_initializer="ones"), ] - self.m3 = { - 'mean4': metrics_module.Mean(name='m_4'), - 'mean5': metrics_module.Mean(name='m_5') - } - - def call(self, x): - self.add_metric(self.m2[0](x)) - self.add_metric(self.m2[1](x)) - self.add_metric(self.m1(x)) - self.add_metric(self.m3['mean4'](x)) - self.add_metric(self.m3['mean5'](x)) - self.add_metric(tf.reduce_sum(x), name='m_6', aggregation='mean') - return self.dense1(x) - - layer = TestLayer() - self.assertListEqual([m.name for m in layer.metrics], - ['m_1', 'm_2', 'm_3', 'm_4', 'm_5']) - - layer(np.ones((10, 10))) - self.assertListEqual([m.name for m in layer.metrics], - ['m_1', 'm_2', 'm_3', 'm_4', 'm_5', 'm_6']) - - 
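[Editor's sketch] test_add_metric_in_layer_call above attaches a metric from inside a layer's call(); any model built from the layer then tracks and reports it automatically. A self-contained sketch of that pattern (layer and metric names here are illustrative):

import numpy as np
import tensorflow as tf

class SumTracker(tf.keras.layers.Layer):
    # Illustrative layer: reports the sum of its inputs as a mean-aggregated
    # metric, then applies a trivial transformation.
    def call(self, inputs):
        self.add_metric(
            tf.reduce_sum(inputs), name="input_sum", aggregation="mean"
        )
        return inputs + 1

model = tf.keras.Sequential(
    [
        SumTracker(input_shape=(1,)),
        tf.keras.layers.Dense(2, kernel_initializer="ones"),
    ]
)
model.compile(optimizer="rmsprop", loss="mse")
history = model.fit(
    np.ones((10, 1)), np.ones((10, 2)), batch_size=5, verbose=0
)
print(history.history["input_sum"])  # ~[5.0]: each batch of 5 ones sums to 5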
@test_combinations.run_all_keras_modes - def test_duplicate_metric_name_in_add_metric(self): - - class TestModel(training_module.Model): - - def __init__(self): - super().__init__(name='test_model') - self.dense1 = layers_module.Dense(2, kernel_initializer='ones') - self.mean = metrics_module.Mean(name='metric_1') - self.mean2 = metrics_module.Mean(name='metric_1') - - def call(self, x): - self.add_metric(self.mean(x), name='metric_1') - return self.dense1(x) - - model = TestModel() - model.compile( - loss='mse', - optimizer=RMSPropOptimizer(0.01), - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones(shape=(10, 1)) - y = np.ones(shape=(10, 2)) - with self.assertRaisesRegex( - ValueError, - 'Please provide different names for the metrics you have added. ' - 'We found 2 metrics with the name: "metric_1"'): - model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y)) - - @test_combinations.run_all_keras_modes - def test_add_metric_without_name(self): - - class TestModel(training_module.Model): - - def __init__(self): - super().__init__(name='test_model') - self.dense1 = layers_module.Dense(2, kernel_initializer='ones') - - def call(self, x): - self.add_metric(tf.reduce_sum(x), aggregation='mean') - return self.dense1(x) - - model = TestModel() - model.compile( - loss='mse', - optimizer=RMSPropOptimizer(0.01), - run_eagerly=test_utils.should_run_eagerly()) - x = np.ones(shape=(10, 1)) - y = np.ones(shape=(10, 2)) - - with self.assertRaisesRegex(ValueError, - 'Please provide a name for your metric like'): - model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y)) - - @test_combinations.run_all_keras_modes - def test_add_metric_correctness(self): - inputs = input_layer.Input(shape=(1,)) - targets = input_layer.Input(shape=(1,)) - - class Bias(layers_module.Layer): - - def build(self, input_shape): - self.bias = self.add_weight('bias', (1,), initializer='zeros') - self.mae = metrics_module.MeanAbsoluteError(name='mae_1') - - def call(self, inputs): - inputs, targets = inputs - outputs = inputs + self.bias - self.add_metric(self.mae(targets, outputs), name='mae_1') - return outputs - - outputs = Bias()([inputs, targets]) - model = training_module.Model([inputs, targets], outputs) - - model.add_metric( - metrics_module.mean_absolute_error(targets, outputs), - name='mae_2', - aggregation='mean') - - model.compile( - loss='mae', - optimizer=optimizer_v2.gradient_descent.SGD(0.1), - metrics=[metrics_module.MeanAbsoluteError(name='mae_3')], - run_eagerly=test_utils.should_run_eagerly()) - - x = np.array([[0.], [1.], [2.]]) - y = np.array([[0.5], [2.], [3.5]]) - history = model.fit([x, y], y, batch_size=3, epochs=5) - - expected_val = [1., 0.9, 0.8, 0.7, 0.6] - for key in ['loss', 'mae_1', 'mae_2', 'mae_3']: - self.assertAllClose(history.history[key], expected_val, 1e-3) - - @test_combinations.run_all_keras_modes - def test_add_metric_order(self): - - class MyLayer(layers_module.Layer): - - def call(self, inputs, training=None, mask=None): - self.add_metric( - tf.ones([32]) * 2.0, name='two', aggregation='mean') - return inputs + model = test_utils.get_model_from_layers(layers, input_shape=(1,)) + model.compile( + loss="mse", + optimizer=RMSPropOptimizer(0.01), + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones(shape=(10, 1)) + y = np.ones(shape=(10, 2)) + history = model.fit( + x, y, epochs=2, batch_size=5, validation_data=(x, y) + ) + self.assertEqual(history.history["metric_1"][-1], 5) + self.assertAlmostEqual(history.history["val_metric_1"][-1], 5, 0) + + 
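[Editor's sketch] The model.metrics assertions in the tests that follow pin a specific composition order: the loss object first, then compile()-time metrics, then metrics registered via add_metric(). A small sketch of inspecting that list after one training step (names illustrative; the exact ordering is what the tests below assert):

import numpy as np
import tensorflow as tf

x = tf.keras.Input(shape=(1,))
y = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(x, y)
model.add_metric(tf.reduce_sum(y), name="output_sum", aggregation="mean")

model.compile(optimizer="sgd", loss="mse", metrics=["mae"])
model.fit(np.ones((4, 1)), np.ones((4, 1)), verbose=0)

# Loss first, then compile() metrics, then add_metric entries.
print([m.name for m in model.metrics])  # e.g. ['loss', 'mae', 'output_sum']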
@test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_model_metrics_list(self): + class LayerWithAddMetric(layers_module.Layer): + def __init__(self): + super().__init__() + self.dense = layers_module.Dense(1, kernel_initializer="ones") + + def __call__(self, inputs): + outputs = self.dense(inputs) + self.add_metric( + tf.reduce_sum(outputs), name="metric_1", aggregation="mean" + ) + return outputs + + class LayerWithNestedAddMetricLayer(layers_module.Layer): + def __init__(self): + super().__init__() + self.layer = LayerWithAddMetric() + + def call(self, inputs): + outputs = self.layer(inputs) + self.add_metric( + tf.reduce_sum(outputs), name="metric_2", aggregation="mean" + ) + return outputs + + x = layers_module.Input(shape=(1,)) + y = LayerWithNestedAddMetricLayer()(x) + + model = training_module.Model(x, y) + model.add_metric(tf.reduce_sum(y), name="metric_3", aggregation="mean") + + if tf.executing_eagerly(): + # This is not a use case in v1 graph mode. + mean_result = metrics_module.Mean()(y) + with self.assertRaisesRegex( + ValueError, "Expected a symbolic Tensor for the metric value" + ): + model.add_metric(mean_result, name="metric_4") - class MyModel(training_module.Model): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self._sampler = MyLayer(name='sampler') - - def call(self, inputs, training=None, mask=None): - z = self._sampler(inputs) - self.add_metric( - tf.ones([32]) * 1.0, name='one', aggregation='mean') - self.add_metric( - tf.ones([32]) * 3.0, name='three', aggregation='mean') - return z - - xdata = np.random.uniform(size=[32, 16]).astype(np.float32) - dataset_train = tf.data.Dataset.from_tensor_slices((xdata, xdata)) - dataset_train = dataset_train.batch(32, drop_remainder=True) - - model = MyModel() - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit(dataset_train, epochs=3) - self.assertDictEqual( - history.history, { - 'loss': [0.0, 0.0, 0.0], - 'three': [3.0, 3.0, 3.0], - 'two': [2.0, 2.0, 2.0], - 'one': [1.0, 1.0, 1.0] - }) - - @test_combinations.run_all_keras_modes - def test_add_metric_aggregation_mean(self): - - class TestModel(training_module.Model): - - def __init__(self): - super().__init__(name='test_model') - self.dense1 = layers_module.Dense(2, kernel_initializer='ones') - - def call(self, x): - self.add_metric( - tf.reduce_sum(x), name='metric_1', aggregation='mean') - return self.dense1(x) - - model = TestModel() - model.compile( - 'rmsprop', 'mse', run_eagerly=test_utils.should_run_eagerly()) - model.fit(np.ones(shape=(10, 1)), np.ones(shape=(10, 2)), batch_size=5) - - @test_combinations.run_all_keras_modes - def test_add_metric_aggregation_none(self): - - class TestModel(training_module.Model): - - def __init__(self): - super().__init__(name='test_model') - self.dense1 = layers_module.Dense(2, kernel_initializer='ones') - self.mean = metrics_module.Mean(name='metric_1') - - def call(self, x): - self.add_metric(self.mean(x), name='metric_1', aggregation=None) - return self.dense1(x) - - model = TestModel() - model.compile( - 'rmsprop', 'mse', run_eagerly=test_utils.should_run_eagerly()) - model.fit(np.ones(shape=(10, 1)), np.ones(shape=(10, 2)), batch_size=5) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def DISABLED_test_add_metric_invalid_aggregation(self): - # TODO(psv): Re-enable test once it is fixed. 
- x = layers_module.Input(shape=(1,)) - y = layers_module.Dense(1, kernel_initializer='ones')(x) - model = training_module.Model(x, y) - with self.assertRaisesRegex(ValueError, - 'only `mean` sample-wise metric aggregation'): - model.add_metric( - tf.reduce_sum(y), name='metric_1', aggregation='sum') - - with self.assertRaisesRegex(ValueError, - 'only `mean` sample-wise metric aggregation'): - model.add_metric( - tf.reduce_sum(y), name='metric_1', aggregation=None) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_calling_evaluate_in_callback_during_fit(self): - # Check fix for a bug that caused `evaluate` to hit a cached dataset - # when run from inside a fit callback. - x = layers_module.Input(shape=(2,)) - y = layers_module.Dense(2, kernel_initializer='ones', use_bias=False)(x) - model = training_module.Model(x, y) - - ones = np.ones((10, 2), dtype=np.float32) - zeros = np.zeros((10, 2), dtype=np.float32) - train_ds = tf.data.Dataset.from_tensor_slices( - (ones, ones)).batch(5) - val_ds_1 = tf.data.Dataset.from_tensor_slices( - (ones, ones)).batch(5) - val_ds_2 = tf.data.Dataset.from_tensor_slices( - (zeros, zeros)).batch(5) - model.compile('sgd', 'mse', run_eagerly=test_utils.should_run_eagerly()) - - class MyCallback(Callback): - - def on_epoch_end(self, *args, **kwargs): - eval_result = self.model.evaluate(val_ds_2) - if abs(eval_result) > 1e-7: - raise AssertionError( - 'Expected to hit the zeros dataset but got high loss value of %s' - % eval_result) - - history = model.fit( - train_ds, validation_data=val_ds_1, callbacks=[MyCallback()]) - # Evaluate at the end of fit should hit the ones dataset (cached) - self.assertGreater(abs(history.history['val_loss'][-1]), 0.1) - # Standalone call to evaluate should not hit the cached dataset - eval_result = model.evaluate(val_ds_2) - self.assertLess(abs(eval_result), 1e-7) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_model_with_nested_compiled_model(self): - - class LayerWithAddMetric(layers_module.Layer): - - def __init__(self): - super().__init__() - self.dense = layers_module.Dense(1, kernel_initializer='ones') - - def call(self, inputs): - outputs = self.dense(inputs) - self.add_metric( - tf.reduce_sum(outputs), name='mean', aggregation='mean') - return outputs - - x = layers_module.Input(shape=(1,)) - y = LayerWithAddMetric()(x) - - inner_model = training_module.Model(x, y) - inner_model.add_metric( - tf.reduce_sum(y), name='mean1', aggregation='mean') - - inner_model.compile( - 'sgd', - loss='mse', - metrics=[metrics_module.Accuracy('acc')], - run_eagerly=test_utils.should_run_eagerly()) - inner_model.fit(np.ones((10, 1)), np.ones((10, 1)), batch_size=10) - - self.assertEqual([m.name for m in inner_model.metrics], - ['loss', 'acc', 'mean', 'mean1']) - - x = layers_module.Input(shape=[1]) - y = inner_model(x) - outer_model = training_module.Model(x, y) - outer_model.add_metric( - tf.reduce_sum(y), name='mean2', aggregation='mean') - - outer_model.compile( - 'sgd', - loss='mse', - metrics=[metrics_module.Accuracy('acc2')], - run_eagerly=test_utils.should_run_eagerly()) - outer_model.fit(np.ones((10, 1)), np.ones((10, 1)), batch_size=10) - self.assertEqual([m.name for m in outer_model.metrics], - ['loss', 'acc2', 'mean', 'mean1', 'mean2']) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_model_with_metric_class_that_returns_dict(self): - x = layers_module.Input(shape=(2,)) - y = layers_module.Dense(3)(x) - model = training_module.Model(x, y) - - class 
DictMetric(metrics_module.Metric): - - def __init__(self): - super().__init__() - self.sample_count = tf.Variable(0) - self.l2_sum = tf.Variable(0.) - - def update_state(self, y_true, y_pred, sample_weight=None): - self.l2_sum.assign_add( - tf.reduce_sum(tf.square(y_true - y_pred))) - self.sample_count.assign_add(tf.shape(y_true)[0]) - - def reset_state(self): - self.sample_count.assign(0) - self.l2_sum.assign(0.) - - def result(self): - mse = self.l2_sum / tf.cast(self.sample_count, 'float32') - rmse = tf.sqrt(mse) - return {'my_mse': mse, - 'my_rmse': rmse} - - model.compile('sgd', - 'mse', - metrics=['mae', DictMetric()], - run_eagerly=test_utils.should_run_eagerly()) - - history = model.fit(np.ones((10, 2)), np.ones((10, 3))) - self.assertEqual(list(history.history.keys()), - ['loss', 'mae', 'my_mse', 'my_rmse']) - list_evaluate_res = model.evaluate( - np.ones((10, 2)), np.ones((10, 3))) - self.assertEqual(len(list_evaluate_res), 4) - dict_evaluate_res = model.evaluate( - np.ones((10, 2)), np.ones((10, 3)), return_dict=True) - self.assertEqual(list(dict_evaluate_res.keys()), - ['loss', 'mae', 'my_mse', 'my_rmse']) - list_train_on_batch_res = model.train_on_batch( - np.ones((10, 2)), np.ones((10, 3))) - self.assertEqual(len(list_train_on_batch_res), 4) - dict_train_on_batch_res = model.train_on_batch( - np.ones((10, 2)), np.ones((10, 3)), return_dict=True) - self.assertEqual(list(dict_train_on_batch_res.keys()), - ['loss', 'mae', 'my_mse', 'my_rmse']) - list_test_on_batch_res = model.test_on_batch( - np.ones((10, 2)), np.ones((10, 3))) - self.assertEqual(len(list_test_on_batch_res), 4) - dict_test_on_batch_res = model.test_on_batch( - np.ones((10, 2)), np.ones((10, 3)), return_dict=True) - self.assertEqual(list(dict_test_on_batch_res.keys()), - ['loss', 'mae', 'my_mse', 'my_rmse']) + else: + with self.assertRaisesRegex( + ValueError, "Using the result of calling a `Metric` object " + ): + with backend.get_graph().as_default(): + model.add_metric(metrics_module.Mean(name="metric_4")(y)) + + model.compile( + "sgd", + loss="mse", + metrics=[metrics_module.Accuracy("metric_4")], + run_eagerly=test_utils.should_run_eagerly(), + ) + + model.fit(np.ones((10, 1)), np.ones((10, 1)), batch_size=10) + + # Verify that the metrics added using `compile` and `add_metric` API are + # included + self.assertEqual( + [m.name for m in model.metrics], + ["loss", "metric_4", "metric_2", "metric_1", "metric_3"], + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_model_metrics_list_in_call(self): + class TestModel(training_module.Model): + def __init__(self): + super().__init__(name="test_model") + self.dense1 = layers_module.Dense(2, kernel_initializer="ones") + + def call(self, x): + self.add_metric( + tf.reduce_sum(x), name="metric_1", aggregation="mean" + ) + return self.dense1(x) + + model = TestModel() + model.compile( + loss="mse", + optimizer=RMSPropOptimizer(0.01), + metrics=[metrics_module.Accuracy("acc")], + run_eagerly=test_utils.should_run_eagerly(), + ) + x = np.ones(shape=(10, 1)) + y = np.ones(shape=(10, 2)) + model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y)) + + self.assertEqual( + [m.name for m in model.metrics], ["loss", "acc", "metric_1"] + ) + + @test_combinations.run_all_keras_modes + def test_multiple_add_metric_calls(self): + class TestModel(training_module.Model): + def __init__(self): + super().__init__(name="test_model") + self.dense1 = layers_module.Dense(2, kernel_initializer="ones") + self.mean1 = metrics_module.Mean(name="metric_1") + 
self.mean2 = metrics_module.Mean(name="metric_2") + + def call(self, x): + self.add_metric(self.mean2(x), name="metric_2") + self.add_metric(self.mean1(x), name="metric_1") + self.add_metric( + tf.reduce_sum(x), name="metric_3", aggregation="mean" + ) + return self.dense1(x) + + model = TestModel() + self.assertListEqual( + [m.name for m in model.metrics], ["metric_1", "metric_2"] + ) + model.compile( + loss="mse", + optimizer=RMSPropOptimizer(0.01), + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones(shape=(10, 1)) + y = np.ones(shape=(10, 2)) + history = model.fit( + x, y, epochs=2, batch_size=5, validation_data=(x, y) + ) + self.assertAlmostEqual(history.history["metric_1"][-1], 1, 0) + self.assertAlmostEqual(history.history["metric_2"][-1], 1, 0) + self.assertAlmostEqual(history.history["metric_3"][-1], 5, 0) + + eval_results = model.evaluate(x, y, batch_size=5) + self.assertArrayNear(eval_results[1:4], [1, 1, 5], 0.1) + + model.predict(x, batch_size=5) + model.train_on_batch(x, y) + model.test_on_batch(x, y) + + @test_combinations.run_all_keras_modes + def test_multiple_add_metric_calls_layer(self): + class TestLayer(layers_module.Layer): + def __init__(self): + super().__init__(name="test_layer") + self.dense1 = layers_module.Dense(2, kernel_initializer="ones") + self.m1 = metrics_module.Mean(name="m_1") + self.m2 = [ + metrics_module.Mean(name="m_2"), + metrics_module.Mean(name="m_3"), + ] + self.m3 = { + "mean4": metrics_module.Mean(name="m_4"), + "mean5": metrics_module.Mean(name="m_5"), + } + + def call(self, x): + self.add_metric(self.m2[0](x)) + self.add_metric(self.m2[1](x)) + self.add_metric(self.m1(x)) + self.add_metric(self.m3["mean4"](x)) + self.add_metric(self.m3["mean5"](x)) + self.add_metric( + tf.reduce_sum(x), name="m_6", aggregation="mean" + ) + return self.dense1(x) + + layer = TestLayer() + self.assertListEqual( + [m.name for m in layer.metrics], ["m_1", "m_2", "m_3", "m_4", "m_5"] + ) + + layer(np.ones((10, 10))) + self.assertListEqual( + [m.name for m in layer.metrics], + ["m_1", "m_2", "m_3", "m_4", "m_5", "m_6"], + ) + + @test_combinations.run_all_keras_modes + def test_duplicate_metric_name_in_add_metric(self): + class TestModel(training_module.Model): + def __init__(self): + super().__init__(name="test_model") + self.dense1 = layers_module.Dense(2, kernel_initializer="ones") + self.mean = metrics_module.Mean(name="metric_1") + self.mean2 = metrics_module.Mean(name="metric_1") + + def call(self, x): + self.add_metric(self.mean(x), name="metric_1") + return self.dense1(x) + + model = TestModel() + model.compile( + loss="mse", + optimizer=RMSPropOptimizer(0.01), + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones(shape=(10, 1)) + y = np.ones(shape=(10, 2)) + with self.assertRaisesRegex( + ValueError, + "Please provide different names for the metrics you have added. 
" + 'We found 2 metrics with the name: "metric_1"', + ): + model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y)) + + @test_combinations.run_all_keras_modes + def test_add_metric_without_name(self): + class TestModel(training_module.Model): + def __init__(self): + super().__init__(name="test_model") + self.dense1 = layers_module.Dense(2, kernel_initializer="ones") + + def call(self, x): + self.add_metric(tf.reduce_sum(x), aggregation="mean") + return self.dense1(x) + + model = TestModel() + model.compile( + loss="mse", + optimizer=RMSPropOptimizer(0.01), + run_eagerly=test_utils.should_run_eagerly(), + ) + x = np.ones(shape=(10, 1)) + y = np.ones(shape=(10, 2)) + + with self.assertRaisesRegex( + ValueError, "Please provide a name for your metric like" + ): + model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y)) + + @test_combinations.run_all_keras_modes + def test_add_metric_correctness(self): + inputs = input_layer.Input(shape=(1,)) + targets = input_layer.Input(shape=(1,)) + + class Bias(layers_module.Layer): + def build(self, input_shape): + self.bias = self.add_weight("bias", (1,), initializer="zeros") + self.mae = metrics_module.MeanAbsoluteError(name="mae_1") + + def call(self, inputs): + inputs, targets = inputs + outputs = inputs + self.bias + self.add_metric(self.mae(targets, outputs), name="mae_1") + return outputs + + outputs = Bias()([inputs, targets]) + model = training_module.Model([inputs, targets], outputs) + + model.add_metric( + metrics_module.mean_absolute_error(targets, outputs), + name="mae_2", + aggregation="mean", + ) + + model.compile( + loss="mae", + optimizer=optimizer_legacy.gradient_descent.SGD(0.1), + metrics=[metrics_module.MeanAbsoluteError(name="mae_3")], + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.array([[0.0], [1.0], [2.0]]) + y = np.array([[0.5], [2.0], [3.5]]) + history = model.fit([x, y], y, batch_size=3, epochs=5) + + expected_val = [1.0, 0.9, 0.8, 0.7, 0.6] + for key in ["loss", "mae_1", "mae_2", "mae_3"]: + self.assertAllClose(history.history[key], expected_val, 1e-3) + + @test_combinations.run_all_keras_modes + def test_add_metric_order(self): + class MyLayer(layers_module.Layer): + def call(self, inputs, training=None, mask=None): + self.add_metric( + tf.ones([32]) * 2.0, name="two", aggregation="mean" + ) + return inputs + + class MyModel(training_module.Model): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._sampler = MyLayer(name="sampler") + + def call(self, inputs, training=None, mask=None): + z = self._sampler(inputs) + self.add_metric( + tf.ones([32]) * 1.0, name="one", aggregation="mean" + ) + self.add_metric( + tf.ones([32]) * 3.0, name="three", aggregation="mean" + ) + return z + + xdata = np.random.uniform(size=[32, 16]).astype(np.float32) + dataset_train = tf.data.Dataset.from_tensor_slices((xdata, xdata)) + dataset_train = dataset_train.batch(32, drop_remainder=True) + + model = MyModel() + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit(dataset_train, epochs=3) + self.assertDictEqual( + history.history, + { + "loss": [0.0, 0.0, 0.0], + "three": [3.0, 3.0, 3.0], + "two": [2.0, 2.0, 2.0], + "one": [1.0, 1.0, 1.0], + }, + ) + + @test_combinations.run_all_keras_modes + def test_add_metric_aggregation_mean(self): + class TestModel(training_module.Model): + def __init__(self): + super().__init__(name="test_model") + self.dense1 = layers_module.Dense(2, kernel_initializer="ones") + + def call(self, x): + 
self.add_metric( + tf.reduce_sum(x), name="metric_1", aggregation="mean" + ) + return self.dense1(x) + + model = TestModel() + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + model.fit(np.ones(shape=(10, 1)), np.ones(shape=(10, 2)), batch_size=5) + + @test_combinations.run_all_keras_modes + def test_add_metric_aggregation_none(self): + class TestModel(training_module.Model): + def __init__(self): + super().__init__(name="test_model") + self.dense1 = layers_module.Dense(2, kernel_initializer="ones") + self.mean = metrics_module.Mean(name="metric_1") + + def call(self, x): + self.add_metric(self.mean(x), name="metric_1", aggregation=None) + return self.dense1(x) + + model = TestModel() + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + model.fit(np.ones(shape=(10, 1)), np.ones(shape=(10, 2)), batch_size=5) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def DISABLED_test_add_metric_invalid_aggregation(self): + # TODO(psv): Re-enable test once it is fixed. + x = layers_module.Input(shape=(1,)) + y = layers_module.Dense(1, kernel_initializer="ones")(x) + model = training_module.Model(x, y) + with self.assertRaisesRegex( + ValueError, "only `mean` sample-wise metric aggregation" + ): + model.add_metric( + tf.reduce_sum(y), name="metric_1", aggregation="sum" + ) + + with self.assertRaisesRegex( + ValueError, "only `mean` sample-wise metric aggregation" + ): + model.add_metric( + tf.reduce_sum(y), name="metric_1", aggregation=None + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_calling_evaluate_in_callback_during_fit(self): + # Check fix for a bug that caused `evaluate` to hit a cached dataset + # when run from inside a fit callback. + x = layers_module.Input(shape=(2,)) + y = layers_module.Dense(2, kernel_initializer="ones", use_bias=False)(x) + model = training_module.Model(x, y) + + ones = np.ones((10, 2), dtype=np.float32) + zeros = np.zeros((10, 2), dtype=np.float32) + train_ds = tf.data.Dataset.from_tensor_slices((ones, ones)).batch(5) + val_ds_1 = tf.data.Dataset.from_tensor_slices((ones, ones)).batch(5) + val_ds_2 = tf.data.Dataset.from_tensor_slices((zeros, zeros)).batch(5) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + + class MyCallback(Callback): + def on_epoch_end(self, *args, **kwargs): + eval_result = self.model.evaluate(val_ds_2) + if abs(eval_result) > 1e-7: + raise AssertionError( + "Expected to hit the zeros dataset but got high loss " + "value of %s" % eval_result + ) + + history = model.fit( + train_ds, validation_data=val_ds_1, callbacks=[MyCallback()] + ) + # Evaluate at the end of fit should hit the ones dataset (cached) + self.assertGreater(abs(history.history["val_loss"][-1]), 0.1) + # Standalone call to evaluate should not hit the cached dataset + eval_result = model.evaluate(val_ds_2) + self.assertLess(abs(eval_result), 1e-7) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_model_with_nested_compiled_model(self): + class LayerWithAddMetric(layers_module.Layer): + def __init__(self): + super().__init__() + self.dense = layers_module.Dense(1, kernel_initializer="ones") + + def call(self, inputs): + outputs = self.dense(inputs) + self.add_metric( + tf.reduce_sum(outputs), name="mean", aggregation="mean" + ) + return outputs + + x = layers_module.Input(shape=(1,)) + y = LayerWithAddMetric()(x) + + inner_model = training_module.Model(x, y) + inner_model.add_metric( + tf.reduce_sum(y), name="mean1", 
aggregation="mean" + ) + + inner_model.compile( + "sgd", + loss="mse", + metrics=[metrics_module.Accuracy("acc")], + run_eagerly=test_utils.should_run_eagerly(), + ) + inner_model.fit(np.ones((10, 1)), np.ones((10, 1)), batch_size=10) + + self.assertEqual( + [m.name for m in inner_model.metrics], + ["loss", "acc", "mean", "mean1"], + ) + + x = layers_module.Input(shape=[1]) + y = inner_model(x) + outer_model = training_module.Model(x, y) + outer_model.add_metric( + tf.reduce_sum(y), name="mean2", aggregation="mean" + ) + + outer_model.compile( + "sgd", + loss="mse", + metrics=[metrics_module.Accuracy("acc2")], + run_eagerly=test_utils.should_run_eagerly(), + ) + outer_model.fit(np.ones((10, 1)), np.ones((10, 1)), batch_size=10) + self.assertEqual( + [m.name for m in outer_model.metrics], + ["loss", "acc2", "mean", "mean1", "mean2"], + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_model_with_metric_class_that_returns_dict(self): + x = layers_module.Input(shape=(2,)) + y = layers_module.Dense(3)(x) + model = training_module.Model(x, y) + + class DictMetric(metrics_module.Metric): + def __init__(self): + super().__init__() + self.sample_count = tf.Variable(0) + self.l2_sum = tf.Variable(0.0) + + def update_state(self, y_true, y_pred, sample_weight=None): + self.l2_sum.assign_add( + tf.reduce_sum(tf.square(y_true - y_pred)) + ) + self.sample_count.assign_add(tf.shape(y_true)[0]) + + def reset_state(self): + self.sample_count.assign(0) + self.l2_sum.assign(0.0) + + def result(self): + mse = self.l2_sum / tf.cast(self.sample_count, "float32") + rmse = tf.sqrt(mse) + return {"my_mse": mse, "my_rmse": rmse} + + model.compile( + "sgd", + "mse", + metrics=["mae", DictMetric()], + run_eagerly=test_utils.should_run_eagerly(), + ) + + history = model.fit(np.ones((10, 2)), np.ones((10, 3))) + self.assertEqual( + list(history.history.keys()), ["loss", "mae", "my_mse", "my_rmse"] + ) + list_evaluate_res = model.evaluate(np.ones((10, 2)), np.ones((10, 3))) + self.assertEqual(len(list_evaluate_res), 4) + dict_evaluate_res = model.evaluate( + np.ones((10, 2)), np.ones((10, 3)), return_dict=True + ) + self.assertEqual( + list(dict_evaluate_res.keys()), ["loss", "mae", "my_mse", "my_rmse"] + ) + list_train_on_batch_res = model.train_on_batch( + np.ones((10, 2)), np.ones((10, 3)) + ) + self.assertEqual(len(list_train_on_batch_res), 4) + dict_train_on_batch_res = model.train_on_batch( + np.ones((10, 2)), np.ones((10, 3)), return_dict=True + ) + self.assertEqual( + list(dict_train_on_batch_res.keys()), + ["loss", "mae", "my_mse", "my_rmse"], + ) + list_test_on_batch_res = model.test_on_batch( + np.ones((10, 2)), np.ones((10, 3)) + ) + self.assertEqual(len(list_test_on_batch_res), 4) + dict_test_on_batch_res = model.test_on_batch( + np.ones((10, 2)), np.ones((10, 3)), return_dict=True + ) + self.assertEqual( + list(dict_test_on_batch_res.keys()), + ["loss", "mae", "my_mse", "my_rmse"], + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_add_metric_in_model_call_that_returns_dict(self): + class DictMetric(metrics_module.Metric): + def __init__(self): + super().__init__() + self.sample_count = tf.Variable(0) + self.l2_sum = tf.Variable(0.0) + + def update_state(self, y_true, y_pred, sample_weight=None): + self.l2_sum.assign_add( + tf.reduce_sum(tf.square(y_true - y_pred)) + ) + self.sample_count.assign_add(tf.shape(y_true)[0]) + + def reset_state(self): + self.sample_count.assign(0) + self.l2_sum.assign(0.0) + + def result(self): + mse = self.l2_sum / 
tf.cast(self.sample_count, "float32") + rmse = tf.sqrt(mse) + return {"my_mse": mse, "my_rmse": rmse} + + class TestModel(training_module.Model): + def __init__(self): + super().__init__(name="test_model") + self.dense1 = layers_module.Dense(2, kernel_initializer="ones") + self.dict_metric = DictMetric() + + def call(self, x): + self.add_metric( + tf.reduce_sum(x), name="metric_2", aggregation="mean" + ) + # Provide same name as in the instance created in __init__ + # for eager mode + self.add_metric(self.dict_metric(x, 1 - x), name="metric_1") + return self.dense1(x) + + model = TestModel() + model.compile( + loss="mse", + optimizer=RMSPropOptimizer(0.01), + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones(shape=(10, 1)) + y = np.ones(shape=(10, 2)) + history = model.fit( + x, y, epochs=2, batch_size=5, validation_data=(x, y) + ) + self.assertAlmostEqual(history.history["metric_2"][-1], 5, 0) + self.assertAlmostEqual(history.history["val_metric_2"][-1], 5, 0) + self.assertAlmostEqual(history.history["my_mse"][-1], 1, 0) + self.assertAlmostEqual(history.history["val_my_mse"][-1], 1, 0) + self.assertAlmostEqual(history.history["my_rmse"][-1], 1, 0) + self.assertAlmostEqual(history.history["val_my_rmse"][-1], 1, 0) + + eval_results = model.evaluate(x, y, batch_size=5, return_dict=True) + self.assertAlmostEqual(eval_results["metric_2"], 5, 0) + self.assertAlmostEqual(eval_results["my_mse"], 1, 0) + self.assertAlmostEqual(eval_results["my_rmse"], 1, 0) + + model.predict(x, batch_size=5) + model.train_on_batch(x, y) + model.test_on_batch(x, y) class BareUpdateLayer(layers_module.Layer): + def build(self, input_shape): + self.counter = self.add_weight( + "counter", + dtype="int32", + shape=(), + initializer="zeros", + trainable=False, + ) - def build(self, input_shape): - self.counter = self.add_weight( - 'counter', - dtype='int32', - shape=(), - initializer='zeros', - trainable=False) - - def call(self, inputs): - tf.compat.v1.assign_add(self.counter, 1) - return tf.cast(self.counter, inputs.dtype) * inputs + def call(self, inputs): + tf.compat.v1.assign_add(self.counter, 1) + return tf.cast(self.counter, inputs.dtype) * inputs class LambdaUpdateLayer(layers_module.Layer): + def build(self, input_shape): + self.counter = self.add_weight( + "counter", + dtype="int32", + shape=(), + initializer="zeros", + trainable=False, + ) - def build(self, input_shape): - self.counter = self.add_weight( - 'counter', - dtype='int32', - shape=(), - initializer='zeros', - trainable=False) - - def call(self, inputs): - # Make sure update isn't run twice. - self.add_update(lambda: tf.compat.v1.assign_add(self.counter, 1)) - return tf.cast(self.counter, inputs.dtype) * inputs + def call(self, inputs): + # Make sure update isn't run twice. 
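The update-carrying layers defined here can be exercised standalone. A sketch of the same counter idea using the public tf.keras API (an assumption; the tests use `tf.compat.v1.assign_add`, while `Variable.assign_add` is the TF2 equivalent): the counter is bumped once per executed batch, so fitting 10 samples with batch_size=2 leaves it at 5.

import numpy as np
import tensorflow as tf


class CountingLayer(tf.keras.layers.Layer):
    def build(self, input_shape):
        self.counter = self.add_weight(
            "counter", shape=(), dtype="int32",
            initializer="zeros", trainable=False,
        )

    def call(self, inputs):
        self.counter.assign_add(1)  # one increment per executed step
        return tf.cast(self.counter, inputs.dtype) * inputs


layer = CountingLayer()
model = tf.keras.Sequential([layer, tf.keras.layers.Dense(1)])
model.compile("sgd", "mse")
model.fit(np.ones((10, 10)), np.ones((10, 1)), batch_size=2, verbose=0)
print(int(layer.counter.numpy()))  # 10 samples / batch_size 2 -> 5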
+ self.add_update(lambda: tf.compat.v1.assign_add(self.counter, 1)) + return tf.cast(self.counter, inputs.dtype) * inputs class NestedUpdateLayer(layers_module.Layer): + def build(self, input_shape): + self.layer = BareUpdateLayer() + self.layer.build(input_shape) - def build(self, input_shape): - self.layer = BareUpdateLayer() - self.layer.build(input_shape) + @property + def counter(self): + return self.layer.counter - @property - def counter(self): - return self.layer.counter - - def call(self, inputs): - return self.layer(inputs) + def call(self, inputs): + return self.layer(inputs) class SubgraphUpdateLayer(layers_module.Layer): + def build(self, input_shape): + self.counter = self.add_weight( + "counter", + dtype="int32", + shape=(), + initializer="zeros", + trainable=False, + ) + + def call(self, inputs, training=None): + if training is None: + training = backend.learning_phase() - def build(self, input_shape): - self.counter = self.add_weight( - 'counter', - dtype='int32', - shape=(), - initializer='zeros', - trainable=False) - - def call(self, inputs, training=None): - if training is None: - training = backend.learning_phase() - - if training: - self.counter.assign(self.counter + 1) - return inputs + if training: + self.counter.assign(self.counter + 1) + return inputs @test_combinations.run_all_keras_modes(always_skip_v1=True) class TestAutoUpdates(test_combinations.TestCase): - - @test_combinations.run_with_all_model_types - @parameterized.named_parameters( - ('bare_update', BareUpdateLayer), - ('lambda_update', LambdaUpdateLayer), - ('nested_update', NestedUpdateLayer)) - def test_updates_in_model(self, layer_builder): - layer = layer_builder() - x, y = np.ones((10, 10)), np.ones((10, 1)) - model = test_utils.get_model_from_layers( - [layer, layers_module.Dense(1)], input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, batch_size=2, epochs=1) - self.assertEqual(self.evaluate(layer.counter), 5) - - @test_combinations.run_with_all_model_types - def test_lambda_updates_trainable_false(self): - x, y = np.ones((10, 10)), np.ones((10, 1)) - layer = LambdaUpdateLayer() - model = test_utils.get_model_from_layers( - [layer, layers_module.Dense(1)], input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, batch_size=2, epochs=1) - self.assertEqual(self.evaluate(layer.counter), 5) - layer.trainable = False - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, batch_size=2, epochs=1) - self.assertEqual(self.evaluate(layer.counter), 5) - - @test_combinations.run_with_all_model_types - def test_subgraph_updates_in_model(self): - layer = SubgraphUpdateLayer() - x, y = np.ones((10, 10)), np.ones((10, 1)) - model = test_utils.get_model_from_layers( - [layer, layers_module.Dense(1)], input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, batch_size=2, epochs=1) - self.assertEqual(self.evaluate(layer.counter), 5) - - @parameterized.named_parameters( - ('bare_update', BareUpdateLayer), - ('lambda_update', LambdaUpdateLayer), - ('nested_update', NestedUpdateLayer)) - def test_updates_standalone_layer(self, layer_builder): - layer = layer_builder() - y = layer(np.ones((10, 10))) - self.evaluate(layer.counter.initializer) - self.evaluate(y) - self.assertEqual(self.evaluate(layer.counter), 1) - - def test_trainable_false_standalone_layer(self): - layer = 
LambdaUpdateLayer() - y = layer(np.ones((10, 10))) - self.evaluate(layer.counter.initializer) - self.evaluate(y) - self.assertEqual(self.evaluate(layer.counter), 1) - layer.trainable = False - y = layer(np.ones((10, 10))) - self.evaluate(y) - self.assertEqual(self.evaluate(layer.counter), 1) - - @test_combinations.run_with_all_model_types - def test_batchnorm_trainable_false(self): - bn = layers_module.BatchNormalization() - model = test_utils.get_model_from_layers([bn, layers_module.Dense(1)], - input_shape=(10,)) - bn.trainable = False - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 10)), np.ones((10, 1)) - model.fit(x, y, batch_size=2, epochs=1) - self.assertAllEqual(self.evaluate(bn.moving_mean), np.zeros((10,))) - self.assertAllEqual(self.evaluate(bn.moving_variance), np.ones((10,))) + @test_combinations.run_with_all_model_types + @parameterized.named_parameters( + ("bare_update", BareUpdateLayer), + ("lambda_update", LambdaUpdateLayer), + ("nested_update", NestedUpdateLayer), + ) + def test_updates_in_model(self, layer_builder): + layer = layer_builder() + x, y = np.ones((10, 10)), np.ones((10, 1)) + model = test_utils.get_model_from_layers( + [layer, layers_module.Dense(1)], input_shape=(10,) + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + model.fit(x, y, batch_size=2, epochs=1) + self.assertEqual(self.evaluate(layer.counter), 5) + + @test_combinations.run_with_all_model_types + def test_lambda_updates_trainable_false(self): + x, y = np.ones((10, 10)), np.ones((10, 1)) + layer = LambdaUpdateLayer() + model = test_utils.get_model_from_layers( + [layer, layers_module.Dense(1)], input_shape=(10,) + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + model.fit(x, y, batch_size=2, epochs=1) + self.assertEqual(self.evaluate(layer.counter), 5) + layer.trainable = False + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + model.fit(x, y, batch_size=2, epochs=1) + self.assertEqual(self.evaluate(layer.counter), 5) + + @test_combinations.run_with_all_model_types + def test_subgraph_updates_in_model(self): + layer = SubgraphUpdateLayer() + x, y = np.ones((10, 10)), np.ones((10, 1)) + model = test_utils.get_model_from_layers( + [layer, layers_module.Dense(1)], input_shape=(10,) + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + model.fit(x, y, batch_size=2, epochs=1) + self.assertEqual(self.evaluate(layer.counter), 5) + + @parameterized.named_parameters( + ("bare_update", BareUpdateLayer), + ("lambda_update", LambdaUpdateLayer), + ("nested_update", NestedUpdateLayer), + ) + def test_updates_standalone_layer(self, layer_builder): + layer = layer_builder() + y = layer(np.ones((10, 10))) + self.evaluate(layer.counter.initializer) + self.evaluate(y) + self.assertEqual(self.evaluate(layer.counter), 1) + + def test_trainable_false_standalone_layer(self): + layer = LambdaUpdateLayer() + y = layer(np.ones((10, 10))) + self.evaluate(layer.counter.initializer) + self.evaluate(y) + self.assertEqual(self.evaluate(layer.counter), 1) + layer.trainable = False + y = layer(np.ones((10, 10))) + self.evaluate(y) + self.assertEqual(self.evaluate(layer.counter), 1) + + @test_combinations.run_with_all_model_types + def test_batchnorm_trainable_false(self): + bn = layers_module.BatchNormalization() + model = test_utils.get_model_from_layers( + [bn, layers_module.Dense(1)], input_shape=(10,) + ) + bn.trainable = False + model.compile("sgd", "mse", 
run_eagerly=test_utils.should_run_eagerly()) + x, y = np.ones((10, 10)), np.ones((10, 1)) + model.fit(x, y, batch_size=2, epochs=1) + self.assertAllEqual(self.evaluate(bn.moving_mean), np.zeros((10,))) + self.assertAllEqual(self.evaluate(bn.moving_variance), np.ones((10,))) class TestFunctionTracing(test_combinations.TestCase): + def _seq_model_and_data(self): + model = sequential.Sequential( + [layers_module.Dense(4, activation="relu")] + ) + model.compile(loss="mse", optimizer="rmsprop") + x = np.random.random((10, 6)) + y = np.random.random((10, 4)) + return model, x, y + + @test_combinations.run_all_keras_modes( + always_skip_v1=True, always_skip_eager=True + ) + def test_no_tracing_between_epoch(self): + if _is_oss(): + self.skipTest("b/198729465") - def _seq_model_and_data(self): - model = sequential.Sequential([layers_module.Dense(4, activation='relu')]) - model.compile(loss='mse', optimizer='rmsprop') - x = np.random.random((10, 6)) - y = np.random.random((10, 4)) - return model, x, y - - @test_combinations.run_all_keras_modes( - always_skip_v1=True, always_skip_eager=True) - def test_no_tracing_between_epoch(self): - if _is_oss(): - self.skipTest('b/198729465') - - model, x, y = self._seq_model_and_data() + model, x, y = self._seq_model_and_data() - logging.set_verbosity(1) - with self.assertLogs(level=1) as logs: - model.fit(x, y, epochs=10, batch_size=5, validation_data=(x, y)) + logging.set_verbosity(1) + with self.assertLogs(level=1) as logs: + model.fit(x, y, epochs=10, batch_size=5, validation_data=(x, y)) - new_func_graph = 'INFO:absl:Creating new FuncGraph for Python function' - self.assertEqual(sum(new_func_graph in log for log in logs.output), 9) + new_func_graph = "INFO:absl:Creating new FuncGraph for Python function" + self.assertEqual(sum(new_func_graph in log for log in logs.output), 9) - @test_combinations.run_all_keras_modes( - always_skip_v1=True, always_skip_eager=True) - def test_evaluate_no_cached_data(self): - if _is_oss(): - self.skipTest('b/198729465') + @test_combinations.run_all_keras_modes( + always_skip_v1=True, always_skip_eager=True + ) + def test_evaluate_no_cached_data(self): + if _is_oss(): + self.skipTest("b/198729465") - model, x, y = self._seq_model_and_data() + model, x, y = self._seq_model_and_data() - new_func_graph = 'INFO:absl:Creating new FuncGraph for Python function' - logging.set_verbosity(1) - with self.assertLogs(level=1) as eval_logs: - for _ in range(6): - model.evaluate(x, y, batch_size=5) - self.assertEqual(sum(new_func_graph in log for log in eval_logs.output), 20) + new_func_graph = "INFO:absl:Creating new FuncGraph for Python function" + logging.set_verbosity(1) + with self.assertLogs(level=1) as eval_logs: + for _ in range(6): + model.evaluate(x, y, batch_size=5) + self.assertEqual( + sum(new_func_graph in log for log in eval_logs.output), 20 + ) class TestBuildCustomModel(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes - def test_build_list_of_inputs(self): - - class MyModel(training_module.Model): - - def __init__(self): - super().__init__() - self.l1 = layers_module.Dense(1) - self.l2 = layers_module.Dense(2) - - def call(self, x): - a, b = x - return self.l1(a) + self.l2(b) - - # List of tuples - model = MyModel() - model.build([(None, 1), (None, 2)]) - self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) - self.assertEqual(model.l2.kernel.shape.as_list(), [2, 2]) - # List of lists - model = MyModel() - model.build([[None, 1], [None, 2]]) - self.assertEqual(model.l1.kernel.shape.as_list(), [1, 
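The frozen-BatchNormalization check above is easy to reproduce. A sketch assuming the public tf.keras API: with `trainable = False` set before `compile`, the moving statistics stay at their initial values (mean 0, variance 1) even after training steps run.

import numpy as np
import tensorflow as tf

bn = tf.keras.layers.BatchNormalization()
model = tf.keras.Sequential([bn, tf.keras.layers.Dense(1)])
bn.trainable = False  # must precede compile to be respected by fit
model.compile("sgd", "mse")
model.fit(np.ones((10, 10)), np.ones((10, 1)), batch_size=2, verbose=0)
print(bn.moving_mean.numpy())      # all zeros: no update was applied
print(bn.moving_variance.numpy())  # all ones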
1]) - self.assertEqual(model.l2.kernel.shape.as_list(), [2, 2]) - - @test_combinations.run_all_keras_modes - def test_build_single_inputs(self): - - class MyModel(training_module.Model): - - def __init__(self): - super().__init__() - self.l1 = layers_module.Dense(1) - - def call(self, x): - return self.l1(x) - - model = MyModel() - model.build((None, 1)) - self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) - model = MyModel() - model.build([None, 1]) - self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) - - @test_combinations.run_all_keras_modes - def test_build_dict_inputs(self): - - class MyModel(training_module.Model): - - def __init__(self): - super().__init__() - self.l1 = layers_module.Dense(1) - - def call(self, inputs): - return self.l1(inputs['x']) - - model = MyModel() - model.build({'x': [None, 16]}) - self.assertEqual(model.l1.kernel.shape.as_list(), [16, 1]) - - def test_save_top_level_model_weights_h5(self): - - class MyModel(training_module.Model): - - def __init__(self): - super().__init__() - self.class_token = self.add_weight(shape=(1,), name='class_token') - self.inner_layer = layers_module.Dense(1) - - def call(self, inputs): - return self.inner_layer(inputs) * self.class_token - - h5_file = tempfile.mktemp('.h5') - m1 = MyModel() - m1.build((1, 1)) - m1.save_weights(h5_file) - - m2 = MyModel() - m2.build((1, 1)) - m2.load_weights(h5_file) - self.assertAllEqual(m1.get_weights(), m2.get_weights()) - m2.load_weights(h5_file, by_name=True) - self.assertAllEqual(m1.get_weights(), m2.get_weights()) + @test_combinations.run_all_keras_modes + def test_build_list_of_inputs(self): + class MyModel(training_module.Model): + def __init__(self): + super().__init__() + self.l1 = layers_module.Dense(1) + self.l2 = layers_module.Dense(2) + + def call(self, x): + a, b = x + return self.l1(a) + self.l2(b) + + # List of tuples + model = MyModel() + model.build([(None, 1), (None, 2)]) + self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) + self.assertEqual(model.l2.kernel.shape.as_list(), [2, 2]) + # List of lists + model = MyModel() + model.build([[None, 1], [None, 2]]) + self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) + self.assertEqual(model.l2.kernel.shape.as_list(), [2, 2]) + + @test_combinations.run_all_keras_modes + def test_build_single_inputs(self): + class MyModel(training_module.Model): + def __init__(self): + super().__init__() + self.l1 = layers_module.Dense(1) + + def call(self, x): + return self.l1(x) + + model = MyModel() + model.build((None, 1)) + self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) + model = MyModel() + model.build([None, 1]) + self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) + + @test_combinations.run_all_keras_modes + def test_build_dict_inputs(self): + class MyModel(training_module.Model): + def __init__(self): + super().__init__() + self.l1 = layers_module.Dense(1) + + def call(self, inputs): + return self.l1(inputs["x"]) + + model = MyModel() + model.build({"x": [None, 16]}) + self.assertEqual(model.l1.kernel.shape.as_list(), [16, 1]) + + def test_save_top_level_model_weights_h5(self): + class MyModel(training_module.Model): + def __init__(self): + super().__init__() + self.class_token = self.add_weight( + shape=(1,), name="class_token" + ) + self.inner_layer = layers_module.Dense(1) + + def call(self, inputs): + return self.inner_layer(inputs) * self.class_token + + h5_file = tempfile.mktemp(".h5") + m1 = MyModel() + m1.build((1, 1)) + m1.save_weights(h5_file) + + m2 = MyModel() + m2.build((1, 1)) + 
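The `Model.build` variants tested above accept any shape structure matching what `call` expects: a single tuple or list, a list of shapes, or a dict of shapes. A minimal sketch (public tf.keras API assumed):

import tensorflow as tf


class TwoInput(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.l1 = tf.keras.layers.Dense(1)
        self.l2 = tf.keras.layers.Dense(2)

    def call(self, x):
        a, b = x
        return self.l1(a) + self.l2(b)


model = TwoInput()
model.build([(None, 1), (None, 2)])  # list of shape tuples, matching call()
print(model.l1.kernel.shape)  # (1, 1)
print(model.l2.kernel.shape)  # (2, 2)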
m2.load_weights(h5_file) + self.assertAllEqual(m1.get_weights(), m2.get_weights()) + m2.load_weights(h5_file, by_name=True) + self.assertAllEqual(m1.get_weights(), m2.get_weights()) class ScalarDataModelTest(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_scalar_loss_reduction(self): - - class MyModel(training_module.Model): - - def __init__(self): - super().__init__() - self.w = self.add_weight(initializer='ones', name='kernel') - self.b = self.add_weight(initializer='zeros', name='bias') - - def call(self, inputs): - return inputs * self.w + self.b - - model = MyModel() - model.compile(optimizer_v2.gradient_descent.SGD(1e-2), - loss='mse', - metrics=['binary_accuracy']) - # learn y = x * 2 + 0.5 - x = np.array([3, 5, 5, 3, 5], dtype='float32') - y = x * 2 + 0.5 - x2d = np.expand_dims(x, axis=-1) - y2d = np.expand_dims(y, axis=-1) - loss, acc = model.evaluate(x, y) - loss2d, acc2d = model.evaluate(x2d, y2d) - self.assertAllClose([loss, acc], [loss2d, acc2d], atol=1e-6) - model.fit(x, y, epochs=20) - preds = model.predict(x) - self.assertEqual(preds.shape, (5,)) - self.assertAllClose(preds, y, atol=2e-1) + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_scalar_loss_reduction(self): + class MyModel(training_module.Model): + def __init__(self): + super().__init__() + self.w = self.add_weight(initializer="ones", name="kernel") + self.b = self.add_weight(initializer="zeros", name="bias") + + def call(self, inputs): + return inputs * self.w + self.b + + model = MyModel() + model.compile( + optimizer_legacy.gradient_descent.SGD(1e-2), + loss="mse", + metrics=["binary_accuracy"], + ) + # learn y = x * 2 + 0.5 + x = np.array([3, 5, 5, 3, 5], dtype="float32") + y = x * 2 + 0.5 + x2d = np.expand_dims(x, axis=-1) + y2d = np.expand_dims(y, axis=-1) + loss, acc = model.evaluate(x, y) + loss2d, acc2d = model.evaluate(x2d, y2d) + self.assertAllClose([loss, acc], [loss2d, acc2d], atol=1e-6) + model.fit(x, y, epochs=20) + preds = model.predict(x) + self.assertEqual(preds.shape, (5,)) + self.assertAllClose(preds, y, atol=2e-1) + + +# Class used for testing. 
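The scalar-loss-reduction test above hinges on a model whose weights are bare scalars, so rank-1 inputs and their expanded rank-2 versions must evaluate identically and `predict` preserves the input rank. A standalone sketch under the public tf.keras API assumption (`add_weight` with no shape defaults to a scalar):

import numpy as np
import tensorflow as tf


class ScalarAffine(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.w = self.add_weight(name="kernel", initializer="ones")  # shape ()
        self.b = self.add_weight(name="bias", initializer="zeros")   # shape ()

    def call(self, inputs):
        return inputs * self.w + self.b


model = ScalarAffine()
model.compile(tf.keras.optimizers.SGD(1e-2), loss="mse")
x = np.array([3.0, 5.0, 5.0, 3.0, 5.0], dtype="float32")
y = x * 2 + 0.5  # the affine map the model should learn
model.fit(x, y, epochs=20, verbose=0)
print(model.predict(x, verbose=0).shape)  # (5,) -- input rank is preserved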
+class SubclassModel(training_module.Model): + def __init__(self, name=None): + super().__init__(name=name) + self.d1 = layers_module.Dense(1000) + self.d2 = layers_module.Dense(1000) + self.dropout = layers_module.Dropout(0.1) + + def call(self, inputs, training=None): + x = self.d1(inputs) + x = self.dropout(x, training=training) + return self.d2(x) + + +class TestVariableObjectPathMapping(test_combinations.TestCase): + def test_subclass_model_get_weight_paths(self): + model = SubclassModel() + # Make sure the object path produce nothing when weights are not + # initialized + self.assertEmpty(model.get_weight_paths()) + + model(tf.zeros((10, 10))) + mapping = model.get_weight_paths() + self.assertEqual( + mapping.keys(), {"d1.kernel", "d1.bias", "d2.kernel", "d2.bias"} + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_functional_model_get_weight_paths(self): + inputs = input_layer.Input(shape=(10,)) + x = layers_module.Dense(100, name="d1")(inputs) + output = layers_module.Dense(200, name="d2", activation="softmax")(x) + model = training_module.Model(inputs, output) + mapping = model.get_weight_paths() + self.assertEqual( + mapping.keys(), {"d1.kernel", "d1.bias", "d2.kernel", "d2.bias"} + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_sequential_model_get_weight_paths(self): + model = sequential.Sequential( + [ + layers_module.Dense(100, name="d1", input_shape=(10,)), + layers_module.Dense(200, name="d2", activation="softmax"), + ] + ) + mapping = model.get_weight_paths() + self.assertEqual( + mapping.keys(), {"d1.kernel", "d1.bias", "d2.kernel", "d2.bias"} + ) def _is_oss(): - """Returns whether the test is run under OSS.""" - return len(sys.argv) >= 1 and 'bazel' in sys.argv[0] + """Returns whether the test is run under OSS.""" + return len(sys.argv) >= 1 and "bazel" in sys.argv[0] -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/training_utils.py b/keras/engine/training_utils.py index 617713b543e5..4e298157378b 100644 --- a/keras/engine/training_utils.py +++ b/keras/engine/training_utils.py @@ -14,206 +14,225 @@ # ============================================================================== """Training-related utilities.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np from keras.utils import generic_utils def slice_arrays(arrays, indices, contiguous=True): - """Slices batches out of provided arrays (workaround for eager tensors). - - Unfortunately eager tensors don't have the same slicing behavior as - Numpy arrays (they follow the same slicing behavior as symbolic TF tensors), - hence we cannot use `generic_utils.slice_arrays` directly - and we have to implement this workaround based on `concat`. This has a - performance cost. - - Args: - arrays: Single array or list of arrays. - indices: List of indices in the array that should be included in the output - batch. - contiguous: Boolean flag indicating whether the indices are contiguous. - - Returns: - Slice of data (either single array or list of arrays). - """ - converted_to_list = False - if not isinstance(arrays, list): - converted_to_list = True - arrays = [arrays] - if any(tf.is_tensor(x) for x in arrays): - if not contiguous: - entries = [[x[i:i + 1] for i in indices] for x in arrays] - slices = [tf.concat(x, axis=0) for x in entries] + """Slices batches out of provided arrays (workaround for eager tensors). 
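The workaround `slice_arrays` implements can be shown in isolation: eager tensors slice like symbolic tensors, not like NumPy arrays, so non-contiguous batch indices are gathered by concatenating one-element slices (the contiguous case is a plain `x[indices[0]:indices[-1] + 1]` slice). A sketch, public TF API assumed:

import tensorflow as tf

x = tf.constant([[0.0], [1.0], [2.0], [3.0]])
indices = [0, 2, 3]  # non-contiguous batch indices
batch = tf.concat([x[i:i + 1] for i in indices], axis=0)
print(batch.numpy().ravel())  # [0. 2. 3.]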
+ + Unfortunately eager tensors don't have the same slicing behavior as + Numpy arrays (they follow the same slicing behavior as symbolic TF tensors), + hence we cannot use `generic_utils.slice_arrays` directly + and we have to implement this workaround based on `concat`. This has a + performance cost. + + Args: + arrays: Single array or list of arrays. + indices: List of indices in the array that should be included in the + output batch. + contiguous: Boolean flag indicating whether the indices are contiguous. + + Returns: + Slice of data (either single array or list of arrays). + """ + converted_to_list = False + if not isinstance(arrays, list): + converted_to_list = True + arrays = [arrays] + if any(tf.is_tensor(x) for x in arrays): + if not contiguous: + entries = [[x[i : i + 1] for i in indices] for x in arrays] + slices = [tf.concat(x, axis=0) for x in entries] + else: + slices = [x[indices[0] : indices[-1] + 1] for x in arrays] else: - slices = [x[indices[0]:indices[-1] + 1] for x in arrays] - else: - slices = generic_utils.slice_arrays(arrays, indices) - - if converted_to_list: - slices = slices[0] - return slices - - -def handle_partial_sample_weights(outputs, sample_weights, sample_weight_modes, - check_all_flat=False): - """Adds 1.0 as sample weights for the outputs for which there is no weight. - - Args: - outputs: List of model outputs. - sample_weights: List of sample weight inputs. - sample_weight_modes: List of sample weight modes or None. - check_all_flat: Ensure that inputs are not nested structures. This is not - a free check, so we may not want to run it eagerly every iteration. - - Returns: - Tuple of sample weights, one sample weight for every output, and booleans - describing the raw sample weights. - """ - any_sample_weight = sample_weights is not None and any( - w is not None for w in sample_weights) - partial_sample_weight = any_sample_weight and any( - w is None for w in sample_weights) - - if not any_sample_weight: - return None, any_sample_weight, partial_sample_weight - - if not partial_sample_weight: - return sample_weights, any_sample_weight, partial_sample_weight - - if check_all_flat: - tf.nest.assert_same_structure( - list_to_tuple(sample_weights), - list_to_tuple(tf.nest.flatten(sample_weights))) - tf.nest.assert_same_structure( - list_to_tuple(outputs), - list_to_tuple(tf.nest.flatten(outputs))) - if sample_weight_modes is not None: - tf.nest.assert_same_structure( - sample_weight_modes, tf.nest.flatten(sample_weight_modes)) - - new_sample_weights = [] - for i, sw in enumerate(sample_weights): - if sw is None: - as_numpy = isinstance(outputs[i], np.ndarray) - output = outputs[i] - output_shape = output.shape if as_numpy else tf.shape(output) - - is_temporal = ( - sample_weight_modes is not None and - sample_weight_modes[i] == 'temporal') - sw_shape = (output_shape[0], - output_shape[1]) if is_temporal else (output_shape[0],) - - new_sample_weights.append( - np.ones(sw_shape) if as_numpy else tf.ones(sw_shape)) - + slices = generic_utils.slice_arrays(arrays, indices) + + if converted_to_list: + slices = slices[0] + return slices + + +def handle_partial_sample_weights( + outputs, sample_weights, sample_weight_modes, check_all_flat=False +): + """Adds 1.0 as sample weights for the outputs for which there is no weight. + + Args: + outputs: List of model outputs. + sample_weights: List of sample weight inputs. + sample_weight_modes: List of sample weight modes or None. + check_all_flat: Ensure that inputs are not nested structures. 
This is not + a free check, so we may not want to run it eagerly every iteration. + + Returns: + Tuple of sample weights, one sample weight for every output, and booleans + describing the raw sample weights. + """ + if not isinstance(sample_weights, (list, tuple)): + any_sample_weight = sample_weights is not None + partial_sample_weight = any_sample_weight and sample_weights is None else: - new_sample_weights.append(sw) - return (list_to_tuple(new_sample_weights), - any_sample_weight, partial_sample_weight) + any_sample_weight = sample_weights is not None and any( + w is not None for w in sample_weights + ) + partial_sample_weight = any_sample_weight and any( + w is None for w in sample_weights + ) + + if not any_sample_weight: + return None, any_sample_weight, partial_sample_weight + + if not partial_sample_weight: + return sample_weights, any_sample_weight, partial_sample_weight + + if check_all_flat: + tf.nest.assert_same_structure( + list_to_tuple(sample_weights), + list_to_tuple(tf.nest.flatten(sample_weights)), + ) + tf.nest.assert_same_structure( + list_to_tuple(outputs), list_to_tuple(tf.nest.flatten(outputs)) + ) + if sample_weight_modes is not None: + tf.nest.assert_same_structure( + sample_weight_modes, tf.nest.flatten(sample_weight_modes) + ) + + new_sample_weights = [] + for i, sw in enumerate(sample_weights): + if sw is None: + as_numpy = isinstance(outputs[i], np.ndarray) + output = outputs[i] + output_shape = output.shape if as_numpy else tf.shape(output) + + is_temporal = ( + sample_weight_modes is not None + and sample_weight_modes[i] == "temporal" + ) + sw_shape = ( + (output_shape[0], output_shape[1]) + if is_temporal + else (output_shape[0],) + ) + + new_sample_weights.append( + np.ones(sw_shape) if as_numpy else tf.ones(sw_shape) + ) + + else: + new_sample_weights.append(sw) + return ( + list_to_tuple(new_sample_weights), + any_sample_weight, + partial_sample_weight, + ) class RespectCompiledTrainableState: - """Set and restore trainable state if it has changed since compile. - - The keras API guarantees that the value of each Layer's `trainable` property - at `Model.compile` time will be used when training that model. In order to - respect this requirement, it may be necessary to set the trainable value of - layers to their compile time values before beginning a training endpoint and - restore the values before returning from said endpoint. This scope checks if - any layer's trainable state has changed since Model compile, and performs this - set and un-set bookkeeping. - - However, the trainable state of a layer changes quite infrequently, if ever, - for many kinds of workflows. Moreover, updating every layer in a model is an - expensive operation. As a result, we will only explicitly set and unset the - trainable state of a model if a trainable value has changed since compile. - """ - - def __init__(self, model): - self._model = model - self._current_trainable_state = None - self._compiled_trainable_state = None - self._should_set_trainable = False - - def __enter__(self): - self._current_trainable_state = self._model._get_trainable_state() # pylint: disable=protected-access - self._compiled_trainable_state = self._model._compiled_trainable_state # pylint: disable=protected-access - - # Check to see if any layer's trainable state has changed since `compile`. 
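The fill-with-ones behavior of `handle_partial_sample_weights` above can be summarized with plain NumPy: any output lacking an explicit weight gets an all-ones weight of matching batch shape (and, in "temporal" mode, batch-by-steps shape), so downstream loss code can treat weights as always present. A simplified sketch, non-temporal case only:

import numpy as np

outputs = [np.zeros((4, 3)), np.zeros((4, 5))]
sample_weights = [np.array([1.0, 2.0, 3.0, 4.0]), None]
filled = [
    sw if sw is not None else np.ones(out.shape[:1])  # ones for missing weights
    for out, sw in zip(outputs, sample_weights)
]
print([w.shape for w in filled])  # [(4,), (4,)]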
- for layer, trainable in self._compiled_trainable_state.items(): - if (layer in self._current_trainable_state and - trainable != self._current_trainable_state[layer]): - self._should_set_trainable = True - break - - # If so, restore the model to its compiled state. - if self._should_set_trainable: - self._model._set_trainable_state(self._compiled_trainable_state) # pylint: disable=protected-access - - def __exit__(self, type_arg, value_arg, traceback_arg): - # If we set the values to their compiled state in __enter__, we need to - # restore the original values before leaving the scope. - if self._should_set_trainable: - self._model._set_trainable_state(self._current_trainable_state) # pylint: disable=protected-access - return False # False values do not suppress exceptions + """Set and restore trainable state if it has changed since compile. + + The keras API guarantees that the value of each Layer's `trainable` property + at `Model.compile` time will be used when training that model. In order to + respect this requirement, it may be necessary to set the trainable value of + layers to their compile time values before beginning a training endpoint and + restore the values before returning from said endpoint. This scope checks if + any layer's trainable state has changed since Model compile, and performs + this set and un-set bookkeeping. + + However, the trainable state of a layer changes quite infrequently, if ever, + for many kinds of workflows. Moreover, updating every layer in a model is an + expensive operation. As a result, we will only explicitly set and unset the + trainable state of a model if a trainable value has changed since compile. + """ + + def __init__(self, model): + self._model = model + self._current_trainable_state = None + self._compiled_trainable_state = None + self._should_set_trainable = False + + def __enter__(self): + self._current_trainable_state = self._model._get_trainable_state() + self._compiled_trainable_state = self._model._compiled_trainable_state + + # Check to see if any layer's trainable state has changed since + # `compile`. + for layer, trainable in self._compiled_trainable_state.items(): + if ( + layer in self._current_trainable_state + and trainable != self._current_trainable_state[layer] + ): + self._should_set_trainable = True + break + + # If so, restore the model to its compiled state. + if self._should_set_trainable: + self._model._set_trainable_state(self._compiled_trainable_state) + + def __exit__(self, type_arg, value_arg, traceback_arg): + # If we set the values to their compiled state in __enter__, we need to + # restore the original values before leaving the scope. + if self._should_set_trainable: + self._model._set_trainable_state(self._current_trainable_state) + return False # False values do not suppress exceptions # Allow use of methods not exposed to the user. -# pylint: disable=protected-access -def get_input_shape_and_dtype(layer): - """Retrieves input shape and input dtype of layer if applicable. - Args: - layer: Layer (or model) instance. - Returns: - Tuple (input_shape, input_dtype). Both could be None if the layer - does not have a defined input shape. +def get_input_shape_and_dtype(layer): + """Retrieves input shape and input dtype of layer if applicable. - Raises: - ValueError: in case an empty Sequential or Functional model is passed. - """ + Args: + layer: Layer (or model) instance. 
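The set-and-restore bookkeeping of `RespectCompiledTrainableState` can be sketched as a plain context manager. This is a hypothetical, simplified helper (`respect_trainable`, top-level layers only), not the Keras implementation: snapshot the live flags, force the compile-time values only when something actually differs, then restore on exit.

import contextlib


@contextlib.contextmanager
def respect_trainable(model, compiled_state):
    # Snapshot the live flags and compare against the compile-time ones.
    current = {layer: layer.trainable for layer in model.layers}
    changed = any(
        layer in current and current[layer] != flag
        for layer, flag in compiled_state.items()
    )
    try:
        if changed:  # only touch layers when a flag actually differs
            for layer, flag in compiled_state.items():
                layer.trainable = flag
        yield
    finally:
        if changed:  # restore the pre-existing flags on the way out
            for layer, flag in current.items():
                layer.trainable = flag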
- def _is_graph_model(layer): - return ((hasattr(layer, '_is_graph_network') and layer._is_graph_network) or - layer.__class__.__name__ == 'Sequential') + Returns: + Tuple (input_shape, input_dtype). Both could be None if the layer + does not have a defined input shape. - # In case of nested models: recover the first layer - # of the deepest model to infer input shape and dtype. - # Subclassed Models may not have been built so can't be checked. - while _is_graph_model(layer): - if not layer.layers: - raise ValueError('An empty Model cannot be used as a Layer.') - layer = layer.layers[0] + Raises: + ValueError: in case an empty Sequential or Functional model is passed. + """ - if getattr(layer, '_batch_input_shape', None): - return layer._batch_input_shape, layer.dtype - return None, None + def _is_graph_model(layer): + return ( + hasattr(layer, "_is_graph_network") and layer._is_graph_network + ) or layer.__class__.__name__ == "Sequential" + # In case of nested models: recover the first layer + # of the deepest model to infer input shape and dtype. + # Subclassed Models may not have been built so can't be checked. + while _is_graph_model(layer): + if not layer.layers: + raise ValueError("An empty Model cannot be used as a Layer.") + layer = layer.layers[0] -# pylint: enable=protected-access + if getattr(layer, "_batch_input_shape", None): + return layer._batch_input_shape, layer.dtype + return None, None def get_static_batch_size(layer): - """Gets the static batch size of a Layer. + """Gets the static batch size of a Layer. - Args: - layer: a `Layer` instance. + Args: + layer: a `Layer` instance. - Returns: - The static batch size of a Layer. - """ - batch_input_shape, _ = get_input_shape_and_dtype(layer) - if batch_input_shape is not None: - return tf.compat.v1.Dimension(batch_input_shape[0]).value - return None + Returns: + The static batch size of a Layer. + """ + batch_input_shape, _ = get_input_shape_and_dtype(layer) + if batch_input_shape is not None: + return tf.compat.v1.Dimension(batch_input_shape[0]).value + return None def list_to_tuple(maybe_list): - """Datasets will stack the list of tensor, so switch them to tuples.""" - if isinstance(maybe_list, list): - return tuple(maybe_list) - return maybe_list + """Datasets will stack the list of tensor, so switch them to tuples.""" + if isinstance(maybe_list, list): + return tuple(maybe_list) + return maybe_list diff --git a/keras/engine/training_utils_v1.py b/keras/engine/training_utils_v1.py index 371e86b027e9..48cfdd4c02f3 100644 --- a/keras/engine/training_utils_v1.py +++ b/keras/engine/training_utils_v1.py @@ -14,8 +14,6 @@ # ============================================================================== """Training-related utilities.""" -import tensorflow.compat.v2 as tf - import abc import atexit import collections @@ -25,6 +23,8 @@ import time import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import callbacks as cbks from keras import losses @@ -33,255 +33,281 @@ from keras.utils import generic_utils from keras.utils import losses_utils from keras.utils import tf_inspect + +# isort: off from tensorflow.python.platform import tf_logging as logging def is_composite_or_composite_value(tensor): - """Returns true if 'tensor' is a CompositeTensor or a CT Value object.""" - # TODO(b/125094323): This should be isinstance(CompositeTensor) or - # isinstance(CompositeTensorValue) once we support that. 
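The reason `list_to_tuple` above exists can be demonstrated directly with `tf.data` (public TF API assumed): a list is converted into a single stacked tensor, while a tuple is kept as separate components, so list-structured data must be switched to tuples before building a dataset.

import tensorflow as tf

a = tf.zeros((4, 2))
b = tf.ones((4, 2))
as_list = tf.data.Dataset.from_tensor_slices([a, b])   # stacked into one tensor
as_tuple = tf.data.Dataset.from_tensor_slices((a, b))  # kept as two components
print(as_list.element_spec)   # TensorSpec(shape=(4, 2), dtype=tf.float32, ...)
print(as_tuple.element_spec)  # (TensorSpec(shape=(2,), ...), TensorSpec(shape=(2,), ...))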
- return isinstance( - tensor, - (tf.__internal__.CompositeTensor, tf.compat.v1.SparseTensorValue, - tf.compat.v1.ragged.RaggedTensorValue)) + """Returns true if 'tensor' is a CompositeTensor or a CT Value object.""" + # TODO(b/125094323): This should be isinstance(CompositeTensor) or + # isinstance(CompositeTensorValue) once we support that. + return isinstance( + tensor, + ( + tf.__internal__.CompositeTensor, + tf.compat.v1.SparseTensorValue, + tf.compat.v1.ragged.RaggedTensorValue, + ), + ) class Aggregator(object, metaclass=abc.ABCMeta): - """Abstract base class used to aggregate batch-level outputs of a loop. - - Attributes: - use_steps: Whether the loop is using `step` or `batch_size`. - num_samples: Total number of samples: `batch_size * num_batches`. - steps: Total number of steps. - batch_size: Batch size. It is used for validation checks between inputs and - outputs. - results: What to return at the end of the aggregation loop. - """ - - def __init__(self, use_steps, num_samples=None, steps=None, batch_size=None): - self.use_steps = use_steps - self.num_samples = num_samples - self.steps = steps - self.batch_size = batch_size - self.results = [] - - @abc.abstractmethod - def create(self, batch_outs): - """Creates the initial results from the first batch outputs. - - Args: - batch_outs: A list of batch-level outputs. + """Abstract base class used to aggregate batch-level outputs of a loop. + + Attributes: + use_steps: Whether the loop is using `step` or `batch_size`. + num_samples: Total number of samples: `batch_size * num_batches`. + steps: Total number of steps. + batch_size: Batch size. It is used for validation checks between inputs + and outputs. + results: What to return at the end of the aggregation loop. """ - raise NotImplementedError('Must be implemented in subclasses.') - @abc.abstractmethod - def aggregate(self, batch_outs, batch_start=None, batch_end=None): - """Aggregates batch-level results into total results. + def __init__( + self, use_steps, num_samples=None, steps=None, batch_size=None + ): + self.use_steps = use_steps + self.num_samples = num_samples + self.steps = steps + self.batch_size = batch_size + self.results = [] + + @abc.abstractmethod + def create(self, batch_outs): + """Creates the initial results from the first batch outputs. + + Args: + batch_outs: A list of batch-level outputs. + """ + raise NotImplementedError("Must be implemented in subclasses.") + + @abc.abstractmethod + def aggregate(self, batch_outs, batch_start=None, batch_end=None): + """Aggregates batch-level results into total results. + + Args: + batch_outs: A list of batch-level outputs. + batch_start: The start index of this batch. Always `None` if + `use_steps` is `True`. + batch_end: The end index of this batch. Always `None` if `use_steps` + is `True`. + """ + raise NotImplementedError("Must be implemented in subclasses.") + + @abc.abstractmethod + def finalize(self): + """Prepares the total results to be returned.""" + raise NotImplementedError("Must be implemented in subclasses.") - Args: - batch_outs: A list of batch-level outputs. - batch_start: The start index of this batch. Always `None` if `use_steps` - is `True`. - batch_end: The end index of this batch. Always `None` if `use_steps` is - `True`. 
- """ - raise NotImplementedError('Must be implemented in subclasses.') - @abc.abstractmethod - def finalize(self): - """Prepares the total results to be returned.""" - raise NotImplementedError('Must be implemented in subclasses.') +class MetricsAggregator(Aggregator): + """Aggregator that calculates loss and metrics info. + Attributes: + use_steps: Whether the loop is using `step` or `batch_size`. + num_samples: Total number of samples: `batch_size*num_batches`. + steps: Total number of steps, ie number of times to iterate over a dataset + to cover all samples. + """ -class MetricsAggregator(Aggregator): - """Aggregator that calculates loss and metrics info. - - Attributes: - use_steps: Whether the loop is using `step` or `batch_size`. - num_samples: Total number of samples: `batch_size*num_batches`. - steps: Total number of steps, ie number of times to iterate over a dataset - to cover all samples. - """ - - def __init__(self, use_steps, num_samples=None, steps=None): - super().__init__( - use_steps=use_steps, - num_samples=num_samples, - steps=steps, - batch_size=None) - - def create(self, batch_outs): - self.results = [0.] * len(batch_outs) - - def aggregate(self, batch_outs, batch_start=None, batch_end=None): - # Loss. - if self.use_steps: - self.results[0] += batch_outs[0] - else: - self.results[0] += batch_outs[0] * (batch_end - batch_start) - # Metrics (always stateful, just grab current values.) - self.results[1:] = batch_outs[1:] + def __init__(self, use_steps, num_samples=None, steps=None): + super().__init__( + use_steps=use_steps, + num_samples=num_samples, + steps=steps, + batch_size=None, + ) + + def create(self, batch_outs): + self.results = [0.0] * len(batch_outs) + + def aggregate(self, batch_outs, batch_start=None, batch_end=None): + # Loss. + if self.use_steps: + self.results[0] += batch_outs[0] + else: + self.results[0] += batch_outs[0] * (batch_end - batch_start) + # Metrics (always stateful, just grab current values.) + self.results[1:] = batch_outs[1:] - def finalize(self): - if not self.results: - raise ValueError('Empty training data.') - self.results[0] /= (self.num_samples or self.steps) + def finalize(self): + if not self.results: + raise ValueError("Empty training data.") + self.results[0] /= self.num_samples or self.steps def _append_sparse_tensor_value(target, to_append): - """Append sparse tensor value objects.""" - # Make sure the sparse tensors are of the same size (except for the 0th dim). - if len(target.dense_shape) != len(to_append.dense_shape): - raise RuntimeError( - 'Unable to concatenate %s and %s. The inner dense shapes do not ' - 'have the same number of dimensions (%s vs %s)' % - (target, to_append, target.dense_shape, to_append.dense_shape)) - - if target.dense_shape[1:] != to_append.dense_shape[1:]: - raise RuntimeError( - 'Unable to concatenate %s and %s. The inner dense shapes do not ' - 'match inner dimensions (%s vs %s)' % - (target, to_append, target.dense_shape[1:], to_append.dense_shape[1:])) - - # Add the to_append indices to target, updating the 0th value, and keeping - # track of the maximum so we know the final dense_shape of this tensor. - base_dim0_value = target.dense_shape[0] - max_dim0_value = target.dense_shape[0] - new_indices = target.indices - for index in to_append.indices: - # Here, we iterate through the sparse indices of the tensor to append. 
For - # each index, we update its zeroth value (the batch index) by adding the - # number of batch items in the tensor we are appending to (so an index - # of [0, 0, 1] for a value that is being appended to a tensor with 0th dim - # size 3 would become [3, 0, 1].) - index[0] += base_dim0_value - max_dim0_value = max(max_dim0_value, index[0]) - new_indices = np.append(new_indices, [index], axis=0) - - # Extend the values array to contain all of the appended values. These will - # be in the same order as the indices added above. - new_values = np.concatenate((target.values, to_append.values), axis=0) - - # Create a new dense shape by replacing the value for the 0th dimension - # with the new max dim0 value. - new_dense_shape = list(target.dense_shape) - new_dense_shape[0] = max_dim0_value + 1 - new_dense_shape = tuple(new_dense_shape) - - return tf.compat.v1.SparseTensorValue( - indices=new_indices, values=new_values, dense_shape=new_dense_shape) + """Append sparse tensor value objects.""" + # Make sure the sparse tensors are of the same size (except for the 0th + # dim). + if len(target.dense_shape) != len(to_append.dense_shape): + raise RuntimeError( + "Unable to concatenate %s and %s. The inner dense shapes do not " + "have the same number of dimensions (%s vs %s)" + % (target, to_append, target.dense_shape, to_append.dense_shape) + ) + + if target.dense_shape[1:] != to_append.dense_shape[1:]: + raise RuntimeError( + "Unable to concatenate %s and %s. The inner dense shapes do not " + "match inner dimensions (%s vs %s)" + % ( + target, + to_append, + target.dense_shape[1:], + to_append.dense_shape[1:], + ) + ) + + # Add the to_append indices to target, updating the 0th value, and keeping + # track of the maximum so we know the final dense_shape of this tensor. + base_dim0_value = target.dense_shape[0] + max_dim0_value = target.dense_shape[0] + new_indices = target.indices + for index in to_append.indices: + # Here, we iterate through the sparse indices of the tensor to append. + # For each index, we update its zeroth value (the batch index) by adding + # the number of batch items in the tensor we are appending to (so an + # index of [0, 0, 1] for a value that is being appended to a tensor with + # 0th dim size 3 would become [3, 0, 1].) + index[0] += base_dim0_value + max_dim0_value = max(max_dim0_value, index[0]) + new_indices = np.append(new_indices, [index], axis=0) + + # Extend the values array to contain all of the appended values. These will + # be in the same order as the indices added above. + new_values = np.concatenate((target.values, to_append.values), axis=0) + + # Create a new dense shape by replacing the value for the 0th dimension + # with the new max dim0 value. + new_dense_shape = list(target.dense_shape) + new_dense_shape[0] = max_dim0_value + 1 + new_dense_shape = tuple(new_dense_shape) + + return tf.compat.v1.SparseTensorValue( + indices=new_indices, values=new_values, dense_shape=new_dense_shape + ) def _append_ragged_tensor_value(target, to_append): - """Append ragged tensor value objects.""" - # Make sure the ragged tensors are of the same size (save for the 0th dim). 
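The index bookkeeping `_append_sparse_tensor_value` performs by hand for V1 value objects is exactly what `tf.sparse.concat` does in eager mode: shift the appended tensor's batch indices by the target's leading dimension. A sketch, public TF API assumed:

import tensorflow as tf

a = tf.SparseTensor(indices=[[0, 0]], values=[1.0], dense_shape=[1, 3])
b = tf.SparseTensor(indices=[[0, 1]], values=[2.0], dense_shape=[2, 3])
ab = tf.sparse.concat(axis=0, sp_inputs=[a, b])
# b's batch index 0 is shifted by a's leading dim (1), giving [[0, 0], [1, 1]].
print(ab.indices.numpy().tolist())      # [[0, 0], [1, 1]]
print(ab.dense_shape.numpy().tolist())  # [3, 3]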
- if len(target.shape) != len(to_append.shape): - raise RuntimeError('Unable to concatenate %s and %s' % (target, to_append)) - - if target.shape[1:] != to_append.shape[1:]: - raise RuntimeError('Unable to concatenate %s and %s' % (target, to_append)) - - adjusted_row_splits = to_append.row_splits[1:] + target.row_splits[-1] - new_row_splits = np.append(target.row_splits, adjusted_row_splits) - if isinstance(target.values, tf.compat.v1.ragged.RaggedTensorValue): - new_values = _append_ragged_tensor_value(target.values, to_append.values) - else: - new_values = np.concatenate((target.values, to_append.values), axis=0) + """Append ragged tensor value objects.""" + # Make sure the ragged tensors are of the same size (save for the 0th dim). + if len(target.shape) != len(to_append.shape): + raise RuntimeError(f"Unable to concatenate {target} and {to_append}") + + if target.shape[1:] != to_append.shape[1:]: + raise RuntimeError(f"Unable to concatenate {target} and {to_append}") + + adjusted_row_splits = to_append.row_splits[1:] + target.row_splits[-1] + new_row_splits = np.append(target.row_splits, adjusted_row_splits) + if isinstance(target.values, tf.compat.v1.ragged.RaggedTensorValue): + new_values = _append_ragged_tensor_value( + target.values, to_append.values + ) + else: + new_values = np.concatenate((target.values, to_append.values), axis=0) - return tf.compat.v1.ragged.RaggedTensorValue(new_values, new_row_splits) + return tf.compat.v1.ragged.RaggedTensorValue(new_values, new_row_splits) def _append_composite_tensor(target, to_append): - """Helper function to append composite tensors to each other in the 0 axis. - - In order to support batching within a fit/evaluate/predict call, we need - to be able to aggregate within a CompositeTensor. Unfortunately, the CT - API currently does not make this easy - especially in V1 mode, where we're - working with CompositeTensor Value objects that have no connection with the - CompositeTensors that created them. - - Args: - target: CompositeTensor or CompositeTensor value object that will be - appended to. - to_append: CompositeTensor or CompositeTensor value object to append to. - 'target'. - - Returns: - A CompositeTensor or CompositeTensor value object. - - Raises: - RuntimeError: if concatenation is not possible. - """ - if type(target) is not type(to_append): - raise RuntimeError('Unable to concatenate %s and %s' % - (type(target), type(to_append))) - - # Perform type-specific concatenation. - # TODO(b/125094323): This should be replaced by a simple call to - # target.append() that should work on all of the below classes. - - # If we're seeing a CompositeTensor here, we know it's because we're in - # Eager mode (or else we'd have evaluated the CT to a CT Value object - # already). Therefore, it's safe to call concat() on it without evaluating - # the result any further. If not - that is, if we're seeing a - # SparseTensorValue or a RaggedTensorValue - we need to hand-update it - # since we're outside of the graph anyways. - if isinstance(target, tf.SparseTensor): - # We need to invoke the sparse version of concatenate here - tf.concat - # won't work. 
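The row_splits arithmetic in `_append_ragged_tensor_value` above reduces to a shift-and-append, mirrored here with plain NumPy: the appended tensor's splits (minus the leading 0) are offset by the target's last split.

import numpy as np

target_splits = np.array([0, 2, 3])      # rows [x0 x1], [x2]
to_append_splits = np.array([0, 1, 4])   # rows [y0], [y1 y2 y3]
shifted = to_append_splits[1:] + target_splits[-1]  # [4, 7]
new_splits = np.append(target_splits, shifted)
print(new_splits.tolist())  # [0, 2, 3, 4, 7] -- five rows total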
- return tf.compat.v1.sparse_concat(sp_inputs=[target, to_append], axis=0) - elif isinstance(target, tf.RaggedTensor): - return tf.concat([target, to_append], axis=0) - elif isinstance(target, tf.compat.v1.SparseTensorValue): - return _append_sparse_tensor_value(target, to_append) - elif isinstance(target, tf.compat.v1.ragged.RaggedTensorValue): - return _append_ragged_tensor_value(target, to_append) - else: - raise RuntimeError('Attempted to concatenate unsupported object %s.' % - type(target)) + """Helper function to append composite tensors to each other in the 0 axis. + In order to support batching within a fit/evaluate/predict call, we need + to be able to aggregate within a CompositeTensor. Unfortunately, the CT + API currently does not make this easy - especially in V1 mode, where we're + working with CompositeTensor Value objects that have no connection with the + CompositeTensors that created them. -class ConcatAggregator(Aggregator): - """Combine tensor-likes which cannot be merged on the fly. - - This class expects to aggregate a single tensor-like rather than a nested - structure of tensor-likes. - """ - - def __init__(self, batch_size): - self.composite = None - super().__init__( - use_steps=True, num_samples=None, steps=None, batch_size=batch_size) - - def create(self, batch_element): - self.composite = is_composite_or_composite_value(batch_element) - - def aggregate(self, batch_element, batch_start=None, batch_end=None): - - # TODO(psv): Add num_samples check here to detect when output batch - # #samples is < batch size and != input batch #samples. - if self.batch_size and self.batch_size < batch_element.shape[0]: - raise ValueError( - 'Mismatch between expected batch size and model output batch size. ' - 'Output shape = {}, expected output shape = shape {}'.format( - batch_element.shape, - (self.batch_size,) + batch_element.shape[1:])) - self.results.append(batch_element) - - def finalize(self): - # Special case of single batch inference which skips a copy. - if len(self.results) == 1: - self.results = self.results[0] - - elif self.composite: - # TODO(taylorrobie): efficiently concatenate. - results = self.results[0] - for r in self.results[1:]: - results = _append_composite_tensor(results, r) - self.results = results + Args: + target: CompositeTensor or CompositeTensor value object that will be + appended to. + to_append: CompositeTensor or CompositeTensor value object to append to. + 'target'. + Returns: + A CompositeTensor or CompositeTensor value object. + + Raises: + RuntimeError: if concatenation is not possible. + """ + if type(target) is not type(to_append): + raise RuntimeError( + f"Unable to concatenate {type(target)} and {type(to_append)}" + ) + + # Perform type-specific concatenation. + # TODO(b/125094323): This should be replaced by a simple call to + # target.append() that should work on all of the below classes. + + # If we're seeing a CompositeTensor here, we know it's because we're in + # Eager mode (or else we'd have evaluated the CT to a CT Value object + # already). Therefore, it's safe to call concat() on it without evaluating + # the result any further. If not - that is, if we're seeing a + # SparseTensorValue or a RaggedTensorValue - we need to hand-update it + # since we're outside of the graph anyways. + if isinstance(target, tf.SparseTensor): + # We need to invoke the sparse version of concatenate here - tf.concat + # won't work. 
+ return tf.compat.v1.sparse_concat(sp_inputs=[target, to_append], axis=0) + elif isinstance(target, tf.RaggedTensor): + return tf.concat([target, to_append], axis=0) + elif isinstance(target, tf.compat.v1.SparseTensorValue): + return _append_sparse_tensor_value(target, to_append) + elif isinstance(target, tf.compat.v1.ragged.RaggedTensorValue): + return _append_ragged_tensor_value(target, to_append) else: - self.results = np.concatenate(self.results, axis=0) + raise RuntimeError( + f"Attempted to concatenate unsupported object {type(target)}." + ) + + +class ConcatAggregator(Aggregator): + """Combine tensor-likes which cannot be merged on the fly. + + This class expects to aggregate a single tensor-like rather than a nested + structure of tensor-likes. + """ + + def __init__(self, batch_size): + self.composite = None + super().__init__( + use_steps=True, num_samples=None, steps=None, batch_size=batch_size + ) + + def create(self, batch_element): + self.composite = is_composite_or_composite_value(batch_element) + + def aggregate(self, batch_element, batch_start=None, batch_end=None): + + # TODO(psv): Add num_samples check here to detect when output batch + # #samples is < batch size and != input batch #samples. + if self.batch_size and self.batch_size < batch_element.shape[0]: + raise ValueError( + "Mismatch between expected batch size and model output batch " + "size. Output shape = {}, " + "expected output shape = shape {}".format( + batch_element.shape, + (self.batch_size,) + batch_element.shape[1:], + ) + ) + self.results.append(batch_element) + + def finalize(self): + # Special case of single batch inference which skips a copy. + if len(self.results) == 1: + self.results = self.results[0] + + elif self.composite: + # TODO(taylorrobie): efficiently concatenate. + results = self.results[0] + for r in self.results[1:]: + results = _append_composite_tensor(results, r) + self.results = results + + else: + self.results = np.concatenate(self.results, axis=0) _COPY_THREADS = 4 @@ -289,1652 +315,1912 @@ def finalize(self): def get_copy_pool(): - """Shared threadpool for copying arrays. + """Shared threadpool for copying arrays. - Pool instantiation takes ~ 2ms, so a singleton pool is used rather than - creating a pool per SliceAggregator. + Pool instantiation takes ~ 2ms, so a singleton pool is used rather than + creating a pool per SliceAggregator. - Returns: - The global copy threadpool. - """ - global _COPY_POOL - if _COPY_POOL is None: - _COPY_POOL = multiprocessing.pool.ThreadPool(_COPY_THREADS) - atexit.register(_COPY_POOL.close) - return _COPY_POOL + Returns: + The global copy threadpool. + """ + global _COPY_POOL + if _COPY_POOL is None: + _COPY_POOL = multiprocessing.pool.ThreadPool(_COPY_THREADS) + atexit.register(_COPY_POOL.close) + return _COPY_POOL class SliceAggregator(Aggregator): - """Combine arrays where the final size is known. - - This class expects to aggregate a single tensor-like rather than a nested - structure of tensor-likes. - - NumPy copies are an operation that threads handle quite well because all of - the heavy lifting is in c and does not need the GIL. Moreover, we can perform - lock-free writes to the same buffer in multiple threads because the nature of - result aggregation guarantees that either the indices are disjoint or the - aggregator will throw an exception in finalize. Moreover, because aggregation - is performed on the slowest varying dimension, assignments for a given batch - will write to contiguous blocks of memory, further minimizing contention. 
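# The heart of ConcatAggregator.finalize() for plain ndarrays, restated as a
# self-contained sketch (the real class routes composite values through
# _append_composite_tensor and skips the copy when only one batch was seen).
import numpy as np

results = []                                    # stands in for self.results
for batch in (np.ones((32, 4)), np.ones((32, 4)), np.ones((16, 4))):
    results.append(batch)                       # aggregate(): collect batches
final = results[0] if len(results) == 1 else np.concatenate(results, axis=0)
print(final.shape)                              # (80, 4)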
- - There is, however, some scheduling and context switching overhead which will - offset the gains from pipelining the slice assignment. Below a given threshold - it is faster to simply assign in the main thread rather than enqueue the - assignment in a side thread. The exact threshold will vary from system to - system, but the time is not very sensitive to the exact transition so a value - of 2 ** 14 was chosen which should be reasonable on most systems. - """ - - _BINARY_SIZE_THRESHOLD = 2 ** 14 - _MAX_COPY_SECONDS = 300 - - def __init__(self, num_samples, batch_size): - self._async_copies = [] - self._pool = get_copy_pool() - self._errors = [] - super().__init__( - use_steps=False, - num_samples=num_samples, - steps=None, - batch_size=batch_size) - - def create(self, batch_element): - # This step does not need to be pipelined because NumPy empty array - # initialization is effectively instantaneous. - shape = (self.num_samples,) + batch_element.shape[1:] - dtype = batch_element.dtype - - self.results = np.empty(shape=shape, dtype=dtype) - - def aggregate(self, batch_element, batch_start, batch_end): - # Fail early. - if self._errors: - raise self._errors[0] + """Combine arrays where the final size is known. + + This class expects to aggregate a single tensor-like rather than a nested + structure of tensor-likes. + + NumPy copies are an operation that threads handle quite well because all of + the heavy lifting is in c and does not need the GIL. Moreover, we can + perform lock-free writes to the same buffer in multiple threads because the + nature of result aggregation guarantees that either the indices are disjoint + or the aggregator will throw an exception in finalize. Moreover, because + aggregation is performed on the slowest varying dimension, assignments for a + given batch will write to contiguous blocks of memory, further minimizing + contention. + + There is, however, some scheduling and context switching overhead which will + offset the gains from pipelining the slice assignment. Below a given + threshold it is faster to simply assign in the main thread rather than + enqueue the assignment in a side thread. The exact threshold will vary from + system to system, but the time is not very sensitive to the exact transition + so a value of 2 ** 14 was chosen which should be reasonable on most systems. + """ - # In the special case of single batch inference, no copy is needed. - if batch_end - batch_start == self.num_samples: - if self.num_samples != batch_element.shape[0]: - raise ValueError( - 'Mismatch between expected batch size and model output batch size. ' - 'Output shape = {}, expected output shape = shape {}'.format( - batch_element.shape, self.results.shape)) - - self.results = batch_element - return - - # This is an approximate threshold, so we don't need to consider the number - # of bytes per element. 
-    num_elements = np.prod(batch_element.shape)
-    if num_elements < self._BINARY_SIZE_THRESHOLD:
-      self.results[batch_start:batch_end] = batch_element
-    else:
-      is_finished = threading.Event()
-      self._pool.apply_async(
-          self._slice_assign,
-          args=(batch_element, batch_start, batch_end, is_finished))
-      self._async_copies.append(is_finished)
-
-  def _slice_assign(self, batch_element, batch_start, batch_end, is_finished):
-    """Legacy utility method to slice input arrays."""
-    try:
-      self.results[batch_start:batch_end] = batch_element
+    _BINARY_SIZE_THRESHOLD = 2**14
+    _MAX_COPY_SECONDS = 300
+
+    def __init__(self, num_samples, batch_size):
+        self._async_copies = []
+        self._pool = get_copy_pool()
+        self._errors = []
+        super().__init__(
+            use_steps=False,
+            num_samples=num_samples,
+            steps=None,
+            batch_size=batch_size,
+        )
+
+    def create(self, batch_element):
+        # This step does not need to be pipelined because NumPy empty array
+        # initialization is effectively instantaneous.
+        shape = (self.num_samples,) + batch_element.shape[1:]
+        dtype = batch_element.dtype
+
+        self.results = np.empty(shape=shape, dtype=dtype)
+
+    def aggregate(self, batch_element, batch_start, batch_end):
+        # Fail early.
+        if self._errors:
+            raise self._errors[0]
+
+        # In the special case of single batch inference, no copy is needed.
+        if batch_end - batch_start == self.num_samples:
+            if self.num_samples != batch_element.shape[0]:
+                raise ValueError(
+                    "Mismatch between expected batch size and model "
+                    "output batch size. Output shape = {}, "
+                    "expected output shape = shape {}".format(
+                        batch_element.shape, self.results.shape
+                    )
+                )
+
+            self.results = batch_element
+            return
+
+        # This is an approximate threshold, so we don't need to consider the
+        # number of bytes per element.
+        num_elements = np.prod(batch_element.shape)
+        if num_elements < self._BINARY_SIZE_THRESHOLD:
+            self.results[batch_start:batch_end] = batch_element
+        else:
+            is_finished = threading.Event()
+            self._pool.apply_async(
+                self._slice_assign,
+                args=(batch_element, batch_start, batch_end, is_finished),
+            )
+            self._async_copies.append(is_finished)
+
+    def _slice_assign(self, batch_element, batch_start, batch_end, is_finished):
+        """Legacy utility method to slice input arrays."""
+        try:
+            self.results[batch_start:batch_end] = batch_element
+
+        except Exception as e:
+            # `_slice_assign` should only be called in threads and exceptions
+            # raised in threads do not carry over to the main thread. So
+            # instead we perform a broad catch in the thread and then store
+            # the exception to be re-raised in the main thread.
+            self._errors.append(e)
+
+        finally:
+            is_finished.set()
+
+    def finalize(self):
+        start_time = time.time()
+        for is_finished in self._async_copies:
+            timeout = max(
+                [0.0, self._MAX_COPY_SECONDS - (time.time() - start_time)]
+            )
+            if not is_finished.wait(timeout):
+                raise ValueError("Timed out waiting for copy to complete.")
+
+        if self._errors:
+            raise self._errors[0]

-    except Exception as e:  # pylint: disable=broad-except
-      # `_slice_assign` should only be called in threads and exceptions raised
-      # in threads do not carry over to the main thread. So instead we perform a
-      # a broad catch in the thread and then store the exception to be re-raised
-      # in the main thread.
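# A stripped-down sketch of the pipelined slice assignment above: small
# batches are copied inline, large ones are handed to the shared thread
# pool with an Event per copy so finalize() can wait. Sizes here are toy
# values; the real cutoff is _BINARY_SIZE_THRESHOLD = 2 ** 14 elements.
import multiprocessing.pool
import threading
import numpy as np

pool = multiprocessing.pool.ThreadPool(4)
buf = np.empty((6, 2), dtype=np.float64)        # create(): preallocate once
pending = []

def slice_assign(batch, start, end, finished):
    buf[start:end] = batch                      # disjoint ranges: lock-free
    finished.set()

for start in (0, 3):
    batch = np.full((3, 2), float(start))
    event = threading.Event()
    pool.apply_async(slice_assign, args=(batch, start, start + 3, event))
    pending.append(event)

for event in pending:                           # finalize(): wait for copies
    assert event.wait(300)
print(buf[:, 0].tolist())                       # [0.0, 0.0, 0.0, 3.0, 3.0, 3.0]
pool.close()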
- self._errors.append(e) - finally: - is_finished.set() +class OutputsAggregator(Aggregator): + """Aggregator that concatenates outputs.""" + + _structure = None + + def create(self, batch_outs): + # SparseTensorValue is a named tuple which nest will flatten, so we need + # to guard it to properly handle the structure. + self._structure = tf.__internal__.nest.get_traverse_shallow_structure( + lambda x: not is_composite_or_composite_value(x), batch_outs + ) + batch_outs = tf.__internal__.nest.flatten_up_to( + self._structure, batch_outs + ) + + for batch_element in batch_outs: + if is_composite_or_composite_value(batch_element): + # If the output is not a ndarray, it will be either a composite + # tensor or a composite tensor's Value object. In either case, + # we can't allocate an array to hold the object - we'll handle + # it later. + self.results.append(ConcatAggregator(self.batch_size)) + elif isinstance(batch_element, np.ndarray): + self.results.append( + ( + ConcatAggregator(self.batch_size) + if self.use_steps + else SliceAggregator(self.num_samples, self.batch_size) + ) + ) + else: + # This is not a ndarray, a CompositeTensor, or a + # CompositeTensorValue. Fail fast rather than trying to + # concatenate it. + raise RuntimeError( + "Attempted to aggregate unsupported object {}.".format( + batch_element + ) + ) + + self.results[-1].create(batch_element) + + def aggregate(self, batch_outs, batch_start=None, batch_end=None): + batch_outs = tf.__internal__.nest.flatten_up_to( + self._structure, batch_outs + ) + for batch_element, result in zip(batch_outs, self.results): + result.aggregate(batch_element, batch_start, batch_end) + + def finalize(self): + for result in self.results: + result.finalize() + self.results = [i.results for i in self.results] + self.results = tf.nest.pack_sequence_as(self._structure, self.results) - def finalize(self): - start_time = time.time() - for is_finished in self._async_copies: - timeout = max([0., self._MAX_COPY_SECONDS - (time.time() - start_time)]) - if not is_finished.wait(timeout): - raise ValueError('Timed out waiting for copy to complete.') - if self._errors: - raise self._errors[0] +def get_progbar(model, count_mode, include_metrics=True): + """Get Progbar.""" + if include_metrics: + stateful_metric_names = getattr(model, "metrics_names", None) + if stateful_metric_names: + stateful_metric_names = stateful_metric_names[1:] # Exclude `loss` + else: + stateful_metric_names = None + return cbks.ProgbarLogger( + count_mode, stateful_metrics=stateful_metric_names + ) -class OutputsAggregator(Aggregator): - """Aggregator that concatenates outputs.""" - - _structure = None - - def create(self, batch_outs): - # SparseTensorValue is a named tuple which nest will flatten, so we need - # to guard it to properly handle the structure. - self._structure = tf.__internal__.nest.get_traverse_shallow_structure( - lambda x: not is_composite_or_composite_value(x), batch_outs) - batch_outs = tf.__internal__.nest.flatten_up_to(self._structure, batch_outs) - - for batch_element in batch_outs: - if is_composite_or_composite_value(batch_element): - # If the output is not a ndarray, it will be either a composite tensor - # or a composite tensor's Value object. In either case, we can't - # allocate an array to hold the object - we'll handle it later. 
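# The flatten/aggregate/repack pattern OutputsAggregator is built on, shown
# with the public tf.nest API (the class itself uses an internal shallow
# structure walk so composite values are treated as leaves, not recursed
# into).
import numpy as np
import tensorflow as tf

batch_outs = {"main": np.zeros((8, 4)), "aux": np.zeros((8, 1))}
flat = tf.nest.flatten(batch_outs)                           # one leaf per output
aggregated = [np.concatenate([f, f], axis=0) for f in flat]  # two batches
packed = tf.nest.pack_sequence_as(batch_outs, aggregated)
print({k: packed[k].shape for k in sorted(packed)})  # {'aux': (16, 1), 'main': (16, 4)}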
- self.results.append(ConcatAggregator(self.batch_size)) - elif isinstance(batch_element, np.ndarray): - self.results.append( - (ConcatAggregator(self.batch_size) if self.use_steps else - SliceAggregator(self.num_samples, self.batch_size))) - else: - # This is not a ndarray, a CompositeTensor, or a CompositeTensorValue. - # Fail fast rather than trying to concatenate it. - raise RuntimeError('Attempted to aggregate unsupported object {}.' - .format(batch_element)) - - self.results[-1].create(batch_element) - - def aggregate(self, batch_outs, batch_start=None, batch_end=None): - batch_outs = tf.__internal__.nest.flatten_up_to(self._structure, batch_outs) - for batch_element, result in zip(batch_outs, self.results): - result.aggregate(batch_element, batch_start, batch_end) - - def finalize(self): - for result in self.results: - result.finalize() - self.results = [i.results for i in self.results] - self.results = tf.nest.pack_sequence_as(self._structure, self.results) +def check_num_samples(ins, batch_size=None, steps=None, steps_name="steps"): + """Determine the number of samples provided for training and evaluation. + The number of samples is not defined when running with `steps`, + in which case the number of samples is set to `None`. -def get_progbar(model, count_mode, include_metrics=True): - """Get Progbar.""" - if include_metrics: - stateful_metric_names = getattr(model, 'metrics_names', None) - if stateful_metric_names: - stateful_metric_names = stateful_metric_names[1:] # Exclude `loss` - else: - stateful_metric_names = None - return cbks.ProgbarLogger(count_mode, stateful_metrics=stateful_metric_names) - - -def check_num_samples(ins, batch_size=None, steps=None, steps_name='steps'): - """Determine the number of samples provided for training and evaluation. - - The number of samples is not defined when running with `steps`, - in which case the number of samples is set to `None`. - - Args: - ins: List of tensors to be fed to the Keras function. - batch_size: Integer batch size or `None` if not defined. - steps: Total number of steps (batches of samples) before declaring - `_predict_loop` finished. Ignored with the default value of `None`. - steps_name: The public API's parameter name for `steps`. - - Raises: - ValueError: when `steps` is `None` and the attribute `ins.shape` - does not exist. Also raises ValueError when `steps` is not `None` - and `batch_size` is not `None` because they are mutually - exclusive. - - Returns: - When steps is `None`, returns the number of samples to be - processed based on the size of the first dimension of the - first input numpy array. When steps is not `None` and - `batch_size` is `None`, returns `None`. - """ - if steps is not None and batch_size is not None: - raise ValueError('If ' + steps_name + - ' is set, the `batch_size` must be None.') - if check_steps_argument(ins, steps, steps_name): - return None + Args: + ins: List of tensors to be fed to the Keras function. + batch_size: Integer batch size or `None` if not defined. + steps: Total number of steps (batches of samples) before declaring + `_predict_loop` finished. Ignored with the default value of `None`. + steps_name: The public API's parameter name for `steps`. + + Raises: + ValueError: when `steps` is `None` and the attribute `ins.shape` + does not exist. Also raises ValueError when `steps` is not `None` + and `batch_size` is not `None` because they are mutually + exclusive. 
+ + Returns: + When steps is `None`, returns the number of samples to be + processed based on the size of the first dimension of the + first input numpy array. When steps is not `None` and + `batch_size` is `None`, returns `None`. + """ + if steps is not None and batch_size is not None: + raise ValueError( + "If " + steps_name + " is set, the `batch_size` must be None." + ) + if check_steps_argument(ins, steps, steps_name): + return None - if hasattr(ins[0], 'shape'): - return int(ins[0].shape[0]) - return None # Edge case where ins == [static_learning_phase] + if hasattr(ins[0], "shape"): + return int(ins[0].shape[0]) + return None # Edge case where ins == [static_learning_phase] def standardize_single_array(x, expected_shape=None): - """Expand data of shape (x,) to (x, 1), unless len(expected_shape)==1.""" - if x is None: - return None + """Expand data of shape (x,) to (x, 1), unless len(expected_shape)==1.""" + if x is None: + return None - if is_composite_or_composite_value(x): + if is_composite_or_composite_value(x): + return x + + if isinstance(x, int): + raise ValueError( + f"Expected an array data type but received an integer: {x}" + ) + + if ( + x.shape is not None + and len(x.shape) == 1 + and (expected_shape is None or len(expected_shape) != 1) + ): + if tf.is_tensor(x): + x = tf.compat.v1.expand_dims(x, axis=1) + else: + x = np.expand_dims(x, 1) return x - if isinstance(x, int): - raise ValueError( - 'Expected an array data type but received an integer: {}'.format(x)) - if (x.shape is not None and len(x.shape) == 1 and - (expected_shape is None or len(expected_shape) != 1)): - if tf.is_tensor(x): - x = tf.compat.v1.expand_dims(x, axis=1) +def get_composite_shape(tensor): + """Returns the shape of the passed composite tensor.""" + if isinstance(tensor, tf.compat.v1.SparseTensorValue): + # SparseTensorValues use a 'dense_shape' attribute + return tensor.dense_shape else: - x = np.expand_dims(x, 1) - return x + return tensor.shape -def get_composite_shape(tensor): - """Returns the shape of the passed composite tensor.""" - if isinstance(tensor, tf.compat.v1.SparseTensorValue): - # SparseTensorValues use a 'dense_shape' attribute - return tensor.dense_shape - else: - return tensor.shape - - -def standardize_input_data(data, - names, - shapes=None, - check_batch_axis=True, - exception_prefix=''): - """Normalizes inputs and targets provided by users. - - Users may pass data as a list of arrays, dictionary of arrays, - or as a single array. We normalize this to an ordered list of - arrays (same order as `names`), while checking that the provided - arrays have shapes that match the network's expectations. - - Args: - data: User-provided input data (polymorphic). - names: List of expected array names. - shapes: Optional list of expected array shapes. - check_batch_axis: Boolean; whether to check that the batch axis of the - arrays matches the expected value found in `shapes`. - exception_prefix: String prefix used for exception formatting. - - Returns: - List of standardized input arrays (one array per model input). - - Raises: - ValueError: in case of improperly formatted user-provided data. - """ - try: - data_len = len(data) - except TypeError: - # For instance if data is `None` or a symbolic Tensor. 
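# A quick illustration of the expansion rule in standardize_single_array
# above: a rank-1 array becomes a column vector unless the model itself
# expects a rank-1 input.
import numpy as np

x = np.array([1.0, 2.0, 3.0])
print(np.expand_dims(x, 1).shape)  # (3, 1) -- the default expansion
# With an expected_shape of length 1, x would be returned unchanged as (3,).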
- data_len = None - - if not names: - if data_len and not isinstance(data, dict): - raise ValueError( - 'Error when checking model ' + exception_prefix + ': ' - 'expected no data, but got:', data) - return [] - if data is None: - return [None for _ in range(len(names))] - - if isinstance(data, dict): +def standardize_input_data( + data, names, shapes=None, check_batch_axis=True, exception_prefix="" +): + """Normalizes inputs and targets provided by users. + + Users may pass data as a list of arrays, dictionary of arrays, + or as a single array. We normalize this to an ordered list of + arrays (same order as `names`), while checking that the provided + arrays have shapes that match the network's expectations. + + Args: + data: User-provided input data (polymorphic). + names: List of expected array names. + shapes: Optional list of expected array shapes. + check_batch_axis: Boolean; whether to check that the batch axis of the + arrays matches the expected value found in `shapes`. + exception_prefix: String prefix used for exception formatting. + + Returns: + List of standardized input arrays (one array per model input). + + Raises: + ValueError: in case of improperly formatted user-provided data. + """ try: - data = [ - data[x].values - if data[x].__class__.__name__ == 'DataFrame' else data[x] - for x in names - ] - except KeyError as e: - raise ValueError('No data provided for "' + e.args[0] + '". Need data ' - 'for each key in: ' + str(names)) - elif isinstance(data, (list, tuple)): - if isinstance(data[0], (list, tuple)): - data = [np.asarray(d) for d in data] - elif len(names) == 1 and isinstance(data[0], (float, int)): - data = [np.asarray(data)] - else: - data = [ - x.values if x.__class__.__name__ == 'DataFrame' else x for x in data - ] - else: - data = data.values if data.__class__.__name__ == 'DataFrame' else data - data = [data] - - if shapes is not None: - data = [ - standardize_single_array(x, shape) for (x, shape) in zip(data, shapes) - ] - else: - data = [standardize_single_array(x) for x in data] - - if len(data) != len(names): - if data and hasattr(data[0], 'shape'): - raise ValueError('Error when checking model ' + exception_prefix + - ': the list of Numpy arrays that you are passing to ' - 'your model is not the size the model expected. ' - 'Expected to see ' + str(len(names)) + ' array(s), ' + - 'for inputs ' + str(names) + ' but instead got the ' - 'following list of ' + str(len(data)) + ' arrays: ' + - str(data)[:200] + '...') - elif len(names) > 1: - raise ValueError('Error when checking model ' + exception_prefix + - ': you are passing a list as input to your model, ' - 'but the model expects a list of ' + str(len(names)) + - ' Numpy arrays instead. The list you passed was: ' + - str(data)[:200]) - elif len(data) == 1 and not hasattr(data[0], 'shape'): - raise TypeError('Error when checking model ' + exception_prefix + - ': data should be a Numpy array, or list/dict of ' - 'Numpy arrays. Found: ' + str(data)[:200] + '...') - elif len(names) == 1: - data = [np.asarray(data)] - - # Check shapes compatibility. - if shapes: - for i in range(len(names)): - if shapes[i] is not None: - if tf.is_tensor(data[i]): - tensorshape = data[i].shape - if not tensorshape: - continue - data_shape = tuple(tensorshape.as_list()) - elif is_composite_or_composite_value(data[i]): - tensorshape = get_composite_shape(data[i]) - data_shape = tuple(tensorshape.as_list()) + data_len = len(data) + except TypeError: + # For instance if data is `None` or a symbolic Tensor. 
+ data_len = None + + if not names: + if data_len and not isinstance(data, dict): + raise ValueError( + "Error when checking model " + + exception_prefix + + ": expected no data, but got:", + data, + ) + return [] + if data is None: + return [None for _ in range(len(names))] + + if isinstance(data, dict): + try: + data = [ + data[x].values + if data[x].__class__.__name__ == "DataFrame" + else data[x] + for x in names + ] + except KeyError as e: + raise ValueError( + 'No data provided for "' + + e.args[0] + + '". Need data for each key in: ' + + str(names) + ) + elif isinstance(data, (list, tuple)): + if isinstance(data[0], (list, tuple)): + data = [np.asarray(d) for d in data] + elif len(names) == 1 and isinstance(data[0], (float, int)): + data = [np.asarray(data)] else: - data_shape = data[i].shape - - shape = shapes[i] - if len(data_shape) != len(shape): - raise ValueError('Error when checking ' + exception_prefix + - ': expected ' + names[i] + ' to have ' + - str(len(shape)) + ' dimensions, but got array ' - 'with shape ' + str(data_shape)) - if not check_batch_axis: - data_shape = data_shape[1:] - shape = shape[1:] - for dim, ref_dim in zip(data_shape, shape): - if ref_dim != dim and ref_dim is not None and dim is not None: - raise ValueError('Error when checking ' + exception_prefix + - ': expected ' + names[i] + ' to have shape ' + - str(shape) + ' but got array with shape ' + - str(data_shape)) - return data + data = [ + x.values if x.__class__.__name__ == "DataFrame" else x + for x in data + ] + else: + data = data.values if data.__class__.__name__ == "DataFrame" else data + data = [data] + + if shapes is not None: + data = [ + standardize_single_array(x, shape) + for (x, shape) in zip(data, shapes) + ] + else: + data = [standardize_single_array(x) for x in data] + + if len(data) != len(names): + if data and hasattr(data[0], "shape"): + raise ValueError( + "Error when checking model " + + exception_prefix + + ": the list of Numpy arrays that you are passing to " + "your model is not the size the model expected. " + "Expected to see " + + str(len(names)) + + " array(s), " + + "for inputs " + + str(names) + + " but instead got the following list of " + + str(len(data)) + + " arrays: " + + str(data)[:200] + + "..." + ) + elif len(names) > 1: + raise ValueError( + "Error when checking model " + + exception_prefix + + ": you are passing a list as input to your model, " + "but the model expects a list of " + + str(len(names)) + + " Numpy arrays instead. The list you passed was: " + + str(data)[:200] + ) + elif len(data) == 1 and not hasattr(data[0], "shape"): + raise TypeError( + "Error when checking model " + + exception_prefix + + ": data should be a Numpy array, or list/dict of " + "Numpy arrays. Found: " + str(data)[:200] + "..." + ) + elif len(names) == 1: + data = [np.asarray(data)] + + # Check shapes compatibility. 
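# What the dict branch of standardize_input_data above amounts to for a
# two-input model; the input names are hypothetical and the DataFrame
# unwrapping is elided.
import numpy as np

names = ["input_a", "input_b"]                  # model input names (assumed)
data = {"input_b": np.zeros((4, 2)), "input_a": np.ones((4, 3))}
ordered = [data[x] for x in names]              # reordered to match `names`
print([a.shape for a in ordered])               # [(4, 3), (4, 2)]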
+ if shapes: + for i in range(len(names)): + if shapes[i] is not None: + if tf.is_tensor(data[i]): + tensorshape = data[i].shape + if not tensorshape: + continue + data_shape = tuple(tensorshape.as_list()) + elif is_composite_or_composite_value(data[i]): + tensorshape = get_composite_shape(data[i]) + data_shape = tuple(tensorshape.as_list()) + else: + data_shape = data[i].shape + + shape = shapes[i] + if len(data_shape) != len(shape): + raise ValueError( + "Error when checking " + + exception_prefix + + ": expected " + + names[i] + + " to have " + + str(len(shape)) + + " dimensions, but got array with shape " + + str(data_shape) + ) + if not check_batch_axis: + data_shape = data_shape[1:] + shape = shape[1:] + for dim, ref_dim in zip(data_shape, shape): + if ( + ref_dim != dim + and ref_dim is not None + and dim is not None + ): + raise ValueError( + "Error when checking " + + exception_prefix + + ": expected " + + names[i] + + " to have shape " + + str(shape) + + " but got array with shape " + + str(data_shape) + ) + return data def standardize_sample_or_class_weights(x_weight, output_names, weight_type): - """Maps `sample_weight` or `class_weight` to model outputs. - - Args: - x_weight: User-provided `sample_weight` or `class_weight` argument. - output_names: List of output names (strings) in the model. - weight_type: A string used purely for exception printing. - - Returns: - A list of `sample_weight` or `class_weight` where there are exactly - one element per model output. - - Raises: - ValueError: In case of invalid user-provided argument. - """ - if x_weight is None or (isinstance(x_weight, (list, tuple)) and - len(x_weight) == 0): # pylint: disable=g-explicit-length-test - return [None for _ in output_names] - if len(output_names) == 1: - if isinstance(x_weight, (list, tuple)) and len(x_weight) == 1: - return x_weight - if isinstance(x_weight, dict) and output_names[0] in x_weight: - return [x_weight[output_names[0]]] + """Maps `sample_weight` or `class_weight` to model outputs. + + Args: + x_weight: User-provided `sample_weight` or `class_weight` argument. + output_names: List of output names (strings) in the model. + weight_type: A string used purely for exception printing. + + Returns: + A list of `sample_weight` or `class_weight` where there are exactly + one element per model output. + + Raises: + ValueError: In case of invalid user-provided argument. + """ + if x_weight is None or ( + isinstance(x_weight, (list, tuple)) and len(x_weight) == 0 + ): + return [None for _ in output_names] + if len(output_names) == 1: + if isinstance(x_weight, (list, tuple)) and len(x_weight) == 1: + return x_weight + if isinstance(x_weight, dict) and output_names[0] in x_weight: + return [x_weight[output_names[0]]] + else: + return [x_weight] + if isinstance(x_weight, (list, tuple)): + if len(x_weight) != len(output_names): + raise ValueError( + "Provided `" + + weight_type + + "` was a list of " + + str(len(x_weight)) + + " elements, but the model has " + + str(len(output_names)) + + " outputs. You should provide one `" + + weight_type + + "`array per model output." 
+ ) + return x_weight + if isinstance(x_weight, collections.abc.Mapping): + generic_utils.check_for_unexpected_keys( + weight_type, x_weight, output_names + ) + x_weights = [] + for name in output_names: + x_weights.append(x_weight.get(name)) + return x_weights else: - return [x_weight] - if isinstance(x_weight, (list, tuple)): - if len(x_weight) != len(output_names): - raise ValueError('Provided `' + weight_type + '` was a list of ' + - str(len(x_weight)) + ' elements, but the model has ' + - str(len(output_names)) + ' outputs. ' - 'You should provide one `' + weight_type + '`' - 'array per model output.') - return x_weight - if isinstance(x_weight, collections.abc.Mapping): - generic_utils.check_for_unexpected_keys(weight_type, x_weight, output_names) - x_weights = [] - for name in output_names: - x_weights.append(x_weight.get(name)) - return x_weights - else: - raise TypeError('The model has multiple outputs, so `' + weight_type + '` ' - 'should be either a list or a dict. ' - 'Provided `' + weight_type + '` type not understood: ' + - str(x_weight)) + raise TypeError( + "The model has multiple outputs, so `" + + weight_type + + "` should be either a list or a dict. Provided `" + + weight_type + + "` type not understood: " + + str(x_weight) + ) def standardize_class_weights(class_weight, output_names): - return standardize_sample_or_class_weights(class_weight, output_names, - 'class_weight') + return standardize_sample_or_class_weights( + class_weight, output_names, "class_weight" + ) def standardize_sample_weights(sample_weight, output_names): - return standardize_sample_or_class_weights(sample_weight, output_names, - 'sample_weight') + return standardize_sample_or_class_weights( + sample_weight, output_names, "sample_weight" + ) def check_array_lengths(inputs, targets, weights=None): - """Does user input validation for numpy arrays. + """Does user input validation for numpy arrays. - Args: - inputs: list of Numpy arrays of inputs. - targets: list of Numpy arrays of targets. - weights: list of Numpy arrays of sample weights. + Args: + inputs: list of Numpy arrays of inputs. + targets: list of Numpy arrays of targets. + weights: list of Numpy arrays of sample weights. - Raises: - ValueError: in case of incorrectly formatted data. - """ + Raises: + ValueError: in case of incorrectly formatted data. + """ - def is_tensor_or_composite_tensor(x): - return tf.is_tensor(x) or is_composite_or_composite_value(x) + def is_tensor_or_composite_tensor(x): + return tf.is_tensor(x) or is_composite_or_composite_value(x) - def set_of_lengths(x): - # Returns a set with the variation between - # different shapes, with None => 0 - if x is None: - return {} - else: - return set([ - y.shape[0] - for y in x - if y is not None and not is_tensor_or_composite_tensor(y) - ]) - - set_x = set_of_lengths(inputs) - set_y = set_of_lengths(targets) - set_w = set_of_lengths(weights) - if len(set_x) > 1: - raise ValueError('All input arrays (x) should have ' - 'the same number of samples. Got array shapes: ' + - str([x.shape for x in inputs])) - if len(set_y) > 1: - raise ValueError('All target arrays (y) should have ' - 'the same number of samples. Got array shapes: ' + - str([y.shape for y in targets])) - if set_x and set_y and list(set_x)[0] != list(set_y)[0]: - raise ValueError('Input arrays should have ' - 'the same number of samples as target arrays. 
' - 'Found ' + str(list(set_x)[0]) + ' input samples ' - 'and ' + str(list(set_y)[0]) + ' target samples.') - if len(set_w) > 1: - raise ValueError('All sample_weight arrays should have ' - 'the same number of samples. Got array shapes: ' + - str([w.shape for w in weights])) - if set_y and set_w and list(set_y)[0] != list(set_w)[0]: - raise ValueError('Sample_weight arrays should have ' - 'the same number of samples as target arrays. Got ' + - str(list(set_y)[0]) + ' input samples and ' + - str(list(set_w)[0]) + ' target samples.') + def set_of_lengths(x): + # Returns a set with the variation between + # different shapes, with None => 0 + if x is None: + return {} + else: + return set( + [ + y.shape[0] + for y in x + if y is not None and not is_tensor_or_composite_tensor(y) + ] + ) + + set_x = set_of_lengths(inputs) + set_y = set_of_lengths(targets) + set_w = set_of_lengths(weights) + if len(set_x) > 1: + raise ValueError( + "All input arrays (x) should have " + "the same number of samples. Got array shapes: " + + str([x.shape for x in inputs]) + ) + if len(set_y) > 1: + raise ValueError( + "All target arrays (y) should have " + "the same number of samples. Got array shapes: " + + str([y.shape for y in targets]) + ) + if set_x and set_y and list(set_x)[0] != list(set_y)[0]: + raise ValueError( + "Input arrays should have " + "the same number of samples as target arrays. " + "Found " + + str(list(set_x)[0]) + + " input samples and " + + str(list(set_y)[0]) + + " target samples." + ) + if len(set_w) > 1: + raise ValueError( + "All sample_weight arrays should have " + "the same number of samples. Got array shapes: " + + str([w.shape for w in weights]) + ) + if set_y and set_w and list(set_y)[0] != list(set_w)[0]: + raise ValueError( + "Sample_weight arrays should have " + "the same number of samples as target arrays. Got " + + str(list(set_y)[0]) + + " input samples and " + + str(list(set_w)[0]) + + " target samples." + ) def check_loss_and_target_compatibility(targets, loss_fns, output_shapes): - """Does validation on the compatibility of targets and loss functions. - - This helps prevent users from using loss functions incorrectly. This check - is purely for UX purposes. - - Args: - targets: list of Numpy arrays of targets. - loss_fns: list of loss functions. - output_shapes: list of shapes of model outputs. - - Raises: - ValueError: if a loss function or target array - is incompatible with an output. - """ - key_loss_fns = { - losses.mean_squared_error, losses.binary_crossentropy, - losses.categorical_crossentropy - } - key_loss_classes = (losses.MeanSquaredError, losses.BinaryCrossentropy, - losses.CategoricalCrossentropy) - for y, loss, shape in zip(targets, loss_fns, output_shapes): - if y is None or loss is None or tf.is_tensor(y): - continue - if losses.is_categorical_crossentropy(loss): - if y.shape[-1] == 1: - raise ValueError('You are passing a target array of shape ' + - str(y.shape) + - ' while using as loss `categorical_crossentropy`. ' - '`categorical_crossentropy` expects ' - 'targets to be binary matrices (1s and 0s) ' - 'of shape (samples, classes). 
' - 'If your targets are integer classes, ' - 'you can convert them to the expected format via:\n' - '```\n' - 'from keras.utils import to_categorical\n' - 'y_binary = to_categorical(y_int)\n' - '```\n' - '\n' - 'Alternatively, you can use the loss function ' - '`sparse_categorical_crossentropy` instead, ' - 'which does expect integer targets.') - - is_loss_wrapper = isinstance(loss, losses.LossFunctionWrapper) - if (isinstance(loss, key_loss_classes) or (is_loss_wrapper and - (loss.fn in key_loss_fns))): - for target_dim, out_dim in zip(y.shape[1:], shape[1:]): - if out_dim is not None and target_dim != out_dim: - loss_name = loss.name - if loss_name is None: - loss_type = loss.fn if is_loss_wrapper else type(loss) - loss_name = loss_type.__name__ - raise ValueError('A target array with shape ' + str(y.shape) + - ' was passed for an output of shape ' + str(shape) + - ' while using as loss `' + loss_name + '`. ' - 'This loss expects targets to have the same shape ' - 'as the output.') - - -def collect_per_output_metric_info(metrics, - output_names, - output_shapes, - loss_fns, - from_serialized=False, - is_weighted=False): - """Maps metric names and functions to model outputs. - - Args: - metrics: a list or a list of lists or a dict of metric functions. - output_names: a list of the names (strings) of model outputs. - output_shapes: a list of the shapes (strings) of model outputs. - loss_fns: a list of the loss functions corresponding to the model outputs. - from_serialized: whether the model the metrics are being sourced from is - being initialized from a serialized format. - is_weighted: Boolean indicating whether the given metrics are weighted. - - Returns: - A list (one entry per model output) of dicts. - For instance, if the model has 2 outputs, and for the first output - we want to compute "binary_accuracy" and "binary_crossentropy", - and just "binary_accuracy" for the second output, - the list would look like: `[{ - 'acc': binary_accuracy(), - 'ce': binary_crossentropy(), - }, { - 'acc': binary_accuracy(), - }]` - - Raises: - TypeError: if an incorrect type is passed for the `metrics` argument. - """ - if not metrics: - return [{} for _ in output_names] - - if isinstance(metrics, list): - any_sub_list = any(isinstance(m, list) for m in metrics) - if any_sub_list: - if len(metrics) != len(output_names): - raise ValueError('When passing a list of lists as `metrics`, ' - 'it should have one entry per model output. ' - 'The model has ' + str(len(output_names)) + - ' outputs, but you passed metrics=' + str(metrics)) - # User has provided a list of len = len(outputs). - nested_metrics = [generic_utils.to_list(m) for m in metrics] - else: - # If it is a single list we then apply all metrics to all outputs. - if len(output_names) > 1: + """Does validation on the compatibility of targets and loss functions. + + This helps prevent users from using loss functions incorrectly. This check + is purely for UX purposes. + + Args: + targets: list of Numpy arrays of targets. + loss_fns: list of loss functions. + output_shapes: list of shapes of model outputs. + + Raises: + ValueError: if a loss function or target array + is incompatible with an output. 
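# The conversion suggested by the error message above, spelled out: integer
# class targets become one-hot matrices before categorical_crossentropy.
import numpy as np
from keras.utils import to_categorical

y_int = np.array([0, 2, 1])
print(to_categorical(y_int))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]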
+ """ + key_loss_fns = { + losses.mean_squared_error, + losses.binary_crossentropy, + losses.categorical_crossentropy, + } + key_loss_classes = ( + losses.MeanSquaredError, + losses.BinaryCrossentropy, + losses.CategoricalCrossentropy, + ) + for y, loss, shape in zip(targets, loss_fns, output_shapes): + if y is None or loss is None or tf.is_tensor(y): + continue + if losses.is_categorical_crossentropy(loss): + if y.shape[-1] == 1: + raise ValueError( + "You are passing a target array of shape " + + str(y.shape) + + " while using as loss `categorical_crossentropy`. " + "`categorical_crossentropy` expects " + "targets to be binary matrices (1s and 0s) " + "of shape (samples, classes). " + "If your targets are integer classes, " + "you can convert them to the expected format via:\n" + "```\n" + "from keras.utils import to_categorical\n" + "y_binary = to_categorical(y_int)\n" + "```\n" + "\n" + "Alternatively, you can use the loss function " + "`sparse_categorical_crossentropy` instead, " + "which does expect integer targets." + ) + + is_loss_wrapper = isinstance(loss, losses.LossFunctionWrapper) + if isinstance(loss, key_loss_classes) or ( + is_loss_wrapper and (loss.fn in key_loss_fns) + ): + for target_dim, out_dim in zip(y.shape[1:], shape[1:]): + if out_dim is not None and target_dim != out_dim: + loss_name = loss.name + if loss_name is None: + loss_type = loss.fn if is_loss_wrapper else type(loss) + loss_name = loss_type.__name__ + raise ValueError( + "A target array with shape " + + str(y.shape) + + " was passed for an output of shape " + + str(shape) + + " while using as loss `" + + loss_name + + "`. " + "This loss expects targets to have the same shape " + "as the output." + ) + + +def collect_per_output_metric_info( + metrics, + output_names, + output_shapes, + loss_fns, + from_serialized=False, + is_weighted=False, +): + """Maps metric names and functions to model outputs. + + Args: + metrics: a list or a list of lists or a dict of metric functions. + output_names: a list of the names (strings) of model outputs. + output_shapes: a list of the shapes (strings) of model outputs. + loss_fns: a list of the loss functions corresponding to the model + outputs. + from_serialized: whether the model the metrics are being sourced from is + being initialized from a serialized format. + is_weighted: Boolean indicating whether the given metrics are weighted. + + Returns: + A list (one entry per model output) of dicts. + For instance, if the model has 2 outputs, and for the first output + we want to compute "binary_accuracy" and "binary_crossentropy", + and just "binary_accuracy" for the second output, + the list would look like: `[{ + 'acc': binary_accuracy(), + 'ce': binary_crossentropy(), + }, { + 'acc': binary_accuracy(), + }]` + + Raises: + TypeError: if an incorrect type is passed for the `metrics` argument. + """ + if not metrics: + return [{} for _ in output_names] + + if isinstance(metrics, list): + any_sub_list = any(isinstance(m, list) for m in metrics) + if any_sub_list: + if len(metrics) != len(output_names): + raise ValueError( + "When passing a list of lists as `metrics`, " + "it should have one entry per model output. " + "The model has " + + str(len(output_names)) + + " outputs, but you passed metrics=" + + str(metrics) + ) + # User has provided a list of len = len(outputs). + nested_metrics = [generic_utils.to_list(m) for m in metrics] + else: + # If it is a single list we then apply all metrics to all outputs. 
+ if len(output_names) > 1: + nested_metrics = [] + for _ in output_names: + nested_metrics.append( + [metrics_module.clone_metric(m) for m in metrics] + ) + else: + nested_metrics = [metrics] + elif isinstance(metrics, collections.abc.Mapping): + generic_utils.check_for_unexpected_keys( + "metrics", metrics, output_names + ) nested_metrics = [] - for _ in output_names: - nested_metrics.append( - [metrics_module.clone_metric(m) for m in metrics]) - else: - nested_metrics = [metrics] - elif isinstance(metrics, collections.abc.Mapping): - generic_utils.check_for_unexpected_keys('metrics', metrics, output_names) - nested_metrics = [] - for name in output_names: - output_metrics = generic_utils.to_list(metrics.get(name, [])) - nested_metrics.append(output_metrics) - else: - raise TypeError('Type of `metrics` argument not understood. ' - 'Expected a list or dictionary, found: ' + str(metrics)) - - per_output_metrics = [] - for i, metrics in enumerate(nested_metrics): - metrics_dict = collections.OrderedDict() - for metric in metrics: - metric_name = get_metric_name(metric, is_weighted) - metric_fn = get_metric_function( - metric, output_shape=output_shapes[i], loss_fn=loss_fns[i]) - metric_fn._from_serialized = from_serialized # pylint: disable=protected-access - - # If the metric function is not stateful, we create a stateful version. - if not isinstance(metric_fn, metrics_module.Metric): - metric_fn = metrics_module.MeanMetricWrapper( - metric_fn, name=metric_name) - # If the metric is being revived from something stateless, such as a - # string (e.g. "accuracy"), we may need to later reapply transformations - # such as renaming. - metric_fn._from_serialized = False # pylint: disable=protected-access - metrics_dict[metric_name] = metric_fn - per_output_metrics.append(metrics_dict) - - return per_output_metrics + for name in output_names: + output_metrics = generic_utils.to_list(metrics.get(name, [])) + nested_metrics.append(output_metrics) + else: + raise TypeError( + "Type of `metrics` argument not understood. " + "Expected a list or dictionary, found: " + str(metrics) + ) + + per_output_metrics = [] + for i, metrics in enumerate(nested_metrics): + metrics_dict = collections.OrderedDict() + for metric in metrics: + metric_name = get_metric_name(metric, is_weighted) + metric_fn = get_metric_function( + metric, output_shape=output_shapes[i], loss_fn=loss_fns[i] + ) + metric_fn._from_serialized = from_serialized + + # If the metric function is not stateful, we create a stateful + # version. + if not isinstance(metric_fn, metrics_module.Metric): + metric_fn = metrics_module.MeanMetricWrapper( + metric_fn, name=metric_name + ) + # If the metric is being revived from something stateless, such + # as a string (e.g. "accuracy"), we may need to later reapply + # transformations such as renaming. + metric_fn._from_serialized = False + metrics_dict[metric_name] = metric_fn + per_output_metrics.append(metrics_dict) + + return per_output_metrics def batch_shuffle(index_array, batch_size): - """Shuffles an array in a batch-wise fashion. - - Useful for shuffling HDF5 arrays - (where one cannot access arbitrary indices). - - Args: - index_array: array of indices to be shuffled. - batch_size: integer. - - Returns: - The `index_array` array, shuffled in a batch-wise fashion. 
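# The batch-wise shuffle described above, on a toy index array: whole
# batches move as units, order inside a batch is preserved, and the
# leftover tail stays at the end. The seed is illustrative only.
import numpy as np

np.random.seed(0)
index_array = np.arange(7)
batch_size = 2
batch_count = len(index_array) // batch_size           # 3 full batches
last_batch = index_array[batch_count * batch_size:]    # [6], the remainder
body = index_array[: batch_count * batch_size].reshape((batch_count, batch_size))
np.random.shuffle(body)                                # shuffle rows only
print(np.append(body.flatten(), last_batch))           # e.g. [2 3 0 1 4 5 6]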
- """ - batch_count = int(len(index_array) / batch_size) - # to reshape we need to be cleanly divisible by batch size - # we stash extra items and reappend them after shuffling - last_batch = index_array[batch_count * batch_size:] - index_array = index_array[:batch_count * batch_size] - index_array = index_array.reshape((batch_count, batch_size)) - np.random.shuffle(index_array) - index_array = index_array.flatten() - return np.append(index_array, last_batch) - - -def standardize_weights(y, - sample_weight=None, - class_weight=None, - sample_weight_mode=None): - """Performs sample weight validation and standardization. - - Everything gets normalized to a single sample-wise (or timestep-wise) - weight array. If both `sample_weight` and `class_weight` are provided, - the weights are multiplied. - - Args: - y: Numpy array or Tensor of model targets to be weighted. - sample_weight: User-provided `sample_weight` argument. - class_weight: User-provided `class_weight` argument. - sample_weight_mode: One of `None` or `"temporal"`. `"temporal"` indicated - that we expect 2D weight data that will be applied to the last 2 - dimensions of the targets (i.e. we are weighting timesteps, not - samples). - - Returns: - A numpy array of target weights, one entry per sample to weight. - - Raises: - ValueError: In case of invalid user-provided arguments. - """ - # Iterator may return sample_weight as 1-tuple - if isinstance(sample_weight, tuple): - sample_weight = sample_weight[0] - if sample_weight_mode is not None and sample_weight_mode != 'samplewise': - if sample_weight_mode != 'temporal': - raise ValueError('"sample_weight_mode ' - 'should be None or "temporal". ' - 'Found: ' + str(sample_weight_mode)) - if len(y.shape) < 3: - raise ValueError('Found a sample_weight array for ' - 'an input with shape ' + str(y.shape) + '. ' - 'Timestep-wise sample weighting (use of ' - 'sample_weight_mode="temporal") is restricted to ' - 'outputs that are at least 3D, i.e. that have ' - 'a time dimension.') - if sample_weight is not None and len(sample_weight.shape) != 2: - raise ValueError('Found a sample_weight array with shape ' + - str(sample_weight.shape) + '. ' - 'In order to use timestep-wise sample weighting, ' - 'you should pass a 2D sample_weight array.') - else: - if sample_weight is not None and len(sample_weight.shape) != 1: - raise ValueError( - 'Found a sample_weight array with shape {}. In order to ' - 'use timestep-wise sample weights, you should specify ' - 'sample_weight_mode="temporal" in compile(); founssd "{}" ' - 'instead. If you just mean to use sample-wise weights, ' - 'make sure your sample_weight array is 1D.'.format( - sample_weight.shape, sample_weight_mode)) - - if sample_weight is not None: - if len(sample_weight.shape) > len(y.shape): - raise ValueError('Found a sample_weight with shape' + - str(sample_weight.shape) + '.' - 'Expected sample_weight with rank ' - 'less than or equal to ' + str(len(y.shape))) - - if (not tf.is_tensor(sample_weight) and - y.shape[:sample_weight.ndim] != sample_weight.shape): - raise ValueError('Found a sample_weight array with shape ' + - str(sample_weight.shape) + ' for an input with shape ' + - str(y.shape) + '. ' - 'sample_weight cannot be broadcast.') - - # Class weights applied per-sample. - class_sample_weight = None - if isinstance(class_weight, dict): - if len(y.shape) > 2: - raise ValueError('`class_weight` not supported for ' - '3+ dimensional targets.') - - if tf.is_tensor(y): - # Few classes are expected, so densifying is reasonable. 
- keys = np.array(sorted(class_weight.keys())) - values = np.array([class_weight[i] for i in keys]) - weight_vector = np.zeros(np.max(keys) + 1) - weight_vector[:] = np.nan - weight_vector[keys] = values - - y_classes = tf.__internal__.smart_cond.smart_cond( - len(y.shape.as_list()) == 2 and backend.shape(y)[1] > 1, - lambda: backend.argmax(y, axis=1), - lambda: tf.cast(backend.reshape(y, (-1,)), tf.int64)) - class_sample_weight = tf.compat.v1.gather(weight_vector, y_classes) - tf.debugging.check_numerics( - class_sample_weight, - 'Invalid classes or class weights detected. NaN values indicate that ' - 'an appropriate class weight could not be determined.') - class_sample_weight = tf.cast(class_sample_weight, backend.floatx()) - if sample_weight is not None: - sample_weight = tf.cast( - tf.convert_to_tensor(sample_weight), - backend.floatx()) - else: - y_classes = y - if len(y.shape) == 2: - if y.shape[1] > 1: - y_classes = np.argmax(y, axis=1) - elif y.shape[1] == 1: - y_classes = np.reshape(y, y.shape[0]) - - class_sample_weight = np.asarray( - [class_weight[cls] for cls in y_classes if cls in class_weight]) - - if len(class_sample_weight) != len(y_classes): - # subtract the sets to pick all missing classes - existing_classes = set(y_classes) - existing_class_weight = set(class_weight.keys()) - raise ValueError( - '`class_weight` must contain all classes in the data.' - ' The classes %s exist in the data but not in ' - '`class_weight`.' % (existing_classes - existing_class_weight)) + """Shuffles an array in a batch-wise fashion. - if class_sample_weight is not None and sample_weight is not None: - # Multiply weights if both are provided. - return class_sample_weight * sample_weight - if sample_weight is not None: - return sample_weight - if class_sample_weight is not None: - return class_sample_weight - return None + Useful for shuffling HDF5 arrays + (where one cannot access arbitrary indices). + + Args: + index_array: array of indices to be shuffled. + batch_size: integer. + + Returns: + The `index_array` array, shuffled in a batch-wise fashion. + """ + batch_count = int(len(index_array) / batch_size) + # to reshape we need to be cleanly divisible by batch size + # we stash extra items and reappend them after shuffling + last_batch = index_array[batch_count * batch_size :] + index_array = index_array[: batch_count * batch_size] + index_array = index_array.reshape((batch_count, batch_size)) + np.random.shuffle(index_array) + index_array = index_array.flatten() + return np.append(index_array, last_batch) + + +def standardize_weights( + y, sample_weight=None, class_weight=None, sample_weight_mode=None +): + """Performs sample weight validation and standardization. + + Everything gets normalized to a single sample-wise (or timestep-wise) + weight array. If both `sample_weight` and `class_weight` are provided, + the weights are multiplied. + + Args: + y: Numpy array or Tensor of model targets to be weighted. + sample_weight: User-provided `sample_weight` argument. + class_weight: User-provided `class_weight` argument. + sample_weight_mode: One of `None` or `"temporal"`. `"temporal"` + indicated that we expect 2D weight data that will be applied to the + last 2 dimensions of the targets (i.e. we are weighting timesteps, not + samples). + + Returns: + A numpy array of target weights, one entry per sample to weight. + + Raises: + ValueError: In case of invalid user-provided arguments. 
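# The shape contract enforced above for sample_weight_mode="temporal",
# with toy shapes: 3D (or higher) targets take a 2D (samples, timesteps)
# weight array.
import numpy as np

y = np.zeros((4, 10, 2))           # (samples, timesteps, features)
sample_weight = np.ones((4, 10))   # one weight per sample and timestep
assert len(y.shape) >= 3 and sample_weight.ndim == 2
assert y.shape[: sample_weight.ndim] == sample_weight.shape  # broadcastable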
+ """ + # Iterator may return sample_weight as 1-tuple + if isinstance(sample_weight, tuple): + sample_weight = sample_weight[0] + if sample_weight_mode is not None and sample_weight_mode != "samplewise": + if sample_weight_mode != "temporal": + raise ValueError( + '"sample_weight_mode should be None or "temporal". Found: ' + + str(sample_weight_mode) + ) + if len(y.shape) < 3: + raise ValueError( + "Found a sample_weight array for an input with shape " + + str(y.shape) + + ". " + "Timestep-wise sample weighting (use of " + 'sample_weight_mode="temporal") is restricted to ' + "outputs that are at least 3D, i.e. that have " + "a time dimension." + ) + if sample_weight is not None and len(sample_weight.shape) != 2: + raise ValueError( + "Found a sample_weight array with shape " + + str(sample_weight.shape) + + ". " + "In order to use timestep-wise sample weighting, " + "you should pass a 2D sample_weight array." + ) + else: + if sample_weight is not None and len(sample_weight.shape) != 1: + raise ValueError( + "Found a sample_weight array with shape {}. In order to " + "use timestep-wise sample weights, you should specify " + 'sample_weight_mode="temporal" in compile(); founssd "{}" ' + "instead. If you just mean to use sample-wise weights, " + "make sure your sample_weight array is 1D.".format( + sample_weight.shape, sample_weight_mode + ) + ) + + if sample_weight is not None: + if len(sample_weight.shape) > len(y.shape): + raise ValueError( + "Found a sample_weight with shape" + + str(sample_weight.shape) + + ".Expected sample_weight with rank less than or equal to " + + str(len(y.shape)) + ) + + if ( + not tf.is_tensor(sample_weight) + and y.shape[: sample_weight.ndim] != sample_weight.shape + ): + raise ValueError( + "Found a sample_weight array with shape " + + str(sample_weight.shape) + + " for an input with shape " + + str(y.shape) + + ". sample_weight cannot be broadcast." + ) + + # Class weights applied per-sample. + class_sample_weight = None + if isinstance(class_weight, dict): + if len(y.shape) > 2: + raise ValueError( + "`class_weight` not supported for 3+ dimensional targets." + ) + + if tf.is_tensor(y): + # Few classes are expected, so densifying is reasonable. + keys = np.array(sorted(class_weight.keys())) + values = np.array([class_weight[i] for i in keys]) + weight_vector = np.zeros(np.max(keys) + 1) + weight_vector[:] = np.nan + weight_vector[keys] = values + + y_classes = tf.__internal__.smart_cond.smart_cond( + len(y.shape.as_list()) == 2 and backend.shape(y)[1] > 1, + lambda: backend.argmax(y, axis=1), + lambda: tf.cast(backend.reshape(y, (-1,)), tf.int64), + ) + class_sample_weight = tf.compat.v1.gather(weight_vector, y_classes) + tf.debugging.check_numerics( + class_sample_weight, + "Invalid classes or class weights detected. 
NaN values " + "indicate that an appropriate class weight could not be " + "determined.", + ) + class_sample_weight = tf.cast(class_sample_weight, backend.floatx()) + if sample_weight is not None: + sample_weight = tf.cast( + tf.convert_to_tensor(sample_weight), backend.floatx() + ) + else: + y_classes = y + if len(y.shape) == 2: + if y.shape[1] > 1: + y_classes = np.argmax(y, axis=1) + elif y.shape[1] == 1: + y_classes = np.reshape(y, y.shape[0]) + + class_sample_weight = np.asarray( + [class_weight[cls] for cls in y_classes if cls in class_weight] + ) + + if len(class_sample_weight) != len(y_classes): + # subtract the sets to pick all missing classes + existing_classes = set(y_classes) + existing_class_weight = set(class_weight.keys()) + raise ValueError( + "`class_weight` must contain all classes in the data." + " The classes %s exist in the data but not in " + "`class_weight`." + % (existing_classes - existing_class_weight) + ) + + if class_sample_weight is not None and sample_weight is not None: + # Multiply weights if both are provided. + return class_sample_weight * sample_weight + if sample_weight is not None: + return sample_weight + if class_sample_weight is not None: + return class_sample_weight + return None def has_symbolic_tensors(ls): - if tf.executing_eagerly(): - return False - return has_tensors(ls) + if tf.executing_eagerly(): + return False + return has_tensors(ls) def has_tensors(ls): - """Returns true if `ls` contains tensors.""" - # Note: at some point in time ragged tensors didn't count as tensors, so this - # returned false for ragged tensors. Making this return true fails some tests - # which would then require a steps_per_epoch argument. - if isinstance(ls, (list, tuple)): - return any( - tf.is_tensor(v) and - not isinstance(v, tf.RaggedTensor) for v in ls) - if isinstance(ls, dict): - return any( - tf.is_tensor(v) and - not isinstance(v, tf.RaggedTensor) - for _, v in ls.items()) - return tf.is_tensor(ls) and not isinstance( - ls, tf.RaggedTensor) + """Returns true if `ls` contains tensors.""" + # Note: at some point in time ragged tensors didn't count as tensors, so + # this returned false for ragged tensors. Making this return true fails some + # tests which would then require a steps_per_epoch argument. + if isinstance(ls, (list, tuple)): + return any( + tf.is_tensor(v) and not isinstance(v, tf.RaggedTensor) for v in ls + ) + if isinstance(ls, dict): + return any( + tf.is_tensor(v) and not isinstance(v, tf.RaggedTensor) + for _, v in ls.items() + ) + return tf.is_tensor(ls) and not isinstance(ls, tf.RaggedTensor) def get_metric_name(metric, weighted=False): - """Returns the name corresponding to the given metric input. - - Args: - metric: Metric function name or reference. - weighted: Boolean indicating if the given metric is weighted. - - Returns: - The metric name. - """ - if tf.__internal__.tf2.enabled(): - # We keep the string that the user has set in compile as the metric name. - if isinstance(metric, str): - return metric - - metric = metrics_module.get(metric) - return metric.name if hasattr(metric, 'name') else metric.__name__ - else: - metric_name_prefix = 'weighted_' if weighted else '' - if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): - if metric in ('accuracy', 'acc'): - suffix = 'acc' - elif metric in ('crossentropy', 'ce'): - suffix = 'ce' + """Returns the name corresponding to the given metric input. + + Args: + metric: Metric function name or reference. + weighted: Boolean indicating if the given metric is weighted. 
+ + Returns: + The metric name. + """ + if tf.__internal__.tf2.enabled(): + # We keep the string that the user has set in compile as the metric + # name. + if isinstance(metric, str): + return metric + + metric = metrics_module.get(metric) + return metric.name if hasattr(metric, "name") else metric.__name__ else: - metric_fn = metrics_module.get(metric) - # Get metric name as string - if hasattr(metric_fn, 'name'): - suffix = metric_fn.name - else: - suffix = metric_fn.__name__ - metric_name = metric_name_prefix + suffix - return metric_name + metric_name_prefix = "weighted_" if weighted else "" + if metric in ("accuracy", "acc", "crossentropy", "ce"): + if metric in ("accuracy", "acc"): + suffix = "acc" + elif metric in ("crossentropy", "ce"): + suffix = "ce" + else: + metric_fn = metrics_module.get(metric) + # Get metric name as string + if hasattr(metric_fn, "name"): + suffix = metric_fn.name + else: + suffix = metric_fn.__name__ + metric_name = metric_name_prefix + suffix + return metric_name def get_metric_function(metric, output_shape=None, loss_fn=None): - """Returns the metric function corresponding to the given metric input. + """Returns the metric function corresponding to the given metric input. - Args: - metric: Metric function name or reference. - output_shape: The shape of the output that this metric will be calculated - for. - loss_fn: The loss function used. - - Returns: - The metric function. - """ - if metric not in ['accuracy', 'acc', 'crossentropy', 'ce']: - return metrics_module.get(metric) - - is_sparse_categorical_crossentropy = ( - isinstance(loss_fn, losses.SparseCategoricalCrossentropy) or - (isinstance(loss_fn, losses.LossFunctionWrapper) and - loss_fn.fn == losses.sparse_categorical_crossentropy)) - - is_binary_crossentropy = ( - isinstance(loss_fn, losses.BinaryCrossentropy) or - (isinstance(loss_fn, losses.LossFunctionWrapper) and - loss_fn.fn == losses.binary_crossentropy)) - - if metric in ['accuracy', 'acc']: - if output_shape[-1] == 1 or is_binary_crossentropy: - return metrics_module.binary_accuracy - elif is_sparse_categorical_crossentropy: - return metrics_module.sparse_categorical_accuracy - # If the output_shape[-1] is not 1, then we know output is `categorical`. - # We assume it is sparse categorical only if loss is explicitly given - # as sparse categorical crossentropy loss. - return metrics_module.categorical_accuracy - else: - if output_shape[-1] == 1 or is_binary_crossentropy: - return metrics_module.binary_crossentropy - elif is_sparse_categorical_crossentropy: - return metrics_module.sparse_categorical_crossentropy - return metrics_module.categorical_crossentropy - - -def call_metric_function(metric_fn, - y_true, - y_pred=None, - weights=None, - mask=None): - """Invokes metric function and returns the metric result tensor.""" - if mask is not None: - mask = tf.cast(mask, y_pred.dtype) - if weights is None: - # Use mask as sample weight. - weights = mask + Args: + metric: Metric function name or reference. + output_shape: The shape of the output that this metric will be + calculated for. + loss_fn: The loss function used. + + Returns: + The metric function. 
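+
+    Example (an illustrative sketch; the output shape below is made up):
+
+        # With a binary head (last dimension 1), the shorthand 'acc'
+        # resolves to binary accuracy rather than categorical accuracy.
+        fn = get_metric_function('acc', output_shape=(None, 1))
+        # fn is metrics_module.binary_accuracy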
+ """ + if metric not in ["accuracy", "acc", "crossentropy", "ce"]: + return metrics_module.get(metric) + + is_sparse_categorical_crossentropy = isinstance( + loss_fn, losses.SparseCategoricalCrossentropy + ) or ( + isinstance(loss_fn, losses.LossFunctionWrapper) + and loss_fn.fn == losses.sparse_categorical_crossentropy + ) + + is_binary_crossentropy = isinstance(loss_fn, losses.BinaryCrossentropy) or ( + isinstance(loss_fn, losses.LossFunctionWrapper) + and loss_fn.fn == losses.binary_crossentropy + ) + + if metric in ["accuracy", "acc"]: + if output_shape[-1] == 1 or is_binary_crossentropy: + return metrics_module.binary_accuracy + elif is_sparse_categorical_crossentropy: + return metrics_module.sparse_categorical_accuracy + # If the output_shape[-1] is not 1, then we know output is + # `categorical`. We assume it is sparse categorical only if loss is + # explicitly given as sparse categorical crossentropy loss. + return metrics_module.categorical_accuracy else: - # Update dimensions of weights to match with mask. - weights = tf.cast(weights, dtype=y_pred.dtype) - mask, _, weights = losses_utils.squeeze_or_expand_dimensions( - mask, sample_weight=weights) - weights *= mask + if output_shape[-1] == 1 or is_binary_crossentropy: + return metrics_module.binary_crossentropy + elif is_sparse_categorical_crossentropy: + return metrics_module.sparse_categorical_crossentropy + return metrics_module.categorical_crossentropy + + +def call_metric_function( + metric_fn, y_true, y_pred=None, weights=None, mask=None +): + """Invokes metric function and returns the metric result tensor.""" + if mask is not None: + mask = tf.cast(mask, y_pred.dtype) + if weights is None: + # Use mask as sample weight. + weights = mask + else: + # Update dimensions of weights to match with mask. + weights = tf.cast(weights, dtype=y_pred.dtype) + mask, _, weights = losses_utils.squeeze_or_expand_dimensions( + mask, sample_weight=weights + ) + weights *= mask - if y_pred is not None: - return metric_fn(y_true, y_pred, sample_weight=weights) - # `Mean` metric only takes a single value. - return metric_fn(y_true, sample_weight=weights) + if y_pred is not None: + return metric_fn(y_true, y_pred, sample_weight=weights) + # `Mean` metric only takes a single value. + return metric_fn(y_true, sample_weight=weights) def get_loss_function(loss): - """Returns the loss corresponding to the loss input in `compile` API.""" - if loss is None or isinstance(loss, losses.Loss): - return loss + """Returns the loss corresponding to the loss input in `compile` API.""" + if loss is None or isinstance(loss, losses.Loss): + return loss - if tf_inspect.isclass(loss) and issubclass(loss, losses.Loss): - # It is not safe to assume that the loss takes no constructor arguments. - raise ValueError( - 'Received uninstantiated Loss class: {}\nPlease call loss ""classes ' - 'before passing them to Model.compile.'.format(loss)) - - # Deserialize loss configuration, if needed. - if isinstance(loss, collections.abc.Mapping): - loss = losses.get(loss) + if tf_inspect.isclass(loss) and issubclass(loss, losses.Loss): + # It is not safe to assume that the loss takes no constructor arguments. + raise ValueError( + "Received uninstantiated Loss class: {}\n" + "Please call loss classes " + "before passing them to Model.compile.".format(loss) + ) + + # Deserialize loss configuration, if needed. + if isinstance(loss, collections.abc.Mapping): + loss = losses.get(loss) + + # Custom callable class. 
+    if callable(loss) and not hasattr(loss, "__name__"):
+        return loss
+
+    # Wrap loss function with signature `(y_true, y_pred, **kwargs)`
+    # in `LossFunctionWrapper` class.
+    loss_fn = losses.get(loss)
+
+    # For losses which are given as strings/functions in the compile API,
+    # we always set the loss reduction type to be `SUM_OVER_BATCH_SIZE`
+    # (both in distribution strategy context and otherwise).
+    return losses.LossFunctionWrapper(
+        loss_fn,
+        name=loss_fn.__name__,
+        reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
+    )
-  # Custom callable class.
-  if callable(loss) and not hasattr(loss, '__name__'):
-    return loss
-
-  # Wrap loss function with signature `(y_true, y_pred, **kwargs)`
-  # in `LossFunctionWrapper` class.
-  loss_fn = losses.get(loss)
-
-  # For losses which are given as strings/functions in the compile API,
-  # we always set the loss reduction type to be `SUM_OVER_BATCH_SIZE`
-  # (both in distribution strategy context and otherwise).
-  return losses.LossFunctionWrapper(
-      loss_fn,
-      name=loss_fn.__name__,
-      reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)
+def validate_dataset_input(x, y, sample_weight, validation_split=None):
+    """Validates user input arguments when a dataset iterator is passed.
+
+    Args:
+        x: Input data. A `tf.data` dataset or iterator.
+        y: Target data. It could be either Numpy array(s) or TensorFlow
+            tensor(s). Expected to be `None` when `x` is a dataset iterator.
+        sample_weight: An optional sample-weight array passed by the user to
+            weight the importance of each sample in `x`. Expected to be
+            `None` when `x` is a dataset iterator.
+        validation_split: Float between 0 and 1. Fraction of the training
+            data to be used as validation data. Expected to be `None` when
+            `x` is a dataset iterator.
+
+    Raises:
+        ValueError: if argument `y` or `sample_weight` or `validation_split`
+            are provided by user.
+    """
+    if y is not None:
+        raise ValueError(
+            "You passed a dataset or dataset iterator (%s) as "
+            "input `x` to your model. In that case, you should "
+            "not specify a target (`y`) argument, since the dataset "
+            "or dataset iterator generates both input data and "
+            "target data. "
+            "Received: %s" % (x, y)
+        )
+    if sample_weight is not None:
+        raise ValueError(
+            "`sample_weight` argument is not supported when input "
+            "`x` is a dataset or a dataset iterator. Instead, you "
+            "can provide sample_weight as the third element of your "
+            "dataset, i.e. (inputs, targets, sample_weight). "
+            "Received: x=%s, sample_weight=%s" % (x, sample_weight)
+        )
+    if validation_split is not None and validation_split != 0.0:
+        raise ValueError(
+            "`validation_split` argument is not supported when "
+            "input `x` is a dataset or a dataset iterator. "
+            "Received: x=%s, validation_split=%f" % (x, validation_split)
+        )
+
+
+def validate_input_types(inp, orig_inp, allow_dict=True, field_name="inputs"):
+    """Helper function to validate either inputs or targets."""
+    if isinstance(inp, (list, tuple)):
+        if not all(isinstance(v, np.ndarray) or tf.is_tensor(v) for v in inp):
+            raise ValueError(
+                "Please provide as model inputs either a single array or a "
+                f"list of arrays. You passed: {field_name}={str(orig_inp)}"
+            )
+    elif isinstance(inp, dict):
+        if not allow_dict:
+            raise ValueError(
+                f"You cannot pass a dictionary as model {field_name}."
+            )
+    elif not isinstance(inp, np.ndarray) and not tf.is_tensor(inp):
+        raise ValueError(
+            "Please provide as model inputs either a single array or a "
+            "list of arrays. You passed: {}={}".format(field_name, orig_inp)
You passed: {}={}".format(field_name, orig_inp) + ) -def validate_dataset_input(x, y, sample_weight, validation_split=None): - """Validates user input arguments when a dataset iterator is passed. - - Args: - x: Input data. A `tf.data` dataset or iterator. - y: Target data. It could be either Numpy array(s) or TensorFlow tensor(s). - Expected to be `None` when `x` is a dataset iterator. - sample_weight: An optional sample-weight array passed by the user to weight - the importance of each sample in `x`. Expected to be `None` when `x` is a - dataset iterator - validation_split: Float between 0 and 1. Fraction of the training data to be - used as validation data. Expected to be `None` when `x` is a dataset - iterator. - - Raises: - ValueError: if argument `y` or `sample_weight` or `validation_split` are - provided by user. - """ - if y is not None: - raise ValueError('You passed a dataset or dataset iterator (%s) as ' - 'input `x` to your model. In that case, you should ' - 'not specify a target (`y`) argument, since the dataset ' - 'or dataset iterator generates both input data and ' - 'target data. ' - 'Received: %s' % (x, y)) - if sample_weight is not None: - raise ValueError('`sample_weight` argument is not supported when input ' - '`x` is a dataset or a dataset iterator. Instead, you' - 'can provide sample_weight as the third element of your' - 'dataset, i.e. (inputs, targets, sample_weight). ' - 'Received: x=%s, sample_weight=%s' % (x, sample_weight)) - if validation_split is not None and validation_split != 0.0: - raise ValueError( - '`validation_split` argument is not supported when ' - 'input `x` is a dataset or a dataset iterator. ' - 'Received: x=%s, validation_split=%f' % (x, validation_split)) - - -def validate_input_types(inp, orig_inp, allow_dict=True, field_name='inputs'): - """Helper function to validate either inputs or targets.""" - if isinstance(inp, (list, tuple)): - if not all(isinstance(v, np.ndarray) or - tf.is_tensor(v) for v in inp): - raise ValueError( - 'Please provide as model inputs either a single array or a list of ' - 'arrays. You passed: {}={}'.format(field_name, str(orig_inp))) - elif isinstance(inp, dict): - if not allow_dict: - raise ValueError( - 'You cannot pass a dictionary as model {}.'.format(field_name)) - elif not isinstance(inp, np.ndarray) and not tf.is_tensor(inp): - raise ValueError( - 'Please provide as model inputs either a single array or a list of ' - 'arrays. You passed: {}={}'.format(field_name, orig_inp)) - - -def check_generator_arguments(y=None, sample_weight=None, - validation_split=None): - """Validates arguments passed when using a generator.""" - if y is not None: - raise ValueError('`y` argument is not supported when data is' - 'a generator or Sequence instance. Instead pass targets' - ' as the second element of the generator.') - if sample_weight is not None: - raise ValueError('`sample_weight` argument is not supported when data is' - 'a generator or Sequence instance. Instead pass sample' - ' weights as the third element of the generator.') - if validation_split: - raise ValueError('If your data is in the form of a Python generator, ' - 'you cannot use `validation_split`.') +def check_generator_arguments( + y=None, sample_weight=None, validation_split=None +): + """Validates arguments passed when using a generator.""" + if y is not None: + raise ValueError( + "`y` argument is not supported when data is" + "a generator or Sequence instance. Instead pass targets" + " as the second element of the generator." 
+        )
+    if sample_weight is not None:
+        raise ValueError(
+            "`sample_weight` argument is not supported when data is "
+            "a generator or Sequence instance. Instead pass sample"
+            " weights as the third element of the generator."
+        )
+    if validation_split:
+        raise ValueError(
+            "If your data is in the form of a Python generator, "
+            "you cannot use `validation_split`."
+        )


def check_steps_argument(input_data, steps, steps_name):
-  """Validates `steps` argument based on input data's type.
-
-  The cases when `steps` value must be provided are when
-    1. input data passed is an iterator.
-    2. model was built on top of symbolic tensors, input data is not
-       required and is `None`.
-    3. input data passed is a symbolic tensor.
-
-  Args:
-    input_data: Input data. Can be Numpy array(s) or TensorFlow tensor(s) or
-      tf.data.Dataset iterator or `None`.
-    steps: Integer or `None`. Total number of steps (batches of samples) to
-      execute.
-    steps_name: The public API's parameter name for `steps`.
-
-  Returns:
-    boolean, True if `steps` argument is required, else False.
-
-  Raises:
-    ValueError: if `steps` argument is required for given input data type
-      but not provided.
-  """
-  is_x_iterator = isinstance(
-      input_data, (tf.compat.v1.data.Iterator, tf.data.Iterator))
-  if (input_data is None or is_x_iterator or has_symbolic_tensors(input_data) or
-      (isinstance(input_data, list) and not input_data)):
-    if steps is None:
-      input_type_str = 'a Dataset iterator' if is_x_iterator else 'data tensors'
-      raise ValueError('When using {input_type} as input to a model, you should'
-                       ' specify the `{steps_name}` argument.'.format(
-                           input_type=input_type_str, steps_name=steps_name))
-    return True
-
-  if isinstance(input_data, (tf.compat.v1.data.Dataset, tf.data.Dataset)):
-    return True
-
-  if steps is not None:
-    list_types = (np.ndarray, list, tuple)
-    if (isinstance(input_data, list_types) or
-        (isinstance(input_data, dict) and
-         any(isinstance(v, list_types) for v in input_data.values()))):
-      logging.warning('When passing input data as arrays, do not specify '
-                      '`steps_per_epoch`/`steps` argument. '
-                      'Please use `batch_size` instead.')
-      return False
+    """Validates `steps` argument based on input data's type.
+
+    The cases when `steps` value must be provided are when
+      1. input data passed is an iterator.
+      2. model was built on top of symbolic tensors, input data is not
+         required and is `None`.
+      3. input data passed is a symbolic tensor.
+
+    Args:
+        input_data: Input data. Can be Numpy array(s) or TensorFlow tensor(s)
+            or tf.data.Dataset iterator or `None`.
+        steps: Integer or `None`. Total number of steps (batches of samples)
+            to execute.
+        steps_name: The public API's parameter name for `steps`.
+
+    Returns:
+        boolean, True if `steps` argument is required, else False.
+
+    Raises:
+        ValueError: if `steps` argument is required for given input data type
+            but not provided.
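+
+    Example (an illustrative sketch):
+
+        # A tf.data.Dataset requires steps handling, so this returns True.
+        ds = tf.data.Dataset.from_tensor_slices(np.zeros((100, 10)))
+        check_steps_argument(ds, steps=None, steps_name='steps')  # True
+        # For plain Numpy input, `steps` is not needed; the result is falsy.
+        check_steps_argument(np.zeros((100, 10)), None, 'steps')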
+ """ + is_x_iterator = isinstance( + input_data, (tf.compat.v1.data.Iterator, tf.data.Iterator) + ) + if ( + input_data is None + or is_x_iterator + or has_symbolic_tensors(input_data) + or (isinstance(input_data, list) and not input_data) + ): + if steps is None: + input_type_str = ( + "a Dataset iterator" if is_x_iterator else "data tensors" + ) + raise ValueError( + "When using {input_type} as input to a model, you should" + " specify the `{steps_name}` argument.".format( + input_type=input_type_str, steps_name=steps_name + ) + ) + return True + + if isinstance(input_data, (tf.compat.v1.data.Dataset, tf.data.Dataset)): + return True + + if steps is not None: + list_types = (np.ndarray, list, tuple) + if isinstance(input_data, list_types) or ( + isinstance(input_data, dict) + and any(isinstance(v, list_types) for v in input_data.values()) + ): + logging.warning( + "When passing input data as arrays, do not specify " + "`steps_per_epoch`/`steps` argument. " + "Please use `batch_size` instead." + ) + return False def cast_single_tensor(x, dtype=None): - if isinstance(x, np.ndarray): - x = tf.convert_to_tensor(x) - dtype = dtype or backend.floatx() - if x.dtype.is_floating: - return tf.cast(x, dtype=dtype) - return x + if isinstance(x, np.ndarray): + x = tf.convert_to_tensor(x) + dtype = dtype or backend.floatx() + if x.dtype.is_floating: + return tf.cast(x, dtype=dtype) + return x def cast_if_floating_dtype_and_mismatch(targets, outputs): - """Returns target data tensors using correct datatype. - - Checks that each target and output pair are the same datatype. If not, casts - the target to the output's datatype. - - Args: - targets: tensor or list of targets. - outputs: tensor or list of outputs. - - Returns: - Targets in appropriate datatype. - """ - if tf.is_tensor(targets): - # There is one target, so output[0] should be the only output. - return cast_single_tensor(targets, dtype=outputs[0].dtype) - new_targets = [] - for target, out in zip(targets, outputs): - if isinstance(target, np.ndarray): - target = tf.convert_to_tensor(target) - if target.dtype != out.dtype: - new_targets.append(cast_single_tensor(target, dtype=out.dtype)) - else: - new_targets.append(target) - return new_targets + """Returns target data tensors using correct datatype. + + Checks that each target and output pair are the same datatype. If not, casts + the target to the output's datatype. + + Args: + targets: tensor or list of targets. + outputs: tensor or list of outputs. + + Returns: + Targets in appropriate datatype. + """ + if tf.is_tensor(targets): + # There is one target, so output[0] should be the only output. + return cast_single_tensor(targets, dtype=outputs[0].dtype) + new_targets = [] + for target, out in zip(targets, outputs): + if isinstance(target, np.ndarray): + target = tf.convert_to_tensor(target) + if target.dtype != out.dtype: + new_targets.append(cast_single_tensor(target, dtype=out.dtype)) + else: + new_targets.append(target) + return new_targets def cast_if_floating_dtype(x, dtype=None): - """Casts the given data tensors to the default floating point type. + """Casts the given data tensors to the default floating point type. - Casts only if the input is already a floating point type. - Args: - x: tensor or list/tuple of tensors. - dtype: The dtype to which Tensors should be cast. + Casts only if the input is already a floating point type. + Args: + x: tensor or list/tuple of tensors. + dtype: The dtype to which Tensors should be cast. - Returns: - Converted input. 
- """ - return tf.nest.map_structure(functools.partial(cast_single_tensor, dtype=dtype), - x) + Returns: + Converted input. + """ + return tf.nest.map_structure( + functools.partial(cast_single_tensor, dtype=dtype), x + ) def cast_to_model_input_dtypes(x, model): - """Casts the given data tensors to the dtypes of the model inputs. + """Casts the given data tensors to the dtypes of the model inputs. - Args: - x: tensor or list/tuple of tensors. - model: The model. + Args: + x: tensor or list/tuple of tensors. + model: The model. - Returns: - Converted input. Each tensor is casted to the corresponding input in - `model.inputs`. - """ - input_dtypes = tf.nest.map_structure(lambda t: t.dtype, model.inputs) - return tf.nest.map_structure(tf.cast, x, input_dtypes) + Returns: + Converted input. Each tensor is casted to the corresponding input in + `model.inputs`. + """ + input_dtypes = tf.nest.map_structure(lambda t: t.dtype, model.inputs) + return tf.nest.map_structure(tf.cast, x, input_dtypes) def prepare_sample_weight_modes(training_endpoints, sample_weight_mode): - """Prepares sample weight modes for the model. - - Args: - training_endpoints: List of model _TrainingEndpoints. - sample_weight_mode: sample weight mode user input passed from compile API. - - Raises: - ValueError: In case of invalid `sample_weight_mode` input. - """ - - if isinstance(sample_weight_mode, collections.abc.Mapping): - generic_utils.check_for_unexpected_keys( - 'sample_weight_mode', sample_weight_mode, - [e.output_name for e in training_endpoints]) - - for end_point in training_endpoints: - if not end_point.should_skip_target_weights(): - if end_point.output_name not in sample_weight_mode: - raise ValueError('Output ' + end_point.output_name + - 'missing from `_sample_weight_modes` dictionary') - else: - end_point.sample_weight_mode = sample_weight_mode.get( - end_point.output_name) - elif isinstance(sample_weight_mode, (list, tuple)): - if len(sample_weight_mode) != len(training_endpoints): - raise ValueError('When passing a list as sample_weight_mode, ' - 'it should have one entry per model output. ' - 'The model has ' + str(len(training_endpoints)) + - ' outputs, but you passed ' + - str(len(sample_weight_mode)) + '_sample_weight_modes.') - for mode, endpoint in zip(sample_weight_mode, training_endpoints): - if not endpoint.should_skip_target_weights(): - endpoint.sample_weight_mode = mode - else: - for endpoint in training_endpoints: - if not endpoint.should_skip_target_weights(): - endpoint.sample_weight_mode = sample_weight_mode + """Prepares sample weight modes for the model. + + Args: + training_endpoints: List of model _TrainingEndpoints. + sample_weight_mode: sample weight mode user input passed from compile API. + + Raises: + ValueError: In case of invalid `sample_weight_mode` input. 
+ """ + + if isinstance(sample_weight_mode, collections.abc.Mapping): + generic_utils.check_for_unexpected_keys( + "sample_weight_mode", + sample_weight_mode, + [e.output_name for e in training_endpoints], + ) + + for end_point in training_endpoints: + if not end_point.should_skip_target_weights(): + if end_point.output_name not in sample_weight_mode: + raise ValueError( + "Output " + + end_point.output_name + + "missing from `_sample_weight_modes` dictionary" + ) + else: + end_point.sample_weight_mode = sample_weight_mode.get( + end_point.output_name + ) + elif isinstance(sample_weight_mode, (list, tuple)): + if len(sample_weight_mode) != len(training_endpoints): + raise ValueError( + "When passing a list as sample_weight_mode, " + "it should have one entry per model output. " + "The model has " + + str(len(training_endpoints)) + + " outputs, but you passed " + + str(len(sample_weight_mode)) + + "_sample_weight_modes." + ) + for mode, endpoint in zip(sample_weight_mode, training_endpoints): + if not endpoint.should_skip_target_weights(): + endpoint.sample_weight_mode = mode + else: + for endpoint in training_endpoints: + if not endpoint.should_skip_target_weights(): + endpoint.sample_weight_mode = sample_weight_mode def prepare_loss_functions(loss, output_names): - """Converts loss to a list of loss functions. - - Args: - loss: String (name of objective function), objective function or - `tf.losses.Loss` instance. See `tf.losses`. If the model has multiple - outputs, you can use a different loss on each output by passing a - dictionary or a list of losses. The loss value that will be minimized by - the model will then be the sum of all individual losses. - output_names: List of model output names. - - Returns: - A list of loss objective functions. - - Raises: - ValueError: If loss is a dict with keys not in model output names, - or if loss is a list with len not equal to model outputs. - """ - if isinstance(loss, collections.abc.Mapping): - generic_utils.check_for_unexpected_keys('loss', loss, output_names) - loss_functions = [] - for name in output_names: - if name not in loss: - logging.warning( - 'Output {0} missing from loss dictionary. We assume ' - 'this was done on purpose. The fit and evaluate APIs will not be ' - 'expecting any data to be passed to {0}.'.format(name)) - loss_functions.append(get_loss_function(loss.get(name, None))) - elif isinstance(loss, str): - loss_functions = [get_loss_function(loss) for _ in output_names] - elif isinstance(loss, collections.abc.Sequence): - if len(loss) != len(output_names): - raise ValueError('When passing a list as loss, it should have one entry ' - 'per model outputs. The model has {} outputs, but you ' - 'passed loss={}'.format(len(output_names), loss)) - loss_functions = tf.nest.map_structure(get_loss_function, loss) - else: - loss_functions = [get_loss_function(loss) for _ in range(len(output_names))] - - return loss_functions + """Converts loss to a list of loss functions. + + Args: + loss: String (name of objective function), objective function or + `tf.keras.losses.Loss` instance. See `tf.keras.losses`. + If the model has multiple + outputs, you can use a different loss on each output by passing a + dictionary or a list of losses. The loss value that will be minimized + by the model will then be the sum of all individual losses. + output_names: List of model output names. + + Returns: + A list of loss objective functions. 
+
+    Raises:
+        ValueError: If loss is a dict with keys not in model output names,
+            or if loss is a list with len not equal to model outputs.
+    """
+    if isinstance(loss, collections.abc.Mapping):
+        generic_utils.check_for_unexpected_keys("loss", loss, output_names)
+        loss_functions = []
+        for name in output_names:
+            if name not in loss:
+                logging.warning(
+                    f"Output {name} missing from loss dictionary. We assume "
+                    "this was done on purpose. The fit and evaluate APIs "
+                    f"will not be expecting any data to be passed to {name}."
+                )
+            loss_functions.append(get_loss_function(loss.get(name, None)))
+    elif isinstance(loss, str):
+        loss_functions = [get_loss_function(loss) for _ in output_names]
+    elif isinstance(loss, collections.abc.Sequence):
+        if len(loss) != len(output_names):
+            raise ValueError(
+                "When passing a list as loss, it should have one entry "
+                "per model outputs. The model has {} outputs, but you "
+                "passed loss={}".format(len(output_names), loss)
+            )
+        loss_functions = tf.nest.map_structure(get_loss_function, loss)
+    else:
+        loss_functions = [
+            get_loss_function(loss) for _ in range(len(output_names))
+        ]
+
+    return loss_functions


def prepare_loss_weights(training_endpoints, loss_weights=None):
-  """Converts loss weights to a list of loss weights.
+    """Converts loss weights to a list of loss weights.

-  The result loss weights will be populated on the training endpoint.
+    The result loss weights will be populated on the training endpoint.

-  Args:
-    training_endpoints: List of model training endpoints.
-    loss_weights: Optional list or dictionary specifying scalar coefficients
-      (Python floats) to weight the loss contributions of different model
-      outputs. The loss value that will be minimized by the model will then be
-      the *weighted sum* of all individual losses, weighted by the
+    Args:
+        training_endpoints: List of model training endpoints.
+        loss_weights: Optional list or dictionary specifying scalar coefficients
+            (Python floats) to weight the loss contributions of different model
+            outputs. The loss value that will be minimized by the model will then
+            be the *weighted sum* of all individual losses, weighted by the
      `loss_weights` coefficients. If a list, it is expected to have a 1:1
-      mapping to the model's outputs. If a dict, it is expected to map
-      output names (strings) to scalar coefficients.
+            mapping to the model's outputs. If a dict, it is expected to map
+            output names (strings) to scalar coefficients.
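+
+    Example (an illustrative sketch; `endpoints` is a hypothetical list of
+    the model's internal training endpoints):
+
+        # A dict maps output names to weights; outputs missing from the
+        # dict default to a loss weight of 1.0.
+        prepare_loss_weights(endpoints, loss_weights={'out_a': 0.5})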
+
+    Raises:
+        ValueError: If loss weight is a dict with key not in model output
+            names, or if loss is a list with len not equal to model outputs.
+    """
+    if loss_weights is None:
+        for e in training_endpoints:
+            e.loss_weight = 1.0
+    elif isinstance(loss_weights, collections.abc.Mapping):
+        generic_utils.check_for_unexpected_keys(
+            "loss_weights",
+            loss_weights,
+            [e.output_name for e in training_endpoints],
+        )
+        for e in training_endpoints:
+            e.loss_weight = loss_weights.get(e.output_name, 1.0)
+    elif isinstance(loss_weights, list):
+        if len(loss_weights) != len(training_endpoints):
+            raise ValueError(
+                "When passing a list as loss_weights, "
+                "it should have one entry per model output. "
+                "The model has "
+                + str(len(training_endpoints))
+                + " outputs, but you passed loss_weights="
+                + str(loss_weights)
+            )
+        for w, e in zip(loss_weights, training_endpoints):
+            e.loss_weight = w
+    else:
+        raise TypeError(
+            "Could not interpret loss_weights argument: "
+            + str(loss_weights)
+            + " - expected a list or a dict."
+        )


# TODO(rohanj): This is a hack to get around not depending on feature_column and
# create a cyclical dependency. Figure out a cleaner solution
def is_feature_layer(layer):
-  """Returns whether `layer` is a FeatureLayer or not."""
-  return getattr(layer, '_is_feature_layer', False)
+    """Returns whether `layer` is a FeatureLayer or not."""
+    return getattr(layer, "_is_feature_layer", False)


def is_eager_dataset_or_iterator(data):
-  return tf.executing_eagerly() and isinstance(
-      data, (tf.compat.v1.data.Dataset, tf.data.Dataset,
-             tf.data.Iterator))
+    return tf.executing_eagerly() and isinstance(
+        data, (tf.compat.v1.data.Dataset, tf.data.Dataset, tf.data.Iterator)
+    )


-# pylint: disable=protected-access
def get_dataset_graph_def(dataset):
-  if tf.executing_eagerly():
-    graph_def_str = dataset._as_serialized_graph().numpy()
-  else:
-    graph_def_str = backend.get_value(dataset._as_serialized_graph())
-  return tf.compat.v1.GraphDef().FromString(graph_def_str)
+    if tf.executing_eagerly():
+        graph_def_str = dataset._as_serialized_graph().numpy()
+    else:
+        graph_def_str = backend.get_value(dataset._as_serialized_graph())
+    return tf.compat.v1.GraphDef().FromString(graph_def_str)


def verify_dataset_shuffled(x):
-  """Verifies that the dataset is shuffled.
-
-  Args:
-    x: Dataset passed as an input to the model.
-
-  Returns:
-    boolean, whether the input dataset is shuffled or not.
-  """
-  assert isinstance(x, tf.data.Dataset)
-  graph_def = get_dataset_graph_def(x)
-  for node in graph_def.node:
-    if node.op.startswith('ShuffleDataset'):
-      return True
-  # Also check graph_def.library.function for ds.interleave or ds.flat_map
-  for function in graph_def.library.function:
-    for node in function.node_def:
-      if node.op.startswith('ShuffleDataset'):
-        return True
-  logging.warning('Expected a shuffled dataset but input dataset `x` is '
-                  'not shuffled. Please invoke `shuffle()` on input dataset.')
-  return False
+    """Verifies that the dataset is shuffled.
+
+    Args:
+        x: Dataset passed as an input to the model.
+
+    Returns:
+        boolean, whether the input dataset is shuffled or not.
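+
+    Example (an illustrative sketch):
+
+        verify_dataset_shuffled(tf.data.Dataset.range(10).shuffle(10))
+        # -> True
+        verify_dataset_shuffled(tf.data.Dataset.range(10))
+        # -> warns and returns False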
+ """ + assert isinstance(x, tf.data.Dataset) + graph_def = get_dataset_graph_def(x) + for node in graph_def.node: + if node.op.startswith("ShuffleDataset"): + return True + # Also check graph_def.library.function for ds.interleave or ds.flat_map + for function in graph_def.library.function: + for node in function.node_def: + if node.op.startswith("ShuffleDataset"): + return True + logging.warning( + "Expected a shuffled dataset but input dataset `x` is " + "not shuffled. Please invoke `shuffle()` on input dataset." + ) + return False def is_dataset_or_iterator(data): - return isinstance(data, (tf.compat.v1.data.Dataset, tf.data.Dataset, - tf.compat.v1.data.Iterator, tf.data.Iterator)) + return isinstance( + data, + ( + tf.compat.v1.data.Dataset, + tf.data.Dataset, + tf.compat.v1.data.Iterator, + tf.data.Iterator, + ), + ) def get_iterator(dataset): - """Create and initialize an iterator from a dataset.""" - if tf.executing_eagerly(): - iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) - else: - iterator = tf.compat.v1.data.make_initializable_iterator(dataset) - initialize_iterator(iterator) - return iterator + """Create and initialize an iterator from a dataset.""" + if tf.executing_eagerly(): + iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + else: + iterator = tf.compat.v1.data.make_initializable_iterator(dataset) + initialize_iterator(iterator) + return iterator def initialize_iterator(iterator): - if not tf.executing_eagerly(): - init_op = iterator.initializer - backend.get_session((init_op,)).run(init_op) + if not tf.executing_eagerly(): + init_op = iterator.initializer + backend.get_session((init_op,)).run(init_op) def extract_tensors_from_dataset(dataset): - """Extract a tuple of tensors `inputs, targets, sample_weight` from a dataset. + """Extract tuple of tensors `inputs, targets, sample_weight` from a dataset. - Args: - dataset: Dataset instance. + Args: + dataset: Dataset instance. - Returns: - Tuple of tensors `x, y, weights`. `y` and `weights` entry may be None. - """ - iterator = get_iterator(dataset) - inputs, targets, sample_weight = unpack_iterator_input(iterator) - return inputs, targets, sample_weight + Returns: + Tuple of tensors `x, y, weights`. `y` and `weights` entry may be None. + """ + iterator = get_iterator(dataset) + inputs, targets, sample_weight = unpack_iterator_input(iterator) + return inputs, targets, sample_weight def unpack_iterator_input(iterator): - """Convert a dataset iterator to a tuple of tensors `x, y, sample_weights`. - - Args: - iterator: Instance of a dataset iterator. - - Returns: - Tuple of tensors `x, y, weights`. `y` and `weights` entry may be None. - """ - try: - next_element = iterator.get_next() - except tf.errors.OutOfRangeError: - raise RuntimeError('Your dataset iterator ran out of data; ' - 'Make sure that your dataset can generate ' - 'required number of samples.') - - if isinstance(next_element, (list, tuple)): - if len(next_element) not in [2, 3]: - raise ValueError( - 'Please provide model inputs as a list or tuple of 2 or 3 ' - 'elements: (input, target) or (input, target, sample_weights) ' - 'Received %s' % next_element) - if len(next_element) == 2: - x, y = next_element - weights = None - else: - x, y, weights = next_element - else: - x = next_element - y = None - weights = None - return x, y, weights - - -def infer_steps_for_dataset(model, - dataset, - steps, - epochs=1, - steps_name='steps'): - """Infers steps_per_epoch needed to loop through a dataset. - - Args: - model: Keras model instance. 
- dataset: Input data of type tf.data.Dataset. - steps: Number of steps to draw from the dataset (may be None if unknown). - epochs: Number of times to iterate over the dataset. - steps_name: The string name of the steps argument, either `steps`, - `validation_steps`, or `steps_per_epoch`. Only used for error message - formatting. - - Returns: - Integer or `None`. Inferred number of steps to loop through the dataset. - `None` is returned if 1) the size of the dataset is unknown and `steps` was - not specified, or 2) this is multi-worker training and auto sharding is - enabled. - - Raises: - ValueError: In case of invalid argument values. - """ - assert isinstance(dataset, tf.data.Dataset) - if (model._in_multi_worker_mode() and - (dataset.options().experimental_distribute.auto_shard_policy != - tf.data.experimental.AutoShardPolicy.OFF)): - # If the dataset would be auto-sharded, we should not infer a local - # steps_per_epoch due to the possible imbalanced sharding between workers. - return None - - size = backend.get_value(tf.data.experimental.cardinality(dataset)) - if size == tf.data.experimental.INFINITE_CARDINALITY and steps is None: - raise ValueError('When passing an infinitely repeating dataset, you ' - 'must specify the `%s` argument.' % (steps_name,)) - if size >= 0: - if steps is not None and steps * epochs > size: - if epochs > 1: - raise ValueError('The dataset you passed contains %s batches, but you ' - 'passed `epochs=%s` and `%s=%s`, which is a total of ' - '%s steps. We cannot draw that many steps from this ' - 'dataset. We suggest to set `%s=%s`.' % - (size, epochs, steps_name, steps, steps * epochs, - steps_name, size // epochs)) - else: - raise ValueError('The dataset you passed contains %s batches, but you ' - 'passed `%s=%s`. We cannot draw that many steps from ' - 'this dataset. We suggest to set `%s=%s`.' % - (size, steps_name, steps, steps_name, size)) - if steps is None: - if size >= 0: - return size - return None - return steps + """Convert a dataset iterator to a tuple of tensors `x, y, sample_weights`. + Args: + iterator: Instance of a dataset iterator. -class ModelInputs: - """Encapsulates model inputs. + Returns: + Tuple of tensors `x, y, weights`. `y` and `weights` entry may be None. + """ + try: + next_element = iterator.get_next() + except tf.errors.OutOfRangeError: + raise RuntimeError( + "Your dataset iterator ran out of data; " + "Make sure that your dataset can generate " + "required number of samples." + ) + + if isinstance(next_element, (list, tuple)): + if len(next_element) not in [2, 3]: + raise ValueError( + "Please provide model inputs as a list or tuple of 2 or 3 " + "elements: (input, target) or (input, target, sample_weights) " + "Received %s" % next_element + ) + if len(next_element) == 2: + x, y = next_element + weights = None + else: + x, y, weights = next_element + else: + x = next_element + y = None + weights = None + return x, y, weights - Allows for transforming model inputs while keeping the same structure. - """ - def __init__(self, inputs): - self._inputs = inputs - self._is_dict = isinstance(self._inputs, dict) - self._is_single_input = not isinstance(self._inputs, (list, tuple, dict)) +def infer_steps_for_dataset( + model, dataset, steps, epochs=1, steps_name="steps" +): + """Infers steps_per_epoch needed to loop through a dataset. - self._flattened_inputs = [] - self._input_names = [] + Args: + model: Keras model instance. + dataset: Input data of type tf.data.Dataset. 
+ steps: Number of steps to draw from the dataset (may be None if + unknown). + epochs: Number of times to iterate over the dataset. + steps_name: The string name of the steps argument, either `steps`, + `validation_steps`, or `steps_per_epoch`. Only used for error message + formatting. + + Returns: + Integer or `None`. Inferred number of steps to loop through the dataset. + `None` is returned if 1) the size of the dataset is unknown and `steps` + was not specified, or 2) this is multi-worker training and auto sharding + is enabled. + + Raises: + ValueError: In case of invalid argument values. + """ + assert isinstance(dataset, tf.data.Dataset) + if model._in_multi_worker_mode() and ( + dataset.options().experimental_distribute.auto_shard_policy + != tf.data.experimental.AutoShardPolicy.OFF + ): + # If the dataset would be auto-sharded, we should not infer a local + # steps_per_epoch due to the possible imbalanced sharding between + # workers. + return None + + size = backend.get_value(tf.data.experimental.cardinality(dataset)) + if size == tf.data.experimental.INFINITE_CARDINALITY and steps is None: + raise ValueError( + "When passing an infinitely repeating dataset, you " + "must specify the `%s` argument." % (steps_name,) + ) + if size >= 0: + if steps is not None and steps * epochs > size: + if epochs > 1: + raise ValueError( + "The dataset you passed contains %s batches, but you " + "passed `epochs=%s` and `%s=%s`, which is a total of " + "%s steps. We cannot draw that many steps from this " + "dataset. We suggest to set `%s=%s`." + % ( + size, + epochs, + steps_name, + steps, + steps * epochs, + steps_name, + size // epochs, + ) + ) + else: + raise ValueError( + "The dataset you passed contains %s batches, but you " + "passed `%s=%s`. We cannot draw that many steps from " + "this dataset. We suggest to set `%s=%s`." + % (size, steps_name, steps, steps_name, size) + ) + if steps is None: + if size >= 0: + return size + return None + return steps - if self._is_dict: - for k in sorted(self._inputs.keys()): - self._flattened_inputs.append(self._inputs[k]) - self._input_names.append(k) - else: - self._flattened_inputs = tf.nest.flatten(self._inputs) - self._input_names = [ - 'input_%d' % (i + 1) for i in range(len(self._flattened_inputs)) - ] - def get_input_names(self): - """Returns keys to name inputs by. +class ModelInputs: + """Encapsulates model inputs. - In case inputs provided were a list, tuple or single entry, we make up a - key 'input_%d'. For dictionary case, we return a sorted list of keys. + Allows for transforming model inputs while keeping the same structure. """ - return self._input_names - - def get_symbolic_inputs(self, return_single_as_list=False): - """Returns inputs to be set as self.inputs for a model.""" - # TODO(karmel): There is a side-effect here where what you get - # with as_list and as_dict depends on whether you have called this - # method first, since it modifies in place. - for i, (k, v) in enumerate(zip(self._input_names, self._flattened_inputs)): - if isinstance(v, (list, float, int)): - v = np.asarray(v) - if v.ndim == 1: - v = np.expand_dims(v, 1) - - if isinstance(v, np.ndarray): - # We fix the placeholder shape except the batch size. - # This is suboptimal, but it is the best we can do with the info - # we have. The user should call `model._set_inputs(placeholders)` - # to specify custom placeholders if the need arises. 
- shape = (None,) + tuple(v.shape[1:]) - if shape == (None,): - shape = (None, 1) - dtype = tf.as_dtype(v.dtype) - if dtype.is_floating: - dtype = backend.floatx() - v = backend.placeholder(shape=shape, name=k, dtype=dtype) - elif isinstance(v, tf.TensorSpec): - shape = (None,) + tuple(v.shape.as_list()[1:]) - if shape == (None,): - shape = (None, 1) - v = backend.placeholder(shape=shape, name=k, dtype=v.dtype) - - self._flattened_inputs[i] = v - - if self._is_dict: - return dict(zip(self._input_names, self._flattened_inputs)) - if self._is_single_input and not return_single_as_list: - return self._flattened_inputs[0] - return self._flattened_inputs - - def as_dict(self): - """An iterable over a dictionary version of inputs.""" - for k, v in zip(self._input_names, self._flattened_inputs): - yield k, v - - def as_list(self): - """Returning the inputs as a list.""" - return self._flattened_inputs + def __init__(self, inputs): + self._inputs = inputs + self._is_dict = isinstance(self._inputs, dict) + self._is_single_input = not isinstance( + self._inputs, (list, tuple, dict) + ) -# Allow use of methods not exposed to the user. -# pylint: disable=protected-access + self._flattened_inputs = [] + self._input_names = [] + + if self._is_dict: + for k in sorted(self._inputs.keys()): + self._flattened_inputs.append(self._inputs[k]) + self._input_names.append(k) + else: + self._flattened_inputs = tf.nest.flatten(self._inputs) + self._input_names = [ + "input_%d" % (i + 1) for i in range(len(self._flattened_inputs)) + ] + + def get_input_names(self): + """Returns keys to name inputs by. + + In case inputs provided were a list, tuple or single entry, we make up a + key 'input_%d'. For dictionary case, we return a sorted list of keys. + """ + return self._input_names + + def get_symbolic_inputs(self, return_single_as_list=False): + """Returns inputs to be set as self.inputs for a model.""" + # TODO(karmel): There is a side-effect here where what you get + # with as_list and as_dict depends on whether you have called this + # method first, since it modifies in place. + for i, (k, v) in enumerate( + zip(self._input_names, self._flattened_inputs) + ): + if isinstance(v, (list, float, int)): + v = np.asarray(v) + if v.ndim == 1: + v = np.expand_dims(v, 1) + + if isinstance(v, np.ndarray): + # We fix the placeholder shape except the batch size. + # This is suboptimal, but it is the best we can do with the info + # we have. The user should call + # `model._set_inputs(placeholders)` to specify custom + # placeholders if the need arises. + shape = (None,) + tuple(v.shape[1:]) + if shape == (None,): + shape = (None, 1) + dtype = tf.as_dtype(v.dtype) + if dtype.is_floating: + dtype = backend.floatx() + v = backend.placeholder(shape=shape, name=k, dtype=dtype) + elif isinstance(v, tf.TensorSpec): + shape = (None,) + tuple(v.shape.as_list()[1:]) + if shape == (None,): + shape = (None, 1) + v = backend.placeholder(shape=shape, name=k, dtype=v.dtype) + + self._flattened_inputs[i] = v + + if self._is_dict: + return dict(zip(self._input_names, self._flattened_inputs)) + if self._is_single_input and not return_single_as_list: + return self._flattened_inputs[0] + return self._flattened_inputs + + def as_dict(self): + """An iterable over a dictionary version of inputs.""" + for k, v in zip(self._input_names, self._flattened_inputs): + yield k, v + + def as_list(self): + """Returning the inputs as a list.""" + return self._flattened_inputs -# pylint: enable=protected-access +# Allow use of methods not exposed to the user. 
def generic_output_names(outputs_list): - return ['output_%d' % (i + 1) for i in range(len(outputs_list))] + return ["output_%d" % (i + 1) for i in range(len(outputs_list))] def should_run_validation(validation_freq, epoch): - """Checks if validation should be run this epoch. + """Checks if validation should be run this epoch. - Args: - validation_freq: Integer or list. If an integer, specifies how many training - epochs to run before a new validation run is performed. If a list, - specifies the epochs on which to run validation. - epoch: Integer, the number of the training epoch just completed. + Args: + validation_freq: Integer or list. If an integer, specifies how many + training epochs to run before a new validation run is performed. If a + list, specifies the epochs on which to run validation. + epoch: Integer, the number of the training epoch just completed. - Returns: - Bool, True if validation should be run. + Returns: + Bool, True if validation should be run. - Raises: - ValueError: if `validation_freq` is an Integer and less than 1, or if - it is neither an Integer nor a Sequence. - """ - # `epoch` is 0-indexed internally but 1-indexed in the public API. - one_indexed_epoch = epoch + 1 + Raises: + ValueError: if `validation_freq` is an Integer and less than 1, or if + it is neither an Integer nor a Sequence. + """ + # `epoch` is 0-indexed internally but 1-indexed in the public API. + one_indexed_epoch = epoch + 1 - if isinstance(validation_freq, int): - if validation_freq < 1: - raise ValueError('`validation_freq` can not be less than 1.') - return one_indexed_epoch % validation_freq == 0 + if isinstance(validation_freq, int): + if validation_freq < 1: + raise ValueError("`validation_freq` can not be less than 1.") + return one_indexed_epoch % validation_freq == 0 - if not isinstance(validation_freq, collections.abc.Container): - raise ValueError('`validation_freq` must be an Integer or ' - '`collections.abc.Container` (e.g. list, tuple, etc.)') - return one_indexed_epoch in validation_freq + if not isinstance(validation_freq, collections.abc.Container): + raise ValueError( + "`validation_freq` must be an Integer or " + "`collections.abc.Container` (e.g. list, tuple, etc.)" + ) + return one_indexed_epoch in validation_freq def split_training_and_validation_data(x, y, sample_weights, validation_split): - """Split input data into train/eval section based on validation_split.""" - if has_symbolic_tensors(x): - raise ValueError('If your data is in the form of symbolic tensors, ' - 'you cannot use `validation_split`.') - if hasattr(x[0], 'shape'): - split_at = int(x[0].shape[0] * (1. - validation_split)) - else: - split_at = int(len(x[0]) * (1. - validation_split)) - x, val_x = (generic_utils.slice_arrays(x, 0, split_at), - generic_utils.slice_arrays(x, split_at)) - y, val_y = (generic_utils.slice_arrays(y, 0, split_at), - generic_utils.slice_arrays(y, split_at)) - if sample_weights: - sample_weights, val_sample_weights = ( - generic_utils.slice_arrays(sample_weights, 0, split_at), - generic_utils.slice_arrays(sample_weights, split_at), + """Split input data into train/eval section based on validation_split.""" + if has_symbolic_tensors(x): + raise ValueError( + "If your data is in the form of symbolic tensors, " + "you cannot use `validation_split`." 
+    )
+    if hasattr(x[0], "shape"):
+        split_at = int(x[0].shape[0] * (1.0 - validation_split))
+    else:
+        split_at = int(len(x[0]) * (1.0 - validation_split))
+    x, val_x = (
+        generic_utils.slice_arrays(x, 0, split_at),
+        generic_utils.slice_arrays(x, split_at),
+    )
+    y, val_y = (
+        generic_utils.slice_arrays(y, 0, split_at),
+        generic_utils.slice_arrays(y, split_at),
    )
-  else:
-    val_sample_weights = None
-  return x, y, sample_weights, val_x, val_y, val_sample_weights
+    if sample_weights:
+        sample_weights, val_sample_weights = (
+            generic_utils.slice_arrays(sample_weights, 0, split_at),
+            generic_utils.slice_arrays(sample_weights, split_at),
+        )
+    else:
+        val_sample_weights = None
+    return x, y, sample_weights, val_x, val_y, val_sample_weights


def unpack_validation_data(validation_data, raise_if_ambiguous=True):
-  """Unpack validation data based input type.
-
-  The validation data is not touched if its dataset or dataset iterator.
-  For other type of input (Numpy or tensor), it will be unpacked into tuple of
-  3 which is x, y and sample weights.
-
-  Args:
-    validation_data: dataset, dataset iterator, or numpy, tensor tuple.
-    raise_if_ambiguous: boolean on whether to fail if validation_data cannot be
-      parsed. Otherwise simply return validation_data, None, None and defer the
-      decision to the caller.
-
-  Returns:
-    tuple of 3, (x, y, sample_weights) for numpy and tensor input.
-  """
-  if (isinstance(validation_data, (tf.compat.v1.data.Iterator,
-                                   tf.data.Iterator,
-                                   tf.data.Dataset,
-                                   data_utils.Sequence))
-      or not hasattr(validation_data, '__len__')):
-    val_x = validation_data
-    val_y = None
-    val_sample_weight = None
-  elif len(validation_data) == 2:
-    try:
-      val_x, val_y = validation_data  # pylint: disable=unpacking-non-sequence
-      val_sample_weight = None
-    except ValueError:
-      val_x, val_y, val_sample_weight = validation_data, None, None
-  elif len(validation_data) == 3:
-    try:
-      val_x, val_y, val_sample_weight = validation_data  # pylint: disable=unpacking-non-sequence
-    except ValueError:
-      val_x, val_y, val_sample_weight = validation_data, None, None
-  else:
-    if raise_if_ambiguous:
-      raise ValueError(
-          'When passing a `validation_data` argument, '
-          'it must contain either 2 items (x_val, y_val), '
-          'or 3 items (x_val, y_val, val_sample_weights), '
-          'or alternatively it could be a dataset or a '
-          'dataset or a dataset iterator. '
-          'However we received `validation_data=%s`' % validation_data)
-    val_x, val_y, val_sample_weight = validation_data, None, None
-  return val_x, val_y, val_sample_weight
+    """Unpack validation data based on input type.
+
+    The validation data is not touched if it is a dataset or dataset
+    iterator. For other types of input (Numpy or tensor), it will be
+    unpacked into a tuple of 3: x, y, and sample weights.
+
+    Args:
+        validation_data: dataset, dataset iterator, or numpy, tensor tuple.
+        raise_if_ambiguous: boolean on whether to fail if validation_data
+            cannot be parsed. Otherwise simply return validation_data, None,
+            None and defer the decision to the caller.
+
+    Returns:
+        tuple of 3, (x, y, sample_weights) for numpy and tensor input.
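+
+    Example (an illustrative sketch with made-up arrays):
+
+        x_val, y_val = np.zeros((10, 3)), np.zeros((10,))
+        # A 2-tuple unpacks to (x, y, None).
+        val_x, val_y, val_w = unpack_validation_data((x_val, y_val))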
+ """ + if isinstance( + validation_data, + ( + tf.compat.v1.data.Iterator, + tf.data.Iterator, + tf.data.Dataset, + data_utils.Sequence, + ), + ) or not hasattr(validation_data, "__len__"): + val_x = validation_data + val_y = None + val_sample_weight = None + elif len(validation_data) == 2: + try: + ( + val_x, + val_y, + ) = validation_data + val_sample_weight = None + except ValueError: + val_x, val_y, val_sample_weight = validation_data, None, None + elif len(validation_data) == 3: + try: + ( + val_x, + val_y, + val_sample_weight, + ) = validation_data + except ValueError: + val_x, val_y, val_sample_weight = validation_data, None, None + else: + if raise_if_ambiguous: + raise ValueError( + "When passing a `validation_data` argument, " + "it must contain either 2 items (x_val, y_val), " + "or 3 items (x_val, y_val, val_sample_weights), " + "or alternatively it could be a dataset or a " + "dataset or a dataset iterator. " + "However we received `validation_data=%s`" % validation_data + ) + val_x, val_y, val_sample_weight = validation_data, None, None + return val_x, val_y, val_sample_weight class TrainingLoop: - """TrainingLoop is a wrapper class around the training logic. - - This class is trying to encapsulate the different logic of fit/eval/predict - with regard to different data input and model condition. - - Note that TrainingLoop is stateless, which means it doesn't contain any - internal field and can be reused with different model and inputs. - """ - - def fit(self, - model, - x=None, - y=None, - batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - validation_split=0., - validation_data=None, - shuffle=True, - class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_freq=1, - **kwargs): - """Train the model with the inputs and targets.""" - raise NotImplementedError() - - def evaluate(self, - model, - x=None, - y=None, - batch_size=None, - verbose=1, - sample_weight=None, - steps=None, - callbacks=None, - **kwargs): - """Returns the loss value & metrics values for the model in test mode.""" - raise NotImplementedError() - - def predict(self, - model, - x, - batch_size=None, - verbose=0, - steps=None, - callbacks=None, - **kwargs): - raise NotImplementedError() + """TrainingLoop is a wrapper class around the training logic. + + This class is trying to encapsulate the different logic of fit/eval/predict + with regard to different data input and model condition. + + Note that TrainingLoop is stateless, which means it doesn't contain any + internal field and can be reused with different model and inputs. 
+ """ + + def fit( + self, + model, + x=None, + y=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_freq=1, + **kwargs, + ): + """Train the model with the inputs and targets.""" + raise NotImplementedError() + + def evaluate( + self, + model, + x=None, + y=None, + batch_size=None, + verbose=1, + sample_weight=None, + steps=None, + callbacks=None, + **kwargs, + ): + """Returns the loss value & metrics values for the model in test + mode.""" + raise NotImplementedError() + + def predict( + self, + model, + x, + batch_size=None, + verbose=0, + steps=None, + callbacks=None, + **kwargs, + ): + raise NotImplementedError() diff --git a/keras/engine/training_utils_v1_test.py b/keras/engine/training_utils_v1_test.py index cd7aed6bdc37..d4cfb802765c 100644 --- a/keras/engine/training_utils_v1_test.py +++ b/keras/engine/training_utils_v1_test.py @@ -14,414 +14,492 @@ # ============================================================================== """Tests for training utility functions.""" -import tensorflow.compat.v2 as tf - import functools import multiprocessing.pool import time -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + from keras import backend -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils from keras.engine import keras_tensor from keras.engine import training_utils_v1 +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + +# isort: off from tensorflow.python.platform import tf_logging as logging class ModelInputsTest(tf.test.TestCase): - - def test_single_thing(self): - a = np.ones(10) - model_inputs = training_utils_v1.ModelInputs(a) - self.assertEqual(['input_1'], model_inputs.get_input_names()) - vals = model_inputs.get_symbolic_inputs() - self.assertTrue(tf.is_tensor(vals)) - vals = model_inputs.get_symbolic_inputs(return_single_as_list=True) - self.assertEqual(1, len(vals)) - self.assertTrue(tf.is_tensor(vals[0])) - self.assertEqual(backend.floatx(), vals[0].dtype) - - def test_single_thing_eager(self): - if not tf.executing_eagerly(): - self.skipTest('Run in eager mode only.') - a = np.ones(10, dtype=np.int32) - model_inputs = training_utils_v1.ModelInputs(a) - self.assertEqual(['input_1'], model_inputs.get_input_names()) - val = model_inputs.get_symbolic_inputs() - self.assertIsInstance(val, keras_tensor.KerasTensor) - vals = model_inputs.get_symbolic_inputs(return_single_as_list=True) - self.assertEqual(1, len(vals)) - self.assertIsInstance(vals[0], keras_tensor.KerasTensor) - self.assertEqual(tf.int32, vals[0].dtype) - - def test_list(self): - a = [np.ones(10), np.ones(20)] - model_inputs = training_utils_v1.ModelInputs(a) - self.assertEqual(['input_1', 'input_2'], model_inputs.get_input_names()) - vals = model_inputs.get_symbolic_inputs() - self.assertTrue(tf.is_tensor(vals[0])) - self.assertTrue(tf.is_tensor(vals[1])) - - def test_list_eager(self): - if not tf.executing_eagerly(): - self.skipTest('Run in eager mode only.') - a = [np.ones(10), np.ones(20)] - model_inputs = training_utils_v1.ModelInputs(a) - self.assertEqual(['input_1', 'input_2'], model_inputs.get_input_names()) - vals = model_inputs.get_symbolic_inputs() - self.assertIsInstance(vals[0], keras_tensor.KerasTensor) - 
self.assertIsInstance(vals[1], keras_tensor.KerasTensor) - - def test_dict(self): - a = {'b': np.ones(10), 'a': np.ones(20)} - model_inputs = training_utils_v1.ModelInputs(a) - self.assertEqual(['a', 'b'], model_inputs.get_input_names()) - vals = model_inputs.get_symbolic_inputs() - self.assertTrue(tf.is_tensor(vals['a'])) - self.assertTrue(tf.is_tensor(vals['b'])) - - def test_dict_eager(self): - if not tf.executing_eagerly(): - self.skipTest('Run in eager mode only.') - a = {'b': np.ones(10), 'a': np.ones(20)} - model_inputs = training_utils_v1.ModelInputs(a) - self.assertEqual(['a', 'b'], model_inputs.get_input_names()) - vals = model_inputs.get_symbolic_inputs() - self.assertIsInstance(vals['a'], keras_tensor.KerasTensor) - self.assertIsInstance(vals['b'], keras_tensor.KerasTensor) + def test_single_thing(self): + a = np.ones(10) + model_inputs = training_utils_v1.ModelInputs(a) + self.assertEqual(["input_1"], model_inputs.get_input_names()) + vals = model_inputs.get_symbolic_inputs() + self.assertTrue(tf.is_tensor(vals)) + vals = model_inputs.get_symbolic_inputs(return_single_as_list=True) + self.assertEqual(1, len(vals)) + self.assertTrue(tf.is_tensor(vals[0])) + self.assertEqual(backend.floatx(), vals[0].dtype) + + def test_single_thing_eager(self): + if not tf.executing_eagerly(): + self.skipTest("Run in eager mode only.") + a = np.ones(10, dtype=np.int32) + model_inputs = training_utils_v1.ModelInputs(a) + self.assertEqual(["input_1"], model_inputs.get_input_names()) + val = model_inputs.get_symbolic_inputs() + self.assertIsInstance(val, keras_tensor.KerasTensor) + vals = model_inputs.get_symbolic_inputs(return_single_as_list=True) + self.assertEqual(1, len(vals)) + self.assertIsInstance(vals[0], keras_tensor.KerasTensor) + self.assertEqual(tf.int32, vals[0].dtype) + + def test_list(self): + a = [np.ones(10), np.ones(20)] + model_inputs = training_utils_v1.ModelInputs(a) + self.assertEqual(["input_1", "input_2"], model_inputs.get_input_names()) + vals = model_inputs.get_symbolic_inputs() + self.assertTrue(tf.is_tensor(vals[0])) + self.assertTrue(tf.is_tensor(vals[1])) + + def test_list_eager(self): + if not tf.executing_eagerly(): + self.skipTest("Run in eager mode only.") + a = [np.ones(10), np.ones(20)] + model_inputs = training_utils_v1.ModelInputs(a) + self.assertEqual(["input_1", "input_2"], model_inputs.get_input_names()) + vals = model_inputs.get_symbolic_inputs() + self.assertIsInstance(vals[0], keras_tensor.KerasTensor) + self.assertIsInstance(vals[1], keras_tensor.KerasTensor) + + def test_dict(self): + a = {"b": np.ones(10), "a": np.ones(20)} + model_inputs = training_utils_v1.ModelInputs(a) + self.assertEqual(["a", "b"], model_inputs.get_input_names()) + vals = model_inputs.get_symbolic_inputs() + self.assertTrue(tf.is_tensor(vals["a"])) + self.assertTrue(tf.is_tensor(vals["b"])) + + def test_dict_eager(self): + if not tf.executing_eagerly(): + self.skipTest("Run in eager mode only.") + a = {"b": np.ones(10), "a": np.ones(20)} + model_inputs = training_utils_v1.ModelInputs(a) + self.assertEqual(["a", "b"], model_inputs.get_input_names()) + vals = model_inputs.get_symbolic_inputs() + self.assertIsInstance(vals["a"], keras_tensor.KerasTensor) + self.assertIsInstance(vals["b"], keras_tensor.KerasTensor) class DatasetUtilsTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters( - # pylint: disable=g-long-lambda - ('Batch', lambda: tf.data.Dataset.range(5).batch(2)), - ('Cache', lambda: tf.data.Dataset.range(5).cache()), - ('Concatenate', lambda: 
tf.data.Dataset.range(5).concatenate( - tf.data.Dataset.range(5))), - ('FlatMap', lambda: tf.data.Dataset.range(5).flat_map( - lambda _: tf.data.Dataset.from_tensors(0))), - ('FlatMap_Shuffle', lambda: tf.data.Dataset.range(5).flat_map( - lambda _: tf.data.Dataset.from_tensors(0).shuffle(1)), True), - ('Filter', lambda: tf.data.Dataset.range(5).filter(lambda _: True)), - ('FixedLengthRecordDatasetV2', - lambda: tf.data.FixedLengthRecordDataset([], 42)), - ('FromTensors', lambda: tf.data.Dataset.from_tensors(0)), - ('FromTensorSlices', - lambda: tf.data.Dataset.from_tensor_slices([0, 0, 0])), - ('Interleave', lambda: tf.data.Dataset.range(5).interleave( - lambda _: tf.data.Dataset.from_tensors(0), cycle_length=1)), - ('Interleave_Shuffle', lambda: tf.data.Dataset.range(5).interleave( - lambda _: tf.data.Dataset.from_tensors(0).shuffle(1), - cycle_length=1), True), - ('Map', lambda: tf.data.Dataset.range(5).map(lambda x: x)), - ('Options', - lambda: tf.data.Dataset.range(5).with_options(tf.data.Options()) - ), - ('PaddedBatch', lambda: tf.data.Dataset.range(5).padded_batch(2, [])), - ('ParallelInterleave', lambda: tf.data.Dataset.range(5).interleave( - lambda _: tf.data.Dataset.from_tensors(0), - cycle_length=1, - num_parallel_calls=1)), - ('ParallelMap', lambda: tf.data.Dataset.range(5).map( - lambda x: x, num_parallel_calls=1)), - ('Prefetch', lambda: tf.data.Dataset.range(5).prefetch(1)), - ('Range', lambda: tf.data.Dataset.range(0)), - ('Repeat', lambda: tf.data.Dataset.range(0).repeat(0)), - ('Shuffle', lambda: tf.data.Dataset.range(5).shuffle(1), True), - ('Skip', lambda: tf.data.Dataset.range(5).skip(2)), - ('Take', lambda: tf.data.Dataset.range(5).take(2)), - ('TextLineDataset', lambda: tf.data.TextLineDataset([])), - ('TFRecordDataset', lambda: tf.data.TFRecordDataset([])), - ('Window', lambda: tf.data.Dataset.range(5).window(2)), - ('Zip', lambda: tf.data.Dataset.zip(tf.data.Dataset.range(5))), - # pylint: enable=g-long-lambda - ) - def test_verify_dataset_shuffled(self, dataset_fn, expect_shuffled=False): - dataset = dataset_fn() - - if not expect_shuffled: - with tf.compat.v1.test.mock.patch.object(logging, 'warning') as mock_log: - shuffled = training_utils_v1.verify_dataset_shuffled(dataset) - self.assertRegex( - str(mock_log.call_args), 'input dataset `x` is not shuffled.') - self.assertFalse(shuffled) - else: - self.assertTrue(training_utils_v1.verify_dataset_shuffled(dataset)) + @parameterized.named_parameters( + ("Batch", lambda: tf.data.Dataset.range(5).batch(2)), + ("Cache", lambda: tf.data.Dataset.range(5).cache()), + ( + "Concatenate", + lambda: tf.data.Dataset.range(5).concatenate( + tf.data.Dataset.range(5) + ), + ), + ( + "FlatMap", + lambda: tf.data.Dataset.range(5).flat_map( + lambda _: tf.data.Dataset.from_tensors(0) + ), + ), + ( + "FlatMap_Shuffle", + lambda: tf.data.Dataset.range(5).flat_map( + lambda _: tf.data.Dataset.from_tensors(0).shuffle(1) + ), + True, + ), + ("Filter", lambda: tf.data.Dataset.range(5).filter(lambda _: True)), + ( + "FixedLengthRecordDatasetV2", + lambda: tf.data.FixedLengthRecordDataset([], 42), + ), + ("FromTensors", lambda: tf.data.Dataset.from_tensors(0)), + ( + "FromTensorSlices", + lambda: tf.data.Dataset.from_tensor_slices([0, 0, 0]), + ), + ( + "Interleave", + lambda: tf.data.Dataset.range(5).interleave( + lambda _: tf.data.Dataset.from_tensors(0), cycle_length=1 + ), + ), + ( + "Interleave_Shuffle", + lambda: tf.data.Dataset.range(5).interleave( + lambda _: tf.data.Dataset.from_tensors(0).shuffle(1), + cycle_length=1, + ), + 
True, + ), + ("Map", lambda: tf.data.Dataset.range(5).map(lambda x: x)), + ( + "Options", + lambda: tf.data.Dataset.range(5).with_options(tf.data.Options()), + ), + ("PaddedBatch", lambda: tf.data.Dataset.range(5).padded_batch(2, [])), + ( + "ParallelInterleave", + lambda: tf.data.Dataset.range(5).interleave( + lambda _: tf.data.Dataset.from_tensors(0), + cycle_length=1, + num_parallel_calls=1, + ), + ), + ( + "ParallelMap", + lambda: tf.data.Dataset.range(5).map( + lambda x: x, num_parallel_calls=1 + ), + ), + ("Prefetch", lambda: tf.data.Dataset.range(5).prefetch(1)), + ("Range", lambda: tf.data.Dataset.range(0)), + ("Repeat", lambda: tf.data.Dataset.range(0).repeat(0)), + ("Shuffle", lambda: tf.data.Dataset.range(5).shuffle(1), True), + ("Skip", lambda: tf.data.Dataset.range(5).skip(2)), + ("Take", lambda: tf.data.Dataset.range(5).take(2)), + ("TextLineDataset", lambda: tf.data.TextLineDataset([])), + ("TFRecordDataset", lambda: tf.data.TFRecordDataset([])), + ("Window", lambda: tf.data.Dataset.range(5).window(2)), + ("Zip", lambda: tf.data.Dataset.zip(tf.data.Dataset.range(5))), + ) + def test_verify_dataset_shuffled(self, dataset_fn, expect_shuffled=False): + dataset = dataset_fn() + + if not expect_shuffled: + with tf.compat.v1.test.mock.patch.object( + logging, "warning" + ) as mock_log: + shuffled = training_utils_v1.verify_dataset_shuffled(dataset) + self.assertRegex( + str(mock_log.call_args), + "input dataset `x` is not shuffled.", + ) + self.assertFalse(shuffled) + else: + self.assertTrue(training_utils_v1.verify_dataset_shuffled(dataset)) class StandardizeWeightsTest(test_combinations.TestCase): - - def test_sample_weights(self): - y = np.array([0, 1, 0, 0, 2]) - sample_weights = np.array([0.5, 1., 1., 0., 2.]) - weights = training_utils_v1.standardize_weights(y, sample_weights) - self.assertAllClose(weights, sample_weights) - - def test_class_weights(self): - y = np.array([0, 1, 0, 0, 2]) - class_weights = {0: 0.5, 1: 1., 2: 1.5} - weights = training_utils_v1.standardize_weights( - y, class_weight=class_weights) - self.assertAllClose(weights, np.array([0.5, 1., 0.5, 0.5, 1.5])) - - def test_sample_weights_and_class_weights(self): - y = np.array([0, 1, 0, 0, 2]) - sample_weights = np.array([0.5, 1., 1., 0., 2.]) - class_weights = {0: 0.5, 1: 1., 2: 1.5} - weights = training_utils_v1.standardize_weights(y, sample_weights, - class_weights) - expected = sample_weights * np.array([0.5, 1., 0.5, 0.5, 1.5]) - self.assertAllClose(weights, expected) - - def test_dataset_with_class_weight(self): - model = test_utils.get_small_functional_mlp(1, 4, input_dim=3) - model.compile('rmsprop', 'mse') - - inputs = np.zeros((10, 3), np.float32) - targets = np.zeros((10, 4), np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - class_weight_np = np.array([0.25, 0.25, 0.25, 0.25]) - class_weight = dict(enumerate(class_weight_np)) - - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=1, - class_weight=class_weight) + def test_sample_weights(self): + y = np.array([0, 1, 0, 0, 2]) + sample_weights = np.array([0.5, 1.0, 1.0, 0.0, 2.0]) + weights = training_utils_v1.standardize_weights(y, sample_weights) + self.assertAllClose(weights, sample_weights) + + def test_class_weights(self): + y = np.array([0, 1, 0, 0, 2]) + class_weights = {0: 0.5, 1: 1.0, 2: 1.5} + weights = training_utils_v1.standardize_weights( + y, class_weight=class_weights + ) + self.assertAllClose(weights, np.array([0.5, 1.0, 0.5, 
0.5, 1.5])) + + def test_sample_weights_and_class_weights(self): + y = np.array([0, 1, 0, 0, 2]) + sample_weights = np.array([0.5, 1.0, 1.0, 0.0, 2.0]) + class_weights = {0: 0.5, 1: 1.0, 2: 1.5} + weights = training_utils_v1.standardize_weights( + y, sample_weights, class_weights + ) + expected = sample_weights * np.array([0.5, 1.0, 0.5, 0.5, 1.5]) + self.assertAllClose(weights, expected) + + def test_dataset_with_class_weight(self): + model = test_utils.get_small_functional_mlp(1, 4, input_dim=3) + model.compile("rmsprop", "mse") + + inputs = np.zeros((10, 3), np.float32) + targets = np.zeros((10, 4), np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + class_weight_np = np.array([0.25, 0.25, 0.25, 0.25]) + class_weight = dict(enumerate(class_weight_np)) + + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=1, + class_weight=class_weight, + ) class MonitoredPool(multiprocessing.pool.ThreadPool): + def __init__(self, *args, **kwargs): + self._apply_counter = 0 + self._func_wrapper = None + super().__init__(*args, **kwargs) - def __init__(self, *args, **kwargs): - self._apply_counter = 0 - self._func_wrapper = None - super().__init__(*args, **kwargs) - - def apply_async(self, func, *args, **kwargs): - self._apply_counter += 1 - if self._func_wrapper: - func = self._func_wrapper(func) # pylint: disable=not-callable - return super().apply_async(func, *args, **kwargs) + def apply_async(self, func, *args, **kwargs): + self._apply_counter += 1 + if self._func_wrapper: + func = self._func_wrapper(func) + return super().apply_async(func, *args, **kwargs) def add_sleep(f): - @functools.wraps(f) - def wrapped(*args, **kwargs): - time.sleep(1.) - return f(*args, **kwargs) - return wrapped + @functools.wraps(f) + def wrapped(*args, **kwargs): + time.sleep(1.0) + return f(*args, **kwargs) + + return wrapped def cause_error(f): - @functools.wraps(f) - def wrapped(batch_element, batch_start, batch_end, is_finished): # pylint: disable=unused-argument - # Induce a TypeError during assignment. - return f(None, None, None, is_finished) - return wrapped + @functools.wraps(f) + def wrapped(batch_element, batch_start, batch_end, is_finished): + # Induce a TypeError during assignment. 
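+ # (`f` is the slice-copy callback submitted to the aggregator's copy + # pool; passing None for the array and slice bounds makes the buffer + # assignment fail, so the test can verify that an error raised on the + # pool is re-raised for the caller.)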
+ return f(None, None, None, is_finished) + return wrapped -_TEST_DATA = np.array(( - (3, 1, 3, 1, 2, 0, 3, 3, 1, 2), - (0, 1, 2, 1, 3, 0, 0, 1, 3, 0), - (3, 2, 1, 1, 1, 1, 1, 3, 2, 3), - (2, 2, 0, 1, 0, 3, 3, 2, 1, 1), - (3, 0, 3, 3, 3, 2, 1, 0, 0, 1), - (1, 0, 3, 3, 3, 2, 1, 2, 3, 1),)) +_TEST_DATA = np.array( + ( + (3, 1, 3, 1, 2, 0, 3, 3, 1, 2), + (0, 1, 2, 1, 3, 0, 0, 1, 3, 0), + (3, 2, 1, 1, 1, 1, 1, 3, 2, 3), + (2, 2, 0, 1, 0, 3, 3, 2, 1, 1), + (3, 0, 3, 3, 3, 2, 1, 0, 0, 1), + (1, 0, 3, 3, 3, 2, 1, 2, 3, 1), + ) +) -class AggregationTest(test_combinations.TestCase): - def setUp(self): - super().setUp() - self._old_pool = training_utils_v1._COPY_POOL - self._old_threshold = ( - training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD) - self._old_timeout = training_utils_v1.SliceAggregator._MAX_COPY_SECONDS - training_utils_v1._COPY_POOL = MonitoredPool( - training_utils_v1._COPY_THREADS) - - def tearDown(self): - super().tearDown() - training_utils_v1._COPY_POOL = self._old_pool - training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD = ( - self._old_threshold) - training_utils_v1.SliceAggregator._MAX_COPY_SECONDS = self._old_timeout - - def _run_with_steps(self): - aggregator = training_utils_v1.OutputsAggregator(use_steps=True) - for i, batch in enumerate(np.array_split(_TEST_DATA, 4)): - if i == 0: - aggregator.create(batch) - aggregator.aggregate(batch) - - assert len(aggregator.results) == 1 - assert isinstance(aggregator.results[0], training_utils_v1.ConcatAggregator) - - aggregator.finalize() - return aggregator.results - - def _run_without_steps(self): - aggregator = training_utils_v1.OutputsAggregator( - use_steps=False, num_samples=6) - - batch_start = 0 - for i, batch in enumerate(np.array_split(_TEST_DATA, 4)): - if i == 0: - aggregator.create(batch) - - batch_end = batch_start + batch.shape[0] - aggregator.aggregate(batch, batch_start, batch_end) - batch_start = batch_end - - assert len(aggregator.results) == 1 - assert isinstance(aggregator.results[0], training_utils_v1.SliceAggregator) - - aggregator.finalize() - return aggregator.results - - def test_with_steps(self): - self.assertAllEqual(self._run_with_steps(), _TEST_DATA) - - def test_without_steps(self): - self.assertAllEqual(self._run_without_steps(), _TEST_DATA) - - def test_nested_aggregation(self): - aggregator = training_utils_v1.OutputsAggregator( - use_steps=False, num_samples=6) - - batches = np.array_split(_TEST_DATA, 4) - batch_start = 0 - for i, batch in enumerate(zip(batches, batches)): - if i == 0: - aggregator.create(batch) - - batch_end = batch_start + batch[0].shape[0] - aggregator.aggregate(batch, batch_start, batch_end) - batch_start = batch_end - - assert len(aggregator.results) == 2 - aggregator.finalize() - self.assertAllEqual(aggregator.results, (_TEST_DATA, _TEST_DATA)) - - def test_concat_single_batch(self): - aggregator = training_utils_v1.OutputsAggregator(use_steps=True) - data = _TEST_DATA.copy() - aggregator.create(data) - assert len(aggregator.results) == 1 - assert isinstance(aggregator.results[0], training_utils_v1.ConcatAggregator) - - aggregator.aggregate(data) - aggregator.finalize() - assert aggregator.results is data # No copy. 
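The single-batch assertions here (and in the reformatted copies further down) pin down the aggregators' fast path: with exactly one batch, `finalize()` returns the caller's array itself, with no concatenation and no copy. A rough sketch of that contract in plain numpy (illustrative only; the real `ConcatAggregator`/`SliceAggregator` additionally offload large copies to a thread pool):

import numpy as np

def aggregate(batches, num_samples=None):
    if len(batches) == 1:
        return batches[0]  # single batch: hand back the input array, no copy
    if num_samples is None:
        # steps mode (ConcatAggregator): sizes unknown, concatenate at the end
        return np.concatenate(batches, axis=0)
    # sample mode (SliceAggregator): total size known up front, so write each
    # batch into a preallocated buffer instead of concatenating
    out = np.empty((num_samples,) + batches[0].shape[1:], batches[0].dtype)
    start = 0
    for batch in batches:
        out[start:start + batch.shape[0]] = batch
        start += batch.shape[0]
    return out

data = np.arange(60).reshape(6, 10)
batches = np.array_split(data, 4)
assert (aggregate(batches, num_samples=6) == data).all()
assert aggregate([data]) is data  # the "No copy" fast path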
- - def test_slice_single_batch(self): - aggregator = training_utils_v1.OutputsAggregator( - use_steps=False, num_samples=6) - data = _TEST_DATA.copy() - aggregator.create(data) - assert len(aggregator.results) == 1 - assert isinstance(aggregator.results[0], training_utils_v1.SliceAggregator) - - aggregator.aggregate(data, 0, 6) - aggregator.finalize() - assert aggregator.results is data # No copy. - - def test_async_copy(self): - training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD = 15 - self.assertAllEqual(self._run_without_steps(), _TEST_DATA) - - # Two of the four batches will have 20 elements and two will have 10. - self.assertEqual(training_utils_v1._COPY_POOL._apply_counter, 2) - - def test_async_copy_timeout(self): - training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD = 15 - training_utils_v1.SliceAggregator._MAX_COPY_SECONDS = 0.1 - training_utils_v1._COPY_POOL._func_wrapper = add_sleep - with self.assertRaisesRegex(ValueError, 'Timed out waiting for copy'): - self._run_without_steps() - - def test_async_copy_reraise(self): - training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD = 15 - training_utils_v1.SliceAggregator._MAX_COPY_SECONDS = 1. - training_utils_v1._COPY_POOL._func_wrapper = cause_error - with self.assertRaisesRegex(TypeError, 'NoneType'): - self._run_without_steps() +class AggregationTest(test_combinations.TestCase): + def setUp(self): + super().setUp() + self._old_pool = training_utils_v1._COPY_POOL + self._old_threshold = ( + training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD + ) + self._old_timeout = training_utils_v1.SliceAggregator._MAX_COPY_SECONDS + training_utils_v1._COPY_POOL = MonitoredPool( + training_utils_v1._COPY_THREADS + ) + + def tearDown(self): + super().tearDown() + training_utils_v1._COPY_POOL = self._old_pool + training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD = ( + self._old_threshold + ) + training_utils_v1.SliceAggregator._MAX_COPY_SECONDS = self._old_timeout + + def _run_with_steps(self): + aggregator = training_utils_v1.OutputsAggregator(use_steps=True) + for i, batch in enumerate(np.array_split(_TEST_DATA, 4)): + if i == 0: + aggregator.create(batch) + aggregator.aggregate(batch) + + assert len(aggregator.results) == 1 + assert isinstance( + aggregator.results[0], training_utils_v1.ConcatAggregator + ) + + aggregator.finalize() + return aggregator.results + + def _run_without_steps(self): + aggregator = training_utils_v1.OutputsAggregator( + use_steps=False, num_samples=6 + ) + + batch_start = 0 + for i, batch in enumerate(np.array_split(_TEST_DATA, 4)): + if i == 0: + aggregator.create(batch) + + batch_end = batch_start + batch.shape[0] + aggregator.aggregate(batch, batch_start, batch_end) + batch_start = batch_end + + assert len(aggregator.results) == 1 + assert isinstance( + aggregator.results[0], training_utils_v1.SliceAggregator + ) + + aggregator.finalize() + return aggregator.results + + def test_with_steps(self): + self.assertAllEqual(self._run_with_steps(), _TEST_DATA) + + def test_without_steps(self): + self.assertAllEqual(self._run_without_steps(), _TEST_DATA) + + def test_nested_aggregation(self): + aggregator = training_utils_v1.OutputsAggregator( + use_steps=False, num_samples=6 + ) + + batches = np.array_split(_TEST_DATA, 4) + batch_start = 0 + for i, batch in enumerate(zip(batches, batches)): + if i == 0: + aggregator.create(batch) + + batch_end = batch_start + batch[0].shape[0] + aggregator.aggregate(batch, batch_start, batch_end) + batch_start = batch_end + + assert len(aggregator.results) == 2 + 
aggregator.finalize() + self.assertAllEqual(aggregator.results, (_TEST_DATA, _TEST_DATA)) + + def test_concat_single_batch(self): + aggregator = training_utils_v1.OutputsAggregator(use_steps=True) + data = _TEST_DATA.copy() + aggregator.create(data) + assert len(aggregator.results) == 1 + assert isinstance( + aggregator.results[0], training_utils_v1.ConcatAggregator + ) + + aggregator.aggregate(data) + aggregator.finalize() + assert aggregator.results is data # No copy. + + def test_slice_single_batch(self): + aggregator = training_utils_v1.OutputsAggregator( + use_steps=False, num_samples=6 + ) + data = _TEST_DATA.copy() + aggregator.create(data) + assert len(aggregator.results) == 1 + assert isinstance( + aggregator.results[0], training_utils_v1.SliceAggregator + ) + + aggregator.aggregate(data, 0, 6) + aggregator.finalize() + assert aggregator.results is data # No copy. + + def test_async_copy(self): + training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD = 15 + self.assertAllEqual(self._run_without_steps(), _TEST_DATA) + + # Two of the four batches will have 20 elements and two will have 10. + self.assertEqual(training_utils_v1._COPY_POOL._apply_counter, 2) + + def test_async_copy_timeout(self): + training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD = 15 + training_utils_v1.SliceAggregator._MAX_COPY_SECONDS = 0.1 + training_utils_v1._COPY_POOL._func_wrapper = add_sleep + with self.assertRaisesRegex(ValueError, "Timed out waiting for copy"): + self._run_without_steps() + + def test_async_copy_reraise(self): + training_utils_v1.SliceAggregator._BINARY_SIZE_THRESHOLD = 15 + training_utils_v1.SliceAggregator._MAX_COPY_SECONDS = 1.0 + training_utils_v1._COPY_POOL._func_wrapper = cause_error + with self.assertRaisesRegex(TypeError, "NoneType"): + self._run_without_steps() class CompositeTensorTestUtils(test_combinations.TestCase): - - def test_is_composite(self): - # Validate that all composite tensor and value types return true. - self.assertTrue( - training_utils_v1.is_composite_or_composite_value( - tf.SparseTensor([[0, 0]], [1], [1, 1]))) - self.assertTrue( - training_utils_v1.is_composite_or_composite_value( - tf.compat.v1.SparseTensorValue([[0, 0]], [1], [1, 1]))) - self.assertTrue( - training_utils_v1.is_composite_or_composite_value( - tf.RaggedTensor.from_row_splits( - np.array([0, 1, 2]), np.array([0, 1, 3], dtype=np.int64)))) - self.assertTrue( - training_utils_v1.is_composite_or_composite_value( - tf.compat.v1.ragged.RaggedTensorValue( - np.array([0, 1, 2]), np.array([0, 1, 3], dtype=np.int64)))) - - # Test that numpy arrays and tensors return false. 
- self.assertFalse( - training_utils_v1.is_composite_or_composite_value(np.ndarray([0, 1]))) - self.assertFalse( - training_utils_v1.is_composite_or_composite_value( - tf.convert_to_tensor([3, 1]))) - - def test_sparse_concatenation(self): - tensor_1 = tf.SparseTensor([[0, 0]], [1], [1, 1]) - tensor_2 = tf.SparseTensor([[0, 0]], [2], [1, 1]) - concatenated_tensor = training_utils_v1._append_composite_tensor( - tensor_1, tensor_2) - evaluated_tensor = self.evaluate(concatenated_tensor) - self.assertAllEqual(evaluated_tensor.indices, [[0, 0], [1, 0]]) - self.assertAllEqual(evaluated_tensor.values, [1, 2]) - self.assertAllEqual(evaluated_tensor.dense_shape, [2, 1]) - - def test_sparse_value_concatenation(self): - tensor_1 = tf.compat.v1.SparseTensorValue([[0, 0]], [1], [1, 1]) - tensor_2 = tf.compat.v1.SparseTensorValue([[0, 0]], [2], [1, 1]) - concatenated_tensor = training_utils_v1._append_composite_tensor( - tensor_1, tensor_2) - self.assertAllEqual(concatenated_tensor.indices, [[0, 0], [1, 0]]) - self.assertAllEqual(concatenated_tensor.values, [1, 2]) - self.assertAllEqual(concatenated_tensor.dense_shape, [2, 1]) - - def test_ragged_concatenation(self): - tensor_1 = tf.RaggedTensor.from_row_splits( - np.array([0, 1, 2]), np.array([0, 1, 3], dtype=np.int64)) - tensor_2 = tf.RaggedTensor.from_row_splits( - np.array([3, 4, 5]), np.array([0, 2, 3], dtype=np.int64)) - concatenated_tensor = training_utils_v1._append_composite_tensor( - tensor_1, tensor_2) - evaluated_tensor = self.evaluate(concatenated_tensor) - - self.assertAllEqual(evaluated_tensor.values, [0, 1, 2, 3, 4, 5]) - self.assertAllEqual(evaluated_tensor.row_splits, [0, 1, 3, 5, 6]) - - def test_ragged_value_concatenation(self): - tensor_1 = tf.compat.v1.ragged.RaggedTensorValue( - np.array([0, 1, 2]), np.array([0, 1, 3], dtype=np.int64)) - tensor_2 = tf.compat.v1.ragged.RaggedTensorValue( - np.array([3, 4, 5]), np.array([0, 2, 3], dtype=np.int64)) - concatenated_tensor = training_utils_v1._append_composite_tensor( - tensor_1, tensor_2) - - self.assertAllEqual(concatenated_tensor.values, [0, 1, 2, 3, 4, 5]) - self.assertAllEqual(concatenated_tensor.row_splits, [0, 1, 3, 5, 6]) - - -if __name__ == '__main__': - tf.test.main() + def test_is_composite(self): + # Validate that all composite tensor and value types return true. + self.assertTrue( + training_utils_v1.is_composite_or_composite_value( + tf.SparseTensor([[0, 0]], [1], [1, 1]) + ) + ) + self.assertTrue( + training_utils_v1.is_composite_or_composite_value( + tf.compat.v1.SparseTensorValue([[0, 0]], [1], [1, 1]) + ) + ) + self.assertTrue( + training_utils_v1.is_composite_or_composite_value( + tf.RaggedTensor.from_row_splits( + np.array([0, 1, 2]), np.array([0, 1, 3], dtype=np.int64) + ) + ) + ) + self.assertTrue( + training_utils_v1.is_composite_or_composite_value( + tf.compat.v1.ragged.RaggedTensorValue( + np.array([0, 1, 2]), np.array([0, 1, 3], dtype=np.int64) + ) + ) + ) + + # Test that numpy arrays and tensors return false. 
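+ # ("Composite" here means backed by several dense tensors, e.g. a + # SparseTensor's indices/values/dense_shape triple; a plain ndarray or + # a dense Tensor is a single value and so is not composite.)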
+ self.assertFalse( + training_utils_v1.is_composite_or_composite_value( + np.ndarray([0, 1]) + ) + ) + self.assertFalse( + training_utils_v1.is_composite_or_composite_value( + tf.convert_to_tensor([3, 1]) + ) + ) + + def test_sparse_concatenation(self): + tensor_1 = tf.SparseTensor([[0, 0]], [1], [1, 1]) + tensor_2 = tf.SparseTensor([[0, 0]], [2], [1, 1]) + concatenated_tensor = training_utils_v1._append_composite_tensor( + tensor_1, tensor_2 + ) + evaluated_tensor = self.evaluate(concatenated_tensor) + self.assertAllEqual(evaluated_tensor.indices, [[0, 0], [1, 0]]) + self.assertAllEqual(evaluated_tensor.values, [1, 2]) + self.assertAllEqual(evaluated_tensor.dense_shape, [2, 1]) + + def test_sparse_value_concatenation(self): + tensor_1 = tf.compat.v1.SparseTensorValue([[0, 0]], [1], [1, 1]) + tensor_2 = tf.compat.v1.SparseTensorValue([[0, 0]], [2], [1, 1]) + concatenated_tensor = training_utils_v1._append_composite_tensor( + tensor_1, tensor_2 + ) + self.assertAllEqual(concatenated_tensor.indices, [[0, 0], [1, 0]]) + self.assertAllEqual(concatenated_tensor.values, [1, 2]) + self.assertAllEqual(concatenated_tensor.dense_shape, [2, 1]) + + def test_ragged_concatenation(self): + tensor_1 = tf.RaggedTensor.from_row_splits( + np.array([0, 1, 2]), np.array([0, 1, 3], dtype=np.int64) + ) + tensor_2 = tf.RaggedTensor.from_row_splits( + np.array([3, 4, 5]), np.array([0, 2, 3], dtype=np.int64) + ) + concatenated_tensor = training_utils_v1._append_composite_tensor( + tensor_1, tensor_2 + ) + evaluated_tensor = self.evaluate(concatenated_tensor) + + self.assertAllEqual(evaluated_tensor.values, [0, 1, 2, 3, 4, 5]) + self.assertAllEqual(evaluated_tensor.row_splits, [0, 1, 3, 5, 6]) + + def test_ragged_value_concatenation(self): + tensor_1 = tf.compat.v1.ragged.RaggedTensorValue( + np.array([0, 1, 2]), np.array([0, 1, 3], dtype=np.int64) + ) + tensor_2 = tf.compat.v1.ragged.RaggedTensorValue( + np.array([3, 4, 5]), np.array([0, 2, 3], dtype=np.int64) + ) + concatenated_tensor = training_utils_v1._append_composite_tensor( + tensor_1, tensor_2 + ) + + self.assertAllEqual(concatenated_tensor.values, [0, 1, 2, 3, 4, 5]) + self.assertAllEqual(concatenated_tensor.row_splits, [0, 1, 3, 5, 6]) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/engine/training_v1.py b/keras/engine/training_v1.py index 371feb42b0ed..3324e1c2b707 100644 --- a/keras/engine/training_v1.py +++ b/keras/engine/training_v1.py @@ -13,17 +13,15 @@ # limitations under the License. 
# ============================================================================== """V1 Training-related part of the Keras engine.""" -# pylint: disable=g-classes-have-attributes -import tensorflow.compat.v2 as tf - import collections import warnings import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import losses from keras import metrics as metrics_module -from keras.optimizers import optimizer_v1 from keras import optimizers from keras.distribute import distributed_training_utils from keras.distribute import distributed_training_utils_v1 @@ -36,3160 +34,3599 @@ from keras.engine import training_utils from keras.engine import training_utils_v1 from keras.mixed_precision import loss_scale_optimizer -from keras.optimizers.optimizer_v2 import optimizer_v2 -from keras.saving import saving_utils -from keras.saving.saved_model import model_serialization +from keras.optimizers import optimizer_v1 +from keras.optimizers.legacy import optimizer_v2 +from keras.saving.legacy import saving_utils +from keras.saving.legacy.saved_model import model_serialization from keras.utils import data_utils from keras.utils import layer_utils from keras.utils import losses_utils from keras.utils import tf_inspect from keras.utils import tf_utils from keras.utils.mode_keys import ModeKeys + +# isort: off from tensorflow.python.platform import tf_logging as logging try: - from scipy.sparse import issparse # pylint: disable=g-import-not-at-top + from scipy.sparse import issparse except ImportError: - issparse = None + issparse = None class Model(training_lib.Model): - """`Model` groups layers into an object with training and inference features. - - There are two ways to instantiate a `Model`: - - 1 - With the "functional API", where you start from `Input`, - you chain layer calls to specify the model's forward pass, - and finally you create your model from inputs and outputs: + """A model groups layers into an object with training & inference features. - ```python - import tensorflow as tf + There are two ways to instantiate a `Model`: - inputs = tf.keras.Input(shape=(3,)) - x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs) - outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x) - model = tf.keras.Model(inputs=inputs, outputs=outputs) - ``` + 1 - With the "functional API", where you start from `Input`, + you chain layer calls to specify the model's forward pass, + and finally you create your model from inputs and outputs: - 2 - By subclassing the `Model` class: in that case, you should define your - layers in `__init__` and you should implement the model's forward pass - in `call`. + ```python + import tensorflow as tf - ```python - import tensorflow as tf + inputs = tf.keras.Input(shape=(3,)) + x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs) + outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x) + model = tf.keras.Model(inputs=inputs, outputs=outputs) + ``` - class MyModel(tf.keras.Model): + 2 - By subclassing the `Model` class: in that case, you should define your + layers in `__init__` and you should implement the model's forward pass + in `call`. 
- def __init__(self): - super().__init__() - self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu) - self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax) + ```python + import tensorflow as tf - def call(self, inputs): - x = self.dense1(inputs) - return self.dense2(x) + class MyModel(tf.keras.Model): - model = MyModel() - ``` + def __init__(self): + super().__init__() + self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu) + self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax) - If you subclass `Model`, you can optionally have - a `training` argument (boolean) in `call`, which you can use to specify - a different behavior in training and inference: + def call(self, inputs): + x = self.dense1(inputs) + return self.dense2(x) - ```python - import tensorflow as tf + model = MyModel() + ``` - class MyModel(tf.keras.Model): + If you subclass `Model`, you can optionally have + a `training` argument (boolean) in `call`, which you can use to specify + a different behavior in training and inference: - def __init__(self): - super().__init__() - self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu) - self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax) - self.dropout = tf.keras.layers.Dropout(0.5) + ```python + import tensorflow as tf - def call(self, inputs, training=False): - x = self.dense1(inputs) - if training: - x = self.dropout(x, training=training) - return self.dense2(x) + class MyModel(tf.keras.Model): - model = MyModel() - ``` - """ + def __init__(self): + super().__init__() + self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu) + self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax) + self.dropout = tf.keras.layers.Dropout(0.5) - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # initializing _distribution_strategy here since it is possible to call - # predict on a model without compiling it. - self._distribution_strategy = None - self._compile_time_distribution_strategy = None - if (tf.compat.v1.executing_eagerly_outside_functions() and - tf.distribute.has_strategy()): - self._set_strategy( - tf.distribute.get_strategy()) + def call(self, inputs, training=False): + x = self.dense1(inputs) + if training: + x = self.dropout(x, training=training) + return self.dense2(x) - # This flag is used to track if the user is using the deprecated path of - # passing distribution strategy to compile rather than creating the model - # under distribution strategy scope. - self._compile_distribution = False - - self._run_eagerly = None - self._experimental_run_tf_function = ( - tf.compat.v1.executing_eagerly_outside_functions()) - - self._v1_compile_was_called = False - - def _init_batch_counters(self): - pass # Batch counters should not be created in legacy graph mode. - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _set_strategy(self, strategy): - self._compile_time_distribution_strategy = strategy - - def get_weights(self): - """Retrieves the weights of the model. - - Returns: - A flat list of Numpy arrays. + model = MyModel() + ``` """ - strategy = (self._distribution_strategy or - self._compile_time_distribution_strategy) - if strategy: - with strategy.scope(): - return base_layer.Layer.get_weights(self) - return base_layer.Layer.get_weights(self) - def load_weights(self, filepath, by_name=False, skip_mismatch=False): - """Loads all layer weights, either from a TensorFlow or an HDF5 weight file. - - If `by_name` is False weights are loaded based on the network's - topology. 
This means the architecture should be the same as when the weights - were saved. Note that layers that don't have weights are not taken into - account in the topological ordering, so adding or removing layers is fine as - long as they don't have weights. - - If `by_name` is True, weights are loaded into layers only if they share the - same name. This is useful for fine-tuning or transfer-learning models where - some of the layers have changed. - - Only topological loading (`by_name=False`) is supported when loading weights - from the TensorFlow format. Note that topological loading differs slightly - between TensorFlow and HDF5 formats for user-defined classes inheriting from - `tf.keras.Model`: HDF5 loads based on a flattened list of weights, while the - TensorFlow format loads based on the object-local names of attributes to - which layers are assigned in the `Model`'s constructor. - - Args: - filepath: String, path to the weights file to load. For weight files in - TensorFlow format, this is the file prefix (the same as was passed - to `save_weights`). - by_name: Boolean, whether to load weights by name or by topological - order. Only topological loading is supported for weight files in - TensorFlow format. - skip_mismatch: Boolean, whether to skip loading of layers where there is - a mismatch in the number of weights, or a mismatch in the shape of - the weight (only valid when `by_name=True`). - - Returns: - When loading a weight file in TensorFlow format, returns the same status - object as `tf.train.Checkpoint.restore`. When graph building, restore - ops are run automatically as soon as the network is built (on first call - for user-defined classes inheriting from `Model`, immediately if it is - already built). - - When loading weights in HDF5 format, returns `None`. - - Raises: - ImportError: If h5py is not available and the weight file is in HDF5 - format. - ValueError: If `skip_mismatch` is set to `True` when `by_name` is - `False`. - """ - if backend.is_tpu_strategy(self._distribution_strategy): - if (self._distribution_strategy.extended.steps_per_run > 1 and - (not saving_utils.is_hdf5_filepath(filepath))): # pylint: disable=protected-access - raise ValueError('Load weights is not yet supported with TPUStrategy ' - 'with steps_per_run greater than 1.') - return super().load_weights(filepath, by_name, skip_mismatch) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def compile(self, - optimizer='rmsprop', - loss=None, - metrics=None, - loss_weights=None, - sample_weight_mode=None, - weighted_metrics=None, - target_tensors=None, - distribute=None, - **kwargs): - """Configures the model for training. + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # initializing _distribution_strategy here since it is possible to call + # predict on a model without compiling it. + self._distribution_strategy = None + self._compile_time_distribution_strategy = None + if ( + tf.compat.v1.executing_eagerly_outside_functions() + and tf.distribute.has_strategy() + ): + self._set_strategy(tf.distribute.get_strategy()) + + # This flag is used to track if the user is using the deprecated path of + # passing distribution strategy to compile rather than creating the + # model under distribution strategy scope. 
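+ # (i.e. the deprecated `compile(..., distribute=strategy)` call, which + # is rejected when TF 2 behavior is enabled, versus building the model + # inside `with strategy.scope():`.)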
+ self._compile_distribution = False + + self._run_eagerly = None + self._experimental_run_tf_function = ( + tf.compat.v1.executing_eagerly_outside_functions() + ) + + self._v1_compile_was_called = False + + def _init_batch_counters(self): + pass # Batch counters should not be created in legacy graph mode. + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _set_strategy(self, strategy): + self._compile_time_distribution_strategy = strategy + + def get_weights(self): + """Retrieves the weights of the model. + + Returns: + A flat list of Numpy arrays. + """ + strategy = ( + self._distribution_strategy + or self._compile_time_distribution_strategy + ) + if strategy: + with strategy.scope(): + return base_layer.Layer.get_weights(self) + return base_layer.Layer.get_weights(self) - Args: - optimizer: String (name of optimizer) or optimizer instance. - See `tf.keras.optimizers`. - loss: String (name of objective function), objective function or - `tf.keras.losses.Loss` instance. See `tf.keras.losses`. An objective - function is any callable with the signature - `scalar_loss = fn(y_true, y_pred)`. If the model has multiple - outputs, you can use a different loss on each output by passing a - dictionary or a list of losses. The loss value that will be - minimized by the model will then be the sum of all individual - losses. - metrics: List of metrics to be evaluated by the model during training - and testing. Typically you will use `metrics=['accuracy']`. - To specify different metrics for different outputs of a - multi-output model, you could also pass a dictionary, such as - `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`. - You can also pass a list (len = len(outputs)) of lists of metrics - such as `metrics=[['accuracy'], ['accuracy', 'mse']]` or - `metrics=['accuracy', ['accuracy', 'mse']]`. - loss_weights: Optional list or dictionary specifying scalar - coefficients (Python floats) to weight the loss contributions - of different model outputs. - The loss value that will be minimized by the model - will then be the *weighted sum* of all individual losses, - weighted by the `loss_weights` coefficients. - If a list, it is expected to have a 1:1 mapping - to the model's outputs. If a tensor, it is expected to map - output names (strings) to scalar coefficients. - sample_weight_mode: If you need to do timestep-wise - sample weighting (2D weights), set this to `"temporal"`. - `None` defaults to sample-wise weights (1D). - If the model has multiple outputs, you can use a different - `sample_weight_mode` on each output by passing a - dictionary or a list of modes. - weighted_metrics: List of metrics to be evaluated and weighted - by sample_weight or class_weight during training and testing. - target_tensors: By default, Keras will create placeholders for the - model's target, which will be fed with the target data during - training. If instead you would like to use your own - target tensors (in turn, Keras will not expect external - Numpy data for these targets at training time), you - can specify them via the `target_tensors` argument. It can be - a single tensor (for a single-output model), a list of tensors, - or a dict mapping output names to target tensors. - distribute: NOT SUPPORTED IN TF 2.0, please create and compile the - model under distribution strategy scope instead of passing it to - compile. - **kwargs: Any additional arguments. - - Raises: - ValueError: In case of invalid arguments for - `optimizer`, `loss`, `metrics` or `sample_weight_mode`. 
- """ - self._assert_built_as_v1() - self._run_eagerly = kwargs.pop('run_eagerly', None) - self._experimental_run_tf_function = kwargs.pop( - 'experimental_run_tf_function', True) - self._v1_compile_was_called = True - - # Prepare Session arguments (legacy). - kwargs.pop('cloning', None) # Legacy DistStrat argument, never used. - self._from_serialized = kwargs.pop('from_serialized', False) - allowed_kwargs = {'feed_dict', 'fetches', 'options', 'run_metadata'} - unknown_kwargs = set(kwargs.keys()) - allowed_kwargs - if unknown_kwargs: - raise TypeError( - 'Invalid keyword argument(s) in `compile`: %s' % (unknown_kwargs,)) - self._function_kwargs = kwargs - if self._function_kwargs: - self._experimental_run_tf_function = False - if self.run_eagerly: - raise ValueError( - 'Session keyword arguments are not supported ' - 'when `run_eagerly=True`. You passed the following ' - 'Session arguments: %s' % (self._function_kwargs,)) - - self._set_optimizer(optimizer) - is_any_keras_optimizer_v1 = any( - (isinstance(opt, optimizer_v1.Optimizer) - and not isinstance(opt, optimizer_v1.TFOptimizer) - ) for opt in tf.nest.flatten(self.optimizer)) - - if is_any_keras_optimizer_v1 and tf.compat.v1.executing_eagerly_outside_functions(): - raise ValueError('`tf.compat.v1.keras` Optimizer (', optimizer, ') is ' - 'not supported when eager execution is enabled. Use a ' - '`tf.keras` Optimizer instead, or disable eager ' - 'execution.') - - if ((target_tensors is not None) - or not tf.compat.v1.executing_eagerly_outside_functions()): - # Fallback out of things that aren't supported with v2 loops - self._experimental_run_tf_function = False - - if distribute is not None: - if tf.__internal__.tf2.enabled() or self._experimental_run_tf_function: - raise ValueError( - 'Distribute argument in compile is not available in TF 2.0 please ' - 'create the model under the distribution strategy scope.') - logging.warning('Distribute argument in compile is deprecated please ' - 'create the model under the distribution strategy scope.') - self._distribution_strategy = distribute - self._compile_distribution = True - else: - if tf.distribute.has_strategy(): - # When the user builds the model in the DS scope and cross replica - # context we want distribution strategy to be set but when building the - # replica copies of the models internally we should not be compiling - # with distribution strategy and use the default compilation path. - if tf.distribute.in_cross_replica_context(): - self._distribution_strategy = ( - tf.distribute.get_strategy()) - - if isinstance(self._distribution_strategy, - tf.compat.v1.distribute.experimental.ParameterServerStrategy): - raise NotImplementedError( - '`tf.compat.v1.distribute.experimental.ParameterServerStrategy` ' - 'currently only works with the tf.Estimator API') - - if isinstance(self._distribution_strategy, - tf.distribute.experimental.ParameterServerStrategy): - raise NotImplementedError( - '`tf.distribute.experimental.ParameterServerStrategy` is only ' - 'supported in TF2.') - - if not self._experimental_run_tf_function: - self._validate_compile_param_for_distribution_strategy(self.run_eagerly, - sample_weight_mode, - target_tensors, - weighted_metrics) - # We've disabled automatic dependency tracking for this method, but do want - # to add a checkpoint dependency on the optimizer if it's trackable. 
- if isinstance(self.optimizer, tf.__internal__.tracking.Trackable): - self._track_trackable( - self.optimizer, name='optimizer', overwrite=True) - self.loss = loss or {} - self.loss_weights = loss_weights - self.sample_weight_mode = sample_weight_mode - self._compile_metrics = metrics or [] - self._compile_weighted_metrics = weighted_metrics - if self.run_eagerly and target_tensors is not None: - raise ValueError( - 'target_tensors argument is not supported when ' - 'running a model eagerly.') - - # _training_endpoints contains a list of _TrainingEndpoint object, which has - # all the model output/target/loss and related metadata. - self._training_endpoints = [] - - # Used to freeze the behavior of the Model once `compile` has been called. - self._compiled_trainable_state = self._get_trainable_state() - - # Set tf.distribute.Strategy specific parameters. - self._distributed_model_cache = {} - self._distributed_function_cache = {} - - # Clear any `_eager_losses` that was added. - self._clear_losses() - - if (not tf.executing_eagerly() and - self._distribution_strategy is not None): - # Ensures a Session is created and configured correctly for Distribution - # Strategy. - backend.configure_and_create_distributed_session( - self._distribution_strategy) - # Initialize model metric attributes. - self._init_metric_attributes() - if not self.built or not self.inputs or not self.outputs: - # Model is not compilable because it does not know its number of inputs - # and outputs, nor their shapes and names. We will compile after the first - # time the model gets called on training data. - return - self._is_compiled = True - base_layer.keras_api_gauge.get_cell('compile').set(True) - - # Prepare list of loss functions, same size of model outputs. - self.loss_functions = training_utils_v1.prepare_loss_functions( - self.loss, self.output_names) - - target_tensors = self._process_target_tensor_for_compile(target_tensors) - - for o, n, l, t in zip(self.outputs, self.output_names, - self.loss_functions, target_tensors): - endpoint = _TrainingEndpoint(o, n, l) - endpoint.create_training_target(t, run_eagerly=self.run_eagerly) - self._training_endpoints.append(endpoint) - - # Prepare list loss weights, same size of model outputs. - training_utils_v1.prepare_loss_weights(self._training_endpoints, - loss_weights) - - # Initialization for Eager mode execution. - if self.run_eagerly: - self._compile_eagerly(metrics, weighted_metrics, sample_weight_mode) - return - - with backend.get_graph().as_default(): - # Save all metric attributes per output of the model. - self._cache_output_metric_attributes(metrics, weighted_metrics) - - # Set metric attributes on model. - self._set_metric_attributes() - - # Invoke metric functions (unweighted) for all the outputs. - self._handle_metrics( - self.outputs, - targets=self._targets, - skip_target_masks=self._prepare_skip_target_masks(), - masks=self._prepare_output_masks()) - - # Prepare sample weight modes. List with the same length as model outputs. - training_utils_v1.prepare_sample_weight_modes( - self._training_endpoints, sample_weight_mode) - - # Creates the model loss and weighted metrics sub-graphs. - self._compile_weights_loss_and_weighted_metrics() - - # Functions for train, test and predict will - # be compiled lazily when required. - # This saves time when the user is not using all functions. - self.train_function = None - self.test_function = None - self.predict_function = None - - # Collected trainable weights, sorted in topological order. 
- self._collected_trainable_weights = self.trainable_weights - - # Validate all variables were correctly created in distribution scope. - if self._distribution_strategy and not self._compile_distribution: - for v in self.variables: - strategy = self._distribution_strategy - if not strategy.extended.variable_created_in_scope(v): + def load_weights(self, filepath, by_name=False, skip_mismatch=False): + """Loads all layer weights, either from a TensorFlow or an HDF5 file. + + If `by_name` is False weights are loaded based on the network's + topology. This means the architecture should be the same as when the + weights were saved. Note that layers that don't have weights are not + taken into account in the topological ordering, so adding or removing + layers is fine as long as they don't have weights. + + If `by_name` is True, weights are loaded into layers only if they share + the same name. This is useful for fine-tuning or transfer-learning + models where some of the layers have changed. + + Only topological loading (`by_name=False`) is supported when loading + weights from the TensorFlow format. Note that topological loading + differs slightly between TensorFlow and HDF5 formats for user-defined + classes inheriting from `tf.keras.Model`: HDF5 loads based on a + flattened list of weights, while the TensorFlow format loads based on + the object-local names of attributes to which layers are assigned in the + `Model`'s constructor. + + Args: + filepath: String, path to the weights file to load. For weight files + in TensorFlow format, this is the file prefix (the same as was + passed to `save_weights`). + by_name: Boolean, whether to load weights by name or by topological + order. Only topological loading is supported for weight files in + TensorFlow format. + skip_mismatch: Boolean, whether to skip loading of layers where + there is a mismatch in the number of weights, or a mismatch in + the shape of the weight (only valid when `by_name=True`). + + Returns: + When loading a weight file in TensorFlow format, returns the same + status object as `tf.train.Checkpoint.restore`. When graph building, + restore ops are run automatically as soon as the network is built + (on first call for user-defined classes inheriting from `Model`, + immediately if it is already built). + + When loading weights in HDF5 format, returns `None`. + + Raises: + ImportError: If h5py is not available and the weight file is in HDF5 + format. + ValueError: If `skip_mismatch` is set to `True` when `by_name` is + `False`. + """ + if backend.is_tpu_strategy(self._distribution_strategy): + if self._distribution_strategy.extended.steps_per_run > 1 and ( + not saving_utils.is_hdf5_filepath(filepath) + ): + raise ValueError( + "Load weights is not yet supported with TPUStrategy " + "with steps_per_run greater than 1." + ) + return super().load_weights( + filepath, by_name=by_name, skip_mismatch=skip_mismatch + ) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def compile( + self, + optimizer="rmsprop", + loss=None, + metrics=None, + loss_weights=None, + sample_weight_mode=None, + weighted_metrics=None, + target_tensors=None, + distribute=None, + **kwargs, + ): + """Configures the model for training. + + Args: + optimizer: String (name of optimizer) or optimizer instance. + See `tf.keras.optimizers`. + loss: String (name of objective function), objective function or + `tf.keras.losses.Loss` instance. See `tf.keras.losses`. 
An + objective function is any callable with the signature + `scalar_loss = fn(y_true, y_pred)`. If the model has multiple + outputs, you can use a different loss on each output by passing + a dictionary or a list of losses. The loss value that will be + minimized by the model will then be the sum of all individual + losses. + metrics: List of metrics to be evaluated by the model during + training and testing. Typically you will use + `metrics=['accuracy']`. To specify different metrics for + different outputs of a multi-output model, you could also pass a + dictionary, such as `metrics={'output_a': 'accuracy', + 'output_b': ['accuracy', 'mse']}`. You can also pass a list + (len = len(outputs)) of lists of metrics such as + `metrics=[['accuracy'], ['accuracy', 'mse']]` or + `metrics=['accuracy', ['accuracy', 'mse']]`. + loss_weights: Optional list or dictionary specifying scalar + coefficients (Python floats) to weight the loss contributions + of different model outputs. + The loss value that will be minimized by the model + will then be the *weighted sum* of all individual losses, + weighted by the `loss_weights` coefficients. + If a list, it is expected to have a 1:1 mapping + to the model's outputs. If a dict, it is expected to map + output names (strings) to scalar coefficients. + sample_weight_mode: If you need to do timestep-wise + sample weighting (2D weights), set this to `"temporal"`. + `None` becomes sample-wise weights (1D). + If the model has multiple outputs, you can use a different + `sample_weight_mode` on each output by passing a + dictionary or a list of modes. Defaults to `None`. + weighted_metrics: List of metrics to be evaluated and weighted + by sample_weight or class_weight during training and testing. + target_tensors: By default, Keras will create placeholders for the + model's target, which will be fed with the target data during + training. If instead you would like to use your own + target tensors (in turn, Keras will not expect external + Numpy data for these targets at training time), you + can specify them via the `target_tensors` argument. It can be + a single tensor (for a single-output model), a list of tensors, + or a dict mapping output names to target tensors. + distribute: NOT SUPPORTED IN TF 2.0, please create and compile the + model under distribution strategy scope instead of passing it to + compile. + **kwargs: Any additional arguments. + + Raises: + ValueError: In case of invalid arguments for + `optimizer`, `loss`, `metrics` or `sample_weight_mode`. + """ + self._assert_built_as_v1() + self._run_eagerly = kwargs.pop("run_eagerly", None) + self._experimental_run_tf_function = kwargs.pop( + "experimental_run_tf_function", True + ) + self._v1_compile_was_called = True + + # Prepare Session arguments (legacy). + kwargs.pop("cloning", None) # Legacy DistStrat argument, never used. + self._from_serialized = kwargs.pop("from_serialized", False) + allowed_kwargs = {"feed_dict", "fetches", "options", "run_metadata"} + unknown_kwargs = set(kwargs.keys()) - allowed_kwargs + if unknown_kwargs: + raise TypeError( + f"Invalid keyword argument(s) in `compile`: {unknown_kwargs}" + ) + self._function_kwargs = kwargs + if self._function_kwargs: + self._experimental_run_tf_function = False + if self.run_eagerly: + raise ValueError( + "Session keyword arguments are not supported " + "when `run_eagerly=True`.
You passed the following " + "Session arguments: %s" % (self._function_kwargs,) + ) + + self._set_optimizer(optimizer) + is_any_keras_optimizer_v1 = any( + ( + isinstance(opt, optimizer_v1.Optimizer) + and not isinstance(opt, optimizer_v1.TFOptimizer) + ) + for opt in tf.nest.flatten(self.optimizer) + ) + + if ( + is_any_keras_optimizer_v1 + and tf.compat.v1.executing_eagerly_outside_functions() + ): raise ValueError( - 'Variable (%s) was not created in the distribution strategy ' - 'scope of (%s). It is most likely due to not all layers or ' - 'the model or optimizer being created outside the distribution ' - 'strategy scope. Try to make sure your code looks similar ' - 'to the following.\n' - 'with strategy.scope():\n' - ' model=_create_model()\n' - ' model.compile(...)'% (v, strategy)) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _init_distributed_function_cache_if_not_compiled(self): - if not hasattr(self, '_distributed_function_cache'): - self._distributed_function_cache = {} - - @property - def metrics(self): - """Returns the model's metrics added using `compile`, `add_metric` APIs.""" - metrics = [] - if self._is_compiled: - if not hasattr(self, '_v1_compile_was_called'): - # See b/155687393 for more details, the model is created as a v2 - # instance but converted to v1. Fallback to use base Model to retrieve - # the metrics. - return super().metrics - metrics += self._compile_metric_functions - metrics.extend(self._metrics) - metrics.extend( - _get_metrics_from_layers( - list(self._flatten_layers(include_self=False, recursive=False)))) - return metrics - - @property - def metrics_names(self): - """Returns the model's display labels for all outputs.""" - - # This property includes all output names including `loss` and per-output - # losses for backward compatibility. - metrics_names = ['loss'] - if self._is_compiled: - if not hasattr(self, '_v1_compile_was_called'): - # See b/155687393 for more details, the model is created as a v2 - # instance but converted to v1. Fallback to use base Model to retrieve - # the metrics name - return super().metrics_names - - # Add output loss metric names to the metric names list. - if len(self._training_endpoints) > 1: - metrics_names.extend([ - e.loss_name() - for e in self._training_endpoints - if not e.should_skip_target() - ]) - - # Add all metric names. - metrics_names += [m.name for m in self.metrics] - return metrics_names - - @property - def run_eagerly(self): - """Settable attribute indicating whether the model should run eagerly. - - Running eagerly means that your model will be run step by step, - like Python code. Your model might run slower, but it should become easier - for you to debug it by stepping into individual layer calls. - - By default, we will attempt to compile your model to a static graph to - deliver the best execution performance. - - Returns: - Boolean, whether the model should run eagerly. - """ - if self._run_eagerly is True and not tf.executing_eagerly(): - raise ValueError('You can only set `run_eagerly=True` if eager execution ' - 'is enabled.') - if not self.dynamic: - if self._run_eagerly is None: - # Respect `tf.config.run_functions_eagerly` unless - # `run_eagerly` was explicitly passed to `compile`. - return tf.config.functions_run_eagerly() - else: - return self._run_eagerly - else: - if not tf.executing_eagerly(): - raise ValueError('Your model contains layers that can only be ' - 'successfully run in eager execution (layers ' - 'constructed with `dynamic=True`). 
' - 'You must enable eager execution with ' - '`tf.enable_eager_execution()`.') - if self._run_eagerly is False: - # TODO(fchollet): consider using py_func to enable this. - raise ValueError('Your model contains layers that can only be ' - 'successfully run in eager execution (layers ' - 'constructed with `dynamic=True`). ' - 'You cannot set `run_eagerly=False`.') - return tf.executing_eagerly() - - @run_eagerly.setter - def run_eagerly(self, value): - self._run_eagerly = value - - def _select_training_loop(self, inputs): - """Select training loop for fit/eval/predict based on the inputs.""" - # TODO(kaftan) or TODO(scottzhu): This check should eventually be nicely - # integrated into the data adapters in the v2 loop. We can't do this yet - # because we currently have to fall back for unhandled data types. - if isinstance(inputs, (tf.compat.v1.data.Iterator, - tf.data.Iterator)): - raise ValueError('For performance reasons Keras `fit`, `evaluate` and' - '`predict` accept tf.data `Datasets` as input but not ' - 'iterators that have been manually generated from ' - 'Datasets by users. Please directly pass in the ' - 'original `Dataset` object instead of passing in ' - '`iter(dataset)`.') - - # Case 1: distribution strategy. - if self._distribution_strategy: - if self._in_multi_worker_mode(): - return training_distributed_v1.DistributionMultiWorkerTrainingLoop( - training_distributed_v1.DistributionSingleWorkerTrainingLoop()) - else: - return training_distributed_v1.DistributionSingleWorkerTrainingLoop() - - # Case 2: generator-like. Input is Python generator, or Sequence object, - # or a non-distributed Dataset or iterator in eager execution. - if data_utils.is_generator_or_sequence(inputs): - return training_generator_v1.GeneratorOrSequenceTrainingLoop() - if training_utils_v1.is_eager_dataset_or_iterator(inputs): - return training_generator_v1.EagerDatasetOrIteratorTrainingLoop() - - # Case 3: Symbolic tensors or Numpy array-like. - # This includes Datasets and iterators in graph mode (since they - # generate symbolic tensors). - if self.run_eagerly: - return training_generator_v1.GeneratorLikeTrainingLoop() - else: - return training_arrays_v1.ArrayLikeTrainingLoop() - - def fit(self, - x=None, - y=None, - batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - validation_split=0., - validation_data=None, - shuffle=True, - class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_freq=1, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - **kwargs): - """Trains the model for a fixed number of epochs (iterations on a dataset). - - Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A dict mapping input names to the corresponding array/tensors, - if the model has named inputs. - - A `tf.data` dataset. Should return a tuple - of either `(inputs, targets)` or - `(inputs, targets, sample_weights)`. - - A generator or `keras.utils.Sequence` returning `(inputs, targets)` - or `(inputs, targets, sample weights)`. - y: Target data. Like the input data `x`, - it could be either Numpy array(s) or TensorFlow tensor(s). - It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). 
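# A minimal sketch of the per-output `loss` / `loss_weights` / `metrics`
# mappings from the `compile` contract documented above; the two-output
# model and its layer names are illustrative, not from this diff.
import tensorflow as tf

inputs = tf.keras.Input(shape=(8,))
out_a = tf.keras.layers.Dense(1, name="output_a")(inputs)
out_b = tf.keras.layers.Dense(3, activation="softmax", name="output_b")(inputs)
model = tf.keras.Model(inputs, [out_a, out_b])

model.compile(
    optimizer="rmsprop",
    loss={"output_a": "mse", "output_b": "categorical_crossentropy"},
    # Total loss minimized = 1.0 * loss_a + 0.2 * loss_b.
    loss_weights={"output_a": 1.0, "output_b": 0.2},
    metrics={"output_a": ["mae"], "output_b": ["accuracy"]},
)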
If `x` is a dataset, generator, - or `keras.utils.Sequence` instance, `y` should - not be specified (since targets will be obtained from `x`). - batch_size: Integer or `None`. - Number of samples per gradient update. - If unspecified, `batch_size` will default to 32. - Do not specify the `batch_size` if your data is in the - form of symbolic tensors, datasets, - generators, or `keras.utils.Sequence` instances (since they generate - batches). - epochs: Integer. Number of epochs to train the model. - An epoch is an iteration over the entire `x` and `y` - data provided. - Note that in conjunction with `initial_epoch`, - `epochs` is to be understood as "final epoch". - The model is not trained for a number of iterations - given by `epochs`, but merely until the epoch - of index `epochs` is reached. - verbose: 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = progress bar, 2 = one line per epoch. - Note that the progress bar is not particularly useful when - logged to a file, so verbose=2 is recommended when not running - interactively (eg, in a production environment). - callbacks: List of `keras.callbacks.Callback` instances. - List of callbacks to apply during training. - See `tf.keras.callbacks`. - validation_split: Float between 0 and 1. - Fraction of the training data to be used as validation data. - The model will set apart this fraction of the training data, - will not train on it, and will evaluate - the loss and any model metrics - on this data at the end of each epoch. - The validation data is selected from the last samples - in the `x` and `y` data provided, before shuffling. This argument is - not supported when `x` is a dataset, generator or - `keras.utils.Sequence` instance. - validation_data: Data on which to evaluate - the loss and any model metrics at the end of each epoch. - The model will not be trained on this data. - `validation_data` will override `validation_split`. - `validation_data` could be: - - tuple `(x_val, y_val)` of Numpy arrays or tensors - - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays - - dataset - For the first two cases, `batch_size` must be provided. - For the last case, `validation_steps` could be provided. - shuffle: Boolean (whether to shuffle the training data - before each epoch) or str (for 'batch'). - 'batch' is a special option for dealing with the - limitations of HDF5 data; it shuffles in batch-sized chunks. - Has no effect when `steps_per_epoch` is not `None`. - class_weight: Optional dictionary mapping class indices (integers) - to a weight (float) value, used for weighting the loss function - (during training only). - This can be useful to tell the model to - "pay more attention" to samples from - an under-represented class. - sample_weight: Optional Numpy array of weights for - the training samples, used for weighting the loss function - (during training only). You can either pass a flat (1D) - Numpy array with the same length as the input samples - (1:1 mapping between weights and samples), - or in the case of temporal data, - you can pass a 2D array with shape - `(samples, sequence_length)`, - to apply a different weight to every timestep of every sample. - In this case you should make sure to specify - `sample_weight_mode="temporal"` in `compile()`. This argument is not - supported when `x` is a dataset, generator, or - `keras.utils.Sequence` instance, instead provide the sample_weights - as the third element of `x`. - initial_epoch: Integer. - Epoch at which to start training - (useful for resuming a previous training run). 
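# A small sketch of the `callbacks` and `validation_split` arguments
# described above, using the standard `EarlyStopping` callback; the
# arrays are synthetic placeholders.
import numpy as np
import tensorflow as tf

x = np.random.rand(100, 8).astype("float32")
y = np.random.rand(100, 1).astype("float32")

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
model.compile("rmsprop", "mse")

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=3, restore_best_weights=True
)
model.fit(
    x, y,
    validation_split=0.2,    # last 20% of the arrays, taken before shuffling
    epochs=50,
    callbacks=[early_stop],  # may end training well before epoch 50
    verbose=0,
)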
- steps_per_epoch: Integer or `None`. - Total number of steps (batches of samples) - before declaring one epoch finished and starting the - next epoch. When training with input tensors such as - TensorFlow data tensors, the default `None` is equal to - the number of samples in your dataset divided by - the batch size, or 1 if that cannot be determined. If x is a - `tf.data` dataset, and 'steps_per_epoch' - is None, the epoch will run until the input dataset is exhausted. - This argument is not supported with array inputs. - validation_steps: Only relevant if `validation_data` is provided and - is a `tf.data` dataset. Total number of steps (batches of - samples) to draw before stopping when performing validation - at the end of every epoch. If 'validation_steps' is None, validation - will run until the `validation_data` dataset is exhausted. In the - case of a infinite dataset, it will run into a infinite loop. - If 'validation_steps' is specified and only part of the dataset - will be consumed, the evaluation will start from the beginning of - the dataset at each epoch. This ensures that the same validation - samples are used every time. - validation_freq: Only relevant if validation data is provided. Integer - or `collections.abc.Container` instance (e.g. list, tuple, etc.). - If an integer, specifies how many training epochs to run before a - new validation run is performed, e.g. `validation_freq=2` runs - validation every 2 epochs. If a Container, specifies the epochs on - which to run validation, e.g. `validation_freq=[1, 2, 10]` runs - validation at the end of the 1st, 2nd, and 10th epochs. - max_queue_size: Integer. Used for generator or `keras.utils.Sequence` - input only. Maximum size for the generator queue. - If unspecified, `max_queue_size` will default to 10. - workers: Integer. Used for generator or `keras.utils.Sequence` input - only. Maximum number of processes to spin up - when using process-based threading. If unspecified, `workers` - will default to 1. If 0, will execute the generator on the main - thread. - use_multiprocessing: Boolean. Used for generator or - `keras.utils.Sequence` input only. If `True`, use process-based - threading. If unspecified, `use_multiprocessing` will default to - `False`. Note that because this implementation relies on - multiprocessing, you should not pass non-picklable arguments to - the generator as they can't be passed easily to children processes. - **kwargs: Used for backwards compatibility. + "`tf.compat.v1.keras` Optimizer (", + optimizer, + ") is " + "not supported when eager execution is enabled. Use a " + "`tf.keras` Optimizer instead, or disable eager " + "execution.", + ) + + if ( + target_tensors is not None + ) or not tf.compat.v1.executing_eagerly_outside_functions(): + # Fallback out of things that aren't supported with v2 loops + self._experimental_run_tf_function = False + + if distribute is not None: + if ( + tf.__internal__.tf2.enabled() + or self._experimental_run_tf_function + ): + raise ValueError( + "Distribute argument in compile is not available in TF 2.0 " + "please create the model under the distribution strategy " + "scope." + ) + logging.warning( + "Distribute argument in compile is deprecated please " + "create the model under the distribution strategy scope." 
+ ) + self._distribution_strategy = distribute + self._compile_distribution = True + else: + if tf.distribute.has_strategy(): + # When the user builds the model in the DS scope and cross + # replica context we want distribution strategy to be set but + # when building the replica copies of the models internally we + # should not be compiling with distribution strategy and use the + # default compilation path. + if tf.distribute.in_cross_replica_context(): + self._distribution_strategy = tf.distribute.get_strategy() + + if isinstance( + self._distribution_strategy, + tf.compat.v1.distribute.experimental.ParameterServerStrategy, + ): + raise NotImplementedError( + "`tf.compat.v1.distribute.experimental.ParameterServerStrategy`" + " currently only works with the tf.Estimator API" + ) + + if isinstance( + self._distribution_strategy, + tf.distribute.experimental.ParameterServerStrategy, + ): + raise NotImplementedError( + "`tf.distribute.experimental.ParameterServerStrategy` is only " + "supported in TF2." + ) + + if not self._experimental_run_tf_function: + self._validate_compile_param_for_distribution_strategy( + self.run_eagerly, + sample_weight_mode, + target_tensors, + weighted_metrics, + ) + # We've disabled automatic dependency tracking for this method, but do + # want to add a checkpoint dependency on the optimizer if it's + # trackable. + if isinstance(self.optimizer, tf.__internal__.tracking.Trackable): + self._track_trackable( + self.optimizer, name="optimizer", overwrite=True + ) + self.loss = loss or {} + self.loss_weights = loss_weights + self.sample_weight_mode = sample_weight_mode + self._compile_metrics = metrics or [] + self._compile_weighted_metrics = weighted_metrics + if self.run_eagerly and target_tensors is not None: + raise ValueError( + "target_tensors argument is not supported when " + "running a model eagerly." + ) + + # _training_endpoints contains a list of _TrainingEndpoint object, which + # has all the model output/target/loss and related metadata. + self._training_endpoints = [] + + # Used to freeze the behavior of the Model once `compile` has been + # called. + self._compiled_trainable_state = self._get_trainable_state() + + # Set tf.distribute.Strategy specific parameters. + self._distributed_model_cache = {} + self._distributed_function_cache = {} + + # Clear any `_eager_losses` that was added. + self._clear_losses() + + if ( + not tf.executing_eagerly() + and self._distribution_strategy is not None + ): + # Ensures a Session is created and configured correctly for + # Distribution Strategy. + backend.configure_and_create_distributed_session( + self._distribution_strategy + ) + # Initialize model metric attributes. + self._init_metric_attributes() + if not self.built or not self.inputs or not self.outputs: + # Model is not compilable because it does not know its number of + # inputs and outputs, nor their shapes and names. We will compile + # after the first time the model gets called on training data. + return + self._is_compiled = True + base_layer.keras_api_gauge.get_cell("compile").set(True) + + # Prepare list of loss functions, same size of model outputs. 
+ self.loss_functions = training_utils_v1.prepare_loss_functions( + self.loss, self.output_names + ) + + target_tensors = self._process_target_tensor_for_compile(target_tensors) + + for o, n, l, t in zip( + self.outputs, self.output_names, self.loss_functions, target_tensors + ): + endpoint = _TrainingEndpoint(o, n, l) + endpoint.create_training_target(t, run_eagerly=self.run_eagerly) + self._training_endpoints.append(endpoint) + + # Prepare list loss weights, same size of model outputs. + training_utils_v1.prepare_loss_weights( + self._training_endpoints, loss_weights + ) + + # Initialization for Eager mode execution. + if self.run_eagerly: + self._compile_eagerly(metrics, weighted_metrics, sample_weight_mode) + return + + with backend.get_graph().as_default(): + # Save all metric attributes per output of the model. + self._cache_output_metric_attributes(metrics, weighted_metrics) + + # Set metric attributes on model. + self._set_metric_attributes() + + # Invoke metric functions (unweighted) for all the outputs. + self._handle_metrics( + self.outputs, + targets=self._targets, + skip_target_masks=self._prepare_skip_target_masks(), + masks=self._prepare_output_masks(), + ) + + # Prepare sample weight modes. List with the same length as model + # outputs. + training_utils_v1.prepare_sample_weight_modes( + self._training_endpoints, sample_weight_mode + ) + + # Creates the model loss and weighted metrics sub-graphs. + self._compile_weights_loss_and_weighted_metrics() + + # Functions for train, test and predict will + # be compiled lazily when required. + # This saves time when the user is not using all functions. + self.train_function = None + self.test_function = None + self.predict_function = None + + # Collected trainable weights, sorted in topological order. + self._collected_trainable_weights = self.trainable_weights + + # Validate all variables were correctly created in distribution + # scope. + if self._distribution_strategy and not self._compile_distribution: + for v in self.variables: + strategy = self._distribution_strategy + if not strategy.extended.variable_created_in_scope(v): + raise ValueError( + "Variable (%s) was not created in the distribution " + "strategy scope of (%s). It is most likely due to " + "not all layers or the model or optimizer being " + "created outside the distribution strategy scope. " + "Try to make sure your code looks similar " + "to the following.\n" + "with strategy.scope():\n" + " model=_create_model()\n" + " model.compile(...)" % (v, strategy) + ) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _init_distributed_function_cache_if_not_compiled(self): + if not hasattr(self, "_distributed_function_cache"): + self._distributed_function_cache = {} + + @property + def metrics(self): + """Returns the model's metrics added using `compile`, `add_metric` + APIs.""" + metrics = [] + if self._is_compiled: + if not hasattr(self, "_v1_compile_was_called"): + # See b/155687393 for more details, the model is created as a v2 + # instance but converted to v1. Fallback to use base Model to + # retrieve the metrics. 
+ return super().metrics + metrics += self._compile_metric_functions + metrics.extend(self._metrics) + metrics.extend( + _get_metrics_from_layers( + list(self._flatten_layers(include_self=False, recursive=False)) + ) + ) + return metrics + + @property + def metrics_names(self): + """Returns the model's display labels for all outputs.""" + + # This property includes all output names including `loss` and + # per-output losses for backward compatibility. + metrics_names = ["loss"] + if self._is_compiled: + if not hasattr(self, "_v1_compile_was_called"): + # See b/155687393 for more details, the model is created as a v2 + # instance but converted to v1. Fallback to use base Model to + # retrieve the metrics name + return super().metrics_names + + # Add output loss metric names to the metric names list. + if len(self._training_endpoints) > 1: + metrics_names.extend( + [ + e.loss_name() + for e in self._training_endpoints + if not e.should_skip_target() + ] + ) + + # Add all metric names. + metrics_names += [m.name for m in self.metrics] + return metrics_names + + @property + def run_eagerly(self): + """Settable attribute indicating whether the model should run eagerly. + + Running eagerly means that your model will be run step by step, + like Python code. Your model might run slower, but it should become + easier for you to debug it by stepping into individual layer calls. + + By default, we will attempt to compile your model to a static graph to + deliver the best execution performance. + + Returns: + Boolean, whether the model should run eagerly. + """ + if self._run_eagerly is True and not tf.executing_eagerly(): + raise ValueError( + "You can only set `run_eagerly=True` if eager execution " + "is enabled." + ) + if not self.dynamic: + if self._run_eagerly is None: + # Respect `tf.config.run_functions_eagerly` unless + # `run_eagerly` was explicitly passed to `compile`. + return tf.config.functions_run_eagerly() + else: + return self._run_eagerly + else: + if not tf.executing_eagerly(): + raise ValueError( + "Your model contains layers that can only be " + "successfully run in eager execution (layers " + "constructed with `dynamic=True`). " + "You must enable eager execution with " + "`tf.enable_eager_execution()`." + ) + if self._run_eagerly is False: + # TODO(fchollet): consider using py_func to enable this. + raise ValueError( + "Your model contains layers that can only be " + "successfully run in eager execution (layers " + "constructed with `dynamic=True`). " + "You cannot set `run_eagerly=False`." + ) + return tf.executing_eagerly() + + @run_eagerly.setter + def run_eagerly(self, value): + self._run_eagerly = value + + def _select_training_loop(self, inputs): + """Select training loop for fit/eval/predict based on the inputs.""" + # TODO(kaftan) or TODO(scottzhu): This check should eventually be nicely + # integrated into the data adapters in the v2 loop. We can't do this yet + # because we currently have to fall back for unhandled data types. + if isinstance(inputs, (tf.compat.v1.data.Iterator, tf.data.Iterator)): + raise ValueError( + "For performance reasons Keras `fit`, `evaluate` and" + "`predict` accept tf.data `Datasets` as input but not " + "iterators that have been manually generated from " + "Datasets by users. Please directly pass in the " + "original `Dataset` object instead of passing in " + "`iter(dataset)`." + ) + + # Case 1: distribution strategy. 
+ if self._distribution_strategy: + if self._in_multi_worker_mode(): + return training_distributed_v1.DistributionMultiWorkerTrainingLoop( # noqa: E501 + training_distributed_v1.DistributionSingleWorkerTrainingLoop() # noqa: E501 + ) + else: + return ( + training_distributed_v1.DistributionSingleWorkerTrainingLoop() # noqa: E501 + ) + + # Case 2: generator-like. Input is Python generator, or Sequence object, + # or a non-distributed Dataset or iterator in eager execution. + if data_utils.is_generator_or_sequence(inputs): + return training_generator_v1.GeneratorOrSequenceTrainingLoop() + if training_utils_v1.is_eager_dataset_or_iterator(inputs): + return training_generator_v1.EagerDatasetOrIteratorTrainingLoop() + + # Case 3: Symbolic tensors or Numpy array-like. + # This includes Datasets and iterators in graph mode (since they + # generate symbolic tensors). + if self.run_eagerly: + return training_generator_v1.GeneratorLikeTrainingLoop() + else: + return training_arrays_v1.ArrayLikeTrainingLoop() - Returns: - A `History` object. Its `History.history` attribute is - a record of training loss values and metrics values - at successive epochs, as well as validation loss values - and validation metrics values (if applicable). - - Raises: - RuntimeError: If the model was never compiled. - ValueError: In case of mismatch between the provided input data - and what the model expects. - """ - self._assert_built_as_v1() - base_layer.keras_api_gauge.get_cell('fit').set(True) - # Legacy support - if 'nb_epoch' in kwargs: - logging.warning( - 'The `nb_epoch` argument in `fit` has been renamed `epochs`.') - epochs = kwargs.pop('nb_epoch') - if kwargs: - raise TypeError('Unrecognized keyword arguments: ' + str(kwargs)) - self._assert_compile_was_called() - self._check_call_args('fit') - - func = self._select_training_loop(x) - return func.fit( + def fit( self, - x=x, - y=y, - batch_size=batch_size, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_split=validation_split, - validation_data=validation_data, - shuffle=shuffle, - class_weight=class_weight, - sample_weight=sample_weight, - initial_epoch=initial_epoch, - steps_per_epoch=steps_per_epoch, - validation_steps=validation_steps, - validation_freq=validation_freq, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing) - - def evaluate(self, - x=None, - y=None, - batch_size=None, - verbose=1, - sample_weight=None, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False): - """Returns the loss value & metrics values for the model in test mode. - - Computation is done in batches (see the `batch_size` arg.) - - Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A dict mapping input names to the corresponding array/tensors, - if the model has named inputs. - - A `tf.data` dataset. - - A generator or `keras.utils.Sequence` instance. - y: Target data. Like the input data `x`, - it could be either Numpy array(s) or TensorFlow tensor(s). - It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). - If `x` is a dataset, generator or - `keras.utils.Sequence` instance, `y` should not be specified (since - targets will be obtained from the iterator/dataset). - batch_size: Integer or `None`. - Number of samples per batch of computation. 
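# A sketch of the pattern the distribution-strategy checks above expect:
# build *and* compile the model inside `strategy.scope()`, so every
# variable is created in that scope.
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = tf.keras.Sequential(
        [tf.keras.layers.Dense(1, input_shape=(8,))]
    )
    model.compile("rmsprop", "mse")
# Creating layers or the optimizer outside the scope is what triggers the
# "Variable (...) was not created in the distribution strategy scope" error.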
- If unspecified, `batch_size` will default to 32. - Do not specify the `batch_size` if your data is in the - form of symbolic tensors, dataset, - generators, or `keras.utils.Sequence` instances (since they generate - batches). - verbose: 0 or 1. Verbosity mode. - 0 = silent, 1 = progress bar. - sample_weight: Optional Numpy array of weights for - the test samples, used for weighting the loss function. - You can either pass a flat (1D) - Numpy array with the same length as the input samples - (1:1 mapping between weights and samples), - or in the case of temporal data, - you can pass a 2D array with shape - `(samples, sequence_length)`, - to apply a different weight to every timestep of every sample. - In this case you should make sure to specify - `sample_weight_mode="temporal"` in `compile()`. This argument is not - supported when `x` is a dataset, instead pass - sample weights as the third element of `x`. - steps: Integer or `None`. - Total number of steps (batches of samples) - before declaring the evaluation round finished. - Ignored with the default value of `None`. - If x is a `tf.data` dataset and `steps` is - None, 'evaluate' will run until the dataset is exhausted. - This argument is not supported with array inputs. - callbacks: List of `keras.callbacks.Callback` instances. - List of callbacks to apply during evaluation. - See [callbacks](/api_docs/python/tf/keras/callbacks). - max_queue_size: Integer. Used for generator or `keras.utils.Sequence` - input only. Maximum size for the generator queue. - If unspecified, `max_queue_size` will default to 10. - workers: Integer. Used for generator or `keras.utils.Sequence` input - only. Maximum number of processes to spin up when using - process-based threading. If unspecified, `workers` will default - to 1. If 0, will execute the generator on the main thread. - use_multiprocessing: Boolean. Used for generator or - `keras.utils.Sequence` input only. If `True`, use process-based - threading. If unspecified, `use_multiprocessing` will default to - `False`. Note that because this implementation relies on - multiprocessing, you should not pass non-picklable arguments to - the generator as they can't be passed easily to children processes. - - Returns: - Scalar test loss (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - - Raises: - ValueError: in case of invalid arguments. - """ - self._assert_built_as_v1() - base_layer.keras_api_gauge.get_cell('evaluate').set(True) - self._assert_compile_was_called() - self._check_call_args('evaluate') - - func = self._select_training_loop(x) - return func.evaluate( + x=None, + y=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_freq=1, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + **kwargs, + ): + """Trains the model for a fixed number of epochs (dataset iterations). + + Args: + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset. 
Should return a tuple + of either `(inputs, targets)` or + `(inputs, targets, sample_weights)`. + - A generator or `keras.utils.Sequence` returning `(inputs, + targets)` or `(inputs, targets, sample weights)`. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). If `x` is a dataset, generator, + or `keras.utils.Sequence` instance, `y` should + not be specified (since targets will be obtained from `x`). + batch_size: Integer or `None`. + Number of samples per gradient update. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of symbolic tensors, datasets, + generators, or `keras.utils.Sequence` instances (since they + generate batches). + epochs: Integer. Number of epochs to train the model. + An epoch is an iteration over the entire `x` and `y` + data provided. + Note that in conjunction with `initial_epoch`, + `epochs` is to be understood as "final epoch". + The model is not trained for a number of iterations + given by `epochs`, but merely until the epoch + of index `epochs` is reached. + verbose: 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = one line per epoch. + Note that the progress bar is not particularly useful when + logged to a file, so verbose=2 is recommended when not running + interactively (eg, in a production environment). + callbacks: List of `keras.callbacks.Callback` instances. + List of callbacks to apply during training. + See `tf.keras.callbacks`. + validation_split: Float between 0 and 1. + Fraction of the training data to be used as validation data. + The model will set apart this fraction of the training data, + will not train on it, and will evaluate + the loss and any model metrics + on this data at the end of each epoch. + The validation data is selected from the last samples + in the `x` and `y` data provided, before shuffling. This + argument is not supported when `x` is a dataset, generator or + `keras.utils.Sequence` instance. + validation_data: Data on which to evaluate + the loss and any model metrics at the end of each epoch. + The model will not be trained on this data. + `validation_data` will override `validation_split`. + `validation_data` could be: + - tuple `(x_val, y_val)` of Numpy arrays or tensors + - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays + - dataset + For the first two cases, `batch_size` must be provided. + For the last case, `validation_steps` could be provided. + shuffle: Boolean (whether to shuffle the training data + before each epoch) or str (for 'batch'). + 'batch' is a special option for dealing with the + limitations of HDF5 data; it shuffles in batch-sized chunks. + Has no effect when `steps_per_epoch` is not `None`. + class_weight: Optional dictionary mapping class indices (integers) + to a weight (float) value, used for weighting the loss function + (during training only). + This can be useful to tell the model to + "pay more attention" to samples from + an under-represented class. + sample_weight: Optional Numpy array of weights for + the training samples, used for weighting the loss function + (during training only). 
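# A sketch of the `tf.data` input contract above: the dataset yields
# `(inputs, targets)` tuples, so neither `y` nor `batch_size` is passed;
# the data here is synthetic.
import numpy as np
import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices(
    (
        np.random.rand(64, 8).astype("float32"),
        np.random.rand(64, 1).astype("float32"),
    )
).shuffle(64).batch(16)  # the dataset itself generates the batches

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
model.compile("rmsprop", "mse")
model.fit(ds, epochs=2, verbose=0)  # pass the Dataset, never `iter(ds)`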
You can either pass a flat (1D) + Numpy array with the same length as the input samples + (1:1 mapping between weights and samples), + or in the case of temporal data, + you can pass a 2D array with shape + `(samples, sequence_length)`, + to apply a different weight to every timestep of every sample. + In this case you should make sure to specify + `sample_weight_mode="temporal"` in `compile()`. This argument is + not supported when `x` is a dataset, generator, or + `keras.utils.Sequence` instance, instead provide the + sample_weights as the third element of `x`. + initial_epoch: Integer. + Epoch at which to start training + (useful for resuming a previous training run). + steps_per_epoch: Integer or `None`. + Total number of steps (batches of samples) + before declaring one epoch finished and starting the + next epoch. When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of samples in your dataset divided by + the batch size, or 1 if that cannot be determined. If x is a + `tf.data` dataset, and 'steps_per_epoch' + is None, the epoch will run until the input dataset is + exhausted. This argument is not supported with array inputs. + validation_steps: Only relevant if `validation_data` is provided and + is a `tf.data` dataset. Total number of steps (batches of + samples) to draw before stopping when performing validation at + the end of every epoch. If 'validation_steps' is None, + validation will run until the `validation_data` dataset is + exhausted. In the case of an infinite dataset, it will run into an + infinite loop. If 'validation_steps' is specified and only part + of the dataset will be consumed, the evaluation will start from + the beginning of the dataset at each epoch. This ensures that + the same validation samples are used every time. + validation_freq: Only relevant if validation data is provided. + Integer or `collections.abc.Container` instance (e.g. list, + tuple, etc.). If an integer, specifies how many training epochs + to run before a new validation run is performed, e.g. + `validation_freq=2` runs validation every 2 epochs. If a + Container, specifies the epochs on which to run validation, e.g. + `validation_freq=[1, 2, 10]` runs validation at the end of the + 1st, 2nd, and 10th epochs. + max_queue_size: Integer. Used for generator or + `keras.utils.Sequence` input only. Maximum size for the + generator queue. If unspecified, `max_queue_size` will default + to 10. + workers: Integer. Used for generator or `keras.utils.Sequence` input + only. Maximum number of processes to spin up + when using process-based threading. If unspecified, `workers` + will default to 1. If 0, will execute the generator on the main + thread. + use_multiprocessing: Boolean. Used for generator or + `keras.utils.Sequence` input only. If `True`, use process-based + threading. If unspecified, `use_multiprocessing` will default to + `False`. Note that because this implementation relies on + multiprocessing, you should not pass non-pickleable arguments to + the generator as they can't be passed easily to children + processes. + **kwargs: Used for backwards compatibility. + + Returns: + A `History` object. Its `History.history` attribute is + a record of training loss values and metrics values + at successive epochs, as well as validation loss values + and validation metrics values (if applicable). + + Raises: + RuntimeError: If the model was never compiled.
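# A sketch of `validation_data` and `validation_freq` as documented above;
# the arrays are synthetic placeholders.
import numpy as np
import tensorflow as tf

x = np.random.rand(64, 8).astype("float32")
y = np.random.rand(64, 1).astype("float32")
x_val = np.random.rand(16, 8).astype("float32")
y_val = np.random.rand(16, 1).astype("float32")

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
model.compile("rmsprop", "mse")
model.fit(
    x, y,
    epochs=10,
    validation_data=(x_val, y_val),  # overrides any `validation_split`
    validation_freq=2,               # validate after epochs 2, 4, 6, ...
    verbose=0,
)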
+ ValueError: In case of mismatch between the provided input data + and what the model expects. + """ + self._assert_built_as_v1() + base_layer.keras_api_gauge.get_cell("fit").set(True) + # Legacy support + if "nb_epoch" in kwargs: + logging.warning( + "The `nb_epoch` argument in `fit` has been renamed `epochs`." + ) + epochs = kwargs.pop("nb_epoch") + if kwargs: + raise TypeError("Unrecognized keyword arguments: " + str(kwargs)) + self._assert_compile_was_called() + self._check_call_args("fit") + + func = self._select_training_loop(x) + return func.fit( + self, + x=x, + y=y, + batch_size=batch_size, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_split=validation_split, + validation_data=validation_data, + shuffle=shuffle, + class_weight=class_weight, + sample_weight=sample_weight, + initial_epoch=initial_epoch, + steps_per_epoch=steps_per_epoch, + validation_steps=validation_steps, + validation_freq=validation_freq, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + ) + + def evaluate( self, - x=x, - y=y, - batch_size=batch_size, - verbose=verbose, - sample_weight=sample_weight, - steps=steps, - callbacks=callbacks, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing) - - def predict(self, - x, - batch_size=None, - verbose=0, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False): - """Generates output predictions for the input samples. - - Computation is done in batches (see the `batch_size` arg.) - - Args: - x: Input samples. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A `tf.data` dataset. - - A generator or `keras.utils.Sequence` instance. - batch_size: Integer or `None`. - Number of samples per batch of computation. - If unspecified, `batch_size` will default to 32. - Do not specify the `batch_size` if your data is in the - form of symbolic tensors, dataset, - generators, or `keras.utils.Sequence` instances (since they generate - batches). - verbose: Verbosity mode, 0 or 1. - steps: Total number of steps (batches of samples) - before declaring the prediction round finished. - Ignored with the default value of `None`. If x is a `tf.data` - dataset and `steps` is None, `predict` will - run until the input dataset is exhausted. - callbacks: List of `keras.callbacks.Callback` instances. - List of callbacks to apply during prediction. - See [callbacks](/api_docs/python/tf/keras/callbacks). - max_queue_size: Integer. Used for generator or `keras.utils.Sequence` - input only. Maximum size for the generator queue. - If unspecified, `max_queue_size` will default to 10. - workers: Integer. Used for generator or `keras.utils.Sequence` input - only. Maximum number of processes to spin up when using - process-based threading. If unspecified, `workers` will default - to 1. If 0, will execute the generator on the main thread. - use_multiprocessing: Boolean. Used for generator or - `keras.utils.Sequence` input only. If `True`, use process-based - threading. If unspecified, `use_multiprocessing` will default to - `False`. Note that because this implementation relies on - multiprocessing, you should not pass non-picklable arguments to - the generator as they can't be passed easily to children processes. 
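# A sketch of the `History` return value described above: its `.history`
# dict maps loss/metric names to per-epoch lists; data is synthetic.
import numpy as np
import tensorflow as tf

x = np.random.rand(32, 8).astype("float32")
y = np.random.rand(32, 1).astype("float32")

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
model.compile("rmsprop", "mse", metrics=["mae"])
history = model.fit(x, y, epochs=3, validation_split=0.25, verbose=0)
print(sorted(history.history))  # ['loss', 'mae', 'val_loss', 'val_mae']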
- + x=None, + y=None, + batch_size=None, + verbose=1, + sample_weight=None, + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + ): + """Returns the loss value & metrics values for the model in test mode. + + Computation is done in batches (see the `batch_size` arg.) + + Args: + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset. + - A generator or `keras.utils.Sequence` instance. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). + If `x` is a dataset, generator or + `keras.utils.Sequence` instance, `y` should not be specified + (since targets will be obtained from the iterator/dataset). + batch_size: Integer or `None`. + Number of samples per batch of computation. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of symbolic tensors, dataset, + generators, or `keras.utils.Sequence` instances (since they + generate batches). + verbose: 0 or 1. Verbosity mode. + 0 = silent, 1 = progress bar. + sample_weight: Optional Numpy array of weights for + the test samples, used for weighting the loss function. + You can either pass a flat (1D) + Numpy array with the same length as the input samples + (1:1 mapping between weights and samples), + or in the case of temporal data, + you can pass a 2D array with shape + `(samples, sequence_length)`, + to apply a different weight to every timestep of every sample. + In this case you should make sure to specify + `sample_weight_mode="temporal"` in `compile()`. This argument is + not supported when `x` is a dataset, instead pass sample weights + as the third element of `x`. + steps: Integer or `None`. + Total number of steps (batches of samples) + before declaring the evaluation round finished. + Ignored with the default value of `None`. + If x is a `tf.data` dataset and `steps` is + None, 'evaluate' will run until the dataset is exhausted. + This argument is not supported with array inputs. + callbacks: List of `keras.callbacks.Callback` instances. + List of callbacks to apply during evaluation. + See [callbacks](/api_docs/python/tf/keras/callbacks). + max_queue_size: Integer. Used for generator or + `keras.utils.Sequence` input only. Maximum size for the + generator queue. If unspecified, `max_queue_size` will default + to 10. + workers: Integer. Used for generator or `keras.utils.Sequence` input + only. Maximum number of processes to spin up when using + process-based threading. If unspecified, `workers` will default + to 1. If 0, will execute the generator on the main thread. + use_multiprocessing: Boolean. Used for generator or + `keras.utils.Sequence` input only. If `True`, use process-based + threading. If unspecified, `use_multiprocessing` will default to + `False`. Note that because this implementation relies on + multiprocessing, you should not pass non-pickleable arguments to + the generator as they can't be passed easily to children + processes. + + Returns: + Scalar test loss (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). 
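# A sketch of the `evaluate` return contract above: the scalar outputs
# line up with `model.metrics_names`; data is synthetic.
import numpy as np
import tensorflow as tf

x = np.random.rand(32, 8).astype("float32")
y = np.random.rand(32, 1).astype("float32")

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
model.compile("rmsprop", "mse", metrics=["mae"])
model.fit(x, y, epochs=1, verbose=0)

results = model.evaluate(x, y, verbose=0)       # [loss, mae]
print(dict(zip(model.metrics_names, results)))  # e.g. {'loss': ..., 'mae': ...}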
The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + ValueError: in case of invalid arguments. + """ + self._assert_built_as_v1() + base_layer.keras_api_gauge.get_cell("evaluate").set(True) + self._assert_compile_was_called() + self._check_call_args("evaluate") + + func = self._select_training_loop(x) + return func.evaluate( + self, + x=x, + y=y, + batch_size=batch_size, + verbose=verbose, + sample_weight=sample_weight, + steps=steps, + callbacks=callbacks, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + ) + + def predict( + self, + x, + batch_size=None, + verbose=0, + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + ): + """Generates output predictions for the input samples. + + Computation is done in batches (see the `batch_size` arg.) + + Args: + x: Input samples. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A `tf.data` dataset. + - A generator or `keras.utils.Sequence` instance. + batch_size: Integer or `None`. + Number of samples per batch of computation. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of symbolic tensors, dataset, + generators, or `keras.utils.Sequence` instances (since they + generate batches). + verbose: Verbosity mode, 0 or 1. + steps: Total number of steps (batches of samples) + before declaring the prediction round finished. + Ignored with the default value of `None`. If x is a `tf.data` + dataset and `steps` is None, `predict` will + run until the input dataset is exhausted. + callbacks: List of `keras.callbacks.Callback` instances. + List of callbacks to apply during prediction. + See [callbacks](/api_docs/python/tf/keras/callbacks). + max_queue_size: Integer. Used for generator or + `keras.utils.Sequence` input only. Maximum size for the + generator queue. If unspecified, `max_queue_size` will default + to 10. + workers: Integer. Used for generator or `keras.utils.Sequence` input + only. Maximum number of processes to spin up when using + process-based threading. If unspecified, `workers` will default + to 1. If 0, will execute the generator on the main thread. + use_multiprocessing: Boolean. Used for generator or + `keras.utils.Sequence` input only. If `True`, use process-based + threading. If unspecified, `use_multiprocessing` will default to + `False`. Note that because this implementation relies on + multiprocessing, you should not pass non-pickleable arguments to + the generator as they can't be passed easily to children + processes. + + + Returns: + Numpy array(s) of predictions. + + Raises: + ValueError: In case of mismatch between the provided + input data and the model's expectations, + or in case a stateful model receives a number of samples + that is not a multiple of the batch size. 
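# A sketch of `predict` on NumPy input per the contract above: one row of
# predictions per input sample; the model and data are illustrative.
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(8,))])
preds = model.predict(
    np.random.rand(5, 8).astype("float32"), batch_size=4, verbose=0
)
print(preds.shape)  # (5, 2)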
+ """ + self._assert_built_as_v1() + base_layer.keras_api_gauge.get_cell("predict").set(True) + self._check_call_args("predict") + + func = self._select_training_loop(x) + return func.predict( + self, + x=x, + batch_size=batch_size, + verbose=verbose, + steps=steps, + callbacks=callbacks, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + ) + + def reset_metrics(self): + """Resets the state of metrics.""" + metrics = self._get_training_eval_metrics() + for m in metrics: + m.reset_state() - Returns: - Numpy array(s) of predictions. + # Reset metrics on all the distributed (cloned) models. + if self._distribution_strategy: + distributed_training_utils_v1._reset_metrics(self) - Raises: - ValueError: In case of mismatch between the provided - input data and the model's expectations, - or in case a stateful model receives a number of samples - that is not a multiple of the batch size. - """ - self._assert_built_as_v1() - base_layer.keras_api_gauge.get_cell('predict').set(True) - self._check_call_args('predict') + def train_on_batch( + self, + x, + y=None, + sample_weight=None, + class_weight=None, + reset_metrics=True, + ): + """Runs a single gradient update on a single batch of data. + + Args: + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset. + y: Target data. Like the input data `x`, it could be either Numpy + array(s) or TensorFlow tensor(s). It should be consistent with `x` + (you cannot have Numpy inputs and tensor targets, or inversely). + If `x` is a dataset, `y` should not be specified + (since targets will be obtained from the iterator). + sample_weight: Optional array of the same length as x, containing + weights to apply to the model's loss for each sample. In the case + of temporal data, you can pass a 2D array with shape (samples, + sequence_length), to apply a different weight to every timestep of + every sample. In this case you should make sure to specify + sample_weight_mode="temporal" in compile(). This argument is not + supported when `x` is a dataset. + class_weight: Optional dictionary mapping class indices (integers) + to a weight (float) to apply to the model's loss for the samples + from this class during training. This can be useful to tell the + model to "pay more attention" to samples from an under-represented + class. + reset_metrics: If `True`, the metrics returned will be only for this + batch. If `False`, the metrics will be statefully accumulated + across batches. + + Returns: + Scalar training loss + (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + ValueError: In case of invalid user-provided arguments. + """ + self._assert_compile_was_called() + self._check_call_args("train_on_batch") + + # If at this point we are in the replica context, then it is okay to + # execute the Eager code path. The expected way to get here is to call + # `fit` that calls `train_on_batch` on each replica. 
+ if ( + self._distribution_strategy + and tf.distribute.in_cross_replica_context() + ): + raise NotImplementedError( + "`train_on_batch` is not supported for models " + "distributed with tf.distribute.Strategy." + ) + # Validate and standardize user data. + x, y, sample_weights = self._standardize_user_data( + x, + y, + sample_weight=sample_weight, + class_weight=class_weight, + extract_tensors_from_dataset=True, + ) + + # If `self._distribution_strategy` is True, then we are in a replica + # context at this point because of the check above. `train_on_batch` is + # being run for each replica by `self._distribution_strategy` and the + # same code path as Eager is expected to be taken. + if self.run_eagerly or self._distribution_strategy: + output_dict = training_eager_v1.train_on_batch( + self, + x, + y, + sample_weights=sample_weights, + output_loss_metrics=self._output_loss_metrics, + ) + outputs = ( + output_dict["total_loss"] + + output_dict["output_losses"] + + output_dict["metrics"] + ) + outputs = [_non_none_constant_value(v) for v in outputs] + else: + x = training_utils_v1.ModelInputs(x).as_list() + ins = x + list(y or []) + list(sample_weights or []) + + if not isinstance(backend.symbolic_learning_phase(), int): + ins += [True] # Add learning phase value. + + self._update_sample_weight_modes(sample_weights=sample_weights) + self._make_train_function() + outputs = self.train_function(ins) + + if reset_metrics: + self.reset_metrics() + + if len(outputs) == 1: + return outputs[0] + return outputs + + def test_on_batch(self, x, y=None, sample_weight=None, reset_metrics=True): + """Test the model on a single batch of samples. + + Args: + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). If `x` is a dataset `y` should + not be specified (since targets will be obtained from the + iterator). + sample_weight: Optional array of the same length as x, containing + weights to apply to the model's loss for each sample. + In the case of temporal data, you can pass a 2D array + with shape (samples, sequence_length), + to apply a different weight to every timestep of every sample. + In this case you should make sure to specify + sample_weight_mode="temporal" in compile(). This argument is not + supported when `x` is a dataset. + reset_metrics: If `True`, the metrics returned will be only for this + batch. If `False`, the metrics will be statefully accumulated + across batches. + + Returns: + Scalar test loss (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + ValueError: In case of invalid user-provided arguments. + """ + self._assert_compile_was_called() + self._check_call_args("test_on_batch") + + if ( + self._distribution_strategy + and tf.distribute.in_cross_replica_context() + ): + raise NotImplementedError( + "`test_on_batch` is not supported for models " + "distributed with tf.distribute.Strategy." 
+ ) + # Validate and standardize user data. + x, y, sample_weights = self._standardize_user_data( + x, y, sample_weight=sample_weight, extract_tensors_from_dataset=True + ) + + # If `self._distribution_strategy` is True, then we are in a replica + # context at this point. + if self.run_eagerly or self._distribution_strategy: + output_dict = training_eager_v1.test_on_batch( + self, + x, + y, + sample_weights=sample_weights, + output_loss_metrics=self._output_loss_metrics, + ) + outputs = ( + output_dict["total_loss"] + + output_dict["output_losses"] + + output_dict["metrics"] + ) + outputs = [_non_none_constant_value(v) for v in outputs] + else: + x = training_utils_v1.ModelInputs(x).as_list() + inputs = x + list(y or []) + list(sample_weights or []) + + self._update_sample_weight_modes(sample_weights=sample_weights) + self._make_test_function() + outputs = self.test_function(inputs) + + if reset_metrics: + self.reset_metrics() + + if len(outputs) == 1: + return outputs[0] + return outputs + + def predict_on_batch(self, x): + """Returns predictions for a single batch of samples. + + Args: + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A `tf.data` dataset. + + Returns: + Numpy array(s) of predictions. + + Raises: + ValueError: In case of mismatch between given number of inputs and + expectations of the model. + """ + self._check_call_args("predict_on_batch") + + if ( + self._distribution_strategy + and tf.distribute.in_cross_replica_context() + ): + raise NotImplementedError( + "`predict_on_batch` is not supported for models distributed " + "with tf.distribute.Strategy." + ) + # Validate and standardize user data. + inputs, _, _ = self._standardize_user_data( + x, extract_tensors_from_dataset=True + ) + # If `self._distribution_strategy` is True, then we are in a replica + # context at this point. + if self.run_eagerly or self._distribution_strategy: + inputs = training_utils_v1.cast_if_floating_dtype(inputs) + if isinstance(inputs, collections.abc.Sequence): + # Unwrap lists with only one input, as we do when training on + # batch + if len(inputs) == 1: + inputs = inputs[0] + + return self(inputs) + + self._make_predict_function() + outputs = self.predict_function(inputs) + + if len(outputs) == 1: + return outputs[0] + return outputs + + def fit_generator( + self, + generator, + steps_per_epoch=None, + epochs=1, + verbose=1, + callbacks=None, + validation_data=None, + validation_steps=None, + validation_freq=1, + class_weight=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + shuffle=True, + initial_epoch=0, + ): + """Fits the model on data yielded batch-by-batch by a Python generator. + + DEPRECATED: + `Model.fit` now supports generators, so there is no longer any need to + use this endpoint. + """ + warnings.warn( + "`model.fit_generator` is deprecated and " + "will be removed in a future version. 
" + "Please use `Model.fit`, which supports generators.", + stacklevel=2, + ) + return self.fit( + generator, + steps_per_epoch=steps_per_epoch, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + validation_freq=validation_freq, + class_weight=class_weight, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle, + initial_epoch=initial_epoch, + ) + + def evaluate_generator( + self, + generator, + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + verbose=0, + ): + """Evaluates the model on a data generator. + + DEPRECATED: + `Model.evaluate` now supports generators, so there is no longer any + need to use this endpoint. + """ + warnings.warn( + "`Model.evaluate_generator` is deprecated and " + "will be removed in a future version. " + "Please use `Model.evaluate`, which supports generators.", + stacklevel=2, + ) + self._check_call_args("evaluate_generator") + + return self.evaluate( + generator, + steps=steps, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + verbose=verbose, + callbacks=callbacks, + ) + + def predict_generator( + self, + generator, + steps=None, + callbacks=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + verbose=0, + ): + """Generates predictions for the input samples from a data generator. + + DEPRECATED: + `Model.predict` now supports generators, so there is no longer any + need to use this endpoint. + """ + warnings.warn( + "`Model.predict_generator` is deprecated and " + "will be removed in a future version. " + "Please use `Model.predict`, which supports generators.", + stacklevel=2, + ) + return self.predict( + generator, + steps=steps, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + verbose=verbose, + callbacks=callbacks, + ) + + def _check_call_args(self, method_name): + """Check that `call` has only one positional arg.""" + # Always allow first arg, regardless of arg name. + fullargspec = self._call_spec.full_argspec + if fullargspec.defaults: + positional_args = fullargspec.args[: -len(fullargspec.defaults)] + else: + positional_args = fullargspec.args + if "training" in positional_args: + positional_args.remove("training") - func = self._select_training_loop(x) - return func.predict( + # self and first arg can be positional. + if len(positional_args) > 2: + extra_args = positional_args[2:] + raise ValueError( + "Models passed to `" + + method_name + + "` can only have `training` " + "and the first argument in `call` as positional arguments, " + "found: " + str(extra_args) + "." + ) + + def _set_optimizer(self, optimizer): + """Sets self.optimizer. + + Sets self.optimizer to `optimizer`, potentially wrapping it with a + LossScaleOptimizer. + + Args: + optimizer: The optimizer(s) to assign to self.optimizer. + """ + if isinstance(optimizer, (list, tuple)): + self.optimizer = [optimizers.get(opt) for opt in optimizer] + else: + self.optimizer = optimizers.get(optimizer) + + if self._dtype_policy.name == "mixed_float16" and not isinstance( + self.optimizer, loss_scale_optimizer.LossScaleOptimizer + ): + if isinstance(self.optimizer, list): + raise ValueError( + 'When the "mixed_float16" dtype policy is used, you ' + "can only pass a single optimizer. 
Using policy %s "
+                    "and got optimizers: %s"
+                    % (self._dtype_policy, self.optimizer)
+                )
+            if not isinstance(self.optimizer, optimizer_v2.OptimizerV2):
+                raise ValueError(
+                    '"optimizer" must be an instance of '
+                    "tf.keras.optimizers.legacy.Optimizer when a dtype policy "
+                    "with a loss scale is used, but got: %s. Using policy: "
+                    "%s" % (self.optimizer, self._dtype_policy)
+                )
+            self.optimizer = loss_scale_optimizer.LossScaleOptimizer(
+                self.optimizer
+            )
+
+    def _prepare_validation_data(
+        self, validation_data, batch_size, validation_steps
+    ):
+        """Unpack and check the validation data."""
+        (
+            val_x,
+            val_y,
+            val_sample_weights,
+        ) = training_utils_v1.unpack_validation_data(validation_data)
+        return self._standardize_user_data(
+            val_x,
+            val_y,
+            sample_weight=val_sample_weights,
+            batch_size=batch_size,
+            steps=validation_steps,
+            steps_name="validation_steps",
+        )
+
+    def _validate_compile_param_for_distribution_strategy(
+        self, run_eagerly, sample_weight_mode, target_tensors, weighted_metrics
+    ):
+        # Validate that arguments passed by the user to `compile` are supported
+        # by tf.distribute.Strategy.
+        if self._distribution_strategy:
+            if sample_weight_mode:
+                raise NotImplementedError(
+                    "sample_weight_mode is not supported with "
+                    "tf.distribute.Strategy."
+                )
+            if weighted_metrics:
+                raise NotImplementedError(
+                    "weighted_metrics is not supported with "
+                    "tf.distribute.Strategy."
+                )
+            if target_tensors:
+                raise ValueError(
+                    "target_tensors is not supported with "
+                    "tf.distribute.Strategy."
+                )
+
+            if run_eagerly:
+                raise ValueError(
+                    "We currently do not support enabling `run_eagerly` with "
+                    "distribution strategy."
+                )
+
+            if distributed_training_utils_v1.is_distributing_by_cloning(
+                self
+            ) and (not self.built or not self.inputs or not self.outputs):
+                raise ValueError(
+                    "We currently do not support distribution strategy with a "
+                    "`Sequential` model that is created without `input_shape`/"
+                    "`input_dim` set in its first layer or a subclassed model."
+                )
+
+    def _process_target_tensor_for_compile(self, target_tensors):
+        if self.run_eagerly:
+            # Target tensors are not supported with run_eagerly. Create a list
+            # with None as placeholder for each output.
+            return [None for _ in self.output_names]
+
+        if target_tensors is not None and not (
+            isinstance(target_tensors, list) and target_tensors == []
+        ):
+            if isinstance(target_tensors, list):
+                if len(target_tensors) != len(self.outputs):
+                    raise ValueError(
+                        "When passing a list as `target_tensors`, "
+                        "it should have one entry per model output. "
+                        "The model has %s outputs, "
+                        "but you passed target_tensors=%s"
+                        % (len(self.outputs), target_tensors)
+                    )
+            elif isinstance(target_tensors, dict):
+                unexpected_target_tensor_names = set(
+                    target_tensors.keys()
+                ).difference(self.output_names)
+                if unexpected_target_tensor_names:
+                    raise ValueError(
+                        "Unknown entry in `target_tensors` dictionary: "
+                        '"{name}". 
' + "Only expected the following keys: {keys}".format( + name=unexpected_target_tensor_names, + keys=str(self.output_names), + ) + ) + tmp_target_tensors = [] + for name in self.output_names: + tmp_target_tensors.append(target_tensors.get(name, None)) + target_tensors = tmp_target_tensors + elif tf.is_tensor(target_tensors): + target_tensors = [target_tensors] + else: + raise TypeError( + "Expected `target_tensors` to be a list or tuple or " + "dict or a single tensor, but got:", + target_tensors, + ) + else: + # In case target tensor is empty or None, create a list with Nones + # that has same length as self.output_names. With that, the None + # check of target tensor can be skipped downstream. + target_tensors = [None for _ in self.output_names] + return target_tensors + + def _compile_eagerly(self, metrics, weighted_metrics, sample_weight_mode): + # Prepare sample weight modes. List with the same length as model + # outputs. + training_utils_v1.prepare_sample_weight_modes( + self._training_endpoints, sample_weight_mode + ) + # Prepare sample weights. + self._prepare_sample_weights() + # Save all metric attributes per output of the model. + self._cache_output_metric_attributes(metrics, weighted_metrics) + self.total_loss = None + # Set metric attributes on model. + self._set_metric_attributes() + + self._collected_trainable_weights = self.trainable_weights + + def _update_sample_weight_modes(self, sample_weights=None): + """Updates sample weight modes based on training/eval inputs. + + Sample weight placeholders will be created for all or no outputs + based on whether sample_weight is provided for any output. + + If model contains `_sample_weight_modes` we check if the input + `sample_weights` corresponds to the sample weight modes. + 1. Set sample weight mode to be 'temporal' for output i, if `compile` + sample_weight_mode was set to `temporal` and sample weight inputs + are given for one or more outputs. + 2. Set sample weight mode to be 'samplewise' for output i, if + `compile` sample_weight_mode was not set and sample weight inputs + are given for one or more outputs. + 3. Reset sample weight mode to None for output i if sample weight mode + was set but there is no sample weight input. + + Args: + sample_weights: List of sample weights of the same length as model + outputs or None. + """ + if not self._is_compiled: + return + if sample_weights and any(s is not None for s in sample_weights): + for endpoint in self._training_endpoints: + endpoint.sample_weight_mode = ( + endpoint.sample_weight_mode or "samplewise" + ) + else: + for endpoint in self._training_endpoints: + endpoint.sample_weight_mode = None + + def _recompile_weights_loss_and_weighted_metrics(self): + if not self._is_compiled: + return False + recompile = any( + e.sample_weights_mismatch() for e in self._training_endpoints + ) + + if recompile: + self._compile_weights_loss_and_weighted_metrics() + return recompile + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _compile_weights_loss_and_weighted_metrics(self, sample_weights=None): + """Compiles the model loss and weighted metric sub-graphs. + + This may be used to set graph tensors as sample weights (instead of + creating placeholders). This functionality is necessary for + `tf.keras.estimator.model_to_estimator`, which calls Keras models in a + v1 graph, and creates iterator tensors for inputs, targets, and sample + weights. + + Args: + sample_weights: List of tensors to use as the sample weights. Must be + the same length as the number of outputs. 
If left as `None`,
+                placeholders are used instead.
+        """
+        with backend.get_graph().as_default():
+            if sample_weights is not None:
+                self._update_sample_weight_modes(sample_weights)
+            self._prepare_sample_weights(sample_weights)
+
+            masks = self._prepare_output_masks()
+
+            # Compute weighted metrics.
+            self._handle_metrics(
+                self.outputs,
+                targets=self._targets,
+                skip_target_masks=self._prepare_skip_target_masks(),
+                sample_weights=self.sample_weights,
+                masks=masks,
+                return_weighted_metrics=True,
+            )
+
+            # Compute total loss.
+            # Used to keep track of the total loss value (stateless).
+            # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
+            #                   loss_weight_2 * output_2_loss_fn(...) +
+            #                   layer losses.
+            self.total_loss = self._prepare_total_loss(masks)
+
+    def _prepare_skip_target_masks(self):
+        """Boolean mask for whether target in output list should be skipped.
+
+        If the loss function corresponding to a model output is None, then this
+        output will be skipped during total loss calculation and feed targets
+        preparation.
+
+        Returns:
+            A boolean list for whether the corresponding target in the output list
+            should be skipped during loss calculation.
+        """
+        return [l is None for l in self.loss_functions]
+
+    def _prepare_output_masks(self):
+        """Returns masks corresponding to model outputs."""
+        return [getattr(x, "_keras_mask", None) for x in self.outputs]
+
+    def _prepare_total_loss(self, masks):
+        """Computes total loss from loss functions.
+
+        Args:
+            masks: List of mask values corresponding to each model output.
+
+        Returns:
+            A scalar tensor with the model's total loss.
+
+        Raises:
+            TypeError: If model run_eagerly is True.
+        """
+        if self.run_eagerly:
+            raise TypeError(
+                "total loss cannot be computed when compiled with "
+                "run_eagerly = True."
+            )
+        loss_list = []
+        with backend.name_scope("loss"):
+            for endpoint, mask in zip(self._training_endpoints, masks):
+                if endpoint.should_skip_target():
+                    continue
+                y_true = endpoint.training_target.target
+                y_pred = endpoint.output
+                loss_fn = endpoint.loss_fn
+                loss_weight = endpoint.loss_weight
+                loss_name = endpoint.loss_name()
+                sample_weight = endpoint.sample_weight
+
+                with backend.name_scope(loss_name):
+                    if mask is not None:
+                        mask = tf.cast(mask, y_pred.dtype)
+                        # Update weights with mask.
+                        if sample_weight is None:
+                            sample_weight = mask
+                        else:
+                            # Update dimensions of weights to match with mask if
+                            # possible.
+                            (
+                                mask,
+                                _,
+                                sample_weight,
+                            ) = losses_utils.squeeze_or_expand_dimensions(
+                                mask, sample_weight=sample_weight
+                            )
+
+                    if hasattr(loss_fn, "reduction"):
+                        per_sample_losses = loss_fn.call(y_true, y_pred)
+                        sample_weight = losses_utils.apply_valid_mask(
+                            per_sample_losses,
+                            sample_weight,
+                            mask,
+                            loss_fn.reduction,
+                        )
+                        weighted_losses = losses_utils.compute_weighted_loss(
+                            per_sample_losses,
+                            sample_weight=sample_weight,
+                            reduction=losses_utils.ReductionV2.NONE,
+                        )
+                        loss_reduction = loss_fn.reduction
+
+                        # `AUTO` loss reduction defaults to
+                        # `SUM_OVER_BATCH_SIZE` for all compile use cases.
+                        if loss_reduction == losses_utils.ReductionV2.AUTO:
+                            loss_reduction = (
+                                losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
+                            )
+
+                        # Compute the stateless loss value.
+                        output_loss = losses_utils.reduce_weighted_loss(
+                            weighted_losses, reduction=loss_reduction
+                        )
+                    else:
+                        # Compute the stateless loss value for a custom loss
+                        # class. 
Here we assume that the class takes care of + # loss reduction because if this class returns a vector + # value we cannot differentiate between use case where a + # custom optimizer expects a vector loss value vs + # unreduced per-sample loss value. + output_loss = loss_fn( + y_true, y_pred, sample_weight=sample_weight + ) + loss_reduction = ( + losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE + ) + + if len(self.outputs) > 1: + # Keep track of stateful result tensor for the loss. + endpoint.output_loss_metric(output_loss) + + # Scale output loss for distribution. For custom losses we + # assume reduction was mean. + if ( + loss_reduction + == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE + ): + output_loss = losses_utils.scale_loss_for_distribution( + output_loss + ) + + loss_list.append(loss_weight * output_loss) + if not loss_list and not self.losses: + raise ValueError( + "The model cannot be compiled " + "because it has no loss to optimize." + ) + + # Add regularization penalties and other layer-specific losses. + custom_losses = self.get_losses_for(None) + self.get_losses_for( + self.inputs + ) + if custom_losses: + total_custom_loss = tf.add_n( + losses_utils.cast_losses_to_common_dtype(custom_losses) + ) + loss_list.append( + losses_utils.scale_loss_for_distribution(total_custom_loss) + ) + + loss_list = losses_utils.cast_losses_to_common_dtype(loss_list) + if loss_list: + total_loss = tf.add_n(loss_list) + else: + total_loss = 0.0 + return total_loss + + def _get_callback_model(self): + """Returns the Callback Model for this Model.""" + + if hasattr(self, "_replicated_model") and self._replicated_model: + # When using training_distributed, we set the callback model + # to an instance of the `DistributedModel` that we create in + # the `compile` call. The `DistributedModel` is initialized + # with the first replicated model. We need to set the callback + # model to a DistributedModel to allow us to override saving + # and loading weights when we checkpoint the model during training. + return self._replicated_model + if hasattr(self, "callback_model") and self.callback_model: + return self.callback_model + return self + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _make_callback_model(self, grouped_model): + first_replicated_model = self._distribution_strategy.unwrap( + grouped_model + )[0] + # We initialize the callback model with the first replicated model. + self._replicated_model = DistributedCallbackModel( + first_replicated_model + ) + self._replicated_model.set_original_model(self) + + def _validate_or_infer_batch_size(self, batch_size, steps, x): + """Validates that `batch_size` provided is consistent with InputLayer. + + It's possible that the user specified a static batch size in their + InputLayer. If so, this method checks the provided `batch_size` and `x` + arguments are consistent with this static batch size. Also, if + `batch_size` is `None`, this method will attempt to infer the batch size + from the static batch size of the InputLayer. Lastly, ValueError will be + raised if `x` is a tf.data.Dataset and `batch_size` is specified as we + expect users to provide batched datasets. + + Args: + batch_size: The batch_size provided as an argument to + fit/evaluate/predict. + steps: The steps provided as an argument to fit/evaluate/predict. + x: The data passed as `x` to fit/evaluate/predict. + + Returns: + The validated batch_size, auto-inferred from the first layer if not + provided. 
+        """
+        if isinstance(
+            x, (tf.compat.v1.data.Dataset, tf.data.Dataset, data_utils.Sequence)
+        ) or tf_inspect.isgenerator(x):
+            if batch_size is not None:
+                raise ValueError(
+                    "The `batch_size` argument must not be specified for the "
+                    "given input type. Received input: "
+                    "{}, batch_size: {}".format(x, batch_size)
+                )
+            return
+
+        # Avoids the override in Sequential.layers which filters Input layers.
+        # (Which are often the very layers that we're after.)
+        layers = self._flatten_layers(include_self=False, recursive=False)
+        first_layer = next(layers, None)
+        if first_layer:
+            # The per-replica static batch size.
+            static_batch_size = training_utils.get_static_batch_size(
+                first_layer
+            )
+            if static_batch_size is not None:
+
+                # Determine number of times the user-supplied batch size will be
+                # split.
+                if (
+                    self._distribution_strategy
+                    and distributed_training_utils.global_batch_size_supported(
+                        self._distribution_strategy
+                    )
+                ):
+                    num_splits_for_ds = (
+                        self._distribution_strategy.num_replicas_in_sync
+                    )
+                else:
+                    num_splits_for_ds = 1
+
+                # Check `batch_size` argument is consistent with InputLayer.
+                if batch_size is not None:
+                    if batch_size % num_splits_for_ds != 0:
+                        raise ValueError(
+                            "The `batch_size` argument ({}) must be divisible "
+                            "by the number of replicas ({})".format(
+                                batch_size, num_splits_for_ds
+                            )
+                        )
+                    per_replica_batch_size = batch_size // num_splits_for_ds
+
+                    if per_replica_batch_size != static_batch_size:
+                        raise ValueError(
+                            "The `batch_size` argument value {} is "
+                            "incompatible with the specified batch size of "
+                            "your Input Layer: {}".format(
+                                per_replica_batch_size, static_batch_size
+                            )
+                        )
+
+                # Check Dataset/Iterator batch size is consistent with
+                # InputLayer.
+                if isinstance(
+                    x,
+                    (
+                        tf.data.Dataset,
+                        tf.compat.v1.data.Iterator,
+                        tf.data.Iterator,
+                    ),
+                ):
+                    ds_batch_size = tf.compat.v1.Dimension(
+                        tf.nest.flatten(tf.compat.v1.data.get_output_shapes(x))[
+                            0
+                        ][0]
+                    ).value
+                    if ds_batch_size is not None:
+                        if ds_batch_size % num_splits_for_ds != 0:
+                            raise ValueError(
+                                "The batch output shape of your `Dataset` {} "
+                                "is not divisible by the number of "
+                                "replicas {}".format(
+                                    ds_batch_size, num_splits_for_ds
+                                )
+                            )
+
+                        ds_per_replica_batch_size = (
+                            ds_batch_size // num_splits_for_ds
+                        )
+                        if ds_per_replica_batch_size != static_batch_size:
+                            raise ValueError(
+                                "The batch output shape of your `Dataset` is "
+                                "{}, which is incompatible with the specified "
+                                "batch size of your Input Layer: {}".format(
+                                    ds_per_replica_batch_size, static_batch_size
+                                )
+                            )
+
+                # Set inferred batch size from the InputLayer.
+                if steps is None:
+                    batch_size = static_batch_size * num_splits_for_ds
+
+        if batch_size is None and steps is None:
+            # Backwards compatibility
+            batch_size = 32
+        return batch_size
+
+    def _prepare_sample_weights(self, sample_weights=None):
+        """Sets sample weight attribute on the model."""
+        # List with the same length as model outputs.
+        if sample_weights is not None:
+            if len(sample_weights) != len(self._training_endpoints):
+                raise ValueError(
+                    "Provided sample weights must have the same length as the "
+                    "number of outputs. 
Expected: {}, got: {}.".format( + len(self._training_endpoints), len(sample_weights) + ) + ) + else: + sample_weights = [None] * len(self._training_endpoints) + for endpoint, weight in zip(self._training_endpoints, sample_weights): + endpoint.populate_sample_weight(weight, endpoint.sample_weight_mode) + + def _cache_output_metric_attributes(self, metrics, weighted_metrics): + """Caches metric name and function attributes for every model output.""" + output_shapes = [] + for output in self.outputs: + if output is None or output.shape.rank is None: + output_shapes.append(None) + else: + output_shapes.append(output.shape.as_list()) + self._per_output_metrics = ( + training_utils_v1.collect_per_output_metric_info( + metrics, + self.output_names, + output_shapes, + self.loss_functions, + from_serialized=self._from_serialized, + ) + ) + self._per_output_weighted_metrics = ( + training_utils_v1.collect_per_output_metric_info( + weighted_metrics, + self.output_names, + output_shapes, + self.loss_functions, + from_serialized=self._from_serialized, + is_weighted=True, + ) + ) + + def _add_unique_metric_name(self, metric_name, metric_fn, output_index): + """Makes the metric name unique. + + If there are multiple outputs for which the metrics are calculated, + the metric names have to be made unique by appending an integer. + + Args: + metric_name: Metric name that corresponds to the metric specified by + the user. For example: 'acc'. + metric_fn: The Metric object. + output_index: The index of the model output for which the metric name + is being added. + + Returns: + string, name of the model's unique metric name + """ + # For multi-output models, prepend the output names to the metric name. + if len(self.output_names) > 1: + # If we're loading from an already-serialized model, we've already + # prepended the output name, and we don't want to do it again. + # + # Alternatively, we may be receiving a stateless metric (e.g. the + # string "accuracy") rather than a `Metric` object, in which case we + # want to prepend the output name even if we are loading a + # serialized model. + if not getattr(metric_fn, "_from_serialized", False): + metric_name = f"{self.output_names[output_index]}_{metric_name}" + + j = 1 + base_metric_name = metric_name + while metric_name in self.metrics_names: + metric_name = "%s_%d" % (base_metric_name, j) + j += 1 + + return metric_name + + def _init_metric_attributes(self): + """Initialized model metric attributes.""" + # List of stateful metric functions. Used for resetting metric state + # during training/eval. + self._compile_metric_functions = [] + + def _set_per_output_metric_attributes(self, metrics_dict, output_index): + """Sets the metric attributes on the model for the given output. + + Args: + metrics_dict: A dict with metric names as keys and metric fns as + values. + output_index: The index of the model output for which the metric + attributes are added. + + Returns: + Metrics dict updated with unique metric names as keys. + """ + updated_metrics_dict = collections.OrderedDict() + for metric_name, metric_fn in metrics_dict.items(): + metric_name = self._add_unique_metric_name( + metric_name, metric_fn, output_index + ) + + # Update the name on the metric class to be the unique generated + # name. + metric_fn._name = metric_name + updated_metrics_dict[metric_name] = metric_fn + # Keep track of metric name and function. 
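The naming scheme documented in `_add_unique_metric_name` above (prepend the output name for multi-output models, then suffix an integer until unique) can be summarized with a standalone sketch; `unique_metric_name` is a hypothetical helper, not the actual method:

    def unique_metric_name(metric_name, output_name, existing_names):
        # Multi-output models prepend the output name first...
        name = f"{output_name}_{metric_name}"
        base, j = name, 1
        # ...then append an integer until the name is unused.
        while name in existing_names:
            name = "%s_%d" % (base, j)
            j += 1
        return name

    print(unique_metric_name("acc", "dense", {"dense_acc"}))  # dense_acc_1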
+ self._compile_metric_functions.append(metric_fn) + return updated_metrics_dict + + def _set_metric_attributes(self): + """Sets the metric attributes on the model for all the model outputs.""" + updated_per_output_metrics = [] + updated_per_output_weighted_metrics = [] + for i, endpoint in enumerate(self._training_endpoints): + if endpoint.should_skip_target(): + updated_per_output_metrics.append(self._per_output_metrics[i]) + updated_per_output_weighted_metrics.append( + self._per_output_weighted_metrics[i] + ) + continue + updated_per_output_metrics.append( + self._set_per_output_metric_attributes( + self._per_output_metrics[i], i + ) + ) + updated_per_output_weighted_metrics.append( + self._set_per_output_metric_attributes( + self._per_output_weighted_metrics[i], i + ) + ) + + # Create a metric wrapper for each output loss. This computes mean of an + # output loss across mini-batches (irrespective of how we reduce within + # a batch). + if len(self._training_endpoints) > 1: + for endpoint in self._training_endpoints: + if not endpoint.should_skip_target(): + endpoint.output_loss_metric = metrics_module.Mean( + name=endpoint.loss_name() + ) + + self._per_output_metrics = updated_per_output_metrics + self._per_output_weighted_metrics = updated_per_output_weighted_metrics + + def _handle_per_output_metrics( + self, metrics_dict, y_true, y_pred, mask, weights=None + ): + """Calls metric functions for a single output. + + Args: + metrics_dict: A dict with metric names as keys and metric fns as + values. + y_true: Target output. + y_pred: Predicted output. + mask: Computed mask value for the current output. + weights: Weights to be applied on the current output. + + Returns: + A list of metric result tensors. + """ + metric_results = [] + for metric_name, metric_fn in metrics_dict.items(): + with backend.name_scope(metric_name): + metric_result = training_utils_v1.call_metric_function( + metric_fn, y_true, y_pred, weights=weights, mask=mask + ) + metric_results.append(metric_result) + return metric_results + + def _handle_metrics( + self, + outputs, + targets=None, + skip_target_masks=None, + sample_weights=None, + masks=None, + return_weighted_metrics=False, + return_weighted_and_unweighted_metrics=False, + ): + """Handles calling metric functions. + + Args: + outputs: List of outputs (predictions). + targets: List of targets. + skip_target_masks: Optional. List of boolean for whether the + corresponding target should be ignored or not. + sample_weights: Optional list of sample weight arrays. + masks: List of computed output mask values. + return_weighted_metrics: Flag that indicates whether weighted metrics + should be computed instead of unweighted metrics. This flag is + ignored when `return_weighted_and_unweighted_metrics` is enabled. + return_weighted_and_unweighted_metrics: Flag that is used to indicate + whether both weighted and unweighted metrics should be computed. + When this is not enabled, we use `return_weighted_metrics` param to + indicate whether weighted or unweighted metrics should be returned. + + Returns: + A list of metric result tensors. + """ + # TODO(scottzhu): Update this to use the new training_endpoints. + # Currently the eager and graph logic is bit different. + skip_target_masks = skip_target_masks or [False] * len(outputs) + metric_results = [] + with backend.name_scope("metrics"): + # Invoke all metrics added using `compile`. 
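The weighted/unweighted split that `_handle_metrics` manages above corresponds to the `metrics` and `weighted_metrics` arguments of `compile`; a hypothetical call wiring up both lists:

    import tensorflow as tf

    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(3,))])
    model.compile(
        optimizer="sgd",
        loss="mse",
        metrics=["mae"],           # unweighted, handled per output
        weighted_metrics=["mse"],  # weighted by sample_weight and masks
    )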
+ for i in range(len(outputs)): + if skip_target_masks[i]: + continue + output = outputs[i] if outputs else None + target = targets[i] if targets else None + output_mask = masks[i] if masks else None + + if ( + return_weighted_and_unweighted_metrics + or not return_weighted_metrics + ): + metric_results.extend( + self._handle_per_output_metrics( + self._per_output_metrics[i], + target, + output, + output_mask, + ) + ) + if ( + return_weighted_and_unweighted_metrics + or return_weighted_metrics + ): + metric_results.extend( + self._handle_per_output_metrics( + self._per_output_weighted_metrics[i], + target, + output, + output_mask, + weights=sample_weights[i] + if sample_weights + else None, + ) + ) + return metric_results + + def _check_trainable_weights_consistency(self): + """Check trainable weights count consistency. + + This will raise a warning if `trainable_weights` and + `_collected_trainable_weights` are inconsistent (i.e. have different + number of parameters). + Inconsistency will typically arise when one modifies `model.trainable` + without calling `model.compile` again. + """ + if not hasattr(self, "_collected_trainable_weights"): + return + + if len(self.trainable_weights) != len( + self._collected_trainable_weights + ): + logging.log_first_n( + logging.WARN, + "Discrepancy between trainable weights and collected" + " trainable weights, did you set `model.trainable`" + " without calling `model.compile` after ?", + 1, + ) + + def _make_train_function(self): + has_recompiled = self._recompile_weights_loss_and_weighted_metrics() + self._check_trainable_weights_consistency() + if isinstance(self.optimizer, list): + raise ValueError( + "The `optimizer` in `compile` should be a single optimizer." + ) + # If we have re-compiled the loss/weighted metric sub-graphs then create + # train function even if one exists already. This is because + # `_feed_sample_weights` list has been updated on re-compile. + if getattr(self, "train_function", None) is None or has_recompiled: + # Restore the compiled trainable state. + current_trainable_state = self._get_trainable_state() + self._set_trainable_state(self._compiled_trainable_state) + + inputs = ( + self._feed_inputs + + self._feed_targets + + self._feed_sample_weights + ) + if not isinstance(backend.symbolic_learning_phase(), int): + inputs += [backend.symbolic_learning_phase()] + + with backend.get_graph().as_default(): + with backend.name_scope("training"): + # Training updates + updates = self.optimizer.get_updates( + params=self._collected_trainable_weights, + loss=self.total_loss, + ) + # Unconditional updates + updates += self.get_updates_for(None) + # Conditional updates relevant to this model + updates += self.get_updates_for(self.inputs) + + metrics = self._get_training_eval_metrics() + metrics_tensors = [ + m._call_result + for m in metrics + if hasattr(m, "_call_result") + ] + + with backend.name_scope("training"): + # Gets loss and metrics. Updates weights at each call. + fn = backend.function( + inputs, + [self.total_loss] + metrics_tensors, + updates=updates, + name="train_function", + **self._function_kwargs, + ) + setattr(self, "train_function", fn) + + # Restore the current trainable state + self._set_trainable_state(current_trainable_state) + + def _make_test_function(self): + has_recompiled = self._recompile_weights_loss_and_weighted_metrics() + # If we have re-compiled the loss/weighted metric sub-graphs then create + # test function even if one exists already. 
This is because + # `_feed_sample_weights` list has been updated on re-compile. + if getattr(self, "test_function", None) is None or has_recompiled: + inputs = ( + self._feed_inputs + + self._feed_targets + + self._feed_sample_weights + ) + + with backend.get_graph().as_default(): + metrics = self._get_training_eval_metrics() + metrics_tensors = [ + m._call_result + for m in metrics + if hasattr(m, "_call_result") + ] + + with backend.name_scope("evaluation"): + updates = self.state_updates + # Return loss and metrics, no gradient updates. + # Does update the network states. + fn = backend.function( + inputs, + [self.total_loss] + metrics_tensors, + updates=updates, + name="test_function", + **self._function_kwargs, + ) + setattr(self, "test_function", fn) + + def _make_predict_function(self): + if not hasattr(self, "predict_function"): + self.predict_function = None + if self.predict_function is None: + inputs = self._feed_inputs + # Gets network outputs. Does not update weights. + # Does update the network states. + kwargs = getattr(self, "_function_kwargs", {}) + with backend.name_scope(ModeKeys.PREDICT): + self.predict_function = backend.function( + inputs, + self.outputs, + updates=self.state_updates, + name="predict_function", + **kwargs, + ) + + def _make_execution_function(self, mode): + if mode == ModeKeys.TRAIN: + self._make_train_function() + return self.train_function + if mode == ModeKeys.TEST: + self._make_test_function() + return self.test_function + if mode == ModeKeys.PREDICT: + self._make_predict_function() + return self.predict_function + + def _distribution_standardize_user_data( self, - x=x, - batch_size=batch_size, - verbose=verbose, - steps=steps, - callbacks=callbacks, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing) - - def reset_metrics(self): - """Resets the state of metrics.""" - metrics = self._get_training_eval_metrics() - for m in metrics: - m.reset_state() - - # Reset metrics on all the distributed (cloned) models. - if self._distribution_strategy: - distributed_training_utils_v1._reset_metrics(self) # pylint: disable=protected-access - - def train_on_batch(self, - x, - y=None, - sample_weight=None, - class_weight=None, - reset_metrics=True): - """Runs a single gradient update on a single batch of data. + x, + y=None, + sample_weight=None, + class_weight=None, + batch_size=None, + validation_split=0.0, + shuffle=False, + epochs=1, + allow_partial_batch=False, + ): + """Runs validation checks on input and target data passed by the user. + + This is called when using tf.distribute.Strategy to train, evaluate or + serve the model. + + Args: + x: Input data. A numpy array or `tf.data` dataset. + y: Target data. A numpy array or None if x is a `tf.data` dataset. + sample_weight: An optional sample-weight array passed by the user to + weight the importance of each sample in `x`. + class_weight: An optional class-weight array by the user to + weight the importance of samples in `x` based on the class they + belong to, as conveyed by `y`. + batch_size: Integer batch size. If provided, it is used to run + additional validation checks on stateful models. + validation_split: Float between 0 and 1. + Fraction of the training data to be used as validation data. + shuffle: Boolean whether to shuffle the training data before each + epoch. + epochs: Integer epochs. If > 1, repeat the numpy training data epochs + times when converting to training dataset. 
+ allow_partial_batch: Boolean whether to enforce that all batches have + the same size. + + Returns: + Dataset instance. + + Raises: + ValueError: In case of invalid user-provided data. + RuntimeError: If the model was never compiled. + """ + if class_weight: + raise NotImplementedError( + "`class_weight` is currently not supported " + "when using tf.distribute.Strategy." + ) + + if ( + sample_weight is not None + and sample_weight.all() + and backend.is_tpu_strategy(self._distribution_strategy) + ): + raise NotImplementedError( + "`sample_weight` is currently not supported " + "when using TPUStrategy." + ) + + # Validates `steps` and `shuffle` arguments right at the beginning + # since we use it to construct the dataset object. + # TODO(anjalisridhar): Remove this check once we refactor the + # _standardize_user_data code path. This check is already present + # elsewhere in the codebase. + if isinstance(x, tf.data.Dataset): + if shuffle: + training_utils_v1.verify_dataset_shuffled(x) + + strategy = self._distribution_strategy + with strategy.scope(): + # We should be sure to call get_session() inside the + # strategy.scope() so the strategy can affect the session options. + if tf.compat.v1.executing_eagerly_outside_functions(): + session = None + else: + session = backend.get_session() + + first_x_value = tf.nest.flatten(x)[0] + if isinstance(first_x_value, np.ndarray): + x = training_utils.list_to_tuple(x) + if y is not None: + y = training_utils.list_to_tuple(y) + if sample_weight is not None: + sample_weight = training_utils.list_to_tuple( + sample_weight + ) + in_tuple = (x, y, sample_weight) + else: + in_tuple = (x, y) + else: + in_tuple = x + + ds = strategy.extended.experimental_make_numpy_dataset( + in_tuple, session=session + ) + if shuffle: + # We want a buffer size that is larger than the batch size + # provided by the user and provides sufficient randomness. + # Note that larger numbers introduce more memory usage based + # on the size of each sample. + ds = ds.shuffle(max(1024, batch_size * 8)) + if epochs > 1: + ds = ds.repeat(epochs) + + # We need to use the drop_remainder argument to get a known + # static input shape which is required for TPUs. + drop_remainder = ( + not allow_partial_batch + and strategy.extended.experimental_require_static_shapes + ) + + # TODO(b/131720208): We still drop remainder here if number of + # examples is divisible by batch size, as sometimes dynamic + # padder will time out with keras.metrics.CategoricalAccuracy() + # metric. + if backend.is_tpu_strategy(strategy) and not drop_remainder: + dataset_size = first_x_value.shape[0] + if dataset_size % batch_size == 0: + drop_remainder = True + + x = ds.batch(batch_size, drop_remainder=drop_remainder) + else: + assert isinstance(x, tf.data.Dataset) + training_utils_v1.validate_dataset_input( + x, y, sample_weight, validation_split + ) + return x - Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays + def _standardize_user_data( + self, + x, + y=None, + sample_weight=None, + class_weight=None, + batch_size=None, + check_steps=False, + steps_name="steps", + steps=None, + validation_split=0.0, + shuffle=False, + extract_tensors_from_dataset=False, + ): + """Runs validation checks on input and target data passed by the user. + + Also standardizes the data to lists of arrays, in order. + + Also builds and compiles the model on the fly if it is a subclassed + model that has never been called before (and thus has no + inputs/outputs). 
+ + This is a purely internal method, subject to refactoring at any time. + + Args: + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors + - A TensorFlow tensor, or a list of tensors (in case the model has multiple inputs). - - A dict mapping input names to the corresponding array/tensors, + - A dict mapping input names to the corresponding array/tensors, if the model has named inputs. - - A `tf.data` dataset. - y: Target data. Like the input data `x`, it could be either Numpy - array(s) or TensorFlow tensor(s). It should be consistent with `x` - (you cannot have Numpy inputs and tensor targets, or inversely). If - `x` is a dataset, `y` should not be specified - (since targets will be obtained from the iterator). - sample_weight: Optional array of the same length as x, containing - weights to apply to the model's loss for each sample. In the case of - temporal data, you can pass a 2D array with shape (samples, - sequence_length), to apply a different weight to every timestep of - every sample. In this case you should make sure to specify - sample_weight_mode="temporal" in compile(). This argument is not - supported when `x` is a dataset. - class_weight: Optional dictionary mapping class indices (integers) to a - weight (float) to apply to the model's loss for the samples from this - class during training. This can be useful to tell the model to "pay - more attention" to samples from an under-represented class. - reset_metrics: If `True`, the metrics returned will be only for this - batch. If `False`, the metrics will be statefully accumulated across - batches. - - Returns: - Scalar training loss - (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - - Raises: - ValueError: In case of invalid user-provided arguments. - """ - self._assert_compile_was_called() - self._check_call_args('train_on_batch') - - # If at this point we are in the replica context, then it is okay to execute - # the Eager code path. The expected way to get here is to call `fit` that - # calls `train_on_batch` on each replica. - if (self._distribution_strategy and - tf.distribute.in_cross_replica_context()): - raise NotImplementedError('`train_on_batch` is not supported for models ' - 'distributed with tf.distribute.Strategy.') - # Validate and standardize user data. - x, y, sample_weights = self._standardize_user_data( - x, y, sample_weight=sample_weight, class_weight=class_weight, - extract_tensors_from_dataset=True) - - # If `self._distribution_strategy` is True, then we are in a replica context - # at this point because of the check above. `train_on_batch` is being run - # for each replica by `self._distribution_strategy` and the same code path - # as Eager is expected to be taken. 
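As the comment above notes, `run_eagerly` and the replica context share the eager code path; compiling with `run_eagerly=True` is a hypothetical way to exercise that branch directly:

    import tensorflow as tf

    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(2,))])
    model.compile("sgd", "mse", run_eagerly=True)  # forces the eager branch
    model.train_on_batch(tf.zeros([4, 2]), tf.zeros([4, 1]))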
- if self.run_eagerly or self._distribution_strategy: - output_dict = training_eager_v1.train_on_batch( - self, - x, - y, - sample_weights=sample_weights, - output_loss_metrics=self._output_loss_metrics) - outputs = (output_dict['total_loss'] + output_dict['output_losses'] - + output_dict['metrics']) - outputs = [_non_none_constant_value(v) for v in outputs] # pylint: disable=protected-access - else: - x = training_utils_v1.ModelInputs(x).as_list() - ins = x + list(y or []) + list(sample_weights or []) - - if not isinstance(backend.symbolic_learning_phase(), int): - ins += [True] # Add learning phase value. - - self._update_sample_weight_modes(sample_weights=sample_weights) - self._make_train_function() - outputs = self.train_function(ins) # pylint: disable=not-callable - - if reset_metrics: - self.reset_metrics() - - if len(outputs) == 1: - return outputs[0] - return outputs - - def test_on_batch(self, x, y=None, sample_weight=None, reset_metrics=True): - """Test the model on a single batch of samples. - - Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A dict mapping input names to the corresponding array/tensors, - if the model has named inputs. - - A `tf.data` dataset. - y: Target data. Like the input data `x`, - it could be either Numpy array(s) or TensorFlow tensor(s). - It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). If `x` is a dataset `y` should - not be specified (since targets will be obtained from the iterator). - sample_weight: Optional array of the same length as x, containing - weights to apply to the model's loss for each sample. - In the case of temporal data, you can pass a 2D array - with shape (samples, sequence_length), - to apply a different weight to every timestep of every sample. - In this case you should make sure to specify - sample_weight_mode="temporal" in compile(). This argument is not - supported when `x` is a dataset. - reset_metrics: If `True`, the metrics returned will be only for this - batch. If `False`, the metrics will be statefully accumulated across - batches. - - Returns: - Scalar test loss (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - - Raises: - ValueError: In case of invalid user-provided arguments. - """ - self._assert_compile_was_called() - self._check_call_args('test_on_batch') - - if (self._distribution_strategy and - tf.distribute.in_cross_replica_context()): - raise NotImplementedError('`test_on_batch` is not supported for models ' - 'distributed with tf.distribute.Strategy.') - # Validate and standardize user data. - x, y, sample_weights = self._standardize_user_data( - x, y, sample_weight=sample_weight, extract_tensors_from_dataset=True) - - # If `self._distribution_strategy` is True, then we are in a replica context - # at this point. 
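For the distribution-strategy branch referenced here, a model typically picks up its strategy by being built under the strategy's scope (a hypothetical setup; in this v1 code path, `fit` then drives the `*_on_batch` methods once per replica):

    import tensorflow as tf

    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        dist_model = tf.keras.Sequential(
            [tf.keras.layers.Dense(1, input_shape=(2,))]
        )
        dist_model.compile("sgd", "mse")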
- if self.run_eagerly or self._distribution_strategy: - output_dict = training_eager_v1.test_on_batch( - self, - x, - y, - sample_weights=sample_weights, - output_loss_metrics=self._output_loss_metrics) - outputs = (output_dict['total_loss'] + output_dict['output_losses'] - + output_dict['metrics']) - outputs = [_non_none_constant_value(v) for v in outputs] # pylint: disable=protected-access - else: - x = training_utils_v1.ModelInputs(x).as_list() - inputs = x + list(y or []) + list(sample_weights or []) - - self._update_sample_weight_modes(sample_weights=sample_weights) - self._make_test_function() - outputs = self.test_function(inputs) # pylint: disable=not-callable - - if reset_metrics: - self.reset_metrics() - - if len(outputs) == 1: - return outputs[0] - return outputs - - def predict_on_batch(self, x): - """Returns predictions for a single batch of samples. - - Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A `tf.data` dataset. - - Returns: - Numpy array(s) of predictions. + - A `tf.data` dataset. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). If `x` is a dataset, `y` should not + be specified (since targets will be obtained from the iterator). + sample_weight: An optional sample-weight array passed by the user to + weight the importance of each sample in `x`. + class_weight: An optional class-weight array by the user to + weight the importance of samples in `x` based on the class they + belong to, as conveyed by `y`. If both `sample_weight` and + `class_weight` are provided, the weights are multiplied. + batch_size: Integer batch size. If provided, it is used to run + additional validation checks on stateful models. + check_steps: boolean, True if we want to check for validity of `steps` + and False, otherwise. For example, when we are standardizing one + batch of data for train_on_batch/predict_on_batch/test_on_batch + APIs, `steps` value is not required and we should not check for its + validity in these cases. + steps_name: The public API's parameter name for `steps`. + steps: Integer or `None`. Total number of steps (batches of samples) + to execute. + validation_split: Float between 0 and 1. + Fraction of the training data to be used as validation data. + shuffle: Boolean whether to shuffle the training data before each + epoch. + extract_tensors_from_dataset: Boolean. When `x` is a dataset instance, + this indicates whether to extract actual tensors from the dataset or + instead output the dataset instance itself. + Set to True when calling from `train_on_batch`/etc. + + Returns: + A tuple of 3: inputs (arrays or dicts, depending on whether `x` was a + dict or not), target arrays, sample-weight arrays. If the model's + input and targets are symbolic, these lists are empty (since the model + takes no user-provided data, instead the data comes from the symbolic + inputs/targets). + + Raises: + ValueError: In case of invalid user-provided data. + RuntimeError: If the model was never compiled. + """ + if isinstance(x, (tf.compat.v1.data.Dataset, tf.data.Dataset)): + # Graph mode dataset. We'll pass the dataset as-is (unless + # `extract_tensors_from_dataset` is True, in which case we extract + # the tensors from the dataset and we output them. 
+ training_utils_v1.validate_dataset_input( + x, y, sample_weight, validation_split + ) + if shuffle: + training_utils_v1.verify_dataset_shuffled(x) + + is_dataset = True + if extract_tensors_from_dataset: + # We do this for `train_on_batch`/etc. + ( + x, + y, + sample_weight, + ) = training_utils_v1.extract_tensors_from_dataset(x) + elif isinstance(x, tf.compat.v1.data.Iterator): + # Graph mode iterator. We extract the symbolic tensors. + training_utils_v1.validate_dataset_input( + x, y, sample_weight, validation_split + ) + iterator = x + x, y, sample_weight = training_utils_v1.unpack_iterator_input( + iterator + ) + is_dataset = True + else: + is_dataset = False + + # Validates `steps` argument based on x's type. + if check_steps: + training_utils_v1.check_steps_argument(x, steps, steps_name) + + # First, we build the model on the fly if necessary. + if not self.inputs: + all_inputs, y_input, dict_inputs = self._build_model_with_inputs( + x, y + ) + is_build_called = True + else: + all_inputs = [] + # Whether this is a subclassed model that expects dictionary inputs + # rather than list inputs (e.g. FeatureColumn-based models). + dict_inputs = isinstance(self.inputs, dict) + is_build_called = False + y_input = y + + # Second, we compile the model on the fly if necessary, mostly for + # subclass models. + is_compile_called = False + if not self._is_compiled and self.optimizer: + self._compile_from_inputs(all_inputs, y_input, x, y) + is_compile_called = True + + # In graph mode, if we had just set inputs and targets as symbolic + # tensors by invoking build and compile on the model respectively, we do + # not have to feed anything to the model. Model already has input and + # target data as part of the graph. Note: in this case, `any` and `all` + # are equivalent since we disallow mixed symbolic/value inputs. + + # self.run_eagerly is not free to compute, so we want to reuse the + # value. + run_eagerly = self.run_eagerly + + if ( + not run_eagerly + and is_build_called + and is_compile_called + and not is_dataset + and any(_is_symbolic_tensor(v) for v in all_inputs) + ): + return [], [], None + + return self._standardize_tensors( + x, + y, + sample_weight, + run_eagerly=run_eagerly, + dict_inputs=dict_inputs, + is_dataset=is_dataset, + class_weight=class_weight, + batch_size=batch_size, + ) + + def _standardize_tensors( + self, + x, + y, + sample_weight, + run_eagerly, + dict_inputs, + is_dataset, + class_weight=None, + batch_size=None, + ): + if run_eagerly: + # In eager mode, do not do shape validation + # since the network has no input nodes (placeholders) to be fed. + feed_input_names = self.input_names + feed_input_shapes = None + elif not self._is_graph_network: + # Case: symbolic-mode subclassed network. Do not do shape + # validation. + feed_input_names = self._feed_input_names + feed_input_shapes = None + else: + # Case: symbolic-mode graph network. + # In this case, we run extensive shape validation checks. + feed_input_names = self._feed_input_names + feed_input_shapes = self._feed_input_shapes + + # Standardize the inputs. + if not isinstance(x, (tf.compat.v1.data.Dataset, tf.data.Dataset)): + # TODO(fchollet): run static checks with dataset output shape(s). + x = training_utils_v1.standardize_input_data( + x, + feed_input_names, + feed_input_shapes, + check_batch_axis=False, # Don't enforce the batch size. + exception_prefix="input", + ) + + # Get typespecs for the input data and sanitize it if necessary. 
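The structure check that `_standardize_tensors` performs in the non-dataset branch pairs each flattened user input with the corresponding model input; in isolation it behaves like this hypothetical snippet:

    import tensorflow as tf

    flat_inputs = tf.nest.flatten({"a": tf.zeros([2, 3])})
    flat_expected = tf.nest.flatten({"a": tf.keras.Input(shape=(3,))})
    for a, b in zip(flat_inputs, flat_expected):
        # Raises if, e.g., a composite (sparse/ragged) input is paired
        # with a dense model input, since composites expand to components.
        tf.nest.assert_same_structure(a, b, expand_composites=True)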
+ # TODO(momernick): This should be capable of doing full input validation + # at all times - validate that this is so and refactor the + # standardization code. + if isinstance(x, tf.data.Dataset): + x_shapes = tf.data.experimental.get_structure(x) + if isinstance(x_shapes, tuple): + # If the output of a Dataset is a tuple, we assume it's either + # of the form (x_data, y_data) or (x_data, y_data, + # sample_weights). In either case, we only care about x_data + # here. + x_shapes = x_shapes[0] + else: + flat_inputs = tf.nest.flatten(x) + flat_expected_inputs = tf.nest.flatten(self.inputs) + converted_x = [] + for a, b in zip(flat_inputs, flat_expected_inputs): + converted_x.append(_convert_scipy_sparse_tensor(a, b)) + x = tf.nest.pack_sequence_as(x, converted_x) + + # Convert ResourceVariables to tensors so nest.assert_same_structure + # below won't fail with Variable and Tensor. + x_tensors = tf_utils.convert_variables_to_tensors(x) + x_shapes = tf.nest.map_structure( + tf_utils.type_spec_from_value, x_tensors + ) + + flat_inputs = tf.nest.flatten(x_shapes) + # Convert ResourceVariables to tensors so nest.assert_same_structure + # below won't fail with Variable and Tensor. + flat_expected_inputs = tf.nest.flatten( + tf_utils.convert_variables_to_tensors(self.inputs) + ) + for a, b in zip(flat_inputs, flat_expected_inputs): + tf.nest.assert_same_structure(a, b, expand_composites=True) - Raises: - ValueError: In case of mismatch between given number of inputs and - expectations of the model. - """ - self._check_call_args('predict_on_batch') - - if (self._distribution_strategy and - tf.distribute.in_cross_replica_context()): - raise NotImplementedError( - '`predict_on_batch` is not supported for models distributed with' - ' tf.distribute.Strategy.') - # Validate and standardize user data. - inputs, _, _ = self._standardize_user_data( - x, extract_tensors_from_dataset=True) - # If `self._distribution_strategy` is True, then we are in a replica context - # at this point. - if self.run_eagerly or self._distribution_strategy: - inputs = training_utils_v1.cast_if_floating_dtype(inputs) - if isinstance(inputs, collections.abc.Sequence): - # Unwrap lists with only one input, as we do when training on batch - if len(inputs) == 1: - inputs = inputs[0] - - return self(inputs) # pylint: disable=not-callable - - self._make_predict_function() - outputs = self.predict_function(inputs) - - if len(outputs) == 1: - return outputs[0] - return outputs - - def fit_generator(self, - generator, - steps_per_epoch=None, - epochs=1, - verbose=1, - callbacks=None, - validation_data=None, - validation_steps=None, - validation_freq=1, - class_weight=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - shuffle=True, - initial_epoch=0): - """Fits the model on data yielded batch-by-batch by a Python generator. - - DEPRECATED: - `Model.fit` now supports generators, so there is no longer any need to use - this endpoint. - """ - warnings.warn( - '`model.fit_generator` is deprecated and ' - 'will be removed in a future version. 
' - 'Please use `Model.fit`, which supports generators.', - stacklevel=2) - return self.fit( - generator, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=validation_steps, - validation_freq=validation_freq, - class_weight=class_weight, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle, - initial_epoch=initial_epoch) - - def evaluate_generator(self, - generator, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - verbose=0): - """Evaluates the model on a data generator. - - DEPRECATED: - `Model.evaluate` now supports generators, so there is no longer any need - to use this endpoint. - """ - warnings.warn( - '`Model.evaluate_generator` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `Model.evaluate`, which supports generators.', - stacklevel=2) - self._check_call_args('evaluate_generator') - - return self.evaluate( - generator, - steps=steps, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - verbose=verbose, - callbacks=callbacks) - - def predict_generator(self, - generator, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - verbose=0): - """Generates predictions for the input samples from a data generator. - - DEPRECATED: - `Model.predict` now supports generators, so there is no longer any need - to use this endpoint. - """ - warnings.warn( - '`Model.predict_generator` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `Model.predict`, which supports generators.', - stacklevel=2) - return self.predict( - generator, - steps=steps, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - verbose=verbose, - callbacks=callbacks) - - def _check_call_args(self, method_name): - """Check that `call` has only one positional arg.""" - # Always allow first arg, regardless of arg name. - fullargspec = self._call_spec.full_argspec - if fullargspec.defaults: - positional_args = fullargspec.args[:-len(fullargspec.defaults)] - else: - positional_args = fullargspec.args - if 'training' in positional_args: - positional_args.remove('training') + if y is not None: + # Prepare self._sample_weight_modes. List with the same length as + # model outputs. + training_utils_v1.prepare_sample_weight_modes( + self._training_endpoints, self.sample_weight_mode + ) + feed_output_names = self._feed_output_names + feed_sample_weight_modes = self._sample_weight_modes + if not self._is_graph_network: + feed_output_shapes = None + else: + feed_output_shapes = self._feed_output_shapes + + # Standardize the outputs. + y = training_utils_v1.standardize_input_data( + y, + feed_output_names, + # Don't enforce target shapes to match output shapes. + # Precise checks will be run in + # `check_loss_and_target_compatibility`. + shapes=None, + check_batch_axis=False, # Don't enforce the batch size. + exception_prefix="target", + ) + + # Generate sample-wise weight values given the `sample_weight` and + # `class_weight` arguments. 
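Conceptually, the `class_weight` handling referenced in this comment expands a per-class dictionary into per-sample weights (and, per the docstring above, multiplies them with `sample_weight` when both are given); a standalone sketch, not the internal helper:

    import numpy as np

    y = np.array([0, 1, 1, 0])
    class_weight = {0: 1.0, 1: 2.0}

    # Each sample inherits the weight of its class.
    sample_weights = np.array([class_weight[int(label)] for label in y])
    print(sample_weights)  # [1. 2. 2. 1.]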
+ sample_weights = training_utils_v1.standardize_sample_weights( + sample_weight, feed_output_names + ) + class_weights = training_utils_v1.standardize_class_weights( + class_weight, feed_output_names + ) + + sample_weights = [ + training_utils_v1.standardize_weights(ref, sw, cw, mode) + for (ref, sw, cw, mode) in zip( + y, sample_weights, class_weights, feed_sample_weight_modes + ) + ] + # Check that all arrays have the same length. + if not self._distribution_strategy: + training_utils_v1.check_array_lengths(x, y, sample_weights) + if self._is_graph_network and not run_eagerly: + # Additional checks to avoid users mistakenly using improper + # loss fns. + training_utils_v1.check_loss_and_target_compatibility( + y, self._feed_loss_fns, feed_output_shapes + ) + + sample_weights, _, _ = training_utils.handle_partial_sample_weights( + y, sample_weights, feed_sample_weight_modes, check_all_flat=True + ) + else: + y = [] + sample_weights = None + + if self.stateful and batch_size and not is_dataset: + # Check that for stateful networks, number of samples is a multiple + # of the static batch size. + if x[0].shape[0] % batch_size != 0: + raise ValueError( + "In a stateful network, " + "you should only pass inputs with " + "a number of samples that can be " + "divided by the batch size. Found: " + + str(x[0].shape[0]) + + " samples" + ) + + # If dictionary inputs were provided, we return a dictionary as well. + if dict_inputs and not isinstance( + x, (tf.compat.v1.data.Dataset, tf.data.Dataset) + ): + x = dict(zip(feed_input_names, x)) + return x, y, sample_weights + + def _build_model_with_inputs(self, inputs, targets): + """Build the model (set model inputs/outputs), mainly for subclass + model.""" + processed_inputs = [] + is_dict_inputs = False + orig_inputs = inputs + # We need to use `inputs` to set the model inputs. + # If input data is a dataset iterator in graph mode or if it is an eager + # iterator and only one batch of samples is required, we fetch the data + # tensors from the iterator and then standardize them. + if isinstance(inputs, (tf.compat.v1.data.Dataset, tf.data.Dataset)): + inputs, targets, _ = training_utils_v1.extract_tensors_from_dataset( + inputs + ) + # We type-check that `inputs` and `targets` are either single arrays + # or lists of arrays, and extract a flat list of inputs from the passed + # structure. + training_utils_v1.validate_input_types(inputs, orig_inputs) + + if isinstance(inputs, (list, tuple)): + processed_inputs += list(inputs) + elif isinstance(inputs, dict): + is_dict_inputs = True + keys = sorted(inputs.keys()) + processed_inputs = [inputs[k] for k in keys] + else: + processed_inputs.append(inputs) + # Now that we have a flat set of inputs, we make sure that none of them + # are CompositeTensors or CompositeTensorValues of any type (or scipy + # sparse arrays, which we treat as SparseTensor values). We cannot + # safely infer input data from an arbitrary composite tensor, so we + # don't try - users should explicitly add composite tensor inputs to + # their subclassed models. + for input_tensor in processed_inputs: + if training_utils_v1.is_composite_or_composite_value( + input_tensor + ) and not isinstance(input_tensor, tf.Variable): + # TODO(b/132691975): Document subclass-model CT input handling. + raise ValueError( + "All SparseTensor and RaggedTensor inputs must be " + "explicitly declared using a keras.Input() with " + "sparse=True or ragged=True. We found an undeclared " + "input %s. 
For Sequential models, please add a " + "keras.Input() as your first Layer. For subclassed models, " + "please call self._set_inputs() on your input set, which " + "you can create using keras.Input() for each input to your " + "model." % (input_tensor,) + ) + # Build the model using the retrieved inputs (value or symbolic). + # If values are generated from a dataset, then in symbolic-mode + # placeholders will be created to match the value shapes. + if isinstance( + orig_inputs, + ( + tf.compat.v1.data.Dataset, + tf.data.Dataset, + tf.compat.v1.data.Iterator, + ), + ): + if not self.inputs: + # For subclassed models, a robust input spec is not available so + # we must cast to the model dtype. + inputs = training_utils_v1.cast_if_floating_dtype( + inputs, self.dtype + ) + + def create_tensor_spec(t): + return tf.TensorSpec(t.shape, t.dtype) + + cast_inputs = tf.nest.map_structure(create_tensor_spec, inputs) + elif training_utils_v1.has_tensors(inputs): + cast_inputs = training_utils_v1.cast_if_floating_dtype(inputs) + else: + cast_inputs = inputs + self._set_inputs(cast_inputs) + return processed_inputs, targets, is_dict_inputs + + def _compile_from_inputs( + self, all_inputs, target, orig_inputs, orig_target + ): + if target is not None: + # We need to use `y` to set the model targets. + if training_utils_v1.has_tensors(target): + target = training_utils_v1.cast_if_floating_dtype_and_mismatch( + target, self.outputs + ) + training_utils_v1.validate_input_types( + target, orig_target, allow_dict=False, field_name="target" + ) + if isinstance(target, (list, tuple)): + all_inputs += list(target) + else: + all_inputs.append(target) + # Type check that all inputs are *either* value *or* symbolic. + # TODO(fchollet): this check could be removed in Eager mode? + if any(tf.is_tensor(v) for v in all_inputs): + if not all(tf.is_tensor(v) for v in all_inputs): + raise ValueError( + "Do not pass inputs that mix Numpy arrays and " + "TensorFlow tensors. " + "You passed: x=" + + str(orig_inputs) + + "; y=" + + str(orig_target) + ) + is_dataset = isinstance( + orig_inputs, + ( + tf.compat.v1.data.Dataset, + tf.data.Dataset, + tf.compat.v1.data.Iterator, + ), + ) + if is_dataset or tf.executing_eagerly(): + target_tensors = None + else: + # Handle target tensors if any passed. + if target is not None: + if not isinstance(target, (list, tuple)): + target = [target] + target_tensors = [v for v in target if _is_symbolic_tensor(v)] + else: + target_tensors = None + + self.compile( + optimizer=self.optimizer, + loss=self.loss, + metrics=self._compile_metrics, + weighted_metrics=self._compile_weighted_metrics, + loss_weights=self.loss_weights, + target_tensors=target_tensors, + sample_weight_mode=self.sample_weight_mode, + run_eagerly=self.run_eagerly, + experimental_run_tf_function=self._experimental_run_tf_function, + ) + + # TODO(omalleyt): Consider changing to a more descriptive function name. + def _set_inputs(self, inputs, outputs=None, training=None): + """Set model's input and output specs based on the input data received. + + This is to be used for Model subclasses, which do not know at + instantiation time what their inputs look like. + + Args: + inputs: Single array, or list of arrays. The arrays could be + placeholders, Numpy arrays, data tensors, or TensorSpecs. + - if placeholders: the model is built on top of these placeholders, + and we expect Numpy data to be fed for them when calling + `fit`/etc. 
+ - if Numpy data or TensorShapes: we create placeholders matching the + TensorShapes or shapes of the Numpy arrays. We expect Numpy data + to be fed for these placeholders when calling `fit`/etc. + - if data tensors: the model is built on top of these tensors. + We do not expect any Numpy data to be provided when calling + `fit`/etc. + outputs: None, a data tensor, or a list of tensors. If None, the + outputs will be determined by invoking `self.call()`, otherwise the + provided value will be used. + training: Boolean or None. Only relevant in symbolic mode. Specifies + whether to build the model's graph in inference mode (False), + training mode (True), or using the Keras learning phase (None). + Raises: + ValueError: If dict inputs are passed to a Sequential Model where the + first layer isn't FeatureLayer. + """ + self._set_save_spec(inputs) + inputs = self._set_input_attrs(inputs) + + if outputs is None: + kwargs = {} + if self._expects_training_arg: + # In V2 mode, feeding `training=None` is not allowed because any + # value explicitly passed by the user is respected, even + # `None`. + if ( + training is None + and not tf.compat.v1.executing_eagerly_outside_functions() + ): + training = backend.learning_phase() + if training is not None: + kwargs["training"] = training + try: + outputs = self(inputs, **kwargs) + except NotImplementedError: + # This Model or a submodel is dynamic and hasn't overridden + # `compute_output_shape`. + outputs = None + + self._set_output_attrs(outputs) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _set_input_attrs(self, inputs): + """Sets attributes related to the inputs of the Model.""" + if self.inputs: + raise ValueError("Model inputs are already set.") + + if self.__class__.__name__ == "Sequential" and not self.built: + if tf.is_tensor(inputs): + input_shape = (None,) + tuple(inputs.shape.as_list()[1:]) + elif isinstance(inputs, tf.TensorShape): + input_shape = (None,) + tuple(inputs.as_list()[1:]) + elif isinstance(inputs, dict): + # We assert that the first layer is a FeatureLayer. + if not training_utils_v1.is_feature_layer(self.layers[0]): + raise ValueError( + "Passing a dictionary input to a Sequential Model " + "which doesn't have FeatureLayer as the first layer" + " is an error." + ) + input_shape = (None,) + else: + input_shape = (None,) + tuple(inputs.shape[1:]) + self._build_input_shape = input_shape + + # Cast inputs to the compute dtype. This is primarily used + # when saving to determine the correct dtype in the input signature. + inputs = self._maybe_cast_inputs(inputs) + + # On-the-fly setting of symbolic model inputs (either by using the + # tensor provided, or by creating a placeholder if Numpy data was + # provided).
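Stepping back to the Sequential shape-inference branches a few lines up: the rule is simply to drop the concrete batch dimension and keep the per-sample shape. A minimal standalone sketch (hypothetical helper name):

    import numpy as np
    import tensorflow as tf

    def infer_build_shape(inputs):
        # Mirrors the branches above: replace the leading batch size
        # with None and keep the rest of the shape.
        if tf.is_tensor(inputs):
            return (None,) + tuple(inputs.shape.as_list()[1:])
        if isinstance(inputs, tf.TensorShape):
            return (None,) + tuple(inputs.as_list()[1:])
        return (None,) + tuple(np.asarray(inputs).shape[1:])

    print(infer_build_shape(np.zeros((32, 28, 28))))    # (None, 28, 28)
    print(infer_build_shape(tf.TensorShape([32, 10])))  # (None, 10)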
+ model_inputs = training_utils_v1.ModelInputs(inputs) + inputs = model_inputs.get_symbolic_inputs() + self.inputs = model_inputs.get_symbolic_inputs( + return_single_as_list=True + ) + self.input_names = model_inputs.get_input_names() + + self._feed_inputs = [] + self._feed_input_names = [] + self._feed_input_shapes = [] + + for k, v in model_inputs.as_dict(): + if backend.is_placeholder(v): + self._feed_input_names.append(k) + self._feed_inputs.append(v) + self._feed_input_shapes.append(backend.int_shape(v)) + + return inputs + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _set_output_attrs(self, outputs): + """Sets attributes related to the outputs of the Model.""" + # NOTE(taylorrobie): This convention cannot be changed without updating + # the data adapter since it assumes nest.flatten ordering. + outputs = tf.nest.flatten(outputs) + self.outputs = outputs + self.output_names = training_utils_v1.generic_output_names(outputs) + # TODO(scottzhu): Should we cleanup the self._training_endpoints here? + self.built = True + + @property + def _targets(self): + """The output target tensors for the model.""" + return [ + e.training_target.target + for e in self._training_endpoints + if e.has_training_target() + ] - # self and first arg can be positional. - if len(positional_args) > 2: - extra_args = positional_args[2:] - raise ValueError( - 'Models passed to `' + method_name + '` can only have `training` ' - 'and the first argument in `call` as positional arguments, ' - 'found: ' + str(extra_args) + '.') + @property + def _feed_targets(self): + return [ + e.training_target.target + for e in self._training_endpoints + if e.has_feedable_training_target() + ] - def _set_optimizer(self, optimizer): - """Sets self.optimizer. + @property + def _feed_output_names(self): + return [ + e.output_name + for e in self._training_endpoints + if e.has_feedable_training_target() + ] - Sets self.optimizer to `optimizer`, potentially wrapping it with a - LossScaleOptimizer. + @property + def _feed_output_shapes(self): + return [ + e.feed_output_shape + for e in self._training_endpoints + if e.has_feedable_training_target() + ] - Args: - optimizer: The optimizer(s) to assign to self.optimizer. - """ - if isinstance(optimizer, (list, tuple)): - self.optimizer = [optimizers.get(opt) for opt in optimizer] - else: - self.optimizer = optimizers.get(optimizer) - - if (self._dtype_policy.name == 'mixed_float16' and - not isinstance(self.optimizer, - loss_scale_optimizer.LossScaleOptimizer)): - if isinstance(self.optimizer, list): - raise ValueError('When the "mixed_float16" dtype policy is used, you ' - 'can only pass a single optimizer. Using policy %s ' - 'and got optimizers: %s' % - self._dtype_policy, self.optimizer) - if not isinstance(self.optimizer, optimizer_v2.OptimizerV2): - raise ValueError('"optimizer" must be an instance of ' - 'tf.keras.optimizers.Optimizer when a dype policy ' - 'with a loss scale used, but got: %s. 
Using policy: ' - '%s' % - (self.optimizer, self._dtype_policy)) - self.optimizer = loss_scale_optimizer.LossScaleOptimizer(self.optimizer) - - def _prepare_validation_data(self, validation_data, batch_size, - validation_steps): - """Unpack and check the validation data.""" - val_x, val_y, val_sample_weights = training_utils_v1.unpack_validation_data( - validation_data) - return self._standardize_user_data( - val_x, - val_y, - sample_weight=val_sample_weights, - batch_size=batch_size, - steps=validation_steps, - steps_name='validation_steps') - - def _validate_compile_param_for_distribution_strategy( - self, run_eagerly, sample_weight_mode, target_tensors, weighted_metrics): - # Validate that arguments passed by the user to `compile` are supported by - # tf.distribute.Strategy. - if self._distribution_strategy: - if sample_weight_mode: - raise NotImplementedError('sample_weight_mode is not supported with ' - 'tf.distribute.Strategy.') - if weighted_metrics: - raise NotImplementedError('weighted_metrics is not supported with ' - 'tf.distribute.Strategy.') - if target_tensors: - raise ValueError('target_tensors is not supported with ' - 'tf.distribute.Strategy.') - - if run_eagerly: - raise ValueError( - 'We currently do not support enabling `run_eagerly` with ' - 'distribution strategy.') - - if (distributed_training_utils_v1.is_distributing_by_cloning(self) and - (not self.built or not self.inputs or not self.outputs)): - raise ValueError( - 'We currently do not support distribution strategy with a ' - '`Sequential` model that is created without `input_shape`/' - '`input_dim` set in its first layer or a subclassed model.') - - def _process_target_tensor_for_compile(self, target_tensors): - if self.run_eagerly: - # target tensor is not supported with run_eagerly. Create a list with None - # as placeholder for each output. - return [None for _ in self.output_names] - - if target_tensors is not None and not (isinstance(target_tensors, list) and - target_tensors == []): # pylint: disable=g-explicit-bool-comparison - if isinstance(target_tensors, list): - if len(target_tensors) != len(self.outputs): - raise ValueError( - 'When passing a list as `target_tensors`, ' - 'it should have one entry per model output. ' - 'The model has %s outputs, but you passed target_tensors=%s' % - (len(self.outputs), target_tensors)) - elif isinstance(target_tensors, dict): - unexpected_target_tensor_names = set(target_tensors.keys()).difference( - self.output_names) - if unexpected_target_tensor_names: - raise ValueError( - 'Unknown entry in `target_tensors` dictionary: "{name}". ' - 'Only expected the following keys: {keys}'.format( - name=unexpected_target_tensor_names, - keys=str(self.output_names))) - tmp_target_tensors = [] - for name in self.output_names: - tmp_target_tensors.append(target_tensors.get(name, None)) - target_tensors = tmp_target_tensors - elif tf.is_tensor(target_tensors): - target_tensors = [target_tensors] - else: - raise TypeError('Expected `target_tensors` to be a list or tuple or ' - 'dict or a single tensor, but got:', target_tensors) - else: - # In case target tensor is empty or None, create a list with Nones - # that has same length as self.output_names. With that, the None check of - # target tensor can be skipped downstream. - target_tensors = [None for _ in self.output_names] - return target_tensors - - def _compile_eagerly(self, metrics, weighted_metrics, sample_weight_mode): - # Prepare sample weight modes. List with the same length as model outputs. 
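For context on the `sample_weight_mode` plumbing being reworked here: `'temporal'` expects one weight per sample per timestep, while the default samplewise mode expects one weight per sample. A usage sketch against the legacy v1 loop this file implements (shapes are illustrative; `sample_weight_mode` is a v1-era `compile()` argument):

    import numpy as np
    import tensorflow.compat.v1 as tf

    tf.disable_eager_execution()  # route through the v1 training loop

    model = tf.keras.Sequential(
        [tf.keras.layers.Dense(1, input_shape=(10, 4))]
    )
    model.compile(optimizer="sgd", loss="mse", sample_weight_mode="temporal")

    x = np.random.rand(8, 10, 4).astype("float32")
    y = np.random.rand(8, 10, 1).astype("float32")
    w = np.ones((8, 10))  # one weight per (sample, timestep); samplewise
                          # mode would expect shape (8,) instead
    model.fit(x, y, sample_weight=w, epochs=1, verbose=0)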
- training_utils_v1.prepare_sample_weight_modes( - self._training_endpoints, sample_weight_mode) - # Prepare sample weights. - self._prepare_sample_weights() - # Save all metric attributes per output of the model. - self._cache_output_metric_attributes(metrics, weighted_metrics) - self.total_loss = None - # Set metric attributes on model. - self._set_metric_attributes() - - self._collected_trainable_weights = self.trainable_weights - - def _update_sample_weight_modes(self, sample_weights=None): - """Updates sample weight modes based on training/eval inputs. - - Sample weight placeholders will be created for all or no outputs - based on whether sample_weight is provided for any output. - - If model contains `_sample_weight_modes` we check if the input - `sample_weights` corresponds to the sample weight modes. - 1. Set sample weight mode to be 'temporal' for output i, if `compile` - sample_weight_mode was set to `temporal` and sample weight inputs - are given for one or more outputs. - 2. Set sample weight mode to be 'samplewise' for output i, if `compile` - sample_weight_mode was not set and sample weight inputs are given for - one or more outputs. - 3. Reset sample weight mode to None for output i if sample weight mode - was set but there is no sample weight input. + @property + def _feed_loss_fns(self): + return [ + e.loss_fn + for e in self._training_endpoints + if e.has_feedable_training_target() + ] - Args: - sample_weights: List of sample weights of the same length as model outputs - or None. - """ - if not self._is_compiled: - return - if sample_weights and any(s is not None for s in sample_weights): - for endpoint in self._training_endpoints: - endpoint.sample_weight_mode = ( - endpoint.sample_weight_mode or 'samplewise') - else: - for endpoint in self._training_endpoints: - endpoint.sample_weight_mode = None + @property + def _loss_weights_list(self): + return [e.loss_weight for e in self._training_endpoints] + + @property + def _output_loss_metrics(self): + if hasattr(self, "_training_endpoints"): + return [ + e.output_loss_metric + for e in self._training_endpoints + if e.output_loss_metric is not None + ] + return None + + @property + def sample_weights(self): + return [e.sample_weight for e in self._training_endpoints] + + @property + def _sample_weight_modes(self): + return [e.sample_weight_mode for e in self._training_endpoints] + + @property + def _feed_sample_weights(self): + return [ + e.sample_weight + for e in self._training_endpoints + if e.sample_weight is not None + ] - def _recompile_weights_loss_and_weighted_metrics(self): - if not self._is_compiled: - return False - recompile = any( - e.sample_weights_mismatch() for e in self._training_endpoints) + def _maybe_load_initial_epoch_from_ckpt(self, initial_epoch, mode): + """Maybe load 1st epoch from checkpoint, considering worker recovery. + + Refer to tensorflow/python/keras/distribute/worker_training_state.py + for more information. + + Args: + initial_epoch: The original initial_epoch the user passes to `fit()`. + mode: The mode for running `model.fit()`. + + Returns: + If the training is recovering from a previous failure under a + multi-worker training setting, return the epoch the training is + supposed to continue at. Otherwise, return the `initial_epoch` the + user passes in.
+ """ + if self._training_state is not None: + return self._training_state.maybe_load_initial_epoch_from_ckpt( + initial_epoch, mode + ) + return initial_epoch + + def _get_training_eval_metrics(self): + """Returns all the metrics that are to be reported. + + This includes the output loss metrics, compile metrics/weighted metrics, + add_metric metrics. + """ + metrics = [] + metrics.extend(getattr(self, "_output_loss_metrics", None) or []) + metrics.extend(getattr(self, "metrics", None) or []) + return metrics + + def _assert_compile_was_called(self): + # Checks whether `compile` has been called. If it has been called, + # then the optimizer is set. This is different from whether the + # model is compiled + # (i.e. whether the model is built and its inputs/outputs are set). + if not self._compile_was_called: + raise RuntimeError( + "You must compile your model before " + "training/testing. " + "Use `model.compile(optimizer, loss)`." + ) + + def _in_multi_worker_mode(self): + """Method to infer if this `Model` is working in multi-worker settings. + + Multi-worker training refers to the setup where the training is + distributed across multiple workers, as opposed to the case where + only a local process performs the training. This function is + used to infer for example whether or not a distribute coordinator + should be run, and thus TensorFlow servers should be started for + communication with other servers in the cluster, or whether or not + saving/restoring checkpoints is relevant for preemption fault tolerance. + + Experimental. Signature and implementation are subject to change. + + Returns: + Whether this model indicates it's working in multi-worker settings. + """ + strategy = self._distribution_strategy + + # Otherwise, use the strategy whose scope this is in. + if not strategy and tf.distribute.has_strategy(): + strategy = tf.distribute.get_strategy() + return strategy and strategy.extended._in_multi_worker_mode() + + @property + def _trackable_saved_model_saver(self): + return model_serialization.ModelSavedModelSaver(self) + + def _get_compile_args(self, user_metrics=True): + del user_metrics + self._assert_compile_was_called() + kwargs = { + "loss": self.loss, + "metrics": self._compile_metrics, + "loss_weights": self.loss_weights, + "sample_weight_mode": self.sample_weight_mode, + "weighted_metrics": self._compile_weighted_metrics, + } + return kwargs + + @property + def _compile_was_called(self): + return self._v1_compile_was_called - if recompile: - self._compile_weights_loss_and_weighted_metrics() - return recompile - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _compile_weights_loss_and_weighted_metrics(self, sample_weights=None): - """Compiles the model loss and weighted metric sub-graphs. +class DistributedCallbackModel(Model): + """Model that is used for callbacks with tf.distribute.Strategy.""" + + def __init__(self, model): + super().__init__() + self.optimizer = model.optimizer + + def set_original_model(self, orig_model): + self._original_model = orig_model + + def save_weights(self, filepath, overwrite=True, save_format=None): + self._replicated_model.save_weights( + filepath, overwrite=overwrite, save_format=save_format + ) + + def save(self, filepath, overwrite=True, include_optimizer=True): + # save weights from the distributed model to the original model + distributed_model_weights = self.get_weights() + self._original_model.set_weights(distributed_model_weights) + # TODO(anjalisridhar): Do we need to save the original model here? 
+ # Saving the first replicated model works as well. + self._original_model.save( + filepath, overwrite=True, include_optimizer=False + ) + + def load_weights(self, filepath, by_name=False): + self._original_model.load_weights(filepath, by_name=False) + # Copy the weights from the original model to each of the replicated + # models. + orig_model_weights = self._original_model.get_weights() + distributed_training_utils_v1.set_weights( + self._original_model._distribution_strategy, + self, + orig_model_weights, + ) + + def __getattr__(self, item): + # Allowed attributes of the model that can be accessed by the user + # during a callback. + if item not in ("_setattr_tracking", "_layers"): + logging.warning( + "You are accessing attribute " + item + " of the " + "DistributedCallbackModel that may not have been set " + "correctly." + ) + return super().__getattr__(item) - This may be used to set graph tensors as sample weights (instead of creating - placeholders). This functionality is necessary for - `tf.keras.estimator.model_to_estimator`, which calls Keras models in a v1 - graph, and creates iterator tensors for inputs, targets, and sample weights. - Args: - sample_weights: List of tensors to use as the sample weights. Must be the - same length as the number of outputs. If left as `None`, placeholders - are used instead. - """ - with backend.get_graph().as_default(): - if sample_weights is not None: - self._update_sample_weight_modes(sample_weights) - self._prepare_sample_weights(sample_weights) - - masks = self._prepare_output_masks() - - # Compute weighted metrics. - self._handle_metrics( - self.outputs, - targets=self._targets, - skip_target_masks=self._prepare_skip_target_masks(), - sample_weights=self.sample_weights, - masks=masks, - return_weighted_metrics=True) - - # Compute total loss. - # Used to keep track of the total loss value (stateless). - # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) + - # loss_weight_2 * output_2_loss_fn(...) + - # layer losses. - self.total_loss = self._prepare_total_loss(masks) - - def _prepare_skip_target_masks(self): - """Boolean mask for whether the target in the output list should be skipped. - - If the loss function corresponding to a model output is None, then this - output will be skipped during total loss calculation and feed targets - preparation. +class _TrainingEndpoint: + """A container for the training output/target and related entities. - Returns: - A boolean list for whether the corresponding target in the output list - should be skipped during loss calculation. + In the case of a model with multiple outputs, there is a one-to-one mapping + between model output (y_pred), model target (y_true), loss, metrics, etc. + By unifying these entities into one class, the different entities can access + information about each other, rather than having to reach into separate + per-output attribute lists on the model. """ - return [l is None for l in self.loss_functions] - - def _prepare_output_masks(self): - """Returns masks corresponding to model outputs.""" - return [getattr(x, '_keras_mask', None) for x in self.outputs] - - def _prepare_total_loss(self, masks): - """Computes total loss from loss functions. - - Args: - masks: List of mask values corresponding to each model output. - - Returns: - A list of loss weights of python floats. - Raises: - TypeError: If model run_eagerly is True.
- """ - if self.run_eagerly: - raise TypeError('total loss can not be computed when compiled with ' - 'run_eagerly = True.') - loss_list = [] - with backend.name_scope('loss'): - for endpoint, mask in zip(self._training_endpoints, masks): - if endpoint.should_skip_target(): - continue - y_true = endpoint.training_target.target - y_pred = endpoint.output - loss_fn = endpoint.loss_fn - loss_weight = endpoint.loss_weight - loss_name = endpoint.loss_name() - sample_weight = endpoint.sample_weight - - with backend.name_scope(loss_name): - if mask is not None: - mask = tf.cast(mask, y_pred.dtype) - # Update weights with mask. - if sample_weight is None: - sample_weight = mask + def __init__( + self, + output, + output_name, + loss_fn, + loss_weight=None, + training_target=None, + output_loss_metric=None, + sample_weight=None, + sample_weight_mode=None, + ): + """Initialize the _TrainingEndpoint. + + Note that the output and output_name should be stable as long as the + model structure doesn't change. The training_target suppose to be + mutable since the information is provided via `compile()` + + Args: + output: the output tensor of the model. + output_name: the unique name of the output tensor. + loss_fn: the loss function for the output tensor. + loss_weight: float, the weights for the loss. + training_target: the _TrainingTarget for the model. + output_loss_metric: the metric object for the loss function. + sample_weight: the weights for how a sample is weighted during metric + and loss calculation. Could be None. + sample_weight_mode: string, 'temporal', 'samplewise' or None. The mode + for how the sample_weight is populated. + """ + self._output = output + self._output_name = output_name + self._loss_fn = loss_fn + self._loss_weight = loss_weight + self._training_target = training_target + self._output_loss_metric = output_loss_metric + self._sample_weight = sample_weight + self._sample_weight_mode = sample_weight_mode + + @property + def output(self): + return self._output + + @property + def output_name(self): + return self._output_name + + @property + def shape(self): + return backend.int_shape(self.output) + + @property + def loss_fn(self): + return self._loss_fn + + @property + def loss_weight(self): + return self._loss_weight + + @loss_weight.setter + def loss_weight(self, value): + self._loss_weight = value + + @property + def training_target(self): + return self._training_target + + @training_target.setter + def training_target(self, value): + self._training_target = value + + def create_training_target(self, target, run_eagerly=False): + """Create training_target instance and update the self.training_target. + + Note that the input target should just be a tensor or None, and + corresponding training target will be created based on the output and + loss_fn. + + Args: + target: the target tensor for the current output. Could be None. + run_eagerly: boolean, whether the model is in run_eagerly mode. + + Raises: + ValueError if the training_target field for the current instance has + already been populated. + """ + if self.has_training_target(): + raise ValueError( + "The training_target field for the _TrainingEndpoint " + "instance has already been populated" + ) + if run_eagerly: + # When run_eagerly, the target tensor is ignored, and the None + # placeholder is created instead. 
+ self.training_target = _TrainingTarget( + None, feedable=True, skip_target_weights=False + ) + return + + if self.should_skip_target(): + self.training_target = _TrainingTarget(None) + else: + if target is not None and not backend.is_placeholder(target): + feedable = False + skip_target_weights = True else: - # Update dimensions of weights to match with mask if possible. - mask, _, sample_weight = ( - losses_utils.squeeze_or_expand_dimensions( - mask, sample_weight=sample_weight)) - sample_weight *= mask - - if hasattr(loss_fn, 'reduction'): - per_sample_losses = loss_fn.call(y_true, y_pred) - weighted_losses = losses_utils.compute_weighted_loss( - per_sample_losses, - sample_weight=sample_weight, - reduction=losses_utils.ReductionV2.NONE) - loss_reduction = loss_fn.reduction - - # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all - # compile use cases. - if loss_reduction == losses_utils.ReductionV2.AUTO: - loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE - - # Compute the stateless loss value. - output_loss = losses_utils.reduce_weighted_loss( - weighted_losses, reduction=loss_reduction) - else: - # Compute the stateless loss value for a custom loss class. - # Here we assume that the class takes care of loss reduction - # because if this class returns a vector value we cannot - # differentiate between use case where a custom optimizer - # expects a vector loss value vs unreduced per-sample loss value. - output_loss = loss_fn(y_true, y_pred, sample_weight=sample_weight) - loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE - - if len(self.outputs) > 1: - # Keep track of stateful result tensor for the loss. - endpoint.output_loss_metric(output_loss) - - # Scale output loss for distribution. For custom losses we assume - # reduction was mean. - if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE: - output_loss = losses_utils.scale_loss_for_distribution(output_loss) - - loss_list.append(loss_weight * output_loss) - if not loss_list and not self.losses: - raise ValueError('The model cannot be compiled ' - 'because it has no loss to optimize.') - - # Add regularization penalties and other layer-specific losses. - custom_losses = self.get_losses_for(None) + self.get_losses_for( - self.inputs) - if custom_losses: - total_custom_loss = tf.add_n( - losses_utils.cast_losses_to_common_dtype(custom_losses)) - loss_list.append( - losses_utils.scale_loss_for_distribution(total_custom_loss)) - - loss_list = losses_utils.cast_losses_to_common_dtype(loss_list) - if loss_list: - total_loss = tf.add_n(loss_list) - else: - total_loss = 0. - return total_loss - - def _get_callback_model(self): - """Returns the Callback Model for this Model.""" - - if hasattr(self, '_replicated_model') and self._replicated_model: - # When using training_distributed, we set the callback model - # to an instance of the `DistributedModel` that we create in - # the `compile` call. The `DistributedModel` is initialized - # with the first replicated model. We need to set the callback - # model to a DistributedModel to allow us to override saving - # and loading weights when we checkpoint the model during training. 
- return self._replicated_model - if hasattr(self, 'callback_model') and self.callback_model: - return self.callback_model - return self - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _make_callback_model(self, grouped_model): - first_replicated_model = self._distribution_strategy.unwrap( - grouped_model)[0] - # We initialize the callback model with the first replicated model. - self._replicated_model = DistributedCallbackModel(first_replicated_model) - self._replicated_model.set_original_model(self) - - def _validate_or_infer_batch_size(self, batch_size, steps, x): - """Validates that the `batch_size` provided is consistent with InputLayer. - - It's possible that the user specified a static batch size in their - InputLayer. If so, this method checks the provided `batch_size` and `x` - arguments are consistent with this static batch size. Also, if - `batch_size` is `None`, this method will attempt to infer the batch size - from the static batch size of the InputLayer. Lastly, ValueError will be - raised if `x` is a tf.data.Dataset and `batch_size` is specified as we - expect users to provide batched datasets. - - Args: - batch_size: The batch_size provided as an argument to - fit/evaluate/predict. - steps: The steps provided as an argument to fit/evaluate/predict. - x: The data passed as `x` to fit/evaluate/predict. - - Returns: - The validated batch_size, auto-inferred from the first layer if not - provided. - """ - if (isinstance(x, (tf.compat.v1.data.Dataset, - tf.data.Dataset, - data_utils.Sequence)) or - tf_inspect.isgenerator(x)): - if batch_size is not None: - raise ValueError( - 'The `batch_size` argument must not be specified for the given ' - 'input type. Received input: {}, batch_size: {}'.format( - x, batch_size)) - return - - # Avoids the override in Sequential.layers which filters Input layers. - # (Which are often the very layers that we're after.) - layers = self._flatten_layers(include_self=False, recursive=False) - first_layer = next(layers, None) - if first_layer: - # The per-replica static batch size. - static_batch_size = training_utils.get_static_batch_size(first_layer) - if static_batch_size is not None: - - # Determine number of times the user-supplied batch size will be split. 
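The removed checks below encode simple arithmetic worth spelling out (illustrative numbers):

    # A global batch is split evenly across replicas in sync; the
    # per-replica value is what must agree with any static batch size
    # declared on the InputLayer.
    global_batch_size = 64
    num_replicas_in_sync = 8
    assert global_batch_size % num_replicas_in_sync == 0
    per_replica_batch_size = global_batch_size // num_replicas_in_sync  # 8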
- if (self._distribution_strategy and - distributed_training_utils.global_batch_size_supported( - self._distribution_strategy)): - num_splits_for_ds = self._distribution_strategy.num_replicas_in_sync + feedable = True + skip_target_weights = False + + if target is None: + target_dtype = losses.LABEL_DTYPES_FOR_LOSSES.get( + self.loss_fn, backend.dtype(self.output) + ) + + target = backend.placeholder( + ndim=len(self.shape), + name=self.output_name + "_target", + sparse=backend.is_sparse(self.output), + dtype=target_dtype, + ) + + self.training_target = _TrainingTarget( + target, + feedable=feedable, + skip_target_weights=skip_target_weights, + ) + + @property + def output_loss_metric(self): + return self._output_loss_metric + + @output_loss_metric.setter + def output_loss_metric(self, value): + self._output_loss_metric = value + + @property + def sample_weight(self): + return self._sample_weight + + @sample_weight.setter + def sample_weight(self, value): + self._sample_weight = value + + @property + def sample_weight_mode(self): + return self._sample_weight_mode + + @sample_weight_mode.setter + def sample_weight_mode(self, value): + self._sample_weight_mode = value + + def should_skip_target(self): + return self._loss_fn is None + + def should_skip_target_weights(self): + return ( + self.should_skip_target() + or self.training_target is None + or self.training_target.skip_target_weights + ) + + def has_training_target(self): + return self.training_target is not None + + def has_feedable_training_target(self): + return ( + not self.should_skip_target() + and self.training_target is not None + and self.training_target.feedable + ) + + def loss_name(self): + if self._loss_fn is not None: + return self._output_name + "_loss" + return None + + @property + def feed_output_shape(self): + """The output shape for the feedable target.""" + if not self.has_feedable_training_target(): + return None + + if ( + ( + isinstance(self.loss_fn, losses.LossFunctionWrapper) + and self.loss_fn.fn == losses.sparse_categorical_crossentropy + ) + ) or (isinstance(self.loss_fn, losses.SparseCategoricalCrossentropy)): + if backend.image_data_format() == "channels_first": + return (self.shape[0], 1) + self.shape[2:] + else: + return self.shape[:-1] + (1,) + elif not isinstance(self.loss_fn, losses.Loss) or ( + isinstance(self.loss_fn, losses.LossFunctionWrapper) + and (getattr(losses, self.loss_fn.fn.__name__, None) is None) + ): + # If the given loss is not an instance of the `Loss` class (custom + # class) or if the loss function that is wrapped is not in the + # `losses` module, then it is a user-defined loss and we make no + # assumptions about it. + return None else: - num_splits_for_ds = 1 - - # Check `batch_size` argument is consistent with InputLayer. - if batch_size is not None: - if batch_size % num_splits_for_ds != 0: - raise ValueError('The `batch_size` argument ({}) must be divisible ' - 'the by number of replicas ({})'.format( - batch_size, num_splits_for_ds)) - per_replica_batch_size = batch_size // num_splits_for_ds - - if per_replica_batch_size != static_batch_size: - raise ValueError('The `batch_size` argument value {} is ' - 'incompatible with the specified batch size of ' - 'your Input Layer: {}'.format( - per_replica_batch_size, static_batch_size)) - - # Check Dataset/Iterator batch size is consistent with InputLayer. 
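The auto-created target placeholder above picks its dtype from the loss via `losses.LABEL_DTYPES_FOR_LOSSES` (e.g. integer labels for sparse categorical crossentropy) and mirrors the output's sparseness. A rough sketch of the same idea using the public backend API, in v1 graph mode:

    import tensorflow.compat.v1 as tf

    tf.disable_eager_execution()
    backend = tf.keras.backend

    # For a softmax output trained with sparse categorical crossentropy,
    # the target placeholder is integer-typed rather than float.
    target = backend.placeholder(ndim=2, dtype="int64", name="out_target")
    print(target.dtype)  # <dtype: 'int64'>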
- if isinstance(x, (tf.data.Dataset, tf.compat.v1.data.Iterator, - tf.data.Iterator)): - ds_batch_size = tf.compat.v1.Dimension( - tf.nest.flatten(tf.compat.v1.data.get_output_shapes(x))[0][0]).value - if ds_batch_size is not None: - if ds_batch_size % num_splits_for_ds != 0: - raise ValueError( - 'The batch output shape of your `Dataset` {} ' - 'cannot be divisible by number of replicas {}'.format( - ds_batch_size, num_splits_for_ds)) - - ds_per_replica_batch_size = ds_batch_size // num_splits_for_ds - if ds_per_replica_batch_size != static_batch_size: - raise ValueError('The batch output shape of your `Dataset` is ' - '{}, which is incompatible with the specified ' - 'batch size of your Input Layer: {}'.format( - ds_per_replica_batch_size, - static_batch_size)) - - # Set inferred batch size from the InputLayer. - if steps is None: - batch_size = static_batch_size * num_splits_for_ds - - if batch_size is None and steps is None: - # Backwards compatibility - batch_size = 32 - return batch_size - - def _prepare_sample_weights(self, sample_weights=None): - """Sets sample weight attribute on the model.""" - # List with the same length as model outputs. - if sample_weights is not None: - if len(sample_weights) != len(self._training_endpoints): - raise ValueError('Provided sample weights must have same length as the ' - 'number of outputs. Expected: {}, got: {}.'.format( - len(self._training_endpoints), - len(sample_weights))) - else: - sample_weights = [None] * len(self._training_endpoints) - for endpoint, weight in zip(self._training_endpoints, sample_weights): - endpoint.populate_sample_weight(weight, endpoint.sample_weight_mode) - - def _cache_output_metric_attributes(self, metrics, weighted_metrics): - """Caches metric name and function attributes for every model output.""" - output_shapes = [] - for output in self.outputs: - if output is None or output.shape.rank is None: - output_shapes.append(None) - else: - output_shapes.append(output.shape.as_list()) - self._per_output_metrics = training_utils_v1.collect_per_output_metric_info( - metrics, self.output_names, output_shapes, self.loss_functions, - from_serialized=self._from_serialized) - self._per_output_weighted_metrics = ( - training_utils_v1.collect_per_output_metric_info( - weighted_metrics, - self.output_names, - output_shapes, - self.loss_functions, - from_serialized=self._from_serialized, - is_weighted=True)) - - def _add_unique_metric_name(self, metric_name, metric_fn, output_index): - """Makes the metric name unique. - - If there are multiple outputs for which the metrics are calculated, the - metric names have to be made unique by appending an integer. + return self.shape + + def sample_weights_mismatch(self): + """Check if the sample weight and the mode match or not.""" + # If there is a mismatch between sample weight mode and the placeholders + # created, then recompile the sub-graphs that depend on sample weights. 
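Spelled out, the condition implemented just below is an exclusive-or over "a mode is set" and "a weight placeholder exists" (illustrative restatement):

    def mismatch(sample_weight_mode, sample_weight):
        # Recompile is needed when exactly one of the two is set.
        return (sample_weight_mode is not None and sample_weight is None) or (
            sample_weight_mode is None and sample_weight is not None
        )

    assert mismatch("temporal", None)   # mode set, placeholder missing
    assert not mismatch(None, None)     # neither set: nothing to rebuild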
+ return ( + self.sample_weight_mode is not None and self.sample_weight is None + ) or ( + self.sample_weight_mode is None and self.sample_weight is not None + ) + + def populate_sample_weight(self, sample_weight, sample_weight_mode): + """Populate the sample weight and based on the sample weight mode.""" + if sample_weight is None and ( + self.should_skip_target_weights() + or sample_weight_mode is None + or tf.executing_eagerly() + ): + self._sample_weight = None + return + + assert sample_weight_mode in ["temporal", "samplewise"] + if sample_weight_mode == "temporal": + default_value = [[1.0]] + shape = [None, None] + else: + # sample_weight_mode == 'samplewise' + default_value = [1.0] + shape = [None] + + if sample_weight is not None: + if not sample_weight.shape.is_compatible_with(shape): + raise ValueError( + "Received sample weight with shape {}. Expected shape " + "{}.".format(sample_weight.shape, shape) + ) + self._sample_weight = sample_weight + else: + self._sample_weight = tf.compat.v1.placeholder_with_default( + tf.constant(default_value, dtype=backend.floatx()), + shape=shape, + name=self.output_name + "_sample_weights", + ) - Args: - metric_name: Metric name that corresponds to the metric specified by the - user. For example: 'acc'. - metric_fn: The Metric object. - output_index: The index of the model output for which the metric name is - being added. - Returns: - string, name of the model's unique metric name - """ - # For multi-output models, prepend the output names to the metric name. - if len(self.output_names) > 1: - # If we're loading from an already-serialized model, we've already - # prepended the output name, and we don't want to do it again. - # - # Alternatively, we may be receiving a stateless metric (e.g. the string - # "accuracy") rather than a `Metric` object, in which case we want to - # prepend the output name even if we are loading a serialized model. - if not getattr(metric_fn, '_from_serialized', False): - metric_name = '%s_%s' % (self.output_names[output_index], metric_name) - - j = 1 - base_metric_name = metric_name - while metric_name in self.metrics_names: - metric_name = '%s_%d' % (base_metric_name, j) - j += 1 - - return metric_name - - def _init_metric_attributes(self): - """Initialized model metric attributes.""" - # List of stateful metric functions. Used for resetting metric state during - # training/eval. - self._compile_metric_functions = [] - - def _set_per_output_metric_attributes(self, metrics_dict, output_index): - """Sets the metric attributes on the model for the given output. +class _TrainingTarget: + """Container for a target tensor (y_true) and its metadata (shape, loss...). Args: - metrics_dict: A dict with metric names as keys and metric fns as values. - output_index: The index of the model output for which the metric - attributes are added. - - Returns: - Metrics dict updated with unique metric names as keys. + target: A target tensor for the model. It may be `None` if the + output is excluded from loss computation. It is still kept as None + since each output of the model should have a corresponding target. If + the target is None, the rest of the attributes will be None as well. + feedable: Boolean, whether the target is feedable (requires data to be + passed in `fit` or `train_on_batch`), or not (model compiled with + `target_tensors` argument). + skip_target_weights: Boolean, whether the target should be skipped during + weights calculation. 
""" - updated_metrics_dict = collections.OrderedDict() - for metric_name, metric_fn in metrics_dict.items(): - metric_name = self._add_unique_metric_name( - metric_name, metric_fn, output_index) - - # Update the name on the metric class to be the unique generated name. - metric_fn._name = metric_name # pylint: disable=protected-access - updated_metrics_dict[metric_name] = metric_fn - # Keep track of metric name and function. - self._compile_metric_functions.append(metric_fn) - return updated_metrics_dict - - def _set_metric_attributes(self): - """Sets the metric attributes on the model for all the model outputs.""" - updated_per_output_metrics = [] - updated_per_output_weighted_metrics = [] - for i, endpoint in enumerate(self._training_endpoints): - if endpoint.should_skip_target(): - updated_per_output_metrics.append(self._per_output_metrics[i]) - updated_per_output_weighted_metrics.append( - self._per_output_weighted_metrics[i]) - continue - updated_per_output_metrics.append( - self._set_per_output_metric_attributes(self._per_output_metrics[i], - i)) - updated_per_output_weighted_metrics.append( - self._set_per_output_metric_attributes( - self._per_output_weighted_metrics[i], i)) - - # Create a metric wrapper for each output loss. This computes mean of an - # output loss across mini-batches (irrespective of how we reduce within a - # batch). - if len(self._training_endpoints) > 1: - for endpoint in self._training_endpoints: - if not endpoint.should_skip_target(): - endpoint.output_loss_metric = metrics_module.Mean( - name=endpoint.loss_name()) - - self._per_output_metrics = updated_per_output_metrics - self._per_output_weighted_metrics = updated_per_output_weighted_metrics - - def _handle_per_output_metrics(self, - metrics_dict, - y_true, - y_pred, - mask, - weights=None): - """Calls metric functions for a single output. - - Args: - metrics_dict: A dict with metric names as keys and metric fns as values. - y_true: Target output. - y_pred: Predicted output. - mask: Computed mask value for the current output. - weights: Weights to be applied on the current output. - Returns: - A list of metric result tensors. - """ - metric_results = [] - for metric_name, metric_fn in metrics_dict.items(): - with backend.name_scope(metric_name): - metric_result = training_utils_v1.call_metric_function( - metric_fn, y_true, y_pred, weights=weights, mask=mask) - metric_results.append(metric_result) - return metric_results - - def _handle_metrics(self, - outputs, - targets=None, - skip_target_masks=None, - sample_weights=None, - masks=None, - return_weighted_metrics=False, - return_weighted_and_unweighted_metrics=False): - """Handles calling metric functions. + def __init__(self, target, feedable=False, skip_target_weights=True): + self._target = target + self._feedable = feedable + self._skip_target_weights = skip_target_weights - Args: - outputs: List of outputs (predictions). - targets: List of targets. - skip_target_masks: Optional. List of boolean for whether the corresponding - target should be ignored or not. - sample_weights: Optional list of sample weight arrays. - masks: List of computed output mask values. - return_weighted_metrics: Flag that indicates whether weighted metrics - should be computed instead of unweighted metrics. This flag is ignored - when `return_weighted_and_unweighted_metrics` is enabled. - return_weighted_and_unweighted_metrics: Flag that is used to indicate - whether both weighted and unweighted metrics should be computed. 
When - this is not enabled, we use `return_weighted_metrics` param to indicate - whether weighted or unweighted metrics should be returned. + @property + def target(self): + return self._target - Returns: - A list of metric result tensors. - """ - # TODO(scottzhu): Update this to use the new training_endpoints. Currently - # the eager and graph logic is bit different. - skip_target_masks = skip_target_masks or [False] * len(outputs) - metric_results = [] - with backend.name_scope('metrics'): - # Invoke all metrics added using `compile`. - for i in range(len(outputs)): - if skip_target_masks[i]: - continue - output = outputs[i] if outputs else None - target = targets[i] if targets else None - output_mask = masks[i] if masks else None - - if (return_weighted_and_unweighted_metrics or - not return_weighted_metrics): - metric_results.extend( - self._handle_per_output_metrics(self._per_output_metrics[i], - target, output, output_mask)) - if return_weighted_and_unweighted_metrics or return_weighted_metrics: - metric_results.extend( - self._handle_per_output_metrics( - self._per_output_weighted_metrics[i], - target, - output, - output_mask, - weights=sample_weights[i] if sample_weights else None)) - return metric_results - - def _check_trainable_weights_consistency(self): - """Check trainable weights count consistency. - - This will raise a warning if `trainable_weights` and - `_collected_trainable_weights` are inconsistent (i.e. have different - number of parameters). - Inconsistency will typically arise when one modifies `model.trainable` - without calling `model.compile` again. - """ - if not hasattr(self, '_collected_trainable_weights'): - return - - if len(self.trainable_weights) != len(self._collected_trainable_weights): - logging.log_first_n( - logging.WARN, 'Discrepancy between trainable weights and collected' - ' trainable weights, did you set `model.trainable`' - ' without calling `model.compile` after ?', 1) - - def _make_train_function(self): - has_recompiled = self._recompile_weights_loss_and_weighted_metrics() - self._check_trainable_weights_consistency() - if isinstance(self.optimizer, list): - raise ValueError('The `optimizer` in `compile` should be a single ' - 'optimizer.') - # If we have re-compiled the loss/weighted metric sub-graphs then create - # train function even if one exists already. This is because - # `_feed_sample_weights` list has been updated on re-compile. - if getattr(self, 'train_function', None) is None or has_recompiled: - # Restore the compiled trainable state. 
- current_trainable_state = self._get_trainable_state() - self._set_trainable_state(self._compiled_trainable_state) - - inputs = (self._feed_inputs + - self._feed_targets + - self._feed_sample_weights) - if not isinstance(backend.symbolic_learning_phase(), int): - inputs += [backend.symbolic_learning_phase()] - - with backend.get_graph().as_default(): - with backend.name_scope('training'): - # Training updates - updates = self.optimizer.get_updates( - params=self._collected_trainable_weights, loss=self.total_loss) - # Unconditional updates - updates += self.get_updates_for(None) - # Conditional updates relevant to this model - updates += self.get_updates_for(self.inputs) + @property + def feedable(self): + return self._feedable - metrics = self._get_training_eval_metrics() - metrics_tensors = [ - m._call_result for m in metrics if hasattr(m, '_call_result') # pylint: disable=protected-access - ] + @property + def skip_target_weights(self): + return self._skip_target_weights - with backend.name_scope('training'): - # Gets loss and metrics. Updates weights at each call. - fn = backend.function( - inputs, [self.total_loss] + metrics_tensors, - updates=updates, - name='train_function', - **self._function_kwargs) - setattr(self, 'train_function', fn) - - # Restore the current trainable state - self._set_trainable_state(current_trainable_state) - - def _make_test_function(self): - has_recompiled = self._recompile_weights_loss_and_weighted_metrics() - # If we have re-compiled the loss/weighted metric sub-graphs then create - # test function even if one exists already. This is because - # `_feed_sample_weights` list has been updated on re-compile. - if getattr(self, 'test_function', None) is None or has_recompiled: - inputs = (self._feed_inputs + - self._feed_targets + - self._feed_sample_weights) - - with backend.get_graph().as_default(): - metrics = self._get_training_eval_metrics() - metrics_tensors = [ - m._call_result for m in metrics if hasattr(m, '_call_result') # pylint: disable=protected-access - ] - with backend.name_scope('evaluation'): - updates = self.state_updates - # Return loss and metrics, no gradient updates. - # Does update the network states. - fn = backend.function( - inputs, [self.total_loss] + metrics_tensors, - updates=updates, - name='test_function', - **self._function_kwargs) - setattr(self, 'test_function', fn) - - def _make_predict_function(self): - if not hasattr(self, 'predict_function'): - self.predict_function = None - if self.predict_function is None: - inputs = self._feed_inputs - # Gets network outputs. Does not update weights. - # Does update the network states. - kwargs = getattr(self, '_function_kwargs', {}) - with backend.name_scope(ModeKeys.PREDICT): - self.predict_function = backend.function( - inputs, - self.outputs, - updates=self.state_updates, - name='predict_function', - **kwargs) - - def _make_execution_function(self, mode): - if mode == ModeKeys.TRAIN: - self._make_train_function() - return self.train_function - if mode == ModeKeys.TEST: - self._make_test_function() - return self.test_function - if mode == ModeKeys.PREDICT: - self._make_predict_function() - return self.predict_function - - def _distribution_standardize_user_data(self, - x, - y=None, - sample_weight=None, - class_weight=None, - batch_size=None, - validation_split=0., - shuffle=False, - epochs=1, - allow_partial_batch=False): - """Runs validation checks on input and target data passed by the user. 
- - This is called when using tf.distribute.Strategy to train, evaluate or serve - the model. +def _is_symbolic_tensor(x): + return tf.is_tensor(x) - Args: - x: Input data. A numpy array or `tf.data` dataset. - y: Target data. A numpy array or None if x is a `tf.data` dataset. - sample_weight: An optional sample-weight array passed by the user to - weight the importance of each sample in `x`. - class_weight: An optional class-weight array by the user to - weight the importance of samples in `x` based on the class they belong - to, as conveyed by `y`. - batch_size: Integer batch size. If provided, it is used to run additional - validation checks on stateful models. - validation_split: Float between 0 and 1. - Fraction of the training data to be used as validation data. - shuffle: Boolean whether to shuffle the training data before each epoch. - epochs: Integer epochs. If > 1, repeat the numpy training data epochs - times when converting to training dataset. - allow_partial_batch: Boolean whether to enforce that all batches have the - same size. - Returns: - Dataset instance. +def _convert_scipy_sparse_tensor(value, expected_input): + """Handle scipy sparse tensor conversions. - Raises: - ValueError: In case of invalid user-provided data. - RuntimeError: If the model was never compiled. - """ - if class_weight: - raise NotImplementedError('`class_weight` is currently not supported ' - 'when using tf.distribute.Strategy.') - - if (sample_weight is not None and sample_weight.all() and - backend.is_tpu_strategy(self._distribution_strategy)): - raise NotImplementedError('`sample_weight` is currently not supported ' - 'when using TPUStrategy.') - - # Validates `steps` and `shuffle` arguments right at the beginning - # since we use it to construct the dataset object. - # TODO(anjalisridhar): Remove this check once we refactor the - # _standardize_user_data code path. This check is already present elsewhere - # in the codebase. - if isinstance(x, tf.data.Dataset): - if shuffle: - training_utils_v1.verify_dataset_shuffled(x) - - strategy = self._distribution_strategy - with strategy.scope(): - # We should be sure to call get_session() inside the strategy.scope() - # so the strategy can affect the session options. - if tf.compat.v1.executing_eagerly_outside_functions(): - session = None - else: - session = backend.get_session() - - first_x_value = tf.nest.flatten(x)[0] - if isinstance(first_x_value, np.ndarray): - x = training_utils.list_to_tuple(x) - if y is not None: - y = training_utils.list_to_tuple(y) - if sample_weight is not None: - sample_weight = training_utils.list_to_tuple(sample_weight) - in_tuple = (x, y, sample_weight) - else: - in_tuple = (x, y) - else: - in_tuple = x - - ds = strategy.extended.experimental_make_numpy_dataset(in_tuple, - session=session) - if shuffle: - # We want a buffer size that is larger than the batch size provided by - # the user and provides sufficient randomness. Note that larger - # numbers introduce more memory usage based on the size of each - # sample. - ds = ds.shuffle(max(1024, batch_size * 8)) - if epochs > 1: - ds = ds.repeat(epochs) - - # We need to use the drop_remainder argument to get a known static - # input shape which is required for TPUs. - drop_remainder = (not allow_partial_batch and - strategy.extended.experimental_require_static_shapes) - - # TODO(b/131720208): We still drop remainder here if number of examples - # is divisible by batch size, as sometimes dynamic padder will time out - # with keras.metrics.CategoricalAccuracy() metric. 
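For reference, the `drop_remainder` behaviour relied on here comes straight from `tf.data`: with it, every batch has a fully static shape (which TPUs require), at the cost of discarding the final partial batch:

    import tensorflow as tf

    ds = tf.data.Dataset.range(10).batch(4, drop_remainder=True)
    for batch in ds:
        print(batch.numpy())  # [0 1 2 3], then [4 5 6 7]; the last two
                              # elements are dropped to keep shapes static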
- if backend.is_tpu_strategy(strategy) and not drop_remainder: - dataset_size = first_x_value.shape[0] - if dataset_size % batch_size == 0: - drop_remainder = True - - x = ds.batch(batch_size, drop_remainder=drop_remainder) - else: - assert isinstance(x, tf.data.Dataset) - training_utils_v1.validate_dataset_input(x, y, sample_weight, - validation_split) - return x - - def _standardize_user_data(self, - x, - y=None, - sample_weight=None, - class_weight=None, - batch_size=None, - check_steps=False, - steps_name='steps', - steps=None, - validation_split=0., - shuffle=False, - extract_tensors_from_dataset=False): - """Runs validation checks on input and target data passed by the user. - - Also standardizes the data to lists of arrays, in order. - - Also builds and compiles the model on the fly if it is a subclassed model - that has never been called before (and thus has no inputs/outputs). - - This is a purely internal method, subject to refactoring at any time. + This method takes a value 'value' and returns the proper conversion. If + value is a scipy sparse tensor and the expected input is a dense tensor, + we densify 'value'. If value is a scipy sparse tensor and the expected input + is a TF SparseTensor, we convert 'value' to a SparseTensor. If 'value' is + not a scipy sparse tensor, or scipy is not imported, we pass it through + unchanged. Args: - x: Input data. It could be: - - A Numpy array (or array-like), or a list of arrays - (in case the model has multiple inputs). - - A TensorFlow tensor, or a list of tensors - (in case the model has multiple inputs). - - A dict mapping input names to the corresponding array/tensors, - if the model has named inputs. - - A `tf.data` dataset. - y: Target data. Like the input data `x`, - it could be either Numpy array(s) or TensorFlow tensor(s). - It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). If `x` is a dataset, `y` should not be - specified (since targets will be obtained from the iterator). - sample_weight: An optional sample-weight array passed by the user to - weight the importance of each sample in `x`. - class_weight: An optional class-weight array by the user to - weight the importance of samples in `x` based on the class they belong - to, as conveyed by `y`. If both `sample_weight` and `class_weight` are - provided, the weights are multiplied. - batch_size: Integer batch size. If provided, it is used to run additional - validation checks on stateful models. - check_steps: boolean, True if we want to check for validity of `steps` and - False, otherwise. For example, when we are standardizing one batch of - data for train_on_batch/predict_on_batch/test_on_batch APIs, `steps` - value is not required and we should not check for its validity in these - cases. - steps_name: The public API's parameter name for `steps`. - steps: Integer or `None`. Total number of steps (batches of samples) to - execute. - validation_split: Float between 0 and 1. - Fraction of the training data to be used as validation data. - shuffle: Boolean whether to shuffle the training data before each epoch. - extract_tensors_from_dataset: Boolean. When `x` is a dataset instance, - this indicates whether to extract actual tensors from the dataset or - instead output the dataset instance itself. - Set to True when calling from `train_on_batch`/etc. + value: An object that may be a scipy sparse tensor + expected_input: The expected input placeholder. 
     Returns:
-      A tuple of 3: inputs (arrays or dicts, depending on whether `x` was a
-      dict or not), target arrays, sample-weight arrays.
-      If the model's input and targets are symbolic, these lists are empty
-      (since the model takes no user-provided data, instead the data comes
-      from the symbolic inputs/targets).
-
-    Raises:
-      ValueError: In case of invalid user-provided data.
-      RuntimeError: If the model was never compiled.
+        The possibly-converted 'value'.
     """
-    if isinstance(x, (tf.compat.v1.data.Dataset, tf.data.Dataset)):
-      # Graph mode dataset. We'll pass the dataset as-is (unless
-      # `extract_tensors_from_dataset` is True, in which case we extract
-      # the tensors from the dataset and output them).
-      training_utils_v1.validate_dataset_input(x, y, sample_weight,
-                                               validation_split)
-      if shuffle:
-        training_utils_v1.verify_dataset_shuffled(x)
-
-      is_dataset = True
-      if extract_tensors_from_dataset:
-        # We do this for `train_on_batch`/etc.
-        x, y, sample_weight = training_utils_v1.extract_tensors_from_dataset(x)
-    elif isinstance(x, tf.compat.v1.data.Iterator):
-      # Graph mode iterator. We extract the symbolic tensors.
-      training_utils_v1.validate_dataset_input(x, y, sample_weight,
-                                               validation_split)
-      iterator = x
-      x, y, sample_weight = training_utils_v1.unpack_iterator_input(iterator)
-      is_dataset = True
+    if issparse is not None and issparse(value):
+        if backend.is_sparse(expected_input):
+            sparse_coo = value.tocoo()
+            row, col = sparse_coo.row, sparse_coo.col
+            data, shape = sparse_coo.data, sparse_coo.shape
+            indices = np.concatenate(
+                (np.expand_dims(row, 1), np.expand_dims(col, 1)), 1
+            )
+            return tf.SparseTensor(indices, data, shape)
+        else:
+            if tf.compat.v1.executing_eagerly_outside_functions():
+                # In TF2 we do not silently densify sparse matrices.
+                raise ValueError(
+                    "A SciPy sparse matrix was passed to a model "
+                    "that expects dense inputs. Please densify your "
+                    "inputs first, such as by calling `x.toarray()`."
+                )
+            return value.toarray()
     else:
-      is_dataset = False
+        return value
 
-    # Validates `steps` argument based on x's type.
-    if check_steps:
-      training_utils_v1.check_steps_argument(x, steps, steps_name)
-    # First, we build the model on the fly if necessary.
-    if not self.inputs:
-      all_inputs, y_input, dict_inputs = self._build_model_with_inputs(x, y)
-      is_build_called = True
-    else:
-      all_inputs = []
-      # Whether this is a subclassed model that expects dictionary inputs
-      # rather than list inputs (e.g. FeatureColumn-based models).
-      dict_inputs = isinstance(self.inputs, dict)
-      is_build_called = False
-      y_input = y
-
-    # Second, we compile the model on the fly if necessary, mostly for
-    # subclass models.
-    is_compile_called = False
-    if not self._is_compiled and self.optimizer:
-      self._compile_from_inputs(all_inputs, y_input, x, y)
-      is_compile_called = True
-
-    # In graph mode, if we had just set inputs and targets as symbolic tensors
-    # by invoking build and compile on the model respectively, we do not have
-    # to feed anything to the model. The model already has input and target
-    # data as part of the graph.
-    # Note: in this case, `any` and `all` are equivalent since we disallow
-    # mixed symbolic/value inputs.
-
-    # self.run_eagerly is not free to compute, so we want to reuse the value.
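For reference, a minimal sketch of the COO-to-`tf.SparseTensor` path that the new `_convert_scipy_sparse_tensor` above takes; the input matrix is invented for the demo:

```python
import numpy as np
import tensorflow as tf
from scipy.sparse import csr_matrix

value = csr_matrix(np.array([[0.0, 2.0], [3.0, 0.0]]))
coo = value.tocoo()
# Stack (row, col) pairs into an (nnz, 2) index matrix, as the function does.
indices = np.concatenate(
    (np.expand_dims(coo.row, 1), np.expand_dims(coo.col, 1)), 1
)
st = tf.SparseTensor(indices, coo.data, coo.shape)
print(tf.sparse.to_dense(st).numpy())  # [[0. 2.], [3. 0.]]
```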
- run_eagerly = self.run_eagerly - - if (not run_eagerly and is_build_called and is_compile_called and - not is_dataset and any(_is_symbolic_tensor(v) for v in all_inputs)): - return [], [], None - - return self._standardize_tensors( - x, y, sample_weight, - run_eagerly=run_eagerly, - dict_inputs=dict_inputs, - is_dataset=is_dataset, - class_weight=class_weight, - batch_size=batch_size) - - def _standardize_tensors(self, x, y, sample_weight, run_eagerly, dict_inputs, - is_dataset, class_weight=None, batch_size=None): - if run_eagerly: - # In eager mode, do not do shape validation - # since the network has no input nodes (placeholders) to be fed. - feed_input_names = self.input_names - feed_input_shapes = None - elif not self._is_graph_network: - # Case: symbolic-mode subclassed network. Do not do shape validation. - feed_input_names = self._feed_input_names - feed_input_shapes = None - else: - # Case: symbolic-mode graph network. - # In this case, we run extensive shape validation checks. - feed_input_names = self._feed_input_names - feed_input_shapes = self._feed_input_shapes - - # Standardize the inputs. - if not isinstance(x, (tf.compat.v1.data.Dataset, tf.data.Dataset)): - # TODO(fchollet): run static checks with dataset output shape(s). - x = training_utils_v1.standardize_input_data( - x, - feed_input_names, - feed_input_shapes, - check_batch_axis=False, # Don't enforce the batch size. - exception_prefix='input') - - # Get typespecs for the input data and sanitize it if necessary. - # TODO(momernick): This should be capable of doing full input validation - # at all times - validate that this is so and refactor the standardization - # code. - if isinstance(x, tf.data.Dataset): - x_shapes = tf.data.experimental.get_structure(x) - if isinstance(x_shapes, tuple): - # If the output of a Dataset is a tuple, we assume it's either of the - # form (x_data, y_data) or (x_data, y_data, sample_weights). In either - # case, we only care about x_data here. - x_shapes = x_shapes[0] - else: - flat_inputs = tf.nest.flatten(x, expand_composites=False) - flat_expected_inputs = tf.nest.flatten(self.inputs, expand_composites=False) - converted_x = [] - for (a, b) in zip(flat_inputs, flat_expected_inputs): - converted_x.append(_convert_scipy_sparse_tensor(a, b)) - x = tf.nest.pack_sequence_as(x, converted_x, expand_composites=False) - - def _type_spec_from_value(value): - """Grab type_spec without converting array-likes to tensors.""" - if tf_utils.is_extension_type(value): - return value._type_spec # pylint: disable=protected-access - # Get a TensorSpec for array-like data without - # converting the data to a Tensor - if hasattr(value, 'shape') and hasattr(value, 'dtype'): - return tf.TensorSpec(value.shape, value.dtype) - else: - return tf.type_spec_from_value(value) - - x_shapes = tf.nest.map_structure(_type_spec_from_value, x) - - flat_inputs = tf.nest.flatten(x_shapes, expand_composites=False) - flat_expected_inputs = tf.nest.flatten(self.inputs, expand_composites=False) - for (a, b) in zip(flat_inputs, flat_expected_inputs): - tf.nest.assert_same_structure(a, b, expand_composites=True) - - if y is not None: - # Prepare self._sample_weight_modes. List with the same length as - # model outputs. 
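The `_type_spec_from_value` helper deleted above avoids materializing tensors when all it needs is a spec. A rough illustration with standard TensorFlow APIs (the values are invented):

```python
import numpy as np
import tensorflow as tf

x = np.zeros((2, 3), dtype=np.float32)
# Array-likes expose shape/dtype, so a TensorSpec can be built directly
# without converting the data to a Tensor first.
print(tf.TensorSpec(x.shape, x.dtype))
# Composite and other values fall back to tf.type_spec_from_value.
print(tf.type_spec_from_value(tf.ragged.constant([[1], [2, 3]])))
```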
- training_utils_v1.prepare_sample_weight_modes(self._training_endpoints, - self.sample_weight_mode) - feed_output_names = self._feed_output_names - feed_sample_weight_modes = self._sample_weight_modes - if not self._is_graph_network: - feed_output_shapes = None - else: - feed_output_shapes = self._feed_output_shapes - - # Standardize the outputs. - y = training_utils_v1.standardize_input_data( - y, - feed_output_names, - # Don't enforce target shapes to match output shapes. - # Precise checks will be run in `check_loss_and_target_compatibility`. - shapes=None, - check_batch_axis=False, # Don't enforce the batch size. - exception_prefix='target') - - # Generate sample-wise weight values given the `sample_weight` and - # `class_weight` arguments. - sample_weights = training_utils_v1.standardize_sample_weights( - sample_weight, feed_output_names) - class_weights = training_utils_v1.standardize_class_weights( - class_weight, feed_output_names) - - sample_weights = [ - training_utils_v1.standardize_weights(ref, sw, cw, mode) - for (ref, sw, cw, mode) in zip(y, sample_weights, class_weights, - feed_sample_weight_modes) - ] - # Check that all arrays have the same length. - if not self._distribution_strategy: - training_utils_v1.check_array_lengths(x, y, sample_weights) - if self._is_graph_network and not run_eagerly: - # Additional checks to avoid users mistakenly using improper loss fns. - training_utils_v1.check_loss_and_target_compatibility( - y, self._feed_loss_fns, feed_output_shapes) - - sample_weights, _, _ = training_utils.handle_partial_sample_weights( - y, sample_weights, feed_sample_weight_modes, check_all_flat=True) - else: - y = [] - sample_weights = None - - if self.stateful and batch_size and not is_dataset: - # Check that for stateful networks, number of samples is a multiple - # of the static batch size. - if x[0].shape[0] % batch_size != 0: - raise ValueError('In a stateful network, ' - 'you should only pass inputs with ' - 'a number of samples that can be ' - 'divided by the batch size. Found: ' + - str(x[0].shape[0]) + ' samples') - - # If dictionary inputs were provided, we return a dictionary as well. - if dict_inputs and not isinstance(x, (tf.compat.v1.data.Dataset, - tf.data.Dataset)): - x = dict(zip(feed_input_names, x)) - return x, y, sample_weights - - def _build_model_with_inputs(self, inputs, targets): - """Build the model (set model inputs/outputs), mainly for subclass model.""" - processed_inputs = [] - is_dict_inputs = False - orig_inputs = inputs - # We need to use `inputs` to set the model inputs. - # If input data is a dataset iterator in graph mode or if it is an eager - # iterator and only one batch of samples is required, we fetch the data - # tensors from the iterator and then standardize them. - if isinstance(inputs, (tf.compat.v1.data.Dataset, tf.data.Dataset)): - inputs, targets, _ = training_utils_v1.extract_tensors_from_dataset( - inputs) - # We type-check that `inputs` and `targets` are either single arrays - # or lists of arrays, and extract a flat list of inputs from the passed - # structure. 
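The stateful-model branch above boils down to a divisibility check. A toy restatement (the numbers are invented):

```python
num_samples, batch_size = 96, 32
# Stateful networks carry state across batches, so every batch must be full.
assert num_samples % batch_size == 0, (
    "In a stateful network, the number of samples must be divisible by "
    "the batch size."
)
```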
- training_utils_v1.validate_input_types(inputs, orig_inputs) - - if isinstance(inputs, (list, tuple)): - processed_inputs += list(inputs) - elif isinstance(inputs, dict): - is_dict_inputs = True - keys = sorted(inputs.keys()) - processed_inputs = [inputs[k] for k in keys] - else: - processed_inputs.append(inputs) - # Now that we have a flat set of inputs, we make sure that none of them - # are CompositeTensors or CompositeTensorValues of any type (or scipy - # sparse arrays, which we treat as SparseTensor values). We cannot safely - # infer input data from an arbitrary composite tensor, so we don't try - - # users should explicitly add composite tensor inputs to their subclassed - # models. - for input_tensor in processed_inputs: - if training_utils_v1.is_composite_or_composite_value(input_tensor): - # TODO(b/132691975): Document subclass-model CT input handling. - raise ValueError( - 'All SparseTensor and RaggedTensor inputs must be explicitly ' - 'declared using a keras.Input() with sparse=True or ragged=True. ' - 'We found an undeclared input %s. For Sequential models, please ' - 'add a keras.Input() as your first Layer. For subclassed models, ' - 'please call self._set_inputs() on your input set, which you can ' - 'create using keras.Input() for each input to your model.' % - (input_tensor,)) - # Build the model using the retrieved inputs (value or symbolic). - # If values are generated from a dataset, then in symbolic-mode - # placeholders will be created to match the value shapes. - if isinstance(orig_inputs, (tf.compat.v1.data.Dataset, tf.data.Dataset, - tf.compat.v1.data.Iterator)): - if not self.inputs: - # For subclassed models, a robust input spec is not available so we - # must cast to the model dtype. - inputs = training_utils_v1.cast_if_floating_dtype(inputs, self.dtype) - - def create_tensor_spec(t): - return tf.TensorSpec(t.shape, t.dtype) - - cast_inputs = tf.nest.map_structure(create_tensor_spec, inputs) - elif training_utils_v1.has_tensors(inputs): - cast_inputs = training_utils_v1.cast_if_floating_dtype(inputs) - else: - cast_inputs = inputs - self._set_inputs(cast_inputs) - return processed_inputs, targets, is_dict_inputs - - def _compile_from_inputs(self, all_inputs, target, orig_inputs, orig_target): - if target is not None: - # We need to use `y` to set the model targets. - if training_utils_v1.has_tensors(target): - target = training_utils_v1.cast_if_floating_dtype_and_mismatch( - target, self.outputs) - training_utils_v1.validate_input_types( - target, orig_target, allow_dict=False, field_name='target') - if isinstance(target, (list, tuple)): - all_inputs += list(target) - else: - all_inputs.append(target) - # Type check that all inputs are *either* value *or* symbolic. - # TODO(fchollet): this check could be removed in Eager mode? - if any(tf.is_tensor(v) for v in all_inputs): - if not all(tf.is_tensor(v) for v in all_inputs): - raise ValueError('Do not pass inputs that mix Numpy arrays and ' - 'TensorFlow tensors. ' - 'You passed: x=' + str(orig_inputs) + - '; y=' + str(orig_target)) - is_dataset = isinstance(orig_inputs, (tf.compat.v1.data.Dataset, - tf.data.Dataset, - tf.compat.v1.data.Iterator)) - if is_dataset or tf.executing_eagerly(): - target_tensors = None - else: - # Handle target tensors if any passed. 
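The any/all check in `_compile_from_inputs` above rejects mixed symbolic and NumPy inputs. A minimal sketch of the same condition (the inputs are invented):

```python
import numpy as np
import tensorflow as tf

all_inputs = [np.zeros((2, 2)), tf.zeros((2, 2))]
if any(tf.is_tensor(v) for v in all_inputs) and not all(
    tf.is_tensor(v) for v in all_inputs
):
    print("would raise: do not mix NumPy arrays and TensorFlow tensors")
```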
- if target is not None: - if not isinstance(target, (list, tuple)): - target = [target] - target_tensors = [v for v in target if _is_symbolic_tensor(v)] - else: - target_tensors = None - - self.compile( - optimizer=self.optimizer, - loss=self.loss, - metrics=self._compile_metrics, - weighted_metrics=self._compile_weighted_metrics, - loss_weights=self.loss_weights, - target_tensors=target_tensors, - sample_weight_mode=self.sample_weight_mode, - run_eagerly=self.run_eagerly, - experimental_run_tf_function=self._experimental_run_tf_function) - - # TODO(omalleyt): Consider changing to a more descriptive function name. - def _set_inputs(self, inputs, outputs=None, training=None): - """Set model's input and output specs based on the input data received. - - This is to be used for Model subclasses, which do not know at instantiation - time what their inputs look like. +def _get_metrics_from_layers(layers): + """Returns list of metrics from the given layers. - Args: - inputs: Single array, or list of arrays. The arrays could be placeholders, - Numpy arrays, data tensors, or TensorSpecs. - - if placeholders: the model is built on top of these placeholders, - and we expect Numpy data to be fed for them when calling `fit`/etc. - - if Numpy data or TensorShapes: we create placeholders matching the - TensorShapes or shapes of the Numpy arrays. We expect Numpy data to be - fed for these placeholders when calling `fit`/etc. - - if data tensors: the model is built on top of these tensors. - We do not expect any Numpy data to be provided when calling `fit`/etc. - outputs: None, a data tensor, or a list of tensors. If None, the - outputs will be determined by invoking `self.call()`, otherwise the - provided value will be used. - training: Boolean or None. Only relevant in symbolic mode. Specifies - whether to build the model's graph in inference mode (False), training - mode (True), or using the Keras learning phase (None). - Raises: - ValueError: If dict inputs are passed to a Sequential Model where the - first layer isn't FeatureLayer. - """ - self._set_save_spec(inputs) - inputs = self._set_input_attrs(inputs) - - if outputs is None: - kwargs = {} - if self._expects_training_arg: - # In V2 mode, feeding `training=None` is not allowed because any value - # explicitly passed by the user is respected, even `None`.` - if training is None and not tf.compat.v1.executing_eagerly_outside_functions(): - training = backend.learning_phase() - if training is not None: - kwargs['training'] = training - try: - outputs = self(inputs, **kwargs) - except NotImplementedError: - # This Model or a submodel is dynamic and hasn't overridden - # `compute_output_shape`. - outputs = None - - self._set_output_attrs(outputs) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _set_input_attrs(self, inputs): - """Sets attributes related to the inputs of the Model.""" - if self.inputs: - raise ValueError('Model inputs are already set.') - - if self.__class__.__name__ == 'Sequential' and not self.built: - if tf.is_tensor(inputs): - input_shape = (None,) + tuple(inputs.shape.as_list()[1:]) - elif isinstance(inputs, tf.TensorShape): - input_shape = (None,) + tuple(inputs.as_list()[1:]) - elif isinstance(inputs, dict): - # We assert that the first layer is a FeatureLayer. 
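The Sequential branch of `_set_input_attrs` above infers an input shape by replacing the batch axis with `None`. A one-line restatement (the array is invented):

```python
import numpy as np

x = np.zeros((32, 28, 28))
# Keep the trailing dims, leave the batch dimension unknown.
input_shape = (None,) + tuple(x.shape[1:])
print(input_shape)  # (None, 28, 28)
```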
- if not training_utils_v1.is_feature_layer(self.layers[0]): - raise ValueError('Passing a dictionary input to a Sequential Model ' - 'which doesn\'t have FeatureLayer as the first layer' - ' is an error.') - input_shape = (None,) - else: - input_shape = (None,) + tuple(inputs.shape[1:]) - self._build_input_shape = input_shape - - # Cast inputs to the compute dtype. This is primarily used - # when saving to determine the correct dtype in the input signature. - inputs = self._maybe_cast_inputs(inputs) - - # On-the-fly setting of symbolic model inputs (either by using the tensor - # provided, or by creating a placeholder if Numpy data was provided). - model_inputs = training_utils_v1.ModelInputs(inputs) - inputs = model_inputs.get_symbolic_inputs() - self.inputs = model_inputs.get_symbolic_inputs(return_single_as_list=True) - self.input_names = model_inputs.get_input_names() - - self._feed_inputs = [] - self._feed_input_names = [] - self._feed_input_shapes = [] - - for k, v in model_inputs.as_dict(): - if backend.is_placeholder(v): - self._feed_input_names.append(k) - self._feed_inputs.append(v) - self._feed_input_shapes.append(backend.int_shape(v)) - - return inputs - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _set_output_attrs(self, outputs): - """Sets attributes related to the outputs of the Model.""" - # NOTE(taylorrobie): This convention cannot be changed without updating the - # data adapter since it assumes nest.flatten ordering. - outputs = tf.nest.flatten(outputs) - self.outputs = outputs - self.output_names = training_utils_v1.generic_output_names(outputs) - # TODO(scottzhu): Should we cleanup the self._training_endpoints here? - self.built = True - - @property - def _targets(self): - """The output target tensors for the model.""" - return [ - e.training_target.target - for e in self._training_endpoints - if e.has_training_target() - ] - - @property - def _feed_targets(self): - return [ - e.training_target.target - for e in self._training_endpoints - if e.has_feedable_training_target() - ] - - @property - def _feed_output_names(self): - return [ - e.output_name - for e in self._training_endpoints - if e.has_feedable_training_target() - ] - - @property - def _feed_output_shapes(self): - return [ - e.feed_output_shape - for e in self._training_endpoints - if e.has_feedable_training_target() - ] - - @property - def _feed_loss_fns(self): - return [ - e.loss_fn - for e in self._training_endpoints - if e.has_feedable_training_target() - ] - - @property - def _loss_weights_list(self): - return [e.loss_weight for e in self._training_endpoints] - - @property - def _output_loss_metrics(self): - if hasattr(self, '_training_endpoints'): - return [ - e.output_loss_metric - for e in self._training_endpoints - if e.output_loss_metric is not None - ] - return None - - @property - def sample_weights(self): - return [e.sample_weight for e in self._training_endpoints] - - @property - def _sample_weight_modes(self): - return [e.sample_weight_mode for e in self._training_endpoints] - - @property - def _feed_sample_weights(self): - return [e.sample_weight for e in self._training_endpoints - if e.sample_weight is not None] - - def _maybe_load_initial_epoch_from_ckpt(self, initial_epoch, mode): - """Maybe load initial epoch from ckpt considering possible worker recovery. - - Refer to tensorflow/python/keras/distribute/worker_training_state.py - for more information. + This will not include the `compile` metrics of a model layer. 
     Args:
-      initial_epoch: The original initial_epoch the user passes to `fit()`.
-      mode: The mode for running `model.fit()`.
+        layers: List of layers.
 
     Returns:
-      If the training is recovering from a previous failure under a
-      multi-worker training setting, return the epoch the training is
-      supposed to continue at. Otherwise, return the `initial_epoch` the user
-      passes in.
-    """
-    if self._training_state is not None:
-      return self._training_state.maybe_load_initial_epoch_from_ckpt(
-          initial_epoch, mode)
-    return initial_epoch
-
-  def _get_training_eval_metrics(self):
-    """Returns all the metrics that are to be reported.
-
-    This includes the output loss metrics, compile metrics/weighted metrics,
-    add_metric metrics.
+        List of metrics.
     """
     metrics = []
-    metrics.extend(getattr(self, '_output_loss_metrics', None) or [])
-    metrics.extend(getattr(self, 'metrics', None) or [])
+    layers = layer_utils.filter_empty_layer_containers(layers)
+    for layer in layers:
+        if isinstance(layer, Model):
+            # We cannot call 'metrics' on the model because we do not want
+            # to include the metrics that were added in the `compile` API
+            # of a nested model.
+            metrics.extend(layer._metrics)
+            metrics.extend(_get_metrics_from_layers(layer.layers))
+        else:
+            metrics.extend(layer.metrics)
     return metrics
 
-  def _assert_compile_was_called(self):
-    # Checks whether `compile` has been called. If it has been called,
-    # then the optimizer is set. This is different from whether the
-    # model is compiled
-    # (i.e. whether the model is built and its inputs/outputs are set).
-    if not self._compile_was_called:
-      raise RuntimeError('You must compile your model before '
-                         'training/testing. '
-                         'Use `model.compile(optimizer, loss)`.')
-
-  def _in_multi_worker_mode(self):
-    """Method to infer if this `Model` is working in multi-worker settings.
-
-    Multi-worker training refers to the setup where the training is
-    distributed across multiple workers, as opposed to the case where
-    only a local process performs the training. This function is
-    used to infer for example whether or not a distribute coordinator
-    should be run, and thus TensorFlow servers should be started for
-    communication with other servers in the cluster, or whether or not
-    saving/restoring checkpoints is relevant for preemption fault tolerance.
-
-    Experimental. Signature and implementation are subject to change.
-
-    Returns:
-      Whether this model indicates it's working in multi-worker settings.
-    """
-    strategy = self._distribution_strategy
-
-    # Otherwise, use the strategy whose scope this is in.
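To illustrate why the new `_get_metrics_from_layers` recurses with `layer._metrics` rather than `layer.metrics`: a nested model's `metrics` property can include its own `compile()` metrics, which this walk deliberately skips. A hedged sketch (the model names are invented, and the helper itself is module-private, so it is only described in comments):

```python
import tensorflow as tf

inner = tf.keras.Sequential([tf.keras.layers.Dense(4)])
inner.compile("sgd", "mse", metrics=["mae"])  # compile metric: skipped

outer_layers = [inner, tf.keras.layers.Dense(1)]
# _get_metrics_from_layers(outer_layers) reads inner._metrics and then
# recurses into inner.layers, so the "mae" compile metric is not collected.
```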
-    if not strategy and tf.distribute.has_strategy():
-      strategy = tf.distribute.get_strategy()
-    return strategy and strategy.extended._in_multi_worker_mode()  # pylint: disable=protected-access
-
-  @property
-  def _trackable_saved_model_saver(self):
-    return model_serialization.ModelSavedModelSaver(self)
-
-  def _get_compile_args(self, user_metrics=True):
-    del user_metrics
-    self._assert_compile_was_called()
-    kwargs = {
-        'loss': self.loss,
-        'metrics': self._compile_metrics,
-        'loss_weights': self.loss_weights,
-        'sample_weight_mode': self.sample_weight_mode,
-        'weighted_metrics': self._compile_weighted_metrics,
-    }
-    return kwargs
-
-  @property
-  def _compile_was_called(self):
-    return self._v1_compile_was_called
-
-
-class DistributedCallbackModel(Model):
-  """Model that is used for callbacks with tf.distribute.Strategy."""
-
-  def __init__(self, model):
-    super().__init__()
-    self.optimizer = model.optimizer
-
-  def set_original_model(self, orig_model):
-    self._original_model = orig_model
-
-  def save_weights(self, filepath, overwrite=True, save_format=None):
-    self._replicated_model.save_weights(filepath, overwrite=overwrite,
-                                        save_format=save_format)
-
-  def save(self, filepath, overwrite=True, include_optimizer=True):
-    # Save weights from the distributed model to the original model.
-    distributed_model_weights = self.get_weights()
-    self._original_model.set_weights(distributed_model_weights)
-    # TODO(anjalisridhar): Do we need to save the original model here?
-    # Saving the first replicated model works as well.
-    self._original_model.save(filepath, overwrite=True, include_optimizer=False)
-
-  def load_weights(self, filepath, by_name=False):
-    self._original_model.load_weights(filepath, by_name=False)
-    # Copy the weights from the original model to each of the replicated
-    # models.
-    orig_model_weights = self._original_model.get_weights()
-    distributed_training_utils_v1.set_weights(
-        self._original_model._distribution_strategy, self,  # pylint: disable=protected-access
-        orig_model_weights)
-
-  def __getattr__(self, item):
-    # Allowed attributes of the model that can be accessed by the user
-    # during a callback.
-    if item not in ('_setattr_tracking', '_layers'):
-      logging.warning('You are accessing attribute ' + item + ' of the '
-                      'DistributedCallbackModel that may not have been set '
-                      'correctly.')
-    return super().__getattr__(item)
-
-
-class _TrainingEndpoint:
-  """A container for the training output/target and related entities.
-
-  In the case of a model with multiple outputs, there is a one-to-one mapping
-  between model output (y_pred), model target (y_true), loss, metrics etc.
-  By unifying these entities into one class, each entity can access
-  information about the others, rather than reaching into separate lists of
-  attributes on the model.
-  """
-
-  def __init__(self,
-               output,
-               output_name,
-               loss_fn,
-               loss_weight=None,
-               training_target=None,
-               output_loss_metric=None,
-               sample_weight=None,
-               sample_weight_mode=None):
-    """Initialize the _TrainingEndpoint.
-
-    Note that the output and output_name should be stable as long as the model
-    structure doesn't change. The training_target is supposed to be mutable
-    since the information is provided via `compile()`.
-
-    Args:
-      output: the output tensor of the model.
-      output_name: the unique name of the output tensor.
-      loss_fn: the loss function for the output tensor.
-      loss_weight: float, the weights for the loss.
-      training_target: the _TrainingTarget for the model.
- output_loss_metric: the metric object for the loss function. - sample_weight: the weights for how a sample is weighted during metric and - loss calculation. Could be None. - sample_weight_mode: string, 'temporal', 'samplewise' or None. The mode for - how the sample_weight is populated. - """ - self._output = output - self._output_name = output_name - self._loss_fn = loss_fn - self._loss_weight = loss_weight - self._training_target = training_target - self._output_loss_metric = output_loss_metric - self._sample_weight = sample_weight - self._sample_weight_mode = sample_weight_mode - - @property - def output(self): - return self._output - - @property - def output_name(self): - return self._output_name - - @property - def shape(self): - return backend.int_shape(self.output) - - @property - def loss_fn(self): - return self._loss_fn - - @property - def loss_weight(self): - return self._loss_weight - - @loss_weight.setter - def loss_weight(self, value): - self._loss_weight = value - - @property - def training_target(self): - return self._training_target - - @training_target.setter - def training_target(self, value): - self._training_target = value - - def create_training_target(self, target, run_eagerly=False): - """Create training_target instance and update the self.training_target. - - Note that the input target should just be a tensor or None, and - corresponding training target will be created based on the output and - loss_fn. - - Args: - target: the target tensor for the current output. Could be None. - run_eagerly: boolean, whether the model is in run_eagerly mode. - - Raises: - ValueError if the training_target field for the current instance has - already been populated. - """ - if self.has_training_target(): - raise ValueError('The training_target field for the _TrainingEndpoint ' - 'instance has already been populated') - if run_eagerly: - # When run_eagerly, the target tensor is ignored, and the None placeholder - # is created instead. 
- self.training_target = _TrainingTarget( - None, feedable=True, skip_target_weights=False) - return - - if self.should_skip_target(): - self.training_target = _TrainingTarget(None) - else: - if target is not None and not backend.is_placeholder(target): - feedable = False - skip_target_weights = True - else: - feedable = True - skip_target_weights = False - - if target is None: - target_dtype = losses.LABEL_DTYPES_FOR_LOSSES.get( - self.loss_fn, backend.dtype(self.output)) - - target = backend.placeholder( - ndim=len(self.shape), - name=self.output_name + '_target', - sparse=backend.is_sparse(self.output), - dtype=target_dtype) - - self.training_target = _TrainingTarget( - target, - feedable=feedable, - skip_target_weights=skip_target_weights) - - @property - def output_loss_metric(self): - return self._output_loss_metric - - @output_loss_metric.setter - def output_loss_metric(self, value): - self._output_loss_metric = value - - @property - def sample_weight(self): - return self._sample_weight - - @sample_weight.setter - def sample_weight(self, value): - self._sample_weight = value - - @property - def sample_weight_mode(self): - return self._sample_weight_mode - - @sample_weight_mode.setter - def sample_weight_mode(self, value): - self._sample_weight_mode = value - - def should_skip_target(self): - return self._loss_fn is None - - def should_skip_target_weights(self): - return (self.should_skip_target() or self.training_target is None or - self.training_target.skip_target_weights) - - def has_training_target(self): - return self.training_target is not None - - def has_feedable_training_target(self): - return (not self.should_skip_target() and - self.training_target is not None and self.training_target.feedable) - - def loss_name(self): - if self._loss_fn is not None: - return self._output_name + '_loss' - return None - - @property - def feed_output_shape(self): - """The output shape for the feedable target.""" - if not self.has_feedable_training_target(): - return None - - if ((isinstance(self.loss_fn, losses.LossFunctionWrapper) and - self.loss_fn.fn == losses.sparse_categorical_crossentropy)) or ( - isinstance(self.loss_fn, losses.SparseCategoricalCrossentropy)): - if backend.image_data_format() == 'channels_first': - return (self.shape[0], 1) + self.shape[2:] - else: - return self.shape[:-1] + (1,) - elif (not isinstance(self.loss_fn, losses.Loss) or - (isinstance(self.loss_fn, losses.LossFunctionWrapper) and - (getattr(losses, self.loss_fn.fn.__name__, None) is None))): - # If the given loss is not an instance of the `Loss` class (custom - # class) or if the loss function that is wrapped is not in the - # `losses` module, then it is a user-defined loss and we make no - # assumptions about it. - return None - else: - return self.shape - - def sample_weights_mismatch(self): - """Check if the sample weight and the mode match or not.""" - # If there is a mismatch between sample weight mode and the placeholders - # created, then recompile the sub-graphs that depend on sample weights. 
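The sparse-categorical-crossentropy branch of `feed_output_shape` above collapses the class axis to 1, since targets hold integer class indices rather than one-hot vectors. A plain-Python restatement (the shapes are invented):

```python
output_shape = (None, 32, 32, 10)       # channels_last logits
print(output_shape[:-1] + (1,))         # (None, 32, 32, 1)

cf_shape = (None, 10, 32, 32)           # channels_first logits
print((cf_shape[0], 1) + cf_shape[2:])  # (None, 1, 32, 32)
```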
- return ( - (self.sample_weight_mode is not None and self.sample_weight is None) or - (self.sample_weight_mode is None and self.sample_weight is not None)) - - def populate_sample_weight(self, sample_weight, sample_weight_mode): - """Populate the sample weight and based on the sample weight mode.""" - if (sample_weight is None and - (self.should_skip_target_weights() or sample_weight_mode is None or - tf.executing_eagerly())): - self._sample_weight = None - return - - assert sample_weight_mode in ['temporal', 'samplewise'] - if sample_weight_mode == 'temporal': - default_value = [[1.]] - shape = [None, None] - else: - # sample_weight_mode == 'samplewise' - default_value = [1.] - shape = [None] - - if sample_weight is not None: - if not sample_weight.shape.is_compatible_with(shape): - raise ValueError('Received sample weight with shape {}. Expected shape ' - '{}.'.format(sample_weight.shape, shape)) - self._sample_weight = sample_weight - else: - self._sample_weight = tf.compat.v1.placeholder_with_default( - tf.constant(default_value, dtype=backend.floatx()), - shape=shape, - name=self.output_name + '_sample_weights') - - -class _TrainingTarget: - """Container for a target tensor (y_true) and its metadata (shape, loss...). - - Args: - target: A target tensor for the model. It may be `None` if the - output is excluded from loss computation. It is still kept as None - since each output of the model should have a corresponding target. If - the target is None, the rest of the attributes will be None as well. - feedable: Boolean, whether the target is feedable (requires data to be - passed in `fit` or `train_on_batch`), or not (model compiled with - `target_tensors` argument). - skip_target_weights: Boolean, whether the target should be skipped during - weights calculation. - """ - - def __init__(self, target, feedable=False, skip_target_weights=True): - self._target = target - self._feedable = feedable - self._skip_target_weights = skip_target_weights - - @property - def target(self): - return self._target - - @property - def feedable(self): - return self._feedable - - @property - def skip_target_weights(self): - return self._skip_target_weights - - -def _is_symbolic_tensor(x): - return tf.is_tensor(x) - - -def _convert_scipy_sparse_tensor(value, expected_input): - """Handle scipy sparse tensor conversions. - - This method takes a value 'value' and returns the proper conversion. If - value is a scipy sparse tensor and the expected input is a dense tensor, - we densify 'value'. If value is a scipy sparse tensor and the expected input - is a TF SparseTensor, we convert 'value' to a SparseTensor. If 'value' is - not a scipy sparse tensor, or scipy is not imported, we pass it through - unchanged. - - Args: - value: An object that may be a scipy sparse tensor - expected_input: The expected input placeholder. - - Returns: - The possibly-converted 'value'. - """ - if issparse is not None and issparse(value): - if backend.is_sparse(expected_input): - sparse_coo = value.tocoo() - row, col = sparse_coo.row, sparse_coo.col - data, shape = sparse_coo.data, sparse_coo.shape - indices = np.concatenate((np.expand_dims(row, 1), np.expand_dims(col, 1)), - 1) - return tf.SparseTensor(indices, data, shape) - else: - if tf.compat.v1.executing_eagerly_outside_functions(): - # In TF2 we do not silently densify sparse matrices. - raise ValueError('A SciPy sparse matrix was passed to a model ' - 'that expects dense inputs. 
Please densify your ' - 'inputs first, such as by calling `x.toarray().') - return value.toarray() - else: - return value - - -def _get_metrics_from_layers(layers): - """Returns list of metrics from the given layers. - - This will not include the `compile` metrics of a model layer. - - Args: - layers: List of layers. - - Returns: - List of metrics. - """ - metrics = [] - layers = layer_utils.filter_empty_layer_containers(layers) - for layer in layers: - if isinstance(layer, Model): - # We cannot call 'metrics' on the model because we do not want to - # include the metrics that were added in compile API of a nested model. - metrics.extend(layer._metrics) # pylint: disable=protected-access - metrics.extend(_get_metrics_from_layers(layer.layers)) - else: - metrics.extend(layer.metrics) - return metrics - def _non_none_constant_value(v): - constant_value = tf.get_static_value(v) - return constant_value if constant_value is not None else v + constant_value = tf.get_static_value(v) + return constant_value if constant_value is not None else v diff --git a/keras/estimator/BUILD b/keras/estimator/BUILD index 6d6ffd441685..6b871702e627 100644 --- a/keras/estimator/BUILD +++ b/keras/estimator/BUILD @@ -1,7 +1,10 @@ # Description: # Contains Keras models to Estimator converter +# Placeholder: load unaliased py_library + package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", ], diff --git a/keras/estimator/__init__.py b/keras/estimator/__init__.py index b5efcbc14647..00fa3c96e2d0 100644 --- a/keras/estimator/__init__.py +++ b/keras/estimator/__init__.py @@ -16,6 +16,7 @@ import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export # Keras has undeclared dependency on tensorflow/estimator:estimator_py. @@ -23,345 +24,365 @@ # everything will work as normal. _model_to_estimator_usage_gauge = tf.__internal__.monitoring.BoolGauge( - '/tensorflow/api/keras/model_to_estimator', - 'Whether tf.keras.estimator.model_to_estimator() is called.', 'version') + "/tensorflow/api/keras/model_to_estimator", + "Whether tf.keras.estimator.model_to_estimator() is called.", + "version", +) # LINT.IfChange -@keras_export(v1=['keras.estimator.model_to_estimator']) +@keras_export(v1=["keras.estimator.model_to_estimator"]) def model_to_estimator( keras_model=None, keras_model_path=None, custom_objects=None, model_dir=None, config=None, - checkpoint_format='saver', + checkpoint_format="saver", metric_names_map=None, - export_outputs=None): - """Constructs an `Estimator` instance from given keras model. + export_outputs=None, +): + """Constructs an `Estimator` instance from given keras model. - If you use infrastructure or other tooling that relies on Estimators, you can - still build a Keras model and use model_to_estimator to convert the Keras - model to an Estimator for use with downstream systems. + If you use infrastructure or other tooling that relies on Estimators, you + can still build a Keras model and use model_to_estimator to convert the + Keras model to an Estimator for use with downstream systems. - For usage example, please see: - [Creating estimators from Keras Models]( + For usage example, please see: + [Creating estimators from Keras Models]( https://www.tensorflow.org/guide/estimator#create_an_estimator_from_a_keras_model). - Sample Weights: - Estimators returned by `model_to_estimator` are configured so that they can - handle sample weights (similar to `keras_model.fit(x, y, sample_weights)`). 
- - To pass sample weights when training or evaluating the Estimator, the first - item returned by the input function should be a dictionary with keys - `features` and `sample_weights`. Example below: - - ```python - keras_model = tf.keras.Model(...) - keras_model.compile(...) - - estimator = tf.keras.estimator.model_to_estimator(keras_model) - - def input_fn(): - return dataset_ops.Dataset.from_tensors( - ({'features': features, 'sample_weights': sample_weights}, - targets)) - - estimator.train(input_fn, steps=1) - ``` - - Example with customized export signature: - ```python - inputs = {'a': tf.keras.Input(..., name='a'), - 'b': tf.keras.Input(..., name='b')} - outputs = {'c': tf.keras.layers.Dense(..., name='c')(inputs['a']), - 'd': tf.keras.layers.Dense(..., name='d')(inputs['b'])} - keras_model = tf.keras.Model(inputs, outputs) - keras_model.compile(...) - export_outputs = {'c': tf.estimator.export.RegressionOutput, - 'd': tf.estimator.export.ClassificationOutput} - - estimator = tf.keras.estimator.model_to_estimator( - keras_model, export_outputs=export_outputs) - - def input_fn(): - return dataset_ops.Dataset.from_tensors( - ({'features': features, 'sample_weights': sample_weights}, - targets)) - - estimator.train(input_fn, steps=1) - ``` - - Args: - keras_model: A compiled Keras model object. This argument is mutually - exclusive with `keras_model_path`. Estimator's `model_fn` uses the - structure of the model to clone the model. Defaults to `None`. - keras_model_path: Path to a compiled Keras model saved on disk, in HDF5 - format, which can be generated with the `save()` method of a Keras model. - This argument is mutually exclusive with `keras_model`. - Defaults to `None`. - custom_objects: Dictionary for cloning customized objects. This is - used with classes that is not part of this pip package. For example, if - user maintains a `relu6` class that inherits from `tf.keras.layers.Layer`, - then pass `custom_objects={'relu6': relu6}`. Defaults to `None`. - model_dir: Directory to save `Estimator` model parameters, graph, summary - files for TensorBoard, etc. If unset a directory will be created with - `tempfile.mkdtemp` - config: `RunConfig` to config `Estimator`. Allows setting up things in - `model_fn` based on configuration such as `num_ps_replicas`, or - `model_dir`. Defaults to `None`. If both `config.model_dir` and the - `model_dir` argument (above) are specified the `model_dir` **argument** - takes precedence. - checkpoint_format: Sets the format of the checkpoint saved by the estimator - when training. May be `saver` or `checkpoint`, depending on whether to - save checkpoints from `tf.train.Saver` or `tf.train.Checkpoint`. This - argument currently defaults to `saver`. When 2.0 is released, the default - will be `checkpoint`. Estimators use name-based `tf.train.Saver` - checkpoints, while Keras models use object-based checkpoints from - `tf.train.Checkpoint`. Currently, saving object-based checkpoints from - `model_to_estimator` is only supported by Functional and Sequential - models. Defaults to 'saver'. - metric_names_map: Optional dictionary mapping Keras model output metric - names to custom names. This can be used to override the default Keras - model output metrics names in a multi IO model use case and provide custom - names for the `eval_metric_ops` in Estimator. - The Keras model metric names can be obtained using `model.metrics_names` - excluding any loss metrics such as total loss and output losses. 
- For example, if your Keras model has two outputs `out_1` and `out_2`, - with `mse` loss and `acc` metric, then `model.metrics_names` will be - `['loss', 'out_1_loss', 'out_2_loss', 'out_1_acc', 'out_2_acc']`. - The model metric names excluding the loss metrics will be - `['out_1_acc', 'out_2_acc']`. - export_outputs: Optional dictionary. This can be used to override the - default Keras model output exports in a multi IO model use case and - provide custom names for the `export_outputs` in - `tf.estimator.EstimatorSpec`. Default is None, which is equivalent to - {'serving_default': `tf.estimator.export.PredictOutput`}. If not None, - the keys must match the keys of `model.output_names`. - A dict `{name: output}` where: - * name: An arbitrary name for this output. - * output: an `ExportOutput` class such as `ClassificationOutput`, - `RegressionOutput`, or `PredictOutput`. Single-headed models only need - to specify one entry in this dictionary. Multi-headed models should - specify one entry for each head, one of which must be named using - `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY` - If no entry is provided, a default `PredictOutput` mapping to - `predictions` will be created. - - Returns: - An Estimator from given keras model. - - Raises: - ValueError: If neither keras_model nor keras_model_path was given. - ValueError: If both keras_model and keras_model_path was given. - ValueError: If the keras_model_path is a GCS URI. - ValueError: If keras_model has not been compiled. - ValueError: If an invalid checkpoint_format was given. - """ - - try: - from tensorflow_estimator.python.estimator import keras_lib # pylint: disable=g-import-not-at-top - except ImportError: - raise NotImplementedError( - 'tf.keras.estimator.model_to_estimator function not available in your ' - 'installation.') - _model_to_estimator_usage_gauge.get_cell('v1').set(True) - return keras_lib.model_to_estimator( # pylint:disable=unexpected-keyword-arg - keras_model=keras_model, - keras_model_path=keras_model_path, - custom_objects=custom_objects, - model_dir=model_dir, - config=config, - checkpoint_format=checkpoint_format, - use_v2_estimator=False, - metric_names_map=metric_names_map, - export_outputs=export_outputs) - - -@keras_export('keras.estimator.model_to_estimator', v1=[]) -def model_to_estimator_v2(keras_model=None, - keras_model_path=None, - custom_objects=None, - model_dir=None, - config=None, - checkpoint_format='checkpoint', - metric_names_map=None, - export_outputs=None): - """Constructs an `Estimator` instance from given keras model. - - If you use infrastructure or other tooling that relies on Estimators, you can - still build a Keras model and use model_to_estimator to convert the Keras - model to an Estimator for use with downstream systems. - - For usage example, please see: - [Creating estimators from Keras Models]( + Sample Weights: + Estimators returned by `model_to_estimator` are configured so that they can + handle sample weights (similar to `keras_model.fit(x, y, sample_weights)`). + + To pass sample weights when training or evaluating the Estimator, the first + item returned by the input function should be a dictionary with keys + `features` and `sample_weights`. Example below: + + ```python + keras_model = tf.keras.Model(...) + keras_model.compile(...) 
+ + estimator = tf.keras.estimator.model_to_estimator(keras_model) + + def input_fn(): + return dataset_ops.Dataset.from_tensors( + ({'features': features, 'sample_weights': sample_weights}, + targets)) + + estimator.train(input_fn, steps=1) + ``` + + Example with customized export signature: + ```python + inputs = {'a': tf.keras.Input(..., name='a'), + 'b': tf.keras.Input(..., name='b')} + outputs = {'c': tf.keras.layers.Dense(..., name='c')(inputs['a']), + 'd': tf.keras.layers.Dense(..., name='d')(inputs['b'])} + keras_model = tf.keras.Model(inputs, outputs) + keras_model.compile(...) + export_outputs = {'c': tf.estimator.export.RegressionOutput, + 'd': tf.estimator.export.ClassificationOutput} + + estimator = tf.keras.estimator.model_to_estimator( + keras_model, export_outputs=export_outputs) + + def input_fn(): + return dataset_ops.Dataset.from_tensors( + ({'features': features, 'sample_weights': sample_weights}, + targets)) + + estimator.train(input_fn, steps=1) + ``` + + Args: + keras_model: A compiled Keras model object. This argument is mutually + exclusive with `keras_model_path`. Estimator's `model_fn` uses the + structure of the model to clone the model. Defaults to `None`. + keras_model_path: Path to a compiled Keras model saved on disk, in HDF5 + format, which can be generated with the `save()` method of a Keras + model. This argument is mutually exclusive with `keras_model`. + Defaults to `None`. + custom_objects: Dictionary for cloning customized objects. This is + used with classes that is not part of this pip package. For example, if + user maintains a `relu6` class that inherits from + `tf.keras.layers.Layer`, then pass `custom_objects={'relu6': relu6}`. + Defaults to `None`. + model_dir: Directory to save `Estimator` model parameters, graph, summary + files for TensorBoard, etc. If unset a directory will be created with + `tempfile.mkdtemp` + config: `RunConfig` to config `Estimator`. Allows setting up things in + `model_fn` based on configuration such as `num_ps_replicas`, or + `model_dir`. If both `config.model_dir` and the + `model_dir` argument (above) are specified the `model_dir` **argument** + takes precedence. Defaults to `None`. + checkpoint_format: Sets the format of the checkpoint saved by the + estimator when training. May be `saver` or `checkpoint`, depending on + whether to save checkpoints from `tf.train.Saver` or + `tf.train.Checkpoint`. Estimators use name-based `tf.train.Saver` + checkpoints, while Keras models use object-based checkpoints from + `tf.train.Checkpoint`. Currently, saving object-based checkpoints + from `model_to_estimator` is only supported by Functional and + Sequential models. Defaults to 'saver'. + metric_names_map: Optional dictionary mapping Keras model output metric + names to custom names. This can be used to override the default Keras + model output metrics names in a multi IO model use case and provide + custom names for the `eval_metric_ops` in Estimator. + The Keras model metric names can be obtained using `model.metrics_names` + excluding any loss metrics such as total loss and output losses. + For example, if your Keras model has two outputs `out_1` and `out_2`, + with `mse` loss and `acc` metric, then `model.metrics_names` will be + `['loss', 'out_1_loss', 'out_2_loss', 'out_1_acc', 'out_2_acc']`. + The model metric names excluding the loss metrics will be + `['out_1_acc', 'out_2_acc']`. + export_outputs: Optional dictionary. 
This can be used to override the + default Keras model output exports in a multi IO model use case and + provide custom names for the `export_outputs` in + `tf.estimator.EstimatorSpec`. Default is None, which is equivalent to + {'serving_default': `tf.estimator.export.PredictOutput`}. If not None, + the keys must match the keys of `model.output_names`. + A dict `{name: output}` where: + * name: An arbitrary name for this output. + * output: an `ExportOutput` class such as `ClassificationOutput`, + `RegressionOutput`, or `PredictOutput`. Single-headed models only + need to specify one entry in this dictionary. Multi-headed models + should specify one entry for each head, one of which must be named + using + `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY` + If no entry is provided, a default `PredictOutput` mapping to + `predictions` will be created. + + Returns: + An Estimator from given keras model. + + Raises: + ValueError: If neither keras_model nor keras_model_path was given. + ValueError: If both keras_model and keras_model_path was given. + ValueError: If the keras_model_path is a GCS URI. + ValueError: If keras_model has not been compiled. + ValueError: If an invalid checkpoint_format was given. + """ + + try: + # isort: off + from tensorflow_estimator.python.estimator import ( + keras_lib, + ) + except ImportError: + raise NotImplementedError( + "tf.keras.estimator.model_to_estimator function not available in " + "your installation." + ) + _model_to_estimator_usage_gauge.get_cell("v1").set(True) + return keras_lib.model_to_estimator( + keras_model=keras_model, + keras_model_path=keras_model_path, + custom_objects=custom_objects, + model_dir=model_dir, + config=config, + checkpoint_format=checkpoint_format, + use_v2_estimator=False, + metric_names_map=metric_names_map, + export_outputs=export_outputs, + ) + + +@keras_export("keras.estimator.model_to_estimator", v1=[]) +def model_to_estimator_v2( + keras_model=None, + keras_model_path=None, + custom_objects=None, + model_dir=None, + config=None, + checkpoint_format="checkpoint", + metric_names_map=None, + export_outputs=None, +): + """Constructs an `Estimator` instance from given keras model. + + If you use infrastructure or other tooling that relies on Estimators, you + can still build a Keras model and use model_to_estimator to convert the + Keras model to an Estimator for use with downstream systems. + + For usage example, please see: + [Creating estimators from Keras Models]( https://www.tensorflow.org/guide/estimators#creating_estimators_from_keras_models). - Sample Weights: - Estimators returned by `model_to_estimator` are configured so that they can - handle sample weights (similar to `keras_model.fit(x, y, sample_weights)`). - - To pass sample weights when training or evaluating the Estimator, the first - item returned by the input function should be a dictionary with keys - `features` and `sample_weights`. Example below: - - ```python - keras_model = tf.keras.Model(...) - keras_model.compile(...) 
- - estimator = tf.keras.estimator.model_to_estimator(keras_model) - - def input_fn(): - return dataset_ops.Dataset.from_tensors( - ({'features': features, 'sample_weights': sample_weights}, - targets)) - - estimator.train(input_fn, steps=1) - ``` - - Example with customized export signature: - ```python - inputs = {'a': tf.keras.Input(..., name='a'), - 'b': tf.keras.Input(..., name='b')} - outputs = {'c': tf.keras.layers.Dense(..., name='c')(inputs['a']), - 'd': tf.keras.layers.Dense(..., name='d')(inputs['b'])} - keras_model = tf.keras.Model(inputs, outputs) - keras_model.compile(...) - export_outputs = {'c': tf.estimator.export.RegressionOutput, - 'd': tf.estimator.export.ClassificationOutput} - - estimator = tf.keras.estimator.model_to_estimator( - keras_model, export_outputs=export_outputs) - - def input_fn(): - return dataset_ops.Dataset.from_tensors( - ({'features': features, 'sample_weights': sample_weights}, - targets)) - - estimator.train(input_fn, steps=1) - ``` - - Note: We do not support creating weighted metrics in Keras and converting them - to weighted metrics in the Estimator API using `model_to_estimator`. - You will have to create these metrics directly on the estimator spec using the - `add_metrics` function. - - To customize the estimator `eval_metric_ops` names, you can pass in the - `metric_names_map` dictionary mapping the keras model output metric names - to the custom names as follows: - - ```python - input_a = tf.keras.layers.Input(shape=(16,), name='input_a') - input_b = tf.keras.layers.Input(shape=(16,), name='input_b') - dense = tf.keras.layers.Dense(8, name='dense_1') - interm_a = dense(input_a) - interm_b = dense(input_b) - merged = tf.keras.layers.concatenate([interm_a, interm_b], name='merge') - output_a = tf.keras.layers.Dense(3, activation='softmax', name='dense_2')( - merged) - output_b = tf.keras.layers.Dense(2, activation='softmax', name='dense_3')( - merged) - keras_model = tf.keras.models.Model( - inputs=[input_a, input_b], outputs=[output_a, output_b]) - keras_model.compile( - loss='categorical_crossentropy', - optimizer='rmsprop', - metrics={ - 'dense_2': 'categorical_accuracy', - 'dense_3': 'categorical_accuracy' - }) - - metric_names_map = { - 'dense_2_categorical_accuracy': 'acc_1', - 'dense_3_categorical_accuracy': 'acc_2', - } - keras_est = tf.keras.estimator.model_to_estimator( + Sample Weights: + Estimators returned by `model_to_estimator` are configured so that they can + handle sample weights (similar to `keras_model.fit(x, y, sample_weights)`). + + To pass sample weights when training or evaluating the Estimator, the first + item returned by the input function should be a dictionary with keys + `features` and `sample_weights`. Example below: + + ```python + keras_model = tf.keras.Model(...) + keras_model.compile(...) + + estimator = tf.keras.estimator.model_to_estimator(keras_model) + + def input_fn(): + return dataset_ops.Dataset.from_tensors( + ({'features': features, 'sample_weights': sample_weights}, + targets)) + + estimator.train(input_fn, steps=1) + ``` + + Example with customized export signature: + ```python + inputs = {'a': tf.keras.Input(..., name='a'), + 'b': tf.keras.Input(..., name='b')} + outputs = {'c': tf.keras.layers.Dense(..., name='c')(inputs['a']), + 'd': tf.keras.layers.Dense(..., name='d')(inputs['b'])} + keras_model = tf.keras.Model(inputs, outputs) + keras_model.compile(...) 
+ export_outputs = {'c': tf.estimator.export.RegressionOutput, + 'd': tf.estimator.export.ClassificationOutput} + + estimator = tf.keras.estimator.model_to_estimator( + keras_model, export_outputs=export_outputs) + + def input_fn(): + return dataset_ops.Dataset.from_tensors( + ({'features': features, 'sample_weights': sample_weights}, + targets)) + + estimator.train(input_fn, steps=1) + ``` + + Note: We do not support creating weighted metrics in Keras and converting + them to weighted metrics in the Estimator API using `model_to_estimator`. + You will have to create these metrics directly on the estimator spec using + the `add_metrics` function. + + To customize the estimator `eval_metric_ops` names, you can pass in the + `metric_names_map` dictionary mapping the keras model output metric names + to the custom names as follows: + + ```python + input_a = tf.keras.layers.Input(shape=(16,), name='input_a') + input_b = tf.keras.layers.Input(shape=(16,), name='input_b') + dense = tf.keras.layers.Dense(8, name='dense_1') + interm_a = dense(input_a) + interm_b = dense(input_b) + merged = tf.keras.layers.concatenate([interm_a, interm_b], name='merge') + output_a = tf.keras.layers.Dense(3, activation='softmax', name='dense_2')( + merged) + output_b = tf.keras.layers.Dense(2, activation='softmax', name='dense_3')( + merged) + keras_model = tf.keras.models.Model( + inputs=[input_a, input_b], outputs=[output_a, output_b]) + keras_model.compile( + loss='categorical_crossentropy', + optimizer='rmsprop', + metrics={ + 'dense_2': 'categorical_accuracy', + 'dense_3': 'categorical_accuracy' + }) + + metric_names_map = { + 'dense_2_categorical_accuracy': 'acc_1', + 'dense_3_categorical_accuracy': 'acc_2', + } + keras_est = tf.keras.estimator.model_to_estimator( + keras_model=keras_model, + config=config, + metric_names_map=metric_names_map) + ``` + + Args: + keras_model: A compiled Keras model object. This argument is mutually + exclusive with `keras_model_path`. Estimator's `model_fn` uses the + structure of the model to clone the model. Defaults to `None`. + keras_model_path: Path to a compiled Keras model saved on disk, in HDF5 + format, which can be generated with the `save()` method of a Keras + model. This argument is mutually exclusive with `keras_model`. + Defaults to `None`. + custom_objects: Dictionary for cloning customized objects. This is + used with classes that is not part of this pip package. For example, if + user maintains a `relu6` class that inherits from + `tf.keras.layers.Layer`, then pass `custom_objects={'relu6': relu6}`. + Defaults to `None`. + model_dir: Directory to save `Estimator` model parameters, graph, summary + files for TensorBoard, etc. If unset a directory will be created with + `tempfile.mkdtemp` + config: `RunConfig` to config `Estimator`. Allows setting up things in + `model_fn` based on configuration such as `num_ps_replicas`, or + `model_dir`. If both `config.model_dir` and the + `model_dir` argument (above) are specified the `model_dir` **argument** + takes precedence. Defaults to `None`. + checkpoint_format: Sets the format of the checkpoint saved by the + estimator when training. May be `saver` or `checkpoint`, depending on + whether to save checkpoints from `tf.compat.v1.train.Saver` or + `tf.train.Checkpoint`. The default is `checkpoint`. Estimators use + name-based `tf.train.Saver` checkpoints, while Keras models use + object-based checkpoints from `tf.train.Checkpoint`. 
Currently, saving
+        object-based checkpoints from `model_to_estimator` is only supported by
+        Functional and Sequential models. Defaults to 'checkpoint'.
+      metric_names_map: Optional dictionary mapping Keras model output metric
+        names to custom names. This can be used to override the default Keras
+        model output metric names in a multi IO model use case and provide
+        custom names for the `eval_metric_ops` in Estimator.
+        The Keras model metric names can be obtained using `model.metrics_names`
+        excluding any loss metrics such as total loss and output losses.
+        For example, if your Keras model has two outputs `out_1` and `out_2`,
+        with `mse` loss and `acc` metric, then `model.metrics_names` will be
+        `['loss', 'out_1_loss', 'out_2_loss', 'out_1_acc', 'out_2_acc']`.
+        The model metric names excluding the loss metrics will be
+        `['out_1_acc', 'out_2_acc']`.
+      export_outputs: Optional dictionary. This can be used to override the
+        default Keras model output exports in a multi IO model use case and
+        provide custom names for the `export_outputs` in
+        `tf.estimator.EstimatorSpec`. Default is None, which is equivalent to
+        {'serving_default': `tf.estimator.export.PredictOutput`}. If not None,
+        the keys must match the keys of `model.output_names`.
+        A dict `{name: output}` where:
+        * name: An arbitrary name for this output.
+        * output: an `ExportOutput` class such as `ClassificationOutput`,
+          `RegressionOutput`, or `PredictOutput`. Single-headed models only
+          need to specify one entry in this dictionary. Multi-headed models
+          should specify one entry for each head, one of which must be named
+          using
+          `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`
+        If no entry is provided, a default `PredictOutput` mapping to
+        `predictions` will be created.
+
+    Returns:
+      An Estimator from the given Keras model.
+
+    Raises:
+      ValueError: If neither keras_model nor keras_model_path was given.
+      ValueError: If both keras_model and keras_model_path were given.
+      ValueError: If the keras_model_path is a GCS URI.
+      ValueError: If keras_model has not been compiled.
+      ValueError: If an invalid checkpoint_format was given.
+    """
+
+    try:
+        # isort: off
+        from tensorflow_estimator.python.estimator import (
+            keras_lib,
+        )
+    except ImportError:
+        raise NotImplementedError(
+            "tf.keras.estimator.model_to_estimator function not available in "
+            "your installation."
+        )
+    _model_to_estimator_usage_gauge.get_cell("v2").set(True)
+    return keras_lib.model_to_estimator(
        keras_model=keras_model,
+        keras_model_path=keras_model_path,
+        custom_objects=custom_objects,
+        model_dir=model_dir,
        config=config,
-      metric_names_map=metric_names_map)
-  ```
-
-  Args:
-    keras_model: A compiled Keras model object. This argument is mutually
-      exclusive with `keras_model_path`. Estimator's `model_fn` uses the
-      structure of the model to clone the model. Defaults to `None`.
-    keras_model_path: Path to a compiled Keras model saved on disk, in HDF5
-      format, which can be generated with the `save()` method of a Keras model.
-      This argument is mutually exclusive with `keras_model`.
-      Defaults to `None`.
-    custom_objects: Dictionary for cloning customized objects. This is
-      used with classes that is not part of this pip package. For example, if
-      user maintains a `relu6` class that inherits from `tf.keras.layers.Layer`,
-      then pass `custom_objects={'relu6': relu6}`. Defaults to `None`.
-    model_dir: Directory to save `Estimator` model parameters, graph, summary
-      files for TensorBoard, etc.
If unset a directory will be created with - `tempfile.mkdtemp` - config: `RunConfig` to config `Estimator`. Allows setting up things in - `model_fn` based on configuration such as `num_ps_replicas`, or - `model_dir`. Defaults to `None`. If both `config.model_dir` and the - `model_dir` argument (above) are specified the `model_dir` **argument** - takes precedence. - checkpoint_format: Sets the format of the checkpoint saved by the estimator - when training. May be `saver` or `checkpoint`, depending on whether to - save checkpoints from `tf.compat.v1.train.Saver` or `tf.train.Checkpoint`. - The default is `checkpoint`. Estimators use name-based `tf.train.Saver` - checkpoints, while Keras models use object-based checkpoints from - `tf.train.Checkpoint`. Currently, saving object-based checkpoints from - `model_to_estimator` is only supported by Functional and Sequential - models. Defaults to 'checkpoint'. - metric_names_map: Optional dictionary mapping Keras model output metric - names to custom names. This can be used to override the default Keras - model output metrics names in a multi IO model use case and provide custom - names for the `eval_metric_ops` in Estimator. - The Keras model metric names can be obtained using `model.metrics_names` - excluding any loss metrics such as total loss and output losses. - For example, if your Keras model has two outputs `out_1` and `out_2`, - with `mse` loss and `acc` metric, then `model.metrics_names` will be - `['loss', 'out_1_loss', 'out_2_loss', 'out_1_acc', 'out_2_acc']`. - The model metric names excluding the loss metrics will be - `['out_1_acc', 'out_2_acc']`. - export_outputs: Optional dictionary. This can be used to override the - default Keras model output exports in a multi IO model use case and - provide custom names for the `export_outputs` in - `tf.estimator.EstimatorSpec`. Default is None, which is equivalent to - {'serving_default': `tf.estimator.export.PredictOutput`}. If not None, - the keys must match the keys of `model.output_names`. - A dict `{name: output}` where: - * name: An arbitrary name for this output. - * output: an `ExportOutput` class such as `ClassificationOutput`, - `RegressionOutput`, or `PredictOutput`. Single-headed models only need - to specify one entry in this dictionary. Multi-headed models should - specify one entry for each head, one of which must be named using - `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY` - If no entry is provided, a default `PredictOutput` mapping to - `predictions` will be created. - - Returns: - An Estimator from given keras model. - - Raises: - ValueError: If neither keras_model nor keras_model_path was given. - ValueError: If both keras_model and keras_model_path was given. - ValueError: If the keras_model_path is a GCS URI. - ValueError: If keras_model has not been compiled. - ValueError: If an invalid checkpoint_format was given. 
- """ - - try: - from tensorflow_estimator.python.estimator import keras_lib # pylint: disable=g-import-not-at-top - except ImportError: - raise NotImplementedError( - 'tf.keras.estimator.model_to_estimator function not available in your ' - 'installation.') - _model_to_estimator_usage_gauge.get_cell('v2').set(True) - return keras_lib.model_to_estimator( # pylint:disable=unexpected-keyword-arg - keras_model=keras_model, - keras_model_path=keras_model_path, - custom_objects=custom_objects, - model_dir=model_dir, - config=config, - checkpoint_format=checkpoint_format, - use_v2_estimator=True, - metric_names_map=metric_names_map, - export_outputs=export_outputs) + checkpoint_format=checkpoint_format, + use_v2_estimator=True, + metric_names_map=metric_names_map, + export_outputs=export_outputs, + ) + + # LINT.ThenChange(//tensorflow_estimator/python/estimator/keras_lib.py) diff --git a/keras/saving/experimental/BUILD b/keras/export/BUILD similarity index 55% rename from keras/saving/experimental/BUILD rename to keras/export/BUILD index e0dd9e851600..329076cafce1 100644 --- a/keras/saving/experimental/BUILD +++ b/keras/export/BUILD @@ -1,39 +1,39 @@ # Description: -# Contains the Keras experimental idempotent saving API. +# Contains the Keras save model API (internal TensorFlow version). +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], # TODO(scottzhu): Remove non-keras deps from TF. default_visibility = [ "//keras:friends", - "//third_party/tensorflow/python/distribute:__pkg__", ], licenses = ["notice"], ) py_library( - name = "experimental", + name = "export_lib", srcs = [ - "saving_lib.py", + "export_lib.py", ], srcs_version = "PY3", deps = [ "//:expect_tensorflow_installed", - "//keras/saving/saved_model", - "//keras/utils:generic_utils", ], ) tf_py_test( - name = "saving_lib_test", - size = "small", - srcs = ["saving_lib_test.py"], + name = "export_lib_test", + size = "medium", + srcs = ["export_lib_test.py"], python_version = "PY3", deps = [ + ":export_lib", "//:expect_absl_installed", "//:expect_tensorflow_installed", "//keras", - "//keras/utils:generic_utils", + "//keras/testing_infra:test_combinations", ], ) diff --git a/keras/optimizers/optimizer_experimental/__init__.py b/keras/export/__init__.py similarity index 84% rename from keras/optimizers/optimizer_experimental/__init__.py rename to keras/export/__init__.py index bdf2826104b1..a82948d13416 100644 --- a/keras/optimizers/optimizer_experimental/__init__.py +++ b/keras/export/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,4 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experimental optimizer package.""" + +from keras.export.export_lib import ExportArchive diff --git a/keras/export/export_lib.py b/keras/export/export_lib.py new file mode 100644 index 000000000000..eb8dc63f83e8 --- /dev/null +++ b/keras/export/export_lib.py @@ -0,0 +1,581 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Library for exporting inference-only Keras models/layers.""" + +import tensorflow.compat.v2 as tf +from tensorflow.python.util.tf_export import keras_export + +from keras.engine import base_layer +from keras.engine import functional +from keras.engine import sequential +from keras.utils import io_utils + + +@keras_export("keras.export.ExportArchive") +class ExportArchive(tf.__internal__.tracking.AutoTrackable): + """ExportArchive is used to write SavedModel artifacts (e.g. for inference). + + If you have a Keras model or layer that you want to export as SavedModel for + serving (e.g. via TensorFlow-Serving), you can use `ExportArchive` + to configure the different serving endpoints you need to make available, + as well as their signatures. Simply instantiate an `ExportArchive`, + use `track()` to register the layer(s) or model(s) to be used, + then use the `add_endpoint()` method to register a new serving endpoint. + When done, use the `write_out()` method to save the artifact. + + The resulting artifact is a SavedModel and can be reloaded via + `tf.saved_model.load`. + + Examples: + + Here's how to export a model for inference. + + ```python + export_archive = ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + name="serve", + fn=model.call, + input_signature=[tf.TensorSpec(shape=(None, 3), dtype=tf.float32)], + ) + export_archive.write_out("path/to/location") + + # Elsewhere, we can reload the artifact and serve it. + # The endpoint we added is available as a method: + serving_model = tf.saved_model.load("path/to/location") + outputs = serving_model.serve(inputs) + ``` + + Here's how to export a model with one endpoint for inference and one + endpoint for a training-mode forward pass (e.g. with dropout on). + + ```python + export_archive = ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + name="call_inference", + fn=lambda x: model.call(x, training=False), + input_signature=[tf.TensorSpec(shape=(None, 3), dtype=tf.float32)], + ) + export_archive.add_endpoint( + name="call_training", + fn=lambda x: model.call(x, training=True), + input_signature=[tf.TensorSpec(shape=(None, 3), dtype=tf.float32)], + ) + export_archive.write_out("path/to/location") + ``` + + **Note on resource tracking:** + + `ExportArchive` is able to automatically track all `tf.Variables` used + by its endpoints, so most of the time calling `.track(model)` + is not strictly required. However, if your model uses lookup layers such + as `IntegerLookup`, `StringLookup`, or `TextVectorization`, + it will need to be tracked explicitly via `.track(model)`. + + Explicit tracking is also required if you need to be able to access + the properties `variables`, `trainable_variables`, or + `non_trainable_variables` on the revived archive. 
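+
+    For example, here is an illustrative sketch of that case (the vocabulary,
+    shapes, and path below are placeholders):
+
+    ```python
+    lookup_model = keras.Sequential([
+        keras.layers.StringLookup(vocabulary=["a", "b", "c"]),
+        keras.layers.Embedding(4, 8),
+    ])
+    lookup_model(tf.constant([["a"]]))  # Build the model first.
+
+    export_archive = ExportArchive()
+    # track() is mandatory here so the lookup table assets get saved.
+    export_archive.track(lookup_model)
+    export_archive.add_endpoint(
+        name="serve",
+        fn=lookup_model.call,
+        input_signature=[tf.TensorSpec(shape=(None, 1), dtype=tf.string)],
+    )
+    export_archive.write_out("path/to/location")
+    ```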
+ """ + + def __init__(self): + self._endpoint_names = [] + self._endpoint_signatures = {} + self.tensorflow_version = tf.__version__ + self.variables = [] + self.trainable_variables = [] + self.non_trainable_variables = [] + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def track(self, resource): + """Track the variables (and other assets) of a layer or model.""" + if not isinstance(resource, tf.__internal__.tracking.Trackable): + raise ValueError( + "Invalid resource type. Expected an instance of a " + "TensorFlow `Trackable` (such as a Keras `Layer` or `Model`). " + f"Received instead an object of type '{type(resource)}'. " + f"Object received: {resource}" + ) + if isinstance(resource, base_layer.Layer): + if not resource.built: + raise ValueError( + "The layer provided has not yet been built. " + "It must be built before export." + ) + + # Layers in `_tracked` are not part of the trackables that get saved, + # because we're creating the attribute in a + # no_automatic_dependency_tracking scope. + if not hasattr(self, "_tracked"): + self._tracked = [] + self._tracked.append(resource) + + if isinstance(resource, base_layer.Layer): + # Variables in the lists below are actually part of the trackables + # that get saved, because the lists are created in __init__. + self.variables += resource.variables + self.trainable_variables += resource.trainable_variables + self.non_trainable_variables += resource.non_trainable_variables + + def add_endpoint(self, name, fn, input_signature=None): + """Register a new serving endpoint. + + Arguments: + name: Str, name of the endpoint. + fn: A function. It should only leverage resources + (e.g. `tf.Variable` objects or `tf.lookup.StaticHashTable` + objects) that are available on the models/layers + tracked by the `ExportArchive` (you can call `.track(model)` + to track a new model). + The shape and dtype of the inputs to the function must be + known. For that purpose, you can either 1) make sure that + `fn` is a `tf.function` that has been called at least once, or + 2) provide an `input_signature` argument that specifies the + shape and dtype of the inputs (see below). + input_signature: Used to specify the shape and dtype of the + inputs to `fn`. List of `tf.TensorSpec` objects (one + per positional input argument of `fn`). Nested arguments are + allowed (see below for an example showing a Functional model + with 2 input arguments). + + Example: + + Adding an endpoint using the `input_signature` argument when the + model has a single input argument: + + ```python + export_archive = ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + name="serve", + fn=model.call, + input_signature=[tf.TensorSpec(shape=(None, 3), dtype=tf.float32)], + ) + ``` + + Adding an endpoint using the `input_signature` argument when the + model has two positional input arguments: + + ```python + export_archive = ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + name="serve", + fn=model.call, + input_signature=[ + tf.TensorSpec(shape=(None, 3), dtype=tf.float32), + tf.TensorSpec(shape=(None, 4), dtype=tf.float32), + ], + ) + ``` + + Adding an endpoint using the `input_signature` argument when the + model has one input argument that is a list of 2 tensors (e.g. 
+ a Functional model with 2 inputs): + + ```python + model = keras.Model(inputs=[x1, x2], outputs=outputs) + + export_archive = ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + name="serve", + fn=model.call, + input_signature=[ + [ + tf.TensorSpec(shape=(None, 3), dtype=tf.float32), + tf.TensorSpec(shape=(None, 4), dtype=tf.float32), + ], + ], + ) + ``` + + This also works with dictionary inputs: + + ```python + model = keras.Model(inputs={"x1": x1, "x2": x2}, outputs=outputs) + + export_archive = ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + name="serve", + fn=model.call, + input_signature=[ + { + "x1": tf.TensorSpec(shape=(None, 3), dtype=tf.float32), + "x2": tf.TensorSpec(shape=(None, 4), dtype=tf.float32), + }, + ], + ) + ``` + + Adding an endpoint that is a `tf.function`: + + ```python + @tf.function() + def serving_fn(x): + return model(x) + + # The function must be traced, i.e. it must be called at least once. + serving_fn(tf.random.normal(shape=(2, 3))) + + export_archive = ExportArchive() + export_archive.track(model) + export_archive.add_endpoint(name="serve", fn=serving_fn) + ``` + """ + if name in self._endpoint_names: + raise ValueError(f"Endpoint name '{name}' is already taken.") + + if input_signature: + decorated_fn = tf.function(fn, input_signature=input_signature) + self._endpoint_signatures[name] = input_signature + else: + if isinstance(fn, tf.types.experimental.GenericFunction): + if not fn._list_all_concrete_functions(): + raise ValueError( + f"The provided tf.function '{fn}' " + "has never been called. " + "To specify the expected shape and dtype " + "of the function's arguments, " + "you must either provide a function that " + "has been called at least once, or alternatively pass " + "an `input_signature` argument in `add_endpoint()`." + ) + decorated_fn = fn + else: + raise ValueError( + "If the `fn` argument provided is not a `tf.function`, " + "you must provide an `input_signature` argument to " + "specify the shape and dtype of the function arguments. " + "Example:\n\n" + "export_archive.add_endpoint(\n" + " name='call',\n" + " fn=model.call,\n" + " input_signature=[\n" + " tf.TensorSpec(\n" + " shape=(None, 224, 224, 3),\n" + " dtype=tf.float32,\n" + " )\n" + " ],\n" + ")" + ) + setattr(self, name, decorated_fn) + self._endpoint_names.append(name) + + def add_variable_collection(self, name, variables): + """Register a set of variables to be retrieved after reloading. + + Arguments: + name: The string name for the collection. + variables: A tuple/list/set of `tf.Variable` instances. + + Example: + + ```python + export_archive = ExportArchive() + export_archive.track(model) + # Register an endpoint + export_archive.add_endpoint( + name="serve", + fn=model.call, + input_signature=[tf.TensorSpec(shape=(None, 3), dtype=tf.float32)], + ) + # Save a variable collection + export_archive.add_variable_collection( + name="optimizer_variables", variables=model.optimizer.variables) + export_archive.write_out("path/to/location") + + # Reload the object + revived_object = tf.saved_model.load("path/to/location") + # Retrieve the variables + optimizer_variables = revived_object.optimizer_variables + ``` + """ + if not isinstance(variables, (list, tuple, set)): + raise ValueError( + "Expected `variables` to be a list/tuple/set. " + f"Received instead object of type '{type(variables)}'." 
+            )
+        if not all(isinstance(v, tf.Variable) for v in variables):
+            raise ValueError(
+                "Expected all elements in `variables` to be "
+                "`tf.Variable` instances. Found instead the following types: "
+                f"{list(set(type(v) for v in variables))}"
+            )
+        setattr(self, name, list(variables))
+
+    def write_out(self, filepath, options=None):
+        """Write the corresponding SavedModel to disk.
+
+        Arguments:
+            filepath: `str` or `pathlib.Path` object.
+                Path where to save the artifact.
+            options: `tf.saved_model.SaveOptions` object that specifies
+                SavedModel saving options.
+
+        **Note on TF-Serving**: all endpoints registered via `add_endpoint()`
+        are made visible for TF-Serving in the SavedModel artifact. In addition,
+        the first endpoint registered is made visible under the alias
+        `"serving_default"` (unless an endpoint with the name
+        `"serving_default"` was already registered manually),
+        since TF-Serving requires this endpoint to be set.
+        """
+        if not self._endpoint_names:
+            raise ValueError(
+                "No endpoints have been set yet. Call add_endpoint()."
+            )
+        self._filter_and_track_resources()
+
+        signatures = {}
+        for name in self._endpoint_names:
+            signatures[name] = self._get_concrete_fn(name)
+        # Add "serving_default" signature key for TFServing
+        if "serving_default" not in self._endpoint_names:
+            signatures["serving_default"] = self._get_concrete_fn(
+                self._endpoint_names[0]
+            )
+        tf.saved_model.save(
+            self, filepath, options=options, signatures=signatures
+        )
+        # Print out available endpoints
+        endpoints = "\n\n".join(
+            _print_signature(getattr(self, name), name)
+            for name in self._endpoint_names
+        )
+        io_utils.print_msg(
+            f"Saved artifact at '{filepath}'. "
+            "The following endpoints are available:\n\n"
+            f"{endpoints}"
+        )
+
+    def _get_concrete_fn(self, endpoint):
+        """Workaround for some SavedModel quirks."""
+        if endpoint in self._endpoint_signatures:
+            return getattr(self, endpoint)
+        else:
+            traces = getattr(self, endpoint)._trackable_children("saved_model")
+            return list(traces.values())[0]
+
+    def _get_variables_used_by_endpoints(self):
+        fns = [self._get_concrete_fn(name) for name in self._endpoint_names]
+        return _list_variables_used_by_fns(fns)
+
+    def _filter_and_track_resources(self):
+        """Track resources used by endpoints / referenced in `track()` calls."""
+        # Start by extracting variables from endpoints.
+        fns = [self._get_concrete_fn(name) for name in self._endpoint_names]
+        tvs, ntvs = _list_variables_used_by_fns(fns)
+        self._all_variables = list(tvs + ntvs)
+
+        # Next, track lookup tables.
+        # Hopefully, one day this will be automated at the tf.function level.
+        self._misc_assets = []
+        from keras.layers.preprocessing.index_lookup import IndexLookup
+
+        if hasattr(self, "_tracked"):
+            for root in self._tracked:
+                descendants = tf.train.TrackableView(root).descendants()
+                for trackable in descendants:
+                    if isinstance(trackable, IndexLookup):
+                        self._misc_assets.append(trackable)
+
+
+def export_model(model, filepath):
+    export_archive = ExportArchive()
+    export_archive.track(model)
+    if isinstance(model, (functional.Functional, sequential.Sequential)):
+        input_signature = tf.nest.map_structure(_make_tensor_spec, model.inputs)
+        if isinstance(input_signature, list) and len(input_signature) > 1:
+            input_signature = [input_signature]
+        export_archive.add_endpoint("serve", model.__call__, input_signature)
+    else:
+        save_spec = model._get_save_spec()
+        if not save_spec:
+            raise ValueError(
+                "The model provided has never been called. 
" + "It must be called at least once before export." + ) + input_signature = [save_spec] + export_archive.add_endpoint("serve", model.__call__, input_signature) + export_archive.write_out(filepath) + + +class ReloadedLayer(base_layer.Layer): + """Reload a Keras model/layer that was saved via SavedModel / ExportArchive. + + Arguments: + filepath: `str` or `pathlib.Path` object. The path to the SavedModel. + call_endpoint: Name of the endpoint to use as the `call()` method + of the reloaded layer. If the SavedModel was created + via `model.export()`, + then the default endpoint name is `'serve'`. In other cases + it may be named `'serving_default'`. + + Example: + + ```python + model.export("path/to/artifact") + reloaded_layer = ReloadedLayer("path/to/artifact") + outputs = reloaded_layer(inputs) + ``` + + The reloaded object can be used like a regular Keras layer, and supports + training/fine-tuning of its trainable weights. Note that the reloaded + object retains none of the internal structure or custom methods of the + original object -- it's a brand new layer created around the saved + function. + + **Limitations:** + + * Only call endpoints with a single `inputs` tensor argument + (which may optionally be a dict/tuple/list of tensors) are supported. + For endpoints with multiple separate input tensor arguments, consider + subclassing `ReloadedLayer` and implementing a `call()` method with a + custom signature. + * If you need training-time behavior to differ from inference-time behavior + (i.e. if you need the reloaded object to support a `training=True` argument + in `__call__()`), make sure that the training-time call function is + saved as a standalone endpoint in the artifact, and provide its name + to the `ReloadedLayer` via the `call_training_endpoint` argument. + """ + + def __init__( + self, + filepath, + call_endpoint="serve", + call_training_endpoint=None, + trainable=True, + name=None, + dtype=None, + ): + # Initialize an empty layer, then add_weight() etc. as needed. + super().__init__(trainable=trainable, name=name, dtype=dtype) + + self._reloaded_obj = tf.saved_model.load(filepath) + + self.filepath = filepath + self.call_endpoint = call_endpoint + self.call_training_endpoint = call_training_endpoint + + # Resolve the call function. + if hasattr(self._reloaded_obj, call_endpoint): + # Case 1: it's set as an attribute. + self.call_endpoint_fn = getattr(self._reloaded_obj, call_endpoint) + elif call_endpoint in self._reloaded_obj.signatures: + # Case 2: it's listed in the `signatures` field. + self.call_endpoint_fn = self._reloaded_obj.signatures[call_endpoint] + else: + raise ValueError( + f"The endpoint '{call_endpoint}' is neither an " + "attribute of the reloaded SavedModel, nor an entry " + "in the `signatures` field of the reloaded SavedModel. " + ) + + # Resolving the training function. + if call_training_endpoint: + if hasattr(self._reloaded_obj, call_training_endpoint): + self.call_training_endpoint_fn = getattr( + self._reloaded_obj, call_training_endpoint + ) + elif call_training_endpoint in self._reloaded_obj.signatures: + self.call_training_endpoint_fn = self._reloaded_obj.signatures[ + call_training_endpoint + ] + else: + raise ValueError( + f"The endpoint '{call_training_endpoint}' is " + "neither an attribute of the reloaded SavedModel, " + "nor an entry in the `signatures` field of " + "the reloaded SavedModel. " + ) + + # Add trainable and non-trainable weights from the call_endpoint_fn. 
+ all_fns = [self.call_endpoint_fn] + if call_training_endpoint: + all_fns.append(self.call_training_endpoint_fn) + tvs, ntvs = _list_variables_used_by_fns(all_fns) + for v in tvs: + self._add_existing_weight(v, trainable=True) + for v in ntvs: + self._add_existing_weight(v, trainable=False) + self.built = True + + def _add_existing_weight(self, weight, trainable): + """Calls add_weight() to register but not create an existing weight.""" + self.add_weight( + name=weight.name, + shape=weight.shape, + dtype=weight.dtype, + trainable=trainable, + getter=lambda *_, **__: weight, + ) + + def call(self, inputs, training=False, **kwargs): + if training: + if self.call_training_endpoint: + return self.call_training_endpoint_fn(inputs, **kwargs) + return self.call_endpoint_fn(inputs, **kwargs) + + def get_config(self): + base_config = super().get_config() + config = { + # Note: this is not intended to be portable. + "filepath": self.filepath, + "call_endpoint": self.call_endpoint, + "call_training_endpoint": self.call_training_endpoint, + } + return {**base_config, **config} + + +def _make_tensor_spec(x): + return tf.TensorSpec(x.shape, dtype=x.dtype, name=x.name) + + +def _print_signature(fn, name): + concrete_fn = fn._list_all_concrete_functions()[0] + pprinted_signature = concrete_fn.pretty_printed_signature(verbose=True) + lines = pprinted_signature.split("\n") + lines = [f"* Endpoint '{name}'"] + lines[1:] + endpoint = "\n".join(lines) + return endpoint + + +def _list_variables_used_by_fns(fns): + trainable_variables = [] + non_trainable_variables = [] + trainable_variables_ids = set() + non_trainable_variables_ids = set() + for fn in fns: + if hasattr(fn, "concrete_functions"): + concrete_functions = fn.concrete_functions + elif hasattr(fn, "get_concrete_function"): + concrete_functions = [fn.get_concrete_function()] + else: + concrete_functions = [fn] + for concrete_fn in concrete_functions: + for v in concrete_fn.trainable_variables: + if id(v) not in trainable_variables_ids: + trainable_variables.append(v) + trainable_variables_ids.add(id(v)) + + for v in concrete_fn.variables: + if ( + id(v) not in trainable_variables_ids + and id(v) not in non_trainable_variables_ids + ): + non_trainable_variables.append(v) + non_trainable_variables_ids.add(id(v)) + return trainable_variables, non_trainable_variables diff --git a/keras/export/export_lib_test.py b/keras/export/export_lib_test.py new file mode 100644 index 000000000000..988b9a14904d --- /dev/null +++ b/keras/export/export_lib_test.py @@ -0,0 +1,625 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for inference-only model/layer exporting utilities.""" +import os + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +import keras +from keras.export import export_lib +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + + +def get_model(): + layers = [ + keras.layers.Dense(10, activation="relu"), + keras.layers.BatchNormalization(), + keras.layers.Dense(1, activation="sigmoid"), + ] + model = test_utils.get_model_from_layers(layers, input_shape=(10,)) + return model + + +@test_utils.run_v2_only +class ExportArchiveTest(tf.test.TestCase, parameterized.TestCase): + @test_combinations.run_with_all_model_types + def test_standard_model_export(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + model = get_model() + ref_input = tf.random.normal((3, 10)) + ref_output = model(ref_input).numpy() + + export_lib.export_model(model, temp_filepath) + revived_model = tf.saved_model.load(temp_filepath) + self.assertAllClose( + ref_output, revived_model.serve(ref_input).numpy(), atol=1e-6 + ) + + @test_combinations.run_with_all_model_types + def test_low_level_model_export(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + + model = get_model() + ref_input = tf.random.normal((3, 10)) + ref_output = model(ref_input).numpy() + + # Test variable tracking + export_archive = export_lib.ExportArchive() + export_archive.track(model) + self.assertLen(export_archive.variables, 8) + self.assertLen(export_archive.trainable_variables, 6) + self.assertLen(export_archive.non_trainable_variables, 2) + + @tf.function() + def my_endpoint(x): + return model(x) + + # Test registering an endpoint that is a tf.function (called) + my_endpoint(ref_input) # Trace fn + + export_archive.add_endpoint( + "call", + my_endpoint, + ) + export_archive.write_out(temp_filepath) + + revived_model = tf.saved_model.load(temp_filepath) + self.assertFalse(hasattr(revived_model, "_tracked")) + self.assertAllClose( + ref_output, revived_model.call(ref_input).numpy(), atol=1e-6 + ) + self.assertLen(revived_model.variables, 8) + self.assertLen(revived_model.trainable_variables, 6) + self.assertLen(revived_model.non_trainable_variables, 2) + + # Test registering an endpoint that is NOT a tf.function + export_archive = export_lib.ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + "call", + model.call, + input_signature=[ + tf.TensorSpec( + shape=(None, 10), + dtype=tf.float32, + ) + ], + ) + export_archive.write_out(temp_filepath) + revived_model = tf.saved_model.load(temp_filepath) + self.assertAllClose( + ref_output, revived_model.call(ref_input).numpy(), atol=1e-6 + ) + + def test_layer_export(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_layer") + + layer = keras.layers.BatchNormalization() + ref_input = tf.random.normal((3, 10)) + ref_output = layer(ref_input).numpy() # Build layer (important) + + export_archive = export_lib.ExportArchive() + export_archive.track(layer) + export_archive.add_endpoint( + "call", + layer.call, + input_signature=[ + tf.TensorSpec( + shape=(None, 10), + dtype=tf.float32, + ) + ], + ) + export_archive.write_out(temp_filepath) + revived_layer = tf.saved_model.load(temp_filepath) + self.assertAllClose( + ref_output, revived_layer.call(ref_input).numpy(), atol=1e-6 + ) + + def test_multi_input_output_functional_model(self): + 
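+        # A model with several inputs takes a nested input_signature: a
+        # single positional entry that is a list (or dict) of TensorSpecs.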
temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + x1 = keras.Input((2,)) + x2 = keras.Input((2,)) + y1 = keras.layers.Dense(3)(x1) + y2 = keras.layers.Dense(3)(x2) + model = keras.Model([x1, x2], [y1, y2]) + + ref_inputs = [tf.random.normal((3, 2)), tf.random.normal((3, 2))] + ref_outputs = model(ref_inputs) + + export_archive = export_lib.ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + "serve", + model.call, + input_signature=[ + [ + tf.TensorSpec( + shape=(None, 2), + dtype=tf.float32, + ), + tf.TensorSpec( + shape=(None, 2), + dtype=tf.float32, + ), + ] + ], + ) + export_archive.write_out(temp_filepath) + revived_model = tf.saved_model.load(temp_filepath) + self.assertAllClose( + ref_outputs[0].numpy(), + revived_model.serve(ref_inputs)[0].numpy(), + atol=1e-6, + ) + self.assertAllClose( + ref_outputs[1].numpy(), + revived_model.serve(ref_inputs)[1].numpy(), + atol=1e-6, + ) + + # Now test dict inputs + model = keras.Model({"x1": x1, "x2": x2}, [y1, y2]) + + ref_inputs = { + "x1": tf.random.normal((3, 2)), + "x2": tf.random.normal((3, 2)), + } + ref_outputs = model(ref_inputs) + + export_archive = export_lib.ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + "serve", + model.call, + input_signature=[ + { + "x1": tf.TensorSpec( + shape=(None, 2), + dtype=tf.float32, + ), + "x2": tf.TensorSpec( + shape=(None, 2), + dtype=tf.float32, + ), + } + ], + ) + export_archive.write_out(temp_filepath) + revived_model = tf.saved_model.load(temp_filepath) + self.assertAllClose( + ref_outputs[0].numpy(), + revived_model.serve(ref_inputs)[0].numpy(), + atol=1e-6, + ) + self.assertAllClose( + ref_outputs[1].numpy(), + revived_model.serve(ref_inputs)[1].numpy(), + atol=1e-6, + ) + + def test_model_with_lookup_table(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + text_vectorization = keras.layers.TextVectorization() + text_vectorization.adapt(["one two", "three four", "five six"]) + model = keras.Sequential( + [ + text_vectorization, + keras.layers.Embedding(10, 32), + keras.layers.Dense(1), + ] + ) + ref_input = tf.convert_to_tensor(["one two three four"]) + ref_output = model(ref_input).numpy() + + export_lib.export_model(model, temp_filepath) + revived_model = tf.saved_model.load(temp_filepath) + self.assertAllClose( + ref_output, revived_model.serve(ref_input).numpy(), atol=1e-6 + ) + + def test_track_multiple_layers(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + layer_1 = keras.layers.Dense(2) + ref_input_1 = tf.random.normal((3, 4)) + ref_output_1 = layer_1(ref_input_1).numpy() + layer_2 = keras.layers.Dense(3) + ref_input_2 = tf.random.normal((3, 5)) + ref_output_2 = layer_2(ref_input_2).numpy() + + export_archive = export_lib.ExportArchive() + export_archive.add_endpoint( + "call_1", + layer_1.call, + input_signature=[ + tf.TensorSpec( + shape=(None, 4), + dtype=tf.float32, + ), + ], + ) + export_archive.add_endpoint( + "call_2", + layer_2.call, + input_signature=[ + tf.TensorSpec( + shape=(None, 5), + dtype=tf.float32, + ), + ], + ) + export_archive.write_out(temp_filepath) + revived_layer = tf.saved_model.load(temp_filepath) + self.assertAllClose( + ref_output_1, + revived_layer.call_1(ref_input_1).numpy(), + atol=1e-6, + ) + self.assertAllClose( + ref_output_2, + revived_layer.call_2(ref_input_2).numpy(), + atol=1e-6, + ) + + def test_non_standard_layer_signature(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_layer") + + layer = 
keras.layers.MultiHeadAttention(2, 2) + x1 = tf.random.normal((3, 2, 2)) + x2 = tf.random.normal((3, 2, 2)) + ref_output = layer(x1, x2).numpy() # Build layer (important) + export_archive = export_lib.ExportArchive() + export_archive.track(layer) + export_archive.add_endpoint( + "call", + layer.call, + input_signature=[ + tf.TensorSpec( + shape=(None, 2, 2), + dtype=tf.float32, + ), + tf.TensorSpec( + shape=(None, 2, 2), + dtype=tf.float32, + ), + ], + ) + export_archive.write_out(temp_filepath) + revived_layer = tf.saved_model.load(temp_filepath) + self.assertAllClose( + ref_output, + revived_layer.call(query=x1, value=x2).numpy(), + atol=1e-6, + ) + + def test_variable_collection(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + + model = keras.Sequential( + [ + keras.Input((10,)), + keras.layers.Dense(2), + keras.layers.Dense(2), + ] + ) + + # Test variable tracking + export_archive = export_lib.ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + "call", + model.call, + input_signature=[ + tf.TensorSpec( + shape=(None, 10), + dtype=tf.float32, + ) + ], + ) + export_archive.add_variable_collection( + "my_vars", model.layers[1].weights + ) + self.assertLen(export_archive.my_vars, 2) + export_archive.write_out(temp_filepath) + revived_model = tf.saved_model.load(temp_filepath) + self.assertLen(revived_model.my_vars, 2) + + def test_export_model_errors(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + + # Model has not been built + model = keras.Sequential([keras.layers.Dense(2)]) + with self.assertRaisesRegex(ValueError, "It must be built"): + export_lib.export_model(model, temp_filepath) + + # Subclassed model has not been called + class MyModel(keras.Model): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.dense = keras.layers.Dense(2) + + def build(self, input_shape): + self.dense.build(input_shape) + self.built = True + + def call(self, x): + return self.dense(x) + + model = MyModel() + model.build((2, 3)) + with self.assertRaisesRegex(ValueError, "It must be called"): + export_lib.export_model(model, temp_filepath) + + def test_export_archive_errors(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + model = keras.Sequential([keras.layers.Dense(2)]) + model(tf.random.normal((2, 3))) + + # Endpoint name reuse + export_archive = export_lib.ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + "call", + model.call, + input_signature=[ + tf.TensorSpec( + shape=(None, 3), + dtype=tf.float32, + ) + ], + ) + with self.assertRaisesRegex(ValueError, "already taken"): + export_archive.add_endpoint( + "call", + model.call, + input_signature=[ + tf.TensorSpec( + shape=(None, 3), + dtype=tf.float32, + ) + ], + ) + + # Write out with no endpoints + export_archive = export_lib.ExportArchive() + export_archive.track(model) + with self.assertRaisesRegex(ValueError, "No endpoints have been set"): + export_archive.write_out(temp_filepath) + + # Invalid object type + with self.assertRaisesRegex(ValueError, "Invalid resource type"): + export_archive = export_lib.ExportArchive() + export_archive.track("model") + + # Set endpoint with no input signature + export_archive = export_lib.ExportArchive() + export_archive.track(model) + with self.assertRaisesRegex( + ValueError, "you must provide an `input_signature`" + ): + export_archive.add_endpoint( + "call", + model.call, + ) + + # Set endpoint that has never been called + export_archive = 
export_lib.ExportArchive() + export_archive.track(model) + + @tf.function() + def my_endpoint(x): + return model(x) + + export_archive = export_lib.ExportArchive() + export_archive.track(model) + with self.assertRaisesRegex( + ValueError, "you must either provide a function" + ): + export_archive.add_endpoint( + "call", + my_endpoint, + ) + + def test_export_no_assets(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + + # Case where there are legitimately no assets. + model = keras.Sequential([keras.layers.Flatten()]) + model(tf.random.normal((2, 3))) + export_archive = export_lib.ExportArchive() + export_archive.add_endpoint( + "call", + model.call, + input_signature=[ + tf.TensorSpec( + shape=(None, 3), + dtype=tf.float32, + ) + ], + ) + export_archive.write_out(temp_filepath) + + @test_combinations.run_with_all_model_types + def test_model_export_method(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + model = get_model() + ref_input = tf.random.normal((3, 10)) + ref_output = model(ref_input).numpy() + + model.export(temp_filepath) + revived_model = tf.saved_model.load(temp_filepath) + self.assertAllClose( + ref_output, revived_model.serve(ref_input).numpy(), atol=1e-6 + ) + + +@test_utils.run_v2_only +class TestReloadedLayer(tf.test.TestCase, parameterized.TestCase): + @test_combinations.run_with_all_model_types + def test_reloading_export_archive(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + model = get_model() + ref_input = tf.random.normal((3, 10)) + ref_output = model(ref_input).numpy() + + export_lib.export_model(model, temp_filepath) + reloaded_layer = export_lib.ReloadedLayer(temp_filepath) + self.assertAllClose( + reloaded_layer(ref_input).numpy(), ref_output, atol=1e-7 + ) + self.assertLen(reloaded_layer.weights, len(model.weights)) + self.assertLen( + reloaded_layer.trainable_weights, len(model.trainable_weights) + ) + self.assertLen( + reloaded_layer.non_trainable_weights, + len(model.non_trainable_weights), + ) + + # Test fine-tuning + new_model = keras.Sequential([reloaded_layer]) + new_model.compile(optimizer="rmsprop", loss="mse") + x = tf.random.normal((32, 10)) + y = tf.random.normal((32, 1)) + new_model.train_on_batch(x, y) + new_output = reloaded_layer(ref_input).numpy() + self.assertNotAllClose(new_output, ref_output, atol=1e-5) + + # Test that trainable can be set to False + reloaded_layer.trainable = False + new_model.compile(optimizer="rmsprop", loss="mse") + x = tf.random.normal((32, 10)) + y = tf.random.normal((32, 1)) + new_model.train_on_batch(x, y) + # The output must not have changed + self.assertAllClose( + reloaded_layer(ref_input).numpy(), new_output, atol=1e-7 + ) + + @test_combinations.run_with_all_model_types + def test_reloading_default_saved_model(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + model = get_model() + ref_input = tf.random.normal((3, 10)) + ref_output = model(ref_input).numpy() + + tf.saved_model.save(model, temp_filepath) + reloaded_layer = export_lib.ReloadedLayer( + temp_filepath, call_endpoint="serving_default" + ) + # The output is a dict, due to the nature of SavedModel saving. 
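+        # ("serving_default" signatures map each output name to a tensor,
+        # hence the lookup by key below.)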
+ new_output = reloaded_layer(ref_input) + self.assertAllClose( + new_output[list(new_output.keys())[0]].numpy(), + ref_output, + atol=1e-7, + ) + self.assertLen(reloaded_layer.weights, len(model.weights)) + self.assertLen( + reloaded_layer.trainable_weights, len(model.trainable_weights) + ) + self.assertLen( + reloaded_layer.non_trainable_weights, + len(model.non_trainable_weights), + ) + + def test_call_training(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + keras.utils.set_random_seed(1337) + model = keras.Sequential( + [ + keras.Input((10,)), + keras.layers.Dense(10), + keras.layers.Dropout(0.99999), + ] + ) + export_archive = export_lib.ExportArchive() + export_archive.track(model) + export_archive.add_endpoint( + name="call_inference", + fn=lambda x: model(x, training=False), + input_signature=[tf.TensorSpec(shape=(None, 10), dtype=tf.float32)], + ) + export_archive.add_endpoint( + name="call_training", + fn=lambda x: model(x, training=True), + input_signature=[tf.TensorSpec(shape=(None, 10), dtype=tf.float32)], + ) + export_archive.write_out(temp_filepath) + reloaded_layer = export_lib.ReloadedLayer( + temp_filepath, + call_endpoint="call_inference", + call_training_endpoint="call_training", + ) + inference_output = reloaded_layer( + tf.random.normal((1, 10)), training=False + ) + training_output = reloaded_layer( + tf.random.normal((1, 10)), training=True + ) + self.assertAllClose(np.mean(training_output), 0.0, atol=1e-7) + self.assertNotAllClose(np.mean(inference_output), 0.0, atol=1e-7) + + @test_combinations.run_with_all_model_types + def test_serialization(self): + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + model = get_model() + ref_input = tf.random.normal((3, 10)) + ref_output = model(ref_input).numpy() + + export_lib.export_model(model, temp_filepath) + reloaded_layer = export_lib.ReloadedLayer(temp_filepath) + + # Test reinstantiation from config + config = reloaded_layer.get_config() + rereloaded_layer = export_lib.ReloadedLayer.from_config(config) + self.assertAllClose( + rereloaded_layer(ref_input).numpy(), ref_output, atol=1e-7 + ) + + # Test whole model saving with reloaded layer inside + model = keras.Sequential([reloaded_layer]) + temp_model_filepath = os.path.join(self.get_temp_dir(), "m.keras") + model.save(temp_model_filepath, save_format="keras_v3") + reloaded_model = keras.models.load_model( + temp_model_filepath, + custom_objects={"ReloadedLayer": export_lib.ReloadedLayer}, + ) + self.assertAllClose( + reloaded_model(ref_input).numpy(), ref_output, atol=1e-7 + ) + + def test_errors(self): + # Test missing call endpoint + temp_filepath = os.path.join(self.get_temp_dir(), "exported_model") + model = keras.Sequential([keras.Input((2,)), keras.layers.Dense(3)]) + export_lib.export_model(model, temp_filepath) + with self.assertRaisesRegex(ValueError, "The endpoint 'wrong'"): + export_lib.ReloadedLayer(temp_filepath, call_endpoint="wrong") + + # Test missing call training endpoint + with self.assertRaisesRegex(ValueError, "The endpoint 'wrong'"): + export_lib.ReloadedLayer( + temp_filepath, + call_endpoint="serve", + call_training_endpoint="wrong", + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/feature_column/BUILD b/keras/feature_column/BUILD index e9eb317b72b5..6684bc5dafcc 100644 --- a/keras/feature_column/BUILD +++ b/keras/feature_column/BUILD @@ -1,6 +1,8 @@ +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment 
default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/tensorflow/python/feature_column:__subpackages__", # For unit testing diff --git a/keras/feature_column/base_feature_layer.py b/keras/feature_column/base_feature_layer.py index 3e44981260d0..085ccc6c3b55 100644 --- a/keras/feature_column/base_feature_layer.py +++ b/keras/feature_column/base_feature_layer.py @@ -21,196 +21,222 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import collections import re + +import tensorflow.compat.v2 as tf + from keras.engine.base_layer import Layer -from keras.utils import generic_utils +from keras.saving import serialization_lib class _BaseFeaturesLayer(Layer): - """Base class for DenseFeatures and SequenceFeatures. - - Defines common methods and helpers. - - Args: - feature_columns: An iterable containing the FeatureColumns to use as - inputs to your model. - expected_column_type: Expected class for provided feature columns. - trainable: Boolean, whether the layer's variables will be updated via - gradient descent during training. - name: Name to give to the DenseFeatures. - **kwargs: Keyword arguments to construct a layer. - - Raises: - ValueError: if an item in `feature_columns` doesn't match - `expected_column_type`. - """ - - def __init__(self, - feature_columns, - expected_column_type, - trainable, - name, - partitioner=None, - **kwargs): - super().__init__( - name=name, trainable=trainable, **kwargs) - self._feature_columns = _normalize_feature_columns( - feature_columns) - self._state_manager = tf.__internal__.feature_column.StateManager( # pylint: disable=protected-access - self, self.trainable) - self._partitioner = partitioner - for column in self._feature_columns: - if not isinstance(column, expected_column_type): - raise ValueError( - 'Items of feature_columns must be a {}. ' - 'You can wrap a categorical column with an ' - 'embedding_column or indicator_column. Given: {}'.format( - expected_column_type, column)) - - def build(self, _): - for column in self._feature_columns: - with tf.compat.v1.variable_scope( - self.name, partitioner=self._partitioner): - with tf.compat.v1.variable_scope( - _sanitize_column_name_for_variable_scope(column.name)): - column.create_state(self._state_manager) - super().build(None) - - def _output_shape(self, input_shape, num_elements): - """Computes expected output shape of the layer or a column's dense tensor. + """Base class for DenseFeatures and SequenceFeatures. - Args: - input_shape: Tensor or array with batch shape. - num_elements: Size of the last dimension of the output. + Defines common methods and helpers. - Returns: - Tuple with output shape. + Args: + feature_columns: An iterable containing the FeatureColumns to use as + inputs to your model. + expected_column_type: Expected class for provided feature columns. + trainable: Boolean, whether the layer's variables will be updated via + gradient descent during training. + name: Name to give to the DenseFeatures. + **kwargs: Keyword arguments to construct a layer. + + Raises: + ValueError: if an item in `feature_columns` doesn't match + `expected_column_type`. 
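+
+    For example, `DenseFeatures` passes a dense `expected_column_type`, so
+    handing it a plain categorical column raises a `ValueError` suggesting
+    an `embedding_column` or `indicator_column` wrapper instead.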
""" - raise NotImplementedError('Calling an abstract method.') - - def compute_output_shape(self, input_shape): - total_elements = 0 - for column in self._feature_columns: - total_elements += column.variable_shape.num_elements() - return self._target_shape(input_shape, total_elements) - def _process_dense_tensor(self, column, tensor): - """Reshapes the dense tensor output of a column based on expected shape. + def __init__( + self, + feature_columns, + expected_column_type, + trainable, + name, + partitioner=None, + **kwargs + ): + super().__init__(name=name, trainable=trainable, **kwargs) + self._feature_columns = _normalize_feature_columns(feature_columns) + self._state_manager = tf.__internal__.feature_column.StateManager( + self, self.trainable + ) + self._partitioner = partitioner + for column in self._feature_columns: + if not isinstance(column, expected_column_type): + raise ValueError( + "Items of feature_columns must be a {}. " + "You can wrap a categorical column with an " + "embedding_column or indicator_column. Given: {}".format( + expected_column_type, column + ) + ) + + def build(self, _): + for column in self._feature_columns: + with tf.compat.v1.variable_scope( + self.name, partitioner=self._partitioner + ): + with tf.compat.v1.variable_scope( + _sanitize_column_name_for_variable_scope(column.name) + ): + column.create_state(self._state_manager) + super().build(None) + + def _output_shape(self, input_shape, num_elements): + """Computes expected output shape of the dense tensor of the layer. + + Args: + input_shape: Tensor or array with batch shape. + num_elements: Size of the last dimension of the output. + + Returns: + Tuple with output shape. + """ + raise NotImplementedError("Calling an abstract method.") + + def compute_output_shape(self, input_shape): + total_elements = 0 + for column in self._feature_columns: + total_elements += column.variable_shape.num_elements() + return self._target_shape(input_shape, total_elements) + + def _process_dense_tensor(self, column, tensor): + """Reshapes the dense tensor output of a column based on expected shape. + + Args: + column: A DenseColumn or SequenceDenseColumn object. + tensor: A dense tensor obtained from the same column. + + Returns: + Reshaped dense tensor. 
+ """ + num_elements = column.variable_shape.num_elements() + target_shape = self._target_shape(tf.shape(tensor), num_elements) + return tf.reshape(tensor, shape=target_shape) + + def _verify_and_concat_tensors(self, output_tensors): + """Verifies and concatenates the dense output of several columns.""" + _verify_static_batch_size_equality( + output_tensors, self._feature_columns + ) + return tf.concat(output_tensors, -1) + + def get_config(self): + column_configs = [ + tf.__internal__.feature_column.serialize_feature_column(fc) + for fc in self._feature_columns + ] + config = {"feature_columns": column_configs} + config["partitioner"] = serialization_lib.serialize_keras_object( + self._partitioner + ) + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + config_cp = config.copy() + columns_by_name = {} + config_cp["feature_columns"] = [ + tf.__internal__.feature_column.deserialize_feature_column( + c, custom_objects, columns_by_name + ) + for c in config["feature_columns"] + ] + config_cp["partitioner"] = serialization_lib.deserialize_keras_object( + config["partitioner"], custom_objects + ) + + return cls(**config_cp) - Args: - column: A DenseColumn or SequenceDenseColumn object. - tensor: A dense tensor obtained from the same column. - - Returns: - Reshaped dense tensor. - """ - num_elements = column.variable_shape.num_elements() - target_shape = self._target_shape(tf.shape(tensor), num_elements) - return tf.reshape(tensor, shape=target_shape) - def _verify_and_concat_tensors(self, output_tensors): - """Verifies and concatenates the dense output of several columns.""" - _verify_static_batch_size_equality(output_tensors, self._feature_columns) - return tf.concat(output_tensors, -1) +def _sanitize_column_name_for_variable_scope(name): + """Sanitizes user-provided feature names for use as variable scopes.""" + invalid_char = re.compile("[^A-Za-z0-9_.\\-]") + return invalid_char.sub("_", name) - def get_config(self): - column_configs = [tf.__internal__.feature_column.serialize_feature_column(fc) - for fc in self._feature_columns] - config = {'feature_columns': column_configs} - config['partitioner'] = generic_utils.serialize_keras_object( - self._partitioner) - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) +def _verify_static_batch_size_equality(tensors, columns): + """Verify equality between static batch sizes. - @classmethod - def from_config(cls, config, custom_objects=None): - config_cp = config.copy() - columns_by_name = {} - config_cp['feature_columns'] = [tf.__internal__.feature_column.deserialize_feature_column( - c, custom_objects, columns_by_name) for c in config['feature_columns']] - config_cp['partitioner'] = generic_utils.deserialize_keras_object( - config['partitioner'], custom_objects) + Args: + tensors: iterable of input tensors. + columns: Corresponding feature columns. - return cls(**config_cp) + Raises: + ValueError: in case of mismatched batch sizes. + """ + expected_batch_size = None + for i in range(0, len(tensors)): + # bath_size is a Dimension object. 
-  def get_config(self):
-    column_configs = [tf.__internal__.feature_column.serialize_feature_column(fc)
-                      for fc in self._feature_columns]
-    config = {'feature_columns': column_configs}
-    config['partitioner'] = generic_utils.serialize_keras_object(
-        self._partitioner)
-
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(config.items()))
+def _verify_static_batch_size_equality(tensors, columns):
+    """Verify equality between static batch sizes.
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    config_cp = config.copy()
-    columns_by_name = {}
-    config_cp['feature_columns'] = [tf.__internal__.feature_column.deserialize_feature_column(
-        c, custom_objects, columns_by_name) for c in config['feature_columns']]
-    config_cp['partitioner'] = generic_utils.deserialize_keras_object(
-        config['partitioner'], custom_objects)
+
+    Args:
+        tensors: iterable of input tensors.
+        columns: Corresponding feature columns.
-    return cls(**config_cp)
+
+    Raises:
+        ValueError: in case of mismatched batch sizes.
+    """
+    expected_batch_size = None
+    for i in range(0, len(tensors)):
+        # batch_size is a Dimension object.
+        batch_size = tf.compat.v1.Dimension(
+            tf.compat.dimension_value(tensors[i].shape[0])
+        )
+        if batch_size.value is not None:
+            if expected_batch_size is None:
+                batch_size_column_index = i
+                expected_batch_size = batch_size
+            elif not expected_batch_size.is_compatible_with(batch_size):
+                raise ValueError(
+                    "Batch size (first dimension) of each feature must be "
+                    "same. Batch size of columns ({}, {}): ({}, {})".format(
+                        columns[batch_size_column_index].name,
+                        columns[i].name,
+                        expected_batch_size,
+                        batch_size,
+                    )
+                )
-def _sanitize_column_name_for_variable_scope(name):
-  """Sanitizes user-provided feature names for use as variable scopes."""
-  invalid_char = re.compile('[^A-Za-z0-9_.\\-]')
-  return invalid_char.sub('_', name)
+def _normalize_feature_columns(feature_columns):
+    """Normalizes the `feature_columns` input.
+
+    This method converts the `feature_columns` to list type as best as it can.
+    In addition, verifies the type and other parts of feature_columns, required
+    by downstream library.
-def _verify_static_batch_size_equality(tensors, columns):
-  """Verify equality between static batch sizes.
-
-  Args:
-    tensors: iterable of input tensors.
-    columns: Corresponding feature columns.
-
-  Raises:
-    ValueError: in case of mismatched batch sizes.
-  """
-  expected_batch_size = None
-  for i in range(0, len(tensors)):
-    # bath_size is a Dimension object.
-    batch_size = tf.compat.v1.Dimension(tf.compat.dimension_value(
-        tensors[i].shape[0]))
-    if batch_size.value is not None:
-      if expected_batch_size is None:
-        bath_size_column_index = i
-        expected_batch_size = batch_size
-      elif not expected_batch_size.is_compatible_with(batch_size):
-        raise ValueError(
-            'Batch size (first dimension) of each feature must be same. '
-            'Batch size of columns ({}, {}): ({}, {})'.format(
-                columns[bath_size_column_index].name, columns[i].name,
-                expected_batch_size, batch_size))
+
+    Args:
+        feature_columns: The raw feature columns, usually passed by users.
+
+    Returns:
+        The normalized feature column list.
-def _normalize_feature_columns(feature_columns):
-  """Normalizes the `feature_columns` input.
-
-  This method converts the `feature_columns` to list type as best as it can. In
-  addition, verifies the type and other parts of feature_columns, required by
-  downstream library.
-
-  Args:
-    feature_columns: The raw feature columns, usually passed by users.
-
-  Returns:
-    The normalized feature column list.
-
-  Raises:
-    ValueError: for any invalid inputs, such as empty, duplicated names, etc.
-  """
-  if isinstance(feature_columns, tf.__internal__.feature_column.FeatureColumn):
-    feature_columns = [feature_columns]
-
-  if isinstance(feature_columns, collections.abc.Iterator):
-    feature_columns = list(feature_columns)
-
-  if isinstance(feature_columns, dict):
-    raise ValueError('Expected feature_columns to be iterable, found dict.')
-
-  for column in feature_columns:
-    if not isinstance(column, tf.__internal__.feature_column.FeatureColumn):
-      raise ValueError('Items of feature_columns must be a FeatureColumn. '
-                       'Given (type {}): {}.'.format(type(column), column))
-  if not feature_columns:
-    raise ValueError('feature_columns must not be empty.')
-  name_to_column = {}
-  for column in feature_columns:
-    if column.name in name_to_column:
-      raise ValueError('Duplicate feature column name found for columns: {} '
-                       'and {}. This usually means that these columns refer to '
-                       'same base feature.
Either one must be discarded or a ' - 'duplicated but renamed item must be inserted in ' - 'features dict.'.format(column, - name_to_column[column.name])) - name_to_column[column.name] = column - - return sorted(feature_columns, key=lambda x: x.name) + Raises: + ValueError: for any invalid inputs, such as empty, duplicated names, etc. + """ + if isinstance( + feature_columns, tf.__internal__.feature_column.FeatureColumn + ): + feature_columns = [feature_columns] + + if isinstance(feature_columns, collections.abc.Iterator): + feature_columns = list(feature_columns) + + if isinstance(feature_columns, dict): + raise ValueError("Expected feature_columns to be iterable, found dict.") + + for column in feature_columns: + if not isinstance(column, tf.__internal__.feature_column.FeatureColumn): + raise ValueError( + "Items of feature_columns must be a FeatureColumn. " + "Given (type {}): {}.".format(type(column), column) + ) + if not feature_columns: + raise ValueError("feature_columns must not be empty.") + name_to_column = {} + for column in feature_columns: + if column.name in name_to_column: + raise ValueError( + "Duplicate feature column name found for columns: {} " + "and {}. This usually means that these columns refer to " + "same base feature. Either one must be discarded or a " + "duplicated but renamed item must be inserted in " + "features dict.".format(column, name_to_column[column.name]) + ) + name_to_column[column.name] = column + + return sorted(feature_columns, key=lambda x: x.name) diff --git a/keras/feature_column/dense_features.py b/keras/feature_column/dense_features.py index 9c2b4e868104..f5ae664581cc 100644 --- a/keras/feature_column/dense_features.py +++ b/keras/feature_column/dense_features.py @@ -18,157 +18,174 @@ from __future__ import division from __future__ import print_function +import json + import tensorflow.compat.v2 as tf -import json from keras import backend from keras.feature_column import base_feature_layer as kfc -from keras.saving.saved_model import json_utils +from keras.saving.legacy.saved_model import json_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export(v1=['keras.layers.DenseFeatures']) -class DenseFeatures(kfc._BaseFeaturesLayer): # pylint: disable=protected-access - """A layer that produces a dense `Tensor` based on given `feature_columns`. - - Generally a single example in training data is described with FeatureColumns. - At the first layer of the model, this column-oriented data should be converted - to a single `Tensor`. - - This layer can be called multiple times with different features. - - This is the V1 version of this layer that uses variable_scope's or partitioner - to create variables which works well with PartitionedVariables. Variable - scopes are deprecated in V2, so the V2 version uses name_scopes instead. But - currently that lacks support for partitioned variables. Use this if you need - partitioned variables. Use the partitioner argument if you have a Keras model - and uses `tf.compat.v1.keras.estimator.model_to_estimator` for training. - - Example: - - ```python - price = tf.feature_column.numeric_column('price') - keywords_embedded = tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_hash_bucket("keywords", 10K), - dimension=16) - columns = [price, keywords_embedded, ...] 
-  partitioner = tf.compat.v1.fixed_size_partitioner(num_shards=4)
-  feature_layer = tf.compat.v1.keras.layers.DenseFeatures(
-      feature_columns=columns, partitioner=partitioner)
-
-  features = tf.io.parse_example(
-      ..., features=tf.feature_column.make_parse_example_spec(columns))
-  dense_tensor = feature_layer(features)
-  for units in [128, 64, 32]:
-    dense_tensor = tf.compat.v1.keras.layers.Dense(
-        units, activation='relu')(dense_tensor)
-  prediction = tf.compat.v1.keras.layers.Dense(1)(dense_tensor)
-  ```
-  """
-
-  def __init__(self,
-               feature_columns,
-               trainable=True,
-               name=None,
-               partitioner=None,
-               **kwargs):
-    """Constructs a DenseFeatures layer.
-
-    Args:
-      feature_columns: An iterable containing the FeatureColumns to use as
-        inputs to your model. All items should be instances of classes derived
-        from `DenseColumn` such as `numeric_column`, `embedding_column`,
-        `bucketized_column`, `indicator_column`. If you have categorical
-        features, you can wrap them with an `embedding_column` or
-        `indicator_column`.
-      trainable: Boolean, whether the layer's variables will be updated via
-        gradient descent during training.
-      name: Name to give to the DenseFeatures.
-      partitioner: Partitioner for input layer. Defaults to None.
-      **kwargs: Keyword arguments to construct a layer.
-
-    Raises:
-      ValueError: if an item in `feature_columns` is not a `DenseColumn`.
+@keras_export(v1=["keras.layers.DenseFeatures"])
+class DenseFeatures(kfc._BaseFeaturesLayer):
+    """A layer that produces a dense `Tensor` based on given `feature_columns`.
+
+    Generally a single example in training data is described with
+    FeatureColumns. At the first layer of the model, this column-oriented data
+    should be converted to a single `Tensor`.
+
+    This layer can be called multiple times with different features.
+
+    This is the V1 version of this layer that uses variable_scopes or a
+    partitioner to create variables, which works well with
+    PartitionedVariables. Variable scopes are deprecated in V2, so the V2
+    version uses name_scopes instead. But currently that lacks support for
+    partitioned variables. Use this if you need partitioned variables. Use the
+    partitioner argument if you have a Keras model and use
+    `tf.compat.v1.keras.estimator.model_to_estimator` for training.
+
+    Example:
+
+    ```python
+    price = tf.feature_column.numeric_column('price')
+    keywords_embedded = tf.feature_column.embedding_column(
+        tf.feature_column.categorical_column_with_hash_bucket(
+            "keywords", 10000),
+        dimension=16)
+    columns = [price, keywords_embedded, ...]
+    partitioner = tf.compat.v1.fixed_size_partitioner(num_shards=4)
+    feature_layer = tf.compat.v1.keras.layers.DenseFeatures(
+        feature_columns=columns, partitioner=partitioner)
+
+    features = tf.io.parse_example(
+        ..., features=tf.feature_column.make_parse_example_spec(columns))
+    dense_tensor = feature_layer(features)
+    for units in [128, 64, 32]:
+        dense_tensor = tf.compat.v1.keras.layers.Dense(
+            units, activation='relu')(dense_tensor)
+    prediction = tf.compat.v1.keras.layers.Dense(1)(dense_tensor)
+    ```
+    """
-    super().__init__(
-        feature_columns=feature_columns,
-        trainable=trainable,
-        name=name,
-        partitioner=partitioner,
-        expected_column_type=tf.__internal__.feature_column.DenseColumn,
-        **kwargs)
-
-  @property
-  def _is_feature_layer(self):
-    return True
-
-  @property
-  def _tracking_metadata(self):
-    """String stored in metadata field in the SavedModel proto.
-
-    Returns:
-      A serialized JSON storing information necessary for recreating this layer.
- """ - metadata = json.loads(super()._tracking_metadata) - metadata['_is_feature_layer'] = True - return json.dumps(metadata, default=json_utils.get_json_type) - - def _target_shape(self, input_shape, total_elements): - return (input_shape[0], total_elements) - - def call(self, features, cols_to_output_tensors=None, training=None): - """Returns a dense tensor corresponding to the `feature_columns`. - - Example usage: - - >>> t1 = tf.feature_column.embedding_column( - ... tf.feature_column.categorical_column_with_hash_bucket("t1", 2), - ... dimension=8) - >>> t2 = tf.feature_column.numeric_column('t2') - >>> feature_layer = tf.compat.v1.keras.layers.DenseFeatures([t1, t2]) - >>> features = {"t1": tf.constant(["a", "b"]), "t2": tf.constant([1, 2])} - >>> dense_tensor = feature_layer(features, training=True) - - Args: - features: A mapping from key to tensors. `FeatureColumn`s look up via - these keys. For example `numeric_column('price')` will look at 'price' - key in this dict. Values can be a `SparseTensor` or a `Tensor` depends - on corresponding `FeatureColumn`. - cols_to_output_tensors: If not `None`, this will be filled with a dict - mapping feature columns to output tensors created. - training: Python boolean or None, indicating whether to the layer is being - run in training mode. This argument is passed to the call method of any - `FeatureColumn` that takes a `training` argument. For example, if a - `FeatureColumn` performed dropout, the column could expose a `training` - argument to control whether the dropout should be applied. If `None`, - defaults to `tf.keras.backend.learning_phase()`. - - - Returns: - A `Tensor` which represents input layer of a model. Its shape - is (batch_size, first_layer_dimension) and its dtype is `float32`. - first_layer_dimension is determined based on given `feature_columns`. - - Raises: - ValueError: If features are not a dictionary. - """ - if training is None: - training = backend.learning_phase() - if not isinstance(features, dict): - raise ValueError('We expected a dictionary here. Instead we got: ', - features) - transformation_cache = tf.__internal__.feature_column.FeatureTransformationCache(features) - output_tensors = [] - for column in self._feature_columns: - with backend.name_scope(column.name): - try: - tensor = column.get_dense_tensor( - transformation_cache, self._state_manager, training=training) - except TypeError: - tensor = column.get_dense_tensor(transformation_cache, - self._state_manager) - processed_tensors = self._process_dense_tensor(column, tensor) - if cols_to_output_tensors is not None: - cols_to_output_tensors[column] = processed_tensors - output_tensors.append(processed_tensors) - return self._verify_and_concat_tensors(output_tensors) + + def __init__( + self, + feature_columns, + trainable=True, + name=None, + partitioner=None, + **kwargs + ): + """Constructs a DenseFeatures layer. + + Args: + feature_columns: An iterable containing the FeatureColumns to use as + inputs to your model. All items should be instances of classes + derived from `DenseColumn` such as `numeric_column`, + `embedding_column`, `bucketized_column`, `indicator_column`. If you + have categorical features, you can wrap them with an + `embedding_column` or `indicator_column`. + trainable: Boolean, whether the layer's variables will be updated via + gradient descent during training. + name: Name to give to the DenseFeatures. + partitioner: Partitioner for input layer. Defaults to `None`. + **kwargs: Keyword arguments to construct a layer. 
+
+        Raises:
+            ValueError: if an item in `feature_columns` is not a `DenseColumn`.
+        """
+        super().__init__(
+            feature_columns=feature_columns,
+            trainable=trainable,
+            name=name,
+            partitioner=partitioner,
+            expected_column_type=tf.__internal__.feature_column.DenseColumn,
+            **kwargs
+        )
+
+    @property
+    def _is_feature_layer(self):
+        return True
+
+    @property
+    def _tracking_metadata(self):
+        """String stored in metadata field in the SavedModel proto.
+
+        Returns:
+            A serialized JSON storing information necessary for recreating
+            this layer.
+        """
+        metadata = json.loads(super()._tracking_metadata)
+        metadata["_is_feature_layer"] = True
+        return json.dumps(metadata, default=json_utils.get_json_type)
+
+    def _target_shape(self, input_shape, total_elements):
+        return (input_shape[0], total_elements)
+
+    def call(self, features, cols_to_output_tensors=None, training=None):
+        """Returns a dense tensor corresponding to the `feature_columns`.
+
+        Example usage:
+
+        >>> t1 = tf.feature_column.embedding_column(
+        ...    tf.feature_column.categorical_column_with_hash_bucket("t1", 2),
+        ...    dimension=8)
+        >>> t2 = tf.feature_column.numeric_column('t2')
+        >>> feature_layer = tf.compat.v1.keras.layers.DenseFeatures([t1, t2])
+        >>> features = {"t1": tf.constant(["a", "b"]),
+        ...             "t2": tf.constant([1, 2])}
+        >>> dense_tensor = feature_layer(features, training=True)
+
+        Args:
+            features: A mapping from key to tensors. `FeatureColumn`s look up
+                via these keys. For example `numeric_column('price')` will
+                look at the 'price' key in this dict. Values can be a
+                `SparseTensor` or a `Tensor`, depending on the corresponding
+                `FeatureColumn`.
+            cols_to_output_tensors: If not `None`, this will be filled with a
+                dict mapping feature columns to output tensors created.
+            training: Python boolean or None, indicating whether the layer is
+                being run in training mode. This argument is passed to the
+                call method of any `FeatureColumn` that takes a `training`
+                argument. For example, if a `FeatureColumn` performed dropout,
+                the column could expose a `training` argument to control
+                whether the dropout should be applied. If `None`, becomes
+                `tf.keras.backend.learning_phase()`. Defaults to `None`.
+
+        Returns:
+            A `Tensor` which represents the input layer of a model. Its shape
+            is (batch_size, first_layer_dimension) and its dtype is `float32`.
+            first_layer_dimension is determined based on given
+            `feature_columns`.
+
+        Raises:
+            ValueError: If features are not a dictionary.
+        """
+        if training is None:
+            training = backend.learning_phase()
+        if not isinstance(features, dict):
+            raise ValueError(
+                "We expected a dictionary here.
Instead we got: ", features + ) + transformation_cache = ( + tf.__internal__.feature_column.FeatureTransformationCache(features) + ) + output_tensors = [] + for column in self._feature_columns: + with backend.name_scope(column.name): + try: + tensor = column.get_dense_tensor( + transformation_cache, + self._state_manager, + training=training, + ) + except TypeError: + tensor = column.get_dense_tensor( + transformation_cache, self._state_manager + ) + processed_tensors = self._process_dense_tensor(column, tensor) + if cols_to_output_tensors is not None: + cols_to_output_tensors[column] = processed_tensors + output_tensors.append(processed_tensors) + return self._verify_and_concat_tensors(output_tensors) diff --git a/keras/feature_column/dense_features_test.py b/keras/feature_column/dense_features_test.py index 135cb3270bb5..a89c0f2566b4 100644 --- a/keras/feature_column/dense_features_test.py +++ b/keras/feature_column/dense_features_test.py @@ -18,1129 +18,1357 @@ from __future__ import division from __future__ import print_function +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np -from tensorflow.python.eager import backprop -from tensorflow.python.framework import test_util as tf_test_utils -from keras.testing_infra import test_combinations + from keras.feature_column import dense_features as df +from keras.testing_infra import test_combinations + +# isort: off +from tensorflow.python.eager import backprop +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) def _initialized_session(config=None): - sess = tf.compat.v1.Session(config=config) - sess.run(tf.compat.v1.global_variables_initializer()) - sess.run(tf.compat.v1.tables_initializer()) - return sess + sess = tf.compat.v1.Session(config=config) + sess.run(tf.compat.v1.global_variables_initializer()) + sess.run(tf.compat.v1.tables_initializer()) + return sess class DenseFeaturesTest(test_combinations.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_retrieving_input(self): - features = {'a': [0.]} - dense_features = df.DenseFeatures(tf.feature_column.numeric_column('a')) - inputs = self.evaluate(dense_features(features)) - self.assertAllClose([[0.]], inputs) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_reuses_variables(self): - sparse_input = tf.SparseTensor( - indices=((0, 0), (1, 0), (2, 0)), values=(0, 1, 2), dense_shape=(3, 3)) - - # Create feature columns (categorical and embedding). - categorical_column = tf.feature_column.categorical_column_with_identity( - key='a', num_buckets=3) - embedding_dimension = 2 - - def _embedding_column_initializer(shape, dtype, partition_info=None): - del shape # unused - del dtype # unused - del partition_info # unused - embedding_values = ( - (1, 0), # id 0 - (0, 1), # id 1 - (1, 1)) # id 2 - return embedding_values - - embedding_column = tf.feature_column.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_embedding_column_initializer) - - dense_features = df.DenseFeatures([embedding_column]) - features = {'a': sparse_input} - - inputs = dense_features(features) - variables = dense_features.variables - - # Sanity check: test that the inputs are correct. - self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs) - - # Check that only one variable was created. 
- self.assertEqual(1, len(variables)) - - # Check that invoking dense_features on the same features does not create - # additional variables - _ = dense_features(features) - self.assertEqual(1, len(variables)) - self.assertIs(variables[0], dense_features.variables[0]) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_dense_feature_with_partitioner(self): - sparse_input = tf.SparseTensor( - indices=((0, 0), (1, 0), (2, 0), (3, 0)), - values=(0, 1, 3, 2), - dense_shape=(4, 4)) - - # Create feature columns (categorical and embedding). - categorical_column = tf.feature_column.categorical_column_with_identity( - key='a', num_buckets=4) - embedding_dimension = 2 - - def _embedding_column_initializer(shape, dtype, partition_info=None): - offset = partition_info._var_offset[0] - del shape # unused - del dtype # unused - if offset == 0: - embedding_values = ( - (1, 0), # id 0 - (0, 1)) # id 1 - else: - embedding_values = ( - (1, 1), # id 2 - (2, 2)) # id 3 - return embedding_values - - embedding_column = tf.feature_column.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_embedding_column_initializer) - - dense_features = df.DenseFeatures( - [embedding_column], partitioner=tf.compat.v1.fixed_size_partitioner(2)) - features = {'a': sparse_input} - - inputs = dense_features(features) - variables = dense_features.variables - - # Sanity check: test that the inputs are correct. - self.assertAllEqual([[1, 0], [0, 1], [2, 2], [1, 1]], inputs) - - # Check that only one variable was created. - self.assertEqual(2, len(variables)) - - # Check that invoking dense_features on the same features does not create - # additional variables - _ = dense_features(features) - self.assertEqual(2, len(variables)) - self.assertIs(variables[0], dense_features.variables[0]) - self.assertIs(variables[1], dense_features.variables[1]) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_feature_column_dense_features_gradient(self): - sparse_input = tf.SparseTensor( - indices=((0, 0), (1, 0), (2, 0)), values=(0, 1, 2), dense_shape=(3, 3)) - - # Create feature columns (categorical and embedding). - categorical_column = tf.feature_column.categorical_column_with_identity( - key='a', num_buckets=3) - embedding_dimension = 2 - - def _embedding_column_initializer(shape, dtype, partition_info=None): - del shape # unused - del dtype # unused - del partition_info # unused - embedding_values = ( - (1, 0), # id 0 - (0, 1), # id 1 - (1, 1)) # id 2 - return embedding_values - - embedding_column = tf.feature_column.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_embedding_column_initializer) - - dense_features = df.DenseFeatures([embedding_column]) - features = {'a': sparse_input} - - def scale_matrix(): - matrix = dense_features(features) - return 2 * matrix - - # Sanity check: Verify that scale_matrix returns the correct output. - self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix()) - - # Check that the returned gradient is correct. 
- grad_function = backprop.implicit_grad(scale_matrix) - grads_and_vars = grad_function() - indexed_slice = grads_and_vars[0][0] - gradient = grads_and_vars[0][0].values - - self.assertAllEqual([0, 1, 2], indexed_slice.indices) - self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient) - - def test_raises_if_empty_feature_columns(self): - with self.assertRaisesRegex(ValueError, - 'feature_columns must not be empty'): - df.DenseFeatures(feature_columns=[])(features={}) - - def test_should_be_dense_column(self): - with self.assertRaisesRegex(ValueError, 'must be a .*DenseColumn'): - df.DenseFeatures(feature_columns=[ - tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) - ])( - features={ - 'a': [[0]] - }) - - def test_does_not_support_dict_columns(self): - with self.assertRaisesRegex( - ValueError, 'Expected feature_columns to be iterable, found dict.'): - df.DenseFeatures( - feature_columns={'a': tf.feature_column.numeric_column('a')})( - features={ - 'a': [[0]] - }) - - def test_bare_column(self): - with tf.Graph().as_default(): - features = features = {'a': [0.]} - net = df.DenseFeatures(tf.feature_column.numeric_column('a'))(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[0.]], self.evaluate(net)) - - def test_column_generator(self): - with tf.Graph().as_default(): - features = features = {'a': [0.], 'b': [1.]} - columns = (tf.feature_column.numeric_column(key) for key in features) - net = df.DenseFeatures(columns)(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[0., 1.]], self.evaluate(net)) - - def test_raises_if_duplicate_name(self): - with self.assertRaisesRegex( - ValueError, 'Duplicate feature column name found for columns'): - df.DenseFeatures(feature_columns=[ - tf.feature_column.numeric_column('a'), - tf.feature_column.numeric_column('a') - ])( - features={ - 'a': [[0]] - }) - - def test_one_column(self): - price = tf.feature_column.numeric_column('price') - with tf.Graph().as_default(): - features = {'price': [[1.], [5.]]} - net = df.DenseFeatures([price])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1.], [5.]], self.evaluate(net)) - - def test_multi_dimension(self): - price = tf.feature_column.numeric_column('price', shape=2) - with tf.Graph().as_default(): - features = {'price': [[1., 2.], [5., 6.]]} - net = df.DenseFeatures([price])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2.], [5., 6.]], self.evaluate(net)) - - def test_compute_output_shape(self): - price1 = tf.feature_column.numeric_column('price1', shape=2) - price2 = tf.feature_column.numeric_column('price2', shape=4) - with tf.Graph().as_default(): - features = { - 'price1': [[1., 2.], [5., 6.]], - 'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]] - } - dense_features = df.DenseFeatures([price1, price2]) - self.assertEqual((None, 6), dense_features.compute_output_shape((None,))) - net = dense_features(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]], - self.evaluate(net)) - - def test_raises_if_shape_mismatch(self): - price = 
tf.feature_column.numeric_column('price', shape=2) - with tf.Graph().as_default(): - features = {'price': [[1.], [5.]]} - with self.assertRaisesRegex( - Exception, - r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): - df.DenseFeatures([price])(features) - - def test_reshaping(self): - price = tf.feature_column.numeric_column('price', shape=[1, 2]) - with tf.Graph().as_default(): - features = {'price': [[[1., 2.]], [[5., 6.]]]} - net = df.DenseFeatures([price])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2.], [5., 6.]], self.evaluate(net)) - - def test_multi_column(self): - price1 = tf.feature_column.numeric_column('price1', shape=2) - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - net = df.DenseFeatures([price1, price2])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net)) - - def test_cols_to_output_tensors(self): - price1 = tf.feature_column.numeric_column('price1', shape=2) - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - cols_dict = {} - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - dense_features = df.DenseFeatures([price1, price2]) - net = dense_features(features, cols_dict) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2.], [5., 6.]], - self.evaluate(cols_dict[price1])) - self.assertAllClose([[3.], [4.]], self.evaluate(cols_dict[price2])) - self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net)) - - def test_column_order(self): - price_a = tf.feature_column.numeric_column('price_a') - price_b = tf.feature_column.numeric_column('price_b') - with tf.Graph().as_default(): - features = { - 'price_a': [[1.]], - 'price_b': [[3.]], - } - net1 = df.DenseFeatures([price_a, price_b])(features) - net2 = df.DenseFeatures([price_b, price_a])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 3.]], self.evaluate(net1)) - self.assertAllClose([[1., 3.]], self.evaluate(net2)) - - def test_fails_for_categorical_column(self): - animal = tf.feature_column.categorical_column_with_identity( - 'animal', num_buckets=4) - with tf.Graph().as_default(): - features = { - 'animal': - tf.SparseTensor( - indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) - } - with self.assertRaisesRegex(Exception, 'must be a .*DenseColumn'): - df.DenseFeatures([animal])(features) - - def test_static_batch_size_mismatch(self): - price1 = tf.feature_column.numeric_column('price1') - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - features = { - 'price1': [[1.], [5.], [7.]], # batchsize = 3 - 'price2': [[3.], [4.]] # batchsize = 2 - } - with self.assertRaisesRegex( - ValueError, - r'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - df.DenseFeatures([price1, price2])(features) - - def test_subset_of_static_batch_size_mismatch(self): - price1 = tf.feature_column.numeric_column('price1') - price2 = tf.feature_column.numeric_column('price2') - price3 = 
tf.feature_column.numeric_column('price3') - with tf.Graph().as_default(): - features = { - 'price1': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 3 - 'price2': [[3.], [4.]], # batchsize = 2 - 'price3': [[3.], [4.], [5.]] # batchsize = 3 - } - with self.assertRaisesRegex( - ValueError, - r'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - df.DenseFeatures([price1, price2, price3])(features) - - def test_runtime_batch_size_mismatch(self): - price1 = tf.feature_column.numeric_column('price1') - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - features = { - 'price1': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 3 - 'price2': [[3.], [4.]] # batchsize = 2 - } - net = df.DenseFeatures([price1, price2])(features) - with _initialized_session() as sess: - with self.assertRaisesRegex(tf.errors.OpError, - 'Dimension 0 in both shapes must be equal|' - 'Dimensions of inputs should match'): - sess.run(net, feed_dict={features['price1']: [[1.], [5.], [7.]]}) - - def test_runtime_batch_size_matches(self): - price1 = tf.feature_column.numeric_column('price1') - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - features = { - 'price1': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 2 - 'price2': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 2 - } - net = df.DenseFeatures([price1, price2])(features) - with _initialized_session() as sess: - sess.run( - net, - feed_dict={ - features['price1']: [[1.], [5.]], - features['price2']: [[1.], [5.]], - }) - - def test_multiple_layers_with_same_embedding_column(self): - some_sparse_column = tf.feature_column.categorical_column_with_hash_bucket( - 'sparse_feature', hash_bucket_size=5) - some_embedding_column = tf.feature_column.embedding_column( - some_sparse_column, dimension=10) - - with tf.Graph().as_default(): - features = { - 'sparse_feature': [['a'], ['x']], - } - all_cols = [some_embedding_column] - df.DenseFeatures(all_cols)(features) - df.DenseFeatures(all_cols)(features) - # Make sure that 2 variables get created in this case. - self.assertEqual( - 2, - len( - tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES))) - expected_var_names = [ - 'dense_features/sparse_feature_embedding/embedding_weights:0', - 'dense_features_1/sparse_feature_embedding/embedding_weights:0' - ] - self.assertCountEqual(expected_var_names, [ - v.name for v in tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) - ]) - - @tf_test_utils.run_deprecated_v1 - def test_multiple_layers_with_same_shared_embedding_column(self): - categorical_column_a = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = tf.feature_column.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_b, embedding_column_a = tf.feature_column.shared_embeddings( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - - with tf.Graph().as_default(): - features = { - 'aaa': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - all_cols = [embedding_column_a, embedding_column_b] - df.DenseFeatures(all_cols)(features) - df.DenseFeatures(all_cols)(features) - # Make sure that only 1 variable gets created in this case. 
- self.assertEqual( - 1, - len( - tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES))) - self.assertCountEqual(['aaa_bbb_shared_embedding:0'], [ - v.name for v in tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) - ]) - - @tf_test_utils.run_deprecated_v1 - def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self): - categorical_column_a = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = tf.feature_column.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_b, embedding_column_a = tf.feature_column.shared_embeddings( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - all_cols = [embedding_column_a, embedding_column_b] - - with tf.Graph().as_default(): - features = { - 'aaa': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - df.DenseFeatures(all_cols)(features) - # Make sure that only 1 variable gets created in this case. - self.assertEqual( - 1, - len( - tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES))) - - with tf.Graph().as_default(): - features1 = { - 'aaa': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - - df.DenseFeatures(all_cols)(features1) - # Make sure that only 1 variable gets created in this case. - self.assertEqual( - 1, - len( - tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES))) - self.assertCountEqual(['aaa_bbb_shared_embedding:0'], [ - v.name for v in tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) - ]) - - @tf_test_utils.run_deprecated_v1 - def test_with_1d_sparse_tensor(self): - embedding_values = ( - (1., 2., 3., 4., 5.), # id 0 - (6., 7., 8., 9., 10.), # id 1 - (11., 12., 13., 14., 15.) # id 2 + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) ) - - def _initializer(shape, dtype, partition_info=None): - del shape, dtype, partition_info - return embedding_values - - # price has 1 dimension in dense_features - price = tf.feature_column.numeric_column('price') - - # one_hot_body_style has 3 dims in dense_features. - body_style = tf.feature_column.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - one_hot_body_style = tf.feature_column.indicator_column(body_style) - - # embedded_body_style has 5 dims in dense_features. - country = tf.feature_column.categorical_column_with_vocabulary_list( - 'country', vocabulary_list=['US', 'JP', 'CA']) - embedded_country = tf.feature_column.embedding_column( - country, dimension=5, initializer=_initializer) - - # Provides 1-dim tensor and dense tensor. 
- features = { - 'price': - tf.constant([ - 11., - 12., - ]), - 'body-style': - tf.SparseTensor( + def test_retrieving_input(self): + features = {"a": [0.0]} + dense_features = df.DenseFeatures(tf.feature_column.numeric_column("a")) + inputs = self.evaluate(dense_features(features)) + self.assertAllClose([[0.0]], inputs) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_reuses_variables(self): + sparse_input = tf.SparseTensor( + indices=((0, 0), (1, 0), (2, 0)), + values=(0, 1, 2), + dense_shape=(3, 3), + ) + + # Create feature columns (categorical and embedding). + categorical_column = tf.feature_column.categorical_column_with_identity( + key="a", num_buckets=3 + ) + embedding_dimension = 2 + + def _embedding_column_initializer(shape, dtype, partition_info=None): + del shape # unused + del dtype # unused + del partition_info # unused + embedding_values = ((1, 0), (0, 1), (1, 1)) # id 0 # id 1 # id 2 + return embedding_values + + embedding_column = tf.feature_column.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_embedding_column_initializer, + ) + + dense_features = df.DenseFeatures([embedding_column]) + features = {"a": sparse_input} + + inputs = dense_features(features) + variables = dense_features.variables + + # Sanity check: test that the inputs are correct. + self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs) + + # Check that only one variable was created. + self.assertEqual(1, len(variables)) + + # Check that invoking dense_features on the same features does not + # create additional variables + _ = dense_features(features) + self.assertEqual(1, len(variables)) + self.assertIs(variables[0], dense_features.variables[0]) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_dense_feature_with_partitioner(self): + sparse_input = tf.SparseTensor( + indices=((0, 0), (1, 0), (2, 0), (3, 0)), + values=(0, 1, 3, 2), + dense_shape=(4, 4), + ) + + # Create feature columns (categorical and embedding). + categorical_column = tf.feature_column.categorical_column_with_identity( + key="a", num_buckets=4 + ) + embedding_dimension = 2 + + def _embedding_column_initializer(shape, dtype, partition_info=None): + offset = partition_info._var_offset[0] + del shape # unused + del dtype # unused + if offset == 0: + embedding_values = ((1, 0), (0, 1)) # id 0 # id 1 + else: + embedding_values = ((1, 1), (2, 2)) # id 2 # id 3 + return embedding_values + + embedding_column = tf.feature_column.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_embedding_column_initializer, + ) + + dense_features = df.DenseFeatures( + [embedding_column], + partitioner=tf.compat.v1.fixed_size_partitioner(2), + ) + features = {"a": sparse_input} + + inputs = dense_features(features) + variables = dense_features.variables + + # Sanity check: test that the inputs are correct. + self.assertAllEqual([[1, 0], [0, 1], [2, 2], [1, 1]], inputs) + + # Check that only one variable was created. 
+ self.assertEqual(2, len(variables)) + + # Check that invoking dense_features on the same features does not + # create additional variables + _ = dense_features(features) + self.assertEqual(2, len(variables)) + self.assertIs(variables[0], dense_features.variables[0]) + self.assertIs(variables[1], dense_features.variables[1]) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_feature_column_dense_features_gradient(self): + sparse_input = tf.SparseTensor( + indices=((0, 0), (1, 0), (2, 0)), + values=(0, 1, 2), + dense_shape=(3, 3), + ) + + # Create feature columns (categorical and embedding). + categorical_column = tf.feature_column.categorical_column_with_identity( + key="a", num_buckets=3 + ) + embedding_dimension = 2 + + def _embedding_column_initializer(shape, dtype, partition_info=None): + del shape # unused + del dtype # unused + del partition_info # unused + embedding_values = ((1, 0), (0, 1), (1, 1)) # id 0 # id 1 # id 2 + return embedding_values + + embedding_column = tf.feature_column.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_embedding_column_initializer, + ) + + dense_features = df.DenseFeatures([embedding_column]) + features = {"a": sparse_input} + + def scale_matrix(): + matrix = dense_features(features) + return 2 * matrix + + # Sanity check: Verify that scale_matrix returns the correct output. + self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix()) + + # Check that the returned gradient is correct. + grad_function = backprop.implicit_grad(scale_matrix) + grads_and_vars = grad_function() + indexed_slice = grads_and_vars[0][0] + gradient = grads_and_vars[0][0].values + + self.assertAllEqual([0, 1, 2], indexed_slice.indices) + self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient) + + def test_raises_if_empty_feature_columns(self): + with self.assertRaisesRegex( + ValueError, "feature_columns must not be empty" + ): + df.DenseFeatures(feature_columns=[])(features={}) + + def test_should_be_dense_column(self): + with self.assertRaisesRegex(ValueError, "must be a .*DenseColumn"): + df.DenseFeatures( + feature_columns=[ + tf.feature_column.categorical_column_with_hash_bucket( + "wire_cast", 4 + ) + ] + )(features={"a": [[0]]}) + + def test_does_not_support_dict_columns(self): + with self.assertRaisesRegex( + ValueError, "Expected feature_columns to be iterable, found dict." 
+ ): + df.DenseFeatures( + feature_columns={"a": tf.feature_column.numeric_column("a")} + )(features={"a": [[0]]}) + + def test_bare_column(self): + with tf.Graph().as_default(): + features = features = {"a": [0.0]} + net = df.DenseFeatures(tf.feature_column.numeric_column("a"))( + features + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[0.0]], self.evaluate(net)) + + def test_column_generator(self): + with tf.Graph().as_default(): + features = features = {"a": [0.0], "b": [1.0]} + columns = ( + tf.feature_column.numeric_column(key) for key in features + ) + net = df.DenseFeatures(columns)(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[0.0, 1.0]], self.evaluate(net)) + + def test_raises_if_duplicate_name(self): + with self.assertRaisesRegex( + ValueError, "Duplicate feature column name found for columns" + ): + df.DenseFeatures( + feature_columns=[ + tf.feature_column.numeric_column("a"), + tf.feature_column.numeric_column("a"), + ] + )(features={"a": [[0]]}) + + def test_one_column(self): + price = tf.feature_column.numeric_column("price") + with tf.Graph().as_default(): + features = {"price": [[1.0], [5.0]]} + net = df.DenseFeatures([price])(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[1.0], [5.0]], self.evaluate(net)) + + def test_multi_dimension(self): + price = tf.feature_column.numeric_column("price", shape=2) + with tf.Graph().as_default(): + features = {"price": [[1.0, 2.0], [5.0, 6.0]]} + net = df.DenseFeatures([price])(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[1.0, 2.0], [5.0, 6.0]], self.evaluate(net)) + + def test_compute_output_shape(self): + price1 = tf.feature_column.numeric_column("price1", shape=2) + price2 = tf.feature_column.numeric_column("price2", shape=4) + with tf.Graph().as_default(): + features = { + "price1": [[1.0, 2.0], [5.0, 6.0]], + "price2": [[3.0, 4.0, 5.0, 6.0], [7.0, 8.0, 9.0, 10.0]], + } + dense_features = df.DenseFeatures([price1, price2]) + self.assertEqual( + (None, 6), dense_features.compute_output_shape((None,)) + ) + net = dense_features(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose( + [ + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + [5.0, 6.0, 7.0, 8.0, 9.0, 10.0], + ], + self.evaluate(net), + ) + + def test_raises_if_shape_mismatch(self): + price = tf.feature_column.numeric_column("price", shape=2) + with tf.Graph().as_default(): + features = {"price": [[1.0], [5.0]]} + with self.assertRaisesRegex( + Exception, + r"Cannot reshape a tensor with 2 elements to shape \[2,2\]", + ): + df.DenseFeatures([price])(features) + + def test_reshaping(self): + price = tf.feature_column.numeric_column("price", shape=[1, 2]) + with tf.Graph().as_default(): + features = {"price": [[[1.0, 2.0]], [[5.0, 6.0]]]} + net = df.DenseFeatures([price])(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[1.0, 2.0], [5.0, 6.0]], self.evaluate(net)) + + def test_multi_column(self): + price1 = tf.feature_column.numeric_column("price1", shape=2) + price2 = 
tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + features = { + "price1": [[1.0, 2.0], [5.0, 6.0]], + "price2": [[3.0], [4.0]], + } + net = df.DenseFeatures([price1, price2])(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose( + [[1.0, 2.0, 3.0], [5.0, 6.0, 4.0]], self.evaluate(net) + ) + + def test_cols_to_output_tensors(self): + price1 = tf.feature_column.numeric_column("price1", shape=2) + price2 = tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + cols_dict = {} + features = { + "price1": [[1.0, 2.0], [5.0, 6.0]], + "price2": [[3.0], [4.0]], + } + dense_features = df.DenseFeatures([price1, price2]) + net = dense_features(features, cols_dict) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose( + [[1.0, 2.0], [5.0, 6.0]], self.evaluate(cols_dict[price1]) + ) + self.assertAllClose( + [[3.0], [4.0]], self.evaluate(cols_dict[price2]) + ) + self.assertAllClose( + [[1.0, 2.0, 3.0], [5.0, 6.0, 4.0]], self.evaluate(net) + ) + + def test_column_order(self): + price_a = tf.feature_column.numeric_column("price_a") + price_b = tf.feature_column.numeric_column("price_b") + with tf.Graph().as_default(): + features = { + "price_a": [[1.0]], + "price_b": [[3.0]], + } + net1 = df.DenseFeatures([price_a, price_b])(features) + net2 = df.DenseFeatures([price_b, price_a])(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[1.0, 3.0]], self.evaluate(net1)) + self.assertAllClose([[1.0, 3.0]], self.evaluate(net2)) + + def test_fails_for_categorical_column(self): + animal = tf.feature_column.categorical_column_with_identity( + "animal", num_buckets=4 + ) + with tf.Graph().as_default(): + features = { + "animal": tf.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2] + ) + } + with self.assertRaisesRegex(Exception, "must be a .*DenseColumn"): + df.DenseFeatures([animal])(features) + + def test_static_batch_size_mismatch(self): + price1 = tf.feature_column.numeric_column("price1") + price2 = tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + features = { + "price1": [[1.0], [5.0], [7.0]], # batchsize = 3 + "price2": [[3.0], [4.0]], # batchsize = 2 + } + with self.assertRaisesRegex( + ValueError, + r"Batch size \(first dimension\) of each feature must be same.", + ): + df.DenseFeatures([price1, price2])(features) + + def test_subset_of_static_batch_size_mismatch(self): + price1 = tf.feature_column.numeric_column("price1") + price2 = tf.feature_column.numeric_column("price2") + price3 = tf.feature_column.numeric_column("price3") + with tf.Graph().as_default(): + features = { + "price1": tf.compat.v1.placeholder( + dtype=tf.int64 + ), # batchsize = 3 + "price2": [[3.0], [4.0]], # batchsize = 2 + "price3": [[3.0], [4.0], [5.0]], # batchsize = 3 + } + with self.assertRaisesRegex( + ValueError, + r"Batch size \(first dimension\) of each feature must be same.", + ): + df.DenseFeatures([price1, price2, price3])(features) + + def test_runtime_batch_size_mismatch(self): + price1 = tf.feature_column.numeric_column("price1") + price2 = tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + features = { + "price1": tf.compat.v1.placeholder( + dtype=tf.int64 + ), # batchsize = 3 + "price2": [[3.0], [4.0]], # batchsize = 2 + } 
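Editor's note (outside the diff): because `price1` comes from a placeholder, its batch dimension is unknown statically, so `_verify_static_batch_size_equality` cannot flag the mismatch when the layer is called; the error only surfaces when the concat op actually runs, which is what this test asserts. A condensed sketch of the same failure mode under TF1 graph mode:

```python
import tensorflow.compat.v2 as tf

tf.compat.v1.disable_eager_execution()
a = tf.compat.v1.placeholder(tf.float32, shape=[None, 1])  # unknown batch
b = tf.constant([[3.0], [4.0]])  # static batch size 2
net = tf.concat([a, b], -1)  # static shapes are compatible (None vs 2)
with tf.compat.v1.Session() as sess:
    # Feeding batch size 3 raises the runtime shape error the test expects.
    sess.run(net, feed_dict={a: [[1.0], [5.0], [7.0]]})
```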
+ net = df.DenseFeatures([price1, price2])(features) + with _initialized_session() as sess: + with self.assertRaisesRegex( + tf.errors.OpError, + "Dimension 0 in both shapes must be equal|" + "Dimensions of inputs should match", + ): + sess.run( + net, + feed_dict={features["price1"]: [[1.0], [5.0], [7.0]]}, + ) + + def test_runtime_batch_size_matches(self): + price1 = tf.feature_column.numeric_column("price1") + price2 = tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + features = { + "price1": tf.compat.v1.placeholder( + dtype=tf.int64 + ), # batchsize = 2 + "price2": tf.compat.v1.placeholder( + dtype=tf.int64 + ), # batchsize = 2 + } + net = df.DenseFeatures([price1, price2])(features) + with _initialized_session() as sess: + sess.run( + net, + feed_dict={ + features["price1"]: [[1.0], [5.0]], + features["price2"]: [[1.0], [5.0]], + }, + ) + + def test_multiple_layers_with_same_embedding_column(self): + some_sparse_column = ( + tf.feature_column.categorical_column_with_hash_bucket( + "sparse_feature", hash_bucket_size=5 + ) + ) + some_embedding_column = tf.feature_column.embedding_column( + some_sparse_column, dimension=10 + ) + + with tf.Graph().as_default(): + features = { + "sparse_feature": [["a"], ["x"]], + } + all_cols = [some_embedding_column] + df.DenseFeatures(all_cols)(features) + df.DenseFeatures(all_cols)(features) + # Make sure that 2 variables get created in this case. + self.assertEqual( + 2, + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ), + ) + expected_var_names = [ + "dense_features/sparse_feature_embedding/embedding_weights:0", + "dense_features_1/sparse_feature_embedding/embedding_weights:0", + ] + self.assertCountEqual( + expected_var_names, + [ + v.name + for v in tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ], + ) + + @tf_test_utils.run_deprecated_v1 + def test_multiple_layers_with_same_shared_embedding_column(self): + categorical_column_a = ( + tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=3 + ) + ) + categorical_column_b = ( + tf.feature_column.categorical_column_with_identity( + key="bbb", num_buckets=3 + ) + ) + embedding_dimension = 2 + ( + embedding_column_b, + embedding_column_a, + ) = tf.feature_column.shared_embeddings( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension, + ) + + with tf.Graph().as_default(): + features = { + "aaa": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2), + ), + "bbb": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2), + ), + } + all_cols = [embedding_column_a, embedding_column_b] + df.DenseFeatures(all_cols)(features) + df.DenseFeatures(all_cols)(features) + # Make sure that only 1 variable gets created in this case. 
+ self.assertEqual( + 1, + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ), + ) + self.assertCountEqual( + ["aaa_bbb_shared_embedding:0"], + [ + v.name + for v in tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ], + ) + + @tf_test_utils.run_deprecated_v1 + def test_multiple_layers_with_same_shared_embedding_column_diff_graphs( + self, + ): + categorical_column_a = ( + tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=3 + ) + ) + categorical_column_b = ( + tf.feature_column.categorical_column_with_identity( + key="bbb", num_buckets=3 + ) + ) + embedding_dimension = 2 + ( + embedding_column_b, + embedding_column_a, + ) = tf.feature_column.shared_embeddings( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension, + ) + all_cols = [embedding_column_a, embedding_column_b] + + with tf.Graph().as_default(): + features = { + "aaa": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2), + ), + "bbb": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2), + ), + } + df.DenseFeatures(all_cols)(features) + # Make sure that only 1 variable gets created in this case. + self.assertEqual( + 1, + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ), + ) + + with tf.Graph().as_default(): + features1 = { + "aaa": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2), + ), + "bbb": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2), + ), + } + + df.DenseFeatures(all_cols)(features1) + # Make sure that only 1 variable gets created in this case. + self.assertEqual( + 1, + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ), + ) + self.assertCountEqual( + ["aaa_bbb_shared_embedding:0"], + [ + v.name + for v in tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ], + ) + + @tf_test_utils.run_deprecated_v1 + def test_with_1d_sparse_tensor(self): + embedding_values = ( + (1.0, 2.0, 3.0, 4.0, 5.0), # id 0 + (6.0, 7.0, 8.0, 9.0, 10.0), # id 1 + (11.0, 12.0, 13.0, 14.0, 15.0), # id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + del shape, dtype, partition_info + return embedding_values + + # price has 1 dimension in dense_features + price = tf.feature_column.numeric_column("price") + + # one_hot_body_style has 3 dims in dense_features. + body_style = tf.feature_column.categorical_column_with_vocabulary_list( + "body-style", vocabulary_list=["hardtop", "wagon", "sedan"] + ) + one_hot_body_style = tf.feature_column.indicator_column(body_style) + + # embedded_body_style has 5 dims in dense_features. + country = tf.feature_column.categorical_column_with_vocabulary_list( + "country", vocabulary_list=["US", "JP", "CA"] + ) + embedded_country = tf.feature_column.embedding_column( + country, dimension=5, initializer=_initializer + ) + + # Provides 1-dim tensor and dense tensor. + features = { + "price": tf.constant( + [ + 11.0, + 12.0, + ] + ), + "body-style": tf.SparseTensor( indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), - # This is dense tensor for the categorical_column. 
- 'country': - tf.constant(['CA', 'US']), - } - self.assertEqual(1, features['price'].shape.ndims) - self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) - self.assertEqual(1, features['country'].shape.ndims) - - net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( - features) - self.assertEqual(1 + 3 + 5, net.shape[1]) - with _initialized_session() as sess: - - # Each row is formed by concatenating `embedded_body_style`, - # `one_hot_body_style`, and `price` in order. - self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.], - [1., 0., 0., 1., 2., 3., 4., 5., 12.]], - sess.run(net)) - - @tf_test_utils.run_deprecated_v1 - def test_with_1d_unknown_shape_sparse_tensor(self): - embedding_values = ( - (1., 2.), # id 0 - (6., 7.), # id 1 - (11., 12.) # id 2 - ) - - def _initializer(shape, dtype, partition_info=None): - del shape, dtype, partition_info - return embedding_values - - # price has 1 dimension in dense_features - price = tf.feature_column.numeric_column('price') - - # one_hot_body_style has 3 dims in dense_features. - body_style = tf.feature_column.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - one_hot_body_style = tf.feature_column.indicator_column(body_style) - - # embedded_body_style has 5 dims in dense_features. - country = tf.feature_column.categorical_column_with_vocabulary_list( - 'country', vocabulary_list=['US', 'JP', 'CA']) - embedded_country = tf.feature_column.embedding_column( - country, dimension=2, initializer=_initializer) - - # Provides 1-dim tensor and dense tensor. - features = { - 'price': tf.compat.v1.placeholder(tf.float32), - 'body-style': tf.compat.v1.sparse_placeholder(tf.string), - # This is dense tensor for the categorical_column. - 'country': tf.compat.v1.placeholder(tf.string), - } - self.assertIsNone(features['price'].shape.ndims) - self.assertIsNone(features['body-style'].get_shape().ndims) - self.assertIsNone(features['country'].shape.ndims) - - price_data = np.array([11., 12.]) - body_style_data = tf.compat.v1.SparseTensorValue( - indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) - country_data = np.array([['US'], ['CA']]) - - net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( - features) - self.assertEqual(1 + 3 + 2, net.shape[1]) - with _initialized_session() as sess: - - # Each row is formed by concatenating `embedded_body_style`, - # `one_hot_body_style`, and `price` in order. 
- self.assertAllEqual( - [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]], - sess.run( - net, - feed_dict={ - features['price']: price_data, - features['body-style']: body_style_data, - features['country']: country_data - })) - - @tf_test_utils.run_deprecated_v1 - def test_with_rank_0_feature(self): - # price has 1 dimension in dense_features - price = tf.feature_column.numeric_column('price') - features = { - 'price': tf.constant(0), - } - self.assertEqual(0, features['price'].shape.ndims) - - # Static rank 0 should fail - with self.assertRaisesRegex(ValueError, 'Feature .* cannot have rank 0'): - df.DenseFeatures([price])(features) - - # Dynamic rank 0 should fail - features = { - 'price': tf.compat.v1.placeholder(tf.float32), - } - net = df.DenseFeatures([price])(features) - self.assertEqual(1, net.shape[1]) - with _initialized_session() as sess: - with self.assertRaisesOpError('Feature .* cannot have rank 0'): - sess.run(net, feed_dict={features['price']: np.array(1)}) + values=("sedan", "hardtop"), + dense_shape=(2,), + ), + # This is dense tensor for the categorical_column. + "country": tf.constant(["CA", "US"]), + } + self.assertEqual(1, features["price"].shape.ndims) + self.assertEqual(1, features["body-style"].dense_shape.get_shape()[0]) + self.assertEqual(1, features["country"].shape.ndims) + + net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( + features + ) + self.assertEqual(1 + 3 + 5, net.shape[1]) + with _initialized_session() as sess: + + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. + self.assertAllEqual( + [ + [0.0, 0.0, 1.0, 11.0, 12.0, 13.0, 14.0, 15.0, 11.0], + [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 12.0], + ], + sess.run(net), + ) + + @tf_test_utils.run_deprecated_v1 + def test_with_1d_unknown_shape_sparse_tensor(self): + embedding_values = ( + (1.0, 2.0), # id 0 + (6.0, 7.0), # id 1 + (11.0, 12.0), # id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + del shape, dtype, partition_info + return embedding_values + + # price has 1 dimension in dense_features + price = tf.feature_column.numeric_column("price") + + # one_hot_body_style has 3 dims in dense_features. + body_style = tf.feature_column.categorical_column_with_vocabulary_list( + "body-style", vocabulary_list=["hardtop", "wagon", "sedan"] + ) + one_hot_body_style = tf.feature_column.indicator_column(body_style) + + # embedded_body_style has 5 dims in dense_features. + country = tf.feature_column.categorical_column_with_vocabulary_list( + "country", vocabulary_list=["US", "JP", "CA"] + ) + embedded_country = tf.feature_column.embedding_column( + country, dimension=2, initializer=_initializer + ) + + # Provides 1-dim tensor and dense tensor. + features = { + "price": tf.compat.v1.placeholder(tf.float32), + "body-style": tf.compat.v1.sparse_placeholder(tf.string), + # This is dense tensor for the categorical_column. 
+ "country": tf.compat.v1.placeholder(tf.string), + } + self.assertIsNone(features["price"].shape.ndims) + self.assertIsNone(features["body-style"].get_shape().ndims) + self.assertIsNone(features["country"].shape.ndims) + + price_data = np.array([11.0, 12.0]) + body_style_data = tf.compat.v1.SparseTensorValue( + indices=((0,), (1,)), values=("sedan", "hardtop"), dense_shape=(2,) + ) + country_data = np.array([["US"], ["CA"]]) + + net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( + features + ) + self.assertEqual(1 + 3 + 2, net.shape[1]) + with _initialized_session() as sess: + + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. + self.assertAllEqual( + [ + [0.0, 0.0, 1.0, 1.0, 2.0, 11.0], + [1.0, 0.0, 0.0, 11.0, 12.0, 12.0], + ], + sess.run( + net, + feed_dict={ + features["price"]: price_data, + features["body-style"]: body_style_data, + features["country"]: country_data, + }, + ), + ) + + @tf_test_utils.run_deprecated_v1 + def test_with_rank_0_feature(self): + # price has 1 dimension in dense_features + price = tf.feature_column.numeric_column("price") + features = { + "price": tf.constant(0), + } + self.assertEqual(0, features["price"].shape.ndims) + + # Static rank 0 should fail + with self.assertRaisesRegex( + ValueError, "Feature .* cannot have rank 0" + ): + df.DenseFeatures([price])(features) + + # Dynamic rank 0 should fail + features = { + "price": tf.compat.v1.placeholder(tf.float32), + } + net = df.DenseFeatures([price])(features) + self.assertEqual(1, net.shape[1]) + with _initialized_session() as sess: + with self.assertRaisesOpError("Feature .* cannot have rank 0"): + sess.run(net, feed_dict={features["price"]: np.array(1)}) class IndicatorColumnTest(tf.test.TestCase): + @tf_test_utils.run_deprecated_v1 + def test_dense_features(self): + animal = tf.feature_column.indicator_column( + tf.feature_column.categorical_column_with_identity( + "animal", num_buckets=4 + ) + ) + with tf.Graph().as_default(): + features = { + "animal": tf.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2] + ) + } + net = df.DenseFeatures([animal])(features) - @tf_test_utils.run_deprecated_v1 - def test_dense_features(self): - animal = tf.feature_column.indicator_column( - tf.feature_column.categorical_column_with_identity( - 'animal', num_buckets=4)) - with tf.Graph().as_default(): - features = { - 'animal': - tf.SparseTensor( - indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) - } - net = df.DenseFeatures([animal])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) - self.assertAllClose([[0., 1., 1., 0.]], self.evaluate(net)) + self.assertAllClose([[0.0, 1.0, 1.0, 0.0]], self.evaluate(net)) class EmbeddingColumnTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters( - { - 'testcase_name': 'use_safe_embedding_lookup', - 'use_safe_embedding_lookup': True, - 'partition_variables': False, - }, { - 'testcase_name': 'dont_use_safe_embedding_lookup', - 'use_safe_embedding_lookup': False, - 'partition_variables': False, - }, { - 'testcase_name': 'use_safe_embedding_lookup_partitioned', - 'use_safe_embedding_lookup': True, - 'partition_variables': True, - }, { - 'testcase_name': 'dont_use_safe_embedding_lookup_partitioned', - 'use_safe_embedding_lookup': False, - 'partition_variables': 
True, - }) - @tf_test_utils.run_deprecated_v1 - def test_dense_features(self, use_safe_embedding_lookup, partition_variables): - # Inputs. - vocabulary_size = 4 - sparse_input = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.), # id 2 - (9., 13.) # id 3 - ) - - def _initializer(shape, dtype, partition_info=None): - if partition_variables: - self.assertEqual([vocabulary_size, embedding_dimension], - partition_info.full_shape) - self.assertAllEqual((2, embedding_dimension), shape) - else: - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertIsNone(partition_info) - - self.assertEqual(tf.float32, dtype) - return embedding_values - - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), + @parameterized.named_parameters( + { + "testcase_name": "use_safe_embedding_lookup", + "use_safe_embedding_lookup": True, + "partition_variables": False, + }, + { + "testcase_name": "dont_use_safe_embedding_lookup", + "use_safe_embedding_lookup": False, + "partition_variables": False, + }, + { + "testcase_name": "use_safe_embedding_lookup_partitioned", + "use_safe_embedding_lookup": True, + "partition_variables": True, + }, + { + "testcase_name": "dont_use_safe_embedding_lookup_partitioned", + "use_safe_embedding_lookup": False, + "partition_variables": True, + }, ) - - # Build columns. - categorical_column = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - partitioner = None - if partition_variables: - partitioner = tf.compat.v1.fixed_size_partitioner(2, axis=0) - with tf.compat.v1.variable_scope('vars', partitioner=partitioner): - embedding_column = tf.feature_column.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer, - use_safe_embedding_lookup=use_safe_embedding_lookup) - - # Provide sparse input and get dense result. - l = df.DenseFeatures((embedding_column,)) - dense_features = l({'aaa': sparse_input}) - - # Assert expected embedding variable and lookups. 
- global_vars = tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) - if partition_variables: - self.assertCountEqual( - ('vars/dense_features/aaa_embedding/embedding_weights/part_0:0', - 'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'), - tuple([v.name for v in global_vars])) - else: - self.assertCountEqual( - ('vars/dense_features/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in global_vars])) - for v in global_vars: - self.assertIsInstance(v, tf.Variable) - trainable_vars = tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) - if partition_variables: - self.assertCountEqual( - ('vars/dense_features/aaa_embedding/embedding_weights/part_0:0', - 'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'), - tuple([v.name for v in trainable_vars])) - else: - self.assertCountEqual( - ('vars/dense_features/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in trainable_vars])) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0])) - self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) - - if use_safe_embedding_lookup: - self.assertIn( - 'SparseFillEmptyRows', - [x.type for x in tf.compat.v1.get_default_graph().get_operations()]) - else: - self.assertNotIn( - 'SparseFillEmptyRows', - [x.type for x in tf.compat.v1.get_default_graph().get_operations()]) - - @tf_test_utils.run_deprecated_v1 - def test_dense_features_not_trainable(self): - # Inputs. - vocabulary_size = 3 - sparse_input = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(tf.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) - - # Build columns. - categorical_column = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = tf.feature_column.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer, - trainable=False) - - # Provide sparse input and get dense result. - dense_features = df.DenseFeatures((embedding_column,))({ - 'aaa': sparse_input - }) - - # Assert expected embedding variable and lookups. 
- global_vars = tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('dense_features/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in global_vars])) - self.assertCountEqual([], - tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) - self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) + @tf_test_utils.run_deprecated_v1 + def test_dense_features( + self, use_safe_embedding_lookup, partition_variables + ): + # Inputs. + vocabulary_size = 4 + sparse_input = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5), + ) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1.0, 2.0), # id 0 + (3.0, 5.0), # id 1 + (7.0, 11.0), # id 2 + (9.0, 13.0), # id 3 + ) + + def _initializer(shape, dtype, partition_info=None): + if partition_variables: + self.assertEqual( + [vocabulary_size, embedding_dimension], + partition_info.full_shape, + ) + self.assertAllEqual((2, embedding_dimension), shape) + else: + self.assertAllEqual( + (vocabulary_size, embedding_dimension), shape + ) + self.assertIsNone(partition_info) + + self.assertEqual(tf.float32, dtype) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7.0, 11.0), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, + # 3.5] + (2.0, 3.5), + # example 2, ids [], embedding = [0, 0] + (0.0, 0.0), + # example 3, ids [1], embedding = [3, 5] + (3.0, 5.0), + ) + + # Build columns. + categorical_column = tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=vocabulary_size + ) + partitioner = None + if partition_variables: + partitioner = tf.compat.v1.fixed_size_partitioner(2, axis=0) + with tf.compat.v1.variable_scope("vars", partitioner=partitioner): + embedding_column = tf.feature_column.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer, + use_safe_embedding_lookup=use_safe_embedding_lookup, + ) + + # Provide sparse input and get dense result. + l = df.DenseFeatures((embedding_column,)) + dense_features = l({"aaa": sparse_input}) + + # Assert expected embedding variable and lookups. 
+ global_vars = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + if partition_variables: + self.assertCountEqual( + ( + "vars/dense_features/aaa_embedding/embedding_weights/" + "part_0:0", + "vars/dense_features/aaa_embedding/embedding_weights/" + "part_1:0", + ), + tuple([v.name for v in global_vars]), + ) + else: + self.assertCountEqual( + ("vars/dense_features/aaa_embedding/embedding_weights:0",), + tuple([v.name for v in global_vars]), + ) + for v in global_vars: + self.assertIsInstance(v, tf.Variable) + trainable_vars = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ) + if partition_variables: + self.assertCountEqual( + ( + "vars/dense_features/aaa_embedding/embedding_weights/" + "part_0:0", + "vars/dense_features/aaa_embedding/embedding_weights/" + "part_1:0", + ), + tuple([v.name for v in trainable_vars]), + ) + else: + self.assertCountEqual( + ("vars/dense_features/aaa_embedding/embedding_weights:0",), + tuple([v.name for v in trainable_vars]), + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0])) + self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) + + if use_safe_embedding_lookup: + self.assertIn( + "SparseFillEmptyRows", + [ + x.type + for x in tf.compat.v1.get_default_graph().get_operations() + ], + ) + else: + self.assertNotIn( + "SparseFillEmptyRows", + [ + x.type + for x in tf.compat.v1.get_default_graph().get_operations() + ], + ) + + @tf_test_utils.run_deprecated_v1 + def test_dense_features_not_trainable(self): + # Inputs. + vocabulary_size = 3 + sparse_input = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5), + ) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1.0, 2.0), # id 0 + (3.0, 5.0), # id 1 + (7.0, 11.0), # id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(tf.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7.0, 11.0), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, + # 3.5] + (2.0, 3.5), + # example 2, ids [], embedding = [0, 0] + (0.0, 0.0), + # example 3, ids [1], embedding = [3, 5] + (3.0, 5.0), + ) + + # Build columns. + categorical_column = tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=vocabulary_size + ) + embedding_column = tf.feature_column.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer, + trainable=False, + ) + + # Provide sparse input and get dense result. + dense_features = df.DenseFeatures((embedding_column,))( + {"aaa": sparse_input} + ) + + # Assert expected embedding variable and lookups. 
+ global_vars = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + self.assertCountEqual( + ("dense_features/aaa_embedding/embedding_weights:0",), + tuple([v.name for v in global_vars]), + ) + self.assertCountEqual( + [], + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ), + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) + self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) class SharedEmbeddingColumnTest(tf.test.TestCase, parameterized.TestCase): - - def _test_dense_features(self, trainable=True): - # Inputs. - vocabulary_size = 3 - sparse_input_a = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 4)), - values=(2, 0, 1), - dense_shape=(2, 5)) - sparse_input_b = tf.compat.v1.SparseTensorValue( - # example 0, ids [0] - # example 1, ids [] - indices=((0, 0),), - values=(0,), - dense_shape=(2, 5)) - sparse_input_c = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 1), (1, 1), (1, 3)), - values=(2, 0, 1), - dense_shape=(2, 5)) - sparse_input_d = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [] - indices=((0, 1),), - values=(2,), - dense_shape=(2, 5)) - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 + def _test_dense_features(self, trainable=True): + # Inputs. + vocabulary_size = 3 + sparse_input_a = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 4)), + values=(2, 0, 1), + dense_shape=(2, 5), + ) + sparse_input_b = tf.compat.v1.SparseTensorValue( + # example 0, ids [0] + # example 1, ids [] + indices=((0, 0),), + values=(0,), + dense_shape=(2, 5), + ) + sparse_input_c = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 1), (1, 1), (1, 3)), + values=(2, 0, 1), + dense_shape=(2, 5), + ) + sparse_input_d = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [] + indices=((0, 1),), + values=(2,), + dense_shape=(2, 5), + ) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1.0, 2.0), # id 0 + (3.0, 5.0), # id 1 + (7.0, 11.0), # id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(tf.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0: + # A ids [2], embedding = [7, 11] + # B ids [0], embedding = [1, 2] + # C ids [2], embedding = [7, 11] + # D ids [2], embedding = [7, 11] + (7.0, 11.0, 1.0, 2.0, 7.0, 11.0, 7.0, 11.0), + # example 1: + # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + # B ids [], embedding = [0, 0] + # C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + # D ids [], embedding = [0, 0] + (2.0, 3.5, 0.0, 0.0, 2.0, 3.5, 0.0, 0.0), + ) + + # Build columns. 
+ categorical_column_a = ( + tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=vocabulary_size + ) + ) + categorical_column_b = ( + tf.feature_column.categorical_column_with_identity( + key="bbb", num_buckets=vocabulary_size + ) + ) + categorical_column_c = ( + tf.feature_column.categorical_column_with_identity( + key="ccc", num_buckets=vocabulary_size + ) + ) + categorical_column_d = ( + tf.feature_column.categorical_column_with_identity( + key="ddd", num_buckets=vocabulary_size + ) + ) + + ( + embedding_column_a, + embedding_column_b, + ) = tf.feature_column.shared_embeddings( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer, + trainable=trainable, + ) + ( + embedding_column_c, + embedding_column_d, + ) = tf.feature_column.shared_embeddings( + [categorical_column_c, categorical_column_d], + dimension=embedding_dimension, + initializer=_initializer, + trainable=trainable, + ) + + features = { + "aaa": sparse_input_a, + "bbb": sparse_input_b, + "ccc": sparse_input_c, + "ddd": sparse_input_d, + } + + # Provide sparse input and get dense result. + dense_features = df.DenseFeatures( + feature_columns=( + embedding_column_b, + embedding_column_a, + embedding_column_c, + embedding_column_d, + ) + )(features) + + # Assert expected embedding variable and lookups. + global_vars = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + self.assertCountEqual( + ["aaa_bbb_shared_embedding:0", "ccc_ddd_shared_embedding:0"], + tuple([v.name for v in global_vars]), + ) + for v in global_vars: + self.assertIsInstance(v, tf.Variable) + trainable_vars = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ) + if trainable: + self.assertCountEqual( + ["aaa_bbb_shared_embedding:0", "ccc_ddd_shared_embedding:0"], + tuple([v.name for v in trainable_vars]), + ) + else: + self.assertCountEqual([], tuple([v.name for v in trainable_vars])) + shared_embedding_vars = global_vars + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllEqual( + embedding_values, self.evaluate(shared_embedding_vars[0]) + ) + self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) + + @tf_test_utils.run_deprecated_v1 + def test_dense_features(self): + self._test_dense_features() + + @tf_test_utils.run_deprecated_v1 + def test_dense_features_no_trainable(self): + self._test_dense_features(trainable=False) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class DenseFeaturesSerializationTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.named_parameters( + ("trainable", True, "trainable"), ("not_trainable", False, "frozen") ) - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(tf.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - # Expected lookup result, using combiner='mean'. 
- expected_lookups = ( - # example 0: - # A ids [2], embedding = [7, 11] - # B ids [0], embedding = [1, 2] - # C ids [2], embedding = [7, 11] - # D ids [2], embedding = [7, 11] - (7., 11., 1., 2., 7., 11., 7., 11.), - # example 1: - # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - # B ids [], embedding = [0, 0] - # C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - # D ids [], embedding = [0, 0] - (2., 3.5, 0., 0., 2., 3.5, 0., 0.), + def test_get_config(self, trainable, name): + cols = [ + tf.feature_column.numeric_column("a"), + tf.feature_column.embedding_column( + tf.feature_column.categorical_column_with_identity( + key="b", num_buckets=3 + ), + dimension=2, + ), + ] + orig_layer = df.DenseFeatures(cols, trainable=trainable, name=name) + config = orig_layer.get_config() + + self.assertEqual(config["name"], orig_layer.name) + self.assertEqual(config["trainable"], trainable) + self.assertLen(config["feature_columns"], 2) + self.assertEqual( + config["feature_columns"][0]["class_name"], "NumericColumn" + ) + self.assertEqual(config["feature_columns"][0]["config"]["shape"], (1,)) + self.assertEqual( + config["feature_columns"][1]["class_name"], "EmbeddingColumn" + ) + + @parameterized.named_parameters( + ("trainable", True, "trainable"), ("not_trainable", False, "frozen") ) - - # Build columns. - categorical_column_a = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = tf.feature_column.categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - categorical_column_c = tf.feature_column.categorical_column_with_identity( - key='ccc', num_buckets=vocabulary_size) - categorical_column_d = tf.feature_column.categorical_column_with_identity( - key='ddd', num_buckets=vocabulary_size) - - embedding_column_a, embedding_column_b = tf.feature_column.shared_embeddings( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer, - trainable=trainable) - embedding_column_c, embedding_column_d = tf.feature_column.shared_embeddings( - [categorical_column_c, categorical_column_d], - dimension=embedding_dimension, - initializer=_initializer, - trainable=trainable) - - features = { - 'aaa': sparse_input_a, - 'bbb': sparse_input_b, - 'ccc': sparse_input_c, - 'ddd': sparse_input_d - } - - # Provide sparse input and get dense result. - dense_features = df.DenseFeatures( - feature_columns=(embedding_column_b, embedding_column_a, - embedding_column_c, embedding_column_d))( - features) - - # Assert expected embedding variable and lookups. 
- global_vars = tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual( - ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'], - tuple([v.name for v in global_vars])) - for v in global_vars: - self.assertIsInstance(v, tf.Variable) - trainable_vars = tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) - if trainable: - self.assertCountEqual( - ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'], - tuple([v.name for v in trainable_vars])) - else: - self.assertCountEqual([], tuple([v.name for v in trainable_vars])) - shared_embedding_vars = global_vars - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllEqual(embedding_values, - self.evaluate(shared_embedding_vars[0])) - self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) - - @tf_test_utils.run_deprecated_v1 - def test_dense_features(self): - self._test_dense_features() - - @tf_test_utils.run_deprecated_v1 - def test_dense_features_no_trainable(self): - self._test_dense_features(trainable=False) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class DenseFeaturesSerializationTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters(('trainable', True, 'trainable'), - ('not_trainable', False, 'frozen')) - def test_get_config(self, trainable, name): - cols = [ - tf.feature_column.numeric_column('a'), - tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_identity( - key='b', num_buckets=3), - dimension=2) - ] - orig_layer = df.DenseFeatures(cols, trainable=trainable, name=name) - config = orig_layer.get_config() - - self.assertEqual(config['name'], orig_layer.name) - self.assertEqual(config['trainable'], trainable) - self.assertLen(config['feature_columns'], 2) - self.assertEqual(config['feature_columns'][0]['class_name'], - 'NumericColumn') - self.assertEqual(config['feature_columns'][0]['config']['shape'], (1,)) - self.assertEqual(config['feature_columns'][1]['class_name'], - 'EmbeddingColumn') - - @parameterized.named_parameters(('trainable', True, 'trainable'), - ('not_trainable', False, 'frozen')) - def test_from_config(self, trainable, name): - cols = [ - tf.feature_column.numeric_column('a'), - tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_vocabulary_list( - 'b', vocabulary_list=['1', '2', '3']), - dimension=2), - tf.feature_column.indicator_column( - tf.feature_column.categorical_column_with_hash_bucket( - key='c', hash_bucket_size=3)) - ] - orig_layer = df.DenseFeatures(cols, trainable=trainable, name=name) - config = orig_layer.get_config() - - new_layer = df.DenseFeatures.from_config(config) - - self.assertEqual(new_layer.name, orig_layer.name) - self.assertEqual(new_layer.trainable, trainable) - self.assertLen(new_layer._feature_columns, 3) - self.assertEqual(new_layer._feature_columns[0].name, 'a') - self.assertEqual(new_layer._feature_columns[1].initializer.mean, 0.0) - self.assertEqual(new_layer._feature_columns[1].categorical_column.name, 'b') - self.assertIsInstance(new_layer._feature_columns[0], cols[0].__class__) - self.assertIsInstance(new_layer._feature_columns[1], cols[1].__class__) - self.assertIsInstance(new_layer._feature_columns[2], cols[2].__class__) - - def test_crossed_column(self): - a = tf.feature_column.categorical_column_with_vocabulary_list( - 'a', vocabulary_list=['1', '2', '3']) - b = 
tf.feature_column.categorical_column_with_vocabulary_list( - 'b', vocabulary_list=['1', '2', '3']) - ab = tf.feature_column.crossed_column([a, b], hash_bucket_size=2) - cols = [tf.feature_column.indicator_column(ab)] - - orig_layer = df.DenseFeatures(cols) - config = orig_layer.get_config() - - new_layer = df.DenseFeatures.from_config(config) - - self.assertLen(new_layer._feature_columns, 1) - self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator') - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_from_config(self, trainable, name): + cols = [ + tf.feature_column.numeric_column("a"), + tf.feature_column.embedding_column( + tf.feature_column.categorical_column_with_vocabulary_list( + "b", vocabulary_list=["1", "2", "3"] + ), + dimension=2, + ), + tf.feature_column.indicator_column( + tf.feature_column.categorical_column_with_hash_bucket( + key="c", hash_bucket_size=3 + ) + ), + ] + orig_layer = df.DenseFeatures(cols, trainable=trainable, name=name) + config = orig_layer.get_config() + + new_layer = df.DenseFeatures.from_config(config) + + self.assertEqual(new_layer.name, orig_layer.name) + self.assertEqual(new_layer.trainable, trainable) + self.assertLen(new_layer._feature_columns, 3) + self.assertEqual(new_layer._feature_columns[0].name, "a") + self.assertEqual(new_layer._feature_columns[1].initializer.mean, 0.0) + self.assertEqual( + new_layer._feature_columns[1].categorical_column.name, "b" + ) + self.assertIsInstance(new_layer._feature_columns[0], cols[0].__class__) + self.assertIsInstance(new_layer._feature_columns[1], cols[1].__class__) + self.assertIsInstance(new_layer._feature_columns[2], cols[2].__class__) + + def test_crossed_column(self): + a = tf.feature_column.categorical_column_with_vocabulary_list( + "a", vocabulary_list=["1", "2", "3"] + ) + b = tf.feature_column.categorical_column_with_vocabulary_list( + "b", vocabulary_list=["1", "2", "3"] + ) + ab = tf.feature_column.crossed_column([a, b], hash_bucket_size=2) + cols = [tf.feature_column.indicator_column(ab)] + + orig_layer = df.DenseFeatures(cols) + config = orig_layer.get_config() + + new_layer = df.DenseFeatures.from_config(config) + + self.assertLen(new_layer._feature_columns, 1) + self.assertEqual(new_layer._feature_columns[0].name, "a_X_b_indicator") + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class SequenceFeatureColumnsTest(tf.test.TestCase): - """Tests DenseFeatures with sequence feature columns.""" - - def test_embedding_column(self): - """Tests that error is raised for sequence embedding column.""" - vocabulary_size = 3 - sparse_input = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - - categorical_column_a = tf.feature_column.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column_a = tf.feature_column.embedding_column( - categorical_column_a, dimension=2) - - input_layer = df.DenseFeatures([embedding_column_a]) - with self.assertRaisesRegex( - ValueError, - r'In embedding_column: aaa_embedding\. 
categorical_column must not be '
-        r'of type SequenceCategoricalColumn\.'):
-      _ = input_layer({'aaa': sparse_input})
-
-  def test_indicator_column(self):
-    """Tests that error is raised for sequence indicator column."""
-    vocabulary_size = 3
-    sparse_input = tf.compat.v1.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-
-    categorical_column_a = tf.feature_column.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    indicator_column_a = tf.feature_column.indicator_column(
-        categorical_column_a)
-
-    input_layer = df.DenseFeatures([indicator_column_a])
-    with self.assertRaisesRegex(
-        ValueError,
-        r'In indicator_column: aaa_indicator\. categorical_column must not be '
-        r'of type SequenceCategoricalColumn\.'):
-      _ = input_layer({'aaa': sparse_input})
-
-
-if __name__ == '__main__':
-  tf.test.main()
+    """Tests DenseFeatures with sequence feature columns."""
+
+    def test_embedding_column(self):
+        """Tests that error is raised for sequence embedding column."""
+        vocabulary_size = 3
+        sparse_input = tf.compat.v1.SparseTensorValue(
+            # example 0, ids [2]
+            # example 1, ids [0, 1]
+            indices=((0, 0), (1, 0), (1, 1)),
+            values=(2, 0, 1),
+            dense_shape=(2, 2),
+        )
+
+        categorical_column_a = (
+            tf.feature_column.sequence_categorical_column_with_identity(
+                key="aaa", num_buckets=vocabulary_size
+            )
+        )
+        embedding_column_a = tf.feature_column.embedding_column(
+            categorical_column_a, dimension=2
+        )
+
+        input_layer = df.DenseFeatures([embedding_column_a])
+        with self.assertRaisesRegex(
+            ValueError,
+            r"In embedding_column: aaa_embedding\. categorical_column must not "
+            r"be of type SequenceCategoricalColumn\.",
+        ):
+            _ = input_layer({"aaa": sparse_input})
+
+    def test_indicator_column(self):
+        """Tests that error is raised for sequence indicator column."""
+        vocabulary_size = 3
+        sparse_input = tf.compat.v1.SparseTensorValue(
+            # example 0, ids [2]
+            # example 1, ids [0, 1]
+            indices=((0, 0), (1, 0), (1, 1)),
+            values=(2, 0, 1),
+            dense_shape=(2, 2),
+        )
+
+        categorical_column_a = (
+            tf.feature_column.sequence_categorical_column_with_identity(
+                key="aaa", num_buckets=vocabulary_size
+            )
+        )
+        indicator_column_a = tf.feature_column.indicator_column(
+            categorical_column_a
+        )
+
+        input_layer = df.DenseFeatures([indicator_column_a])
+        with self.assertRaisesRegex(
+            ValueError,
+            r"In indicator_column: aaa_indicator\. categorical_column must not "
+            r"be of type SequenceCategoricalColumn\.",
+        ):
+            _ = input_layer({"aaa": sparse_input})
+
+
+if __name__ == "__main__":
+    tf.test.main()
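For reference, the behavior the shared-embedding tests above pin down is that `tf.feature_column.shared_embeddings` returns columns that all resolve to a single variable, so any number of `DenseFeatures` layers built from them add exactly one `aaa_bbb_shared_embedding` variable per graph. Below is a minimal sketch mirroring those tests, not part of the patch; it assumes graph mode (shared embeddings are unsupported in eager) and the `df` module alias the tests use:

```python
import tensorflow.compat.v2 as tf

from keras.feature_column import dense_features as df  # alias as in the tests

# shared_embeddings is graph-mode only, hence the explicit Graph context.
with tf.Graph().as_default():
    column_a = tf.feature_column.categorical_column_with_identity(
        key="aaa", num_buckets=3
    )
    column_b = tf.feature_column.categorical_column_with_identity(
        key="bbb", num_buckets=3
    )
    emb_a, emb_b = tf.feature_column.shared_embeddings(
        [column_a, column_b], dimension=2
    )
    features = {
        "aaa": tf.SparseTensor(
            indices=((0, 0),), values=(1,), dense_shape=(1, 1)
        ),
        "bbb": tf.SparseTensor(
            indices=((0, 0),), values=(2,), dense_shape=(1, 1)
        ),
    }
    # Two separate layer instances, but still exactly one embedding variable,
    # mirroring the assertions in the tests above.
    df.DenseFeatures([emb_a, emb_b])(features)
    df.DenseFeatures([emb_a, emb_b])(features)
    names = [
        v.name
        for v in tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES
        )
    ]
    assert names == ["aaa_bbb_shared_embedding:0"]
```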
diff --git a/keras/feature_column/dense_features_v2.py b/keras/feature_column/dense_features_v2.py
index 16259f78125a..f731d7163a94 100644
--- a/keras/feature_column/dense_features_v2.py
+++ b/keras/feature_column/dense_features_v2.py
@@ -19,140 +19,146 @@
 from __future__ import print_function
 
 import tensorflow.compat.v2 as tf
+
 from keras.feature_column import base_feature_layer as kfc
 from keras.feature_column import dense_features
 from keras.utils import tf_contextlib
+
+# isort: off
 from tensorflow.python.util.tf_export import keras_export
 
 
-@keras_export('keras.layers.DenseFeatures', v1=[])
+@keras_export("keras.layers.DenseFeatures", v1=[])
 class DenseFeatures(dense_features.DenseFeatures):
-  """A layer that produces a dense `Tensor` based on given `feature_columns`.
-
-  Generally a single example in training data is described with FeatureColumns.
-  At the first layer of the model, this column oriented data should be converted
-  to a single `Tensor`.
-
-  This layer can be called multiple times with different features.
-
-  This is the V2 version of this layer that uses name_scopes to create
-  variables instead of variable_scopes. But this approach currently lacks
-  support for partitioned variables. In that case, use the V1 version instead.
-
-  Example:
-
-  ```python
-  price = tf.feature_column.numeric_column('price')
-  keywords_embedded = tf.feature_column.embedding_column(
-      tf.feature_column.categorical_column_with_hash_bucket("keywords", 10000),
-      dimensions=16)
-  columns = [price, keywords_embedded, ...]
-  feature_layer = tf.keras.layers.DenseFeatures(columns)
-
-  features = tf.io.parse_example(
-      ..., features=tf.feature_column.make_parse_example_spec(columns))
-  dense_tensor = feature_layer(features)
-  for units in [128, 64, 32]:
-    dense_tensor = tf.keras.layers.Dense(units, activation='relu')(dense_tensor)
-  prediction = tf.keras.layers.Dense(1)(dense_tensor)
-  ```
-  """
-
-  def __init__(self,
-               feature_columns,
-               trainable=True,
-               name=None,
-               **kwargs):
-    """Creates a DenseFeatures object.
-
-    Args:
-      feature_columns: An iterable containing the FeatureColumns to use as
-        inputs to your model. All items should be instances of classes derived
-        from `DenseColumn` such as `numeric_column`, `embedding_column`,
-        `bucketized_column`, `indicator_column`. If you have categorical
-        features, you can wrap them with an `embedding_column` or
-        `indicator_column`.
-      trainable: Boolean, whether the layer's variables will be updated via
-        gradient descent during training.
-      name: Name to give to the DenseFeatures.
-      **kwargs: Keyword arguments to construct a layer.
-
-    Raises:
-      ValueError: if an item in `feature_columns` is not a `DenseColumn`.
+    """A layer that produces a dense `Tensor` based on given `feature_columns`.
+
+    Generally a single example in training data is described with
+    FeatureColumns. At the first layer of the model, this column-oriented data
+    should be converted to a single `Tensor`.
+
+    This layer can be called multiple times with different features.
+
+    This is the V2 version of this layer that uses name_scopes to create
+    variables instead of variable_scopes. But this approach currently lacks
+    support for partitioned variables. In that case, use the V1 version instead.
+
+    Example:
+
+    ```python
+    price = tf.feature_column.numeric_column('price')
+    keywords_embedded = tf.feature_column.embedding_column(
+        tf.feature_column.categorical_column_with_hash_bucket("keywords",
+                                                              10000),
+        dimension=16)
+    columns = [price, keywords_embedded, ...]
+    feature_layer = tf.keras.layers.DenseFeatures(columns)
+
+    features = tf.io.parse_example(
+        ..., features=tf.feature_column.make_parse_example_spec(columns))
+    dense_tensor = feature_layer(features)
+    for units in [128, 64, 32]:
+        dense_tensor = tf.keras.layers.Dense(units, activation='relu')(
+            dense_tensor)
+    prediction = tf.keras.layers.Dense(1)(dense_tensor)
+    ```
     """
-    super().__init__(
-        feature_columns=feature_columns,
-        trainable=trainable,
-        name=name,
-        **kwargs)
-    self._state_manager = _StateManagerImplV2(self, self.trainable)
-
-  def build(self, _):
-    for column in self._feature_columns:
-      with tf.name_scope(column.name):
-        column.create_state(self._state_manager)
-      # We would like to call Layer.build and not _DenseFeaturesHelper.build.
-      # pylint: disable=protected-access
-      super(kfc._BaseFeaturesLayer, self).build(None)  # pylint: disable=bad-super-call
-
-
-class _StateManagerImplV2(tf.__internal__.feature_column.StateManager):  # pylint: disable=protected-access
-  """Manages the state of DenseFeatures."""
-
-  def create_variable(self,
-                      feature_column,
-                      name,
-                      shape,
-                      dtype=None,
-                      trainable=True,
-                      use_resource=True,
-                      initializer=None):
-    if name in self._cols_to_vars_map[feature_column]:
-      raise ValueError('Variable already exists.')
-
-    # We explicitly track these variables since `name` is not guaranteed to be
-    # unique and disable manual tracking that the add_weight call does.
-    with no_manual_dependency_tracking_scope(self._layer):
-      var = self._layer.add_weight(
-          name=name,
-          shape=shape,
-          dtype=dtype,
-          initializer=initializer,
-          trainable=self._trainable and trainable,
-          use_resource=use_resource)
-    if isinstance(var, tf.__internal__.tracking.Trackable):
-      self._layer._track_trackable(var, feature_column.name + '/' + name)  # pylint: disable=protected-access
-    self._cols_to_vars_map[feature_column][name] = var
-    return var
+
+    def __init__(self, feature_columns, trainable=True, name=None, **kwargs):
+        """Creates a DenseFeatures object.
+
+        Args:
+          feature_columns: An iterable containing the FeatureColumns to use as
+            inputs to your model. All items should be instances of classes
+            derived from `DenseColumn` such as `numeric_column`,
+            `embedding_column`, `bucketized_column`, `indicator_column`. If you
+            have categorical features, you can wrap them with an
+            `embedding_column` or `indicator_column`.
+          trainable: Boolean, whether the layer's variables will be updated via
+            gradient descent during training.
+          name: Name to give to the DenseFeatures.
+          **kwargs: Keyword arguments to construct a layer.
+
+        Raises:
+          ValueError: if an item in `feature_columns` is not a `DenseColumn`.
+        """
+        super().__init__(
+            feature_columns=feature_columns,
+            trainable=trainable,
+            name=name,
+            **kwargs
+        )
+        self._state_manager = _StateManagerImplV2(self, self.trainable)
+
+    def build(self, _):
+        for column in self._feature_columns:
+            with tf.name_scope(column.name):
+                column.create_state(self._state_manager)
+        # Call Layer.build directly rather than _DenseFeaturesHelper.build.
+
+        super(kfc._BaseFeaturesLayer, self).build(None)
+
+
+class _StateManagerImplV2(tf.__internal__.feature_column.StateManager):
+    """Manages the state of DenseFeatures."""
+
+    def create_variable(
+        self,
+        feature_column,
+        name,
+        shape,
+        dtype=None,
+        trainable=True,
+        use_resource=True,
+        initializer=None,
+    ):
+        if name in self._cols_to_vars_map[feature_column]:
+            raise ValueError("Variable already exists.")
+
+        # `name` is not guaranteed to be unique across columns, so suppress
+        # the automatic tracking that `add_weight` does and track the variable
+        # explicitly under a column-qualified name below.
+        with no_manual_dependency_tracking_scope(self._layer):
+            var = self._layer.add_weight(
+                name=name,
+                shape=shape,
+                dtype=dtype,
+                initializer=initializer,
+                trainable=self._trainable and trainable,
+                use_resource=use_resource,
+            )
+        if isinstance(var, tf.__internal__.tracking.Trackable):
+            self._layer._track_trackable(var, feature_column.name + "/" + name)
+        self._cols_to_vars_map[feature_column][name] = var
+        return var
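The bookkeeping in `create_variable` above is worth spelling out: a bare `name` such as `"embedding_weights"` can repeat across feature columns, so automatic tracking is suppressed during `add_weight` and the variable is re-registered under the column-qualified key `feature_column.name + "/" + name`. Here is a Keras-free toy sketch of that one-variable-per-(column, name) pattern; the class and names are hypothetical, purely for illustration:

```python
class ToyStateManager:
    """Hypothetical stand-in: stores one value per (column, name) pair."""

    def __init__(self):
        # {column_name: {var_name: value}}, like _cols_to_vars_map above.
        self._cols_to_vars_map = {}

    def create_variable(self, column_name, name, value):
        per_column = self._cols_to_vars_map.setdefault(column_name, {})
        if name in per_column:
            raise ValueError("Variable already exists.")
        # Key by column *and* name: two columns may both call their
        # variable "embedding_weights" without colliding.
        per_column[name] = value
        return value


mgr = ToyStateManager()
mgr.create_variable("aaa_embedding", "embedding_weights", [[1.0, 2.0]])
mgr.create_variable("bbb_embedding", "embedding_weights", [[3.0, 4.0]])  # ok
try:
    mgr.create_variable("aaa_embedding", "embedding_weights", [[0.0, 0.0]])
except ValueError:
    pass  # duplicate (column, name) pairs are rejected
```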
 
 
 @tf_contextlib.contextmanager
 def no_manual_dependency_tracking_scope(obj):
-  """A context that disables manual dependency tracking for the given `obj`.
-
-  Sometimes library methods might track objects on their own and we might want
-  to disable that and do the tracking on our own. One can then use this context
-  manager to disable the tracking the library method does and do your own
-  tracking.
-
-  For example:
-
-  class TestLayer(tf.keras.Layer):
-    def build():
-      with no_manual_dependency_tracking_scope(self):
-        var = self.add_weight("name1")  # Creates a var and doesn't track it
-        self._track_trackable("name2", var)  # We track variable with name `name2`
-
-  Args:
-    obj: A trackable object.
-
-  Yields:
-    a scope in which the object doesn't track dependencies manually.
-  """
-  # pylint: disable=protected-access
-  previous_value = getattr(obj, '_manual_tracking', True)
-  obj._manual_tracking = False
-  try:
-    yield
-  finally:
-    obj._manual_tracking = previous_value
+    """A context that disables manual dependency tracking for the given `obj`.
+
+    Some library methods (for example, `add_weight`) track the objects they
+    create on their own. Use this context manager to suppress that automatic
+    tracking when you want to register the object yourself.
+
+    For example:
+
+    class TestLayer(tf.keras.layers.Layer):
+      def build(self):
+        with no_manual_dependency_tracking_scope(self):
+          var = self.add_weight("name1")  # Creates a var and doesn't track it
+          # We track the variable under the name `name2`
+          self._track_trackable("name2", var)
+
+    Args:
+      obj: A trackable object.
+
+    Yields:
+      A scope in which the object doesn't track dependencies manually.
+    """
+
+    previous_value = getattr(obj, "_manual_tracking", True)
+    obj._manual_tracking = False
+    try:
+        yield
+    finally:
+        obj._manual_tracking = previous_value
diff --git a/keras/feature_column/dense_features_v2_test.py b/keras/feature_column/dense_features_v2_test.py
index d0b2ab342075..d984fced6ba8 100644
--- a/keras/feature_column/dense_features_v2_test.py
+++ b/keras/feature_column/dense_features_v2_test.py
@@ -18,638 +18,790 @@
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
 import tensorflow.compat.v2 as tf
 
-import numpy as np
-from tensorflow.python.eager import backprop
-from keras.testing_infra import test_combinations
 from keras.feature_column import dense_features_v2 as df
+from keras.testing_infra import test_combinations
+
+# isort: off
+from tensorflow.python.eager import backprop
 
 
 def _initialized_session(config=None):
-  sess = tf.compat.v1.Session(config=config)
-  sess.run(tf.compat.v1.global_variables_initializer())
-  sess.run(tf.compat.v1.tables_initializer())
-  return sess
+    sess = tf.compat.v1.Session(config=config)
+    sess.run(tf.compat.v1.global_variables_initializer())
+    sess.run(tf.compat.v1.tables_initializer())
+    return sess
 
 
 class DenseFeaturesTest(test_combinations.TestCase):
-
-  @test_combinations.generate(
-      test_combinations.combine(mode=['graph', 'eager']))
-  def test_retrieving_input(self):
-    features = {'a': [0.]}
-    dense_features = df.DenseFeatures(tf.feature_column.numeric_column('a'))
-    inputs = self.evaluate(dense_features(features))
-    self.assertAllClose([[0.]], inputs)
-
-  @test_combinations.generate(test_combinations.combine(mode=['eager']))
-  def test_reuses_variables(self):
-    sparse_input = tf.SparseTensor(
-        indices=((0, 0), (1, 0), (2, 0)),
-        values=(0, 1, 2),
-        dense_shape=(3, 3))
-
-    # Create feature columns (categorical and embedding).
- categorical_column = tf.feature_column.categorical_column_with_identity( - key='a', num_buckets=3) - embedding_dimension = 2 - - def _embedding_column_initializer(shape, dtype, partition_info=None): - del shape # unused - del dtype # unused - del partition_info # unused - embedding_values = ( - (1, 0), # id 0 - (0, 1), # id 1 - (1, 1)) # id 2 - return embedding_values - - embedding_column = tf.feature_column.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_embedding_column_initializer) - - dense_features = df.DenseFeatures([embedding_column]) - features = {'a': sparse_input} - - inputs = dense_features(features) - variables = dense_features.variables - - # Sanity check: test that the inputs are correct. - self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs) - - # Check that only one variable was created. - self.assertEqual(1, len(variables)) - - # Check that invoking dense_features on the same features does not create - # additional variables - _ = dense_features(features) - self.assertEqual(1, len(variables)) - self.assertIs(variables[0], dense_features.variables[0]) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_feature_column_dense_features_gradient(self): - sparse_input = tf.SparseTensor( - indices=((0, 0), (1, 0), (2, 0)), - values=(0, 1, 2), - dense_shape=(3, 3)) - - # Create feature columns (categorical and embedding). - categorical_column = tf.feature_column.categorical_column_with_identity( - key='a', num_buckets=3) - embedding_dimension = 2 - - def _embedding_column_initializer(shape, dtype, partition_info=None): - del shape # unused - del dtype # unused - del partition_info # unused - embedding_values = ( - (1, 0), # id 0 - (0, 1), # id 1 - (1, 1)) # id 2 - return embedding_values - - embedding_column = tf.feature_column.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_embedding_column_initializer) - - dense_features = df.DenseFeatures([embedding_column]) - features = {'a': sparse_input} - - def scale_matrix(): - matrix = dense_features(features) - return 2 * matrix - - # Sanity check: Verify that scale_matrix returns the correct output. - self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix()) - - # Check that the returned gradient is correct. - grad_function = backprop.implicit_grad(scale_matrix) - grads_and_vars = grad_function() - indexed_slice = grads_and_vars[0][0] - gradient = grads_and_vars[0][0].values - - self.assertAllEqual([0, 1, 2], indexed_slice.indices) - self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient) - - def test_dense_feature_with_training_arg(self): - price1 = tf.feature_column.numeric_column('price1', shape=2) - price2 = tf.feature_column.numeric_column('price2') - - # Monkey patch the second numeric column to simulate a column that has - # different behavior by mode. 
- def training_aware_get_dense_tensor(transformation_cache, - state_manager, - training=None): - return transformation_cache.get(price2, state_manager, training=training) - - def training_aware_transform_feature(transformation_cache, - state_manager, - training=None): - input_tensor = transformation_cache.get( - price2.key, state_manager, training=training) - if training: - return input_tensor * 10.0 - else: - return input_tensor * 20.0 - - price2.get_dense_tensor = training_aware_get_dense_tensor - price2.transform_feature = training_aware_transform_feature - with tf.Graph().as_default(): - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - train_mode = df.DenseFeatures([price1, price2])(features, training=True) - predict_mode = df.DenseFeatures([price1, price2 - ])(features, training=False) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2., 30.], [5., 6., 40.]], - self.evaluate(train_mode)) - self.assertAllClose([[1., 2., 60.], [5., 6., 80.]], - self.evaluate(predict_mode)) - - def test_raises_if_empty_feature_columns(self): - with self.assertRaisesRegex(ValueError, - 'feature_columns must not be empty'): - df.DenseFeatures(feature_columns=[])(features={}) - - def test_should_be_dense_column(self): - with self.assertRaisesRegex(ValueError, 'must be a .*DenseColumn'): - df.DenseFeatures(feature_columns=[ - tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) - ])( - features={ - 'a': [[0]] - }) - - def test_does_not_support_dict_columns(self): - with self.assertRaisesRegex( - ValueError, 'Expected feature_columns to be iterable, found dict.'): - df.DenseFeatures(feature_columns={'a': tf.feature_column.numeric_column('a')})( - features={ - 'a': [[0]] - }) - - def test_bare_column(self): - with tf.Graph().as_default(): - features = features = {'a': [0.]} - net = df.DenseFeatures(tf.feature_column.numeric_column('a'))(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[0.]], self.evaluate(net)) - - def test_column_generator(self): - with tf.Graph().as_default(): - features = features = {'a': [0.], 'b': [1.]} - columns = (tf.feature_column.numeric_column(key) for key in features) - net = df.DenseFeatures(columns)(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[0., 1.]], self.evaluate(net)) - - def test_raises_if_duplicate_name(self): - with self.assertRaisesRegex( - ValueError, 'Duplicate feature column name found for columns'): - df.DenseFeatures( - feature_columns=[tf.feature_column.numeric_column('a'), - tf.feature_column.numeric_column('a')])( - features={ - 'a': [[0]] - }) - - def test_one_column(self): - price = tf.feature_column.numeric_column('price') - with tf.Graph().as_default(): - features = {'price': [[1.], [5.]]} - net = df.DenseFeatures([price])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1.], [5.]], self.evaluate(net)) - - def test_multi_dimension(self): - price = tf.feature_column.numeric_column('price', shape=2) - with tf.Graph().as_default(): - features = {'price': [[1., 2.], [5., 6.]]} - net = df.DenseFeatures([price])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - 
- self.assertAllClose([[1., 2.], [5., 6.]], self.evaluate(net)) - - def test_compute_output_shape(self): - price1 = tf.feature_column.numeric_column('price1', shape=2) - price2 = tf.feature_column.numeric_column('price2', shape=4) - with tf.Graph().as_default(): - features = { - 'price1': [[1., 2.], [5., 6.]], - 'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]] - } - dense_features = df.DenseFeatures([price1, price2]) - self.assertEqual((None, 6), dense_features.compute_output_shape((None,))) - net = dense_features(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]], - self.evaluate(net)) - - def test_raises_if_shape_mismatch(self): - price = tf.feature_column.numeric_column('price', shape=2) - with tf.Graph().as_default(): - features = {'price': [[1.], [5.]]} - with self.assertRaisesRegex( - Exception, - r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): - df.DenseFeatures([price])(features) - - def test_reshaping(self): - price = tf.feature_column.numeric_column('price', shape=[1, 2]) - with tf.Graph().as_default(): - features = {'price': [[[1., 2.]], [[5., 6.]]]} - net = df.DenseFeatures([price])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2.], [5., 6.]], self.evaluate(net)) - - def test_multi_column(self): - price1 = tf.feature_column.numeric_column('price1', shape=2) - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - net = df.DenseFeatures([price1, price2])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net)) - - def test_cols_to_output_tensors(self): - price1 = tf.feature_column.numeric_column('price1', shape=2) - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - cols_dict = {} - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - dense_features = df.DenseFeatures([price1, price2]) - net = dense_features(features, cols_dict) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 2.], [5., 6.]], - self.evaluate(cols_dict[price1])) - self.assertAllClose([[3.], [4.]], self.evaluate(cols_dict[price2])) - self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net)) - - def test_column_order(self): - price_a = tf.feature_column.numeric_column('price_a') - price_b = tf.feature_column.numeric_column('price_b') - with tf.Graph().as_default(): - features = { - 'price_a': [[1.]], - 'price_b': [[3.]], - } - net1 = df.DenseFeatures([price_a, price_b])(features) - net2 = df.DenseFeatures([price_b, price_a])(features) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[1., 3.]], self.evaluate(net1)) - self.assertAllClose([[1., 3.]], self.evaluate(net2)) - - def test_fails_for_categorical_column(self): - animal = tf.feature_column.categorical_column_with_identity('animal', num_buckets=4) - with tf.Graph().as_default(): - features = { - 'animal': - tf.SparseTensor( - indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) - } - with 
self.assertRaisesRegex(Exception, 'must be a .*DenseColumn'): - df.DenseFeatures([animal])(features) - - def test_static_batch_size_mismatch(self): - price1 = tf.feature_column.numeric_column('price1') - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - features = { - 'price1': [[1.], [5.], [7.]], # batchsize = 3 - 'price2': [[3.], [4.]] # batchsize = 2 - } - with self.assertRaisesRegex( - ValueError, - r'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - df.DenseFeatures([price1, price2])(features) - - def test_subset_of_static_batch_size_mismatch(self): - price1 = tf.feature_column.numeric_column('price1') - price2 = tf.feature_column.numeric_column('price2') - price3 = tf.feature_column.numeric_column('price3') - with tf.Graph().as_default(): - features = { - 'price1': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 3 - 'price2': [[3.], [4.]], # batchsize = 2 - 'price3': [[3.], [4.], [5.]] # batchsize = 3 - } - with self.assertRaisesRegex( - ValueError, - r'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - df.DenseFeatures([price1, price2, price3])(features) - - def test_runtime_batch_size_mismatch(self): - price1 = tf.feature_column.numeric_column('price1') - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - features = { - 'price1': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 3 - 'price2': [[3.], [4.]] # batchsize = 2 - } - net = df.DenseFeatures([price1, price2])(features) - with _initialized_session() as sess: - with self.assertRaisesRegex(tf.errors.OpError, - 'Dimension 0 in both shapes must be equal|' - 'Dimensions of inputs should match'): - sess.run(net, feed_dict={features['price1']: [[1.], [5.], [7.]]}) - - def test_runtime_batch_size_matches(self): - price1 = tf.feature_column.numeric_column('price1') - price2 = tf.feature_column.numeric_column('price2') - with tf.Graph().as_default(): - features = { - 'price1': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 2 - 'price2': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 2 - } - net = df.DenseFeatures([price1, price2])(features) - with _initialized_session() as sess: - sess.run( - net, - feed_dict={ - features['price1']: [[1.], [5.]], - features['price2']: [[1.], [5.]], - }) - - def test_multiple_layers_with_same_embedding_column(self): - some_sparse_column = tf.feature_column.categorical_column_with_hash_bucket( - 'sparse_feature', hash_bucket_size=5) - some_embedding_column = tf.feature_column.embedding_column( - some_sparse_column, dimension=10) - - with tf.Graph().as_default(): - features = { - 'sparse_feature': [['a'], ['x']], - } - all_cols = [some_embedding_column] - df.DenseFeatures(all_cols)(features) - df.DenseFeatures(all_cols)(features) - # Make sure that 2 variables get created in this case. 
- self.assertEqual(2, - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES))) - expected_var_names = [ - 'dense_features/sparse_feature_embedding/embedding_weights:0', - 'dense_features_1/sparse_feature_embedding/embedding_weights:0' - ] - self.assertItemsEqual( - expected_var_names, - [v.name for v in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)]) - - def test_multiple_layers_with_same_shared_embedding_column(self): - categorical_column_a = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = tf.feature_column.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - - # feature_column.shared_embeddings is not supported in eager. - with tf.Graph().as_default(): - embedding_column_b, embedding_column_a = tf.feature_column.shared_embeddings( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - features = { - 'aaa': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - all_cols = [embedding_column_a, embedding_column_b] - df.DenseFeatures(all_cols)(features) - df.DenseFeatures(all_cols)(features) - # Make sure that only 1 variable gets created in this case. - self.assertEqual(1, - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES))) - self.assertItemsEqual( - ['aaa_bbb_shared_embedding:0'], - [v.name for v in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)]) - - def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self): - categorical_column_a = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = tf.feature_column.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - - # feature_column.shared_embeddings is not supported in eager. - with tf.Graph().as_default(): - embedding_column_b, embedding_column_a = tf.feature_column.shared_embeddings( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - all_cols = [embedding_column_a, embedding_column_b] - features = { - 'aaa': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - df.DenseFeatures(all_cols)(features) - # Make sure that only 1 variable gets created in this case. - self.assertEqual(1, - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES))) - - with tf.Graph().as_default(): - features1 = { - 'aaa': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - tf.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - - df.DenseFeatures(all_cols)(features1) - # Make sure that only 1 variable gets created in this case. - self.assertEqual(1, - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES))) - self.assertItemsEqual( - ['aaa_bbb_shared_embedding:0'], - [v.name for v in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)]) - - def test_with_1d_sparse_tensor(self): - embedding_values = ( - (1., 2., 3., 4., 5.), # id 0 - (6., 7., 8., 9., 10.), # id 1 - (11., 12., 13., 14., 15.) 
# id 2 + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) ) - - def _initializer(shape, dtype, partition_info=None): - del shape, dtype, partition_info - return embedding_values - - # price has 1 dimension in dense_features - price = tf.feature_column.numeric_column('price') - - # one_hot_body_style has 3 dims in dense_features. - body_style = tf.feature_column.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - one_hot_body_style = tf.feature_column.indicator_column(body_style) - - # embedded_body_style has 5 dims in dense_features. - country = tf.feature_column.categorical_column_with_vocabulary_list( - 'country', vocabulary_list=['US', 'JP', 'CA']) - embedded_country = tf.feature_column.embedding_column( - country, dimension=5, initializer=_initializer) - - with tf.Graph().as_default(): - # Provides 1-dim tensor and dense tensor. - features = { - 'price': - tf.constant([ - 11., - 12., - ]), - 'body-style': - tf.SparseTensor( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), - # This is dense tensor for the categorical_column. - 'country': - tf.constant(['CA', 'US']), - } - self.assertEqual(1, features['price'].shape.ndims) - self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) - self.assertEqual(1, features['country'].shape.ndims) - - net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( - features) - self.assertEqual(1 + 3 + 5, net.shape[1]) - with _initialized_session() as sess: - - # Each row is formed by concatenating `embedded_body_style`, - # `one_hot_body_style`, and `price` in order. - self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.], - [1., 0., 0., 1., 2., 3., 4., 5., 12.]], - sess.run(net)) - - def test_with_1d_unknown_shape_sparse_tensor(self): - embedding_values = ( - (1., 2.), # id 0 - (6., 7.), # id 1 - (11., 12.) # id 2 - ) - - def _initializer(shape, dtype, partition_info=None): - del shape, dtype, partition_info - return embedding_values - - # price has 1 dimension in dense_features - price = tf.feature_column.numeric_column('price') - - # one_hot_body_style has 3 dims in dense_features. - body_style = tf.feature_column.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - one_hot_body_style = tf.feature_column.indicator_column(body_style) - - # embedded_body_style has 5 dims in dense_features. - country = tf.feature_column.categorical_column_with_vocabulary_list( - 'country', vocabulary_list=['US', 'JP', 'CA']) - embedded_country = tf.feature_column.embedding_column( - country, dimension=2, initializer=_initializer) - - # Provides 1-dim tensor and dense tensor. - with tf.Graph().as_default(): - features = { - 'price': tf.compat.v1.placeholder(tf.float32), - 'body-style': tf.compat.v1.sparse_placeholder(tf.string), - # This is dense tensor for the categorical_column. 
- 'country': tf.compat.v1.placeholder(tf.string), - } - self.assertIsNone(features['price'].shape.ndims) - self.assertIsNone(features['body-style'].get_shape().ndims) - self.assertIsNone(features['country'].shape.ndims) - - price_data = np.array([11., 12.]) - body_style_data = tf.compat.v1.SparseTensorValue( - indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) - country_data = np.array([['US'], ['CA']]) - - net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( - features) - self.assertEqual(1 + 3 + 2, net.shape[1]) - with _initialized_session() as sess: - - # Each row is formed by concatenating `embedded_body_style`, - # `one_hot_body_style`, and `price` in order. - self.assertAllEqual( - [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]], - sess.run( - net, - feed_dict={ - features['price']: price_data, - features['body-style']: body_style_data, - features['country']: country_data - })) - - def test_with_rank_0_feature(self): - # price has 1 dimension in dense_features - price = tf.feature_column.numeric_column('price') - features = { - 'price': tf.constant(0), - } - self.assertEqual(0, features['price'].shape.ndims) - - # Static rank 0 should fail - with self.assertRaisesRegex(ValueError, 'Feature .* cannot have rank 0'): - df.DenseFeatures([price])(features) - - with tf.Graph().as_default(): - # Dynamic rank 0 should fail - features = { - 'price': tf.compat.v1.placeholder(tf.float32), - } - net = df.DenseFeatures([price])(features) - self.assertEqual(1, net.shape[1]) - with _initialized_session() as sess: - with self.assertRaisesOpError('Feature .* cannot have rank 0'): - sess.run(net, feed_dict={features['price']: np.array(1)}) - - -if __name__ == '__main__': - tf.test.main() + def test_retrieving_input(self): + features = {"a": [0.0]} + dense_features = df.DenseFeatures(tf.feature_column.numeric_column("a")) + inputs = self.evaluate(dense_features(features)) + self.assertAllClose([[0.0]], inputs) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_reuses_variables(self): + sparse_input = tf.SparseTensor( + indices=((0, 0), (1, 0), (2, 0)), + values=(0, 1, 2), + dense_shape=(3, 3), + ) + + # Create feature columns (categorical and embedding). + categorical_column = tf.feature_column.categorical_column_with_identity( + key="a", num_buckets=3 + ) + embedding_dimension = 2 + + def _embedding_column_initializer(shape, dtype, partition_info=None): + del shape # unused + del dtype # unused + del partition_info # unused + embedding_values = ((1, 0), (0, 1), (1, 1)) # id 0 # id 1 # id 2 + return embedding_values + + embedding_column = tf.feature_column.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_embedding_column_initializer, + ) + + dense_features = df.DenseFeatures([embedding_column]) + features = {"a": sparse_input} + + inputs = dense_features(features) + variables = dense_features.variables + + # Sanity check: test that the inputs are correct. + self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs) + + # Check that only one variable was created. 
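+ # The lone embedding column contributes exactly one embedding weights variable.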
+ self.assertEqual(1, len(variables)) + + # Check that invoking dense_features on the same features does not + # create additional variables + _ = dense_features(features) + self.assertEqual(1, len(variables)) + self.assertIs(variables[0], dense_features.variables[0]) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_feature_column_dense_features_gradient(self): + sparse_input = tf.SparseTensor( + indices=((0, 0), (1, 0), (2, 0)), + values=(0, 1, 2), + dense_shape=(3, 3), + ) + + # Create feature columns (categorical and embedding). + categorical_column = tf.feature_column.categorical_column_with_identity( + key="a", num_buckets=3 + ) + embedding_dimension = 2 + + def _embedding_column_initializer(shape, dtype, partition_info=None): + del shape # unused + del dtype # unused + del partition_info # unused + embedding_values = ((1, 0), (0, 1), (1, 1)) # id 0 # id 1 # id 2 + return embedding_values + + embedding_column = tf.feature_column.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_embedding_column_initializer, + ) + + dense_features = df.DenseFeatures([embedding_column]) + features = {"a": sparse_input} + + def scale_matrix(): + matrix = dense_features(features) + return 2 * matrix + + # Sanity check: Verify that scale_matrix returns the correct output. + self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix()) + + # Check that the returned gradient is correct. + grad_function = backprop.implicit_grad(scale_matrix) + grads_and_vars = grad_function() + indexed_slice = grads_and_vars[0][0] + gradient = grads_and_vars[0][0].values + + self.assertAllEqual([0, 1, 2], indexed_slice.indices) + self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient) + + def test_dense_feature_with_training_arg(self): + price1 = tf.feature_column.numeric_column("price1", shape=2) + price2 = tf.feature_column.numeric_column("price2") + + # Monkey patch the second numeric column to simulate a column that has + # different behavior by mode. 
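+ # The patched transform below multiplies inputs by 10.0 in training mode and by 20.0 otherwise.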
+ def training_aware_get_dense_tensor( + transformation_cache, state_manager, training=None + ): + return transformation_cache.get( + price2, state_manager, training=training + ) + + def training_aware_transform_feature( + transformation_cache, state_manager, training=None + ): + input_tensor = transformation_cache.get( + price2.key, state_manager, training=training + ) + if training: + return input_tensor * 10.0 + else: + return input_tensor * 20.0 + + price2.get_dense_tensor = training_aware_get_dense_tensor + price2.transform_feature = training_aware_transform_feature + with tf.Graph().as_default(): + features = { + "price1": [[1.0, 2.0], [5.0, 6.0]], + "price2": [[3.0], [4.0]], + } + train_mode = df.DenseFeatures([price1, price2])( + features, training=True + ) + predict_mode = df.DenseFeatures([price1, price2])( + features, training=False + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose( + [[1.0, 2.0, 30.0], [5.0, 6.0, 40.0]], self.evaluate(train_mode) + ) + self.assertAllClose( + [[1.0, 2.0, 60.0], [5.0, 6.0, 80.0]], + self.evaluate(predict_mode), + ) + + def test_raises_if_empty_feature_columns(self): + with self.assertRaisesRegex( + ValueError, "feature_columns must not be empty" + ): + df.DenseFeatures(feature_columns=[])(features={}) + + def test_should_be_dense_column(self): + with self.assertRaisesRegex(ValueError, "must be a .*DenseColumn"): + df.DenseFeatures( + feature_columns=[ + tf.feature_column.categorical_column_with_hash_bucket( + "wire_cast", 4 + ) + ] + )(features={"a": [[0]]}) + + def test_does_not_support_dict_columns(self): + with self.assertRaisesRegex( + ValueError, "Expected feature_columns to be iterable, found dict." + ): + df.DenseFeatures( + feature_columns={"a": tf.feature_column.numeric_column("a")} + )(features={"a": [[0]]}) + + def test_bare_column(self): + with tf.Graph().as_default(): + features = features = {"a": [0.0]} + net = df.DenseFeatures(tf.feature_column.numeric_column("a"))( + features + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[0.0]], self.evaluate(net)) + + def test_column_generator(self): + with tf.Graph().as_default(): + features = features = {"a": [0.0], "b": [1.0]} + columns = ( + tf.feature_column.numeric_column(key) for key in features + ) + net = df.DenseFeatures(columns)(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[0.0, 1.0]], self.evaluate(net)) + + def test_raises_if_duplicate_name(self): + with self.assertRaisesRegex( + ValueError, "Duplicate feature column name found for columns" + ): + df.DenseFeatures( + feature_columns=[ + tf.feature_column.numeric_column("a"), + tf.feature_column.numeric_column("a"), + ] + )(features={"a": [[0]]}) + + def test_one_column(self): + price = tf.feature_column.numeric_column("price") + with tf.Graph().as_default(): + features = {"price": [[1.0], [5.0]]} + net = df.DenseFeatures([price])(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[1.0], [5.0]], self.evaluate(net)) + + def test_multi_dimension(self): + price = tf.feature_column.numeric_column("price", shape=2) + with tf.Graph().as_default(): + features = {"price": [[1.0, 2.0], [5.0, 6.0]]} + net = df.DenseFeatures([price])(features) + + 
self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[1.0, 2.0], [5.0, 6.0]], self.evaluate(net)) + + def test_compute_output_shape(self): + price1 = tf.feature_column.numeric_column("price1", shape=2) + price2 = tf.feature_column.numeric_column("price2", shape=4) + with tf.Graph().as_default(): + features = { + "price1": [[1.0, 2.0], [5.0, 6.0]], + "price2": [[3.0, 4.0, 5.0, 6.0], [7.0, 8.0, 9.0, 10.0]], + } + dense_features = df.DenseFeatures([price1, price2]) + self.assertEqual( + (None, 6), dense_features.compute_output_shape((None,)) + ) + net = dense_features(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose( + [ + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + [5.0, 6.0, 7.0, 8.0, 9.0, 10.0], + ], + self.evaluate(net), + ) + + def test_raises_if_shape_mismatch(self): + price = tf.feature_column.numeric_column("price", shape=2) + with tf.Graph().as_default(): + features = {"price": [[1.0], [5.0]]} + with self.assertRaisesRegex( + Exception, + r"Cannot reshape a tensor with 2 elements to shape \[2,2\]", + ): + df.DenseFeatures([price])(features) + + def test_reshaping(self): + price = tf.feature_column.numeric_column("price", shape=[1, 2]) + with tf.Graph().as_default(): + features = {"price": [[[1.0, 2.0]], [[5.0, 6.0]]]} + net = df.DenseFeatures([price])(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[1.0, 2.0], [5.0, 6.0]], self.evaluate(net)) + + def test_multi_column(self): + price1 = tf.feature_column.numeric_column("price1", shape=2) + price2 = tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + features = { + "price1": [[1.0, 2.0], [5.0, 6.0]], + "price2": [[3.0], [4.0]], + } + net = df.DenseFeatures([price1, price2])(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose( + [[1.0, 2.0, 3.0], [5.0, 6.0, 4.0]], self.evaluate(net) + ) + + def test_cols_to_output_tensors(self): + price1 = tf.feature_column.numeric_column("price1", shape=2) + price2 = tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + cols_dict = {} + features = { + "price1": [[1.0, 2.0], [5.0, 6.0]], + "price2": [[3.0], [4.0]], + } + dense_features = df.DenseFeatures([price1, price2]) + net = dense_features(features, cols_dict) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose( + [[1.0, 2.0], [5.0, 6.0]], self.evaluate(cols_dict[price1]) + ) + self.assertAllClose( + [[3.0], [4.0]], self.evaluate(cols_dict[price2]) + ) + self.assertAllClose( + [[1.0, 2.0, 3.0], [5.0, 6.0, 4.0]], self.evaluate(net) + ) + + def test_column_order(self): + price_a = tf.feature_column.numeric_column("price_a") + price_b = tf.feature_column.numeric_column("price_b") + with tf.Graph().as_default(): + features = { + "price_a": [[1.0]], + "price_b": [[3.0]], + } + net1 = df.DenseFeatures([price_a, price_b])(features) + net2 = df.DenseFeatures([price_b, price_a])(features) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertAllClose([[1.0, 3.0]], self.evaluate(net1)) + self.assertAllClose([[1.0, 3.0]], self.evaluate(net2)) + + def test_fails_for_categorical_column(self): + 
animal = tf.feature_column.categorical_column_with_identity( + "animal", num_buckets=4 + ) + with tf.Graph().as_default(): + features = { + "animal": tf.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2] + ) + } + with self.assertRaisesRegex(Exception, "must be a .*DenseColumn"): + df.DenseFeatures([animal])(features) + + def test_static_batch_size_mismatch(self): + price1 = tf.feature_column.numeric_column("price1") + price2 = tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + features = { + "price1": [[1.0], [5.0], [7.0]], # batchsize = 3 + "price2": [[3.0], [4.0]], # batchsize = 2 + } + with self.assertRaisesRegex( + ValueError, + r"Batch size \(first dimension\) of each feature must be same.", + ): + df.DenseFeatures([price1, price2])(features) + + def test_subset_of_static_batch_size_mismatch(self): + price1 = tf.feature_column.numeric_column("price1") + price2 = tf.feature_column.numeric_column("price2") + price3 = tf.feature_column.numeric_column("price3") + with tf.Graph().as_default(): + features = { + "price1": tf.compat.v1.placeholder( + dtype=tf.int64 + ), # batchsize = 3 + "price2": [[3.0], [4.0]], # batchsize = 2 + "price3": [[3.0], [4.0], [5.0]], # batchsize = 3 + } + with self.assertRaisesRegex( + ValueError, + r"Batch size \(first dimension\) of each feature must be same.", + ): + df.DenseFeatures([price1, price2, price3])(features) + + def test_runtime_batch_size_mismatch(self): + price1 = tf.feature_column.numeric_column("price1") + price2 = tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + features = { + "price1": tf.compat.v1.placeholder( + dtype=tf.int64 + ), # batchsize = 3 + "price2": [[3.0], [4.0]], # batchsize = 2 + } + net = df.DenseFeatures([price1, price2])(features) + with _initialized_session() as sess: + with self.assertRaisesRegex( + tf.errors.OpError, + "Dimension 0 in both shapes must be equal|" + "Dimensions of inputs should match", + ): + sess.run( + net, + feed_dict={features["price1"]: [[1.0], [5.0], [7.0]]}, + ) + + def test_runtime_batch_size_matches(self): + price1 = tf.feature_column.numeric_column("price1") + price2 = tf.feature_column.numeric_column("price2") + with tf.Graph().as_default(): + features = { + "price1": tf.compat.v1.placeholder( + dtype=tf.int64 + ), # batchsize = 2 + "price2": tf.compat.v1.placeholder( + dtype=tf.int64 + ), # batchsize = 2 + } + net = df.DenseFeatures([price1, price2])(features) + with _initialized_session() as sess: + sess.run( + net, + feed_dict={ + features["price1"]: [[1.0], [5.0]], + features["price2"]: [[1.0], [5.0]], + }, + ) + + def test_multiple_layers_with_same_embedding_column(self): + some_sparse_column = ( + tf.feature_column.categorical_column_with_hash_bucket( + "sparse_feature", hash_bucket_size=5 + ) + ) + some_embedding_column = tf.feature_column.embedding_column( + some_sparse_column, dimension=10 + ) + + with tf.Graph().as_default(): + features = { + "sparse_feature": [["a"], ["x"]], + } + all_cols = [some_embedding_column] + df.DenseFeatures(all_cols)(features) + df.DenseFeatures(all_cols)(features) + # Make sure that 2 variables get created in this case. 
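+ # Each DenseFeatures layer builds its own copy of the embedding weights, hence the two variables asserted below.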
+ self.assertEqual( + 2, + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ), + ) + expected_var_names = [ + "dense_features/sparse_feature_embedding/embedding_weights:0", + "dense_features_1/sparse_feature_embedding/embedding_weights:0", + ] + self.assertItemsEqual( + expected_var_names, + [ + v.name + for v in tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ], + ) + + def test_multiple_layers_with_same_shared_embedding_column(self): + categorical_column_a = ( + tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=3 + ) + ) + categorical_column_b = ( + tf.feature_column.categorical_column_with_identity( + key="bbb", num_buckets=3 + ) + ) + embedding_dimension = 2 + + # feature_column.shared_embeddings is not supported in eager. + with tf.Graph().as_default(): + ( + embedding_column_b, + embedding_column_a, + ) = tf.feature_column.shared_embeddings( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension, + ) + features = { + "aaa": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2), + ), + "bbb": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2), + ), + } + all_cols = [embedding_column_a, embedding_column_b] + df.DenseFeatures(all_cols)(features) + df.DenseFeatures(all_cols)(features) + # Make sure that only 1 variable gets created in this case. + self.assertEqual( + 1, + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ), + ) + self.assertItemsEqual( + ["aaa_bbb_shared_embedding:0"], + [ + v.name + for v in tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ], + ) + + def test_multiple_layers_with_same_shared_embedding_column_diff_graphs( + self, + ): + categorical_column_a = ( + tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=3 + ) + ) + categorical_column_b = ( + tf.feature_column.categorical_column_with_identity( + key="bbb", num_buckets=3 + ) + ) + embedding_dimension = 2 + + # feature_column.shared_embeddings is not supported in eager. + with tf.Graph().as_default(): + ( + embedding_column_b, + embedding_column_a, + ) = tf.feature_column.shared_embeddings( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension, + ) + all_cols = [embedding_column_a, embedding_column_b] + features = { + "aaa": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2), + ), + "bbb": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2), + ), + } + df.DenseFeatures(all_cols)(features) + # Make sure that only 1 variable gets created in this case. + self.assertEqual( + 1, + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ), + ) + + with tf.Graph().as_default(): + features1 = { + "aaa": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2), + ), + "bbb": tf.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2), + ), + } + + df.DenseFeatures(all_cols)(features1) + # Make sure that only 1 variable gets created in this case. 
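+ # The shared embedding columns still resolve to a single variable in the fresh graph.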
+ self.assertEqual( + 1, + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ), + ) + self.assertItemsEqual( + ["aaa_bbb_shared_embedding:0"], + [ + v.name + for v in tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + ], + ) + + def test_with_1d_sparse_tensor(self): + embedding_values = ( + (1.0, 2.0, 3.0, 4.0, 5.0), # id 0 + (6.0, 7.0, 8.0, 9.0, 10.0), # id 1 + (11.0, 12.0, 13.0, 14.0, 15.0), # id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + del shape, dtype, partition_info + return embedding_values + + # price has 1 dimension in dense_features + price = tf.feature_column.numeric_column("price") + + # one_hot_body_style has 3 dims in dense_features. + body_style = tf.feature_column.categorical_column_with_vocabulary_list( + "body-style", vocabulary_list=["hardtop", "wagon", "sedan"] + ) + one_hot_body_style = tf.feature_column.indicator_column(body_style) + + # embedded_body_style has 5 dims in dense_features. + country = tf.feature_column.categorical_column_with_vocabulary_list( + "country", vocabulary_list=["US", "JP", "CA"] + ) + embedded_country = tf.feature_column.embedding_column( + country, dimension=5, initializer=_initializer + ) + + with tf.Graph().as_default(): + # Provides 1-dim tensor and dense tensor. + features = { + "price": tf.constant( + [ + 11.0, + 12.0, + ] + ), + "body-style": tf.SparseTensor( + indices=((0,), (1,)), + values=("sedan", "hardtop"), + dense_shape=(2,), + ), + # This is dense tensor for the categorical_column. + "country": tf.constant(["CA", "US"]), + } + self.assertEqual(1, features["price"].shape.ndims) + self.assertEqual( + 1, features["body-style"].dense_shape.get_shape()[0] + ) + self.assertEqual(1, features["country"].shape.ndims) + + net = df.DenseFeatures( + [price, one_hot_body_style, embedded_country] + )(features) + self.assertEqual(1 + 3 + 5, net.shape[1]) + with _initialized_session() as sess: + + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. + self.assertAllEqual( + [ + [0.0, 0.0, 1.0, 11.0, 12.0, 13.0, 14.0, 15.0, 11.0], + [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 12.0], + ], + sess.run(net), + ) + + def test_with_1d_unknown_shape_sparse_tensor(self): + embedding_values = ( + (1.0, 2.0), # id 0 + (6.0, 7.0), # id 1 + (11.0, 12.0), # id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + del shape, dtype, partition_info + return embedding_values + + # price has 1 dimension in dense_features + price = tf.feature_column.numeric_column("price") + + # one_hot_body_style has 3 dims in dense_features. + body_style = tf.feature_column.categorical_column_with_vocabulary_list( + "body-style", vocabulary_list=["hardtop", "wagon", "sedan"] + ) + one_hot_body_style = tf.feature_column.indicator_column(body_style) + + # embedded_body_style has 5 dims in dense_features. + country = tf.feature_column.categorical_column_with_vocabulary_list( + "country", vocabulary_list=["US", "JP", "CA"] + ) + embedded_country = tf.feature_column.embedding_column( + country, dimension=2, initializer=_initializer + ) + + # Provides 1-dim tensor and dense tensor. + with tf.Graph().as_default(): + features = { + "price": tf.compat.v1.placeholder(tf.float32), + "body-style": tf.compat.v1.sparse_placeholder(tf.string), + # This is dense tensor for the categorical_column. 
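+ # (A dense string tensor is accepted here in place of a SparseTensor.)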
+ "country": tf.compat.v1.placeholder(tf.string), + } + self.assertIsNone(features["price"].shape.ndims) + self.assertIsNone(features["body-style"].get_shape().ndims) + self.assertIsNone(features["country"].shape.ndims) + + price_data = np.array([11.0, 12.0]) + body_style_data = tf.compat.v1.SparseTensorValue( + indices=((0,), (1,)), + values=("sedan", "hardtop"), + dense_shape=(2,), + ) + country_data = np.array([["US"], ["CA"]]) + + net = df.DenseFeatures( + [price, one_hot_body_style, embedded_country] + )(features) + self.assertEqual(1 + 3 + 2, net.shape[1]) + with _initialized_session() as sess: + + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. + self.assertAllEqual( + [ + [0.0, 0.0, 1.0, 1.0, 2.0, 11.0], + [1.0, 0.0, 0.0, 11.0, 12.0, 12.0], + ], + sess.run( + net, + feed_dict={ + features["price"]: price_data, + features["body-style"]: body_style_data, + features["country"]: country_data, + }, + ), + ) + + def test_with_rank_0_feature(self): + # price has 1 dimension in dense_features + price = tf.feature_column.numeric_column("price") + features = { + "price": tf.constant(0), + } + self.assertEqual(0, features["price"].shape.ndims) + + # Static rank 0 should fail + with self.assertRaisesRegex( + ValueError, "Feature .* cannot have rank 0" + ): + df.DenseFeatures([price])(features) + + with tf.Graph().as_default(): + # Dynamic rank 0 should fail + features = { + "price": tf.compat.v1.placeholder(tf.float32), + } + net = df.DenseFeatures([price])(features) + self.assertEqual(1, net.shape[1]) + with _initialized_session() as sess: + with self.assertRaisesOpError("Feature .* cannot have rank 0"): + sess.run(net, feed_dict={features["price"]: np.array(1)}) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/feature_column/sequence_feature_column.py b/keras/feature_column/sequence_feature_column.py index 2d6bf69ef58e..89e4f5cfdb76 100644 --- a/keras/feature_column/sequence_feature_column.py +++ b/keras/feature_column/sequence_feature_column.py @@ -22,16 +22,17 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras import backend from keras.feature_column import base_feature_layer as kfc -from tensorflow.python.util.tf_export import keras_export -# pylint: disable=protected-access +# isort: off +from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.experimental.SequenceFeatures') +@keras_export("keras.experimental.SequenceFeatures") class SequenceFeatures(kfc._BaseFeaturesLayer): - """A layer for sequence input. + """A layer for sequence input. All `feature_columns` must be sequence dense columns with the same `sequence_length`. The output of this method can be fed into sequence @@ -76,104 +77,119 @@ class SequenceFeatures(kfc._BaseFeaturesLayer): rnn_layer = tf.keras.layers.RNN(rnn_cell) outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask) ``` - """ - - def __init__( - self, - feature_columns, - trainable=True, - name=None, - **kwargs): - """"Constructs a SequenceFeatures layer. - - Args: - feature_columns: An iterable of dense sequence columns. Valid columns are - - `embedding_column` that wraps a `sequence_categorical_column_with_*` - - `sequence_numeric_column`. - trainable: Boolean, whether the layer's variables will be updated via - gradient descent during training. - name: Name to give to the SequenceFeatures. - **kwargs: Keyword arguments to construct a layer. 
- - Raises: - ValueError: If any of the `feature_columns` is not a - `SequenceDenseColumn`. """ - super().__init__( - feature_columns=feature_columns, - trainable=trainable, - name=name, - expected_column_type=tf.__internal__.feature_column.SequenceDenseColumn, - **kwargs) - - @property - def _is_feature_layer(self): - return True - - def _target_shape(self, input_shape, total_elements): - return (input_shape[0], input_shape[1], total_elements) - - def call(self, features, training=None): - """Returns sequence input corresponding to the `feature_columns`. - - Args: - features: A dict mapping keys to tensors. - training: Python boolean or None, indicating whether to the layer is being - run in training mode. This argument is passed to the call method of any - `FeatureColumn` that takes a `training` argument. For example, if a - `FeatureColumn` performed dropout, the column could expose a `training` - argument to control whether the dropout should be applied. If `None`, - defaults to `tf.keras.backend.learning_phase()`. - - - Returns: - An `(input_layer, sequence_length)` tuple where: - - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. - `T` is the maximum sequence length for this batch, which could differ - from batch to batch. `D` is the sum of `num_elements` for all - `feature_columns`. - - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence - length for each example. - - Raises: - ValueError: If features are not a dictionary. - """ - if not isinstance(features, dict): - raise ValueError('We expected a dictionary here. Instead we got: ', - features) - if training is None: - training = backend.learning_phase() - transformation_cache = tf.__internal__.feature_column.FeatureTransformationCache(features) - output_tensors = [] - sequence_lengths = [] - - for column in self._feature_columns: - with backend.name_scope(column.name): - try: - dense_tensor, sequence_length = column.get_sequence_dense_tensor( - transformation_cache, self._state_manager, training=training) - except TypeError: - dense_tensor, sequence_length = column.get_sequence_dense_tensor( - transformation_cache, self._state_manager) - # Flattens the final dimension to produce a 3D Tensor. - output_tensors.append(self._process_dense_tensor(column, dense_tensor)) - sequence_lengths.append(sequence_length) - - # Check and process sequence lengths. - kfc._verify_static_batch_size_equality( # pylint: disable=protected-access - sequence_lengths, self._feature_columns) - sequence_length = _assert_all_equal_and_return(sequence_lengths) - - return self._verify_and_concat_tensors(output_tensors), sequence_length + + def __init__(self, feature_columns, trainable=True, name=None, **kwargs): + """Constructs a SequenceFeatures layer. + + Args: + feature_columns: An iterable of dense sequence columns. Valid columns + are + - `embedding_column` that wraps a + `sequence_categorical_column_with_*` + - `sequence_numeric_column`. + trainable: Boolean, whether the layer's variables will be updated via + gradient descent during training. + name: Name to give to the SequenceFeatures. + **kwargs: Keyword arguments to construct a layer. + + Raises: + ValueError: If any of the `feature_columns` is not a + `SequenceDenseColumn`.
+ """ + super().__init__( + feature_columns=feature_columns, + trainable=trainable, + name=name, + expected_column_type=tf.__internal__.feature_column.SequenceDenseColumn, # noqa: E501 + **kwargs + ) + + @property + def _is_feature_layer(self): + return True + + def _target_shape(self, input_shape, total_elements): + return (input_shape[0], input_shape[1], total_elements) + + def call(self, features, training=None): + """Returns sequence input corresponding to the `feature_columns`. + + Args: + features: A dict mapping keys to tensors. + training: Python boolean or None, indicating whether the layer is + being run in training mode. This argument is passed to the call + method of any `FeatureColumn` that takes a `training` argument. For + example, if a `FeatureColumn` performed dropout, the column could + expose a `training` argument to control whether the dropout should + be applied. If `None`, falls back to + `tf.keras.backend.learning_phase()`. Defaults to `None`. + + + Returns: + An `(input_layer, sequence_length)` tuple where: + - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could + differ from batch to batch. `D` is the sum of `num_elements` for + all `feature_columns`. + - sequence_length: An int `Tensor` of shape `[batch_size]`. The + sequence length for each example. + + Raises: + ValueError: If features are not a dictionary. + """ + if not isinstance(features, dict): + raise ValueError( + "We expected a dictionary here. Instead we got: ", features + ) + if training is None: + training = backend.learning_phase() + transformation_cache = ( + tf.__internal__.feature_column.FeatureTransformationCache(features) + ) + output_tensors = [] + sequence_lengths = [] + + for column in self._feature_columns: + with backend.name_scope(column.name): + try: + ( + dense_tensor, + sequence_length, + ) = column.get_sequence_dense_tensor( + transformation_cache, + self._state_manager, + training=training, + ) + except TypeError: + ( + dense_tensor, + sequence_length, + ) = column.get_sequence_dense_tensor( + transformation_cache, self._state_manager + ) + # Flattens the final dimension to produce a 3D Tensor. + output_tensors.append( + self._process_dense_tensor(column, dense_tensor) + ) + sequence_lengths.append(sequence_length) + + # Check and process sequence lengths.
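+ # Every column must report the same per-example sequence length; _assert_all_equal_and_return enforces this at run time.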
+ kfc._verify_static_batch_size_equality( + sequence_lengths, self._feature_columns + ) + sequence_length = _assert_all_equal_and_return(sequence_lengths) + + return self._verify_and_concat_tensors(output_tensors), sequence_length def _assert_all_equal_and_return(tensors, name=None): - """Asserts that all tensors are equal and returns the first one.""" - with backend.name_scope(name or 'assert_all_equal'): - if len(tensors) == 1: - return tensors[0] - assert_equal_ops = [] - for t in tensors[1:]: - assert_equal_ops.append(tf.compat.v1.assert_equal(tensors[0], t)) - with tf.control_dependencies(assert_equal_ops): - return tf.identity(tensors[0]) + """Asserts that all tensors are equal and returns the first one.""" + with backend.name_scope(name or "assert_all_equal"): + if len(tensors) == 1: + return tensors[0] + assert_equal_ops = [] + for t in tensors[1:]: + assert_equal_ops.append(tf.compat.v1.assert_equal(tensors[0], t)) + with tf.control_dependencies(assert_equal_ops): + return tf.identity(tensors[0]) diff --git a/keras/feature_column/sequence_feature_column_integration_test.py b/keras/feature_column/sequence_feature_column_integration_test.py index e0a19df1ccf0..b76c04d1facc 100644 --- a/keras/feature_column/sequence_feature_column_integration_test.py +++ b/keras/feature_column/sequence_feature_column_integration_test.py @@ -20,12 +20,6 @@ import tensorflow.compat.v2 as tf - -from google.protobuf import text_format - -from tensorflow.core.example import example_pb2 -from tensorflow.core.example import feature_pb2 -from tensorflow.python.framework import test_util as tf_test_utils from keras import backend from keras.feature_column import dense_features from keras.feature_column import sequence_feature_column as ksfc @@ -33,115 +27,147 @@ from keras.layers.rnn import base_rnn from keras.layers.rnn import simple_rnn +# isort: off +from google.protobuf import text_format +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) + class SequenceFeatureColumnIntegrationTest(tf.test.TestCase): + def _make_sequence_example(self): + example = example_pb2.SequenceExample() + example.context.feature["int_ctx"].int64_list.value.extend([5]) + example.context.feature["float_ctx"].float_list.value.extend([123.6]) + for val in range(0, 10, 2): + feat = feature_pb2.Feature() + feat.int64_list.value.extend([val] * val) + example.feature_lists.feature_list["int_list"].feature.extend( + [feat] + ) + for val in range(1, 11, 2): + feat = feature_pb2.Feature() + feat.bytes_list.value.extend([tf.compat.as_bytes(str(val))] * val) + example.feature_lists.feature_list["str_list"].feature.extend( + [feat] + ) - def _make_sequence_example(self): - example = example_pb2.SequenceExample() - example.context.feature['int_ctx'].int64_list.value.extend([5]) - example.context.feature['float_ctx'].float_list.value.extend([123.6]) - for val in range(0, 10, 2): - feat = feature_pb2.Feature() - feat.int64_list.value.extend([val] * val) - example.feature_lists.feature_list['int_list'].feature.extend([feat]) - for val in range(1, 11, 2): - feat = feature_pb2.Feature() - feat.bytes_list.value.extend([tf.compat.as_bytes(str(val))] * val) - example.feature_lists.feature_list['str_list'].feature.extend([feat]) - - return example + return example - def _build_feature_columns(self): - col = tf.feature_column.categorical_column_with_identity('int_ctx', num_buckets=100) - ctx_cols = [ - 
tf.feature_column.embedding_column(col, dimension=10), - tf.feature_column.numeric_column('float_ctx') - ] + def _build_feature_columns(self): + col = tf.feature_column.categorical_column_with_identity( + "int_ctx", num_buckets=100 + ) + ctx_cols = [ + tf.feature_column.embedding_column(col, dimension=10), + tf.feature_column.numeric_column("float_ctx"), + ] - identity_col = tf.feature_column.sequence_categorical_column_with_identity( - 'int_list', num_buckets=10) - bucket_col = tf.feature_column.sequence_categorical_column_with_hash_bucket( - 'bytes_list', hash_bucket_size=100) - seq_cols = [ - tf.feature_column.embedding_column(identity_col, dimension=10), - tf.feature_column.embedding_column(bucket_col, dimension=20) - ] + identity_col = ( + tf.feature_column.sequence_categorical_column_with_identity( + "int_list", num_buckets=10 + ) + ) + bucket_col = ( + tf.feature_column.sequence_categorical_column_with_hash_bucket( + "bytes_list", hash_bucket_size=100 + ) + ) + seq_cols = [ + tf.feature_column.embedding_column(identity_col, dimension=10), + tf.feature_column.embedding_column(bucket_col, dimension=20), + ] - return ctx_cols, seq_cols + return ctx_cols, seq_cols - def test_sequence_example_into_input_layer(self): - examples = [_make_sequence_example().SerializeToString()] * 100 - ctx_cols, seq_cols = self._build_feature_columns() + def test_sequence_example_into_input_layer(self): + examples = [_make_sequence_example().SerializeToString()] * 100 + ctx_cols, seq_cols = self._build_feature_columns() - def _parse_example(example): - ctx, seq = tf.io.parse_single_sequence_example( - example, - context_features=tf.feature_column.make_parse_example_spec(ctx_cols), - sequence_features=tf.feature_column.make_parse_example_spec(seq_cols)) - ctx.update(seq) - return ctx + def _parse_example(example): + ctx, seq = tf.io.parse_single_sequence_example( + example, + context_features=tf.feature_column.make_parse_example_spec( + ctx_cols + ), + sequence_features=tf.feature_column.make_parse_example_spec( + seq_cols + ), + ) + ctx.update(seq) + return ctx - ds = tf.data.Dataset.from_tensor_slices(examples) - ds = ds.map(_parse_example) - ds = ds.batch(20) + ds = tf.data.Dataset.from_tensor_slices(examples) + ds = ds.map(_parse_example) + ds = ds.batch(20) - # Test on a single batch - features = tf.compat.v1.data.make_one_shot_iterator(ds).get_next() + # Test on a single batch + features = tf.compat.v1.data.make_one_shot_iterator(ds).get_next() - # Tile the context features across the sequence features - sequence_input_layer = ksfc.SequenceFeatures(seq_cols) - seq_input, _ = sequence_input_layer(features) - dense_input_layer = dense_features.DenseFeatures(ctx_cols) - ctx_input = dense_input_layer(features) - ctx_input = backend.repeat(ctx_input, tf.shape(seq_input)[1]) - concatenated_input = merging.concatenate([seq_input, ctx_input]) + # Tile the context features across the sequence features + sequence_input_layer = ksfc.SequenceFeatures(seq_cols) + seq_input, _ = sequence_input_layer(features) + dense_input_layer = dense_features.DenseFeatures(ctx_cols) + ctx_input = dense_input_layer(features) + ctx_input = backend.repeat(ctx_input, tf.shape(seq_input)[1]) + concatenated_input = merging.concatenate([seq_input, ctx_input]) - rnn_layer = base_rnn.RNN(simple_rnn.SimpleRNNCell(10)) - output = rnn_layer(concatenated_input) + rnn_layer = base_rnn.RNN(simple_rnn.SimpleRNNCell(10)) + output = rnn_layer(concatenated_input) - with self.cached_session() as sess: - 
sess.run(tf.compat.v1.global_variables_initializer()) - features_r = sess.run(features) - self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6]) + with self.cached_session() as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + features_r = sess.run(features) + self.assertAllEqual(features_r["int_list"].dense_shape, [20, 3, 6]) - output_r = sess.run(output) - self.assertAllEqual(output_r.shape, [20, 10]) + output_r = sess.run(output) + self.assertAllEqual(output_r.shape, [20, 10]) - @tf_test_utils.run_deprecated_v1 - def test_shared_sequence_non_sequence_into_input_layer(self): - non_seq = tf.feature_column.categorical_column_with_identity('non_seq', - num_buckets=10) - seq = tf.feature_column.sequence_categorical_column_with_identity('seq', - num_buckets=10) - shared_non_seq, shared_seq = tf.feature_column.shared_embeddings( - [non_seq, seq], - dimension=4, - combiner='sum', - initializer=tf.ones_initializer(), - shared_embedding_collection_name='shared') + @tf_test_utils.run_deprecated_v1 + def test_shared_sequence_non_sequence_into_input_layer(self): + non_seq = tf.feature_column.categorical_column_with_identity( + "non_seq", num_buckets=10 + ) + seq = tf.feature_column.sequence_categorical_column_with_identity( + "seq", num_buckets=10 + ) + shared_non_seq, shared_seq = tf.feature_column.shared_embeddings( + [non_seq, seq], + dimension=4, + combiner="sum", + initializer=tf.ones_initializer(), + shared_embedding_collection_name="shared", + ) - seq = tf.SparseTensor( - indices=[[0, 0], [0, 1], [1, 0]], - values=[0, 1, 2], - dense_shape=[2, 2]) - non_seq = tf.SparseTensor( - indices=[[0, 0], [0, 1], [1, 0]], - values=[0, 1, 2], - dense_shape=[2, 2]) - features = {'seq': seq, 'non_seq': non_seq} + seq = tf.SparseTensor( + indices=[[0, 0], [0, 1], [1, 0]], + values=[0, 1, 2], + dense_shape=[2, 2], + ) + non_seq = tf.SparseTensor( + indices=[[0, 0], [0, 1], [1, 0]], + values=[0, 1, 2], + dense_shape=[2, 2], + ) + features = {"seq": seq, "non_seq": non_seq} - # Tile the context features across the sequence features - seq_input, seq_length = ksfc.SequenceFeatures([shared_seq])(features) - non_seq_input = dense_features.DenseFeatures([shared_non_seq])(features) + # Tile the context features across the sequence features + seq_input, seq_length = ksfc.SequenceFeatures([shared_seq])(features) + non_seq_input = dense_features.DenseFeatures([shared_non_seq])(features) - with self.cached_session() as sess: - sess.run(tf.compat.v1.global_variables_initializer()) - output_seq, output_seq_length, output_non_seq = sess.run( - [seq_input, seq_length, non_seq_input]) - self.assertAllEqual(output_seq, [[[1, 1, 1, 1], [1, 1, 1, 1]], - [[1, 1, 1, 1], [0, 0, 0, 0]]]) - self.assertAllEqual(output_seq_length, [2, 1]) - self.assertAllEqual(output_non_seq, [[2, 2, 2, 2], [1, 1, 1, 1]]) + with self.cached_session() as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + output_seq, output_seq_length, output_non_seq = sess.run( + [seq_input, seq_length, non_seq_input] + ) + self.assertAllEqual( + output_seq, + [[[1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0]]], + ) + self.assertAllEqual(output_seq_length, [2, 1]) + self.assertAllEqual(output_non_seq, [[2, 2, 2, 2], [1, 1, 1, 1]]) _SEQ_EX_PROTO = """ @@ -248,9 +274,9 @@ def test_shared_sequence_non_sequence_into_input_layer(self): def _make_sequence_example(): - example = example_pb2.SequenceExample() - return text_format.Parse(_SEQ_EX_PROTO, example) + example = example_pb2.SequenceExample() + return 
text_format.Parse(_SEQ_EX_PROTO, example) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/feature_column/sequence_feature_column_test.py b/keras/feature_column/sequence_feature_column_test.py index 26a6d0895ad4..3e5b9ef1878d 100644 --- a/keras/feature_column/sequence_feature_column_test.py +++ b/keras/feature_column/sequence_feature_column_test.py @@ -18,650 +18,971 @@ from __future__ import division from __future__ import print_function +import numpy as np import tensorflow.compat.v2 as tf - - from absl.testing import parameterized -import numpy as np import keras -from keras.testing_infra import test_combinations from keras.feature_column import sequence_feature_column as ksfc -from keras.saving import model_config +from keras.saving.legacy import model_config +from keras.testing_infra import test_combinations def _initialized_session(config=None): - sess = tf.compat.v1.Session(config=config) - sess.run(tf.compat.v1.global_variables_initializer()) - sess.run(tf.compat.v1.tables_initializer()) - return sess + sess = tf.compat.v1.Session(config=config) + sess.run(tf.compat.v1.global_variables_initializer()) + sess.run(tf.compat.v1.tables_initializer()) + return sess -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class SequenceFeaturesTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters( - {'testcase_name': '2D', - 'sparse_input_args_a': { - # example 0, ids [2] - # example 1, ids [0, 1] - 'indices': ((0, 0), (1, 0), (1, 1)), - 'values': (2, 0, 1), - 'dense_shape': (2, 2)}, - 'sparse_input_args_b': { - # example 0, ids [1] - # example 1, ids [2, 0] - 'indices': ((0, 0), (1, 0), (1, 1)), - 'values': (1, 2, 0), - 'dense_shape': (2, 2)}, - 'expected_input_layer': [ - # example 0, ids_a [2], ids_b [1] - [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [2, 0] - [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],], - 'expected_sequence_length': [1, 2]}, - {'testcase_name': '3D', - 'sparse_input_args_a': { - # feature 0, ids [[2], [0, 1]] - # feature 1, ids [[0, 0], [1]] - 'indices': ( - (0, 0, 0), (0, 1, 0), (0, 1, 1), - (1, 0, 0), (1, 0, 1), (1, 1, 0)), - 'values': (2, 0, 1, 0, 0, 1), - 'dense_shape': (2, 2, 2)}, - 'sparse_input_args_b': { - # feature 0, ids [[1, 1], [1]] - # feature 1, ids [[2], [0]] - 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), - 'values': (1, 1, 1, 2, 0), - 'dense_shape': (2, 2, 2)}, - 'expected_input_layer': [ - # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] - [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]], - # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -] - [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]], - 'expected_sequence_length': [2, 2]}, - ) - def test_embedding_column( - self, sparse_input_args_a, sparse_input_args_b, expected_input_layer, - expected_sequence_length): - - sparse_input_a = tf.compat.v1.SparseTensorValue(**sparse_input_args_a) - sparse_input_b = tf.compat.v1.SparseTensorValue(**sparse_input_args_b) - vocabulary_size = 3 - embedding_dimension_a = 2 - embedding_values_a = ( - (1., 2.), # id 0 - (3., 4.), # id 1 - (5., 6.) 
# id 2 + @parameterized.named_parameters( + { + "testcase_name": "2D", + "sparse_input_args_a": { + # example 0, ids [2] + # example 1, ids [0, 1] + "indices": ((0, 0), (1, 0), (1, 1)), + "values": (2, 0, 1), + "dense_shape": (2, 2), + }, + "sparse_input_args_b": { + # example 0, ids [1] + # example 1, ids [2, 0] + "indices": ((0, 0), (1, 0), (1, 1)), + "values": (1, 2, 0), + "dense_shape": (2, 2), + }, + "expected_input_layer": [ + # example 0, ids_a [2], ids_b [1] + [[5.0, 6.0, 14.0, 15.0, 16.0], [0.0, 0.0, 0.0, 0.0, 0.0]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1.0, 2.0, 17.0, 18.0, 19.0], [3.0, 4.0, 11.0, 12.0, 13.0]], + ], + "expected_sequence_length": [1, 2], + }, + { + "testcase_name": "3D", + "sparse_input_args_a": { + # feature 0, ids [[2], [0, 1]] + # feature 1, ids [[0, 0], [1]] + "indices": ( + (0, 0, 0), + (0, 1, 0), + (0, 1, 1), + (1, 0, 0), + (1, 0, 1), + (1, 1, 0), + ), + "values": (2, 0, 1, 0, 0, 1), + "dense_shape": (2, 2, 2), + }, + "sparse_input_args_b": { + # feature 0, ids [[1, 1], [1]] + # feature 1, ids [[2], [0]] + "indices": ( + (0, 0, 0), + (0, 0, 1), + (0, 1, 0), + (1, 0, 0), + (1, 1, 0), + ), + "values": (1, 1, 1, 2, 0), + "dense_shape": (2, 2, 2), + }, + "expected_input_layer": [ + # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] + [[5.0, 6.0, 14.0, 15.0, 16.0], [2.0, 3.0, 14.0, 15.0, 16.0]], + # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -] + [[1.0, 2.0, 17.0, 18.0, 19.0], [3.0, 4.0, 11.0, 12.0, 13.0]], + ], + "expected_sequence_length": [2, 2], + }, ) - embedding_dimension_b = 3 - embedding_values_b = ( - (11., 12., 13.), # id 0 - (14., 15., 16.), # id 1 - (17., 18., 19.) # id 2 + def test_embedding_column( + self, + sparse_input_args_a, + sparse_input_args_b, + expected_input_layer, + expected_sequence_length, + ): + + sparse_input_a = tf.compat.v1.SparseTensorValue(**sparse_input_args_a) + sparse_input_b = tf.compat.v1.SparseTensorValue(**sparse_input_args_b) + vocabulary_size = 3 + embedding_dimension_a = 2 + embedding_values_a = ( + (1.0, 2.0), # id 0 + (3.0, 4.0), # id 1 + (5.0, 6.0), # id 2 + ) + embedding_dimension_b = 3 + embedding_values_b = ( + (11.0, 12.0, 13.0), # id 0 + (14.0, 15.0, 16.0), # id 1 + (17.0, 18.0, 19.0), # id 2 + ) + + def _get_initializer(embedding_dimension, embedding_values): + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual( + (vocabulary_size, embedding_dimension), shape + ) + self.assertEqual(tf.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + return _initializer + + categorical_column_a = ( + tf.feature_column.sequence_categorical_column_with_identity( + key="aaa", num_buckets=vocabulary_size + ) + ) + embedding_column_a = tf.feature_column.embedding_column( + categorical_column_a, + dimension=embedding_dimension_a, + initializer=_get_initializer( + embedding_dimension_a, embedding_values_a + ), + ) + categorical_column_b = ( + tf.feature_column.sequence_categorical_column_with_identity( + key="bbb", num_buckets=vocabulary_size + ) + ) + embedding_column_b = tf.feature_column.embedding_column( + categorical_column_b, + dimension=embedding_dimension_b, + initializer=_get_initializer( + embedding_dimension_b, embedding_values_b + ), + ) + + # Test that columns are reordered alphabetically. 
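+ # The columns are passed as [b, a]; the assertions below expect the "aaa" features to come first in the output.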
+ sequence_input_layer = ksfc.SequenceFeatures( + [embedding_column_b, embedding_column_a] + ) + input_layer, sequence_length = sequence_input_layer( + { + "aaa": sparse_input_a, + "bbb": sparse_input_b, + } + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + weights = sequence_input_layer.weights + self.assertCountEqual( + ( + "sequence_features/aaa_embedding/embedding_weights:0", + "sequence_features/bbb_embedding/embedding_weights:0", + ), + tuple([v.name for v in weights]), + ) + self.assertAllEqual(embedding_values_a, self.evaluate(weights[0])) + self.assertAllEqual(embedding_values_b, self.evaluate(weights[1])) + self.assertAllEqual(expected_input_layer, self.evaluate(input_layer)) + self.assertAllEqual( + expected_sequence_length, self.evaluate(sequence_length) + ) + + def test_embedding_column_with_non_sequence_categorical(self): + """Tests that error is raised for non-sequence embedding column.""" + vocabulary_size = 3 + sparse_input = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2), + ) + + categorical_column_a = ( + tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=vocabulary_size + ) + ) + embedding_column_a = tf.feature_column.embedding_column( + categorical_column_a, dimension=2 + ) + sequence_input_layer = ksfc.SequenceFeatures([embedding_column_a]) + with self.assertRaisesRegex( + ValueError, + r"In embedding_column: aaa_embedding\. categorical_column must be " + r"of type SequenceCategoricalColumn to use SequenceFeatures\.", + ): + _, _ = sequence_input_layer({"aaa": sparse_input}) + + def test_shared_embedding_column(self): + with tf.Graph().as_default(): + vocabulary_size = 3 + sparse_input_a = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2), + ) + sparse_input_b = tf.compat.v1.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [2, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2), + ) + + embedding_dimension = 2 + embedding_values = ( + (1.0, 2.0), # id 0 + (3.0, 4.0), # id 1 + (5.0, 6.0), # id 2 + ) + + def _get_initializer(embedding_dimension, embedding_values): + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual( + (vocabulary_size, embedding_dimension), shape + ) + self.assertEqual(tf.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + return _initializer + + expected_input_layer = [ + # example 0, ids_a [2], ids_b [1] + [[5.0, 6.0, 3.0, 4.0], [0.0, 0.0, 0.0, 0.0]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1.0, 2.0, 5.0, 6.0], [3.0, 4.0, 1.0, 2.0]], + ] + expected_sequence_length = [1, 2] + + categorical_column_a = ( + tf.feature_column.sequence_categorical_column_with_identity( + key="aaa", num_buckets=vocabulary_size + ) + ) + categorical_column_b = ( + tf.feature_column.sequence_categorical_column_with_identity( + key="bbb", num_buckets=vocabulary_size + ) + ) + # Test that columns are reordered alphabetically. 
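+            # shared_embeddings() yields one column per categorical column,
+            # but they all read from a single shared weight variable
+            # (asserted below as "aaa_bbb_shared_embedding:0").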
+ shared_embedding_columns = tf.feature_column.shared_embeddings( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension, + initializer=_get_initializer( + embedding_dimension, embedding_values + ), + ) + + sequence_input_layer = ksfc.SequenceFeatures( + shared_embedding_columns + ) + input_layer, sequence_length = sequence_input_layer( + {"aaa": sparse_input_a, "bbb": sparse_input_b} + ) + + global_vars = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + self.assertCountEqual( + ("aaa_bbb_shared_embedding:0",), + tuple([v.name for v in global_vars]), + ) + with _initialized_session() as sess: + self.assertAllEqual( + embedding_values, global_vars[0].eval(session=sess) + ) + self.assertAllEqual( + expected_input_layer, input_layer.eval(session=sess) + ) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess) + ) + + def test_shared_embedding_column_with_non_sequence_categorical(self): + """Tests that error is raised for non-sequence shared embedding + column.""" + with tf.Graph().as_default(): + vocabulary_size = 3 + sparse_input_a = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2), + ) + sparse_input_b = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2), + ) + + categorical_column_a = ( + tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=vocabulary_size + ) + ) + categorical_column_b = ( + tf.feature_column.categorical_column_with_identity( + key="bbb", num_buckets=vocabulary_size + ) + ) + shared_embedding_columns = tf.feature_column.shared_embeddings( + [categorical_column_a, categorical_column_b], dimension=2 + ) + + sequence_input_layer = ksfc.SequenceFeatures( + shared_embedding_columns + ) + with self.assertRaisesRegex( + ValueError, + r"In embedding_column: aaa_shared_embedding\. 
" + r"categorical_column must " + r"be of type SequenceCategoricalColumn to use " + r"SequenceFeatures\.", + ): + _, _ = sequence_input_layer( + {"aaa": sparse_input_a, "bbb": sparse_input_b} + ) + + @parameterized.named_parameters( + { + "testcase_name": "2D", + "sparse_input_args_a": { + # example 0, ids [2] + # example 1, ids [0, 1] + "indices": ((0, 0), (1, 0), (1, 1)), + "values": (2, 0, 1), + "dense_shape": (2, 2), + }, + "sparse_input_args_b": { + # example 0, ids [1] + # example 1, ids [1, 0] + "indices": ((0, 0), (1, 0), (1, 1)), + "values": (1, 1, 0), + "dense_shape": (2, 2), + }, + "expected_input_layer": [ + # example 0, ids_a [2], ids_b [1] + [[0.0, 0.0, 1.0, 0.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0]], + # example 1, ids_a [0, 1], ids_b [1, 0] + [[1.0, 0.0, 0.0, 0.0, 1.0], [0.0, 1.0, 0.0, 1.0, 0.0]], + ], + "expected_sequence_length": [1, 2], + }, + { + "testcase_name": "3D", + "sparse_input_args_a": { + # feature 0, ids [[2], [0, 1]] + # feature 1, ids [[0, 0], [1]] + "indices": ( + (0, 0, 0), + (0, 1, 0), + (0, 1, 1), + (1, 0, 0), + (1, 0, 1), + (1, 1, 0), + ), + "values": (2, 0, 1, 0, 0, 1), + "dense_shape": (2, 2, 2), + }, + "sparse_input_args_b": { + # feature 0, ids [[1, 1], [1]] + # feature 1, ids [[1], [0]] + "indices": ( + (0, 0, 0), + (0, 0, 1), + (0, 1, 0), + (1, 0, 0), + (1, 1, 0), + ), + "values": (1, 1, 1, 1, 0), + "dense_shape": (2, 2, 2), + }, + "expected_input_layer": [ + # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] + [[0.0, 0.0, 1.0, 0.0, 2.0], [1.0, 1.0, 0.0, 0.0, 1.0]], + # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -] + [[2.0, 0.0, 0.0, 0.0, 1.0], [0.0, 1.0, 0.0, 1.0, 0.0]], + ], + "expected_sequence_length": [2, 2], + }, ) - def _get_initializer(embedding_dimension, embedding_values): - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(tf.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - return _initializer - - categorical_column_a = tf.feature_column.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column_a = tf.feature_column.embedding_column( - categorical_column_a, - dimension=embedding_dimension_a, - initializer=_get_initializer(embedding_dimension_a, embedding_values_a)) - categorical_column_b = tf.feature_column.sequence_categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_b = tf.feature_column.embedding_column( - categorical_column_b, - dimension=embedding_dimension_b, - initializer=_get_initializer(embedding_dimension_b, embedding_values_b)) - - # Test that columns are reordered alphabetically. 
- sequence_input_layer = ksfc.SequenceFeatures( - [embedding_column_b, embedding_column_a]) - input_layer, sequence_length = sequence_input_layer({ - 'aaa': sparse_input_a, 'bbb': sparse_input_b,}) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - weights = sequence_input_layer.weights - self.assertCountEqual( - ('sequence_features/aaa_embedding/embedding_weights:0', - 'sequence_features/bbb_embedding/embedding_weights:0'), - tuple([v.name for v in weights])) - self.assertAllEqual(embedding_values_a, self.evaluate(weights[0])) - self.assertAllEqual(embedding_values_b, self.evaluate(weights[1])) - self.assertAllEqual(expected_input_layer, self.evaluate(input_layer)) - self.assertAllEqual( - expected_sequence_length, self.evaluate(sequence_length)) - - def test_embedding_column_with_non_sequence_categorical(self): - """Tests that error is raised for non-sequence embedding column.""" - vocabulary_size = 3 - sparse_input = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - - categorical_column_a = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column_a = tf.feature_column.embedding_column( - categorical_column_a, dimension=2) - sequence_input_layer = ksfc.SequenceFeatures([embedding_column_a]) - with self.assertRaisesRegex( - ValueError, - r'In embedding_column: aaa_embedding\. categorical_column must be of ' - r'type SequenceCategoricalColumn to use SequenceFeatures\.'): - _, _ = sequence_input_layer({'aaa': sparse_input}) - - def test_shared_embedding_column(self): - with tf.Graph().as_default(): - vocabulary_size = 3 - sparse_input_a = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - sparse_input_b = tf.compat.v1.SparseTensorValue( - # example 0, ids [1] - # example 1, ids [2, 0] - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 4.), # id 1 - (5., 6.) # id 2 - ) - - def _get_initializer(embedding_dimension, embedding_values): - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(tf.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - return _initializer - - expected_input_layer = [ - # example 0, ids_a [2], ids_b [1] - [[5., 6., 3., 4.], [0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [2, 0] - [[1., 2., 5., 6.], [3., 4., 1., 2.]], - ] - expected_sequence_length = [1, 2] - - categorical_column_a = tf.feature_column.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = tf.feature_column.sequence_categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - # Test that columns are reordered alphabetically. 
- shared_embedding_columns = tf.feature_column.shared_embeddings( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension, - initializer=_get_initializer(embedding_dimension, embedding_values)) - - sequence_input_layer = ksfc.SequenceFeatures(shared_embedding_columns) - input_layer, sequence_length = sequence_input_layer({ - 'aaa': sparse_input_a, 'bbb': sparse_input_b}) - - global_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual( - ('aaa_bbb_shared_embedding:0',), - tuple([v.name for v in global_vars])) - with _initialized_session() as sess: - self.assertAllEqual(embedding_values, - global_vars[0].eval(session=sess)) - self.assertAllEqual(expected_input_layer, - input_layer.eval(session=sess)) + def test_indicator_column( + self, + sparse_input_args_a, + sparse_input_args_b, + expected_input_layer, + expected_sequence_length, + ): + sparse_input_a = tf.compat.v1.SparseTensorValue(**sparse_input_args_a) + sparse_input_b = tf.compat.v1.SparseTensorValue(**sparse_input_args_b) + + vocabulary_size_a = 3 + vocabulary_size_b = 2 + + categorical_column_a = ( + tf.feature_column.sequence_categorical_column_with_identity( + key="aaa", num_buckets=vocabulary_size_a + ) + ) + indicator_column_a = tf.feature_column.indicator_column( + categorical_column_a + ) + categorical_column_b = ( + tf.feature_column.sequence_categorical_column_with_identity( + key="bbb", num_buckets=vocabulary_size_b + ) + ) + indicator_column_b = tf.feature_column.indicator_column( + categorical_column_b + ) + # Test that columns are reordered alphabetically. + sequence_input_layer = ksfc.SequenceFeatures( + [indicator_column_b, indicator_column_a] + ) + input_layer, sequence_length = sequence_input_layer( + {"aaa": sparse_input_a, "bbb": sparse_input_b} + ) + + self.assertAllEqual(expected_input_layer, self.evaluate(input_layer)) self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - def test_shared_embedding_column_with_non_sequence_categorical(self): - """Tests that error is raised for non-sequence shared embedding column.""" - with tf.Graph().as_default(): - vocabulary_size = 3 - sparse_input_a = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - sparse_input_b = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - - categorical_column_a = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = tf.feature_column.categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - shared_embedding_columns = tf.feature_column.shared_embeddings( - [categorical_column_a, categorical_column_b], dimension=2) - - sequence_input_layer = ksfc.SequenceFeatures(shared_embedding_columns) - with self.assertRaisesRegex( - ValueError, - r'In embedding_column: aaa_shared_embedding\. 
' - r'categorical_column must ' - r'be of type SequenceCategoricalColumn to use SequenceFeatures\.'): - _, _ = sequence_input_layer({'aaa': sparse_input_a, - 'bbb': sparse_input_b}) - - @parameterized.named_parameters( - {'testcase_name': '2D', - 'sparse_input_args_a': { - # example 0, ids [2] - # example 1, ids [0, 1] - 'indices': ((0, 0), (1, 0), (1, 1)), - 'values': (2, 0, 1), - 'dense_shape': (2, 2)}, - 'sparse_input_args_b': { - # example 0, ids [1] - # example 1, ids [1, 0] - 'indices': ((0, 0), (1, 0), (1, 1)), - 'values': (1, 1, 0), - 'dense_shape': (2, 2)}, - 'expected_input_layer': [ - # example 0, ids_a [2], ids_b [1] - [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [1, 0] - [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], - 'expected_sequence_length': [1, 2]}, - {'testcase_name': '3D', - 'sparse_input_args_a': { - # feature 0, ids [[2], [0, 1]] - # feature 1, ids [[0, 0], [1]] - 'indices': ( - (0, 0, 0), (0, 1, 0), (0, 1, 1), - (1, 0, 0), (1, 0, 1), (1, 1, 0)), - 'values': (2, 0, 1, 0, 0, 1), - 'dense_shape': (2, 2, 2)}, - 'sparse_input_args_b': { - # feature 0, ids [[1, 1], [1]] - # feature 1, ids [[1], [0]] - 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), - 'values': (1, 1, 1, 1, 0), - 'dense_shape': (2, 2, 2)}, - 'expected_input_layer': [ - # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] - [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]], - # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -] - [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], - 'expected_sequence_length': [2, 2]}, - ) - def test_indicator_column( - self, sparse_input_args_a, sparse_input_args_b, expected_input_layer, - expected_sequence_length): - sparse_input_a = tf.compat.v1.SparseTensorValue(**sparse_input_args_a) - sparse_input_b = tf.compat.v1.SparseTensorValue(**sparse_input_args_b) - - vocabulary_size_a = 3 - vocabulary_size_b = 2 - - categorical_column_a = tf.feature_column.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size_a) - indicator_column_a = tf.feature_column.indicator_column(categorical_column_a) - categorical_column_b = tf.feature_column.sequence_categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size_b) - indicator_column_b = tf.feature_column.indicator_column(categorical_column_b) - # Test that columns are reordered alphabetically. - sequence_input_layer = ksfc.SequenceFeatures( - [indicator_column_b, indicator_column_a]) - input_layer, sequence_length = sequence_input_layer({ - 'aaa': sparse_input_a, 'bbb': sparse_input_b}) - - self.assertAllEqual(expected_input_layer, self.evaluate(input_layer)) - self.assertAllEqual( - expected_sequence_length, self.evaluate(sequence_length)) - - def test_indicator_column_with_non_sequence_categorical(self): - """Tests that error is raised for non-sequence categorical column.""" - vocabulary_size = 3 - sparse_input = tf.compat.v1.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - - categorical_column_a = tf.feature_column.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - indicator_column_a = tf.feature_column.indicator_column(categorical_column_a) - - sequence_input_layer = ksfc.SequenceFeatures([indicator_column_a]) - with self.assertRaisesRegex( - ValueError, - r'In indicator_column: aaa_indicator\. 
categorical_column must be of ' - r'type SequenceCategoricalColumn to use SequenceFeatures\.'): - _, _ = sequence_input_layer({'aaa': sparse_input}) - - @parameterized.named_parameters( - {'testcase_name': '2D', - 'sparse_input_args': { - # example 0, values [0., 1] - # example 1, [10.] - 'indices': ((0, 0), (0, 1), (1, 0)), - 'values': (0., 1., 10.), - 'dense_shape': (2, 2)}, - 'expected_input_layer': [ - [[0.], [1.]], - [[10.], [0.]]], - 'expected_sequence_length': [2, 1]}, - {'testcase_name': '3D', - 'sparse_input_args': { - # feature 0, ids [[20, 3], [5]] - # feature 1, ids [[3], [8]] - 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), - 'values': (20., 3., 5., 3., 8.), - 'dense_shape': (2, 2, 2)}, - 'expected_input_layer': [ - [[20.], [3.], [5.], [0.]], - [[3.], [0.], [8.], [0.]]], - 'expected_sequence_length': [2, 2]}, - ) - def test_numeric_column( - self, sparse_input_args, expected_input_layer, expected_sequence_length): - sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args) - - numeric_column = tf.feature_column.sequence_numeric_column('aaa') - - sequence_input_layer = ksfc.SequenceFeatures([numeric_column]) - input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input}) - - self.assertAllEqual(expected_input_layer, self.evaluate(input_layer)) - self.assertAllEqual( - expected_sequence_length, self.evaluate(sequence_length)) - - @parameterized.named_parameters( - {'testcase_name': '2D', - 'sparse_input_args': { - # example 0, values [0., 1., 2., 3., 4., 5., 6., 7.] - # example 1, [10., 11., 12., 13.] - 'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), - (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), - 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - 'dense_shape': (2, 8)}, - 'expected_input_layer': [ - # The output of numeric_column._get_dense_tensor should be flattened. - [[0., 1., 2., 3.], [4., 5., 6., 7.]], - [[10., 11., 12., 13.], [0., 0., 0., 0.]]], - 'expected_sequence_length': [2, 1]}, - {'testcase_name': '3D', - 'sparse_input_args': { - # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] - # example 1, [[10., 11., 12., 13.], []] - 'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), - (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), - (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), - 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - 'dense_shape': (2, 2, 4)}, - 'expected_input_layer': [ - # The output of numeric_column._get_dense_tensor should be flattened. 
- [[0., 1., 2., 3.], [4., 5., 6., 7.]], - [[10., 11., 12., 13.], [0., 0., 0., 0.]]], - 'expected_sequence_length': [2, 1]}, - ) - def test_numeric_column_multi_dim( - self, sparse_input_args, expected_input_layer, expected_sequence_length): - """Tests SequenceFeatures for multi-dimensional numeric_column.""" - sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args) - - numeric_column = tf.feature_column.sequence_numeric_column('aaa', shape=(2, 2)) - - sequence_input_layer = ksfc.SequenceFeatures([numeric_column]) - input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input}) - - self.assertAllEqual(expected_input_layer, self.evaluate(input_layer)) - self.assertAllEqual( - expected_sequence_length, self.evaluate(sequence_length)) - - def test_sequence_length_not_equal(self): - """Tests that an error is raised when sequence lengths are not equal.""" - # Input a with sequence_length = [2, 1] - sparse_input_a = tf.compat.v1.SparseTensorValue( - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - # Input b with sequence_length = [1, 1] - sparse_input_b = tf.compat.v1.SparseTensorValue( - indices=((0, 0), (1, 0)), - values=(1., 10.), - dense_shape=(2, 2)) - numeric_column_a = tf.feature_column.sequence_numeric_column('aaa') - numeric_column_b = tf.feature_column.sequence_numeric_column('bbb') - - sequence_input_layer = ksfc.SequenceFeatures( - [numeric_column_a, numeric_column_b]) - - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - r'Condition x == y did not hold.*'): - _, sequence_length = sequence_input_layer({ - 'aaa': sparse_input_a, - 'bbb': sparse_input_b - }) - self.evaluate(sequence_length) - - @parameterized.named_parameters( - {'testcase_name': '2D', - 'sparse_input_args': { - # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] - # example 1, [[[10., 11.], [12., 13.]]] - 'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), - (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), - 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - 'dense_shape': (2, 8)}, - 'expected_shape': [2, 2, 4]}, - {'testcase_name': '3D', - 'sparse_input_args': { - # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] - # example 1, [[10., 11., 12., 13.], []] - 'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), - (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), - (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), - 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - 'dense_shape': (2, 2, 4)}, - 'expected_shape': [2, 2, 4]}, - ) - def test_static_shape_from_tensors_numeric( - self, sparse_input_args, expected_shape): - """Tests that we return a known static shape when we have one.""" - sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args) - numeric_column = tf.feature_column.sequence_numeric_column('aaa', shape=(2, 2)) - - sequence_input_layer = ksfc.SequenceFeatures([numeric_column]) - input_layer, _ = sequence_input_layer({'aaa': sparse_input}) - shape = input_layer.get_shape() - self.assertEqual(shape, expected_shape) - - @parameterized.named_parameters( - {'testcase_name': '2D', - 'sparse_input_args': { - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - 'indices': ((0, 0), (1, 0), (1, 1), (3, 0)), - 'values': (2, 0, 1, 1), - 'dense_shape': (4, 2)}, - 'expected_shape': [4, 2, 3]}, - {'testcase_name': '3D', - 'sparse_input_args': { - # example 0, ids [[2]] - # example 1, ids [[0, 1], [2]] - # example 2, ids [] - # example 3, ids [[1], [0, 2]] - 
'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), - (3, 0, 0), (3, 1, 0), (3, 1, 1)), - 'values': (2, 0, 1, 2, 1, 0, 2), - 'dense_shape': (4, 2, 2)}, - 'expected_shape': [4, 2, 3]} - ) - def test_static_shape_from_tensors_indicator( - self, sparse_input_args, expected_shape): - """Tests that we return a known static shape when we have one.""" - sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args) - categorical_column = tf.feature_column.sequence_categorical_column_with_identity( - key='aaa', num_buckets=3) - indicator_column = tf.feature_column.indicator_column(categorical_column) - - sequence_input_layer = ksfc.SequenceFeatures([indicator_column]) - input_layer, _ = sequence_input_layer({'aaa': sparse_input}) - shape = input_layer.get_shape() - self.assertEqual(shape, expected_shape) - - def test_compute_output_shape(self): - price1 = tf.feature_column.sequence_numeric_column('price1', shape=2) - price2 = tf.feature_column.sequence_numeric_column('price2') - features = { - 'price1': tf.SparseTensor( - indices=[[0, 0, 0], [0, 0, 1], - [0, 1, 0], [0, 1, 1], - [1, 0, 0], [1, 0, 1], - [2, 0, 0], [2, 0, 1], - [3, 0, 0], [3, 0, 1]], - values=[0., 1., 10., 11., 100., 101., 200., 201., 300., 301.], - dense_shape=(4, 3, 2)), - 'price2': tf.SparseTensor( - indices=[[0, 0], - [0, 1], - [1, 0], - [2, 0], - [3, 0]], - values=[10., 11., 20., 30., 40.], - dense_shape=(4, 3))} - sequence_features = ksfc.SequenceFeatures([price1, price2]) - seq_input, seq_len = sequence_features(features) - self.assertEqual( - sequence_features.compute_output_shape((None, None)), - (None, None, 3)) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertAllClose([[[0., 1., 10.], [10., 11., 11.], [0., 0., 0.]], - [[100., 101., 20.], [0., 0., 0.], [0., 0., 0.]], - [[200., 201., 30.], [0., 0., 0.], [0., 0., 0.]], - [[300., 301., 40.], [0., 0., 0.], [0., 0., 0.]]], - self.evaluate(seq_input)) - self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class SequenceFeaturesSerializationTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters(('trainable', True, 'trainable'), - ('not_trainable', False, 'frozen')) - def test_get_config(self, trainable, name): - cols = [tf.feature_column.sequence_numeric_column('a')] - orig_layer = ksfc.SequenceFeatures(cols, trainable=trainable, name=name) - config = orig_layer.get_config() - - self.assertEqual(config['name'], orig_layer.name) - self.assertEqual(config['trainable'], trainable) - self.assertLen(config['feature_columns'], 1) - self.assertEqual(config['feature_columns'][0]['class_name'], - 'SequenceNumericColumn') - self.assertEqual(config['feature_columns'][0]['config']['shape'], (1,)) - - @parameterized.named_parameters(('trainable', True, 'trainable'), - ('not_trainable', False, 'frozen')) - def test_from_config(self, trainable, name): - cols = [tf.feature_column.sequence_numeric_column('a')] - orig_layer = ksfc.SequenceFeatures(cols, trainable=trainable, name=name) - config = orig_layer.get_config() - - new_layer = ksfc.SequenceFeatures.from_config(config) - - self.assertEqual(new_layer.name, orig_layer.name) - self.assertEqual(new_layer.trainable, trainable) - self.assertLen(new_layer._feature_columns, 1) - self.assertEqual(new_layer._feature_columns[0].name, 'a') - - def test_serialization_sequence_features(self): - rating = 
tf.feature_column.sequence_numeric_column('rating') - sequence_feature = ksfc.SequenceFeatures([rating]) - config = keras.layers.serialize(sequence_feature) - - revived = keras.layers.deserialize(config) - self.assertIsInstance(revived, ksfc.SequenceFeatures) + expected_sequence_length, self.evaluate(sequence_length) + ) + + def test_indicator_column_with_non_sequence_categorical(self): + """Tests that error is raised for non-sequence categorical column.""" + vocabulary_size = 3 + sparse_input = tf.compat.v1.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2), + ) + + categorical_column_a = ( + tf.feature_column.categorical_column_with_identity( + key="aaa", num_buckets=vocabulary_size + ) + ) + indicator_column_a = tf.feature_column.indicator_column( + categorical_column_a + ) + + sequence_input_layer = ksfc.SequenceFeatures([indicator_column_a]) + with self.assertRaisesRegex( + ValueError, + r"In indicator_column: aaa_indicator\. categorical_column must be " + r"of type SequenceCategoricalColumn to use SequenceFeatures\.", + ): + _, _ = sequence_input_layer({"aaa": sparse_input}) + + @parameterized.named_parameters( + { + "testcase_name": "2D", + "sparse_input_args": { + # example 0, values [0., 1] + # example 1, [10.] + "indices": ((0, 0), (0, 1), (1, 0)), + "values": (0.0, 1.0, 10.0), + "dense_shape": (2, 2), + }, + "expected_input_layer": [[[0.0], [1.0]], [[10.0], [0.0]]], + "expected_sequence_length": [2, 1], + }, + { + "testcase_name": "3D", + "sparse_input_args": { + # feature 0, ids [[20, 3], [5]] + # feature 1, ids [[3], [8]] + "indices": ( + (0, 0, 0), + (0, 0, 1), + (0, 1, 0), + (1, 0, 0), + (1, 1, 0), + ), + "values": (20.0, 3.0, 5.0, 3.0, 8.0), + "dense_shape": (2, 2, 2), + }, + "expected_input_layer": [ + [[20.0], [3.0], [5.0], [0.0]], + [[3.0], [0.0], [8.0], [0.0]], + ], + "expected_sequence_length": [2, 2], + }, + ) + def test_numeric_column( + self, sparse_input_args, expected_input_layer, expected_sequence_length + ): + sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args) + numeric_column = tf.feature_column.sequence_numeric_column("aaa") -class SequenceFeaturesSavingTest(tf.test.TestCase, parameterized.TestCase): + sequence_input_layer = ksfc.SequenceFeatures([numeric_column]) + input_layer, sequence_length = sequence_input_layer( + {"aaa": sparse_input} + ) - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_saving_with_sequence_features(self): - cols = [ - tf.feature_column.sequence_numeric_column('a'), - tf.feature_column.indicator_column( - tf.feature_column.sequence_categorical_column_with_vocabulary_list( - 'b', ['one', 'two'])) - ] - input_layers = { - 'a': - keras.layers.Input(shape=(None, 1), sparse=True, name='a'), - 'b': - keras.layers.Input( - shape=(None, 1), sparse=True, name='b', dtype='string') - } - - fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers) - # TODO(tibell): Figure out the right dtype and apply masking. 
- # sequence_length_mask = array_ops.sequence_mask(sequence_length) - # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask) - x = keras.layers.GRU(32)(fc_layer) - output = keras.layers.Dense(10)(x) - - model = keras.models.Model(input_layers, output) - - model.compile( - loss=keras.losses.MSE, - optimizer='rmsprop', - metrics=[keras.metrics.categorical_accuracy]) - - config = model.to_json() - loaded_model = model_config.model_from_json(config) - - batch_size = 10 - timesteps = 1 - - values_a = np.arange(10, dtype=np.float32) - indices_a = np.zeros((10, 3), dtype=np.int64) - indices_a[:, 0] = np.arange(10) - inputs_a = tf.SparseTensor(indices_a, values_a, - (batch_size, timesteps, 1)) - - values_b = np.zeros(10, dtype=np.str) - indices_b = np.zeros((10, 3), dtype=np.int64) - indices_b[:, 0] = np.arange(10) - inputs_b = tf.SparseTensor(indices_b, values_b, - (batch_size, timesteps, 1)) - - with self.cached_session(): - # Initialize tables for V1 lookup. - if not tf.executing_eagerly(): + self.assertAllEqual(expected_input_layer, self.evaluate(input_layer)) + self.assertAllEqual( + expected_sequence_length, self.evaluate(sequence_length) + ) + + @parameterized.named_parameters( + { + "testcase_name": "2D", + "sparse_input_args": { + # example 0, values [0., 1., 2., 3., 4., 5., 6., 7.] + # example 1, [10., 11., 12., 13.] + "indices": ( + (0, 0), + (0, 1), + (0, 2), + (0, 3), + (0, 4), + (0, 5), + (0, 6), + (0, 7), + (1, 0), + (1, 1), + (1, 2), + (1, 3), + ), + "values": ( + 0.0, + 1.0, + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + 7.0, + 10.0, + 11.0, + 12.0, + 13.0, + ), + "dense_shape": (2, 8), + }, + "expected_input_layer": [ + # The output of numeric_column._get_dense_tensor should be + # flattened. + [[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0]], + [[10.0, 11.0, 12.0, 13.0], [0.0, 0.0, 0.0, 0.0]], + ], + "expected_sequence_length": [2, 1], + }, + { + "testcase_name": "3D", + "sparse_input_args": { + # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] + # example 1, [[10., 11., 12., 13.], []] + "indices": ( + (0, 0, 0), + (0, 0, 1), + (0, 0, 2), + (0, 0, 3), + (0, 1, 0), + (0, 1, 1), + (0, 1, 2), + (0, 1, 3), + (1, 0, 0), + (1, 0, 1), + (1, 0, 2), + (1, 0, 3), + ), + "values": ( + 0.0, + 1.0, + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + 7.0, + 10.0, + 11.0, + 12.0, + 13.0, + ), + "dense_shape": (2, 2, 4), + }, + "expected_input_layer": [ + # The output of numeric_column._get_dense_tensor should be + # flattened. 
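+                # Each (2, 2)-shaped step is therefore emitted as a flat
+                # length-4 vector per timestep.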
+ [[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0]], + [[10.0, 11.0, 12.0, 13.0], [0.0, 0.0, 0.0, 0.0]], + ], + "expected_sequence_length": [2, 1], + }, + ) + def test_numeric_column_multi_dim( + self, sparse_input_args, expected_input_layer, expected_sequence_length + ): + """Tests SequenceFeatures for multi-dimensional numeric_column.""" + sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args) + + numeric_column = tf.feature_column.sequence_numeric_column( + "aaa", shape=(2, 2) + ) + + sequence_input_layer = ksfc.SequenceFeatures([numeric_column]) + input_layer, sequence_length = sequence_input_layer( + {"aaa": sparse_input} + ) + + self.assertAllEqual(expected_input_layer, self.evaluate(input_layer)) + self.assertAllEqual( + expected_sequence_length, self.evaluate(sequence_length) + ) + + def test_sequence_length_not_equal(self): + """Tests that an error is raised when sequence lengths are not equal.""" + # Input a with sequence_length = [2, 1] + sparse_input_a = tf.compat.v1.SparseTensorValue( + indices=((0, 0), (0, 1), (1, 0)), + values=(0.0, 1.0, 10.0), + dense_shape=(2, 2), + ) + # Input b with sequence_length = [1, 1] + sparse_input_b = tf.compat.v1.SparseTensorValue( + indices=((0, 0), (1, 0)), values=(1.0, 10.0), dense_shape=(2, 2) + ) + numeric_column_a = tf.feature_column.sequence_numeric_column("aaa") + numeric_column_b = tf.feature_column.sequence_numeric_column("bbb") + + sequence_input_layer = ksfc.SequenceFeatures( + [numeric_column_a, numeric_column_b] + ) + + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, r"Condition x == y did not hold.*" + ): + _, sequence_length = sequence_input_layer( + {"aaa": sparse_input_a, "bbb": sparse_input_b} + ) + self.evaluate(sequence_length) + + @parameterized.named_parameters( + { + "testcase_name": "2D", + "sparse_input_args": { + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., + # 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + "indices": ( + (0, 0), + (0, 1), + (0, 2), + (0, 3), + (0, 4), + (0, 5), + (0, 6), + (0, 7), + (1, 0), + (1, 1), + (1, 2), + (1, 3), + ), + "values": ( + 0.0, + 1.0, + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + 7.0, + 10.0, + 11.0, + 12.0, + 13.0, + ), + "dense_shape": (2, 8), + }, + "expected_shape": [2, 2, 4], + }, + { + "testcase_name": "3D", + "sparse_input_args": { + # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] + # example 1, [[10., 11., 12., 13.], []] + "indices": ( + (0, 0, 0), + (0, 0, 1), + (0, 0, 2), + (0, 0, 3), + (0, 1, 0), + (0, 1, 1), + (0, 1, 2), + (0, 1, 3), + (1, 0, 0), + (1, 0, 1), + (1, 0, 2), + (1, 0, 3), + ), + "values": ( + 0.0, + 1.0, + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + 7.0, + 10.0, + 11.0, + 12.0, + 13.0, + ), + "dense_shape": (2, 2, 4), + }, + "expected_shape": [2, 2, 4], + }, + ) + def test_static_shape_from_tensors_numeric( + self, sparse_input_args, expected_shape + ): + """Tests that we return a known static shape when we have one.""" + sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args) + numeric_column = tf.feature_column.sequence_numeric_column( + "aaa", shape=(2, 2) + ) + + sequence_input_layer = ksfc.SequenceFeatures([numeric_column]) + input_layer, _ = sequence_input_layer({"aaa": sparse_input}) + shape = input_layer.get_shape() + self.assertEqual(shape, expected_shape) + + @parameterized.named_parameters( + { + "testcase_name": "2D", + "sparse_input_args": { + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + "indices": ((0, 0), (1, 0), (1, 1), (3, 0)), + "values": (2, 0, 1, 
1), + "dense_shape": (4, 2), + }, + "expected_shape": [4, 2, 3], + }, + { + "testcase_name": "3D", + "sparse_input_args": { + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [0, 2]] + "indices": ( + (0, 0, 0), + (1, 0, 0), + (1, 0, 1), + (1, 1, 0), + (3, 0, 0), + (3, 1, 0), + (3, 1, 1), + ), + "values": (2, 0, 1, 2, 1, 0, 2), + "dense_shape": (4, 2, 2), + }, + "expected_shape": [4, 2, 3], + }, + ) + def test_static_shape_from_tensors_indicator( + self, sparse_input_args, expected_shape + ): + """Tests that we return a known static shape when we have one.""" + sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args) + categorical_column = ( + tf.feature_column.sequence_categorical_column_with_identity( + key="aaa", num_buckets=3 + ) + ) + indicator_column = tf.feature_column.indicator_column( + categorical_column + ) + + sequence_input_layer = ksfc.SequenceFeatures([indicator_column]) + input_layer, _ = sequence_input_layer({"aaa": sparse_input}) + shape = input_layer.get_shape() + self.assertEqual(shape, expected_shape) + + def test_compute_output_shape(self): + price1 = tf.feature_column.sequence_numeric_column("price1", shape=2) + price2 = tf.feature_column.sequence_numeric_column("price2") + features = { + "price1": tf.SparseTensor( + indices=[ + [0, 0, 0], + [0, 0, 1], + [0, 1, 0], + [0, 1, 1], + [1, 0, 0], + [1, 0, 1], + [2, 0, 0], + [2, 0, 1], + [3, 0, 0], + [3, 0, 1], + ], + values=[ + 0.0, + 1.0, + 10.0, + 11.0, + 100.0, + 101.0, + 200.0, + 201.0, + 300.0, + 301.0, + ], + dense_shape=(4, 3, 2), + ), + "price2": tf.SparseTensor( + indices=[[0, 0], [0, 1], [1, 0], [2, 0], [3, 0]], + values=[10.0, 11.0, 20.0, 30.0, 40.0], + dense_shape=(4, 3), + ), + } + sequence_features = ksfc.SequenceFeatures([price1, price2]) + seq_input, seq_len = sequence_features(features) + self.assertEqual( + sequence_features.compute_output_shape((None, None)), + (None, None, 3), + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) self.evaluate(tf.compat.v1.tables_initializer()) - self.assertLen( - loaded_model.predict({ - 'a': inputs_a, - 'b': inputs_b - }, steps=1), batch_size) + self.assertAllClose( + [ + [[0.0, 1.0, 10.0], [10.0, 11.0, 11.0], [0.0, 0.0, 0.0]], + [[100.0, 101.0, 20.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + [[200.0, 201.0, 30.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + [[300.0, 301.0, 40.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + ], + self.evaluate(seq_input), + ) + self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class SequenceFeaturesSerializationTest( + tf.test.TestCase, parameterized.TestCase +): + @parameterized.named_parameters( + ("trainable", True, "trainable"), ("not_trainable", False, "frozen") + ) + def test_get_config(self, trainable, name): + cols = [tf.feature_column.sequence_numeric_column("a")] + orig_layer = ksfc.SequenceFeatures(cols, trainable=trainable, name=name) + config = orig_layer.get_config() + + self.assertEqual(config["name"], orig_layer.name) + self.assertEqual(config["trainable"], trainable) + self.assertLen(config["feature_columns"], 1) + self.assertEqual( + config["feature_columns"][0]["class_name"], "SequenceNumericColumn" + ) + self.assertEqual(config["feature_columns"][0]["config"]["shape"], (1,)) + + @parameterized.named_parameters( + ("trainable", True, "trainable"), ("not_trainable", False, "frozen") + ) + def test_from_config(self, trainable, name): + cols = 
[tf.feature_column.sequence_numeric_column("a")] + orig_layer = ksfc.SequenceFeatures(cols, trainable=trainable, name=name) + config = orig_layer.get_config() + + new_layer = ksfc.SequenceFeatures.from_config(config) + self.assertEqual(new_layer.name, orig_layer.name) + self.assertEqual(new_layer.trainable, trainable) + self.assertLen(new_layer._feature_columns, 1) + self.assertEqual(new_layer._feature_columns[0].name, "a") -if __name__ == '__main__': - tf.test.main() + def test_serialization_sequence_features(self): + rating = tf.feature_column.sequence_numeric_column("rating") + sequence_feature = ksfc.SequenceFeatures([rating]) + config = keras.layers.serialize(sequence_feature) + + revived = keras.layers.deserialize(config) + self.assertIsInstance(revived, ksfc.SequenceFeatures) + + +class SequenceFeaturesSavingTest(tf.test.TestCase, parameterized.TestCase): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_saving_with_sequence_features(self): + cols = [ + tf.feature_column.sequence_numeric_column("a"), + tf.feature_column.indicator_column( + tf.feature_column.sequence_categorical_column_with_vocabulary_list( # noqa: E501 + "b", ["one", "two"] + ) + ), + ] + input_layers = { + "a": keras.layers.Input(shape=(None, 1), sparse=True, name="a"), + "b": keras.layers.Input( + shape=(None, 1), sparse=True, name="b", dtype="string" + ), + } + + fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers) + # TODO(tibell): Figure out the right dtype and apply masking. + # sequence_length_mask = array_ops.sequence_mask(sequence_length) + # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask) + x = keras.layers.GRU(32)(fc_layer) + output = keras.layers.Dense(10)(x) + + model = keras.models.Model(input_layers, output) + + model.compile( + loss=keras.losses.MSE, + optimizer="rmsprop", + metrics=[keras.metrics.categorical_accuracy], + ) + + config = model.to_json() + loaded_model = model_config.model_from_json(config) + + batch_size = 10 + timesteps = 1 + + values_a = np.arange(10, dtype=np.float32) + indices_a = np.zeros((10, 3), dtype=np.int64) + indices_a[:, 0] = np.arange(10) + inputs_a = tf.SparseTensor( + indices_a, values_a, (batch_size, timesteps, 1) + ) + + values_b = np.zeros(10, dtype=str) + indices_b = np.zeros((10, 3), dtype=np.int64) + indices_b[:, 0] = np.arange(10) + inputs_b = tf.SparseTensor( + indices_b, values_b, (batch_size, timesteps, 1) + ) + + with self.cached_session(): + # Initialize tables for V1 lookup. + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertLen( + loaded_model.predict({"a": inputs_a, "b": inputs_b}, steps=1), + batch_size, + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/initializers/BUILD b/keras/initializers/BUILD index 17b421722145..5dadf380f4c4 100644 --- a/keras/initializers/BUILD +++ b/keras/initializers/BUILD @@ -1,9 +1,11 @@ # Description: # Contains the Keras initializer API (internal TensorFlow version). 
+# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", ], @@ -14,14 +16,15 @@ py_library( name = "initializers", srcs = [ "__init__.py", + "initializers.py", "initializers_v1.py", - "initializers_v2.py", ], srcs_version = "PY3", deps = [ "//:expect_tensorflow_installed", "//keras:backend", "//keras/dtensor:utils", + "//keras/saving:serialization_lib", "//keras/utils:generic_utils", "//keras/utils:tf_inspect", ], diff --git a/keras/initializers/__init__.py b/keras/initializers/__init__.py index abb4fa36e46b..0069ca2a082e 100644 --- a/keras/initializers/__init__.py +++ b/keras/initializers/__init__.py @@ -14,108 +14,116 @@ # ============================================================================== """Keras initializer serialization / deserialization.""" -import tensorflow.compat.v2 as tf - import threading +import warnings -from tensorflow.python import tf2 +import tensorflow.compat.v2 as tf + +from keras.initializers import initializers from keras.initializers import initializers_v1 -from keras.initializers import initializers_v2 +from keras.saving import serialization_lib +from keras.saving.legacy import serialization as legacy_serialization from keras.utils import generic_utils from keras.utils import tf_inspect as inspect + +# isort: off +from tensorflow.python import tf2 from tensorflow.python.ops import init_ops from tensorflow.python.util.tf_export import keras_export - # LOCAL.ALL_OBJECTS is meant to be a global mutable. Hence we need to make it # thread-local to avoid concurrent mutations. LOCAL = threading.local() def populate_deserializable_objects(): - """Populates dict ALL_OBJECTS with every built-in initializer. - """ - global LOCAL - if not hasattr(LOCAL, 'ALL_OBJECTS'): + """Populates dict ALL_OBJECTS with every built-in initializer.""" + global LOCAL + if not hasattr(LOCAL, "ALL_OBJECTS"): + LOCAL.ALL_OBJECTS = {} + LOCAL.GENERATED_WITH_V2 = None + + if ( + LOCAL.ALL_OBJECTS + and LOCAL.GENERATED_WITH_V2 == tf.__internal__.tf2.enabled() + ): + # Objects dict is already generated for the proper TF version: + # do nothing. + return + LOCAL.ALL_OBJECTS = {} - LOCAL.GENERATED_WITH_V2 = None - - if LOCAL.ALL_OBJECTS and LOCAL.GENERATED_WITH_V2 == tf.__internal__.tf2.enabled(): - # Objects dict is already generated for the proper TF version: - # do nothing. - return - - LOCAL.ALL_OBJECTS = {} - LOCAL.GENERATED_WITH_V2 = tf.__internal__.tf2.enabled() - - # Compatibility aliases (need to exist in both V1 and V2). 
- LOCAL.ALL_OBJECTS['ConstantV2'] = initializers_v2.Constant - LOCAL.ALL_OBJECTS['GlorotNormalV2'] = initializers_v2.GlorotNormal - LOCAL.ALL_OBJECTS['GlorotUniformV2'] = initializers_v2.GlorotUniform - LOCAL.ALL_OBJECTS['HeNormalV2'] = initializers_v2.HeNormal - LOCAL.ALL_OBJECTS['HeUniformV2'] = initializers_v2.HeUniform - LOCAL.ALL_OBJECTS['IdentityV2'] = initializers_v2.Identity - LOCAL.ALL_OBJECTS['LecunNormalV2'] = initializers_v2.LecunNormal - LOCAL.ALL_OBJECTS['LecunUniformV2'] = initializers_v2.LecunUniform - LOCAL.ALL_OBJECTS['OnesV2'] = initializers_v2.Ones - LOCAL.ALL_OBJECTS['OrthogonalV2'] = initializers_v2.Orthogonal - LOCAL.ALL_OBJECTS['RandomNormalV2'] = initializers_v2.RandomNormal - LOCAL.ALL_OBJECTS['RandomUniformV2'] = initializers_v2.RandomUniform - LOCAL.ALL_OBJECTS['TruncatedNormalV2'] = initializers_v2.TruncatedNormal - LOCAL.ALL_OBJECTS['VarianceScalingV2'] = initializers_v2.VarianceScaling - LOCAL.ALL_OBJECTS['ZerosV2'] = initializers_v2.Zeros - - # Out of an abundance of caution we also include these aliases that have - # a non-zero probability of having been included in saved configs in the past. - LOCAL.ALL_OBJECTS['glorot_normalV2'] = initializers_v2.GlorotNormal - LOCAL.ALL_OBJECTS['glorot_uniformV2'] = initializers_v2.GlorotUniform - LOCAL.ALL_OBJECTS['he_normalV2'] = initializers_v2.HeNormal - LOCAL.ALL_OBJECTS['he_uniformV2'] = initializers_v2.HeUniform - LOCAL.ALL_OBJECTS['lecun_normalV2'] = initializers_v2.LecunNormal - LOCAL.ALL_OBJECTS['lecun_uniformV2'] = initializers_v2.LecunUniform - - if tf.__internal__.tf2.enabled(): - # For V2, entries are generated automatically based on the content of - # initializers_v2.py. - v2_objs = {} - base_cls = initializers_v2.Initializer - generic_utils.populate_dict_with_module_objects( - v2_objs, - [initializers_v2], - obj_filter=lambda x: inspect.isclass(x) and issubclass(x, base_cls)) - for key, value in v2_objs.items(): - LOCAL.ALL_OBJECTS[key] = value - # Functional aliases. - LOCAL.ALL_OBJECTS[generic_utils.to_snake_case(key)] = value - else: - # V1 initializers. - v1_objs = { - 'Constant': tf.compat.v1.constant_initializer, - 'GlorotNormal': tf.compat.v1.glorot_normal_initializer, - 'GlorotUniform': tf.compat.v1.glorot_uniform_initializer, - 'Identity': tf.compat.v1.initializers.identity, - 'Ones': tf.compat.v1.ones_initializer, - 'Orthogonal': tf.compat.v1.orthogonal_initializer, - 'VarianceScaling': tf.compat.v1.variance_scaling_initializer, - 'Zeros': tf.compat.v1.zeros_initializer, - 'HeNormal': initializers_v1.HeNormal, - 'HeUniform': initializers_v1.HeUniform, - 'LecunNormal': initializers_v1.LecunNormal, - 'LecunUniform': initializers_v1.LecunUniform, - 'RandomNormal': initializers_v1.RandomNormal, - 'RandomUniform': initializers_v1.RandomUniform, - 'TruncatedNormal': initializers_v1.TruncatedNormal, - } - for key, value in v1_objs.items(): - LOCAL.ALL_OBJECTS[key] = value - # Functional aliases. - LOCAL.ALL_OBJECTS[generic_utils.to_snake_case(key)] = value - - # More compatibility aliases. - LOCAL.ALL_OBJECTS['normal'] = LOCAL.ALL_OBJECTS['random_normal'] - LOCAL.ALL_OBJECTS['uniform'] = LOCAL.ALL_OBJECTS['random_uniform'] - LOCAL.ALL_OBJECTS['one'] = LOCAL.ALL_OBJECTS['ones'] - LOCAL.ALL_OBJECTS['zero'] = LOCAL.ALL_OBJECTS['zeros'] + LOCAL.GENERATED_WITH_V2 = tf.__internal__.tf2.enabled() + + # Compatibility aliases (need to exist in both V1 and V2). 
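+    # These "V2"-suffixed names may appear in configs saved by earlier
+    # TF/Keras versions, so both spellings must stay deserializable.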
+ LOCAL.ALL_OBJECTS["ConstantV2"] = initializers.Constant + LOCAL.ALL_OBJECTS["GlorotNormalV2"] = initializers.GlorotNormal + LOCAL.ALL_OBJECTS["GlorotUniformV2"] = initializers.GlorotUniform + LOCAL.ALL_OBJECTS["HeNormalV2"] = initializers.HeNormal + LOCAL.ALL_OBJECTS["HeUniformV2"] = initializers.HeUniform + LOCAL.ALL_OBJECTS["IdentityV2"] = initializers.Identity + LOCAL.ALL_OBJECTS["LecunNormalV2"] = initializers.LecunNormal + LOCAL.ALL_OBJECTS["LecunUniformV2"] = initializers.LecunUniform + LOCAL.ALL_OBJECTS["OnesV2"] = initializers.Ones + LOCAL.ALL_OBJECTS["OrthogonalV2"] = initializers.Orthogonal + LOCAL.ALL_OBJECTS["RandomNormalV2"] = initializers.RandomNormal + LOCAL.ALL_OBJECTS["RandomUniformV2"] = initializers.RandomUniform + LOCAL.ALL_OBJECTS["TruncatedNormalV2"] = initializers.TruncatedNormal + LOCAL.ALL_OBJECTS["VarianceScalingV2"] = initializers.VarianceScaling + LOCAL.ALL_OBJECTS["ZerosV2"] = initializers.Zeros + + # Out of an abundance of caution we also include these aliases that have + # a non-zero probability of having been included in saved configs in the + # past. + LOCAL.ALL_OBJECTS["glorot_normalV2"] = initializers.GlorotNormal + LOCAL.ALL_OBJECTS["glorot_uniformV2"] = initializers.GlorotUniform + LOCAL.ALL_OBJECTS["he_normalV2"] = initializers.HeNormal + LOCAL.ALL_OBJECTS["he_uniformV2"] = initializers.HeUniform + LOCAL.ALL_OBJECTS["lecun_normalV2"] = initializers.LecunNormal + LOCAL.ALL_OBJECTS["lecun_uniformV2"] = initializers.LecunUniform + + if tf.__internal__.tf2.enabled(): + # For V2, entries are generated automatically based on the content of + # initializers.py. + v2_objs = {} + base_cls = initializers.Initializer + generic_utils.populate_dict_with_module_objects( + v2_objs, + [initializers], + obj_filter=lambda x: inspect.isclass(x) and issubclass(x, base_cls), + ) + for key, value in v2_objs.items(): + LOCAL.ALL_OBJECTS[key] = value + # Functional aliases. + LOCAL.ALL_OBJECTS[generic_utils.to_snake_case(key)] = value + else: + # V1 initializers. + v1_objs = { + "Constant": tf.compat.v1.constant_initializer, + "GlorotNormal": tf.compat.v1.glorot_normal_initializer, + "GlorotUniform": tf.compat.v1.glorot_uniform_initializer, + "Identity": tf.compat.v1.initializers.identity, + "Ones": tf.compat.v1.ones_initializer, + "Orthogonal": tf.compat.v1.orthogonal_initializer, + "VarianceScaling": tf.compat.v1.variance_scaling_initializer, + "Zeros": tf.compat.v1.zeros_initializer, + "HeNormal": initializers_v1.HeNormal, + "HeUniform": initializers_v1.HeUniform, + "LecunNormal": initializers_v1.LecunNormal, + "LecunUniform": initializers_v1.LecunUniform, + "RandomNormal": initializers_v1.RandomNormal, + "RandomUniform": initializers_v1.RandomUniform, + "TruncatedNormal": initializers_v1.TruncatedNormal, + } + for key, value in v1_objs.items(): + LOCAL.ALL_OBJECTS[key] = value + # Functional aliases. + LOCAL.ALL_OBJECTS[generic_utils.to_snake_case(key)] = value + + # More compatibility aliases. 
+ LOCAL.ALL_OBJECTS["normal"] = LOCAL.ALL_OBJECTS["random_normal"] + LOCAL.ALL_OBJECTS["uniform"] = LOCAL.ALL_OBJECTS["random_uniform"] + LOCAL.ALL_OBJECTS["one"] = LOCAL.ALL_OBJECTS["ones"] + LOCAL.ALL_OBJECTS["zero"] = LOCAL.ALL_OBJECTS["zeros"] # For backwards compatibility, we populate this file with the objects @@ -127,67 +135,91 @@ def populate_deserializable_objects(): # Utility functions -@keras_export('keras.initializers.serialize') -def serialize(initializer): - return generic_utils.serialize_keras_object(initializer) - - -@keras_export('keras.initializers.deserialize') -def deserialize(config, custom_objects=None): - """Return an `Initializer` object from its config.""" - populate_deserializable_objects() - return generic_utils.deserialize_keras_object( - config, - module_objects=LOCAL.ALL_OBJECTS, - custom_objects=custom_objects, - printable_module_name='initializer') - - -@keras_export('keras.initializers.get') +@keras_export("keras.initializers.serialize") +def serialize(initializer, use_legacy_format=False): + populate_deserializable_objects() + if initializer is None: + return None + if not isinstance(initializer, tuple(LOCAL.ALL_OBJECTS.values())): + warnings.warn( + "The `keras.initializers.serialize()` API should only be used for " + "objects of type `keras.initializers.Initializer`. Found an " + f"instance of type {type(initializer)}, which may lead to improper " + "serialization." + ) + if use_legacy_format: + return legacy_serialization.serialize_keras_object(initializer) + + return serialization_lib.serialize_keras_object(initializer) + + +@keras_export("keras.initializers.deserialize") +def deserialize(config, custom_objects=None, use_legacy_format=False): + """Return an `Initializer` object from its config.""" + populate_deserializable_objects() + if use_legacy_format: + return legacy_serialization.deserialize_keras_object( + config, + module_objects=LOCAL.ALL_OBJECTS, + custom_objects=custom_objects, + printable_module_name="initializer", + ) + + return serialization_lib.deserialize_keras_object( + config, + module_objects=LOCAL.ALL_OBJECTS, + custom_objects=custom_objects, + printable_module_name="initializer", + ) + + +@keras_export("keras.initializers.get") def get(identifier): - """Retrieve a Keras initializer by the identifier. - - The `identifier` may be the string name of a initializers function or class ( - case-sensitively). - - >>> identifier = 'Ones' - >>> tf.keras.initializers.deserialize(identifier) - <...keras.initializers.initializers_v2.Ones...> - - You can also specify `config` of the initializer to this function by passing - dict containing `class_name` and `config` as an identifier. Also note that the - `class_name` must map to a `Initializer` class. - - >>> cfg = {'class_name': 'Ones', 'config': {}} - >>> tf.keras.initializers.deserialize(cfg) - <...keras.initializers.initializers_v2.Ones...> - - In the case that the `identifier` is a class, this method will return a new - instance of the class by its constructor. - - Args: - identifier: String or dict that contains the initializer name or - configurations. - - Returns: - Initializer instance base on the input identifier. - - Raises: - ValueError: If the input identifier is not a supported type or in a bad - format. 
- """ - - if identifier is None: - return None - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, str): - identifier = str(identifier) - return deserialize(identifier) - elif callable(identifier): - if inspect.isclass(identifier): - identifier = identifier() - return identifier - else: - raise ValueError('Could not interpret initializer identifier: ' + - str(identifier)) + """Retrieve a Keras initializer by the identifier. + + The `identifier` may be the string name of a initializers function or class + (case-sensitively). + + >>> identifier = 'Ones' + >>> tf.keras.initializers.deserialize(identifier) + <...keras.initializers.initializers.Ones...> + + You can also specify `config` of the initializer to this function by passing + dict containing `class_name` and `config` as an identifier. Also note that + the `class_name` must map to a `Initializer` class. + + >>> cfg = {'class_name': 'Ones', 'config': {}} + >>> tf.keras.initializers.deserialize(cfg) + <...keras.initializers.initializers.Ones...> + + In the case that the `identifier` is a class, this method will return a new + instance of the class by its constructor. + + Args: + identifier: String or dict that contains the initializer name or + configurations. + + Returns: + Initializer instance base on the input identifier. + + Raises: + ValueError: If the input identifier is not a supported type or in a bad + format. + """ + + if identifier is None: + return None + if isinstance(identifier, dict): + use_legacy_format = "module" not in identifier + return deserialize(identifier, use_legacy_format=use_legacy_format) + elif isinstance(identifier, str): + config = {"class_name": str(identifier), "config": {}} + return get(config) + elif callable(identifier): + if inspect.isclass(identifier): + identifier = identifier() + return identifier + else: + raise ValueError( + "Could not interpret initializer identifier: " + str(identifier) + ) diff --git a/keras/initializers/initializers.py b/keras/initializers/initializers.py new file mode 100644 index 000000000000..8fc3da655947 --- /dev/null +++ b/keras/initializers/initializers.py @@ -0,0 +1,1191 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Keras initializers.""" + +import math +import warnings + +import tensorflow.compat.v2 as tf + +from keras import backend +from keras.dtensor import utils +from keras.saving import serialization_lib + +# isort: off +from tensorflow.python.util.tf_export import keras_export + +_PARTITION_SHAPE = "partition_shape" +_PARTITION_OFFSET = "partition_offset" +_LAYOUT = "layout" +_ALLOWED_INITIALIZER_KWARGS = [_PARTITION_SHAPE, _PARTITION_OFFSET, _LAYOUT] + + +@keras_export("keras.initializers.Initializer") +class Initializer: + """Initializer base class: all Keras initializers inherit from this class. 
+
+    Initializers should implement a `__call__()` method with the following
+    signature:
+
+    ```python
+    def __call__(self, shape, dtype=None, **kwargs):
+        # returns a tensor of shape `shape` and dtype `dtype`
+        # containing values drawn from a distribution of your choice.
+        return tf.random.uniform(shape=shape, dtype=dtype)
+    ```
+
+    Optionally, you can also implement the method `get_config()` and the
+    class method `from_config()` in order to support serialization -- just
+    like with any Keras object.
+
+    Here's a simple example: a random normal initializer.
+
+    ```python
+    class ExampleRandomNormal(Initializer):
+        def __init__(self, mean, stddev):
+            self.mean = mean
+            self.stddev = stddev
+
+        def __call__(self, shape, dtype=None, **kwargs):
+            return tf.random.normal(
+                shape, mean=self.mean, stddev=self.stddev, dtype=dtype
+            )
+
+        def get_config(self):  # To support serialization
+            return {"mean": self.mean, "stddev": self.stddev}
+    ```
+
+    Note that we don't have to implement `from_config()` in the example above
+    since the constructor arguments of the class and the keys in the config
+    returned by `get_config()` are the same. In this case, the default
+    `from_config()` works fine.
+    """
+
+    def __call__(self, shape, dtype=None, **kwargs):
+        """Returns a tensor object initialized as specified by the initializer.
+
+        Args:
+            shape: Shape of the tensor.
+            dtype: Optional dtype of the tensor.
+            **kwargs: Additional keyword arguments.
+        """
+        raise NotImplementedError(
+            "Initializer subclasses must implement the `__call__()` method."
+        )
+
+    def get_config(self):
+        """Returns the initializer's configuration as a JSON-serializable dict.
+
+        Returns:
+            A JSON-serializable Python dict.
+        """
+        return {}
+
+    @classmethod
+    def from_config(cls, config):
+        """Instantiates an initializer from a configuration dictionary.
+
+        Example:
+
+        ```python
+        initializer = RandomUniform(-1, 1)
+        config = initializer.get_config()
+        initializer = RandomUniform.from_config(config)
+        ```
+
+        Args:
+            config: A Python dictionary, the output of `get_config()`.
+
+        Returns:
+            An `Initializer` instance.
+        """
+        config.pop("dtype", None)
+        return cls(**config)
+
+    def _warn_reuse(self):
+        if getattr(self, "_used", False):
+            if getattr(self, "seed", None) is None:
+                warnings.warn(
+                    f"The initializer {self.__class__.__name__} is unseeded "
+                    "and being called multiple times, which will return "
+                    "identical values each time (even if the initializer is "
+                    "unseeded). Please update your code to provide a seed to "
+                    "the initializer, or avoid using the same initializer "
+                    "instance more than once."
+                )
+        else:
+            self._used = True
+
+
+@keras_export("keras.initializers.Zeros", "keras.initializers.zeros", v1=[])
+class Zeros(Initializer):
+    """Initializer that generates tensors initialized to 0.
+
+    Also available via the shortcut function `tf.keras.initializers.zeros`.
+
+    Examples:
+
+    >>> # Standalone usage:
+    >>> initializer = tf.keras.initializers.Zeros()
+    >>> values = initializer(shape=(2, 2))
+
+    >>> # Usage in a Keras layer:
+    >>> initializer = tf.keras.initializers.Zeros()
+    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
+    """
+
+    def __call__(self, shape, dtype=None, **kwargs):
+        """Returns a tensor object initialized as specified by the initializer.
+
+        Args:
+            shape: Shape of the tensor.
+            dtype: Optional dtype of the tensor. Only numeric or boolean dtypes
+                are supported.
If not specified, `keras.backend.floatx()` is + used, which defaults to `float32` unless you configured it + otherwise (via `keras.backend.set_floatx(float_dtype)`). + **kwargs: Additional keyword arguments. + """ + _validate_kwargs(self.__class__.__name__, kwargs) + dtype = _get_dtype(dtype) + if not dtype.is_numpy_compatible or dtype == tf.string: + raise ValueError(f"Expected numeric or boolean dtype, got {dtype}.") + if _PARTITION_SHAPE in kwargs: + shape = kwargs[_PARTITION_SHAPE] + layout = kwargs.pop("layout", None) + if layout: + return utils.call_with_layout( + tf.zeros, layout, shape=shape, dtype=dtype + ) + return tf.zeros(shape, dtype) + + +@keras_export("keras.initializers.Ones", "keras.initializers.ones", v1=[]) +class Ones(Initializer): + """Initializer that generates tensors initialized to 1. + + Also available via the shortcut function `tf.keras.initializers.ones`. + + Examples: + + >>> # Standalone usage: + >>> initializer = tf.keras.initializers.Ones() + >>> values = initializer(shape=(2, 2)) + + >>> # Usage in a Keras layer: + >>> initializer = tf.keras.initializers.Ones() + >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) + """ + + def __call__(self, shape, dtype=None, **kwargs): + """Returns a tensor object initialized as specified by the initializer. + + Args: + shape: Shape of the tensor. + dtype: Optional dtype of the tensor. Only numeric or boolean dtypes + are supported. If not specified, `keras.backend.floatx()` is + used, which defaults to `float32` unless you configured it + otherwise (via `keras.backend.set_floatx(float_dtype)`). + **kwargs: Additional keyword arguments. + """ + _validate_kwargs(self.__class__.__name__, kwargs) + dtype = _get_dtype(dtype) + if not dtype.is_numpy_compatible or dtype == tf.string: + raise ValueError(f"Expected numeric or boolean dtype, got {dtype}.") + if _PARTITION_SHAPE in kwargs: + shape = kwargs[_PARTITION_SHAPE] + layout = kwargs.pop("layout", None) + if layout: + return utils.call_with_layout( + tf.ones, layout, shape=shape, dtype=dtype + ) + return tf.ones(shape, dtype) + + +@keras_export( + "keras.initializers.Constant", "keras.initializers.constant", v1=[] +) +class Constant(Initializer): + """Initializer that generates tensors with constant values. + + Also available via the shortcut function `tf.keras.initializers.constant`. + + Only scalar values are allowed. + The constant value provided must be convertible to the dtype requested + when calling the initializer. + + Examples: + + >>> # Standalone usage: + >>> initializer = tf.keras.initializers.Constant(3.) + >>> values = initializer(shape=(2, 2)) + + >>> # Usage in a Keras layer: + >>> initializer = tf.keras.initializers.Constant(3.) + >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) + + Args: + value: A Python scalar. + """ + + def __init__(self, value=0): + self.value = value + + def __call__(self, shape, dtype=None, **kwargs): + """Returns a tensor object initialized to `self.value`. + + Args: + shape: Shape of the tensor. + dtype: Optional dtype of the tensor. If not specified, + `keras.backend.floatx()` is used, + which defaults to `float32` unless you configured it + otherwise (via `keras.backend.set_floatx(float_dtype)`). + **kwargs: Additional keyword arguments. 
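The `partition_shape`/`partition_offset` keywords accepted here are how partitioned variables request only their own shard; a minimal sketch, with shapes chosen arbitrarily:

```python
import tensorflow.compat.v2 as tf

init = tf.keras.initializers.Zeros()

# The full variable is (4, 2); the initializer is asked for a (2, 2) shard,
# so `partition_shape` replaces `shape` internally.
shard = init(shape=(4, 2), partition_shape=(2, 2), partition_offset=(0, 0))
assert shard.shape == (2, 2)
```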
+ """ + _validate_kwargs(self.__class__.__name__, kwargs) + dtype = _get_dtype(dtype) + if _PARTITION_SHAPE in kwargs: + shape = kwargs[_PARTITION_SHAPE] + layout = kwargs.pop("layout", None) + if layout: + return utils.call_with_layout( + tf.constant, layout, self.value, shape=shape, dtype=dtype + ) + return tf.constant(self.value, dtype=_get_dtype(dtype), shape=shape) + + def get_config(self): + return {"value": self.value} + + @classmethod + def from_config(cls, config): + config.pop("dtype", None) + if "value" in config: + if isinstance(config["value"], dict): + config["value"] = serialization_lib.deserialize_keras_object( + config["value"] + ) + return cls(**config) + + +@keras_export( + "keras.initializers.RandomUniform", + "keras.initializers.random_uniform", + v1=[], +) +class RandomUniform(Initializer): + """Initializer that generates tensors with a uniform distribution. + + Also available via the shortcut function + `tf.keras.initializers.random_uniform`. + + Examples: + + >>> # Standalone usage: + >>> initializer = tf.keras.initializers.RandomUniform(minval=0., maxval=1.) + >>> values = initializer(shape=(2, 2)) + + >>> # Usage in a Keras layer: + >>> initializer = tf.keras.initializers.RandomUniform(minval=0., maxval=1.) + >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) + + Args: + minval: A python scalar or a scalar tensor. Lower bound of the range of + random values to generate (inclusive). + maxval: A python scalar or a scalar tensor. Upper bound of the range of + random values to generate (exclusive). + seed: A Python integer. Used to make the behavior of the initializer + deterministic. Note that a seeded initializer will produce the same + random values across multiple calls. + """ + + def __init__(self, minval=-0.05, maxval=0.05, seed=None): + self.minval = minval + self.maxval = maxval + self.seed = seed + self._random_generator = backend.RandomGenerator( + seed, rng_type="stateless" + ) + + def __call__(self, shape, dtype=None, **kwargs): + """Returns a tensor object initialized as specified by the initializer. + + Args: + shape: Shape of the tensor. + dtype: Optional dtype of the tensor. Only floating point and integer + types are supported. If not specified, + `tf.keras.backend.floatx()` is used, + which default to `float32` unless you configured it otherwise + (via `tf.keras.backend.set_floatx(float_dtype)`). + **kwargs: Additional keyword arguments. + """ + _validate_kwargs(self.__class__.__name__, kwargs) + dtype = _get_dtype(dtype) + if not dtype.is_floating and not dtype.is_integer: + raise ValueError(f"Expected float or integer dtype, got {dtype}.") + if _PARTITION_SHAPE in kwargs: + shape = kwargs[_PARTITION_SHAPE] + partition_offset = kwargs.get(_PARTITION_OFFSET, None) + if partition_offset is None: + # We skip the reuse warning for partitioned variable, since the same + # initializer will be called multiple times for each partition. 
+ self._warn_reuse() + nonce = hash(partition_offset) if partition_offset else None + layout = kwargs.pop("layout", None) + if layout: + _ensure_keras_seeded() + return utils.call_with_layout( + self._random_generator.random_uniform, + layout, + shape, + self.minval, + self.maxval, + dtype, + nonce, + ) + return self._random_generator.random_uniform( + shape, self.minval, self.maxval, dtype, nonce + ) + + def get_config(self): + return {"minval": self.minval, "maxval": self.maxval, "seed": self.seed} + + +@keras_export( + "keras.initializers.RandomNormal", "keras.initializers.random_normal", v1=[] +) +class RandomNormal(Initializer): + """Initializer that generates tensors with a normal distribution. + + Also available via the shortcut function + `tf.keras.initializers.random_normal`. + + Examples: + + >>> # Standalone usage: + >>> initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.) + >>> values = initializer(shape=(2, 2)) + + >>> # Usage in a Keras layer: + >>> initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.) + >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) + + Args: + mean: a python scalar or a scalar tensor. Mean of the random values to + generate. + stddev: a python scalar or a scalar tensor. Standard deviation of the + random values to generate. + seed: A Python integer. Used to make the behavior of the initializer + deterministic. Note that a seeded initializer will produce the same + random values across multiple calls. + """ + + def __init__(self, mean=0.0, stddev=0.05, seed=None): + self.mean = mean + self.stddev = stddev + self.seed = seed + self._random_generator = backend.RandomGenerator( + seed, rng_type="stateless" + ) + + def __call__(self, shape, dtype=None, **kwargs): + """Returns a tensor object initialized to random normal values. + + Args: + shape: Shape of the tensor. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `tf.keras.backend.floatx()` is used, + which default to `float32` unless you configured it otherwise (via + `tf.keras.backend.set_floatx(float_dtype)`) + **kwargs: Additional keyword arguments. + """ + _validate_kwargs(self.__class__.__name__, kwargs) + dtype = _assert_float_dtype(_get_dtype(dtype)) + if _PARTITION_SHAPE in kwargs: + shape = kwargs[_PARTITION_SHAPE] + partition_offset = kwargs.get(_PARTITION_OFFSET, None) + if partition_offset is None: + # We skip the reuse warning for partitioned variable, since the same + # initializer will be called multiple times for each partition. + self._warn_reuse() + nonce = hash(partition_offset) if partition_offset else None + layout = kwargs.pop("layout", None) + if layout: + _ensure_keras_seeded() + return utils.call_with_layout( + self._random_generator.random_normal, + layout, + shape, + self.mean, + self.stddev, + dtype, + nonce, + ) + return self._random_generator.random_normal( + shape, self.mean, self.stddev, dtype, nonce + ) + + def get_config(self): + return {"mean": self.mean, "stddev": self.stddev, "seed": self.seed} + + +@keras_export( + "keras.initializers.TruncatedNormal", + "keras.initializers.truncated_normal", + v1=[], +) +class TruncatedNormal(Initializer): + """Initializer that generates a truncated normal distribution. + + Also available via the shortcut function + `tf.keras.initializers.truncated_normal`. 
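Before moving on to `TruncatedNormal`, a short sketch of the seed semantics documented for the random initializers above: the RNG is stateless, so a seeded instance repeats its values, and reusing an unseeded instance triggers the `_warn_reuse()` warning (seed value arbitrary):

```python
import tensorflow.compat.v2 as tf

seeded = tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=1337)
a = seeded(shape=(2, 2))
b = seeded(shape=(2, 2))
tf.debugging.assert_near(a, b)  # identical draws on every call

unseeded = tf.keras.initializers.RandomNormal()
_ = unseeded(shape=(2, 2))
_ = unseeded(shape=(2, 2))  # second call emits the reuse UserWarning
```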
+ + The values generated are similar to values from a + `tf.keras.initializers.RandomNormal` initializer except that values more + than two standard deviations from the mean are + discarded and re-drawn. + + Examples: + + >>> # Standalone usage: + >>> initializer = tf.keras.initializers.TruncatedNormal(mean=0., stddev=1.) + >>> values = initializer(shape=(2, 2)) + + >>> # Usage in a Keras layer: + >>> initializer = tf.keras.initializers.TruncatedNormal(mean=0., stddev=1.) + >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) + + Args: + mean: a python scalar or a scalar tensor. Mean of the random values + to generate. + stddev: a python scalar or a scalar tensor. Standard deviation of the + random values to generate before truncation. + seed: A Python integer. Used to make the behavior of the initializer + deterministic. Note that a seeded initializer will produce the same + random values across multiple calls. + """ + + def __init__(self, mean=0.0, stddev=0.05, seed=None): + self.mean = mean + self.stddev = stddev + self.seed = seed + self._random_generator = backend.RandomGenerator( + seed, rng_type="stateless" + ) + + def __call__(self, shape, dtype=None, **kwargs): + """Returns a tensor initialized to random normal values (truncated). + + Args: + shape: Shape of the tensor. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `tf.keras.backend.floatx()` is used, + which default to `float32` unless you configured it otherwise (via + `tf.keras.backend.set_floatx(float_dtype)`) + **kwargs: Additional keyword arguments. + """ + _validate_kwargs(self.__class__.__name__, kwargs) + dtype = _assert_float_dtype(_get_dtype(dtype)) + if _PARTITION_SHAPE in kwargs: + shape = kwargs[_PARTITION_SHAPE] + partition_offset = kwargs.get(_PARTITION_OFFSET, None) + if partition_offset is None: + # We skip the reuse warning for partitioned variable, since the same + # initializer will be called multiple times for each partition. + self._warn_reuse() + nonce = hash(partition_offset) if partition_offset else None + layout = kwargs.pop("layout", None) + if layout: + # TODO(scottzhu): Remove this once the forward compat period above + # is expired. + self._random_generator._rng_type = ( + self._random_generator.RNG_STATEFUL + ) + _ensure_keras_seeded() + return utils.call_with_layout( + self._random_generator.truncated_normal, + layout, + shape, + self.mean, + self.stddev, + dtype, + nonce, + ) + return self._random_generator.truncated_normal( + shape, self.mean, self.stddev, dtype, nonce + ) + + def get_config(self): + return {"mean": self.mean, "stddev": self.stddev, "seed": self.seed} + + +@keras_export( + "keras.initializers.VarianceScaling", + "keras.initializers.variance_scaling", + v1=[], +) +class VarianceScaling(Initializer): + """Initializer that adapts its scale to the shape of its input tensors. + + Also available via the shortcut function + `tf.keras.initializers.variance_scaling`. 
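To make the `stddev = sqrt(scale / n)` rule described just below concrete, here is the fan computation for a conv kernel, mirroring the `_compute_fans()` helper defined later in this file (the shape is chosen arbitrarily):

```python
import math

# Kernel shape convention: (..., input_depth, depth).
shape = (3, 3, 16, 32)  # 3x3 conv, 16 -> 32 channels
receptive_field_size = 3 * 3
fan_in = 16 * receptive_field_size   # 144
fan_out = 32 * receptive_field_size  # 288

# mode="fan_in", scale=1.0, distribution="untruncated_normal":
stddev = math.sqrt(1.0 / max(1.0, fan_in))  # sqrt(scale / n)
```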
+ + With `distribution="truncated_normal" or "untruncated_normal"`, samples are + drawn from a truncated/untruncated normal distribution with a mean of zero + and a standard deviation (after truncation, if used) `stddev = sqrt(scale / + n)`, where `n` is: + + - number of input units in the weight tensor, if `mode="fan_in"` + - number of output units, if `mode="fan_out"` + - average of the numbers of input and output units, if `mode="fan_avg"` + + With `distribution="uniform"`, samples are drawn from a uniform distribution + within `[-limit, limit]`, where `limit = sqrt(3 * scale / n)`. + + Examples: + + >>> # Standalone usage: + >>> initializer = tf.keras.initializers.VarianceScaling( + ... scale=0.1, mode='fan_in', distribution='uniform') + >>> values = initializer(shape=(2, 2)) + + >>> # Usage in a Keras layer: + >>> initializer = tf.keras.initializers.VarianceScaling( + ... scale=0.1, mode='fan_in', distribution='uniform') + >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) + + Args: + scale: Scaling factor (positive float). + mode: One of `"fan_in"`, `"fan_out"`, `"fan_avg"`. + distribution: Random distribution to use. One of `"truncated_normal"`, + `"untruncated_normal"`, or `"uniform"`. + seed: A Python integer. Used to make the behavior of the initializer + deterministic. Note that a seeded initializer will produce the same + random values across multiple calls. + """ + + def __init__( + self, + scale=1.0, + mode="fan_in", + distribution="truncated_normal", + seed=None, + ): + if scale <= 0.0: + raise ValueError( + f"`scale` must be positive float. Received: scale={scale}." + ) + allowed_modes = {"fan_in", "fan_out", "fan_avg"} + if mode not in allowed_modes: + raise ValueError( + f"Invalid `mode` argument: {mode}. " + f"Please use one of the {allowed_modes}." + ) + distribution = distribution.lower() + # Compatibility with keras-team/keras. + if distribution == "normal": + distribution = "truncated_normal" + allowed_distributions = { + "uniform", + "truncated_normal", + "untruncated_normal", + } + if distribution not in allowed_distributions: + raise ValueError( + f"Invalid `distribution` argument: {distribution}." + f"Allowed distributions: {allowed_distributions}." + ) + self.scale = scale + self.mode = mode + self.distribution = distribution + self.seed = seed + self._random_generator = backend.RandomGenerator( + seed, rng_type="stateless" + ) + + def __call__(self, shape, dtype=None, **kwargs): + """Returns a tensor object initialized as specified by the initializer. + + Args: + shape: Shape of the tensor. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `tf.keras.backend.floatx()` is used, + which default to `float32` unless you configured it otherwise (via + `tf.keras.backend.set_floatx(float_dtype)`) + **kwargs: Additional keyword arguments. + """ + _validate_kwargs(self.__class__.__name__, kwargs) + dtype = _assert_float_dtype(_get_dtype(dtype)) + if _PARTITION_SHAPE in kwargs: + shape = kwargs[_PARTITION_SHAPE] + partition_offset = kwargs.get(_PARTITION_OFFSET, None) + if partition_offset is None: + # We skip the reuse warning for partitioned variable, since the same + # initializer will be called multiple times for each partition. 
+ self._warn_reuse() + nonce = hash(partition_offset) if partition_offset else None + layout = kwargs.pop("layout", None) + if layout: + _ensure_keras_seeded() + return utils.call_with_layout( + self._generate_init_val, + layout, + shape=shape, + dtype=dtype, + nonce=nonce, + ) + return self._generate_init_val(shape=shape, dtype=dtype, nonce=nonce) + + def _generate_init_val(self, shape, dtype, nonce): + scale = self.scale + fan_in, fan_out = _compute_fans(shape) + if self.mode == "fan_in": + scale /= max(1.0, fan_in) + elif self.mode == "fan_out": + scale /= max(1.0, fan_out) + else: + scale /= max(1.0, (fan_in + fan_out) / 2.0) + if self.distribution == "truncated_normal": + # constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., + # scale=1.) + stddev = math.sqrt(scale) / 0.87962566103423978 + return self._random_generator.truncated_normal( + shape, 0.0, stddev, dtype, nonce + ) + elif self.distribution == "untruncated_normal": + stddev = math.sqrt(scale) + return self._random_generator.random_normal( + shape, 0.0, stddev, dtype, nonce + ) + else: + limit = math.sqrt(3.0 * scale) + return self._random_generator.random_uniform( + shape, -limit, limit, dtype, nonce + ) + + def get_config(self): + return { + "scale": self.scale, + "mode": self.mode, + "distribution": self.distribution, + "seed": self.seed, + } + + +@keras_export( + "keras.initializers.Orthogonal", "keras.initializers.orthogonal", v1=[] +) +class Orthogonal(Initializer): + """Initializer that generates an orthogonal matrix. + + Also available via the shortcut function `tf.keras.initializers.orthogonal`. + + If the shape of the tensor to initialize is two-dimensional, it is + initialized with an orthogonal matrix obtained from the QR decomposition of + a matrix of random numbers drawn from a normal distribution. If the matrix + has fewer rows than columns then the output will have orthogonal rows. + Otherwise, the output will have orthogonal columns. + + If the shape of the tensor to initialize is more than two-dimensional, + a matrix of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])` + is initialized, where `n` is the length of the shape vector. + The matrix is subsequently reshaped to give a tensor of the desired shape. + + Examples: + + >>> # Standalone usage: + >>> initializer = tf.keras.initializers.Orthogonal() + >>> values = initializer(shape=(2, 2)) + + >>> # Usage in a Keras layer: + >>> initializer = tf.keras.initializers.Orthogonal() + >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) + + Args: + gain: multiplicative factor to apply to the orthogonal matrix + seed: A Python integer. Used to make the behavior of the initializer + deterministic. Note that a seeded initializer will produce the same + random values across multiple calls. + + References: + - [Saxe et al., 2014](https://openreview.net/forum?id=_wzZwKpTDF_9C) + """ + + def __init__(self, gain=1.0, seed=None): + self.gain = gain + self.seed = seed + self._random_generator = backend.RandomGenerator( + seed, rng_type="stateless" + ) + + def __call__(self, shape, dtype=None, **kwargs): + """Returns a tensor object initialized to an orthogonal matrix. + + Args: + shape: Shape of the tensor. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `tf.keras.backend.floatx()` is used, + which default to `float32` unless you configured it otherwise + (via `tf.keras.backend.set_floatx(float_dtype)`) + **kwargs: Additional keyword arguments. 
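The QR construction documented here yields an orthonormal result; a small verification sketch (shape and seed arbitrary):

```python
import tensorflow.compat.v2 as tf

w = tf.keras.initializers.Orthogonal(seed=123)(shape=(8, 4))

# More rows than columns, so the columns are orthonormal (gain=1.0):
# W^T W is the 4x4 identity, up to numerical tolerance.
tf.debugging.assert_near(
    tf.matmul(w, w, transpose_a=True), tf.eye(4), atol=1e-5
)
```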
+ """ + _validate_kwargs( + self.__class__.__name__, kwargs, support_partition=False + ) + dtype = _assert_float_dtype(_get_dtype(dtype)) + # Check the shape + if len(shape) < 2: + raise ValueError( + "The tensor to initialize must be " + "at least two-dimensional. Received: " + f"shape={shape} of rank {len(shape)}." + ) + self._warn_reuse() + layout = kwargs.pop("layout", None) + if layout: + _ensure_keras_seeded() + return utils.call_with_layout( + self._generate_init_val, layout, shape=shape, dtype=dtype + ) + return self._generate_init_val(shape, dtype) + + def _generate_init_val(self, shape, dtype): + # Flatten the input shape with the last dimension remaining + # its original shape so it works for conv2d + num_rows = 1 + for dim in shape[:-1]: + num_rows *= dim + num_cols = shape[-1] + flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows)) + + # Generate a random matrix + a = self._random_generator.random_normal(flat_shape, dtype=dtype) + # Compute the qr factorization + q, r = tf.linalg.qr(a, full_matrices=False) + # Make Q uniform + d = tf.linalg.tensor_diag_part(r) + q *= tf.sign(d) + if num_rows < num_cols: + q = tf.linalg.matrix_transpose(q) + return self.gain * tf.reshape(q, shape) + + def get_config(self): + return {"gain": self.gain, "seed": self.seed} + + +@keras_export( + "keras.initializers.Identity", "keras.initializers.identity", v1=[] +) +class Identity(Initializer): + """Initializer that generates the identity matrix. + + Also available via the shortcut function `tf.keras.initializers.identity`. + + Only usable for generating 2D matrices. + + Examples: + + >>> # Standalone usage: + >>> initializer = tf.keras.initializers.Identity() + >>> values = initializer(shape=(2, 2)) + + >>> # Usage in a Keras layer: + >>> initializer = tf.keras.initializers.Identity() + >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) + + Args: + gain: Multiplicative factor to apply to the identity matrix. + """ + + def __init__(self, gain=1.0): + self.gain = gain + + def __call__(self, shape, dtype=None, **kwargs): + """Returns a tensor object initialized to a 2D identity matrix. + + Args: + shape: Shape of the tensor. It should have exactly rank 2. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `tf.keras.backend.floatx()` is used, + which default to `float32` unless you configured it otherwise + (via `tf.keras.backend.set_floatx(float_dtype)`) + **kwargs: Additional keyword arguments. + """ + _validate_kwargs( + self.__class__.__name__, kwargs, support_partition=False + ) + dtype = _assert_float_dtype(_get_dtype(dtype)) + if len(shape) != 2: + raise ValueError( + "Identity matrix initializer can only be used for 2D matrices. " + f"Received: shape={shape} of rank {len(shape)}." + ) + layout = kwargs.pop("layout", None) + if layout: + return utils.call_with_layout( + self._generate_init_val, layout, shape=shape, dtype=dtype + ) + return self._generate_init_val(shape, dtype) + + def _generate_init_val(self, shape, dtype): + initializer = tf.eye(*shape, dtype=dtype) + return self.gain * initializer + + def get_config(self): + return {"gain": self.gain} + + +@keras_export( + "keras.initializers.GlorotUniform", + "keras.initializers.glorot_uniform", + v1=[], +) +class GlorotUniform(VarianceScaling): + """The Glorot uniform initializer, also called Xavier uniform initializer. + + Also available via the shortcut function + `tf.keras.initializers.glorot_uniform`. 
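A worked instance of the `limit = sqrt(6 / (fan_in + fan_out))` formula described next (a sketch; the kernel shape is arbitrary):

```python
import math

import tensorflow.compat.v2 as tf

w = tf.keras.initializers.GlorotUniform(seed=0)(shape=(100, 50))

limit = math.sqrt(6.0 / (100 + 50))  # fan_in=100, fan_out=50 -> limit = 0.2
tf.debugging.assert_less_equal(tf.reduce_max(tf.abs(w)), limit)
```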
+
+    Draws samples from a uniform distribution within `[-limit, limit]`, where
+    `limit = sqrt(6 / (fan_in + fan_out))` (`fan_in` is the number of input
+    units in the weight tensor and `fan_out` is the number of output units).
+
+    Examples:
+
+    >>> # Standalone usage:
+    >>> initializer = tf.keras.initializers.GlorotUniform()
+    >>> values = initializer(shape=(2, 2))
+
+    >>> # Usage in a Keras layer:
+    >>> initializer = tf.keras.initializers.GlorotUniform()
+    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
+
+    Args:
+        seed: A Python integer. Used to make the behavior of the initializer
+            deterministic. Note that a seeded initializer will produce the
+            same random values across multiple calls.
+
+    References:
+        - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
+    """
+
+    def __init__(self, seed=None):
+        super().__init__(
+            scale=1.0, mode="fan_avg", distribution="uniform", seed=seed
+        )
+
+    def get_config(self):
+        return {"seed": self.seed}
+
+
+@keras_export(
+    "keras.initializers.GlorotNormal", "keras.initializers.glorot_normal", v1=[]
+)
+class GlorotNormal(VarianceScaling):
+    """The Glorot normal initializer, also called Xavier normal initializer.
+
+    Also available via the shortcut function
+    `tf.keras.initializers.glorot_normal`.
+
+    Draws samples from a truncated normal distribution centered on 0 with
+    `stddev = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number of
+    input units in the weight tensor and `fan_out` is the number of output
+    units in the weight tensor.
+
+    Examples:
+
+    >>> # Standalone usage:
+    >>> initializer = tf.keras.initializers.GlorotNormal()
+    >>> values = initializer(shape=(2, 2))
+
+    >>> # Usage in a Keras layer:
+    >>> initializer = tf.keras.initializers.GlorotNormal()
+    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
+
+    Args:
+        seed: A Python integer. Used to make the behavior of the initializer
+            deterministic. Note that a seeded initializer will produce the
+            same random values across multiple calls.
+
+    References:
+        - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
+    """
+
+    def __init__(self, seed=None):
+        super().__init__(
+            scale=1.0,
+            mode="fan_avg",
+            distribution="truncated_normal",
+            seed=seed,
+        )
+
+    def get_config(self):
+        return {"seed": self.seed}
+
+
+@keras_export(
+    "keras.initializers.LecunNormal", "keras.initializers.lecun_normal", v1=[]
+)
+class LecunNormal(VarianceScaling):
+    """Lecun normal initializer.
+
+    Also available via the shortcut function
+    `tf.keras.initializers.lecun_normal`.
+
+    Initializers allow you to pre-specify an initialization strategy, encoded
+    in the Initializer object, without knowing the shape and dtype of the
+    variable being initialized.
+
+    Draws samples from a truncated normal distribution centered on 0 with
+    `stddev = sqrt(1 / fan_in)` where `fan_in` is the number of input units
+    in the weight tensor.
+
+    Examples:
+
+    >>> # Standalone usage:
+    >>> initializer = tf.keras.initializers.LecunNormal()
+    >>> values = initializer(shape=(2, 2))
+
+    >>> # Usage in a Keras layer:
+    >>> initializer = tf.keras.initializers.LecunNormal()
+    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
+
+    Args:
+        seed: A Python integer. Used to make the behavior of the initializer
+            deterministic. Note that a seeded initializer will produce the
same random values across multiple calls.
+
+    References:
+        - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
+    """
+
+    def __init__(self, seed=None):
+        super().__init__(
+            scale=1.0, mode="fan_in", distribution="truncated_normal", seed=seed
+        )
+
+    def get_config(self):
+        return {"seed": self.seed}
+
+
+@keras_export(
+    "keras.initializers.LecunUniform", "keras.initializers.lecun_uniform", v1=[]
+)
+class LecunUniform(VarianceScaling):
+    """Lecun uniform initializer.
+
+    Also available via the shortcut function
+    `tf.keras.initializers.lecun_uniform`.
+
+    Draws samples from a uniform distribution within `[-limit, limit]`, where
+    `limit = sqrt(3 / fan_in)` (`fan_in` is the number of input units in the
+    weight tensor).
+
+    Examples:
+
+    >>> # Standalone usage:
+    >>> initializer = tf.keras.initializers.LecunUniform()
+    >>> values = initializer(shape=(2, 2))
+
+    >>> # Usage in a Keras layer:
+    >>> initializer = tf.keras.initializers.LecunUniform()
+    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
+
+    Args:
+        seed: A Python integer. Used to make the behavior of the initializer
+            deterministic. Note that a seeded initializer will produce the
+            same random values across multiple calls.
+
+    References:
+        - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
+    """
+
+    def __init__(self, seed=None):
+        super().__init__(
+            scale=1.0, mode="fan_in", distribution="uniform", seed=seed
+        )
+
+    def get_config(self):
+        return {"seed": self.seed}
+
+
+@keras_export(
+    "keras.initializers.HeNormal", "keras.initializers.he_normal", v1=[]
+)
+class HeNormal(VarianceScaling):
+    """He normal initializer.
+
+    Also available via the shortcut function
+    `tf.keras.initializers.he_normal`.
+
+    It draws samples from a truncated normal distribution centered on 0 with
+    `stddev = sqrt(2 / fan_in)` where `fan_in` is the number of input units
+    in the weight tensor.
+
+    Examples:
+
+    >>> # Standalone usage:
+    >>> initializer = tf.keras.initializers.HeNormal()
+    >>> values = initializer(shape=(2, 2))
+
+    >>> # Usage in a Keras layer:
+    >>> initializer = tf.keras.initializers.HeNormal()
+    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
+
+    Args:
+        seed: A Python integer. Used to make the behavior of the initializer
+            deterministic. Note that a seeded initializer will produce the
+            same random values across multiple calls.
+
+    References:
+        - [He et al., 2015](https://arxiv.org/abs/1502.01852)
+    """
+
+    def __init__(self, seed=None):
+        super().__init__(
+            scale=2.0, mode="fan_in", distribution="truncated_normal", seed=seed
+        )
+
+    def get_config(self):
+        return {"seed": self.seed}
+
+
+@keras_export(
+    "keras.initializers.HeUniform", "keras.initializers.he_uniform", v1=[]
+)
+class HeUniform(VarianceScaling):
+    """He uniform variance scaling initializer.
+
+    Also available via the shortcut function
+    `tf.keras.initializers.he_uniform`.
+
+    Draws samples from a uniform distribution within `[-limit, limit]`, where
+    `limit = sqrt(6 / fan_in)` (`fan_in` is the number of input units in the
+    weight tensor).
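As the constructors in this family show, each of these named initializers is a thin `VarianceScaling` preset, so an equivalent preset with the same seed yields identical stateless draws (seed and shape arbitrary):

```python
import tensorflow.compat.v2 as tf

a = tf.keras.initializers.HeUniform(seed=3)(shape=(4, 4))
b = tf.keras.initializers.VarianceScaling(
    scale=2.0, mode="fan_in", distribution="uniform", seed=3
)(shape=(4, 4))
tf.debugging.assert_near(a, b)  # same preset + same seed -> same values
```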
+
+    Examples:
+
+    >>> # Standalone usage:
+    >>> initializer = tf.keras.initializers.HeUniform()
+    >>> values = initializer(shape=(2, 2))
+
+    >>> # Usage in a Keras layer:
+    >>> initializer = tf.keras.initializers.HeUniform()
+    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
+
+    Args:
+        seed: A Python integer. Used to make the behavior of the initializer
+            deterministic. Note that a seeded initializer will produce the
+            same random values across multiple calls.
+
+    References:
+        - [He et al., 2015](https://arxiv.org/abs/1502.01852)
+    """
+
+    def __init__(self, seed=None):
+        super().__init__(
+            scale=2.0, mode="fan_in", distribution="uniform", seed=seed
+        )
+
+    def get_config(self):
+        return {"seed": self.seed}
+
+
+def _get_dtype(dtype):
+    if dtype is None:
+        dtype = backend.floatx()
+    return tf.as_dtype(dtype)
+
+
+def _assert_float_dtype(dtype):
+    """Validate and return floating point type based on `dtype`.
+
+    `dtype` must be a floating point type.
+
+    Args:
+        dtype: The data type to validate.
+
+    Returns:
+        Validated type.
+
+    Raises:
+        ValueError: if `dtype` is not a floating point type.
+    """
+    dtype = tf.as_dtype(dtype)
+    if not dtype.is_floating:
+        raise ValueError(f"Expected floating point type, got {dtype}.")
+    return dtype
+
+
+def _compute_fans(shape):
+    """Computes the number of input and output units for a weight shape.
+
+    Args:
+        shape: Integer shape tuple or TF tensor shape.
+
+    Returns:
+        A tuple of integer scalars (fan_in, fan_out).
+    """
+    if len(shape) < 1:  # Just to avoid errors for constants.
+        fan_in = fan_out = 1
+    elif len(shape) == 1:
+        fan_in = fan_out = shape[0]
+    elif len(shape) == 2:
+        fan_in = shape[0]
+        fan_out = shape[1]
+    else:
+        # Assuming convolution kernels (2D, 3D, or more).
+        # kernel shape: (..., input_depth, depth)
+        receptive_field_size = 1
+        for dim in shape[:-2]:
+            receptive_field_size *= dim
+        fan_in = shape[-2] * receptive_field_size
+        fan_out = shape[-1] * receptive_field_size
+    return int(fan_in), int(fan_out)
+
+
+def _validate_kwargs(cls_name, kwargs, support_partition=True):
+    invalid_kwargs = [k for k in kwargs if k not in _ALLOWED_INITIALIZER_KWARGS]
+    if invalid_kwargs:
+        raise TypeError(
+            f"Unknown keyword arguments: {invalid_kwargs}. Allowed "
+            f"keyword arguments: {_ALLOWED_INITIALIZER_KWARGS}."
+        )
+    if not support_partition and (
+        _PARTITION_SHAPE in kwargs or _PARTITION_OFFSET in kwargs
+    ):
+        raise ValueError(
+            f"{cls_name} initializer doesn't support "
+            "partition-related arguments."
+        )
+
+
+def _ensure_keras_seeded():
+    """Makes sure the keras.backend global seed generator is set.
+
+    This is important for the DTensor use case, to ensure that each client
+    is initialized with the same seed for `tf.random.Generator`, so that the
+    values created are in sync among all the clients.
+    """
+    if not getattr(backend._SEED_GENERATOR, "generator", None):
+        raise ValueError(
+            "When using DTensor APIs, you need to set the global seed "
+            "before using any Keras initializers. Please make sure "
+            "to call `tf.keras.utils.set_random_seed()` in your code."
+ ) diff --git a/keras/initializers/initializers_test.py b/keras/initializers/initializers_test.py index b460aab6b727..a45f54f6d0de 100644 --- a/keras/initializers/initializers_test.py +++ b/keras/initializers/initializers_test.py @@ -14,296 +14,312 @@ # ============================================================================== """Tests for Keras initializers.""" +import warnings + +import tensorflow.compat.v2 as tf from absl.testing import parameterized -import numpy as np from keras import backend -from keras.testing_infra import test_combinations from keras import initializers from keras import models -from keras.testing_infra import test_utils from keras.engine import input_layer from keras.layers import core +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf +RANDOM_INITIALIZERS = [ + initializers.RandomUniformV2, + initializers.RandomNormalV2, + initializers.OrthogonalV2, + # TODO(scottzhu): Enable this after the forward compat period expires for + # TruncatedNormalV2 + # initializers.TruncatedNormalV2, + initializers.VarianceScalingV2, + initializers.LecunUniformV2, + initializers.LecunNormalV2, + initializers.GlorotUniformV2, + initializers.GlorotNormalV2, + initializers.HeNormalV2, + initializers.HeUniformV2, +] def _compute_fans(shape): - """Computes the number of input and output units for a weight shape. - - Args: - shape: Integer shape tuple or TF tensor shape. - - Returns: - A tuple of integer scalars (fan_in, fan_out). - """ - if len(shape) < 1: # Just to avoid errors for constants. - fan_in = fan_out = 1 - elif len(shape) == 1: - fan_in = fan_out = shape[0] - elif len(shape) == 2: - fan_in = shape[0] - fan_out = shape[1] - else: - # Assuming convolution kernels (2D, 3D, or more). - # kernel shape: (..., input_depth, depth) - receptive_field_size = 1 - for dim in shape[:-2]: - receptive_field_size *= dim - fan_in = shape[-2] * receptive_field_size - fan_out = shape[-1] * receptive_field_size - return int(fan_in), int(fan_out) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + """Computes the number of input and output units for a weight shape. + + Args: + shape: Integer shape tuple or TF tensor shape. + + Returns: + A tuple of integer scalars (fan_in, fan_out). + """ + if len(shape) < 1: # Just to avoid errors for constants. + fan_in = fan_out = 1 + elif len(shape) == 1: + fan_in = fan_out = shape[0] + elif len(shape) == 2: + fan_in = shape[0] + fan_out = shape[1] + else: + # Assuming convolution kernels (2D, 3D, or more). + # kernel shape: (..., input_depth, depth) + receptive_field_size = 1 + for dim in shape[:-2]: + receptive_field_size *= dim + fan_in = shape[-2] * receptive_field_size + fan_out = shape[-1] * receptive_field_size + return int(fan_in), int(fan_out) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class KerasInitializersTest(tf.test.TestCase, parameterized.TestCase): - - def _runner(self, init, shape, target_mean=None, target_std=None, - target_max=None, target_min=None): - # The global seed is set so that we can get the same random streams between - # eager and graph mode when stateful op is used. - tf.random.set_seed(1337) - variable = backend.variable(init(shape)) - output = backend.get_value(variable) - # Test serialization (assumes deterministic behavior). 
- config = init.get_config() - reconstructed_init = init.__class__.from_config(config) - - tf.random.set_seed(1337) - variable = backend.variable(reconstructed_init(shape)) - output_2 = backend.get_value(variable) - self.assertAllClose(output, output_2, atol=1e-4) - - def test_uniform(self): - tensor_shape = (3, 2, 3) - with self.cached_session(): - self._runner( - initializers.RandomUniformV2(minval=-1, maxval=1, seed=124), - tensor_shape, - target_mean=0., - target_max=1, - target_min=-1) - - def test_normal(self): - tensor_shape = (8, 12, 99) - with self.cached_session(): - self._runner( - initializers.RandomNormalV2(mean=0, stddev=1, seed=153), - tensor_shape, - target_mean=0., - target_std=1) - - def test_truncated_normal(self): - tensor_shape = (12, 99, 7) - with self.cached_session(): - self._runner( - initializers.TruncatedNormalV2(mean=0, stddev=1, seed=126), - tensor_shape, - target_mean=0., - target_max=2, - target_min=-2) - - def test_constant(self): - tensor_shape = (5, 6, 4) - with self.cached_session(): - self._runner( - initializers.ConstantV2(2.), - tensor_shape, - target_mean=2, - target_max=2, - target_min=2) - - def test_lecun_uniform(self): - tensor_shape = (5, 6, 4, 2) - with self.cached_session(): - fan_in, _ = _compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) - self._runner( - initializers.LecunUniformV2(seed=123), - tensor_shape, - target_mean=0., - target_std=std) - - def test_glorot_uniform(self): - tensor_shape = (5, 6, 4, 2) - with self.cached_session(): - fan_in, fan_out = _compute_fans(tensor_shape) - std = np.sqrt(2. / (fan_in + fan_out)) - self._runner( - initializers.GlorotUniformV2(seed=123), - tensor_shape, - target_mean=0., - target_std=std) - - def test_he_uniform(self): - tensor_shape = (5, 6, 4, 2) - with self.cached_session(): - fan_in, _ = _compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) - self._runner( - initializers.HeUniformV2(seed=123), - tensor_shape, - target_mean=0., - target_std=std) - - def test_lecun_normal(self): - tensor_shape = (5, 6, 4, 2) - with self.cached_session(): - fan_in, _ = _compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) - self._runner( - initializers.LecunNormalV2(seed=123), - tensor_shape, - target_mean=0., - target_std=std) - - def test_glorot_normal(self): - tensor_shape = (5, 6, 4, 2) - with self.cached_session(): - fan_in, fan_out = _compute_fans(tensor_shape) - std = np.sqrt(2. / (fan_in + fan_out)) - self._runner( - initializers.GlorotNormalV2(seed=123), - tensor_shape, - target_mean=0., - target_std=std) - - def test_he_normal(self): - tensor_shape = (5, 6, 4, 2) - with self.cached_session(): - fan_in, _ = _compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) - self._runner( - initializers.HeNormalV2(seed=123), - tensor_shape, - target_mean=0., - target_std=std) - - def test_orthogonal(self): - tensor_shape = (20, 20) - with self.cached_session(): - self._runner( - initializers.OrthogonalV2(seed=123), tensor_shape, target_mean=0.) - - def test_identity(self): - with self.cached_session(): - tensor_shape = (3, 4, 5) - with self.assertRaises(ValueError): - self._runner( - initializers.IdentityV2(), - tensor_shape, - target_mean=1. / tensor_shape[0], - target_max=1.) - - tensor_shape = (3, 3) - self._runner( - initializers.IdentityV2(), - tensor_shape, - target_mean=1. / tensor_shape[0], - target_max=1.) - - def test_zero(self): - tensor_shape = (4, 5) - with self.cached_session(): - self._runner( - initializers.ZerosV2(), tensor_shape, target_mean=0., target_max=0.) 
- - def test_one(self): - tensor_shape = (4, 5) - with self.cached_session(): - self._runner( - initializers.OnesV2(), tensor_shape, target_mean=1., target_max=1.) - - def test_default_random_uniform(self): - ru = initializers.get('uniform') - self.assertEqual(ru.minval, -0.05) - self.assertEqual(ru.maxval, 0.05) - - def test_default_random_normal(self): - rn = initializers.get('normal') - self.assertEqual(rn.mean, 0.0) - self.assertEqual(rn.stddev, 0.05) - - def test_default_truncated_normal(self): - tn = initializers.get('truncated_normal') - self.assertEqual(tn.mean, 0.0) - self.assertEqual(tn.stddev, 0.05) - - def test_custom_initializer_saving(self): - - def my_initializer(shape, dtype=None): - return tf.ones(shape, dtype=dtype) - - inputs = input_layer.Input((10,)) - outputs = core.Dense(1, kernel_initializer=my_initializer)(inputs) - model = models.Model(inputs, outputs) - model2 = model.from_config( - model.get_config(), custom_objects={'my_initializer': my_initializer}) - self.assertEqual(model2.layers[1].kernel_initializer, my_initializer) - - @test_utils.run_v2_only - def test_load_external_variance_scaling_v2(self): - external_serialized_json = { - 'class_name': 'VarianceScaling', - 'config': { - 'distribution': 'normal', - 'mode': 'fan_avg', - 'scale': 1.0, - 'seed': None + def _runner( + self, + init, + shape, + ): + # The global seed is set so that we can get the same random streams + # between eager and graph mode when stateful op is used. + tf.random.set_seed(1337) + variable = backend.variable(init(shape)) + output = backend.get_value(variable) + # Test serialization (assumes deterministic behavior). + config = init.get_config() + reconstructed_init = init.__class__.from_config(config) + + tf.random.set_seed(1337) + variable = backend.variable(reconstructed_init(shape)) + output_2 = backend.get_value(variable) + self.assertAllClose(output, output_2, atol=1e-4) + + def test_uniform(self): + tensor_shape = (3, 2, 3) + with self.cached_session(): + self._runner( + initializers.RandomUniformV2(minval=-1, maxval=1, seed=124), + tensor_shape, + ) + + def test_normal(self): + tensor_shape = (8, 12, 99) + with self.cached_session(): + self._runner( + initializers.RandomNormalV2(mean=0, stddev=1, seed=153), + tensor_shape, + ) + + def test_truncated_normal(self): + tensor_shape = (12, 99, 7) + with self.cached_session(): + self._runner( + initializers.TruncatedNormalV2(mean=0, stddev=1, seed=126), + tensor_shape, + ) + + def test_constant(self): + tensor_shape = (5, 6, 4) + with self.cached_session(): + self._runner(initializers.ConstantV2(2.0), tensor_shape) + + def test_lecun_uniform(self): + tensor_shape = (5, 6, 4, 2) + with self.cached_session(): + self._runner(initializers.LecunUniformV2(seed=123), tensor_shape) + + def test_glorot_uniform(self): + tensor_shape = (5, 6, 4, 2) + with self.cached_session(): + self._runner(initializers.GlorotUniformV2(seed=123), tensor_shape) + + def test_he_uniform(self): + tensor_shape = (5, 6, 4, 2) + with self.cached_session(): + self._runner(initializers.HeUniformV2(seed=123), tensor_shape) + + def test_lecun_normal(self): + tensor_shape = (5, 6, 4, 2) + with self.cached_session(): + self._runner(initializers.LecunNormalV2(seed=123), tensor_shape) + + def test_glorot_normal(self): + tensor_shape = (5, 6, 4, 2) + with self.cached_session(): + self._runner(initializers.GlorotNormalV2(seed=123), tensor_shape) + + def test_he_normal(self): + tensor_shape = (5, 6, 4, 2) + with self.cached_session(): + 
self._runner(initializers.HeNormalV2(seed=123), tensor_shape) + + def test_orthogonal(self): + tensor_shape = (20, 20) + with self.cached_session(): + self._runner(initializers.OrthogonalV2(seed=123), tensor_shape) + + def test_identity(self): + with self.cached_session(): + tensor_shape = (3, 4, 5) + with self.assertRaises(ValueError): + self._runner(initializers.IdentityV2(), tensor_shape) + + tensor_shape = (3, 3) + self._runner(initializers.IdentityV2(), tensor_shape) + + def test_zero(self): + tensor_shape = (4, 5) + with self.cached_session(): + self._runner(initializers.ZerosV2(), tensor_shape) + + def test_one(self): + tensor_shape = (4, 5) + with self.cached_session(): + self._runner(initializers.OnesV2(), tensor_shape) + + def test_default_random_uniform(self): + ru = initializers.get("uniform") + self.assertEqual(ru.minval, -0.05) + self.assertEqual(ru.maxval, 0.05) + + def test_default_random_normal(self): + rn = initializers.get("normal") + self.assertEqual(rn.mean, 0.0) + self.assertEqual(rn.stddev, 0.05) + + def test_default_truncated_normal(self): + tn = initializers.get("truncated_normal") + self.assertEqual(tn.mean, 0.0) + self.assertEqual(tn.stddev, 0.05) + + def test_custom_initializer_saving(self): + def my_initializer(shape, dtype=None): + return tf.ones(shape, dtype=dtype) + + inputs = input_layer.Input((10,)) + outputs = core.Dense(1, kernel_initializer=my_initializer)(inputs) + model = models.Model(inputs, outputs) + model2 = model.from_config( + model.get_config(), + custom_objects={"my_initializer": my_initializer}, + ) + self.assertEqual(model2.layers[1].kernel_initializer, my_initializer) + + @test_utils.run_v2_only + def test_load_external_variance_scaling_v2(self): + external_serialized_json = { + "class_name": "VarianceScaling", + "config": { + "distribution": "normal", + "mode": "fan_avg", + "scale": 1.0, + "seed": None, + }, } - } - initializer = initializers.deserialize(external_serialized_json) - self.assertEqual(initializer.distribution, 'truncated_normal') - - @parameterized.named_parameters( - ('Zeros', initializers.ZerosV2, {}), - ('Ones', initializers.OnesV2, {}), - ('Constant', initializers.ConstantV2, {}), - ('RandomUniform', initializers.RandomUniformV2, {}), - ('RandomUniform_seeded', initializers.RandomUniformV2, {'seed': 123}), - ('RandomNormal', initializers.RandomNormalV2, {}), - ('RandomNormal_seeded', initializers.RandomNormalV2, {'seed': 123}), - ('TruncatedNormal', initializers.TruncatedNormalV2, {}), - ('TruncatedNormal_seeded', initializers.TruncatedNormalV2, {'seed': 123}), - ('LecunUniform', initializers.LecunUniformV2, {}), - ('LecunUniform_seeded', initializers.LecunUniformV2, {'seed': 123}), - ('GlorotUniform', initializers.GlorotUniformV2, {}), - ('GlorotUniform_seeded', initializers.GlorotUniformV2, {'seed': 123}), - ('HeUniform', initializers.HeUniformV2, {}), - ('HeUniform_seeded', initializers.HeUniformV2, {'seed': 123}), - ) - def test_partition(self, initializer_cls, kwargs): - with self.cached_session(): - initializer = initializer_cls(**kwargs) - result = initializer( - shape=(4, 2), partition_shape=(2, 2), partition_offset=(0, 0)) - self.assertEqual(result.shape, (2, 2)) - - if hasattr(initializer, 'seed'): - # Make sure the result are different when the partition_shape is same, - # but partition_offset is different, for random related initializers. 
- result_2 = initializer( - shape=(4, 2), partition_shape=(2, 2), partition_offset=(1, 0)) - self.assertNotAllClose(result, result_2) - - # Make sure initializer produce same result when provide same - # partition offset. - # TODO(scottzhu): Enable this assert when initializer is fully stateless - # result_3 = initializer( - # shape=(4, 2), partition_shape=(2, 2), partition_offset=(1, 0)) - # self.assertAllClose(result_2, result_3) - - @parameterized.named_parameters( - ('Orthogonal', initializers.OrthogonalV2), - ('Identity', initializers.IdentityV2), - ) - def test_partition_unsupported(self, initializer_cls): - with self.assertRaisesRegex( - ValueError, - "initializer doesn't support partition-related arguments"): - initializer_cls()( - shape=(4, 2), partition_shape=(2, 2), partition_offset=(0, 0)) - - -if __name__ == '__main__': - tf.test.main() + initializer = initializers.deserialize(external_serialized_json) + self.assertEqual(initializer.distribution, "truncated_normal") + + @parameterized.named_parameters( + ("Zeros", initializers.ZerosV2, {}), + ("Ones", initializers.OnesV2, {}), + ("Constant", initializers.ConstantV2, {}), + ("RandomUniform", initializers.RandomUniformV2, {}), + ("RandomUniform_seeded", initializers.RandomUniformV2, {"seed": 123}), + ("RandomNormal", initializers.RandomNormalV2, {}), + ("RandomNormal_seeded", initializers.RandomNormalV2, {"seed": 123}), + # TODO(scottzhu): Enable these tests after the forward compat period + # expires for TruncatedNormalV2. + # ("TruncatedNormal", initializers.TruncatedNormalV2, {}), + # ( + # "TruncatedNormal_seeded", + # initializers.TruncatedNormalV2, + # {"seed": 123}, + # ), + ("LecunUniform", initializers.LecunUniformV2, {}), + ("LecunUniform_seeded", initializers.LecunUniformV2, {"seed": 123}), + ("GlorotUniform", initializers.GlorotUniformV2, {}), + ("GlorotUniform_seeded", initializers.GlorotUniformV2, {"seed": 123}), + ("HeUniform", initializers.HeUniformV2, {}), + ("HeUniform_seeded", initializers.HeUniformV2, {"seed": 123}), + ) + def test_partition(self, initializer_cls, kwargs): + with self.cached_session(): + initializer = initializer_cls(**kwargs) + result = initializer( + shape=(4, 2), partition_shape=(2, 2), partition_offset=(0, 0) + ) + self.assertEqual(result.shape, (2, 2)) + + if hasattr(initializer, "seed"): + # Make sure the result are different when the partition_shape is + # same, but partition_offset is different, for random related + # initializers. + result_2 = initializer( + shape=(4, 2), + partition_shape=(2, 2), + partition_offset=(1, 0), + ) + self.assertNotAllClose(result, result_2) + + # Make sure initializer produce same result when provide same + # partition offset. 
+ result_3 = initializer( + shape=(4, 2), + partition_shape=(2, 2), + partition_offset=(1, 0), + ) + self.assertAllClose(result_2, result_3) + + @parameterized.named_parameters( + ("Orthogonal", initializers.OrthogonalV2), + ("Identity", initializers.IdentityV2), + ) + def test_partition_unsupported(self, initializer_cls): + with self.assertRaisesRegex( + ValueError, + "initializer doesn't support partition-related arguments", + ): + initializer_cls()( + shape=(4, 2), partition_shape=(2, 2), partition_offset=(0, 0) + ) + + @parameterized.parameters(RANDOM_INITIALIZERS) + def test_stateless(self, initializer_cl): + with self.cached_session(): + initializer = initializer_cl() + output1 = initializer(shape=[2, 3]) + output2 = initializer(shape=[2, 3]) + initializer2 = initializer_cl() + output3 = initializer2(shape=[2, 3]) + output4 = initializer2(shape=[2, 3]) + + self.assertAllClose(output1, output2) + self.assertAllClose(output3, output4) + self.assertNotAllClose(output1, output3) + + with warnings.catch_warnings(record=True) as w: + initializer(shape=[2, 3]) + self.assertLen(w, 1) + self.assertIn("being called multiple times", str(w[0].message)) + + @parameterized.parameters(RANDOM_INITIALIZERS) + def test_seed_stateless(self, initializer_cl): + with self.cached_session(): + seed = 1337 + initializer = initializer_cl(seed=seed) + output1 = initializer(shape=[2, 3]) + output2 = initializer(shape=[2, 3]) + initializer2 = initializer_cl(seed=seed) + output3 = initializer2(shape=[2, 3]) + output4 = initializer2(shape=[2, 3]) + + self.assertAllClose(output1, output2) + self.assertAllClose(output3, output4) + self.assertAllClose(output1, output3) + + # We don't raise warning for seeded initializer. + with warnings.catch_warnings(record=True) as w: + initializer(shape=[2, 3]) + self.assertEmpty(w) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/initializers/initializers_v1.py b/keras/initializers/initializers_v1.py index d48cdfb3d280..ccac2d3a664a 100644 --- a/keras/initializers/initializers_v1.py +++ b/keras/initializers/initializers_v1.py @@ -13,11 +13,12 @@ # limitations under the License. 
# ============================================================================== """Keras initializers for TF 1.""" -# pylint:disable=g-classes-have-attributes + import tensorflow.compat.v2 as tf -from tensorflow.python.util.tf_export import keras_export +# isort: off +from tensorflow.python.util.tf_export import keras_export _v1_zeros_initializer = tf.compat.v1.zeros_initializer _v1_ones_initializer = tf.compat.v1.ones_initializer @@ -28,372 +29,282 @@ _v1_glorot_uniform_initializer = tf.compat.v1.glorot_uniform_initializer _v1_glorot_normal_initializer = tf.compat.v1.glorot_normal_initializer -keras_export(v1=['keras.initializers.Zeros', 'keras.initializers.zeros'], allow_multiple_exports=True)( - _v1_zeros_initializer) -keras_export(v1=['keras.initializers.Ones', 'keras.initializers.ones'], allow_multiple_exports=True)( - _v1_ones_initializer) -keras_export(v1=['keras.initializers.Constant', 'keras.initializers.constant'], allow_multiple_exports=True)( - _v1_constant_initializer) -keras_export(v1=['keras.initializers.VarianceScaling'], allow_multiple_exports=True)( - _v1_variance_scaling_initializer) -keras_export(v1=['keras.initializers.Orthogonal', - 'keras.initializers.orthogonal'], allow_multiple_exports=True)(_v1_orthogonal_initializer) -keras_export(v1=['keras.initializers.Identity', - 'keras.initializers.identity'], allow_multiple_exports=True)(_v1_identity) -keras_export(v1=['keras.initializers.glorot_uniform'], allow_multiple_exports=True)( - _v1_glorot_uniform_initializer) -keras_export(v1=['keras.initializers.glorot_normal'], allow_multiple_exports=True)( - _v1_glorot_normal_initializer) - - -@keras_export(v1=['keras.initializers.RandomNormal', - 'keras.initializers.random_normal', - 'keras.initializers.normal']) +keras_export(v1=["keras.initializers.Zeros", "keras.initializers.zeros"])( + _v1_zeros_initializer +) +keras_export(v1=["keras.initializers.Ones", "keras.initializers.ones"])( + _v1_ones_initializer +) +keras_export(v1=["keras.initializers.Constant", "keras.initializers.constant"])( + _v1_constant_initializer +) +keras_export(v1=["keras.initializers.VarianceScaling"])( + _v1_variance_scaling_initializer +) +keras_export( + v1=["keras.initializers.Orthogonal", "keras.initializers.orthogonal"] +)(_v1_orthogonal_initializer) +keras_export(v1=["keras.initializers.Identity", "keras.initializers.identity"])( + _v1_identity +) +keras_export(v1=["keras.initializers.glorot_uniform"])( + _v1_glorot_uniform_initializer +) +keras_export(v1=["keras.initializers.glorot_normal"])( + _v1_glorot_normal_initializer +) + + +@keras_export( + v1=[ + "keras.initializers.RandomNormal", + "keras.initializers.random_normal", + "keras.initializers.normal", + ] +) class RandomNormal(tf.compat.v1.random_normal_initializer): - """Initializer that generates a normal distribution. - - Args: - mean: a python scalar or a scalar tensor. Mean of the random values to - generate. - stddev: a python scalar or a scalar tensor. Standard deviation of the random - values to generate. - seed: A Python integer. Used to create random seeds. See - `tf.compat.v1.set_random_seed` for behavior. - dtype: Default data type, used if no `dtype` argument is provided when - calling the initializer. Only floating point types are supported. - - @compatibility(TF2) - Although it is a legacy compat.v1 api, - `tf.compat.v1.keras.initializers.RandomNormal` is compatible with eager - execution and `tf.function`. 
- - To switch to native TF2, switch to using - `tf.keras.initializers.RandomNormal` (not from `compat.v1`) and - if you need to change the default dtype use - `tf.keras.backend.set_floatx(float_dtype)` - or pass the dtype when calling the initializer, rather than passing it - when constructing the initializer. - - Random seed behavior: - Also be aware that if you pass a seed to the TF2 initializer - API it will reuse that same seed for every single initialization - (unlike the TF1 initializer) - - #### Structural Mapping to Native TF2 - - Before: - - ```python - initializer = tf.compat.v1.keras.initializers.RandomNormal( - mean=mean, - stddev=stddev, - seed=seed, - dtype=dtype) - - weight_one = tf.Variable(initializer(shape_one)) - weight_two = tf.Variable(initializer(shape_two)) - ``` - - After: - - ```python - initializer = tf.keras.initializers.RandomNormal( - mean=mean, - # seed=seed, # Setting a seed in the native TF2 API - # causes it to produce the same initializations - # across multiple calls of the same initializer. - stddev=stddev) - - weight_one = tf.Variable(initializer(shape_one, dtype=dtype)) - weight_two = tf.Variable(initializer(shape_two, dtype=dtype)) - ``` - - #### How to Map Arguments - - | TF1 Arg Name | TF2 Arg Name | Note | - | :---------------- | :-------------- | :------------------------- | - | `mean` | `mean` | No change to defaults | - | `stddev` | `stddev` | No change to defaults | - | `seed` | `seed` | Different random number generation | - : : : semantics (to change in a : - : : : future version). If set, the TF2 version : - : : : will use stateless random number : - : : : generation which will produce the exact : - : : : same initialization even across multiple : - : : : calls of the initializer instance. the : - : : : `compat.v1` version will generate new : - : : : initializations each time. Do not set : - : : : a seed if you need different : - : : : initializations each time. Instead : - : : : either set a global tf seed with : - : : : `tf.random.set_seed` if you need : - : : : determinism, or initialize each weight: - : : : with a separate initializer instance : - : : : and a different seed. : - | `dtype` | `dtype` | The TF2 native api only takes it | - : : : as a `__call__` arg, not a constructor arg. : - | `partition_info` | - | (`__call__` arg in TF1) Not supported | - - #### Example of fixed-seed behavior differences - - `compat.v1` Fixed seed behavior: - - >>> initializer = tf.compat.v1.keras.initializers.TruncatedNormal(seed=10) - >>> a = initializer(shape=(2, 2)) - >>> b = initializer(shape=(2, 2)) - >>> tf.reduce_sum(a - b) == 0 - - - After: - - >>> initializer = tf.keras.initializers.TruncatedNormal(seed=10) - >>> a = initializer(shape=(2, 2)) - >>> b = initializer(shape=(2, 2)) - >>> tf.reduce_sum(a - b) == 0 - - - @end_compatibility - """ - - def __init__(self, mean=0.0, stddev=0.05, seed=None, dtype=tf.float32): - super().__init__( - mean=mean, stddev=stddev, seed=seed, dtype=dtype) - - -@keras_export(v1=['keras.initializers.RandomUniform', - 'keras.initializers.random_uniform', - 'keras.initializers.uniform']) + """Initializer that generates a normal distribution. + + Args: + mean: a python scalar or a scalar tensor. Mean of the random values to + generate. + stddev: a python scalar or a scalar tensor. Standard deviation of the + random values to generate. + seed: A Python integer. Used to create random seeds. See + `tf.compat.v1.set_random_seed` for behavior. 
+ dtype: Default data type, used if no `dtype` argument is provided when + calling the initializer. Only floating point types are supported. + + @compatibility(TF2) + Although it is a legacy compat.v1 api, + `tf.compat.v1.keras.initializers.RandomNormal` is compatible with eager + execution and `tf.function`. + + To switch to native TF2, switch to using + `tf.keras.initializers.RandomNormal` (not from `compat.v1`) and + if you need to change the default dtype use + `tf.keras.backend.set_floatx(float_dtype)` + or pass the dtype when calling the initializer, rather than passing it + when constructing the initializer. + + Random seed behavior: + Also be aware that if you pass a seed to the TF2 initializer + API it will reuse that same seed for every single initialization + (unlike the TF1 initializer) + + #### Structural Mapping to Native TF2 + + Before: + + ```python + initializer = tf.compat.v1.keras.initializers.RandomNormal( + mean=mean, + stddev=stddev, + seed=seed, + dtype=dtype) + + weight_one = tf.Variable(initializer(shape_one)) + weight_two = tf.Variable(initializer(shape_two)) + ``` + + After: + + ```python + initializer = tf.keras.initializers.RandomNormal( + mean=mean, + # seed=seed, # Setting a seed in the native TF2 API + # causes it to produce the same initializations + # across multiple calls of the same initializer. + stddev=stddev) + + weight_one = tf.Variable(initializer(shape_one, dtype=dtype)) + weight_two = tf.Variable(initializer(shape_two, dtype=dtype)) + ``` + + #### How to Map Arguments + + | TF1 Arg Name | TF2 Arg Name | Note | + | :---------------- | :-------------- | :------------------------- | + | `mean` | `mean` | No change to defaults | + | `stddev` | `stddev` | No change to defaults | + | `seed` | `seed` | Different random number generation | + : : : semantics (to change in a : + : : : future version). If set, the TF2 version : + : : : will use stateless random number : + : : : generation which will produce the exact : + : : : same initialization even across multiple : + : : : calls of the initializer instance. the : + : : : `compat.v1` version will generate new : + : : : initializations each time. Do not set : + : : : a seed if you need different : + : : : initializations each time. Instead : + : : : either set a global tf seed with : + : : : `tf.random.set_seed` if you need : + : : : determinism, or initialize each weight: + : : : with a separate initializer instance : + : : : and a different seed. : + | `dtype` | `dtype` | The TF2 native api only takes it | + : : : as a `__call__` arg, not a constructor arg. 
: + | `partition_info` | - | (`__call__` arg in TF1) Not supported | + + #### Example of fixed-seed behavior differences + + `compat.v1` Fixed seed behavior: + + >>> initializer = tf.compat.v1.keras.initializers.RandomNormal(seed=10) + >>> a = initializer(shape=(2, 2)) + >>> b = initializer(shape=(2, 2)) + >>> tf.reduce_sum(a - b) == 0 + + + After: + + >>> initializer = tf.keras.initializers.RandomNormal(seed=10) + >>> a = initializer(shape=(2, 2)) + >>> b = initializer(shape=(2, 2)) + >>> tf.reduce_sum(a - b) == 0 + + + @end_compatibility + """ + + def __init__(self, mean=0.0, stddev=0.05, seed=None, dtype=tf.float32): + super().__init__(mean=mean, stddev=stddev, seed=seed, dtype=dtype) + + +@keras_export( + v1=[ + "keras.initializers.RandomUniform", + "keras.initializers.random_uniform", + "keras.initializers.uniform", + ] +) class RandomUniform(tf.compat.v1.random_uniform_initializer): - """Initializer that generates tensors with a uniform distribution. - - Args: - minval: A python scalar or a scalar tensor. Lower bound of the range of - random values to generate. - maxval: A python scalar or a scalar tensor. Upper bound of the range of - random values to generate. Defaults to 1 for float types. - seed: A Python integer. Used to create random seeds. See - `tf.compat.v1.set_random_seed` for behavior. - dtype: Default data type, used if no `dtype` argument is provided when - calling the initializer. - - @compatibility(TF2) - Although it is a legacy `compat.v1` api, - `tf.compat.v1.keras.initializers.RandomUniform` is compatible with eager - execution and `tf.function`. - - To switch to native TF2, switch to using - `tf.keras.initializers.RandomUniform` (not from `compat.v1`) and - if you need to change the default dtype use - `tf.keras.backend.set_floatx(float_dtype)` - or pass the dtype when calling the initializer, rather than passing it - when constructing the initializer. - - Random seed behavior: - - Also be aware that if you pass a seed to the TF2 initializer - API it will reuse that same seed for every single initialization - (unlike the TF1 initializer) - - #### Structural Mapping to Native TF2 - - Before: - - ```python - - initializer = tf.compat.v1.keras.initializers.RandomUniform( - minval=minval, - maxval=maxval, - seed=seed, - dtype=dtype) - - weight_one = tf.Variable(initializer(shape_one)) - weight_two = tf.Variable(initializer(shape_two)) - ``` - - After: - - ```python - initializer = tf.keras.initializers.RandomUniform( - minval=minval, - maxval=maxval, - # seed=seed, # Setting a seed in the native TF2 API - # causes it to produce the same initializations - # across multiple calls of the same initializer. - ) - - weight_one = tf.Variable(initializer(shape_one, dtype=dtype)) - weight_two = tf.Variable(initializer(shape_two, dtype=dtype)) - ``` - - #### How to Map Arguments - - | TF1 Arg Name | TF2 Arg Name | Note | - | :---------------- | :-------------- | :------------------------- | - | `minval` | `minval` | No change to defaults | - | `maxval` | `maxval` | No change to defaults | - | `seed` | `seed` | Different random number generation | - : : : semantics (to change in a : - : : : future version). If set, the TF2 version : - : : : will use stateless random number : - : : : generation which will produce the exact : - : : : same initialization even across multiple : - : : : calls of the initializer instance. the : - : : : `compat.v1` version will generate new : - : : : initializations each time. 
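The mapping table above recommends one initializer instance (and one seed) per weight when TF1-style distinct initializations are needed. A hedged sketch of that pattern (shapes and seeds illustrative):

```python
import tensorflow as tf

# TF1: one seeded initializer produced different values on every call.
# TF2 equivalent: give each weight its own instance with its own seed.
shape_one, shape_two = (3, 4), (4, 2)  # illustrative shapes
weight_one = tf.Variable(
    tf.keras.initializers.RandomNormal(stddev=0.05, seed=10)(shape_one)
)
weight_two = tf.Variable(
    tf.keras.initializers.RandomNormal(stddev=0.05, seed=11)(shape_two)
)
```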
Do not set : - : : : a seed if you need different : - : : : initializations each time. Instead : - : : : either set a global tf seed with - : : : `tf.random.set_seed` if you need : - : : : determinism, or initialize each weight : - : : : with a separate initializer instance : - : : : and a different seed. : - | `dtype` | `dtype` | The TF2 native api only takes it | - : : : as a `__call__` arg, not a constructor arg. : - | `partition_info` | - | (`__call__` arg in TF1) Not supported | - - #### Example of fixed-seed behavior differences - - `compat.v1` Fixed seed behavior: - - >>> initializer = tf.compat.v1.keras.initializers.RandomUniform(seed=10) - >>> a = initializer(shape=(2, 2)) - >>> b = initializer(shape=(2, 2)) - >>> tf.reduce_sum(a - b) == 0 - - - After: - - >>> initializer = tf.keras.initializers.RandomUniform(seed=10) - >>> a = initializer(shape=(2, 2)) - >>> b = initializer(shape=(2, 2)) - >>> tf.reduce_sum(a - b) == 0 - - - @end_compatibility - """ - - def __init__(self, minval=-0.05, maxval=0.05, seed=None, - dtype=tf.float32): - super().__init__( - minval=minval, maxval=maxval, seed=seed, dtype=dtype) - - -@keras_export(v1=['keras.initializers.TruncatedNormal', - 'keras.initializers.truncated_normal']) + """Initializer that generates tensors with a uniform distribution. + + Args: + minval: A python scalar or a scalar tensor. Lower bound of the range of + random values to generate. Defaults to `-0.05`. + maxval: A python scalar or a scalar tensor. Upper bound of the range of + random values to generate. Defaults to `0.05`. + seed: A Python integer. Used to create random seeds. See + `tf.compat.v1.set_random_seed` for behavior. + dtype: Default data type, used if no `dtype` argument is provided when + calling the initializer. + + @compatibility(TF2) + Although it is a legacy `compat.v1` api, + `tf.compat.v1.keras.initializers.RandomUniform` is compatible with eager + execution and `tf.function`. + + To switch to native TF2, switch to using + `tf.keras.initializers.RandomUniform` (not from `compat.v1`) and + if you need to change the default dtype use + `tf.keras.backend.set_floatx(float_dtype)` + or pass the dtype when calling the initializer, rather than passing it + when constructing the initializer. + + Random seed behavior: + + Also be aware that if you pass a seed to the TF2 initializer + API it will reuse that same seed for every single initialization + (unlike the TF1 initializer) + + #### Structural Mapping to Native TF2 + + Before: + + ```python + + initializer = tf.compat.v1.keras.initializers.RandomUniform( + minval=minval, + maxval=maxval, + seed=seed, + dtype=dtype) + + weight_one = tf.Variable(initializer(shape_one)) + weight_two = tf.Variable(initializer(shape_two)) + ``` + + After: + + ```python + initializer = tf.keras.initializers.RandomUniform( + minval=minval, + maxval=maxval, + # seed=seed, # Setting a seed in the native TF2 API + # causes it to produce the same initializations + # across multiple calls of the same initializer. + ) + + weight_one = tf.Variable(initializer(shape_one, dtype=dtype)) + weight_two = tf.Variable(initializer(shape_two, dtype=dtype)) + ``` + + #### How to Map Arguments + + | TF1 Arg Name | TF2 Arg Name | Note | + | :---------------- | :-------------- | :------------------------- | + | `minval` | `minval` | No change to defaults | + | `maxval` | `maxval` | No change to defaults | + | `seed` | `seed` | Different random number generation | + : : : semantics (to change in a : + : : : future version). 
If set, the TF2 version : + : : : will use stateless random number : + : : : generation which will produce the exact : + : : : same initialization even across multiple : + : : : calls of the initializer instance. the : + : : : `compat.v1` version will generate new : + : : : initializations each time. Do not set : + : : : a seed if you need different : + : : : initializations each time. Instead : + : : : either set a global tf seed with + : : : `tf.random.set_seed` if you need : + : : : determinism, or initialize each weight : + : : : with a separate initializer instance : + : : : and a different seed. : + | `dtype` | `dtype` | The TF2 native api only takes it | + : : : as a `__call__` arg, not a constructor arg. : + | `partition_info` | - | (`__call__` arg in TF1) Not supported | + + #### Example of fixed-seed behavior differences + + `compat.v1` Fixed seed behavior: + + >>> initializer = tf.compat.v1.keras.initializers.RandomUniform(seed=10) + >>> a = initializer(shape=(2, 2)) + >>> b = initializer(shape=(2, 2)) + >>> tf.reduce_sum(a - b) == 0 + + + After: + + >>> initializer = tf.keras.initializers.RandomUniform(seed=10) + >>> a = initializer(shape=(2, 2)) + >>> b = initializer(shape=(2, 2)) + >>> tf.reduce_sum(a - b) == 0 + + + @end_compatibility + """ + + def __init__(self, minval=-0.05, maxval=0.05, seed=None, dtype=tf.float32): + super().__init__(minval=minval, maxval=maxval, seed=seed, dtype=dtype) + + +@keras_export( + v1=[ + "keras.initializers.TruncatedNormal", + "keras.initializers.truncated_normal", + ] +) class TruncatedNormal(tf.compat.v1.truncated_normal_initializer): - """Initializer that generates a truncated normal distribution. - - These values are similar to values from a `random_normal_initializer` - except that values more than two standard deviations from the mean - are discarded and re-drawn. This is the recommended initializer for - neural network weights and filters. - - Args: - mean: a python scalar or a scalar tensor. Mean of the random values to - generate. - stddev: a python scalar or a scalar tensor. Standard deviation of the - random values to generate. - seed: A Python integer. Used to create random seeds. See - `tf.compat.v1.set_random_seed` for behavior. - dtype: Default data type, used if no `dtype` argument is provided when - calling the initializer. Only floating point types are supported. - - @compatibility(TF2) - Although it is a legacy compat.v1 api, - `tf.compat.v1.keras.initializers.TruncatedNormal` is compatible with eager - execution and `tf.function`. - - To switch to native TF2, switch to using - `tf.keras.initializers.TruncatedNormal` (not from `compat.v1`) and - if you need to change the default dtype use - `tf.keras.backend.set_floatx(float_dtype)` - or pass the dtype when calling the initializer, rather than passing it - when constructing the initializer. 
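The dtype migration described here is mechanical: TF2 drops the constructor argument in favor of a global default or a per-call argument. A sketch, assuming a TF2 runtime:

```python
import tensorflow as tf

# TF1 took dtype in the constructor; TF2 takes it per call, or falls
# back to the global floatx default.
tf.keras.backend.set_floatx("float64")
init = tf.keras.initializers.TruncatedNormal(stddev=0.05)
w64 = init(shape=(2, 2))                     # float64 via floatx default
w16 = init(shape=(2, 2), dtype=tf.float16)   # per-call override
tf.keras.backend.set_floatx("float32")       # restore the default
```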
- - Random seed behavior: - Also be aware that if you pass a seed to the TF2 initializer - API it will reuse that same seed for every single initialization - (unlike the TF1 initializer) - - #### Structural Mapping to Native TF2 - - Before: - - ```python - initializer = tf.compat.v1.keras.initializers.TruncatedNormal( - mean=mean, - stddev=stddev, - seed=seed, - dtype=dtype) - - weight_one = tf.Variable(initializer(shape_one)) - weight_two = tf.Variable(initializer(shape_two)) - ``` - - After: - - ```python - initializer = tf.keras.initializers.TruncatedNormal( - mean=mean, - # seed=seed, # Setting a seed in the native TF2 API - # causes it to produce the same initializations - # across multiple calls of the same initializer. - stddev=stddev) - - weight_one = tf.Variable(initializer(shape_one, dtype=dtype)) - weight_two = tf.Variable(initializer(shape_two, dtype=dtype)) - ``` - - #### How to Map Arguments - - | TF1 Arg Name | TF2 Arg Name | Note | - | :---------------- | :-------------- | :------------------------- | - | `mean` | `mean` | No change to defaults | - | `stddev` | `stddev` | No change to defaults | - | `seed` | `seed` | Different random number generation | - : : : semantics (to change in a : - : : : future version). If set, the TF2 version : - : : : will use stateless random number : - : : : generation which will produce the exact : - : : : same initialization even across multiple : - : : : calls of the initializer instance. the : - : : : `compat.v1` version will generate new : - : : : initializations each time. Do not set : - : : : a seed if you need different : - : : : initializations each time. Instead : - : : : either set a global tf seed with - : : : `tf.random.set_seed` if you need : - : : : determinism, or initialize each weight : - : : : with a separate initializer instance : - : : : and a different seed. : - | `dtype` | `dtype` | The TF2 native api only takes it | - : : : as a `__call__` arg, not a constructor arg. : - | `partition_info` | - | (`__call__` arg in TF1) Not supported | - - #### Example of fixed-seed behavior differences - - `compat.v1` Fixed seed behavior: - - >>> initializer = tf.compat.v1.keras.initializers.TruncatedNormal(seed=10) - >>> a = initializer(shape=(2, 2)) - >>> b = initializer(shape=(2, 2)) - >>> tf.reduce_sum(a - b) == 0 - - - After: - - >>> initializer = tf.keras.initializers.TruncatedNormal(seed=10) - >>> a = initializer(shape=(2, 2)) - >>> b = initializer(shape=(2, 2)) - >>> tf.reduce_sum(a - b) == 0 - - - @end_compatibility - """ - - def __init__(self, mean=0.0, stddev=0.05, seed=None, dtype=tf.float32): """Initializer that generates a truncated normal distribution. + These values are similar to values from a `random_normal_initializer` + except that values more than two standard deviations from the mean + are discarded and re-drawn. This is the recommended initializer for + neural network weights and filters. Args: mean: a python scalar or a scalar tensor. Mean of the random values to @@ -404,50 +315,156 @@ def __init__(self, mean=0.0, stddev=0.05, seed=None, dtype=tf.float32): `tf.compat.v1.set_random_seed` for behavior. dtype: Default data type, used if no `dtype` argument is provided when calling the initializer. Only floating point types are supported. + + @compatibility(TF2) + Although it is a legacy compat.v1 api, + `tf.compat.v1.keras.initializers.TruncatedNormal` is compatible with eager + execution and `tf.function`. 
+ + To switch to native TF2, switch to using + `tf.keras.initializers.TruncatedNormal` (not from `compat.v1`) and + if you need to change the default dtype use + `tf.keras.backend.set_floatx(float_dtype)` + or pass the dtype when calling the initializer, rather than passing it + when constructing the initializer. + + Random seed behavior: + Also be aware that if you pass a seed to the TF2 initializer + API it will reuse that same seed for every single initialization + (unlike the TF1 initializer) + + #### Structural Mapping to Native TF2 + + Before: + + ```python + initializer = tf.compat.v1.keras.initializers.TruncatedNormal( + mean=mean, + stddev=stddev, + seed=seed, + dtype=dtype) + + weight_one = tf.Variable(initializer(shape_one)) + weight_two = tf.Variable(initializer(shape_two)) + ``` + + After: + + ```python + initializer = tf.keras.initializers.TruncatedNormal( + mean=mean, + # seed=seed, # Setting a seed in the native TF2 API + # causes it to produce the same initializations + # across multiple calls of the same initializer. + stddev=stddev) + + weight_one = tf.Variable(initializer(shape_one, dtype=dtype)) + weight_two = tf.Variable(initializer(shape_two, dtype=dtype)) + ``` + + #### How to Map Arguments + + | TF1 Arg Name | TF2 Arg Name | Note | + | :---------------- | :-------------- | :------------------------- | + | `mean` | `mean` | No change to defaults | + | `stddev` | `stddev` | No change to defaults | + | `seed` | `seed` | Different random number generation | + : : : semantics (to change in a : + : : : future version). If set, the TF2 version : + : : : will use stateless random number : + : : : generation which will produce the exact : + : : : same initialization even across multiple : + : : : calls of the initializer instance. the : + : : : `compat.v1` version will generate new : + : : : initializations each time. Do not set : + : : : a seed if you need different : + : : : initializations each time. Instead : + : : : either set a global tf seed with + : : : `tf.random.set_seed` if you need : + : : : determinism, or initialize each weight : + : : : with a separate initializer instance : + : : : and a different seed. : + | `dtype` | `dtype` | The TF2 native api only takes it | + : : : as a `__call__` arg, not a constructor arg. : + | `partition_info` | - | (`__call__` arg in TF1) Not supported | + + #### Example of fixed-seed behavior differences + + `compat.v1` Fixed seed behavior: + + >>> initializer = tf.compat.v1.keras.initializers.TruncatedNormal(seed=10) + >>> a = initializer(shape=(2, 2)) + >>> b = initializer(shape=(2, 2)) + >>> tf.reduce_sum(a - b) == 0 + + + After: + + >>> initializer = tf.keras.initializers.TruncatedNormal(seed=10) + >>> a = initializer(shape=(2, 2)) + >>> b = initializer(shape=(2, 2)) + >>> tf.reduce_sum(a - b) == 0 + + + @end_compatibility """ - super().__init__( - mean=mean, stddev=stddev, seed=seed, dtype=dtype) + def __init__(self, mean=0.0, stddev=0.05, seed=None, dtype=tf.float32): + """Initializer that generates a truncated normal distribution. -@keras_export(v1=['keras.initializers.lecun_normal']) -class LecunNormal(tf.compat.v1.variance_scaling_initializer): - def __init__(self, seed=None): - super().__init__( - scale=1., mode='fan_in', distribution='truncated_normal', seed=seed) + Args: + mean: a python scalar or a scalar tensor. Mean of the random values to + generate. + stddev: a python scalar or a scalar tensor. Standard deviation of the + random values to generate. + seed: A Python integer. Used to create random seeds. 
See + `tf.compat.v1.set_random_seed` for behavior. + dtype: Default data type, used if no `dtype` argument is provided when + calling the initializer. Only floating point types are supported. + """ + super().__init__(mean=mean, stddev=stddev, seed=seed, dtype=dtype) - def get_config(self): - return {'seed': self.seed} +@keras_export(v1=["keras.initializers.lecun_normal"]) +class LecunNormal(tf.compat.v1.variance_scaling_initializer): + def __init__(self, seed=None): + super().__init__( + scale=1.0, mode="fan_in", distribution="truncated_normal", seed=seed + ) -@keras_export(v1=['keras.initializers.lecun_uniform']) -class LecunUniform(tf.compat.v1.variance_scaling_initializer): + def get_config(self): + return {"seed": self.seed} - def __init__(self, seed=None): - super().__init__( - scale=1., mode='fan_in', distribution='uniform', seed=seed) - def get_config(self): - return {'seed': self.seed} +@keras_export(v1=["keras.initializers.lecun_uniform"]) +class LecunUniform(tf.compat.v1.variance_scaling_initializer): + def __init__(self, seed=None): + super().__init__( + scale=1.0, mode="fan_in", distribution="uniform", seed=seed + ) + def get_config(self): + return {"seed": self.seed} -@keras_export(v1=['keras.initializers.he_normal']) -class HeNormal(tf.compat.v1.variance_scaling_initializer): - def __init__(self, seed=None): - super().__init__( - scale=2., mode='fan_in', distribution='truncated_normal', seed=seed) +@keras_export(v1=["keras.initializers.he_normal"]) +class HeNormal(tf.compat.v1.variance_scaling_initializer): + def __init__(self, seed=None): + super().__init__( + scale=2.0, mode="fan_in", distribution="truncated_normal", seed=seed + ) - def get_config(self): - return {'seed': self.seed} + def get_config(self): + return {"seed": self.seed} -@keras_export(v1=['keras.initializers.he_uniform']) +@keras_export(v1=["keras.initializers.he_uniform"]) class HeUniform(tf.compat.v1.variance_scaling_initializer): + def __init__(self, seed=None): + super().__init__( + scale=2.0, mode="fan_in", distribution="uniform", seed=seed + ) - def __init__(self, seed=None): - super().__init__( - scale=2., mode='fan_in', distribution='uniform', seed=seed) - - def get_config(self): - return {'seed': self.seed} + def get_config(self): + return {"seed": self.seed} diff --git a/keras/initializers/initializers_v2.py b/keras/initializers/initializers_v2.py deleted file mode 100644 index 8048f158e99d..000000000000 --- a/keras/initializers/initializers_v2.py +++ /dev/null @@ -1,1098 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
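The He/LeCun classes above are thin wrappers that pin `VarianceScaling` parameters (LeCun: `scale=1.0`, He: `scale=2.0`, both `mode="fan_in"`). A sketch of the same equivalence in the TF2 API (seed value illustrative; with a shared seed the two are expected to match exactly, since `HeNormal` subclasses `VarianceScaling` with exactly these arguments):

```python
import tensorflow as tf

seed = 7
he = tf.keras.initializers.HeNormal(seed=seed)
vs = tf.keras.initializers.VarianceScaling(
    scale=2.0, mode="fan_in", distribution="truncated_normal", seed=seed
)
diff = tf.reduce_max(tf.abs(he(shape=(4, 4)) - vs(shape=(4, 4))))
print(float(diff))  # expected 0.0
```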
-# ============================================================================== -"""Keras initializers for TF 2.""" -# pylint: disable=g-classes-have-attributes, missing-docstring, g-direct-tensorflow-import - -import math - -from keras import backend -from keras.dtensor import utils - -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - -_PARTITION_SHAPE = 'partition_shape' -_PARTITION_OFFSET = 'partition_offset' -_LAYOUT = 'layout' -_ALLOWED_INITIALIZER_KWARGS = [_PARTITION_SHAPE, _PARTITION_OFFSET, _LAYOUT] - - -@keras_export('keras.initializers.Initializer') -class Initializer: - """Initializer base class: all Keras initializers inherit from this class. - - Initializers should implement a `__call__` method with the following - signature: - - ```python - def __call__(self, shape, dtype=None, **kwargs): - # returns a tensor of shape `shape` and dtype `dtype` - # containing values drawn from a distribution of your choice. - ``` - - Optionally, you an also implement the method `get_config` and the class - method `from_config` in order to support serialization -- just like with - any Keras object. - - Here's a simple example: a random normal initializer. - - ```python - import tensorflow as tf - - class ExampleRandomNormal(tf.keras.initializers.Initializer): - - def __init__(self, mean, stddev): - self.mean = mean - self.stddev = stddev - - def __call__(self, shape, dtype=None, **kwargs): - return tf.random.normal( - shape, mean=self.mean, stddev=self.stddev, dtype=dtype) - - def get_config(self): # To support serialization - return {"mean": self.mean, "stddev": self.stddev} - ``` - - Note that we don't have to implement `from_config` in the example above since - the constructor arguments of the class the keys in the config returned by - `get_config` are the same. In this case, the default `from_config` - works fine. - """ - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized as specified by the initializer. - - Args: - shape: Shape of the tensor. - dtype: Optional dtype of the tensor. - **kwargs: Additional keyword arguments. - """ - raise NotImplementedError('Initializer subclasses must implement the ' - '`__call__()` method.') - - def get_config(self): - """Returns the configuration of the initializer as a JSON-serializable dict. - - Returns: - A JSON-serializable Python dict. - """ - return {} - - @classmethod - def from_config(cls, config): - """Instantiates an initializer from a configuration dictionary. - - Example: - - ```python - initializer = RandomUniform(-1, 1) - config = initializer.get_config() - initializer = RandomUniform.from_config(config) - ``` - - Args: - config: A Python dictionary, the output of `get_config`. - - Returns: - A `tf.keras.initializers.Initializer` instance. - """ - config.pop('dtype', None) - return cls(**config) - - -@keras_export('keras.initializers.Zeros', 'keras.initializers.zeros', v1=[]) -class Zeros(Initializer): - """Initializer that generates tensors initialized to 0. - - Also available via the shortcut function `tf.keras.initializers.zeros`. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.Zeros() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.Zeros() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - """ - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized as specified by the initializer. 
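The base-class docstring above promises a config round trip via `get_config`/`from_config`. A minimal sketch of that round trip (argument values illustrative):

```python
import tensorflow as tf

init = tf.keras.initializers.RandomUniform(minval=-1.0, maxval=1.0, seed=3)
config = init.get_config()  # {'minval': -1.0, 'maxval': 1.0, 'seed': 3}
restored = tf.keras.initializers.RandomUniform.from_config(config)
# Same seed in both instances, so the stateless draws coincide.
print(bool(tf.reduce_all(init(shape=(2, 2)) == restored(shape=(2, 2)))))
```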
- - Args: - shape: Shape of the tensor. - dtype: Optional dtype of the tensor. Only numeric or boolean dtypes are - supported. If not specified, `tf.keras.backend.floatx()` is used, - which default to `float32` unless you configured it otherwise - (via `tf.keras.backend.set_floatx(float_dtype)`). - **kwargs: Additional keyword arguments. - """ - _validate_kwargs(self.__class__.__name__, kwargs) - dtype = _get_dtype(dtype) - if not dtype.is_numpy_compatible or dtype == tf.string: - raise ValueError(f'Expected numeric or boolean dtype, got {dtype}.') - if _PARTITION_SHAPE in kwargs: - shape = kwargs[_PARTITION_SHAPE] - layout = kwargs.pop('layout', None) - if layout: - return utils.call_with_layout(tf.zeros, layout, shape=shape, dtype=dtype) - return tf.zeros(shape, dtype) - - -@keras_export('keras.initializers.Ones', 'keras.initializers.ones', v1=[]) -class Ones(Initializer): - """Initializer that generates tensors initialized to 1. - - Also available via the shortcut function `tf.keras.initializers.ones`. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.Ones() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.Ones() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - """ - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized as specified by the initializer. - - Args: - shape: Shape of the tensor. - dtype: Optional dtype of the tensor. Only numeric or boolean dtypes are - supported. If not specified, `tf.keras.backend.floatx()` is used, - which default to `float32` unless you configured it otherwise - (via `tf.keras.backend.set_floatx(float_dtype)`). - **kwargs: Additional keyword arguments. - """ - _validate_kwargs(self.__class__.__name__, kwargs) - dtype = _get_dtype(dtype) - if not dtype.is_numpy_compatible or dtype == tf.string: - raise ValueError(f'Expected numeric or boolean dtype, got {dtype}.') - if _PARTITION_SHAPE in kwargs: - shape = kwargs[_PARTITION_SHAPE] - layout = kwargs.pop('layout', None) - if layout: - return utils.call_with_layout(tf.ones, layout, shape=shape, dtype=dtype) - return tf.ones(shape, dtype) - - -@keras_export('keras.initializers.Constant', - 'keras.initializers.constant', - v1=[]) -class Constant(Initializer): - """Initializer that generates tensors with constant values. - - Also available via the shortcut function `tf.keras.initializers.constant`. - - Only scalar values are allowed. - The constant value provided must be convertible to the dtype requested - when calling the initializer. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.Constant(3.) - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.Constant(3.) - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - value: A Python scalar. - """ - - def __init__(self, value=0): - self.value = value - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized to `self.value`. - - Args: - shape: Shape of the tensor. - dtype: Optional dtype of the tensor. If not specified, - `tf.keras.backend.floatx()` is used, - which default to `float32` unless you configured it otherwise - (via `tf.keras.backend.set_floatx(float_dtype)`). - **kwargs: Additional keyword arguments. 
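`Zeros`/`Ones` honor `partition_shape` by simply generating the shard-sized tensor, mirroring the `test_partition` case at the top of this section. A sketch (offsets illustrative; for value-deterministic initializers the offset cannot change the result):

```python
import tensorflow as tf

init = tf.keras.initializers.Zeros()
full = init(shape=(4, 2))
# Ask for one (2, 2) shard of the (4, 2) variable.
shard = init(shape=(4, 2), partition_shape=(2, 2), partition_offset=(2, 0))
print(full.shape, shard.shape)  # (4, 2) (2, 2)
```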
- """ - _validate_kwargs(self.__class__.__name__, kwargs) - dtype = _get_dtype(dtype) - if _PARTITION_SHAPE in kwargs: - shape = kwargs[_PARTITION_SHAPE] - layout = kwargs.pop('layout', None) - if layout: - return utils.call_with_layout(tf.constant, layout, self.value, - shape=shape, dtype=dtype) - return tf.constant( - self.value, dtype=_get_dtype(dtype), shape=shape) - - def get_config(self): - return {'value': self.value} - - -@keras_export('keras.initializers.RandomUniform', - 'keras.initializers.random_uniform', - v1=[]) -class RandomUniform(Initializer): - """Initializer that generates tensors with a uniform distribution. - - Also available via the shortcut function - `tf.keras.initializers.random_uniform`. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.RandomUniform(minval=0., maxval=1.) - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.RandomUniform(minval=0., maxval=1.) - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - minval: A python scalar or a scalar tensor. Lower bound of the range of - random values to generate (inclusive). - maxval: A python scalar or a scalar tensor. Upper bound of the range of - random values to generate (exclusive). - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. - """ - - def __init__(self, minval=-0.05, maxval=0.05, seed=None): - self.minval = minval - self.maxval = maxval - self.seed = seed - self._random_generator = backend.RandomGenerator(seed) - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized as specified by the initializer. - - Args: - shape: Shape of the tensor. - dtype: Optional dtype of the tensor. Only floating point and integer - types are supported. If not specified, - `tf.keras.backend.floatx()` is used, - which default to `float32` unless you configured it otherwise - (via `tf.keras.backend.set_floatx(float_dtype)`). - **kwargs: Additional keyword arguments. - """ - _validate_kwargs(self.__class__.__name__, kwargs) - dtype = _get_dtype(dtype) - if not dtype.is_floating and not dtype.is_integer: - raise ValueError(f'Expected float or integer dtype, got {dtype}.') - if _PARTITION_SHAPE in kwargs: - shape = kwargs[_PARTITION_SHAPE] - partition_offset = kwargs.get(_PARTITION_OFFSET, None) - nonce = hash(partition_offset) if partition_offset else None - layout = kwargs.pop('layout', None) - if layout: - self._random_generator._rng_type = self._random_generator.RNG_STATEFUL - _ensure_keras_seeded() - return utils.call_with_layout( - self._random_generator.random_uniform, layout, shape, self.minval, - self.maxval, dtype, nonce) - return self._random_generator.random_uniform( - shape, self.minval, self.maxval, dtype, nonce) - - def get_config(self): - return { - 'minval': self.minval, - 'maxval': self.maxval, - 'seed': self.seed - } - - -@keras_export('keras.initializers.RandomNormal', - 'keras.initializers.random_normal', - v1=[]) -class RandomNormal(Initializer): - """Initializer that generates tensors with a normal distribution. - - Also available via the shortcut function - `tf.keras.initializers.random_normal`. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.) 
- >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.) - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - mean: a python scalar or a scalar tensor. Mean of the random values to - generate. - stddev: a python scalar or a scalar tensor. Standard deviation of the random - values to generate. - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. - """ - - def __init__(self, mean=0.0, stddev=0.05, seed=None): - self.mean = mean - self.stddev = stddev - self.seed = seed - self._random_generator = backend.RandomGenerator(seed) - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized to random normal values. - - Args: - shape: Shape of the tensor. - dtype: Optional dtype of the tensor. Only floating point types are - supported. If not specified, `tf.keras.backend.floatx()` is used, which - default to `float32` unless you configured it otherwise (via - `tf.keras.backend.set_floatx(float_dtype)`) - **kwargs: Additional keyword arguments. - """ - _validate_kwargs(self.__class__.__name__, kwargs) - dtype = _assert_float_dtype(_get_dtype(dtype)) - if _PARTITION_SHAPE in kwargs: - shape = kwargs[_PARTITION_SHAPE] - partition_offset = kwargs.get(_PARTITION_OFFSET, None) - nonce = hash(partition_offset) if partition_offset else None - layout = kwargs.pop('layout', None) - if layout: - self._random_generator._rng_type = self._random_generator.RNG_STATEFUL - _ensure_keras_seeded() - return utils.call_with_layout( - self._random_generator.random_normal, layout, shape, self.mean, - self.stddev, dtype, nonce) - return self._random_generator.random_normal( - shape, self.mean, self.stddev, dtype, nonce) - - def get_config(self): - return { - 'mean': self.mean, - 'stddev': self.stddev, - 'seed': self.seed - } - - -@keras_export('keras.initializers.TruncatedNormal', - 'keras.initializers.truncated_normal', - v1=[]) -class TruncatedNormal(Initializer): - """Initializer that generates a truncated normal distribution. - - Also available via the shortcut function - `tf.keras.initializers.truncated_normal`. - - The values generated are similar to values from a - `tf.keras.initializers.RandomNormal` initializer except that values more - than two standard deviations from the mean are - discarded and re-drawn. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.TruncatedNormal(mean=0., stddev=1.) - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.TruncatedNormal(mean=0., stddev=1.) - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - mean: a python scalar or a scalar tensor. Mean of the random values - to generate. - stddev: a python scalar or a scalar tensor. Standard deviation of the - random values to generate before truncation. - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. 
- """ - - def __init__(self, mean=0.0, stddev=0.05, seed=None): - self.mean = mean - self.stddev = stddev - self.seed = seed - self._random_generator = backend.RandomGenerator(seed) - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized to random normal values (truncated). - - Args: - shape: Shape of the tensor. - dtype: Optional dtype of the tensor. Only floating point types are - supported. If not specified, `tf.keras.backend.floatx()` is used, which - default to `float32` unless you configured it otherwise (via - `tf.keras.backend.set_floatx(float_dtype)`) - **kwargs: Additional keyword arguments. - """ - _validate_kwargs(self.__class__.__name__, kwargs) - dtype = _assert_float_dtype(_get_dtype(dtype)) - if _PARTITION_SHAPE in kwargs: - shape = kwargs[_PARTITION_SHAPE] - partition_offset = kwargs.get(_PARTITION_OFFSET, None) - nonce = hash(partition_offset) if partition_offset else None - layout = kwargs.pop('layout', None) - if layout: - self._random_generator._rng_type = self._random_generator.RNG_STATEFUL - _ensure_keras_seeded() - return utils.call_with_layout( - self._random_generator.truncated_normal, layout, shape, self.mean, - self.stddev, dtype, nonce) - return self._random_generator.truncated_normal( - shape, self.mean, self.stddev, dtype, nonce) - - def get_config(self): - return { - 'mean': self.mean, - 'stddev': self.stddev, - 'seed': self.seed - } - - -@keras_export('keras.initializers.VarianceScaling', - 'keras.initializers.variance_scaling', - v1=[]) -class VarianceScaling(Initializer): - """Initializer capable of adapting its scale to the shape of weights tensors. - - Also available via the shortcut function - `tf.keras.initializers.variance_scaling`. - - With `distribution="truncated_normal" or "untruncated_normal"`, samples are - drawn from a truncated/untruncated normal distribution with a mean of zero and - a standard deviation (after truncation, if used) `stddev = sqrt(scale / n)`, - where `n` is: - - - number of input units in the weight tensor, if `mode="fan_in"` - - number of output units, if `mode="fan_out"` - - average of the numbers of input and output units, if `mode="fan_avg"` - - With `distribution="uniform"`, samples are drawn from a uniform distribution - within `[-limit, limit]`, where `limit = sqrt(3 * scale / n)`. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.VarianceScaling( - ... scale=0.1, mode='fan_in', distribution='uniform') - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.VarianceScaling( - ... scale=0.1, mode='fan_in', distribution='uniform') - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - scale: Scaling factor (positive float). - mode: One of "fan_in", "fan_out", "fan_avg". - distribution: Random distribution to use. One of "truncated_normal", - "untruncated_normal" and "uniform". - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. - """ - - def __init__(self, - scale=1.0, - mode='fan_in', - distribution='truncated_normal', - seed=None): - if scale <= 0.: - raise ValueError('`scale` must be positive float. 
' - f'Received: scale={scale}.') - allowed_modes = {'fan_in', 'fan_out', 'fan_avg'} - if mode not in allowed_modes: - raise ValueError(f'Invalid `mode` argument: {mode}. ' - f'Please use one of the {allowed_modes}.') - distribution = distribution.lower() - # Compatibility with keras-team/keras. - if distribution == 'normal': - distribution = 'truncated_normal' - allowed_distributions = { - 'uniform', 'truncated_normal', 'untruncated_normal' - } - if distribution not in allowed_distributions: - raise ValueError(f'Invalid `distribution` argument: {distribution}.' - f'Allowed distributions: {allowed_distributions}.') - self.scale = scale - self.mode = mode - self.distribution = distribution - self.seed = seed - self._random_generator = backend.RandomGenerator(seed) - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized as specified by the initializer. - - Args: - shape: Shape of the tensor. - dtype: Optional dtype of the tensor. Only floating point types are - supported. If not specified, `tf.keras.backend.floatx()` is used, which - default to `float32` unless you configured it otherwise (via - `tf.keras.backend.set_floatx(float_dtype)`) - **kwargs: Additional keyword arguments. - """ - _validate_kwargs(self.__class__.__name__, kwargs) - dtype = _assert_float_dtype(_get_dtype(dtype)) - if _PARTITION_SHAPE in kwargs: - shape = kwargs[_PARTITION_SHAPE] - partition_offset = kwargs.get(_PARTITION_OFFSET, None) - nonce = hash(partition_offset) if partition_offset else None - layout = kwargs.pop('layout', None) - if layout: - self._random_generator._rng_type = self._random_generator.RNG_STATEFUL - _ensure_keras_seeded() - return utils.call_with_layout( - self._generate_init_val, layout, shape=shape, dtype=dtype, - nonce=nonce) - return self._generate_init_val(shape=shape, dtype=dtype, - nonce=nonce) - - def _generate_init_val(self, shape, dtype, nonce): - scale = self.scale - fan_in, fan_out = _compute_fans(shape) - if self.mode == 'fan_in': - scale /= max(1., fan_in) - elif self.mode == 'fan_out': - scale /= max(1., fan_out) - else: - scale /= max(1., (fan_in + fan_out) / 2.) - if self.distribution == 'truncated_normal': - # constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) - stddev = math.sqrt(scale) / .87962566103423978 - return self._random_generator.truncated_normal( - shape, 0.0, stddev, dtype, nonce) - elif self.distribution == 'untruncated_normal': - stddev = math.sqrt(scale) - return self._random_generator.random_normal( - shape, 0.0, stddev, dtype, nonce) - else: - limit = math.sqrt(3.0 * scale) - return self._random_generator.random_uniform( - shape, -limit, limit, dtype, nonce) - - def get_config(self): - return { - 'scale': self.scale, - 'mode': self.mode, - 'distribution': self.distribution, - 'seed': self.seed - } - - -@keras_export('keras.initializers.Orthogonal', - 'keras.initializers.orthogonal', - v1=[]) -class Orthogonal(Initializer): - """Initializer that generates an orthogonal matrix. - - Also available via the shortcut function `tf.keras.initializers.orthogonal`. - - If the shape of the tensor to initialize is two-dimensional, it is initialized - with an orthogonal matrix obtained from the QR decomposition of a matrix of - random numbers drawn from a normal distribution. - If the matrix has fewer rows than columns then the output will have orthogonal - rows. Otherwise, the output will have orthogonal columns. 
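The `_generate_init_val` logic above reduces to a small amount of arithmetic; a worked example for a Dense kernel with `fan_in=256` under `mode="fan_in"` (numbers illustrative):

```python
import math

scale, fan_in = 1.0, 256
scale /= max(1.0, fan_in)

# truncated_normal: divide by scipy.stats.truncnorm.std(a=-2, b=2) so
# the post-truncation stddev equals sqrt(scale / n).
stddev = math.sqrt(scale) / 0.87962566103423978
# uniform: samples drawn from [-limit, limit].
limit = math.sqrt(3.0 * scale)
print(round(stddev, 6), round(limit, 6))  # ~0.071053 ~0.108253
```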
- - If the shape of the tensor to initialize is more than two-dimensional, - a matrix of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])` - is initialized, where `n` is the length of the shape vector. - The matrix is subsequently reshaped to give a tensor of the desired shape. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.Orthogonal() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.Orthogonal() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - gain: multiplicative factor to apply to the orthogonal matrix - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. - - References: - - [Saxe et al., 2014](https://openreview.net/forum?id=_wzZwKpTDF_9C) - """ - - def __init__(self, gain=1.0, seed=None): - self.gain = gain - self.seed = seed - self._random_generator = backend.RandomGenerator(seed) - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized to an orthogonal matrix. - - Args: - shape: Shape of the tensor. - dtype: Optional dtype of the tensor. Only floating point types are - supported. If not specified, `tf.keras.backend.floatx()` is used, - which default to `float32` unless you configured it otherwise - (via `tf.keras.backend.set_floatx(float_dtype)`) - **kwargs: Additional keyword arguments. - """ - _validate_kwargs(self.__class__.__name__, kwargs, support_partition=False) - dtype = _assert_float_dtype(_get_dtype(dtype)) - # Check the shape - if len(shape) < 2: - raise ValueError('The tensor to initialize must be ' - 'at least two-dimensional. Received: ' - f'shape={shape} of rank {len(shape)}.') - layout = kwargs.pop('layout', None) - if layout: - self._random_generator._rng_type = self._random_generator.RNG_STATEFUL - _ensure_keras_seeded() - return utils.call_with_layout( - self._generate_init_val, layout, shape=shape, dtype=dtype) - return self._generate_init_val(shape, dtype) - - def _generate_init_val(self, shape, dtype): - # Flatten the input shape with the last dimension remaining - # its original shape so it works for conv2d - num_rows = 1 - for dim in shape[:-1]: - num_rows *= dim - num_cols = shape[-1] - flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows)) - - # Generate a random matrix - a = self._random_generator.random_normal(flat_shape, dtype=dtype) - # Compute the qr factorization - q, r = tf.linalg.qr(a, full_matrices=False) - # Make Q uniform - d = tf.linalg.tensor_diag_part(r) - q *= tf.sign(d) - if num_rows < num_cols: - q = tf.linalg.matrix_transpose(q) - return self.gain * tf.reshape(q, shape) - - def get_config(self): - return {'gain': self.gain, 'seed': self.seed} - - -@keras_export('keras.initializers.Identity', - 'keras.initializers.identity', - v1=[]) -class Identity(Initializer): - """Initializer that generates the identity matrix. - - Also available via the shortcut function `tf.keras.initializers.identity`. - - Only usable for generating 2D matrices. 
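For reference, the QR construction in `Orthogonal._generate_init_val` works standalone. A sketch that mirrors it, substituting plain `tf.random.normal` for the Keras `RandomGenerator` used in the deleted code:

```python
import tensorflow as tf

def orthogonal_init(shape, gain=1.0, dtype=tf.float32):
    # Flatten all but the last dimension, QR-factor a random matrix,
    # fix the sign ambiguity with diag(R), then reshape back.
    num_rows = 1
    for dim in shape[:-1]:
        num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))
    a = tf.random.normal(flat_shape, dtype=dtype)
    q, r = tf.linalg.qr(a, full_matrices=False)
    q *= tf.sign(tf.linalg.tensor_diag_part(r))
    if num_rows < num_cols:
        q = tf.linalg.matrix_transpose(q)
    return gain * tf.reshape(q, shape)

w = orthogonal_init((4, 4))
# Columns are orthonormal: w^T w should be (numerically) the identity.
err = tf.reduce_max(tf.abs(tf.matmul(w, w, transpose_a=True) - tf.eye(4)))
print(float(err))
```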
- - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.Identity() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.Identity() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - gain: Multiplicative factor to apply to the identity matrix. - """ - - def __init__(self, gain=1.0): - self.gain = gain - - def __call__(self, shape, dtype=None, **kwargs): - """Returns a tensor object initialized to a 2D identity matrix. - - Args: - shape: Shape of the tensor. It should have exactly rank 2. - dtype: Optional dtype of the tensor. Only floating point types are - supported. If not specified, `tf.keras.backend.floatx()` is used, - which default to `float32` unless you configured it otherwise - (via `tf.keras.backend.set_floatx(float_dtype)`) - **kwargs: Additional keyword arguments. - """ - _validate_kwargs(self.__class__.__name__, kwargs, support_partition=False) - dtype = _assert_float_dtype(_get_dtype(dtype)) - if len(shape) != 2: - raise ValueError( - 'Identity matrix initializer can only be used for 2D matrices. ' - f'Received: shape={shape} of rank {len(shape)}.') - layout = kwargs.pop('layout', None) - if layout: - return utils.call_with_layout( - self._generate_init_val, layout, shape=shape, dtype=dtype) - return self._generate_init_val(shape, dtype) - - def _generate_init_val(self, shape, dtype): - initializer = tf.eye(*shape, dtype=dtype) - return self.gain * initializer - - def get_config(self): - return {'gain': self.gain} - - -@keras_export('keras.initializers.GlorotUniform', - 'keras.initializers.glorot_uniform', - v1=[]) -class GlorotUniform(VarianceScaling): - """The Glorot uniform initializer, also called Xavier uniform initializer. - - Also available via the shortcut function - `tf.keras.initializers.glorot_uniform`. - - Draws samples from a uniform distribution within `[-limit, limit]`, where - `limit = sqrt(6 / (fan_in + fan_out))` (`fan_in` is the number of input units - in the weight tensor and `fan_out` is the number of output units). - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.GlorotUniform() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.GlorotUniform() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. - - References: - - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html) - """ - - def __init__(self, seed=None): - super().__init__( - scale=1.0, - mode='fan_avg', - distribution='uniform', - seed=seed) - - def get_config(self): - return {'seed': self.seed} - - -@keras_export('keras.initializers.GlorotNormal', - 'keras.initializers.glorot_normal', - v1=[]) -class GlorotNormal(VarianceScaling): - """The Glorot normal initializer, also called Xavier normal initializer. - - Also available via the shortcut function - `tf.keras.initializers.glorot_normal`. - - Draws samples from a truncated normal distribution centered on 0 with `stddev - = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number of input units in - the weight tensor and `fan_out` is the number of output units in the weight - tensor. 
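The Glorot bound is easy to sanity-check numerically; a sketch (fan sizes illustrative):

```python
import math
import tensorflow as tf

# GlorotUniform == VarianceScaling(scale=1.0, mode="fan_avg",
# distribution="uniform"): samples lie in [-limit, limit] with
# limit = sqrt(6 / (fan_in + fan_out)).
fan_in, fan_out = 300, 100
limit = math.sqrt(6.0 / (fan_in + fan_out))  # sqrt(6/400) ~= 0.1225
values = tf.keras.initializers.GlorotUniform(seed=0)(shape=(fan_in, fan_out))
print(bool(tf.reduce_max(tf.abs(values)) <= limit))  # expected True
```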
- - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.GlorotNormal() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.GlorotNormal() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. - - References: - - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html) - """ - - def __init__(self, seed=None): - super().__init__( - scale=1.0, - mode='fan_avg', - distribution='truncated_normal', - seed=seed) - - def get_config(self): - return {'seed': self.seed} - - -@keras_export('keras.initializers.LecunNormal', - 'keras.initializers.lecun_normal', - v1=[]) -class LecunNormal(VarianceScaling): - """Lecun normal initializer. - - Also available via the shortcut function - `tf.keras.initializers.lecun_normal`. - - Initializers allow you to pre-specify an initialization strategy, encoded in - the Initializer object, without knowing the shape and dtype of the variable - being initialized. - - Draws samples from a truncated normal distribution centered on 0 with `stddev - = sqrt(1 / fan_in)` where `fan_in` is the number of input units in the weight - tensor. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.LecunNormal() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.LecunNormal() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. - - References: - - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515) - """ - - def __init__(self, seed=None): - super().__init__( - scale=1., mode='fan_in', distribution='truncated_normal', seed=seed) - - def get_config(self): - return {'seed': self.seed} - - -@keras_export('keras.initializers.LecunUniform', - 'keras.initializers.lecun_uniform', - v1=[]) -class LecunUniform(VarianceScaling): - """Lecun uniform initializer. - - Also available via the shortcut function - `tf.keras.initializers.lecun_uniform`. - - Draws samples from a uniform distribution within `[-limit, limit]`, - where `limit = sqrt(3 / fan_in)` (`fan_in` is the number of input units in the - weight tensor). - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.LecunUniform() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.LecunUniform() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. 
- - References: - - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515) - """ - - def __init__(self, seed=None): - super().__init__( - scale=1., mode='fan_in', distribution='uniform', seed=seed) - - def get_config(self): - return {'seed': self.seed} - - -@keras_export('keras.initializers.HeNormal', - 'keras.initializers.he_normal', - v1=[]) -class HeNormal(VarianceScaling): - """He normal initializer. - - Also available via the shortcut function - `tf.keras.initializers.he_normal`. - - It draws samples from a truncated normal distribution centered on 0 with - `stddev = sqrt(2 / fan_in)` where `fan_in` is the number of input units in the - weight tensor. - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.HeNormal() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.HeNormal() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. - - References: - - [He et al., 2015](https://arxiv.org/abs/1502.01852) - """ - - def __init__(self, seed=None): - super().__init__( - scale=2., mode='fan_in', distribution='truncated_normal', seed=seed) - - def get_config(self): - return {'seed': self.seed} - - -@keras_export('keras.initializers.HeUniform', - 'keras.initializers.he_uniform', - v1=[]) -class HeUniform(VarianceScaling): - """He uniform variance scaling initializer. - - Also available via the shortcut function - `tf.keras.initializers.he_uniform`. - - Draws samples from a uniform distribution within `[-limit, limit]`, where - `limit = sqrt(6 / fan_in)` (`fan_in` is the number of input units in the - weight tensor). - - Examples: - - >>> # Standalone usage: - >>> initializer = tf.keras.initializers.HeUniform() - >>> values = initializer(shape=(2, 2)) - - >>> # Usage in a Keras layer: - >>> initializer = tf.keras.initializers.HeUniform() - >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer) - - Args: - seed: A Python integer. Used to make the behavior of the initializer - deterministic. Note that a seeded - initializer will not produce the same random values across multiple calls, - but multiple initializers will produce the same sequence when constructed - with the same seed value. - - References: - - [He et al., 2015](https://arxiv.org/abs/1502.01852) - """ - - def __init__(self, seed=None): - super().__init__( - scale=2., mode='fan_in', distribution='uniform', seed=seed) - - def get_config(self): - return {'seed': self.seed} - - -def _get_dtype(dtype): - if dtype is None: - dtype = backend.floatx() - return tf.as_dtype(dtype) - - -def _assert_float_dtype(dtype): - """Validate and return floating point type based on `dtype`. - - `dtype` must be a floating point type. - - Args: - dtype: The data type to validate. - - Returns: - Validated type. - - Raises: - ValueError: if `dtype` is not a floating point type. - """ - dtype = tf.as_dtype(dtype) - if not dtype.is_floating: - raise ValueError(f'Expected floating point type, got {dtype}.') - return dtype - - -def _compute_fans(shape): - """Computes the number of input and output units for a weight shape. - - Args: - shape: Integer shape tuple or TF tensor shape. - - Returns: - A tuple of integer scalars (fan_in, fan_out). 
- """ - if len(shape) < 1: # Just to avoid errors for constants. - fan_in = fan_out = 1 - elif len(shape) == 1: - fan_in = fan_out = shape[0] - elif len(shape) == 2: - fan_in = shape[0] - fan_out = shape[1] - else: - # Assuming convolution kernels (2D, 3D, or more). - # kernel shape: (..., input_depth, depth) - receptive_field_size = 1 - for dim in shape[:-2]: - receptive_field_size *= dim - fan_in = shape[-2] * receptive_field_size - fan_out = shape[-1] * receptive_field_size - return int(fan_in), int(fan_out) - - -def _validate_kwargs(cls_name, kwargs, support_partition=True): - invalid_kwargs = [k for k in kwargs if k not in _ALLOWED_INITIALIZER_KWARGS] - if invalid_kwargs: - raise TypeError(f'Unknown keyword arguments: {invalid_kwargs}. Allowed ' - f'keyword arguments: {_ALLOWED_INITIALIZER_KWARGS}.') - if not support_partition and (_PARTITION_SHAPE in kwargs or - _PARTITION_OFFSET in kwargs): - raise ValueError(f'{cls_name} initializer doesn\'t support ' - 'partition-related arguments.') - - -def _ensure_keras_seeded(): - """Make sure the keras.backend global seed generator is set. - - This is important for DTensor use case to ensure that each client are - initialized with same seed for tf.random.Generator, so that the value created - are in sync among all the clients. - """ - if not getattr(backend._SEED_GENERATOR, 'generator', None): # pylint:disable=protected-access - raise ValueError('When using DTensor APIs, you need to set the global seed ' - 'before using any Keras initializers. Please make sure ' - 'to call `tf.keras.utils.set_random_seed()` in your code.') diff --git a/keras/integration_test/BUILD b/keras/integration_test/BUILD index 9d520a57e65b..348db2520583 100644 --- a/keras/integration_test/BUILD +++ b/keras/integration_test/BUILD @@ -1,12 +1,14 @@ # Description: # Contains Keras integration tests that verify with other TF high level APIs. 
+# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "cuda_py_test") load("@org_keras//keras:keras.bzl", "tf_py_test") # buildifier: disable=same-origin-load load("@org_keras//keras:keras.bzl", "tpu_py_test") load("@org_keras//keras:keras.bzl", "distribute_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/tensorflow/tools/pip_package:__pkg__", @@ -89,28 +91,28 @@ cuda_py_test( name = "gradient_checkpoint_test", srcs = ["gradient_checkpoint_test.py"], python_version = "PY3", + tags = ["no_oss"], # TODO(b/249526796) deps = [ "//:expect_tensorflow_installed", "//keras/api:keras_api", ], ) -# cuda_py_test( -# name = "central_storage_strategy_test", -# srcs = ["central_storage_strategy_test.py"], -# python_version = "PY3", -# tags = [ -# "multi_and_single_gpu", -# "no_windows_gpu", # TODO(b/130551176) -# ], -# deps = [ -# "//:expect_absl_installed", -# "//:expect_tensorflow_installed", -# "//third_party/tensorflow/python/distribute:combinations", -# "//third_party/tensorflow/python/distribute:strategy_combinations", -# "//third_party/tensorflow/python/keras/utils:kpl_test_utils", -# ], -# ) +cuda_py_test( + name = "central_storage_strategy_test", + srcs = ["central_storage_strategy_test.py"], + python_version = "PY3", + tags = [ + "multi_and_single_gpu", + "no_windows_gpu", # TODO(b/130551176) + ], + deps = [ + "//:expect_absl_installed", + "//:expect_tensorflow_installed", + "//keras/api:keras_api", + "//keras/utils:kpl_test_utils", + ], +) tpu_py_test( name = "tpu_strategy_test", @@ -145,11 +147,28 @@ tf_py_test( ], ) +distribute_py_test( + name = "ctl_tutorial_test", + srcs = ["ctl_tutorial_test.py"], + main = "ctl_tutorial_test.py", + shard_count = 5, + tags = [ + "multi_and_single_gpu", + "nomultivm", # TODO(b/170502145) + ], + deps = [ + "//:expect_absl_installed", + "//:expect_tensorflow_installed", + "//keras/api:keras_api", + "//keras/distribute:strategy_combinations", + ], +) + distribute_py_test( name = "parameter_server_keras_preprocessing_test", srcs = ["parameter_server_keras_preprocessing_test.py"], python_version = "PY3", - shard_count = 4, # TODO(b/184290570): Investigate why only 1 shard times out. + shard_count = 6, # TODO(b/184290570): Investigate why only 1 shard times out. 
tags = [ "multi_and_single_gpu", "no_oss", # TODO(b/194935930): Flaky test @@ -304,3 +323,77 @@ tf_py_test( "//keras/testing_infra:test_combinations", ], ) + +tf_py_test( + name = "parameter_server_training_metric_test", + srcs = ["parameter_server_training_metric_test.py"], + python_version = "PY3", + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + "notsan", # TODO(b/156029134) + ], + deps = [ + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//keras", + "//keras/api:keras_api", + "//keras/testing_infra:test_combinations", + ], +) + +tf_py_test( + name = "fit_test", + size = "medium", + srcs = ["fit_test.py"], + python_version = "PY3", + shard_count = 28, + deps = [ + "//:expect_tensorflow_installed", + "//keras/api:keras_api", + "//keras/integration_test/models", + "//keras/testing_infra:test_combinations", + ], +) + +tf_py_test( + name = "saving_v3_test", + size = "medium", + srcs = ["saving_v3_test.py"], + python_version = "PY3", + shard_count = 12, + deps = [ + "//:expect_tensorflow_installed", + "//keras/api:keras_api", + "//keras/integration_test/models", + "//keras/testing_infra:test_combinations", + ], +) + +tf_py_test( + name = "py_metric_test", + size = "medium", + srcs = ["py_metric_test.py"], + python_version = "PY3", + shard_count = 2, + deps = [ + "//:expect_tensorflow_installed", + "//keras/api:keras_api", + "//keras/metrics", + "//keras/testing_infra:test_combinations", + ], +) + +tf_py_test( + name = "extension_type_test", + size = "medium", + srcs = ["extension_type_test.py"], + python_version = "PY3", + deps = [ + "//:expect_tensorflow_installed", + "//keras", + "//keras/api:keras_api", + "//keras/engine", + "//keras/engine:input_layer", + "//keras/saving", + ], +) diff --git a/keras/integration_test/central_storage_strategy_test.py b/keras/integration_test/central_storage_strategy_test.py index e0be1235a03c..5c1a670853c6 100644 --- a/keras/integration_test/central_storage_strategy_test.py +++ b/keras/integration_test/central_storage_strategy_test.py @@ -14,73 +14,81 @@ # ============================================================================== """Tests for KPL + CentralStorageStrategy.""" -from absl.testing import parameterized import tensorflow.compat.v2 as tf +from absl.testing import parameterized -from tensorflow.python.distribute import combinations as ds_combinations -from tensorflow.python.distribute import strategy_combinations -from tensorflow.python.framework import test_combinations as combinations -from tensorflow.python.keras.utils import kpl_test_utils +# isort: off +from tensorflow.compat.v2.__internal__.distribute import combinations +from keras.utils import kpl_test_utils # TODO(b/182278926): Combine this test with other strategies. 
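Aside: `CentralStorageStrategy`, which the reformatted test below exercises, keeps variables on a single device and replicates compute across the others. A minimal usage sketch against the public API (the layer size and data here are arbitrary):

```python
import tensorflow as tf

# Variables live in one central location; compute is replicated.
strategy = tf.distribute.experimental.CentralStorageStrategy()

with strategy.scope():
    # Variables created here (the Dense kernel/bias) are centrally stored.
    model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
    model.compile(optimizer="sgd", loss="mse")

x = tf.random.uniform((8, 4))
y = tf.random.uniform((8, 1))
model.fit(x, y, epochs=1, verbose=0)
```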
-@ds_combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.central_storage_strategy_with_gpu_and_cpu, - ], - mode=["eager"])) +@combinations.generate( + tf.__internal__.test.combinations.combine( + distribution=[combinations.central_storage_strategy_with_gpu_and_cpu], + mode=["eager"], + ) +) class CentralStorageStrategyTest(tf.test.TestCase, parameterized.TestCase): + def testTrainAndServeWithKPL(self, distribution): + use_adapt = False + test_utils_obj = kpl_test_utils.DistributeKplTestUtils() + with distribution.scope(): + ( + feature_mapper, + label_mapper, + ) = test_utils_obj.define_kpls_for_training(use_adapt) + model = test_utils_obj.define_model() + optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1) + accuracy = tf.keras.metrics.Accuracy() - def testTrainAndServeWithKPL(self, distribution): - use_adapt = False - test_utils_obj = kpl_test_utils.DistributeKplTestUtils() - with distribution.scope(): - feature_mapper, label_mapper = test_utils_obj.define_kpls_for_training( - use_adapt) - model = test_utils_obj.define_model() - optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1) - accuracy = tf.keras.metrics.Accuracy() - - def dataset_fn(_): - return test_utils_obj.dataset_fn(feature_mapper, label_mapper) + def dataset_fn(_): + return test_utils_obj.dataset_fn(feature_mapper, label_mapper) - @tf.function - def train_step(iterator): - """The step function for one training step.""" + @tf.function + def train_step(iterator): + """The step function for one training step.""" - def step_fn(inputs): - """The computation to run on each replica.""" - features, labels = inputs - with tf.GradientTape() as tape: - pred = model(features, training=True) - loss = tf.keras.losses.binary_crossentropy(labels, pred) - loss = tf.nn.compute_average_loss(loss) - grads = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(list(zip(grads, model.trainable_variables))) + def step_fn(inputs): + """The computation to run on each replica.""" + features, labels = inputs + with tf.GradientTape() as tape: + pred = model(features, training=True) + loss = tf.keras.losses.binary_crossentropy(labels, pred) + loss = tf.nn.compute_average_loss(loss) + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients( + list(zip(grads, model.trainable_variables)) + ) - actual_pred = tf.cast(tf.math.greater(pred, 0.5), tf.dtypes.int64) - accuracy.update_state(labels, actual_pred) + actual_pred = tf.cast( + tf.math.greater(pred, 0.5), tf.dtypes.int64 + ) + accuracy.update_state(labels, actual_pred) - distribution.run(step_fn, args=(next(iterator),)) + distribution.run(step_fn, args=(next(iterator),)) - distributed_dataset = distribution.distribute_datasets_from_function( - dataset_fn) - distributed_iterator = iter(distributed_dataset) - num_epochs = 4 - num_steps = 7 - for _ in range(num_epochs): - accuracy.reset_state() - for _ in range(num_steps): - train_step(distributed_iterator) + distributed_dataset = ( + distribution.distribute_datasets_from_function(dataset_fn) + ) + distributed_iterator = iter(distributed_dataset) + num_epochs = 4 + num_steps = 7 + for _ in range(num_epochs): + accuracy.reset_state() + for _ in range(num_steps): + train_step(distributed_iterator) - self.assertGreater(accuracy.result().numpy(), 0.5) - self.assertEqual(optimizer.iterations.numpy(), num_epochs * num_steps) + self.assertGreater(accuracy.result().numpy(), 0.5) + self.assertEqual( + optimizer.iterations.numpy(), num_epochs * num_steps + ) - # Test 
save/load/serving the trained model. - test_utils_obj.test_save_load_serving_model( - model, feature_mapper, test_utils_obj.define_reverse_lookup_layer()) + # Test save/load/serving the trained model. + test_utils_obj.test_save_load_serving_model( + model, feature_mapper, test_utils_obj.define_reverse_lookup_layer() + ) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/integration_test/ctl_tutorial_test.py b/keras/integration_test/ctl_tutorial_test.py new file mode 100644 index 000000000000..e700d9ed4e93 --- /dev/null +++ b/keras/integration_test/ctl_tutorial_test.py @@ -0,0 +1,451 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests that Custom Training Loop docs match actual behavior. + +The tutorial at https://www.tensorflow.org/tutorials/distribute/custom_training, +defined at +https://github.com/tensorflow/docs/blob/master/site/en/tutorials/distribute/custom_training.ipynb +makes several statements about + + * ways to reduce loss terms to the actual training loss, and + * how they compare to the built-in behavior of Keras Model.fit(). + +This test verifies that these statements match the actual behavior, +under a variety of distribution strategies. +""" + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.distribute import strategy_combinations + + +def make_compute_loss_fn(variant, loss_object, GLOBAL_BATCH_SIZE): + """Returns the `compute_loss()` function as defined in the tutorial.""" + + if variant == "basic": + # The basic form of the loss function, shown verbatim in the tutorial. + def compute_loss(labels, predictions, model_losses): + per_example_loss = loss_object(labels, predictions) + loss = tf.nn.compute_average_loss(per_example_loss) + if model_losses: + loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses)) + return loss + + elif variant == "fixed_batch_size": + # The variant that adds a fixed `global_batch_size=` arg + # (described but not shown verbatim). + def compute_loss(labels, predictions, model_losses): + per_example_loss = loss_object(labels, predictions) + loss = tf.nn.compute_average_loss( + per_example_loss, global_batch_size=GLOBAL_BATCH_SIZE + ) + if model_losses: + loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses)) + return loss + + elif variant == "balanced": + # The variant that scales the loss to balance out varying batch sizes + # (described but not shown verbatim). 
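Aside: the difference between the "basic" and "fixed_batch_size" variants above is easy to see outside any strategy, where the observed global batch size equals the local batch size. A small standalone check of `tf.nn.compute_average_loss` with illustrative values:

```python
import tensorflow as tf

# A singleton batch: one example with per-example loss 1.0.
per_example_loss = tf.constant([1.0])

# "basic": divides by the observed batch size (1), so the lone example
# carries the full weight that a full batch would spread over 4 examples.
print(tf.nn.compute_average_loss(per_example_loss).numpy())  # 1.0

# "fixed_batch_size": dividing by a fixed global batch size of 4 restores
# the per-example weight the example would have had in a full batch.
print(
    tf.nn.compute_average_loss(per_example_loss, global_batch_size=4).numpy()
)  # 0.25
```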
+ def compute_loss(labels, predictions, model_losses): + per_example_loss = loss_object(labels, predictions) + loss = tf.nn.compute_average_loss(per_example_loss) + if model_losses: + loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses)) + observed_global_batch_size = ( + tf.distribute.get_strategy().num_replicas_in_sync + * tf.shape(per_example_loss)[0] + ) + loss *= tf.math.divide( + tf.cast(observed_global_batch_size, tf.float32), + tf.cast(GLOBAL_BATCH_SIZE, tf.float32), + ) + return loss + + else: + raise ValueError(f"Unknown {variant=}") + + return compute_loss + + +def create_dataset(global_batch_size): + """Creates the dataset for ImpliedExampleWeightsTest. + + It contains two batches: the first has full size, the second just 1 element. + The i-th element `(x,y)` has model input `x = onehot(i)` and label `y = 0`. + """ + n = global_batch_size + 1 + ds = tf.data.Dataset.from_tensor_slices((tf.eye(n), tf.zeros([n, 1]))) + ds = ds.batch(global_batch_size) + return ds + + +def create_model(n): + """Creates the model for ImpliedExampleWeightsTest. + + The model has three trainable weights of interest, all initialized to 1.0: + + * "predicting/kernel:0" of shape [n, 1] maps a one-hot encoded input to + the model output. When used with the MeanAbsoluteError loss, an input + onehot(i) produces a gradient onehot(i) for this weight, subject to + the training loop's loss reduction across examples. + * "activity_regularized/kernel:0" of shape [n, 1] has an activity + regularizer loss in the model so that input onehot(i) produces a + gradient of 1/batch_size * onehot(i) for this weight. + * "weight_regularized:0" of shape [1] has a weight regularizer loss in + the model that produces a gradient of 1 for this weight, independent + of batch size. + """ + inputs = tf.keras.Input(shape=(n,), name="inputs") + + predicting = tf.keras.layers.Dense( + 1, use_bias=False, kernel_initializer="ones", name="predicting" + ) + activity_regularized = tf.keras.layers.Dense( + 1, + use_bias=False, + kernel_initializer="ones", + activity_regularizer=tf.keras.regularizers.L1(l1=1.0), + name="activity_regularized", + ) + weight_regularized = tf.keras.layers.Dense( + 1, + kernel_initializer="zeros", + bias_initializer="ones", + bias_regularizer=tf.keras.regularizers.L1(l1=1.0), + name="weight_regularized", + ) + + # Make outputs = predicting(inputs), depending on the other Layers as well. + add = tf.keras.layers.Add(name="add") + multiply = tf.keras.layers.Multiply(name="multiply") + outputs = add( + [ + predicting(inputs), + multiply( + [np.array([[0.0]], np.float32), activity_regularized(inputs)] + ), + multiply( + [np.array([[0.0]], np.float32), weight_regularized(inputs)] + ), + ] + ) + + model = tf.keras.Model(inputs, outputs) + return model + + +def create_loss(**kwargs): + """Returns the loss to be used with the model from create_model().""" + return tf.keras.losses.MeanAbsoluteError(**kwargs) + + +def create_optimizer(learning_rate): + """Returns the optimizer that applies gradients in the most obvious way.""" + return tf.keras.optimizers.SGD(learning_rate) + + +def get_expected_example_weights( + ctl_variant, *, local_batch_size, num_replicas_in_sync +): + """Returns the weights that examples have in the gradient updates seen.""" + + global_batch_size = local_batch_size * num_replicas_in_sync + n = global_batch_size + 1 + num_batches = 2 + + expected = dict( + # Examples in a full batch receive the expected gradient weight, + # independent of the CTL variant. 
+ example_prediction_fullbatch=1.0, + example_activity_fullbatch=1.0, + ) + if ctl_variant == "basic": + # In the basic variant of the CTL, when a batch of size 1 hits a + # replica, the singleton example receives the weight that is + # normally spread evenly across the local_batch_size. + expected["example_prediction_singleton"] = local_batch_size + expected["example_activity_singleton"] = local_batch_size + # Weight regularization applies equally in each batch, + # irrespective of its size. + expected["total_weight_regularization"] = num_batches + elif ctl_variant == "fixed_batch_size": + # In the CTL variant that fixes GLOBAL_BATCH_SIZE for the reduction + # of prediction losses, the weight of a singleton example is + # reverted to normal for prediction, but activity and weight + # regularization behaves as in the "basic" variant. + expected["example_prediction_singleton"] = 1.0 + expected["example_activity_singleton"] = local_batch_size + expected["total_weight_regularization"] = num_batches + elif ctl_variant == "balanced": + # The CTL variant that corrects both prediction and regularization + # losses for the batch size achieves equal weights of examples + # both for the prediction and for an activity regularizer + expected["example_prediction_singleton"] = 1.0 + expected["example_activity_singleton"] = 1.0 + # Weight regularization, in sync with the other loss terms, + # applies proportional to the number of examples. + expected["total_weight_regularization"] = n / global_batch_size + return expected + + +class MaybeStrategyScope: + """Provides a context allowing no distribution strategy.""" + + def __init__(self, strategy): + self._strategy = strategy + self._scope = None + + def __enter__(self): + if self._strategy: + self._scope = self._strategy.scope() + self._scope.__enter__() + + def __exit__(self, exc_type, value, traceback): + if self._strategy: + self._scope.__exit__(exc_type, value, traceback) + self._scope = None + + +class ImpliedExampleWeightsTest(tf.test.TestCase, parameterized.TestCase): + """Tests weights of loss terms depending on batch size and training loop.""" + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + strategy=strategy_combinations.all_strategies + + strategy_combinations.multiworker_strategies + + [None], + ctl_variant=["basic", "fixed_batch_size", "balanced"], + ) + ) + def test_ctl(self, strategy, ctl_variant): + """Tests a variant of the CTL under a distribution strategy.""" + if strategy is None: + num_replicas_in_sync = 1 + else: + num_replicas_in_sync = strategy.num_replicas_in_sync + + local_batch_size = 2 # For a full batch; greater than 1. 
+ global_batch_size = local_batch_size * num_replicas_in_sync + ds = create_dataset(global_batch_size) + if strategy is not None: + ds = strategy.experimental_distribute_dataset(ds) + + n = global_batch_size + 1 + learning_rate = 0.01 + with MaybeStrategyScope(strategy): + model = create_model(n) + loss_object = create_loss(reduction=tf.keras.losses.Reduction.NONE) + compute_loss = make_compute_loss_fn( + ctl_variant, loss_object, global_batch_size + ) + optimizer = create_optimizer(learning_rate) + + def train_step(inputs): + x, labels = inputs + with tf.GradientTape() as tape: + predictions = model(x, training=True) + loss = compute_loss(labels, predictions, model.losses) + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients( + zip(gradients, model.trainable_variables) + ) + return loss + + @tf.function + def wrapped_train_step(inputs): + if strategy is None: + return train_step(inputs) + else: + per_replica_losses = strategy.run( + train_step, args=(inputs,) + ) + return strategy.reduce( + tf.distribute.ReduceOp.SUM, + per_replica_losses, + axis=None, + ) + + num_epochs = 1 + num_batches = 0 + for epoch in range(num_epochs): + total_loss = 0.0 + for x in ds: + total_loss += wrapped_train_step(x) + num_batches += 1 + train_loss = total_loss / num_batches + self.assertTrue(tf.math.is_finite(train_loss).numpy()) + + self.assertEqual(num_batches, 2) + + expected = get_expected_example_weights( + ctl_variant, + local_batch_size=local_batch_size, + num_replicas_in_sync=num_replicas_in_sync, + ) + self.assert_implied_example_weights( + model, + **expected, + rtol=1e-6 if strategy is None else 1e-4, + learning_rate=learning_rate, + global_batch_size=global_batch_size, + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + strategy=strategy_combinations.all_strategies + + strategy_combinations.multiworker_strategies + + [None], + ) + ) + def test_fit(self, strategy): + """Tests Model.fit().""" + if strategy is None: + num_replicas_in_sync = 1 + else: + num_replicas_in_sync = strategy.num_replicas_in_sync + + local_batch_size = 2 # For a full batch; greater than 1. + global_batch_size = local_batch_size * num_replicas_in_sync + ds = create_dataset(global_batch_size) + + n = global_batch_size + 1 + learning_rate = 0.01 + with MaybeStrategyScope(strategy): + model = create_model(n) + model.compile( + optimizer=create_optimizer(learning_rate), loss=create_loss() + ) + epochs = 1 + steps_per_epoch = 2 + model.fit(ds, epochs=epochs, steps_per_epoch=steps_per_epoch) + + expected = get_expected_example_weights( + ctl_variant="basic", # The tutorial claims this consistency! 
+ local_batch_size=local_batch_size, + num_replicas_in_sync=num_replicas_in_sync, + ) + self.assert_implied_example_weights( + model, + **expected, + rtol=1e-6 if strategy is None else 1e-4, + learning_rate=learning_rate, + global_batch_size=global_batch_size, + ) + + def assert_implied_example_weights( + self, + model, + *, + learning_rate, + global_batch_size, + rtol, + example_prediction_fullbatch, + example_prediction_singleton, + example_activity_fullbatch, + example_activity_singleton, + total_weight_regularization, + ): + """Checks model.weights for the expected effects of training.""" + model_weights = { + v.name: self._get_var_value(v).numpy() + for v in model.trainable_variables + } + + # The total weight received by each one-hot example in the prediction + # loss is the change of its corresponding weight from the initial + # value 1, adjusted for the expected averaging by global_batch_size and + # scaling by SGD's learning_rate. + predicting_kernel = model_weights["predicting/kernel:0"] + example_prediction_weights = ( + (1.0 - predicting_kernel) / learning_rate * global_batch_size + ) + # There was one full batch of examples, followed by a singleton. + self.assertEqual(predicting_kernel.shape, (global_batch_size + 1, 1)) + # Check the examples in the full batch. + actual_example_prediction_fullbatch = self.reduce_assert_equal( + example_prediction_weights[:-1, 0] + ) + self.assertAllClose( + example_prediction_fullbatch, + actual_example_prediction_fullbatch, + rtol=rtol, + ) + # Check the singleton example after the full batch. + actual_example_prediction_singleton = example_prediction_weights[-1, 0] + self.assertAllClose( + example_prediction_singleton, + actual_example_prediction_singleton, + rtol=rtol, + ) + + # Analogous to predictions, check weights for activity regularization. + activity_regularized_kernel = model_weights[ + "activity_regularized/kernel:0" + ] + example_activity_weights = ( + (1.0 - activity_regularized_kernel) + / learning_rate + * global_batch_size + ) + self.assertEqual( + activity_regularized_kernel.shape, (global_batch_size + 1, 1) + ) + actual_example_activity_fullbatch = self.reduce_assert_equal( + example_activity_weights[:-1, 0] + ) + self.assertAllClose( + example_activity_fullbatch, + actual_example_activity_fullbatch, + rtol=rtol, + ) + actual_example_activity_singleton = example_activity_weights[-1, 0] + self.assertAllClose( + example_activity_singleton, + actual_example_activity_singleton, + rtol=rtol, + ) + + # The total weight of weight regularization is the change of this + # (otherwise unused) bias term from its initial value 1, + # adjusted for the expected scaling by SGD's learning_rate. + actual_total_weight_regularization = ( + 1.0 - model_weights["weight_regularized/bias:0"][0] + ) / learning_rate + self.assertAllClose( + total_weight_regularization, + actual_total_weight_regularization, + rtol=rtol, + ) + + def reduce_assert_equal(self, x): + """Returns first element of x and asserts all others are equal.""" + result = x[0] + for i, value in enumerate(x[1:]): + self.assertAllEqual(result, value, msg=f"at position {i=}") + return result + + def _get_var_value(self, var): + """Returns the (unique) value of a (possibly distributed) Variable.""" + if hasattr(var, "values"):  # Distributed.
+ result = self.reduce_assert_equal([v.value() for v in var.values]) + else: + result = var.value() + return result + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/integration_test/custom_object_saving_test.py b/keras/integration_test/custom_object_saving_test.py index a9d8eb97911d..3c20d80d42a2 100644 --- a/keras/integration_test/custom_object_saving_test.py +++ b/keras/integration_test/custom_object_saving_test.py @@ -16,137 +16,136 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + import os import sys -from absl.testing import parameterized -from keras.saving.experimental import saving_lib -from keras.testing_infra import test_utils -from keras.utils import generic_utils + import numpy as np import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.testing_infra import test_utils +from keras.utils import get_custom_objects # `tf.print` message is only available in stderr in TF2, which this test checks. @test_utils.run_v2_only class CustomObjectSavingTest(tf.test.TestCase, parameterized.TestCase): - """Test for custom Keras object saving with `register_keras_serializable`.""" - - def setUp(self): - super().setUp() - generic_utils.get_custom_objects().clear() - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], idempotent_saving_enabled=[True, False])) - def test_register_keras_serializable_correct_class(self, - idempotent_saving_enabled): - saving_lib._ENABLED = idempotent_saving_enabled - - train_step_message = 'This is my training step' - temp_dir = os.path.join(self.get_temp_dir(), 'my_model') - - @tf.keras.utils.register_keras_serializable('CustomModelX') - class CustomModelX(tf.keras.Model): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.dense1 = MyDense( - 1, - kernel_regularizer=MyRegularizer(0.01), - activity_regularizer=MyRegularizer(0.01)) - - def call(self, inputs): - return self.dense1(inputs) - - def train_step(self, data): - tf.print(train_step_message) - x, y = data - with tf.GradientTape() as tape: - y_pred = self(x) - loss = self.compiled_loss(y, y_pred) - - gradients = tape.gradient(loss, self.trainable_variables) - self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) - return {} - - def one(self): - return 1 - - @tf.keras.utils.register_keras_serializable('MyDense') - class MyDense(tf.keras.layers.Dense): - - def two(self): - return 2 - - @tf.keras.utils.register_keras_serializable('MyAdam') - class MyAdam(tf.keras.optimizers.Adam): - - def three(self): - return 3 - - @tf.keras.utils.register_keras_serializable('MyLoss') - class MyLoss(tf.keras.losses.MeanSquaredError): - - def four(self): - return 4 - - @tf.keras.utils.register_keras_serializable('MyMetric') - class MyMetric(tf.keras.metrics.MeanAbsoluteError): - - def five(self): - return 5 - - @tf.keras.utils.register_keras_serializable('MyRegularizer') - class MyRegularizer(tf.keras.regularizers.L2): - - def six(self): - return 6 - - @tf.keras.utils.register_keras_serializable('my_sq_diff') - def my_sq_diff(y_true, y_pred): - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - sq_diff_plus_x = tf.math.squared_difference(y_pred, y_true) - return tf.reduce_mean(sq_diff_plus_x, axis=-1) - - subclassed_model = CustomModelX() - subclassed_model.compile( - optimizer=MyAdam(), loss=MyLoss(), metrics=[MyMetric(), 
my_sq_diff]) - - x = np.random.random((100, 32)) - y = np.random.random((100, 1)) - subclassed_model.fit(x, y, epochs=1) - subclassed_model.save(temp_dir, save_format='tf') - - loaded_model = tf.keras.models.load_model(temp_dir) - - # `tf.print` writes to stderr. - with self.captureWritesToStream(sys.stderr) as printed: - loaded_model.fit(x, y, epochs=1) - self.assertRegex(printed.contents(), train_step_message) - - # Check that the custom classes do get used. - self.assertIs(loaded_model.__class__, CustomModelX) - self.assertIs(loaded_model.optimizer.__class__, MyAdam) - self.assertIs(loaded_model.compiled_loss._losses[0].__class__, MyLoss) - self.assertIs(loaded_model.compiled_metrics._metrics[0].__class__, MyMetric) - self.assertIs(loaded_model.compiled_metrics._metrics[1], my_sq_diff) - self.assertIs(loaded_model.layers[0].__class__, MyDense) - self.assertIs(loaded_model.layers[0].activity_regularizer.__class__, - MyRegularizer) - self.assertIs(loaded_model.layers[0].kernel_regularizer.__class__, - MyRegularizer) - - # Check that the custom methods are available. - self.assertEqual(loaded_model.one(), 1) - self.assertEqual(loaded_model.layers[0].two(), 2) - self.assertEqual(loaded_model.optimizer.three(), 3) - self.assertEqual(loaded_model.compiled_loss._losses[0].four(), 4) - self.assertEqual(loaded_model.compiled_metrics._metrics[0].five(), 5) - self.assertEqual(loaded_model.layers[0].activity_regularizer.six(), 6) - self.assertEqual(loaded_model.layers[0].kernel_regularizer.six(), 6) - self.assertEqual(loaded_model.compiled_metrics._metrics[1]([1], [3]), 4) - - -if __name__ == '__main__': - tf.test.main() + """Test for custom Keras object saving with + `register_keras_serializable`.""" + + def setUp(self): + super().setUp() + get_custom_objects().clear() + + def test_register_keras_serializable_correct_class(self): + train_step_message = "This is my training step" + temp_dir = os.path.join(self.get_temp_dir(), "my_model") + + @tf.keras.utils.register_keras_serializable("CustomModelX") + class CustomModelX(tf.keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dense1 = MyDense( + 1, + kernel_regularizer=MyRegularizer(0.01), + activity_regularizer=MyRegularizer(0.01), + ) + + def call(self, inputs): + return self.dense1(inputs) + + def train_step(self, data): + tf.print(train_step_message) + x, y = data + with tf.GradientTape() as tape: + y_pred = self(x) + loss = self.compiled_loss(y, y_pred) + + gradients = tape.gradient(loss, self.trainable_variables) + self.optimizer.apply_gradients( + zip(gradients, self.trainable_variables) + ) + return {} + + def one(self): + return 1 + + @tf.keras.utils.register_keras_serializable("MyDense") + class MyDense(tf.keras.layers.Dense): + def two(self): + return 2 + + @tf.keras.utils.register_keras_serializable("MyAdam") + class MyAdam(tf.keras.optimizers.Adam): + def three(self): + return 3 + + @tf.keras.utils.register_keras_serializable("MyLoss") + class MyLoss(tf.keras.losses.MeanSquaredError): + def four(self): + return 4 + + @tf.keras.utils.register_keras_serializable("MyMetric") + class MyMetric(tf.keras.metrics.MeanAbsoluteError): + def five(self): + return 5 + + @tf.keras.utils.register_keras_serializable("MyRegularizer") + class MyRegularizer(tf.keras.regularizers.L2): + def six(self): + return 6 + + @tf.keras.utils.register_keras_serializable("my_sq_diff") + def my_sq_diff(y_true, y_pred): + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + sq_diff_plus_x = 
tf.math.squared_difference(y_pred, y_true) + return tf.reduce_mean(sq_diff_plus_x, axis=-1) + + subclassed_model = CustomModelX() + subclassed_model.compile( + optimizer=MyAdam(), loss=MyLoss(), metrics=[MyMetric(), my_sq_diff] + ) + + x = np.random.random((100, 32)) + y = np.random.random((100, 1)) + subclassed_model.fit(x, y, epochs=1) + subclassed_model.save(temp_dir, save_format="tf") + + loaded_model = tf.keras.models.load_model(temp_dir) + + # `tf.print` writes to stderr. + with self.captureWritesToStream(sys.stderr) as printed: + loaded_model.fit(x, y, epochs=1) + self.assertRegex(printed.contents(), train_step_message) + + # Check that the custom classes do get used. + self.assertIs(loaded_model.__class__, CustomModelX) + self.assertIs(loaded_model.optimizer.__class__, MyAdam) + self.assertIs(loaded_model.compiled_loss._losses[0].__class__, MyLoss) + self.assertIs( + loaded_model.compiled_metrics._metrics[0].__class__, MyMetric + ) + self.assertIs(loaded_model.compiled_metrics._metrics[1], my_sq_diff) + self.assertIs(loaded_model.layers[0].__class__, MyDense) + self.assertIs( + loaded_model.layers[0].activity_regularizer.__class__, MyRegularizer + ) + self.assertIs( + loaded_model.layers[0].kernel_regularizer.__class__, MyRegularizer + ) + + # Check that the custom methods are available. + self.assertEqual(loaded_model.one(), 1) + self.assertEqual(loaded_model.layers[0].two(), 2) + self.assertEqual(loaded_model.optimizer.three(), 3) + self.assertEqual(loaded_model.compiled_loss._losses[0].four(), 4) + self.assertEqual(loaded_model.compiled_metrics._metrics[0].five(), 5) + self.assertEqual(loaded_model.layers[0].activity_regularizer.six(), 6) + self.assertEqual(loaded_model.layers[0].kernel_regularizer.six(), 6) + self.assertEqual(loaded_model.compiled_metrics._metrics[1]([1], [3]), 4) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/integration_test/distributed_training_test.py b/keras/integration_test/distributed_training_test.py index 8f2ec67905cc..a0aa112d998b 100644 --- a/keras/integration_test/distributed_training_test.py +++ b/keras/integration_test/distributed_training_test.py @@ -17,7 +17,11 @@ from __future__ import division from __future__ import print_function +import glob +import os + import tensorflow.compat.v2 as tf + ds_combinations = tf.__internal__.distribute.combinations # Note: Strategy combinations are not (yet) public APIs, so they are subject @@ -25,7 +29,7 @@ # TODO(b/188763034): Proceed to export the strategy combinations as public APIs. STRATEGIES = [ ds_combinations.default_strategy, - ds_combinations.mirrored_strategy_with_cpu_1_and_2, + ds_combinations.mirrored_strategy_with_two_cpus, ds_combinations.mirrored_strategy_with_two_gpus, ds_combinations.tpu_strategy, ds_combinations.cloud_tpu_strategy, @@ -38,39 +42,88 @@ @ds_combinations.generate( - tf.__internal__.test.combinations.combine( - strategy=STRATEGIES, mode="eager")) + tf.__internal__.test.combinations.combine(strategy=STRATEGIES, mode="eager") +) class DistributedTrainingTest(tf.test.TestCase): - """Test to demonstrate basic Keras training with a variety of strategies.""" + """Test to demonstrate basic Keras training with a variety of strategies.""" + + def testKerasTrainingAPI(self, strategy): + if not tf.__internal__.tf2.enabled() and isinstance( + strategy, tf.distribute.experimental.ParameterServerStrategy + ): + self.skipTest( + "Parameter Server strategy with dataset creator needs to be run " + "when eager execution is enabled."
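Aside: the custom-object test earlier in this diff hinges on `tf.keras.utils.register_keras_serializable`. A minimal sketch of that mechanism; the package name, class name, and save path below are made up for illustration:

```python
import tensorflow as tf


@tf.keras.utils.register_keras_serializable(package="Demo")
class TwoXDense(tf.keras.layers.Dense):
    """A registered subclass: load_model can rebuild it without custom_objects."""

    def call(self, inputs):
        return 2.0 * super().call(inputs)


model = tf.keras.Sequential([TwoXDense(1, input_shape=(4,))])
model.save("/tmp/demo_model", save_format="tf")
# Deserialization finds TwoXDense via its "Demo>TwoXDense" registry key.
reloaded = tf.keras.models.load_model("/tmp/demo_model")
assert isinstance(reloaded.layers[0], TwoXDense)
```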
+ ) + + # A `dataset_fn` is required for `Model.fit` to work across all + # strategies. + def dataset_fn(input_context): + batch_size = input_context.get_per_replica_batch_size( + global_batch_size=64 + ) + x = tf.random.uniform((10, 10)) + y = tf.random.uniform((10,)) + dataset = ( + tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10).repeat() + ) + dataset = dataset.shard( + input_context.num_input_pipelines, + input_context.input_pipeline_id, + ) + return dataset.batch(batch_size).prefetch(2) + + with strategy.scope(): + model = tf.keras.Sequential([tf.keras.layers.Dense(10)]) + optimizer = tf.keras.optimizers.SGD() + model.compile(optimizer, loss="mse", steps_per_execution=5) + + x = tf.keras.utils.experimental.DatasetCreator(dataset_fn) - def testKerasTrainingAPI(self, strategy): - if (not tf.__internal__.tf2.enabled() - and isinstance(strategy, - tf.distribute.experimental.ParameterServerStrategy)): - self.skipTest( - "Parameter Server strategy with dataset creator need to be run when " - "eager execution is enabled.") + logdir = os.path.join(self.get_temp_dir(), "logdir") + model.fit( + x, + epochs=2, + steps_per_epoch=20, + callbacks=[ + tf.keras.callbacks.TensorBoard( + logdir, + update_freq=5, + write_steps_per_second=True, + ) + ], + ) - # A `dataset_fn` is required for `Model.fit` to work across all strategies. - def dataset_fn(input_context): - batch_size = input_context.get_per_replica_batch_size( - global_batch_size=64) - x = tf.random.uniform((10, 10)) - y = tf.random.uniform((10,)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10).repeat() - dataset = dataset.shard( - input_context.num_input_pipelines, input_context.input_pipeline_id) - return dataset.batch(batch_size).prefetch(2) + events_got = [] + for event_file in glob.glob(logdir + "/train/events.out.*"): + for event in tf.compat.v1.train.summary_iterator(event_file): + if not event.summary: + continue + for value in event.summary.value: + if value.tag != "batch_loss": + continue + events_got += [event.step] - with strategy.scope(): - model = tf.keras.Sequential([tf.keras.layers.Dense(10)]) - optimizer = tf.keras.optimizers.SGD() - model.compile(optimizer, loss="mse", steps_per_execution=10) + # total steps = epochs * steps_per_epoch + events_expected = [5, 10, 15, 20, 25, 30, 35, 40] - x = tf.keras.utils.experimental.DatasetCreator(dataset_fn) + if isinstance( + strategy, tf.distribute.experimental.ParameterServerStrategy + ): + # Metrics are not logged with this strategy as they are not + # immediately available on batch end + events_expected = [] + if ( + strategy.cluster_resolver + and strategy.cluster_resolver.task_type == "worker" + ): + # The below assertion is run by both chief and workers when using + # `tf.distribute.MultiWorkerMirroredStrategy`, but only the chief + # will log events. 
+ events_expected = [] - model.fit(x, epochs=2, steps_per_epoch=10) + self.assertEqual(events_got, events_expected) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/integration_test/extension_type_test.py b/keras/integration_test/extension_type_test.py new file mode 100644 index 000000000000..a7a0d050566f --- /dev/null +++ b/keras/integration_test/extension_type_test.py @@ -0,0 +1,102 @@ +"""Test Model inference and save/load with an ExtensionType.""" + +import os +import typing + +import tensorflow.compat.v2 as tf + +import keras +from keras.engine.input_layer import Input +from keras.engine.training import Model +from keras.saving.saving_api import load_model +from keras.testing_infra import test_utils + + +class MaskedTensor(tf.experimental.BatchableExtensionType): + """Example subclass of ExtensionType, used for testing. + + This version adds the Keras-required properties to MaskedTensor and its Spec + class, to test Keras integration. + """ + + __name__ = "tf.test.MaskedTensor.Spec" + + values: typing.Union[tf.Tensor, tf.RaggedTensor] + mask: typing.Union[tf.Tensor, tf.RaggedTensor] + + def __init__(self, values, mask): + if isinstance(values, tf.RaggedTensor): + assert isinstance(mask, tf.RaggedTensor) + assert mask.dtype == tf.dtypes.bool + else: + values = tf.convert_to_tensor(values) + mask = tf.convert_to_tensor(mask, tf.dtypes.bool) + self.values = values + self.mask = mask + + # Required by assert_input_compatibility in keras/engine/input_spec.py + @property + def shape(self): + return self.values.shape + + @property + def dtype(self): + return self.values.dtype + + class Spec: + + # Required by KerasTensor.shape in keras/engine/keras_tensor.py + @property + def shape(self): + return self.values._shape + + +class ExtensionTypeTest(tf.test.TestCase): + @test_utils.run_v2_only + def testKerasModel(self): + mt_spec = MaskedTensor.Spec( + tf.TensorSpec(shape=[None, 1], dtype=tf.dtypes.int32), + tf.TensorSpec(shape=[None, 1], dtype=tf.dtypes.bool), + ) + model_input = Input(type_spec=mt_spec) + model_output = keras.layers.Lambda( + lambda x: tf.identity(x, name="output") + )(model_input) + model = Model(inputs=model_input, outputs=model_output) + mt = MaskedTensor([[1], [2], [3]], [[True], [False], [True]]) + self.assertEqual(model(mt), mt) + ds = tf.data.Dataset.from_tensors(mt) + self.assertEqual(model.predict(ds), mt) + + with self.subTest("keras save"): + path = self.create_tempdir().full_path + model.save(path) + loaded_model = load_model(path) + self.assertEqual(loaded_model.input.type_spec, mt_spec) + self.assertEqual(loaded_model(mt), mt) + + loaded_fn = tf.saved_model.load(path) + self.assertEqual(loaded_fn(mt), mt) + with self.assertRaisesRegex( + ValueError, + "Could not find matching concrete function to call " + "loaded from the SavedModel", + ): + loaded_fn(MaskedTensor([1, 2, 3], [True, False, True])) + + # The serving_fn uses a flattened signature. + serving_fn = loaded_fn.signatures["serving_default"] + self.assertEqual( + serving_fn(args_0=mt.values, args_0_1=mt.mask)["lambda"], mt + ) + + with self.subTest("keras v3"): + path = os.path.join(self.create_tempdir().full_path, "model.keras") + model.save(path) + loaded_model = load_model(path, safe_mode=False) + self.assertEqual(loaded_model.input.type_spec, mt_spec) + self.assertEqual(loaded_model(mt), mt) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/integration_test/fit_test.py
b/keras/integration_test/fit_test.py new file mode 100644 index 000000000000..bbd0134d4cba --- /dev/null +++ b/keras/integration_test/fit_test.py @@ -0,0 +1,101 @@ +"""Test Model.fit across a diverse range of models.""" + +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.integration_test.models import bert +from keras.integration_test.models import dcgan +from keras.integration_test.models import edge_case_model +from keras.integration_test.models import efficientnet_v2 +from keras.integration_test.models import input_spec +from keras.integration_test.models import low_level_model +from keras.integration_test.models import mini_unet +from keras.integration_test.models import mini_xception +from keras.integration_test.models import retinanet +from keras.integration_test.models import structured_data_classification +from keras.integration_test.models import text_classification +from keras.integration_test.models import timeseries_forecasting +from keras.integration_test.models import vae +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + +# from keras.integration_test.models import ctc_speech_rnn +# from keras.integration_test.models import translation + + +def get_dataset(data_specs, batch_size): + values = tf.nest.map_structure(input_spec.spec_to_value, data_specs) + dataset = ( + tf.data.Dataset.from_tensor_slices(values) + .prefetch(batch_size * 2) + .batch(batch_size) + ) + return dataset + + +@test_utils.run_v2_only +class FitTest(test_combinations.TestCase): + @parameterized.named_parameters( + ("bert", bert), + # ("ctc_speech_rnn", ctc_speech_rnn), # Buggy? + ("dcgan", dcgan), + ("edge_case_model", edge_case_model), + ("efficientnet_v2", efficientnet_v2), + ("low_level_model", low_level_model), + ("mini_unet", mini_unet), + ("mini_xception", mini_xception), + ("retinanet", retinanet), + ("structured_data_classification", structured_data_classification), + ("text_classification", text_classification), + ("timeseries_forecasting", timeseries_forecasting), + # ("translation", translation), # Buggy? + ("vae", vae), + ) + def test_fit_on_all_models_with_sync_preprocessing(self, module): + batch_size = 4 + data_specs = module.get_data_spec(batch_size * 3) + dataset = get_dataset(data_specs, batch_size) + + model = module.get_model( + build=True, + compile=True, + jit_compile=False, + include_preprocessing=True, + ) + model.fit(dataset, epochs=1) + + @parameterized.named_parameters( + ("bert", bert), + # ("ctc_speech_rnn", ctc_speech_rnn), # Buggy? + ("dcgan", dcgan), + ("edge_case_model", edge_case_model), + ("efficientnet_v2", efficientnet_v2), + ("low_level_model", low_level_model), + # ("mini_unet", mini_unet), # Not XLA compatible b/c of UpSampling2D + ("mini_xception", mini_xception), + # ("retinanet", retinanet), # Not XLA compatible b/c of UpSampling2D + ("structured_data_classification", structured_data_classification), + ("text_classification", text_classification), + ("timeseries_forecasting", timeseries_forecasting), + # ("translation", translation), # Buggy? 
+ ("vae", vae), + ) + def test_fit_on_all_models_with_async_preprocessing_and_xla(self, module): + batch_size = 4 + data_specs = module.get_data_spec(batch_size * 3) + dataset = get_dataset(data_specs, batch_size) + preprocessor = module.get_input_preprocessor() + if preprocessor is not None: + dataset = dataset.map(lambda x, y: (preprocessor(x), y)) + + model = module.get_model( + build=True, + compile=True, + jit_compile=True, + include_preprocessing=False, + ) + model.fit(dataset, epochs=1) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/integration_test/forwardprop_test.py b/keras/integration_test/forwardprop_test.py index e786a16e0190..5ef71e591454 100644 --- a/keras/integration_test/forwardprop_test.py +++ b/keras/integration_test/forwardprop_test.py @@ -15,301 +15,348 @@ import functools -from absl.testing import parameterized import numpy as np import tensorflow.compat.v2 as tf +from absl.testing import parameterized def _jvp(f, primals, tangents): - """Compute the jacobian of `f` at `primals` multiplied by `tangents`.""" - with tf.autodiff.ForwardAccumulator(primals, tangents) as acc: - primals_out = f(*primals) - return primals_out, acc.jvp( - primals_out, unconnected_gradients=tf.UnconnectedGradients.ZERO) + """Compute the jacobian of `f` at `primals` multiplied by `tangents`.""" + with tf.autodiff.ForwardAccumulator(primals, tangents) as acc: + primals_out = f(*primals) + return primals_out, acc.jvp( + primals_out, unconnected_gradients=tf.UnconnectedGradients.ZERO + ) def _jacfwd(f, primals): - """Compute the jacobian of `f` at `primals` using forward-mode autodiff.""" - jac_flat = [] - flat_primals = tf.nest.flatten(primals) - tangent_mask = [tf.zeros_like(primal) for primal in flat_primals] - for primal_index, primal in enumerate(flat_primals): - primal_vector = tf.reshape(primal, [-1]) - primal_vector_length = tf.size(primal_vector) - jac_columns = [] - for element_index in tf.range(primal_vector_length): - mask = tf.one_hot(element_index, primal_vector_length) - tangent_mask[primal_index] = tf.reshape(mask, tf.shape(primal)) - jac_columns.append( - tf.nest.map_structure( - functools.partial(tf.reshape, shape=[-1]), - _jvp(f, primals, tf.nest.pack_sequence_as(primals, - tangent_mask))[1])) - jac_flat.append(tf.stack(jac_columns, axis=1)) - tangent_mask[primal_index] = tf.zeros_like(primal) - return tf.nest.pack_sequence_as(primals, jac_flat) + """Compute the jacobian of `f` at `primals` using forward-mode autodiff.""" + jac_flat = [] + flat_primals = tf.nest.flatten(primals) + tangent_mask = [tf.zeros_like(primal) for primal in flat_primals] + for primal_index, primal in enumerate(flat_primals): + primal_vector = tf.reshape(primal, [-1]) + primal_vector_length = tf.size(primal_vector) + jac_columns = [] + for element_index in tf.range(primal_vector_length): + mask = tf.one_hot(element_index, primal_vector_length) + tangent_mask[primal_index] = tf.reshape(mask, tf.shape(primal)) + jac_columns.append( + tf.nest.map_structure( + functools.partial(tf.reshape, shape=[-1]), + _jvp( + f, + primals, + tf.nest.pack_sequence_as(primals, tangent_mask), + )[1], + ) + ) + jac_flat.append(tf.stack(jac_columns, axis=1)) + tangent_mask[primal_index] = tf.zeros_like(primal) + return tf.nest.pack_sequence_as(primals, jac_flat) def _grad(f, argnums=0): - """Return a function which computes the gradient of `f`.""" + """Return a function which computes the gradient of `f`.""" - def _f(*params): - with tf.GradientTape() as tape: - tape.watch(params) - primals_out = f(*params) 
- return tape.gradient( - primals_out, - params[argnums], - unconnected_gradients=tf.UnconnectedGradients.ZERO) + def _f(*params): + with tf.GradientTape() as tape: + tape.watch(params) + primals_out = f(*params) + return tape.gradient( + primals_out, + params[argnums], + unconnected_gradients=tf.UnconnectedGradients.ZERO, + ) - return _f + return _f def _hvp(f, primals, tangents): - """Compute a forward-over-back Hessian-vector product.""" - with tf.autodiff.ForwardAccumulator(primals, tangents) as acc: - with tf.GradientTape() as tape: - tape.watch(primals) - f_out = f(*primals) - f_out.shape.assert_is_compatible_with([]) - return acc.jvp(tape.gradient(f_out, primals)) + """Compute a forward-over-back Hessian-vector product.""" + with tf.autodiff.ForwardAccumulator(primals, tangents) as acc: + with tf.GradientTape() as tape: + tape.watch(primals) + f_out = f(*primals) + f_out.shape.assert_is_compatible_with([]) + return acc.jvp(tape.gradient(f_out, primals)) def _vectorize_parameters(f, params, use_pfor, dtype): - """Loop over `params`, providing a one-hot mask to `f` for each.""" - parameter_sizes = [tf.size(param) for param in params] - total_size = tf.math.add_n(parameter_sizes) + """Loop over `params`, providing a one-hot mask to `f` for each.""" + parameter_sizes = [tf.size(param) for param in params] + total_size = tf.math.add_n(parameter_sizes) - def _wrapper(index): - full_onehot = tf.one_hot(index, total_size) - split_onehot = tf.split(full_onehot, parameter_sizes) - tangents = [ - tf.reshape(v, tf.shape(param)) - for param, v in zip(params, split_onehot) - ] - return f(tangents) + def _wrapper(index): + full_onehot = tf.one_hot(index, total_size) + split_onehot = tf.split(full_onehot, parameter_sizes) + tangents = [ + tf.reshape(v, tf.shape(param)) + for param, v in zip(params, split_onehot) + ] + return f(tangents) - if use_pfor: - return tf.vectorized_map(_wrapper, tf.range(total_size)) - else: - return tf.map_fn(_wrapper, tf.range(total_size), dtype) + if use_pfor: + return tf.vectorized_map(_wrapper, tf.range(total_size)) + else: + return tf.map_fn(_wrapper, tf.range(total_size), dtype) def _forward_over_back_hessian(f, params, use_pfor, dtype=None): - """Computes the full Hessian matrix for the scalar-valued f(*params). - - Args: - f: A function taking `params` and returning a scalar. - params: A possibly nested structure of tensors. - use_pfor: If true, uses `tf.vectorized_map` calls instead of looping. - dtype: Required if `use_pfor=False`. A possibly nested structure of dtypes - (e.g. `tf.float32`) matching the structure of `f`'s returns. - - Returns: - A possibly nested structure of matrix slices corresponding to `params`. Each - slice has shape [P, p_s] where `p_s` is the number of parameters (`tf.size`) - in the corresponding element of `params` and `P` is the total number of - parameters (`sum_s(p_s)`). The full matrix can be obtained by concatenating - along the second axis. 
- """ - return _vectorize_parameters( - functools.partial(_hvp, f, params), - params, - use_pfor=use_pfor, - dtype=dtype) - - -def _test_gradients(testcase, - f, - primals, - order, - delta=1e-3, - rtol=1e-2, - atol=1e-6): - """Tests forward/backward jacobians of `f`'s [0, `order`)-order gradients.""" - if order < 1: - raise ValueError( - "`order` should be a positive integer, got '{}'.".format(order)) - if order > 1: - _test_gradients( - testcase=testcase, - f=_grad(f), - primals=primals, - order=order - 1, - delta=delta, - rtol=rtol, - atol=atol) - sym_jac_back, num_jac = tf.test.compute_gradient(f, primals, delta=delta) - testcase.assertAllClose(num_jac, sym_jac_back, rtol=rtol, atol=atol) - sym_jac_fwd = _jacfwd(f, primals) - testcase.assertAllClose(num_jac, sym_jac_fwd, rtol=rtol, atol=atol) - # And the symbolic computations should be much closer. - testcase.assertAllClose(sym_jac_back, sym_jac_fwd) + """Computes the full Hessian matrix for the scalar-valued f(*params). + + Args: + f: A function taking `params` and returning a scalar. + params: A possibly nested structure of tensors. + use_pfor: If true, uses `tf.vectorized_map` calls instead of looping. + dtype: Required if `use_pfor=False`. A possibly nested structure of dtypes + (e.g. `tf.float32`) matching the structure of `f`'s returns. + + Returns: + A possibly nested structure of matrix slices corresponding to `params`. + Each slice has shape [P, p_s] where `p_s` is the number of parameters + (`tf.size`) in the corresponding element of `params` and `P` is the total + number of parameters (`sum_s(p_s)`). The full matrix can be obtained by + concatenating along the second axis. + """ + return _vectorize_parameters( + functools.partial(_hvp, f, params), + params, + use_pfor=use_pfor, + dtype=dtype, + ) + + +def _test_gradients( + testcase, f, primals, order, delta=1e-3, rtol=1e-2, atol=1e-6 +): + """Tests forward/backward jacobians of `f`'s [0, `order`)-order + gradients.""" + if order < 1: + raise ValueError( + f"`order` should be a positive integer, got '{order}'." + ) + if order > 1: + _test_gradients( + testcase=testcase, + f=_grad(f), + primals=primals, + order=order - 1, + delta=delta, + rtol=rtol, + atol=atol, + ) + sym_jac_back, num_jac = tf.test.compute_gradient(f, primals, delta=delta) + testcase.assertAllClose(num_jac, sym_jac_back, rtol=rtol, atol=atol) + sym_jac_fwd = _jacfwd(f, primals) + testcase.assertAllClose(num_jac, sym_jac_fwd, rtol=rtol, atol=atol) + # And the symbolic computations should be much closer. + testcase.assertAllClose(sym_jac_back, sym_jac_fwd) class ForwardpropTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters([ - ("Dense", [[0.1]], functools.partial(tf.keras.layers.Dense, 5)), - ("Conv2D", - np.reshape( - np.arange(start=-1., stop=1., step=2. / (1 * 2 * 4 * 4)), - [1, 2, 4, 4]), functools.partial(tf.keras.layers.Conv2D, 2, 2), 1e-3) - ]) - def testKerasLayers(self, value, op_fn, atol=1e-6): - layer = op_fn() - input_value = tf.constant(value, dtype=tf.float32) - layer.build(input_value.shape) - # Make sure the test is deterministic by avoiding random variable - # initialization. - for v in layer.trainable_variables: - v.assign( - tf.reshape( - tf.range( - -1., - 1., - 2. / tf.size(v, out_type=tf.float32), - dtype=tf.float32), v.shape)) - _test_gradients( - self, - layer, - [input_value], - atol=atol, - # These are linear, so second-order is pretty boring. 
- order=2) - - @parameterized.named_parameters([ - ("NonFused", [[0.1], [0.2], [-0.3]], - functools.partial(tf.keras.layers.BatchNormalization, fused=False)), - ("Fused", [[[[0.1, 2.]]], [[[0.2, -3.]]], [[[-0.3, 4.]]]], - functools.partial(tf.keras.layers.BatchNormalization, fused=True)) - ]) - def testBatchNorm(self, value, op_fn): - for training in [True, False]: - layer = op_fn() - input_value = tf.constant(value, dtype=tf.float32) - layer.build(input_value.shape) - _test_gradients( - self, - functools.partial(layer, training=training), [input_value], - order=2, - atol=1e-3) - - @parameterized.named_parameters([ - ("NonFused", [[0.1], [0.2], [-0.3]], - functools.partial(tf.keras.layers.BatchNormalization, fused=False)), - ("Fused", [[[[0.1, 2.]]], [[[0.2, -3.]]], [[[-0.3, 4.]]]], - functools.partial(tf.keras.layers.BatchNormalization, fused=True)) - ]) - def testBatchNormLayerParamGrads(self, value, op_fn): - for training in [True, False]: - layer = op_fn() - with tf.GradientTape() as tape: + @parameterized.named_parameters( + [ + ("Dense", [[0.1]], functools.partial(tf.keras.layers.Dense, 5)), + ( + "Conv2D", + np.reshape( + np.arange(start=-1.0, stop=1.0, step=2.0 / (1 * 2 * 4 * 4)), + [1, 2, 4, 4], + ), + functools.partial(tf.keras.layers.Conv2D, 2, 2), + 1e-3, + ), + ] + ) + def testKerasLayers(self, value, op_fn, atol=1e-6): + layer = op_fn() input_value = tf.constant(value, dtype=tf.float32) - tape.watch(input_value) - output = layer(input_value, training=training) - jac_back = tape.jacobian(output, - [input_value] + layer.trainable_variables) - jac_forward = _jacfwd( - lambda *args: layer(args[0], training=training), # pylint:disable=cell-var-from-loop - [input_value] + layer.trainable_variables) - for backward, forward in zip(jac_back, jac_forward): - forward = tf.reshape(forward, tf.shape(backward)) - self.assertAllClose(backward, forward) - - @parameterized.named_parameters([("Function", tf.function), - ("NoFunction", lambda f: f)]) - def testVariablesHVP(self, decorator): - - class _Model(tf.Module): - - def __init__(self): - self._first_dense = tf.keras.layers.Dense(18) - self._conv = tf.keras.layers.Conv2D(2, 2) - self._norm = tf.keras.layers.BatchNormalization() - self._second_dense = tf.keras.layers.Dense(1) - - def __call__(self, x): - x = self._first_dense(x) - x = tf.nn.relu(x) - x = self._norm(x) - x = tf.nn.relu(self._conv(tf.reshape(x, [-1, 2, 3, 3]))) - return self._second_dense(x) - - model = _Model() - - def _loss(): - input_value = tf.constant([[-0.5, 1.], [0.5, -1.]]) - target = tf.constant([[-1.], [2.]]) - return tf.math.reduce_sum((model(input_value) - target)**2.) 
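(A brief aside on the forward-over-back pattern these tests exercise; the snippet below is an illustrative, self-contained sketch with made-up values, not part of the patch. The backward tape produces the gradient, the forward accumulator pushes a tangent through that computation, and `acc.jvp(grad)` is then a Hessian-vector product computed without ever materializing the Hessian; looping this over one-hot tangents, as `_vectorize_parameters` does, recovers the full Hessian one column at a time.)

    import tensorflow as tf

    def f(x):
        # Scalar objective; its Hessian is diag(6 * x).
        return tf.reduce_sum(x ** 3)

    x = tf.constant([0.5, -1.0])
    tangent = tf.constant([1.0, 0.0])  # one-hot tangent selects one Hessian column

    with tf.autodiff.ForwardAccumulator(x, tangent) as acc:
        with tf.GradientTape() as tape:
            tape.watch(x)
            y = f(x)
        grad = tape.gradient(y, x)  # 3 * x**2
    hvp = acc.jvp(grad)  # H @ tangent -> [3.0, 0.0] for these values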
- - @decorator - def _compute_hvps(): - with tf.GradientTape() as tape: - loss = _loss() - vector = tape.gradient(loss, model.trainable_variables) - variable_input_fn = lambda unused_variables: _loss() - forward_over_back_hvp, = _hvp(variable_input_fn, - [model.trainable_variables], [vector]) - with tf.GradientTape(persistent=True) as tape: - tape.watch(model.trainable_variables) - loss = _loss() - first_grads = tape.gradient(loss, model.trainable_variables) - back_over_back_hvp = tape.gradient( - first_grads, model.trainable_variables, output_gradients=vector) - return forward_over_back_hvp, back_over_back_hvp - - self.assertAllClose(*_compute_hvps(), rtol=1e-5, atol=1e-5) - - def testEmbeddingLayerInFunction(self): - - class M(tf.keras.Model): - - def __init__(self): - super().__init__() - self.embed = tf.keras.layers.Embedding(5, 1) - self.proj = tf.keras.layers.Dense(1) - - @tf.function - def call(self, x): - return self.proj(self.embed(x)) - - model = M() - model(tf.zeros([3, 3], dtype=tf.int32)) # pylint: disable=not-callable - parameters = model.embed.variables - tangents = [tf.ones_like(v) for v in parameters] - with tf.autodiff.ForwardAccumulator(parameters, tangents): - # Note that forwardprop runs alongside the original computation. This test - # is just checking that it doesn't crash; correctness is tested in core - # TF. - model(tf.zeros([3, 3], dtype=tf.int32)) # pylint: disable=not-callable + layer.build(input_value.shape) + # Make sure the test is deterministic by avoiding random variable + # initialization. + for v in layer.trainable_variables: + v.assign( + tf.reshape( + tf.range( + -1.0, + 1.0, + 2.0 / tf.size(v, out_type=tf.float32), + dtype=tf.float32, + ), + v.shape, + ) + ) + _test_gradients( + self, + layer, + [input_value], + atol=atol, + # These are linear, so second-order is pretty boring. 
+ order=2, + ) + + @parameterized.named_parameters( + [ + ( + "NonFused", + [[0.1], [0.2], [-0.3]], + functools.partial( + tf.keras.layers.BatchNormalization, fused=False + ), + ), + ( + "Fused", + [[[[0.1, 2.0]]], [[[0.2, -3.0]]], [[[-0.3, 4.0]]]], + functools.partial( + tf.keras.layers.BatchNormalization, fused=True + ), + ), + ] + ) + def testBatchNorm(self, value, op_fn): + for training in [True, False]: + layer = op_fn() + input_value = tf.constant(value, dtype=tf.float32) + layer.build(input_value.shape) + _test_gradients( + self, + functools.partial(layer, training=training), + [input_value], + order=2, + atol=1e-3, + ) + + @parameterized.named_parameters( + [ + ( + "NonFused", + [[0.1], [0.2], [-0.3]], + functools.partial( + tf.keras.layers.BatchNormalization, fused=False + ), + ), + ( + "Fused", + [[[[0.1, 2.0]]], [[[0.2, -3.0]]], [[[-0.3, 4.0]]]], + functools.partial( + tf.keras.layers.BatchNormalization, fused=True + ), + ), + ] + ) + def testBatchNormLayerParamGrads(self, value, op_fn): + for training in [True, False]: + layer = op_fn() + with tf.GradientTape() as tape: + input_value = tf.constant(value, dtype=tf.float32) + tape.watch(input_value) + output = layer(input_value, training=training) + jac_back = tape.jacobian( + output, [input_value] + layer.trainable_variables + ) + jac_forward = _jacfwd( + lambda *args: layer(args[0], training=training), + [input_value] + layer.trainable_variables, + ) + for backward, forward in zip(jac_back, jac_forward): + forward = tf.reshape(forward, tf.shape(backward)) + self.assertAllClose(backward, forward) + + @parameterized.named_parameters( + [("Function", tf.function), ("NoFunction", lambda f: f)] + ) + def testVariablesHVP(self, decorator): + class _Model(tf.Module): + def __init__(self): + self._first_dense = tf.keras.layers.Dense(18) + self._conv = tf.keras.layers.Conv2D(2, 2) + self._norm = tf.keras.layers.BatchNormalization() + self._second_dense = tf.keras.layers.Dense(1) + + def __call__(self, x): + x = self._first_dense(x) + x = tf.nn.relu(x) + x = self._norm(x) + x = tf.nn.relu(self._conv(tf.reshape(x, [-1, 2, 3, 3]))) + return self._second_dense(x) + + model = _Model() + + def _loss(): + input_value = tf.constant([[-0.5, 1.0], [0.5, -1.0]]) + target = tf.constant([[-1.0], [2.0]]) + return tf.math.reduce_sum((model(input_value) - target) ** 2.0) + + @decorator + def _compute_hvps(): + with tf.GradientTape() as tape: + loss = _loss() + vector = tape.gradient(loss, model.trainable_variables) + variable_input_fn = lambda unused_variables: _loss() + (forward_over_back_hvp,) = _hvp( + variable_input_fn, [model.trainable_variables], [vector] + ) + with tf.GradientTape(persistent=True) as tape: + tape.watch(model.trainable_variables) + loss = _loss() + first_grads = tape.gradient(loss, model.trainable_variables) + back_over_back_hvp = tape.gradient( + first_grads, model.trainable_variables, output_gradients=vector + ) + return forward_over_back_hvp, back_over_back_hvp + + self.assertAllClose(*_compute_hvps(), rtol=1e-5, atol=1e-5) + + def testEmbeddingLayerInFunction(self): + class M(tf.keras.Model): + def __init__(self): + super().__init__() + self.embed = tf.keras.layers.Embedding(5, 1) + self.proj = tf.keras.layers.Dense(1) + + @tf.function + def call(self, x): + return self.proj(self.embed(x)) + + model = M() + model(tf.zeros([3, 3], dtype=tf.int32)) + parameters = model.embed.variables + tangents = [tf.ones_like(v) for v in parameters] + with tf.autodiff.ForwardAccumulator(parameters, tangents): + # Note that forwardprop runs 
alongside the original computation. + # This test is just checking that it doesn't crash; correctness is + # tested in core TF. + model(tf.zeros([3, 3], dtype=tf.int32)) class HessianTests(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters([("PFor", True), ("MapFn", False)]) - def testHessianOfVariables(self, use_pfor): - model = tf.keras.layers.Dense(1) - model.build([None, 2]) - - def _loss(*unused_args): - input_value = tf.constant([[-0.5, 1.], [0.5, -1.]]) - target = tf.constant([[-1.], [2.]]) - return tf.math.reduce_sum((model(input_value) - target)**2.) - - kernel_hess, bias_hess = _forward_over_back_hessian( - _loss, [model.kernel, model.bias], - use_pfor=use_pfor, - dtype=[tf.float32, tf.float32]) - # 3 total parameters, the whole hessian is the 3x3 concatenation - self.assertEqual([3, 2, 1], kernel_hess.shape) - self.assertEqual([3, 1], bias_hess.shape) - full_hessian = tf.concat([tf.reshape(kernel_hess, [3, 2]), bias_hess], - axis=1) - # The full Hessian should be symmetric. - self.assertAllClose(full_hessian, tf.transpose(full_hessian)) + @parameterized.named_parameters([("PFor", True), ("MapFn", False)]) + def testHessianOfVariables(self, use_pfor): + model = tf.keras.layers.Dense(1) + model.build([None, 2]) + + def _loss(*unused_args): + input_value = tf.constant([[-0.5, 1.0], [0.5, -1.0]]) + target = tf.constant([[-1.0], [2.0]]) + return tf.math.reduce_sum((model(input_value) - target) ** 2.0) + + kernel_hess, bias_hess = _forward_over_back_hessian( + _loss, + [model.kernel, model.bias], + use_pfor=use_pfor, + dtype=[tf.float32, tf.float32], + ) + # 3 total parameters, the whole hessian is the 3x3 concatenation + self.assertEqual([3, 2, 1], kernel_hess.shape) + self.assertEqual([3, 1], bias_hess.shape) + full_hessian = tf.concat( + [tf.reshape(kernel_hess, [3, 2]), bias_hess], axis=1 + ) + # The full Hessian should be symmetric. + self.assertAllClose(full_hessian, tf.transpose(full_hessian)) if __name__ == "__main__": - if tf.__internal__.tf2.enabled(): - tf.test.main() + if tf.__internal__.tf2.enabled(): + tf.test.main() diff --git a/keras/integration_test/function_test.py b/keras/integration_test/function_test.py index 14e6e14be1b8..ba89f0424e80 100644 --- a/keras/integration_test/function_test.py +++ b/keras/integration_test/function_test.py @@ -19,221 +19,240 @@ class MiniModel(tf.keras.Model): - """Minimal model for mnist. + """Minimal model for mnist. - Useful for testing and debugging on slow TPU simulators. - """ + Useful for testing and debugging on slow TPU simulators. 
+ """ - def __init__(self): - super().__init__(name='') - self.fc = tf.keras.layers.Dense(1, name='fc', kernel_initializer='ones', - bias_initializer='ones') + def __init__(self): + super().__init__(name="") + self.fc = tf.keras.layers.Dense( + 1, name="fc", kernel_initializer="ones", bias_initializer="ones" + ) - def call(self, inputs, training=True): - return self.fc(inputs) + def call(self, inputs, training=True): + return self.fc(inputs) class DefunnedMiniModel(MiniModel): - - @tf.function - def call(self, inputs, training=True): - return super(DefunnedMiniModel, self).call(inputs, training=training) + @tf.function + def call(self, inputs, training=True): + return super(DefunnedMiniModel, self).call(inputs, training=training) class ModelWithOptimizer(tf.keras.Model): - - def __init__(self): - super().__init__() - self.dense = tf.keras.layers.Dense(1) - self.optimizer = tf.keras.optimizers.Adam(0.01) - - @tf.function( - input_signature=(tf.TensorSpec([None, 2], tf.float32), - tf.TensorSpec([None], tf.float32))) - def call(self, x, y): - with tf.GradientTape() as tape: - loss = tf.math.reduce_mean((self.dense(x) - y) ** 2.) - trainable_variables = self.trainable_variables - gradients = tape.gradient(loss, trainable_variables) - self.optimizer.apply_gradients(zip(gradients, trainable_variables)) - return {'loss': loss} - - -class FunctionTest(tf.test.TestCase): - - def testFunctionRelaxationLosesInnerDimWithKerasLayer(self): - layer = tf.keras.layers.Dense(1) - fn = tf.function(reduce_retracing=True)(layer) - - with self.captureWritesToStream(sys.stderr) as printed: - fn(tf.ones((3, 2))) - self.assertNotIn('ValueError', printed.contents()) - with self.captureWritesToStream(sys.stderr) as printed: - # Use batch size 2 to trigger a second cache miss on the shape. - fn(tf.ones((2, 2))) - self.assertNotIn('ValueError', printed.contents()) - - # Shape relaxation passes TensorShape([None, None]), which causes layer - # matmul to fail, due to incompatible dims. What would have been a graph - # build time error (layer would complain about the inner dim being 4). 
- with self.captureWritesToStream(sys.stderr) as printed: - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - r'Matrix size-incompatible'): - fn(tf.ones((3, 4))) - - def testDefunKerasModelCall(self): - model = MiniModel() - model.call = tf.function(model.call) - - x = tf.ones([1, 2]) - y = model(x) # pylint:disable=not-callable - - self.assertAllEqual([[3.0]], self.evaluate(y)) - - # Break the reference cycle between the MiniModel and the defun: - # `MiniModel` --(through its `call` method)--> `Function` - # `Function` --(instancemethod on `MiniModel`)--> `MiniModel` - del model.call - - def testDecoratedMethod(self): - m = DefunnedMiniModel() - instance_call_one = m.call(tf.ones([1, 2]), training=True) - instance_call_two = m.call( - inputs=tf.ones([1, 2]), training=True) - class_call = DefunnedMiniModel.call(m, tf.ones([1, 2]), training=True) - self.assertAllEqual(instance_call_one, instance_call_two) - self.assertAllEqual(instance_call_one, class_call) - - def testDecoratedMethodUniqueFunctionPerInstance(self): - m = DefunnedMiniModel() - n = DefunnedMiniModel() - - class_method_one = DefunnedMiniModel.call - class_method_two = DefunnedMiniModel.call - - m_method_one = m.call - m_method_two = m.call - - n_method_one = n.call - n_method_two = n.call - - self.assertEqual(class_method_one, class_method_two) - self.assertEqual(m_method_one, m_method_two) - self.assertEqual(n_method_one, n_method_two) - self.assertNotEqual(m.call, n.call) - - def testDecoratedMethodGetConcreteFunction(self): - m = DefunnedMiniModel() - instance_call_one = m.call.get_concrete_function( - tf.ones([1, 2]), training=False) - instance_call_two = m.call.get_concrete_function( - inputs=tf.ones([1, 2]), training=False) - self.assertAllEqual(instance_call_one(tf.ones([1, 2])), - instance_call_two(tf.ones([1, 2]))) - - # Also make sure get_concrete_function works on the class method - DefunnedMiniModel.call.get_concrete_function( - m, tf.ones([1, 2]), training=False) - DefunnedMiniModel.call.get_concrete_function( - m, inputs=tf.ones([1, 2]), training=True) - - def testDecoratedMethodVariableCleanup(self): - m = DefunnedMiniModel() - m(tf.ones([1, 2])) # pylint:disable=not-callable - variable_refs = list({v.ref() for v in m.variables}) - self.assertLen(variable_refs, 2) - del m - - # Verifying if the variables are only referenced from variable_refs. - # We expect the reference counter to be 1, but `sys.getrefcount` reports - # one higher reference counter because a temporary is created when we call - # sys.getrefcount(). Hence check if the number returned is 2. 
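(Aside, for readers wondering about the magic constant 2 in the refcount assertions: `sys.getrefcount` counts one extra, temporary reference for its own argument. A minimal standalone illustration, not from the patch:)

    import sys

    class Obj:
        pass

    holder = [Obj()]  # exactly one strong reference, like `variable_refs`
    # The call itself holds a second, temporary reference to the object,
    # so a singly-referenced object reports 2.
    print(sys.getrefcount(holder[0]))  # 2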
- # https://docs.python.org/3/library/sys.html#sys.getrefcount - self.assertEqual(sys.getrefcount(variable_refs[0].deref()), 2) - self.assertEqual(sys.getrefcount(variable_refs[1].deref()), 2) - - def testStandardTrainingLoopInFunction(self): - layer = tf.keras.layers.Dense(2) - dataset = ( - tf.data.Dataset.from_tensors((tf.ones([784]), tf.ones([], tf.int32))) - .map(lambda x, y: (x, y)) - .repeat(10) - .batch(32)) - optimizer = tf.keras.optimizers.Adam() - - @tf.function - def train(): - for x, y in dataset: - with tf.GradientTape() as tape: - out = layer(x) - loss = tf.reduce_mean( - tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=out, labels=y)) - layer_variables = layer.trainable_variables - gradients = tape.gradient(loss, layer_variables) - optimizer.apply_gradients(zip(gradients, layer_variables)) - - train() - - def testEarlyStoppingTrainingLoopInFunction(self): - layer = tf.keras.layers.Dense(2) - dataset = ( - tf.data.Dataset.from_tensors((tf.ones([784]), tf.ones([], tf.int32))) - .map(lambda x, y: (x, y)) - .repeat(10) - .batch(32)) - optimizer = tf.keras.optimizers.Adam() - - @tf.function - def train(): - for x, y in dataset: + def __init__(self): + super().__init__() + self.dense = tf.keras.layers.Dense(1) + self.optimizer = tf.keras.optimizers.Adam(0.01) + + @tf.function( + input_signature=( + tf.TensorSpec([None, 2], tf.float32), + tf.TensorSpec([None], tf.float32), + ) + ) + def call(self, x, y): with tf.GradientTape() as tape: - out = layer(x) - loss = tf.math.reduce_mean( - tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=out, labels=y)) - layer_variables = layer.trainable_variables - gradients = tape.gradient(loss, layer_variables) - optimizer.apply_gradients(zip(gradients, layer_variables)) - if optimizer.iterations > 3: - break + loss = tf.math.reduce_mean((self.dense(x) - y) ** 2.0) + trainable_variables = self.trainable_variables + gradients = tape.gradient(loss, trainable_variables) + self.optimizer.apply_gradients(zip(gradients, trainable_variables)) + return {"loss": loss} - train() - def test_optimizer(self): - x = tf.constant([[3., 4.]]) - y = tf.constant([2.]) - model = ModelWithOptimizer() - model(x, y) # pylint:disable=not-callable +class FunctionTest(tf.test.TestCase): + def testFunctionRelaxationLosesInnerDimWithKerasLayer(self): + layer = tf.keras.layers.Dense(1) + fn = tf.function(reduce_retracing=True)(layer) + + with self.captureWritesToStream(sys.stderr) as printed: + fn(tf.ones((3, 2))) + self.assertNotIn("ValueError", printed.contents()) + with self.captureWritesToStream(sys.stderr) as printed: + # Use batch size 2 to trigger a second cache miss on the shape. + fn(tf.ones((2, 2))) + self.assertNotIn("ValueError", printed.contents()) + + # Shape relaxation passes TensorShape([None, None]), which causes layer + # matmul to fail due to incompatible dims: what would have been a graph + # build time error (the layer would complain about the inner dim + # being 4) instead surfaces as a runtime error. 
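(A condensed, standalone sketch of the behavior the test above pins down, with assumed shapes: once retracing is relaxed, the inner-dimension check moves from trace time to run time.)

    import tensorflow as tf

    layer = tf.keras.layers.Dense(1)
    fn = tf.function(reduce_retracing=True)(layer)

    fn(tf.ones((3, 2)))  # first trace; the kernel is built with shape (2, 1)
    fn(tf.ones((2, 2)))  # second shape relaxes the signature toward (None, None)

    try:
        fn(tf.ones((3, 4)))  # inner dim 4 vs. kernel dim 2
    except tf.errors.InvalidArgumentError:
        print("matmul shape mismatch surfaced at run time, not trace time")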
+ with self.captureWritesToStream(sys.stderr) as printed: + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, r"Matrix size-incompatible" + ): + fn(tf.ones((3, 4))) + + def testDefunKerasModelCall(self): + model = MiniModel() + model.call = tf.function(model.call) + + x = tf.ones([1, 2]) + y = model(x) + + self.assertAllEqual([[3.0]], self.evaluate(y)) + + # Break the reference cycle between the MiniModel and the defun: + # `MiniModel` --(through its `call` method)--> `Function` + # `Function` --(instancemethod on `MiniModel`)--> `MiniModel` + del model.call + + def testDecoratedMethod(self): + m = DefunnedMiniModel() + instance_call_one = m.call(tf.ones([1, 2]), training=True) + instance_call_two = m.call(inputs=tf.ones([1, 2]), training=True) + class_call = DefunnedMiniModel.call(m, tf.ones([1, 2]), training=True) + self.assertAllEqual(instance_call_one, instance_call_two) + self.assertAllEqual(instance_call_one, class_call) + + def testDecoratedMethodUniqueFunctionPerInstance(self): + m = DefunnedMiniModel() + n = DefunnedMiniModel() + + class_method_one = DefunnedMiniModel.call + class_method_two = DefunnedMiniModel.call + + m_method_one = m.call + m_method_two = m.call + + n_method_one = n.call + n_method_two = n.call + + self.assertEqual(class_method_one, class_method_two) + self.assertEqual(m_method_one, m_method_two) + self.assertEqual(n_method_one, n_method_two) + self.assertNotEqual(m.call, n.call) + + def testDecoratedMethodGetConcreteFunction(self): + m = DefunnedMiniModel() + instance_call_one = m.call.get_concrete_function( + tf.ones([1, 2]), training=False + ) + instance_call_two = m.call.get_concrete_function( + inputs=tf.ones([1, 2]), training=False + ) + self.assertAllEqual( + instance_call_one(tf.ones([1, 2])), + instance_call_two(tf.ones([1, 2])), + ) + + # Also make sure get_concrete_function works on the class method + DefunnedMiniModel.call.get_concrete_function( + m, tf.ones([1, 2]), training=False + ) + DefunnedMiniModel.call.get_concrete_function( + m, inputs=tf.ones([1, 2]), training=True + ) + + def testDecoratedMethodVariableCleanup(self): + m = DefunnedMiniModel() + m(tf.ones([1, 2])) + variable_refs = list({v.ref() for v in m.variables}) + self.assertLen(variable_refs, 2) + del m + + # Verifying if the variables are only referenced from variable_refs. + # We expect the reference counter to be 1, but `sys.getrefcount` reports + # one higher reference counter because a temporary is created when we + # call sys.getrefcount(). Hence check if the number returned is 2. 
+ # https://docs.python.org/3/library/sys.html#sys.getrefcount + self.assertEqual(sys.getrefcount(variable_refs[0].deref()), 2) + self.assertEqual(sys.getrefcount(variable_refs[1].deref()), 2) + + def testStandardTrainingLoopInFunction(self): + layer = tf.keras.layers.Dense(2) + dataset = ( + tf.data.Dataset.from_tensors( + (tf.ones([784]), tf.ones([], tf.int32)) + ) + .map(lambda x, y: (x, y)) + .repeat(10) + .batch(32) + ) + optimizer = tf.keras.optimizers.Adam() + + @tf.function + def train(): + for x, y in dataset: + with tf.GradientTape() as tape: + out = layer(x) + loss = tf.reduce_mean( + tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=out, labels=y + ) + ) + layer_variables = layer.trainable_variables + gradients = tape.gradient(loss, layer_variables) + optimizer.apply_gradients(zip(gradients, layer_variables)) + + train() + + def testEarlyStoppingTrainingLoopInFunction(self): + layer = tf.keras.layers.Dense(2) + dataset = ( + tf.data.Dataset.from_tensors( + (tf.ones([784]), tf.ones([], tf.int32)) + ) + .map(lambda x, y: (x, y)) + .repeat(10) + .batch(32) + ) + optimizer = tf.keras.optimizers.Adam() + + @tf.function + def train(): + for x, y in dataset: + with tf.GradientTape() as tape: + out = layer(x) + loss = tf.math.reduce_mean( + tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=out, labels=y + ) + ) + layer_variables = layer.trainable_variables + gradients = tape.gradient(loss, layer_variables) + optimizer.apply_gradients(zip(gradients, layer_variables)) + if optimizer.iterations > 3: + break + + train() + + def test_optimizer(self): + x = tf.constant([[3.0, 4.0]]) + y = tf.constant([2.0]) + model = ModelWithOptimizer() + model(x, y) class AutomaticControlDependenciesTest(tf.test.TestCase): - - def testVariableInitializersCanBeLifted(self): - # The initializer is a stateful op, but using it inside a function should - # *not* create additional dependencies. That's what we're testing. - layer = tf.keras.layers.Dense(1, kernel_initializer='glorot_uniform') - - @tf.function - def fn(x): - # Stateful operation - tf.debugging.Assert(x, ['Error']) - # Variable initialization should be lifted. Prior to the change that - # added this test, the lifting would crash because of an auto control dep - # added on `x`. Note, the error did not happen if we - # manually created a tf.Variable outside of function and used it - # here. Alternatively, creating a tf.Variable inside fn() causes - # a different sort of error that is out of scope for this test. - return layer(tf.convert_to_tensor([[1.0, 1.0]])) - - true = tf.convert_to_tensor(True) - - concrete = fn.get_concrete_function( - tf.TensorSpec(shape=(), dtype=tf.bool)) - self.evaluate(concrete(true)) - self.evaluate(fn(True)) - - -if __name__ == '__main__': - if tf.__internal__.tf2.enabled(): - tf.test.main() + def testVariableInitializersCanBeLifted(self): + # The initializer is a stateful op, but using it inside a function + # should *not* create additional dependencies. That's what we're + # testing. + layer = tf.keras.layers.Dense(1, kernel_initializer="glorot_uniform") + + @tf.function + def fn(x): + # Stateful operation + tf.debugging.Assert(x, ["Error"]) + # Variable initialization should be lifted. Prior to the change + # that added this test, the lifting would crash because of an auto + # control dep added on `x`. Note, the error did not happen if we + # manually created a tf.Variable outside of function and used it + # here. 
Alternatively, creating a tf.Variable inside fn() causes a + # different sort of error that is out of scope for this test. + return layer(tf.convert_to_tensor([[1.0, 1.0]])) + + true = tf.convert_to_tensor(True) + + concrete = fn.get_concrete_function( + tf.TensorSpec(shape=(), dtype=tf.bool) + ) + self.evaluate(concrete(true)) + self.evaluate(fn(True)) + + +if __name__ == "__main__": + if tf.__internal__.tf2.enabled(): + tf.test.main() diff --git a/keras/integration_test/gradient_checkpoint_test.py b/keras/integration_test/gradient_checkpoint_test.py index 691df25c6ad1..50efbbd98920 100644 --- a/keras/integration_test/gradient_checkpoint_test.py +++ b/keras/integration_test/gradient_checkpoint_test.py @@ -17,159 +17,194 @@ import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) from tensorflow.python.platform import test as test_lib layers = tf.keras.layers optimizers = tf.keras.optimizers -def _get_big_cnn_model(img_dim, n_channels, num_partitions, - blocks_per_partition): - """Creates a test model whose activations are significantly larger than model size.""" - model = tf.keras.Sequential() - model.add(layers.Input(shape=(img_dim, img_dim, n_channels))) - for _ in range(num_partitions): - for _ in range(blocks_per_partition): - model.add(layers.Conv2D(10, 5, padding='same', activation=tf.nn.relu)) - model.add(layers.MaxPooling2D((1, 1), padding='same')) - model.add(layers.Conv2D(40, 5, padding='same', activation=tf.nn.relu)) - model.add(layers.MaxPooling2D((1, 1), padding='same')) - model.add(layers.Conv2D(20, 5, padding='same', activation=tf.nn.relu)) - model.add(layers.MaxPooling2D((1, 1), padding='same')) - model.add(layers.Flatten()) - model.add(layers.Dense(32, activation=tf.nn.relu)) - model.add(layers.Dense(10)) - return model - - -def _get_split_cnn_model(img_dim, n_channels, num_partitions, - blocks_per_partition): - """Creates a test model that is split into `num_partitions` smaller models.""" - models = [tf.keras.Sequential() for _ in range(num_partitions)] - models[0].add(layers.Input(shape=(img_dim, img_dim, n_channels))) - for i in range(num_partitions): - model = models[i] - if i > 0: - last_shape = models[i - 1].layers[-1].output_shape - model.add(layers.Input(shape=last_shape[1:])) - for _ in range(blocks_per_partition): - model.add(layers.Conv2D(10, 5, padding='same', activation=tf.nn.relu)) - model.add(layers.MaxPooling2D((1, 1), padding='same')) - model.add(layers.Conv2D(40, 5, padding='same', activation=tf.nn.relu)) - model.add(layers.MaxPooling2D((1, 1), padding='same')) - model.add(layers.Conv2D(20, 5, padding='same', activation=tf.nn.relu)) - model.add(layers.MaxPooling2D((1, 1), padding='same')) - models[-1].add(layers.Flatten()) - models[-1].add(layers.Dense(32, activation=tf.nn.relu)) - models[-1].add(layers.Dense(10)) - return models +def _get_big_cnn_model( + img_dim, n_channels, num_partitions, blocks_per_partition +): + """Creates a test model whose activations are significantly larger than + model size.""" + model = tf.keras.Sequential() + model.add(layers.Input(shape=(img_dim, img_dim, n_channels))) + for _ in range(num_partitions): + for _ in range(blocks_per_partition): + model.add( + layers.Conv2D(10, 5, padding="same", activation=tf.nn.relu) + ) + model.add(layers.MaxPooling2D((1, 1), padding="same")) + model.add( + layers.Conv2D(40, 5, padding="same", activation=tf.nn.relu) + ) + model.add(layers.MaxPooling2D((1, 
1), padding="same")) + model.add( + layers.Conv2D(20, 5, padding="same", activation=tf.nn.relu) + ) + model.add(layers.MaxPooling2D((1, 1), padding="same")) + model.add(layers.Flatten()) + model.add(layers.Dense(32, activation=tf.nn.relu)) + model.add(layers.Dense(10)) + return model + + +def _get_split_cnn_model( + img_dim, n_channels, num_partitions, blocks_per_partition +): + """Creates a test model that is split into `num_partitions` smaller + models.""" + models = [tf.keras.Sequential() for _ in range(num_partitions)] + models[0].add(layers.Input(shape=(img_dim, img_dim, n_channels))) + for i in range(num_partitions): + model = models[i] + if i > 0: + last_shape = models[i - 1].layers[-1].output_shape + model.add(layers.Input(shape=last_shape[1:])) + for _ in range(blocks_per_partition): + model.add( + layers.Conv2D(10, 5, padding="same", activation=tf.nn.relu) + ) + model.add(layers.MaxPooling2D((1, 1), padding="same")) + model.add( + layers.Conv2D(40, 5, padding="same", activation=tf.nn.relu) + ) + model.add(layers.MaxPooling2D((1, 1), padding="same")) + model.add( + layers.Conv2D(20, 5, padding="same", activation=tf.nn.relu) + ) + model.add(layers.MaxPooling2D((1, 1), padding="same")) + models[-1].add(layers.Flatten()) + models[-1].add(layers.Dense(32, activation=tf.nn.relu)) + models[-1].add(layers.Dense(10)) + return models def _compute_loss(logits, labels): - return tf.reduce_mean( - tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=logits, labels=labels)) + return tf.reduce_mean( + tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=logits, labels=labels + ) + ) def _limit_gpu_memory(): - """Helper function to limit GPU memory for testing.""" - gpus = tf.config.experimental.list_physical_devices('GPU') - if gpus: - tf.config.experimental.set_virtual_device_configuration( - gpus[0], - [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)]) - return True - return False + """Helper function to limit GPU memory for testing.""" + gpus = tf.config.experimental.list_physical_devices("GPU") + if gpus: + tf.config.experimental.set_virtual_device_configuration( + gpus[0], + [ + tf.config.experimental.VirtualDeviceConfiguration( + memory_limit=2048 + ) + ], + ) + return True + return False def _get_dummy_data(img_dim, n_channels, batch_size): - inputs = tf.ones([batch_size, img_dim, img_dim, n_channels]) - labels = tf.ones([batch_size], dtype=tf.int64) - return inputs, labels + inputs = tf.ones([batch_size, img_dim, img_dim, n_channels]) + labels = tf.ones([batch_size], dtype=tf.int64) + return inputs, labels def _train_no_recompute(n_steps): - """Trains a single large model without gradient checkpointing.""" - img_dim, n_channels, batch_size = 256, 1, 4 - x, y = _get_dummy_data(img_dim, n_channels, batch_size) - model = _get_big_cnn_model( - img_dim, n_channels, num_partitions=3, blocks_per_partition=2) - optimizer = optimizers.SGD() - losses = [] - tr_vars = model.trainable_variables - for _ in range(n_steps): - with tf.GradientTape() as tape: - logits = model(x) - loss = _compute_loss(logits, y) - losses.append(loss) - grads = tape.gradient(loss, tr_vars) # tr_vars - optimizer.apply_gradients(zip(grads, tr_vars)) - del grads - return losses + """Trains a single large model without gradient checkpointing.""" + img_dim, n_channels, batch_size = 256, 1, 4 + x, y = _get_dummy_data(img_dim, n_channels, batch_size) + model = _get_big_cnn_model( + img_dim, n_channels, num_partitions=3, blocks_per_partition=2 + ) + optimizer = optimizers.SGD() + losses = [] + 
tr_vars = model.trainable_variables + for _ in range(n_steps): + with tf.GradientTape() as tape: + logits = model(x) + loss = _compute_loss(logits, y) + losses.append(loss) + grads = tape.gradient(loss, tr_vars) # tr_vars + optimizer.apply_gradients(zip(grads, tr_vars)) + del grads + return losses def _train_with_recompute(n_steps): - """Trains a single large model with gradient checkpointing using tf.recompute_grad.""" - img_dim, n_channels, batch_size = 256, 1, 4 - x, y = _get_dummy_data(img_dim, n_channels, batch_size) - # This model is the same model as _get_big_cnn_model but split into 3 parts. - models = _get_split_cnn_model( - img_dim, n_channels, num_partitions=3, blocks_per_partition=2) - model1, model2, model3 = models - # Apply gradient checkpointing to the submodels using tf.recompute_grad. - model1_re = tf.recompute_grad(model1) - model2_re = tf.recompute_grad(model2) - model3_re = tf.recompute_grad(model3) - optimizer = optimizers.SGD() - tr_vars = ( - model1.trainable_variables + model2.trainable_variables + - model3.trainable_variables) - losses = [] - for _ in range(n_steps): - with tf.GradientTape() as tape: - logits1 = model1_re(x) - logits2 = model2_re(logits1) - logits3 = model3_re(logits2) - loss = _compute_loss(logits3, y) - losses.append(loss) - grads = tape.gradient(loss, tr_vars) # tr_vars - optimizer.apply_gradients(zip(grads, tr_vars)) - del grads - return losses + """Trains a single large model with gradient checkpointing using + tf.recompute_grad.""" + img_dim, n_channels, batch_size = 256, 1, 4 + x, y = _get_dummy_data(img_dim, n_channels, batch_size) + # This model is the same model as _get_big_cnn_model but split into 3 parts. + models = _get_split_cnn_model( + img_dim, n_channels, num_partitions=3, blocks_per_partition=2 + ) + model1, model2, model3 = models + # Apply gradient checkpointing to the submodels using tf.recompute_grad. 
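(Aside on the pattern applied below; this is an illustrative sketch with arbitrary sizes, not the test's models. Wrapping a block in `tf.recompute_grad` discards its intermediate activations during the forward pass and recomputes them during backprop, trading extra compute for lower peak memory.)

    import tensorflow as tf

    block = tf.keras.Sequential([
        tf.keras.layers.Dense(256, activation="relu"),
        tf.keras.layers.Dense(256, activation="relu"),
    ])
    block.build((None, 128))
    block_ckpt = tf.recompute_grad(block)  # activations recomputed on backprop

    x = tf.random.normal([32, 128])
    with tf.GradientTape() as tape:
        loss = tf.reduce_sum(block_ckpt(x))
    grads = tape.gradient(loss, block.trainable_variables)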
+ model1_re = tf.recompute_grad(model1) + model2_re = tf.recompute_grad(model2) + model3_re = tf.recompute_grad(model3) + optimizer = optimizers.SGD() + tr_vars = ( + model1.trainable_variables + + model2.trainable_variables + + model3.trainable_variables + ) + losses = [] + for _ in range(n_steps): + with tf.GradientTape() as tape: + logits1 = model1_re(x) + logits2 = model2_re(logits1) + logits3 = model3_re(logits2) + loss = _compute_loss(logits3, y) + losses.append(loss) + grads = tape.gradient(loss, tr_vars) # tr_vars + optimizer.apply_gradients(zip(grads, tr_vars)) + del grads + return losses @tf_test_utils.with_eager_op_as_function class GradientCheckpointTest(tf.test.TestCase): - - def test_raises_oom_exception(self): - self.skipTest('b/232015009: flaky test') - if not _limit_gpu_memory(): - self.skipTest('No virtual GPUs found') - with self.assertRaises(Exception) as context: - _train_no_recompute(1) - self.assertIsInstance(context.exception, tf.errors.ResourceExhaustedError) - - @tf_test_utils.disable_xla( - 'xla does not support searching for memory-limited solvers.') - def test_does_not_raise_oom_exception(self): - if not _limit_gpu_memory(): - self.skipTest('No virtual GPUs found') - if test_lib.is_built_with_rocm(): - self.skipTest( - 'ROCm MIOpen does not support searching for memory-limited' - 'solvers yet so skip the subtest which would result in OOM.') - n_step = 2 - losses = _train_with_recompute(n_step) - self.assertLen(losses, n_step) - - def tearDown(self): - super().tearDown() - # Make sure all the models created in keras has been deleted and cleared - # from the global keras grpah, also do a force GC to recycle the GPU memory. - tf.keras.backend.clear_session() - gc.collect() - - -if __name__ == '__main__': - tf.test.main() + def test_raises_oom_exception(self): + self.skipTest("b/232015009: flaky test") + if not _limit_gpu_memory(): + self.skipTest("No virtual GPUs found") + with self.assertRaises(Exception) as context: + _train_no_recompute(1) + self.assertIsInstance( + context.exception, tf.errors.ResourceExhaustedError + ) + + @tf_test_utils.disable_xla( + "xla does not support searching for memory-limited solvers." + ) + def test_does_not_raise_oom_exception(self): + if not _limit_gpu_memory(): + self.skipTest("No virtual GPUs found") + if test_lib.is_built_with_rocm(): + self.skipTest( + "ROCm MIOpen does not support searching for memory-limited " + "solvers yet, so skip the subtest which would result in OOM." + ) + n_step = 2 + losses = _train_with_recompute(n_step) + self.assertLen(losses, n_step) + + def tearDown(self): + super().tearDown() + # Make sure all the models created in keras have been deleted and + # cleared from the global keras graph, and force a GC to recycle the + # GPU memory. 
+ tf.keras.backend.clear_session() + gc.collect() + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/integration_test/gradients_test.py b/keras/integration_test/gradients_test.py index 361ed8112744..dd24e9c8d7df 100644 --- a/keras/integration_test/gradients_test.py +++ b/keras/integration_test/gradients_test.py @@ -18,120 +18,122 @@ class TestKerasModelClass(tf.keras.Model): - """A simple tensorflow keras Model class definition.""" + """A simple tensorflow keras Model class definition.""" - def __init__(self, width): - super().__init__() - self.width = width - - def build(self, input_shape): - self.weight = self.add_weight( - name="test_keras_var", - shape=(self.width,), - dtype=tf.float32, - trainable=True, - ) - - def call(self, inputs): - return self.weight * inputs - - -class GradientsTest(tf.test.TestCase): - - def _TestVariablesGradient(self, inputs, test_model, vars_to_grad): - """Returns gradients of `test_model` with respect to `vars_to_grad`.""" - - test_model_re = tf.recompute_grad(test_model) - - with tf.GradientTape(persistent=True) as tape: - tape.watch(vars_to_grad) - out_re = test_model_re(inputs) - out = test_model(inputs) - - grads_re = tape.gradient(out_re, vars_to_grad) - grads = tape.gradient(out, vars_to_grad) - - return grads_re, grads - - def testKerasRecompute(self): - """Checks that recompute_grad works for a simple Keras Model.""" - - test_model = TestKerasModelClass(10) - test_input = tf.constant(tf.zeros((10, 10), dtype=np.float32)) - # Ensures keras model is initialized. - test_model(test_input) # pylint: disable=not-callable - grads_re, grads = self._TestVariablesGradient(test_input, test_model, - test_input) - - grads_re = self.evaluate(grads_re) - grads = self.evaluate(grads) - for g, g_re in zip(grads, grads_re): - self.assertAllClose(g, g_re) - - grads_re, grads = self._TestVariablesGradient(test_input, test_model, - test_model.variables) - - grads_re = self.evaluate(grads_re) - grads = self.evaluate(grads) - for g, g_re in zip(grads, grads_re): - self.assertAllClose(g, g_re) - - def testLSTMBatchJacobian(self): - class HasLSTM(tf.keras.Model): - - def __init__(self): + def __init__(self, width): super().__init__() - self.lstm = tf.keras.layers.LSTM(units=5) - self.dense = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid) - - def call(self, x): - return self.dense(self.lstm(x)) - - m = HasLSTM() - - def jacobian(x): - with tf.GradientTape() as tape: - tape.watch(x) - y = m(x) # pylint: disable=not-callable - return tape.batch_jacobian(y, x) + self.width = width - inp = tf.nn.l2_normalize(tf.ones([1, 2, 3]), axis=[1, 2]) - eager_result = jacobian(inp) - function_result = tf.function(jacobian)(inp) - self.assertAllClose(eager_result, function_result) - backprop_result, numeric_result = tf.test.compute_gradient( - m, [inp], delta=1e-3) - self.assertAllClose(numeric_result, backprop_result, atol=1e-3) - self.assertAllClose(tf.reshape(numeric_result, [-1]), - tf.reshape(eager_result, [-1]), atol=1e-3) + def build(self, input_shape): + self.weight = self.add_weight( + name="test_keras_var", + shape=(self.width,), + dtype=tf.float32, + trainable=True, + ) - def testEmbeddingLookupGradientsHaveKnownShape(self): + def call(self, inputs): + return self.weight * inputs - class MyLayer(tf.keras.layers.Layer): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.embedding = None - - def build(self, input_shape): - self.embedding = tf.Variable(tf.random.uniform([50, 16])) - - def call(self, x): - return 
tf.nn.embedding_lookup(self.embedding, x) - - layer = MyLayer() - - @tf.function - def _run(x): - with tf.GradientTape() as tape: - y = layer(x) - loss = tf.math.reduce_sum(y) - gradients = tape.gradient(loss, layer.weights) - self.assertListEqual(gradients[0].shape.as_list(), [50, 16]) - - _run(tf.random.uniform([4, 16], minval=0, maxval=50, dtype=tf.int64)) +class GradientsTest(tf.test.TestCase): + def _TestVariablesGradient(self, inputs, test_model, vars_to_grad): + """Returns gradients of `test_model` with respect to `vars_to_grad`.""" + + test_model_re = tf.recompute_grad(test_model) + + with tf.GradientTape(persistent=True) as tape: + tape.watch(vars_to_grad) + out_re = test_model_re(inputs) + out = test_model(inputs) + + grads_re = tape.gradient(out_re, vars_to_grad) + grads = tape.gradient(out, vars_to_grad) + + return grads_re, grads + + def testKerasRecompute(self): + """Checks that recompute_grad works for a simple Keras Model.""" + + test_model = TestKerasModelClass(10) + test_input = tf.constant(tf.zeros((10, 10), dtype=np.float32)) + # Ensures keras model is initialized. + test_model(test_input) + grads_re, grads = self._TestVariablesGradient( + test_input, test_model, test_input + ) + + grads_re = self.evaluate(grads_re) + grads = self.evaluate(grads) + for g, g_re in zip(grads, grads_re): + self.assertAllClose(g, g_re) + + grads_re, grads = self._TestVariablesGradient( + test_input, test_model, test_model.variables + ) + + grads_re = self.evaluate(grads_re) + grads = self.evaluate(grads) + for g, g_re in zip(grads, grads_re): + self.assertAllClose(g, g_re) + + def testLSTMBatchJacobian(self): + class HasLSTM(tf.keras.Model): + def __init__(self): + super().__init__() + self.lstm = tf.keras.layers.LSTM(units=5) + self.dense = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid) + + def call(self, x): + return self.dense(self.lstm(x)) + + m = HasLSTM() + + def jacobian(x): + with tf.GradientTape() as tape: + tape.watch(x) + y = m(x) + return tape.batch_jacobian(y, x) + + inp = tf.nn.l2_normalize(tf.ones([1, 2, 3]), axis=[1, 2]) + eager_result = jacobian(inp) + function_result = tf.function(jacobian)(inp) + self.assertAllClose(eager_result, function_result) + backprop_result, numeric_result = tf.test.compute_gradient( + m, [inp], delta=1e-3 + ) + self.assertAllClose(numeric_result, backprop_result, atol=1e-3) + self.assertAllClose( + tf.reshape(numeric_result, [-1]), + tf.reshape(eager_result, [-1]), + atol=1e-3, + ) + + def testEmbeddingLookupGradientsHaveKnownShape(self): + class MyLayer(tf.keras.layers.Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.embedding = None + + def build(self, input_shape): + self.embedding = tf.Variable(tf.random.uniform([50, 16])) + + def call(self, x): + return tf.nn.embedding_lookup(self.embedding, x) + + layer = MyLayer() + + @tf.function + def _run(x): + with tf.GradientTape() as tape: + y = layer(x) + loss = tf.math.reduce_sum(y) + gradients = tape.gradient(loss, layer.weights) + self.assertListEqual(gradients[0].shape.as_list(), [50, 16]) + + _run(tf.random.uniform([4, 16], minval=0, maxval=50, dtype=tf.int64)) if __name__ == "__main__": - if tf.__internal__.tf2.enabled(): - tf.test.main() + if tf.__internal__.tf2.enabled(): + tf.test.main() diff --git a/keras/integration_test/legacy_rnn_test.py b/keras/integration_test/legacy_rnn_test.py index 8d006e29ceb3..0b85d3643377 100644 --- a/keras/integration_test/legacy_rnn_test.py +++ b/keras/integration_test/legacy_rnn_test.py @@ -20,366 +20,391 @@ class 
KerasNetworkTFRNNs(tf.keras.Model): + def __init__(self, name=None): + super().__init__(name=name) + self._cell = tf.nn.rnn_cell.MultiRNNCell( + [tf.nn.rnn_cell.LSTMCell(1) for _ in range(2)] + ) - def __init__(self, name=None): - super().__init__(name=name) - self._cell = tf.nn.rnn_cell.MultiRNNCell( - [tf.nn.rnn_cell.LSTMCell(1) for _ in range(2)]) - - def call(self, inputs): - return self._cell(inputs, self._cell.get_initial_state(inputs)) + def call(self, inputs): + return self._cell(inputs, self._cell.get_initial_state(inputs)) class KerasNetworkKerasRNNs(tf.keras.Model): + def __init__(self, name=None): + super().__init__(name=name) + self._cell = tf.keras.layers.StackedRNNCells( + [tf.keras.layers.LSTMCell(1) for _ in range(2)] + ) - def __init__(self, name=None): - super().__init__(name=name) - self._cell = tf.keras.layers.StackedRNNCells( - [tf.keras.layers.LSTMCell(1) for _ in range(2)]) - - def call(self, inputs): - return self._cell(inputs, self._cell.get_initial_state(inputs)) + def call(self, inputs): + return self._cell(inputs, self._cell.get_initial_state(inputs)) class LegacyRNNTest(tf.test.TestCase): - - def setUp(self): - super().setUp() - self._seed = 23489 - np.random.seed(self._seed) - - def testRNNWithKerasSimpleRNNCell(self): - with self.cached_session() as sess: - input_shape = 10 - output_shape = 5 - timestep = 4 - batch = 100 - (x_train, y_train), _ = get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=output_shape) - y_train = tf.keras.utils.to_categorical(y_train) - cell = tf.keras.layers.SimpleRNNCell(output_shape) - - inputs = tf.placeholder( - tf.float32, shape=(None, timestep, input_shape)) - predict = tf.placeholder( - tf.float32, shape=(None, output_shape)) - - outputs, state = tf.nn.dynamic_rnn( - cell, inputs, dtype=tf.float32) - self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape]) - self.assertEqual(state.shape.as_list(), [None, output_shape]) - loss = tf.losses.softmax_cross_entropy(predict, state) - train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) - - sess.run([tf.global_variables_initializer()]) - _, outputs, state = sess.run( - [train_op, outputs, state], {inputs: x_train, predict: y_train}) - - self.assertEqual(len(outputs), batch) - self.assertEqual(len(state), batch) - - def testRNNWithKerasGRUCell(self): - with self.cached_session() as sess: - input_shape = 10 - output_shape = 5 - timestep = 4 - batch = 100 - (x_train, y_train), _ = get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=output_shape) - y_train = tf.keras.utils.to_categorical(y_train) - cell = tf.keras.layers.GRUCell(output_shape) - - inputs = tf.placeholder( - tf.float32, shape=(None, timestep, input_shape)) - predict = tf.placeholder( - tf.float32, shape=(None, output_shape)) - - outputs, state = tf.nn.dynamic_rnn( - cell, inputs, dtype=tf.float32) - self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape]) - self.assertEqual(state.shape.as_list(), [None, output_shape]) - loss = tf.losses.softmax_cross_entropy(predict, state) - train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) - - sess.run([tf.global_variables_initializer()]) - _, outputs, state = sess.run( - [train_op, outputs, state], {inputs: x_train, predict: y_train}) - - self.assertEqual(len(outputs), batch) - self.assertEqual(len(state), batch) - - def testRNNWithKerasLSTMCell(self): - with self.cached_session() as sess: - input_shape = 10 
- output_shape = 5 - timestep = 4 - batch = 100 - (x_train, y_train), _ = get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=output_shape) - y_train = tf.keras.utils.to_categorical(y_train) - cell = tf.keras.layers.LSTMCell(output_shape) - - inputs = tf.placeholder( - tf.float32, shape=(None, timestep, input_shape)) - predict = tf.placeholder( - tf.float32, shape=(None, output_shape)) - - outputs, state = tf.nn.dynamic_rnn( - cell, inputs, dtype=tf.float32) - self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape]) - self.assertEqual(len(state), 2) - self.assertEqual(state[0].shape.as_list(), [None, output_shape]) - self.assertEqual(state[1].shape.as_list(), [None, output_shape]) - loss = tf.losses.softmax_cross_entropy(predict, state[0]) - train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) - - sess.run([tf.global_variables_initializer()]) - _, outputs, state = sess.run( - [train_op, outputs, state], {inputs: x_train, predict: y_train}) - - self.assertEqual(len(outputs), batch) - self.assertEqual(len(state), 2) - self.assertEqual(len(state[0]), batch) - self.assertEqual(len(state[1]), batch) - - def testRNNWithStackKerasCell(self): - with self.cached_session() as sess: - input_shape = 10 - output_shape = 5 - timestep = 4 - batch = 100 - (x_train, y_train), _ = get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=output_shape) - y_train = tf.keras.utils.to_categorical(y_train) - cell = tf.keras.layers.StackedRNNCells( - [tf.keras.layers.LSTMCell(2 * output_shape), - tf.keras.layers.LSTMCell(output_shape)]) - - inputs = tf.placeholder( - tf.float32, shape=(None, timestep, input_shape)) - predict = tf.placeholder( - tf.float32, shape=(None, output_shape)) - - outputs, state = tf.nn.dynamic_rnn( - cell, inputs, dtype=tf.float32) - self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape]) - self.assertEqual(len(state), 2) - state = tf.nest.flatten(state) - self.assertEqual(len(state), 4) - self.assertEqual(state[0].shape.as_list(), [None, 2 * output_shape]) - self.assertEqual(state[1].shape.as_list(), [None, 2 * output_shape]) - self.assertEqual(state[2].shape.as_list(), [None, output_shape]) - self.assertEqual(state[3].shape.as_list(), [None, output_shape]) - loss = tf.losses.softmax_cross_entropy(predict, state[2]) - train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) - - sess.run([tf.global_variables_initializer()]) - _, outputs, state = sess.run( - [train_op, outputs, state], {inputs: x_train, predict: y_train}) - - self.assertEqual(len(outputs), batch) - self.assertEqual(len(state), 4) - for s in state: - self.assertEqual(len(s), batch) - - def testStaticRNNWithKerasSimpleRNNCell(self): - with self.cached_session() as sess: - input_shape = 10 - output_shape = 5 - timestep = 4 - batch = 100 - (x_train, y_train), _ = get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=output_shape) - x_train = np.transpose(x_train, (1, 0, 2)) - y_train = tf.keras.utils.to_categorical(y_train) - cell = tf.keras.layers.SimpleRNNCell(output_shape) - - inputs = [tf.placeholder( - tf.float32, shape=(None, input_shape))] * timestep - predict = tf.placeholder( - tf.float32, shape=(None, output_shape)) - - outputs, state = tf.nn.static_rnn( - cell, inputs, dtype=tf.float32) - self.assertEqual(len(outputs), timestep) - self.assertEqual(outputs[0].shape.as_list(), [None, output_shape]) - 
self.assertEqual(state.shape.as_list(), [None, output_shape]) - loss = tf.losses.softmax_cross_entropy(predict, state) - train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) - - sess.run([tf.global_variables_initializer()]) - feed_dict = {i: d for i, d in zip(inputs, x_train)} - feed_dict[predict] = y_train - _, outputs, state = sess.run( - [train_op, outputs, state], feed_dict) - - self.assertEqual(len(outputs), timestep) - self.assertEqual(len(outputs[0]), batch) - self.assertEqual(len(state), batch) - - def testKerasAndTFRNNLayerOutputComparison(self): - input_shape = 10 - output_shape = 5 - timestep = 4 - batch = 20 - (x_train, _), _ = get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=output_shape) - fix_weights_generator = tf.keras.layers.SimpleRNNCell(output_shape) - fix_weights_generator.build((None, input_shape)) - weights = fix_weights_generator.get_weights() - - with self.session(graph=tf.Graph()) as sess: - inputs = tf.placeholder( - tf.float32, shape=(None, timestep, input_shape)) - cell = tf.keras.layers.SimpleRNNCell(output_shape) - tf_out, tf_state = tf.nn.dynamic_rnn( - cell, inputs, dtype=tf.float32) - cell.set_weights(weights) - [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train}) - with self.session(graph=tf.Graph()) as sess: - k_input = tf.keras.Input(shape=(timestep, input_shape), - dtype=tf.float32) - cell = tf.keras.layers.SimpleRNNCell(output_shape) - layer = tf.keras.layers.RNN( - cell, return_sequences=True, return_state=True) - keras_out = layer(k_input) - cell.set_weights(weights) - k_out, k_state = sess.run(keras_out, {k_input: x_train}) - self.assertAllClose(tf_out, k_out) - self.assertAllClose(tf_state, k_state) - - def testSimpleRNNCellAndBasicRNNCellComparison(self): - input_shape = 10 - output_shape = 5 - timestep = 4 - batch = 20 - (x_train, _), _ = get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=output_shape) - fix_weights_generator = tf.keras.layers.SimpleRNNCell(output_shape) - fix_weights_generator.build((None, input_shape)) - # The SimpleRNNCell contains 3 weights: kernel, recurrent_kernel, and bias - # The BasicRNNCell contains 2 weight: kernel and bias, where kernel is - # zipped [kernel, recurrent_kernel] in SimpleRNNCell. 
- keras_weights = fix_weights_generator.get_weights() - kernel, recurrent_kernel, bias = keras_weights - tf_weights = [np.concatenate((kernel, recurrent_kernel)), bias] - - with self.session(graph=tf.Graph()) as sess: - inputs = tf.placeholder( - tf.float32, shape=(None, timestep, input_shape)) - cell = tf.keras.layers.SimpleRNNCell(output_shape) - k_out, k_state = tf.nn.dynamic_rnn( - cell, inputs, dtype=tf.float32) - cell.set_weights(keras_weights) - [k_out, k_state] = sess.run([k_out, k_state], {inputs: x_train}) - with self.session(graph=tf.Graph()) as sess: - inputs = tf.placeholder( - tf.float32, shape=(None, timestep, input_shape)) - cell = tf.nn.rnn_cell.BasicRNNCell(output_shape) - tf_out, tf_state = tf.nn.dynamic_rnn( - cell, inputs, dtype=tf.float32) - cell.set_weights(tf_weights) - [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train}) - - self.assertAllClose(tf_out, k_out, atol=1e-5) - self.assertAllClose(tf_state, k_state, atol=1e-5) - - def testRNNCellSerialization(self): - for cell in [ - tf.nn.rnn_cell.LSTMCell(32, use_peepholes=True, cell_clip=True), - tf.nn.rnn_cell.BasicLSTMCell(32, dtype=tf.float32), - tf.nn.rnn_cell.BasicRNNCell(32, activation="relu", dtype=tf.float32), - tf.nn.rnn_cell.GRUCell(32, dtype=tf.float32) - ]: - with self.cached_session(): - x = tf.keras.Input((None, 5)) - layer = tf.keras.layers.RNN(cell) - y = layer(x) - model = tf.keras.models.Model(x, y) - model.compile(optimizer="rmsprop", loss="mse") - - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - # The custom_objects is important here since rnn_cell_impl is - # not visible as a Keras layer, and also has a name conflict with - # keras.LSTMCell and GRUCell. - layer = tf.keras.layers.RNN.from_config( - config, - custom_objects={ - "BasicRNNCell": tf.nn.rnn_cell.BasicRNNCell, - "GRUCell": tf.nn.rnn_cell.GRUCell, - "LSTMCell": tf.nn.rnn_cell.LSTMCell, - "BasicLSTMCell": tf.nn.rnn_cell.BasicLSTMCell - }) - y = layer(x) - model = tf.keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - def testRNNCellActsLikeKerasRNNCellInProperScope(self): - with tf.layers.experimental.keras_style_scope(): - kn1 = KerasNetworkTFRNNs(name="kn1") - kn2 = KerasNetworkKerasRNNs(name="kn2") - - z = tf.zeros((2, 3)) - - kn1(z) # pylint:disable=not-callable - kn2(z) # pylint:disable=not-callable - - # pylint: disable=protected-access - self.assertTrue(all("kn1" in v.name for v in kn1._cell.variables)) - self.assertTrue(all("kn2" in v.name for v in kn2._cell.variables)) - - with tf.layers.experimental.keras_style_scope(): - kn1_new = KerasNetworkTFRNNs(name="kn1_new") - kn2_new = KerasNetworkKerasRNNs(name="kn2_new") - - kn2_new(z) # pylint:disable=not-callable - # Most importantly, this doesn't fail due to variable scope reuse issues. 
- kn1_new(z) # pylint:disable=not-callable - - self.assertTrue(all("kn1_new" in v.name for v in kn1_new._cell.variables)) - self.assertTrue(all("kn2_new" in v.name for v in kn2_new._cell.variables)) - - -def get_test_data(train_samples, - test_samples, - input_shape, - num_classes): - num_sample = train_samples + test_samples - templates = 2 * num_classes * np.random.random((num_classes,) + input_shape) - y = np.random.randint(0, num_classes, size=(num_sample,)) - x = np.zeros((num_sample,) + input_shape, dtype=np.float32) - for i in range(num_sample): - x[i] = templates[y[i]] + np.random.normal(loc=0, scale=1., size=input_shape) - return ((x[:train_samples], y[:train_samples]), - (x[train_samples:], y[train_samples:])) + def setUp(self): + super().setUp() + self._seed = 23489 + np.random.seed(self._seed) + + def testRNNWithKerasSimpleRNNCell(self): + with self.cached_session() as sess: + input_shape = 10 + output_shape = 5 + timestep = 4 + batch = 100 + (x_train, y_train), _ = get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=output_shape, + ) + y_train = tf.keras.utils.to_categorical(y_train) + cell = tf.keras.layers.SimpleRNNCell(output_shape) + + inputs = tf.placeholder( + tf.float32, shape=(None, timestep, input_shape) + ) + predict = tf.placeholder(tf.float32, shape=(None, output_shape)) + + outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32) + self.assertEqual( + outputs.shape.as_list(), [None, timestep, output_shape] + ) + self.assertEqual(state.shape.as_list(), [None, output_shape]) + loss = tf.keras.losses.categorical_crossentropy(predict, state) + train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) + + sess.run([tf.global_variables_initializer()]) + _, outputs, state = sess.run( + [train_op, outputs, state], {inputs: x_train, predict: y_train} + ) + + self.assertEqual(len(outputs), batch) + self.assertEqual(len(state), batch) + + def testRNNWithKerasGRUCell(self): + with self.cached_session() as sess: + input_shape = 10 + output_shape = 5 + timestep = 4 + batch = 100 + (x_train, y_train), _ = get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=output_shape, + ) + y_train = tf.keras.utils.to_categorical(y_train) + cell = tf.keras.layers.GRUCell(output_shape) + + inputs = tf.placeholder( + tf.float32, shape=(None, timestep, input_shape) + ) + predict = tf.placeholder(tf.float32, shape=(None, output_shape)) + + outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32) + self.assertEqual( + outputs.shape.as_list(), [None, timestep, output_shape] + ) + self.assertEqual(state.shape.as_list(), [None, output_shape]) + loss = tf.keras.losses.categorical_crossentropy(predict, state) + train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) + + sess.run([tf.global_variables_initializer()]) + _, outputs, state = sess.run( + [train_op, outputs, state], {inputs: x_train, predict: y_train} + ) + + self.assertEqual(len(outputs), batch) + self.assertEqual(len(state), batch) + + def testRNNWithKerasLSTMCell(self): + with self.cached_session() as sess: + input_shape = 10 + output_shape = 5 + timestep = 4 + batch = 100 + (x_train, y_train), _ = get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=output_shape, + ) + y_train = tf.keras.utils.to_categorical(y_train) + cell = tf.keras.layers.LSTMCell(output_shape) + + inputs = tf.placeholder( + tf.float32, shape=(None, timestep, 
input_shape) + ) + predict = tf.placeholder(tf.float32, shape=(None, output_shape)) + + outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32) + self.assertEqual( + outputs.shape.as_list(), [None, timestep, output_shape] + ) + self.assertEqual(len(state), 2) + self.assertEqual(state[0].shape.as_list(), [None, output_shape]) + self.assertEqual(state[1].shape.as_list(), [None, output_shape]) + loss = tf.keras.losses.categorical_crossentropy(predict, state[0]) + train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) + + sess.run([tf.global_variables_initializer()]) + _, outputs, state = sess.run( + [train_op, outputs, state], {inputs: x_train, predict: y_train} + ) + + self.assertEqual(len(outputs), batch) + self.assertEqual(len(state), 2) + self.assertEqual(len(state[0]), batch) + self.assertEqual(len(state[1]), batch) + + def testRNNWithStackKerasCell(self): + with self.cached_session() as sess: + input_shape = 10 + output_shape = 5 + timestep = 4 + batch = 100 + (x_train, y_train), _ = get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=output_shape, + ) + y_train = tf.keras.utils.to_categorical(y_train) + cell = tf.keras.layers.StackedRNNCells( + [ + tf.keras.layers.LSTMCell(2 * output_shape), + tf.keras.layers.LSTMCell(output_shape), + ] + ) + + inputs = tf.placeholder( + tf.float32, shape=(None, timestep, input_shape) + ) + predict = tf.placeholder(tf.float32, shape=(None, output_shape)) + + outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32) + self.assertEqual( + outputs.shape.as_list(), [None, timestep, output_shape] + ) + self.assertEqual(len(state), 2) + state = tf.nest.flatten(state) + self.assertEqual(len(state), 4) + self.assertEqual(state[0].shape.as_list(), [None, 2 * output_shape]) + self.assertEqual(state[1].shape.as_list(), [None, 2 * output_shape]) + self.assertEqual(state[2].shape.as_list(), [None, output_shape]) + self.assertEqual(state[3].shape.as_list(), [None, output_shape]) + loss = tf.keras.losses.categorical_crossentropy(predict, state[2]) + train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) + + sess.run([tf.global_variables_initializer()]) + _, outputs, state = sess.run( + [train_op, outputs, state], {inputs: x_train, predict: y_train} + ) + + self.assertEqual(len(outputs), batch) + self.assertEqual(len(state), 4) + for s in state: + self.assertEqual(len(s), batch) + + def testStaticRNNWithKerasSimpleRNNCell(self): + with self.cached_session() as sess: + input_shape = 10 + output_shape = 5 + timestep = 4 + batch = 100 + (x_train, y_train), _ = get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=output_shape, + ) + x_train = np.transpose(x_train, (1, 0, 2)) + y_train = tf.keras.utils.to_categorical(y_train) + cell = tf.keras.layers.SimpleRNNCell(output_shape) + + inputs = [ + tf.placeholder(tf.float32, shape=(None, input_shape)) + ] * timestep + predict = tf.placeholder(tf.float32, shape=(None, output_shape)) + + outputs, state = tf.nn.static_rnn(cell, inputs, dtype=tf.float32) + self.assertEqual(len(outputs), timestep) + self.assertEqual(outputs[0].shape.as_list(), [None, output_shape]) + self.assertEqual(state.shape.as_list(), [None, output_shape]) + loss = tf.keras.losses.categorical_crossentropy(predict, state) + train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss) + + sess.run([tf.global_variables_initializer()]) + feed_dict = {i: d for i, d in zip(inputs, x_train)} + 
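+            # x_train was transposed above to (timestep, batch, input), so
+            # each per-timestep placeholder in `inputs` is fed one slice of
+            # the batch.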
feed_dict[predict] = y_train + _, outputs, state = sess.run([train_op, outputs, state], feed_dict) + + self.assertEqual(len(outputs), timestep) + self.assertEqual(len(outputs[0]), batch) + self.assertEqual(len(state), batch) + + def testKerasAndTFRNNLayerOutputComparison(self): + input_shape = 10 + output_shape = 5 + timestep = 4 + batch = 20 + (x_train, _), _ = get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=output_shape, + ) + fix_weights_generator = tf.keras.layers.SimpleRNNCell(output_shape) + fix_weights_generator.build((None, input_shape)) + weights = fix_weights_generator.get_weights() + + with self.session(graph=tf.Graph()) as sess: + inputs = tf.placeholder( + tf.float32, shape=(None, timestep, input_shape) + ) + cell = tf.keras.layers.SimpleRNNCell(output_shape) + tf_out, tf_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32) + cell.set_weights(weights) + [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train}) + with self.session(graph=tf.Graph()) as sess: + k_input = tf.keras.Input( + shape=(timestep, input_shape), dtype=tf.float32 + ) + cell = tf.keras.layers.SimpleRNNCell(output_shape) + layer = tf.keras.layers.RNN( + cell, return_sequences=True, return_state=True + ) + keras_out = layer(k_input) + cell.set_weights(weights) + k_out, k_state = sess.run(keras_out, {k_input: x_train}) + self.assertAllClose(tf_out, k_out) + self.assertAllClose(tf_state, k_state) + + def testSimpleRNNCellAndBasicRNNCellComparison(self): + input_shape = 10 + output_shape = 5 + timestep = 4 + batch = 20 + (x_train, _), _ = get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=output_shape, + ) + fix_weights_generator = tf.keras.layers.SimpleRNNCell(output_shape) + fix_weights_generator.build((None, input_shape)) + # The SimpleRNNCell contains 3 weights: kernel, recurrent_kernel, and + # bias. The BasicRNNCell contains 2 weights: kernel and bias, where the + # kernel is the concatenation of the SimpleRNNCell kernel and + # recurrent_kernel.
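+        # A minimal shape sketch of that mapping, using this test's
+        # input_shape=10 and output_shape=5:
+        #   kernel: (10, 5), recurrent_kernel: (5, 5), bias: (5,)
+        #   np.concatenate((kernel, recurrent_kernel)): (15, 5)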
+ keras_weights = fix_weights_generator.get_weights() + kernel, recurrent_kernel, bias = keras_weights + tf_weights = [np.concatenate((kernel, recurrent_kernel)), bias] + + with self.session(graph=tf.Graph()) as sess: + inputs = tf.placeholder( + tf.float32, shape=(None, timestep, input_shape) + ) + cell = tf.keras.layers.SimpleRNNCell(output_shape) + k_out, k_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32) + cell.set_weights(keras_weights) + [k_out, k_state] = sess.run([k_out, k_state], {inputs: x_train}) + with self.session(graph=tf.Graph()) as sess: + inputs = tf.placeholder( + tf.float32, shape=(None, timestep, input_shape) + ) + cell = tf.nn.rnn_cell.BasicRNNCell(output_shape) + tf_out, tf_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32) + cell.set_weights(tf_weights) + [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train}) + + self.assertAllClose(tf_out, k_out, atol=1e-5) + self.assertAllClose(tf_state, k_state, atol=1e-5) + + def testRNNCellSerialization(self): + for cell in [ + tf.nn.rnn_cell.LSTMCell(32, use_peepholes=True, cell_clip=True), + tf.nn.rnn_cell.BasicLSTMCell(32, dtype=tf.float32), + tf.nn.rnn_cell.BasicRNNCell( + 32, activation="relu", dtype=tf.float32 + ), + tf.nn.rnn_cell.GRUCell(32, dtype=tf.float32), + ]: + with self.cached_session(): + x = tf.keras.Input((None, 5)) + layer = tf.keras.layers.RNN(cell) + y = layer(x) + model = tf.keras.models.Model(x, y) + model.compile(optimizer="rmsprop", loss="mse") + + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + # The custom_objects is important here since rnn_cell_impl is + # not visible as a Keras layer, and also has a name conflict + # with keras.LSTMCell and GRUCell. + layer = tf.keras.layers.RNN.from_config( + config, + custom_objects={ + "BasicRNNCell": tf.nn.rnn_cell.BasicRNNCell, + "GRUCell": tf.nn.rnn_cell.GRUCell, + "LSTMCell": tf.nn.rnn_cell.LSTMCell, + "BasicLSTMCell": tf.nn.rnn_cell.BasicLSTMCell, + }, + ) + y = layer(x) + model = tf.keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + def testRNNCellActsLikeKerasRNNCellInProperScope(self): + with tf.layers.experimental.keras_style_scope(): + kn1 = KerasNetworkTFRNNs(name="kn1") + kn2 = KerasNetworkKerasRNNs(name="kn2") + + z = tf.zeros((2, 3)) + + kn1(z) + kn2(z) + + self.assertTrue(all("kn1" in v.name for v in kn1._cell.variables)) + self.assertTrue(all("kn2" in v.name for v in kn2._cell.variables)) + + with tf.layers.experimental.keras_style_scope(): + kn1_new = KerasNetworkTFRNNs(name="kn1_new") + kn2_new = KerasNetworkKerasRNNs(name="kn2_new") + + kn2_new(z) + # Most importantly, this doesn't fail due to variable scope reuse + # issues. 
+ kn1_new(z) + + self.assertTrue( + all("kn1_new" in v.name for v in kn1_new._cell.variables) + ) + self.assertTrue( + all("kn2_new" in v.name for v in kn2_new._cell.variables) + ) + + +def get_test_data(train_samples, test_samples, input_shape, num_classes): + num_sample = train_samples + test_samples + templates = 2 * num_classes * np.random.random((num_classes,) + input_shape) + y = np.random.randint(0, num_classes, size=(num_sample,)) + x = np.zeros((num_sample,) + input_shape, dtype=np.float32) + for i in range(num_sample): + x[i] = templates[y[i]] + np.random.normal( + loc=0, scale=1.0, size=input_shape + ) + return ( + (x[:train_samples], y[:train_samples]), + (x[train_samples:], y[train_samples:]), + ) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/integration_test/models/BUILD b/keras/integration_test/models/BUILD new file mode 100644 index 000000000000..daf1ba141adb --- /dev/null +++ b/keras/integration_test/models/BUILD @@ -0,0 +1,36 @@ +# Description: +# Contains a collection of diverse Keras models to be used for integration tests. + +# Placeholder: load unaliased py_library + +package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], + default_visibility = [ + "//keras:friends", + ], + licenses = ["notice"], +) + +py_library( + name = "models", + srcs = [ + "__init__.py", + "bert.py", + "ctc_speech_rnn.py", + "dcgan.py", + "edge_case_model.py", + "efficientnet_v2.py", + "input_spec.py", + "low_level_model.py", + "mini_unet.py", + "mini_xception.py", + "retinanet.py", + "structured_data_classification.py", + "text_classification.py", + "timeseries_forecasting.py", + "translation.py", + "vae.py", + ], + srcs_version = "PY3", + deps = ["//:expect_tensorflow_installed"], +) diff --git a/keras/wrappers/__init__.py b/keras/integration_test/models/__init__.py similarity index 100% rename from keras/wrappers/__init__.py rename to keras/integration_test/models/__init__.py diff --git a/keras/integration_test/models/bert.py b/keras/integration_test/models/bert.py new file mode 100644 index 000000000000..ea20aa041dbd --- /dev/null +++ b/keras/integration_test/models/bert.py @@ -0,0 +1,150 @@ +"""Bert model. 
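+
+Like the other modules in this directory, it exposes get_data_spec(),
+get_input_preprocessor(), get_model(), and get_custom_objects(). A minimal
+usage sketch, assuming spec_to_value from models/input_spec.py:
+
+    model = get_model(compile=True)
+    x_spec, y_spec = get_data_spec(batch_size=4)
+    model.fit(spec_to_value(x_spec), spec_to_value(y_spec))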
+ +Adapted from https://keras.io/examples/nlp/masked_language_modeling/ +""" +import numpy as np +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +SEQUENCE_LENGTH = 16 +VOCAB_SIZE = 1000 +EMBED_DIM = 64 +NUM_HEAD = 2 +FF_DIM = 32 +NUM_LAYERS = 2 + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size,), dtype="string"), + InputSpec((batch_size, SEQUENCE_LENGTH, VOCAB_SIZE)), + ) + + +def get_input_preprocessor(): + input_vectorizer = keras.layers.TextVectorization( + max_tokens=VOCAB_SIZE, + output_mode="int", + output_sequence_length=SEQUENCE_LENGTH, + ) + text_ds = tf.data.Dataset.from_tensor_slices( + [ + "Lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "sed do eiusmod tempor incididunt ut", + "labore et dolore magna aliqua.", + "Ut enim ad minim veniam", + "quis nostrud exercitation ullamco", + "laboris nisi ut aliquip ex ea commodo consequat.", + ] + ) + input_vectorizer.adapt(text_ds) + return input_vectorizer + + +def bert_module(query, key, value, i): + attention_output = keras.layers.MultiHeadAttention( + num_heads=NUM_HEAD, + key_dim=EMBED_DIM // NUM_HEAD, + )(query, key, value) + attention_output = keras.layers.Dropout(0.1)(attention_output) + attention_output = keras.layers.LayerNormalization(epsilon=1e-6)( + query + attention_output + ) + + ffn = keras.Sequential( + [ + keras.layers.Dense(FF_DIM, activation="relu"), + keras.layers.Dense(EMBED_DIM), + ], + ) + ffn_output = ffn(attention_output) + ffn_output = keras.layers.Dropout(0.1)(ffn_output) + sequence_output = keras.layers.LayerNormalization(epsilon=1e-6)( + attention_output + ffn_output + ) + return sequence_output + + +def get_pos_encoding_matrix(max_len, d_emb): + pos_enc = np.array( + [ + [pos / np.power(10000, 2 * (j // 2) / d_emb) for j in range(d_emb)] + if pos != 0 + else np.zeros(d_emb) + for pos in range(max_len) + ] + ) + pos_enc[1:, 0::2] = np.sin(pos_enc[1:, 0::2]) + pos_enc[1:, 1::2] = np.cos(pos_enc[1:, 1::2]) + return pos_enc + + +loss_fn = keras.losses.CategoricalCrossentropy() +loss_tracker = keras.metrics.Mean(name="loss") + + +class MaskedLanguageModel(keras.Model): + def train_step(self, inputs): + if len(inputs) == 3: + features, labels, sample_weight = inputs + else: + features, labels = inputs + sample_weight = None + + with tf.GradientTape() as tape: + predictions = self(features, training=True) + loss = loss_fn(labels, predictions, sample_weight=sample_weight) + + trainable_vars = self.trainable_variables + gradients = tape.gradient(loss, trainable_vars) + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + loss_tracker.update_state(loss, sample_weight=sample_weight) + return {"loss": loss_tracker.result()} + + @property + def metrics(self): + return [loss_tracker] + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + if include_preprocessing: + inputs = keras.layers.Input((), dtype="string") + x = get_input_preprocessor()(inputs) + else: + inputs = keras.layers.Input((SEQUENCE_LENGTH,), dtype=tf.int64) + x = inputs + word_embeddings = keras.layers.Embedding(VOCAB_SIZE, EMBED_DIM)(x) + position_embeddings = keras.layers.Embedding( + input_dim=SEQUENCE_LENGTH, + output_dim=EMBED_DIM, + weights=[get_pos_encoding_matrix(SEQUENCE_LENGTH, EMBED_DIM)], + trainable=False, + )(tf.range(start=0, limit=SEQUENCE_LENGTH, delta=1)) + embeddings = word_embeddings + position_embeddings + + encoder_output = embeddings + for i in range(NUM_LAYERS): + 
encoder_output = bert_module( + encoder_output, encoder_output, encoder_output, i + ) + + mlm_output = keras.layers.Dense( + VOCAB_SIZE, name="mlm_cls", activation="softmax" + )(encoder_output) + model = MaskedLanguageModel(inputs, mlm_output) + + if compile: + optimizer = keras.optimizers.Adam() + model.compile(optimizer=optimizer, jit_compile=jit_compile) + return model + + +def get_custom_objects(): + return { + "MaskedLanguageModel": MaskedLanguageModel, + } diff --git a/keras/integration_test/models/ctc_speech_rnn.py b/keras/integration_test/models/ctc_speech_rnn.py new file mode 100644 index 000000000000..1324581b8ed4 --- /dev/null +++ b/keras/integration_test/models/ctc_speech_rnn.py @@ -0,0 +1,100 @@ +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +TIMESTEPS = 64 +INPUT_DIM = 50 +OUTPUT_DIM = 40 +NUM_RNN_LAYERS = 2 +RNN_UNITS = 32 + + +def get_input_preprocessor(): + return None + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size, TIMESTEPS, INPUT_DIM)), + InputSpec((batch_size, 1), dtype="int64", range=[0, OUTPUT_DIM]), + ) + + +def ctc_loss(y_true, y_pred): + batch_length = tf.cast(tf.shape(y_true)[0], dtype="int64") + input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64") + label_length = tf.cast(tf.shape(y_true)[1], dtype="int64") + + input_length = input_length * tf.ones( + shape=(batch_length, 1), dtype="int64" + ) + label_length = label_length * tf.ones( + shape=(batch_length, 1), dtype="int64" + ) + + return keras.backend.ctc_batch_cost( + y_true, y_pred, input_length, label_length + ) + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + input_spectrogram = keras.layers.Input((None, INPUT_DIM), name="input") + x = keras.layers.Reshape((-1, INPUT_DIM, 1), name="expand_dim")( + input_spectrogram + ) + x = keras.layers.Conv2D( + filters=32, + kernel_size=[11, 41], + strides=[2, 2], + padding="same", + use_bias=False, + name="conv_1", + )(x) + x = keras.layers.BatchNormalization(name="conv_1_bn")(x) + x = keras.layers.ReLU(name="conv_1_relu")(x) + x = keras.layers.Conv2D( + filters=32, + kernel_size=[11, 21], + strides=[1, 2], + padding="same", + use_bias=False, + name="conv_2", + )(x) + x = keras.layers.BatchNormalization(name="conv_2_bn")(x) + x = keras.layers.ReLU(name="conv_2_relu")(x) + x = keras.layers.Reshape((-1, x.shape[-2] * x.shape[-1]))(x) + for i in range(1, NUM_RNN_LAYERS + 1): + recurrent = keras.layers.GRU( + units=RNN_UNITS, + activation="tanh", + recurrent_activation="sigmoid", + use_bias=True, + return_sequences=True, + reset_after=True, + name=f"gru_{i}", + ) + x = keras.layers.Bidirectional( + recurrent, name=f"bidirectional_{i}", merge_mode="concat" + )(x) + if i < NUM_RNN_LAYERS: + x = keras.layers.Dropout(rate=0.5)(x) + x = keras.layers.Dense(units=RNN_UNITS * 2, name="dense_1")(x) + x = keras.layers.ReLU(name="dense_1_relu")(x) + x = keras.layers.Dropout(rate=0.5)(x) + output = keras.layers.Dense(units=OUTPUT_DIM + 1, activation="softmax")(x) + model = keras.Model(input_spectrogram, output, name="DeepSpeech_2") + + if compile: + model.compile( + optimizer=keras.optimizers.Adam(learning_rate=1e-4), + loss=ctc_loss, + jit_compile=jit_compile, + ) + return model + + +def get_custom_objects(): + return {"ctc_loss": ctc_loss} diff --git a/keras/integration_test/models/dcgan.py b/keras/integration_test/models/dcgan.py new file mode 100644 index 000000000000..ec23da91b331 --- /dev/null +++ 
b/keras/integration_test/models/dcgan.py @@ -0,0 +1,179 @@ +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec +from keras.saving import serialization_lib + +IMG_SIZE = (64, 64) +LATENT_DIM = 128 + + +def get_data_spec(batch_size): + return InputSpec((batch_size,) + IMG_SIZE + (3,)) + + +def get_input_preprocessor(): + return None + + +class GAN(keras.Model): + def __init__(self, discriminator, generator, latent_dim): + super(GAN, self).__init__() + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + + def compile(self, d_optimizer, g_optimizer, loss_fn, jit_compile=False): + super(GAN, self).compile(jit_compile=jit_compile) + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + self.d_loss_metric = keras.metrics.Mean(name="d_loss") + self.g_loss_metric = keras.metrics.Mean(name="g_loss") + + @property + def metrics(self): + return [self.d_loss_metric, self.g_loss_metric] + + def train_step(self, real_images): + batch_size = tf.shape(real_images)[0] + random_latent_vectors = tf.random.normal( + shape=(batch_size, self.latent_dim) + ) + generated_images = self.generator(random_latent_vectors) + combined_images = tf.concat([generated_images, real_images], axis=0) + labels = tf.concat( + [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0 + ) + labels += 0.05 * tf.random.uniform(tf.shape(labels)) + + with tf.GradientTape() as tape: + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + grads = tape.gradient(d_loss, self.discriminator.trainable_weights) + self.d_optimizer.apply_gradients( + zip(grads, self.discriminator.trainable_weights) + ) + + random_latent_vectors = tf.random.normal( + shape=(batch_size, self.latent_dim) + ) + misleading_labels = tf.zeros((batch_size, 1)) + + with tf.GradientTape() as tape: + predictions = self.discriminator( + self.generator(random_latent_vectors) + ) + g_loss = self.loss_fn(misleading_labels, predictions) + grads = tape.gradient(g_loss, self.generator.trainable_weights) + self.g_optimizer.apply_gradients( + zip(grads, self.generator.trainable_weights) + ) + self.d_loss_metric.update_state(d_loss) + self.g_loss_metric.update_state(g_loss) + return { + "d_loss": self.d_loss_metric.result(), + "g_loss": self.g_loss_metric.result(), + } + + def get_config(self): + return { + "discriminator": self.discriminator, + "generator": self.generator, + "latent_dim": self.latent_dim, + } + + @classmethod + def from_config(cls, config): + discriminator = serialization_lib.deserialize_keras_object( + config["discriminator"] + ) + generator = serialization_lib.deserialize_keras_object( + config["generator"] + ) + latent_dim = config["latent_dim"] + return cls(discriminator, generator, latent_dim) + + def get_compile_config(self): + return { + "loss_fn": self.loss_fn, + "d_optimizer": self.d_optimizer, + "g_optimizer": self.g_optimizer, + "jit_compile": self.jit_compile, + } + + def compile_from_config(self, config): + loss_fn = serialization_lib.deserialize_keras_object(config["loss_fn"]) + d_optimizer = serialization_lib.deserialize_keras_object( + config["d_optimizer"] + ) + g_optimizer = serialization_lib.deserialize_keras_object( + config["g_optimizer"] + ) + jit_compile = config["jit_compile"] + self.compile( + loss_fn=loss_fn, + d_optimizer=d_optimizer, + g_optimizer=g_optimizer, + jit_compile=jit_compile, + ) + + +def get_model( + build=False, compile=False, 
jit_compile=False, include_preprocessing=True +): + discriminator = keras.Sequential( + [ + keras.Input(shape=IMG_SIZE + (3,)), + keras.layers.Conv2D(64, kernel_size=4, strides=2, padding="same"), + keras.layers.LeakyReLU(alpha=0.2), + keras.layers.Conv2D(128, kernel_size=4, strides=2, padding="same"), + keras.layers.LeakyReLU(alpha=0.2), + keras.layers.Conv2D(128, kernel_size=4, strides=2, padding="same"), + keras.layers.LeakyReLU(alpha=0.2), + keras.layers.Flatten(), + keras.layers.Dropout(0.2), + keras.layers.Dense(1, activation="sigmoid"), + ], + name="discriminator", + ) + + generator = keras.Sequential( + [ + keras.Input(shape=(LATENT_DIM,)), + keras.layers.Dense(8 * 8 * 128), + keras.layers.Reshape((8, 8, 128)), + keras.layers.Conv2DTranspose( + 128, kernel_size=4, strides=2, padding="same" + ), + keras.layers.LeakyReLU(alpha=0.2), + keras.layers.Conv2DTranspose( + 256, kernel_size=4, strides=2, padding="same" + ), + keras.layers.LeakyReLU(alpha=0.2), + keras.layers.Conv2DTranspose( + 512, kernel_size=4, strides=2, padding="same" + ), + keras.layers.LeakyReLU(alpha=0.2), + keras.layers.Conv2D( + 3, kernel_size=5, padding="same", activation="sigmoid" + ), + ], + name="generator", + ) + + gan = GAN( + discriminator=discriminator, generator=generator, latent_dim=LATENT_DIM + ) + if compile: + gan.compile( + d_optimizer=keras.optimizers.Adam(learning_rate=0.0001), + g_optimizer=keras.optimizers.Adam(learning_rate=0.0001), + loss_fn=keras.losses.BinaryCrossentropy(), + jit_compile=jit_compile, + ) + return gan + + +def get_custom_objects(): + return {"GAN": GAN} diff --git a/keras/integration_test/models/edge_case_model.py b/keras/integration_test/models/edge_case_model.py new file mode 100644 index 000000000000..0fd8d1670424 --- /dev/null +++ b/keras/integration_test/models/edge_case_model.py @@ -0,0 +1,155 @@ +"""Model that incorporates a set of edge case development patterns. 
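+
+Covered patterns:
+
+- a layer whose call() takes two inputs (LinearA)
+- a layer tracking its weights in a dict attribute (LinearB)
+- a layer creating its weights lazily in call() (LinearC)
+- a layer with distinct training/inference behavior and non-trainable
+  updates (BatchNorm)
+- a Functional graph built inside a Model subclass constructor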
+""" + +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +INPUT_DIM = 32 +NUM_CLASSES = 5 + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size, INPUT_DIM)), + InputSpec((batch_size, NUM_CLASSES)), + ) + + +def get_input_preprocessor(): + return None + + +class LinearA(keras.layers.Layer): + """Standard custom layer with 2 call() inputs.""" + + def __init__(self, units=32, input_dim=32): + super().__init__() + self.w = self.add_weight( + shape=(input_dim, units), + initializer="random_normal", + trainable=True, + ) + self.b = self.add_weight( + shape=(units,), initializer="zeros", trainable=True + ) + + def call(self, inputs_1, inputs_2): + return ( + tf.matmul(inputs_1, self.w) + tf.matmul(inputs_2, self.w) + self.b + ) + + +class LinearB(keras.layers.Layer): + """Layer that tracks weights in a dict attribute that gets updated later.""" + + def __init__(self, units=32, input_dim=32, **kwargs): + super().__init__(**kwargs) + w_init = tf.random_normal_initializer() + b_init = tf.zeros_initializer() + self.state = { + "kernel": tf.Variable( + initial_value=w_init(shape=(input_dim, units), dtype="float32"), + trainable=True, + name="kernel", + ) + } + self.state["bias"] = tf.Variable( + initial_value=b_init(shape=(units,), dtype="float32"), + trainable=True, + name="bias", + ) + + def call(self, inputs): + return tf.matmul(inputs, self.state["kernel"]) + self.state["bias"] + + +class LinearC(keras.layers.Layer): + """Layer that creates weights in call().""" + + def __init__(self, units=32, input_dim=32, **kwargs): + super().__init__(**kwargs) + self._custom_built = False + self.units = units + self.input_dim = input_dim + + def call(self, inputs): + if not self._custom_built: + self.w = self.add_weight( + shape=(self.input_dim, self.units), + initializer="random_normal", + trainable=True, + ) + self.b = self.add_weight( + shape=(self.units,), initializer="zeros", trainable=True + ) + self._custom_built = True + return tf.matmul(inputs, self.w) + self.b + + +class BatchNorm(keras.layers.Layer): + """Layer with different training/test behavior and non-trainable updates.""" + + def __init__( + self, scale=True, center=True, epsilon=1e-6, momentum=0.9, **kwargs + ): + super().__init__(**kwargs) + self.scale = scale + self.center = center + self.epsilon = epsilon + self.momentum = momentum + + def build(self, input_shape): + self.var = self.add_weight( + shape=[input_shape[1]], initializer="ones", trainable=False + ) + self.mean = self.add_weight( + shape=[input_shape[1]], initializer="zeros", trainable=False + ) + self.gamma = self.add_weight(shape=[input_shape[1]], initializer="ones") + self.beta = self.add_weight(shape=[input_shape[1]], initializer="zeros") + + def call(self, inputs, training=False): + if training: + mean, var = tf.nn.moments(inputs, axes=[0]) + outputs = (inputs - mean) / (var + self.epsilon) + self.var.assign(self.var * self.momentum + var * 0.1) + self.mean.assign(self.mean * self.momentum + mean * 0.1) + else: + outputs = (inputs - self.mean) / (self.var + self.epsilon) + if self.scale: + outputs *= self.gamma + if self.center: + outputs += self.beta + return outputs + + +class FunctionalSubclassModel(keras.Model): + def __init__(self, **kwargs): + inputs = keras.Input((INPUT_DIM,)) + x = inputs + x = LinearA(32, INPUT_DIM)(x, x) + x = LinearB(32, 32)(x) + x = LinearC(32, 32)(x) + x = BatchNorm()(x) + outputs = keras.layers.Dense(NUM_CLASSES, activation="softmax")(x) + 
super().__init__(inputs, outputs, **kwargs) + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + model = FunctionalSubclassModel() + if compile: + model.compile("rmsprop", "mse", jit_compile=jit_compile) + return model + + +def get_custom_objects(): + return { + "LinearA": LinearA, + "LinearB": LinearB, + "LinearC": LinearC, + "BatchNorm": BatchNorm, + } diff --git a/keras/integration_test/models/efficientnet_v2.py b/keras/integration_test/models/efficientnet_v2.py new file mode 100644 index 000000000000..68e392671908 --- /dev/null +++ b/keras/integration_test/models/efficientnet_v2.py @@ -0,0 +1,315 @@ +"""Image classification with EfficientNetV2 architecture. + +Adapted from the EfficientNetV2 Keras Application. +""" +import math + +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +IMG_SIZE = (96, 96) +NUM_CLASSES = 5 + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size,) + IMG_SIZE + (3,)), + InputSpec((batch_size, NUM_CLASSES)), + ) + + +def get_input_preprocessor(): + return keras.layers.Rescaling(scale=1.0 / 128.0, offset=-1) + + +def round_filters(filters, width_coefficient, min_depth, depth_divisor): + filters *= width_coefficient + minimum_depth = min_depth or depth_divisor + new_filters = max( + minimum_depth, + int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, + ) + return int(new_filters) + + +def MBConvBlock( + input_filters: int, + output_filters: int, + expand_ratio=1, + kernel_size=3, + strides=1, + se_ratio=0.0, + activation="swish", + survival_probability: float = 0.8, +): + def apply(inputs): + filters = input_filters * expand_ratio + if expand_ratio != 1: + x = keras.layers.Conv2D( + filters=filters, + kernel_size=1, + strides=1, + padding="same", + data_format="channels_last", + use_bias=False, + )(inputs) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Activation(activation)(x) + else: + x = inputs + + x = keras.layers.DepthwiseConv2D( + kernel_size=kernel_size, + strides=strides, + padding="same", + data_format="channels_last", + use_bias=False, + )(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Activation(activation)(x) + + if 0 < se_ratio <= 1: + filters_se = max(1, int(input_filters * se_ratio)) + se = keras.layers.GlobalAveragePooling2D()(x) + se = keras.layers.Reshape((1, 1, filters))(se) + se = keras.layers.Conv2D( + filters_se, + 1, + padding="same", + activation=activation, + )(se) + se = keras.layers.Conv2D( + filters, + 1, + padding="same", + activation="sigmoid", + )(se) + x = keras.layers.multiply([x, se]) + x = keras.layers.Conv2D( + filters=output_filters, + kernel_size=1, + strides=1, + padding="same", + data_format="channels_last", + use_bias=False, + )(x) + x = keras.layers.BatchNormalization()(x) + + if strides == 1 and input_filters == output_filters: + if survival_probability: + x = keras.layers.Dropout( + survival_probability, + noise_shape=(None, 1, 1, 1), + )(x) + x = keras.layers.add([x, inputs]) + return x + + return apply + + +def FusedMBConvBlock( + input_filters: int, + output_filters: int, + expand_ratio=1, + kernel_size=3, + strides=1, + se_ratio=0.0, + activation="swish", + survival_probability: float = 0.8, +): + def apply(inputs): + filters = input_filters * expand_ratio + if expand_ratio != 1: + x = keras.layers.Conv2D( + filters, + kernel_size=kernel_size, + strides=strides, + data_format="channels_last", + padding="same", + use_bias=False, + )(inputs) + x = 
keras.layers.BatchNormalization()(x) + x = keras.layers.Activation(activation)(x) + else: + x = inputs + + if 0 < se_ratio <= 1: + filters_se = max(1, int(input_filters * se_ratio)) + se = keras.layers.GlobalAveragePooling2D()(x) + se = keras.layers.Reshape((1, 1, filters))(se) + se = keras.layers.Conv2D( + filters_se, + 1, + padding="same", + activation=activation, + )(se) + se = keras.layers.Conv2D( + filters, + 1, + padding="same", + activation="sigmoid", + )(se) + x = keras.layers.multiply([x, se]) + + x = keras.layers.Conv2D( + output_filters, + kernel_size=1 if expand_ratio != 1 else kernel_size, + strides=1 if expand_ratio != 1 else strides, + padding="same", + use_bias=False, + )(x) + x = keras.layers.BatchNormalization()(x) + + if expand_ratio == 1: + x = keras.layers.Activation(activation)(x) + + if strides == 1 and input_filters == output_filters: + if survival_probability: + x = keras.layers.Dropout( + survival_probability, + noise_shape=(None, 1, 1, 1), + )(x) + x = keras.layers.add([x, inputs]) + + return x + + return apply + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + width_coefficient = 1.0 + depth_coefficient = 1.0 + dropout_rate = 0.2 + drop_connect_rate = 0.2 + depth_divisor = 8 + min_depth = 8 + activation = "swish" + blocks_args = [ + { + "kernel_size": 3, + "num_repeat": 2, + "input_filters": 24, + "output_filters": 24, + "expand_ratio": 1, + "se_ratio": 0.0, + "strides": 1, + "conv_type": 1, + }, + { + "kernel_size": 3, + "num_repeat": 4, + "input_filters": 24, + "output_filters": 48, + "expand_ratio": 4, + "se_ratio": 0.0, + "strides": 2, + "conv_type": 1, + }, + { + "conv_type": 1, + "expand_ratio": 4, + "input_filters": 48, + "kernel_size": 3, + "num_repeat": 4, + "output_filters": 64, + "se_ratio": 0, + "strides": 2, + }, + { + "conv_type": 0, + "expand_ratio": 4, + "input_filters": 64, + "kernel_size": 3, + "num_repeat": 6, + "output_filters": 128, + "se_ratio": 0.25, + "strides": 2, + }, + ] + + inputs = keras.layers.Input(shape=IMG_SIZE + (3,)) + if include_preprocessing: + x = get_input_preprocessor()(inputs) + else: + x = inputs + + stem_filters = round_filters( + filters=blocks_args[0]["input_filters"], + width_coefficient=width_coefficient, + min_depth=min_depth, + depth_divisor=depth_divisor, + ) + x = keras.layers.Conv2D( + filters=stem_filters, + kernel_size=3, + strides=2, + padding="same", + use_bias=False, + )(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Activation(activation, name="stem_activation")(x) + + b = 0 + blocks = float(sum(args["num_repeat"] for args in blocks_args)) + for _, args in enumerate(blocks_args): + args["input_filters"] = round_filters( + filters=args["input_filters"], + width_coefficient=width_coefficient, + min_depth=min_depth, + depth_divisor=depth_divisor, + ) + args["output_filters"] = round_filters( + filters=args["output_filters"], + width_coefficient=width_coefficient, + min_depth=min_depth, + depth_divisor=depth_divisor, + ) + block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")] + repeats = int(math.ceil(depth_coefficient * args.pop("num_repeat"))) + for j in range(repeats): + if j > 0: + args["strides"] = 1 + args["input_filters"] = args["output_filters"] + + x = block( + activation=activation, + survival_probability=drop_connect_rate * b / blocks, + **args, + )(x) + b += 1 + + top_filters = round_filters( + filters=1280, + width_coefficient=width_coefficient, + min_depth=min_depth, + depth_divisor=depth_divisor, + ) + x = 
keras.layers.Conv2D( + filters=top_filters, + kernel_size=1, + strides=1, + padding="same", + data_format="channels_last", + use_bias=False, + )(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Activation(activation=activation, name="top_activation")(x) + x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x) + x = keras.layers.Dropout(dropout_rate, name="top_dropout")(x) + x = keras.layers.Dense( + NUM_CLASSES, + activation="softmax", + )(x) + model = keras.Model(inputs, x) + if compile: + model.compile( + "adam", loss="categorical_crossentropy", jit_compile=jit_compile + ) + return model + + +def get_custom_objects(): + return {} diff --git a/keras/integration_test/models/input_spec.py b/keras/integration_test/models/input_spec.py new file mode 100644 index 000000000000..5805fcbbc108 --- /dev/null +++ b/keras/integration_test/models/input_spec.py @@ -0,0 +1,24 @@ +"""Class to specify an input's shape/dtype/value range. +""" + +import tensorflow as tf + + +class InputSpec: + def __init__(self, shape, dtype="float32", range=None): + self.shape = shape + self.dtype = dtype + self.range = range + + +def spec_to_value(spec): + shape = spec.shape + dtype = spec.dtype + rg = spec.range or [0, 1] + if dtype == "string": + return tf.constant( + ["some string" for _ in range(shape[0])], dtype="string" + ) + return tf.random.stateless_uniform( + shape, seed=[123, 1], minval=rg[0], maxval=rg[1], dtype=dtype + ) diff --git a/keras/integration_test/models/low_level_model.py b/keras/integration_test/models/low_level_model.py new file mode 100644 index 000000000000..1bf03bbab4eb --- /dev/null +++ b/keras/integration_test/models/low_level_model.py @@ -0,0 +1,162 @@ +"""Model where almost everything is implemented from scratch. + +- Custom layers +- Custom model subclass +- Custom train_step and test_step +- Custom compile() +- Custom learning rate schedule +- Custom metrics +""" + +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +INPUT_DIM = 32 +NUM_CLASSES = 5 + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size, INPUT_DIM)), + InputSpec((batch_size, NUM_CLASSES)), + ) + + +def get_input_preprocessor(): + return None + + +class Linear(keras.layers.Layer): + def __init__(self, units=32, name=None): + super().__init__(name=name) + self.units = units + + def build(self, input_shape): + self.w = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="random_normal", + trainable=True, + name="w", + ) + self.b = self.add_weight( + shape=(self.units,), + initializer="random_normal", + trainable=True, + name="b", + ) + + def call(self, inputs): + return tf.matmul(inputs, self.w) + self.b + + +class BinaryTruePositives(tf.keras.metrics.Metric): + def __init__(self, name="binary_true_positives", **kwargs): + super().__init__(name=name, **kwargs) + self.true_positives = self.add_weight(name="tp", initializer="zeros") + + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = tf.cast(y_true, tf.bool) + y_pred = tf.cast(y_pred, tf.bool) + + values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True)) + values = tf.cast(values, self.dtype) + if sample_weight is not None: + sample_weight = tf.cast(sample_weight, self.dtype) + values = tf.multiply(values, sample_weight) + self.true_positives.assign_add(tf.reduce_sum(values)) + + def result(self): + return self.true_positives + + def reset_state(self): + self.true_positives.assign(0) + + +class 
CustomModel(keras.Model): + def __init__(self): + super().__init__() + self.loss_tracker = keras.metrics.Mean(name="loss") + self.btp_metric = BinaryTruePositives(name="mae") + + self.linear_1 = Linear(32, name="linear_1") + self.linear_2 = Linear(NUM_CLASSES, name="linear_2") + + def call(self, inputs, training=False): + x = self.linear_1(inputs) + x = self.linear_2(x) + return x + + def train_step(self, data): + x, y = data + with tf.GradientTape() as tape: + y_pred = self(x, training=True) + loss = keras.losses.mean_squared_error(y, y_pred) + + trainable_vars = self.trainable_variables + gradients = tape.gradient(loss, trainable_vars) + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + self.loss_tracker.update_state(loss) + self.btp_metric.update_state(y, y_pred) + return { + "loss": self.loss_tracker.result(), + "btp": self.btp_metric.result(), + } + + def test_step(self, data): + x, y = data + y_pred = self(x, training=True) + loss = keras.losses.mean_squared_error(y, y_pred) + self.loss_tracker.update_state(loss) + self.btp_metric.update_state(y, y_pred) + return { + "loss": self.loss_tracker.result(), + "btp": self.btp_metric.result(), + } + + @property + def metrics(self): + return [self.loss_tracker, self.btp_metric] + + +class CustomLRSchedule(keras.optimizers.schedules.LearningRateSchedule): + def __init__(self, initial_learning_rate): + self.initial_learning_rate = initial_learning_rate + + def __call__(self, step): + return self.initial_learning_rate / tf.cast(step + 1, "float32") + + def get_config(self): + return { + "initial_learning_rate": self.initial_learning_rate, + } + + +def custom_loss(y_true, y_pred): + return keras.losses.mse(y_true, y_pred) + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + model = CustomModel() + if build: + model(tf.zeros((1, INPUT_DIM))) + if compile: + model.compile( + optimizer=keras.optimizers.Adam(CustomLRSchedule(0.1)), + loss=custom_loss, + jit_compile=jit_compile, + ) + return model + + +def get_custom_objects(): + return { + "Linear": Linear, + "CustomLRSchedule": CustomLRSchedule, + "CustomModel": CustomModel, + "BinaryTruePositives": BinaryTruePositives, + "custom_loss": custom_loss, + } diff --git a/keras/integration_test/models/mini_unet.py b/keras/integration_test/models/mini_unet.py new file mode 100644 index 000000000000..c44662b3f1a8 --- /dev/null +++ b/keras/integration_test/models/mini_unet.py @@ -0,0 +1,80 @@ +"""Segmentation model. 
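+
+A small U-Net-style network: a separable-conv downsampling stack and a
+transposed-conv upsampling stack, joined by residual projections.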
+ +Adapted from https://keras.io/examples/vision/oxford_pets_image_segmentation/ +""" +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +IMG_SIZE = (224, 224) +NUM_CLASSES = 5 + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size,) + IMG_SIZE + (3,)), + InputSpec((batch_size,) + IMG_SIZE + (NUM_CLASSES,)), + ) + + +def get_input_preprocessor(): + return None + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + inputs = keras.Input(shape=IMG_SIZE + (3,)) + x = keras.layers.Conv2D(32, 3, strides=2, padding="same")(inputs) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Activation("relu")(x) + + previous_block_activation = x + for filters in [64, 128, 256]: + x = keras.layers.Activation("relu")(x) + x = keras.layers.SeparableConv2D(filters, 3, padding="same")(x) + x = keras.layers.BatchNormalization()(x) + + x = keras.layers.Activation("relu")(x) + x = keras.layers.SeparableConv2D(filters, 3, padding="same")(x) + x = keras.layers.BatchNormalization()(x) + + x = keras.layers.MaxPooling2D(3, strides=2, padding="same")(x) + + residual = keras.layers.Conv2D(filters, 1, strides=2, padding="same")( + previous_block_activation + ) + x = keras.layers.add([x, residual]) + previous_block_activation = x + + for filters in [256, 128, 64, 32]: + x = keras.layers.Activation("relu")(x) + x = keras.layers.Conv2DTranspose(filters, 3, padding="same")(x) + x = keras.layers.BatchNormalization()(x) + + x = keras.layers.Activation("relu")(x) + x = keras.layers.Conv2DTranspose(filters, 3, padding="same")(x) + x = keras.layers.BatchNormalization()(x) + + x = keras.layers.UpSampling2D(2)(x) + + residual = keras.layers.UpSampling2D(2)(previous_block_activation) + residual = keras.layers.Conv2D(filters, 1, padding="same")(residual) + x = keras.layers.add([x, residual]) + previous_block_activation = x + + outputs = keras.layers.Conv2D( + NUM_CLASSES, 3, activation="softmax", padding="same" + )(x) + model = keras.Model(inputs, outputs) + if compile: + model.compile( + optimizer="rmsprop", + loss="categorical_crossentropy", + jit_compile=jit_compile, + ) + return model + + +def get_custom_objects(): + return {} diff --git a/keras/integration_test/models/mini_xception.py b/keras/integration_test/models/mini_xception.py new file mode 100644 index 000000000000..456e53390c53 --- /dev/null +++ b/keras/integration_test/models/mini_xception.py @@ -0,0 +1,84 @@ +"""Mini-Xception classification model. 
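+
+A downscaled Xception: stacks of separable convolutions with residual
+connections, plus an optional augmentation-and-rescaling preprocessor.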
+ +Adapted from https://keras.io/examples/vision/image_classification_from_scratch/ +""" +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +IMG_SIZE = (120, 120) + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size,) + IMG_SIZE + (3,)), + InputSpec((batch_size, 1), dtype="int32", range=[0, 2]), + ) + + +def get_input_preprocessor(): + return keras.Sequential( + [ + keras.layers.RandomFlip(), + keras.layers.RandomRotation(0.2), + keras.layers.RandomZoom(0.2), + keras.layers.Rescaling(1.0 / 255), + ] + ) + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + inputs = keras.Input(shape=IMG_SIZE + (3,)) + + if include_preprocessing: + x = get_input_preprocessor()(inputs) + else: + x = inputs + + x = keras.layers.Conv2D(32, 3, strides=2, padding="same")(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Activation("relu")(x) + + x = keras.layers.Conv2D(64, 3, padding="same")(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Activation("relu")(x) + + previous_block_activation = x + + for size in [128, 256, 512, 728]: + x = keras.layers.Activation("relu")(x) + x = keras.layers.SeparableConv2D(size, 3, padding="same")(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Activation("relu")(x) + x = keras.layers.SeparableConv2D(size, 3, padding="same")(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.MaxPooling2D(3, strides=2, padding="same")(x) + + residual = keras.layers.Conv2D(size, 1, strides=2, padding="same")( + previous_block_activation + ) + x = keras.layers.add([x, residual]) + previous_block_activation = x + + x = keras.layers.SeparableConv2D(1024, 3, padding="same")(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Activation("relu")(x) + + x = keras.layers.GlobalAveragePooling2D()(x) + x = keras.layers.Dropout(0.5)(x) + outputs = keras.layers.Dense(1, activation="sigmoid")(x) + model = keras.Model(inputs, outputs) + if compile: + model.compile( + optimizer="adam", + loss="binary_crossentropy", + metrics=["accuracy"], + jit_compile=jit_compile, + ) + return model + + +def get_custom_objects(): + return {} diff --git a/keras/integration_test/models/retinanet.py b/keras/integration_test/models/retinanet.py new file mode 100644 index 000000000000..188fc3e9947a --- /dev/null +++ b/keras/integration_test/models/retinanet.py @@ -0,0 +1,260 @@ +"""RetinaNet object detection model. 
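+
+A ResNet50 backbone feeding a feature pyramid with shared classification
+and box-regression heads, trained with a focal classification loss and a
+smooth-L1 box loss.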
+ +Adapted from https://keras.io/examples/vision/retinanet/ +""" +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec +from keras.saving import serialization_lib + +NUM_CLASSES = 10 +IMG_SIZE = (224, 224) + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size,) + IMG_SIZE + (3,)), + InputSpec((batch_size, 9441, 5)), + ) + + +def get_input_preprocessor(): + return None + + +def get_backbone(): + backbone = keras.applications.ResNet50( + include_top=False, + input_shape=[None, None, 3], + weights=None, + ) + c3_output, c4_output, c5_output = [ + backbone.get_layer(layer_name).output + for layer_name in [ + "conv3_block4_out", + "conv4_block6_out", + "conv5_block3_out", + ] + ] + return keras.Model( + inputs=[backbone.inputs], outputs=[c3_output, c4_output, c5_output] + ) + + +class FeaturePyramid(keras.layers.Layer): + def __init__(self, backbone=None, **kwargs): + super().__init__(name="FeaturePyramid", **kwargs) + self.backbone = backbone if backbone else get_backbone() + self.conv_c3_1x1 = keras.layers.Conv2D(256, 1, 1, "same") + self.conv_c4_1x1 = keras.layers.Conv2D(256, 1, 1, "same") + self.conv_c5_1x1 = keras.layers.Conv2D(256, 1, 1, "same") + self.conv_c3_3x3 = keras.layers.Conv2D(256, 3, 1, "same") + self.conv_c4_3x3 = keras.layers.Conv2D(256, 3, 1, "same") + self.conv_c5_3x3 = keras.layers.Conv2D(256, 3, 1, "same") + self.conv_c6_3x3 = keras.layers.Conv2D(256, 3, 2, "same") + self.conv_c7_3x3 = keras.layers.Conv2D(256, 3, 2, "same") + self.upsample_2x = keras.layers.UpSampling2D(2) + + def call(self, images, training=False): + c3_output, c4_output, c5_output = self.backbone( + images, training=training + ) + p3_output = self.conv_c3_1x1(c3_output) + p4_output = self.conv_c4_1x1(c4_output) + p5_output = self.conv_c5_1x1(c5_output) + p4_output = p4_output + self.upsample_2x(p5_output) + p3_output = p3_output + self.upsample_2x(p4_output) + p3_output = self.conv_c3_3x3(p3_output) + p4_output = self.conv_c4_3x3(p4_output) + p5_output = self.conv_c5_3x3(p5_output) + p6_output = self.conv_c6_3x3(c5_output) + p7_output = self.conv_c7_3x3(tf.nn.relu(p6_output)) + return p3_output, p4_output, p5_output, p6_output, p7_output + + +def build_head(output_filters, bias_init): + head = keras.Sequential([keras.Input(shape=[None, None, 256])]) + kernel_init = tf.initializers.RandomNormal(0.0, 0.01) + for _ in range(4): + head.add( + keras.layers.Conv2D( + 256, 3, padding="same", kernel_initializer=kernel_init + ) + ) + head.add(keras.layers.ReLU()) + head.add( + keras.layers.Conv2D( + output_filters, + 3, + 1, + padding="same", + kernel_initializer=kernel_init, + bias_initializer=bias_init, + ) + ) + return head + + +class RetinaNet(keras.Model): + def __init__(self, num_classes, backbone=None, **kwargs): + super().__init__(name="RetinaNet", **kwargs) + self.fpn = FeaturePyramid(backbone) + self.num_classes = num_classes + + prior_probability = keras.initializers.Constant( + -tf.math.log((1 - 0.01) / 0.01) + ) + self.cls_head = build_head(9 * num_classes, prior_probability) + self.box_head = build_head(9 * 4, "zeros") + + def call(self, image, training=False): + features = self.fpn(image, training=training) + N = tf.shape(image)[0] + cls_outputs = [] + box_outputs = [] + for feature in features: + box_outputs.append(tf.reshape(self.box_head(feature), [N, -1, 4])) + cls_outputs.append( + tf.reshape(self.cls_head(feature), [N, -1, self.num_classes]) + ) + cls_outputs = tf.concat(cls_outputs, axis=1) + box_outputs = 
tf.concat(box_outputs, axis=1) + return tf.concat([box_outputs, cls_outputs], axis=-1) + + def get_config(self): + return { + "num_classes": self.num_classes, + "backbone": self.fpn.backbone, + } + + @classmethod + def from_config(cls, config): + backbone = serialization_lib.deserialize_keras_object( + config.pop("backbone") + ) + num_classes = config["num_classes"] + retinanet = cls(num_classes=num_classes, backbone=backbone) + retinanet(tf.zeros((1, 32, 32, 3))) # Build model + return retinanet + + +class RetinaNetBoxLoss(keras.losses.Loss): + def __init__(self, delta): + super().__init__(reduction="none", name="RetinaNetBoxLoss") + self._delta = delta + + def call(self, y_true, y_pred): + difference = y_true - y_pred + absolute_difference = tf.abs(difference) + squared_difference = difference**2 + loss = tf.where( + tf.less(absolute_difference, self._delta), + 0.5 * squared_difference, + absolute_difference - 0.5, + ) + return tf.reduce_sum(loss, axis=-1) + + def get_config(self): + return {"delta": self._delta} + + +class RetinaNetClassificationLoss(keras.losses.Loss): + def __init__(self, alpha, gamma): + super().__init__(reduction="none", name="RetinaNetClassificationLoss") + self._alpha = alpha + self._gamma = gamma + + def call(self, y_true, y_pred): + cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits( + labels=y_true, logits=y_pred + ) + probs = tf.nn.sigmoid(y_pred) + alpha = tf.where( + tf.equal(y_true, 1.0), self._alpha, (1.0 - self._alpha) + ) + pt = tf.where(tf.equal(y_true, 1.0), probs, 1 - probs) + loss = alpha * tf.pow(1.0 - pt, self._gamma) * cross_entropy + return tf.reduce_sum(loss, axis=-1) + + def get_config(self): + return {"alpha": self._alpha, "gamma": self._gamma} + + +class RetinaNetLoss(keras.losses.Loss): + def __init__(self, num_classes=80, alpha=0.25, gamma=2.0, delta=1.0): + super().__init__(reduction="auto", name="RetinaNetLoss") + self._clf_loss = RetinaNetClassificationLoss(alpha, gamma) + self._box_loss = RetinaNetBoxLoss(delta) + self._num_classes = num_classes + self._alpha = alpha + self._gamma = gamma + self._delta = delta + + def call(self, y_true, y_pred): + y_pred = tf.cast(y_pred, dtype=tf.float32) + box_labels = y_true[:, :, :4] + box_predictions = y_pred[:, :, :4] + cls_labels = tf.one_hot( + tf.cast(y_true[:, :, 4], dtype=tf.int32), + depth=self._num_classes, + dtype=tf.float32, + ) + cls_predictions = y_pred[:, :, 4:] + positive_mask = tf.cast( + tf.greater(y_true[:, :, 4], -1.0), dtype=tf.float32 + ) + ignore_mask = tf.cast(tf.equal(y_true[:, :, 4], -2.0), dtype=tf.float32) + clf_loss = self._clf_loss(cls_labels, cls_predictions) + box_loss = self._box_loss(box_labels, box_predictions) + clf_loss = tf.where(tf.equal(ignore_mask, 1.0), 0.0, clf_loss) + box_loss = tf.where(tf.equal(positive_mask, 1.0), box_loss, 0.0) + normalizer = tf.reduce_sum(positive_mask, axis=-1) + clf_loss = tf.math.divide_no_nan( + tf.reduce_sum(clf_loss, axis=-1), normalizer + ) + box_loss = tf.math.divide_no_nan( + tf.reduce_sum(box_loss, axis=-1), normalizer + ) + loss = clf_loss + box_loss + return loss + + def get_config(self): + return { + "num_classes": self._num_classes, + "alpha": self._alpha, + "gamma": self._gamma, + "delta": self._delta, + } + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + resnet50_backbone = get_backbone() + loss_fn = RetinaNetLoss(NUM_CLASSES) + model = RetinaNet(NUM_CLASSES, resnet50_backbone) + + if compile: + learning_rates = [2.5e-06, 0.000625, 0.00125, 0.0025, 0.00025, 
2.5e-05] + learning_rate_boundaries = [125, 250, 500, 240000, 360000] + learning_rate_fn = keras.optimizers.schedules.PiecewiseConstantDecay( + boundaries=learning_rate_boundaries, values=learning_rates + ) + optimizer = keras.optimizers.SGD( + learning_rate=learning_rate_fn, momentum=0.9 + ) + model.compile( + loss=loss_fn, optimizer=optimizer, jit_compile=jit_compile + ) + return model + + +def get_custom_objects(): + return { + "RetinaNetLoss": RetinaNetLoss, + "RetinaNetClassificationLoss": RetinaNetClassificationLoss, + "RetinaNetBoxLoss": RetinaNetBoxLoss, + "RetinaNet": RetinaNet, + "FeaturePyramid": FeaturePyramid, + } diff --git a/keras/integration_test/models/structured_data_classification.py b/keras/integration_test/models/structured_data_classification.py new file mode 100644 index 000000000000..e53bfb063696 --- /dev/null +++ b/keras/integration_test/models/structured_data_classification.py @@ -0,0 +1,100 @@ +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + + +def get_data_spec(batch_size): + return ( + { + "num_cat_feat": InputSpec( + (batch_size,), dtype="int32", range=[0, 5] + ), + "string_cat_feat": InputSpec((batch_size,), dtype="string"), + "num_feat": InputSpec((batch_size,)), + }, + InputSpec((batch_size, 1), dtype="int32", range=[0, 2]), + ) + + +def get_input_preprocessor(): + dataset = tf.data.Dataset.from_tensor_slices( + { + "num_cat_feat": [0, 1, 2, 3, 4, 5], + "string_cat_feat": ["zero", "one", "two", "three", "four", "five"], + "num_feat": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5], + } + ).batch(3) + + num_cat_feat = keras.Input(shape=(1,), name="num_cat_feat", dtype="int64") + string_cat_feat = keras.Input( + shape=(1,), name="string_cat_feat", dtype="string" + ) + num_feat = keras.Input(shape=(1,), name="num_feat", dtype="float32") + + all_inputs = [ + num_cat_feat, + string_cat_feat, + num_feat, + ] + + all_features = keras.layers.concatenate( + [ + encode_categorical_feature( + num_cat_feat, "num_cat_feat", dataset, False + ), + encode_categorical_feature( + string_cat_feat, "string_cat_feat", dataset, True + ), + encode_numerical_feature(num_feat, "num_feat", dataset), + ] + ) + preprocessor = keras.Model(all_inputs, all_features) + return preprocessor + + +def encode_numerical_feature(feature, name, dataset): + normalizer = keras.layers.Normalization(mean=[1.0], variance=[2.0]) + encoded_feature = normalizer(feature) + return encoded_feature + + +def encode_categorical_feature(feature, name, dataset, is_string): + lookup_class = ( + keras.layers.StringLookup if is_string else keras.layers.IntegerLookup + ) + lookup = lookup_class(output_mode="binary") + feature_ds = dataset.map(lambda x: x[name]) + feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1)) + lookup.adapt(feature_ds) + encoded_feature = lookup(feature) + return encoded_feature + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + preprocessor = get_input_preprocessor() + if include_preprocessing: + all_inputs = preprocessor.inputs + all_features = preprocessor.outputs[0] + else: + all_inputs = keras.Input(shape=preprocessor.outputs[0].shape) + all_features = all_inputs + x = keras.layers.Dense(32, activation="relu")(all_features) + x = keras.layers.Dropout(0.5)(x) + output = keras.layers.Dense(1, activation="sigmoid")(x) + model = keras.Model(all_inputs, output) + + if compile: + model.compile( + "adam", + "binary_crossentropy", + metrics=["accuracy"], + jit_compile=jit_compile, + 
) + return model + + +def get_custom_objects(): + return {} diff --git a/keras/integration_test/models/text_classification.py b/keras/integration_test/models/text_classification.py new file mode 100644 index 000000000000..6da5a2a741dc --- /dev/null +++ b/keras/integration_test/models/text_classification.py @@ -0,0 +1,91 @@ +"""Text classification model. + +Adapted from https://keras.io/examples/nlp/text_classification_from_scratch/ +""" +import re +import string + +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +MAX_FEATURES = 1000 +EMBEDDING_DIM = 64 +SEQUENCE_LENGTH = 32 + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size,), dtype="string"), + InputSpec((batch_size, 1), dtype="int32", range=[0, 2]), + ) + + +def custom_standardization(input_data): + lowercase = tf.strings.lower(input_data) + stripped_html = tf.strings.regex_replace(lowercase, "<br />
", " ") + return tf.strings.regex_replace( + stripped_html, f"[{re.escape(string.punctuation)}]", "" + ) + + +def get_input_preprocessor(): + input_vectorizer = keras.layers.TextVectorization( + standardize=custom_standardization, + max_tokens=MAX_FEATURES, + output_mode="int", + output_sequence_length=SEQUENCE_LENGTH, + ) + text_ds = tf.data.Dataset.from_tensor_slices( + [ + "Lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "sed do eiusmod tempor incididunt ut", + "labore et dolore magna aliqua.", + "Ut enim ad minim veniam", + "quis nostrud exercitation ullamco", + "laboris nisi ut aliquip ex ea commodo consequat.", + ] + ) + input_vectorizer.adapt(text_ds) + return input_vectorizer + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + if include_preprocessing: + inputs = keras.Input(shape=(), dtype="string") + x = get_input_preprocessor()(inputs) + else: + inputs = keras.Input(shape=(None,), dtype="int64") + x = inputs + x = keras.layers.Embedding(MAX_FEATURES, EMBEDDING_DIM)(x) + x = keras.layers.Dropout(0.5)(x) + x = keras.layers.Conv1D( + 128, 7, padding="valid", activation="relu", strides=3 + )(x) + x = keras.layers.Conv1D( + 128, 7, padding="valid", activation="relu", strides=3 + )(x) + x = keras.layers.GlobalMaxPooling1D()(x) + x = keras.layers.Dense(128, activation="relu")(x) + x = keras.layers.Dropout(0.5)(x) + predictions = keras.layers.Dense( + 1, activation="sigmoid", name="predictions" + )(x) + model = keras.Model(inputs, predictions) + + if compile: + model.compile( + loss="binary_crossentropy", + optimizer="adam", + metrics=["accuracy"], + jit_compile=jit_compile, + ) + return model + + +def get_custom_objects(): + return {"custom_standardization": custom_standardization} diff --git a/keras/integration_test/models/timeseries_forecasting.py b/keras/integration_test/models/timeseries_forecasting.py new file mode 100644 index 000000000000..7f38f0821372 --- /dev/null +++ b/keras/integration_test/models/timeseries_forecasting.py @@ -0,0 +1,41 @@ +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +TIMESTEPS = 32 + + +def get_data_spec(batch_size): + return ( + InputSpec((batch_size, TIMESTEPS, 1)), + InputSpec((batch_size, 1)), + ) + + +def get_input_preprocessor(): + return None + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + model = keras.Sequential( + [ + keras.layers.LSTM(32, return_sequences=True), + keras.layers.LSTM(32), + keras.layers.Dense(1), + ] + ) + if build: + model.build((None, TIMESTEPS, 1)) + if compile: + model.compile( + optimizer=keras.optimizers.Adam(), + loss="mse", + jit_compile=jit_compile, + ) + return model + + +def get_custom_objects(): + return {} diff --git a/keras/integration_test/models/translation.py b/keras/integration_test/models/translation.py new file mode 100644 index 000000000000..b8488600ba7f --- /dev/null +++ b/keras/integration_test/models/translation.py @@ -0,0 +1,225 @@ +"""Machine translation model. 
+ +Adapted from +https://keras.io/examples/nlp/neural_machine_translation_with_transformer/ +""" +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec + +VOCAB_SIZE = 1500 +SEQUENCE_LENGTH = 20 + + +def get_data_spec(batch_size): + return ( + ( + InputSpec((batch_size,), dtype="string"), + InputSpec((batch_size,), dtype="string"), + ), + InputSpec( + (batch_size, SEQUENCE_LENGTH), dtype="int64", range=[0, VOCAB_SIZE] + ), + ) + + +def get_input_preprocessor(): + encoder_input_vectorizer = keras.layers.TextVectorization( + max_tokens=VOCAB_SIZE, + output_mode="int", + output_sequence_length=SEQUENCE_LENGTH, + ) + decoder_input_vectorizer = keras.layers.TextVectorization( + max_tokens=VOCAB_SIZE, + output_mode="int", + output_sequence_length=SEQUENCE_LENGTH, + ) + text_ds = tf.data.Dataset.from_tensor_slices( + [ + "Lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "sed do eiusmod tempor incididunt ut", + "labore et dolore magna aliqua.", + "Ut enim ad minim veniam", + "quis nostrud exercitation ullamco", + "laboris nisi ut aliquip ex ea commodo consequat.", + ] + ) + encoder_input_vectorizer.adapt(text_ds) + decoder_input_vectorizer.adapt(text_ds) + return lambda x: ( + encoder_input_vectorizer(x[0]), + decoder_input_vectorizer(x[1]), + ) + + +class TransformerEncoder(keras.layers.Layer): + def __init__(self, embed_dim, dense_dim, num_heads, **kwargs): + super().__init__(**kwargs) + self.embed_dim = embed_dim + self.dense_dim = dense_dim + self.num_heads = num_heads + self.attention = keras.layers.MultiHeadAttention( + num_heads=num_heads, key_dim=embed_dim + ) + self.dense_proj = keras.Sequential( + [ + keras.layers.Dense(dense_dim, activation="relu"), + keras.layers.Dense(embed_dim), + ] + ) + self.layernorm_1 = keras.layers.LayerNormalization() + self.layernorm_2 = keras.layers.LayerNormalization() + self.supports_masking = True + + def call(self, inputs, mask=None): + if mask is not None: + padding_mask = tf.cast( + mask[:, tf.newaxis, tf.newaxis, :], dtype="int32" + ) + attention_output = self.attention( + query=inputs, value=inputs, key=inputs, attention_mask=padding_mask + ) + proj_input = self.layernorm_1(inputs + attention_output) + proj_output = self.dense_proj(proj_input) + return self.layernorm_2(proj_input + proj_output) + + +class PositionalEmbedding(keras.layers.Layer): + def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs): + super().__init__(**kwargs) + self.token_embeddings = keras.layers.Embedding( + input_dim=vocab_size, output_dim=embed_dim + ) + self.position_embeddings = keras.layers.Embedding( + input_dim=sequence_length, output_dim=embed_dim + ) + self.sequence_length = sequence_length + self.vocab_size = vocab_size + self.embed_dim = embed_dim + + def call(self, inputs): + length = tf.shape(inputs)[-1] + positions = tf.range(start=0, limit=length, delta=1) + embedded_tokens = self.token_embeddings(inputs) + embedded_positions = self.position_embeddings(positions) + return embedded_tokens + embedded_positions + + def compute_mask(self, inputs, mask=None): + return tf.math.not_equal(inputs, 0) + + +class TransformerDecoder(keras.layers.Layer): + def __init__(self, embed_dim, latent_dim, num_heads, **kwargs): + super().__init__(**kwargs) + self.embed_dim = embed_dim + self.latent_dim = latent_dim + self.num_heads = num_heads + self.attention_1 = keras.layers.MultiHeadAttention( + num_heads=num_heads, key_dim=embed_dim + ) + self.attention_2 = 
keras.layers.MultiHeadAttention( + num_heads=num_heads, key_dim=embed_dim + ) + self.dense_proj = keras.Sequential( + [ + keras.layers.Dense(latent_dim, activation="relu"), + keras.layers.Dense(embed_dim), + ] + ) + self.layernorm_1 = keras.layers.LayerNormalization() + self.layernorm_2 = keras.layers.LayerNormalization() + self.layernorm_3 = keras.layers.LayerNormalization() + self.supports_masking = True + + def call(self, inputs, encoder_outputs, mask=None): + causal_mask = self.get_causal_attention_mask(inputs) + if mask is not None: + padding_mask = tf.cast(mask[:, tf.newaxis, :], dtype="int32") + padding_mask = tf.minimum(padding_mask, causal_mask) + + attention_output_1 = self.attention_1( + query=inputs, value=inputs, key=inputs, attention_mask=causal_mask + ) + out_1 = self.layernorm_1(inputs + attention_output_1) + + attention_output_2 = self.attention_2( + query=out_1, + value=encoder_outputs, + key=encoder_outputs, + attention_mask=padding_mask, + ) + out_2 = self.layernorm_2(out_1 + attention_output_2) + + proj_output = self.dense_proj(out_2) + return self.layernorm_3(out_2 + proj_output) + + def get_causal_attention_mask(self, inputs): + input_shape = tf.shape(inputs) + batch_size, sequence_length = input_shape[0], input_shape[1] + i = tf.range(sequence_length)[:, tf.newaxis] + j = tf.range(sequence_length) + mask = tf.cast(i >= j, dtype="int32") + mask = tf.reshape(mask, (1, input_shape[1], input_shape[1])) + mult = tf.concat( + [ + tf.expand_dims(batch_size, -1), + tf.constant([1, 1], dtype=tf.int32), + ], + axis=0, + ) + return tf.tile(mask, mult) + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + embed_dim = 256 + latent_dim = 256 + num_heads = 2 + + if include_preprocessing: + encoder_inputs = keras.Input(shape=(), dtype="string") + decoder_inputs = keras.Input(shape=(), dtype="string") + encoder_x, decoder_x = get_input_preprocessor()( + (encoder_inputs, decoder_inputs) + ) + else: + encoder_inputs = keras.Input(shape=(None,), dtype="int64") + decoder_inputs = keras.Input(shape=(None,), dtype="int64") + encoder_x = encoder_inputs + decoder_x = decoder_inputs + + x = PositionalEmbedding(SEQUENCE_LENGTH, VOCAB_SIZE, embed_dim)(encoder_x) + encoder_outputs = TransformerEncoder(embed_dim, latent_dim, num_heads)(x) + + encoded_seq_inputs = keras.Input(shape=(None, embed_dim)) + x = PositionalEmbedding(SEQUENCE_LENGTH, VOCAB_SIZE, embed_dim)(decoder_x) + x = TransformerDecoder(embed_dim, latent_dim, num_heads)( + x, encoded_seq_inputs + ) + x = keras.layers.Dropout(0.5)(x) + decoder_outputs = keras.layers.Dense(VOCAB_SIZE, activation="softmax")(x) + decoder = keras.Model([decoder_inputs, encoded_seq_inputs], decoder_outputs) + + decoder_outputs = decoder([decoder_inputs, encoder_outputs]) + model = keras.Model( + [encoder_inputs, decoder_inputs], decoder_outputs, name="transformer" + ) + if compile: + model.compile( + "rmsprop", + loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + jit_compile=jit_compile, + ) + return model + + +def get_custom_objects(): + return { + "TransformerEncoder": TransformerEncoder, + "TransformerDecoder": TransformerDecoder, + "PositionalEmbedding": PositionalEmbedding, + } diff --git a/keras/integration_test/models/vae.py b/keras/integration_test/models/vae.py new file mode 100644 index 000000000000..f9f08e1420fb --- /dev/null +++ b/keras/integration_test/models/vae.py @@ -0,0 +1,137 @@ +"""Variational autoencoder. 
+ +Adapted from https://keras.io/examples/generative/vae/ +""" + +import tensorflow as tf +from tensorflow import keras + +from keras.integration_test.models.input_spec import InputSpec +from keras.saving import serialization_lib + +IMG_SIZE = (28, 28) +LATENT_DIM = 64 + + +def get_input_preprocessor(): + return None + + +class Sampling(keras.layers.Layer): + def call(self, inputs): + z_mean, z_log_var = inputs + batch = tf.shape(z_mean)[0] + dim = tf.shape(z_mean)[1] + epsilon = tf.keras.backend.random_normal(shape=(batch, dim)) + return z_mean + tf.exp(0.5 * z_log_var) * epsilon + + +class VAE(keras.Model): + def __init__(self, encoder, decoder, **kwargs): + super(VAE, self).__init__(**kwargs) + self.encoder = encoder + self.decoder = decoder + self.total_loss_tracker = keras.metrics.Mean(name="total_loss") + self.reconstruction_loss_tracker = keras.metrics.Mean( + name="reconstruction_loss" + ) + self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss") + + @property + def metrics(self): + return [ + self.total_loss_tracker, + self.reconstruction_loss_tracker, + self.kl_loss_tracker, + ] + + def train_step(self, data): + with tf.GradientTape() as tape: + z_mean, z_log_var, z = self.encoder(data) + reconstruction = self.decoder(z) + reconstruction_loss = tf.reduce_mean( + tf.reduce_sum( + keras.losses.binary_crossentropy(data, reconstruction), + axis=(1, 2), + ) + ) + kl_loss = -0.5 * ( + 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + ) + kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1)) + total_loss = reconstruction_loss + kl_loss + grads = tape.gradient(total_loss, self.trainable_weights) + self.optimizer.apply_gradients(zip(grads, self.trainable_weights)) + self.total_loss_tracker.update_state(total_loss) + self.reconstruction_loss_tracker.update_state(reconstruction_loss) + self.kl_loss_tracker.update_state(kl_loss) + return { + "loss": self.total_loss_tracker.result(), + "reconstruction_loss": self.reconstruction_loss_tracker.result(), + "kl_loss": self.kl_loss_tracker.result(), + } + + def get_config(self): + base_config = super().get_config() + return { + "encoder": self.encoder, + "decoder": self.decoder, + **base_config, + } + + @classmethod + def from_config(cls, config): + encoder = serialization_lib.deserialize_keras_object( + config.pop("encoder") + ) + decoder = serialization_lib.deserialize_keras_object( + config.pop("decoder") + ) + return cls(encoder, decoder, **config) + + +def get_data_spec(batch_size): + return InputSpec((batch_size,) + IMG_SIZE + (1,)) + + +def get_model( + build=False, compile=False, jit_compile=False, include_preprocessing=True +): + encoder_inputs = keras.Input(shape=IMG_SIZE + (1,)) + x = keras.layers.Conv2D( + 32, 3, activation="relu", strides=2, padding="same" + )(encoder_inputs) + x = keras.layers.Conv2D( + 64, 3, activation="relu", strides=2, padding="same" + )(x) + x = keras.layers.Flatten()(x) + x = keras.layers.Dense(16, activation="relu")(x) + z_mean = keras.layers.Dense(LATENT_DIM, name="z_mean")(x) + z_log_var = keras.layers.Dense(LATENT_DIM, name="z_log_var")(x) + z = Sampling()([z_mean, z_log_var]) + encoder = keras.Model( + encoder_inputs, [z_mean, z_log_var, z], name="encoder" + ) + + latent_inputs = keras.Input(shape=(LATENT_DIM,)) + x = keras.layers.Dense(7 * 7 * 64, activation="relu")(latent_inputs) + x = keras.layers.Reshape((7, 7, 64))(x) + x = keras.layers.Conv2DTranspose( + 64, 3, activation="relu", strides=2, padding="same" + )(x) + x = keras.layers.Conv2DTranspose( + 32, 3, activation="relu", strides=2, 
padding="same" + )(x) + decoder_outputs = keras.layers.Conv2DTranspose( + 1, 3, activation="sigmoid", padding="same" + )(x) + decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder") + + vae = VAE(encoder, decoder) + if compile: + vae.compile(optimizer=keras.optimizers.Adam(), jit_compile=jit_compile) + return vae + + +def get_custom_objects(): + return {"VAE": VAE, "Sampling": Sampling} diff --git a/keras/integration_test/module_test.py b/keras/integration_test/module_test.py index 2fe54431d17e..0454d70999b3 100644 --- a/keras/integration_test/module_test.py +++ b/keras/integration_test/module_test.py @@ -17,44 +17,60 @@ class ModuleTest(tf.test.TestCase): + def test_module_discover_layer_variable(self): + m = tf.Module() + m.a = tf.keras.layers.Dense(1) + m.b = tf.keras.layers.Dense(2) - def test_module_discover_layer_variable(self): - m = tf.Module() - m.a = tf.keras.layers.Dense(1) - m.b = tf.keras.layers.Dense(2) + # The weights of the layer has not been created yet. + self.assertEmpty(m.variables) + self.assertLen(m.submodules, 2) - # The weights of the layer has not been created yet. - self.assertEmpty(m.variables) - self.assertLen(m.submodules, 2) + inputs = tf.keras.layers.Input((1,)) + m.a(inputs) + m.b(inputs) - inputs = tf.keras.layers.Input((1,)) - m.a(inputs) - m.b(inputs) + variable_list = m.variables + self.assertLen(variable_list, 4) + self.assertIs(variable_list[0], m.a.kernel) + self.assertIs(variable_list[1], m.a.bias) + self.assertIs(variable_list[2], m.b.kernel) + self.assertIs(variable_list[3], m.b.bias) - variable_list = m.variables - self.assertLen(variable_list, 4) - self.assertIs(variable_list[0], m.a.kernel) - self.assertIs(variable_list[1], m.a.bias) - self.assertIs(variable_list[2], m.b.kernel) - self.assertIs(variable_list[3], m.b.bias) + def test_model_discover_submodule(self): + m = tf.keras.models.Sequential( + layers=[tf.keras.layers.Dense(1), tf.keras.layers.Dense(2)] + ) - def test_model_discover_submodule(self): - m = tf.keras.models.Sequential( - layers=[tf.keras.layers.Dense(1), tf.keras.layers.Dense(2)]) + self.assertEqual(m.submodules, (m.layers[0], m.layers[1])) + m(tf.keras.layers.Input((1,))) + self.assertLen(m.variables, 4) - self.assertEqual(m.submodules, (m.layers[0], m.layers[1])) - m(tf.keras.layers.Input((1,))) - self.assertLen(m.variables, 4) + def test_model_wrapped_in_module_discovers_submodules(self): + linear = tf.keras.models.Sequential( + [tf.keras.layers.Dense(units=1, input_shape=[1])] + ) + linear.compile(optimizer="sgd", loss="mean_squared_error") + m = tf.Module() + m.l = linear + self.assertNotEmpty(m.submodules) + self.assertLen(m.variables, 2) - def test_model_wrapped_in_module_discovers_submodules(self): - linear = tf.keras.models.Sequential( - [tf.keras.layers.Dense(units=1, input_shape=[1])]) - linear.compile(optimizer="sgd", loss="mean_squared_error") - m = tf.Module() - m.l = linear - self.assertNotEmpty(m.submodules) - self.assertLen(m.variables, 2) + def test_subclass_model(self): + class Model(tf.keras.Model): + def __init__(self): + super().__init__() + self.dense = tf.keras.layers.Dense(units=1) + + def call(self, inputs, training=None, mask=None): + return self.dense(inputs) + + model = Model() + self.assertLen(model.submodules, 1) # For the dense layer + model.compile(loss="mse", optimizer="sgd") + # Make sure the compiled metric doesn't break tf.module + self.assertLen(model.submodules, 1) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git 
a/keras/integration_test/multi_worker_tutorial_test.py b/keras/integration_test/multi_worker_tutorial_test.py index 87ca7a7da8f6..31a605efbf12 100644 --- a/keras/integration_test/multi_worker_tutorial_test.py +++ b/keras/integration_test/multi_worker_tutorial_test.py @@ -20,10 +20,11 @@ import unittest import uuid import zipfile -from absl import logging -from absl.testing import parameterized + import numpy as np import tensorflow.compat.v2 as tf +from absl import logging +from absl.testing import parameterized PER_WORKER_BATCH_SIZE = 64 NUM_WORKERS = 2 @@ -32,319 +33,401 @@ def _is_chief(task_type, task_id): - # Note: there are two possible `TF_CONFIG` configuration. - # 1) In addition to `worker` tasks, a `chief` task type is use; - # in this case, this function should be modified to - # `return task_type == 'chief'`. - # 2) Only `worker` task type is used; in this case, worker 0 is - # regarded as the chief. The implementation demonstrated here - # is for this case. - return task_type == 'worker' and task_id == 0 + # Note: there are two possible `TF_CONFIG` configuration. + # 1) In addition to `worker` tasks, a `chief` task type is use; + # in this case, this function should be modified to + # `return task_type == 'chief'`. + # 2) Only `worker` task type is used; in this case, worker 0 is + # regarded as the chief. The implementation demonstrated here + # is for this case. + return task_type == "worker" and task_id == 0 def _get_temp_dir(dirpath, task_id): - base_dirpath = 'workertemp_' + str(task_id) - temp_dir = os.path.join(dirpath, base_dirpath) - tf.io.gfile.makedirs(temp_dir) - return temp_dir + base_dirpath = "workertemp_" + str(task_id) + temp_dir = os.path.join(dirpath, base_dirpath) + tf.io.gfile.makedirs(temp_dir) + return temp_dir def write_filepath(filepath, task_type, task_id): - dirpath = os.path.dirname(filepath) - base = os.path.basename(filepath) - if not _is_chief(task_type, task_id): - dirpath = _get_temp_dir(dirpath, task_id) - return os.path.join(dirpath, base) + dirpath = os.path.dirname(filepath) + base = os.path.basename(filepath) + if not _is_chief(task_type, task_id): + dirpath = _get_temp_dir(dirpath, task_id) + return os.path.join(dirpath, base) class MultiWorkerTutorialTest(parameterized.TestCase, tf.test.TestCase): - """Test of multi-worker training flow in tutorials on tensorflow.org. - - Please see below test method docs for what actual tutorial is being covered. - """ - - # TODO(rchao): Add a test to demonstrate gather with MWMS. - - @contextlib.contextmanager - def skip_fetch_failure_exception(self): - try: - yield - except zipfile.BadZipfile as e: - # There can be a race when multiple processes are downloading the data. - # Skip the test if that results in loading errors. - self.skipTest('Data loading error: Bad magic number for file header.') - except Exception as e: # pylint: disable=broad-except - if 'URL fetch failure' in str(e): - self.skipTest('URL fetch error not considered failure of the test.') - else: - raise - - def mnist_dataset(self): - path_to_use = 'mnist_{}.npz'.format(str(uuid.uuid4())) - with self.skip_fetch_failure_exception(): - (x_train, - y_train), _ = tf.keras.datasets.mnist.load_data(path=path_to_use) - # The `x` arrays are in uint8 and have values in the range [0, 255]. 
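The `TF_CONFIG` convention that `_is_chief` encodes is easiest to see with a concrete value. A minimal sketch of convention (2), where worker 0 doubles as the chief; the addresses and ports are hypothetical:

import json
import os

# Hypothetical two-worker cluster; worker 0 acts as the chief.
os.environ["TF_CONFIG"] = json.dumps(
    {
        "cluster": {"worker": ["localhost:12345", "localhost:23456"]},
        "task": {"type": "worker", "index": 0},
    }
)

def is_chief(task_type, task_id):
    # Same rule as _is_chief in this file.
    return task_type == "worker" and task_id == 0

assert is_chief("worker", 0)
assert not is_chief("worker", 1)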
- # We need to convert them to float32 with values in the range [0, 1] - x_train = x_train / np.float32(255) - y_train = y_train.astype(np.int64) - train_dataset = tf.data.Dataset.from_tensor_slices( - (x_train, y_train)).shuffle(60000) - return train_dataset - - def dataset_fn(self, global_batch_size, input_context): - batch_size = input_context.get_per_replica_batch_size(global_batch_size) - dataset = self.mnist_dataset() - dataset = dataset.shard(input_context.num_input_pipelines, - input_context.input_pipeline_id) - dataset = dataset.batch(batch_size) - return dataset - - def build_cnn_model(self): - return tf.keras.Sequential([ - tf.keras.layers.Input(shape=(28, 28)), - tf.keras.layers.Reshape(target_shape=(28, 28, 1)), - tf.keras.layers.Conv2D(32, 3, activation='relu'), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(128, activation='relu'), - tf.keras.layers.Dense(10) - ]) - - def build_and_compile_cnn_model(self): - model = self.build_cnn_model() - model.compile( - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - optimizer=tf.keras.optimizers.SGD(learning_rate=0.001), - metrics=['accuracy']) - return model - - @tf.__internal__.test.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], tf_api_version=2)) - def testSingleWorkerModelFit(self): - single_worker_dataset = self.mnist_dataset().batch( - PER_WORKER_BATCH_SIZE) - single_worker_model = self.build_and_compile_cnn_model() - single_worker_model.fit(single_worker_dataset, epochs=NUM_EPOCHS) - - @tf.__internal__.test.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], tf_api_version=2)) - def testMwmsWithModelFit(self, mode): - """Test multi-worker training flow demo'ed in go/multi-worker-with-keras. - - This test should be kept in sync with the code samples in - go/multi-worker-with-keras. - - Args: - mode: Runtime mode. + """Test of multi-worker training flow in tutorials on tensorflow.org. + + Please see below test method docs for what actual tutorial is being covered. """ - def fn(model_path, checkpoint_dir): - global_batch_size = PER_WORKER_BATCH_SIZE * NUM_WORKERS - strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() - with strategy.scope(): - multi_worker_model = self.build_and_compile_cnn_model() - - callbacks = [ - tf.keras.callbacks.ModelCheckpoint( - filepath=os.path.join(self.get_temp_dir(), 'checkpoint')) - ] - - multi_worker_dataset = strategy.distribute_datasets_from_function( - lambda input_context: self.dataset_fn(global_batch_size, input_context - )) - - multi_worker_model.fit( - multi_worker_dataset, - epochs=NUM_EPOCHS, - steps_per_epoch=50, - callbacks=callbacks) - - task_type, task_id = (strategy.cluster_resolver.task_type, - strategy.cluster_resolver.task_id) - write_model_path = write_filepath(model_path, task_type, task_id) - - multi_worker_model.save(write_model_path) - if not _is_chief(task_type, task_id): - tf.io.gfile.rmtree(os.path.dirname(write_model_path)) - - # Make sure chief finishes saving before non-chief's assertions. 
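How `write_filepath` steers chief and non-chief saves apart, reduced to pure path logic; a sketch with a hypothetical /tmp/model.tf path and no filesystem side effects:

import os

def temp_dir_for(dirpath, task_id):
    # Mirrors _get_temp_dir above, minus the tf.io.gfile.makedirs call.
    return os.path.join(dirpath, "workertemp_" + str(task_id))

def write_path_for(filepath, task_type, task_id):
    dirpath, base = os.path.split(filepath)
    if not (task_type == "worker" and task_id == 0):  # non-chief workers
        dirpath = temp_dir_for(dirpath, task_id)
    return os.path.join(dirpath, base)

print(write_path_for("/tmp/model.tf", "worker", 0))  # /tmp/model.tf
print(write_path_for("/tmp/model.tf", "worker", 1))  # /tmp/workertemp_1/model.tf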
- tf.__internal__.distribute.multi_process_runner.get_barrier().wait() - - if not tf.io.gfile.exists(model_path): - raise RuntimeError() - if tf.io.gfile.exists(write_model_path) != _is_chief(task_type, task_id): - raise RuntimeError() - - with strategy.scope(): - loaded_model = tf.keras.models.load_model(model_path) - loaded_model.fit(multi_worker_dataset, epochs=1, steps_per_epoch=1) - - checkpoint = tf.train.Checkpoint(model=multi_worker_model) - write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id) - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, directory=write_checkpoint_dir, max_to_keep=1) - - checkpoint_manager.save() - if not _is_chief(task_type, task_id): - tf.io.gfile.rmtree(write_checkpoint_dir) - - # Make sure chief finishes saving before non-chief's assertions. - tf.__internal__.distribute.multi_process_runner.get_barrier().wait() - - if not tf.io.gfile.exists(checkpoint_dir): - raise RuntimeError() - if tf.io.gfile.exists(write_checkpoint_dir) != _is_chief( - task_type, task_id): - raise RuntimeError() - - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - checkpoint.restore(latest_checkpoint) - multi_worker_model.fit(multi_worker_dataset, epochs=1, steps_per_epoch=1) - - logging.info('testMwmsWithModelFit successfully ends') - - model_path = os.path.join(self.get_temp_dir(), 'model.tf') - checkpoint_dir = os.path.join(self.get_temp_dir(), 'ckpt') - try: - mpr_result = tf.__internal__.distribute.multi_process_runner.run( - fn, - tf.__internal__.distribute.multi_process_runner.create_cluster_spec( - num_workers=NUM_WORKERS), - args=(model_path, checkpoint_dir), - return_output=True) - except tf.errors.UnavailableError: - self.skipTest('Skipping rare disconnection among the workers.') - - self.assertTrue( - any([ - 'testMwmsWithModelFit successfully ends' in msg - for msg in mpr_result.stdout - ])) - - def extract_accuracy(worker_id, input_string): - match = re.match( - r'\[worker\-{}\].*accuracy: (\d+\.\d+).*'.format(worker_id), - input_string) - return None if match is None else float(match.group(1)) - - for worker_id in range(NUM_WORKERS): - accu_result = tf.nest.map_structure( - lambda x: extract_accuracy(worker_id, x), # pylint: disable=cell-var-from-loop - mpr_result.stdout) - self.assertTrue( - any(accu_result), 'Every worker is supposed to have accuracy result.') - - @tf.__internal__.test.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=['eager'], tf_api_version=2)) - def testMwmsWithCtl(self, mode): - """Test multi-worker CTL training flow demo'ed in a to-be-added tutorial.""" - - def proc_func(checkpoint_dir): - global_batch_size = PER_WORKER_BATCH_SIZE * NUM_WORKERS - strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() - try: - - with strategy.scope(): - multi_worker_model = self.build_cnn_model() - - multi_worker_dataset = strategy.distribute_datasets_from_function( - lambda input_context: self.dataset_fn(global_batch_size, # pylint: disable=g-long-lambda - input_context)) - optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001) - train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy( - name='train_accuracy') - - @tf.function - def train_step(iterator): - """Training step function.""" - - def step_fn(inputs): - """Per-Replica step function.""" - x, y = inputs - with tf.GradientTape() as tape: - predictions = multi_worker_model(x, training=True) - per_batch_loss = tf.keras.losses.SparseCategoricalCrossentropy( - from_logits=True, - 
reduction=tf.keras.losses.Reduction.NONE)(y, predictions) - loss = tf.nn.compute_average_loss( - per_batch_loss, global_batch_size=global_batch_size) - - grads = tape.gradient(loss, multi_worker_model.trainable_variables) - optimizer.apply_gradients( - zip(grads, multi_worker_model.trainable_variables)) - train_accuracy.update_state(y, predictions) - - return loss - - per_replica_losses = strategy.run(step_fn, args=(next(iterator),)) - return strategy.reduce( - tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None) - - epoch = tf.Variable( - initial_value=tf.constant(0, dtype=tf.dtypes.int64), name='epoch') - step_in_epoch = tf.Variable( - initial_value=tf.constant(0, dtype=tf.dtypes.int64), - name='step_in_epoch') - - task_type, task_id = (strategy.cluster_resolver.task_type, - strategy.cluster_resolver.task_id) - checkpoint = tf.train.Checkpoint( - model=multi_worker_model, epoch=epoch, step_in_epoch=step_in_epoch) - write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, - task_id) - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, directory=write_checkpoint_dir, max_to_keep=1) - - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - if latest_checkpoint: - checkpoint.restore(latest_checkpoint) - - while epoch.numpy() < NUM_EPOCHS: - iterator = iter(multi_worker_dataset) - total_loss = 0.0 - num_batches = 0 - - while step_in_epoch.numpy() < NUM_STEPS_PER_EPOCH: - total_loss += train_step(iterator) - num_batches += 1 - step_in_epoch.assign_add(1) - - train_loss = total_loss / num_batches - logging.info('Epoch: %d, accuracy: %f, train_loss: %f.', - epoch.numpy(), train_accuracy.result(), train_loss) - - train_accuracy.reset_state() - - checkpoint_manager.save() - if not _is_chief(task_type, task_id): - tf.io.gfile.rmtree(write_checkpoint_dir) - - epoch.assign_add(1) - step_in_epoch.assign(0) - - except tf.errors.UnavailableError as e: - logging.info('UnavailableError occurred: %r', e) - raise unittest.SkipTest('Skipping test due to UnavailableError') - - logging.info('testMwmsWithCtl successfully ends') - - checkpoint_dir = os.path.join(self.get_temp_dir(), 'ckpt') - - mpr_result = tf.__internal__.distribute.multi_process_runner.run( - proc_func, - tf.__internal__.distribute.multi_process_runner.create_cluster_spec( - num_workers=NUM_WORKERS), - return_output=True, - args=(checkpoint_dir,)) - - self.assertTrue( - any([ - 'testMwmsWithCtl successfully ends' in msg - for msg in mpr_result.stdout - ])) - - -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() + + # TODO(rchao): Add a test to demonstrate gather with MWMS. + + @contextlib.contextmanager + def skip_fetch_failure_exception(self): + try: + yield + except zipfile.BadZipfile: + # There can be a race when multiple processes are downloading the + # data. Skip the test if that results in loading errors. + self.skipTest( + "Data loading error: Bad magic number for file header." + ) + except Exception as e: + if "URL fetch failure" in str(e): + self.skipTest( + "URL fetch error not considered failure of the test." + ) + else: + raise + + def mnist_dataset(self): + path_to_use = f"mnist_{str(uuid.uuid4())}.npz" + with self.skip_fetch_failure_exception(): + (x_train, y_train), _ = tf.keras.datasets.mnist.load_data( + path=path_to_use + ) + # The `x` arrays are in uint8 and have values in the range [0, 255]. 
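The `dataset_fn` below splits input with `Dataset.shard`, which hands pipeline `i` every `num_input_pipelines`-th element starting at offset `i`. The selection rule in isolation:

import tensorflow as tf

ds = tf.data.Dataset.range(8)
print(list(ds.shard(num_shards=2, index=0).as_numpy_iterator()))  # [0, 2, 4, 6]
print(list(ds.shard(num_shards=2, index=1).as_numpy_iterator()))  # [1, 3, 5, 7]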
+ # We need to convert them to float32 with values in the range [0, 1] + x_train = x_train / np.float32(255) + y_train = y_train.astype(np.int64) + train_dataset = tf.data.Dataset.from_tensor_slices( + (x_train, y_train) + ).shuffle(60000) + return train_dataset + + def dataset_fn(self, global_batch_size, input_context): + batch_size = input_context.get_per_replica_batch_size(global_batch_size) + dataset = self.mnist_dataset() + dataset = dataset.shard( + input_context.num_input_pipelines, input_context.input_pipeline_id + ) + dataset = dataset.batch(batch_size) + return dataset + + def build_cnn_model(self): + return tf.keras.Sequential( + [ + tf.keras.layers.Input(shape=(28, 28)), + tf.keras.layers.Reshape(target_shape=(28, 28, 1)), + tf.keras.layers.Conv2D(32, 3, activation="relu"), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(128, activation="relu"), + tf.keras.layers.Dense(10), + ] + ) + + def build_and_compile_cnn_model(self): + model = self.build_cnn_model() + model.compile( + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True + ), + optimizer=tf.keras.optimizers.SGD(learning_rate=0.001), + metrics=["accuracy"], + ) + return model + + @tf.__internal__.test.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], tf_api_version=2 + ) + ) + def testSingleWorkerModelFit(self): + single_worker_dataset = self.mnist_dataset().batch( + PER_WORKER_BATCH_SIZE + ) + single_worker_model = self.build_and_compile_cnn_model() + single_worker_model.fit(single_worker_dataset, epochs=NUM_EPOCHS) + + @tf.__internal__.test.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], tf_api_version=2 + ) + ) + def testMwmsWithModelFit(self, mode): + """Test multi-worker training flow demoed in go/multi-worker-with-keras. + + This test should be kept in sync with the code samples in + go/multi-worker-with-keras. + + Args: + mode: Runtime mode. + """ + + def fn(model_path, checkpoint_dir): + global_batch_size = PER_WORKER_BATCH_SIZE * NUM_WORKERS + strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() + with strategy.scope(): + multi_worker_model = self.build_and_compile_cnn_model() + + callbacks = [ + tf.keras.callbacks.ModelCheckpoint( + filepath=os.path.join(self.get_temp_dir(), "checkpoint") + ) + ] + + multi_worker_dataset = strategy.distribute_datasets_from_function( + lambda input_context: self.dataset_fn( + global_batch_size, input_context + ) + ) + + multi_worker_model.fit( + multi_worker_dataset, + epochs=NUM_EPOCHS, + steps_per_epoch=50, + callbacks=callbacks, + ) + + task_type, task_id = ( + strategy.cluster_resolver.task_type, + strategy.cluster_resolver.task_id, + ) + write_model_path = write_filepath(model_path, task_type, task_id) + + multi_worker_model.save(write_model_path) + if not _is_chief(task_type, task_id): + tf.io.gfile.rmtree(os.path.dirname(write_model_path)) + + # Make sure chief finishes saving before non-chief's assertions. 
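The checkpointing below is a `Checkpoint`/`CheckpointManager` round trip plus the chief-only directory dance; the round trip itself, as a single-process sketch with a hypothetical directory:

import tensorflow as tf

v = tf.Variable(1.0)
ckpt = tf.train.Checkpoint(v=v)
manager = tf.train.CheckpointManager(
    ckpt, directory="/tmp/ckpt_demo", max_to_keep=1
)
manager.save()

v.assign(42.0)  # diverge, then restore from the latest checkpoint
ckpt.restore(tf.train.latest_checkpoint("/tmp/ckpt_demo"))
print(v.numpy())  # 1.0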
+ tf.__internal__.distribute.multi_process_runner.get_barrier().wait() + + if not tf.io.gfile.exists(model_path): + raise RuntimeError() + if tf.io.gfile.exists(write_model_path) != _is_chief( + task_type, task_id + ): + raise RuntimeError() + + with strategy.scope(): + loaded_model = tf.keras.models.load_model(model_path) + loaded_model.fit(multi_worker_dataset, epochs=1, steps_per_epoch=1) + + checkpoint = tf.train.Checkpoint(model=multi_worker_model) + write_checkpoint_dir = write_filepath( + checkpoint_dir, task_type, task_id + ) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, directory=write_checkpoint_dir, max_to_keep=1 + ) + + checkpoint_manager.save() + if not _is_chief(task_type, task_id): + tf.io.gfile.rmtree(write_checkpoint_dir) + + # Make sure chief finishes saving before non-chief's assertions. + tf.__internal__.distribute.multi_process_runner.get_barrier().wait() + + if not tf.io.gfile.exists(checkpoint_dir): + raise RuntimeError() + if tf.io.gfile.exists(write_checkpoint_dir) != _is_chief( + task_type, task_id + ): + raise RuntimeError() + + latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) + checkpoint.restore(latest_checkpoint) + multi_worker_model.fit( + multi_worker_dataset, epochs=1, steps_per_epoch=1 + ) + + logging.info("testMwmsWithModelFit successfully ends") + + model_path = os.path.join(self.get_temp_dir(), "model.tf") + checkpoint_dir = os.path.join(self.get_temp_dir(), "ckpt") + try: + mpr_result = tf.__internal__.distribute.multi_process_runner.run( + fn, + tf.__internal__.distribute.multi_process_runner.create_cluster_spec( # noqa: E501 + num_workers=NUM_WORKERS + ), + args=(model_path, checkpoint_dir), + return_output=True, + ) + except tf.errors.UnavailableError: + self.skipTest("Skipping rare disconnection among the workers.") + + self.assertTrue( + any( + [ + "testMwmsWithModelFit successfully ends" in msg + for msg in mpr_result.stdout + ] + ) + ) + + def extract_accuracy(worker_id, input_string): + match = re.match( + r"\[worker\-{}\].*accuracy: (\d+\.\d+).*".format(worker_id), + input_string, + ) + return None if match is None else float(match.group(1)) + + for worker_id in range(NUM_WORKERS): + accu_result = tf.nest.map_structure( + lambda x: extract_accuracy(worker_id, x), + mpr_result.stdout, + ) + self.assertTrue( + any(accu_result), + "Every worker is supposed to have accuracy result.", + ) + + @tf.__internal__.test.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], tf_api_version=2 + ) + ) + def testMwmsWithCtl(self, mode): + """Test multi-worker CTL training flow demo'ed in a to-be-added + tutorial.""" + + def proc_func(checkpoint_dir): + global_batch_size = PER_WORKER_BATCH_SIZE * NUM_WORKERS + strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() + try: + + with strategy.scope(): + multi_worker_model = self.build_cnn_model() + + multi_worker_dataset = ( + strategy.distribute_datasets_from_function( + lambda input_context: self.dataset_fn( + global_batch_size, + input_context, + ) + ) + ) + optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001) + train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy( + name="train_accuracy" + ) + + @tf.function + def train_step(iterator): + """Training step function.""" + + def step_fn(inputs): + """Per-Replica step function.""" + x, y = inputs + with tf.GradientTape() as tape: + predictions = multi_worker_model(x, training=True) + per_batch_loss = ( + tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, + 
reduction=tf.keras.losses.Reduction.NONE, + )(y, predictions) + ) + loss = tf.nn.compute_average_loss( + per_batch_loss, + global_batch_size=global_batch_size, + ) + + grads = tape.gradient( + loss, multi_worker_model.trainable_variables + ) + optimizer.apply_gradients( + zip(grads, multi_worker_model.trainable_variables) + ) + train_accuracy.update_state(y, predictions) + + return loss + + per_replica_losses = strategy.run( + step_fn, args=(next(iterator),) + ) + return strategy.reduce( + tf.distribute.ReduceOp.SUM, + per_replica_losses, + axis=None, + ) + + epoch = tf.Variable( + initial_value=tf.constant(0, dtype=tf.dtypes.int64), + name="epoch", + ) + step_in_epoch = tf.Variable( + initial_value=tf.constant(0, dtype=tf.dtypes.int64), + name="step_in_epoch", + ) + + task_type, task_id = ( + strategy.cluster_resolver.task_type, + strategy.cluster_resolver.task_id, + ) + checkpoint = tf.train.Checkpoint( + model=multi_worker_model, + epoch=epoch, + step_in_epoch=step_in_epoch, + ) + write_checkpoint_dir = write_filepath( + checkpoint_dir, task_type, task_id + ) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, directory=write_checkpoint_dir, max_to_keep=1 + ) + + latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) + if latest_checkpoint: + checkpoint.restore(latest_checkpoint) + + while epoch.numpy() < NUM_EPOCHS: + iterator = iter(multi_worker_dataset) + total_loss = 0.0 + num_batches = 0 + + while step_in_epoch.numpy() < NUM_STEPS_PER_EPOCH: + total_loss += train_step(iterator) + num_batches += 1 + step_in_epoch.assign_add(1) + + train_loss = total_loss / num_batches + logging.info( + "Epoch: %d, accuracy: %f, train_loss: %f.", + epoch.numpy(), + train_accuracy.result(), + train_loss, + ) + + train_accuracy.reset_state() + + checkpoint_manager.save() + if not _is_chief(task_type, task_id): + tf.io.gfile.rmtree(write_checkpoint_dir) + + epoch.assign_add(1) + step_in_epoch.assign(0) + + except tf.errors.UnavailableError as e: + logging.info("UnavailableError occurred: %r", e) + raise unittest.SkipTest("Skipping test due to UnavailableError") + + logging.info("testMwmsWithCtl successfully ends") + + checkpoint_dir = os.path.join(self.get_temp_dir(), "ckpt") + + mpr_result = tf.__internal__.distribute.multi_process_runner.run( + proc_func, + tf.__internal__.distribute.multi_process_runner.create_cluster_spec( + num_workers=NUM_WORKERS + ), + return_output=True, + args=(checkpoint_dir,), + ) + + self.assertTrue( + any( + [ + "testMwmsWithCtl successfully ends" in msg + for msg in mpr_result.stdout + ] + ) + ) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/integration_test/mwms_multi_process_runner_test.py b/keras/integration_test/mwms_multi_process_runner_test.py index 17f72e3d576c..178b843af8d5 100644 --- a/keras/integration_test/mwms_multi_process_runner_test.py +++ b/keras/integration_test/mwms_multi_process_runner_test.py @@ -18,8 +18,9 @@ from __future__ import print_function import os -from absl import logging + import tensorflow.compat.v2 as tf +from absl import logging NUM_WORKERS = 2 NUM_EPOCHS = 2 @@ -27,51 +28,59 @@ class MwmsMultiProcessRunnerTest(tf.test.TestCase): - """Test to demonstrate Keras training with MultiWorkerMirroredStrategy.""" - - def testMwmsWithModelFit(self): - - def worker_fn(): + """Test to demonstrate Keras training with MultiWorkerMirroredStrategy.""" - def dataset_fn(input_context): - del input_context # User should shard data accordingly. Omitted here. 
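The toy dataset built just below is six random (feature, label) rows batched in twos; on its own it yields three batches:

import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices(
    (tf.random.uniform((6, 10)), tf.random.uniform((6, 10)))
).batch(2)
for x, y in ds:
    print(x.shape, y.shape)  # (2, 10) (2, 10), three times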
- return tf.data.Dataset.from_tensor_slices((tf.random.uniform( - (6, 10)), tf.random.uniform((6, 10)))).batch(2) + def testMwmsWithModelFit(self): + def worker_fn(): + def dataset_fn(input_context): + # User should shard data accordingly. Omitted here. + del input_context + return tf.data.Dataset.from_tensor_slices( + (tf.random.uniform((6, 10)), tf.random.uniform((6, 10))) + ).batch(2) - strategy = tf.distribute.MultiWorkerMirroredStrategy() - with strategy.scope(): - model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) - model.compile( - loss=tf.keras.losses.CategoricalCrossentropy(), - optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001), - metrics=['accuracy']) + strategy = tf.distribute.MultiWorkerMirroredStrategy() + with strategy.scope(): + model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) + model.compile( + loss=tf.keras.losses.CategoricalCrossentropy(), + optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001), + metrics=["accuracy"], + ) - callbacks = [ - tf.keras.callbacks.ModelCheckpoint( - filepath=os.path.join(self.get_temp_dir(), 'checkpoint')) - ] - dataset = strategy.distribute_datasets_from_function(dataset_fn) - model.fit( - dataset, - epochs=NUM_EPOCHS, - steps_per_epoch=NUM_STEPS_PER_EPOCH, - callbacks=callbacks) + callbacks = [ + tf.keras.callbacks.ModelCheckpoint( + filepath=os.path.join(self.get_temp_dir(), "checkpoint") + ) + ] + dataset = strategy.distribute_datasets_from_function(dataset_fn) + model.fit( + dataset, + epochs=NUM_EPOCHS, + steps_per_epoch=NUM_STEPS_PER_EPOCH, + callbacks=callbacks, + ) - logging.info('testMwmsWithModelFit successfully ends') + logging.info("testMwmsWithModelFit successfully ends") - mpr_result = tf.__internal__.distribute.multi_process_runner.run( - worker_fn, - tf.__internal__.distribute.multi_process_runner.create_cluster_spec( - num_workers=NUM_WORKERS), - return_output=True) + mpr_result = tf.__internal__.distribute.multi_process_runner.run( + worker_fn, + tf.__internal__.distribute.multi_process_runner.create_cluster_spec( + num_workers=NUM_WORKERS + ), + return_output=True, + ) - # Verifying the worker functions ended successfully. - self.assertTrue( - any([ - 'testMwmsWithModelFit successfully ends' in msg - for msg in mpr_result.stdout - ])) + # Verifying the worker functions ended successfully. 
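The multi-process runner hands back the workers' captured logs, so the success check is a substring scan; the same check against hypothetical log lines:

stdout = [
    "[worker-0] testMwmsWithModelFit successfully ends",
    "[worker-1] testMwmsWithModelFit successfully ends",
]
assert any("testMwmsWithModelFit successfully ends" in msg for msg in stdout)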
+ self.assertTrue( + any( + [ + "testMwmsWithModelFit successfully ends" in msg + for msg in mpr_result.stdout + ] + ) + ) -if __name__ == '__main__': - tf.__internal__.distribute.multi_process_runner.test_main() +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/integration_test/parameter_server_custom_training_loop_test.py b/keras/integration_test/parameter_server_custom_training_loop_test.py index f30afc56f535..b35393b5bbad 100644 --- a/keras/integration_test/parameter_server_custom_training_loop_test.py +++ b/keras/integration_test/parameter_server_custom_training_loop_test.py @@ -16,10 +16,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + import multiprocessing -from absl import logging + import portpicker import tensorflow.compat.v2 as tf +from absl import logging NUM_EPOCHS = 10 NUM_STEPS = 100 @@ -27,108 +29,129 @@ class ParameterServerCustomTrainingLoopTest(tf.test.TestCase): - """Test to demonstrate custom training loop with ParameterServerStrategy.""" - - def create_in_process_cluster(self, num_workers, num_ps): - """Creates and starts local servers and returns the cluster_resolver.""" - worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)] - ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)] - - cluster_dict = {} - cluster_dict["worker"] = ["localhost:%s" % port for port in worker_ports] - if num_ps > 0: - cluster_dict["ps"] = ["localhost:%s" % port for port in ps_ports] - - cluster_spec = tf.train.ClusterSpec(cluster_dict) - - # Workers need some inter_ops threads to work properly. - worker_config = tf.compat.v1.ConfigProto() - if multiprocessing.cpu_count() < num_workers + 1: - worker_config.inter_op_parallelism_threads = num_workers + 1 - - for i in range(num_workers): - tf.distribute.Server( - cluster_spec, - job_name="worker", - task_index=i, - config=worker_config, - protocol="grpc") - - for i in range(num_ps): - tf.distribute.Server( - cluster_spec, job_name="ps", task_index=i, protocol="grpc") - - return cluster_spec - - def setUp(self): - super().setUp() - - cluster_spec = self.create_in_process_cluster(num_workers=3, num_ps=2) - cluster_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver( - cluster_spec, rpc_layer="grpc") - self.strategy = tf.distribute.experimental.ParameterServerStrategy( - cluster_resolver) - self.coordinator = ( - tf.distribute.experimental.coordinator.ClusterCoordinator( - self.strategy)) - - def testCustomTrainingLoop(self): - - coordinator, strategy = self.coordinator, self.strategy - - def per_worker_dataset_fn(): - - def dataset_fn(_): - return tf.data.Dataset.from_tensor_slices((tf.random.uniform( - (6, 10)), tf.random.uniform((6, 10)))).batch(2).repeat() - - return strategy.distribute_datasets_from_function(dataset_fn) - - per_worker_dataset = coordinator.create_per_worker_dataset( - per_worker_dataset_fn) - with strategy.scope(): - model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) - optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001) - train_accuracy = tf.keras.metrics.CategoricalAccuracy( - name="train_accuracy") - - @tf.function - def worker_train_fn(iterator): - - def replica_fn(inputs): - """Training loop function.""" - batch_data, labels = inputs - with tf.GradientTape() as tape: - predictions = model(batch_data, training=True) - loss = tf.keras.losses.CategoricalCrossentropy( - reduction=tf.keras.losses.Reduction.NONE)(labels, predictions) - 
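With `Reduction.NONE` the loss comes back as one value per example instead of a scalar, which is what lets distributed code control the averaging itself. A standalone check with hand-made tensors (the global batch size of 4 is hypothetical):

import tensorflow as tf

labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
predictions = tf.constant([[0.9, 0.1], [0.2, 0.8]])
per_example = tf.keras.losses.CategoricalCrossentropy(
    reduction=tf.keras.losses.Reduction.NONE
)(labels, predictions)
print(per_example.numpy())  # roughly [0.105, 0.223], one loss per example
# Averaging over the global batch keeps gradients comparable across
# replica counts.
print(float(tf.nn.compute_average_loss(per_example, global_batch_size=4)))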
gradients = tape.gradient(loss, model.trainable_variables) - - optimizer.apply_gradients(zip(gradients, model.trainable_variables)) - train_accuracy.update_state(labels, predictions) - - for _ in tf.range(STEPS_PER_EXECUTION): - strategy.run(replica_fn, args=(next(iterator),)) - - for epoch in range(NUM_EPOCHS): - - distributed_iterator = iter(per_worker_dataset) - - for step in range(0, NUM_STEPS, STEPS_PER_EXECUTION): - coordinator.schedule(worker_train_fn, args=(distributed_iterator,)) - logging.info("Epoch %d, step %d scheduled.", epoch, step) - - logging.info("Now joining at epoch %d.", epoch) - coordinator.join() - logging.info( - "Finished joining at epoch %d. Training accuracy: %f. " - "Total iterations: %d", epoch, train_accuracy.result(), - optimizer.iterations.value()) - - if epoch < NUM_EPOCHS - 1: - train_accuracy.reset_states() + """Test to demonstrate custom training loop with ParameterServerStrategy.""" + + def create_in_process_cluster(self, num_workers, num_ps): + """Creates and starts local servers and returns the cluster_resolver.""" + worker_ports = [ + portpicker.pick_unused_port() for _ in range(num_workers) + ] + ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)] + + cluster_dict = {} + cluster_dict["worker"] = [f"localhost:{port}" for port in worker_ports] + if num_ps > 0: + cluster_dict["ps"] = [f"localhost:{port}" for port in ps_ports] + + cluster_spec = tf.train.ClusterSpec(cluster_dict) + + # Workers need some inter_ops threads to work properly. + worker_config = tf.compat.v1.ConfigProto() + if multiprocessing.cpu_count() < num_workers + 1: + worker_config.inter_op_parallelism_threads = num_workers + 1 + + for i in range(num_workers): + tf.distribute.Server( + cluster_spec, + job_name="worker", + task_index=i, + config=worker_config, + protocol="grpc", + ) + + for i in range(num_ps): + tf.distribute.Server( + cluster_spec, job_name="ps", task_index=i, protocol="grpc" + ) + + return cluster_spec + + def setUp(self): + super().setUp() + + cluster_spec = self.create_in_process_cluster(num_workers=3, num_ps=2) + cluster_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver( + cluster_spec, rpc_layer="grpc" + ) + self.strategy = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver + ) + self.coordinator = ( + tf.distribute.experimental.coordinator.ClusterCoordinator( + self.strategy + ) + ) + + def testCustomTrainingLoop(self): + + coordinator, strategy = self.coordinator, self.strategy + + def per_worker_dataset_fn(): + def dataset_fn(_): + return ( + tf.data.Dataset.from_tensor_slices( + (tf.random.uniform((6, 10)), tf.random.uniform((6, 10))) + ) + .batch(2) + .repeat() + ) + + return strategy.distribute_datasets_from_function(dataset_fn) + + per_worker_dataset = coordinator.create_per_worker_dataset( + per_worker_dataset_fn + ) + with strategy.scope(): + model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) + optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001) + train_accuracy = tf.keras.metrics.CategoricalAccuracy( + name="train_accuracy" + ) + + @tf.function + def worker_train_fn(iterator): + def replica_fn(inputs): + """Training loop function.""" + batch_data, labels = inputs + with tf.GradientTape() as tape: + predictions = model(batch_data, training=True) + loss = tf.keras.losses.CategoricalCrossentropy( + reduction=tf.keras.losses.Reduction.NONE + )(labels, predictions) + gradients = tape.gradient(loss, model.trainable_variables) + + optimizer.apply_gradients( + zip(gradients, 
model.trainable_variables) + ) + train_accuracy.update_state(labels, predictions) + + for _ in tf.range(STEPS_PER_EXECUTION): + strategy.run(replica_fn, args=(next(iterator),)) + + for epoch in range(NUM_EPOCHS): + + distributed_iterator = iter(per_worker_dataset) + + for step in range(0, NUM_STEPS, STEPS_PER_EXECUTION): + coordinator.schedule( + worker_train_fn, args=(distributed_iterator,) + ) + logging.info("Epoch %d, step %d scheduled.", epoch, step) + + logging.info("Now joining at epoch %d.", epoch) + coordinator.join() + logging.info( + "Finished joining at epoch %d. Training accuracy: %f. " + "Total iterations: %d", + epoch, + train_accuracy.result(), + optimizer.iterations.value(), + ) + + if epoch < NUM_EPOCHS - 1: + train_accuracy.reset_states() if __name__ == "__main__": - if tf.__internal__.tf2.enabled(): - tf.__internal__.distribute.multi_process_runner.test_main() + if tf.__internal__.tf2.enabled(): + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/integration_test/parameter_server_keras_preprocessing_test.py b/keras/integration_test/parameter_server_keras_preprocessing_test.py index 987115683d48..5dcda78fe120 100644 --- a/keras/integration_test/parameter_server_keras_preprocessing_test.py +++ b/keras/integration_test/parameter_server_keras_preprocessing_test.py @@ -18,309 +18,393 @@ import os import random import tempfile -from absl.testing import parameterized -from keras.testing_infra import test_utils + import numpy as np import portpicker import tensorflow.compat.v2 as tf +from absl.testing import parameterized +from keras.testing_infra import test_utils # These vocabularies usually come from TFT or a Beam pipeline. FEATURE_VOCAB = [ - "avenger", "ironman", "batman", "hulk", "spiderman", "kingkong", - "wonder_woman" + "avenger", + "ironman", + "batman", + "hulk", + "spiderman", + "kingkong", + "wonder_woman", ] LABEL_VOCAB = ["yes", "no"] def create_in_process_cluster(num_workers, num_ps): - """Creates and starts local servers and returns the cluster_resolver.""" + """Creates and starts local servers and returns the cluster_resolver.""" - worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)] - ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)] + worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)] + ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)] - cluster_dict = {} - cluster_dict["worker"] = ["localhost:%s" % port for port in worker_ports] - if num_ps > 0: - cluster_dict["ps"] = ["localhost:%s" % port for port in ps_ports] + cluster_dict = {} + cluster_dict["worker"] = [f"localhost:{port}" for port in worker_ports] + if num_ps > 0: + cluster_dict["ps"] = [f"localhost:{port}" for port in ps_ports] - cluster_spec = tf.train.ClusterSpec(cluster_dict) + cluster_spec = tf.train.ClusterSpec(cluster_dict) - # Workers need some inter_ops threads to work properly. - worker_config = tf.compat.v1.ConfigProto() - if multiprocessing.cpu_count() < num_workers + 1: - worker_config.inter_op_parallelism_threads = num_workers + 1 + # Workers need some inter_ops threads to work properly. 
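What `create_in_process_cluster` assembles before starting servers is a plain `ClusterSpec` over locally picked ports; with hypothetical port numbers:

import tensorflow as tf

cluster_spec = tf.train.ClusterSpec(
    {
        "worker": ["localhost:12345", "localhost:12346", "localhost:12347"],
        "ps": ["localhost:22345", "localhost:22346"],
    }
)
print(cluster_spec.num_tasks("worker"), cluster_spec.num_tasks("ps"))  # 3 2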
+ worker_config = tf.compat.v1.ConfigProto() + if multiprocessing.cpu_count() < num_workers + 1: + worker_config.inter_op_parallelism_threads = num_workers + 1 - for i in range(num_workers): - tf.distribute.Server( - cluster_spec, - job_name="worker", - task_index=i, - config=worker_config, - protocol="grpc") + for i in range(num_workers): + tf.distribute.Server( + cluster_spec, + job_name="worker", + task_index=i, + config=worker_config, + protocol="grpc", + ) - for i in range(num_ps): - tf.distribute.Server( - cluster_spec, job_name="ps", task_index=i, protocol="grpc") + for i in range(num_ps): + tf.distribute.Server( + cluster_spec, job_name="ps", task_index=i, protocol="grpc" + ) - return cluster_spec + return cluster_spec @test_utils.run_v2_only class KPLTest(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super().setUp() - - cluster_spec = create_in_process_cluster(num_workers=3, num_ps=2) - cluster_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver( - cluster_spec, rpc_layer="grpc") - self.strategy = tf.distribute.experimental.ParameterServerStrategy( - cluster_resolver) - self.coordinator = ( - tf.distribute.experimental.coordinator.ClusterCoordinator( - self.strategy)) - - def define_kpls_for_training(self, use_adapt): - # Define KPLs under strategy's scope. Right now, if they have look up - # tables, they will be created on the client. Their variables will be - # created on PS. Ideally they should be cached on each worker since they - # will not be changed in a training step. - if use_adapt: - feature_lookup_layer = ( - tf.keras.layers.StringLookup( - num_oov_indices=1)) - feature_lookup_layer.adapt(FEATURE_VOCAB) - label_lookup_layer = ( - tf.keras.layers.StringLookup( - num_oov_indices=0, mask_token=None)) - label_lookup_layer.adapt(LABEL_VOCAB) - else: - # Do vocab shuffling. - shuffled_vocab = FEATURE_VOCAB.copy() - random.shuffle(shuffled_vocab) - feature_lookup_layer = ( - tf.keras.layers.StringLookup( - vocabulary=shuffled_vocab, num_oov_indices=1)) - label_lookup_layer = ( - tf.keras.layers.StringLookup( - vocabulary=LABEL_VOCAB, num_oov_indices=0, mask_token=None)) - - raw_feature_input = tf.keras.Input( - shape=(3,), dtype=tf.string, name="feature", ragged=True) - feature_id_input = feature_lookup_layer(raw_feature_input) - - # Model creates variables as well. - feature_ps = tf.keras.Model({"features": raw_feature_input}, - feature_id_input) - - raw_label_input = tf.keras.Input(shape=(1,), dtype=tf.string, name="label") - label_id_input = label_lookup_layer(raw_label_input) - label_ps = tf.keras.Model({"label": raw_label_input}, label_id_input) - - return feature_ps, label_ps - - def define_reverse_lookup_layer(self): - # Only needed for serving. - label_inverse_lookup_layer = ( - tf.keras.layers.StringLookup( + def setUp(self): + super().setUp() + + cluster_spec = create_in_process_cluster(num_workers=3, num_ps=2) + cluster_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver( + cluster_spec, rpc_layer="grpc" + ) + self.strategy = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver + ) + self.coordinator = ( + tf.distribute.experimental.coordinator.ClusterCoordinator( + self.strategy + ) + ) + + def define_kpls_for_training(self, use_adapt): + # Define KPLs under strategy's scope. Right now, if they have look up + # tables, they will be created on the client. Their variables will be + # created on PS. Ideally they should be cached on each worker since they + # will not be changed in a training step. 
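The two branches below build the same kind of lookup layer in two ways: `adapt` derives the vocabulary from data, while `vocabulary=` fixes it up front. Side by side, with a trimmed vocabulary:

import tensorflow as tf

vocab = ["avenger", "ironman", "batman"]

adapted = tf.keras.layers.StringLookup(num_oov_indices=1)
adapted.adapt(vocab)

fixed = tf.keras.layers.StringLookup(vocabulary=vocab, num_oov_indices=1)

# Unknown strings land in the OOV bucket (index 0) either way.
print(adapted(tf.constant(["ironman", "hulk"])).numpy())
print(fixed(tf.constant(["ironman", "hulk"])).numpy())  # [2 0]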
+ if use_adapt: + feature_lookup_layer = tf.keras.layers.StringLookup( + num_oov_indices=1 + ) + feature_lookup_layer.adapt(FEATURE_VOCAB) + label_lookup_layer = tf.keras.layers.StringLookup( + num_oov_indices=0, mask_token=None + ) + label_lookup_layer.adapt(LABEL_VOCAB) + else: + # Do vocab shuffling. + shuffled_vocab = FEATURE_VOCAB.copy() + random.shuffle(shuffled_vocab) + feature_lookup_layer = tf.keras.layers.StringLookup( + vocabulary=shuffled_vocab, num_oov_indices=1 + ) + label_lookup_layer = tf.keras.layers.StringLookup( + vocabulary=LABEL_VOCAB, num_oov_indices=0, mask_token=None + ) + + raw_feature_input = tf.keras.Input( + shape=(3,), dtype=tf.string, name="feature", ragged=True + ) + feature_id_input = feature_lookup_layer(raw_feature_input) + + # Model creates variables as well. + feature_ps = tf.keras.Model( + {"features": raw_feature_input}, feature_id_input + ) + + raw_label_input = tf.keras.Input( + shape=(1,), dtype=tf.string, name="label" + ) + label_id_input = label_lookup_layer(raw_label_input) + label_ps = tf.keras.Model({"label": raw_label_input}, label_id_input) + + return feature_ps, label_ps + + def define_reverse_lookup_layer(self): + # Only needed for serving. + label_inverse_lookup_layer = tf.keras.layers.StringLookup( num_oov_indices=0, mask_token=None, vocabulary=LABEL_VOCAB, - invert=True)) - return label_inverse_lookup_layer - - @tf.__internal__.distribute.combinations.generate( - tf.__internal__.test.combinations.combine( - mode=["eager"], - use_adapt=[True, False], - # TODO(b/1949359300): `load_under_strategy=True` flakily times out. - load_under_strategy=[False])) - def testTrainAndServe(self, use_adapt, load_under_strategy): - - with self.coordinator.strategy.scope(): - - feature_ps, label_ps = self.define_kpls_for_training(use_adapt) - - def dataset_fn(): - - def feature_and_label_gen(): - while True: - features = random.sample(FEATURE_VOCAB, 3) - label = ["yes"] if "avenger" in features else ["no"] - yield {"features": features, "label": label} - - # The dataset will be created on the coordinator. - raw_dataset = tf.data.Dataset.from_generator( - feature_and_label_gen, - output_signature={ - "features": tf.TensorSpec([3], tf.string), - "label": tf.TensorSpec([1], tf.string) - }).shuffle(100).batch(32) - - train_dataset = raw_dataset.map(lambda x: ( # pylint: disable=g-long-lambda - { - "features": feature_ps(x["features"]) - }, label_ps(x["label"]))) - return train_dataset - - # Create the model. The input needs to be compatible with KPLs. - model_input = tf.keras.Input( - shape=(3,), dtype=tf.int64, name="model_input") - - # input_dim includes a mask token and an oov token. 
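The `+ 2` in the embedding size accounts for the mask id and the OOV id on top of the vocabulary itself; sized any smaller, valid lookup ids would overflow the table:

import tensorflow as tf

vocab_size = 7  # len(FEATURE_VOCAB)
emb = tf.keras.layers.Embedding(input_dim=vocab_size + 2, output_dim=20)
ids = tf.constant([[1, 5, 8]])  # 8 is the highest id once mask and OOV exist
print(emb(ids).shape)  # (1, 3, 20)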
- emb_output = tf.keras.layers.Embedding( - input_dim=len(FEATURE_VOCAB) + 2, output_dim=20)( - model_input) - emb_output = tf.reduce_mean(emb_output, axis=1) - dense_output = tf.keras.layers.Dense( - units=1, activation="sigmoid")( - emb_output) - model = tf.keras.Model({"features": model_input}, dense_output) - - optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1) - accuracy = tf.keras.metrics.Accuracy() - - @tf.function - def worker_fn(iterator): - - def replica_fn(iterator): - batch_data, labels = next(iterator) - with tf.GradientTape() as tape: - pred = model(batch_data, training=True) - loss = tf.nn.compute_average_loss( - tf.keras.losses.BinaryCrossentropy( - reduction=tf.keras.losses.Reduction.NONE)(labels, pred)) - gradients = tape.gradient(loss, model.trainable_variables) - - optimizer.apply_gradients(zip(gradients, model.trainable_variables)) - - actual_pred = tf.cast(tf.greater(pred, 0.5), tf.int64) - accuracy.update_state(labels, actual_pred) - - self.coordinator.strategy.run(replica_fn, args=(iterator,)) - - distributed_dataset = self.coordinator.create_per_worker_dataset(dataset_fn) - distributed_iterator = iter(distributed_dataset) - for _ in range(4): - accuracy.reset_state() - for _ in range(7): - self.coordinator.schedule(worker_fn, args=(distributed_iterator,)) - self.coordinator.join() - self.assertGreater(accuracy.result().numpy(), 0.5) - - # Create a saved model. - model.feature_ps = feature_ps - model.label_ps = label_ps - model.label_inverse_lookup_layer = self.define_reverse_lookup_layer() - - def create_serving_signature(model): - - @tf.function - def serve_fn(raw_features): - raw_features = tf.expand_dims(raw_features, axis=0) - transformed_features = model.feature_ps(raw_features) - outputs = model(transformed_features) - outputs = tf.squeeze(outputs, axis=0) - outputs = tf.cast(tf.greater(outputs, 0.5), tf.int64) - decoded_outputs = model.label_inverse_lookup_layer(outputs) - return tf.squeeze(decoded_outputs, axis=0) - - # serving does NOT have batch dimension - return serve_fn.get_concrete_function( - tf.TensorSpec(shape=(3), dtype=tf.string, name="example")) - - serving_fn = create_serving_signature(model) - - saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) - model.save(saved_model_dir, signatures={"serving_default": serving_fn}) - - if load_under_strategy: - with self.coordinator.strategy.scope(): - - loaded_serving_fn = tf.keras.models.load_model( - saved_model_dir).signatures["serving_default"] - - outputs = [] - for _ in range(7): - outputs.append( - self.coordinator.schedule( - loaded_serving_fn, - args=(tf.constant(["avenger", "ironman", "avenger"]),))) - self.coordinator.join() - for prediction0 in outputs: - self.assertIn(prediction0._get_values()["output_0"], ("yes", "no")) - else: - loaded_serving_fn = tf.keras.models.load_model( - saved_model_dir).signatures["serving_default"] - - # check the result w/ and w/o avenger. - prediction0 = loaded_serving_fn( - tf.constant(["avenger", "ironman", "avenger"]))["output_0"] - self.assertIn(prediction0, ("yes", "no")) - - prediction1 = loaded_serving_fn( - tf.constant(["ironman", "ironman", "unknown"]))["output_0"] - self.assertIn(prediction1, ("yes", "no")) + invert=True, + ) + return label_inverse_lookup_layer + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], + use_adapt=[True, False], + test_training_with_loaded=[True, False], + # TODO(b/1949359300): `load_for_serving_under_strategy=True` flakily + # times out. 
+ load_for_serving_under_strategy=[False],
+ )
+ )
+ def testTrainAndLoadAndServe(
+ self,
+ use_adapt,
+ test_training_with_loaded,
+ load_for_serving_under_strategy,
+ ):
+
+ # test_training_with_loaded=False tests distributed training with a
+ # newly constructed KPL, while test_training_with_loaded=True tests
+ # distributed training with a loaded KPL that was also created under
+ # the strategy scope.
+ #
+ # load_for_serving_under_strategy tests serving with a model that is
+ # loaded either under the distribution strategy or outside of it.
+
+ with self.coordinator.strategy.scope():
+
+ feature_ps, label_ps = self.define_kpls_for_training(use_adapt)
+
+ if test_training_with_loaded:
+ saved_kpl_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
+ feature_ps_dir = os.path.join(saved_kpl_dir, "feature")
+ label_ps_dir = os.path.join(saved_kpl_dir, "label")
+
+ feature_ps.save(feature_ps_dir)
+ label_ps.save(label_ps_dir)
+
+ del feature_ps, label_ps
+
+ feature_ps = tf.keras.models.load_model(feature_ps_dir)
+ label_ps = tf.keras.models.load_model(label_ps_dir)
+
+ def dataset_fn():
+ def feature_and_label_gen():
+ while True:
+ features = random.sample(FEATURE_VOCAB, 3)
+ label = ["yes"] if "avenger" in features else ["no"]
+ yield {"features": features, "label": label}
+
+ # The dataset will be created on the coordinator.
+ raw_dataset = (
+ tf.data.Dataset.from_generator(
+ feature_and_label_gen,
+ output_signature={
+ "features": tf.TensorSpec([3], tf.string),
+ "label": tf.TensorSpec([1], tf.string),
+ },
+ )
+ .shuffle(100)
+ .batch(32)
+ )
+
+ train_dataset = raw_dataset.map(
+ lambda x: (
+ {"features": feature_ps(x["features"])},
+ label_ps(x["label"]),
+ )
+ )
+ return train_dataset
+
+ # Create the model. The input needs to be compatible with KPLs.
+ model_input = tf.keras.Input(
+ shape=(3,), dtype=tf.int64, name="model_input"
+ )
+
+ # input_dim includes a mask token and an oov token.
+ emb_output = tf.keras.layers.Embedding(
+ input_dim=len(FEATURE_VOCAB) + 2, output_dim=20
+ )(model_input)
+ emb_output = tf.reduce_mean(emb_output, axis=1)
+ dense_output = tf.keras.layers.Dense(units=1, activation="sigmoid")(
+ emb_output
+ )
+ model = tf.keras.Model({"features": model_input}, dense_output)
+
+ optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1)
+ accuracy = tf.keras.metrics.Accuracy()
+
+ @tf.function
+ def worker_fn(iterator):
+ def replica_fn(iterator):
+ batch_data, labels = next(iterator)
+ with tf.GradientTape() as tape:
+ pred = model(batch_data, training=True)
+ loss = tf.nn.compute_average_loss(
+ tf.keras.losses.BinaryCrossentropy(
+ reduction=tf.keras.losses.Reduction.NONE
+ )(labels, pred)
+ )
+ gradients = tape.gradient(loss, model.trainable_variables)
+
+ optimizer.apply_gradients(
+ zip(gradients, model.trainable_variables)
+ )
+
+ actual_pred = tf.cast(tf.greater(pred, 0.5), tf.int64)
+ accuracy.update_state(labels, actual_pred)
+
+ self.coordinator.strategy.run(replica_fn, args=(iterator,))
+
+ distributed_dataset = self.coordinator.create_per_worker_dataset(
+ dataset_fn
+ )
+ distributed_iterator = iter(distributed_dataset)
+ for _ in range(4):
+ accuracy.reset_state()
+ for _ in range(7):
+ self.coordinator.schedule(
+ worker_fn, args=(distributed_iterator,)
+ )
+ self.coordinator.join()
+ self.assertGreater(accuracy.result().numpy(), 0.5)
+
+ # Create a saved model.
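+ # Note: attaching the KPL models and the inverse lookup layer as
+ # attributes below makes them tracked objects, so their lookup
+ # tables are exported together with the model in the SavedModel.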
+ model.feature_ps = feature_ps + model.label_ps = label_ps + model.label_inverse_lookup_layer = self.define_reverse_lookup_layer() + + def create_serving_signature(model): + @tf.function + def serve_fn(raw_features): + raw_features = tf.expand_dims(raw_features, axis=0) + transformed_features = model.feature_ps(raw_features) + outputs = model(transformed_features) + outputs = tf.squeeze(outputs, axis=0) + outputs = tf.cast(tf.greater(outputs, 0.5), tf.int64) + decoded_outputs = model.label_inverse_lookup_layer(outputs) + return tf.squeeze(decoded_outputs, axis=0) + + # serving does NOT have batch dimension + return serve_fn.get_concrete_function( + tf.TensorSpec(shape=(3), dtype=tf.string, name="example") + ) + + serving_fn = create_serving_signature(model) + + saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + model.save(saved_model_dir, signatures={"serving_default": serving_fn}) + + if load_for_serving_under_strategy: + with self.coordinator.strategy.scope(): + + loaded_serving_fn = tf.keras.models.load_model( + saved_model_dir + ).signatures["serving_default"] + + outputs = [] + for _ in range(7): + outputs.append( + self.coordinator.schedule( + loaded_serving_fn, + args=(tf.constant(["avenger", "ironman", "avenger"]),), + ) + ) + self.coordinator.join() + for prediction0 in outputs: + self.assertIn( + prediction0._get_values()["output_0"], ("yes", "no") + ) + else: + loaded_serving_fn = tf.keras.models.load_model( + saved_model_dir + ).signatures["serving_default"] + + # check the result w/ and w/o avenger. + prediction0 = loaded_serving_fn( + tf.constant(["avenger", "ironman", "avenger"]) + )["output_0"] + self.assertIn(prediction0, ("yes", "no")) + + prediction1 = loaded_serving_fn( + tf.constant(["ironman", "ironman", "unknown"]) + )["output_0"] + self.assertIn(prediction1, ("yes", "no")) @test_utils.run_v2_only -class KPLCreatedInDatasetsFromFunctionTest(tf.test.TestCase, - parameterized.TestCase): - - def setUp(self): - super().setUp() - - cluster_spec = create_in_process_cluster(num_workers=3, num_ps=2) - cluster_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver( - cluster_spec, rpc_layer="grpc") - self.strategy = tf.distribute.experimental.ParameterServerStrategy( - cluster_resolver) - self.coordinator = ( - tf.distribute.experimental.coordinator.ClusterCoordinator( - self.strategy)) - - def testKPLCreatedInDatasetsFromFunction(self): - - filepath = os.path.join(self.get_temp_dir(), "vocab") - with open(filepath, "w") as f: - f.write("\n".join(["earth", "wind", "and", "fire"])) - - def per_worker_dataset_fn(): - - def dataset_fn(input_context): - del input_context - lookup_layer = tf.keras.layers.StringLookup( - num_oov_indices=1, vocabulary=filepath) - x = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - y = np.array([0, 1]) - map_fn = lambda x, y: (lookup_layer(x), y) - return tf.data.Dataset.from_tensor_slices( - (x, y)).shuffle(10).repeat().batch(2).map(map_fn) - - return self.coordinator.strategy.distribute_datasets_from_function( - dataset_fn) - - per_worker_distribute_dataset = self.coordinator.create_per_worker_dataset( - per_worker_dataset_fn) - per_worker_iter = iter(per_worker_distribute_dataset) - - @tf.function - def worker_fn(iterator): - - def replica_fn(data): - return data - - return self.coordinator.strategy.run(replica_fn, args=(next(iterator),)) - - result = [] - for _ in range(10): - result.append( - self.coordinator.schedule(worker_fn, args=(per_worker_iter,))) - - self.coordinator.join() 
+class KPLCreatedInDatasetsFromFunctionTest( + tf.test.TestCase, parameterized.TestCase +): + def setUp(self): + super().setUp() + + cluster_spec = create_in_process_cluster(num_workers=3, num_ps=2) + cluster_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver( + cluster_spec, rpc_layer="grpc" + ) + self.strategy = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver + ) + self.coordinator = ( + tf.distribute.experimental.coordinator.ClusterCoordinator( + self.strategy + ) + ) + + def testKPLCreatedInDatasetsFromFunction(self): + + filepath = os.path.join(self.get_temp_dir(), "vocab") + with open(filepath, "w") as f: + f.write("\n".join(["earth", "wind", "and", "fire"])) + + def per_worker_dataset_fn(): + def dataset_fn(input_context): + del input_context + lookup_layer = tf.keras.layers.StringLookup( + num_oov_indices=1, vocabulary=filepath + ) + x = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + y = np.array([0, 1]) + map_fn = lambda x, y: (lookup_layer(x), y) + return ( + tf.data.Dataset.from_tensor_slices((x, y)) + .shuffle(10) + .repeat() + .batch(2) + .map(map_fn) + ) + + return self.coordinator.strategy.distribute_datasets_from_function( + dataset_fn + ) + + per_worker_distribute_dataset = ( + self.coordinator.create_per_worker_dataset(per_worker_dataset_fn) + ) + per_worker_iter = iter(per_worker_distribute_dataset) + + @tf.function + def worker_fn(iterator): + def replica_fn(data): + return data + + return self.coordinator.strategy.run( + replica_fn, args=(next(iterator),) + ) + + result = [] + for _ in range(10): + result.append( + self.coordinator.schedule(worker_fn, args=(per_worker_iter,)) + ) + + self.coordinator.join() if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/integration_test/parameter_server_training_metric_test.py b/keras/integration_test/parameter_server_training_metric_test.py new file mode 100644 index 000000000000..adae47960738 --- /dev/null +++ b/keras/integration_test/parameter_server_training_metric_test.py @@ -0,0 +1,134 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Tests training metrics with the ParameterServer distribution strategy."""
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+
+from keras import layers as layers_module
+from keras import metrics as metrics_module
+from keras.engine import training as training_module
+from keras.testing_infra import test_combinations
+
+# isort: off
+from tensorflow.python.distribute import (
+ multi_process_runner,
+ multi_worker_test_base,
+)
+
+
+class ParameterServerTrainingMetricTest(test_combinations.TestCase):
+ """Test ParameterServerStrategy with Keras model training."""
+
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ cls.cluster = multi_worker_test_base.create_multi_process_cluster(
+ num_workers=2, num_ps=3, rpc_layer="grpc"
+ )
+ cls.cluster_resolver = cls.cluster.cluster_resolver
+
+ @classmethod
+ def tearDownClass(cls):
+ super().tearDownClass()
+ cls.cluster.stop()
+
+ @test_combinations.run_all_keras_modes(always_skip_v1=True)
+ def test_pss_fit_metric_batch_counter(self):
+ """Verify that metric data is complete during fit when using
+ ParameterServerStrategy.
+ """
+ strategy = tf.distribute.ParameterServerStrategy(
+ self.cluster_resolver,
+ variable_partitioner=None,
+ )
+
+ class BatchCount(metrics_module.Sum):
+ def __init__(self, name="batch_count", dtype=tf.int64):
+ super().__init__(name=name, dtype=dtype)
+
+ def update_state(self, y_true, y_pred, sample_weight=None):
+ return super().update_state(1, sample_weight)
+
+ # Build and compile model within strategy scope.
+ with strategy.scope():
+ inputs = layers_module.Input((1,))
+ outputs = layers_module.Dense(1)(inputs)
+ model = training_module.Model(inputs, outputs)
+ model.compile(
+ loss="mse", metrics=[BatchCount()], steps_per_execution=2
+ )
+
+ BATCH_SIZE = 10
+ x, y = np.ones((400, 1)), np.ones((400, 1))
+ val_x, val_y = np.ones((100, 1)), np.ones((100, 1))
+ train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
+ train_dataset = train_dataset.batch(BATCH_SIZE)
+ val_dataset = tf.data.Dataset.from_tensor_slices((val_x, val_y))
+ val_dataset = val_dataset.batch(BATCH_SIZE)
+ train_batch_count = x.shape[0] // BATCH_SIZE
+ val_batch_count = val_x.shape[0] // BATCH_SIZE
+ # Verify that Model.fit doesn't drop any batches.
+ hist = model.fit(
+ train_dataset,
+ steps_per_epoch=train_batch_count,
+ validation_data=val_dataset,
+ validation_steps=val_batch_count,
+ epochs=5,
+ )
+ # Verify that the min and max values of the batch count metric are
+ # accurate.
+ self.assertEqual(max(hist.history["batch_count"]), train_batch_count)
+ self.assertEqual(min(hist.history["batch_count"]), train_batch_count)
+ self.assertEqual(max(hist.history["val_batch_count"]), val_batch_count)
+ self.assertEqual(min(hist.history["val_batch_count"]), val_batch_count)
+
+ @test_combinations.run_all_keras_modes(always_skip_v1=True)
+ def test_pss_evaluate_metric_batch_counter(self):
+ """Verify that metric data is complete during evaluate when using
+ ParameterServerStrategy.
+ """
+ strategy = tf.distribute.ParameterServerStrategy(
+ self.cluster_resolver,
+ variable_partitioner=None,
+ )
+
+ class BatchCount(metrics_module.Sum):
+ def __init__(self, name="batch_count", dtype=tf.int64):
+ super().__init__(name=name, dtype=dtype)
+
+ def update_state(self, y_true, y_pred, sample_weight=None):
+ return super().update_state(1, sample_weight)
+
+ # Build and compile model within strategy scope.
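+ # Note: under ParameterServerStrategy, variables created inside this
+ # scope are placed on the parameter servers rather than on the
+ # workers.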
+ with strategy.scope():
+ inputs = layers_module.Input((1,))
+ outputs = layers_module.Dense(1)(inputs)
+ model = training_module.Model(inputs, outputs)
+ model.compile(
+ loss="mse", metrics=[BatchCount()], steps_per_execution=2
+ )
+
+ BATCH_SIZE = 10
+ x, y = np.ones((400, 1)), np.ones((400, 1))
+ dataset = tf.data.Dataset.from_tensor_slices((x, y))
+ batch_count = x.shape[0] // BATCH_SIZE
+ # Verify that the Model.evaluate batch counter metric is accurate.
+ eval_results = model.evaluate(dataset, steps=batch_count)
+ self.assertEqual(eval_results[-1], batch_count)
+
+
+if __name__ == "__main__":
+ tf.enable_v2_behavior()
+ multi_process_runner.test_main()
diff --git a/keras/integration_test/preprocessing_applied_in_dataset_creator_test.py b/keras/integration_test/preprocessing_applied_in_dataset_creator_test.py
index 152656fb54c1..3c490a1f5800 100644
--- a/keras/integration_test/preprocessing_applied_in_dataset_creator_test.py
+++ b/keras/integration_test/preprocessing_applied_in_dataset_creator_test.py
@@ -18,6 +18,7 @@
from __future__ import print_function
import tensorflow.compat.v2 as tf
+
from keras.integration_test import preprocessing_test_utils as utils
ds_combinations = tf.__internal__.distribute.combinations
@@ -28,7 +29,7 @@
# to API changes and backward-compatibility is not guaranteed.
STRATEGIES = [
ds_combinations.default_strategy,
- ds_combinations.mirrored_strategy_with_cpu_1_and_2,
+ ds_combinations.mirrored_strategy_with_two_cpus,
ds_combinations.mirrored_strategy_with_two_gpus,
ds_combinations.tpu_strategy,
ds_combinations.cloud_tpu_strategy,
@@ -41,34 +42,43 @@
@ds_combinations.generate(
- test_combinations.combine(strategy=STRATEGIES, mode="eager"))
+ test_combinations.combine(strategy=STRATEGIES, mode="eager")
+)
class PreprocessingAppliedInDatasetCreatorTest(tf.test.TestCase):
- """Demonstrate Keras preprocessing layers applied in tf.data.Dataset.map."""
+ """Demonstrate Keras preprocessing layers applied in tf.data.Dataset.map."""
- def testDistributedModelFit(self, strategy):
- if (not tf.__internal__.tf2.enabled()
- and isinstance(strategy,
- tf.distribute.experimental.ParameterServerStrategy)):
- self.skipTest(
- "Parameter Server strategy with dataset creator need to be run when "
- "eager execution is enabled.")
- with strategy.scope():
- preprocessing_model = utils.make_preprocessing_model(self.get_temp_dir())
- training_model = utils.make_training_model()
- training_model.compile(optimizer="sgd", loss="binary_crossentropy")
+ def testDistributedModelFit(self, strategy):
+ if not tf.__internal__.tf2.enabled() and isinstance(
+ strategy, tf.distribute.experimental.ParameterServerStrategy
+ ):
+ self.skipTest(
+ "Parameter Server strategy with dataset creator needs to be "
+ "run when eager execution is enabled."
+ ) + with strategy.scope(): + preprocessing_model = utils.make_preprocessing_model( + self.get_temp_dir() + ) + training_model = utils.make_training_model() + training_model.compile(optimizer="sgd", loss="binary_crossentropy") - def dataset_fn(input_context): - dataset = utils.make_dataset() - dataset = dataset.shard(input_context.num_input_pipelines, - input_context.input_pipeline_id) - batch_size = input_context.get_per_replica_batch_size( - global_batch_size=utils.BATCH_SIZE) - dataset = dataset.batch(batch_size).repeat().prefetch(2) - return dataset.map(lambda x, y: (preprocessing_model(x), y)) + def dataset_fn(input_context): + dataset = utils.make_dataset() + dataset = dataset.shard( + input_context.num_input_pipelines, + input_context.input_pipeline_id, + ) + batch_size = input_context.get_per_replica_batch_size( + global_batch_size=utils.BATCH_SIZE + ) + dataset = dataset.batch(batch_size).repeat().prefetch(2) + return dataset.map(lambda x, y: (preprocessing_model(x), y)) - dataset_creator = tf.keras.utils.experimental.DatasetCreator(dataset_fn) - training_model.fit(dataset_creator, epochs=2, steps_per_epoch=utils.STEPS) + dataset_creator = tf.keras.utils.experimental.DatasetCreator(dataset_fn) + training_model.fit( + dataset_creator, epochs=2, steps_per_epoch=utils.STEPS + ) if __name__ == "__main__": - multi_process_runner.test_main() + multi_process_runner.test_main() diff --git a/keras/integration_test/preprocessing_applied_in_dataset_test.py b/keras/integration_test/preprocessing_applied_in_dataset_test.py index ec73457f4c58..d54f9fdefaf3 100644 --- a/keras/integration_test/preprocessing_applied_in_dataset_test.py +++ b/keras/integration_test/preprocessing_applied_in_dataset_test.py @@ -18,6 +18,7 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.integration_test import preprocessing_test_utils as utils ds_combinations = tf.__internal__.distribute.combinations @@ -30,7 +31,7 @@ # a DatasetCreator when training on a tf.data.Dataset. 
STRATEGIES = [ ds_combinations.default_strategy, - ds_combinations.mirrored_strategy_with_cpu_1_and_2, + ds_combinations.mirrored_strategy_with_two_cpus, ds_combinations.mirrored_strategy_with_two_gpus, ds_combinations.tpu_strategy, ds_combinations.cloud_tpu_strategy, @@ -41,21 +42,24 @@ @ds_combinations.generate( - test_combinations.combine(strategy=STRATEGIES, mode="eager")) + test_combinations.combine(strategy=STRATEGIES, mode="eager") +) class PreprocessingAppliedInDatasetTest(tf.test.TestCase): - """Demonstrate Keras preprocessing layers applied in tf.data.Dataset.map.""" + """Demonstrate Keras preprocessing layers applied in tf.data.Dataset.map.""" - def testDistributedModelFit(self, strategy): - with strategy.scope(): - preprocessing_model = utils.make_preprocessing_model(self.get_temp_dir()) - training_model = utils.make_training_model() - training_model.compile(optimizer="sgd", loss="binary_crossentropy") + def testDistributedModelFit(self, strategy): + with strategy.scope(): + preprocessing_model = utils.make_preprocessing_model( + self.get_temp_dir() + ) + training_model = utils.make_training_model() + training_model.compile(optimizer="sgd", loss="binary_crossentropy") - dataset = utils.make_dataset() - dataset = dataset.batch(utils.BATCH_SIZE) - dataset = dataset.map(lambda x, y: (preprocessing_model(x), y)) - training_model.fit(dataset, epochs=2) + dataset = utils.make_dataset() + dataset = dataset.batch(utils.BATCH_SIZE) + dataset = dataset.map(lambda x, y: (preprocessing_model(x), y)) + training_model.fit(dataset, epochs=2) if __name__ == "__main__": - multi_process_runner.test_main() + multi_process_runner.test_main() diff --git a/keras/integration_test/preprocessing_applied_in_model_test.py b/keras/integration_test/preprocessing_applied_in_model_test.py index 29f338115c6a..4b1a20706955 100644 --- a/keras/integration_test/preprocessing_applied_in_model_test.py +++ b/keras/integration_test/preprocessing_applied_in_model_test.py @@ -18,6 +18,7 @@ from __future__ import print_function import tensorflow.compat.v2 as tf + from keras.integration_test import preprocessing_test_utils as utils ds_combinations = tf.__internal__.distribute.combinations @@ -28,7 +29,7 @@ # to API changes and backward-compatibility is not guaranteed. STRATEGIES = [ ds_combinations.default_strategy, - ds_combinations.mirrored_strategy_with_cpu_1_and_2, + ds_combinations.mirrored_strategy_with_two_cpus, ds_combinations.mirrored_strategy_with_two_gpus, # TODO(b/183044870) TPU strategies with soft placement do not yet work. # ds_combinations.tpu_strategy, @@ -42,37 +43,44 @@ @ds_combinations.generate( - test_combinations.combine(strategy=STRATEGIES, mode="eager")) + test_combinations.combine(strategy=STRATEGIES, mode="eager") +) class PreprocessingAppliedInModelTest(tf.test.TestCase): - """Demonstrate Keras preprocessing layers applied inside a Model.""" + """Demonstrate Keras preprocessing layers applied inside a Model.""" - def testDistributedModelFit(self, strategy): - if (not tf.__internal__.tf2.enabled() - and isinstance(strategy, - tf.distribute.experimental.ParameterServerStrategy)): - self.skipTest( - "Parameter Server strategy with dataset creator need to be run when " - "eager execution is enabled.") - with strategy.scope(): - preprocessing_model = utils.make_preprocessing_model(self.get_temp_dir()) - training_model = utils.make_training_model() - # Merge the two separate models into a single model for training. 
- inputs = preprocessing_model.inputs
- outputs = training_model(preprocessing_model(inputs))
- merged_model = tf.keras.Model(inputs, outputs)
- merged_model.compile(optimizer="sgd", loss="binary_crossentropy")
+ def testDistributedModelFit(self, strategy):
+ if not tf.__internal__.tf2.enabled() and isinstance(
+ strategy, tf.distribute.experimental.ParameterServerStrategy
+ ):
+ self.skipTest(
+ "Parameter Server strategy with dataset creator needs to be "
+ "run when eager execution is enabled."
+ )
+ with strategy.scope():
+ preprocessing_model = utils.make_preprocessing_model(
+ self.get_temp_dir()
+ )
+ training_model = utils.make_training_model()
+ # Merge the two separate models into a single model for training.
+ inputs = preprocessing_model.inputs
+ outputs = training_model(preprocessing_model(inputs))
+ merged_model = tf.keras.Model(inputs, outputs)
+ merged_model.compile(optimizer="sgd", loss="binary_crossentropy")
- def dataset_fn(input_context):
- dataset = utils.make_dataset()
- dataset = dataset.shard(input_context.num_input_pipelines,
- input_context.input_pipeline_id)
- batch_size = input_context.get_per_replica_batch_size(
- global_batch_size=utils.BATCH_SIZE)
- return dataset.batch(batch_size).repeat().prefetch(2)
+ def dataset_fn(input_context):
+ dataset = utils.make_dataset()
+ dataset = dataset.shard(
+ input_context.num_input_pipelines,
+ input_context.input_pipeline_id,
+ )
+ batch_size = input_context.get_per_replica_batch_size(
+ global_batch_size=utils.BATCH_SIZE
+ )
+ return dataset.batch(batch_size).repeat().prefetch(2)
- dataset_creator = tf.keras.utils.experimental.DatasetCreator(dataset_fn)
- merged_model.fit(dataset_creator, epochs=2, steps_per_epoch=utils.STEPS)
+ dataset_creator = tf.keras.utils.experimental.DatasetCreator(dataset_fn)
+ merged_model.fit(dataset_creator, epochs=2, steps_per_epoch=utils.STEPS)
if __name__ == "__main__":
- multi_process_runner.test_main()
+ multi_process_runner.test_main()
diff --git a/keras/integration_test/preprocessing_test_utils.py b/keras/integration_test/preprocessing_test_utils.py
index ace50be24164..8287dc83a348 100644
--- a/keras/integration_test/preprocessing_test_utils.py
+++ b/keras/integration_test/preprocessing_test_utils.py
@@ -17,6 +17,7 @@
import os
import tensorflow.compat.v2 as tf
+
preprocessing = tf.keras.layers
BATCH_SIZE = 64
@@ -26,85 +27,87 @@
def make_dataset():
- """Make a simple structured dataset.
-
- The dataset contains three feature columns.
- - float_col: an unnormalized numeric column.
- - int_col: an column of integer IDs.
- - string_col: a column of fixed vocabulary terms.
-
- Returns:
- The dataset.
- """
- tf.random.set_seed(197011)
- floats = tf.random.uniform((DS_SIZE, 1), maxval=10, dtype="float32")
- # Generate a 100 unique integer values, but over a wide range to showcase a
- # common use case for IntegerLookup.
- ints = tf.random.uniform((DS_SIZE, 1), maxval=VOCAB_SIZE, dtype="int64")
- ints = ints * 1000
- # Use a fixed vocabulary of strings from 0 to 99, to showcase loading a
- # vocabulary from a file.
- strings = tf.random.uniform((DS_SIZE, 1), maxval=VOCAB_SIZE, dtype="int64")
- strings = tf.strings.as_string(strings)
- features = {"float_col": floats, "int_col": ints, "string_col": strings}
- # Random binary label.
- labels = tf.random.uniform((DS_SIZE, 1), maxval=2, dtype="int64")
- ds = tf.data.Dataset.from_tensor_slices((features, labels))
- return ds
+ """Make a simple structured dataset.
+
+ The dataset contains three feature columns.
+ - float_col: an unnormalized numeric column.
+ - int_col: a column of integer IDs.
+ - string_col: a column of fixed vocabulary terms.
+
+ Returns:
+ The dataset.
+ """
+ tf.random.set_seed(197011)
+ floats = tf.random.uniform((DS_SIZE, 1), maxval=10, dtype="float32")
+ # Generate 100 unique integer values, but over a wide range to showcase
+ # a common use case for IntegerLookup.
+ ints = tf.random.uniform((DS_SIZE, 1), maxval=VOCAB_SIZE, dtype="int64")
+ ints = ints * 1000
+ # Use a fixed vocabulary of strings from 0 to 99, to showcase loading a
+ # vocabulary from a file.
+ strings = tf.random.uniform((DS_SIZE, 1), maxval=VOCAB_SIZE, dtype="int64")
+ strings = tf.strings.as_string(strings)
+ features = {"float_col": floats, "int_col": ints, "string_col": strings}
+ # Random binary label.
+ labels = tf.random.uniform((DS_SIZE, 1), maxval=2, dtype="int64")
+ ds = tf.data.Dataset.from_tensor_slices((features, labels))
+ return ds
def make_preprocessing_model(file_dir):
- """Make a standalone preprocessing model."""
- # The name of our keras.Input should match the column name in the dataset.
- float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")
- int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")
- string_in = tf.keras.Input(shape=(1,), dtype="string", name="string_col")
-
- # We need to batch a dataset before adapting.
- ds = make_dataset().batch(BATCH_SIZE)
- # Normalize floats by adapting the mean and variance of the input.
- normalization = preprocessing.Normalization()
- normalization.adapt(ds.map(lambda features, labels: features["float_col"]))
- float_out = normalization(float_in)
- # Lookup ints by adapting a vocab of integer IDs.
- int_lookup = preprocessing.IntegerLookup()
- int_lookup.adapt(ds.map(lambda features, labels: features["int_col"]))
- int_out = int_lookup(int_in)
- # Lookup strings from a fixed file based vocabulary.
- string_vocab = list(str(i) for i in range(VOCAB_SIZE))
- vocab_file = os.path.join(file_dir, "vocab_file.txt")
- with open(vocab_file, "w") as f:
- f.write("\n".join(string_vocab))
- string_lookup = preprocessing.StringLookup(vocabulary=vocab_file)
- string_out = string_lookup(string_in)
-
- return tf.keras.Model(
- inputs=(float_in, int_in, string_in),
- outputs=(float_out, int_out, string_out))
+ """Make a standalone preprocessing model."""
+ # The name of our keras.Input should match the column name in the dataset.
+ float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")
+ int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")
+ string_in = tf.keras.Input(shape=(1,), dtype="string", name="string_col")
+
+ # We need to batch a dataset before adapting.
+ ds = make_dataset().batch(BATCH_SIZE)
+ # Normalize floats by adapting the mean and variance of the input.
+ normalization = preprocessing.Normalization()
+ normalization.adapt(ds.map(lambda features, labels: features["float_col"]))
+ float_out = normalization(float_in)
+ # Look up ints by adapting a vocab of integer IDs.
+ int_lookup = preprocessing.IntegerLookup()
+ int_lookup.adapt(ds.map(lambda features, labels: features["int_col"]))
+ int_out = int_lookup(int_in)
+ # Look up strings from a fixed file-based vocabulary.
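+ # Note: with a vocabulary file, the lookup table is initialized
+ # directly from disk, so no adapt() pass over the data is needed.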
+ string_vocab = list(str(i) for i in range(VOCAB_SIZE))
+ vocab_file = os.path.join(file_dir, "vocab_file.txt")
+ with open(vocab_file, "w") as f:
+ f.write("\n".join(string_vocab))
+ string_lookup = preprocessing.StringLookup(vocabulary=vocab_file)
+ string_out = string_lookup(string_in)
+
+ return tf.keras.Model(
+ inputs=(float_in, int_in, string_in),
+ outputs=(float_out, int_out, string_out),
+ )
def make_training_model():
- """Make a trainable model for the preprocessed inputs."""
- float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")
- # After preprocessing, both the string and int column are integer ready for
- # embedding.
- int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")
- string_in = tf.keras.Input(shape=(1,), dtype="int64", name="string_col")
-
- # Feed the lookup layers into an embedding.
- int_embedding = tf.keras.layers.Embedding(VOCAB_SIZE + 1, 8, input_length=1)
- int_out = int_embedding(int_in)
- int_out = tf.keras.layers.Flatten()(int_out)
- string_embedding = tf.keras.layers.Embedding(
- VOCAB_SIZE + 1, 8, input_length=1)
- string_out = string_embedding(string_in)
- string_out = tf.keras.layers.Flatten()(string_out)
-
- # Concatenate outputs.
- concatate = tf.keras.layers.Concatenate()
- # Feed our preprocessed inputs into a simple MLP.
- x = concatate((float_in, int_out, string_out))
- x = tf.keras.layers.Dense(32, activation="relu")(x)
- x = tf.keras.layers.Dense(32, activation="relu")(x)
- outputs = tf.keras.layers.Dense(1, activation="softmax")(x)
- return tf.keras.Model(inputs=(float_in, int_in, string_in), outputs=outputs)
+ """Make a trainable model for the preprocessed inputs."""
+ float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")
+ # After preprocessing, both the string and int columns are integers
+ # ready for embedding.
+ int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")
+ string_in = tf.keras.Input(shape=(1,), dtype="int64", name="string_col")
+
+ # Feed the lookup layers into an embedding.
+ int_embedding = tf.keras.layers.Embedding(VOCAB_SIZE + 1, 8, input_length=1)
+ int_out = int_embedding(int_in)
+ int_out = tf.keras.layers.Flatten()(int_out)
+ string_embedding = tf.keras.layers.Embedding(
+ VOCAB_SIZE + 1, 8, input_length=1
+ )
+ string_out = string_embedding(string_in)
+ string_out = tf.keras.layers.Flatten()(string_out)
+
+ # Concatenate outputs.
+ concatate = tf.keras.layers.Concatenate()
+ # Feed our preprocessed inputs into a simple MLP.
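+ # Note: the concatenated vector is the 1 float feature plus the two
+ # flattened 8-dim embeddings, i.e. 17 features per example.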
+ x = concatate((float_in, int_out, string_out)) + x = tf.keras.layers.Dense(32, activation="relu")(x) + x = tf.keras.layers.Dense(32, activation="relu")(x) + outputs = tf.keras.layers.Dense(1, activation="softmax")(x) + return tf.keras.Model(inputs=(float_in, int_in, string_in), outputs=outputs) diff --git a/keras/integration_test/py_metric_test.py b/keras/integration_test/py_metric_test.py new file mode 100644 index 000000000000..f07f019ab120 --- /dev/null +++ b/keras/integration_test/py_metric_test.py @@ -0,0 +1,72 @@ +"""Test Model.fit with a PyMetric.""" + +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras import Sequential +from keras import layers +from keras import losses +from keras import metrics +from keras.testing_infra import test_combinations + + +def get_dataset(num_batches=5, batch_size=2): + x = tf.random.uniform((num_batches * batch_size, 100)) + y = tf.random.uniform((num_batches * batch_size, 2)) + dataset = ( + tf.data.Dataset.from_tensor_slices((x, y)) + .prefetch(batch_size * 2) + .batch(batch_size) + ) + return dataset + + +class CountingPyMetric(metrics.PyMetric): + """A test-only PyMetric which simply counts how many results it's seen.""" + + def update_state(self, y_true, y_pred, sample_weight=None): + self.y_pred.append(y_pred) + + def reset_state(self): + self.y_pred = [] + + def result(self): + return len(self.y_pred) + + +class PyMetricTest(test_combinations.TestCase): + @parameterized.named_parameters(("eager", True), ("graph", False)) + def test_fit(self, run_eagerly): + num_batches = 5 + dataset = get_dataset(num_batches=num_batches) + + counting_metric = CountingPyMetric() + + model = Sequential(layers.Dense(2)) + model.compile( + loss=losses.BinaryCrossentropy(), + metrics=[counting_metric], + run_eagerly=run_eagerly, + ) + model.fit(dataset, epochs=1) + + self.assertEqual(counting_metric.result(), num_batches) + + @parameterized.named_parameters(("eager", True), ("graph", False)) + def test_evaluate(self, run_eagerly): + num_batches = 5 + dataset = get_dataset(num_batches=num_batches) + + model = Sequential(layers.Dense(2)) + model.compile( + loss=losses.BinaryCrossentropy(), + metrics=[CountingPyMetric()], + run_eagerly=run_eagerly, + ) + loss, count = model.evaluate(dataset) + + self.assertEqual(count, num_batches) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/integration_test/saved_model_test.py b/keras/integration_test/saved_model_test.py index 81d1c3dfe183..63cbf28fc846 100644 --- a/keras/integration_test/saved_model_test.py +++ b/keras/integration_test/saved_model_test.py @@ -16,224 +16,236 @@ import os import tempfile -from absl.testing import parameterized - import tensorflow.compat.v2 as tf +from absl.testing import parameterized def cycle(obj, cycles, signatures=None): - to_save = obj - # TODO(vbardiovsky): It would be nice if exported protos reached a fixed - # point w.r.t. saving/restoring, ideally after 2nd saving. - for _ in range(cycles): - path = tempfile.mkdtemp(prefix=tf.compat.v1.test.get_temp_dir()) - # If available, we'll run the save and restore preferring the GPU. This - # just makes sure we aren't throwing errors and have enough - # device("CPU") blocks to satisfy the placer. 
- device = "/device:GPU:0" if tf.test.is_gpu_available() else "/device:CPU:0" - with tf.device(device): - tf.saved_model.save(to_save, path, signatures) - loaded = tf.saved_model.load(path) - to_save = loaded - return loaded + to_save = obj + # TODO(vbardiovsky): It would be nice if exported protos reached a fixed + # point w.r.t. saving/restoring, ideally after 2nd saving. + for _ in range(cycles): + path = tempfile.mkdtemp(prefix=tf.compat.v1.test.get_temp_dir()) + # If available, we'll run the save and restore preferring the GPU. This + # just makes sure we aren't throwing errors and have enough + # device("CPU") blocks to satisfy the placer. + device = ( + "/device:GPU:0" if tf.test.is_gpu_available() else "/device:CPU:0" + ) + with tf.device(device): + tf.saved_model.save(to_save, path, signatures) + loaded = tf.saved_model.load(path) + to_save = loaded + return loaded class _ModelWithOptimizer(tf.train.Checkpoint): + def __init__(self): + self.dense = tf.keras.layers.Dense(1) + self.optimizer = tf.keras.optimizers.Adam(0.01) - def __init__(self): - self.dense = tf.keras.layers.Dense(1) - self.optimizer = tf.keras.optimizers.Adam(0.01) - - @tf.function( - input_signature=(tf.TensorSpec([None, 2], tf.float32), - tf.TensorSpec([None], tf.float32))) - def call(self, x, y): - with tf.GradientTape() as tape: - loss = tf.math.reduce_mean((self.dense(x) - y) ** 2.) - trainable_variables = self.dense.trainable_variables - gradients = tape.gradient(loss, trainable_variables) - self.optimizer.apply_gradients(zip(gradients, trainable_variables)) - return {"loss": loss} + @tf.function( + input_signature=( + tf.TensorSpec([None, 2], tf.float32), + tf.TensorSpec([None], tf.float32), + ) + ) + def call(self, x, y): + with tf.GradientTape() as tape: + loss = tf.math.reduce_mean((self.dense(x) - y) ** 2.0) + trainable_variables = self.dense.trainable_variables + gradients = tape.gradient(loss, trainable_variables) + self.optimizer.apply_gradients(zip(gradients, trainable_variables)) + return {"loss": loss} def _import_and_infer(save_dir, inputs, signature_key="serving_default"): - """Import a SavedModel into a TF 1.x-style graph and run `signature_key`.""" - graph = tf.Graph() - with graph.as_default(), tf.compat.v1.Session() as session: - model = tf.compat.v1.saved_model.load(session, ["serve"], save_dir) - return _run_signature(session, model, inputs, signature_key) + """Import a SavedModel into a TF 1.x-style graph and run `signature_key`.""" + graph = tf.Graph() + with graph.as_default(), tf.compat.v1.Session() as session: + model = tf.compat.v1.saved_model.load(session, ["serve"], save_dir) + return _run_signature(session, model, inputs, signature_key) def _run_signature(session, meta_graph_def, inputs, signature_key): - signature = meta_graph_def.signature_def[signature_key] - assert set(inputs.keys()) == set(signature.inputs.keys()) - feed_dict = {} - for arg_name in inputs.keys(): - input_tensor = session.graph.get_tensor_by_name( - signature.inputs[arg_name].name) - feed_dict[input_tensor] = inputs[arg_name] - output_dict = {} - for output_name, output_tensor_info in signature.outputs.items(): - output_dict[output_name] = session.graph.get_tensor_by_name( - output_tensor_info.name) - return session.run(output_dict, feed_dict=feed_dict) + signature = meta_graph_def.signature_def[signature_key] + assert set(inputs.keys()) == set(signature.inputs.keys()) + feed_dict = {} + for arg_name in inputs.keys(): + input_tensor = session.graph.get_tensor_by_name( + signature.inputs[arg_name].name + ) + 
feed_dict[input_tensor] = inputs[arg_name] + output_dict = {} + for output_name, output_tensor_info in signature.outputs.items(): + output_dict[output_name] = session.graph.get_tensor_by_name( + output_tensor_info.name + ) + return session.run(output_dict, feed_dict=feed_dict) class SaveTest(tf.test.TestCase): - - def test_unbuilt_model_does_not_prevent_saving(self): - root = tf.train.Checkpoint( - model=tf.keras.Sequential([tf.keras.layers.Dense(2)])) - tf.saved_model.save(root, os.path.join(self.get_temp_dir(), "saved_model")) - - def test_optimizer(self): - x = tf.constant([[3., 4.]]) - y = tf.constant([2.]) - model = _ModelWithOptimizer() - first_loss = model.call(x, y) - save_dir = os.path.join(self.get_temp_dir(), "saved_model") - tf.saved_model.save(model, save_dir, model.call) - second_loss = model.call(x, y) - self.assertNotEqual(first_loss, second_loss) - self.assertAllClose( - second_loss, - _import_and_infer(save_dir, {"x": [[3., 4.]], "y": [2.]})) - - def test_single_method_default_signature(self): - model = _ModelWithOptimizer() - x = tf.constant([[3., 4.]]) - y = tf.constant([2.]) - model.call(x, y) - save_dir = os.path.join(self.get_temp_dir(), "saved_model") - tf.saved_model.save(model, save_dir) - self.assertIn("loss", - _import_and_infer(save_dir, - {"x": [[3., 4.]], "y": [2.]})) + def test_unbuilt_model_does_not_prevent_saving(self): + root = tf.train.Checkpoint( + model=tf.keras.Sequential([tf.keras.layers.Dense(2)]) + ) + tf.saved_model.save( + root, os.path.join(self.get_temp_dir(), "saved_model") + ) + + def test_optimizer(self): + x = tf.constant([[3.0, 4.0]]) + y = tf.constant([2.0]) + model = _ModelWithOptimizer() + first_loss = model.call(x, y) + save_dir = os.path.join(self.get_temp_dir(), "saved_model") + tf.saved_model.save(model, save_dir, model.call) + second_loss = model.call(x, y) + self.assertNotEqual(first_loss, second_loss) + self.assertAllClose( + second_loss, + _import_and_infer(save_dir, {"x": [[3.0, 4.0]], "y": [2.0]}), + ) + + def test_single_method_default_signature(self): + model = _ModelWithOptimizer() + x = tf.constant([[3.0, 4.0]]) + y = tf.constant([2.0]) + model.call(x, y) + save_dir = os.path.join(self.get_temp_dir(), "saved_model") + tf.saved_model.save(model, save_dir) + self.assertIn( + "loss", _import_and_infer(save_dir, {"x": [[3.0, 4.0]], "y": [2.0]}) + ) @parameterized.named_parameters( dict(testcase_name="ReloadOnce", cycles=1), dict(testcase_name="ReloadTwice", cycles=2), - dict(testcase_name="ReloadThrice", cycles=3)) + dict(testcase_name="ReloadThrice", cycles=3), +) class LoadTest(tf.test.TestCase, parameterized.TestCase): - - def test_optimizer(self, cycles): - - class _HasOptimizer(tf.Module): - - def __init__(self): - super().__init__() - self.layer = tf.keras.layers.Dense(1) - self.optimizer = tf.keras.optimizers.Adam(0.01) - - @tf.function - def __call__(self, x): - return self.layer(x) - - @tf.function - def train(self, x, y): - with tf.GradientTape() as tape: - predicted = self(x) - loss = tf.math.reduce_sum(tf.math.abs(y - predicted)) - train_vars = self.layer.trainable_variables - grads = tape.gradient(loss, train_vars) - self.optimizer.apply_gradients(zip(grads, train_vars)) - - root = _HasOptimizer() - train_input = dict(x=tf.constant([[1.]]), - y=tf.constant([[2.]])) - root.train(**train_input) - imported = cycle(root, cycles) - self.assertAllClose(root.optimizer.learning_rate.numpy(), - imported.optimizer.learning_rate.numpy()) - self.assertAllClose(root(tf.constant([[-0.5]])), - imported(tf.constant([[-0.5]]))) - 
root.train(**train_input) - imported.train(**train_input) - self.assertAllClose(root(tf.constant([[-0.5]])), - imported(tf.constant([[-0.5]]))) - - def test_model_with_custom_function_attached(self, cycles): - root = tf.train.Checkpoint( - model=tf.keras.Sequential([tf.keras.layers.Dense(2)])) - - @tf.function - def _use_sequential(x): - return root.model.call(x) - - root.model.traced_call = _use_sequential - - original = root.model.traced_call(tf.zeros([1, 1])).numpy() - root = cycle(root, cycles) - self.assertAllEqual( - original, - root.model.traced_call(tf.zeros([1, 1])).numpy()) + def test_optimizer(self, cycles): + class _HasOptimizer(tf.Module): + def __init__(self): + super().__init__() + self.layer = tf.keras.layers.Dense(1) + self.optimizer = tf.keras.optimizers.Adam(0.01) + + @tf.function + def __call__(self, x): + return self.layer(x) + + @tf.function + def train(self, x, y): + with tf.GradientTape() as tape: + predicted = self(x) + loss = tf.math.reduce_sum(tf.math.abs(y - predicted)) + train_vars = self.layer.trainable_variables + grads = tape.gradient(loss, train_vars) + self.optimizer.apply_gradients(zip(grads, train_vars)) + + root = _HasOptimizer() + train_input = dict(x=tf.constant([[1.0]]), y=tf.constant([[2.0]])) + root.train(**train_input) + imported = cycle(root, cycles) + self.assertAllClose( + root.optimizer.learning_rate.numpy(), + imported.optimizer.learning_rate.numpy(), + ) + self.assertAllClose( + root(tf.constant([[-0.5]])), imported(tf.constant([[-0.5]])) + ) + root.train(**train_input) + imported.train(**train_input) + self.assertAllClose( + root(tf.constant([[-0.5]])), imported(tf.constant([[-0.5]])) + ) + + def test_model_with_custom_function_attached(self, cycles): + root = tf.train.Checkpoint( + model=tf.keras.Sequential([tf.keras.layers.Dense(2)]) + ) + + @tf.function + def _use_sequential(x): + return root.model.call(x) + + root.model.traced_call = _use_sequential + + original = root.model.traced_call(tf.zeros([1, 1])).numpy() + root = cycle(root, cycles) + self.assertAllEqual( + original, root.model.traced_call(tf.zeros([1, 1])).numpy() + ) @parameterized.named_parameters( dict(testcase_name="ReloadOnce", cycles=1), dict(testcase_name="ReloadTwice", cycles=2), - dict(testcase_name="ReloadThrice", cycles=3)) + dict(testcase_name="ReloadThrice", cycles=3), +) class KerasLoadTest(tf.test.TestCase, parameterized.TestCase): - - def test_dense_features_layer(self, cycles): - columns = [ - tf.feature_column.numeric_column("x"), - tf.feature_column.numeric_column("y") - ] - layer = tf.keras.layers.DenseFeatures(columns) - model = tf.keras.Sequential([layer]) - model_input = {"x": tf.constant([[1.]]), - "y": tf.constant([[2.]])} - self.assertAllClose([[1., 2.]], model.predict(model_input, steps=1)) - loaded = cycle(model, cycles) - output, = loaded._default_save_signature(model_input).values() - self.assertAllClose([[1., 2.]], output) - signature_output, = loaded.signatures["serving_default"]( - **model_input).values() - self.assertAllClose([[1., 2.]], signature_output) - - def test_dense_features_layer_fit(self, cycles): - columns = [tf.feature_column.numeric_column("x")] - model = tf.keras.Sequential( - [tf.keras.layers.DenseFeatures(columns), - tf.keras.layers.Dense(1)]) - model_input = {"x": tf.constant([[1.]])} - model.compile(optimizer="adam", loss="mse", run_eagerly=True) - model.fit(model_input, tf.constant([[3.]])) - loaded = cycle(model, cycles) - loaded._default_save_signature(model_input) - loaded.signatures["serving_default"](**model_input) - - 
def test_multi_output_layer(self, cycles):
-
- inp = tf.keras.Input(name="inp", shape=(None,), dtype=tf.float32)
-
- class _MultiOutput(tf.keras.layers.Layer):
-
- def call(self, x):
- return x + 1., x + 2.
-
- out = _MultiOutput(name="out")(inp) # pylint: disable=not-callable
- model = tf.keras.Model(inp, out)
- loaded = cycle(model, cycles)
- self.assertAllClose(
- dict(out=2., out_1=3.),
- loaded.signatures["serving_default"](tf.constant(1.)))
-
- def test_functional_model_with_conv(self, cycles):
- x = tf.keras.Input(name="x", shape=(None, None, 3), dtype=tf.float32)
- conved = tf.keras.layers.Conv2D(
- filters=3, kernel_size=3, dilation_rate=2)(x)
- model = tf.keras.Model([x], conved)
- model_input = tf.ones((1, 10, 10, 3))
- initial_output = model.predict([model_input])
- model = cycle(model, cycles)
- self.assertAllClose(
- [initial_output],
- list(model.signatures["serving_default"](model_input).values()))
+ def test_dense_features_layer(self, cycles):
+ columns = [
+ tf.feature_column.numeric_column("x"),
+ tf.feature_column.numeric_column("y"),
+ ]
+ layer = tf.keras.layers.DenseFeatures(columns)
+ model = tf.keras.Sequential([layer])
+ model_input = {"x": tf.constant([[1.0]]), "y": tf.constant([[2.0]])}
+ self.assertAllClose([[1.0, 2.0]], model.predict(model_input, steps=1))
+ loaded = cycle(model, cycles)
+ (output,) = loaded._default_save_signature(model_input).values()
+ self.assertAllClose([[1.0, 2.0]], output)
+ (signature_output,) = loaded.signatures["serving_default"](
+ **model_input
+ ).values()
+ self.assertAllClose([[1.0, 2.0]], signature_output)
+
+ def test_dense_features_layer_fit(self, cycles):
+ columns = [tf.feature_column.numeric_column("x")]
+ model = tf.keras.Sequential(
+ [tf.keras.layers.DenseFeatures(columns), tf.keras.layers.Dense(1)]
+ )
+ model_input = {"x": tf.constant([[1.0]])}
+ model.compile(optimizer="adam", loss="mse", run_eagerly=True)
+ model.fit(model_input, tf.constant([[3.0]]))
+ loaded = cycle(model, cycles)
+ loaded._default_save_signature(model_input)
+ loaded.signatures["serving_default"](**model_input)
+
+ def test_multi_output_layer(self, cycles):
+
+ inp = tf.keras.Input(name="inp", shape=(None,), dtype=tf.float32)
+
+ class _MultiOutput(tf.keras.layers.Layer):
+ def call(self, x):
+ return x + 1.0, x + 2.0
+
+ out = _MultiOutput(name="out")(inp)
+ model = tf.keras.Model(inp, out)
+ loaded = cycle(model, cycles)
+ self.assertAllClose(
+ dict(out=2.0, out_1=3.0),
+ loaded.signatures["serving_default"](tf.constant(1.0)),
+ )
+
+ def test_functional_model_with_conv(self, cycles):
+ x = tf.keras.Input(name="x", shape=(None, None, 3), dtype=tf.float32)
+ conved = tf.keras.layers.Conv2D(
+ filters=3, kernel_size=3, dilation_rate=2
+ )(x)
+ model = tf.keras.Model([x], conved)
+ model_input = tf.ones((1, 10, 10, 3))
+ initial_output = model.predict([model_input])
+ model = cycle(model, cycles)
+ self.assertAllClose(
+ [initial_output],
+ list(model.signatures["serving_default"](model_input).values()),
+ )
if __name__ == "__main__":
- if tf.__internal__.tf2.enabled():
- tf.test.main()
+ if tf.__internal__.tf2.enabled():
+ tf.test.main()
diff --git a/keras/integration_test/saving_v3_test.py b/keras/integration_test/saving_v3_test.py
new file mode 100644
index 000000000000..de4906cbabbb
--- /dev/null
+++ b/keras/integration_test/saving_v3_test.py
@@ -0,0 +1,130 @@
+"""Test keras_v3 saving and reloading across a diverse range of models."""
+
+import os
+
+import tensorflow.compat.v2 as tf
+from absl.testing import parameterized
+
+from keras.integration_test.models
import bert +from keras.integration_test.models import dcgan +from keras.integration_test.models import edge_case_model +from keras.integration_test.models import input_spec +from keras.integration_test.models import low_level_model +from keras.integration_test.models import mini_unet +from keras.integration_test.models import mini_xception +from keras.integration_test.models import retinanet +from keras.integration_test.models import structured_data_classification +from keras.integration_test.models import text_classification +from keras.integration_test.models import timeseries_forecasting +from keras.integration_test.models import vae +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + + +def get_dataset(data_specs, batch_size): + values = tf.nest.map_structure(input_spec.spec_to_value, data_specs) + dataset = ( + tf.data.Dataset.from_tensor_slices(values) + .prefetch(batch_size * 2) + .batch(batch_size) + ) + return dataset + + +@test_utils.run_v2_only +class SavingV3Test(test_combinations.TestCase): + @parameterized.named_parameters( + ("bert", bert), + ("edge_case_model", edge_case_model), + # ("efficientnet_v2", efficientnet_v2), # Too expensive to run on CI + ("low_level_model", low_level_model), + ("mini_unet", mini_unet), + ("mini_xception", mini_xception), + ("retinanet", retinanet), + ("structured_data_classification", structured_data_classification), + ("text_classification", text_classification), + ("timeseries_forecasting", timeseries_forecasting), + ) + def test_saving_v3(self, module): + batch_size = 2 + data_specs = module.get_data_spec(batch_size * 2) + dataset = get_dataset(data_specs, batch_size) + for batch in dataset.take(1): + pass + if isinstance(batch, tuple): + batch = batch[0] + + model = module.get_model( + build=True, + compile=True, + jit_compile=False, + include_preprocessing=True, + ) + model.fit(dataset, epochs=1, steps_per_epoch=1) + temp_filepath = os.path.join( + self.get_temp_dir(), f"{module.__name__}.keras" + ) + model.save(temp_filepath, save_format="keras_v3") + with tf.keras.utils.custom_object_scope(module.get_custom_objects()): + new_model = tf.keras.models.load_model(temp_filepath) + + # Test model weights + self.assertIs(new_model.__class__, model.__class__) + self.assertEqual(len(model.get_weights()), len(new_model.get_weights())) + for w1, w2 in zip(model.get_weights(), new_model.get_weights()): + if w1.dtype == "object": + self.assertEqual(str(w1), str(w2)) + else: + self.assertAllClose(w1, w2, atol=1e-6) + + # Test forward pass + self.assertAllClose(new_model(batch), model(batch), atol=1e-6) + + # Test optimizer state + if hasattr(model, "optimizer"): + self.assertEqual( + len(model.optimizer.variables()), + len(new_model.optimizer.variables()), + ) + for v1, v2 in zip( + model.optimizer.variables(), new_model.optimizer.variables() + ): + self.assertAllClose(v1.numpy(), v2.numpy(), atol=1e-6) + + # Test training still works + new_model.fit(dataset, epochs=1, steps_per_epoch=1) + + @parameterized.named_parameters(("dcgan", dcgan), ("vae", vae)) + def test_saving_v3_no_call(self, module): + batch_size = 2 + data_specs = module.get_data_spec(batch_size * 2) + dataset = get_dataset(data_specs, batch_size) + + model = module.get_model( + build=True, + compile=True, + jit_compile=False, + include_preprocessing=True, + ) + temp_filepath = os.path.join( + self.get_temp_dir(), f"{module.__name__}.keras" + ) + model.save(temp_filepath, save_format="keras_v3") + with 
tf.keras.utils.custom_object_scope(module.get_custom_objects()): + new_model = tf.keras.models.load_model(temp_filepath) + + # Test model weights + self.assertIs(new_model.__class__, model.__class__) + self.assertEqual(len(model.get_weights()), len(new_model.get_weights())) + for w1, w2 in zip(model.get_weights(), new_model.get_weights()): + if w1.dtype == "object": + self.assertEqual(str(w1), str(w2)) + else: + self.assertAllClose(w1, w2, atol=1e-6) + + # Test training still works + new_model.fit(dataset, epochs=1, steps_per_epoch=1) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/integration_test/tf_trt_test.py b/keras/integration_test/tf_trt_test.py index ba472b264e1c..93f18013ed9b 100644 --- a/keras/integration_test/tf_trt_test.py +++ b/keras/integration_test/tf_trt_test.py @@ -16,52 +16,56 @@ import os import tempfile -from absl import flags - import tensorflow.compat.v2 as tf import tensorflow_text as tf_text +from absl import flags class ConvertResource(tf.test.TestCase): + def testConvertResource(self): + """Test general resource inputs don't crash the converter.""" + if not tf.test.is_built_with_cuda(): + self.skipTest("test is only applicable with CUDA") - def testConvertResource(self): - """Test general resource inputs don't crash the converter.""" - if not tf.test.is_built_with_cuda(): - self.skipTest('test is only applicable with CUDA') - - class TokenizeLayer(tf.keras.layers.Layer): - - def __init__(self, vocab_file): - super().__init__() - serialized_proto = tf.compat.v1.gfile.GFile(vocab_file, "rb").read() - self.tokenizer = tf_text.SentencepieceTokenizer( - model=serialized_proto, add_bos=True, add_eos=True) + class TokenizeLayer(tf.keras.layers.Layer): + def __init__(self, vocab_file): + super().__init__() + serialized_proto = tf.compat.v1.gfile.GFile( + vocab_file, "rb" + ).read() + self.tokenizer = tf_text.SentencepieceTokenizer( + model=serialized_proto, add_bos=True, add_eos=True + ) - def call(self, inputs): - word_ids = self.tokenizer.tokenize(inputs) - word_ids = word_ids.to_tensor(default_value=1, shape=(None, 192)) - return word_ids + def call(self, inputs): + word_ids = self.tokenizer.tokenize(inputs) + word_ids = word_ids.to_tensor( + default_value=1, shape=(None, 192) + ) + return word_ids - vocab_file = os.path.join( - flags.FLAGS['test_srcdir'].value, - 'org_keras/keras', - 'integration_test/data/sentencepiece.pb') - # vocab_file = tf.compat.v1.test.test_src_dir_path( - # "python/keras/integration_test/data/sentencepiece.pb") - output_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + vocab_file = os.path.join( + flags.FLAGS["test_srcdir"].value, + "org_keras/keras", + "integration_test/data/sentencepiece.pb", + ) + # vocab_file = tf.compat.v1.test.test_src_dir_path( + # "python/keras/integration_test/data/sentencepiece.pb") + output_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) - # Create and save a Tokenizer - tokenizer = TokenizeLayer(vocab_file) - inputs = tf.keras.layers.Input(shape=(), dtype=tf.dtypes.string) - tokens = tokenizer(inputs) - model = tf.keras.models.Model(inputs=inputs, outputs=tokens) - model.save(output_dir) + # Create and save a Tokenizer + tokenizer = TokenizeLayer(vocab_file) + inputs = tf.keras.layers.Input(shape=(), dtype=tf.dtypes.string) + tokens = tokenizer(inputs) + model = tf.keras.models.Model(inputs=inputs, outputs=tokens) + model.save(output_dir) - converter = tf.experimental.tensorrt.Converter( - input_saved_model_dir=output_dir, - conversion_params=tf.experimental.tensorrt.ConversionParams()) - 
converter.convert() + converter = tf.experimental.tensorrt.Converter( + input_saved_model_dir=output_dir, + conversion_params=tf.experimental.tensorrt.ConversionParams(), + ) + converter.convert() if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/integration_test/tpu_strategy_test.py b/keras/integration_test/tpu_strategy_test.py index ff52374966c1..de02d1e27463 100644 --- a/keras/integration_test/tpu_strategy_test.py +++ b/keras/integration_test/tpu_strategy_test.py @@ -17,10 +17,13 @@ import random import tempfile +import tensorflow.compat.v2 as tf from absl import flags -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) FLAGS = flags.FLAGS flags.DEFINE_string("tpu", "", "Name of TPU to connect to.") @@ -29,213 +32,258 @@ # These vocabularies usually come from TFT or a Beam pipeline. FEATURE_VOCAB = [ - "avenger", "ironman", "batman", "hulk", "spiderman", "kingkong", - "wonder_woman" + "avenger", + "ironman", + "batman", + "hulk", + "spiderman", + "kingkong", + "wonder_woman", ] LABEL_VOCAB = ["yes", "no"] def get_tpu_cluster_resolver(): - resolver = tf.distribute.cluster_resolver.TPUClusterResolver( - tpu=FLAGS.tpu, - zone=FLAGS.zone, - project=FLAGS.project, - ) - return resolver + resolver = tf.distribute.cluster_resolver.TPUClusterResolver( + tpu=FLAGS.tpu, + zone=FLAGS.zone, + project=FLAGS.project, + ) + return resolver def get_tpu_strategy(): - resolver = get_tpu_cluster_resolver() - tf.config.experimental_connect_to_cluster(resolver) - tf.tpu.experimental.initialize_tpu_system(resolver) - return tf.distribute.experimental.TPUStrategy(resolver) + resolver = get_tpu_cluster_resolver() + tf.config.experimental_connect_to_cluster(resolver) + tf.tpu.experimental.initialize_tpu_system(resolver) + return tf.distribute.experimental.TPUStrategy(resolver) class TpuStrategyTest(tf.test.TestCase): - - def define_kpls_for_training(self, use_adapt): - if use_adapt: - feature_lookup_layer = ( - tf.keras.layers.StringLookup( - num_oov_indices=1)) - feature_lookup_layer.adapt(FEATURE_VOCAB) - label_lookup_layer = ( - tf.keras.layers.StringLookup( - num_oov_indices=0, mask_token=None)) - label_lookup_layer.adapt(LABEL_VOCAB) - else: - feature_lookup_layer = ( - tf.keras.layers.StringLookup( - vocabulary=FEATURE_VOCAB, num_oov_indices=1)) - label_lookup_layer = ( - tf.keras.layers.StringLookup( - vocabulary=LABEL_VOCAB, num_oov_indices=0, mask_token=None)) - - raw_feature_input = tf.keras.layers.Input( - shape=(3,), dtype=tf.dtypes.string, name="feature", ragged=True) - feature_id_input = feature_lookup_layer(raw_feature_input) - feature_mapper = tf.keras.Model({"features": raw_feature_input}, - feature_id_input) - - raw_label_input = tf.keras.layers.Input( - shape=(1,), dtype=tf.dtypes.string, name="label") - label_id_input = label_lookup_layer(raw_label_input) - label_mapper = tf.keras.Model({"label": raw_label_input}, label_id_input) - - return feature_mapper, label_mapper - - def define_inverse_lookup_layer(self): - # Only needed for serving. 
- label_inverse_lookup_layer = ( - tf.keras.layers.StringLookup( + def define_kpls_for_training(self, use_adapt): + if use_adapt: + feature_lookup_layer = tf.keras.layers.StringLookup( + num_oov_indices=1 + ) + feature_lookup_layer.adapt(FEATURE_VOCAB) + label_lookup_layer = tf.keras.layers.StringLookup( + num_oov_indices=0, mask_token=None + ) + label_lookup_layer.adapt(LABEL_VOCAB) + else: + feature_lookup_layer = tf.keras.layers.StringLookup( + vocabulary=FEATURE_VOCAB, num_oov_indices=1 + ) + label_lookup_layer = tf.keras.layers.StringLookup( + vocabulary=LABEL_VOCAB, num_oov_indices=0, mask_token=None + ) + + raw_feature_input = tf.keras.layers.Input( + shape=(3,), dtype=tf.dtypes.string, name="feature", ragged=True + ) + feature_id_input = feature_lookup_layer(raw_feature_input) + feature_mapper = tf.keras.Model( + {"features": raw_feature_input}, feature_id_input + ) + + raw_label_input = tf.keras.layers.Input( + shape=(1,), dtype=tf.dtypes.string, name="label" + ) + label_id_input = label_lookup_layer(raw_label_input) + label_mapper = tf.keras.Model( + {"label": raw_label_input}, label_id_input + ) + + return feature_mapper, label_mapper + + def define_inverse_lookup_layer(self): + # Only needed for serving. + label_inverse_lookup_layer = tf.keras.layers.StringLookup( num_oov_indices=0, mask_token=None, vocabulary=LABEL_VOCAB, - invert=True)) - return label_inverse_lookup_layer - - def test_keras_metric_outside_strategy_scope_per_replica(self): - if not tf.compat.v1.executing_eagerly(): - self.skipTest("connect_to_cluster() can only be called in eager mode") - strategy = get_tpu_strategy() - metric = tf.keras.metrics.Mean("test_metric", dtype=tf.float32) - - dataset = tf.data.Dataset.range(strategy.num_replicas_in_sync * 2).batch(2) - dataset = strategy.experimental_distribute_dataset(dataset) - - @tf.function - def step_fn(i): - metric.update_state(i) - - with self.assertRaisesRegex( - ValueError, "Trying to run metric.update_state " - "in replica context"): - with strategy.scope(): - for i in dataset: - strategy.run(step_fn, args=(i,)) - - @tf_test_utils.disable_mlir_bridge( - "TODO(b/168036682): Support dynamic padder") - def test_train_and_serve(self): - if not tf.compat.v1.executing_eagerly(): - self.skipTest("connect_to_cluster() can only be called in eager mode") - strategy = get_tpu_strategy() - use_adapt = False - - with strategy.scope(): - feature_mapper, label_mapper = self.define_kpls_for_training(use_adapt) - - def dataset_fn(_): - - def feature_and_label_gen(): - # Generator of dataset. - while True: - features = random.sample(FEATURE_VOCAB, 3) - label = ["yes"] if "avenger" in features else ["no"] - yield {"features": features, "label": label} - - raw_dataset = tf.data.Dataset.from_generator( - feature_and_label_gen, - output_signature={ - "features": tf.TensorSpec([3], tf.dtypes.string), - "label": tf.TensorSpec([1], tf.dtypes.string) - }).shuffle(100).batch(32) - - train_dataset = raw_dataset.map(lambda x: ( # pylint: disable=g-long-lambda - { - "features": feature_mapper(x["features"]) - }, label_mapper(x["label"]))) - return train_dataset - - # Create the model. The input needs to be compatible with KPLs. - model_input = tf.keras.layers.Input( - shape=(3,), dtype=tf.dtypes.int64, name="model_input") - - # input_dim includes a mask token and an oov token. 
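# Why input_dim is len(FEATURE_VOCAB) + 2 below: the lookup can emit one id
# per vocabulary word plus reserved ids for a mask token and an OOV bucket,
# so the Embedding table must have a row for every possible id. A quick
# sanity check (sketch; uses names from this test and assumes a TF 2.x
# StringLookup with a vocabulary_size() method):
lookup = tf.keras.layers.StringLookup(
    vocabulary=FEATURE_VOCAB, num_oov_indices=1
)
assert lookup.vocabulary_size() <= len(FEATURE_VOCAB) + 2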
- emb_output = tf.keras.layers.Embedding( - input_dim=len(FEATURE_VOCAB) + 2, output_dim=20)( - model_input) - emb_output = tf.math.reduce_mean(emb_output, axis=1) - dense_output = tf.keras.layers.Dense( - units=1, activation="sigmoid")( - emb_output) - model = tf.keras.Model({"features": model_input}, dense_output) - - optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1) - accuracy = tf.keras.metrics.Accuracy() - - @tf.function - def train_step(iterator): - """The step function for one training step.""" - - def step_fn(inputs): - """The computation to run on each TPU device.""" - features, labels = inputs - with tf.GradientTape() as tape: - pred = model(features, training=True) - loss = tf.keras.losses.binary_crossentropy(labels, pred) - loss = tf.nn.compute_average_loss(loss) - grads = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(list(zip(grads, model.trainable_variables))) - - actual_pred = tf.cast(tf.math.greater(pred, 0.5), tf.dtypes.int64) - accuracy.update_state(labels, actual_pred) - - strategy.run(step_fn, args=(next(iterator),)) - - distributed_dataset = strategy.distribute_datasets_from_function( - dataset_fn) - distributed_iterator = iter(distributed_dataset) - num_epochs = 4 - num_steps = 7 - for _ in range(num_epochs): - accuracy.reset_state() - for _ in range(num_steps): - train_step(distributed_iterator) - - self.assertGreater(accuracy.result().numpy(), 0.5) - self.assertEqual(optimizer.iterations.numpy(), num_epochs * num_steps) - - # Create a saved model. - model.feature_mapper = feature_mapper - model.label_mapper = label_mapper - model.label_inverse_lookup_layer = self.define_inverse_lookup_layer() - - def create_serving_signature(model): + invert=True, + ) + return label_inverse_lookup_layer + + def test_keras_metric_outside_strategy_scope_per_replica(self): + if not tf.compat.v1.executing_eagerly(): + self.skipTest( + "connect_to_cluster() can only be called in eager mode" + ) + strategy = get_tpu_strategy() + metric = tf.keras.metrics.Mean("test_metric", dtype=tf.float32) + + dataset = tf.data.Dataset.range( + strategy.num_replicas_in_sync * 2 + ).batch(2) + dataset = strategy.experimental_distribute_dataset(dataset) @tf.function - def serve_fn(raw_features): - raw_features = tf.expand_dims(raw_features, axis=0) - transformed_features = model.feature_mapper(raw_features) - outputs = model(transformed_features) - outputs = tf.squeeze(outputs, axis=0) - outputs = tf.cast(tf.math.greater(outputs, 0.5), tf.dtypes.int64) - decoded_outputs = model.label_inverse_lookup_layer(outputs) - return tf.squeeze(decoded_outputs, axis=0) - - # Serving does NOT have batch dimension - return serve_fn.get_concrete_function( - tf.TensorSpec(shape=(3), dtype=tf.dtypes.string, name="example")) - - serving_fn = create_serving_signature(model) - - saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) - model.save(saved_model_dir, save_format="tf", - signatures={"serving_default": serving_fn}) - - # Test the saved_model. - loaded_serving_fn = tf.keras.models.load_model( - saved_model_dir).signatures["serving_default"] - - # Check model calling with serving signature. 
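# SavedModel signatures are called with tensors and return a dict keyed by
# output name; a single unnamed output defaults to the key "output_0", which
# is why the prediction checks here index the result that way. Sketch using
# names from this test (assumes TF 2.x):
loaded = tf.keras.models.load_model(saved_model_dir)
serving_fn = loaded.signatures["serving_default"]
pred = serving_fn(tf.constant(["avenger", "ironman", "avenger"]))["output_0"]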
- prediction1 = loaded_serving_fn( - tf.constant(["avenger", "ironman", "avenger"]))["output_0"] - self.assertIn(prediction1, ("yes", "no")) - - prediction2 = loaded_serving_fn( - tf.constant(["ironman", "ironman", "unknown"]))["output_0"] - self.assertIn(prediction2, ("yes", "no")) + def step_fn(i): + metric.update_state(i) + + with self.assertRaisesRegex( + ValueError, + "Trying to run metric.update_state in replica context", + ): + with strategy.scope(): + for i in dataset: + strategy.run(step_fn, args=(i,)) + + @tf_test_utils.disable_mlir_bridge( + "TODO(b/168036682): Support dynamic padder" + ) + def test_train_and_serve(self): + if not tf.compat.v1.executing_eagerly(): + self.skipTest( + "connect_to_cluster() can only be called in eager mode" + ) + strategy = get_tpu_strategy() + use_adapt = False + + with strategy.scope(): + feature_mapper, label_mapper = self.define_kpls_for_training( + use_adapt + ) + + def dataset_fn(_): + def feature_and_label_gen(): + # Generator of dataset. + while True: + features = random.sample(FEATURE_VOCAB, 3) + label = ["yes"] if "avenger" in features else ["no"] + yield {"features": features, "label": label} + + raw_dataset = ( + tf.data.Dataset.from_generator( + feature_and_label_gen, + output_signature={ + "features": tf.TensorSpec([3], tf.dtypes.string), + "label": tf.TensorSpec([1], tf.dtypes.string), + }, + ) + .shuffle(100) + .batch(32) + ) + + train_dataset = raw_dataset.map( + lambda x: ( + {"features": feature_mapper(x["features"])}, + label_mapper(x["label"]), + ) + ) + return train_dataset + + # Create the model. The input needs to be compatible with KPLs. + model_input = tf.keras.layers.Input( + shape=(3,), dtype=tf.dtypes.int64, name="model_input" + ) + + # input_dim includes a mask token and an oov token. + emb_output = tf.keras.layers.Embedding( + input_dim=len(FEATURE_VOCAB) + 2, output_dim=20 + )(model_input) + emb_output = tf.math.reduce_mean(emb_output, axis=1) + dense_output = tf.keras.layers.Dense(units=1, activation="sigmoid")( + emb_output + ) + model = tf.keras.Model({"features": model_input}, dense_output) + + optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1) + accuracy = tf.keras.metrics.Accuracy() + + @tf.function + def train_step(iterator): + """The step function for one training step.""" + + def step_fn(inputs): + """The computation to run on each TPU device.""" + features, labels = inputs + with tf.GradientTape() as tape: + pred = model(features, training=True) + loss = tf.keras.losses.binary_crossentropy(labels, pred) + loss = tf.nn.compute_average_loss(loss) + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients( + list(zip(grads, model.trainable_variables)) + ) + + actual_pred = tf.cast( + tf.math.greater(pred, 0.5), tf.dtypes.int64 + ) + accuracy.update_state(labels, actual_pred) + + strategy.run(step_fn, args=(next(iterator),)) + + distributed_dataset = strategy.distribute_datasets_from_function( + dataset_fn + ) + distributed_iterator = iter(distributed_dataset) + num_epochs = 4 + num_steps = 7 + for _ in range(num_epochs): + accuracy.reset_state() + for _ in range(num_steps): + train_step(distributed_iterator) + + self.assertGreater(accuracy.result().numpy(), 0.5) + self.assertEqual( + optimizer.iterations.numpy(), num_epochs * num_steps + ) + + # Create a saved model. 
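# An exported signature must be a ConcreteFunction, i.e. a tf.function traced
# for one specific input signature; passing a TensorSpec with no batch
# dimension is what makes the serving entry point below per-example. Minimal
# sketch of the mechanism (hypothetical function body, assumes TF 2.x):
@tf.function
def serve(x):
    return tf.strings.length(x)

concrete = serve.get_concrete_function(
    tf.TensorSpec(shape=(3,), dtype=tf.string, name="example")
)
# model.save(path, save_format="tf", signatures={"serving_default": concrete})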
+ model.feature_mapper = feature_mapper + model.label_mapper = label_mapper + model.label_inverse_lookup_layer = ( + self.define_inverse_lookup_layer() + ) + + def create_serving_signature(model): + @tf.function + def serve_fn(raw_features): + raw_features = tf.expand_dims(raw_features, axis=0) + transformed_features = model.feature_mapper(raw_features) + outputs = model(transformed_features) + outputs = tf.squeeze(outputs, axis=0) + outputs = tf.cast( + tf.math.greater(outputs, 0.5), tf.dtypes.int64 + ) + decoded_outputs = model.label_inverse_lookup_layer(outputs) + return tf.squeeze(decoded_outputs, axis=0) + + # Serving does NOT have batch dimension + return serve_fn.get_concrete_function( + tf.TensorSpec( + shape=(3), dtype=tf.dtypes.string, name="example" + ) + ) + + serving_fn = create_serving_signature(model) + + saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + model.save( + saved_model_dir, + save_format="tf", + signatures={"serving_default": serving_fn}, + ) + + # Test the saved_model. + loaded_serving_fn = tf.keras.models.load_model( + saved_model_dir + ).signatures["serving_default"] + + # Check model calling with serving signature. + prediction1 = loaded_serving_fn( + tf.constant(["avenger", "ironman", "avenger"]) + )["output_0"] + self.assertIn(prediction1, ("yes", "no")) + + prediction2 = loaded_serving_fn( + tf.constant(["ironman", "ironman", "unknown"]) + )["output_0"] + self.assertIn(prediction2, ("yes", "no")) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/integration_test/vectorized_map_test.py b/keras/integration_test/vectorized_map_test.py index 15c50caea397..5b215280b221 100644 --- a/keras/integration_test/vectorized_map_test.py +++ b/keras/integration_test/vectorized_map_test.py @@ -17,28 +17,28 @@ class VectorizedMapTest(tf.test.TestCase): - - def test_vectorized_map(self): - batch_size = 10 - num_features = 32 - layer = tf.keras.layers.Dense(1) - - def model_fn(arg): - with tf.GradientTape() as g: - inp, label = arg - inp = tf.expand_dims(inp, 0) - label = tf.expand_dims(label, 0) - prediction = layer(inp) - loss = tf.nn.l2_loss(label - prediction) - return g.gradient(loss, (layer.kernel, layer.bias)) - - inputs = tf.random.uniform([batch_size, num_features]) - labels = tf.random.uniform([batch_size, 1]) - per_example_gradients = tf.vectorized_map(model_fn, (inputs, labels)) - self.assertEqual(per_example_gradients[0].shape, - (batch_size, num_features, 1)) - self.assertEqual(per_example_gradients[1].shape, (batch_size, 1)) + def test_vectorized_map(self): + batch_size = 10 + num_features = 32 + layer = tf.keras.layers.Dense(1) + + def model_fn(arg): + with tf.GradientTape() as g: + inp, label = arg + inp = tf.expand_dims(inp, 0) + label = tf.expand_dims(label, 0) + prediction = layer(inp) + loss = tf.nn.l2_loss(label - prediction) + return g.gradient(loss, (layer.kernel, layer.bias)) + + inputs = tf.random.uniform([batch_size, num_features]) + labels = tf.random.uniform([batch_size, 1]) + per_example_gradients = tf.vectorized_map(model_fn, (inputs, labels)) + self.assertEqual( + per_example_gradients[0].shape, (batch_size, num_features, 1) + ) + self.assertEqual(per_example_gradients[1].shape, (batch_size, 1)) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/keras.bzl b/keras/keras.bzl index cbabaf8779ae..4a787d7b9901 100644 --- a/keras/keras.bzl +++ b/keras/keras.bzl @@ -1,5 +1,7 @@ """Keras common starlark macros.""" +# Placeholder: load aliased py_test + # Macro to run Keras py_tests 
against pip installation. def py_test(deps = [], data = [], kernels = [], **kwargs): native.py_test( @@ -152,3 +154,13 @@ def distribute_py_test( args = args, **kwargs ) + +# We are never indexing generated code in the OSS build, but still +# return a select() for consistency. +def if_indexing_source_code( + if_true, # @unused + if_false): + """Return a select() on whether or not we are building for source code indexing.""" + return select({ + "//conditions:default": if_false, + }) diff --git a/keras/kokoro/github/ubuntu/cpu/build.sh b/keras/kokoro/github/ubuntu/cpu/build.sh index c88a25605b3a..a826667f2eb7 100644 --- a/keras/kokoro/github/ubuntu/cpu/build.sh +++ b/keras/kokoro/github/ubuntu/cpu/build.sh @@ -43,6 +43,6 @@ pip uninstall -y keras-nightly bazel test --test_timeout 300,450,1200,3600 --test_output=errors --keep_going \ --define=use_fast_cpp_protos=false \ --build_tests_only \ - --build_tag_filters="-no_oss" \ - --test_tag_filters="-no_oss" \ + --build_tag_filters="-no_oss,-oss_excluded" \ + --test_tag_filters="-no_oss,-oss_excluded" \ -- //keras/... diff --git a/keras/kokoro/github/ubuntu/gpu/build.sh b/keras/kokoro/github/ubuntu/gpu/build.sh index 0095d639bb61..cc7f23bc81dc 100644 --- a/keras/kokoro/github/ubuntu/gpu/build.sh +++ b/keras/kokoro/github/ubuntu/gpu/build.sh @@ -38,11 +38,14 @@ pip install -r requirements.txt # keras code from local workspace. pip uninstall -y keras-nightly -export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" -export TF_CUDA_COMPUTE_CAPABILITIES=6.0 -TF_CUDA_CONFIG_REPO="@ubuntu16.04-py3-gcc7_manylinux2010-cuda10.1-cudnn7-tensorrt6.0_config_cuda" +# LD Library Path needs to be same as TensorFlow Ubuntu Docker build - +# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/tf_sig_build_dockerfiles/ +export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/tensorrt/lib" +CUDA_TOOLKIT_PATH="/usr/local/cuda-11.8" +TF_CUDA_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda11.8-cudnn8.6-tensorrt8.4_config_cuda" +TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80" -tag_filters="gpu,-no_gpu,-nogpu,-benchmark-test,-no_oss,-oss_serial,-no_gpu_presubmit" +tag_filters="gpu,-no_gpu,-nogpu,-benchmark-test,-no_oss,-oss_excluded,-oss_serial,-no_gpu_presubmit" # There are only 4 GPU available on the local test machine. TF_GPU_COUNT=4 TF_TESTS_PER_GPU=8 @@ -55,13 +58,13 @@ bazel test --test_timeout 300,600,1200,3600 --test_output=errors --keep_going \ --build_tests_only \ --action_env=TF_CUDA_COMPUTE_CAPABILITIES="${TF_CUDA_COMPUTE_CAPABILITIES}" \ --action_env=TF_CUDA_CONFIG_REPO="${TF_CUDA_CONFIG_REPO}" \ - --action_env=TF_CUDA_VERSION=10 \ - --action_env=TF_CUDNN_VERSION=7 \ + --action_env=TF_CUDA_VERSION=11 \ + --action_env=TF_CUDNN_VERSION=8 \ + --action_env=CUDA_TOOLKIT_PATH="${CUDA_TOOLKIT_PATH}" \ --test_env=TF_GPU_COUNT=${TF_GPU_COUNT} \ --test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} \ --build_tag_filters="${tag_filters}" \ --test_tag_filters="${tag_filters}" \ --run_under=@org_keras//keras/tools/gpu_build:parallel_gpu_execute \ --local_test_jobs=${LOCAL_TEST_JOBS} \ - --nodistinct_host_configuration \ -- //keras/... diff --git a/keras/layers/BUILD b/keras/layers/BUILD index 9d37404575d3..4c48d7e57c09 100644 --- a/keras/layers/BUILD +++ b/keras/layers/BUILD @@ -1,15 +1,17 @@ # Description: # Contains the Keras layers (internal TensorFlow version). 
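# How a select()-returning macro such as if_indexing_source_code() (added to
# keras.bzl above) is consumed from a BUILD file. Illustrative Starlark only;
# the target and file names are hypothetical:
load("@org_keras//keras:keras.bzl", "if_indexing_source_code")

py_library(
    name = "example_lib",
    srcs = ["example.py"] + if_indexing_source_code(
        if_true = ["generated_index_stub.py"],  # ignored in the OSS build
        if_false = [],
    ),
)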
+# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], # TODO(scottzhu): Remove non-keras deps from TF. default_visibility = [ "//keras:friends", "//third_party/tensorflow/python/distribute:__pkg__", "//third_party/tensorflow/python/feature_column:__pkg__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/trackable:__pkg__", "//third_party/tensorflow/tools/pip_package:__pkg__", ], licenses = ["notice"], diff --git a/keras/layers/__init__.py b/keras/layers/__init__.py index 3fc21041b185..6812e92aa4ec 100644 --- a/keras/layers/__init__.py +++ b/keras/layers/__init__.py @@ -14,106 +14,57 @@ # ============================================================================== """Keras layers API.""" +# isort: off import tensorflow.compat.v2 as tf -# pylint: disable=g-bad-import-order,g-direct-tensorflow-import,disable=g-import-not-at-top -from tensorflow.python import tf2 +from keras.engine.base_layer import Layer +from keras.engine.base_preprocessing_layer import PreprocessingLayer # Generic layers. from keras.engine.input_layer import Input from keras.engine.input_layer import InputLayer from keras.engine.input_spec import InputSpec -from keras.engine.base_layer import Layer -from keras.engine.base_preprocessing_layer import PreprocessingLayer - -# Image preprocessing layers. -from keras.layers.preprocessing.image_preprocessing import CenterCrop -from keras.layers.preprocessing.image_preprocessing import RandomCrop -from keras.layers.preprocessing.image_preprocessing import RandomFlip -from keras.layers.preprocessing.image_preprocessing import RandomContrast -from keras.layers.preprocessing.image_preprocessing import RandomHeight -from keras.layers.preprocessing.image_preprocessing import RandomRotation -from keras.layers.preprocessing.image_preprocessing import RandomTranslation -from keras.layers.preprocessing.image_preprocessing import RandomWidth -from keras.layers.preprocessing.image_preprocessing import RandomZoom -from keras.layers.preprocessing.image_preprocessing import Resizing -from keras.layers.preprocessing.image_preprocessing import Rescaling - -# Preprocessing layers. -from keras.layers.preprocessing.category_encoding import CategoryEncoding -from keras.layers.preprocessing.discretization import Discretization -from keras.layers.preprocessing.hashing import Hashing -from keras.layers.preprocessing.hashed_crossing import HashedCrossing -from keras.layers.preprocessing.integer_lookup import IntegerLookup -from keras.layers.preprocessing.normalization import Normalization -from keras.layers.preprocessing.string_lookup import StringLookup -from keras.layers.preprocessing.text_vectorization import TextVectorization +from keras.layers.activation.elu import ELU +from keras.layers.activation.leaky_relu import LeakyReLU +from keras.layers.activation.prelu import PReLU # Activations layers. from keras.layers.activation.relu import ReLU from keras.layers.activation.softmax import Softmax -from keras.layers.activation.leaky_relu import LeakyReLU -from keras.layers.activation.prelu import PReLU -from keras.layers.activation.elu import ELU from keras.layers.activation.thresholded_relu import ThresholdedReLU +from keras.layers.attention.additive_attention import AdditiveAttention +from keras.layers.attention.attention import Attention # Attention layers. 
from keras.layers.attention.multi_head_attention import MultiHeadAttention -from keras.layers.attention.attention import Attention -from keras.layers.attention.additive_attention import AdditiveAttention +# Convolution layer aliases. # Convolution layers. from keras.layers.convolutional.conv1d import Conv1D -from keras.layers.convolutional.conv2d import Conv2D -from keras.layers.convolutional.conv3d import Conv3D +from keras.layers.convolutional.conv1d import Convolution1D from keras.layers.convolutional.conv1d_transpose import Conv1DTranspose +from keras.layers.convolutional.conv1d_transpose import Convolution1DTranspose +from keras.layers.convolutional.conv2d import Conv2D +from keras.layers.convolutional.conv2d import Convolution2D from keras.layers.convolutional.conv2d_transpose import Conv2DTranspose +from keras.layers.convolutional.conv2d_transpose import Convolution2DTranspose +from keras.layers.convolutional.conv3d import Conv3D +from keras.layers.convolutional.conv3d import Convolution3D from keras.layers.convolutional.conv3d_transpose import Conv3DTranspose +from keras.layers.convolutional.conv3d_transpose import Convolution3DTranspose from keras.layers.convolutional.depthwise_conv1d import DepthwiseConv1D from keras.layers.convolutional.depthwise_conv2d import DepthwiseConv2D from keras.layers.convolutional.separable_conv1d import SeparableConv1D -from keras.layers.convolutional.separable_conv2d import SeparableConv2D - -# Convolution layer aliases. -from keras.layers.convolutional.conv1d import Convolution1D -from keras.layers.convolutional.conv2d import Convolution2D -from keras.layers.convolutional.conv3d import Convolution3D -from keras.layers.convolutional.conv1d_transpose import Convolution1DTranspose -from keras.layers.convolutional.conv2d_transpose import Convolution2DTranspose -from keras.layers.convolutional.conv3d_transpose import Convolution3DTranspose from keras.layers.convolutional.separable_conv1d import SeparableConvolution1D +from keras.layers.convolutional.separable_conv2d import SeparableConv2D from keras.layers.convolutional.separable_conv2d import SeparableConvolution2D -# Regularization layers. -from keras.layers.regularization.dropout import Dropout -from keras.layers.regularization.spatial_dropout1d import SpatialDropout1D -from keras.layers.regularization.spatial_dropout2d import SpatialDropout2D -from keras.layers.regularization.spatial_dropout3d import SpatialDropout3D -from keras.layers.regularization.gaussian_dropout import GaussianDropout -from keras.layers.regularization.gaussian_noise import GaussianNoise -from keras.layers.regularization.activity_regularization import ActivityRegularization -from keras.layers.regularization.alpha_dropout import AlphaDropout - -# Reshaping layers. 
-from keras.layers.reshaping.cropping1d import Cropping1D -from keras.layers.reshaping.cropping2d import Cropping2D -from keras.layers.reshaping.cropping3d import Cropping3D -from keras.layers.reshaping.flatten import Flatten -from keras.layers.reshaping.permute import Permute -from keras.layers.reshaping.repeat_vector import RepeatVector -from keras.layers.reshaping.reshape import Reshape -from keras.layers.reshaping.up_sampling1d import UpSampling1D -from keras.layers.reshaping.up_sampling2d import UpSampling2D -from keras.layers.reshaping.up_sampling3d import UpSampling3D -from keras.layers.reshaping.zero_padding1d import ZeroPadding1D -from keras.layers.reshaping.zero_padding2d import ZeroPadding2D -from keras.layers.reshaping.zero_padding3d import ZeroPadding3D - # Core layers. from keras.layers.core.activation import Activation from keras.layers.core.dense import Dense from keras.layers.core.einsum_dense import EinsumDense from keras.layers.core.embedding import Embedding +from keras.layers.core.identity import Identity from keras.layers.core.lambda_layer import Lambda from keras.layers.core.masking import Masking from keras.layers.core.tf_op_layer import ClassMethod @@ -123,148 +74,219 @@ from keras.layers.core.tf_op_layer import TFOpLambda # Locally-connected layers. -from keras.layers.locally_connected.locally_connected1d import LocallyConnected1D -from keras.layers.locally_connected.locally_connected2d import LocallyConnected2D +from keras.layers.locally_connected.locally_connected1d import ( + LocallyConnected1D, +) +from keras.layers.locally_connected.locally_connected2d import ( + LocallyConnected2D, +) +# Merging functions. # Merging layers. from keras.layers.merging.add import Add -from keras.layers.merging.subtract import Subtract -from keras.layers.merging.multiply import Multiply +from keras.layers.merging.add import add from keras.layers.merging.average import Average -from keras.layers.merging.maximum import Maximum -from keras.layers.merging.minimum import Minimum +from keras.layers.merging.average import average from keras.layers.merging.concatenate import Concatenate +from keras.layers.merging.concatenate import concatenate from keras.layers.merging.dot import Dot - -# Merging functions. -from keras.layers.merging.add import add -from keras.layers.merging.subtract import subtract -from keras.layers.merging.multiply import multiply -from keras.layers.merging.average import average +from keras.layers.merging.dot import dot +from keras.layers.merging.maximum import Maximum from keras.layers.merging.maximum import maximum +from keras.layers.merging.minimum import Minimum from keras.layers.merging.minimum import minimum -from keras.layers.merging.concatenate import concatenate -from keras.layers.merging.dot import dot +from keras.layers.merging.multiply import Multiply +from keras.layers.merging.multiply import multiply +from keras.layers.merging.subtract import Subtract +from keras.layers.merging.subtract import subtract +from keras.layers.normalization.batch_normalization import ( + SyncBatchNormalization, +) # Normalization layers. 
+from keras.layers.normalization.group_normalization import GroupNormalization from keras.layers.normalization.layer_normalization import LayerNormalization -from keras.layers.normalization.batch_normalization import SyncBatchNormalization from keras.layers.normalization.unit_normalization import UnitNormalization +from keras.layers.normalization.spectral_normalization import ( + SpectralNormalization, +) # noqa: E501 + +# Preprocessing layers. +from keras.layers.preprocessing.category_encoding import CategoryEncoding +from keras.layers.preprocessing.discretization import Discretization +from keras.layers.preprocessing.hashed_crossing import HashedCrossing +from keras.layers.preprocessing.hashing import Hashing + +# Image preprocessing layers. +from keras.layers.preprocessing.image_preprocessing import CenterCrop +from keras.layers.preprocessing.image_preprocessing import RandomBrightness +from keras.layers.preprocessing.image_preprocessing import RandomContrast +from keras.layers.preprocessing.image_preprocessing import RandomCrop +from keras.layers.preprocessing.image_preprocessing import RandomFlip +from keras.layers.preprocessing.image_preprocessing import RandomHeight +from keras.layers.preprocessing.image_preprocessing import RandomRotation +from keras.layers.preprocessing.image_preprocessing import RandomTranslation +from keras.layers.preprocessing.image_preprocessing import RandomWidth +from keras.layers.preprocessing.image_preprocessing import RandomZoom +from keras.layers.preprocessing.image_preprocessing import Rescaling +from keras.layers.preprocessing.image_preprocessing import Resizing +from keras.layers.preprocessing.integer_lookup import IntegerLookup +from keras.layers.preprocessing.normalization import Normalization +from keras.layers.preprocessing.string_lookup import StringLookup +from keras.layers.preprocessing.text_vectorization import TextVectorization +from keras.layers.regularization.activity_regularization import ( + ActivityRegularization, +) +from keras.layers.regularization.alpha_dropout import AlphaDropout + +# Regularization layers. +from keras.layers.regularization.dropout import Dropout +from keras.layers.regularization.gaussian_dropout import GaussianDropout +from keras.layers.regularization.gaussian_noise import GaussianNoise +from keras.layers.regularization.spatial_dropout1d import SpatialDropout1D +from keras.layers.regularization.spatial_dropout2d import SpatialDropout2D +from keras.layers.regularization.spatial_dropout3d import SpatialDropout3D + +# Reshaping layers. 
+from keras.layers.reshaping.cropping1d import Cropping1D +from keras.layers.reshaping.cropping2d import Cropping2D +from keras.layers.reshaping.cropping3d import Cropping3D +from keras.layers.reshaping.flatten import Flatten +from keras.layers.reshaping.permute import Permute +from keras.layers.reshaping.repeat_vector import RepeatVector +from keras.layers.reshaping.reshape import Reshape +from keras.layers.reshaping.up_sampling1d import UpSampling1D +from keras.layers.reshaping.up_sampling2d import UpSampling2D +from keras.layers.reshaping.up_sampling3d import UpSampling3D +from keras.layers.reshaping.zero_padding1d import ZeroPadding1D +from keras.layers.reshaping.zero_padding2d import ZeroPadding2D +from keras.layers.reshaping.zero_padding3d import ZeroPadding3D if tf.__internal__.tf2.enabled(): - from keras.layers.normalization.batch_normalization import BatchNormalization - from keras.layers.normalization.batch_normalization_v1 import BatchNormalization as BatchNormalizationV1 - BatchNormalizationV2 = BatchNormalization + from keras.layers.normalization.batch_normalization import ( + BatchNormalization, + ) + from keras.layers.normalization.batch_normalization_v1 import ( + BatchNormalization as BatchNormalizationV1, + ) + + BatchNormalizationV2 = BatchNormalization else: - from keras.layers.normalization.batch_normalization_v1 import BatchNormalization - from keras.layers.normalization.batch_normalization import BatchNormalization as BatchNormalizationV2 - BatchNormalizationV1 = BatchNormalization + from keras.layers.normalization.batch_normalization import ( + BatchNormalization as BatchNormalizationV2, + ) + from keras.layers.normalization.batch_normalization_v1 import ( + BatchNormalization, + ) + + BatchNormalizationV1 = BatchNormalization # Kernelized layers. from keras.layers.kernelized import RandomFourierFeatures +# Pooling layer aliases. # Pooling layers. from keras.layers.pooling.average_pooling1d import AveragePooling1D +from keras.layers.pooling.average_pooling1d import AvgPool1D from keras.layers.pooling.average_pooling2d import AveragePooling2D +from keras.layers.pooling.average_pooling2d import AvgPool2D from keras.layers.pooling.average_pooling3d import AveragePooling3D -from keras.layers.pooling.max_pooling1d import MaxPooling1D -from keras.layers.pooling.max_pooling2d import MaxPooling2D -from keras.layers.pooling.max_pooling3d import MaxPooling3D +from keras.layers.pooling.average_pooling3d import AvgPool3D from keras.layers.pooling.global_average_pooling1d import GlobalAveragePooling1D +from keras.layers.pooling.global_average_pooling1d import GlobalAvgPool1D from keras.layers.pooling.global_average_pooling2d import GlobalAveragePooling2D +from keras.layers.pooling.global_average_pooling2d import GlobalAvgPool2D from keras.layers.pooling.global_average_pooling3d import GlobalAveragePooling3D +from keras.layers.pooling.global_average_pooling3d import GlobalAvgPool3D +from keras.layers.pooling.global_max_pooling1d import GlobalMaxPool1D from keras.layers.pooling.global_max_pooling1d import GlobalMaxPooling1D +from keras.layers.pooling.global_max_pooling2d import GlobalMaxPool2D from keras.layers.pooling.global_max_pooling2d import GlobalMaxPooling2D +from keras.layers.pooling.global_max_pooling3d import GlobalMaxPool3D from keras.layers.pooling.global_max_pooling3d import GlobalMaxPooling3D - -# Pooling layer aliases. 
-from keras.layers.pooling.average_pooling1d import AvgPool1D -from keras.layers.pooling.average_pooling2d import AvgPool2D -from keras.layers.pooling.average_pooling3d import AvgPool3D from keras.layers.pooling.max_pooling1d import MaxPool1D +from keras.layers.pooling.max_pooling1d import MaxPooling1D from keras.layers.pooling.max_pooling2d import MaxPool2D +from keras.layers.pooling.max_pooling2d import MaxPooling2D from keras.layers.pooling.max_pooling3d import MaxPool3D -from keras.layers.pooling.global_average_pooling1d import GlobalAvgPool1D -from keras.layers.pooling.global_average_pooling2d import GlobalAvgPool2D -from keras.layers.pooling.global_average_pooling3d import GlobalAvgPool3D -from keras.layers.pooling.global_max_pooling1d import GlobalMaxPool1D -from keras.layers.pooling.global_max_pooling2d import GlobalMaxPool2D -from keras.layers.pooling.global_max_pooling3d import GlobalMaxPool3D +from keras.layers.pooling.max_pooling3d import MaxPooling3D +from keras.layers.rnn.abstract_rnn_cell import AbstractRNNCell # Recurrent layers. from keras.layers.rnn.base_rnn import RNN -from keras.layers.rnn.abstract_rnn_cell import AbstractRNNCell -from keras.layers.rnn.stacked_rnn_cells import StackedRNNCells -from keras.layers.rnn.simple_rnn import SimpleRNNCell from keras.layers.rnn.simple_rnn import SimpleRNN +from keras.layers.rnn.simple_rnn import SimpleRNNCell +from keras.layers.rnn.stacked_rnn_cells import StackedRNNCells if tf.__internal__.tf2.enabled(): - from keras.layers.rnn.gru import GRU - from keras.layers.rnn.gru import GRUCell - from keras.layers.rnn.lstm import LSTM - from keras.layers.rnn.lstm import LSTMCell - from keras.layers.rnn.gru_v1 import GRU as GRUV1 - from keras.layers.rnn.gru_v1 import GRUCell as GRUCellV1 - from keras.layers.rnn.lstm_v1 import LSTM as LSTMV1 - from keras.layers.rnn.lstm_v1 import LSTMCell as LSTMCellV1 - GRUV2 = GRU - GRUCellV2 = GRUCell - LSTMV2 = LSTM - LSTMCellV2 = LSTMCell + from keras.layers.rnn.gru import GRU + from keras.layers.rnn.gru import GRUCell + from keras.layers.rnn.gru_v1 import GRU as GRUV1 + from keras.layers.rnn.gru_v1 import GRUCell as GRUCellV1 + from keras.layers.rnn.lstm import LSTM + from keras.layers.rnn.lstm import LSTMCell + from keras.layers.rnn.lstm_v1 import LSTM as LSTMV1 + from keras.layers.rnn.lstm_v1 import LSTMCell as LSTMCellV1 + + GRUV2 = GRU + GRUCellV2 = GRUCell + LSTMV2 = LSTM + LSTMCellV2 = LSTMCell else: - from keras.layers.rnn.gru_v1 import GRU - from keras.layers.rnn.gru_v1 import GRUCell - from keras.layers.rnn.lstm_v1 import LSTM - from keras.layers.rnn.lstm_v1 import LSTMCell - from keras.layers.rnn.gru import GRU as GRUV2 - from keras.layers.rnn.gru import GRUCell as GRUCellV2 - from keras.layers.rnn.lstm import LSTM as LSTMV2 - from keras.layers.rnn.lstm import LSTMCell as LSTMCellV2 - GRUV1 = GRU - GRUCellV1 = GRUCell - LSTMV1 = LSTM - LSTMCellV1 = LSTMCell + from keras.layers.rnn.gru import GRU as GRUV2 + from keras.layers.rnn.gru import GRUCell as GRUCellV2 + from keras.layers.rnn.gru_v1 import GRU + from keras.layers.rnn.gru_v1 import GRUCell + from keras.layers.rnn.lstm import LSTM as LSTMV2 + from keras.layers.rnn.lstm import LSTMCell as LSTMCellV2 + from keras.layers.rnn.lstm_v1 import LSTM + from keras.layers.rnn.lstm_v1 import LSTMCell -# Convolutional-recurrent layers. 
-from keras.layers.rnn.conv_lstm1d import ConvLSTM1D -from keras.layers.rnn.conv_lstm2d import ConvLSTM2D -from keras.layers.rnn.conv_lstm3d import ConvLSTM3D + GRUV1 = GRU + GRUCellV1 = GRUCell + LSTMV1 = LSTM + LSTMCellV1 = LSTMCell -# cuDNN recurrent layers. -from keras.layers.rnn.cudnn_lstm import CuDNNLSTM -from keras.layers.rnn.cudnn_gru import CuDNNGRU +# Serialization functions. +from keras.layers import serialization # Wrapper functions. from keras.layers.rnn.base_wrapper import Wrapper from keras.layers.rnn.bidirectional import Bidirectional -from keras.layers.rnn.time_distributed import TimeDistributed # RNN Cell wrappers. from keras.layers.rnn.cell_wrappers import DeviceWrapper from keras.layers.rnn.cell_wrappers import DropoutWrapper from keras.layers.rnn.cell_wrappers import ResidualWrapper -# Serialization functions. -from keras.layers import serialization +# Convolutional-recurrent layers. +from keras.layers.rnn.conv_lstm1d import ConvLSTM1D +from keras.layers.rnn.conv_lstm2d import ConvLSTM2D +from keras.layers.rnn.conv_lstm3d import ConvLSTM3D +from keras.layers.rnn.cudnn_gru import CuDNNGRU + +# cuDNN recurrent layers. +from keras.layers.rnn.cudnn_lstm import CuDNNLSTM +from keras.layers.rnn.time_distributed import TimeDistributed from keras.layers.serialization import deserialize from keras.layers.serialization import deserialize_from_json -from keras.layers.serialization import serialize from keras.layers.serialization import get_builtin_layer +from keras.layers.serialization import serialize class VersionAwareLayers: - """Utility to be used internally to access layers in a V1/V2-aware fashion. - - When using layers within the Keras codebase, under the constraint that - e.g. `layers.BatchNormalization` should be the `BatchNormalization` version - corresponding to the current runtime (TF1 or TF2), do not simply access - `layers.BatchNormalization` since it would ignore e.g. an early - `compat.v2.disable_v2_behavior()` call. Instead, use an instance - of `VersionAwareLayers` (which you can use just like the `layers` module). - """ - - def __getattr__(self, name): - serialization.populate_deserializable_objects() - if name in serialization.LOCAL.ALL_OBJECTS: - return serialization.LOCAL.ALL_OBJECTS[name] - return super().__getattr__(name) + """Utility to be used internally to access layers in a V1/V2-aware fashion. + + When using layers within the Keras codebase, under the constraint that + e.g. `layers.BatchNormalization` should be the `BatchNormalization` version + corresponding to the current runtime (TF1 or TF2), do not simply access + `layers.BatchNormalization` since it would ignore e.g. an early + `compat.v2.disable_v2_behavior()` call. Instead, use an instance + of `VersionAwareLayers` (which you can use just like the `layers` module). + """ + + def __getattr__(self, name): + serialization.populate_deserializable_objects() + if name in serialization.LOCAL.ALL_OBJECTS: + return serialization.LOCAL.ALL_OBJECTS[name] + return super().__getattr__(name) diff --git a/keras/layers/activation/BUILD b/keras/layers/activation/BUILD index 8ca482de7223..2b81f4897a5f 100644 --- a/keras/layers/activation/BUILD +++ b/keras/layers/activation/BUILD @@ -1,9 +1,11 @@ # Description: # Contains the Keras activation layers. 
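# Usage note for VersionAwareLayers (end of keras/layers/__init__.py above):
# attribute access resolves the class at call time, so the V1/V2 choice
# tracks the active TF behavior rather than import order. Sketch (assumes
# TF 2.x):
from keras.layers import VersionAwareLayers

layers = VersionAwareLayers()
bn = layers.BatchNormalization()  # V2 class under TF2; V1 after an early
                                  # tf.compat.v1.disable_v2_behavior()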
+# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", ], diff --git a/keras/layers/activation/__init__.py b/keras/layers/activation/__init__.py index c39011ade3ea..f571762759e4 100644 --- a/keras/layers/activation/__init__.py +++ b/keras/layers/activation/__init__.py @@ -13,11 +13,11 @@ # limitations under the License. # ============================================================================== """Layers that act as activation functions.""" -# pylint: disable=g-bad-import-order -from keras.layers.activation.relu import ReLU -from keras.layers.activation.softmax import Softmax + +from keras.layers.activation.elu import ELU from keras.layers.activation.leaky_relu import LeakyReLU from keras.layers.activation.prelu import PReLU -from keras.layers.activation.elu import ELU +from keras.layers.activation.relu import ReLU +from keras.layers.activation.softmax import Softmax from keras.layers.activation.thresholded_relu import ThresholdedReLU diff --git a/keras/layers/activation/elu.py b/keras/layers/activation/elu.py index 598313325808..8bba10fb7080 100644 --- a/keras/layers/activation/elu.py +++ b/keras/layers/activation/elu.py @@ -13,55 +13,57 @@ # limitations under the License. # ============================================================================== """Exponential Linear Unit activation layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import backend from keras.engine.base_layer import Layer from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ELU') +@keras_export("keras.layers.ELU") class ELU(Layer): - """Exponential Linear Unit. + """Exponential Linear Unit. - It follows: + It follows: - ``` - f(x) = alpha * (exp(x) - 1.) for x < 0 - f(x) = x for x >= 0 - ``` + ``` + f(x) = alpha * (exp(x) - 1.) for x < 0 + f(x) = x for x >= 0 + ``` - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. - Output shape: - Same shape as the input. + Output shape: + Same shape as the input. - Args: - alpha: Scale for the negative factor. - """ + Args: + alpha: Scale for the negative factor. + """ - def __init__(self, alpha=1.0, **kwargs): - super().__init__(**kwargs) - if alpha is None: - raise ValueError( - 'Alpha of an ELU layer cannot be None, expecting a float. ' - f'Received: {alpha}') - self.supports_masking = True - self.alpha = backend.cast_to_floatx(alpha) + def __init__(self, alpha=1.0, **kwargs): + super().__init__(**kwargs) + if alpha is None: + raise ValueError( + "Alpha of an ELU layer cannot be None, expecting a float. 
" + f"Received: {alpha}" + ) + self.supports_masking = True + self.alpha = backend.cast_to_floatx(alpha) - def call(self, inputs): - return backend.elu(inputs, self.alpha) + def call(self, inputs): + return backend.elu(inputs, self.alpha) - def get_config(self): - config = {'alpha': float(self.alpha)} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = {"alpha": float(self.alpha)} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - return input_shape + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras/layers/activation/elu_test.py b/keras/layers/activation/elu_test.py index 14cf9cc53e69..63f20d12b8e4 100644 --- a/keras/layers/activation/elu_test.py +++ b/keras/layers/activation/elu_test.py @@ -14,33 +14,38 @@ # ============================================================================== """Tests for ELU layer.""" +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class ELUTest(test_combinations.TestCase): + def test_elu(self): + for alpha in [0.0, 0.5, -1.0]: + test_utils.layer_test( + keras.layers.ELU, + kwargs={"alpha": alpha}, + input_shape=(2, 3, 4), + supports_masking=True, + ) - def test_elu(self): - for alpha in [0., .5, -1.]: - test_utils.layer_test(keras.layers.ELU, - kwargs={'alpha': alpha}, - input_shape=(2, 3, 4), - supports_masking=True) - - def test_elu_with_invalid_alpha(self): - # Test case for GitHub issue 46993. - with self.assertRaisesRegex( - ValueError, 'Alpha of an ELU layer cannot be None, ' - 'expecting a float. Received: None'): - test_utils.layer_test( - keras.layers.ELU, - kwargs={'alpha': None}, - input_shape=(2, 3, 4), - supports_masking=True) + def test_elu_with_invalid_alpha(self): + # Test case for GitHub issue 46993. + with self.assertRaisesRegex( + ValueError, + "Alpha of an ELU layer cannot be None, " + "expecting a float. Received: None", + ): + test_utils.layer_test( + keras.layers.ELU, + kwargs={"alpha": None}, + input_shape=(2, 3, 4), + supports_masking=True, + ) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/activation/leaky_relu.py b/keras/layers/activation/leaky_relu.py index 4c382dea76be..fa3e373d734c 100644 --- a/keras/layers/activation/leaky_relu.py +++ b/keras/layers/activation/leaky_relu.py @@ -13,67 +13,69 @@ # limitations under the License. # ============================================================================== """Leaky version of a Rectified Linear Unit activation layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import backend from keras.engine.base_layer import Layer from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.LeakyReLU') +@keras_export("keras.layers.LeakyReLU") class LeakyReLU(Layer): - """Leaky version of a Rectified Linear Unit. 
- - It allows a small gradient when the unit is not active: - - ``` - f(x) = alpha * x if x < 0 - f(x) = x if x >= 0 - ``` - - Usage: - - >>> layer = tf.keras.layers.LeakyReLU() - >>> output = layer([-3.0, -1.0, 0.0, 2.0]) - >>> list(output.numpy()) - [-0.9, -0.3, 0.0, 2.0] - >>> layer = tf.keras.layers.LeakyReLU(alpha=0.1) - >>> output = layer([-3.0, -1.0, 0.0, 2.0]) - >>> list(output.numpy()) - [-0.3, -0.1, 0.0, 2.0] - - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the batch axis) - when using this layer as the first layer in a model. - - Output shape: - Same shape as the input. - - Args: - alpha: Float >= 0. Negative slope coefficient. Default to 0.3. - - """ - - def __init__(self, alpha=0.3, **kwargs): - super().__init__(**kwargs) - if alpha is None: - raise ValueError( - 'The alpha value of a Leaky ReLU layer cannot be None, ' - f'Expecting a float. Received: {alpha}') - self.supports_masking = True - self.alpha = backend.cast_to_floatx(alpha) - - def call(self, inputs): - return backend.relu(inputs, alpha=self.alpha) - - def get_config(self): - config = {'alpha': float(self.alpha)} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - return input_shape + """Leaky version of a Rectified Linear Unit. + + It allows a small gradient when the unit is not active: + + ``` + f(x) = alpha * x if x < 0 + f(x) = x if x >= 0 + ``` + + Usage: + + >>> layer = tf.keras.layers.LeakyReLU() + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [-0.9, -0.3, 0.0, 2.0] + >>> layer = tf.keras.layers.LeakyReLU(alpha=0.1) + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [-0.3, -0.1, 0.0, 2.0] + + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the batch axis) + when using this layer as the first layer in a model. + + Output shape: + Same shape as the input. + + Args: + alpha: Float >= `0.`. Negative slope coefficient. Defaults to `0.3`. + + """ + + def __init__(self, alpha=0.3, **kwargs): + super().__init__(**kwargs) + if alpha is None: + raise ValueError( + "The alpha value of a Leaky ReLU layer cannot be None, " + f"Expecting a float. 
Received: {alpha}" + ) + self.supports_masking = True + self.alpha = backend.cast_to_floatx(alpha) + + def call(self, inputs): + return backend.relu(inputs, alpha=self.alpha) + + def get_config(self): + config = {"alpha": float(self.alpha)} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras/layers/activation/leaky_relu_test.py b/keras/layers/activation/leaky_relu_test.py index 9cbbc809b7fe..13d25699b3c3 100644 --- a/keras/layers/activation/leaky_relu_test.py +++ b/keras/layers/activation/leaky_relu_test.py @@ -14,33 +14,38 @@ # ============================================================================== """Tests for LeakyReLU layer.""" +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class LeakyReLUTest(test_combinations.TestCase): + def test_leaky_relu(self): + for alpha in [0.0, 0.5]: + test_utils.layer_test( + keras.layers.LeakyReLU, + kwargs={"alpha": alpha}, + input_shape=(2, 3, 4), + supports_masking=True, + ) - def test_leaky_relu(self): - for alpha in [0., .5]: - test_utils.layer_test(keras.layers.LeakyReLU, - kwargs={'alpha': alpha}, - input_shape=(2, 3, 4), - supports_masking=True) - - def test_leaky_relu_with_invalid_alpha(self): - # Test case for GitHub issue 46993. - with self.assertRaisesRegex( - ValueError, 'The alpha value of a Leaky ReLU layer ' - 'cannot be None. Expecting a float. Received: None'): - test_utils.layer_test( - keras.layers.LeakyReLU, - kwargs={'alpha': None}, - input_shape=(2, 3, 4), - supports_masking=True) + def test_leaky_relu_with_invalid_alpha(self): + # Test case for GitHub issue 46993. + with self.assertRaisesRegex( + ValueError, + "The alpha value of a Leaky ReLU layer " + "cannot be None. Expecting a float. Received: None", + ): + test_utils.layer_test( + keras.layers.LeakyReLU, + kwargs={"alpha": None}, + input_shape=(2, 3, 4), + supports_masking=True, + ) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/activation/prelu.py b/keras/layers/activation/prelu.py index 94b1738e7c6a..09164599df54 100644 --- a/keras/layers/activation/prelu.py +++ b/keras/layers/activation/prelu.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== """Parametric Rectified Linear Unit activation layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import backend from keras import constraints @@ -23,98 +23,102 @@ from keras.engine.input_spec import InputSpec from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.PReLU') +@keras_export("keras.layers.PReLU") class PReLU(Layer): - """Parametric Rectified Linear Unit. - - It follows: - - ``` - f(x) = alpha * x for x < 0 - f(x) = x for x >= 0 - ``` - - where `alpha` is a learned array with the same shape as x. - - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - Output shape: - Same shape as the input. - - Args: - alpha_initializer: Initializer function for the weights. 
- alpha_regularizer: Regularizer for the weights. - alpha_constraint: Constraint for the weights. - shared_axes: The axes along which to share learnable - parameters for the activation function. - For example, if the incoming feature maps - are from a 2D convolution - with output shape `(batch, height, width, channels)`, - and you wish to share parameters across space - so that each filter only has one set of parameters, - set `shared_axes=[1, 2]`. - """ - - def __init__(self, - alpha_initializer='zeros', - alpha_regularizer=None, - alpha_constraint=None, - shared_axes=None, - **kwargs): - super().__init__(**kwargs) - self.supports_masking = True - self.alpha_initializer = initializers.get(alpha_initializer) - self.alpha_regularizer = regularizers.get(alpha_regularizer) - self.alpha_constraint = constraints.get(alpha_constraint) - if shared_axes is None: - self.shared_axes = None - elif not isinstance(shared_axes, (list, tuple)): - self.shared_axes = [shared_axes] - else: - self.shared_axes = list(shared_axes) - - @tf_utils.shape_type_conversion - def build(self, input_shape): - param_shape = list(input_shape[1:]) - if self.shared_axes is not None: - for i in self.shared_axes: - param_shape[i - 1] = 1 - self.alpha = self.add_weight( - shape=param_shape, - name='alpha', - initializer=self.alpha_initializer, - regularizer=self.alpha_regularizer, - constraint=self.alpha_constraint) - # Set input spec - axes = {} - if self.shared_axes: - for i in range(1, len(input_shape)): - if i not in self.shared_axes: - axes[i] = input_shape[i] - self.input_spec = InputSpec(ndim=len(input_shape), axes=axes) - self.built = True - - def call(self, inputs): - pos = backend.relu(inputs) - neg = -self.alpha * backend.relu(-inputs) - return pos + neg - - def get_config(self): - config = { - 'alpha_initializer': initializers.serialize(self.alpha_initializer), - 'alpha_regularizer': regularizers.serialize(self.alpha_regularizer), - 'alpha_constraint': constraints.serialize(self.alpha_constraint), - 'shared_axes': self.shared_axes - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - return input_shape + """Parametric Rectified Linear Unit. + + It follows: + + ``` + f(x) = alpha * x for x < 0 + f(x) = x for x >= 0 + ``` + + where `alpha` is a learned array with the same shape as x. + + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + + Output shape: + Same shape as the input. + + Args: + alpha_initializer: Initializer function for the weights. + alpha_regularizer: Regularizer for the weights. + alpha_constraint: Constraint for the weights. + shared_axes: The axes along which to share learnable + parameters for the activation function. + For example, if the incoming feature maps + are from a 2D convolution + with output shape `(batch, height, width, channels)`, + and you wish to share parameters across space + so that each filter only has one set of parameters, + set `shared_axes=[1, 2]`. 
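# Concrete effect of shared_axes on the learned alpha, matching the build()
# logic below: every shared axis collapses to size 1 in alpha's shape.
# Sketch (assumes TF 2.x and NHWC conv features):
import tensorflow as tf

p = tf.keras.layers.PReLU(shared_axes=[1, 2])
p.build((None, 8, 8, 16))
print(p.alpha.shape)  # (1, 1, 16): one parameter per channel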
+ """ + + def __init__( + self, + alpha_initializer="zeros", + alpha_regularizer=None, + alpha_constraint=None, + shared_axes=None, + **kwargs + ): + super().__init__(**kwargs) + self.supports_masking = True + self.alpha_initializer = initializers.get(alpha_initializer) + self.alpha_regularizer = regularizers.get(alpha_regularizer) + self.alpha_constraint = constraints.get(alpha_constraint) + if shared_axes is None: + self.shared_axes = None + elif not isinstance(shared_axes, (list, tuple)): + self.shared_axes = [shared_axes] + else: + self.shared_axes = list(shared_axes) + + @tf_utils.shape_type_conversion + def build(self, input_shape): + param_shape = list(input_shape[1:]) + if self.shared_axes is not None: + for i in self.shared_axes: + param_shape[i - 1] = 1 + self.alpha = self.add_weight( + shape=param_shape, + name="alpha", + initializer=self.alpha_initializer, + regularizer=self.alpha_regularizer, + constraint=self.alpha_constraint, + ) + # Set input spec + axes = {} + if self.shared_axes: + for i in range(1, len(input_shape)): + if i not in self.shared_axes: + axes[i] = input_shape[i] + self.input_spec = InputSpec(ndim=len(input_shape), axes=axes) + self.built = True + + def call(self, inputs): + pos = backend.relu(inputs) + neg = -self.alpha * backend.relu(-inputs) + return pos + neg + + def get_config(self): + config = { + "alpha_initializer": initializers.serialize(self.alpha_initializer), + "alpha_regularizer": regularizers.serialize(self.alpha_regularizer), + "alpha_constraint": constraints.serialize(self.alpha_constraint), + "shared_axes": self.shared_axes, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras/layers/activation/prelu_test.py b/keras/layers/activation/prelu_test.py index 382bbe66ec6b..0d07f3aa9c51 100644 --- a/keras/layers/activation/prelu_test.py +++ b/keras/layers/activation/prelu_test.py @@ -14,26 +14,31 @@ # ============================================================================== """Tests for PReLU layer.""" +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class PReLUTest(test_combinations.TestCase): + def test_prelu(self): + test_utils.layer_test( + keras.layers.PReLU, + kwargs={}, + input_shape=(2, 3, 4), + supports_masking=True, + ) - def test_prelu(self): - test_utils.layer_test(keras.layers.PReLU, kwargs={}, - input_shape=(2, 3, 4), - supports_masking=True) - - def test_prelu_share(self): - test_utils.layer_test(keras.layers.PReLU, - kwargs={'shared_axes': 1}, - input_shape=(2, 3, 4), - supports_masking=True) + def test_prelu_share(self): + test_utils.layer_test( + keras.layers.PReLU, + kwargs={"shared_axes": 1}, + input_shape=(2, 3, 4), + supports_masking=True, + ) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/activation/relu.py b/keras/layers/activation/relu.py index b714c70c900c..dbb5f2194b1c 100644 --- a/keras/layers/activation/relu.py +++ b/keras/layers/activation/relu.py @@ -13,100 +13,112 @@ # limitations under the License. 
# ============================================================================== """Rectified Linear Unit activation layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import backend from keras.engine.base_layer import Layer from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ReLU') +@keras_export("keras.layers.ReLU") class ReLU(Layer): - """Rectified Linear Unit activation function. - - With default values, it returns element-wise `max(x, 0)`. - - Otherwise, it follows: - - ``` - f(x) = max_value if x >= max_value - f(x) = x if threshold <= x < max_value - f(x) = negative_slope * (x - threshold) otherwise - ``` - - Usage: - - >>> layer = tf.keras.layers.ReLU() - >>> output = layer([-3.0, -1.0, 0.0, 2.0]) - >>> list(output.numpy()) - [0.0, 0.0, 0.0, 2.0] - >>> layer = tf.keras.layers.ReLU(max_value=1.0) - >>> output = layer([-3.0, -1.0, 0.0, 2.0]) - >>> list(output.numpy()) - [0.0, 0.0, 0.0, 1.0] - >>> layer = tf.keras.layers.ReLU(negative_slope=1.0) - >>> output = layer([-3.0, -1.0, 0.0, 2.0]) - >>> list(output.numpy()) - [-3.0, -1.0, 0.0, 2.0] - >>> layer = tf.keras.layers.ReLU(threshold=1.5) - >>> output = layer([-3.0, -1.0, 1.0, 2.0]) - >>> list(output.numpy()) - [0.0, 0.0, 0.0, 2.0] - - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the batch axis) - when using this layer as the first layer in a model. - - Output shape: - Same shape as the input. - - Args: - max_value: Float >= 0. Maximum activation value. Default to None, which - means unlimited. - negative_slope: Float >= 0. Negative slope coefficient. Default to 0. - threshold: Float >= 0. Threshold value for thresholded activation. Default - to 0. - """ - - def __init__(self, max_value=None, negative_slope=0., threshold=0., **kwargs): - super().__init__(**kwargs) - if max_value is not None and max_value < 0.: - raise ValueError('max_value of a ReLU layer cannot be a negative ' - f'value. Received: {max_value}') - if negative_slope is None or negative_slope < 0.: - raise ValueError('negative_slope of a ReLU layer cannot be a negative ' - f'value. Received: {negative_slope}') - if threshold is None or threshold < 0.: - raise ValueError('threshold of a ReLU layer cannot be a negative ' - f'value. Received: {threshold}') - - self.supports_masking = True - if max_value is not None: - max_value = backend.cast_to_floatx(max_value) - self.max_value = max_value - self.negative_slope = backend.cast_to_floatx(negative_slope) - self.threshold = backend.cast_to_floatx(threshold) - - def call(self, inputs): - # alpha is used for leaky relu slope in activations instead of - # negative_slope. - return backend.relu(inputs, - alpha=self.negative_slope, - max_value=self.max_value, - threshold=self.threshold) - - def get_config(self): - config = { - 'max_value': self.max_value, - 'negative_slope': self.negative_slope, - 'threshold': self.threshold - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - return input_shape + """Rectified Linear Unit activation function. + + With default values, it returns element-wise `max(x, 0)`. 
+ + Otherwise, it follows: + + ``` + f(x) = max_value if x >= max_value + f(x) = x if threshold <= x < max_value + f(x) = negative_slope * (x - threshold) otherwise + ``` + + Usage: + + >>> layer = tf.keras.layers.ReLU() + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [0.0, 0.0, 0.0, 2.0] + >>> layer = tf.keras.layers.ReLU(max_value=1.0) + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [0.0, 0.0, 0.0, 1.0] + >>> layer = tf.keras.layers.ReLU(negative_slope=1.0) + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [-3.0, -1.0, 0.0, 2.0] + >>> layer = tf.keras.layers.ReLU(threshold=1.5) + >>> output = layer([-3.0, -1.0, 1.0, 2.0]) + >>> list(output.numpy()) + [0.0, 0.0, 0.0, 2.0] + + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the batch axis) + when using this layer as the first layer in a model. + + Output shape: + Same shape as the input. + + Args: + max_value: Float >= 0. Maximum activation value. None means unlimited. + Defaults to `None`. + negative_slope: Float >= 0. Negative slope coefficient. + Defaults to `0.`. + threshold: Float >= 0. Threshold value for thresholded activation. + Defaults to `0.`. + """ + + def __init__( + self, max_value=None, negative_slope=0.0, threshold=0.0, **kwargs + ): + super().__init__(**kwargs) + if max_value is not None and max_value < 0.0: + raise ValueError( + "max_value of a ReLU layer cannot be a negative " + f"value. Received: {max_value}" + ) + if negative_slope is None or negative_slope < 0.0: + raise ValueError( + "negative_slope of a ReLU layer cannot be a negative " + f"value. Received: {negative_slope}" + ) + if threshold is None or threshold < 0.0: + raise ValueError( + "threshold of a ReLU layer cannot be a negative " + f"value. Received: {threshold}" + ) + + self.supports_masking = True + if max_value is not None: + max_value = backend.cast_to_floatx(max_value) + self.max_value = max_value + self.negative_slope = backend.cast_to_floatx(negative_slope) + self.threshold = backend.cast_to_floatx(threshold) + + def call(self, inputs): + # alpha is used for leaky relu slope in activations instead of + # negative_slope. 
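To see how the three `ReLU` arguments interact, here is a small numeric check of the piecewise definition in the docstring above (illustrative only, not part of the diff):

```python
import tensorflow as tf

layer = tf.keras.layers.ReLU(max_value=2.0, negative_slope=0.5, threshold=1.0)
x = tf.constant([-2.0, 0.5, 1.5, 3.0])
# x = -2.0: x < threshold        -> 0.5 * (-2.0 - 1.0) = -1.5
# x =  0.5: x < threshold        -> 0.5 * ( 0.5 - 1.0) = -0.25
# x =  1.5: threshold <= x < max -> 1.5 (identity)
# x =  3.0: x >= max_value       -> clipped to 2.0
print(layer(x).numpy())  # [-1.5, -0.25, 1.5, 2.0]
```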
+ return backend.relu( + inputs, + alpha=self.negative_slope, + max_value=self.max_value, + threshold=self.threshold, + ) + + def get_config(self): + config = { + "max_value": self.max_value, + "negative_slope": self.negative_slope, + "threshold": self.threshold, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras/layers/activation/relu_test.py b/keras/layers/activation/relu_test.py index 1d4daad98a63..70ded16275d6 100644 --- a/keras/layers/activation/relu_test.py +++ b/keras/layers/activation/relu_test.py @@ -14,88 +14,104 @@ # ============================================================================== """Tests for ReLU layer.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class ReLUTest(test_combinations.TestCase): + def test_relu(self): + test_utils.layer_test( + keras.layers.ReLU, + kwargs={"max_value": 10}, + input_shape=(2, 3, 4), + supports_masking=True, + ) + x = keras.backend.ones((3, 4)) + if not tf.executing_eagerly(): + # Test that we use `leaky_relu` when appropriate in graph mode. + self.assertIn( + "LeakyRelu", keras.layers.ReLU(negative_slope=0.2)(x).name + ) + # Test that we use `relu` when appropriate in graph mode. + self.assertIn("Relu", keras.layers.ReLU()(x).name) + # Test that we use `relu6` when appropriate in graph mode. + self.assertIn("Relu6", keras.layers.ReLU(max_value=6)(x).name) - def test_relu(self): - test_utils.layer_test(keras.layers.ReLU, - kwargs={'max_value': 10}, - input_shape=(2, 3, 4), - supports_masking=True) - x = keras.backend.ones((3, 4)) - if not tf.executing_eagerly(): - # Test that we use `leaky_relu` when appropriate in graph mode. - self.assertIn('LeakyRelu', keras.layers.ReLU(negative_slope=0.2)(x).name) - # Test that we use `relu` when appropriate in graph mode. - self.assertIn('Relu', keras.layers.ReLU()(x).name) - # Test that we use `relu6` when appropriate in graph mode. - self.assertIn('Relu6', keras.layers.ReLU(max_value=6)(x).name) - - def test_relu_with_invalid_max_value(self): - with self.assertRaisesRegex( - ValueError, 'max_value of a ReLU layer cannot be a negative ' - 'value. Received: -10'): - test_utils.layer_test( - keras.layers.ReLU, - kwargs={'max_value': -10}, - input_shape=(2, 3, 4), - supports_masking=True) + def test_relu_with_invalid_max_value(self): + with self.assertRaisesRegex( + ValueError, + "max_value of a ReLU layer cannot be a negative " + "value. Received: -10", + ): + test_utils.layer_test( + keras.layers.ReLU, + kwargs={"max_value": -10}, + input_shape=(2, 3, 4), + supports_masking=True, + ) - def test_relu_with_invalid_negative_slope(self): - with self.assertRaisesRegex( - ValueError, 'negative_slope of a ReLU layer cannot be a negative ' - 'value. Received: None'): - test_utils.layer_test( - keras.layers.ReLU, - kwargs={'negative_slope': None}, - input_shape=(2, 3, 4), - supports_masking=True) + def test_relu_with_invalid_negative_slope(self): + with self.assertRaisesRegex( + ValueError, + "negative_slope of a ReLU layer cannot be a negative " + "value. 
Received: None", + ): + test_utils.layer_test( + keras.layers.ReLU, + kwargs={"negative_slope": None}, + input_shape=(2, 3, 4), + supports_masking=True, + ) - with self.assertRaisesRegex( - ValueError, 'negative_slope of a ReLU layer cannot be a negative ' - 'value. Received: -10'): - test_utils.layer_test( - keras.layers.ReLU, - kwargs={'negative_slope': -10}, - input_shape=(2, 3, 4), - supports_masking=True) + with self.assertRaisesRegex( + ValueError, + "negative_slope of a ReLU layer cannot be a negative " + "value. Received: -10", + ): + test_utils.layer_test( + keras.layers.ReLU, + kwargs={"negative_slope": -10}, + input_shape=(2, 3, 4), + supports_masking=True, + ) - def test_relu_with_invalid_threshold(self): - with self.assertRaisesRegex( - ValueError, 'threshold of a ReLU layer cannot be a negative ' - 'value. Received: None'): - test_utils.layer_test( - keras.layers.ReLU, - kwargs={'threshold': None}, - input_shape=(2, 3, 4), - supports_masking=True) + def test_relu_with_invalid_threshold(self): + with self.assertRaisesRegex( + ValueError, + "threshold of a ReLU layer cannot be a negative " + "value. Received: None", + ): + test_utils.layer_test( + keras.layers.ReLU, + kwargs={"threshold": None}, + input_shape=(2, 3, 4), + supports_masking=True, + ) - with self.assertRaisesRegex( - ValueError, 'threshold of a ReLU layer cannot be a negative ' - 'value. Received: -10'): - test_utils.layer_test( - keras.layers.ReLU, - kwargs={'threshold': -10}, - input_shape=(2, 3, 4), - supports_masking=True) + with self.assertRaisesRegex( + ValueError, + "threshold of a ReLU layer cannot be a negative " + "value. Received: -10", + ): + test_utils.layer_test( + keras.layers.ReLU, + kwargs={"threshold": -10}, + input_shape=(2, 3, 4), + supports_masking=True, + ) - @test_combinations.run_with_all_model_types - def test_relu_layer_as_activation(self): - layer = keras.layers.Dense(1, activation=keras.layers.ReLU()) - model = test_utils.get_model_from_layers([layer], input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(np.ones((10, 10)), np.ones((10, 1)), batch_size=2) + @test_combinations.run_with_all_model_types + def test_relu_layer_as_activation(self): + layer = keras.layers.Dense(1, activation=keras.layers.ReLU()) + model = test_utils.get_model_from_layers([layer], input_shape=(10,)) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + model.fit(np.ones((10, 10)), np.ones((10, 1)), batch_size=2) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/activation/softmax.py b/keras/layers/activation/softmax.py index c72949af6a9b..aed2dbdec6f5 100644 --- a/keras/layers/activation/softmax.py +++ b/keras/layers/activation/softmax.py @@ -13,96 +13,105 @@ # limitations under the License. # ============================================================================== """Softmax activation layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export def _large_compatible_negative(tensor_type): - """Large negative number as Tensor. + """Large negative number as Tensor. 
- This function is necessary because the standard value for epsilon - in this module (-1e9) cannot be represented using tf.float16 + This function is necessary because the standard value for epsilon + in this module (-1e9) cannot be represented using tf.float16 - Args: - tensor_type: a dtype to determine the type. + Args: + tensor_type: a dtype to determine the type. - Returns: - a large negative number. - """ - if tensor_type == tf.float16: - return tf.float16.min - return -1e9 + Returns: + a large negative number. + """ + # In case of dtype=float16 (e.g., for mixed-precision), the largest + # negative number (dtypes.float16.min) is divided by 2, in order to + # avoid overflows when summing negative inputs. + if tensor_type == tf.float16: + return tf.float16.min / 2.0 + return -1e9 -@keras_export('keras.layers.Softmax') +@keras_export("keras.layers.Softmax") class Softmax(Layer): - """Softmax activation function. - - Example without mask: - - >>> inp = np.asarray([1., 2., 1.]) - >>> layer = tf.keras.layers.Softmax() - >>> layer(inp).numpy() - array([0.21194157, 0.5761169 , 0.21194157], dtype=float32) - >>> mask = np.asarray([True, False, True], dtype=bool) - >>> layer(inp, mask).numpy() - array([0.5, 0. , 0.5], dtype=float32) - - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - Output shape: - Same shape as the input. - - Args: - axis: Integer, or list of Integers, axis along which the softmax - normalization is applied. - Call arguments: - inputs: The inputs, or logits to the softmax layer. - mask: A boolean mask of the same shape as `inputs`. Defaults to `None`. The - mask specifies 1 to keep and 0 to mask. - - Returns: - softmaxed output with the same shape as `inputs`. - """ - - def __init__(self, axis=-1, **kwargs): - super().__init__(**kwargs) - self.supports_masking = True - self.axis = axis - - def call(self, inputs, mask=None): - if mask is not None: - # Since mask is 1.0 for positions we want to keep and 0.0 for - # masked positions, this operation will create a tensor which is 0.0 for - # positions we want to attend and -1e.9 for masked positions. - adder = (1.0 - tf.cast(mask, inputs.dtype)) * ( - _large_compatible_negative(inputs.dtype)) - - # Since we are adding it to the raw scores before the softmax, this is - # effectively the same as removing these entirely. - inputs += adder - if isinstance(self.axis, (tuple, list)): - if len(self.axis) > 1: - return tf.exp(inputs - tf.reduce_logsumexp( - inputs, axis=self.axis, keepdims=True)) - else: - return backend.softmax(inputs, axis=self.axis[0]) - return backend.softmax(inputs, axis=self.axis) - - def get_config(self): - config = {'axis': self.axis} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - return input_shape + """Softmax activation function. + + Example without mask: + + >>> inp = np.asarray([[1., 2., 1.]]) + >>> layer = tf.keras.layers.Softmax() + >>> layer(inp).numpy() + array([[0.21194157, 0.5761169 , 0.21194157]], dtype=float32) + >>> mask = np.asarray([[True, False, True]], dtype=bool) + >>> layer(inp, mask).numpy() + array([[0.5, 0. , 0.5]], dtype=float32) + + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. 
+ + Output shape: + Same shape as the input. + + Args: + axis: Integer, or list of Integers, axis along which the softmax + normalization is applied. + Call arguments: + inputs: The inputs, or logits to the softmax layer. + mask: A boolean mask of the same shape as `inputs`. The mask + specifies 1 to keep and 0 to mask. Defaults to `None`. + + + Returns: + Softmaxed output with the same shape as `inputs`. + """ + + def __init__(self, axis=-1, **kwargs): + super().__init__(**kwargs) + self.supports_masking = True + self.axis = axis + + def call(self, inputs, mask=None): + if mask is not None: + # Since mask is 1.0 for positions we want to keep and 0.0 for masked + # positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -1e9 for masked positions. + adder = (1.0 - tf.cast(mask, inputs.dtype)) * ( + _large_compatible_negative(inputs.dtype) + ) + + # Since we are adding it to the raw scores before the softmax, this + # is effectively the same as removing these entirely. + inputs += adder + if isinstance(self.axis, (tuple, list)): + if len(self.axis) > 1: + return tf.exp( + inputs + - tf.reduce_logsumexp(inputs, axis=self.axis, keepdims=True) + ) + else: + return backend.softmax(inputs, axis=self.axis[0]) + return backend.softmax(inputs, axis=self.axis) + + def get_config(self): + config = {"axis": self.axis} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras/layers/activation/softmax_test.py b/keras/layers/activation/softmax_test.py index 0c615791558c..86562425d452 100644 --- a/keras/layers/activation/softmax_test.py +++ b/keras/layers/activation/softmax_test.py @@ -14,21 +14,23 @@ # ============================================================================== """Tests for Softmax layer.""" +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class SoftmaxTest(test_combinations.TestCase): - - def test_softmax(self): - test_utils.layer_test(keras.layers.Softmax, - kwargs={'axis': 1}, - input_shape=(2, 3, 4), - supports_masking=True) + def test_softmax(self): + test_utils.layer_test( + keras.layers.Softmax, + kwargs={"axis": 1}, + input_shape=(2, 3, 4), + supports_masking=True, + ) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/activation/thresholded_relu.py b/keras/layers/activation/thresholded_relu.py index cc3abeb15c76..9d575af1ee2d 100644 --- a/keras/layers/activation/thresholded_relu.py +++ b/keras/layers/activation/thresholded_relu.py @@ -13,60 +13,65 @@ # limitations under the License. # ============================================================================== """Thresholded Rectified Linear Unit activation layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ThresholdedReLU') +@keras_export("keras.layers.ThresholdedReLU") class ThresholdedReLU(Layer): - """Thresholded Rectified Linear Unit. + """Thresholded Rectified Linear Unit. 
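The masking logic in `Softmax.call` above is easy to verify by hand (a minimal sketch, not part of the diff): masked positions receive a large negative additive offset, so they vanish after normalization, and for `float16` the offset is `float16.min / 2` precisely because `-1e9` is not representable in half precision and summing two full `float16.min` offsets would overflow.

```python
import numpy as np
import tensorflow as tf

inp = tf.constant([[1.0, 2.0, 1.0]])
mask = tf.constant([[True, False, True]])
print(tf.keras.layers.Softmax()(inp, mask).numpy())  # ~[[0.5, 0.0, 0.5]]

# Equivalent manual computation: add 0 where kept, -1e9 where masked.
adder = (1.0 - tf.cast(mask, inp.dtype)) * -1e9
print(tf.nn.softmax(inp + adder).numpy())  # same ~[[0.5, 0.0, 0.5]]

# Largest negative float16 value, half of which serves as the offset:
print(np.finfo(np.float16).min)  # -65500.0
```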
- It follows: + It follows: - ``` - f(x) = x for x > theta - f(x) = 0 otherwise` - ``` + ``` + f(x) = x for x > theta + f(x) = 0 otherwise + ``` - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. - Output shape: - Same shape as the input. + Output shape: + Same shape as the input. - Args: - theta: Float >= 0. Threshold location of activation. - """ + Args: + theta: Float >= 0. Threshold location of activation. + """ - def __init__(self, theta=1.0, **kwargs): - super().__init__(**kwargs) - if theta is None: - raise ValueError( - 'Theta of a Thresholded ReLU layer cannot be None, expecting a float.' - f' Received: {theta}') - if theta < 0: - raise ValueError('The theta value of a Thresholded ReLU layer ' - f'should be >=0. Received: {theta}') - self.supports_masking = True - self.theta = backend.cast_to_floatx(theta) + def __init__(self, theta=1.0, **kwargs): + super().__init__(**kwargs) + if theta is None: + raise ValueError( + "Theta of a Thresholded ReLU layer cannot be None, expecting a " + f"float. Received: {theta}" + ) + if theta < 0: + raise ValueError( + "The theta value of a Thresholded ReLU layer " + f"should be >=0. Received: {theta}" + ) + self.supports_masking = True + self.theta = backend.cast_to_floatx(theta) - def call(self, inputs): - dtype = self.compute_dtype - return inputs * tf.cast(tf.greater(inputs, self.theta), dtype) + def call(self, inputs): + dtype = self.compute_dtype + return inputs * tf.cast(tf.greater(inputs, self.theta), dtype) - def get_config(self): - config = {'theta': float(self.theta)} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = {"theta": float(self.theta)} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - return input_shape + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras/layers/activation/thresholded_relu_test.py b/keras/layers/activation/thresholded_relu_test.py index 3a554be59110..f7f4170a4988 100644 --- a/keras/layers/activation/thresholded_relu_test.py +++ b/keras/layers/activation/thresholded_relu_test.py @@ -14,40 +14,48 @@ # ============================================================================== """Tests for ThresholdedReLU layer.""" +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class ThresholdedReLUTest(test_combinations.TestCase): - - def test_thresholded_relu(self): - test_utils.layer_test(keras.layers.ThresholdedReLU, - kwargs={'theta': 0.5}, - input_shape=(2, 3, 4), - supports_masking=True) - - def test_threshold_relu_with_invalid_theta(self): - with self.assertRaisesRegex( - ValueError, 'Theta of a Thresholded ReLU layer cannot ' - 'be None, expecting a float. 
Received: None'): - test_utils.layer_test( - keras.layers.ThresholdedReLU, - kwargs={'theta': None}, - input_shape=(2, 3, 4), - supports_masking=True) - - with self.assertRaisesRegex( - ValueError, 'The theta value of a Thresholded ReLU ' - 'layer should be >=0. Received: -10'): - test_utils.layer_test( - keras.layers.ThresholdedReLU, - kwargs={'theta': -10}, - input_shape=(2, 3, 4), - supports_masking=True) - - -if __name__ == '__main__': - tf.test.main() + def test_thresholded_relu(self): + test_utils.layer_test( + keras.layers.ThresholdedReLU, + kwargs={"theta": 0.5}, + input_shape=(2, 3, 4), + supports_masking=True, + ) + + def test_threshold_relu_with_invalid_theta(self): + with self.assertRaisesRegex( + ValueError, + "Theta of a Thresholded ReLU layer cannot " + "be None, expecting a float. Received: None", + ): + test_utils.layer_test( + keras.layers.ThresholdedReLU, + kwargs={"theta": None}, + input_shape=(2, 3, 4), + supports_masking=True, + ) + + with self.assertRaisesRegex( + ValueError, + "The theta value of a Thresholded ReLU " + "layer should be >=0. Received: -10", + ): + test_utils.layer_test( + keras.layers.ThresholdedReLU, + kwargs={"theta": -10}, + input_shape=(2, 3, 4), + supports_masking=True, + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/attention/BUILD b/keras/layers/attention/BUILD index 14f6b63f5fe4..fffdb146f493 100644 --- a/keras/layers/attention/BUILD +++ b/keras/layers/attention/BUILD @@ -1,15 +1,17 @@ # Description: # Contains the Keras attention layers. +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/py/tensorflow_gnn:__subpackages__", "//third_party/tensorflow/python/distribute:__pkg__", "//third_party/tensorflow/python/feature_column:__pkg__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/trackable:__pkg__", "//third_party/tensorflow/tools/pip_package:__pkg__", "//third_party/tensorflow_models/official/projects/residual_mobilenet/modeling/backbones:__pkg__", ], diff --git a/keras/layers/attention/__init__.py b/keras/layers/attention/__init__.py index 1914077daffa..e285718b4f0b 100644 --- a/keras/layers/attention/__init__.py +++ b/keras/layers/attention/__init__.py @@ -13,8 +13,8 @@ # limitations under the License. # ============================================================================== """Keras attention layers.""" -# pylint: disable=g-bad-import-order -from keras.layers.attention.multi_head_attention import MultiHeadAttention -from keras.layers.attention.attention import Attention + from keras.layers.attention.additive_attention import AdditiveAttention +from keras.layers.attention.attention import Attention +from keras.layers.attention.multi_head_attention import MultiHeadAttention diff --git a/keras/layers/attention/additive_attention.py b/keras/layers/attention/additive_attention.py index aa9ee50c8bb4..49b826c11c2f 100644 --- a/keras/layers/attention/additive_attention.py +++ b/keras/layers/attention/additive_attention.py @@ -17,159 +17,161 @@ This file follows the terminology of https://arxiv.org/abs/1706.03762 Figure 2. Attention is formed by three tensors: Query, Key and Value. 
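Before moving on to the attention layers: the `ThresholdedReLU` hunks above implement a hard gate, `x * 1[x > theta]`, which one line of TensorFlow reproduces (an illustrative sketch, not part of the diff):

```python
import tensorflow as tf

layer = tf.keras.layers.ThresholdedReLU(theta=1.0)
x = tf.constant([-1.0, 0.5, 1.0, 2.5])
# Values at or below theta are zeroed; note x = 1.0 is not > theta.
print(layer(x).numpy())  # [0.0, 0.0, 0.0, 2.5]
# Same computation written out by hand:
print((x * tf.cast(x > 1.0, x.dtype)).numpy())
```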
""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import -from keras.layers.attention.base_dense_attention import BaseDenseAttention + import tensorflow.compat.v2 as tf +from keras.layers.attention.base_dense_attention import BaseDenseAttention + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.AdditiveAttention') +@keras_export("keras.layers.AdditiveAttention") class AdditiveAttention(BaseDenseAttention): - """Additive attention layer, a.k.a. Bahdanau-style attention. - - Inputs are `query` tensor of shape `[batch_size, Tq, dim]`, `value` tensor of - shape `[batch_size, Tv, dim]` and `key` tensor of shape - `[batch_size, Tv, dim]`. The calculation follows the steps: - - 1. Reshape `query` and `key` into shapes `[batch_size, Tq, 1, dim]` - and `[batch_size, 1, Tv, dim]` respectively. - 2. Calculate scores with shape `[batch_size, Tq, Tv]` as a non-linear - sum: `scores = tf.reduce_sum(tf.tanh(query + key), axis=-1)` - 3. Use scores to calculate a distribution with shape - `[batch_size, Tq, Tv]`: `distribution = tf.nn.softmax(scores)`. - 4. Use `distribution` to create a linear combination of `value` with - shape `[batch_size, Tq, dim]`: - `return tf.matmul(distribution, value)`. - - Args: - use_scale: If `True`, will create a variable to scale the attention scores. - causal: Boolean. Set to `True` for decoder self-attention. Adds a mask such - that position `i` cannot attend to positions `j > i`. This prevents the - flow of information from the future towards the past. - Defaults to `False`. - dropout: Float between 0 and 1. Fraction of the units to drop for the - attention scores. Defaults to 0.0. - - Call Args: - - inputs: List of the following tensors: - * query: Query `Tensor` of shape `[batch_size, Tq, dim]`. - * value: Value `Tensor` of shape `[batch_size, Tv, dim]`. - * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. If not - given, will use `value` for both `key` and `value`, which is the - most common case. - mask: List of the following tensors: - * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`. - If given, the output will be zero at the positions where - `mask==False`. - * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. - If given, will apply the mask such that values at positions where - `mask==False` do not contribute to the result. - training: Python boolean indicating whether the layer should behave in - training mode (adding dropout) or in inference mode (no dropout). - return_attention_scores: bool, it `True`, returns the attention scores - (after masking and softmax) as an additional output argument. - - Output: - - Attention outputs of shape `[batch_size, Tq, dim]`. - [Optional] Attention scores after masking and softmax with shape - `[batch_size, Tq, Tv]`. - - The meaning of `query`, `value` and `key` depend on the application. In the - case of text similarity, for example, `query` is the sequence embeddings of - the first piece of text and `value` is the sequence embeddings of the second - piece of text. `key` is usually the same tensor as `value`. - - Here is a code example for using `AdditiveAttention` in a CNN+Attention - network: - - ```python - # Variable-length int sequences. - query_input = tf.keras.Input(shape=(None,), dtype='int32') - value_input = tf.keras.Input(shape=(None,), dtype='int32') - - # Embedding lookup. - token_embedding = tf.keras.layers.Embedding(max_tokens, dimension) - # Query embeddings of shape [batch_size, Tq, dimension]. 
- query_embeddings = token_embedding(query_input) - # Value embeddings of shape [batch_size, Tv, dimension]. - value_embeddings = token_embedding(value_input) - - # CNN layer. - cnn_layer = tf.keras.layers.Conv1D( - filters=100, - kernel_size=4, - # Use 'same' padding so outputs have the same shape as inputs. - padding='same') - # Query encoding of shape [batch_size, Tq, filters]. - query_seq_encoding = cnn_layer(query_embeddings) - # Value encoding of shape [batch_size, Tv, filters]. - value_seq_encoding = cnn_layer(value_embeddings) - - # Query-value attention of shape [batch_size, Tq, filters]. - query_value_attention_seq = tf.keras.layers.AdditiveAttention()( - [query_seq_encoding, value_seq_encoding]) - - # Reduce over the sequence axis to produce encodings of shape - # [batch_size, filters]. - query_encoding = tf.keras.layers.GlobalAveragePooling1D()( - query_seq_encoding) - query_value_attention = tf.keras.layers.GlobalAveragePooling1D()( - query_value_attention_seq) - - # Concatenate query and document encodings to produce a DNN input layer. - input_layer = tf.keras.layers.Concatenate()( - [query_encoding, query_value_attention]) - - # Add DNN layers, and create Model. - # ... - ``` - """ - - def __init__(self, use_scale=True, **kwargs): - super().__init__(**kwargs) - self.use_scale = use_scale - - def build(self, input_shape): - v_shape = tf.TensorShape(input_shape[1]) - dim = v_shape[-1] - dim = tf.compat.dimension_value(dim) - if self.use_scale: - self.scale = self.add_weight( - name='scale', - shape=[dim], - initializer='glorot_uniform', - dtype=self.dtype, - trainable=True) - else: - self.scale = None - super().build(input_shape) - - def _calculate_scores(self, query, key): - """Calculates attention scores as a nonlinear sum of query and key. + """Additive attention layer, a.k.a. Bahdanau-style attention. + + Inputs are `query` tensor of shape `[batch_size, Tq, dim]`, `value` tensor + of shape `[batch_size, Tv, dim]` and `key` tensor of shape + `[batch_size, Tv, dim]`. The calculation follows the steps: + + 1. Reshape `query` and `key` into shapes `[batch_size, Tq, 1, dim]` + and `[batch_size, 1, Tv, dim]` respectively. + 2. Calculate scores with shape `[batch_size, Tq, Tv]` as a non-linear + sum: `scores = tf.reduce_sum(tf.tanh(query + key), axis=-1)` + 3. Use scores to calculate a distribution with shape + `[batch_size, Tq, Tv]`: `distribution = tf.nn.softmax(scores)`. + 4. Use `distribution` to create a linear combination of `value` with + shape `[batch_size, Tq, dim]`: + `return tf.matmul(distribution, value)`. Args: - query: Query tensor of shape `[batch_size, Tq, dim]`. - key: Key tensor of shape `[batch_size, Tv, dim]`. - Returns: - Tensor of shape `[batch_size, Tq, Tv]`. + use_scale: If `True`, will create a variable to scale the attention + scores. + dropout: Float between 0 and 1. Fraction of the units to drop for the + attention scores. Defaults to `0.0`. + + Call arguments: + inputs: List of the following tensors: + * query: Query `Tensor` of shape `[batch_size, Tq, dim]`. + * value: Value `Tensor` of shape `[batch_size, Tv, dim]`. + * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. + If not given, will use `value` for both `key` and `value`, + which is the most common case. + mask: List of the following tensors: + * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`. + If given, the output will be zero at the positions where + `mask==False`. + * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. 
+ If given, will apply the mask such that values at positions + where `mask==False` do not contribute to the result. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. + use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds + a mask such that position `i` cannot attend to positions `j > i`. + This prevents the flow of information from the future towards the + past. Defaults to `False`. + + Output: + + Attention outputs of shape `[batch_size, Tq, dim]`. + [Optional] Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. + + The meaning of `query`, `value` and `key` depend on the application. In the + case of text similarity, for example, `query` is the sequence embeddings of + the first piece of text and `value` is the sequence embeddings of the second + piece of text. `key` is usually the same tensor as `value`. + + Here is a code example for using `AdditiveAttention` in a CNN+Attention + network: + + ```python + # Variable-length int sequences. + query_input = tf.keras.Input(shape=(None,), dtype='int32') + value_input = tf.keras.Input(shape=(None,), dtype='int32') + + # Embedding lookup. + token_embedding = tf.keras.layers.Embedding(max_tokens, dimension) + # Query embeddings of shape [batch_size, Tq, dimension]. + query_embeddings = token_embedding(query_input) + # Value embeddings of shape [batch_size, Tv, dimension]. + value_embeddings = token_embedding(value_input) + + # CNN layer. + cnn_layer = tf.keras.layers.Conv1D( + filters=100, + kernel_size=4, + # Use 'same' padding so outputs have the same shape as inputs. + padding='same') + # Query encoding of shape [batch_size, Tq, filters]. + query_seq_encoding = cnn_layer(query_embeddings) + # Value encoding of shape [batch_size, Tv, filters]. + value_seq_encoding = cnn_layer(value_embeddings) + + # Query-value attention of shape [batch_size, Tq, filters]. + query_value_attention_seq = tf.keras.layers.AdditiveAttention()( + [query_seq_encoding, value_seq_encoding]) + + # Reduce over the sequence axis to produce encodings of shape + # [batch_size, filters]. + query_encoding = tf.keras.layers.GlobalAveragePooling1D()( + query_seq_encoding) + query_value_attention = tf.keras.layers.GlobalAveragePooling1D()( + query_value_attention_seq) + + # Concatenate query and document encodings to produce a DNN input layer. + input_layer = tf.keras.layers.Concatenate()( + [query_encoding, query_value_attention]) + + # Add DNN layers, and create Model. + # ... + ``` """ - # Reshape tensors to enable broadcasting. - # Reshape into [batch_size, Tq, 1, dim]. - q_reshaped = tf.expand_dims(query, axis=-2) - # Reshape into [batch_size, 1, Tv, dim]. - k_reshaped = tf.expand_dims(key, axis=-3) - if self.use_scale: - scale = self.scale - else: - scale = 1. 
- return tf.reduce_sum( - scale * tf.tanh(q_reshaped + k_reshaped), axis=-1) - - def get_config(self): - config = {'use_scale': self.use_scale} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + + def __init__(self, use_scale=True, **kwargs): + super().__init__(**kwargs) + self.use_scale = use_scale + + def build(self, input_shape): + v_shape = tf.TensorShape(input_shape[1]) + dim = v_shape[-1] + dim = tf.compat.dimension_value(dim) + if self.use_scale: + self.scale = self.add_weight( + name="scale", + shape=[dim], + initializer="glorot_uniform", + dtype=self.dtype, + trainable=True, + ) + else: + self.scale = None + super().build(input_shape) + + def _calculate_scores(self, query, key): + """Calculates attention scores as a nonlinear sum of query and key. + + Args: + query: Query tensor of shape `[batch_size, Tq, dim]`. + key: Key tensor of shape `[batch_size, Tv, dim]`. + Returns: + Tensor of shape `[batch_size, Tq, Tv]`. + """ + # Reshape tensors to enable broadcasting. + # Reshape into [batch_size, Tq, 1, dim]. + q_reshaped = tf.expand_dims(query, axis=-2) + # Reshape into [batch_size, 1, Tv, dim]. + k_reshaped = tf.expand_dims(key, axis=-3) + if self.use_scale: + scale = self.scale + else: + scale = 1.0 + return tf.reduce_sum(scale * tf.tanh(q_reshaped + k_reshaped), axis=-1) + + def get_config(self): + config = {"use_scale": self.use_scale} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/attention/additive_attention_test.py b/keras/layers/attention/additive_attention_test.py index e9309f51a471..690053bcf065 100644 --- a/keras/layers/attention/additive_attention_test.py +++ b/keras/layers/attention/additive_attention_test.py @@ -14,266 +14,324 @@ # ============================================================================== """Tests AdditiveAttention layer.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.mixed_precision import policy from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class AdditiveAttentionTest(tf.test.TestCase, parameterized.TestCase): + def test_calculate_scores_one_dim(self): + # Query tensor of shape [1, 1, 1] + q = np.array([[[1.1]]], dtype=np.float32) + # Key tensor of shape [1, 1, 1] + k = np.array([[[1.6]]], dtype=np.float32) + attention_layer = keras.layers.AdditiveAttention() + attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) + # Scale tensor of shape [1] + attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) + actual = attention_layer._calculate_scores(query=q, key=k) - def test_calculate_scores_one_dim(self): - # Query tensor of shape [1, 1, 1] - q = np.array([[[1.1]]], dtype=np.float32) - # Key tensor of shape [1, 1, 1] - k = np.array([[[1.6]]], dtype=np.float32) - attention_layer = keras.layers.AdditiveAttention() - attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) - # Scale tensor of shape [1] - attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) - actual = attention_layer._calculate_scores(query=q, key=k) - - # Expected tensor of shape [1, 1, 1]. 
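The hand-computed expectations in these tests follow directly from `_calculate_scores` above; here is the same arithmetic in plain NumPy, mirroring the first test case (a sketch, not part of the diff):

```python
import numpy as np

q = np.array([[[1.1]]], dtype=np.float32)  # [batch, Tq, dim]
k = np.array([[[1.6]]], dtype=np.float32)  # [batch, Tv, dim]
scale = np.float32(0.5)
# Broadcast to [batch, Tq, Tv, dim], then reduce the feature axis.
q_reshaped = q[:, :, np.newaxis, :]  # [batch, Tq, 1, dim]
k_reshaped = k[:, np.newaxis, :, :]  # [batch, 1, Tv, dim]
scores = np.sum(scale * np.tanh(q_reshaped + k_reshaped), axis=-1)
print(scores)  # [[[0.49550372]]] == 0.5 * tanh(1.1 + 1.6)
```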
- # expected000 = 0.5 * tanh(1.1 + 1.6) = 0.49550372683 - expected = np.array([[[0.49550372683]]], dtype=np.float32) - self.assertAllClose(expected, actual) + # Expected tensor of shape [1, 1, 1]. + # expected000 = 0.5 * tanh(1.1 + 1.6) = 0.49550372683 + expected = np.array([[[0.49550372683]]], dtype=np.float32) + self.assertAllClose(expected, actual) - def test_calculate_scores_multi_dim(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Key tensor of shape [1, 3, 4] - k = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - attention_layer = keras.layers.AdditiveAttention() - attention_layer.build(input_shape=([1, 2, 4], [1, 3, 4])) - # Scale tensor of shape [4] - attention_layer.scale = np.array([[[0.5, 0.6, 0.7, 0.8]]], dtype=np.float32) - actual = attention_layer._calculate_scores(query=q, key=k) + def test_calculate_scores_multi_dim(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Key tensor of shape [1, 3, 4] + k = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + attention_layer = keras.layers.AdditiveAttention() + attention_layer.build(input_shape=([1, 2, 4], [1, 3, 4])) + # Scale tensor of shape [4] + attention_layer.scale = np.array( + [[[0.5, 0.6, 0.7, 0.8]]], dtype=np.float32 + ) + actual = attention_layer._calculate_scores(query=q, key=k) - # pylint:disable=line-too-long - # expected000 = 0.5*tanh(1.+1.5) + 0.6*tanh(1.1+1.6) + 0.7*tanh(1.2+1.7) + 0.8*tanh(1.3+1.8) = 2.58044532581 - # expected001 = 0.5*tanh(1.+2.5) + 0.6*tanh(1.1+2.6) + 0.7*tanh(1.2+2.7) + 0.8*tanh(1.3+2.8) = 2.59734317449 - # expected002 = 0.5*tanh(1.+3.5) + 0.6*tanh(1.1+3.6) + 0.7*tanh(1.2+3.7) + 0.8*tanh(1.3+3.8) = 2.59964024652 - # expected010 = 0.5*tanh(2.+1.5) + 0.6*tanh(2.1+1.6) + 0.7*tanh(2.2+1.7) + 0.8*tanh(2.3+1.8) = 2.59734317449 - # expected011 = 0.5*tanh(2.+2.5) + 0.6*tanh(2.1+2.6) + 0.7*tanh(2.2+2.7) + 0.8*tanh(2.3+2.8) = 2.59964024652 - # expected012 = 0.5*tanh(2.+3.5) + 0.6*tanh(2.1+3.6) + 0.7*tanh(2.2+3.7) + 0.8*tanh(2.3+3.8) = 2.59995130916 - # pylint:enable=line-too-long - expected = np.array([[[2.58044532581, 2.59734317449, 2.59964024652], - [2.59734317449, 2.59964024652, 2.59995130916]]], - dtype=np.float32) - self.assertAllClose(expected, actual) + # expected000 = 0.5*tanh(1.+1.5) + 0.6*tanh(1.1+1.6) + \ + # 0.7*tanh(1.2+1.7) + 0.8*tanh(1.3+1.8) = 2.58044532581 + # expected001 = 0.5*tanh(1.+2.5) + 0.6*tanh(1.1+2.6) + \ + # 0.7*tanh(1.2+2.7) + 0.8*tanh(1.3+2.8) = 2.59734317449 + # expected002 = 0.5*tanh(1.+3.5) + 0.6*tanh(1.1+3.6) + \ + # 0.7*tanh(1.2+3.7) + 0.8*tanh(1.3+3.8) = 2.59964024652 + # expected010 = 0.5*tanh(2.+1.5) + 0.6*tanh(2.1+1.6) + \ + # 0.7*tanh(2.2+1.7) + 0.8*tanh(2.3+1.8) = 2.59734317449 + # expected011 = 0.5*tanh(2.+2.5) + 0.6*tanh(2.1+2.6) + \ + # 0.7*tanh(2.2+2.7) + 0.8*tanh(2.3+2.8) = 2.59964024652 + # expected012 = 0.5*tanh(2.+3.5) + 0.6*tanh(2.1+3.6) + \ + # 0.7*tanh(2.2+3.7) + 0.8*tanh(2.3+3.8) = 2.59995130916 + expected = np.array( + [ + [ + [2.58044532581, 2.59734317449, 2.59964024652], + [2.59734317449, 2.59964024652, 2.59995130916], + ] + ], + dtype=np.float32, + ) + self.assertAllClose(expected, actual) - def test_calculate_scores_one_dim_batch_size_two(self): - # Query tensor of shape [2, 1, 1] - q = np.array([[[1.1]], [[2.1]]], dtype=np.float32) - # Key tensor of shape [2, 1, 
1] - k = np.array([[[1.6]], [[2.6]]], dtype=np.float32) - attention_layer = keras.layers.AdditiveAttention() - attention_layer.build(input_shape=([2, 1, 1], [2, 1, 1])) - # Scale tensor of shape [1] - attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) - actual = attention_layer._calculate_scores(query=q, key=k) + def test_calculate_scores_one_dim_batch_size_two(self): + # Query tensor of shape [2, 1, 1] + q = np.array([[[1.1]], [[2.1]]], dtype=np.float32) + # Key tensor of shape [2, 1, 1] + k = np.array([[[1.6]], [[2.6]]], dtype=np.float32) + attention_layer = keras.layers.AdditiveAttention() + attention_layer.build(input_shape=([2, 1, 1], [2, 1, 1])) + # Scale tensor of shape [1] + attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) + actual = attention_layer._calculate_scores(query=q, key=k) - # Expected tensor of shape [2, 1, 1]. - # expected000 = 0.5 * tanh(1.1 + 1.6) = 0.49550372683 - # expected100 = 0.5 * tanh(2.1 + 2.6) = 0.49991728277 - expected = np.array([[[0.49550372683]], [[0.49991728277]]], - dtype=np.float32) - self.assertAllClose(expected, actual) + # Expected tensor of shape [2, 1, 1]. + # expected000 = 0.5 * tanh(1.1 + 1.6) = 0.49550372683 + # expected100 = 0.5 * tanh(2.1 + 2.6) = 0.49991728277 + expected = np.array( + [[[0.49550372683]], [[0.49991728277]]], dtype=np.float32 + ) + self.assertAllClose(expected, actual) - def test_shape(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Value tensor of shape [1, 3, 4] - v = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.AdditiveAttention() - actual = attention_layer([q, v], mask=[None, v_mask]) + def test_shape(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Value tensor of shape [1, 3, 4] + v = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.AdditiveAttention() + actual = attention_layer([q, v], mask=[None, v_mask]) - expected_shape = [1, 2, 4] - self.assertAllEqual(expected_shape, tf.shape(actual)) + expected_shape = [1, 2, 4] + self.assertAllEqual(expected_shape, tf.shape(actual)) - def test_shape_no_scale(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Value tensor of shape [1, 3, 4] - v = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.AdditiveAttention(use_scale=False) - actual = attention_layer([q, v], mask=[None, v_mask]) + def test_shape_no_scale(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Value tensor of shape [1, 3, 4] + v = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.AdditiveAttention(use_scale=False) 
+ actual = attention_layer([q, v], mask=[None, v_mask]) - expected_shape = [1, 2, 4] - self.assertAllEqual(expected_shape, tf.shape(actual)) + expected_shape = [1, 2, 4] + self.assertAllEqual(expected_shape, tf.shape(actual)) - def test_shape_with_key(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Value tensor of shape [1, 3, 4] - v = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Key tensor of shape [1, 3, 4] - k = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.AdditiveAttention() - actual = attention_layer([q, v, k], mask=[None, v_mask]) + def test_shape_with_key(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Value tensor of shape [1, 3, 4] + v = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Key tensor of shape [1, 3, 4] + k = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.AdditiveAttention() + actual = attention_layer([q, v, k], mask=[None, v_mask]) - expected_shape = [1, 2, 4] - self.assertAllEqual(expected_shape, tf.shape(actual)) + expected_shape = [1, 2, 4] + self.assertAllEqual(expected_shape, tf.shape(actual)) - def test_multi_dim(self): - # Query tensor of shape [1, 1, 1] - q = np.array([[[1.1]]], dtype=np.float32) - # Value tensor of shape [1, 3, 1] - v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.AdditiveAttention() - attention_layer.build(input_shape=([1, 1, 1], [1, 3, 1])) - # Scale tensor of shape [1] - attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) - actual = attention_layer([q, v], mask=[None, v_mask]) + def test_multi_dim(self): + # Query tensor of shape [1, 1, 1] + q = np.array([[[1.1]]], dtype=np.float32) + # Value tensor of shape [1, 3, 1] + v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.AdditiveAttention() + attention_layer.build(input_shape=([1, 1, 1], [1, 3, 1])) + # Scale tensor of shape [1] + attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) + actual = attention_layer([q, v], mask=[None, v_mask]) - # pylint:disable=line-too-long - # Expected scores of shape [1, 1, 3] - # scores = [[[0.5 * tanh(1.1 + 1.6), 0.5 * tanh(1.1 + 0.7), 0.5 * tanh(1.1 - 0.8)]]] - # = [[[0.49550372683, 0.47340300642, 0.14565630622]]] - # Expected attention distribution = softmax(scores) with zeros in - # positions where v_mask == False. - # => attention_distribution000 - # = exp(0.49550372683)/(exp(0.49550372683) + exp(0.47340300642)) - # = 0.50552495521 - # attention_distribution001 - # = exp(0.47340300642)/(exp(0.49550372683) + exp(0.47340300642)) - # = 0.49447504478 - # attention_distribution002 = 0 - # - # Expected tensor of shape [1, 1, 1]. 
- # expected000 = 0.50552495521 * 1.6 + 0.49447504478 * 0.7 - 0 * 0.8 - # = 1.15497245968 - # pylint:enable=line-too-long - expected = np.array([[[1.15497245968]]], dtype=np.float32) - self.assertAllClose(expected, actual) + # Expected scores of shape [1, 1, 3] + # scores = [[[0.5 * tanh(1.1 + 1.6), + # 0.5 * tanh(1.1 + 0.7), + # 0.5 * tanh(1.1 - 0.8)]]] + # = [[[0.49550372683, 0.47340300642, 0.14565630622]]] + # Expected attention distribution = softmax(scores) with zeros in + # positions where v_mask == False. + # => attention_distribution000 + # = exp(0.49550372683)/(exp(0.49550372683) + exp(0.47340300642)) + # = 0.50552495521 + # attention_distribution001 + # = exp(0.47340300642)/(exp(0.49550372683) + exp(0.47340300642)) + # = 0.49447504478 + # attention_distribution002 = 0 + # + # Expected tensor of shape [1, 1, 1]. + # expected000 = 0.50552495521 * 1.6 + 0.49447504478 * 0.7 - 0 * 0.8 + # = 1.15497245968 + expected = np.array([[[1.15497245968]]], dtype=np.float32) + self.assertAllClose(expected, actual) - def test_multi_dim_with_key(self): - # Query tensor of shape [1, 1, 1] - q = np.array([[[1.1]]], dtype=np.float32) - # Value tensor of shape [1, 3, 1] - v = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) - # Key tensor of shape [1, 3, 1] - k = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.AdditiveAttention() - attention_layer.build(input_shape=([1, 1, 1], [1, 3, 1])) - # Scale tensor of shape [1] - attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) - actual = attention_layer([q, v, k], mask=[None, v_mask]) + def test_multi_dim_with_key(self): + # Query tensor of shape [1, 1, 1] + q = np.array([[[1.1]]], dtype=np.float32) + # Value tensor of shape [1, 3, 1] + v = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) + # Key tensor of shape [1, 3, 1] + k = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.AdditiveAttention() + attention_layer.build(input_shape=([1, 1, 1], [1, 3, 1])) + # Scale tensor of shape [1] + attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) + actual = attention_layer([q, v, k], mask=[None, v_mask]) - # pylint:disable=line-too-long - # Expected scores of shape [1, 1, 3] - # scores = [[[0.5 * tanh(1.1 + 1.6), 0.5 * tanh(1.1 + 0.7), 0.5 * tanh(1.1 - 0.8)]]] - # = [[[0.49550372683, 0.47340300642, 0.14565630622]]] - # Expected attention distribution = softmax(scores) with zeros in - # positions where v_mask == False. - # => attention_distribution000 - # = exp(0.49550372683)/(exp(0.49550372683) + exp(0.47340300642)) - # = 0.50552495521 - # attention_distribution001 - # = exp(0.47340300642)/(exp(0.49550372683) + exp(0.47340300642)) - # = 0.49447504478 - # attention_distribution002 = 0 - # - # Expected tensor of shape [1, 1, 1]. - # expected000 = 0.50552495521 * 0.5 + 0.49447504478 * 0.8 - 0 * 0.3 - # = 0.64834251342 - # pylint:enable=line-too-long - expected = np.array([[[0.64834251342]]], dtype=np.float32) - self.assertAllClose(expected, actual) + # Expected scores of shape [1, 1, 3] + # scores = [[[0.5 * tanh(1.1 + 1.6), + # 0.5 * tanh(1.1 + 0.7), + # 0.5 * tanh(1.1 - 0.8)]]] + # = [[[0.49550372683, 0.47340300642, 0.14565630622]]] + # Expected attention distribution = softmax(scores) with zeros in + # positions where v_mask == False. 
+ # => attention_distribution000 + # = exp(0.49550372683)/(exp(0.49550372683) + exp(0.47340300642)) + # = 0.50552495521 + # attention_distribution001 + # = exp(0.47340300642)/(exp(0.49550372683) + exp(0.47340300642)) + # = 0.49447504478 + # attention_distribution002 = 0 + # + # Expected tensor of shape [1, 1, 1]. + # expected000 = 0.50552495521 * 0.5 + 0.49447504478 * 0.8 - 0 * 0.3 + # = 0.64834251342 + expected = np.array([[[0.64834251342]]], dtype=np.float32) + self.assertAllClose(expected, actual) - def test_multi_dim_with_query_mask(self): - # Query tensor of shape [1, 2, 1] - q = np.array([[[1.1], [-0.5]]], dtype=np.float32) - # Value tensor of shape [1, 3, 1] - v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - # Query mask tensor of shape [1, 2] - q_mask = np.array([[True, False]], dtype=np.bool_) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.AdditiveAttention() - attention_layer.build(input_shape=([1, 1, 1], [1, 3, 1])) - # Scale tensor of shape [1] - attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) - actual = attention_layer([q, v], mask=[q_mask, v_mask]) + def test_multi_dim_with_query_mask(self): + # Query tensor of shape [1, 2, 1] + q = np.array([[[1.1], [-0.5]]], dtype=np.float32) + # Value tensor of shape [1, 3, 1] + v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) + # Query mask tensor of shape [1, 2] + q_mask = np.array([[True, False]], dtype=np.bool_) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.AdditiveAttention() + attention_layer.build(input_shape=([1, 1, 1], [1, 3, 1])) + # Scale tensor of shape [1] + attention_layer.scale = np.array([[[0.5]]], dtype=np.float32) + actual = attention_layer([q, v], mask=[q_mask, v_mask]) - # pylint:disable=line-too-long - # Expected scores of shape [1, 2, 3] - # scores = [[[0.5 * tanh(1.1 + 1.6), 0.5 * tanh(1.1 + 0.7), 0.5 * tanh(1.1 - 0.8)], - # [0.5 * tanh(-0.5 + 1.6), 0.5 * tanh(-0.5 + 0.7), 0.5 * tanh(-0.5 - 0.8)]]] - # = [[[0.49550372683, 0.47340300642, 0.14565630622], - # [0.40024951088, 0.09868766011, -0.43086157965]]] - # Expected attention distribution = softmax(scores) with zeros in - # positions where v_mask == False. - # => attention_distribution000 - # = exp(0.49550372683)/(exp(0.49550372683) + exp(0.47340300642)) - # = 0.50552495521 - # attention_distribution001 - # = exp(0.47340300642)/(exp(0.49550372683) + exp(0.47340300642)) - # = 0.49447504478 - # attention_distribution002 = 0 - # => attention_distribution010 - # = exp(0.40024951088)/(exp(0.40024951088) + exp(0.09868766011)) - # = 0.57482427975 - # attention_distribution011 - # = exp(0.09868766011)/(exp(0.40024951088) + exp(0.09868766011)) - # = 0.42517572025 - # attention_distribution012 = 0 - # - # Expected tensor of shape [1, 2, 1] with zeros where q_mask == False. 
- # expected000 = 0.50552495521 * 1.6 + 0.49447504478 * 0.7 - 0 * 0.8 - # = 1.15497245968 - # expected000 = 0 - # pylint:enable=line-too-long - expected = np.array([[[1.15497245968], [0.]]], dtype=np.float32) - self.assertAllClose(expected, actual) + # Expected scores of shape [1, 2, 3] + # scores = [[[0.5 * tanh(1.1 + 1.6), + # 0.5 * tanh(1.1 + 0.7), + # 0.5 * tanh(1.1 - 0.8)], + # [0.5 * tanh(-0.5 + 1.6), + # 0.5 * tanh(-0.5 + 0.7), + # 0.5 * tanh(-0.5 - 0.8)]]] + # = [[[0.49550372683, 0.47340300642, 0.14565630622], + # [0.40024951088, 0.09868766011, -0.43086157965]]] + # Expected attention distribution = softmax(scores) with zeros in + # positions where v_mask == False. + # => attention_distribution000 + # = exp(0.49550372683)/(exp(0.49550372683) + exp(0.47340300642)) + # = 0.50552495521 + # attention_distribution001 + # = exp(0.47340300642)/(exp(0.49550372683) + exp(0.47340300642)) + # = 0.49447504478 + # attention_distribution002 = 0 + # => attention_distribution010 + # = exp(0.40024951088)/(exp(0.40024951088) + exp(0.09868766011)) + # = 0.57482427975 + # attention_distribution011 + # = exp(0.09868766011)/(exp(0.40024951088) + exp(0.09868766011)) + # = 0.42517572025 + # attention_distribution012 = 0 + # + # Expected tensor of shape [1, 2, 1] with zeros where q_mask == False. + # expected000 = 0.50552495521 * 1.6 + 0.49447504478 * 0.7 - 0 * 0.8 + # = 1.15497245968 + # expected010 = 0 + expected = np.array([[[1.15497245968], [0.0]]], dtype=np.float32) + self.assertAllClose(expected, actual) - def test_serialization(self): - # Test serialization with use_scale - layer = keras.layers.AdditiveAttention(use_scale=True) + def test_serialization(self): + # Test serialization with use_scale + layer = keras.layers.AdditiveAttention(use_scale=True) - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.use_scale, True) + config = keras.layers.serialize(layer) + new_layer = keras.layers.deserialize(config) + self.assertEqual(new_layer.use_scale, True) - config = layer.get_config() - new_layer = keras.layers.AdditiveAttention.from_config(config) - self.assertEqual(new_layer.use_scale, True) + config = layer.get_config() + new_layer = keras.layers.AdditiveAttention.from_config(config) + self.assertEqual(new_layer.use_scale, True) - @test_utils.enable_v2_dtype_behavior - def test_mixed_float16_policy(self): - # Test case for GitHub issue: - # https://github.com/tensorflow/tensorflow/issues/46064 - with policy.policy_scope('mixed_float16'): - q = tf.cast(tf.random.uniform((2, 3, 4), seed=1), 'float16') - v = tf.cast(tf.random.uniform((2, 3, 4), seed=2), 'float16') - k = tf.cast(tf.random.uniform((2, 3, 4), seed=3), 'float16') - layer = keras.layers.AdditiveAttention(causal=True) - _ = layer([q, v, k]) + @test_utils.enable_v2_dtype_behavior + def test_mixed_float16_policy(self): + # Test case for GitHub issue: + # https://github.com/tensorflow/tensorflow/issues/46064 + with policy.policy_scope("mixed_float16"): + q = tf.cast(tf.random.uniform((2, 3, 4), seed=1), "float16") + v = tf.cast(tf.random.uniform((2, 3, 4), seed=2), "float16") + k = tf.cast(tf.random.uniform((2, 3, 4), seed=3), "float16") + layer = keras.layers.AdditiveAttention() + _ = layer([q, v, k], use_causal_mask=True) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/attention/attention.py b/keras/layers/attention/attention.py index 91036776ee7b..380c2f557696 100644 --- a/keras/layers/attention/attention.py +++ 
b/keras/layers/attention/attention.py @@ -17,179 +17,188 @@ This file follows the terminology of https://arxiv.org/abs/1706.03762 Figure 2. Attention is formed by three tensors: Query, Key and Value. """ -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import -from keras.layers.attention.base_dense_attention import BaseDenseAttention + import tensorflow.compat.v2 as tf +from keras.layers.attention.base_dense_attention import BaseDenseAttention + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Attention') +@keras_export("keras.layers.Attention") class Attention(BaseDenseAttention): - """Dot-product attention layer, a.k.a. Luong-style attention. - - Inputs are `query` tensor of shape `[batch_size, Tq, dim]`, `value` tensor of - shape `[batch_size, Tv, dim]` and `key` tensor of shape - `[batch_size, Tv, dim]`. The calculation follows the steps: - - 1. Calculate scores with shape `[batch_size, Tq, Tv]` as a `query`-`key` dot - product: `scores = tf.matmul(query, key, transpose_b=True)`. - 2. Use scores to calculate a distribution with shape - `[batch_size, Tq, Tv]`: `distribution = tf.nn.softmax(scores)`. - 3. Use `distribution` to create a linear combination of `value` with - shape `[batch_size, Tq, dim]`: - `return tf.matmul(distribution, value)`. - - Args: - use_scale: If `True`, will create a scalar variable to scale the attention - scores. - causal: Boolean. Set to `True` for decoder self-attention. Adds a mask such - that position `i` cannot attend to positions `j > i`. This prevents the - flow of information from the future towards the past. - Defaults to `False`. - dropout: Float between 0 and 1. Fraction of the units to drop for the - attention scores. Defaults to 0.0. - score_mode: Function to use to compute attention scores, one of - `{"dot", "concat"}`. `"dot"` refers to the dot product between the query - and key vectors. `"concat"` refers to the hyperbolic tangent of the - concatenation of the query and key vectors. - - Call Args: - - inputs: List of the following tensors: - * query: Query `Tensor` of shape `[batch_size, Tq, dim]`. - * value: Value `Tensor` of shape `[batch_size, Tv, dim]`. - * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. If not - given, will use `value` for both `key` and `value`, which is the - most common case. - mask: List of the following tensors: - * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`. - If given, the output will be zero at the positions where - `mask==False`. - * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. - If given, will apply the mask such that values at positions where - `mask==False` do not contribute to the result. - return_attention_scores: bool, it `True`, returns the attention scores - (after masking and softmax) as an additional output argument. - training: Python boolean indicating whether the layer should behave in - training mode (adding dropout) or in inference mode (no dropout). - - Output: - - Attention outputs of shape `[batch_size, Tq, dim]`. - [Optional] Attention scores after masking and softmax with shape - `[batch_size, Tq, Tv]`. - - The meaning of `query`, `value` and `key` depend on the application. In the - case of text similarity, for example, `query` is the sequence embeddings of - the first piece of text and `value` is the sequence embeddings of the second - piece of text. `key` is usually the same tensor as `value`. 
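The three calculation steps in the docstring above reduce to two matrix multiplications with a softmax in between. A minimal sketch of the same computation in raw TF ops (not part of this change; shapes and values are illustrative):

```python
import tensorflow as tf

query = tf.random.normal([2, 3, 4])  # [batch_size, Tq, dim]
value = tf.random.normal([2, 5, 4])  # [batch_size, Tv, dim]
key = value  # the most common case: key is the same tensor as value

# 1. Scores as a query-key dot product: [batch_size, Tq, Tv].
scores = tf.matmul(query, key, transpose_b=True)
# 2. Attention distribution over the Tv positions: [batch_size, Tq, Tv].
distribution = tf.nn.softmax(scores)
# 3. Linear combination of value: [batch_size, Tq, dim].
output = tf.matmul(distribution, value)
```

Up to masking, scaling, and dropout, this is what the layer computes with the default `score_mode="dot"`.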
- - Here is a code example for using `Attention` in a CNN+Attention network: - - ```python - # Variable-length int sequences. - query_input = tf.keras.Input(shape=(None,), dtype='int32') - value_input = tf.keras.Input(shape=(None,), dtype='int32') - - # Embedding lookup. - token_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64) - # Query embeddings of shape [batch_size, Tq, dimension]. - query_embeddings = token_embedding(query_input) - # Value embeddings of shape [batch_size, Tv, dimension]. - value_embeddings = token_embedding(value_input) - - # CNN layer. - cnn_layer = tf.keras.layers.Conv1D( - filters=100, - kernel_size=4, - # Use 'same' padding so outputs have the same shape as inputs. - padding='same') - # Query encoding of shape [batch_size, Tq, filters]. - query_seq_encoding = cnn_layer(query_embeddings) - # Value encoding of shape [batch_size, Tv, filters]. - value_seq_encoding = cnn_layer(value_embeddings) - - # Query-value attention of shape [batch_size, Tq, filters]. - query_value_attention_seq = tf.keras.layers.Attention()( - [query_seq_encoding, value_seq_encoding]) - - # Reduce over the sequence axis to produce encodings of shape - # [batch_size, filters]. - query_encoding = tf.keras.layers.GlobalAveragePooling1D()( - query_seq_encoding) - query_value_attention = tf.keras.layers.GlobalAveragePooling1D()( - query_value_attention_seq) - - # Concatenate query and document encodings to produce a DNN input layer. - input_layer = tf.keras.layers.Concatenate()( - [query_encoding, query_value_attention]) - - # Add DNN layers, and create Model. - # ... - ``` - """ - - def __init__(self, use_scale=False, score_mode='dot', **kwargs): - super().__init__(**kwargs) - self.use_scale = use_scale - self.score_mode = score_mode - if self.score_mode not in ['dot', 'concat']: - raise ValueError(f'Received: score_mode={score_mode}. Acceptable values ' - 'are: ["dot", "concat"]') - - def build(self, input_shape): - """Creates variable when `use_scale` is True or `score_mode` is `concat`.""" - if self.use_scale: - self.scale = self.add_weight( - name='scale', - shape=(), - initializer='ones', - dtype=self.dtype, - trainable=True) - else: - self.scale = None - if self.score_mode == 'concat': - self.concat_score_weight = self.add_weight( - name='concat_score_weight', - shape=(), - initializer='ones', - dtype=self.dtype, - trainable=True) - else: - self.concat_score_weight = None - super().build(input_shape) - - def _calculate_scores(self, query, key): - """Calculates attention scores as a query-key dot product. + """Dot-product attention layer, a.k.a. Luong-style attention. + + Inputs are `query` tensor of shape `[batch_size, Tq, dim]`, `value` tensor + of shape `[batch_size, Tv, dim]` and `key` tensor of shape + `[batch_size, Tv, dim]`. The calculation follows the steps: + + 1. Calculate scores with shape `[batch_size, Tq, Tv]` as a `query`-`key` dot + product: `scores = tf.matmul(query, key, transpose_b=True)`. + 2. Use scores to calculate a distribution with shape + `[batch_size, Tq, Tv]`: `distribution = tf.nn.softmax(scores)`. + 3. Use `distribution` to create a linear combination of `value` with + shape `[batch_size, Tq, dim]`: + `return tf.matmul(distribution, value)`. Args: - query: Query tensor of shape `[batch_size, Tq, dim]`. - key: Key tensor of shape `[batch_size, Tv, dim]`. - Returns: - Tensor of shape `[batch_size, Tq, Tv]`. + use_scale: If `True`, will create a scalar variable to scale the + attention scores. + dropout: Float between 0 and 1. 
Fraction of the units to drop for the + attention scores. Defaults to 0.0. + score_mode: Function to use to compute attention scores, one of + `{"dot", "concat"}`. `"dot"` refers to the dot product between the + query and key vectors. `"concat"` refers to the hyperbolic tangent + of the concatenation of the query and key vectors. + + Call arguments: + inputs: List of the following tensors: + * query: Query `Tensor` of shape `[batch_size, Tq, dim]`. + * value: Value `Tensor` of shape `[batch_size, Tv, dim]`. + * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. If + not given, will use `value` for both `key` and `value`, which is + the most common case. + mask: List of the following tensors: + * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`. + If given, the output will be zero at the positions where + `mask==False`. + * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. + If given, will apply the mask such that values at positions + where `mask==False` do not contribute to the result. + return_attention_scores: bool, if `True`, returns the attention scores + (after masking and softmax) as an additional output argument. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds + a mask such that position `i` cannot attend to positions `j > i`. + This prevents the flow of information from the future towards the + past. + Defaults to `False`. + + Output: + + Attention outputs of shape `[batch_size, Tq, dim]`. + [Optional] Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. + + The meaning of `query`, `value` and `key` depends on the application. In the + case of text similarity, for example, `query` is the sequence embeddings of + the first piece of text and `value` is the sequence embeddings of the second + piece of text. `key` is usually the same tensor as `value`. + + Here is a code example for using `Attention` in a CNN+Attention network: + + ```python + # Variable-length int sequences. + query_input = tf.keras.Input(shape=(None,), dtype='int32') + value_input = tf.keras.Input(shape=(None,), dtype='int32') + + # Embedding lookup. + token_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64) + # Query embeddings of shape [batch_size, Tq, dimension]. + query_embeddings = token_embedding(query_input) + # Value embeddings of shape [batch_size, Tv, dimension]. + value_embeddings = token_embedding(value_input) + + # CNN layer. + cnn_layer = tf.keras.layers.Conv1D( + filters=100, + kernel_size=4, + # Use 'same' padding so outputs have the same shape as inputs. + padding='same') + # Query encoding of shape [batch_size, Tq, filters]. + query_seq_encoding = cnn_layer(query_embeddings) + # Value encoding of shape [batch_size, Tv, filters]. + value_seq_encoding = cnn_layer(value_embeddings) + + # Query-value attention of shape [batch_size, Tq, filters]. + query_value_attention_seq = tf.keras.layers.Attention()( + [query_seq_encoding, value_seq_encoding]) + + # Reduce over the sequence axis to produce encodings of shape + # [batch_size, filters]. + query_encoding = tf.keras.layers.GlobalAveragePooling1D()( + query_seq_encoding) + query_value_attention = tf.keras.layers.GlobalAveragePooling1D()( + query_value_attention_seq) + + # Concatenate query and document encodings to produce a DNN input layer. 
+ input_layer = tf.keras.layers.Concatenate()( + [query_encoding, query_value_attention]) + + # Add DNN layers, and create Model. + # ... + ``` """ - if self.score_mode == 'dot': - scores = tf.matmul(query, key, transpose_b=True) - if self.scale is not None: - scores *= self.scale - elif self.score_mode == 'concat': - # Reshape tensors to enable broadcasting. - # Reshape into [batch_size, Tq, 1, dim]. - q_reshaped = tf.expand_dims(query, axis=-2) - # Reshape into [batch_size, 1, Tv, dim]. - k_reshaped = tf.expand_dims(key, axis=-3) - if self.scale is not None: - scores = self.concat_score_weight * tf.reduce_sum( - tf.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1) - else: - scores = self.concat_score_weight * tf.reduce_sum( - tf.tanh(q_reshaped + k_reshaped), axis=-1) - - return scores - - def get_config(self): - config = {'use_scale': self.use_scale, 'score_mode': self.score_mode} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + + def __init__(self, use_scale=False, score_mode="dot", **kwargs): + super().__init__(**kwargs) + self.use_scale = use_scale + self.score_mode = score_mode + if self.score_mode not in ["dot", "concat"]: + raise ValueError( + f"Received: score_mode={score_mode}. Acceptable values " + 'are: ["dot", "concat"]' + ) + + def build(self, input_shape): + """Creates variable when `use_scale` is True or `score_mode` is + `concat`.""" + if self.use_scale: + self.scale = self.add_weight( + name="scale", + shape=(), + initializer="ones", + dtype=self.dtype, + trainable=True, + ) + else: + self.scale = None + if self.score_mode == "concat": + self.concat_score_weight = self.add_weight( + name="concat_score_weight", + shape=(), + initializer="ones", + dtype=self.dtype, + trainable=True, + ) + else: + self.concat_score_weight = None + super().build(input_shape) + + def _calculate_scores(self, query, key): + """Calculates attention scores as a query-key dot product. + + Args: + query: Query tensor of shape `[batch_size, Tq, dim]`. + key: Key tensor of shape `[batch_size, Tv, dim]`. + Returns: + Tensor of shape `[batch_size, Tq, Tv]`. + """ + if self.score_mode == "dot": + scores = tf.matmul(query, key, transpose_b=True) + if self.scale is not None: + scores *= self.scale + elif self.score_mode == "concat": + # Reshape tensors to enable broadcasting. + # Reshape into [batch_size, Tq, 1, dim]. + q_reshaped = tf.expand_dims(query, axis=-2) + # Reshape into [batch_size, 1, Tv, dim]. 
+ k_reshaped = tf.expand_dims(key, axis=-3) + if self.scale is not None: + scores = self.concat_score_weight * tf.reduce_sum( + tf.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1 + ) + else: + scores = self.concat_score_weight * tf.reduce_sum( + tf.tanh(q_reshaped + k_reshaped), axis=-1 + ) + + return scores + + def get_config(self): + config = {"use_scale": self.use_scale, "score_mode": self.score_mode} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/attention/attention_test.py b/keras/layers/attention/attention_test.py index 1ddc288316b7..43debfb26551 100644 --- a/keras/layers/attention/attention_test.py +++ b/keras/layers/attention/attention_test.py @@ -14,442 +14,572 @@ # ============================================================================== """Tests Attention layer.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.layers import core from keras.testing_infra import test_combinations -import numpy as np -import tensorflow.compat.v2 as tf -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class AttentionTest(tf.test.TestCase, parameterized.TestCase): - - def test_calculate_scores_one_dim(self): - # Query tensor of shape [1, 1, 1] - q = np.array([[[1.1]]], dtype=np.float32) - # Key tensor of shape [1, 1, 1] - k = np.array([[[1.6]]], dtype=np.float32) - attention_layer = keras.layers.Attention() - attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) - actual = attention_layer._calculate_scores(query=q, key=k) - - # Expected tensor of shape [1, 1, 1]. - # expected000 = 1.1*1.6 = 1.76 - expected = np.array([[[1.76]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_calculate_scores_multi_dim(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Key tensor of shape [1, 3, 4] - k = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - attention_layer = keras.layers.Attention() - attention_layer.build(input_shape=([1, 2, 4], [1, 3, 4])) - actual = attention_layer._calculate_scores(query=q, key=k) - - # Expected tensor of shape [1, 2, 3]. 
- # expected000 = 1.*1.5+1.1*1.6+1.2*1.7+1.3*1.8 = 7.64 - # expected001 = 1.*2.5+1.1*2.6+1.2*2.7+1.3*2.8 = 12.24 - # expected002 = 1.*3.5+1.1*3.6+1.2*3.7+1.3*3.8 = 16.84 - # expected010 = 2.*1.5+2.1*1.6+2.2*1.7+2.3*1.8 = 14.24 - # expected011 = 2.*2.5+2.1*2.6+2.2*2.7+2.3*2.8 = 22.84 - # expected012 = 2.*3.5+2.1*3.6+2.2*3.7+2.3*3.8 = 31.44 - expected = np.array([[[7.64, 12.24, 16.84], [14.24, 22.84, 31.44]]], - dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_calculate_scores_multi_dim_concat(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Key tensor of shape [1, 3, 4] - k = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - attention_layer = keras.layers.Attention(score_mode='concat') - attention_layer.concat_score_weight = 1 - attention_layer.build(input_shape=([1, 2, 4], [1, 3, 4])) - actual = keras.backend.get_value( - attention_layer._calculate_scores(query=q, key=k)) - - # pylint:disable=line-too-long - # expected000 = tanh(1.+1.5) + tanh(1.1+1.6) + tanh(1.2+1.7) + tanh(1.3+1.8) = 3.96753427840 - # expected001 = tanh(1.+2.5) + tanh(1.1+2.6) + tanh(1.2+2.7) + tanh(1.3+2.8) = 3.99558784825 - # expected002 = tanh(1.+3.5) + tanh(1.1+3.6) + tanh(1.2+3.7) + tanh(1.3+3.8) = 3.99940254147 - # expected010 = tanh(2.+1.5) + tanh(2.1+1.6) + tanh(2.2+1.7) + tanh(2.3+1.8) = 3.99558784825 - # expected011 = tanh(2.+2.5) + tanh(2.1+2.6) + tanh(2.2+2.7) + tanh(2.3+2.8) = 3.99940254147 - # expected012 = tanh(2.+3.5) + tanh(2.1+3.6) + tanh(2.2+3.7) + tanh(2.3+3.8) = 3.99991913657 - expected = np.array([[[3.96753427840, 3.99558784825, 3.99940254147], - [3.99558784825, 3.99940254147, 3.99991913657]]], - dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_calculate_scores_one_dim_batch_size_two(self): - # Query tensor of shape [2, 1, 1] - q = np.array([[[1.1]], [[2.1]]], dtype=np.float32) - # Key tensor of shape [2, 1, 1] - k = np.array([[[1.6]], [[2.6]]], dtype=np.float32) - attention_layer = keras.layers.Attention() - attention_layer.build(input_shape=([2, 1, 1], [2, 1, 1])) - actual = attention_layer._calculate_scores(query=q, key=k) - - # Expected tensor of shape [2, 1, 1]. - # expected000 = 1.1*1.6 = 1.76 - # expected100 = 2.1*2.6 = 5.46 - expected = np.array([[[1.76]], [[5.46]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_calculate_scores_one_dim_with_scale(self): - """Tests that scores are multiplied by scale.""" - # Query tensor of shape [1, 1, 1] - q = np.array([[[1.1]]], dtype=np.float32) - # Key tensor of shape [1, 1, 1] - k = np.array([[[1.6]]], dtype=np.float32) - attention_layer = keras.layers.Attention(use_scale=True) - attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) - attention_layer.scale = -2. - actual = attention_layer._calculate_scores(query=q, key=k) - - # Expected tensor of shape [1, 1, 1]. 
- # expected000 = -2*1.1*1.6 = -3.52 - expected = np.array([[[-3.52]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_calculate_scores_one_dim_with_scale_concat(self): - """Tests that scores are multiplied by scale.""" - # Query tensor of shape [1, 1, 1] - q = np.array([[[1.1]]], dtype=np.float32) - # Key tensor of shape [1, 1, 1] - k = np.array([[[1.6]]], dtype=np.float32) - attention_layer = keras.layers.Attention( - use_scale=True, score_mode='concat') - attention_layer.concat_score_weight = 1 - attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) - attention_layer.scale = 2. - actual = keras.backend.get_value( - attention_layer._calculate_scores(query=q, key=k)) - - # Expected tensor of shape [1, 1, 1]. - # expected000 = tanh(2*(1.1+1.6)) = 0.9999592018254402 - expected = np.array([[[0.999959202]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_shape(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Value tensor of shape [1, 3, 4] - v = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.Attention() - actual = attention_layer([q, v], mask=[None, v_mask]) - - expected_shape = [1, 2, 4] - self.assertAllEqual(expected_shape, tf.shape(actual)) - - def test_shape_concat(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Value tensor of shape [1, 3, 4] - v = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.Attention(score_mode='concat') - attention_layer.concat_score_weight = 1 - actual = attention_layer([q, v], mask=[None, v_mask]) - - expected_shape = [1, 2, 4] - self.assertAllEqual(expected_shape, tf.shape(actual)) - - def test_shape_with_key(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Value tensor of shape [1, 3, 4] - v = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Key tensor of shape [1, 3, 4] - k = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.Attention() - actual = attention_layer([q, v, k], mask=[None, v_mask]) - - expected_shape = [1, 2, 4] - self.assertAllEqual(expected_shape, tf.shape(actual)) - - def test_shape_with_key_concat(self): - # Query tensor of shape [1, 2, 4] - q = np.array([[[1., 1.1, 1.2, 1.3], [2., 2.1, 2.2, 2.3]]], dtype=np.float32) - # Value tensor of shape [1, 3, 4] - v = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Key tensor of shape [1, 3, 4] - k = np.array( - [[[1.5, 1.6, 1.7, 1.8], [2.5, 2.6, 2.7, 2.8], [3.5, 3.6, 3.7, 3.8]]], - dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.Attention(score_mode='concat') - attention_layer.concat_score_weight = 1 - actual = attention_layer([q, v, k], mask=[None, v_mask]) - - 
expected_shape = [1, 2, 4] - self.assertAllEqual(expected_shape, tf.shape(actual)) - - def test_multi_dim(self): - # Query tensor of shape [1, 1, 1] - q = np.array([[[1.1]]], dtype=np.float32) - # Value tensor of shape [1, 3, 1] - v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.Attention() - actual = attention_layer([q, v], mask=[None, v_mask]) - - # Expected scores of shape [1, 1, 3] - # scores = [[[1.1*1.6, 1.1*0.7, -1.1*0.8]]] = [[[1.76, 0.77, -0.88]]] - # Expected attention distribution = softmax(scores) with zeros in - # positions where v_mask == False. - # => attention_distribution000 = exp(1.76)/(exp(1.76) + exp(0.77)) - # = 0.72908792234 - # attention_distribution001 = exp(0.77)/(exp(1.76) + exp(0.77)) - # = 0.27091207765 - # attention_distribution002 = 0 - # - # Expected tensor of shape [1, 1, 1]. - # expected000 = 0.72908792234 * 1.6 + 0.27091207765 * 0.7 - 0 * 0.8 - # = 1.3561791301 - expected = np.array([[[1.3561791301]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_multi_dim_with_key(self): - # Query tensor of shape [1, 1, 1] - q = np.array([[[1.1]]], dtype=np.float32) - # Value tensor of shape [1, 3, 1] - v = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) - # Key tensor of shape [1, 3, 1] - k = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.Attention() - actual = attention_layer([q, v, k], mask=[None, v_mask]) - - # Expected scores of shape [1, 1, 3] - # scores = [[[1.1*1.6, 1.1*0.7, -1.1*0.8]]] = [[[1.76, 0.77, -0.88]]] - # Expected attention distribution = softmax(scores) with zeros in - # positions where v_mask == False. - # => attention_distribution000 = exp(1.76)/(exp(1.76) + exp(0.77)) - # = 0.72908792234 - # attention_distribution001 = exp(0.77)/(exp(1.76) + exp(0.77)) - # = 0.27091207765 - # attention_distribution002 = 0 - # - # Expected tensor of shape [1, 1, 1]. - # expected000 = 0.72908792234 * 0.5 + 0.27091207765 * 0.8 - 0 * 0.3 - # = 0.58127362329 - expected = np.array([[[0.58127362329]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - @parameterized.named_parameters( - ('', False), - ('return_attention_scores', True), - ) - def test_multi_dim_with_query_mask(self, return_attention_scores): - # Query tensor of shape [1, 2, 1] - q = np.array([[[1.1], [-0.5]]], dtype=np.float32) - # Value tensor of shape [1, 3, 1] - v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - # Query mask tensor of shape [1, 2] - q_mask = np.array([[True, False]], dtype=np.bool_) - # Value mask tensor of shape [1, 3] - v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = keras.layers.Attention() - if return_attention_scores: - actual, actual_scores = attention_layer( - [q, v], - mask=[q_mask, v_mask], - return_attention_scores=return_attention_scores) - else: - actual = attention_layer([q, v], - mask=[q_mask, v_mask], - return_attention_scores=return_attention_scores) - - # Expected scores of shape [1, 2, 3] - # scores = [[[1.1*1.6, 1.1*0.7, -1.1*0.8], [-0.5*1.6, -0.5*0.7, 0.5*0.8]]] - # = [[[1.76, 0.77, -0.88], [-0.8, -0.35, 0.4]]] - # Expected attention distribution = softmax(scores) with zeros in - # positions where v_mask == False. 
- # => attention_distribution000 = exp(1.76)/(exp(1.76) + exp(0.77)) - # = 0.72908792234 - # attention_distribution001 = exp(0.77)/(exp(1.76) + exp(0.77)) - # = 0.27091207765 - # attention_distribution002 = 0 - # => attention_distribution010 = exp(-0.8)/(exp(-0.8) + exp(-0.35)) - # = 0.38936076605 - # attention_distribution011 = exp(-0.35)/(exp(-0.8) + exp(-0.35)) - # = 0.61063923394 - # attention_distribution012 = 0 - if return_attention_scores: - expected_scores = np.array([[[0.72908792234, 0.27091207765, 0.], - [0.38936076605, 0.61063923394, 0.]]], - dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) - # Expected tensor of shape [1, 2, 1] with zeros where q_mask == False. - # expected000 = 0.72908792234 * 1.6 + 0.27091207765 * 0.7 - 0 * 0.8 - # = 1.3561791301 - # expected000 = 0 - expected = np.array([[[1.3561791301], [0.]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_scale_none(self): - """Tests that scale is None by default.""" - attention_layer = keras.layers.Attention() - attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) - self.assertIsNone(attention_layer.scale) - - def test_scale_init_eager(self): - """Tests that scale initializes to 1 when use_scale=True.""" - if not tf.executing_eagerly(): - self.skipTest('Only run in eager mode') - attention_layer = keras.layers.Attention(use_scale=True) - attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) - self.assertAllClose(1., attention_layer.scale.value()) - - def test_scale_init_graph(self): - """Tests that scale initializes to 1 when use_scale=True.""" - with self.cached_session() as sess: - attention_layer = keras.layers.Attention(use_scale=True) - attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) - sess.run(attention_layer.scale.initializer) - self.assertAllClose(1., attention_layer.scale.value()) - - @parameterized.named_parameters( - ('', False), - ('return_attention_scores', True), - ) - def test_self_attention_causal(self, return_attention_scores): - # Query-value tensor of shape [1, 3, 1] - q = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) - attention_layer = keras.layers.Attention(causal=True) - if return_attention_scores: - actual, actual_scores = attention_layer( - [q, q], return_attention_scores=return_attention_scores) - else: - actual = attention_layer([q, q], - return_attention_scores=return_attention_scores) - - # Expected scores of shape [1, 3, 3] - # scores = [[0.25, 0.4, -0.15], [0.4, 0.64, -0.24], [-0.15, -0.24, 0.09]] - # Expected attention distribution = softmax(scores) lower triangular - # => attention_distribution00 = [1., 0., 0.] - # attention_distribution01 - # = [exp(0.4), exp(0.64), 0.] / (exp(0.4) + exp(0.64)) - # = [0.44028635073, 0.55971364926, 0.] - # attention_distribution02 - # = [exp(-0.15), exp(-0.24), exp(0.09)] - # / (exp(-0.15) + exp(-0.24) + exp(0.09)) - # = [0.31395396638, 0.28693232061, 0.399113713] - if return_attention_scores: - expected_scores = np.array( - [[[1., 0., 0.], [0.44028635073, 0.55971364926, 0.], - [0.31395396638, 0.28693232061, 0.399113713]]], - dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) - # Expected tensor of shape [1, 3, 1]. 
- # expected000 = 0.5 - # expected010 = 0.44028635073 * 0.5 + 0.55971364926 * 0.8 - # = 0.66791409477 - # expected020 = 0.31395396638 * 0.5 +0.28693232061 * 0.8 -0.399113713 * 0.3 - # = 0.26678872577 - expected = np.array([[[0.5], [0.66791409477], [0.26678872577]]], - dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_inputs_not_list(self): - attention_layer = keras.layers.Attention() - q = np.array([[[1.1]]], dtype=np.float32) - with self.assertRaisesRegex( - ValueError, 'Attention layer must be called on a list of inputs'): - attention_layer(q) - - def test_inputs_too_short(self): - attention_layer = keras.layers.Attention() - q = np.array([[[1.1]]], dtype=np.float32) - with self.assertRaisesRegex( - ValueError, 'Attention layer accepts inputs list of length 2 or 3'): - attention_layer([q]) - - def test_inputs_too_long(self): - attention_layer = keras.layers.Attention() - q = np.array([[[1.1]]], dtype=np.float32) - with self.assertRaisesRegex( - ValueError, 'Attention layer accepts inputs list of length 2 or 3'): - attention_layer([q, q, q, q]) - - def test_mask_not_list(self): - attention_layer = keras.layers.Attention() - q = np.array([[[1.1]]], dtype=np.float32) - mask = np.array([[True]], dtype=np.bool_) - with self.assertRaisesRegex(ValueError, - 'Attention layer mask must be a list'): - attention_layer([q, q], mask=mask) - - def test_mask_too_short(self): - attention_layer = keras.layers.Attention() - q = np.array([[[1.1]]], dtype=np.float32) - mask = np.array([[True]], dtype=np.bool_) - with self.assertRaisesRegex( - ValueError, 'Attention layer mask must be a list of length 2'): - attention_layer([q, q], mask=[mask]) - - def test_mask_too_long(self): - attention_layer = keras.layers.Attention() - q = np.array([[[1.1]]], dtype=np.float32) - mask = np.array([[True]], dtype=np.bool_) - with self.assertRaisesRegex( - ValueError, 'Attention layer mask must be a list of length 2'): - attention_layer([q, q], mask=[mask, mask, mask]) - - def test_override_mask(self): - attention_layer = keras.layers.Attention() - q = core.Masking()(np.array([[[1.1]]], dtype=np.float32)) - mask = np.array([[False]], dtype=np.bool_) - actual = attention_layer([q, q], mask=[mask, mask]) - self.assertAllClose([[[0]]], actual) - - def test_implicit_mask(self): - attention_layer = keras.layers.Attention() - q = core.Masking(1.1)(np.array([[[1.1], [1]]], dtype=np.float32)) - v = core.Masking(1.2)(np.array([[[1.2], [1]]], dtype=np.float32)) - actual = attention_layer([q, v]) - self.assertAllClose([[[0], [1]]], actual) - - @parameterized.named_parameters( - ('', False), - ('use_scale', True), - ) - def test_serialization(self, use_scale): - # Test serialization with use_scale - layer = keras.layers.Attention(use_scale=use_scale) - - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.use_scale, use_scale) - - config = layer.get_config() - new_layer = keras.layers.Attention.from_config(config) - self.assertEqual(new_layer.use_scale, use_scale) - - -if __name__ == '__main__': - tf.test.main() + def test_calculate_scores_one_dim(self): + # Query tensor of shape [1, 1, 1] + q = np.array([[[1.1]]], dtype=np.float32) + # Key tensor of shape [1, 1, 1] + k = np.array([[[1.6]]], dtype=np.float32) + attention_layer = keras.layers.Attention() + attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) + actual = attention_layer._calculate_scores(query=q, key=k) + + # Expected tensor of shape [1, 1, 1]. 
+ # expected000 = 1.1*1.6 = 1.76 + expected = np.array([[[1.76]]], dtype=np.float32) + self.assertAllClose(expected, actual) + + def test_calculate_scores_multi_dim(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Key tensor of shape [1, 3, 4] + k = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + attention_layer = keras.layers.Attention() + attention_layer.build(input_shape=([1, 2, 4], [1, 3, 4])) + actual = attention_layer._calculate_scores(query=q, key=k) + + # Expected tensor of shape [1, 2, 3]. + # expected000 = 1.*1.5+1.1*1.6+1.2*1.7+1.3*1.8 = 7.64 + # expected001 = 1.*2.5+1.1*2.6+1.2*2.7+1.3*2.8 = 12.24 + # expected002 = 1.*3.5+1.1*3.6+1.2*3.7+1.3*3.8 = 16.84 + # expected010 = 2.*1.5+2.1*1.6+2.2*1.7+2.3*1.8 = 14.24 + # expected011 = 2.*2.5+2.1*2.6+2.2*2.7+2.3*2.8 = 22.84 + # expected012 = 2.*3.5+2.1*3.6+2.2*3.7+2.3*3.8 = 31.44 + expected = np.array( + [[[7.64, 12.24, 16.84], [14.24, 22.84, 31.44]]], dtype=np.float32 + ) + self.assertAllClose(expected, actual) + + def test_calculate_scores_multi_dim_concat(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Key tensor of shape [1, 3, 4] + k = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + attention_layer = keras.layers.Attention(score_mode="concat") + attention_layer.concat_score_weight = 1 + attention_layer.build(input_shape=([1, 2, 4], [1, 3, 4])) + actual = keras.backend.get_value( + attention_layer._calculate_scores(query=q, key=k) + ) + + # expected000 = tanh(1.+1.5) + tanh(1.1+1.6) + \ + # tanh(1.2+1.7) + tanh(1.3+1.8) = 3.96753427840 + # expected001 = tanh(1.+2.5) + tanh(1.1+2.6) + \ + # tanh(1.2+2.7) + tanh(1.3+2.8) = 3.99558784825 + # expected002 = tanh(1.+3.5) + tanh(1.1+3.6) + \ + # tanh(1.2+3.7) + tanh(1.3+3.8) = 3.99940254147 + # expected010 = tanh(2.+1.5) + tanh(2.1+1.6) + \ + # tanh(2.2+1.7) + tanh(2.3+1.8) = 3.99558784825 + # expected011 = tanh(2.+2.5) + tanh(2.1+2.6) + \ + # tanh(2.2+2.7) + tanh(2.3+2.8) = 3.99940254147 + # expected012 = tanh(2.+3.5) + tanh(2.1+3.6) + \ + # tanh(2.2+3.7) + tanh(2.3+3.8) = 3.99991913657 + expected = np.array( + [ + [ + [3.96753427840, 3.99558784825, 3.99940254147], + [3.99558784825, 3.99940254147, 3.99991913657], + ] + ], + dtype=np.float32, + ) + self.assertAllClose(expected, actual) + + def test_calculate_scores_one_dim_batch_size_two(self): + # Query tensor of shape [2, 1, 1] + q = np.array([[[1.1]], [[2.1]]], dtype=np.float32) + # Key tensor of shape [2, 1, 1] + k = np.array([[[1.6]], [[2.6]]], dtype=np.float32) + attention_layer = keras.layers.Attention() + attention_layer.build(input_shape=([2, 1, 1], [2, 1, 1])) + actual = attention_layer._calculate_scores(query=q, key=k) + + # Expected tensor of shape [2, 1, 1]. 
+ # expected000 = 1.1*1.6 = 1.76 + # expected100 = 2.1*2.6 = 5.46 + expected = np.array([[[1.76]], [[5.46]]], dtype=np.float32) + self.assertAllClose(expected, actual) + + def test_calculate_scores_one_dim_with_scale(self): + """Tests that scores are multiplied by scale.""" + # Query tensor of shape [1, 1, 1] + q = np.array([[[1.1]]], dtype=np.float32) + # Key tensor of shape [1, 1, 1] + k = np.array([[[1.6]]], dtype=np.float32) + attention_layer = keras.layers.Attention(use_scale=True) + attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) + attention_layer.scale = -2.0 + actual = attention_layer._calculate_scores(query=q, key=k) + + # Expected tensor of shape [1, 1, 1]. + # expected000 = -2*1.1*1.6 = -3.52 + expected = np.array([[[-3.52]]], dtype=np.float32) + self.assertAllClose(expected, actual) + + def test_calculate_scores_one_dim_with_scale_concat(self): + """Tests that scores are multiplied by scale.""" + # Query tensor of shape [1, 1, 1] + q = np.array([[[1.1]]], dtype=np.float32) + # Key tensor of shape [1, 1, 1] + k = np.array([[[1.6]]], dtype=np.float32) + attention_layer = keras.layers.Attention( + use_scale=True, score_mode="concat" + ) + attention_layer.concat_score_weight = 1 + attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) + attention_layer.scale = 2.0 + actual = keras.backend.get_value( + attention_layer._calculate_scores(query=q, key=k) + ) + + # Expected tensor of shape [1, 1, 1]. + # expected000 = tanh(2*(1.1+1.6)) = 0.9999592018254402 + expected = np.array([[[0.999959202]]], dtype=np.float32) + self.assertAllClose(expected, actual) + + def test_shape(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Value tensor of shape [1, 3, 4] + v = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.Attention() + actual = attention_layer([q, v], mask=[None, v_mask]) + + expected_shape = [1, 2, 4] + self.assertAllEqual(expected_shape, tf.shape(actual)) + + def test_shape_concat(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Value tensor of shape [1, 3, 4] + v = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.Attention(score_mode="concat") + attention_layer.concat_score_weight = 1 + actual = attention_layer([q, v], mask=[None, v_mask]) + + expected_shape = [1, 2, 4] + self.assertAllEqual(expected_shape, tf.shape(actual)) + + def test_shape_with_key(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Value tensor of shape [1, 3, 4] + v = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Key tensor of shape [1, 3, 4] + k = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.Attention() + actual = attention_layer([q, v, k], mask=[None, v_mask]) + + 
expected_shape = [1, 2, 4] + self.assertAllEqual(expected_shape, tf.shape(actual)) + + def test_shape_with_key_concat(self): + # Query tensor of shape [1, 2, 4] + q = np.array( + [[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]]], dtype=np.float32 + ) + # Value tensor of shape [1, 3, 4] + v = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Key tensor of shape [1, 3, 4] + k = np.array( + [ + [ + [1.5, 1.6, 1.7, 1.8], + [2.5, 2.6, 2.7, 2.8], + [3.5, 3.6, 3.7, 3.8], + ] + ], + dtype=np.float32, + ) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.Attention(score_mode="concat") + attention_layer.concat_score_weight = 1 + actual = attention_layer([q, v, k], mask=[None, v_mask]) + + expected_shape = [1, 2, 4] + self.assertAllEqual(expected_shape, tf.shape(actual)) + + def test_multi_dim(self): + # Query tensor of shape [1, 1, 1] + q = np.array([[[1.1]]], dtype=np.float32) + # Value tensor of shape [1, 3, 1] + v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.Attention() + actual = attention_layer([q, v], mask=[None, v_mask]) + + # Expected scores of shape [1, 1, 3] + # scores = [[[1.1*1.6, 1.1*0.7, -1.1*0.8]]] = [[[1.76, 0.77, -0.88]]] + # Expected attention distribution = softmax(scores) with zeros in + # positions where v_mask == False. + # => attention_distribution000 = exp(1.76)/(exp(1.76) + exp(0.77)) + # = 0.72908792234 + # attention_distribution001 = exp(0.77)/(exp(1.76) + exp(0.77)) + # = 0.27091207765 + # attention_distribution002 = 0 + # + # Expected tensor of shape [1, 1, 1]. + # expected000 = 0.72908792234 * 1.6 + 0.27091207765 * 0.7 - 0 * 0.8 + # = 1.3561791301 + expected = np.array([[[1.3561791301]]], dtype=np.float32) + self.assertAllClose(expected, actual) + + def test_multi_dim_with_key(self): + # Query tensor of shape [1, 1, 1] + q = np.array([[[1.1]]], dtype=np.float32) + # Value tensor of shape [1, 3, 1] + v = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) + # Key tensor of shape [1, 3, 1] + k = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.Attention() + actual = attention_layer([q, v, k], mask=[None, v_mask]) + + # Expected scores of shape [1, 1, 3] + # scores = [[[1.1*1.6, 1.1*0.7, -1.1*0.8]]] = [[[1.76, 0.77, -0.88]]] + # Expected attention distribution = softmax(scores) with zeros in + # positions where v_mask == False. + # => attention_distribution000 = exp(1.76)/(exp(1.76) + exp(0.77)) + # = 0.72908792234 + # attention_distribution001 = exp(0.77)/(exp(1.76) + exp(0.77)) + # = 0.27091207765 + # attention_distribution002 = 0 + # + # Expected tensor of shape [1, 1, 1]. 
+ # expected000 = 0.72908792234 * 0.5 + 0.27091207765 * 0.8 - 0 * 0.3 + # = 0.58127362329 + expected = np.array([[[0.58127362329]]], dtype=np.float32) + self.assertAllClose(expected, actual) + + @parameterized.named_parameters( + ("", False), + ("return_attention_scores", True), + ) + def test_multi_dim_with_query_mask(self, return_attention_scores): + # Query tensor of shape [1, 2, 1] + q = np.array([[[1.1], [-0.5]]], dtype=np.float32) + # Value tensor of shape [1, 3, 1] + v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) + # Query mask tensor of shape [1, 2] + q_mask = np.array([[True, False]], dtype=np.bool_) + # Value mask tensor of shape [1, 3] + v_mask = np.array([[True, True, False]], dtype=np.bool_) + attention_layer = keras.layers.Attention() + if return_attention_scores: + actual, actual_scores = attention_layer( + [q, v], + mask=[q_mask, v_mask], + return_attention_scores=return_attention_scores, + ) + else: + actual = attention_layer( + [q, v], + mask=[q_mask, v_mask], + return_attention_scores=return_attention_scores, + ) + + # Expected scores of shape [1, 2, 3] + # scores = [[[1.1*1.6, 1.1*0.7, -1.1*0.8], + # [-0.5*1.6, -0.5*0.7, 0.5*0.8]]] + # = [[[1.76, 0.77, -0.88], [-0.8, -0.35, 0.4]]] + # Expected attention distribution = softmax(scores) with zeros in + # positions where v_mask == False. + # => attention_distribution000 = exp(1.76)/(exp(1.76) + exp(0.77)) + # = 0.72908792234 + # attention_distribution001 = exp(0.77)/(exp(1.76) + exp(0.77)) + # = 0.27091207765 + # attention_distribution002 = 0 + # => attention_distribution010 = exp(-0.8)/(exp(-0.8) + exp(-0.35)) + # = 0.38936076605 + # attention_distribution011 = exp(-0.35)/(exp(-0.8) + exp(-0.35)) + # = 0.61063923394 + # attention_distribution012 = 0 + if return_attention_scores: + expected_scores = np.array( + [ + [ + [0.72908792234, 0.27091207765, 0.0], + [0.38936076605, 0.61063923394, 0.0], + ] + ], + dtype=np.float32, + ) + self.assertAllClose(expected_scores, actual_scores) + # Expected tensor of shape [1, 2, 1] with zeros where q_mask == False. 
+ # expected000 = 0.72908792234 * 1.6 + 0.27091207765 * 0.7 - 0 * 0.8 + # = 1.3561791301 + # expected010 = 0 + expected = np.array([[[1.3561791301], [0.0]]], dtype=np.float32) + self.assertAllClose(expected, actual) + + def test_scale_none(self): + """Tests that scale is None by default.""" + attention_layer = keras.layers.Attention() + attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) + self.assertIsNone(attention_layer.scale) + + def test_scale_init_eager(self): + """Tests that scale initializes to 1 when use_scale=True.""" + if not tf.executing_eagerly(): + self.skipTest("Only run in eager mode") + attention_layer = keras.layers.Attention(use_scale=True) + attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) + self.assertAllClose(1.0, attention_layer.scale.value()) + + def test_scale_init_graph(self): + """Tests that scale initializes to 1 when use_scale=True.""" + with self.cached_session() as sess: + attention_layer = keras.layers.Attention(use_scale=True) + attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) + sess.run(attention_layer.scale.initializer) + self.assertAllClose(1.0, attention_layer.scale.value()) + + @parameterized.named_parameters( + ("", False), + ("return_attention_scores", True), + ) + def test_self_attention_causal(self, return_attention_scores): + # Query-value tensor of shape [1, 3, 1] + q = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) + attention_layer = keras.layers.Attention() + if return_attention_scores: + actual, actual_scores = attention_layer( + [q, q], + return_attention_scores=return_attention_scores, + use_causal_mask=True, + ) + else: + actual = attention_layer( + [q, q], + return_attention_scores=return_attention_scores, + use_causal_mask=True, + ) + + # Expected scores of shape [1, 3, 3] + # scores = [[0.25, 0.4, -0.15], + # [0.4, 0.64, -0.24], + # [-0.15, -0.24, 0.09]] + # Expected attention distribution = softmax(scores) lower triangular + # => attention_distribution00 = [1., 0., 0.] + # attention_distribution01 + # = [exp(0.4), exp(0.64), 0.] / (exp(0.4) + exp(0.64)) + # = [0.44028635073, 0.55971364926, 0.] + # attention_distribution02 + # = [exp(-0.15), exp(-0.24), exp(0.09)] + # / (exp(-0.15) + exp(-0.24) + exp(0.09)) + # = [0.31395396638, 0.28693232061, 0.399113713] + if return_attention_scores: + expected_scores = np.array( + [ + [ + [1.0, 0.0, 0.0], + [0.44028635073, 0.55971364926, 0.0], + [0.31395396638, 0.28693232061, 0.399113713], + ] + ], + dtype=np.float32, + ) + self.assertAllClose(expected_scores, actual_scores) + # Expected tensor of shape [1, 3, 1]. 
+ # expected000 = 0.5 + # expected010 = 0.44028635073 * 0.5 + 0.55971364926 * 0.8 + # = 0.66791409477 + # expected020 = 0.31395396638 * 0.5 + \ + # 0.28693232061 * 0.8 -0.399113713 * 0.3 + # = 0.26678872577 + expected = np.array( + [[[0.5], [0.66791409477], [0.26678872577]]], dtype=np.float32 + ) + self.assertAllClose(expected, actual) + + def test_self_attention_causal_deprecated(self): + """Verify deprecated specification of causal masking still works.""" + # Query-value tensor of shape [1, 3, 1] + q = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) + attention_layer_new = keras.layers.Attention() + new_scores = attention_layer_new( + [q, q], + use_causal_mask=True, + ) + attention_layer_old = keras.layers.Attention(causal=True) + old_scores = attention_layer_old( + [q, q], + ) + self.assertAllClose(new_scores, old_scores) + + def test_inputs_not_list(self): + attention_layer = keras.layers.Attention() + q = np.array([[[1.1]]], dtype=np.float32) + with self.assertRaisesRegex( + ValueError, "Attention layer must be called on a list of inputs" + ): + attention_layer(q) + + def test_inputs_too_short(self): + attention_layer = keras.layers.Attention() + q = np.array([[[1.1]]], dtype=np.float32) + with self.assertRaisesRegex( + ValueError, "Attention layer accepts inputs list of length 2 or 3" + ): + attention_layer([q]) + + def test_inputs_too_long(self): + attention_layer = keras.layers.Attention() + q = np.array([[[1.1]]], dtype=np.float32) + with self.assertRaisesRegex( + ValueError, "Attention layer accepts inputs list of length 2 or 3" + ): + attention_layer([q, q, q, q]) + + def test_mask_not_list(self): + attention_layer = keras.layers.Attention() + q = np.array([[[1.1]]], dtype=np.float32) + mask = np.array([[True]], dtype=np.bool_) + with self.assertRaisesRegex( + ValueError, "Attention layer mask must be a list" + ): + attention_layer([q, q], mask=mask) + + def test_mask_too_short(self): + attention_layer = keras.layers.Attention() + q = np.array([[[1.1]]], dtype=np.float32) + mask = np.array([[True]], dtype=np.bool_) + with self.assertRaisesRegex( + ValueError, "Attention layer mask must be a list of length 2" + ): + attention_layer([q, q], mask=[mask]) + + def test_mask_too_long(self): + attention_layer = keras.layers.Attention() + q = np.array([[[1.1]]], dtype=np.float32) + mask = np.array([[True]], dtype=np.bool_) + with self.assertRaisesRegex( + ValueError, "Attention layer mask must be a list of length 2" + ): + attention_layer([q, q], mask=[mask, mask, mask]) + + def test_override_mask(self): + attention_layer = keras.layers.Attention() + q = core.Masking()(np.array([[[1.1]]], dtype=np.float32)) + mask = np.array([[False]], dtype=np.bool_) + actual = attention_layer([q, q], mask=[mask, mask]) + self.assertAllClose([[[0]]], actual) + + def test_implicit_mask(self): + attention_layer = keras.layers.Attention() + q = core.Masking(1.1)(np.array([[[1.1], [1]]], dtype=np.float32)) + v = core.Masking(1.2)(np.array([[[1.2], [1]]], dtype=np.float32)) + actual = attention_layer([q, v]) + self.assertAllClose([[[0], [1]]], actual) + + @parameterized.named_parameters( + ("", False), + ("use_scale", True), + ) + def test_serialization(self, use_scale): + # Test serialization with use_scale + layer = keras.layers.Attention(use_scale=use_scale) + + config = keras.layers.serialize(layer) + new_layer = keras.layers.deserialize(config) + self.assertEqual(new_layer.use_scale, use_scale) + + config = layer.get_config() + new_layer = keras.layers.Attention.from_config(config) + 
self.assertEqual(new_layer.use_scale, use_scale) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/attention/base_dense_attention.py b/keras/layers/attention/base_dense_attention.py index 13d48b6a5157..c51907465fd0 100644 --- a/keras/layers/attention/base_dense_attention.py +++ b/keras/layers/attention/base_dense_attention.py @@ -17,217 +17,246 @@ This file follows the terminology of https://arxiv.org/abs/1706.03762 Figure 2. Attention is formed by three tensors: Query, Key and Value. """ -# pylint: disable=g-classes-have-attributes + +import tensorflow.compat.v2 as tf +from absl import logging from keras import backend from keras.engine import base_layer from keras.utils import control_flow_util -import tensorflow.compat.v2 as tf +# isort: off +from tensorflow.python.util.tf_export import keras_export + +@keras_export("keras.__internal__.layers.BaseDenseAttention", v1=[]) class BaseDenseAttention(base_layer.BaseRandomLayer): - """Base Attention class for Dense networks. - - This class is suitable for Dense or CNN networks, and not for RNN networks. - - Implementations of attention mechanisms should inherit from this class, and - reuse the `apply_attention_scores()` method. - - Args: - causal: Boolean. Set to `True` for decoder self-attention. Adds a mask such - that position `i` cannot attend to positions `j > i`. This prevents the - flow of information from the future towards the past. - dropout: Float between 0 and 1. Fraction of the units to drop for the - attention scores. - - Call Args: - - inputs: List of the following tensors: - * query: Query `Tensor` of shape `[batch_size, Tq, dim]`. - * value: Value `Tensor` of shape `[batch_size, Tv, dim]`. - * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. If not - given, will use `value` for both `key` and `value`, which is the - most common case. - mask: List of the following tensors: - * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`. - If given, the output will be zero at the positions where - `mask==False`. - * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. - If given, will apply the mask such that values at positions where - `mask==False` do not contribute to the result. - training: Python boolean indicating whether the layer should behave in - training mode (adding dropout) or in inference mode (no dropout). - return_attention_scores: bool, if `True`, returns the attention scores - (after masking and softmax) as an additional output argument. - - Output: - - Attention outputs of shape `[batch_size, Tq, dim]`. - [Optional] Attention scores after masking and softmax with shape - `[batch_size, Tq, Tv]`. - """ - - def __init__(self, causal=False, dropout=0.0, **kwargs): - super().__init__(**kwargs) - self.causal = causal - self.dropout = dropout - self.supports_masking = True - - def _calculate_scores(self, query, key): - """Calculates attention scores. + """Base Attention class for Dense networks. + + This class is suitable for Dense or CNN networks, and not for RNN networks. + + Implementations of attention mechanisms should inherit from this class, and + reuse the `apply_attention_scores()` method. Args: - query: Query tensor of shape `[batch_size, Tq, dim]`. - key: Key tensor of shape `[batch_size, Tv, dim]`. + dropout: Float between 0 and 1. Fraction of the units to drop for the + attention scores. - Returns: - Tensor of shape `[batch_size, Tq, Tv]`. 
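The `base_dense_attention.py` hunk below moves causal masking from a constructor flag to a call-time flag. A minimal usage sketch of that migration (not part of this change; it mirrors `test_self_attention_causal_deprecated` above, with illustrative values):

```python
import numpy as np
import keras

q = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32)

# Deprecated spelling: still works, but now only logs a warning.
out_old = keras.layers.Attention(causal=True)([q, q])
# Preferred spelling after this change: a call-time argument.
out_new = keras.layers.Attention()([q, q], use_causal_mask=True)

# Both apply the same lower-triangular mask to the scores.
np.testing.assert_allclose(out_old, out_new)
```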
+ Call arguments: + inputs: List of the following tensors: + * query: Query `Tensor` of shape `[batch_size, Tq, dim]`. + * value: Value `Tensor` of shape `[batch_size, Tv, dim]`. + * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. If + not given, will use `value` for both `key` and `value`, which is + the most common case. + mask: List of the following tensors: + * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`. + If given, the output will be zero at the positions where + `mask==False`. + * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. + If given, will apply the mask such that values at positions + where `mask==False` do not contribute to the result. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + return_attention_scores: bool, if `True`, returns the attention scores + (after masking and softmax) as an additional output argument. + + Output: + + Attention outputs of shape `[batch_size, Tq, dim]`. + [Optional] Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. """ - def __init__(self, causal=False, dropout=0.0, **kwargs): - super().__init__(**kwargs) - self.causal = causal - self.dropout = dropout - self.supports_masking = True - - def _calculate_scores(self, query, key): - """Calculates attention scores. + def __init__(self, dropout=0.0, **kwargs): + # Deprecated field `causal` determines whether to use causal masking. + # Use `use_causal_mask` in call() method instead. + if "causal" in kwargs: + logging.warning( + "`causal` argument is deprecated. Please use `use_causal_mask` " + "in call() method to specify causal masking." + ) + self.causal = kwargs.pop("causal", False) + super().__init__(**kwargs) + self.dropout = dropout + self.supports_masking = True Args: - query: Query tensor of shape `[batch_size, Tq, dim]`. - key: Key tensor of shape `[batch_size, Tv, dim]`. + def build(self, input_shape): + # Skip RNG initialization if dropout rate is 0. This will let the layer + # be purely stateless, with no reference to any variable. + if self.dropout > 0: + super().build(input_shape) + self.built = True - Returns: - Tensor of shape `[batch_size, Tq, Tv]`. + def _calculate_scores(self, query, key): + """Calculates attention scores. - """ - return NotImplementedError - - def _apply_scores(self, scores, value, scores_mask=None, training=None): - """Applies attention scores to the given value tensor. - - To use this method in your attention layer, follow the steps: - - * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of shape - `[batch_size, Tv]` to calculate the attention `scores`. - * Pass `scores` and `value` tensors to this method. The method applies - `scores_mask`, calculates `attention_distribution = softmax(scores)`, then - returns `matmul(attention_distribution, value). - * Apply `query_mask` and return the result. - - Args: - scores: Scores float tensor of shape `[batch_size, Tq, Tv]`. - value: Value tensor of shape `[batch_size, Tv, dim]`. - scores_mask: A boolean mask `Tensor` of shape `[batch_size, 1, Tv]` or - `[batch_size, Tq, Tv]`. If given, scores at positions where - `scores_mask==False` do not contribute to the result. It must contain - at least one `True` value in each line along the last dimension. - training: Python boolean indicating whether the layer should behave in - training mode (adding dropout) or in inference mode (no dropout). - - Returns: - Tensor of shape `[batch_size, Tq, dim]`. - Attention scores after masking and softmax with shape - `[batch_size, Tq, Tv]`. - """ - if scores_mask is not None: - padding_mask = tf.logical_not(scores_mask) - # Bias so padding positions do not contribute to attention distribution. - # Note 65504. is the max float16 value. - if scores.dtype is tf.float16: - scores -= 65504. 
* tf.cast(padding_mask, dtype=scores.dtype) - else: - scores -= 1.e9 * tf.cast(padding_mask, dtype=scores.dtype) - if training is None: - training = backend.learning_phase() - weights = tf.nn.softmax(scores) - - def dropped_weights(): - return self._random_generator.dropout(weights, rate=self.dropout) - - weights = control_flow_util.smart_cond(training, dropped_weights, - lambda: tf.identity(weights)) - return tf.matmul(weights, value), weights - - # TODO(b/125916026): Consider exposing a __call__ method with named args. - def call(self, - inputs, - mask=None, - training=None, - return_attention_scores=False): - self._validate_call_args(inputs=inputs, mask=mask) - q = inputs[0] - v = inputs[1] - k = inputs[2] if len(inputs) > 2 else v - q_mask = mask[0] if mask else None - v_mask = mask[1] if mask else None - scores = self._calculate_scores(query=q, key=k) - if v_mask is not None: - # Mask of shape [batch_size, 1, Tv]. - v_mask = tf.expand_dims(v_mask, axis=-2) - if self.causal: - # Creates a lower triangular mask, so position i cannot attend to - # positions j>i. This prevents the flow of information from the future - # into the past. - scores_shape = tf.shape(scores) - # causal_mask_shape = [1, Tq, Tv]. - causal_mask_shape = tf.concat( - [tf.ones_like(scores_shape[:-2]), scores_shape[-2:]], - axis=0) - causal_mask = _lower_triangular_mask(causal_mask_shape) - else: - causal_mask = None - scores_mask = _merge_masks(v_mask, causal_mask) - result, attention_scores = self._apply_scores( - scores=scores, value=v, scores_mask=scores_mask, training=training) - if q_mask is not None: - # Mask of shape [batch_size, Tq, 1]. - q_mask = tf.expand_dims(q_mask, axis=-1) - result *= tf.cast(q_mask, dtype=result.dtype) - if return_attention_scores: - return result, attention_scores - return result - - def compute_mask(self, inputs, mask=None): - self._validate_call_args(inputs=inputs, mask=mask) - if mask: - q_mask = mask[0] - if q_mask is None: + Args: + query: Query tensor of shape `[batch_size, Tq, dim]`. + key: Key tensor of shape `[batch_size, Tv, dim]`. + + Returns: + Tensor of shape `[batch_size, Tq, Tv]`. + """ + raise NotImplementedError + + def _apply_scores(self, scores, value, scores_mask=None, training=None): + """Applies attention scores to the given value tensor. + + To use this method in your attention layer, follow the steps: + + * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of + shape `[batch_size, Tv]` to calculate the attention `scores`. + * Pass `scores` and `value` tensors to this method. The method applies + `scores_mask`, calculates + `attention_distribution = softmax(scores)`, then returns + `matmul(attention_distribution, value)`. + * Apply `query_mask` and return the result. + + Args: + scores: Scores float tensor of shape `[batch_size, Tq, Tv]`. + value: Value tensor of shape `[batch_size, Tv, dim]`. + scores_mask: A boolean mask `Tensor` of shape `[batch_size, 1, Tv]` + or `[batch_size, Tq, Tv]`. If given, scores at positions where + `scores_mask==False` do not contribute to the result. It must + contain at least one `True` value in each line along the last + dimension. + training: Python boolean indicating whether the layer should behave + in training mode (adding dropout) or in inference mode + (no dropout). + + Returns: + Tensor of shape `[batch_size, Tq, dim]`. + Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. 
+ """ + if scores_mask is not None: + padding_mask = tf.logical_not(scores_mask) + # Bias so padding positions do not contribute to attention + # distribution. Note 65504. is the max float16 value. + if scores.dtype is tf.float16: + scores -= 65504.0 * tf.cast(padding_mask, dtype=scores.dtype) + else: + scores -= 1.0e9 * tf.cast(padding_mask, dtype=scores.dtype) + if training is None: + training = backend.learning_phase() + weights = tf.nn.softmax(scores) + + if self.dropout > 0: + + def dropped_weights(): + return self._random_generator.dropout( + weights, rate=self.dropout + ) + + weights = control_flow_util.smart_cond( + training, dropped_weights, lambda: tf.identity(weights) + ) + return tf.matmul(weights, value), weights + + # TODO(b/125916026): Consider exposing a __call__ method with named args. + def call( + self, + inputs, + mask=None, + training=None, + return_attention_scores=False, + use_causal_mask=False, + ): + self._validate_call_args(inputs=inputs, mask=mask) + q = inputs[0] + v = inputs[1] + k = inputs[2] if len(inputs) > 2 else v + q_mask = mask[0] if mask else None + v_mask = mask[1] if mask else None + scores = self._calculate_scores(query=q, key=k) + if v_mask is not None: + # Mask of shape [batch_size, 1, Tv]. + v_mask = tf.expand_dims(v_mask, axis=-2) + if self.causal or use_causal_mask: + # Creates a lower triangular mask, so position i cannot attend to + # positions j>i. This prevents the flow of information from the + # future into the past. + scores_shape = tf.shape(scores) + # causal_mask_shape = [1, Tq, Tv]. + causal_mask_shape = tf.concat( + [tf.ones_like(scores_shape[:-2]), scores_shape[-2:]], axis=0 + ) + causal_mask = _lower_triangular_mask(causal_mask_shape) + else: + causal_mask = None + scores_mask = _merge_masks(v_mask, causal_mask) + result, attention_scores = self._apply_scores( + scores=scores, value=v, scores_mask=scores_mask, training=training + ) + if q_mask is not None: + # Mask of shape [batch_size, Tq, 1]. + q_mask = tf.expand_dims(q_mask, axis=-1) + result *= tf.cast(q_mask, dtype=result.dtype) + if return_attention_scores: + return result, attention_scores + return result + + def compute_mask(self, inputs, mask=None): + self._validate_call_args(inputs=inputs, mask=mask) + if mask: + q_mask = mask[0] + if q_mask is None: + return None + return tf.convert_to_tensor(q_mask) return None - return tf.convert_to_tensor(q_mask) - return None - - def compute_output_shape(self, input_shape): - # return_attention_scores argument of BaseDenseAttention.call method - # is ignored. Output shape of attention_scores cannot be returned. - return tf.TensorShape(input_shape[0]) - - def _validate_call_args(self, inputs, mask): - """Validates arguments of the call method.""" - class_name = self.__class__.__name__ - if not isinstance(inputs, list): - raise ValueError( - f'{class_name} layer must be called on a list of inputs, ' - 'namely [query, value] or [query, value, key]. ' - f'Received: {inputs}.') - if len(inputs) < 2 or len(inputs) > 3: - raise ValueError( - f'{class_name} layer accepts inputs list of length 2 or 3, ' - 'namely [query, value] or [query, value, key]. ' - f'Received length: {len(inputs)}.') - if mask: - if not isinstance(mask, list): - raise ValueError( - f'{class_name} layer mask must be a list, ' - f'namely [query_mask, value_mask]. Received: {mask}.') - if len(mask) < 2 or len(mask) > len(inputs): - raise ValueError( - f'{class_name} layer mask must be a list of length 2, ' - f'namely [query_mask, value_mask]. 
Received length: {len(mask)}.') - - def get_config(self): - config = { - 'causal': self.causal, - 'dropout': self.dropout, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + # return_attention_scores argument of BaseDenseAttention.call method + # is ignored. Output shape of attention_scores cannot be returned. + return tf.TensorShape(input_shape[0]) + + def _validate_call_args(self, inputs, mask): + """Validates arguments of the call method.""" + class_name = self.__class__.__name__ + if not isinstance(inputs, list): + raise ValueError( + f"{class_name} layer must be called on a list of inputs, " + "namely [query, value] or [query, value, key]. " + f"Received: {inputs}." + ) + if len(inputs) < 2 or len(inputs) > 3: + raise ValueError( + f"{class_name} layer accepts inputs list of length 2 or 3, " + "namely [query, value] or [query, value, key]. " + f"Received length: {len(inputs)}." + ) + if mask: + if not isinstance(mask, list): + raise ValueError( + f"{class_name} layer mask must be a list, " + f"namely [query_mask, value_mask]. Received: {mask}." + ) + if len(mask) < 2 or len(mask) > len(inputs): + raise ValueError( + f"{class_name} layer mask must be a list of length 2, " + "namely [query_mask, value_mask]. " + f"Received length: {len(mask)}." + ) + + def get_config(self): + config = { + "dropout": self.dropout, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) def _lower_triangular_mask(shape): - """Creates a lower-triangular boolean mask over the last 2 dimensions.""" - row_index = tf.cumsum( - tf.ones(shape=shape, dtype=tf.int32), axis=-2) - col_index = tf.cumsum( - tf.ones(shape=shape, dtype=tf.int32), axis=-1) - return tf.greater_equal(row_index, col_index) + """Creates a lower-triangular boolean mask over the last 2 dimensions.""" + row_index = tf.cumsum(tf.ones(shape=shape, dtype=tf.int32), axis=-2) + col_index = tf.cumsum(tf.ones(shape=shape, dtype=tf.int32), axis=-1) + return tf.greater_equal(row_index, col_index) def _merge_masks(x, y): - if x is None: - return y - if y is None: - return x - return tf.logical_and(x, y) + if x is None: + return y + if y is None: + return x + return tf.logical_and(x, y) diff --git a/keras/layers/attention/base_dense_attention_test.py b/keras/layers/attention/base_dense_attention_test.py index 7c8c98504224..86b9f4b05a7d 100644 --- a/keras/layers/attention/base_dense_attention_test.py +++ b/keras/layers/attention/base_dense_attention_test.py @@ -14,163 +14,186 @@ # ============================================================================== """Tests BaseDenseAttention layer.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized -from keras.layers.attention.base_dense_attention import _lower_triangular_mask + from keras.layers.attention.base_dense_attention import BaseDenseAttention +from keras.layers.attention.base_dense_attention import _lower_triangular_mask from keras.testing_infra import test_combinations -import numpy as np -import tensorflow.compat.v2 as tf -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class BaseDenseAttentionTest(tf.test.TestCase, parameterized.TestCase): - - def test_one_dim_with_mask(self): - # Scores tensor of shape [1, 1, 1] - scores = np.array([[[1.1]]], dtype=np.float32) - # Value tensor of shape [1, 
1, 1] - v = np.array([[[1.6]]], dtype=np.float32) - # Scores mask tensor of shape [1, 1, 1] - scores_mask = np.array([[[True]]], dtype=np.bool_) - actual, actual_scores = BaseDenseAttention()._apply_scores( - scores=scores, value=v, scores_mask=scores_mask) - - # Expected softmax_scores = [[[1]]] - expected_scores = np.array([[[1.]]], dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) - # Expected tensor of shape [1, 1, 1]. - # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 - expected = np.array([[[1.6]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_one_dim_no_mask(self): - # Scores tensor of shape [1, 1, 1] - scores = np.array([[[1.1]]], dtype=np.float32) - # Value tensor of shape [1, 1, 1] - v = np.array([[[1.6]]], dtype=np.float32) - actual, actual_scores = BaseDenseAttention()._apply_scores( - scores=scores, value=v) - - # Expected softmax_scores = [[[1]]] - expected_scores = np.array([[[1.]]], dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) - # Expected tensor of shape [1, 1, 1]. - # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 - expected = np.array([[[1.6]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_multi_dim_with_mask(self): - # Scores tensor of shape [1, 1, 3] - scores = np.array([[[1., 0., 1.]]], dtype=np.float32) - # Value tensor of shape [1, 3, 1] - v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - # Scores mask tensor of shape [1, 1, 3] - scores_mask = np.array([[[True, True, False]]], dtype=np.bool_) - actual, actual_scores = BaseDenseAttention()._apply_scores( - scores=scores, value=v, scores_mask=scores_mask) - - # Expected softmax scores = softmax(scores) with zeros in positions where - # v_mask == False. - # => softmax_scores000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863 - # softmax_scores001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137 - # softmax_scores002 = 0 - expected_scores = np.array([[[0.73105857863, 0.26894142137, 0.]]], - dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) - # Expected tensor of shape [1, 1, 1]. - # expected000 = 0.73105857863 * 1.6 + 0.26894142137 * 0.7 - 0 * 0.8 - # = 1.35795272077 - expected = np.array([[[1.35795272077]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_multi_dim_no_mask(self): - # Scores tensor of shape [1, 1, 3] - scores = np.array([[[1., 0., 1.]]], dtype=np.float32) - # Value tensor of shape [1, 3, 1] - v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - actual, actual_scores = BaseDenseAttention()._apply_scores( - scores=scores, value=v) - - # Expected softmax_scores = softmax(scores). - # => softmax_scores000 = exp(1)/(exp(1) + exp(0) + exp(1)) - # = 0.42231879825 - # softmax_scores001 = exp(0)/(exp(1) + exp(0) + exp(1)) - # = 0.15536240349 - # softmax_scores002 = exp(1)/(exp(1) + exp(0) + exp(1)) - # = 0.42231879825 - expected_scores = np.array( - [[[0.42231879825, 0.15536240349, 0.42231879825]]], dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) - # Expected tensor of shape [1, 1, 1]. 
- # expected000 = 0.42231879825 * 1.6 + 0.15536240349 * 0.7 - # - 0.42231879825 * 0.8 - # = 0.44660872104 - expected = np.array([[[0.44660872104]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_one_dim_batch_size_two(self): - # Scores tensor of shape [2, 1, 1] - scores = np.array([[[1.1]], [[2.1]]], dtype=np.float32) - # Value tensor of shape [2, 1, 1] - v = np.array([[[1.6]], [[2.6]]], dtype=np.float32) - # Scpres mask tensor of shape [2, 1, 1] - scores_mask = np.array([[[True]], [[True]]], dtype=np.bool_) - actual, actual_scores = BaseDenseAttention()._apply_scores( - scores=scores, value=v, scores_mask=scores_mask) - - # Expected softmax_scores = [[[1]], [[1]]] - expected_scores = np.array([[[1.]], [[1.]]], dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) - # Expected tensor of shape [2, 1, 1]. - # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 - # expected100 = softmax_scores[1, 0] * 2.6 = 2.6 - expected = np.array([[[1.6]], [[2.6]]], dtype=np.float32) - self.assertAllClose(expected, actual) - - def test_shape_with_dropout(self): - # scores: Scores float tensor of shape `[batch_size, tq, tv]`. - # value: Value tensor of shape `[batch_size, tv, dim]`. - batch_size = 4 - tq = 5 - tv = 6 - dim = 7 - scores = np.ones((batch_size, tq, tv)) - value = np.ones((batch_size, tv, dim)) - actual, actual_scores = BaseDenseAttention( - dropout=0.1)._apply_scores( - scores=scores, value=value, training=False) - - # Expected Tensor of shape `[batch_size, tq, tv]`. - expected_scores_shape = [batch_size, tq, tv] - self.assertAllEqual(expected_scores_shape, tf.shape(actual_scores)) - # Expected Tensor of shape `[batch_size, tq, dim]`. - expected_shape = [batch_size, tq, dim] - self.assertAllEqual(expected_shape, tf.shape(actual)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_one_dim_with_mask(self): + # Scores tensor of shape [1, 1, 1] + scores = np.array([[[1.1]]], dtype=np.float32) + # Value tensor of shape [1, 1, 1] + v = np.array([[[1.6]]], dtype=np.float32) + # Scores mask tensor of shape [1, 1, 1] + scores_mask = np.array([[[True]]], dtype=np.bool_) + actual, actual_scores = BaseDenseAttention()._apply_scores( + scores=scores, value=v, scores_mask=scores_mask + ) + + # Expected softmax_scores = [[[1]]] + expected_scores = np.array([[[1.0]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) + # Expected tensor of shape [1, 1, 1]. + # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 + expected = np.array([[[1.6]]], dtype=np.float32) + self.assertAllClose(expected, actual) + + def test_one_dim_no_mask(self): + # Scores tensor of shape [1, 1, 1] + scores = np.array([[[1.1]]], dtype=np.float32) + # Value tensor of shape [1, 1, 1] + v = np.array([[[1.6]]], dtype=np.float32) + actual, actual_scores = BaseDenseAttention()._apply_scores( + scores=scores, value=v + ) + + # Expected softmax_scores = [[[1]]] + expected_scores = np.array([[[1.0]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) + # Expected tensor of shape [1, 1, 1]. 
+        # expected000 = softmax_scores[0, 0] * 1.6 = 1.6
+        expected = np.array([[[1.6]]], dtype=np.float32)
+        self.assertAllClose(expected, actual)
+
+    def test_multi_dim_with_mask(self):
+        # Scores tensor of shape [1, 1, 3]
+        scores = np.array([[[1.0, 0.0, 1.0]]], dtype=np.float32)
+        # Value tensor of shape [1, 3, 1]
+        v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32)
+        # Scores mask tensor of shape [1, 1, 3]
+        scores_mask = np.array([[[True, True, False]]], dtype=np.bool_)
+        actual, actual_scores = BaseDenseAttention()._apply_scores(
+            scores=scores, value=v, scores_mask=scores_mask
+        )
+
+        # Expected softmax scores = softmax(scores) with zeros in positions
+        # where v_mask == False.
+        # => softmax_scores000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863
+        #    softmax_scores001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137
+        #    softmax_scores002 = 0
+        expected_scores = np.array(
+            [[[0.73105857863, 0.26894142137, 0.0]]], dtype=np.float32
+        )
+        self.assertAllClose(expected_scores, actual_scores)
+        # Expected tensor of shape [1, 1, 1].
+        # expected000 = 0.73105857863 * 1.6 + 0.26894142137 * 0.7 - 0 * 0.8
+        #             = 1.35795272077
+        expected = np.array([[[1.35795272077]]], dtype=np.float32)
+        self.assertAllClose(expected, actual)
+
+    def test_multi_dim_no_mask(self):
+        # Scores tensor of shape [1, 1, 3]
+        scores = np.array([[[1.0, 0.0, 1.0]]], dtype=np.float32)
+        # Value tensor of shape [1, 3, 1]
+        v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32)
+        actual, actual_scores = BaseDenseAttention()._apply_scores(
+            scores=scores, value=v
+        )
+
+        # Expected softmax_scores = softmax(scores).
+        # => softmax_scores000 = exp(1)/(exp(1) + exp(0) + exp(1))
+        #                      = 0.42231879825
+        #    softmax_scores001 = exp(0)/(exp(1) + exp(0) + exp(1))
+        #                      = 0.15536240349
+        #    softmax_scores002 = exp(1)/(exp(1) + exp(0) + exp(1))
+        #                      = 0.42231879825
+        expected_scores = np.array(
+            [[[0.42231879825, 0.15536240349, 0.42231879825]]], dtype=np.float32
+        )
+        self.assertAllClose(expected_scores, actual_scores)
+        # Expected tensor of shape [1, 1, 1].
+        # expected000 = 0.42231879825 * 1.6 + 0.15536240349 * 0.7
+        #               - 0.42231879825 * 0.8
+        #             = 0.44660872104
+        expected = np.array([[[0.44660872104]]], dtype=np.float32)
+        self.assertAllClose(expected, actual)
+
+    def test_one_dim_batch_size_two(self):
+        # Scores tensor of shape [2, 1, 1]
+        scores = np.array([[[1.1]], [[2.1]]], dtype=np.float32)
+        # Value tensor of shape [2, 1, 1]
+        v = np.array([[[1.6]], [[2.6]]], dtype=np.float32)
+        # Scores mask tensor of shape [2, 1, 1]
+        scores_mask = np.array([[[True]], [[True]]], dtype=np.bool_)
+        actual, actual_scores = BaseDenseAttention()._apply_scores(
+            scores=scores, value=v, scores_mask=scores_mask
+        )
+
+        # Expected softmax_scores = [[[1]], [[1]]]
+        expected_scores = np.array([[[1.0]], [[1.0]]], dtype=np.float32)
+        self.assertAllClose(expected_scores, actual_scores)
+        # Expected tensor of shape [2, 1, 1].
+        # expected000 = softmax_scores[0, 0] * 1.6 = 1.6
+        # expected100 = softmax_scores[1, 0] * 2.6 = 2.6
+        expected = np.array([[[1.6]], [[2.6]]], dtype=np.float32)
+        self.assertAllClose(expected, actual)
+
+    def test_shape_with_dropout(self):
+        # scores: Scores float tensor of shape `[batch_size, tq, tv]`.
+        # value: Value tensor of shape `[batch_size, tv, dim]`.
+ batch_size = 4 + tq = 5 + tv = 6 + dim = 7 + scores = np.ones((batch_size, tq, tv)) + value = np.ones((batch_size, tv, dim)) + actual, actual_scores = BaseDenseAttention(dropout=0.1)._apply_scores( + scores=scores, value=value, training=False + ) + + # Expected Tensor of shape `[batch_size, tq, tv]`. + expected_scores_shape = [batch_size, tq, tv] + self.assertAllEqual(expected_scores_shape, tf.shape(actual_scores)) + # Expected Tensor of shape `[batch_size, tq, dim]`. + expected_shape = [batch_size, tq, dim] + self.assertAllEqual(expected_shape, tf.shape(actual)) + + def test_skip_rng_init_when_no_dropout(self): + batch_size = 4 + tq = 5 + tv = 6 + dim = 7 + scores = np.ones((batch_size, tq, tv)) + value = np.ones((batch_size, tv, dim)) + layer = BaseDenseAttention() + layer.build(None) # The input shape is not used by this layer + _, _ = layer._apply_scores(scores=scores, value=value, training=True) + # Make sure the rng is not built and no tf.random.Generator created. + self.assertFalse(layer._random_generator._built) + self.assertIsNone(getattr(layer._random_generator, "_generator", None)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class LowerTriangularMaskTest(tf.test.TestCase, parameterized.TestCase): - - def test_square_shape(self): - actual = _lower_triangular_mask([3, 3]) - expected = np.array( - [[True, False, False], [True, True, False], [True, True, True]], - dtype=np.bool_) - self.assertAllEqual(expected, actual) - - def test_orthogonal_shape(self): - actual = _lower_triangular_mask([3, 2]) - expected = np.array([[True, False], [True, True], [True, True]], - dtype=np.bool_) - self.assertAllEqual(expected, actual) - - def test_three_dim(self): - actual = _lower_triangular_mask([1, 3, 3]) - expected = np.array( - [[[True, False, False], [True, True, False], [True, True, True]]], - dtype=np.bool_) - self.assertAllEqual(expected, actual) - - -if __name__ == '__main__': - tf.test.main() + def test_square_shape(self): + actual = _lower_triangular_mask([3, 3]) + expected = np.array( + [[True, False, False], [True, True, False], [True, True, True]], + dtype=np.bool_, + ) + self.assertAllEqual(expected, actual) + + def test_orthogonal_shape(self): + actual = _lower_triangular_mask([3, 2]) + expected = np.array( + [[True, False], [True, True], [True, True]], dtype=np.bool_ + ) + self.assertAllEqual(expected, actual) + + def test_three_dim(self): + actual = _lower_triangular_mask([1, 3, 3]) + expected = np.array( + [[[True, False, False], [True, True, False], [True, True, True]]], + dtype=np.bool_, + ) + self.assertAllEqual(expected, actual) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/attention/multi_head_attention.py b/keras/layers/attention/multi_head_attention.py index 49711f29099d..e2b5fc3d76e4 100644 --- a/keras/layers/attention/multi_head_attention.py +++ b/keras/layers/attention/multi_head_attention.py @@ -13,12 +13,15 @@ # limitations under the License. 
 # ==============================================================================
 """Keras-based multi-head attention layer."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
+
 import collections
 import math
 import string
 
+import numpy as np
+import tensorflow.compat.v2 as tf
+
 from keras import constraints
 from keras import initializers
 from keras import regularizers
@@ -27,521 +30,702 @@ from keras.layers import core
 from keras.layers import regularization
 from keras.utils import tf_utils
-import numpy as np
-import tensorflow.compat.v2 as tf
+# isort: off
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util.tf_export import keras_export
 
-
 _CHR_IDX = string.ascii_lowercase
 
 
 def _build_attention_equation(rank, attn_axes):
-  """Builds einsum equations for the attention computation.
-
-  Query, key, value inputs after projection are expected to have the shape as:
-  `(bs, <non-attention dims>, <attention dims>, num_heads, channels)`.
-  `bs` and `<non-attention dims>` are treated as `<batch dims>`.
-
-  The attention operations can be generalized:
-  (1) Query-key dot product:
-  `(<batch dims>, <query attention dims>, num_heads, channels), (<batch dims>,
-  <key attention dims>, num_heads, channels) -> (<batch dims>,
-  num_heads, <query attention dims>, <key attention dims>)`
-  (2) Combination:
-  `(<batch dims>, num_heads, <query attention dims>, <key attention dims>),
-  (<batch dims>, <value attention dims>, num_heads, channels) -> (<batch
-  dims>, <query attention dims>, num_heads, channels)`
-
-  Args:
-    rank: Rank of query, key, value tensors.
-    attn_axes: List/tuple of axes, `[-1, rank)`,
-      that attention will be applied to.
-
-  Returns:
-    Einsum equations.
-  """
-  target_notation = _CHR_IDX[:rank]
-  # `batch_dims` includes the head dim.
-  batch_dims = tuple(np.delete(range(rank), attn_axes + (rank - 1,)))
-  letter_offset = rank
-  source_notation = ""
-  for i in range(rank):
-    if i in batch_dims or i == rank - 1:
-      source_notation += target_notation[i]
-    else:
-      source_notation += _CHR_IDX[letter_offset]
-      letter_offset += 1
-
-  product_notation = "".join([target_notation[i] for i in batch_dims] +
-                             [target_notation[i] for i in attn_axes] +
-                             [source_notation[i] for i in attn_axes])
-  dot_product_equation = "%s,%s->%s" % (source_notation, target_notation,
-                                        product_notation)
-  attn_scores_rank = len(product_notation)
-  combine_equation = "%s,%s->%s" % (product_notation, source_notation,
-                                    target_notation)
-  return dot_product_equation, combine_equation, attn_scores_rank
+    """Builds einsum equations for the attention computation.
+
+    Query, key, value inputs after projection are expected to have the shape as:
+    `(bs, <non-attention dims>, <attention dims>, num_heads, channels)`.
+    `bs` and `<non-attention dims>` are treated as `<batch dims>`.
+
+    The attention operations can be generalized:
+    (1) Query-key dot product:
+    `(<batch dims>, <query attention dims>, num_heads, channels), (<batch dims>,
+    <key attention dims>, num_heads, channels) -> (<batch dims>,
+    num_heads, <query attention dims>, <key attention dims>)`
+    (2) Combination:
+    `(<batch dims>, num_heads, <query attention dims>, <key attention dims>),
+    (<batch dims>, <value attention dims>, num_heads, channels) -> (<batch
+    dims>, <query attention dims>, num_heads, channels)`
+
+    Args:
+        rank: Rank of query, key, value tensors.
+        attn_axes: List/tuple of axes, `[-1, rank)`,
+            that attention will be applied to.
+
+    Returns:
+        Einsum equations.
+    """
+    target_notation = _CHR_IDX[:rank]
+    # `batch_dims` includes the head dim.
+ batch_dims = tuple(np.delete(range(rank), attn_axes + (rank - 1,))) + letter_offset = rank + source_notation = "" + for i in range(rank): + if i in batch_dims or i == rank - 1: + source_notation += target_notation[i] + else: + source_notation += _CHR_IDX[letter_offset] + letter_offset += 1 + + product_notation = "".join( + [target_notation[i] for i in batch_dims] + + [target_notation[i] for i in attn_axes] + + [source_notation[i] for i in attn_axes] + ) + dot_product_equation = "%s,%s->%s" % ( + source_notation, + target_notation, + product_notation, + ) + attn_scores_rank = len(product_notation) + combine_equation = "%s,%s->%s" % ( + product_notation, + source_notation, + target_notation, + ) + return dot_product_equation, combine_equation, attn_scores_rank def _build_proj_equation(free_dims, bound_dims, output_dims): - """Builds an einsum equation for projections inside multi-head attention.""" - input_str = "" - kernel_str = "" - output_str = "" - bias_axes = "" - letter_offset = 0 - for i in range(free_dims): - char = _CHR_IDX[i + letter_offset] - input_str += char - output_str += char - - letter_offset += free_dims - for i in range(bound_dims): - char = _CHR_IDX[i + letter_offset] - input_str += char - kernel_str += char - - letter_offset += bound_dims - for i in range(output_dims): - char = _CHR_IDX[i + letter_offset] - kernel_str += char - output_str += char - bias_axes += char - equation = "%s,%s->%s" % (input_str, kernel_str, output_str) - - return equation, bias_axes, len(output_str) + """Builds an einsum equation for projections inside multi-head attention.""" + input_str = "" + kernel_str = "" + output_str = "" + bias_axes = "" + letter_offset = 0 + for i in range(free_dims): + char = _CHR_IDX[i + letter_offset] + input_str += char + output_str += char + + letter_offset += free_dims + for i in range(bound_dims): + char = _CHR_IDX[i + letter_offset] + input_str += char + kernel_str += char + + letter_offset += bound_dims + for i in range(output_dims): + char = _CHR_IDX[i + letter_offset] + kernel_str += char + output_str += char + bias_axes += char + equation = f"{input_str},{kernel_str}->{output_str}" + + return equation, bias_axes, len(output_str) def _get_output_shape(output_rank, known_last_dims): - return [None] * (output_rank - len(known_last_dims)) + list(known_last_dims) + return [None] * (output_rank - len(known_last_dims)) + list(known_last_dims) @keras_export("keras.layers.MultiHeadAttention") class MultiHeadAttention(Layer): - """MultiHeadAttention layer. - - This is an implementation of multi-headed attention as described in the paper - "Attention is all you Need" (Vaswani et al., 2017). - If `query`, `key,` `value` are the same, then - this is self-attention. Each timestep in `query` attends to the - corresponding sequence in `key`, and returns a fixed-width vector. - - This layer first projects `query`, `key` and `value`. These are - (effectively) a list of tensors of length `num_attention_heads`, where the - corresponding shapes are `(batch_size, , key_dim)`, - `(batch_size, , key_dim)`, - `(batch_size, , value_dim)`. - - Then, the query and key tensors are dot-producted and scaled. These are - softmaxed to obtain attention probabilities. The value tensors are then - interpolated by these probabilities, then concatenated back to a single - tensor. - - Finally, the result tensor with the last dimension as value_dim can take an - linear projection and return. 
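To make the einsum helpers above concrete, here is a small, hand-derived sanity check of the strings they produce for the common case of a rank-3 input and rank-4 projected tensors with `attention_axes=(1,)`. This sketch is editorial (not part of the patch); shapes and values are made up:

```python
import tensorflow as tf

# _build_proj_equation(free_dims=2, bound_dims=1, output_dims=2) yields
# "abc,cde->abde" (bias axes "de"): it projects (B, S, dim) against a
# (dim, num_heads, key_dim) kernel to get (B, S, num_heads, key_dim).
x = tf.random.normal((2, 5, 16))      # (B, S, dim)
w = tf.random.normal((16, 4, 8))      # (dim, num_heads, key_dim)
q = tf.einsum("abc,cde->abde", x, w)
print(q.shape)                        # (2, 5, 4, 8)

# _build_attention_equation(rank=4, attn_axes=(1,)) yields the dot-product
# equation "aecd,abcd->acbe" and the combine equation "acbe,aecd->abcd".
k = tf.einsum("abc,cde->abde", x, w)  # (B, S, num_heads, key_dim)
scores = tf.einsum("aecd,abcd->acbe", k, q)
print(scores.shape)                   # (B, num_heads, T, S) = (2, 4, 5, 5)
out = tf.einsum("acbe,aecd->abcd", scores, k)
print(out.shape)                      # (2, 5, 4, 8)
```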
- - When using MultiHeadAttention inside a custom Layer, the custom Layer must - implement `build()` and call MultiHeadAttention's `_build_from_signature()`. - This enables weights to be restored correctly when the model is loaded. - TODO(b/172609172): link to documentation about calling custom build functions - when used in a custom Layer. - - Examples: - - Performs 1D cross-attention over two sequence inputs with an attention mask. - Returns the additional attention weights over heads. - - >>> layer = MultiHeadAttention(num_heads=2, key_dim=2) - >>> target = tf.keras.Input(shape=[8, 16]) - >>> source = tf.keras.Input(shape=[4, 16]) - >>> output_tensor, weights = layer(target, source, - ... return_attention_scores=True) - >>> print(output_tensor.shape) - (None, 8, 16) - >>> print(weights.shape) - (None, 2, 8, 4) - - Performs 2D self-attention over a 5D input tensor on axes 2 and 3. - - >>> layer = MultiHeadAttention(num_heads=2, key_dim=2, attention_axes=(2, 3)) - >>> input_tensor = tf.keras.Input(shape=[5, 3, 4, 16]) - >>> output_tensor = layer(input_tensor, input_tensor) - >>> print(output_tensor.shape) - (None, 5, 3, 4, 16) - - Args: - num_heads: Number of attention heads. - key_dim: Size of each attention head for query and key. - value_dim: Size of each attention head for value. - dropout: Dropout probability. - use_bias: Boolean, whether the dense layers use bias vectors/matrices. - output_shape: The expected shape of an output tensor, besides the batch and - sequence dims. If not specified, projects back to the key feature dim. - attention_axes: axes over which the attention is applied. `None` means - attention over all axes, but batch, heads, and features. - kernel_initializer: Initializer for dense layer kernels. - bias_initializer: Initializer for dense layer biases. - kernel_regularizer: Regularizer for dense layer kernels. - bias_regularizer: Regularizer for dense layer biases. - activity_regularizer: Regularizer for dense layer activity. - kernel_constraint: Constraint for dense layer kernels. - bias_constraint: Constraint for dense layer kernels. - - Call arguments: - query: Query `Tensor` of shape `(B, T, dim)`. - value: Value `Tensor` of shape `(B, S, dim)`. - key: Optional key `Tensor` of shape `(B, S, dim)`. If not given, will use - `value` for both `key` and `value`, which is the most common case. - attention_mask: a boolean mask of shape `(B, T, S)`, that prevents - attention to certain positions. The boolean mask specifies which query - elements can attend to which key elements, 1 indicates attention and 0 - indicates no attention. Broadcasting can happen for the missing batch - dimensions and the head dimension. - return_attention_scores: A boolean to indicate whether the output should - be `(attention_output, attention_scores)` if `True`, or `attention_output` - if `False`. Defaults to `False`. - training: Python boolean indicating whether the layer should behave in - training mode (adding dropout) or in inference mode (no dropout). - Defaults to either using the training mode of the parent layer/model, - or False (inference) if there is no parent layer. - - Returns: - attention_output: The result of the computation, of shape `(B, T, E)`, - where `T` is for target sequence shapes and `E` is the query input last - dimension if `output_shape` is `None`. Otherwise, the multi-head outputs - are project to the shape specified by `output_shape`. - attention_scores: [Optional] multi-head attention coefficients over - attention axes. 
- """ - - def __init__(self, - num_heads, - key_dim, - value_dim=None, - dropout=0.0, - use_bias=True, - output_shape=None, - attention_axes=None, - kernel_initializer="glorot_uniform", - bias_initializer="zeros", - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__(**kwargs) - self._num_heads = num_heads - self._key_dim = key_dim - self._value_dim = value_dim if value_dim else key_dim - self._dropout = dropout - self._use_bias = use_bias - self._output_shape = output_shape - self._kernel_initializer = initializers.get(kernel_initializer) - self._bias_initializer = initializers.get(bias_initializer) - self._kernel_regularizer = regularizers.get(kernel_regularizer) - self._bias_regularizer = regularizers.get(bias_regularizer) - self._activity_regularizer = regularizers.get(activity_regularizer) - self._kernel_constraint = constraints.get(kernel_constraint) - self._bias_constraint = constraints.get(bias_constraint) - if attention_axes is not None and not isinstance(attention_axes, - collections.abc.Sized): - self._attention_axes = (attention_axes,) - else: - self._attention_axes = attention_axes - self._built_from_signature = False - self._query_shape, self._key_shape, self._value_shape = None, None, None - - def get_config(self): - config = { - "num_heads": self._num_heads, - "key_dim": self._key_dim, - "value_dim": self._value_dim, - "dropout": self._dropout, - "use_bias": self._use_bias, - "output_shape": self._output_shape, - "attention_axes": self._attention_axes, - "kernel_initializer": - initializers.serialize(self._kernel_initializer), - "bias_initializer": - initializers.serialize(self._bias_initializer), - "kernel_regularizer": - regularizers.serialize(self._kernel_regularizer), - "bias_regularizer": - regularizers.serialize(self._bias_regularizer), - "activity_regularizer": - regularizers.serialize(self._activity_regularizer), - "kernel_constraint": - constraints.serialize(self._kernel_constraint), - "bias_constraint": - constraints.serialize(self._bias_constraint), - "query_shape": self._query_shape, - "key_shape": self._key_shape, - "value_shape": self._value_shape, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - # If the layer has a different build() function from the Keras default, - # we need to trigger the customized build to create weights. - query_shape = config.pop("query_shape") - key_shape = config.pop("key_shape") - value_shape = config.pop("value_shape") - layer = cls(**config) - if None in [query_shape, key_shape, value_shape]: - logging.warning( - "One of dimensions of the input shape is missing. It should have been" - " memorized when the layer was serialized. " - "%s is created without weights.", - str(cls)) - else: - layer._build_from_signature(query_shape, value_shape, key_shape) # pylint: disable=protected-access - return layer - - def _build_from_signature(self, query, value, key=None): - """Builds layers and variables. - - Once the method is called, self._built_from_signature will be set to True. - - Args: - query: Query tensor or TensorShape. - value: Value tensor or TensorShape. - key: Key tensor or TensorShape. 
- """ - self._built_from_signature = True - if hasattr(query, "shape"): - self._query_shape = tf.TensorShape(query.shape) - else: - self._query_shape = tf.TensorShape(query) - if hasattr(value, "shape"): - self._value_shape = tf.TensorShape(value.shape) - else: - self._value_shape = tf.TensorShape(value) - if key is None: - self._key_shape = self._value_shape - elif hasattr(key, "shape"): - self._key_shape = tf.TensorShape(key.shape) - else: - self._key_shape = tf.TensorShape(key) - - # Any setup work performed only once should happen in an `init_scope` - # to avoid creating symbolic Tensors that will later pollute any eager - # operations. - with tf_utils.maybe_init_scope(self): - free_dims = self._query_shape.rank - 1 - einsum_equation, bias_axes, output_rank = _build_proj_equation( - free_dims, bound_dims=1, output_dims=2) - self._query_dense = core.EinsumDense( - einsum_equation, - output_shape=_get_output_shape(output_rank - 1, - [self._num_heads, self._key_dim]), - bias_axes=bias_axes if self._use_bias else None, - name="query", - **self._get_common_kwargs_for_sublayer()) - einsum_equation, bias_axes, output_rank = _build_proj_equation( - self._key_shape.rank - 1, bound_dims=1, output_dims=2) - self._key_dense = core.EinsumDense( - einsum_equation, - output_shape=_get_output_shape(output_rank - 1, - [self._num_heads, self._key_dim]), - bias_axes=bias_axes if self._use_bias else None, - name="key", - **self._get_common_kwargs_for_sublayer()) - einsum_equation, bias_axes, output_rank = _build_proj_equation( - self._value_shape.rank - 1, bound_dims=1, output_dims=2) - self._value_dense = core.EinsumDense( - einsum_equation, - output_shape=_get_output_shape(output_rank - 1, - [self._num_heads, self._value_dim]), - bias_axes=bias_axes if self._use_bias else None, - name="value", - **self._get_common_kwargs_for_sublayer()) - - # Builds the attention computations for multi-head dot product attention. - # These computations could be wrapped into the keras attention layer once - # it supports mult-head einsum computations. - self._build_attention(output_rank) - self._output_dense = self._make_output_dense( - free_dims, self._get_common_kwargs_for_sublayer(), - "attention_output") - - def _get_common_kwargs_for_sublayer(self): - common_kwargs = dict( - kernel_regularizer=self._kernel_regularizer, - bias_regularizer=self._bias_regularizer, - activity_regularizer=self._activity_regularizer, - kernel_constraint=self._kernel_constraint, - bias_constraint=self._bias_constraint) - # Create new clone of kernel/bias initializer, so that we don't reuse the - # initializer instance, which could lead to same init value since - # initializer is stateless. - kernel_initializer = self._kernel_initializer.__class__.from_config( - self._kernel_initializer.get_config()) - bias_initializer = self._bias_initializer.__class__.from_config( - self._bias_initializer.get_config()) - common_kwargs['kernel_initializer'] = kernel_initializer - common_kwargs['bias_initializer'] = bias_initializer - return common_kwargs - - def _make_output_dense(self, free_dims, common_kwargs, name=None): - """Builds the output projection matrix. + """MultiHeadAttention layer. + + This is an implementation of multi-headed attention as described in the + paper "Attention is all you Need" (Vaswani et al., 2017). + If `query`, `key,` `value` are the same, then + this is self-attention. Each timestep in `query` attends to the + corresponding sequence in `key`, and returns a fixed-width vector. 
+
+    This layer first projects `query`, `key` and `value`. These are
+    (effectively) a list of tensors of length `num_attention_heads`, where the
+    corresponding shapes are `(batch_size, <query dimensions>, key_dim)`,
+    `(batch_size, <key/value dimensions>, key_dim)`,
+    `(batch_size, <key/value dimensions>, value_dim)`.
+
+    Then, the query and key tensors are dot-producted and scaled. These are
+    softmaxed to obtain attention probabilities. The value tensors are then
+    interpolated by these probabilities, then concatenated back to a single
+    tensor.
+
+    Finally, the result tensor with the last dimension as value_dim can take a
+    linear projection and return.
+
+    When using `MultiHeadAttention` inside a custom layer, the custom layer must
+    implement its own `build()` method and call `MultiHeadAttention`'s
+    `_build_from_signature()` there.
+    This enables weights to be restored correctly when the model is loaded.
+
+    Examples:
+
+    Performs 1D cross-attention over two sequence inputs with an attention mask.
+    Returns the additional attention weights over heads.
+
+    >>> layer = MultiHeadAttention(num_heads=2, key_dim=2)
+    >>> target = tf.keras.Input(shape=[8, 16])
+    >>> source = tf.keras.Input(shape=[4, 16])
+    >>> output_tensor, weights = layer(target, source,
+    ...                                return_attention_scores=True)
+    >>> print(output_tensor.shape)
+    (None, 8, 16)
+    >>> print(weights.shape)
+    (None, 2, 8, 4)
+
+    Performs 2D self-attention over a 5D input tensor on axes 2 and 3.
+
+    >>> layer = MultiHeadAttention(
+    ...     num_heads=2, key_dim=2, attention_axes=(2, 3))
+    >>> input_tensor = tf.keras.Input(shape=[5, 3, 4, 16])
+    >>> output_tensor = layer(input_tensor, input_tensor)
+    >>> print(output_tensor.shape)
+    (None, 5, 3, 4, 16)
 
     Args:
-      free_dims: Number of free dimensions for einsum equation building.
-      common_kwargs: Common keyword arguments for einsum layer.
-      name: Name for the projection layer.
+        num_heads: Number of attention heads.
+        key_dim: Size of each attention head for query and key.
+        value_dim: Size of each attention head for value.
+        dropout: Dropout probability.
+        use_bias: Boolean, whether the dense layers use bias vectors/matrices.
+        output_shape: The expected shape of an output tensor, besides the batch
+            and sequence dims. If not specified, projects back to the query
+            feature dim (the query input's last dimension).
+        attention_axes: axes over which the attention is applied. `None` means
+            attention over all axes, but batch, heads, and features.
+        kernel_initializer: Initializer for dense layer kernels.
+        bias_initializer: Initializer for dense layer biases.
+        kernel_regularizer: Regularizer for dense layer kernels.
+        bias_regularizer: Regularizer for dense layer biases.
+        activity_regularizer: Regularizer for dense layer activity.
+        kernel_constraint: Constraint for dense layer kernels.
+        bias_constraint: Constraint for dense layer biases.
+
+    Call arguments:
+        query: Query `Tensor` of shape `(B, T, dim)`.
+        value: Value `Tensor` of shape `(B, S, dim)`.
+        key: Optional key `Tensor` of shape `(B, S, dim)`. If not given, will
+            use `value` for both `key` and `value`, which is the most common
+            case.
+        attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
+            attention to certain positions. The boolean mask specifies which
+            query elements can attend to which key elements, 1 indicates
+            attention and 0 indicates no attention. Broadcasting can happen for
+            the missing batch dimensions and the head dimension.
+        return_attention_scores: A boolean to indicate whether the output
+            should be `(attention_output, attention_scores)` if `True`, or
+            `attention_output` if `False`. Defaults to `False`.
+        training: Python boolean indicating whether the layer should behave in
+            training mode (adding dropout) or in inference mode (no dropout).
+            Defaults to using the training mode of the parent layer/model, or
+            False (inference) if there is no parent layer.
+        use_causal_mask: A boolean to indicate whether to apply a causal mask to
+            prevent tokens from attending to future tokens (e.g., used in a
+            decoder Transformer).
 
     Returns:
-      Projection layer.
+        attention_output: The result of the computation, of shape `(B, T, E)`,
+            where `T` is for target sequence shapes and `E` is the query input
+            last dimension if `output_shape` is `None`. Otherwise, the
+            multi-head outputs are projected to the shape specified by
+            `output_shape`.
+        attention_scores: [Optional] multi-head attention coefficients over
+            attention axes.
     """
-    if self._output_shape:
-      if not isinstance(self._output_shape, collections.abc.Sized):
-        output_shape = [self._output_shape]
-      else:
-        output_shape = self._output_shape
-    else:
-      output_shape = [self._query_shape[-1]]
-    einsum_equation, bias_axes, output_rank = _build_proj_equation(
-        free_dims, bound_dims=2, output_dims=len(output_shape))
-    return core.EinsumDense(
-        einsum_equation,
-        output_shape=_get_output_shape(output_rank - 1, output_shape),
-        bias_axes=bias_axes if self._use_bias else None,
-        name=name,
-        **common_kwargs)
-
-  def _build_attention(self, rank):
-    """Builds multi-head dot-product attention computations.
-
-    This function builds attributes necessary for `_compute_attention` to
-    costomize attention computation to replace the default dot-product
-    attention.
-    Args:
-      rank: the rank of query, key, value tensors.
-    """
-    if self._attention_axes is None:
-      self._attention_axes = tuple(range(1, rank - 2))
-    else:
-      self._attention_axes = tuple(self._attention_axes)
-    self._dot_product_equation, self._combine_equation, attn_scores_rank = (
-        _build_attention_equation(rank, attn_axes=self._attention_axes))
-    norm_axes = tuple(
-        range(attn_scores_rank - len(self._attention_axes), attn_scores_rank))
-    self._softmax = activation.Softmax(axis=norm_axes)
-    self._dropout_layer = regularization.Dropout(rate=self._dropout)
-
-  def _masked_softmax(self, attention_scores, attention_mask=None):
-    # Normalize the attention scores to probabilities.
-    # `attention_scores` = [B, N, T, S]
-    if attention_mask is not None:
-      # The expand dim happens starting from the `num_heads` dimension,
-      # (<batch_dims>, num_heads, <query_attention_dims, key_attention_dims>)
-      mask_expansion_axis = -len(self._attention_axes) * 2 - 1
-      for _ in range(len(attention_scores.shape) - len(attention_mask.shape)):
-        attention_mask = tf.expand_dims(
-            attention_mask, axis=mask_expansion_axis)
-    return self._softmax(attention_scores, attention_mask)
-
-  def _compute_attention(self,
-                         query,
-                         key,
-                         value,
-                         attention_mask=None,
-                         training=None):
-    """Applies Dot-product attention with query, key, value tensors.
-
-    This function defines the computation inside `call` with projected
-    multi-head Q, K, V inputs. Users can override this function for customized
-    attention implementation.
-
-    Args:
-      query: Projected query `Tensor` of shape `(B, T, N, key_dim)`.
-      key: Projected key `Tensor` of shape `(B, T, N, key_dim)`.
-      value: Projected value `Tensor` of shape `(B, T, N, value_dim)`.
- attention_mask: a boolean mask of shape `(B, T, S)`, that prevents - attention to certain positions. - training: Python boolean indicating whether the layer should behave in - training mode (adding dropout) or in inference mode (doing nothing). - - Returns: - attention_output: Multi-headed outputs of attention computation. - attention_scores: Multi-headed attention weights. - """ - # Note: Applying scalar multiply at the smaller end of einsum improves - # XLA performance, but may introduce slight numeric differences in - # the Transformer attention head. - query = tf.multiply(query, 1.0 / math.sqrt(float(self._key_dim))) - - # Take the dot product between "query" and "key" to get the raw - # attention scores. - attention_scores = tf.einsum(self._dot_product_equation, key, query) - - attention_scores = self._masked_softmax(attention_scores, attention_mask) - - # This is actually dropping out entire tokens to attend to, which might - # seem a bit unusual, but is taken from the original Transformer paper. - attention_scores_dropout = self._dropout_layer( - attention_scores, training=training) - - # `context_layer` = [B, T, N, H] - attention_output = tf.einsum(self._combine_equation, - attention_scores_dropout, value) - return attention_output, attention_scores - - def call(self, - query, - value, - key=None, - attention_mask=None, - return_attention_scores=False, - training=None): - if not self._built_from_signature: - self._build_from_signature(query=query, value=value, key=key) - if key is None: - key = value - - query_is_ragged = isinstance(query, tf.RaggedTensor) - if query_is_ragged: - query_lengths = query.nested_row_lengths() - query = query.to_tensor() - - key_is_ragged = isinstance(key, tf.RaggedTensor) - value_is_ragged = isinstance(value, tf.RaggedTensor) - if key_is_ragged and value_is_ragged: - # Ensure they have the same shape. 
- bounding_shape = tf.math.maximum( - key.bounding_shape(), value.bounding_shape()) - key = key.to_tensor(shape=bounding_shape) - value = value.to_tensor(shape=bounding_shape) - elif key_is_ragged: - key = key.to_tensor(shape=tf.shape(value)) - elif value_is_ragged: - value = value.to_tensor(shape=tf.shape(key)) - - # N = `num_attention_heads` - # H = `size_per_head` - # `query` = [B, T, N ,H] - query = self._query_dense(query) - - # `key` = [B, S, N, H] - key = self._key_dense(key) - - # `value` = [B, S, N, H] - value = self._value_dense(value) - - attention_output, attention_scores = self._compute_attention( - query, key, value, attention_mask, training) - attention_output = self._output_dense(attention_output) - - if query_is_ragged: - attention_output = tf.RaggedTensor.from_tensor( - attention_output, lengths=query_lengths) - - if return_attention_scores: - return attention_output, attention_scores - return attention_output + def __init__( + self, + num_heads, + key_dim, + value_dim=None, + dropout=0.0, + use_bias=True, + output_shape=None, + attention_axes=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__(**kwargs) + self.supports_masking = True + self._num_heads = num_heads + self._key_dim = key_dim + self._value_dim = value_dim if value_dim else key_dim + self._dropout = dropout + self._use_bias = use_bias + self._output_shape = output_shape + self._kernel_initializer = initializers.get(kernel_initializer) + self._bias_initializer = initializers.get(bias_initializer) + self._kernel_regularizer = regularizers.get(kernel_regularizer) + self._bias_regularizer = regularizers.get(bias_regularizer) + self._activity_regularizer = regularizers.get(activity_regularizer) + self._kernel_constraint = constraints.get(kernel_constraint) + self._bias_constraint = constraints.get(bias_constraint) + if attention_axes is not None and not isinstance( + attention_axes, collections.abc.Sized + ): + self._attention_axes = (attention_axes,) + else: + self._attention_axes = attention_axes + self._built_from_signature = False + self._query_shape, self._key_shape, self._value_shape = None, None, None + + def get_config(self): + config = { + "num_heads": self._num_heads, + "key_dim": self._key_dim, + "value_dim": self._value_dim, + "dropout": self._dropout, + "use_bias": self._use_bias, + "output_shape": self._output_shape, + "attention_axes": self._attention_axes, + "kernel_initializer": initializers.serialize( + self._kernel_initializer + ), + "bias_initializer": initializers.serialize(self._bias_initializer), + "kernel_regularizer": regularizers.serialize( + self._kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self._bias_regularizer), + "activity_regularizer": regularizers.serialize( + self._activity_regularizer + ), + "kernel_constraint": constraints.serialize(self._kernel_constraint), + "bias_constraint": constraints.serialize(self._bias_constraint), + "query_shape": self._query_shape, + "key_shape": self._key_shape, + "value_shape": self._value_shape, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + # If the layer has a different build() function from the Keras default, + # we need to trigger the customized build to create weights. 
+        query_shape = config.pop("query_shape")
+        key_shape = config.pop("key_shape")
+        value_shape = config.pop("value_shape")
+        layer = cls(**config)
+        if None in [query_shape, key_shape, value_shape]:
+            logging.warning(
+                "One of dimensions of the input shape is missing. It "
+                "should have been memorized when the layer was serialized. "
+                "%s is created without weights.",
+                str(cls),
+            )
+        else:
+            layer._build_from_signature(query_shape, value_shape, key_shape)
+        return layer
+
+    def _build_from_signature(self, query, value, key=None):
+        """Builds layers and variables.
+
+        Once the method is called, self._built_from_signature will be set to
+        True.
+
+        Args:
+            query: Query tensor or TensorShape.
+            value: Value tensor or TensorShape.
+            key: Key tensor or TensorShape.
+        """
+        self._built_from_signature = True
+        if hasattr(query, "shape"):
+            self._query_shape = tf.TensorShape(query.shape)
+        else:
+            self._query_shape = tf.TensorShape(query)
+        if hasattr(value, "shape"):
+            self._value_shape = tf.TensorShape(value.shape)
+        else:
+            self._value_shape = tf.TensorShape(value)
+        if key is None:
+            self._key_shape = self._value_shape
+        elif hasattr(key, "shape"):
+            self._key_shape = tf.TensorShape(key.shape)
+        else:
+            self._key_shape = tf.TensorShape(key)
+
+        # Any setup work performed only once should happen in an `init_scope`
+        # to avoid creating symbolic Tensors that will later pollute any eager
+        # operations.
+        with tf_utils.maybe_init_scope(self):
+            free_dims = self._query_shape.rank - 1
+            einsum_equation, bias_axes, output_rank = _build_proj_equation(
+                free_dims, bound_dims=1, output_dims=2
+            )
+            self._query_dense = core.EinsumDense(
+                einsum_equation,
+                output_shape=_get_output_shape(
+                    output_rank - 1, [self._num_heads, self._key_dim]
+                ),
+                bias_axes=bias_axes if self._use_bias else None,
+                name="query",
+                **self._get_common_kwargs_for_sublayer(),
+            )
+            einsum_equation, bias_axes, output_rank = _build_proj_equation(
+                self._key_shape.rank - 1, bound_dims=1, output_dims=2
+            )
+            self._key_dense = core.EinsumDense(
+                einsum_equation,
+                output_shape=_get_output_shape(
+                    output_rank - 1, [self._num_heads, self._key_dim]
+                ),
+                bias_axes=bias_axes if self._use_bias else None,
+                name="key",
+                **self._get_common_kwargs_for_sublayer(),
+            )
+            einsum_equation, bias_axes, output_rank = _build_proj_equation(
+                self._value_shape.rank - 1, bound_dims=1, output_dims=2
+            )
+            self._value_dense = core.EinsumDense(
+                einsum_equation,
+                output_shape=_get_output_shape(
+                    output_rank - 1, [self._num_heads, self._value_dim]
+                ),
+                bias_axes=bias_axes if self._use_bias else None,
+                name="value",
+                **self._get_common_kwargs_for_sublayer(),
+            )
+
+            # Builds the attention computations for multi-head dot product
+            # attention. These computations could be wrapped into the keras
+            # attention layer once it supports multi-head einsum computations.
+            self._build_attention(output_rank)
+            self._output_dense = self._make_output_dense(
+                free_dims,
+                self._get_common_kwargs_for_sublayer(),
+                "attention_output",
+            )
+
+    def _get_common_kwargs_for_sublayer(self):
+        common_kwargs = dict(
+            kernel_regularizer=self._kernel_regularizer,
+            bias_regularizer=self._bias_regularizer,
+            activity_regularizer=self._activity_regularizer,
+            kernel_constraint=self._kernel_constraint,
+            bias_constraint=self._bias_constraint,
+            dtype=self._dtype_policy,
+        )
+        # Create new clone of kernel/bias initializer, so that we don't reuse
+        # the initializer instance, which could lead to same init value since
+        # initializer is stateless.
+        kernel_initializer = self._kernel_initializer.__class__.from_config(
+            self._kernel_initializer.get_config()
+        )
+        bias_initializer = self._bias_initializer.__class__.from_config(
+            self._bias_initializer.get_config()
+        )
+        common_kwargs["kernel_initializer"] = kernel_initializer
+        common_kwargs["bias_initializer"] = bias_initializer
+        return common_kwargs
+
+    def _make_output_dense(self, free_dims, common_kwargs, name=None):
+        """Builds the output projection matrix.
+
+        Args:
+            free_dims: Number of free dimensions for einsum equation building.
+            common_kwargs: Common keyword arguments for einsum layer.
+            name: Name for the projection layer.
+
+        Returns:
+            Projection layer.
+        """
+        if self._output_shape:
+            if not isinstance(self._output_shape, collections.abc.Sized):
+                output_shape = [self._output_shape]
+            else:
+                output_shape = self._output_shape
+        else:
+            output_shape = [self._query_shape[-1]]
+        einsum_equation, bias_axes, output_rank = _build_proj_equation(
+            free_dims, bound_dims=2, output_dims=len(output_shape)
+        )
+        return core.EinsumDense(
+            einsum_equation,
+            output_shape=_get_output_shape(output_rank - 1, output_shape),
+            bias_axes=bias_axes if self._use_bias else None,
+            name=name,
+            **common_kwargs,
+        )
+
+    def _build_attention(self, rank):
+        """Builds multi-head dot-product attention computations.
+
+        This function builds attributes necessary for `_compute_attention` to
+        customize attention computation to replace the default dot-product
+        attention.
+
+        Args:
+            rank: the rank of query, key, value tensors.
+        """
+        if self._attention_axes is None:
+            self._attention_axes = tuple(range(1, rank - 2))
+        else:
+            self._attention_axes = tuple(self._attention_axes)
+        (
+            self._dot_product_equation,
+            self._combine_equation,
+            attn_scores_rank,
+        ) = _build_attention_equation(rank, attn_axes=self._attention_axes)
+        norm_axes = tuple(
+            range(
+                attn_scores_rank - len(self._attention_axes), attn_scores_rank
+            )
+        )
+        self._softmax = activation.Softmax(
+            axis=norm_axes, dtype=self._dtype_policy
+        )
+        self._dropout_layer = regularization.Dropout(
+            rate=self._dropout, dtype=self._dtype_policy
+        )
+
+    def _masked_softmax(self, attention_scores, attention_mask=None):
+        # Normalize the attention scores to probabilities.
+        # `attention_scores` = [B, N, T, S]
+        if attention_mask is not None:
+            # The expand dim happens starting from the `num_heads` dimension,
+            # (<batch_dims>, num_heads, <query_attention_dims,
+            # key_attention_dims>)
+            mask_expansion_axis = -len(self._attention_axes) * 2 - 1
+            for _ in range(
+                len(attention_scores.shape) - len(attention_mask.shape)
+            ):
+                attention_mask = tf.expand_dims(
+                    attention_mask, axis=mask_expansion_axis
+                )
+        return self._softmax(attention_scores, attention_mask)
+
+    def _compute_attention(
+        self, query, key, value, attention_mask=None, training=None
+    ):
+        """Applies Dot-product attention with query, key, value tensors.
+
+        This function defines the computation inside `call` with projected
+        multi-head Q, K, V inputs. Users can override this function for
+        customized attention implementation.
+
+        Args:
+            query: Projected query `Tensor` of shape `(B, T, N, key_dim)`.
+            key: Projected key `Tensor` of shape `(B, S, N, key_dim)`.
+            value: Projected value `Tensor` of shape `(B, S, N, value_dim)`.
+            attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
+                attention to certain positions. It is generally not needed if
+                the `query` and `value` (and/or `key`) are masked.
+ training: Python boolean indicating whether the layer should behave + in training mode (adding dropout) or in inference mode (doing + nothing). + + Returns: + attention_output: Multi-headed outputs of attention computation. + attention_scores: Multi-headed attention weights. + """ + # Note: Applying scalar multiply at the smaller end of einsum improves + # XLA performance, but may introduce slight numeric differences in + # the Transformer attention head. + query = tf.multiply(query, 1.0 / math.sqrt(float(self._key_dim))) + + # Take the dot product between "query" and "key" to get the raw + # attention scores. + attention_scores = tf.einsum(self._dot_product_equation, key, query) + attention_scores = self._masked_softmax( + attention_scores, attention_mask + ) + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + attention_scores_dropout = self._dropout_layer( + attention_scores, training=training + ) + # `context_layer` = [B, T, N, H] + attention_output = tf.einsum( + self._combine_equation, attention_scores_dropout, value + ) + return attention_output, attention_scores + + def call( + self, + query, + value, + key=None, + attention_mask=None, + return_attention_scores=False, + training=None, + use_causal_mask=False, + ): + if not self._built_from_signature: + self._build_from_signature(query=query, value=value, key=key) + if key is None: + key = value + + # Convert RaggedTensor to Tensor. + query_is_ragged = isinstance(query, tf.RaggedTensor) + if query_is_ragged: + query_lengths = query.nested_row_lengths() + query = query.to_tensor() + key_is_ragged = isinstance(key, tf.RaggedTensor) + value_is_ragged = isinstance(value, tf.RaggedTensor) + if key_is_ragged and value_is_ragged: + # Ensure they have the same shape. + bounding_shape = tf.math.maximum( + key.bounding_shape(), value.bounding_shape() + ) + key = key.to_tensor(shape=bounding_shape) + value = value.to_tensor(shape=bounding_shape) + elif key_is_ragged: + key = key.to_tensor(shape=tf.shape(value)) + elif value_is_ragged: + value = value.to_tensor(shape=tf.shape(key)) + + attention_mask = self._compute_attention_mask( + query, + value, + key=key, + attention_mask=attention_mask, + use_causal_mask=use_causal_mask, + ) + + # N = `num_attention_heads` + # H = `size_per_head` + # `query` = [B, T, N ,H] + query = self._query_dense(query) + + # `key` = [B, S, N, H] + key = self._key_dense(key) + + # `value` = [B, S, N, H] + value = self._value_dense(value) + + attention_output, attention_scores = self._compute_attention( + query, key, value, attention_mask, training + ) + attention_output = self._output_dense(attention_output) + + if query_is_ragged: + attention_output = tf.RaggedTensor.from_tensor( + attention_output, lengths=query_lengths + ) + + if return_attention_scores: + return attention_output, attention_scores + return attention_output + + def _compute_attention_mask( + self, query, value, key=None, attention_mask=None, use_causal_mask=False + ): + """Computes the attention mask, using the Keras masks of the inputs. + + * The `query`'s mask is reshaped from [B, T] to [B, T, 1]. + * The `value`'s mask is reshaped from [B, S] to [B, 1, S]. + * The `key`'s mask is reshaped from [B, S] to [B, 1, S]. The `key`'s + mask is ignored if `key` is `None` or if `key is value`. + * If `use_causal_mask=True`, then the causal mask is computed. Its shape + is [1, T, S]. + + All defined masks are merged using a logical AND operation (`&`). 
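A tiny illustration of the reshape-and-AND merge described above, together with the lower-triangular causal mask that `_compute_causal_mask` (below) builds via `tf.linalg.band_part`. This snippet is editorial, not part of the patch; the mask values are made up:

```python
import tensorflow as tf

# query mask [B, T] -> [B, T, 1]; value mask [B, S] -> [B, 1, S];
# a broadcasted logical AND then yields the [B, T, S] attention mask.
query_mask = tf.constant([[True, True, False]])  # [B=1, T=3]
value_mask = tf.constant([[True, False]])        # [B=1, S=2]
auto_mask = query_mask[:, :, tf.newaxis] & value_mask[:, tf.newaxis, :]
print(auto_mask.numpy())
# [[[ True False]
#   [ True False]
#   [False False]]]

# The causal mask is lower triangular: position i attends only to j <= i.
causal = tf.linalg.band_part(tf.ones((1, 3, 3), tf.bool), -1, 0)
print(causal.numpy())
# [[[ True False False]
#   [ True  True False]
#   [ True  True  True]]]
```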
+
+        In general, if the `query` and `value` are masked, then there is no
+        need to define the `attention_mask`.
+
+        Args:
+            query: Query `Tensor` of shape `(B, T, dim)`.
+            key: Key `Tensor` of shape `(B, S, dim)`.
+            value: Value `Tensor` of shape `(B, S, dim)`.
+            attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
+                attention to certain positions.
+            use_causal_mask: A boolean to indicate whether to apply a causal
+                mask to prevent tokens from attending to future tokens (e.g.,
+                used in a decoder Transformer).
+
+        Returns:
+            attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
+                attention to certain positions, based on the Keras masks of the
+                `query`, `key`, `value`, and `attention_mask` tensors, and the
+                causal mask if `use_causal_mask=True`.
+        """
+        query_mask = getattr(query, "_keras_mask", None)
+        value_mask = getattr(value, "_keras_mask", None)
+        key_mask = getattr(key, "_keras_mask", None)
+        auto_mask = None
+        if query_mask is not None:
+            query_mask = tf.cast(query_mask, tf.bool)  # defensive casting
+            # B = batch size, T = max query length
+            auto_mask = query_mask[:, :, tf.newaxis]  # shape is [B, T, 1]
+        if value_mask is not None:
+            value_mask = tf.cast(value_mask, tf.bool)  # defensive casting
+            # B = batch size, S == max value length
+            mask = value_mask[:, tf.newaxis, :]  # shape is [B, 1, S]
+            auto_mask = mask if auto_mask is None else auto_mask & mask
+        if key_mask is not None:
+            key_mask = tf.cast(key_mask, tf.bool)  # defensive casting
+            # B == batch size, S == max key length == max value length
+            mask = key_mask[:, tf.newaxis, :]  # shape is [B, 1, S]
+            auto_mask = mask if auto_mask is None else auto_mask & mask
+        if use_causal_mask:
+            # the shape of the causal mask is [1, T, S]
+            mask = self._compute_causal_mask(query, value)
+            auto_mask = mask if auto_mask is None else auto_mask & mask
+        if auto_mask is not None:
+            # merge attention_mask & automatic mask, to shape [B, T, S]
+            attention_mask = (
+                auto_mask
+                if attention_mask is None
+                else tf.cast(attention_mask, bool) & auto_mask
+            )
+        return attention_mask
+
+    def _compute_causal_mask(self, query, value=None):
+        """Computes a causal mask (e.g., for masked self-attention layers).
+
+        For example, if query and value both contain sequences of length 4,
+        this function returns a boolean `Tensor` equal to:
+
+        ```
+        [[[True, False, False, False],
+          [True, True, False, False],
+          [True, True, True, False],
+          [True, True, True, True]]]
+        ```
+
+        Args:
+            query: query `Tensor` of shape `(B, T, ...)`.
+            value: value `Tensor` of shape `(B, S, ...)` (optional, defaults to
+                query).
+
+        Returns:
+            mask: a boolean `Tensor` of shape [1, T, S] containing a lower
+                triangular matrix of shape [T, S].
+        """
+        q_seq_length = tf.shape(query)[1]
+        v_seq_length = q_seq_length if value is None else tf.shape(value)[1]
+        return tf.linalg.band_part(  # creates a lower triangular matrix
+            tf.ones((1, q_seq_length, v_seq_length), tf.bool), -1, 0
+        )
+
+    def compute_output_shape(self, query_shape, value_shape, key_shape=None):
+        if key_shape is None:
+            key_shape = value_shape
+
+        query_shape = tf.TensorShape(query_shape)
+        value_shape = tf.TensorShape(value_shape)
+        key_shape = tf.TensorShape(key_shape)
+
+        if query_shape[-1] != value_shape[-1]:
+            raise ValueError(
+                "The last dimension of `query_shape` and `value_shape` "
+                f"must be equal, but are {query_shape[-1]}, {value_shape[-1]}. "
+                f"Received: query_shape={query_shape}, value_shape={value_shape}"
+            )
+
+        if value_shape[1:-1] != key_shape[1:-1]:
+            raise ValueError(
+                "All dimensions of `value` and `key`, except the last one, "
+                f"must be equal. Received {value_shape} and "
+                f"{key_shape}"
+            )
+
+        if self._output_shape:
+            return query_shape[:-1].concatenate(self._output_shape)
+
+        return query_shape
diff --git a/keras/layers/attention/multi_head_attention_test.py b/keras/layers/attention/multi_head_attention_test.py
index fcd73cd4d194..aa4d15aed6f5 100644
--- a/keras/layers/attention/multi_head_attention_test.py
+++ b/keras/layers/attention/multi_head_attention_test.py
@@ -14,358 +14,611 @@
 # ==============================================================================
 """Tests for the MultiHeadAttention layer."""
 
+import numpy as np
+import tensorflow.compat.v2 as tf
 from absl.testing import parameterized
+
 import keras
+from keras.saving import object_registration
 from keras.testing_infra import test_combinations
-import numpy as np
-import tensorflow.compat.v2 as tf
+from keras.testing_infra import test_utils
 
 
 # This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It
 # guarantees forward compatibility of this code for the V2 switchover.
 @test_combinations.run_all_keras_modes
 class MultiHeadAttentionTest(test_combinations.TestCase):
-
-  @parameterized.named_parameters(
-      ("key_value_same_proj", None, None, [40, 80]),
-      ("key_value_different_proj", 32, 60, [40, 60]),
-  )
-  def test_non_masked_attention(self, value_dim, output_shape, output_dims):
-    """Test that the attention layer can be created without a mask tensor."""
-    test_layer = keras.layers.MultiHeadAttention(
-        num_heads=12,
-        key_dim=64,
-        value_dim=value_dim,
-        output_shape=output_shape)
-    # Create a 3-dimensional input (the first dimension is implicit).
-    query = keras.Input(shape=(40, 80))
-    value = keras.Input(shape=(20, 80))
-    output = test_layer(query=query, value=value)
-    self.assertEqual(output.shape.as_list(), [None] + output_dims)
-
-  def test_non_masked_self_attention(self):
-    """Test with one input (self-attenntion) and no mask tensor."""
-    test_layer = keras.layers.MultiHeadAttention(
-        num_heads=12, key_dim=64)
-    # Create a 3-dimensional input (the first dimension is implicit).
-    query = keras.Input(shape=(40, 80))
-    output = test_layer(query, query)
-    self.assertEqual(output.shape.as_list(), [None, 40, 80])
-
-  def test_attention_scores(self):
-    """Test attention outputs with coefficients."""
-    test_layer = keras.layers.MultiHeadAttention(
-        num_heads=12, key_dim=64)
-    # Create a 3-dimensional input (the first dimension is implicit).
-    query = keras.Input(shape=(40, 80))
-    output, coef = test_layer(query, query, return_attention_scores=True)
-    self.assertEqual(output.shape.as_list(), [None, 40, 80])
-    self.assertEqual(coef.shape.as_list(), [None, 12, 40, 40])
-
-  def test_attention_scores_with_values(self):
-    """Test attention outputs with coefficients."""
-    test_layer = keras.layers.MultiHeadAttention(
-        num_heads=12, key_dim=64)
-    # Create a 3-dimensional input (the first dimension is implicit).
- query = keras.Input(shape=(40, 80)) - value = keras.Input(shape=(60, 80)) - output, coef = test_layer(query, value, return_attention_scores=True) - self.assertEqual(output.shape.as_list(), [None, 40, 80]) - self.assertEqual(coef.shape.as_list(), [None, 12, 40, 60]) - - @parameterized.named_parameters(("with_bias", True), ("no_bias", False)) - def test_masked_attention(self, use_bias): - """Test with a mask tensor.""" - test_layer = keras.layers.MultiHeadAttention( - num_heads=2, key_dim=2, use_bias=use_bias) - # Create a 3-dimensional input (the first dimension is implicit). - batch_size = 3 - query = keras.Input(shape=(4, 8)) - value = keras.Input(shape=(2, 8)) - mask_tensor = keras.Input(shape=(4, 2)) - output = test_layer(query=query, value=value, attention_mask=mask_tensor) - - # Create a model containing the test layer. - model = keras.Model([query, value, mask_tensor], output) - - # Generate data for the input (non-mask) tensors. - from_data = 10 * np.random.random_sample((batch_size, 4, 8)) - to_data = 10 * np.random.random_sample((batch_size, 2, 8)) - - # Invoke the data with a random set of mask data. This should mask at least - # one element. - mask_data = np.random.randint(2, size=(batch_size, 4, 2)) - masked_output_data = model.predict([from_data, to_data, mask_data]) - - # Invoke the same data, but with a null mask (where no elements are masked). - null_mask_data = np.ones((batch_size, 4, 2)) - unmasked_output_data = model.predict([from_data, to_data, null_mask_data]) - - # Because one data is masked and one is not, the outputs should not be the - # same. - self.assertNotAllClose(masked_output_data, unmasked_output_data) - - # Tests the layer with three inputs: Q, K, V. - key = keras.Input(shape=(2, 8)) - output = test_layer(query, value=value, key=key, attention_mask=mask_tensor) - model = keras.Model([query, value, key, mask_tensor], output) - - masked_output_data = model.predict([from_data, to_data, to_data, mask_data]) - unmasked_output_data = model.predict( - [from_data, to_data, to_data, null_mask_data]) - # Because one data is masked and one is not, the outputs should not be the - # same. - self.assertNotAllClose(masked_output_data, unmasked_output_data) - - if use_bias: - self.assertLen(test_layer._query_dense.trainable_variables, 2) - self.assertLen(test_layer._output_dense.trainable_variables, 2) - else: - self.assertLen(test_layer._query_dense.trainable_variables, 1) - self.assertLen(test_layer._output_dense.trainable_variables, 1) - - def test_initializer(self): - """Test with a specified initializer.""" - test_layer = keras.layers.MultiHeadAttention( - num_heads=12, - key_dim=64, - kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02)) - # Create a 3-dimensional input (the first dimension is implicit). - query = keras.Input(shape=(40, 80)) - output = test_layer(query, query) - self.assertEqual(output.shape.as_list(), [None, 40, 80]) - - # Make sure the sub layers have different kernel init value, and not reusing - # the initializers. 
- self.assertNotAllClose(keras.backend.eval(test_layer._query_dense.kernel), - keras.backend.eval(test_layer._key_dense.kernel)) - self.assertNotAllClose(keras.backend.eval(test_layer._query_dense.kernel), - keras.backend.eval(test_layer._value_dense.kernel)) - self.assertNotAllClose(keras.backend.eval(test_layer._query_dense.kernel), - keras.backend.eval(test_layer._output_dense.kernel)) - - def test_masked_attention_with_scores(self): - """Test with a mask tensor.""" - test_layer = keras.layers.MultiHeadAttention( - num_heads=2, key_dim=2) - # Create a 3-dimensional input (the first dimension is implicit). - batch_size = 3 - query = keras.Input(shape=(4, 8)) - value = keras.Input(shape=(2, 8)) - mask_tensor = keras.Input(shape=(4, 2)) - output = test_layer(query=query, value=value, attention_mask=mask_tensor) - - # Create a model containing the test layer. - model = keras.Model([query, value, mask_tensor], output) - - # Generate data for the input (non-mask) tensors. - from_data = 10 * np.random.random_sample((batch_size, 4, 8)) - to_data = 10 * np.random.random_sample((batch_size, 2, 8)) - - # Invoke the data with a random set of mask data. This should mask at least - # one element. - mask_data = np.random.randint(2, size=(batch_size, 4, 2)) - masked_output_data = model.predict([from_data, to_data, mask_data]) - - # Invoke the same data, but with a null mask (where no elements are masked). - null_mask_data = np.ones((batch_size, 4, 2)) - unmasked_output_data = model.predict([from_data, to_data, null_mask_data]) - - # Because one data is masked and one is not, the outputs should not be the - # same. - self.assertNotAllClose(masked_output_data, unmasked_output_data) - - # Create a model containing attention scores. - output, scores = test_layer( - query=query, value=value, attention_mask=mask_tensor, - return_attention_scores=True) - model = keras.Model([query, value, mask_tensor], [output, scores]) - masked_output_data_score, masked_score = model.predict( - [from_data, to_data, mask_data]) - unmasked_output_data_score, unmasked_score = model.predict( - [from_data, to_data, null_mask_data]) - self.assertNotAllClose(masked_output_data_score, unmasked_output_data_score) - self.assertAllClose(masked_output_data, masked_output_data_score) - self.assertAllClose(unmasked_output_data, unmasked_output_data_score) - self.assertNotAllClose(masked_score, unmasked_score) - - @parameterized.named_parameters( - ("4d_inputs_1freebatch_mask2", [3, 4], [3, 2], [4, 2], - (2,)), ("4d_inputs_1freebatch_mask3", [3, 4], [3, 2], [3, 4, 2], (2,)), - ("4d_inputs_1freebatch_mask4", [3, 4], [3, 2], [3, 2, 4, 2], - (2,)), ("4D_inputs_2D_attention", [3, 4], [3, 2], [3, 4, 3, 2], (1, 2)), - ("5D_inputs_2D_attention", [5, 3, 4], [5, 3, 2], [3, 4, 3, 2], (2, 3)), - ("5D_inputs_2D_attention_fullmask", [5, 3, 4], [5, 3, 2], [5, 3, 4, 3, 2], - (2, 3))) - def test_high_dim_attention(self, q_dims, v_dims, mask_dims, attention_axes): - """Test with a mask tensor.""" - test_layer = keras.layers.MultiHeadAttention( - num_heads=2, key_dim=2, attention_axes=attention_axes) - batch_size, hidden_size = 3, 8 - # Generate data for the input (non-mask) tensors. - query_shape = [batch_size] + q_dims + [hidden_size] - value_shape = [batch_size] + v_dims + [hidden_size] - mask_shape = [batch_size] + mask_dims - query = 10 * np.random.random_sample(query_shape) - value = 10 * np.random.random_sample(value_shape) - - # Invoke the data with a random set of mask data. This should mask at least - # one element. 
-    mask_data = np.random.randint(2, size=mask_shape).astype("bool")
-    # Invoke the same data, but with a null mask (where no elements are masked).
-    null_mask_data = np.ones(mask_shape)
-    # Because one data is masked and one is not, the outputs should not be the
-    # same.
-    query_tensor = keras.Input(query_shape[1:], name="query")
-    value_tensor = keras.Input(value_shape[1:], name="value")
-    mask_tensor = keras.Input(mask_shape[1:], name="mask")
-    output = test_layer(query=query_tensor, value=value_tensor,
-                        attention_mask=mask_tensor)
-    model = keras.Model([query_tensor, value_tensor, mask_tensor], output)
-
-    self.assertNotAllClose(
-        model.predict([query, value, mask_data]),
-        model.predict([query, value, null_mask_data]))
-
-  def test_dropout(self):
-    test_layer = keras.layers.MultiHeadAttention(
-        num_heads=2, key_dim=2, dropout=0.5)
-
-    # Generate data for the input (non-mask) tensors.
-    from_data = keras.backend.ones(shape=(32, 4, 8))
-    to_data = keras.backend.ones(shape=(32, 2, 8))
-    train_out = test_layer(from_data, to_data, None, None, None, True)
-    test_out = test_layer(from_data, to_data, None, None, None, False)
-
-    # Output should be close when not in training mode,
-    # and should not be close when enabling dropout in training mode.
-    self.assertNotAllClose(
-        keras.backend.eval(train_out),
-        keras.backend.eval(test_out))
-
-  @test_combinations.generate(test_combinations.combine(
-      ragged_query=[True, False],
-      ragged_value=[True, False],
-      ragged_key=[True, False]))
-  def test_ragged_tensor(self, ragged_query, ragged_value, ragged_key):
-    if ragged_query:
-      query = tf.ragged.constant(
-          [[[3., 1.], [4., 1.]], [[5., 9.], [2., 6.], [3., 1.]], [[1., 2.]]],
-          inner_shape=(2,))
-    else:
-      query = keras.backend.ones(shape=(3, 2, 2))
-
-    if ragged_value:
-      value = tf.ragged.constant(
-          [[[3., 1.], [4., 1.]], [[5., 9.]], [[1., 2.]]], inner_shape=(2,))
-    else:
-      value = keras.backend.ones(shape=(3, 4, 2))
-
-    if ragged_key:
-      key = tf.ragged.constant(
-          [[[3., 1.], [4., 1.]],
-           [[5., 9.], [2., 6.], [3., 1.], [1., 5.]],
-           [[1., 2.]]],
-          inner_shape=(2,))
-    else:
-      key = keras.backend.ones(shape=(3, 4, 2))
-
-    test_layer = keras.layers.MultiHeadAttention(num_heads=5, key_dim=2)
-    results = test_layer(query, value, key)
-    self.assertAllEqual(results.shape.as_list(), query.shape.as_list())
+    @parameterized.named_parameters(
+        ("key_value_same_proj", None, None, [40, 80]),
+        ("key_value_different_proj", 32, 60, [40, 60]),
+    )
+    def test_non_masked_attention(self, value_dim, output_shape, output_dims):
+        """Test that the attention layer can be created without a mask
+        tensor."""
+        test_layer = keras.layers.MultiHeadAttention(
+            num_heads=12,
+            key_dim=64,
+            value_dim=value_dim,
+            output_shape=output_shape,
+        )
+        # Create a 3-dimensional input (the first dimension is implicit).
+        query = keras.Input(shape=(40, 80))
+        value = keras.Input(shape=(20, 80))
+        output = test_layer(query=query, value=value)
+        self.assertEqual(output.shape.as_list(), [None] + output_dims)
+
+    def test_non_masked_self_attention(self):
+        """Test with one input (self-attention) and no mask tensor."""
+        test_layer = keras.layers.MultiHeadAttention(num_heads=12, key_dim=64)
+        # Create a 3-dimensional input (the first dimension is implicit).
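+        # Self-attention: the same tensor is passed as both `query` and
+        # `value`.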
+ query = keras.Input(shape=(40, 80)) + output = test_layer(query, query) + self.assertEqual(output.shape.as_list(), [None, 40, 80]) + + def test_attention_scores(self): + """Test attention outputs with coefficients.""" + test_layer = keras.layers.MultiHeadAttention(num_heads=12, key_dim=64) + # Create a 3-dimensional input (the first dimension is implicit). + query = keras.Input(shape=(40, 80)) + output, coef = test_layer(query, query, return_attention_scores=True) + self.assertEqual(output.shape.as_list(), [None, 40, 80]) + self.assertEqual(coef.shape.as_list(), [None, 12, 40, 40]) + + def test_attention_scores_with_values(self): + """Test attention outputs with coefficients.""" + test_layer = keras.layers.MultiHeadAttention(num_heads=12, key_dim=64) + # Create a 3-dimensional input (the first dimension is implicit). + query = keras.Input(shape=(40, 80)) + value = keras.Input(shape=(60, 80)) + output, coef = test_layer(query, value, return_attention_scores=True) + self.assertEqual(output.shape.as_list(), [None, 40, 80]) + self.assertEqual(coef.shape.as_list(), [None, 12, 40, 60]) + + @parameterized.named_parameters(("with_bias", True), ("no_bias", False)) + def test_masked_attention(self, use_bias): + """Test with a mask tensor.""" + test_layer = keras.layers.MultiHeadAttention( + num_heads=2, key_dim=2, use_bias=use_bias + ) + # Create a 3-dimensional input (the first dimension is implicit). + batch_size = 3 + query = keras.Input(shape=(4, 8)) + value = keras.Input(shape=(2, 8)) + mask_tensor = keras.Input(shape=(4, 2)) + output = test_layer( + query=query, value=value, attention_mask=mask_tensor + ) + + # Create a model containing the test layer. + model = keras.Model([query, value, mask_tensor], output) + + # Generate data for the input (non-mask) tensors. + from_data = 10 * np.random.random_sample((batch_size, 4, 8)) + to_data = 10 * np.random.random_sample((batch_size, 2, 8)) + + # Invoke the data with a random set of mask data. This should mask at + # least one element. + mask_data = np.random.randint(2, size=(batch_size, 4, 2)) + masked_output_data = model.predict([from_data, to_data, mask_data]) + + # Invoke the same data, but with a null mask (where no elements are + # masked). + null_mask_data = np.ones((batch_size, 4, 2)) + unmasked_output_data = model.predict( + [from_data, to_data, null_mask_data] + ) + + # Because one data is masked and one is not, the outputs should not be + # the same. + self.assertNotAllClose(masked_output_data, unmasked_output_data) + + # Tests the layer with three inputs: Q, K, V. + key = keras.Input(shape=(2, 8)) + output = test_layer( + query, value=value, key=key, attention_mask=mask_tensor + ) + model = keras.Model([query, value, key, mask_tensor], output) + + masked_output_data = model.predict( + [from_data, to_data, to_data, mask_data] + ) + unmasked_output_data = model.predict( + [from_data, to_data, to_data, null_mask_data] + ) + # Because one data is masked and one is not, the outputs should not be + # the same. 
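+        # Masked positions receive (near-)zero attention weight, so the
+        # softmax renormalizes over the remaining positions and the outputs
+        # differ.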
+        self.assertNotAllClose(masked_output_data, unmasked_output_data)
+
+        if use_bias:
+            self.assertLen(test_layer._query_dense.trainable_variables, 2)
+            self.assertLen(test_layer._output_dense.trainable_variables, 2)
+        else:
+            self.assertLen(test_layer._query_dense.trainable_variables, 1)
+            self.assertLen(test_layer._output_dense.trainable_variables, 1)
+
+    def test_initializer(self):
+        """Test with a specified initializer."""
+        test_layer = keras.layers.MultiHeadAttention(
+            num_heads=12,
+            key_dim=64,
+            kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02),
+        )
+        # Create a 3-dimensional input (the first dimension is implicit).
+        query = keras.Input(shape=(40, 80))
+        output = test_layer(query, query)
+        self.assertEqual(output.shape.as_list(), [None, 40, 80])
+
+        # Make sure the sub layers have different kernel init values, and are
+        # not reusing the initializers.
+        self.assertNotAllClose(
+            keras.backend.eval(test_layer._query_dense.kernel),
+            keras.backend.eval(test_layer._key_dense.kernel),
+        )
+        self.assertNotAllClose(
+            keras.backend.eval(test_layer._query_dense.kernel),
+            keras.backend.eval(test_layer._value_dense.kernel),
+        )
+        self.assertNotAllClose(
+            keras.backend.eval(test_layer._query_dense.kernel),
+            keras.backend.eval(test_layer._output_dense.kernel),
+        )
+
+    @parameterized.named_parameters(
+        ("bfloat16", tf.bfloat16),
+        ("float16", tf.float16),
+        ("float32", tf.float32),
+        ("float64", tf.float64),
+    )
+    def test_sublayer_dtypes(self, dtype):
+        test_layer = keras.layers.MultiHeadAttention(
+            num_heads=12, key_dim=64, dtype=dtype
+        )
+
+        query = keras.Input(shape=(40, 80), dtype=dtype)
+        # Build the layer.
+        test_layer(query=query, value=query)
+
+        self.assertEqual(test_layer._query_dense.dtype, dtype)
+        self.assertEqual(test_layer._key_dense.dtype, dtype)
+        self.assertEqual(test_layer._value_dense.dtype, dtype)
+        self.assertEqual(test_layer._output_dense.dtype, dtype)
+
+    def test_masked_attention_with_scores(self):
+        """Test with a mask tensor."""
+        test_layer = keras.layers.MultiHeadAttention(num_heads=2, key_dim=2)
+        # Create a 3-dimensional input (the first dimension is implicit).
+        batch_size = 3
+        query = keras.Input(shape=(4, 8))
+        value = keras.Input(shape=(2, 8))
+        mask_tensor = keras.Input(shape=(4, 2))
+        output = test_layer(
+            query=query, value=value, attention_mask=mask_tensor
+        )
+
+        # Create a model containing the test layer.
+        model = keras.Model([query, value, mask_tensor], output)
+
+        # Generate data for the input (non-mask) tensors.
+        from_data = 10 * np.random.random_sample((batch_size, 4, 8))
+        to_data = 10 * np.random.random_sample((batch_size, 2, 8))
+
+        # Invoke the data with a random set of mask data. This should mask at
+        # least one element.
+        mask_data = np.random.randint(2, size=(batch_size, 4, 2))
+        masked_output_data = model.predict([from_data, to_data, mask_data])
+
+        # Invoke the same data, but with a null mask (where no elements are
+        # masked).
+        null_mask_data = np.ones((batch_size, 4, 2))
+        unmasked_output_data = model.predict(
+            [from_data, to_data, null_mask_data]
+        )
+
+        # Because one data is masked and one is not, the outputs should not be
+        # the same.
+        self.assertNotAllClose(masked_output_data, unmasked_output_data)
+
+        # Create a model containing attention scores.
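+        # With `return_attention_scores=True`, the layer returns a tuple of
+        # (attention_output, attention_scores).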
+ output, scores = test_layer( + query=query, + value=value, + attention_mask=mask_tensor, + return_attention_scores=True, + ) + model = keras.Model([query, value, mask_tensor], [output, scores]) + masked_output_data_score, masked_score = model.predict( + [from_data, to_data, mask_data] + ) + unmasked_output_data_score, unmasked_score = model.predict( + [from_data, to_data, null_mask_data] + ) + self.assertNotAllClose( + masked_output_data_score, unmasked_output_data_score + ) + self.assertAllClose(masked_output_data, masked_output_data_score) + self.assertAllClose(unmasked_output_data, unmasked_output_data_score) + self.assertNotAllClose(masked_score, unmasked_score) + + @parameterized.named_parameters( + ("4d_inputs_1freebatch_mask2", [3, 4], [3, 2], [4, 2], (2,)), + ("4d_inputs_1freebatch_mask3", [3, 4], [3, 2], [3, 4, 2], (2,)), + ("4d_inputs_1freebatch_mask4", [3, 4], [3, 2], [3, 2, 4, 2], (2,)), + ("4D_inputs_2D_attention", [3, 4], [3, 2], [3, 4, 3, 2], (1, 2)), + ("5D_inputs_2D_attention", [5, 3, 4], [5, 3, 2], [3, 4, 3, 2], (2, 3)), + ( + "5D_inputs_2D_attention_fullmask", + [5, 3, 4], + [5, 3, 2], + [5, 3, 4, 3, 2], + (2, 3), + ), + ) + def test_high_dim_attention( + self, q_dims, v_dims, mask_dims, attention_axes + ): + """Test with a mask tensor.""" + test_layer = keras.layers.MultiHeadAttention( + num_heads=2, key_dim=2, attention_axes=attention_axes + ) + batch_size, hidden_size = 3, 8 + # Generate data for the input (non-mask) tensors. + query_shape = [batch_size] + q_dims + [hidden_size] + value_shape = [batch_size] + v_dims + [hidden_size] + mask_shape = [batch_size] + mask_dims + query = 10 * np.random.random_sample(query_shape) + value = 10 * np.random.random_sample(value_shape) + + # Invoke the data with a random set of mask data. This should mask at + # least one element. + mask_data = np.random.randint(2, size=mask_shape).astype("bool") + # Invoke the same data, but with a null mask (where no elements are + # masked). + null_mask_data = np.ones(mask_shape) + # Because one data is masked and one is not, the outputs should not be + # the same. + query_tensor = keras.Input(query_shape[1:], name="query") + value_tensor = keras.Input(value_shape[1:], name="value") + mask_tensor = keras.Input(mask_shape[1:], name="mask") + output = test_layer( + query=query_tensor, value=value_tensor, attention_mask=mask_tensor + ) + model = keras.Model([query_tensor, value_tensor, mask_tensor], output) + + self.assertNotAllClose( + model.predict([query, value, mask_data]), + model.predict([query, value, null_mask_data]), + ) + + def test_dropout(self): + test_layer = keras.layers.MultiHeadAttention( + num_heads=2, key_dim=2, dropout=0.5 + ) + + # Generate data for the input (non-mask) tensors. + from_data = keras.backend.ones(shape=(32, 4, 8)) + to_data = keras.backend.ones(shape=(32, 2, 8)) + train_out = test_layer(from_data, to_data, None, None, None, True) + test_out = test_layer(from_data, to_data, None, None, None, False) + + # Output should be close when not in training mode, + # and should not be close when enabling dropout in training mode. 
+        self.assertNotAllClose(
+            keras.backend.eval(train_out), keras.backend.eval(test_out)
+        )
+
+    @test_combinations.generate(
+        test_combinations.combine(
+            ragged_query=[True, False],
+            ragged_value=[True, False],
+            ragged_key=[True, False],
+        )
+    )
+    def test_ragged_tensor(self, ragged_query, ragged_value, ragged_key):
+        if ragged_query:
+            query = tf.ragged.constant(
+                [
+                    [[3.0, 1.0], [4.0, 1.0]],
+                    [[5.0, 9.0], [2.0, 6.0], [3.0, 1.0]],
+                    [[1.0, 2.0]],
+                ],
+                inner_shape=(2,),
+            )
+        else:
+            query = keras.backend.ones(shape=(3, 2, 2))
+
+        if ragged_value:
+            value = tf.ragged.constant(
+                [[[3.0, 1.0], [4.0, 1.0]], [[5.0, 9.0]], [[1.0, 2.0]]],
+                inner_shape=(2,),
+            )
+        else:
+            value = keras.backend.ones(shape=(3, 4, 2))
+
+        if ragged_key:
+            key = tf.ragged.constant(
+                [
+                    [[3.0, 1.0], [4.0, 1.0]],
+                    [[5.0, 9.0], [2.0, 6.0], [3.0, 1.0], [1.0, 5.0]],
+                    [[1.0, 2.0]],
+                ],
+                inner_shape=(2,),
+            )
+        else:
+            key = keras.backend.ones(shape=(3, 4, 2))
+
+        test_layer = keras.layers.MultiHeadAttention(num_heads=5, key_dim=2)
+        results = test_layer(query, value, key)
+        self.assertAllEqual(results.shape.as_list(), query.shape.as_list())
+
+    def test_ragged_tensor_with_causal_mask_no_error(self):
+        ragged_tensor = tf.ragged.constant(
+            [
+                [[3.0, 1.0], [4.0, 1.0]],
+                [[5.0, 9.0], [2.0, 6.0], [3.0, 1.0]],
+                [[1.0, 2.0]],
+            ],
+            inner_shape=(2,),
+        )
+        test_layer = keras.layers.MultiHeadAttention(num_heads=5, key_dim=2)
+        results = test_layer(
+            ragged_tensor, ragged_tensor, ragged_tensor, use_causal_mask=True
+        )
+        self.assertAllEqual(
+            results.shape.as_list(), ragged_tensor.shape.as_list()
+        )
+
+    def test_query_mask_propagation(self):
+        """Test automatic propagation of the query's mask."""
+        test_layer = keras.layers.MultiHeadAttention(num_heads=2, key_dim=2)
+        self.assertTrue(test_layer.supports_masking)
+        query = tf.constant([[1, 2, 3, 0, 0], [3, 3, 1, 1, 2], [1, 0, 0, 0, 0]])
+        masked_query = keras.layers.Embedding(4, 8, mask_zero=True)(query)
+        value = tf.random.normal((3, 3, 8))
+        output = test_layer(query=masked_query, value=value)
+        self.assertTrue(hasattr(output, "_keras_mask"))
+        self.assertAllEqual(masked_query._keras_mask, output._keras_mask)
+
+    @parameterized.named_parameters(("causal", True), ("not_causal", False))
+    @test_utils.run_v2_only
+    def test_value_mask(self, use_causal_mask):
+        """Test that the value and causal masks are taken into account."""
+        test_layer = keras.layers.MultiHeadAttention(num_heads=2, key_dim=2)
+        query = tf.constant([[1, 2, 3, 0, 0], [3, 3, 1, 1, 2], [1, 0, 0, 0, 0]])
+        masked_query = keras.layers.Embedding(4, 8, mask_zero=True)(query)
+        value = tf.constant([[5, 4, 0], [3, 0, 0], [2, 1, 1]])
+        masked_value = keras.layers.Embedding(6, 8, mask_zero=True)(value)
+        output = test_layer(
+            query=masked_query,
+            value=masked_value,
+            use_causal_mask=use_causal_mask,
+        )
+        mask = tf.constant(
+            [[[True, True, False]] * 3 + [[False, False, False]] * 2]
+            + [[[True, False, False]] * 5]
+            + [[[True, True, True]] + [[False, False, False]] * 4]
+        )
+        if use_causal_mask:
+            mask = mask & tf.constant(
+                [
+                    [[True, False, False], [True, True, False]]
+                    + [[True, True, True]] * 3
+                ]
+            )
+        del masked_query._keras_mask
+        del masked_value._keras_mask
+        output_with_manual_mask = test_layer(
+            query=masked_query, value=masked_value, attention_mask=mask
+        )
+        self.assertAllClose(output, output_with_manual_mask)
+
+    def test_masks_are_cast_to_bool(self):
+        """Test that the implicit and explicit masks are cast to bool."""
+        test_layer = keras.layers.MultiHeadAttention(num_heads=2, key_dim=2)
+        query = np.array([[1, 2, 3, 0, 0], [3, 3, 1, 1, 2], [1, 0, 0, 0, 0]])
+        masked_query = keras.layers.Embedding(4, 8, mask_zero=True)(query)
+        masked_query._keras_mask = tf.cast(masked_query._keras_mask, tf.float32)
+        value = np.array([[5, 4, 0], [3, 0, 0], [2, 1, 1]])
+        masked_value = keras.layers.Embedding(6, 8, mask_zero=True)(value)
+        masked_value._keras_mask = tf.cast(masked_value._keras_mask, tf.float32)
+        float_mask = tf.constant([[[1.0]]])
+        # If all goes well, the following should not raise any exception:
+        _ = test_layer(
+            query=masked_query,
+            value=masked_value,
+            use_causal_mask=True,
+            attention_mask=float_mask,
+        )
+
+    @parameterized.named_parameters(
+        ("without_key_same_proj", [40, 80], [20, 80], None, None),
+        ("with_key_same_proj", [40, 80], [20, 80], [20, 30], None),
+        ("without_key_different_proj", [40, 80], [20, 80], None, [30, 40]),
+        ("with_key_different_proj", [40, 80], [20, 80], [20, 30], [15, 50]),
+        (
+            "high_dim_same_proj",
+            [40, 20, 30, 80],
+            [10, 10, 50, 80],
+            [10, 10, 50, 20],
+            None,
+        ),
+        (
+            "high_dim_different_proj",
+            [40, 20, 30, 80],
+            [10, 10, 50, 80],
+            [10, 10, 50, 20],
+            [30, 20],
+        ),
+    )
+    def test_compute_output_shape(
+        self, query_dims, value_dims, key_dims, output_shape
+    ):
+        """Test computed shape is equal to the layer output's shape."""
+        test_layer = keras.layers.MultiHeadAttention(
+            num_heads=2,
+            key_dim=2,
+            value_dim=2,
+            output_shape=output_shape,
+        )
+        batch_size = None
+        query_shape = [batch_size] + query_dims
+        value_shape = [batch_size] + value_dims
+
+        if key_dims:
+            key_shape = [batch_size] + key_dims
+        else:
+            key_shape = None
+
+        query = keras.Input(query_shape[1:])
+        value = keras.Input(value_shape[1:])
+        if key_shape:
+            key = keras.Input(key_shape[1:])
+        else:
+            key = None
+        output = test_layer(query=query, value=value, key=key)
+        comp_output_shape = test_layer.compute_output_shape(
+            query_shape, value_shape, key_shape
+        )
+        self.assertListEqual(
+            output.shape.as_list(), comp_output_shape.as_list()
+        )
+
+    @parameterized.named_parameters(
+        ("query_value_dim_mismatch", (None, 40, 80), (None, 20, 70), None),
+        (
+            "key_value_dim_mismatch",
+            (None, 40, 80),
+            (None, 20, 80),
+            (None, 10, 70),
+        ),
+        (
+            "key_value_dim_mismatch_high_dim",
+            (None, 40, 20, 30, 80),
+            (None, 10, 10, 50, 80),
+            (None, 10, 15, 50, 20),
+        ),
+    )
+    def test_compute_output_shape_raises_error(
+        self, query_shape, value_shape, key_shape
+    ):
+        """Test dimension mismatches."""
+        test_layer = keras.layers.MultiHeadAttention(
+            num_heads=4,
+            key_dim=2,
+            value_dim=2,
+        )
+        with self.assertRaisesRegex(ValueError, r"must be equal"):
+            test_layer.compute_output_shape(query_shape, value_shape, key_shape)
 
 
 class SubclassAttention(keras.layers.MultiHeadAttention):
+    def _build_attention(self, qkv_rank):
+        pass
 
-  def _build_attention(self, qkv_rank):
-    pass
-
-  def _compute_attention(self,
-                         query_tensor,
-                         key_tensor,
-                         value_tensor,
-                         attention_mask=None,
-                         training=None):
-    return value_tensor, None
+    def _compute_attention(
+        self,
+        query_tensor,
+        key_tensor,
+        value_tensor,
+        attention_mask=None,
+        training=None,
+    ):
+        return value_tensor, None
 
 
 @test_combinations.run_all_keras_modes
 class AttentionSubclassTest(test_combinations.TestCase):
-
-  def test_initializer(self):
-    """Test with a specified initializer."""
-    test_layer = SubclassAttention(num_heads=12, key_dim=64)
-    # Create a 3-dimensional input (the first dimension is implicit).
- query = keras.Input(shape=(40, 80)) - output = test_layer(query, query) - self.assertEqual(output.shape.as_list(), [None, 40, 80]) + def test_initializer(self): + """Test with a specified initializer.""" + test_layer = SubclassAttention(num_heads=12, key_dim=64) + # Create a 3-dimensional input (the first dimension is implicit). + query = keras.Input(shape=(40, 80)) + output = test_layer(query, query) + self.assertEqual(output.shape.as_list(), [None, 40, 80]) +@object_registration.register_keras_serializable() class TestModel(keras.Model): + def __init__(self): + super().__init__() + self.attention = keras.layers.MultiHeadAttention( + num_heads=3, + key_dim=4, + value_dim=4, + use_bias=True, + dropout=0.0, + output_shape=[12], + ) - def __init__(self): - super().__init__() - self.attention = keras.layers.MultiHeadAttention( - num_heads=3, - key_dim=4, - value_dim=4, - use_bias=True, - dropout=0.0, - output_shape=[12]) + @classmethod + def from_config(cls, config): + return cls(**config) - @classmethod - def from_config(cls, config): - return cls(**config) + def get_config(self): + return {} - def get_config(self): - return {} - - def call(self, x, training=False): - return self.attention(x, x, training=training) + def call(self, x, training=False): + return self.attention(x, x, training=training) @test_combinations.run_all_keras_modes(always_skip_v1=True) class KerasModelSavingTest(test_combinations.TestCase): - - def test_keras_saving_subclass(self): - model = TestModel() - query = keras.Input(shape=(40, 80)) - _ = model(query) - model_path = self.get_temp_dir() + "/tmp_model" - keras.models.save_model(model, model_path, save_format="tf") - reloaded_model = keras.models.load_model(model_path) - self.assertEqual( - len(model.trainable_variables), len(reloaded_model.trainable_variables)) - for src_v, loaded_v in zip(model.trainable_variables, - reloaded_model.trainable_variables): - self.assertAllEqual(src_v, loaded_v) - - @parameterized.parameters("h5", "tf") - def test_keras_saving_functional(self, save_format): - model = TestModel() - query = keras.Input(shape=(40, 80)) - output = keras.layers.MultiHeadAttention( - num_heads=3, - key_dim=4, - value_dim=4, - use_bias=True, - dropout=0.0)(query, query) - model = keras.Model(inputs=query, outputs=output) - model_path = self.get_temp_dir() + "/tmp_model" - keras.models.save_model(model, model_path, save_format=save_format) - reloaded_model = keras.models.load_model(model_path) - self.assertEqual( - len(model.trainable_variables), len(reloaded_model.trainable_variables)) - for src_v, loaded_v in zip(model.trainable_variables, - reloaded_model.trainable_variables): - self.assertAllEqual(src_v, loaded_v) - - def test_create_without_build(self): - not_initialized_layer = keras.layers.MultiHeadAttention( - num_heads=3, key_dim=4, value_dim=4) - keras.layers.MultiHeadAttention.from_config( - not_initialized_layer.get_config()) + @parameterized.parameters("tf", "keras_v3") + def test_keras_saving_subclass(self, save_format): + model = TestModel() + query = keras.Input(shape=(40, 80)) + _ = model(query) + model_path = self.get_temp_dir() + "/tmp_model" + if save_format == "keras_v3": + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "TF2 must be enabled to use the new `.keras` saving." 
+ ) + model_path += ".keras" + keras.models.save_model(model, model_path, save_format=save_format) + reloaded_model = keras.models.load_model(model_path) + self.assertEqual( + len(model.trainable_variables), + len(reloaded_model.trainable_variables), + ) + for src_v, loaded_v in zip( + model.trainable_variables, reloaded_model.trainable_variables + ): + self.assertAllEqual(src_v, loaded_v) + + @parameterized.parameters("h5", "tf", "keras_v3") + def test_keras_saving_functional(self, save_format): + model = TestModel() + query = keras.Input(shape=(40, 80)) + output = keras.layers.MultiHeadAttention( + num_heads=3, key_dim=4, value_dim=4, use_bias=True, dropout=0.0 + )(query, query) + model = keras.Model(inputs=query, outputs=output) + model_path = self.get_temp_dir() + "/tmp_model" + if save_format == "keras_v3": + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "TF2 must be enabled to use the new `.keras` saving." + ) + model_path += ".keras" + keras.models.save_model(model, model_path, save_format=save_format) + reloaded_model = keras.models.load_model(model_path) + self.assertEqual( + len(model.trainable_variables), + len(reloaded_model.trainable_variables), + ) + for src_v, loaded_v in zip( + model.trainable_variables, reloaded_model.trainable_variables + ): + self.assertAllEqual(src_v, loaded_v) + + def test_create_without_build(self): + not_initialized_layer = keras.layers.MultiHeadAttention( + num_heads=3, key_dim=4, value_dim=4 + ) + keras.layers.MultiHeadAttention.from_config( + not_initialized_layer.get_config() + ) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/convolutional/BUILD b/keras/layers/convolutional/BUILD index 974ff9154627..60560697c35a 100644 --- a/keras/layers/convolutional/BUILD +++ b/keras/layers/convolutional/BUILD @@ -1,15 +1,17 @@ # Description: # Contains the Keras convolution layers. +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "cuda_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:__subpackages__", "//third_party/tensorflow/python/distribute:__pkg__", "//third_party/tensorflow/python/feature_column:__pkg__", "//third_party/tensorflow/python/keras:__subpackages__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/trackable:__pkg__", "//third_party/tensorflow/tools/pip_package:__pkg__", "//third_party/tensorflow_models/official/projects/residual_mobilenet/modeling/backbones:__pkg__", ], diff --git a/keras/layers/convolutional/__init__.py b/keras/layers/convolutional/__init__.py index 99cbf4e7b904..6b3d3d14cad3 100644 --- a/keras/layers/convolutional/__init__.py +++ b/keras/layers/convolutional/__init__.py @@ -13,28 +13,27 @@ # limitations under the License. # ============================================================================== """Keras convolution layers.""" -# pylint: disable=g-bad-import-order + +# Convolution layer aliases. # Convolution layers. 
from keras.layers.convolutional.conv1d import Conv1D -from keras.layers.convolutional.conv2d import Conv2D -from keras.layers.convolutional.conv3d import Conv3D +from keras.layers.convolutional.conv1d import Convolution1D from keras.layers.convolutional.conv1d_transpose import Conv1DTranspose +from keras.layers.convolutional.conv1d_transpose import Convolution1DTranspose +from keras.layers.convolutional.conv2d import Conv2D +from keras.layers.convolutional.conv2d import Convolution2D from keras.layers.convolutional.conv2d_transpose import Conv2DTranspose +from keras.layers.convolutional.conv2d_transpose import Convolution2DTranspose +from keras.layers.convolutional.conv3d import Conv3D +from keras.layers.convolutional.conv3d import Convolution3D from keras.layers.convolutional.conv3d_transpose import Conv3DTranspose +from keras.layers.convolutional.conv3d_transpose import Convolution3DTranspose from keras.layers.convolutional.depthwise_conv1d import DepthwiseConv1D from keras.layers.convolutional.depthwise_conv2d import DepthwiseConv2D from keras.layers.convolutional.separable_conv1d import SeparableConv1D -from keras.layers.convolutional.separable_conv2d import SeparableConv2D - -# Convolution layer aliases. -from keras.layers.convolutional.conv1d import Convolution1D -from keras.layers.convolutional.conv2d import Convolution2D -from keras.layers.convolutional.conv3d import Convolution3D -from keras.layers.convolutional.conv1d_transpose import Convolution1DTranspose -from keras.layers.convolutional.conv2d_transpose import Convolution2DTranspose -from keras.layers.convolutional.conv3d_transpose import Convolution3DTranspose from keras.layers.convolutional.separable_conv1d import SeparableConvolution1D +from keras.layers.convolutional.separable_conv2d import SeparableConv2D from keras.layers.convolutional.separable_conv2d import SeparableConvolution2D # Pooling layers imported for backwards namespace compatibility. diff --git a/keras/layers/convolutional/base_conv.py b/keras/layers/convolutional/base_conv.py index 21dfb8e80a4b..da5613cd650e 100644 --- a/keras/layers/convolutional/base_conv.py +++ b/keras/layers/convolutional/base_conv.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Keras base class for convolution layers.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras import activations from keras import constraints @@ -22,370 +24,408 @@ from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf class Conv(Layer): - """Abstract N-D convolution layer (private, used as implementation base). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. If `use_bias` is True (and a `bias_initializer` is provided), - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - Note: layer attributes cannot be modified after the layer has been called - once (except the `trainable` attribute). - - Args: - rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). Could be "None", eg in the case of - depth wise convolution. 
- kernel_size: An integer or tuple/list of n integers, specifying the - length of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"`, `"same"`, or `"causal"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros - evenly to the left/right or up/down of the input such that output has the - same height/width dimension as the input. `"causal"` results in causal - (dilated) convolutions, e.g. `output[t]` does not depend on `input[t+1:]`. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, ..., channels)` while `channels_first` corresponds to - inputs with shape `(batch_size, channels, ...)`. - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - groups: A positive integer specifying the number of groups in which the - input is split along the channel axis. Each group is convolved - separately with `filters / groups` filters. The output is the - concatenation of all the `groups` results along the channel axis. - Input channels and `filters` must both be divisible by `groups`. - activation: Activation function to use. - If you don't specify anything, no activation is applied. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. If None, the - default initializer (glorot_uniform) will be used. - bias_initializer: An initializer for the bias vector. If None, the default - initializer (zeros) will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - """ - - def __init__(self, - rank, - filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1, - groups=1, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - conv_op=None, - **kwargs): - super().__init__( - trainable=trainable, - name=name, - activity_regularizer=regularizers.get(activity_regularizer), - **kwargs) - self.rank = rank - - if isinstance(filters, float): - filters = int(filters) - if filters is not None and filters <= 0: - raise ValueError('Invalid value for argument `filters`. ' - 'Expected a strictly positive value. 
' - f'Received filters={filters}.') - self.filters = filters - self.groups = groups or 1 - self.kernel_size = conv_utils.normalize_tuple( - kernel_size, rank, 'kernel_size') - self.strides = conv_utils.normalize_tuple( - strides, rank, 'strides', allow_zero=True) - self.padding = conv_utils.normalize_padding(padding) - self.data_format = conv_utils.normalize_data_format(data_format) - self.dilation_rate = conv_utils.normalize_tuple( - dilation_rate, rank, 'dilation_rate') - - self.activation = activations.get(activation) - self.use_bias = use_bias - - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - self.input_spec = InputSpec(min_ndim=self.rank + 2) - - self._validate_init() - self._is_causal = self.padding == 'causal' - self._channels_first = self.data_format == 'channels_first' - self._tf_data_format = conv_utils.convert_data_format( - self.data_format, self.rank + 2) - - def _validate_init(self): - if self.filters is not None and self.filters % self.groups != 0: - raise ValueError( - 'The number of filters must be evenly divisible by the number of ' - 'groups. Received: groups={}, filters={}'.format( - self.groups, self.filters)) - - if not all(self.kernel_size): - raise ValueError('The argument `kernel_size` cannot contain 0(s). ' - 'Received: %s' % (self.kernel_size,)) - - if not all(self.strides): - raise ValueError('The argument `strides` cannot contains 0(s). ' - 'Received: %s' % (self.strides,)) - - if self.padding == 'causal': - # pylint: disable=g-import-not-at-top - from keras.layers.convolutional.conv1d import Conv1D - from keras.layers.convolutional.separable_conv1d import SeparableConv1D - # pylint: enable=g-import-not-at-top - if not isinstance(self, (Conv1D, SeparableConv1D)): - raise ValueError('Causal padding is only supported for `Conv1D`' - 'and `SeparableConv1D`.') - - def build(self, input_shape): - input_shape = tf.TensorShape(input_shape) - input_channel = self._get_input_channel(input_shape) - if input_channel % self.groups != 0: - raise ValueError( - 'The number of input channels must be evenly divisible by the number ' - 'of groups. Received groups={}, but the input has {} channels ' - '(full input shape is {}).'.format(self.groups, input_channel, - input_shape)) - kernel_shape = self.kernel_size + (input_channel // self.groups, - self.filters) - - # compute_output_shape contains some validation logic for the input shape, - # and make sure the output shape has all positive dimensions. - self.compute_output_shape(input_shape) - - self.kernel = self.add_weight( - name='kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, + """Abstract N-D convolution layer (private, used as implementation base). + + This layer creates a convolution kernel that is convolved + (actually cross-correlated) with the layer input to produce a tensor of + outputs. If `use_bias` is True (and a `bias_initializer` is provided), + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + Note: layer attributes cannot be modified after the layer has been called + once (except the `trainable` attribute). 
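+
+    Concrete layers such as `Conv1D`, `Conv2D` and `Conv3D` subclass this
+    base and fix the `rank` argument accordingly.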
+
+    Args:
+      rank: An integer, the rank of the convolution, e.g. "2" for 2D
+        convolution.
+      filters: Integer, the dimensionality of the output space (i.e. the number
+        of filters in the convolution). Could be "None", e.g. in the case of
+        depthwise convolution.
+      kernel_size: An integer or tuple/list of n integers, specifying the
+        length of the convolution window.
+      strides: An integer or tuple/list of n integers,
+        specifying the stride length of the convolution.
+        Specifying any stride value != 1 is incompatible with specifying
+        any `dilation_rate` value != 1.
+      padding: One of `"valid"`, `"same"`, or `"causal"` (case-insensitive).
+        `"valid"` means no padding. `"same"` results in padding with zeros
+        evenly to the left/right or up/down of the input such that output has
+        the same height/width dimension as the input. `"causal"` results in
+        causal (dilated) convolutions, e.g. `output[t]` does not depend on
+        `input[t+1:]`.
+      data_format: A string, one of `channels_last` (default) or
+        `channels_first`.
+        The ordering of the dimensions in the inputs.
+        `channels_last` corresponds to inputs with shape
+        `(batch_size, ..., channels)` while `channels_first` corresponds to
+        inputs with shape `(batch_size, channels, ...)`.
+      dilation_rate: An integer or tuple/list of n integers, specifying
+        the dilation rate to use for dilated convolution.
+        Currently, specifying any `dilation_rate` value != 1 is
+        incompatible with specifying any `strides` value != 1.
+      groups: A positive integer specifying the number of groups in which the
+        input is split along the channel axis. Each group is convolved
+        separately with `filters / groups` filters. The output is the
+        concatenation of all the `groups` results along the channel axis.
+        Input channels and `filters` must both be divisible by `groups`.
+      activation: Activation function to use.
+        If you don't specify anything, no activation is applied.
+      use_bias: Boolean, whether the layer uses a bias.
+      kernel_initializer: An initializer for the convolution kernel. If None,
+        the default initializer (glorot_uniform) will be used.
+      bias_initializer: An initializer for the bias vector. If None, the default
+        initializer (zeros) will be used.
+      kernel_regularizer: Optional regularizer for the convolution kernel.
+      bias_regularizer: Optional regularizer for the bias vector.
+      activity_regularizer: Optional regularizer function for the output.
+      kernel_constraint: Optional projection function to be applied to the
+        kernel after being updated by an `Optimizer` (e.g. used to implement
+        norm constraints or value constraints for layer weights). The function
+        must take as input the unprojected variable and must return the
+        projected variable (which must have the same shape). Constraints are
+        not safe to use when doing asynchronous distributed training.
+      bias_constraint: Optional projection function to be applied to the
+        bias after being updated by an `Optimizer`.
+ """ + + def __init__( + self, + rank, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, trainable=True, - dtype=self.dtype) - if self.use_bias: - self.bias = self.add_weight( - name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) - else: - self.bias = None - channel_axis = self._get_channel_axis() - self.input_spec = InputSpec(min_ndim=self.rank + 2, - axes={channel_axis: input_channel}) - self.built = True - - def convolution_op(self, inputs, kernel): - if self.padding == 'causal': - tf_padding = 'VALID' # Causal padding handled in `call`. - elif isinstance(self.padding, str): - tf_padding = self.padding.upper() - else: - tf_padding = self.padding - - return tf.nn.convolution( - inputs, - kernel, - strides=list(self.strides), - padding=tf_padding, - dilations=list(self.dilation_rate), - data_format=self._tf_data_format, - name=self.__class__.__name__) - - # TODO(b/213173659): remove this when grouped convolutions are fully supported - # on the CPU for compiled functions. For now, we need this as a workaround for - # CPU support. - @tf.function(jit_compile=True) - def _jit_compiled_convolution_op(self, inputs, kernel): - return self.convolution_op(inputs, kernel) - - def call(self, inputs): - input_shape = inputs.shape - - if self._is_causal: # Apply causal padding to inputs for Conv1D. - inputs = tf.pad(inputs, self._compute_causal_padding(inputs)) - - if self.groups > 1: - outputs = self._jit_compiled_convolution_op(inputs, self.kernel) - else: - outputs = self.convolution_op(inputs, self.kernel) - - if self.use_bias: - output_rank = outputs.shape.rank - if self.rank == 1 and self._channels_first: - # nn.bias_add does not accept a 1D input tensor. - bias = tf.reshape(self.bias, (1, self.filters, 1)) - outputs += bias - else: - # Handle multiple batch dimensions. 
- if output_rank is not None and output_rank > 2 + self.rank: - - def _apply_fn(o): - return tf.nn.bias_add( - o, self.bias, data_format=self._tf_data_format) - - outputs = conv_utils.squeeze_batch_dims( - outputs, _apply_fn, inner_rank=self.rank + 1) - else: - outputs = tf.nn.bias_add( - outputs, self.bias, data_format=self._tf_data_format) - - if not tf.executing_eagerly(): - # Infer the static output shape: - out_shape = self.compute_output_shape(input_shape) - outputs.set_shape(out_shape) - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def _spatial_output_shape(self, spatial_input_shape): - return [ - conv_utils.conv_output_length( # pylint: disable=g-complex-comprehension - length, - self.kernel_size[i], - padding=self.padding, - stride=self.strides[i], - dilation=self.dilation_rate[i]) - for i, length in enumerate(spatial_input_shape) - ] - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - batch_rank = len(input_shape) - self.rank - 1 - try: - if self.data_format == 'channels_last': - return tf.TensorShape( - input_shape[:batch_rank] + - self._spatial_output_shape(input_shape[batch_rank:-1]) + - [self.filters]) - else: - return tf.TensorShape( - input_shape[:batch_rank] + [self.filters] + - self._spatial_output_shape(input_shape[batch_rank + 1:])) - - except ValueError: - raise ValueError( - f'One of the dimensions in the output is <= 0 ' - f'due to downsampling in {self.name}. Consider ' - f'increasing the input size. ' - f'Received input shape {input_shape} which would produce ' - f'output shape with a zero or negative value in a ' - f'dimension.') - - def _recreate_conv_op(self, inputs): # pylint: disable=unused-argument - return False - - def get_config(self): - config = { - 'filters': + name=None, + conv_op=None, + **kwargs, + ): + super().__init__( + trainable=trainable, + name=name, + activity_regularizer=regularizers.get(activity_regularizer), + **kwargs, + ) + self.rank = rank + + if isinstance(filters, float): + filters = int(filters) + if filters is not None and filters <= 0: + raise ValueError( + "Invalid value for argument `filters`. " + "Expected a strictly positive value. " + f"Received filters={filters}." 
+            )
+        self.filters = filters
+        self.groups = groups or 1
+        self.kernel_size = conv_utils.normalize_tuple(
+            kernel_size, rank, "kernel_size"
+        )
+        self.strides = conv_utils.normalize_tuple(
+            strides, rank, "strides", allow_zero=True
+        )
+        self.padding = conv_utils.normalize_padding(padding)
+        self.data_format = conv_utils.normalize_data_format(data_format)
+        self.dilation_rate = conv_utils.normalize_tuple(
+            dilation_rate, rank, "dilation_rate"
+        )
+
+        self.activation = activations.get(activation)
+        self.use_bias = use_bias
+
+        self.kernel_initializer = initializers.get(kernel_initializer)
+        self.bias_initializer = initializers.get(bias_initializer)
+        self.kernel_regularizer = regularizers.get(kernel_regularizer)
+        self.bias_regularizer = regularizers.get(bias_regularizer)
+        self.kernel_constraint = constraints.get(kernel_constraint)
+        self.bias_constraint = constraints.get(bias_constraint)
+        self.input_spec = InputSpec(min_ndim=self.rank + 2)
+
+        self._validate_init()
+        self._is_causal = self.padding == "causal"
+        self._channels_first = self.data_format == "channels_first"
+        self._tf_data_format = conv_utils.convert_data_format(
+            self.data_format, self.rank + 2
+        )
+
+    def _validate_init(self):
+        if self.filters is not None and self.filters % self.groups != 0:
+            raise ValueError(
+                "The number of filters must be evenly divisible by the "
+                "number of groups. Received: groups={}, filters={}".format(
+                    self.groups, self.filters
+                )
+            )
+
+        if not all(self.kernel_size):
+            raise ValueError(
+                "The argument `kernel_size` cannot contain 0(s). Received: %s"
+                % (self.kernel_size,)
+            )
+
+        if not all(self.strides):
+            raise ValueError(
+                "The argument `strides` cannot contain 0(s). Received: %s"
+                % (self.strides,)
+            )
+
+        if self.padding == "causal":
+
+            from keras.layers.convolutional.conv1d import Conv1D
+            from keras.layers.convolutional.separable_conv1d import (
+                SeparableConv1D,
+            )
+
+            if not isinstance(self, (Conv1D, SeparableConv1D)):
+                raise ValueError(
+                    "Causal padding is only supported for `Conv1D` "
+                    "and `SeparableConv1D`."
+                )
+
+        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
+            raise ValueError(
+                "`strides > 1` not supported in conjunction with "
+                f"`dilation_rate > 1`. Received: strides={self.strides} and "
+                f"dilation_rate={self.dilation_rate}"
+            )
+
+    def build(self, input_shape):
+        input_shape = tf.TensorShape(input_shape)
+        input_channel = self._get_input_channel(input_shape)
+        if input_channel % self.groups != 0:
+            raise ValueError(
+                "The number of input channels must be evenly divisible by "
+                "the number of groups. Received groups={}, but the input "
+                "has {} channels (full input shape is {}).".format(
+                    self.groups, input_channel, input_shape
+                )
+            )
+        kernel_shape = self.kernel_size + (
+            input_channel // self.groups,
             self.filters,
-        'kernel_size':
-            self.kernel_size,
-        'strides':
-            self.strides,
-        'padding':
-            self.padding,
-        'data_format':
-            self.data_format,
-        'dilation_rate':
-            self.dilation_rate,
-        'groups':
-            self.groups,
-        'activation':
-            activations.serialize(self.activation),
-        'use_bias':
-            self.use_bias,
-        'kernel_initializer':
-            initializers.serialize(self.kernel_initializer),
-        'bias_initializer':
-            initializers.serialize(self.bias_initializer),
-        'kernel_regularizer':
-            regularizers.serialize(self.kernel_regularizer),
-        'bias_regularizer':
-            regularizers.serialize(self.bias_regularizer),
-        'activity_regularizer':
-            regularizers.serialize(self.activity_regularizer),
-        'kernel_constraint':
-            constraints.serialize(self.kernel_constraint),
-        'bias_constraint':
-            constraints.serialize(self.bias_constraint)
-    }
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-  def _compute_causal_padding(self, inputs):
-    """Calculates padding for 'causal' option for 1-d conv layers."""
-    left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1)
-    if getattr(inputs.shape, 'ndims', None) is None:
-      batch_rank = 1
-    else:
-      batch_rank = len(inputs.shape) - 2
-    if self.data_format == 'channels_last':
-      causal_padding = [[0, 0]] * batch_rank + [[left_pad, 0], [0, 0]]
-    else:
-      causal_padding = [[0, 0]] * batch_rank + [[0, 0], [left_pad, 0]]
-    return causal_padding
-
-  def _get_channel_axis(self):
-    if self.data_format == 'channels_first':
-      return -1 - self.rank
-    else:
-      return -1
-
-  def _get_input_channel(self, input_shape):
-    channel_axis = self._get_channel_axis()
-    if input_shape.dims[channel_axis].value is None:
-      raise ValueError('The channel dimension of the inputs should be defined. '
-                       f'The input_shape received is {input_shape}, '
-                       f'where axis {channel_axis} (0-based) '
-                       'is the channel dimension, which found to be `None`.')
-    return int(input_shape[channel_axis])
-
-  def _get_padding_op(self):
-    if self.padding == 'causal':
-      op_padding = 'valid'
-    else:
-      op_padding = self.padding
-    if not isinstance(op_padding, (list, tuple)):
-      op_padding = op_padding.upper()
-    return op_padding
+        )
+
+        # compute_output_shape contains some validation logic for the input
+        # shape, and makes sure the output shape has all positive dimensions.
+        self.compute_output_shape(input_shape)
+
+        self.kernel = self.add_weight(
+            name="kernel",
+            shape=kernel_shape,
+            initializer=self.kernel_initializer,
+            regularizer=self.kernel_regularizer,
+            constraint=self.kernel_constraint,
+            trainable=True,
+            dtype=self.dtype,
+        )
+        if self.use_bias:
+            self.bias = self.add_weight(
+                name="bias",
+                shape=(self.filters,),
+                initializer=self.bias_initializer,
+                regularizer=self.bias_regularizer,
+                constraint=self.bias_constraint,
+                trainable=True,
+                dtype=self.dtype,
+            )
+        else:
+            self.bias = None
+        channel_axis = self._get_channel_axis()
+        self.input_spec = InputSpec(
+            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
+        )
+        self.built = True
+
+    def convolution_op(self, inputs, kernel):
+        if self.padding == "causal":
+            tf_padding = "VALID"  # Causal padding handled in `call`.
+ elif isinstance(self.padding, str): + tf_padding = self.padding.upper() + else: + tf_padding = self.padding + + return tf.nn.convolution( + inputs, + kernel, + strides=list(self.strides), + padding=tf_padding, + dilations=list(self.dilation_rate), + data_format=self._tf_data_format, + name=self.__class__.__name__, + ) + + # TODO(b/213173659): remove this when grouped convolutions are fully + # supported on the CPU for compiled functions. For now, we need this as a + # workaround for CPU support. + @tf.function(jit_compile=True) + def _jit_compiled_convolution_op(self, inputs, kernel): + return self.convolution_op(inputs, kernel) + + def call(self, inputs): + input_shape = inputs.shape + + if self._is_causal: # Apply causal padding to inputs for Conv1D. + inputs = tf.pad(inputs, self._compute_causal_padding(inputs)) + + if self.groups > 1: + outputs = self._jit_compiled_convolution_op( + inputs, tf.convert_to_tensor(self.kernel) + ) + else: + outputs = self.convolution_op(inputs, self.kernel) + + if self.use_bias: + output_rank = outputs.shape.rank + if self.rank == 1 and self._channels_first: + # nn.bias_add does not accept a 1D input tensor. + bias = tf.reshape(self.bias, (1, self.filters, 1)) + outputs += bias + else: + # Handle multiple batch dimensions. + if output_rank is not None and output_rank > 2 + self.rank: + + def _apply_fn(o): + return tf.nn.bias_add( + o, self.bias, data_format=self._tf_data_format + ) + + outputs = conv_utils.squeeze_batch_dims( + outputs, _apply_fn, inner_rank=self.rank + 1 + ) + else: + outputs = tf.nn.bias_add( + outputs, self.bias, data_format=self._tf_data_format + ) + + if not tf.executing_eagerly() and input_shape.rank: + # Infer the static output shape: + out_shape = self.compute_output_shape(input_shape) + outputs.set_shape(out_shape) + + if self.activation is not None: + return self.activation(outputs) + return outputs + + def _spatial_output_shape(self, spatial_input_shape): + return [ + conv_utils.conv_output_length( + length, + self.kernel_size[i], + padding=self.padding, + stride=self.strides[i], + dilation=self.dilation_rate[i], + ) + for i, length in enumerate(spatial_input_shape) + ] + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + batch_rank = len(input_shape) - self.rank - 1 + try: + if self.data_format == "channels_last": + return tf.TensorShape( + input_shape[:batch_rank] + + self._spatial_output_shape(input_shape[batch_rank:-1]) + + [self.filters] + ) + else: + return tf.TensorShape( + input_shape[:batch_rank] + + [self.filters] + + self._spatial_output_shape(input_shape[batch_rank + 1 :]) + ) + + except ValueError: + raise ValueError( + "One of the dimensions in the output is <= 0 " + f"due to downsampling in {self.name}. Consider " + "increasing the input size. " + f"Received input shape {input_shape} which would produce " + "output shape with a zero or negative value in a " + "dimension." 
+            )
+
+    def _recreate_conv_op(self, inputs):
+        return False
+
+    def get_config(self):
+        config = {
+            "filters": self.filters,
+            "kernel_size": self.kernel_size,
+            "strides": self.strides,
+            "padding": self.padding,
+            "data_format": self.data_format,
+            "dilation_rate": self.dilation_rate,
+            "groups": self.groups,
+            "activation": activations.serialize(self.activation),
+            "use_bias": self.use_bias,
+            "kernel_initializer": initializers.serialize(
+                self.kernel_initializer
+            ),
+            "bias_initializer": initializers.serialize(self.bias_initializer),
+            "kernel_regularizer": regularizers.serialize(
+                self.kernel_regularizer
+            ),
+            "bias_regularizer": regularizers.serialize(self.bias_regularizer),
+            "activity_regularizer": regularizers.serialize(
+                self.activity_regularizer
+            ),
+            "kernel_constraint": constraints.serialize(self.kernel_constraint),
+            "bias_constraint": constraints.serialize(self.bias_constraint),
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    def _compute_causal_padding(self, inputs):
+        """Calculates padding for 'causal' option for 1-d conv layers."""
+        left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1)
+        if getattr(inputs.shape, "ndims", None) is None:
+            batch_rank = 1
+        else:
+            batch_rank = len(inputs.shape) - 2
+        if self.data_format == "channels_last":
+            causal_padding = [[0, 0]] * batch_rank + [[left_pad, 0], [0, 0]]
+        else:
+            causal_padding = [[0, 0]] * batch_rank + [[0, 0], [left_pad, 0]]
+        return causal_padding
+
+    def _get_channel_axis(self):
+        if self.data_format == "channels_first":
+            return -1 - self.rank
+        else:
+            return -1
+
+    def _get_input_channel(self, input_shape):
+        channel_axis = self._get_channel_axis()
+        if input_shape.dims[channel_axis].value is None:
+            raise ValueError(
+                "The channel dimension of the inputs should be defined. "
+                f"The input_shape received is {input_shape}, "
+                f"where axis {channel_axis} (0-based) "
+                "is the channel dimension, which was found to be `None`."
+            )
+        return int(input_shape[channel_axis])
+
+    def _get_padding_op(self):
+        if self.padding == "causal":
+            op_padding = "valid"
+        else:
+            op_padding = self.padding
+        if not isinstance(op_padding, (list, tuple)):
+            op_padding = op_padding.upper()
+        return op_padding
diff --git a/keras/layers/convolutional/base_depthwise_conv.py b/keras/layers/convolutional/base_depthwise_conv.py
index e2e89de2f2bc..f18c25ee89f7 100644
--- a/keras/layers/convolutional/base_depthwise_conv.py
+++ b/keras/layers/convolutional/base_depthwise_conv.py
@@ -13,196 +13,214 @@
 # limitations under the License.
 # ==============================================================================
 """Keras abstract base for depthwise convolutions."""
-# pylint: disable=g-classes-have-attributes
+
+
+import tensorflow.compat.v2 as tf
 
 from keras import constraints
 from keras import initializers
 from keras import regularizers
 from keras.engine.input_spec import InputSpec
 from keras.layers.convolutional.base_conv import Conv
-import tensorflow.compat.v2 as tf
 
 
 class DepthwiseConv(Conv):
-  """Depthwise convolution.
-
-  Depthwise convolution is a type of convolution in which each input channel is
-  convolved with a different kernel (called a depthwise kernel). You
-  can understand depthwise convolution as the first step in a depthwise
-  separable convolution.
-
-  It is implemented via the following steps:
-
-  - Split the input into individual channels.
-  - Convolve each channel with an individual depthwise kernel with
-    `depth_multiplier` output channels.
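The causal branch above works because `call` left-pads the input by `dilation_rate[0] * (kernel_size[0] - 1)` steps before running a plain `VALID` convolution. A minimal sketch of that equivalence, assuming TensorFlow 2.x and channels-last data:

import tensorflow as tf

# Sketch, assuming TF 2.x: causal padding = left-pad by
# dilation_rate * (kernel_size - 1), then a VALID convolution.
kernel_size, dilation_rate = 3, 2
left_pad = dilation_rate * (kernel_size - 1)  # 4 steps of left padding

x = tf.random.normal((1, 10, 8))  # (batch, steps, channels)
x_padded = tf.pad(x, [[0, 0], [left_pad, 0], [0, 0]])

kernel = tf.random.normal((kernel_size, 8, 16))
y = tf.nn.convolution(
    x_padded, kernel, padding="VALID", dilations=[dilation_rate]
)
print(y.shape)  # (1, 10, 16): output steps match the unpadded input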
- - Concatenate the convolved outputs along the channels axis. - - Unlike a regular convolution, depthwise convolution does not mix - information across different input channels. - - The `depth_multiplier` argument determines how many filter are applied to one - input channel. As such, it controls the amount of output channels that are - generated per input channel in the depthwise step. - - Args: - kernel_size: A tuple or list of integers specifying the spatial dimensions - of the filters. Can be a single integer to specify the same value for all - spatial dimensions. - strides: A tuple or list of integers specifying the strides of the - convolution. Can be a single integer to specify the same value for all - spatial dimensions. Specifying any `stride` value != 1 is incompatible - with specifying any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means no - padding. `"same"` results in padding with zeros evenly to the left/right - or up/down of the input such that output has the same height/width - dimension as the input. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. `channels_last` corresponds - to inputs with shape `(batch_size, height, width, channels)` while - `channels_first` corresponds to inputs with shape `(batch_size, channels, - height, width)`. It defaults to the `image_data_format` value found in - your Keras config file at `~/.keras/keras.json`. If you never set it, then - it will be 'channels_last'. - dilation_rate: An integer or tuple/list of 2 integers, specifying the - dilation rate to use for dilated convolution. Currently, specifying any - `dilation_rate` value != 1 is incompatible with specifying any `strides` - value != 1. - activation: Activation function to use. If you don't specify anything, no - activation is applied (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: Initializer for the depthwise kernel matrix (see - `keras.initializers`). If None, the default initializer - ('glorot_uniform') will be used. - bias_initializer: Initializer for the bias vector (see - `keras.initializers`). If None, the default initializer ('zeros') will be - used. - depthwise_regularizer: Regularizer function applied to the depthwise kernel - matrix (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector (see - `keras.regularizers`). - activity_regularizer: Regularizer function applied to the output of the - layer (its 'activation') (see `keras.regularizers`). - depthwise_constraint: Constraint function applied to the depthwise kernel - matrix (see `keras.constraints`). - bias_constraint: Constraint function applied to the bias vector (see - `keras.constraints`). - - Input shape: - 4D tensor with shape: `[batch_size, channels, rows, cols]` if - data_format='channels_first' - or 4D tensor with shape: `[batch_size, rows, cols, channels]` if - data_format='channels_last'. - - Output shape: - 4D tensor with shape: `[batch_size, channels * depth_multiplier, new_rows, - new_cols]` if `data_format='channels_first'` - or 4D tensor with shape: `[batch_size, - new_rows, new_cols, channels * depth_multiplier]` if - `data_format='channels_last'`. 
`rows` and `cols` values might have changed
-    due to padding.
-
-  Returns:
-    A tensor of rank 4 representing
-    `activation(depthwiseconv2d(inputs, kernel) + bias)`.
-
-  Raises:
-    ValueError: if `padding` is "causal".
-    ValueError: when both `strides` > 1 and `dilation_rate` > 1.
-  """
-
-  def __init__(self,
-               rank,
-               kernel_size,
-               strides=1,
-               padding='valid',
-               depth_multiplier=1,
-               data_format=None,
-               dilation_rate=1,
-               activation=None,
-               use_bias=True,
-               depthwise_initializer='glorot_uniform',
-               bias_initializer='zeros',
-               depthwise_regularizer=None,
-               bias_regularizer=None,
-               activity_regularizer=None,
-               depthwise_constraint=None,
-               bias_constraint=None,
-               **kwargs):
-    super().__init__(
+    """Depthwise convolution.
+
+    Depthwise convolution is a type of convolution in which each input channel
+    is convolved with a different kernel (called a depthwise kernel). You can
+    understand depthwise convolution as the first step in a depthwise separable
+    convolution.
+
+    It is implemented via the following steps:
+
+    - Split the input into individual channels.
+    - Convolve each channel with an individual depthwise kernel with
+      `depth_multiplier` output channels.
+    - Concatenate the convolved outputs along the channels axis.
+
+    Unlike a regular convolution, depthwise convolution does not mix
+    information across different input channels.
+
+    The `depth_multiplier` argument determines how many filters are applied to
+    one input channel. As such, it controls the number of output channels that
+    are generated per input channel in the depthwise step.
+
+    Args:
+      kernel_size: A tuple or list of integers specifying the spatial dimensions
+        of the filters. Can be a single integer to specify the same value for
+        all spatial dimensions.
+      strides: A tuple or list of integers specifying the strides of the
+        convolution. Can be a single integer to specify the same value for all
+        spatial dimensions. Specifying any `stride` value != 1 is incompatible
+        with specifying any `dilation_rate` value != 1.
+      padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means
+        no padding. `"same"` results in padding with zeros evenly to the
+        left/right or up/down of the input such that output has the same
+        height/width dimension as the input.
+      depth_multiplier: The number of depthwise convolution output channels for
+        each input channel. The total number of depthwise convolution output
+        channels will be equal to `filters_in * depth_multiplier`.
+      data_format: A string, one of `channels_last` (default) or
+        `channels_first`. The ordering of the dimensions in the inputs.
+        `channels_last` corresponds to inputs with shape `(batch_size, height,
+        width, channels)` while `channels_first` corresponds to inputs with
+        shape `(batch_size, channels, height, width)`. If left unspecified,
+        uses `image_data_format` value found in your Keras config file at
+        `~/.keras/keras.json` (if it exists) else 'channels_last'.
+        Defaults to 'channels_last'.
+      dilation_rate: An integer or tuple/list of 2 integers, specifying the
+        dilation rate to use for dilated convolution. Currently, specifying any
+        `dilation_rate` value != 1 is incompatible with specifying any `strides`
+        value != 1.
+      activation: Activation function to use. If you don't specify anything, no
+        activation is applied (see `keras.activations`).
+      use_bias: Boolean, whether the layer uses a bias vector.
+      depthwise_initializer: Initializer for the depthwise kernel matrix (see
+        `keras.initializers`).
If None, the default initializer + ('glorot_uniform') will be used. + bias_initializer: Initializer for the bias vector (see + `keras.initializers`). If None, the default initializer ('zeros') will + be used. + depthwise_regularizer: Regularizer function applied to the depthwise + kernel matrix (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector (see + `keras.regularizers`). + activity_regularizer: Regularizer function applied to the output of the + layer (its 'activation') (see `keras.regularizers`). + depthwise_constraint: Constraint function applied to the depthwise kernel + matrix (see `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector (see + `keras.constraints`). + + Input shape: + 4D tensor with shape: `[batch_size, channels, rows, cols]` if + data_format='channels_first' + or 4D tensor with shape: `[batch_size, rows, cols, channels]` if + data_format='channels_last'. + + Output shape: + 4D tensor with shape: `[batch_size, channels * depth_multiplier, new_rows, + new_cols]` if `data_format='channels_first'` + or 4D tensor with shape: `[batch_size, + new_rows, new_cols, channels * depth_multiplier]` if + `data_format='channels_last'`. `rows` and `cols` values might have + changed due to padding. + + Returns: + A tensor of rank 4 representing + `activation(depthwiseconv2d(inputs, kernel) + bias)`. + + Raises: + ValueError: if `padding` is "causal". + ValueError: when both `strides` > 1 and `dilation_rate` > 1. + """ + + def __init__( + self, rank, - filters=None, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - bias_constraint=bias_constraint, - **kwargs) - self.depth_multiplier = depth_multiplier - self.depthwise_initializer = initializers.get(depthwise_initializer) - self.depthwise_regularizer = regularizers.get(depthwise_regularizer) - self.depthwise_constraint = constraints.get(depthwise_constraint) - self.bias_initializer = initializers.get(bias_initializer) - - def build(self, input_shape): - if len(input_shape) != self.rank + 2: - raise ValueError('Inputs to `DepthwiseConv` should have ' - f'rank {self.rank + 2}. ' - f'Received input_shape={input_shape}.') - input_shape = tf.TensorShape(input_shape) - channel_axis = self._get_channel_axis() - if input_shape.dims[channel_axis].value is None: - raise ValueError('The channel dimension of the inputs to `DepthwiseConv` ' - 'should be defined. ' - f'The input_shape received is {input_shape}, ' - f'where axis {channel_axis} (0-based) ' - 'is the channel dimension, which found to be `None`.') - input_dim = int(input_shape[channel_axis]) - depthwise_kernel_shape = self.kernel_size + (input_dim, - self.depth_multiplier) - - self.depthwise_kernel = self.add_weight( - shape=depthwise_kernel_shape, - initializer=self.depthwise_initializer, - name='depthwise_kernel', - regularizer=self.depthwise_regularizer, - constraint=self.depthwise_constraint) - - if self.use_bias: - self.bias = self.add_weight(shape=(input_dim * self.depth_multiplier,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - # Set input spec. 
-    self.input_spec = InputSpec(
-        min_ndim=self.rank + 2, axes={channel_axis: input_dim})
-    self.built = True
-
-  def call(self, inputs):
-    raise NotImplementedError
-
-  def get_config(self):
-    config = super().get_config()
-    config.pop('filters')
-    config.pop('kernel_initializer')
-    config.pop('kernel_regularizer')
-    config.pop('kernel_constraint')
-    config['depth_multiplier'] = self.depth_multiplier
-    config['depthwise_initializer'] = initializers.serialize(
-        self.depthwise_initializer)
-    config['depthwise_regularizer'] = regularizers.serialize(
-        self.depthwise_regularizer)
-    config['depthwise_constraint'] = constraints.serialize(
-        self.depthwise_constraint)
-    return config
+        kernel_size,
+        strides=1,
+        padding="valid",
+        depth_multiplier=1,
+        data_format=None,
+        dilation_rate=1,
+        activation=None,
+        use_bias=True,
+        depthwise_initializer="glorot_uniform",
+        bias_initializer="zeros",
+        depthwise_regularizer=None,
+        bias_regularizer=None,
+        activity_regularizer=None,
+        depthwise_constraint=None,
+        bias_constraint=None,
+        **kwargs,
+    ):
+        super().__init__(
+            rank,
+            filters=None,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=padding,
+            data_format=data_format,
+            dilation_rate=dilation_rate,
+            activation=activation,
+            use_bias=use_bias,
+            bias_regularizer=bias_regularizer,
+            activity_regularizer=activity_regularizer,
+            bias_constraint=bias_constraint,
+            **kwargs,
+        )
+        self.depth_multiplier = depth_multiplier
+        self.depthwise_initializer = initializers.get(depthwise_initializer)
+        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
+        self.depthwise_constraint = constraints.get(depthwise_constraint)
+        self.bias_initializer = initializers.get(bias_initializer)
+
+    def build(self, input_shape):
+        if len(input_shape) != self.rank + 2:
+            raise ValueError(
+                "Inputs to `DepthwiseConv` should have "
+                f"rank {self.rank + 2}. "
+                f"Received input_shape={input_shape}."
+            )
+        input_shape = tf.TensorShape(input_shape)
+        channel_axis = self._get_channel_axis()
+        if input_shape.dims[channel_axis].value is None:
+            raise ValueError(
+                "The channel dimension of the inputs to `DepthwiseConv` "
+                "should be defined. "
+                f"The input_shape received is {input_shape}, "
+                f"where axis {channel_axis} (0-based) "
+                "is the channel dimension, which was found to be `None`."
+            )
+        input_dim = int(input_shape[channel_axis])
+        depthwise_kernel_shape = self.kernel_size + (
+            input_dim,
+            self.depth_multiplier,
+        )
+
+        self.depthwise_kernel = self.add_weight(
+            shape=depthwise_kernel_shape,
+            initializer=self.depthwise_initializer,
+            name="depthwise_kernel",
+            regularizer=self.depthwise_regularizer,
+            constraint=self.depthwise_constraint,
+        )
+
+        if self.use_bias:
+            self.bias = self.add_weight(
+                shape=(input_dim * self.depth_multiplier,),
+                initializer=self.bias_initializer,
+                name="bias",
+                regularizer=self.bias_regularizer,
+                constraint=self.bias_constraint,
+            )
+        else:
+            self.bias = None
+        # Set input spec.
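As a quick shape check for the `build` above (a sketch assuming TensorFlow 2.x): the depthwise kernel has shape `kernel_size + (input_channels, depth_multiplier)`, and the layer produces `input_channels * depth_multiplier` output channels.

import tensorflow as tf

# Sketch, assuming TF 2.x.
layer = tf.keras.layers.DepthwiseConv2D(kernel_size=3, depth_multiplier=2)
y = layer(tf.random.normal((1, 32, 32, 4)))  # 4 input channels

print(layer.depthwise_kernel.shape)  # (3, 3, 4, 2)
print(y.shape)  # (1, 30, 30, 8): 4 channels * depth_multiplier of 2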
+ self.input_spec = InputSpec( + min_ndim=self.rank + 2, axes={channel_axis: input_dim} + ) + self.built = True + + def call(self, inputs): + raise NotImplementedError + + def get_config(self): + config = super().get_config() + config.pop("filters") + config.pop("kernel_initializer") + config.pop("kernel_regularizer") + config.pop("kernel_constraint") + config["depth_multiplier"] = self.depth_multiplier + config["depthwise_initializer"] = initializers.serialize( + self.depthwise_initializer + ) + config["depthwise_regularizer"] = regularizers.serialize( + self.depthwise_regularizer + ) + config["depthwise_constraint"] = constraints.serialize( + self.depthwise_constraint + ) + return config diff --git a/keras/layers/convolutional/base_separable_conv.py b/keras/layers/convolutional/base_separable_conv.py index 8a491daffd8d..6afb161039ca 100644 --- a/keras/layers/convolutional/base_separable_conv.py +++ b/keras/layers/convolutional/base_separable_conv.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Keras abstract base layer for separable nD convolution.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras import activations from keras import constraints @@ -21,217 +23,226 @@ from keras import regularizers from keras.engine.input_spec import InputSpec from keras.layers.convolutional.base_conv import Conv -import tensorflow.compat.v2 as tf class SeparableConv(Conv): - """Abstract base layer for separable nD convolution. + """Abstract base layer for separable nD convolution. - This layer performs a depthwise convolution that acts separately on - channels, followed by a pointwise convolution that mixes channels. - If `use_bias` is True and a bias initializer is provided, - it adds a bias vector to the output. - It then optionally applies an activation function to produce the final output. + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. + It then optionally applies an activation function to produce the final + output. - Args: - rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A tuple or list of integers specifying the spatial - dimensions of the filters. Can be a single integer to specify the same - value for all spatial dimensions. - strides: A tuple or list of integers specifying the strides - of the convolution. Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any `stride` value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros evenly - to the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, ..., channels)` while `channels_first` corresponds to - inputs with shape `(batch_size, channels, ...)`. 
- dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. The total number of depthwise convolution output - channels will be equal to `num_filters_in * depth_multiplier`. - activation: Activation function to use. - If you don't specify anything, no activation is applied - (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias. - depthwise_initializer: An initializer for the depthwise convolution kernel - (see `keras.initializers`). If None, then the default initializer - ('glorot_uniform') will be used. - pointwise_initializer: An initializer for the pointwise convolution kernel - (see `keras.initializers`). If None, then the default initializer - ('glorot_uniform') will be used. - bias_initializer: An initializer for the bias vector. If None, the default - initializer ('zeros') will be used (see `keras.initializers`). - depthwise_regularizer: Optional regularizer for the depthwise - convolution kernel. - pointwise_regularizer: Optional regularizer for the pointwise - convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - depthwise_constraint: Optional projection function to be applied to the - depthwise kernel after being updated by an `Optimizer` (e.g. used for - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - pointwise_constraint: Optional projection function to be applied to the - pointwise kernel after being updated by an `Optimizer`. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` the weights of this layer will be marked as - trainable (and listed in `layer.trainable_weights`). - """ + Args: + rank: An integer, the rank of the convolution, e.g. "2" for 2D + convolution. + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: A tuple or list of integers specifying the spatial + dimensions of the filters. Can be a single integer to specify the same + value for all spatial dimensions. + strides: A tuple or list of integers specifying the strides + of the convolution. Can be a single integer to specify the same value + for all spatial dimensions. + Specifying any `stride` value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding with zeros + evenly to the left/right or up/down of the input such that output has + the same height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, ..., channels)` while `channels_first` corresponds to + inputs with shape `(batch_size, channels, ...)`. 
+ dilation_rate: An integer or tuple/list of 2 integers, specifying + the dilation rate to use for dilated convolution. + Can be a single integer to specify the same value for + all spatial dimensions. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + depth_multiplier: The number of depthwise convolution output channels for + each input channel. The total number of depthwise convolution output + channels will be equal to `num_filters_in * depth_multiplier`. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias. + depthwise_initializer: An initializer for the depthwise convolution kernel + (see `keras.initializers`). If None, then the default initializer + ('glorot_uniform') will be used. + pointwise_initializer: An initializer for the pointwise convolution kernel + (see `keras.initializers`). If None, then the default initializer + ('glorot_uniform') will be used. + bias_initializer: An initializer for the bias vector. If None, the default + initializer ('zeros') will be used (see `keras.initializers`). + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. used for + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` the weights of this layer will be marked as + trainable (and listed in `layer.trainable_weights`). 
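Concretely, the factorization described above amounts to one depthwise kernel of shape `kernel_size + (input_channels, depth_multiplier)` followed by a pointwise kernel of shape `(1,) * rank + (depth_multiplier * input_channels, filters)`. A minimal sketch, assuming TensorFlow 2.x:

import tensorflow as tf

# Sketch, assuming TF 2.x: the two kernels behind SeparableConv2D.
layer = tf.keras.layers.SeparableConv2D(
    filters=16, kernel_size=3, depth_multiplier=2
)
layer.build(input_shape=(None, 32, 32, 4))  # 4 input channels

print(layer.depthwise_kernel.shape)  # (3, 3, 4, 2)
print(layer.pointwise_kernel.shape)  # (1, 1, 8, 16): 8 = 2 * 4
print(layer.bias.shape)              # (16,)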
+ """ - def __init__(self, - rank, - filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1, - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - pointwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super().__init__( - rank=rank, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activations.get(activation), - use_bias=use_bias, - bias_initializer=initializers.get(bias_initializer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - **kwargs) - self.depth_multiplier = depth_multiplier - self.depthwise_initializer = initializers.get(depthwise_initializer) - self.pointwise_initializer = initializers.get(pointwise_initializer) - self.depthwise_regularizer = regularizers.get(depthwise_regularizer) - self.pointwise_regularizer = regularizers.get(pointwise_regularizer) - self.depthwise_constraint = constraints.get(depthwise_constraint) - self.pointwise_constraint = constraints.get(pointwise_constraint) + def __init__( + self, + rank, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + pointwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs, + ): + super().__init__( + rank=rank, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activations.get(activation), + use_bias=use_bias, + bias_initializer=initializers.get(bias_initializer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs, + ) + self.depth_multiplier = depth_multiplier + self.depthwise_initializer = initializers.get(depthwise_initializer) + self.pointwise_initializer = initializers.get(pointwise_initializer) + self.depthwise_regularizer = regularizers.get(depthwise_regularizer) + self.pointwise_regularizer = regularizers.get(pointwise_regularizer) + self.depthwise_constraint = constraints.get(depthwise_constraint) + self.pointwise_constraint = constraints.get(pointwise_constraint) - def build(self, input_shape): - input_shape = tf.TensorShape(input_shape) - channel_axis = self._get_channel_axis() - if input_shape.dims[channel_axis].value is None: - raise ValueError('The channel dimension of the inputs should be defined. 
'
-                       f'The input_shape received is {input_shape}, '
-                       f'where axis {channel_axis} (0-based) '
-                       'is the channel dimension, which found to be `None`.')
-    input_dim = int(input_shape[channel_axis])
-    self.input_spec = InputSpec(ndim=self.rank + 2,
-                                axes={channel_axis: input_dim})
-    depthwise_kernel_shape = self.kernel_size + (input_dim,
-                                                 self.depth_multiplier)
-    pointwise_kernel_shape = (
-        1,) * self.rank + (self.depth_multiplier * input_dim, self.filters)
+    def build(self, input_shape):
+        input_shape = tf.TensorShape(input_shape)
+        channel_axis = self._get_channel_axis()
+        if input_shape.dims[channel_axis].value is None:
+            raise ValueError(
+                "The channel dimension of the inputs should be defined. "
+                f"The input_shape received is {input_shape}, "
+                f"where axis {channel_axis} (0-based) "
+                "is the channel dimension, which was found to be `None`."
+            )
+        input_dim = int(input_shape[channel_axis])
+        self.input_spec = InputSpec(
+            ndim=self.rank + 2, axes={channel_axis: input_dim}
+        )
+        depthwise_kernel_shape = self.kernel_size + (
+            input_dim,
+            self.depth_multiplier,
+        )
+        pointwise_kernel_shape = (1,) * self.rank + (
+            self.depth_multiplier * input_dim,
+            self.filters,
+        )
 
-    self.depthwise_kernel = self.add_weight(
-        name='depthwise_kernel',
-        shape=depthwise_kernel_shape,
-        initializer=self.depthwise_initializer,
-        regularizer=self.depthwise_regularizer,
-        constraint=self.depthwise_constraint,
-        trainable=True,
-        dtype=self.dtype)
-    self.pointwise_kernel = self.add_weight(
-        name='pointwise_kernel',
-        shape=pointwise_kernel_shape,
-        initializer=self.pointwise_initializer,
-        regularizer=self.pointwise_regularizer,
-        constraint=self.pointwise_constraint,
-        trainable=True,
-        dtype=self.dtype)
-    if self.use_bias:
-      self.bias = self.add_weight(
-          name='bias',
-          shape=(self.filters,),
-          initializer=self.bias_initializer,
-          regularizer=self.bias_regularizer,
-          constraint=self.bias_constraint,
-          trainable=True,
-          dtype=self.dtype)
-    else:
-      self.bias = None
-    self.built = True
+        self.depthwise_kernel = self.add_weight(
+            name="depthwise_kernel",
+            shape=depthwise_kernel_shape,
+            initializer=self.depthwise_initializer,
+            regularizer=self.depthwise_regularizer,
+            constraint=self.depthwise_constraint,
+            trainable=True,
+            dtype=self.dtype,
+        )
+        self.pointwise_kernel = self.add_weight(
+            name="pointwise_kernel",
+            shape=pointwise_kernel_shape,
+            initializer=self.pointwise_initializer,
+            regularizer=self.pointwise_regularizer,
+            constraint=self.pointwise_constraint,
+            trainable=True,
+            dtype=self.dtype,
+        )
+        if self.use_bias:
+            self.bias = self.add_weight(
+                name="bias",
+                shape=(self.filters,),
+                initializer=self.bias_initializer,
+                regularizer=self.bias_regularizer,
+                constraint=self.bias_constraint,
+                trainable=True,
+                dtype=self.dtype,
+            )
+        else:
+            self.bias = None
+        self.built = True
 
-  def call(self, inputs):
-    raise NotImplementedError
+    def call(self, inputs):
+        raise NotImplementedError
 
-  def get_config(self):
-    config = {
-        'filters':
-            self.filters,
-        'kernel_size':
-            self.kernel_size,
-        'strides':
-            self.strides,
-        'padding':
-            self.padding,
-        'data_format':
-            self.data_format,
-        'depth_multiplier':
-            self.depth_multiplier,
-        'dilation_rate':
-            self.dilation_rate,
-        'activation':
-            activations.serialize(self.activation),
-        'use_bias':
-            self.use_bias,
-        'depthwise_initializer':
-            initializers.serialize(self.depthwise_initializer),
-        'pointwise_initializer':
-            initializers.serialize(self.pointwise_initializer),
-        'bias_initializer':
-            initializers.serialize(self.bias_initializer),
-
'depthwise_regularizer': - regularizers.serialize(self.depthwise_regularizer), - 'pointwise_regularizer': - regularizers.serialize(self.pointwise_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'depthwise_constraint': - constraints.serialize(self.depthwise_constraint), - 'pointwise_constraint': - constraints.serialize(self.pointwise_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint) - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = { + "filters": self.filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "padding": self.padding, + "data_format": self.data_format, + "depth_multiplier": self.depth_multiplier, + "dilation_rate": self.dilation_rate, + "activation": activations.serialize(self.activation), + "use_bias": self.use_bias, + "depthwise_initializer": initializers.serialize( + self.depthwise_initializer + ), + "pointwise_initializer": initializers.serialize( + self.pointwise_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "depthwise_regularizer": regularizers.serialize( + self.depthwise_regularizer + ), + "pointwise_regularizer": regularizers.serialize( + self.pointwise_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "depthwise_constraint": constraints.serialize( + self.depthwise_constraint + ), + "pointwise_constraint": constraints.serialize( + self.pointwise_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/convolutional/conv1d.py b/keras/layers/convolutional/conv1d.py index 9ddad5f3fa22..5577fca943de 100644 --- a/keras/layers/convolutional/conv1d.py +++ b/keras/layers/convolutional/conv1d.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== """Keras 1D convolution layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import activations from keras import constraints @@ -22,149 +22,158 @@ from keras.dtensor import utils from keras.layers.convolutional.base_conv import Conv +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Conv1D', 'keras.layers.Convolution1D') +@keras_export("keras.layers.Conv1D", "keras.layers.Convolution1D") class Conv1D(Conv): - """1D convolution layer (e.g. temporal convolution). - - This layer creates a convolution kernel that is convolved - with the layer input over a single spatial (or temporal) dimension - to produce a tensor of outputs. - If `use_bias` is True, a bias vector is created and added to the outputs. - Finally, if `activation` is not `None`, - it is applied to the outputs as well. - - When using this layer as the first layer in a model, - provide an `input_shape` argument - (tuple of integers or `None`, e.g. - `(10, 128)` for sequences of 10 vectors of 128-dimensional vectors, - or `(None, 128)` for variable-length sequences of 128-dimensional vectors. - - Examples: - - >>> # The inputs are 128-length vectors with 10 timesteps, and the batch size - >>> # is 4. 
- >>> input_shape = (4, 10, 128) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.Conv1D( - ... 32, 3, activation='relu',input_shape=input_shape[1:])(x) - >>> print(y.shape) - (4, 8, 32) - - >>> # With extended batch shape [4, 7] (e.g. weather data where batch - >>> # dimensions correspond to spatial location and the third dimension - >>> # corresponds to time.) - >>> input_shape = (4, 7, 10, 128) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.Conv1D( - ... 32, 3, activation='relu', input_shape=input_shape[2:])(x) - >>> print(y.shape) - (4, 7, 8, 32) - - Args: - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, - specifying the length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"`, `"same"` or `"causal"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros evenly - to the left/right or up/down of the input such that output has the same - height/width dimension as the input. - `"causal"` results in causal (dilated) convolutions, e.g. `output[t]` - does not depend on `input[t+1:]`. Useful when modeling temporal data - where the model should not violate the temporal order. - See [WaveNet: A Generative Model for Raw Audio, section - 2.1](https://arxiv.org/abs/1609.03499). - data_format: A string, - one of `channels_last` (default) or `channels_first`. - dilation_rate: an integer or tuple/list of a single integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - groups: A positive integer specifying the number of groups in which the - input is split along the channel axis. Each group is convolved - separately with `filters / groups` filters. The output is the - concatenation of all the `groups` results along the channel axis. - Input channels and `filters` must both be divisible by `groups`. - activation: Activation function to use. - If you don't specify anything, no activation is applied - (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see `keras.initializers`). Defaults to 'glorot_uniform'. - bias_initializer: Initializer for the bias vector - (see `keras.initializers`). Defaults to 'zeros'. - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector - (see `keras.regularizers`). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation") - (see `keras.regularizers`). - kernel_constraint: Constraint function applied to the kernel matrix - (see `keras.constraints`). - bias_constraint: Constraint function applied to the bias vector - (see `keras.constraints`). - - Input shape: - 3+D tensor with shape: `batch_shape + (steps, input_dim)` - - Output shape: - 3+D tensor with shape: `batch_shape + (new_steps, filters)` - `steps` value might have changed due to padding or strides. - - Returns: - A tensor of rank 3 representing - `activation(conv1d(inputs, kernel) + bias)`. 
-
-  Raises:
-    ValueError: when both `strides > 1` and `dilation_rate > 1`.
-  """
-
-  @utils.allow_initializer_layout
-  def __init__(self,
-               filters,
-               kernel_size,
-               strides=1,
-               padding='valid',
-               data_format='channels_last',
-               dilation_rate=1,
-               groups=1,
-               activation=None,
-               use_bias=True,
-               kernel_initializer='glorot_uniform',
-               bias_initializer='zeros',
-               kernel_regularizer=None,
-               bias_regularizer=None,
-               activity_regularizer=None,
-               kernel_constraint=None,
-               bias_constraint=None,
-               **kwargs):
-    super().__init__(
-        rank=1,
-        filters=filters,
-        kernel_size=kernel_size,
-        strides=strides,
-        padding=padding,
-        data_format=data_format,
-        dilation_rate=dilation_rate,
-        groups=groups,
-        activation=activations.get(activation),
-        use_bias=use_bias,
-        kernel_initializer=initializers.get(kernel_initializer),
-        bias_initializer=initializers.get(bias_initializer),
-        kernel_regularizer=regularizers.get(kernel_regularizer),
-        bias_regularizer=regularizers.get(bias_regularizer),
-        activity_regularizer=regularizers.get(activity_regularizer),
-        kernel_constraint=constraints.get(kernel_constraint),
-        bias_constraint=constraints.get(bias_constraint),
-        **kwargs)
+    """1D convolution layer (e.g. temporal convolution).
+
+    This layer creates a convolution kernel that is convolved
+    with the layer input over a single spatial (or temporal) dimension
+    to produce a tensor of outputs.
+    If `use_bias` is True, a bias vector is created and added to the outputs.
+    Finally, if `activation` is not `None`,
+    it is applied to the outputs as well.
+
+    When using this layer as the first layer in a model,
+    provide an `input_shape` argument
+    (tuple of integers or `None`, e.g.
+    `(10, 128)` for sequences of 10 vectors of 128-dimensional vectors,
+    or `(None, 128)` for variable-length sequences of 128-dimensional
+    vectors).
+
+    Examples:
+
+    >>> # The inputs are 128-length vectors with 10 timesteps, and the
+    >>> # batch size is 4.
+    >>> input_shape = (4, 10, 128)
+    >>> x = tf.random.normal(input_shape)
+    >>> y = tf.keras.layers.Conv1D(
+    ...     32, 3, activation='relu', input_shape=input_shape[1:])(x)
+    >>> print(y.shape)
+    (4, 8, 32)
+
+    >>> # With extended batch shape [4, 7] (e.g. weather data where batch
+    >>> # dimensions correspond to spatial location and the third dimension
+    >>> # corresponds to time.)
+    >>> input_shape = (4, 7, 10, 128)
+    >>> x = tf.random.normal(input_shape)
+    >>> y = tf.keras.layers.Conv1D(
+    ...     32, 3, activation='relu', input_shape=input_shape[2:])(x)
+    >>> print(y.shape)
+    (4, 7, 8, 32)
+
+    Args:
+      filters: Integer, the dimensionality of the output space
+        (i.e. the number of output filters in the convolution).
+      kernel_size: An integer or tuple/list of a single integer,
+        specifying the length of the 1D convolution window.
+      strides: An integer or tuple/list of a single integer,
+        specifying the stride length of the convolution.
+        Specifying any stride value != 1 is incompatible with specifying
+        any `dilation_rate` value != 1.
+      padding: One of `"valid"`, `"same"` or `"causal"` (case-insensitive).
+        `"valid"` means no padding. `"same"` results in padding with zeros
+        evenly to the left/right or up/down of the input such that output has
+        the same height/width dimension as the input.
+        `"causal"` results in causal (dilated) convolutions, e.g. `output[t]`
+        does not depend on `input[t+1:]`. Useful when modeling temporal data
+        where the model should not violate the temporal order.
+        See [WaveNet: A Generative Model for Raw Audio, section
+        2.1](https://arxiv.org/abs/1609.03499).
+ data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape `(batch_size, width, + channels)` while `channels_first` corresponds to inputs with shape + `(batch_size, channels, width)`. Note that the `channels_first` format + is currently not supported by TensorFlow on CPU. + dilation_rate: an integer or tuple/list of a single integer, specifying + the dilation rate to use for dilated convolution. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any `strides` value != 1. + groups: A positive integer specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters / groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. + Input channels and `filters` must both be divisible by `groups`. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix + (see `keras.initializers`). Defaults to 'glorot_uniform'. + bias_initializer: Initializer for the bias vector + (see `keras.initializers`). Defaults to 'zeros'. + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector + (see `keras.regularizers`). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation") + (see `keras.regularizers`). + kernel_constraint: Constraint function applied to the kernel matrix + (see `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector + (see `keras.constraints`). + + Input shape: + 3+D tensor with shape: `batch_shape + (steps, input_dim)` + + Output shape: + 3+D tensor with shape: `batch_shape + (new_steps, filters)` + `steps` value might have changed due to padding or strides. + + Returns: + A tensor of rank 3 representing + `activation(conv1d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. 
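A short usage sketch of the `'causal'` padding documented above, assuming TensorFlow 2.x; the number of steps is preserved because `dilation_rate * (kernel_size - 1)` zeros are added on the left only, so `output[t]` never sees `input[t+1:]`:

import tensorflow as tf

# Sketch, assuming TF 2.x.
x = tf.random.normal((4, 10, 128))  # (batch, steps, channels)
y = tf.keras.layers.Conv1D(32, 3, padding="causal", dilation_rate=2)(x)
print(y.shape)  # (4, 10, 32): steps preserved, unlike "valid" padding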
+ """ + + @utils.allow_initializer_layout + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format="channels_last", + dilation_rate=1, + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs + ): + super().__init__( + rank=1, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activations.get(activation), + use_bias=use_bias, + kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), + **kwargs + ) + # Alias diff --git a/keras/layers/convolutional/conv1d_transpose.py b/keras/layers/convolutional/conv1d_transpose.py index 20c30aa44f5e..e74cff0332c6 100644 --- a/keras/layers/convolutional/conv1d_transpose.py +++ b/keras/layers/convolutional/conv1d_transpose.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Keras 1D transposed convolution layer (sometimes called deconvolution).""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import activations from keras import constraints @@ -23,260 +25,279 @@ from keras.engine.input_spec import InputSpec from keras.layers.convolutional.conv1d import Conv1D from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Conv1DTranspose', - 'keras.layers.Convolution1DTranspose') +@keras_export( + "keras.layers.Conv1DTranspose", "keras.layers.Convolution1DTranspose" +) class Conv1DTranspose(Conv1D): - """Transposed convolution layer (sometimes called Deconvolution). + """Transposed convolution layer (sometimes called Deconvolution). + + The need for transposed convolutions generally arises + from the desire to use a transformation going in the opposite direction + of a normal convolution, i.e., from something that has the shape of the + output of some convolution to something that has the shape of its input + while maintaining a connectivity pattern that is compatible with + said convolution. - The need for transposed convolutions generally arises - from the desire to use a transformation going in the opposite direction - of a normal convolution, i.e., from something that has the shape of the - output of some convolution to something that has the shape of its input - while maintaining a connectivity pattern that is compatible with - said convolution. + When using this layer as the first layer in a model, + provide the keyword argument `input_shape` + (tuple of integers or `None`, does not include the sample axis), + e.g. `input_shape=(128, 3)` for data with 128 time steps and 3 channels. - When using this layer as the first layer in a model, - provide the keyword argument `input_shape` - (tuple of integers or `None`, does not include the sample axis), - e.g. 
`input_shape=(128, 3)` for data with 128 time steps and 3 channels. + Args: + filters: Integer, the dimensionality of the output space + (i.e. the number of output filters in the convolution). + kernel_size: An integer length of the 1D convolution window. + strides: An integer specifying the stride of the convolution along the + time dimension. Specifying a stride value != 1 is incompatible with + specifying a `dilation_rate` value != 1. Defaults to `1`. + padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding with zeros + evenly to the left/right or up/down of the input such that output has + the same height/width dimension as the input. + output_padding: An integer specifying the amount of padding along + the time dimension of the output tensor. + The amount of output padding must be lower than the stride. + If set to `None` (default), the output shape is inferred. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch_size, channels, length)`. + dilation_rate: an integer, specifying + the dilation rate to use for dilated convolution. + Currently, specifying a `dilation_rate` value != 1 is + incompatible with specifying a stride value != 1. + Also dilation rate larger than 1 is not currently supported. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix + (see `keras.initializers`). Defaults to 'glorot_uniform'. + bias_initializer: Initializer for the bias vector + (see `keras.initializers`). Defaults to 'zeros'. + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector + (see `keras.regularizers`). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation") (see `keras.regularizers`). + kernel_constraint: Constraint function applied to the kernel matrix + (see `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector + (see `keras.constraints`). - Args: - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer length of the 1D convolution window. - strides: An integer specifying the stride of the convolution along the - time dimension. Specifying a stride value != 1 is incompatible with - specifying a `dilation_rate` value != 1. Defaults to 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros evenly - to the left/right or up/down of the input such that output has the same - height/width dimension as the input. - output_padding: An integer specifying the amount of padding along - the time dimension of the output tensor. - The amount of output padding must be lower than the stride. - If set to `None` (default), the output shape is inferred. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. 
- `channels_last` corresponds to inputs with shape - `(batch_size, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch_size, channels, length)`. - dilation_rate: an integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying a `dilation_rate` value != 1 is - incompatible with specifying a stride value != 1. - Also dilation rate larger than 1 is not currently supported. - activation: Activation function to use. - If you don't specify anything, no activation is applied - (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see `keras.initializers`). Defaults to 'glorot_uniform'. - bias_initializer: Initializer for the bias vector - (see `keras.initializers`). Defaults to 'zeros'. - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector - (see `keras.regularizers`). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation") (see `keras.regularizers`). - kernel_constraint: Constraint function applied to the kernel matrix - (see `keras.constraints`). - bias_constraint: Constraint function applied to the bias vector - (see `keras.constraints`). + Input shape: + 3D tensor with shape: + `(batch_size, steps, channels)` - Input shape: - 3D tensor with shape: - `(batch_size, steps, channels)` + Output shape: + 3D tensor with shape: + `(batch_size, new_steps, filters)` + If `output_padding` is specified: + ``` + new_timesteps = ((timesteps - 1) * strides + kernel_size - + 2 * padding + output_padding) + ``` - Output shape: - 3D tensor with shape: - `(batch_size, new_steps, filters)` - If `output_padding` is specified: - ``` - new_timesteps = ((timesteps - 1) * strides + kernel_size - - 2 * padding + output_padding) - ``` + Returns: + A tensor of rank 3 representing + `activation(conv1dtranspose(inputs, kernel) + bias)`. - Returns: - A tensor of rank 3 representing - `activation(conv1dtranspose(inputs, kernel) + bias)`. + Raises: + ValueError: if `padding` is "causal". + ValueError: when both `strides` > 1 and `dilation_rate` > 1. - Raises: - ValueError: if `padding` is "causal". - ValueError: when both `strides` > 1 and `dilation_rate` > 1. 
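[Editor's note: the `new_timesteps` formula quoted in the reformatted docstring above can be sanity-checked numerically. A small sketch with assumed shapes; note that `padding` in the formula is the implicit padding amount, which is 0 for `"valid"`:]

```python
import tensorflow as tf

# new_timesteps = (timesteps - 1) * strides + kernel_size - 2 * padding
#                 + output_padding
#               = (10 - 1) * 2 + 3 - 2 * 0 + 1 = 22
layer = tf.keras.layers.Conv1DTranspose(
    filters=5, kernel_size=3, strides=2, padding="valid", output_padding=1)
x = tf.random.normal((2, 10, 3))  # (batch_size, timesteps, channels)
print(layer(x).shape)  # (2, 22, 5)
```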
+ References: + - [A guide to convolution arithmetic for deep learning]( + https://arxiv.org/abs/1603.07285v1) + - [Deconvolutional Networks]( + https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + """ - References: - - [A guide to convolution arithmetic for deep learning]( - https://arxiv.org/abs/1603.07285v1) - - [Deconvolutional Networks]( - https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) - """ + @utils.allow_initializer_layout + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + output_padding=None, + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activations.get(activation), + use_bias=use_bias, + kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), + **kwargs, + ) - @utils.allow_initializer_layout - def __init__(self, - filters, - kernel_size, - strides=1, - padding='valid', - output_padding=None, - data_format=None, - dilation_rate=1, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activations.get(activation), - use_bias=use_bias, - kernel_initializer=initializers.get(kernel_initializer), - bias_initializer=initializers.get(bias_initializer), - kernel_regularizer=regularizers.get(kernel_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - kernel_constraint=constraints.get(kernel_constraint), - bias_constraint=constraints.get(bias_constraint), - **kwargs) + self.output_padding = output_padding + if self.output_padding is not None: + self.output_padding = conv_utils.normalize_tuple( + self.output_padding, 1, "output_padding", allow_zero=True + ) + for stride, out_pad in zip(self.strides, self.output_padding): + if out_pad >= stride: + raise ValueError( + "Strides must be greater than output padding. " + f"Received strides={self.strides}, " + f"output_padding={self.output_padding}." + ) - self.output_padding = output_padding - if self.output_padding is not None: - self.output_padding = conv_utils.normalize_tuple( - self.output_padding, 1, 'output_padding', allow_zero=True) - for stride, out_pad in zip(self.strides, self.output_padding): - if out_pad >= stride: - raise ValueError('Strides must be greater than output padding. 
' - f'Received strides={self.strides}, ' - f'output_padding={self.output_padding}.') + def build(self, input_shape): + input_shape = tf.TensorShape(input_shape) + if len(input_shape) != 3: + raise ValueError( + "Inputs should have rank 3. " + f"Received input_shape={input_shape}." + ) + channel_axis = self._get_channel_axis() + if input_shape.dims[channel_axis].value is None: + raise ValueError( + "The channel dimension of the inputs " + "to `Conv1DTranspose` should be defined. " + f"The input_shape received is {input_shape}, " + f"where axis {channel_axis} (0-based) " + "is the channel dimension, which found to be `None`." + ) + input_dim = int(input_shape[channel_axis]) + self.input_spec = InputSpec(ndim=3, axes={channel_axis: input_dim}) + kernel_shape = self.kernel_size + (self.filters, input_dim) - def build(self, input_shape): - input_shape = tf.TensorShape(input_shape) - if len(input_shape) != 3: - raise ValueError('Inputs should have rank 3. ' - f'Received input_shape={input_shape}.') - channel_axis = self._get_channel_axis() - if input_shape.dims[channel_axis].value is None: - raise ValueError('The channel dimension of the inputs ' - 'to `Conv1DTranspose` should be defined. ' - f'The input_shape received is {input_shape}, ' - f'where axis {channel_axis} (0-based) ' - 'is the channel dimension, which found to be `None`.') - input_dim = int(input_shape[channel_axis]) - self.input_spec = InputSpec(ndim=3, axes={channel_axis: input_dim}) - kernel_shape = self.kernel_size + (self.filters, input_dim) + self.kernel = self.add_weight( + name="kernel", + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype, + ) + if self.use_bias: + self.bias = self.add_weight( + name="bias", + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype, + ) + else: + self.bias = None + self.built = True - self.kernel = self.add_weight( - name='kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - trainable=True, - dtype=self.dtype) - if self.use_bias: - self.bias = self.add_weight( - name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) - else: - self.bias = None - self.built = True + def call(self, inputs): + inputs_shape = tf.shape(inputs) + batch_size = inputs_shape[0] + if self.data_format == "channels_first": + t_axis = 2 + else: + t_axis = 1 - def call(self, inputs): - inputs_shape = tf.shape(inputs) - batch_size = inputs_shape[0] - if self.data_format == 'channels_first': - t_axis = 2 - else: - t_axis = 1 + length = inputs_shape[t_axis] + if self.output_padding is None: + output_padding = None + else: + output_padding = self.output_padding[0] - length = inputs_shape[t_axis] - if self.output_padding is None: - output_padding = None - else: - output_padding = self.output_padding[0] + # Infer the dynamic output shape: + out_length = conv_utils.deconv_output_length( + length, + self.kernel_size[0], + padding=self.padding, + output_padding=output_padding, + stride=self.strides[0], + dilation=self.dilation_rate[0], + ) + if self.data_format == "channels_first": + output_shape = (batch_size, self.filters, out_length) + else: + output_shape = (batch_size, 
out_length, self.filters) + data_format = conv_utils.convert_data_format(self.data_format, ndim=3) - # Infer the dynamic output shape: - out_length = conv_utils.deconv_output_length( - length, self.kernel_size[0], padding=self.padding, - output_padding=output_padding, stride=self.strides[0], - dilation=self.dilation_rate[0]) - if self.data_format == 'channels_first': - output_shape = (batch_size, self.filters, out_length) - else: - output_shape = (batch_size, out_length, self.filters) - data_format = conv_utils.convert_data_format(self.data_format, ndim=3) + output_shape_tensor = tf.stack(output_shape) + outputs = tf.nn.conv1d_transpose( + inputs, + self.kernel, + output_shape_tensor, + strides=self.strides, + padding=self.padding.upper(), + data_format=data_format, + dilations=self.dilation_rate, + ) - output_shape_tensor = tf.stack(output_shape) - outputs = tf.nn.conv1d_transpose( - inputs, - self.kernel, - output_shape_tensor, - strides=self.strides, - padding=self.padding.upper(), - data_format=data_format, - dilations=self.dilation_rate) + if not tf.executing_eagerly() and inputs.shape.rank: + # Infer the static output shape: + out_shape = self.compute_output_shape(inputs.shape) + outputs.set_shape(out_shape) - if not tf.executing_eagerly(): - # Infer the static output shape: - out_shape = self.compute_output_shape(inputs.shape) - outputs.set_shape(out_shape) + if self.use_bias: + outputs = tf.nn.bias_add( + outputs, self.bias, data_format=data_format + ) - if self.use_bias: - outputs = tf.nn.bias_add( - outputs, - self.bias, - data_format=data_format) + if self.activation is not None: + return self.activation(outputs) + return outputs - if self.activation is not None: - return self.activation(outputs) - return outputs + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + output_shape = list(input_shape) + if self.data_format == "channels_first": + c_axis, t_axis = 1, 2 + else: + c_axis, t_axis = 2, 1 - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - output_shape = list(input_shape) - if self.data_format == 'channels_first': - c_axis, t_axis = 1, 2 - else: - c_axis, t_axis = 2, 1 + if self.output_padding is None: + output_padding = None + else: + output_padding = self.output_padding[0] + output_shape[c_axis] = self.filters + output_shape[t_axis] = conv_utils.deconv_output_length( + output_shape[t_axis], + self.kernel_size[0], + padding=self.padding, + output_padding=output_padding, + stride=self.strides[0], + dilation=self.dilation_rate[0], + ) + return tf.TensorShape(output_shape) - if self.output_padding is None: - output_padding = None - else: - output_padding = self.output_padding[0] - output_shape[c_axis] = self.filters - output_shape[t_axis] = conv_utils.deconv_output_length( - output_shape[t_axis], - self.kernel_size[0], - padding=self.padding, - output_padding=output_padding, - stride=self.strides[0], - dilation=self.dilation_rate[0]) - return tf.TensorShape(output_shape) + def get_config(self): + config = super().get_config() + config["output_padding"] = self.output_padding + return config - def get_config(self): - config = super().get_config() - config['output_padding'] = self.output_padding - return config # Alias diff --git a/keras/layers/convolutional/conv2d.py b/keras/layers/convolutional/conv2d.py index 257a729790bc..6a6c3aae0f41 100644 --- a/keras/layers/convolutional/conv2d.py +++ b/keras/layers/convolutional/conv2d.py @@ -13,7 +13,7 @@ # limitations under the 
License. # ============================================================================== """Keras 2D convolution layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import activations from keras import constraints @@ -22,170 +22,181 @@ from keras.dtensor import utils from keras.layers.convolutional.base_conv import Conv +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Conv2D', 'keras.layers.Convolution2D') +@keras_export("keras.layers.Conv2D", "keras.layers.Convolution2D") class Conv2D(Conv): - """2D convolution layer (e.g. spatial convolution over images). - - This layer creates a convolution kernel that is convolved - with the layer input to produce a tensor of - outputs. If `use_bias` is True, - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - When using this layer as the first layer in a model, - provide the keyword argument `input_shape` - (tuple of integers or `None`, does not include the sample axis), - e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures - in `data_format="channels_last"`. You can use `None` when - a dimension has variable size. - - Examples: - - >>> # The inputs are 28x28 RGB images with `channels_last` and the batch - >>> # size is 4. - >>> input_shape = (4, 28, 28, 3) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.Conv2D( - ... 2, 3, activation='relu', input_shape=input_shape[1:])(x) - >>> print(y.shape) - (4, 26, 26, 2) - - >>> # With `dilation_rate` as 2. - >>> input_shape = (4, 28, 28, 3) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.Conv2D( - ... 2, 3, activation='relu', dilation_rate=2, input_shape=input_shape[1:])(x) - >>> print(y.shape) - (4, 24, 24, 2) - - >>> # With `padding` as "same". - >>> input_shape = (4, 28, 28, 3) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.Conv2D( - ... 2, 3, activation='relu', padding="same", input_shape=input_shape[1:])(x) - >>> print(y.shape) - (4, 28, 28, 2) - - >>> # With extended batch shape [4, 7]: - >>> input_shape = (4, 7, 28, 28, 3) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.Conv2D( - ... 2, 3, activation='relu', input_shape=input_shape[2:])(x) - >>> print(y.shape) - (4, 7, 26, 26, 2) - - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number of - output filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the height - and width of the 2D convolution window. Can be a single integer to specify - the same value for all spatial dimensions. - strides: An integer or tuple/list of 2 integers, specifying the strides of - the convolution along the height and width. Can be a single integer to - specify the same value for all spatial dimensions. Specifying any stride - value != 1 is incompatible with specifying any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros evenly - to the left/right or up/down of the input. When `padding="same"` and - `strides=1`, the output has the same size as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. 
`channels_last` corresponds - to inputs with shape `(batch_size, height, width, channels)` while - `channels_first` corresponds to inputs with shape `(batch_size, channels, - height, width)`. It defaults to the `image_data_format` value found in - your Keras config file at `~/.keras/keras.json`. If you never set it, then - it will be `channels_last`. - dilation_rate: an integer or tuple/list of 2 integers, specifying the - dilation rate to use for dilated convolution. Can be a single integer to - specify the same value for all spatial dimensions. Currently, specifying - any `dilation_rate` value != 1 is incompatible with specifying any stride - value != 1. - groups: A positive integer specifying the number of groups in which the - input is split along the channel axis. Each group is convolved separately - with `filters / groups` filters. The output is the concatenation of all - the `groups` results along the channel axis. Input channels and `filters` - must both be divisible by `groups`. - activation: Activation function to use. If you don't specify anything, no - activation is applied (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix (see - `keras.initializers`). Defaults to 'glorot_uniform'. - bias_initializer: Initializer for the bias vector (see - `keras.initializers`). Defaults to 'zeros'. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector (see - `keras.regularizers`). - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation") (see `keras.regularizers`). - kernel_constraint: Constraint function applied to the kernel matrix (see - `keras.constraints`). - bias_constraint: Constraint function applied to the bias vector (see - `keras.constraints`). - - Input shape: - 4+D tensor with shape: `batch_shape + (channels, rows, cols)` if - `data_format='channels_first'` - or 4+D tensor with shape: `batch_shape + (rows, cols, channels)` if - `data_format='channels_last'`. - - Output shape: - 4+D tensor with shape: `batch_shape + (filters, new_rows, new_cols)` if - `data_format='channels_first'` or 4+D tensor with shape: `batch_shape + - (new_rows, new_cols, filters)` if `data_format='channels_last'`. `rows` - and `cols` values might have changed due to padding. - - Returns: - A tensor of rank 4+ representing - `activation(conv2d(inputs, kernel) + bias)`. - - Raises: - ValueError: if `padding` is `"causal"`. - ValueError: when both `strides > 1` and `dilation_rate > 1`. 
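[Editor's note: both versions of the docstring say that `rows` and `cols` "might have changed due to padding". The helper below is a simplified re-derivation of the rule for illustration only, not the actual `keras.utils.conv_utils.conv_output_length` implementation:]

```python
import math

def conv_output_length(input_length, kernel_size, stride, padding, dilation=1):
    """Output length along one spatial dimension (simplified rule)."""
    effective_kernel = kernel_size + (kernel_size - 1) * (dilation - 1)
    if padding == "valid":
        return (input_length - effective_kernel) // stride + 1
    # "same": the output covers every input position.
    return math.ceil(input_length / stride)

# Matches the doctest shapes in this file:
print(conv_output_length(28, 3, 1, "valid"))              # 26
print(conv_output_length(28, 3, 1, "valid", dilation=2))  # 24
print(conv_output_length(28, 3, 1, "same"))               # 28
```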
- """ - - @utils.allow_initializer_layout - def __init__(self, - filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - groups=1, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - rank=2, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - groups=groups, - activation=activations.get(activation), - use_bias=use_bias, - kernel_initializer=initializers.get(kernel_initializer), - bias_initializer=initializers.get(bias_initializer), - kernel_regularizer=regularizers.get(kernel_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - kernel_constraint=constraints.get(kernel_constraint), - bias_constraint=constraints.get(bias_constraint), - **kwargs) + """2D convolution layer (e.g. spatial convolution over images). + + This layer creates a convolution kernel that is convolved + with the layer input to produce a tensor of + outputs. If `use_bias` is True, + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + When using this layer as the first layer in a model, + provide the keyword argument `input_shape` + (tuple of integers or `None`, does not include the sample axis), + e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures + in `data_format="channels_last"`. You can use `None` when + a dimension has variable size. + + Examples: + + >>> # The inputs are 28x28 RGB images with `channels_last` and the batch + >>> # size is 4. + >>> input_shape = (4, 28, 28, 3) + >>> x = tf.random.normal(input_shape) + >>> y = tf.keras.layers.Conv2D( + ... 2, 3, activation='relu', input_shape=input_shape[1:])(x) + >>> print(y.shape) + (4, 26, 26, 2) + + >>> # With `dilation_rate` as 2. + >>> input_shape = (4, 28, 28, 3) + >>> x = tf.random.normal(input_shape) + >>> y = tf.keras.layers.Conv2D( + ... 2, 3, + ... activation='relu', + ... dilation_rate=2, + ... input_shape=input_shape[1:])(x) + >>> print(y.shape) + (4, 24, 24, 2) + + >>> # With `padding` as "same". + >>> input_shape = (4, 28, 28, 3) + >>> x = tf.random.normal(input_shape) + >>> y = tf.keras.layers.Conv2D( + ... 2, 3, activation='relu', padding="same", input_shape=input_shape[1:])(x) + >>> print(y.shape) + (4, 28, 28, 2) + + >>> # With extended batch shape [4, 7]: + >>> input_shape = (4, 7, 28, 28, 3) + >>> x = tf.random.normal(input_shape) + >>> y = tf.keras.layers.Conv2D( + ... 2, 3, activation='relu', input_shape=input_shape[2:])(x) + >>> print(y.shape) + (4, 7, 26, 26, 2) + + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of output filters in the convolution). + kernel_size: An integer or tuple/list of 2 integers, specifying the height + and width of the 2D convolution window. Can be a single integer to + specify the same value for all spatial dimensions. + strides: An integer or tuple/list of 2 integers, specifying the strides of + the convolution along the height and width. Can be a single integer to + specify the same value for all spatial dimensions. Specifying any stride + value != 1 is incompatible with specifying any `dilation_rate` value != + 1. 
+ padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding with zeros + evenly to the left/right or up/down of the input. When `padding="same"` + and `strides=1`, the output has the same size as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape `(batch_size, height, + width, channels)` while `channels_first` corresponds to inputs with + shape `(batch_size, channels, height, width)`. If left unspecified, it + uses the `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Note that the `channels_first` format is currently not + supported by TensorFlow on CPU. Defaults to 'channels_last'. + dilation_rate: an integer or tuple/list of 2 integers, specifying the + dilation rate to use for dilated convolution. Can be a single integer to + specify the same value for all spatial dimensions. Currently, specifying + any `dilation_rate` value != 1 is incompatible with specifying any + stride value != 1. + groups: A positive integer specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters / groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. Input + channels and `filters` must both be divisible by `groups`. + activation: Activation function to use. If you don't specify anything, no + activation is applied (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix (see + `keras.initializers`). Defaults to 'glorot_uniform'. + bias_initializer: Initializer for the bias vector (see + `keras.initializers`). Defaults to 'zeros'. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector (see + `keras.regularizers`). + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation") (see `keras.regularizers`). + kernel_constraint: Constraint function applied to the kernel matrix (see + `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector (see + `keras.constraints`). + + Input shape: + 4+D tensor with shape: `batch_shape + (channels, rows, cols)` if + `data_format='channels_first'` + or 4+D tensor with shape: `batch_shape + (rows, cols, channels)` if + `data_format='channels_last'`. + + Output shape: + 4+D tensor with shape: `batch_shape + (filters, new_rows, new_cols)` if + `data_format='channels_first'` or 4+D tensor with shape: `batch_shape + + (new_rows, new_cols, filters)` if `data_format='channels_last'`. `rows` + and `cols` values might have changed due to padding. + + Returns: + A tensor of rank 4+ representing + `activation(conv2d(inputs, kernel) + bias)`. + + Raises: + ValueError: if `padding` is `"causal"`. + ValueError: when both `strides > 1` and `dilation_rate > 1`. 
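[Editor's note: as in the 1D case, `groups` changes the kernel's input-channel dimension. A hedged sketch with illustrative shapes, using `build()` so no convolution is actually executed:]

```python
import tensorflow as tf

grouped = tf.keras.layers.Conv2D(filters=6, kernel_size=3, groups=3)
dense = tf.keras.layers.Conv2D(filters=6, kernel_size=3)
grouped.build((None, 28, 28, 6))
dense.build((None, 28, 28, 6))

# Each grouped filter sees only 6 / 3 = 2 input channels:
print(grouped.kernel.shape)  # (3, 3, 2, 6)
print(dense.kernel.shape)    # (3, 3, 6, 6)
```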
+ """ + + @utils.allow_initializer_layout + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs + ): + super().__init__( + rank=2, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activations.get(activation), + use_bias=use_bias, + kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), + **kwargs + ) + # Alias diff --git a/keras/layers/convolutional/conv2d_transpose.py b/keras/layers/convolutional/conv2d_transpose.py index ae419a5cb59a..772b761e95d8 100644 --- a/keras/layers/convolutional/conv2d_transpose.py +++ b/keras/layers/convolutional/conv2d_transpose.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Keras 2D transposed convolution layer (sometimes called deconvolution).""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import activations from keras import backend @@ -24,315 +26,341 @@ from keras.engine.input_spec import InputSpec from keras.layers.convolutional.conv2d import Conv2D from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Conv2DTranspose', - 'keras.layers.Convolution2DTranspose') +@keras_export( + "keras.layers.Conv2DTranspose", "keras.layers.Convolution2DTranspose" +) class Conv2DTranspose(Conv2D): - """Transposed convolution layer (sometimes called Deconvolution). - - The need for transposed convolutions generally arises - from the desire to use a transformation going in the opposite direction - of a normal convolution, i.e., from something that has the shape of the - output of some convolution to something that has the shape of its input - while maintaining a connectivity pattern that is compatible with - said convolution. - - When using this layer as the first layer in a model, - provide the keyword argument `input_shape` - (tuple of integers or `None`, does not include the sample axis), - e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures - in `data_format="channels_last"`. - - Args: - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. 
- padding: one of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros evenly - to the left/right or up/down of the input such that output has the same - height/width dimension as the input. - output_padding: An integer or tuple/list of 2 integers, - specifying the amount of padding along the height and width - of the output tensor. - Can be a single integer to specify the same value for all - spatial dimensions. - The amount of output padding along a given dimension must be - lower than the stride along that same dimension. - If set to `None` (default), the output shape is inferred. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch_size, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: an integer, specifying the dilation rate for all spatial - dimensions for dilated convolution. Specifying different dilation rates - for different dimensions is not supported. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - activation: Activation function to use. - If you don't specify anything, no activation is applied - (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see `keras.initializers`). Defaults to 'glorot_uniform'. - bias_initializer: Initializer for the bias vector - (see `keras.initializers`). Defaults to 'zeros'. - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector - (see `keras.regularizers`). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation") (see `keras.regularizers`). - kernel_constraint: Constraint function applied to the kernel matrix - (see `keras.constraints`). - bias_constraint: Constraint function applied to the bias vector - (see `keras.constraints`). - - Input shape: - 4D tensor with shape: - `(batch_size, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(batch_size, rows, cols, channels)` if data_format='channels_last'. - - Output shape: - 4D tensor with shape: - `(batch_size, filters, new_rows, new_cols)` if data_format='channels_first' - or 4D tensor with shape: - `(batch_size, new_rows, new_cols, filters)` if data_format='channels_last'. - `rows` and `cols` values might have changed due to padding. - If `output_padding` is specified: - ``` - new_rows = ((rows - 1) * strides[0] + kernel_size[0] - 2 * padding[0] + - output_padding[0]) - new_cols = ((cols - 1) * strides[1] + kernel_size[1] - 2 * padding[1] + - output_padding[1]) - ``` - - Returns: - A tensor of rank 4 representing - `activation(conv2dtranspose(inputs, kernel) + bias)`. - - Raises: - ValueError: if `padding` is "causal". - ValueError: when both `strides` > 1 and `dilation_rate` > 1. 
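[Editor's note: `output_padding` exists because the forward convolution is many-to-one on shapes. With `strides=2` and `padding="same"`, a forward `Conv2D` maps both 7x7 and 8x8 inputs to 4x4 outputs, so the transpose cannot infer which size to restore. A minimal sketch:]

```python
import tensorflow as tf

x = tf.zeros((1, 4, 4, 1))
for out_pad in (0, 1):
    up = tf.keras.layers.Conv2DTranspose(
        filters=1, kernel_size=3, strides=2, padding="same",
        output_padding=out_pad)
    print(up(x).shape)
# (1, 7, 7, 1) with output_padding=0
# (1, 8, 8, 1) with output_padding=1
```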
- - References: - - [A guide to convolution arithmetic for deep - learning](https://arxiv.org/abs/1603.07285v1) - - [Deconvolutional - Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) - """ - - @utils.allow_initializer_layout - def __init__(self, - filters, - kernel_size, - strides=(1, 1), - padding='valid', - output_padding=None, - data_format=None, - dilation_rate=(1, 1), - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activations.get(activation), - use_bias=use_bias, - kernel_initializer=initializers.get(kernel_initializer), - bias_initializer=initializers.get(bias_initializer), - kernel_regularizer=regularizers.get(kernel_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - kernel_constraint=constraints.get(kernel_constraint), - bias_constraint=constraints.get(bias_constraint), - **kwargs) - - self.output_padding = output_padding - if self.output_padding is not None: - self.output_padding = conv_utils.normalize_tuple( - self.output_padding, 2, 'output_padding', allow_zero=True) - for stride, out_pad in zip(self.strides, self.output_padding): - if out_pad >= stride: - raise ValueError('Strides must be greater than output padding. ' - f'Received strides={self.strides}, ' - f'output_padding={self.output_padding}.') - - def build(self, input_shape): - input_shape = tf.TensorShape(input_shape) - if len(input_shape) != 4: - raise ValueError('Inputs should have rank 4. ' - f'Received input_shape={input_shape}.') - channel_axis = self._get_channel_axis() - if input_shape.dims[channel_axis].value is None: - raise ValueError('The channel dimension of the inputs ' - 'to `Conv2DTranspose` should be defined. ' - f'The input_shape received is {input_shape}, ' - f'where axis {channel_axis} (0-based) ' - 'is the channel dimension, which found to be `None`.') - input_dim = int(input_shape[channel_axis]) - self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) - kernel_shape = self.kernel_size + (self.filters, input_dim) - - self.kernel = self.add_weight( - name='kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - trainable=True, - dtype=self.dtype) - if self.use_bias: - self.bias = self.add_weight( - name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) - else: - self.bias = None - self.built = True - - def call(self, inputs): - inputs_shape = tf.shape(inputs) - batch_size = inputs_shape[0] - if self.data_format == 'channels_first': - h_axis, w_axis = 2, 3 - else: - h_axis, w_axis = 1, 2 - - # Use the constant height and weight when possible. - # TODO(scottzhu): Extract this into a utility function that can be applied - # to all convolutional layers, which currently lost the static shape - # information due to tf.shape(). 
- height, width = None, None - if inputs.shape.rank is not None: - dims = inputs.shape.as_list() - height = dims[h_axis] - width = dims[w_axis] - height = height if height is not None else inputs_shape[h_axis] - width = width if width is not None else inputs_shape[w_axis] - - kernel_h, kernel_w = self.kernel_size - stride_h, stride_w = self.strides - - if self.output_padding is None: - out_pad_h = out_pad_w = None - else: - out_pad_h, out_pad_w = self.output_padding - - # Infer the dynamic output shape: - out_height = conv_utils.deconv_output_length(height, - kernel_h, - padding=self.padding, - output_padding=out_pad_h, - stride=stride_h, - dilation=self.dilation_rate[0]) - out_width = conv_utils.deconv_output_length(width, - kernel_w, - padding=self.padding, - output_padding=out_pad_w, - stride=stride_w, - dilation=self.dilation_rate[1]) - if self.data_format == 'channels_first': - output_shape = (batch_size, self.filters, out_height, out_width) - else: - output_shape = (batch_size, out_height, out_width, self.filters) - - output_shape_tensor = tf.stack(output_shape) - outputs = backend.conv2d_transpose( - inputs, - self.kernel, - output_shape_tensor, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) - - if not tf.executing_eagerly(): - # Infer the static output shape: - out_shape = self.compute_output_shape(inputs.shape) - outputs.set_shape(out_shape) - - if self.use_bias: - outputs = tf.nn.bias_add( - outputs, - self.bias, - data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - output_shape = list(input_shape) - if self.data_format == 'channels_first': - c_axis, h_axis, w_axis = 1, 2, 3 - else: - c_axis, h_axis, w_axis = 3, 1, 2 - - kernel_h, kernel_w = self.kernel_size - stride_h, stride_w = self.strides - - if self.output_padding is None: - out_pad_h = out_pad_w = None - else: - out_pad_h, out_pad_w = self.output_padding - - output_shape[c_axis] = self.filters - output_shape[h_axis] = conv_utils.deconv_output_length( - output_shape[h_axis], - kernel_h, - padding=self.padding, - output_padding=out_pad_h, - stride=stride_h, - dilation=self.dilation_rate[0]) - output_shape[w_axis] = conv_utils.deconv_output_length( - output_shape[w_axis], - kernel_w, - padding=self.padding, - output_padding=out_pad_w, - stride=stride_w, - dilation=self.dilation_rate[1]) - return tf.TensorShape(output_shape) - - def get_config(self): - config = super().get_config() - config['output_padding'] = self.output_padding - return config + """Transposed convolution layer (sometimes called Deconvolution). + + The need for transposed convolutions generally arises + from the desire to use a transformation going in the opposite direction + of a normal convolution, i.e., from something that has the shape of the + output of some convolution to something that has the shape of its input + while maintaining a connectivity pattern that is compatible with + said convolution. + + When using this layer as the first layer in a model, + provide the keyword argument `input_shape` + (tuple of integers or `None`, does not include the sample axis), + e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures + in `data_format="channels_last"`. + + Args: + filters: Integer, the dimensionality of the output space + (i.e. 
the number of output filters in the convolution). + kernel_size: An integer or tuple/list of 2 integers, specifying the + height and width of the 2D convolution window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the convolution along the height and width. + Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding with zeros + evenly to the left/right or up/down of the input such that output has + the same height/width dimension as the input. + output_padding: An integer or tuple/list of 2 integers, + specifying the amount of padding along the height and width + of the output tensor. + Can be a single integer to specify the same value for all + spatial dimensions. + The amount of output padding along a given dimension must be + lower than the stride along that same dimension. + If set to `None` (default), the output shape is inferred. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch_size, channels, height, width)`. + When unspecified, uses `image_data_format` value found in your Keras + config file at `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to "channels_last". + dilation_rate: an integer, specifying the dilation rate for all spatial + dimensions for dilated convolution. Specifying different dilation rates + for different dimensions is not supported. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix + (see `keras.initializers`). Defaults to 'glorot_uniform'. + bias_initializer: Initializer for the bias vector + (see `keras.initializers`). Defaults to 'zeros'. + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector + (see `keras.regularizers`). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation") (see `keras.regularizers`). + kernel_constraint: Constraint function applied to the kernel matrix + (see `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector + (see `keras.constraints`). + + Input shape: + 4D tensor with shape: + `(batch_size, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(batch_size, rows, cols, channels)` if data_format='channels_last'. + + Output shape: + 4D tensor with shape: + `(batch_size, filters, new_rows, new_cols)` if + data_format='channels_first' + or 4D tensor with shape: + `(batch_size, new_rows, new_cols, filters)` if + data_format='channels_last'. `rows` and `cols` values might have changed + due to padding. 
+ If `output_padding` is specified: + ``` + new_rows = ((rows - 1) * strides[0] + kernel_size[0] - 2 * padding[0] + + output_padding[0]) + new_cols = ((cols - 1) * strides[1] + kernel_size[1] - 2 * padding[1] + + output_padding[1]) + ``` + + Returns: + A tensor of rank 4 representing + `activation(conv2dtranspose(inputs, kernel) + bias)`. + + Raises: + ValueError: if `padding` is "causal". + ValueError: when both `strides` > 1 and `dilation_rate` > 1. + + References: + - [A guide to convolution arithmetic for deep + learning](https://arxiv.org/abs/1603.07285v1) + - [Deconvolutional + Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + """ + + @utils.allow_initializer_layout + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + output_padding=None, + data_format=None, + dilation_rate=(1, 1), + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activations.get(activation), + use_bias=use_bias, + kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), + **kwargs, + ) + + self.output_padding = output_padding + if self.output_padding is not None: + self.output_padding = conv_utils.normalize_tuple( + self.output_padding, 2, "output_padding", allow_zero=True + ) + for stride, out_pad in zip(self.strides, self.output_padding): + if out_pad >= stride: + raise ValueError( + "Strides must be greater than output padding. " + f"Received strides={self.strides}, " + f"output_padding={self.output_padding}." + ) + + def build(self, input_shape): + input_shape = tf.TensorShape(input_shape) + if len(input_shape) != 4: + raise ValueError( + "Inputs should have rank 4. " + f"Received input_shape={input_shape}." + ) + channel_axis = self._get_channel_axis() + if input_shape.dims[channel_axis].value is None: + raise ValueError( + "The channel dimension of the inputs " + "to `Conv2DTranspose` should be defined. " + f"The input_shape received is {input_shape}, " + f"where axis {channel_axis} (0-based) " + "is the channel dimension, which found to be `None`." 
+ ) + input_dim = int(input_shape[channel_axis]) + self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) + kernel_shape = self.kernel_size + (self.filters, input_dim) + + self.kernel = self.add_weight( + name="kernel", + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype, + ) + if self.use_bias: + self.bias = self.add_weight( + name="bias", + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype, + ) + else: + self.bias = None + self.built = True + + def call(self, inputs): + inputs_shape = tf.shape(inputs) + batch_size = inputs_shape[0] + if self.data_format == "channels_first": + h_axis, w_axis = 2, 3 + else: + h_axis, w_axis = 1, 2 + + # Use the constant height and weight when possible. + # TODO(scottzhu): Extract this into a utility function that can be + # applied to all convolutional layers, which currently lost the static + # shape information due to tf.shape(). + height, width = None, None + if inputs.shape.rank is not None: + dims = inputs.shape.as_list() + height = dims[h_axis] + width = dims[w_axis] + height = height if height is not None else inputs_shape[h_axis] + width = width if width is not None else inputs_shape[w_axis] + + kernel_h, kernel_w = self.kernel_size + stride_h, stride_w = self.strides + + if self.output_padding is None: + out_pad_h = out_pad_w = None + else: + out_pad_h, out_pad_w = self.output_padding + + # Infer the dynamic output shape: + out_height = conv_utils.deconv_output_length( + height, + kernel_h, + padding=self.padding, + output_padding=out_pad_h, + stride=stride_h, + dilation=self.dilation_rate[0], + ) + out_width = conv_utils.deconv_output_length( + width, + kernel_w, + padding=self.padding, + output_padding=out_pad_w, + stride=stride_w, + dilation=self.dilation_rate[1], + ) + if self.data_format == "channels_first": + output_shape = (batch_size, self.filters, out_height, out_width) + else: + output_shape = (batch_size, out_height, out_width, self.filters) + + output_shape_tensor = tf.stack(output_shape) + outputs = backend.conv2d_transpose( + inputs, + self.kernel, + output_shape_tensor, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + dilation_rate=self.dilation_rate, + ) + + if not tf.executing_eagerly() and inputs.shape.rank: + # Infer the static output shape: + out_shape = self.compute_output_shape(inputs.shape) + outputs.set_shape(out_shape) + + if self.use_bias: + outputs = tf.nn.bias_add( + outputs, + self.bias, + data_format=conv_utils.convert_data_format( + self.data_format, ndim=4 + ), + ) + + if self.activation is not None: + return self.activation(outputs) + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + output_shape = list(input_shape) + if self.data_format == "channels_first": + c_axis, h_axis, w_axis = 1, 2, 3 + else: + c_axis, h_axis, w_axis = 3, 1, 2 + + kernel_h, kernel_w = self.kernel_size + stride_h, stride_w = self.strides + + if self.output_padding is None: + out_pad_h = out_pad_w = None + else: + out_pad_h, out_pad_w = self.output_padding + + output_shape[c_axis] = self.filters + output_shape[h_axis] = conv_utils.deconv_output_length( + output_shape[h_axis], + kernel_h, + padding=self.padding, + output_padding=out_pad_h, + stride=stride_h, + dilation=self.dilation_rate[0], + 
) + output_shape[w_axis] = conv_utils.deconv_output_length( + output_shape[w_axis], + kernel_w, + padding=self.padding, + output_padding=out_pad_w, + stride=stride_w, + dilation=self.dilation_rate[1], + ) + return tf.TensorShape(output_shape) + + def get_config(self): + config = super().get_config() + config["output_padding"] = self.output_padding + return config + # Alias diff --git a/keras/layers/convolutional/conv3d.py b/keras/layers/convolutional/conv3d.py index aeee2067f024..bfcfcf5012e2 100644 --- a/keras/layers/convolutional/conv3d.py +++ b/keras/layers/convolutional/conv3d.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== """Keras 3D convolution layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import activations from keras import constraints @@ -22,157 +22,165 @@ from keras.dtensor import utils from keras.layers.convolutional.base_conv import Conv +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Conv3D', 'keras.layers.Convolution3D') +@keras_export("keras.layers.Conv3D", "keras.layers.Convolution3D") class Conv3D(Conv): - """3D convolution layer (e.g. spatial convolution over volumes). - - This layer creates a convolution kernel that is convolved - with the layer input to produce a tensor of - outputs. If `use_bias` is True, - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - When using this layer as the first layer in a model, - provide the keyword argument `input_shape` - (tuple of integers or `None`, does not include the sample axis), - e.g. `input_shape=(128, 128, 128, 1)` for 128x128x128 volumes - with a single channel, - in `data_format="channels_last"`. - - Examples: - - >>> # The inputs are 28x28x28 volumes with a single channel, and the - >>> # batch size is 4 - >>> input_shape =(4, 28, 28, 28, 1) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.Conv3D( - ... 2, 3, activation='relu', input_shape=input_shape[1:])(x) - >>> print(y.shape) - (4, 26, 26, 26, 2) - - >>> # With extended batch shape [4, 7], e.g. a batch of 4 videos of 3D frames, - >>> # with 7 frames per video. - >>> input_shape = (4, 7, 28, 28, 28, 1) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.Conv3D( - ... 2, 3, activation='relu', input_shape=input_shape[2:])(x) - >>> print(y.shape) - (4, 7, 26, 26, 26, 2) - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number of - output filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the depth, - height and width of the 3D convolution window. Can be a single integer to - specify the same value for all spatial dimensions. - strides: An integer or tuple/list of 3 integers, specifying the strides of - the convolution along each spatial dimension. Can be a single integer to - specify the same value for all spatial dimensions. Specifying any stride - value != 1 is incompatible with specifying any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros evenly - to the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. 
`channels_last` corresponds - to inputs with shape `batch_shape + (spatial_dim1, spatial_dim2, - spatial_dim3, channels)` while `channels_first` corresponds to inputs with - shape `batch_shape + (channels, spatial_dim1, spatial_dim2, - spatial_dim3)`. It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. If you never set it, then it - will be "channels_last". - dilation_rate: an integer or tuple/list of 3 integers, specifying the - dilation rate to use for dilated convolution. Can be a single integer to - specify the same value for all spatial dimensions. Currently, specifying - any `dilation_rate` value != 1 is incompatible with specifying any stride - value != 1. - groups: A positive integer specifying the number of groups in which the - input is split along the channel axis. Each group is convolved separately - with `filters / groups` filters. The output is the concatenation of all - the `groups` results along the channel axis. Input channels and `filters` - must both be divisible by `groups`. - activation: Activation function to use. If you don't specify anything, no - activation is applied (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix (see - `keras.initializers`). Defaults to 'glorot_uniform'. - bias_initializer: Initializer for the bias vector (see - `keras.initializers`). Defaults to 'zeros'. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector (see - `keras.regularizers`). - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation") (see `keras.regularizers`). - kernel_constraint: Constraint function applied to the kernel matrix (see - `keras.constraints`). - bias_constraint: Constraint function applied to the bias vector (see - `keras.constraints`). - - Input shape: - 5+D tensor with shape: `batch_shape + (channels, conv_dim1, conv_dim2, - conv_dim3)` if data_format='channels_first' - or 5+D tensor with shape: `batch_shape + (conv_dim1, conv_dim2, conv_dim3, - channels)` if data_format='channels_last'. - - Output shape: - 5+D tensor with shape: `batch_shape + (filters, new_conv_dim1, - new_conv_dim2, new_conv_dim3)` if data_format='channels_first' - or 5+D tensor with shape: `batch_shape + (new_conv_dim1, new_conv_dim2, - new_conv_dim3, filters)` if data_format='channels_last'. `new_conv_dim1`, - `new_conv_dim2` and `new_conv_dim3` values might have changed due to - padding. - - Returns: - A tensor of rank 5+ representing - `activation(conv3d(inputs, kernel) + bias)`. - - Raises: - ValueError: if `padding` is "causal". - ValueError: when both `strides > 1` and `dilation_rate > 1`. 
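[Editor's note: alongside the doctests in the reformatted docstring below, the weight shapes of a small `Conv3D` can be checked without running any data through it. A sketch with assumed shapes:]

```python
import tensorflow as tf

layer = tf.keras.layers.Conv3D(filters=2, kernel_size=3)
layer.build((None, 28, 28, 28, 1))  # single-channel volumes

print(layer.kernel.shape)    # (3, 3, 3, 1, 2)
print(layer.count_params())  # 3*3*3*1*2 kernel weights + 2 biases = 56
```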
- """ - - @utils.allow_initializer_layout - def __init__(self, - filters, - kernel_size, - strides=(1, 1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1, 1), - groups=1, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - rank=3, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - groups=groups, - activation=activations.get(activation), - use_bias=use_bias, - kernel_initializer=initializers.get(kernel_initializer), - bias_initializer=initializers.get(bias_initializer), - kernel_regularizer=regularizers.get(kernel_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - kernel_constraint=constraints.get(kernel_constraint), - bias_constraint=constraints.get(bias_constraint), - **kwargs) + """3D convolution layer (e.g. spatial convolution over volumes). + + This layer creates a convolution kernel that is convolved + with the layer input to produce a tensor of + outputs. If `use_bias` is True, + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + When using this layer as the first layer in a model, + provide the keyword argument `input_shape` + (tuple of integers or `None`, does not include the sample axis), + e.g. `input_shape=(128, 128, 128, 1)` for 128x128x128 volumes + with a single channel, + in `data_format="channels_last"`. + + Examples: + + >>> # The inputs are 28x28x28 volumes with a single channel, and the + >>> # batch size is 4 + >>> input_shape =(4, 28, 28, 28, 1) + >>> x = tf.random.normal(input_shape) + >>> y = tf.keras.layers.Conv3D( + ... 2, 3, activation='relu', input_shape=input_shape[1:])(x) + >>> print(y.shape) + (4, 26, 26, 26, 2) + + >>> # With extended batch shape [4, 7], e.g. a batch of 4 videos of + >>> # 3D frames, with 7 frames per video. + >>> input_shape = (4, 7, 28, 28, 28, 1) + >>> x = tf.random.normal(input_shape) + >>> y = tf.keras.layers.Conv3D( + ... 2, 3, activation='relu', input_shape=input_shape[2:])(x) + >>> print(y.shape) + (4, 7, 26, 26, 26, 2) + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of output filters in the convolution). + kernel_size: An integer or tuple/list of 3 integers, specifying the depth, + height and width of the 3D convolution window. Can be a single integer + to specify the same value for all spatial dimensions. + strides: An integer or tuple/list of 3 integers, specifying the strides of + the convolution along each spatial dimension. Can be a single integer to + specify the same value for all spatial dimensions. Specifying any stride + value != 1 is incompatible with specifying any `dilation_rate` value != + 1. + padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding with zeros + evenly to the left/right or up/down of the input such that output has + the same height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. 
+ `channels_last` corresponds to inputs with shape `batch_shape + + (spatial_dim1, spatial_dim2, spatial_dim3, channels)` while + `channels_first` corresponds to inputs with shape `batch_shape + + (channels, spatial_dim1, spatial_dim2, spatial_dim3)`. When unspecified, + uses `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. Note that the + `channels_first` format is currently not supported by TensorFlow on CPU. + Defaults to 'channels_last'. + dilation_rate: an integer or tuple/list of 3 integers, specifying the + dilation rate to use for dilated convolution. Can be a single integer to + specify the same value for all spatial dimensions. Currently, specifying + any `dilation_rate` value != 1 is incompatible with specifying any + stride value != 1. + groups: A positive integer specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters / groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. Input + channels and `filters` must both be divisible by `groups`. + activation: Activation function to use. If you don't specify anything, no + activation is applied (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix (see + `keras.initializers`). Defaults to 'glorot_uniform'. + bias_initializer: Initializer for the bias vector (see + `keras.initializers`). Defaults to 'zeros'. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector (see + `keras.regularizers`). + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation") (see `keras.regularizers`). + kernel_constraint: Constraint function applied to the kernel matrix (see + `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector (see + `keras.constraints`). + + Input shape: + 5+D tensor with shape: `batch_shape + (channels, conv_dim1, conv_dim2, + conv_dim3)` if data_format='channels_first' + or 5+D tensor with shape: `batch_shape + (conv_dim1, conv_dim2, conv_dim3, + channels)` if data_format='channels_last'. + + Output shape: + 5+D tensor with shape: `batch_shape + (filters, new_conv_dim1, + new_conv_dim2, new_conv_dim3)` if data_format='channels_first' + or 5+D tensor with shape: `batch_shape + (new_conv_dim1, new_conv_dim2, + new_conv_dim3, filters)` if data_format='channels_last'. + `new_conv_dim1`, `new_conv_dim2` and `new_conv_dim3` values might have + changed due to padding. + + Returns: + A tensor of rank 5+ representing + `activation(conv3d(inputs, kernel) + bias)`. + + Raises: + ValueError: if `padding` is "causal". + ValueError: when both `strides > 1` and `dilation_rate > 1`. 
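The `groups` argument described above is equivalent to running `groups` smaller convolutions side by side and concatenating the results, which is what `conv_test.py` later in this diff verifies. A minimal sketch of that equivalence (illustrative only; like the test, it may require a CUDA GPU, since grouped convolutions are not implemented in all CPU builds):

```python
import tensorflow as tf

inputs = tf.random.uniform((2, 8, 8, 8, 4))  # 4 input channels
layer = tf.keras.layers.Conv3D(8, 3, groups=2, use_bias=False)
outputs = layer(inputs)  # also builds the layer, creating layer.kernel

# Each group convolves 4 / 2 = 2 input channels with 8 / 2 = 4 filters;
# the kernel's last axis holds the filters, so both splits use axis=-1.
in_splits = tf.split(inputs, 2, axis=-1)
w_splits = tf.split(layer.kernel, 2, axis=-1)
manual = tf.concat(
    [tf.nn.convolution(x, w) for x, w in zip(in_splits, w_splits)],
    axis=-1,
)
tf.debugging.assert_near(outputs, manual, atol=1e-4)
```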
+ """ + + @utils.allow_initializer_layout + def __init__( + self, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs + ): + super().__init__( + rank=3, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activations.get(activation), + use_bias=use_bias, + kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), + **kwargs + ) + # Alias diff --git a/keras/layers/convolutional/conv3d_transpose.py b/keras/layers/convolutional/conv3d_transpose.py index 8e5359617517..dcb9b54a6665 100644 --- a/keras/layers/convolutional/conv3d_transpose.py +++ b/keras/layers/convolutional/conv3d_transpose.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Keras 3D transposed convolution layer (sometimes called deconvolution).""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import activations from keras import constraints @@ -23,326 +25,367 @@ from keras.engine.input_spec import InputSpec from keras.layers.convolutional.conv3d import Conv3D from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Conv3DTranspose', - 'keras.layers.Convolution3DTranspose') +@keras_export( + "keras.layers.Conv3DTranspose", "keras.layers.Convolution3DTranspose" +) class Conv3DTranspose(Conv3D): - """Transposed convolution layer (sometimes called Deconvolution). - - The need for transposed convolutions generally arises - from the desire to use a transformation going in the opposite direction - of a normal convolution, i.e., from something that has the shape of the - output of some convolution to something that has the shape of its input - while maintaining a connectivity pattern that is compatible with - said convolution. - - When using this layer as the first layer in a model, - provide the keyword argument `input_shape` - (tuple of integers or `None`, does not include the sample axis), - e.g. `input_shape=(128, 128, 128, 3)` for a 128x128x128 volume with 3 channels - if `data_format="channels_last"`. - - Args: - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution along the depth, height - and width. - Can be a single integer to specify the same value for - all spatial dimensions. 
- Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros evenly - to the left/right or up/down of the input such that output has the same - height/width dimension as the input. - output_padding: An integer or tuple/list of 3 integers, - specifying the amount of padding along the depth, height, and - width. - Can be a single integer to specify the same value for all - spatial dimensions. - The amount of output padding along a given dimension must be - lower than the stride along that same dimension. - If set to `None` (default), the output shape is inferred. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch_size, channels, depth, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: an integer or tuple/list of 3 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - activation: Activation function to use. - If you don't specify anything, no activation is applied - (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see `keras.initializers`). Defaults to 'glorot_uniform'. - bias_initializer: Initializer for the bias vector - (see `keras.initializers`). Defaults to 'zeros'. - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector - (see `keras.regularizers`). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation") - (see `keras.regularizers`). - kernel_constraint: Constraint function applied to the kernel matrix - (see `keras.constraints`). - bias_constraint: Constraint function applied to the bias vector - (see `keras.constraints`). - - Input shape: - 5D tensor with shape: - `(batch_size, channels, depth, rows, cols)` if data_format='channels_first' - or 5D tensor with shape: - `(batch_size, depth, rows, cols, channels)` if data_format='channels_last'. - - Output shape: - 5D tensor with shape: - `(batch_size, filters, new_depth, new_rows, new_cols)` if + """Transposed convolution layer (sometimes called Deconvolution). + + The need for transposed convolutions generally arises + from the desire to use a transformation going in the opposite direction + of a normal convolution, i.e., from something that has the shape of the + output of some convolution to something that has the shape of its input + while maintaining a connectivity pattern that is compatible with + said convolution. + + When using this layer as the first layer in a model, + provide the keyword argument `input_shape` + (tuple of integers or `None`, does not include the sample axis), + e.g. `input_shape=(128, 128, 128, 3)` for a 128x128x128 volume with 3 + channels if `data_format="channels_last"`. 
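Before the argument reference below, it may help to see how the layer maps an input length back to an output length. The sketch that follows is a simplified re-derivation of `keras.utils.conv_utils.deconv_output_length` (the helper that `call()` and `compute_output_shape()` use further down); it is not the library function itself, and it ignores dilation and the `"full"` padding mode:

```python
def deconv_output_length(length, kernel_size, padding="valid",
                         output_padding=None, stride=1):
    """Simplified stand-in for conv_utils.deconv_output_length."""
    if output_padding is None:
        # Inferred shape: invert the forward convolution's length formula.
        if padding == "valid":
            return length * stride + max(kernel_size - stride, 0)
        return length * stride  # "same"
    # Explicit output_padding: "valid" implies zero implicit padding per
    # side and "same" implies kernel_size // 2, matching the docstring
    # formula (length - 1) * stride + kernel_size - 2 * pad + output_padding.
    pad = 0 if padding == "valid" else kernel_size // 2
    return (length - 1) * stride + kernel_size - 2 * pad + output_padding

assert deconv_output_length(5, 3, stride=2) == 11                   # inferred
assert deconv_output_length(5, 3, stride=2, output_padding=1) == 12
```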
+ + Args: + filters: Integer, the dimensionality of the output space + (i.e. the number of output filters in the convolution). + kernel_size: An integer or tuple/list of 3 integers, specifying the + depth, height and width of the 3D convolution window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 3 integers, + specifying the strides of the convolution along the depth, height + and width. + Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding with zeros + evenly to the left/right or up/down of the input such that output has + the same height/width dimension as the input. + output_padding: An integer or tuple/list of 3 integers, + specifying the amount of padding along the depth, height, and + width. + Can be a single integer to specify the same value for all + spatial dimensions. + The amount of output padding along a given dimension must be + lower than the stride along that same dimension. + If set to `None` (default), the output shape is inferred. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, depth, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch_size, channels, depth, height, width)`. + When unspecified, uses `image_data_format` value found in your Keras + config file at `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + dilation_rate: an integer or tuple/list of 3 integers, specifying + the dilation rate to use for dilated convolution. + Can be a single integer to specify the same value for + all spatial dimensions. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix + (see `keras.initializers`). Defaults to 'glorot_uniform'. + bias_initializer: Initializer for the bias vector + (see `keras.initializers`). Defaults to 'zeros'. + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix + (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector + (see `keras.regularizers`). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation") + (see `keras.regularizers`). + kernel_constraint: Constraint function applied to the kernel matrix + (see `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector + (see `keras.constraints`). + + Input shape: + 5D tensor with shape: + `(batch_size, channels, depth, rows, cols)` if data_format='channels_first' - or 5D tensor with shape: - `(batch_size, new_depth, new_rows, new_cols, filters)` if + or 5D tensor with shape: + `(batch_size, depth, rows, cols, channels)` if data_format='channels_last'. - `depth` and `rows` and `cols` values might have changed due to padding. 
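A short usage sketch tying these shape rules together (illustrative, not taken from the patch): with `"valid"` padding, stride 2, and kernel 3, each spatial axis grows from 5 to (5 - 1) * 2 + 3 = 11, and `output_padding=1` (which must stay below the stride, per the check in `__init__` below) adds one more step per axis:

```python
import tensorflow as tf

x = tf.random.normal((1, 5, 5, 5, 3))

y = tf.keras.layers.Conv3DTranspose(2, 3, strides=2, padding="valid")(x)
print(y.shape)  # (1, 11, 11, 11, 2)

y2 = tf.keras.layers.Conv3DTranspose(
    2, 3, strides=2, padding="valid", output_padding=1
)(x)
print(y2.shape)  # (1, 12, 12, 12, 2)
```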
- If `output_padding` is specified:: - ``` - new_depth = ((depth - 1) * strides[0] + kernel_size[0] - 2 * padding[0] + - output_padding[0]) - new_rows = ((rows - 1) * strides[1] + kernel_size[1] - 2 * padding[1] + - output_padding[1]) - new_cols = ((cols - 1) * strides[2] + kernel_size[2] - 2 * padding[2] + - output_padding[2]) - ``` - - Returns: - A tensor of rank 5 representing - `activation(conv3dtranspose(inputs, kernel) + bias)`. - - Raises: - ValueError: if `padding` is "causal". - ValueError: when both `strides` > 1 and `dilation_rate` > 1. - - References: - - [A guide to convolution arithmetic for deep - learning](https://arxiv.org/abs/1603.07285v1) - - [Deconvolutional - Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) - """ - - @utils.allow_initializer_layout - def __init__(self, - filters, - kernel_size, - strides=(1, 1, 1), - padding='valid', - output_padding=None, - data_format=None, - dilation_rate=(1, 1, 1), - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activations.get(activation), - use_bias=use_bias, - kernel_initializer=initializers.get(kernel_initializer), - bias_initializer=initializers.get(bias_initializer), - kernel_regularizer=regularizers.get(kernel_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - kernel_constraint=constraints.get(kernel_constraint), - bias_constraint=constraints.get(bias_constraint), - **kwargs) - - self.output_padding = output_padding - if self.output_padding is not None: - self.output_padding = conv_utils.normalize_tuple( - self.output_padding, 3, 'output_padding', allow_zero=True) - for stride, out_pad in zip(self.strides, self.output_padding): - if out_pad >= stride: - raise ValueError('Strides must be greater than output padding. ' - f'Received strides={self.strides}, ' - f'output_padding={self.output_padding}.') - - def build(self, input_shape): - input_shape = tf.TensorShape(input_shape) - if len(input_shape) != 5: - raise ValueError('Inputs should have rank 5. ' - f'Received input_shape={input_shape}.') - channel_axis = self._get_channel_axis() - if input_shape.dims[channel_axis].value is None: - raise ValueError('The channel dimension of the inputs ' - 'to `Conv3DTranspose` should be defined. 
' - f'The input_shape received is {input_shape}, ' - f'where axis {channel_axis} (0-based) ' - 'is the channel dimension, which found to be `None`.') - input_dim = int(input_shape[channel_axis]) - kernel_shape = self.kernel_size + (self.filters, input_dim) - self.input_spec = InputSpec(ndim=5, axes={channel_axis: input_dim}) - - self.kernel = self.add_weight( - 'kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - trainable=True, - dtype=self.dtype) - if self.use_bias: - self.bias = self.add_weight( - 'bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) - else: - self.bias = None - self.built = True - - def call(self, inputs): - inputs_shape = tf.shape(inputs) - batch_size = inputs_shape[0] - if self.data_format == 'channels_first': - d_axis, h_axis, w_axis = 2, 3, 4 - else: - d_axis, h_axis, w_axis = 1, 2, 3 - - depth = inputs_shape[d_axis] - height = inputs_shape[h_axis] - width = inputs_shape[w_axis] - - kernel_d, kernel_h, kernel_w = self.kernel_size - stride_d, stride_h, stride_w = self.strides - - if self.output_padding is None: - out_pad_d = out_pad_h = out_pad_w = None - else: - out_pad_d, out_pad_h, out_pad_w = self.output_padding - - # Infer the dynamic output shape: - out_depth = conv_utils.deconv_output_length(depth, - kernel_d, - padding=self.padding, - output_padding=out_pad_d, - stride=stride_d) - out_height = conv_utils.deconv_output_length(height, - kernel_h, - padding=self.padding, - output_padding=out_pad_h, - stride=stride_h) - out_width = conv_utils.deconv_output_length(width, - kernel_w, - padding=self.padding, - output_padding=out_pad_w, - stride=stride_w) - if self.data_format == 'channels_first': - output_shape = (batch_size, self.filters, out_depth, out_height, - out_width) - strides = (1, 1, stride_d, stride_h, stride_w) - else: - output_shape = (batch_size, out_depth, out_height, out_width, - self.filters) - strides = (1, stride_d, stride_h, stride_w, 1) - - output_shape_tensor = tf.stack(output_shape) - outputs = tf.nn.conv3d_transpose( - inputs, - self.kernel, - output_shape_tensor, - strides, - data_format=conv_utils.convert_data_format(self.data_format, ndim=5), - padding=self.padding.upper()) - - if not tf.executing_eagerly(): - # Infer the static output shape: - out_shape = self.compute_output_shape(inputs.shape) - outputs.set_shape(out_shape) - - if self.use_bias: - outputs = tf.nn.bias_add( - outputs, - self.bias, - data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - output_shape = list(input_shape) - if self.data_format == 'channels_first': - c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4 - else: - c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3 - - kernel_d, kernel_h, kernel_w = self.kernel_size - stride_d, stride_h, stride_w = self.strides - - if self.output_padding is None: - out_pad_d = out_pad_h = out_pad_w = None - else: - out_pad_d, out_pad_h, out_pad_w = self.output_padding - - output_shape[c_axis] = self.filters - output_shape[d_axis] = conv_utils.deconv_output_length( - output_shape[d_axis], - kernel_d, - padding=self.padding, - output_padding=out_pad_d, - stride=stride_d) - output_shape[h_axis] = 
conv_utils.deconv_output_length( - output_shape[h_axis], - kernel_h, - padding=self.padding, - output_padding=out_pad_h, - stride=stride_h) - output_shape[w_axis] = conv_utils.deconv_output_length( - output_shape[w_axis], - kernel_w, - padding=self.padding, - output_padding=out_pad_w, - stride=stride_w) - return tf.TensorShape(output_shape) - - def get_config(self): - config = super().get_config() - config.pop('dilation_rate') - config['output_padding'] = self.output_padding - return config + + Output shape: + 5D tensor with shape: + `(batch_size, filters, new_depth, new_rows, new_cols)` if + data_format='channels_first' + or 5D tensor with shape: + `(batch_size, new_depth, new_rows, new_cols, filters)` if + data_format='channels_last'. + `depth` and `rows` and `cols` values might have changed due to padding. + If `output_padding` is specified:: + ``` + new_depth = ((depth - 1) * strides[0] + kernel_size[0] - 2 * padding[0] + + output_padding[0]) + new_rows = ((rows - 1) * strides[1] + kernel_size[1] - 2 * padding[1] + + output_padding[1]) + new_cols = ((cols - 1) * strides[2] + kernel_size[2] - 2 * padding[2] + + output_padding[2]) + ``` + + Returns: + A tensor of rank 5 representing + `activation(conv3dtranspose(inputs, kernel) + bias)`. + + Raises: + ValueError: if `padding` is "causal". + ValueError: when both `strides` > 1 and `dilation_rate` > 1. + + References: + - [A guide to convolution arithmetic for deep + learning](https://arxiv.org/abs/1603.07285v1) + - [Deconvolutional + Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + """ + + @utils.allow_initializer_layout + def __init__( + self, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + output_padding=None, + data_format=None, + dilation_rate=(1, 1, 1), + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activations.get(activation), + use_bias=use_bias, + kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), + **kwargs, + ) + + self.output_padding = output_padding + if self.output_padding is not None: + self.output_padding = conv_utils.normalize_tuple( + self.output_padding, 3, "output_padding", allow_zero=True + ) + for stride, out_pad in zip(self.strides, self.output_padding): + if out_pad >= stride: + raise ValueError( + "Strides must be greater than output padding. " + f"Received strides={self.strides}, " + f"output_padding={self.output_padding}." + ) + + def build(self, input_shape): + input_shape = tf.TensorShape(input_shape) + if len(input_shape) != 5: + raise ValueError( + "Inputs should have rank 5. " + f"Received input_shape={input_shape}." + ) + channel_axis = self._get_channel_axis() + if input_shape.dims[channel_axis].value is None: + raise ValueError( + "The channel dimension of the inputs " + "to `Conv3DTranspose` should be defined. 
" + f"The input_shape received is {input_shape}, " + f"where axis {channel_axis} (0-based) " + "is the channel dimension, which found to be `None`." + ) + input_dim = int(input_shape[channel_axis]) + kernel_shape = self.kernel_size + (self.filters, input_dim) + self.input_spec = InputSpec(ndim=5, axes={channel_axis: input_dim}) + + self.kernel = self.add_weight( + "kernel", + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype, + ) + if self.use_bias: + self.bias = self.add_weight( + "bias", + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype, + ) + else: + self.bias = None + self.built = True + + def call(self, inputs): + inputs_shape = tf.shape(inputs) + batch_size = inputs_shape[0] + if self.data_format == "channels_first": + d_axis, h_axis, w_axis = 2, 3, 4 + else: + d_axis, h_axis, w_axis = 1, 2, 3 + + depth = inputs_shape[d_axis] + height = inputs_shape[h_axis] + width = inputs_shape[w_axis] + + kernel_d, kernel_h, kernel_w = self.kernel_size + stride_d, stride_h, stride_w = self.strides + + if self.output_padding is None: + out_pad_d = out_pad_h = out_pad_w = None + else: + out_pad_d, out_pad_h, out_pad_w = self.output_padding + + # Infer the dynamic output shape: + out_depth = conv_utils.deconv_output_length( + depth, + kernel_d, + padding=self.padding, + output_padding=out_pad_d, + stride=stride_d, + ) + out_height = conv_utils.deconv_output_length( + height, + kernel_h, + padding=self.padding, + output_padding=out_pad_h, + stride=stride_h, + ) + out_width = conv_utils.deconv_output_length( + width, + kernel_w, + padding=self.padding, + output_padding=out_pad_w, + stride=stride_w, + ) + if self.data_format == "channels_first": + output_shape = ( + batch_size, + self.filters, + out_depth, + out_height, + out_width, + ) + strides = (1, 1, stride_d, stride_h, stride_w) + else: + output_shape = ( + batch_size, + out_depth, + out_height, + out_width, + self.filters, + ) + strides = (1, stride_d, stride_h, stride_w, 1) + + output_shape_tensor = tf.stack(output_shape) + outputs = tf.nn.conv3d_transpose( + inputs, + self.kernel, + output_shape_tensor, + strides, + data_format=conv_utils.convert_data_format( + self.data_format, ndim=5 + ), + padding=self.padding.upper(), + ) + + if not tf.executing_eagerly() and inputs.shape.rank: + # Infer the static output shape: + out_shape = self.compute_output_shape(inputs.shape) + outputs.set_shape(out_shape) + + if self.use_bias: + outputs = tf.nn.bias_add( + outputs, + self.bias, + data_format=conv_utils.convert_data_format( + self.data_format, ndim=4 + ), + ) + + if self.activation is not None: + return self.activation(outputs) + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + output_shape = list(input_shape) + if self.data_format == "channels_first": + c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4 + else: + c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3 + + kernel_d, kernel_h, kernel_w = self.kernel_size + stride_d, stride_h, stride_w = self.strides + + if self.output_padding is None: + out_pad_d = out_pad_h = out_pad_w = None + else: + out_pad_d, out_pad_h, out_pad_w = self.output_padding + + output_shape[c_axis] = self.filters + output_shape[d_axis] = conv_utils.deconv_output_length( + output_shape[d_axis], + kernel_d, + padding=self.padding, + 
output_padding=out_pad_d, + stride=stride_d, + ) + output_shape[h_axis] = conv_utils.deconv_output_length( + output_shape[h_axis], + kernel_h, + padding=self.padding, + output_padding=out_pad_h, + stride=stride_h, + ) + output_shape[w_axis] = conv_utils.deconv_output_length( + output_shape[w_axis], + kernel_w, + padding=self.padding, + output_padding=out_pad_w, + stride=stride_w, + ) + return tf.TensorShape(output_shape) + + def get_config(self): + config = super().get_config() + config.pop("dilation_rate") + config["output_padding"] = self.output_padding + return config + # Alias diff --git a/keras/layers/convolutional/conv_test.py b/keras/layers/convolutional/conv_test.py index 86aaf8eff75a..859a45cfbeb4 100644 --- a/keras/layers/convolutional/conv_test.py +++ b/keras/layers/convolutional/conv_test.py @@ -15,544 +15,666 @@ """Tests for convolutional layers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) @test_combinations.run_all_keras_modes class Conv1DTest(test_combinations.TestCase): - - def _run_test(self, kwargs, expected_output_shape): - num_samples = 2 - stack_size = 3 - length = 7 - - with self.cached_session(): - test_utils.layer_test( - keras.layers.Conv1D, - kwargs=kwargs, - input_shape=(num_samples, length, stack_size), - expected_output_shape=expected_output_shape) - - def _run_test_extra_batch_dim(self, kwargs, expected_output_shape): - batch_shape = (2, 11) - stack_size = 3 - length = 7 - - with self.cached_session(): - if expected_output_shape is not None: - expected_output_shape = (None,) + expected_output_shape - - test_utils.layer_test( - keras.layers.Conv1D, - kwargs=kwargs, - input_shape=batch_shape + (length, stack_size), - expected_output_shape=expected_output_shape) - - @parameterized.named_parameters( - ('padding_valid', { - 'padding': 'valid' - }, (None, 5, 2)), - ('padding_same', { - 'padding': 'same' - }, (None, 7, 2)), - ('padding_same_dilation_2', { - 'padding': 'same', - 'dilation_rate': 2 - }, (None, 7, 2)), - ('padding_same_dilation_3', { - 'padding': 'same', - 'dilation_rate': 3 - }, (None, 7, 2)), - ('padding_causal', { - 'padding': 'causal' - }, (None, 7, 2)), - ('strides', { - 'strides': 2 - }, (None, 3, 2)), - ('dilation_rate', { - 'dilation_rate': 2 - }, (None, 3, 2)), - ('group', { - 'groups': 3, - 'filters': 6 - }, (None, 5, 6)), - ) - def test_conv1d(self, kwargs, expected_output_shape): - kwargs['filters'] = kwargs.get('filters', 2) - kwargs['kernel_size'] = 3 - self._run_test(kwargs, expected_output_shape) - self._run_test_extra_batch_dim(kwargs, expected_output_shape) - - def test_conv1d_regularizers(self): - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv1D(**kwargs) - layer.build((None, 5, 2)) - self.assertEqual(len(layer.losses), 2) - layer(keras.backend.variable(np.ones((1, 5, 2)))) - self.assertEqual(len(layer.losses), 3) - - def test_conv1d_constraints(self): - k_constraint = lambda x: x - b_constraint = lambda x: x - - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 
'kernel_constraint': k_constraint, - 'bias_constraint': b_constraint, - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv1D(**kwargs) - layer.build((None, 5, 2)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) - - def test_conv1d_recreate_conv(self): - with self.cached_session(): - layer = keras.layers.Conv1D(filters=1, - kernel_size=3, - strides=1, - dilation_rate=2, - padding='causal') - inpt1 = np.random.normal(size=[1, 2, 1]) - inpt2 = np.random.normal(size=[1, 1, 1]) - outp1_shape = layer(inpt1).shape - _ = layer(inpt2).shape - self.assertEqual(outp1_shape, layer(inpt1).shape) - - def test_conv1d_recreate_conv_unknown_dims(self): - with self.cached_session(): - layer = keras.layers.Conv1D(filters=1, - kernel_size=3, - strides=1, - dilation_rate=2, - padding='causal') - - inpt1 = np.random.normal(size=[1, 9, 1]).astype(np.float32) - inpt2 = np.random.normal(size=[1, 2, 1]).astype(np.float32) - outp1_shape = layer(inpt1).shape - - @tf.function(input_signature=[ - tf.TensorSpec([1, None, 1])]) - def fn(inpt): - return layer(inpt) - - fn(inpt2) - self.assertEqual(outp1_shape, layer(inpt1).shape) - - def test_conv1d_invalid_output_shapes(self): - kwargs = {'filters': 2, 'kernel_size': 20} - with self.assertRaisesRegex( - ValueError, r"""One of the dimensions in the output is <= 0"""): - layer = keras.layers.Conv1D(**kwargs) - layer.build((None, 5, 2)) + def _run_test(self, kwargs, expected_output_shape): + num_samples = 2 + stack_size = 3 + length = 7 + + with self.cached_session(): + test_utils.layer_test( + keras.layers.Conv1D, + kwargs=kwargs, + input_shape=(num_samples, length, stack_size), + expected_output_shape=expected_output_shape, + ) + + def _run_test_extra_batch_dim(self, kwargs, expected_output_shape): + batch_shape = (2, 11) + stack_size = 3 + length = 7 + + with self.cached_session(): + if expected_output_shape is not None: + expected_output_shape = (None,) + expected_output_shape + + test_utils.layer_test( + keras.layers.Conv1D, + kwargs=kwargs, + input_shape=batch_shape + (length, stack_size), + expected_output_shape=expected_output_shape, + ) + + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}, (None, 5, 2)), + ("padding_same", {"padding": "same"}, (None, 7, 2)), + ( + "padding_same_dilation_2", + {"padding": "same", "dilation_rate": 2}, + (None, 7, 2), + ), + ( + "padding_same_dilation_3", + {"padding": "same", "dilation_rate": 3}, + (None, 7, 2), + ), + ("padding_causal", {"padding": "causal"}, (None, 7, 2)), + ("strides", {"strides": 2}, (None, 3, 2)), + ("dilation_rate", {"dilation_rate": 2}, (None, 3, 2)), + ("group", {"groups": 3, "filters": 6}, (None, 5, 6)), + ) + def test_conv1d(self, kwargs, expected_output_shape): + kwargs["filters"] = kwargs.get("filters", 2) + kwargs["kernel_size"] = 3 + self._run_test(kwargs, expected_output_shape) + self._run_test_extra_batch_dim(kwargs, expected_output_shape) + + def test_conv1d_regularizers(self): + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv1D(**kwargs) + layer.build((None, 5, 2)) + self.assertEqual(len(layer.losses), 2) + layer(keras.backend.variable(np.ones((1, 5, 2)))) + self.assertEqual(len(layer.losses), 3) + + def test_conv1d_constraints(self): + k_constraint = lambda x: x + b_constraint = lambda x: x + + 
kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_constraint": k_constraint, + "bias_constraint": b_constraint, + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv1D(**kwargs) + layer.build((None, 5, 2)) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) + + def test_conv1d_recreate_conv(self): + with self.cached_session(): + layer = keras.layers.Conv1D( + filters=1, + kernel_size=3, + strides=1, + dilation_rate=2, + padding="causal", + ) + inpt1 = np.random.normal(size=[1, 2, 1]) + inpt2 = np.random.normal(size=[1, 1, 1]) + outp1_shape = layer(inpt1).shape + _ = layer(inpt2).shape + self.assertEqual(outp1_shape, layer(inpt1).shape) + + def test_conv1d_recreate_conv_unknown_dims(self): + with self.cached_session(): + layer = keras.layers.Conv1D( + filters=1, + kernel_size=3, + strides=1, + dilation_rate=2, + padding="causal", + ) + + inpt1 = np.random.normal(size=[1, 9, 1]).astype(np.float32) + inpt2 = np.random.normal(size=[1, 2, 1]).astype(np.float32) + outp1_shape = layer(inpt1).shape + + @tf.function(input_signature=[tf.TensorSpec([1, None, 1])]) + def fn(inpt): + return layer(inpt) + + fn(inpt2) + self.assertEqual(outp1_shape, layer(inpt1).shape) + + def test_conv1d_invalid_output_shapes(self): + kwargs = {"filters": 2, "kernel_size": 20} + with self.assertRaisesRegex( + ValueError, r"""One of the dimensions in the output is <= 0""" + ): + layer = keras.layers.Conv1D(**kwargs) + layer.build((None, 5, 2)) + + def test_conv1d_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": 2, "dilation_rate": 2} + with self.assertRaisesRegex( + ValueError, r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.Conv1D(filters=1, kernel_size=2, **kwargs) @test_combinations.run_all_keras_modes class Conv2DTest(test_combinations.TestCase): - - def _run_test(self, kwargs, expected_output_shape, spatial_shape=(7, 6)): - num_samples = 2 - stack_size = 3 - num_row, num_col = spatial_shape - input_data = None - # Generate valid input data. - if None in spatial_shape: - input_data_shape = (num_samples, num_row or 7, num_col or 6, stack_size) - input_data = 10 * np.random.random(input_data_shape).astype(np.float32) - - with self.cached_session(): - test_utils.layer_test( - keras.layers.Conv2D, - kwargs=kwargs, - input_shape=(num_samples, num_row, num_col, stack_size), - input_data=input_data, - expected_output_shape=expected_output_shape) - - def _run_test_extra_batch_dim(self, - kwargs, - expected_output_shape, - spatial_shape=(7, 6)): - batch_shape = (2, 11) - stack_size = 3 - num_row, num_col = spatial_shape - input_data = None - # Generate valid input data. 
- if None in spatial_shape: - input_data_shape = batch_shape + (num_row or 7, num_col or 6, stack_size) - input_data = 10 * np.random.random(input_data_shape).astype(np.float32) - - with self.cached_session(): - if expected_output_shape is not None: - expected_output_shape = (None,) + expected_output_shape - test_utils.layer_test( - keras.layers.Conv2D, - kwargs=kwargs, - input_shape=batch_shape + (num_row, num_col, stack_size), - input_data=input_data, - expected_output_shape=expected_output_shape) - - @parameterized.named_parameters( - ('padding_valid', { - 'padding': 'valid' - }, (None, 5, 4, 2)), - ('padding_same', { - 'padding': 'same' - }, (None, 7, 6, 2)), - ('padding_same_dilation_2', { - 'padding': 'same', - 'dilation_rate': 2 - }, (None, 7, 6, 2)), - ('strides', { - 'strides': (2, 2) - }, (None, 3, 2, 2)), - ('dilation_rate', { - 'dilation_rate': (2, 2) - }, (None, 3, 2, 2)), - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. - ('data_format', { - 'data_format': 'channels_first' - }, None, True), - ('group', { - 'groups': 3, - 'filters': 6 - }, (None, 5, 4, 6), False), - ('dilation_2_unknown_width', { - 'dilation_rate': (2, 2) - }, (None, None, 2, 2), False, (None, 6)), - ('dilation_2_unknown_height', { - 'dilation_rate': (2, 2) - }, (None, 3, None, 2), False, (7, None)), - ) - def test_conv2d(self, - kwargs, - expected_output_shape=None, - requires_gpu=False, - spatial_shape=(7, 6)): - kwargs['filters'] = kwargs.get('filters', 2) - kwargs['kernel_size'] = (3, 3) - if not requires_gpu or tf.test.is_gpu_available(cuda_only=True): - self._run_test(kwargs, expected_output_shape, spatial_shape) - self._run_test_extra_batch_dim(kwargs, expected_output_shape, - spatial_shape) - - def test_conv2d_regularizers(self): - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv2D(**kwargs) - layer.build((None, 5, 5, 2)) - self.assertEqual(len(layer.losses), 2) - layer(keras.backend.variable(np.ones((1, 5, 5, 2)))) - self.assertEqual(len(layer.losses), 3) - - def test_conv2d_constraints(self): - k_constraint = lambda x: x - b_constraint = lambda x: x - - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'kernel_constraint': k_constraint, - 'bias_constraint': b_constraint, - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv2D(**kwargs) - layer.build((None, 5, 5, 2)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) - - def test_conv2d_zero_kernel_size(self): - kwargs = {'filters': 2, 'kernel_size': 0} - with self.assertRaises(ValueError): - keras.layers.Conv2D(**kwargs) - - def test_conv2d_invalid_output_shapes(self): - kwargs = {'filters': 2, 'kernel_size': 20} - with self.assertRaisesRegex( - ValueError, r"""One of the dimensions in the output is <= 0"""): - layer = keras.layers.Conv2D(**kwargs) - layer.build((None, 5, 5, 2)) + def _run_test(self, kwargs, expected_output_shape, spatial_shape=(7, 6)): + num_samples = 2 + stack_size = 3 + num_row, num_col = spatial_shape + input_data = None + # Generate valid input data. 
+ if None in spatial_shape: + input_data_shape = ( + num_samples, + num_row or 7, + num_col or 6, + stack_size, + ) + input_data = 10 * np.random.random(input_data_shape).astype( + np.float32 + ) + + with self.cached_session(): + test_utils.layer_test( + keras.layers.Conv2D, + kwargs=kwargs, + input_shape=(num_samples, num_row, num_col, stack_size), + input_data=input_data, + expected_output_shape=expected_output_shape, + ) + + def _run_test_extra_batch_dim( + self, kwargs, expected_output_shape, spatial_shape=(7, 6) + ): + batch_shape = (2, 11) + stack_size = 3 + num_row, num_col = spatial_shape + input_data = None + # Generate valid input data. + if None in spatial_shape: + input_data_shape = batch_shape + ( + num_row or 7, + num_col or 6, + stack_size, + ) + input_data = 10 * np.random.random(input_data_shape).astype( + np.float32 + ) + + with self.cached_session(): + if expected_output_shape is not None: + expected_output_shape = (None,) + expected_output_shape + test_utils.layer_test( + keras.layers.Conv2D, + kwargs=kwargs, + input_shape=batch_shape + (num_row, num_col, stack_size), + input_data=input_data, + expected_output_shape=expected_output_shape, + ) + + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}, (None, 5, 4, 2)), + ("padding_same", {"padding": "same"}, (None, 7, 6, 2)), + ( + "padding_same_dilation_2", + {"padding": "same", "dilation_rate": 2}, + (None, 7, 6, 2), + ), + ("strides", {"strides": (2, 2)}, (None, 3, 2, 2)), + ("dilation_rate", {"dilation_rate": (2, 2)}, (None, 3, 2, 2)), + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. + ("data_format", {"data_format": "channels_first"}, None, True), + ("group", {"groups": 3, "filters": 6}, (None, 5, 4, 6), False), + ( + "dilation_2_unknown_width", + {"dilation_rate": (2, 2)}, + (None, None, 2, 2), + False, + (None, 6), + ), + ( + "dilation_2_unknown_height", + {"dilation_rate": (2, 2)}, + (None, 3, None, 2), + False, + (7, None), + ), + ) + def test_conv2d( + self, + kwargs, + expected_output_shape=None, + requires_gpu=False, + spatial_shape=(7, 6), + ): + kwargs["filters"] = kwargs.get("filters", 2) + kwargs["kernel_size"] = (3, 3) + if not requires_gpu or tf.test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, expected_output_shape, spatial_shape) + self._run_test_extra_batch_dim( + kwargs, expected_output_shape, spatial_shape + ) + + def test_conv2d_regularizers(self): + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv2D(**kwargs) + layer.build((None, 5, 5, 2)) + self.assertEqual(len(layer.losses), 2) + layer(keras.backend.variable(np.ones((1, 5, 5, 2)))) + self.assertEqual(len(layer.losses), 3) + + def test_conv2d_constraints(self): + k_constraint = lambda x: x + b_constraint = lambda x: x + + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_constraint": k_constraint, + "bias_constraint": b_constraint, + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv2D(**kwargs) + layer.build((None, 5, 5, 2)) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) + + def test_conv2d_zero_kernel_size(self): + kwargs = {"filters": 2, "kernel_size": 0} + with self.assertRaises(ValueError): + keras.layers.Conv2D(**kwargs) + + def 
test_conv2d_invalid_output_shapes(self): + kwargs = {"filters": 2, "kernel_size": 20} + with self.assertRaisesRegex( + ValueError, r"""One of the dimensions in the output is <= 0""" + ): + layer = keras.layers.Conv2D(**kwargs) + layer.build((None, 5, 5, 2)) + + def test_conv2d_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": [1, 2], "dilation_rate": [2, 1]} + with self.assertRaisesRegex( + ValueError, r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.Conv2D(filters=1, kernel_size=2, **kwargs) @test_combinations.run_all_keras_modes class Conv3DTest(test_combinations.TestCase): - - def _run_test(self, kwargs, expected_output_shape, validate_training=True): - num_samples = 2 - stack_size = 3 - num_row = 7 - num_col = 6 - depth = 5 - - with self.cached_session(): - test_utils.layer_test( - keras.layers.Conv3D, - kwargs=kwargs, - input_shape=(num_samples, depth, num_row, num_col, stack_size), - expected_output_shape=expected_output_shape, - validate_training=validate_training) - - def _run_test_extra_batch_dim(self, - kwargs, - expected_output_shape, - validate_training=True): - batch_shape = (2, 11) - stack_size = 3 - num_row = 7 - num_col = 6 - depth = 5 - - with self.cached_session(): - if expected_output_shape is not None: - expected_output_shape = (None,) + expected_output_shape - - test_utils.layer_test( - keras.layers.Conv3D, - kwargs=kwargs, - input_shape=batch_shape + (depth, num_row, num_col, stack_size), - expected_output_shape=expected_output_shape, - validate_training=validate_training) - - @parameterized.named_parameters( - ('padding_valid', { - 'padding': 'valid' - }, (None, 3, 5, 4, 2)), - ('padding_same', { - 'padding': 'same' - }, (None, 5, 7, 6, 2)), - ('strides', { - 'strides': (2, 2, 2) - }, (None, 2, 3, 2, 2)), - ('dilation_rate', { - 'dilation_rate': (2, 2, 2) - }, (None, 1, 3, 2, 2)), - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. 
- ('data_format', { - 'data_format': 'channels_first' - }, None, True), - ('group', { - 'groups': 3, - 'filters': 6 - }, (None, 3, 5, 4, 6)), - ) - def test_conv3d(self, kwargs, expected_output_shape=None, requires_gpu=False): - kwargs['filters'] = kwargs.get('filters', 2) - kwargs['kernel_size'] = (3, 3, 3) - # train_on_batch currently fails with XLA enabled on GPUs - test_training = 'groups' not in kwargs or not tf_test_utils.is_xla_enabled() - if not requires_gpu or tf.test.is_gpu_available(cuda_only=True): - self._run_test(kwargs, expected_output_shape, test_training) - self._run_test_extra_batch_dim(kwargs, expected_output_shape, - test_training) - - def test_conv3d_regularizers(self): - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv3D(**kwargs) - layer.build((None, 5, 5, 5, 2)) - self.assertEqual(len(layer.losses), 2) - self.assertEqual(len(layer.losses), 2) - layer(keras.backend.variable(np.ones((1, 5, 5, 5, 2)))) - self.assertEqual(len(layer.losses), 3) - - def test_conv3d_constraints(self): - k_constraint = lambda x: x - b_constraint = lambda x: x - - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'kernel_constraint': k_constraint, - 'bias_constraint': b_constraint, - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv3D(**kwargs) - layer.build((None, 5, 5, 5, 2)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) - - def test_conv3d_dynamic_shape(self): - input_data = np.random.random((1, 3, 3, 3, 3)).astype(np.float32) - with self.cached_session(): - # Won't raise error here. 
- test_utils.layer_test( - keras.layers.Conv3D, - kwargs={ - 'data_format': 'channels_last', - 'filters': 3, - 'kernel_size': 3 - }, - input_shape=(None, None, None, None, 3), - input_data=input_data) - if tf.test.is_gpu_available(cuda_only=True): - test_utils.layer_test( - keras.layers.Conv3D, - kwargs={ - 'data_format': 'channels_first', - 'filters': 3, - 'kernel_size': 3 - }, - input_shape=(None, 3, None, None, None), - input_data=input_data) - - def test_conv3d_invalid_output_shapes(self): - kwargs = {'filters': 2, 'kernel_size': 20} - with self.assertRaisesRegex( - ValueError, r"""One of the dimensions in the output is <= 0"""): - layer = keras.layers.Conv3D(**kwargs) - layer.build((None, 5, 5, 5, 2)) + def _run_test(self, kwargs, expected_output_shape, validate_training=True): + num_samples = 2 + stack_size = 3 + num_row = 7 + num_col = 6 + depth = 5 + + with self.cached_session(): + test_utils.layer_test( + keras.layers.Conv3D, + kwargs=kwargs, + input_shape=(num_samples, depth, num_row, num_col, stack_size), + expected_output_shape=expected_output_shape, + validate_training=validate_training, + ) + + def _run_test_extra_batch_dim( + self, kwargs, expected_output_shape, validate_training=True + ): + batch_shape = (2, 11) + stack_size = 3 + num_row = 7 + num_col = 6 + depth = 5 + + with self.cached_session(): + if expected_output_shape is not None: + expected_output_shape = (None,) + expected_output_shape + + test_utils.layer_test( + keras.layers.Conv3D, + kwargs=kwargs, + input_shape=batch_shape + (depth, num_row, num_col, stack_size), + expected_output_shape=expected_output_shape, + validate_training=validate_training, + ) + + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}, (None, 3, 5, 4, 2)), + ("padding_same", {"padding": "same"}, (None, 5, 7, 6, 2)), + ("strides", {"strides": (2, 2, 2)}, (None, 2, 3, 2, 2)), + ("dilation_rate", {"dilation_rate": (2, 2, 2)}, (None, 1, 3, 2, 2)), + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. 
+ ("data_format", {"data_format": "channels_first"}, None, True), + ("group", {"groups": 3, "filters": 6}, (None, 3, 5, 4, 6)), + ) + def test_conv3d( + self, kwargs, expected_output_shape=None, requires_gpu=False + ): + kwargs["filters"] = kwargs.get("filters", 2) + kwargs["kernel_size"] = (3, 3, 3) + # train_on_batch currently fails with XLA enabled on GPUs + test_training = ( + "groups" not in kwargs or not tf_test_utils.is_xla_enabled() + ) + if not requires_gpu or tf.test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, expected_output_shape, test_training) + self._run_test_extra_batch_dim( + kwargs, expected_output_shape, test_training + ) + + def test_conv3d_regularizers(self): + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv3D(**kwargs) + layer.build((None, 5, 5, 5, 2)) + self.assertEqual(len(layer.losses), 2) + self.assertEqual(len(layer.losses), 2) + layer(keras.backend.variable(np.ones((1, 5, 5, 5, 2)))) + self.assertEqual(len(layer.losses), 3) + + def test_conv3d_constraints(self): + k_constraint = lambda x: x + b_constraint = lambda x: x + + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_constraint": k_constraint, + "bias_constraint": b_constraint, + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv3D(**kwargs) + layer.build((None, 5, 5, 5, 2)) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) + + def test_conv3d_dynamic_shape(self): + input_data = np.random.random((1, 3, 3, 3, 3)).astype(np.float32) + with self.cached_session(): + # Won't raise error here. 
+ test_utils.layer_test( + keras.layers.Conv3D, + kwargs={ + "data_format": "channels_last", + "filters": 3, + "kernel_size": 3, + }, + input_shape=(None, None, None, None, 3), + input_data=input_data, + ) + if tf.test.is_gpu_available(cuda_only=True): + test_utils.layer_test( + keras.layers.Conv3D, + kwargs={ + "data_format": "channels_first", + "filters": 3, + "kernel_size": 3, + }, + input_shape=(None, 3, None, None, None), + input_data=input_data, + ) + + def test_conv3d_invalid_output_shapes(self): + kwargs = {"filters": 2, "kernel_size": 20} + with self.assertRaisesRegex( + ValueError, r"""One of the dimensions in the output is <= 0""" + ): + layer = keras.layers.Conv3D(**kwargs) + layer.build((None, 5, 5, 5, 2)) + + def test_conv3d_zero_dim_output(self): + conv = keras.layers.Convolution3DTranspose(2, [3, 3, 3], padding="same") + x = tf.random.uniform([1, 32, 32, 0, 3], dtype=tf.float32) + # The layer doesn't crash with 0 dim input + _ = conv(x) + + def test_conv3d_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": [1, 1, 2], "dilation_rate": [1, 2, 1]} + with self.assertRaisesRegex( + ValueError, r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.Conv3D(filters=1, kernel_size=2, **kwargs) @test_combinations.run_all_keras_modes(always_skip_v1=True) class GroupedConvTest(test_combinations.TestCase): - - @parameterized.named_parameters( - ('Conv1D', keras.layers.Conv1D), - ('Conv2D', keras.layers.Conv2D), - ('Conv3D', keras.layers.Conv3D), - ) - def test_group_conv_incorrect_use(self, layer): - with self.assertRaisesRegex(ValueError, 'The number of filters'): - layer(16, 3, groups=3) - with self.assertRaisesRegex(ValueError, 'The number of input channels'): - layer(16, 3, groups=4).build((32, 12, 12, 3)) - - @parameterized.named_parameters( - ('Conv1D', keras.layers.Conv1D, (32, 12, 32)), - ('Conv2D', keras.layers.Conv2D, (32, 12, 12, 32)), - ('Conv3D', keras.layers.Conv3D, (32, 12, 12, 12, 32)), - ) - def test_group_conv(self, layer_cls, input_shape): - if tf.test.is_gpu_available(cuda_only=True): - with test_utils.use_gpu(): - inputs = tf.random.uniform(shape=input_shape) - - layer = layer_cls(16, 3, groups=4, use_bias=False) - layer.build(input_shape) - - input_slices = tf.split(inputs, 4, axis=-1) - weight_slices = tf.split(layer.kernel, 4, axis=-1) - expected_outputs = tf.concat([ - tf.nn.convolution(inputs, weights) - for inputs, weights in zip(input_slices, weight_slices) - ], - axis=-1) - self.assertAllClose( - layer(inputs), expected_outputs, rtol=3e-5, atol=3e-5) - - def test_group_conv_depthwise(self): - if tf.test.is_gpu_available(cuda_only=True): - with test_utils.use_gpu(): - inputs = tf.random.uniform(shape=(3, 27, 27, 32)) - - layer = keras.layers.Conv2D(32, 3, groups=32, use_bias=False) - layer.build((3, 27, 27, 32)) - - weights_dw = tf.reshape(layer.kernel, [3, 3, 32, 1]) - expected_outputs = tf.compat.v1.nn.depthwise_conv2d( - inputs, weights_dw, strides=[1, 1, 1, 1], padding='VALID') - - self.assertAllClose(layer(inputs), expected_outputs, rtol=1e-5) + @parameterized.named_parameters( + ("Conv1D", keras.layers.Conv1D), + ("Conv2D", keras.layers.Conv2D), + ("Conv3D", keras.layers.Conv3D), + ) + def test_group_conv_incorrect_use(self, layer): + with self.assertRaisesRegex(ValueError, "The number of filters"): + layer(16, 3, groups=3) + with self.assertRaisesRegex(ValueError, "The number of input channels"): + layer(16, 3, groups=4).build((32, 12, 12, 3)) + + @parameterized.named_parameters( + ("Conv1D", keras.layers.Conv1D, (32, 12, 
32)), + ("Conv2D", keras.layers.Conv2D, (32, 12, 12, 32)), + ("Conv3D", keras.layers.Conv3D, (32, 12, 12, 12, 32)), + ) + def test_group_conv(self, layer_cls, input_shape): + if tf.test.is_gpu_available(cuda_only=True): + with test_utils.use_gpu(): + inputs = tf.random.uniform(shape=input_shape) + + layer = layer_cls(16, 3, groups=4, use_bias=False) + layer.build(input_shape) + + input_slices = tf.split(inputs, 4, axis=-1) + weight_slices = tf.split(layer.kernel, 4, axis=-1) + expected_outputs = tf.concat( + [ + tf.nn.convolution(inputs, weights) + for inputs, weights in zip(input_slices, weight_slices) + ], + axis=-1, + ) + self.assertAllClose( + layer(inputs), expected_outputs, rtol=3e-5, atol=3e-5 + ) + + def test_group_conv_depthwise(self): + if tf.test.is_gpu_available(cuda_only=True): + with test_utils.use_gpu(): + inputs = tf.random.uniform(shape=(3, 27, 27, 32)) + + layer = keras.layers.Conv2D(32, 3, groups=32, use_bias=False) + layer.build((3, 27, 27, 32)) + + weights_dw = tf.reshape(layer.kernel, [3, 3, 32, 1]) + expected_outputs = tf.compat.v1.nn.depthwise_conv2d( + inputs, weights_dw, strides=[1, 1, 1, 1], padding="VALID" + ) + + self.assertAllClose(layer(inputs), expected_outputs, rtol=1e-5) @test_combinations.run_all_keras_modes class ConvSequentialTest(test_combinations.TestCase): - - def _run_test(self, conv_layer_cls, kwargs, input_shape1, input_shape2, - expected_output_shape1, expected_output_shape2): - kwargs['filters'] = 1 - kwargs['kernel_size'] = 3 - kwargs['dilation_rate'] = 2 - with self.cached_session(): - layer = conv_layer_cls(**kwargs) - output1 = layer(np.zeros(input_shape1)) - self.assertEqual(output1.shape, expected_output_shape1) - output2 = layer(np.zeros(input_shape2)) - self.assertEqual(output2.shape, expected_output_shape2) - - @parameterized.named_parameters( - ('padding_valid', {'padding': 'valid'}, - (1, 8, 2), (1, 5, 2), (1, 4, 1), (1, 1, 1)), - ('padding_same', {'padding': 'same'}, - (1, 8, 2), (1, 5, 2), (1, 8, 1), (1, 5, 1)), - ('padding_causal', {'padding': 'causal'}, - (1, 8, 2), (1, 5, 2), (1, 8, 1), (1, 5, 1)), - ) - def test_conv1d(self, kwargs, input_shape1, input_shape2, - expected_output_shape1, expected_output_shape2): - self._run_test(keras.layers.Conv1D, kwargs, input_shape1, input_shape2, - expected_output_shape1, expected_output_shape2) - - @parameterized.named_parameters( - ('padding_valid', {'padding': 'valid'}, - (1, 7, 6, 2), (1, 6, 5, 2), (1, 3, 2, 1), (1, 2, 1, 1)), - ('padding_same', {'padding': 'same'}, - (1, 7, 6, 2), (1, 6, 5, 2), (1, 7, 6, 1), (1, 6, 5, 1)), - ) - def test_conv2d(self, kwargs, input_shape1, input_shape2, - expected_output_shape1, expected_output_shape2): - self._run_test(keras.layers.Conv2D, kwargs, input_shape1, input_shape2, - expected_output_shape1, expected_output_shape2) - - @parameterized.named_parameters( - ('padding_valid', {'padding': 'valid'}, - (1, 5, 7, 6, 2), (1, 8, 6, 5, 2), (1, 1, 3, 2, 1), (1, 4, 2, 1, 1)), - ('padding_same', {'padding': 'same'}, - (1, 5, 7, 6, 2), (1, 8, 6, 5, 2), (1, 5, 7, 6, 1), (1, 8, 6, 5, 1)), - ) - def test_conv3d(self, kwargs, input_shape1, input_shape2, - expected_output_shape1, expected_output_shape2): - self._run_test(keras.layers.Conv3D, kwargs, input_shape1, input_shape2, - expected_output_shape1, expected_output_shape2) - - def test_dynamic_shape(self): - with self.cached_session(): - layer = keras.layers.Conv3D(2, 3) - input_shape = (5, None, None, 2) - inputs = keras.Input(shape=input_shape) - x = layer(inputs) - # Won't raise error here with None values in 
input shape (b/144282043). - layer(x) - -if __name__ == '__main__': - tf.test.main() + def _run_test( + self, + conv_layer_cls, + kwargs, + input_shape1, + input_shape2, + expected_output_shape1, + expected_output_shape2, + ): + kwargs["filters"] = 1 + kwargs["kernel_size"] = 3 + kwargs["dilation_rate"] = 2 + with self.cached_session(): + layer = conv_layer_cls(**kwargs) + output1 = layer(np.zeros(input_shape1)) + self.assertEqual(output1.shape, expected_output_shape1) + output2 = layer(np.zeros(input_shape2)) + self.assertEqual(output2.shape, expected_output_shape2) + + @parameterized.named_parameters( + ( + "padding_valid", + {"padding": "valid"}, + (1, 8, 2), + (1, 5, 2), + (1, 4, 1), + (1, 1, 1), + ), + ( + "padding_same", + {"padding": "same"}, + (1, 8, 2), + (1, 5, 2), + (1, 8, 1), + (1, 5, 1), + ), + ( + "padding_causal", + {"padding": "causal"}, + (1, 8, 2), + (1, 5, 2), + (1, 8, 1), + (1, 5, 1), + ), + ) + def test_conv1d( + self, + kwargs, + input_shape1, + input_shape2, + expected_output_shape1, + expected_output_shape2, + ): + self._run_test( + keras.layers.Conv1D, + kwargs, + input_shape1, + input_shape2, + expected_output_shape1, + expected_output_shape2, + ) + + @parameterized.named_parameters( + ( + "padding_valid", + {"padding": "valid"}, + (1, 7, 6, 2), + (1, 6, 5, 2), + (1, 3, 2, 1), + (1, 2, 1, 1), + ), + ( + "padding_same", + {"padding": "same"}, + (1, 7, 6, 2), + (1, 6, 5, 2), + (1, 7, 6, 1), + (1, 6, 5, 1), + ), + ) + def test_conv2d( + self, + kwargs, + input_shape1, + input_shape2, + expected_output_shape1, + expected_output_shape2, + ): + self._run_test( + keras.layers.Conv2D, + kwargs, + input_shape1, + input_shape2, + expected_output_shape1, + expected_output_shape2, + ) + + @parameterized.named_parameters( + ( + "padding_valid", + {"padding": "valid"}, + (1, 5, 7, 6, 2), + (1, 8, 6, 5, 2), + (1, 1, 3, 2, 1), + (1, 4, 2, 1, 1), + ), + ( + "padding_same", + {"padding": "same"}, + (1, 5, 7, 6, 2), + (1, 8, 6, 5, 2), + (1, 5, 7, 6, 1), + (1, 8, 6, 5, 1), + ), + ) + def test_conv3d( + self, + kwargs, + input_shape1, + input_shape2, + expected_output_shape1, + expected_output_shape2, + ): + self._run_test( + keras.layers.Conv3D, + kwargs, + input_shape1, + input_shape2, + expected_output_shape1, + expected_output_shape2, + ) + + def test_dynamic_shape(self): + with self.cached_session(): + layer = keras.layers.Conv3D(2, 3) + input_shape = (5, None, None, 2) + inputs = keras.Input(shape=input_shape) + x = layer(inputs) + # Won't raise error here with None values in input shape + # (b/144282043). 
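
For context on test_dynamic_shape above: in the functional API, `None` entries in `Input(shape=...)` stand for dimensions that are unknown until runtime, and conv layers must build and infer output shapes without choking on them (the b/144282043 reference above tracked a crash in exactly this case). A minimal sketch of the same pattern (assuming TF 2.x; not part of the diff):

import tensorflow as tf

# Only the depth (5) and channel (2) dimensions are known up front;
# height and width stay undefined until runtime.
inputs = tf.keras.Input(shape=(5, None, None, 2))
layer = tf.keras.layers.Conv3D(2, 3)
x = layer(inputs)
# Unknown dimensions propagate as None in the inferred shape.
print(x.shape)  # (None, 3, None, None, 2)
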
+ layer(x) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/convolutional/conv_transpose_test.py b/keras/layers/convolutional/conv_transpose_test.py index 48823996fb45..6747773371ed 100644 --- a/keras/layers/convolutional/conv_transpose_test.py +++ b/keras/layers/convolutional/conv_transpose_test.py @@ -14,245 +14,278 @@ # ============================================================================== """Tests for convolutional transpose layers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class Conv1DTransposeTest(test_combinations.TestCase): + def _run_test(self, kwargs, expected_output_shape): + num_samples = 2 + stack_size = 3 + num_col = 6 - def _run_test(self, kwargs, expected_output_shape): - num_samples = 2 - stack_size = 3 - num_col = 6 + with test_utils.use_gpu(): + test_utils.layer_test( + keras.layers.Conv1DTranspose, + kwargs=kwargs, + input_shape=(num_samples, num_col, stack_size), + expected_output_shape=expected_output_shape, + ) - with test_utils.use_gpu(): - test_utils.layer_test( - keras.layers.Conv1DTranspose, - kwargs=kwargs, - input_shape=(num_samples, num_col, stack_size), - expected_output_shape=expected_output_shape) + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}, (None, 8, 2)), + ("padding_same", {"padding": "same"}, (None, 6, 2)), + ("strides", {"strides": 2}, (None, 13, 2)), + # Only runs on GPU with CUDA, dilation_rate>1 is not supported on CPU. + ("dilation_rate", {"dilation_rate": 2}, (None, 10, 2)), + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. + ("data_format", {"data_format": "channels_first"}), + ) + def test_conv1d_transpose(self, kwargs, expected_output_shape=None): + kwargs["filters"] = 2 + kwargs["kernel_size"] = 3 + if ( + "data_format" not in kwargs and "dilation_rate" not in kwargs + ) or tf.test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, expected_output_shape) - @parameterized.named_parameters( - ('padding_valid', {'padding': 'valid'}, (None, 8, 2)), - ('padding_same', {'padding': 'same'}, (None, 6, 2)), - ('strides', {'strides': 2}, (None, 13, 2)), - # Only runs on GPU with CUDA, dilation_rate>1 is not supported on CPU. - ('dilation_rate', {'dilation_rate': 2}, (None, 10, 2)), - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. 
- ('data_format', {'data_format': 'channels_first'}), - ) - def test_conv1d_transpose(self, kwargs, expected_output_shape=None): - kwargs['filters'] = 2 - kwargs['kernel_size'] = 3 - if (('data_format' not in kwargs and 'dilation_rate' not in kwargs) or - tf.test.is_gpu_available(cuda_only=True)): - self._run_test(kwargs, expected_output_shape) + def test_conv1d_transpose_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": 2, "dilation_rate": 2} + with self.assertRaisesRegex( + ValueError, r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.Conv1DTranspose(filters=1, kernel_size=2, **kwargs) @test_combinations.run_all_keras_modes class Conv2DTransposeTest(test_combinations.TestCase): + def _run_test(self, kwargs): + num_samples = 2 + stack_size = 3 + num_row = 7 + num_col = 6 + + with self.cached_session(): + test_utils.layer_test( + keras.layers.Conv2DTranspose, + kwargs=kwargs, + input_shape=(num_samples, num_row, num_col, stack_size), + ) - def _run_test(self, kwargs): - num_samples = 2 - stack_size = 3 - num_row = 7 - num_col = 6 + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}), + ("padding_same", {"padding": "same"}), + ("strides", {"strides": (2, 2)}), + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. + ("data_format", {"data_format": "channels_first"}), + ( + "strides_output_padding", + {"strides": (2, 2), "output_padding": (1, 1)}, + ), + ) + def test_conv2d_transpose(self, kwargs): + kwargs["filters"] = 2 + kwargs["kernel_size"] = (3, 3) + if "data_format" not in kwargs or tf.test.is_gpu_available( + cuda_only=True + ): + self._run_test(kwargs) - with self.cached_session(): - test_utils.layer_test( - keras.layers.Conv2DTranspose, - kwargs=kwargs, - input_shape=(num_samples, num_row, num_col, stack_size)) + def test_conv2d_transpose_regularizers(self): + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv2DTranspose(**kwargs) + layer.build((None, 5, 5, 2)) + self.assertEqual(len(layer.losses), 2) + layer(keras.backend.variable(np.ones((1, 5, 5, 2)))) + self.assertEqual(len(layer.losses), 3) - @parameterized.named_parameters( - ('padding_valid', {'padding': 'valid'}), - ('padding_same', {'padding': 'same'}), - ('strides', {'strides': (2, 2)}), - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. 
- ('data_format', {'data_format': 'channels_first'}), - ('strides_output_padding', {'strides': (2, 2), 'output_padding': (1, 1)}), - ) - def test_conv2d_transpose(self, kwargs): - kwargs['filters'] = 2 - kwargs['kernel_size'] = (3, 3) - if 'data_format' not in kwargs or tf.test.is_gpu_available(cuda_only=True): - self._run_test(kwargs) + def test_conv2d_transpose_constraints(self): + k_constraint = lambda x: x + b_constraint = lambda x: x - def test_conv2d_transpose_regularizers(self): - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv2DTranspose(**kwargs) - layer.build((None, 5, 5, 2)) - self.assertEqual(len(layer.losses), 2) - layer(keras.backend.variable(np.ones((1, 5, 5, 2)))) - self.assertEqual(len(layer.losses), 3) + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_constraint": k_constraint, + "bias_constraint": b_constraint, + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv2DTranspose(**kwargs) + layer.build((None, 5, 5, 2)) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) - def test_conv2d_transpose_constraints(self): - k_constraint = lambda x: x - b_constraint = lambda x: x + def test_conv2d_transpose_dilation(self): + test_utils.layer_test( + keras.layers.Conv2DTranspose, + kwargs={ + "filters": 2, + "kernel_size": 3, + "padding": "same", + "data_format": "channels_last", + "dilation_rate": (2, 2), + }, + input_shape=(2, 5, 6, 3), + ) - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'kernel_constraint': k_constraint, - 'bias_constraint': b_constraint, - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv2DTranspose(**kwargs) - layer.build((None, 5, 5, 2)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) + input_data = np.arange(48).reshape((1, 4, 4, 3)).astype(np.float32) - def test_conv2d_transpose_dilation(self): - test_utils.layer_test( - keras.layers.Conv2DTranspose, - kwargs={'filters': 2, - 'kernel_size': 3, - 'padding': 'same', - 'data_format': 'channels_last', - 'dilation_rate': (2, 2)}, - input_shape=(2, 5, 6, 3)) + expected_output = np.float32( + [ + [192, 228, 192, 228], + [336, 372, 336, 372], + [192, 228, 192, 228], + [336, 372, 336, 372], + ] + ).reshape((1, 4, 4, 1)) + test_utils.layer_test( + keras.layers.Conv2DTranspose, + input_data=input_data, + kwargs={ + "filters": 1, + "kernel_size": 3, + "padding": "same", + "data_format": "channels_last", + "dilation_rate": (2, 2), + "kernel_initializer": "ones", + }, + expected_output=expected_output, + ) - input_data = np.arange(48).reshape((1, 4, 4, 3)).astype(np.float32) - # pylint: disable=too-many-function-args - expected_output = np.float32([ - [192, 228, 192, 228], - [336, 372, 336, 372], - [192, 228, 192, 228], - [336, 372, 336, 372] - ]).reshape((1, 4, 4, 1)) - test_utils.layer_test(keras.layers.Conv2DTranspose, - input_data=input_data, - kwargs={'filters': 1, - 'kernel_size': 3, - 'padding': 'same', - 'data_format': 'channels_last', - 'dilation_rate': (2, 2), - 'kernel_initializer': 'ones'}, - expected_output=expected_output) + def test_conv2d_transpose_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": [2, 1], "dilation_rate": [2, 1]} + with self.assertRaisesRegex( + ValueError, 
r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.Conv2DTranspose(filters=1, kernel_size=2, **kwargs) @test_combinations.run_all_keras_modes class Conv3DTransposeTest(test_combinations.TestCase): + def _run_test(self, kwargs, expected_output_shape): + num_samples = 2 + stack_size = 3 + num_row = 7 + num_col = 6 + depth = 5 - def _run_test(self, kwargs, expected_output_shape): - num_samples = 2 - stack_size = 3 - num_row = 7 - num_col = 6 - depth = 5 + with test_utils.use_gpu(): + test_utils.layer_test( + keras.layers.Conv3DTranspose, + kwargs=kwargs, + input_shape=(num_samples, depth, num_row, num_col, stack_size), + expected_output_shape=expected_output_shape, + ) - with test_utils.use_gpu(): - test_utils.layer_test( - keras.layers.Conv3DTranspose, - kwargs=kwargs, - input_shape=(num_samples, depth, num_row, num_col, stack_size), - expected_output_shape=expected_output_shape) + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}, (None, 7, 9, 8, 2)), + ("padding_same", {"padding": "same"}, (None, 5, 7, 6, 2)), + ("strides", {"strides": (2, 2, 2)}, (None, 11, 15, 13, 2)), + ("dilation_rate", {"dilation_rate": (2, 2, 2)}, (None, 7, 9, 8, 2)), + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. + ("data_format", {"data_format": "channels_first"}), + ( + "strides_output_padding", + {"strides": (2, 2, 2), "output_padding": (1, 1, 1)}, + (None, 12, 16, 14, 2), + ), + ) + def test_conv3d_transpose(self, kwargs, expected_output_shape=None): + kwargs["filters"] = 2 + kwargs["kernel_size"] = (3, 3, 3) + if "data_format" not in kwargs or tf.test.is_gpu_available( + cuda_only=True + ): + self._run_test(kwargs, expected_output_shape) - @parameterized.named_parameters( - ('padding_valid', { - 'padding': 'valid' - }, (None, 7, 9, 8, 2)), - ('padding_same', { - 'padding': 'same' - }, (None, 5, 7, 6, 2)), - ('strides', { - 'strides': (2, 2, 2) - }, (None, 11, 15, 13, 2)), - ('dilation_rate', { - 'dilation_rate': (2, 2, 2) - }, (None, 7, 9, 8, 2)), - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. 
- ('data_format', { - 'data_format': 'channels_first' - }), - ('strides_output_padding', { - 'strides': (2, 2, 2), - 'output_padding': (1, 1, 1) - }, (None, 12, 16, 14, 2)), - ) - def test_conv3d_transpose(self, kwargs, expected_output_shape=None): - kwargs['filters'] = 2 - kwargs['kernel_size'] = (3, 3, 3) - if 'data_format' not in kwargs or tf.test.is_gpu_available(cuda_only=True): - self._run_test(kwargs, expected_output_shape) + def test_conv3d_transpose_regularizers(self): + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv3DTranspose(**kwargs) + layer.build((None, 5, 5, 5, 2)) + self.assertEqual(len(layer.losses), 2) + layer(keras.backend.variable(np.ones((1, 5, 5, 5, 2)))) + self.assertEqual(len(layer.losses), 3) - def test_conv3d_transpose_regularizers(self): - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv3DTranspose(**kwargs) - layer.build((None, 5, 5, 5, 2)) - self.assertEqual(len(layer.losses), 2) - layer(keras.backend.variable(np.ones((1, 5, 5, 5, 2)))) - self.assertEqual(len(layer.losses), 3) + def test_conv3d_transpose_constraints(self): + k_constraint = lambda x: x + b_constraint = lambda x: x - def test_conv3d_transpose_constraints(self): - k_constraint = lambda x: x - b_constraint = lambda x: x + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "kernel_constraint": k_constraint, + "bias_constraint": b_constraint, + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.Conv3DTranspose(**kwargs) + layer.build((None, 5, 5, 5, 2)) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'kernel_constraint': k_constraint, - 'bias_constraint': b_constraint, - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.Conv3DTranspose(**kwargs) - layer.build((None, 5, 5, 5, 2)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) + def test_conv3d_transpose_dynamic_shape(self): + input_data = np.random.random((1, 3, 3, 3, 3)).astype(np.float32) + with self.cached_session(): + # Won't raise error here. + test_utils.layer_test( + keras.layers.Conv3DTranspose, + kwargs={ + "data_format": "channels_last", + "filters": 3, + "kernel_size": 3, + }, + input_shape=(None, None, None, None, 3), + input_data=input_data, + ) + if tf.test.is_gpu_available(cuda_only=True): + test_utils.layer_test( + keras.layers.Conv3DTranspose, + kwargs={ + "data_format": "channels_first", + "filters": 3, + "kernel_size": 3, + }, + input_shape=(None, 3, None, None, None), + input_data=input_data, + ) + + def test_conv3d_transpose_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": [2, 2, 1], "dilation_rate": [2, 2, 1]} + with self.assertRaisesRegex( + ValueError, r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.Conv3DTranspose(filters=1, kernel_size=2, **kwargs) - def test_conv3d_transpose_dynamic_shape(self): - input_data = np.random.random((1, 3, 3, 3, 3)).astype(np.float32) - with self.cached_session(): - # Won't raise error here. 
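
For context: the expected shapes in the Conv3DTranspose parameter lists above follow the usual transposed-convolution length rule, `(input - 1) * stride + kernel` for `valid` padding and `input * stride` for `same`, plus any explicit `output_padding`. A small sketch of that arithmetic (a hypothetical helper, not the Keras implementation; it covers only the non-dilated cases exercised above):

def deconv_length(input_len, stride, kernel, padding, output_padding=0):
    # Transposed-convolution output length (non-dilated, kernel >= stride).
    if padding == "same":
        return input_len * stride
    return (input_len - 1) * stride + kernel + output_padding

# Input spatial dims (5, 7, 6) with kernel 3, as in the test:
assert [deconv_length(d, 1, 3, "valid") for d in (5, 7, 6)] == [7, 9, 8]
assert [deconv_length(d, 1, 3, "same") for d in (5, 7, 6)] == [5, 7, 6]
assert [deconv_length(d, 2, 3, "valid") for d in (5, 7, 6)] == [11, 15, 13]
assert [
    deconv_length(d, 2, 3, "valid", output_padding=1) for d in (5, 7, 6)
] == [12, 16, 14]

The invalid-strides tests nearby pin down a related constraint: the layers reject `strides > 1` combined with `dilation_rate > 1`, since the underlying convolution ops do not support both at once.
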
- test_utils.layer_test( - keras.layers.Conv3DTranspose, - kwargs={ - 'data_format': 'channels_last', - 'filters': 3, - 'kernel_size': 3 - }, - input_shape=(None, None, None, None, 3), - input_data=input_data) - if tf.test.is_gpu_available(cuda_only=True): - test_utils.layer_test( - keras.layers.Conv3DTranspose, - kwargs={ - 'data_format': 'channels_first', - 'filters': 3, - 'kernel_size': 3 - }, - input_shape=(None, 3, None, None, None), - input_data=input_data) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/convolutional/depthwise_conv1d.py b/keras/layers/convolutional/depthwise_conv1d.py index 8c9a1581c58e..b1cca7a37353 100644 --- a/keras/layers/convolutional/depthwise_conv1d.py +++ b/keras/layers/convolutional/depthwise_conv1d.py @@ -13,188 +13,205 @@ # limitations under the License. # ============================================================================== """Keras depthwise 1D convolution.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras.layers.convolutional.base_depthwise_conv import DepthwiseConv from keras.utils import conv_utils from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.DepthwiseConv1D') +@keras_export("keras.layers.DepthwiseConv1D") class DepthwiseConv1D(DepthwiseConv): - """Depthwise 1D convolution. - - Depthwise convolution is a type of convolution in which each input channel is - convolved with a different kernel (called a depthwise kernel). You - can understand depthwise convolution as the first step in a depthwise - separable convolution. - - It is implemented via the following steps: - - - Split the input into individual channels. - - Convolve each channel with an individual depthwise kernel with - `depth_multiplier` output channels. - - Concatenate the convolved outputs along the channels axis. - - Unlike a regular 1D convolution, depthwise convolution does not mix - information across different input channels. - - The `depth_multiplier` argument determines how many filter are applied to one - input channel. As such, it controls the amount of output channels that are - generated per input channel in the depthwise step. - - Args: - kernel_size: An integer, specifying the height and width of the 1D - convolution window. Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer, specifying the strides of the convolution along the - height and width. Can be a single integer to specify the same value for - all spatial dimensions. Specifying any stride value != 1 is incompatible - with specifying any `dilation_rate` value != 1. - padding: one of `'valid'` or `'same'` (case-insensitive). `"valid"` means no - padding. `"same"` results in padding with zeros evenly to the left/right - or up/down of the input such that output has the same height/width - dimension as the input. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. 
`channels_last` corresponds - to inputs with shape `(batch_size, height, width, channels)` while - `channels_first` corresponds to inputs with shape `(batch_size, channels, - height, width)`. It defaults to the `image_data_format` value found in - your Keras config file at `~/.keras/keras.json`. If you never set it, then - it will be 'channels_last'. - dilation_rate: A single integer, specifying the dilation rate to use for - dilated convolution. Currently, specifying any `dilation_rate` value != 1 - is incompatible with specifying any stride value != 1. - activation: Activation function to use. If you don't specify anything, no - activation is applied (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: Initializer for the depthwise kernel matrix (see - `keras.initializers`). If None, the default initializer - ('glorot_uniform') will be used. - bias_initializer: Initializer for the bias vector (see - `keras.initializers`). If None, the default initializer ('zeros') will be - used. - depthwise_regularizer: Regularizer function applied to the depthwise kernel - matrix (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector (see - `keras.regularizers`). - activity_regularizer: Regularizer function applied to the output of the - layer (its 'activation') (see `keras.regularizers`). - depthwise_constraint: Constraint function applied to the depthwise kernel - matrix (see `keras.constraints`). - bias_constraint: Constraint function applied to the bias vector (see - `keras.constraints`). - - Input shape: - 4D tensor with shape: `[batch_size, channels, rows, cols]` if - data_format='channels_first' - or 4D tensor with shape: `[batch_size, rows, cols, channels]` if - data_format='channels_last'. - - Output shape: - 4D tensor with shape: `[batch_size, channels * depth_multiplier, new_rows, - new_cols]` if `data_format='channels_first'` - or 4D tensor with shape: `[batch_size, - new_rows, new_cols, channels * depth_multiplier]` if - `data_format='channels_last'`. `rows` and `cols` values might have changed - due to padding. - - Returns: - A tensor of rank 4 representing - `activation(depthwiseconv2d(inputs, kernel) + bias)`. - - Raises: - ValueError: if `padding` is "causal". - ValueError: when both `strides` > 1 and `dilation_rate` > 1. 
- """ - - def __init__(self, - kernel_size, - strides=1, - padding='valid', - depth_multiplier=1, - data_format=None, - dilation_rate=1, - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - 1, - kernel_size=kernel_size, - strides=strides, - padding=padding, - depth_multiplier=depth_multiplier, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - depthwise_initializer=depthwise_initializer, - bias_initializer=bias_initializer, - depthwise_regularizer=depthwise_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - depthwise_constraint=depthwise_constraint, - bias_constraint=bias_constraint, - **kwargs) - - def call(self, inputs): - if self.data_format == 'channels_last': - strides = (1,) + self.strides * 2 + (1,) - spatial_start_dim = 1 - else: - strides = (1, 1) + self.strides * 2 - spatial_start_dim = 2 - inputs = tf.expand_dims(inputs, spatial_start_dim) - depthwise_kernel = tf.expand_dims(self.depthwise_kernel, axis=0) - dilation_rate = (1,) + self.dilation_rate - - outputs = tf.nn.depthwise_conv2d( - inputs, - depthwise_kernel, - strides=strides, - padding=self.padding.upper(), - dilations=dilation_rate, - data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) - - if self.use_bias: - outputs = tf.nn.bias_add( - outputs, - self.bias, - data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) - - outputs = tf.squeeze(outputs, [spatial_start_dim]) - - if self.activation is not None: - return self.activation(outputs) - - return outputs - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - rows = input_shape[2] - out_filters = input_shape[1] * self.depth_multiplier - elif self.data_format == 'channels_last': - rows = input_shape[1] - out_filters = input_shape[2] * self.depth_multiplier - - rows = conv_utils.conv_output_length(rows, self.kernel_size[0], - self.padding, self.strides[0], - self.dilation_rate[0]) - if self.data_format == 'channels_first': - return (input_shape[0], out_filters, rows) - elif self.data_format == 'channels_last': - return (input_shape[0], rows, out_filters) + """Depthwise 1D convolution. + + Depthwise convolution is a type of convolution in which each input channel + is convolved with a different kernel (called a depthwise kernel). You can + understand depthwise convolution as the first step in a depthwise separable + convolution. + + It is implemented via the following steps: + + - Split the input into individual channels. + - Convolve each channel with an individual depthwise kernel with + `depth_multiplier` output channels. + - Concatenate the convolved outputs along the channels axis. + + Unlike a regular 1D convolution, depthwise convolution does not mix + information across different input channels. + + The `depth_multiplier` argument determines how many filter are applied to + one input channel. As such, it controls the amount of output channels that + are generated per input channel in the depthwise step. + + Args: + kernel_size: An integer, specifying the height and width of the 1D + convolution window. Can be a single integer to specify the same value + for all spatial dimensions. 
+ strides: An integer, specifying the stride length of the convolution + along the single spatial dimension. Specifying any stride value != 1 is incompatible + with specifying any `dilation_rate` value != 1. + padding: one of `'valid'` or `'same'` (case-insensitive). `"valid"` means + no padding. `"same"` results in padding with zeros evenly to the + left/right of the input such that output has the same + length as the input. + depth_multiplier: The number of depthwise convolution output channels for + each input channel. The total number of depthwise convolution output + channels will be equal to `filters_in * depth_multiplier`. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape `(batch_size, length, + channels)` while `channels_first` corresponds to inputs with + shape `(batch_size, channels, length)`. When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + dilation_rate: A single integer, specifying the dilation rate to use for + dilated convolution. Currently, specifying any `dilation_rate` + value != 1 is incompatible with specifying any stride value != 1. + activation: Activation function to use. If you don't specify anything, no + activation is applied (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias vector. + depthwise_initializer: Initializer for the depthwise kernel matrix (see + `keras.initializers`). If None, the default initializer + ('glorot_uniform') will be used. + bias_initializer: Initializer for the bias vector (see + `keras.initializers`). If None, the default initializer ('zeros') will + be used. + depthwise_regularizer: Regularizer function applied to the depthwise + kernel matrix (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector (see + `keras.regularizers`). + activity_regularizer: Regularizer function applied to the output of the + layer (its 'activation') (see `keras.regularizers`). + depthwise_constraint: Constraint function applied to the depthwise kernel + matrix (see `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector (see + `keras.constraints`). + + Input shape: + 3D tensor with shape: `[batch_size, channels, input_dim]` if + data_format='channels_first' + or 3D tensor with shape: `[batch_size, input_dim, channels]` if + data_format='channels_last'. + + Output shape: + 3D tensor with shape: + `[batch_size, channels * depth_multiplier, new_dims]` + if `data_format='channels_first'` + or 3D tensor with shape: `[batch_size, + new_dims, channels * depth_multiplier]` if + `data_format='channels_last'`. `new_dims` values might have + changed due to padding. + + Returns: + A tensor of rank 3 representing + `activation(depthwiseconv1d(inputs, kernel) + bias)`. + + Raises: + ValueError: if `padding` is "causal". + ValueError: when both `strides` > 1 and `dilation_rate` > 1.
+ """ + + def __init__( + self, + kernel_size, + strides=1, + padding="valid", + depth_multiplier=1, + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + bias_constraint=None, + **kwargs + ): + super().__init__( + 1, + kernel_size=kernel_size, + strides=strides, + padding=padding, + depth_multiplier=depth_multiplier, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + bias_constraint=bias_constraint, + **kwargs + ) + + def call(self, inputs): + if self.data_format == "channels_last": + strides = (1,) + self.strides * 2 + (1,) + spatial_start_dim = 1 + else: + strides = (1, 1) + self.strides * 2 + spatial_start_dim = 2 + inputs = tf.expand_dims(inputs, spatial_start_dim) + depthwise_kernel = tf.expand_dims(self.depthwise_kernel, axis=0) + dilation_rate = (1,) + self.dilation_rate + + outputs = tf.nn.depthwise_conv2d( + inputs, + depthwise_kernel, + strides=strides, + padding=self.padding.upper(), + dilations=dilation_rate, + data_format=conv_utils.convert_data_format( + self.data_format, ndim=4 + ), + ) + + if self.use_bias: + outputs = tf.nn.bias_add( + outputs, + self.bias, + data_format=conv_utils.convert_data_format( + self.data_format, ndim=4 + ), + ) + + outputs = tf.squeeze(outputs, [spatial_start_dim]) + + if self.activation is not None: + return self.activation(outputs) + + return outputs + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if self.data_format == "channels_first": + input_dim = input_shape[2] + out_filters = input_shape[1] * self.depth_multiplier + elif self.data_format == "channels_last": + input_dim = input_shape[1] + out_filters = input_shape[2] * self.depth_multiplier + + input_dim = conv_utils.conv_output_length( + input_dim, + self.kernel_size[0], + self.padding, + self.strides[0], + self.dilation_rate[0], + ) + if self.data_format == "channels_first": + return (input_shape[0], out_filters, input_dim) + elif self.data_format == "channels_last": + return (input_shape[0], input_dim, out_filters) diff --git a/keras/layers/convolutional/depthwise_conv2d.py b/keras/layers/convolutional/depthwise_conv2d.py index 202eeeae1c8d..24edea729669 100644 --- a/keras/layers/convolutional/depthwise_conv2d.py +++ b/keras/layers/convolutional/depthwise_conv2d.py @@ -13,184 +13,197 @@ # limitations under the License. # ============================================================================== """Keras depthwise 2D convolution.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import backend from keras.layers.convolutional.base_depthwise_conv import DepthwiseConv from keras.utils import conv_utils from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.DepthwiseConv2D') +@keras_export("keras.layers.DepthwiseConv2D") class DepthwiseConv2D(DepthwiseConv): - """Depthwise 2D convolution. 
- - Depthwise convolution is a type of convolution in which each input channel is - convolved with a different kernel (called a depthwise kernel). You - can understand depthwise convolution as the first step in a depthwise - separable convolution. - - It is implemented via the following steps: - - - Split the input into individual channels. - - Convolve each channel with an individual depthwise kernel with - `depth_multiplier` output channels. - - Concatenate the convolved outputs along the channels axis. - - Unlike a regular 2D convolution, depthwise convolution does not mix - information across different input channels. - - The `depth_multiplier` argument determines how many filter are applied to one - input channel. As such, it controls the amount of output channels that are - generated per input channel in the depthwise step. - - Args: - kernel_size: An integer or tuple/list of 2 integers, specifying the height - and width of the 2D convolution window. Can be a single integer to specify - the same value for all spatial dimensions. - strides: An integer or tuple/list of 2 integers, specifying the strides of - the convolution along the height and width. Can be a single integer to - specify the same value for all spatial dimensions. Specifying any stride - value != 1 is incompatible with specifying any `dilation_rate` value != 1. - padding: one of `'valid'` or `'same'` (case-insensitive). `"valid"` means no - padding. `"same"` results in padding with zeros evenly to the left/right - or up/down of the input such that output has the same height/width - dimension as the input. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. `channels_last` corresponds - to inputs with shape `(batch_size, height, width, channels)` while - `channels_first` corresponds to inputs with shape `(batch_size, channels, - height, width)`. It defaults to the `image_data_format` value found in - your Keras config file at `~/.keras/keras.json`. If you never set it, then - it will be 'channels_last'. - dilation_rate: An integer or tuple/list of 2 integers, specifying the - dilation rate to use for dilated convolution. Currently, specifying any - `dilation_rate` value != 1 is incompatible with specifying any `strides` - value != 1. - activation: Activation function to use. If you don't specify anything, no - activation is applied (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: Initializer for the depthwise kernel matrix (see - `keras.initializers`). If None, the default initializer - ('glorot_uniform') will be used. - bias_initializer: Initializer for the bias vector (see - `keras.initializers`). If None, the default initializer ('zeros') will be - used. - depthwise_regularizer: Regularizer function applied to the depthwise kernel - matrix (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector (see - `keras.regularizers`). - activity_regularizer: Regularizer function applied to the output of the - layer (its 'activation') (see `keras.regularizers`). - depthwise_constraint: Constraint function applied to the depthwise kernel - matrix (see `keras.constraints`). 
- bias_constraint: Constraint function applied to the bias vector (see - `keras.constraints`). - - Input shape: - 4D tensor with shape: `[batch_size, channels, rows, cols]` if - data_format='channels_first' - or 4D tensor with shape: `[batch_size, rows, cols, channels]` if - data_format='channels_last'. - - Output shape: - 4D tensor with shape: `[batch_size, channels * depth_multiplier, new_rows, - new_cols]` if `data_format='channels_first'` - or 4D tensor with shape: `[batch_size, - new_rows, new_cols, channels * depth_multiplier]` if - `data_format='channels_last'`. `rows` and `cols` values might have changed - due to padding. - - Returns: - A tensor of rank 4 representing - `activation(depthwiseconv2d(inputs, kernel) + bias)`. - - Raises: - ValueError: if `padding` is "causal". - ValueError: when both `strides` > 1 and `dilation_rate` > 1. - """ - - def __init__(self, - kernel_size, - strides=(1, 1), - padding='valid', - depth_multiplier=1, - data_format=None, - dilation_rate=(1, 1), - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - 2, - kernel_size=kernel_size, - strides=strides, - padding=padding, - depth_multiplier=depth_multiplier, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - depthwise_initializer=depthwise_initializer, - bias_initializer=bias_initializer, - depthwise_regularizer=depthwise_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - depthwise_constraint=depthwise_constraint, - bias_constraint=bias_constraint, - **kwargs) - - def call(self, inputs): - outputs = backend.depthwise_conv2d( - inputs, - self.depthwise_kernel, - strides=self.strides, - padding=self.padding, - dilation_rate=self.dilation_rate, - data_format=self.data_format) - - if self.use_bias: - outputs = backend.bias_add( - outputs, - self.bias, - data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - - return outputs - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - rows = input_shape[2] - cols = input_shape[3] - out_filters = input_shape[1] * self.depth_multiplier - elif self.data_format == 'channels_last': - rows = input_shape[1] - cols = input_shape[2] - out_filters = input_shape[3] * self.depth_multiplier - - rows = conv_utils.conv_output_length(rows, self.kernel_size[0], - self.padding, - self.strides[0], - self.dilation_rate[0]) - cols = conv_utils.conv_output_length(cols, self.kernel_size[1], - self.padding, - self.strides[1], - self.dilation_rate[1]) - if self.data_format == 'channels_first': - return (input_shape[0], out_filters, rows, cols) - elif self.data_format == 'channels_last': - return (input_shape[0], rows, cols, out_filters) + """Depthwise 2D convolution. + + Depthwise convolution is a type of convolution in which each input channel + is convolved with a different kernel (called a depthwise kernel). You can + understand depthwise convolution as the first step in a depthwise separable + convolution. + + It is implemented via the following steps: + + - Split the input into individual channels. + - Convolve each channel with an individual depthwise kernel with + `depth_multiplier` output channels. 
+ - Concatenate the convolved outputs along the channels axis. + + Unlike a regular 2D convolution, depthwise convolution does not mix + information across different input channels. + + The `depth_multiplier` argument determines how many filters are applied to + one input channel. As such, it controls the number of output channels that + are generated per input channel in the depthwise step. + + Args: + kernel_size: An integer or tuple/list of 2 integers, specifying the height + and width of the 2D convolution window. Can be a single integer to + specify the same value for all spatial dimensions. + strides: An integer or tuple/list of 2 integers, specifying the strides of + the convolution along the height and width. Can be a single integer to + specify the same value for all spatial dimensions. Current + implementation only supports equal length strides in row and + column dimensions. Specifying any stride value != 1 is incompatible + with specifying any `dilation_rate` value != 1. + padding: one of `'valid'` or `'same'` (case-insensitive). `"valid"` means + no padding. `"same"` results in padding with zeros evenly to the + left/right or up/down of the input such that output has the same + height/width dimension as the input. + depth_multiplier: The number of depthwise convolution output channels for + each input channel. The total number of depthwise convolution output + channels will be equal to `filters_in * depth_multiplier`. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape `(batch_size, height, + width, channels)` while `channels_first` corresponds to inputs with + shape `(batch_size, channels, height, width)`. When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + dilation_rate: An integer or tuple/list of 2 integers, specifying the + dilation rate to use for dilated convolution. Currently, specifying any + `dilation_rate` value != 1 is incompatible with specifying any `strides` + value != 1. + activation: Activation function to use. If you don't specify anything, no + activation is applied (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias vector. + depthwise_initializer: Initializer for the depthwise kernel matrix (see + `keras.initializers`). If None, the default initializer + ('glorot_uniform') will be used. + bias_initializer: Initializer for the bias vector (see + `keras.initializers`). If None, the default initializer ('zeros') will + be used. + depthwise_regularizer: Regularizer function applied to the depthwise + kernel matrix (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector (see + `keras.regularizers`). + activity_regularizer: Regularizer function applied to the output of the + layer (its 'activation') (see `keras.regularizers`). + depthwise_constraint: Constraint function applied to the depthwise kernel + matrix (see `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector (see + `keras.constraints`). + + Input shape: + 4D tensor with shape: `[batch_size, channels, rows, cols]` if + data_format='channels_first' + or 4D tensor with shape: `[batch_size, rows, cols, channels]` if + data_format='channels_last'.
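
For context on `depth_multiplier`: each input channel is filtered by its own `depth_multiplier` kernels, so the channel axis of the output grows to `channels_in * depth_multiplier`, exactly as the `compute_output_shape` method below computes. A quick sketch (assuming TF 2.x; not part of the diff):

import tensorflow as tf

layer = tf.keras.layers.DepthwiseConv2D(kernel_size=3, depth_multiplier=2)
y = layer(tf.zeros((1, 7, 6, 3)))
# Rows/cols shrink by kernel_size - 1 under the default "valid" padding;
# channels grow from 3 to 3 * depth_multiplier = 6.
print(y.shape)  # (1, 5, 4, 6)
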
+ + Output shape: + 4D tensor with shape: `[batch_size, channels * depth_multiplier, new_rows, + new_cols]` if `data_format='channels_first'` + or 4D tensor with shape: `[batch_size, + new_rows, new_cols, channels * depth_multiplier]` if + `data_format='channels_last'`. `rows` and `cols` values might have + changed due to padding. + + Returns: + A tensor of rank 4 representing + `activation(depthwiseconv2d(inputs, kernel) + bias)`. + + Raises: + ValueError: if `padding` is "causal". + ValueError: when both `strides` > 1 and `dilation_rate` > 1. + """ + + def __init__( + self, + kernel_size, + strides=(1, 1), + padding="valid", + depth_multiplier=1, + data_format=None, + dilation_rate=(1, 1), + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + bias_constraint=None, + **kwargs + ): + super().__init__( + 2, + kernel_size=kernel_size, + strides=strides, + padding=padding, + depth_multiplier=depth_multiplier, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + bias_constraint=bias_constraint, + **kwargs + ) + + def call(self, inputs): + outputs = backend.depthwise_conv2d( + inputs, + self.depthwise_kernel, + strides=self.strides, + padding=self.padding, + dilation_rate=self.dilation_rate, + data_format=self.data_format, + ) + + if self.use_bias: + outputs = backend.bias_add( + outputs, self.bias, data_format=self.data_format + ) + + if self.activation is not None: + return self.activation(outputs) + + return outputs + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if self.data_format == "channels_first": + rows = input_shape[2] + cols = input_shape[3] + out_filters = input_shape[1] * self.depth_multiplier + elif self.data_format == "channels_last": + rows = input_shape[1] + cols = input_shape[2] + out_filters = input_shape[3] * self.depth_multiplier + + rows = conv_utils.conv_output_length( + rows, + self.kernel_size[0], + self.padding, + self.strides[0], + self.dilation_rate[0], + ) + cols = conv_utils.conv_output_length( + cols, + self.kernel_size[1], + self.padding, + self.strides[1], + self.dilation_rate[1], + ) + if self.data_format == "channels_first": + return (input_shape[0], out_filters, rows, cols) + elif self.data_format == "channels_last": + return (input_shape[0], rows, cols, out_filters) diff --git a/keras/layers/convolutional/depthwise_conv_test.py b/keras/layers/convolutional/depthwise_conv_test.py index e324ec40be20..dd8e58584970 100644 --- a/keras/layers/convolutional/depthwise_conv_test.py +++ b/keras/layers/convolutional/depthwise_conv_test.py @@ -14,124 +14,130 @@ # ============================================================================== """Tests for depthwise convolutional layers.""" +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class DepthwiseConv1DTest(test_combinations.TestCase): + def _run_test(self, kwargs, expected_output_shape=None): + num_samples 
= 2 + stack_size = 3 + num_row = 7 + + with self.cached_session(): + test_utils.layer_test( + keras.layers.DepthwiseConv1D, + kwargs=kwargs, + input_shape=(num_samples, num_row, stack_size), + expected_output_shape=expected_output_shape, + ) + + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}), + ("padding_same", {"padding": "same"}), + ("strides", {"strides": 2}), + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. + ("data_format", {"data_format": "channels_first"}), + ("depth_multiplier_1", {"depth_multiplier": 1}), + ("depth_multiplier_2", {"depth_multiplier": 2}), + ("dilation_rate", {"dilation_rate": 2}, (None, 3, 3)), + ) + def test_depthwise_conv1d(self, kwargs, expected_output_shape=None): + kwargs["kernel_size"] = 3 + if "data_format" not in kwargs or tf.test.is_gpu_available( + cuda_only=True + ): + self._run_test(kwargs, expected_output_shape) + + def test_depthwise_conv1d_full(self): + kwargs = { + "kernel_size": 3, + "padding": "valid", + "data_format": "channels_last", + "dilation_rate": 1, + "activation": None, + "depthwise_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "depthwise_constraint": "unit_norm", + "use_bias": True, + "strides": 2, + "depth_multiplier": 1, + } + self._run_test(kwargs) - def _run_test(self, kwargs, expected_output_shape=None): - num_samples = 2 - stack_size = 3 - num_row = 7 - - with self.cached_session(): - test_utils.layer_test( - keras.layers.DepthwiseConv1D, - kwargs=kwargs, - input_shape=(num_samples, num_row, stack_size), - expected_output_shape=expected_output_shape) - - @parameterized.named_parameters( - ('padding_valid', { - 'padding': 'valid' - }), - ('padding_same', { - 'padding': 'same' - }), - ('strides', { - 'strides': 2 - }), - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. 
- ('data_format', { - 'data_format': 'channels_first' - }), - ('depth_multiplier_1', { - 'depth_multiplier': 1 - }), - ('depth_multiplier_2', { - 'depth_multiplier': 2 - }), - ('dilation_rate', { - 'dilation_rate': 2 - }, (None, 3, 3)), - ) - def test_depthwise_conv1d(self, kwargs, expected_output_shape=None): - kwargs['kernel_size'] = 3 - if 'data_format' not in kwargs or tf.test.is_gpu_available(cuda_only=True): - self._run_test(kwargs, expected_output_shape) - - def test_depthwise_conv1d_full(self): - kwargs = { - 'kernel_size': 3, - 'padding': 'valid', - 'data_format': 'channels_last', - 'dilation_rate': 1, - 'activation': None, - 'depthwise_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'depthwise_constraint': 'unit_norm', - 'use_bias': True, - 'strides': 2, - 'depth_multiplier': 1, - } - self._run_test(kwargs) + def test_depthwise_conv1d_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": 2, "dilation_rate": 2} + with self.assertRaisesRegex( + ValueError, r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.DepthwiseConv1D(kernel_size=2, **kwargs) @test_combinations.run_all_keras_modes class DepthwiseConv2DTest(test_combinations.TestCase): + def _run_test(self, kwargs, expected_output_shape=None): + num_samples = 2 + stack_size = 3 + num_row = 7 + num_col = 6 + + with self.cached_session(): + test_utils.layer_test( + keras.layers.DepthwiseConv2D, + kwargs=kwargs, + input_shape=(num_samples, num_row, num_col, stack_size), + expected_output_shape=expected_output_shape, + ) + + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}), + ("padding_same", {"padding": "same"}), + ("strides", {"strides": (2, 2)}), + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. 
+ ("data_format", {"data_format": "channels_first"}), + ("depth_multiplier_1", {"depth_multiplier": 1}), + ("depth_multiplier_2", {"depth_multiplier": 2}), + ("dilation_rate", {"dilation_rate": (2, 2)}, (None, 3, 2, 3)), + ) + def test_depthwise_conv2d(self, kwargs, expected_output_shape=None): + kwargs["kernel_size"] = (3, 3) + if "data_format" not in kwargs or tf.test.is_gpu_available( + cuda_only=True + ): + self._run_test(kwargs, expected_output_shape) + + def test_depthwise_conv2d_full(self): + kwargs = { + "kernel_size": 3, + "padding": "valid", + "data_format": "channels_last", + "dilation_rate": (1, 1), + "activation": None, + "depthwise_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "depthwise_constraint": "unit_norm", + "use_bias": True, + "strides": (2, 2), + "depth_multiplier": 1, + } + self._run_test(kwargs) + + def test_depthwise_conv2d_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": [2, 1], "dilation_rate": [2, 1]} + with self.assertRaisesRegex( + ValueError, r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.DepthwiseConv2D(kernel_size=2, **kwargs) + - def _run_test(self, kwargs, expected_output_shape=None): - num_samples = 2 - stack_size = 3 - num_row = 7 - num_col = 6 - - with self.cached_session(): - test_utils.layer_test( - keras.layers.DepthwiseConv2D, - kwargs=kwargs, - input_shape=(num_samples, num_row, num_col, stack_size), - expected_output_shape=expected_output_shape) - - @parameterized.named_parameters( - ('padding_valid', {'padding': 'valid'}), - ('padding_same', {'padding': 'same'}), - ('strides', {'strides': (2, 2)}), - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. - ('data_format', {'data_format': 'channels_first'}), - ('depth_multiplier_1', {'depth_multiplier': 1}), - ('depth_multiplier_2', {'depth_multiplier': 2}), - ('dilation_rate', {'dilation_rate': (2, 2)}, (None, 3, 2, 3)), - ) - def test_depthwise_conv2d(self, kwargs, expected_output_shape=None): - kwargs['kernel_size'] = (3, 3) - if 'data_format' not in kwargs or tf.test.is_gpu_available(cuda_only=True): - self._run_test(kwargs, expected_output_shape) - - def test_depthwise_conv2d_full(self): - kwargs = { - 'kernel_size': 3, - 'padding': 'valid', - 'data_format': 'channels_last', - 'dilation_rate': (1, 1), - 'activation': None, - 'depthwise_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'depthwise_constraint': 'unit_norm', - 'use_bias': True, - 'strides': (2, 2), - 'depth_multiplier': 1, - } - self._run_test(kwargs) - -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/convolutional/separable_conv1d.py b/keras/layers/convolutional/separable_conv1d.py index 2f070a3f54ad..46ade298d0ff 100644 --- a/keras/layers/convolutional/separable_conv1d.py +++ b/keras/layers/convolutional/separable_conv1d.py @@ -13,7 +13,9 @@ # limitations under the License. 
# ============================================================================== """Keras depthwise separable 1D convolution.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import activations from keras import constraints @@ -21,185 +23,199 @@ from keras import regularizers from keras.layers.convolutional.base_separable_conv import SeparableConv from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.SeparableConv1D', - 'keras.layers.SeparableConvolution1D') +@keras_export( + "keras.layers.SeparableConv1D", "keras.layers.SeparableConvolution1D" +) class SeparableConv1D(SeparableConv): - """Depthwise separable 1D convolution. - - This layer performs a depthwise convolution that acts separately on - channels, followed by a pointwise convolution that mixes channels. - If `use_bias` is True and a bias initializer is provided, - it adds a bias vector to the output. - It then optionally applies an activation function to produce the final output. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A single integer specifying the spatial - dimensions of the filters. - strides: A single integer specifying the strides - of the convolution. - Specifying any `stride` value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"`, `"same"`, or `"causal"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros evenly - to the left/right or up/down of the input such that output has the same - height/width dimension as the input. `"causal"` results in causal - (dilated) convolutions, e.g. `output[t]` does not depend on `input[t+1:]`. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch_size, channels, length)`. - dilation_rate: A single integer, specifying - the dilation rate to use for dilated convolution. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. The total number of depthwise convolution output - channels will be equal to `num_filters_in * depth_multiplier`. - activation: Activation function to use. - If you don't specify anything, no activation is applied - (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias. - depthwise_initializer: An initializer for the depthwise convolution kernel - (see `keras.initializers`). If None, then the default initializer - ('glorot_uniform') will be used. - pointwise_initializer: An initializer for the pointwise convolution kernel - (see `keras.initializers`). If None, then the default initializer - ('glorot_uniform') will be used. - bias_initializer: An initializer for the bias vector. If None, the default - initializer ('zeros') will be used (see `keras.initializers`). - depthwise_regularizer: Optional regularizer for the depthwise - convolution kernel (see `keras.regularizers`). - pointwise_regularizer: Optional regularizer for the pointwise - convolution kernel (see `keras.regularizers`). - bias_regularizer: Optional regularizer for the bias vector - (see `keras.regularizers`). 
- activity_regularizer: Optional regularizer function for the output - (see `keras.regularizers`). - depthwise_constraint: Optional projection function to be applied to the - depthwise kernel after being updated by an `Optimizer` (e.g. used for - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training - (see `keras.constraints`). - pointwise_constraint: Optional projection function to be applied to the - pointwise kernel after being updated by an `Optimizer` - (see `keras.constraints`). - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer` - (see `keras.constraints`). - trainable: Boolean, if `True` the weights of this layer will be marked as - trainable (and listed in `layer.trainable_weights`). - - Input shape: - 3D tensor with shape: - `(batch_size, channels, steps)` if data_format='channels_first' - or 3D tensor with shape: - `(batch_size, steps, channels)` if data_format='channels_last'. - - Output shape: - 3D tensor with shape: - `(batch_size, filters, new_steps)` if data_format='channels_first' - or 3D tensor with shape: - `(batch_size, new_steps, filters)` if data_format='channels_last'. - `new_steps` value might have changed due to padding or strides. - - Returns: - A tensor of rank 3 representing - `activation(separableconv1d(inputs, kernel) + bias)`. - """ - - def __init__(self, - filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1, - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - pointwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - rank=1, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - depth_multiplier=depth_multiplier, - activation=activations.get(activation), - use_bias=use_bias, - depthwise_initializer=initializers.get(depthwise_initializer), - pointwise_initializer=initializers.get(pointwise_initializer), - bias_initializer=initializers.get(bias_initializer), - depthwise_regularizer=regularizers.get(depthwise_regularizer), - pointwise_regularizer=regularizers.get(pointwise_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - depthwise_constraint=constraints.get(depthwise_constraint), - pointwise_constraint=constraints.get(pointwise_constraint), - bias_constraint=constraints.get(bias_constraint), - **kwargs) - - def call(self, inputs): - if self.padding == 'causal': - inputs = tf.pad(inputs, self._compute_causal_padding(inputs)) - if self.data_format == 'channels_last': - strides = (1,) + self.strides * 2 + (1,) - spatial_start_dim = 1 - else: - strides = (1, 1) + self.strides * 2 - spatial_start_dim = 2 - - # Explicitly broadcast inputs and kernels to 4D. - # TODO(fchollet): refactor when a native separable_conv1d op is available. 
- inputs = tf.expand_dims(inputs, spatial_start_dim) - depthwise_kernel = tf.expand_dims(self.depthwise_kernel, 0) - pointwise_kernel = tf.expand_dims(self.pointwise_kernel, 0) - dilation_rate = (1,) + self.dilation_rate - - if self.padding == 'causal': - op_padding = 'valid' - else: - op_padding = self.padding - outputs = tf.compat.v1.nn.separable_conv2d( - inputs, - depthwise_kernel, - pointwise_kernel, - strides=strides, - padding=op_padding.upper(), - rate=dilation_rate, - data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) - - if self.use_bias: - outputs = tf.nn.bias_add( - outputs, - self.bias, - data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) - - outputs = tf.squeeze(outputs, [spatial_start_dim]) - - if self.activation is not None: - return self.activation(outputs) - return outputs + """Depthwise separable 1D convolution. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. + It then optionally applies an activation function to produce the final + output. + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: A single integer specifying the spatial + dimensions of the filters. + strides: A single integer specifying the strides + of the convolution. + Specifying any `stride` value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"`, `"same"`, or `"causal"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding with zeros + evenly to the left/right or up/down of the input such that output has + the same height/width dimension as the input. `"causal"` results in + causal (dilated) convolutions, e.g. `output[t]` does not depend on + `input[t+1:]`. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch_size, channels, length)`. + dilation_rate: A single integer, specifying + the dilation rate to use for dilated convolution. + depth_multiplier: The number of depthwise convolution output channels for + each input channel. The total number of depthwise convolution output + channels will be equal to `num_filters_in * depth_multiplier`. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias. + depthwise_initializer: An initializer for the depthwise convolution kernel + (see `keras.initializers`). If None, then the default initializer + ('glorot_uniform') will be used. + pointwise_initializer: An initializer for the pointwise convolution kernel + (see `keras.initializers`). If None, then the default initializer + ('glorot_uniform') will be used. + bias_initializer: An initializer for the bias vector. If None, the default + initializer ('zeros') will be used (see `keras.initializers`). + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel (see `keras.regularizers`). + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel (see `keras.regularizers`). 
+ bias_regularizer: Optional regularizer for the bias vector + (see `keras.regularizers`). + activity_regularizer: Optional regularizer function for the output + (see `keras.regularizers`). + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. used for + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training + (see `keras.constraints`). + pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer` + (see `keras.constraints`). + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer` + (see `keras.constraints`). + trainable: Boolean, if `True` the weights of this layer will be marked as + trainable (and listed in `layer.trainable_weights`). + + Input shape: + 3D tensor with shape: + `(batch_size, channels, steps)` if data_format='channels_first' + or 3D tensor with shape: + `(batch_size, steps, channels)` if data_format='channels_last'. + + Output shape: + 3D tensor with shape: + `(batch_size, filters, new_steps)` if data_format='channels_first' + or 3D tensor with shape: + `(batch_size, new_steps, filters)` if data_format='channels_last'. + `new_steps` value might have changed due to padding or strides. + + Returns: + A tensor of rank 3 representing + `activation(separableconv1d(inputs, kernel) + bias)`. + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + pointwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs + ): + super().__init__( + rank=1, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + depth_multiplier=depth_multiplier, + activation=activations.get(activation), + use_bias=use_bias, + depthwise_initializer=initializers.get(depthwise_initializer), + pointwise_initializer=initializers.get(pointwise_initializer), + bias_initializer=initializers.get(bias_initializer), + depthwise_regularizer=regularizers.get(depthwise_regularizer), + pointwise_regularizer=regularizers.get(pointwise_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + depthwise_constraint=constraints.get(depthwise_constraint), + pointwise_constraint=constraints.get(pointwise_constraint), + bias_constraint=constraints.get(bias_constraint), + **kwargs + ) + + def call(self, inputs): + if self.padding == "causal": + inputs = tf.pad(inputs, self._compute_causal_padding(inputs)) + if self.data_format == "channels_last": + strides = (1,) + self.strides * 2 + (1,) + spatial_start_dim = 1 + else: + strides = (1, 1) + self.strides * 2 + spatial_start_dim = 2 + + # Explicitly broadcast inputs and kernels to 4D. + # TODO(fchollet): refactor when a native separable_conv1d op is + # available. 
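The broadcast below is the crux of `SeparableConv1D.call`: TensorFlow ships no native 1D separable-conv op, so the layer inserts a dummy height dimension and reuses `tf.nn.separable_conv2d` over a height-1 image. A standalone sketch of the same trick, with illustrative shapes and `channels_last` data:

    import tensorflow as tf

    batch, steps, in_ch, mult, filters, k = 2, 9, 3, 1, 4, 3
    x = tf.random.normal([batch, steps, in_ch])        # 1D input
    dw = tf.random.normal([k, in_ch, mult])            # 1D depthwise kernel
    pw = tf.random.normal([1, in_ch * mult, filters])  # 1D pointwise kernel

    # Broadcast everything to 4D and run the 2D op over a height-1 image.
    y = tf.nn.separable_conv2d(
        tf.expand_dims(x, 1),       # (batch, 1, steps, in_ch)
        tf.expand_dims(dw, 0),      # (1, k, in_ch, mult)
        tf.expand_dims(pw, 0),      # (1, 1, in_ch * mult, filters)
        strides=[1, 1, 1, 1],
        padding="VALID",
    )
    y = tf.squeeze(y, [1])          # back to (batch, new_steps, filters)
    print(y.shape)                  # (2, 7, 4)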
+ inputs = tf.expand_dims(inputs, spatial_start_dim) + depthwise_kernel = tf.expand_dims(self.depthwise_kernel, 0) + pointwise_kernel = tf.expand_dims(self.pointwise_kernel, 0) + dilation_rate = (1,) + self.dilation_rate + + if self.padding == "causal": + op_padding = "valid" + else: + op_padding = self.padding + outputs = tf.compat.v1.nn.separable_conv2d( + inputs, + depthwise_kernel, + pointwise_kernel, + strides=strides, + padding=op_padding.upper(), + rate=dilation_rate, + data_format=conv_utils.convert_data_format( + self.data_format, ndim=4 + ), + ) + + if self.use_bias: + outputs = tf.nn.bias_add( + outputs, + self.bias, + data_format=conv_utils.convert_data_format( + self.data_format, ndim=4 + ), + ) + + outputs = tf.squeeze(outputs, [spatial_start_dim]) + + if self.activation is not None: + return self.activation(outputs) + return outputs + # Alias diff --git a/keras/layers/convolutional/separable_conv2d.py b/keras/layers/convolutional/separable_conv2d.py index 9f484d918a6d..18e9ad49555c 100644 --- a/keras/layers/convolutional/separable_conv2d.py +++ b/keras/layers/convolutional/separable_conv2d.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Keras depthwise separable 2D convolution.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import activations from keras import constraints @@ -21,180 +23,193 @@ from keras import regularizers from keras.layers.convolutional.base_separable_conv import SeparableConv from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.SeparableConv2D', - 'keras.layers.SeparableConvolution2D') +@keras_export( + "keras.layers.SeparableConv2D", "keras.layers.SeparableConvolution2D" +) class SeparableConv2D(SeparableConv): - """Depthwise separable 2D convolution. - - Separable convolutions consist of first performing - a depthwise spatial convolution - (which acts on each input channel separately) - followed by a pointwise convolution which mixes the resulting - output channels. The `depth_multiplier` argument controls how many - output channels are generated per input channel in the depthwise step. - - Intuitively, separable convolutions can be understood as - a way to factorize a convolution kernel into two smaller kernels, - or as an extreme version of an Inception block. - - Args: - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. Current implementation only supports equal - length strides in the row and column dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding with zeros evenly - to the left/right or up/down of the input such that output has the same - height/width dimension as the input. 
- data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch_size, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - depth_multiplier: The number of depthwise convolution output channels - for each input channel. - The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - activation: Activation function to use. - If you don't specify anything, no activation is applied - (see `keras.activations`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: An initializer for the depthwise convolution kernel - (see `keras.initializers`). If None, then the default initializer - ('glorot_uniform') will be used. - pointwise_initializer: An initializer for the pointwise convolution kernel - (see `keras.initializers`). If None, then the default initializer - ('glorot_uniform') will be used. - bias_initializer: An initializer for the bias vector. If None, the default - initializer ('zeros') will be used (see `keras.initializers`). - depthwise_regularizer: Regularizer function applied to - the depthwise kernel matrix (see `keras.regularizers`). - pointwise_regularizer: Regularizer function applied to - the pointwise kernel matrix (see `keras.regularizers`). - bias_regularizer: Regularizer function applied to the bias vector - (see `keras.regularizers`). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation") - (see `keras.regularizers`). - depthwise_constraint: Constraint function applied to - the depthwise kernel matrix - (see `keras.constraints`). - pointwise_constraint: Constraint function applied to - the pointwise kernel matrix - (see `keras.constraints`). - bias_constraint: Constraint function applied to the bias vector - (see `keras.constraints`). - - Input shape: - 4D tensor with shape: - `(batch_size, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(batch_size, rows, cols, channels)` if data_format='channels_last'. - - Output shape: - 4D tensor with shape: - `(batch_size, filters, new_rows, new_cols)` if data_format='channels_first' - or 4D tensor with shape: - `(batch_size, new_rows, new_cols, filters)` if data_format='channels_last'. - `rows` and `cols` values might have changed due to padding. - - Returns: - A tensor of rank 4 representing - `activation(separableconv2d(inputs, kernel) + bias)`. - - Raises: - ValueError: if `padding` is "causal". 
- """ - - def __init__(self, - filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - pointwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - rank=2, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - depth_multiplier=depth_multiplier, - activation=activations.get(activation), - use_bias=use_bias, - depthwise_initializer=initializers.get(depthwise_initializer), - pointwise_initializer=initializers.get(pointwise_initializer), - bias_initializer=initializers.get(bias_initializer), - depthwise_regularizer=regularizers.get(depthwise_regularizer), - pointwise_regularizer=regularizers.get(pointwise_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - depthwise_constraint=constraints.get(depthwise_constraint), - pointwise_constraint=constraints.get(pointwise_constraint), - bias_constraint=constraints.get(bias_constraint), - **kwargs) - - def call(self, inputs): - # Apply the actual ops. - if self.data_format == 'channels_last': - strides = (1,) + self.strides + (1,) - else: - strides = (1, 1) + self.strides - outputs = tf.compat.v1.nn.separable_conv2d( - inputs, - self.depthwise_kernel, - self.pointwise_kernel, - strides=strides, - padding=self.padding.upper(), - rate=self.dilation_rate, - data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) - - if self.use_bias: - outputs = tf.nn.bias_add( - outputs, - self.bias, - data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) - - if self.activation is not None: - return self.activation(outputs) - return outputs + """Depthwise separable 2D convolution. + + Separable convolutions consist of first performing + a depthwise spatial convolution + (which acts on each input channel separately) + followed by a pointwise convolution which mixes the resulting + output channels. The `depth_multiplier` argument controls how many + output channels are generated per input channel in the depthwise step. + + Intuitively, separable convolutions can be understood as + a way to factorize a convolution kernel into two smaller kernels, + or as an extreme version of an Inception block. + + Args: + filters: Integer, the dimensionality of the output space + (i.e. the number of output filters in the convolution). + kernel_size: An integer or tuple/list of 2 integers, specifying the + height and width of the 2D convolution window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the convolution along the height and width. + Can be a single integer to specify the same value for + all spatial dimensions. Current implementation only supports equal + length strides in the row and column dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. 
`"same"` results in padding with zeros + evenly to the left/right or up/down of the input such that output has + the same height/width dimension as the input. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch_size, channels, height, width)`. + When unspecified, uses `image_data_format` value found in your Keras + config file at `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + dilation_rate: An integer or tuple/list of 2 integers, specifying + the dilation rate to use for dilated convolution. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. + The total number of depthwise convolution output + channels will be equal to `filters_in * depth_multiplier`. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (see `keras.activations`). + use_bias: Boolean, whether the layer uses a bias vector. + depthwise_initializer: An initializer for the depthwise convolution kernel + (see `keras.initializers`). If None, then the default initializer + ('glorot_uniform') will be used. + pointwise_initializer: An initializer for the pointwise convolution kernel + (see `keras.initializers`). If None, then the default initializer + ('glorot_uniform') will be used. + bias_initializer: An initializer for the bias vector. If None, the default + initializer ('zeros') will be used (see `keras.initializers`). + depthwise_regularizer: Regularizer function applied to + the depthwise kernel matrix (see `keras.regularizers`). + pointwise_regularizer: Regularizer function applied to + the pointwise kernel matrix (see `keras.regularizers`). + bias_regularizer: Regularizer function applied to the bias vector + (see `keras.regularizers`). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation") + (see `keras.regularizers`). + depthwise_constraint: Constraint function applied to + the depthwise kernel matrix + (see `keras.constraints`). + pointwise_constraint: Constraint function applied to + the pointwise kernel matrix + (see `keras.constraints`). + bias_constraint: Constraint function applied to the bias vector + (see `keras.constraints`). + + Input shape: + 4D tensor with shape: + `(batch_size, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(batch_size, rows, cols, channels)` if data_format='channels_last'. + + Output shape: + 4D tensor with shape: + `(batch_size, filters, new_rows, new_cols)` if + data_format='channels_first' + or 4D tensor with shape: + `(batch_size, new_rows, new_cols, filters)` if + data_format='channels_last'. `rows` and `cols` values might have changed + due to padding. + + Returns: + A tensor of rank 4 representing + `activation(separableconv2d(inputs, kernel) + bias)`. + + Raises: + ValueError: if `padding` is "causal". 
+ """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + pointwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs + ): + super().__init__( + rank=2, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + depth_multiplier=depth_multiplier, + activation=activations.get(activation), + use_bias=use_bias, + depthwise_initializer=initializers.get(depthwise_initializer), + pointwise_initializer=initializers.get(pointwise_initializer), + bias_initializer=initializers.get(bias_initializer), + depthwise_regularizer=regularizers.get(depthwise_regularizer), + pointwise_regularizer=regularizers.get(pointwise_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + depthwise_constraint=constraints.get(depthwise_constraint), + pointwise_constraint=constraints.get(pointwise_constraint), + bias_constraint=constraints.get(bias_constraint), + **kwargs + ) + + def call(self, inputs): + # Apply the actual ops. + if self.data_format == "channels_last": + strides = (1,) + self.strides + (1,) + else: + strides = (1, 1) + self.strides + outputs = tf.nn.separable_conv2d( + inputs, + self.depthwise_kernel, + self.pointwise_kernel, + strides=strides, + padding=self.padding.upper(), + dilations=self.dilation_rate, + data_format=conv_utils.convert_data_format( + self.data_format, ndim=4 + ), + ) + + if self.use_bias: + outputs = tf.nn.bias_add( + outputs, + self.bias, + data_format=conv_utils.convert_data_format( + self.data_format, ndim=4 + ), + ) + + if self.activation is not None: + return self.activation(outputs) + return outputs + # Alias diff --git a/keras/layers/convolutional/separable_conv_test.py b/keras/layers/convolutional/separable_conv_test.py index 4f3340853d54..b4abfc1016bc 100644 --- a/keras/layers/convolutional/separable_conv_test.py +++ b/keras/layers/convolutional/separable_conv_test.py @@ -14,152 +14,170 @@ # ============================================================================== """Tests for separable convolutional layers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class SeparableConv1DTest(test_combinations.TestCase): - - def _run_test(self, kwargs): - num_samples = 2 - stack_size = 3 - length = 7 - - with self.cached_session(): - test_utils.layer_test( - keras.layers.SeparableConv1D, - kwargs=kwargs, - input_shape=(num_samples, length, stack_size)) - - @parameterized.named_parameters( - ('padding_valid', {'padding': 'valid'}), - ('padding_same', {'padding': 'same'}), - ('padding_same_dilation_2', {'padding': 'same', 'dilation_rate': 2}), - ('padding_causal', {'padding': 'causal'}), - ('strides', {'strides': 2}), - ('dilation_rate', {'dilation_rate': 2}), - ('depth_multiplier', {'depth_multiplier': 2}), - ) - def test_separable_conv1d(self, kwargs): - kwargs['filters'] = 2 - 
kwargs['kernel_size'] = 3 - self._run_test(kwargs) - - def test_separable_conv1d_regularizers(self): - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'depthwise_regularizer': 'l2', - 'pointwise_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.SeparableConv1D(**kwargs) - layer.build((None, 5, 2)) - self.assertEqual(len(layer.losses), 3) - layer(keras.backend.variable(np.ones((1, 5, 2)))) - self.assertEqual(len(layer.losses), 4) - - def test_separable_conv1d_constraints(self): - d_constraint = lambda x: x - p_constraint = lambda x: x - b_constraint = lambda x: x - - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'pointwise_constraint': p_constraint, - 'depthwise_constraint': d_constraint, - 'bias_constraint': b_constraint, - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.SeparableConv1D(**kwargs) - layer.build((None, 5, 2)) - self.assertEqual(layer.depthwise_kernel.constraint, d_constraint) - self.assertEqual(layer.pointwise_kernel.constraint, p_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) + def _run_test(self, kwargs): + num_samples = 2 + stack_size = 3 + length = 7 + + with self.cached_session(): + test_utils.layer_test( + keras.layers.SeparableConv1D, + kwargs=kwargs, + input_shape=(num_samples, length, stack_size), + ) + + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}), + ("padding_same", {"padding": "same"}), + ("padding_same_dilation_2", {"padding": "same", "dilation_rate": 2}), + ("padding_causal", {"padding": "causal"}), + ("strides", {"strides": 2}), + ("dilation_rate", {"dilation_rate": 2}), + ("depth_multiplier", {"depth_multiplier": 2}), + ) + def test_separable_conv1d(self, kwargs): + kwargs["filters"] = 2 + kwargs["kernel_size"] = 3 + self._run_test(kwargs) + + def test_separable_conv1d_regularizers(self): + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "depthwise_regularizer": "l2", + "pointwise_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.SeparableConv1D(**kwargs) + layer.build((None, 5, 2)) + self.assertEqual(len(layer.losses), 3) + layer(keras.backend.variable(np.ones((1, 5, 2)))) + self.assertEqual(len(layer.losses), 4) + + def test_separable_conv1d_constraints(self): + d_constraint = lambda x: x + p_constraint = lambda x: x + b_constraint = lambda x: x + + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "pointwise_constraint": p_constraint, + "depthwise_constraint": d_constraint, + "bias_constraint": b_constraint, + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.SeparableConv1D(**kwargs) + layer.build((None, 5, 2)) + self.assertEqual(layer.depthwise_kernel.constraint, d_constraint) + self.assertEqual(layer.pointwise_kernel.constraint, p_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) + + def test_separable_conv1d_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": 2, "dilation_rate": 2} + with self.assertRaisesRegex( + ValueError, r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.SeparableConv1D(filters=1, kernel_size=2, **kwargs) @test_combinations.run_all_keras_modes class SeparableConv2DTest(test_combinations.TestCase): - - def _run_test(self, kwargs): - num_samples = 2 - stack_size = 3 - num_row = 7 - num_col = 6 - - 
with self.cached_session(): - test_utils.layer_test( - keras.layers.SeparableConv2D, - kwargs=kwargs, - input_shape=(num_samples, num_row, num_col, stack_size)) - - @parameterized.named_parameters( - ('padding_valid', {'padding': 'valid'}), - ('padding_same', {'padding': 'same'}), - ('padding_same_dilation_2', {'padding': 'same', 'dilation_rate': 2}), - ('strides', {'strides': 2}), - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. - ('data_format', {'data_format': 'channels_first'}), - ('dilation_rate', {'dilation_rate': 2}), - ('depth_multiplier', {'depth_multiplier': 2}), - ) - def test_separable_conv2d(self, kwargs): - kwargs['filters'] = 2 - kwargs['kernel_size'] = 3 - if 'data_format' not in kwargs or tf.test.is_gpu_available(cuda_only=True): - self._run_test(kwargs) - - def test_separable_conv2d_regularizers(self): - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'depthwise_regularizer': 'l2', - 'pointwise_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.SeparableConv2D(**kwargs) - layer.build((None, 5, 5, 2)) - self.assertEqual(len(layer.losses), 3) - layer(keras.backend.variable(np.ones((1, 5, 5, 2)))) - self.assertEqual(len(layer.losses), 4) - - def test_separable_conv2d_constraints(self): - d_constraint = lambda x: x - p_constraint = lambda x: x - b_constraint = lambda x: x - - kwargs = { - 'filters': 3, - 'kernel_size': 3, - 'padding': 'valid', - 'pointwise_constraint': p_constraint, - 'depthwise_constraint': d_constraint, - 'bias_constraint': b_constraint, - 'strides': 1 - } - with self.cached_session(): - layer = keras.layers.SeparableConv2D(**kwargs) - layer.build((None, 5, 5, 2)) - self.assertEqual(layer.depthwise_kernel.constraint, d_constraint) - self.assertEqual(layer.pointwise_kernel.constraint, p_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) - -if __name__ == '__main__': - tf.test.main() + def _run_test(self, kwargs): + num_samples = 2 + stack_size = 3 + num_row = 7 + num_col = 6 + + with self.cached_session(): + test_utils.layer_test( + keras.layers.SeparableConv2D, + kwargs=kwargs, + input_shape=(num_samples, num_row, num_col, stack_size), + ) + + @parameterized.named_parameters( + ("padding_valid", {"padding": "valid"}), + ("padding_same", {"padding": "same"}), + ("padding_same_dilation_2", {"padding": "same", "dilation_rate": 2}), + ("strides", {"strides": 2}), + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. 
+ ("data_format", {"data_format": "channels_first"}), + ("dilation_rate", {"dilation_rate": 2}), + ("depth_multiplier", {"depth_multiplier": 2}), + ) + def test_separable_conv2d(self, kwargs): + kwargs["filters"] = 2 + kwargs["kernel_size"] = 3 + if "data_format" not in kwargs or tf.test.is_gpu_available( + cuda_only=True + ): + self._run_test(kwargs) + + def test_separable_conv2d_regularizers(self): + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "depthwise_regularizer": "l2", + "pointwise_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.SeparableConv2D(**kwargs) + layer.build((None, 5, 5, 2)) + self.assertEqual(len(layer.losses), 3) + layer(keras.backend.variable(np.ones((1, 5, 5, 2)))) + self.assertEqual(len(layer.losses), 4) + + def test_separable_conv2d_constraints(self): + d_constraint = lambda x: x + p_constraint = lambda x: x + b_constraint = lambda x: x + + kwargs = { + "filters": 3, + "kernel_size": 3, + "padding": "valid", + "pointwise_constraint": p_constraint, + "depthwise_constraint": d_constraint, + "bias_constraint": b_constraint, + "strides": 1, + } + with self.cached_session(): + layer = keras.layers.SeparableConv2D(**kwargs) + layer.build((None, 5, 5, 2)) + self.assertEqual(layer.depthwise_kernel.constraint, d_constraint) + self.assertEqual(layer.pointwise_kernel.constraint, p_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) + + def test_separable_conv2d_invalid_strides_and_dilation_rate(self): + kwargs = {"strides": [2, 1], "dilation_rate": [2, 1]} + with self.assertRaisesRegex( + ValueError, r"""`strides > 1` not supported in conjunction""" + ): + keras.layers.SeparableConv2D(filters=1, kernel_size=2, **kwargs) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/core/BUILD b/keras/layers/core/BUILD index 4439c2f6710a..2148cac8fe47 100644 --- a/keras/layers/core/BUILD +++ b/keras/layers/core/BUILD @@ -1,3 +1,5 @@ +# Placeholder: load unaliased py_library + # Description: # Contains the Keras core layers. 
load("@org_keras//keras:keras.bzl", "cuda_py_test") @@ -6,13 +8,14 @@ load("@org_keras//keras:keras.bzl", "cuda_py_test") load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/py/tensorflow_gnn:__subpackages__", "//third_party/tensorflow/python/distribute:__pkg__", "//third_party/tensorflow/python/feature_column:__pkg__", "//third_party/tensorflow/python/keras:__subpackages__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/trackable:__pkg__", "//third_party/tensorflow/tools/pip_package:__pkg__", "//third_party/tensorflow_models/official/projects/residual_mobilenet/modeling/backbones:__pkg__", ], @@ -30,6 +33,7 @@ py_library( ":dense", ":einsum_dense", ":embedding", + ":identity", ":lambda", ":masking", ":tf_op_layer", @@ -128,6 +132,16 @@ py_library( ], ) +py_library( + name = "identity", + srcs = ["identity.py"], + srcs_version = "PY3", + deps = [ + "//:expect_tensorflow_installed", + "//keras/engine:base_layer", + ], +) + tf_py_test( name = "core_test", size = "medium", diff --git a/keras/layers/core/__init__.py b/keras/layers/core/__init__.py index 89d9a7eb5272..21d3c6ab52db 100644 --- a/keras/layers/core/__init__.py +++ b/keras/layers/core/__init__.py @@ -18,20 +18,23 @@ from keras.layers.core.dense import Dense from keras.layers.core.einsum_dense import EinsumDense from keras.layers.core.embedding import Embedding +from keras.layers.core.identity import Identity from keras.layers.core.lambda_layer import Lambda from keras.layers.core.masking import Masking -# Required by third_party/py/tensorflow_gnn/graph/keras/keras_tensors.py -from keras.layers.core.tf_op_layer import _delegate_method -from keras.layers.core.tf_op_layer import _delegate_property + +# Required by third_party/py/tensorflow_gnn/keras/keras_tensors.py from keras.layers.core.tf_op_layer import ClassMethod from keras.layers.core.tf_op_layer import InstanceMethod from keras.layers.core.tf_op_layer import InstanceProperty - from keras.layers.core.tf_op_layer import SlicingOpLambda from keras.layers.core.tf_op_layer import TFOpLambda +from keras.layers.core.tf_op_layer import _delegate_method +from keras.layers.core.tf_op_layer import _delegate_property # Regularization layers imported for backwards namespace compatibility -from keras.layers.regularization.activity_regularization import ActivityRegularization +from keras.layers.regularization.activity_regularization import ( + ActivityRegularization, +) from keras.layers.regularization.dropout import Dropout from keras.layers.regularization.spatial_dropout1d import SpatialDropout1D from keras.layers.regularization.spatial_dropout2d import SpatialDropout2D diff --git a/keras/layers/core/activation.py b/keras/layers/core/activation.py index d953e208a4f7..9cfaade39a33 100644 --- a/keras/layers/core/activation.py +++ b/keras/layers/core/activation.py @@ -13,54 +13,55 @@ # limitations under the License. # ============================================================================== """Contains the Activation layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import activations from keras.engine.base_layer import Layer + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Activation') +@keras_export("keras.layers.Activation") class Activation(Layer): - """Applies an activation function to an output. 
- - Args: - activation: Activation function, such as `tf.nn.relu`, or string name of - built-in activation function, such as "relu". + """Applies an activation function to an output. - Usage: + Args: + activation: Activation function, such as `tf.nn.relu`, or string name of + built-in activation function, such as "relu". - >>> layer = tf.keras.layers.Activation('relu') - >>> output = layer([-3.0, -1.0, 0.0, 2.0]) - >>> list(output.numpy()) - [0.0, 0.0, 0.0, 2.0] - >>> layer = tf.keras.layers.Activation(tf.nn.relu) - >>> output = layer([-3.0, -1.0, 0.0, 2.0]) - >>> list(output.numpy()) - [0.0, 0.0, 0.0, 2.0] + Usage: - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the batch axis) - when using this layer as the first layer in a model. + >>> layer = tf.keras.layers.Activation('relu') + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [0.0, 0.0, 0.0, 2.0] + >>> layer = tf.keras.layers.Activation(tf.nn.relu) + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [0.0, 0.0, 0.0, 2.0] - Output shape: - Same shape as input. - """ + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the batch axis) + when using this layer as the first layer in a model. - def __init__(self, activation, **kwargs): - super().__init__(**kwargs) - self.supports_masking = True - self.activation = activations.get(activation) + Output shape: + Same shape as input. + """ - def call(self, inputs): - return self.activation(inputs) + def __init__(self, activation, **kwargs): + super().__init__(**kwargs) + self.supports_masking = True + self.activation = activations.get(activation) - def compute_output_shape(self, input_shape): - return input_shape + def call(self, inputs): + return self.activation(inputs) - def get_config(self): - config = {'activation': activations.serialize(self.activation)} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def compute_output_shape(self, input_shape): + return input_shape + def get_config(self): + config = {"activation": activations.serialize(self.activation)} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/core/core_test.py b/keras/layers/core/core_test.py index 0f04bd7f28bf..345eb9e33c20 100644 --- a/keras/layers/core/core_test.py +++ b/keras/layers/core/core_test.py @@ -17,629 +17,697 @@ import os import textwrap +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras import initializers from keras.layers import core from keras.mixed_precision import policy +from keras.saving.serialization_lib import SafeModeScope from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np - -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class DropoutLayersTest(test_combinations.TestCase): - - def test_dropout(self): - test_utils.layer_test( - keras.layers.Dropout, kwargs={'rate': 0.5}, input_shape=(3, 2)) - - test_utils.layer_test( - keras.layers.Dropout, - kwargs={ - 'rate': 0.5, - 'noise_shape': [3, 1] - }, - input_shape=(3, 2)) - - def test_dropout_supports_masking(self): - dropout = keras.layers.Dropout(0.5) - self.assertEqual(True, dropout.supports_masking) - - def test_spatial_dropout_1d(self): - test_utils.layer_test( - keras.layers.SpatialDropout1D, - kwargs={'rate': 0.5}, - input_shape=(2, 3, 4)) - - def 
test_spatial_dropout_2d(self): - test_utils.layer_test( - keras.layers.SpatialDropout2D, - kwargs={'rate': 0.5}, - input_shape=(2, 3, 4, 5)) - - test_utils.layer_test( - keras.layers.SpatialDropout2D, - kwargs={ - 'rate': 0.5, - 'data_format': 'channels_first' - }, - input_shape=(2, 3, 4, 5)) - - def test_spatial_dropout_3d(self): - test_utils.layer_test( - keras.layers.SpatialDropout3D, - kwargs={'rate': 0.5}, - input_shape=(2, 3, 4, 4, 5)) - - test_utils.layer_test( - keras.layers.SpatialDropout3D, - kwargs={ - 'rate': 0.5, - 'data_format': 'channels_first' - }, - input_shape=(2, 3, 4, 4, 5)) - - def test_dropout_partial_noise_shape(self): - inputs = keras.Input(shape=(5, 10)) - layer = keras.layers.Dropout(0.5, noise_shape=(None, 1, None)) - outputs = layer(inputs) - model = keras.Model(inputs, outputs) - out = model(np.ones((20, 5, 10)), training=True) - out_np = keras.backend.get_value(out) - # Test that dropout mask is shared across second dim. - self.assertAllClose(out_np[:, 0, :], out_np[:, 1, :]) - - def test_dropout_with_savemodel(self): - inputs = keras.Input(shape=(5, 10)) - layer = keras.layers.Dropout(0.5, force_generator=True) - outputs = layer(inputs) - model = keras.Model(inputs, outputs) - train = model(np.ones((20, 5, 10)), training=True) - predict = model(np.ones((20, 5, 10))) - # Make sure the weights from tf.random.Generator is not present in the model - # which will cause weight loading issue for existing application models if - # it contains dropout layer. - self.assertEmpty(layer.get_weights()) - self.assertEmpty(model.get_weights()) - - # Make sure the layer does dropout value when training - self.assertNotAllClose(train, predict) - - model.save(os.path.join(self.get_temp_dir(), 'savedmodel'), - save_format='tf') - loaded_model = keras.models.load_model( - os.path.join(self.get_temp_dir(), 'savedmodel')) - predict2 = loaded_model(np.ones((20, 5, 10))) - - self.assertAllClose(predict, predict2) - # Make sure the model dropout different value after loading - train2 = loaded_model(np.ones((20, 5, 10)), training=True) - self.assertNotAllClose(train, train2) - self.assertIsNotNone(loaded_model.layers[1]._random_generator) - - # Also make sure the checkpoint doesn't contain any variable from the - # dropout layer, to keep the backward compatibility. 
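The comment above (and the `checkpoint` subtest that replaces this code further down) relies on `tf.train.list_variables` to prove the dropout layer leaves no state behind in a checkpoint. The same scan as a standalone sketch; the model and directory name are illustrative:

    import os
    import tensorflow as tf
    import keras

    # Build any model containing a Dropout layer, checkpoint it, then scan
    # the stored variable names for dropout state.
    model = keras.Sequential(
        [keras.layers.Dense(4), keras.layers.Dropout(0.5)]
    )
    model.build((None, 8))

    os.makedirs("tmp", exist_ok=True)
    ckpt = tf.train.Checkpoint(model)
    path = ckpt.save(os.path.join("tmp", "ckpt"))
    names = [name for name, _ in tf.train.list_variables(path)]
    assert not any("dropout" in name for name in names), names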
-    checkpoint = tf.train.Checkpoint(model)
-    save_path = checkpoint.save(os.path.join(self.get_temp_dir(), 'checkpoint'))
-    checkpoint_var_names = [name_value_tuple[0] for name_value_tuple in
-                            tf.train.list_variables(save_path)]
-    for name in checkpoint_var_names:
-      self.assertNotIn('dropout', name)
+    def test_dropout(self):
+        test_utils.layer_test(
+            keras.layers.Dropout, kwargs={"rate": 0.5}, input_shape=(3, 2)
+        )
+
+        test_utils.layer_test(
+            keras.layers.Dropout,
+            kwargs={"rate": 0.5, "noise_shape": [3, 1]},
+            input_shape=(3, 2),
+        )
+
+    def test_dropout_supports_masking(self):
+        dropout = keras.layers.Dropout(0.5)
+        self.assertEqual(True, dropout.supports_masking)
+
+    def test_spatial_dropout_1d(self):
+        test_utils.layer_test(
+            keras.layers.SpatialDropout1D,
+            kwargs={"rate": 0.5},
+            input_shape=(2, 3, 4),
+        )
+
+    def test_spatial_dropout_2d(self):
+        test_utils.layer_test(
+            keras.layers.SpatialDropout2D,
+            kwargs={"rate": 0.5},
+            input_shape=(2, 3, 4, 5),
+        )
+
+        test_utils.layer_test(
+            keras.layers.SpatialDropout2D,
+            kwargs={"rate": 0.5, "data_format": "channels_first"},
+            input_shape=(2, 3, 4, 5),
+        )
+
+    def test_spatial_dropout_3d(self):
+        test_utils.layer_test(
+            keras.layers.SpatialDropout3D,
+            kwargs={"rate": 0.5},
+            input_shape=(2, 3, 4, 4, 5),
+        )
+
+        test_utils.layer_test(
+            keras.layers.SpatialDropout3D,
+            kwargs={"rate": 0.5, "data_format": "channels_first"},
+            input_shape=(2, 3, 4, 4, 5),
+        )
+
+    def test_dropout_partial_noise_shape(self):
+        inputs = keras.Input(shape=(5, 10))
+        layer = keras.layers.Dropout(0.5, noise_shape=(None, 1, None))
+        outputs = layer(inputs)
+        model = keras.Model(inputs, outputs)
+        out = model(np.ones((20, 5, 10)), training=True)
+        out_np = keras.backend.get_value(out)
+        # Test that dropout mask is shared across second dim.
+        self.assertAllClose(out_np[:, 0, :], out_np[:, 1, :])
+
+    def test_dropout_with_saving(self):
+        inputs = keras.Input(shape=(5, 10))
+        layer = keras.layers.Dropout(0.5, force_generator=True)
+        outputs = layer(inputs)
+        model = keras.Model(inputs, outputs)
+        train = model(np.ones((20, 5, 10)), training=True)
+        predict = model(np.ones((20, 5, 10)))
+        # Make sure the weights from tf.random.Generator are not present in
+        # the model, which would cause weight-loading issues for existing
+        # application models if they contain a dropout layer.
+        self.assertEmpty(layer.get_weights())
+        self.assertEmpty(model.get_weights())
+
+        # Make sure the layer applies dropout when training.
+        self.assertNotAllClose(train, predict)
+
+        with self.subTest("savedmodel"):
+            model.save(
+                os.path.join(self.get_temp_dir(), "savedmodel"),
+                save_format="tf",
+            )
+            loaded_model = keras.models.load_model(
+                os.path.join(self.get_temp_dir(), "savedmodel")
+            )
+            predict2 = loaded_model(np.ones((20, 5, 10)))
+
+            self.assertAllClose(predict, predict2)
+            # Make sure the model applies different dropout values after
+            # loading.
+            train2 = loaded_model(np.ones((20, 5, 10)), training=True)
+            self.assertNotAllClose(train, train2)
+            self.assertIsNotNone(loaded_model.layers[1]._random_generator)
+
+        with self.subTest("keras_v3"):
+            if not tf.__internal__.tf2.enabled():
+                self.skipTest(
+                    "TF2 must be enabled to use the new `.keras` saving."
+ ) + model.save(os.path.join(self.get_temp_dir(), "model.keras")) + loaded_model = keras.models.load_model( + os.path.join(self.get_temp_dir(), "model.keras") + ) + predict2 = loaded_model(np.ones((20, 5, 10))) + + self.assertAllClose(predict, predict2) + # Make sure the model dropout different value after loading + train2 = loaded_model(np.ones((20, 5, 10)), training=True) + self.assertNotAllClose(train, train2) + self.assertIsNotNone(loaded_model.layers[1]._random_generator) + + with self.subTest("checkpoint"): + # Also make sure the checkpoint doesn't contain any variable from + # the dropout layer, to keep the backward compatibility. + checkpoint = tf.train.Checkpoint(model) + save_path = checkpoint.save( + os.path.join(self.get_temp_dir(), "checkpoint") + ) + checkpoint_var_names = [ + name_value_tuple[0] + for name_value_tuple in tf.train.list_variables(save_path) + ] + for name in checkpoint_var_names: + self.assertNotIn("dropout", name) @test_combinations.run_all_keras_modes class LambdaLayerTest(test_combinations.TestCase): - - def test_lambda(self): - test_utils.layer_test( - keras.layers.Lambda, - kwargs={'function': lambda x: x + 1}, - input_shape=(3, 2)) - - test_utils.layer_test( - keras.layers.Lambda, - kwargs={ - 'function': lambda x, a, b: x * a + b, - 'arguments': { - 'a': 0.6, - 'b': 0.4 - } - }, - input_shape=(3, 2)) - - # test serialization with function - def f(x): - return x + 1 - - ld = keras.layers.Lambda(f) - config = ld.get_config() - ld = keras.layers.deserialize({'class_name': 'Lambda', 'config': config}) - self.assertEqual(ld.function(3), 4) - - # test with lambda - ld = keras.layers.Lambda( - lambda x: keras.backend.concatenate([tf.square(x), x])) - config = ld.get_config() - ld = keras.layers.Lambda.from_config(config) - self.assertAllEqual(self.evaluate(ld.function([3])), [9, 3]) - - def test_lambda_multiple_inputs(self): - ld = keras.layers.Lambda(lambda x: x[0], output_shape=lambda x: x[0]) - x1 = np.ones([3, 2], np.float32) - x2 = np.ones([3, 5], np.float32) - out = ld([x1, x2]) - self.assertAllEqual(out.shape, [3, 2]) - - def test_lambda_output_shape(self): - l = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1)) - l(keras.backend.variable(np.ones((1, 1)))) - self.assertEqual((1, 1), l.get_config()['output_shape']) - - def test_lambda_output_shape_function(self): - - def get_output_shape(input_shape): - return 1 * input_shape - - l = keras.layers.Lambda(lambda x: x + 1, output_shape=get_output_shape) - l(keras.backend.variable(np.ones((1, 1)))) - self.assertEqual('lambda', l.get_config()['output_shape_type']) - - def test_lambda_output_shape_autocalculate_multiple_inputs(self): - - def lambda_fn(x): - return tf.matmul(x[0], x[1]) - - l = keras.layers.Lambda(lambda_fn, dtype=tf.float64) - output_shape = l.compute_output_shape([(10, 10), (10, 20)]) - self.assertAllEqual((10, 20), output_shape) - output_signature = l.compute_output_signature([ - tf.TensorSpec(dtype=tf.float64, shape=(10, 10)), - tf.TensorSpec(dtype=tf.float64, shape=(10, 20)) - ]) - self.assertAllEqual((10, 20), output_signature.shape) - self.assertAllEqual(tf.float64, output_signature.dtype) - - def test_lambda_output_shape_list_multiple_outputs(self): - - def lambda_fn(x): - return x - - l = keras.layers.Lambda(lambda_fn, output_shape=[(10,), (20,)]) - output_shape = l.compute_output_shape([(10, 10), (10, 20)]) - self.assertAllEqual([(10, 10), (10, 20)], output_shape) - - def test_lambda_output_shape_tuple_with_none(self): - - def lambda_fn(x): - return x - - l = 
keras.layers.Lambda(lambda_fn, output_shape=(None, 10)) - output_shape = l.compute_output_shape((5, 10, 20)) - self.assertAllEqual([5, None, 10], output_shape.as_list()) - - def test_lambda_output_shape_function_multiple_outputs(self): - - def lambda_fn(x): - return x - - def output_shape_fn(input_shape): - return input_shape - - l = keras.layers.Lambda(lambda_fn, output_shape=output_shape_fn) - output_shape = l.compute_output_shape([(10, 10), (10, 20)]) - self.assertAllEqual([(10, 10), (10, 20)], output_shape) - - def test_lambda_output_shape_nested(self): - - def lambda_fn(inputs): - return (inputs[1]['a'], {'b': inputs[0]}) - - l = keras.layers.Lambda(lambda_fn) - output_shape = l.compute_output_shape(((10, 20), {'a': (10, 5)})) - self.assertAllEqual(((10, 5), {'b': (10, 20)}), output_shape) - - def test_lambda_config_serialization(self): - # Test serialization with output_shape and output_shape_type - layer = keras.layers.Lambda( - lambda x: x + 1, output_shape=(1, 1), mask=lambda i, m: m) - layer(keras.backend.variable(np.ones((1, 1)))) - config = layer.get_config() - - layer = keras.layers.deserialize({'class_name': 'Lambda', 'config': config}) - self.assertAllEqual(layer.function(1), 2) - self.assertAllEqual(layer._output_shape, (1, 1)) - self.assertAllEqual(layer.mask(1, True), True) - - layer = keras.layers.Lambda.from_config(config) - self.assertAllEqual(layer.function(1), 2) - self.assertAllEqual(layer._output_shape, (1, 1)) - self.assertAllEqual(layer.mask(1, True), True) - - def test_lambda_with_training_arg(self): - - def fn(x, training=True): - return keras.backend.in_train_phase(x, 2 * x, training=training) - - layer = keras.layers.Lambda(fn) - x = keras.backend.ones(()) - train_out = layer(x, training=True) - eval_out = layer(x, training=False) - - self.assertEqual(keras.backend.get_value(train_out), 1.) - self.assertEqual(keras.backend.get_value(eval_out), 2.) 
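The training-arg test above relies on `Lambda` forwarding `training` to any wrapped function that accepts it, with `backend.in_train_phase` selecting between the two branches. A minimal sketch of that pattern outside the test harness (values mirror the test's expectations):

    import numpy as np
    import keras

    # A Lambda whose function branches on `training`: identity when
    # training, doubled otherwise.
    layer = keras.layers.Lambda(
        lambda x, training=True: keras.backend.in_train_phase(
            x, 2 * x, training=training
        )
    )
    x = np.ones((1,), dtype="float32")
    print(layer(x, training=True))   # [1.]
    print(layer(x, training=False))  # [2.]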
- - def test_lambda_with_mask(self): - - def add_one(inputs): - return inputs + 1.0 - - def mask(unused_inputs, previous_mask): - return previous_mask - - layer = keras.layers.Lambda(add_one, mask=mask) - x = np.ones([5, 4, 3]) - x[:, -1, :] = 0 - masking = keras.layers.Masking() - out = layer(masking(x)) - - expected_out = np.full([5, 4, 3], 2.0) - expected_out[:, -1, :] = 1.0 - expected_mask = np.ones([5, 4]) - expected_mask[:, -1] = 0.0 - - self.assertAllClose(self.evaluate(out), expected_out) - self.assertIsNotNone(out._keras_mask) - self.assertAllClose(self.evaluate(out._keras_mask), expected_mask) - - def test_lambda_with_ragged_input(self): - - def add_one(inputs): - return inputs + 1.0 - - layer = keras.layers.Lambda(add_one) - - ragged_input = tf.ragged.constant([[1.0], [2.0, 3.0]]) - out = layer(ragged_input) - expected_out = tf.ragged.constant([[2.0], [3.0, 4.0]]) - self.assertAllClose(out, expected_out) - - def test_lambda_deserialization_does_not_pollute_core(self): - layer = keras.layers.Lambda(lambda x: x + 1) - config = layer.get_config() - keras.layers.Lambda.from_config(config) - self.assertNotIn(self.__class__.__name__, dir(core)) + def test_lambda(self): + with SafeModeScope(safe_mode=False): + test_utils.layer_test( + keras.layers.Lambda, + kwargs={"function": lambda x: x + 1}, + input_shape=(3, 2), + ) + + test_utils.layer_test( + keras.layers.Lambda, + kwargs={ + "function": lambda x, a, b: x * a + b, + "arguments": {"a": 0.6, "b": 0.4}, + }, + input_shape=(3, 2), + ) + + # test serialization with function + def f(x): + return x + 1 + + ld = keras.layers.Lambda(f) + config = ld.get_config() + with SafeModeScope(safe_mode=False): + ld = keras.layers.deserialize( + {"class_name": "Lambda", "config": config} + ) + self.assertEqual(ld.function(3), 4) + + # test with lambda + ld = keras.layers.Lambda( + lambda x: keras.backend.concatenate([tf.square(x), x]) + ) + config = ld.get_config() + ld = keras.layers.Lambda.from_config(config) + self.assertAllEqual(self.evaluate(ld.function([3])), [9, 3]) + + def test_lambda_multiple_inputs(self): + ld = keras.layers.Lambda(lambda x: x[0], output_shape=lambda x: x[0]) + x1 = np.ones([3, 2], np.float32) + x2 = np.ones([3, 5], np.float32) + out = ld([x1, x2]) + self.assertAllEqual(out.shape, [3, 2]) + + def test_lambda_output_shape(self): + l = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1)) + l(keras.backend.variable(np.ones((1, 1)))) + self.assertEqual((1, 1), l.get_config()["output_shape"]) + + def test_lambda_output_shape_function(self): + def get_output_shape(input_shape): + return 1 * input_shape + + l = keras.layers.Lambda(lambda x: x + 1, output_shape=get_output_shape) + l(keras.backend.variable(np.ones((1, 1)))) + self.assertEqual("lambda", l.get_config()["output_shape_type"]) + + def test_lambda_output_shape_autocalculate_multiple_inputs(self): + def lambda_fn(x): + return tf.matmul(x[0], x[1]) + + l = keras.layers.Lambda(lambda_fn, dtype=tf.float64) + output_shape = l.compute_output_shape([(10, 10), (10, 20)]) + self.assertAllEqual((10, 20), output_shape) + output_signature = l.compute_output_signature( + [ + tf.TensorSpec(dtype=tf.float64, shape=(10, 10)), + tf.TensorSpec(dtype=tf.float64, shape=(10, 20)), + ] + ) + self.assertAllEqual((10, 20), output_signature.shape) + self.assertAllEqual(tf.float64, output_signature.dtype) + + def test_lambda_output_shape_list_multiple_outputs(self): + def lambda_fn(x): + return x + + l = keras.layers.Lambda(lambda_fn, output_shape=[(10,), (20,)]) + output_shape = 
l.compute_output_shape([(10, 10), (10, 20)]) + self.assertAllEqual([(10, 10), (10, 20)], output_shape) + + def test_lambda_output_shape_tuple_with_none(self): + def lambda_fn(x): + return x + + l = keras.layers.Lambda(lambda_fn, output_shape=(None, 10)) + output_shape = l.compute_output_shape((5, 10, 20)) + self.assertAllEqual([5, None, 10], output_shape.as_list()) + + def test_lambda_output_shape_function_multiple_outputs(self): + def lambda_fn(x): + return x + + def output_shape_fn(input_shape): + return input_shape + + l = keras.layers.Lambda(lambda_fn, output_shape=output_shape_fn) + output_shape = l.compute_output_shape([(10, 10), (10, 20)]) + self.assertAllEqual([(10, 10), (10, 20)], output_shape) + + def test_lambda_output_shape_nested(self): + def lambda_fn(inputs): + return (inputs[1]["a"], {"b": inputs[0]}) + + l = keras.layers.Lambda(lambda_fn) + output_shape = l.compute_output_shape(((10, 20), {"a": (10, 5)})) + self.assertAllEqual(((10, 5), {"b": (10, 20)}), output_shape) + + def test_lambda_config_serialization(self): + # Test serialization with output_shape and output_shape_type + layer = keras.layers.Lambda( + lambda x: x + 1, output_shape=(1, 1), mask=lambda i, m: m + ) + layer(keras.backend.variable(np.ones((1, 1)))) + config = layer.get_config() + + with SafeModeScope(safe_mode=False): + layer = keras.layers.deserialize( + {"class_name": "Lambda", "config": config} + ) + self.assertAllEqual(layer.function(1), 2) + self.assertAllEqual(layer._output_shape, (1, 1)) + self.assertAllEqual(layer.mask(1, True), True) + + layer = keras.layers.Lambda.from_config(config) + self.assertAllEqual(layer.function(1), 2) + self.assertAllEqual(layer._output_shape, (1, 1)) + self.assertAllEqual(layer.mask(1, True), True) + + def test_lambda_with_training_arg(self): + def fn(x, training=True): + return keras.backend.in_train_phase(x, 2 * x, training=training) + + layer = keras.layers.Lambda(fn) + x = keras.backend.ones(()) + train_out = layer(x, training=True) + eval_out = layer(x, training=False) + + self.assertEqual(keras.backend.get_value(train_out), 1.0) + self.assertEqual(keras.backend.get_value(eval_out), 2.0) + + def test_lambda_with_mask(self): + def add_one(inputs): + return inputs + 1.0 + + def mask(unused_inputs, previous_mask): + return previous_mask + + layer = keras.layers.Lambda(add_one, mask=mask) + x = np.ones([5, 4, 3]) + x[:, -1, :] = 0 + masking = keras.layers.Masking() + out = layer(masking(x)) + + expected_out = np.full([5, 4, 3], 2.0) + expected_out[:, -1, :] = 1.0 + expected_mask = np.ones([5, 4]) + expected_mask[:, -1] = 0.0 + + self.assertAllClose(self.evaluate(out), expected_out) + self.assertIsNotNone(out._keras_mask) + self.assertAllClose(self.evaluate(out._keras_mask), expected_mask) + + def test_lambda_with_ragged_input(self): + def add_one(inputs): + return inputs + 1.0 + + layer = keras.layers.Lambda(add_one) + + ragged_input = tf.ragged.constant([[1.0], [2.0, 3.0]]) + out = layer(ragged_input) + expected_out = tf.ragged.constant([[2.0], [3.0, 4.0]]) + self.assertAllClose(out, expected_out) + + def test_lambda_deserialization_does_not_pollute_core(self): + layer = keras.layers.Lambda(lambda x: x + 1) + config = layer.get_config() + keras.layers.Lambda.from_config(config) + self.assertNotIn(self.__class__.__name__, dir(core)) class TestStatefulLambda(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types - def test_lambda_with_variable_in_model(self): - v = tf.Variable(1., trainable=True) - - def 
lambda_fn(x, v): - return x * v - - # While it is generally not advised to mix Variables with Lambda layers, if - # the variables are explicitly set as attributes then they are still - # tracked. This is consistent with the base Layer behavior. - layer = keras.layers.Lambda(lambda_fn, arguments={'v': v}) - self.assertLen(layer.trainable_weights, 0) - layer.v = v - self.assertLen(layer.trainable_weights, 1) - - model = test_utils.get_model_from_layers([layer], input_shape=(10,)) - model.compile( - keras.optimizers.optimizer_v2.gradient_descent.SGD(0.1), - 'mae', - run_eagerly=test_utils.should_run_eagerly()) - x, y = np.ones((10, 10), 'float32'), 2 * np.ones((10, 10), 'float32') - model.fit(x, y, batch_size=2, epochs=2, validation_data=(x, y)) - self.assertLen(model.trainable_weights, 1) - self.assertAllClose(keras.backend.get_value(model.trainable_weights[0]), 2.) - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types - def test_creation_inside_lambda(self): - - def lambda_fn(x): - scale = tf.Variable(1., trainable=True, name='scale') - shift = tf.Variable(1., trainable=True, name='shift') - return x * scale + shift - - expected_error = textwrap.dedent(r""" - ( )?The following Variables were created within a Lambda layer \(shift_and_scale\) - ( )?but are not tracked by said layer: - ( )? >> # Create a `Sequential` model and add a Dense layer as the first layer. - >>> model = tf.keras.models.Sequential() - >>> model.add(tf.keras.Input(shape=(16,))) - >>> model.add(tf.keras.layers.Dense(32, activation='relu')) - >>> # Now the model will take as input arrays of shape (None, 16) - >>> # and output arrays of shape (None, 32). - >>> # Note that after the first layer, you don't need to specify - >>> # the size of the input anymore: - >>> model.add(tf.keras.layers.Dense(32)) - >>> model.output_shape - (None, 32) + >>> # Create a `Sequential` model and add a Dense layer as the first layer. + >>> model = tf.keras.models.Sequential() + >>> model.add(tf.keras.Input(shape=(16,))) + >>> model.add(tf.keras.layers.Dense(32, activation='relu')) + >>> # Now the model will take as input arrays of shape (None, 16) + >>> # and output arrays of shape (None, 32). + >>> # Note that after the first layer, you don't need to specify + >>> # the size of the input anymore: + >>> model.add(tf.keras.layers.Dense(32)) + >>> model.output_shape + (None, 32) - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix. 
+ bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation"). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. - Input shape: - N-D tensor with shape: `(batch_size, ..., input_dim)`. - The most common situation would be - a 2D input with shape `(batch_size, input_dim)`. + Input shape: + N-D tensor with shape: `(batch_size, ..., input_dim)`. + The most common situation would be + a 2D input with shape `(batch_size, input_dim)`. - Output shape: - N-D tensor with shape: `(batch_size, ..., units)`. - For instance, for a 2D input with shape `(batch_size, input_dim)`, - the output would have shape `(batch_size, units)`. - """ + Output shape: + N-D tensor with shape: `(batch_size, ..., units)`. + For instance, for a 2D input with shape `(batch_size, input_dim)`, + the output would have shape `(batch_size, units)`. + """ - @utils.allow_initializer_layout - def __init__(self, - units, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__( - activity_regularizer=activity_regularizer, **kwargs) + @utils.allow_initializer_layout + def __init__( + self, + units, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__(activity_regularizer=activity_regularizer, **kwargs) - self.units = int(units) if not isinstance(units, int) else units - if self.units < 0: - raise ValueError(f'Received an invalid value for `units`, expected ' - f'a positive integer. Received: units={units}') - self.activation = activations.get(activation) - self.use_bias = use_bias - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) + self.units = int(units) if not isinstance(units, int) else units + if self.units < 0: + raise ValueError( + "Received an invalid value for `units`, expected " + f"a positive integer. 
Received: units={units}" + ) + self.activation = activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) - self.input_spec = InputSpec(min_ndim=2) - self.supports_masking = True + self.input_spec = InputSpec(min_ndim=2) + self.supports_masking = True - def build(self, input_shape): - dtype = tf.as_dtype(self.dtype or backend.floatx()) - if not (dtype.is_floating or dtype.is_complex): - raise TypeError('A Dense layer can only be built with a floating-point ' - f'dtype. Received: dtype={dtype}') + def build(self, input_shape): + dtype = tf.as_dtype(self.dtype or backend.floatx()) + if not (dtype.is_floating or dtype.is_complex): + raise TypeError( + "A Dense layer can only be built with a floating-point " + f"dtype. Received: dtype={dtype}" + ) - input_shape = tf.TensorShape(input_shape) - last_dim = tf.compat.dimension_value(input_shape[-1]) - if last_dim is None: - raise ValueError('The last dimension of the inputs to a Dense layer ' - 'should be defined. Found None. ' - f'Full input shape received: {input_shape}') - self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim}) - self.kernel = self.add_weight( - 'kernel', - shape=[last_dim, self.units], - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - dtype=self.dtype, - trainable=True) - if self.use_bias: - self.bias = self.add_weight( - 'bias', - shape=[self.units,], - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - dtype=self.dtype, - trainable=True) - else: - self.bias = None - self.built = True + input_shape = tf.TensorShape(input_shape) + last_dim = tf.compat.dimension_value(input_shape[-1]) + if last_dim is None: + raise ValueError( + "The last dimension of the inputs to a Dense layer " + "should be defined. Found None. " + f"Full input shape received: {input_shape}" + ) + self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim}) + self.kernel = self.add_weight( + "kernel", + shape=[last_dim, self.units], + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + dtype=self.dtype, + trainable=True, + ) + if self.use_bias: + self.bias = self.add_weight( + "bias", + shape=[ + self.units, + ], + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + dtype=self.dtype, + trainable=True, + ) + else: + self.bias = None + self.built = True - def call(self, inputs): - if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype: - inputs = tf.cast(inputs, dtype=self._compute_dtype_object) + def call(self, inputs): + if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype: + inputs = tf.cast(inputs, dtype=self._compute_dtype_object) - is_ragged = isinstance(inputs, tf.RaggedTensor) - if is_ragged: - # In case we encounter a RaggedTensor with a fixed last dimension (last - # dimension not ragged), we can flatten the input and restore the ragged - # dimensions at the end. 
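# A small sketch (assuming TF 2.x eager mode) of the two non-dense input
# paths handled in the code below: RaggedTensor inputs with a uniform
# innermost dimension are flattened, transformed, and re-wrapped, and
# rank-2 SparseTensor inputs take the embedding-lookup matmul path, which
# agrees with the ordinary dense computation.
import tensorflow as tf

dense = tf.keras.layers.Dense(3, kernel_initializer="ones", use_bias=False)

ragged = tf.ragged.constant(
    [[[1.0, 2.0]], [[3.0, 4.0], [5.0, 6.0]]], ragged_rank=1
)
print(dense(ragged).shape)  # (2, None, 3): ragged structure is preserved

sparse = tf.sparse.from_dense(tf.constant([[0.0, 2.0], [1.0, 0.0]]))
same = tf.reduce_all(dense(sparse) == dense(tf.sparse.to_dense(sparse)))
print(same.numpy())  # True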
- if tf.compat.dimension_value(inputs.shape[-1]) is None: - raise ValueError('Dense layer only supports RaggedTensors when the ' - 'innermost dimension is non-ragged. Received: ' - f'inputs.shape={inputs.shape}.') - original_inputs = inputs - if inputs.flat_values.shape.rank > 1: - inputs = inputs.flat_values - else: - # Innermost partition is encoded using uniform_row_length. - # (This is unusual, but we can handle it.) - if inputs.shape.rank == 2: - inputs = inputs.to_tensor() - is_ragged = False - else: - for _ in range(original_inputs.ragged_rank - 1): - inputs = inputs.values - inputs = inputs.to_tensor() - original_inputs = tf.RaggedTensor.from_nested_row_splits( - inputs, original_inputs.nested_row_splits[:-1]) + is_ragged = isinstance(inputs, tf.RaggedTensor) + if is_ragged: + # In case we encounter a RaggedTensor with a fixed last dimension + # (last dimension not ragged), we can flatten the input and restore + # the ragged dimensions at the end. + if tf.compat.dimension_value(inputs.shape[-1]) is None: + raise ValueError( + "Dense layer only supports RaggedTensors when the " + "innermost dimension is non-ragged. Received: " + f"inputs.shape={inputs.shape}." + ) + original_inputs = inputs + if inputs.flat_values.shape.rank > 1: + inputs = inputs.flat_values + else: + # Innermost partition is encoded using uniform_row_length. + # (This is unusual, but we can handle it.) + if inputs.shape.rank == 2: + inputs = inputs.to_tensor() + is_ragged = False + else: + for _ in range(original_inputs.ragged_rank - 1): + inputs = inputs.values + inputs = inputs.to_tensor() + original_inputs = tf.RaggedTensor.from_nested_row_splits( + inputs, original_inputs.nested_row_splits[:-1] + ) - rank = inputs.shape.rank - if rank == 2 or rank is None: - # We use embedding_lookup_sparse as a more efficient matmul operation for - # large sparse input tensors. The op will result in a sparse gradient, as - # opposed to sparse_ops.sparse_tensor_dense_matmul which results in dense - # gradients. This can lead to sigfinicant speedups, see b/171762937. - if isinstance(inputs, tf.SparseTensor): - # We need to fill empty rows, as the op assumes at least one id per row. - inputs, _ = tf.sparse.fill_empty_rows(inputs, 0) - # We need to do some munging of our input to use the embedding lookup as - # a matrix multiply. We split our input matrix into separate ids and - # weights tensors. The values of the ids tensor should be the column - # indices of our input matrix and the values of the weights tensor - # can continue to the actual matrix weights. - # The column arrangement of ids and weights - # will be summed over and does not matter. See the documentation for - # sparse_ops.sparse_tensor_dense_matmul a more detailed explanation - # of the inputs to both ops. - ids = tf.SparseTensor( - indices=inputs.indices, - values=inputs.indices[:, 1], - dense_shape=inputs.dense_shape) - weights = inputs - outputs = tf.nn.embedding_lookup_sparse( - self.kernel, ids, weights, combiner='sum') - else: - outputs = tf.matmul(a=inputs, b=self.kernel) - # Broadcast kernel to inputs. - else: - outputs = tf.tensordot(inputs, self.kernel, [[rank - 1], [0]]) - # Reshape the output back to the original ndim of the input. - if not tf.executing_eagerly(): - shape = inputs.shape.as_list() - output_shape = shape[:-1] + [self.kernel.shape[-1]] - outputs.set_shape(output_shape) + rank = inputs.shape.rank + if rank == 2 or rank is None: + # We use embedding_lookup_sparse as a more efficient matmul + # operation for large sparse input tensors. 
The op will result in a + sparse gradient, as opposed to + sparse_ops.sparse_tensor_dense_matmul which results in dense + gradients. This can lead to significant speedups, see b/171762937. + if isinstance(inputs, tf.SparseTensor): + # We need to fill empty rows, as the op assumes at least one id + # per row. + inputs, _ = tf.sparse.fill_empty_rows(inputs, 0) + # We need to do some munging of our input to use the embedding + # lookup as a matrix multiply. We split our input matrix into + # separate ids and weights tensors. The values of the ids tensor + # should be the column indices of our input matrix and the + # values of the weights tensor can continue to be the actual matrix + # weights. The column arrangement of ids and weights will be + # summed over and does not matter. See the documentation for + # sparse_ops.sparse_tensor_dense_matmul for a more detailed + # explanation of the inputs to both ops. + ids = tf.SparseTensor( + indices=inputs.indices, + values=inputs.indices[:, 1], + dense_shape=inputs.dense_shape, + ) + weights = inputs + outputs = tf.nn.embedding_lookup_sparse( + self.kernel, ids, weights, combiner="sum" + ) + else: + outputs = tf.matmul(a=inputs, b=self.kernel) + # Broadcast kernel to inputs. + else: + outputs = tf.tensordot(inputs, self.kernel, [[rank - 1], [0]]) + # Reshape the output back to the original ndim of the input. + if not tf.executing_eagerly(): + shape = inputs.shape.as_list() + output_shape = shape[:-1] + [self.kernel.shape[-1]] + outputs.set_shape(output_shape) - if self.use_bias: - outputs = tf.nn.bias_add(outputs, self.bias) + if self.use_bias: + outputs = tf.nn.bias_add(outputs, self.bias) - if self.activation is not None: - outputs = self.activation(outputs) + if self.activation is not None: + outputs = self.activation(outputs) - if is_ragged: - outputs = original_inputs.with_flat_values(outputs) + if is_ragged: + outputs = original_inputs.with_flat_values(outputs) - return outputs + return outputs - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape) - input_shape = input_shape.with_rank_at_least(2) - if tf.compat.dimension_value(input_shape[-1]) is None: - raise ValueError('The last dimension of the input shape of a Dense layer ' - 'should be defined. Found None. ' - f'Received: input_shape={input_shape}') - return input_shape[:-1].concatenate(self.units) + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape) + input_shape = input_shape.with_rank_at_least(2) + if tf.compat.dimension_value(input_shape[-1]) is None: + raise ValueError( + "The last dimension of the input shape of a Dense layer " + "should be defined. Found None. 
" + f"Received: input_shape={input_shape}" + ) + return input_shape[:-1].concatenate(self.units) - def get_config(self): - config = super().get_config() - config.update({ - 'units': self.units, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - }) - return config + def get_config(self): + config = super().get_config() + config.update( + { + "units": self.units, + "activation": activations.serialize(self.activation), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize( + self.bias_initializer + ), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize( + self.bias_regularizer + ), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize( + self.kernel_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + ) + return config diff --git a/keras/layers/core/einsum_dense.py b/keras/layers/core/einsum_dense.py index f46d1581a45e..e1d3ca334c00 100644 --- a/keras/layers/core/einsum_dense.py +++ b/keras/layers/core/einsum_dense.py @@ -13,317 +13,349 @@ # limitations under the License. # ============================================================================== """Keras-based einsum dense layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import re +import tensorflow.compat.v2 as tf + from keras import activations from keras import constraints from keras import initializers from keras import regularizers from keras.engine.base_layer import Layer -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export("keras.layers.EinsumDense", - "keras.layers.experimental.EinsumDense") +@keras_export( + "keras.layers.EinsumDense", "keras.layers.experimental.EinsumDense" +) class EinsumDense(Layer): - """A layer that uses `tf.einsum` as the backing computation. - - This layer can perform einsum calculations of arbitrary dimensionality. - - Args: - equation: An equation describing the einsum to perform. This equation must - be a valid einsum string of the form `ab,bc->ac`, `...ab,bc->...ac`, or - `ab...,bc->ac...` where 'ab', 'bc', and 'ac' can be any valid einsum axis - expression sequence. - output_shape: The expected shape of the output tensor (excluding the batch - dimension and any dimensions represented by ellipses). You can specify - None for any dimension that is unknown or can be inferred from the input - shape. - activation: Activation function to use. If you don't specify anything, no - activation is applied (that is, a "linear" activation: `a(x) = x`). - bias_axes: A string containing the output dimension(s) to apply a bias to. - Each character in the `bias_axes` string should correspond to a character - in the output portion of the `equation` string. 
- kernel_initializer: Initializer for the `kernel` weights matrix. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation"). - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. - bias_constraint: Constraint function applied to the bias vector. - - Examples: - - **Biased dense layer with einsums** - - This example shows how to instantiate a standard Keras dense layer using - einsum operations. This example is equivalent to - `tf.keras.layers.Dense(64, use_bias=True)`. - - >>> layer = tf.keras.layers.EinsumDense("ab,bc->ac", - ... output_shape=64, - ... bias_axes="c") - >>> input_tensor = tf.keras.Input(shape=[32]) - >>> output_tensor = layer(input_tensor) - >>> output_tensor - <... shape=(None, 64) dtype=...> - - **Applying a dense layer to a sequence** - - This example shows how to instantiate a layer that applies the same dense - operation to every element in a sequence. Here, the `output_shape` has two - values (since there are two non-batch dimensions in the output); the first - dimension in the `output_shape` is `None`, because the sequence dimension `b` - has an unknown shape. - - >>> layer = tf.keras.layers.EinsumDense("abc,cd->abd", - ... output_shape=(None, 64), - ... bias_axes="d") - >>> input_tensor = tf.keras.Input(shape=[32, 128]) - >>> output_tensor = layer(input_tensor) - >>> output_tensor - <... shape=(None, 32, 64) dtype=...> - - **Applying a dense layer to a sequence using ellipses** - - This example shows how to instantiate a layer that applies the same dense - operation to every element in a sequence, but uses the ellipsis notation - instead of specifying the batch and sequence dimensions. - - Because we are using ellipsis notation and have specified only one axis, the - `output_shape` arg is a single value. When instantiated in this way, the layer - can handle any number of sequence dimensions - including the case where no - sequence dimension exists. - - >>> layer = tf.keras.layers.EinsumDense("...x,xy->...y", - ... output_shape=64, - ... bias_axes="y") - >>> input_tensor = tf.keras.Input(shape=[32, 128]) - >>> output_tensor = layer(input_tensor) - >>> output_tensor - <... 
shape=(None, 32, 64) dtype=...> - """ - - def __init__(self, - equation, - output_shape, - activation=None, - bias_axes=None, - kernel_initializer="glorot_uniform", - bias_initializer="zeros", - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super().__init__(**kwargs) - self.equation = equation - if isinstance(output_shape, int): - self.partial_output_shape = [output_shape] - else: - self.partial_output_shape = list(output_shape) - self.bias_axes = bias_axes - self.activation = activations.get(activation) - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - def build(self, input_shape): - input_shape = tf.TensorShape(input_shape) - shape_data = _analyze_einsum_string(self.equation, - self.bias_axes, - input_shape, - self.partial_output_shape) - kernel_shape, bias_shape, self.full_output_shape = shape_data - self.kernel = self.add_weight( - "kernel", - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - dtype=self.dtype, - trainable=True) - - if bias_shape is not None: - self.bias = self.add_weight( - "bias", - shape=bias_shape, - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - dtype=self.dtype, - trainable=True) - else: - self.bias = None - super().build(input_shape) - - def compute_output_shape(self, _): - return tf.TensorShape(self.full_output_shape) - - def get_config(self): - config = { - "output_shape": self.partial_output_shape, - "equation": self.equation, - "activation": activations.serialize(self.activation), - "bias_axes": self.bias_axes, - "kernel_initializer": initializers.serialize(self.kernel_initializer), - "bias_initializer": initializers.serialize(self.bias_initializer), - "kernel_regularizer": regularizers.serialize(self.kernel_regularizer), - "bias_regularizer": regularizers.serialize(self.bias_regularizer), - "activity_regularizer": - regularizers.serialize(self.activity_regularizer), - "kernel_constraint": constraints.serialize(self.kernel_constraint), - "bias_constraint": constraints.serialize(self.bias_constraint), - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - def call(self, inputs): - ret = tf.einsum(self.equation, inputs, self.kernel) - if self.bias is not None: - ret += self.bias - if self.activation is not None: - ret = self.activation(ret) - return ret + """A layer that uses `tf.einsum` as the backing computation. + + This layer can perform einsum calculations of arbitrary dimensionality. + + Args: + equation: An equation describing the einsum to perform. This equation must + be a valid einsum string of the form `ab,bc->ac`, `...ab,bc->...ac`, or + `ab...,bc->ac...` where 'ab', 'bc', and 'ac' can be any valid einsum + axis expression sequence. + output_shape: The expected shape of the output tensor (excluding the batch + dimension and any dimensions represented by ellipses). You can specify + None for any dimension that is unknown or can be inferred from the input + shape. + activation: Activation function to use. 
If you don't specify anything, no + activation is applied (that is, a "linear" activation: `a(x) = x`). + bias_axes: A string containing the output dimension(s) to apply a bias to. + Each character in the `bias_axes` string should correspond to a + character in the output portion of the `equation` string. + kernel_initializer: Initializer for the `kernel` weights matrix. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation"). + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. + bias_constraint: Constraint function applied to the bias vector. + + Examples: + + **Biased dense layer with einsums** + + This example shows how to instantiate a standard Keras dense layer using + einsum operations. This example is equivalent to + `tf.keras.layers.Dense(64, use_bias=True)`. + + >>> layer = tf.keras.layers.EinsumDense("ab,bc->ac", + ... output_shape=64, + ... bias_axes="c") + >>> input_tensor = tf.keras.Input(shape=[32]) + >>> output_tensor = layer(input_tensor) + >>> output_tensor + <... shape=(None, 64) dtype=...> + + **Applying a dense layer to a sequence** + + This example shows how to instantiate a layer that applies the same dense + operation to every element in a sequence. Here, the `output_shape` has two + values (since there are two non-batch dimensions in the output); the first + dimension in the `output_shape` is `None`, because the sequence dimension + `b` has an unknown shape. + + >>> layer = tf.keras.layers.EinsumDense("abc,cd->abd", + ... output_shape=(None, 64), + ... bias_axes="d") + >>> input_tensor = tf.keras.Input(shape=[32, 128]) + >>> output_tensor = layer(input_tensor) + >>> output_tensor + <... shape=(None, 32, 64) dtype=...> + + **Applying a dense layer to a sequence using ellipses** + + This example shows how to instantiate a layer that applies the same dense + operation to every element in a sequence, but uses the ellipsis notation + instead of specifying the batch and sequence dimensions. + + Because we are using ellipsis notation and have specified only one axis, the + `output_shape` arg is a single value. When instantiated in this way, the + layer can handle any number of sequence dimensions - including the case + where no sequence dimension exists. + + >>> layer = tf.keras.layers.EinsumDense("...x,xy->...y", + ... output_shape=64, + ... bias_axes="y") + >>> input_tensor = tf.keras.Input(shape=[32, 128]) + >>> output_tensor = layer(input_tensor) + >>> output_tensor + <... 
shape=(None, 32, 64) dtype=...> + """ + + def __init__( + self, + equation, + output_shape, + activation=None, + bias_axes=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__(**kwargs) + self.equation = equation + if isinstance(output_shape, int): + self.partial_output_shape = [output_shape] + else: + self.partial_output_shape = list(output_shape) + self.bias_axes = bias_axes + self.activation = activations.get(activation) + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + + def build(self, input_shape): + input_shape = tf.TensorShape(input_shape) + shape_data = _analyze_einsum_string( + self.equation, + self.bias_axes, + input_shape, + self.partial_output_shape, + ) + kernel_shape, bias_shape, self.full_output_shape = shape_data + self.kernel = self.add_weight( + "kernel", + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + dtype=self.dtype, + trainable=True, + ) + + if bias_shape is not None: + self.bias = self.add_weight( + "bias", + shape=bias_shape, + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + dtype=self.dtype, + trainable=True, + ) + else: + self.bias = None + super().build(input_shape) + + def compute_output_shape(self, _): + return tf.TensorShape(self.full_output_shape) + + def get_config(self): + config = { + "output_shape": self.partial_output_shape, + "equation": self.equation, + "activation": activations.serialize(self.activation), + "bias_axes": self.bias_axes, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs): + ret = tf.einsum(self.equation, inputs, self.kernel) + if self.bias is not None: + ret += self.bias + if self.activation is not None: + ret = self.activation(ret) + return ret def _analyze_einsum_string(equation, bias_axes, input_shape, output_shape): - """Analyzes an einsum string to determine the required weight shape.""" - - dot_replaced_string = re.sub(r"\.\.\.", "0", equation) - - # This is the case where no ellipses are present in the string. - split_string = re.match("([a-zA-Z]+),([a-zA-Z]+)->([a-zA-Z]+)", - dot_replaced_string) - if split_string: - return _analyze_split_string(split_string, bias_axes, input_shape, - output_shape) - - # This is the case where ellipses are present on the left. 
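# What the helper below computes, sketched with the private API the unit
# tests also call (illustrative only; the concrete input shape here is an
# assumption chosen to match the specs): for the BERT-style equation
# "abc,cde->abde" with a bias on "e", the kernel shape is assembled from
# the input/output dims, and output axes before the first bias axis
# broadcast implicitly.
from keras.layers.core import einsum_dense

weight, bias, full_out = einsum_dense._analyze_einsum_string(
    equation="abc,cde->abde",
    bias_axes="e",
    input_shape=(None, 1, 2),  # a=batch, b=1, c=2
    output_shape=(1, 3, 4),    # b=1, d=3, e=4 (batch dim excluded)
)
print(weight)    # [2, 3, 4]       -> dims c, d, e
print(bias)      # [4]             -> only "e" kept; b and d broadcast
print(full_out)  # [None, 1, 3, 4]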
- split_string = re.match("0([a-zA-Z]+),([a-zA-Z]+)->0([a-zA-Z]+)", - dot_replaced_string) - if split_string: - return _analyze_split_string( - split_string, bias_axes, input_shape, output_shape, left_elided=True) - - # This is the case where ellipses are present on the right. - split_string = re.match("([a-zA-Z]{2,})0,([a-zA-Z]+)->([a-zA-Z]+)0", - dot_replaced_string) - if split_string: - return _analyze_split_string(split_string, bias_axes, input_shape, - output_shape) - - raise ValueError( - f"Invalid einsum equation '{equation}'. Equations must be in the form " - "[X],[Y]->[Z], ...[X],[Y]->...[Z], or [X]...,[Y]->[Z]....") - - -def _analyze_split_string(split_string, - bias_axes, - input_shape, - output_shape, - left_elided=False): - """Analyze an pre-split einsum string to find the weight shape.""" - input_spec = split_string.group(1) - weight_spec = split_string.group(2) - output_spec = split_string.group(3) - elided = len(input_shape) - len(input_spec) - - if isinstance(output_shape, int): - output_shape = [output_shape] - else: - output_shape = list(output_shape) - - output_shape.insert(0, input_shape[0]) - - if elided > 0 and left_elided: - for i in range(1, elided): - # We already inserted the 0th input dimension at dim 0, so we need to - # start at location 1 here. - output_shape.insert(1, input_shape[i]) - elif elided > 0 and not left_elided: - for i in range(len(input_shape) - elided, len(input_shape)): - output_shape.append(input_shape[i]) - - if left_elided: - # If we have beginning dimensions elided, we need to use negative indexing - # to determine where in the input dimension our values are. - input_dim_map = { - dim: (i + elided) - len(input_shape) for i, dim in enumerate(input_spec) - } - # Because we've constructed the full output shape already, we don't need - # to do negative indexing. - output_dim_map = {dim: (i + elided) for i, dim in enumerate(output_spec)} - else: - input_dim_map = {dim: i for i, dim in enumerate(input_spec)} - output_dim_map = {dim: i for i, dim in enumerate(output_spec)} - - for dim in input_spec: - input_shape_at_dim = input_shape[input_dim_map[dim]] - if dim in output_dim_map: - output_shape_at_dim = output_shape[output_dim_map[dim]] - if (output_shape_at_dim is not None and - output_shape_at_dim != input_shape_at_dim): - raise ValueError( - "Input shape and output shape do not match at shared " - f"dimension '{dim}'. Input shape is {input_shape_at_dim}, " - "and output shape " - f"is {output_shape[output_dim_map[dim]]}.") - - for dim in output_spec: - if dim not in input_spec and dim not in weight_spec: - raise ValueError( - f"Dimension '{dim}' was specified in the output '{output_spec}' but " - f"has no corresponding dim in the input spec '{input_spec}' or " - f"weight spec '{output_spec}'") - - weight_shape = [] - for dim in weight_spec: - if dim in input_dim_map: - weight_shape.append(input_shape[input_dim_map[dim]]) - elif dim in output_dim_map: - weight_shape.append(output_shape[output_dim_map[dim]]) + """Analyzes an einsum string to determine the required weight shape.""" + + dot_replaced_string = re.sub(r"\.\.\.", "0", equation) + + # This is the case where no ellipses are present in the string. + split_string = re.match( + "([a-zA-Z]+),([a-zA-Z]+)->([a-zA-Z]+)", dot_replaced_string + ) + if split_string: + return _analyze_split_string( + split_string, bias_axes, input_shape, output_shape + ) + + # This is the case where ellipses are present on the left. 
+ split_string = re.match( + "0([a-zA-Z]+),([a-zA-Z]+)->0([a-zA-Z]+)", dot_replaced_string + ) + if split_string: + return _analyze_split_string( + split_string, bias_axes, input_shape, output_shape, left_elided=True + ) + + # This is the case where ellipses are present on the right. + split_string = re.match( + "([a-zA-Z]{2,})0,([a-zA-Z]+)->([a-zA-Z]+)0", dot_replaced_string + ) + if split_string: + return _analyze_split_string( + split_string, bias_axes, input_shape, output_shape + ) + + raise ValueError( + f"Invalid einsum equation '{equation}'. Equations must be in the form " + "[X],[Y]->[Z], ...[X],[Y]->...[Z], or [X]...,[Y]->[Z]...." + ) + + +def _analyze_split_string( + split_string, bias_axes, input_shape, output_shape, left_elided=False +): + """Analyze a pre-split einsum string to find the weight shape.""" + input_spec = split_string.group(1) + weight_spec = split_string.group(2) + output_spec = split_string.group(3) + elided = len(input_shape) - len(input_spec) + + if isinstance(output_shape, int): + output_shape = [output_shape] else: - raise ValueError( - f"Weight dimension '{dim}' did not have a match in either " - f"the input spec '{input_spec}' or the output spec '{output_spec}'. " - "For this layer, the weight must be fully specified.") - - if bias_axes is not None: - num_left_elided = elided if left_elided else 0 - idx_map = { - char: output_shape[i + num_left_elided] - for i, char in enumerate(output_spec) - } - - for char in bias_axes: - if char not in output_spec: - raise ValueError( - f"Bias dimension '{char}' was requested, but is not part " - f"of the output spec '{output_spec}'") - - first_bias_location = min([output_spec.find(char) for char in bias_axes]) - bias_output_spec = output_spec[first_bias_location:] - - bias_shape = [ - idx_map[char] if char in bias_axes else 1 for char in bias_output_spec - ] - - if not left_elided: - for _ in range(elided): - bias_shape.append(1) - else: - bias_shape = None - - return weight_shape, bias_shape, output_shape + output_shape = list(output_shape) + + output_shape.insert(0, input_shape[0]) + + if elided > 0 and left_elided: + for i in range(1, elided): + # We already inserted the 0th input dimension at dim 0, so we need + # to start at location 1 here. + output_shape.insert(1, input_shape[i]) + elif elided > 0 and not left_elided: + for i in range(len(input_shape) - elided, len(input_shape)): + output_shape.append(input_shape[i]) + + if left_elided: + # If we have beginning dimensions elided, we need to use negative + # indexing to determine where in the input dimension our values are. + input_dim_map = { + dim: (i + elided) - len(input_shape) + for i, dim in enumerate(input_spec) + } + # Because we've constructed the full output shape already, we don't need + # to do negative indexing. + output_dim_map = { + dim: (i + elided) for i, dim in enumerate(output_spec) + } + else: + input_dim_map = {dim: i for i, dim in enumerate(input_spec)} + output_dim_map = {dim: i for i, dim in enumerate(output_spec)} + + for dim in input_spec: + input_shape_at_dim = input_shape[input_dim_map[dim]] + if dim in output_dim_map: + output_shape_at_dim = output_shape[output_dim_map[dim]] + if ( + output_shape_at_dim is not None + and output_shape_at_dim != input_shape_at_dim + ): + raise ValueError( + "Input shape and output shape do not match at shared " + f"dimension '{dim}'. Input shape is {input_shape_at_dim}, " + "and output shape " + f"is {output_shape[output_dim_map[dim]]}." 
+ ) + + for dim in output_spec: + if dim not in input_spec and dim not in weight_spec: + raise ValueError( + f"Dimension '{dim}' was specified in the output " + f"'{output_spec}' but has no corresponding dim in the input " + f"spec '{input_spec}' or weight spec '{output_spec}'" + ) + + weight_shape = [] + for dim in weight_spec: + if dim in input_dim_map: + weight_shape.append(input_shape[input_dim_map[dim]]) + elif dim in output_dim_map: + weight_shape.append(output_shape[output_dim_map[dim]]) + else: + raise ValueError( + f"Weight dimension '{dim}' did not have a match in either " + f"the input spec '{input_spec}' or the output " + f"spec '{output_spec}'. For this layer, the weight must " + "be fully specified." + ) + + if bias_axes is not None: + num_left_elided = elided if left_elided else 0 + idx_map = { + char: output_shape[i + num_left_elided] + for i, char in enumerate(output_spec) + } + + for char in bias_axes: + if char not in output_spec: + raise ValueError( + f"Bias dimension '{char}' was requested, but is not part " + f"of the output spec '{output_spec}'" + ) + + first_bias_location = min( + [output_spec.find(char) for char in bias_axes] + ) + bias_output_spec = output_spec[first_bias_location:] + + bias_shape = [ + idx_map[char] if char in bias_axes else 1 + for char in bias_output_spec + ] + + if not left_elided: + for _ in range(elided): + bias_shape.append(1) + else: + bias_shape = None + + return weight_shape, bias_shape, output_shape diff --git a/keras/layers/core/einsum_dense_test.py b/keras/layers/core/einsum_dense_test.py index 3561ff4dce58..f2cb24457dfc 100644 --- a/keras/layers/core/einsum_dense_test.py +++ b/keras/layers/core/einsum_dense_test.py @@ -15,13 +15,14 @@ """Tests for Keras-based einsum dense layer.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.layers.core import einsum_dense from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes @@ -34,8 +35,9 @@ "output_shape": [], "expected_weight_shape": [32], "expected_bias_shape": None, - "expected_output_shape": (None,) - }, { + "expected_output_shape": (None,), + }, + { "testcase_name": "_2d_middle_weight", "equation": "ab,bc->ac", "bias_axes": None, @@ -43,8 +45,9 @@ "output_shape": (64), "expected_weight_shape": [32, 64], "expected_bias_shape": None, - "expected_output_shape": (None, 64) - }, { + "expected_output_shape": (None, 64), + }, + { "testcase_name": "_3d_bert", "equation": "abc,cde->abde", "bias_axes": None, @@ -52,8 +55,9 @@ "output_shape": (1, 3, 4), "expected_weight_shape": [2, 3, 4], "expected_bias_shape": None, - "expected_output_shape": (None, 1, 3, 4) - }, { + "expected_output_shape": (None, 1, 3, 4), + }, + { "testcase_name": "_3d_3_bias", "equation": "abc,cde->abde", "bias_axes": "e", @@ -61,8 +65,9 @@ "output_shape": (1, 3, 4), "expected_weight_shape": [2, 3, 4], "expected_bias_shape": [4], - "expected_output_shape": (None, 1, 3, 4) - }, { + "expected_output_shape": (None, 1, 3, 4), + }, + { "testcase_name": "_3d_2_bias", "equation": "abc,cde->abde", "bias_axes": "d", @@ -70,8 +75,9 @@ "output_shape": (1, 3, 4), "expected_weight_shape": [2, 3, 4], "expected_bias_shape": [3, 1], - "expected_output_shape": (None, 1, 3, 4) - }, { + "expected_output_shape": (None, 1, 3, 4), + }, + { "testcase_name": "_3d_1_3_bias", "equation": "abc,cde->abde", "bias_axes": "be", @@ -79,8 +85,9 @@ 
"output_shape": (7, 3, 4), "expected_weight_shape": [2, 3, 4], "expected_bias_shape": [7, 1, 4], - "expected_output_shape": (None, 7, 3, 4) - }, { + "expected_output_shape": (None, 7, 3, 4), + }, + { "testcase_name": "_3d_bert_projection", "equation": "BFNH,NHD->BFD", "bias_axes": None, @@ -88,8 +95,9 @@ "output_shape": (1, 4), "expected_weight_shape": [2, 3, 4], "expected_bias_shape": None, - "expected_output_shape": (None, 1, 4) - }, { + "expected_output_shape": (None, 1, 4), + }, + { "testcase_name": "_2d_bert", "equation": "abc,cd->abd", "bias_axes": None, @@ -97,8 +105,9 @@ "output_shape": (1, 4), "expected_weight_shape": [2, 4], "expected_bias_shape": None, - "expected_output_shape": (None, 1, 4) - }, { + "expected_output_shape": (None, 1, 4), + }, + { "testcase_name": "_embedding_1d", "equation": "i,d->id", "bias_axes": None, @@ -106,8 +115,9 @@ "output_shape": (2), "expected_weight_shape": [2], "expected_bias_shape": None, - "expected_output_shape": (None, 2) - }, { + "expected_output_shape": (None, 2), + }, + { "testcase_name": "_xlnet_lm", "equation": "ibd,nd->ibn", "bias_axes": None, @@ -115,8 +125,9 @@ "output_shape": (None, 2), "expected_weight_shape": [2, 1], "expected_bias_shape": None, - "expected_output_shape": (None, None, 2) - }, { + "expected_output_shape": (None, None, 2), + }, + { "testcase_name": "_2d_precast", "equation": "...b,bc->...c", "bias_axes": None, @@ -124,8 +135,9 @@ "output_shape": (64), "expected_weight_shape": [32, 64], "expected_bias_shape": None, - "expected_output_shape": (None, 64) - }, { + "expected_output_shape": (None, 64), + }, + { "testcase_name": "_2d_precast_elided_input_used_in_output", "equation": "...bc,bc->...b", "bias_axes": None, @@ -133,8 +145,9 @@ "output_shape": (32), "expected_weight_shape": [32, 64], "expected_bias_shape": None, - "expected_output_shape": (None, 32) - }, { + "expected_output_shape": (None, 32), + }, + { "testcase_name": "_2d_precast_multiple_elided_dims", "equation": "...b,bc->...c", "bias_axes": None, @@ -142,8 +155,9 @@ "output_shape": (64), "expected_weight_shape": [32, 64], "expected_bias_shape": None, - "expected_output_shape": (None, None, 64) - }, { + "expected_output_shape": (None, None, 64), + }, + { "testcase_name": "_3d_precast", "equation": "...c,cde->...de", "bias_axes": None, @@ -151,8 +165,9 @@ "output_shape": (3, 4), "expected_weight_shape": [2, 3, 4], "expected_bias_shape": None, - "expected_output_shape": (None, 1, 3, 4) - }, { + "expected_output_shape": (None, 1, 3, 4), + }, + { "testcase_name": "_3d_precast_3_bias", "equation": "...c,cde->...de", "bias_axes": "e", @@ -160,8 +175,9 @@ "output_shape": (3, 4), "expected_weight_shape": [2, 3, 4], "expected_bias_shape": [4], - "expected_output_shape": (None, 1, 3, 4) - }, { + "expected_output_shape": (None, 1, 3, 4), + }, + { "testcase_name": "_3d_precast_2_bias", "equation": "...c,cde->...de", "bias_axes": "d", @@ -169,8 +185,9 @@ "output_shape": (3, 4), "expected_weight_shape": [2, 3, 4], "expected_bias_shape": [3, 1], - "expected_output_shape": (None, 1, 3, 4) - }, { + "expected_output_shape": (None, 1, 3, 4), + }, + { "testcase_name": "_3d_precast_2_3_bias", "equation": "...c,cde->...de", "bias_axes": "de", @@ -178,8 +195,9 @@ "output_shape": (3, 4), "expected_weight_shape": [2, 3, 4], "expected_bias_shape": [3, 4], - "expected_output_shape": (None, 1, 3, 4) - }, { + "expected_output_shape": (None, 1, 3, 4), + }, + { "testcase_name": "_2d_postcast", "equation": "bc...,cd->bd...", "bias_axes": None, @@ -187,8 +205,9 @@ "output_shape": (4), 
"expected_weight_shape": [1, 4], "expected_bias_shape": None, - "expected_output_shape": (None, 4, 2, 3) - }, { + "expected_output_shape": (None, 4, 2, 3), + }, + { "testcase_name": "_3d_postcast", "equation": "bc...,cde->bde...", "bias_axes": None, @@ -196,8 +215,9 @@ "output_shape": (3, 4), "expected_weight_shape": [1, 3, 4], "expected_bias_shape": None, - "expected_output_shape": (None, 3, 4, 2) - }, { + "expected_output_shape": (None, 3, 4, 2), + }, + { "testcase_name": "_3d_postcast_1_bias", "equation": "bc...,cde->bde...", "bias_axes": "d", @@ -205,8 +225,9 @@ "output_shape": (3, 4), "expected_weight_shape": [1, 3, 4], "expected_bias_shape": [3, 1, 1], - "expected_output_shape": (None, 3, 4, 2) - }, { + "expected_output_shape": (None, 3, 4, 2), + }, + { "testcase_name": "_3d_postcast_2_bias", "equation": "bc...,cde->bde...", "bias_axes": "e", @@ -214,8 +235,9 @@ "output_shape": (3, 4), "expected_weight_shape": [1, 3, 4], "expected_bias_shape": [4, 1], - "expected_output_shape": (None, 3, 4, 2) - }, { + "expected_output_shape": (None, 3, 4, 2), + }, + { "testcase_name": "_3d_postcast_1_2_bias", "equation": "bc...,cde->bde...", "bias_axes": "de", @@ -223,96 +245,124 @@ "output_shape": (3, 4), "expected_weight_shape": [1, 3, 4], "expected_bias_shape": [3, 4, 1], - "expected_output_shape": (None, 3, 4, 2) - }) + "expected_output_shape": (None, 3, 4, 2), + }, +) class TestEinsumDenseLayer(test_combinations.TestCase): + def test_weight_shapes( + self, + equation, + bias_axes, + input_shape, + output_shape, + expected_weight_shape, + expected_bias_shape, + expected_output_shape, + ): + del expected_output_shape # Not used in this test. - def test_weight_shapes(self, equation, bias_axes, input_shape, output_shape, - expected_weight_shape, expected_bias_shape, - expected_output_shape): - del expected_output_shape # Not used in this test. + weight_shape, bias_shape, _ = einsum_dense._analyze_einsum_string( + equation, bias_axes, input_shape, output_shape + ) - weight_shape, bias_shape, _ = einsum_dense._analyze_einsum_string( - equation, bias_axes, input_shape, output_shape) + self.assertAllEqual(expected_weight_shape, weight_shape) + self.assertAllEqual(expected_bias_shape, bias_shape) - self.assertAllEqual(expected_weight_shape, weight_shape) - self.assertAllEqual(expected_bias_shape, bias_shape) + def test_layer_creation( + self, + equation, + bias_axes, + input_shape, + output_shape, + expected_weight_shape, + expected_bias_shape, + expected_output_shape, + ): + # Keras elides the 0-dimension of the input shape when constructing + # inputs. + non_batch_input_shape = list(input_shape)[1:] - def test_layer_creation(self, equation, bias_axes, input_shape, output_shape, - expected_weight_shape, expected_bias_shape, - expected_output_shape): - # Keras elides the 0-dimension of the input shape when constructing inputs. 
- non_batch_input_shape = list(input_shape)[1:] + input_tensor = keras.Input(shape=non_batch_input_shape) + layer = einsum_dense.EinsumDense( + equation=equation, output_shape=output_shape, bias_axes=bias_axes + ) + output_tensor = layer(input_tensor) - input_tensor = keras.Input(shape=non_batch_input_shape) - layer = einsum_dense.EinsumDense( - equation=equation, output_shape=output_shape, bias_axes=bias_axes) - output_tensor = layer(input_tensor) - - self.assertAllEqual(expected_weight_shape, layer.kernel.shape.as_list()) - if expected_bias_shape is None: - self.assertIsNone(layer.bias) - else: - self.assertAllEqual(expected_bias_shape, layer.bias.shape.as_list()) - self.assertAllEqual(expected_output_shape, output_tensor.shape.as_list()) + self.assertAllEqual(expected_weight_shape, layer.kernel.shape.as_list()) + if expected_bias_shape is None: + self.assertIsNone(layer.bias) + else: + self.assertAllEqual(expected_bias_shape, layer.bias.shape.as_list()) + self.assertAllEqual( + expected_output_shape, output_tensor.shape.as_list() + ) @test_combinations.run_all_keras_modes class TestEinsumLayerAPI(test_combinations.TestCase): + def test_layer_api(self): + input_data = np.array([[1.0, 2.0], [3.0, 4.0]]) + kwargs = { + "equation": "...b,bc->...c", + "bias_axes": "c", + "output_shape": 4, + "bias_initializer": keras.initializers.constant(0.03), + "kernel_initializer": keras.initializers.constant(0.5), + "dtype": input_data.dtype, + } + expected_output = np.array( + [[1.53, 1.53, 1.53, 1.53], [3.53, 3.53, 3.53, 3.53]] + ) - def test_layer_api(self): - input_data = np.array([[1.0, 2.0], [3.0, 4.0]]) - kwargs = { - "equation": "...b,bc->...c", - "bias_axes": "c", - "output_shape": 4, - "bias_initializer": keras.initializers.constant(0.03), - "kernel_initializer": keras.initializers.constant(0.5), - "dtype": input_data.dtype - } - expected_output = np.array([[1.53, 1.53, 1.53, 1.53], - [3.53, 3.53, 3.53, 3.53]]) - - output_data = test_utils.layer_test( - einsum_dense.EinsumDense, - kwargs=kwargs, - input_shape=(None, 2), - input_data=input_data) + output_data = test_utils.layer_test( + einsum_dense.EinsumDense, + kwargs=kwargs, + input_shape=(None, 2), + input_data=input_data, + ) - self.assertAllClose(expected_output, output_data) + self.assertAllClose(expected_output, output_data) - def test_unspecified_bias_dim_fails(self): - input_tensor = keras.Input(shape=(32,)) - layer = einsum_dense.EinsumDense( - equation="ab,bc->ac", output_shape=64, bias_axes="y") - with self.assertRaisesRegex( - ValueError, ".*is not part of the output spec.*"): - _ = layer(input_tensor) + def test_unspecified_bias_dim_fails(self): + input_tensor = keras.Input(shape=(32,)) + layer = einsum_dense.EinsumDense( + equation="ab,bc->ac", output_shape=64, bias_axes="y" + ) + with self.assertRaisesRegex( + ValueError, ".*is not part of the output spec.*" + ): + _ = layer(input_tensor) - def test_incompatible_input_output_shape_fails(self): - input_tensor = keras.Input(shape=(32, 64)) - layer = einsum_dense.EinsumDense( - equation="abc,cd->abd", output_shape=(10, 96)) - with self.assertRaisesRegex( - ValueError, ".*Input shape and output shape do not match at shared " - "dimension 'b'.*"): - _ = layer(input_tensor) + def test_incompatible_input_output_shape_fails(self): + input_tensor = keras.Input(shape=(32, 64)) + layer = einsum_dense.EinsumDense( + equation="abc,cd->abd", output_shape=(10, 96) + ) + with self.assertRaisesRegex( + ValueError, + ".*Input shape and output shape do not match at shared " + "dimension 'b'.*", + 
): + _ = layer(input_tensor) - def test_unspecified_output_dim_fails(self): - input_tensor = keras.Input(shape=(32,)) - layer = einsum_dense.EinsumDense(equation="ab,bc->cd", output_shape=64) - with self.assertRaisesRegex( - ValueError, ".*Dimension 'd' was specified in the output 'cd' but has " - "no corresponding dim.*"): - _ = layer(input_tensor) + def test_unspecified_output_dim_fails(self): + input_tensor = keras.Input(shape=(32,)) + layer = einsum_dense.EinsumDense(equation="ab,bc->cd", output_shape=64) + with self.assertRaisesRegex( + ValueError, + ".*Dimension 'd' was specified in the output 'cd' but has " + "no corresponding dim.*", + ): + _ = layer(input_tensor) - def test_unspecified_weight_dim_fails(self): - input_tensor = keras.Input(shape=(32,)) - layer = einsum_dense.EinsumDense(equation="ab,zd->ad", output_shape=64) - with self.assertRaisesRegex(ValueError, - ".*Weight dimension 'z' did not have a match "): - _ = layer(input_tensor) + def test_unspecified_weight_dim_fails(self): + input_tensor = keras.Input(shape=(32,)) + layer = einsum_dense.EinsumDense(equation="ab,zd->ad", output_shape=64) + with self.assertRaisesRegex( + ValueError, ".*Weight dimension 'z' did not have a match " + ): + _ = layer(input_tensor) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/core/embedding.py b/keras/layers/core/embedding.py index 7af8bd18e002..cd75001b1247 100644 --- a/keras/layers/core/embedding.py +++ b/keras/layers/core/embedding.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Embedding layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras import constraints @@ -23,200 +25,282 @@ from keras.engine import base_layer_utils from keras.engine.base_layer import Layer from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Embedding') +@keras_export("keras.layers.Embedding") class Embedding(Layer): - """Turns positive integers (indexes) into dense vectors of fixed size. - - e.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]` - - This layer can only be used on positive integer inputs of a fixed range. The - `tf.keras.layers.TextVectorization`, `tf.keras.layers.StringLookup`, - and `tf.keras.layers.IntegerLookup` preprocessing layers can help prepare - inputs for an `Embedding` layer. - - This layer accepts `tf.Tensor` and `tf.RaggedTensor` inputs. It cannot be - called with `tf.SparseTensor` input. - - Example: - - >>> model = tf.keras.Sequential() - >>> model.add(tf.keras.layers.Embedding(1000, 64, input_length=10)) - >>> # The model will take as input an integer matrix of size (batch, - >>> # input_length), and the largest integer (i.e. word index) in the input - >>> # should be no larger than 999 (vocabulary size). - >>> # Now model.output_shape is (None, 10, 64), where `None` is the batch - >>> # dimension. - >>> input_array = np.random.randint(1000, size=(32, 10)) - >>> model.compile('rmsprop', 'mse') - >>> output_array = model.predict(input_array) - >>> print(output_array.shape) - (32, 10, 64) - - Args: - input_dim: Integer. Size of the vocabulary, - i.e. maximum integer index + 1. - output_dim: Integer. Dimension of the dense embedding. - embeddings_initializer: Initializer for the `embeddings` - matrix (see `keras.initializers`). 
- embeddings_regularizer: Regularizer function applied to - the `embeddings` matrix (see `keras.regularizers`). - embeddings_constraint: Constraint function applied to - the `embeddings` matrix (see `keras.constraints`). - mask_zero: Boolean, whether or not the input value 0 is a special "padding" - value that should be masked out. - This is useful when using recurrent layers - which may take variable length input. - If this is `True`, then all subsequent layers - in the model need to support masking or an exception will be raised. - If mask_zero is set to True, as a consequence, index 0 cannot be - used in the vocabulary (input_dim should equal size of - vocabulary + 1). - input_length: Length of input sequences, when it is constant. - This argument is required if you are going to connect - `Flatten` then `Dense` layers upstream - (without it, the shape of the dense outputs cannot be computed). - - Input shape: - 2D tensor with shape: `(batch_size, input_length)`. - - Output shape: - 3D tensor with shape: `(batch_size, input_length, output_dim)`. - - **Note on variable placement:** - By default, if a GPU is available, the embedding matrix will be placed on - the GPU. This achieves the best performance, but it might cause issues: - - - You may be using an optimizer that does not support sparse GPU kernels. - In this case you will see an error upon training your model. - - Your embedding matrix may be too large to fit on your GPU. In this case - you will see an Out Of Memory (OOM) error. - - In such cases, you should place the embedding matrix on the CPU memory. - You can do so with a device scope, as such: - - ```python - with tf.device('cpu:0'): - embedding_layer = Embedding(...) - embedding_layer.build() - ``` - - The pre-built `embedding_layer` instance can then be added to a `Sequential` - model (e.g. `model.add(embedding_layer)`), called in a Functional model - (e.g. `x = embedding_layer(x)`), or used in a subclassed model. - """ - - @utils.allow_initializer_layout - def __init__(self, - input_dim, - output_dim, - embeddings_initializer='uniform', - embeddings_regularizer=None, - activity_regularizer=None, - embeddings_constraint=None, - mask_zero=False, - input_length=None, - **kwargs): - if 'input_shape' not in kwargs: - if input_length: - kwargs['input_shape'] = (input_length,) - else: - kwargs['input_shape'] = (None,) - if input_dim <= 0 or output_dim <= 0: - raise ValueError( - 'Both `input_dim` and `output_dim` should be positive, ' - f'Received input_dim = {input_dim} and output_dim = {output_dim}') - if (not base_layer_utils.v2_dtype_behavior_enabled() and - 'dtype' not in kwargs): - # In TF1, the dtype defaults to the input dtype which is typically int32, - # so explicitly set it to floatx - kwargs['dtype'] = backend.floatx() - # We set autocast to False, as we do not want to cast floating-point inputs - # to self.dtype. In call(), we cast to int32, and casting to self.dtype - # before casting to int32 might cause the int32 values to be different due - # to a loss of precision.
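The precision hazard described in the comment above is easy to demonstrate in isolation. A minimal sketch, with 2049 chosen only because it exceeds the largest integer that float16 represents exactly:

```python
import tensorflow as tf

# Ids routed through a half-precision cast (as autocast would do under a
# mixed_float16 policy) can silently change value before the int32 cast.
ids = tf.constant([2049.0], dtype=tf.float32)
direct = tf.cast(ids, tf.int32)                        # -> [2049]
via_f16 = tf.cast(tf.cast(ids, tf.float16), tf.int32)  # -> [2048]
print(direct.numpy(), via_f16.numpy())
```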
- kwargs['autocast'] = False - super().__init__(**kwargs) - - self.input_dim = input_dim - self.output_dim = output_dim - self.embeddings_initializer = initializers.get(embeddings_initializer) - self.embeddings_regularizer = regularizers.get(embeddings_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.embeddings_constraint = constraints.get(embeddings_constraint) - self.mask_zero = mask_zero - self.supports_masking = mask_zero - self.input_length = input_length - - @tf_utils.shape_type_conversion - def build(self, input_shape=None): - self.embeddings = self.add_weight( - shape=(self.input_dim, self.output_dim), - initializer=self.embeddings_initializer, - name='embeddings', - regularizer=self.embeddings_regularizer, - constraint=self.embeddings_constraint, - experimental_autocast=False) - self.built = True - - def compute_mask(self, inputs, mask=None): - if not self.mask_zero: - return None - return tf.not_equal(inputs, 0) - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if self.input_length is None: - return input_shape + (self.output_dim,) - else: - # input_length can be tuple if input is 3D or higher - if isinstance(self.input_length, (list, tuple)): - in_lens = list(self.input_length) - else: - in_lens = [self.input_length] - if len(in_lens) != len(input_shape) - 1: - raise ValueError( - f'"input_length" is {self.input_length}, but received input has ' - f'shape {input_shape}') - else: - for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])): - if s1 is not None and s2 is not None and s1 != s2: + """Turns positive integers (indexes) into dense vectors of fixed size. + + e.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]` + + This layer can only be used on positive integer inputs of a fixed range. The + `tf.keras.layers.TextVectorization`, `tf.keras.layers.StringLookup`, + and `tf.keras.layers.IntegerLookup` preprocessing layers can help prepare + inputs for an `Embedding` layer. + + This layer accepts `tf.Tensor`, `tf.RaggedTensor` and `tf.SparseTensor` + input. + + Example: + + >>> model = tf.keras.Sequential() + >>> model.add(tf.keras.layers.Embedding(1000, 64, input_length=10)) + >>> # The model will take as input an integer matrix of size (batch, + >>> # input_length), and the largest integer (i.e. word index) in the input + >>> # should be no larger than 999 (vocabulary size). + >>> # Now model.output_shape is (None, 10, 64), where `None` is the batch + >>> # dimension. + >>> input_array = np.random.randint(1000, size=(32, 10)) + >>> model.compile('rmsprop', 'mse') + >>> output_array = model.predict(input_array) + >>> print(output_array.shape) + (32, 10, 64) + + Args: + input_dim: Integer. Size of the vocabulary, + i.e. maximum integer index + 1. + output_dim: Integer. Dimension of the dense embedding. + embeddings_initializer: Initializer for the `embeddings` + matrix (see `keras.initializers`). + embeddings_regularizer: Regularizer function applied to + the `embeddings` matrix (see `keras.regularizers`). + embeddings_constraint: Constraint function applied to + the `embeddings` matrix (see `keras.constraints`). + mask_zero: Boolean, whether or not the input value 0 is a special + "padding" value that should be masked out. This is useful when using + recurrent layers which may take variable length input. If this is + `True`, then all subsequent layers in the model need to support masking + or an exception will be raised. 
If mask_zero is set to True, as a + consequence, index 0 cannot be used in the vocabulary (input_dim should + equal size of vocabulary + 1). + input_length: Length of input sequences, when it is constant. + This argument is required if you are going to connect + `Flatten` then `Dense` layers upstream + (without it, the shape of the dense outputs cannot be computed). + sparse: If True, calling this layer returns a `tf.SparseTensor`. If False, + the layer returns a dense `tf.Tensor`. For an entry with no features in + a sparse tensor (entry with value 0), the embedding vector of index 0 is + returned by default. + + Input shape: + 2D tensor with shape: `(batch_size, input_length)`. + + Output shape: + 3D tensor with shape: `(batch_size, input_length, output_dim)`. + + **Note on variable placement:** + By default, if a GPU is available, the embedding matrix will be placed on + the GPU. This achieves the best performance, but it might cause issues: + + - You may be using an optimizer that does not support sparse GPU kernels. + In this case you will see an error upon training your model. + - Your embedding matrix may be too large to fit on your GPU. In this case + you will see an Out Of Memory (OOM) error. + + In such cases, you should place the embedding matrix on the CPU memory. + You can do so with a device scope, as such: + + ```python + with tf.device('cpu:0'): + embedding_layer = Embedding(...) + embedding_layer.build() + ``` + + The pre-built `embedding_layer` instance can then be added to a `Sequential` + model (e.g. `model.add(embedding_layer)`), called in a Functional model + (e.g. `x = embedding_layer(x)`), or used in a subclassed model. + """ + + @utils.allow_initializer_layout + def __init__( + self, + input_dim, + output_dim, + embeddings_initializer="uniform", + embeddings_regularizer=None, + activity_regularizer=None, + embeddings_constraint=None, + mask_zero=False, + input_length=None, + sparse=False, + **kwargs, + ): + if "input_shape" not in kwargs: + if input_length: + kwargs["input_shape"] = (input_length,) + else: + kwargs["input_shape"] = (None,) + if input_dim <= 0 or output_dim <= 0: raise ValueError( - f'"input_length" is {self.input_length}, but received input ' - f'has shape {input_shape}') - elif s1 is None: - in_lens[i] = s2 - return (input_shape[0],) + tuple(in_lens) + (self.output_dim,) - - def call(self, inputs): - dtype = backend.dtype(inputs) - if dtype != 'int32' and dtype != 'int64': - inputs = tf.cast(inputs, 'int32') - out = tf.nn.embedding_lookup(self.embeddings, inputs) - if self._dtype_policy.compute_dtype != self._dtype_policy.variable_dtype: - # Instead of casting the variable as in most layers, cast the output, as - # this is mathematically equivalent but is faster. 
- out = tf.cast(out, self._dtype_policy.compute_dtype) - return out - - def get_config(self): - config = { - 'input_dim': self.input_dim, - 'output_dim': self.output_dim, - 'embeddings_initializer': - initializers.serialize(self.embeddings_initializer), - 'embeddings_regularizer': - regularizers.serialize(self.embeddings_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'embeddings_constraint': - constraints.serialize(self.embeddings_constraint), - 'mask_zero': self.mask_zero, - 'input_length': self.input_length - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + "Both `input_dim` and `output_dim` should be positive. " + f"Received input_dim = {input_dim} " + f"and output_dim = {output_dim}" + ) + if ( + not base_layer_utils.v2_dtype_behavior_enabled() + and "dtype" not in kwargs + ): + # In TF1, the dtype defaults to the input dtype which is typically + # int32, so explicitly set it to floatx + kwargs["dtype"] = backend.floatx() + # We set autocast to False, as we do not want to cast floating-point + # inputs to self.dtype. In call(), we cast to int32, and casting to + # self.dtype before casting to int32 might cause the int32 values to be + # different due to a loss of precision. + kwargs["autocast"] = False + use_one_hot_matmul = kwargs.pop("use_one_hot_matmul", False) + super().__init__(**kwargs) + + self.input_dim = input_dim + self.output_dim = output_dim + self.embeddings_initializer = initializers.get(embeddings_initializer) + self.embeddings_regularizer = regularizers.get(embeddings_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.embeddings_constraint = constraints.get(embeddings_constraint) + self.mask_zero = mask_zero + self.supports_masking = mask_zero + self.input_length = input_length + self.sparse = sparse + if self.sparse and self.mask_zero: + raise ValueError( + "`mask_zero` cannot be enabled when " + "`tf.keras.layers.Embedding` is used with `tf.SparseTensor` " + "input." + ) + # Make this flag private and do not serialize it for now. + # It will be part of the public API after further testing.
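The `use_one_hot_matmul` flag popped above selects an alternative lookup strategy implemented in `call()` below. The equivalence it relies on can be checked standalone; the shapes and ids here are arbitrary illustrative values:

```python
import tensorflow as tf

embeddings = tf.random.normal([10, 4])     # (input_dim, output_dim)
ids = tf.constant([[1, 3, 3], [0, 2, 9]])  # a batch of integer token ids

gathered = tf.nn.embedding_lookup(embeddings, ids)
# The one-hot tensor must share the weights' dtype for the matmul.
one_hot = tf.one_hot(ids, depth=10, dtype=embeddings.dtype)
via_matmul = tf.matmul(one_hot, embeddings)

tf.debugging.assert_near(gathered, via_matmul)  # same result either way
```

A matmul over a one-hot expansion costs more memory than a gather, but it is typically friendlier to accelerators that penalize gather ops, which is presumably the motivation for the private flag.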
+ self._use_one_hot_matmul = use_one_hot_matmul + + @tf_utils.shape_type_conversion + def build(self, input_shape=None): + self.embeddings = self.add_weight( + shape=(self.input_dim, self.output_dim), + initializer=self.embeddings_initializer, + name="embeddings", + regularizer=self.embeddings_regularizer, + constraint=self.embeddings_constraint, + experimental_autocast=False, + ) + self.built = True + + def compute_mask(self, inputs, mask=None): + if not self.mask_zero: + return None + return tf.not_equal(inputs, 0) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if self.input_length is None: + return input_shape + (self.output_dim,) + else: + # input_length can be tuple if input is 3D or higher + if isinstance(self.input_length, (list, tuple)): + in_lens = list(self.input_length) + else: + in_lens = [self.input_length] + if len(in_lens) != len(input_shape) - 1: + raise ValueError( + f'"input_length" is {self.input_length}, but received ' + f"input has shape {input_shape}" + ) + else: + for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])): + if s1 is not None and s2 is not None and s1 != s2: + raise ValueError( + f'"input_length" is {self.input_length}, but ' + f"received input has shape {input_shape}" + ) + elif s1 is None: + in_lens[i] = s2 + return (input_shape[0],) + tuple(in_lens) + (self.output_dim,) + + def call(self, inputs): + dtype = backend.dtype(inputs) + if dtype != "int32" and dtype != "int64": + inputs = tf.cast(inputs, "int32") + if isinstance(inputs, tf.sparse.SparseTensor): + if self.sparse: + # get sparse embedding values + embedding_values = tf.nn.embedding_lookup( + params=self.embeddings, ids=inputs.values + ) + embedding_values = tf.reshape(embedding_values, [-1]) + # get sparse embedding indices + indices_values_embed_axis = tf.range(self.output_dim) + repeat_times = [inputs.indices.shape[0]] + indices_values_embed_axis = tf.expand_dims( + tf.tile(indices_values_embed_axis, repeat_times), -1 + ) + indices_values_embed_axis = tf.cast( + indices_values_embed_axis, dtype=tf.int64 + ) + current_indices = tf.repeat( + inputs.indices, [self.output_dim], axis=0 + ) + new_indices = tf.concat( + [current_indices, indices_values_embed_axis], 1 + ) + new_shape = tf.concat( + [tf.cast(inputs.shape, dtype=tf.int64), [self.output_dim]], + axis=-1, + ) + out = tf.SparseTensor( + indices=new_indices, + values=embedding_values, + dense_shape=new_shape, + ) + else: + sparse_inputs_expanded = tf.sparse.expand_dims(inputs, axis=-1) + out = tf.nn.safe_embedding_lookup_sparse( + embedding_weights=self.embeddings, + sparse_ids=sparse_inputs_expanded, + default_id=0, + ) + elif self._use_one_hot_matmul: + # Note that we change the dtype of the one_hot to be same as the + # weight tensor, since the input data are usually ints, and weights + # are floats. The nn.embedding_lookup support ids as ints, but + # the one_hot matmul need both inputs and weights to be same dtype. + one_hot_data = tf.one_hot( + inputs, depth=self.input_dim, dtype=self.dtype + ) + out = tf.matmul(one_hot_data, self.embeddings) + else: + out = tf.nn.embedding_lookup(self.embeddings, inputs) + + if self.sparse and not isinstance(out, tf.SparseTensor): + out = tf.sparse.from_dense(out) + + if ( + self._dtype_policy.compute_dtype + != self._dtype_policy.variable_dtype + ): + # Instead of casting the variable as in most layers, cast the + # output, as this is mathematically equivalent but is faster. 
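End to end, the sparse branch above gives the behavior sketched here, mirroring the new unit tests; the indices and values are arbitrary, and the `sparse` argument only exists once this patch is applied:

```python
import tensorflow as tf

layer = tf.keras.layers.Embedding(input_dim=3, output_dim=2, sparse=True)
ids = tf.SparseTensor(
    indices=[[0, 1], [1, 2]], values=[1, 2], dense_shape=[3, 3]
)
out = layer(ids)
# Every stored id fans out into `output_dim` sparse entries, so the
# result is a tf.SparseTensor with dense_shape [3, 3, 2].
print(out.dense_shape.numpy())  # [3 3 2]
```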
+ out = tf.cast(out, self._dtype_policy.compute_dtype) + return out + + def get_config(self): + config = { + "input_dim": self.input_dim, + "output_dim": self.output_dim, + "embeddings_initializer": initializers.serialize( + self.embeddings_initializer + ), + "embeddings_regularizer": regularizers.serialize( + self.embeddings_regularizer + ), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "embeddings_constraint": constraints.serialize( + self.embeddings_constraint + ), + "mask_zero": self.mask_zero, + "input_length": self.input_length, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/core/embedding_test.py b/keras/layers/core/embedding_test.py index 29c891d4157f..0994f208f87d 100644 --- a/keras/layers/core/embedding_test.py +++ b/keras/layers/core/embedding_test.py @@ -14,123 +14,245 @@ # ============================================================================== """Tests for embedding layer.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.mixed_precision import policy from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf class EmbeddingTest(test_combinations.TestCase): + @test_combinations.run_all_keras_modes + def test_embedding(self): + if tf.test.is_gpu_available(): + self.skipTest("Only test embedding on CPU.") + + test_utils.layer_test( + keras.layers.Embedding, + kwargs={"output_dim": 4, "input_dim": 10, "input_length": 2}, + input_shape=(3, 2), + input_dtype="int32", + expected_output_dtype="float32", + ) + + test_utils.layer_test( + keras.layers.Embedding, + kwargs={"output_dim": 4, "input_dim": 10, "mask_zero": True}, + input_shape=(3, 2), + input_dtype="int32", + expected_output_dtype="float32", + ) + + test_utils.layer_test( + keras.layers.Embedding, + kwargs={"output_dim": 4, "input_dim": 10, "mask_zero": True}, + input_shape=(3, 4, 2), + input_dtype="int32", + expected_output_dtype="float32", + ) + + test_utils.layer_test( + keras.layers.Embedding, + kwargs={ + "output_dim": 4, + "input_dim": 10, + "mask_zero": True, + "input_length": (None, 2), + }, + input_shape=(3, 4, 2), + input_dtype="int32", + expected_output_dtype="float32", + ) + + @test_combinations.run_all_keras_modes + def test_embedding_correctness(self): + layer = keras.layers.Embedding(output_dim=2, input_dim=2) + model = keras.models.Sequential([layer]) + + layer.set_weights([np.array([[1, 1], [2, 2]])]) + model.run_eagerly = test_utils.should_run_eagerly() + outputs = model.predict(np.array([[0, 1, 0]], dtype="int32")) + self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]]) + + def test_embedding_incorrect_dimension(self): + with self.assertRaises(ValueError): + keras.layers.Embedding(input_dim=0, output_dim=1) + + with self.assertRaises(ValueError): + keras.layers.Embedding(input_dim=1, output_dim=0) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_eager_gpu_cpu(self): + l = keras.layers.Embedding(output_dim=2, input_dim=2) + l.build((None, 2)) + inputs = keras.backend.constant([[0, 1, 0]], dtype="int32") + with tf.GradientTape() as tape: + output = l(inputs) + gs = tape.gradient(output, l.weights) + opt = tf.compat.v1.train.AdagradOptimizer(0.1) + opt.apply_gradients(zip(gs, l.weights)) + self.assertAllEqual(len(gs), 1) + + @test_combinations.run_all_keras_modes + def 
test_embedding_with_ragged_input(self): + layer = keras.layers.Embedding( + input_dim=3, + output_dim=2, + weights=[np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])], + ) + inputs = keras.layers.Input( + shape=(None,), dtype=tf.float32, ragged=True + ) + + outputs = keras.layers.Lambda( + lambda args: keras.backend.identity(args) + )(inputs) + + outputs = layer(outputs) + + model = keras.Model(inputs, outputs) + model.run_eagerly = test_utils.should_run_eagerly() + outputs = model.predict( + tf.ragged.constant( + [[1.0, 2.0, 2.0], [0.0], [1.0, 2.0]], ragged_rank=1 + ) + ) + self.assertAllClose( + outputs, + tf.ragged.constant( + [ + [[1.0, 1.0], [2.0, 2.0], [2.0, 2.0]], + [[0.0, 0.0]], + [[1.0, 1.0], [2.0, 2.0]], + ], + ragged_rank=1, + ), + ) + + @test_utils.enable_v2_dtype_behavior + def test_mixed_precision_embedding(self): + try: + policy.set_global_policy("mixed_float16") + layer = keras.layers.Embedding(input_dim=5, output_dim=2) + self.assertEqual(layer._dtype_policy.name, "mixed_float16") + outputs = layer(np.array([0, 1, 2])) + self.assertEqual(outputs.dtype, "float16") + finally: + policy.set_global_policy("float32") + + @test_combinations.run_all_keras_modes + def test_embedding_with_sparse_input_sparse_output(self): + layer = keras.layers.Embedding( + input_dim=3, + output_dim=2, + weights=[np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])], + sparse=True, + ) + input = tf.SparseTensor( + indices=[[0, 1], [1, 2]], values=[1, 2], dense_shape=[3, 3] + ) + output = layer(input) + expected_output = tf.SparseTensor( + indices=[[0, 1, 0], [0, 1, 1], [1, 2, 0], [1, 2, 1]], + values=[1.0, 1.0, 2.0, 2.0], + dense_shape=[3, 3, 2], + ) + self.assertAllClose(output.indices, expected_output.indices) + self.assertAllClose(output.values, expected_output.values) + self.assertAllClose(output.dense_shape, expected_output.dense_shape) + + @test_combinations.run_all_keras_modes + def test_embedding_with_sparse_input_dense_output(self): + layer = keras.layers.Embedding( + input_dim=3, + output_dim=2, + weights=[np.array([[0.1, 0.1], [1.0, 1.0], [2.0, 2.0]])], + sparse=False, + ) + input = tf.SparseTensor( + indices=[[0, 1], [1, 2]], values=[1, 2], dense_shape=[3, 3] + ) + output = layer(input) + expected_output = tf.constant( + [ + [[0.1, 0.1], [1.0, 1.0], [0.1, 0.1]], + [[0.1, 0.1], [0.1, 0.1], [2.0, 2.0]], + [[0.1, 0.1], [0.1, 0.1], [0.1, 0.1]], + ] + ) + self.assertAllClose(output, expected_output) + + @test_combinations.run_all_keras_modes + def test_error_message_for_mask_zero_enabled_with_sparse_tensor(self): + with self.assertRaisesRegex( + ValueError, + "`mask_zero` cannot be enabled when " + "`tf.keras.layers.Embedding` is used with `tf.SparseTensor` " + "input.", + ): + layer = keras.layers.Embedding( + input_dim=3, + output_dim=2, + weights=[np.array([[0.1, 0.1], [1.0, 1.0], [2.0, 2.0]])], + sparse=True, + mask_zero=True, + ) + inputs = tf.SparseTensor( + indices=[[0, 1], [1, 2]], values=[1, 2], dense_shape=[3, 3] + ) + layer(inputs) + + @test_combinations.run_all_keras_modes + def test_embedding_with_dense_input_sprase_output(self): + layer = keras.layers.Embedding( + input_dim=3, + output_dim=2, + weights=[np.array([[0, 0], [1.0, 1.0], [2.0, 2.0]])], + sparse=True, + mask_zero=False, + ) + inputs = tf.constant([0, 0, 0, 2, 1]) + output = layer(inputs) + expected_output = tf.SparseTensor( + indices=[[3, 0], [3, 1], [4, 0], [4, 1]], + values=[2.0, 2.0, 1.0, 1.0], + dense_shape=[5, 2], + ) + self.assertAllClose(output.indices, expected_output.indices) + self.assertAllClose(output.values, 
expected_output.values) + self.assertAllClose(output.dense_shape, expected_output.dense_shape) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_use_one_hot(self): + batch = 8 + input_length = 10 + layer = keras.layers.Embedding(input_dim=100, output_dim=16) + self.assertFalse(layer._use_one_hot_matmul) + + inputs = tf.random.uniform( + shape=[batch, input_length], minval=0, maxval=9, dtype=tf.int64 + ) + output_1 = layer(inputs) + + layer._use_one_hot_matmul = True + output_2 = layer(inputs) + + self.assertAllClose(output_1, output_2) + self.assertEqual(output_1.dtype, output_2.dtype) + + # Make sure the layer can be created with hidden kwargs, and not + # serialize it into config (for now). + layer = keras.layers.Embedding( + input_dim=100, output_dim=16, use_one_hot_matmul=True + ) + self.assertTrue(layer._use_one_hot_matmul) + + self.assertNotIn("use_one_hot_matmul", layer.get_config()) + - @test_combinations.run_all_keras_modes - def test_embedding(self): - if tf.test.is_gpu_available(): - self.skipTest('Only test embedding on CPU.') - - test_utils.layer_test( - keras.layers.Embedding, - kwargs={'output_dim': 4, - 'input_dim': 10, - 'input_length': 2}, - input_shape=(3, 2), - input_dtype='int32', - expected_output_dtype='float32') - - test_utils.layer_test( - keras.layers.Embedding, - kwargs={'output_dim': 4, - 'input_dim': 10, - 'mask_zero': True}, - input_shape=(3, 2), - input_dtype='int32', - expected_output_dtype='float32') - - test_utils.layer_test( - keras.layers.Embedding, - kwargs={'output_dim': 4, - 'input_dim': 10, - 'mask_zero': True}, - input_shape=(3, 4, 2), - input_dtype='int32', - expected_output_dtype='float32') - - test_utils.layer_test( - keras.layers.Embedding, - kwargs={'output_dim': 4, - 'input_dim': 10, - 'mask_zero': True, - 'input_length': (None, 2)}, - input_shape=(3, 4, 2), - input_dtype='int32', - expected_output_dtype='float32') - - @test_combinations.run_all_keras_modes - def test_embedding_correctness(self): - layer = keras.layers.Embedding(output_dim=2, input_dim=2) - model = keras.models.Sequential([layer]) - - layer.set_weights([np.array([[1, 1], [2, 2]])]) - model.run_eagerly = test_utils.should_run_eagerly() - outputs = model.predict(np.array([[0, 1, 0]], dtype='int32')) - self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]]) - - def test_embedding_incorrect_dimension(self): - with self.assertRaises(ValueError): - keras.layers.Embedding(input_dim=0, output_dim=1) - - with self.assertRaises(ValueError): - keras.layers.Embedding(input_dim=1, output_dim=0) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_eager_gpu_cpu(self): - l = keras.layers.Embedding(output_dim=2, input_dim=2) - l.build((None, 2)) - inputs = keras.backend.constant([[0, 1, 0]], dtype='int32') - with tf.GradientTape() as tape: - output = l(inputs) - gs = tape.gradient(output, l.weights) - opt = tf.compat.v1.train.AdagradOptimizer(0.1) - opt.apply_gradients(zip(gs, l.weights)) - self.assertAllEqual(len(gs), 1) - - @test_combinations.run_all_keras_modes - def test_embedding_with_ragged_input(self): - layer = keras.layers.Embedding( - input_dim=3, - output_dim=2, - weights=[np.array([[0., 0.], [1., 1.], [2., 2.]])]) - inputs = keras.layers.Input( - shape=(None,), dtype=tf.float32, ragged=True) - # pylint: disable=unnecessary-lambda - outputs = keras.layers.Lambda(lambda args: keras.backend.identity(args))( - inputs) - # pylint: enable=unnecessary-lambda - outputs = layer(outputs) - - model = keras.Model(inputs, 
outputs) - model.run_eagerly = test_utils.should_run_eagerly() - outputs = model.predict( - tf.ragged.constant([[1., 2., 2.], [0.], [1., 2.]], ragged_rank=1)) - self.assertAllClose( - outputs, - tf.ragged.constant( - [[[1., 1.], [2., 2.], [2., 2.]], [[0., 0.]], [[1., 1.], [2., 2.]]], - ragged_rank=1)) - - @test_utils.enable_v2_dtype_behavior - def test_mixed_precision_embedding(self): - try: - policy.set_global_policy('mixed_float16') - layer = keras.layers.Embedding(input_dim=5, output_dim=2) - self.assertEqual(layer._dtype_policy.name, 'mixed_float16') - outputs = layer(np.array([0, 1, 2])) - self.assertEqual(outputs.dtype, 'float16') - finally: - policy.set_global_policy('float32') - - -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/optimizer.py b/keras/layers/core/identity.py similarity index 58% rename from keras/optimizers/legacy/optimizer.py rename to keras/layers/core/identity.py index 925a97024508..2b5c0cff76ee 100644 --- a/keras/optimizers/legacy/optimizer.py +++ b/keras/layers/core/identity.py @@ -12,13 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Legacy Adam optimizer implementation.""" +"""Contains the Identity layer.""" -from keras.optimizers.optimizer_v2 import optimizer_v2 +import tensorflow.compat.v2 as tf +from keras.engine.base_layer import Layer + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.optimizers.legacy.Optimizer') -class Optimizer(optimizer_v2.OptimizerV2): - pass +@keras_export("keras.layers.Identity") +class Identity(Layer): + """Identity layer. + + This layer should be used as a placeholder when no operation is to be + performed. The layer is argument insensitive, and returns its `inputs` + argument as output. + + Args: + name: Optional name for the layer instance. + """ + + def call(self, inputs): + return tf.nest.map_structure(tf.identity, inputs) diff --git a/keras/layers/core/lambda_layer.py b/keras/layers/core/lambda_layer.py index 3be1ba108017..1a8c2142d343 100644 --- a/keras/layers/core/lambda_layer.py +++ b/keras/layers/core/lambda_layer.py @@ -13,210 +13,228 @@ # limitations under the License. # ============================================================================== """Contains the Lambda layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import sys import textwrap import types as python_types import warnings + +import numpy as np +import tensorflow.compat.v2 as tf + from keras.engine.base_layer import Layer +from keras.saving import serialization_lib from keras.utils import generic_utils from keras.utils import tf_inspect from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.platform import tf_logging from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Lambda') +@keras_export("keras.layers.Lambda") class Lambda(Layer): - """Wraps arbitrary expressions as a `Layer` object. - - The `Lambda` layer exists so that arbitrary expressions can be used - as a `Layer` when constructing `Sequential` - and Functional API models. `Lambda` layers are best suited for simple - operations or quick experimentation. 
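As a quick sketch of the `Identity` layer added above (available once this patch lands; the model shape here is arbitrary): it simply passes its input through, which is handy wherever an API expects a layer object but no transformation is wanted:

```python
import tensorflow as tf

inputs = tf.keras.Input(shape=(4,))
# A no-op stand-in, e.g. for an optional block that is disabled
# in a given configuration.
outputs = tf.keras.layers.Identity()(inputs)
model = tf.keras.Model(inputs, outputs)

x = tf.ones((2, 4))
tf.debugging.assert_equal(model(x), x)  # returned unchanged
```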
For more advanced use cases, follow - [this guide](https://www.tensorflow.org/guide/keras/custom_layers_and_models) - for subclassing `tf.keras.layers.Layer`. - - WARNING: `tf.keras.layers.Lambda` layers have (de)serialization limitations! - - The main reason to subclass `tf.keras.layers.Layer` instead of using a - `Lambda` layer is saving and inspecting a Model. `Lambda` layers - are saved by serializing the Python bytecode, which is fundamentally - non-portable. They should only be loaded in the same environment where - they were saved. Subclassed layers can be saved in a more portable way - by overriding their `get_config` method. Models that rely on - subclassed Layers are also often easier to visualize and reason about. - - Examples: - - ```python - # add a x -> x^2 layer - model.add(Lambda(lambda x: x ** 2)) - ``` - ```python - # add a layer that returns the concatenation - # of the positive part of the input and - # the opposite of the negative part - - def antirectifier(x): - x -= K.mean(x, axis=1, keepdims=True) - x = K.l2_normalize(x, axis=1) - pos = K.relu(x) - neg = K.relu(-x) - return K.concatenate([pos, neg], axis=1) - - model.add(Lambda(antirectifier)) - ``` - - Variables: - While it is possible to use Variables with Lambda layers, this practice is - discouraged as it can easily lead to bugs. For instance, consider the - following layer: - - ```python + """Wraps arbitrary expressions as a `Layer` object. + + The `Lambda` layer exists so that arbitrary expressions can be used + as a `Layer` when constructing Sequential + and Functional API models. `Lambda` layers are best suited for simple + operations or quick experimentation. For more advanced use cases, follow + [this guide]( + https://www.tensorflow.org/guide/keras/custom_layers_and_models) + for subclassing `tf.keras.layers.Layer`. + + WARNING: `tf.keras.layers.Lambda` layers have (de)serialization limitations! + + The main reason to subclass `tf.keras.layers.Layer` instead of using a + `Lambda` layer is saving and inspecting a Model. `Lambda` layers + are saved by serializing the Python bytecode, which is fundamentally + non-portable. They should only be loaded in the same environment where + they were saved. Subclassed layers can be saved in a more portable way + by overriding their `get_config()` method. Models that rely on + subclassed Layers are also often easier to visualize and reason about. + + Examples: + + ```python + # add a x -> x^2 layer + model.add(Lambda(lambda x: x ** 2)) + ``` + + ```python + # add a layer that returns the concatenation + # of the positive part of the input and + # the opposite of the negative part + + def antirectifier(x): + x -= K.mean(x, axis=1, keepdims=True) + x = K.l2_normalize(x, axis=1) + pos = K.relu(x) + neg = K.relu(-x) + return K.concatenate([pos, neg], axis=1) + + model.add(Lambda(antirectifier)) + ``` + + **Note on Variables:** + + While it is possible to use Variables with Lambda layers, + this practice is discouraged as it can easily lead to bugs. + For instance, consider the following layer: + + ```python scale = tf.Variable(1.) scale_layer = tf.keras.layers.Lambda(lambda x: x * scale) - ``` + ``` - Because scale_layer does not directly track the `scale` variable, it will + Because `scale_layer` does not directly track the `scale` variable, it will not appear in `scale_layer.trainable_weights` and will therefore not be trained if `scale_layer` is used in a Model. 
A better pattern is to write a subclassed Layer: - ```python + ```python class ScaleLayer(tf.keras.layers.Layer): - def __init__(self): - super(ScaleLayer, self).__init__() - self.scale = tf.Variable(1.) + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.scale = tf.Variable(1.) - def call(self, inputs): - return inputs * self.scale - ``` + def call(self, inputs): + return inputs * self.scale + ``` - In general, Lambda layers can be convenient for simple stateless + In general, `Lambda` layers can be convenient for simple stateless computation, but anything more complex should use a subclass Layer instead. - Args: - function: The function to be evaluated. Takes input tensor as first - argument. - output_shape: Expected output shape from function. This argument can be - inferred if not explicitly provided. Can be a tuple or function. If a - tuple, it only specifies the first dimension onward; - sample dimension is assumed either the same as the input: `output_shape = - (input_shape[0], ) + output_shape` or, the input is `None` and - the sample dimension is also `None`: `output_shape = (None, ) + - output_shape` If a function, it specifies the entire shape as a function - of the - input shape: `output_shape = f(input_shape)` - mask: Either None (indicating no masking) or a callable with the same - signature as the `compute_mask` layer method, or a tensor that will be - returned as output mask regardless of what the input is. - arguments: Optional dictionary of keyword arguments to be passed to the - function. - Input shape: Arbitrary. Use the keyword argument input_shape (tuple of - integers, does not include the samples axis) when using this layer as the - first layer in a model. - Output shape: Specified by `output_shape` argument - """ - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, - function, - output_shape=None, - mask=None, - arguments=None, - **kwargs): - super().__init__(**kwargs) - - self.arguments = arguments or {} - self.function = function - - if mask is not None: - self.supports_masking = True - self.mask = mask - self._output_shape = output_shape - - # Warning on every invocation will be quite irksome in Eager mode. - self._already_warned = False - - function_args = tf_inspect.getfullargspec(function).args - self._fn_expects_training_arg = 'training' in function_args - self._fn_expects_mask_arg = 'mask' in function_args - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if self._output_shape is None: - # Make use of existing autocomputation but provide Lambda-specific - # error message. This is always safe to run even when the outer context - # is Graph mode because Lambda layers don't have side effects such as - # `add_loss`. - with tf.__internal__.eager_context.eager_mode(): - try: - return super().compute_output_shape(input_shape) - except NotImplementedError: - raise NotImplementedError( - 'We could not automatically infer the shape of the Lambda\'s ' - 'output. Please specify `output_shape` for this Lambda.') - - if callable(self._output_shape): - output_shapes = self._output_shape(input_shape) - return tf_utils.convert_shapes(output_shapes, to_tuples=False) - - # Output shapes are passed directly and don't include batch dimension. 
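The two `output_shape` flavors that this comment and the rewritten docstring below describe can be summarized in a short sketch; the shapes and functions are arbitrary examples:

```python
import tensorflow as tf

# Tuple form: the batch dimension is omitted and prepended automatically.
square = tf.keras.layers.Lambda(lambda x: x ** 2, output_shape=(8,))

# Callable form: receives and returns the full shape, batch included.
halve = tf.keras.layers.Lambda(
    lambda x: x[:, ::2], output_shape=lambda s: (s[0], s[1] // 2)
)
# In eager terms: halve(tf.ones((2, 8))).shape == (2, 4)
```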
- input_tensor_shape = tf_utils.convert_shapes(input_shape, to_tuples=False) - batch_size = tf.nest.flatten( - input_tensor_shape)[0][0] if input_shape else None - - def _add_batch(shape): - return tf.TensorShape([batch_size] + shape.as_list()) - - output_shapes = tf_utils.convert_shapes(self._output_shape, to_tuples=False) - return tf.nest.map_structure(_add_batch, output_shapes) - - def call(self, inputs, mask=None, training=None): - # We must copy for thread safety, but it only needs to be a shallow copy. - kwargs = {k: v for k, v in self.arguments.items()} - if self._fn_expects_mask_arg: - kwargs['mask'] = mask - if self._fn_expects_training_arg: - kwargs['training'] = training - - created_variables = [] - - def _variable_creator(next_creator, **kwargs): - var = next_creator(**kwargs) - created_variables.append(var) - return var - - with tf.GradientTape(watch_accessed_variables=True) as tape,\ - tf.variable_creator_scope(_variable_creator): - result = self.function(inputs, **kwargs) - self._check_variables(created_variables, tape.watched_variables()) - return result - - def _check_variables(self, created_variables, accessed_variables): - if not created_variables and not accessed_variables: - # In the common case that a Lambda layer does not touch a Variable, we - # don't want to incur the runtime cost of assembling any state used for - # checking only to immediately discard it. - return - - # Filter out the state variable in the tf.random.Generator, which is - # commonly used for initializer or droput. The variable is intentionally - # not tracked and it is not a trainable variable. - created_variables = [v for v in created_variables - if 'StateVar' not in v.name] - - tracked_weights = set(v.ref() for v in self.weights) - untracked_new_vars = [ - v for v in created_variables if v.ref() not in tracked_weights - ] - if untracked_new_vars: - variable_str = '\n'.join(' {}'.format(i) for i in untracked_new_vars) - error_str = textwrap.dedent(""" + Args: + function: The function to be evaluated. Takes input tensor as first + argument. + output_shape: Expected output shape from function. This argument can be + inferred if not explicitly provided. Can be a tuple or function. If a + tuple, it only specifies the first dimension onward; + sample dimension is assumed either the same as the input: + `output_shape = (input_shape[0], ) + output_shape` or, the input is + `None` and the sample dimension is also `None`: + `output_shape = (None, ) + output_shape` If a function, it specifies the + entire shape as a function of the input shape: + `output_shape = f(input_shape)` + mask: Either None (indicating no masking) or a callable with the same + signature as the `compute_mask` layer method, or a tensor that will be + returned as output mask regardless of what the input is. + arguments: Optional dictionary of keyword arguments to be passed to the + function. + + Input shape: Arbitrary. Use the keyword argument input_shape (tuple of + integers, does not include the samples axis) when using this layer as the + first layer in a model. 
+ + Output shape: Specified by `output_shape` argument + """ + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__( + self, function, output_shape=None, mask=None, arguments=None, **kwargs + ): + super().__init__(**kwargs) + + self.arguments = arguments or {} + self.function = function + + if mask is not None: + self.supports_masking = True + self.mask = mask + self._output_shape = output_shape + + # Warning on every invocation will be quite irksome in Eager mode. + self._already_warned = False + + function_args = tf_inspect.getfullargspec(function).args + self._fn_expects_training_arg = "training" in function_args + self._fn_expects_mask_arg = "mask" in function_args + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if self._output_shape is None: + # Make use of existing autocomputation but provide Lambda-specific + # error message. This is always safe to run even when the outer + # context is Graph mode because Lambda layers don't have side + # effects such as `add_loss`. + with tf.__internal__.eager_context.eager_mode(): + try: + return super().compute_output_shape(input_shape) + except NotImplementedError: + raise NotImplementedError( + "We could not automatically infer the shape of " + "the Lambda's output. Please specify `output_shape` " + "for this Lambda." + ) + + if callable(self._output_shape): + output_shapes = self._output_shape(input_shape) + return tf_utils.convert_shapes(output_shapes, to_tuples=False) + + # Output shapes are passed directly and don't include batch dimension. + input_tensor_shape = tf_utils.convert_shapes( + input_shape, to_tuples=False + ) + batch_size = ( + tf.nest.flatten(input_tensor_shape)[0][0] if input_shape else None + ) + + def _add_batch(shape): + return tf.TensorShape([batch_size] + shape.as_list()) + + output_shapes = tf_utils.convert_shapes( + self._output_shape, to_tuples=False + ) + return tf.nest.map_structure(_add_batch, output_shapes) + + def call(self, inputs, mask=None, training=None): + # We must copy for thread safety, but it only needs to be a shallow + # copy. + kwargs = {k: v for k, v in self.arguments.items()} + if self._fn_expects_mask_arg: + kwargs["mask"] = mask + if self._fn_expects_training_arg: + kwargs["training"] = training + + created_variables = [] + + def _variable_creator(next_creator, **kwargs): + var = next_creator(**kwargs) + created_variables.append(var) + return var + + with tf.GradientTape( + watch_accessed_variables=True + ) as tape, tf.variable_creator_scope(_variable_creator): + result = self.function(inputs, **kwargs) + self._check_variables(created_variables, tape.watched_variables()) + return result + + def _check_variables(self, created_variables, accessed_variables): + if not created_variables and not accessed_variables: + # In the common case that a Lambda layer does not touch a Variable, + # we don't want to incur the runtime cost of assembling any state + # used for checking only to immediately discard it. + return + + # Filter out the state variable in the tf.random.Generator, which is + # commonly used for initializer or dropout. The variable is intentionally + # not tracked and it is not a trainable variable.
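Before the checks below, it may help to see the failure mode they guard against: a function that creates a fresh variable on every call trips the untracked-variable error assembled in this method. A sketch, assuming eager execution:

```python
import tensorflow as tf

# The lambda creates a brand-new tf.Variable each call; the layer
# cannot track it, so _check_variables raises a ValueError.
layer = tf.keras.layers.Lambda(lambda x: x * tf.Variable(2.0))
try:
    layer(tf.ones((1, 3)))
except ValueError as err:
    print("rejected untracked variable:", type(err).__name__)
```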
+ created_variables = [ + v for v in created_variables if "StateVar" not in v.name + ] + + tracked_weights = set(v.ref() for v in self.weights) + untracked_new_vars = [ + v for v in created_variables if v.ref() not in tracked_weights + ] + if untracked_new_vars: + variable_str = "\n".join(f" {i}" for i in untracked_new_vars) + error_str = textwrap.dedent( + """ The following Variables were created within a Lambda layer ({name}) but are not tracked by said layer: {variable_str} @@ -224,143 +242,175 @@ def _check_variables(self, created_variables, accessed_variables): calls, and consequently this behavior is disallowed for safety. Lambda layers are not well suited to stateful computation; instead, writing a subclassed Layer is the recommended way to define layers with - Variables.""").format( - name=self.name, variable_str=variable_str) - raise ValueError(error_str) - - untracked_used_vars = [ - v for v in accessed_variables if v.ref() not in tracked_weights - ] - if untracked_used_vars and not self._already_warned: - variable_str = '\n'.join(' {}'.format(i) for i in untracked_used_vars) - self._warn( - textwrap.dedent(""" + Variables.""" + ).format(name=self.name, variable_str=variable_str) + raise ValueError(error_str) + + untracked_used_vars = [ + v for v in accessed_variables if v.ref() not in tracked_weights + ] + if untracked_used_vars and not self._already_warned: + variable_str = "\n".join(f" {i}" for i in untracked_used_vars) + self._warn( + textwrap.dedent( + """ The following Variables were used in a Lambda layer's call ({name}), but are not present in its tracked objects: {variable_str} It is possible that this is intended behavior, but it is more likely an omission. This is a strong indication that this layer should be - formulated as a subclassed Layer rather than a Lambda layer.""") - .format(name=self.name, variable_str=variable_str)) - self._already_warned = True - - def _warn(self, msg): - # This method will be overridden in a unit test to raise an error, because - # self.assertWarns is not universally implemented.
- return tf_logging.warning(msg) - - def compute_mask(self, inputs, mask=None): - if callable(self.mask): - return self.mask(inputs, mask) - return self.mask - - def get_config(self): - function_config = self._serialize_function_to_config(self.function) - output_shape_config = self._serialize_function_to_config( - self._output_shape, allow_raw=True) - config = { - 'function': function_config[0], - 'function_type': function_config[1], - 'module': function_config[2], - 'output_shape': output_shape_config[0], - 'output_shape_type': output_shape_config[1], - 'output_shape_module': output_shape_config[2], - } - if self.mask is not None: - mask_config = self._serialize_function_to_config(self.mask) - config.update({ - 'mask': mask_config[0], - 'mask_type': mask_config[1], - 'mask_module': mask_config[2] - }) - config['arguments'] = self.arguments - - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - def _serialize_function_to_config(self, inputs, allow_raw=False): - if isinstance(inputs, python_types.LambdaType): - output = generic_utils.func_dump(inputs) - output_type = 'lambda' - module = inputs.__module__ - elif callable(inputs): - output = inputs.__name__ - output_type = 'function' - module = inputs.__module__ - elif allow_raw: - output = inputs - output_type = 'raw' - module = None - else: - raise ValueError('Invalid input for serialization, type: %s ' % - type(inputs)) - - return output, output_type, module - - @classmethod - def from_config(cls, config, custom_objects=None): - config = config.copy() - function = cls._parse_function_from_config(config, custom_objects, - 'function', 'module', - 'function_type') - - output_shape = cls._parse_function_from_config(config, custom_objects, - 'output_shape', - 'output_shape_module', - 'output_shape_type') - if 'mask' in config: - mask = cls._parse_function_from_config(config, custom_objects, 'mask', - 'mask_module', 'mask_type') - else: - mask = None - - config['function'] = function - config['output_shape'] = output_shape - config['mask'] = mask - - # If arguments were numpy array, they have been saved as - # list. We need to recover the ndarray - if 'arguments' in config: - for key in config['arguments']: - if isinstance(config['arguments'][key], dict): - arg_dict = config['arguments'][key] - if 'type' in arg_dict and arg_dict['type'] == 'ndarray': - # Overwrite the argument with its numpy translation - config['arguments'][key] = np.array(arg_dict['value']) - - return cls(**config) - - @classmethod - def _parse_function_from_config(cls, config, custom_objects, func_attr_name, - module_attr_name, func_type_attr_name): - globs = globals().copy() - module = config.pop(module_attr_name, None) - if module in sys.modules: - globs.update(sys.modules[module].__dict__) - elif module is not None: - # Note: we don't know the name of the function if it's a lambda. - warnings.warn( - '{} is not loaded, but a Lambda layer uses it. 
' - 'It may cause errors.'.format(module), - UserWarning, - stacklevel=2) - if custom_objects: - globs.update(custom_objects) - function_type = config.pop(func_type_attr_name) - if function_type == 'function': - # Simple lookup in custom objects - function = generic_utils.deserialize_keras_object( - config[func_attr_name], - custom_objects=custom_objects, - printable_module_name='function in Lambda layer') - elif function_type == 'lambda': - # Unsafe deserialization from bytecode - function = generic_utils.func_load(config[func_attr_name], globs=globs) - elif function_type == 'raw': - function = config[func_attr_name] - else: - supported_types = ['function', 'lambda', 'raw'] - raise TypeError( - f'Unsupported value for `function_type` argument. Received: ' - f'function_type={function_type}. Expected one of {supported_types}') - return function + formulated as a subclassed Layer rather than a Lambda layer.""" + ).format(name=self.name, variable_str=variable_str) + ) + self._already_warned = True + + def _warn(self, msg): + # This method will be overridden in a unit test to raise an error, + # because self.assertWarns is not universally implemented. + return tf_logging.warning(msg) + + def compute_mask(self, inputs, mask=None): + if callable(self.mask): + return self.mask(inputs, mask) + return self.mask + + def get_config(self): + function_config = self._serialize_function_to_config(self.function) + output_shape_config = self._serialize_function_to_config( + self._output_shape, allow_raw=True + ) + config = { + "function": function_config[0], + "function_type": function_config[1], + "module": function_config[2], + "output_shape": output_shape_config[0], + "output_shape_type": output_shape_config[1], + "output_shape_module": output_shape_config[2], + } + if self.mask is not None: + mask_config = self._serialize_function_to_config(self.mask) + config.update( + { + "mask": mask_config[0], + "mask_type": mask_config[1], + "mask_module": mask_config[2], + } + ) + config["arguments"] = self.arguments + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def _serialize_function_to_config(self, inputs, allow_raw=False): + if isinstance(inputs, python_types.LambdaType): + output = generic_utils.func_dump(inputs) + output_type = "lambda" + module = inputs.__module__ + elif callable(inputs): + output = inputs.__name__ + output_type = "function" + module = inputs.__module__ + elif allow_raw: + output = inputs + output_type = "raw" + module = None + else: + raise ValueError( + f"Invalid input for serialization, type: {type(inputs)} " + ) + + return output, output_type, module + + @classmethod + def from_config(cls, config, custom_objects=None): + config = config.copy() + function = cls._parse_function_from_config( + config, custom_objects, "function", "module", "function_type" + ) + + output_shape = cls._parse_function_from_config( + config, + custom_objects, + "output_shape", + "output_shape_module", + "output_shape_type", + ) + if "mask" in config: + mask = cls._parse_function_from_config( + config, custom_objects, "mask", "mask_module", "mask_type" + ) + else: + mask = None + + config["function"] = function + config["output_shape"] = output_shape + config["mask"] = mask + + # If arguments were numpy array, they have been saved as + # list. 
We need to recover the ndarray + if "arguments" in config: + for key in config["arguments"]: + if isinstance(config["arguments"][key], dict): + arg_dict = config["arguments"][key] + if "type" in arg_dict and arg_dict["type"] == "ndarray": + # Overwrite the argument with its numpy translation + config["arguments"][key] = np.array(arg_dict["value"]) + + return cls(**config) + + @classmethod + def _parse_function_from_config( + cls, + config, + custom_objects, + func_attr_name, + module_attr_name, + func_type_attr_name, + ): + globs = globals().copy() + module = config.pop(module_attr_name, None) + if module in sys.modules: + globs.update(sys.modules[module].__dict__) + elif module is not None: + # Note: we don't know the name of the function if it's a lambda. + warnings.warn( + "{} is not loaded, but a Lambda layer uses it. " + "It may cause errors.".format(module), + UserWarning, + stacklevel=2, + ) + if custom_objects: + globs.update(custom_objects) + function_type = config.pop(func_type_attr_name) + if function_type == "function": + # Simple lookup in custom objects + function = serialization_lib.deserialize_keras_object( + config[func_attr_name], + custom_objects=custom_objects, + printable_module_name="function in Lambda layer", + ) + elif function_type == "lambda": + if serialization_lib.in_safe_mode(): + raise ValueError( + "Requested the deserialization of a Lambda layer with a " + "Python `lambda` inside it. " + "This carries a potential risk of arbitrary code execution " + "and thus it is disallowed by default. If you trust the " + "source of the saved model, you can pass `safe_mode=False` " + "to the loading function in order to allow " + "Lambda layer loading." + ) + # /!\ Unsafe deserialization from bytecode! Danger! /!\ + function = generic_utils.func_load( + config[func_attr_name], globs=globs + ) + elif function_type == "raw": + function = config[func_attr_name] + else: + supported_types = ["function", "lambda", "raw"] + raise TypeError( + "Unsupported value for `function_type` argument. Received: " + f"function_type={function_type}. " + f"Expected one of {supported_types}" + ) + return function diff --git a/keras/layers/core/masking.py b/keras/layers/core/masking.py index 2faf2d022222..c710bf34731a 100644 --- a/keras/layers/core/masking.py +++ b/keras/layers/core/masking.py @@ -13,75 +13,79 @@ # limitations under the License. # ============================================================================== """Contains the Masking layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import -from keras.engine.base_layer import Layer + import tensorflow.compat.v2 as tf + +from keras.engine.base_layer import Layer + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Masking') +@keras_export("keras.layers.Masking") class Masking(Layer): - """Masks a sequence by using a mask value to skip timesteps. - - For each timestep in the input tensor (dimension #1 in the tensor), - if all values in the input tensor at that timestep - are equal to `mask_value`, then the timestep will be masked (skipped) - in all downstream layers (as long as they support masking). - - If any downstream layer does not support masking yet receives such - an input mask, an exception will be raised. - - Example: - - Consider a Numpy data array `x` of shape `(samples, timesteps, features)`, - to be fed to an LSTM layer. You want to mask timestep #3 and #5 because you - lack data for these timesteps. 
You can: - - - Set `x[:, 3, :] = 0.` and `x[:, 5, :] = 0.` - - Insert a `Masking` layer with `mask_value=0.` before the LSTM layer: - - ```python - samples, timesteps, features = 32, 10, 8 - inputs = np.random.random([samples, timesteps, features]).astype(np.float32) - inputs[:, 3, :] = 0. - inputs[:, 5, :] = 0. - - model = tf.keras.models.Sequential() - model.add(tf.keras.layers.Masking(mask_value=0., - input_shape=(timesteps, features))) - model.add(tf.keras.layers.LSTM(32)) - - output = model(inputs) - # The time step 3 and 5 will be skipped from LSTM calculation. - ``` - - See [the masking and padding guide]( - https://www.tensorflow.org/guide/keras/masking_and_padding) - for more details. - """ - - def __init__(self, mask_value=0., **kwargs): - super().__init__(**kwargs) - self.supports_masking = True - self.mask_value = mask_value - self._compute_output_and_mask_jointly = True - - def compute_mask(self, inputs, mask=None): - return tf.reduce_any(tf.not_equal(inputs, self.mask_value), axis=-1) - - def call(self, inputs): - boolean_mask = tf.reduce_any( - tf.not_equal(inputs, self.mask_value), axis=-1, keepdims=True) - outputs = inputs * tf.cast(boolean_mask, inputs.dtype) - # Compute the mask and outputs simultaneously. - outputs._keras_mask = tf.squeeze(boolean_mask, axis=-1) # pylint: disable=protected-access - return outputs - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = {'mask_value': self.mask_value} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Masks a sequence by using a mask value to skip timesteps. + + For each timestep in the input tensor (dimension #1 in the tensor), + if all values in the input tensor at that timestep + are equal to `mask_value`, then the timestep will be masked (skipped) + in all downstream layers (as long as they support masking). + + If any downstream layer does not support masking yet receives such + an input mask, an exception will be raised. + + Example: + + Consider a Numpy data array `x` of shape `(samples, timesteps, features)`, + to be fed to an LSTM layer. You want to mask timestep #3 and #5 because you + lack data for these timesteps. You can: + + - Set `x[:, 3, :] = 0.` and `x[:, 5, :] = 0.` + - Insert a `Masking` layer with `mask_value=0.` before the LSTM layer: + + ```python + samples, timesteps, features = 32, 10, 8 + inputs = np.random.random([samples, timesteps, features]).astype(np.float32) + inputs[:, 3, :] = 0. + inputs[:, 5, :] = 0. + + model = tf.keras.models.Sequential() + model.add(tf.keras.layers.Masking(mask_value=0., + input_shape=(timesteps, features))) + model.add(tf.keras.layers.LSTM(32)) + + output = model(inputs) + # The time step 3 and 5 will be skipped from LSTM calculation. + ``` + + See [the masking and padding guide]( + https://www.tensorflow.org/guide/keras/masking_and_padding) + for more details. + """ + + def __init__(self, mask_value=0.0, **kwargs): + super().__init__(**kwargs) + self.supports_masking = True + self.mask_value = mask_value + self._compute_output_and_mask_jointly = True + + def compute_mask(self, inputs, mask=None): + return tf.reduce_any(tf.not_equal(inputs, self.mask_value), axis=-1) + + def call(self, inputs): + boolean_mask = tf.reduce_any( + tf.not_equal(inputs, self.mask_value), axis=-1, keepdims=True + ) + outputs = inputs * tf.cast(boolean_mask, inputs.dtype) + # Compute the mask and outputs simultaneously. 
+ outputs._keras_mask = tf.squeeze(boolean_mask, axis=-1) + return outputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = {"mask_value": self.mask_value} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/core/tf_op_layer.py b/keras/layers/core/tf_op_layer.py index 1972de5c2f90..41f3ae93b799 100644 --- a/keras/layers/core/tf_op_layer.py +++ b/keras/layers/core/tf_op_layer.py @@ -13,362 +13,389 @@ # limitations under the License. # ============================================================================== """Contains the TFOpLambda layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import,g-bad-import-order import tensorflow.compat.v2 as tf -# pylint: enable=g-bad-import-order from keras import backend from keras.engine import keras_tensor from keras.engine.base_layer import Layer +# isort: off from tensorflow.python.platform import tf_logging -from tensorflow.python.util.tf_export import get_canonical_name_for_symbol -from tensorflow.python.util.tf_export import get_symbol_from_name +from tensorflow.python.util.tf_export import ( + get_canonical_name_for_symbol, +) +from tensorflow.python.util.tf_export import ( + get_symbol_from_name, +) class ClassMethod(Layer): - """Wraps a TF API Class's class method in a `Layer` object. - - It is inserted by the Functional API construction whenever users call - a supported TF Class's class method on KerasTensors. - - This is useful in the case where users do something like: - x = keras.Input(...) - y = keras.Input(...) - out = tf.RaggedTensor.from_row_splits(x, y) - """ - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, cls_ref, method_name, **kwargs): - self.cls_ref = cls_ref - self.method_name = method_name - self.cls_symbol = ( - get_canonical_name_for_symbol( - self.cls_ref, add_prefix_to_v1_names=True) or - get_canonical_name_for_symbol( - self.cls_ref, api_name='keras', add_prefix_to_v1_names=True)) - if 'name' not in kwargs: - kwargs['name'] = backend.unique_object_name( - 'tf.' + self.cls_symbol + '.' + self.method_name, - zero_based=True, - avoid_observed_names=True) - kwargs['autocast'] = False - - # Do not individually trace op layers in the SavedModel. - self._must_restore_from_config = True - - super().__init__(**kwargs) - - # Preserve all argument data structures when saving/loading a config - # (e.g., don't unnest lists that contain one element) - self._preserve_input_structure_in_config = True - - self._call_spec.expects_training_arg = False - self._call_spec.expects_mask_arg = False - - def call(self, args, kwargs): - return getattr(self.cls_ref, self.method_name)(*args, **kwargs) - - def get_config(self): - if not self.cls_symbol: - raise ValueError( - 'This Keras class method conversion tried to convert ' - f'a method belonging to class {self.cls_symbol}, a class ' - 'that is not publicly exposed in the TensorFlow API. 
' - 'To ensure cross-version compatibility of Keras models ' - 'that use op layers, only op layers produced from ' - 'public TensorFlow API symbols can be serialized.') - - config = {'cls_symbol': self.cls_symbol, 'method_name': self.method_name} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - config = config.copy() - symbol_name = config.pop('cls_symbol') - cls_ref = get_symbol_from_name(symbol_name) - if not cls_ref: - raise ValueError(f'TensorFlow symbol `{symbol_name}` could not be found.') - - config['cls_ref'] = cls_ref - - return cls(**config) + """Wraps a TF API Class's class method in a `Layer` object. + + It is inserted by the Functional API construction whenever users call + a supported TF Class's class method on KerasTensors. + + This is useful in the case where users do something like: + x = keras.Input(...) + y = keras.Input(...) + out = tf.RaggedTensor.from_row_splits(x, y) + """ + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__(self, cls_ref, method_name, **kwargs): + self.cls_ref = cls_ref + self.method_name = method_name + self.cls_symbol = get_canonical_name_for_symbol( + self.cls_ref, add_prefix_to_v1_names=True + ) or get_canonical_name_for_symbol( + self.cls_ref, api_name="keras", add_prefix_to_v1_names=True + ) + if "name" not in kwargs: + kwargs["name"] = backend.unique_object_name( + "tf." + self.cls_symbol + "." + self.method_name, + zero_based=True, + avoid_observed_names=True, + ) + kwargs["autocast"] = False + + # Do not individually trace op layers in the SavedModel. + self._must_restore_from_config = True + + super().__init__(**kwargs) + + # Preserve all argument data structures when saving/loading a config + # (e.g., don't unnest lists that contain one element) + self._preserve_input_structure_in_config = True + + self._call_spec.expects_training_arg = False + self._call_spec.expects_mask_arg = False + + def call(self, args, kwargs): + return getattr(self.cls_ref, self.method_name)(*args, **kwargs) + + def get_config(self): + if not self.cls_symbol: + raise ValueError( + "This Keras class method conversion tried to convert " + f"a method belonging to class {self.cls_symbol}, a class " + "that is not publicly exposed in the TensorFlow API. " + "To ensure cross-version compatibility of Keras models " + "that use op layers, only op layers produced from " + "public TensorFlow API symbols can be serialized." + ) + + config = { + "cls_symbol": self.cls_symbol, + "method_name": self.method_name, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + config = config.copy() + symbol_name = config.pop("cls_symbol") + cls_ref = get_symbol_from_name(symbol_name) + if not cls_ref: + raise ValueError( + f"TensorFlow symbol `{symbol_name}` could not be found." 
+ ) + + config["cls_ref"] = cls_ref + + return cls(**config) class KerasOpDispatcher(tf.__internal__.dispatch.GlobalOpDispatcher): - """A global dispatcher that allows building a functional model with TF Ops.""" + """A global dispatcher that allows building a functional model with TF + Ops.""" - def handle(self, op, args, kwargs): - """Handle the specified operation with the specified arguments.""" - if any( - isinstance(x, keras_tensor.KerasTensor) - for x in tf.nest.flatten([args, kwargs])): - return TFOpLambda(op)(*args, **kwargs) - else: - return self.NOT_SUPPORTED + def handle(self, op, args, kwargs): + """Handle the specified operation with the specified arguments.""" + if any( + isinstance(x, keras_tensor.KerasTensor) + for x in tf.nest.flatten([args, kwargs]) + ): + return TFOpLambda(op)(*args, **kwargs) + else: + return self.NOT_SUPPORTED KerasOpDispatcher().register() class InstanceProperty(Layer): - """Wraps an instance property access (e.g. + """Wraps an instance property access (e.g. - `x.foo`) in a Keras Layer. + `x.foo`) in a Keras Layer. - This layer takes an attribute name `attr_name` in the constructor and, - when called on input tensor `obj` returns `obj.attr_name`. + This layer takes an attribute name `attr_name` in the constructor and, + when called on input tensor `obj` returns `obj.attr_name`. - KerasTensors specialized for specific extension types use it to - represent instance property accesses on the represented object in the - case where the property needs to be dynamically accessed as opposed to - being statically computed from the typespec, e.g. + KerasTensors specialized for specific extension types use it to + represent instance property accesses on the represented object in the + case where the property needs to be dynamically accessed as opposed to + being statically computed from the typespec, e.g. - x = keras.Input(..., ragged=True) - out = x.flat_values - """ + x = keras.Input(..., ragged=True) + out = x.flat_values + """ - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, attr_name, **kwargs): - self.attr_name = attr_name + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__(self, attr_name, **kwargs): + self.attr_name = attr_name - if 'name' not in kwargs: - kwargs['name'] = backend.unique_object_name( - 'input.' + self.attr_name, zero_based=True, avoid_observed_names=True) - kwargs['autocast'] = False + if "name" not in kwargs: + kwargs["name"] = backend.unique_object_name( + "input." + self.attr_name, + zero_based=True, + avoid_observed_names=True, + ) + kwargs["autocast"] = False - # Do not individually trace op layers in the SavedModel. - self._must_restore_from_config = True + # Do not individually trace op layers in the SavedModel. 
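+        # (Such layers are instead re-created from their `get_config` output
+        # when the SavedModel is loaded back.)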
+ self._must_restore_from_config = True - super().__init__(**kwargs) + super().__init__(**kwargs) - # Preserve all argument data structures when saving/loading a config - # (e.g., don't unnest lists that contain one element) - self._preserve_input_structure_in_config = True + # Preserve all argument data structures when saving/loading a config + # (e.g., don't unnest lists that contain one element) + self._preserve_input_structure_in_config = True - def call(self, obj): - return getattr(obj, self.attr_name) + def call(self, obj): + return getattr(obj, self.attr_name) - def get_config(self): - config = {'attr_name': self.attr_name} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = {"attr_name": self.attr_name} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) - @classmethod - def from_config(cls, config, custom_objects=None): - return cls(**config) + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) class InstanceMethod(InstanceProperty): - """Wraps an instance method access (e.g. `x.foo(arg)` in a Keras Layer. + """Wraps an instance method access (e.g. `x.foo(arg)` in a Keras Layer. - This layer takes an attribute name `attr_name` in the constructor and, - when called on input tensor `obj` with additional arguments `args` and - `kwargs` returns `obj.attr_name(*args, **kwargs)`. + This layer takes an attribute name `attr_name` in the constructor and, + when called on input tensor `obj` with additional arguments `args` and + `kwargs` returns `obj.attr_name(*args, **kwargs)`. - KerasTensors specialized for specific extension types use it to - represent dynamic instance method calls on the represented object, e.g. + KerasTensors specialized for specific extension types use it to + represent dynamic instance method calls on the represented object, e.g. - x = keras.Input(..., ragged=True) - new_values = keras.Input(...) - out = x.with_values(new_values) - """ + x = keras.Input(..., ragged=True) + new_values = keras.Input(...) + out = x.with_values(new_values) + """ - def call(self, obj, args, kwargs): - method = getattr(obj, self.attr_name) - return method(*args, **kwargs) + def call(self, obj, args, kwargs): + method = getattr(obj, self.attr_name) + return method(*args, **kwargs) class TFOpLambda(Layer): - """Wraps TF API symbols in a `Layer` object. - - It is inserted by the Functional API construction whenever users call - a supported TF symbol on KerasTensors. - - Like Lambda layers, this layer tries to raise warnings when it detects users - explicitly use variables in the call. (To let them know - that the layer will not capture the variables). - - This is useful in the case where users do something like: - x = keras.Input(...) - y = tf.Variable(...) - out = x * tf_variable - """ - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, function, **kwargs): - self.function = function - self.symbol = ( - get_canonical_name_for_symbol( - self.function, add_prefix_to_v1_names=True) or - get_canonical_name_for_symbol( - self.function, api_name='keras', add_prefix_to_v1_names=True)) - if 'name' not in kwargs: - # Generate a name. - # TFOpLambda layers avoid already-observed names, - # because users cannot easily control the generated names. - # Without this avoidance, users would be more likely to run - # into unavoidable duplicate layer name collisions. 
- # (For standard layers users could just set `name` when creating the - # layer to work around a collision, but they can't do that for - # auto-generated layers) - if self.symbol: - name = 'tf.' + self.symbol - else: - name = self.function.__name__ - kwargs['name'] = backend.unique_object_name( - name, zero_based=True, avoid_observed_names=True) - kwargs['autocast'] = False - - # Decorate the function to produce this layer's call method - def _call_wrapper(*args, **kwargs): - return self._call_wrapper(*args, **kwargs) - - self.call = tf.__internal__.decorator.make_decorator( - function, _call_wrapper) - - # Do not individually trace op layers in the SavedModel. - self._must_restore_from_config = True - - super().__init__(**kwargs) - - # Preserve all argument data structures when saving/loading a config - # (e.g., don't unnest lists that contain one element) - self._preserve_input_structure_in_config = True - - # Warning on every invocation will be quite irksome in Eager mode. - self._already_warned = False - - self._call_spec.expects_training_arg = False - self._call_spec.expects_mask_arg = False - - def _call_wrapper(self, *args, **kwargs): - created_variables = [] - - def _variable_creator(next_creator, **creator_kwargs): - var = next_creator(**creator_kwargs) - created_variables.append(var) - return var - - with tf.GradientTape(watch_accessed_variables=True) as tape, \ - tf.variable_creator_scope(_variable_creator): - # We explicitly drop `name` arguments here, - # to guard against the case where an op explicitly has a - # `name` passed (which is susceptible to producing - # multiple ops w/ the same name when the layer is reused) - kwargs.pop('name', None) - result = self.function(*args, **kwargs) - self._check_variables(created_variables, tape.watched_variables()) - return result - - def _check_variables(self, created_variables, accessed_variables): - if not created_variables and not accessed_variables: - # In the common case that a Lambda layer does not touch a Variable, we - # don't want to incur the runtime cost of assembling any state used for - # checking only to immediately discard it. - return - - tracked_weights = set(v.ref() for v in self.weights) - untracked_new_vars = [ - v for v in created_variables if v.ref() not in tracked_weights - ] - if untracked_new_vars: - variable_str = '\n'.join(' {}'.format(i) for i in untracked_new_vars) - raise ValueError( - 'The following Variables were created within a Lambda layer ' - f'({self.name}) but are not tracked by said layer: {variable_str}\n' - 'The layer cannot safely ensure proper Variable reuse ' - 'across multiple calls, and consequently this behavior is disallowed ' - 'for safety reasons. Lambda layers are not well suited for stateful ' - 'computation; instead, writing a subclassed Layer is the recommend ' - 'way to define layers with Variables.') - - untracked_used_vars = [ - v for v in accessed_variables if v.ref() not in tracked_weights - ] - if untracked_used_vars and not self._already_warned: - variable_str = '\n'.join(' {}'.format(i) for i in untracked_used_vars) - self._warn( - 'The following Variables were used in a Lambda layer\'s call ' - f'({self.name}), but are not present in its tracked objects: ' - f'{variable_str}. This is a strong indication that the Lambda layer ' - 'should be rewritten as a subclassed Layer.') - self._already_warned = True - - def _warn(self, msg): - # This method will be overridden in a unit test to raise an error, because - # self.assertWarns is not universally implemented. 
- return tf_logging.warning(msg) - - def get_config(self): - if not self.symbol: - raise ValueError( - f'This Keras op layer was generated from {self.function}, a method ' - 'that is not publicly exposed in the TensorFlow API. This ' - 'may have happened if the method was explicitly ' - 'decorated to add dispatching support, and it was used ' - 'during Functional model construction. ' - 'To ensure cross-version compatibility of Keras models ' - 'that use op layers, only op layers produced from ' - 'public TensorFlow API symbols can be serialized.') - config = {'function': self.symbol} - - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - config = config.copy() - symbol_name = config['function'] - function = get_symbol_from_name(symbol_name) - if not function: - raise ValueError(f'TF symbol `{symbol_name}` could not be found.') - - config['function'] = function - - return cls(**config) - - -def _delegate_property(keras_tensor_cls, property_name): # pylint: disable=invalid-name - """Register property on a KerasTensor class. - - Calling this multiple times with the same arguments should be a no-op. - - This method exposes a property on the KerasTensor class that will use an - `InstanceProperty` layer to access the property on the represented - intermediate values in the model. - - Args: - keras_tensor_cls: The KerasTensor subclass that should expose the property. - property_name: The name of the property to expose and delegate to the - represented (Composite)Tensor. - """ - # We use a lambda because we can't create a Keras layer at import time - # due to dynamic layer class versioning. - property_access = property(lambda self: InstanceProperty(property_name)(self)) # pylint: disable=unnecessary-lambda - setattr(keras_tensor_cls, property_name, property_access) - - -def _delegate_method(keras_tensor_cls, method_name): # pylint: disable=invalid-name - """Register method on a KerasTensor class. - - Calling this function times with the same arguments should be a no-op. - - This method exposes an instance method on the KerasTensor class that will use - an `InstanceMethod` layer to run the desired method on the represented - intermediate values in the model. - - Args: - keras_tensor_cls: The KerasTensor subclass that should expose the property. - method_name: The name of the method to expose and delegate to the - represented (Composite)Tensor. - """ - - def delegate(self, *args, **kwargs): - return InstanceMethod(method_name)(self, args, kwargs) - - setattr(keras_tensor_cls, method_name, delegate) + """Wraps TF API symbols in a `Layer` object. + + It is inserted by the Functional API construction whenever users call + a supported TF symbol on KerasTensors. + + Like Lambda layers, this layer tries to raise warnings when it detects users + explicitly use variables in the call. (To let them know + that the layer will not capture the variables). + + This is useful in the case where users do something like: + x = keras.Input(...) + y = tf.Variable(...) + out = x * tf_variable + """ + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__(self, function, **kwargs): + self.function = function + self.symbol = get_canonical_name_for_symbol( + self.function, add_prefix_to_v1_names=True + ) or get_canonical_name_for_symbol( + self.function, api_name="keras", add_prefix_to_v1_names=True + ) + if "name" not in kwargs: + # Generate a name. 
+            # TFOpLambda layers avoid already-observed names,
+            # because users cannot easily control the generated names.
+            # Without this avoidance, users would be more likely to run
+            # into unavoidable duplicate layer name collisions.
+            # (For standard layers users could just set `name` when creating the
+            # layer to work around a collision, but they can't do that for
+            # auto-generated layers)
+            if self.symbol:
+                name = "tf." + self.symbol
+            else:
+                name = self.function.__name__
+            kwargs["name"] = backend.unique_object_name(
+                name, zero_based=True, avoid_observed_names=True
+            )
+        kwargs["autocast"] = False
+
+        # Decorate the function to produce this layer's call method
+        def _call_wrapper(*args, **kwargs):
+            return self._call_wrapper(*args, **kwargs)
+
+        self.call = tf.__internal__.decorator.make_decorator(
+            function, _call_wrapper
+        )
+
+        # Do not individually trace op layers in the SavedModel.
+        self._must_restore_from_config = True
+
+        super().__init__(**kwargs)
+
+        # Preserve all argument data structures when saving/loading a config
+        # (e.g., don't unnest lists that contain one element)
+        self._preserve_input_structure_in_config = True
+
+        # Warning on every invocation will be quite irksome in Eager mode.
+        self._already_warned = False
+
+        self._call_spec.expects_training_arg = False
+        self._call_spec.expects_mask_arg = False
+
+    def _call_wrapper(self, *args, **kwargs):
+        created_variables = []
+
+        def _variable_creator(next_creator, **creator_kwargs):
+            var = next_creator(**creator_kwargs)
+            created_variables.append(var)
+            return var
+
+        with tf.GradientTape(
+            watch_accessed_variables=True
+        ) as tape, tf.variable_creator_scope(_variable_creator):
+            # We explicitly drop `name` arguments here,
+            # to guard against the case where an op explicitly has a
+            # `name` passed (which is susceptible to producing
+            # multiple ops w/ the same name when the layer is reused)
+            kwargs.pop("name", None)
+            result = self.function(*args, **kwargs)
+        self._check_variables(created_variables, tape.watched_variables())
+        return result
+
+    def _check_variables(self, created_variables, accessed_variables):
+        if not created_variables and not accessed_variables:
+            # In the common case that a Lambda layer does not touch a Variable,
+            # we don't want to incur the runtime cost of assembling any state
+            # used for checking only to immediately discard it.
+            return
+
+        tracked_weights = set(v.ref() for v in self.weights)
+        untracked_new_vars = [
+            v for v in created_variables if v.ref() not in tracked_weights
+        ]
+        if untracked_new_vars:
+            variable_str = "\n".join(f"  {i}" for i in untracked_new_vars)
+            raise ValueError(
+                "The following Variables were created within a Lambda layer "
+                f"({self.name}) but are not tracked by said layer: "
+                f"{variable_str}\n"
+                "The layer cannot safely ensure proper Variable reuse "
+                "across multiple calls, and consequently this behavior "
+                "is disallowed for safety reasons. Lambda layers are "
+                "not well suited for stateful computation; instead, "
+                "writing a subclassed Layer is the recommended "
+                "way to define layers with Variables."
+            )
+
+        untracked_used_vars = [
+            v for v in accessed_variables if v.ref() not in tracked_weights
+        ]
+        if untracked_used_vars and not self._already_warned:
+            variable_str = "\n".join(f"  {i}" for i in untracked_used_vars)
+            self._warn(
+                "The following Variables were used in a Lambda layer's call "
+                f"({self.name}), but are not present in its tracked objects: "
+                f"{variable_str}. This is a strong indication that the Lambda "
+                "layer should be rewritten as a subclassed Layer."
+            )
+            self._already_warned = True
+
+    def _warn(self, msg):
+        # This method will be overridden in a unit test to raise an error,
+        # because self.assertWarns is not universally implemented.
+        return tf_logging.warning(msg)
+
+    def get_config(self):
+        if not self.symbol:
+            raise ValueError(
+                f"This Keras op layer was generated from {self.function}, a "
+                "method that is not publicly exposed in the TensorFlow API. "
+                "This may have happened if the method was explicitly "
+                "decorated to add dispatching support, and it was used "
+                "during Functional model construction. "
+                "To ensure cross-version compatibility of Keras models "
+                "that use op layers, only op layers produced from "
+                "public TensorFlow API symbols can be serialized."
+            )
+        config = {"function": self.symbol}
+
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    @classmethod
+    def from_config(cls, config, custom_objects=None):
+        config = config.copy()
+        symbol_name = config["function"]
+        function = get_symbol_from_name(symbol_name)
+        if not function:
+            raise ValueError(f"TF symbol `{symbol_name}` could not be found.")
+
+        config["function"] = function
+
+        return cls(**config)
+
+
+def _delegate_property(keras_tensor_cls, property_name):
+    """Register property on a KerasTensor class.
+
+    Calling this multiple times with the same arguments should be a no-op.
+
+    This method exposes a property on the KerasTensor class that will use an
+    `InstanceProperty` layer to access the property on the represented
+    intermediate values in the model.
+
+    Args:
+        keras_tensor_cls: The KerasTensor subclass that should expose the
+            property.
+        property_name: The name of the property to expose and delegate to the
+            represented (Composite)Tensor.
+    """
+    # We use a lambda because we can't create a Keras layer at import time
+    # due to dynamic layer class versioning.
+    property_access = property(
+        lambda self: InstanceProperty(property_name)(self)
+    )
+    setattr(keras_tensor_cls, property_name, property_access)
+
+
+def _delegate_method(keras_tensor_cls, method_name):
+    """Register method on a KerasTensor class.
+
+    Calling this function multiple times with the same arguments should be a
+    no-op.
+
+    This method exposes an instance method on the KerasTensor class that will
+    use an `InstanceMethod` layer to run the desired method on the represented
+    intermediate values in the model.
+
+    Args:
+        keras_tensor_cls: The KerasTensor subclass that should expose the
+            method.
+        method_name: The name of the method to expose and delegate to the
+            represented (Composite)Tensor.
+    """
+
+    def delegate(self, *args, **kwargs):
+        return InstanceMethod(method_name)(self, args, kwargs)
+
+    setattr(keras_tensor_cls, method_name, delegate)
 
 
 # We do not support the `uniform_row_length` property because it
@@ -378,168 +405,177 @@ def delegate(self, *args, **kwargs):
 # never equal `None`, breaking code that expects it to be partially-static
 # in unpredictable ways.
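The `_delegate_property` / `_delegate_method` helpers above are the entire mechanism by which composite-tensor attributes become usable on symbolic KerasTensors. A minimal, self-contained sketch of the same pattern on a hypothetical `Wrapper` class (all names below are illustrative, not part of Keras):

```python
import numpy as np


class Wrapper:
    """Hypothetical stand-in for a KerasTensor subclass."""

    def __init__(self, value):
        self.value = value


def delegate_property(cls, property_name):
    # Resolve the attribute on the wrapped value at access time, just as
    # `_delegate_property` defers to an `InstanceProperty` layer at call time.
    accessor = property(lambda self: getattr(self.value, property_name))
    setattr(cls, property_name, accessor)


def delegate_method(cls, method_name):
    def delegate(self, *args, **kwargs):
        return getattr(self.value, method_name)(*args, **kwargs)

    setattr(cls, method_name, delegate)


delegate_property(Wrapper, "shape")
delegate_method(Wrapper, "sum")

x = Wrapper(np.ones((2, 3)))
print(x.shape)        # (2, 3), resolved dynamically through the delegate
print(x.sum(axis=0))  # [2. 2. 2.]
```

The `for` loops that follow apply exactly this registration to the ragged and sparse KerasTensor classes.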
for ragged_property in [ - 'values', 'flat_values', 'row_splits', 'nested_row_splits' + "values", + "flat_values", + "row_splits", + "nested_row_splits", ]: - _delegate_property(keras_tensor.RaggedKerasTensor, ragged_property) + _delegate_property(keras_tensor.RaggedKerasTensor, ragged_property) for ragged_method_name in [ - 'value_rowids', - 'nested_value_rowids', - 'nrows', - 'row_starts', - 'row_limits', - 'row_lengths', - 'nested_row_lengths', - 'bounding_shape', - 'with_values', - 'with_flat_values', - 'with_row_splits_dtype', - 'merge_dims', - 'to_tensor', - 'to_sparse', + "value_rowids", + "nested_value_rowids", + "nrows", + "row_starts", + "row_limits", + "row_lengths", + "nested_row_lengths", + "bounding_shape", + "with_values", + "with_flat_values", + "with_row_splits_dtype", + "merge_dims", + "to_tensor", + "to_sparse", ]: - _delegate_method(keras_tensor.RaggedKerasTensor, ragged_method_name) + _delegate_method(keras_tensor.RaggedKerasTensor, ragged_method_name) for sparse_property in [ - 'indices', - 'values', - 'dense_shape', + "indices", + "values", + "dense_shape", ]: - _delegate_property(keras_tensor.SparseKerasTensor, sparse_property) + _delegate_property(keras_tensor.SparseKerasTensor, sparse_property) for sparse_method in [ - 'with_values', + "with_values", ]: - _delegate_method(keras_tensor.SparseKerasTensor, sparse_method) + _delegate_method(keras_tensor.SparseKerasTensor, sparse_method) class TFClassMethodDispatcher(tf.__internal__.dispatch.OpDispatcher): - """A class method dispatcher that allows building a functional model with TF class methods.""" + """A class method dispatcher that allows building a functional model with TF + class methods.""" - def __init__(self, cls, method_name): - self.cls = cls - self.method_name = method_name + def __init__(self, cls, method_name): + self.cls = cls + self.method_name = method_name - def handle(self, args, kwargs): - """Handle the specified operation with the specified arguments.""" - if any( - isinstance(x, keras_tensor.KerasTensor) - for x in tf.nest.flatten([args, kwargs])): - return ClassMethod(self.cls, self.method_name)(args[1:], kwargs) - else: - return self.NOT_SUPPORTED + def handle(self, args, kwargs): + """Handle the specified operation with the specified arguments.""" + if any( + isinstance(x, keras_tensor.KerasTensor) + for x in tf.nest.flatten([args, kwargs]) + ): + return ClassMethod(self.cls, self.method_name)(args[1:], kwargs) + else: + return self.NOT_SUPPORTED for ragged_class_method in [ - 'from_value_rowids', - 'from_row_splits', - 'from_row_lengths', - 'from_row_starts', - 'from_row_limits', - 'from_uniform_row_length', - 'from_nested_value_rowids', - 'from_nested_row_splits', - 'from_nested_row_lengths', - 'from_tensor', - 'from_sparse', + "from_value_rowids", + "from_row_splits", + "from_row_lengths", + "from_row_starts", + "from_row_limits", + "from_uniform_row_length", + "from_nested_value_rowids", + "from_nested_row_splits", + "from_nested_row_lengths", + "from_tensor", + "from_sparse", ]: - TFClassMethodDispatcher(tf.RaggedTensor, ragged_class_method).register( - getattr(tf.RaggedTensor, ragged_class_method)) + TFClassMethodDispatcher(tf.RaggedTensor, ragged_class_method).register( + getattr(tf.RaggedTensor, ragged_class_method) + ) class SlicingOpLambda(TFOpLambda): - """Wraps TF API symbols in a `Layer` object. - - It is inserted by the Functional API construction whenever users call - a supported TF symbol on KerasTensors. 
- - Like Lambda layers, this layer tries to raise warnings when it detects users - explicitly use variables in the call. (To let them know - that the layer will not capture the variables). - - This is useful in the case where users do something like: - x = keras.Input(...) - y = tf.Variable(...) - out = x * tf_variable - """ - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, function, **kwargs): - super().__init__(function, **kwargs) - - original_call = self.call - - # Decorate the function to produce this layer's call method - def _call_wrapper(*args, **kwargs): - # Turn any slice dicts in the args back into `slice` objects. - # This conversion cannot use nest.flatten/map_structure, - # because dicts are flattened by nest while slices aren't. - # So, map_structure would only see the individual elements in the - # dict. - # This can't use map_structure_up_to either because the 'shallowness' of - # the shallow tree would have to vary depending on if only one dim or - # multiple are being sliced. - new_args = [] - for arg in args: - arg = _dict_to_slice(arg) - if isinstance(arg, (list, tuple)): - new_arg = [] - for sub_arg in arg: - new_arg.append(_dict_to_slice(sub_arg)) - arg = new_arg - new_args.append(arg) - - # Handle the kwargs too. - new_kwargs = {} - for key, value in kwargs.items(): - value = _dict_to_slice(value) - if isinstance(value, (list, tuple)): - new_value = [] - for v in value: - new_value.append(_dict_to_slice(v)) - value = new_value - new_kwargs[key] = value - - return original_call(*new_args, **new_kwargs) - - self.call = tf.__internal__.decorator.make_decorator( - original_call, _call_wrapper) + """Wraps TF API symbols in a `Layer` object. + + It is inserted by the Functional API construction whenever users call + a supported TF symbol on KerasTensors. + + Like Lambda layers, this layer tries to raise warnings when it detects users + explicitly use variables in the call. (To let them know + that the layer will not capture the variables). + + This is useful in the case where users do something like: + x = keras.Input(...) + y = tf.Variable(...) + out = x * tf_variable + """ + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__(self, function, **kwargs): + super().__init__(function, **kwargs) + + original_call = self.call + + # Decorate the function to produce this layer's call method + def _call_wrapper(*args, **kwargs): + # Turn any slice dicts in the args back into `slice` objects. + # This conversion cannot use nest.flatten/map_structure, + # because dicts are flattened by nest while slices aren't. + # So, map_structure would only see the individual elements in the + # dict. + # This can't use map_structure_up_to either because the + # 'shallowness' of the shallow tree would have to vary depending on + # if only one dim or multiple are being sliced. + new_args = [] + for arg in args: + arg = _dict_to_slice(arg) + if isinstance(arg, (list, tuple)): + new_arg = [] + for sub_arg in arg: + new_arg.append(_dict_to_slice(sub_arg)) + arg = new_arg + new_args.append(arg) + + # Handle the kwargs too. 
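+            # (`TFSlicingOpDispatcher` below dict-encodes slices appearing in
+            # kwargs as well as in positional args, so both sides need the
+            # reverse conversion.)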
+ new_kwargs = {} + for key, value in kwargs.items(): + value = _dict_to_slice(value) + if isinstance(value, (list, tuple)): + new_value = [] + for v in value: + new_value.append(_dict_to_slice(v)) + value = new_value + new_kwargs[key] = value + + return original_call(*new_args, **new_kwargs) + + self.call = tf.__internal__.decorator.make_decorator( + original_call, _call_wrapper + ) def _slice_to_dict(x): - if isinstance(x, slice): - return {'start': x.start, 'stop': x.stop, 'step': x.step} - return x + if isinstance(x, slice): + return {"start": x.start, "stop": x.stop, "step": x.step} + return x def _dict_to_slice(x): - if isinstance(x, dict): - return slice(x['start'], x['stop'], x['step']) - return x + if isinstance(x, dict): + return slice(x["start"], x["stop"], x["step"]) + return x class TFSlicingOpDispatcher(tf.__internal__.dispatch.OpDispatcher): - """A global dispatcher that allows building a functional model with TF Ops.""" + """A global dispatcher that allows building a functional model with TF + Ops.""" - def __init__(self, op): - self.op = op + def __init__(self, op): + self.op = op - def handle(self, args, kwargs): - """Handle the specified operation with the specified arguments.""" - args = tf.nest.map_structure(_slice_to_dict, args) - kwargs = tf.nest.map_structure(_slice_to_dict, kwargs) - if any( - isinstance(x, keras_tensor.KerasTensor) - for x in tf.nest.flatten([args, kwargs])): - return SlicingOpLambda(self.op)(*args, **kwargs) - else: - return self.NOT_SUPPORTED + def handle(self, args, kwargs): + """Handle the specified operation with the specified arguments.""" + args = tf.nest.map_structure(_slice_to_dict, args) + kwargs = tf.nest.map_structure(_slice_to_dict, kwargs) + if any( + isinstance(x, keras_tensor.KerasTensor) + for x in tf.nest.flatten([args, kwargs]) + ): + return SlicingOpLambda(self.op)(*args, **kwargs) + else: + return self.NOT_SUPPORTED for slicing_op in [ - tf.__operators__.getitem, # pylint: disable=protected-access + tf.__operators__.getitem, tf.compat.v1.boolean_mask, tf.boolean_mask, - tf.__operators__.ragged_getitem + tf.__operators__.ragged_getitem, ]: - TFSlicingOpDispatcher(slicing_op).register(slicing_op) + TFSlicingOpDispatcher(slicing_op).register(slicing_op) diff --git a/keras/layers/kernelized.py b/keras/layers/kernelized.py index 5f3b64a0c905..f8114bbb7c74 100644 --- a/keras/layers/kernelized.py +++ b/keras/layers/kernelized.py @@ -12,254 +12,275 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=g-classes-have-attributes + """Keras layers that implement explicit (approximate) kernel feature maps.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np from keras import initializers from keras.engine import base_layer from keras.engine import input_spec + +# isort: off from tensorflow.python.util.tf_export import keras_export -_SUPPORTED_RBF_KERNEL_TYPES = ['gaussian', 'laplacian'] +_SUPPORTED_RBF_KERNEL_TYPES = ["gaussian", "laplacian"] -@keras_export('keras.layers.experimental.RandomFourierFeatures') +@keras_export("keras.layers.experimental.RandomFourierFeatures") class RandomFourierFeatures(base_layer.Layer): - r"""Layer that projects its inputs into a random feature space. - - This layer implements a mapping from input space to a space with `output_dim` - dimensions, which approximates shift-invariant kernels. 
A kernel function - `K(x, y)` is shift-invariant if `K(x, y) == k(x - y)` for some function `k`. - Many popular Radial Basis Functions (RBF), including Gaussian and - Laplacian kernels, are shift-invariant. - - The implementation of this layer is based on the following paper: - ["Random Features for Large-Scale Kernel Machines"]( - https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf) - by Ali Rahimi and Ben Recht. - - The distribution from which the parameters of the random features map (layer) - are sampled determines which shift-invariant kernel the layer approximates - (see paper for more details). You can use the distribution of your - choice. The layer supports out-of-the-box - approximations of the following two RBF kernels: - - - Gaussian: `K(x, y) == exp(- square(x - y) / (2 * square(scale)))` - - Laplacian: `K(x, y) = exp(-abs(x - y) / scale))` - - **Note:** Unlike what is described in the paper and unlike what is used in - the Scikit-Learn implementation, the output of this layer does not apply - the `sqrt(2 / D)` normalization factor. - - **Usage:** Typically, this layer is used to "kernelize" linear models by - applying a non-linear transformation (this layer) to the input features and - then training a linear model on top of the transformed features. Depending on - the loss function of the linear model, the composition of this layer and the - linear model results to models that are equivalent (up to approximation) to - kernel SVMs (for hinge loss), kernel logistic regression (for logistic loss), - kernel linear regression (for squared loss), etc. - - Examples: - - A kernel multinomial logistic regression model with Gaussian kernel for MNIST: - - ```python - model = keras.Sequential([ - keras.Input(shape=(784,)), - RandomFourierFeatures( - output_dim=4096, - scale=10., - kernel_initializer='gaussian'), - layers.Dense(units=10, activation='softmax'), - ]) - model.compile( - optimizer='adam', - loss='categorical_crossentropy', - metrics=['categorical_accuracy'] - ) - ``` - - A quasi-SVM classifier for MNIST: - - ```python - model = keras.Sequential([ - keras.Input(shape=(784,)), - RandomFourierFeatures( - output_dim=4096, - scale=10., - kernel_initializer='gaussian'), - layers.Dense(units=10), - ]) - model.compile( - optimizer='adam', - loss='hinge', - metrics=['categorical_accuracy'] - ) - ``` - - To use another kernel, just replace the layer creation line with: - - ```python - random_features_layer = RandomFourierFeatures( - output_dim=500, - kernel_initializer=, - scale=..., - ...) - ``` - - Args: - output_dim: Positive integer, the dimension of the layer's output, i.e., the - number of random features used to approximate the kernel. - kernel_initializer: Determines the distribution of the parameters of the - random features map (and therefore the kernel approximated by the layer). - It can be either a string identifier or a Keras `Initializer` instance. - Currently only 'gaussian' and 'laplacian' are supported string - identifiers (case insensitive). Note that the kernel matrix is not - trainable. - scale: For Gaussian and Laplacian kernels, this corresponds to a scaling - factor of the corresponding kernel approximated by the layer (see concrete - definitions above). When provided, it should be a positive float. If None, - a default value is used: if the kernel initializer is set to "gaussian", - `scale` defaults to `sqrt(input_dim / 2)`, otherwise, it defaults to 1.0. 
- Both the approximation error of the kernel and the classification quality - are sensitive to this parameter. If `trainable` is set to `True`, this - parameter is learned end-to-end during training and the provided value - serves as the initial value. - **Note:** When features from this layer are fed to a linear model, - by making `scale` trainable, the resulting optimization problem is - no longer convex (even if the loss function used by the linear model - is convex). - trainable: Whether the scaling parameter of the layer should be trainable. - Defaults to `False`. - name: String, name to use for this layer. - """ - - def __init__(self, - output_dim, - kernel_initializer='gaussian', - scale=None, - trainable=False, - name=None, - **kwargs): - if output_dim <= 0: - raise ValueError( - f'`output_dim` should be a positive integer. Received: {output_dim}') - if isinstance(kernel_initializer, str): - if kernel_initializer.lower() not in _SUPPORTED_RBF_KERNEL_TYPES: - raise ValueError( - f'Unsupported `kernel_initializer`: {kernel_initializer} ' - f'Expected one of: {_SUPPORTED_RBF_KERNEL_TYPES}') - if scale is not None and scale <= 0.0: - raise ValueError('When provided, `scale` should be a positive float. ' - f'Received: {scale}') - super().__init__( - trainable=trainable, name=name, **kwargs) - self.output_dim = output_dim - self.kernel_initializer = kernel_initializer - self.scale = scale - - def build(self, input_shape): - input_shape = tf.TensorShape(input_shape) - # TODO(pmol): Allow higher dimension inputs. Currently the input is expected - # to have shape [batch_size, dimension]. - if input_shape.rank != 2: - raise ValueError( - 'The rank of the input tensor should be 2. ' - f'Received input with rank {input_shape.ndims} instead. ' - f'Full input shape received: {input_shape}') - if input_shape.dims[1].value is None: - raise ValueError( - 'The last dimension of the input tensor should be defined. ' - f'Found `None`. Full input shape received: {input_shape}') - self.input_spec = input_spec.InputSpec( - ndim=2, axes={1: input_shape.dims[1].value}) - input_dim = input_shape.dims[1].value - - kernel_initializer = _get_random_features_initializer( - self.kernel_initializer, shape=(input_dim, self.output_dim)) - - self.unscaled_kernel = self.add_weight( - name='unscaled_kernel', - shape=(input_dim, self.output_dim), - dtype=tf.float32, - initializer=kernel_initializer, - trainable=False) - - self.bias = self.add_weight( - name='bias', - shape=(self.output_dim,), - dtype=tf.float32, - initializer=initializers.RandomUniform(minval=0.0, maxval=2 * np.pi), - trainable=False) - - if self.scale is None: - self.scale = _get_default_scale(self.kernel_initializer, input_dim) - self.kernel_scale = self.add_weight( - name='kernel_scale', - shape=(1,), - dtype=tf.float32, - initializer=tf.compat.v1.constant_initializer(self.scale), - trainable=True, - constraint='NonNeg') - super().build(input_shape) - - def call(self, inputs): - inputs = tf.convert_to_tensor(inputs, dtype=self.dtype) - inputs = tf.cast(inputs, tf.float32) - kernel = (1.0 / self.kernel_scale) * self.unscaled_kernel - outputs = tf.matmul(a=inputs, b=kernel) - outputs = tf.nn.bias_add(outputs, self.bias) - return tf.cos(outputs) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape) - input_shape = input_shape.with_rank(2) - if input_shape.dims[-1].value is None: - raise ValueError( - 'The last dimension of the input tensor should be defined. ' - f'Found `None`. 
Full input shape received: {input_shape}')
-    return input_shape[:-1].concatenate(self.output_dim)
-
-  def get_config(self):
-    kernel_initializer = self.kernel_initializer
-    if not isinstance(kernel_initializer, str):
-      kernel_initializer = initializers.serialize(kernel_initializer)
-    config = {
-        'output_dim': self.output_dim,
-        'kernel_initializer': kernel_initializer,
-        'scale': self.scale,
-    }
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(config.items()))
+    r"""Layer that projects its inputs into a random feature space.
+
+    This layer implements a mapping from input space to a space with
+    `output_dim` dimensions, which approximates shift-invariant kernels. A
+    kernel function `K(x, y)` is shift-invariant if `K(x, y) == k(x - y)` for
+    some function `k`. Many popular Radial Basis Functions (RBF), including
+    Gaussian and Laplacian kernels, are shift-invariant.
+
+    The implementation of this layer is based on the following paper:
+    ["Random Features for Large-Scale Kernel Machines"](
+    https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf)
+    by Ali Rahimi and Ben Recht.
+
+    The distribution from which the parameters of the random features map
+    (layer) are sampled determines which shift-invariant kernel the layer
+    approximates (see paper for more details). You can use the distribution of
+    your choice. The layer supports out-of-the-box approximations of the
+    following two RBF kernels:
+
+    - Gaussian: `K(x, y) == exp(- square(x - y) / (2 * square(scale)))`
+    - Laplacian: `K(x, y) == exp(-abs(x - y) / scale)`
+
+    **Note:** Unlike what is described in the paper and unlike what is used in
+    the Scikit-Learn implementation, the output of this layer does not apply
+    the `sqrt(2 / D)` normalization factor.
+
+    **Usage:** Typically, this layer is used to "kernelize" linear models by
+    applying a non-linear transformation (this layer) to the input features
+    and then training a linear model on top of the transformed features.
+    Depending on the loss function of the linear model, the composition of
+    this layer and the linear model results in models that are equivalent (up
+    to approximation) to kernel SVMs (for hinge loss), kernel logistic
+    regression (for logistic loss), kernel linear regression (for squared
+    loss), etc.
+
+    Examples:
+
+    A kernel multinomial logistic regression model with Gaussian kernel for
+    MNIST:
+
+    ```python
+    model = keras.Sequential([
+        keras.Input(shape=(784,)),
+        RandomFourierFeatures(
+            output_dim=4096,
+            scale=10.,
+            kernel_initializer='gaussian'),
+        layers.Dense(units=10, activation='softmax'),
+    ])
+    model.compile(
+        optimizer='adam',
+        loss='categorical_crossentropy',
+        metrics=['categorical_accuracy']
+    )
+    ```
+
+    A quasi-SVM classifier for MNIST:
+
+    ```python
+    model = keras.Sequential([
+        keras.Input(shape=(784,)),
+        RandomFourierFeatures(
+            output_dim=4096,
+            scale=10.,
+            kernel_initializer='gaussian'),
+        layers.Dense(units=10),
+    ])
+    model.compile(
+        optimizer='adam',
+        loss='hinge',
+        metrics=['categorical_accuracy']
+    )
+    ```
+
+    To use another kernel, just replace the layer creation line with (filling
+    in your own initializer and scale):
+
+    ```python
+    random_features_layer = RandomFourierFeatures(
+        output_dim=500,
+        kernel_initializer=...,
+        scale=...,
+        ...)
+    ```
+
+    Args:
+        output_dim: Positive integer, the dimension of the layer's output, i.e.,
+            the number of random features used to approximate the kernel. 
+ kernel_initializer: Determines the distribution of the parameters of the + random features map (and therefore the kernel approximated by the + layer). It can be either a string identifier or a Keras `Initializer` + instance. Currently only 'gaussian' and 'laplacian' are supported + string identifiers (case insensitive). Note that the kernel matrix is + not trainable. + scale: For Gaussian and Laplacian kernels, this corresponds to a scaling + factor of the corresponding kernel approximated by the layer (see + concrete definitions above). When provided, it should be a positive + float. If None, a default value is used: if the kernel initializer is + set to "gaussian", `scale` becomes `sqrt(input_dim / 2)`, otherwise, + it becomes 1.0. Both the approximation error of the kernel and the + classification quality are sensitive to this parameter. If `trainable` + is set to `True`, this parameter is learned end-to-end during training + and the provided value serves as the initial value. + **Note:** When features from this layer are fed to a linear model, + by making `scale` trainable, the resulting optimization problem is + no longer convex (even if the loss function used by the linear model + is convex). + Defaults to `None`. + trainable: Whether the scaling parameter of the layer should be trainable. + Defaults to `False`. + name: String, name to use for this layer. + """ + + def __init__( + self, + output_dim, + kernel_initializer="gaussian", + scale=None, + trainable=False, + name=None, + **kwargs, + ): + if output_dim <= 0: + raise ValueError( + "`output_dim` should be a positive integer. " + f"Received: {output_dim}" + ) + if isinstance(kernel_initializer, str): + if kernel_initializer.lower() not in _SUPPORTED_RBF_KERNEL_TYPES: + raise ValueError( + f"Unsupported `kernel_initializer`: {kernel_initializer} " + f"Expected one of: {_SUPPORTED_RBF_KERNEL_TYPES}" + ) + if scale is not None and scale <= 0.0: + raise ValueError( + "When provided, `scale` should be a positive float. " + f"Received: {scale}" + ) + super().__init__(trainable=trainable, name=name, **kwargs) + self.output_dim = output_dim + self.kernel_initializer = kernel_initializer + self.scale = scale + + def build(self, input_shape): + input_shape = tf.TensorShape(input_shape) + # TODO(pmol): Allow higher dimension inputs. Currently the input is + # expected to have shape [batch_size, dimension]. + if input_shape.rank != 2: + raise ValueError( + "The rank of the input tensor should be 2. " + f"Received input with rank {input_shape.ndims} instead. " + f"Full input shape received: {input_shape}" + ) + if input_shape.dims[1].value is None: + raise ValueError( + "The last dimension of the input tensor should be defined. " + f"Found `None`. 
Full input shape received: {input_shape}" + ) + self.input_spec = input_spec.InputSpec( + ndim=2, axes={1: input_shape.dims[1].value} + ) + input_dim = input_shape.dims[1].value + + kernel_initializer = _get_random_features_initializer( + self.kernel_initializer, shape=(input_dim, self.output_dim) + ) + + self.unscaled_kernel = self.add_weight( + name="unscaled_kernel", + shape=(input_dim, self.output_dim), + dtype=tf.float32, + initializer=kernel_initializer, + trainable=False, + ) + + self.bias = self.add_weight( + name="bias", + shape=(self.output_dim,), + dtype=tf.float32, + initializer=initializers.RandomUniform( + minval=0.0, maxval=2 * np.pi + ), + trainable=False, + ) + + if self.scale is None: + self.scale = _get_default_scale(self.kernel_initializer, input_dim) + self.kernel_scale = self.add_weight( + name="kernel_scale", + shape=(1,), + dtype=tf.float32, + initializer=tf.compat.v1.constant_initializer(self.scale), + trainable=True, + constraint="NonNeg", + ) + super().build(input_shape) + + def call(self, inputs): + inputs = tf.convert_to_tensor(inputs, dtype=self.dtype) + inputs = tf.cast(inputs, tf.float32) + kernel = (1.0 / self.kernel_scale) * self.unscaled_kernel + outputs = tf.matmul(a=inputs, b=kernel) + outputs = tf.nn.bias_add(outputs, self.bias) + return tf.cos(outputs) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape) + input_shape = input_shape.with_rank(2) + if input_shape.dims[-1].value is None: + raise ValueError( + "The last dimension of the input tensor should be defined. " + f"Found `None`. Full input shape received: {input_shape}" + ) + return input_shape[:-1].concatenate(self.output_dim) + + def get_config(self): + kernel_initializer = self.kernel_initializer + if not isinstance(kernel_initializer, str): + kernel_initializer = initializers.serialize(kernel_initializer) + config = { + "output_dim": self.output_dim, + "kernel_initializer": kernel_initializer, + "scale": self.scale, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) def _get_random_features_initializer(initializer, shape): - """Returns Initializer object for random features.""" + """Returns Initializer object for random features.""" - def _get_cauchy_samples(loc, scale, shape): - probs = np.random.uniform(low=0., high=1., size=shape) - return loc + scale * np.tan(np.pi * (probs - 0.5)) + def _get_cauchy_samples(loc, scale, shape): + probs = np.random.uniform(low=0.0, high=1.0, size=shape) + return loc + scale * np.tan(np.pi * (probs - 0.5)) - random_features_initializer = initializer - if isinstance(initializer, str): - if initializer.lower() == 'gaussian': - random_features_initializer = initializers.RandomNormal(stddev=1.0) - elif initializer.lower() == 'laplacian': - random_features_initializer = initializers.Constant( - _get_cauchy_samples(loc=0.0, scale=1.0, shape=shape)) + random_features_initializer = initializer + if isinstance(initializer, str): + if initializer.lower() == "gaussian": + random_features_initializer = initializers.RandomNormal(stddev=1.0) + elif initializer.lower() == "laplacian": + random_features_initializer = initializers.Constant( + _get_cauchy_samples(loc=0.0, scale=1.0, shape=shape) + ) - else: - raise ValueError( - f'Unsupported `kernel_initializer`: "{initializer}" ' - f'Expected one of: {_SUPPORTED_RBF_KERNEL_TYPES}') - return random_features_initializer + else: + raise ValueError( + f'Unsupported `kernel_initializer`: "{initializer}" ' + f"Expected one of: 
{_SUPPORTED_RBF_KERNEL_TYPES}" + ) + return random_features_initializer def _get_default_scale(initializer, input_dim): - if (isinstance(initializer, str) and - initializer.lower() == 'gaussian'): - return np.sqrt(input_dim / 2.0) - return 1.0 + if isinstance(initializer, str) and initializer.lower() == "gaussian": + return np.sqrt(input_dim / 2.0) + return 1.0 diff --git a/keras/layers/kernelized_test.py b/keras/layers/kernelized_test.py index 5f48d9864f75..33835ccd5faf 100644 --- a/keras/layers/kernelized_test.py +++ b/keras/layers/kernelized_test.py @@ -14,372 +14,440 @@ # ============================================================================== """Tests for kernelized.py.""" -import tensorflow.compat.v2 as tf - import functools import math import os import shutil -from absl.testing import parameterized import numpy as np -from tensorflow.python.framework import test_util as tf_test_utils +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + from keras import backend as keras_backend -from keras.testing_infra import test_combinations from keras import initializers -from keras.testing_infra import test_utils from keras.engine import base_layer_utils from keras.engine import input_layer from keras.engine import training from keras.layers import kernelized as kernel_layers -from keras.saving import save +from keras.saving.legacy import save +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils from keras.utils import kernelized_utils +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) + def _exact_gaussian(stddev): - return functools.partial( - kernelized_utils.exact_gaussian_kernel, stddev=stddev) + return functools.partial( + kernelized_utils.exact_gaussian_kernel, stddev=stddev + ) def _exact_laplacian(stddev): - return functools.partial( - kernelized_utils.exact_laplacian_kernel, stddev=stddev) + return functools.partial( + kernelized_utils.exact_laplacian_kernel, stddev=stddev + ) -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class RandomFourierFeaturesTest(tf.test.TestCase, parameterized.TestCase): - - def _assert_all_close(self, expected, actual, atol=0.001): - if not tf.executing_eagerly(): - with self.cached_session() as sess: - keras_backend._initialize_variables(sess) - self.assertAllClose(expected, actual, atol=atol) - else: - self.assertAllClose(expected, actual, atol=atol) - - @test_utils.run_v2_only - def test_state_saving_and_loading(self): - with self.cached_session(): - input_data = np.random.random((1, 2)) - rff_layer = kernel_layers.RandomFourierFeatures(output_dim=10, scale=3.0) - inputs = input_layer.Input((2,)) - outputs = rff_layer(inputs) - model = training.Model(inputs, outputs) - output_data = model.predict(input_data) - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir) - saved_model_dir = os.path.join(temp_dir, 'rff_model') - model.save(saved_model_dir) - new_model = save.load_model(saved_model_dir) - new_output_data = new_model.predict(input_data) - self.assertAllClose(output_data, new_output_data, atol=1e-4) - - def test_invalid_output_dim(self): - with self.assertRaisesRegex( - ValueError, '`output_dim` should be a positive integer'): - _ = kernel_layers.RandomFourierFeatures(output_dim=-3, scale=2.0) - - def test_unsupported_kernel_type(self): - with self.assertRaisesRegex( - ValueError, 'Unsupported 
`kernel_initializer`'): - _ = kernel_layers.RandomFourierFeatures( - 3, 'unsupported_kernel', stddev=2.0) - - def test_invalid_scale(self): - with self.assertRaisesRegex( - ValueError, - 'When provided, `scale` should be a positive float'): - _ = kernel_layers.RandomFourierFeatures(output_dim=10, scale=0.0) - - def test_invalid_input_shape(self): - inputs = tf.random.uniform((3, 2, 4), seed=1) - rff_layer = kernel_layers.RandomFourierFeatures(output_dim=10, scale=3.0) - with self.assertRaisesRegex( - ValueError, - 'The rank of the input tensor should be 2'): - _ = rff_layer(inputs) - - @parameterized.named_parameters( - ('gaussian', 'gaussian', 10.0, False), - ('random', tf.compat.v1.random_uniform_initializer, 1.0, True)) - def test_random_features_properties(self, initializer, scale, trainable): - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim=10, - kernel_initializer=initializer, - scale=scale, - trainable=trainable) - self.assertEqual(rff_layer.output_dim, 10) - self.assertEqual(rff_layer.kernel_initializer, initializer) - self.assertEqual(rff_layer.scale, scale) - self.assertEqual(rff_layer.trainable, trainable) - - @parameterized.named_parameters(('gaussian', 'gaussian', False), - ('laplacian', 'laplacian', True), - ('other', tf.compat.v1.ones_initializer, True)) - def test_call(self, initializer, trainable): - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim=10, - kernel_initializer=initializer, - scale=1.0, - trainable=trainable, - name='random_fourier_features') - inputs = tf.random.uniform((3, 2), seed=1) - outputs = rff_layer(inputs) - self.assertListEqual([3, 10], outputs.shape.as_list()) - num_trainable_vars = 1 if trainable else 0 - self.assertLen(rff_layer.non_trainable_variables, 3 - num_trainable_vars) - - @tf_test_utils.assert_no_new_pyobjects_executing_eagerly - def test_no_eager_Leak(self): - # Tests that repeatedly constructing and building a Layer does not leak - # Python objects. 
- inputs = tf.random.uniform((5, 4), seed=1) - kernel_layers.RandomFourierFeatures(output_dim=4, name='rff')(inputs) - kernel_layers.RandomFourierFeatures(output_dim=10, scale=2.0)(inputs) - - def test_output_shape(self): - inputs = tf.random.uniform((3, 2), seed=1) - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim=7, name='random_fourier_features', trainable=True) - outputs = rff_layer(inputs) - self.assertEqual([3, 7], outputs.shape.as_list()) - - @parameterized.named_parameters( - ('gaussian', 'gaussian'), ('laplacian', 'laplacian'), - ('other', tf.compat.v1.random_uniform_initializer)) - def test_call_on_placeholder(self, initializer): - with tf.Graph().as_default(): - inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None]) - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim=5, - kernel_initializer=initializer, - name='random_fourier_features') - with self.assertRaisesRegex( - ValueError, - 'The last dimension of the input tensor should be defined'): - rff_layer(inputs) - - inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, None]) - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim=5, - kernel_initializer=initializer, - name='random_fourier_features') - with self.assertRaisesRegex( - ValueError, - 'The last dimension of the input tensor should be defined'): - rff_layer(inputs) - - inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, 3]) - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim=5, name='random_fourier_features') - rff_layer(inputs) - - @parameterized.named_parameters(('gaussian', 10, 'gaussian', 2.0), - ('laplacian', 5, 'laplacian', None), - ('other', 10, tf.compat.v1.ones_initializer, 1.0)) - def test_compute_output_shape(self, output_dim, initializer, scale): - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim, initializer, scale=scale, name='rff') - with self.assertRaises(ValueError): - rff_layer.compute_output_shape(tf.TensorShape(None)) - with self.assertRaises(ValueError): - rff_layer.compute_output_shape(tf.TensorShape([])) - with self.assertRaises(ValueError): - rff_layer.compute_output_shape(tf.TensorShape([3])) - with self.assertRaises(ValueError): - rff_layer.compute_output_shape(tf.TensorShape([3, 2, 3])) - - with self.assertRaisesRegex( - ValueError, 'The last dimension of the input tensor should be defined'): - rff_layer.compute_output_shape(tf.TensorShape([3, None])) - - self.assertEqual([None, output_dim], - rff_layer.compute_output_shape((None, 3)).as_list()) - self.assertEqual([None, output_dim], - rff_layer.compute_output_shape( - tf.TensorShape([None, 2])).as_list()) - self.assertEqual([4, output_dim], - rff_layer.compute_output_shape((4, 1)).as_list()) - - @parameterized.named_parameters( - ('gaussian', 10, 'gaussian', 3.0, False), - ('laplacian', 5, 'laplacian', 5.5, True), - ('other', 7, tf.compat.v1.random_uniform_initializer(), None, True)) - def test_get_config(self, output_dim, initializer, scale, trainable): - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim, - initializer, - scale=scale, - trainable=trainable, - name='random_fourier_features', + def _assert_all_close(self, expected, actual, atol=0.001): + if not tf.executing_eagerly(): + with self.cached_session() as sess: + keras_backend._initialize_variables(sess) + self.assertAllClose(expected, actual, atol=atol) + else: + self.assertAllClose(expected, actual, atol=atol) + + @test_utils.run_v2_only + def test_state_saving_and_loading(self): + with self.cached_session(): + input_data 
= np.random.random((1, 2)) + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=10, scale=3.0 + ) + inputs = input_layer.Input((2,)) + outputs = rff_layer(inputs) + model = training.Model(inputs, outputs) + output_data = model.predict(input_data) + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir) + saved_model_dir = os.path.join(temp_dir, "rff_model") + model.save(saved_model_dir) + new_model = save.load_model(saved_model_dir) + new_output_data = new_model.predict(input_data) + self.assertAllClose(output_data, new_output_data, atol=1e-4) + + def test_invalid_output_dim(self): + with self.assertRaisesRegex( + ValueError, "`output_dim` should be a positive integer" + ): + _ = kernel_layers.RandomFourierFeatures(output_dim=-3, scale=2.0) + + def test_unsupported_kernel_type(self): + with self.assertRaisesRegex( + ValueError, "Unsupported `kernel_initializer`" + ): + _ = kernel_layers.RandomFourierFeatures( + 3, "unsupported_kernel", stddev=2.0 + ) + + def test_invalid_scale(self): + with self.assertRaisesRegex( + ValueError, "When provided, `scale` should be a positive float" + ): + _ = kernel_layers.RandomFourierFeatures(output_dim=10, scale=0.0) + + def test_invalid_input_shape(self): + inputs = tf.random.uniform((3, 2, 4), seed=1) + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=10, scale=3.0 + ) + with self.assertRaisesRegex( + ValueError, "The rank of the input tensor should be 2" + ): + _ = rff_layer(inputs) + + @parameterized.named_parameters( + ("gaussian", "gaussian", 10.0, False), + ("random", tf.compat.v1.random_uniform_initializer, 1.0, True), + ) + def test_random_features_properties(self, initializer, scale, trainable): + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=10, + kernel_initializer=initializer, + scale=scale, + trainable=trainable, + ) + self.assertEqual(rff_layer.output_dim, 10) + self.assertEqual(rff_layer.kernel_initializer, initializer) + self.assertEqual(rff_layer.scale, scale) + self.assertEqual(rff_layer.trainable, trainable) + + @parameterized.named_parameters( + ("gaussian", "gaussian", False), + ("laplacian", "laplacian", True), + ("other", tf.compat.v1.ones_initializer, True), + ) + def test_call(self, initializer, trainable): + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=10, + kernel_initializer=initializer, + scale=1.0, + trainable=trainable, + name="random_fourier_features", + ) + inputs = tf.random.uniform((3, 2), seed=1) + outputs = rff_layer(inputs) + self.assertListEqual([3, 10], outputs.shape.as_list()) + num_trainable_vars = 1 if trainable else 0 + self.assertLen( + rff_layer.non_trainable_variables, 3 - num_trainable_vars + ) + + @tf_test_utils.assert_no_new_pyobjects_executing_eagerly + def test_no_eager_Leak(self): + # Tests that repeatedly constructing and building a Layer does not leak + # Python objects. 
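The `assert_no_new_pyobjects_executing_eagerly` decorator above enforces that the construct-and-call loop in this test does not grow the Python heap. The same property can be probed by hand with `weakref`; a minimal sketch under eager execution, using the public `tf.keras.layers.experimental.RandomFourierFeatures` export rather than the in-repo module (whether the final check passes can depend on TF version and global tracking state):

```python
import gc
import weakref

import tensorflow as tf

# Build and call a layer, drop the last reference, and check that
# nothing keeps it alive (the intent behind the no-leak assertion).
layer = tf.keras.layers.experimental.RandomFourierFeatures(output_dim=4)
layer(tf.random.uniform((5, 4), seed=1))  # triggers build() and weights
ref = weakref.ref(layer)
del layer
gc.collect()
print(ref() is None)  # True if the layer was garbage-collected
```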
+ inputs = tf.random.uniform((5, 4), seed=1) + kernel_layers.RandomFourierFeatures(output_dim=4, name="rff")(inputs) + kernel_layers.RandomFourierFeatures(output_dim=10, scale=2.0)(inputs) + + def test_output_shape(self): + inputs = tf.random.uniform((3, 2), seed=1) + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=7, name="random_fourier_features", trainable=True + ) + outputs = rff_layer(inputs) + self.assertEqual([3, 7], outputs.shape.as_list()) + + @parameterized.named_parameters( + ("gaussian", "gaussian"), + ("laplacian", "laplacian"), + ("other", tf.compat.v1.random_uniform_initializer), + ) + def test_call_on_placeholder(self, initializer): + with tf.Graph().as_default(): + inputs = tf.compat.v1.placeholder( + dtype=tf.float32, shape=[None, None] + ) + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=5, + kernel_initializer=initializer, + name="random_fourier_features", + ) + with self.assertRaisesRegex( + ValueError, + "The last dimension of the input tensor should be defined", + ): + rff_layer(inputs) + + inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, None]) + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=5, + kernel_initializer=initializer, + name="random_fourier_features", + ) + with self.assertRaisesRegex( + ValueError, + "The last dimension of the input tensor should be defined", + ): + rff_layer(inputs) + + inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, 3]) + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=5, name="random_fourier_features" + ) + rff_layer(inputs) + + @parameterized.named_parameters( + ("gaussian", 10, "gaussian", 2.0), + ("laplacian", 5, "laplacian", None), + ("other", 10, tf.compat.v1.ones_initializer, 1.0), + ) + def test_compute_output_shape(self, output_dim, initializer, scale): + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim, initializer, scale=scale, name="rff" + ) + with self.assertRaises(ValueError): + rff_layer.compute_output_shape(tf.TensorShape(None)) + with self.assertRaises(ValueError): + rff_layer.compute_output_shape(tf.TensorShape([])) + with self.assertRaises(ValueError): + rff_layer.compute_output_shape(tf.TensorShape([3])) + with self.assertRaises(ValueError): + rff_layer.compute_output_shape(tf.TensorShape([3, 2, 3])) + + with self.assertRaisesRegex( + ValueError, + "The last dimension of the input tensor should be defined", + ): + rff_layer.compute_output_shape(tf.TensorShape([3, None])) + + self.assertEqual( + [None, output_dim], + rff_layer.compute_output_shape((None, 3)).as_list(), + ) + self.assertEqual( + [None, output_dim], + rff_layer.compute_output_shape(tf.TensorShape([None, 2])).as_list(), + ) + self.assertEqual( + [4, output_dim], rff_layer.compute_output_shape((4, 1)).as_list() + ) + + @parameterized.named_parameters( + ("gaussian", 10, "gaussian", 3.0, False), + ("laplacian", 5, "laplacian", 5.5, True), + ("other", 7, tf.compat.v1.random_uniform_initializer(), None, True), + ) + def test_get_config(self, output_dim, initializer, scale, trainable): + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim, + initializer, + scale=scale, + trainable=trainable, + name="random_fourier_features", + ) + expected_initializer = initializer + if not isinstance(initializer, str): + expected_initializer = initializers.serialize(initializer) + + expected_dtype = ( + "float32" if base_layer_utils.v2_dtype_behavior_enabled() else None + ) + expected_config = { + "output_dim": output_dim, + "kernel_initializer": 
expected_initializer, + "scale": scale, + "name": "random_fourier_features", + "trainable": trainable, + "dtype": expected_dtype, + } + self.assertLen(expected_config, len(rff_layer.get_config())) + self.assertSameElements( + list(expected_config.items()), list(rff_layer.get_config().items()) + ) + + @parameterized.named_parameters( + ("gaussian", 5, "gaussian", None, True), + ("laplacian", 5, "laplacian", 5.5, False), + ("other", 7, tf.compat.v1.ones_initializer(), 2.0, True), + ) + def test_from_config(self, output_dim, initializer, scale, trainable): + model_config = { + "output_dim": output_dim, + "kernel_initializer": initializer, + "scale": scale, + "trainable": trainable, + "name": "random_fourier_features", + } + rff_layer = kernel_layers.RandomFourierFeatures.from_config( + model_config + ) + self.assertEqual(rff_layer.output_dim, output_dim) + self.assertEqual(rff_layer.kernel_initializer, initializer) + self.assertEqual(rff_layer.scale, scale) + self.assertEqual(rff_layer.trainable, trainable) + + inputs = tf.random.uniform((3, 2), seed=1) + outputs = rff_layer(inputs) + self.assertListEqual([3, output_dim], outputs.shape.as_list()) + num_trainable_vars = 1 if trainable else 0 + self.assertLen(rff_layer.trainable_variables, num_trainable_vars) + if trainable: + self.assertEqual( + "random_fourier_features/kernel_scale:0", + rff_layer.trainable_variables[0].name, + ) + self.assertLen( + rff_layer.non_trainable_variables, 3 - num_trainable_vars + ) + + @parameterized.named_parameters( + ("gaussian", 10, "gaussian", 3.0, True), + ("laplacian", 5, "laplacian", 5.5, False), + ("other", 10, tf.compat.v1.random_uniform_initializer(), None, True), + ) + def test_same_random_features_params_reused( + self, output_dim, initializer, scale, trainable + ): + """Applying the layer on the same input twice gives the same output.""" + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=output_dim, + kernel_initializer=initializer, + scale=scale, + trainable=trainable, + name="random_fourier_features", + ) + inputs = tf.constant(np.random.uniform(low=-1.0, high=1.0, size=(2, 4))) + output1 = rff_layer(inputs) + output2 = rff_layer(inputs) + self._assert_all_close(output1, output2) + + @parameterized.named_parameters( + ("gaussian", "gaussian", 5.0), + ("laplacian", "laplacian", 3.0), + ("other", tf.compat.v1.random_uniform_initializer(), 5.0), + ) + def test_different_params_similar_approximation(self, initializer, scale): + tf.compat.v1.set_random_seed(12345) + rff_layer1 = kernel_layers.RandomFourierFeatures( + output_dim=3000, + kernel_initializer=initializer, + scale=scale, + name="rff1", + ) + rff_layer2 = kernel_layers.RandomFourierFeatures( + output_dim=2000, + kernel_initializer=initializer, + scale=scale, + name="rff2", + ) + # Two distinct inputs. + x = tf.constant([[1.0, -1.0, 0.5]]) + y = tf.constant([[-1.0, 1.0, 1.0]]) + + # Apply both layers to both inputs. + output_x1 = math.sqrt(2.0 / 3000.0) * rff_layer1(x) + output_y1 = math.sqrt(2.0 / 3000.0) * rff_layer1(y) + output_x2 = math.sqrt(2.0 / 2000.0) * rff_layer2(x) + output_y2 = math.sqrt(2.0 / 2000.0) * rff_layer2(y) + + # Compute the inner products of the outputs (on inputs x and y) for both + # layers. For any fixed random features layer rff_layer, and inputs x, + # y, rff_layer(x)^T * rff_layer(y) ~= K(x,y) up to a normalization + # factor. 
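The comment above states the contract this whole test file leans on: for a fixed draw of random features, `rff_layer(x)^T * rff_layer(y)` estimates `K(x, y)` once rescaled by `2 / output_dim`. A self-contained sketch of that identity, assuming the public `tf.keras.layers.experimental.RandomFourierFeatures` export for illustration:

```python
import math

import numpy as np
import tensorflow as tf

stddev = 5.0
D = 4000  # output_dim; larger D gives a tighter approximation

x = np.array([[1.0, -1.0, 0.5]], dtype=np.float32)
y = np.array([[-1.0, 1.0, 1.0]], dtype=np.float32)

rff = tf.keras.layers.experimental.RandomFourierFeatures(
    output_dim=D, kernel_initializer="gaussian", scale=stddev
)

# phi(x)^T phi(y), with each vector rescaled by sqrt(2/D), carries a
# total factor of 2/D and estimates K(x, y).
phi_x = math.sqrt(2.0 / D) * rff(x)
phi_y = math.sqrt(2.0 / D) * rff(y)
approx = float(tf.reduce_sum(phi_x * phi_y))

# Exact RBF kernel: K(x, y) = exp(-||x - y||^2 / (2 * stddev^2)).
exact = float(np.exp(-np.sum((x - y) ** 2) / (2.0 * stddev**2)))
print(approx, exact)  # typically agree to within a few percent at D=4000
```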
+ approx_kernel1 = kernelized_utils.inner_product(output_x1, output_y1) + approx_kernel2 = kernelized_utils.inner_product(output_x2, output_y2) + self._assert_all_close(approx_kernel1, approx_kernel2, atol=0.08) + + @parameterized.named_parameters( + ("gaussian", "gaussian", 5.0, _exact_gaussian(stddev=5.0)), + ("laplacian", "laplacian", 20.0, _exact_laplacian(stddev=20.0)), + ) + def test_bad_kernel_approximation( + self, initializer, scale, exact_kernel_fn + ): + """Approximation is bad when output dimension is small.""" + # Two distinct inputs. + x = tf.constant([[1.0, -1.0, 0.5]]) + y = tf.constant([[-1.0, 1.0, 1.0]]) + + small_output_dim = 10 + tf.compat.v1.set_random_seed(1234) + # Initialize layer. + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=small_output_dim, + kernel_initializer=initializer, + scale=scale, + name="random_fourier_features", + ) + + # Apply layer to both inputs. + output_x = math.sqrt(2.0 / small_output_dim) * rff_layer(x) + output_y = math.sqrt(2.0 / small_output_dim) * rff_layer(y) + + # The inner products of the outputs (on inputs x and y) approximates the + # real value of the RBF kernel but poorly since the output dimension of + # the layer is small. + exact_kernel_value = exact_kernel_fn(x, y) + approx_kernel_value = kernelized_utils.inner_product(output_x, output_y) + abs_error = tf.abs(exact_kernel_value - approx_kernel_value) + if not tf.executing_eagerly(): + with self.cached_session() as sess: + keras_backend._initialize_variables(sess) + abs_error_eval = sess.run([abs_error]) + self.assertGreater(abs_error_eval[0][0], 0.01) + self.assertLess(abs_error_eval[0][0], 0.5) + else: + self.assertGreater(abs_error, 0.01) + self.assertLess(abs_error, 0.5) + + @parameterized.named_parameters( + ("gaussian", "gaussian", 5.0, _exact_gaussian(stddev=5.0)), + ("laplacian", "laplacian", 10.0, _exact_laplacian(stddev=10.0)), ) - expected_initializer = initializer - if not isinstance(initializer, str): - expected_initializer = initializers.serialize(initializer) - - expected_dtype = ( - 'float32' if base_layer_utils.v2_dtype_behavior_enabled() else None) - expected_config = { - 'output_dim': output_dim, - 'kernel_initializer': expected_initializer, - 'scale': scale, - 'name': 'random_fourier_features', - 'trainable': trainable, - 'dtype': expected_dtype, - } - self.assertLen(expected_config, len(rff_layer.get_config())) - self.assertSameElements( - list(expected_config.items()), list(rff_layer.get_config().items())) - - @parameterized.named_parameters( - ('gaussian', 5, 'gaussian', None, True), - ('laplacian', 5, 'laplacian', 5.5, False), - ('other', 7, tf.compat.v1.ones_initializer(), 2.0, True)) - def test_from_config(self, output_dim, initializer, scale, trainable): - model_config = { - 'output_dim': output_dim, - 'kernel_initializer': initializer, - 'scale': scale, - 'trainable': trainable, - 'name': 'random_fourier_features', - } - rff_layer = kernel_layers.RandomFourierFeatures.from_config(model_config) - self.assertEqual(rff_layer.output_dim, output_dim) - self.assertEqual(rff_layer.kernel_initializer, initializer) - self.assertEqual(rff_layer.scale, scale) - self.assertEqual(rff_layer.trainable, trainable) - - inputs = tf.random.uniform((3, 2), seed=1) - outputs = rff_layer(inputs) - self.assertListEqual([3, output_dim], outputs.shape.as_list()) - num_trainable_vars = 1 if trainable else 0 - self.assertLen(rff_layer.trainable_variables, num_trainable_vars) - if trainable: - self.assertEqual('random_fourier_features/kernel_scale:0', - 
rff_layer.trainable_variables[0].name) - self.assertLen(rff_layer.non_trainable_variables, 3 - num_trainable_vars) - - @parameterized.named_parameters( - ('gaussian', 10, 'gaussian', 3.0, True), - ('laplacian', 5, 'laplacian', 5.5, False), - ('other', 10, tf.compat.v1.random_uniform_initializer(), None, True)) - def test_same_random_features_params_reused(self, output_dim, initializer, - scale, trainable): - """Applying the layer on the same input twice gives the same output.""" - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim=output_dim, - kernel_initializer=initializer, - scale=scale, - trainable=trainable, - name='random_fourier_features') - inputs = tf.constant( - np.random.uniform(low=-1.0, high=1.0, size=(2, 4))) - output1 = rff_layer(inputs) - output2 = rff_layer(inputs) - self._assert_all_close(output1, output2) - - @parameterized.named_parameters( - ('gaussian', 'gaussian', 5.0), ('laplacian', 'laplacian', 3.0), - ('other', tf.compat.v1.random_uniform_initializer(), 5.0)) - def test_different_params_similar_approximation(self, initializer, scale): - tf.compat.v1.set_random_seed(12345) - rff_layer1 = kernel_layers.RandomFourierFeatures( - output_dim=3000, - kernel_initializer=initializer, - scale=scale, - name='rff1') - rff_layer2 = kernel_layers.RandomFourierFeatures( - output_dim=2000, - kernel_initializer=initializer, - scale=scale, - name='rff2') - # Two distinct inputs. - x = tf.constant([[1.0, -1.0, 0.5]]) - y = tf.constant([[-1.0, 1.0, 1.0]]) - - # Apply both layers to both inputs. - output_x1 = math.sqrt(2.0 / 3000.0) * rff_layer1(x) - output_y1 = math.sqrt(2.0 / 3000.0) * rff_layer1(y) - output_x2 = math.sqrt(2.0 / 2000.0) * rff_layer2(x) - output_y2 = math.sqrt(2.0 / 2000.0) * rff_layer2(y) - - # Compute the inner products of the outputs (on inputs x and y) for both - # layers. For any fixed random features layer rff_layer, and inputs x, y, - # rff_layer(x)^T * rff_layer(y) ~= K(x,y) up to a normalization factor. - approx_kernel1 = kernelized_utils.inner_product(output_x1, output_y1) - approx_kernel2 = kernelized_utils.inner_product(output_x2, output_y2) - self._assert_all_close(approx_kernel1, approx_kernel2, atol=0.08) - - @parameterized.named_parameters( - ('gaussian', 'gaussian', 5.0, _exact_gaussian(stddev=5.0)), - ('laplacian', 'laplacian', 20.0, _exact_laplacian(stddev=20.0))) - def test_bad_kernel_approximation(self, initializer, scale, exact_kernel_fn): - """Approximation is bad when output dimension is small.""" - # Two distinct inputs. - x = tf.constant([[1.0, -1.0, 0.5]]) - y = tf.constant([[-1.0, 1.0, 1.0]]) - - small_output_dim = 10 - tf.compat.v1.set_random_seed(1234) - # Initialize layer. - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim=small_output_dim, - kernel_initializer=initializer, - scale=scale, - name='random_fourier_features') - - # Apply layer to both inputs. - output_x = math.sqrt(2.0 / small_output_dim) * rff_layer(x) - output_y = math.sqrt(2.0 / small_output_dim) * rff_layer(y) - - # The inner products of the outputs (on inputs x and y) approximates the - # real value of the RBF kernel but poorly since the output dimension of the - # layer is small. 
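The comment above gives the reason the approximation degrades at small output dimension: each random feature is one Monte Carlo sample of Bochner's integral for the RBF kernel, so the estimate's error shrinks only like `1/sqrt(output_dim)`. A minimal NumPy-only sketch of that rate:

```python
import numpy as np

rng = np.random.default_rng(0)
stddev = 5.0
x = np.array([1.0, -1.0, 0.5])
y = np.array([-1.0, 1.0, 1.0])
exact = np.exp(-np.sum((x - y) ** 2) / (2.0 * stddev**2))

for d in (10, 100, 1000, 10000):
    # One random feature = one Monte Carlo sample: w is drawn from the
    # Gaussian kernel's spectral measure N(0, I / stddev^2).
    w = rng.normal(scale=1.0 / stddev, size=(d, 3))
    b = rng.uniform(0.0, 2.0 * np.pi, size=d)
    phi_x = np.sqrt(2.0 / d) * np.cos(w @ x + b)
    phi_y = np.sqrt(2.0 / d) * np.cos(w @ y + b)
    print(d, abs(phi_x @ phi_y - exact))  # shrinks roughly as 1/sqrt(d)
```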
- exact_kernel_value = exact_kernel_fn(x, y) - approx_kernel_value = kernelized_utils.inner_product(output_x, output_y) - abs_error = tf.abs(exact_kernel_value - approx_kernel_value) - if not tf.executing_eagerly(): - with self.cached_session() as sess: - keras_backend._initialize_variables(sess) - abs_error_eval = sess.run([abs_error]) - self.assertGreater(abs_error_eval[0][0], 0.01) - self.assertLess(abs_error_eval[0][0], 0.5) - else: - self.assertGreater(abs_error, 0.01) - self.assertLess(abs_error, 0.5) - - @parameterized.named_parameters( - ('gaussian', 'gaussian', 5.0, _exact_gaussian(stddev=5.0)), - ('laplacian', 'laplacian', 10.0, _exact_laplacian(stddev=10.0))) - def test_good_kernel_approximation_multiple_inputs(self, initializer, scale, - exact_kernel_fn): - # Parameters. - input_dim = 5 - output_dim = 2000 - x_rows = 20 - y_rows = 30 - - x = tf.constant( - np.random.uniform(size=(x_rows, input_dim)), dtype=tf.float32) - y = tf.constant( - np.random.uniform(size=(y_rows, input_dim)), dtype=tf.float32) - - tf.compat.v1.set_random_seed(1234) - rff_layer = kernel_layers.RandomFourierFeatures( - output_dim=output_dim, - kernel_initializer=initializer, - scale=scale, - name='random_fourier_features') - - # The shapes of output_x and output_y are (x_rows, output_dim) and - # (y_rows, output_dim) respectively. - output_x = math.sqrt(2.0 / output_dim) * rff_layer(x) - output_y = math.sqrt(2.0 / output_dim) * rff_layer(y) - - approx_kernel_matrix = kernelized_utils.inner_product(output_x, output_y) - exact_kernel_matrix = exact_kernel_fn(x, y) - self._assert_all_close(approx_kernel_matrix, exact_kernel_matrix, atol=0.05) - - -if __name__ == '__main__': - tf.test.main() + def test_good_kernel_approximation_multiple_inputs( + self, initializer, scale, exact_kernel_fn + ): + # Parameters. + input_dim = 5 + output_dim = 2000 + x_rows = 20 + y_rows = 30 + + x = tf.constant( + np.random.uniform(size=(x_rows, input_dim)), dtype=tf.float32 + ) + y = tf.constant( + np.random.uniform(size=(y_rows, input_dim)), dtype=tf.float32 + ) + + tf.compat.v1.set_random_seed(1234) + rff_layer = kernel_layers.RandomFourierFeatures( + output_dim=output_dim, + kernel_initializer=initializer, + scale=scale, + name="random_fourier_features", + ) + + # The shapes of output_x and output_y are (x_rows, output_dim) and + # (y_rows, output_dim) respectively. + output_x = math.sqrt(2.0 / output_dim) * rff_layer(x) + output_y = math.sqrt(2.0 / output_dim) * rff_layer(y) + + approx_kernel_matrix = kernelized_utils.inner_product( + output_x, output_y + ) + exact_kernel_matrix = exact_kernel_fn(x, y) + self._assert_all_close( + approx_kernel_matrix, exact_kernel_matrix, atol=0.05 + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/layers_test.py b/keras/layers/layers_test.py index b618925a0894..1072f5948994 100644 --- a/keras/layers/layers_test.py +++ b/keras/layers/layers_test.py @@ -12,24 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -# pylint: disable=g-classes-have-attributes + """Tests for layers.__init__.""" -from keras import layers import tensorflow.compat.v2 as tf +from keras import layers -class LayersTest(tf.test.TestCase): - def test_keras_private_symbol(self): - normalization_parent = layers.BatchNormalization.__module__.split('.')[-1] - if tf.__internal__.tf2.enabled(): - self.assertEqual('batch_normalization', normalization_parent) - self.assertTrue(layers.BatchNormalization._USE_V2_BEHAVIOR) - else: - self.assertEqual('batch_normalization_v1', normalization_parent) - self.assertFalse(layers.BatchNormalization._USE_V2_BEHAVIOR) +class LayersTest(tf.test.TestCase): + def test_keras_private_symbol(self): + normalization_parent = layers.BatchNormalization.__module__.split(".")[ + -1 + ] + if tf.__internal__.tf2.enabled(): + self.assertEqual("batch_normalization", normalization_parent) + self.assertTrue(layers.BatchNormalization._USE_V2_BEHAVIOR) + else: + self.assertEqual("batch_normalization_v1", normalization_parent) + self.assertFalse(layers.BatchNormalization._USE_V2_BEHAVIOR) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/locally_connected/BUILD b/keras/layers/locally_connected/BUILD index c93785b661ed..e6ee324c60eb 100644 --- a/keras/layers/locally_connected/BUILD +++ b/keras/layers/locally_connected/BUILD @@ -1,9 +1,11 @@ # Description: # Contains the Keras locally-connected layers. +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", ], @@ -82,7 +84,7 @@ tf_py_test( "//:expect_numpy_installed", "//:expect_tensorflow_installed", "//keras", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/testing_infra:test_combinations", "//keras/testing_infra:test_utils", ], diff --git a/keras/layers/locally_connected/__init__.py b/keras/layers/locally_connected/__init__.py index 6d424d65c177..9dbd20b3522b 100644 --- a/keras/layers/locally_connected/__init__.py +++ b/keras/layers/locally_connected/__init__.py @@ -14,5 +14,9 @@ # ============================================================================== """Keras locally-connected layers.""" -from keras.layers.locally_connected.locally_connected1d import LocallyConnected1D -from keras.layers.locally_connected.locally_connected2d import LocallyConnected2D +from keras.layers.locally_connected.locally_connected1d import ( + LocallyConnected1D, +) +from keras.layers.locally_connected.locally_connected2d import ( + LocallyConnected2D, +) diff --git a/keras/layers/locally_connected/locally_connected1d.py b/keras/layers/locally_connected/locally_connected1d.py index ddc651e6eca6..32fe80fee560 100644 --- a/keras/layers/locally_connected/locally_connected1d.py +++ b/keras/layers/locally_connected/locally_connected1d.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + """Locally-connected layer for 1D input.""" from keras import activations @@ -26,308 +26,346 @@ from keras.utils import conv_utils from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.LocallyConnected1D') +@keras_export("keras.layers.LocallyConnected1D") class LocallyConnected1D(Layer): - """Locally-connected layer for 1D inputs. - - The `LocallyConnected1D` layer works similarly to - the `Conv1D` layer, except that weights are unshared, - that is, a different set of filters is applied at each different patch - of the input. - - Note: layer attributes cannot be modified after the layer has been called - once (except the `trainable` attribute). - - Example: - ```python - # apply a unshared weight convolution 1d of length 3 to a sequence with - # 10 timesteps, with 64 output filters - model = Sequential() - model.add(LocallyConnected1D(64, 3, input_shape=(10, 32))) - # now model.output_shape == (None, 8, 64) - # add a new conv1d on top - model.add(LocallyConnected1D(32, 3)) - # now model.output_shape == (None, 6, 32) - ``` - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of output filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, specifying the - length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, specifying the - stride length of the convolution. - padding: Currently only supports `"valid"` (case-insensitive). `"same"` - may be supported in the future. `"valid"` means no padding. - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape `(batch, length, - channels)` while `channels_first` corresponds to inputs with shape - `(batch, channels, length)`. It defaults to the `image_data_format` - value found in your Keras config file at `~/.keras/keras.json`. If you - never set it, then it will be "channels_last". - activation: Activation function to use. If you don't specify anything, no - activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation").. - kernel_constraint: Constraint function applied to the kernel matrix. - bias_constraint: Constraint function applied to the bias vector. - implementation: implementation mode, either `1`, `2`, or `3`. `1` loops - over input spatial locations to perform the forward pass. It is - memory-efficient but performs a lot of (small) ops. `2` stores layer - weights in a dense but sparsely-populated 2D matrix and implements the - forward pass as a single matrix-multiply. It uses a lot of RAM but - performs few (large) ops. `3` stores layer weights in a sparse tensor - and implements the forward pass as a single sparse matrix-multiply. 
- How to choose: - `1`: large, dense models, - `2`: small models, - `3`: large, sparse models, where "large" stands for large - input/output activations (i.e. many `filters`, `input_filters`, - large `input_size`, `output_size`), and "sparse" stands for few - connections between inputs and outputs, i.e. small ratio `filters * - input_filters * kernel_size / (input_size * strides)`, where inputs - to and outputs of the layer are assumed to have shapes `(input_size, - input_filters)`, `(output_size, filters)` respectively. It is - recommended to benchmark each in the setting of interest to pick the - most efficient one (in terms of speed and memory usage). Correct - choice of implementation can lead to dramatic speed improvements - (e.g. 50X), potentially at the expense of RAM. Also, only - `padding="valid"` is supported by `implementation=1`. - Input shape: - 3D tensor with shape: `(batch_size, steps, input_dim)` - Output shape: - 3D tensor with shape: `(batch_size, new_steps, filters)` `steps` value - might have changed due to padding or strides. - """ - - def __init__(self, - filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - implementation=1, - **kwargs): - super().__init__(**kwargs) - self.filters = filters - self.kernel_size = conv_utils.normalize_tuple(kernel_size, 1, 'kernel_size') - self.strides = conv_utils.normalize_tuple( - strides, 1, 'strides', allow_zero=True) - self.padding = conv_utils.normalize_padding(padding) - if self.padding != 'valid' and implementation == 1: - raise ValueError('Invalid border mode for LocallyConnected1D ' - '(only "valid" is supported if implementation is 1): ' + - padding) - self.data_format = conv_utils.normalize_data_format(data_format) - self.activation = activations.get(activation) - self.use_bias = use_bias - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - self.implementation = implementation - self.input_spec = InputSpec(ndim=3) - - @property - def _use_input_spec_as_call_signature(self): - return False - - @tf_utils.shape_type_conversion - def build(self, input_shape): - if self.data_format == 'channels_first': - input_dim, input_length = input_shape[1], input_shape[2] - else: - input_dim, input_length = input_shape[2], input_shape[1] - - if input_dim is None: - raise ValueError( - 'Axis 2 of input should be fully-defined. ' - 'Found shape:', input_shape) - self.output_length = conv_utils.conv_output_length(input_length, - self.kernel_size[0], - self.padding, - self.strides[0]) - - if self.output_length <= 0: - raise ValueError( - f'One of the dimensions in the output is <= 0 ' - f'due to downsampling in {self.name}. Consider ' - f'increasing the input size. 
' - f'Received input shape {input_shape} which would produce ' - f'output shape with a zero or negative value in a ' - f'dimension.') - - if self.implementation == 1: - self.kernel_shape = (self.output_length, self.kernel_size[0] * input_dim, - self.filters) - - self.kernel = self.add_weight( - shape=self.kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - elif self.implementation == 2: - if self.data_format == 'channels_first': - self.kernel_shape = (input_dim, input_length, self.filters, - self.output_length) - else: - self.kernel_shape = (input_length, input_dim, self.output_length, - self.filters) - - self.kernel = self.add_weight( - shape=self.kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - self.kernel_mask = locally_connected_utils.get_locallyconnected_mask( - input_shape=(input_length,), - kernel_shape=self.kernel_size, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - ) - - elif self.implementation == 3: - self.kernel_shape = (self.output_length * self.filters, - input_length * input_dim) - - self.kernel_idxs = sorted( - conv_utils.conv_kernel_idxs( - input_shape=(input_length,), - kernel_shape=self.kernel_size, - strides=self.strides, - padding=self.padding, - filters_in=input_dim, - filters_out=self.filters, - data_format=self.data_format)) - - self.kernel = self.add_weight( - shape=(len(self.kernel_idxs),), - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - else: - raise ValueError('Unrecognized implementation mode: %d.' % - self.implementation) - - if self.use_bias: - self.bias = self.add_weight( - shape=(self.output_length, self.filters), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - - if self.data_format == 'channels_first': - self.input_spec = InputSpec(ndim=3, axes={1: input_dim}) - else: - self.input_spec = InputSpec(ndim=3, axes={-1: input_dim}) - self.built = True - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - input_length = input_shape[2] - else: - input_length = input_shape[1] - - length = conv_utils.conv_output_length(input_length, self.kernel_size[0], - self.padding, self.strides[0]) - - if self.data_format == 'channels_first': - return (input_shape[0], self.filters, length) - elif self.data_format == 'channels_last': - return (input_shape[0], length, self.filters) - - def call(self, inputs): - if self.implementation == 1: - output = backend.local_conv( - inputs, self.kernel, self.kernel_size, self.strides, - (self.output_length,), self.data_format) - - elif self.implementation == 2: - output = locally_connected_utils.local_conv_matmul( - inputs, self.kernel, self.kernel_mask, - self.compute_output_shape(inputs.shape)) - - elif self.implementation == 3: - output = locally_connected_utils.local_conv_sparse_matmul( - inputs, self.kernel, self.kernel_idxs, self.kernel_shape, - self.compute_output_shape(inputs.shape)) - - else: - raise ValueError('Unrecognized implementation mode: %d.' 
% - self.implementation) - - if self.use_bias: - output = backend.bias_add(output, self.bias, data_format=self.data_format) - - output = self.activation(output) - return output - - def get_config(self): - config = { - 'filters': - self.filters, - 'kernel_size': - self.kernel_size, - 'strides': - self.strides, - 'padding': - self.padding, - 'data_format': - self.data_format, - 'activation': - activations.serialize(self.activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'implementation': - self.implementation - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Locally-connected layer for 1D inputs. + + The `LocallyConnected1D` layer works similarly to + the `Conv1D` layer, except that weights are unshared, + that is, a different set of filters is applied at each different patch + of the input. + + Note: layer attributes cannot be modified after the layer has been called + once (except the `trainable` attribute). + + Example: + ```python + # apply a unshared weight convolution 1d of length 3 to a sequence with + # 10 timesteps, with 64 output filters + model = Sequential() + model.add(LocallyConnected1D(64, 3, input_shape=(10, 32))) + # now model.output_shape == (None, 8, 64) + # add a new conv1d on top + model.add(LocallyConnected1D(32, 3)) + # now model.output_shape == (None, 6, 32) + ``` + + Args: + filters: Integer, the dimensionality of the output space (i.e. the + number of output filters in the convolution). + kernel_size: An integer or tuple/list of a single integer, specifying + the length of the 1D convolution window. + strides: An integer or tuple/list of a single integer, specifying the + stride length of the convolution. + padding: Currently only supports `"valid"` (case-insensitive). `"same"` + may be supported in the future. `"valid"` means no padding. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape `(batch, length, + channels)` while `channels_first` corresponds to inputs with shape + `(batch, channels, length)`. When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + activation: Activation function to use. If you don't specify anything, + no activation is applied (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation").. + kernel_constraint: Constraint function applied to the kernel matrix. + bias_constraint: Constraint function applied to the bias vector. 
+ implementation: implementation mode, either `1`, `2`, or `3`. `1` loops + over input spatial locations to perform the forward pass. It is + memory-efficient but performs a lot of (small) ops. `2` stores layer + weights in a dense but sparsely-populated 2D matrix and implements the + forward pass as a single matrix-multiply. It uses a lot of RAM but + performs few (large) ops. `3` stores layer weights in a sparse tensor + and implements the forward pass as a single sparse matrix-multiply. + How to choose: + `1`: large, dense models, + `2`: small models, + `3`: large, sparse models, where "large" stands for large + input/output activations (i.e. many `filters`, `input_filters`, + large `input_size`, `output_size`), and "sparse" stands for few + connections between inputs and outputs, i.e. small ratio + `filters * input_filters * kernel_size / (input_size * strides)`, + where inputs to and outputs of the layer are assumed to have + shapes `(input_size, input_filters)`, `(output_size, filters)` + respectively. It is recommended to benchmark each in the setting + of interest to pick the most efficient one (in terms of speed and + memory usage). Correct choice of implementation can lead to + dramatic speed improvements (e.g. 50X), potentially at the expense + of RAM. Also, only `padding="valid"` is supported by + `implementation=1`. + Input shape: + 3D tensor with shape: `(batch_size, steps, input_dim)` + Output shape: + 3D tensor with shape: `(batch_size, new_steps, filters)` `steps` value + might have changed due to padding or strides. + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + implementation=1, + **kwargs, + ): + super().__init__(**kwargs) + self.filters = filters + self.kernel_size = conv_utils.normalize_tuple( + kernel_size, 1, "kernel_size" + ) + self.strides = conv_utils.normalize_tuple( + strides, 1, "strides", allow_zero=True + ) + self.padding = conv_utils.normalize_padding(padding) + if self.padding != "valid" and implementation == 1: + raise ValueError( + "Invalid border mode for LocallyConnected1D " + '(only "valid" is supported if implementation is 1): ' + padding + ) + self.data_format = conv_utils.normalize_data_format(data_format) + self.activation = activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.implementation = implementation + self.input_spec = InputSpec(ndim=3) + + @property + def _use_input_spec_as_call_signature(self): + return False + + @tf_utils.shape_type_conversion + def build(self, input_shape): + if self.data_format == "channels_first": + input_dim, input_length = input_shape[1], input_shape[2] + else: + input_dim, input_length = input_shape[2], input_shape[1] + + if input_dim is None: + raise ValueError( + "Axis 2 of input should be fully-defined. 
Found shape:", + input_shape, + ) + self.output_length = conv_utils.conv_output_length( + input_length, self.kernel_size[0], self.padding, self.strides[0] + ) + + if self.output_length <= 0: + raise ValueError( + "One of the dimensions in the output is <= 0 " + f"due to downsampling in {self.name}. Consider " + "increasing the input size. " + f"Received input shape {input_shape} which would produce " + "output shape with a zero or negative value in a " + "dimension." + ) + + if self.implementation == 1: + self.kernel_shape = ( + self.output_length, + self.kernel_size[0] * input_dim, + self.filters, + ) + + self.kernel = self.add_weight( + shape=self.kernel_shape, + initializer=self.kernel_initializer, + name="kernel", + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + + elif self.implementation == 2: + if self.data_format == "channels_first": + self.kernel_shape = ( + input_dim, + input_length, + self.filters, + self.output_length, + ) + else: + self.kernel_shape = ( + input_length, + input_dim, + self.output_length, + self.filters, + ) + + self.kernel = self.add_weight( + shape=self.kernel_shape, + initializer=self.kernel_initializer, + name="kernel", + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + + self.kernel_mask = ( + locally_connected_utils.get_locallyconnected_mask( + input_shape=(input_length,), + kernel_shape=self.kernel_size, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + ) + ) + + elif self.implementation == 3: + self.kernel_shape = ( + self.output_length * self.filters, + input_length * input_dim, + ) + + self.kernel_idxs = sorted( + conv_utils.conv_kernel_idxs( + input_shape=(input_length,), + kernel_shape=self.kernel_size, + strides=self.strides, + padding=self.padding, + filters_in=input_dim, + filters_out=self.filters, + data_format=self.data_format, + ) + ) + + self.kernel = self.add_weight( + shape=(len(self.kernel_idxs),), + initializer=self.kernel_initializer, + name="kernel", + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + + else: + raise ValueError( + "Unrecognized implementation mode: %d." 
% self.implementation + ) + + if self.use_bias: + self.bias = self.add_weight( + shape=(self.output_length, self.filters), + initializer=self.bias_initializer, + name="bias", + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + else: + self.bias = None + + if self.data_format == "channels_first": + self.input_spec = InputSpec(ndim=3, axes={1: input_dim}) + else: + self.input_spec = InputSpec(ndim=3, axes={-1: input_dim}) + self.built = True + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if self.data_format == "channels_first": + input_length = input_shape[2] + else: + input_length = input_shape[1] + + length = conv_utils.conv_output_length( + input_length, self.kernel_size[0], self.padding, self.strides[0] + ) + + if self.data_format == "channels_first": + return (input_shape[0], self.filters, length) + elif self.data_format == "channels_last": + return (input_shape[0], length, self.filters) + + def call(self, inputs): + if self.implementation == 1: + output = backend.local_conv( + inputs, + self.kernel, + self.kernel_size, + self.strides, + (self.output_length,), + self.data_format, + ) + + elif self.implementation == 2: + output = locally_connected_utils.local_conv_matmul( + inputs, + self.kernel, + self.kernel_mask, + self.compute_output_shape(inputs.shape), + ) + + elif self.implementation == 3: + output = locally_connected_utils.local_conv_sparse_matmul( + inputs, + self.kernel, + self.kernel_idxs, + self.kernel_shape, + self.compute_output_shape(inputs.shape), + ) + + else: + raise ValueError( + "Unrecognized implementation mode: %d." % self.implementation + ) + + if self.use_bias: + output = backend.bias_add( + output, self.bias, data_format=self.data_format + ) + + output = self.activation(output) + return output + + def get_config(self): + config = { + "filters": self.filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "padding": self.padding, + "data_format": self.data_format, + "activation": activations.serialize(self.activation), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "bias_constraint": constraints.serialize(self.bias_constraint), + "implementation": self.implementation, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/locally_connected/locally_connected2d.py b/keras/layers/locally_connected/locally_connected2d.py index b67aba34795e..fce8c32e2ce4 100644 --- a/keras/layers/locally_connected/locally_connected2d.py +++ b/keras/layers/locally_connected/locally_connected2d.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
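The `get_config` implementation that closes the file above returns only plain-Python values (initializers, regularizers, and constraints are stored in serialized form), which is what lets `from_config` and model saving round-trip cleanly. A minimal sketch, assuming the public `tf.keras.layers.LocallyConnected1D` export:

```python
import tensorflow as tf

layer = tf.keras.layers.LocallyConnected1D(32, 3)
config = layer.get_config()  # a plain, serializer-friendly dict

# Because every entry is already deserializable, an equivalent layer
# can be rebuilt from the dict alone:
clone = tf.keras.layers.LocallyConnected1D.from_config(config)
assert clone.filters == layer.filters
assert clone.kernel_size == layer.kernel_size == (3,)
assert clone.padding == "valid"
```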
# ============================================================================== -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + """Locally-connected layer for 2D input.""" from keras import activations @@ -26,330 +26,375 @@ from keras.utils import conv_utils from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.LocallyConnected2D') +@keras_export("keras.layers.LocallyConnected2D") class LocallyConnected2D(Layer): - """Locally-connected layer for 2D inputs. - - The `LocallyConnected2D` layer works similarly - to the `Conv2D` layer, except that weights are unshared, - that is, a different set of filters is applied at each - different patch of the input. - - Note: layer attributes cannot be modified after the layer has been called - once (except the `trainable` attribute). - - Examples: - ```python - # apply a 3x3 unshared weights convolution with 64 output filters on a - 32x32 image - # with `data_format="channels_last"`: - model = Sequential() - model.add(LocallyConnected2D(64, (3, 3), input_shape=(32, 32, 3))) - # now model.output_shape == (None, 30, 30, 64) - # notice that this layer will consume (30*30)*(3*3*3*64) + (30*30)*64 - parameters - - # add a 3x3 unshared weights convolution on top, with 32 output filters: - model.add(LocallyConnected2D(32, (3, 3))) - # now model.output_shape == (None, 28, 28, 32) - ``` - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of output filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the width - and height of the 2D convolution window. Can be a single integer to - specify the same value for all spatial dimensions. - strides: An integer or tuple/list of 2 integers, specifying the strides of - the convolution along the width and height. Can be a single integer to - specify the same value for all spatial dimensions. - padding: Currently only support `"valid"` (case-insensitive). `"same"` - will be supported in future. `"valid"` means no padding. - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape `(batch, height, width, - channels)` while `channels_first` corresponds to inputs with shape - `(batch, channels, height, width)`. It defaults to the - `image_data_format` value found in your Keras config file at - `~/.keras/keras.json`. If you never set it, then it will be - "channels_last". - activation: Activation function to use. If you don't specify anything, no - activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation"). - kernel_constraint: Constraint function applied to the kernel matrix. - bias_constraint: Constraint function applied to the bias vector. - implementation: implementation mode, either `1`, `2`, or `3`. `1` loops - over input spatial locations to perform the forward pass. It is - memory-efficient but performs a lot of (small) ops. 
`2` stores layer - weights in a dense but sparsely-populated 2D matrix and implements the - forward pass as a single matrix-multiply. It uses a lot of RAM but - performs few (large) ops. `3` stores layer weights in a sparse tensor - and implements the forward pass as a single sparse matrix-multiply. - How to choose: - `1`: large, dense models, - `2`: small models, - `3`: large, sparse models, where "large" stands for large - input/output activations (i.e. many `filters`, `input_filters`, - large `np.prod(input_size)`, `np.prod(output_size)`), and "sparse" - stands for few connections between inputs and outputs, i.e. small - ratio `filters * input_filters * np.prod(kernel_size) / - (np.prod(input_size) * np.prod(strides))`, where inputs to and - outputs of the layer are assumed to have shapes `input_size + - (input_filters,)`, `output_size + (filters,)` respectively. It is - recommended to benchmark each in the setting of interest to pick the - most efficient one (in terms of speed and memory usage). Correct - choice of implementation can lead to dramatic speed improvements - (e.g. 50X), potentially at the expense of RAM. Also, only - `padding="valid"` is supported by `implementation=1`. - Input shape: - 4D tensor with shape: `(samples, channels, rows, cols)` if - data_format='channels_first' - or 4D tensor with shape: `(samples, rows, cols, channels)` if - data_format='channels_last'. - Output shape: - 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if - data_format='channels_first' - or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if - data_format='channels_last'. `rows` and `cols` values might have changed - due to padding. - """ - - def __init__(self, - filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - implementation=1, - **kwargs): - super().__init__(**kwargs) - self.filters = filters - self.kernel_size = conv_utils.normalize_tuple(kernel_size, 2, 'kernel_size') - self.strides = conv_utils.normalize_tuple( - strides, 2, 'strides', allow_zero=True) - self.padding = conv_utils.normalize_padding(padding) - if self.padding != 'valid' and implementation == 1: - raise ValueError('Invalid border mode for LocallyConnected2D ' - '(only "valid" is supported if implementation is 1): ' + - padding) - self.data_format = conv_utils.normalize_data_format(data_format) - self.activation = activations.get(activation) - self.use_bias = use_bias - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - self.implementation = implementation - self.input_spec = InputSpec(ndim=4) - - @property - def _use_input_spec_as_call_signature(self): - return False - - @tf_utils.shape_type_conversion - def build(self, input_shape): - if self.data_format == 'channels_last': - input_row, input_col = input_shape[1:-1] - input_filter = input_shape[3] - else: - input_row, input_col = input_shape[2:] - input_filter = input_shape[1] - if input_row is None 
or input_col is None: - raise ValueError('The spatial dimensions of the inputs to ' - ' a LocallyConnected2D layer ' - 'should be fully-defined, but layer received ' - 'the inputs shape ' + str(input_shape)) - output_row = conv_utils.conv_output_length(input_row, self.kernel_size[0], - self.padding, self.strides[0]) - output_col = conv_utils.conv_output_length(input_col, self.kernel_size[1], - self.padding, self.strides[1]) - self.output_row = output_row - self.output_col = output_col - - if self.output_row <= 0 or self.output_col <= 0: - raise ValueError( - f'One of the dimensions in the output is <= 0 ' - f'due to downsampling in {self.name}. Consider ' - f'increasing the input size. ' - f'Received input shape {input_shape} which would produce ' - f'output shape with a zero or negative value in a ' - f'dimension.') - - if self.implementation == 1: - self.kernel_shape = (output_row * output_col, self.kernel_size[0] * - self.kernel_size[1] * input_filter, self.filters) - - self.kernel = self.add_weight( - shape=self.kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - elif self.implementation == 2: - if self.data_format == 'channels_first': - self.kernel_shape = (input_filter, input_row, input_col, self.filters, - self.output_row, self.output_col) - else: - self.kernel_shape = (input_row, input_col, input_filter, - self.output_row, self.output_col, self.filters) - - self.kernel = self.add_weight( - shape=self.kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - self.kernel_mask = locally_connected_utils.get_locallyconnected_mask( - input_shape=(input_row, input_col), - kernel_shape=self.kernel_size, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - ) - - elif self.implementation == 3: - self.kernel_shape = (self.output_row * self.output_col * self.filters, - input_row * input_col * input_filter) - - self.kernel_idxs = sorted( - conv_utils.conv_kernel_idxs( - input_shape=(input_row, input_col), - kernel_shape=self.kernel_size, - strides=self.strides, - padding=self.padding, - filters_in=input_filter, - filters_out=self.filters, - data_format=self.data_format)) - - self.kernel = self.add_weight( - shape=(len(self.kernel_idxs),), - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - else: - raise ValueError('Unrecognized implementation mode: %d.' 
% - self.implementation) - - if self.use_bias: - self.bias = self.add_weight( - shape=(output_row, output_col, self.filters), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - if self.data_format == 'channels_first': - self.input_spec = InputSpec(ndim=4, axes={1: input_filter}) - else: - self.input_spec = InputSpec(ndim=4, axes={-1: input_filter}) - self.built = True - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - rows = input_shape[2] - cols = input_shape[3] - elif self.data_format == 'channels_last': - rows = input_shape[1] - cols = input_shape[2] - - rows = conv_utils.conv_output_length(rows, self.kernel_size[0], - self.padding, self.strides[0]) - cols = conv_utils.conv_output_length(cols, self.kernel_size[1], - self.padding, self.strides[1]) - - if self.data_format == 'channels_first': - return (input_shape[0], self.filters, rows, cols) - elif self.data_format == 'channels_last': - return (input_shape[0], rows, cols, self.filters) - - def call(self, inputs): - if self.implementation == 1: - output = backend.local_conv( - inputs, self.kernel, self.kernel_size, self.strides, - (self.output_row, self.output_col), - self.data_format) - - elif self.implementation == 2: - output = locally_connected_utils.local_conv_matmul( - inputs, self.kernel, self.kernel_mask, - self.compute_output_shape(inputs.shape)) - - elif self.implementation == 3: - output = locally_connected_utils.local_conv_sparse_matmul( - inputs, self.kernel, self.kernel_idxs, self.kernel_shape, - self.compute_output_shape(inputs.shape)) - - else: - raise ValueError('Unrecognized implementation mode: %d.' % - self.implementation) - - if self.use_bias: - output = backend.bias_add(output, self.bias, data_format=self.data_format) - - output = self.activation(output) - return output - - def get_config(self): - config = { - 'filters': - self.filters, - 'kernel_size': - self.kernel_size, - 'strides': - self.strides, - 'padding': - self.padding, - 'data_format': - self.data_format, - 'activation': - activations.serialize(self.activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'implementation': - self.implementation - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Locally-connected layer for 2D inputs. + + The `LocallyConnected2D` layer works similarly + to the `Conv2D` layer, except that weights are unshared, + that is, a different set of filters is applied at each + different patch of the input. + + Note: layer attributes cannot be modified after the layer has been called + once (except the `trainable` attribute). 
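Since a built layer cannot be reconfigured in place, the usual workaround is to rebuild it from its config. A minimal sketch (the sizes and the `filters` change below are hypothetical, purely for illustration):

```python
from keras.layers import LocallyConnected2D

layer = LocallyConnected2D(64, (3, 3))
layer.build((None, 32, 32, 3))  # attributes are now effectively frozen

# Derive a modified copy from the config instead of mutating `layer`.
config = layer.get_config()
config["filters"] = 32  # hypothetical: the one attribute to change
new_layer = LocallyConnected2D.from_config(config)  # fresh, unbuilt layer
```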
+
+    Examples:
+    ```python
+        # apply a 3x3 unshared weights convolution with 64 output filters
+        # on a 32x32 image with `data_format="channels_last"`:
+        model = Sequential()
+        model.add(LocallyConnected2D(64, (3, 3), input_shape=(32, 32, 3)))
+        # now model.output_shape == (None, 30, 30, 64)
+        # notice that this layer will consume (30*30)*(3*3*3*64) + (30*30)*64
+        # parameters
+
+        # add a 3x3 unshared weights convolution on top, with 32 output
+        # filters:
+        model.add(LocallyConnected2D(32, (3, 3)))
+        # now model.output_shape == (None, 28, 28, 32)
+    ```
+
+    Args:
+        filters: Integer, the dimensionality of the output space (i.e. the
+            number of output filters in the convolution).
+        kernel_size: An integer or tuple/list of 2 integers, specifying the
+            width and height of the 2D convolution window. Can be a single
+            integer to specify the same value for all spatial dimensions.
+        strides: An integer or tuple/list of 2 integers, specifying the
+            strides of the convolution along the width and height. Can be a
+            single integer to specify the same value for all spatial
+            dimensions.
+        padding: Currently only supports `"valid"` (case-insensitive).
+            `"same"` will be supported in the future. `"valid"` means no
+            padding.
+        data_format: A string, one of `channels_last` (default) or
+            `channels_first`. The ordering of the dimensions in the inputs.
+            `channels_last` corresponds to inputs with shape `(batch, height,
+            width, channels)` while `channels_first` corresponds to inputs
+            with shape `(batch, channels, height, width)`. When unspecified,
+            uses the `image_data_format` value found in your Keras config
+            file at `~/.keras/keras.json` (if it exists), else
+            'channels_last'. Defaults to 'channels_last'.
+        activation: Activation function to use. If you don't specify
+            anything, no activation is applied (i.e. "linear" activation:
+            `a(x) = x`).
+        use_bias: Boolean, whether the layer uses a bias vector.
+        kernel_initializer: Initializer for the `kernel` weights matrix.
+        bias_initializer: Initializer for the bias vector.
+        kernel_regularizer: Regularizer function applied to the `kernel`
+            weights matrix.
+        bias_regularizer: Regularizer function applied to the bias vector.
+        activity_regularizer: Regularizer function applied to the output of
+            the layer (its "activation").
+        kernel_constraint: Constraint function applied to the kernel matrix.
+        bias_constraint: Constraint function applied to the bias vector.
+        implementation: implementation mode, either `1`, `2`, or `3`. `1`
+            loops over input spatial locations to perform the forward pass.
+            It is memory-efficient but performs a lot of (small) ops. `2`
+            stores layer weights in a dense but sparsely-populated 2D matrix
+            and implements the forward pass as a single matrix-multiply. It
+            uses a lot of RAM but performs few (large) ops. `3` stores layer
+            weights in a sparse tensor and implements the forward pass as a
+            single sparse matrix-multiply.
+            How to choose:
+            `1`: large, dense models,
+            `2`: small models,
+            `3`: large, sparse models, where "large" stands for large
+                input/output activations (i.e. many `filters`,
+                `input_filters`, large `np.prod(input_size)`,
+                `np.prod(output_size)`), and "sparse" stands for few
+                connections between inputs and outputs, i.e. a small ratio
+                `filters * input_filters * np.prod(kernel_size) /
+                (np.prod(input_size) * np.prod(strides))`, where inputs to
+                and outputs of the layer are assumed to have shapes
+                `input_size + (input_filters,)`, `output_size + (filters,)`
+                respectively.
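The ratio heuristic above is cheap to evaluate up front. Below is a small sketch (all sizes hypothetical) that computes it for one candidate configuration; benchmarking, as recommended next, remains the deciding test:

```python
import numpy as np

# Hypothetical layer configuration, not defaults of the layer.
input_size = (32, 32)  # spatial shape of the input
input_filters = 3      # channels going in
filters = 64           # channels coming out
kernel_size = (3, 3)
strides = (1, 1)

# A small ratio means sparse connectivity, favoring `implementation=3`
# per the guidance above; large, dense settings favor `implementation=1`.
ratio = (filters * input_filters * np.prod(kernel_size)) / (
    np.prod(input_size) * np.prod(strides)
)
print(f"connectivity ratio: {ratio}")  # 1.6875 for these numbers
```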
+            It is recommended to benchmark each in the setting of interest
+            to pick the most efficient one (in terms of speed and memory
+            usage). Correct choice of implementation can lead to dramatic
+            speed improvements (e.g. 50X), potentially at the expense of
+            RAM. Also, only `padding="valid"` is supported by
+            `implementation=1`.
+    Input shape:
+        4D tensor with shape: `(samples, channels, rows, cols)` if
+            data_format='channels_first'
+        or 4D tensor with shape: `(samples, rows, cols, channels)` if
+            data_format='channels_last'.
+    Output shape:
+        4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
+            data_format='channels_first'
+        or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if
+            data_format='channels_last'. `rows` and `cols` values might have
+            changed due to padding.
+    """
+
+    def __init__(
+        self,
+        filters,
+        kernel_size,
+        strides=(1, 1),
+        padding="valid",
+        data_format=None,
+        activation=None,
+        use_bias=True,
+        kernel_initializer="glorot_uniform",
+        bias_initializer="zeros",
+        kernel_regularizer=None,
+        bias_regularizer=None,
+        activity_regularizer=None,
+        kernel_constraint=None,
+        bias_constraint=None,
+        implementation=1,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.filters = filters
+        self.kernel_size = conv_utils.normalize_tuple(
+            kernel_size, 2, "kernel_size"
+        )
+        self.strides = conv_utils.normalize_tuple(
+            strides, 2, "strides", allow_zero=True
+        )
+        self.padding = conv_utils.normalize_padding(padding)
+        if self.padding != "valid" and implementation == 1:
+            raise ValueError(
+                "Invalid border mode for LocallyConnected2D "
+                '(only "valid" is supported if implementation is 1): '
+                + padding
+            )
+        self.data_format = conv_utils.normalize_data_format(data_format)
+        self.activation = activations.get(activation)
+        self.use_bias = use_bias
+        self.kernel_initializer = initializers.get(kernel_initializer)
+        self.bias_initializer = initializers.get(bias_initializer)
+        self.kernel_regularizer = regularizers.get(kernel_regularizer)
+        self.bias_regularizer = regularizers.get(bias_regularizer)
+        self.activity_regularizer = regularizers.get(activity_regularizer)
+        self.kernel_constraint = constraints.get(kernel_constraint)
+        self.bias_constraint = constraints.get(bias_constraint)
+        self.implementation = implementation
+        self.input_spec = InputSpec(ndim=4)
+
+    @property
+    def _use_input_spec_as_call_signature(self):
+        return False
+
+    @tf_utils.shape_type_conversion
+    def build(self, input_shape):
+        if self.data_format == "channels_last":
+            input_row, input_col = input_shape[1:-1]
+            input_filter = input_shape[3]
+        else:
+            input_row, input_col = input_shape[2:]
+            input_filter = input_shape[1]
+        if input_row is None or input_col is None:
+            raise ValueError(
+                "The spatial dimensions of the inputs to "
+                "a LocallyConnected2D layer "
+                "should be fully-defined, but layer received "
+                "the inputs shape " + str(input_shape)
+            )
+        output_row = conv_utils.conv_output_length(
+            input_row, self.kernel_size[0], self.padding, self.strides[0]
+        )
+        output_col = conv_utils.conv_output_length(
+            input_col, self.kernel_size[1], self.padding, self.strides[1]
+        )
+        self.output_row = output_row
+        self.output_col = output_col
+
+        if self.output_row <= 0 or self.output_col <= 0:
+            raise ValueError(
+                "One of the dimensions in the output is <= 0 "
+                f"due to downsampling in {self.name}. Consider "
+                "increasing the input size. "
+                f"Received input shape {input_shape} which would produce "
+                "output shape with a zero or negative value in a "
+                "dimension."
+ ) + + if self.implementation == 1: + self.kernel_shape = ( + output_row * output_col, + self.kernel_size[0] * self.kernel_size[1] * input_filter, + self.filters, + ) + + self.kernel = self.add_weight( + shape=self.kernel_shape, + initializer=self.kernel_initializer, + name="kernel", + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + + elif self.implementation == 2: + if self.data_format == "channels_first": + self.kernel_shape = ( + input_filter, + input_row, + input_col, + self.filters, + self.output_row, + self.output_col, + ) + else: + self.kernel_shape = ( + input_row, + input_col, + input_filter, + self.output_row, + self.output_col, + self.filters, + ) + + self.kernel = self.add_weight( + shape=self.kernel_shape, + initializer=self.kernel_initializer, + name="kernel", + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + + self.kernel_mask = ( + locally_connected_utils.get_locallyconnected_mask( + input_shape=(input_row, input_col), + kernel_shape=self.kernel_size, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + ) + ) + + elif self.implementation == 3: + self.kernel_shape = ( + self.output_row * self.output_col * self.filters, + input_row * input_col * input_filter, + ) + + self.kernel_idxs = sorted( + conv_utils.conv_kernel_idxs( + input_shape=(input_row, input_col), + kernel_shape=self.kernel_size, + strides=self.strides, + padding=self.padding, + filters_in=input_filter, + filters_out=self.filters, + data_format=self.data_format, + ) + ) + + self.kernel = self.add_weight( + shape=(len(self.kernel_idxs),), + initializer=self.kernel_initializer, + name="kernel", + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + + else: + raise ValueError( + "Unrecognized implementation mode: %d." 
% self.implementation + ) + + if self.use_bias: + self.bias = self.add_weight( + shape=(output_row, output_col, self.filters), + initializer=self.bias_initializer, + name="bias", + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + else: + self.bias = None + if self.data_format == "channels_first": + self.input_spec = InputSpec(ndim=4, axes={1: input_filter}) + else: + self.input_spec = InputSpec(ndim=4, axes={-1: input_filter}) + self.built = True + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if self.data_format == "channels_first": + rows = input_shape[2] + cols = input_shape[3] + elif self.data_format == "channels_last": + rows = input_shape[1] + cols = input_shape[2] + + rows = conv_utils.conv_output_length( + rows, self.kernel_size[0], self.padding, self.strides[0] + ) + cols = conv_utils.conv_output_length( + cols, self.kernel_size[1], self.padding, self.strides[1] + ) + + if self.data_format == "channels_first": + return (input_shape[0], self.filters, rows, cols) + elif self.data_format == "channels_last": + return (input_shape[0], rows, cols, self.filters) + + def call(self, inputs): + if self.implementation == 1: + output = backend.local_conv( + inputs, + self.kernel, + self.kernel_size, + self.strides, + (self.output_row, self.output_col), + self.data_format, + ) + + elif self.implementation == 2: + output = locally_connected_utils.local_conv_matmul( + inputs, + self.kernel, + self.kernel_mask, + self.compute_output_shape(inputs.shape), + ) + + elif self.implementation == 3: + output = locally_connected_utils.local_conv_sparse_matmul( + inputs, + self.kernel, + self.kernel_idxs, + self.kernel_shape, + self.compute_output_shape(inputs.shape), + ) + + else: + raise ValueError( + "Unrecognized implementation mode: %d." 
% self.implementation + ) + + if self.use_bias: + output = backend.bias_add( + output, self.bias, data_format=self.data_format + ) + + output = self.activation(output) + return output + + def get_config(self): + config = { + "filters": self.filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "padding": self.padding, + "data_format": self.data_format, + "activation": activations.serialize(self.activation), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "bias_constraint": constraints.serialize(self.bias_constraint), + "implementation": self.implementation, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/locally_connected/locally_connected_test.py b/keras/layers/locally_connected/locally_connected_test.py index 9bc2bcdbd111..bb85dee7410b 100644 --- a/keras/layers/locally_connected/locally_connected_test.py +++ b/keras/layers/locally_connected/locally_connected_test.py @@ -17,708 +17,734 @@ import os +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.layers.locally_connected import locally_connected_utils -from keras.optimizers.optimizer_v2 import rmsprop +from keras.optimizers.legacy import rmsprop from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_util -from tensorflow.python.training.rmsprop import RMSPropOptimizer - - -_DATA_FORMAT_PADDING_IMPLEMENTATION = [{ - 'data_format': 'channels_first', - 'padding': 'valid', - 'implementation': 1 -}, { - 'data_format': 'channels_first', - 'padding': 'same', - 'implementation': 1 -}, { - 'data_format': 'channels_last', - 'padding': 'valid', - 'implementation': 1 -}, { - 'data_format': 'channels_last', - 'padding': 'same', - 'implementation': 1 -}, { - 'data_format': 'channels_first', - 'padding': 'valid', - 'implementation': 2 -}, { - 'data_format': 'channels_first', - 'padding': 'same', - 'implementation': 2 -}, { - 'data_format': 'channels_last', - 'padding': 'valid', - 'implementation': 2 -}, { - 'data_format': 'channels_last', - 'padding': 'same', - 'implementation': 2 -}, { - 'data_format': 'channels_first', - 'padding': 'valid', - 'implementation': 3 -}, { - 'data_format': 'channels_first', - 'padding': 'same', - 'implementation': 3 -}, { - 'data_format': 'channels_last', - 'padding': 'valid', - 'implementation': 3 -}, { - 'data_format': 'channels_last', - 'padding': 'same', - 'implementation': 3 -}] - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_util, +) +from tensorflow.python.training.rmsprop import ( + RMSPropOptimizer, +) + +_DATA_FORMAT_PADDING_IMPLEMENTATION = [ + {"data_format": "channels_first", "padding": "valid", "implementation": 1}, + {"data_format": "channels_first", "padding": "same", "implementation": 1}, + {"data_format": "channels_last", "padding": "valid", 
"implementation": 1}, + {"data_format": "channels_last", "padding": "same", "implementation": 1}, + {"data_format": "channels_first", "padding": "valid", "implementation": 2}, + {"data_format": "channels_first", "padding": "same", "implementation": 2}, + {"data_format": "channels_last", "padding": "valid", "implementation": 2}, + {"data_format": "channels_last", "padding": "same", "implementation": 2}, + {"data_format": "channels_first", "padding": "valid", "implementation": 3}, + {"data_format": "channels_first", "padding": "same", "implementation": 3}, + {"data_format": "channels_last", "padding": "valid", "implementation": 3}, + {"data_format": "channels_last", "padding": "same", "implementation": 3}, +] + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class LocallyConnected1DLayersTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) - def test_locallyconnected_1d(self, data_format, padding, implementation): - with self.cached_session(): - num_samples = 2 - num_steps = 8 - input_dim = 5 - filter_length = 3 - filters = 4 - - for strides in [1]: - if padding == 'same' and strides != 1: - continue + @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) + def test_locallyconnected_1d(self, data_format, padding, implementation): + with self.cached_session(): + num_samples = 2 + num_steps = 8 + input_dim = 5 + filter_length = 3 + filters = 4 + + for strides in [1]: + if padding == "same" and strides != 1: + continue + kwargs = { + "filters": filters, + "kernel_size": filter_length, + "padding": padding, + "strides": strides, + "data_format": data_format, + "implementation": implementation, + } + + if padding == "same" and implementation == 1: + self.assertRaises( + ValueError, keras.layers.LocallyConnected1D, **kwargs + ) + else: + test_utils.layer_test( + keras.layers.LocallyConnected1D, + kwargs=kwargs, + input_shape=(num_samples, num_steps, input_dim), + ) + + @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) + def test_locallyconnected_1d_regularization( + self, data_format, padding, implementation + ): + num_samples = 2 + num_steps = 8 + input_dim = 5 + filter_length = 3 + filters = 4 kwargs = { - 'filters': filters, - 'kernel_size': filter_length, - 'padding': padding, - 'strides': strides, - 'data_format': data_format, - 'implementation': implementation + "filters": filters, + "kernel_size": filter_length, + "kernel_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "data_format": data_format, + "implementation": implementation, + "padding": padding, } - if padding == 'same' and implementation == 1: - self.assertRaises(ValueError, keras.layers.LocallyConnected1D, - **kwargs) + if padding == "same" and implementation == 1: + self.assertRaises( + ValueError, keras.layers.LocallyConnected1D, **kwargs + ) else: - test_utils.layer_test( - keras.layers.LocallyConnected1D, - kwargs=kwargs, - input_shape=(num_samples, num_steps, input_dim)) - - @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) - def test_locallyconnected_1d_regularization(self, data_format, padding, - implementation): - num_samples = 2 - num_steps = 8 - input_dim = 5 - filter_length = 3 - filters = 4 - kwargs = { - 'filters': filters, - 'kernel_size': filter_length, - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'data_format': data_format, - 'implementation': implementation, - 'padding': padding - } - - if padding == 
'same' and implementation == 1: - self.assertRaises(ValueError, keras.layers.LocallyConnected1D, **kwargs) - else: - with self.cached_session(): - layer = keras.layers.LocallyConnected1D(**kwargs) - layer.build((num_samples, num_steps, input_dim)) - self.assertLen(layer.losses, 2) - layer( - keras.backend.variable( - np.ones((num_samples, num_steps, input_dim)))) - self.assertLen(layer.losses, 3) - - k_constraint = keras.constraints.max_norm(0.01) - b_constraint = keras.constraints.max_norm(0.01) - kwargs = { - 'filters': filters, - 'kernel_size': filter_length, - 'kernel_constraint': k_constraint, - 'bias_constraint': b_constraint, - } - with self.cached_session(): - layer = keras.layers.LocallyConnected1D(**kwargs) - layer.build((num_samples, num_steps, input_dim)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) - - def test_locallyconnected1d_invalid_output_shapes(self): - kwargs = {'filters': 2, 'kernel_size': 10} - with self.assertRaisesRegex( - ValueError, r"""One of the dimensions in the output is <= 0 """): - layer = keras.layers.LocallyConnected1D(**kwargs) - layer.build((None, 5, 2)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + with self.cached_session(): + layer = keras.layers.LocallyConnected1D(**kwargs) + layer.build((num_samples, num_steps, input_dim)) + self.assertLen(layer.losses, 2) + layer( + keras.backend.variable( + np.ones((num_samples, num_steps, input_dim)) + ) + ) + self.assertLen(layer.losses, 3) + + k_constraint = keras.constraints.max_norm(0.01) + b_constraint = keras.constraints.max_norm(0.01) + kwargs = { + "filters": filters, + "kernel_size": filter_length, + "kernel_constraint": k_constraint, + "bias_constraint": b_constraint, + } + with self.cached_session(): + layer = keras.layers.LocallyConnected1D(**kwargs) + layer.build((num_samples, num_steps, input_dim)) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) + + def test_locallyconnected1d_invalid_output_shapes(self): + kwargs = {"filters": 2, "kernel_size": 10} + with self.assertRaisesRegex( + ValueError, r"""One of the dimensions in the output is <= 0 """ + ): + layer = keras.layers.LocallyConnected1D(**kwargs) + layer.build((None, 5, 2)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class LocallyConnected2DLayersTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) - def test_locallyconnected_2d(self, data_format, padding, implementation): - with self.cached_session(): - num_samples = 8 - filters = 3 - stack_size = 4 - num_row = 6 - num_col = 10 - - for strides in [(1, 1), (2, 2)]: - if padding == 'same' and strides != (1, 1): - continue - + @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) + def test_locallyconnected_2d(self, data_format, padding, implementation): + with self.cached_session(): + num_samples = 8 + filters = 3 + stack_size = 4 + num_row = 6 + num_col = 10 + + for strides in [(1, 1), (2, 2)]: + if padding == "same" and strides != (1, 1): + continue + + kwargs = { + "filters": filters, + "kernel_size": 3, + "padding": padding, + "kernel_regularizer": "l2", + "bias_regularizer": "l2", + "strides": strides, + "data_format": data_format, + "implementation": implementation, + } + + if padding == "same" and implementation == 1: + self.assertRaises( + ValueError, keras.layers.LocallyConnected2D, **kwargs + ) + else: 
+ test_utils.layer_test( + keras.layers.LocallyConnected2D, + kwargs=kwargs, + input_shape=(num_samples, num_row, num_col, stack_size), + ) + + @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) + def test_locallyconnected_2d_channels_first( + self, data_format, padding, implementation + ): + with self.cached_session(): + num_samples = 8 + filters = 3 + stack_size = 4 + num_row = 6 + num_col = 10 + kwargs = { + "filters": filters, + "kernel_size": 3, + "data_format": data_format, + "implementation": implementation, + "padding": padding, + } + + if padding == "same" and implementation == 1: + self.assertRaises( + ValueError, keras.layers.LocallyConnected2D, **kwargs + ) + else: + test_utils.layer_test( + keras.layers.LocallyConnected2D, + kwargs=kwargs, + input_shape=(num_samples, num_row, num_col, stack_size), + ) + + @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) + def test_locallyconnected_2d_regularization( + self, data_format, padding, implementation + ): + num_samples = 2 + filters = 3 + stack_size = 4 + num_row = 6 + num_col = 7 kwargs = { - 'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'strides': strides, - 'data_format': data_format, - 'implementation': implementation + "filters": filters, + "kernel_size": 3, + "kernel_regularizer": "l2", + "bias_regularizer": "l2", + "activity_regularizer": "l2", + "implementation": implementation, + "padding": padding, + "data_format": data_format, } - if padding == 'same' and implementation == 1: - self.assertRaises(ValueError, keras.layers.LocallyConnected2D, - **kwargs) + if padding == "same" and implementation == 1: + self.assertRaises( + ValueError, keras.layers.LocallyConnected2D, **kwargs + ) else: - test_utils.layer_test( - keras.layers.LocallyConnected2D, - kwargs=kwargs, - input_shape=(num_samples, num_row, num_col, stack_size)) - - @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) - def test_locallyconnected_2d_channels_first(self, data_format, padding, - implementation): - with self.cached_session(): - num_samples = 8 - filters = 3 - stack_size = 4 - num_row = 6 - num_col = 10 - kwargs = { - 'filters': filters, - 'kernel_size': 3, - 'data_format': data_format, - 'implementation': implementation, - 'padding': padding - } - - if padding == 'same' and implementation == 1: - self.assertRaises(ValueError, keras.layers.LocallyConnected2D, **kwargs) - else: - test_utils.layer_test( - keras.layers.LocallyConnected2D, - kwargs=kwargs, - input_shape=(num_samples, num_row, num_col, stack_size)) - - @parameterized.parameters(_DATA_FORMAT_PADDING_IMPLEMENTATION) - def test_locallyconnected_2d_regularization(self, data_format, padding, - implementation): - num_samples = 2 - filters = 3 - stack_size = 4 - num_row = 6 - num_col = 7 - kwargs = { - 'filters': filters, - 'kernel_size': 3, - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'implementation': implementation, - 'padding': padding, - 'data_format': data_format - } - - if padding == 'same' and implementation == 1: - self.assertRaises(ValueError, keras.layers.LocallyConnected2D, **kwargs) - else: - with self.cached_session(): - layer = keras.layers.LocallyConnected2D(**kwargs) - layer.build((num_samples, num_row, num_col, stack_size)) - self.assertLen(layer.losses, 2) - layer( - keras.backend.variable( - np.ones((num_samples, num_row, num_col, stack_size)))) - self.assertLen(layer.losses, 3) - - k_constraint = 
keras.constraints.max_norm(0.01) - b_constraint = keras.constraints.max_norm(0.01) - kwargs = { - 'filters': filters, - 'kernel_size': 3, - 'kernel_constraint': k_constraint, - 'bias_constraint': b_constraint, - } - with self.cached_session(): - layer = keras.layers.LocallyConnected2D(**kwargs) - layer.build((num_samples, num_row, num_col, stack_size)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) - - def test_locallyconnected2d_invalid_output_shapes(self): - kwargs = {'filters': 2, 'kernel_size': 10} - with self.assertRaisesRegex( - ValueError, r"""One of the dimensions in the output is <= 0 """): - layer = keras.layers.LocallyConnected2D(**kwargs) - layer.build((None, 5, 5, 2)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class LocallyConnectedImplementationModeTest(tf.test.TestCase, - parameterized.TestCase): - - @parameterized.parameters([ - {'width': 1, 'data_format': 'channels_first'}, - {'width': 1, 'data_format': 'channels_last'}, - {'width': 6, 'data_format': 'channels_first'}, - {'width': 6, 'data_format': 'channels_last'}, - ]) - def test_locallyconnected_implementation(self, width, data_format): - with self.cached_session(): - num_samples = 4 - num_classes = 3 - num_epochs = 2 - - np.random.seed(1) - tf_test_util.random_seed.set_seed(1) - # Following code generates sparse targets and converts them - # to one-hot encoded vectors - # Create sparse targets eg. [0,1,2] - sparse_targets = np.random.randint(0, num_classes, (num_samples,)) - - # Convert to one-hot encoding - # Final targets: - # [[ 1. 0. 0. ] - # [ 0. 1. 0. ] - # [ 0. 0. 1. ]] - - targets = np.zeros((sparse_targets.size, num_classes)) - targets[np.arange(sparse_targets.size), sparse_targets] = 1 - height = 7 - filters = 2 - inputs = get_inputs(data_format, filters, height, num_samples, width) - - kernel_x = (3,) - kernel_y = () if width == 1 else (2,) - stride_x = (1,) - stride_y = () if width == 1 else (3,) - layers = 2 - - kwargs = { - 'layers': layers, - 'filters': filters, - 'kernel_size': kernel_x + kernel_y, - 'strides': stride_x + stride_y, - 'data_format': data_format, - 'num_classes': num_classes - } - - model_1 = get_model(implementation=1, **kwargs) - model_2 = get_model(implementation=2, **kwargs) - model_3 = get_model(implementation=3, **kwargs) - - # Build models. - model_1.train_on_batch(inputs, targets) - model_2.train_on_batch(inputs, targets) - model_3.train_on_batch(inputs, targets) - - # Copy weights. - copy_model_weights(model_from=model_2, model_to=model_1) - copy_model_weights(model_from=model_2, model_to=model_3) - - # Compare outputs at initialization. - out_1 = model_1(inputs) - out_2 = model_2(inputs) - out_3 = model_3(inputs) - - self.assertAllCloseAccordingToType( - out_2, out_1, rtol=1e-5, atol=1e-5) - self.assertAllCloseAccordingToType( - out_2, out_3, rtol=1e-5, atol=1e-5) - self.assertAllCloseAccordingToType( - out_1, out_3, rtol=1e-5, atol=1e-5) - - # Train. - model_1.fit( - x=inputs, - y=targets, - epochs=num_epochs, - batch_size=num_samples, - shuffle=False) - model_2.fit( - x=inputs, - y=targets, - epochs=num_epochs, - batch_size=num_samples, - shuffle=False) - model_3.fit( - x=inputs, - y=targets, - epochs=num_epochs, - batch_size=num_samples, - shuffle=False) - - # Compare outputs after a few training steps. 
- out_1 = model_1(inputs) - out_2 = model_2(inputs) - out_3 = model_3(inputs) - - self.assertAllCloseAccordingToType( - out_2, out_1, atol=2e-4) - self.assertAllCloseAccordingToType( - out_2, out_3, atol=2e-4) - self.assertAllCloseAccordingToType( - out_1, out_3, atol=2e-4) - - @parameterized.parameters([ - { - 'width': 1, - 'data_format': 'channels_first' - }, - { - 'width': 1, - 'data_format': 'channels_last' - }, - { - 'width': 6, - 'data_format': 'channels_first' - }, - { - 'width': 6, - 'data_format': 'channels_last' - }, - ]) - def test_locallyconnected_save(self, width, data_format): - with self.cached_session(): - num_samples = 4 - num_classes = 3 - num_epochs = 2 - - np.random.seed(1) - tf_test_util.random_seed.set_seed(1) - # Following code generates sparse targets and converts them - # to one-hot encoded vectors - # Create sparse targets eg. [0,1,2] - sparse_targets = np.random.randint(0, num_classes, (num_samples,)) - - # Convert to one-hot encoding - # Final targets: - # [[ 1. 0. 0. ] - # [ 0. 1. 0. ] - # [ 0. 0. 1. ]] - - targets = np.zeros((sparse_targets.size, num_classes)) - targets[np.arange(sparse_targets.size), sparse_targets] = 1 - - height = 7 - filters = 2 - inputs = get_inputs(data_format, filters, height, num_samples, width) - - kernel_x = (3,) - kernel_y = () if width == 1 else (2,) - stride_x = (1,) - stride_y = () if width == 1 else (3,) - layers = 2 - - kwargs = { - 'layers': layers, - 'filters': filters, - 'kernel_size': kernel_x + kernel_y, - 'strides': stride_x + stride_y, - 'data_format': data_format, - 'num_classes': num_classes - } - - model_1 = get_model_saveable(implementation=1, **kwargs) - model_2 = get_model_saveable(implementation=2, **kwargs) - model_3 = get_model_saveable(implementation=3, **kwargs) - - # Train. 
- model_1.fit( - x=inputs, - y=targets, - epochs=num_epochs, - batch_size=num_samples, - shuffle=False) - model_2.fit( - x=inputs, - y=targets, - epochs=num_epochs, - batch_size=num_samples, - shuffle=False) - model_3.fit( - x=inputs, - y=targets, - epochs=num_epochs, - batch_size=num_samples, - shuffle=False) - - out_1_before = model_1(inputs) - out_2_before = model_2(inputs) - out_3_before = model_3(inputs) - - path_1 = os.path.join(self.get_temp_dir(), 'model_1_path') - model_1.save(path_1) - model_1 = keras.models.load_model(path_1, custom_objects={'xent': xent}) - path_2 = os.path.join(self.get_temp_dir(), 'model_2_path') - model_2.save(path_2) - model_2 = keras.models.load_model(path_2, custom_objects={'xent': xent}) - path_3 = os.path.join(self.get_temp_dir(), 'model_3_path') - model_3.save(path_3) - model_3 = keras.models.load_model(path_3, custom_objects={'xent': xent}) - - out_1_after = model_1(inputs) - out_2_after = model_2(inputs) - out_3_after = model_3(inputs) - - self.assertAllCloseAccordingToType(out_1_before, out_1_after, atol=2e-4) - self.assertAllCloseAccordingToType(out_2_before, out_2_after, atol=2e-4) - self.assertAllCloseAccordingToType(out_3_before, out_3_after, atol=2e-4) - - def test_make_2d(self): - input_shapes = [ - (0,), - (0, 0), - (1,), - (2,), - (3,), - (1, 0), - (0, 3), - (1, 1), - (1, 2), - (3, 1), - (2, 2), - (3, 3), - (1, 0, 1), - (5, 2, 3), - (3, 5, 6, 7, 0), - (3, 2, 2, 4, 4), - (1, 2, 3, 4, 7, 2), - ] - np.random.seed(1) - - for input_shape in input_shapes: - inputs = np.random.normal(0, 1, input_shape) - inputs_tf = keras.backend.variable(inputs) - - split_dim = np.random.randint(0, inputs.ndim + 1) - shape_2d = (int(np.prod(inputs.shape[:split_dim])), - int(np.prod(inputs.shape[split_dim:]))) - inputs_2d = np.reshape(inputs, shape_2d) - - inputs_2d_tf = locally_connected_utils.make_2d(inputs_tf, split_dim) - inputs_2d_tf = keras.backend.get_value(inputs_2d_tf) - - self.assertAllCloseAccordingToType(inputs_2d, inputs_2d_tf) + with self.cached_session(): + layer = keras.layers.LocallyConnected2D(**kwargs) + layer.build((num_samples, num_row, num_col, stack_size)) + self.assertLen(layer.losses, 2) + layer( + keras.backend.variable( + np.ones((num_samples, num_row, num_col, stack_size)) + ) + ) + self.assertLen(layer.losses, 3) + + k_constraint = keras.constraints.max_norm(0.01) + b_constraint = keras.constraints.max_norm(0.01) + kwargs = { + "filters": filters, + "kernel_size": 3, + "kernel_constraint": k_constraint, + "bias_constraint": b_constraint, + } + with self.cached_session(): + layer = keras.layers.LocallyConnected2D(**kwargs) + layer.build((num_samples, num_row, num_col, stack_size)) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) + + def test_locallyconnected2d_invalid_output_shapes(self): + kwargs = {"filters": 2, "kernel_size": 10} + with self.assertRaisesRegex( + ValueError, r"""One of the dimensions in the output is <= 0 """ + ): + layer = keras.layers.LocallyConnected2D(**kwargs) + layer.build((None, 5, 5, 2)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class LocallyConnectedImplementationModeTest( + tf.test.TestCase, parameterized.TestCase +): + @parameterized.parameters( + [ + {"width": 1, "data_format": "channels_first"}, + {"width": 1, "data_format": "channels_last"}, + {"width": 6, "data_format": "channels_first"}, + {"width": 6, "data_format": "channels_last"}, + ] + ) + def test_locallyconnected_implementation(self, 
width, data_format):
+        with self.cached_session():
+            num_samples = 4
+            num_classes = 3
+            num_epochs = 2
+
+            np.random.seed(1)
+            tf_test_util.random_seed.set_seed(1)
+            # The following code generates sparse targets and converts them
+            # to one-hot encoded vectors.
+            # Create sparse targets, e.g. [0, 1, 2]
+            sparse_targets = np.random.randint(0, num_classes, (num_samples,))
+
+            # Convert to one-hot encoding
+            # Final targets:
+            # [[ 1. 0. 0. ]
+            # [ 0. 1. 0. ]
+            # [ 0. 0. 1. ]]
+
+            targets = np.zeros((sparse_targets.size, num_classes))
+            targets[np.arange(sparse_targets.size), sparse_targets] = 1
+            height = 7
+            filters = 2
+            inputs = get_inputs(
+                data_format, filters, height, num_samples, width
+            )
+
+            kernel_x = (3,)
+            kernel_y = () if width == 1 else (2,)
+            stride_x = (1,)
+            stride_y = () if width == 1 else (3,)
+            layers = 2
+
+            kwargs = {
+                "layers": layers,
+                "filters": filters,
+                "kernel_size": kernel_x + kernel_y,
+                "strides": stride_x + stride_y,
+                "data_format": data_format,
+                "num_classes": num_classes,
+            }
+
+            model_1 = get_model(implementation=1, **kwargs)
+            model_2 = get_model(implementation=2, **kwargs)
+            model_3 = get_model(implementation=3, **kwargs)
+
+            # Build models.
+            model_1.train_on_batch(inputs, targets)
+            model_2.train_on_batch(inputs, targets)
+            model_3.train_on_batch(inputs, targets)
+
+            # Copy weights.
+            copy_model_weights(model_from=model_2, model_to=model_1)
+            copy_model_weights(model_from=model_2, model_to=model_3)
+
+            # Compare outputs at initialization.
+            out_1 = model_1(inputs)
+            out_2 = model_2(inputs)
+            out_3 = model_3(inputs)
+
+            self.assertAllCloseAccordingToType(
+                out_2, out_1, rtol=1e-5, atol=1e-5
+            )
+            self.assertAllCloseAccordingToType(
+                out_2, out_3, rtol=1e-5, atol=1e-5
+            )
+            self.assertAllCloseAccordingToType(
+                out_1, out_3, rtol=1e-5, atol=1e-5
+            )
+
+            # Train.
+            model_1.fit(
+                x=inputs,
+                y=targets,
+                epochs=num_epochs,
+                batch_size=num_samples,
+                shuffle=False,
+            )
+            model_2.fit(
+                x=inputs,
+                y=targets,
+                epochs=num_epochs,
+                batch_size=num_samples,
+                shuffle=False,
+            )
+            model_3.fit(
+                x=inputs,
+                y=targets,
+                epochs=num_epochs,
+                batch_size=num_samples,
+                shuffle=False,
+            )
+
+            # Compare outputs after a few training steps.
+            out_1 = model_1(inputs)
+            out_2 = model_2(inputs)
+            out_3 = model_3(inputs)
+
+            self.assertAllCloseAccordingToType(out_2, out_1, atol=2e-4)
+            self.assertAllCloseAccordingToType(out_2, out_3, atol=2e-4)
+            self.assertAllCloseAccordingToType(out_1, out_3, atol=2e-4)
+
+    @parameterized.parameters(
+        [
+            {"width": 1, "data_format": "channels_first"},
+            {"width": 1, "data_format": "channels_last"},
+            {"width": 6, "data_format": "channels_first"},
+            {"width": 6, "data_format": "channels_last"},
+        ]
+    )
+    def test_locallyconnected_save(self, width, data_format):
+        with self.cached_session():
+            num_samples = 4
+            num_classes = 3
+            num_epochs = 2
+
+            np.random.seed(1)
+            tf_test_util.random_seed.set_seed(1)
+            # The following code generates sparse targets and converts them
+            # to one-hot encoded vectors.
+            # Create sparse targets, e.g. [0, 1, 2]
+            sparse_targets = np.random.randint(0, num_classes, (num_samples,))
+
+            # Convert to one-hot encoding
+            # Final targets:
+            # [[ 1. 0. 0. ]
+            # [ 0. 1. 0. ]
+            # [ 0. 0. 1.
]] + + targets = np.zeros((sparse_targets.size, num_classes)) + targets[np.arange(sparse_targets.size), sparse_targets] = 1 + + height = 7 + filters = 2 + inputs = get_inputs( + data_format, filters, height, num_samples, width + ) + + kernel_x = (3,) + kernel_y = () if width == 1 else (2,) + stride_x = (1,) + stride_y = () if width == 1 else (3,) + layers = 2 + + kwargs = { + "layers": layers, + "filters": filters, + "kernel_size": kernel_x + kernel_y, + "strides": stride_x + stride_y, + "data_format": data_format, + "num_classes": num_classes, + } + + model_1 = get_model_saveable(implementation=1, **kwargs) + model_2 = get_model_saveable(implementation=2, **kwargs) + model_3 = get_model_saveable(implementation=3, **kwargs) + + # Train. + model_1.fit( + x=inputs, + y=targets, + epochs=num_epochs, + batch_size=num_samples, + shuffle=False, + ) + model_2.fit( + x=inputs, + y=targets, + epochs=num_epochs, + batch_size=num_samples, + shuffle=False, + ) + model_3.fit( + x=inputs, + y=targets, + epochs=num_epochs, + batch_size=num_samples, + shuffle=False, + ) + + out_1_before = model_1(inputs) + out_2_before = model_2(inputs) + out_3_before = model_3(inputs) + + path_1 = os.path.join(self.get_temp_dir(), "model_1_path") + model_1.save(path_1) + model_1 = keras.models.load_model( + path_1, custom_objects={"xent": xent} + ) + path_2 = os.path.join(self.get_temp_dir(), "model_2_path") + model_2.save(path_2) + model_2 = keras.models.load_model( + path_2, custom_objects={"xent": xent} + ) + path_3 = os.path.join(self.get_temp_dir(), "model_3_path") + model_3.save(path_3) + model_3 = keras.models.load_model( + path_3, custom_objects={"xent": xent} + ) + + out_1_after = model_1(inputs) + out_2_after = model_2(inputs) + out_3_after = model_3(inputs) + + self.assertAllCloseAccordingToType( + out_1_before, out_1_after, atol=2e-4 + ) + self.assertAllCloseAccordingToType( + out_2_before, out_2_after, atol=2e-4 + ) + self.assertAllCloseAccordingToType( + out_3_before, out_3_after, atol=2e-4 + ) + + def test_make_2d(self): + input_shapes = [ + (0,), + (0, 0), + (1,), + (2,), + (3,), + (1, 0), + (0, 3), + (1, 1), + (1, 2), + (3, 1), + (2, 2), + (3, 3), + (1, 0, 1), + (5, 2, 3), + (3, 5, 6, 7, 0), + (3, 2, 2, 4, 4), + (1, 2, 3, 4, 7, 2), + ] + np.random.seed(1) + + for input_shape in input_shapes: + inputs = np.random.normal(0, 1, input_shape) + inputs_tf = keras.backend.variable(inputs) + + split_dim = np.random.randint(0, inputs.ndim + 1) + shape_2d = ( + int(np.prod(inputs.shape[:split_dim])), + int(np.prod(inputs.shape[split_dim:])), + ) + inputs_2d = np.reshape(inputs, shape_2d) + + inputs_2d_tf = locally_connected_utils.make_2d(inputs_tf, split_dim) + inputs_2d_tf = keras.backend.get_value(inputs_2d_tf) + + self.assertAllCloseAccordingToType(inputs_2d, inputs_2d_tf) def get_inputs(data_format, filters, height, num_samples, width): - if data_format == 'channels_first': - if width == 1: - input_shape = (filters, height) - else: - input_shape = (filters, height, width) + if data_format == "channels_first": + if width == 1: + input_shape = (filters, height) + else: + input_shape = (filters, height, width) - elif data_format == 'channels_last': - if width == 1: - input_shape = (height, filters) - else: - input_shape = (height, width, filters) + elif data_format == "channels_last": + if width == 1: + input_shape = (height, filters) + else: + input_shape = (height, width, filters) - else: - raise NotImplementedError(data_format) + else: + raise NotImplementedError(data_format) - inputs = np.random.normal(0, 1, 
- (num_samples,) + input_shape).astype(np.float32) - return inputs + inputs = np.random.normal(0, 1, (num_samples,) + input_shape).astype( + np.float32 + ) + return inputs def xent(y_true, y_pred): - y_true = keras.backend.cast( - keras.backend.reshape(y_true, (-1,)), - tf.int32) - - return tf.compat.v1.nn.sparse_softmax_cross_entropy_with_logits( - labels=y_true, - logits=y_pred) - - -def get_model(implementation, - filters, - kernel_size, - strides, - layers, - num_classes, - data_format): - model = keras.Sequential() - - if len(kernel_size) == 1: - lc_layer = keras.layers.LocallyConnected1D - elif len(kernel_size) == 2: - lc_layer = keras.layers.LocallyConnected2D - else: - raise NotImplementedError(kernel_size) - - for _ in range(layers): - model.add(lc_layer( - padding='valid', - kernel_initializer=keras.initializers.random_normal(), - bias_initializer=keras.initializers.random_normal(), - filters=filters, - strides=strides, - kernel_size=kernel_size, - activation=keras.activations.relu, - data_format=data_format, - implementation=implementation)) - - model.add(keras.layers.Flatten()) - model.add(keras.layers.Dense(num_classes)) - model.compile( - optimizer=RMSPropOptimizer(0.01), - metrics=[keras.metrics.categorical_accuracy], - loss=keras.losses.CategoricalCrossentropy(from_logits=True)) - return model - - -def get_model_saveable(implementation, filters, kernel_size, strides, layers, - num_classes, data_format): - model = keras.Sequential() - - if len(kernel_size) == 1: - lc_layer = keras.layers.LocallyConnected1D - elif len(kernel_size) == 2: - lc_layer = keras.layers.LocallyConnected2D - else: - raise NotImplementedError(kernel_size) - - for _ in range(layers): - model.add( - lc_layer( - padding='valid', - kernel_initializer=keras.initializers.random_normal(), - bias_initializer=keras.initializers.random_normal(), - filters=filters, - strides=strides, - kernel_size=kernel_size, - activation=keras.activations.relu, - data_format=data_format, - implementation=implementation)) - - model.add(keras.layers.Flatten()) - model.add(keras.layers.Dense(num_classes)) - model.compile( - optimizer=rmsprop.RMSProp(learning_rate=0.01), - metrics=[keras.metrics.categorical_accuracy], - loss=keras.losses.CategoricalCrossentropy(from_logits=True)) - return model + y_true = keras.backend.cast(keras.backend.reshape(y_true, (-1,)), tf.int32) + + return tf.compat.v1.nn.sparse_softmax_cross_entropy_with_logits( + labels=y_true, logits=y_pred + ) + + +def get_model( + implementation, + filters, + kernel_size, + strides, + layers, + num_classes, + data_format, +): + model = keras.Sequential() + + if len(kernel_size) == 1: + lc_layer = keras.layers.LocallyConnected1D + elif len(kernel_size) == 2: + lc_layer = keras.layers.LocallyConnected2D + else: + raise NotImplementedError(kernel_size) + + for _ in range(layers): + model.add( + lc_layer( + padding="valid", + kernel_initializer=keras.initializers.random_normal(), + bias_initializer=keras.initializers.random_normal(), + filters=filters, + strides=strides, + kernel_size=kernel_size, + activation=keras.activations.relu, + data_format=data_format, + implementation=implementation, + ) + ) + + model.add(keras.layers.Flatten()) + model.add(keras.layers.Dense(num_classes)) + model.compile( + optimizer=RMSPropOptimizer(0.01), + metrics=[keras.metrics.categorical_accuracy], + loss=keras.losses.CategoricalCrossentropy(from_logits=True), + ) + return model + + +def get_model_saveable( + implementation, + filters, + kernel_size, + strides, + layers, + num_classes, + 
data_format, +): + model = keras.Sequential() + + if len(kernel_size) == 1: + lc_layer = keras.layers.LocallyConnected1D + elif len(kernel_size) == 2: + lc_layer = keras.layers.LocallyConnected2D + else: + raise NotImplementedError(kernel_size) + + for _ in range(layers): + model.add( + lc_layer( + padding="valid", + kernel_initializer=keras.initializers.random_normal(), + bias_initializer=keras.initializers.random_normal(), + filters=filters, + strides=strides, + kernel_size=kernel_size, + activation=keras.activations.relu, + data_format=data_format, + implementation=implementation, + ) + ) + + model.add(keras.layers.Flatten()) + model.add(keras.layers.Dense(num_classes)) + model.compile( + optimizer=rmsprop.RMSProp(learning_rate=0.01), + metrics=[keras.metrics.categorical_accuracy], + loss=keras.losses.CategoricalCrossentropy(from_logits=True), + ) + return model def copy_lc_weights_2_to_1(lc_layer_2_from, lc_layer_1_to): - lc_2_kernel, lc_2_bias = lc_layer_2_from.weights - lc_2_kernel_masked = lc_2_kernel * lc_layer_2_from.kernel_mask + lc_2_kernel, lc_2_bias = lc_layer_2_from.weights + lc_2_kernel_masked = lc_2_kernel * lc_layer_2_from.kernel_mask - data_format = lc_layer_2_from.data_format + data_format = lc_layer_2_from.data_format - if data_format == 'channels_first': - if isinstance(lc_layer_2_from, keras.layers.LocallyConnected1D): - permutation = (3, 0, 1, 2) - elif isinstance(lc_layer_2_from, keras.layers.LocallyConnected2D): - permutation = (4, 5, 0, 1, 2, 3) - else: - raise NotImplementedError(lc_layer_2_from) + if data_format == "channels_first": + if isinstance(lc_layer_2_from, keras.layers.LocallyConnected1D): + permutation = (3, 0, 1, 2) + elif isinstance(lc_layer_2_from, keras.layers.LocallyConnected2D): + permutation = (4, 5, 0, 1, 2, 3) + else: + raise NotImplementedError(lc_layer_2_from) - elif data_format == 'channels_last': - if isinstance(lc_layer_2_from, keras.layers.LocallyConnected1D): - permutation = (2, 0, 1, 3) - elif isinstance(lc_layer_2_from, keras.layers.LocallyConnected2D): - permutation = (3, 4, 0, 1, 2, 5) - else: - raise NotImplementedError(lc_layer_2_from) + elif data_format == "channels_last": + if isinstance(lc_layer_2_from, keras.layers.LocallyConnected1D): + permutation = (2, 0, 1, 3) + elif isinstance(lc_layer_2_from, keras.layers.LocallyConnected2D): + permutation = (3, 4, 0, 1, 2, 5) + else: + raise NotImplementedError(lc_layer_2_from) - else: - raise NotImplementedError(data_format) + else: + raise NotImplementedError(data_format) - lc_2_kernel_masked = keras.backend.permute_dimensions( - lc_2_kernel_masked, permutation) + lc_2_kernel_masked = keras.backend.permute_dimensions( + lc_2_kernel_masked, permutation + ) - lc_2_kernel_mask = tf.not_equal( - lc_2_kernel_masked, 0) - lc_2_kernel_flat = tf.compat.v1.boolean_mask( - lc_2_kernel_masked, lc_2_kernel_mask) - lc_2_kernel_reshaped = keras.backend.reshape(lc_2_kernel_flat, - lc_layer_1_to.kernel.shape) + lc_2_kernel_mask = tf.not_equal(lc_2_kernel_masked, 0) + lc_2_kernel_flat = tf.compat.v1.boolean_mask( + lc_2_kernel_masked, lc_2_kernel_mask + ) + lc_2_kernel_reshaped = keras.backend.reshape( + lc_2_kernel_flat, lc_layer_1_to.kernel.shape + ) - lc_2_kernel_reshaped = keras.backend.get_value(lc_2_kernel_reshaped) - lc_2_bias = keras.backend.get_value(lc_2_bias) + lc_2_kernel_reshaped = keras.backend.get_value(lc_2_kernel_reshaped) + lc_2_bias = keras.backend.get_value(lc_2_bias) - lc_layer_1_to.set_weights([lc_2_kernel_reshaped, lc_2_bias]) + lc_layer_1_to.set_weights([lc_2_kernel_reshaped, 
lc_2_bias]) def copy_lc_weights_2_to_3(lc_layer_2_from, lc_layer_3_to): - lc_2_kernel, lc_2_bias = lc_layer_2_from.weights - lc_2_kernel_masked = lc_2_kernel * lc_layer_2_from.kernel_mask + lc_2_kernel, lc_2_bias = lc_layer_2_from.weights + lc_2_kernel_masked = lc_2_kernel * lc_layer_2_from.kernel_mask - lc_2_kernel_masked = locally_connected_utils.make_2d( - lc_2_kernel_masked, split_dim=keras.backend.ndim(lc_2_kernel_masked) // 2) - lc_2_kernel_masked = keras.backend.transpose(lc_2_kernel_masked) - lc_2_kernel_mask = tf.not_equal(lc_2_kernel_masked, 0) - lc_2_kernel_flat = tf.compat.v1.boolean_mask( - lc_2_kernel_masked, lc_2_kernel_mask) + lc_2_kernel_masked = locally_connected_utils.make_2d( + lc_2_kernel_masked, + split_dim=keras.backend.ndim(lc_2_kernel_masked) // 2, + ) + lc_2_kernel_masked = keras.backend.transpose(lc_2_kernel_masked) + lc_2_kernel_mask = tf.not_equal(lc_2_kernel_masked, 0) + lc_2_kernel_flat = tf.compat.v1.boolean_mask( + lc_2_kernel_masked, lc_2_kernel_mask + ) - lc_2_kernel_flat = keras.backend.get_value(lc_2_kernel_flat) - lc_2_bias = keras.backend.get_value(lc_2_bias) + lc_2_kernel_flat = keras.backend.get_value(lc_2_kernel_flat) + lc_2_bias = keras.backend.get_value(lc_2_bias) - lc_layer_3_to.set_weights([lc_2_kernel_flat, lc_2_bias]) + lc_layer_3_to.set_weights([lc_2_kernel_flat, lc_2_bias]) def copy_model_weights(model_from, model_to): - for l in range(len(model_from.layers)): - layer_from = model_from.layers[l] - layer_to = model_to.layers[l] - - if (isinstance( - layer_from, - (keras.layers.LocallyConnected2D, keras.layers.LocallyConnected1D)) and - isinstance(layer_to, (keras.layers.LocallyConnected2D, - keras.layers.LocallyConnected1D))): - if layer_from.implementation == 2: - if layer_to.implementation == 1: - copy_lc_weights_2_to_1(layer_from, layer_to) - elif layer_to.implementation == 3: - copy_lc_weights_2_to_3(layer_from, layer_to) - else: - raise NotImplementedError - - else: - raise NotImplementedError + for l in range(len(model_from.layers)): + layer_from = model_from.layers[l] + layer_to = model_to.layers[l] + + if isinstance( + layer_from, + (keras.layers.LocallyConnected2D, keras.layers.LocallyConnected1D), + ) and isinstance( + layer_to, + (keras.layers.LocallyConnected2D, keras.layers.LocallyConnected1D), + ): + if layer_from.implementation == 2: + if layer_to.implementation == 1: + copy_lc_weights_2_to_1(layer_from, layer_to) + elif layer_to.implementation == 3: + copy_lc_weights_2_to_3(layer_from, layer_to) + else: + raise NotImplementedError + + else: + raise NotImplementedError + + elif isinstance(layer_from, keras.layers.Dense): + weights_2, bias_2 = layer_from.weights + weights_2 = keras.backend.get_value(weights_2) + bias_2 = keras.backend.get_value(bias_2) + layer_to.set_weights([weights_2, bias_2]) - elif isinstance(layer_from, keras.layers.Dense): - weights_2, bias_2 = layer_from.weights - weights_2 = keras.backend.get_value(weights_2) - bias_2 = keras.backend.get_value(bias_2) - layer_to.set_weights([weights_2, bias_2]) - - else: - continue + else: + continue -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/locally_connected/locally_connected_utils.py b/keras/layers/locally_connected/locally_connected_utils.py index 435758e7e023..26695a506753 100644 --- a/keras/layers/locally_connected/locally_connected_utils.py +++ b/keras/layers/locally_connected/locally_connected_utils.py @@ -14,180 +14,193 @@ # 
============================================================================== """Private utilities for locally-connected layers.""" -from keras import backend -from keras.utils import conv_utils import numpy as np import tensorflow.compat.v2 as tf +from keras import backend +from keras.utils import conv_utils + -def get_locallyconnected_mask(input_shape, kernel_shape, strides, padding, - data_format): - """Return a mask representing connectivity of a locally-connected operation. - - This method returns a masking numpy array of 0s and 1s (of type `np.float32`) - that, when element-wise multiplied with a fully-connected weight tensor, masks - out the weights between disconnected input-output pairs and thus implements - local connectivity through a sparse fully-connected weight tensor. - - Assume an unshared convolution with given parameters is applied to an input - having N spatial dimensions with `input_shape = (d_in1, ..., d_inN)` - to produce an output with spatial shape `(d_out1, ..., d_outN)` (determined - by layer parameters such as `strides`). - - This method returns a mask which can be broadcast-multiplied (element-wise) - with a 2*(N+1)-D weight matrix (equivalent to a fully-connected layer between - (N+1)-D activations (N spatial + 1 channel dimensions for input and output) - to make it perform an unshared convolution with given `kernel_shape`, - `strides`, `padding` and `data_format`. - - Args: - input_shape: tuple of size N: `(d_in1, ..., d_inN)` spatial shape of the - input. - kernel_shape: tuple of size N, spatial shape of the convolutional kernel / - receptive field. - strides: tuple of size N, strides along each spatial dimension. - padding: type of padding, string `"same"` or `"valid"`. - data_format: a string, `"channels_first"` or `"channels_last"`. - - Returns: - a `np.float32`-type `np.ndarray` of shape - `(1, d_in1, ..., d_inN, 1, d_out1, ..., d_outN)` - if `data_format == `"channels_first"`, or - `(d_in1, ..., d_inN, 1, d_out1, ..., d_outN, 1)` - if `data_format == "channels_last"`. - - Raises: - ValueError: if `data_format` is neither `"channels_first"` nor - `"channels_last"`. - """ - mask = conv_utils.conv_kernel_mask( - input_shape=input_shape, - kernel_shape=kernel_shape, - strides=strides, - padding=padding) - - ndims = int(mask.ndim / 2) - - if data_format == 'channels_first': - mask = np.expand_dims(mask, 0) - mask = np.expand_dims(mask, -ndims - 1) - - elif data_format == 'channels_last': - mask = np.expand_dims(mask, ndims) - mask = np.expand_dims(mask, -1) - - else: - raise ValueError('Unrecognized data_format: ' + str(data_format)) - - return mask +def get_locallyconnected_mask( + input_shape, kernel_shape, strides, padding, data_format +): + """Return a mask representing connectivity of a locally-connected operation. + + This method returns a masking numpy array of 0s and 1s (of type + `np.float32`) that, when element-wise multiplied with a fully-connected + weight tensor, masks out the weights between disconnected input-output pairs + and thus implements local connectivity through a sparse fully-connected + weight tensor. + + Assume an unshared convolution with given parameters is applied to an input + having N spatial dimensions with `input_shape = (d_in1, ..., d_inN)` + to produce an output with spatial shape `(d_out1, ..., d_outN)` (determined + by layer parameters such as `strides`). 
+ + This method returns a mask which can be broadcast-multiplied (element-wise) + with a 2*(N+1)-D weight matrix (equivalent to a fully-connected layer + between (N+1)-D activations (N spatial + 1 channel dimensions for input and + output) to make it perform an unshared convolution with given + `kernel_shape`, `strides`, `padding` and `data_format`. + + Args: + input_shape: tuple of size N: `(d_in1, ..., d_inN)` spatial shape of the + input. + kernel_shape: tuple of size N, spatial shape of the convolutional kernel / + receptive field. + strides: tuple of size N, strides along each spatial dimension. + padding: type of padding, string `"same"` or `"valid"`. + data_format: a string, `"channels_first"` or `"channels_last"`. + + Returns: + a `np.float32`-type `np.ndarray` of shape + `(1, d_in1, ..., d_inN, 1, d_out1, ..., d_outN)` + if `data_format == `"channels_first"`, or + `(d_in1, ..., d_inN, 1, d_out1, ..., d_outN, 1)` + if `data_format == "channels_last"`. + + Raises: + ValueError: if `data_format` is neither `"channels_first"` nor + `"channels_last"`. + """ + mask = conv_utils.conv_kernel_mask( + input_shape=input_shape, + kernel_shape=kernel_shape, + strides=strides, + padding=padding, + ) + + ndims = int(mask.ndim / 2) + + if data_format == "channels_first": + mask = np.expand_dims(mask, 0) + mask = np.expand_dims(mask, -ndims - 1) + + elif data_format == "channels_last": + mask = np.expand_dims(mask, ndims) + mask = np.expand_dims(mask, -1) + + else: + raise ValueError("Unrecognized data_format: " + str(data_format)) + + return mask def local_conv_matmul(inputs, kernel, kernel_mask, output_shape): - """Apply N-D convolution with un-shared weights using a single matmul call. - - This method outputs `inputs . (kernel * kernel_mask)` - (with `.` standing for matrix-multiply and `*` for element-wise multiply) - and requires a precomputed `kernel_mask` to zero-out weights in `kernel` and - hence perform the same operation as a convolution with un-shared - (the remaining entries in `kernel`) weights. It also does the necessary - reshapes to make `inputs` and `kernel` 2-D and `output` (N+2)-D. - - Args: - inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1, ..., - d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`. - kernel: the unshared weights for N-D convolution, - an (N+2)-D tensor of shape: `(d_in1, ..., d_inN, channels_in, d_out2, - ..., d_outN, channels_out)` or `(channels_in, d_in1, ..., d_inN, - channels_out, d_out2, ..., d_outN)`, with the ordering of channels - and spatial dimensions matching that of the input. Each entry is the - weight between a particular input and output location, similarly to - a fully-connected weight matrix. - kernel_mask: a float 0/1 mask tensor of shape: `(d_in1, ..., d_inN, 1, - d_out2, ..., d_outN, 1)` or `(1, d_in1, ..., d_inN, 1, d_out2, ..., - d_outN)`, with the ordering of singleton and spatial dimensions matching - that of the input. Mask represents the connectivity pattern of the layer - and is - precomputed elsewhere based on layer parameters: stride, padding, and - the receptive field shape. - output_shape: a tuple of (N+2) elements representing the output shape: - `(batch_size, channels_out, d_out1, ..., d_outN)` or `(batch_size, - d_out1, ..., d_outN, channels_out)`, with the ordering of channels and - spatial dimensions matching that of the input. - - Returns: - Output (N+2)-D tensor with shape `output_shape`. 
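# Quick shape check for get_locallyconnected_mask (illustrative): a 1-D
# input of length 4 with kernel size 2, stride 1 and "valid" padding yields
# 3 output positions, so under "channels_last" the mask has shape
# (d_in1, 1, d_out1, 1) = (4, 1, 3, 1).
from keras.layers.locally_connected import locally_connected_utils

mask = locally_connected_utils.get_locallyconnected_mask(
    input_shape=(4,),
    kernel_shape=(2,),
    strides=(1,),
    padding="valid",
    data_format="channels_last",
)
print(mask.shape)  # (4, 1, 3, 1)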
- """ - inputs_flat = backend.reshape(inputs, (backend.shape(inputs)[0], -1)) - - kernel = kernel_mask * kernel - kernel = make_2d(kernel, split_dim=backend.ndim(kernel) // 2) - - output_flat = tf.matmul(inputs_flat, kernel, b_is_sparse=True) - output = backend.reshape(output_flat, [ - backend.shape(output_flat)[0], - ] + output_shape.as_list()[1:]) - return output - - -def local_conv_sparse_matmul(inputs, kernel, kernel_idxs, kernel_shape, - output_shape): - """Apply N-D convolution with un-shared weights using a single sparse matmul. - - This method outputs `inputs . tf.sparse.SparseTensor(indices=kernel_idxs, - values=kernel, dense_shape=kernel_shape)`, with `.` standing for - matrix-multiply. It also reshapes `inputs` to 2-D and `output` to (N+2)-D. - - Args: - inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1, ..., - d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`. - kernel: a 1-D tensor with shape `(len(kernel_idxs),)` containing all the - weights of the layer. - kernel_idxs: a list of integer tuples representing indices in a sparse - matrix performing the un-shared convolution as a matrix-multiply. - kernel_shape: a tuple `(input_size, output_size)`, where `input_size = - channels_in * d_in1 * ... * d_inN` and `output_size = channels_out * - d_out1 * ... * d_outN`. - output_shape: a tuple of (N+2) elements representing the output shape: - `(batch_size, channels_out, d_out1, ..., d_outN)` or `(batch_size, - d_out1, ..., d_outN, channels_out)`, with the ordering of channels and - spatial dimensions matching that of the input. - - Returns: - Output (N+2)-D dense tensor with shape `output_shape`. - """ - inputs_flat = backend.reshape(inputs, (backend.shape(inputs)[0], -1)) - output_flat = tf.sparse.sparse_dense_matmul( - sp_a=tf.SparseTensor(kernel_idxs, kernel, kernel_shape), - b=inputs_flat, - adjoint_b=True) - output_flat_transpose = backend.transpose(output_flat) - - output_reshaped = backend.reshape(output_flat_transpose, [ - backend.shape(output_flat_transpose)[0], - ] + output_shape.as_list()[1:]) - return output_reshaped + """Apply N-D convolution with un-shared weights using a single matmul call. + + This method outputs `inputs . (kernel * kernel_mask)` + (with `.` standing for matrix-multiply and `*` for element-wise multiply) + and requires a precomputed `kernel_mask` to zero-out weights in `kernel` and + hence perform the same operation as a convolution with un-shared + (the remaining entries in `kernel`) weights. It also does the necessary + reshapes to make `inputs` and `kernel` 2-D and `output` (N+2)-D. + + Args: + inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1, ..., + d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`. + kernel: the unshared weights for N-D convolution, + an (N+2)-D tensor of shape: `(d_in1, ..., d_inN, channels_in, + d_out2, ..., d_outN, channels_out)` or `(channels_in, d_in1, ..., + d_inN, channels_out, d_out2, ..., d_outN)`, with the ordering of + channels and spatial dimensions matching that of the input. Each + entry is the weight between a particular input and output location, + similarly to a fully-connected weight matrix. + kernel_mask: a float 0/1 mask tensor of shape: `(d_in1, ..., d_inN, 1, + d_out2, ..., d_outN, 1)` or `(1, d_in1, ..., d_inN, 1, d_out2, ..., + d_outN)`, with the ordering of singleton and spatial dimensions + matching that of the input. 
Mask represents the connectivity pattern + of the layer and is precomputed elsewhere based on layer parameters: + stride, padding, and the receptive field shape. + output_shape: a tuple of (N+2) elements representing the output shape: + `(batch_size, channels_out, d_out1, ..., d_outN)` or `(batch_size, + d_out1, ..., d_outN, channels_out)`, with the ordering of channels and + spatial dimensions matching that of the input. + + Returns: + Output (N+2)-D tensor with shape `output_shape`. + """ + inputs_flat = backend.reshape(inputs, (backend.shape(inputs)[0], -1)) + + kernel = kernel_mask * kernel + kernel = make_2d(kernel, split_dim=backend.ndim(kernel) // 2) + + output_flat = tf.matmul(inputs_flat, kernel, b_is_sparse=True) + output = backend.reshape( + output_flat, + [ + backend.shape(output_flat)[0], + ] + + output_shape.as_list()[1:], + ) + return output + + +def local_conv_sparse_matmul( + inputs, kernel, kernel_idxs, kernel_shape, output_shape +): + """Apply N-D convolution with unshared weights using a single sparse matmul. + + This method outputs `inputs . tf.sparse.SparseTensor(indices=kernel_idxs, + values=kernel, dense_shape=kernel_shape)`, with `.` standing for + matrix-multiply. It also reshapes `inputs` to 2-D and `output` to (N+2)-D. + + Args: + inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1, ..., + d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`. + kernel: a 1-D tensor with shape `(len(kernel_idxs),)` containing all the + weights of the layer. + kernel_idxs: a list of integer tuples representing indices in a sparse + matrix performing the un-shared convolution as a matrix-multiply. + kernel_shape: a tuple `(input_size, output_size)`, where `input_size = + channels_in * d_in1 * ... * d_inN` and `output_size = channels_out * + d_out1 * ... * d_outN`. + output_shape: a tuple of (N+2) elements representing the output shape: + `(batch_size, channels_out, d_out1, ..., d_outN)` or `(batch_size, + d_out1, ..., d_outN, channels_out)`, with the ordering of channels and + spatial dimensions matching that of the input. + + Returns: + Output (N+2)-D dense tensor with shape `output_shape`. + """ + inputs_flat = backend.reshape(inputs, (backend.shape(inputs)[0], -1)) + output_flat = tf.sparse.sparse_dense_matmul( + sp_a=tf.SparseTensor(kernel_idxs, kernel, kernel_shape), + b=inputs_flat, + adjoint_b=True, + ) + output_flat_transpose = backend.transpose(output_flat) + + output_reshaped = backend.reshape( + output_flat_transpose, + [ + backend.shape(output_flat_transpose)[0], + ] + + output_shape.as_list()[1:], + ) + return output_reshaped def make_2d(tensor, split_dim): - """Reshapes an N-dimensional tensor into a 2D tensor. + """Reshapes an N-dimensional tensor into a 2D tensor. - Dimensions before (excluding) and after (including) `split_dim` are grouped - together. + Dimensions before (excluding) and after (including) `split_dim` are grouped + together. - Args: - tensor: a tensor of shape `(d0, ..., d(N-1))`. - split_dim: an integer from 1 to N-1, index of the dimension to group - dimensions before (excluding) and after (including). + Args: + tensor: a tensor of shape `(d0, ..., d(N-1))`. + split_dim: an integer from 1 to N-1, index of the dimension to group + dimensions before (excluding) and after (including). - Returns: - Tensor of shape - `(d0 * ... * d(split_dim-1), d(split_dim) * ... * d(N-1))`. - """ - shape = tf.shape(tensor) - in_dims = shape[:split_dim] - out_dims = shape[split_dim:] + Returns: + Tensor of shape + `(d0 * ... 
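# The sparse path in miniature (illustrative): the same local convolution
# expressed as tf.sparse.sparse_dense_matmul over explicit (row, col)
# indices. In this toy example the sparse operand is laid out
# (output_size, input_size) so that the adjoint matmul conforms; all
# values are arbitrary example numbers.
import tensorflow.compat.v2 as tf

kernel_idxs = [(0, 0), (1, 0), (1, 1)]  # toy connectivity pattern
kernel = tf.constant([0.5, -1.0, 2.0])  # one weight per index
kernel_shape = (2, 3)  # (output_size, input_size)
inputs_flat = tf.random.normal((4, 3))  # (batch, input_size)
output_flat = tf.sparse.sparse_dense_matmul(
    sp_a=tf.SparseTensor(kernel_idxs, kernel, kernel_shape),
    b=inputs_flat,
    adjoint_b=True,
)  # (output_size, batch); the real code transposes this back
print(output_flat.shape)  # (2, 4)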
* d(split_dim-1), d(split_dim) * ... * d(N-1))`. + """ + shape = tf.shape(tensor) + in_dims = shape[:split_dim] + out_dims = shape[split_dim:] - in_size = tf.reduce_prod(in_dims) - out_size = tf.reduce_prod(out_dims) + in_size = tf.reduce_prod(in_dims) + out_size = tf.reduce_prod(out_dims) - return tf.reshape(tensor, (in_size, out_size)) + return tf.reshape(tensor, (in_size, out_size)) diff --git a/keras/layers/merging/BUILD b/keras/layers/merging/BUILD index 357606ec0f92..7de776ca2a18 100644 --- a/keras/layers/merging/BUILD +++ b/keras/layers/merging/BUILD @@ -1,15 +1,17 @@ # Description: # Contains the Keras merging layers. +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/py/tensorflow_gnn:__subpackages__", "//third_party/tensorflow/python/distribute:__pkg__", "//third_party/tensorflow/python/feature_column:__pkg__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/trackable:__pkg__", "//third_party/tensorflow/tools/pip_package:__pkg__", "//third_party/tensorflow_models/official/projects/residual_mobilenet/modeling/backbones:__pkg__", ], diff --git a/keras/layers/merging/__init__.py b/keras/layers/merging/__init__.py index 406c6afbd8ac..beb834f31c73 100644 --- a/keras/layers/merging/__init__.py +++ b/keras/layers/merging/__init__.py @@ -13,24 +13,23 @@ # limitations under the License. # ============================================================================== """Keras merging layers.""" -# pylint: disable=g-bad-import-order + +# Merging functions. # Merging layers. from keras.layers.merging.add import Add -from keras.layers.merging.subtract import Subtract -from keras.layers.merging.multiply import Multiply +from keras.layers.merging.add import add from keras.layers.merging.average import Average -from keras.layers.merging.maximum import Maximum -from keras.layers.merging.minimum import Minimum +from keras.layers.merging.average import average from keras.layers.merging.concatenate import Concatenate +from keras.layers.merging.concatenate import concatenate from keras.layers.merging.dot import Dot - -# Merging functions. -from keras.layers.merging.add import add -from keras.layers.merging.subtract import subtract -from keras.layers.merging.multiply import multiply -from keras.layers.merging.average import average +from keras.layers.merging.dot import dot +from keras.layers.merging.maximum import Maximum from keras.layers.merging.maximum import maximum +from keras.layers.merging.minimum import Minimum from keras.layers.merging.minimum import minimum -from keras.layers.merging.concatenate import concatenate -from keras.layers.merging.dot import dot +from keras.layers.merging.multiply import Multiply +from keras.layers.merging.multiply import multiply +from keras.layers.merging.subtract import Subtract +from keras.layers.merging.subtract import subtract diff --git a/keras/layers/merging/add.py b/keras/layers/merging/add.py index 8e4997ecceb9..3df77c3efc9f 100644 --- a/keras/layers/merging/add.py +++ b/keras/layers/merging/add.py @@ -17,75 +17,76 @@ from keras.layers.merging.base_merge import _Merge +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Add') +@keras_export("keras.layers.Add") class Add(_Merge): - """Layer that adds a list of inputs. + """Layer that adds a list of inputs. 
- It takes as input a list of tensors, - all of the same shape, and returns - a single tensor (also of the same shape). + It takes as input a list of tensors, + all of the same shape, and returns + a single tensor (also of the same shape). - Examples: + Examples: - >>> input_shape = (2, 3, 4) - >>> x1 = tf.random.normal(input_shape) - >>> x2 = tf.random.normal(input_shape) - >>> y = tf.keras.layers.Add()([x1, x2]) - >>> print(y.shape) - (2, 3, 4) + >>> input_shape = (2, 3, 4) + >>> x1 = tf.random.normal(input_shape) + >>> x2 = tf.random.normal(input_shape) + >>> y = tf.keras.layers.Add()([x1, x2]) + >>> print(y.shape) + (2, 3, 4) - Used in a functional model: + Used in a functional model: - >>> input1 = tf.keras.layers.Input(shape=(16,)) - >>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1) - >>> input2 = tf.keras.layers.Input(shape=(32,)) - >>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2) - >>> # equivalent to `added = tf.keras.layers.add([x1, x2])` - >>> added = tf.keras.layers.Add()([x1, x2]) - >>> out = tf.keras.layers.Dense(4)(added) - >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out) + >>> input1 = tf.keras.layers.Input(shape=(16,)) + >>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1) + >>> input2 = tf.keras.layers.Input(shape=(32,)) + >>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2) + >>> # equivalent to `added = tf.keras.layers.add([x1, x2])` + >>> added = tf.keras.layers.Add()([x1, x2]) + >>> out = tf.keras.layers.Dense(4)(added) + >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out) - """ + """ - def _merge_function(self, inputs): - output = inputs[0] - for i in range(1, len(inputs)): - output += inputs[i] - return output + def _merge_function(self, inputs): + output = inputs[0] + for i in range(1, len(inputs)): + output += inputs[i] + return output -@keras_export('keras.layers.add') +@keras_export("keras.layers.add") def add(inputs, **kwargs): - """Functional interface to the `tf.keras.layers.Add` layer. + """Functional interface to the `tf.keras.layers.Add` layer. - Args: - inputs: A list of input tensors with the same shape. - **kwargs: Standard layer keyword arguments. + Args: + inputs: A list of input tensors with the same shape. + **kwargs: Standard layer keyword arguments. - Returns: - A tensor as the sum of the inputs. It has the same shape as the inputs. + Returns: + A tensor as the sum of the inputs. It has the same shape as the inputs. 
- Examples: + Examples: - >>> input_shape = (2, 3, 4) - >>> x1 = tf.random.normal(input_shape) - >>> x2 = tf.random.normal(input_shape) - >>> y = tf.keras.layers.add([x1, x2]) - >>> print(y.shape) - (2, 3, 4) + >>> input_shape = (2, 3, 4) + >>> x1 = tf.random.normal(input_shape) + >>> x2 = tf.random.normal(input_shape) + >>> y = tf.keras.layers.add([x1, x2]) + >>> print(y.shape) + (2, 3, 4) - Used in a functional model: + Used in a functional model: - >>> input1 = tf.keras.layers.Input(shape=(16,)) - >>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1) - >>> input2 = tf.keras.layers.Input(shape=(32,)) - >>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2) - >>> added = tf.keras.layers.add([x1, x2]) - >>> out = tf.keras.layers.Dense(4)(added) - >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out) + >>> input1 = tf.keras.layers.Input(shape=(16,)) + >>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1) + >>> input2 = tf.keras.layers.Input(shape=(32,)) + >>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2) + >>> added = tf.keras.layers.add([x1, x2]) + >>> out = tf.keras.layers.Dense(4)(added) + >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out) - """ - return Add(**kwargs)(inputs) + """ + return Add(**kwargs)(inputs) diff --git a/keras/layers/merging/average.py b/keras/layers/merging/average.py index e019b6bb37e6..87261c167099 100644 --- a/keras/layers/merging/average.py +++ b/keras/layers/merging/average.py @@ -17,77 +17,78 @@ from keras.layers.merging.base_merge import _Merge +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Average') +@keras_export("keras.layers.Average") class Average(_Merge): - """Layer that averages a list of inputs element-wise. + """Layer that averages a list of inputs element-wise. - It takes as input a list of tensors, all of the same shape, and returns - a single tensor (also of the same shape). + It takes as input a list of tensors, all of the same shape, and returns + a single tensor (also of the same shape). - Example: + Example: - >>> x1 = np.ones((2, 2)) - >>> x2 = np.zeros((2, 2)) - >>> y = tf.keras.layers.Average()([x1, x2]) - >>> y.numpy().tolist() - [[0.5, 0.5], [0.5, 0.5]] + >>> x1 = np.ones((2, 2)) + >>> x2 = np.zeros((2, 2)) + >>> y = tf.keras.layers.Average()([x1, x2]) + >>> y.numpy().tolist() + [[0.5, 0.5], [0.5, 0.5]] - Usage in a functional model: + Usage in a functional model: - >>> input1 = tf.keras.layers.Input(shape=(16,)) - >>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1) - >>> input2 = tf.keras.layers.Input(shape=(32,)) - >>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2) - >>> avg = tf.keras.layers.Average()([x1, x2]) - >>> out = tf.keras.layers.Dense(4)(avg) - >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out) + >>> input1 = tf.keras.layers.Input(shape=(16,)) + >>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1) + >>> input2 = tf.keras.layers.Input(shape=(32,)) + >>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2) + >>> avg = tf.keras.layers.Average()([x1, x2]) + >>> out = tf.keras.layers.Dense(4)(avg) + >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out) - Raises: - ValueError: If there is a shape mismatch between the inputs and the shapes - cannot be broadcasted to match. - """ + Raises: + ValueError: If there is a shape mismatch between the inputs and the shapes + cannot be broadcasted to match. 
+ """ - def _merge_function(self, inputs): - output = inputs[0] - for i in range(1, len(inputs)): - output += inputs[i] - return output / len(inputs) + def _merge_function(self, inputs): + output = inputs[0] + for i in range(1, len(inputs)): + output += inputs[i] + return output / len(inputs) -@keras_export('keras.layers.average') +@keras_export("keras.layers.average") def average(inputs, **kwargs): - """Functional interface to the `tf.keras.layers.Average` layer. - - Example: - - >>> x1 = np.ones((2, 2)) - >>> x2 = np.zeros((2, 2)) - >>> y = tf.keras.layers.Average()([x1, x2]) - >>> y.numpy().tolist() - [[0.5, 0.5], [0.5, 0.5]] - - Usage in a functional model: - - >>> input1 = tf.keras.layers.Input(shape=(16,)) - >>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1) - >>> input2 = tf.keras.layers.Input(shape=(32,)) - >>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2) - >>> avg = tf.keras.layers.Average()([x1, x2]) - >>> out = tf.keras.layers.Dense(4)(avg) - >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out) - - Args: - inputs: A list of input tensors. - **kwargs: Standard layer keyword arguments. - - Returns: - A tensor, the average of the inputs. - - Raises: - ValueError: If there is a shape mismatch between the inputs and the shapes - cannot be broadcasted to match. - """ - return Average(**kwargs)(inputs) + """Functional interface to the `tf.keras.layers.Average` layer. + + Example: + + >>> x1 = np.ones((2, 2)) + >>> x2 = np.zeros((2, 2)) + >>> y = tf.keras.layers.Average()([x1, x2]) + >>> y.numpy().tolist() + [[0.5, 0.5], [0.5, 0.5]] + + Usage in a functional model: + + >>> input1 = tf.keras.layers.Input(shape=(16,)) + >>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1) + >>> input2 = tf.keras.layers.Input(shape=(32,)) + >>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2) + >>> avg = tf.keras.layers.Average()([x1, x2]) + >>> out = tf.keras.layers.Dense(4)(avg) + >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out) + + Args: + inputs: A list of input tensors. + **kwargs: Standard layer keyword arguments. + + Returns: + A tensor, the average of the inputs. + + Raises: + ValueError: If there is a shape mismatch between the inputs and the shapes + cannot be broadcasted to match. + """ + return Average(**kwargs)(inputs) diff --git a/keras/layers/merging/base_merge.py b/keras/layers/merging/base_merge.py index a73db401984d..058de0a0eb21 100644 --- a/keras/layers/merging/base_merge.py +++ b/keras/layers/merging/base_merge.py @@ -14,205 +14,229 @@ # ============================================================================== """Private base class for layers that can merge several inputs into one.""" +import tensorflow.compat.v2 as tf + from keras import backend from keras.engine.base_layer import Layer from keras.utils import tf_utils -import tensorflow.compat.v2 as tf class _Merge(Layer): - """Generic merge layer for elementwise merge functions. - - Used to implement `Sum`, `Average`, etc. - """ - - def __init__(self, **kwargs): - """Initializes a Merge layer. + """Generic merge layer for elementwise merge functions. - Args: - **kwargs: standard layer keyword arguments. + Used to implement `Sum`, `Average`, etc. """ - super().__init__(**kwargs) - self.supports_masking = True - def _merge_function(self, inputs): - raise NotImplementedError - - def _compute_elemwise_op_output_shape(self, shape1, shape2): - """Computes the shape of the resultant of an elementwise operation. - - Args: - shape1: tuple or None. 
Shape of the first tensor - shape2: tuple or None. Shape of the second tensor - - Returns: - expected output shape when an element-wise operation is - carried out on 2 tensors with shapes shape1 and shape2. - tuple or None. - - Raises: - ValueError: if shape1 and shape2 are not compatible for - element-wise operations. - """ - if None in [shape1, shape2]: - return None - elif len(shape1) < len(shape2): - return self._compute_elemwise_op_output_shape(shape2, shape1) # pylint: disable=arguments-out-of-order - elif not shape2: - return shape1 - output_shape = list(shape1[:-len(shape2)]) - for i, j in zip(shape1[-len(shape2):], shape2): - if i is None or j is None: - output_shape.append(None) - elif i == 1: - output_shape.append(j) - elif j == 1: - output_shape.append(i) - else: - if i != j: - raise ValueError( - 'Inputs have incompatible shapes. ' - f'Received shapes {shape1} and {shape2}') - output_shape.append(i) - return tuple(output_shape) - - @tf_utils.shape_type_conversion - def build(self, input_shape): - # Used purely for shape validation. - if not isinstance(input_shape[0], tuple): - raise ValueError( - 'A merge layer should be called on a list of inputs. ' - f'Received: input_shape={input_shape} (not a list of shapes)') - if len(input_shape) < 1: - raise ValueError('A merge layer should be called ' - 'on a list of at least 1 input. ' - f'Got {len(input_shape)} inputs. ' - f'Full input_shape received: {input_shape}') - batch_sizes = {s[0] for s in input_shape if s} - {None} - if len(batch_sizes) > 1: - raise ValueError( - 'Cannot merge tensors with different batch sizes. ' - f'Got tensors with shapes {input_shape}') - if input_shape[0] is None: - output_shape = None - else: - output_shape = input_shape[0][1:] - for i in range(1, len(input_shape)): - if input_shape[i] is None: - shape = None - else: - shape = input_shape[i][1:] - output_shape = self._compute_elemwise_op_output_shape(output_shape, shape) - # If the inputs have different ranks, we have to reshape them - # to make them broadcastable. - if None not in input_shape and len(set(map(len, input_shape))) == 1: - self._reshape_required = False - else: - self._reshape_required = True - - def call(self, inputs): - if not isinstance(inputs, (list, tuple)): - raise ValueError( - 'A merge layer should be called on a list of inputs. ' - f'Received: inputs={inputs} (not a list of tensors)') - if self._reshape_required: - reshaped_inputs = [] - input_ndims = list(map(backend.ndim, inputs)) - if None not in input_ndims: - # If ranks of all inputs are available, - # we simply expand each of them at axis=1 - # until all of them have the same rank. - max_ndim = max(input_ndims) - for x in inputs: - x_ndim = backend.ndim(x) - for _ in range(max_ndim - x_ndim): - x = tf.expand_dims(x, axis=1) - reshaped_inputs.append(x) - return self._merge_function(reshaped_inputs) - else: - # Transpose all inputs so that batch size is the last dimension. - # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... 
, batch_size) - transposed = False - for x in inputs: - x_ndim = backend.ndim(x) - if x_ndim is None: - x_shape = tf.shape(x) - batch_size = x_shape[0] - new_shape = backend.concatenate( - [x_shape[1:], - tf.expand_dims(batch_size, axis=-1)]) - x_transposed = tf.reshape( - x, - tf.stack( - [batch_size, tf.reduce_prod(x_shape[1:])], axis=0)) - x_transposed = tf.transpose(x_transposed, perm=(1, 0)) - x_transposed = tf.reshape(x_transposed, new_shape) - reshaped_inputs.append(x_transposed) - transposed = True - elif x_ndim > 1: - dims = list(range(1, x_ndim)) + [0] - reshaped_inputs.append(tf.transpose(x, perm=dims)) - transposed = True - else: - # We don't transpose inputs if they are 1D vectors or scalars. - reshaped_inputs.append(x) - y = self._merge_function(reshaped_inputs) - y_ndim = backend.ndim(y) - if transposed: - # If inputs have been transposed, we have to transpose the output too. - if y_ndim is None: - y_shape = tf.shape(y) - y_ndim = tf.shape(y_shape)[0] - batch_size = y_shape[y_ndim - 1] - new_shape = backend.concatenate([ - tf.expand_dims(batch_size, axis=-1), y_shape[:y_ndim - 1] - ]) - y = tf.reshape(y, (-1, batch_size)) - y = tf.transpose(y, perm=(1, 0)) - y = tf.reshape(y, new_shape) - elif y_ndim > 1: - dims = [y_ndim - 1] + list(range(y_ndim - 1)) - y = tf.transpose(y, perm=dims) - return y - else: - return self._merge_function(inputs) - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if input_shape[0] is None: - output_shape = None - else: - output_shape = input_shape[0][1:] - for i in range(1, len(input_shape)): - if input_shape[i] is None: - shape = None - else: - shape = input_shape[i][1:] - output_shape = self._compute_elemwise_op_output_shape(output_shape, shape) - batch_sizes = {s[0] for s in input_shape if s is not None} - {None} - if len(batch_sizes) == 1: - output_shape = (list(batch_sizes)[0],) + output_shape - else: - output_shape = (None,) + output_shape - return output_shape - - def compute_mask(self, inputs, mask=None): - if mask is None: - return None - if not isinstance(mask, (tuple, list)): - raise ValueError(f'`mask` should be a list. Received: mask={mask}') - if not isinstance(inputs, (tuple, list)): - raise ValueError(f'`inputs` should be a list. Received: inputs={inputs}') - if len(mask) != len(inputs): - raise ValueError( - 'The lists `inputs` and `mask` should have the same length. ' - f'Received: inputs={inputs} of length {len(inputs)}, and ' - f'mask={mask} of length {len(mask)}') - if all(m is None for m in mask): - return None - masks = [tf.expand_dims(m, axis=0) for m in mask if m is not None] - return backend.all( - backend.concatenate(masks, axis=0), axis=0, keepdims=False) - - def get_config(self): # pylint: disable=useless-super-delegation - return super().get_config() + def __init__(self, **kwargs): + """Initializes a Merge layer. + + Args: + **kwargs: standard layer keyword arguments. + """ + super().__init__(**kwargs) + self.supports_masking = True + + def _merge_function(self, inputs): + raise NotImplementedError + + def _compute_elemwise_op_output_shape(self, shape1, shape2): + """Computes the shape of the resultant of an elementwise operation. + + Args: + shape1: tuple or None. Shape of the first tensor + shape2: tuple or None. Shape of the second tensor + + Returns: + expected output shape when an element-wise operation is + carried out on 2 tensors with shapes shape1 and shape2. + tuple or None. + + Raises: + ValueError: if shape1 and shape2 are not compatible for + element-wise operations. 
+ """ + if None in [shape1, shape2]: + return None + elif len(shape1) < len(shape2): + return self._compute_elemwise_op_output_shape(shape2, shape1) + elif not shape2: + return shape1 + output_shape = list(shape1[: -len(shape2)]) + for i, j in zip(shape1[-len(shape2) :], shape2): + if i is None or j is None: + output_shape.append(None) + elif i == 1: + output_shape.append(j) + elif j == 1: + output_shape.append(i) + else: + if i != j: + raise ValueError( + "Inputs have incompatible shapes. " + f"Received shapes {shape1} and {shape2}" + ) + output_shape.append(i) + return tuple(output_shape) + + @tf_utils.shape_type_conversion + def build(self, input_shape): + # Used purely for shape validation. + if not isinstance(input_shape[0], tuple): + raise ValueError( + "A merge layer should be called on a list of inputs. " + f"Received: input_shape={input_shape} (not a list of shapes)" + ) + if len(input_shape) < 1: + raise ValueError( + "A merge layer should be called " + "on a list of at least 1 input. " + f"Got {len(input_shape)} inputs. " + f"Full input_shape received: {input_shape}" + ) + batch_sizes = {s[0] for s in input_shape if s} - {None} + if len(batch_sizes) > 1: + raise ValueError( + "Cannot merge tensors with different batch sizes. " + f"Got tensors with shapes {input_shape}" + ) + if input_shape[0] is None: + output_shape = None + else: + output_shape = input_shape[0][1:] + for i in range(1, len(input_shape)): + if input_shape[i] is None: + shape = None + else: + shape = input_shape[i][1:] + output_shape = self._compute_elemwise_op_output_shape( + output_shape, shape + ) + # If the inputs have different ranks, we have to reshape them + # to make them broadcastable. + if None not in input_shape and len(set(map(len, input_shape))) == 1: + self._reshape_required = False + else: + self._reshape_required = True + + def call(self, inputs): + if not isinstance(inputs, (list, tuple)): + raise ValueError( + "A merge layer should be called on a list of inputs. " + f"Received: inputs={inputs} (not a list of tensors)" + ) + if self._reshape_required: + reshaped_inputs = [] + input_ndims = list(map(backend.ndim, inputs)) + if None not in input_ndims: + # If ranks of all inputs are available, + # we simply expand each of them at axis=1 + # until all of them have the same rank. + max_ndim = max(input_ndims) + for x in inputs: + x_ndim = backend.ndim(x) + for _ in range(max_ndim - x_ndim): + x = tf.expand_dims(x, axis=1) + reshaped_inputs.append(x) + return self._merge_function(reshaped_inputs) + else: + # Transpose all inputs so that batch size is the last dimension. + # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , + # batch_size) + transposed = False + for x in inputs: + x_ndim = backend.ndim(x) + if x_ndim is None: + x_shape = tf.shape(x) + batch_size = x_shape[0] + new_shape = backend.concatenate( + [x_shape[1:], tf.expand_dims(batch_size, axis=-1)] + ) + x_transposed = tf.reshape( + x, + tf.stack( + [batch_size, tf.reduce_prod(x_shape[1:])], + axis=0, + ), + ) + x_transposed = tf.transpose(x_transposed, perm=(1, 0)) + x_transposed = tf.reshape(x_transposed, new_shape) + reshaped_inputs.append(x_transposed) + transposed = True + elif x_ndim > 1: + dims = list(range(1, x_ndim)) + [0] + reshaped_inputs.append(tf.transpose(x, perm=dims)) + transposed = True + else: + # We don't transpose inputs if they are 1D vectors or + # scalars. 
+ reshaped_inputs.append(x) + y = self._merge_function(reshaped_inputs) + y_ndim = backend.ndim(y) + if transposed: + # If inputs have been transposed, we have to transpose the + # output too. + if y_ndim is None: + y_shape = tf.shape(y) + y_ndim = tf.shape(y_shape)[0] + batch_size = y_shape[y_ndim - 1] + new_shape = backend.concatenate( + [ + tf.expand_dims(batch_size, axis=-1), + y_shape[: y_ndim - 1], + ] + ) + y = tf.reshape(y, (-1, batch_size)) + y = tf.transpose(y, perm=(1, 0)) + y = tf.reshape(y, new_shape) + elif y_ndim > 1: + dims = [y_ndim - 1] + list(range(y_ndim - 1)) + y = tf.transpose(y, perm=dims) + return y + else: + return self._merge_function(inputs) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if input_shape[0] is None: + output_shape = None + else: + output_shape = input_shape[0][1:] + for i in range(1, len(input_shape)): + if input_shape[i] is None: + shape = None + else: + shape = input_shape[i][1:] + output_shape = self._compute_elemwise_op_output_shape( + output_shape, shape + ) + batch_sizes = {s[0] for s in input_shape if s is not None} - {None} + if len(batch_sizes) == 1: + output_shape = (list(batch_sizes)[0],) + output_shape + else: + output_shape = (None,) + output_shape + return output_shape + + def compute_mask(self, inputs, mask=None): + if mask is None: + return None + if not isinstance(mask, (tuple, list)): + raise ValueError(f"`mask` should be a list. Received: mask={mask}") + if not isinstance(inputs, (tuple, list)): + raise ValueError( + f"`inputs` should be a list. Received: inputs={inputs}" + ) + if len(mask) != len(inputs): + raise ValueError( + "The lists `inputs` and `mask` should have the same length. " + f"Received: inputs={inputs} of length {len(inputs)}, and " + f"mask={mask} of length {len(mask)}" + ) + if all(m is None for m in mask): + return None + masks = [tf.expand_dims(m, axis=0) for m in mask if m is not None] + return backend.all( + backend.concatenate(masks, axis=0), axis=0, keepdims=False + ) + + def get_config(self): + return super().get_config() diff --git a/keras/layers/merging/concatenate.py b/keras/layers/merging/concatenate.py index 79dff736940a..3818e332d60c 100644 --- a/keras/layers/merging/concatenate.py +++ b/keras/layers/merging/concatenate.py @@ -15,51 +15,23 @@ """Layer that concatenates several inputs.""" +import tensorflow.compat.v2 as tf + from keras import backend from keras.layers.merging.base_merge import _Merge from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Concatenate') +@keras_export("keras.layers.Concatenate") class Concatenate(_Merge): - """Layer that concatenates a list of inputs. - - It takes as input a list of tensors, all of the same shape except - for the concatenation axis, and returns a single tensor that is the - concatenation of all inputs. - - >>> x = np.arange(20).reshape(2, 2, 5) - >>> print(x) - [[[ 0 1 2 3 4] - [ 5 6 7 8 9]] - [[10 11 12 13 14] - [15 16 17 18 19]]] - >>> y = np.arange(20, 30).reshape(2, 1, 5) - >>> print(y) - [[[20 21 22 23 24]] - [[25 26 27 28 29]]] - >>> tf.keras.layers.Concatenate(axis=1)([x, y]) - - - >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2)) - >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2)) - >>> concatted = tf.keras.layers.Concatenate()([x1, x2]) - >>> concatted.shape - TensorShape([5, 16]) + """Layer that concatenates a list of inputs. 
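# Runtime illustration of the rank-broadcasting branch of call() above:
# merging a (batch, 4, 5) tensor with a rank-1, one-scalar-per-sample
# tensor expands the lower-rank input along axis=1 until the ranks match.
import numpy as np
import tensorflow.compat.v2 as tf

import keras

x = tf.constant(np.random.random((2, 4, 5)), dtype=tf.float32)
s = tf.constant([1.0, 2.0])  # rank 1: one scalar per sample
y = keras.layers.Add()([x, s])
print(y.shape)  # (2, 4, 5)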
- """ - - def __init__(self, axis=-1, **kwargs): - """Instantiates a Concatenate layer. + It takes as input a list of tensors, all of the same shape except + for the concatenation axis, and returns a single tensor that is the + concatenation of all inputs. >>> x = np.arange(20).reshape(2, 2, 5) >>> print(x) @@ -80,138 +52,180 @@ def __init__(self, axis=-1, **kwargs): [15, 16, 17, 18, 19], [25, 26, 27, 28, 29]]])> - Args: - axis: Axis along which to concatenate. - **kwargs: standard layer keyword arguments. + >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2)) + >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2)) + >>> concatted = tf.keras.layers.Concatenate()([x1, x2]) + >>> concatted.shape + TensorShape([5, 16]) + """ - super().__init__(**kwargs) - self.axis = axis - self.supports_masking = True - self._reshape_required = False - - @tf_utils.shape_type_conversion - def build(self, input_shape): - # Used purely for shape validation. - if len(input_shape) < 1 or not isinstance(input_shape[0], tuple): - raise ValueError( - 'A `Concatenate` layer should be called on a list of ' - f'at least 1 input. Received: input_shape={input_shape}') - if all(shape is None for shape in input_shape): - return - reduced_inputs_shapes = [list(shape) for shape in input_shape] - shape_set = set() - for i in range(len(reduced_inputs_shapes)): - del reduced_inputs_shapes[i][self.axis] - shape_set.add(tuple(reduced_inputs_shapes[i])) - - if len(shape_set) != 1: - err_msg = ('A `Concatenate` layer requires inputs with matching shapes ' - 'except for the concatenation axis. ' - f'Received: input_shape={input_shape}') - # Make sure all the shapes have same ranks. - ranks = set(len(shape) for shape in shape_set) - if len(ranks) != 1: - raise ValueError(err_msg) - # Get the only rank for the set. - (rank,) = ranks - for axis in range(rank): - # Skip the Nones in the shape since they are dynamic, also the axis for - # concat has been removed above. - unique_dims = set( - shape[axis] for shape in shape_set if shape[axis] is not None) - if len(unique_dims) > 1: - raise ValueError(err_msg) - - def _merge_function(self, inputs): - return backend.concatenate(inputs, axis=self.axis) - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if ((not isinstance(input_shape, (tuple, list))) or - (not isinstance(input_shape[0], (tuple, list)))): - # The tf_utils.shape_type_conversion decorator turns tensorshapes - # into tuples, so we need to verify that `input_shape` is a list/tuple, - # *and* that the individual elements are themselves shape tuples. - raise ValueError( - 'A `Concatenate` layer should be called on a list of inputs. ' - f'Received: input_shape={input_shape}') - input_shapes = input_shape - output_shape = list(input_shapes[0]) - for shape in input_shapes[1:]: - if output_shape[self.axis] is None or shape[self.axis] is None: - output_shape[self.axis] = None - break - output_shape[self.axis] += shape[self.axis] - return tuple(output_shape) - - def compute_mask(self, inputs, mask=None): - if mask is None: - return None - if not isinstance(mask, (tuple, list)): - raise ValueError(f'`mask` should be a list. Received mask={mask}') - if not isinstance(inputs, (tuple, list)): - raise ValueError(f'`inputs` should be a list. Received: inputs={inputs}') - if len(mask) != len(inputs): - raise ValueError( - 'The lists `inputs` and `mask` should have the same length. 
' - f'Received: inputs={inputs} of length {len(inputs)}, and ' - f'mask={mask} of length {len(mask)}') - if all(m is None for m in mask): - return None - # Make a list of masks while making sure - # the dimensionality of each mask - # is the same as the corresponding input. - masks = [] - for input_i, mask_i in zip(inputs, mask): - if mask_i is None: - # Input is unmasked. Append all 1s to masks, - masks.append(tf.ones_like(input_i, dtype='bool')) - elif backend.ndim(mask_i) < backend.ndim(input_i): - # Mask is smaller than the input, expand it - masks.append(tf.expand_dims(mask_i, axis=-1)) - else: - masks.append(mask_i) - concatenated = backend.concatenate(masks, axis=self.axis) - return backend.all(concatenated, axis=-1, keepdims=False) - - def get_config(self): - config = { - 'axis': self.axis, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.layers.concatenate') + + def __init__(self, axis=-1, **kwargs): + """Instantiates a Concatenate layer. + + >>> x = np.arange(20).reshape(2, 2, 5) + >>> print(x) + [[[ 0 1 2 3 4] + [ 5 6 7 8 9]] + [[10 11 12 13 14] + [15 16 17 18 19]]] + >>> y = np.arange(20, 30).reshape(2, 1, 5) + >>> print(y) + [[[20 21 22 23 24]] + [[25 26 27 28 29]]] + >>> tf.keras.layers.Concatenate(axis=1)([x, y]) + + + Args: + axis: Axis along which to concatenate. + **kwargs: standard layer keyword arguments. + """ + super().__init__(**kwargs) + self.axis = axis + self.supports_masking = True + self._reshape_required = False + + @tf_utils.shape_type_conversion + def build(self, input_shape): + # Used purely for shape validation. + if len(input_shape) < 1 or not isinstance(input_shape[0], tuple): + raise ValueError( + "A `Concatenate` layer should be called on a list of " + f"at least 1 input. Received: input_shape={input_shape}" + ) + if all(shape is None for shape in input_shape): + return + reduced_inputs_shapes = [list(shape) for shape in input_shape] + shape_set = set() + for i in range(len(reduced_inputs_shapes)): + del reduced_inputs_shapes[i][self.axis] + shape_set.add(tuple(reduced_inputs_shapes[i])) + + if len(shape_set) != 1: + err_msg = ( + "A `Concatenate` layer requires inputs with matching shapes " + "except for the concatenation axis. " + f"Received: input_shape={input_shape}" + ) + # Make sure all the shapes have same ranks. + ranks = set(len(shape) for shape in shape_set) + if len(ranks) != 1: + raise ValueError(err_msg) + # Get the only rank for the set. + (rank,) = ranks + for axis in range(rank): + # Skip the Nones in the shape since they are dynamic, also the + # axis for concat has been removed above. + unique_dims = set( + shape[axis] + for shape in shape_set + if shape[axis] is not None + ) + if len(unique_dims) > 1: + raise ValueError(err_msg) + + def _merge_function(self, inputs): + return backend.concatenate(inputs, axis=self.axis) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if (not isinstance(input_shape, (tuple, list))) or ( + not isinstance(input_shape[0], (tuple, list)) + ): + # The tf_utils.shape_type_conversion decorator turns tensorshapes + # into tuples, so we need to verify that `input_shape` is a + # list/tuple, *and* that the individual elements are themselves + # shape tuples. + raise ValueError( + "A `Concatenate` layer should be called on a list of inputs. 
" + f"Received: input_shape={input_shape}" + ) + input_shapes = input_shape + output_shape = list(input_shapes[0]) + for shape in input_shapes[1:]: + if output_shape[self.axis] is None or shape[self.axis] is None: + output_shape[self.axis] = None + break + output_shape[self.axis] += shape[self.axis] + return tuple(output_shape) + + def compute_mask(self, inputs, mask=None): + if mask is None: + return None + if not isinstance(mask, (tuple, list)): + raise ValueError(f"`mask` should be a list. Received mask={mask}") + if not isinstance(inputs, (tuple, list)): + raise ValueError( + f"`inputs` should be a list. Received: inputs={inputs}" + ) + if len(mask) != len(inputs): + raise ValueError( + "The lists `inputs` and `mask` should have the same length. " + f"Received: inputs={inputs} of length {len(inputs)}, and " + f"mask={mask} of length {len(mask)}" + ) + if all(m is None for m in mask): + return None + # Make a list of masks while making sure + # the dimensionality of each mask + # is the same as the corresponding input. + masks = [] + for input_i, mask_i in zip(inputs, mask): + if mask_i is None: + # Input is unmasked. Append all 1s to masks, + masks.append(tf.ones_like(input_i, dtype="bool")) + elif backend.ndim(mask_i) < backend.ndim(input_i): + # Mask is smaller than the input, expand it + masks.append(tf.expand_dims(mask_i, axis=-1)) + else: + masks.append(mask_i) + concatenated = backend.concatenate(masks, axis=self.axis) + return backend.all(concatenated, axis=-1, keepdims=False) + + def get_config(self): + config = { + "axis": self.axis, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.layers.concatenate") def concatenate(inputs, axis=-1, **kwargs): - """Functional interface to the `Concatenate` layer. - - >>> x = np.arange(20).reshape(2, 2, 5) - >>> print(x) - [[[ 0 1 2 3 4] - [ 5 6 7 8 9]] - [[10 11 12 13 14] - [15 16 17 18 19]]] - >>> y = np.arange(20, 30).reshape(2, 1, 5) - >>> print(y) - [[[20 21 22 23 24]] - [[25 26 27 28 29]]] - >>> tf.keras.layers.concatenate([x, y], - ... axis=1) - - - Args: - inputs: A list of input tensors. - axis: Concatenation axis. - **kwargs: Standard layer keyword arguments. - - Returns: - A tensor, the concatenation of the inputs alongside axis `axis`. - """ - return Concatenate(axis=axis, **kwargs)(inputs) + """Functional interface to the `Concatenate` layer. + + >>> x = np.arange(20).reshape(2, 2, 5) + >>> print(x) + [[[ 0 1 2 3 4] + [ 5 6 7 8 9]] + [[10 11 12 13 14] + [15 16 17 18 19]]] + >>> y = np.arange(20, 30).reshape(2, 1, 5) + >>> print(y) + [[[20 21 22 23 24]] + [[25 26 27 28 29]]] + >>> tf.keras.layers.concatenate([x, y], + ... axis=1) + + + Args: + inputs: A list of input tensors. + axis: Concatenation axis. + **kwargs: Standard layer keyword arguments. + + Returns: + A tensor, the concatenation of the inputs alongside axis `axis`. 
+ """ + return Concatenate(axis=axis, **kwargs)(inputs) diff --git a/keras/layers/merging/dot.py b/keras/layers/merging/dot.py index 249457c3a22d..27fb48350925 100644 --- a/keras/layers/merging/dot.py +++ b/keras/layers/merging/dot.py @@ -15,200 +15,212 @@ """Layer that computes the dot product between two inputs.""" +import tensorflow.compat.v2 as tf + from keras import backend from keras.engine import base_layer_utils from keras.layers.merging.base_merge import _Merge from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Dot') +@keras_export("keras.layers.Dot") class Dot(_Merge): - """Layer that computes a dot product between samples in two tensors. - - E.g. if applied to a list of two tensors `a` and `b` of shape - `(batch_size, n)`, the output will be a tensor of shape `(batch_size, 1)` - where each entry `i` will be the dot product between - `a[i]` and `b[i]`. - - >>> x = np.arange(10).reshape(1, 5, 2) - >>> print(x) - [[[0 1] - [2 3] - [4 5] - [6 7] - [8 9]]] - >>> y = np.arange(10, 20).reshape(1, 2, 5) - >>> print(y) - [[[10 11 12 13 14] - [15 16 17 18 19]]] - >>> tf.keras.layers.Dot(axes=(1, 2))([x, y]) - - - >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2)) - >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2)) - >>> dotted = tf.keras.layers.Dot(axes=1)([x1, x2]) - >>> dotted.shape - TensorShape([5, 1]) - - - """ - - def __init__(self, axes, normalize=False, **kwargs): - """Initializes a layer that computes the element-wise dot product. - - >>> x = np.arange(10).reshape(1, 5, 2) - >>> print(x) - [[[0 1] - [2 3] - [4 5] - [6 7] - [8 9]]] - >>> y = np.arange(10, 20).reshape(1, 2, 5) - >>> print(y) - [[[10 11 12 13 14] - [15 16 17 18 19]]] - >>> tf.keras.layers.Dot(axes=(1, 2))([x, y]) - + """Layer that computes a dot product between samples in two tensors. + + E.g. if applied to a list of two tensors `a` and `b` of shape + `(batch_size, n)`, the output will be a tensor of shape `(batch_size, 1)` + where each entry `i` will be the dot product between + `a[i]` and `b[i]`. + + >>> x = np.arange(10).reshape(1, 5, 2) + >>> print(x) + [[[0 1] + [2 3] + [4 5] + [6 7] + [8 9]]] + >>> y = np.arange(10, 20).reshape(1, 2, 5) + >>> print(y) + [[[10 11 12 13 14] + [15 16 17 18 19]]] + >>> tf.keras.layers.Dot(axes=(1, 2))([x, y]) + + + >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2)) + >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2)) + >>> dotted = tf.keras.layers.Dot(axes=1)([x1, x2]) + >>> dotted.shape + TensorShape([5, 1]) + - Args: - axes: Integer or tuple of integers, - axis or axes along which to take the dot product. If a tuple, should - be two integers corresponding to the desired axis from the first input - and the desired axis from the second input, respectively. Note that the - size of the two selected axes must match. - normalize: Whether to L2-normalize samples along the - dot product axis before taking the dot product. - If set to True, then the output of the dot product - is the cosine proximity between the two samples. - **kwargs: Standard layer keyword arguments. """ - super().__init__(**kwargs) - if not isinstance(axes, int): - if not isinstance(axes, (list, tuple)): - raise TypeError( - 'Invalid type for argument `axes`: it should be ' - f'a list or an int. Received: axes={axes}') - if len(axes) != 2: - raise ValueError( - 'Invalid format for argument `axes`: it should contain two ' - f'elements. 
Received: axes={axes}') - if not isinstance(axes[0], int) or not isinstance(axes[1], int): - raise ValueError( - 'Invalid format for argument `axes`: list elements should be ' - f'integers. Received: axes={axes}') - self.axes = axes - self.normalize = normalize - self.supports_masking = True - self._reshape_required = False - - @tf_utils.shape_type_conversion - def build(self, input_shape): - # Used purely for shape validation. - if not isinstance(input_shape[0], tuple) or len(input_shape) != 2: - raise ValueError( - 'A `Dot` layer should be called on a list of 2 inputs. ' - f'Received: input_shape={input_shape}') - shape1 = input_shape[0] - shape2 = input_shape[1] - if shape1 is None or shape2 is None: - return - if isinstance(self.axes, int): - if self.axes < 0: - axes = [self.axes % len(shape1), self.axes % len(shape2)] - else: - axes = [self.axes] * 2 - else: - axes = self.axes - if shape1[axes[0]] != shape2[axes[1]]: - raise ValueError( - 'Incompatible input shapes: ' - f'axis values {shape1[axes[0]]} (at axis {axes[0]}) != ' - f'{shape2[axes[1]]} (at axis {axes[1]}). ' - f'Full input shapes: {shape1}, {shape2}') - - def _merge_function(self, inputs): - base_layer_utils.no_ragged_support(inputs, self.name) - if len(inputs) != 2: - raise ValueError( - 'A `Dot` layer should be called on exactly 2 inputs. ' - f'Received: inputs={inputs}') - x1 = inputs[0] - x2 = inputs[1] - if isinstance(self.axes, int): - if self.axes < 0: - axes = [self.axes % backend.ndim(x1), self.axes % backend.ndim(x2)] - else: - axes = [self.axes] * 2 - else: - axes = [] - for i in range(len(self.axes)): - if self.axes[i] < 0: - axes.append(self.axes[i] % backend.ndim(inputs[i])) + + def __init__(self, axes, normalize=False, **kwargs): + """Initializes a layer that computes the element-wise dot product. + + >>> x = np.arange(10).reshape(1, 5, 2) + >>> print(x) + [[[0 1] + [2 3] + [4 5] + [6 7] + [8 9]]] + >>> y = np.arange(10, 20).reshape(1, 2, 5) + >>> print(y) + [[[10 11 12 13 14] + [15 16 17 18 19]]] + >>> tf.keras.layers.Dot(axes=(1, 2))([x, y]) + + + Args: + axes: Integer or tuple of integers, + axis or axes along which to take the dot product. If a tuple, should + be two integers corresponding to the desired axis from the first + input and the desired axis from the second input, respectively. Note + that the size of the two selected axes must match. + normalize: Whether to L2-normalize samples along the + dot product axis before taking the dot product. + If set to True, then the output of the dot product + is the cosine proximity between the two samples. + **kwargs: Standard layer keyword arguments. + """ + super().__init__(**kwargs) + if not isinstance(axes, int): + if not isinstance(axes, (list, tuple)): + raise TypeError( + "Invalid type for argument `axes`: it should be " + f"a list or an int. Received: axes={axes}" + ) + if len(axes) != 2: + raise ValueError( + "Invalid format for argument `axes`: it should contain two " + f"elements. Received: axes={axes}" + ) + if not isinstance(axes[0], int) or not isinstance(axes[1], int): + raise ValueError( + "Invalid format for argument `axes`: list elements should " + f"be integers. Received: axes={axes}" + ) + self.axes = axes + self.normalize = normalize + self.supports_masking = True + self._reshape_required = False + + @tf_utils.shape_type_conversion + def build(self, input_shape): + # Used purely for shape validation. 
+ if not isinstance(input_shape[0], tuple) or len(input_shape) != 2: + raise ValueError( + "A `Dot` layer should be called on a list of 2 inputs. " + f"Received: input_shape={input_shape}" + ) + shape1 = input_shape[0] + shape2 = input_shape[1] + if shape1 is None or shape2 is None: + return + if isinstance(self.axes, int): + if self.axes < 0: + axes = [self.axes % len(shape1), self.axes % len(shape2)] + else: + axes = [self.axes] * 2 else: - axes.append(self.axes[i]) - if self.normalize: - x1 = tf.linalg.l2_normalize(x1, axis=axes[0]) - x2 = tf.linalg.l2_normalize(x2, axis=axes[1]) - output = backend.batch_dot(x1, x2, axes) - return output - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if not isinstance(input_shape, (tuple, list)) or len(input_shape) != 2: - raise ValueError( - 'A `Dot` layer should be called on a list of 2 inputs. ' - f'Received: input_shape={input_shape}') - shape1 = list(input_shape[0]) - shape2 = list(input_shape[1]) - if isinstance(self.axes, int): - if self.axes < 0: - axes = [self.axes % len(shape1), self.axes % len(shape2)] - else: - axes = [self.axes] * 2 - else: - axes = self.axes - shape1.pop(axes[0]) - shape2.pop(axes[1]) - shape2.pop(0) - output_shape = shape1 + shape2 - if len(output_shape) == 1: - output_shape += [1] - return tuple(output_shape) - - def compute_mask(self, inputs, mask=None): - return None - - def get_config(self): - config = { - 'axes': self.axes, - 'normalize': self.normalize, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.layers.dot') + axes = self.axes + if shape1[axes[0]] != shape2[axes[1]]: + raise ValueError( + "Incompatible input shapes: " + f"axis values {shape1[axes[0]]} (at axis {axes[0]}) != " + f"{shape2[axes[1]]} (at axis {axes[1]}). " + f"Full input shapes: {shape1}, {shape2}" + ) + + def _merge_function(self, inputs): + base_layer_utils.no_ragged_support(inputs, self.name) + if len(inputs) != 2: + raise ValueError( + "A `Dot` layer should be called on exactly 2 inputs. " + f"Received: inputs={inputs}" + ) + x1 = inputs[0] + x2 = inputs[1] + if isinstance(self.axes, int): + if self.axes < 0: + axes = [ + self.axes % backend.ndim(x1), + self.axes % backend.ndim(x2), + ] + else: + axes = [self.axes] * 2 + else: + axes = [] + for i in range(len(self.axes)): + if self.axes[i] < 0: + axes.append(self.axes[i] % backend.ndim(inputs[i])) + else: + axes.append(self.axes[i]) + if self.normalize: + x1 = tf.linalg.l2_normalize(x1, axis=axes[0]) + x2 = tf.linalg.l2_normalize(x2, axis=axes[1]) + output = backend.batch_dot(x1, x2, axes) + return output + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if not isinstance(input_shape, (tuple, list)) or len(input_shape) != 2: + raise ValueError( + "A `Dot` layer should be called on a list of 2 inputs. 
" + f"Received: input_shape={input_shape}" + ) + shape1 = list(input_shape[0]) + shape2 = list(input_shape[1]) + if isinstance(self.axes, int): + if self.axes < 0: + axes = [self.axes % len(shape1), self.axes % len(shape2)] + else: + axes = [self.axes] * 2 + else: + axes = self.axes + shape1.pop(axes[0]) + shape2.pop(axes[1]) + shape2.pop(0) + output_shape = shape1 + shape2 + if len(output_shape) == 1: + output_shape += [1] + return tuple(output_shape) + + def compute_mask(self, inputs, mask=None): + return None + + def get_config(self): + config = { + "axes": self.axes, + "normalize": self.normalize, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.layers.dot") def dot(inputs, axes, normalize=False, **kwargs): - """Functional interface to the `Dot` layer. - - Args: - inputs: A list of input tensors (at least 2). - axes: Integer or tuple of integers, - axis or axes along which to take the dot product. - normalize: Whether to L2-normalize samples along the - dot product axis before taking the dot product. - If set to True, then the output of the dot product - is the cosine proximity between the two samples. - **kwargs: Standard layer keyword arguments. - - Returns: - A tensor, the dot product of the samples from the inputs. - """ - return Dot(axes=axes, normalize=normalize, **kwargs)(inputs) + """Functional interface to the `Dot` layer. + + Args: + inputs: A list of input tensors (at least 2). + axes: Integer or tuple of integers, + axis or axes along which to take the dot product. + normalize: Whether to L2-normalize samples along the + dot product axis before taking the dot product. + If set to True, then the output of the dot product + is the cosine proximity between the two samples. + **kwargs: Standard layer keyword arguments. + + Returns: + A tensor, the dot product of the samples from the inputs. + """ + return Dot(axes=axes, normalize=normalize, **kwargs)(inputs) diff --git a/keras/layers/merging/maximum.py b/keras/layers/merging/maximum.py index 413536220b0f..de939d2856cc 100644 --- a/keras/layers/merging/maximum.py +++ b/keras/layers/merging/maximum.py @@ -15,69 +15,71 @@ """Layer that computes the maximum (element-wise) of several inputs.""" -from keras.layers.merging.base_merge import _Merge import tensorflow.compat.v2 as tf +from keras.layers.merging.base_merge import _Merge + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Maximum') +@keras_export("keras.layers.Maximum") class Maximum(_Merge): - """Layer that computes the maximum (element-wise) a list of inputs. - - It takes as input a list of tensors, all of the same shape, and returns - a single tensor (also of the same shape). - - >>> tf.keras.layers.Maximum()([np.arange(5).reshape(5, 1), - ... np.arange(5, 10).reshape(5, 1)]) - - - >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2)) - >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2)) - >>> maxed = tf.keras.layers.Maximum()([x1, x2]) - >>> maxed.shape - TensorShape([5, 8]) - """ - - def _merge_function(self, inputs): - output = inputs[0] - for i in range(1, len(inputs)): - output = tf.maximum(output, inputs[i]) - return output - - -@keras_export('keras.layers.maximum') + """Layer that computes the maximum (element-wise) a list of inputs. + + It takes as input a list of tensors, all of the same shape, and returns + a single tensor (also of the same shape). 
+
+ >>> tf.keras.layers.Maximum()([np.arange(5).reshape(5, 1),
+ ... np.arange(5, 10).reshape(5, 1)])
+ <tf.Tensor: shape=(5, 1), dtype=int64, numpy=
+ array([[5],
+ [6],
+ [7],
+ [8],
+ [9]])>
+
+ >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2))
+ >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))
+ >>> maxed = tf.keras.layers.Maximum()([x1, x2])
+ >>> maxed.shape
+ TensorShape([5, 8])
+ """
+
+ def _merge_function(self, inputs):
+ output = inputs[0]
+ for i in range(1, len(inputs)):
+ output = tf.maximum(output, inputs[i])
+ return output
+
+
+@keras_export("keras.layers.maximum")
def maximum(inputs, **kwargs):
- """Functional interface to compute maximum (element-wise) list of `inputs`.
-
- This is equivalent to the `tf.keras.layers.Maximum` layer.
-
- For example:
-
- ```python
- input1 = tf.keras.layers.Input(shape=(16,))
- x1 = tf.keras.layers.Dense(8, activation='relu')(input1) #shape=(None, 8)
- input2 = tf.keras.layers.Input(shape=(32,))
- x2 = tf.keras.layers.Dense(8, activation='relu')(input2) #shape=(None, 8)
- max_inp=tf.keras.layers.maximum([x1,x2]) #shape=(None, 8)
- out = tf.keras.layers.Dense(4)(max_inp)
- model = tf.keras.models.Model(inputs=[input1, input2], outputs=out)
- ```
-
- Args:
- inputs: A list of input tensors of same shape.
- **kwargs: Standard layer keyword arguments.
-
- Returns:
- A tensor (of same shape as input tensor) with the element-wise
- maximum of the inputs.
-
- Raises:
- ValueError: If input tensors are of different shape.
- """
- return Maximum(**kwargs)(inputs)
+ """Functional interface to compute the maximum (element-wise) of `inputs`.
+
+ This is equivalent to the `tf.keras.layers.Maximum` layer.
+
+ For example:
+
+ ```python
+ input1 = tf.keras.layers.Input(shape=(16,))
+ x1 = tf.keras.layers.Dense(8, activation='relu')(input1) #shape=(None, 8)
+ input2 = tf.keras.layers.Input(shape=(32,))
+ x2 = tf.keras.layers.Dense(8, activation='relu')(input2) #shape=(None, 8)
+ max_inp=tf.keras.layers.maximum([x1,x2]) #shape=(None, 8)
+ out = tf.keras.layers.Dense(4)(max_inp)
+ model = tf.keras.models.Model(inputs=[input1, input2], outputs=out)
+ ```
+
+ Args:
+ inputs: A list of input tensors of the same shape.
+ **kwargs: Standard layer keyword arguments.
+
+ Returns:
+ A tensor (of the same shape as the input tensors) with the element-wise
+ maximum of the inputs.
+
+ Raises:
+ ValueError: If input tensors are of different shape.
+ """ + return Maximum(**kwargs)(inputs) diff --git a/keras/layers/merging/merging_test.py b/keras/layers/merging/merging_test.py index f81c54e825a2..1f3b597467e6 100644 --- a/keras/layers/merging/merging_test.py +++ b/keras/layers/merging/merging_test.py @@ -14,437 +14,488 @@ # ============================================================================== """Tests for merging layers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras import backend from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import tf_inspect -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class MergingLayersTest(test_combinations.TestCase): - - def test_add(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - i3 = keras.layers.Input(shape=(4, 5)) - - add_layer = keras.layers.Add() - o = add_layer([i1, i2, i3]) - self.assertListEqual(o.shape.as_list(), [None, 4, 5]) - model = keras.models.Model([i1, i2, i3], o) - model.run_eagerly = test_utils.should_run_eagerly() - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - x3 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2, x3]) - self.assertEqual(out.shape, (2, 4, 5)) - self.assertAllClose(out, x1 + x2 + x3, atol=1e-4) - - self.assertIsNone(add_layer.compute_mask([i1, i2, i3], [None, None, None])) - self.assertTrue( - np.all( - backend.eval( - add_layer.compute_mask( - [i1, i2], [backend.variable(x1), backend.variable(x2)])))) - - with self.assertRaisesRegex(ValueError, '`mask` should be a list.'): - add_layer.compute_mask([i1, i2, i3], x1) - with self.assertRaisesRegex(ValueError, '`inputs` should be a list.'): - add_layer.compute_mask(i1, [None, None, None]) - with self.assertRaisesRegex(ValueError, ' should have the same length.'): - add_layer.compute_mask([i1, i2, i3], [None, None]) - - def test_subtract(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - i3 = keras.layers.Input(shape=(4, 5)) - - subtract_layer = keras.layers.Subtract() - o = subtract_layer([i1, i2]) - self.assertListEqual(o.shape.as_list(), [None, 4, 5]) - model = keras.models.Model([i1, i2], o) - model.run_eagerly = test_utils.should_run_eagerly() - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - self.assertEqual(out.shape, (2, 4, 5)) - self.assertAllClose(out, x1 - x2, atol=1e-4) - - self.assertIsNone(subtract_layer.compute_mask([i1, i2], [None, None])) - self.assertTrue( - np.all( - backend.eval( - subtract_layer.compute_mask( - [i1, i2], [backend.variable(x1), backend.variable(x2)])))) - - with self.assertRaisesRegex(ValueError, '`mask` should be a list.'): - subtract_layer.compute_mask([i1, i2], x1) - with self.assertRaisesRegex(ValueError, '`inputs` should be a list.'): - subtract_layer.compute_mask(i1, [None, None]) - with self.assertRaisesRegex(ValueError, - 'layer should be called on exactly 2 inputs'): - subtract_layer([i1, i2, i3]) - with self.assertRaisesRegex(ValueError, - 'layer should be called on exactly 2 inputs'): - subtract_layer([i1]) - - def test_multiply(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - i3 = keras.layers.Input(shape=(4, 5)) - o = keras.layers.multiply([i1, i2, i3]) - self.assertListEqual(o.shape.as_list(), [None, 4, 5]) - model = keras.models.Model([i1, i2, i3], o) - model.run_eagerly = 
test_utils.should_run_eagerly() - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - x3 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2, x3]) - self.assertEqual(out.shape, (2, 4, 5)) - self.assertAllClose(out, x1 * x2 * x3, atol=1e-4) - - def test_average(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - o = keras.layers.average([i1, i2]) - self.assertListEqual(o.shape.as_list(), [None, 4, 5]) - model = keras.models.Model([i1, i2], o) - model.run_eagerly = test_utils.should_run_eagerly() - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - self.assertEqual(out.shape, (2, 4, 5)) - self.assertAllClose(out, 0.5 * (x1 + x2), atol=1e-4) - - def test_maximum(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - o = keras.layers.maximum([i1, i2]) - self.assertListEqual(o.shape.as_list(), [None, 4, 5]) - model = keras.models.Model([i1, i2], o) - model.run_eagerly = test_utils.should_run_eagerly() - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - self.assertEqual(out.shape, (2, 4, 5)) - self.assertAllClose(out, np.maximum(x1, x2), atol=1e-4) - - def test_minimum(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - o = keras.layers.minimum([i1, i2]) - self.assertListEqual(o.shape.as_list(), [None, 4, 5]) - model = keras.models.Model([i1, i2], o) - model.run_eagerly = test_utils.should_run_eagerly() - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - self.assertEqual(out.shape, (2, 4, 5)) - self.assertAllClose(out, np.minimum(x1, x2), atol=1e-4) - - def test_concatenate(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - concat_layer = keras.layers.Concatenate(axis=1) - o = concat_layer([i1, i2]) - self.assertListEqual(o.shape.as_list(), [None, 8, 5]) - model = keras.models.Model([i1, i2], o) - model.run_eagerly = test_utils.should_run_eagerly() - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - self.assertEqual(out.shape, (2, 8, 5)) - self.assertAllClose(out, np.concatenate([x1, x2], axis=1), atol=1e-4) - - self.assertIsNone(concat_layer.compute_mask([i1, i2], [None, None])) - self.assertTrue( - np.all( - backend.eval( - concat_layer.compute_mask( - [i1, i2], [backend.variable(x1), backend.variable(x2)])))) - - # Should work with unit-length input. 
- unit_length_o = concat_layer([i1]) - self.assertListEqual(unit_length_o.shape.as_list(), i1.shape.as_list()) - - with self.assertRaisesRegex(ValueError, '`mask` should be a list.'): - concat_layer.compute_mask([i1, i2], x1) - with self.assertRaisesRegex(ValueError, '`inputs` should be a list.'): - concat_layer.compute_mask(i1, [None, None]) - with self.assertRaisesRegex(ValueError, 'should have the same length'): - concat_layer.compute_mask([i1, i2], [None]) - with self.assertRaisesRegex(ValueError, - 'layer should be called on a list of inputs'): - concat_layer(i1) - - def test_concatenate_numpy_inputs(self): - if tf.executing_eagerly(): - layer = keras.layers.Concatenate() - x, y = np.ones((10, 10)), np.ones((10, 10)) - self.assertAllEqual(np.ones((10, 20)), layer([x, y])) - - def test_dot(self): - i1 = keras.layers.Input(shape=(4,)) - i2 = keras.layers.Input(shape=(4,)) - o = keras.layers.dot([i1, i2], axes=1) - self.assertListEqual(o.shape.as_list(), [None, 1]) - model = keras.models.Model([i1, i2], o) - model.run_eagerly = test_utils.should_run_eagerly() - _ = keras.layers.Dot(axes=1).get_config() - - x1 = np.random.random((2, 4)) - x2 = np.random.random((2, 4)) - out = model.predict([x1, x2]) - self.assertEqual(out.shape, (2, 1)) - expected = np.zeros((2, 1)) - expected[0, 0] = np.dot(x1[0], x2[0]) - expected[1, 0] = np.dot(x1[1], x2[1]) - self.assertAllClose(out, expected, atol=1e-4) - - # Test with negative tuple of axes. - o = keras.layers.dot([i1, i2], axes=(-1, -1)) - self.assertListEqual(o.shape.as_list(), [None, 1]) - model = keras.models.Model([i1, i2], o) - model.run_eagerly = test_utils.should_run_eagerly() - out = model.predict([x1, x2]) - self.assertEqual(out.shape, (2, 1)) - self.assertAllClose(out, expected, atol=1e-4) - - # test compute_output_shape - layer = keras.layers.Dot(axes=-1) - self.assertEqual(layer.compute_output_shape([(4, 5), (4, 5)]), (4, 1)) - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - layer=[keras.layers.Add, keras.layers.Subtract, - keras.layers.Multiply, keras.layers.Minimum, - keras.layers.Maximum, keras.layers.Average])) - def test_merging_with_ragged_input(self, layer): - ragged_data = tf.ragged.constant( - [[1., 1., 1.], [1., 1.], [1., 1., 1., 1.]], ragged_rank=1) - dense_data = ragged_data.to_tensor() - input1 = keras.Input(shape=(None,), ragged=True) - input2 = keras.Input(shape=(None,), ragged=True) - out = layer()([input1, input2]) - model = keras.models.Model(inputs=[input1, input2], outputs=out) - out_ragged = model.predict([ragged_data, ragged_data], steps=1) - out_ragged = convert_ragged_tensor_value(out_ragged).to_tensor() - - input1 = keras.Input(shape=(None,)) - input2 = keras.Input(shape=(None,)) - out = layer()([input1, input2]) - model = keras.models.Model(inputs=[input1, input2], outputs=out) - out_dense = model.predict([dense_data, dense_data], steps=1) - - self.assertAllEqual(out_dense, out_ragged) - - def test_concatenate_with_ragged_input(self): - ragged1 = tf.ragged.constant([[1., 1.], [1.], [1., 1., 1.]], ragged_rank=1) - ragged2 = tf.ragged.constant([[2., 2., 2.], [2.], [2., 2.]], ragged_rank=1) - expected_concatenated_ragged = tf.ragged.constant( - [[1., 1., 2., 2., 2.], [1., 2.], [1., 1., 1., 2., 2.]], ragged_rank=1) - input1 = keras.Input(shape=(None,), ragged=True) - input2 = keras.Input(shape=(None,), ragged=True) - out = keras.layers.Concatenate(axis=1)([input1, input2]) - model = keras.models.Model(inputs=[input1, input2], outputs=out) - out_ragged = 
model.predict([ragged1, ragged2], steps=1) - self.assertAllEqual(out_ragged, expected_concatenated_ragged) - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - layer=[keras.layers.Add, keras.layers.Subtract, - keras.layers.Multiply, keras.layers.Minimum, - keras.layers.Maximum, keras.layers.Average])) - def test_merging_with_scalar_input(self, layer): - x1 = np.array((1)) - x2 = np.array((2)) - out = layer()([x1, x2]) - self.assertEqual(out.shape, ()) - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name(layer=[ - keras.layers.Add, keras.layers.add, keras.layers.Average, keras.layers - .average, keras.layers.Concatenate, keras.layers.concatenate, - keras.layers.Maximum, keras.layers.maximum, keras.layers.Minimum, - keras.layers.minimum, keras.layers.Multiply, keras.layers.multiply - ])) - def test_single_element(self, layer): - # Instantiate the Layer subclasses - if tf_inspect.isclass(layer) and issubclass(layer, keras.layers.Layer): - layer = layer() - - # Processing a single element list should behave as identity. - i1 = keras.layers.Input(shape=(4, 5)) - o = layer([i1]) - self.assertListEqual(o.shape.as_list(), [None, 4, 5]) - model = keras.models.Model(i1, o) - model.run_eagerly = test_utils.should_run_eagerly() - - x1 = np.random.random((2, 4, 5)) - out = model.predict(x1) - self.assertEqual(out.shape, (2, 4, 5)) - self.assertAllClose(out, x1) - - # A single element must be passed as a list, not by itself. - with self.assertRaisesRegex(ValueError, 'called on a list'): - layer(i1) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_add(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + i3 = keras.layers.Input(shape=(4, 5)) + + add_layer = keras.layers.Add() + o = add_layer([i1, i2, i3]) + self.assertListEqual(o.shape.as_list(), [None, 4, 5]) + model = keras.models.Model([i1, i2, i3], o) + model.run_eagerly = test_utils.should_run_eagerly() + + x1 = np.random.random((2, 4, 5)) + x2 = np.random.random((2, 4, 5)) + x3 = np.random.random((2, 4, 5)) + out = model.predict([x1, x2, x3]) + self.assertEqual(out.shape, (2, 4, 5)) + self.assertAllClose(out, x1 + x2 + x3, atol=1e-4) + + self.assertIsNone( + add_layer.compute_mask([i1, i2, i3], [None, None, None]) + ) + self.assertTrue( + np.all( + backend.eval( + add_layer.compute_mask( + [i1, i2], [backend.variable(x1), backend.variable(x2)] + ) + ) + ) + ) + + with self.assertRaisesRegex(ValueError, "`mask` should be a list."): + add_layer.compute_mask([i1, i2, i3], x1) + with self.assertRaisesRegex(ValueError, "`inputs` should be a list."): + add_layer.compute_mask(i1, [None, None, None]) + with self.assertRaisesRegex( + ValueError, " should have the same length." 
+ ): + add_layer.compute_mask([i1, i2, i3], [None, None]) + + def test_subtract(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + i3 = keras.layers.Input(shape=(4, 5)) + + subtract_layer = keras.layers.Subtract() + o = subtract_layer([i1, i2]) + self.assertListEqual(o.shape.as_list(), [None, 4, 5]) + model = keras.models.Model([i1, i2], o) + model.run_eagerly = test_utils.should_run_eagerly() + + x1 = np.random.random((2, 4, 5)) + x2 = np.random.random((2, 4, 5)) + out = model.predict([x1, x2]) + self.assertEqual(out.shape, (2, 4, 5)) + self.assertAllClose(out, x1 - x2, atol=1e-4) + + self.assertIsNone(subtract_layer.compute_mask([i1, i2], [None, None])) + self.assertTrue( + np.all( + backend.eval( + subtract_layer.compute_mask( + [i1, i2], [backend.variable(x1), backend.variable(x2)] + ) + ) + ) + ) + + with self.assertRaisesRegex(ValueError, "`mask` should be a list."): + subtract_layer.compute_mask([i1, i2], x1) + with self.assertRaisesRegex(ValueError, "`inputs` should be a list."): + subtract_layer.compute_mask(i1, [None, None]) + with self.assertRaisesRegex( + ValueError, "layer should be called on exactly 2 inputs" + ): + subtract_layer([i1, i2, i3]) + with self.assertRaisesRegex( + ValueError, "layer should be called on exactly 2 inputs" + ): + subtract_layer([i1]) + + def test_multiply(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + i3 = keras.layers.Input(shape=(4, 5)) + o = keras.layers.multiply([i1, i2, i3]) + self.assertListEqual(o.shape.as_list(), [None, 4, 5]) + model = keras.models.Model([i1, i2, i3], o) + model.run_eagerly = test_utils.should_run_eagerly() + + x1 = np.random.random((2, 4, 5)) + x2 = np.random.random((2, 4, 5)) + x3 = np.random.random((2, 4, 5)) + out = model.predict([x1, x2, x3]) + self.assertEqual(out.shape, (2, 4, 5)) + self.assertAllClose(out, x1 * x2 * x3, atol=1e-4) + + def test_average(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + o = keras.layers.average([i1, i2]) + self.assertListEqual(o.shape.as_list(), [None, 4, 5]) + model = keras.models.Model([i1, i2], o) + model.run_eagerly = test_utils.should_run_eagerly() + + x1 = np.random.random((2, 4, 5)) + x2 = np.random.random((2, 4, 5)) + out = model.predict([x1, x2]) + self.assertEqual(out.shape, (2, 4, 5)) + self.assertAllClose(out, 0.5 * (x1 + x2), atol=1e-4) + + def test_maximum(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + o = keras.layers.maximum([i1, i2]) + self.assertListEqual(o.shape.as_list(), [None, 4, 5]) + model = keras.models.Model([i1, i2], o) + model.run_eagerly = test_utils.should_run_eagerly() + + x1 = np.random.random((2, 4, 5)) + x2 = np.random.random((2, 4, 5)) + out = model.predict([x1, x2]) + self.assertEqual(out.shape, (2, 4, 5)) + self.assertAllClose(out, np.maximum(x1, x2), atol=1e-4) + + def test_minimum(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + o = keras.layers.minimum([i1, i2]) + self.assertListEqual(o.shape.as_list(), [None, 4, 5]) + model = keras.models.Model([i1, i2], o) + model.run_eagerly = test_utils.should_run_eagerly() + + x1 = np.random.random((2, 4, 5)) + x2 = np.random.random((2, 4, 5)) + out = model.predict([x1, x2]) + self.assertEqual(out.shape, (2, 4, 5)) + self.assertAllClose(out, np.minimum(x1, x2), atol=1e-4) + + def test_concatenate(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + concat_layer = 
keras.layers.Concatenate(axis=1) + o = concat_layer([i1, i2]) + self.assertListEqual(o.shape.as_list(), [None, 8, 5]) + model = keras.models.Model([i1, i2], o) + model.run_eagerly = test_utils.should_run_eagerly() + + x1 = np.random.random((2, 4, 5)) + x2 = np.random.random((2, 4, 5)) + out = model.predict([x1, x2]) + self.assertEqual(out.shape, (2, 8, 5)) + self.assertAllClose(out, np.concatenate([x1, x2], axis=1), atol=1e-4) + + self.assertIsNone(concat_layer.compute_mask([i1, i2], [None, None])) + self.assertTrue( + np.all( + backend.eval( + concat_layer.compute_mask( + [i1, i2], [backend.variable(x1), backend.variable(x2)] + ) + ) + ) + ) + + # Should work with unit-length input. + unit_length_o = concat_layer([i1]) + self.assertListEqual(unit_length_o.shape.as_list(), i1.shape.as_list()) + + with self.assertRaisesRegex(ValueError, "`mask` should be a list."): + concat_layer.compute_mask([i1, i2], x1) + with self.assertRaisesRegex(ValueError, "`inputs` should be a list."): + concat_layer.compute_mask(i1, [None, None]) + with self.assertRaisesRegex(ValueError, "should have the same length"): + concat_layer.compute_mask([i1, i2], [None]) + with self.assertRaisesRegex( + ValueError, "layer should be called on a list of inputs" + ): + concat_layer(i1) + + def test_concatenate_numpy_inputs(self): + if tf.executing_eagerly(): + layer = keras.layers.Concatenate() + x, y = np.ones((10, 10)), np.ones((10, 10)) + self.assertAllEqual(np.ones((10, 20)), layer([x, y])) + + def test_dot(self): + i1 = keras.layers.Input(shape=(4,)) + i2 = keras.layers.Input(shape=(4,)) + o = keras.layers.dot([i1, i2], axes=1) + self.assertListEqual(o.shape.as_list(), [None, 1]) + model = keras.models.Model([i1, i2], o) + model.run_eagerly = test_utils.should_run_eagerly() + _ = keras.layers.Dot(axes=1).get_config() + + x1 = np.random.random((2, 4)) + x2 = np.random.random((2, 4)) + out = model.predict([x1, x2]) + self.assertEqual(out.shape, (2, 1)) + expected = np.zeros((2, 1)) + expected[0, 0] = np.dot(x1[0], x2[0]) + expected[1, 0] = np.dot(x1[1], x2[1]) + self.assertAllClose(out, expected, atol=1e-4) + + # Test with negative tuple of axes. 
+ o = keras.layers.dot([i1, i2], axes=(-1, -1)) + self.assertListEqual(o.shape.as_list(), [None, 1]) + model = keras.models.Model([i1, i2], o) + model.run_eagerly = test_utils.should_run_eagerly() + out = model.predict([x1, x2]) + self.assertEqual(out.shape, (2, 1)) + self.assertAllClose(out, expected, atol=1e-4) + + # test compute_output_shape + layer = keras.layers.Dot(axes=-1) + self.assertEqual(layer.compute_output_shape([(4, 5), (4, 5)]), (4, 1)) + + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + layer=[ + keras.layers.Add, + keras.layers.Subtract, + keras.layers.Multiply, + keras.layers.Minimum, + keras.layers.Maximum, + keras.layers.Average, + ] + ) + ) + def test_merging_with_ragged_input(self, layer): + ragged_data = tf.ragged.constant( + [[1.0, 1.0, 1.0], [1.0, 1.0], [1.0, 1.0, 1.0, 1.0]], ragged_rank=1 + ) + dense_data = ragged_data.to_tensor() + input1 = keras.Input(shape=(None,), ragged=True) + input2 = keras.Input(shape=(None,), ragged=True) + out = layer()([input1, input2]) + model = keras.models.Model(inputs=[input1, input2], outputs=out) + out_ragged = model.predict([ragged_data, ragged_data], steps=1) + out_ragged = convert_ragged_tensor_value(out_ragged).to_tensor() + + input1 = keras.Input(shape=(None,)) + input2 = keras.Input(shape=(None,)) + out = layer()([input1, input2]) + model = keras.models.Model(inputs=[input1, input2], outputs=out) + out_dense = model.predict([dense_data, dense_data], steps=1) + + self.assertAllEqual(out_dense, out_ragged) + + def test_concatenate_with_ragged_input(self): + ragged1 = tf.ragged.constant( + [[1.0, 1.0], [1.0], [1.0, 1.0, 1.0]], ragged_rank=1 + ) + ragged2 = tf.ragged.constant( + [[2.0, 2.0, 2.0], [2.0], [2.0, 2.0]], ragged_rank=1 + ) + expected_concatenated_ragged = tf.ragged.constant( + [[1.0, 1.0, 2.0, 2.0, 2.0], [1.0, 2.0], [1.0, 1.0, 1.0, 2.0, 2.0]], + ragged_rank=1, + ) + input1 = keras.Input(shape=(None,), ragged=True) + input2 = keras.Input(shape=(None,), ragged=True) + out = keras.layers.Concatenate(axis=1)([input1, input2]) + model = keras.models.Model(inputs=[input1, input2], outputs=out) + out_ragged = model.predict([ragged1, ragged2], steps=1) + self.assertAllEqual(out_ragged, expected_concatenated_ragged) + + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + layer=[ + keras.layers.Add, + keras.layers.Subtract, + keras.layers.Multiply, + keras.layers.Minimum, + keras.layers.Maximum, + keras.layers.Average, + ] + ) + ) + def test_merging_with_scalar_input(self, layer): + x1 = np.array((1)) + x2 = np.array((2)) + out = layer()([x1, x2]) + self.assertEqual(out.shape, ()) + + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + layer=[ + keras.layers.Add, + keras.layers.add, + keras.layers.Average, + keras.layers.average, + keras.layers.Concatenate, + keras.layers.concatenate, + keras.layers.Maximum, + keras.layers.maximum, + keras.layers.Minimum, + keras.layers.minimum, + keras.layers.Multiply, + keras.layers.multiply, + ] + ) + ) + def test_single_element(self, layer): + # Instantiate the Layer subclasses + if tf_inspect.isclass(layer) and issubclass(layer, keras.layers.Layer): + layer = layer() + + # Processing a single element list should behave as identity. 
+ i1 = keras.layers.Input(shape=(4, 5)) + o = layer([i1]) + self.assertListEqual(o.shape.as_list(), [None, 4, 5]) + model = keras.models.Model(i1, o) + model.run_eagerly = test_utils.should_run_eagerly() + + x1 = np.random.random((2, 4, 5)) + out = model.predict(x1) + self.assertEqual(out.shape, (2, 4, 5)) + self.assertAllClose(out, x1) + + # A single element must be passed as a list, not by itself. + with self.assertRaisesRegex(ValueError, "called on a list"): + layer(i1) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class MergingLayersTestNoExecution(tf.test.TestCase): - - def test_add_elementwise_errors(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 6)) - with self.assertRaises(ValueError): - keras.layers.add([i1, i2]) - with self.assertRaises(ValueError): - keras.layers.add(i1) - - def test_concatenate_errors(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(3, 5)) - with self.assertRaisesRegex(ValueError, 'inputs with matching shapes'): - keras.layers.concatenate([i1, i2], axis=-1) - with self.assertRaisesRegex(ValueError, 'called on a list'): - keras.layers.concatenate(i1, axis=-1) - - def test_concatenate_with_partial_shape(self): - i1 = keras.layers.Input(shape=(5,), batch_size=32) - i2 = keras.layers.Input(shape=(5,)) - i3 = keras.layers.Input(shape=(4, 5), batch_size=32) - i4 = keras.layers.Input(shape=(None,), batch_size=64) - i5 = keras.layers.Input(shape=(7,)) - - # Valid case since the i2 has a dynamic batch size. - keras.layers.concatenate([i1, i2], axis=-1) - - # Different rank - with self.assertRaisesRegex(ValueError, 'inputs with matching shapes'): - keras.layers.concatenate([i1, i3], axis=-1) - - # Valid case with partial dimension information - keras.layers.concatenate([i1, i4], axis=0) - keras.layers.concatenate([i2, i4], axis=0) - keras.layers.concatenate([i2, i4], axis=1) - keras.layers.concatenate([i1, i2, i4], axis=0) - keras.layers.concatenate([i1, i5], axis=1) - - # Mismatch in batch dimension. 
- with self.assertRaisesRegex(ValueError, 'inputs with matching shapes'): - keras.layers.concatenate([i1, i4], axis=-1) - - with self.assertRaisesRegex(ValueError, 'inputs with matching shapes'): - keras.layers.concatenate([i1, i2, i4], axis=-1) - - def test_dot_errors(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 6)) - i3 = keras.layers.Input(shape=(4, 6)) - with self.assertRaises(ValueError): - keras.layers.dot([i1, i2], axes=-1) - with self.assertRaises(ValueError): - keras.layers.dot(i1, axes=-1) - with self.assertRaises(ValueError): - keras.layers.dot([i1], axes=-1) - with self.assertRaises(ValueError): - keras.layers.dot([i1, i2, i3], axes=-1) - with self.assertRaises(ValueError): - dot = keras.layers.Dot(1) - dot.compute_output_shape(1) - - def test_subtract(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - y = keras.layers.subtract([i1, i2]) - self.assertEqual(y.shape.as_list(), [None, 4, 5]) - - # Test invalid use cases - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(3, 5)) - with self.assertRaises(ValueError): - keras.layers.subtract([i1, i2]) - with self.assertRaises(ValueError): - keras.layers.subtract([i1, i1, i1]) - - def test_add_masking(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - m1 = keras.layers.Masking()(i1) - layer = keras.layers.Add() - o = layer([m1, i2]) - self.assertListEqual(o.shape.as_list(), [None, 4, 5]) - mask = layer.output_mask - self.assertListEqual(mask.shape.as_list(), [None, 4]) - - def test_add_dynamic_shape(self): - i1 = keras.Input(batch_shape=(4, None), dtype='float32') - i2 = keras.Input(batch_shape=(4, 5), dtype='float32') - layer = keras.layers.Add() - o = layer([i1, i2]) - self.assertListEqual(o.shape.as_list(), [4, 5]) - - def test_concatenate_masking(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - m1 = keras.layers.Masking()(i1) - layer = keras.layers.Concatenate() - o = layer([m1, i2]) - self.assertListEqual(o.shape.as_list(), [None, 4, 10]) - mask = layer.output_mask - self.assertListEqual(mask.shape.as_list(), [None, 4]) - - def test_concatenate_sparse_shape(self): - i1 = keras.layers.Input(shape=(1,), batch_size=2, sparse=True) - i2 = keras.layers.Input(shape=(2,), batch_size=2, sparse=True) - layer = keras.layers.Concatenate(axis=1) - o = layer([i1, i2]) - self.assertListEqual(o.shape.as_list(), [2, 3]) - - # Make sure it also respect None as the batch size - i1 = keras.layers.Input(shape=(1,), sparse=True) - i2 = keras.layers.Input(shape=(2,), sparse=True) - layer = keras.layers.Concatenate(axis=1) - o = layer([i1, i2]) - self.assertListEqual(o.shape.as_list(), [None, 3]) - - def test_concatenate_user_changes_to_input_structure(self): - a = keras.layers.Input(shape=(4, 5)) - struct = [a, a] - concat1 = keras.layers.Concatenate(1) - b = concat1(struct) - struct.append(b) - concat2 = keras.layers.Concatenate(1) - c = concat2(struct) - - # Checks that the append to `struct` doesn't affect `concat1`s - # node data. - self.assertLen(concat1.inbound_nodes[0].input_tensors, 2) - self.assertLen(concat2.inbound_nodes[0].input_tensors, 3) - - keras.Model(a, c) # Ensure model can be built. 
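The error paths these tests pin down are easy to reproduce outside the test harness. As a minimal sketch, assuming TensorFlow 2.x in eager mode (illustrative only, not part of this patch):

```python
# Minimal sketch (not part of the patch): the validation behavior the
# merging-layer tests above exercise. Assumes TensorFlow 2.x, eager mode.
import numpy as np
import tensorflow as tf

x1 = np.random.random((2, 4)).astype("float32")
x2 = np.random.random((2, 4)).astype("float32")

# Dot(normalize=True) L2-normalizes both inputs along the dot axis first,
# so each output entry is the cosine proximity of a sample pair.
cos = tf.keras.layers.Dot(axes=1, normalize=True)([x1, x2])
print(cos.shape)  # (2, 1)

# Subtract is defined for exactly 2 inputs; a third input raises.
try:
    tf.keras.layers.Subtract()([x1, x2, x1])
except ValueError as err:
    print(type(err).__name__)  # ValueError
```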
+ def test_add_elementwise_errors(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 6)) + with self.assertRaises(ValueError): + keras.layers.add([i1, i2]) + with self.assertRaises(ValueError): + keras.layers.add(i1) + + def test_concatenate_errors(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(3, 5)) + with self.assertRaisesRegex(ValueError, "inputs with matching shapes"): + keras.layers.concatenate([i1, i2], axis=-1) + with self.assertRaisesRegex(ValueError, "called on a list"): + keras.layers.concatenate(i1, axis=-1) + + def test_concatenate_with_partial_shape(self): + i1 = keras.layers.Input(shape=(5,), batch_size=32) + i2 = keras.layers.Input(shape=(5,)) + i3 = keras.layers.Input(shape=(4, 5), batch_size=32) + i4 = keras.layers.Input(shape=(None,), batch_size=64) + i5 = keras.layers.Input(shape=(7,)) + + # Valid case since the i2 has a dynamic batch size. + keras.layers.concatenate([i1, i2], axis=-1) + + # Different rank + with self.assertRaisesRegex(ValueError, "inputs with matching shapes"): + keras.layers.concatenate([i1, i3], axis=-1) + + # Valid case with partial dimension information + keras.layers.concatenate([i1, i4], axis=0) + keras.layers.concatenate([i2, i4], axis=0) + keras.layers.concatenate([i2, i4], axis=1) + keras.layers.concatenate([i1, i2, i4], axis=0) + keras.layers.concatenate([i1, i5], axis=1) + + # Mismatch in batch dimension. + with self.assertRaisesRegex(ValueError, "inputs with matching shapes"): + keras.layers.concatenate([i1, i4], axis=-1) + + with self.assertRaisesRegex(ValueError, "inputs with matching shapes"): + keras.layers.concatenate([i1, i2, i4], axis=-1) + + def test_dot_errors(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 6)) + i3 = keras.layers.Input(shape=(4, 6)) + with self.assertRaises(ValueError): + keras.layers.dot([i1, i2], axes=-1) + with self.assertRaises(ValueError): + keras.layers.dot(i1, axes=-1) + with self.assertRaises(ValueError): + keras.layers.dot([i1], axes=-1) + with self.assertRaises(ValueError): + keras.layers.dot([i1, i2, i3], axes=-1) + with self.assertRaises(ValueError): + dot = keras.layers.Dot(1) + dot.compute_output_shape(1) + + def test_subtract(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + y = keras.layers.subtract([i1, i2]) + self.assertEqual(y.shape.as_list(), [None, 4, 5]) + + # Test invalid use cases + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(3, 5)) + with self.assertRaises(ValueError): + keras.layers.subtract([i1, i2]) + with self.assertRaises(ValueError): + keras.layers.subtract([i1, i1, i1]) + + def test_add_masking(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + m1 = keras.layers.Masking()(i1) + layer = keras.layers.Add() + o = layer([m1, i2]) + self.assertListEqual(o.shape.as_list(), [None, 4, 5]) + mask = layer.output_mask + self.assertListEqual(mask.shape.as_list(), [None, 4]) + + def test_add_dynamic_shape(self): + i1 = keras.Input(batch_shape=(4, None), dtype="float32") + i2 = keras.Input(batch_shape=(4, 5), dtype="float32") + layer = keras.layers.Add() + o = layer([i1, i2]) + self.assertListEqual(o.shape.as_list(), [4, 5]) + + def test_concatenate_masking(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + m1 = keras.layers.Masking()(i1) + layer = keras.layers.Concatenate() + o = layer([m1, i2]) + self.assertListEqual(o.shape.as_list(), [None, 4, 10]) + mask 
= layer.output_mask + self.assertListEqual(mask.shape.as_list(), [None, 4]) + + def test_concatenate_sparse_shape(self): + i1 = keras.layers.Input(shape=(1,), batch_size=2, sparse=True) + i2 = keras.layers.Input(shape=(2,), batch_size=2, sparse=True) + layer = keras.layers.Concatenate(axis=1) + o = layer([i1, i2]) + self.assertListEqual(o.shape.as_list(), [2, 3]) + + # Make sure it also respect None as the batch size + i1 = keras.layers.Input(shape=(1,), sparse=True) + i2 = keras.layers.Input(shape=(2,), sparse=True) + layer = keras.layers.Concatenate(axis=1) + o = layer([i1, i2]) + self.assertListEqual(o.shape.as_list(), [None, 3]) + + def test_concatenate_user_changes_to_input_structure(self): + a = keras.layers.Input(shape=(4, 5)) + struct = [a, a] + concat1 = keras.layers.Concatenate(1) + b = concat1(struct) + struct.append(b) + concat2 = keras.layers.Concatenate(1) + c = concat2(struct) + + # Checks that the append to `struct` doesn't affect `concat1`s + # node data. + self.assertLen(concat1.inbound_nodes[0].input_tensors, 2) + self.assertLen(concat2.inbound_nodes[0].input_tensors, 3) + + keras.Model(a, c) # Ensure model can be built. def convert_ragged_tensor_value(inputs): - if isinstance(inputs, tf.compat.v1.ragged.RaggedTensorValue): - flat_values = tf.convert_to_tensor( - value=inputs.flat_values, - name='flat_values') - return tf.RaggedTensor.from_nested_row_splits( - flat_values, inputs.nested_row_splits, validate=False) - return inputs - - -if __name__ == '__main__': - tf.test.main() + if isinstance(inputs, tf.compat.v1.ragged.RaggedTensorValue): + flat_values = tf.convert_to_tensor( + value=inputs.flat_values, name="flat_values" + ) + return tf.RaggedTensor.from_nested_row_splits( + flat_values, inputs.nested_row_splits, validate=False + ) + return inputs + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/merging/minimum.py b/keras/layers/merging/minimum.py index e3fe3fbea100..4bfbd784e771 100644 --- a/keras/layers/merging/minimum.py +++ b/keras/layers/merging/minimum.py @@ -15,51 +15,53 @@ """Layer that computes the minimum (element-wise) of several inputs.""" -from keras.layers.merging.base_merge import _Merge import tensorflow.compat.v2 as tf +from keras.layers.merging.base_merge import _Merge + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Minimum') +@keras_export("keras.layers.Minimum") class Minimum(_Merge): - """Layer that computes the minimum (element-wise) a list of inputs. + """Layer that computes the minimum (element-wise) a list of inputs. - It takes as input a list of tensors, all of the same shape, and returns - a single tensor (also of the same shape). + It takes as input a list of tensors, all of the same shape, and returns + a single tensor (also of the same shape). - >>> tf.keras.layers.Minimum()([np.arange(5).reshape(5, 1), - ... np.arange(5, 10).reshape(5, 1)]) - + >>> tf.keras.layers.Minimum()([np.arange(5).reshape(5, 1), + ... 
np.arange(5, 10).reshape(5, 1)])
+ <tf.Tensor: shape=(5, 1), dtype=int64, numpy=
+ array([[0],
+ [1],
+ [2],
+ [3],
+ [4]])>
-
- >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2))
- >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))
- >>> minned = tf.keras.layers.Minimum()([x1, x2])
- >>> minned.shape
- TensorShape([5, 8])
- """
+ >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2))
+ >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))
+ >>> minned = tf.keras.layers.Minimum()([x1, x2])
+ >>> minned.shape
+ TensorShape([5, 8])
+ """
- def _merge_function(self, inputs):
- output = inputs[0]
- for i in range(1, len(inputs)):
- output = tf.minimum(output, inputs[i])
- return output
+ def _merge_function(self, inputs):
+ output = inputs[0]
+ for i in range(1, len(inputs)):
+ output = tf.minimum(output, inputs[i])
+ return output
-@keras_export('keras.layers.minimum')
+@keras_export("keras.layers.minimum")
def minimum(inputs, **kwargs):
- """Functional interface to the `Minimum` layer.
+ """Functional interface to the `Minimum` layer.
- Args:
- inputs: A list of input tensors.
- **kwargs: Standard layer keyword arguments.
+ Args:
+ inputs: A list of input tensors.
+ **kwargs: Standard layer keyword arguments.
- Returns:
- A tensor, the element-wise minimum of the inputs.
- """
- return Minimum(**kwargs)(inputs)
+ Returns:
+ A tensor, the element-wise minimum of the inputs.
+ """
+ return Minimum(**kwargs)(inputs)
diff --git a/keras/layers/merging/multiply.py b/keras/layers/merging/multiply.py
index 2c016894814d..caae29c7907b 100644
--- a/keras/layers/merging/multiply.py
+++ b/keras/layers/merging/multiply.py
@@ -17,65 +17,68 @@
 from keras.layers.merging.base_merge import _Merge
+# isort: off
 from tensorflow.python.util.tf_export import keras_export
-@keras_export('keras.layers.Multiply')
+@keras_export("keras.layers.Multiply")
 class Multiply(_Merge):
- """Layer that multiplies (element-wise) a list of inputs.
-
- It takes as input a list of tensors, all of the same shape, and returns
- a single tensor (also of the same shape).
-
- >>> tf.keras.layers.Multiply()([np.arange(5).reshape(5, 1),
- ... np.arange(5, 10).reshape(5, 1)])
- <tf.Tensor: shape=(5, 1), dtype=int64, numpy=
- array([[ 0],
- [ 6],
- [14],
- [24],
- [36]])>
-
- >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2))
- >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))
- >>> multiplied = tf.keras.layers.Multiply()([x1, x2])
- >>> multiplied.shape
- TensorShape([5, 8])
- """
-
- def _merge_function(self, inputs):
- output = inputs[0]
- for i in range(1, len(inputs)):
- output = output * inputs[i]
- return output
-
-
-@keras_export('keras.layers.multiply')
+ """Layer that multiplies (element-wise) a list of inputs.
+
+ It takes as input a list of tensors, all of the same shape, and returns
+ a single tensor (also of the same shape).
+
+ >>> tf.keras.layers.Multiply()([np.arange(5).reshape(5, 1),
+ ... np.arange(5, 10).reshape(5, 1)])
+ <tf.Tensor: shape=(5, 1), dtype=int64, numpy=
+ array([[ 0],
+ [ 6],
+ [14],
+ [24],
+ [36]])>
+
+ >>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2))
+ >>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))
+ >>> multiplied = tf.keras.layers.Multiply()([x1, x2])
+ >>> multiplied.shape
+ TensorShape([5, 8])
+ """
+
+ def _merge_function(self, inputs):
+ output = inputs[0]
+ for i in range(1, len(inputs)):
+ output = output * inputs[i]
+ return output
+
+
+@keras_export("keras.layers.multiply")
def multiply(inputs, **kwargs):
- """Functional interface to the `Multiply` layer.
-
- Example:
-
- >>> x1 = np.arange(3.0)
- >>> x2 = np.arange(3.0)
- >>> tf.keras.layers.multiply([x1, x2])
- <tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 1., 4.], dtype=float32)>
-
- Usage in a functional model:
-
- >>> input1 = tf.keras.layers.Input(shape=(16,))
- >>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1) #shape=(None, 8)
- >>> input2 = tf.keras.layers.Input(shape=(32,))
- >>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2) #shape=(None, 8)
- >>> out = tf.keras.layers.multiply([x1,x2]) #shape=(None, 8)
- >>> out = tf.keras.layers.Dense(4)(out)
- >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out)
-
- Args:
- inputs: A list of input tensors.
- **kwargs: Standard layer keyword arguments.
-
- Returns:
- A tensor, the element-wise product of the inputs.
- """
- return Multiply(**kwargs)(inputs)
+ """Functional interface to the `Multiply` layer.
+
+ Example:
+
+ >>> x1 = np.arange(3.0)
+ >>> x2 = np.arange(3.0)
+ >>> tf.keras.layers.multiply([x1, x2])
+ <tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 1., 4.], dtype=float32)>
+
+ Usage in a functional model:
+
+ >>> input1 = tf.keras.layers.Input(shape=(16,))
+ >>> x1 = tf.keras.layers.Dense(
+ ... 8, activation='relu')(input1) #shape=(None, 8)
+ >>> input2 = tf.keras.layers.Input(shape=(32,))
+ >>> x2 = tf.keras.layers.Dense(
+ ... 8, activation='relu')(input2) #shape=(None, 8)
+ >>> out = tf.keras.layers.multiply([x1,x2]) #shape=(None, 8)
+ >>> out = tf.keras.layers.Dense(4)(out)
+ >>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out)
+
+ Args:
+ inputs: A list of input tensors.
+ **kwargs: Standard layer keyword arguments.
+
+ Returns:
+ A tensor, the element-wise product of the inputs.
+ """
+ return Multiply(**kwargs)(inputs)
diff --git a/keras/layers/merging/subtract.py b/keras/layers/merging/subtract.py
index 8d2b5ce659b9..de55fa516eaa 100644
--- a/keras/layers/merging/subtract.py
+++ b/keras/layers/merging/subtract.py
@@ -18,74 +18,76 @@
 from keras.layers.merging.base_merge import _Merge
 from keras.utils import tf_utils
+# isort: off
 from tensorflow.python.util.tf_export import keras_export
-@keras_export('keras.layers.Subtract')
+@keras_export("keras.layers.Subtract")
 class Subtract(_Merge):
- """Layer that subtracts two inputs.
-
- It takes as input a list of tensors of size 2,
- both of the same shape, and returns a single tensor, (inputs[0] - inputs[1]),
- also of the same shape.
-
- Examples:
-
- ```python
- import keras
-
- input1 = keras.layers.Input(shape=(16,))
- x1 = keras.layers.Dense(8, activation='relu')(input1)
- input2 = keras.layers.Input(shape=(32,))
- x2 = keras.layers.Dense(8, activation='relu')(input2)
- # Equivalent to subtracted = keras.layers.subtract([x1, x2])
- subtracted = keras.layers.Subtract()([x1, x2])
-
- out = keras.layers.Dense(4)(subtracted)
- model = keras.models.Model(inputs=[input1, input2], outputs=out)
- ```
- """
-
- @tf_utils.shape_type_conversion
- def build(self, input_shape):
- super().build(input_shape)
- if len(input_shape) != 2:
- raise ValueError(
- 'A `Subtract` layer should be called on exactly 2 inputs. '
- f'Received: input_shape={input_shape}')
-
- def _merge_function(self, inputs):
- if len(inputs) != 2:
- raise ValueError(
- 'A `Subtract` layer should be called on exactly 2 inputs. '
- f'Received: inputs={inputs}')
- return inputs[0] - inputs[1]
-
-
-@keras_export('keras.layers.subtract')
+ """Layer that subtracts two inputs.
+
+ It takes as input a list of tensors of size 2, both of the same shape, and
+ returns a single tensor, (inputs[0] - inputs[1]), also of the same shape.
+ + Examples: + + ```python + import keras + + input1 = keras.layers.Input(shape=(16,)) + x1 = keras.layers.Dense(8, activation='relu')(input1) + input2 = keras.layers.Input(shape=(32,)) + x2 = keras.layers.Dense(8, activation='relu')(input2) + # Equivalent to subtracted = keras.layers.subtract([x1, x2]) + subtracted = keras.layers.Subtract()([x1, x2]) + + out = keras.layers.Dense(4)(subtracted) + model = keras.models.Model(inputs=[input1, input2], outputs=out) + ``` + """ + + @tf_utils.shape_type_conversion + def build(self, input_shape): + super().build(input_shape) + if len(input_shape) != 2: + raise ValueError( + "A `Subtract` layer should be called on exactly 2 inputs. " + f"Received: input_shape={input_shape}" + ) + + def _merge_function(self, inputs): + if len(inputs) != 2: + raise ValueError( + "A `Subtract` layer should be called on exactly 2 inputs. " + f"Received: inputs={inputs}" + ) + return inputs[0] - inputs[1] + + +@keras_export("keras.layers.subtract") def subtract(inputs, **kwargs): - """Functional interface to the `Subtract` layer. + """Functional interface to the `Subtract` layer. - Args: - inputs: A list of input tensors (exactly 2). - **kwargs: Standard layer keyword arguments. + Args: + inputs: A list of input tensors (exactly 2). + **kwargs: Standard layer keyword arguments. - Returns: - A tensor, the difference of the inputs. + Returns: + A tensor, the difference of the inputs. - Examples: + Examples: - ```python - import keras + ```python + import keras - input1 = keras.layers.Input(shape=(16,)) - x1 = keras.layers.Dense(8, activation='relu')(input1) - input2 = keras.layers.Input(shape=(32,)) - x2 = keras.layers.Dense(8, activation='relu')(input2) - subtracted = keras.layers.subtract([x1, x2]) + input1 = keras.layers.Input(shape=(16,)) + x1 = keras.layers.Dense(8, activation='relu')(input1) + input2 = keras.layers.Input(shape=(32,)) + x2 = keras.layers.Dense(8, activation='relu')(input2) + subtracted = keras.layers.subtract([x1, x2]) - out = keras.layers.Dense(4)(subtracted) - model = keras.models.Model(inputs=[input1, input2], outputs=out) - ``` - """ - return Subtract(**kwargs)(inputs) + out = keras.layers.Dense(4)(subtracted) + model = keras.models.Model(inputs=[input1, input2], outputs=out) + ``` + """ + return Subtract(**kwargs)(inputs) diff --git a/keras/layers/noise.py b/keras/layers/noise.py index 62f113a0dc5a..7e479a435fd1 100644 --- a/keras/layers/noise.py +++ b/keras/layers/noise.py @@ -13,9 +13,14 @@ # limitations under the License. 
# ============================================================================== """Layers that operate regularization via the addition of noise.""" -# pylint: disable=g-bad-import-order,unused-import + + +from keras.layers.regularization.alpha_dropout import AlphaDropout # noqa: F401 # Regularization layers imported for backwards namespace compatibility -from keras.layers.regularization.gaussian_dropout import GaussianDropout -from keras.layers.regularization.gaussian_noise import GaussianNoise -from keras.layers.regularization.alpha_dropout import AlphaDropout +from keras.layers.regularization.gaussian_dropout import ( # noqa: F401,E501 + GaussianDropout, +) +from keras.layers.regularization.gaussian_noise import ( # noqa: F401,E501 + GaussianNoise, +) diff --git a/keras/layers/normalization/BUILD b/keras/layers/normalization/BUILD index 0266b9dabbd2..fffb798587da 100644 --- a/keras/layers/normalization/BUILD +++ b/keras/layers/normalization/BUILD @@ -1,18 +1,18 @@ # Description: # Contains the Keras normalization layers (internal TensorFlow version). +# Placeholder: load unaliased py_library + +# buildifier: disable=same-origin-load load("@org_keras//keras:keras.bzl", "cuda_py_test") +# buildifier: disable=same-origin-load +load("@org_keras//keras:keras.bzl", "tf_py_test") + package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], # TODO(scottzhu): Remove non-keras deps from TF. - default_visibility = [ - "//keras:friends", - "//third_party/tensorflow/python/distribute:__pkg__", - "//third_party/tensorflow/python/feature_column:__pkg__", - "//third_party/tensorflow/python/training/tracking:__pkg__", - "//third_party/tensorflow/tools/pip_package:__pkg__", - "//third_party/tensorflow_models/official/projects/residual_mobilenet/modeling/backbones:__pkg__", - ], + default_visibility = ["//keras:friends"], licenses = ["notice"], ) @@ -25,7 +25,9 @@ py_library( deps = [ ":batch_normalization", ":batch_normalization_v1", + ":group_normalization", ":layer_normalization", + ":spectral_normalization", ":unit_normalization", ], ) @@ -57,6 +59,20 @@ py_library( ], ) +py_library( + name = "group_normalization", + srcs = ["group_normalization.py"], + srcs_version = "PY3", + deps = [ + "//:expect_tensorflow_installed", + "//keras:constraints", + "//keras:regularizers", + "//keras/dtensor:utils", + "//keras/engine:base_layer", + "//keras/initializers", + ], +) + py_library( name = "layer_normalization", srcs = ["layer_normalization.py"], @@ -81,6 +97,40 @@ py_library( ], ) +py_library( + name = "spectral_normalization", + srcs = ["spectral_normalization.py"], + srcs_version = "PY3", + deps = [ + "//:expect_tensorflow_installed", + "//keras/engine:base_layer", + ], +) + +cuda_py_test( + name = "group_normalization_test", + size = "medium", + srcs = ["group_normalization_test.py"], + python_version = "PY3", + shard_count = 4, + tags = [ + "notsan", + ], + xla_tags = [ + "no_cuda_asan", # times out + ], + deps = [ + ":group_normalization", + "//:expect_absl_installed", + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//keras", + "//keras/layers", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", + ], +) + cuda_py_test( name = "batch_normalization_test", size = "medium", @@ -102,6 +152,22 @@ cuda_py_test( ], ) +tf_py_test( + name = "batch_normalization_dtensor_test", + srcs = ["batch_normalization_dtensor_test.py"], + shard_count = 2, + tags = ["no_oss"], + deps = [ + ":batch_normalization", + "//:expect_numpy_installed", + 
"//:expect_tensorflow_installed", + "//keras", + "//keras/dtensor:test_util", + "//keras/testing_infra:test_utils", + "//third_party/tensorflow/python/distribute/experimental:mirrored_strategy", + ], +) + cuda_py_test( name = "layer_normalization_test", size = "medium", @@ -133,3 +199,17 @@ cuda_py_test( "//keras/testing_infra:test_combinations", ], ) + +cuda_py_test( + name = "spectral_normalization_test", + size = "small", + srcs = ["spectral_normalization_test.py"], + python_version = "PY3", + deps = [ + "//:expect_absl_installed", + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//keras", + "//keras/testing_infra:test_combinations", + ], +) diff --git a/keras/layers/normalization/batch_normalization.py b/keras/layers/normalization/batch_normalization.py index 84a6138a6b62..759b0486a735 100644 --- a/keras/layers/normalization/batch_normalization.py +++ b/keras/layers/normalization/batch_normalization.py @@ -14,7 +14,10 @@ # ============================================================================== """The V2 implementation of Normalization layers.""" +import warnings + import tensorflow.compat.v2 as tf + from keras import backend from keras import constraints from keras import initializers @@ -24,1226 +27,1570 @@ from keras.engine.input_spec import InputSpec from keras.utils import control_flow_util from keras.utils import tf_utils -from tensorflow.python.ops.control_flow_ops import get_enclosing_xla_context + +# isort: off +from tensorflow.python.ops.control_flow_ops import ( + get_enclosing_xla_context, +) from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import keras_export class BatchNormalizationBase(Layer): - r"""Layer that normalizes its inputs. - - Batch normalization applies a transformation that maintains the mean output - close to 0 and the output standard deviation close to 1. - - Importantly, batch normalization works differently during training and - during inference. - - **During training** (i.e. when using `fit()` or when calling the layer/model - with the argument `training=True`), the layer normalizes its output using - the mean and standard deviation of the current batch of inputs. That is to - say, for each channel being normalized, the layer returns - `gamma * (batch - mean(batch)) / sqrt(var(batch) + epsilon) + beta`, where: - - - `epsilon` is small constant (configurable as part of the constructor - arguments) - - `gamma` is a learned scaling factor (initialized as 1), which - can be disabled by passing `scale=False` to the constructor. - - `beta` is a learned offset factor (initialized as 0), which - can be disabled by passing `center=False` to the constructor. - - **During inference** (i.e. when using `evaluate()` or `predict()`) or when - calling the layer/model with the argument `training=False` (which is the - default), the layer normalizes its output using a moving average of the - mean and standard deviation of the batches it has seen during training. That - is to say, it returns - `gamma * (batch - self.moving_mean) / sqrt(self.moving_var + epsilon) + beta`. 
- - `self.moving_mean` and `self.moving_var` are non-trainable variables that - are updated each time the layer in called in training mode, as such: - - - `moving_mean = moving_mean * momentum + mean(batch) * (1 - momentum)` - - `moving_var = moving_var * momentum + var(batch) * (1 - momentum)` - - As such, the layer will only normalize its inputs during inference - *after having been trained on data that has similar statistics as the - inference data*. - - Args: - axis: Integer or a list of integers, the axis that should be normalized - (typically the features axis). For instance, after a `Conv2D` layer with - `data_format="channels_first"`, set `axis=1` in `BatchNormalization`. - momentum: Momentum for the moving average. - epsilon: Small float added to variance to avoid dividing by zero. - center: If True, add offset of `beta` to normalized tensor. If False, `beta` - is ignored. - scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the - next layer is linear (also e.g. `nn.relu`), this can be disabled since the - scaling will be done by the next layer. - beta_initializer: Initializer for the beta weight. - gamma_initializer: Initializer for the gamma weight. - moving_mean_initializer: Initializer for the moving mean. - moving_variance_initializer: Initializer for the moving variance. - beta_regularizer: Optional regularizer for the beta weight. - gamma_regularizer: Optional regularizer for the gamma weight. - beta_constraint: Optional constraint for the beta weight. - gamma_constraint: Optional constraint for the gamma weight. - renorm: Whether to use [Batch Renormalization]( - https://arxiv.org/abs/1702.03275). This adds extra variables during - training. The inference is the same for either value of this parameter. - renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to - scalar `Tensors` used to clip the renorm correction. The correction `(r, - d)` is used as `corrected_value = normalized_value * r + d`, with `r` - clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, - dmax are set to inf, 0, inf, respectively. - renorm_momentum: Momentum used to update the moving means and standard - deviations with renorm. Unlike `momentum`, this affects training and - should be neither too small (which would add noise) nor too large (which - would give stale estimates). Note that `momentum` is still applied to get - the means and variances for inference. - fused: if `True`, use a faster, fused implementation, or raise a ValueError - if the fused implementation cannot be used. If `None`, use the faster - implementation if possible. If False, do not used the fused - implementation. - Note that in TensorFlow 1.x, the meaning of `fused=True` is different: if - `False`, the layer uses the system-recommended implementation. - trainable: Boolean, if `True` the variables will be marked as trainable. - virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`, - which means batch normalization is performed across the whole batch. When - `virtual_batch_size` is not `None`, instead perform "Ghost Batch - Normalization", which creates virtual sub-batches which are each - normalized separately (with shared gamma, beta, and moving statistics). - Must divide the actual batch size during execution. - adjustment: A function taking the `Tensor` containing the (dynamic) shape of - the input tensor and returning a pair (scale, bias) to apply to the - normalized values (before gamma and beta), only during training. 
For - example, if `axis=-1`, - `adjustment = lambda shape: ( - tf.random.uniform(shape[-1:], 0.93, 1.07), - tf.random.uniform(shape[-1:], -0.1, 0.1))` will scale the normalized - value by up to 7% up or down, then shift the result by up to 0.1 - (with independent scaling and bias for each feature but shared - across all examples), and finally apply gamma and/or beta. If - `None`, no adjustment is applied. Cannot be specified if - virtual_batch_size is specified. - - Call arguments: - inputs: Input tensor (of any rank). - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. - - `training=True`: The layer will normalize its inputs using the mean and - variance of the current batch of inputs. - - `training=False`: The layer will normalize its inputs using the mean and - variance of its moving statistics, learned during training. - - Input shape: Arbitrary. Use the keyword argument `input_shape` (tuple of - integers, does not include the samples axis) when using this layer as the - first layer in a model. - - Output shape: Same shape as input. - - Reference: - - [Ioffe and Szegedy, 2015](https://arxiv.org/abs/1502.03167). - """ - - # By default, the base class uses V2 behavior. The BatchNormalization V1 - # subclass sets this to False to use the V1 behavior. - _USE_V2_BEHAVIOR = True - - def __init__(self, - axis=-1, - momentum=0.99, - epsilon=1e-3, - center=True, - scale=True, - beta_initializer='zeros', - gamma_initializer='ones', - moving_mean_initializer='zeros', - moving_variance_initializer='ones', - beta_regularizer=None, - gamma_regularizer=None, - beta_constraint=None, - gamma_constraint=None, - renorm=False, - renorm_clipping=None, - renorm_momentum=0.99, - fused=None, - trainable=True, - virtual_batch_size=None, - adjustment=None, - name=None, - **kwargs): - super().__init__(name=name, **kwargs) - if isinstance(axis, (list, tuple)): - self.axis = axis[:] - elif isinstance(axis, int): - self.axis = axis - else: - raise TypeError('Expected an int or a list/tuple of ints for the ' - 'argument \'axis\', but received: %r' % axis) - self.momentum = momentum - self.epsilon = epsilon - self.center = center - self.scale = scale - self.beta_initializer = initializers.get(beta_initializer) - self.gamma_initializer = initializers.get(gamma_initializer) - self.moving_mean_initializer = initializers.get(moving_mean_initializer) - self.moving_variance_initializer = initializers.get( - moving_variance_initializer) - self.beta_regularizer = regularizers.get(beta_regularizer) - self.gamma_regularizer = regularizers.get(gamma_regularizer) - self.beta_constraint = constraints.get(beta_constraint) - self.gamma_constraint = constraints.get(gamma_constraint) - self.renorm = renorm - self.virtual_batch_size = virtual_batch_size - self.adjustment = adjustment - if self._USE_V2_BEHAVIOR: - if fused: - self._raise_if_fused_cannot_be_used() - # We leave fused as None if self._fused_can_be_used()==True, since we - # still may set it to False in self.build() if the input rank is not 4. - elif fused is None and not self._fused_can_be_used(): - fused = False - elif fused is None: - fused = True - self.supports_masking = True - - self.fused = fused - self._bessels_correction_test_only = True - self.trainable = trainable - - if renorm: - renorm_clipping = renorm_clipping or {} - keys = ['rmax', 'rmin', 'dmax'] - if set(renorm_clipping) - set(keys): - raise ValueError( - f'Received invalid keys for `renorm_clipping` argument: ' - f'{renorm_clipping}. 
Supported values: {keys}.')
-      self.renorm_clipping = renorm_clipping
-      self.renorm_momentum = renorm_momentum
-
-  def _raise_if_fused_cannot_be_used(self):
-    """Raises a ValueError if fused implementation cannot be used.
-
-    In addition to the checks done in this function, the input tensors rank must
-    be 4 or 5. The input rank check can only be done once the input shape is
-    known.
+    r"""Layer that normalizes its inputs.
+
+    Batch normalization applies a transformation that maintains the mean output
+    close to 0 and the output standard deviation close to 1.
+
+    Importantly, batch normalization works differently during training and
+    during inference.
+
+    **During training** (i.e. when using `fit()` or when calling the layer/model
+    with the argument `training=True`), the layer normalizes its output using
+    the mean and standard deviation of the current batch of inputs. That is to
+    say, for each channel being normalized, the layer returns
+    `gamma * (batch - mean(batch)) / sqrt(var(batch) + epsilon) + beta`, where:
+
+    - `epsilon` is a small constant (configurable as part of the constructor
+    arguments)
+    - `gamma` is a learned scaling factor (initialized as 1), which
+    can be disabled by passing `scale=False` to the constructor.
+    - `beta` is a learned offset factor (initialized as 0), which
+    can be disabled by passing `center=False` to the constructor.
+
+    **During inference** (i.e. when using `evaluate()` or `predict()`) or when
+    calling the layer/model with the argument `training=False` (which is the
+    default), the layer normalizes its output using a moving average of the
+    mean and standard deviation of the batches it has seen during training. That
+    is to say, it returns
+    `gamma * (batch - self.moving_mean) / sqrt(self.moving_var+epsilon) + beta`.
+
+    `self.moving_mean` and `self.moving_var` are non-trainable variables that
+    are updated each time the layer is called in training mode, as such:
+
+    - `moving_mean = moving_mean * momentum + mean(batch) * (1 - momentum)`
+    - `moving_var = moving_var * momentum + var(batch) * (1 - momentum)`
+
+    As such, the layer will only normalize its inputs during inference
+    *after having been trained on data that has similar statistics to the
+    inference data*.
+
+    Args:
+      axis: Integer or a list of integers, the axis that should be normalized
+        (typically the features axis). For instance, after a `Conv2D` layer with
+        `data_format="channels_first"`, set `axis=1` in `BatchNormalization`.
+      momentum: Momentum for the moving average.
+      epsilon: Small float added to variance to avoid dividing by zero.
+      center: If True, add offset of `beta` to normalized tensor. If False,
+        `beta` is ignored.
+      scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
+        the next layer is linear (also e.g. `nn.relu`), this can be disabled
+        since the scaling will be done by the next layer.
+      beta_initializer: Initializer for the beta weight.
+      gamma_initializer: Initializer for the gamma weight.
+      moving_mean_initializer: Initializer for the moving mean.
+      moving_variance_initializer: Initializer for the moving variance.
+      beta_regularizer: Optional regularizer for the beta weight.
+      gamma_regularizer: Optional regularizer for the gamma weight.
+      beta_constraint: Optional constraint for the beta weight.
+      gamma_constraint: Optional constraint for the gamma weight.
+      renorm: Whether to use [Batch Renormalization](
+        https://arxiv.org/abs/1702.03275). This adds extra variables during
+        training. The inference is the same for either value of this
+        parameter.
+      renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
+        scalar `Tensors` used to clip the renorm correction. The correction `(r,
+        d)` is used as `corrected_value = normalized_value * r + d`, with `r`
+        clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin,
+        dmax are set to inf, 0, inf, respectively.
+      renorm_momentum: Momentum used to update the moving means and standard
+        deviations with renorm. Unlike `momentum`, this affects training and
+        should be neither too small (which would add noise) nor too large (which
+        would give stale estimates). Note that `momentum` is still applied to
+        get the means and variances for inference.
+      fused: If `True`, use a faster, fused implementation, or raise a
+        ValueError if the fused implementation cannot be used. If `None`, use
+        the faster implementation if possible. If `False`, do not use the fused
+        implementation. Note that in TensorFlow 1.x, the meaning of
+        `fused=True` is different: if `False`, the layer uses the
+        system-recommended implementation. You cannot use `fused=True` if a
+        mask is passed in the `call()` method.
+      trainable: Boolean, if `True` the variables will be marked as trainable.
+      virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`,
+        which means batch normalization is performed across the whole batch.
+        When `virtual_batch_size` is not `None`, instead perform "Ghost Batch
+        Normalization", which creates virtual sub-batches which are each
+        normalized separately (with shared gamma, beta, and moving statistics).
+        Must divide the actual batch size during execution.
+      adjustment: A function taking the `Tensor` containing the (dynamic) shape
+        of the input tensor and returning a pair (scale, bias) to apply to the
+        normalized values (before gamma and beta), only during training. For
+        example, if `axis=-1`,
+        `adjustment = lambda shape: (
+          tf.random.uniform(shape[-1:], 0.93, 1.07),
+          tf.random.uniform(shape[-1:], -0.1, 0.1))` will scale the normalized
+        value by up to 7% up or down, then shift the result by up to 0.1
+        (with independent scaling and bias for each feature but shared
+        across all examples), and finally apply gamma and/or beta. If
+        `None`, no adjustment is applied. Cannot be specified if
+        `virtual_batch_size` is specified.
+      synchronized: If True, synchronizes the global batch statistics (mean and
+        variance) for the layer across all devices at each training step in a
+        distributed training strategy. If False, each replica uses its own
+        local batch statistics. Only relevant when used inside a
+        `tf.distribute` strategy.
+
+    Call arguments:
+      inputs: Input tensor (of any rank).
+      training: Python boolean indicating whether the layer should behave in
+        training mode or in inference mode.
+        - `training=True`: The layer will normalize its inputs using the mean
+          and variance of the current batch of inputs.
+        - `training=False`: The layer will normalize its inputs using the mean
+          and variance of its moving statistics, learned during training.
+      mask: Binary tensor of shape broadcastable to `inputs` tensor, indicating
+        the positions for which the mean and variance should be computed.
+
+    Input shape: Arbitrary. Use the keyword argument `input_shape` (tuple of
+      integers, does not include the samples axis) when using this layer as the
+      first layer in a model.
+
+    Output shape: Same shape as input.
+
+    Reference:
+      - [Ioffe and Szegedy, 2015](https://arxiv.org/abs/1502.03167).
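+
+    Example (an illustrative sketch of the behavior described above; the
+    exact values depend on the input statistics):
+
+    ```python
+    import numpy as np
+    import tensorflow as tf
+
+    layer = tf.keras.layers.BatchNormalization(momentum=0.9)
+    data = np.random.normal(3.0, 2.0, size=(8, 4)).astype("float32")
+
+    # Training step: normalize with the batch statistics, then nudge the
+    # moving statistics: moving_mean <- 0.9 * moving_mean + 0.1 * mean(batch).
+    layer(data, training=True)
+
+    # Inference (the default): normalize with the moving statistics instead.
+    layer(data, training=False)
+    ```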
""" - # Note the ValueErrors in this function are caught and not reraised in - # _fused_can_be_used(). No other exception besides ValueError should be - # raised here. - - # Currently fused batch norm doesn't support renorm. It also only supports a - # channel dimension on axis 1 or 3 (rank=4) / 1 or 4 (rank5), when no - # virtual batch size or adjustment is used. - if self.renorm: - raise ValueError('Passing both `fused=True` and `renorm=True` is ' - 'not supported') - axis = [self.axis] if isinstance(self.axis, int) else self.axis - # Axis -3 is equivalent to 1, and axis -1 is equivalent to 3, when the - # input rank is 4. Similarly, the valid axis is -4, -1, 1, 4 when the rank - # is 5. The combination of ranks and axes will be checked later. - if len(axis) > 1 or axis[0] not in (-4, -3, -1, 1, 3, 4): - raise ValueError('Passing `fused=True` is only supported when axis is 1 ' - 'or 3 for input rank = 4 or 1 or 4 for input rank = 5. ' - 'Got axis %s' % (axis,)) - if self.virtual_batch_size is not None: - raise ValueError('Passing `fused=True` is not supported when ' - '`virtual_batch_size` is specified.') - if self.adjustment is not None: - raise ValueError('Passing `fused=True` is not supported when ' - '`adjustment` is specified.') - # TODO(reedwm): Support fp64 in FusedBatchNorm then remove this check. - if self._compute_dtype not in ('float16', 'bfloat16', 'float32', None): - raise ValueError( - 'Passing `fused=True` is only supported when the compute ' - 'dtype is float16, bfloat16, or float32. Got dtype: %s' % - (self._compute_dtype,)) - - def _fused_can_be_used(self): - try: - self._raise_if_fused_cannot_be_used() - return True - except ValueError: - return False - - @property - def trainable(self): - return self._trainable - - @trainable.setter - def trainable(self, value): - self._trainable = value - - @property - def _param_dtype(self): - # Raise parameters of fp16 batch norm to fp32 - if self.dtype == tf.float16 or self.dtype == tf.bfloat16: - return tf.float32 - else: - return self.dtype or tf.float32 - - def _support_zero_size_input(self): - if not tf.distribute.has_strategy(): - return False - strategy = tf.distribute.get_strategy() - # TODO(b/195085185): remove experimental_enable_get_next_as_optional after - # migrating all users. - return getattr( - strategy.extended, 'enable_partial_batch_handling', - getattr(strategy.extended, 'experimental_enable_get_next_as_optional', - False)) - - def build(self, input_shape): - self.axis = tf_utils.validate_axis(self.axis, input_shape) - input_shape = tf.TensorShape(input_shape) - rank = input_shape.rank - - if self.virtual_batch_size is not None: - if self.virtual_batch_size <= 0: - raise ValueError( - f'`virtual_batch_size` must be a positive integer that divides the ' - f'true batch size of the input tensor. Received: ' - f'virtual_batch_size={self.virtual_batch_size}') - # If using virtual batches, the first dimension must be the batch - # dimension and cannot be the batch norm axis - if 0 in self.axis: - raise ValueError('When using `virtual_batch_size`, the batch dimension ' - 'must be 0 and thus axis cannot include 0. ' - f'Received axis={self.axis}') - if self.adjustment is not None: - raise ValueError('When using `virtual_batch_size`, adjustment cannot ' - 'be specified') - - if self.fused in (None, True): - # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the - # output back to its original shape accordingly. 
- if self._USE_V2_BEHAVIOR: - if self.fused is None: - self.fused = rank in (4, 5) - elif self.fused and rank not in (4, 5): - raise ValueError('Batch normalization layers with `fused=True` only ' - 'support 4D or 5D input tensors. ' - f'Received tensor with shape: {tuple(input_shape)}') - else: - assert self.fused is not None - self.fused = (rank in (4, 5) and self._fused_can_be_used()) - # TODO(chrisying): fused batch norm is currently not supported for - # multi-axis batch norm and by extension virtual batches. In some cases, - # it might be possible to use fused batch norm but would require reshaping - # the Tensor to 4D with the axis in 1 or 3 (preferred 1) which is - # particularly tricky. A compromise might be to just support the most - # common use case (turning 5D w/ virtual batch to NCHW) - - if self.fused: - if self.axis == [1] and rank == 4: - self._data_format = 'NCHW' - elif self.axis == [1] and rank == 5: - self._data_format = 'NCDHW' - elif self.axis == [3] and rank == 4: - self._data_format = 'NHWC' - elif self.axis == [4] and rank == 5: - self._data_format = 'NDHWC' - elif rank == 5: - # 5D tensors that can be passed in but should not use fused batch norm - # due to unsupported axis. - self.fused = False - else: - if rank == 4: - raise ValueError( - 'Unsupported axis. The use of `fused=True` is only possible with ' - '`axis=1` or `axis=3` for 4D input tensors. Received: ' - f'axis={tuple(self.axis)}') + + # By default, the base class uses V2 behavior. The BatchNormalization V1 + # subclass sets this to False to use the V1 behavior. + _USE_V2_BEHAVIOR = True + + def __init__( + self, + axis=-1, + momentum=0.99, + epsilon=1e-3, + center=True, + scale=True, + beta_initializer="zeros", + gamma_initializer="ones", + moving_mean_initializer="zeros", + moving_variance_initializer="ones", + beta_regularizer=None, + gamma_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + renorm=False, + renorm_clipping=None, + renorm_momentum=0.99, + fused=None, + trainable=True, + virtual_batch_size=None, + adjustment=None, + name=None, + synchronized=False, + **kwargs, + ): + super().__init__(name=name, **kwargs) + if isinstance(axis, (list, tuple)): + self.axis = axis[:] + elif isinstance(axis, int): + self.axis = axis else: - raise ValueError( - 'Unsupported axis. The use of `fused=True` is only possible with ' - '`axis=1` or `axis=4` for 5D input tensors. Received: ' - f'axis={tuple(self.axis)}') - - axis_to_dim = {x: input_shape.dims[x].value for x in self.axis} - for x in axis_to_dim: - if axis_to_dim[x] is None: - raise ValueError('Input has undefined `axis` dimension. 
Received input ' - f'with shape {tuple(input_shape)} ' - f'and axis={tuple(self.axis)}') - self.input_spec = InputSpec(ndim=rank, axes=axis_to_dim) - - if len(axis_to_dim) == 1 and self.virtual_batch_size is None: - # Single axis batch norm (most common/default use-case) - param_shape = (list(axis_to_dim.values())[0],) - else: - # Parameter shape is the original shape but with 1 in all non-axis dims - param_shape = [ - axis_to_dim[i] if i in axis_to_dim else 1 for i in range(rank) - ] - if self.virtual_batch_size is not None: - # When using virtual batches, add an extra dim at index 1 - param_shape.insert(1, 1) - for idx, x in enumerate(self.axis): - self.axis[idx] = x + 1 # Account for added dimension - - if self.scale: - self.gamma = self.add_weight( - name='gamma', - shape=param_shape, - dtype=self._param_dtype, - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint, - trainable=True, - experimental_autocast=False) - else: - self.gamma = None - if self.fused: - self._gamma_const = backend.constant( - 1.0, dtype=self._param_dtype, shape=param_shape) - - if self.center: - self.beta = self.add_weight( - name='beta', - shape=param_shape, - dtype=self._param_dtype, - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint, - trainable=True, - experimental_autocast=False) - else: - self.beta = None - if self.fused: - self._beta_const = backend.constant( - 0.0, dtype=self._param_dtype, shape=param_shape) - - try: - # Disable variable partitioning when creating the moving mean and variance - if hasattr(self, '_scope') and self._scope: - partitioner = self._scope.partitioner - self._scope.set_partitioner(None) - else: - partitioner = None - self.moving_mean = self.add_weight( - name='moving_mean', - shape=param_shape, - dtype=self._param_dtype, - initializer=self.moving_mean_initializer, - synchronization=tf.VariableSynchronization.ON_READ, - trainable=False, - aggregation=tf.VariableAggregation.MEAN, - experimental_autocast=False) - - self.moving_variance = self.add_weight( - name='moving_variance', - shape=param_shape, - dtype=self._param_dtype, - initializer=self.moving_variance_initializer, - synchronization=tf.VariableSynchronization.ON_READ, - trainable=False, - aggregation=tf.VariableAggregation.MEAN, - experimental_autocast=False) - - if self.renorm: - # In batch renormalization we track the inference moving stddev instead - # of the moving variance to more closely align with the paper. - def moving_stddev_initializer(*args, **kwargs): - return tf.sqrt( - self.moving_variance_initializer(*args, **kwargs)) - - with tf.distribute.get_strategy( - ).extended.colocate_vars_with(self.moving_variance): - self.moving_stddev = self.add_weight( - name='moving_stddev', - shape=param_shape, - dtype=self._param_dtype, - initializer=moving_stddev_initializer, - synchronization=tf.VariableSynchronization.ON_READ, - trainable=False, - aggregation=tf.VariableAggregation.MEAN, - experimental_autocast=False) - - # Create variables to maintain the moving mean and standard deviation. - # These are used in training and thus are different from the moving - # averages above. The renorm variables are colocated with moving_mean - # and moving_stddev. - # NOTE: below, the outer `with device` block causes the current device - # stack to be cleared. The nested ones use a `lambda` to set the desired - # device and ignore any devices that may be set by the custom getter. 
- def _renorm_variable(name, - shape, - initializer='zeros'): - """Create a renorm variable.""" - var = self.add_weight( - name=name, - shape=shape, - dtype=self._param_dtype, - initializer=initializer, - synchronization=tf.VariableSynchronization.ON_READ, - trainable=False, - aggregation=tf.VariableAggregation.MEAN, - experimental_autocast=False) - return var - - with tf.distribute.get_strategy( - ).extended.colocate_vars_with(self.moving_mean): - self.renorm_mean = _renorm_variable('renorm_mean', param_shape, - self.moving_mean_initializer) - with tf.distribute.get_strategy( - ).extended.colocate_vars_with(self.moving_stddev): - self.renorm_stddev = _renorm_variable('renorm_stddev', param_shape, - moving_stddev_initializer) - finally: - if partitioner: - self._scope.set_partitioner(partitioner) - self.built = True - - def _assign_moving_average(self, variable, value, momentum, inputs_size): - - def calculate_update_delta(): - decay = tf.convert_to_tensor( - 1.0 - momentum, name='decay') - if decay.dtype != variable.dtype.base_dtype: - decay = tf.cast(decay, variable.dtype.base_dtype) - update_delta = (variable - tf.cast(value, variable.dtype)) * decay - if inputs_size is not None: - update_delta = tf.where(inputs_size > 0, update_delta, - backend.zeros_like(update_delta)) - return update_delta - - with backend.name_scope('AssignMovingAvg') as scope: - if tf.compat.v1.executing_eagerly_outside_functions(): - return variable.assign_sub(calculate_update_delta(), name=scope) - else: - with tf.compat.v1.colocate_with(variable): # pylint: disable=protected-access - return tf.compat.v1.assign_sub( - variable, calculate_update_delta(), name=scope) - - def _assign_new_value(self, variable, value): - with backend.name_scope('AssignNewValue') as scope: - if tf.compat.v1.executing_eagerly_outside_functions(): - return variable.assign(value, name=scope) - else: - with tf.compat.v1.colocate_with(variable): # pylint: disable=protected-access - return tf.compat.v1.assign(variable, value, name=scope) - - def _fused_batch_norm(self, inputs, training): - """Returns the output of fused batch norm.""" - beta = self.beta if self.center else self._beta_const - gamma = self.gamma if self.scale else self._gamma_const - - # TODO(b/129279393): Support zero batch input in non DistributionStrategy - # code as well. - if self._support_zero_size_input(): - # Keras assumes that batch dimension is the first dimension for Batch - # Normalization. - input_batch_size = tf.shape(inputs)[0] - else: - input_batch_size = None - - # TODO(rmlarsen): Support using fused avg updates for non-eager execution - # after fixing graph pattern matching and enabling fused_batch_norm to - # take exponential_avg_factor as a tensor input. - use_fused_avg_updates = ( - tf.compat.v1.executing_eagerly_outside_functions() and - isinstance(self.momentum, - (float, int)) and get_enclosing_xla_context() is None) - if use_fused_avg_updates: - exponential_avg_factor = 1.0 - self.momentum - else: - exponential_avg_factor = None - - def _maybe_add_or_remove_bessels_correction(variance, remove=True): - r"""Add or remove Bessel's correction.""" - # Removes Bessel's correction if remove == True, adds it otherwise. - # This is to be consistent with non-fused batch norm. Note that the - # variance computed by fused batch norm is with Bessel's correction. - # This is only used in legacy V1 batch norm tests. 
- if self._bessels_correction_test_only: - return variance - sample_size = tf.cast( - tf.size(inputs) / tf.size(variance), variance.dtype) - if remove: - factor = (sample_size - - tf.cast(1.0, variance.dtype)) / sample_size - else: - factor = sample_size / ( - sample_size - tf.cast(1.0, variance.dtype)) - return variance * factor - - def _fused_batch_norm_training(): - return tf.compat.v1.nn.fused_batch_norm( - inputs, - gamma, - beta, - mean=self.moving_mean, - variance=_maybe_add_or_remove_bessels_correction( - self.moving_variance, remove=False), - epsilon=self.epsilon, - is_training=True, - data_format=self._data_format, - exponential_avg_factor=exponential_avg_factor) - - def _fused_batch_norm_inference(): - return tf.compat.v1.nn.fused_batch_norm( - inputs, - gamma, - beta, - mean=self.moving_mean, - variance=self.moving_variance, - epsilon=self.epsilon, - is_training=False, - data_format=self._data_format) - - output, mean, variance = control_flow_util.smart_cond( - training, _fused_batch_norm_training, _fused_batch_norm_inference) - variance = _maybe_add_or_remove_bessels_correction(variance, remove=True) - - training_value = control_flow_util.constant_value(training) - if training_value or training_value is None: - if not use_fused_avg_updates: - if training_value is None: - momentum = control_flow_util.smart_cond(training, - lambda: self.momentum, - lambda: 1.0) + raise TypeError( + "Expected an int or a list/tuple of ints for the " + "argument 'axis', but received: %r" % axis + ) + if synchronized and fused: + raise ValueError( + "`fused=True` is not supported when `synchronized=True`." + ) + self.synchronized = synchronized + if self.synchronized: + fused = False + + self.momentum = momentum + self.epsilon = epsilon + self.center = center + self.scale = scale + self.beta_initializer = initializers.get(beta_initializer) + self.gamma_initializer = initializers.get(gamma_initializer) + self.moving_mean_initializer = initializers.get(moving_mean_initializer) + self.moving_variance_initializer = initializers.get( + moving_variance_initializer + ) + self.beta_regularizer = regularizers.get(beta_regularizer) + self.gamma_regularizer = regularizers.get(gamma_regularizer) + self.beta_constraint = constraints.get(beta_constraint) + self.gamma_constraint = constraints.get(gamma_constraint) + self.renorm = renorm + self.virtual_batch_size = virtual_batch_size + self.adjustment = adjustment + if self._USE_V2_BEHAVIOR: + if fused: + self._raise_if_fused_cannot_be_used() + # We leave fused as None if self._fused_can_be_used()==True, since + # we still may set it to False in self.build() if the input rank is + # not 4. + elif fused is None and not self._fused_can_be_used(): + fused = False + elif fused is None: + fused = True + self.supports_masking = True + + self.fused = fused + self._bessels_correction_test_only = True + self.trainable = trainable + + if renorm: + renorm_clipping = renorm_clipping or {} + keys = ["rmax", "rmin", "dmax"] + if set(renorm_clipping) - set(keys): + raise ValueError( + "Received invalid keys for `renorm_clipping` argument: " + f"{renorm_clipping}. Supported values: {keys}." + ) + self.renorm_clipping = renorm_clipping + self.renorm_momentum = renorm_momentum + + def _raise_if_fused_cannot_be_used(self): + """Raises a ValueError if fused implementation cannot be used. + + In addition to the checks done in this function, the input tensors rank + must be 4 or 5. The input rank check can only be done once the input + shape is known. 
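+
+        Specifically, `fused=True` cannot be used when `renorm=True`, when
+        the normalization axis is anything other than a single channel axis
+        in (1, 3, -1, -3) for rank-4 inputs or (1, 4, -1, -4) for rank-5
+        inputs, when `virtual_batch_size` or `adjustment` is set, or when
+        the compute dtype is not float16, bfloat16, or float32.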
+ """ + # Note the ValueErrors in this function are caught and not reraised in + # _fused_can_be_used(). No other exception besides ValueError should be + # raised here. + + # Currently fused batch norm doesn't support renorm. It also only + # supports a channel dimension on axis 1 or 3 (rank=4) / 1 or 4 (rank5), + # when no virtual batch size or adjustment is used. + if self.renorm: + raise ValueError( + "Passing both `fused=True` and `renorm=True` is not supported" + ) + axis = [self.axis] if isinstance(self.axis, int) else self.axis + # Axis -3 is equivalent to 1, and axis -1 is equivalent to 3, when the + # input rank is 4. Similarly, the valid axis is -4, -1, 1, 4 when the + # rank is 5. The combination of ranks and axes will be checked later. + if len(axis) > 1 or axis[0] not in (-4, -3, -1, 1, 3, 4): + raise ValueError( + "Passing `fused=True` is only supported when axis is 1 " + "or 3 for input rank = 4 or 1 or 4 for input rank = 5. " + "Got axis %s" % (axis,) + ) + if self.virtual_batch_size is not None: + raise ValueError( + "Passing `fused=True` is not supported when " + "`virtual_batch_size` is specified." + ) + if self.adjustment is not None: + raise ValueError( + "Passing `fused=True` is not supported when " + "`adjustment` is specified." + ) + # TODO(reedwm): Support fp64 in FusedBatchNorm then remove this check. + if self._compute_dtype not in ("float16", "bfloat16", "float32", None): + raise ValueError( + "Passing `fused=True` is only supported when the compute " + "dtype is float16, bfloat16, or float32. Got dtype: %s" + % (self._compute_dtype,) + ) + + def _fused_can_be_used(self): + try: + self._raise_if_fused_cannot_be_used() + return True + except ValueError: + return False + + @property + def trainable(self): + return self._trainable + + @trainable.setter + def trainable(self, value): + self._trainable = value + + @property + def _param_dtype(self): + # Raise parameters of fp16 batch norm to fp32 + if self.dtype == tf.float16 or self.dtype == tf.bfloat16: + return tf.float32 else: - momentum = tf.convert_to_tensor(self.momentum) - - def mean_update(): - """Update self.moving_mean with the most recent data point.""" - if use_fused_avg_updates: - if input_batch_size is not None: - new_mean = control_flow_util.smart_cond( - input_batch_size > 0, lambda: mean, lambda: self.moving_mean) - else: - new_mean = mean - return self._assign_new_value(self.moving_mean, new_mean) + return self.dtype or tf.float32 + + def build(self, input_shape): + self.axis = tf_utils.validate_axis(self.axis, input_shape) + input_shape = tf.TensorShape(input_shape) + rank = input_shape.rank + + if self.virtual_batch_size is not None: + if self.virtual_batch_size <= 0: + raise ValueError( + "`virtual_batch_size` must be a positive integer that " + "divides the true batch size of the input tensor. " + f"Received: virtual_batch_size={self.virtual_batch_size}" + ) + # If using virtual batches, the first dimension must be the batch + # dimension and cannot be the batch norm axis + if 0 in self.axis: + raise ValueError( + "When using `virtual_batch_size`, the batch dimension " + "must be 0 and thus axis cannot include 0. " + f"Received axis={self.axis}" + ) + if self.adjustment is not None: + raise ValueError( + "When using `virtual_batch_size`, adjustment cannot " + "be specified" + ) + + if self.fused in (None, True): + # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape + # the output back to its original shape accordingly. 
+ if self._USE_V2_BEHAVIOR: + if self.fused is None: + self.fused = rank in (4, 5) + elif self.fused and rank not in (4, 5): + raise ValueError( + "Batch normalization layers with `fused=True` only " + "support 4D or 5D input tensors. " + f"Received tensor with shape: {tuple(input_shape)}" + ) + else: + assert self.fused is not None + self.fused = rank in (4, 5) and self._fused_can_be_used() + # TODO(chrisying): fused batch norm is currently not supported for + # multi-axis batch norm and by extension virtual batches. In some + # cases, it might be possible to use fused batch norm but would + # require reshaping the Tensor to 4D with the axis in 1 or 3 + # (preferred 1) which is particularly tricky. A compromise might be + # to just support the most common use case (turning 5D w/ virtual + # batch to NCHW) + + if self.fused: + if self.axis == [1] and rank == 4: + self._data_format = "NCHW" + elif self.axis == [1] and rank == 5: + self._data_format = "NCDHW" + elif self.axis == [3] and rank == 4: + self._data_format = "NHWC" + elif self.axis == [4] and rank == 5: + self._data_format = "NDHWC" + elif rank == 5: + # 5D tensors that can be passed in but should not use fused + # batch norm due to unsupported axis. + self.fused = False + else: + if rank == 4: + raise ValueError( + "Unsupported axis. The use of `fused=True` is only " + "possible with `axis=1` or `axis=3` for 4D input " + f"tensors. Received: axis={tuple(self.axis)}" + ) + else: + raise ValueError( + "Unsupported axis. The use of `fused=True` is only " + "possible with `axis=1` or `axis=4` for 5D input " + f"tensors. Received: axis={tuple(self.axis)}" + ) + + axis_to_dim = {x: input_shape.dims[x].value for x in self.axis} + for x in axis_to_dim: + if axis_to_dim[x] is None: + raise ValueError( + "Input has undefined `axis` dimension. 
Received input " + f"with shape {tuple(input_shape)} " + f"and axis={tuple(self.axis)}" + ) + self.input_spec = InputSpec(ndim=rank, axes=axis_to_dim) + + if len(axis_to_dim) == 1 and self.virtual_batch_size is None: + # Single axis batch norm (most common/default use-case) + param_shape = (list(axis_to_dim.values())[0],) else: - return self._assign_moving_average(self.moving_mean, mean, momentum, - input_batch_size) - - def variance_update(): - """Update self.moving_variance with the most recent data point.""" - if use_fused_avg_updates: - if input_batch_size is not None: - new_variance = control_flow_util.smart_cond( - input_batch_size > 0, lambda: variance, - lambda: self.moving_variance) - else: - new_variance = variance - return self._assign_new_value(self.moving_variance, new_variance) + # Parameter shape is the original shape but with 1 in all non-axis + # dims + param_shape = [ + axis_to_dim[i] if i in axis_to_dim else 1 for i in range(rank) + ] + if self.virtual_batch_size is not None: + # When using virtual batches, add an extra dim at index 1 + param_shape.insert(1, 1) + for idx, x in enumerate(self.axis): + self.axis[idx] = x + 1 # Account for added dimension + self._param_shape = param_shape + if self.scale: + self.gamma = self.add_weight( + name="gamma", + shape=param_shape, + dtype=self._param_dtype, + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint, + trainable=True, + experimental_autocast=False, + ) + else: + self.gamma = None + + if self.center: + self.beta = self.add_weight( + name="beta", + shape=param_shape, + dtype=self._param_dtype, + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint, + trainable=True, + experimental_autocast=False, + ) + else: + self.beta = None + + try: + # Disable variable partitioning when creating the moving mean and + # variance + if hasattr(self, "_scope") and self._scope: + partitioner = self._scope.partitioner + self._scope.set_partitioner(None) + else: + partitioner = None + self.moving_mean = self.add_weight( + name="moving_mean", + shape=param_shape, + dtype=self._param_dtype, + initializer=self.moving_mean_initializer, + synchronization=tf.VariableSynchronization.ON_READ, + trainable=False, + aggregation=tf.VariableAggregation.MEAN, + experimental_autocast=False, + ) + + self.moving_variance = self.add_weight( + name="moving_variance", + shape=param_shape, + dtype=self._param_dtype, + initializer=self.moving_variance_initializer, + synchronization=tf.VariableSynchronization.ON_READ, + trainable=False, + aggregation=tf.VariableAggregation.MEAN, + experimental_autocast=False, + ) + + if self.renorm: + # In batch renormalization we track the inference moving stddev + # instead of the moving variance to more closely align with the + # paper. + def moving_stddev_initializer(*args, **kwargs): + return tf.sqrt( + self.moving_variance_initializer(*args, **kwargs) + ) + + with tf.distribute.get_strategy().extended.colocate_vars_with( + self.moving_variance + ): + self.moving_stddev = self.add_weight( + name="moving_stddev", + shape=param_shape, + dtype=self._param_dtype, + initializer=moving_stddev_initializer, + synchronization=tf.VariableSynchronization.ON_READ, + trainable=False, + aggregation=tf.VariableAggregation.MEAN, + experimental_autocast=False, + ) + + # Create variables to maintain the moving mean and standard + # deviation. These are used in training and thus are different + # from the moving averages above. 
The renorm variables are + # colocated with moving_mean and moving_stddev. + # NOTE: below, the outer `with device` block causes the current + # device stack to be cleared. The nested ones use a `lambda` to + # set the desired device and ignore any devices that may be set + # by the custom getter. + def _renorm_variable(name, shape, initializer="zeros"): + """Create a renorm variable.""" + var = self.add_weight( + name=name, + shape=shape, + dtype=self._param_dtype, + initializer=initializer, + synchronization=tf.VariableSynchronization.ON_READ, + trainable=False, + aggregation=tf.VariableAggregation.MEAN, + experimental_autocast=False, + ) + return var + + with tf.distribute.get_strategy().extended.colocate_vars_with( + self.moving_mean + ): + self.renorm_mean = _renorm_variable( + "renorm_mean", param_shape, self.moving_mean_initializer + ) + with tf.distribute.get_strategy().extended.colocate_vars_with( + self.moving_stddev + ): + self.renorm_stddev = _renorm_variable( + "renorm_stddev", param_shape, moving_stddev_initializer + ) + finally: + if partitioner: + self._scope.set_partitioner(partitioner) + self.built = True + + def call(self, inputs, training=None, mask=None): + inputs = tf.cast(inputs, self.compute_dtype) + training = self._get_training_value(training) + # Determine a boolean value for `training`: could be True, False, or + # None. + training_value = control_flow_util.constant_value(training) + _raise_for_non_sync_bn_with_renorm_and_dtensor_strategy( + synchronized=self.synchronized, + training=training, + renorm=self.renorm, + ) + + if self.virtual_batch_size is not None: + # Virtual batches (aka ghost batches) can be simulated by reshaping + # the Tensor and reusing the existing batch norm implementation + original_shape = tf.shape(inputs) + original_shape = tf.concat( + [tf.constant([-1]), original_shape[1:]], axis=0 + ) + + if tf.__internal__.tf2.enabled(): + expanded_shape = ( + [self.virtual_batch_size, -1] if training_value else [-1, 1] + ) + expanded_shape = tf.concat( + [ + tf.constant(expanded_shape), + original_shape[1:], + ], + axis=0, + ) + else: + # Preserve incorrect legacy behavior for backwards compatibility + expanded_shape = tf.concat( + [ + tf.constant([self.virtual_batch_size, -1]), + original_shape[1:], + ], + axis=0, + ) + + # Will cause errors if virtual_batch_size does not divide the batch + # size + inputs = tf.reshape(inputs, expanded_shape) + + def undo_virtual_batching(outputs): + outputs = tf.reshape(outputs, original_shape) + return outputs + + if self.fused: + outputs = self._fused_batch_norm( + inputs, mask=mask, training=training + ) + if self.virtual_batch_size is not None: + # Currently never reaches here since fused_batch_norm does not + # support virtual batching + outputs = undo_virtual_batching(outputs) + return outputs + + inputs_dtype = inputs.dtype.base_dtype + if inputs_dtype in (tf.float16, tf.bfloat16): + # Do all math in float32 if given 16-bit inputs for numeric + # stability. In particular, it's very easy for variance to overflow + # in float16 and for safety we also choose to cast bfloat16 to + # float32. 
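+            # (float16 tops out around 65504, so the squared terms that feed
+            # the variance overflow easily.)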
+ inputs = tf.cast(inputs, tf.float32) + + # Compute the axes along which to reduce the mean / variance + input_shape = inputs.shape + ndims = len(input_shape) + reduction_axes = [i for i in range(ndims) if i not in self.axis] + if self.virtual_batch_size is not None: + del reduction_axes[1] # Do not reduce along virtual batch dim + + # Broadcasting only necessary for single-axis batch norm where the axis + # is not the last dimension + broadcast_shape = [1] * ndims + broadcast_shape[self.axis[0]] = input_shape.dims[self.axis[0]].value + + def _broadcast(v): + if ( + v is not None + and len(v.shape) != ndims + and reduction_axes != list(range(ndims - 1)) + ): + return tf.reshape(v, broadcast_shape) + return v + + scale, offset = _broadcast(self.gamma), _broadcast(self.beta) + + def _compose_transforms(scale, offset, then_scale, then_offset): + if then_scale is not None: + scale *= then_scale + offset *= then_scale + if then_offset is not None: + offset += then_offset + return (scale, offset) + + if training_value == False: # noqa: E712 + mean, variance = self.moving_mean, self.moving_variance else: - return self._assign_moving_average(self.moving_variance, variance, - momentum, input_batch_size) - - self.add_update(mean_update) - self.add_update(variance_update) - - return output - - def _renorm_correction_and_moments(self, mean, variance, training, - inputs_size): - """Returns the correction and update values for renorm.""" - stddev = tf.sqrt(variance + self.epsilon) - # Compute the average mean and standard deviation, as if they were - # initialized with this batch's moments. - renorm_mean = self.renorm_mean - # Avoid divide by zero early on in training. - renorm_stddev = tf.maximum(self.renorm_stddev, tf.sqrt(self.epsilon)) - # Compute the corrections for batch renorm. - r = stddev / renorm_stddev - d = (mean - renorm_mean) / renorm_stddev - # Ensure the corrections use pre-update moving averages. - with tf.control_dependencies([r, d]): - mean = tf.identity(mean) - stddev = tf.identity(stddev) - rmin, rmax, dmax = [ - self.renorm_clipping.get(key) for key in ['rmin', 'rmax', 'dmax'] - ] - if rmin is not None: - r = tf.maximum(r, rmin) - if rmax is not None: - r = tf.minimum(r, rmax) - if dmax is not None: - d = tf.maximum(d, -dmax) - d = tf.minimum(d, dmax) - # When not training, use r=1, d=0. - r = control_flow_util.smart_cond(training, lambda: r, - lambda: tf.ones_like(r)) - d = control_flow_util.smart_cond(training, lambda: d, - lambda: tf.zeros_like(d)) - - def _update_renorm_variable(var, value, inputs_size): - """Updates a moving average and weight, returns the unbiased value.""" - value = tf.identity(value) - - def _do_update(): - """Updates the var, returns the updated value.""" - new_var = self._assign_moving_average(var, value, self.renorm_momentum, - inputs_size) - return new_var - - def _fake_update(): - return tf.identity(var) - - return control_flow_util.smart_cond(training, _do_update, _fake_update) - - # TODO(yuefengz): colocate the operations - update_new_mean = _update_renorm_variable(self.renorm_mean, mean, - inputs_size) - update_new_stddev = _update_renorm_variable(self.renorm_stddev, stddev, - inputs_size) - - # Update the inference mode moving averages with the batch value. 
-      with tf.control_dependencies([update_new_mean, update_new_stddev]):
-        out_mean = tf.identity(mean)
-        out_variance = tf.identity(variance)
-
-    return (r, d, out_mean, out_variance)
-
-  def _calculate_mean_and_var(self, inputs, reduction_axes, keep_dims):
-    return tf.nn.moments(inputs, reduction_axes, keepdims=keep_dims)
-
-  def _moments(self, inputs, reduction_axes, keep_dims):
-    mean, variance = self._calculate_mean_and_var(inputs, reduction_axes,
-                                                  keep_dims)
-    # TODO(b/129279393): Support zero batch input in non DistributionStrategy
-    # code as well.
-    if self._support_zero_size_input():
-      input_batch_size = tf.shape(inputs)[0]
-      mean = tf.where(input_batch_size > 0, mean, backend.zeros_like(mean))
-      variance = tf.where(input_batch_size > 0, variance,
-                          backend.zeros_like(variance))
-    return mean, variance
-
-  def _get_training_value(self, training=None):
-    if training is None:
-      training = backend.learning_phase()
-    if self._USE_V2_BEHAVIOR:
-      if isinstance(training, int):
-        training = bool(training)
-      if not self.trainable:
-        # When the layer is not trainable, it overrides the value passed from
-        # model.
-        training = False
-    return training
-
-  def call(self, inputs, training=None):
-    inputs = tf.cast(inputs, self.compute_dtype)
-    training = self._get_training_value(training)
-
-    if self.virtual_batch_size is not None:
-      # Virtual batches (aka ghost batches) can be simulated by reshaping the
-      # Tensor and reusing the existing batch norm implementation
-      original_shape = tf.shape(inputs)
-      original_shape = tf.concat(
-          [tf.constant([-1]), original_shape[1:]], axis=0)
-      expanded_shape = tf.concat([
-          tf.constant([self.virtual_batch_size, -1]),
-          original_shape[1:]
-      ], axis=0)
-
-      # Will cause errors if virtual_batch_size does not divide the batch size
-      inputs = tf.reshape(inputs, expanded_shape)
-
-      def undo_virtual_batching(outputs):
-        outputs = tf.reshape(outputs, original_shape)
+            # The following long block handles the mean/variance updates
+            # during the training stage across the various supported settings.
+            if self.adjustment:
+                adj_scale, adj_bias = self.adjustment(tf.shape(inputs))
+                # Adjust only during training.
+                adj_scale = control_flow_util.smart_cond(
+                    training, lambda: adj_scale, lambda: tf.ones_like(adj_scale)
+                )
+                adj_bias = control_flow_util.smart_cond(
+                    training, lambda: adj_bias, lambda: tf.zeros_like(adj_bias)
+                )
+                scale, offset = _compose_transforms(
+                    adj_scale, adj_bias, scale, offset
+                )
+
+            # Some of the computations here are not necessary when
+            # training==False but `training` is not a constant. However, this
+            # makes the code simpler.
+            keep_dims = (
+                self.virtual_batch_size is not None or len(self.axis) > 1
+            )
+            mean, variance = self._moments(
+                tf.cast(inputs, self._param_dtype),
+                reduction_axes,
+                keep_dims=keep_dims,
+                mask=mask,
+            )
+
+            moving_mean = self.moving_mean
+            moving_variance = self.moving_variance
+
+            mean = control_flow_util.smart_cond(
+                training,
+                lambda: mean,
+                lambda: tf.convert_to_tensor(moving_mean),
+            )
+            variance = control_flow_util.smart_cond(
+                training,
+                lambda: variance,
+                lambda: tf.convert_to_tensor(moving_variance),
+            )
+
+            if self.virtual_batch_size is not None:
+                # This isn't strictly correct since in ghost batch norm, you are
+                # supposed to sequentially update the moving_mean and
+                # moving_variance with each sub-batch. However, since the moving
+                # statistics are only used during evaluation, it is more
+                # efficient to just update in one step and should not make a
+                # significant difference in the result.
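+                # `mean` and `variance` hold one entry per virtual sub-batch
+                # along axis 1 (which is excluded from `reduction_axes`);
+                # averaging over that axis collapses them into the single
+                # statistic that the layer tracks.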
+ new_mean = tf.reduce_mean(mean, axis=1, keepdims=True) + new_variance = tf.reduce_mean(variance, axis=1, keepdims=True) + else: + if ( + utils.running_with_dtensor_strategy() + and not self.synchronized + ): + new_mean = tf.math.reduce_mean(mean, axis=reduction_axes) + new_variance = tf.math.reduce_mean( + variance, axis=reduction_axes + ) + else: + new_mean, new_variance = mean, variance + + if self._support_zero_size_input(): + # Keras assumes that batch dimension is the first dimension for + # Batch Normalization. + input_batch_size = tf.shape(inputs)[0] + else: + input_batch_size = None + + if self.renorm: + ( + r, + d, + new_mean, + new_variance, + ) = self._renorm_correction_and_moments( + new_mean, new_variance, training, input_batch_size + ) + # When training, the normalized values (say, x) will be + # transformed as x * gamma + beta without renorm, and (x * r + + # d) * gamma + beta = x * (r * gamma) + (d * gamma + beta) with + # renorm. + r = _broadcast(tf.stop_gradient(r, name="renorm_r")) + d = _broadcast(tf.stop_gradient(d, name="renorm_d")) + scale, offset = _compose_transforms(r, d, scale, offset) + + def _do_update(var, value): + """Compute the updates for mean and variance.""" + return self._assign_moving_average( + var, value, self.momentum, input_batch_size + ) + + def mean_update(): + true_branch = lambda: _do_update(self.moving_mean, new_mean) + false_branch = lambda: self.moving_mean + return control_flow_util.smart_cond( + training, true_branch, false_branch + ) + + def variance_update(): + """Update the moving variance.""" + + def true_branch_renorm(): + # We apply epsilon as part of the moving_stddev to mirror + # the training code path. + moving_stddev = _do_update( + self.moving_stddev, tf.sqrt(new_variance + self.epsilon) + ) + return self._assign_new_value( + self.moving_variance, + # Apply relu in case floating point rounding causes it + # to go negative. + backend.relu( + moving_stddev * moving_stddev - self.epsilon + ), + ) + + if self.renorm: + true_branch = true_branch_renorm + else: + true_branch = lambda: _do_update( + self.moving_variance, new_variance + ) + + false_branch = lambda: self.moving_variance + return control_flow_util.smart_cond( + training, true_branch, false_branch + ) + + self.add_update(mean_update) + self.add_update(variance_update) + # End of handling mean/variance calculation and update. + + mean = tf.cast(mean, inputs.dtype) + variance = tf.cast(variance, inputs.dtype) + if offset is not None: + offset = tf.cast(offset, inputs.dtype) + if scale is not None: + scale = tf.cast(scale, inputs.dtype) + outputs = tf.nn.batch_normalization( + inputs, + _broadcast(mean), + _broadcast(variance), + offset, + scale, + self.epsilon, + ) + if inputs_dtype in (tf.float16, tf.bfloat16): + outputs = tf.cast(outputs, inputs_dtype) + + # If some components of the shape got lost due to adjustments, fix that. + outputs.set_shape(input_shape) + + if self.virtual_batch_size is not None: + outputs = undo_virtual_batching(outputs) return outputs - if self.fused: - outputs = self._fused_batch_norm(inputs, training=training) - if self.virtual_batch_size is not None: - # Currently never reaches here since fused_batch_norm does not support - # virtual batching - outputs = undo_virtual_batching(outputs) - return outputs - - inputs_dtype = inputs.dtype.base_dtype - if inputs_dtype in (tf.float16, tf.bfloat16): - # Do all math in float32 if given 16-bit inputs for numeric stability. 
- # In particular, it's very easy for variance to overflow in float16 and - # for safety we also choose to cast bfloat16 to float32. - inputs = tf.cast(inputs, tf.float32) - - # Compute the axes along which to reduce the mean / variance - input_shape = inputs.shape - ndims = len(input_shape) - reduction_axes = [i for i in range(ndims) if i not in self.axis] - if self.virtual_batch_size is not None: - del reduction_axes[1] # Do not reduce along virtual batch dim - - # Broadcasting only necessary for single-axis batch norm where the axis is - # not the last dimension - broadcast_shape = [1] * ndims - broadcast_shape[self.axis[0]] = input_shape.dims[self.axis[0]].value - - def _broadcast(v): - if (v is not None and len(v.shape) != ndims and - reduction_axes != list(range(ndims - 1))): - return tf.reshape(v, broadcast_shape) - return v - - scale, offset = _broadcast(self.gamma), _broadcast(self.beta) - - def _compose_transforms(scale, offset, then_scale, then_offset): - if then_scale is not None: - scale *= then_scale - offset *= then_scale - if then_offset is not None: - offset += then_offset - return (scale, offset) - - # Determine a boolean value for `training`: could be True, False, or None. - training_value = control_flow_util.constant_value(training) - if training_value == False: # pylint: disable=singleton-comparison,g-explicit-bool-comparison - mean, variance = self.moving_mean, self.moving_variance - else: - if self.adjustment: - adj_scale, adj_bias = self.adjustment(tf.shape(inputs)) - # Adjust only during training. - adj_scale = control_flow_util.smart_cond( - training, lambda: adj_scale, lambda: tf.ones_like(adj_scale)) - adj_bias = control_flow_util.smart_cond( - training, lambda: adj_bias, lambda: tf.zeros_like(adj_bias)) - scale, offset = _compose_transforms(adj_scale, adj_bias, scale, offset) - - # Some of the computations here are not necessary when training==False - # but not a constant. However, this makes the code simpler. - keep_dims = self.virtual_batch_size is not None or len(self.axis) > 1 - mean, variance = self._moments( - tf.cast(inputs, self._param_dtype), - reduction_axes, - keep_dims=keep_dims) - - moving_mean = self.moving_mean - moving_variance = self.moving_variance - - mean = control_flow_util.smart_cond( - training, lambda: mean, - lambda: tf.convert_to_tensor(moving_mean)) - variance = control_flow_util.smart_cond( - training, lambda: variance, - lambda: tf.convert_to_tensor(moving_variance)) - - if self.virtual_batch_size is not None: - # This isn't strictly correct since in ghost batch norm, you are - # supposed to sequentially update the moving_mean and moving_variance - # with each sub-batch. However, since the moving statistics are only - # used during evaluation, it is more efficient to just update in one - # step and should not make a significant difference in the result. - new_mean = tf.reduce_mean(mean, axis=1, keepdims=True) - new_variance = tf.reduce_mean(variance, axis=1, keepdims=True) - else: - new_mean, new_variance = mean, variance - - if self._support_zero_size_input(): - # Keras assumes that batch dimension is the first dimension for Batch - # Normalization. 
- input_batch_size = tf.shape(inputs)[0] - else: - input_batch_size = None - - if self.renorm: - r, d, new_mean, new_variance = self._renorm_correction_and_moments( - new_mean, new_variance, training, input_batch_size) - # When training, the normalized values (say, x) will be transformed as - # x * gamma + beta without renorm, and (x * r + d) * gamma + beta - # = x * (r * gamma) + (d * gamma + beta) with renorm. - r = _broadcast(tf.stop_gradient(r, name='renorm_r')) - d = _broadcast(tf.stop_gradient(d, name='renorm_d')) - scale, offset = _compose_transforms(r, d, scale, offset) - - def _do_update(var, value): - """Compute the updates for mean and variance.""" - return self._assign_moving_average(var, value, self.momentum, - input_batch_size) - - def mean_update(): - true_branch = lambda: _do_update(self.moving_mean, new_mean) - false_branch = lambda: self.moving_mean - return control_flow_util.smart_cond(training, true_branch, false_branch) - - def variance_update(): - """Update the moving variance.""" - - def true_branch_renorm(): - # We apply epsilon as part of the moving_stddev to mirror the training - # code path. - moving_stddev = _do_update(self.moving_stddev, - tf.sqrt(new_variance + self.epsilon)) - return self._assign_new_value( - self.moving_variance, - # Apply relu in case floating point rounding causes it to go - # negative. - backend.relu(moving_stddev * moving_stddev - self.epsilon)) - + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "axis": self.axis, + "momentum": self.momentum, + "epsilon": self.epsilon, + "center": self.center, + "scale": self.scale, + "beta_initializer": initializers.serialize(self.beta_initializer), + "gamma_initializer": initializers.serialize(self.gamma_initializer), + "moving_mean_initializer": initializers.serialize( + self.moving_mean_initializer + ), + "moving_variance_initializer": initializers.serialize( + self.moving_variance_initializer + ), + "beta_regularizer": regularizers.serialize(self.beta_regularizer), + "gamma_regularizer": regularizers.serialize(self.gamma_regularizer), + "beta_constraint": constraints.serialize(self.beta_constraint), + "gamma_constraint": constraints.serialize(self.gamma_constraint), + } + # Only add TensorFlow-specific parameters if they are set, so as to + # preserve model compatibility with external Keras. if self.renorm: - true_branch = true_branch_renorm + config["renorm"] = True + config["renorm_clipping"] = self.renorm_clipping + config["renorm_momentum"] = self.renorm_momentum + if self.virtual_batch_size is not None: + config["virtual_batch_size"] = self.virtual_batch_size + # Note: adjustment is not serializable. + if self.adjustment is not None: + logging.warning( + "The `adjustment` function of this `BatchNormalization` " + "layer cannot be serialized and has been omitted from " + "the layer config. It will not be included when " + "re-creating the layer from the saved config." + ) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + ######################## Start of private methods ########################## + def _support_zero_size_input(self): + if not tf.distribute.has_strategy(): + return False + strategy = tf.distribute.get_strategy() + # TODO(b/195085185): remove experimental_enable_get_next_as_optional + # after migrating all users. 
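+        # Prefer the newer `enable_partial_batch_handling` flag, fall back to
+        # the experimental flag, and default to False when neither is set on
+        # the strategy.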
+ return getattr( + strategy.extended, + "enable_partial_batch_handling", + getattr( + strategy.extended, + "experimental_enable_get_next_as_optional", + False, + ), + ) + + def _assign_moving_average(self, variable, value, momentum, inputs_size): + def calculate_update_delta(): + decay = tf.convert_to_tensor(1.0 - momentum, name="decay") + if decay.dtype != variable.dtype.base_dtype: + decay = tf.cast(decay, variable.dtype.base_dtype) + update_delta = (variable - tf.cast(value, variable.dtype)) * decay + if inputs_size is not None: + update_delta = tf.where( + inputs_size > 0, + update_delta, + backend.zeros_like(update_delta), + ) + return update_delta + + with backend.name_scope("AssignMovingAvg") as scope: + if tf.compat.v1.executing_eagerly_outside_functions(): + return variable.assign_sub(calculate_update_delta(), name=scope) + else: + with tf.compat.v1.colocate_with(variable): + return tf.compat.v1.assign_sub( + variable, calculate_update_delta(), name=scope + ) + + def _assign_new_value(self, variable, value): + with backend.name_scope("AssignNewValue") as scope: + if tf.compat.v1.executing_eagerly_outside_functions(): + return variable.assign(value, name=scope) + else: + with tf.compat.v1.colocate_with(variable): + return tf.compat.v1.assign(variable, value, name=scope) + + def _fused_batch_norm(self, inputs, mask, training): + """Returns the output of fused batch norm.""" + if mask is not None: + warnings.warn( + "Masking is not supported with `fused=True`. " + "You should either turn off fusing " + "(`fused=False`) or you should not pass a `mask` " + "argument when calling the layer. " + "For the moment `mask` will be ignored for the " + "normalization." + ) + if self.center: + beta = self.beta else: - true_branch = lambda: _do_update(self.moving_variance, new_variance) - - false_branch = lambda: self.moving_variance - return control_flow_util.smart_cond(training, true_branch, false_branch) - - self.add_update(mean_update) - self.add_update(variance_update) - - mean = tf.cast(mean, inputs.dtype) - variance = tf.cast(variance, inputs.dtype) - if offset is not None: - offset = tf.cast(offset, inputs.dtype) - if scale is not None: - scale = tf.cast(scale, inputs.dtype) - outputs = tf.nn.batch_normalization(inputs, _broadcast(mean), - _broadcast(variance), offset, scale, - self.epsilon) - if inputs_dtype in (tf.float16, tf.bfloat16): - outputs = tf.cast(outputs, inputs_dtype) - - # If some components of the shape got lost due to adjustments, fix that. 
- outputs.set_shape(input_shape) - - if self.virtual_batch_size is not None: - outputs = undo_virtual_batching(outputs) - return outputs - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'axis': self.axis, - 'momentum': self.momentum, - 'epsilon': self.epsilon, - 'center': self.center, - 'scale': self.scale, - 'beta_initializer': initializers.serialize(self.beta_initializer), - 'gamma_initializer': initializers.serialize(self.gamma_initializer), - 'moving_mean_initializer': - initializers.serialize(self.moving_mean_initializer), - 'moving_variance_initializer': - initializers.serialize(self.moving_variance_initializer), - 'beta_regularizer': regularizers.serialize(self.beta_regularizer), - 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), - 'beta_constraint': constraints.serialize(self.beta_constraint), - 'gamma_constraint': constraints.serialize(self.gamma_constraint) - } - # Only add TensorFlow-specific parameters if they are set, so as to preserve - # model compatibility with external Keras. - if self.renorm: - config['renorm'] = True - config['renorm_clipping'] = self.renorm_clipping - config['renorm_momentum'] = self.renorm_momentum - if self.virtual_batch_size is not None: - config['virtual_batch_size'] = self.virtual_batch_size - # Note: adjustment is not serializable. - if self.adjustment is not None: - logging.warning('The `adjustment` function of this `BatchNormalization` ' - 'layer cannot be serialized and has been omitted from ' - 'the layer config. It will not be included when ' - 're-creating the layer from the saved config.') - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -# pylint: disable=g-classes-have-attributes -@keras_export('keras.layers.experimental.SyncBatchNormalization', v1=[]) -class SyncBatchNormalization(BatchNormalizationBase): - r"""Normalize and scale inputs or activations synchronously across replicas. - - Applies batch normalization to activations of the previous layer at each batch - by synchronizing the global batch statistics across all devices that are - training the model. For specific details about batch normalization please - refer to the `tf.keras.layers.BatchNormalization` layer docs. - - If this layer is used when using tf.distribute strategy to train models - across devices/workers, there will be an allreduce call to aggregate batch - statistics across all replicas at every training step. Without tf.distribute - strategy, this layer behaves as a regular `tf.keras.layers.BatchNormalization` - layer. - - Example usage: - - ```python - strategy = tf.distribute.MirroredStrategy() - - with strategy.scope(): - model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(16)) - model.add(tf.keras.layers.experimental.SyncBatchNormalization()) - ``` - - Args: - axis: Integer, the axis that should be normalized - (typically the features axis). - For instance, after a `Conv2D` layer with - `data_format="channels_first"`, - set `axis=1` in `BatchNormalization`. - momentum: Momentum for the moving average. - epsilon: Small float added to variance to avoid dividing by zero. - center: If True, add offset of `beta` to normalized tensor. - If False, `beta` is ignored. - scale: If True, multiply by `gamma`. - If False, `gamma` is not used. - When the next layer is linear (also e.g. `nn.relu`), - this can be disabled since the scaling - will be done by the next layer. - beta_initializer: Initializer for the beta weight. 
- gamma_initializer: Initializer for the gamma weight. - moving_mean_initializer: Initializer for the moving mean. - moving_variance_initializer: Initializer for the moving variance. - beta_regularizer: Optional regularizer for the beta weight. - gamma_regularizer: Optional regularizer for the gamma weight. - beta_constraint: Optional constraint for the beta weight. - gamma_constraint: Optional constraint for the gamma weight. - - Call arguments: - inputs: Input tensor (of any rank). - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. - - `training=True`: The layer will normalize its inputs using the - mean and variance of the current batch of inputs. - - `training=False`: The layer will normalize its inputs using the - mean and variance of its moving statistics, learned during training. - - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - Output shape: - Same shape as input. - - """ - - def __init__(self, - axis=-1, - momentum=0.99, - epsilon=1e-3, - center=True, - scale=True, - beta_initializer='zeros', - gamma_initializer='ones', - moving_mean_initializer='zeros', - moving_variance_initializer='ones', - beta_regularizer=None, - gamma_regularizer=None, - beta_constraint=None, - gamma_constraint=None, - **kwargs): - if kwargs.pop('fused', None): - raise ValueError( - '`fused` argument cannot be True for SyncBatchNormalization.') - - # Currently we only support aggregating over the global batch size. - super().__init__( - axis=axis, - momentum=momentum, - epsilon=epsilon, - center=center, - scale=scale, - beta_initializer=beta_initializer, - gamma_initializer=gamma_initializer, - moving_mean_initializer=moving_mean_initializer, - moving_variance_initializer=moving_variance_initializer, - beta_regularizer=beta_regularizer, - gamma_regularizer=gamma_regularizer, - beta_constraint=beta_constraint, - gamma_constraint=gamma_constraint, - fused=False, - **kwargs) - - def _calculate_mean_and_var(self, x, axes, keep_dims): - - with backend.name_scope('moments'): - # The dynamic range of fp16 is too limited to support the collection of - # sufficient statistics. As a workaround we simply perform the operations - # on 32-bit floats before converting the mean and variance back to fp16 - y = tf.cast(x, tf.float32) if x.dtype == tf.float16 else x - replica_ctx = tf.distribute.get_replica_context() - if replica_ctx: - local_sum = tf.reduce_sum(y, axis=axes, keepdims=True) - local_squared_sum = tf.reduce_sum(tf.square(y), axis=axes, - keepdims=True) - batch_size = tf.cast(tf.shape(y)[axes[0]], - tf.float32) - # TODO(b/163099951): batch the all-reduces once we sort out the ordering - # issue for NCCL. We don't have a mechanism to launch NCCL in the same - # order in each replica nowadays, so we limit NCCL to batch all-reduces. 
- y_sum = replica_ctx.all_reduce(tf.distribute.ReduceOp.SUM, local_sum) - y_squared_sum = replica_ctx.all_reduce(tf.distribute.ReduceOp.SUM, - local_squared_sum) - global_batch_size = replica_ctx.all_reduce(tf.distribute.ReduceOp.SUM, - batch_size) - - axes_vals = [(tf.shape(y))[axes[i]] - for i in range(1, len(axes))] - multiplier = tf.cast(tf.reduce_prod(axes_vals), - tf.float32) - multiplier = multiplier * global_batch_size - - mean = y_sum / multiplier - y_squared_mean = y_squared_sum / multiplier - # var = E(x^2) - E(x)^2 - variance = y_squared_mean - tf.square(mean) - else: - # Compute true mean while keeping the dims for proper broadcasting. - mean = tf.reduce_mean(y, axes, keepdims=True, name='mean') - # sample variance, not unbiased variance - # Note: stop_gradient does not change the gradient that gets - # backpropagated to the mean from the variance calculation, - # because that gradient is zero - variance = tf.reduce_mean( - tf.math.squared_difference(y, tf.stop_gradient(mean)), - axes, - keepdims=True, - name='variance') - if not keep_dims: - mean = tf.squeeze(mean, axes) - variance = tf.squeeze(variance, axes) - if x.dtype == tf.float16: - return (tf.cast(mean, tf.float16), - tf.cast(variance, tf.float16)) - else: - return (mean, variance) - - -@keras_export('keras.layers.BatchNormalization', v1=[]) + beta = backend.constant( + 0.0, dtype=self._param_dtype, shape=self._param_shape + ) + if self.scale: + gamma = self.gamma + else: + gamma = backend.constant( + 1.0, dtype=self._param_dtype, shape=self._param_shape + ) + + # TODO(b/129279393): Support zero batch input in non + # DistributionStrategy code as well. + if self._support_zero_size_input(): + # Keras assumes that batch dimension is the first dimension for + # Batch Normalization. + input_batch_size = tf.shape(inputs)[0] + else: + input_batch_size = None + + # TODO(rmlarsen): Support using fused avg updates for non-eager + # execution after fixing graph pattern matching and enabling + # fused_batch_norm to take exponential_avg_factor as a tensor input. + use_fused_avg_updates = ( + tf.compat.v1.executing_eagerly_outside_functions() + and isinstance(self.momentum, (float, int)) + and get_enclosing_xla_context() is None + ) + if use_fused_avg_updates: + exponential_avg_factor = 1.0 - self.momentum + else: + exponential_avg_factor = None + + def _maybe_add_or_remove_bessels_correction(variance, remove=True): + r"""Add or remove Bessel's correction.""" + # Removes Bessel's correction if remove == True, adds it otherwise. + # This is to be consistent with non-fused batch norm. Note that the + # variance computed by fused batch norm is with Bessel's correction. + # This is only used in legacy V1 batch norm tests. 
+ if self._bessels_correction_test_only: + return variance + sample_size = tf.cast( + tf.size(inputs) / tf.size(variance), variance.dtype + ) + if remove: + factor = ( + sample_size - tf.cast(1.0, variance.dtype) + ) / sample_size + else: + factor = sample_size / ( + sample_size - tf.cast(1.0, variance.dtype) + ) + return variance * factor + + def _fused_batch_norm_training(): + return tf.compat.v1.nn.fused_batch_norm( + inputs, + gamma, + beta, + mean=self.moving_mean, + variance=_maybe_add_or_remove_bessels_correction( + self.moving_variance, remove=False + ), + epsilon=self.epsilon, + is_training=True, + data_format=self._data_format, + exponential_avg_factor=exponential_avg_factor, + ) + + def _fused_batch_norm_inference(): + return tf.compat.v1.nn.fused_batch_norm( + inputs, + gamma, + beta, + mean=self.moving_mean, + variance=self.moving_variance, + epsilon=self.epsilon, + is_training=False, + data_format=self._data_format, + ) + + output, mean, variance = control_flow_util.smart_cond( + training, _fused_batch_norm_training, _fused_batch_norm_inference + ) + variance = _maybe_add_or_remove_bessels_correction( + variance, remove=True + ) + + training_value = control_flow_util.constant_value(training) + if training_value or training_value is None: + if not use_fused_avg_updates: + if training_value is None: + momentum = control_flow_util.smart_cond( + training, lambda: self.momentum, lambda: 1.0 + ) + else: + momentum = tf.convert_to_tensor(self.momentum) + + def mean_update(): + """Update self.moving_mean with the most recent data point.""" + if use_fused_avg_updates: + if input_batch_size is not None: + new_mean = control_flow_util.smart_cond( + input_batch_size > 0, + lambda: mean, + lambda: self.moving_mean, + ) + else: + new_mean = mean + return self._assign_new_value(self.moving_mean, new_mean) + else: + return self._assign_moving_average( + self.moving_mean, mean, momentum, input_batch_size + ) + + def variance_update(): + """Update self.moving_variance with the most recent data + point.""" + if use_fused_avg_updates: + if input_batch_size is not None: + new_variance = control_flow_util.smart_cond( + input_batch_size > 0, + lambda: variance, + lambda: self.moving_variance, + ) + else: + new_variance = variance + return self._assign_new_value( + self.moving_variance, new_variance + ) + else: + return self._assign_moving_average( + self.moving_variance, + variance, + momentum, + input_batch_size, + ) + + self.add_update(mean_update) + self.add_update(variance_update) + + return output + + def _renorm_correction_and_moments( + self, mean, variance, training, inputs_size + ): + """Returns the correction and update values for renorm.""" + stddev = tf.sqrt(variance + self.epsilon) + # Compute the average mean and standard deviation, as if they were + # initialized with this batch's moments. + renorm_mean = self.renorm_mean + # Avoid divide by zero early on in training. + renorm_stddev = tf.maximum(self.renorm_stddev, tf.sqrt(self.epsilon)) + # Compute the corrections for batch renorm. + r = stddev / renorm_stddev + d = (mean - renorm_mean) / renorm_stddev + # Ensure the corrections use pre-update moving averages. 
+ with tf.control_dependencies([r, d]): + mean = tf.identity(mean) + stddev = tf.identity(stddev) + rmin, rmax, dmax = [ + self.renorm_clipping.get(key) for key in ["rmin", "rmax", "dmax"] + ] + if rmin is not None: + r = tf.maximum(r, rmin) + if rmax is not None: + r = tf.minimum(r, rmax) + if dmax is not None: + d = tf.maximum(d, -dmax) + d = tf.minimum(d, dmax) + # When not training, use r=1, d=0. + r = control_flow_util.smart_cond( + training, lambda: r, lambda: tf.ones_like(r) + ) + d = control_flow_util.smart_cond( + training, lambda: d, lambda: tf.zeros_like(d) + ) + + def _update_renorm_variable(var, value, inputs_size): + """Updates a moving average and weight, returns the unbiased + value.""" + value = tf.identity(value) + + def _do_update(): + """Updates the var, returns the updated value.""" + new_var = self._assign_moving_average( + var, value, self.renorm_momentum, inputs_size + ) + return new_var + + def _fake_update(): + return tf.identity(var) + + return control_flow_util.smart_cond( + training, _do_update, _fake_update + ) + + # TODO(yuefengz): colocate the operations + update_new_mean = _update_renorm_variable( + self.renorm_mean, mean, inputs_size + ) + update_new_stddev = _update_renorm_variable( + self.renorm_stddev, stddev, inputs_size + ) + + # Update the inference mode moving averages with the batch value. + with tf.control_dependencies([update_new_mean, update_new_stddev]): + out_mean = tf.identity(mean) + out_variance = tf.identity(variance) + + return (r, d, out_mean, out_variance) + + def _calculate_mean_and_var( + self, inputs, reduction_axes, keep_dims, mask=None + ): + if self.synchronized: + return self._sync_calculate_mean_and_var( + inputs, reduction_axes, keep_dims, mask=mask + ) + return self._no_sync_calculate_mean_and_var( + inputs, reduction_axes, keep_dims, mask=mask + ) + + def _no_sync_calculate_mean_and_var( + self, inputs, reduction_axes, keep_dims, mask=None + ): + if mask is None: + return tf.nn.moments(inputs, reduction_axes, keepdims=keep_dims) + else: + mask_weights = tf.cast( + mask, self.compute_dtype, name="mask_weights" + ) + mask_weights = tf.expand_dims( + mask_weights, axis=-1, name="mask_weights_broadcasted" + ) + return tf.nn.weighted_moments( + inputs, + axes=reduction_axes, + frequency_weights=mask_weights, + keepdims=keep_dims, + ) + + def _sync_calculate_mean_and_var( + self, x, reduction_axes, keep_dims, mask=None + ): + with backend.name_scope("moments"): + # The dynamic range of fp16 is too limited to support the collection + # of sufficient statistics. 
As a workaround we simply perform the + # operations on 32-bit floats before converting the mean and + # variance back to fp16 + y = tf.cast(x, tf.float32) if x.dtype == tf.float16 else x + replica_ctx = tf.distribute.get_replica_context() + + if not replica_ctx: + return self._no_sync_calculate_mean_and_var( + x, reduction_axes, keep_dims, mask=mask + ) + + if mask is not None: + mask_weights = tf.cast(mask, y.dtype, name="mask_weights") + mask_weights = tf.expand_dims( + mask_weights, axis=-1, name="mask_weights_broadcasted" + ) + y *= mask_weights + local_count = tf.broadcast_to( + mask_weights, tf.shape(y), name="count" + ) + else: + local_count = tf.ones_like(y, name="count") + + local_sum = tf.reduce_sum(y, axis=reduction_axes, keepdims=True) + local_squared_sum = tf.reduce_sum( + tf.square(y), axis=reduction_axes, keepdims=True + ) + local_count = tf.reduce_sum( + local_count, axis=reduction_axes, keepdims=True + ) + + # TODO(b/163099951): batch the all-reduces once we sort out the + # ordering issue for NCCL. We don't have a mechanism to launch + # NCCL in the same order in each replica nowadays, so we limit + # NCCL to batch all-reduces. + y_sum = replica_ctx.all_reduce( + tf.distribute.ReduceOp.SUM, local_sum + ) + y_squared_sum = replica_ctx.all_reduce( + tf.distribute.ReduceOp.SUM, local_squared_sum + ) + count_sum = replica_ctx.all_reduce( + tf.distribute.ReduceOp.SUM, local_count + ) + + mean = y_sum / count_sum + y_squared_mean = y_squared_sum / count_sum + # var = E(x^2) - E(x)^2 + variance = y_squared_mean - tf.square(mean) + if not keep_dims: + mean = tf.squeeze(mean, reduction_axes) + variance = tf.squeeze(variance, reduction_axes) + if x.dtype == tf.float16: + return ( + tf.cast(mean, tf.float16), + tf.cast(variance, tf.float16), + ) + else: + return (mean, variance) + + def _dtensor_calculate_mean_and_var( + self, inputs, reduction_axes, keep_dims, mask=None + ): + if self.synchronized: + return self._dtensor_sync_calculate_mean_and_var( + inputs, reduction_axes, keep_dims, mask=mask + ) + return self._dtensor_no_sync_calculate_mean_and_var( + inputs, reduction_axes, keep_dims, mask=mask + ) + + def _dtensor_no_sync_calculate_mean_and_var( + self, inputs, reduction_axes, keep_dims, mask=None + ): + replica_tensor = _expand_tensor_with_local_replica_group(inputs) + local_batch_size = tf.shape(replica_tensor)[1] + + # Since we added a new axis in the beginning, all the value in + # reduction_axes need to be incremented by 1. + updated_reduction_axes = [n + 1 for n in reduction_axes] + + if mask is None: + mean, var = tf.nn.moments( + replica_tensor, updated_reduction_axes, keepdims=keep_dims + ) + else: + mask_weights = tf.cast( + mask, self.compute_dtype, name="mask_weights" + ) + mask_weights = tf.expand_dims( + mask_weights, axis=-1, name="mask_weights_broadcasted" + ) + mask_weights = _expand_tensor_with_local_replica_group(mask_weights) + mean, var = tf.nn.weighted_moments( + replica_tensor, + axes=updated_reduction_axes, + frequency_weights=mask_weights, + keepdims=keep_dims, + ) + # Also note that the mean/var we have here will have an extra dim in + # axis 0, which is represented for num local replica. Down the + # stream, the mean/var will be used to update the moving_mean/var + # and also normalize the inputs. To make the shape match, we will + # expand the tensor shape from [num_replica, x, y] to + # [batch_size, x, y] so that it can be properly used for + # normalization. 
When it reaches the mean/var update, a separate + # logic will be there to reduce_mean the value based on the batch + # dim. + mean = tf.repeat(mean, local_batch_size, axis=0) + var = tf.repeat(var, local_batch_size, axis=0) + if not keep_dims: + # We need to fill the reduced dims so that the mean/var can be + # properly broadcast to the input shapes. In the example above, + # the original reduction_axes is [0, 1]. We ignore the first 0 + # (batch dim) here since we already expand and use it as num_replica + for dim in reduction_axes[1:]: + mean = tf.expand_dims(mean, axis=dim) + var = tf.expand_dims(var, axis=dim) + return mean, var + + def _dtensor_sync_calculate_mean_and_var( + self, inputs, reduction_axes, keep_dims, mask=None + ): + # In the DTensor sync BN, since the input tensor is already in global + # context, we just need to use the normal moments/weighted_moments + # to calculate mean/var, which is same as the non-sync BN in the normal + # mode. + return self._no_sync_calculate_mean_and_var( + inputs, reduction_axes, keep_dims, mask + ) + + def _moments(self, inputs, reduction_axes, keep_dims, mask=None): + if utils.running_with_dtensor_strategy(): + mean, variance = self._dtensor_calculate_mean_and_var( + inputs, reduction_axes, keep_dims, mask=mask + ) + else: + mean, variance = self._calculate_mean_and_var( + inputs, reduction_axes, keep_dims, mask=mask + ) + # TODO(b/129279393): Support zero batch input in non + # DistributionStrategy code as well. + if self._support_zero_size_input(): + input_batch_size = tf.shape(inputs)[0] + mean = tf.where( + input_batch_size > 0, mean, backend.zeros_like(mean) + ) + variance = tf.where( + input_batch_size > 0, variance, backend.zeros_like(variance) + ) + return mean, variance + + def _get_training_value(self, training=None): + if training is None: + training = backend.learning_phase() + if self._USE_V2_BEHAVIOR: + if isinstance(training, int): + training = bool(training) + if not self.trainable: + # When the layer is not trainable, it overrides the value passed + # from model. + training = False + return training + + +@keras_export("keras.layers.BatchNormalization", v1=[]) class BatchNormalization(BatchNormalizationBase): - """Layer that normalizes its inputs. - - Batch normalization applies a transformation that maintains the mean output - close to 0 and the output standard deviation close to 1. - - Importantly, batch normalization works differently during training and - during inference. - - **During training** (i.e. when using `fit()` or when calling the layer/model - with the argument `training=True`), the layer normalizes its output using - the mean and standard deviation of the current batch of inputs. That is to - say, for each channel being normalized, the layer returns - `gamma * (batch - mean(batch)) / sqrt(var(batch) + epsilon) + beta`, where: - - - `epsilon` is small constant (configurable as part of the constructor - arguments) - - `gamma` is a learned scaling factor (initialized as 1), which - can be disabled by passing `scale=False` to the constructor. - - `beta` is a learned offset factor (initialized as 0), which - can be disabled by passing `center=False` to the constructor. - - **During inference** (i.e. when using `evaluate()` or `predict()` or when - calling the layer/model with the argument `training=False` (which is the - default), the layer normalizes its output using a moving average of the - mean and standard deviation of the batches it has seen during training. 
That - is to say, it returns - `gamma * (batch - self.moving_mean) / sqrt(self.moving_var + epsilon) + beta`. - - `self.moving_mean` and `self.moving_var` are non-trainable variables that - are updated each time the layer in called in training mode, as such: - - - `moving_mean = moving_mean * momentum + mean(batch) * (1 - momentum)` - - `moving_var = moving_var * momentum + var(batch) * (1 - momentum)` - - As such, the layer will only normalize its inputs during inference - *after having been trained on data that has similar statistics as the - inference data*. - - Args: - axis: Integer, the axis that should be normalized (typically the features - axis). For instance, after a `Conv2D` layer with - `data_format="channels_first"`, set `axis=1` in `BatchNormalization`. - momentum: Momentum for the moving average. - epsilon: Small float added to variance to avoid dividing by zero. - center: If True, add offset of `beta` to normalized tensor. If False, `beta` - is ignored. - scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the - next layer is linear (also e.g. `nn.relu`), this can be disabled since the - scaling will be done by the next layer. - beta_initializer: Initializer for the beta weight. - gamma_initializer: Initializer for the gamma weight. - moving_mean_initializer: Initializer for the moving mean. - moving_variance_initializer: Initializer for the moving variance. - beta_regularizer: Optional regularizer for the beta weight. - gamma_regularizer: Optional regularizer for the gamma weight. - beta_constraint: Optional constraint for the beta weight. - gamma_constraint: Optional constraint for the gamma weight. - - Call arguments: - inputs: Input tensor (of any rank). - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. - - `training=True`: The layer will normalize its inputs using the mean and - variance of the current batch of inputs. - - `training=False`: The layer will normalize its inputs using the mean and - variance of its moving statistics, learned during training. - - Input shape: - Arbitrary. Use the keyword argument `input_shape` (tuple of - integers, does not include the samples axis) when using this layer as the - first layer in a model. - - Output shape: - Same shape as input. - - Reference: - - [Ioffe and Szegedy, 2015](https://arxiv.org/abs/1502.03167). - - **About setting `layer.trainable = False` on a `BatchNormalization` layer:** - - The meaning of setting `layer.trainable = False` is to freeze the layer, - i.e. its internal state will not change during training: - its trainable weights will not be updated - during `fit()` or `train_on_batch()`, and its state updates will not be run. - - Usually, this does not necessarily mean that the layer is run in inference - mode (which is normally controlled by the `training` argument that can - be passed when calling a layer). "Frozen state" and "inference mode" - are two separate concepts. - - However, in the case of the `BatchNormalization` layer, **setting - `trainable = False` on the layer means that the layer will be - subsequently run in inference mode** (meaning that it will use - the moving mean and the moving variance to normalize the current batch, - rather than using the mean and variance of the current batch). - - This behavior has been introduced in TensorFlow 2.0, in order - to enable `layer.trainable = False` to produce the most commonly - expected behavior in the convnet fine-tuning use case. 
-
-  Note that:
-  - Setting `trainable` on an model containing other layers will
-    recursively set the `trainable` value of all inner layers.
-  - If the value of the `trainable`
-    attribute is changed after calling `compile()` on a model,
-    the new value doesn't take effect for this model
-    until `compile()` is called again.
-  """
-  _USE_V2_BEHAVIOR = True
-
-  @utils.allow_initializer_layout
-  def __init__(self,
-               axis=-1,
-               momentum=0.99,
-               epsilon=1e-3,
-               center=True,
-               scale=True,
-               beta_initializer='zeros',
-               gamma_initializer='ones',
-               moving_mean_initializer='zeros',
-               moving_variance_initializer='ones',
-               beta_regularizer=None,
-               gamma_regularizer=None,
-               beta_constraint=None,
-               gamma_constraint=None,
-               **kwargs):
-    super().__init__(
-        axis=axis,
-        momentum=momentum,
-        epsilon=epsilon,
-        center=center,
-        scale=scale,
-        beta_initializer=beta_initializer,
-        gamma_initializer=gamma_initializer,
-        moving_mean_initializer=moving_mean_initializer,
-        moving_variance_initializer=moving_variance_initializer,
-        beta_regularizer=beta_regularizer,
-        gamma_regularizer=gamma_regularizer,
-        beta_constraint=beta_constraint,
-        gamma_constraint=gamma_constraint,
-        **kwargs)
+    """Layer that normalizes its inputs.
+
+    Batch normalization applies a transformation that maintains the mean output
+    close to 0 and the output standard deviation close to 1.
+
+    Importantly, batch normalization works differently during training and
+    during inference.
+
+    **During training** (i.e. when using `fit()` or when calling the layer/model
+    with the argument `training=True`), the layer normalizes its output using
+    the mean and standard deviation of the current batch of inputs. That is to
+    say, for each channel being normalized, the layer returns
+    `gamma * (batch - mean(batch)) / sqrt(var(batch) + epsilon) + beta`, where:
+
+    - `epsilon` is a small constant (configurable as part of the constructor
+      arguments)
+    - `gamma` is a learned scaling factor (initialized as 1), which
+      can be disabled by passing `scale=False` to the constructor.
+    - `beta` is a learned offset factor (initialized as 0), which
+      can be disabled by passing `center=False` to the constructor.
+
+    **During inference** (i.e. when using `evaluate()` or `predict()` or when
+    calling the layer/model with the argument `training=False`, which is the
+    default), the layer normalizes its output using a moving average of the
+    mean and standard deviation of the batches it has seen during training. That
+    is to say, it returns
+    `gamma * (batch - self.moving_mean) / sqrt(self.moving_var+epsilon) + beta`.
+
+    `self.moving_mean` and `self.moving_var` are non-trainable variables that
+    are updated each time the layer is called in training mode, as follows:
+
+    - `moving_mean = moving_mean * momentum + mean(batch) * (1 - momentum)`
+    - `moving_var = moving_var * momentum + var(batch) * (1 - momentum)`
+
+    As such, the layer will only normalize its inputs during inference
+    *after having been trained on data that has similar statistics as the
+    inference data*.
+
+    When `synchronized=True` is set and if this layer is used within a
+    `tf.distribute` strategy, there will be an `allreduce` call
+    to aggregate batch statistics across all replicas at every
+    training step. Setting `synchronized` has no impact when the model is
+    trained without specifying any distribution strategy.
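[Editor's note] The `moving_*` update rule quoted above is implemented earlier in this diff by `_assign_moving_average`, which applies the algebraically identical subtractive form `variable -= (variable - value) * (1 - momentum)`. A quick NumPy sanity check of that equivalence, illustrative only and not part of the patch:

```python
import numpy as np

momentum = 0.99
moving_mean = np.array([0.5, -1.0])
batch_mean = np.array([1.0, 2.0])

# Form used in the docstring above:
#   moving = moving * momentum + batch * (1 - momentum)
docstring_form = moving_mean * momentum + batch_mean * (1 - momentum)

# Form used by _assign_moving_average:
#   moving = moving - (moving - batch) * (1 - momentum)
delta_form = moving_mean - (moving_mean - batch_mean) * (1 - momentum)

np.testing.assert_allclose(docstring_form, delta_form)
```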
+ + Example usage: + + ```python + strategy = tf.distribute.MirroredStrategy() + + with strategy.scope(): + model = tf.keras.Sequential() + model.add(tf.keras.layers.Dense(16)) + model.add(tf.keras.layers.BatchNormalization(synchronized=True)) + ``` + + Args: + axis: Integer, the axis that should be normalized (typically the features + axis). For instance, after a `Conv2D` layer with + `data_format="channels_first"`, set `axis=1` in `BatchNormalization`. + momentum: Momentum for the moving average. + epsilon: Small float added to variance to avoid dividing by zero. + center: If True, add offset of `beta` to normalized tensor. If False, + `beta` is ignored. + scale: If True, multiply by `gamma`. If False, `gamma` is not used. When + the next layer is linear (also e.g. `nn.relu`), this can be disabled + since the scaling will be done by the next layer. + beta_initializer: Initializer for the beta weight. + gamma_initializer: Initializer for the gamma weight. + moving_mean_initializer: Initializer for the moving mean. + moving_variance_initializer: Initializer for the moving variance. + beta_regularizer: Optional regularizer for the beta weight. + gamma_regularizer: Optional regularizer for the gamma weight. + beta_constraint: Optional constraint for the beta weight. + gamma_constraint: Optional constraint for the gamma weight. + synchronized: If True, synchronizes the global batch statistics (mean and + variance) for the layer across all devices at each training step in a + distributed training strategy. If False, each replica uses its own + local batch statistics. Only relevant when used inside a + `tf.distribute` strategy. + + Call arguments: + inputs: Input tensor (of any rank). + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. + - `training=True`: The layer will normalize its inputs using the mean + and variance of the current batch of inputs. + - `training=False`: The layer will normalize its inputs using the mean + and variance of its moving statistics, learned during training. + + Input shape: + Arbitrary. Use the keyword argument `input_shape` (tuple of + integers, does not include the samples axis) when using this layer as the + first layer in a model. + + Output shape: + Same shape as input. + + Reference: + - [Ioffe and Szegedy, 2015](https://arxiv.org/abs/1502.03167). + + **About setting `layer.trainable = False` on a `BatchNormalization` layer:** + + The meaning of setting `layer.trainable = False` is to freeze the layer, + i.e. its internal state will not change during training: + its trainable weights will not be updated + during `fit()` or `train_on_batch()`, and its state updates will not be run. + + Usually, this does not necessarily mean that the layer is run in inference + mode (which is normally controlled by the `training` argument that can + be passed when calling a layer). "Frozen state" and "inference mode" + are two separate concepts. + + However, in the case of the `BatchNormalization` layer, **setting + `trainable = False` on the layer means that the layer will be + subsequently run in inference mode** (meaning that it will use + the moving mean and the moving variance to normalize the current batch, + rather than using the mean and variance of the current batch). + + This behavior has been introduced in TensorFlow 2.0, in order + to enable `layer.trainable = False` to produce the most commonly + expected behavior in the convnet fine-tuning use case. 
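[Editor's note] A minimal sketch of the freezing behavior described above (grounded in `_get_training_value`, which forces `training=False` for a non-trainable layer), assuming a TF 2.x eager environment; illustrative only, not part of the patch:

```python
import numpy as np
import tensorflow as tf

bn = tf.keras.layers.BatchNormalization()
x = np.random.normal(loc=5.0, scale=2.0, size=(32, 4)).astype("float32")
bn(x, training=True)  # builds the layer and updates the moving statistics

bn.trainable = False
frozen_mean = bn.moving_mean.numpy()
# With trainable=False the layer runs in inference mode even though
# training=True is passed, so the moving statistics stay frozen.
bn(x, training=True)
np.testing.assert_allclose(frozen_mean, bn.moving_mean.numpy())
```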
+
+    Note that:
+    - Setting `trainable` on a model containing other layers will
+      recursively set the `trainable` value of all inner layers.
+    - If the value of the `trainable`
+      attribute is changed after calling `compile()` on a model,
+      the new value doesn't take effect for this model
+      until `compile()` is called again.
+    """
+
+    _USE_V2_BEHAVIOR = True
+
+    @utils.allow_initializer_layout
+    def __init__(
+        self,
+        axis=-1,
+        momentum=0.99,
+        epsilon=1e-3,
+        center=True,
+        scale=True,
+        beta_initializer="zeros",
+        gamma_initializer="ones",
+        moving_mean_initializer="zeros",
+        moving_variance_initializer="ones",
+        beta_regularizer=None,
+        gamma_regularizer=None,
+        beta_constraint=None,
+        gamma_constraint=None,
+        synchronized=False,
+        **kwargs,
+    ):
+        # Currently we only support aggregating over the global batch size.
+        super().__init__(
+            axis=axis,
+            momentum=momentum,
+            epsilon=epsilon,
+            center=center,
+            scale=scale,
+            beta_initializer=beta_initializer,
+            gamma_initializer=gamma_initializer,
+            moving_mean_initializer=moving_mean_initializer,
+            moving_variance_initializer=moving_variance_initializer,
+            beta_regularizer=beta_regularizer,
+            gamma_regularizer=gamma_regularizer,
+            beta_constraint=beta_constraint,
+            gamma_constraint=gamma_constraint,
+            synchronized=synchronized,
+            **kwargs,
+        )
+
+
+@keras_export("keras.layers.experimental.SyncBatchNormalization", v1=[])
+@deprecation.deprecated_endpoints(
+    "keras.layers.experimental.SyncBatchNormalization"
+)
+class SyncBatchNormalization(BatchNormalizationBase):
+    """Deprecated. Please use `tf.keras.layers.BatchNormalization` instead.
+
+    Caution: the `tf.keras.layers.experimental.SyncBatchNormalization`
+    endpoint is deprecated and will be removed in a future release. Please use
+    `tf.keras.layers.BatchNormalization` with the parameter `synchronized`
+    set to True.
+    """
+
+    def __init__(
+        self,
+        axis=-1,
+        momentum=0.99,
+        epsilon=1e-3,
+        center=True,
+        scale=True,
+        beta_initializer="zeros",
+        gamma_initializer="ones",
+        moving_mean_initializer="zeros",
+        moving_variance_initializer="ones",
+        beta_regularizer=None,
+        gamma_regularizer=None,
+        beta_constraint=None,
+        gamma_constraint=None,
+        **kwargs,
+    ):
+        warning = (
+            "`tf.keras.layers.experimental.SyncBatchNormalization` endpoint is "
+            "deprecated and will be removed in a future release. Please use "
+            "`tf.keras.layers.BatchNormalization` with parameter "
+            "`synchronized` set to True."
+        )
+        logging.log_first_n(logging.WARN, warning, 1)
+        super().__init__(
+            axis=axis,
+            momentum=momentum,
+            epsilon=epsilon,
+            center=center,
+            scale=scale,
+            beta_initializer=beta_initializer,
+            gamma_initializer=gamma_initializer,
+            moving_mean_initializer=moving_mean_initializer,
+            moving_variance_initializer=moving_variance_initializer,
+            beta_regularizer=beta_regularizer,
+            gamma_regularizer=gamma_regularizer,
+            beta_constraint=beta_constraint,
+            gamma_constraint=gamma_constraint,
+            synchronized=True,
+            **kwargs,
+        )
+
+
+def _expand_tensor_with_local_replica_group(inputs):
+    """Reshape the input tensor to have an extra dimension of replica group.
+
+    Under DTensor usage, batch norm still needs to operate on a per-replica
+    (local) batch, which means we can't directly compute mean/var on a global
+    tensor. To compute a local mean/var, we have to add a new dimension to
+    the tensor, so that the ops will not cross the replica boundary.
+    E.g., a global tensor with shape [8, x, y] that has 2 local replicas
+    becomes [2, 4, x, y], where the first dim is the number of replicas and
+    the second dim is the local batch size. The following ops can then reduce
+    along the local batch dimension.
+
+    Note that this function should only be used under a DTensor-based
+    strategy, and it will use the current strategy in the context to get the
+    number of replicas.
+
+    Args:
+      inputs: Tensor with shape [global_batch_size, ...]
+
+    Returns:
+      Tensor with shape [num_replica, local_batch_size, ...]
+    """
+    # TODO(b/272382109): Implement this as an Op.
+    input_shape = tf.shape(inputs)
+    global_batch_size = input_shape[0]
+    num_replica = tf.distribute.get_strategy().num_replicas_in_sync
+    local_batch_size = global_batch_size // num_replica
+    replica_shape = tf.stack([num_replica, local_batch_size])
+    replica_shape = tf.concat([replica_shape, input_shape[1:]], axis=0)
+    return tf.reshape(inputs, replica_shape)
+
+
+def _raise_for_non_sync_bn_with_renorm_and_dtensor_strategy(
+    synchronized, training, renorm
+):
+    if (
+        utils.running_with_dtensor_strategy()
+        and not synchronized
+        and training == True
+        and renorm
+    ):
+        raise NotImplementedError(
+            "Renorm for BatchNormalization under DTensor based distribution "
+            "strategy is not supported at the moment. Please file a feature "
+            "request if this is blocking your adoption."
+        )
diff --git a/keras/layers/normalization/batch_normalization_dtensor_test.py b/keras/layers/normalization/batch_normalization_dtensor_test.py
new file mode 100644
index 000000000000..fffc914a672d
--- /dev/null
+++ b/keras/layers/normalization/batch_normalization_dtensor_test.py
@@ -0,0 +1,157 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for normalization layers under DTensor context."""
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+from absl.testing import parameterized
+
+from keras.dtensor import test_util
+from keras.dtensor import utils
+from keras.layers.normalization import batch_normalization
+from keras.testing_infra import test_utils
+
+# isort: off
+# Import the MirroredStrategy that is backed by DTensor.
+# It is not a public API yet, so we do a private symbol import for now.
+from tensorflow.python.distribute.experimental import ( + mirrored_strategy as dtensor_mirrored_strategy, +) + + +@test_utils.run_v2_only +class BatchNormalizationDTensorTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + + global_ids = test_util.create_device_ids_array((2,)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": tf.experimental.dtensor.Mesh( + ["batch"], + global_ids, + local_device_ids, + test_util.create_device_list((2,), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) + + def test_strategy_backed_by_dtensor(self): + strategy = dtensor_mirrored_strategy.MirroredStrategy(mesh=self.mesh) + + with strategy.scope(): + self.assertTrue(utils.running_with_dtensor_strategy()) + + self.assertFalse(utils.running_with_dtensor_strategy()) + + normal_mirrored_strategy = tf.distribute.MirroredStrategy( + ["CPU:0", "CPU:1"] + ) + self.assertFalse(utils.running_with_dtensor_strategy()) + with normal_mirrored_strategy.scope(): + self.assertFalse(utils.running_with_dtensor_strategy()) + + @parameterized.product( + training=[True, False], + synchronized=[True, False], + renorm=[True, False], + use_mask=[True, False], + ) + def test_batch_normalization_with_dtensor_strategy( + self, training, synchronized, renorm, use_mask + ): + num_replica = 2 + local_batch_size = 4 + global_batch_size = num_replica * local_batch_size + feature_shape = [3, 5] + global_inputs = tf.random.uniform( + shape=[global_batch_size, *feature_shape], dtype=tf.float32 + ) + replica_inputs = tf.reshape( + global_inputs, [num_replica, local_batch_size, *feature_shape] + ) + if use_mask: + mask = tf.concat( + [ + tf.ones(shape=[global_batch_size, 2]), + tf.zeros(shape=[global_batch_size, 1]), + ], + axis=-1, + ) + mask = tf.cast(mask, tf.bool) + mask = tf.reshape(mask, [num_replica, local_batch_size, 3]) + + def value_fn(value_context): + return { + "inputs": replica_inputs[ + value_context.replica_id_in_sync_group + ], + "mask": mask[value_context.replica_id_in_sync_group], + } + + else: + + def value_fn(value_context): + return replica_inputs[value_context.replica_id_in_sync_group] + + normal_strategy = tf.distribute.MirroredStrategy(["CPU:0", "CPU:1"]) + dtensor_strategy = dtensor_mirrored_strategy.MirroredStrategy( + mesh=self.mesh + ) + init_kwargs = {"synchronized": synchronized, "renorm": renorm} + bn_layer_0 = batch_normalization.BatchNormalization(**init_kwargs) + bn_layer_1 = batch_normalization.BatchNormalization(**init_kwargs) + run_kwargs = {"training": training} + + normal_strategy_result = self._run_bn_training_with_strategy( + normal_strategy, value_fn, bn_layer_0, run_kwargs + ) + if training and not synchronized and renorm: + # This is an unsupported case at the moment. 
+ with self.assertRaisesRegexp(NotImplementedError, "not supported"): + self._run_bn_training_with_strategy( + dtensor_strategy, value_fn, bn_layer_1, run_kwargs + ) + return + else: + dtensor_strategy_result = self._run_bn_training_with_strategy( + dtensor_strategy, value_fn, bn_layer_1, run_kwargs + ) + self.assertAllClose( + normal_strategy_result.values, dtensor_strategy_result.values + ) + self.assertAllClose(bn_layer_0.moving_mean, bn_layer_1.moving_mean) + self.assertAllClose( + bn_layer_0.moving_variance, bn_layer_1.moving_variance + ) + + def _run_bn_training_with_strategy( + self, strategy, value_fn, bn_layer, run_kwargs + ): + @tf.function + def run_fn(inputs): + if isinstance(inputs, dict): + return bn_layer(**inputs, **run_kwargs) + return bn_layer(inputs, **run_kwargs) + + distributed_inputs = ( + strategy.experimental_distribute_values_from_function(value_fn) + ) + + return strategy.run(run_fn, args=(distributed_inputs,)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/normalization/batch_normalization_test.py b/keras/layers/normalization/batch_normalization_test.py index 885e9f30afbc..80ea097ca421 100644 --- a/keras/layers/normalization/batch_normalization_test.py +++ b/keras/layers/normalization/batch_normalization_test.py @@ -14,10 +14,9 @@ # ============================================================================== """Tests for normalization layers.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras from keras.layers.normalization import batch_normalization @@ -27,511 +26,648 @@ class BatchNormalizationTest(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes - def test_basic_batchnorm(self): - test_utils.layer_test( - keras.layers.BatchNormalization, - kwargs={ - 'momentum': 0.9, - 'epsilon': 0.1, - 'gamma_regularizer': keras.regularizers.l2(0.01), - 'beta_regularizer': keras.regularizers.l2(0.01) - }, - input_shape=(3, 4, 2)) - test_utils.layer_test( - keras.layers.BatchNormalization, - kwargs={ - 'gamma_initializer': 'ones', - 'beta_initializer': 'ones', - 'moving_mean_initializer': 'zeros', - 'moving_variance_initializer': 'ones' - }, - input_shape=(3, 4, 2)) - test_utils.layer_test( - keras.layers.BatchNormalization, - kwargs={'scale': False, - 'center': False}, - input_shape=(3, 3)) - test_utils.layer_test( - keras.layers.BatchNormalization, - kwargs={ - 'gamma_initializer': 'ones', - 'beta_initializer': 'ones', - 'moving_mean_initializer': 'zeros', - 'moving_variance_initializer': 'ones' - }, - input_shape=(3, 2, 4, 2)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_batchnorm_weights(self): - layer = keras.layers.BatchNormalization(scale=False, center=False) - layer.build((None, 3, 4)) - self.assertEqual(len(layer.trainable_weights), 0) - self.assertEqual(len(layer.weights), 2) - - layer = keras.layers.BatchNormalization() - layer.build((None, 3, 4)) - self.assertEqual(len(layer.trainable_weights), 2) - self.assertEqual(len(layer.weights), 4) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_batchnorm_regularization(self): - layer = keras.layers.BatchNormalization( - gamma_regularizer='l1', beta_regularizer='l1') - layer.build((None, 3, 4)) - self.assertEqual(len(layer.losses), 2) - max_norm = keras.constraints.max_norm - layer = keras.layers.BatchNormalization( - gamma_constraint=max_norm, beta_constraint=max_norm) - layer.build((None, 3, 
4)) - self.assertEqual(layer.gamma.constraint, max_norm) - self.assertEqual(layer.beta.constraint, max_norm) - - @test_combinations.run_all_keras_modes - def test_batchnorm_convnet(self): - if tf.test.is_gpu_available(cuda_only=True): - with self.session(): + @test_combinations.run_all_keras_modes + def test_basic_batchnorm(self): + test_utils.layer_test( + keras.layers.BatchNormalization, + kwargs={ + "momentum": 0.9, + "epsilon": 0.1, + "gamma_regularizer": keras.regularizers.l2(0.01), + "beta_regularizer": keras.regularizers.l2(0.01), + }, + input_shape=(3, 4, 2), + ) + test_utils.layer_test( + keras.layers.BatchNormalization, + kwargs={ + "gamma_initializer": "ones", + "beta_initializer": "ones", + "moving_mean_initializer": "zeros", + "moving_variance_initializer": "ones", + }, + input_shape=(3, 4, 2), + ) + test_utils.layer_test( + keras.layers.BatchNormalization, + kwargs={"scale": False, "center": False}, + input_shape=(3, 3), + ) + test_utils.layer_test( + keras.layers.BatchNormalization, + kwargs={ + "gamma_initializer": "ones", + "beta_initializer": "ones", + "moving_mean_initializer": "zeros", + "moving_variance_initializer": "ones", + }, + input_shape=(3, 2, 4, 2), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_batchnorm_weights(self): + layer = keras.layers.BatchNormalization(scale=False, center=False) + layer.build((None, 3, 4)) + self.assertEqual(len(layer.trainable_weights), 0) + self.assertEqual(len(layer.weights), 2) + + layer = keras.layers.BatchNormalization() + layer.build((None, 3, 4)) + self.assertEqual(len(layer.trainable_weights), 2) + self.assertEqual(len(layer.weights), 4) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_batchnorm_regularization(self): + layer = keras.layers.BatchNormalization( + gamma_regularizer="l1", beta_regularizer="l1" + ) + layer.build((None, 3, 4)) + self.assertEqual(len(layer.losses), 2) + max_norm = keras.constraints.max_norm + layer = keras.layers.BatchNormalization( + gamma_constraint=max_norm, beta_constraint=max_norm + ) + layer.build((None, 3, 4)) + self.assertEqual(layer.gamma.constraint, max_norm) + self.assertEqual(layer.beta.constraint, max_norm) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_batchnorm_sync_fused_error(self): + with self.assertRaises(ValueError): + _ = batch_normalization.BatchNormalization( + synchronized=True, fused=True + ) + + def _test_batchnorm_convnet(self, synchronized=False): + if tf.test.is_gpu_available(cuda_only=True): + with self.session(): + model = keras.models.Sequential() + norm = keras.layers.BatchNormalization( + axis=1, + input_shape=(3, 4, 4), + momentum=0.8, + synchronized=synchronized, + ) + model.add(norm) + model.compile( + loss="mse", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + run_eagerly=test_utils.should_run_eagerly(), + ) + + # centered on 5.0, variance 10.0 + x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) + model.fit(x, x, epochs=4, verbose=0) + out = model.predict(x) + out -= np.reshape(keras.backend.eval(norm.beta), (1, 3, 1, 1)) + out /= np.reshape(keras.backend.eval(norm.gamma), (1, 3, 1, 1)) + + np.testing.assert_allclose( + np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1 + ) + np.testing.assert_allclose( + np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1 + ) + + @test_combinations.run_all_keras_modes + def test_batchnorm_convnet(self): + self._test_batchnorm_convnet(synchronized=False) + + 
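[Editor's note] The new `test_batchnorm_sync_fused_error` above pins down that synchronized statistics are incompatible with the fused kernel. A standalone repro, assuming a TF build whose public `BatchNormalization` already accepts `synchronized` (2.12+); illustrative only, not part of the patch:

```python
import tensorflow as tf

# Synchronized batch statistics require the non-fused implementation,
# so requesting both should fail fast with a ValueError.
try:
    tf.keras.layers.BatchNormalization(synchronized=True, fused=True)
except ValueError as err:
    print(f"Raised as expected: {err}")
```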
@test_combinations.run_all_keras_modes + def test_batchnorm_convnet_synchronized(self): + self._test_batchnorm_convnet(synchronized=True) + + @test_combinations.run_all_keras_modes + def test_batchnorm_convnet_channel_last(self): model = keras.models.Sequential() norm = keras.layers.BatchNormalization( - axis=1, input_shape=(3, 4, 4), momentum=0.8) + axis=-1, input_shape=(4, 4, 3), momentum=0.8 + ) model.add(norm) model.compile( - loss='mse', + loss="mse", optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - run_eagerly=test_utils.should_run_eagerly()) + run_eagerly=test_utils.should_run_eagerly(), + ) # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) + x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3)) model.fit(x, x, epochs=4, verbose=0) out = model.predict(x) - out -= np.reshape(keras.backend.eval(norm.beta), (1, 3, 1, 1)) - out /= np.reshape(keras.backend.eval(norm.gamma), (1, 3, 1, 1)) + out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3)) + out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3)) + + np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1) + np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1) + + @test_combinations.run_all_keras_modes + def test_batchnorm_correctness(self): + _run_batchnorm_correctness_test( + batch_normalization_v1.BatchNormalization, dtype="float32" + ) + _run_batchnorm_correctness_test( + batch_normalization.BatchNormalization, dtype="float32" + ) + _run_batchnorm_correctness_test( + batch_normalization.BatchNormalization, + dtype="float32", + synchronized=True, + ) + + @test_combinations.run_all_keras_modes + def test_batchnorm_float16(self): + _run_batchnorm_correctness_test( + batch_normalization_v1.BatchNormalization, dtype="float16" + ) + _run_batchnorm_correctness_test( + batch_normalization.BatchNormalization, dtype="float16" + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + @test_utils.enable_v2_dtype_behavior + def test_batchnorm_mixed_precision(self): + norm = keras.layers.BatchNormalization( + axis=-1, momentum=0.8, dtype="mixed_float16" + ) + x = np.random.normal(size=(10, 4, 4, 3)) + y = norm(x) + self.assertEqual(y.dtype, "float16") + self.assertEqual(norm.beta.dtype.base_dtype, "float32") + self.assertEqual(norm.gamma.dtype.base_dtype, "float32") + + x = np.arange(10 * 4 * 4 * 3).reshape((10, 4, 4, 3)) + y = norm(x) + self.assertEqual(y.dtype, "float16") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"], fused=[True, False]) + ) + @test_utils.enable_v2_dtype_behavior + def test_batchnorm_mixed_precision_does_not_overflow(self, fused): + norm = keras.layers.BatchNormalization( + axis=-1, input_shape=(1, 1, 1), fused=fused, dtype="mixed_float16" + ) + x = np.array([-1000.0, 1000.0]).reshape((2, 1, 1, 1)) + y = norm(x, training=True) + expected_y = np.array([-1.0, 1.0]).reshape((2, 1, 1, 1)) + self.assertAllClose(keras.backend.eval(y), expected_y) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_batchnorm_non_trainable_with_fit(self): + # We use the same data shape for all the data we use in this test. + # This will prevent any used tf.functions from retracing. + # This helps us verify that changing trainable and recompiling really + # does update the training loop, rather than a different data shape + # triggering a retrace. 
+ data_shape = (100, 3) + + inputs = keras.Input((3,)) + bn = batch_normalization.BatchNormalization() + outputs = bn(inputs) + model = keras.Model(inputs, outputs) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + model.fit(np.random.random(data_shape), np.random.random(data_shape)) - np.testing.assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) - np.testing.assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) + test_data = np.random.random(data_shape) + test_targets = np.random.random(data_shape) + test_loss = model.evaluate(test_data, test_targets) - @test_combinations.run_all_keras_modes - def test_batchnorm_convnet_channel_last(self): - model = keras.models.Sequential() - norm = keras.layers.BatchNormalization( - axis=-1, input_shape=(4, 4, 3), momentum=0.8) - model.add(norm) - model.compile( - loss='mse', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - run_eagerly=test_utils.should_run_eagerly()) + bn.trainable = False + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + train_loss = model.train_on_batch(test_data, test_targets) + self.assertAlmostEqual(test_loss, train_loss) + + @test_combinations.run_all_keras_modes + def test_batchnorm_ignore_masked_values(self): + padded_data = np.array( + [[[1, 5], [2, 5], [0, 0], [0, 0]] for _ in range(10)], + dtype="float32", + ) # Pad value of 0 + + inputs = keras.layers.Input((None, 2)) + masked = keras.layers.Masking()(inputs) + normed = keras.layers.BatchNormalization(momentum=0.0)(masked) + model = keras.models.Model(inputs, normed) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + model.fit(x=padded_data, y=padded_data, batch_size=10, epochs=5) + + self.assertAllEqual(model.layers[2].moving_mean, [1.5, 5.0]) + self.assertAllEqual(model.layers[2].moving_variance, [0.25, 0.0]) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_sync_batchnorm_with_mask(self): + padded_data = np.array( + [[[1, 5], [2, 5], [0, 0], [0, 0]] for _ in range(10)], + dtype="float32", + ) # Pad value of 0 + strategy = tf.distribute.MirroredStrategy(["CPU:0"]) + distributed_data = strategy.distribute_datasets_from_function( + dataset_fn=lambda _: tf.data.Dataset.from_tensors( + (padded_data, padded_data) + ).repeat(), + options=None, + ) + with strategy.scope(): + inputs = keras.layers.Input((None, 2)) + masked = keras.layers.Masking()(inputs) + normed = keras.layers.BatchNormalization( + momentum=0.0, synchronized=True + )(masked) + model = keras.models.Model(inputs, normed) + # MirroredStrategy will be very slow when run eagerly. 
+ model.compile("rmsprop", "mse", run_eagerly=False) + model.fit(distributed_data, steps_per_epoch=1, epochs=5) + + self.assertAllEqual(model.layers[2].moving_mean, [1.5, 5.0]) + self.assertAllEqual(model.layers[2].moving_variance, [0.25, 0.0]) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_eager_batchnorm_in_custom_model_call_with_tf_function(self): + class MyModel(keras.Model): + def __init__(self): + super().__init__() + self.bn = keras.layers.BatchNormalization() + + @tf.function() + def call(self, x, training): + return self.bn(x, training=training) + + model = MyModel() + + for _ in range(10): + x = tf.constant(0.5, shape=[1, 1]) + model(x, training=True) + + # Make sure the moving mean and variance have been updated + self.assertAllClose(model.bn.moving_mean.numpy(), [0.047], atol=3e-3) + self.assertAllClose(model.bn.moving_variance.numpy(), [0.9], atol=3e-2) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_bessels_correction(self): + # Bessel's correction is currently only used in the fused case. In the + # future, it may be used in the nonfused case as well. + + x = tf.constant([0.0, 2.0], shape=[2, 1, 1, 1]) + layer = batch_normalization.BatchNormalization( + momentum=0.5, moving_variance_initializer="zeros" + ) + layer(x, training=True) + self.assertTrue(layer.fused) + # Since fused is used, Bessel's correction is used. The variance of [0, + # 2] is 2 with Bessel's correction. Since the momentum is 0.5, the + # variance is 2 * 0.5 == 1. + self.assertAllEqual(self.evaluate(layer.moving_variance), [1.0]) + + x = tf.constant([0.0, 2.0], shape=[2, 1, 1, 1, 1]) + layer = batch_normalization.BatchNormalization( + momentum=0.5, moving_variance_initializer="zeros" + ) + layer(x, training=True) + self.assertTrue(layer.fused) + # Since fused is used, Bessel's correction is used. The variance of [0, + # 2] is 2 with Bessel's correction. Since the momentum is 0.5, the + # variance is 2 * 0.5 == 1. 
+ self.assertAllEqual(self.evaluate(layer.moving_variance), [1.0]) + + @test_combinations.run_all_keras_modes + def test_can_be_used_in_multiple_graphs(self): + norm = keras.layers.BatchNormalization( + scale=False, center=False, fused=True + ) - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3)) - model.fit(x, x, epochs=4, verbose=0) - out = model.predict(x) - out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3)) - out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3)) - - np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1) - np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1) - - @test_combinations.run_all_keras_modes - def test_batchnorm_correctness(self): - _run_batchnorm_correctness_test( - batch_normalization_v1.BatchNormalization, dtype='float32') - _run_batchnorm_correctness_test( - batch_normalization.BatchNormalization, dtype='float32') - - @test_combinations.run_all_keras_modes - def test_batchnorm_float16(self): - _run_batchnorm_correctness_test( - batch_normalization_v1.BatchNormalization, dtype='float16') - _run_batchnorm_correctness_test( - batch_normalization.BatchNormalization, dtype='float16') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - @test_utils.enable_v2_dtype_behavior - def test_batchnorm_mixed_precision(self): - norm = keras.layers.BatchNormalization( - axis=-1, - momentum=0.8, - dtype='mixed_float16') - x = np.random.normal(size=(10, 4, 4, 3)) - y = norm(x) - self.assertEqual(y.dtype, 'float16') - self.assertEqual(norm.beta.dtype.base_dtype, 'float32') - self.assertEqual(norm.gamma.dtype.base_dtype, 'float32') - - x = np.arange(10 * 4 * 4 * 3).reshape((10, 4, 4, 3)) - y = norm(x) - self.assertEqual(y.dtype, 'float16') - - @test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'], - fused=[True, False])) - @test_utils.enable_v2_dtype_behavior - def test_batchnorm_mixed_precision_does_not_overflow(self, fused): - norm = keras.layers.BatchNormalization( - axis=-1, - input_shape=(1, 1, 1), - fused=fused, - dtype='mixed_float16') - x = np.array([-1000., 1000.]).reshape((2, 1, 1, 1)) - y = norm(x, training=True) - expected_y = np.array([-1.0, 1.0]).reshape((2, 1, 1, 1)) - self.assertAllClose(keras.backend.eval(y), expected_y) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_batchnorm_non_trainable_with_fit(self): - # We use the same data shape for all the data we use in this test. - # This will prevent any used tf.functions from retracing. - # This helps us verify that changing trainable and recompiling really - # does update the training loop, rather than a different data shape - # triggering a retrace. 
- data_shape = (100, 3) - - inputs = keras.Input((3,)) - bn = batch_normalization.BatchNormalization() - outputs = bn(inputs) - model = keras.Model(inputs, outputs) - model.compile( - 'rmsprop', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(np.random.random(data_shape), np.random.random(data_shape)) + @tf.function + def fn1(x): + return norm(x, training=True) - test_data = np.random.random(data_shape) - test_targets = np.random.random(data_shape) - test_loss = model.evaluate(test_data, test_targets) + @tf.function + def fn2(x): + return norm(x, training=True) - bn.trainable = False - model.compile( - 'rmsprop', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - train_loss = model.train_on_batch(test_data, test_targets) - self.assertAlmostEqual(test_loss, train_loss) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_eager_batchnorm_in_custom_model_call_with_tf_function(self): - - class MyModel(keras.Model): - - def __init__(self): - super().__init__() - self.bn = keras.layers.BatchNormalization() - - @tf.function() - def call(self, x, training): - return self.bn(x, training=training) - - model = MyModel() - - for _ in range(10): - x = tf.constant(0.5, shape=[1, 1]) - model(x, training=True) - - # Make sure the moving mean and variance have been updated - self.assertAllClose(model.bn.moving_mean.numpy(), [0.047], atol=3e-3) - self.assertAllClose(model.bn.moving_variance.numpy(), [0.9], atol=3e-2) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_bessels_correction(self): - # Bessel's correction is currently only used in the fused case. In the - # future, it may be used in the nonfused case as well. - - x = tf.constant([0., 2.], shape=[2, 1, 1, 1]) - layer = batch_normalization.BatchNormalization( - momentum=0.5, moving_variance_initializer='zeros') - layer(x, training=True) - self.assertTrue(layer.fused) - # Since fused is used, Bessel's correction is used. The variance of [0, 2] - # is 2 with Bessel's correction. Since the momentum is 0.5, the variance is - # 2 * 0.5 == 1. - self.assertAllEqual(self.evaluate(layer.moving_variance), [1.]) - - x = tf.constant([0., 2.], shape=[2, 1, 1, 1, 1]) - layer = batch_normalization.BatchNormalization( - momentum=0.5, moving_variance_initializer='zeros') - layer(x, training=True) - self.assertTrue(layer.fused) - # Since fused is used, Bessel's correction is used. The variance of [0, 2] - # is 2 with Bessel's correction. Since the momentum is 0.5, the variance is - # 2 * 0.5 == 1. 
- self.assertAllEqual(self.evaluate(layer.moving_variance), [1.]) + x = np.array([-1000.0, 1000.0]).reshape((2, 1, 1, 1)) + y = norm(fn2(fn1(x)), training=True) + expected_y = np.array([-0.9995, 0.9995]).reshape((2, 1, 1, 1)) + self.assertAllClose(keras.backend.eval(y), expected_y) class BatchNormalizationV1Test(test_combinations.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_v1_fused_attribute(self): - norm = batch_normalization_v1.BatchNormalization() - inp = keras.layers.Input((4, 4, 4)) - norm(inp) - self.assertEqual(norm.fused, True) - - norm = batch_normalization_v1.BatchNormalization(fused=False) - self.assertEqual(norm.fused, False) - inp = keras.layers.Input(shape=(4, 4, 4)) - norm(inp) - self.assertEqual(norm.fused, False) - - norm = batch_normalization_v1.BatchNormalization(virtual_batch_size=2) - self.assertEqual(norm.fused, True) - inp = keras.layers.Input(shape=(2, 2, 2)) - norm(inp) - self.assertEqual(norm.fused, False) + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_v1_fused_attribute(self): + norm = batch_normalization_v1.BatchNormalization() + inp = keras.layers.Input((4, 4, 4)) + norm(inp) + self.assertEqual(norm.fused, True) + + norm = batch_normalization_v1.BatchNormalization(fused=False) + self.assertEqual(norm.fused, False) + inp = keras.layers.Input(shape=(4, 4, 4)) + norm(inp) + self.assertEqual(norm.fused, False) + + norm = batch_normalization_v1.BatchNormalization(virtual_batch_size=2) + self.assertEqual(norm.fused, True) + inp = keras.layers.Input(shape=(2, 2, 2)) + norm(inp) + self.assertEqual(norm.fused, False) class BatchNormalizationV2Test(test_combinations.TestCase): + @test_combinations.run_all_keras_modes + def test_basic_batchnorm_v2(self): + test_utils.layer_test( + batch_normalization.BatchNormalization, + kwargs={"fused": True}, + input_shape=(3, 3, 3, 3), + ) + test_utils.layer_test( + batch_normalization.BatchNormalization, + kwargs={"fused": None}, + input_shape=(3, 3, 3), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_v2_fused_attribute(self): + norm = batch_normalization.BatchNormalization() + self.assertIsNone(norm.fused) + inp = keras.layers.Input(shape=(4, 4, 4)) + norm(inp) + self.assertEqual(norm.fused, True) + + norm = batch_normalization.BatchNormalization() + self.assertIsNone(norm.fused) + inp = keras.layers.Input(shape=(4, 4)) + norm(inp) + self.assertEqual(norm.fused, False) + + norm = batch_normalization.BatchNormalization() + self.assertIsNone(norm.fused) + inp = keras.layers.Input(shape=(4, 4, 4, 4)) + norm(inp) + self.assertEqual(norm.fused, True) + + norm = batch_normalization.BatchNormalization(virtual_batch_size=2) + self.assertEqual(norm.fused, False) + inp = keras.layers.Input(shape=(4, 4, 4)) + norm(inp) + self.assertEqual(norm.fused, False) + + norm = batch_normalization.BatchNormalization(fused=False) + self.assertEqual(norm.fused, False) + inp = keras.layers.Input(shape=(4, 4, 4)) + norm(inp) + self.assertEqual(norm.fused, False) + + norm = batch_normalization.BatchNormalization(fused=True, axis=[3]) + self.assertEqual(norm.fused, True) + inp = keras.layers.Input(shape=(4, 4, 4)) + norm(inp) + self.assertEqual(norm.fused, True) + + with self.assertRaisesRegex(ValueError, "fused.*renorm"): + batch_normalization.BatchNormalization(fused=True, renorm=True) + + with self.assertRaisesRegex(ValueError, "fused.*when axis is 1 or 3"): + 
batch_normalization.BatchNormalization(fused=True, axis=2) + + with self.assertRaisesRegex(ValueError, "fused.*when axis is 1 or 3"): + batch_normalization.BatchNormalization(fused=True, axis=[1, 3]) + + with self.assertRaisesRegex(ValueError, "fused.*virtual_batch_size"): + batch_normalization.BatchNormalization( + fused=True, virtual_batch_size=2 + ) + + with self.assertRaisesRegex(ValueError, "fused.*adjustment"): + batch_normalization.BatchNormalization( + fused=True, adjustment=lambda _: (1, 0) + ) + + norm = batch_normalization.BatchNormalization(fused=True) + self.assertEqual(norm.fused, True) + inp = keras.layers.Input(shape=(4, 4)) + with self.assertRaisesRegex(ValueError, "4D or 5D input tensors"): + norm(inp) + + def test_updates_in_wrap_function(self): + def my_func(): + layer = batch_normalization_v1.BatchNormalization() + x = tf.ones((10, 1)) + y = layer(x, training=True) + # Updates should be tracked in a `wrap_function`. + self.assertLen(layer.updates, 2) + return y + + wrapped_fn = tf.compat.v1.wrap_function(my_func, []) + wrapped_fn() + + @test_combinations.run_all_keras_modes + @test_utils.run_v2_only + def test_basic_batchnorm_v2_input_shape_and_virtual_batch_size(self): + # Test case for GitHub issue #32380 + norm = batch_normalization.BatchNormalization(virtual_batch_size=8) + inp = keras.layers.Input(shape=(None, None, 3)) + _ = norm(inp) + + # Test case for https://github.com/tensorflow/tensorflow/issues/23050 + norm = batch_normalization.BatchNormalization(virtual_batch_size=8) + _ = norm(np.ones((1, 28, 28))) + + with self.assertRaisesRegex(Exception, "Reshape"): + norm = batch_normalization.BatchNormalization(virtual_batch_size=8) + _ = norm(np.ones((1, 28, 28)), training=True) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_fused_batchnorm_empty_batch(self): + # Test case for https://github.com/tensorflow/tensorflow/issues/52986 + # Create a simple strategy with the enable_partial_batch_handling flag + # turned on, to trigger the empty-batch code path in fused batchnorm. + strategy = tf.distribute.OneDeviceStrategy("/cpu:0") + strategy.extended.enable_partial_batch_handling = True + with strategy.scope(): + layer = batch_normalization.BatchNormalization() + + def fn(): + with tf.GradientTape() as tape: + x = tf.ones((0, 2, 2, 2)) + layer(x, training=True) + return tape + + tape = strategy.run(fn) + + self.assertTrue(layer.fused) + + self.assertIsNotNone(layer.moving_mean) + self.assertIsNotNone(layer.moving_variance) + + tape_vars = tape.watched_variables() + self.assertAllEqual(layer.gamma, tape_vars[0]) + self.assertAllEqual(layer.beta, tape_vars[1]) + + +def _run_batchnorm_correctness_test( + layer, dtype="float32", fused=False, synchronized=False +): + model = keras.models.Sequential() + model.add(keras.Input(shape=(2, 2, 2), dtype=dtype)) + norm = layer(momentum=0.8, fused=fused, synchronized=synchronized) + model.add(norm) + if dtype == "float16": + # Keras models require float32 losses.
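      # (The Lambda below is what satisfies that requirement: with float16
      # activations, computing the mse loss in float16 could lose precision,
      # so the model's output is upcast to float32 first.)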
+ model.add( + keras.layers.Lambda(lambda x: keras.backend.cast(x, "float32")) + ) + model.compile( + loss="mse", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + run_eagerly=test_utils.should_run_eagerly(), + ) - @test_combinations.run_all_keras_modes - def test_basic_batchnorm_v2(self): - test_utils.layer_test( - batch_normalization.BatchNormalization, - kwargs={'fused': True}, - input_shape=(3, 3, 3, 3)) - test_utils.layer_test( - batch_normalization.BatchNormalization, - kwargs={'fused': None}, - input_shape=(3, 3, 3)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_v2_fused_attribute(self): - norm = batch_normalization.BatchNormalization() - self.assertIsNone(norm.fused) - inp = keras.layers.Input(shape=(4, 4, 4)) - norm(inp) - self.assertEqual(norm.fused, True) - - norm = batch_normalization.BatchNormalization() - self.assertIsNone(norm.fused) - inp = keras.layers.Input(shape=(4, 4)) - norm(inp) - self.assertEqual(norm.fused, False) - - norm = batch_normalization.BatchNormalization() - self.assertIsNone(norm.fused) - inp = keras.layers.Input(shape=(4, 4, 4, 4)) - norm(inp) - self.assertEqual(norm.fused, True) - - norm = batch_normalization.BatchNormalization(virtual_batch_size=2) - self.assertEqual(norm.fused, False) - inp = keras.layers.Input(shape=(4, 4, 4)) - norm(inp) - self.assertEqual(norm.fused, False) - - norm = batch_normalization.BatchNormalization(fused=False) - self.assertEqual(norm.fused, False) - inp = keras.layers.Input(shape=(4, 4, 4)) - norm(inp) - self.assertEqual(norm.fused, False) - - norm = batch_normalization.BatchNormalization(fused=True, axis=[3]) - self.assertEqual(norm.fused, True) - inp = keras.layers.Input(shape=(4, 4, 4)) - norm(inp) - self.assertEqual(norm.fused, True) - - with self.assertRaisesRegex(ValueError, 'fused.*renorm'): - batch_normalization.BatchNormalization(fused=True, renorm=True) - - with self.assertRaisesRegex(ValueError, 'fused.*when axis is 1 or 3'): - batch_normalization.BatchNormalization(fused=True, axis=2) - - with self.assertRaisesRegex(ValueError, 'fused.*when axis is 1 or 3'): - batch_normalization.BatchNormalization(fused=True, axis=[1, 3]) - - with self.assertRaisesRegex(ValueError, 'fused.*virtual_batch_size'): - batch_normalization.BatchNormalization(fused=True, virtual_batch_size=2) - - with self.assertRaisesRegex(ValueError, 'fused.*adjustment'): - batch_normalization.BatchNormalization( - fused=True, adjustment=lambda _: (1, 0)) - - norm = batch_normalization.BatchNormalization(fused=True) - self.assertEqual(norm.fused, True) - inp = keras.layers.Input(shape=(4, 4)) - with self.assertRaisesRegex(ValueError, '4D or 5D input tensors'): - norm(inp) - - def test_updates_in_wrap_function(self): - - def my_func(): - layer = batch_normalization_v1.BatchNormalization() - x = tf.ones((10, 1)) - y = layer(x, training=True) - # Updates should be tracked in a `wrap_function`. 
- self.assertLen(layer.updates, 2) - return y - - wrapped_fn = tf.compat.v1.wrap_function(my_func, []) - wrapped_fn() - - @test_combinations.run_all_keras_modes - def test_basic_batchnorm_v2_none_shape_and_virtual_batch_size(self): - # Test case for GitHub issue for 32380 - norm = batch_normalization.BatchNormalization(virtual_batch_size=8) - inp = keras.layers.Input(shape=(None, None, 3)) - _ = norm(inp) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_fused_batchnorm_empty_batch(self): - # Test case for https://github.com/tensorflow/tensorflow/issues/52986 - # create a simple strategy with the enable_partial_batch_handling flag - # turned on, to trigger the empty batch code path in fused batchnorm - strategy = tf.distribute.OneDeviceStrategy('/cpu:0') - strategy.extended.enable_partial_batch_handling = True - with strategy.scope(): - layer = batch_normalization.BatchNormalization() - - def fn(): - with tf.GradientTape() as tape: - x = tf.ones((0, 2, 2, 2)) - layer(x, training=True) - return tape - - tape = strategy.run(fn) - - self.assertTrue(layer.fused) - - self.assertIsNotNone(layer.moving_mean) - self.assertIsNotNone(layer.moving_variance) - - tape_vars = tape.watched_variables() - self.assertAllEqual(layer.gamma, tape_vars[0]) - self.assertAllEqual(layer.beta, tape_vars[1]) - - -def _run_batchnorm_correctness_test(layer, dtype='float32', fused=False): - model = keras.models.Sequential() - model.add(keras.Input(shape=(2, 2, 2), dtype=dtype)) - norm = layer(momentum=0.8, fused=fused) - model.add(norm) - if dtype == 'float16': - # Keras models require float32 losses. - model.add(keras.layers.Lambda(lambda x: keras.backend.cast(x, 'float32'))) - model.compile( - loss='mse', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - run_eagerly=test_utils.should_run_eagerly()) - - # centered on 5.0, variance 10.0 - x = (np.random.normal(loc=5.0, scale=10.0, size=(1000, 2, 2, 2)) - .astype(dtype)) - model.fit(x, x, epochs=4, verbose=0) - out = model.predict(x) - out -= keras.backend.eval(norm.beta) - out /= keras.backend.eval(norm.gamma) + # centered on 5.0, variance 10.0 + x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 2, 2, 2)).astype( + dtype + ) + model.fit(x, x, epochs=4, verbose=0) + out = model.predict(x) + out -= keras.backend.eval(norm.beta) + out /= keras.backend.eval(norm.gamma) - np.testing.assert_allclose(out.mean(), 0.0, atol=2e-1) - np.testing.assert_allclose(out.std(), 1.0, atol=2e-1) + np.testing.assert_allclose(out.mean(), 0.0, atol=2e-1) + np.testing.assert_allclose(out.std(), 1.0, atol=2e-1) -@parameterized.parameters([ - batch_normalization_v1.BatchNormalization, - batch_normalization.BatchNormalization -]) +@parameterized.parameters( + [ + batch_normalization_v1.BatchNormalization, + batch_normalization.BatchNormalization, + ] +) class NormalizationLayersGraphModeOnlyTest( - tf.test.TestCase, parameterized.TestCase): - - def test_shared_batchnorm(self, layer): - """Test that a BN layer can be shared across different data streams.""" - with self.cached_session(): - # Test single layer reuse - bn = layer() - x1 = keras.layers.Input(shape=(10,)) - _ = bn(x1) - - x2 = keras.layers.Input(shape=(10,)) - y2 = bn(x2) - - x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10)) - model = keras.models.Model(x2, y2) - - model.compile(tf.compat.v1.train.GradientDescentOptimizer(0.01), 'mse') - model.train_on_batch(x, x) - - # Test model-level reuse - x3 = keras.layers.Input(shape=(10,)) - y3 = model(x3) - new_model = 
keras.models.Model(x3, y3, name='new_model') - - new_model.compile(tf.compat.v1.train.GradientDescentOptimizer(0.01), 'mse') - new_model.train_on_batch(x, x) - - def test_that_trainable_disables_updates(self, layer): - with self.cached_session(): - val_a = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - a = keras.layers.Input(shape=(4,)) - layer = layer(input_shape=(4,)) - b = layer(a) - model = keras.models.Model(a, b) - - model.trainable = False - model.compile(tf.compat.v1.train.GradientDescentOptimizer(0.01), 'mse') - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - self.assertAllClose(x1, x2, atol=1e-7) - - model.trainable = True - model.compile(tf.compat.v1.train.GradientDescentOptimizer(0.01), 'mse') - - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert np.abs(np.sum(x1 - x2)) > 1e-5 - - layer.trainable = False - model.compile(tf.compat.v1.train.GradientDescentOptimizer(0.01), 'mse') - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - self.assertAllClose(x1, x2, atol=1e-7) - - def test_batchnorm_trainable(self, layer): - """Tests that batchnorm layer is trainable when learning phase is enabled. - - Computes mean and std for current inputs then - applies batch normalization using them. - - Args: - layer: Either V1 or V2 of BatchNormalization layer. - """ - # TODO(fchollet): enable in all execution modes when issue with - # learning phase setting is resolved. - with tf.Graph().as_default(), self.cached_session(): - bn_mean = 0.5 - bn_std = 10. - val_a = np.expand_dims(np.arange(10.), axis=1) - - def get_model(bn_mean, bn_std): - inp = keras.layers.Input(shape=(1,)) - x = layer()(inp) - model1 = keras.models.Model(inp, x) - model1.set_weights([ - np.array([1.]), - np.array([0.]), - np.array([bn_mean]), - np.array([bn_std**2]) - ]) - return model1 - - # Simulates training-mode with trainable layer. - # Should use mini-batch statistics. 
- with keras.backend.learning_phase_scope(1): - model = get_model(bn_mean, bn_std) - model.compile(loss='mse', optimizer='rmsprop') - out = model.predict(val_a) - self.assertAllClose( - (val_a - np.mean(val_a)) / np.std(val_a), out, atol=1e-3) - - -if __name__ == '__main__': - tf.test.main() + tf.test.TestCase, parameterized.TestCase +): + def test_shared_batchnorm(self, layer): + """Test that a BN layer can be shared across different data streams.""" + with self.cached_session(): + # Test single layer reuse + bn = layer() + x1 = keras.layers.Input(shape=(10,)) + _ = bn(x1) + + x2 = keras.layers.Input(shape=(10,)) + y2 = bn(x2) + + x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10)) + model = keras.models.Model(x2, y2) + + model.compile( + tf.compat.v1.train.GradientDescentOptimizer(0.01), "mse" + ) + model.train_on_batch(x, x) + + # Test model-level reuse + x3 = keras.layers.Input(shape=(10,)) + y3 = model(x3) + new_model = keras.models.Model(x3, y3, name="new_model") + + new_model.compile( + tf.compat.v1.train.GradientDescentOptimizer(0.01), "mse" + ) + new_model.train_on_batch(x, x) + + def test_that_trainable_disables_updates(self, layer): + with self.cached_session(): + val_a = np.random.random((10, 4)) + val_out = np.random.random((10, 4)) + + a = keras.layers.Input(shape=(4,)) + layer = layer(input_shape=(4,)) + b = layer(a) + model = keras.models.Model(a, b) + + model.trainable = False + model.compile( + tf.compat.v1.train.GradientDescentOptimizer(0.01), "mse" + ) + + x1 = model.predict(val_a) + model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + self.assertAllClose(x1, x2, atol=1e-7) + + model.trainable = True + model.compile( + tf.compat.v1.train.GradientDescentOptimizer(0.01), "mse" + ) + + model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + assert np.abs(np.sum(x1 - x2)) > 1e-5 + + layer.trainable = False + model.compile( + tf.compat.v1.train.GradientDescentOptimizer(0.01), "mse" + ) + + x1 = model.predict(val_a) + model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + self.assertAllClose(x1, x2, atol=1e-7) + + def test_batchnorm_trainable(self, layer): + """Tests that batchnorm layer is trainable when learning phase enabled. + + Computes mean and std for current inputs then + applies batch normalization using them. + + Args: + layer: Either V1 or V2 of BatchNormalization layer. + """ + # TODO(fchollet): enable in all execution modes when issue with + # learning phase setting is resolved. + with tf.Graph().as_default(), self.cached_session(): + bn_mean = 0.5 + bn_std = 10.0 + val_a = np.expand_dims(np.arange(10.0), axis=1) + + def get_model(bn_mean, bn_std): + inp = keras.layers.Input(shape=(1,)) + x = layer()(inp) + model1 = keras.models.Model(inp, x) + model1.set_weights( + [ + np.array([1.0]), + np.array([0.0]), + np.array([bn_mean]), + np.array([bn_std**2]), + ] + ) + return model1 + + # Simulates training-mode with trainable layer. + # Should use mini-batch statistics. 
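            # Restating what the assertion below relies on: the four weights
            # set above follow BatchNormalization's weight order
            # [gamma, beta, moving_mean, moving_variance], and in training
            # mode the layer normalizes with the current batch statistics,
            # so the output should match (val_a - mean(val_a)) / std(val_a)
            # regardless of the stored bn_mean and bn_std.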
+ with keras.backend.learning_phase_scope(1): + model = get_model(bn_mean, bn_std) + model.compile(loss="mse", optimizer="rmsprop") + out = model.predict(val_a) + self.assertAllClose( + (val_a - np.mean(val_a)) / np.std(val_a), out, atol=1e-3 + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/normalization/batch_normalization_v1.py b/keras/layers/normalization/batch_normalization_v1.py index c6d3fb2d6d00..4d9feb311da2 100644 --- a/keras/layers/normalization/batch_normalization_v1.py +++ b/keras/layers/normalization/batch_normalization_v1.py @@ -13,13 +13,19 @@ # limitations under the License. # ============================================================================== """Batch Normalization V1 layer.""" -# pylint: disable=g-classes-have-attributes + from keras.layers.normalization import batch_normalization + +# isort: off from tensorflow.python.util.tf_export import keras_export -# pylint: disable=missing-docstring -@keras_export(v1=['keras.layers.BatchNormalization']) +@keras_export(v1=["keras.layers.BatchNormalization"]) class BatchNormalization(batch_normalization.BatchNormalizationBase): - _USE_V2_BEHAVIOR = False + _USE_V2_BEHAVIOR = False + + def __init__(self, *args, **kwargs): + # synchronized not implemented in V1 + kwargs.pop("synchronized", None) + super().__init__(*args, **kwargs) diff --git a/keras/layers/normalization/group_normalization.py b/keras/layers/normalization/group_normalization.py new file mode 100644 index 000000000000..a0a39bc105bb --- /dev/null +++ b/keras/layers/normalization/group_normalization.py @@ -0,0 +1,269 @@ +# Copyright 2022 The Keras Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Group normalization layer""" + +import tensorflow.compat.v2 as tf + +from keras import backend +from keras import constraints +from keras import initializers +from keras import regularizers +from keras.layers import InputSpec +from keras.layers import Layer +from keras.utils import tf_utils + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@keras_export("keras.layers.GroupNormalization", v1=[]) +class GroupNormalization(Layer): + """Group normalization layer. + + Group Normalization divides the channels into groups and computes + within each group the mean and variance for normalization. + Empirically, its accuracy is more stable than batch norm in a wide + range of small batch sizes, if learning rate is adjusted linearly + with batch sizes. + + Relation to Layer Normalization: + If the number of groups is set to 1, then this operation becomes nearly + identical to Layer Normalization (see Layer Normalization docs for details). + + Relation to Instance Normalization: + If the number of groups is set to the input dimension (number of groups is + equal to number of channels), then this operation becomes identical to + Instance Normalization. 
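    A minimal usage sketch (the shapes here are illustrative, and the layer is
    assumed to be importable as `tf.keras.layers.GroupNormalization` via the
    export above; the only hard requirement is that the channel count is
    divisible by `groups`):

    ```python
    import tensorflow as tf

    x = tf.random.normal((2, 8, 8, 16))  # NHWC input with 16 channels
    layer = tf.keras.layers.GroupNormalization(groups=4)  # 4 groups of 4
    y = layer(x)  # same shape as `x`; stats are per sample and per group
    ```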
+ + Args: + groups: Integer, the number of groups for Group Normalization. Can be in + the range [1, N] where N is the input dimension. The input dimension + must be divisible by the number of groups. Defaults to `32`. + axis: Integer or List/Tuple. The axis or axes to normalize across. + Typically, this is the features axis/axes. The left-out axes are + typically the batch axis/axes. `-1` is the last dimension in the + input. Defaults to `-1`. + epsilon: Small float added to variance to avoid dividing by zero. Defaults + to `1e-3`. + center: If True, add offset of `beta` to normalized tensor. If False, + `beta` is ignored. Defaults to `True`. + scale: If True, multiply by `gamma`. If False, `gamma` is not used. + When the next layer is linear (also e.g. `nn.relu`), this can be + disabled since the scaling will be done by the next layer. + Defaults to `True`. + beta_initializer: Initializer for the beta weight. Defaults to zeros. + gamma_initializer: Initializer for the gamma weight. Defaults to ones. + beta_regularizer: Optional regularizer for the beta weight. None by + default. + gamma_regularizer: Optional regularizer for the gamma weight. None by + default. + beta_constraint: Optional constraint for the beta weight. None by default. + gamma_constraint: Optional constraint for the gamma weight. None by + default. + + Input shape: + Arbitrary. Use the keyword argument `input_shape` (tuple of integers, + does not include the samples axis) when using this layer as the first + layer in a model. + + Output shape: + Same shape as input. + + Call arguments: + inputs: Input tensor (of any rank). + mask: The mask parameter is a tensor that indicates the weight for each + position in the input tensor when computing the mean and variance. + + Reference: + - [Yuxin Wu & Kaiming He, 2018](https://arxiv.org/abs/1803.08494) + """ + + def __init__( + self, + groups=32, + axis=-1, + epsilon=1e-3, + center=True, + scale=True, + beta_initializer="zeros", + gamma_initializer="ones", + beta_regularizer=None, + gamma_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + **kwargs, + ): + super().__init__(**kwargs) + self.supports_masking = True + self.groups = groups + self.axis = axis + self.epsilon = epsilon + self.center = center + self.scale = scale + self.beta_initializer = initializers.get(beta_initializer) + self.gamma_initializer = initializers.get(gamma_initializer) + self.beta_regularizer = regularizers.get(beta_regularizer) + self.gamma_regularizer = regularizers.get(gamma_regularizer) + self.beta_constraint = constraints.get(beta_constraint) + self.gamma_constraint = constraints.get(gamma_constraint) + + def build(self, input_shape): + tf_utils.validate_axis(self.axis, input_shape) + + dim = input_shape[self.axis] + if dim is None: + raise ValueError( + f"Axis {self.axis} of input tensor should have a defined " + "dimension but the layer received an input with shape " + f"{input_shape}." + ) + + if self.groups == -1: + self.groups = dim + + if dim < self.groups: + raise ValueError( + f"Number of groups ({self.groups}) cannot be more than the " + f"number of channels ({dim})." + ) + + if dim % self.groups != 0: + raise ValueError( + f"Number of groups ({self.groups}) must be a multiple " + f"of the number of channels ({dim})."
+ ) + + self.input_spec = InputSpec( + ndim=len(input_shape), axes={self.axis: dim} + ) + + if self.scale: + self.gamma = self.add_weight( + shape=(dim,), + name="gamma", + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint, + ) + else: + self.gamma = None + + if self.center: + self.beta = self.add_weight( + shape=(dim,), + name="beta", + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint, + ) + else: + self.beta = None + + super().build(input_shape) + + def call(self, inputs, mask=None): + input_shape = tf.shape(inputs) + + if mask is None: + mask = tf.ones_like(inputs) + else: + # We broadcast before we group in case the mask does not have the + # same shape as the input. + mask = tf.broadcast_to(mask, input_shape) + + reshaped_inputs = self._reshape_into_groups(inputs) + reshaped_mask = self._reshape_into_groups(mask) + + normalized_inputs = self._apply_normalization( + reshaped_inputs=reshaped_inputs, + input_shape=input_shape, + reshaped_mask=reshaped_mask, + ) + + return tf.reshape(normalized_inputs, input_shape) + + def _reshape_into_groups(self, inputs): + input_shape = tf.shape(inputs) + group_shape = [input_shape[i] for i in range(inputs.shape.rank)] + + group_shape[self.axis] = input_shape[self.axis] // self.groups + group_shape.insert(self.axis, self.groups) + group_shape = tf.stack(group_shape) + reshaped_inputs = tf.reshape(inputs, group_shape) + return reshaped_inputs + + def _apply_normalization( + self, + *, + reshaped_inputs, + reshaped_mask, + input_shape, + ): + group_reduction_axes = list(range(1, reshaped_inputs.shape.rank)) + + axis = self.axis - 1 + group_reduction_axes.pop(axis) + + mask_weights = tf.cast(reshaped_mask, reshaped_inputs.dtype) + + mean, variance = tf.nn.weighted_moments( + reshaped_inputs, + axes=group_reduction_axes, + frequency_weights=mask_weights, + keepdims=True, + ) + + gamma, beta = self._get_reshaped_weights(input_shape) + normalized_inputs = tf.nn.batch_normalization( + reshaped_inputs, + mean=mean, + variance=variance, + scale=gamma, + offset=beta, + variance_epsilon=self.epsilon, + ) + return normalized_inputs + + def _get_reshaped_weights(self, input_shape): + broadcast_shape = self._create_broadcast_shape(input_shape) + gamma = None + beta = None + if self.scale: + gamma = tf.reshape(self.gamma, broadcast_shape) + + if self.center: + beta = tf.reshape(self.beta, broadcast_shape) + return gamma, beta + + def _create_broadcast_shape(self, input_shape): + broadcast_shape = [1] * backend.int_shape(input_shape)[0] + + broadcast_shape[self.axis] = input_shape[self.axis] // self.groups + broadcast_shape.insert(self.axis, self.groups) + + return broadcast_shape + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "groups": self.groups, + "axis": self.axis, + "epsilon": self.epsilon, + "center": self.center, + "scale": self.scale, + "beta_initializer": initializers.serialize(self.beta_initializer), + "gamma_initializer": initializers.serialize(self.gamma_initializer), + "beta_regularizer": regularizers.serialize(self.beta_regularizer), + "gamma_regularizer": regularizers.serialize(self.gamma_regularizer), + "beta_constraint": constraints.serialize(self.beta_constraint), + "gamma_constraint": constraints.serialize(self.gamma_constraint), + } + base_config = super().get_config() + return {**base_config, **config} diff --git a/keras/layers/normalization/group_normalization_test.py 
b/keras/layers/normalization/group_normalization_test.py new file mode 100644 index 000000000000..d73455cd4fc9 --- /dev/null +++ b/keras/layers/normalization/group_normalization_test.py @@ -0,0 +1,382 @@ +# Copyright 2022 The Keras Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import tensorflow.compat.v2 as tf + +import keras +from keras.initializers import Constant +from keras.layers import GroupNormalization +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + + +def _build_group_normalization_model(norm): + model = keras.models.Sequential() + model.add(norm) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + + return model + + +@test_utils.run_v2_only +class GroupNormalizationTest(test_combinations.TestCase): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_trainable_weights(self): + # Check if weights get initialized correctly + layer = GroupNormalization(groups=1, scale=False, center=False) + layer.build((None, 3, 4)) + self.assertEqual(len(layer.trainable_weights), 0) + self.assertEqual(len(layer.weights), 0) + + # Check if weights get initialized correctly + layer = GroupNormalization(groups=1, scale=True, center=True) + layer.build((None, 3, 4)) + self.assertEqual(len(layer.trainable_weights), 2) + self.assertEqual(len(layer.weights), 2) + + @test_combinations.run_all_keras_modes + def test_groupnorm(self): + test_utils.layer_test( + GroupNormalization, + kwargs={ + "gamma_regularizer": keras.regularizers.l2(0.01), + "beta_regularizer": keras.regularizers.l2(0.01), + }, + input_shape=(3, 4, 32), + ) + + test_utils.layer_test( + GroupNormalization, + kwargs={ + "groups": 4, + "gamma_constraint": keras.constraints.UnitNorm(), + "beta_constraint": keras.constraints.UnitNorm(), + }, + input_shape=(3, 4, 4), + ) + + @test_combinations.run_all_keras_modes + def test_correctness_1d(self): + layer_with_1_group = GroupNormalization( + groups=1, axis=-1, input_shape=(8,), scale=False, center=False + ) + layer_with_2_groups = GroupNormalization( + groups=2, axis=1, input_shape=(8,), scale=False, center=False + ) + + inputs = tf.constant( + [-1.0, -1.0, 1.0, 1.0, 2.0, 2.0, 0, -2.0], shape=(1, 8) + ) + + expected_output_1_group = tf.constant( + [-0.898, -0.898, 0.539, 0.539, 1.257, 1.257, -0.180, -1.616], + shape=(1, 8), + ) + self.assertAllClose( + _build_group_normalization_model(layer_with_1_group)(inputs), + expected_output_1_group, + atol=1e-3, + ) + + expected_output_2_groups = tf.constant( + [-1.0, -1.0, 1.0, 1.0, 0.904, 0.904, -0.301, -1.507], shape=(1, 8) + ) + self.assertAllClose( + _build_group_normalization_model(layer_with_2_groups)(inputs), + expected_output_2_groups, + atol=1e-3, + ) + + @test_combinations.run_all_keras_modes + def test_correctness_1d_with_mask(self): + layer_with_1_group = GroupNormalization( + groups=1, 
axis=-1, input_shape=(8,), scale=False, center=False + ) + layer_with_2_groups = GroupNormalization( + groups=2, axis=1, input_shape=(8,), scale=False, center=False + ) + + inputs = tf.constant( + [-1.0, -1.0, 1.0, 1.0, 2.0, 2.0, 0, -2.0], shape=(1, 8) + ) + + mask1 = tf.constant( + [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=(1, 8) + ) + mask2 = tf.constant( + [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], shape=(1, 8) + ) + + expected_output_1_group = tf.constant( + [-0.706, -0.706, 1.413, 1.413, 2.473, 2.473, 0.353, -1.766], + shape=(1, 8), + ) + self.assertAllClose( + _build_group_normalization_model(layer_with_1_group)( + inputs, mask=mask1 + ), + expected_output_1_group, + atol=1e-3, + ) + + expected_output_2_groups = tf.constant( + [-1.0, -1.0, 1.0, 1.0, 0.999, 0.999, 0.0, -0.999], shape=(1, 8) + ) + self.assertAllClose( + _build_group_normalization_model(layer_with_2_groups)( + inputs, mask=mask2 + ), + expected_output_2_groups, + atol=1e-3, + ) + + @test_combinations.run_all_keras_modes + def test_correctness_1d_with_non_binary_mask(self): + norm = GroupNormalization( + groups=1, axis=-1, input_shape=(8,), scale=False, center=False + ) + inputs = tf.constant( + [-1.0, -1.0, 1.0, 1.0, 2.0, 2.0, 0, -2.0], shape=(1, 8) + ) + + mask = tf.constant( + [0.5, 0.5, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=(1, 8) + ) + + expected_output = tf.constant( + [-0.999, -0.999, 0.999, 0.999, 1.999, 1.999, 0.0, -1.999], + shape=(1, 8), + ) + self.assertAllClose( + _build_group_normalization_model(norm)(inputs, mask=mask), + expected_output, + atol=1e-3, + ) + + @test_combinations.run_all_keras_modes + def test_correctness_2d(self): + layer_with_1_group = GroupNormalization( + groups=1, axis=-1, input_shape=(2, 4), scale=False, center=False + ) + layer_with_2_groups = GroupNormalization( + groups=2, axis=2, input_shape=(2, 4), scale=False, center=False + ) + + inputs = tf.constant( + [[-1.0, -1.0, 2.0, 2.0], [1.0, 1.0, 0, -2.0]], shape=(1, 2, 4) + ) + + expected_output_1_group = tf.constant( + [[-0.898, -0.898, 1.257, 1.257], [0.539, 0.539, -0.180, -1.616]], + shape=(1, 2, 4), + ) + self.assertAllClose( + _build_group_normalization_model(layer_with_1_group)(inputs), + expected_output_1_group, + atol=1e-3, + ) + + expected_output_2_groups = tf.constant( + [[-1.0, -1.0, 0.904, 0.904], [1.0, 1.0, -0.301, -1.507]], + shape=(1, 2, 4), + ) + self.assertAllClose( + _build_group_normalization_model(layer_with_2_groups)(inputs), + expected_output_2_groups, + atol=1e-3, + ) + + @test_combinations.run_all_keras_modes + def test_correctness_2d_with_mask(self): + layer_with_1_group = GroupNormalization( + groups=1, axis=-1, input_shape=(2, 4), scale=False, center=False + ) + layer_with_2_groups = GroupNormalization( + groups=2, axis=2, input_shape=(2, 4), scale=False, center=False + ) + + inputs = tf.constant( + [[-1.0, -1.0, 2.0, 2.0], [1.0, 1.0, 0, -2.0]], shape=(1, 2, 4) + ) + + mask1 = tf.constant( + [ + [ + 1.0, + 1.0, + 0.0, + 0.0, + ], + [1.0, 0.0, 0.0, 0.0], + ], + shape=(1, 2, 4), + ) + mask2 = tf.constant( + [ + [ + 1.0, + 1.0, + 0.0, + 1.0, + ], + [1.0, 1.0, 0.0, 1.0], + ], + shape=(1, 2, 4), + ) + + expected_output_1_group = tf.constant( + [[-0.706, -0.706, 2.473, 2.473], [1.413, 1.413, 0.353, -1.766]], + shape=(1, 2, 4), + ) + self.assertAllClose( + _build_group_normalization_model(layer_with_1_group)( + inputs, mask=mask1 + ), + expected_output_1_group, + atol=1e-3, + ) + + expected_output_2_groups = tf.constant( + [[-1.0, -1.0, 0.999, 0.999], [1.0, 1.0, 0.0, -0.999]], + shape=(1, 2, 4), + ) + 
self.assertAllClose( + _build_group_normalization_model(layer_with_2_groups)( + inputs, mask=mask2 + ), + expected_output_2_groups, + atol=1e-3, + ) + + @test_combinations.run_all_keras_modes + def test_mask_broadcasting(self): + images = tf.ones((1, 2, 4, 3)) # NHWC + mask = tf.random.uniform((1, 2, 4, 1)) < 0.5 # NHWC + + norm = GroupNormalization( + groups=3, axis=-1, input_shape=(2, 4, 9), scale=False, center=False + ) + output = norm(images, mask=mask) + + self.assertEqual(output.shape, (1, 2, 4, 3)) + + @test_combinations.run_all_keras_modes + def test_correctness_instance_norm(self): + instance_norm_layer = GroupNormalization( + groups=4, axis=-1, input_shape=(2, 4), scale=False, center=False + ) + + inputs = tf.constant( + [[-1.0, 1.0, 0, 2.0], [1.0, 3.0, -4, -2.0]], shape=(1, 2, 4) + ) + + expected_instance_norm_output = tf.constant( + [[-1.0, -1.0, 1.0, 1.0], [1.0, 1.0, -1.0, -1.0]], shape=(1, 2, 4) + ) + self.assertAllClose( + _build_group_normalization_model(instance_norm_layer)(inputs), + expected_instance_norm_output, + atol=1e-3, + ) + + @test_combinations.run_all_keras_modes + def test_correctness_with_centering(self): + normalization_layer = GroupNormalization( + groups=2, + axis=-1, + input_shape=(8,), + scale=False, + center=True, + beta_initializer=Constant(10), + ) + + inputs = tf.constant( + [-1.0, -1.0, 1.0, 1.0, 2.0, 2.0, 0, -2.0], shape=(1, 8) + ) + + expected_output = tf.constant( + [9.0, 9.0, 11.0, 11.0, 10.904, 10.904, 9.699, 8.493], shape=(1, 8) + ) + self.assertAllClose( + _build_group_normalization_model(normalization_layer)(inputs), + expected_output, + atol=1e-3, + ) + + @test_combinations.run_all_keras_modes + def test_correctness_with_scaling(self): + normalization_layer = GroupNormalization( + groups=2, + axis=-1, + input_shape=(8,), + scale=True, + center=False, + gamma_initializer=Constant(2), + ) + + inputs = tf.constant( + [-1.0, -1.0, 1.0, 1.0, 2.0, 2.0, 0, -2.0], shape=(1, 8) + ) + + expected_output = tf.constant( + [-2.0, -2.0, 2.0, 2.0, 1.809, 1.808, -0.602, -3.014], shape=(1, 8) + ) + self.assertAllClose( + _build_group_normalization_model(normalization_layer)(inputs), + expected_output, + atol=1e-3, + ) + + def test_validates_groups_against_channels(self): + with self.assertRaisesRegex( + ValueError, r"must be a multiple of the number of channels" + ): + norm = GroupNormalization(groups=3, axis=-1) + norm.build(input_shape=(2, 10)) + + with self.assertRaisesRegex( + ValueError, r"cannot be more than the number of channels" + ): + norm = GroupNormalization(groups=32, axis=-1) + norm.build(input_shape=(2, 8)) + + def test_validates_known_number_of_channels(self): + with self.assertRaisesRegex( + ValueError, r"tensor should have a defined dimension" + ): + norm = GroupNormalization(axis=-1) + norm.build(input_shape=(1, 32, None)) + + def test_rejects_invalid_axis(self): + with self.assertRaisesRegex( + ValueError, r"Invalid value for `axis` argument" + ): + norm = GroupNormalization(axis=-4) + norm.build(input_shape=(64, 32, 32)) + with self.assertRaisesRegex( + ValueError, r"Invalid value for `axis` argument" + ): + norm = GroupNormalization(axis=3) + norm.build(input_shape=(64, 32, 32)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/normalization/layer_normalization.py b/keras/layers/normalization/layer_normalization.py index 2da0e9405f0c..42bcc08d1ea6 100644 --- a/keras/layers/normalization/layer_normalization.py +++ b/keras/layers/normalization/layer_normalization.py @@ -15,7 +15,6 @@ """Layer Normalization 
layer.""" import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes from keras import constraints from keras import initializers @@ -24,332 +23,347 @@ from keras.engine.base_layer import Layer from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.LayerNormalization') +@keras_export("keras.layers.LayerNormalization") class LayerNormalization(Layer): - """Layer normalization layer (Ba et al., 2016). - - Normalize the activations of the previous layer for each given example in a - batch independently, rather than across a batch like Batch Normalization. - i.e. applies a transformation that maintains the mean activation within each - example close to 0 and the activation standard deviation close to 1. - - Given a tensor `inputs`, moments are calculated and normalization - is performed across the axes specified in `axis`. - - Example: - - >>> data = tf.constant(np.arange(10).reshape(5, 2) * 10, dtype=tf.float32) - >>> print(data) - tf.Tensor( - [[ 0. 10.] - [20. 30.] - [40. 50.] - [60. 70.] - [80. 90.]], shape=(5, 2), dtype=float32) - - >>> layer = tf.keras.layers.LayerNormalization(axis=1) - >>> output = layer(data) - >>> print(output) - tf.Tensor( - [[-1. 1.] - [-1. 1.] - [-1. 1.] - [-1. 1.] - [-1. 1.]], shape=(5, 2), dtype=float32) - - Notice that with Layer Normalization the normalization happens across the - axes *within* each example, rather than across different examples in the - batch. - - If `scale` or `center` are enabled, the layer will scale the normalized - outputs by broadcasting them with a trainable variable `gamma`, and center - the outputs by broadcasting with a trainable variable `beta`. `gamma` will - default to a ones tensor and `beta` will default to a zeros tensor, so that - centering and scaling are no-ops before training has begun. - - So, with scaling and centering enabled the normalization equations - are as follows: - - Let the intermediate activations for a mini-batch to be the `inputs`. - - For each sample `x_i` in `inputs` with `k` features, we compute the mean and - variance of the sample: - - ```python - mean_i = sum(x_i[j] for j in range(k)) / k - var_i = sum((x_i[j] - mean_i) ** 2 for j in range(k)) / k - ``` - - and then compute a normalized `x_i_normalized`, including a small factor - `epsilon` for numerical stability. - - ```python - x_i_normalized = (x_i - mean_i) / sqrt(var_i + epsilon) - ``` - - And finally `x_i_normalized ` is linearly transformed by `gamma` and `beta`, - which are learned parameters: - - ```python - output_i = x_i_normalized * gamma + beta - ``` - - `gamma` and `beta` will span the axes of `inputs` specified in `axis`, and - this part of the inputs' shape must be fully defined. - - For example: - - >>> layer = tf.keras.layers.LayerNormalization(axis=[1, 2, 3]) - >>> layer.build([5, 20, 30, 40]) - >>> print(layer.beta.shape) - (20, 30, 40) - >>> print(layer.gamma.shape) - (20, 30, 40) - - Note that other implementations of layer normalization may choose to define - `gamma` and `beta` over a separate set of axes from the axes being - normalized across. For example, Group Normalization - ([Wu et al. 2018](https://arxiv.org/abs/1803.08494)) with group size of 1 - corresponds to a Layer Normalization that normalizes across height, width, - and channel and has `gamma` and `beta` span only the channel dimension. - So, this Layer Normalization implementation will not match a Group - Normalization layer with group size set to 1. 
- - Args: - axis: Integer or List/Tuple. The axis or axes to normalize across. Typically - this is the features axis/axes. The left-out axes are typically the batch - axis/axes. This argument defaults to `-1`, the last dimension in the - input. - epsilon: Small float added to variance to avoid dividing by zero. Defaults - to 1e-3 - center: If True, add offset of `beta` to normalized tensor. If False, `beta` - is ignored. Defaults to True. - scale: If True, multiply by `gamma`. If False, `gamma` is not used. Defaults - to True. When the next layer is linear (also e.g. `nn.relu`), this can be - disabled since the scaling will be done by the next layer. - beta_initializer: Initializer for the beta weight. Defaults to zeros. - gamma_initializer: Initializer for the gamma weight. Defaults to ones. - beta_regularizer: Optional regularizer for the beta weight. None by default. - gamma_regularizer: Optional regularizer for the gamma weight. None by - default. - beta_constraint: Optional constraint for the beta weight. None by default. - gamma_constraint: Optional constraint for the gamma weight. None by default. - - Input shape: - Arbitrary. Use the keyword argument `input_shape` (tuple of - integers, does not include the samples axis) when using this layer as the - first layer in a model. - - Output shape: - Same shape as input. - - Reference: - - [Lei Ba et al., 2016](https://arxiv.org/abs/1607.06450). - """ - - @utils.allow_initializer_layout - def __init__(self, - axis=-1, - epsilon=1e-3, - center=True, - scale=True, - beta_initializer='zeros', - gamma_initializer='ones', - beta_regularizer=None, - gamma_regularizer=None, - beta_constraint=None, - gamma_constraint=None, - **kwargs): - super().__init__(**kwargs) - if isinstance(axis, (list, tuple)): - self.axis = list(axis) - elif isinstance(axis, int): - self.axis = axis - else: - raise TypeError('Expected an int or a list/tuple of ints for the ' - 'argument \'axis\', but received: %r' % axis) - - self.epsilon = epsilon - self.center = center - self.scale = scale - self.beta_initializer = initializers.get(beta_initializer) - self.gamma_initializer = initializers.get(gamma_initializer) - self.beta_regularizer = regularizers.get(beta_regularizer) - self.gamma_regularizer = regularizers.get(gamma_regularizer) - self.beta_constraint = constraints.get(beta_constraint) - self.gamma_constraint = constraints.get(gamma_constraint) - - self.supports_masking = True - - # Indicates whether a faster fused implementation can be used. This will be - # set to True or False in build()" - self._fused = None - - def _fused_can_be_used(self, ndims): - """Returns false if fused implementation cannot be used. - - Check if the axis is contiguous and can be collapsed into the last axis. - The self.axis is assumed to have no duplicates. + """Layer normalization layer (Ba et al., 2016). + + Normalize the activations of the previous layer for each given example in a + batch independently, rather than across a batch like Batch Normalization. + i.e. applies a transformation that maintains the mean activation within each + example close to 0 and the activation standard deviation close to 1. + + Given a tensor `inputs`, moments are calculated and normalization + is performed across the axes specified in `axis`. + + Example: + + >>> data = tf.constant(np.arange(10).reshape(5, 2) * 10, dtype=tf.float32) + >>> print(data) + tf.Tensor( + [[ 0. 10.] + [20. 30.] + [40. 50.] + [60. 70.] + [80. 
90.]], shape=(5, 2), dtype=float32) + + >>> layer = tf.keras.layers.LayerNormalization(axis=1) + >>> output = layer(data) + >>> print(output) + tf.Tensor( + [[-1. 1.] + [-1. 1.] + [-1. 1.] + [-1. 1.] + [-1. 1.]], shape=(5, 2), dtype=float32) + + Notice that with Layer Normalization the normalization happens across the + axes *within* each example, rather than across different examples in the + batch. + + If `scale` or `center` are enabled, the layer will scale the normalized + outputs by broadcasting them with a trainable variable `gamma`, and center + the outputs by broadcasting with a trainable variable `beta`. `gamma` will + default to a ones tensor and `beta` will default to a zeros tensor, so that + centering and scaling are no-ops before training has begun. + + So, with scaling and centering enabled, the normalization equations + are as follows: + + Let the intermediate activations for a mini-batch be the `inputs`. + + For each sample `x_i` in `inputs` with `k` features, we compute the mean and + variance of the sample: + + ```python + mean_i = sum(x_i[j] for j in range(k)) / k + var_i = sum((x_i[j] - mean_i) ** 2 for j in range(k)) / k + ``` + + and then compute a normalized `x_i_normalized`, including a small factor + `epsilon` for numerical stability. + + ```python + x_i_normalized = (x_i - mean_i) / sqrt(var_i + epsilon) + ``` + + And finally `x_i_normalized` is linearly transformed by `gamma` and `beta`, + which are learned parameters: + + ```python + output_i = x_i_normalized * gamma + beta + ``` + + `gamma` and `beta` will span the axes of `inputs` specified in `axis`, and + this part of the inputs' shape must be fully defined. + + For example: + + >>> layer = tf.keras.layers.LayerNormalization(axis=[1, 2, 3]) + >>> layer.build([5, 20, 30, 40]) + >>> print(layer.beta.shape) + (20, 30, 40) + >>> print(layer.gamma.shape) + (20, 30, 40) + + Note that other implementations of layer normalization may choose to define + `gamma` and `beta` over a separate set of axes from the axes being + normalized across. For example, Group Normalization + ([Wu et al. 2018](https://arxiv.org/abs/1803.08494)) with group size of 1 + corresponds to a Layer Normalization that normalizes across height, width, + and channel and has `gamma` and `beta` span only the channel dimension. + So, this Layer Normalization implementation will not match a Group + Normalization layer with group size set to 1. + + Args: + axis: Integer or List/Tuple. The axis or axes to normalize across. + Typically, this is the features axis/axes. The left-out axes are + typically the batch axis/axes. `-1` is the last dimension in the + input. Defaults to `-1`. + epsilon: Small float added to variance to avoid dividing by zero. Defaults + to `1e-3`. + center: If True, add offset of `beta` to normalized tensor. If False, + `beta` is ignored. Defaults to `True`. + scale: If True, multiply by `gamma`. If False, `gamma` is not used. + When the next layer is linear (also e.g. `nn.relu`), this can be + disabled since the scaling will be done by the next layer. + Defaults to `True`. + beta_initializer: Initializer for the beta weight. Defaults to zeros. + gamma_initializer: Initializer for the gamma weight. Defaults to ones. + beta_regularizer: Optional regularizer for the beta weight. None by + default. + gamma_regularizer: Optional regularizer for the gamma weight. None by + default. + beta_constraint: Optional constraint for the beta weight. None by default. + gamma_constraint: Optional constraint for the gamma weight.
None by + default. + + Input shape: + Arbitrary. Use the keyword argument `input_shape` (tuple of + integers, does not include the samples axis) when using this layer as the + first layer in a model. + + Output shape: + Same shape as input. + + Reference: + - [Lei Ba et al., 2016](https://arxiv.org/abs/1607.06450). """ - axis = sorted(self.axis) - can_use_fused = False - - if axis[-1] == ndims - 1 and axis[-1] - axis[0] == len(axis) - 1: - can_use_fused = True - - # fused_batch_norm will silently raise epsilon to be at least 1.001e-5, so - # we cannot used the fused version if epsilon is below that value. Also, the - # variable dtype must be float32, as fused_batch_norm only supports float32 - # variables. - if self.epsilon < 1.001e-5 or self.dtype != 'float32': - can_use_fused = False - - return can_use_fused - - def build(self, input_shape): - self.axis = tf_utils.validate_axis(self.axis, input_shape) - input_shape = tf.TensorShape(input_shape) - rank = input_shape.rank - - param_shape = [input_shape[dim] for dim in self.axis] - if self.scale: - self.gamma = self.add_weight( - name='gamma', - shape=param_shape, - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint, - trainable=True, - experimental_autocast=False) - else: - self.gamma = None - - if self.center: - self.beta = self.add_weight( - name='beta', - shape=param_shape, - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint, - trainable=True, - experimental_autocast=False) - else: - self.beta = None - - self._fused = self._fused_can_be_used(rank) - self.built = True - - def call(self, inputs): - # TODO(b/229545225): Remove the RaggedTensor check. - is_ragged = isinstance(inputs, tf.RaggedTensor) - if is_ragged: - inputs_lengths = inputs.nested_row_lengths() - inputs = inputs.to_tensor() - inputs = tf.cast(inputs, self.compute_dtype) - # Compute the axes along which to reduce the mean / variance - input_shape = inputs.shape - ndims = len(input_shape) - - # Broadcasting only necessary for norm when the axis is not just - # the last dimension - broadcast_shape = [1] * ndims - for dim in self.axis: - broadcast_shape[dim] = input_shape.dims[dim].value - - def _broadcast(v): - if (v is not None and len(v.shape) != ndims and self.axis != [ndims - 1]): - return tf.reshape(v, broadcast_shape) - return v - - if not self._fused: - input_dtype = inputs.dtype - if input_dtype in ('float16', 'bfloat16') and self.dtype == 'float32': - # If mixed precision is used, cast inputs to float32 so that this is at - # least as numerically stable as the fused version. - inputs = tf.cast(inputs, 'float32') - - # Calculate the moments on the last axis (layer activations). - mean, variance = tf.nn.moments(inputs, self.axis, keepdims=True) - - scale, offset = _broadcast(self.gamma), _broadcast(self.beta) - - # Compute layer normalization using the batch_normalization function. 
- outputs = tf.nn.batch_normalization( - inputs, - mean, - variance, - offset=offset, - scale=scale, - variance_epsilon=self.epsilon) - outputs = tf.cast(outputs, input_dtype) - else: - # Collapse dims before self.axis, and dims in self.axis - pre_dim, in_dim = (1, 1) - axis = sorted(self.axis) - tensor_shape = tf.shape(inputs) - for dim in range(0, ndims): - dim_tensor = tensor_shape[dim] - if dim < axis[0]: - pre_dim = pre_dim * dim_tensor + + @utils.allow_initializer_layout + def __init__( + self, + axis=-1, + epsilon=1e-3, + center=True, + scale=True, + beta_initializer="zeros", + gamma_initializer="ones", + beta_regularizer=None, + gamma_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + **kwargs + ): + super().__init__(**kwargs) + if isinstance(axis, (list, tuple)): + self.axis = list(axis) + elif isinstance(axis, int): + self.axis = axis + else: + raise TypeError( + "Expected an int or a list/tuple of ints for the " + "argument 'axis', but received: %r" % axis + ) + + self.epsilon = epsilon + self.center = center + self.scale = scale + self.beta_initializer = initializers.get(beta_initializer) + self.gamma_initializer = initializers.get(gamma_initializer) + self.beta_regularizer = regularizers.get(beta_regularizer) + self.gamma_regularizer = regularizers.get(gamma_regularizer) + self.beta_constraint = constraints.get(beta_constraint) + self.gamma_constraint = constraints.get(gamma_constraint) + + self.supports_masking = True + + # Indicates whether a faster fused implementation can be used. This will + # be set to True or False in build()" + self._fused = None + + def _fused_can_be_used(self, ndims): + """Returns false if fused implementation cannot be used. + + Check if the axis is contiguous and can be collapsed into the last axis. + The self.axis is assumed to have no duplicates. + """ + axis = sorted(self.axis) + can_use_fused = False + + if axis[-1] == ndims - 1 and axis[-1] - axis[0] == len(axis) - 1: + can_use_fused = True + + # fused_batch_norm will silently raise epsilon to be at least 1.001e-5, + # so we cannot used the fused version if epsilon is below that value. + # Also, the variable dtype must be float32, as fused_batch_norm only + # supports float32 variables. + if self.epsilon < 1.001e-5 or self.dtype != "float32": + can_use_fused = False + + return can_use_fused + + def build(self, input_shape): + self.axis = tf_utils.validate_axis(self.axis, input_shape) + input_shape = tf.TensorShape(input_shape) + rank = input_shape.rank + + param_shape = [input_shape[dim] for dim in self.axis] + if self.scale: + self.gamma = self.add_weight( + name="gamma", + shape=param_shape, + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint, + trainable=True, + experimental_autocast=False, + ) + else: + self.gamma = None + + if self.center: + self.beta = self.add_weight( + name="beta", + shape=param_shape, + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint, + trainable=True, + experimental_autocast=False, + ) + else: + self.beta = None + + self._fused = self._fused_can_be_used(rank) + self.built = True + + def call(self, inputs): + # TODO(b/229545225): Remove the RaggedTensor check. 
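        # (Ragged inputs are densified with `to_tensor()` below;
        # `nested_row_lengths()` is captured first so that the padded result
        # can be converted back to a RaggedTensor at the end of `call`.)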
+ is_ragged = isinstance(inputs, tf.RaggedTensor) + if is_ragged: + inputs_lengths = inputs.nested_row_lengths() + inputs = inputs.to_tensor() + inputs = tf.cast(inputs, self.compute_dtype) + # Compute the axes along which to reduce the mean / variance + input_shape = inputs.shape + ndims = len(input_shape) + + # Broadcasting only necessary for norm when the axis is not just + # the last dimension + broadcast_shape = [1] * ndims + for dim in self.axis: + broadcast_shape[dim] = input_shape.dims[dim].value + + def _broadcast(v): + if ( + v is not None + and len(v.shape) != ndims + and self.axis != [ndims - 1] + ): + return tf.reshape(v, broadcast_shape) + return v + + if not self._fused: + input_dtype = inputs.dtype + if ( + input_dtype in ("float16", "bfloat16") + and self.dtype == "float32" + ): + # If mixed precision is used, cast inputs to float32 so that + # this is at least as numerically stable as the fused version. + inputs = tf.cast(inputs, "float32") + + # Calculate the moments on the last axis (layer activations). + mean, variance = tf.nn.moments(inputs, self.axis, keepdims=True) + + scale, offset = _broadcast(self.gamma), _broadcast(self.beta) + + # Compute layer normalization using the batch_normalization + # function. + outputs = tf.nn.batch_normalization( + inputs, + mean, + variance, + offset=offset, + scale=scale, + variance_epsilon=self.epsilon, + ) + outputs = tf.cast(outputs, input_dtype) else: - assert dim in axis - in_dim = in_dim * dim_tensor - - squeezed_shape = [1, pre_dim, in_dim, 1] - # This fused operation requires reshaped inputs to be NCHW. - data_format = 'NCHW' - - inputs = tf.reshape(inputs, squeezed_shape) - - # self.gamma and self.beta have the wrong shape for fused_batch_norm, so - # we cannot pass them as the scale and offset parameters. Therefore, we - # create two constant tensors in correct shapes for fused_batch_norm and - # later construct a separate calculation on the scale and offset. - scale = tf.ones([pre_dim], dtype=self.dtype) - offset = tf.zeros([pre_dim], dtype=self.dtype) - - # Compute layer normalization using the fused_batch_norm function. - outputs, _, _ = tf.compat.v1.nn.fused_batch_norm( - inputs, - scale=scale, - offset=offset, - epsilon=self.epsilon, - data_format=data_format) - - outputs = tf.reshape(outputs, tensor_shape) - - scale, offset = _broadcast(self.gamma), _broadcast(self.beta) - - if scale is not None: - outputs = outputs * tf.cast(scale, outputs.dtype) - if offset is not None: - outputs = outputs + tf.cast(offset, outputs.dtype) - - # If some components of the shape got lost due to adjustments, fix that. 
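            # A worked example of the collapse performed in this fused branch,
            # assuming axis=[1, 2, 3] and an input of shape (2, 20, 30, 40):
            # pre_dim = 2 and in_dim = 20 * 30 * 40 = 24000, so the input is
            # viewed as an NCHW tensor of shape (1, 2, 24000, 1) and
            # fused_batch_norm normalizes each of the pre_dim "channels" over
            # its 24000 elements.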
- outputs.set_shape(input_shape) - - if is_ragged: - outputs = tf.RaggedTensor.from_tensor(outputs, inputs_lengths) - return outputs - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'axis': self.axis, - 'epsilon': self.epsilon, - 'center': self.center, - 'scale': self.scale, - 'beta_initializer': initializers.serialize(self.beta_initializer), - 'gamma_initializer': initializers.serialize(self.gamma_initializer), - 'beta_regularizer': regularizers.serialize(self.beta_regularizer), - 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), - 'beta_constraint': constraints.serialize(self.beta_constraint), - 'gamma_constraint': constraints.serialize(self.gamma_constraint) - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + # Collapse dims before self.axis, and dims in self.axis + + axis = sorted(self.axis) + tensor_shape = tf.shape(inputs) + pre_dim = tf.reduce_prod(tensor_shape[: axis[0]]) + in_dim = tf.reduce_prod(tensor_shape[axis[0] :]) + squeezed_shape = [1, pre_dim, in_dim, 1] + # This fused operation requires reshaped inputs to be NCHW. + data_format = "NCHW" + + inputs = tf.reshape(inputs, squeezed_shape) + + # self.gamma and self.beta have the wrong shape for + # fused_batch_norm, so we cannot pass them as the scale and offset + # parameters. Therefore, we create two constant tensors in correct + # shapes for fused_batch_norm and later construct a separate + # calculation on the scale and offset. + scale = tf.ones([pre_dim], dtype=self.dtype) + offset = tf.zeros([pre_dim], dtype=self.dtype) + + # Compute layer normalization using the fused_batch_norm function. + outputs, _, _ = tf.compat.v1.nn.fused_batch_norm( + inputs, + scale=scale, + offset=offset, + epsilon=self.epsilon, + data_format=data_format, + ) + + outputs = tf.reshape(outputs, tensor_shape) + + scale, offset = _broadcast(self.gamma), _broadcast(self.beta) + + if scale is not None: + outputs = outputs * tf.cast(scale, outputs.dtype) + if offset is not None: + outputs = outputs + tf.cast(offset, outputs.dtype) + + # If some components of the shape got lost due to adjustments, fix that. 
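# A small sketch (assumed shapes, not part of the patch) of the collapsing
# arithmetic used in the fused branch above: the dims before the first
# normalized axis are folded into one "channel" dimension, the normalized
# dims into one "spatial" dimension, producing the rank-4 NCHW input that
# fused_batch_norm expects.
import tensorflow as tf

inputs = tf.random.normal([2, 3, 4, 5])
axis = [2, 3]  # normalize over the last two dimensions
tensor_shape = tf.shape(inputs)
pre_dim = tf.reduce_prod(tensor_shape[: axis[0]])  # 2 * 3 = 6
in_dim = tf.reduce_prod(tensor_shape[axis[0] :])   # 4 * 5 = 20
squeezed = tf.reshape(inputs, [1, pre_dim, in_dim, 1])
print(squeezed.shape)  # (1, 6, 20, 1)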
+ outputs.set_shape(input_shape) + + if is_ragged: + outputs = tf.RaggedTensor.from_tensor(outputs, inputs_lengths) + return outputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "axis": self.axis, + "epsilon": self.epsilon, + "center": self.center, + "scale": self.scale, + "beta_initializer": initializers.serialize(self.beta_initializer), + "gamma_initializer": initializers.serialize(self.gamma_initializer), + "beta_regularizer": regularizers.serialize(self.beta_regularizer), + "gamma_regularizer": regularizers.serialize(self.gamma_regularizer), + "beta_constraint": constraints.serialize(self.beta_constraint), + "gamma_constraint": constraints.serialize(self.gamma_constraint), + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/normalization/layer_normalization_test.py b/keras/layers/normalization/layer_normalization_test.py index e2b2eea650ee..c3531d83fdb7 100644 --- a/keras/layers/normalization/layer_normalization_test.py +++ b/keras/layers/normalization/layer_normalization_test.py @@ -14,335 +14,402 @@ # ============================================================================== """Tests for normalization layers.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf import keras +from keras.layers.normalization import layer_normalization from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.layers.normalization import layer_normalization - - -def _run_layernorm_correctness_test(layer, dtype='float32'): - model = keras.models.Sequential() - model.add(keras.layers.Lambda(lambda x: tf.cast(x, dtype='float16'))) - norm = layer(input_shape=(2, 2, 2), dtype=dtype) - model.add(norm) - model.compile( - loss='mse', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - run_eagerly=test_utils.should_run_eagerly()) - - # centered on 5.0, variance 10.0 - x = (np.random.normal(loc=5.0, scale=10.0, size=(1000, 2, 2, 2)) - .astype(dtype)) - model.fit(x, x, epochs=4, verbose=0) - out = model.predict(x) - out -= keras.backend.eval(norm.beta) - out /= keras.backend.eval(norm.gamma) - np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1) - np.testing.assert_allclose(out.std(), 1.0, atol=1e-1) - -class LayerNormalizationTest(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes - def test_basic_layernorm(self): - test_utils.layer_test( - keras.layers.LayerNormalization, - kwargs={ - 'gamma_regularizer': keras.regularizers.l2(0.01), - 'beta_regularizer': keras.regularizers.l2(0.01) - }, - input_shape=(3, 4, 2)) - test_utils.layer_test( - keras.layers.LayerNormalization, - kwargs={ - 'gamma_initializer': 'ones', - 'beta_initializer': 'ones', - }, - input_shape=(3, 4, 2)) - test_utils.layer_test( - keras.layers.LayerNormalization, - kwargs={'scale': False, - 'center': False}, - input_shape=(3, 3)) - test_utils.layer_test( - keras.layers.LayerNormalization, - kwargs={'axis': (-3, -2, -1)}, - input_shape=(2, 8, 8, 3)) - test_utils.layer_test( - keras.layers.LayerNormalization, - input_shape=(1, 0, 10)) - - @test_combinations.run_all_keras_modes - def test_non_fused_layernorm(self): - test_utils.layer_test( - keras.layers.LayerNormalization, - kwargs={'axis': -2}, - input_shape=(3, 4, 2)) - test_utils.layer_test( - keras.layers.LayerNormalization, - kwargs={'axis': (-3, -2)}, - input_shape=(2, 8, 8, 3)) - test_utils.layer_test( - 
keras.layers.LayerNormalization, - kwargs={'axis': (-3, -1)}, - input_shape=(2, 8, 8, 3)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_layernorm_weights(self): - layer = keras.layers.LayerNormalization(scale=False, center=False) - layer.build((None, 3, 4)) - self.assertEqual(len(layer.trainable_weights), 0) - self.assertEqual(len(layer.weights), 0) - - layer = keras.layers.LayerNormalization() - layer.build((None, 3, 4)) - self.assertEqual(len(layer.trainable_weights), 2) - self.assertEqual(len(layer.weights), 2) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_layernorm_regularization(self): - layer = keras.layers.LayerNormalization( - gamma_regularizer='l1', beta_regularizer='l1') - layer.build((None, 3, 4)) - self.assertEqual(len(layer.losses), 2) - max_norm = keras.constraints.max_norm - layer = keras.layers.LayerNormalization( - gamma_constraint=max_norm, beta_constraint=max_norm) - layer.build((None, 3, 4)) - self.assertEqual(layer.gamma.constraint, max_norm) - self.assertEqual(layer.beta.constraint, max_norm) - - @test_combinations.run_all_keras_modes - def test_layernorm_convnet_channel_last(self): +def _run_layernorm_correctness_test(layer, dtype="float32"): model = keras.models.Sequential() - norm = keras.layers.LayerNormalization(input_shape=(4, 4, 3)) + model.add(keras.layers.Lambda(lambda x: tf.cast(x, dtype="float16"))) + norm = layer(input_shape=(2, 2, 2), dtype=dtype) model.add(norm) model.compile( - loss='mse', + loss="mse", optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - run_eagerly=test_utils.should_run_eagerly()) + run_eagerly=test_utils.should_run_eagerly(), + ) # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3)) + x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 2, 2, 2)).astype( + dtype + ) model.fit(x, x, epochs=4, verbose=0) out = model.predict(x) - out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3)) - out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3)) - - np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1) - np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1) - - @test_combinations.run_all_keras_modes - def test_layernorm_ragged_tensor(self): - x = tf.ragged.constant( - [[[3., 1., 1.], [4., 1., 1.]], - [[5., 9., 1.]], - [[1., 2., 1.]]], - inner_shape=(3,)) - layer = keras.layers.LayerNormalization() - self.assertEqual(layer(x).shape, (3, None, 3)) - - @test_combinations.run_all_keras_modes - def test_layernorm_correctness(self): - _run_layernorm_correctness_test( - layer_normalization.LayerNormalization, dtype='float32') - - @test_combinations.run_all_keras_modes - def test_layernorm_mixed_precision(self): - _run_layernorm_correctness_test( - layer_normalization.LayerNormalization, dtype='float16') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testIncorrectAxisType(self): - with self.assertRaisesRegex(TypeError, - r'Expected an int or a list/tuple of ints'): - _ = layer_normalization.LayerNormalization(axis={'axis': -1}) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInvalidAxis(self): - with self.assertRaisesRegex( - ValueError, - r'Invalid value for `axis` argument. 
Expected 0 <= axis < inputs.rank'): - layer_norm = layer_normalization.LayerNormalization(axis=3) - layer_norm.build(input_shape=(2, 2, 2)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testDuplicateAxis(self): - with self.assertRaisesRegex(ValueError, r'Duplicate axis:'): - layer_norm = layer_normalization.LayerNormalization(axis=[-1, -1]) - layer_norm.build(input_shape=(2, 2, 2)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testFusedAttr(self): - layer_norm = layer_normalization.LayerNormalization(axis=[-2, -1]) - layer_norm.build(input_shape=(2, 2, 2)) - self.assertEqual(layer_norm._fused, True) + out -= keras.backend.eval(norm.beta) + out /= keras.backend.eval(norm.gamma) + + np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1) + np.testing.assert_allclose(out.std(), 1.0, atol=1e-1) + + +class LayerNormalizationTest(test_combinations.TestCase): + @test_combinations.run_all_keras_modes + def test_basic_layernorm(self): + test_utils.layer_test( + keras.layers.LayerNormalization, + kwargs={ + "gamma_regularizer": keras.regularizers.l2(0.01), + "beta_regularizer": keras.regularizers.l2(0.01), + }, + input_shape=(3, 4, 2), + ) + test_utils.layer_test( + keras.layers.LayerNormalization, + kwargs={ + "gamma_initializer": "ones", + "beta_initializer": "ones", + }, + input_shape=(3, 4, 2), + ) + test_utils.layer_test( + keras.layers.LayerNormalization, + kwargs={"scale": False, "center": False}, + input_shape=(3, 3), + ) + test_utils.layer_test( + keras.layers.LayerNormalization, + kwargs={"axis": (-3, -2, -1)}, + input_shape=(2, 8, 8, 3), + ) + test_utils.layer_test( + keras.layers.LayerNormalization, input_shape=(1, 0, 10) + ) + + @test_combinations.run_all_keras_modes + def test_non_fused_layernorm(self): + test_utils.layer_test( + keras.layers.LayerNormalization, + kwargs={"axis": -2}, + input_shape=(3, 4, 2), + ) + test_utils.layer_test( + keras.layers.LayerNormalization, + kwargs={"axis": (-3, -2)}, + input_shape=(2, 8, 8, 3), + ) + test_utils.layer_test( + keras.layers.LayerNormalization, + kwargs={"axis": (-3, -1)}, + input_shape=(2, 8, 8, 3), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_layernorm_weights(self): + layer = keras.layers.LayerNormalization(scale=False, center=False) + layer.build((None, 3, 4)) + self.assertEqual(len(layer.trainable_weights), 0) + self.assertEqual(len(layer.weights), 0) + + layer = keras.layers.LayerNormalization() + layer.build((None, 3, 4)) + self.assertEqual(len(layer.trainable_weights), 2) + self.assertEqual(len(layer.weights), 2) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_layernorm_regularization(self): + layer = keras.layers.LayerNormalization( + gamma_regularizer="l1", beta_regularizer="l1" + ) + layer.build((None, 3, 4)) + self.assertEqual(len(layer.losses), 2) + max_norm = keras.constraints.max_norm + layer = keras.layers.LayerNormalization( + gamma_constraint=max_norm, beta_constraint=max_norm + ) + layer.build((None, 3, 4)) + self.assertEqual(layer.gamma.constraint, max_norm) + self.assertEqual(layer.beta.constraint, max_norm) + + @test_combinations.run_all_keras_modes + def test_layernorm_convnet_channel_last(self): + model = keras.models.Sequential() + norm = keras.layers.LayerNormalization(input_shape=(4, 4, 3)) + model.add(norm) + model.compile( + loss="mse", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + 
run_eagerly=test_utils.should_run_eagerly(), + ) + + # centered on 5.0, variance 10.0 + x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3)) + model.fit(x, x, epochs=4, verbose=0) + out = model.predict(x) + out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3)) + out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3)) + + np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1) + np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1) + + @test_combinations.run_all_keras_modes + def test_layernorm_ragged_tensor(self): + x = tf.ragged.constant( + [ + [[3.0, 1.0, 1.0], [4.0, 1.0, 1.0]], + [[5.0, 9.0, 1.0]], + [[1.0, 2.0, 1.0]], + ], + inner_shape=(3,), + ) + layer = keras.layers.LayerNormalization() + self.assertEqual(layer(x).shape, (3, None, 3)) + + @test_combinations.run_all_keras_modes + def test_layernorm_correctness(self): + _run_layernorm_correctness_test( + layer_normalization.LayerNormalization, dtype="float32" + ) + + @test_combinations.run_all_keras_modes + def test_layernorm_mixed_precision(self): + _run_layernorm_correctness_test( + layer_normalization.LayerNormalization, dtype="float16" + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testIncorrectAxisType(self): + with self.assertRaisesRegex( + TypeError, r"Expected an int or a list/tuple of ints" + ): + _ = layer_normalization.LayerNormalization(axis={"axis": -1}) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInvalidAxis(self): + with self.assertRaisesRegex( + ValueError, + r"Invalid value for `axis` argument. " + r"Expected 0 <= axis < inputs.rank", + ): + layer_norm = layer_normalization.LayerNormalization(axis=3) + layer_norm.build(input_shape=(2, 2, 2)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testDuplicateAxis(self): + with self.assertRaisesRegex(ValueError, r"Duplicate axis:"): + layer_norm = layer_normalization.LayerNormalization(axis=[-1, -1]) + layer_norm.build(input_shape=(2, 2, 2)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testFusedAttr(self): + layer_norm = layer_normalization.LayerNormalization(axis=[-2, -1]) + layer_norm.build(input_shape=(2, 2, 2)) + self.assertEqual(layer_norm._fused, True) class LayerNormalizationNumericsTest(test_combinations.TestCase): - """Tests LayerNormalization has correct and numerically stable outputs.""" - - def _expected_layer_norm(self, x, beta, gamma, batch_input_shape, axis, - epsilon): - """Returns the layer norm, which is computed using NumPy.""" - broadcast_shape = [batch_input_shape[i] if i in axis else 1 - for i in range(len(batch_input_shape))] - mean = np.mean(x, axis=axis, keepdims=True) - var = np.var(x, axis=axis, keepdims=True) - expected = (x - mean) / np.sqrt(var + epsilon) - expected *= np.reshape(gamma, broadcast_shape) - expected += np.reshape(beta, broadcast_shape) - return expected - - def _test_forward_pass(self, batch_input_shape, axis, fp64_tol=1e-14, - fp32_tol=1e-6, fp16_tol=1e-2): - """Tests the forward pass of layer layer_normalization. - - Args: - batch_input_shape: The input shape that will be used to test, including - the batch dimension. - axis: A list of axes to normalize. Will be passed to the `axis` argument - of Layerlayer_normalization. - fp64_tol: The relative and absolute tolerance for float64. - fp32_tol: The relative and absolute tolerance for float32. 
- fp16_tol: The relative and absolute tolerance for float16. - """ - param_shape = [batch_input_shape[i] for i in axis] - param_elems = 1 - for dim in param_shape: - param_elems *= dim - beta = np.arange(param_elems, dtype='float64').reshape(param_shape) - gamma = np.arange(1, param_elems + 1, dtype='float64').reshape(param_shape) - x = np.random.normal(size=batch_input_shape) - - for epsilon in 1e-12, 1e-3: - expected = self._expected_layer_norm(x, beta, gamma, batch_input_shape, - axis, epsilon) - for dtype in 'float64', 'float32', 'float16': - norm = layer_normalization.LayerNormalization( - axis=axis, dtype=dtype, batch_input_shape=batch_input_shape, - epsilon=epsilon, beta_initializer=keras.initializers.constant(beta), - gamma_initializer=keras.initializers.constant(gamma)) - y = norm(keras.backend.cast(x, dtype)) - actual = keras.backend.eval(y) - - if dtype == 'float64': - tol = fp64_tol - elif dtype == 'float32': - tol = fp32_tol - else: - assert dtype == 'float16' - tol = fp16_tol - - # We use absolute tolerances in addition to relative tolerances, because - # some of the values are very close to zero. - self.assertAllClose(expected, actual, rtol=tol, atol=tol) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_forward(self): - # For numeric stability, we ensure the axis's dimension(s) have at least 4 - # elements. - self._test_forward_pass((4, 3), (0,)) - self._test_forward_pass((3, 4), (1,)) - self._test_forward_pass((4, 3, 2), (0,)) - self._test_forward_pass((2, 4, 2), (1,)) - self._test_forward_pass((2, 3, 4), (2,), fp16_tol=5e-2) - self._test_forward_pass((2, 3, 2), (0, 2)) - self._test_forward_pass((2, 2, 2, 2), (1, 3)) - self._test_forward_pass((2, 2, 2, 2), (2, 3)) - self._test_forward_pass((2, 3, 4, 5), (3,)) - - def _test_backward_pass(self, batch_input_shape, axis, fp64_tol=1e-5, - fp32_tol=1e-5, fp16_tol=2e-2): - """Tests the backwards pass of layer layer_normalization. - - Args: - batch_input_shape: The input shape that will be used to test, including - the batch dimension. - axis: A list of axes to normalize. Will be passed to the `axis` argument - of Layerlayer_normalization. - fp64_tol: The relative and absolute tolerance for float64. - fp32_tol: The relative and absolute tolerance for float32. - fp16_tol: The relative and absolute tolerance for float16. - """ - param_shape = [batch_input_shape[i] for i in axis] - param_elems = 1 - for dim in param_shape: - param_elems *= dim - beta = np.arange(param_elems, dtype='float64').reshape(param_shape) - gamma = np.arange(1, param_elems + 1, dtype='float64').reshape(param_shape) - x = np.random.normal(size=batch_input_shape) - - for epsilon in 1e-12, 1e-3: - # Float64 must come first in this list, as we use the float64 numerical - # gradients to compare to the float32 and float16 symbolic gradients as - # well. Computing float32/float16 numerical gradients is too numerically - # unstable. - for dtype in 'float64', 'float32', 'float16': - norm = layer_normalization.LayerNormalization( - axis=axis, dtype=dtype, batch_input_shape=batch_input_shape, - epsilon=epsilon, beta_initializer=keras.initializers.constant(beta), - gamma_initializer=keras.initializers.constant(gamma)) - norm.build(x.shape) - - # pylint: disable=cell-var-from-loop - def forward_fn(x, beta, gamma): - # We must monkey-patch the attributes of `norm` with the function - # arguments, so that the gradient checker will properly compute their - # gradients. 
The gradient checker computes gradients with respect to
-        # the input arguments of `f`.
-        with tf.compat.v1.test.mock.patch.object(norm, 'beta', beta):
-          with tf.compat.v1.test.mock.patch.object(norm, 'gamma', gamma):
-            return norm(x)
-        # pylint: enable=cell-var-from-loop
-        results = tf.test.compute_gradient(
-            forward_fn, [keras.backend.cast(x, dtype), norm.beta, norm.gamma])
-        ([x_grad_t, beta_grad_t, gamma_grad_t],
-         [x_grad_n, beta_grad_n, gamma_grad_n]) = results
-
-        if dtype == 'float64':
-          # We use the float64 numeric gradients as the reference, to compare
-          # against the symbolic gradients for all dtypes.
-          x_grad_ref = x_grad_n
-          beta_grad_ref = beta_grad_n
-          gamma_grad_ref = gamma_grad_n
-          tol = fp64_tol
-        elif dtype == 'float32':
-          tol = fp32_tol
-        else:
-          assert dtype == 'float16'
-          tol = fp16_tol
-
-        # We use absolute tolerances in addition to relative tolerances, because
-        # some of the values are very close to zero.
-        self.assertAllClose(x_grad_t, x_grad_ref, rtol=tol, atol=tol)
-        self.assertAllClose(beta_grad_t, beta_grad_ref, rtol=tol, atol=tol)
-        self.assertAllClose(gamma_grad_t, gamma_grad_ref, rtol=tol, atol=tol)
-
-  # The gradient_checker_v2 does not work properly with LayerNorm in graph mode.
-  @test_utils.run_v2_only
-  def test_backward(self):
-    # For numeric stability, we ensure the axis's dimension(s) have at least 4
-    # elements.
-    self._test_backward_pass((4, 3), (0,))
-    self._test_backward_pass((2, 4, 2), (1,))
-    self._test_backward_pass((2, 3, 4), (2,))
-    self._test_backward_pass((2, 3, 2), (0, 2), fp64_tol=5e-4, fp32_tol=5e-4)
-    self._test_backward_pass((2, 2, 2, 2), (1, 3))
-    self._test_backward_pass((2, 2, 2, 2), (2, 3))
-
-
-if __name__ == '__main__':
-  tf.test.main()
+    """Tests LayerNormalization has correct and numerically stable outputs."""
+
+    def _expected_layer_norm(
+        self, x, beta, gamma, batch_input_shape, axis, epsilon
+    ):
+        """Returns the layer norm, which is computed using NumPy."""
+        broadcast_shape = [
+            batch_input_shape[i] if i in axis else 1
+            for i in range(len(batch_input_shape))
+        ]
+        mean = np.mean(x, axis=axis, keepdims=True)
+        var = np.var(x, axis=axis, keepdims=True)
+        expected = (x - mean) / np.sqrt(var + epsilon)
+        expected *= np.reshape(gamma, broadcast_shape)
+        expected += np.reshape(beta, broadcast_shape)
+        return expected
+
+    def _test_forward_pass(
+        self,
+        batch_input_shape,
+        axis,
+        fp64_tol=1e-14,
+        fp32_tol=1e-6,
+        fp16_tol=1e-2,
+    ):
+        """Tests the forward pass of the LayerNormalization layer.
+
+        Args:
+          batch_input_shape: The input shape that will be used to test,
+            including the batch dimension.
+          axis: A list of axes to normalize. Will be passed to the `axis`
+            argument of LayerNormalization.
+          fp64_tol: The relative and absolute tolerance for float64.
+          fp32_tol: The relative and absolute tolerance for float32.
+          fp16_tol: The relative and absolute tolerance for float16.
+ """ + param_shape = [batch_input_shape[i] for i in axis] + param_elems = 1 + for dim in param_shape: + param_elems *= dim + beta = np.arange(param_elems, dtype="float64").reshape(param_shape) + gamma = np.arange(1, param_elems + 1, dtype="float64").reshape( + param_shape + ) + x = np.random.normal(size=batch_input_shape) + + for epsilon in 1e-12, 1e-3: + expected = self._expected_layer_norm( + x, beta, gamma, batch_input_shape, axis, epsilon + ) + for dtype in "float64", "float32", "float16": + norm = layer_normalization.LayerNormalization( + axis=axis, + dtype=dtype, + batch_input_shape=batch_input_shape, + epsilon=epsilon, + beta_initializer=keras.initializers.constant(beta), + gamma_initializer=keras.initializers.constant(gamma), + ) + y = norm(keras.backend.cast(x, dtype)) + actual = keras.backend.eval(y) + + if dtype == "float64": + tol = fp64_tol + elif dtype == "float32": + tol = fp32_tol + else: + assert dtype == "float16" + tol = fp16_tol + + # We use absolute tolerances in addition to relative tolerances, + # because some of the values are very close to zero. + self.assertAllClose(expected, actual, rtol=tol, atol=tol) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_forward(self): + # For numeric stability, we ensure the axis's dimension(s) have at least + # 4 elements. + self._test_forward_pass((4, 3), (0,)) + self._test_forward_pass((3, 4), (1,)) + self._test_forward_pass((4, 3, 2), (0,)) + self._test_forward_pass((2, 4, 2), (1,)) + self._test_forward_pass((2, 3, 4), (2,), fp16_tol=5e-2) + self._test_forward_pass((2, 3, 2), (0, 2)) + self._test_forward_pass((2, 2, 2, 2), (1, 3)) + self._test_forward_pass((2, 2, 2, 2), (2, 3)) + self._test_forward_pass((2, 3, 4, 5), (3,)) + + def _test_backward_pass( + self, + batch_input_shape, + axis, + fp64_tol=1e-5, + fp32_tol=1e-5, + fp16_tol=2e-2, + ): + """Tests the backwards pass of layer layer_normalization. + + Args: + batch_input_shape: The input shape that will be used to test, + including the batch dimension. + axis: A list of axes to normalize. Will be passed to the `axis` + argument of Layerlayer_normalization. + fp64_tol: The relative and absolute tolerance for float64. + fp32_tol: The relative and absolute tolerance for float32. + fp16_tol: The relative and absolute tolerance for float16. + """ + param_shape = [batch_input_shape[i] for i in axis] + param_elems = 1 + for dim in param_shape: + param_elems *= dim + beta = np.arange(param_elems, dtype="float64").reshape(param_shape) + gamma = np.arange(1, param_elems + 1, dtype="float64").reshape( + param_shape + ) + x = np.random.normal(size=batch_input_shape) + + for epsilon in 1e-12, 1e-3: + # Float64 must come first in this list, as we use the float64 + # numerical gradients to compare to the float32 and float16 symbolic + # gradients as well. Computing float32/float16 numerical gradients + # is too numerically unstable. + for dtype in "float64", "float32", "float16": + norm = layer_normalization.LayerNormalization( + axis=axis, + dtype=dtype, + batch_input_shape=batch_input_shape, + epsilon=epsilon, + beta_initializer=keras.initializers.constant(beta), + gamma_initializer=keras.initializers.constant(gamma), + ) + norm.build(x.shape) + + def forward_fn(x, beta, gamma): + # We must monkey-patch the attributes of `norm` with the + # function arguments, so that the gradient checker will + # properly compute their gradients. The gradient checker + # computes gradients with respect to the input arguments of + # `f`. 
+ with tf.compat.v1.test.mock.patch.object( + norm, "beta", beta + ): + with tf.compat.v1.test.mock.patch.object( + norm, "gamma", gamma + ): + return norm(x) + + results = tf.test.compute_gradient( + forward_fn, + [keras.backend.cast(x, dtype), norm.beta, norm.gamma], + ) + ( + [x_grad_t, beta_grad_t, gamma_grad_t], + [x_grad_n, beta_grad_n, gamma_grad_n], + ) = results + + if dtype == "float64": + # We use the float64 numeric gradients as the reference, to + # compare against the symbolic gradients for all dtypes. + x_grad_ref = x_grad_n + beta_grad_ref = beta_grad_n + gamma_grad_ref = gamma_grad_n + tol = fp64_tol + elif dtype == "float32": + tol = fp32_tol + else: + assert dtype == "float16" + tol = fp16_tol + + # We use absolute tolerances in addition to relative tolerances, + # because some of the values are very close to zero. + self.assertAllClose(x_grad_t, x_grad_ref, rtol=tol, atol=tol) + self.assertAllClose( + beta_grad_t, beta_grad_ref, rtol=tol, atol=tol + ) + self.assertAllClose( + gamma_grad_t, gamma_grad_ref, rtol=tol, atol=tol + ) + + # The gradient_checker_v2 does not work properly with LayerNorm in graph + # mode. + @test_utils.run_v2_only + def test_backward(self): + # For numeric stability, we ensure the axis's dimension(s) have at least + # 4 elements. + self._test_backward_pass((4, 3), (0,)) + self._test_backward_pass((2, 4, 2), (1,)) + self._test_backward_pass((2, 3, 4), (2,)) + self._test_backward_pass( + (2, 3, 2), (0, 2), fp64_tol=5e-4, fp32_tol=5e-4 + ) + self._test_backward_pass((2, 2, 2, 2), (1, 3)) + self._test_backward_pass((2, 2, 2, 2), (2, 3)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/normalization/spectral_normalization.py b/keras/layers/normalization/spectral_normalization.py new file mode 100644 index 000000000000..c958cd4a79ac --- /dev/null +++ b/keras/layers/normalization/spectral_normalization.py @@ -0,0 +1,141 @@ +# Copyright 2023 The Keras Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import tensorflow.compat.v2 as tf + +from keras.initializers import TruncatedNormal +from keras.layers.rnn import Wrapper + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +# Adapted from TF-Addons implementation +@keras_export("keras.layers.SpectralNormalization", v1=[]) +class SpectralNormalization(Wrapper): + """Performs spectral normalization on the weights of a target layer. + + This wrapper controls the Lipschitz constant of the weights of a layer by + constraining their spectral norm, which can stabilize the training of GANs. + + Args: + layer: A `keras.layers.Layer` instance that + has either a `kernel` (e.g. `Conv2D`, `Dense`...) + or an `embeddings` attribute (`Embedding` layer). + power_iterations: int, the number of iterations during normalization. 
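# Illustrative NumPy sketch (not part of the patch) of the power iteration
# that `normalize_weights` below implements: u and v converge to the top
# singular vectors of the reshaped kernel W, sigma = v @ W @ u.T converges to
# its largest singular value, and W / sigma then has spectral norm ~1.
import numpy as np

rng = np.random.default_rng(0)
w = rng.normal(size=(8, 4))  # kernel reshaped to [-1, output_dim]
u = rng.normal(size=(1, 4))  # persistent estimate, like `vector_u`
for _ in range(50):
    v = u @ w.T
    v /= np.linalg.norm(v)
    u = v @ w
    u /= np.linalg.norm(u)
sigma = (v @ w @ u.T).item()
print(sigma, np.linalg.svd(w, compute_uv=False)[0])  # nearly identical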
+
+    Examples:
+
+    Wrap `keras.layers.Conv2D`:
+    >>> x = np.random.rand(1, 10, 10, 1)
+    >>> conv2d = SpectralNormalization(tf.keras.layers.Conv2D(2, 2))
+    >>> y = conv2d(x)
+    >>> y.shape
+    TensorShape([1, 9, 9, 2])
+
+    Wrap `keras.layers.Dense`:
+    >>> x = np.random.rand(1, 10, 10, 1)
+    >>> dense = SpectralNormalization(tf.keras.layers.Dense(10))
+    >>> y = dense(x)
+    >>> y.shape
+    TensorShape([1, 10, 10, 10])
+
+    Reference:
+
+    - [Spectral Normalization for GAN](https://arxiv.org/abs/1802.05957).
+    """
+
+    def __init__(self, layer, power_iterations=1, **kwargs):
+        super().__init__(layer, **kwargs)
+        if power_iterations <= 0:
+            raise ValueError(
+                "`power_iterations` should be greater than zero. Received: "
+                f"`power_iterations={power_iterations}`"
+            )
+        self.power_iterations = power_iterations
+
+    def build(self, input_shape):
+        super().build(input_shape)
+        input_shape = tf.TensorShape(input_shape)
+        self.input_spec = tf.keras.layers.InputSpec(
+            shape=[None] + input_shape[1:]
+        )
+
+        if hasattr(self.layer, "kernel"):
+            self.kernel = self.layer.kernel
+        elif hasattr(self.layer, "embeddings"):
+            self.kernel = self.layer.embeddings
+        else:
+            raise ValueError(
+                f"{type(self.layer).__name__} object has no attribute 'kernel' "
+                "nor 'embeddings'"
+            )
+
+        self.kernel_shape = self.kernel.shape.as_list()
+
+        self.vector_u = self.add_weight(
+            shape=(1, self.kernel_shape[-1]),
+            initializer=TruncatedNormal(stddev=0.02),
+            trainable=False,
+            name="vector_u",
+            dtype=self.kernel.dtype,
+        )
+
+    def call(self, inputs, training=False):
+        if training:
+            self.normalize_weights()
+
+        output = self.layer(inputs)
+        return output
+
+    def compute_output_shape(self, input_shape):
+        return tf.TensorShape(
+            self.layer.compute_output_shape(input_shape).as_list()
+        )
+
+    def normalize_weights(self):
+        """Generate spectral normalized weights.
+
+        This method will update the value of `self.kernel` with the
+        spectral normalized value, so that the layer is ready for `call()`.
+        """
+
+        weights = tf.reshape(self.kernel, [-1, self.kernel_shape[-1]])
+        vector_u = self.vector_u
+
+        # Skip the update when the kernel is all zeros, since sigma would be
+        # zero and the division below undefined.
+        if not tf.reduce_all(tf.equal(weights, 0.0)):
+            for _ in range(self.power_iterations):
+                vector_v = tf.math.l2_normalize(
+                    tf.matmul(vector_u, weights, transpose_b=True)
+                )
+                vector_u = tf.math.l2_normalize(tf.matmul(vector_v, weights))
+            vector_u = tf.stop_gradient(vector_u)
+            vector_v = tf.stop_gradient(vector_v)
+            sigma = tf.matmul(
+                tf.matmul(vector_v, weights), vector_u, transpose_b=True
+            )
+            self.vector_u.assign(tf.cast(vector_u, self.vector_u.dtype))
+            self.kernel.assign(
+                tf.cast(
+                    tf.reshape(self.kernel / sigma, self.kernel_shape),
+                    self.kernel.dtype,
+                )
+            )
+
+    def get_config(self):
+        config = {"power_iterations": self.power_iterations}
+        base_config = super().get_config()
+        return {**base_config, **config}
diff --git a/keras/layers/normalization/spectral_normalization_test.py b/keras/layers/normalization/spectral_normalization_test.py
new file mode 100644
index 000000000000..555850291af3
--- /dev/null
+++ b/keras/layers/normalization/spectral_normalization_test.py
@@ -0,0 +1,184 @@
+# Copyright 2023 The Keras Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import tensorflow as tf +from absl.testing import parameterized + +import keras +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + + +class SpectralNormalizationTest(test_combinations.TestCase): + @test_combinations.run_all_keras_modes + def test_basic_spectralnorm(self): + test_utils.layer_test( + keras.layers.SpectralNormalization, + kwargs={"layer": keras.layers.Dense(2), "input_shape": (3, 4)}, + input_data=tf.random.uniform((10, 3, 4)), + ) + + @test_combinations.run_all_keras_modes + def test_from_to_config(self): + base_layer = keras.layers.Dense(1) + sn = keras.layers.SpectralNormalization(base_layer) + config = sn.get_config() + + new_sn = keras.layers.SpectralNormalization.from_config(config) + self.assertEqual(sn.power_iterations, new_sn.power_iterations) + + @test_combinations.run_all_keras_modes + def test_save_load_model(self): + base_layer = keras.layers.Dense(1) + input_shape = [1] + + inputs = keras.layers.Input(shape=input_shape) + sn_layer = keras.layers.SpectralNormalization(base_layer) + model = keras.models.Sequential(layers=[inputs, sn_layer]) + + # initialize model + model.predict(tf.random.uniform((2, 1))) + + with self.subTest("h5"): + model.save("test.h5") + new_model = keras.models.load_model("test.h5") + + self.assertEqual( + model.layers[0].get_config(), new_model.layers[0].get_config() + ) + with self.subTest("savedmodel"): + model.save("test") + new_model = keras.models.load_model("test") + + self.assertEqual( + model.layers[0].get_config(), new_model.layers[0].get_config() + ) + with self.subTest("keras_v3"): + model.save("test.keras") + new_model = keras.models.load_model("test.keras") + + self.assertEqual( + model.layers[0].get_config(), new_model.layers[0].get_config() + ) + + @test_combinations.run_all_keras_modes + def test_normalization(self): + inputs = keras.layers.Input(shape=[2, 2, 1]) + + base_layer = keras.layers.Conv2D( + 1, (2, 2), kernel_initializer=tf.constant_initializer(value=2) + ) + sn_layer = keras.layers.SpectralNormalization(base_layer) + model = keras.models.Sequential(layers=[inputs, sn_layer]) + + weights = tf.squeeze(model.layers[0].w.numpy()) + # This wrapper normalizes weights by the maximum eigen value + eigen_val, _ = tf.linalg.eig(weights) + weights_normalized = weights / tf.reduce_max(eigen_val) + + for training in [False, True]: + _ = model( + tf.constant(tf.ones((1, 2, 2, 1), dtype=tf.float32)), + training=training, + ) + if training: + w = weights_normalized + else: + w = weights + self.assertAllClose(w, tf.squeeze(model.layers[0].w.numpy())) + + @test_combinations.run_all_keras_modes + def test_apply_layer(self): + images = tf.ones((1, 2, 2, 1)) + sn_wrapper = keras.layers.SpectralNormalization( + keras.layers.Conv2D( + 1, [2, 2], kernel_initializer=tf.constant_initializer(value=1) + ), + input_shape=(2, 2, 1), + ) + + result = sn_wrapper(images, training=False) + result_train = sn_wrapper(images, training=True) + expected_output = tf.constant([[[[4.0]]]], dtype=tf.float32) + + 
self.assertAllClose(result, expected_output) + # max eigen value of 2x2 matrix of ones is 2 + self.assertAllClose(result_train, expected_output / 2) + self.assertTrue(hasattr(sn_wrapper, "u")) + + @test_combinations.run_all_keras_modes + def test_no_layer(self): + images = tf.random.uniform((2, 4, 43)) + with self.assertRaises(AssertionError): + keras.layers.SpectralNormalization(images) + + @test_combinations.run_all_keras_modes + def test_no_kernel(self): + with self.assertRaises(AttributeError): + keras.layers.SpectralNormalization( + keras.layers.MaxPooling2D(2, 2) + ).build((2, 2)) + + @parameterized.parameters( + [ + (lambda: keras.layers.Dense(2), [3, 2]), + ( + lambda: keras.layers.Conv2D(3, (2, 2), padding="same"), + [4, 4, 3], + ), + (lambda: keras.layers.Embedding(2, 10), [2]), + ], + ) + @test_combinations.run_all_keras_modes + def test_model_build(self, base_layer_fn, input_shape): + inputs = keras.layers.Input(shape=input_shape) + base_layer = base_layer_fn() + sn_layer = keras.layers.SpectralNormalization(base_layer) + model = keras.models.Sequential(layers=[inputs, sn_layer]) + model.build() + self.assertTrue(hasattr(model.layers[0], "vector_u")) + + @parameterized.parameters( + [ + (lambda: keras.layers.Dense(2), [3, 2], [3, 2]), + ( + lambda: keras.layers.Conv2D(3, (2, 2), padding="same"), + [4, 4, 3], + [4, 4, 3], + ), + (lambda: keras.layers.Embedding(2, 10), [2], [2, 10]), + ], + ) + @test_combinations.run_all_keras_modes + def test_model_fit(self, base_layer_fn, input_shape, output_shape): + inputs = keras.layers.Input(shape=input_shape) + base_layer = base_layer_fn() + + sn_layer = keras.layers.SpectralNormalization(base_layer) + model = keras.models.Sequential(layers=[inputs, sn_layer]) + model.add(keras.layers.Activation("relu")) + + model.compile( + optimizer=keras.optimizers.RMSprop(learning_rate=0.001), + loss="mse", + ) + model.fit( + tf.random.uniform((2, *input_shape)), + tf.random.uniform((2, *output_shape)), + epochs=3, + batch_size=10, + verbose=0, + ) + self.assertTrue(hasattr(model.layers[0], "vector_u")) diff --git a/keras/layers/normalization/unit_normalization.py b/keras/layers/normalization/unit_normalization.py index f8f7cd1421f2..eb1746fdde15 100644 --- a/keras/layers/normalization/unit_normalization.py +++ b/keras/layers/normalization/unit_normalization.py @@ -13,65 +13,63 @@ # limitations under the License. # ============================================================================== """Unit Normalization layer.""" -# pylint: disable=g-bad-import-order -# pylint: disable=g-classes-have-attributes import tensorflow.compat.v2 as tf from keras.engine import base_layer from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.UnitNormalization', v1=[]) +@keras_export("keras.layers.UnitNormalization", v1=[]) class UnitNormalization(base_layer.Layer): - """Unit normalization layer. + """Unit normalization layer. - Normalize a batch of inputs so that each input in the batch has a L2 norm - equal to 1 (across the axes specified in `axis`). + Normalize a batch of inputs so that each input in the batch has a L2 norm + equal to 1 (across the axes specified in `axis`). 
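# Quick equivalence sketch (illustrative): unit-normalizing over `axis` is
# x / sqrt(sum(x**2, axis, keepdims=True)), up to the small epsilon guard
# inside tf.linalg.l2_normalize.
import numpy as np
import tensorflow as tf

x = np.arange(1, 7, dtype="float32").reshape(2, 3)
manual = x / np.sqrt((x**2).sum(axis=-1, keepdims=True))
layer_out = tf.keras.layers.UnitNormalization(axis=-1)(x).numpy()
np.testing.assert_allclose(layer_out, manual, rtol=1e-6)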
- Example: + Example: - >>> data = tf.constant(np.arange(6).reshape(2, 3), dtype=tf.float32) - >>> normalized_data = tf.keras.layers.UnitNormalization()(data) - >>> print(tf.reduce_sum(normalized_data[0, :] ** 2).numpy()) - 1.0 + >>> data = tf.constant(np.arange(6).reshape(2, 3), dtype=tf.float32) + >>> normalized_data = tf.keras.layers.UnitNormalization()(data) + >>> print(tf.reduce_sum(normalized_data[0, :] ** 2).numpy()) + 1.0 - Args: - axis: Integer or list/tuple. The axis or axes to normalize across. Typically - this is the features axis or axes. The left-out axes are typically the - batch axis or axes. Defaults to `-1`, the last dimension in - the input. - """ + Args: + axis: Integer or list/tuple. The axis or axes to normalize across. + Typically, this is the features axis or axes. The left-out axes are + typically the batch axis or axes. `-1` is the last dimension + in the input. Defaults to `-1`. + """ - def __init__(self, - axis=-1, - **kwargs): - super().__init__(**kwargs) - if isinstance(axis, (list, tuple)): - self.axis = list(axis) - elif isinstance(axis, int): - self.axis = axis - else: - raise TypeError( - 'Invalid value for `axis` argument: ' - 'expected an int or a list/tuple of ints. ' - f'Received: axis={axis}') - self.supports_masking = True + def __init__(self, axis=-1, **kwargs): + super().__init__(**kwargs) + if isinstance(axis, (list, tuple)): + self.axis = list(axis) + elif isinstance(axis, int): + self.axis = axis + else: + raise TypeError( + "Invalid value for `axis` argument: " + "expected an int or a list/tuple of ints. " + f"Received: axis={axis}" + ) + self.supports_masking = True - def build(self, input_shape): - self.axis = tf_utils.validate_axis(self.axis, input_shape) + def build(self, input_shape): + self.axis = tf_utils.validate_axis(self.axis, input_shape) - def call(self, inputs): - inputs = tf.cast(inputs, self.compute_dtype) - return tf.linalg.l2_normalize(inputs, axis=self.axis) + def call(self, inputs): + inputs = tf.cast(inputs, self.compute_dtype) + return tf.linalg.l2_normalize(inputs, axis=self.axis) - def compute_output_shape(self, input_shape): - return input_shape + def compute_output_shape(self, input_shape): + return input_shape - def get_config(self): - config = super().get_config() - config.update({'axis': self.axis}) - return config + def get_config(self): + config = super().get_config() + config.update({"axis": self.axis}) + return config diff --git a/keras/layers/normalization/unit_normalization_test.py b/keras/layers/normalization/unit_normalization_test.py index 4edc375e1280..386d5a043d03 100644 --- a/keras/layers/normalization/unit_normalization_test.py +++ b/keras/layers/normalization/unit_normalization_test.py @@ -13,7 +13,7 @@ # limitations under the License. 
# ============================================================================== """Tests for Unit Normalization layer.""" -# pylint: disable=g-bad-import-order + import tensorflow.compat.v2 as tf @@ -23,56 +23,57 @@ def squared_l2_norm(x): - return tf.reduce_sum(x ** 2) + return tf.reduce_sum(x**2) @test_utils.run_v2_only class UnitNormalizationTest(test_combinations.TestCase): + @test_combinations.run_all_keras_modes + def test_basics(self): + test_utils.layer_test( + keras.layers.UnitNormalization, + kwargs={"axis": -1}, + input_shape=(2, 3), + ) + test_utils.layer_test( + keras.layers.UnitNormalization, + kwargs={"axis": (1, 2)}, + input_shape=(1, 3, 3), + ) - @test_combinations.run_all_keras_modes - def test_basics(self): - test_utils.layer_test( - keras.layers.UnitNormalization, - kwargs={'axis': -1}, - input_shape=(2, 3)) - test_utils.layer_test( - keras.layers.UnitNormalization, - kwargs={'axis': (1, 2)}, - input_shape=(1, 3, 3)) - - def test_correctness(self): - layer = keras.layers.UnitNormalization(axis=-1) - inputs = tf.random.normal(shape=(2, 3)) - outputs = layer(inputs).numpy() - self.assertAllClose(squared_l2_norm(outputs[0, :]), 1.) - self.assertAllClose(squared_l2_norm(outputs[1, :]), 1.) + def test_correctness(self): + layer = keras.layers.UnitNormalization(axis=-1) + inputs = tf.random.normal(shape=(2, 3)) + outputs = layer(inputs).numpy() + self.assertAllClose(squared_l2_norm(outputs[0, :]), 1.0) + self.assertAllClose(squared_l2_norm(outputs[1, :]), 1.0) - layer = keras.layers.UnitNormalization(axis=(1, 2)) - inputs = tf.random.normal(shape=(2, 3, 3)) - outputs = layer(inputs).numpy() - self.assertAllClose(squared_l2_norm(outputs[0, :, :]), 1.) - self.assertAllClose(squared_l2_norm(outputs[1, :, :]), 1.) + layer = keras.layers.UnitNormalization(axis=(1, 2)) + inputs = tf.random.normal(shape=(2, 3, 3)) + outputs = layer(inputs).numpy() + self.assertAllClose(squared_l2_norm(outputs[0, :, :]), 1.0) + self.assertAllClose(squared_l2_norm(outputs[1, :, :]), 1.0) - layer = keras.layers.UnitNormalization(axis=1) - inputs = tf.random.normal(shape=(2, 3, 2)) - outputs = layer(inputs).numpy() - self.assertAllClose(squared_l2_norm(outputs[0, :, 0]), 1.) - self.assertAllClose(squared_l2_norm(outputs[1, :, 0]), 1.) - self.assertAllClose(squared_l2_norm(outputs[0, :, 1]), 1.) - self.assertAllClose(squared_l2_norm(outputs[1, :, 1]), 1.) 
+ layer = keras.layers.UnitNormalization(axis=1) + inputs = tf.random.normal(shape=(2, 3, 2)) + outputs = layer(inputs).numpy() + self.assertAllClose(squared_l2_norm(outputs[0, :, 0]), 1.0) + self.assertAllClose(squared_l2_norm(outputs[1, :, 0]), 1.0) + self.assertAllClose(squared_l2_norm(outputs[0, :, 1]), 1.0) + self.assertAllClose(squared_l2_norm(outputs[1, :, 1]), 1.0) - def testInvalidAxis(self): - with self.assertRaisesRegex( - TypeError, - r'Invalid value for `axis` argument'): - layer = keras.layers.UnitNormalization(axis=None) + def testInvalidAxis(self): + with self.assertRaisesRegex( + TypeError, r"Invalid value for `axis` argument" + ): + layer = keras.layers.UnitNormalization(axis=None) - with self.assertRaisesRegex( - ValueError, - r'Invalid value for `axis` argument'): - layer = keras.layers.UnitNormalization(axis=3) - layer.build(input_shape=(2, 2, 2)) + with self.assertRaisesRegex( + ValueError, r"Invalid value for `axis` argument" + ): + layer = keras.layers.UnitNormalization(axis=3) + layer.build(input_shape=(2, 2, 2)) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/pooling/BUILD b/keras/layers/pooling/BUILD index 7aac954fe715..d622f7138420 100644 --- a/keras/layers/pooling/BUILD +++ b/keras/layers/pooling/BUILD @@ -1,15 +1,17 @@ # Description: # Contains the Keras pooling layers. +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/py/tensorflow_gnn:__subpackages__", "//third_party/tensorflow/python/distribute:__pkg__", "//third_party/tensorflow/python/feature_column:__pkg__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/trackable:__pkg__", "//third_party/tensorflow/tools/pip_package:__pkg__", "//third_party/tensorflow_models/official/projects/residual_mobilenet/modeling/backbones:__pkg__", ], diff --git a/keras/layers/pooling/__init__.py b/keras/layers/pooling/__init__.py index f69751662192..d70383f39eb2 100644 --- a/keras/layers/pooling/__init__.py +++ b/keras/layers/pooling/__init__.py @@ -13,32 +13,31 @@ # limitations under the License. # ============================================================================== """Keras Pooling layers.""" -# pylint: disable=g-bad-import-order + +# Pooling layer aliases. # Pooling layers. 
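# The aliases grouped into the imports below (AvgPool1D for AveragePooling1D,
# GlobalMaxPool2D for GlobalMaxPooling2D, and so on) are the same class
# objects exported under two names, so this sanity check should hold:
import tensorflow as tf

assert tf.keras.layers.AvgPool1D is tf.keras.layers.AveragePooling1D
assert tf.keras.layers.GlobalMaxPool2D is tf.keras.layers.GlobalMaxPooling2D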
from keras.layers.pooling.average_pooling1d import AveragePooling1D +from keras.layers.pooling.average_pooling1d import AvgPool1D from keras.layers.pooling.average_pooling2d import AveragePooling2D +from keras.layers.pooling.average_pooling2d import AvgPool2D from keras.layers.pooling.average_pooling3d import AveragePooling3D -from keras.layers.pooling.max_pooling1d import MaxPooling1D -from keras.layers.pooling.max_pooling2d import MaxPooling2D -from keras.layers.pooling.max_pooling3d import MaxPooling3D +from keras.layers.pooling.average_pooling3d import AvgPool3D from keras.layers.pooling.global_average_pooling1d import GlobalAveragePooling1D +from keras.layers.pooling.global_average_pooling1d import GlobalAvgPool1D from keras.layers.pooling.global_average_pooling2d import GlobalAveragePooling2D +from keras.layers.pooling.global_average_pooling2d import GlobalAvgPool2D from keras.layers.pooling.global_average_pooling3d import GlobalAveragePooling3D +from keras.layers.pooling.global_average_pooling3d import GlobalAvgPool3D +from keras.layers.pooling.global_max_pooling1d import GlobalMaxPool1D from keras.layers.pooling.global_max_pooling1d import GlobalMaxPooling1D +from keras.layers.pooling.global_max_pooling2d import GlobalMaxPool2D from keras.layers.pooling.global_max_pooling2d import GlobalMaxPooling2D +from keras.layers.pooling.global_max_pooling3d import GlobalMaxPool3D from keras.layers.pooling.global_max_pooling3d import GlobalMaxPooling3D - -# Pooling layer aliases. -from keras.layers.pooling.average_pooling1d import AvgPool1D -from keras.layers.pooling.average_pooling2d import AvgPool2D -from keras.layers.pooling.average_pooling3d import AvgPool3D from keras.layers.pooling.max_pooling1d import MaxPool1D +from keras.layers.pooling.max_pooling1d import MaxPooling1D from keras.layers.pooling.max_pooling2d import MaxPool2D +from keras.layers.pooling.max_pooling2d import MaxPooling2D from keras.layers.pooling.max_pooling3d import MaxPool3D -from keras.layers.pooling.global_average_pooling1d import GlobalAvgPool1D -from keras.layers.pooling.global_average_pooling2d import GlobalAvgPool2D -from keras.layers.pooling.global_average_pooling3d import GlobalAvgPool3D -from keras.layers.pooling.global_max_pooling1d import GlobalMaxPool1D -from keras.layers.pooling.global_max_pooling2d import GlobalMaxPool2D -from keras.layers.pooling.global_max_pooling3d import GlobalMaxPool3D +from keras.layers.pooling.max_pooling3d import MaxPooling3D diff --git a/keras/layers/pooling/average_pooling1d.py b/keras/layers/pooling/average_pooling1d.py index 7c4a762d62ba..a4b3a9c6d22c 100644 --- a/keras/layers/pooling/average_pooling1d.py +++ b/keras/layers/pooling/average_pooling1d.py @@ -13,126 +13,134 @@ # limitations under the License. # ============================================================================== """Average pooling 1D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import functools from keras import backend from keras.layers.pooling.base_pooling1d import Pooling1D +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.AveragePooling1D', 'keras.layers.AvgPool1D') +@keras_export("keras.layers.AveragePooling1D", "keras.layers.AvgPool1D") class AveragePooling1D(Pooling1D): - """Average pooling for temporal data. - - Downsamples the input representation by taking the average value over the - window defined by `pool_size`. The window is shifted by `strides`. 
The - resulting output when using "valid" padding option has a shape of: - `output_shape = (input_shape - pool_size + 1) / strides)` - - The resulting output shape when using the "same" padding option is: - `output_shape = input_shape / strides` - - For example, for strides=1 and padding="valid": - - >>> x = tf.constant([1., 2., 3., 4., 5.]) - >>> x = tf.reshape(x, [1, 5, 1]) - >>> x - - >>> avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2, - ... strides=1, padding='valid') - >>> avg_pool_1d(x) - - - For example, for strides=2 and padding="valid": - - >>> x = tf.constant([1., 2., 3., 4., 5.]) - >>> x = tf.reshape(x, [1, 5, 1]) - >>> x - - >>> avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2, - ... strides=2, padding='valid') - >>> avg_pool_1d(x) - - - For example, for strides=1 and padding="same": - - >>> x = tf.constant([1., 2., 3., 4., 5.]) - >>> x = tf.reshape(x, [1, 5, 1]) - >>> x - - >>> avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2, - ... strides=1, padding='same') - >>> avg_pool_1d(x) - - - Args: - pool_size: Integer, size of the average pooling windows. - strides: Integer, or None. Factor by which to downscale. - E.g. 2 will halve the input. - If None, it will default to `pool_size`. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, steps, features)` while `channels_first` - corresponds to inputs with shape - `(batch, features, steps)`. - - Input shape: - - If `data_format='channels_last'`: - 3D tensor with shape `(batch_size, steps, features)`. - - If `data_format='channels_first'`: - 3D tensor with shape `(batch_size, features, steps)`. - - Output shape: - - If `data_format='channels_last'`: - 3D tensor with shape `(batch_size, downsampled_steps, features)`. - - If `data_format='channels_first'`: - 3D tensor with shape `(batch_size, features, downsampled_steps)`. - """ - - def __init__(self, pool_size=2, strides=None, - padding='valid', data_format='channels_last', **kwargs): - super().__init__( - functools.partial(backend.pool2d, pool_mode='avg'), - pool_size=pool_size, - strides=strides, - padding=padding, - data_format=data_format, - **kwargs) + """Average pooling for temporal data. + + Downsamples the input representation by taking the average value over the + window defined by `pool_size`. The window is shifted by `strides`. The + resulting output when using "valid" padding option has a shape of: + `output_shape = (input_shape - pool_size + 1) / strides)` + + The resulting output shape when using the "same" padding option is: + `output_shape = input_shape / strides` + + For example, for strides=1 and padding="valid": + + >>> x = tf.constant([1., 2., 3., 4., 5.]) + >>> x = tf.reshape(x, [1, 5, 1]) + >>> x + + >>> avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2, + ... strides=1, padding='valid') + >>> avg_pool_1d(x) + + + For example, for strides=2 and padding="valid": + + >>> x = tf.constant([1., 2., 3., 4., 5.]) + >>> x = tf.reshape(x, [1, 5, 1]) + >>> x + + >>> avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2, + ... 
strides=2, padding='valid') + >>> avg_pool_1d(x) + + + For example, for strides=1 and padding="same": + + >>> x = tf.constant([1., 2., 3., 4., 5.]) + >>> x = tf.reshape(x, [1, 5, 1]) + >>> x + + >>> avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2, + ... strides=1, padding='same') + >>> avg_pool_1d(x) + + + Args: + pool_size: Integer, size of the average pooling windows. + strides: Integer, or None. Factor by which to downscale. + E.g. 2 will halve the input. + If None, it will default to `pool_size`. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, steps, features)` while `channels_first` + corresponds to inputs with shape + `(batch, features, steps)`. + + Input shape: + - If `data_format='channels_last'`: + 3D tensor with shape `(batch_size, steps, features)`. + - If `data_format='channels_first'`: + 3D tensor with shape `(batch_size, features, steps)`. + + Output shape: + - If `data_format='channels_last'`: + 3D tensor with shape `(batch_size, downsampled_steps, features)`. + - If `data_format='channels_first'`: + 3D tensor with shape `(batch_size, features, downsampled_steps)`. + """ + + def __init__( + self, + pool_size=2, + strides=None, + padding="valid", + data_format="channels_last", + **kwargs + ): + super().__init__( + functools.partial(backend.pool2d, pool_mode="avg"), + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + **kwargs + ) # Alias diff --git a/keras/layers/pooling/average_pooling2d.py b/keras/layers/pooling/average_pooling2d.py index 9c8375cdf8ca..662ec99016e6 100644 --- a/keras/layers/pooling/average_pooling2d.py +++ b/keras/layers/pooling/average_pooling2d.py @@ -13,126 +13,135 @@ # limitations under the License. # ============================================================================== """Average pooling 2D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import -from keras.layers.pooling.base_pooling2d import Pooling2D + import tensorflow.compat.v2 as tf +from keras.layers.pooling.base_pooling2d import Pooling2D + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.AveragePooling2D', 'keras.layers.AvgPool2D') +@keras_export("keras.layers.AveragePooling2D", "keras.layers.AvgPool2D") class AveragePooling2D(Pooling2D): - """Average pooling operation for spatial data. - - Downsamples the input along its spatial dimensions (height and width) - by taking the average value over an input window - (of size defined by `pool_size`) for each channel of the input. - The window is shifted by `strides` along each dimension. - - The resulting output when using `"valid"` padding option has a shape - (number of rows or columns) of: - `output_shape = math.floor((input_shape - pool_size) / strides) + 1` - (when `input_shape >= pool_size`) - - The resulting output shape when using the `"same"` padding option is: - `output_shape = math.floor((input_shape - 1) / strides) + 1` - - For example, for `strides=(1, 1)` and `padding="valid"`: - - >>> x = tf.constant([[1., 2., 3.], - ... [4., 5., 6.], - ... 
[7., 8., 9.]]) - >>> x = tf.reshape(x, [1, 3, 3, 1]) - >>> avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), - ... strides=(1, 1), padding='valid') - >>> avg_pool_2d(x) - - - For example, for `stride=(2, 2)` and `padding="valid"`: - - >>> x = tf.constant([[1., 2., 3., 4.], - ... [5., 6., 7., 8.], - ... [9., 10., 11., 12.]]) - >>> x = tf.reshape(x, [1, 3, 4, 1]) - >>> avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), - ... strides=(2, 2), padding='valid') - >>> avg_pool_2d(x) - - - For example, for `strides=(1, 1)` and `padding="same"`: - - >>> x = tf.constant([[1., 2., 3.], - ... [4., 5., 6.], - ... [7., 8., 9.]]) - >>> x = tf.reshape(x, [1, 3, 3, 1]) - >>> avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), - ... strides=(1, 1), padding='same') - >>> avg_pool_2d(x) - - - Args: - pool_size: integer or tuple of 2 integers, - factors by which to downscale (vertical, horizontal). - `(2, 2)` will halve the input in both spatial dimension. - If only one integer is specified, the same window length - will be used for both dimensions. - strides: Integer, tuple of 2 integers, or None. - Strides values. - If None, it will default to `pool_size`. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - Input shape: - - If `data_format='channels_last'`: - 4D tensor with shape `(batch_size, rows, cols, channels)`. - - If `data_format='channels_first'`: - 4D tensor with shape `(batch_size, channels, rows, cols)`. - - Output shape: - - If `data_format='channels_last'`: - 4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`. - - If `data_format='channels_first'`: - 4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`. - """ - - def __init__(self, - pool_size=(2, 2), - strides=None, - padding='valid', - data_format=None, - **kwargs): - super().__init__( - tf.nn.avg_pool, - pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, **kwargs) + """Average pooling operation for spatial data. + + Downsamples the input along its spatial dimensions (height and width) + by taking the average value over an input window + (of size defined by `pool_size`) for each channel of the input. + The window is shifted by `strides` along each dimension. + + The resulting output when using `"valid"` padding option has a shape + (number of rows or columns) of: + `output_shape = math.floor((input_shape - pool_size) / strides) + 1` + (when `input_shape >= pool_size`) + + The resulting output shape when using the `"same"` padding option is: + `output_shape = math.floor((input_shape - 1) / strides) + 1` + + For example, for `strides=(1, 1)` and `padding="valid"`: + + >>> x = tf.constant([[1., 2., 3.], + ... [4., 5., 6.], + ... [7., 8., 9.]]) + >>> x = tf.reshape(x, [1, 3, 3, 1]) + >>> avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), + ... 
strides=(1, 1), padding='valid') + >>> avg_pool_2d(x) + + + For example, for `strides=(2, 2)` and `padding="valid"`: + + >>> x = tf.constant([[1., 2., 3., 4.], + ... [5., 6., 7., 8.], + ... [9., 10., 11., 12.]]) + >>> x = tf.reshape(x, [1, 3, 4, 1]) + >>> avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), + ... strides=(2, 2), padding='valid') + >>> avg_pool_2d(x) + + + For example, for `strides=(1, 1)` and `padding="same"`: + + >>> x = tf.constant([[1., 2., 3.], + ... [4., 5., 6.], + ... [7., 8., 9.]]) + >>> x = tf.reshape(x, [1, 3, 3, 1]) + >>> avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), + ... strides=(1, 1), padding='same') + >>> avg_pool_2d(x) + + + Args: + pool_size: integer or tuple of 2 integers, + factors by which to downscale (vertical, horizontal). + `(2, 2)` will halve the input in both spatial dimensions. + If only one integer is specified, the same window length + will be used for both dimensions. + strides: Integer, tuple of 2 integers, or None. + Strides values. + If None, it will default to `pool_size`. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, height, width)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + + Input shape: + - If `data_format='channels_last'`: + 4D tensor with shape `(batch_size, rows, cols, channels)`. + - If `data_format='channels_first'`: + 4D tensor with shape `(batch_size, channels, rows, cols)`. + + Output shape: + - If `data_format='channels_last'`: + 4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`. + - If `data_format='channels_first'`: + 4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`. + """ + + def __init__( + self, + pool_size=(2, 2), + strides=None, + padding="valid", + data_format=None, + **kwargs + ): + super().__init__( + tf.nn.avg_pool, + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + **kwargs + ) # Alias diff --git a/keras/layers/pooling/average_pooling3d.py b/keras/layers/pooling/average_pooling3d.py index 56b7d4a9d585..9d1177e6c68d 100644 --- a/keras/layers/pooling/average_pooling3d.py +++ b/keras/layers/pooling/average_pooling3d.py @@ -13,83 +13,92 @@ # limitations under the License. # ============================================================================== """Average pooling 3D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import -from keras.layers.pooling.base_pooling3d import Pooling3D + import tensorflow.compat.v2 as tf +from keras.layers.pooling.base_pooling3d import Pooling3D + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.AveragePooling3D', 'keras.layers.AvgPool3D') +@keras_export("keras.layers.AveragePooling3D", "keras.layers.AvgPool3D") class AveragePooling3D(Pooling3D): - """Average pooling operation for 3D data (spatial or spatio-temporal).
- - Downsamples the input along its spatial dimensions (depth, height, and width) - by taking the average value over an input window - (of size defined by `pool_size`) for each channel of the input. - The window is shifted by `strides` along each dimension. - - Args: - pool_size: tuple of 3 integers, - factors by which to downscale (dim1, dim2, dim3). - `(2, 2, 2)` will halve the size of the 3D input in each dimension. - strides: tuple of 3 integers, or None. Strides values. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - Input shape: - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` - - Output shape: - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` - - Example: - - ```python - depth = 30 - height = 30 - width = 30 - input_channels = 3 - - inputs = tf.keras.Input(shape=(depth, height, width, input_channels)) - layer = tf.keras.layers.AveragePooling3D(pool_size=3) - outputs = layer(inputs) # Shape: (batch_size, 10, 10, 10, 3) - ``` - """ - - def __init__(self, - pool_size=(2, 2, 2), - strides=None, - padding='valid', - data_format=None, - **kwargs): - super().__init__( - tf.nn.avg_pool3d, - pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, **kwargs) + """Average pooling operation for 3D data (spatial or spatio-temporal). + + Downsamples the input along its spatial dimensions (depth, height, and + width) by taking the average value over an input window + (of size defined by `pool_size`) for each channel of the input. + The window is shifted by `strides` along each dimension. + + Args: + pool_size: tuple of 3 integers, + factors by which to downscale (dim1, dim2, dim3). + `(2, 2, 2)` will halve the size of the 3D input in each dimension. + strides: tuple of 3 integers, or None. Strides values. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + while `channels_first` corresponds to inputs with shape + `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. 
+ When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + + Input shape: + - If `data_format='channels_last'`: + 5D tensor with shape: + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + - If `data_format='channels_first'`: + 5D tensor with shape: + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + + Output shape: + - If `data_format='channels_last'`: + 5D tensor with shape: + `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` + - If `data_format='channels_first'`: + 5D tensor with shape: + `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` + + Example: + + ```python + depth = 30 + height = 30 + width = 30 + input_channels = 3 + + inputs = tf.keras.Input(shape=(depth, height, width, input_channels)) + layer = tf.keras.layers.AveragePooling3D(pool_size=3) + outputs = layer(inputs) # Shape: (batch_size, 10, 10, 10, 3) + ``` + """ + + def __init__( + self, + pool_size=(2, 2, 2), + strides=None, + padding="valid", + data_format=None, + **kwargs + ): + super().__init__( + tf.nn.avg_pool3d, + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + **kwargs + ) # Alias diff --git a/keras/layers/pooling/average_pooling_test.py b/keras/layers/pooling/average_pooling_test.py index 21a7fba93cd6..cd7f5ffed9ad 100644 --- a/keras/layers/pooling/average_pooling_test.py +++ b/keras/layers/pooling/average_pooling_test.py @@ -14,85 +14,79 @@ # ============================================================================== """Tests for average pooling layers.""" +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class AveragePoolingTest(tf.test.TestCase, parameterized.TestCase): + def test_average_pooling_1d(self): + for padding in ["valid", "same"]: + for stride in [1, 2]: + test_utils.layer_test( + keras.layers.AveragePooling1D, + kwargs={"strides": stride, "padding": padding}, + input_shape=(3, 5, 4), + ) - def test_average_pooling_1d(self): - for padding in ['valid', 'same']: - for stride in [1, 2]: test_utils.layer_test( keras.layers.AveragePooling1D, - kwargs={ - 'strides': stride, - 'padding': padding - }, - input_shape=(3, 5, 4)) + kwargs={"data_format": "channels_first"}, + input_shape=(3, 2, 6), + ) - test_utils.layer_test( - keras.layers.AveragePooling1D, - kwargs={'data_format': 'channels_first'}, - input_shape=(3, 2, 6)) + def test_average_pooling_2d(self): + test_utils.layer_test( + keras.layers.AveragePooling2D, + kwargs={"strides": (2, 2), "padding": "same", "pool_size": (2, 2)}, + input_shape=(3, 5, 6, 4), + ) + test_utils.layer_test( + keras.layers.AveragePooling2D, + kwargs={"strides": (2, 2), "padding": "valid", "pool_size": (3, 3)}, + input_shape=(3, 5, 6, 4), + ) - def test_average_pooling_2d(self): - test_utils.layer_test( - keras.layers.AveragePooling2D, - kwargs={ - 'strides': (2, 2), - 'padding': 'same', - 'pool_size': (2, 2) - }, - input_shape=(3, 5, 6, 4)) - test_utils.layer_test( - keras.layers.AveragePooling2D, - kwargs={ - 'strides': (2, 2), - 'padding': 'valid', - 'pool_size': (3, 3) - }, - input_shape=(3, 5, 6, 4)) + # This part of the test 
can only run on GPU but doesn't appear + # to be properly assigned to a GPU when running in eager mode. + if not tf.executing_eagerly(): + # Only runs on GPU with CUDA, channels_first is not supported on + # CPU. + # TODO(b/62340061): Support channels_first on CPU. + if tf.test.is_gpu_available(cuda_only=True): + test_utils.layer_test( + keras.layers.AveragePooling2D, + kwargs={ + "strides": (1, 1), + "padding": "valid", + "pool_size": (2, 2), + "data_format": "channels_first", + }, + input_shape=(3, 4, 5, 6), + ) - # This part of the test can only run on GPU but doesn't appear - # to be properly assigned to a GPU when running in eager mode. - if not tf.executing_eagerly(): - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. - if tf.test.is_gpu_available(cuda_only=True): + def test_average_pooling_3d(self): + pool_size = (3, 3, 3) test_utils.layer_test( - keras.layers.AveragePooling2D, + keras.layers.AveragePooling3D, + kwargs={"strides": 2, "padding": "valid", "pool_size": pool_size}, + input_shape=(3, 11, 12, 10, 4), + ) + test_utils.layer_test( + keras.layers.AveragePooling3D, kwargs={ - 'strides': (1, 1), - 'padding': 'valid', - 'pool_size': (2, 2), - 'data_format': 'channels_first' + "strides": 3, + "padding": "valid", + "data_format": "channels_first", + "pool_size": pool_size, }, - input_shape=(3, 4, 5, 6)) + input_shape=(3, 4, 11, 12, 10), + ) - def test_average_pooling_3d(self): - pool_size = (3, 3, 3) - test_utils.layer_test( - keras.layers.AveragePooling3D, - kwargs={ - 'strides': 2, - 'padding': 'valid', - 'pool_size': pool_size - }, - input_shape=(3, 11, 12, 10, 4)) - test_utils.layer_test( - keras.layers.AveragePooling3D, - kwargs={ - 'strides': 3, - 'padding': 'valid', - 'data_format': 'channels_first', - 'pool_size': pool_size - }, - input_shape=(3, 4, 11, 12, 10)) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/pooling/base_global_pooling1d.py b/keras/layers/pooling/base_global_pooling1d.py index 073f3d8cb3ee..fbf2465109be 100644 --- a/keras/layers/pooling/base_global_pooling1d.py +++ b/keras/layers/pooling/base_global_pooling1d.py @@ -13,41 +13,56 @@ # limitations under the License. 
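The tests above pin down the shape behavior through `test_utils.layer_test`; the docstring formulas from earlier in this diff ("valid" yields `(input_shape - pool_size + 1) / strides` steps rounded up, "same" yields `input_shape / strides` rounded up) can also be checked by hand outside the harness. A minimal sketch, assuming TensorFlow 2.x with eager execution:

```python
import tensorflow as tf

x = tf.reshape(tf.constant([1.0, 2.0, 3.0, 4.0, 5.0]), [1, 5, 1])  # (batch, steps, features)

valid = tf.keras.layers.AveragePooling1D(pool_size=2, strides=1, padding="valid")
print(valid(x).shape)                # (1, 4, 1): (5 - 2 + 1) / 1 = 4 steps
print(tf.squeeze(valid(x)).numpy())  # [1.5 2.5 3.5 4.5]

same = tf.keras.layers.AveragePooling1D(pool_size=2, strides=1, padding="same")
print(same(x).shape)                 # (1, 5, 1): 5 / 1 = 5 steps
print(tf.squeeze(same(x)).numpy())   # [1.5 2.5 3.5 4.5 5. ]  (padded cells are excluded from the mean)
```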
# ============================================================================== """Private base class for global pooling 1D layers.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf class GlobalPooling1D(Layer): - """Abstract class for different global pooling 1D layers.""" - - def __init__(self, data_format='channels_last', keepdims=False, **kwargs): - super().__init__(**kwargs) - self.input_spec = InputSpec(ndim=3) - self.data_format = conv_utils.normalize_data_format(data_format) - self.keepdims = keepdims - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - if self.keepdims: - return tf.TensorShape([input_shape[0], input_shape[1], 1]) - else: - return tf.TensorShape([input_shape[0], input_shape[1]]) - else: - if self.keepdims: - return tf.TensorShape([input_shape[0], 1, input_shape[2]]) - else: - return tf.TensorShape([input_shape[0], input_shape[2]]) - - def call(self, inputs): - raise NotImplementedError - - def get_config(self): - config = {'data_format': self.data_format, 'keepdims': self.keepdims} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Abstract class for different global pooling 1D layers.""" + + def __init__(self, data_format="channels_last", keepdims=False, **kwargs): + super().__init__(**kwargs) + self.input_spec = InputSpec(ndim=3) + self.data_format = conv_utils.normalize_data_format(data_format) + self.keepdims = keepdims + + def _validate_reduction_axis(self, input_shape, axes): + for axis in axes: + if input_shape[axis] == 0: + raise ValueError( + f"Incorrect input shape {input_shape} " + f"with dimension 0 at reduction axis {axis}." + ) + + def build(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_last": + self._validate_reduction_axis(input_shape, [1]) + else: + self._validate_reduction_axis(input_shape, [2]) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_first": + if self.keepdims: + return tf.TensorShape([input_shape[0], input_shape[1], 1]) + else: + return tf.TensorShape([input_shape[0], input_shape[1]]) + else: + if self.keepdims: + return tf.TensorShape([input_shape[0], 1, input_shape[2]]) + else: + return tf.TensorShape([input_shape[0], input_shape[2]]) + + def call(self, inputs): + raise NotImplementedError + def get_config(self): + config = {"data_format": self.data_format, "keepdims": self.keepdims} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/pooling/base_global_pooling2d.py b/keras/layers/pooling/base_global_pooling2d.py index f1c22279cf6b..7fe7a28e890c 100644 --- a/keras/layers/pooling/base_global_pooling2d.py +++ b/keras/layers/pooling/base_global_pooling2d.py @@ -13,40 +13,56 @@ # limitations under the License. 
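The `build` override and `_validate_reduction_axis` helper are the substantive change in this file: a zero-length reduction axis now fails fast with a clear error instead of computing a mean over zero elements. A hedged illustration, assuming eager execution and this change applied (the error text is copied from the helper above):

```python
import tensorflow as tf

layer = tf.keras.layers.GlobalAveragePooling1D()  # channels_last reduces axis 1 (steps)
try:
    layer(tf.zeros([2, 0, 8]))  # zero-length steps axis
except ValueError as e:
    print(e)  # Incorrect input shape [2, 0, 8] with dimension 0 at reduction axis 1.
```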
# ============================================================================== """Private base class for global pooling 2D layers.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf class GlobalPooling2D(Layer): - """Abstract class for different global pooling 2D layers.""" - - def __init__(self, data_format=None, keepdims=False, **kwargs): - super().__init__(**kwargs) - self.data_format = conv_utils.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=4) - self.keepdims = keepdims - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_last': - if self.keepdims: - return tf.TensorShape([input_shape[0], 1, 1, input_shape[3]]) - else: - return tf.TensorShape([input_shape[0], input_shape[3]]) - else: - if self.keepdims: - return tf.TensorShape([input_shape[0], input_shape[1], 1, 1]) - else: - return tf.TensorShape([input_shape[0], input_shape[1]]) - - def call(self, inputs): - raise NotImplementedError - - def get_config(self): - config = {'data_format': self.data_format, 'keepdims': self.keepdims} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Abstract class for different global pooling 2D layers.""" + + def __init__(self, data_format=None, keepdims=False, **kwargs): + super().__init__(**kwargs) + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(ndim=4) + self.keepdims = keepdims + + def _validate_reduction_axis(self, input_shape, axes): + for axis in axes: + if input_shape[axis] == 0: + raise ValueError( + f"Incorrect input shape {input_shape} " + f"with dimension 0 at reduction axis {axis}." + ) + + def build(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_last": + self._validate_reduction_axis(input_shape, [1, 2]) + else: + self._validate_reduction_axis(input_shape, [2, 3]) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_last": + if self.keepdims: + return tf.TensorShape([input_shape[0], 1, 1, input_shape[3]]) + else: + return tf.TensorShape([input_shape[0], input_shape[3]]) + else: + if self.keepdims: + return tf.TensorShape([input_shape[0], input_shape[1], 1, 1]) + else: + return tf.TensorShape([input_shape[0], input_shape[1]]) + + def call(self, inputs): + raise NotImplementedError + + def get_config(self): + config = {"data_format": self.data_format, "keepdims": self.keepdims} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/pooling/base_global_pooling3d.py b/keras/layers/pooling/base_global_pooling3d.py index 40ccf92bf849..749475ac857b 100644 --- a/keras/layers/pooling/base_global_pooling3d.py +++ b/keras/layers/pooling/base_global_pooling3d.py @@ -13,42 +13,56 @@ # limitations under the License. 
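The 2D base class gains the same zero-axis check; the rest is reformatting. The `keepdims` branching in `compute_output_shape` above is easiest to see from a public subclass. A short sketch, assuming `GlobalAveragePooling2D`:

```python
import tensorflow as tf

x = tf.random.normal([2, 4, 5, 3])  # (batch, rows, cols, channels)

print(tf.keras.layers.GlobalAveragePooling2D()(x).shape)               # (2, 3)
print(tf.keras.layers.GlobalAveragePooling2D(keepdims=True)(x).shape)  # (2, 1, 1, 3)

# Both options round-trip through get_config(), as serialized above.
layer = tf.keras.layers.GlobalAveragePooling2D(keepdims=True)
print(layer.get_config()["keepdims"])  # True
```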
# ============================================================================== """Private base class for global pooling 3D layers.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf class GlobalPooling3D(Layer): - """Abstract class for different global pooling 3D layers.""" - - def __init__(self, data_format=None, keepdims=False, **kwargs): - super().__init__(**kwargs) - self.data_format = conv_utils.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=5) - self.keepdims = keepdims - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_last': - if self.keepdims: - return tf.TensorShape( - [input_shape[0], 1, 1, 1, input_shape[4]]) - else: - return tf.TensorShape([input_shape[0], input_shape[4]]) - else: - if self.keepdims: - return tf.TensorShape( - [input_shape[0], input_shape[1], 1, 1, 1]) - else: - return tf.TensorShape([input_shape[0], input_shape[1]]) - - def call(self, inputs): - raise NotImplementedError - - def get_config(self): - config = {'data_format': self.data_format, 'keepdims': self.keepdims} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Abstract class for different global pooling 3D layers.""" + + def __init__(self, data_format=None, keepdims=False, **kwargs): + super().__init__(**kwargs) + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(ndim=5) + self.keepdims = keepdims + + def _validate_reduction_axis(self, input_shape, axes): + for axis in axes: + if input_shape[axis] == 0: + raise ValueError( + f"Incorrect input shape {input_shape} " + f"with dimension 0 at reduction axis {axis}." + ) + + def build(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_last": + self._validate_reduction_axis(input_shape, [1, 2, 3]) + else: + self._validate_reduction_axis(input_shape, [2, 3, 4]) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_last": + if self.keepdims: + return tf.TensorShape([input_shape[0], 1, 1, 1, input_shape[4]]) + else: + return tf.TensorShape([input_shape[0], input_shape[4]]) + else: + if self.keepdims: + return tf.TensorShape([input_shape[0], input_shape[1], 1, 1, 1]) + else: + return tf.TensorShape([input_shape[0], input_shape[1]]) + + def call(self, inputs): + raise NotImplementedError + + def get_config(self): + config = {"data_format": self.data_format, "keepdims": self.keepdims} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/pooling/base_pooling1d.py b/keras/layers/pooling/base_pooling1d.py index 2176b9d3ed17..397196d51e55 100644 --- a/keras/layers/pooling/base_pooling1d.py +++ b/keras/layers/pooling/base_pooling1d.py @@ -13,88 +13,97 @@ # limitations under the License. 
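The 3D base class follows the same pattern. Since global pooling is just a mean reduction over the axes selected by `data_format`, the `channels_first` path is equivalent to transposing a `channels_last` input (and, unlike the strided pooling ops, it runs fine on CPU). A sketch of that equivalence, up to float summation order:

```python
import tensorflow as tf

x_last = tf.random.normal([2, 3, 4, 5, 6])       # (batch, d1, d2, d3, channels)
x_first = tf.transpose(x_last, [0, 4, 1, 2, 3])  # (batch, channels, d1, d2, d3)

y_last = tf.keras.layers.GlobalAveragePooling3D(data_format="channels_last")(x_last)
y_first = tf.keras.layers.GlobalAveragePooling3D(data_format="channels_first")(x_first)

print(tf.reduce_max(tf.abs(y_last - y_first)).numpy())  # ~0.0: both are (batch, channels)
```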
# ============================================================================== """Private base class for pooling 1D layers.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf class Pooling1D(Layer): - """Pooling layer for arbitrary pooling functions, for 1D inputs. + """Pooling layer for arbitrary pooling functions, for 1D inputs. - This class only exists for code reuse. It will never be an exposed API. + This class only exists for code reuse. It will never be an exposed API. - Args: - pool_function: The pooling function to apply, e.g. `tf.nn.max_pool2d`. - pool_size: An integer or tuple/list of a single integer, - representing the size of the pooling window. - strides: An integer or tuple/list of a single integer, specifying the - strides of the pooling operation. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, steps, features)` while `channels_first` - corresponds to inputs with shape - `(batch, features, steps)`. - name: A string, the name of the layer. - """ + Args: + pool_function: The pooling function to apply, e.g. `tf.nn.max_pool2d`. + pool_size: An integer or tuple/list of a single integer, + representing the size of the pooling window. + strides: An integer or tuple/list of a single integer, specifying the + strides of the pooling operation. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, steps, features)` while `channels_first` + corresponds to inputs with shape + `(batch, features, steps)`. + name: A string, the name of the layer. 
+ """ - def __init__(self, pool_function, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - super().__init__(name=name, **kwargs) - if data_format is None: - data_format = backend.image_data_format() - if strides is None: - strides = pool_size - self.pool_function = pool_function - self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size') - self.strides = conv_utils.normalize_tuple( - strides, 1, 'strides', allow_zero=True) - self.padding = conv_utils.normalize_padding(padding) - self.data_format = conv_utils.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=3) + def __init__( + self, + pool_function, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, + **kwargs + ): + super().__init__(name=name, **kwargs) + if data_format is None: + data_format = backend.image_data_format() + if strides is None: + strides = pool_size + self.pool_function = pool_function + self.pool_size = conv_utils.normalize_tuple(pool_size, 1, "pool_size") + self.strides = conv_utils.normalize_tuple( + strides, 1, "strides", allow_zero=True + ) + self.padding = conv_utils.normalize_padding(padding) + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(ndim=3) - def call(self, inputs): - pad_axis = 2 if self.data_format == 'channels_last' else 3 - inputs = tf.expand_dims(inputs, pad_axis) - outputs = self.pool_function( - inputs, - self.pool_size + (1,), - strides=self.strides + (1,), - padding=self.padding, - data_format=self.data_format) - return tf.squeeze(outputs, pad_axis) + def call(self, inputs): + pad_axis = 2 if self.data_format == "channels_last" else 3 + inputs = tf.expand_dims(inputs, pad_axis) + outputs = self.pool_function( + inputs, + self.pool_size + (1,), + strides=self.strides + (1,), + padding=self.padding, + data_format=self.data_format, + ) + return tf.squeeze(outputs, pad_axis) - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - steps = input_shape[2] - features = input_shape[1] - else: - steps = input_shape[1] - features = input_shape[2] - length = conv_utils.conv_output_length(steps, - self.pool_size[0], - self.padding, - self.strides[0]) - if self.data_format == 'channels_first': - return tf.TensorShape([input_shape[0], features, length]) - else: - return tf.TensorShape([input_shape[0], length, features]) + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_first": + steps = input_shape[2] + features = input_shape[1] + else: + steps = input_shape[1] + features = input_shape[2] + length = conv_utils.conv_output_length( + steps, self.pool_size[0], self.padding, self.strides[0] + ) + if self.data_format == "channels_first": + return tf.TensorShape([input_shape[0], features, length]) + else: + return tf.TensorShape([input_shape[0], length, features]) - def get_config(self): - config = { - 'strides': self.strides, - 'pool_size': self.pool_size, - 'padding': self.padding, - 'data_format': self.data_format, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = { + "strides": self.strides, + "pool_size": self.pool_size, + "padding": self.padding, + "data_format": self.data_format, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff 
--git a/keras/layers/pooling/base_pooling2d.py b/keras/layers/pooling/base_pooling2d.py index e783d4220d05..3aaa080700bd 100644 --- a/keras/layers/pooling/base_pooling2d.py +++ b/keras/layers/pooling/base_pooling2d.py @@ -13,96 +13,108 @@ # limitations under the License. # ============================================================================== """Private base class for pooling 2D layers.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf class Pooling2D(Layer): - """Pooling layer for arbitrary pooling functions, for 2D inputs (e.g. images). + """Pooling layer for arbitrary pooling functions, for 2D data (e.g. images). - This class only exists for code reuse. It will never be an exposed API. + This class only exists for code reuse. It will never be an exposed API. - Args: - pool_function: The pooling function to apply, e.g. `tf.nn.max_pool2d`. - pool_size: An integer or tuple/list of 2 integers: (pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - name: A string, the name of the layer. - """ + Args: + pool_function: The pooling function to apply, e.g. `tf.nn.max_pool2d`. + pool_size: An integer or tuple/list of 2 integers: + (pool_height, pool_width) + specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, one of `channels_last` (default) or + `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + name: A string, the name of the layer. 
+ """ - def __init__(self, pool_function, pool_size, strides, - padding='valid', data_format=None, - name=None, **kwargs): - super().__init__(name=name, **kwargs) - if data_format is None: - data_format = backend.image_data_format() - if strides is None: - strides = pool_size - self.pool_function = pool_function - self.pool_size = conv_utils.normalize_tuple(pool_size, 2, 'pool_size') - self.strides = conv_utils.normalize_tuple( - strides, 2, 'strides', allow_zero=True) - self.padding = conv_utils.normalize_padding(padding) - self.data_format = conv_utils.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=4) + def __init__( + self, + pool_function, + pool_size, + strides, + padding="valid", + data_format=None, + name=None, + **kwargs + ): + super().__init__(name=name, **kwargs) + if data_format is None: + data_format = backend.image_data_format() + if strides is None: + strides = pool_size + self.pool_function = pool_function + self.pool_size = conv_utils.normalize_tuple(pool_size, 2, "pool_size") + self.strides = conv_utils.normalize_tuple( + strides, 2, "strides", allow_zero=True + ) + self.padding = conv_utils.normalize_padding(padding) + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(ndim=4) - def call(self, inputs): - if self.data_format == 'channels_last': - pool_shape = (1,) + self.pool_size + (1,) - strides = (1,) + self.strides + (1,) - else: - pool_shape = (1, 1) + self.pool_size - strides = (1, 1) + self.strides - outputs = self.pool_function( - inputs, - ksize=pool_shape, - strides=strides, - padding=self.padding.upper(), - data_format=conv_utils.convert_data_format(self.data_format, 4)) - return outputs + def call(self, inputs): + if self.data_format == "channels_last": + pool_shape = (1,) + self.pool_size + (1,) + strides = (1,) + self.strides + (1,) + else: + pool_shape = (1, 1) + self.pool_size + strides = (1, 1) + self.strides + outputs = self.pool_function( + inputs, + ksize=pool_shape, + strides=strides, + padding=self.padding.upper(), + data_format=conv_utils.convert_data_format(self.data_format, 4), + ) + return outputs - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - rows = input_shape[2] - cols = input_shape[3] - else: - rows = input_shape[1] - cols = input_shape[2] - rows = conv_utils.conv_output_length(rows, self.pool_size[0], self.padding, - self.strides[0]) - cols = conv_utils.conv_output_length(cols, self.pool_size[1], self.padding, - self.strides[1]) - if self.data_format == 'channels_first': - return tf.TensorShape( - [input_shape[0], input_shape[1], rows, cols]) - else: - return tf.TensorShape( - [input_shape[0], rows, cols, input_shape[3]]) + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_first": + rows = input_shape[2] + cols = input_shape[3] + else: + rows = input_shape[1] + cols = input_shape[2] + rows = conv_utils.conv_output_length( + rows, self.pool_size[0], self.padding, self.strides[0] + ) + cols = conv_utils.conv_output_length( + cols, self.pool_size[1], self.padding, self.strides[1] + ) + if self.data_format == "channels_first": + return tf.TensorShape([input_shape[0], input_shape[1], rows, cols]) + else: + return tf.TensorShape([input_shape[0], rows, cols, input_shape[3]]) - def get_config(self): - config = { - 'pool_size': self.pool_size, - 'padding': self.padding, - 'strides': self.strides, - 
'data_format': self.data_format - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = { + "pool_size": self.pool_size, + "padding": self.padding, + "strides": self.strides, + "data_format": self.data_format, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/pooling/base_pooling3d.py b/keras/layers/pooling/base_pooling3d.py index ad75cc32f002..bc4d5b7bde1c 100644 --- a/keras/layers/pooling/base_pooling3d.py +++ b/keras/layers/pooling/base_pooling3d.py @@ -13,107 +13,123 @@ # limitations under the License. # ============================================================================== """Private base class for pooling 3D layers.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf class Pooling3D(Layer): - """Pooling layer for arbitrary pooling functions, for 3D inputs. + """Pooling layer for arbitrary pooling functions, for 3D inputs. - This class only exists for code reuse. It will never be an exposed API. + This class only exists for code reuse. It will never be an exposed API. - Args: - pool_function: The pooling function to apply, e.g. `tf.nn.max_pool2d`. - pool_size: An integer or tuple/list of 3 integers: - (pool_depth, pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` - while `channels_first` corresponds to - inputs with shape `(batch, channels, depth, height, width)`. - name: A string, the name of the layer. - """ + Args: + pool_function: The pooling function to apply, e.g. `tf.nn.max_pool2d`. + pool_size: An integer or tuple/list of 3 integers: + (pool_depth, pool_height, pool_width) + specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 3 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, one of `channels_last` (default) or + `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, depth, height, width, channels)` + while `channels_first` corresponds to + inputs with shape `(batch, channels, depth, height, width)`. + name: A string, the name of the layer. 
+ """ - def __init__(self, pool_function, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - super().__init__(name=name, **kwargs) - if data_format is None: - data_format = backend.image_data_format() - if strides is None: - strides = pool_size - self.pool_function = pool_function - self.pool_size = conv_utils.normalize_tuple(pool_size, 3, 'pool_size') - self.strides = conv_utils.normalize_tuple( - strides, 3, 'strides', allow_zero=True) - self.padding = conv_utils.normalize_padding(padding) - self.data_format = conv_utils.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=5) + def __init__( + self, + pool_function, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, + **kwargs + ): + super().__init__(name=name, **kwargs) + if data_format is None: + data_format = backend.image_data_format() + if strides is None: + strides = pool_size + self.pool_function = pool_function + self.pool_size = conv_utils.normalize_tuple(pool_size, 3, "pool_size") + self.strides = conv_utils.normalize_tuple( + strides, 3, "strides", allow_zero=True + ) + self.padding = conv_utils.normalize_padding(padding) + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(ndim=5) - def call(self, inputs): - pool_shape = (1,) + self.pool_size + (1,) - strides = (1,) + self.strides + (1,) + def call(self, inputs): + pool_shape = (1,) + self.pool_size + (1,) + strides = (1,) + self.strides + (1,) - if self.data_format == 'channels_first': - # TF does not support `channels_first` with 3D pooling operations, - # so we must handle this case manually. - # TODO(fchollet): remove this when TF pooling is feature-complete. - inputs = tf.transpose(inputs, (0, 2, 3, 4, 1)) + if self.data_format == "channels_first": + # TF does not support `channels_first` with 3D pooling operations, + # so we must handle this case manually. + # TODO(fchollet): remove this when TF pooling is feature-complete. 
+ inputs = tf.transpose(inputs, (0, 2, 3, 4, 1)) - outputs = self.pool_function( - inputs, - ksize=pool_shape, - strides=strides, - padding=self.padding.upper()) + outputs = self.pool_function( + inputs, + ksize=pool_shape, + strides=strides, + padding=self.padding.upper(), + ) - if self.data_format == 'channels_first': - outputs = tf.transpose(outputs, (0, 4, 1, 2, 3)) - return outputs + if self.data_format == "channels_first": + outputs = tf.transpose(outputs, (0, 4, 1, 2, 3)) + return outputs - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - len_dim1 = input_shape[2] - len_dim2 = input_shape[3] - len_dim3 = input_shape[4] - else: - len_dim1 = input_shape[1] - len_dim2 = input_shape[2] - len_dim3 = input_shape[3] - len_dim1 = conv_utils.conv_output_length(len_dim1, self.pool_size[0], - self.padding, self.strides[0]) - len_dim2 = conv_utils.conv_output_length(len_dim2, self.pool_size[1], - self.padding, self.strides[1]) - len_dim3 = conv_utils.conv_output_length(len_dim3, self.pool_size[2], - self.padding, self.strides[2]) - if self.data_format == 'channels_first': - return tf.TensorShape( - [input_shape[0], input_shape[1], len_dim1, len_dim2, len_dim3]) - else: - return tf.TensorShape( - [input_shape[0], len_dim1, len_dim2, len_dim3, input_shape[4]]) + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_first": + len_dim1 = input_shape[2] + len_dim2 = input_shape[3] + len_dim3 = input_shape[4] + else: + len_dim1 = input_shape[1] + len_dim2 = input_shape[2] + len_dim3 = input_shape[3] + len_dim1 = conv_utils.conv_output_length( + len_dim1, self.pool_size[0], self.padding, self.strides[0] + ) + len_dim2 = conv_utils.conv_output_length( + len_dim2, self.pool_size[1], self.padding, self.strides[1] + ) + len_dim3 = conv_utils.conv_output_length( + len_dim3, self.pool_size[2], self.padding, self.strides[2] + ) + if self.data_format == "channels_first": + return tf.TensorShape( + [input_shape[0], input_shape[1], len_dim1, len_dim2, len_dim3] + ) + else: + return tf.TensorShape( + [input_shape[0], len_dim1, len_dim2, len_dim3, input_shape[4]] + ) - def get_config(self): - config = { - 'pool_size': self.pool_size, - 'padding': self.padding, - 'strides': self.strides, - 'data_format': self.data_format - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = { + "pool_size": self.pool_size, + "padding": self.padding, + "strides": self.strides, + "data_format": self.data_format, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/pooling/global_average_pooling1d.py b/keras/layers/pooling/global_average_pooling1d.py index 4ec277e591df..0a81e9f98b1d 100644 --- a/keras/layers/pooling/global_average_pooling1d.py +++ b/keras/layers/pooling/global_average_pooling1d.py @@ -13,87 +13,89 @@ # limitations under the License. 
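Two details of `Pooling3D` above are worth seeing end to end: `call` works around the NDHWC-only 3D pooling kernels by transposing `channels_first` inputs and transposing back (which is also why `channels_first` 3D pooling works on CPU), and `compute_output_shape` applies `conv_utils.conv_output_length`, i.e. `floor((L - pool) / stride) + 1` for `"valid"` padding, per spatial axis. A hedged sketch using the public `AveragePooling3D`:

```python
import tensorflow as tf

x = tf.random.normal([3, 4, 11, 12, 10])  # (batch, channels, d1, d2, d3)
layer = tf.keras.layers.AveragePooling3D(
    pool_size=3, strides=2, padding="valid", data_format="channels_first"
)

# floor((11 - 3) / 2) + 1 = 5, floor((12 - 3) / 2) + 1 = 5, floor((10 - 3) / 2) + 1 = 4
print(layer(x).shape)                                   # (3, 4, 5, 5, 4)
print(layer.compute_output_shape([3, 4, 11, 12, 10]))   # matches the eager result
```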
# ============================================================================== """Global average pooling 1D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.layers.pooling.base_global_pooling1d import GlobalPooling1D -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.GlobalAveragePooling1D', - 'keras.layers.GlobalAvgPool1D') +@keras_export( + "keras.layers.GlobalAveragePooling1D", "keras.layers.GlobalAvgPool1D" +) class GlobalAveragePooling1D(GlobalPooling1D): - """Global average pooling operation for temporal data. - - Examples: - - >>> input_shape = (2, 3, 4) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.GlobalAveragePooling1D()(x) - >>> print(y.shape) - (2, 4) - - Args: - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, steps, features)` while `channels_first` - corresponds to inputs with shape - `(batch, features, steps)`. - keepdims: A boolean, whether to keep the temporal dimension or not. - If `keepdims` is `False` (default), the rank of the tensor is reduced - for spatial dimensions. - If `keepdims` is `True`, the temporal dimension are retained with - length 1. - The behavior is the same as for `tf.reduce_mean` or `np.mean`. - - Call arguments: - inputs: A 3D tensor. - mask: Binary tensor of shape `(batch_size, steps)` indicating whether - a given step should be masked (excluded from the average). - - Input shape: - - If `data_format='channels_last'`: - 3D tensor with shape: - `(batch_size, steps, features)` - - If `data_format='channels_first'`: - 3D tensor with shape: - `(batch_size, features, steps)` - - Output shape: - - If `keepdims`=False: - 2D tensor with shape `(batch_size, features)`. - - If `keepdims`=True: + """Global average pooling operation for temporal data. + + Examples: + + >>> input_shape = (2, 3, 4) + >>> x = tf.random.normal(input_shape) + >>> y = tf.keras.layers.GlobalAveragePooling1D()(x) + >>> print(y.shape) + (2, 4) + + Args: + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, steps, features)` while `channels_first` + corresponds to inputs with shape + `(batch, features, steps)`. + keepdims: A boolean, whether to keep the temporal dimension or not. + If `keepdims` is `False` (default), the rank of the tensor is reduced + for spatial dimensions. + If `keepdims` is `True`, the temporal dimension is retained with + length 1. + The behavior is the same as for `tf.reduce_mean` or `np.mean`. + + Call arguments: + inputs: A 3D tensor. + mask: Binary tensor of shape `(batch_size, steps)` indicating whether + a given step should be masked (excluded from the average).
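The `mask` call argument documented above is why this layer overrides `call` rather than delegating to a plain mean: masked steps are zeroed out and the sum is divided by the count of surviving steps, as the implementation just below shows. A small sketch of that behavior:

```python
import tensorflow as tf

x = tf.constant([[[1.0], [2.0], [300.0]]])  # (batch=1, steps=3, features=1)
mask = tf.constant([[True, True, False]])   # drop the last step

layer = tf.keras.layers.GlobalAveragePooling1D()
print(layer(x, mask=mask).numpy())  # [[1.5]]  -- mean of the unmasked steps only
print(layer(x).numpy())             # [[101.]] -- plain mean over all three steps
```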
+ + Input shape: - If `data_format='channels_last'`: - 3D tensor with shape `(batch_size, 1, features)` + 3D tensor with shape: + `(batch_size, steps, features)` - If `data_format='channels_first'`: - 3D tensor with shape `(batch_size, features, 1)` - """ - - def __init__(self, data_format='channels_last', **kwargs): - super().__init__(data_format=data_format, - **kwargs) - self.supports_masking = True - - def call(self, inputs, mask=None): - steps_axis = 1 if self.data_format == 'channels_last' else 2 - if mask is not None: - mask = tf.cast(mask, inputs[0].dtype) - mask = tf.expand_dims( - mask, 2 if self.data_format == 'channels_last' else 1) - inputs *= mask - return backend.sum( - inputs, axis=steps_axis, - keepdims=self.keepdims) / tf.reduce_sum( - mask, axis=steps_axis, keepdims=self.keepdims) - else: - return backend.mean(inputs, axis=steps_axis, keepdims=self.keepdims) - - def compute_mask(self, inputs, mask=None): - return None + 3D tensor with shape: + `(batch_size, features, steps)` + + Output shape: + - If `keepdims`=False: + 2D tensor with shape `(batch_size, features)`. + - If `keepdims`=True: + - If `data_format='channels_last'`: + 3D tensor with shape `(batch_size, 1, features)` + - If `data_format='channels_first'`: + 3D tensor with shape `(batch_size, features, 1)` + """ + + def __init__(self, data_format="channels_last", **kwargs): + super().__init__(data_format=data_format, **kwargs) + self.supports_masking = True + + def call(self, inputs, mask=None): + steps_axis = 1 if self.data_format == "channels_last" else 2 + if mask is not None: + mask = tf.cast(mask, inputs[0].dtype) + mask = tf.expand_dims( + mask, 2 if self.data_format == "channels_last" else 1 + ) + inputs *= mask + return backend.sum( + inputs, axis=steps_axis, keepdims=self.keepdims + ) / tf.reduce_sum(mask, axis=steps_axis, keepdims=self.keepdims) + else: + return backend.mean(inputs, axis=steps_axis, keepdims=self.keepdims) + + def compute_mask(self, inputs, mask=None): + return None # Alias diff --git a/keras/layers/pooling/global_average_pooling2d.py b/keras/layers/pooling/global_average_pooling2d.py index 54dab87a6680..e219e2414081 100644 --- a/keras/layers/pooling/global_average_pooling2d.py +++ b/keras/layers/pooling/global_average_pooling2d.py @@ -13,66 +13,68 @@ # limitations under the License. # ============================================================================== """Global average pooling 2D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import backend from keras.layers.pooling.base_global_pooling2d import GlobalPooling2D +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.GlobalAveragePooling2D', - 'keras.layers.GlobalAvgPool2D') +@keras_export( + "keras.layers.GlobalAveragePooling2D", "keras.layers.GlobalAvgPool2D" +) class GlobalAveragePooling2D(GlobalPooling2D): - """Global average pooling operation for spatial data. - - Examples: + """Global average pooling operation for spatial data. - >>> input_shape = (2, 4, 5, 3) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.GlobalAveragePooling2D()(x) - >>> print(y.shape) - (2, 3) + Examples: - Args: - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. 
- It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - keepdims: A boolean, whether to keep the spatial dimensions or not. - If `keepdims` is `False` (default), the rank of the tensor is reduced - for spatial dimensions. - If `keepdims` is `True`, the spatial dimensions are retained with - length 1. - The behavior is the same as for `tf.reduce_mean` or `np.mean`. + >>> input_shape = (2, 4, 5, 3) + >>> x = tf.random.normal(input_shape) + >>> y = tf.keras.layers.GlobalAveragePooling2D()(x) + >>> print(y.shape) + (2, 3) - Input shape: - - If `data_format='channels_last'`: - 4D tensor with shape `(batch_size, rows, cols, channels)`. - - If `data_format='channels_first'`: - 4D tensor with shape `(batch_size, channels, rows, cols)`. + Args: + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, height, width)`. + When unspecified, uses `image_data_format` value found + in your Keras config file at `~/.keras/keras.json` + (if exists) else 'channels_last'. Defaults to 'channels_last'. + keepdims: A boolean, whether to keep the spatial dimensions or not. + If `keepdims` is `False` (default), the rank of the tensor is reduced + for spatial dimensions. + If `keepdims` is `True`, the spatial dimensions are retained with + length 1. + The behavior is the same as for `tf.reduce_mean` or `np.mean`. - Output shape: - - If `keepdims`=False: - 2D tensor with shape `(batch_size, channels)`. - - If `keepdims`=True: + Input shape: - If `data_format='channels_last'`: - 4D tensor with shape `(batch_size, 1, 1, channels)` + 4D tensor with shape `(batch_size, rows, cols, channels)`. - If `data_format='channels_first'`: - 4D tensor with shape `(batch_size, channels, 1, 1)` - """ + 4D tensor with shape `(batch_size, channels, rows, cols)`. + + Output shape: + - If `keepdims`=False: + 2D tensor with shape `(batch_size, channels)`. + - If `keepdims`=True: + - If `data_format='channels_last'`: + 4D tensor with shape `(batch_size, 1, 1, channels)` + - If `data_format='channels_first'`: + 4D tensor with shape `(batch_size, channels, 1, 1)` + """ - def call(self, inputs): - if self.data_format == 'channels_last': - return backend.mean(inputs, axis=[1, 2], keepdims=self.keepdims) - else: - return backend.mean(inputs, axis=[2, 3], keepdims=self.keepdims) + def call(self, inputs): + if self.data_format == "channels_last": + return backend.mean(inputs, axis=[1, 2], keepdims=self.keepdims) + else: + return backend.mean(inputs, axis=[2, 3], keepdims=self.keepdims) # Alias diff --git a/keras/layers/pooling/global_average_pooling3d.py b/keras/layers/pooling/global_average_pooling3d.py index 2130e5294eb2..04b95667ed8e 100644 --- a/keras/layers/pooling/global_average_pooling3d.py +++ b/keras/layers/pooling/global_average_pooling3d.py @@ -13,60 +13,63 @@ # limitations under the License. 
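`GlobalAveragePooling2D.call` above reduces over the row/column axes, so the layer accepts any spatial size, which is why it is the usual replacement for `Flatten` at the top of a convolutional backbone. A hedged usage sketch (layer sizes are arbitrary):

```python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.Input(shape=(None, None, 3)),     # any image size
    tf.keras.layers.Conv2D(16, 3, activation="relu"),
    tf.keras.layers.GlobalAveragePooling2D(),  # -> (batch, 16) regardless of spatial size
    tf.keras.layers.Dense(10),
])
print(model.output_shape)  # (None, 10)
```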
# ============================================================================== """Global average pooling 3D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import backend from keras.layers.pooling.base_global_pooling3d import GlobalPooling3D +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.GlobalAveragePooling3D', - 'keras.layers.GlobalAvgPool3D') +@keras_export( + "keras.layers.GlobalAveragePooling3D", "keras.layers.GlobalAvgPool3D" +) class GlobalAveragePooling3D(GlobalPooling3D): - """Global Average pooling operation for 3D data. - - Args: - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - keepdims: A boolean, whether to keep the spatial dimensions or not. - If `keepdims` is `False` (default), the rank of the tensor is reduced - for spatial dimensions. - If `keepdims` is `True`, the spatial dimensions are retained with - length 1. - The behavior is the same as for `tf.reduce_mean` or `np.mean`. + """Global Average pooling operation for 3D data. - Input shape: - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + Args: + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + while `channels_first` corresponds to inputs with shape + `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + keepdims: A boolean, whether to keep the spatial dimensions or not. + If `keepdims` is `False` (default), the rank of the tensor is reduced + for spatial dimensions. + If `keepdims` is `True`, the spatial dimensions are retained with + length 1. + The behavior is the same as for `tf.reduce_mean` or `np.mean`. - Output shape: - - If `keepdims`=False: - 2D tensor with shape `(batch_size, channels)`. - - If `keepdims`=True: + Input shape: - If `data_format='channels_last'`: - 5D tensor with shape `(batch_size, 1, 1, 1, channels)` + 5D tensor with shape: + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - If `data_format='channels_first'`: - 5D tensor with shape `(batch_size, channels, 1, 1, 1)` - """ + 5D tensor with shape: + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + + Output shape: + - If `keepdims`=False: + 2D tensor with shape `(batch_size, channels)`. 
+ - If `keepdims`=True: + - If `data_format='channels_last'`: + 5D tensor with shape `(batch_size, 1, 1, 1, channels)` + - If `data_format='channels_first'`: + 5D tensor with shape `(batch_size, channels, 1, 1, 1)` + """ - def call(self, inputs): - if self.data_format == 'channels_last': - return backend.mean(inputs, axis=[1, 2, 3], keepdims=self.keepdims) - else: - return backend.mean(inputs, axis=[2, 3, 4], keepdims=self.keepdims) + def call(self, inputs): + if self.data_format == "channels_last": + return backend.mean(inputs, axis=[1, 2, 3], keepdims=self.keepdims) + else: + return backend.mean(inputs, axis=[2, 3, 4], keepdims=self.keepdims) # Alias diff --git a/keras/layers/pooling/global_average_pooling_test.py b/keras/layers/pooling/global_average_pooling_test.py index f38a5a46dcc5..ed33f7c44767 100644 --- a/keras/layers/pooling/global_average_pooling_test.py +++ b/keras/layers/pooling/global_average_pooling_test.py @@ -14,131 +14,157 @@ # ============================================================================== """Tests for global average pooling layers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.mixed_precision import policy from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class GlobalAveragePoolingTest(tf.test.TestCase, parameterized.TestCase): - - @test_utils.enable_v2_dtype_behavior - def test_mixed_float16_policy(self): - with policy.policy_scope('mixed_float16'): - inputs1 = keras.Input(shape=(36, 512), dtype='float16') - inputs2 = keras.Input(shape=(36,), dtype='bool') - average_layer = keras.layers.GlobalAveragePooling1D() - _ = average_layer(inputs1, inputs2) - - def test_global_average_pooling_1d(self): - test_utils.layer_test( - keras.layers.GlobalAveragePooling1D, input_shape=(3, 4, 5)) - test_utils.layer_test( - keras.layers.GlobalAveragePooling1D, - kwargs={'data_format': 'channels_first'}, - input_shape=(3, 4, 5)) - - def test_global_average_pooling_1d_masking_support(self): - model = keras.Sequential() - model.add(keras.layers.Masking(mask_value=0., input_shape=(None, 4))) - model.add(keras.layers.GlobalAveragePooling1D()) - model.compile(loss='mae', optimizer='rmsprop') - - model_input = np.random.random((2, 3, 4)) - model_input[0, 1:, :] = 0 - output = model.predict(model_input) - self.assertAllClose(output[0], model_input[0, 0, :]) - - def test_global_average_pooling_1d_with_ragged(self): - ragged_data = tf.ragged.constant( - [[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]], [[1.0, 1.0], [2.0, 2.0]]], - ragged_rank=1) - dense_data = ragged_data.to_tensor() - - inputs = keras.Input(shape=(None, 2), dtype='float32', ragged=True) - out = keras.layers.GlobalAveragePooling1D()(inputs) - model = keras.models.Model(inputs=inputs, outputs=out) - output_ragged = model.predict(ragged_data, steps=1) - - inputs = keras.Input(shape=(None, 2), dtype='float32') - masking = keras.layers.Masking(mask_value=0., input_shape=(3, 2))(inputs) - out = keras.layers.GlobalAveragePooling1D()(masking) - model = keras.models.Model(inputs=inputs, outputs=out) - output_dense = model.predict(dense_data, steps=1) - - self.assertAllEqual(output_ragged, output_dense) - - def test_global_average_pooling_2d(self): - test_utils.layer_test( - 
keras.layers.GlobalAveragePooling2D, - kwargs={'data_format': 'channels_first'}, - input_shape=(3, 4, 5, 6)) - test_utils.layer_test( - keras.layers.GlobalAveragePooling2D, - kwargs={'data_format': 'channels_last'}, - input_shape=(3, 5, 6, 4)) - - def test_global_average_pooling_3d(self): - test_utils.layer_test( - keras.layers.GlobalAveragePooling3D, - kwargs={'data_format': 'channels_first'}, - input_shape=(3, 4, 3, 4, 3)) - test_utils.layer_test( - keras.layers.GlobalAveragePooling3D, - kwargs={'data_format': 'channels_last'}, - input_shape=(3, 4, 3, 4, 3)) - - def test_global_average_pooling_1d_keepdims(self): - test_utils.layer_test( - keras.layers.GlobalAveragePooling1D, - kwargs={'keepdims': True}, - input_shape=(3, 4, 5), - expected_output_shape=(None, 1, 5)) - test_utils.layer_test( - keras.layers.GlobalAveragePooling1D, - kwargs={'data_format': 'channels_first', 'keepdims': True}, - input_shape=(3, 4, 5), - expected_output_shape=(None, 4, 1)) - - def test_global_average_pooling_2d_keepdims(self): - test_utils.layer_test( - keras.layers.GlobalAveragePooling2D, - kwargs={'data_format': 'channels_first', 'keepdims': True}, - input_shape=(3, 4, 5, 6), - expected_output_shape=(None, 4, 1, 1)) - test_utils.layer_test( - keras.layers.GlobalAveragePooling2D, - kwargs={'data_format': 'channels_last', 'keepdims': True}, - input_shape=(3, 4, 5, 6), - expected_output_shape=(None, 1, 1, 6)) - - def test_global_average_pooling_3d_keepdims(self): - test_utils.layer_test( - keras.layers.GlobalAveragePooling3D, - kwargs={'data_format': 'channels_first', 'keepdims': True}, - input_shape=(3, 4, 3, 4, 3), - expected_output_shape=(None, 4, 1, 1, 1)) - test_utils.layer_test( - keras.layers.GlobalAveragePooling3D, - kwargs={'data_format': 'channels_last', 'keepdims': True}, - input_shape=(3, 4, 3, 4, 3), - expected_output_shape=(None, 1, 1, 1, 3)) - - def test_global_average_pooling_1d_keepdims_masking_support(self): - model = keras.Sequential() - model.add(keras.layers.Masking(mask_value=0., input_shape=(None, 4))) - model.add(keras.layers.GlobalAveragePooling1D(keepdims=True)) - model.compile(loss='mae', optimizer='rmsprop') - - model_input = np.random.random((2, 3, 4)) - model_input[0, 1:, :] = 0 - output = model.predict(model_input) - self.assertAllEqual((2, 1, 4), output.shape) - self.assertAllClose(output[0, 0], model_input[0, 0, :]) - -if __name__ == '__main__': - tf.test.main() + @test_utils.enable_v2_dtype_behavior + def test_mixed_float16_policy(self): + with policy.policy_scope("mixed_float16"): + inputs1 = keras.Input(shape=(36, 512), dtype="float16") + inputs2 = keras.Input(shape=(36,), dtype="bool") + average_layer = keras.layers.GlobalAveragePooling1D() + _ = average_layer(inputs1, inputs2) + + def test_global_average_pooling_1d(self): + test_utils.layer_test( + keras.layers.GlobalAveragePooling1D, input_shape=(3, 4, 5) + ) + test_utils.layer_test( + keras.layers.GlobalAveragePooling1D, + kwargs={"data_format": "channels_first"}, + input_shape=(3, 4, 5), + ) + + def test_global_average_pooling_1d_masking_support(self): + model = keras.Sequential() + model.add(keras.layers.Masking(mask_value=0.0, input_shape=(None, 4))) + model.add(keras.layers.GlobalAveragePooling1D()) + model.compile(loss="mae", optimizer="rmsprop") + + model_input = np.random.random((2, 3, 4)) + model_input[0, 1:, :] = 0 + output = model.predict(model_input) + self.assertAllClose(output[0], model_input[0, 0, :]) + + def test_global_average_pooling_1d_with_ragged(self): + ragged_data = tf.ragged.constant( + [[[1.0, 
1.0], [2.0, 2.0], [3.0, 3.0]], [[1.0, 1.0], [2.0, 2.0]]], + ragged_rank=1, + ) + dense_data = ragged_data.to_tensor() + + inputs = keras.Input(shape=(None, 2), dtype="float32", ragged=True) + out = keras.layers.GlobalAveragePooling1D()(inputs) + model = keras.models.Model(inputs=inputs, outputs=out) + output_ragged = model.predict(ragged_data, steps=1) + + inputs = keras.Input(shape=(None, 2), dtype="float32") + masking = keras.layers.Masking(mask_value=0.0, input_shape=(3, 2))( + inputs + ) + out = keras.layers.GlobalAveragePooling1D()(masking) + model = keras.models.Model(inputs=inputs, outputs=out) + output_dense = model.predict(dense_data, steps=1) + + self.assertAllEqual(output_ragged, output_dense) + + def test_global_average_pooling_2d(self): + test_utils.layer_test( + keras.layers.GlobalAveragePooling2D, + kwargs={"data_format": "channels_first"}, + input_shape=(3, 4, 5, 6), + ) + test_utils.layer_test( + keras.layers.GlobalAveragePooling2D, + kwargs={"data_format": "channels_last"}, + input_shape=(3, 5, 6, 4), + ) + + def test_global_average_pooling_3d(self): + test_utils.layer_test( + keras.layers.GlobalAveragePooling3D, + kwargs={"data_format": "channels_first"}, + input_shape=(3, 4, 3, 4, 3), + ) + test_utils.layer_test( + keras.layers.GlobalAveragePooling3D, + kwargs={"data_format": "channels_last"}, + input_shape=(3, 4, 3, 4, 3), + ) + + def test_global_average_pooling_1d_keepdims(self): + test_utils.layer_test( + keras.layers.GlobalAveragePooling1D, + kwargs={"keepdims": True}, + input_shape=(3, 4, 5), + expected_output_shape=(None, 1, 5), + ) + test_utils.layer_test( + keras.layers.GlobalAveragePooling1D, + kwargs={"data_format": "channels_first", "keepdims": True}, + input_shape=(3, 4, 5), + expected_output_shape=(None, 4, 1), + ) + + def test_global_average_pooling_2d_keepdims(self): + test_utils.layer_test( + keras.layers.GlobalAveragePooling2D, + kwargs={"data_format": "channels_first", "keepdims": True}, + input_shape=(3, 4, 5, 6), + expected_output_shape=(None, 4, 1, 1), + ) + test_utils.layer_test( + keras.layers.GlobalAveragePooling2D, + kwargs={"data_format": "channels_last", "keepdims": True}, + input_shape=(3, 4, 5, 6), + expected_output_shape=(None, 1, 1, 6), + ) + + def test_global_average_pooling_3d_keepdims(self): + test_utils.layer_test( + keras.layers.GlobalAveragePooling3D, + kwargs={"data_format": "channels_first", "keepdims": True}, + input_shape=(3, 4, 3, 4, 3), + expected_output_shape=(None, 4, 1, 1, 1), + ) + test_utils.layer_test( + keras.layers.GlobalAveragePooling3D, + kwargs={"data_format": "channels_last", "keepdims": True}, + input_shape=(3, 4, 3, 4, 3), + expected_output_shape=(None, 1, 1, 1, 3), + ) + + def test_global_average_pooling_1d_keepdims_masking_support(self): + model = keras.Sequential() + model.add(keras.layers.Masking(mask_value=0.0, input_shape=(None, 4))) + model.add(keras.layers.GlobalAveragePooling1D(keepdims=True)) + model.compile(loss="mae", optimizer="rmsprop") + + model_input = np.random.random((2, 3, 4)) + model_input[0, 1:, :] = 0 + output = model.predict(model_input) + self.assertAllEqual((2, 1, 4), output.shape) + self.assertAllClose(output[0, 0], model_input[0, 0, :]) + + def test_global_average_pooling_1d_invalid_input_dimension(self): + with self.assertRaisesRegex(ValueError, r"""Incorrect input shape"""): + layer = keras.layers.GlobalAveragePooling1D() + layer.build((None, 0, 2)) + + def test_global_average_pooling_3d_invalid_input_dimension(self): + with self.assertRaisesRegex(ValueError, r"""Incorrect input 
shape"""):
+            layer = keras.layers.GlobalAveragePooling3D(keepdims=True)
+            layer.build((None, 0, 16, 16, 3))
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/keras/layers/pooling/global_max_pooling1d.py b/keras/layers/pooling/global_max_pooling1d.py
index 4bcaa6869e4f..db84f22eb53a 100644
--- a/keras/layers/pooling/global_max_pooling1d.py
+++ b/keras/layers/pooling/global_max_pooling1d.py
@@ -13,73 +13,74 @@
 # limitations under the License.
 # ==============================================================================
 """Global max pooling 1D layer."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
+
 from keras import backend
 from keras.layers.pooling.base_global_pooling1d import GlobalPooling1D
+# isort: off
 from tensorflow.python.util.tf_export import keras_export
-@keras_export('keras.layers.GlobalMaxPool1D', 'keras.layers.GlobalMaxPooling1D')
+@keras_export("keras.layers.GlobalMaxPooling1D", "keras.layers.GlobalMaxPool1D")
 class GlobalMaxPooling1D(GlobalPooling1D):
-  """Global max pooling operation for 1D temporal data.
-
-  Downsamples the input representation by taking the maximum value over
-  the time dimension.
+    """Global max pooling operation for 1D temporal data.
-  For example:
+    Downsamples the input representation by taking the maximum value over
+    the time dimension.
-  >>> x = tf.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
-  >>> x = tf.reshape(x, [3, 3, 1])
-  >>> x
-  <tf.Tensor: shape=(3, 3, 1), dtype=float32, numpy=
-  array([[[1.],
-          [2.],
-          [3.]],
-         [[4.],
-          [5.],
-          [6.]],
-         [[7.],
-          [8.],
-          [9.]]], dtype=float32)>
-  >>> max_pool_1d = tf.keras.layers.GlobalMaxPooling1D()
-  >>> max_pool_1d(x)
-  <tf.Tensor: shape=(3, 1), dtype=float32, numpy=
-  array([[3.],
-         [6.],
-         [9.]], dtype=float32)>
+    For example:
-  Args:
-    data_format: A string,
-      one of `channels_last` (default) or `channels_first`.
-      The ordering of the dimensions in the inputs.
-      `channels_last` corresponds to inputs with shape
-      `(batch, steps, features)` while `channels_first`
-      corresponds to inputs with shape
-      `(batch, features, steps)`.
-    keepdims: A boolean, whether to keep the temporal dimension or not.
-      If `keepdims` is `False` (default), the rank of the tensor is reduced
-      for spatial dimensions.
-      If `keepdims` is `True`, the temporal dimension are retained with
-      length 1.
-      The behavior is the same as for `tf.reduce_max` or `np.max`.
+    >>> x = tf.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
+    >>> x = tf.reshape(x, [3, 3, 1])
+    >>> x
+    <tf.Tensor: shape=(3, 3, 1), dtype=float32, numpy=
+    array([[[1.],
+            [2.],
+            [3.]],
+           [[4.],
+            [5.],
+            [6.]],
+           [[7.],
+            [8.],
+            [9.]]], dtype=float32)>
+    >>> max_pool_1d = tf.keras.layers.GlobalMaxPooling1D()
+    >>> max_pool_1d(x)
+    <tf.Tensor: shape=(3, 1), dtype=float32, numpy=
+    array([[3.],
+           [6.],
+           [9.]], dtype=float32)>
-  Input shape:
-    - If `data_format='channels_last'`:
-      3D tensor with shape:
-      `(batch_size, steps, features)`
-    - If `data_format='channels_first'`:
-      3D tensor with shape:
-      `(batch_size, features, steps)`
+    Args:
+      data_format: A string,
+        one of `channels_last` (default) or `channels_first`.
+        The ordering of the dimensions in the inputs.
+        `channels_last` corresponds to inputs with shape
+        `(batch, steps, features)` while `channels_first`
+        corresponds to inputs with shape
+        `(batch, features, steps)`.
+      keepdims: A boolean, whether to keep the temporal dimension or not.
+        If `keepdims` is `False` (default), the rank of the tensor is reduced
+        for the temporal dimension.
+        If `keepdims` is `True`, the temporal dimension is retained with
+        length 1.
+        The behavior is the same as for `tf.reduce_max` or `np.max`.
-  Output shape:
-    - If `keepdims`=False:
-      2D tensor with shape `(batch_size, features)`.
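A short sketch (not from the patch) of the 1D global max pooling shapes documented here, plus the zero-length-axis validation that the newly added tests assert:

```python
import tensorflow as tf

x = tf.random.normal((3, 7, 5))  # (batch, steps, features)
print(tf.keras.layers.GlobalMaxPooling1D()(x).shape)               # (3, 5)
print(tf.keras.layers.GlobalMaxPooling1D(keepdims=True)(x).shape)  # (3, 1, 5)

# Building on a zero-length pooling axis is rejected, per the new tests.
try:
    tf.keras.layers.GlobalMaxPooling1D().build((None, 0, 2))
except ValueError as e:
    print(e)  # mentions "Incorrect input shape"
```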
- - If `keepdims`=True: + Input shape: - If `data_format='channels_last'`: - 3D tensor with shape `(batch_size, 1, features)` + 3D tensor with shape: + `(batch_size, steps, features)` - If `data_format='channels_first'`: - 3D tensor with shape `(batch_size, features, 1)` - """ + 3D tensor with shape: + `(batch_size, features, steps)` + + Output shape: + - If `keepdims`=False: + 2D tensor with shape `(batch_size, features)`. + - If `keepdims`=True: + - If `data_format='channels_last'`: + 3D tensor with shape `(batch_size, 1, features)` + - If `data_format='channels_first'`: + 3D tensor with shape `(batch_size, features, 1)` + """ - def call(self, inputs): - steps_axis = 1 if self.data_format == 'channels_last' else 2 - return backend.max(inputs, axis=steps_axis, keepdims=self.keepdims) + def call(self, inputs): + steps_axis = 1 if self.data_format == "channels_last" else 2 + return backend.max(inputs, axis=steps_axis, keepdims=self.keepdims) # Alias diff --git a/keras/layers/pooling/global_max_pooling2d.py b/keras/layers/pooling/global_max_pooling2d.py index dee0a258a060..77ef11b3abdd 100644 --- a/keras/layers/pooling/global_max_pooling2d.py +++ b/keras/layers/pooling/global_max_pooling2d.py @@ -13,65 +13,67 @@ # limitations under the License. # ============================================================================== """Global max pooling 2D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import backend from keras.layers.pooling.base_global_pooling2d import GlobalPooling2D +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.GlobalMaxPool2D', 'keras.layers.GlobalMaxPooling2D') +@keras_export("keras.layers.GlobalMaxPooling2D", "keras.layers.GlobalMaxPool2D") class GlobalMaxPooling2D(GlobalPooling2D): - """Global max pooling operation for spatial data. - - Examples: + """Global max pooling operation for spatial data. - >>> input_shape = (2, 4, 5, 3) - >>> x = tf.random.normal(input_shape) - >>> y = tf.keras.layers.GlobalMaxPool2D()(x) - >>> print(y.shape) - (2, 3) + Examples: - Args: - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - keepdims: A boolean, whether to keep the spatial dimensions or not. - If `keepdims` is `False` (default), the rank of the tensor is reduced - for spatial dimensions. - If `keepdims` is `True`, the spatial dimensions are retained with - length 1. - The behavior is the same as for `tf.reduce_max` or `np.max`. + >>> input_shape = (2, 4, 5, 3) + >>> x = tf.random.normal(input_shape) + >>> y = tf.keras.layers.GlobalMaxPooling2D()(x) + >>> print(y.shape) + (2, 3) - Input shape: - - If `data_format='channels_last'`: - 4D tensor with shape `(batch_size, rows, cols, channels)`. - - If `data_format='channels_first'`: - 4D tensor with shape `(batch_size, channels, rows, cols)`. + Args: + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. 
+ `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, height, width)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + keepdims: A boolean, whether to keep the spatial dimensions or not. + If `keepdims` is `False` (default), the rank of the tensor is reduced + for spatial dimensions. + If `keepdims` is `True`, the spatial dimensions are retained with + length 1. + The behavior is the same as for `tf.reduce_max` or `np.max`. - Output shape: - - If `keepdims`=False: - 2D tensor with shape `(batch_size, channels)`. - - If `keepdims`=True: + Input shape: - If `data_format='channels_last'`: - 4D tensor with shape `(batch_size, 1, 1, channels)` + 4D tensor with shape `(batch_size, rows, cols, channels)`. - If `data_format='channels_first'`: - 4D tensor with shape `(batch_size, channels, 1, 1)` - """ + 4D tensor with shape `(batch_size, channels, rows, cols)`. + + Output shape: + - If `keepdims`=False: + 2D tensor with shape `(batch_size, channels)`. + - If `keepdims`=True: + - If `data_format='channels_last'`: + 4D tensor with shape `(batch_size, 1, 1, channels)` + - If `data_format='channels_first'`: + 4D tensor with shape `(batch_size, channels, 1, 1)` + """ - def call(self, inputs): - if self.data_format == 'channels_last': - return backend.max(inputs, axis=[1, 2], keepdims=self.keepdims) - else: - return backend.max(inputs, axis=[2, 3], keepdims=self.keepdims) + def call(self, inputs): + if self.data_format == "channels_last": + return backend.max(inputs, axis=[1, 2], keepdims=self.keepdims) + else: + return backend.max(inputs, axis=[2, 3], keepdims=self.keepdims) # Alias diff --git a/keras/layers/pooling/global_max_pooling3d.py b/keras/layers/pooling/global_max_pooling3d.py index 7df93d13df93..f5385fc9b414 100644 --- a/keras/layers/pooling/global_max_pooling3d.py +++ b/keras/layers/pooling/global_max_pooling3d.py @@ -13,59 +13,61 @@ # limitations under the License. # ============================================================================== """Global max pooling 3D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import backend from keras.layers.pooling.base_global_pooling3d import GlobalPooling3D +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.GlobalMaxPool3D', 'keras.layers.GlobalMaxPooling3D') +@keras_export("keras.layers.GlobalMaxPooling3D", "keras.layers.GlobalMaxPool3D") class GlobalMaxPooling3D(GlobalPooling3D): - """Global Max pooling operation for 3D data. - - Args: - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - keepdims: A boolean, whether to keep the spatial dimensions or not. - If `keepdims` is `False` (default), the rank of the tensor is reduced - for spatial dimensions. - If `keepdims` is `True`, the spatial dimensions are retained with - length 1. 
- The behavior is the same as for `tf.reduce_max` or `np.max`. + """Global Max pooling operation for 3D data. - Input shape: - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + Args: + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + while `channels_first` corresponds to inputs with shape + `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + keepdims: A boolean, whether to keep the spatial dimensions or not. + If `keepdims` is `False` (default), the rank of the tensor is reduced + for spatial dimensions. + If `keepdims` is `True`, the spatial dimensions are retained with + length 1. + The behavior is the same as for `tf.reduce_max` or `np.max`. - Output shape: - - If `keepdims`=False: - 2D tensor with shape `(batch_size, channels)`. - - If `keepdims`=True: + Input shape: - If `data_format='channels_last'`: - 5D tensor with shape `(batch_size, 1, 1, 1, channels)` + 5D tensor with shape: + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - If `data_format='channels_first'`: - 5D tensor with shape `(batch_size, channels, 1, 1, 1)` - """ + 5D tensor with shape: + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + + Output shape: + - If `keepdims`=False: + 2D tensor with shape `(batch_size, channels)`. 
+ - If `keepdims`=True: + - If `data_format='channels_last'`: + 5D tensor with shape `(batch_size, 1, 1, 1, channels)` + - If `data_format='channels_first'`: + 5D tensor with shape `(batch_size, channels, 1, 1, 1)` + """ - def call(self, inputs): - if self.data_format == 'channels_last': - return backend.max(inputs, axis=[1, 2, 3], keepdims=self.keepdims) - else: - return backend.max(inputs, axis=[2, 3, 4], keepdims=self.keepdims) + def call(self, inputs): + if self.data_format == "channels_last": + return backend.max(inputs, axis=[1, 2, 3], keepdims=self.keepdims) + else: + return backend.max(inputs, axis=[2, 3, 4], keepdims=self.keepdims) # Alias diff --git a/keras/layers/pooling/global_max_pooling_test.py b/keras/layers/pooling/global_max_pooling_test.py index f8f4dcd1db1e..ccb59703a3c2 100644 --- a/keras/layers/pooling/global_max_pooling_test.py +++ b/keras/layers/pooling/global_max_pooling_test.py @@ -14,98 +14,124 @@ # ============================================================================== """Tests for global max pooling layers.""" +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class GlobalMaxPoolingTest(tf.test.TestCase, parameterized.TestCase): + def test_global_max_pooling_1d(self): + test_utils.layer_test( + keras.layers.GlobalMaxPooling1D, input_shape=(3, 4, 5) + ) + test_utils.layer_test( + keras.layers.GlobalMaxPooling1D, + kwargs={"data_format": "channels_first"}, + input_shape=(3, 4, 5), + ) + + def test_global_max_pooling_2d_with_ragged(self): + ragged_data = tf.ragged.constant( + [ + [[[1.0], [1.0]], [[2.0], [2.0]], [[3.0], [3.0]]], + [[[1.0], [1.0]], [[2.0], [2.0]]], + ], + ragged_rank=1, + ) + dense_data = ragged_data.to_tensor() + + inputs = keras.Input(shape=(None, 2, 1), dtype="float32", ragged=True) + out = keras.layers.GlobalMaxPooling2D()(inputs) + model = keras.models.Model(inputs=inputs, outputs=out) + output_ragged = model.predict(ragged_data, steps=1) + + inputs = keras.Input(shape=(None, 2, 1), dtype="float32") + out = keras.layers.GlobalMaxPooling2D()(inputs) + model = keras.models.Model(inputs=inputs, outputs=out) + output_dense = model.predict(dense_data, steps=1) + + self.assertAllEqual(output_ragged, output_dense) + + def test_global_max_pooling_2d(self): + test_utils.layer_test( + keras.layers.GlobalMaxPooling2D, + kwargs={"data_format": "channels_first"}, + input_shape=(3, 4, 5, 6), + ) + test_utils.layer_test( + keras.layers.GlobalMaxPooling2D, + kwargs={"data_format": "channels_last"}, + input_shape=(3, 5, 6, 4), + ) + + def test_global_maxpooling_3d(self): + test_utils.layer_test( + keras.layers.GlobalMaxPooling3D, + kwargs={"data_format": "channels_first"}, + input_shape=(3, 4, 3, 4, 3), + ) + test_utils.layer_test( + keras.layers.GlobalMaxPooling3D, + kwargs={"data_format": "channels_last"}, + input_shape=(3, 4, 3, 4, 3), + ) + + def test_global_max_pooling_1d_keepdims(self): + test_utils.layer_test( + keras.layers.GlobalMaxPooling1D, + kwargs={"keepdims": True}, + input_shape=(3, 4, 5), + expected_output_shape=(None, 1, 5), + ) + test_utils.layer_test( + keras.layers.GlobalMaxPooling1D, + kwargs={"data_format": "channels_first", "keepdims": True}, + input_shape=(3, 4, 5), + expected_output_shape=(None, 4, 1), + ) + 
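The ragged test above leans on the fact that, for non-negative data, the zeros introduced by `to_tensor()` padding can never win the max, so the dense and ragged paths agree. A sketch of that comparison (illustrative only):

```python
import tensorflow as tf

ragged_data = tf.ragged.constant(
    [
        [[[1.0], [1.0]], [[2.0], [2.0]], [[3.0], [3.0]]],
        [[[1.0], [1.0]], [[2.0], [2.0]]],
    ],
    ragged_rank=1,
)

inputs = tf.keras.Input(shape=(None, 2, 1), dtype="float32", ragged=True)
model = tf.keras.Model(inputs, tf.keras.layers.GlobalMaxPooling2D()(inputs))
output_ragged = model.predict(ragged_data, steps=1)

# Dense path: padded zeros are <= every real (positive) value here.
output_dense = tf.keras.layers.GlobalMaxPooling2D()(ragged_data.to_tensor())
print(output_ragged)         # [[3.], [2.]]
print(output_dense.numpy())  # same values
```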
+ def test_global_max_pooling_2d_keepdims(self): + test_utils.layer_test( + keras.layers.GlobalMaxPooling2D, + kwargs={"data_format": "channels_first", "keepdims": True}, + input_shape=(3, 4, 5, 6), + expected_output_shape=(None, 4, 1, 1), + ) + test_utils.layer_test( + keras.layers.GlobalMaxPooling2D, + kwargs={"data_format": "channels_last", "keepdims": True}, + input_shape=(3, 4, 5, 6), + expected_output_shape=(None, 1, 1, 6), + ) + + def test_global_max_pooling_3d_keepdims(self): + test_utils.layer_test( + keras.layers.GlobalMaxPooling3D, + kwargs={"data_format": "channels_first", "keepdims": True}, + input_shape=(3, 4, 3, 4, 3), + expected_output_shape=(None, 4, 1, 1, 1), + ) + test_utils.layer_test( + keras.layers.GlobalMaxPooling3D, + kwargs={"data_format": "channels_last", "keepdims": True}, + input_shape=(3, 4, 3, 4, 3), + expected_output_shape=(None, 1, 1, 1, 3), + ) + + def test_global_max_pooling_1d_invalid_input_dimension(self): + with self.assertRaisesRegex(ValueError, r"""Incorrect input shape"""): + layer = keras.layers.GlobalMaxPooling1D() + layer.build((None, 0, 2)) + + def test_global_max_pooling_3d_invalid_input_dimension(self): + with self.assertRaisesRegex(ValueError, r"""Incorrect input shape"""): + layer = keras.layers.GlobalMaxPooling3D(keepdims=True) + layer.build((None, 0, 16, 16, 3)) + - def test_global_max_pooling_1d(self): - test_utils.layer_test( - keras.layers.GlobalMaxPooling1D, input_shape=(3, 4, 5)) - test_utils.layer_test( - keras.layers.GlobalMaxPooling1D, - kwargs={'data_format': 'channels_first'}, - input_shape=(3, 4, 5)) - - def test_global_max_pooling_2d_with_ragged(self): - ragged_data = tf.ragged.constant( - [[[[1.0], [1.0]], [[2.0], [2.0]], [[3.0], [3.0]]], - [[[1.0], [1.0]], [[2.0], [2.0]]]], - ragged_rank=1) - dense_data = ragged_data.to_tensor() - - inputs = keras.Input(shape=(None, 2, 1), dtype='float32', ragged=True) - out = keras.layers.GlobalMaxPooling2D()(inputs) - model = keras.models.Model(inputs=inputs, outputs=out) - output_ragged = model.predict(ragged_data, steps=1) - - inputs = keras.Input(shape=(None, 2, 1), dtype='float32') - out = keras.layers.GlobalMaxPooling2D()(inputs) - model = keras.models.Model(inputs=inputs, outputs=out) - output_dense = model.predict(dense_data, steps=1) - - self.assertAllEqual(output_ragged, output_dense) - - def test_global_max_pooling_2d(self): - test_utils.layer_test( - keras.layers.GlobalMaxPooling2D, - kwargs={'data_format': 'channels_first'}, - input_shape=(3, 4, 5, 6)) - test_utils.layer_test( - keras.layers.GlobalMaxPooling2D, - kwargs={'data_format': 'channels_last'}, - input_shape=(3, 5, 6, 4)) - - def test_global_maxpooling_3d(self): - test_utils.layer_test( - keras.layers.GlobalMaxPooling3D, - kwargs={'data_format': 'channels_first'}, - input_shape=(3, 4, 3, 4, 3)) - test_utils.layer_test( - keras.layers.GlobalMaxPooling3D, - kwargs={'data_format': 'channels_last'}, - input_shape=(3, 4, 3, 4, 3)) - - def test_global_max_pooling_1d_keepdims(self): - test_utils.layer_test( - keras.layers.GlobalMaxPooling1D, - kwargs={'keepdims': True}, - input_shape=(3, 4, 5), - expected_output_shape=(None, 1, 5)) - test_utils.layer_test( - keras.layers.GlobalMaxPooling1D, - kwargs={'data_format': 'channels_first', 'keepdims': True}, - input_shape=(3, 4, 5), - expected_output_shape=(None, 4, 1)) - - def test_global_max_pooling_2d_keepdims(self): - test_utils.layer_test( - keras.layers.GlobalMaxPooling2D, - kwargs={'data_format': 'channels_first', 'keepdims': True}, - input_shape=(3, 4, 5, 6), - 
expected_output_shape=(None, 4, 1, 1))
-    test_utils.layer_test(
-        keras.layers.GlobalMaxPooling2D,
-        kwargs={'data_format': 'channels_last', 'keepdims': True},
-        input_shape=(3, 4, 5, 6),
-        expected_output_shape=(None, 1, 1, 6))
-
-  def test_global_max_pooling_3d_keepdims(self):
-    test_utils.layer_test(
-        keras.layers.GlobalMaxPooling3D,
-        kwargs={'data_format': 'channels_first', 'keepdims': True},
-        input_shape=(3, 4, 3, 4, 3),
-        expected_output_shape=(None, 4, 1, 1, 1))
-    test_utils.layer_test(
-        keras.layers.GlobalMaxPooling3D,
-        kwargs={'data_format': 'channels_last', 'keepdims': True},
-        input_shape=(3, 4, 3, 4, 3),
-        expected_output_shape=(None, 1, 1, 1, 3))
-
-if __name__ == '__main__':
-  tf.test.main()
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/keras/layers/pooling/max_pooling1d.py b/keras/layers/pooling/max_pooling1d.py
index ff090941d5cd..67e915d4b79c 100644
--- a/keras/layers/pooling/max_pooling1d.py
+++ b/keras/layers/pooling/max_pooling1d.py
@@ -13,106 +13,114 @@
 # limitations under the License.
 # ==============================================================================
 """Max pooling 1D layer."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
+
 import functools
 from keras import backend
 from keras.layers.pooling.base_pooling1d import Pooling1D
+# isort: off
 from tensorflow.python.util.tf_export import keras_export
-@keras_export('keras.layers.MaxPool1D', 'keras.layers.MaxPooling1D')
+@keras_export("keras.layers.MaxPooling1D", "keras.layers.MaxPool1D")
 class MaxPooling1D(Pooling1D):
-  """Max pooling operation for 1D temporal data.
-
-  Downsamples the input representation by taking the maximum value over a
-  spatial window of size `pool_size`. The window is shifted by `strides`. The
-  resulting output, when using the `"valid"` padding option, has a shape of:
-  `output_shape = (input_shape - pool_size + 1) / strides)`
-
-  The resulting output shape when using the `"same"` padding option is:
-  `output_shape = input_shape / strides`
-
-  For example, for `strides=1` and `padding="valid"`:
-
-  >>> x = tf.constant([1., 2., 3., 4., 5.])
-  >>> x = tf.reshape(x, [1, 5, 1])
-  >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,
-  ...    strides=1, padding='valid')
-  >>> max_pool_1d(x)
-  <tf.Tensor: shape=(1, 4, 1), dtype=float32, numpy=
-  array([[[2.],
-          [3.],
-          [4.],
-          [5.]]], dtype=float32)>
-
-  For example, for `strides=2` and `padding="valid"`:
-
-  >>> x = tf.constant([1., 2., 3., 4., 5.])
-  >>> x = tf.reshape(x, [1, 5, 1])
-  >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,
-  ...    strides=2, padding='valid')
-  >>> max_pool_1d(x)
-  <tf.Tensor: shape=(1, 2, 1), dtype=float32, numpy=
-  array([[[2.],
-          [4.]]], dtype=float32)>
-
-  For example, for `strides=1` and `padding="same"`:
-
-  >>> x = tf.constant([1., 2., 3., 4., 5.])
-  >>> x = tf.reshape(x, [1, 5, 1])
-  >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,
-  ...    strides=1, padding='same')
-  >>> max_pool_1d(x)
-  <tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy=
-  array([[[2.],
-          [3.],
-          [4.],
-          [5.],
-          [5.]]], dtype=float32)>
-
-  Args:
-    pool_size: Integer, size of the max pooling window.
-    strides: Integer, or None. Specifies how much the pooling window moves
-      for each pooling step.
-      If None, it will default to `pool_size`.
-    padding: One of `"valid"` or `"same"` (case-insensitive).
-      `"valid"` means no padding. `"same"` results in padding evenly to
-      the left/right or up/down of the input such that output has the same
-      height/width dimension as the input.
-    data_format: A string,
-      one of `channels_last` (default) or `channels_first`.
-      The ordering of the dimensions in the inputs.
-      `channels_last` corresponds to inputs with shape
-      `(batch, steps, features)` while `channels_first`
-      corresponds to inputs with shape
-      `(batch, features, steps)`.
-
-  Input shape:
-    - If `data_format='channels_last'`:
-      3D tensor with shape `(batch_size, steps, features)`.
-    - If `data_format='channels_first'`:
-      3D tensor with shape `(batch_size, features, steps)`.
-
-  Output shape:
-    - If `data_format='channels_last'`:
-      3D tensor with shape `(batch_size, downsampled_steps, features)`.
-    - If `data_format='channels_first'`:
-      3D tensor with shape `(batch_size, features, downsampled_steps)`.
-  """
-
-  def __init__(self, pool_size=2, strides=None,
-               padding='valid', data_format='channels_last', **kwargs):
-
-    super().__init__(
-        functools.partial(backend.pool2d, pool_mode='max'),
-        pool_size=pool_size,
-        strides=strides,
-        padding=padding,
-        data_format=data_format,
-        **kwargs)
+    """Max pooling operation for 1D temporal data.
+
+    Downsamples the input representation by taking the maximum value over a
+    spatial window of size `pool_size`. The window is shifted by `strides`. The
+    resulting output, when using the `"valid"` padding option, has a shape of:
+    `output_shape = (input_shape - pool_size + 1) / strides`
+
+    The resulting output shape when using the `"same"` padding option is:
+    `output_shape = input_shape / strides`
+
+    For example, for `strides=1` and `padding="valid"`:
+
+    >>> x = tf.constant([1., 2., 3., 4., 5.])
+    >>> x = tf.reshape(x, [1, 5, 1])
+    >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,
+    ...    strides=1, padding='valid')
+    >>> max_pool_1d(x)
+    <tf.Tensor: shape=(1, 4, 1), dtype=float32, numpy=
+    array([[[2.],
+            [3.],
+            [4.],
+            [5.]]], dtype=float32)>
+
+    For example, for `strides=2` and `padding="valid"`:
+
+    >>> x = tf.constant([1., 2., 3., 4., 5.])
+    >>> x = tf.reshape(x, [1, 5, 1])
+    >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,
+    ...    strides=2, padding='valid')
+    >>> max_pool_1d(x)
+    <tf.Tensor: shape=(1, 2, 1), dtype=float32, numpy=
+    array([[[2.],
+            [4.]]], dtype=float32)>
+
+    For example, for `strides=1` and `padding="same"`:
+
+    >>> x = tf.constant([1., 2., 3., 4., 5.])
+    >>> x = tf.reshape(x, [1, 5, 1])
+    >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,
+    ...    strides=1, padding='same')
+    >>> max_pool_1d(x)
+    <tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy=
+    array([[[2.],
+            [3.],
+            [4.],
+            [5.],
+            [5.]]], dtype=float32)>
+
+    Args:
+      pool_size: Integer, size of the max pooling window.
+      strides: Integer, or None. Specifies how much the pooling window moves
+        for each pooling step.
+        If None, it will default to `pool_size`.
+      padding: One of `"valid"` or `"same"` (case-insensitive).
+        `"valid"` means no padding. `"same"` results in padding evenly to
+        the left/right or up/down of the input such that output has the same
+        height/width dimension as the input.
+      data_format: A string,
+        one of `channels_last` (default) or `channels_first`.
+        The ordering of the dimensions in the inputs.
+        `channels_last` corresponds to inputs with shape
+        `(batch, steps, features)` while `channels_first`
+        corresponds to inputs with shape
+        `(batch, features, steps)`.
+
+    Input shape:
+      - If `data_format='channels_last'`:
+        3D tensor with shape `(batch_size, steps, features)`.
+      - If `data_format='channels_first'`:
+        3D tensor with shape `(batch_size, features, steps)`.
+
+    Output shape:
+      - If `data_format='channels_last'`:
+        3D tensor with shape `(batch_size, downsampled_steps, features)`.
+      - If `data_format='channels_first'`:
+        3D tensor with shape `(batch_size, features, downsampled_steps)`.
+    """
+
+    def __init__(
+        self,
+        pool_size=2,
+        strides=None,
+        padding="valid",
+        data_format="channels_last",
+        **kwargs
+    ):
+
+        super().__init__(
+            functools.partial(backend.pool2d, pool_mode="max"),
+            pool_size=pool_size,
+            strides=strides,
+            padding=padding,
+            data_format=data_format,
+            **kwargs
+        )
 # Alias
diff --git a/keras/layers/pooling/max_pooling2d.py b/keras/layers/pooling/max_pooling2d.py
index 1ac40cd41acf..f21ab07f2142 100644
--- a/keras/layers/pooling/max_pooling2d.py
+++ b/keras/layers/pooling/max_pooling2d.py
@@ -13,149 +13,158 @@
 # limitations under the License.
 # ==============================================================================
 """Max pooling 2D layer."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
-from keras.layers.pooling.base_pooling2d import Pooling2D
+
 import tensorflow.compat.v2 as tf
+from keras.layers.pooling.base_pooling2d import Pooling2D
+
+# isort: off
 from tensorflow.python.util.tf_export import keras_export
-@keras_export('keras.layers.MaxPool2D', 'keras.layers.MaxPooling2D')
+@keras_export("keras.layers.MaxPooling2D", "keras.layers.MaxPool2D")
 class MaxPooling2D(Pooling2D):
-  """Max pooling operation for 2D spatial data.
-
-  Downsamples the input along its spatial dimensions (height and width)
-  by taking the maximum value over an input window
-  (of size defined by `pool_size`) for each channel of the input.
-  The window is shifted by `strides` along each dimension.
-
-  The resulting output,
-  when using the `"valid"` padding option, has a spatial shape
-  (number of rows or columns) of:
-  `output_shape = math.floor((input_shape - pool_size) / strides) + 1`
-  (when `input_shape >= pool_size`)
-
-  The resulting output shape when using the `"same"` padding option is:
-  `output_shape = math.floor((input_shape - 1) / strides) + 1`
-
-  For example, for `strides=(1, 1)` and `padding="valid"`:
-
-  >>> x = tf.constant([[1., 2., 3.],
-  ...                  [4., 5., 6.],
-  ...                  [7., 8., 9.]])
-  >>> x = tf.reshape(x, [1, 3, 3, 1])
-  >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
-  ...    strides=(1, 1), padding='valid')
-  >>> max_pool_2d(x)
-  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
-  array([[[[5.],
-           [6.]],
-          [[8.],
-           [9.]]]], dtype=float32)>
-
-  For example, for `strides=(2, 2)` and `padding="valid"`:
-
-  >>> x = tf.constant([[1., 2., 3., 4.],
-  ...                  [5., 6., 7., 8.],
-  ...                  [9., 10., 11., 12.]])
-  >>> x = tf.reshape(x, [1, 3, 4, 1])
-  >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
-  ...    strides=(2, 2), padding='valid')
-  >>> max_pool_2d(x)
-  <tf.Tensor: shape=(1, 1, 2, 1), dtype=float32, numpy=
-  array([[[[6.],
-           [8.]]]], dtype=float32)>
-
-  Usage Example:
-
-  >>> input_image = tf.constant([[[[1.], [1.], [2.], [4.]],
-  ...                            [[2.], [2.], [3.], [2.]],
-  ...                            [[4.], [1.], [1.], [1.]],
-  ...                            [[2.], [2.], [1.], [4.]]]])
-  >>> output = tf.constant([[[[1], [0]],
-  ...                       [[0], [1]]]])
-  >>> model = tf.keras.models.Sequential()
-  >>> model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
-  ...    input_shape=(4, 4, 1)))
-  >>> model.compile('adam', 'mean_squared_error')
-  >>> model.predict(input_image, steps=1)
-  array([[[[2.],
-           [4.]],
-          [[4.],
-           [4.]]]], dtype=float32)
-
-  For example, for stride=(1, 1) and padding="same":
-
-  >>> x = tf.constant([[1., 2., 3.],
-  ...                  [4., 5., 6.],
-  ...                  [7., 8., 9.]])
-  >>> x = tf.reshape(x, [1, 3, 3, 1])
-  >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
-  ...    strides=(1, 1), padding='same')
-  >>> max_pool_2d(x)
-  <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
-  array([[[[5.],
-           [6.],
-           [6.]],
-          [[8.],
-           [9.],
-           [9.]],
-          [[8.],
-           [9.],
-           [9.]]]], dtype=float32)>
-
-  Args:
-    pool_size: integer or tuple of 2 integers,
-      window size over which to take the maximum.
-      `(2, 2)` will take the max value over a 2x2 pooling window.
-      If only one integer is specified, the same window length
-      will be used for both dimensions.
-    strides: Integer, tuple of 2 integers, or None.
-      Strides values. Specifies how far the pooling window moves
-      for each pooling step. If None, it will default to `pool_size`.
-    padding: One of `"valid"` or `"same"` (case-insensitive).
-      `"valid"` means no padding. `"same"` results in padding evenly to
-      the left/right or up/down of the input such that output has the same
-      height/width dimension as the input.
-    data_format: A string,
-      one of `channels_last` (default) or `channels_first`.
-      The ordering of the dimensions in the inputs.
-      `channels_last` corresponds to inputs with shape
-      `(batch, height, width, channels)` while `channels_first`
-      corresponds to inputs with shape
-      `(batch, channels, height, width)`.
-      It defaults to the `image_data_format` value found in your
-      Keras config file at `~/.keras/keras.json`.
-      If you never set it, then it will be "channels_last".
-
-  Input shape:
-    - If `data_format='channels_last'`:
-      4D tensor with shape `(batch_size, rows, cols, channels)`.
-    - If `data_format='channels_first'`:
-      4D tensor with shape `(batch_size, channels, rows, cols)`.
-
-  Output shape:
-    - If `data_format='channels_last'`:
-      4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`.
-    - If `data_format='channels_first'`:
-      4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`.
-
-  Returns:
-    A tensor of rank 4 representing the maximum pooled values. See above for
-    output shape.
-  """
-
-  def __init__(self,
-               pool_size=(2, 2),
-               strides=None,
-               padding='valid',
-               data_format=None,
-               **kwargs):
-    super().__init__(
-        tf.compat.v1.nn.max_pool,
-        pool_size=pool_size, strides=strides,
-        padding=padding, data_format=data_format, **kwargs)
+    """Max pooling operation for 2D spatial data.
+
+    Downsamples the input along its spatial dimensions (height and width)
+    by taking the maximum value over an input window
+    (of size defined by `pool_size`) for each channel of the input.
+    The window is shifted by `strides` along each dimension.
+
+    The resulting output,
+    when using the `"valid"` padding option, has a spatial shape
+    (number of rows or columns) of:
+    `output_shape = math.floor((input_shape - pool_size) / strides) + 1`
+    (when `input_shape >= pool_size`)
+
+    The resulting output shape when using the `"same"` padding option is:
+    `output_shape = math.floor((input_shape - 1) / strides) + 1`
+
+    For example, for `strides=(1, 1)` and `padding="valid"`:
+
+    >>> x = tf.constant([[1., 2., 3.],
+    ...                  [4., 5., 6.],
+    ...                  [7., 8., 9.]])
+    >>> x = tf.reshape(x, [1, 3, 3, 1])
+    >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
+    ...    strides=(1, 1), padding='valid')
+    >>> max_pool_2d(x)
+    <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
+    array([[[[5.],
+             [6.]],
+            [[8.],
+             [9.]]]], dtype=float32)>
+
+    For example, for `strides=(2, 2)` and `padding="valid"`:
+
+    >>> x = tf.constant([[1., 2., 3., 4.],
+    ...                  [5., 6., 7., 8.],
+    ...                  [9., 10., 11., 12.]])
+    >>> x = tf.reshape(x, [1, 3, 4, 1])
+    >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
+    ...    strides=(2, 2), padding='valid')
+    >>> max_pool_2d(x)
+    <tf.Tensor: shape=(1, 1, 2, 1), dtype=float32, numpy=
+    array([[[[6.],
+             [8.]]]], dtype=float32)>
+
+    Usage Example:
+
+    >>> input_image = tf.constant([[[[1.], [1.], [2.], [4.]],
+    ...                            [[2.], [2.], [3.], [2.]],
+    ...                            [[4.], [1.], [1.], [1.]],
+    ...                            [[2.], [2.], [1.], [4.]]]])
+    >>> output = tf.constant([[[[1], [0]],
+    ...                       [[0], [1]]]])
+    >>> model = tf.keras.models.Sequential()
+    >>> model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
+    ...    input_shape=(4, 4, 1)))
+    >>> model.compile('adam', 'mean_squared_error')
+    >>> model.predict(input_image, steps=1)
+    array([[[[2.],
+             [4.]],
+            [[4.],
+             [4.]]]], dtype=float32)
+
+    For example, for stride=(1, 1) and padding="same":
+
+    >>> x = tf.constant([[1., 2., 3.],
+    ...                  [4., 5., 6.],
+    ...                  [7., 8., 9.]])
+    >>> x = tf.reshape(x, [1, 3, 3, 1])
+    >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
+    ...    strides=(1, 1), padding='same')
+    >>> max_pool_2d(x)
+    <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
+    array([[[[5.],
+             [6.],
+             [6.]],
+            [[8.],
+             [9.],
+             [9.]],
+            [[8.],
+             [9.],
+             [9.]]]], dtype=float32)>
+
+    Args:
+      pool_size: integer or tuple of 2 integers,
+        window size over which to take the maximum.
+        `(2, 2)` will take the max value over a 2x2 pooling window.
+        If only one integer is specified, the same window length
+        will be used for both dimensions.
+      strides: Integer, tuple of 2 integers, or None.
+        Strides values. Specifies how far the pooling window moves
+        for each pooling step. If None, it will default to `pool_size`.
+      padding: One of `"valid"` or `"same"` (case-insensitive).
+        `"valid"` means no padding. `"same"` results in padding evenly to
+        the left/right or up/down of the input such that output has the same
+        height/width dimension as the input.
+      data_format: A string,
+        one of `channels_last` (default) or `channels_first`.
+        The ordering of the dimensions in the inputs.
+        `channels_last` corresponds to inputs with shape
+        `(batch, height, width, channels)` while `channels_first`
+        corresponds to inputs with shape
+        `(batch, channels, height, width)`.
+        When unspecified, uses
+        `image_data_format` value found in your Keras config file at
+        `~/.keras/keras.json` (if exists) else 'channels_last'.
+        Defaults to 'channels_last'.
+
+    Input shape:
+      - If `data_format='channels_last'`:
+        4D tensor with shape `(batch_size, rows, cols, channels)`.
+      - If `data_format='channels_first'`:
+        4D tensor with shape `(batch_size, channels, rows, cols)`.
+
+    Output shape:
+      - If `data_format='channels_last'`:
+        4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`.
+      - If `data_format='channels_first'`:
+        4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`.
+
+    Returns:
+      A tensor of rank 4 representing the maximum pooled values. See above for
+      output shape.
+    """
+
+    def __init__(
+        self,
+        pool_size=(2, 2),
+        strides=None,
+        padding="valid",
+        data_format=None,
+        **kwargs
+    ):
+        super().__init__(
+            tf.compat.v1.nn.max_pool,
+            pool_size=pool_size,
+            strides=strides,
+            padding=padding,
+            data_format=data_format,
+            **kwargs
+        )
 # Alias
diff --git a/keras/layers/pooling/max_pooling3d.py b/keras/layers/pooling/max_pooling3d.py
index fc31276ceb44..64b2575732eb 100644
--- a/keras/layers/pooling/max_pooling3d.py
+++ b/keras/layers/pooling/max_pooling3d.py
@@ -13,83 +13,92 @@
 # limitations under the License.
 # ==============================================================================
 """Max pooling 3D layer."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
-from keras.layers.pooling.base_pooling3d import Pooling3D
+
 import tensorflow.compat.v2 as tf
+from keras.layers.pooling.base_pooling3d import Pooling3D
+
+# isort: off
 from tensorflow.python.util.tf_export import keras_export
-@keras_export('keras.layers.MaxPool3D', 'keras.layers.MaxPooling3D')
+@keras_export("keras.layers.MaxPooling3D", "keras.layers.MaxPool3D")
 class MaxPooling3D(Pooling3D):
-  """Max pooling operation for 3D data (spatial or spatio-temporal).
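The two `MaxPooling2D` output-shape formulas above are easy to sanity-check numerically; a sketch (the 9x9 case is where `"valid"` and `"same"` diverge):

```python
import tensorflow as tf

for size in (8, 9):
    x = tf.random.normal((1, size, size, 3))
    valid = tf.keras.layers.MaxPooling2D(2, strides=2, padding="valid")(x)
    same = tf.keras.layers.MaxPooling2D(2, strides=2, padding="same")(x)
    # valid: floor((size - 2) / 2) + 1;  same: floor((size - 1) / 2) + 1
    print(size, valid.shape[1], same.shape[1])  # 8 -> 4, 4;  9 -> 4, 5
```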
- - Downsamples the input along its spatial dimensions (depth, height, and width) - by taking the maximum value over an input window - (of size defined by `pool_size`) for each channel of the input. - The window is shifted by `strides` along each dimension. - - Args: - pool_size: Tuple of 3 integers, - factors by which to downscale (dim1, dim2, dim3). - `(2, 2, 2)` will halve the size of the 3D input in each dimension. - strides: tuple of 3 integers, or None. Strides values. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - Input shape: - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` - - Output shape: - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` - - Example: - - ```python - depth = 30 - height = 30 - width = 30 - input_channels = 3 - - inputs = tf.keras.Input(shape=(depth, height, width, input_channels)) - layer = tf.keras.layers.MaxPooling3D(pool_size=3) - outputs = layer(inputs) # Shape: (batch_size, 10, 10, 10, 3) - ``` - """ - - def __init__(self, - pool_size=(2, 2, 2), - strides=None, - padding='valid', - data_format=None, - **kwargs): - super().__init__( - tf.nn.max_pool3d, - pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, **kwargs) + """Max pooling operation for 3D data (spatial or spatio-temporal). + + Downsamples the input along its spatial dimensions (depth, height, and + width) by taking the maximum value over an input window (of size defined by + `pool_size`) for each channel of the input. The window is shifted by + `strides` along each dimension. + + Args: + pool_size: Tuple of 3 integers, + factors by which to downscale (dim1, dim2, dim3). + `(2, 2, 2)` will halve the size of the 3D input in each dimension. + strides: tuple of 3 integers, or None. Strides values. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + while `channels_first` corresponds to inputs with shape + `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. 
+ When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + + Input shape: + - If `data_format='channels_last'`: + 5D tensor with shape: + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + - If `data_format='channels_first'`: + 5D tensor with shape: + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + + Output shape: + - If `data_format='channels_last'`: + 5D tensor with shape: + `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` + - If `data_format='channels_first'`: + 5D tensor with shape: + `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` + + Example: + + ```python + depth = 30 + height = 30 + width = 30 + input_channels = 3 + + inputs = tf.keras.Input(shape=(depth, height, width, input_channels)) + layer = tf.keras.layers.MaxPooling3D(pool_size=3) + outputs = layer(inputs) # Shape: (batch_size, 10, 10, 10, 3) + ``` + """ + + def __init__( + self, + pool_size=(2, 2, 2), + strides=None, + padding="valid", + data_format=None, + **kwargs + ): + super().__init__( + tf.nn.max_pool3d, + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + **kwargs + ) # Alias diff --git a/keras/layers/pooling/max_pooling_test.py b/keras/layers/pooling/max_pooling_test.py index 70fc151674c5..e1e0bc568ba2 100644 --- a/keras/layers/pooling/max_pooling_test.py +++ b/keras/layers/pooling/max_pooling_test.py @@ -14,62 +14,61 @@ # ============================================================================== """Tests for max pooling layers.""" +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class MaxPoolingTest(tf.test.TestCase, parameterized.TestCase): - - def test_max_pooling_1d(self): - for padding in ['valid', 'same']: - for stride in [1, 2]: + def test_max_pooling_1d(self): + for padding in ["valid", "same"]: + for stride in [1, 2]: + test_utils.layer_test( + keras.layers.MaxPooling1D, + kwargs={"strides": stride, "padding": padding}, + input_shape=(3, 5, 4), + ) test_utils.layer_test( keras.layers.MaxPooling1D, + kwargs={"data_format": "channels_first"}, + input_shape=(3, 2, 6), + ) + + def test_max_pooling_2d(self): + pool_size = (3, 3) + for strides in [(1, 1), (2, 2)]: + test_utils.layer_test( + keras.layers.MaxPooling2D, + kwargs={ + "strides": strides, + "padding": "valid", + "pool_size": pool_size, + }, + input_shape=(3, 5, 6, 4), + ) + + def test_max_pooling_3d(self): + pool_size = (3, 3, 3) + test_utils.layer_test( + keras.layers.MaxPooling3D, + kwargs={"strides": 2, "padding": "valid", "pool_size": pool_size}, + input_shape=(3, 11, 12, 10, 4), + ) + test_utils.layer_test( + keras.layers.MaxPooling3D, kwargs={ - 'strides': stride, - 'padding': padding + "strides": 3, + "padding": "valid", + "data_format": "channels_first", + "pool_size": pool_size, }, - input_shape=(3, 5, 4)) - test_utils.layer_test( - keras.layers.MaxPooling1D, - kwargs={'data_format': 'channels_first'}, - input_shape=(3, 2, 6)) - - def test_max_pooling_2d(self): - pool_size = (3, 3) - for strides in [(1, 1), (2, 2)]: - test_utils.layer_test( - keras.layers.MaxPooling2D, - kwargs={ - 'strides': 
strides, - 'padding': 'valid', - 'pool_size': pool_size - }, - input_shape=(3, 5, 6, 4)) + input_shape=(3, 4, 11, 12, 10), + ) - def test_max_pooling_3d(self): - pool_size = (3, 3, 3) - test_utils.layer_test( - keras.layers.MaxPooling3D, - kwargs={ - 'strides': 2, - 'padding': 'valid', - 'pool_size': pool_size - }, - input_shape=(3, 11, 12, 10, 4)) - test_utils.layer_test( - keras.layers.MaxPooling3D, - kwargs={ - 'strides': 3, - 'padding': 'valid', - 'data_format': 'channels_first', - 'pool_size': pool_size - }, - input_shape=(3, 4, 11, 12, 10)) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/preprocessing/BUILD b/keras/layers/preprocessing/BUILD index ca9cd75ca4af..17acbcd0aa3f 100644 --- a/keras/layers/preprocessing/BUILD +++ b/keras/layers/preprocessing/BUILD @@ -1,6 +1,7 @@ # Description: # Contains the Keras preprocess layers (internal TensorFlow version). +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") # buildifier: disable=same-origin-load @@ -8,6 +9,7 @@ load("@org_keras//keras:keras.bzl", "cuda_py_test") load("@org_keras//keras:keras.bzl", "distribute_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/tensorflow/tools/pip_package:__pkg__", @@ -256,6 +258,9 @@ distribute_py_test( name = "category_encoding_distribution_test", srcs = ["category_encoding_distribution_test.py"], disable_mlir_bridge = False, + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, main = "category_encoding_distribution_test.py", python_version = "PY3", shard_count = 4, @@ -282,6 +287,9 @@ distribute_py_test( distribute_py_test( name = "image_preprocessing_distribution_test", srcs = ["image_preprocessing_distribution_test.py"], + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, main = "image_preprocessing_distribution_test.py", python_version = "PY3", shard_count = 4, @@ -323,6 +331,9 @@ tf_py_test( distribute_py_test( name = "discretization_distribution_test", srcs = ["discretization_distribution_test.py"], + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, main = "discretization_distribution_test.py", python_version = "PY3", shard_count = 4, @@ -362,6 +373,9 @@ distribute_py_test( name = "hashing_distribution_test", srcs = ["hashing_distribution_test.py"], disable_mlir_bridge = False, + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, main = "hashing_distribution_test.py", python_version = "PY3", shard_count = 4, @@ -415,6 +429,9 @@ distribute_py_test( name = "index_lookup_distribution_test", srcs = ["index_lookup_distribution_test.py"], disable_mlir_bridge = False, + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, main = "index_lookup_distribution_test.py", python_version = "PY3", shard_count = 4, @@ -490,6 +507,9 @@ tf_py_test( distribute_py_test( name = "normalization_distribution_test", srcs = ["normalization_distribution_test.py"], + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, main = "normalization_distribution_test.py", python_version = "PY3", shard_count = 8, @@ -527,9 +547,12 @@ distribute_py_test( name = "text_vectorization_distribution_test", srcs = ["text_vectorization_distribution_test.py"], disable_mlir_bridge = False, + env = { + "CUDA_MODULE_LOADING": "LAZY", + }, main = "text_vectorization_distribution_test.py", python_version = "PY3", - shard_count = 4, + shard_count = 8, tags = [ "multi_and_single_gpu", "nomultivm", # TODO(b/170502145) diff --git a/keras/layers/preprocessing/benchmarks/BUILD 
b/keras/layers/preprocessing/benchmarks/BUILD index 4a6a4d15109b..66d4bf22a6b5 100644 --- a/keras/layers/preprocessing/benchmarks/BUILD +++ b/keras/layers/preprocessing/benchmarks/BUILD @@ -1,3 +1,5 @@ +# Placeholder: load unaliased py_library + # Benchmarks for Keras preprocessing layers. load("@org_keras//keras:keras.bzl", "cuda_py_test") @@ -5,6 +7,7 @@ load("@org_keras//keras:keras.bzl", "cuda_py_test") load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/tensorflow/tools/pip_package:__pkg__", diff --git a/keras/layers/preprocessing/benchmarks/__init__.py b/keras/layers/preprocessing/benchmarks/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/keras/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py b/keras/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py index ff2dbd5693c4..e12ec7ae8013 100644 --- a/keras/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py @@ -14,14 +14,19 @@ # ============================================================================== """Benchmark for KPL implementation of bucketized columns with dense inputs.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function from keras.layers.preprocessing import discretization -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 # The number of times to run each benchmark. BATCH_SIZES = [32, 256] @@ -29,46 +34,51 @@ ### KPL AND FC IMPLEMENTATION BENCHMARKS ### def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. - max_value = 25.0 - bins = np.arange(1.0, max_value) - data = fc_bm.create_data( - max_length, batch_size * NUM_REPEATS, 100000, dtype=float) - - # Keras implementation - model = keras.Sequential() - model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.float32)) - model.add(discretization.Discretization(bins)) - - # FC implementation - fc = tf.feature_column.bucketized_column( - tf.feature_column.numeric_column("data"), boundaries=list(bins)) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = {"data": data.to_tensor(default_value=0.0)} - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = {"data": data.to_tensor(default_value=0.0)} - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. 
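This benchmark times the same bucketing logic through two code paths: the Keras `Discretization` layer and `tf.feature_column.bucketized_column`, driven by identical boundaries. A minimal sketch of the shared bucketing behavior, assuming a current `tf.keras` where the constructor argument is named `bin_boundaries` (the benchmark's older `Discretization(bins)` call passes the same boundaries positionally):

```python
import tensorflow as tf

# Values map to the index of the bucket they fall into:
# x < 1.0 -> 0, 1.0 <= x < 2.0 -> 1, 2.0 <= x < 3.0 -> 2, x >= 3.0 -> 3.
layer = tf.keras.layers.Discretization(bin_boundaries=[1.0, 2.0, 3.0])
print(layer(tf.constant([[0.5, 1.0, 2.7, 10.0]])))  # [[0 1 2 3]]
```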
+ max_value = 25.0 + bins = np.arange(1.0, max_value) + data = fc_bm.create_data( + max_length, batch_size * NUM_REPEATS, 100000, dtype=float + ) + + # Keras implementation + model = keras.Sequential() + model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.float32)) + model.add(discretization.Discretization(bins)) + + # FC implementation + fc = tf.feature_column.bucketized_column( + tf.feature_column.numeric_column("data"), boundaries=list(bins) + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = {"data": data.to_tensor(default_value=0.0)} + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = {"data": data.to_tensor(default_value=0.0)} + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "bucketized|dense|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"bucketized|dense|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/category_encoding_benchmark.py b/keras/layers/preprocessing/benchmarks/category_encoding_benchmark.py index e44804626a22..15e2545c7791 100644 --- a/keras/layers/preprocessing/benchmarks/category_encoding_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/category_encoding_benchmark.py @@ -14,62 +14,70 @@ # ============================================================================== """Benchmark for Keras category_encoding preprocessing layer.""" -import tensorflow.compat.v2 as tf - import time import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.layers.preprocessing import category_encoding class BenchmarkLayer(tf.test.Benchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def run_dataset_implementation(self, output_mode, batch_size, sequence_length, - max_tokens): - input_t = keras.Input(shape=(sequence_length,), dtype=tf.int32) - layer = category_encoding.CategoryEncoding( - max_tokens=max_tokens, output_mode=output_mode) - _ = layer(input_t) + def run_dataset_implementation( + self, output_mode, batch_size, sequence_length, max_tokens + ): + input_t = keras.Input(shape=(sequence_length,), dtype=tf.int32) + layer = category_encoding.CategoryEncoding( + max_tokens=max_tokens, output_mode=output_mode + ) + _ = layer(input_t) - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - ds = tf.data.Dataset.from_tensor_slices( - tf.random.uniform([batch_size * 10, sequence_length], - minval=0, - maxval=max_tokens - 1, - dtype=tf.int32)) - ds = ds.shuffle(batch_size * 100) - ds = ds.batch(batch_size) - num_batches = 5 - ds = ds.take(num_batches) - ds = ds.prefetch(num_batches) - starts.append(time.time()) - # Benchmarked code begins here. - for i in ds: - _ = layer(i) - # Benchmarked code ends here. 
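The category-encoding benchmark in the next hunk drives a `CategoryEncoding` layer in "count" mode over batches of random ids. A hedged sketch of what that layer computes per sample, written against the current `num_tokens` argument name (the benchmark file itself still passes the older `max_tokens` keyword):

```python
import tensorflow as tf

# "count" mode returns, for each sample, how often each id appears.
layer = tf.keras.layers.CategoryEncoding(num_tokens=3, output_mode="count")
print(layer(tf.constant([[0, 1, 1], [2, 2, 2]])))
# [[1. 2. 0.]
#  [0. 0. 3.]]
```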
- ends.append(time.time()) + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + ds = tf.data.Dataset.from_tensor_slices( + tf.random.uniform( + [batch_size * 10, sequence_length], + minval=0, + maxval=max_tokens - 1, + dtype=tf.int32, + ) + ) + ds = ds.shuffle(batch_size * 100) + ds = ds.batch(batch_size) + num_batches = 5 + ds = ds.take(num_batches) + ds = ds.prefetch(num_batches) + starts.append(time.time()) + # Benchmarked code begins here. + for i in ds: + _ = layer(i) + # Benchmarked code ends here. + ends.append(time.time()) - avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches - name = "category_encoding|batch_%s|seq_length_%s|%s_max_tokens" % ( - batch_size, sequence_length, max_tokens) - self.report_benchmark(iters=num_repeats, wall_time=avg_time, name=name) + avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches + name = "category_encoding|batch_%s|seq_length_%s|%s_max_tokens" % ( + batch_size, + sequence_length, + max_tokens, + ) + self.report_benchmark(iters=num_repeats, wall_time=avg_time, name=name) - def benchmark_vocab_size_by_batch(self): - for batch in [32, 256, 2048]: - for sequence_length in [10, 1000]: - for num_tokens in [100, 1000, 20000]: - self.run_dataset_implementation( - output_mode="count", - batch_size=batch, - sequence_length=sequence_length, - max_tokens=num_tokens) + def benchmark_vocab_size_by_batch(self): + for batch in [32, 256, 2048]: + for sequence_length in [10, 1000]: + for num_tokens in [100, 1000, 20000]: + self.run_dataset_implementation( + output_mode="count", + batch_size=batch, + sequence_length=sequence_length, + max_tokens=num_tokens, + ) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py b/keras/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py index 2a50b01dcf2d..f4953cc1842b 100644 --- a/keras/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py @@ -12,64 +12,77 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmark for KPL implementation of categorical hash columns with dense inputs.""" +"""Benchmark for KPL implementation of categorical hash columns with dense +inputs.""" import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function from keras.layers.preprocessing import hashing -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. 
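The hashing benchmark below compares the Keras `Hashing` layer against `sequence_categorical_column_with_hash_bucket`. The layer side is stateless, which is the point of the comparison; a sketch assuming the public `tf.keras.layers.Hashing` API, where the argument is `num_bins` (the benchmark passes its `num_buckets` constant positionally):

```python
import tensorflow as tf

# Hashing needs no vocabulary: each string is deterministically hashed
# into one of num_bins buckets.
layer = tf.keras.layers.Hashing(num_bins=10000)
ids = layer(tf.constant([["cat", "dog", "fish"]]))
print(ids.shape)  # (1, 3); values are bucket ids in [0, 10000)
```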
- - num_buckets = 10000 - vocab = fc_bm.create_vocabulary(32768) - data = fc_bm.create_string_data( - max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.0) - - # Keras implementation - model = keras.Sequential() - model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string)) - model.add(hashing.Hashing(num_buckets)) - - # FC implementation - fc = tf.feature_column.sequence_categorical_column_with_hash_bucket("data", num_buckets) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = { - "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) - } - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = { - "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) - } - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. + + num_buckets = 10000 + vocab = fc_bm.create_vocabulary(32768) + data = fc_bm.create_string_data( + max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.0 + ) + + # Keras implementation + model = keras.Sequential() + model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string)) + model.add(hashing.Hashing(num_buckets)) + + # FC implementation + fc = tf.feature_column.sequence_categorical_column_with_hash_bucket( + "data", num_buckets + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = { + "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) + } + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = { + "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) + } + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "hash|dense|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"hash|dense|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py b/keras/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py index 07cd1d463b3b..a43f42a2c013 100644 --- a/keras/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py @@ -12,62 +12,77 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Benchmark for KPL implementation of categorical hash columns with varying-length inputs.""" +"""Benchmark for KPL implementation of categorical hash columns with +varying-length inputs.""" import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function from keras.layers.preprocessing import hashing -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. - - num_buckets = 10000 - vocab = fc_bm.create_vocabulary(32768) - data = fc_bm.create_string_data( - max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.0) - - # Keras implementation - model = keras.Sequential() - model.add( - keras.Input( - shape=(max_length,), name="data", ragged=True, dtype=tf.string)) - model.add(hashing.Hashing(num_buckets)) - - # FC implementation - fc = tf.feature_column.categorical_column_with_hash_bucket("data", num_buckets) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = {"data": data} - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = {"data": data.to_sparse()} - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. 
+ + num_buckets = 10000 + vocab = fc_bm.create_vocabulary(32768) + data = fc_bm.create_string_data( + max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.0 + ) + + # Keras implementation + model = keras.Sequential() + model.add( + keras.Input( + shape=(max_length,), name="data", ragged=True, dtype=tf.string + ) + ) + model.add(hashing.Hashing(num_buckets)) + + # FC implementation + fc = tf.feature_column.categorical_column_with_hash_bucket( + "data", num_buckets + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = {"data": data} + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = {"data": data.to_sparse()} + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "hash|varlen|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"hash|varlen|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py b/keras/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py index 26d4adb940ff..ae43734f5699 100644 --- a/keras/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py @@ -12,78 +12,98 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmark for KPL implementation of vocabulary columns from files with dense inputs.""" - -import tensorflow.compat.v2 as tf +"""Benchmark for KPL implementation of vocabulary columns from files with dense +inputs.""" import os +import tensorflow.compat.v2 as tf + import keras -from tensorflow.python.eager.def_function import function as tf_function from keras.layers.preprocessing import string_lookup -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] class BenchmarkLayer(tf.test.TestCase, fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" - - def _write_to_temp_file(self, file_name, vocab_list): - vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") - with tf.io.gfile.GFile(vocab_path, "w") as writer: - for vocab in vocab_list: - writer.write(vocab + "\n") - writer.flush() - writer.close() - return vocab_path - - def embedding_varlen(self, batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. 
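The vocabulary-file benchmarks build a `StringLookup` directly from a file path, one token per line, in the format `_write_to_temp_file` produces. A small sketch of that lookup contract (the file path and tokens here are illustrative, not from the benchmark):

```python
import tensorflow as tf

# One vocabulary token per line, matching _write_to_temp_file's format.
with open("/tmp/vocab.txt", "w") as f:
    f.write("apple\nbanana\ncherry\n")

layer = tf.keras.layers.StringLookup(
    vocabulary="/tmp/vocab.txt", mask_token=None
)
print(layer(tf.constant([["banana", "durian"]])))  # [[2 0]]
# Known tokens index from 1; index 0 is the single default OOV bucket.
```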
- vocab = fc_bm.create_vocabulary(32768) - - path = self._write_to_temp_file("tmp", vocab) - - data = fc_bm.create_string_data( - max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15) - - # Keras implementation - model = keras.Sequential() - model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string)) - model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None)) - - # FC implementation - fc = tf.feature_column.categorical_column_with_vocabulary_list( - key="data", vocabulary_list=vocab, num_oov_buckets=1) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = { - "data": data.to_tensor( - default_value="", shape=(batch_size, max_length)) - } - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = { - "data": data.to_tensor( - default_value="", shape=(batch_size, max_length)) - } - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time - - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "vocab_list|dense|batch_%s" % batch - k_time, f_time = self.embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + """Benchmark the layer forward pass.""" + + def _write_to_temp_file(self, file_name, vocab_list): + vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") + with tf.io.gfile.GFile(vocab_path, "w") as writer: + for vocab in vocab_list: + writer.write(vocab + "\n") + writer.flush() + writer.close() + return vocab_path + + def embedding_varlen(self, batch_size, max_length): + """Benchmark a variable-length embedding.""" + # Data and constants. 
+ vocab = fc_bm.create_vocabulary(32768) + + path = self._write_to_temp_file("tmp", vocab) + + data = fc_bm.create_string_data( + max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15 + ) + + # Keras implementation + model = keras.Sequential() + model.add( + keras.Input(shape=(max_length,), name="data", dtype=tf.string) + ) + model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None)) + + # FC implementation + fc = tf.feature_column.categorical_column_with_vocabulary_list( + key="data", vocabulary_list=vocab, num_oov_buckets=1 + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache( + tensors + ), + None, + ) + + # Benchmark runs + keras_data = { + "data": data.to_tensor( + default_value="", shape=(batch_size, max_length) + ) + } + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = { + "data": data.to_tensor( + default_value="", shape=(batch_size, max_length) + ) + } + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time + + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"vocab_list|dense|batch_{batch}" + k_time, f_time = self.embedding_varlen( + batch_size=batch, max_length=256 + ) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py b/keras/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py index b5e38e0eabb6..26c6f4861ed9 100644 --- a/keras/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py @@ -12,73 +12,91 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmark for KPL implementation of vocabulary columns from files with varying-length inputs.""" - -import tensorflow.compat.v2 as tf +"""Benchmark for KPL implementation of vocabulary columns from files with +varying-length inputs.""" import os +import tensorflow.compat.v2 as tf + import keras -from tensorflow.python.eager.def_function import function as tf_function from keras.layers.preprocessing import string_lookup -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] class BenchmarkLayer(tf.test.TestCase, fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" - - def _write_to_temp_file(self, file_name, vocab_list): - vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") - with tf.io.gfile.GFile(vocab_path, "w") as writer: - for vocab in vocab_list: - writer.write(vocab + "\n") - writer.flush() - writer.close() - return vocab_path - - def embedding_varlen(self, batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. 
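The varying-length variants feed `tf.RaggedTensor` inputs through the same lookup via `keras.Input(ragged=True)`. To the best of my understanding the layer also accepts ragged tensors directly in eager mode; a sketch with illustrative tokens:

```python
import tensorflow as tf

layer = tf.keras.layers.StringLookup(
    vocabulary=["apple", "banana", "cherry"], mask_token=None
)
# Variable-length rows stay ragged through the lookup; OOV maps to 0.
ragged = tf.ragged.constant([["apple"], ["banana", "mango", "cherry"]])
print(layer(ragged))  # <tf.RaggedTensor [[1], [2, 0, 3]]>
```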
- vocab = fc_bm.create_vocabulary(32768) - path = self._write_to_temp_file("tmp", vocab) - - data = fc_bm.create_string_data( - max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15) - - # Keras implementation - model = keras.Sequential() - model.add( - keras.Input( - shape=(max_length,), name="data", ragged=True, dtype=tf.string)) - model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None)) - - # FC implementation - fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list( - key="data", vocabulary_list=vocab, num_oov_buckets=1) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = {"data": data} - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = {"data": data.to_sparse()} - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time - - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "vocab_list|varlen|batch_%s" % batch - k_time, f_time = self.embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + """Benchmark the layer forward pass.""" + + def _write_to_temp_file(self, file_name, vocab_list): + vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") + with tf.io.gfile.GFile(vocab_path, "w") as writer: + for vocab in vocab_list: + writer.write(vocab + "\n") + writer.flush() + writer.close() + return vocab_path + + def embedding_varlen(self, batch_size, max_length): + """Benchmark a variable-length embedding.""" + # Data and constants. + vocab = fc_bm.create_vocabulary(32768) + path = self._write_to_temp_file("tmp", vocab) + + data = fc_bm.create_string_data( + max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15 + ) + + # Keras implementation + model = keras.Sequential() + model.add( + keras.Input( + shape=(max_length,), name="data", ragged=True, dtype=tf.string + ) + ) + model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None)) + + # FC implementation + fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list( + key="data", vocabulary_list=vocab, num_oov_buckets=1 + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache( + tensors + ), + None, + ) + + # Benchmark runs + keras_data = {"data": data} + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = {"data": data.to_sparse()} + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time + + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"vocab_list|varlen|batch_{batch}" + k_time, f_time = self.embedding_varlen( + batch_size=batch, max_length=256 + ) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py b/keras/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py index a04b30271d69..eb455a8e52bc 100644 --- a/keras/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py @@ -12,63 +12,75 @@ # See the License for the specific language governing 
permissions and # limitations under the License. # ============================================================================== -"""Benchmark for KPL implementation of vocabulary columns from lists with dense inputs.""" +"""Benchmark for KPL implementation of vocabulary columns from lists with dense +inputs.""" import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function from keras.layers.preprocessing import string_lookup -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. - vocab = fc_bm.create_vocabulary(32768) - data = fc_bm.create_string_data( - max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15) - - # Keras implementation - model = keras.Sequential() - model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string)) - model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None)) - - # FC implementation - fc = tf.feature_column.categorical_column_with_vocabulary_list( - key="data", vocabulary_list=vocab, num_oov_buckets=1) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = { - "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) - } - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = { - "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) - } - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. 
+ vocab = fc_bm.create_vocabulary(32768) + data = fc_bm.create_string_data( + max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15 + ) + + # Keras implementation + model = keras.Sequential() + model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string)) + model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None)) + + # FC implementation + fc = tf.feature_column.categorical_column_with_vocabulary_list( + key="data", vocabulary_list=vocab, num_oov_buckets=1 + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = { + "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) + } + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = { + "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) + } + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "vocab_list|dense|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"vocab_list|dense|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py b/keras/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py index be23aa79adc8..b2aa0d687a0c 100644 --- a/keras/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py @@ -12,69 +12,84 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmark for KPL implementation of vocabulary columns + indicator from lists with dense inputs.""" +"""Benchmark for KPL implementation of vocabulary columns + indicator from lists +with dense inputs.""" import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function from keras.layers.preprocessing import category_encoding from keras.layers.preprocessing import string_lookup -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. 
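The indicator benchmarks chain `StringLookup` into `CategoryEncoding` so the KPL side matches `indicator_column` semantics; the `num_tokens=vocab_size + 1` used in these files reserves one extra output slot for the lookup's OOV index 0. A sketch with a toy vocabulary:

```python
import tensorflow as tf

vocab = ["apple", "banana", "cherry"]
lookup = tf.keras.layers.StringLookup(vocabulary=vocab, mask_token=None)
# len(vocab) + 1 slots: the OOV index 0 plus one per vocabulary term.
encode = tf.keras.layers.CategoryEncoding(
    num_tokens=len(vocab) + 1, output_mode="count"
)
print(encode(lookup(tf.constant([["apple", "apple", "mango"]]))))
# [[1. 2. 0. 0.]]  (one OOV hit, two "apple" hits, no "banana"/"cherry")
```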
- vocab_size = 32768 - vocab = fc_bm.create_vocabulary(vocab_size) - data = fc_bm.create_string_data( - max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15) - - # Keras implementation - model = keras.Sequential() - model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string)) - model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None)) - model.add( - category_encoding.CategoryEncoding( - num_tokens=vocab_size + 1, output_mode="count")) - - # FC implementation - fc = tf.feature_column.indicator_column( - tf.feature_column.categorical_column_with_vocabulary_list( - key="data", vocabulary_list=vocab, num_oov_buckets=1)) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = { - "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) - } - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = { - "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) - } - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. + vocab_size = 32768 + vocab = fc_bm.create_vocabulary(vocab_size) + data = fc_bm.create_string_data( + max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15 + ) + + # Keras implementation + model = keras.Sequential() + model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string)) + model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None)) + model.add( + category_encoding.CategoryEncoding( + num_tokens=vocab_size + 1, output_mode="count" + ) + ) + + # FC implementation + fc = tf.feature_column.indicator_column( + tf.feature_column.categorical_column_with_vocabulary_list( + key="data", vocabulary_list=vocab, num_oov_buckets=1 + ) + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = { + "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) + } + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = { + "data": data.to_tensor(default_value="", shape=(batch_size, max_length)) + } + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "vocab_list_indicator|dense|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"vocab_list_indicator|dense|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py b/keras/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py index cede6b70a912..b46b01ebbb18 100644 --- 
a/keras/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py @@ -12,67 +12,84 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmark for KPL implementation of vocabulary columns + indicator from lists with varying-length inputs.""" +"""Benchmark for KPL implementation of vocabulary columns + indicator from lists +with varying-length inputs.""" import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function from keras.layers.preprocessing import category_encoding from keras.layers.preprocessing import string_lookup -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. - vocab_size = 32768 - vocab = fc_bm.create_vocabulary(vocab_size) - data = fc_bm.create_string_data( - max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15) - - # Keras implementation - model = keras.Sequential() - model.add( - keras.Input( - shape=(max_length,), name="data", ragged=True, dtype=tf.string)) - model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None)) - model.add( - category_encoding.CategoryEncoding( - num_tokens=vocab_size + 1, output_mode="count")) - - # FC implementation - fc = tf.feature_column.indicator_column( - tf.feature_column.sequence_categorical_column_with_vocabulary_list( - key="data", vocabulary_list=vocab, num_oov_buckets=1)) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = {"data": data} - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = {"data": data.to_sparse()} - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. 
+ vocab_size = 32768 + vocab = fc_bm.create_vocabulary(vocab_size) + data = fc_bm.create_string_data( + max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15 + ) + + # Keras implementation + model = keras.Sequential() + model.add( + keras.Input( + shape=(max_length,), name="data", ragged=True, dtype=tf.string + ) + ) + model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None)) + model.add( + category_encoding.CategoryEncoding( + num_tokens=vocab_size + 1, output_mode="count" + ) + ) + + # FC implementation + fc = tf.feature_column.indicator_column( + tf.feature_column.sequence_categorical_column_with_vocabulary_list( + key="data", vocabulary_list=vocab, num_oov_buckets=1 + ) + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = {"data": data} + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = {"data": data.to_sparse()} + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "vocab_list_indicator|varlen|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"vocab_list_indicator|varlen|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py b/keras/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py index 85d9a515bd37..6b1455c5ec4a 100644 --- a/keras/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py @@ -12,61 +12,75 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmark for KPL implementation of vocabulary columns from lists with varying-length inputs.""" +"""Benchmark for KPL implementation of vocabulary columns from lists with +varying-length inputs.""" import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function from keras.layers.preprocessing import string_lookup -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. 
- vocab = fc_bm.create_vocabulary(32768) - data = fc_bm.create_string_data( - max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15) - - # Keras implementation - model = keras.Sequential() - model.add( - keras.Input( - shape=(max_length,), name="data", ragged=True, dtype=tf.string)) - model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None)) - - # FC implementation - fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list( - key="data", vocabulary_list=vocab, num_oov_buckets=1) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = {"data": data} - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = {"data": data.to_sparse()} - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. + vocab = fc_bm.create_vocabulary(32768) + data = fc_bm.create_string_data( + max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15 + ) + + # Keras implementation + model = keras.Sequential() + model.add( + keras.Input( + shape=(max_length,), name="data", ragged=True, dtype=tf.string + ) + ) + model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None)) + + # FC implementation + fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list( + key="data", vocabulary_list=vocab, num_oov_buckets=1 + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = {"data": data} + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = {"data": data.to_sparse()} + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "vocab_list|varlen|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"vocab_list|varlen|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py b/keras/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py index 4f5ba20c2517..86af3a6583e0 100644 --- a/keras/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py @@ -14,11 +14,10 @@ # ============================================================================== """Benchmark for Keras discretization preprocessing layer's adapt method.""" -import tensorflow.compat.v2 as tf - import time import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.layers.preprocessing import discretization @@ -27,82 +26,83 @@ def reduce_fn(state, values, epsilon=EPSILON): - """tf.data.Dataset-friendly implementation of mean and 
variance.""" + """tf.data.Dataset-friendly implementation of mean and variance.""" - state_, = state - summary = discretization.summarize(values, epsilon) - if np.sum(state_[:, 0]) == 0: - return (summary,) - return (discretization.merge_summaries(state_, summary, epsilon),) + (state_,) = state + summary = discretization.summarize(values, epsilon) + if np.sum(state_[:, 0]) == 0: + return (summary,) + return (discretization.merge_summaries(state_, summary, epsilon),) class BenchmarkAdapt(tf.test.Benchmark): - """Benchmark adapt.""" - - def run_dataset_implementation(self, num_elements, batch_size): - input_t = keras.Input(shape=(1,)) - layer = discretization.Discretization() - _ = layer(input_t) - - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - ds = tf.data.Dataset.range(num_elements) - ds = ds.map( - lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1)) - ds = ds.batch(batch_size) - - starts.append(time.time()) - # Benchmarked code begins here. - state = ds.reduce((np.zeros((1, 2)),), reduce_fn) - - bins = discretization.get_bucket_boundaries(state, 100) - layer.set_weights([bins]) - # Benchmarked code ends here. - ends.append(time.time()) - - avg_time = np.mean(np.array(ends) - np.array(starts)) - return avg_time - - def bm_adapt_implementation(self, num_elements, batch_size): - """Test the KPL adapt implementation.""" - input_t = keras.Input(shape=(1,), dtype=tf.float32) - layer = discretization.Discretization() - _ = layer(input_t) - - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - ds = tf.data.Dataset.range(num_elements) - ds = ds.map( - lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1)) - ds = ds.batch(batch_size) - - starts.append(time.time()) - # Benchmarked code begins here. - layer.adapt(ds) - # Benchmarked code ends here. - ends.append(time.time()) - - avg_time = np.mean(np.array(ends) - np.array(starts)) - name = "discretization_adapt|%s_elements|batch_%s" % (num_elements, - batch_size) - baseline = self.run_dataset_implementation(num_elements, batch_size) - extras = { - "tf.data implementation baseline": baseline, - "delta seconds": (baseline - avg_time), - "delta percent": ((baseline - avg_time) / baseline) * 100 - } - self.report_benchmark( - iters=num_repeats, wall_time=avg_time, extras=extras, name=name) - - def benchmark_vocab_size_by_batch(self): - for vocab_size in [100, 1000, 10000, 100000, 1000000]: - for batch in [64 * 2048]: - self.bm_adapt_implementation(vocab_size, batch) + """Benchmark adapt.""" + + def run_dataset_implementation(self, num_elements, batch_size): + input_t = keras.Input(shape=(1,)) + layer = discretization.Discretization() + _ = layer(input_t) + + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + ds = tf.data.Dataset.range(num_elements) + ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1)) + ds = ds.batch(batch_size) + + starts.append(time.time()) + # Benchmarked code begins here. + state = ds.reduce((np.zeros((1, 2)),), reduce_fn) + + bins = discretization.get_bucket_boundaries(state, 100) + layer.set_weights([bins]) + # Benchmarked code ends here. 
+ ends.append(time.time()) + + avg_time = np.mean(np.array(ends) - np.array(starts)) + return avg_time + + def bm_adapt_implementation(self, num_elements, batch_size): + """Test the KPL adapt implementation.""" + input_t = keras.Input(shape=(1,), dtype=tf.float32) + layer = discretization.Discretization() + _ = layer(input_t) + + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + ds = tf.data.Dataset.range(num_elements) + ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1)) + ds = ds.batch(batch_size) + + starts.append(time.time()) + # Benchmarked code begins here. + layer.adapt(ds) + # Benchmarked code ends here. + ends.append(time.time()) + + avg_time = np.mean(np.array(ends) - np.array(starts)) + name = "discretization_adapt|%s_elements|batch_%s" % ( + num_elements, + batch_size, + ) + baseline = self.run_dataset_implementation(num_elements, batch_size) + extras = { + "tf.data implementation baseline": baseline, + "delta seconds": (baseline - avg_time), + "delta percent": ((baseline - avg_time) / baseline) * 100, + } + self.report_benchmark( + iters=num_repeats, wall_time=avg_time, extras=extras, name=name + ) + + def benchmark_vocab_size_by_batch(self): + for vocab_size in [100, 1000, 10000, 100000, 1000000]: + for batch in [64 * 2048]: + self.bm_adapt_implementation(vocab_size, batch) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/embedding_dense_benchmark.py b/keras/layers/preprocessing/benchmarks/embedding_dense_benchmark.py index 9f8a70e80d9a..bbe64c2c8d8e 100644 --- a/keras/layers/preprocessing/benchmarks/embedding_dense_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/embedding_dense_benchmark.py @@ -17,8 +17,14 @@ import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] @@ -26,48 +32,54 @@ ### KPL AND FC IMPLEMENTATION BENCHMARKS ### def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. - embedding_size = 32768 - data = fc_bm.create_data( - max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int) - - # Keras implementation - model = keras.Sequential() - model.add(keras.Input(shape=(None,), name="data", dtype=tf.int64)) - model.add(keras.layers.Embedding(embedding_size, 256)) - model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1))) - - # FC implementation - fc = tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_identity( - "data", num_buckets=embedding_size - 1), - dimension=256) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = {"data": data.to_tensor(default_value=0)} - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = {"data": data.to_tensor(default_value=0)} - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. 
+ embedding_size = 32768 + data = fc_bm.create_data( + max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int + ) + + # Keras implementation + model = keras.Sequential() + model.add(keras.Input(shape=(None,), name="data", dtype=tf.int64)) + model.add(keras.layers.Embedding(embedding_size, 256)) + model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1))) + + # FC implementation + fc = tf.feature_column.embedding_column( + tf.feature_column.categorical_column_with_identity( + "data", num_buckets=embedding_size - 1 + ), + dimension=256, + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = {"data": data.to_tensor(default_value=0)} + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = {"data": data.to_tensor(default_value=0)} + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "embedding|dense|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"embedding|dense|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py b/keras/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py index c1538a4c9c81..f7ddbcc3a571 100644 --- a/keras/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py @@ -12,13 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmark for KPL implementation of embedding column with varying-length inputs.""" +"""Benchmark for KPL implementation of embedding column with varying-length +inputs.""" import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] @@ -26,49 +33,56 @@ ### KPL AND FC IMPLEMENTATION BENCHMARKS ### def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. 
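For the embedding benchmarks, the shape contract is the interesting part: `Embedding` turns each integer id into a 256-dimensional vector, and the `Lambda(tf.reduce_mean(..., axis=-1))` in these models then averages over that embedding axis, collapsing the output back to `(batch, sequence)`. A sketch of the dense case (the varlen variant feeds a `tf.RaggedTensor` via `keras.Input(ragged=True)` instead):

```python
import tensorflow as tf

emb = tf.keras.layers.Embedding(input_dim=32768, output_dim=256)
ids = tf.constant([[1, 2, 3], [4, 5, 6]])  # (batch=2, seq=3)
vectors = emb(ids)                         # (2, 3, 256)
pooled = tf.reduce_mean(vectors, axis=-1)  # (2, 3)
print(vectors.shape, pooled.shape)
```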
- embedding_size = 32768 - data = fc_bm.create_data( - max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int) - - # Keras implementation - model = keras.Sequential() - model.add( - keras.Input(shape=(None,), ragged=True, name="data", dtype=tf.int64)) - model.add(keras.layers.Embedding(embedding_size, 256)) - model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1))) - - # FC implementation - fc = tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_identity( - "data", num_buckets=embedding_size - 1), - dimension=256) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = {"data": data} - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = {"data": data.to_sparse()} - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. + embedding_size = 32768 + data = fc_bm.create_data( + max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int + ) + + # Keras implementation + model = keras.Sequential() + model.add( + keras.Input(shape=(None,), ragged=True, name="data", dtype=tf.int64) + ) + model.add(keras.layers.Embedding(embedding_size, 256)) + model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1))) + + # FC implementation + fc = tf.feature_column.embedding_column( + tf.feature_column.categorical_column_with_identity( + "data", num_buckets=embedding_size - 1 + ), + dimension=256, + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = {"data": data} + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = {"data": data.to_sparse()} + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "embedding|varlen|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"embedding|varlen|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/feature_column_benchmark.py b/keras/layers/preprocessing/benchmarks/feature_column_benchmark.py index 572e6c823786..cb14279fc2dc 100644 --- a/keras/layers/preprocessing/benchmarks/feature_column_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/feature_column_benchmark.py @@ -14,7 +14,6 @@ # ============================================================================== """Benchmark suite for KPL and feature column implementations.""" -import tensorflow.compat.v2 as tf import itertools import math import random @@ -22,123 +21,134 @@ import time import numpy as np +import tensorflow.compat.v2 as tf import keras class LayerBenchmark(tf.test.Benchmark): - 
"""Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def report(self, name, keras_time, fc_time, iters): - """Calculate and report benchmark statistics.""" - extras = { - "fc_avg_time": fc_time, - "fc_vs_keras_sec": fc_time - keras_time, - "fc_vs_keras_pct": ((fc_time - keras_time) / fc_time) * 100, - "keras_faster_ratio": fc_time / keras_time - } - self.report_benchmark( - iters=iters, wall_time=keras_time, extras=extras, name=name) + def report(self, name, keras_time, fc_time, iters): + """Calculate and report benchmark statistics.""" + extras = { + "fc_avg_time": fc_time, + "fc_vs_keras_sec": fc_time - keras_time, + "fc_vs_keras_pct": ((fc_time - keras_time) / fc_time) * 100, + "keras_faster_ratio": fc_time / keras_time, + } + self.report_benchmark( + iters=iters, wall_time=keras_time, extras=extras, name=name + ) class StepTimingCallback(keras.callbacks.Callback): - """A callback that times non-warmup steps of a Keras predict call.""" + """A callback that times non-warmup steps of a Keras predict call.""" - def __init__(self): - self.t0 = None - self.steps = 0 + def __init__(self): + self.t0 = None + self.steps = 0 - def on_predict_batch_begin(self, batch_index, _): - if batch_index == 2: - self.t0 = time.time() - elif batch_index > 2: - self.steps += 1 + def on_predict_batch_begin(self, batch_index, _): + if batch_index == 2: + self.t0 = time.time() + elif batch_index > 2: + self.steps += 1 - def on_predict_end(self, _): - self.tn = time.time() - self.t_avg = (self.tn - self.t0) / self.steps + def on_predict_end(self, _): + self.tn = time.time() + self.t_avg = (self.tn - self.t0) / self.steps def create_data(length, num_entries, max_value, dtype): - """Create a ragged tensor with random data entries.""" - lengths = (np.random.random(size=num_entries) * length).astype(int) - total_length = np.sum(lengths) - values = (np.random.random(size=total_length) * max_value).astype(dtype) - return tf.RaggedTensor.from_row_lengths(values, lengths) - - -def create_string_data(length, - num_entries, - vocabulary, - pct_oov, - oov_string="__OOV__"): - """Create a ragged tensor with random data entries.""" - lengths = (np.random.random(size=num_entries) * length).astype(int) - total_length = np.sum(lengths) - num_oovs = int(pct_oov * total_length) - values = [] - for _ in range(total_length): - values.append(random.choice(vocabulary)) - - if pct_oov > 0: - oov_cadence = int(total_length / num_oovs) - idx = 0 - for _ in range(num_oovs): - if idx < total_length: - values[idx] = oov_string - idx += oov_cadence - - return tf.RaggedTensor.from_row_lengths(values, lengths) + """Create a ragged tensor with random data entries.""" + lengths = (np.random.random(size=num_entries) * length).astype(int) + total_length = np.sum(lengths) + values = (np.random.random(size=total_length) * max_value).astype(dtype) + return tf.RaggedTensor.from_row_lengths(values, lengths) + + +def create_string_data( + length, num_entries, vocabulary, pct_oov, oov_string="__OOV__" +): + """Create a ragged tensor with random data entries.""" + lengths = (np.random.random(size=num_entries) * length).astype(int) + total_length = np.sum(lengths) + num_oovs = int(pct_oov * total_length) + values = [] + for _ in range(total_length): + values.append(random.choice(vocabulary)) + + if pct_oov > 0: + oov_cadence = int(total_length / num_oovs) + idx = 0 + for _ in range(num_oovs): + if idx < total_length: + values[idx] = oov_string + idx += oov_cadence + + return tf.RaggedTensor.from_row_lengths(values, lengths) 
def create_vocabulary(vocab_size): - base = len(string.ascii_letters) - n = math.ceil(math.log(vocab_size, base)) - vocab = [] - for i in range(1, n + 1): - for item in itertools.product(string.ascii_letters, repeat=i): - if len(vocab) >= vocab_size: - break - vocab.append("".join(item)) - return vocab + base = len(string.ascii_letters) + n = math.ceil(math.log(vocab_size, base)) + vocab = [] + for i in range(1, n + 1): + for item in itertools.product(string.ascii_letters, repeat=i): + if len(vocab) >= vocab_size: + break + vocab.append("".join(item)) + return vocab def run_keras(data, model, batch_size, num_runs, steps_per_repeat=100): - """Benchmark a Keras model.""" - ds = tf.data.Dataset.from_tensor_slices(data).repeat().prefetch( - tf.data.AUTOTUNE).batch(batch_size).cache() - steps = 0 - times = [] - for _ in range(num_runs): - steps += steps_per_repeat - timer = StepTimingCallback() - # Benchmarked code begins here. - model.predict(ds, steps=steps, callbacks=[timer]) - # Benchmarked code ends here. - times.append(timer.t_avg) - avg_time = np.mean(times) - return avg_time + """Benchmark a Keras model.""" + ds = ( + tf.data.Dataset.from_tensor_slices(data) + .repeat() + .prefetch(tf.data.AUTOTUNE) + .batch(batch_size) + .cache() + ) + steps = 0 + times = [] + for _ in range(num_runs): + steps += steps_per_repeat + timer = StepTimingCallback() + # Benchmarked code begins here. + model.predict(ds, steps=steps, callbacks=[timer]) + # Benchmarked code ends here. + times.append(timer.t_avg) + avg_time = np.mean(times) + return avg_time def run_fc(data, fc_fn, batch_size, num_runs, steps_per_repeat=100): - """Benchmark a Feature Column.""" - - ds = tf.data.Dataset.from_tensor_slices(data).repeat().prefetch( - tf.data.AUTOTUNE).batch(batch_size).cache() - - # Trace the fc_fn - ds_iter = ds.__iter__() - fc_fn(next(ds_iter)) - fc_starts = [] - fc_ends = [] - for _ in range(num_runs): - fc_starts.append(time.time()) - # Benchmarked code begins here. - for _ in range(steps_per_repeat): - _ = fc_fn(next(ds_iter)) - # Benchmarked code ends here. - fc_ends.append(time.time()) - avg_per_step_time = (np.array(fc_ends) - - np.array(fc_starts)) / steps_per_repeat - avg_time = np.mean(avg_per_step_time) - return avg_time + """Benchmark a Feature Column.""" + + ds = ( + tf.data.Dataset.from_tensor_slices(data) + .repeat() + .prefetch(tf.data.AUTOTUNE) + .batch(batch_size) + .cache() + ) + + # Trace the fc_fn + ds_iter = ds.__iter__() + fc_fn(next(ds_iter)) + fc_starts = [] + fc_ends = [] + for _ in range(num_runs): + fc_starts.append(time.time()) + # Benchmarked code begins here. + for _ in range(steps_per_repeat): + _ = fc_fn(next(ds_iter)) + # Benchmarked code ends here. + fc_ends.append(time.time()) + avg_per_step_time = ( + np.array(fc_ends) - np.array(fc_starts) + ) / steps_per_repeat + avg_time = np.mean(avg_per_step_time) + return avg_time diff --git a/keras/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py b/keras/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py index 3dd74662fc84..9b0fad90f2c0 100644 --- a/keras/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py @@ -12,69 +12,78 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Benchmark for KPL implementation of categorical cross hash columns with dense inputs.""" +"""Benchmark for KPL implementation of categorical cross hash columns with dense +inputs.""" +import tensorflow.compat.v2 as tf + import keras from keras.layers.preprocessing import hashed_crossing -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm -import tensorflow.compat.v2 as tf -from tensorflow.python.eager.def_function import function as tf_function +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] def embedding_varlen(batch_size): - """Benchmark a variable-length embedding.""" - # Data and constants. - num_buckets = 10000 - data_a = tf.random.uniform(shape=(batch_size * NUM_REPEATS, 1), - maxval=32768, - dtype=tf.int64) - data_b = tf.strings.as_string(data_a) - - # Keras implementation - input_1 = keras.Input(shape=(1,), name="data_a", dtype=tf.int64) - input_2 = keras.Input(shape=(1,), name="data_b", dtype=tf.string) - outputs = hashed_crossing.HashedCrossing(num_buckets)([input_1, input_2]) - model = keras.Model([input_1, input_2], outputs) - - # FC implementation - fc = tf.feature_column.crossed_column(["data_a", "data_b"], num_buckets) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature( - tf.__internal__.feature_column.FeatureTransformationCache(tensors), - None) - - # Benchmark runs - keras_data = { - "data_a": data_a, - "data_b": data_b, - } - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = { - "data_a": data_a, - "data_b": data_b, - } - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. 
+ num_buckets = 10000 + data_a = tf.random.uniform( + shape=(batch_size * NUM_REPEATS, 1), maxval=32768, dtype=tf.int64 + ) + data_b = tf.strings.as_string(data_a) + + # Keras implementation + input_1 = keras.Input(shape=(1,), name="data_a", dtype=tf.int64) + input_2 = keras.Input(shape=(1,), name="data_b", dtype=tf.string) + outputs = hashed_crossing.HashedCrossing(num_buckets)([input_1, input_2]) + model = keras.Model([input_1, input_2], outputs) + + # FC implementation + fc = tf.feature_column.crossed_column(["data_a", "data_b"], num_buckets) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = { + "data_a": data_a, + "data_b": data_b, + } + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = { + "data_a": data_a, + "data_b": data_b, + } + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "hashed_cross|dense|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"hashed_cross|dense|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/hashing_benchmark.py b/keras/layers/preprocessing/benchmarks/hashing_benchmark.py index 0bd10f4eed64..0d0d5b0f8a86 100644 --- a/keras/layers/preprocessing/benchmarks/hashing_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/hashing_benchmark.py @@ -14,14 +14,13 @@ # ============================================================================== """Benchmark for Keras hashing preprocessing layer.""" -import tensorflow.compat.v2 as tf - import itertools import random import string import time import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.layers.preprocessing import hashing @@ -30,73 +29,76 @@ # word_gen creates random sequences of ASCII letters (both lowercase and upper). # The number of unique strings is ~2,700. def word_gen(): - for _ in itertools.count(1): - yield "".join(random.choice(string.ascii_letters) for i in range(2)) + for _ in itertools.count(1): + yield "".join(random.choice(string.ascii_letters) for i in range(2)) class BenchmarkLayer(tf.test.Benchmark): - """Benchmark the layer forward pass.""" - - def run_dataset_implementation(self, batch_size): - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - ds = tf.data.Dataset.from_generator(word_gen, tf.string, - tf.TensorShape([])) - ds = ds.shuffle(batch_size * 100) - ds = ds.batch(batch_size) - num_batches = 5 - ds = ds.take(num_batches) - ds = ds.prefetch(num_batches) - starts.append(time.time()) - # Benchmarked code begins here. - for i in ds: - _ = tf.strings.to_hash_bucket(i, num_buckets=2) - # Benchmarked code ends here. 
- ends.append(time.time()) - - avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches - return avg_time - - def bm_layer_implementation(self, batch_size): - input_1 = keras.Input(shape=(None,), dtype=tf.string, name="word") - layer = hashing.Hashing(num_bins=2) - _ = layer(input_1) - - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - ds = tf.data.Dataset.from_generator(word_gen, tf.string, - tf.TensorShape([])) - ds = ds.shuffle(batch_size * 100) - ds = ds.batch(batch_size) - num_batches = 5 - ds = ds.take(num_batches) - ds = ds.prefetch(num_batches) - starts.append(time.time()) - # Benchmarked code begins here. - for i in ds: - _ = layer(i) - # Benchmarked code ends here. - ends.append(time.time()) - - avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches - name = "hashing|batch_%s" % batch_size - baseline = self.run_dataset_implementation(batch_size) - extras = { - "dataset implementation baseline": baseline, - "delta seconds": (baseline - avg_time), - "delta percent": ((baseline - avg_time) / baseline) * 100 - } - self.report_benchmark( - iters=num_repeats, wall_time=avg_time, extras=extras, name=name) - - def benchmark_vocab_size_by_batch(self): - for batch in [32, 64, 256]: - self.bm_layer_implementation(batch_size=batch) + """Benchmark the layer forward pass.""" + + def run_dataset_implementation(self, batch_size): + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + ds = tf.data.Dataset.from_generator( + word_gen, tf.string, tf.TensorShape([]) + ) + ds = ds.shuffle(batch_size * 100) + ds = ds.batch(batch_size) + num_batches = 5 + ds = ds.take(num_batches) + ds = ds.prefetch(num_batches) + starts.append(time.time()) + # Benchmarked code begins here. + for i in ds: + _ = tf.strings.to_hash_bucket(i, num_buckets=2) + # Benchmarked code ends here. + ends.append(time.time()) + + avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches + return avg_time + + def bm_layer_implementation(self, batch_size): + input_1 = keras.Input(shape=(None,), dtype=tf.string, name="word") + layer = hashing.Hashing(num_bins=2) + _ = layer(input_1) + + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + ds = tf.data.Dataset.from_generator( + word_gen, tf.string, tf.TensorShape([]) + ) + ds = ds.shuffle(batch_size * 100) + ds = ds.batch(batch_size) + num_batches = 5 + ds = ds.take(num_batches) + ds = ds.prefetch(num_batches) + starts.append(time.time()) + # Benchmarked code begins here. + for i in ds: + _ = layer(i) + # Benchmarked code ends here. 
+ ends.append(time.time()) + + avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches + name = f"hashing|batch_{batch_size}" + baseline = self.run_dataset_implementation(batch_size) + extras = { + "dataset implementation baseline": baseline, + "delta seconds": (baseline - avg_time), + "delta percent": ((baseline - avg_time) / baseline) * 100, + } + self.report_benchmark( + iters=num_repeats, wall_time=avg_time, extras=extras, name=name + ) + + def benchmark_vocab_size_by_batch(self): + for batch in [32, 64, 256]: + self.bm_layer_implementation(batch_size=batch) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/image_preproc_benchmark.py b/keras/layers/preprocessing/benchmarks/image_preproc_benchmark.py index 9fc4eac16ecb..895232f22a85 100644 --- a/keras/layers/preprocessing/benchmarks/image_preproc_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/image_preproc_benchmark.py @@ -14,134 +14,145 @@ # ============================================================================== """Benchmark for Keras image preprocessing layer.""" -import tensorflow.compat.v2 as tf - import functools import time import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.layers.preprocessing import image_preprocessing -LOWER = .2 -UPPER = .4 +LOWER = 0.2 +UPPER = 0.4 BATCH_SIZE = 32 def rotate(inputs): - """rotate image.""" - inputs_shape = tf.shape(inputs) - batch_size = inputs_shape[0] - img_hd = tf.cast(inputs_shape[1], tf.float32) - img_wd = tf.cast(inputs_shape[2], tf.float32) - min_angle = LOWER * 2. * np.pi - max_angle = UPPER * 2. * np.pi - angles = tf.random.uniform( - shape=[batch_size], minval=min_angle, maxval=max_angle) - return image_preprocessing.transform( - inputs, image_preprocessing.get_rotation_matrix(angles, img_hd, img_wd)) + """rotate image.""" + inputs_shape = tf.shape(inputs) + batch_size = inputs_shape[0] + img_hd = tf.cast(inputs_shape[1], tf.float32) + img_wd = tf.cast(inputs_shape[2], tf.float32) + min_angle = LOWER * 2.0 * np.pi + max_angle = UPPER * 2.0 * np.pi + angles = tf.random.uniform( + shape=[batch_size], minval=min_angle, maxval=max_angle + ) + return image_preprocessing.transform( + inputs, image_preprocessing.get_rotation_matrix(angles, img_hd, img_wd) + ) def zoom(inputs): - """zoom image.""" - inputs_shape = tf.shape(inputs) - batch_size = inputs_shape[0] - img_hd = tf.cast(inputs_shape[1], tf.float32) - img_wd = tf.cast(inputs_shape[2], tf.float32) - height_zoom = tf.random.uniform( - shape=[batch_size, 1], minval=1. + LOWER, maxval=1. + UPPER) - width_zoom = tf.random.uniform( - shape=[batch_size, 1], minval=1. + LOWER, maxval=1. 
+ UPPER) - zooms = tf.cast( - tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32) - return image_preprocessing.transform( - inputs, image_preprocessing.get_zoom_matrix(zooms, img_hd, img_wd)) + """zoom image.""" + inputs_shape = tf.shape(inputs) + batch_size = inputs_shape[0] + img_hd = tf.cast(inputs_shape[1], tf.float32) + img_wd = tf.cast(inputs_shape[2], tf.float32) + height_zoom = tf.random.uniform( + shape=[batch_size, 1], minval=1.0 + LOWER, maxval=1.0 + UPPER + ) + width_zoom = tf.random.uniform( + shape=[batch_size, 1], minval=1.0 + LOWER, maxval=1.0 + UPPER + ) + zooms = tf.cast( + tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32 + ) + return image_preprocessing.transform( + inputs, image_preprocessing.get_zoom_matrix(zooms, img_hd, img_wd) + ) def image_augmentation(inputs, batch_size): - """image augmentation.""" - img = inputs - img = tf.image.resize(img, size=[224, 224]) - img = tf.image.random_crop(img, size=[batch_size, 224, 224, 3]) - img = rotate(img) - img = zoom(img) - return img + """image augmentation.""" + img = inputs + img = tf.image.resize(img, size=[224, 224]) + img = tf.image.random_crop(img, size=[batch_size, 224, 224, 3]) + img = rotate(img) + img = zoom(img) + return img class BenchmarkLayer(tf.test.Benchmark): - """Benchmark the layer forward pass.""" - - def run_dataset_implementation(self, batch_size): - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - ds = tf.data.Dataset.from_tensor_slices( - np.random.random((batch_size, 256, 256, 3))) - ds = ds.shuffle(batch_size * 100) - ds = ds.batch(batch_size) - ds = ds.prefetch(batch_size) - img_augmentation = functools.partial( - image_augmentation, batch_size=batch_size) - ds = ds.map(img_augmentation, num_parallel_calls=8) - starts.append(time.time()) - count = 0 - # Benchmarked code begins here. - for i in ds: - _ = i - count += 1 - # Benchmarked code ends here. - ends.append(time.time()) - - avg_time = np.mean(np.array(ends) - np.array(starts)) / count - return avg_time - - def bm_layer_implementation(self, batch_size): - with tf.device("/gpu:0"): - img = keras.Input(shape=(256, 256, 3), dtype=tf.float32) - preprocessor = keras.Sequential([ - image_preprocessing.Resizing(224, 224), - image_preprocessing.RandomCrop(height=224, width=224), - image_preprocessing.RandomRotation(factor=(.2, .4)), - image_preprocessing.RandomFlip(mode="horizontal"), - image_preprocessing.RandomZoom(.2, .2) - ]) - _ = preprocessor(img) - - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - ds = tf.data.Dataset.from_tensor_slices( - np.random.random((batch_size, 256, 256, 3))) - ds = ds.shuffle(batch_size * 100) - ds = ds.batch(batch_size) - ds = ds.prefetch(batch_size) - starts.append(time.time()) - count = 0 - # Benchmarked code begins here. - for i in ds: - _ = preprocessor(i) - count += 1 - # Benchmarked code ends here. 
- ends.append(time.time()) - - avg_time = np.mean(np.array(ends) - np.array(starts)) / count - name = "image_preprocessing|batch_%s" % batch_size - baseline = self.run_dataset_implementation(batch_size) - extras = { - "dataset implementation baseline": baseline, - "delta seconds": (baseline - avg_time), - "delta percent": ((baseline - avg_time) / baseline) * 100 - } - self.report_benchmark( - iters=num_repeats, wall_time=avg_time, extras=extras, name=name) - - def benchmark_vocab_size_by_batch(self): - for batch in [32, 64, 256]: - self.bm_layer_implementation(batch_size=batch) + """Benchmark the layer forward pass.""" + + def run_dataset_implementation(self, batch_size): + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + ds = tf.data.Dataset.from_tensor_slices( + np.random.random((batch_size, 256, 256, 3)) + ) + ds = ds.shuffle(batch_size * 100) + ds = ds.batch(batch_size) + ds = ds.prefetch(batch_size) + img_augmentation = functools.partial( + image_augmentation, batch_size=batch_size + ) + ds = ds.map(img_augmentation, num_parallel_calls=8) + starts.append(time.time()) + count = 0 + # Benchmarked code begins here. + for i in ds: + _ = i + count += 1 + # Benchmarked code ends here. + ends.append(time.time()) + + avg_time = np.mean(np.array(ends) - np.array(starts)) / count + return avg_time + + def bm_layer_implementation(self, batch_size): + with tf.device("/gpu:0"): + img = keras.Input(shape=(256, 256, 3), dtype=tf.float32) + preprocessor = keras.Sequential( + [ + image_preprocessing.Resizing(224, 224), + image_preprocessing.RandomCrop(height=224, width=224), + image_preprocessing.RandomRotation(factor=(0.2, 0.4)), + image_preprocessing.RandomFlip(mode="horizontal"), + image_preprocessing.RandomZoom(0.2, 0.2), + ] + ) + _ = preprocessor(img) + + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + ds = tf.data.Dataset.from_tensor_slices( + np.random.random((batch_size, 256, 256, 3)) + ) + ds = ds.shuffle(batch_size * 100) + ds = ds.batch(batch_size) + ds = ds.prefetch(batch_size) + starts.append(time.time()) + count = 0 + # Benchmarked code begins here. + for i in ds: + _ = preprocessor(i) + count += 1 + # Benchmarked code ends here. 
+ ends.append(time.time()) + + avg_time = np.mean(np.array(ends) - np.array(starts)) / count + name = f"image_preprocessing|batch_{batch_size}" + baseline = self.run_dataset_implementation(batch_size) + extras = { + "dataset implementation baseline": baseline, + "delta seconds": (baseline - avg_time), + "delta percent": ((baseline - avg_time) / baseline) * 100, + } + self.report_benchmark( + iters=num_repeats, wall_time=avg_time, extras=extras, name=name + ) + + def benchmark_vocab_size_by_batch(self): + for batch in [32, 64, 256]: + self.bm_layer_implementation(batch_size=batch) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py b/keras/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py index 85493722cb59..589f9ab2dea7 100644 --- a/keras/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py @@ -14,8 +14,6 @@ # ============================================================================== """Benchmark for Keras text vectorization preprocessing layer's adapt method.""" -import tensorflow.compat.v2 as tf - import collections import itertools import random @@ -23,6 +21,7 @@ import time import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.layers.preprocessing import index_lookup @@ -33,90 +32,102 @@ # word_gen creates random sequences of ASCII letters (both lowercase and upper). # The number of unique strings is ~2,700. def word_gen(): - for _ in itertools.count(1): - yield "".join(random.choice(string.ascii_letters) for i in range(2)) + for _ in itertools.count(1): + yield "".join(random.choice(string.ascii_letters) for i in range(2)) def get_top_k(dataset, k): - """Python implementation of vocabulary building using a defaultdict.""" - counts = collections.defaultdict(int) - for tensor in dataset: - data = tensor.numpy() - for element in data: - counts[element] += 1 - sorted_vocab = [ - k for k, _ in sorted( - counts.items(), key=lambda item: item[1], reverse=True) - ] - if len(sorted_vocab) > k: - sorted_vocab = sorted_vocab[:k] - return sorted_vocab + """Python implementation of vocabulary building using a defaultdict.""" + counts = collections.defaultdict(int) + for tensor in dataset: + data = tensor.numpy() + for element in data: + counts[element] += 1 + sorted_vocab = [ + k + for k, _ in sorted( + counts.items(), key=lambda item: item[1], reverse=True + ) + ] + if len(sorted_vocab) > k: + sorted_vocab = sorted_vocab[:k] + return sorted_vocab class BenchmarkAdapt(tf.test.Benchmark): - """Benchmark adapt.""" - - def run_numpy_implementation(self, num_elements, batch_size, k): - """Test the python implementation.""" - ds = tf.data.Dataset.from_generator(word_gen, tf.string, - tf.TensorShape([])) - batched_ds = ds.take(num_elements).batch(batch_size) - input_t = keras.Input(shape=(), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=k, - num_oov_indices=0, - mask_token=None, - oov_token="OOV", - dtype=tf.string) - _ = layer(input_t) - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - starts.append(time.time()) - vocab = get_top_k(batched_ds, k) - layer.set_vocabulary(vocab) - ends.append(time.time()) - avg_time = np.mean(np.array(ends) - np.array(starts)) - return avg_time - - def bm_adapt_implementation(self, num_elements, batch_size, k): - """Test the KPL adapt implementation.""" - ds = tf.data.Dataset.from_generator(word_gen, 
tf.string, - tf.TensorShape([])) - batched_ds = ds.take(num_elements).batch(batch_size) - input_t = keras.Input(shape=(), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=k, - num_oov_indices=0, - mask_token=None, - oov_token="OOV", - dtype=tf.string) - _ = layer(input_t) - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - starts.append(time.time()) - layer.adapt(batched_ds) - ends.append(time.time()) - avg_time = np.mean(np.array(ends) - np.array(starts)) - name = "index_lookup_adapt|%s_elements|vocab_size_%s|batch_%s" % ( - num_elements, k, batch_size) - baseline = self.run_numpy_implementation(num_elements, batch_size, k) - extras = { - "numpy implementation baseline": baseline, - "delta seconds": (baseline - avg_time), - "delta percent": ((baseline - avg_time) / baseline) * 100 - } - self.report_benchmark( - iters=num_repeats, wall_time=avg_time, extras=extras, name=name) - - def benchmark_vocab_size_by_batch(self): - for vocab_size in [100, 1000, 10000, 100000, 1000000]: - for batch in [1, 16, 2048]: - self.bm_adapt_implementation(vocab_size, batch, int(vocab_size / 10)) + """Benchmark adapt.""" + + def run_numpy_implementation(self, num_elements, batch_size, k): + """Test the python implementation.""" + ds = tf.data.Dataset.from_generator( + word_gen, tf.string, tf.TensorShape([]) + ) + batched_ds = ds.take(num_elements).batch(batch_size) + input_t = keras.Input(shape=(), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=k, + num_oov_indices=0, + mask_token=None, + oov_token="OOV", + dtype=tf.string, + ) + _ = layer(input_t) + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + starts.append(time.time()) + vocab = get_top_k(batched_ds, k) + layer.set_vocabulary(vocab) + ends.append(time.time()) + avg_time = np.mean(np.array(ends) - np.array(starts)) + return avg_time + + def bm_adapt_implementation(self, num_elements, batch_size, k): + """Test the KPL adapt implementation.""" + ds = tf.data.Dataset.from_generator( + word_gen, tf.string, tf.TensorShape([]) + ) + batched_ds = ds.take(num_elements).batch(batch_size) + input_t = keras.Input(shape=(), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=k, + num_oov_indices=0, + mask_token=None, + oov_token="OOV", + dtype=tf.string, + ) + _ = layer(input_t) + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + starts.append(time.time()) + layer.adapt(batched_ds) + ends.append(time.time()) + avg_time = np.mean(np.array(ends) - np.array(starts)) + name = "index_lookup_adapt|%s_elements|vocab_size_%s|batch_%s" % ( + num_elements, + k, + batch_size, + ) + baseline = self.run_numpy_implementation(num_elements, batch_size, k) + extras = { + "numpy implementation baseline": baseline, + "delta seconds": (baseline - avg_time), + "delta percent": ((baseline - avg_time) / baseline) * 100, + } + self.report_benchmark( + iters=num_repeats, wall_time=avg_time, extras=extras, name=name + ) + + def benchmark_vocab_size_by_batch(self): + for vocab_size in [100, 1000, 10000, 100000, 1000000]: + for batch in [1, 16, 2048]: + self.bm_adapt_implementation( + vocab_size, batch, int(vocab_size / 10) + ) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py b/keras/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py index d7f6868ddbdb..659d65569403 100644 --- a/keras/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py +++ 
b/keras/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py @@ -14,14 +14,13 @@ # ============================================================================== """Benchmark for Keras text vectorization preprocessing layer's adapt method.""" -import tensorflow.compat.v2 as tf - import os import random import string import time import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.layers.preprocessing import index_lookup @@ -30,107 +29,114 @@ # word_gen creates random sequences of ASCII letters (both lowercase and upper). # The number of unique strings is ~2,700. def tensor_gen(batch, num_elements): - data = [] - for _ in range(batch): - batch_element = [] - for _ in range(num_elements - 1): - tok = "".join(random.choice(string.ascii_letters) for i in range(2)) - batch_element.append(tok) - batch_element.append("") # Explicitly test the empty string. - data.append(batch_element) - return tf.constant(data) + data = [] + for _ in range(batch): + batch_element = [] + for _ in range(num_elements - 1): + tok = "".join(random.choice(string.ascii_letters) for i in range(2)) + batch_element.append(tok) + batch_element.append("") # Explicitly test the empty string. + data.append(batch_element) + return tf.constant(data) def get_vocab(): - vocab = list( - set([a + b for a in string.ascii_letters for b in string.ascii_letters])) # pylint:disable=g-complex-comprehension - vocab.sort() - return vocab + vocab = list( + set([a + b for a in string.ascii_letters for b in string.ascii_letters]) + ) + vocab.sort() + return vocab # This class uses TestCase for get_temp_dir(). class BenchmarkLookup(tf.test.Benchmark): - """Benchmark the index lookup layer's forward pass.""" - - def _write_to_temp_file(self, file_name, vocab_list): - vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") - with tf.io.gfile.GFile(vocab_path, "w") as writer: - for vocab in vocab_list: - writer.write(vocab + "\n") - writer.flush() - writer.close() - return vocab_path - - def run_numpy_implementation(self, data, vocab): - """Test the python implementation.""" - input_t = keras.Input(shape=(), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=vocab, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="OOV", - dtype=tf.string) - out_t = layer(input_t) - model = keras.Model(input_t, out_t) - num_repeats = 5 - starts = [] - ends = [] - _ = model(data) - for _ in range(num_repeats): - starts.append(time.time()) - out = model(data) - ends.append(time.time()) - avg_time = np.mean(np.array(ends) - np.array(starts)) - return avg_time, out - - def bm_adapt_implementation(self, num_elements, batch_size): - """Test the KPL adapt implementation.""" - vocab = get_vocab() - vocab_file = self._write_to_temp_file("vocab", vocab) - vocabulary_initializer = tf.lookup.TextFileInitializer( - filename=vocab_file, - key_dtype=tf.string, - key_index=tf.lookup.TextFileIndex.WHOLE_LINE, - value_dtype=tf.int64, - value_index=tf.lookup.TextFileIndex.LINE_NUMBER, - value_index_offset=2) - input_t = keras.Input(shape=(), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=vocabulary_initializer, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="OOV", - dtype=tf.string) - out_t = layer(input_t) - model = keras.Model(input_t, out_t) - num_repeats = 5 - starts = [] - ends = [] - data = tensor_gen(batch_size, num_elements) - _ = model(data) - for _ in range(num_repeats): - starts.append(time.time()) - _ = model(data) - ends.append(time.time()) - 
avg_time = np.mean(np.array(ends) - np.array(starts)) - baseline, _ = self.run_numpy_implementation(data, vocab) - extras = { - "numpy implementation baseline": baseline, - "delta seconds": (baseline - avg_time), - "delta percent": ((baseline - avg_time) / baseline) * 100 - } - name = "index_lookup_forward|%s_elements|batch_%s" % (num_elements, - batch_size) - self.report_benchmark( - iters=num_repeats, wall_time=avg_time, extras=extras, name=name) - - def benchmark_vocab_size_by_batch(self): - for tensor_size in [100, 1000, 10000]: - for batch in [1, 16, 2048]: - self.bm_adapt_implementation(tensor_size, batch) + """Benchmark the index lookup layer's forward pass.""" + + def _write_to_temp_file(self, file_name, vocab_list): + vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") + with tf.io.gfile.GFile(vocab_path, "w") as writer: + for vocab in vocab_list: + writer.write(vocab + "\n") + writer.flush() + writer.close() + return vocab_path + + def run_numpy_implementation(self, data, vocab): + """Test the python implementation.""" + input_t = keras.Input(shape=(), dtype=tf.string) + layer = index_lookup.IndexLookup( + vocabulary=vocab, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="OOV", + dtype=tf.string, + ) + out_t = layer(input_t) + model = keras.Model(input_t, out_t) + num_repeats = 5 + starts = [] + ends = [] + _ = model(data) + for _ in range(num_repeats): + starts.append(time.time()) + out = model(data) + ends.append(time.time()) + avg_time = np.mean(np.array(ends) - np.array(starts)) + return avg_time, out + + def bm_adapt_implementation(self, num_elements, batch_size): + """Test the KPL adapt implementation.""" + vocab = get_vocab() + vocab_file = self._write_to_temp_file("vocab", vocab) + vocabulary_initializer = tf.lookup.TextFileInitializer( + filename=vocab_file, + key_dtype=tf.string, + key_index=tf.lookup.TextFileIndex.WHOLE_LINE, + value_dtype=tf.int64, + value_index=tf.lookup.TextFileIndex.LINE_NUMBER, + value_index_offset=2, + ) + input_t = keras.Input(shape=(), dtype=tf.string) + layer = index_lookup.IndexLookup( + vocabulary=vocabulary_initializer, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="OOV", + dtype=tf.string, + ) + out_t = layer(input_t) + model = keras.Model(input_t, out_t) + num_repeats = 5 + starts = [] + ends = [] + data = tensor_gen(batch_size, num_elements) + _ = model(data) + for _ in range(num_repeats): + starts.append(time.time()) + _ = model(data) + ends.append(time.time()) + avg_time = np.mean(np.array(ends) - np.array(starts)) + baseline, _ = self.run_numpy_implementation(data, vocab) + extras = { + "numpy implementation baseline": baseline, + "delta seconds": (baseline - avg_time), + "delta percent": ((baseline - avg_time) / baseline) * 100, + } + name = "index_lookup_forward|%s_elements|batch_%s" % ( + num_elements, + batch_size, + ) + self.report_benchmark( + iters=num_repeats, wall_time=avg_time, extras=extras, name=name + ) + + def benchmark_vocab_size_by_batch(self): + for tensor_size in [100, 1000, 10000]: + for batch in [1, 16, 2048]: + self.bm_adapt_implementation(tensor_size, batch) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py b/keras/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py index 491216f3cff4..6d8c50b1a125 100644 --- a/keras/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py +++ 
b/keras/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py @@ -14,106 +14,109 @@ # ============================================================================== """Benchmark for Keras text vectorization preprocessing layer's adapt method.""" -import tensorflow.compat.v2 as tf - import time import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.layers.preprocessing import normalization def reduce_fn(state, values): - """tf.data.Dataset-friendly implementation of mean and variance.""" - k, n, ex, ex2 = state - # If this is the first iteration, we pick the first value to be 'k', - # which helps with precision - we assume that k is close to an average - # value and calculate mean and variance with respect to that. - k = tf.cond(tf.equal(n, 0), lambda: values[0], lambda: k) - - sum_v = tf.reduce_sum(values, axis=0) - sum_v2 = tf.reduce_sum(tf.square(values), axis=0) - ones = tf.ones_like(values, dtype=tf.int32) - batch_size = tf.reduce_sum(ones, axis=0) - batch_size_f = tf.cast(batch_size, tf.float32) - - ex = 0 + sum_v - tf.multiply(batch_size_f, k) - ex2 = 0 + sum_v2 + tf.multiply( - batch_size_f, (tf.square(k) - - tf.multiply(tf.multiply(2.0, k), sum_v))) - - return (k, n + batch_size, ex, ex2) + """tf.data.Dataset-friendly implementation of mean and variance.""" + k, n, ex, ex2 = state + # If this is the first iteration, we pick the first value to be 'k', + # which helps with precision - we assume that k is close to an average + # value and calculate mean and variance with respect to that. + k = tf.cond(tf.equal(n, 0), lambda: values[0], lambda: k) + + sum_v = tf.reduce_sum(values, axis=0) + sum_v2 = tf.reduce_sum(tf.square(values), axis=0) + ones = tf.ones_like(values, dtype=tf.int32) + batch_size = tf.reduce_sum(ones, axis=0) + batch_size_f = tf.cast(batch_size, tf.float32) + + ex = 0 + sum_v - tf.multiply(batch_size_f, k) + ex2 = ( + 0 + + sum_v2 + + tf.multiply( + batch_size_f, + (tf.square(k) - tf.multiply(tf.multiply(2.0, k), sum_v)), + ) + ) + + return (k, n + batch_size, ex, ex2) class BenchmarkAdapt(tf.test.Benchmark): - """Benchmark adapt.""" - - def run_dataset_implementation(self, num_elements, batch_size): - input_t = keras.Input(shape=(1,)) - layer = normalization.Normalization() - _ = layer(input_t) - - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - ds = tf.data.Dataset.range(num_elements) - ds = ds.map( - lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1)) - ds = ds.batch(batch_size) - - starts.append(time.time()) - # Benchmarked code begins here. - k, n, ex, ex2 = ds.reduce((0.0, 0, 0.0, 0.0), reduce_fn) - mean = k.numpy() + ex.numpy() / n.numpy() - var = (ex2.numpy() - (ex.numpy() * ex.numpy()) / n.numpy()) / ( - n.numpy() - 1) - layer.set_weights([mean, var]) - # Benchmarked code ends here. - ends.append(time.time()) - - avg_time = np.mean(np.array(ends) - np.array(starts)) - return avg_time - - def bm_adapt_implementation(self, num_elements, batch_size): - """Test the KPL adapt implementation.""" - input_t = keras.Input(shape=(1,), dtype=tf.float32) - layer = normalization.Normalization() - _ = layer(input_t) - - num_repeats = 5 - starts = [] - ends = [] - for _ in range(num_repeats): - ds = tf.data.Dataset.range(num_elements) - ds = ds.map( - lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1)) - ds = ds.batch(batch_size) - - starts.append(time.time()) - # Benchmarked code begins here. - layer.adapt(ds) - # Benchmarked code ends here. 
- ends.append(time.time()) - - avg_time = np.mean(np.array(ends) - np.array(starts)) - name = "normalization_adapt|%s_elements|batch_%s" % (num_elements, - batch_size) - baseline = self.run_dataset_implementation(num_elements, batch_size) - extras = { - "tf.data implementation baseline": baseline, - "delta seconds": (baseline - avg_time), - "delta percent": ((baseline - avg_time) / baseline) * 100 - } - self.report_benchmark( - iters=num_repeats, wall_time=avg_time, extras=extras, name=name) - - def benchmark_vocab_size_by_batch(self): - for vocab_size in [100, 1000, 10000, 100000, 1000000]: - for batch in [1, 16, 2048]: - self.bm_adapt_implementation(vocab_size, batch) + """Benchmark adapt.""" + + def run_dataset_implementation(self, num_elements, batch_size): + input_t = keras.Input(shape=(1,)) + layer = normalization.Normalization() + _ = layer(input_t) + + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + ds = tf.data.Dataset.range(num_elements) + ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1)) + ds = ds.batch(batch_size) + + starts.append(time.time()) + # Benchmarked code begins here. + k, n, ex, ex2 = ds.reduce((0.0, 0, 0.0, 0.0), reduce_fn) + mean = k.numpy() + ex.numpy() / n.numpy() + var = (ex2.numpy() - (ex.numpy() * ex.numpy()) / n.numpy()) / ( + n.numpy() - 1 + ) + layer.set_weights([mean, var]) + # Benchmarked code ends here. + ends.append(time.time()) + + avg_time = np.mean(np.array(ends) - np.array(starts)) + return avg_time + + def bm_adapt_implementation(self, num_elements, batch_size): + """Test the KPL adapt implementation.""" + input_t = keras.Input(shape=(1,), dtype=tf.float32) + layer = normalization.Normalization() + _ = layer(input_t) + + num_repeats = 5 + starts = [] + ends = [] + for _ in range(num_repeats): + ds = tf.data.Dataset.range(num_elements) + ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1)) + ds = ds.batch(batch_size) + + starts.append(time.time()) + # Benchmarked code begins here. + layer.adapt(ds) + # Benchmarked code ends here. + ends.append(time.time()) + + avg_time = np.mean(np.array(ends) - np.array(starts)) + name = f"normalization_adapt|{num_elements}_elements|batch_{batch_size}" + baseline = self.run_dataset_implementation(num_elements, batch_size) + extras = { + "tf.data implementation baseline": baseline, + "delta seconds": (baseline - avg_time), + "delta percent": ((baseline - avg_time) / baseline) * 100, + } + self.report_benchmark( + iters=num_repeats, wall_time=avg_time, extras=extras, name=name + ) + + def benchmark_vocab_size_by_batch(self): + for vocab_size in [100, 1000, 10000, 100000, 1000000]: + for batch in [1, 16, 2048]: + self.bm_adapt_implementation(vocab_size, batch) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py b/keras/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py index 40a64d1e4e76..6213761e34dd 100644 --- a/keras/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py +++ b/keras/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py @@ -12,13 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Benchmark for KPL implementation of weighted embedding column with varying-length inputs.""" +"""Benchmark for KPL implementation of weighted embedding column with +varying-length inputs.""" import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.def_function import function as tf_function -from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm +from keras.layers.preprocessing.benchmarks import ( + feature_column_benchmark as fc_bm, +) + +# isort: off +from tensorflow.python.eager.def_function import ( + function as tf_function, +) NUM_REPEATS = 10 BATCH_SIZES = [32, 256] @@ -26,56 +33,66 @@ ### KPL AND FC IMPLEMENTATION BENCHMARKS ### def embedding_varlen(batch_size, max_length): - """Benchmark a variable-length embedding.""" - # Data and constants. - embedding_size = 32768 - data = fc_bm.create_data( - max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int) - weight = tf.ones_like(data, dtype=tf.float32) - - # Keras implementation - data_input = keras.Input( - shape=(None,), ragged=True, name="data", dtype=tf.int64) - weight_input = keras.Input( - shape=(None,), ragged=True, name="weight", dtype=tf.float32) - embedded_data = keras.layers.Embedding(embedding_size, 256)(data_input) - weighted_embedding = tf.multiply( - embedded_data, tf.expand_dims(weight_input, -1)) - reduced_embedding = tf.reduce_sum(weighted_embedding, axis=1) - model = keras.Model([data_input, weight_input], reduced_embedding) - - # FC implementation - fc = tf.feature_column.embedding_column( - tf.feature_column.weighted_categorical_column( - tf.feature_column.categorical_column_with_identity( - "data", num_buckets=embedding_size - 1), - weight_feature_key="weight"), - dimension=256) - - # Wrap the FC implementation in a tf.function for a fair comparison - @tf_function() - def fc_fn(tensors): - fc.transform_feature(tf.__internal__.feature_column.FeatureTransformationCache(tensors), None) - - # Benchmark runs - keras_data = {"data": data, "weight": weight} - k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) - - fc_data = {"data": data.to_sparse(), "weight": weight.to_sparse()} - fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) - - return k_avg_time, fc_avg_time + """Benchmark a variable-length embedding.""" + # Data and constants. 
+ embedding_size = 32768 + data = fc_bm.create_data( + max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int + ) + weight = tf.ones_like(data, dtype=tf.float32) + + # Keras implementation + data_input = keras.Input( + shape=(None,), ragged=True, name="data", dtype=tf.int64 + ) + weight_input = keras.Input( + shape=(None,), ragged=True, name="weight", dtype=tf.float32 + ) + embedded_data = keras.layers.Embedding(embedding_size, 256)(data_input) + weighted_embedding = tf.multiply( + embedded_data, tf.expand_dims(weight_input, -1) + ) + reduced_embedding = tf.reduce_sum(weighted_embedding, axis=1) + model = keras.Model([data_input, weight_input], reduced_embedding) + + # FC implementation + fc = tf.feature_column.embedding_column( + tf.feature_column.weighted_categorical_column( + tf.feature_column.categorical_column_with_identity( + "data", num_buckets=embedding_size - 1 + ), + weight_feature_key="weight", + ), + dimension=256, + ) + + # Wrap the FC implementation in a tf.function for a fair comparison + @tf_function() + def fc_fn(tensors): + fc.transform_feature( + tf.__internal__.feature_column.FeatureTransformationCache(tensors), + None, + ) + + # Benchmark runs + keras_data = {"data": data, "weight": weight} + k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS) + + fc_data = {"data": data.to_sparse(), "weight": weight.to_sparse()} + fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS) + + return k_avg_time, fc_avg_time class BenchmarkLayer(fc_bm.LayerBenchmark): - """Benchmark the layer forward pass.""" + """Benchmark the layer forward pass.""" - def benchmark_layer(self): - for batch in BATCH_SIZES: - name = "weighted_embedding|varlen|batch_%s" % batch - k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) - self.report(name, k_time, f_time, NUM_REPEATS) + def benchmark_layer(self): + for batch in BATCH_SIZES: + name = f"weighted_embedding|varlen|batch_{batch}" + k_time, f_time = embedding_varlen(batch_size=batch, max_length=256) + self.report(name, k_time, f_time, NUM_REPEATS) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/category_encoding.py b/keras/layers/preprocessing/category_encoding.py index 8f41de191d95..5b606616f02e 100644 --- a/keras/layers/preprocessing/category_encoding.py +++ b/keras/layers/preprocessing/category_encoding.py @@ -14,15 +14,16 @@ # ============================================================================== """Keras CategoryEncoding preprocessing layer.""" -# pylint: disable=g-classes-have-attributes +import tensorflow.compat.v2 as tf from keras import backend from keras.engine import base_layer from keras.engine import base_preprocessing_layer from keras.layers.preprocessing import preprocessing_utils as utils from keras.utils import layer_utils -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export @@ -32,184 +33,199 @@ COUNT = utils.COUNT -@keras_export("keras.layers.CategoryEncoding", - "keras.layers.experimental.preprocessing.CategoryEncoding") +@keras_export( + "keras.layers.CategoryEncoding", + "keras.layers.experimental.preprocessing.CategoryEncoding", +) class CategoryEncoding(base_layer.Layer): - """A preprocessing layer which encodes integer features. - - This layer provides options for condensing data into a categorical encoding - when the total number of tokens are known in advance. 
It accepts integer - values as inputs, and it outputs a dense or sparse representation of those - inputs. For integer inputs where the total number of tokens is not known, use - `tf.keras.layers.IntegerLookup` instead. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Examples: - - **One-hot encoding data** - - >>> layer = tf.keras.layers.CategoryEncoding( - ... num_tokens=4, output_mode="one_hot") - >>> layer([3, 2, 0, 1]) - - - **Multi-hot encoding data** - - >>> layer = tf.keras.layers.CategoryEncoding( - ... num_tokens=4, output_mode="multi_hot") - >>> layer([[0, 1], [0, 0], [1, 2], [3, 1]]) - - - **Using weighted inputs in `"count"` mode** - - >>> layer = tf.keras.layers.CategoryEncoding( - ... num_tokens=4, output_mode="count") - >>> count_weights = np.array([[.1, .2], [.1, .1], [.2, .3], [.4, .2]]) - >>> layer([[0, 1], [0, 0], [1, 2], [3, 1]], count_weights=count_weights) - - - Args: - num_tokens: The total number of tokens the layer should support. All inputs - to the layer must integers in the range `0 <= value < num_tokens`, or an - error will be thrown. - output_mode: Specification for the output of the layer. - Defaults to `"multi_hot"`. Values can be `"one_hot"`, `"multi_hot"` or - `"count"`, configuring the layer as follows: - - `"one_hot"`: Encodes each individual element in the input into an - array of `num_tokens` size, containing a 1 at the element index. If - the last dimension is size 1, will encode on that dimension. If the - last dimension is not size 1, will append a new dimension for the - encoded output. - - `"multi_hot"`: Encodes each sample in the input into a single array - of `num_tokens` size, containing a 1 for each vocabulary term present - in the sample. Treats the last dimension as the sample dimension, if - input shape is `(..., sample_length)`, output shape will be - `(..., num_tokens)`. - - `"count"`: Like `"multi_hot"`, but the int array contains a count of - the number of times the token at that index appeared in the sample. - For all output modes, currently only output up to rank 2 is supported. - sparse: Boolean. If true, returns a `SparseTensor` instead of a dense - `Tensor`. Defaults to `False`. - - Call arguments: - inputs: A 1D or 2D tensor of integer inputs. - count_weights: A tensor in the same shape as `inputs` indicating the - weight for each sample value when summing up in `count` mode. Not used in - `"multi_hot"` or `"one_hot"` modes. - """ - - def __init__(self, - num_tokens=None, - output_mode="multi_hot", - sparse=False, - **kwargs): - # max_tokens is an old name for the num_tokens arg we continue to support - # because of usage. - if "max_tokens" in kwargs: - logging.warning( - "max_tokens is deprecated, please use num_tokens instead.") - num_tokens = kwargs["max_tokens"] - del kwargs["max_tokens"] - - # By default, output floats. This is already default for TF2, but in TF1 - # dtype is inferred from inputs, and would default to int. - if "dtype" not in kwargs: - kwargs["dtype"] = backend.floatx() - - super().__init__(**kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell("CategoryEncoding").set( - True) - - # Support deprecated names for output_modes. 
-    if output_mode == "binary":
-      output_mode = MULTI_HOT
-    # 'output_mode' must be one of (COUNT, ONE_HOT, MULTI_HOT)
-    layer_utils.validate_string_arg(
-        output_mode,
-        allowable_strings=(COUNT, ONE_HOT, MULTI_HOT),
-        layer_name="CategoryEncoding",
-        arg_name="output_mode")
-
-    if num_tokens is None:
-      raise ValueError("num_tokens must be set to use this layer. If the "
-                       "number of tokens is not known beforehand, use the "
-                       "IntegerLookup layer instead.")
-    if num_tokens < 1:
-      raise ValueError(
-          f"`num_tokens` must be >= 1. Received: num_tokens={num_tokens}.")
-
-    self.num_tokens = num_tokens
-    self.output_mode = output_mode
-    self.sparse = sparse
-
-  def compute_output_shape(self, input_shape):
-    if not input_shape:
-      return tf.TensorShape([self.num_tokens])
-    if self.output_mode == ONE_HOT and input_shape[-1] != 1:
-      return tf.TensorShape(input_shape + [self.num_tokens])
-    else:
-      return tf.TensorShape(input_shape[:-1] + [self.num_tokens])
-
-  def compute_output_signature(self, input_spec):
-    output_shape = self.compute_output_shape(input_spec.shape.as_list())
-    if self.sparse:
-      return tf.SparseTensorSpec(
-          shape=output_shape, dtype=tf.int64)
-    else:
-      return tf.TensorSpec(shape=output_shape, dtype=tf.int64)
-
-  def get_config(self):
-    config = {
-        "num_tokens": self.num_tokens,
-        "output_mode": self.output_mode,
-        "sparse": self.sparse,
-    }
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-  def call(self, inputs, count_weights=None):
-    inputs = utils.ensure_tensor(inputs)
-
-    if count_weights is not None:
-      if self.output_mode != COUNT:
-        raise ValueError(
-            "`count_weights` is not used when `output_mode` is not `'count'`. "
-            "Received `count_weights={}`.".format(count_weights))
-      count_weights = utils.ensure_tensor(count_weights, self.compute_dtype)
-
-    depth = self.num_tokens
-    if isinstance(inputs, tf.SparseTensor):
-      max_value = tf.reduce_max(inputs.values)
-      min_value = tf.reduce_min(inputs.values)
-    else:
-      max_value = tf.reduce_max(inputs)
-      min_value = tf.reduce_min(inputs)
-    condition = tf.logical_and(
-        tf.greater(tf.cast(depth, max_value.dtype), max_value),
-        tf.greater_equal(min_value, tf.cast(0, min_value.dtype)))
-    assertion = tf.Assert(condition, [
-        "Input values must be in the range 0 <= values < num_tokens"
-        " with num_tokens={}".format(depth)
-    ])
-    with tf.control_dependencies([assertion]):
-      return utils.encode_categorical_inputs(
-          inputs,
-          output_mode=self.output_mode,
-          depth=depth,
-          dtype=self.compute_dtype,
-          sparse=self.sparse,
-          count_weights=count_weights)
+    """A preprocessing layer which encodes integer features.
+
+    This layer provides options for condensing data into a categorical encoding
+    when the total number of tokens is known in advance. It accepts integer
+    values as inputs, and it outputs a dense or sparse representation of those
+    inputs. For integer inputs where the total number of tokens is not known,
+    use `tf.keras.layers.IntegerLookup` instead.
+
+    For an overview and full list of preprocessing layers, see the preprocessing
+    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).
+
+    Examples:
+
+    **One-hot encoding data**
+
+    >>> layer = tf.keras.layers.CategoryEncoding(
+    ...           num_tokens=4, output_mode="one_hot")
+    >>> layer([3, 2, 0, 1])
+
+
+    **Multi-hot encoding data**
+
+    >>> layer = tf.keras.layers.CategoryEncoding(
+    ...
num_tokens=4, output_mode="multi_hot")
+    >>> layer([[0, 1], [0, 0], [1, 2], [3, 1]])
+
+
+    **Using weighted inputs in `"count"` mode**
+
+    >>> layer = tf.keras.layers.CategoryEncoding(
+    ...           num_tokens=4, output_mode="count")
+    >>> count_weights = np.array([[.1, .2], [.1, .1], [.2, .3], [.4, .2]])
+    >>> layer([[0, 1], [0, 0], [1, 2], [3, 1]], count_weights=count_weights)
+
+
+    Args:
+      num_tokens: The total number of tokens the layer should support. All
+        inputs to the layer must be integers in the range `0 <= value <
+        num_tokens`, or an error will be thrown.
+      output_mode: Specification for the output of the layer.
+        Values can be `"one_hot"`, `"multi_hot"` or
+        `"count"`, configuring the layer as follows:
+          - `"one_hot"`: Encodes each individual element in the input into an
+            array of `num_tokens` size, containing a 1 at the element index. If
+            the last dimension is size 1, will encode on that dimension. If the
+            last dimension is not size 1, will append a new dimension for the
+            encoded output.
+          - `"multi_hot"`: Encodes each sample in the input into a single array
+            of `num_tokens` size, containing a 1 for each vocabulary term
+            present in the sample. Treats the last dimension as the sample
+            dimension, if input shape is `(..., sample_length)`, output shape
+            will be `(..., num_tokens)`.
+          - `"count"`: Like `"multi_hot"`, but the int array contains a count of
+            the number of times the token at that index appeared in the sample.
+        For all output modes, currently only output up to rank 2 is supported.
+        Defaults to `"multi_hot"`.
+      sparse: Boolean. If true, returns a `SparseTensor` instead of a dense
+        `Tensor`. Defaults to `False`.
+
+    Call arguments:
+      inputs: A 1D or 2D tensor of integer inputs.
+      count_weights: A tensor in the same shape as `inputs` indicating the
+        weight for each sample value when summing up in `count` mode. Not used
+        in `"multi_hot"` or `"one_hot"` modes.
+    """
+
+    def __init__(
+        self, num_tokens=None, output_mode="multi_hot", sparse=False, **kwargs
+    ):
+        # max_tokens is an old name for the num_tokens arg we continue to
+        # support because of usage.
+        if "max_tokens" in kwargs:
+            logging.warning(
+                "max_tokens is deprecated, please use num_tokens instead."
+            )
+            num_tokens = kwargs["max_tokens"]
+            del kwargs["max_tokens"]
+
+        # By default, output floats. This is already the default for TF2, but
+        # in TF1 dtype is inferred from inputs, and would default to int.
+        if "dtype" not in kwargs:
+            kwargs["dtype"] = backend.floatx()
+
+        super().__init__(**kwargs)
+        base_preprocessing_layer.keras_kpl_gauge.get_cell(
+            "CategoryEncoding"
+        ).set(True)
+
+        # Support deprecated names for output_modes.
+        if output_mode == "binary":
+            output_mode = MULTI_HOT
+        # 'output_mode' must be one of (COUNT, ONE_HOT, MULTI_HOT)
+        layer_utils.validate_string_arg(
+            output_mode,
+            allowable_strings=(COUNT, ONE_HOT, MULTI_HOT),
+            layer_name="CategoryEncoding",
+            arg_name="output_mode",
+        )
+
+        if num_tokens is None:
+            raise ValueError(
+                "num_tokens must be set to use this layer. If the "
+                "number of tokens is not known beforehand, use the "
+                "IntegerLookup layer instead."
+            )
+        if num_tokens < 1:
+            raise ValueError(
+                f"`num_tokens` must be >= 1. Received: num_tokens={num_tokens}."
+
+    def __init__(
+        self, num_tokens=None, output_mode="multi_hot", sparse=False, **kwargs
+    ):
+        # max_tokens is an old name for the num_tokens arg we continue to
+        # support because of usage.
+        if "max_tokens" in kwargs:
+            logging.warning(
+                "max_tokens is deprecated, please use num_tokens instead."
+            )
+            num_tokens = kwargs["max_tokens"]
+            del kwargs["max_tokens"]
+
+        # By default, output floats. This is already default for TF2, but in TF1
+        # dtype is inferred from inputs, and would default to int.
+        if "dtype" not in kwargs:
+            kwargs["dtype"] = backend.floatx()
+
+        super().__init__(**kwargs)
+        base_preprocessing_layer.keras_kpl_gauge.get_cell(
+            "CategoryEncoding"
+        ).set(True)
+
+        # Support deprecated names for output_modes.
+        if output_mode == "binary":
+            output_mode = MULTI_HOT
+        # 'output_mode' must be one of (COUNT, ONE_HOT, MULTI_HOT)
+        layer_utils.validate_string_arg(
+            output_mode,
+            allowable_strings=(COUNT, ONE_HOT, MULTI_HOT),
+            layer_name="CategoryEncoding",
+            arg_name="output_mode",
+        )
+
+        if num_tokens is None:
+            raise ValueError(
+                "num_tokens must be set to use this layer. If the "
+                "number of tokens is not known beforehand, use the "
+                "IntegerLookup layer instead."
+            )
+        if num_tokens < 1:
+            raise ValueError(
+                f"`num_tokens` must be >= 1. Received: num_tokens={num_tokens}."
+            )
+
+        self.num_tokens = num_tokens
+        self.output_mode = output_mode
+        self.sparse = sparse
+
+    def compute_output_shape(self, input_shape):
+        input_shape = list(input_shape)
+        if not input_shape:
+            return tf.TensorShape([self.num_tokens])
+        if self.output_mode == ONE_HOT and input_shape[-1] != 1:
+            return tf.TensorShape(input_shape + [self.num_tokens])
+        else:
+            return tf.TensorShape(input_shape[:-1] + [self.num_tokens])
+
+    def compute_output_signature(self, input_spec):
+        output_shape = self.compute_output_shape(input_spec.shape.as_list())
+        if self.sparse:
+            return tf.SparseTensorSpec(shape=output_shape, dtype=tf.int64)
+        else:
+            return tf.TensorSpec(shape=output_shape, dtype=tf.int64)
+
+    def get_config(self):
+        config = {
+            "num_tokens": self.num_tokens,
+            "output_mode": self.output_mode,
+            "sparse": self.sparse,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    def call(self, inputs, count_weights=None):
+        inputs = utils.ensure_tensor(inputs)
+
+        if count_weights is not None:
+            if self.output_mode != COUNT:
+                raise ValueError(
+                    "`count_weights` is not used when `output_mode` is not "
+                    f"`'count'`. Received `count_weights={count_weights}`."
+                )
+            count_weights = utils.ensure_tensor(
+                count_weights, self.compute_dtype
+            )
+
+        depth = self.num_tokens
+        if isinstance(inputs, tf.SparseTensor):
+            max_value = tf.reduce_max(inputs.values)
+            min_value = tf.reduce_min(inputs.values)
+        else:
+            max_value = tf.reduce_max(inputs)
+            min_value = tf.reduce_min(inputs)
+        condition = tf.logical_and(
+            tf.greater(tf.cast(depth, max_value.dtype), max_value),
+            tf.greater_equal(min_value, tf.cast(0, min_value.dtype)),
+        )
+        assertion = tf.Assert(
+            condition,
+            [
+                "Input values must be in the range 0 <= values < num_tokens"
+                " with num_tokens={}".format(depth)
+            ],
+        )
+        with tf.control_dependencies([assertion]):
+            return utils.encode_categorical_inputs(
+                inputs,
+                output_mode=self.output_mode,
+                depth=depth,
+                dtype=self.compute_dtype,
+                sparse=self.sparse,
+                count_weights=count_weights,
+            )
diff --git a/keras/layers/preprocessing/category_encoding_distribution_test.py b/keras/layers/preprocessing/category_encoding_distribution_test.py
index 5f8d5a72b9bf..8be4b5cc5abf 100644
--- a/keras/layers/preprocessing/category_encoding_distribution_test.py
+++ b/keras/layers/preprocessing/category_encoding_distribution_test.py
@@ -15,6 +15,8 @@
 """Distribution tests for keras.layers.preprocessing.category_encoding."""
 
+import numpy as np
+import tensorflow.compat.v2 as tf
 
 import keras
 from keras import backend
@@ -23,59 +25,64 @@
 from keras.layers.preprocessing import preprocessing_test_utils
 from keras.testing_infra import test_combinations
 from keras.testing_infra import test_utils
-import numpy as np
-import tensorflow.compat.v2 as tf
-from tensorflow.python.framework import test_util as tf_test_utils
+
+# isort: off
+from tensorflow.python.framework import (
+    test_util as tf_test_utils,
+)
 
 
 def batch_wrapper(dataset, batch_size, strategy, repeat=None):
-  if repeat:
-    dataset = dataset.repeat(repeat)
-  # TPUs currently require fully defined input shapes, drop_remainder ensures
-  # the input will have fully defined shapes.
-  if backend.is_tpu_strategy(strategy):
-    return dataset.batch(batch_size, drop_remainder=True)
-  else:
-    return dataset.batch(batch_size)
+    if repeat:
+        dataset = dataset.repeat(repeat)
+    # TPUs currently require fully defined input shapes, drop_remainder ensures
+    # the input will have fully defined shapes.
+ if backend.is_tpu_strategy(strategy): + return dataset.batch(batch_size, drop_remainder=True) + else: + return dataset.batch(batch_size) @test_utils.run_v2_only @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( - strategy=strategy_combinations.all_strategies + - strategy_combinations.multi_worker_mirrored_strategies + - strategy_combinations.parameter_server_strategies_single_worker + - strategy_combinations.parameter_server_strategies_multi_worker, - mode=["eager"])) + strategy=strategy_combinations.all_strategies + + strategy_combinations.multi_worker_mirrored_strategies + + strategy_combinations.parameter_server_strategies_single_worker + + strategy_combinations.parameter_server_strategies_multi_worker, + mode=["eager"], + ) +) class CategoryEncodingDistributionTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_strategy(self, strategy): - if (backend.is_tpu_strategy(strategy) and - not tf_test_utils.is_mlir_bridge_enabled()): - self.skipTest("TPU tests require MLIR bridge") + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_strategy(self, strategy): + if ( + backend.is_tpu_strategy(strategy) + and not tf_test_utils.is_mlir_bridge_enabled() + ): + self.skipTest("TPU tests require MLIR bridge") - input_array = np.array([[1, 2, 3, 1], [0, 3, 1, 0]]) - inp_dataset = tf.data.Dataset.from_tensor_slices(input_array) - inp_dataset = batch_wrapper(inp_dataset, 2, strategy) + input_array = np.array([[1, 2, 3, 1], [0, 3, 1, 0]]) + inp_dataset = tf.data.Dataset.from_tensor_slices(input_array) + inp_dataset = batch_wrapper(inp_dataset, 2, strategy) - # pyformat: disable - expected_output = [[0, 1, 1, 1, 0, 0], - [1, 1, 0, 1, 0, 0]] - # pyformat: enable - num_tokens = 6 - tf.config.set_soft_device_placement(True) + # pyformat: disable + expected_output = [[0, 1, 1, 1, 0, 0], [1, 1, 0, 1, 0, 0]] + # pyformat: enable + num_tokens = 6 + tf.config.set_soft_device_placement(True) - with strategy.scope(): - input_data = keras.Input(shape=(4,), dtype=tf.int32) - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(inp_dataset) - self.assertAllEqual(expected_output, output_dataset) + with strategy.scope(): + input_data = keras.Input(shape=(4,), dtype=tf.int32) + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(inp_dataset) + self.assertAllEqual(expected_output, output_dataset) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/layers/preprocessing/category_encoding_test.py b/keras/layers/preprocessing/category_encoding_test.py index 70677ea3b1a5..ed02ecc7652f 100644 --- a/keras/layers/preprocessing/category_encoding_test.py +++ b/keras/layers/preprocessing/category_encoding_test.py @@ -15,513 +15,577 @@ """Tests for Keras text category_encoding preprocessing layer.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras import backend from keras.layers import core from keras.layers.preprocessing import category_encoding from 
keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes(always_skip_v1=True) -class CategoryEncodingInputTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest - ): - - @parameterized.named_parameters( - ("list", list), - ("tuple", tuple), - ("numpy", np.array), - ("array_like", preprocessing_test_utils.ArrayLike), - ) - def test_tensor_like_inputs(self, data_fn): - category_data = data_fn([1, 2, 3, 3, 0]) - weight_data = data_fn([1, 2, 3, 1, 7]) - expected_output = [7, 1, 2, 4, 0, 0] - - layer = category_encoding.CategoryEncoding( - num_tokens=6, output_mode=category_encoding.COUNT) - output_data = layer(category_data, count_weights=weight_data) - self.assertAllEqual(output_data, expected_output) - - def test_dense_input_sparse_output(self): - input_array = tf.constant([[1, 2, 3], [3, 3, 0]]) - - # The expected output should be (X for missing value): - # [[X, 1, 1, 1, X, X] - # [1, X, X, 2, X, X]] - expected_indices = [[0, 1], [0, 2], [0, 3], [1, 0], [1, 3]] - expected_values = [1, 1, 1, 1, 2] - num_tokens = 6 - - input_data = keras.Input(shape=(None,), dtype=tf.int32) - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True) - int_data = layer(input_data) - - model = keras.Model(inputs=input_data, outputs=int_data) - sp_output_dataset = model.predict(input_array, steps=1) - self.assertAllEqual(expected_values, sp_output_dataset.values) - self.assertAllEqual(expected_indices, sp_output_dataset.indices) - - # Assert sparse output is same as dense output. - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, - output_mode=category_encoding.COUNT, - sparse=False) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array, steps=1) - self.assertAllEqual( - tf.sparse.to_dense(sp_output_dataset, default_value=0), - output_dataset) - - def test_sparse_input(self): - input_array = np.array([[1, 2, 3, 0], [0, 3, 1, 0]], dtype=np.int64) - sparse_tensor_data = tf.sparse.from_dense(input_array) - - # pyformat: disable - expected_output = [[0, 1, 1, 1, 0, 0], - [0, 1, 0, 1, 0, 0]] - # pyformat: enable - num_tokens = 6 - expected_output_shape = [None, num_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) - - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(sparse_tensor_data, steps=1) - self.assertAllEqual(expected_output, output_dataset) - - def test_sparse_input_with_weights(self): - input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 4]], dtype=np.int64) - weights_array = np.array([[.1, .2, .3, .4], [.2, .1, .4, .3]]) - sparse_tensor_data = tf.sparse.from_dense(input_array) - sparse_weight_data = tf.sparse.from_dense(weights_array) - - # pyformat: disable - expected_output = [[0, .1, .2, .3, .4, 0], - [0, .4, 0, .1, .5, 0]] - # pyformat: enable - num_tokens = 6 - expected_output_shape = [None, num_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) - weight_data = keras.Input(shape=(None,), dtype=tf.float32, sparse=True) - - layer = category_encoding.CategoryEncoding( 
- num_tokens=num_tokens, output_mode=category_encoding.COUNT) - int_data = layer(input_data, count_weights=weight_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=[input_data, weight_data], outputs=int_data) - output_dataset = model.predict([sparse_tensor_data, sparse_weight_data], - steps=1) - self.assertAllClose(expected_output, output_dataset) - - def test_sparse_input_sparse_output(self): - sp_inp = tf.SparseTensor( - indices=[[0, 0], [1, 1], [2, 0], [2, 1], [3, 1]], - values=[0, 2, 1, 1, 0], - dense_shape=[4, 2]) - input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) - - # The expected output should be (X for missing value): - # [[1, X, X, X] - # [X, X, 1, X] - # [X, 2, X, X] - # [1, X, X, X]] - expected_indices = [[0, 0], [1, 2], [2, 1], [3, 0]] - expected_values = [1, 1, 2, 1] - num_tokens = 6 - - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True) - int_data = layer(input_data) - - model = keras.Model(inputs=input_data, outputs=int_data) - sp_output_dataset = model.predict(sp_inp, steps=1) - self.assertAllEqual(expected_values, sp_output_dataset.values) - self.assertAllEqual(expected_indices, sp_output_dataset.indices) - - # Assert sparse output is same as dense output. - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, - output_mode=category_encoding.COUNT, - sparse=False) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(sp_inp, steps=1) - self.assertAllEqual( - tf.sparse.to_dense(sp_output_dataset, default_value=0), - output_dataset) - - def test_sparse_input_sparse_output_with_weights(self): - indices = [[0, 0], [1, 1], [2, 0], [2, 1], [3, 1]] - sp_inp = tf.SparseTensor( - indices=indices, values=[0, 2, 1, 1, 0], dense_shape=[4, 2]) - input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) - sp_weight = tf.SparseTensor( - indices=indices, values=[.1, .2, .4, .3, .2], dense_shape=[4, 2]) - weight_data = keras.Input(shape=(None,), dtype=tf.float32, sparse=True) - - # The expected output should be (X for missing value): - # [[1, X, X, X] - # [X, X, 1, X] - # [X, 2, X, X] - # [1, X, X, X]] - expected_indices = [[0, 0], [1, 2], [2, 1], [3, 0]] - expected_values = [.1, .2, .7, .2] - num_tokens = 6 - - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True) - int_data = layer(input_data, count_weights=weight_data) - - model = keras.Model(inputs=[input_data, weight_data], outputs=int_data) - sp_output_dataset = model.predict([sp_inp, sp_weight], steps=1) - self.assertAllClose(expected_values, sp_output_dataset.values) - self.assertAllEqual(expected_indices, sp_output_dataset.indices) - - def test_ragged_input(self): - input_array = tf.ragged.constant([[1, 2, 3], [3, 1]]) - - # pyformat: disable - expected_output = [[0, 1, 1, 1, 0, 0], - [0, 1, 0, 1, 0, 0]] - # pyformat: enable - num_tokens = 6 - expected_output_shape = [None, num_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.int32, ragged=True) - - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT) - int_data = layer(input_data) - - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array, steps=1) - self.assertAllEqual(expected_output, 
output_dataset) - - def test_ragged_input_sparse_output(self): - input_array = tf.ragged.constant([[1, 2, 3], [3, 3]]) - - # The expected output should be (X for missing value): - # [[X, 1, 1, 1] - # [X, X, X, 2]] - expected_indices = [[0, 1], [0, 2], [0, 3], [1, 3]] - expected_values = [1, 1, 1, 2] - num_tokens = 6 - - input_data = keras.Input(shape=(None,), dtype=tf.int32, ragged=True) - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True) - int_data = layer(input_data) - - model = keras.Model(inputs=input_data, outputs=int_data) - sp_output_dataset = model.predict(input_array, steps=1) - self.assertAllEqual(expected_values, sp_output_dataset.values) - self.assertAllEqual(expected_indices, sp_output_dataset.indices) - - # Assert sparse output is same as dense output. - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, - output_mode=category_encoding.COUNT, - sparse=False) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array, steps=1) - self.assertAllEqual( - tf.sparse.to_dense(sp_output_dataset, default_value=0), - output_dataset) - - def test_sparse_output_and_dense_layer(self): - input_array = tf.constant([[1, 2, 3], [3, 3, 0]]) - - num_tokens = 4 - - input_data = keras.Input(shape=(None,), dtype=tf.int32) - encoding_layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True) - int_data = encoding_layer(input_data) - dense_layer = keras.layers.Dense(units=1) - output_data = dense_layer(int_data) - - model = keras.Model(inputs=input_data, outputs=output_data) - _ = model.predict(input_array, steps=1) - - def test_dense_oov_input(self): - valid_array = tf.constant([[0, 1, 2], [0, 1, 2]]) - invalid_array = tf.constant([[0, 1, 2], [2, 3, 1]]) - num_tokens = 3 - expected_output_shape = [None, num_tokens] - encoder_layer = category_encoding.CategoryEncoding(num_tokens) - input_data = keras.Input(shape=(3,), dtype=tf.int32) - int_data = encoder_layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - model = keras.Model(inputs=input_data, outputs=int_data) - # Call predict once on valid input to compile a graph and test control flow. - _ = model.predict(valid_array, steps=1) - with self.assertRaisesRegex( - tf.errors.InvalidArgumentError, - ".*must be in the range 0 <= values < num_tokens.*"): - _ = model.predict(invalid_array, steps=1) - - def test_dense_negative(self): - valid_array = tf.constant([[0, 1, 2], [0, 1, 2]]) - invalid_array = tf.constant([[1, 2, 0], [2, 2, -1]]) - num_tokens = 3 - expected_output_shape = [None, num_tokens] - encoder_layer = category_encoding.CategoryEncoding(num_tokens) - input_data = keras.Input(shape=(3,), dtype=tf.int32) - int_data = encoder_layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - model = keras.Model(inputs=input_data, outputs=int_data) - # Call predict once on valid input to compile a graph and test control flow. 
- _ = model.predict(valid_array, steps=1) - with self.assertRaisesRegex( - tf.errors.InvalidArgumentError, - ".*must be in the range 0 <= values < num_tokens.*"): - _ = model.predict(invalid_array, steps=1) - - def test_legacy_max_tokens_arg(self): - input_array = np.array([[1, 2, 3, 1]]) - expected_output = [[0, 1, 1, 1, 0, 0]] - num_tokens = 6 - expected_output_shape = [None, num_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.int32) - layer = category_encoding.CategoryEncoding( - max_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) +class CategoryEncodingInputTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + @parameterized.named_parameters( + ("list", list), + ("tuple", tuple), + ("numpy", np.array), + ("array_like", preprocessing_test_utils.ArrayLike), + ) + def test_tensor_like_inputs(self, data_fn): + category_data = data_fn([1, 2, 3, 3, 0]) + weight_data = data_fn([1, 2, 3, 1, 7]) + expected_output = [7, 1, 2, 4, 0, 0] + + layer = category_encoding.CategoryEncoding( + num_tokens=6, output_mode=category_encoding.COUNT + ) + output_data = layer(category_data, count_weights=weight_data) + self.assertAllEqual(output_data, expected_output) + + def test_compute_output_shape(self): + layer = category_encoding.CategoryEncoding(5) + output_shape = layer.compute_output_shape((None, 1)) + self.assertListEqual(output_shape.as_list(), [None, 5]) + output_shape = layer.compute_output_shape([None, 1]) + self.assertListEqual(output_shape.as_list(), [None, 5]) + + def test_dense_input_sparse_output(self): + input_array = tf.constant([[1, 2, 3], [3, 3, 0]]) + + # The expected output should be (X for missing value): + # [[X, 1, 1, 1, X, X] + # [1, X, X, 2, X, X]] + expected_indices = [[0, 1], [0, 2], [0, 3], [1, 0], [1, 3]] + expected_values = [1, 1, 1, 1, 2] + num_tokens = 6 + + input_data = keras.Input(shape=(None,), dtype=tf.int32) + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, + output_mode=category_encoding.COUNT, + sparse=True, + ) + int_data = layer(input_data) + + model = keras.Model(inputs=input_data, outputs=int_data) + sp_output_dataset = model.predict(input_array, steps=1) + self.assertAllEqual(expected_values, sp_output_dataset.values) + self.assertAllEqual(expected_indices, sp_output_dataset.indices) + + # Assert sparse output is same as dense output. 
+ layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, + output_mode=category_encoding.COUNT, + sparse=False, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array, steps=1) + self.assertAllEqual( + tf.sparse.to_dense(sp_output_dataset, default_value=0), + output_dataset, + ) + + def test_sparse_input(self): + input_array = np.array([[1, 2, 3, 0], [0, 3, 1, 0]], dtype=np.int64) + sparse_tensor_data = tf.sparse.from_dense(input_array) + + # pyformat: disable + expected_output = [[0, 1, 1, 1, 0, 0], [0, 1, 0, 1, 0, 0]] + # pyformat: enable + num_tokens = 6 + expected_output_shape = [None, num_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) + + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT + ) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(sparse_tensor_data, steps=1) + self.assertAllEqual(expected_output, output_dataset) + + def test_sparse_input_with_weights(self): + input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 4]], dtype=np.int64) + weights_array = np.array([[0.1, 0.2, 0.3, 0.4], [0.2, 0.1, 0.4, 0.3]]) + sparse_tensor_data = tf.sparse.from_dense(input_array) + sparse_weight_data = tf.sparse.from_dense(weights_array) + + # pyformat: disable + expected_output = [[0, 0.1, 0.2, 0.3, 0.4, 0], [0, 0.4, 0, 0.1, 0.5, 0]] + # pyformat: enable + num_tokens = 6 + expected_output_shape = [None, num_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) + weight_data = keras.Input(shape=(None,), dtype=tf.float32, sparse=True) + + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.COUNT + ) + int_data = layer(input_data, count_weights=weight_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=[input_data, weight_data], outputs=int_data) + output_dataset = model.predict( + [sparse_tensor_data, sparse_weight_data], steps=1 + ) + self.assertAllClose(expected_output, output_dataset) + + def test_sparse_input_sparse_output(self): + sp_inp = tf.SparseTensor( + indices=[[0, 0], [1, 1], [2, 0], [2, 1], [3, 1]], + values=[0, 2, 1, 1, 0], + dense_shape=[4, 2], + ) + input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) + + # The expected output should be (X for missing value): + # [[1, X, X, X] + # [X, X, 1, X] + # [X, 2, X, X] + # [1, X, X, X]] + expected_indices = [[0, 0], [1, 2], [2, 1], [3, 0]] + expected_values = [1, 1, 2, 1] + num_tokens = 6 + + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, + output_mode=category_encoding.COUNT, + sparse=True, + ) + int_data = layer(input_data) + + model = keras.Model(inputs=input_data, outputs=int_data) + sp_output_dataset = model.predict(sp_inp, steps=1) + self.assertAllEqual(expected_values, sp_output_dataset.values) + self.assertAllEqual(expected_indices, sp_output_dataset.indices) + + # Assert sparse output is same as dense output. 
+ layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, + output_mode=category_encoding.COUNT, + sparse=False, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(sp_inp, steps=1) + self.assertAllEqual( + tf.sparse.to_dense(sp_output_dataset, default_value=0), + output_dataset, + ) + + def test_sparse_input_sparse_output_with_weights(self): + indices = [[0, 0], [1, 1], [2, 0], [2, 1], [3, 1]] + sp_inp = tf.SparseTensor( + indices=indices, values=[0, 2, 1, 1, 0], dense_shape=[4, 2] + ) + input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) + sp_weight = tf.SparseTensor( + indices=indices, + values=[0.1, 0.2, 0.4, 0.3, 0.2], + dense_shape=[4, 2], + ) + weight_data = keras.Input(shape=(None,), dtype=tf.float32, sparse=True) + + # The expected output should be (X for missing value): + # [[1, X, X, X] + # [X, X, 1, X] + # [X, 2, X, X] + # [1, X, X, X]] + expected_indices = [[0, 0], [1, 2], [2, 1], [3, 0]] + expected_values = [0.1, 0.2, 0.7, 0.2] + num_tokens = 6 + + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, + output_mode=category_encoding.COUNT, + sparse=True, + ) + int_data = layer(input_data, count_weights=weight_data) + + model = keras.Model(inputs=[input_data, weight_data], outputs=int_data) + sp_output_dataset = model.predict([sp_inp, sp_weight], steps=1) + self.assertAllClose(expected_values, sp_output_dataset.values) + self.assertAllEqual(expected_indices, sp_output_dataset.indices) + + def test_ragged_input(self): + input_array = tf.ragged.constant([[1, 2, 3], [3, 1]]) + + # pyformat: disable + expected_output = [[0, 1, 1, 1, 0, 0], [0, 1, 0, 1, 0, 0]] + # pyformat: enable + num_tokens = 6 + expected_output_shape = [None, num_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.int32, ragged=True) + + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT + ) + int_data = layer(input_data) + + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array, steps=1) + self.assertAllEqual(expected_output, output_dataset) + + def test_ragged_input_sparse_output(self): + input_array = tf.ragged.constant([[1, 2, 3], [3, 3]]) + + # The expected output should be (X for missing value): + # [[X, 1, 1, 1] + # [X, X, X, 2]] + expected_indices = [[0, 1], [0, 2], [0, 3], [1, 3]] + expected_values = [1, 1, 1, 2] + num_tokens = 6 + + input_data = keras.Input(shape=(None,), dtype=tf.int32, ragged=True) + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, + output_mode=category_encoding.COUNT, + sparse=True, + ) + int_data = layer(input_data) + + model = keras.Model(inputs=input_data, outputs=int_data) + sp_output_dataset = model.predict(input_array, steps=1) + self.assertAllEqual(expected_values, sp_output_dataset.values) + self.assertAllEqual(expected_indices, sp_output_dataset.indices) + + # Assert sparse output is same as dense output. 
+ layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, + output_mode=category_encoding.COUNT, + sparse=False, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array, steps=1) + self.assertAllEqual( + tf.sparse.to_dense(sp_output_dataset, default_value=0), + output_dataset, + ) + + def test_sparse_output_and_dense_layer(self): + input_array = tf.constant([[1, 2, 3], [3, 3, 0]]) + + num_tokens = 4 + + input_data = keras.Input(shape=(None,), dtype=tf.int32) + encoding_layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, + output_mode=category_encoding.COUNT, + sparse=True, + ) + int_data = encoding_layer(input_data) + dense_layer = keras.layers.Dense(units=1) + output_data = dense_layer(int_data) + + model = keras.Model(inputs=input_data, outputs=output_data) + _ = model.predict(input_array, steps=1) + + def test_dense_oov_input(self): + valid_array = tf.constant([[0, 1, 2], [0, 1, 2]]) + invalid_array = tf.constant([[0, 1, 2], [2, 3, 1]]) + num_tokens = 3 + expected_output_shape = [None, num_tokens] + encoder_layer = category_encoding.CategoryEncoding(num_tokens) + input_data = keras.Input(shape=(3,), dtype=tf.int32) + int_data = encoder_layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + model = keras.Model(inputs=input_data, outputs=int_data) + # Call predict once on valid input to compile a graph and test control + # flow. + _ = model.predict(valid_array, steps=1) + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, + ".*must be in the range 0 <= values < num_tokens.*", + ): + _ = model.predict(invalid_array, steps=1) + + def test_dense_negative(self): + valid_array = tf.constant([[0, 1, 2], [0, 1, 2]]) + invalid_array = tf.constant([[1, 2, 0], [2, 2, -1]]) + num_tokens = 3 + expected_output_shape = [None, num_tokens] + encoder_layer = category_encoding.CategoryEncoding(num_tokens) + input_data = keras.Input(shape=(3,), dtype=tf.int32) + int_data = encoder_layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + model = keras.Model(inputs=input_data, outputs=int_data) + # Call predict once on valid input to compile a graph and test control + # flow. 
+ _ = model.predict(valid_array, steps=1) + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, + ".*must be in the range 0 <= values < num_tokens.*", + ): + _ = model.predict(invalid_array, steps=1) + + def test_legacy_max_tokens_arg(self): + input_array = np.array([[1, 2, 3, 1]]) + expected_output = [[0, 1, 1, 1, 0, 0]] + num_tokens = 6 + expected_output_shape = [None, num_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.int32) + layer = category_encoding.CategoryEncoding( + max_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT + ) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) @test_combinations.run_all_keras_modes -class CategoryEncodingOutputTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest - ): - - @parameterized.named_parameters( - ("float32", tf.float32), - ("float64", tf.float64), - ) - def test_output_dtype(self, dtype): - inputs = keras.Input(shape=(1,), dtype=tf.int32) - layer = category_encoding.CategoryEncoding( - num_tokens=4, - output_mode=category_encoding.ONE_HOT, - dtype=dtype) - outputs = layer(inputs) - self.assertAllEqual(outputs.dtype, dtype) - - def test_one_hot_output(self): - input_data = np.array([[3], [2], [0], [1]]) - expected_output = [ - [0, 0, 0, 1], - [0, 0, 1, 0], - [1, 0, 0, 0], - [0, 1, 0, 0], - ] - num_tokens = 4 - expected_output_shape = [None, num_tokens] - - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.ONE_HOT) - inputs = keras.Input(shape=(1,), dtype=tf.int32) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - output_dataset = model(input_data) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - self.assertAllEqual(expected_output, output_dataset) - - def test_one_hot_output_rank_one_input(self): - input_data = np.array([3, 2, 0, 1]) - expected_output = [ - [0, 0, 0, 1], - [0, 0, 1, 0], - [1, 0, 0, 0], - [0, 1, 0, 0], - ] - num_tokens = 4 - expected_output_shape = [None, num_tokens] - - # Test call on layer directly. - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.ONE_HOT) - output_data = layer(input_data) - self.assertAllEqual(expected_output, output_data) - - # Test call on model. - inputs = keras.Input(shape=(1,), dtype=tf.int32) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - output_data = model(input_data) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - self.assertAllEqual(expected_output, output_data) - - def test_one_hot_output_rank_zero_input(self): - input_data = np.array(3) - expected_output = [0, 0, 0, 1] - num_tokens = 4 - expected_output_shape = [None, num_tokens] - - # Test call on layer directly. - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.ONE_HOT) - output_data = layer(input_data) - self.assertAllEqual(expected_output, output_data) - - # Test call on model. 
- inputs = keras.Input(shape=(1,), dtype=tf.int32) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - output_data = model(input_data) - - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - self.assertAllEqual(expected_output, output_data) - - def test_one_hot_rank_3_output_fails(self): - layer = category_encoding.CategoryEncoding( - num_tokens=4, output_mode=category_encoding.ONE_HOT) - with self.assertRaisesRegex(ValueError, "maximum supported output rank"): - _ = layer(keras.Input(shape=(4,), dtype=tf.int32)) - with self.assertRaisesRegex(ValueError, "maximum supported output rank"): - _ = layer(np.array([[3, 2, 0, 1], [3, 2, 0, 1]])) - - def test_multi_hot_output(self): - input_data = np.array([[1, 2, 3, 1], [0, 3, 1, 0]]) - expected_output = [ - [0, 1, 1, 1, 0, 0], - [1, 1, 0, 1, 0, 0], - ] - num_tokens = 6 - expected_output_shape = [None, num_tokens] - - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT) - inputs = keras.Input(shape=(None,), dtype=tf.int32) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - output_data = model.predict(input_data) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - self.assertAllEqual(expected_output, output_data) - - def test_multi_hot_output_rank_one_input(self): - input_data = np.array([3, 2, 0, 1]) - expected_output = [1, 1, 1, 1, 0, 0] - num_tokens = 6 - expected_output_shape = [None, num_tokens] - - # Test call on layer directly. - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT) - output_data = layer(input_data) - self.assertAllEqual(expected_output, output_data) - - # Test call on model. - inputs = keras.Input(shape=(4,), dtype=tf.int32) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - output_data = model(input_data) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - self.assertAllEqual(expected_output, output_data) - - def test_multi_hot_output_rank_zero_input(self): - input_data = np.array(3) - expected_output = [0, 0, 0, 1, 0, 0] - num_tokens = 6 - expected_output_shape = [None, num_tokens] - - # Test call on layer directly. - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT) - output_data = layer(input_data) - self.assertAllEqual(expected_output, output_data) - - # Test call on model. 
- inputs = keras.Input(shape=(4,), dtype=tf.int32) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - output_data = model(input_data) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - self.assertAllEqual(expected_output, output_data) - - def test_multi_hot_rank_3_output_fails(self): - layer = category_encoding.CategoryEncoding( - num_tokens=4, output_mode=category_encoding.ONE_HOT) - with self.assertRaisesRegex(ValueError, "maximum supported output rank"): - _ = layer(keras.Input(shape=(3, 4,), dtype=tf.int32)) - with self.assertRaisesRegex(ValueError, "maximum supported output rank"): - _ = layer(np.array([[[3, 2, 0, 1], [3, 2, 0, 1]]])) - - def test_count_output(self): - input_array = np.array([[1, 2, 3, 1], [0, 3, 1, 0]]) - - # pyformat: disable - expected_output = [[0, 2, 1, 1, 0, 0], - [2, 1, 0, 1, 0, 0]] - # pyformat: enable - num_tokens = 6 - expected_output_shape = [None, num_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.int32) - layer = category_encoding.CategoryEncoding( - num_tokens=6, output_mode=category_encoding.COUNT) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) +class CategoryEncodingOutputTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + @parameterized.named_parameters( + ("float32", tf.float32), + ("float64", tf.float64), + ) + def test_output_dtype(self, dtype): + inputs = keras.Input(shape=(1,), dtype=tf.int32) + layer = category_encoding.CategoryEncoding( + num_tokens=4, output_mode=category_encoding.ONE_HOT, dtype=dtype + ) + outputs = layer(inputs) + self.assertAllEqual(outputs.dtype, dtype) + + def test_one_hot_output(self): + input_data = np.array([[3], [2], [0], [1]]) + expected_output = [ + [0, 0, 0, 1], + [0, 0, 1, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + ] + num_tokens = 4 + expected_output_shape = [None, num_tokens] + + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.ONE_HOT + ) + inputs = keras.Input(shape=(1,), dtype=tf.int32) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + output_dataset = model(input_data) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + self.assertAllEqual(expected_output, output_dataset) + + def test_one_hot_output_rank_one_input(self): + input_data = np.array([3, 2, 0, 1]) + expected_output = [ + [0, 0, 0, 1], + [0, 0, 1, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + ] + num_tokens = 4 + expected_output_shape = [None, num_tokens] + + # Test call on layer directly. + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.ONE_HOT + ) + output_data = layer(input_data) + self.assertAllEqual(expected_output, output_data) + + # Test call on model. + inputs = keras.Input(shape=(1,), dtype=tf.int32) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + output_data = model(input_data) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + self.assertAllEqual(expected_output, output_data) + + def test_one_hot_output_rank_zero_input(self): + input_data = np.array(3) + expected_output = [0, 0, 0, 1] + num_tokens = 4 + expected_output_shape = [None, num_tokens] + + # Test call on layer directly. 
+ layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.ONE_HOT + ) + output_data = layer(input_data) + self.assertAllEqual(expected_output, output_data) + + # Test call on model. + inputs = keras.Input(shape=(1,), dtype=tf.int32) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + output_data = model(input_data) + + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + self.assertAllEqual(expected_output, output_data) + + def test_one_hot_rank_3_output_fails(self): + layer = category_encoding.CategoryEncoding( + num_tokens=4, output_mode=category_encoding.ONE_HOT + ) + with self.assertRaisesRegex( + ValueError, "maximum supported output rank" + ): + _ = layer(keras.Input(shape=(4,), dtype=tf.int32)) + with self.assertRaisesRegex( + ValueError, "maximum supported output rank" + ): + _ = layer(np.array([[3, 2, 0, 1], [3, 2, 0, 1]])) + + def test_multi_hot_output(self): + input_data = np.array([[1, 2, 3, 1], [0, 3, 1, 0]]) + expected_output = [ + [0, 1, 1, 1, 0, 0], + [1, 1, 0, 1, 0, 0], + ] + num_tokens = 6 + expected_output_shape = [None, num_tokens] + + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT + ) + inputs = keras.Input(shape=(None,), dtype=tf.int32) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + output_data = model.predict(input_data) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + self.assertAllEqual(expected_output, output_data) + + def test_multi_hot_output_rank_one_input(self): + input_data = np.array([3, 2, 0, 1]) + expected_output = [1, 1, 1, 1, 0, 0] + num_tokens = 6 + expected_output_shape = [None, num_tokens] + + # Test call on layer directly. + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT + ) + output_data = layer(input_data) + self.assertAllEqual(expected_output, output_data) + + # Test call on model. + inputs = keras.Input(shape=(4,), dtype=tf.int32) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + output_data = model(input_data) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + self.assertAllEqual(expected_output, output_data) + + def test_multi_hot_output_rank_zero_input(self): + input_data = np.array(3) + expected_output = [0, 0, 0, 1, 0, 0] + num_tokens = 6 + expected_output_shape = [None, num_tokens] + + # Test call on layer directly. + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT + ) + output_data = layer(input_data) + self.assertAllEqual(expected_output, output_data) + + # Test call on model. 
+ inputs = keras.Input(shape=(4,), dtype=tf.int32) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + output_data = model(input_data) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + self.assertAllEqual(expected_output, output_data) + + def test_multi_hot_rank_3_output_fails(self): + layer = category_encoding.CategoryEncoding( + num_tokens=4, output_mode=category_encoding.ONE_HOT + ) + with self.assertRaisesRegex( + ValueError, "maximum supported output rank" + ): + _ = layer( + keras.Input( + shape=( + 3, + 4, + ), + dtype=tf.int32, + ) + ) + with self.assertRaisesRegex( + ValueError, "maximum supported output rank" + ): + _ = layer(np.array([[[3, 2, 0, 1], [3, 2, 0, 1]]])) + + def test_count_output(self): + input_array = np.array([[1, 2, 3, 1], [0, 3, 1, 0]]) + + # pyformat: disable + expected_output = [[0, 2, 1, 1, 0, 0], [2, 1, 0, 1, 0, 0]] + # pyformat: enable + num_tokens = 6 + expected_output_shape = [None, num_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.int32) + layer = category_encoding.CategoryEncoding( + num_tokens=6, output_mode=category_encoding.COUNT + ) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) class CategoryEncodingModelBuildingTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - @parameterized.named_parameters( - { - "testcase_name": "count_output", - "num_tokens": 5, - "output_mode": category_encoding.COUNT - }, { - "testcase_name": "multi_hot_output", - "num_tokens": 5, - "output_mode": category_encoding.MULTI_HOT - }) - def test_end_to_end_bagged_modeling(self, output_mode, num_tokens): - input_array = np.array([[1, 2, 3, 1], [0, 3, 1, 0]]) - - input_data = keras.Input(shape=(None,), dtype=tf.int32) - layer = category_encoding.CategoryEncoding( - num_tokens=num_tokens, output_mode=output_mode) - - weights = [] - if num_tokens is None: - layer.set_num_elements(5) - layer.set_weights(weights) - - int_data = layer(input_data) - float_data = backend.cast(int_data, dtype="float32") - output_data = core.Dense(64)(float_data) - model = keras.Model(inputs=input_data, outputs=output_data) - _ = model.predict(input_array) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + @parameterized.named_parameters( + { + "testcase_name": "count_output", + "num_tokens": 5, + "output_mode": category_encoding.COUNT, + }, + { + "testcase_name": "multi_hot_output", + "num_tokens": 5, + "output_mode": category_encoding.MULTI_HOT, + }, + ) + def test_end_to_end_bagged_modeling(self, output_mode, num_tokens): + input_array = np.array([[1, 2, 3, 1], [0, 3, 1, 0]]) + + input_data = keras.Input(shape=(None,), dtype=tf.int32) + layer = category_encoding.CategoryEncoding( + num_tokens=num_tokens, output_mode=output_mode + ) + + weights = [] + if num_tokens is None: + layer.set_num_elements(5) + layer.set_weights(weights) + + int_data = layer(input_data) + float_data = backend.cast(int_data, dtype="float32") + output_data = core.Dense(64)(float_data) + model = keras.Model(inputs=input_data, outputs=output_data) + _ = model.predict(input_array) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/discretization.py b/keras/layers/preprocessing/discretization.py index d83c02853a60..72ae53c4e0ac 100644 
--- a/keras/layers/preprocessing/discretization.py
+++ b/keras/layers/preprocessing/discretization.py
@@ -14,16 +14,17 @@
 # ==============================================================================
 """Keras discretization preprocessing layer."""
 
-# pylint: disable=g-classes-have-attributes
+import numpy as np
+import tensorflow.compat.v2 as tf
 
 from keras import backend
 from keras.engine import base_preprocessing_layer
 from keras.layers.preprocessing import preprocessing_utils as utils
 from keras.utils import layer_utils
 from keras.utils import tf_utils
-import numpy as np
-import tensorflow.compat.v2 as tf
+
+# isort: off
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util.tf_export import keras_export
 
@@ -34,360 +35,410 @@
 
 
 def summarize(values, epsilon):
-  """Reduce a 1D sequence of values to a summary.
-
-  This algorithm is based on numpy.quantiles but modified to allow for
-  intermediate steps between multiple data sets. It first finds the target
-  number of bins as the reciprocal of epsilon and then takes the individual
-  values spaced at appropriate intervals to arrive at that target.
-  The final step is to return the corresponding counts between those values
-  If the target num_bins is larger than the size of values, the whole array is
-  returned (with weights of 1).
-
-  Args:
-    values: 1D `np.ndarray` to be summarized.
-    epsilon: A `'float32'` that determines the approximate desired precision.
-
-  Returns:
-    A 2D `np.ndarray` that is a summary of the inputs. First column is the
-    interpolated partition values, the second is the weights (counts).
-  """
-
-  values = tf.reshape(values, [-1])
-  values = tf.sort(values)
-  elements = tf.cast(tf.size(values), tf.float32)
-  num_buckets = 1. / epsilon
-  increment = tf.cast(elements / num_buckets, tf.int32)
-  start = increment
-  step = tf.maximum(increment, 1)
-  boundaries = values[start::step]
-  weights = tf.ones_like(boundaries)
-  weights = weights * tf.cast(step, tf.float32)
-  return tf.stack([boundaries, weights])
-
-
-def compress(summary, epsilon):
-  """Compress a summary to within `epsilon` accuracy.
+    """Reduce a 1D sequence of values to a summary.
+
+    This algorithm is based on numpy.quantiles but modified to allow for
+    intermediate steps between multiple data sets. It first finds the target
+    number of bins as the reciprocal of epsilon and then takes the individual
+    values spaced at appropriate intervals to arrive at that target.
+    The final step is to return the corresponding counts between those values.
+    If the target num_bins is larger than the size of values, the whole array is
+    returned (with weights of 1).
+
+    Args:
+      values: 1D `np.ndarray` to be summarized.
+      epsilon: A `'float32'` that determines the approximate desired
+        precision.
+
+    Returns:
+      A 2D `np.ndarray` that is a summary of the inputs. First column is the
+      interpolated partition values, the second is the weights (counts).
+    """
-  The compression step is needed to keep the summary sizes small after merging,
-  and also used to return the final target boundaries. It finds the new bins
-  based on interpolating cumulative weight percentages from the large summary.
-  Taking the difference of the cumulative weights from the previous bin's
-  cumulative weight will give the new weight for that bin.
+    values = tf.reshape(values, [-1])
+    values = tf.sort(values)
+    elements = tf.cast(tf.size(values), tf.float32)
+    num_buckets = 1.0 / epsilon
+    increment = tf.cast(elements / num_buckets, tf.int32)
+    start = increment
+    step = tf.maximum(increment, 1)
+    boundaries = values[start::step]
+    weights = tf.ones_like(boundaries)
+    weights = weights * tf.cast(step, tf.float32)
+    return tf.stack([boundaries, weights])
-  Args:
-    summary: 2D `np.ndarray` summary to be compressed.
-    epsilon: A `'float32'` that determines the approxmiate desired precision.
-
-  Returns:
-    A 2D `np.ndarray` that is a compressed summary. First column is the
-    interpolated partition values, the second is the weights (counts).
-  """
-  # TODO(b/184863356): remove the numpy escape hatch here.
-  return tf.numpy_function(
-      lambda s: _compress_summary_numpy(s, epsilon), [summary], tf.float32)
+
+def compress(summary, epsilon):
+    """Compress a summary to within `epsilon` accuracy.
+
+    The compression step is needed to keep the summary sizes small after
+    merging, and also used to return the final target boundaries. It finds the
+    new bins based on interpolating cumulative weight percentages from the large
+    summary. Taking the difference of the cumulative weights from the previous
+    bin's cumulative weight will give the new weight for that bin.
+
+    Args:
+      summary: 2D `np.ndarray` summary to be compressed.
+      epsilon: A `'float32'` that determines the approximate desired
+        precision.
+
+    Returns:
+      A 2D `np.ndarray` that is a compressed summary. First column is the
+      interpolated partition values, the second is the weights (counts).
+    """
+    # TODO(b/184863356): remove the numpy escape hatch here.
+    return tf.numpy_function(
+        lambda s: _compress_summary_numpy(s, epsilon), [summary], tf.float32
+    )
 
 
 def _compress_summary_numpy(summary, epsilon):
-  """Compress a summary with numpy."""
-  if summary.shape[1] * epsilon < 1:
-    return summary
-
-  percents = epsilon + np.arange(0.0, 1.0, epsilon)
-  cum_weights = summary[1].cumsum()
-  cum_weight_percents = cum_weights / cum_weights[-1]
-  new_bins = np.interp(percents, cum_weight_percents, summary[0])
-  cum_weights = np.interp(percents, cum_weight_percents, cum_weights)
-  new_weights = cum_weights - np.concatenate((np.array([0]), cum_weights[:-1]))
-  summary = np.stack((new_bins, new_weights))
-  return summary.astype(np.float32)
+    """Compress a summary with numpy."""
+    if summary.shape[1] * epsilon < 1:
+        return summary
+
+    percents = epsilon + np.arange(0.0, 1.0, epsilon)
+    cum_weights = summary[1].cumsum()
+    cum_weight_percents = cum_weights / cum_weights[-1]
+    new_bins = np.interp(percents, cum_weight_percents, summary[0])
+    cum_weights = np.interp(percents, cum_weight_percents, cum_weights)
+    new_weights = cum_weights - np.concatenate(
+        (np.array([0]), cum_weights[:-1])
+    )
+    summary = np.stack((new_bins, new_weights))
+    return summary.astype(np.float32)
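To make the summary format concrete: it is a 2 x N array with candidate
boundaries in row 0 and counts in row 1. A small NumPy-only sketch of what
`summarize` computes on toy data (illustrative; it mirrors the TF code above
without requiring a TF runtime):

    import numpy as np

    values = np.arange(100.0)  # 100 evenly spaced samples
    epsilon = 0.1              # target roughly 1 / epsilon = 10 bins

    # Keep every `step`-th sorted value as a candidate boundary, each
    # standing in for `step` original samples.
    step = max(int(values.size * epsilon), 1)
    boundaries = np.sort(values)[step::step]
    weights = np.full(boundaries.shape, float(step))
    summary = np.stack([boundaries, weights])
    print(summary.shape)  # (2, 9) -- 9 boundaries, each with weight 10.0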
 
 
 def merge_summaries(prev_summary, next_summary, epsilon):
-  """Weighted merge sort of summaries.
+    """Weighted merge sort of summaries.
 
-  Given two summaries of distinct data, this function merges (and compresses)
-  them to stay within `epsilon` error tolerance.
+    Given two summaries of distinct data, this function merges (and compresses)
+    them to stay within `epsilon` error tolerance.
 
-  Args:
-    prev_summary: 2D `np.ndarray` summary to be merged with `next_summary`.
-    next_summary: 2D `np.ndarray` summary to be merged with `prev_summary`.
-    epsilon: A float that determines the approxmiate desired precision.
+    Args:
+      prev_summary: 2D `np.ndarray` summary to be merged with `next_summary`.
+      next_summary: 2D `np.ndarray` summary to be merged with `prev_summary`.
+      epsilon: A float that determines the approximate desired precision.
 
-  Returns:
-    A 2-D `np.ndarray` that is a merged summary. First column is the
-    interpolated partition values, the second is the weights (counts).
-  """
-  merged = tf.concat((prev_summary, next_summary), axis=1)
-  merged = tf.gather(merged, tf.argsort(merged[0]), axis=1)
-  return compress(merged, epsilon)
+    Returns:
+      A 2-D `np.ndarray` that is a merged summary. First column is the
+      interpolated partition values, the second is the weights (counts).
+    """
+    merged = tf.concat((prev_summary, next_summary), axis=1)
+    merged = tf.gather(merged, tf.argsort(merged[0]), axis=1)
+    return compress(merged, epsilon)
 
 
 def get_bin_boundaries(summary, num_bins):
-  return compress(summary, 1.0 / num_bins)[0, :-1]
+    return compress(summary, 1.0 / num_bins)[0, :-1]
 
 
-@keras_export("keras.layers.Discretization",
-              "keras.layers.experimental.preprocessing.Discretization")
+@keras_export(
+    "keras.layers.Discretization",
+    "keras.layers.experimental.preprocessing.Discretization",
+)
 class Discretization(base_preprocessing_layer.PreprocessingLayer):
-  """A preprocessing layer which buckets continuous features by ranges.
-
-  This layer will place each element of its input data into one of several
-  contiguous ranges and output an integer index indicating which range each
-  element was placed in.
-
-  For an overview and full list of preprocessing layers, see the preprocessing
-  [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).
-
-  Input shape:
-    Any `tf.Tensor` or `tf.RaggedTensor` of dimension 2 or higher.
-
-  Output shape:
-    Same as input shape.
-
-  Arguments:
-    bin_boundaries: A list of bin boundaries. The leftmost and rightmost bins
-      will always extend to `-inf` and `inf`, so `bin_boundaries=[0., 1., 2.]`
-      generates bins `(-inf, 0.)`, `[0., 1.)`, `[1., 2.)`, and `[2., +inf)`. If
-      this option is set, `adapt()` should not be called.
-    num_bins: The integer number of bins to compute. If this option is set,
-      `adapt()` should be called to learn the bin boundaries.
-    epsilon: Error tolerance, typically a small fraction close to zero (e.g.
-      0.01). Higher values of epsilon increase the quantile approximation, and
-      hence result in more unequal buckets, but could improve performance
-      and resource consumption.
-    output_mode: Specification for the output of the layer. Defaults to `"int"`.
-      Values can be `"int"`, `"one_hot"`, `"multi_hot"`, or `"count"`
-      configuring the layer as follows:
-        - `"int"`: Return the discritized bin indices directly.
-        - `"one_hot"`: Encodes each individual element in the input into an
-          array the same size as `num_bins`, containing a 1 at the input's bin
-          index. If the last dimension is size 1, will encode on that dimension.
-          If the last dimension is not size 1, will append a new dimension for
-          the encoded output.
-        - `"multi_hot"`: Encodes each sample in the input into a single array
-          the same size as `num_bins`, containing a 1 for each bin index
-          index present in the sample. Treats the last dimension as the sample
-          dimension, if input shape is `(..., sample_length)`, output shape will
-          be `(..., num_tokens)`.
-        - `"count"`: As `"multi_hot"`, but the int array contains a count of the
-          number of times the bin index appeared in the sample.
-    sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`,
-      and `"count"` output modes.
If True, returns a `SparseTensor` instead of - a dense `Tensor`. Defaults to False. - - Examples: - - Bucketize float values based on provided buckets. - >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]]) - >>> layer = tf.keras.layers.Discretization(bin_boundaries=[0., 1., 2.]) - >>> layer(input) - - - Bucketize float values based on a number of buckets to compute. - >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]]) - >>> layer = tf.keras.layers.Discretization(num_bins=4, epsilon=0.01) - >>> layer.adapt(input) - >>> layer(input) - - """ - - def __init__(self, - bin_boundaries=None, - num_bins=None, - epsilon=0.01, - output_mode="int", - sparse=False, - **kwargs): - # bins is a deprecated arg for setting bin_boundaries or num_bins that still - # has some usage. - if "bins" in kwargs: - logging.warning( - "bins is deprecated, please use bin_boundaries or num_bins instead.") - if isinstance(kwargs["bins"], int) and num_bins is None: - num_bins = kwargs["bins"] - elif bin_boundaries is None: - bin_boundaries = kwargs["bins"] - del kwargs["bins"] - - # By default, output int64 when output_mode='int' and floats otherwise. - if "dtype" not in kwargs or kwargs["dtype"] is None: - kwargs["dtype"] = tf.int64 if output_mode == INT else backend.floatx() - elif output_mode == "int" and not tf.as_dtype(kwargs["dtype"]).is_integer: - # Compat for when dtype was always floating and ignored by the layer. - kwargs["dtype"] = tf.int64 - - super().__init__(**kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell("Discretization").set( - True) - - # Check dtype only after base layer parses it; dtype parsing is complex. - if output_mode == INT and not tf.as_dtype(self.compute_dtype).is_integer: - input_dtype = kwargs["dtype"] - raise ValueError("When `output_mode='int'`, `dtype` should be an integer " - f"type. Received: dtype={input_dtype}") - - # 'output_mode' must be one of (INT, ONE_HOT, MULTI_HOT, COUNT) - layer_utils.validate_string_arg( - output_mode, - allowable_strings=(INT, ONE_HOT, MULTI_HOT, COUNT), - layer_name=self.__class__.__name__, - arg_name="output_mode") - - if sparse and output_mode == INT: - raise ValueError(f"`sparse` may only be true if `output_mode` is " - f"`'one_hot'`, `'multi_hot'`, or `'count'`. " - f"Received: sparse={sparse} and " - f"output_mode={output_mode}") - - if num_bins is not None and num_bins < 0: - raise ValueError("`num_bins` must be greater than or equal to 0. " - "You passed `num_bins={}`".format(num_bins)) - if num_bins is not None and bin_boundaries is not None: - raise ValueError("Both `num_bins` and `bin_boundaries` should not be " - "set. You passed `num_bins={}` and " - "`bin_boundaries={}`".format(num_bins, bin_boundaries)) - bin_boundaries = utils.listify_tensors(bin_boundaries) - self.input_bin_boundaries = bin_boundaries - self.bin_boundaries = bin_boundaries if bin_boundaries is not None else [] - self.num_bins = num_bins - self.epsilon = epsilon - self.output_mode = output_mode - self.sparse = sparse - - def build(self, input_shape): - super().build(input_shape) - - if self.input_bin_boundaries is not None: - return - - # Summary contains two equal length vectors of bins at index 0 and weights - # at index 1. - self.summary = self.add_weight( - name="summary", - shape=(2, None), - dtype=tf.float32, - initializer=lambda shape, dtype: [[], []], # pylint: disable=unused-arguments - trainable=False) - - # We override this method solely to generate a docstring. 
- def adapt(self, data, batch_size=None, steps=None): - """Computes bin boundaries from quantiles in a input dataset. - - Calling `adapt()` on a `Discretization` layer is an alternative to passing - in a `bin_boundaries` argument during construction. A `Discretization` layer - should always be either adapted over a dataset or passed `bin_boundaries`. - - During `adapt()`, the layer will estimate the quantile boundaries of the - input dataset. The number of quantiles can be controlled via the `num_bins` - argument, and the error tolerance for quantile boundaries can be controlled - via the `epsilon` argument. - - In order to make `Discretization` efficient in any distribution context, the - computed boundaries are kept static with respect to any compiled `tf.Graph`s - that call the layer. As a consequence, if the layer is adapted a second - time, any models using the layer should be re-compiled. For more information - see `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`. - - `adapt()` is meant only as a single machine utility to compute layer state. - To analyze a dataset that cannot fit on a single machine, see - [Tensorflow Transform](https://www.tensorflow.org/tfx/transform/get_started) - for a multi-machine, map-reduce solution. + """A preprocessing layer which buckets continuous features by ranges. + + This layer will place each element of its input data into one of several + contiguous ranges and output an integer index indicating which range each + element was placed in. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Input shape: + Any `tf.Tensor` or `tf.RaggedTensor` of dimension 2 or higher. + + Output shape: + Same as input shape. Arguments: - data: The data to train on. It can be passed either as a - `tf.data.Dataset`, or as a numpy array. - batch_size: Integer or `None`. - Number of samples per state update. - If unspecified, `batch_size` will default to 32. - Do not specify the `batch_size` if your data is in the - form of datasets, generators, or `keras.utils.Sequence` instances - (since they generate batches). - steps: Integer or `None`. - Total number of steps (batches of samples) - When training with input tensors such as - TensorFlow data tensors, the default `None` is equal to - the number of samples in your dataset divided by - the batch size, or 1 if that cannot be determined. If x is a - `tf.data` dataset, and 'steps' is None, the epoch will run until - the input dataset is exhausted. When passing an infinitely - repeating dataset, you must specify the `steps` argument. This - argument is not supported with array inputs. + bin_boundaries: A list of bin boundaries. The leftmost and rightmost bins + will always extend to `-inf` and `inf`, so `bin_boundaries=[0., 1., 2.]` + generates bins `(-inf, 0.)`, `[0., 1.)`, `[1., 2.)`, and `[2., +inf)`. + If this option is set, `adapt()` should not be called. + num_bins: The integer number of bins to compute. If this option is set, + `adapt()` should be called to learn the bin boundaries. + epsilon: Error tolerance, typically a small fraction close to zero (e.g. + 0.01). Higher values of epsilon increase the quantile approximation error, and + hence result in more unequal buckets, but could improve performance + and reduce resource consumption. + output_mode: Specification for the output of the layer.
Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, or + `"count"` configuring the layer as follows: + - `"int"`: Return the discretized bin indices directly. + - `"one_hot"`: Encodes each individual element in the input into an + array the same size as `num_bins`, containing a 1 at the input's bin + index. If the last dimension is size 1, will encode on that + dimension. If the last dimension is not size 1, will append a new + dimension for the encoded output. + - `"multi_hot"`: Encodes each sample in the input into a single array + the same size as `num_bins`, containing a 1 for each bin index + present in the sample. Treats the last dimension as the sample + dimension; if input shape is `(..., sample_length)`, output shape + will be `(..., num_tokens)`. + - `"count"`: As `"multi_hot"`, but the int array contains a count of + the number of times the bin index appeared in the sample. + Defaults to `"int"`. + sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, + and `"count"` output modes. If True, returns a `SparseTensor` instead of + a dense `Tensor`. Defaults to `False`. + + Examples: + + Bucketize float values based on provided buckets. + >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]]) + >>> layer = tf.keras.layers.Discretization(bin_boundaries=[0., 1., 2.]) + >>> layer(input) + + + Bucketize float values based on a number of buckets to compute. + >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]]) + >>> layer = tf.keras.layers.Discretization(num_bins=4, epsilon=0.01) + >>> layer.adapt(input) + >>> layer(input) + """ - super().adapt(data, batch_size=batch_size, steps=steps) - - def update_state(self, data): - if self.input_bin_boundaries is not None: - raise ValueError( - "Cannot adapt a Discretization layer that has been initialized with " - "`bin_boundaries`, use `num_bins` instead. You passed " - "`bin_boundaries={}`.".format(self.input_bin_boundaries)) - - if not self.built: - raise RuntimeError("`build` must be called before `update_state`.") - - data = tf.convert_to_tensor(data) - if data.dtype != tf.float32: - data = tf.cast(data, tf.float32) - summary = summarize(data, self.epsilon) - self.summary.assign(merge_summaries(summary, self.summary, self.epsilon)) - - def finalize_state(self): - if self.input_bin_boundaries is not None or not self.built: - return - - # The bucketize op only support list boundaries.
- self.bin_boundaries = utils.listify_tensors( - get_bin_boundaries(self.summary, self.num_bins)) - - def reset_state(self): # pylint: disable=method-hidden - if self.input_bin_boundaries is not None or not self.built: - return - - self.summary.assign([[], []]) - - def get_config(self): - config = super().get_config() - config.update({ - "bin_boundaries": self.input_bin_boundaries, - "num_bins": self.num_bins, - "epsilon": self.epsilon, - "output_mode": self.output_mode, - "sparse": self.sparse, - }) - return config - - def compute_output_shape(self, input_shape): - return input_shape - - def compute_output_signature(self, input_spec): - output_shape = self.compute_output_shape(input_spec.shape.as_list()) - if isinstance(input_spec, tf.SparseTensorSpec): - return tf.SparseTensorSpec( - shape=output_shape, dtype=self.compute_dtype) - return tf.TensorSpec(shape=output_shape, dtype=self.compute_dtype) - - def call(self, inputs): - def bucketize(inputs): - return tf.raw_ops.Bucketize(input=inputs, boundaries=self.bin_boundaries) - - if tf_utils.is_ragged(inputs): - indices = tf.ragged.map_flat_values(bucketize, inputs) - elif tf_utils.is_sparse(inputs): - indices = tf.SparseTensor( - indices=tf.identity(inputs.indices), - values=bucketize(inputs.values), - dense_shape=tf.identity(inputs.dense_shape)) - else: - indices = bucketize(inputs) - - return utils.encode_categorical_inputs( - indices, - output_mode=self.output_mode, - depth=len(self.bin_boundaries) + 1, - sparse=self.sparse, - dtype=self.compute_dtype) + + def __init__( + self, + bin_boundaries=None, + num_bins=None, + epsilon=0.01, + output_mode="int", + sparse=False, + **kwargs, + ): + # bins is a deprecated arg for setting bin_boundaries or num_bins that + # still has some usage. + if "bins" in kwargs: + logging.warning( + "bins is deprecated, " + "please use bin_boundaries or num_bins instead." + ) + if isinstance(kwargs["bins"], int) and num_bins is None: + num_bins = kwargs["bins"] + elif bin_boundaries is None: + bin_boundaries = kwargs["bins"] + del kwargs["bins"] + + # By default, output int64 when output_mode='int' and floats otherwise. + if "dtype" not in kwargs or kwargs["dtype"] is None: + kwargs["dtype"] = ( + tf.int64 if output_mode == INT else backend.floatx() + ) + elif ( + output_mode == "int" and not tf.as_dtype(kwargs["dtype"]).is_integer + ): + # Compat for when dtype was always floating and ignored by the + # layer. + kwargs["dtype"] = tf.int64 + + super().__init__(**kwargs) + base_preprocessing_layer.keras_kpl_gauge.get_cell("Discretization").set( + True + ) + + # Check dtype only after base layer parses it; dtype parsing is complex. + if ( + output_mode == INT + and not tf.as_dtype(self.compute_dtype).is_integer + ): + input_dtype = kwargs["dtype"] + raise ValueError( + "When `output_mode='int'`, `dtype` should be an integer " + f"type. Received: dtype={input_dtype}" + ) + + # 'output_mode' must be one of (INT, ONE_HOT, MULTI_HOT, COUNT) + layer_utils.validate_string_arg( + output_mode, + allowable_strings=(INT, ONE_HOT, MULTI_HOT, COUNT), + layer_name=self.__class__.__name__, + arg_name="output_mode", + ) + + if sparse and output_mode == INT: + raise ValueError( + "`sparse` may only be true if `output_mode` is " + "`'one_hot'`, `'multi_hot'`, or `'count'`. " + f"Received: sparse={sparse} and " + f"output_mode={output_mode}" + ) + + if num_bins is not None and num_bins < 0: + raise ValueError( + "`num_bins` must be greater than or equal to 0. 
" + "You passed `num_bins={}`".format(num_bins) + ) + if num_bins is not None and bin_boundaries is not None: + raise ValueError( + "Both `num_bins` and `bin_boundaries` should not be " + "set. You passed `num_bins={}` and " + "`bin_boundaries={}`".format(num_bins, bin_boundaries) + ) + bin_boundaries = utils.listify_tensors(bin_boundaries) + self.input_bin_boundaries = bin_boundaries + self.bin_boundaries = ( + bin_boundaries if bin_boundaries is not None else [] + ) + self.num_bins = num_bins + self.epsilon = epsilon + self.output_mode = output_mode + self.sparse = sparse + + def build(self, input_shape): + super().build(input_shape) + + if self.input_bin_boundaries is not None: + return + + # Summary contains two equal length vectors of bins at index 0 and + # weights at index 1. + self.summary = self.add_weight( + name="summary", + shape=(2, None), + dtype=tf.float32, + initializer=lambda shape, dtype: [ + [], + [], + ], + trainable=False, + ) + + # We override this method solely to generate a docstring. + def adapt(self, data, batch_size=None, steps=None): + """Computes bin boundaries from quantiles in a input dataset. + + Calling `adapt()` on a `Discretization` layer is an alternative to + passing in a `bin_boundaries` argument during construction. A + `Discretization` layer should always be either adapted over a dataset or + passed `bin_boundaries`. + + During `adapt()`, the layer will estimate the quantile boundaries of the + input dataset. The number of quantiles can be controlled via the + `num_bins` argument, and the error tolerance for quantile boundaries can + be controlled via the `epsilon` argument. + + In order to make `Discretization` efficient in any distribution context, + the computed boundaries are kept static with respect to any compiled + `tf.Graph`s that call the layer. As a consequence, if the layer is + adapted a second time, any models using the layer should be re-compiled. + For more information see + `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`. + + `adapt()` is meant only as a single machine utility to compute layer + state. To analyze a dataset that cannot fit on a single machine, see + [Tensorflow Transform]( + https://www.tensorflow.org/tfx/transform/get_started) for a + multi-machine, map-reduce solution. + + Arguments: + data: The data to train on. It can be passed either as a + `tf.data.Dataset`, or as a numpy array. + batch_size: Integer or `None`. + Number of samples per state update. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of datasets, generators, or `keras.utils.Sequence` instances + (since they generate batches). + steps: Integer or `None`. + Total number of steps (batches of samples) + When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of samples in your dataset divided by + the batch size, or 1 if that cannot be determined. If x is a + `tf.data` dataset, and 'steps' is None, the epoch will run until + the input dataset is exhausted. When passing an infinitely + repeating dataset, you must specify the `steps` argument. This + argument is not supported with array inputs. + """ + super().adapt(data, batch_size=batch_size, steps=steps) + + def update_state(self, data): + if self.input_bin_boundaries is not None: + raise ValueError( + "Cannot adapt a Discretization layer that has been initialized " + "with `bin_boundaries`, use `num_bins` instead. 
You passed " + "`bin_boundaries={}`.".format(self.input_bin_boundaries) + ) + + if not self.built: + raise RuntimeError("`build` must be called before `update_state`.") + + data = tf.convert_to_tensor(data) + if data.dtype != tf.float32: + data = tf.cast(data, tf.float32) + summary = summarize(data, self.epsilon) + self.summary.assign( + merge_summaries(summary, self.summary, self.epsilon) + ) + + def finalize_state(self): + if self.input_bin_boundaries is not None or not self.built: + return + + # The bucketize op only support list boundaries. + self.bin_boundaries = utils.listify_tensors( + get_bin_boundaries(self.summary, self.num_bins) + ) + + def reset_state(self): + if self.input_bin_boundaries is not None or not self.built: + return + + self.summary.assign([[], []]) + + def get_config(self): + config = super().get_config() + config.update( + { + "bin_boundaries": self.input_bin_boundaries, + "num_bins": self.num_bins, + "epsilon": self.epsilon, + "output_mode": self.output_mode, + "sparse": self.sparse, + } + ) + return config + + def compute_output_shape(self, input_shape): + return input_shape + + def compute_output_signature(self, input_spec): + output_shape = self.compute_output_shape(input_spec.shape.as_list()) + if isinstance(input_spec, tf.SparseTensorSpec): + return tf.SparseTensorSpec( + shape=output_shape, dtype=self.compute_dtype + ) + return tf.TensorSpec(shape=output_shape, dtype=self.compute_dtype) + + def call(self, inputs): + def bucketize(inputs): + return tf.raw_ops.Bucketize( + input=inputs, boundaries=self.bin_boundaries + ) + + if tf_utils.is_ragged(inputs): + indices = tf.ragged.map_flat_values(bucketize, inputs) + elif tf_utils.is_sparse(inputs): + indices = tf.SparseTensor( + indices=tf.identity(inputs.indices), + values=bucketize(inputs.values), + dense_shape=tf.identity(inputs.dense_shape), + ) + else: + indices = bucketize(inputs) + + return utils.encode_categorical_inputs( + indices, + output_mode=self.output_mode, + depth=len(self.bin_boundaries) + 1, + sparse=self.sparse, + dtype=self.compute_dtype, + ) diff --git a/keras/layers/preprocessing/discretization_distribution_test.py b/keras/layers/preprocessing/discretization_distribution_test.py index 562d71fb6dac..ff2d962fe71a 100644 --- a/keras/layers/preprocessing/discretization_distribution_test.py +++ b/keras/layers/preprocessing/discretization_distribution_test.py @@ -15,6 +15,8 @@ """Distribution tests for keras.layers.preprocessing.discretization.""" +import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.distribute import strategy_combinations @@ -22,40 +24,43 @@ from keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_utils.run_v2_only @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( - strategy=strategy_combinations.all_strategies + - strategy_combinations.multi_worker_mirrored_strategies + - strategy_combinations.parameter_server_strategies_single_worker + - strategy_combinations.parameter_server_strategies_multi_worker, - mode=["eager"])) + strategy=strategy_combinations.all_strategies + + strategy_combinations.multi_worker_mirrored_strategies + + strategy_combinations.parameter_server_strategies_single_worker + + strategy_combinations.parameter_server_strategies_multi_worker, + mode=["eager"], + ) +) class DiscretizationDistributionTest( - test_combinations.TestCase, - 
preprocessing_test_utils.PreprocessingLayerTest): - - def test_strategy(self, strategy): - input_array = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]]) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_strategy(self, strategy): + input_array = np.array([[-1.5, 1.0, 3.4, 0.5], [0.0, 3.0, 1.3, 0.0]]) - expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]] - expected_output_shape = [None, 4] + expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]] + expected_output_shape = [None, 4] - tf.config.set_soft_device_placement(True) + tf.config.set_soft_device_placement(True) - with strategy.scope(): - input_data = keras.Input(shape=(4,)) - layer = discretization.Discretization(bin_boundaries=[0., 1., 2.]) - bucket_data = layer(input_data) - self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list()) + with strategy.scope(): + input_data = keras.Input(shape=(4,)) + layer = discretization.Discretization( + bin_boundaries=[0.0, 1.0, 2.0] + ) + bucket_data = layer(input_data) + self.assertAllEqual( + expected_output_shape, bucket_data.shape.as_list() + ) - model = keras.Model(inputs=input_data, outputs=bucket_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) + model = keras.Model(inputs=input_data, outputs=bucket_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/layers/preprocessing/discretization_test.py b/keras/layers/preprocessing/discretization_test.py index 38dad27dc312..0b4b5e78b1df 100644 --- a/keras/layers/preprocessing/discretization_test.py +++ b/keras/layers/preprocessing/discretization_test.py @@ -16,403 +16,451 @@ import os +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.layers.preprocessing import discretization from keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes -class DiscretizationTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_bucketize_with_explicit_buckets_integer(self): - input_array = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]]) - - expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]] - expected_output_shape = [None, 4] - - input_data = keras.Input(shape=(4,)) - layer = discretization.Discretization(bin_boundaries=[0., 1., 2.]) - bucket_data = layer(input_data) - self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=bucket_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_bucketize_with_explicit_buckets_int_input(self): - input_array = np.array([[-1, 1, 3, 0], [0, 3, 1, 0]], dtype=np.int64) - - expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]] - expected_output_shape = [None, 4] - - input_data = keras.Input(shape=(4,), dtype=tf.int64) - layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5]) - bucket_data = layer(input_data) - self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=bucket_data) - output_dataset = 
model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_bucketize_with_explicit_buckets_sparse_float_input(self): - indices = [[0, 1], [0, 2], [1, 1]] - input_array = tf.SparseTensor( - indices=indices, values=[-1.5, 1.0, 3.4], dense_shape=[2, 3]) - expected_output = [0, 2, 3] - input_data = keras.Input(shape=(3,), dtype=tf.float32, sparse=True) - layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5]) - bucket_data = layer(input_data) - - model = keras.Model(inputs=input_data, outputs=bucket_data) - output_dataset = model.predict(input_array, steps=1) - self.assertAllEqual(indices, output_dataset.indices) - self.assertAllEqual(expected_output, output_dataset.values) - - def test_bucketize_with_explicit_buckets_ragged_float_input(self): - input_array = tf.ragged.constant([[-1.5, 1.0, 3.4, .5], - [0.0, 3.0, 1.3]]) - - expected_output = [[0, 2, 3, 1], [1, 3, 2]] - expected_output_shape = [None, None] - - input_data = keras.Input(shape=(None,), ragged=True) - layer = discretization.Discretization(bin_boundaries=[0., 1., 2.]) - bucket_data = layer(input_data) - self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=bucket_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_bucketize_with_explicit_buckets_ragged_int_input(self): - input_array = tf.ragged.constant([[-1, 1, 3, 0], [0, 3, 1]], - dtype=tf.int64) - - expected_output = [[0, 2, 3, 1], [1, 3, 2]] - expected_output_shape = [None, None] - - input_data = keras.Input(shape=(None,), ragged=True, dtype=tf.int64) - layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5]) - bucket_data = layer(input_data) - self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list()) - model = keras.Model(inputs=input_data, outputs=bucket_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_bucketize_with_explicit_buckets_sparse_int_input(self): - indices = [[0, 1], [0, 2], [1, 1]] - input_array = tf.SparseTensor( - indices=indices, values=[-1, 1, 3], dense_shape=[2, 3]) - expected_output = [0, 2, 3] - input_data = keras.Input(shape=(3,), dtype=tf.int32, sparse=True) - layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5]) - bucket_data = layer(input_data) - - model = keras.Model(inputs=input_data, outputs=bucket_data) - output_dataset = model.predict(input_array, steps=1) - self.assertAllEqual(indices, output_dataset.indices) - self.assertAllEqual(expected_output, output_dataset.values) - - def test_one_hot_output(self): - input_data = np.array([-1.5, 1.0, 3.4, 3.5]) - - expected_output = [[1., 0., 0., 0.], - [0., 0., 1., 0.], - [0., 0., 0., 1.], - [0., 0., 0., 1.]] - expected_output_shape = [None, 4] - - inputs = keras.Input(shape=(1,)) - layer = discretization.Discretization(bin_boundaries=[0., 1., 2.], - output_mode="one_hot") - outputs = layer(inputs) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - - model = keras.Model(inputs, outputs) - output_data = model(input_data) - self.assertAllEqual(expected_output, output_data) - - def test_multi_hot_output(self): - input_data = np.array([-1.5, 1.0, 3.4, 3.5]) - - expected_output = [1., 0., 1., 1.] 
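The expected encodings in the tests here follow mechanically from the bucketing rule in the layer docstring (`boundaries[i-1] <= x < boundaries[i]`). As a cross-check, NumPy's `digitize` uses the same convention as `tf.raw_ops.Bucketize`, so the multi-hot and count expectations can be reproduced in a few lines; the only assumption is padding to `len(boundaries) + 1` bins via `minlength`:

import numpy as np

x = np.array([-1.5, 1.0, 3.4, 3.5])
bins = np.digitize(x, [0.0, 1.0, 2.0])  # -> [0 2 3 3], the bin indices
counts = np.bincount(bins, minlength=4).astype(float)  # -> [1. 0. 1. 2.] ("count")
multi_hot = (counts > 0).astype(float)  # -> [1. 0. 1. 1.] ("multi_hot")
print(bins, counts, multi_hot)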
- expected_output_shape = [None, 4] - - inputs = keras.Input(shape=(4,)) - layer = discretization.Discretization(bin_boundaries=[0., 1., 2.], - output_mode="multi_hot") - outputs = layer(inputs) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - - model = keras.Model(inputs, outputs) - output_data = model(input_data) - self.assertAllEqual(expected_output, output_data) - - def test_count_output(self): - input_data = np.array([-1.5, 1.0, 3.4, 3.5]) - - expected_output = [1., 0., 1., 2.] - expected_output_shape = [None, 4] - - inputs = keras.Input(shape=(4,)) - layer = discretization.Discretization(bin_boundaries=[0., 1., 2.], - output_mode="count") - outputs = layer(inputs) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - - model = keras.Model(inputs, outputs) - output_data = model(input_data) - self.assertAllEqual(expected_output, output_data) - - def test_output_shape(self): - inputs = keras.Input(batch_size=16, shape=(4,), dtype=tf.int64) - layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5]) - outputs = layer(inputs) - self.assertAllEqual(outputs.shape.as_list(), [16, 4]) - - @parameterized.named_parameters( - ("int32", tf.int32), - ("int64", tf.int64), - ) - def test_output_dtype(self, dtype): - inputs = keras.Input(batch_size=16, shape=(4,), dtype="float32") - layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5], - dtype=dtype) - outputs = layer(inputs) - self.assertAllEqual(outputs.dtype, dtype) - - def test_legacy_dtype_compat(self): - inputs = keras.Input(batch_size=16, shape=(4,), dtype="float32") - layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5], - dtype="float32") - outputs = layer(inputs) - self.assertAllEqual(outputs.dtype, tf.int64) - # In TF1 we sometimes face an explicit dtype=None in the config. 
- layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5], - dtype=None) - outputs = layer(inputs) - self.assertAllEqual(outputs.dtype, tf.int64) - - @parameterized.named_parameters( - ("float32", tf.float32), - ("float64", tf.float64), - ) - def test_one_hot_output_dtype(self, dtype): - inputs = keras.Input(batch_size=16, shape=(1,), dtype="float32") - layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5], - output_mode="one_hot", - dtype=dtype) - outputs = layer(inputs) - self.assertAllEqual(outputs.dtype, dtype) - - def test_num_bins_negative_fails(self): - with self.assertRaisesRegex(ValueError, "`num_bins` must be.*num_bins=-7"): - _ = discretization.Discretization(num_bins=-7) - - def test_num_bins_and_bins_set_fails(self): - with self.assertRaisesRegex( - ValueError, - r"`num_bins` and `bin_boundaries` should not be set.*5.*\[1, 2\]"): - _ = discretization.Discretization(num_bins=5, bins=[1, 2]) +class DiscretizationTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_bucketize_with_explicit_buckets_integer(self): + input_array = np.array([[-1.5, 1.0, 3.4, 0.5], [0.0, 3.0, 1.3, 0.0]]) + + expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]] + expected_output_shape = [None, 4] + + input_data = keras.Input(shape=(4,)) + layer = discretization.Discretization(bin_boundaries=[0.0, 1.0, 2.0]) + bucket_data = layer(input_data) + self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=bucket_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_bucketize_with_explicit_buckets_int_input(self): + input_array = np.array([[-1, 1, 3, 0], [0, 3, 1, 0]], dtype=np.int64) + + expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]] + expected_output_shape = [None, 4] + + input_data = keras.Input(shape=(4,), dtype=tf.int64) + layer = discretization.Discretization(bin_boundaries=[-0.5, 0.5, 1.5]) + bucket_data = layer(input_data) + self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=bucket_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_bucketize_with_explicit_buckets_sparse_float_input(self): + indices = [[0, 1], [0, 2], [1, 1]] + input_array = tf.SparseTensor( + indices=indices, values=[-1.5, 1.0, 3.4], dense_shape=[2, 3] + ) + expected_output = [0, 2, 3] + input_data = keras.Input(shape=(3,), dtype=tf.float32, sparse=True) + layer = discretization.Discretization(bin_boundaries=[-0.5, 0.5, 1.5]) + bucket_data = layer(input_data) + + model = keras.Model(inputs=input_data, outputs=bucket_data) + output_dataset = model.predict(input_array, steps=1) + self.assertAllEqual(indices, output_dataset.indices) + self.assertAllEqual(expected_output, output_dataset.values) + + def test_bucketize_with_explicit_buckets_ragged_float_input(self): + input_array = tf.ragged.constant( + [[-1.5, 1.0, 3.4, 0.5], [0.0, 3.0, 1.3]] + ) + + expected_output = [[0, 2, 3, 1], [1, 3, 2]] + expected_output_shape = [None, None] + + input_data = keras.Input(shape=(None,), ragged=True) + layer = discretization.Discretization(bin_boundaries=[0.0, 1.0, 2.0]) + bucket_data = layer(input_data) + self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=bucket_data) + output_dataset = model.predict(input_array) + 
self.assertAllEqual(expected_output, output_dataset) + + def test_bucketize_with_explicit_buckets_ragged_int_input(self): + input_array = tf.ragged.constant( + [[-1, 1, 3, 0], [0, 3, 1]], dtype=tf.int64 + ) + + expected_output = [[0, 2, 3, 1], [1, 3, 2]] + expected_output_shape = [None, None] + + input_data = keras.Input(shape=(None,), ragged=True, dtype=tf.int64) + layer = discretization.Discretization(bin_boundaries=[-0.5, 0.5, 1.5]) + bucket_data = layer(input_data) + self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list()) + model = keras.Model(inputs=input_data, outputs=bucket_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_bucketize_with_explicit_buckets_sparse_int_input(self): + indices = [[0, 1], [0, 2], [1, 1]] + input_array = tf.SparseTensor( + indices=indices, values=[-1, 1, 3], dense_shape=[2, 3] + ) + expected_output = [0, 2, 3] + input_data = keras.Input(shape=(3,), dtype=tf.int32, sparse=True) + layer = discretization.Discretization(bin_boundaries=[-0.5, 0.5, 1.5]) + bucket_data = layer(input_data) + + model = keras.Model(inputs=input_data, outputs=bucket_data) + output_dataset = model.predict(input_array, steps=1) + self.assertAllEqual(indices, output_dataset.indices) + self.assertAllEqual(expected_output, output_dataset.values) + + def test_one_hot_output(self): + input_data = np.array([-1.5, 1.0, 3.4, 3.5]) + + expected_output = [ + [1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 1.0], + [0.0, 0.0, 0.0, 1.0], + ] + expected_output_shape = [None, 4] + + inputs = keras.Input(shape=(1,)) + layer = discretization.Discretization( + bin_boundaries=[0.0, 1.0, 2.0], output_mode="one_hot" + ) + outputs = layer(inputs) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + + model = keras.Model(inputs, outputs) + output_data = model(input_data) + self.assertAllEqual(expected_output, output_data) + + def test_multi_hot_output(self): + input_data = np.array([-1.5, 1.0, 3.4, 3.5]) + + expected_output = [1.0, 0.0, 1.0, 1.0] + expected_output_shape = [None, 4] + + inputs = keras.Input(shape=(4,)) + layer = discretization.Discretization( + bin_boundaries=[0.0, 1.0, 2.0], output_mode="multi_hot" + ) + outputs = layer(inputs) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + + model = keras.Model(inputs, outputs) + output_data = model(input_data) + self.assertAllEqual(expected_output, output_data) + + def test_count_output(self): + input_data = np.array([-1.5, 1.0, 3.4, 3.5]) + + expected_output = [1.0, 0.0, 1.0, 2.0] + expected_output_shape = [None, 4] + + inputs = keras.Input(shape=(4,)) + layer = discretization.Discretization( + bin_boundaries=[0.0, 1.0, 2.0], output_mode="count" + ) + outputs = layer(inputs) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + + model = keras.Model(inputs, outputs) + output_data = model(input_data) + self.assertAllEqual(expected_output, output_data) + + def test_output_shape(self): + inputs = keras.Input(batch_size=16, shape=(4,), dtype=tf.int64) + layer = discretization.Discretization(bin_boundaries=[-0.5, 0.5, 1.5]) + outputs = layer(inputs) + self.assertAllEqual(outputs.shape.as_list(), [16, 4]) + + @parameterized.named_parameters( + ("int32", tf.int32), + ("int64", tf.int64), + ) + def test_output_dtype(self, dtype): + inputs = keras.Input(batch_size=16, shape=(4,), dtype="float32") + layer = discretization.Discretization( + bin_boundaries=[-0.5, 0.5, 1.5], dtype=dtype + ) + outputs = 
layer(inputs) + self.assertAllEqual(outputs.dtype, dtype) + + def test_legacy_dtype_compat(self): + inputs = keras.Input(batch_size=16, shape=(4,), dtype="float32") + layer = discretization.Discretization( + bin_boundaries=[-0.5, 0.5, 1.5], dtype="float32" + ) + outputs = layer(inputs) + self.assertAllEqual(outputs.dtype, tf.int64) + # In TF1 we sometimes face an explicit dtype=None in the config. + layer = discretization.Discretization( + bin_boundaries=[-0.5, 0.5, 1.5], dtype=None + ) + outputs = layer(inputs) + self.assertAllEqual(outputs.dtype, tf.int64) + + @parameterized.named_parameters( + ("float32", tf.float32), + ("float64", tf.float64), + ) + def test_one_hot_output_dtype(self, dtype): + inputs = keras.Input(batch_size=16, shape=(1,), dtype="float32") + layer = discretization.Discretization( + bin_boundaries=[-0.5, 0.5, 1.5], output_mode="one_hot", dtype=dtype + ) + outputs = layer(inputs) + self.assertAllEqual(outputs.dtype, dtype) + + def test_num_bins_negative_fails(self): + with self.assertRaisesRegex( + ValueError, "`num_bins` must be.*num_bins=-7" + ): + _ = discretization.Discretization(num_bins=-7) + + def test_num_bins_and_bins_set_fails(self): + with self.assertRaisesRegex( + ValueError, + r"`num_bins` and `bin_boundaries` should not be set.*5.*\[1, 2\]", + ): + _ = discretization.Discretization(num_bins=5, bins=[1, 2]) @test_combinations.run_all_keras_modes(always_skip_v1=True) -class DiscretizationAdaptTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - @parameterized.named_parameters([ - { - "testcase_name": "2d_single_element", - "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]]), - "test_data": np.array([[1.], [2.], [3.]]), - "use_dataset": True, - "expected": np.array([[1], [2], [3]]), - "num_bins": 5, - "epsilon": 0.01 - }, { - "testcase_name": "2d_multi_element", - "adapt_data": np.array([[1., 6.], [2., 7.], [3., 8.], [4., 9.], - [5., 10.]]), - "test_data": np.array([[1., 10.], [2., 6.], [3., 8.]]), - "use_dataset": True, - "expected": np.array([[0, 4], [1, 3], [1, 4]]), - "num_bins": 5, - "epsilon": 0.01 - }, { - "testcase_name": "1d_single_element", - "adapt_data": np.array([3., 2., 1., 5., 4.]), - "test_data": np.array([1., 2., 3.]), - "use_dataset": True, - "expected": np.array([1, 2, 3]), - "num_bins": 5, - "epsilon": 0.01 - }, { - "testcase_name": "300_batch_1d_single_element_1", - "adapt_data": np.arange(300), - "test_data": np.arange(300), - "use_dataset": True, - "expected": - np.concatenate([np.zeros(101), np.ones(99), 2 * np.ones(100)]), - "num_bins": 3, - "epsilon": 0.01 - }, { - "testcase_name": "300_batch_1d_single_element_2", - "adapt_data": np.arange(300) ** 2, - "test_data": np.arange(300) ** 2, - "use_dataset": True, - "expected": - np.concatenate([np.zeros(101), np.ones(99), 2 * np.ones(100)]), - "num_bins": 3, - "epsilon": 0.01 - }, { - "testcase_name": "300_batch_1d_single_element_large_epsilon", - "adapt_data": np.arange(300), - "test_data": np.arange(300), - "use_dataset": True, - "expected": np.concatenate([np.zeros(136), np.ones(164)]), - "num_bins": 2, - "epsilon": 0.1 - }]) - def test_layer_computation(self, adapt_data, test_data, use_dataset, - expected, num_bins=5, epsilon=0.01): - - input_shape = tuple(list(test_data.shape)[1:]) - np.random.shuffle(adapt_data) - if use_dataset: - # Keras APIs expect batched datasets - adapt_data = tf.data.Dataset.from_tensor_slices(adapt_data).batch( - test_data.shape[0] // 2) - test_data = tf.data.Dataset.from_tensor_slices(test_data).batch( - 
test_data.shape[0] // 2) - - layer = discretization.Discretization(epsilon=epsilon, num_bins=num_bins) - layer.adapt(adapt_data) - - input_data = keras.Input(shape=input_shape) - output = layer(input_data) - model = keras.Model(input_data, output) - model._run_eagerly = test_utils.should_run_eagerly() - output_data = model.predict(test_data) - self.assertAllClose(expected, output_data) - - def test_multiple_adapts(self): - first_adapt = [[1], [2], [3]] - second_adapt = [[4], [5], [6]] - predict_input = [[2], [2]] - expected_first_output = [[2], [2]] - expected_second_output = [[0], [0]] - - inputs = keras.Input(shape=(1,), dtype=tf.int32) - layer = discretization.Discretization(num_bins=3) - layer.adapt(first_adapt) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - - actual_output = model.predict(predict_input) - self.assertAllClose(actual_output, expected_first_output) - - # Re-adapt the layer on new inputs. - layer.adapt(second_adapt) - # Re-compile the model. - model.compile() - # `predict` should now use the new model state. - actual_output = model.predict(predict_input) - self.assertAllClose(actual_output, expected_second_output) - - def test_saved_model_tf(self): - input_data = [[1], [2], [3]] - predict_data = [[0.5], [1.5], [2.5]] - expected_output = [[0], [1], [2]] - - inputs = keras.Input(shape=(1,), dtype=tf.float32) - layer = discretization.Discretization(num_bins=3) - layer.adapt(input_data) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - - output_data = model.predict(predict_data) - self.assertAllClose(output_data, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_saved_model") - tf.saved_model.save(model, output_path) - loaded_model = tf.saved_model.load(output_path) - f = loaded_model.signatures["serving_default"] - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_data = f(tf.constant(predict_data))["discretization"] - self.assertAllClose(new_output_data, expected_output) - - @parameterized.product( - save_format=["tf", "h5"], - adapt=[True, False], - ) - def test_saved_model_keras(self, save_format, adapt): - input_data = [[1], [2], [3]] - predict_data = [[0.5], [1.5], [2.5]] - expected_output = [[0], [1], [2]] - - cls = discretization.Discretization - inputs = keras.Input(shape=(1,), dtype=tf.float32) - if adapt: - layer = cls(num_bins=3) - layer.adapt(input_data) - else: - layer = cls(bin_boundaries=[1.0, 2.0]) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - - output_data = model.predict(predict_data) - self.assertAllClose(output_data, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format=save_format) - loaded_model = keras.models.load_model( - output_path, custom_objects={"Discretization": cls}) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. 
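For orientation while reading these adapt tests, a minimal end-to-end sketch of the workflow they exercise; the data and expected indices are taken from the `2d_single_element` case above, while the exact boundary values depend on the quantile approximation and are indicative only:

import numpy as np
import tensorflow as tf

layer = tf.keras.layers.Discretization(num_bins=5, epsilon=0.01)
layer.adapt(np.array([[1.0], [2.0], [3.0], [4.0], [5.0]]))
print(layer.bin_boundaries)  # learned quantile boundaries (typically num_bins - 1 values)
print(layer(np.array([[1.0], [2.0], [3.0]])))  # bin indices [[1], [2], [3]]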
- new_output_data = loaded_model.predict(predict_data) - self.assertAllClose(new_output_data, expected_output) - - def test_saved_weights_keras(self): - input_data = [[1], [2], [3]] - predict_data = [[0.5], [1.5], [2.5]] - expected_output = [[0], [1], [2]] - - cls = discretization.Discretization - inputs = keras.Input(shape=(1,), dtype=tf.float32) - layer = cls(num_bins=3) - layer.adapt(input_data) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - - output_data = model.predict(predict_data) - self.assertAllClose(output_data, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_weights") - model.save_weights(output_path, save_format="tf") - new_model = keras.Model.from_config( - model.get_config(), custom_objects={"Discretization": cls}) - new_model.load_weights(output_path) - - # Validate correctness of the new model. - new_output_data = new_model.predict(predict_data) - self.assertAllClose(new_output_data, expected_output) +class DiscretizationAdaptTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + @parameterized.named_parameters( + [ + { + "testcase_name": "2d_single_element", + "adapt_data": np.array([[1.0], [2.0], [3.0], [4.0], [5.0]]), + "test_data": np.array([[1.0], [2.0], [3.0]]), + "use_dataset": True, + "expected": np.array([[1], [2], [3]]), + "num_bins": 5, + "epsilon": 0.01, + }, + { + "testcase_name": "2d_multi_element", + "adapt_data": np.array( + [ + [1.0, 6.0], + [2.0, 7.0], + [3.0, 8.0], + [4.0, 9.0], + [5.0, 10.0], + ] + ), + "test_data": np.array([[1.0, 10.0], [2.0, 6.0], [3.0, 8.0]]), + "use_dataset": True, + "expected": np.array([[0, 4], [1, 3], [1, 4]]), + "num_bins": 5, + "epsilon": 0.01, + }, + { + "testcase_name": "1d_single_element", + "adapt_data": np.array([3.0, 2.0, 1.0, 5.0, 4.0]), + "test_data": np.array([1.0, 2.0, 3.0]), + "use_dataset": True, + "expected": np.array([1, 2, 3]), + "num_bins": 5, + "epsilon": 0.01, + }, + { + "testcase_name": "300_batch_1d_single_element_1", + "adapt_data": np.arange(300), + "test_data": np.arange(300), + "use_dataset": True, + "expected": np.concatenate( + [np.zeros(101), np.ones(99), 2 * np.ones(100)] + ), + "num_bins": 3, + "epsilon": 0.01, + }, + { + "testcase_name": "300_batch_1d_single_element_2", + "adapt_data": np.arange(300) ** 2, + "test_data": np.arange(300) ** 2, + "use_dataset": True, + "expected": np.concatenate( + [np.zeros(101), np.ones(99), 2 * np.ones(100)] + ), + "num_bins": 3, + "epsilon": 0.01, + }, + { + "testcase_name": "300_batch_1d_single_element_large_epsilon", + "adapt_data": np.arange(300), + "test_data": np.arange(300), + "use_dataset": True, + "expected": np.concatenate([np.zeros(136), np.ones(164)]), + "num_bins": 2, + "epsilon": 0.1, + }, + ] + ) + def test_layer_computation( + self, + adapt_data, + test_data, + use_dataset, + expected, + num_bins=5, + epsilon=0.01, + ): + + input_shape = tuple(list(test_data.shape)[1:]) + np.random.shuffle(adapt_data) + if use_dataset: + # Keras APIs expect batched datasets + adapt_data = tf.data.Dataset.from_tensor_slices(adapt_data).batch( + test_data.shape[0] // 2 + ) + test_data = tf.data.Dataset.from_tensor_slices(test_data).batch( + test_data.shape[0] // 2 + ) + + layer = discretization.Discretization( + epsilon=epsilon, num_bins=num_bins + ) + layer.adapt(adapt_data) + + input_data = keras.Input(shape=input_shape) + output = layer(input_data) + model = keras.Model(input_data, output) + model._run_eagerly = 
test_utils.should_run_eagerly() + output_data = model.predict(test_data) + self.assertAllClose(expected, output_data) + + def test_multiple_adapts(self): + first_adapt = [[1], [2], [3]] + second_adapt = [[4], [5], [6]] + predict_input = [[2], [2]] + expected_first_output = [[2], [2]] + expected_second_output = [[0], [0]] + + inputs = keras.Input(shape=(1,), dtype=tf.int32) + layer = discretization.Discretization(num_bins=3) + layer.adapt(first_adapt) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + actual_output = model.predict(predict_input) + self.assertAllClose(actual_output, expected_first_output) + + # Re-adapt the layer on new inputs. + layer.adapt(second_adapt) + # Re-compile the model. + model.compile() + # `predict` should now use the new model state. + actual_output = model.predict(predict_input) + self.assertAllClose(actual_output, expected_second_output) + + def test_saved_model_tf(self): + input_data = [[1], [2], [3]] + predict_data = [[0.5], [1.5], [2.5]] + expected_output = [[0], [1], [2]] + + inputs = keras.Input(shape=(1,), dtype=tf.float32) + layer = discretization.Discretization(num_bins=3) + layer.adapt(input_data) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + output_data = model.predict(predict_data) + self.assertAllClose(output_data, expected_output) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_saved_model") + tf.saved_model.save(model, output_path) + loaded_model = tf.saved_model.load(output_path) + f = loaded_model.signatures["serving_default"] + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_data = f(tf.constant(predict_data))["discretization"] + self.assertAllClose(new_output_data, expected_output) + + @parameterized.product( + save_format=["tf", "h5"], + adapt=[True, False], + ) + def test_saved_model_keras(self, save_format, adapt): + input_data = [[1], [2], [3]] + predict_data = [[0.5], [1.5], [2.5]] + expected_output = [[0], [1], [2]] + + cls = discretization.Discretization + inputs = keras.Input(shape=(1,), dtype=tf.float32) + if adapt: + layer = cls(num_bins=3) + layer.adapt(input_data) + else: + layer = cls(bin_boundaries=[1.0, 2.0]) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + output_data = model.predict(predict_data) + self.assertAllClose(output_data, expected_output) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") + model.save(output_path, save_format=save_format) + loaded_model = keras.models.load_model( + output_path, custom_objects={"Discretization": cls} + ) + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_data = loaded_model.predict(predict_data) + self.assertAllClose(new_output_data, expected_output) + + def test_saved_weights_keras(self): + input_data = [[1], [2], [3]] + predict_data = [[0.5], [1.5], [2.5]] + expected_output = [[0], [1], [2]] + + cls = discretization.Discretization + inputs = keras.Input(shape=(1,), dtype=tf.float32) + layer = cls(num_bins=3) + layer.adapt(input_data) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + output_data = model.predict(predict_data) + self.assertAllClose(output_data, expected_output) + + # Save the model to disk. 
+ output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_weights" + ) + model.save_weights(output_path, save_format="tf") + new_model = keras.Model.from_config( + model.get_config(), custom_objects={"Discretization": cls} + ) + new_model.load_weights(output_path) + + # Validate correctness of the new model. + new_output_data = new_model.predict(predict_data) + self.assertAllClose(new_output_data, expected_output) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/hashed_crossing.py b/keras/layers/preprocessing/hashed_crossing.py index 240281b2f343..02fa326d3999 100644 --- a/keras/layers/preprocessing/hashed_crossing.py +++ b/keras/layers/preprocessing/hashed_crossing.py @@ -14,185 +14,214 @@ # ============================================================================== """Keras hashed crossing preprocessing layer.""" -# pylint: disable=g-classes-have-attributes +import tensorflow.compat.v2 as tf from keras import backend from keras.engine import base_layer from keras.engine import base_preprocessing_layer from keras.layers.preprocessing import preprocessing_utils as utils from keras.utils import layer_utils -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.util.tf_export import keras_export INT = utils.INT ONE_HOT = utils.ONE_HOT -@keras_export("keras.layers.experimental.preprocessing.HashedCrossing") +@keras_export( + "keras.layers.HashedCrossing", + "keras.layers.experimental.preprocessing.HashedCrossing", + v1=[], +) class HashedCrossing(base_layer.Layer): - """A preprocessing layer which crosses features using the "hashing trick". - - This layer performs crosses of categorical features using the "hasing trick". - Conceptually, the transformation can be thought of as: - hash(concatenation of features) % `num_bins`. - - This layer currently only performs crosses of scalar inputs and batches of - scalar inputs. Valid input shapes are `(batch_size, 1)`, `(batch_size,)` and - `()`. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - num_bins: Number of hash bins. - output_mode: Specification for the output of the layer. Defaults to `"int"`. - Values can be `"int"`, or `"one_hot"` configuring the layer as follows: - - `"int"`: Return the integer bin indices directly. - - `"one_hot"`: Encodes each individual element in the input into an - array the same size as `num_bins`, containing a 1 at the input's bin - index. - sparse: Boolean. Only applicable to `"one_hot"` mode. If True, returns a - `SparseTensor` instead of a dense `Tensor`. Defaults to False. - **kwargs: Keyword arguments to construct a layer. - - Examples: - - **Crossing two scalar features.** - - >>> layer = tf.keras.layers.experimental.preprocessing.HashedCrossing( - ... num_bins=5) - >>> feat1 = tf.constant(['A', 'B', 'A', 'B', 'A']) - >>> feat2 = tf.constant([101, 101, 101, 102, 102]) - >>> layer((feat1, feat2)) - - - **Crossing and one-hotting two scalar features.** - - >>> layer = tf.keras.layers.experimental.preprocessing.HashedCrossing( - ... num_bins=5, output_mode='one_hot') - >>> feat1 = tf.constant(['A', 'B', 'A', 'B', 'A']) - >>> feat2 = tf.constant([101, 101, 101, 102, 102]) - >>> layer((feat1, feat2)) - - """ - - def __init__(self, - num_bins, - output_mode="int", - sparse=False, - **kwargs): - # By default, output int64 when output_mode="int" and floats otherwise. 
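The dtype defaulting noted in the comment above (and preserved in the reflowed `__init__` below) is observable from the public API. A quick sketch using the pre-existing experimental export, with expected dtypes taken from `test_cross_output_dtype` later in this diff:

import tensorflow as tf

HashedCrossing = tf.keras.layers.experimental.preprocessing.HashedCrossing
print(HashedCrossing(num_bins=2)(([1], [1])).dtype)  # int64 in "int" mode
print(HashedCrossing(num_bins=2, output_mode="one_hot")(([1], [1])).dtype)  # float32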
- if "dtype" not in kwargs or kwargs["dtype"] is None: - kwargs["dtype"] = tf.int64 if output_mode == INT else backend.floatx() - - super().__init__(**kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell( - "HashedCrossing").set(True) - - # Check dtype only after base layer parses it; dtype parsing is complex. - if output_mode == INT and not tf.as_dtype(self.compute_dtype).is_integer: - input_dtype = kwargs["dtype"] - raise ValueError("When `output_mode='int'`, `dtype` should be an integer " - f"type. Received: dtype={input_dtype}") - - # "output_mode" must be one of (INT, ONE_HOT) - layer_utils.validate_string_arg( - output_mode, - allowable_strings=(INT, ONE_HOT), - layer_name=self.__class__.__name__, - arg_name="output_mode") - - self.num_bins = num_bins - self.output_mode = output_mode - self.sparse = sparse - - def call(self, inputs): - # Convert all inputs to tensors and check shape. This layer only supports - # sclars and batches of scalars for the initial version. - self._check_at_least_two_inputs(inputs) - inputs = [utils.ensure_tensor(x) for x in inputs] - self._check_input_shape_and_type(inputs) - - # Uprank to rank 2 for the cross_hashed op. - rank = inputs[0].shape.rank - if rank < 2: - inputs = [utils.expand_dims(x, -1) for x in inputs] - if rank < 1: - inputs = [utils.expand_dims(x, -1) for x in inputs] - - # Perform the cross and convert to dense - outputs = tf.sparse.cross_hashed(inputs, self.num_bins) - outputs = tf.sparse.to_dense(outputs) - - # Fix output shape and downrank to match input rank. - if rank == 2: - # tf.sparse.cross_hashed output shape will always be None on the last - # dimension. Given our input shape restrictions, we want to force shape 1 - # instead. - outputs = tf.reshape(outputs, [-1, 1]) - elif rank == 1: - outputs = tf.reshape(outputs, [-1]) - elif rank == 0: - outputs = tf.reshape(outputs, []) - - # Encode outputs. - return utils.encode_categorical_inputs( - outputs, - output_mode=self.output_mode, - depth=self.num_bins, - sparse=self.sparse, - dtype=self.compute_dtype) - - def compute_output_shape(self, input_shapes): - self._check_at_least_two_inputs(input_shapes) - return utils.compute_shape_for_encode_categorical(input_shapes[0]) - - def compute_output_signature(self, input_specs): - input_shapes = [x.shape.as_list() for x in input_specs] - output_shape = self.compute_output_shape(input_shapes) - if self.sparse or any( - isinstance(x, tf.SparseTensorSpec) for x in input_specs): - return tf.SparseTensorSpec(shape=output_shape, dtype=self.compute_dtype) - return tf.TensorSpec(shape=output_shape, dtype=self.compute_dtype) - - def get_config(self): - config = super().get_config() - config.update({ - "num_bins": self.num_bins, - "output_mode": self.output_mode, - "sparse": self.sparse, - }) - return config - - def _check_at_least_two_inputs(self, inputs): - if not isinstance(inputs, (list, tuple)): - raise ValueError( - "`HashedCrossing` should be called on a list or tuple of inputs. " - f"Received: inputs={inputs}") - if len(inputs) < 2: - raise ValueError( - "`HashedCrossing` should be called on at least two inputs. " - f"Received: inputs={inputs}") - - def _check_input_shape_and_type(self, inputs): - first_shape = inputs[0].shape.as_list() - rank = len(first_shape) - if rank > 2 or (rank == 2 and first_shape[-1] != 1): - raise ValueError( - "All `HashedCrossing` inputs should have shape `[]`, `[batch_size]` " - f"or `[batch_size, 1]`. 
Received: inputs={inputs}") - if not all(x.shape.as_list() == first_shape for x in inputs[1:]): - raise ValueError("All `HashedCrossing` inputs should have equal shape. " - f"Received: inputs={inputs}") - if any(isinstance(x, (tf.RaggedTensor, tf.SparseTensor)) for x in inputs): - raise ValueError("All `HashedCrossing` inputs should be dense tensors. " - f"Received: inputs={inputs}") - if not all(x.dtype.is_integer or x.dtype == tf.string for x in inputs): - raise ValueError("All `HashedCrossing` inputs should have an integer or " - f"string dtype. Received: inputs={inputs}") + """A preprocessing layer which crosses features using the "hashing trick". + + This layer performs crosses of categorical features using the "hashing + trick". Conceptually, the transformation can be thought of as: + `hash(concatenate(features)) % num_bins`. + + This layer currently only performs crosses of scalar inputs and batches of + scalar inputs. Valid input shapes are `(batch_size, 1)`, `(batch_size,)` and + `()`. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Args: + num_bins: Number of hash bins. + output_mode: Specification for the output of the layer. Values can be + `"int"`, or `"one_hot"` configuring the layer as follows: + - `"int"`: Return the integer bin indices directly. + - `"one_hot"`: Encodes each individual element in the input into an + array the same size as `num_bins`, containing a 1 at the input's + bin index. Defaults to `"int"`. + sparse: Boolean. Only applicable to `"one_hot"` mode. If `True`, + returns a `SparseTensor` instead of a dense `Tensor`. + Defaults to `False`. + **kwargs: Keyword arguments to construct a layer. + + Examples: + + **Crossing two scalar features.** + + >>> layer = tf.keras.layers.HashedCrossing( + ... num_bins=5) + >>> feat1 = tf.constant(['A', 'B', 'A', 'B', 'A']) + >>> feat2 = tf.constant([101, 101, 101, 102, 102]) + >>> layer((feat1, feat2)) + + + **Crossing and one-hotting two scalar features.** + + >>> layer = tf.keras.layers.HashedCrossing( + ... num_bins=5, output_mode='one_hot') + >>> feat1 = tf.constant(['A', 'B', 'A', 'B', 'A']) + >>> feat2 = tf.constant([101, 101, 101, 102, 102]) + >>> layer((feat1, feat2)) + + """ + + def __init__(self, num_bins, output_mode="int", sparse=False, **kwargs): + # By default, output int64 when output_mode="int" and floats otherwise. + if "dtype" not in kwargs or kwargs["dtype"] is None: + kwargs["dtype"] = ( + tf.int64 if output_mode == INT else backend.floatx() + ) + + super().__init__(**kwargs) + base_preprocessing_layer.keras_kpl_gauge.get_cell("HashedCrossing").set( + True + ) + + # Check dtype only after base layer parses it; dtype parsing is complex. + if ( + output_mode == INT + and not tf.as_dtype(self.compute_dtype).is_integer + ): + input_dtype = kwargs["dtype"] + raise ValueError( + "When `output_mode='int'`, `dtype` should be an integer " + f"type. Received: dtype={input_dtype}" + ) + + # "output_mode" must be one of (INT, ONE_HOT) + layer_utils.validate_string_arg( + output_mode, + allowable_strings=(INT, ONE_HOT), + layer_name=self.__class__.__name__, + arg_name="output_mode", + ) + + self.num_bins = num_bins + self.output_mode = output_mode + self.sparse = sparse + + def call(self, inputs): + # Convert all inputs to tensors and check shape. This layer only + # supports sclars and batches of scalars for the initial version. 
+ self._check_at_least_two_inputs(inputs) + inputs = [utils.ensure_tensor(x) for x in inputs] + self._check_input_shape_and_type(inputs) + + # Uprank to rank 2 for the cross_hashed op. + rank = inputs[0].shape.rank + if rank < 2: + inputs = [utils.expand_dims(x, -1) for x in inputs] + if rank < 1: + inputs = [utils.expand_dims(x, -1) for x in inputs] + + # Perform the cross and convert to dense + outputs = tf.sparse.cross_hashed(inputs, self.num_bins) + outputs = tf.sparse.to_dense(outputs) + + # Fix output shape and downrank to match input rank. + if rank == 2: + # tf.sparse.cross_hashed output shape will always be None on the + # last dimension. Given our input shape restrictions, we want to + # force shape 1 instead. + outputs = tf.reshape(outputs, [-1, 1]) + elif rank == 1: + outputs = tf.reshape(outputs, [-1]) + elif rank == 0: + outputs = tf.reshape(outputs, []) + + # Encode outputs. + return utils.encode_categorical_inputs( + outputs, + output_mode=self.output_mode, + depth=self.num_bins, + sparse=self.sparse, + dtype=self.compute_dtype, + ) + + def compute_output_shape(self, input_shapes): + self._check_at_least_two_inputs(input_shapes) + return utils.compute_shape_for_encode_categorical(input_shapes[0]) + + def compute_output_signature(self, input_specs): + input_shapes = [x.shape.as_list() for x in input_specs] + output_shape = self.compute_output_shape(input_shapes) + if self.sparse or any( + isinstance(x, tf.SparseTensorSpec) for x in input_specs + ): + return tf.SparseTensorSpec( + shape=output_shape, dtype=self.compute_dtype + ) + return tf.TensorSpec(shape=output_shape, dtype=self.compute_dtype) + + def get_config(self): + config = super().get_config() + config.update( + { + "num_bins": self.num_bins, + "output_mode": self.output_mode, + "sparse": self.sparse, + } + ) + return config + + def _check_at_least_two_inputs(self, inputs): + if not isinstance(inputs, (list, tuple)): + raise ValueError( + "`HashedCrossing` should be called on a list or tuple of " + f"inputs. Received: inputs={inputs}" + ) + if len(inputs) < 2: + raise ValueError( + "`HashedCrossing` should be called on at least two inputs. " + f"Received: inputs={inputs}" + ) + + def _check_input_shape_and_type(self, inputs): + first_shape = inputs[0].shape.as_list() + rank = len(first_shape) + if rank > 2 or (rank == 2 and first_shape[-1] != 1): + raise ValueError( + "All `HashedCrossing` inputs should have shape `[]`, " + "`[batch_size]` or `[batch_size, 1]`. " + f"Received: inputs={inputs}" + ) + if not all(x.shape.as_list() == first_shape for x in inputs[1:]): + raise ValueError( + "All `HashedCrossing` inputs should have equal shape. " + f"Received: inputs={inputs}" + ) + if any( + isinstance(x, (tf.RaggedTensor, tf.SparseTensor)) for x in inputs + ): + raise ValueError( + "All `HashedCrossing` inputs should be dense tensors. " + f"Received: inputs={inputs}" + ) + if not all(x.dtype.is_integer or x.dtype == tf.string for x in inputs): + raise ValueError( + "All `HashedCrossing` inputs should have an integer or " + f"string dtype. 
Received: inputs={inputs}" + ) diff --git a/keras/layers/preprocessing/hashed_crossing_test.py b/keras/layers/preprocessing/hashed_crossing_test.py index 529673d791a8..6fa5163fb784 100644 --- a/keras/layers/preprocessing/hashed_crossing_test.py +++ b/keras/layers/preprocessing/hashed_crossing_test.py @@ -15,153 +15,192 @@ """Tests for hashed crossing layer.""" import os + +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized import keras from keras.layers.preprocessing import hashed_crossing from keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes(always_skip_v1=True) class HashedCrossingTest(test_combinations.TestCase): - - @parameterized.named_parameters( - ('python_value', lambda x: x), - ('dense', tf.constant), - ) - def test_cross_scalars(self, data_fn): - layer = hashed_crossing.HashedCrossing(num_bins=10) - feat1 = data_fn('A') - feat2 = data_fn(101) - outputs = layer((feat1, feat2)) - self.assertAllClose(outputs, 1) - self.assertAllEqual(outputs.shape.as_list(), []) - - @parameterized.named_parameters( - ('tuple', tuple), - ('list', list), - ('numpy', np.array), - ('array_like', preprocessing_test_utils.ArrayLike), - ('dense', tf.constant), - ) - def test_cross_batch_of_scalars_1d(self, data_fn): - layer = hashed_crossing.HashedCrossing(num_bins=10) - feat1 = data_fn(['A', 'B', 'A', 'B', 'A']) - feat2 = data_fn([101, 101, 101, 102, 102]) - outputs = layer((feat1, feat2)) - self.assertAllClose(outputs, [1, 4, 1, 6, 3]) - self.assertAllEqual(outputs.shape.as_list(), [5]) - - @parameterized.named_parameters( - ('tuple', tuple), - ('list', list), - ('numpy', np.array), - ('array_like', preprocessing_test_utils.ArrayLike), - ('dense', tf.constant), - ) - def test_cross_batch_of_scalars_2d(self, data_fn): - layer = hashed_crossing.HashedCrossing(num_bins=10) - feat1 = data_fn([['A'], ['B'], ['A'], ['B'], ['A']]) - feat2 = data_fn([[101], [101], [101], [102], [102]]) - outputs = layer((feat1, feat2)) - self.assertAllClose(outputs, [[1], [4], [1], [6], [3]]) - self.assertAllEqual(outputs.shape.as_list(), [5, 1]) - - @parameterized.named_parameters( - ('sparse', True), - ('dense', False), - ) - def test_cross_one_hot_output(self, sparse): - layer = hashed_crossing.HashedCrossing( - num_bins=5, output_mode='one_hot', sparse=sparse) - feat1 = tf.constant([['A'], ['B'], ['A'], ['B'], ['A']]) - feat2 = tf.constant([[101], [101], [101], [102], [102]]) - outputs = layer((feat1, feat2)) - if sparse: - outputs = tf.sparse.to_dense(outputs) - self.assertAllClose(outputs, [ - [0, 1, 0, 0, 0], - [0, 0, 0, 0, 1], - [0, 1, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 0, 1, 0], - ]) - self.assertAllEqual(outputs.shape.as_list(), [5, 5]) - - def test_cross_output_dtype(self): - layer = hashed_crossing.HashedCrossing(num_bins=2) - self.assertAllEqual(layer(([1], [1])).dtype, tf.int64) - layer = hashed_crossing.HashedCrossing(num_bins=2, dtype=tf.int32) - self.assertAllEqual(layer(([1], [1])).dtype, tf.int32) - layer = hashed_crossing.HashedCrossing(num_bins=2, output_mode='one_hot') - self.assertAllEqual(layer(([1], [1])).dtype, tf.float32) - layer = hashed_crossing.HashedCrossing( - num_bins=2, output_mode='one_hot', dtype=tf.float64) - self.assertAllEqual(layer(([1], [1])).dtype, tf.float64) - - def test_non_list_input_fails(self): - with self.assertRaisesRegex(ValueError, 'should be called on a list'): - 
hashed_crossing.HashedCrossing(num_bins=10)(tf.constant(1)) - - def test_single_input_fails(self): - with self.assertRaisesRegex(ValueError, 'at least two inputs'): - hashed_crossing.HashedCrossing(num_bins=10)([tf.constant(1)]) - - def test_sparse_input_fails(self): - with self.assertRaisesRegex(ValueError, 'inputs should be dense tensors'): - sparse_in = tf.sparse.from_dense(tf.constant([1])) - hashed_crossing.HashedCrossing(num_bins=10)((sparse_in, sparse_in)) - - def test_float_input_fails(self): - with self.assertRaisesRegex(ValueError, 'should have an integer or string'): - hashed_crossing.HashedCrossing(num_bins=10)( - (tf.constant([1.]), tf.constant([1.]))) - - def test_upsupported_shape_input_fails(self): - with self.assertRaisesRegex(ValueError, 'inputs should have shape'): - hashed_crossing.HashedCrossing(num_bins=10)( - (tf.constant([[[1.]]]), tf.constant([[[1.]]]))) - - def test_from_config(self): - layer = hashed_crossing.HashedCrossing( - num_bins=5, output_mode='one_hot', sparse=True) - cloned_layer = hashed_crossing.HashedCrossing.from_config( - layer.get_config()) - feat1 = tf.constant([['A'], ['B'], ['A'], ['B'], ['A']]) - feat2 = tf.constant([[101], [101], [101], [102], [102]]) - original_outputs = layer((feat1, feat2)) - cloned_outputs = cloned_layer((feat1, feat2)) - self.assertAllEqual( - tf.sparse.to_dense(cloned_outputs), - tf.sparse.to_dense(original_outputs)) - - def test_saved_model_keras(self): - string_in = keras.Input(shape=(1,), dtype=tf.string) - int_in = keras.Input(shape=(1,), dtype=tf.int64) - out = hashed_crossing.HashedCrossing(num_bins=10)((string_in, int_in)) - model = keras.Model(inputs=(string_in, int_in), outputs=out) - - string_data = tf.constant([['A'], ['B'], ['A'], ['B'], ['A']]) - int_data = tf.constant([[101], [101], [101], [102], [102]]) - expected_output = [[1], [4], [1], [6], [3]] - - output_data = model((string_data, int_data)) - self.assertAllClose(output_data, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), 'saved_model') - model.save(output_path, save_format='tf') - loaded_model = keras.models.load_model( - output_path, - custom_objects={'HashedCrossing': hashed_crossing.HashedCrossing}) - - # Validate correctness of the new model. 
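Taken together, the failure tests in this file (old and reformatted alike) pin down the layer's input contract. A compact sketch of which calls pass and which raise, assuming eager TF 2.x with `HashedCrossing` available:

    import tensorflow as tf

    layer = tf.keras.layers.HashedCrossing(num_bins=10)

    # Accepted: scalars, 1-D batches, and (batch_size, 1) columns of integer
    # or string dtype.
    layer((tf.constant("A"), tf.constant(101)))                        # shape ()
    layer((tf.constant(["A", "B"]), tf.constant([101, 102])))          # shape (2,)
    layer((tf.constant([["A"], ["B"]]), tf.constant([[101], [102]])))  # shape (2, 1)

    # Rejected: a single input, float dtypes, sparse or ragged tensors, and
    # rank > 2 inputs all raise ValueError, as these tests verify.
    try:
        layer((tf.constant([1.0]), tf.constant([2.0])))
    except ValueError as err:
        print(err)  # ...should have an integer or string dtype...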
- new_output_data = loaded_model((string_data, int_data)) - self.assertAllClose(new_output_data, expected_output) - - -if __name__ == '__main__': - tf.test.main() + @parameterized.named_parameters( + ("python_value", lambda x: x), + ("dense", tf.constant), + ) + def test_cross_scalars(self, data_fn): + layer = hashed_crossing.HashedCrossing(num_bins=10) + feat1 = data_fn("A") + feat2 = data_fn(101) + outputs = layer((feat1, feat2)) + self.assertAllClose(outputs, 1) + self.assertAllEqual(outputs.shape.as_list(), []) + + @parameterized.named_parameters( + ("tuple", tuple), + ("list", list), + ("numpy", np.array), + ("array_like", preprocessing_test_utils.ArrayLike), + ("dense", tf.constant), + ) + def test_cross_batch_of_scalars_1d(self, data_fn): + layer = hashed_crossing.HashedCrossing(num_bins=10) + feat1 = data_fn(["A", "B", "A", "B", "A"]) + feat2 = data_fn([101, 101, 101, 102, 102]) + outputs = layer((feat1, feat2)) + self.assertAllClose(outputs, [1, 4, 1, 6, 3]) + self.assertAllEqual(outputs.shape.as_list(), [5]) + + @parameterized.named_parameters( + ("tuple", tuple), + ("list", list), + ("numpy", np.array), + ("array_like", preprocessing_test_utils.ArrayLike), + ("dense", tf.constant), + ) + def test_cross_batch_of_scalars_2d(self, data_fn): + layer = hashed_crossing.HashedCrossing(num_bins=10) + feat1 = data_fn([["A"], ["B"], ["A"], ["B"], ["A"]]) + feat2 = data_fn([[101], [101], [101], [102], [102]]) + outputs = layer((feat1, feat2)) + self.assertAllClose(outputs, [[1], [4], [1], [6], [3]]) + self.assertAllEqual(outputs.shape.as_list(), [5, 1]) + + @parameterized.named_parameters( + ("sparse", True), + ("dense", False), + ) + def test_cross_one_hot_output(self, sparse): + layer = hashed_crossing.HashedCrossing( + num_bins=5, output_mode="one_hot", sparse=sparse + ) + feat1 = tf.constant([["A"], ["B"], ["A"], ["B"], ["A"]]) + feat2 = tf.constant([[101], [101], [101], [102], [102]]) + outputs = layer((feat1, feat2)) + if sparse: + outputs = tf.sparse.to_dense(outputs) + self.assertAllClose( + outputs, + [ + [0, 1, 0, 0, 0], + [0, 0, 0, 0, 1], + [0, 1, 0, 0, 0], + [0, 1, 0, 0, 0], + [0, 0, 0, 1, 0], + ], + ) + self.assertAllEqual(outputs.shape.as_list(), [5, 5]) + + def test_cross_output_dtype(self): + layer = hashed_crossing.HashedCrossing(num_bins=2) + self.assertAllEqual(layer(([1], [1])).dtype, tf.int64) + layer = hashed_crossing.HashedCrossing(num_bins=2, dtype=tf.int32) + self.assertAllEqual(layer(([1], [1])).dtype, tf.int32) + layer = hashed_crossing.HashedCrossing( + num_bins=2, output_mode="one_hot" + ) + self.assertAllEqual(layer(([1], [1])).dtype, tf.float32) + layer = hashed_crossing.HashedCrossing( + num_bins=2, output_mode="one_hot", dtype=tf.float64 + ) + self.assertAllEqual(layer(([1], [1])).dtype, tf.float64) + + def test_non_list_input_fails(self): + with self.assertRaisesRegex(ValueError, "should be called on a list"): + hashed_crossing.HashedCrossing(num_bins=10)(tf.constant(1)) + + def test_single_input_fails(self): + with self.assertRaisesRegex(ValueError, "at least two inputs"): + hashed_crossing.HashedCrossing(num_bins=10)([tf.constant(1)]) + + def test_sparse_input_fails(self): + with self.assertRaisesRegex( + ValueError, "inputs should be dense tensors" + ): + sparse_in = tf.sparse.from_dense(tf.constant([1])) + hashed_crossing.HashedCrossing(num_bins=10)((sparse_in, sparse_in)) + + def test_float_input_fails(self): + with self.assertRaisesRegex( + ValueError, "should have an integer or string" + ): + hashed_crossing.HashedCrossing(num_bins=10)( + 
(tf.constant([1.0]), tf.constant([1.0])) + ) + + def test_unsupported_shape_input_fails(self): + with self.assertRaisesRegex(ValueError, "inputs should have shape"): + hashed_crossing.HashedCrossing(num_bins=10)( + (tf.constant([[[1.0]]]), tf.constant([[[1.0]]])) + ) + + def test_from_config(self): + layer = hashed_crossing.HashedCrossing( + num_bins=5, output_mode="one_hot", sparse=True + ) + cloned_layer = hashed_crossing.HashedCrossing.from_config( + layer.get_config() + ) + feat1 = tf.constant([["A"], ["B"], ["A"], ["B"], ["A"]]) + feat2 = tf.constant([[101], [101], [101], [102], [102]]) + original_outputs = layer((feat1, feat2)) + cloned_outputs = cloned_layer((feat1, feat2)) + self.assertAllEqual( + tf.sparse.to_dense(cloned_outputs), + tf.sparse.to_dense(original_outputs), + ) + + def test_saving_keras(self): + string_in = keras.Input(shape=(1,), dtype=tf.string) + int_in = keras.Input(shape=(1,), dtype=tf.int64) + out = hashed_crossing.HashedCrossing(num_bins=10)((string_in, int_in)) + model = keras.Model(inputs=(string_in, int_in), outputs=out) + + string_data = tf.constant([["A"], ["B"], ["A"], ["B"], ["A"]]) + int_data = tf.constant([[101], [101], [101], [102], [102]]) + expected_output = [[1], [4], [1], [6], [3]] + + output_data = model((string_data, int_data)) + self.assertAllClose(output_data, expected_output) + + with self.subTest("savedmodel"): + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "saved_model") + model.save(output_path, save_format="tf") + loaded_model = keras.models.load_model( + output_path, + custom_objects={ + "HashedCrossing": hashed_crossing.HashedCrossing + }, + ) + + # Validate correctness of the new model. + new_output_data = loaded_model((string_data, int_data)) + self.assertAllClose(new_output_data, expected_output) + + with self.subTest("keras_v3"): + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "TF2 must be enabled to use the new `.keras` saving." + ) + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "model.keras") + model.save(output_path, save_format="keras_v3") + loaded_model = keras.models.load_model( + output_path, + custom_objects={ + "HashedCrossing": hashed_crossing.HashedCrossing + }, + ) + + # Validate correctness of the new model. 
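Outside the test harness, the round trip exercised by the two subtests looks roughly like the sketch below. The paths are illustrative; `save_format="keras_v3"` assumes a Keras build that already ships the new `.keras` format, and `custom_objects` may be unnecessary when using the public `tf.keras.layers.HashedCrossing` export.

    import tensorflow as tf

    string_in = tf.keras.Input(shape=(1,), dtype=tf.string)
    int_in = tf.keras.Input(shape=(1,), dtype=tf.int64)
    out = tf.keras.layers.HashedCrossing(num_bins=10)((string_in, int_in))
    model = tf.keras.Model(inputs=(string_in, int_in), outputs=out)

    # Legacy SavedModel round trip.
    model.save("/tmp/crossing_savedmodel", save_format="tf")
    restored = tf.keras.models.load_model("/tmp/crossing_savedmodel")

    # New-style `.keras` round trip.
    model.save("/tmp/crossing.keras", save_format="keras_v3")
    restored_v3 = tf.keras.models.load_model("/tmp/crossing.keras")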
+ new_output_data = loaded_model((string_data, int_data)) + self.assertAllClose(new_output_data, expected_output) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/preprocessing/hashing.py b/keras/layers/preprocessing/hashing.py index 1dd13d585a69..77adfee68d0e 100644 --- a/keras/layers/preprocessing/hashing.py +++ b/keras/layers/preprocessing/hashing.py @@ -14,15 +14,16 @@ # ============================================================================== """Keras hashing preprocessing layer.""" -# pylint: disable=g-classes-have-attributes +import tensorflow.compat.v2 as tf from keras import backend from keras.engine import base_layer from keras.engine import base_preprocessing_layer from keras.layers.preprocessing import preprocessing_utils as utils from keras.utils import layer_utils -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.util.tf_export import keras_export INT = utils.INT @@ -31,238 +32,267 @@ COUNT = utils.COUNT -@keras_export('keras.layers.Hashing', - 'keras.layers.experimental.preprocessing.Hashing') +@keras_export( + "keras.layers.Hashing", "keras.layers.experimental.preprocessing.Hashing" +) class Hashing(base_layer.Layer): - """A preprocessing layer which hashes and bins categorical features. - - This layer transforms categorical inputs to hashed output. It element-wise - converts a ints or strings to ints in a fixed range. The stable hash - function uses `tensorflow::ops::Fingerprint` to produce the same output - consistently across all platforms. - - This layer uses [FarmHash64](https://github.com/google/farmhash) by default, - which provides a consistent hashed output across different platforms and is - stable across invocations, regardless of device and context, by mixing the - input bits thoroughly. - - If you want to obfuscate the hashed output, you can also pass a random `salt` - argument in the constructor. In that case, the layer will use the - [SipHash64](https://github.com/google/highwayhash) hash function, with - the `salt` value serving as additional input to the hash function. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - **Example (FarmHash64)** - - >>> layer = tf.keras.layers.Hashing(num_bins=3) - >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] - >>> layer(inp) - - - **Example (FarmHash64) with a mask value** - - >>> layer = tf.keras.layers.Hashing(num_bins=3, mask_value='') - >>> inp = [['A'], ['B'], [''], ['C'], ['D']] - >>> layer(inp) - - - **Example (SipHash64)** - - >>> layer = tf.keras.layers.Hashing(num_bins=3, salt=[133, 137]) - >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] - >>> layer(inp) - - - **Example (Siphash64 with a single integer, same as `salt=[133, 133]`)** - - >>> layer = tf.keras.layers.Hashing(num_bins=3, salt=133) - >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] - >>> layer(inp) - - - Args: - num_bins: Number of hash bins. Note that this includes the `mask_value` bin, - so the effective number of bins is `(num_bins - 1)` if `mask_value` is - set. - mask_value: A value that represents masked inputs, which are mapped to - index 0. Defaults to None, meaning no mask term will be added and the - hashing will start at index 0. - salt: A single unsigned integer or None. - If passed, the hash function used will be SipHash64, with these values - used as an additional input (known as a "salt" in cryptography). - These should be non-zero. 
Defaults to `None` (in that - case, the FarmHash64 hash function is used). It also supports - tuple/list of 2 unsigned integer numbers, see reference paper for details. - output_mode: Specification for the output of the layer. Defaults to `"int"`. - Values can be `"int"`, `"one_hot"`, `"multi_hot"`, or `"count"` - configuring the layer as follows: - - `"int"`: Return the integer bin indices directly. - - `"one_hot"`: Encodes each individual element in the input into an - array the same size as `num_bins`, containing a 1 at the input's bin - index. If the last dimension is size 1, will encode on that dimension. - If the last dimension is not size 1, will append a new dimension for - the encoded output. - - `"multi_hot"`: Encodes each sample in the input into a single array - the same size as `num_bins`, containing a 1 for each bin index - index present in the sample. Treats the last dimension as the sample - dimension, if input shape is `(..., sample_length)`, output shape will - be `(..., num_tokens)`. - - `"count"`: As `"multi_hot"`, but the int array contains a count of the - number of times the bin index appeared in the sample. - sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, - and `"count"` output modes. If True, returns a `SparseTensor` instead of - a dense `Tensor`. Defaults to False. - **kwargs: Keyword arguments to construct a layer. - - Input shape: - A single or list of string, int32 or int64 `Tensor`, - `SparseTensor` or `RaggedTensor` of shape `(batch_size, ...,)` - - Output shape: - An int64 `Tensor`, `SparseTensor` or `RaggedTensor` of shape - `(batch_size, ...)`. If any input is `RaggedTensor` then output is - `RaggedTensor`, otherwise if any input is `SparseTensor` then output is - `SparseTensor`, otherwise the output is `Tensor`. - - Reference: - - [SipHash with salt](https://www.131002.net/siphash/siphash.pdf) - - """ - - def __init__(self, - num_bins, - mask_value=None, - salt=None, - output_mode='int', - sparse=False, - **kwargs): - if num_bins is None or num_bins <= 0: - raise ValueError( - f'The `num_bins` for `Hashing` cannot be `None` or non-positive ' - f'values. Received: num_bins={num_bins}.') - - # By default, output int64 when output_mode='int' and floats otherwise. - if 'dtype' not in kwargs or kwargs['dtype'] is None: - kwargs['dtype'] = tf.int64 if output_mode == INT else backend.floatx() - elif output_mode == 'int' and not tf.as_dtype(kwargs['dtype']).is_integer: - # Compat for when dtype was always floating and ignored by the layer. - kwargs['dtype'] = tf.int64 - - super().__init__(**kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell('Hashing').set(True) - - # Check dtype only after base layer parses it; dtype parsing is complex. - if output_mode == INT and not tf.as_dtype(self.compute_dtype).is_integer: - input_dtype = kwargs['dtype'] - raise ValueError('When `output_mode="int"`, `dtype` should be an integer ' - f'type. Received: dtype={input_dtype}') - - # 'output_mode' must be one of (INT, ONE_HOT, MULTI_HOT, COUNT) - layer_utils.validate_string_arg( - output_mode, - allowable_strings=(INT, ONE_HOT, MULTI_HOT, COUNT), - layer_name=self.__class__.__name__, - arg_name='output_mode') - - if sparse and output_mode == INT: - raise ValueError(f'`sparse` may only be true if `output_mode` is ' - f'`"one_hot"`, `"multi_hot"`, or `"count"`. 
' - f'Received: sparse={sparse} and ' - f'output_mode={output_mode}') - - self.num_bins = num_bins - self.mask_value = mask_value - self.strong_hash = True if salt is not None else False - self.output_mode = output_mode - self.sparse = sparse - self.salt = None - if salt is not None: - if isinstance(salt, (tuple, list)) and len(salt) == 2: - self.salt = salt - elif isinstance(salt, int): - self.salt = [salt, salt] - else: - raise ValueError( - f'The `salt` argument for `Hashing` can only be a tuple of size 2 ' - f'integers, or a single integer. Received: salt={salt}.') - - def call(self, inputs): - inputs = utils.ensure_tensor(inputs) - if isinstance(inputs, tf.SparseTensor): - indices = tf.SparseTensor( - indices=inputs.indices, - values=self._hash_values_to_bins(inputs.values), - dense_shape=inputs.dense_shape) - else: - indices = self._hash_values_to_bins(inputs) - return utils.encode_categorical_inputs( - indices, - output_mode=self.output_mode, - depth=self.num_bins, - sparse=self.sparse, - dtype=self.compute_dtype) - - def _hash_values_to_bins(self, values): - """Converts a non-sparse tensor of values to bin indices.""" - hash_bins = self.num_bins - mask = None - # If mask_value is set, the zeroth bin is reserved for it. - if self.mask_value is not None and hash_bins > 1: - hash_bins -= 1 - mask = tf.equal(values, self.mask_value) - # Convert all values to strings before hashing. - if values.dtype.is_integer: - values = tf.as_string(values) - # Hash the strings. - if self.strong_hash: - values = tf.strings.to_hash_bucket_strong( - values, hash_bins, name='hash', key=self.salt) - else: - values = tf.strings.to_hash_bucket_fast(values, hash_bins, name='hash') - if mask is not None: - values = tf.add(values, tf.ones_like(values)) - values = tf.where(mask, tf.zeros_like(values), values) - return values - - def compute_output_shape(self, input_shape): - return input_shape - - def compute_output_signature(self, input_spec): - output_shape = self.compute_output_shape(input_spec.shape) - if isinstance(input_spec, tf.SparseTensorSpec): - return tf.SparseTensorSpec(shape=output_shape, dtype=self.compute_dtype) - else: - return tf.TensorSpec(shape=output_shape, dtype=self.compute_dtype) - - def get_config(self): - config = super().get_config() - config.update({ - 'num_bins': self.num_bins, - 'salt': self.salt, - 'mask_value': self.mask_value, - 'output_mode': self.output_mode, - 'sparse': self.sparse, - }) - return config + """A preprocessing layer which hashes and bins categorical features. + + This layer transforms categorical inputs to hashed output. It element-wise + converts ints or strings to ints in a fixed range. The stable hash + function uses `tensorflow::ops::Fingerprint` to produce the same output + consistently across all platforms. + + This layer uses [FarmHash64](https://github.com/google/farmhash) by default, + which provides a consistent hashed output across different platforms and is + stable across invocations, regardless of device and context, by mixing the + input bits thoroughly. + + If you want to obfuscate the hashed output, you can also pass a random + `salt` argument in the constructor. In that case, the layer will use the + [SipHash64](https://github.com/google/highwayhash) hash function, with + the `salt` value serving as additional input to the hash function. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). 
+ + **Example (FarmHash64)** + + >>> layer = tf.keras.layers.Hashing(num_bins=3) + >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] + >>> layer(inp) + + + **Example (FarmHash64) with a mask value** + + >>> layer = tf.keras.layers.Hashing(num_bins=3, mask_value='') + >>> inp = [['A'], ['B'], [''], ['C'], ['D']] + >>> layer(inp) + + + **Example (SipHash64)** + + >>> layer = tf.keras.layers.Hashing(num_bins=3, salt=[133, 137]) + >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] + >>> layer(inp) + + + **Example (Siphash64 with a single integer, same as `salt=[133, 133]`)** + + >>> layer = tf.keras.layers.Hashing(num_bins=3, salt=133) + >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] + >>> layer(inp) + + + Args: + num_bins: Number of hash bins. Note that this includes the `mask_value` + bin, so the effective number of bins is `(num_bins - 1)` if `mask_value` + is set. + mask_value: A value that represents masked inputs, which are mapped to + index 0. `None` means no mask term will be added and the + hashing will start at index 0. Defaults to `None`. + salt: A single unsigned integer or None. + If passed, the hash function used will be SipHash64, with these values + used as an additional input (known as a "salt" in cryptography). + These should be non-zero. If `None`, uses the FarmHash64 hash function. + It also supports a tuple/list of 2 unsigned integers; see the + reference paper for details. Defaults to `None`. + output_mode: Specification for the output of the layer. Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, or + `"count"` configuring the layer as follows: + - `"int"`: Return the integer bin indices directly. + - `"one_hot"`: Encodes each individual element in the input into an + array the same size as `num_bins`, containing a 1 at the input's bin + index. If the last dimension is size 1, will encode on that + dimension. If the last dimension is not size 1, will append a new + dimension for the encoded output. + - `"multi_hot"`: Encodes each sample in the input into a single array + the same size as `num_bins`, containing a 1 for each bin index + present in the sample. Treats the last dimension as the sample + dimension; if input shape is `(..., sample_length)`, output shape + will be `(..., num_tokens)`. + - `"count"`: As `"multi_hot"`, but the int array contains a count of + the number of times the bin index appeared in the sample. + Defaults to `"int"`. + sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, + and `"count"` output modes. If `True`, returns a `SparseTensor` instead of + a dense `Tensor`. Defaults to `False`. + **kwargs: Keyword arguments to construct a layer. + + Input shape: + A single or list of string, int32 or int64 `Tensor`, + `SparseTensor` or `RaggedTensor` of shape `(batch_size, ...,)` + + Output shape: + An int64 `Tensor`, `SparseTensor` or `RaggedTensor` of shape + `(batch_size, ...)`. If any input is `RaggedTensor` then output is + `RaggedTensor`, otherwise if any input is `SparseTensor` then output is + `SparseTensor`, otherwise the output is `Tensor`. + + Reference: + - [SipHash with salt](https://www.131002.net/siphash/siphash.pdf) + + """ + + def __init__( + self, + num_bins, + mask_value=None, + salt=None, + output_mode="int", + sparse=False, + **kwargs, + ): + if num_bins is None or num_bins <= 0: + raise ValueError( + "The `num_bins` for `Hashing` cannot be `None` or " + f"non-positive values. Received: num_bins={num_bins}." + ) + + # By default, output int64 when output_mode='int' and floats otherwise. 
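The four `output_mode` values documented above differ only in how the integer bin indices are post-processed. A minimal sketch of the equivalent raw-TF encodings for a toy sample, assuming `num_bins=3`; the layer itself routes this through `utils.encode_categorical_inputs`:

    import tensorflow as tf

    num_bins = 3
    bins = tf.constant([[1, 0, 1]])  # "int" mode: bin indices for one sample

    per_element = tf.one_hot(bins, depth=num_bins)   # "one_hot": shape (1, 3, 3)
    multi_hot = tf.reduce_max(per_element, axis=-2)  # "multi_hot": [[1., 1., 0.]]
    count = tf.reduce_sum(per_element, axis=-2)      # "count":     [[1., 2., 0.]]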
+ if "dtype" not in kwargs or kwargs["dtype"] is None: + kwargs["dtype"] = ( + tf.int64 if output_mode == INT else backend.floatx() + ) + elif ( + output_mode == "int" and not tf.as_dtype(kwargs["dtype"]).is_integer + ): + # Compat for when dtype was always floating and ignored by the + # layer. + kwargs["dtype"] = tf.int64 + + super().__init__(**kwargs) + base_preprocessing_layer.keras_kpl_gauge.get_cell("Hashing").set(True) + + # Check dtype only after base layer parses it; dtype parsing is complex. + if ( + output_mode == INT + and not tf.as_dtype(self.compute_dtype).is_integer + ): + input_dtype = kwargs["dtype"] + raise ValueError( + 'When `output_mode="int"`, `dtype` should be an integer ' + f"type. Received: dtype={input_dtype}" + ) + + # 'output_mode' must be one of (INT, ONE_HOT, MULTI_HOT, COUNT) + layer_utils.validate_string_arg( + output_mode, + allowable_strings=(INT, ONE_HOT, MULTI_HOT, COUNT), + layer_name=self.__class__.__name__, + arg_name="output_mode", + ) + + if sparse and output_mode == INT: + raise ValueError( + "`sparse` may only be true if `output_mode` is " + '`"one_hot"`, `"multi_hot"`, or `"count"`. ' + f"Received: sparse={sparse} and " + f"output_mode={output_mode}" + ) + + self.num_bins = num_bins + self.mask_value = mask_value + self.strong_hash = True if salt is not None else False + self.output_mode = output_mode + self.sparse = sparse + self.salt = None + if salt is not None: + if isinstance(salt, (tuple, list)) and len(salt) == 2: + self.salt = salt + elif isinstance(salt, int): + self.salt = [salt, salt] + else: + raise ValueError( + "The `salt` argument for `Hashing` can only be a tuple of " + "size 2 integers, or a single integer. " + f"Received: salt={salt}." + ) + + def call(self, inputs): + inputs = utils.ensure_tensor(inputs) + if isinstance(inputs, tf.SparseTensor): + indices = tf.SparseTensor( + indices=inputs.indices, + values=self._hash_values_to_bins(inputs.values), + dense_shape=inputs.dense_shape, + ) + else: + indices = self._hash_values_to_bins(inputs) + return utils.encode_categorical_inputs( + indices, + output_mode=self.output_mode, + depth=self.num_bins, + sparse=self.sparse, + dtype=self.compute_dtype, + ) + + def _hash_values_to_bins(self, values): + """Converts a non-sparse tensor of values to bin indices.""" + hash_bins = self.num_bins + mask = None + # If mask_value is set, the zeroth bin is reserved for it. + if self.mask_value is not None and hash_bins > 1: + hash_bins -= 1 + mask = tf.equal(values, self.mask_value) + # Convert all values to strings before hashing. + if values.dtype.is_integer: + values = tf.as_string(values) + # Hash the strings. 
+ if self.strong_hash: + values = tf.strings.to_hash_bucket_strong( + values, hash_bins, name="hash", key=self.salt + ) + else: + values = tf.strings.to_hash_bucket_fast( + values, hash_bins, name="hash" + ) + if mask is not None: + values = tf.add(values, tf.ones_like(values)) + values = tf.where(mask, tf.zeros_like(values), values) + return values + + def compute_output_shape(self, input_shape): + return input_shape + + def compute_output_signature(self, input_spec): + output_shape = self.compute_output_shape(input_spec.shape) + if isinstance(input_spec, tf.SparseTensorSpec): + return tf.SparseTensorSpec( + shape=output_shape, dtype=self.compute_dtype + ) + else: + return tf.TensorSpec(shape=output_shape, dtype=self.compute_dtype) + + def get_config(self): + config = super().get_config() + config.update( + { + "num_bins": self.num_bins, + "salt": self.salt, + "mask_value": self.mask_value, + "output_mode": self.output_mode, + "sparse": self.sparse, + } + ) + return config diff --git a/keras/layers/preprocessing/hashing_distribution_test.py b/keras/layers/preprocessing/hashing_distribution_test.py index 9814b1d38f83..af6a1fab4c29 100644 --- a/keras/layers/preprocessing/hashing_distribution_test.py +++ b/keras/layers/preprocessing/hashing_distribution_test.py @@ -15,6 +15,8 @@ """Tests for keras.layers.preprocessing.hashing.""" +import numpy as np +import tensorflow.compat.v2 as tf import keras from keras import backend @@ -23,42 +25,49 @@ from keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils + +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) @test_utils.run_v2_only @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( - strategy=strategy_combinations.all_strategies + - strategy_combinations.multi_worker_mirrored_strategies + - strategy_combinations.parameter_server_strategies_single_worker + - strategy_combinations.parameter_server_strategies_multi_worker, - mode=["eager"])) -class HashingDistributionTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_strategy(self, strategy): - if (backend.is_tpu_strategy(strategy) and - not tf_test_utils.is_mlir_bridge_enabled()): - self.skipTest("TPU tests require MLIR bridge") + strategy=strategy_combinations.all_strategies + + strategy_combinations.multi_worker_mirrored_strategies + + strategy_combinations.parameter_server_strategies_single_worker + + strategy_combinations.parameter_server_strategies_multi_worker, + mode=["eager"], + ) +) +class HashingDistributionTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_strategy(self, strategy): + if ( + backend.is_tpu_strategy(strategy) + and not tf_test_utils.is_mlir_bridge_enabled() + ): + self.skipTest("TPU tests require MLIR bridge") - input_data = np.asarray([["omar"], ["stringer"], ["marlo"], ["wire"]]) - input_dataset = tf.data.Dataset.from_tensor_slices(input_data).batch( - 2, drop_remainder=True) - expected_output = [[0], [0], [1], [0]] + input_data = np.asarray([["omar"], ["stringer"], ["marlo"], ["wire"]]) + input_dataset = tf.data.Dataset.from_tensor_slices(input_data).batch( + 2, drop_remainder=True + ) + expected_output = [[0], [0], [1], [0]] - tf.config.set_soft_device_placement(True) + 
tf.config.set_soft_device_placement(True) - with strategy.scope(): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = hashing.Hashing(num_bins=2) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_dataset) - self.assertAllEqual(expected_output, output_dataset) + with strategy.scope(): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = hashing.Hashing(num_bins=2) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_dataset) + self.assertAllEqual(expected_output, output_dataset) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/layers/preprocessing/hashing_test.py b/keras/layers/preprocessing/hashing_test.py index f7d018a4571e..7bb20dc1eab8 100644 --- a/keras/layers/preprocessing/hashing_test.py +++ b/keras/layers/preprocessing/hashing_test.py @@ -15,6 +15,9 @@ """Tests for hashing layer.""" import os + +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized import keras @@ -24,393 +27,444 @@ from keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes(always_skip_v1=True) class HashingTest(test_combinations.TestCase): - - @parameterized.named_parameters( - ('list', list), - ('tuple', tuple), - ('numpy', np.array), - ('array_like', preprocessing_test_utils.ArrayLike), - ) - def test_tensor_like_inputs(self, data_fn): - input_data = data_fn([0, 1, 2, 3, 4]) - expected_output = [1, 0, 1, 0, 2] - - layer = hashing.Hashing(num_bins=3) - output_data = layer(input_data) - self.assertAllEqual(output_data, expected_output) - - def test_hash_single_bin(self): - layer = hashing.Hashing(num_bins=1) - inp = np.asarray([['A'], ['B'], ['C'], ['D'], ['E']]) - output = layer(inp) - self.assertAllClose([[0], [0], [0], [0], [0]], output) - - def test_hash_dense_input_farmhash(self): - layer = hashing.Hashing(num_bins=2) - inp = np.asarray([['omar'], ['stringer'], ['marlo'], ['wire'], - ['skywalker']]) - output = layer(inp) - # Assert equal for hashed output that should be true on all platforms. - self.assertAllClose([[0], [0], [1], [0], [0]], output) - - def test_hash_dense_input_mask_value_farmhash(self): - empty_mask_layer = hashing.Hashing(num_bins=3, mask_value='') - omar_mask_layer = hashing.Hashing(num_bins=3, mask_value='omar') - inp = np.asarray([['omar'], ['stringer'], ['marlo'], ['wire'], - ['skywalker']]) - empty_mask_output = empty_mask_layer(inp) - omar_mask_output = omar_mask_layer(inp) - # Outputs should be one more than test_hash_dense_input_farmhash (the zeroth - # bin is now reserved for masks). - self.assertAllClose([[1], [1], [2], [1], [1]], empty_mask_output) - # 'omar' should map to 0. - self.assertAllClose([[0], [1], [2], [1], [1]], omar_mask_output) - - def test_hash_dense_list_input_farmhash(self): - layer = hashing.Hashing(num_bins=2) - inp = [['omar'], ['stringer'], ['marlo'], ['wire'], ['skywalker']] - output = layer(inp) - # Assert equal for hashed output that should be true on all platforms. 
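The distribution test above follows the usual pattern for running a preprocessing layer under `tf.distribute`: build the model inside the strategy scope, then call `predict` on a batched dataset. A sketch with `MirroredStrategy`, one of the strategies in the generated combinations:

    import numpy as np
    import tensorflow as tf

    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        inputs = tf.keras.Input(shape=(None,), dtype=tf.string)
        outputs = tf.keras.layers.Hashing(num_bins=2)(inputs)
        model = tf.keras.Model(inputs, outputs)

    data = np.asarray([["omar"], ["stringer"], ["marlo"], ["wire"]])
    dataset = tf.data.Dataset.from_tensor_slices(data).batch(2, drop_remainder=True)
    print(model.predict(dataset))  # [[0], [0], [1], [0]] per the test above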
- self.assertAllClose([[0], [0], [1], [0], [0]], output) - - inp = ['omar', 'stringer', 'marlo', 'wire', 'skywalker'] - output = layer(inp) - # Assert equal for hashed output that should be true on all platforms. - self.assertAllClose([0, 0, 1, 0, 0], output) - - def test_hash_dense_int_input_farmhash(self): - layer = hashing.Hashing(num_bins=3) - inp = np.asarray([[0], [1], [2], [3], [4]]) - output = layer(inp) - # Assert equal for hashed output that should be true on all platforms. - self.assertAllClose([[1], [0], [1], [0], [2]], output) - - def test_hash_dense_input_siphash(self): - layer = hashing.Hashing(num_bins=2, salt=[133, 137]) - inp = np.asarray([['omar'], ['stringer'], ['marlo'], ['wire'], - ['skywalker']]) - output = layer(inp) - # Assert equal for hashed output that should be true on all platforms. - # Note the result is different from FarmHash. - self.assertAllClose([[0], [1], [0], [1], [0]], output) - - layer_2 = hashing.Hashing(num_bins=2, salt=[211, 137]) - output_2 = layer_2(inp) - # Note the result is different from (133, 137). - self.assertAllClose([[1], [0], [1], [0], [1]], output_2) - - def test_hash_dense_int_input_siphash(self): - layer = hashing.Hashing(num_bins=3, salt=[133, 137]) - inp = np.asarray([[0], [1], [2], [3], [4]]) - output = layer(inp) - # Assert equal for hashed output that should be true on all platforms. - self.assertAllClose([[1], [1], [2], [0], [1]], output) - - def test_hash_sparse_input_farmhash(self): - layer = hashing.Hashing(num_bins=2) - indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] - inp = tf.SparseTensor( - indices=indices, - values=['omar', 'stringer', 'marlo', 'wire', 'skywalker'], - dense_shape=[3, 2]) - output = layer(inp) - self.assertAllClose(indices, output.indices) - self.assertAllClose([0, 0, 1, 0, 0], output.values) - - def test_hash_sparse_input_mask_value_farmhash(self): - empty_mask_layer = hashing.Hashing(num_bins=3, mask_value='') - omar_mask_layer = hashing.Hashing(num_bins=3, mask_value='omar') - indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] - inp = tf.SparseTensor( - indices=indices, - values=['omar', 'stringer', 'marlo', 'wire', 'skywalker'], - dense_shape=[3, 2]) - empty_mask_output = empty_mask_layer(inp) - omar_mask_output = omar_mask_layer(inp) - self.assertAllClose(indices, omar_mask_output.indices) - self.assertAllClose(indices, empty_mask_output.indices) - # Outputs should be one more than test_hash_sparse_input_farmhash (the - # zeroth bin is now reserved for masks). - self.assertAllClose([1, 1, 2, 1, 1], empty_mask_output.values) - # 'omar' should map to 0. - self.assertAllClose([0, 1, 2, 1, 1], omar_mask_output.values) - - def test_hash_sparse_int_input_farmhash(self): - layer = hashing.Hashing(num_bins=3) - indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] - inp = tf.SparseTensor( - indices=indices, values=[0, 1, 2, 3, 4], dense_shape=[3, 2]) - output = layer(inp) - self.assertAllClose(indices, output.indices) - self.assertAllClose([1, 0, 1, 0, 2], output.values) - - def test_hash_sparse_input_siphash(self): - layer = hashing.Hashing(num_bins=2, salt=[133, 137]) - indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] - inp = tf.SparseTensor( - indices=indices, - values=['omar', 'stringer', 'marlo', 'wire', 'skywalker'], - dense_shape=[3, 2]) - output = layer(inp) - self.assertAllClose(output.indices, indices) - # The result should be same with test_hash_dense_input_siphash. 
- self.assertAllClose([0, 1, 0, 1, 0], output.values) - - layer_2 = hashing.Hashing(num_bins=2, salt=[211, 137]) - output = layer_2(inp) - # The result should be same with test_hash_dense_input_siphash. - self.assertAllClose([1, 0, 1, 0, 1], output.values) - - def test_hash_sparse_int_input_siphash(self): - layer = hashing.Hashing(num_bins=3, salt=[133, 137]) - indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] - inp = tf.SparseTensor( - indices=indices, values=[0, 1, 2, 3, 4], dense_shape=[3, 2]) - output = layer(inp) - self.assertAllClose(indices, output.indices) - self.assertAllClose([1, 1, 2, 0, 1], output.values) - - def test_hash_ragged_string_input_farmhash(self): - layer = hashing.Hashing(num_bins=2) - inp_data = tf.ragged.constant( - [['omar', 'stringer', 'marlo', 'wire'], ['marlo', 'skywalker', 'wire']], - dtype=tf.string) - out_data = layer(inp_data) - # Same hashed output as test_hash_sparse_input_farmhash - expected_output = [[0, 0, 1, 0], [1, 0, 0]] - self.assertAllEqual(expected_output, out_data) - - inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=tf.string) - out_t = layer(inp_t) - model = training.Model(inputs=inp_t, outputs=out_t) - self.assertAllClose(out_data, model.predict(inp_data)) - - def test_hash_ragged_input_mask_value(self): - empty_mask_layer = hashing.Hashing(num_bins=3, mask_value='') - omar_mask_layer = hashing.Hashing(num_bins=3, mask_value='omar') - inp_data = tf.ragged.constant( - [['omar', 'stringer', 'marlo', 'wire'], ['marlo', 'skywalker', 'wire']], - dtype=tf.string) - empty_mask_output = empty_mask_layer(inp_data) - omar_mask_output = omar_mask_layer(inp_data) - # Outputs should be one more than test_hash_ragged_string_input_farmhash - # (the zeroth bin is now reserved for masks). - expected_output = [[1, 1, 2, 1], [2, 1, 1]] - self.assertAllClose(expected_output, empty_mask_output) - # 'omar' should map to 0. 
- expected_output = [[0, 1, 2, 1], [2, 1, 1]] - self.assertAllClose(expected_output, omar_mask_output) - - def test_hash_ragged_int_input_farmhash(self): - layer = hashing.Hashing(num_bins=3) - inp_data = tf.ragged.constant([[0, 1, 3, 4], [2, 1, 0]], dtype=tf.int64) - out_data = layer(inp_data) - # Same hashed output as test_hash_sparse_input_farmhash - expected_output = [[1, 0, 0, 2], [1, 0, 1]] - self.assertAllEqual(expected_output, out_data) - - inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=tf.int64) - out_t = layer(inp_t) - model = training.Model(inputs=inp_t, outputs=out_t) - self.assertAllClose(out_data, model.predict(inp_data)) - - def test_hash_ragged_string_input_siphash(self): - layer = hashing.Hashing(num_bins=2, salt=[133, 137]) - inp_data = tf.ragged.constant( - [['omar', 'stringer', 'marlo', 'wire'], ['marlo', 'skywalker', 'wire']], - dtype=tf.string) - out_data = layer(inp_data) - # Same hashed output as test_hash_dense_input_siphash - expected_output = [[0, 1, 0, 1], [0, 0, 1]] - self.assertAllEqual(expected_output, out_data) - - inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=tf.string) - out_t = layer(inp_t) - model = training.Model(inputs=inp_t, outputs=out_t) - self.assertAllClose(out_data, model.predict(inp_data)) - - layer_2 = hashing.Hashing(num_bins=2, salt=[211, 137]) - out_data = layer_2(inp_data) - expected_output = [[1, 0, 1, 0], [1, 1, 0]] - self.assertAllEqual(expected_output, out_data) - - out_t = layer_2(inp_t) - model = training.Model(inputs=inp_t, outputs=out_t) - self.assertAllClose(out_data, model.predict(inp_data)) - - def test_hash_ragged_int_input_siphash(self): - layer = hashing.Hashing(num_bins=3, salt=[133, 137]) - inp_data = tf.ragged.constant([[0, 1, 3, 4], [2, 1, 0]], dtype=tf.int64) - out_data = layer(inp_data) - # Same hashed output as test_hash_sparse_input_farmhash - expected_output = [[1, 1, 0, 1], [2, 1, 1]] - self.assertAllEqual(expected_output, out_data) - - inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=tf.int64) - out_t = layer(inp_t) - model = training.Model(inputs=inp_t, outputs=out_t) - self.assertAllClose(out_data, model.predict(inp_data)) - - def test_invalid_inputs(self): - with self.assertRaisesRegex(ValueError, 'cannot be `None`'): - _ = hashing.Hashing(num_bins=None) - with self.assertRaisesRegex(ValueError, 'cannot be `None`'): - _ = hashing.Hashing(num_bins=-1) - with self.assertRaisesRegex(ValueError, 'can only be a tuple of size 2'): - _ = hashing.Hashing(num_bins=2, salt='string') - with self.assertRaisesRegex(ValueError, 'can only be a tuple of size 2'): - _ = hashing.Hashing(num_bins=2, salt=[1]) - with self.assertRaisesRegex(ValueError, 'can only be a tuple of size 2'): - _ = hashing.Hashing(num_bins=1, salt=tf.constant([133, 137])) - - def test_one_hot_output(self): - input_array = np.array([0, 1, 2, 3, 4]) - - expected_output = [[0., 1., 0.], - [1., 0., 0.], - [0., 1., 0.], - [1., 0., 0.], - [0., 0., 1.]] - expected_output_shape = [None, 3] - - inputs = keras.Input(shape=(1,), dtype='int32') - layer = hashing.Hashing(num_bins=3, output_mode='one_hot') - outputs = layer(inputs) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - - model = keras.Model(inputs, outputs) - output_data = model(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_multi_hot_output(self): - input_array = np.array([0, 1, 2, 3, 4]) - - expected_output = [1., 1., 1.] 
- expected_output_shape = [None, 3] - - inputs = keras.Input(shape=(3,), dtype='int32') - layer = hashing.Hashing(num_bins=3, output_mode='multi_hot') - outputs = layer(inputs) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - - model = keras.Model(inputs, outputs) - output_data = model(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_count_output(self): - input_array = np.array([0, 1, 2, 3, 4]) - - expected_output = [2., 2., 1.] - expected_output_shape = [None, 3] - - inputs = keras.Input(shape=(3,), dtype='int32') - layer = hashing.Hashing(num_bins=3, output_mode='count') - outputs = layer(inputs) - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - - model = keras.Model(inputs, outputs) - output_data = model(input_array) - self.assertAllEqual(expected_output, output_data) - - @parameterized.named_parameters( - ('int32', tf.int32), - ('int64', tf.int64), - ) - def test_output_dtype(self, dtype): - input_data = keras.Input(batch_size=16, shape=(4,), dtype='string') - layer = hashing.Hashing(num_bins=3, dtype=dtype) - output = layer(input_data) - self.assertAllEqual(output.dtype, dtype) - - def test_legacy_dtype_compat(self): - inputs = keras.Input(batch_size=16, shape=(4,), dtype='string') - layer = hashing.Hashing(num_bins=3, dtype='float32') - outputs = layer(inputs) - self.assertAllEqual(outputs.dtype, tf.int64) - # In TF1 we sometimes face an explicit dtype=None in the config. - layer = hashing.Hashing(num_bins=3, dtype=None) - outputs = layer(inputs) - self.assertAllEqual(outputs.dtype, tf.int64) - - @parameterized.named_parameters( - ('float32', tf.float32), - ('float64', tf.float64), - ) - def test_one_hot_output_dtype(self, dtype): - input_data = keras.Input(batch_size=16, shape=(1,), dtype='string') - layer = hashing.Hashing(num_bins=3, output_mode='one_hot', dtype=dtype) - output = layer(input_data) - self.assertAllEqual(output.dtype, dtype) - - def test_hash_compute_output_signature(self): - input_shape = tf.TensorShape([2, 3]) - input_spec = tf.TensorSpec(input_shape, tf.string) - layer = hashing.Hashing(num_bins=2) - output_spec = layer.compute_output_signature(input_spec) - self.assertEqual(output_spec.shape.dims, input_shape.dims) - self.assertEqual(output_spec.dtype, tf.int64) - - @test_utils.run_v2_only - def test_config_with_custom_name(self): - layer = hashing.Hashing(num_bins=2, name='hashing') - config = layer.get_config() - layer_1 = hashing.Hashing.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_saved_model(self): - input_data = np.array(['omar', 'stringer', 'marlo', 'wire', 'skywalker']) - - inputs = keras.Input(shape=(None,), dtype=tf.string) - outputs = hashing.Hashing(num_bins=100)(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - - original_output_data = model(input_data) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), 'tf_keras_saved_model') - model.save(output_path, save_format='tf') - loaded_model = keras.models.load_model(output_path) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. 
- new_output_data = loaded_model(input_data) - self.assertAllClose(new_output_data, original_output_data) - - @parameterized.named_parameters( - ( - 'list_input', - [1, 2, 3], - [1, 1, 1], - ), - ( - 'list_input_2d', - [[1], [2], [3]], - [[1], [1], [1]], - ), - ( - 'list_input_2d_multiple', - [[1, 2], [2, 3], [3, 4]], - [[1, 1], [1, 1], [1, 1]], - ), - ( - 'list_input_3d', - [[[1], [2]], [[2], [3]], [[3], [4]]], - [[[1], [1]], [[1], [1]], [[1], [1]]], - ), - ) - def test_hash_list_input(self, input_data, expected): - layer = hashing.Hashing(num_bins=2) - out_data = layer(input_data) - self.assertAllEqual(expected, out_data.numpy().tolist()) - - -if __name__ == '__main__': - tf.test.main() + @parameterized.named_parameters( + ("list", list), + ("tuple", tuple), + ("numpy", np.array), + ("array_like", preprocessing_test_utils.ArrayLike), + ) + def test_tensor_like_inputs(self, data_fn): + input_data = data_fn([0, 1, 2, 3, 4]) + expected_output = [1, 0, 1, 0, 2] + + layer = hashing.Hashing(num_bins=3) + output_data = layer(input_data) + self.assertAllEqual(output_data, expected_output) + + def test_hash_single_bin(self): + layer = hashing.Hashing(num_bins=1) + inp = np.asarray([["A"], ["B"], ["C"], ["D"], ["E"]]) + output = layer(inp) + self.assertAllClose([[0], [0], [0], [0], [0]], output) + + def test_hash_dense_input_farmhash(self): + layer = hashing.Hashing(num_bins=2) + inp = np.asarray( + [["omar"], ["stringer"], ["marlo"], ["wire"], ["skywalker"]] + ) + output = layer(inp) + # Assert equal for hashed output that should be true on all platforms. + self.assertAllClose([[0], [0], [1], [0], [0]], output) + + def test_hash_dense_input_mask_value_farmhash(self): + empty_mask_layer = hashing.Hashing(num_bins=3, mask_value="") + omar_mask_layer = hashing.Hashing(num_bins=3, mask_value="omar") + inp = np.asarray( + [["omar"], ["stringer"], ["marlo"], ["wire"], ["skywalker"]] + ) + empty_mask_output = empty_mask_layer(inp) + omar_mask_output = omar_mask_layer(inp) + # Outputs should be one more than test_hash_dense_input_farmhash (the + # zeroth bin is now reserved for masks). + self.assertAllClose([[1], [1], [2], [1], [1]], empty_mask_output) + # 'omar' should map to 0. + self.assertAllClose([[0], [1], [2], [1], [1]], omar_mask_output) + + def test_hash_dense_list_input_farmhash(self): + layer = hashing.Hashing(num_bins=2) + inp = [["omar"], ["stringer"], ["marlo"], ["wire"], ["skywalker"]] + output = layer(inp) + # Assert equal for hashed output that should be true on all platforms. + self.assertAllClose([[0], [0], [1], [0], [0]], output) + + inp = ["omar", "stringer", "marlo", "wire", "skywalker"] + output = layer(inp) + # Assert equal for hashed output that should be true on all platforms. + self.assertAllClose([0, 0, 1, 0, 0], output) + + def test_hash_dense_int_input_farmhash(self): + layer = hashing.Hashing(num_bins=3) + inp = np.asarray([[0], [1], [2], [3], [4]]) + output = layer(inp) + # Assert equal for hashed output that should be true on all platforms. + self.assertAllClose([[1], [0], [1], [0], [2]], output) + + def test_hash_dense_input_siphash(self): + layer = hashing.Hashing(num_bins=2, salt=[133, 137]) + inp = np.asarray( + [["omar"], ["stringer"], ["marlo"], ["wire"], ["skywalker"]] + ) + output = layer(inp) + # Assert equal for hashed output that should be true on all platforms. + # Note the result is different from FarmHash. 
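The SipHash assertions in this block pin down a useful property: with a fixed salt the bin assignments are deterministic, while different salts permute them. Sketched directly from the expected values in these tests:

    import tensorflow as tf

    inp = tf.constant([["omar"], ["stringer"], ["marlo"], ["wire"], ["skywalker"]])

    layer_a = tf.keras.layers.Hashing(num_bins=2, salt=[133, 137])
    layer_b = tf.keras.layers.Hashing(num_bins=2, salt=[211, 137])

    # Same inputs, different salts: per the tests, layer_a yields
    # [[0], [1], [0], [1], [0]] and layer_b yields [[1], [0], [1], [0], [1]].
    print(layer_a(inp).numpy().ravel(), layer_b(inp).numpy().ravel())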
+ self.assertAllClose([[0], [1], [0], [1], [0]], output) + + layer_2 = hashing.Hashing(num_bins=2, salt=[211, 137]) + output_2 = layer_2(inp) + # Note the result is different from (133, 137). + self.assertAllClose([[1], [0], [1], [0], [1]], output_2) + + def test_hash_dense_int_input_siphash(self): + layer = hashing.Hashing(num_bins=3, salt=[133, 137]) + inp = np.asarray([[0], [1], [2], [3], [4]]) + output = layer(inp) + # Assert equal for hashed output that should be true on all platforms. + self.assertAllClose([[1], [1], [2], [0], [1]], output) + + def test_hash_sparse_input_farmhash(self): + layer = hashing.Hashing(num_bins=2) + indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] + inp = tf.SparseTensor( + indices=indices, + values=["omar", "stringer", "marlo", "wire", "skywalker"], + dense_shape=[3, 2], + ) + output = layer(inp) + self.assertAllClose(indices, output.indices) + self.assertAllClose([0, 0, 1, 0, 0], output.values) + + def test_hash_sparse_input_mask_value_farmhash(self): + empty_mask_layer = hashing.Hashing(num_bins=3, mask_value="") + omar_mask_layer = hashing.Hashing(num_bins=3, mask_value="omar") + indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] + inp = tf.SparseTensor( + indices=indices, + values=["omar", "stringer", "marlo", "wire", "skywalker"], + dense_shape=[3, 2], + ) + empty_mask_output = empty_mask_layer(inp) + omar_mask_output = omar_mask_layer(inp) + self.assertAllClose(indices, omar_mask_output.indices) + self.assertAllClose(indices, empty_mask_output.indices) + # Outputs should be one more than test_hash_sparse_input_farmhash (the + # zeroth bin is now reserved for masks). + self.assertAllClose([1, 1, 2, 1, 1], empty_mask_output.values) + # 'omar' should map to 0. + self.assertAllClose([0, 1, 2, 1, 1], omar_mask_output.values) + + def test_hash_sparse_int_input_farmhash(self): + layer = hashing.Hashing(num_bins=3) + indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] + inp = tf.SparseTensor( + indices=indices, values=[0, 1, 2, 3, 4], dense_shape=[3, 2] + ) + output = layer(inp) + self.assertAllClose(indices, output.indices) + self.assertAllClose([1, 0, 1, 0, 2], output.values) + + def test_hash_sparse_input_siphash(self): + layer = hashing.Hashing(num_bins=2, salt=[133, 137]) + indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] + inp = tf.SparseTensor( + indices=indices, + values=["omar", "stringer", "marlo", "wire", "skywalker"], + dense_shape=[3, 2], + ) + output = layer(inp) + self.assertAllClose(output.indices, indices) + # The result should be the same as test_hash_dense_input_siphash. + self.assertAllClose([0, 1, 0, 1, 0], output.values) + + layer_2 = hashing.Hashing(num_bins=2, salt=[211, 137]) + output = layer_2(inp) + # The result should be the same as test_hash_dense_input_siphash. 
+ self.assertAllClose([1, 0, 1, 0, 1], output.values) + + def test_hash_sparse_int_input_siphash(self): + layer = hashing.Hashing(num_bins=3, salt=[133, 137]) + indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]] + inp = tf.SparseTensor( + indices=indices, values=[0, 1, 2, 3, 4], dense_shape=[3, 2] + ) + output = layer(inp) + self.assertAllClose(indices, output.indices) + self.assertAllClose([1, 1, 2, 0, 1], output.values) + + def test_hash_ragged_string_input_farmhash(self): + layer = hashing.Hashing(num_bins=2) + inp_data = tf.ragged.constant( + [ + ["omar", "stringer", "marlo", "wire"], + ["marlo", "skywalker", "wire"], + ], + dtype=tf.string, + ) + out_data = layer(inp_data) + # Same hashed output as test_hash_sparse_input_farmhash + expected_output = [[0, 0, 1, 0], [1, 0, 0]] + self.assertAllEqual(expected_output, out_data) + + inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=tf.string) + out_t = layer(inp_t) + model = training.Model(inputs=inp_t, outputs=out_t) + self.assertAllClose(out_data, model.predict(inp_data)) + + def test_hash_ragged_input_mask_value(self): + empty_mask_layer = hashing.Hashing(num_bins=3, mask_value="") + omar_mask_layer = hashing.Hashing(num_bins=3, mask_value="omar") + inp_data = tf.ragged.constant( + [ + ["omar", "stringer", "marlo", "wire"], + ["marlo", "skywalker", "wire"], + ], + dtype=tf.string, + ) + empty_mask_output = empty_mask_layer(inp_data) + omar_mask_output = omar_mask_layer(inp_data) + # Outputs should be one more than test_hash_ragged_string_input_farmhash + # (the zeroth bin is now reserved for masks). + expected_output = [[1, 1, 2, 1], [2, 1, 1]] + self.assertAllClose(expected_output, empty_mask_output) + # 'omar' should map to 0. + expected_output = [[0, 1, 2, 1], [2, 1, 1]] + self.assertAllClose(expected_output, omar_mask_output) + + def test_hash_ragged_int_input_farmhash(self): + layer = hashing.Hashing(num_bins=3) + inp_data = tf.ragged.constant([[0, 1, 3, 4], [2, 1, 0]], dtype=tf.int64) + out_data = layer(inp_data) + # Same hashed output as test_hash_sparse_int_input_farmhash + expected_output = [[1, 0, 0, 2], [1, 0, 1]] + self.assertAllEqual(expected_output, out_data) + + inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=tf.int64) + out_t = layer(inp_t) + model = training.Model(inputs=inp_t, outputs=out_t) + self.assertAllClose(out_data, model.predict(inp_data)) + + def test_hash_ragged_string_input_siphash(self): + layer = hashing.Hashing(num_bins=2, salt=[133, 137]) + inp_data = tf.ragged.constant( + [ + ["omar", "stringer", "marlo", "wire"], + ["marlo", "skywalker", "wire"], + ], + dtype=tf.string, + ) + out_data = layer(inp_data) + # Same hashed output as test_hash_dense_input_siphash + expected_output = [[0, 1, 0, 1], [0, 0, 1]] + self.assertAllEqual(expected_output, out_data) + + inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=tf.string) + out_t = layer(inp_t) + model = training.Model(inputs=inp_t, outputs=out_t) + self.assertAllClose(out_data, model.predict(inp_data)) + + layer_2 = hashing.Hashing(num_bins=2, salt=[211, 137]) + out_data = layer_2(inp_data) + expected_output = [[1, 0, 1, 0], [1, 1, 0]] + self.assertAllEqual(expected_output, out_data) + + out_t = layer_2(inp_t) + model = training.Model(inputs=inp_t, outputs=out_t) + self.assertAllClose(out_data, model.predict(inp_data)) + + def test_hash_ragged_int_input_siphash(self): + layer = hashing.Hashing(num_bins=3, salt=[133, 137]) + inp_data = tf.ragged.constant([[0, 1, 3, 4], [2, 1, 0]], dtype=tf.int64) + out_data = layer(inp_data) + # Same 
hashed output as test_hash_sparse_int_input_siphash + expected_output = [[1, 1, 0, 1], [2, 1, 1]] + self.assertAllEqual(expected_output, out_data) + + inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=tf.int64) + out_t = layer(inp_t) + model = training.Model(inputs=inp_t, outputs=out_t) + self.assertAllClose(out_data, model.predict(inp_data)) + + def test_invalid_inputs(self): + with self.assertRaisesRegex(ValueError, "cannot be `None`"): + _ = hashing.Hashing(num_bins=None) + with self.assertRaisesRegex(ValueError, "cannot be `None`"): + _ = hashing.Hashing(num_bins=-1) + with self.assertRaisesRegex( + ValueError, "can only be a tuple of size 2" + ): + _ = hashing.Hashing(num_bins=2, salt="string") + with self.assertRaisesRegex( + ValueError, "can only be a tuple of size 2" + ): + _ = hashing.Hashing(num_bins=2, salt=[1]) + with self.assertRaisesRegex( + ValueError, "can only be a tuple of size 2" + ): + _ = hashing.Hashing(num_bins=1, salt=tf.constant([133, 137])) + + def test_one_hot_output(self): + input_array = np.array([0, 1, 2, 3, 4]) + + expected_output = [ + [0.0, 1.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 1.0], + ] + expected_output_shape = [None, 3] + + inputs = keras.Input(shape=(1,), dtype="int32") + layer = hashing.Hashing(num_bins=3, output_mode="one_hot") + outputs = layer(inputs) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + + model = keras.Model(inputs, outputs) + output_data = model(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_multi_hot_output(self): + input_array = np.array([0, 1, 2, 3, 4]) + + expected_output = [1.0, 1.0, 1.0] + expected_output_shape = [None, 3] + + inputs = keras.Input(shape=(3,), dtype="int32") + layer = hashing.Hashing(num_bins=3, output_mode="multi_hot") + outputs = layer(inputs) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + + model = keras.Model(inputs, outputs) + output_data = model(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_count_output(self): + input_array = np.array([0, 1, 2, 3, 4]) + + expected_output = [2.0, 2.0, 1.0] + expected_output_shape = [None, 3] + + inputs = keras.Input(shape=(3,), dtype="int32") + layer = hashing.Hashing(num_bins=3, output_mode="count") + outputs = layer(inputs) + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + + model = keras.Model(inputs, outputs) + output_data = model(input_array) + self.assertAllEqual(expected_output, output_data) + + @parameterized.named_parameters( + ("int32", tf.int32), + ("int64", tf.int64), + ) + def test_output_dtype(self, dtype): + input_data = keras.Input(batch_size=16, shape=(4,), dtype="string") + layer = hashing.Hashing(num_bins=3, dtype=dtype) + output = layer(input_data) + self.assertAllEqual(output.dtype, dtype) + + def test_legacy_dtype_compat(self): + inputs = keras.Input(batch_size=16, shape=(4,), dtype="string") + layer = hashing.Hashing(num_bins=3, dtype="float32") + outputs = layer(inputs) + self.assertAllEqual(outputs.dtype, tf.int64) + # In TF1 we sometimes face an explicit dtype=None in the config. 
+ layer = hashing.Hashing(num_bins=3, dtype=None) + outputs = layer(inputs) + self.assertAllEqual(outputs.dtype, tf.int64) + + @parameterized.named_parameters( + ("float32", tf.float32), + ("float64", tf.float64), + ) + def test_one_hot_output_dtype(self, dtype): + input_data = keras.Input(batch_size=16, shape=(1,), dtype="string") + layer = hashing.Hashing(num_bins=3, output_mode="one_hot", dtype=dtype) + output = layer(input_data) + self.assertAllEqual(output.dtype, dtype) + + def test_hash_compute_output_signature(self): + input_shape = tf.TensorShape([2, 3]) + input_spec = tf.TensorSpec(input_shape, tf.string) + layer = hashing.Hashing(num_bins=2) + output_spec = layer.compute_output_signature(input_spec) + self.assertEqual(output_spec.shape.dims, input_shape.dims) + self.assertEqual(output_spec.dtype, tf.int64) + + @test_utils.run_v2_only + def test_config_with_custom_name(self): + layer = hashing.Hashing(num_bins=2, name="hashing") + config = layer.get_config() + layer_1 = hashing.Hashing.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_saved_model(self): + input_data = np.array( + ["omar", "stringer", "marlo", "wire", "skywalker"] + ) + + inputs = keras.Input(shape=(None,), dtype=tf.string) + outputs = hashing.Hashing(num_bins=100)(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + original_output_data = model(input_data) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") + model.save(output_path, save_format="tf") + loaded_model = keras.models.load_model(output_path) + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_data = loaded_model(input_data) + self.assertAllClose(new_output_data, original_output_data) + + @test_utils.run_v2_only + def test_save_keras_v3(self): + input_data = np.array( + ["omar", "stringer", "marlo", "wire", "skywalker"] + ) + + inputs = keras.Input(shape=(None,), dtype=tf.string) + outputs = hashing.Hashing(num_bins=100)(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + original_output_data = model(input_data) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_model.keras") + model.save(output_path, save_format="keras_v3") + loaded_model = keras.models.load_model(output_path) + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. 
+ new_output_data = loaded_model(input_data) + self.assertAllClose(new_output_data, original_output_data) + + @parameterized.named_parameters( + ( + "list_input", + [1, 2, 3], + [1, 1, 1], + ), + ( + "list_input_2d", + [[1], [2], [3]], + [[1], [1], [1]], + ), + ( + "list_input_2d_multiple", + [[1, 2], [2, 3], [3, 4]], + [[1, 1], [1, 1], [1, 1]], + ), + ( + "list_input_3d", + [[[1], [2]], [[2], [3]], [[3], [4]]], + [[[1], [1]], [[1], [1]], [[1], [1]]], + ), + ) + def test_hash_list_input(self, input_data, expected): + layer = hashing.Hashing(num_bins=2) + out_data = layer(input_data) + self.assertAllEqual(expected, out_data.numpy().tolist()) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/preprocessing/image_preprocessing.py b/keras/layers/preprocessing/image_preprocessing.py index cf8416c5ec18..b2c74b9f65eb 100644 --- a/keras/layers/preprocessing/image_preprocessing.py +++ b/keras/layers/preprocessing/image_preprocessing.py @@ -14,8 +14,9 @@ # ============================================================================== """Keras image preprocessing layers.""" -# pylint: disable=g-classes-have-attributes - +import numpy as np +import tensorflow.compat.v2 as tf +from tensorflow.python.util.tf_export import keras_export from keras import backend from keras.engine import base_layer @@ -23,1903 +24,1742 @@ from keras.layers.preprocessing import preprocessing_utils as utils from keras.utils import image_utils from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf - -from tensorflow.python.ops import stateless_random_ops -from tensorflow.python.util.tf_export import keras_export -from tensorflow.tools.docs import doc_controls H_AXIS = -3 W_AXIS = -2 -IMAGES = 'images' -LABELS = 'labels' -TARGETS = 'targets' -BOUNDING_BOXES = 'bounding_boxes' - def check_fill_mode_and_interpolation(fill_mode, interpolation): - if fill_mode not in {'reflect', 'wrap', 'constant', 'nearest'}: - raise NotImplementedError( - 'Unknown `fill_mode` {}. Only `reflect`, `wrap`, ' - '`constant` and `nearest` are supported.'.format(fill_mode)) - if interpolation not in {'nearest', 'bilinear'}: - raise NotImplementedError('Unknown `interpolation` {}. Only `nearest` and ' - '`bilinear` are supported.'.format(interpolation)) + if fill_mode not in {"reflect", "wrap", "constant", "nearest"}: + raise NotImplementedError( + f"Unknown `fill_mode` {fill_mode}. Only `reflect`, `wrap`, " + "`constant` and `nearest` are supported." + ) + if interpolation not in {"nearest", "bilinear"}: + raise NotImplementedError( + f"Unknown `interpolation` {interpolation}. Only `nearest` and " + "`bilinear` are supported." + ) + + +@keras_export( + "keras.layers.Resizing", "keras.layers.experimental.preprocessing.Resizing" +) +class Resizing(base_layer.Layer): + """A preprocessing layer which resizes images. + This layer resizes an image input to a target height and width. The input + should be a 4D (batched) or 3D (unbatched) tensor in `"channels_last"` + format. Input pixel values can be of any range + (e.g. `[0., 1.)` or `[0, 255]`) and of integer or floating point dtype. + By default, the layer will output floats. -@keras_export('keras.layers.Resizing', - 'keras.layers.experimental.preprocessing.Resizing') -class Resizing(base_layer.Layer): - """A preprocessing layer which resizes images. - - This layer resizes an image input to a target height and width. The input - should be a 4D (batched) or 3D (unbatched) tensor in `"channels_last"` format. 
- Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and of - interger or floating point dtype. By default, the layer will output floats. - - This layer can be called on tf.RaggedTensor batches of input images of - distinct sizes, and will resize the outputs to dense tensors of uniform size. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - height: Integer, the height of the output shape. - width: Integer, the width of the output shape. - interpolation: String, the interpolation method. Defaults to `"bilinear"`. - Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, `"lanczos3"`, - `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. - crop_to_aspect_ratio: If True, resize the images without aspect - ratio distortion. When the original aspect ratio differs from the target - aspect ratio, the output image will be cropped so as to return the largest - possible window in the image (of size `(height, width)`) that matches - the target aspect ratio. By default (`crop_to_aspect_ratio=False`), - aspect ratio may not be preserved. - """ - - def __init__(self, - height, - width, - interpolation='bilinear', - crop_to_aspect_ratio=False, - **kwargs): - self.height = height - self.width = width - self.interpolation = interpolation - self.crop_to_aspect_ratio = crop_to_aspect_ratio - self._interpolation_method = image_utils.get_interpolation(interpolation) - super().__init__(**kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell('Resizing').set(True) - - def call(self, inputs): - # tf.image.resize will always output float32 and operate more efficiently on - # float32 unless interpolation is nearest, in which case ouput type matches - # input type. - if self.interpolation == 'nearest': - input_dtype = self.compute_dtype - else: - input_dtype = tf.float32 - inputs = utils.ensure_tensor(inputs, dtype=input_dtype) - size = [self.height, self.width] - if self.crop_to_aspect_ratio: - def resize_to_aspect(x): - if tf_utils.is_ragged(inputs): - x = x.to_tensor() - return image_utils.smart_resize( - x, - size=size, - interpolation=self._interpolation_method) - - if tf_utils.is_ragged(inputs): - size_as_shape = tf.TensorShape(size) - shape = size_as_shape + inputs.shape[-1:] - spec = tf.TensorSpec(shape, input_dtype) - outputs = tf.map_fn(resize_to_aspect, inputs, fn_output_signature=spec) - else: - outputs = resize_to_aspect(inputs) - else: - outputs = tf.image.resize( - inputs, - size=size, - method=self._interpolation_method) - return tf.cast(outputs, self.compute_dtype) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - input_shape[H_AXIS] = self.height - input_shape[W_AXIS] = self.width - return tf.TensorShape(input_shape) - - def get_config(self): - config = { - 'height': self.height, - 'width': self.width, - 'interpolation': self.interpolation, - 'crop_to_aspect_ratio': self.crop_to_aspect_ratio, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.layers.CenterCrop', - 'keras.layers.experimental.preprocessing.CenterCrop') + This layer can be called on tf.RaggedTensor batches of input images of + distinct sizes, and will resize the outputs to dense tensors of uniform + size. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). 
+ + Args: + height: Integer, the height of the output shape. + width: Integer, the width of the output shape. + interpolation: String, the interpolation method. + Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, + `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. + Defaults to `"bilinear"`. + crop_to_aspect_ratio: If True, resize the images without aspect + ratio distortion. When the original aspect ratio differs + from the target aspect ratio, the output image will be + cropped so as to return the + largest possible window in the image (of size `(height, width)`) + that matches the target aspect ratio. By default + (`crop_to_aspect_ratio=False`), aspect ratio may not be preserved. + """ + + def __init__( + self, + height, + width, + interpolation="bilinear", + crop_to_aspect_ratio=False, + **kwargs, + ): + self.height = height + self.width = width + self.interpolation = interpolation + self.crop_to_aspect_ratio = crop_to_aspect_ratio + self._interpolation_method = image_utils.get_interpolation( + interpolation + ) + super().__init__(**kwargs) + base_preprocessing_layer.keras_kpl_gauge.get_cell("Resizing").set(True) + + def call(self, inputs): + # tf.image.resize will always output float32 + # and operate more efficiently on float32 + # unless interpolation is nearest, in which case output type matches + # input type. + if self.interpolation == "nearest": + input_dtype = self.compute_dtype + else: + input_dtype = tf.float32 + inputs = convert_inputs(inputs, dtype=input_dtype) + size = [self.height, self.width] + if self.crop_to_aspect_ratio: + + def resize_to_aspect(x): + if tf_utils.is_ragged(inputs): + x = x.to_tensor() + return image_utils.smart_resize( + x, size=size, interpolation=self._interpolation_method + ) + + if tf_utils.is_ragged(inputs): + size_as_shape = tf.TensorShape(size) + shape = size_as_shape + inputs.shape[-1:] + spec = tf.TensorSpec(shape, input_dtype) + outputs = tf.map_fn( + resize_to_aspect, inputs, fn_output_signature=spec + ) + else: + outputs = resize_to_aspect(inputs) + else: + outputs = tf.image.resize( + inputs, size=size, method=self._interpolation_method + ) + return tf.cast(outputs, self.compute_dtype) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + input_shape[H_AXIS] = self.height + input_shape[W_AXIS] = self.width + return tf.TensorShape(input_shape) + + def get_config(self): + config = { + "height": self.height, + "width": self.width, + "interpolation": self.interpolation, + "crop_to_aspect_ratio": self.crop_to_aspect_ratio, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export( + "keras.layers.CenterCrop", + "keras.layers.experimental.preprocessing.CenterCrop", +) class CenterCrop(base_layer.Layer): - """A preprocessing layer which crops images. - - This layers crops the central portion of the images to a target size. If an - image is smaller than the target size, it will be resized and cropped so as to - return the largest possible window in the image that matches the target aspect - ratio. - - Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and - of interger or floating point dtype. By default, the layer will output floats. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).
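A minimal sketch of the reformatted `Resizing` layer above, for reviewers (the input shape and the 224x224 target are assumptions for illustration):

```python
import numpy as np
import tensorflow as tf

batch = np.random.random((4, 300, 200, 3)).astype("float32")

# Plain resize: may distort the aspect ratio.
resize = tf.keras.layers.Resizing(height=224, width=224)
print(resize(batch).shape)  # (4, 224, 224, 3)

# crop_to_aspect_ratio=True first crops the largest window matching the
# target aspect ratio, then resizes, so the image is not distorted.
resize_crop = tf.keras.layers.Resizing(224, 224, crop_to_aspect_ratio=True)
print(resize_crop(batch).shape)  # (4, 224, 224, 3)
```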
- - Input shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Output shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., target_height, target_width, channels)`. - - If the input height/width is even and the target height/width is odd (or - inversely), the input image is left-padded by 1 pixel. - - Args: - height: Integer, the height of the output shape. - width: Integer, the width of the output shape. - """ - - def __init__(self, height, width, **kwargs): - self.height = height - self.width = width - super().__init__(**kwargs, autocast=False) - base_preprocessing_layer.keras_kpl_gauge.get_cell('CenterCrop').set(True) - - def call(self, inputs): - inputs = utils.ensure_tensor(inputs, self.compute_dtype) - input_shape = tf.shape(inputs) - h_diff = input_shape[H_AXIS] - self.height - w_diff = input_shape[W_AXIS] - self.width - - def center_crop(): - h_start = tf.cast(h_diff / 2, tf.int32) - w_start = tf.cast(w_diff / 2, tf.int32) - return tf.image.crop_to_bounding_box(inputs, h_start, w_start, - self.height, self.width) - - def upsize(): - outputs = image_utils.smart_resize(inputs, [self.height, self.width]) - # smart_resize will always output float32, so we need to re-cast. - return tf.cast(outputs, self.compute_dtype) - - return tf.cond( - tf.reduce_all((h_diff >= 0, w_diff >= 0)), center_crop, upsize) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - input_shape[H_AXIS] = self.height - input_shape[W_AXIS] = self.width - return tf.TensorShape(input_shape) - - def get_config(self): - config = { - 'height': self.height, - 'width': self.width, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.__internal__.layers.BaseImageAugmentationLayer') -class BaseImageAugmentationLayer(base_layer.BaseRandomLayer): - """Abstract base layer for image augmentaion. - - This layer contains base functionalities for preprocessing layers which - augment image related data, eg. image and in future, label and bounding boxes. - The subclasses could avoid making certain mistakes and reduce code - duplications. - - This layer requires you to implement one method: `augment_image()`, which - augments one single image during the training. There are a few additional - methods that you can implement for added functionality on the layer: - - `augment_label()`, which handles label augmentation if the layer supports - that. - - `augment_bounding_boxes()`, which handles the bounding box augmentation, if the - layer supports that. - - `get_random_transformation()`, which should produce a random transformation - setting. The tranformation object, which could be any type, will be passed to - `augment_image`, `augment_label` and `augment_bounding_boxes`, to coodinate - the randomness behavior, eg, in the RandomFlip layer, the image and - bounding_boxes should be changed in the same way. - - The `call()` method support two formats of inputs: - 1. Single image tensor with 3D (HWC) or 4D (NHWC) format. - 2. A dict of tensors with stable keys. The supported keys are: - `"images"`, `"labels"` and `"bounding_boxes"` at the moment. We might add - more keys in future when we support more types of augmentation. - - The output of the `call()` will be in two formats, which will be the same - structure as the inputs. 
- - The `call()` will handle the logic detecting the training/inference - mode, unpack the inputs, forward to the correct function, and pack the output - back to the same structure as the inputs. - - By default the `call()` method leverages the `tf.vectorized_map()` function. - Auto-vectorization can be disabled by setting `self.auto_vectorize = False` - in your `__init__()` method. When disabled, `call()` instead relies - on `tf.map_fn()`. For example: - - ```python - class SubclassLayer(BaseImageAugmentationLayer): - def __init__(self): - super().__init__() - self.auto_vectorize = False - ``` - - Example: - - ```python - class RandomContrast(BaseImageAugmentationLayer): - - def __init__(self, factor=(0.5, 1.5), **kwargs): - super().__init__(**kwargs) - self._factor = factor - - def augment_image(self, image, transformation): - random_factor = tf.random.uniform([], self._factor[0], self._factor[1]) - mean = tf.math.reduced_mean(inputs, axis=-1, keep_dim=True) - return (inputs - mean) * random_factor + mean - ``` - - Note that since the randomness is also a common functionnality, this layer - also includes a tf.keras.backend.RandomGenerator, which can be used to produce - the random numbers. The random number generator is stored in the - `self._random_generator` attribute. - """ - - def __init__(self, rate=1.0, seed=None, **kwargs): - super().__init__(seed=seed, **kwargs) - self.rate = rate - - @property - def auto_vectorize(self): - """Control whether automatic vectorization occurs. - - By default the `call()` method leverages the `tf.vectorized_map()` function. - Auto-vectorization can be disabled by setting `self.auto_vectorize = False` - in your `__init__()` method. When disabled, `call()` instead relies - on `tf.map_fn()`. For example: + """A preprocessing layer which crops images. - ```python - class SubclassLayer(BaseImageAugmentationLayer): - def __init__(self): - super().__init__() - self.auto_vectorize = False - ``` + This layer crops the central portion of the images to a target size. If an + image is smaller than the target size, it will be resized and cropped + so as to return the largest possible window in the image that matches + the target aspect ratio. + + Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + of integer or floating point dtype. + By default, the layer will output floats. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., target_height, target_width, channels)`. + + If the input height/width is even and the target height/width is odd (or + inversely), the input image is left-padded by 1 pixel. + + Args: + height: Integer, the height of the output shape. + width: Integer, the width of the output shape.
""" - return getattr(self, '_auto_vectorize', True) - @auto_vectorize.setter - def auto_vectorize(self, auto_vectorize): - self._auto_vectorize = auto_vectorize + def __init__(self, height, width, **kwargs): + self.height = height + self.width = width + super().__init__(**kwargs, autocast=False) + base_preprocessing_layer.keras_kpl_gauge.get_cell("CenterCrop").set( + True + ) + + def call(self, inputs): + inputs = convert_inputs(inputs, self.compute_dtype) + input_shape = tf.shape(inputs) + h_diff = input_shape[H_AXIS] - self.height + w_diff = input_shape[W_AXIS] - self.width + + def center_crop(): + h_start = tf.cast(h_diff / 2, tf.int32) + w_start = tf.cast(w_diff / 2, tf.int32) + return tf.image.crop_to_bounding_box( + inputs, h_start, w_start, self.height, self.width + ) + + def upsize(): + outputs = image_utils.smart_resize( + inputs, [self.height, self.width] + ) + # smart_resize will always output float32, so we need to re-cast. + return tf.cast(outputs, self.compute_dtype) + + return tf.cond( + tf.reduce_all((h_diff >= 0, w_diff >= 0)), center_crop, upsize + ) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + input_shape[H_AXIS] = self.height + input_shape[W_AXIS] = self.width + return tf.TensorShape(input_shape) + + def get_config(self): + config = { + "height": self.height, + "width": self.width, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export( + "keras.layers.RandomCrop", + "keras.layers.experimental.preprocessing.RandomCrop", + v1=[], +) +class RandomCrop(base_layer.BaseRandomLayer): + """A preprocessing layer which randomly crops images during training. + + During training, this layer will randomly choose a location to crop images + down to a target size. The layer will crop all the images in the same batch + to the same cropping location. + + At inference time, and during training if an input image is smaller than the + target size, the input will be resized and cropped so as to return the + largest possible window in the image that matches the target aspect ratio. + If you need to apply random cropping at inference time, set `training` to + True when calling the layer. + + Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + of integer or floating point dtype. By default, the layer will output + floats. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., target_height, target_width, channels)`. - @property - def _map_fn(self): - if self.auto_vectorize: - return tf.vectorized_map - else: - return tf.map_fn + Args: + height: Integer, the height of the output shape. + width: Integer, the width of the output shape. + seed: Integer. Used to create a random seed. 
+ """ + + def __init__(self, height, width, seed=None, **kwargs): + base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomCrop").set( + True + ) + super().__init__( + **kwargs, autocast=False, seed=seed, force_generator=True + ) + self.height = height + self.width = width + self.seed = seed + + def call(self, inputs, training=True): + inputs = convert_inputs(inputs, dtype=self.compute_dtype) + input_shape = tf.shape(inputs) + h_diff = input_shape[H_AXIS] - self.height + w_diff = input_shape[W_AXIS] - self.width + + def random_crop(): + dtype = input_shape.dtype + rands = self._random_generator.random_uniform( + [2], 0, dtype.max, dtype + ) + h_start = rands[0] % (h_diff + 1) + w_start = rands[1] % (w_diff + 1) + return tf.image.crop_to_bounding_box( + inputs, h_start, w_start, self.height, self.width + ) + + def resize(): + outputs = image_utils.smart_resize( + inputs, [self.height, self.width] + ) + # smart_resize will always output float32, so we need to re-cast. + return tf.cast(outputs, self.compute_dtype) + + return tf.cond( + tf.reduce_all((training, h_diff >= 0, w_diff >= 0)), + random_crop, + resize, + ) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + input_shape[H_AXIS] = self.height + input_shape[W_AXIS] = self.width + return tf.TensorShape(input_shape) + + def get_config(self): + config = { + "height": self.height, + "width": self.width, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export( + "keras.layers.Rescaling", + "keras.layers.experimental.preprocessing.Rescaling", +) +class Rescaling(base_layer.Layer): + """A preprocessing layer which rescales input values to a new range. + + This layer rescales every value of an input (often an image) by multiplying + by `scale` and adding `offset`. + + For instance: + + 1. To rescale an input in the `[0, 255]` range + to be in the `[0, 1]` range, you would pass `scale=1./255`. + + 2. To rescale an input in the `[0, 255]` range to be in the `[-1, 1]` range, + you would pass `scale=1./127.5, offset=-1`. - @doc_controls.for_subclass_implementers - def augment_image(self, image, transformation): - """Augment a single image during training. + The rescaling is applied both during training and inference. Inputs can be + of integer or floating point dtype, and by default the layer will output + floats. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Input shape: + Arbitrary. + + Output shape: + Same as input. Args: - image: 3D image input tensor to the layer. Forwarded from `layer.call()`. - transformation: The transformation object produced by - `get_random_transformation`. Used to coordinate the randomness between - image, label and bounding box. + scale: Float, the scale to apply to the inputs. + offset: Float, the offset to apply to the inputs. + """ - Returns: - output 3D tensor, which will be forward to `layer.call()`. 
+ def __init__(self, scale, offset=0.0, **kwargs): + self.scale = scale + self.offset = offset + super().__init__(**kwargs) + base_preprocessing_layer.keras_kpl_gauge.get_cell("Rescaling").set(True) + + def call(self, inputs): + dtype = self.compute_dtype + inputs = convert_inputs(inputs, dtype=dtype) + scale = tf.cast(self.scale, dtype) + offset = tf.cast(self.offset, dtype) + return tf.cast(inputs, dtype) * scale + offset + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "scale": self.scale, + "offset": self.offset, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +HORIZONTAL = "horizontal" +VERTICAL = "vertical" +HORIZONTAL_AND_VERTICAL = "horizontal_and_vertical" + + +@keras_export( + "keras.layers.RandomFlip", + "keras.layers.experimental.preprocessing.RandomFlip", + v1=[], +) +class RandomFlip(base_layer.BaseRandomLayer): + """A preprocessing layer which randomly flips images during training. + + This layer will flip the images horizontally and/or vertically based on the + `mode` attribute. During inference time, the output will be identical to + input. Call the layer with `training=True` to flip the input. + + Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + of integer or floating point dtype. + By default, the layer will output floats. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Args: + mode: String indicating which flip mode to use. Can be `"horizontal"`, + `"vertical"`, or `"horizontal_and_vertical"`. `"horizontal"` is a + left-right flip and `"vertical"` is a top-bottom flip. Defaults to + `"horizontal_and_vertical"`. + seed: Integer. Used to create a random seed. """ - raise NotImplementedError() - - @doc_controls.for_subclass_implementers - def augment_label(self, label, transformation): - """Augment a single label during training.
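A quick sketch of the `RandomFlip` contract just documented, i.e. identity at inference and random flips in training (shapes and seed are illustrative):

```python
import numpy as np
import tensorflow as tf

images = np.random.random((2, 4, 4, 3)).astype("float32")
flip = tf.keras.layers.RandomFlip(mode="horizontal", seed=1)

augmented = flip(images, training=True)     # possibly left-right flipped
passthrough = flip(images, training=False)  # inference: identical to input
np.testing.assert_allclose(passthrough.numpy(), images)
```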
+ def __init__(self, mode=HORIZONTAL_AND_VERTICAL, seed=None, **kwargs): + super().__init__(seed=seed, force_generator=True, **kwargs) + base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomFlip").set( + True + ) + self.mode = mode + if mode == HORIZONTAL: + self.horizontal = True + self.vertical = False + elif mode == VERTICAL: + self.horizontal = False + self.vertical = True + elif mode == HORIZONTAL_AND_VERTICAL: + self.horizontal = True + self.vertical = True + else: + raise ValueError( + f"RandomFlip layer {self.name} received an unknown mode " + f"argument {mode}" + ) + self.seed = seed + + def call(self, inputs, training=True): + inputs = convert_inputs(inputs, self.compute_dtype) + + def random_flipped_inputs(inputs): + flipped_outputs = inputs + if self.horizontal: + seed = self._random_generator.make_seed_for_stateless_op() + if seed is not None: + flipped_outputs = tf.image.stateless_random_flip_left_right( + flipped_outputs, seed=seed + ) + else: + flipped_outputs = tf.image.random_flip_left_right( + flipped_outputs, + self._random_generator.make_legacy_seed(), + ) + if self.vertical: + seed = self._random_generator.make_seed_for_stateless_op() + if seed is not None: + flipped_outputs = tf.image.stateless_random_flip_up_down( + flipped_outputs, seed=seed + ) + else: + flipped_outputs = tf.image.random_flip_up_down( + flipped_outputs, + self._random_generator.make_legacy_seed(), + ) + flipped_outputs.set_shape(inputs.shape) + return flipped_outputs + + if training: + return random_flipped_inputs(inputs) + else: + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "mode": self.mode, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +# TODO(tanzheny): Add examples, here and everywhere. +@keras_export( + "keras.layers.RandomTranslation", + "keras.layers.experimental.preprocessing.RandomTranslation", + v1=[], +) +class RandomTranslation(base_layer.BaseRandomLayer): + """A preprocessing layer which randomly translates images during training. + + This layer will apply random translations to each image during training, + filling empty space according to `fill_mode`. + + Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + of integer or floating point dtype. By default, the layer will output + floats. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Args: + height_factor: a float represented as fraction of value, or a tuple of + size 2 representing lower and upper bound for shifting vertically. A + negative value means shifting image up, while a positive value means + shifting image down. When represented as a single positive float, this + value is used for both the upper and lower bound. For instance, + `height_factor=(-0.2, 0.3)` results in an output shifted by a random + amount in the range `[-20%, +30%]`. `height_factor=0.2` results in an + output height shifted by a random amount in the range `[-20%, +20%]`. + width_factor: a float represented as fraction of value, or a tuple of size + 2 representing lower and upper bound for shifting horizontally. A + negative value means shifting image left, while a positive value means + shifting image right. When represented as a single positive float, + this value is used for both the upper and lower bound. 
For instance, + `width_factor=(-0.2, 0.3)` results in an output shifted left by 20%, + and shifted right by 30%. `width_factor=0.2` results + in an output shifted left or right by 20%. + fill_mode: Points outside the boundaries of the input are filled according + to the given mode + (one of `{"constant", "reflect", "wrap", "nearest"}`). + - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by + reflecting about the edge of the last pixel. + - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by + filling all values beyond the edge with the same constant value + k = 0. + - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by + wrapping around to the opposite edge. + - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by + the nearest pixel. + interpolation: Interpolation mode. Supported values: `"nearest"`, + `"bilinear"`. + seed: Integer. Used to create a random seed. + fill_value: a float represents the value to be filled outside the + boundaries when `fill_mode="constant"`. + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + """ + + def __init__( + self, + height_factor, + width_factor, + fill_mode="reflect", + interpolation="bilinear", + seed=None, + fill_value=0.0, + **kwargs, + ): + base_preprocessing_layer.keras_kpl_gauge.get_cell( + "RandomTranslation" + ).set(True) + super().__init__(seed=seed, force_generator=True, **kwargs) + self.height_factor = height_factor + if isinstance(height_factor, (tuple, list)): + self.height_lower = height_factor[0] + self.height_upper = height_factor[1] + else: + self.height_lower = -height_factor + self.height_upper = height_factor + if self.height_upper < self.height_lower: + raise ValueError( + "`height_factor` cannot have upper bound less than " + f"lower bound, got {height_factor}" + ) + if abs(self.height_lower) > 1.0 or abs(self.height_upper) > 1.0: + raise ValueError( + "`height_factor` argument must have values between [-1, 1]. " + f"Received: height_factor={height_factor}" + ) + + self.width_factor = width_factor + if isinstance(width_factor, (tuple, list)): + self.width_lower = width_factor[0] + self.width_upper = width_factor[1] + else: + self.width_lower = -width_factor + self.width_upper = width_factor + if self.width_upper < self.width_lower: + raise ValueError( + "`width_factor` cannot have upper bound less than " + f"lower bound, got {width_factor}" + ) + if abs(self.width_lower) > 1.0 or abs(self.width_upper) > 1.0: + raise ValueError( + "`width_factor` must have values between [-1, 1], " + f"got {width_factor}" + ) + + check_fill_mode_and_interpolation(fill_mode, interpolation) + + self.fill_mode = fill_mode + self.fill_value = fill_value + self.interpolation = interpolation + self.seed = seed + + def call(self, inputs, training=True): + inputs = convert_inputs(inputs, self.compute_dtype) + + def random_translated_inputs(inputs): + """Translated inputs with random ops.""" + # The transform op only accepts rank 4 inputs, + # so if we have an unbatched image, + # we need to temporarily expand dims to a batch.
+ original_shape = inputs.shape + unbatched = inputs.shape.rank == 3 + if unbatched: + inputs = tf.expand_dims(inputs, 0) + + inputs_shape = tf.shape(inputs) + batch_size = inputs_shape[0] + img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32) + img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32) + height_translate = self._random_generator.random_uniform( + shape=[batch_size, 1], + minval=self.height_lower, + maxval=self.height_upper, + dtype=tf.float32, + ) + height_translate = height_translate * img_hd + width_translate = self._random_generator.random_uniform( + shape=[batch_size, 1], + minval=self.width_lower, + maxval=self.width_upper, + dtype=tf.float32, + ) + width_translate = width_translate * img_wd + translations = tf.cast( + tf.concat([width_translate, height_translate], axis=1), + dtype=tf.float32, + ) + output = transform( + inputs, + get_translation_matrix(translations), + interpolation=self.interpolation, + fill_mode=self.fill_mode, + fill_value=self.fill_value, + ) + if unbatched: + output = tf.squeeze(output, 0) + output.set_shape(original_shape) + return output + + if training: + return random_translated_inputs(inputs) + else: + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "height_factor": self.height_factor, + "width_factor": self.width_factor, + "fill_mode": self.fill_mode, + "fill_value": self.fill_value, + "interpolation": self.interpolation, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def get_translation_matrix(translations, name=None): + """Returns projective transform(s) for the given translation(s). Args: - label: 1D label to the layer. Forwarded from `layer.call()`. - transformation: The transformation object produced by - `get_random_transformation`. Used to coordinate the randomness between - image, label and bounding box. + translations: A matrix of 2-element lists representing `[dx, dy]` + to translate for each image (for a batch of images). + name: The name of the op. Returns: - output 1D tensor, which will be forward to `layer.call()`. + A tensor of shape `(num_images, 8)` projective transforms + which can be given to `transform`. """ - raise NotImplementedError() - - @doc_controls.for_subclass_implementers - def augment_target(self, target, transformation): - """Augment a single target during training. + with backend.name_scope(name or "translation_matrix"): + num_translations = tf.shape(translations)[0] + # The translation matrix looks like: + # [[1 0 -dx] + # [0 1 -dy] + # [0 0 1]] + # where the last entry is implicit. + # Translation matrices are always float32. + return tf.concat( + values=[ + tf.ones((num_translations, 1), tf.float32), + tf.zeros((num_translations, 1), tf.float32), + -translations[:, 0, None], + tf.zeros((num_translations, 1), tf.float32), + tf.ones((num_translations, 1), tf.float32), + -translations[:, 1, None], + tf.zeros((num_translations, 2), tf.float32), + ], + axis=1, + ) + + +def transform( + images, + transforms, + fill_mode="reflect", + fill_value=0.0, + interpolation="bilinear", + output_shape=None, + name=None, +): + """Applies the given transform(s) to the image(s). Args: - target: 1D label to the layer. Forwarded from `layer.call()`. - transformation: The transformation object produced by - `get_random_transformation`. Used to coordinate the randomness between - image, label and bounding box. 
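The flattened 8-parameter layout that `get_translation_matrix` documents above can be checked by hand; a sketch (the `keras.layers.preprocessing.image_preprocessing` import path is the very module this hunk edits, so this only runs inside the source tree):

```python
import tensorflow as tf
from keras.layers.preprocessing import image_preprocessing as imp

# One image translated by dx=2, dy=3. The 3x3 matrix
# [[1, 0, -dx], [0, 1, -dy], [0, 0, 1]] is flattened (dropping the implicit
# last entry) into [a0, a1, a2, b0, b1, b2, c0, c1]; dx/dy are negated
# because transforms map *output* coordinates back to *input* coordinates.
matrix = imp.get_translation_matrix(tf.constant([[2.0, 3.0]]))
print(matrix.numpy())  # [[ 1.  0. -2.  0.  1. -3.  0.  0.]]
```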
+ images: A tensor of shape + `(num_images, num_rows, num_columns, num_channels)` (NHWC). + The rank must be statically known + (the shape is not `TensorShape(None)`). + transforms: Projective transform matrix/matrices. + A vector of length 8 or tensor of size N x 8. + If one row of transforms is [a0, a1, a2, b0, b1, b2, + c0, c1], then it maps the *output* point `(x, y)` + to a transformed *input* point + `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where + `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to the + transform mapping input points to output points. + Note that gradients are not backpropagated + into transformation parameters. + fill_mode: Points outside the boundaries of the input are filled + according to the given mode + (one of `{"constant", "reflect", "wrap", "nearest"}`). + fill_value: a float represents the value to be filled outside + the boundaries when `fill_mode="constant"`. + interpolation: Interpolation mode. Supported values: `"nearest"`, + `"bilinear"`. + output_shape: Output dimension after the transform, `[height, width]`. + If `None`, output is the same size as input image. + name: The name of the op. + + Fill mode behavior for each valid value is as follows: + + - `"reflect"`: `(d c b a | a b c d | d c b a)` + The input is extended by reflecting about the edge of the last pixel. + + - `"constant"`: `(k k k k | a b c d | k k k k)` + The input is extended by filling all + values beyond the edge with the same constant value k = 0. + + - `"wrap"`: `(a b c d | a b c d | a b c d)` + The input is extended by wrapping around to the opposite edge. + + - `"nearest"`: `(a a a a | a b c d | d d d d)` + The input is extended by the nearest pixel. + + Input shape: + 4D tensor with shape: `(samples, height, width, channels)`, + in `"channels_last"` format. + + Output shape: + 4D tensor with shape: `(samples, height, width, channels)`, + in `"channels_last"` format. Returns: - output 1D tensor, which will be forward to `layer.call()`. + Image(s) with the same type and shape as `images`, with the given + transform(s) applied. Transformed coordinates outside of the input image + will be filled with zeros. """ - return self.augment_label(target, transformation) + with backend.name_scope(name or "transform"): + if output_shape is None: + output_shape = tf.shape(images)[1:3] + if not tf.executing_eagerly(): + output_shape_value = tf.get_static_value(output_shape) + if output_shape_value is not None: + output_shape = output_shape_value + + output_shape = tf.convert_to_tensor( + output_shape, tf.int32, name="output_shape" + ) + + if not output_shape.get_shape().is_compatible_with([2]): + raise ValueError( + "output_shape must be a 1-D Tensor of 2 elements: " + "new_height, new_width, instead got " + f"output_shape={output_shape}" + ) + + fill_value = tf.convert_to_tensor( + fill_value, tf.float32, name="fill_value" + ) + + return tf.raw_ops.ImageProjectiveTransformV3( + images=images, + output_shape=output_shape, + fill_value=fill_value, + transforms=transforms, + fill_mode=fill_mode.upper(), + interpolation=interpolation.upper(), + ) - @doc_controls.for_subclass_implementers - def augment_bounding_boxes(self, image, bounding_boxes, transformation=None): - """Augment bounding boxes for one image during training. + +def get_rotation_matrix(angles, image_height, image_width, name=None): + """Returns projective transform(s) for the given angle(s). Args: - image: 3D image input tensor to the layer. Forwarded from `layer.call()`. 
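And a sketch of driving `transform` directly with such an 8-vector, under the same in-tree import assumption (a single 1 x 8 transform row applies to every image in the batch):

```python
import tensorflow as tf
from keras.layers.preprocessing import image_preprocessing as imp

images = tf.random.uniform((2, 32, 32, 3))
# a2 = -dx, b2 = -dy per the inverse-mapping convention documented above:
# shift content 10 pixels right and 5 pixels down, zero-filling the border.
transforms = tf.constant([[1.0, 0.0, -10.0, 0.0, 1.0, -5.0, 0.0, 0.0]])
shifted = imp.transform(
    images, transforms, fill_mode="constant", fill_value=0.0
)
print(shifted.shape)  # (2, 32, 32, 3)
```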
- bounding_boxes: 2D bounding boxes to the layer. Forwarded from `call()`. - transformation: The transformation object produced by - `get_random_transformation`. Used to coordinate the randomness between - image, label and bounding box. + angles: A scalar angle to rotate all images by, + or (for batches of images) a vector with an angle to + rotate each image in the batch. The rank must be + statically known (the shape is not `TensorShape(None)`). + image_height: Height of the image(s) to be transformed. + image_width: Width of the image(s) to be transformed. + name: The name of the op. Returns: - output 2D tensor, which will be forward to `layer.call()`. + A tensor of shape (num_images, 8). + Projective transforms which can be given + to operation `image_projective_transform_v2`. + If one row of transforms is + [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point + `(x, y)` to a transformed *input* point + `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, + where `k = c0 x + c1 y + 1`. + """ + with backend.name_scope(name or "rotation_matrix"): + x_offset = ( + (image_width - 1) + - ( + tf.cos(angles) * (image_width - 1) + - tf.sin(angles) * (image_height - 1) + ) + ) / 2.0 + y_offset = ( + (image_height - 1) + - ( + tf.sin(angles) * (image_width - 1) + + tf.cos(angles) * (image_height - 1) + ) + ) / 2.0 + num_angles = tf.shape(angles)[0] + return tf.concat( + values=[ + tf.cos(angles)[:, None], + -tf.sin(angles)[:, None], + x_offset[:, None], + tf.sin(angles)[:, None], + tf.cos(angles)[:, None], + y_offset[:, None], + tf.zeros((num_angles, 2), tf.float32), + ], + axis=1, + ) + + +@keras_export( + "keras.layers.RandomRotation", + "keras.layers.experimental.preprocessing.RandomRotation", + v1=[], +) +class RandomRotation(base_layer.BaseRandomLayer): + """A preprocessing layer which randomly rotates images during training. + + This layer will apply random rotations to each image, filling empty space + according to `fill_mode`. + + By default, random rotations are only applied during training. + At inference time, the layer does nothing. If you need to apply random + rotations at inference time, set `training` to True when calling the layer. + + Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + of integer or floating point dtype. + By default, the layer will output floats. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format + + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format + + Args: + factor: a float represented as fraction of 2 Pi, or a tuple of size 2 + representing lower and upper bound for rotating clockwise and + counter-clockwise. A positive value means rotating + counter clock-wise, + while a negative value means clock-wise. + When represented as a single + float, this value is used for both the upper and lower bound. + For instance, `factor=(-0.2, 0.3)` + results in an output rotation by a random + amount in the range `[-20% * 2pi, 30% * 2pi]`. + `factor=0.2` results in an + output rotating by a random amount + in the range `[-20% * 2pi, 20% * 2pi]`. + fill_mode: Points outside the boundaries of the input are filled + according to the given mode + (one of `{"constant", "reflect", "wrap", "nearest"}`).
+ - *reflect*: `(d c b a | a b c d | d c b a)` + The input is extended by reflecting about + the edge of the last pixel. + - *constant*: `(k k k k | a b c d | k k k k)` + The input is extended by + filling all values beyond the edge with + the same constant value k = 0. + - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by + wrapping around to the opposite edge. + - *nearest*: `(a a a a | a b c d | d d d d)` + The input is extended by the nearest pixel. + interpolation: Interpolation mode. Supported values: `"nearest"`, + `"bilinear"`. + seed: Integer. Used to create a random seed. + fill_value: a float represents the value to be filled outside + the boundaries when `fill_mode="constant"`. """ - raise NotImplementedError() - @doc_controls.for_subclass_implementers - def get_random_transformation( - self, image=None, label=None, bounding_box=None): - """Produce random transformation config for one single input. + def __init__( + self, + factor, + fill_mode="reflect", + interpolation="bilinear", + seed=None, + fill_value=0.0, + **kwargs, + ): + base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomRotation").set( + True + ) + super().__init__(seed=seed, force_generator=True, **kwargs) + self.factor = factor + if isinstance(factor, (tuple, list)): + self.lower = factor[0] + self.upper = factor[1] + else: + self.lower = -factor + self.upper = factor + if self.upper < self.lower: + raise ValueError( + "`factor` argument cannot have a negative value. " + f"Received: factor={factor}" + ) + check_fill_mode_and_interpolation(fill_mode, interpolation) + self.fill_mode = fill_mode + self.fill_value = fill_value + self.interpolation = interpolation + self.seed = seed + + def call(self, inputs, training=True): + inputs = convert_inputs(inputs, self.compute_dtype) + + def random_rotated_inputs(inputs): + """Rotated inputs with random ops.""" + original_shape = inputs.shape + unbatched = inputs.shape.rank == 3 + # The transform op only accepts rank 4 inputs, + # so if we have an unbatched image, + # we need to temporarily expand dims to a batch. + if unbatched: + inputs = tf.expand_dims(inputs, 0) + inputs_shape = tf.shape(inputs) + batch_size = inputs_shape[0] + img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32) + img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32) + min_angle = self.lower * 2.0 * np.pi + max_angle = self.upper * 2.0 * np.pi + angles = self._random_generator.random_uniform( + shape=[batch_size], minval=min_angle, maxval=max_angle + ) + output = transform( + inputs, + get_rotation_matrix(angles, img_hd, img_wd), + fill_mode=self.fill_mode, + fill_value=self.fill_value, + interpolation=self.interpolation, + ) + if unbatched: + output = tf.squeeze(output, 0) + output.set_shape(original_shape) + return output + + if training: + return random_rotated_inputs(inputs) + else: + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "factor": self.factor, + "fill_mode": self.fill_mode, + "fill_value": self.fill_value, + "interpolation": self.interpolation, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export( + "keras.layers.RandomZoom", + "keras.layers.experimental.preprocessing.RandomZoom", + v1=[], +) +class RandomZoom(base_layer.BaseRandomLayer): + """A preprocessing layer which randomly zooms images during training. 
+ + This layer will randomly zoom in or out on each axis of an image + independently, filling empty space according to `fill_mode`. + + Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + of integer or floating point dtype. + By default, the layer will output floats. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - This is used to produce same randomness between image/label/bounding_box. + Args: + height_factor: a float represented as fraction of value, + or a tuple of size 2 representing lower and upper bound + for zooming vertically. When represented as a single float, + this value is used for both the upper and + lower bound. A positive value means zooming out, + while a negative value + means zooming in. For instance, `height_factor=(0.2, 0.3)` + results in an output zoomed out by a random amount + in the range `[+20%, +30%]`. + `height_factor=(-0.3, -0.2)` results in an output zoomed + in by a random amount in the range `[+20%, +30%]`. + width_factor: a float represented as fraction of value, + or a tuple of size 2 representing lower and upper bound + for zooming horizontally. When + represented as a single float, this value is used + for both the upper and + lower bound. For instance, `width_factor=(0.2, 0.3)` + results in an output + zooming out between 20% and 30%. + `width_factor=(-0.3, -0.2)` results in an + output zooming in between 20% and 30%. `None` means + zooming vertical and horizontal directions + by the same amount, preserving the aspect ratio. + Defaults to `None`. + fill_mode: Points outside the boundaries of the input are + filled according to the given mode + (one of `{"constant", "reflect", "wrap", "nearest"}`). + - *reflect*: `(d c b a | a b c d | d c b a)` + The input is extended by reflecting about + the edge of the last pixel. + - *constant*: `(k k k k | a b c d | k k k k)` + The input is extended by filling all values beyond + the edge with the same constant value k = 0. + - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by + wrapping around to the opposite edge. + - *nearest*: `(a a a a | a b c d | d d d d)` + The input is extended by the nearest pixel. + interpolation: Interpolation mode. Supported values: `"nearest"`, + `"bilinear"`. + seed: Integer. Used to create a random seed. + fill_value: a float represents the value to be filled outside + the boundaries when `fill_mode="constant"`. + + Example: + + >>> input_img = np.random.random((32, 224, 224, 3)) + >>> layer = tf.keras.layers.RandomZoom(.5, .2) + >>> out_img = layer(input_img) + >>> out_img.shape + TensorShape([32, 224, 224, 3]) + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format.
+ """ + + def __init__( + self, + height_factor, + width_factor=None, + fill_mode="reflect", + interpolation="bilinear", + seed=None, + fill_value=0.0, + **kwargs, + ): + base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomZoom").set( + True + ) + super().__init__(seed=seed, force_generator=True, **kwargs) + self.height_factor = height_factor + if isinstance(height_factor, (tuple, list)): + self.height_lower = height_factor[0] + self.height_upper = height_factor[1] + else: + self.height_lower = -height_factor + self.height_upper = height_factor + + if abs(self.height_lower) > 1.0 or abs(self.height_upper) > 1.0: + raise ValueError( + "`height_factor` argument must have values between [-1, 1]. " + f"Received: height_factor={height_factor}" + ) + + self.width_factor = width_factor + if width_factor is not None: + if isinstance(width_factor, (tuple, list)): + self.width_lower = width_factor[0] + self.width_upper = width_factor[1] + else: + self.width_lower = -width_factor + self.width_upper = width_factor + + if self.width_lower < -1.0 or self.width_upper < -1.0: + raise ValueError( + "`width_factor` argument must have values larger than -1. " + f"Received: width_factor={width_factor}" + ) + + check_fill_mode_and_interpolation(fill_mode, interpolation) + + self.fill_mode = fill_mode + self.fill_value = fill_value + self.interpolation = interpolation + self.seed = seed + + def call(self, inputs, training=True): + inputs = convert_inputs(inputs, self.compute_dtype) + + def random_zoomed_inputs(inputs): + """Zoomed inputs with random ops.""" + original_shape = inputs.shape + unbatched = inputs.shape.rank == 3 + # The transform op only accepts rank 4 inputs, + # so if we have an unbatched image, + # we need to temporarily expand dims to a batch. + if unbatched: + inputs = tf.expand_dims(inputs, 0) + inputs_shape = tf.shape(inputs) + batch_size = inputs_shape[0] + img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32) + img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32) + height_zoom = self._random_generator.random_uniform( + shape=[batch_size, 1], + minval=1.0 + self.height_lower, + maxval=1.0 + self.height_upper, + ) + if self.width_factor is not None: + width_zoom = self._random_generator.random_uniform( + shape=[batch_size, 1], + minval=1.0 + self.width_lower, + maxval=1.0 + self.width_upper, + ) + else: + width_zoom = height_zoom + zooms = tf.cast( + tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32 + ) + output = transform( + inputs, + get_zoom_matrix(zooms, img_hd, img_wd), + fill_mode=self.fill_mode, + fill_value=self.fill_value, + interpolation=self.interpolation, + ) + if unbatched: + output = tf.squeeze(output, 0) + output.set_shape(original_shape) + return output + + if training: + return random_zoomed_inputs(inputs) + else: + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "height_factor": self.height_factor, + "width_factor": self.width_factor, + "fill_mode": self.fill_mode, + "fill_value": self.fill_value, + "interpolation": self.interpolation, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def get_zoom_matrix(zooms, image_height, image_width, name=None): + """Returns projective transform(s) for the given zoom(s). Args: - image: 3D image tensor from inputs. - label: optional 1D label tensor from inputs. - bounding_box: optional 2D bounding boxes tensor from inputs. 
+ zooms: A matrix of 2-element lists representing `[zx, zy]` + to zoom for each image (for a batch of images). + image_height: Height of the image(s) to be transformed. + image_width: Width of the image(s) to be transformed. + name: The name of the op. Returns: - Any type of object, which will be forwarded to `augment_image`, - `augment_label` and `augment_bounding_box` as the `transformation` - parameter. + A tensor of shape `(num_images, 8)`. Projective transforms which can be + given to operation `image_projective_transform_v2`. + If one row of transforms is + `[a0, a1, a2, b0, b1, b2, c0, c1]`, then it maps the *output* point + `(x, y)` to a transformed *input* point + `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, + where `k = c0 x + c1 y + 1`. """ - return None - - def call(self, inputs, training=True): - inputs = self._ensure_inputs_are_compute_dtype(inputs) - if training: - inputs, is_dict, use_targets = self._format_inputs(inputs) - images = inputs[IMAGES] - if images.shape.rank == 3: - return self._format_output(self._augment(inputs), is_dict, use_targets) - elif images.shape.rank == 4: - return self._format_output(self._batch_augment(inputs), is_dict, use_targets) - else: - raise ValueError('Image augmentation layers are expecting inputs to be ' - 'rank 3 (HWC) or 4D (NHWC) tensors. Got shape: ' - f'{images.shape}') - else: - return inputs - - def _augment(self, inputs): - image = inputs.get(IMAGES, None) - label = inputs.get(LABELS, None) - bounding_box = inputs.get(BOUNDING_BOXES, None) - transformation = self.get_random_transformation( - image=image, label=label, bounding_box=bounding_box) # pylint: disable=assignment-from-none - image = self.augment_image(image, transformation=transformation) - result = {IMAGES: image} - if label is not None: - label = self.augment_target(label, transformation=transformation) - result[LABELS] = label - if bounding_box is not None: - bounding_box = self.augment_bounding_boxes( - image, bounding_box, transformation=transformation) - result[BOUNDING_BOXES] = bounding_box - return result - - def _batch_augment(self, inputs): - return self._map_fn(self._augment, inputs) - - def _format_inputs(self, inputs): - if tf.is_tensor(inputs): - # single image input tensor - return {IMAGES: inputs}, False, False - elif isinstance(inputs, dict) and TARGETS in inputs: - # TODO(scottzhu): Check if it only contains the valid keys - inputs[LABELS] = inputs[TARGETS] - del inputs[TARGETS] - return inputs, True, True - elif isinstance(inputs, dict): - return inputs, True, False - else: - raise ValueError( - f'Expect the inputs to be image tensor or dict. Got {inputs}') - - def _format_output(self, output, is_dict, use_targets): - if not is_dict: - return output[IMAGES] - elif use_targets: - output[TARGETS] = output[LABELS] - del output[LABELS] - return output - else: - return output - - def _ensure_inputs_are_compute_dtype(self, inputs): - if isinstance(inputs, dict): - inputs[IMAGES] = utils.ensure_tensor(inputs[IMAGES], - self.compute_dtype) - else: - inputs = utils.ensure_tensor(inputs, self.compute_dtype) - return inputs + with backend.name_scope(name or "zoom_matrix"): + num_zooms = tf.shape(zooms)[0] + # The zoom matrix looks like: + # [[zx 0 0] + # [0 zy 0] + # [0 0 1]] + # where the last entry is implicit. + # Zoom matrices are always float32. 
+ x_offset = ((image_width - 1.0) / 2.0) * (1.0 - zooms[:, 0, None]) + y_offset = ((image_height - 1.0) / 2.0) * (1.0 - zooms[:, 1, None]) + return tf.concat( + values=[ + zooms[:, 0, None], + tf.zeros((num_zooms, 1), tf.float32), + x_offset, + tf.zeros((num_zooms, 1), tf.float32), + zooms[:, 1, None], + y_offset, + tf.zeros((num_zooms, 2), tf.float32), + ], + axis=1, + ) + + +@keras_export( + "keras.layers.RandomContrast", + "keras.layers.experimental.preprocessing.RandomContrast", + v1=[], +) +class RandomContrast(base_layer.BaseRandomLayer): + """A preprocessing layer which randomly adjusts contrast during training. + + This layer will randomly adjust the contrast of an image or images + by a random factor. Contrast is adjusted independently + for each channel of each image during training. + + For each channel, this layer computes the mean of the image pixels in the + channel and then adjusts each component `x` of each pixel to + `(x - mean) * contrast_factor + mean`. + + Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + in integer or floating point dtype. + By default, the layer will output floats. + The output value will be clipped to the range `[0, 255]`, the valid + range of RGB colors. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + Args: + factor: a positive float represented as fraction of value, or a tuple of + size 2 representing lower and upper bound. + When represented as a single float, lower = upper. + The contrast factor will be randomly picked between + `[1.0 - lower, 1.0 + upper]`. For any pixel x in the channel, + the output will be `(x - mean) * factor + mean` + where `mean` is the mean value of the channel. + seed: Integer. Used to create a random seed. + """ -@keras_export('keras.layers.RandomCrop', - 'keras.layers.experimental.preprocessing.RandomCrop', - v1=[]) -class RandomCrop(BaseImageAugmentationLayer): - """A preprocessing layer which randomly crops images during training. - - During training, this layer will randomly choose a location to crop images - down to a target size. The layer will crop all the images in the same batch to - the same cropping location. - - At inference time, and during training if an input image is smaller than the - target size, the input will be resized and cropped so as to return the largest - possible window in the image that matches the target aspect ratio. If you need - to apply random cropping at inference time, set `training` to True when - calling the layer. - - Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and - of interger or floating point dtype. By default, the layer will output floats. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Input shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Output shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., target_height, target_width, channels)`. - - Args: - height: Integer, the height of the output shape. - width: Integer, the width of the output shape. 
- seed: Integer. Used to create a random seed. - """ - - def __init__(self, height, width, seed=None, **kwargs): - base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomCrop').set(True) - super().__init__(**kwargs, autocast=False, seed=seed, - force_generator=True) - self.height = height - self.width = width - self.seed = seed - - def call(self, inputs, training=True): - - if training: - return super().call(inputs, training) - else: - inputs = self._ensure_inputs_are_compute_dtype(inputs) - inputs, is_dict, targets = self._format_inputs(inputs) - output = inputs - # self._resize() returns valid results for both batched and unbatched - output['images'] = self._resize(inputs['images']) - return self._format_output(output, is_dict, targets) - - def get_random_transformation(self, - image=None, - label=None, - bounding_box=None): - input_shape = tf.shape(image) - h_diff = input_shape[H_AXIS] - self.height - w_diff = input_shape[W_AXIS] - self.width - dtype = input_shape.dtype - rands = self._random_generator.random_uniform([2], 0, dtype.max, dtype) - h_start = rands[0] % (h_diff + 1) - w_start = rands[1] % (w_diff + 1) - return {'top': h_start, 'left': w_start} - - def augment_image(self, image, transformation): - input_shape = tf.shape(image) - h_diff = input_shape[H_AXIS] - self.height - w_diff = input_shape[W_AXIS] - self.width - return tf.cond( - tf.reduce_all((h_diff >= 0, w_diff >= 0)), - lambda: self._crop(image, transformation), lambda: self._resize(image)) - - def _crop(self, image, transformation): - top = transformation['top'] - left = transformation['left'] - return tf.image.crop_to_bounding_box(image, top, left, self.height, - self.width) - - def _resize(self, image): - outputs = image_utils.smart_resize(image, [self.height, self.width]) - # smart_resize will always output float32, so we need to re-cast. - return tf.cast(outputs, self.compute_dtype) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - input_shape[H_AXIS] = self.height - input_shape[W_AXIS] = self.width - return tf.TensorShape(input_shape) - - def get_config(self): - config = { - 'height': self.height, - 'width': self.width, - 'seed': self.seed, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.layers.Rescaling', - 'keras.layers.experimental.preprocessing.Rescaling') -class Rescaling(base_layer.Layer): - """A preprocessing layer which rescales input values to a new range. - - This layer rescales every value of an input (often an image) by multiplying by - `scale` and adding `offset`. - - For instance: - - 1. To rescale an input in the ``[0, 255]`` range - to be in the `[0, 1]` range, you would pass `scale=1./255`. - - 2. To rescale an input in the ``[0, 255]`` range to be in the `[-1, 1]` range, - you would pass `scale=1./127.5, offset=-1`. - - The rescaling is applied both during training and inference. Inputs can be - of integer or floating point dtype, and by default the layer will output - floats. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Input shape: - Arbitrary. - - Output shape: - Same as input. - - Args: - scale: Float, the scale to apply to the inputs. - offset: Float, the offset to apply to the inputs. 
- """ - - def __init__(self, scale, offset=0., **kwargs): - self.scale = scale - self.offset = offset - super().__init__(**kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell('Rescaling').set(True) - - def call(self, inputs): - dtype = self.compute_dtype - scale = tf.cast(self.scale, dtype) - offset = tf.cast(self.offset, dtype) - return tf.cast(inputs, dtype) * scale + offset - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'scale': self.scale, - 'offset': self.offset, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -HORIZONTAL = 'horizontal' -VERTICAL = 'vertical' -HORIZONTAL_AND_VERTICAL = 'horizontal_and_vertical' - - -@keras_export('keras.layers.RandomFlip', - 'keras.layers.experimental.preprocessing.RandomFlip', - v1=[]) -class RandomFlip(BaseImageAugmentationLayer): - """A preprocessing layer which randomly flips images during training. - - This layer will flip the images horizontally and or vertically based on the - `mode` attribute. During inference time, the output will be identical to - input. Call the layer with `training=True` to flip the input. - - Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and - of interger or floating point dtype. By default, the layer will output floats. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Input shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Output shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Arguments: - mode: String indicating which flip mode to use. Can be `"horizontal"`, - `"vertical"`, or `"horizontal_and_vertical"`. Defaults to - `"horizontal_and_vertical"`. `"horizontal"` is a left-right flip and - `"vertical"` is a top-bottom flip. - seed: Integer. Used to create a random seed. 
- """ - - def __init__(self, - mode=HORIZONTAL_AND_VERTICAL, - seed=None, - **kwargs): - super().__init__(seed=seed, force_generator=True, **kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomFlip').set(True) - self.mode = mode - if mode == HORIZONTAL: - self.horizontal = True - self.vertical = False - elif mode == VERTICAL: - self.horizontal = False - self.vertical = True - elif mode == HORIZONTAL_AND_VERTICAL: - self.horizontal = True - self.vertical = True - else: - raise ValueError('RandomFlip layer {name} received an unknown mode ' - 'argument {arg}'.format(name=self.name, arg=mode)) - self.auto_vectorize = False - - def augment_label(self, label, transformation): - return label - - def augment_image(self, image, transformation): - flipped_outputs = image - if self.horizontal and transformation['flip_horizontal']: - flipped_outputs = tf.image.flip_left_right(flipped_outputs) - if self.vertical and transformation['flip_vertical']: - flipped_outputs = tf.image.flip_up_down(flipped_outputs) - flipped_outputs.set_shape(image.shape) - return flipped_outputs - - def get_random_transformation(self, - image=None, - label=None, - bounding_box=None): - flip_horizontal = False - flip_vertical = False - if self.horizontal: - flip_horizontal = np.random.choice([True, False]) - if self.vertical: - flip_vertical = np.random.choice([True, False]) - return {'flip_horizontal': flip_horizontal, 'flip_vertical': flip_vertical} - - def augment_bounding_boxes(self, image, bounding_boxes, transformation=None): - transformation = transformation or self.get_random_transformation() - image = tf.expand_dims(image, 0) - image_shape = tf.shape(image) - h = image_shape[H_AXIS] - w = image_shape[W_AXIS] - bboxes_out = tf.identity(bounding_boxes) - if transformation['flip_horizontal']: - bboxes_out = tf.stack([ - w - bboxes_out[:, 2], bboxes_out[:, 1], w - bboxes_out[:, 0], - bboxes_out[:, 3] - ], - axis=-1) - if transformation['flip_vertical']: - bboxes_out = tf.stack([ - bboxes_out[:, 0], h - bboxes_out[:, 3], bboxes_out[:, 2], - h - bboxes_out[:, 1] - ], - axis=-1) - return bboxes_out - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'mode': self.mode, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def __init__(self, factor, seed=None, **kwargs): + base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomContrast").set( + True + ) + super().__init__(seed=seed, force_generator=True, **kwargs) + self.factor = factor + if isinstance(factor, (tuple, list)): + self.lower = factor[0] + self.upper = factor[1] + else: + self.lower = self.upper = factor + if self.lower < 0.0 or self.upper < 0.0 or self.lower > 1.0: + raise ValueError( + "`factor` argument cannot have negative values or values " + "greater than 1." 
+ f"Received: factor={factor}" + ) + self.seed = seed + + def call(self, inputs, training=True): + inputs = convert_inputs(inputs, self.compute_dtype) + + def random_contrasted_inputs(inputs): + seed = self._random_generator.make_seed_for_stateless_op() + if seed is not None: + output = tf.image.stateless_random_contrast( + inputs, 1.0 - self.lower, 1.0 + self.upper, seed=seed + ) + else: + output = tf.image.random_contrast( + inputs, + 1.0 - self.lower, + 1.0 + self.upper, + seed=self._random_generator.make_legacy_seed(), + ) + output = tf.clip_by_value(output, 0, 255) + output.set_shape(inputs.shape) + return output + + if training: + return random_contrasted_inputs(inputs) + else: + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "factor": self.factor, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.layers.RandomBrightness", v1=[]) +class RandomBrightness(base_layer.BaseRandomLayer): + """A preprocessing layer which randomly adjusts brightness during training. + + This layer will randomly increase/reduce the brightness for the input RGB + images. At inference time, the output will be identical to the input. + Call the layer with `training=True` to adjust the brightness of the input. + + Note that different brightness adjustment factors + will be apply to each the images in the batch. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + Args: + factor: Float or a list/tuple of 2 floats between -1.0 and 1.0. The + factor is used to determine the lower bound and upper bound of the + brightness adjustment. A float value will be chosen randomly between + the limits. When -1.0 is chosen, the output image will be black, and + when 1.0 is chosen, the image will be fully white. + When only one float is provided, eg, 0.2, + then -0.2 will be used for lower bound and 0.2 + will be used for upper bound. + value_range: Optional list/tuple of 2 floats + for the lower and upper limit + of the values of the input data. + To make no change, use [0.0, 1.0], e.g., if the image input + has been scaled before this layer. Defaults to [0.0, 255.0]. + The brightness adjustment will be scaled to this range, and the + output values will be clipped to this range. + seed: optional integer, for fixed RNG behavior. + + Inputs: 3D (HWC) or 4D (NHWC) tensor, with float or int dtype. Input pixel + values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) + + Output: 3D (HWC) or 4D (NHWC) tensor with brightness adjusted based on the + `factor`. By default, the layer will output floats. + The output value will be clipped to the range `[0, 255]`, + the valid range of RGB colors, and + rescaled based on the `value_range` if needed. + + Sample usage: -# TODO(tanzheny): Add examples, here and everywhere. -@keras_export('keras.layers.RandomTranslation', - 'keras.layers.experimental.preprocessing.RandomTranslation', - v1=[]) -class RandomTranslation(BaseImageAugmentationLayer): - """A preprocessing layer which randomly translates images during training. - - This layer will apply random translations to each image during training, - filling empty space according to `fill_mode`. - - Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and - of interger or floating point dtype. By default, the layer will output floats. 
- - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - height_factor: a float represented as fraction of value, or a tuple of size - 2 representing lower and upper bound for shifting vertically. A negative - value means shifting image up, while a positive value means shifting image - down. When represented as a single positive float, this value is used for - both the upper and lower bound. For instance, `height_factor=(-0.2, 0.3)` - results in an output shifted by a random amount in the range - `[-20%, +30%]`. - `height_factor=0.2` results in an output height shifted by a random amount - in the range `[-20%, +20%]`. - width_factor: a float represented as fraction of value, or a tuple of size 2 - representing lower and upper bound for shifting horizontally. A negative - value means shifting image left, while a positive value means shifting - image right. When represented as a single positive float, this value is - used for both the upper and lower bound. For instance, - `width_factor=(-0.2, 0.3)` results in an output shifted left by 20%, and - shifted right by 30%. `width_factor=0.2` results in an output height - shifted left or right by 20%. - fill_mode: Points outside the boundaries of the input are filled according - to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`). - - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by - reflecting about the edge of the last pixel. - - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by - filling all values beyond the edge with the same constant value k = 0. - - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by - wrapping around to the opposite edge. - - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by the - nearest pixel. - interpolation: Interpolation mode. Supported values: `"nearest"`, - `"bilinear"`. - seed: Integer. Used to create a random seed. - fill_value: a float represents the value to be filled outside the boundaries - when `fill_mode="constant"`. - - Input shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Output shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - """ - - def __init__(self, - height_factor, - width_factor, - fill_mode='reflect', - interpolation='bilinear', - seed=None, - fill_value=0.0, - **kwargs): - base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomTranslation').set( - True) - super().__init__(seed=seed, force_generator=True, - **kwargs) - self.height_factor = height_factor - if isinstance(height_factor, (tuple, list)): - self.height_lower = height_factor[0] - self.height_upper = height_factor[1] - else: - self.height_lower = -height_factor - self.height_upper = height_factor - if self.height_upper < self.height_lower: - raise ValueError('`height_factor` cannot have upper bound less than ' - 'lower bound, got {}'.format(height_factor)) - if abs(self.height_lower) > 1. 
or abs(self.height_upper) > 1.: - raise ValueError('`height_factor` must have values between [-1, 1], ' - 'got {}'.format(height_factor)) - - self.width_factor = width_factor - if isinstance(width_factor, (tuple, list)): - self.width_lower = width_factor[0] - self.width_upper = width_factor[1] - else: - self.width_lower = -width_factor - self.width_upper = width_factor - if self.width_upper < self.width_lower: - raise ValueError('`width_factor` cannot have upper bound less than ' - 'lower bound, got {}'.format(width_factor)) - if abs(self.width_lower) > 1. or abs(self.width_upper) > 1.: - raise ValueError('`width_factor` must have values between [-1, 1], ' - 'got {}'.format(width_factor)) - - check_fill_mode_and_interpolation(fill_mode, interpolation) - - self.fill_mode = fill_mode - self.fill_value = fill_value - self.interpolation = interpolation - self.seed = seed - - @tf.function - def augment_image(self, image, transformation): - """Translated inputs with random ops.""" - # The transform op only accepts rank 4 inputs, so if we have an unbatched - # image, we need to temporarily expand dims to a batch. - original_shape = image.shape - inputs = tf.expand_dims(image, 0) - - inputs_shape = tf.shape(inputs) - img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32) - img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32) - height_translation = transformation['height_translation'] - width_translation = transformation['width_translation'] - height_translation = height_translation * img_hd - width_translation = width_translation * img_wd - translations = tf.cast( - tf.concat([width_translation, height_translation], axis=1), - dtype=tf.float32) - output = transform( - inputs, - get_translation_matrix(translations), - interpolation=self.interpolation, - fill_mode=self.fill_mode, - fill_value=self.fill_value) - - output = tf.squeeze(output, 0) - output.set_shape(original_shape) - return output - - def get_random_transformation( - self, image=None, label=None, bounding_box=None): - del image, label, bounding_box - batch_size = 1 - height_translation = self._random_generator.random_uniform( - shape=[batch_size, 1], - minval=self.height_lower, - maxval=self.height_upper, - dtype=tf.float32) - width_translation = self._random_generator.random_uniform( - shape=[batch_size, 1], - minval=self.width_lower, - maxval=self.width_upper, - dtype=tf.float32) - return {'height_translation': height_translation, - 'width_translation': width_translation} - - def _batch_augment(self, inputs): - # Change to vectorized_map for better performance, as well as work around - # issue for different tensorspec between inputs and outputs. - return tf.vectorized_map(self._augment, inputs) - - def augment_label(self, label, transformation): - return label - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'height_factor': self.height_factor, - 'width_factor': self.width_factor, - 'fill_mode': self.fill_mode, - 'fill_value': self.fill_value, - 'interpolation': self.interpolation, - 'seed': self.seed, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + ```python + random_bright = tf.keras.layers.RandomBrightness(factor=0.2) + # An image with shape [2, 2, 3] + image = [[[1, 2, 3], [4 ,5 ,6]], [[7, 8, 9], [10, 11, 12]]] -def get_translation_matrix(translations, name=None): - """Returns projective transform(s) for the given translation(s). 
- - Args: - translations: A matrix of 2-element lists representing `[dx, dy]` - to translate for each image (for a batch of images). - name: The name of the op. - - Returns: - A tensor of shape `(num_images, 8)` projective transforms which can be given - to `transform`. - """ - with backend.name_scope(name or 'translation_matrix'): - num_translations = tf.shape(translations)[0] - # The translation matrix looks like: - # [[1 0 -dx] - # [0 1 -dy] - # [0 0 1]] - # where the last entry is implicit. - # Translation matrices are always float32. - return tf.concat( - values=[ - tf.ones((num_translations, 1), tf.float32), - tf.zeros((num_translations, 1), tf.float32), - -translations[:, 0, None], - tf.zeros((num_translations, 1), tf.float32), - tf.ones((num_translations, 1), tf.float32), - -translations[:, 1, None], - tf.zeros((num_translations, 2), tf.float32), - ], - axis=1) - - -def transform(images, - transforms, - fill_mode='reflect', - fill_value=0.0, - interpolation='bilinear', - output_shape=None, - name=None): - """Applies the given transform(s) to the image(s). - - Args: - images: A tensor of shape - `(num_images, num_rows, num_columns, num_channels)` (NHWC). The rank must - be statically known (the shape is not `TensorShape(None)`). - transforms: Projective transform matrix/matrices. A vector of length 8 or - tensor of size N x 8. If one row of transforms is [a0, a1, a2, b0, b1, b2, - c0, c1], then it maps the *output* point `(x, y)` to a transformed *input* - point `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where - `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to the - transform mapping input points to output points. Note that gradients are - not backpropagated into transformation parameters. - fill_mode: Points outside the boundaries of the input are filled according - to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`). - fill_value: a float represents the value to be filled outside the boundaries - when `fill_mode="constant"`. - interpolation: Interpolation mode. Supported values: `"nearest"`, - `"bilinear"`. - output_shape: Output dimension after the transform, `[height, width]`. - If `None`, output is the same size as input image. - name: The name of the op. - - Fill mode behavior for each valid value is as follows: - - - reflect (d c b a | a b c d | d c b a) - The input is extended by reflecting about the edge of the last pixel. - - - constant (k k k k | a b c d | k k k k) - The input is extended by filling all - values beyond the edge with the same constant value k = 0. - - - wrap (a b c d | a b c d | a b c d) - The input is extended by wrapping around to the opposite edge. - - - nearest (a a a a | a b c d | d d d d) - The input is extended by the nearest pixel. - - Input shape: - 4D tensor with shape: `(samples, height, width, channels)`, - in `"channels_last"` format. - - Output shape: - 4D tensor with shape: `(samples, height, width, channels)`, - in `"channels_last"` format. - - Returns: - Image(s) with the same type and shape as `images`, with the given - transform(s) applied. Transformed coordinates outside of the input image - will be filled with zeros. - - Raises: - TypeError: If `image` is an invalid type. - ValueError: If output shape is not 1-D int32 Tensor. 
- """ - with backend.name_scope(name or 'transform'): - if output_shape is None: - output_shape = tf.shape(images)[1:3] - if not tf.executing_eagerly(): - output_shape_value = tf.get_static_value(output_shape) - if output_shape_value is not None: - output_shape = output_shape_value - - output_shape = tf.convert_to_tensor( - output_shape, tf.int32, name='output_shape') - - if not output_shape.get_shape().is_compatible_with([2]): - raise ValueError('output_shape must be a 1-D Tensor of 2 elements: ' - 'new_height, new_width, instead got ' - '{}'.format(output_shape)) - - fill_value = tf.convert_to_tensor( - fill_value, tf.float32, name='fill_value') - - return tf.raw_ops.ImageProjectiveTransformV3( - images=images, - output_shape=output_shape, - fill_value=fill_value, - transforms=transforms, - fill_mode=fill_mode.upper(), - interpolation=interpolation.upper()) + # Assume we randomly select the factor to be 0.1, then it will apply + # 0.1 * 255 to all the channel + output = random_bright(image, training=True) + # output will be int64 with 25.5 added to each channel and round down. + tf.Tensor([[[26.5, 27.5, 28.5] + [29.5, 30.5, 31.5]] + [[32.5, 33.5, 34.5] + [35.5, 36.5, 37.5]]], + shape=(2, 2, 3), dtype=int64) + ``` + """ -def get_rotation_matrix(angles, image_height, image_width, name=None): - """Returns projective transform(s) for the given angle(s). - - Args: - angles: A scalar angle to rotate all images by, or (for batches of images) a - vector with an angle to rotate each image in the batch. The rank must be - statically known (the shape is not `TensorShape(None)`). - image_height: Height of the image(s) to be transformed. - image_width: Width of the image(s) to be transformed. - name: The name of the op. - - Returns: - A tensor of shape (num_images, 8). Projective transforms which can be given - to operation `image_projective_transform_v2`. If one row of transforms is - [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point - `(x, y)` to a transformed *input* point - `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, - where `k = c0 x + c1 y + 1`. - """ - with backend.name_scope(name or 'rotation_matrix'): - x_offset = ((image_width - 1) - (tf.cos(angles) * - (image_width - 1) - tf.sin(angles) * - (image_height - 1))) / 2.0 - y_offset = ((image_height - 1) - (tf.sin(angles) * - (image_width - 1) + tf.cos(angles) * - (image_height - 1))) / 2.0 - num_angles = tf.shape(angles)[0] - return tf.concat( - values=[ - tf.cos(angles)[:, None], - -tf.sin(angles)[:, None], - x_offset[:, None], - tf.sin(angles)[:, None], - tf.cos(angles)[:, None], - y_offset[:, None], - tf.zeros((num_angles, 2), tf.float32), - ], - axis=1) - - -@keras_export('keras.layers.RandomRotation', - 'keras.layers.experimental.preprocessing.RandomRotation', - v1=[]) -class RandomRotation(BaseImageAugmentationLayer): - """A preprocessing layer which randomly rotates images during training. - - This layer will apply random rotations to each image, filling empty space - according to `fill_mode`. - - By default, random rotations are only applied during training. - At inference time, the layer does nothing. If you need to apply random - rotations at inference time, set `training` to True when calling the layer. - - Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and - of interger or floating point dtype. By default, the layer will output floats. 
- - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Input shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format - - Output shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format - - Arguments: - factor: a float represented as fraction of 2 Pi, or a tuple of size 2 - representing lower and upper bound for rotating clockwise and - counter-clockwise. A positive values means rotating counter clock-wise, - while a negative value means clock-wise. When represented as a single - float, this value is used for both the upper and lower bound. For - instance, `factor=(-0.2, 0.3)` results in an output rotation by a random - amount in the range `[-20% * 2pi, 30% * 2pi]`. `factor=0.2` results in an - output rotating by a random amount in the range `[-20% * 2pi, 20% * 2pi]`. - fill_mode: Points outside the boundaries of the input are filled according - to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`). - - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by - reflecting about the edge of the last pixel. - - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by - filling all values beyond the edge with the same constant value k = 0. - - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by - wrapping around to the opposite edge. - - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by the - nearest pixel. - interpolation: Interpolation mode. Supported values: `"nearest"`, - `"bilinear"`. - seed: Integer. Used to create a random seed. - fill_value: a float represents the value to be filled outside the boundaries - when `fill_mode="constant"`. - """ - - def __init__(self, - factor, - fill_mode='reflect', - interpolation='bilinear', - seed=None, - fill_value=0.0, - **kwargs): - base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomRotation').set( - True) - super().__init__(seed=seed, force_generator=True, - **kwargs) - self.factor = factor - if isinstance(factor, (tuple, list)): - self.lower = factor[0] - self.upper = factor[1] - else: - self.lower = -factor - self.upper = factor - if self.upper < self.lower: - raise ValueError('Factor cannot have negative values, ' - 'got {}'.format(factor)) - check_fill_mode_and_interpolation(fill_mode, interpolation) - self.fill_mode = fill_mode - self.fill_value = fill_value - self.interpolation = interpolation - self.seed = seed - - def get_random_transformation(self, - image=None, - label=None, - bounding_box=None): - min_angle = self.lower * 2. * np.pi - max_angle = self.upper * 2. 
* np.pi - angle = self._random_generator.random_uniform( - shape=[1], minval=min_angle, maxval=max_angle) - return {'angle': angle} - - def augment_image(self, image, transformation): - image = utils.ensure_tensor(image, self.compute_dtype) - original_shape = image.shape - image = tf.expand_dims(image, 0) - image_shape = tf.shape(image) - img_hd = tf.cast(image_shape[H_AXIS], tf.float32) - img_wd = tf.cast(image_shape[W_AXIS], tf.float32) - angle = transformation['angle'] - output = transform( - image, - get_rotation_matrix(angle, img_hd, img_wd), - fill_mode=self.fill_mode, - fill_value=self.fill_value, - interpolation=self.interpolation) - output = tf.squeeze(output, 0) - output.set_shape(original_shape) - return output - - def augment_bounding_boxes(self, image, bounding_boxes, transformation): - image = tf.expand_dims(image, 0) - image_shape = tf.shape(image) - h = image_shape[H_AXIS] - w = image_shape[W_AXIS] - bbox_dtype = bounding_boxes.dtype - # origin coordinates, all the points on the image are rotated around this - # point - origin_x, origin_y = int(h / 2), int(w / 2) - angle = transformation['angle'] - angle = -angle - # calculate coordinates of all four corners of the bounding box - point = tf.stack([ - tf.stack([bounding_boxes[:, 0], bounding_boxes[:, 1]], axis=1), - tf.stack([bounding_boxes[:, 2], bounding_boxes[:, 1]], axis=1), - tf.stack([bounding_boxes[:, 2], bounding_boxes[:, 3]], axis=1), - tf.stack([bounding_boxes[:, 0], bounding_boxes[:, 3]], axis=1)], axis=1) - # point_x : x coordinates of all corners of the bounding box - point_x = tf.gather(point, [0], axis=2) - # point_y : y cordinates of all corners of the bounding box - point_y = tf.gather(point, [1], axis=2) - # rotated bbox coordinates - # new_x : new position of x coordinates of corners of bounding box - new_x = origin_x + tf.multiply(tf.cos(angle), tf.cast( - (point_x - origin_x), dtype=tf.float32)) - tf.multiply( - tf.sin(angle), tf.cast((point_y - origin_y), dtype=tf.float32)) - # new_y : new position of y coordinates of corners of bounding box - new_y = origin_y + tf.multiply(tf.sin(angle), tf.cast( - (point_x - origin_x), dtype=tf.float32)) + tf.multiply( - tf.cos(angle), tf.cast((point_y - origin_y), dtype=tf.float32)) - # rotated bbox coordinates - out = tf.concat([new_x, new_y], axis=2) - # find readjusted coordinates of bounding box to represent it in corners - # format - min_cordinates = tf.math.reduce_min(out, axis=1) - max_cordinates = tf.math.reduce_max(out, axis=1) - bboxes_out = tf.concat([min_cordinates, max_cordinates], axis=1) - # cordinates cannot be float values, it is casted to int32 - bboxes_out = tf.cast(bboxes_out, bbox_dtype) - return bboxes_out - - def augment_label(self, label, transformation): - return label - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'factor': self.factor, - 'fill_mode': self.fill_mode, - 'fill_value': self.fill_value, - 'interpolation': self.interpolation, - 'seed': self.seed, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.layers.RandomZoom', - 'keras.layers.experimental.preprocessing.RandomZoom', - v1=[]) -class RandomZoom(BaseImageAugmentationLayer): - """A preprocessing layer which randomly zooms images during training. - - This layer will randomly zoom in or out on each axis of an image - independently, filling empty space according to `fill_mode`. - - Input pixel values can be of any range (e.g. 
`[0., 1.)` or `[0, 255]`) and - of interger or floating point dtype. By default, the layer will output floats. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - height_factor: a float represented as fraction of value, or a tuple of size - 2 representing lower and upper bound for zooming vertically. When - represented as a single float, this value is used for both the upper and - lower bound. A positive value means zooming out, while a negative value - means zooming in. For instance, `height_factor=(0.2, 0.3)` result in an - output zoomed out by a random amount in the range `[+20%, +30%]`. - `height_factor=(-0.3, -0.2)` result in an output zoomed in by a random - amount in the range `[+20%, +30%]`. - width_factor: a float represented as fraction of value, or a tuple of size 2 - representing lower and upper bound for zooming horizontally. When - represented as a single float, this value is used for both the upper and - lower bound. For instance, `width_factor=(0.2, 0.3)` result in an output - zooming out between 20% to 30%. `width_factor=(-0.3, -0.2)` result in an - output zooming in between 20% to 30%. Defaults to `None`, i.e., zooming - vertical and horizontal directions by preserving the aspect ratio. - fill_mode: Points outside the boundaries of the input are filled according - to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`). - - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by - reflecting about the edge of the last pixel. - - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by - filling all values beyond the edge with the same constant value k = 0. - - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by - wrapping around to the opposite edge. - - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by the - nearest pixel. - interpolation: Interpolation mode. Supported values: `"nearest"`, - `"bilinear"`. - seed: Integer. Used to create a random seed. - fill_value: a float represents the value to be filled outside the boundaries - when `fill_mode="constant"`. - - Example: - - >>> input_img = np.random.random((32, 224, 224, 3)) - >>> layer = tf.keras.layers.RandomZoom(.5, .2) - >>> out_img = layer(input_img) - >>> out_img.shape - TensorShape([32, 224, 224, 3]) - - Input shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Output shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - """ - - def __init__(self, - height_factor, - width_factor=None, - fill_mode='reflect', - interpolation='bilinear', - seed=None, - fill_value=0.0, - **kwargs): - base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomZoom').set(True) - super().__init__(seed=seed, force_generator=True, **kwargs) - self.height_factor = height_factor - if isinstance(height_factor, (tuple, list)): - self.height_lower = height_factor[0] - self.height_upper = height_factor[1] - else: - self.height_lower = -height_factor - self.height_upper = height_factor - - if abs(self.height_lower) > 1. 
or abs(self.height_upper) > 1.: - raise ValueError('`height_factor` must have values between [-1, 1], ' - 'got {}'.format(height_factor)) - - self.width_factor = width_factor - if width_factor is not None: - if isinstance(width_factor, (tuple, list)): - self.width_lower = width_factor[0] - self.width_upper = width_factor[1] - else: - self.width_lower = -width_factor # pylint: disable=invalid-unary-operand-type - self.width_upper = width_factor - - if self.width_lower < -1. or self.width_upper < -1.: - raise ValueError('`width_factor` must have values larger than -1, ' - 'got {}'.format(width_factor)) - - check_fill_mode_and_interpolation(fill_mode, interpolation) - - self.fill_mode = fill_mode - self.fill_value = fill_value - self.interpolation = interpolation - self.seed = seed - - def get_random_transformation(self, - image=None, - label=None, - bounding_box=None): - height_zoom = self._random_generator.random_uniform( - shape=[1, 1], - minval=1. + self.height_lower, - maxval=1. + self.height_upper) - if self.width_factor is not None: - width_zoom = self._random_generator.random_uniform( - shape=[1, 1], - minval=1. + self.width_lower, - maxval=1. + self.width_upper) - else: - width_zoom = height_zoom - - return {'height_zoom': height_zoom, 'width_zoom': width_zoom} - - def augment_image(self, image, transformation): - image = utils.ensure_tensor(image, self.compute_dtype) - original_shape = image.shape - image = tf.expand_dims(image, 0) - image_shape = tf.shape(image) - img_hd = tf.cast(image_shape[H_AXIS], tf.float32) - img_wd = tf.cast(image_shape[W_AXIS], tf.float32) - width_zoom = transformation['width_zoom'] - height_zoom = transformation['height_zoom'] - zooms = tf.cast( - tf.concat([width_zoom, height_zoom], axis=1), - dtype=tf.float32) - output = transform( - image, - get_zoom_matrix(zooms, img_hd, img_wd), - fill_mode=self.fill_mode, - fill_value=self.fill_value, - interpolation=self.interpolation) - output = tf.squeeze(output, 0) - output.set_shape(original_shape) - return output - - def augment_label(self, label, transformation): - return label - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'height_factor': self.height_factor, - 'width_factor': self.width_factor, - 'fill_mode': self.fill_mode, - 'fill_value': self.fill_value, - 'interpolation': self.interpolation, - 'seed': self.seed, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + _FACTOR_VALIDATION_ERROR = ( + "The `factor` argument should be a number (or a list of two numbers) " + "in the range [-1.0, 1.0]. " + ) + _VALUE_RANGE_VALIDATION_ERROR = ( + "The `value_range` argument should be a list of two numbers. 
" + ) + def __init__(self, factor, value_range=(0, 255), seed=None, **kwargs): + base_preprocessing_layer.keras_kpl_gauge.get_cell( + "RandomBrightness" + ).set(True) + super().__init__(seed=seed, force_generator=True, **kwargs) + self._set_factor(factor) + self._set_value_range(value_range) + self._seed = seed + + def _set_value_range(self, value_range): + if not isinstance(value_range, (tuple, list)): + raise ValueError( + self._VALUE_RANGE_VALIDATION_ERROR + f"Got {value_range}" + ) + if len(value_range) != 2: + raise ValueError( + self._VALUE_RANGE_VALIDATION_ERROR + f"Got {value_range}" + ) + self._value_range = sorted(value_range) + + def _set_factor(self, factor): + if isinstance(factor, (tuple, list)): + if len(factor) != 2: + raise ValueError( + self._FACTOR_VALIDATION_ERROR + f"Got {factor}" + ) + self._check_factor_range(factor[0]) + self._check_factor_range(factor[1]) + self._factor = sorted(factor) + elif isinstance(factor, (int, float)): + self._check_factor_range(factor) + factor = abs(factor) + self._factor = [-factor, factor] + else: + raise ValueError(self._FACTOR_VALIDATION_ERROR + f"Got {factor}") + + def _check_factor_range(self, input_number): + if input_number > 1.0 or input_number < -1.0: + raise ValueError( + self._FACTOR_VALIDATION_ERROR + f"Got {input_number}" + ) + + def call(self, inputs, training=True): + inputs = convert_inputs(inputs, dtype=self.compute_dtype) + if training: + return self._brightness_adjust(inputs) + else: + return inputs + + def _brightness_adjust(self, images): + rank = images.shape.rank + if rank == 3: + rgb_delta_shape = (1, 1, 1) + elif rank == 4: + # Keep only the batch dim. This will ensure to have same adjustment + # with in one image, but different across the images. + rgb_delta_shape = [tf.shape(images)[0], 1, 1, 1] + else: + raise ValueError( + "Expected the input image to be rank 3 or 4. Got " + f"inputs.shape = {images.shape}" + ) + rgb_delta = self._random_generator.random_uniform( + shape=rgb_delta_shape, + minval=self._factor[0], + maxval=self._factor[1], + ) + rgb_delta = rgb_delta * (self._value_range[1] - self._value_range[0]) + rgb_delta = tf.cast(rgb_delta, images.dtype) + images += rgb_delta + return tf.clip_by_value( + images, self._value_range[0], self._value_range[1] + ) + + def get_config(self): + config = { + "factor": self._factor, + "value_range": self._value_range, + "seed": self._seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export( + "keras.layers.RandomHeight", + "keras.layers.experimental.preprocessing.RandomHeight", + v1=[], +) +class RandomHeight(base_layer.BaseRandomLayer): + """A preprocessing layer which randomly varies image height during training. + + This layer adjusts the height of a batch of images by a random factor. + The input should be a 3D (unbatched) or 4D (batched) tensor in the + `"channels_last"` image data format. Input pixel values can be of any range + (e.g. `[0., 1.)` or `[0, 255]`) and of integer or floating point dtype. By + default, the layer will output floats. + + + By default, this layer is inactive during inference. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). -def get_zoom_matrix(zooms, image_height, image_width, name=None): - """Returns projective transform(s) for the given zoom(s). 
- - Args: - zooms: A matrix of 2-element lists representing `[zx, zy]` to zoom for each - image (for a batch of images). - image_height: Height of the image(s) to be transformed. - image_width: Width of the image(s) to be transformed. - name: The name of the op. - - Returns: - A tensor of shape `(num_images, 8)`. Projective transforms which can be - given to operation `image_projective_transform_v2`. - If one row of transforms is - `[a0, a1, a2, b0, b1, b2, c0, c1]`, then it maps the *output* point - `(x, y)` to a transformed *input* point - `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, - where `k = c0 x + c1 y + 1`. - """ - with backend.name_scope(name or 'zoom_matrix'): - num_zooms = tf.shape(zooms)[0] - # The zoom matrix looks like: - # [[zx 0 0] - # [0 zy 0] - # [0 0 1]] - # where the last entry is implicit. - # Zoom matrices are always float32. - x_offset = ((image_width - 1.) / 2.0) * (1.0 - zooms[:, 0, None]) - y_offset = ((image_height - 1.) / 2.0) * (1.0 - zooms[:, 1, None]) - return tf.concat( - values=[ - zooms[:, 0, None], - tf.zeros((num_zooms, 1), tf.float32), - x_offset, - tf.zeros((num_zooms, 1), tf.float32), - zooms[:, 1, None], - y_offset, - tf.zeros((num_zooms, 2), tf.float32), - ], - axis=1) - - -@keras_export('keras.layers.RandomContrast', - 'keras.layers.experimental.preprocessing.RandomContrast', - v1=[]) -class RandomContrast(BaseImageAugmentationLayer): - """A preprocessing layer which randomly adjusts contrast during training. - - This layer will randomly adjust the contrast of an image or images by a random - factor. Contrast is adjusted independently for each channel of each image - during training. - - For each channel, this layer computes the mean of the image pixels in the - channel and then adjusts each component `x` of each pixel to - `(x - mean) * contrast_factor + mean`. - - Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and - in integer or floating point dtype. By default, the layer will output floats. - The output value will be clipped to the range `[0, 255]`, the valid - range of RGB colors. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Input shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Output shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Arguments: - factor: a positive float represented as fraction of value, or a tuple of - size 2 representing lower and upper bound. When represented as a single - float, lower = upper. The contrast factor will be randomly picked between - `[1.0 - lower, 1.0 + upper]`. For any pixel x in the channel, the output - will be `(x - mean) * factor + mean` where `mean` is the mean value of the - channel. - seed: Integer. Used to create a random seed. - """ - - def __init__(self, factor, seed=None, **kwargs): - base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomContrast').set( - True) - super().__init__(seed=seed, force_generator=True, - **kwargs) - self.factor = factor - if isinstance(factor, (tuple, list)): - self.lower = factor[0] - self.upper = factor[1] - else: - self.lower = self.upper = factor - if self.lower < 0. or self.upper < 0. 
or self.lower > 1.: - raise ValueError('Factor cannot have negative values or greater than 1.0,' - ' got {}'.format(factor)) - self.seed = seed - - def get_random_transformation(self, - image=None, - label=None, - bounding_box=None): - lower = 1. - self.lower - upper = 1. + self.upper - random_seed = self._random_generator.make_seed_for_stateless_op() - contrast_factor = stateless_random_ops.stateless_random_uniform( - shape=[], minval=lower, maxval=upper, seed=random_seed) - return {'contrast_factor': contrast_factor} - - def augment_image(self, image, transformation): - contrast_factor = transformation['contrast_factor'] - output = tf.image.adjust_contrast(image, contrast_factor=contrast_factor) - output = tf.clip_by_value(output, 0, 255) - output.set_shape(image.shape) - return output - - def augment_label(self, label, transformation): - return label - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'factor': self.factor, - 'seed': self.seed, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.layers.RandomBrightness', v1=[]) -class RandomBrightness(BaseImageAugmentationLayer): - """A preprocessing layer which randomly adjusts brightness during training. - - This layer will randomly increase/reduce the brightness for the input RGB - images. At inference time, the output will be identical to the input. - Call the layer with `training=True` to adjust the brightness of the input. - - Note that different brightness adjustment factors - will be apply to each the images in the batch. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - factor: Float or a list/tuple of 2 floats between -1.0 and 1.0. The - factor is used to determine the lower bound and upper bound of the - brightness adjustment. A float value will be chosen randomly between - the limits. When -1.0 is chosen, the output image will be black, and - when 1.0 is chosen, the image will be fully white. When only one float - is provided, eg, 0.2, then -0.2 will be used for lower bound and 0.2 - will be used for upper bound. - value_range: Optional list/tuple of 2 floats for the lower and upper limit - of the values of the input data. Defaults to [0.0, 255.0]. Can be changed - to e.g. [0.0, 1.0] if the image input has been scaled before this layer. - The brightness adjustment will be scaled to this range, and the - output values will be clipped to this range. - seed: optional integer, for fixed RNG behavior. - - Inputs: 3D (HWC) or 4D (NHWC) tensor, with float or int dtype. Input pixel - values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) - - Output: 3D (HWC) or 4D (NHWC) tensor with brightness adjusted based on the - `factor`. By default, the layer will output floats. The output value will - be clipped to the range `[0, 255]`, the valid range of RGB colors, and - rescaled based on the `value_range` if needed. - - Sample usage: - - ```python - random_bright = tf.keras.layers.RandomBrightness(factor=0.2) - - # An image with shape [2, 2, 3] - image = [[[1, 2, 3], [4 ,5 ,6]], [[7, 8, 9], [10, 11, 12]]] - - # Assume we randomly select the factor to be 0.1, then it will apply - # 0.1 * 255 to all the channel - output = random_bright(image, training=True) - - # output will be int64 with 25.5 added to each channel and round down. 
- tf.Tensor([[[26.5, 27.5, 28.5] - [29.5, 30.5, 31.5]] - [[32.5, 33.5, 34.5] - [35.5, 36.5, 37.5]]], - shape=(2, 2, 3), dtype=int64) - ``` - """ - _FACTOR_VALIDATION_ERROR = ( - 'The `factor` argument should be a number (or a list of two numbers) ' - 'in the range [-1.0, 1.0]. ') - _VALUE_RANGE_VALIDATION_ERROR = ( - 'The `value_range` argument should be a list of two numbers. ') - - def __init__(self, factor, value_range=(0, 255), seed=None, **kwargs): - base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomBrightness').set( - True) - super().__init__(seed=seed, force_generator=True, **kwargs) - self._set_factor(factor) - self._set_value_range(value_range) - self._seed = seed - - def augment_image(self, image, transformation): - return self._brightness_adjust(image, transformation['rgb_delta']) - - def augment_label(self, label, transformation): - return label - - def get_random_transformation(self, - image=None, - label=None, - bounding_box=None): - rgb_delta_shape = (1, 1, 1) - random_rgb_delta = self._random_generator.random_uniform( - shape=rgb_delta_shape, - minval=self._factor[0], - maxval=self._factor[1], - ) - random_rgb_delta = random_rgb_delta * ( - self._value_range[1] - self._value_range[0]) - return {'rgb_delta': random_rgb_delta} - - def _set_value_range(self, value_range): - if not isinstance(value_range, (tuple, list)): - raise ValueError( - self._VALUE_RANGE_VALIDATION_ERROR + f'Got {value_range}') - if len(value_range) != 2: - raise ValueError( - self._VALUE_RANGE_VALIDATION_ERROR + f'Got {value_range}') - self._value_range = sorted(value_range) - - def _set_factor(self, factor): - if isinstance(factor, (tuple, list)): - if len(factor) != 2: - raise ValueError(self._FACTOR_VALIDATION_ERROR + f'Got {factor}') - self._check_factor_range(factor[0]) - self._check_factor_range(factor[1]) - self._factor = sorted(factor) - elif isinstance(factor, (int, float)): - self._check_factor_range(factor) - factor = abs(factor) - self._factor = [-factor, factor] - else: - raise ValueError(self._FACTOR_VALIDATION_ERROR + f'Got {factor}') - - def _check_factor_range(self, input_number): - if input_number > 1.0 or input_number < -1.0: - raise ValueError(self._FACTOR_VALIDATION_ERROR + f'Got {input_number}') - - def _brightness_adjust(self, image, rgb_delta): - image = utils.ensure_tensor(image, self.compute_dtype) - rank = image.shape.rank - if rank != 3: - raise ValueError( - 'Expected the input image to be rank 3. Got ' - f'inputs.shape = {image.shape}') - rgb_delta = tf.cast(rgb_delta, image.dtype) - image += rgb_delta - return tf.clip_by_value( - image, self._value_range[0], self._value_range[1]) - - def get_config(self): - config = { - 'factor': self._factor, - 'value_range': self._value_range, - 'seed': self._seed, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.layers.RandomHeight', - 'keras.layers.experimental.preprocessing.RandomHeight', - v1=[]) -class RandomHeight(BaseImageAugmentationLayer): - """A preprocessing layer which randomly varies image height during training. - - This layer adjusts the height of a batch of images by a random factor. - The input should be a 3D (unbatched) or 4D (batched) tensor in the - `"channels_last"` image data format. Input pixel values can be of any range - (e.g. `[0., 1.)` or `[0, 255]`) and of interger or floating point dtype. By - default, the layer will output floats. - - - By default, this layer is inactive during inference. 
- - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - factor: A positive float (fraction of original height), or a tuple of size 2 - representing lower and upper bound for resizing vertically. When - represented as a single float, this value is used for both the upper and - lower bound. For instance, `factor=(0.2, 0.3)` results in an output with - height changed by a random amount in the range `[20%, 30%]`. - `factor=(-0.2, 0.3)` results in an output with height changed by a random - amount in the range `[-20%, +30%]`. `factor=0.2` results in an output with - height changed by a random amount in the range `[-20%, +20%]`. - interpolation: String, the interpolation method. Defaults to `"bilinear"`. - Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, - `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. - seed: Integer. Used to create a random seed. - - Input shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Output shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., random_height, width, channels)`. - """ - - def __init__(self, - factor, - interpolation='bilinear', - seed=None, - **kwargs): - base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomHeight').set(True) - super().__init__(seed=seed, force_generator=True, - **kwargs) - self.factor = factor - if isinstance(factor, (tuple, list)): - self.height_lower = factor[0] - self.height_upper = factor[1] - else: - self.height_lower = -factor - self.height_upper = factor - - if self.height_upper < self.height_lower: - raise ValueError('`factor` cannot have upper bound less than ' - 'lower bound, got {}'.format(factor)) - if self.height_lower < -1. or self.height_upper < -1.: - raise ValueError('`factor` must have values larger than -1, ' - 'got {}'.format(factor)) - self.interpolation = interpolation - self._interpolation_method = image_utils.get_interpolation(interpolation) - self.seed = seed - - def get_random_transformation(self, - image=None, - label=None, - bounding_box=None): - height_factor = self._random_generator.random_uniform( - shape=[], - minval=(1.0 + self.height_lower), - maxval=(1.0 + self.height_upper)) - inputs_shape = tf.shape(image) - img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32) - adjusted_height = tf.cast(height_factor * img_hd, tf.int32) - return {'height': adjusted_height} - - def _batch_augment(self, inputs): - images = self.augment_image( - inputs[IMAGES], - transformation=self.get_random_transformation(image=inputs[IMAGES])) - result = {IMAGES: images} - # to-do augment bbox to clip bbox to resized height value - return result - - def augment_image(self, image, transformation): - # The batch dimension of the input=image is not modified. The output would - # be accurate for both unbatched and batched input - inputs_shape = tf.shape(image) - img_wd = inputs_shape[W_AXIS] - adjusted_height = transformation['height'] - adjusted_size = tf.stack([adjusted_height, img_wd]) - output = tf.image.resize( - images=image, size=adjusted_size, method=self._interpolation_method) - # tf.resize will output float32 in many cases regardless of input type. 
- output = tf.cast(output, self.compute_dtype) - output_shape = list(image.shape) - output_shape[H_AXIS] = None - output.set_shape(output_shape) - return output - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - input_shape[H_AXIS] = None - return tf.TensorShape(input_shape) - - def get_config(self): - config = { - 'factor': self.factor, - 'interpolation': self.interpolation, - 'seed': self.seed, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.layers.RandomWidth', - 'keras.layers.experimental.preprocessing.RandomWidth', - v1=[]) -class RandomWidth(BaseImageAugmentationLayer): - """A preprocessing layer which randomly varies image width during training. - - This layer will randomly adjusts the width of a batch of images of a - batch of images by a random factor. The input should be a 3D (unbatched) or - 4D (batched) tensor in the `"channels_last"` image data format. Input pixel - values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and of interger or - floating point dtype. By default, the layer will output floats. - - By default, this layer is inactive during inference. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - factor: A positive float (fraction of original width), or a tuple of size 2 - representing lower and upper bound for resizing vertically. When - represented as a single float, this value is used for both the upper and - lower bound. For instance, `factor=(0.2, 0.3)` results in an output with - width changed by a random amount in the range `[20%, 30%]`. `factor=(-0.2, - 0.3)` results in an output with width changed by a random amount in the - range `[-20%, +30%]`. `factor=0.2` results in an output with width changed - by a random amount in the range `[-20%, +20%]`. - interpolation: String, the interpolation method. Defaults to `bilinear`. - Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, `"lanczos3"`, - `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. - seed: Integer. Used to create a random seed. - - Input shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, width, channels)`, in `"channels_last"` format. - - Output shape: - 3D (unbatched) or 4D (batched) tensor with shape: - `(..., height, random_width, channels)`. - """ - - def __init__(self, - factor, - interpolation='bilinear', - seed=None, - **kwargs): - base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomWidth').set(True) - super().__init__(seed=seed, force_generator=True, **kwargs) - self.factor = factor - if isinstance(factor, (tuple, list)): - self.width_lower = factor[0] - self.width_upper = factor[1] - else: - self.width_lower = -factor - self.width_upper = factor - if self.width_upper < self.width_lower: - raise ValueError('`factor` cannot have upper bound less than ' - 'lower bound, got {}'.format(factor)) - if self.width_lower < -1. 
or self.width_upper < -1.: - raise ValueError('`factor` must have values larger than -1, ' - 'got {}'.format(factor)) - self.interpolation = interpolation - self._interpolation_method = image_utils.get_interpolation(interpolation) - self.seed = seed - self.auto_vectorize = False - - def _batch_augment(self, inputs): - images = self.augment_image( - inputs[IMAGES], - transformation=self.get_random_transformation(image=inputs[IMAGES])) - result = {IMAGES: images} - # to-do augment bbox to clip bbox to resized width value - return result - - def augment_image(self, image, transformation): - # The batch dimension of the input=image is not modified. The output would - # be accurate for both unbatched and batched input - inputs = utils.ensure_tensor(image) - inputs_shape = tf.shape(inputs) - img_hd = inputs_shape[H_AXIS] - adjusted_width = transformation['width'] - adjusted_size = tf.stack([img_hd, adjusted_width]) - output = tf.image.resize( - images=inputs, size=adjusted_size, method=self._interpolation_method) - # tf.resize will output float32 in many cases regardless of input type. - output = tf.cast(output, self.compute_dtype) - output_shape = inputs.shape.as_list() - output_shape[W_AXIS] = None - output.set_shape(output_shape) - return output - - def get_random_transformation(self, - image=None, - label=None, - bounding_box=None): - inputs_shape = tf.shape(image) - img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32) - width_factor = self._random_generator.random_uniform( - shape=[], - minval=(1.0 + self.width_lower), - maxval=(1.0 + self.width_upper)) - adjusted_width = tf.cast(width_factor * img_wd, tf.int32) - return {'width': adjusted_width} - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - input_shape[W_AXIS] = None - return tf.TensorShape(input_shape) - - def get_config(self): - config = { - 'factor': self.factor, - 'interpolation': self.interpolation, - 'seed': self.seed, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + Args: + factor: A positive float (fraction of original height), + or a tuple of size 2 representing lower and upper bound + for resizing vertically. When represented as a single float, + this value is used for both the upper and + lower bound. For instance, `factor=(0.2, 0.3)` results + in an output with + height changed by a random amount in the range `[20%, 30%]`. + `factor=(-0.2, 0.3)` results in an output with height + changed by a random amount in the range `[-20%, +30%]`. + `factor=0.2` results in an output with + height changed by a random amount in the range `[-20%, +20%]`. + interpolation: String, the interpolation method. + Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, + `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. + Defaults to `"bilinear"`. + seed: Integer. Used to create a random seed. + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., random_height, width, channels)`. 
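To make the `factor` semantics above concrete, a short usage sketch (shapes and seed are arbitrary; `training=True` forces the augmentation, which is otherwise skipped at inference):

```python
import tensorflow as tf

images = tf.random.uniform((2, 8, 8, 3))  # batch of two 8x8 RGB images

# factor=0.2: output height is drawn uniformly from [80%, 120%] of 8.
layer = tf.keras.layers.RandomHeight(factor=0.2, seed=42)
out = layer(images, training=True)
print(out.shape)  # (2, h, 8, 3) with h varying per call; width is untouched
```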
+ """ + + def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs): + base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomHeight").set( + True + ) + super().__init__(seed=seed, force_generator=True, **kwargs) + self.factor = factor + if isinstance(factor, (tuple, list)): + self.height_lower = factor[0] + self.height_upper = factor[1] + else: + self.height_lower = -factor + self.height_upper = factor + + if self.height_upper < self.height_lower: + raise ValueError( + "`factor` argument cannot have an upper bound lesser than the " + f"lower bound. Received: factor={factor}" + ) + if self.height_lower < -1.0 or self.height_upper < -1.0: + raise ValueError( + "`factor` argument must have values larger than -1. " + f"Received: factor={factor}" + ) + self.interpolation = interpolation + self._interpolation_method = image_utils.get_interpolation( + interpolation + ) + self.seed = seed + + def call(self, inputs, training=True): + inputs = convert_inputs(inputs) + + def random_height_inputs(inputs): + """Inputs height-adjusted with random ops.""" + inputs_shape = tf.shape(inputs) + img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32) + img_wd = inputs_shape[W_AXIS] + height_factor = self._random_generator.random_uniform( + shape=[], + minval=(1.0 + self.height_lower), + maxval=(1.0 + self.height_upper), + ) + adjusted_height = tf.cast(height_factor * img_hd, tf.int32) + adjusted_size = tf.stack([adjusted_height, img_wd]) + output = tf.image.resize( + images=inputs, + size=adjusted_size, + method=self._interpolation_method, + ) + # tf.resize will output float32 regardless of input type. + output = tf.cast(output, self.compute_dtype) + output_shape = inputs.shape.as_list() + output_shape[H_AXIS] = None + output.set_shape(output_shape) + return output + + if training: + return random_height_inputs(inputs) + else: + return inputs + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + input_shape[H_AXIS] = None + return tf.TensorShape(input_shape) + + def get_config(self): + config = { + "factor": self.factor, + "interpolation": self.interpolation, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export( + "keras.layers.RandomWidth", + "keras.layers.experimental.preprocessing.RandomWidth", + v1=[], +) +class RandomWidth(base_layer.BaseRandomLayer): + """A preprocessing layer which randomly varies image width during training. + + This layer will randomly adjusts the width of a batch of images of a + batch of images by a random factor. The input should be a 3D (unbatched) or + 4D (batched) tensor in the `"channels_last"` image data format. Input pixel + values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and of integer or + floating point dtype. By default, the layer will output floats. + + By default, this layer is inactive during inference. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Args: + factor: A positive float (fraction of original width), + or a tuple of size 2 representing lower and upper bound + for resizing horizontally. When represented as a single float, + this value is used for both the upper and + lower bound. For instance, `factor=(0.2, 0.3)` + results in an output with + width changed by a random amount in the range `[20%, 30%]`. 
+            `factor=(-0.2, 0.3)` results in an output with width changed +            by a random amount in the range `[-20%, +30%]`. +            `factor=0.2` results in an output with width changed +            by a random amount in the range `[-20%, +20%]`. +        interpolation: String, the interpolation method. +            Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, +            `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. +            Defaults to `"bilinear"`. +        seed: Integer. Used to create a random seed. + +    Input shape: +        3D (unbatched) or 4D (batched) tensor with shape: +        `(..., height, width, channels)`, in `"channels_last"` format. + +    Output shape: +        3D (unbatched) or 4D (batched) tensor with shape: +        `(..., height, random_width, channels)`. +    """ + +    def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs): +        base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomWidth").set( +            True +        ) +        super().__init__(seed=seed, force_generator=True, **kwargs) +        self.factor = factor +        if isinstance(factor, (tuple, list)): +            self.width_lower = factor[0] +            self.width_upper = factor[1] +        else: +            self.width_lower = -factor +            self.width_upper = factor +        if self.width_upper < self.width_lower: +            raise ValueError( +                "`factor` argument cannot have an upper bound less than the " +                f"lower bound. Received: factor={factor}" +            ) +        if self.width_lower < -1.0 or self.width_upper < -1.0: +            raise ValueError( +                "`factor` argument must have values larger than -1. " +                f"Received: factor={factor}" +            ) +        self.interpolation = interpolation +        self._interpolation_method = image_utils.get_interpolation( +            interpolation +        ) +        self.seed = seed + +    def call(self, inputs, training=True): +        inputs = convert_inputs(inputs) + +        def random_width_inputs(inputs): +            """Inputs width-adjusted with random ops.""" +            inputs_shape = tf.shape(inputs) +            img_hd = inputs_shape[H_AXIS] +            img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32) +            width_factor = self._random_generator.random_uniform( +                shape=[], +                minval=(1.0 + self.width_lower), +                maxval=(1.0 + self.width_upper), +            ) +            adjusted_width = tf.cast(width_factor * img_wd, tf.int32) +            adjusted_size = tf.stack([img_hd, adjusted_width]) +            output = tf.image.resize( +                images=inputs, +                size=adjusted_size, +                method=self._interpolation_method, +            ) +            # tf.resize will output float32 regardless of input type. +            output = tf.cast(output, self.compute_dtype) +            output_shape = inputs.shape.as_list() +            output_shape[W_AXIS] = None +            output.set_shape(output_shape) +            return output + +        if training: +            return random_width_inputs(inputs) +        else: +            return inputs + +    def compute_output_shape(self, input_shape): +        input_shape = tf.TensorShape(input_shape).as_list() +        input_shape[W_AXIS] = None +        return tf.TensorShape(input_shape) + +    def get_config(self): +        config = { +            "factor": self.factor, +            "interpolation": self.interpolation, +            "seed": self.seed, +        } +        base_config = super().get_config() +        return dict(list(base_config.items()) + list(config.items())) + + +def convert_inputs(inputs, dtype=None): +    if isinstance(inputs, dict): +        raise ValueError( +            "This layer can only process a tensor representing an image or " +            f"a batch of images. Received: type(inputs)={type(inputs)}. " +            "If you need to pass a dict containing " +            "images, labels, and bounding boxes, you should " +            "instead use the preprocessing and augmentation layers " +            "from `keras_cv.layers`.
See docs at " + "https://keras.io/api/keras_cv/layers/" + ) + inputs = utils.ensure_tensor(inputs, dtype=dtype) + return inputs diff --git a/keras/layers/preprocessing/image_preprocessing_distribution_test.py b/keras/layers/preprocessing/image_preprocessing_distribution_test.py index 1a71b8ce5a2d..9383de95e0e7 100644 --- a/keras/layers/preprocessing/image_preprocessing_distribution_test.py +++ b/keras/layers/preprocessing/image_preprocessing_distribution_test.py @@ -14,52 +14,60 @@ # ============================================================================== """Distribution tests for keras.layers.preprocessing.image_preprocessing.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.distribute import strategy_combinations from keras.layers.preprocessing import image_preprocessing from keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_utils.run_v2_only @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( - strategy=strategy_combinations.all_strategies + - strategy_combinations.multi_worker_mirrored_strategies, - mode=["eager"])) + strategy=strategy_combinations.all_strategies + + strategy_combinations.multi_worker_mirrored_strategies, + mode=["eager"], + ) +) class ImagePreprocessingDistributionTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_distribution(self, strategy): - if "CentralStorage" in type(strategy).__name__: - self.skipTest("Does not work with CentralStorageStrategy yet.") - # TODO(b/159738418): large image input causes OOM in ubuntu multi gpu. - np_images = np.random.random((32, 32, 32, 3)).astype(np.float32) - image_dataset = tf.data.Dataset.from_tensor_slices(np_images).batch( - 16, drop_remainder=True) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_distribution(self, strategy): + if "CentralStorage" in type(strategy).__name__: + self.skipTest("Does not work with CentralStorageStrategy yet.") + # TODO(b/159738418): large image input causes OOM in ubuntu multi gpu. 
+ np_images = np.random.random((32, 32, 32, 3)).astype(np.float32) + image_dataset = tf.data.Dataset.from_tensor_slices(np_images).batch( + 16, drop_remainder=True + ) - with strategy.scope(): - input_data = keras.Input(shape=(32, 32, 3), dtype=tf.float32) - image_preprocessor = keras.Sequential([ - image_preprocessing.Resizing(height=256, width=256), - image_preprocessing.RandomCrop(height=224, width=224), - image_preprocessing.RandomTranslation(.1, .1), - image_preprocessing.RandomBrightness(.1, value_range=(0, 1)), - image_preprocessing.RandomRotation(.2), - image_preprocessing.RandomFlip(), - image_preprocessing.RandomZoom(.2, .2)]) - preprocessed_image = image_preprocessor(input_data) - flatten_layer = keras.layers.Flatten(data_format="channels_last") - output = flatten_layer(preprocessed_image) - cls_layer = keras.layers.Dense(units=1, activation="sigmoid") - output = cls_layer(output) - model = keras.Model(inputs=input_data, outputs=output) - _ = model.predict(image_dataset) + with strategy.scope(): + input_data = keras.Input(shape=(32, 32, 3), dtype=tf.float32) + image_preprocessor = keras.Sequential( + [ + image_preprocessing.Resizing(height=256, width=256), + image_preprocessing.RandomCrop(height=224, width=224), + image_preprocessing.RandomTranslation(0.1, 0.1), + image_preprocessing.RandomBrightness( + 0.1, value_range=(0, 1) + ), + image_preprocessing.RandomRotation(0.2), + image_preprocessing.RandomFlip(), + image_preprocessing.RandomZoom(0.2, 0.2), + ] + ) + preprocessed_image = image_preprocessor(input_data) + flatten_layer = keras.layers.Flatten(data_format="channels_last") + output = flatten_layer(preprocessed_image) + cls_layer = keras.layers.Dense(units=1, activation="sigmoid") + output = cls_layer(output) + model = keras.Model(inputs=input_data, outputs=output) + _ = model.predict(image_dataset) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/layers/preprocessing/image_preprocessing_test.py b/keras/layers/preprocessing/image_preprocessing_test.py index 413bb43cd6f8..8385e6cdace2 100644 --- a/keras/layers/preprocessing/image_preprocessing_test.py +++ b/keras/layers/preprocessing/image_preprocessing_test.py @@ -15,6 +15,9 @@ """Tests for image preprocessing layers.""" import functools + +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized import keras @@ -22,2229 +25,2284 @@ from keras.layers.preprocessing import image_preprocessing from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.ops import stateless_random_ops @test_combinations.run_all_keras_modes(always_skip_v1=True) class ResizingTest(test_combinations.TestCase): - - def _run_test(self, kwargs, expected_height, expected_width): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - kwargs.update({'height': expected_height, 'width': expected_width}) - with test_utils.use_gpu(): - test_utils.layer_test( - image_preprocessing.Resizing, - kwargs=kwargs, - input_shape=(num_samples, orig_height, orig_width, channels), - expected_output_shape=(None, expected_height, expected_width, - channels)) - - @parameterized.named_parameters(('down_sample_bilinear_2_by_2', { - 'interpolation': 'bilinear' - }, 2, 2), ('down_sample_bilinear_3_by_2', { - 'interpolation': 'bilinear' - }, 3, 2), 
('down_sample_nearest_2_by_2', { - 'interpolation': 'nearest' - }, 2, 2), ('down_sample_nearest_3_by_2', { - 'interpolation': 'nearest' - }, 3, 2), ('down_sample_area_2_by_2', { - 'interpolation': 'area' - }, 2, 2), ('down_sample_area_3_by_2', { - 'interpolation': 'area' - }, 3, 2), ('down_sample_crop_to_aspect_ratio_3_by_2', { - 'interpolation': 'bilinear', - 'crop_to_aspect_ratio': True, - }, 3, 2)) - def test_down_sampling(self, kwargs, expected_height, expected_width): - self._run_test(kwargs, expected_height, expected_width) - - @parameterized.named_parameters(('up_sample_bilinear_10_by_12', { - 'interpolation': 'bilinear' - }, 10, 12), ('up_sample_bilinear_12_by_12', { - 'interpolation': 'bilinear' - }, 12, 12), ('up_sample_nearest_10_by_12', { - 'interpolation': 'nearest' - }, 10, 12), ('up_sample_nearest_12_by_12', { - 'interpolation': 'nearest' - }, 12, 12), ('up_sample_area_10_by_12', { - 'interpolation': 'area' - }, 10, 12), ('up_sample_area_12_by_12', { - 'interpolation': 'area' - }, 12, 12), ('up_sample_crop_to_aspect_ratio_12_by_14', { - 'interpolation': 'bilinear', - 'crop_to_aspect_ratio': True, - }, 12, 14)) - def test_up_sampling(self, kwargs, expected_height, expected_width): - self._run_test(kwargs, expected_height, expected_width) - - def test_down_sampling_numeric(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype(dtype) - layer = image_preprocessing.Resizing( - height=2, width=2, interpolation='nearest') - output_image = layer(input_image) - # pyformat: disable - expected_output = np.asarray([ - [5, 7], - [13, 15] - ]).astype(dtype) - # pyformat: enable - expected_output = np.reshape(expected_output, (1, 2, 2, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_up_sampling_numeric(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 4), (1, 2, 2, 1)).astype(dtype) + def _run_test(self, kwargs, expected_height, expected_width): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + kwargs.update({"height": expected_height, "width": expected_width}) + with test_utils.use_gpu(): + test_utils.layer_test( + image_preprocessing.Resizing, + kwargs=kwargs, + input_shape=(num_samples, orig_height, orig_width, channels), + expected_output_shape=( + None, + expected_height, + expected_width, + channels, + ), + ) + + @parameterized.named_parameters( + ("down_sample_bilinear_2_by_2", {"interpolation": "bilinear"}, 2, 2), + ("down_sample_bilinear_3_by_2", {"interpolation": "bilinear"}, 3, 2), + ("down_sample_nearest_2_by_2", {"interpolation": "nearest"}, 2, 2), + ("down_sample_nearest_3_by_2", {"interpolation": "nearest"}, 3, 2), + ("down_sample_area_2_by_2", {"interpolation": "area"}, 2, 2), + ("down_sample_area_3_by_2", {"interpolation": "area"}, 3, 2), + ( + "down_sample_crop_to_aspect_ratio_3_by_2", + { + "interpolation": "bilinear", + "crop_to_aspect_ratio": True, + }, + 3, + 2, + ), + ) + def test_down_sampling(self, kwargs, expected_height, expected_width): + self._run_test(kwargs, expected_height, expected_width) + + @parameterized.named_parameters( + ("up_sample_bilinear_10_by_12", {"interpolation": "bilinear"}, 10, 12), + ("up_sample_bilinear_12_by_12", {"interpolation": "bilinear"}, 12, 12), + ("up_sample_nearest_10_by_12", {"interpolation": "nearest"}, 10, 12), + ("up_sample_nearest_12_by_12", {"interpolation": "nearest"}, 12, 12), + ("up_sample_area_10_by_12", 
{"interpolation": "area"}, 10, 12), + ("up_sample_area_12_by_12", {"interpolation": "area"}, 12, 12), + ( + "up_sample_crop_to_aspect_ratio_12_by_14", + { + "interpolation": "bilinear", + "crop_to_aspect_ratio": True, + }, + 12, + 14, + ), + ) + def test_up_sampling(self, kwargs, expected_height, expected_width): + self._run_test(kwargs, expected_height, expected_width) + + def test_down_sampling_numeric(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype( + dtype + ) + layer = image_preprocessing.Resizing( + height=2, width=2, interpolation="nearest" + ) + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([[5, 7], [13, 15]]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 2, 2, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_up_sampling_numeric(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 4), (1, 2, 2, 1)).astype( + dtype + ) + layer = image_preprocessing.Resizing( + height=4, width=4, interpolation="nearest" + ) + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray( + [[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 3, 3], [2, 2, 3, 3]] + ).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 4, 4, 1)) + self.assertAllEqual(expected_output, output_image) + + @parameterized.named_parameters( + ("reshape_bilinear_10_by_4", {"interpolation": "bilinear"}, 10, 4) + ) + def test_reshaping(self, kwargs, expected_height, expected_width): + self._run_test(kwargs, expected_height, expected_width) + + def test_invalid_interpolation(self): + with self.assertRaises(NotImplementedError): + image_preprocessing.Resizing(5, 5, "invalid_interpolation") + + def test_config_with_custom_name(self): + layer = image_preprocessing.Resizing(5, 5, name="image_preproc") + config = layer.get_config() + layer_1 = image_preprocessing.Resizing.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_crop_to_aspect_ratio(self): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype( + "float32" + ) + layer = image_preprocessing.Resizing( + 4, 2, crop_to_aspect_ratio=True + ) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [1, 2], + [5, 6], + [9, 10], + [13, 14], + ] + ).astype("float32") + expected_output = np.reshape(expected_output, (1, 4, 2, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_unbatched_image(self): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 16), (4, 4, 1)).astype( + "float32" + ) + layer = image_preprocessing.Resizing(2, 2, interpolation="nearest") + output_image = layer(input_image) + expected_output = np.asarray( + [ + [5, 7], + [13, 15], + ] + ).astype("float32") + expected_output = np.reshape(expected_output, (2, 2, 1)) + self.assertAllEqual(expected_output, output_image) + + @parameterized.named_parameters( + ("crop_to_aspect_ratio_false", False), + ("crop_to_aspect_ratio_true", True), + ) + def test_ragged_image(self, crop_to_aspect_ratio): + with test_utils.use_gpu(): + inputs = tf.ragged.constant( + [ + np.ones((8, 8, 1)), + np.ones((8, 4, 1)), + np.ones((4, 8, 1)), + np.ones((2, 2, 1)), + ], + dtype="float32", + ) + layer = image_preprocessing.Resizing( + 2, + 2, + interpolation="nearest", + crop_to_aspect_ratio=crop_to_aspect_ratio, + ) + outputs = 
layer(inputs) + expected_output = [ + [[[1.0], [1.0]], [[1.0], [1.0]]], + [[[1.0], [1.0]], [[1.0], [1.0]]], + [[[1.0], [1.0]], [[1.0], [1.0]]], + [[[1.0], [1.0]], [[1.0], [1.0]]], + ] + self.assertIsInstance(outputs, tf.Tensor) + self.assertNotIsInstance(outputs, tf.RaggedTensor) + self.assertAllEqual(expected_output, outputs) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.Resizing(2, 2) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.Resizing(2, 2, dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") + + @parameterized.named_parameters( + ("batch_crop_to_aspect_ratio", True, True), + ("batch_dont_crop_to_aspect_ratio", False, True), + ("single_sample_crop_to_aspect_ratio", True, False), + ("single_sample_dont_crop_to_aspect_ratio", False, False), + ) + def test_static_shape_inference(self, crop_to_aspect_ratio, batch): + channels = 3 + input_height = 8 + input_width = 8 + target_height = 4 + target_width = 6 layer = image_preprocessing.Resizing( - height=4, width=4, interpolation='nearest') - output_image = layer(input_image) - # pyformat: disable - expected_output = np.asarray([ - [0, 0, 1, 1], - [0, 0, 1, 1], - [2, 2, 3, 3], - [2, 2, 3, 3] - ]).astype(dtype) - # pyformat: enable - expected_output = np.reshape(expected_output, (1, 4, 4, 1)) - self.assertAllEqual(expected_output, output_image) - - @parameterized.named_parameters(('reshape_bilinear_10_by_4', { - 'interpolation': 'bilinear' - }, 10, 4)) - def test_reshaping(self, kwargs, expected_height, expected_width): - self._run_test(kwargs, expected_height, expected_width) - - def test_invalid_interpolation(self): - with self.assertRaises(NotImplementedError): - image_preprocessing.Resizing(5, 5, 'invalid_interpolation') - - def test_config_with_custom_name(self): - layer = image_preprocessing.Resizing(5, 5, name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.Resizing.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_crop_to_aspect_ratio(self): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype('float32') - layer = image_preprocessing.Resizing(4, 2, crop_to_aspect_ratio=True) - output_image = layer(input_image) - expected_output = np.asarray([ - [1, 2], - [5, 6], - [9, 10], - [13, 14], - ]).astype('float32') - expected_output = np.reshape(expected_output, (1, 4, 2, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_unbatched_image(self): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 16), (4, 4, 1)).astype('float32') - layer = image_preprocessing.Resizing(2, 2, interpolation='nearest') - output_image = layer(input_image) - expected_output = np.asarray([ - [5, 7], - [13, 15], - ]).astype('float32') - expected_output = np.reshape(expected_output, (2, 2, 1)) - self.assertAllEqual(expected_output, output_image) - - @parameterized.named_parameters(('crop_to_aspect_ratio_false', False), - ('crop_to_aspect_ratio_true', True)) - def test_ragged_image(self, crop_to_aspect_ratio): - with test_utils.use_gpu(): - inputs = tf.ragged.constant([ - np.ones((8, 8, 1)), - np.ones((8, 4, 1)), - np.ones((4, 8, 1)), - np.ones((2, 2, 1)), - ], dtype='float32') - layer = image_preprocessing.Resizing( - 2, - 2, - interpolation='nearest', - crop_to_aspect_ratio=crop_to_aspect_ratio) - outputs = layer(inputs) - expected_output = [[[[1.], [1.]], [[1.], [1.]]], - 
[[[1.], [1.]], [[1.], [1.]]], - [[[1.], [1.]], [[1.], [1.]]], - [[[1.], [1.]], [[1.], [1.]]]] - self.assertIsInstance(outputs, tf.Tensor) - self.assertNotIsInstance(outputs, tf.RaggedTensor) - self.assertAllEqual(expected_output, outputs) - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.Resizing(2, 2) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.Resizing(2, 2, dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') - - @parameterized.named_parameters( - ('batch_crop_to_aspect_ratio', True, True), - ('batch_dont_crop_to_aspect_ratio', False, True), - ('single_sample_crop_to_aspect_ratio', True, False), - ('single_sample_dont_crop_to_aspect_ratio', False, False), - ) - def test_static_shape_inference(self, crop_to_aspect_ratio, batch): - channels = 3 - input_height = 8 - input_width = 8 - target_height = 4 - target_width = 6 - layer = image_preprocessing.Resizing( - target_height, target_width, crop_to_aspect_ratio=crop_to_aspect_ratio) - unit_test = self - - @tf.function - def tf_function(img): - unit_test.assertListEqual([input_height, input_width, channels], - img.shape.as_list()[-3:]) - img = layer(img) - unit_test.assertListEqual([target_height, target_width, channels], - img.shape.as_list()[-3:]) - return img - - with test_utils.use_gpu(): - if batch: - input_shape = (2, input_height, input_width, channels) - else: - input_shape = (input_height, input_width, channels) - img_data = np.random.random(size=input_shape).astype('float32') - tf_function(img_data) + target_height, + target_width, + crop_to_aspect_ratio=crop_to_aspect_ratio, + ) + unit_test = self + + @tf.function + def tf_function(img): + unit_test.assertListEqual( + [input_height, input_width, channels], img.shape.as_list()[-3:] + ) + img = layer(img) + unit_test.assertListEqual( + [target_height, target_width, channels], + img.shape.as_list()[-3:], + ) + return img + + with test_utils.use_gpu(): + if batch: + input_shape = (2, input_height, input_width, channels) + else: + input_shape = (input_height, input_width, channels) + img_data = np.random.random(size=input_shape).astype("float32") + tf_function(img_data) def get_numpy_center_crop(images, expected_height, expected_width): - orig_height = images.shape[1] - orig_width = images.shape[2] - height_start = int((orig_height - expected_height) / 2) - width_start = int((orig_width - expected_width) / 2) - height_end = height_start + expected_height - width_end = width_start + expected_width - return images[:, height_start:height_end, width_start:width_end, :] + orig_height = images.shape[1] + orig_width = images.shape[2] + height_start = int((orig_height - expected_height) / 2) + width_start = int((orig_width - expected_width) / 2) + height_end = height_start + expected_height + width_end = width_start + expected_width + return images[:, height_start:height_end, width_start:width_end, :] @test_combinations.run_all_keras_modes(always_skip_v1=True) class CenterCropTest(test_combinations.TestCase): - - def _run_test(self, expected_height, expected_width): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - kwargs = {'height': expected_height, 'width': expected_width} - input_images = np.random.random( - (num_samples, orig_height, orig_width, channels)).astype(np.float32) - expected_output = get_numpy_center_crop(input_images, expected_height, - expected_width) - with test_utils.use_gpu(): 
- test_utils.layer_test( - image_preprocessing.CenterCrop, - kwargs=kwargs, - input_shape=(num_samples, orig_height, orig_width, channels), - input_data=input_images, - expected_output=expected_output, - expected_output_shape=(None, expected_height, expected_width, - channels)) - - @parameterized.named_parameters(('center_crop_3_by_4', 3, 4), - ('center_crop_3_by_2', 3, 2)) - def test_center_crop_aligned(self, expected_height, expected_width): - self._run_test(expected_height, expected_width) - - @parameterized.named_parameters(('center_crop_4_by_5', 4, 5), - ('center_crop_4_by_3', 4, 3)) - def test_center_crop_mis_aligned(self, expected_height, expected_width): - self._run_test(expected_height, expected_width) - - @parameterized.named_parameters(('center_crop_4_by_6', 4, 6), - ('center_crop_3_by_2', 3, 2)) - def test_center_crop_half_mis_aligned(self, expected_height, expected_width): - self._run_test(expected_height, expected_width) - - def test_input_smaller_than_crop_box(self): - np.random.seed(1337) - height, width = 10, 8 - inp = np.random.random((12, 3, 3, 3)) - with test_utils.use_gpu(): - layer = image_preprocessing.CenterCrop(height, width) - actual_output = layer(inp) - # In this case, output should equal resizing with crop_to_aspect ratio. - resize_layer = image_preprocessing.Resizing( - height, width, crop_to_aspect_ratio=True) - expected_output = resize_layer(inp) - self.assertAllEqual(expected_output, actual_output) - - def test_config_with_custom_name(self): - layer = image_preprocessing.CenterCrop(5, 5, name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.CenterCrop.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_unbatched_image(self): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 16), (4, 4, 1)).astype('float32') - layer = image_preprocessing.CenterCrop(2, 2) - output_image = layer(input_image) - expected_output = np.asarray([ - [5, 6], - [9, 10], - ]).astype('float32') - expected_output = np.reshape(expected_output, (2, 2, 1)) - self.assertAllEqual(expected_output, output_image) - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.CenterCrop(2, 2) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.CenterCrop(2, 2, dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') + def _run_test(self, expected_height, expected_width): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + kwargs = {"height": expected_height, "width": expected_width} + input_images = np.random.random( + (num_samples, orig_height, orig_width, channels) + ).astype(np.float32) + expected_output = get_numpy_center_crop( + input_images, expected_height, expected_width + ) + with test_utils.use_gpu(): + test_utils.layer_test( + image_preprocessing.CenterCrop, + kwargs=kwargs, + input_shape=(num_samples, orig_height, orig_width, channels), + input_data=input_images, + expected_output=expected_output, + expected_output_shape=( + None, + expected_height, + expected_width, + channels, + ), + ) + + @parameterized.named_parameters( + ("center_crop_3_by_4", 3, 4), ("center_crop_3_by_2", 3, 2) + ) + def test_center_crop_aligned(self, expected_height, expected_width): + self._run_test(expected_height, expected_width) + + @parameterized.named_parameters( + ("center_crop_4_by_5", 4, 5), ("center_crop_4_by_3", 4, 3) + ) + def 
test_center_crop_mis_aligned(self, expected_height, expected_width): + self._run_test(expected_height, expected_width) + + @parameterized.named_parameters( + ("center_crop_4_by_6", 4, 6), ("center_crop_3_by_2", 3, 2) + ) + def test_center_crop_half_mis_aligned( + self, expected_height, expected_width + ): + self._run_test(expected_height, expected_width) + + def test_input_smaller_than_crop_box(self): + np.random.seed(1337) + height, width = 10, 8 + inp = np.random.random((12, 3, 3, 3)) + with test_utils.use_gpu(): + layer = image_preprocessing.CenterCrop(height, width) + actual_output = layer(inp) + # In this case, output should equal resizing + # with crop_to_aspect ratio. + resize_layer = image_preprocessing.Resizing( + height, width, crop_to_aspect_ratio=True + ) + expected_output = resize_layer(inp) + self.assertAllEqual(expected_output, actual_output) + + def test_config_with_custom_name(self): + layer = image_preprocessing.CenterCrop(5, 5, name="image_preproc") + config = layer.get_config() + layer_1 = image_preprocessing.CenterCrop.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_unbatched_image(self): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 16), (4, 4, 1)).astype( + "float32" + ) + layer = image_preprocessing.CenterCrop(2, 2) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [5, 6], + [9, 10], + ] + ).astype("float32") + expected_output = np.reshape(expected_output, (2, 2, 1)) + self.assertAllEqual(expected_output, output_image) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.CenterCrop(2, 2) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.CenterCrop(2, 2, dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") @test_combinations.run_all_keras_modes(always_skip_v1=True) class RandomCropTest(test_combinations.TestCase): - - def _run_test(self, expected_height, expected_width): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - kwargs = {'height': expected_height, 'width': expected_width} - with test_utils.use_gpu(): - test_utils.layer_test( - image_preprocessing.RandomCrop, - kwargs=kwargs, - input_shape=(num_samples, orig_height, orig_width, channels), - expected_output_shape=(None, expected_height, expected_width, - channels)) - - def test_input_smaller_than_crop_box(self): - np.random.seed(1337) - height, width = 10, 8 - inp = np.random.random((12, 3, 3, 3)) - with test_utils.use_gpu(): - layer = image_preprocessing.RandomCrop(height, width) - actual_output = layer(inp) - # In this case, output should equal resizing with crop_to_aspect ratio. 
- resize_layer = image_preprocessing.Resizing( - height, width, crop_to_aspect_ratio=True) - expected_output = resize_layer(inp) - self.assertAllEqual(expected_output, actual_output) - - def test_training_with_mock(self): - np.random.seed(1337) - height, width = 3, 4 - height_offset = np.random.randint(low=0, high=3) - width_offset = np.random.randint(low=0, high=5) - mock_offset = [height_offset, width_offset] - with test_utils.use_gpu(): - layer = image_preprocessing.RandomCrop(height, width) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_offset): - inp = np.random.random((12, 5, 8, 3)) - actual_output = layer(inp, training=True) - expected_output = inp[:, height_offset:(height_offset + height), - width_offset:(width_offset + width), :] - self.assertAllClose(expected_output, actual_output) - - @parameterized.named_parameters(('random_crop_4_by_6', 4, 6), - ('random_crop_3_by_2', 3, 2)) - def test_random_crop_output_shape(self, expected_height, expected_width): - self._run_test(expected_height, expected_width) - - def test_random_crop_full_height(self): - self._run_test(5, 2) - - def test_random_crop_full_width(self): - self._run_test(3, 8) - - def test_random_crop_full(self): - np.random.seed(1337) - height, width = 8, 16 - inp = np.random.random((12, 8, 16, 3)) - with test_utils.use_gpu(): - layer = image_preprocessing.RandomCrop(height, width) - actual_output = layer(inp, training=False) - self.assertAllClose(inp, actual_output) - - def test_predicting_with_mock_longer_height(self): - np.random.seed(1337) - height, width = 3, 3 - inp = np.random.random((12, 10, 6, 3)) - with test_utils.use_gpu(): - layer = image_preprocessing.RandomCrop(height, width) - actual_output = layer(inp, training=False) - resized_inp = tf.image.resize(inp, size=[5, 3]) - expected_output = resized_inp[:, 1:4, :, :] - self.assertAllClose(expected_output, actual_output) - - def test_predicting_with_mock_longer_width(self): - np.random.seed(1337) - height, width = 4, 6 - inp = np.random.random((12, 8, 16, 3)) - with test_utils.use_gpu(): - layer = image_preprocessing.RandomCrop(height, width) - actual_output = layer(inp, training=False) - resized_inp = tf.image.resize(inp, size=[4, 8]) - expected_output = resized_inp[:, :, 1:7, :] - self.assertAllClose(expected_output, actual_output) - - def test_config_with_custom_name(self): - layer = image_preprocessing.RandomCrop(5, 5, name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.RandomCrop.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_unbatched_image(self): - np.random.seed(1337) - inp = np.random.random((16, 16, 3)) - mock_offset = [2, 2] - with test_utils.use_gpu(): - layer = image_preprocessing.RandomCrop(8, 8) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, - 'random_uniform', - return_value=mock_offset): - actual_output = layer(inp, training=True) - self.assertAllClose(inp[2:10, 2:10, :], actual_output) - - def test_batched_input(self): - np.random.seed(1337) - inp = np.random.random((20, 16, 16, 3)) - mock_offset = [2, 2] - with test_utils.use_gpu(): - layer = image_preprocessing.RandomCrop(8, 8) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_offset): - actual_output = layer(inp, training=True) - self.assertAllClose(inp[:, 2:10, 2:10, :], actual_output) - - def test_augment_image(self): - np.random.seed(1337) - inp = np.random.random((16, 16, 3)) - mock_offset 
= [2, 2] - with test_utils.use_gpu(): - layer = image_preprocessing.RandomCrop(8, 8) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_offset): - actual_output = layer.augment_image( - inp, transformation=layer.get_random_transformation(image=inp)) - self.assertAllClose(inp[2:10, 2:10, :], actual_output) - - def test_training_false(self): - np.random.seed(1337) - height, width = 4, 6 - inp = np.random.random((12, 8, 16, 3)) - inp_dict = {'images': inp} - with test_utils.use_gpu(): - layer = image_preprocessing.RandomCrop(height, width) - # test wih tensor input - actual_output = layer(inp, training=False) - resized_inp = tf.image.resize(inp, size=[4, 8]) - expected_output = resized_inp[:, :, 1:7, :] - self.assertAllClose(expected_output, actual_output) - # test with dictionary input - actual_output = layer(inp_dict, training=False) - resized_inp = tf.image.resize(inp, size=[4, 8]) - expected_output = resized_inp[:, :, 1:7, :] - self.assertAllClose(expected_output, actual_output['images']) - - @test_utils.run_v2_only - def test_uint8_input(self): - inputs = keras.Input((128, 128, 3), batch_size=2, dtype=tf.uint8) - layer = image_preprocessing.RandomCrop(64, 64) - self.assertAllEqual(layer(inputs).dtype, 'float32') - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.RandomCrop(2, 2) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.RandomCrop(2, 2, dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') + def _run_test(self, expected_height, expected_width): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + kwargs = {"height": expected_height, "width": expected_width} + with test_utils.use_gpu(): + test_utils.layer_test( + image_preprocessing.RandomCrop, + kwargs=kwargs, + input_shape=(num_samples, orig_height, orig_width, channels), + expected_output_shape=( + None, + expected_height, + expected_width, + channels, + ), + ) + + def test_input_smaller_than_crop_box(self): + np.random.seed(1337) + height, width = 10, 8 + inp = np.random.random((12, 3, 3, 3)) + with test_utils.use_gpu(): + layer = image_preprocessing.RandomCrop(height, width) + actual_output = layer(inp) + # In this case, output should equal resizing + # with crop_to_aspect ratio. 
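The comment above is worth unpacking: when the input is smaller than the crop box there is nothing to crop randomly, so `RandomCrop` falls back to an aspect-preserving resize. A standalone sketch of the equality the test asserts (sizes arbitrary):

```python
import numpy as np
import tensorflow as tf

small = np.random.random((2, 3, 3, 3)).astype("float32")  # below the crop size
cropped = tf.keras.layers.RandomCrop(10, 8)(small)
resized = tf.keras.layers.Resizing(10, 8, crop_to_aspect_ratio=True)(small)

# Upsampling leaves nothing to randomize, so the two paths agree exactly.
np.testing.assert_allclose(cropped.numpy(), resized.numpy())
```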
+ resize_layer = image_preprocessing.Resizing( + height, width, crop_to_aspect_ratio=True + ) + expected_output = resize_layer(inp) + self.assertAllEqual(expected_output, actual_output) + + def test_training_with_mock(self): + np.random.seed(1337) + height, width = 3, 4 + height_offset = np.random.randint(low=0, high=3) + width_offset = np.random.randint(low=0, high=5) + mock_offset = [height_offset, width_offset] + with test_utils.use_gpu(): + layer = image_preprocessing.RandomCrop(height, width) + with tf.compat.v1.test.mock.patch.object( + layer._random_generator, + "random_uniform", + return_value=mock_offset, + ): + inp = np.random.random((12, 5, 8, 3)) + actual_output = layer(inp, training=True) + expected_output = inp[ + :, + height_offset : (height_offset + height), + width_offset : (width_offset + width), + :, + ] + self.assertAllClose(expected_output, actual_output) + + @parameterized.named_parameters( + ("random_crop_4_by_6", 4, 6), ("random_crop_3_by_2", 3, 2) + ) + def test_random_crop_output_shape(self, expected_height, expected_width): + self._run_test(expected_height, expected_width) + + def test_random_crop_full_height(self): + self._run_test(5, 2) + + def test_random_crop_full_width(self): + self._run_test(3, 8) + + def test_random_crop_full(self): + np.random.seed(1337) + height, width = 8, 16 + inp = np.random.random((12, 8, 16, 3)) + with test_utils.use_gpu(): + layer = image_preprocessing.RandomCrop(height, width) + actual_output = layer(inp, training=False) + self.assertAllClose(inp, actual_output) + + def test_predicting_with_mock_longer_height(self): + np.random.seed(1337) + height, width = 3, 3 + inp = np.random.random((12, 10, 6, 3)) + with test_utils.use_gpu(): + layer = image_preprocessing.RandomCrop(height, width) + actual_output = layer(inp, training=False) + resized_inp = tf.image.resize(inp, size=[5, 3]) + expected_output = resized_inp[:, 1:4, :, :] + self.assertAllClose(expected_output, actual_output) + + def test_predicting_with_mock_longer_width(self): + np.random.seed(1337) + height, width = 4, 6 + inp = np.random.random((12, 8, 16, 3)) + with test_utils.use_gpu(): + layer = image_preprocessing.RandomCrop(height, width) + actual_output = layer(inp, training=False) + resized_inp = tf.image.resize(inp, size=[4, 8]) + expected_output = resized_inp[:, :, 1:7, :] + self.assertAllClose(expected_output, actual_output) + + def test_config_with_custom_name(self): + layer = image_preprocessing.RandomCrop(5, 5, name="image_preproc") + config = layer.get_config() + layer_1 = image_preprocessing.RandomCrop.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_unbatched_image(self): + np.random.seed(1337) + inp = np.random.random((16, 16, 3)) + mock_offset = [2, 2] + with test_utils.use_gpu(): + layer = image_preprocessing.RandomCrop(8, 8) + with tf.compat.v1.test.mock.patch.object( + layer._random_generator, + "random_uniform", + return_value=mock_offset, + ): + actual_output = layer(inp, training=True) + self.assertAllClose(inp[2:10, 2:10, :], actual_output) + + @test_utils.run_v2_only + def test_uint8_input(self): + inputs = keras.Input((128, 128, 3), batch_size=2, dtype=tf.uint8) + layer = image_preprocessing.RandomCrop(64, 64) + self.assertAllEqual(layer(inputs).dtype, "float32") + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.RandomCrop(2, 2) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = 
image_preprocessing.RandomCrop(2, 2, dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") class RescalingTest(test_combinations.TestCase): - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_rescaling_base(self): - kwargs = {'scale': 1. / 127.5, 'offset': -1.} - test_utils.layer_test( - image_preprocessing.Rescaling, - kwargs=kwargs, - input_shape=(2, 5, 6, 3), - expected_output_shape=(None, 5, 6, 3)) - - @test_utils.run_v2_only - def test_rescaling_correctness_float(self): - layer = image_preprocessing.Rescaling(scale=1. / 127.5, offset=-1.) - inputs = tf.random.uniform((2, 4, 5, 3)) - outputs = layer(inputs) - self.assertAllClose(outputs.numpy(), inputs.numpy() * (1. / 127.5) - 1) - - @test_utils.run_v2_only - def test_rescaling_correctness_int(self): - layer = image_preprocessing.Rescaling(scale=1. / 127.5, offset=-1) - inputs = tf.random.uniform((2, 4, 5, 3), 0, 100, dtype='int32') - outputs = layer(inputs) - self.assertEqual(outputs.dtype.name, 'float32') - self.assertAllClose(outputs.numpy(), inputs.numpy() * (1. / 127.5) - 1) - - def test_config_with_custom_name(self): - layer = image_preprocessing.Rescaling(0.5, name='rescaling') - config = layer.get_config() - layer_1 = image_preprocessing.Rescaling.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_unbatched_image(self): - layer = image_preprocessing.Rescaling(scale=1. / 127.5, offset=-1) - inputs = tf.random.uniform((4, 5, 3)) - outputs = layer(inputs) - self.assertAllClose(outputs.numpy(), inputs.numpy() * (1. / 127.5) - 1) - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.Rescaling(0.5) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.Rescaling(0.5, dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_rescaling_base(self): + kwargs = {"scale": 1.0 / 127.5, "offset": -1.0} + test_utils.layer_test( + image_preprocessing.Rescaling, + kwargs=kwargs, + input_shape=(2, 5, 6, 3), + expected_output_shape=(None, 5, 6, 3), + ) + + @test_utils.run_v2_only + def test_rescaling_correctness_float(self): + layer = image_preprocessing.Rescaling(scale=1.0 / 127.5, offset=-1.0) + inputs = tf.random.uniform((2, 4, 5, 3)) + outputs = layer(inputs) + self.assertAllClose(outputs.numpy(), inputs.numpy() * (1.0 / 127.5) - 1) + + @test_utils.run_v2_only + def test_rescaling_correctness_int(self): + layer = image_preprocessing.Rescaling(scale=1.0 / 127.5, offset=-1) + inputs = tf.random.uniform((2, 4, 5, 3), 0, 100, dtype="int32") + outputs = layer(inputs) + self.assertEqual(outputs.dtype.name, "float32") + self.assertAllClose(outputs.numpy(), inputs.numpy() * (1.0 / 127.5) - 1) + + def test_config_with_custom_name(self): + layer = image_preprocessing.Rescaling(0.5, name="rescaling") + config = layer.get_config() + layer_1 = image_preprocessing.Rescaling.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_unbatched_image(self): + layer = image_preprocessing.Rescaling(scale=1.0 / 127.5, offset=-1) + inputs = tf.random.uniform((4, 5, 3)) + outputs = layer(inputs) + self.assertAllClose(outputs.numpy(), inputs.numpy() * (1.0 / 127.5) - 1) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], 
[2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.Rescaling(0.5) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.Rescaling(0.5, dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") @test_combinations.run_all_keras_modes(always_skip_v1=True) class RandomFlipTest(test_combinations.TestCase): - - def _run_test(self, mode, expected_output=None, mock_random=None): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - if mock_random is None: - mock_random = [True for _ in range(num_samples)] - if mode == 'horizontal_and_vertical': - mock_random *= 2 - inp = np.random.random((num_samples, orig_height, orig_width, channels)) - if expected_output is None: - expected_output = inp - if mode == 'horizontal' or mode == 'horizontal_and_vertical': - expected_output = np.flip(expected_output, axis=2) - if mode == 'vertical' or mode == 'horizontal_and_vertical': - expected_output = np.flip(expected_output, axis=1) - with tf.compat.v1.test.mock.patch.object( - np.random, - 'choice', - side_effect=mock_random, - ): - with test_utils.use_gpu(): - layer = image_preprocessing.RandomFlip(mode) - actual_output = layer(inp, training=True) - self.assertAllClose(expected_output, actual_output) - - @parameterized.named_parameters( - ('random_flip_horizontal', 'horizontal'), - ('random_flip_vertical', 'vertical'), - ('random_flip_both', 'horizontal_and_vertical')) - def test_random_flip(self, mode): - self._run_test(mode) - - def test_random_flip_horizontal_half(self): - np.random.seed(1337) - mock_random = [True, False] - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = input_images.copy() - expected_output[0, :, :, :] = np.flip(input_images[0, :, :, :], axis=1) - self._run_test('horizontal', expected_output, mock_random) - - def test_random_flip_vertical_half(self): - np.random.seed(1337) - mock_random = [True, False] - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = input_images.copy() - expected_output[0, :, :, :] = np.flip(input_images[0, :, :, :], axis=0) - self._run_test('vertical', expected_output, mock_random) - - def test_random_flip_inference(self): - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = input_images - with test_utils.use_gpu(): - layer = image_preprocessing.RandomFlip() - actual_output = layer(input_images, training=False) - self.assertAllClose(expected_output, actual_output) - - def test_random_flip_default(self): - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = np.flip(np.flip(input_images, axis=1), axis=2) - mock_random = [True, True, True, True] - with tf.compat.v1.test.mock.patch.object( - np.random, - 'choice', - side_effect=mock_random, - ): - with self.cached_session(): + def _run_test(self, mode, expected_output=None, mock_random=None): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + if mock_random is None: + mock_random = [1 for _ in range(num_samples)] + mock_random = np.reshape(mock_random, [2, 1, 1, 1]) + inp = np.random.random((num_samples, orig_height, orig_width, channels)) + if expected_output is None: + expected_output = inp + if mode == "horizontal" or mode == "horizontal_and_vertical": + expected_output = np.flip(expected_output, axis=2) + if mode == "vertical" or mode == "horizontal_and_vertical": + expected_output = np.flip(expected_output, axis=1) + with 
tf.compat.v1.test.mock.patch.object( + stateless_random_ops, + "stateless_random_uniform", + return_value=mock_random, + ): + with test_utils.use_gpu(): + layer = image_preprocessing.RandomFlip(mode) + actual_output = layer(inp, training=True) + self.assertAllClose(expected_output, actual_output) + + @parameterized.named_parameters( + ("random_flip_horizontal", "horizontal"), + ("random_flip_vertical", "vertical"), + ("random_flip_both", "horizontal_and_vertical"), + ) + def test_random_flip(self, mode): + self._run_test(mode) + + def test_random_flip_horizontal_half(self): + np.random.seed(1337) + mock_random = [1, 0] + mock_random = np.reshape(mock_random, [2, 1, 1, 1]) + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = input_images.copy() + expected_output[0, :, :, :] = np.flip(input_images[0, :, :, :], axis=1) + self._run_test("horizontal", expected_output, mock_random) + + def test_random_flip_vertical_half(self): + np.random.seed(1337) + mock_random = [1, 0] + mock_random = np.reshape(mock_random, [2, 1, 1, 1]) + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = input_images.copy() + expected_output[0, :, :, :] = np.flip(input_images[0, :, :, :], axis=0) + self._run_test("vertical", expected_output, mock_random) + + def test_random_flip_inference(self): + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = input_images + with test_utils.use_gpu(): + layer = image_preprocessing.RandomFlip() + actual_output = layer(input_images, training=False) + self.assertAllClose(expected_output, actual_output) + + def test_random_flip_default(self): + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = np.flip(np.flip(input_images, axis=1), axis=2) + mock_random = [1, 1] + mock_random = np.reshape(mock_random, [2, 1, 1, 1]) + with tf.compat.v1.test.mock.patch.object( + stateless_random_ops, + "stateless_random_uniform", + return_value=mock_random, + ): + with self.cached_session(): + layer = image_preprocessing.RandomFlip() + actual_output = layer(input_images, training=True) + self.assertAllClose(expected_output, actual_output) + + @test_utils.run_v2_only + def test_config_with_custom_name(self): + layer = image_preprocessing.RandomFlip(name="image_preproc") + config = layer.get_config() + layer_1 = image_preprocessing.RandomFlip.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_random_flip_unbatched_image(self): + input_image = np.random.random((4, 4, 1)).astype(np.float32) + expected_output = np.flip(input_image, axis=0) + # mock_random = np.reshape([0.], [1, 1, 1]) + with tf.compat.v1.test.mock.patch.object( + stateless_random_ops, + "stateless_random_uniform", + return_value=0.0, + ): + with self.cached_session(): + layer = image_preprocessing.RandomFlip("vertical") + actual_output = layer(input_image, training=True) + self.assertAllClose(expected_output, actual_output) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") layer = image_preprocessing.RandomFlip() - actual_output = layer(input_images, training=True) - self.assertAllClose(expected_output, actual_output) - - @test_utils.run_v2_only - def test_config_with_custom_name(self): - layer = image_preprocessing.RandomFlip(name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.RandomFlip.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def 
test_random_flip_unbatched_image(self): - input_image = np.random.random((4, 4, 1)).astype(np.float32) - expected_output = np.flip(input_image, axis=0) - mock_random = [True, True, True, True] - with tf.compat.v1.test.mock.patch.object( - np.random, - 'choice', - side_effect=mock_random, - ): - with self.cached_session(): - layer = image_preprocessing.RandomFlip('vertical') - actual_output = layer(input_image, training=True) - self.assertAllClose(expected_output, actual_output) - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.RandomFlip() - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.RandomFlip(dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') - - @test_utils.run_v2_only - def test_augment_bbox_horizontal(self): - image = tf.zeros([1, 20, 20, 3]) - bboxes = np.array([[0, 0, 10, 10], [4, 4, 12, 12]], dtype='int32') - layer = image_preprocessing.RandomFlip() - output = layer.augment_bounding_boxes( - image, - bboxes, - transformation={ - 'flip_horizontal': True, - 'flip_vertical': False - }) - expected_output = [[10, 0, 20, 10], [8, 4, 16, 12]] - self.assertAllClose(expected_output, output) - - @test_utils.run_v2_only - def test_augment_bbox_vertical(self): - image = tf.zeros([1, 20, 20, 3]) - bboxes = np.array([[0, 0, 10, 10], [4, 4, 12, 12]], dtype='int32') - layer = image_preprocessing.RandomFlip() - output = layer.augment_bounding_boxes( - image, - bboxes, - transformation={ - 'flip_horizontal': False, - 'flip_vertical': True - }) - expected_output = [[0, 10, 10, 20], [4, 8, 12, 16]] - self.assertAllClose(expected_output, output) - - @test_utils.run_v2_only - def test_augment_bbox_both(self): - image = tf.zeros([1, 20, 20, 3]) - bboxes = np.array([[0, 0, 10, 10], [4, 4, 12, 12]], dtype='int32') - layer = image_preprocessing.RandomFlip() - output = layer.augment_bounding_boxes( - image, - bboxes, - transformation={ - 'flip_horizontal': True, - 'flip_vertical': True - }) - expected_output = [[10, 10, 20, 20], [8, 8, 16, 16]] - self.assertAllClose(expected_output, output) - - @test_utils.run_v2_only - def test_augment_bbox_batched_input(self): - image = tf.zeros([20, 20, 3]) - bboxes = np.array( - [[[0, 0, 10, 10], [4, 4, 12, 12]], [[0, 0, 10, 10], [4, 4, 12, 12]]], - dtype='int32') - input = {'images': [image, image], 'bounding_boxes': bboxes} - mock_random = [True, True, True, True] - with tf.compat.v1.test.mock.patch.object( - np.random, - 'choice', - side_effect=mock_random, - ): - layer = image_preprocessing.RandomFlip() - output = layer(input, training=True) - expected_output = [[[10, 10, 20, 20], [8, 8, 16, 16]], - [[10, 10, 20, 20], [8, 8, 16, 16]]] - self.assertAllClose(expected_output, output['bounding_boxes']) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.RandomFlip(dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") + @test_combinations.run_all_keras_modes(always_skip_v1=True) class RandomContrastTest(test_combinations.TestCase): - - def _run_test(self, lower, upper, expected_output=None, mock_random=None): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - if mock_random is None: - mock_random = 0.2 - inp = np.random.random((num_samples, orig_height, orig_width, channels)) - if expected_output is None: - # reduce mean on height. - inp_mean = np.mean(inp, axis=1, keepdims=True) - # reduce mean on width. 
- inp_mean = np.mean(inp_mean, axis=2, keepdims=True) - expected_output = (inp - inp_mean) * mock_random + inp_mean - with tf.compat.v1.test.mock.patch.object( - stateless_random_ops, - 'stateless_random_uniform', - return_value=mock_random, - ): - with test_utils.use_gpu(): - layer = image_preprocessing.RandomContrast((lower, upper)) - actual_output = layer(inp, training=True) - self.assertAllClose(expected_output, actual_output) - - @parameterized.named_parameters(('random_contrast_2_by_5', 0.2, 0.5), - ('random_contrast_2_by_13', 0.2, 1.3), - ('random_contrast_5_by_2', 0.5, 0.2), - ('random_contrast_10_by_10', 1.0, 1.0)) - def test_random_contrast(self, lower, upper): - self._run_test(lower, upper) - - @parameterized.named_parameters(('random_contrast_amplitude_2', 0.2), - ('random_contrast_amplitude_5', 0.5)) - def test_random_contrast_amplitude(self, amplitude): - input_images = np.random.random((2, 5, 8, 3)) - with test_utils.use_gpu(): - layer = image_preprocessing.RandomContrast(amplitude) - layer(input_images) - - def test_random_contrast_inference(self): - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = input_images - with test_utils.use_gpu(): - layer = image_preprocessing.RandomContrast((0.1, 0.2)) - actual_output = layer(input_images, training=False) - self.assertAllClose(expected_output, actual_output) - - def test_random_contrast_int_dtype(self): - input_images = np.random.randint(low=0, high=255, size=(2, 5, 8, 3)) - with test_utils.use_gpu(): - layer = image_preprocessing.RandomContrast((0.1, 0.2)) - layer(input_images) - - def test_random_contrast_invalid_bounds(self): - with self.assertRaises(ValueError): - image_preprocessing.RandomContrast((-0.1, .5)) - - with self.assertRaises(ValueError): - image_preprocessing.RandomContrast((1.1, .5)) - - with self.assertRaises(ValueError): - image_preprocessing.RandomContrast((0.1, -0.2)) - - @test_utils.run_v2_only - def test_config_with_custom_name(self): - layer = image_preprocessing.RandomContrast((.5, .6), name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.RandomContrast.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_output_value_clip(self): - input_images = np.random.random((5, 8, 3)).astype(np.float32) * 255.0 - # Give a factor range [1.0, 11.0] so that it will produce large contrast. 
- layer = image_preprocessing.RandomContrast((0.0, 10.0)) - output = layer(input_images) - self.assertLessEqual(tf.reduce_max(output), 255.0) - self.assertGreaterEqual(tf.reduce_min(output), 0.0) - - def test_unbatched_image(self): - np.random.seed(1337) - mock_random = 0.2 - inp = np.random.random((4, 4, 1)) - inp_mean = np.mean(inp, axis=0, keepdims=True) - inp_mean = np.mean(inp_mean, axis=1, keepdims=True) - expected_output = (inp - inp_mean) * mock_random + inp_mean - with tf.compat.v1.test.mock.patch.object( - stateless_random_ops, - 'stateless_random_uniform', - return_value=mock_random, - ): - with test_utils.use_gpu(): - layer = image_preprocessing.RandomContrast((0.2, 0.5)) - actual_output = layer(inp, training=True) - self.assertAllClose(expected_output, actual_output) - - def test_augment_image(self): - np.random.seed(1337) - mock_random = 0.2 - inp = np.random.random((4, 4, 1)) - inp_mean = np.mean(inp, axis=0, keepdims=True) - inp_mean = np.mean(inp_mean, axis=1, keepdims=True) - expected_output = (inp - inp_mean) * mock_random + inp_mean - with tf.compat.v1.test.mock.patch.object( - stateless_random_ops, - 'stateless_random_uniform', - return_value=mock_random, - ): - with test_utils.use_gpu(): - layer = image_preprocessing.RandomContrast((0.2, 0.5)) - actual_output = layer.augment_image( - inp, transformation=layer.get_random_transformation()) - self.assertAllClose(expected_output, actual_output) - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.RandomContrast((.5, .6)) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.RandomContrast((.5, .6), dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') + def _run_test(self, lower, upper, expected_output=None, mock_random=None): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + if mock_random is None: + mock_random = 0.2 + inp = np.random.random((num_samples, orig_height, orig_width, channels)) + if expected_output is None: + # reduce mean on height. + inp_mean = np.mean(inp, axis=1, keepdims=True) + # reduce mean on width. 
+ inp_mean = np.mean(inp_mean, axis=2, keepdims=True) + expected_output = (inp - inp_mean) * mock_random + inp_mean + with tf.compat.v1.test.mock.patch.object( + stateless_random_ops, + "stateless_random_uniform", + return_value=mock_random, + ): + with test_utils.use_gpu(): + layer = image_preprocessing.RandomContrast((lower, upper)) + actual_output = layer(inp, training=True) + self.assertAllClose(expected_output, actual_output) + + @parameterized.named_parameters( + ("random_contrast_2_by_5", 0.2, 0.5), + ("random_contrast_2_by_13", 0.2, 1.3), + ("random_contrast_5_by_2", 0.5, 0.2), + ("random_contrast_10_by_10", 1.0, 1.0), + ) + def test_random_contrast(self, lower, upper): + self._run_test(lower, upper) + + @parameterized.named_parameters( + ("random_contrast_amplitude_2", 0.2), + ("random_contrast_amplitude_5", 0.5), + ) + def test_random_contrast_amplitude(self, amplitude): + input_images = np.random.random((2, 5, 8, 3)) + with test_utils.use_gpu(): + layer = image_preprocessing.RandomContrast(amplitude) + layer(input_images) + + def test_random_contrast_inference(self): + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = input_images + with test_utils.use_gpu(): + layer = image_preprocessing.RandomContrast((0.1, 0.2)) + actual_output = layer(input_images, training=False) + self.assertAllClose(expected_output, actual_output) + + def test_random_contrast_int_dtype(self): + input_images = np.random.randint(low=0, high=255, size=(2, 5, 8, 3)) + with test_utils.use_gpu(): + layer = image_preprocessing.RandomContrast((0.1, 0.2)) + layer(input_images) + + def test_random_contrast_invalid_bounds(self): + with self.assertRaises(ValueError): + image_preprocessing.RandomContrast((-0.1, 0.5)) + + with self.assertRaises(ValueError): + image_preprocessing.RandomContrast((1.1, 0.5)) + + with self.assertRaises(ValueError): + image_preprocessing.RandomContrast((0.1, -0.2)) + + @test_utils.run_v2_only + def test_config_with_custom_name(self): + layer = image_preprocessing.RandomContrast( + (0.5, 0.6), name="image_preproc" + ) + config = layer.get_config() + layer_1 = image_preprocessing.RandomContrast.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_output_value_clip(self): + input_images = np.random.random((5, 8, 3)).astype(np.float32) * 255.0 + # Give a factor range [1.0, 11.0] so that + # it will produce large contrast. 
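# Reference for the contrast math these tests encode (editorial sketch,
# not part of the test file; `adjust_contrast_reference` is an
# illustrative name): the layer pulls each pixel toward, or pushes it
# away from, the per-image spatial mean -- the same
# `(inp - inp_mean) * factor + inp_mean` computation used in `_run_test`
# above.
def adjust_contrast_reference(image, factor):
    # Mean over height and width, per channel, for an unbatched HWC image.
    mean = image.mean(axis=(0, 1), keepdims=True)
    return (image - mean) * factor + mean
# With factors up to 11.0 the raw result can leave [0, 255], which is
# why the layer constructed next is expected to clip its output.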
+ layer = image_preprocessing.RandomContrast((0.0, 10.0)) + output = layer(input_images) + self.assertLessEqual(tf.reduce_max(output), 255.0) + self.assertGreaterEqual(tf.reduce_min(output), 0.0) + + def test_unbatched_image(self): + np.random.seed(1337) + mock_random = 0.2 + inp = np.random.random((4, 4, 1)) + inp_mean = np.mean(inp, axis=0, keepdims=True) + inp_mean = np.mean(inp_mean, axis=1, keepdims=True) + expected_output = (inp - inp_mean) * mock_random + inp_mean + with tf.compat.v1.test.mock.patch.object( + stateless_random_ops, + "stateless_random_uniform", + return_value=mock_random, + ): + with test_utils.use_gpu(): + layer = image_preprocessing.RandomContrast((0.2, 0.5)) + actual_output = layer(inp, training=True) + self.assertAllClose(expected_output, actual_output) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.RandomContrast((0.5, 0.6)) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.RandomContrast((0.5, 0.6), dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") @test_combinations.run_all_keras_modes(always_skip_v1=True) class RandomBrightnessTest(test_combinations.TestCase): + def test_factor_input_validation(self): + with self.assertRaisesRegex(ValueError, r"in the range \[-1.0, 1.0\]"): + image_preprocessing.RandomBrightness(2.0) + + with self.assertRaisesRegex(ValueError, "list of two numbers"): + image_preprocessing.RandomBrightness([1.0]) + + with self.assertRaisesRegex(ValueError, "should be a number"): + image_preprocessing.RandomBrightness("one") + + def test_factor_normalize(self): + layer = image_preprocessing.RandomBrightness(1.0) + self.assertEqual(layer._factor, [-1.0, 1.0]) + + layer = image_preprocessing.RandomBrightness((0.5, 0.3)) + self.assertEqual(layer._factor, [0.3, 0.5]) + + layer = image_preprocessing.RandomBrightness(-0.2) + self.assertEqual(layer._factor, [-0.2, 0.2]) + + @test_utils.run_v2_only + def test_output_value_range(self): + # Always scale up to 255 + layer = image_preprocessing.RandomBrightness([1.0, 1.0]) + inputs = np.random.randint(0, 255, size=(224, 224, 3)) + output = layer(inputs) + output_min = tf.math.reduce_min(output) + output_max = tf.math.reduce_max(output) + self.assertEqual(output_min, 255) + self.assertEqual(output_max, 255) + + # Always scale down to 0 + layer = image_preprocessing.RandomBrightness([-1.0, -1.0]) + inputs = np.random.randint(0, 255, size=(224, 224, 3)) + output = layer(inputs) + output_min = tf.math.reduce_min(output) + output_max = tf.math.reduce_max(output) + self.assertEqual(output_min, 0) + self.assertEqual(output_max, 0) + + def test_output(self): + # Always scale up, but randomly between 0 ~ 255 + layer = image_preprocessing.RandomBrightness([0, 1.0]) + inputs = np.random.randint(0, 255, size=(224, 224, 3)) + output = layer(inputs) + diff = output - inputs + self.assertGreaterEqual(tf.math.reduce_min(diff), 0) + self.assertGreater(tf.math.reduce_mean(diff), 0) + + # Always scale down, but randomly between 0 ~ 255 + layer = image_preprocessing.RandomBrightness([-1.0, 0.0]) + inputs = np.random.randint(0, 255, size=(224, 224, 3)) + output = layer(inputs) + diff = output - inputs + self.assertLessEqual(tf.math.reduce_max(diff), 0) + self.assertLess(tf.math.reduce_mean(diff), 0) + + @test_utils.run_v2_only + def test_scale_output(self): + layer = image_preprocessing.RandomBrightness([0, 1.0], seed=1337) + inputs = np.random.randint(0, 255, size=(224, 
224, 3)) + output = layer(inputs) + + # Create a new layer with same seed but different value range + layer2 = image_preprocessing.RandomBrightness( + [0, 1.0], value_range=[0, 1], seed=1337 + ) + inputs2 = inputs / 255.0 + output2 = layer2(inputs2) + # Make sure the outputs are the same, but just scaled by 255 + self.assertAllClose(output, output2 * 255.0) + + def test_different_adjustment_within_batch(self): + layer = image_preprocessing.RandomBrightness([0.2, 0.3]) + inputs = np.zeros(shape=(2, 10, 10, 3)) # 2 images with all zeros + output = layer(inputs) + diff = output - inputs + # Make sure the two images get different adjustments + self.assertNotAllClose(diff[0], diff[1]) + # Make sure all pixels within one image get the same adjustment + image1 = output[0] + # The mean pixel value, reduced over width and height, equals + # any individual pixel in the image. + self.assertAllClose( + tf.reduce_mean(image1), image1[0, 0, 0], rtol=1e-5, atol=1e-5 + ) + + def test_inference(self): + layer = image_preprocessing.RandomBrightness([0, 1.0]) + inputs = np.random.randint(0, 255, size=(224, 224, 3)) + output = layer(inputs, training=False) + self.assertAllClose(inputs, output) + + @test_utils.run_v2_only + def test_dtype(self): + layer = image_preprocessing.RandomBrightness([0, 1.0]) + inputs = np.random.randint(0, 255, size=(224, 224, 3)) + output = layer(inputs) + self.assertEqual(output.dtype, tf.float32) + + layer = image_preprocessing.RandomBrightness([0, 1.0], dtype="uint8") + output = layer(inputs) + self.assertEqual(output.dtype, tf.uint8) + + def test_seed(self): + layer = image_preprocessing.RandomBrightness([0, 1.0], seed=1337) + inputs = np.random.randint(0, 255, size=(224, 224, 3)) + output_1 = layer(inputs) + + layer2 = image_preprocessing.RandomBrightness([0, 1.0], seed=1337) + output_2 = layer2(inputs) + + self.assertAllClose(output_1, output_2) + + def test_config(self): + layer = image_preprocessing.RandomBrightness( + [0, 1.0], value_range=[0.0, 1.0], seed=1337 + ) + config = layer.get_config() + self.assertEqual(config["factor"], [0.0, 1.0]) + self.assertEqual(config["value_range"], [0.0, 1.0]) + self.assertEqual(config["seed"], 1337) + + reconstructed_layer = image_preprocessing.RandomBrightness.from_config( + config + ) + self.assertEqual(reconstructed_layer._factor, layer._factor) + self.assertEqual(reconstructed_layer._value_range, layer._value_range) + self.assertEqual(reconstructed_layer._seed, layer._seed) + - def test_factor_input_validation(self): - with self.assertRaisesRegex(ValueError, r'in the range \[-1.0, 1.0\]'): - image_preprocessing.RandomBrightness(2.0) - - with self.assertRaisesRegex(ValueError, 'list of two numbers'): - image_preprocessing.RandomBrightness([1.0]) - - with self.assertRaisesRegex(ValueError, 'should be a number'): - image_preprocessing.RandomBrightness('one') - - def test_factor_normalize(self): - layer = image_preprocessing.RandomBrightness(1.0) - self.assertEqual(layer._factor, [-1.0, 1.0]) - - layer = image_preprocessing.RandomBrightness((0.5, 0.3)) - self.assertEqual(layer._factor, [0.3, 0.5]) - - layer = image_preprocessing.RandomBrightness(-0.2) - self.assertEqual(layer._factor, [-0.2, 0.2]) - - @test_utils.run_v2_only - def test_output_value_range(self): - # Always scale up to 255 - layer = image_preprocessing.RandomBrightness([1.0, 1.0]) - inputs = np.random.randint(0, 255, size=(224, 224, 3)) - output = layer(inputs) - output_min = tf.math.reduce_min(output) - output_max = tf.math.reduce_max(output) -
self.assertEqual(output_min, 255) - self.assertEqual(output_max, 255) - - # Always scale down to 0 - layer = image_preprocessing.RandomBrightness([-1.0, -1.0]) - inputs = np.random.randint(0, 255, size=(224, 224, 3)) - output = layer(inputs) - output_min = tf.math.reduce_min(output) - output_max = tf.math.reduce_max(output) - self.assertEqual(output_min, 0) - self.assertEqual(output_max, 0) - - def test_output(self): - # Always scale up, but randomly between 0 ~ 255 - layer = image_preprocessing.RandomBrightness([0, 1.0]) - inputs = np.random.randint(0, 255, size=(224, 224, 3)) - output = layer(inputs) - diff = output - inputs - self.assertGreaterEqual(tf.math.reduce_min(diff), 0) - self.assertGreater(tf.math.reduce_mean(diff), 0) - - # Always scale down, but randomly between 0 ~ 255 - layer = image_preprocessing.RandomBrightness([-1.0, 0.0]) - inputs = np.random.randint(0, 255, size=(224, 224, 3)) - output = layer(inputs) - diff = output - inputs - self.assertLessEqual(tf.math.reduce_max(diff), 0) - self.assertLess(tf.math.reduce_mean(diff), 0) - - def test_augment_image(self): - # Always scale up, but randomly between 0 ~ 255 - layer = image_preprocessing.RandomBrightness([0, 1.0]) - image = np.random.randint(0, 255, size=(224, 224, 3)) - output = layer.augment_image( - image, transformation=layer.get_random_transformation()) - diff = output - image - self.assertGreaterEqual(tf.math.reduce_min(diff), 0) - self.assertGreater(tf.math.reduce_mean(diff), 0) - - # Always scale down, but randomly between 0 ~ 255 - layer = image_preprocessing.RandomBrightness([-1.0, 0.0]) - image = np.random.randint(0, 255, size=(224, 224, 3)) - output = layer.augment_image( - image, transformation=layer.get_random_transformation()) - diff = output - image - self.assertLessEqual(tf.math.reduce_max(diff), 0) - self.assertLess(tf.math.reduce_mean(diff), 0) - - @test_utils.run_v2_only - def test_scale_output(self): - layer = image_preprocessing.RandomBrightness([0, 1.0], seed=1337) - inputs = np.random.randint(0, 255, size=(224, 224, 3)) - output = layer(inputs) - - # Create a new layer with same seed but different value range - layer2 = image_preprocessing.RandomBrightness( - [0, 1.0], value_range=[0, 1], seed=1337) - inputs2 = inputs / 255.0 - output2 = layer2(inputs2) - # Make sure the outputs are the same, but just scaled with 255 - self.assertAllClose(output, output2 * 255.0) - - def test_different_adjustment_within_batch(self): - layer = image_preprocessing.RandomBrightness([0.2, 0.3]) - inputs = np.zeros(shape=(2, 10, 10, 3)) # 2 images with all zeros - output = layer(inputs) - diff = output - inputs - # Make sure two images gets the different adjustment - self.assertNotAllClose(diff[0], diff[1]) - # Make sure all the pixel are the same with the same image - image1 = output[0] - # The reduced mean pixel value among width and height are the same as - # any of the pixel in the image. 
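# A plausible reading of the behavior pinned down here and in the
# reformatted tests above (hedged sketch inferred from the saturation,
# scaling, and per-image assertions -- not the layer's actual code):
# brightness adds one random offset per image, proportional to the
# value range, then clips.
def adjust_brightness_reference(image, factor, value_range=(0, 255)):
    lo, hi = value_range
    # A single offset for the whole image, so every pixel of a given
    # image receives the identical adjustment asserted below.
    return np.clip(image + factor * (hi - lo), lo, hi)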
- self.assertAllClose( - tf.reduce_mean(image1), image1[0, 0, 0], rtol=1e-5, atol=1e-5) - - def test_inference(self): - layer = image_preprocessing.RandomBrightness([0, 1.0]) - inputs = np.random.randint(0, 255, size=(224, 224, 3)) - output = layer(inputs, training=False) - self.assertAllClose(inputs, output) - - @test_utils.run_v2_only - def test_dtype(self): - layer = image_preprocessing.RandomBrightness([0, 1.0]) - inputs = np.random.randint(0, 255, size=(224, 224, 3)) - output = layer(inputs) - self.assertEqual(output.dtype, tf.float32) - - layer = image_preprocessing.RandomBrightness([0, 1.0], dtype='uint8') - output = layer(inputs) - self.assertEqual(output.dtype, tf.uint8) - - def test_seed(self): - layer = image_preprocessing.RandomBrightness([0, 1.0], seed=1337) - inputs = np.random.randint(0, 255, size=(224, 224, 3)) - output_1 = layer(inputs) - - layer2 = image_preprocessing.RandomBrightness([0, 1.0], seed=1337) - output_2 = layer2(inputs) - - self.assertAllClose(output_1, output_2) - - def test_config(self): - layer = image_preprocessing.RandomBrightness( - [0, 1.0], value_range=[0.0, 1.0], seed=1337) - config = layer.get_config() - self.assertEqual(config['factor'], [0.0, 1.0]) - self.assertEqual(config['value_range'], [0.0, 1.0]) - self.assertEqual(config['seed'], 1337) - - reconstructed_layer = image_preprocessing.RandomBrightness.from_config( - config) - self.assertEqual(reconstructed_layer._factor, layer._factor) - self.assertEqual(reconstructed_layer._value_range, layer._value_range) - self.assertEqual(reconstructed_layer._seed, layer._seed) - - -@test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) class RandomTranslationTest(test_combinations.TestCase): - - def _run_test(self, height_factor, width_factor): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - kwargs = {'height_factor': height_factor, 'width_factor': width_factor} - with test_utils.use_gpu(): - test_utils.layer_test( - image_preprocessing.RandomTranslation, - kwargs=kwargs, - input_shape=(num_samples, orig_height, orig_width, channels), - expected_output_shape=(None, orig_height, orig_width, channels)) - - @parameterized.named_parameters( - ('random_translate_4_by_6', .4, .6), ('random_translate_3_by_2', .3, .2), - ('random_translate_tuple_factor', (-.5, .4), (.2, .3))) - def test_random_translation(self, height_factor, width_factor): - self._run_test(height_factor, width_factor) - - def test_random_translation_up_numeric_reflect(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) - # Shifting by -.2 * 5 = 1 pixel. - layer = image_preprocessing.RandomTranslation( - height_factor=(-.2, -.2), width_factor=0.) - output_image = layer(input_image) - expected_output = np.asarray([ - [5, 6, 7, 8, 9], - [10, 11, 12, 13, 14], - [15, 16, 17, 18, 19], - [20, 21, 22, 23, 24], - [20, 21, 22, 23, 24], - ]).astype(dtype) - expected_output = np.reshape(expected_output, (1, 5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_translation_up_numeric_constant(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) - # Shifting by -.2 * 5 = 1 pixel. 
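# The translation factors are fractions of the image dimension: with
# height 5 and height_factor pinned to (-0.2, -0.2), the content always
# moves up by 0.2 * 5 = 1 pixel. Under the default reflect fill the
# vacated bottom row repeats the last input row, so the expected grid
# can be derived directly (illustrative helper, not from the file):
def expected_shift_up_reflect(img):
    # Drop the first row, then repeat the last row to fill the gap.
    return np.concatenate([img[1:], img[-1:]], axis=0)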
- layer = image_preprocessing.RandomTranslation( - height_factor=(-.2, -.2), width_factor=0., fill_mode='constant') - output_image = layer(input_image) - expected_output = np.asarray([ - [5, 6, 7, 8, 9], - [10, 11, 12, 13, 14], - [15, 16, 17, 18, 19], - [20, 21, 22, 23, 24], - [0, 0, 0, 0, 0], - ]).astype(dtype) - expected_output = np.reshape(expected_output, (1, 5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_translation_down_numeric_reflect(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) - # Shifting by .2 * 5 = 1 pixel. - layer = image_preprocessing.RandomTranslation( - height_factor=(.2, .2), width_factor=0.) - output_image = layer(input_image) - expected_output = np.asarray([ - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [5, 6, 7, 8, 9], - [10, 11, 12, 13, 14], - [15, 16, 17, 18, 19], - ]).astype(dtype) - expected_output = np.reshape(expected_output, (1, 5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_translation_asymmetric_size_numeric_reflect(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 16), (1, 8, 2, 1)).astype(dtype) - # Shifting by .5 * 8 = 1 pixel. + def _run_test(self, height_factor, width_factor): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + kwargs = {"height_factor": height_factor, "width_factor": width_factor} + with test_utils.use_gpu(): + test_utils.layer_test( + image_preprocessing.RandomTranslation, + kwargs=kwargs, + input_shape=(num_samples, orig_height, orig_width, channels), + expected_output_shape=(None, orig_height, orig_width, channels), + ) + + @parameterized.named_parameters( + ("random_translate_4_by_6", 0.4, 0.6), + ("random_translate_3_by_2", 0.3, 0.2), + ("random_translate_tuple_factor", (-0.5, 0.4), (0.2, 0.3)), + ) + def test_random_translation(self, height_factor, width_factor): + self._run_test(height_factor, width_factor) + + def test_random_translation_up_numeric_reflect(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) + # Shifting by -.2 * 5 = 1 pixel. + layer = image_preprocessing.RandomTranslation( + height_factor=(-0.2, -0.2), width_factor=0.0 + ) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24], + [20, 21, 22, 23, 24], + ] + ).astype(dtype) + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_up_numeric_constant(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) + # Shifting by -.2 * 5 = 1 pixel. 
+ layer = image_preprocessing.RandomTranslation( + height_factor=(-0.2, -0.2), + width_factor=0.0, + fill_mode="constant", + ) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24], + [0, 0, 0, 0, 0], + ] + ).astype(dtype) + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_down_numeric_reflect(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) + # Shifting by .2 * 5 = 1 pixel. + layer = image_preprocessing.RandomTranslation( + height_factor=(0.2, 0.2), width_factor=0.0 + ) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + ] + ).astype(dtype) + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_asymmetric_size_numeric_reflect(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 16), (1, 8, 2, 1)).astype( + dtype + ) + # Shifting by .5 * 8 = 4 pixels. + layer = image_preprocessing.RandomTranslation( + height_factor=(0.5, 0.5), width_factor=0.0 + ) + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray( + [ + [6, 7], + [4, 5], + [2, 3], + [0, 1], + [0, 1], + [2, 3], + [4, 5], + [6, 7], + ] + ).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 8, 2, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_down_numeric_constant(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) + # Shifting by .2 * 5 = 1 pixel. + layer = image_preprocessing.RandomTranslation( + height_factor=(0.2, 0.2), + width_factor=0.0, + fill_mode="constant", + ) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [0, 0, 0, 0, 0], + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + ] + ).astype(dtype) + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_left_numeric_reflect(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) + # Shifting by .2 * 5 = 1 pixel. + layer = image_preprocessing.RandomTranslation( + height_factor=0.0, width_factor=(-0.2, -0.2) + ) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [1, 2, 3, 4, 4], + [6, 7, 8, 9, 9], + [11, 12, 13, 14, 14], + [16, 17, 18, 19, 19], + [21, 22, 23, 24, 24], + ] + ).astype(dtype) + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_left_numeric_constant(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) + # Shifting by -.2 * 5 = 1 pixel.
+ layer = image_preprocessing.RandomTranslation( + height_factor=0.0, + width_factor=(-0.2, -0.2), + fill_mode="constant", + ) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [1, 2, 3, 4, 0], + [6, 7, 8, 9, 0], + [11, 12, 13, 14, 0], + [16, 17, 18, 19, 0], + [21, 22, 23, 24, 0], + ] + ).astype(dtype) + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_inference(self): + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = input_images + with test_utils.use_gpu(): + layer = image_preprocessing.RandomTranslation(0.5, 0.5) + actual_output = layer(input_images, training=False) + self.assertAllClose(expected_output, actual_output) + + @test_utils.run_v2_only + def test_config_with_custom_name(self): layer = image_preprocessing.RandomTranslation( - height_factor=(.5, .5), width_factor=0.) - output_image = layer(input_image) - # pyformat: disable - expected_output = np.asarray([ - [6, 7], - [4, 5], - [2, 3], - [0, 1], - [0, 1], - [2, 3], - [4, 5], - [6, 7], - ]).astype(dtype) - # pyformat: enable - expected_output = np.reshape(expected_output, (1, 8, 2, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_translation_down_numeric_constant(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) - # Shifting by -.2 * 5 = 1 pixel. - layer = image_preprocessing.RandomTranslation( - height_factor=(.2, .2), width_factor=0., fill_mode='constant') - output_image = layer(input_image) - expected_output = np.asarray([ - [0, 0, 0, 0, 0], - [0, 1, 2, 3, 4], - [5, 6, 7, 8, 9], - [10, 11, 12, 13, 14], - [15, 16, 17, 18, 19], - ]).astype(dtype) - expected_output = np.reshape(expected_output, (1, 5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_translation_left_numeric_reflect(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) - # Shifting by .2 * 5 = 1 pixel. - layer = image_preprocessing.RandomTranslation( - height_factor=0., width_factor=(-.2, -.2)) - output_image = layer(input_image) - expected_output = np.asarray([ - [1, 2, 3, 4, 4], - [6, 7, 8, 9, 9], - [11, 12, 13, 14, 14], - [16, 17, 18, 19, 19], - [21, 22, 23, 24, 24], - ]).astype(dtype) - expected_output = np.reshape(expected_output, (1, 5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_translation_left_numeric_constant(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) - # Shifting by -.2 * 5 = 1 pixel. 
- layer = image_preprocessing.RandomTranslation( - height_factor=0., width_factor=(-.2, -.2), fill_mode='constant') - output_image = layer(input_image) - expected_output = np.asarray([ - [1, 2, 3, 4, 0], - [6, 7, 8, 9, 0], - [11, 12, 13, 14, 0], - [16, 17, 18, 19, 0], - [21, 22, 23, 24, 0], - ]).astype(dtype) - expected_output = np.reshape(expected_output, (1, 5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_translation_inference(self): - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = input_images - with test_utils.use_gpu(): - layer = image_preprocessing.RandomTranslation(.5, .5) - actual_output = layer(input_images, training=False) - self.assertAllClose(expected_output, actual_output) - - @test_utils.run_v2_only - def test_config_with_custom_name(self): - layer = image_preprocessing.RandomTranslation(.5, .6, name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.RandomTranslation.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_unbatched_image(self): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(np.int64) - # Shifting by -.2 * 5 = 1 pixel. - layer = image_preprocessing.RandomTranslation( - height_factor=(-.2, -.2), width_factor=0.) - output_image = layer(input_image) - expected_output = np.asarray([ - [5, 6, 7, 8, 9], - [10, 11, 12, 13, 14], - [15, 16, 17, 18, 19], - [20, 21, 22, 23, 24], - [20, 21, 22, 23, 24], - ]).astype(np.int64) - expected_output = np.reshape(expected_output, (5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.RandomTranslation(.5, .6) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.RandomTranslation(.5, .6, dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') + 0.5, 0.6, name="image_preproc" + ) + config = layer.get_config() + layer_1 = image_preprocessing.RandomTranslation.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_unbatched_image(self): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype( + np.int64 + ) + # Shifting by -.2 * 5 = 1 pixel. 
+ layer = image_preprocessing.RandomTranslation( + height_factor=(-0.2, -0.2), width_factor=0.0 + ) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24], + [20, 21, 22, 23, 24], + ] + ).astype(np.int64) + expected_output = np.reshape(expected_output, (5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.RandomTranslation(0.5, 0.6) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.RandomTranslation(0.5, 0.6, dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") @test_combinations.run_all_keras_modes(always_skip_v1=True) class RandomTransformTest(test_combinations.TestCase): - - def _run_random_transform_with_mock(self, - transform_matrix, - expected_output, - mode, - fill_value=0.0, - interpolation='bilinear'): - inp = np.arange(15).reshape((1, 5, 3, 1)).astype(np.float32) - with self.cached_session(): - output = image_preprocessing.transform( - inp, - transform_matrix, - fill_mode=mode, - fill_value=fill_value, - interpolation=interpolation) - self.assertAllClose(expected_output, output) - - def test_random_translation_reflect(self): - # reflected output is (dcba|abcd|dcba) - - # Test down shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[0., 1., 2.], - [0., 1., 2.], - [3., 4., 5.], - [6., 7., 8], - [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'reflect') - - # Test up shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[3., 4., 5.], - [6., 7., 8], - [9., 10., 11.], - [12., 13., 14.], - [12., 13., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'reflect') - - # Test left shift by 1. - # reflected output is (dcba|abcd|dcba) - # pyformat: disable - expected_output = np.asarray( - [[1., 2., 2.], - [4., 5., 5.], - [7., 8., 8.], - [10., 11., 11.], - [13., 14., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'reflect') - - # Test right shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[0., 0., 1.], - [3., 3., 4], - [6., 6., 7.], - [9., 9., 10.], - [12., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'reflect') - - def test_random_translation_wrap(self): - # warpped output is (abcd|abcd|abcd) - - # Test down shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[12., 13., 14.], - [0., 1., 2.], - [3., 4., 5.], - [6., 7., 8], - [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'wrap') - - # Test up shift by 1. 
- # pyformat: disable - expected_output = np.asarray( - [[3., 4., 5.], - [6., 7., 8], - [9., 10., 11.], - [12., 13., 14.], - [0., 1., 2.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'wrap') - - # Test left shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[1., 2., 0.], - [4., 5., 3.], - [7., 8., 6.], - [10., 11., 9.], - [13., 14., 12.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'wrap') - - # Test right shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[2., 0., 1.], - [5., 3., 4], - [8., 6., 7.], - [11., 9., 10.], - [14., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'wrap') - - def test_random_translation_nearest(self): - # nearest output is (aaaa|abcd|dddd) - - # Test down shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[0., 1., 2.], - [0., 1., 2.], - [3., 4., 5.], - [6., 7., 8], - [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'nearest') - - # Test up shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[3., 4., 5.], - [6., 7., 8], - [9., 10., 11.], - [12., 13., 14.], - [12., 13., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'nearest') - - # Test left shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[1., 2., 2.], - [4., 5., 5.], - [7., 8., 8.], - [10., 11., 11.], - [13., 14., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'nearest') - - # Test right shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[0., 0., 1.], - [3., 3., 4], - [6., 6., 7.], - [9., 9., 10.], - [12., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'nearest') - - def test_random_translation_constant_0(self): - # constant output is (0000|abcd|0000) - - # Test down shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[0., 0., 0.], - [0., 1., 2.], - [3., 4., 5.], - [6., 7., 8], - [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'constant') - - # Test up shift by 1. 
- # pyformat: disable - expected_output = np.asarray( - [[3., 4., 5.], - [6., 7., 8], - [9., 10., 11.], - [12., 13., 14.], - [0., 0., 0.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'constant') - - # Test left shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[1., 2., 0.], - [4., 5., 0.], - [7., 8., 0.], - [10., 11., 0.], - [13., 14., 0.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'constant') - - # Test right shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[0., 0., 1.], - [0., 3., 4], - [0., 6., 7.], - [0., 9., 10.], - [0., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock(transform_matrix, expected_output, - 'constant') - - def test_random_translation_constant_1(self): - with tf.compat.forward_compatibility_horizon(2020, 8, 6): - # constant output is (1111|abcd|1111) - - # Test down shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[1., 1., 1.], - [0., 1., 2.], - [3., 4., 5.], - [6., 7., 8], - [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]]) - self._run_random_transform_with_mock( - transform_matrix, expected_output, 'constant', fill_value=1.0) - - # Test up shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[3., 4., 5.], - [6., 7., 8], - [9., 10., 11.], - [12., 13., 14.], - [1., 1., 1.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]]) - self._run_random_transform_with_mock( - transform_matrix, expected_output, 'constant', fill_value=1.0) - - # Test left shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[1., 2., 1.], - [4., 5., 1.], - [7., 8., 1.], - [10., 11., 1.], - [13., 14., 1.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock( - transform_matrix, expected_output, 'constant', fill_value=1.0) - - # Test right shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[1., 0., 1.], - [1., 3., 4], - [1., 6., 7.], - [1., 9., 10.], - [1., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock( - transform_matrix, expected_output, 'constant', fill_value=1.0) - - def test_random_translation_nearest_interpolation(self): - # nearest output is (aaaa|abcd|dddd) - - # Test down shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[0., 0., 0.], - [0., 1., 2.], - [3., 4., 5.], - [6., 7., 8], - [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]]) - self._run_random_transform_with_mock( - transform_matrix, - expected_output, - mode='constant', - interpolation='nearest') - - # Test up shift by 1. 
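# Fill-mode cheat sheet, collected from the mnemonics in this class,
# for a row "abcd" extended past its edges (k = fill_value):
#   reflect  -> dcba | abcd | dcba
#   wrap     -> abcd | abcd | abcd
#   nearest  -> aaaa | abcd | dddd
#   constant -> kkkk | abcd | kkkk
# The constant_1 cases use fill_value=1.0, so a down shift by one row
# should produce a leading row of ones (illustrative helper):
def expected_down_shift_constant(img, fill_value=1.0):
    pad = np.full((1,) + img.shape[1:], fill_value, dtype=img.dtype)
    return np.concatenate([pad, img[:-1]], axis=0)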
- # pyformat: disable - expected_output = np.asarray( - [[3., 4., 5.], - [6., 7., 8], - [9., 10., 11.], - [12., 13., 14.], - [0., 0., 0.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]]) - self._run_random_transform_with_mock( + def _run_random_transform_with_mock( + self, transform_matrix, expected_output, - mode='constant', - interpolation='nearest') - - # Test left shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[1., 2., 0.], - [4., 5., 0.], - [7., 8., 0.], - [10., 11., 0.], - [13., 14., 0.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock( - transform_matrix, - expected_output, - mode='constant', - interpolation='nearest') - - # Test right shift by 1. - # pyformat: disable - expected_output = np.asarray( - [[0., 0., 1.], - [0., 3., 4], - [0., 6., 7.], - [0., 9., 10.], - [0., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32) - # pyformat: enable - transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]]) - self._run_random_transform_with_mock( - transform_matrix, - expected_output, - mode='constant', - interpolation='nearest') - - -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class RandomRotationTest(test_combinations.TestCase): - - def _run_test(self, factor): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - kwargs = {'factor': factor} - with test_utils.use_gpu(): - test_utils.layer_test( - image_preprocessing.RandomRotation, - kwargs=kwargs, - input_shape=(num_samples, orig_height, orig_width, channels), - expected_output_shape=(None, orig_height, orig_width, channels)) - - @parameterized.named_parameters(('random_rotate_4', .4), - ('random_rotate_3', .3), - ('random_rotate_tuple_factor', (-.5, .4))) - def test_random_rotation(self, factor): - self._run_test(factor) - - def test_random_rotation_inference(self): - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = input_images - with test_utils.use_gpu(): - layer = image_preprocessing.RandomRotation(.5) - actual_output = layer(input_images, training=False) - self.assertAllClose(expected_output, actual_output) - - def test_distribution_strategy(self): - """Tests that RandomRotation can be created within distribution strategies.""" - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - with test_utils.use_gpu(): - strat = tf.distribute.MirroredStrategy(devices=['cpu', 'gpu']) - with strat.scope(): - layer = image_preprocessing.RandomRotation(.5) - output = strat.run(lambda: layer(input_images, training=True)) - values = output.values - self.assertAllEqual(2, len(values)) - - @test_utils.run_v2_only - def test_config_with_custom_name(self): - layer = image_preprocessing.RandomRotation(.5, name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.RandomRotation.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_unbatched_image(self): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(np.float32) - # 180 rotation. 
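# RandomRotation's factor is a fraction of 2*pi, so pinning it to
# (0.5, 0.5) forces exactly a 180-degree rotation. For that special
# case the expected grid is just the input flipped over both axes
# (illustrative helper, equivalent to np.rot90(img, k=2) for an HWC
# image):
def rotate_180(img):
    return np.flip(np.flip(img, axis=0), axis=1)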
- layer = image_preprocessing.RandomRotation(factor=(0.5, 0.5)) - output_image = layer(input_image) - expected_output = np.asarray([ - [24, 23, 22, 21, 20], - [19, 18, 17, 16, 15], - [14, 13, 12, 11, 10], - [9, 8, 7, 6, 5], - [4, 3, 2, 1, 0], - ]).astype(np.float32) - expected_output = np.reshape(expected_output, (5, 5, 1)) - self.assertAllClose(expected_output, output_image) - - def test_augment_image(self): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(np.float32) - # 180 rotation. - layer = image_preprocessing.RandomRotation(factor=(0.5, 0.5)) - output_image = layer.augment_image( - input_image, transformation=layer.get_random_transformation()) - expected_output = np.asarray([ - [24, 23, 22, 21, 20], - [19, 18, 17, 16, 15], - [14, 13, 12, 11, 10], - [9, 8, 7, 6, 5], - [4, 3, 2, 1, 0], - ]).astype(np.float32) - expected_output = np.reshape(expected_output, (5, 5, 1)) - self.assertAllClose(expected_output, output_image) - - def test_augment_bbox(self): - with test_utils.use_gpu(): - input_image = np.random.random((512, 512, 3)).astype(np.float32) - bboxes = tf.convert_to_tensor([[200,200,400,400],[100,100,300,300]]) - # 180 rotation. - layer = image_preprocessing.RandomRotation(factor=(0.5, 0.5)) - output_bbox = layer.augment_bounding_boxes( - input_image, bboxes, transformation=layer.get_random_transformation()) - expected_output = np.asarray([ - [111, 112, 312, 312], - [212, 211, 412, 412] - ]).astype(np.int32) - expected_output = np.reshape(expected_output, ( 2, 4)) - self.assertAllClose(expected_output, output_bbox) - - def test_augment_bbox_dict_input(self): - with test_utils.use_gpu(): - input_image = np.random.random((512, 512, 3)).astype(np.float32) - bboxes = tf.convert_to_tensor([[200,200,400,400],[100,100,300,300]]) - input = {'images':input_image, 'bounding_boxes':bboxes} - # 180 rotation. - layer = image_preprocessing.RandomRotation(factor=(0.0833, 0.0833)) - output_bbox = layer(input) - expected_output = np.asarray([ - [179, 135, 452, 408], - [42, 98, 316, 372] - ]).astype(np.int32) - expected_output = np.reshape(expected_output, ( 2, 4)) - self.assertAllClose(expected_output, output_bbox['bounding_boxes']) - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.RandomRotation(.5) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.RandomRotation(.5, dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') - - -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class RandomZoomTest(test_combinations.TestCase): + mode, + fill_value=0.0, + interpolation="bilinear", + ): + inp = np.arange(15).reshape((1, 5, 3, 1)).astype(np.float32) + with self.cached_session(): + output = image_preprocessing.transform( + inp, + transform_matrix, + fill_mode=mode, + fill_value=fill_value, + interpolation=interpolation, + ) + self.assertAllClose(expected_output, output) + + def test_random_translation_reflect(self): + # reflected output is (dcba|abcd|dcba) + + # Test down shift by 1. 
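# How to read the 8-element transforms used below: a vector
# [a0, a1, a2, b0, b1, b2, c0, c1] maps each *output* pixel (x, y) back
# to the input sample point
#   x_in = (a0*x + a1*y + a2) / k,  y_in = (b0*x + b1*y + b2) / k,
#   with k = c0*x + c1*y + 1,
# so [1, 0, 0, 0, 1, -1, 0, 0] samples one row above each output pixel,
# i.e. the content shifts down by one. A translation builder under that
# reading (illustrative, not part of the file):
def translation_transform(dx, dy):
    # Output-to-input mapping, hence the negated offsets.
    return np.asarray([[1.0, 0.0, -dx, 0.0, 1.0, -dy, 0.0, 0.0]])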
+ # pyformat: disable + expected_output = ( + np.asarray( + [ + [0.0, 1.0, 2.0], + [0.0, 1.0, 2.0], + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "reflect" + ) + + # Test up shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11.0], + [12.0, 13.0, 14.0], + [12.0, 13.0, 14.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "reflect" + ) + + # Test left shift by 1. + # reflected output is (dcba|abcd|dcba) + # pyformat: disable + expected_output = ( + np.asarray( + [ + [1.0, 2.0, 2.0], + [4.0, 5.0, 5.0], + [7.0, 8.0, 8.0], + [10.0, 11.0, 11.0], + [13.0, 14.0, 14.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "reflect" + ) + + # Test right shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [0.0, 0.0, 1.0], + [3.0, 3.0, 4], + [6.0, 6.0, 7.0], + [9.0, 9.0, 10.0], + [12.0, 12.0, 13.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "reflect" + ) - def _run_test(self, height_factor, width_factor): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - kwargs = {'height_factor': height_factor, 'width_factor': width_factor} - with test_utils.use_gpu(): - test_utils.layer_test( - image_preprocessing.RandomZoom, - kwargs=kwargs, - input_shape=(num_samples, orig_height, orig_width, channels), - expected_output_shape=(None, orig_height, orig_width, channels)) - - @parameterized.named_parameters( - ('random_zoom_4_by_6', -.4, -.6), ('random_zoom_2_by_3', -.2, -.3), - ('random_zoom_tuple_factor', (-.4, -.5), (-.2, -.3))) - def test_random_zoom_in(self, height_factor, width_factor): - self._run_test(height_factor, width_factor) - - @parameterized.named_parameters( - ('random_zoom_4_by_6', .4, .6), ('random_zoom_2_by_3', .2, .3), - ('random_zoom_tuple_factor', (.4, .5), (.2, .3))) - def test_random_zoom_out(self, height_factor, width_factor): - self._run_test(height_factor, width_factor) - - def test_random_zoom_in_numeric(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype) - layer = image_preprocessing.RandomZoom((-.5, -.5), (-.5, -.5), - interpolation='nearest') - output_image = layer(np.expand_dims(input_image, axis=0)) - expected_output = np.asarray([ - [6, 7, 7, 8, 8], - [11, 12, 12, 13, 13], - [11, 12, 12, 13, 13], - [16, 17, 17, 18, 18], - [16, 17, 17, 18, 18], - ]).astype(dtype) - expected_output = np.reshape(expected_output, (1, 5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_zoom_out_numeric(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (5, 5, 
1)).astype(dtype) - layer = image_preprocessing.RandomZoom((.5, .5), (.8, .8), - fill_mode='constant', - interpolation='nearest') - output_image = layer(np.expand_dims(input_image, axis=0)) - expected_output = np.asarray([ - [0, 0, 0, 0, 0], - [0, 5, 7, 9, 0], - [0, 10, 12, 14, 0], - [0, 20, 22, 24, 0], - [0, 0, 0, 0, 0], - ]).astype(dtype) - expected_output = np.reshape(expected_output, (1, 5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_zoom_out_numeric_preserve_aspect_ratio(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype) - layer = image_preprocessing.RandomZoom((.5, .5), - fill_mode='constant', - interpolation='nearest') - output_image = layer(np.expand_dims(input_image, axis=0)) - expected_output = np.asarray([ - [0, 0, 0, 0, 0], - [0, 6, 7, 9, 0], - [0, 11, 12, 14, 0], - [0, 21, 22, 24, 0], - [0, 0, 0, 0, 0], - ]).astype(dtype) - expected_output = np.reshape(expected_output, (1, 5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_zoom_inference(self): - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = input_images - with test_utils.use_gpu(): - layer = image_preprocessing.RandomZoom(.5, .5) - actual_output = layer(input_images, training=False) - self.assertAllClose(expected_output, actual_output) - - @test_utils.run_v2_only - def test_config_with_custom_name(self): - layer = image_preprocessing.RandomZoom(.5, .6, name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.RandomZoom.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_unbatched_image(self): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(np.int64) - layer = image_preprocessing.RandomZoom((-.5, -.5), (-.5, -.5), - interpolation='nearest') - output_image = layer(input_image) - expected_output = np.asarray([ - [6, 7, 7, 8, 8], - [11, 12, 12, 13, 13], - [11, 12, 12, 13, 13], - [16, 17, 17, 18, 18], - [16, 17, 17, 18, 18], - ]).astype(np.int64) - expected_output = np.reshape(expected_output, (5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_augment_image(self): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(np.int64) - layer = image_preprocessing.RandomZoom((-.5, -.5), (-.5, -.5), - interpolation='nearest') - output_image = layer.augment_image( - input_image, transformation=layer.get_random_transformation()) - expected_output = np.asarray([ - [6, 7, 7, 8, 8], - [11, 12, 12, 13, 13], - [11, 12, 12, 13, 13], - [16, 17, 17, 18, 18], - [16, 17, 17, 18, 18], - ]).astype(np.int64) - expected_output = np.reshape(expected_output, (5, 5, 1)) - self.assertAllEqual(expected_output, output_image) - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.RandomZoom(.5, .5) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.RandomZoom(.5, .5, dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') + def test_random_translation_wrap(self): + # wrapped output is (abcd|abcd|abcd) + # Test down shift by 1.
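# With "wrap" fill every shift is circular, so each expected grid in
# this test is a plain np.roll of the input (illustrative check for the
# (1, 5, 3, 1) arange images used here):
def expected_wrap_shift(img, rows=0, cols=0):
    # Positive rows/cols move content down/right with wrap-around.
    return np.roll(img, shift=(rows, cols), axis=(1, 2))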
+ # pyformat: disable + expected_output = ( + np.asarray( + [ + [12.0, 13.0, 14.0], + [0.0, 1.0, 2.0], + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "wrap" + ) + + # Test up shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11.0], + [12.0, 13.0, 14.0], + [0.0, 1.0, 2.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "wrap" + ) + + # Test left shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [1.0, 2.0, 0.0], + [4.0, 5.0, 3.0], + [7.0, 8.0, 6.0], + [10.0, 11.0, 9.0], + [13.0, 14.0, 12.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "wrap" + ) + + # Test right shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [2.0, 0.0, 1.0], + [5.0, 3.0, 4], + [8.0, 6.0, 7.0], + [11.0, 9.0, 10.0], + [14.0, 12.0, 13.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "wrap" + ) -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class RandomHeightTest(test_combinations.TestCase): + def test_random_translation_nearest(self): + # nearest output is (aaaa|abcd|dddd) - def _run_test(self, factor): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - with test_utils.use_gpu(): - img = np.random.random((num_samples, orig_height, orig_width, channels)) - layer = image_preprocessing.RandomHeight(factor) - img_out = layer(img, training=True) - self.assertEqual(img_out.shape[0], 2) - self.assertEqual(img_out.shape[2], 8) - self.assertEqual(img_out.shape[3], 3) - - @parameterized.named_parameters(('random_height_4_by_6', (.4, .6)), - ('random_height_3_by_2', (-.3, .2)), - ('random_height_3', .3)) - def test_random_height_basic(self, factor): - self._run_test(factor) - - def test_valid_random_height(self): - # need (maxval - minval) * rnd + minval = 0.6 - mock_factor = 0.6 - with test_utils.use_gpu(): - img = np.random.random((12, 5, 8, 3)) - layer = image_preprocessing.RandomHeight(.4) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_factor): - img_out = layer(img, training=True) - self.assertEqual(img_out.shape[1], 3) - - def test_random_height_longer_numeric(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 6), (2, 3, 1)).astype(dtype) - layer = image_preprocessing.RandomHeight(factor=(1., 1.)) - # Return type of RandomHeight() is float32 if `interpolation` is not - # set to `ResizeMethod.NEAREST_NEIGHBOR`; cast `layer` to desired dtype. - output_image = tf.cast( - layer(np.expand_dims(input_image, axis=0)), dtype=dtype) + # Test down shift by 1. 
# pyformat: disable - expected_output = np.asarray([ - [0, 1, 2], - [0.75, 1.75, 2.75], - [2.25, 3.25, 4.25], - [3, 4, 5] - ]).astype(dtype) + expected_output = ( + np.asarray( + [ + [0.0, 1.0, 2.0], + [0.0, 1.0, 2.0], + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) # pyformat: enable - expected_output = np.reshape(expected_output, (1, 4, 3, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_height_shorter_numeric(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 8), (4, 2, 1)).astype(dtype) - layer = image_preprocessing.RandomHeight( - factor=(-.5, -.5), interpolation='nearest') - output_image = layer(np.expand_dims(input_image, axis=0)) + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "nearest" + ) + + # Test up shift by 1. # pyformat: disable - expected_output = np.asarray([ - [2, 3], - [6, 7] - ]).astype(dtype) + expected_output = ( + np.asarray( + [ + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11.0], + [12.0, 13.0, 14.0], + [12.0, 13.0, 14.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) # pyformat: enable - expected_output = np.reshape(expected_output, (1, 2, 2, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_height_invalid_factor(self): - with self.assertRaises(ValueError): - image_preprocessing.RandomHeight((-1.5, .4)) - - def test_random_height_inference(self): - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = input_images - with test_utils.use_gpu(): - layer = image_preprocessing.RandomHeight(.5) - actual_output = layer(input_images, training=False) - self.assertAllClose(expected_output, actual_output) - - @test_utils.run_v2_only - def test_config_with_custom_name(self): - layer = image_preprocessing.RandomHeight(.5, name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.RandomHeight.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_unbatched_image(self): - # need (maxval - minval) * rnd + minval = 0.6 - mock_factor = 0.6 - with test_utils.use_gpu(): - img = np.random.random((5, 8, 3)) - layer = image_preprocessing.RandomHeight(.4) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_factor): - img_out = layer(img, training=True) - self.assertEqual(img_out.shape[0], 3) - - @test_utils.run_v2_only - def test_batched_input(self): - # need (maxval - minval) * rnd + minval = 0.6 - mock_factor = 0.6 - with test_utils.use_gpu(): - images = np.random.random((5, 5, 8, 3)) - layer = image_preprocessing.RandomHeight(.4) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_factor): - img_out = layer(images, training=True) - self.assertEqual(img_out.shape[1], 3) - - @test_utils.run_v2_only - def test_augment_image(self): - # need (maxval - minval) * rnd + minval = 0.6 - mock_factor = 0.6 - with test_utils.use_gpu(): - img = np.random.random((5, 8, 3)) - layer = image_preprocessing.RandomHeight(.4) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_factor): - img_out = layer.augment_image( - img, transformation=layer.get_random_transformation(image=img)) - self.assertEqual(img_out.shape[0], 3) - - @test_utils.run_v2_only - def 
test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.RandomHeight(.2) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.RandomHeight(.2, dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') - + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "nearest" + ) + + # Test left shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [1.0, 2.0, 2.0], + [4.0, 5.0, 5.0], + [7.0, 8.0, 8.0], + [10.0, 11.0, 11.0], + [13.0, 14.0, 14.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "nearest" + ) + + # Test right shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [0.0, 0.0, 1.0], + [3.0, 3.0, 4], + [6.0, 6.0, 7.0], + [9.0, 9.0, 10.0], + [12.0, 12.0, 13.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "nearest" + ) -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class RandomWidthTest(test_combinations.TestCase): + def test_random_translation_constant_0(self): + # constant output is (0000|abcd|0000) - def _run_test(self, factor): - np.random.seed(1337) - num_samples = 2 - orig_height = 5 - orig_width = 8 - channels = 3 - with test_utils.use_gpu(): - img = np.random.random((num_samples, orig_height, orig_width, channels)) - layer = image_preprocessing.RandomWidth(factor) - img_out = layer(img, training=True) - self.assertEqual(img_out.shape[0], 2) - self.assertEqual(img_out.shape[1], 5) - self.assertEqual(img_out.shape[3], 3) - - @parameterized.named_parameters(('random_width_4_by_6', (.4, .6)), - ('random_width_3_by_2', (-.3, .2)), - ('random_width_3', .3)) - def test_random_width_basic(self, factor): - self._run_test(factor) - - def test_valid_random_width(self): - # need (maxval - minval) * rnd + minval = 0.6 - mock_factor = 0.6 - with test_utils.use_gpu(): - img = np.random.random((12, 8, 5, 3)) - layer = image_preprocessing.RandomWidth(.4) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_factor): - img_out = layer(img, training=True) - self.assertEqual(img_out.shape[2], 3) - - def test_random_width_longer_numeric(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 6), (3, 2, 1)).astype(dtype) - layer = image_preprocessing.RandomWidth(factor=(1., 1.)) - # Return type of RandomWidth() is float32 if `interpolation` is not - # set to `ResizeMethod.NEAREST_NEIGHBOR`; cast `layer` to desired dtype. - output_image = tf.cast( - layer(np.expand_dims(input_image, axis=0)), dtype=dtype) + # Test down shift by 1. 
+ # pyformat: disable + expected_output = ( + np.asarray( + [ + [0.0, 0.0, 0.0], + [0.0, 1.0, 2.0], + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "constant" + ) + + # Test up shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11.0], + [12.0, 13.0, 14.0], + [0.0, 0.0, 0.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "constant" + ) + + # Test left shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [1.0, 2.0, 0.0], + [4.0, 5.0, 0.0], + [7.0, 8.0, 0.0], + [10.0, 11.0, 0.0], + [13.0, 14.0, 0.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "constant" + ) + + # Test right shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [0.0, 0.0, 1.0], + [0.0, 3.0, 4], + [0.0, 6.0, 7.0], + [0.0, 9.0, 10.0], + [0.0, 12.0, 13.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "constant" + ) + + def test_random_translation_constant_1(self): + with tf.compat.forward_compatibility_horizon(2020, 8, 6): + # constant output is (1111|abcd|1111) + + # Test down shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [1.0, 1.0, 1.0], + [0.0, 1.0, 2.0], + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "constant", fill_value=1.0 + ) + + # Test up shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11.0], + [12.0, 13.0, 14.0], + [1.0, 1.0, 1.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "constant", fill_value=1.0 + ) + + # Test left shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [1.0, 2.0, 1.0], + [4.0, 5.0, 1.0], + [7.0, 8.0, 1.0], + [10.0, 11.0, 1.0], + [13.0, 14.0, 1.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "constant", fill_value=1.0 + ) + + # Test right shift by 1. 
+ # pyformat: disable + expected_output = ( + np.asarray( + [ + [1.0, 0.0, 1.0], + [1.0, 3.0, 4], + [1.0, 6.0, 7.0], + [1.0, 9.0, 10.0], + [1.0, 12.0, 13.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, expected_output, "constant", fill_value=1.0 + ) + + def test_random_translation_nearest_interpolation(self): + # nearest output is (aaaa|abcd|dddd) + + # Test down shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [0.0, 0.0, 0.0], + [0.0, 1.0, 2.0], + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, + expected_output, + mode="constant", + interpolation="nearest", + ) + + # Test up shift by 1. + # pyformat: disable + expected_output = ( + np.asarray( + [ + [3.0, 4.0, 5.0], + [6.0, 7.0, 8], + [9.0, 10.0, 11.0], + [12.0, 13.0, 14.0], + [0.0, 0.0, 0.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) + # pyformat: enable + transform_matrix = np.asarray( + [[1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, + expected_output, + mode="constant", + interpolation="nearest", + ) + + # Test left shift by 1. # pyformat: disable - expected_output = np.asarray([ - [0, 0.25, 0.75, 1], - [2, 2.25, 2.75, 3], - [4, 4.25, 4.75, 5] - ]).astype(dtype) + expected_output = ( + np.asarray( + [ + [1.0, 2.0, 0.0], + [4.0, 5.0, 0.0], + [7.0, 8.0, 0.0], + [10.0, 11.0, 0.0], + [13.0, 14.0, 0.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) # pyformat: enable - expected_output = np.reshape(expected_output, (1, 3, 4, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_width_shorter_numeric(self): - for dtype in (np.int64, np.float32): - with test_utils.use_gpu(): - input_image = np.reshape(np.arange(0, 8), (2, 4, 1)).astype(dtype) - layer = image_preprocessing.RandomWidth( - factor=(-.5, -.5), interpolation='nearest') - output_image = layer(np.expand_dims(input_image, axis=0)) + transform_matrix = np.asarray( + [[1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, + expected_output, + mode="constant", + interpolation="nearest", + ) + + # Test right shift by 1. 
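Each `transform_matrix` above is the flattened 8-parameter projective transform `[a0, a1, a2, b0, b1, b2, c0, c1]` consumed by TF's image transform op: output pixel `(x, y)` samples the input at `((a0*x + a1*y + a2)/k, (b0*x + b1*y + b2)/k)` with `k = c0*x + c1*y + 1`, which is why `a2 = 1` shifts content left by one column and `b2 = -1` shifts it down by one row. A rough pure-numpy rendering of that convention for intuition (nearest sampling, constant fill; a sketch, not the op's actual kernel):

```python
import numpy as np

def project(image, t, fill_value=0.0):
    """Apply an 8-parameter projective transform to a 2-D image."""
    a0, a1, a2, b0, b1, b2, c0, c1 = t
    h, w = image.shape
    out = np.full((h, w), fill_value, dtype=image.dtype)
    for y in range(h):
        for x in range(w):
            k = c0 * x + c1 * y + 1.0
            sx = int(round((a0 * x + a1 * y + a2) / k))  # source column
            sy = int(round((b0 * x + b1 * y + b2) / k))  # source row
            if 0 <= sx < w and 0 <= sy < h:
                out[y, x] = image[sy, sx]
    return out

img = np.arange(15, dtype=np.float32).reshape(5, 3)
# b2 = -1 samples from y - 1, so the content shifts down by one row and the
# vacated top row takes the constant fill value, as in the tests above:
print(project(img, (1, 0, 0, 0, 1, -1, 0, 0)))
```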
# pyformat: disable - expected_output = np.asarray([ - [1, 3], - [5, 7] - ]).astype(dtype) + expected_output = ( + np.asarray( + [ + [0.0, 0.0, 1.0], + [0.0, 3.0, 4], + [0.0, 6.0, 7.0], + [0.0, 9.0, 10.0], + [0.0, 12.0, 13.0], + ] + ) + .reshape((1, 5, 3, 1)) + .astype(np.float32) + ) # pyformat: enable - expected_output = np.reshape(expected_output, (1, 2, 2, 1)) - self.assertAllEqual(expected_output, output_image) - - def test_random_width_invalid_factor(self): - with self.assertRaises(ValueError): - image_preprocessing.RandomWidth((-1.5, .4)) - - def test_random_width_inference(self): - input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - expected_output = input_images - with test_utils.use_gpu(): - layer = image_preprocessing.RandomWidth(.5) - actual_output = layer(input_images, training=False) - self.assertAllClose(expected_output, actual_output) - - @test_utils.run_v2_only - def test_config_with_custom_name(self): - layer = image_preprocessing.RandomWidth(.5, name='image_preproc') - config = layer.get_config() - layer_1 = image_preprocessing.RandomWidth.from_config(config) - self.assertEqual(layer_1.name, layer.name) - - def test_unbatched_image(self): - # need (maxval - minval) * rnd + minval = 0.6 - mock_factor = 0.6 - with test_utils.use_gpu(): - img = np.random.random((8, 5, 3)) - layer = image_preprocessing.RandomWidth(.4) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_factor): - img_out = layer(img, training=True) - self.assertEqual(img_out.shape[1], 3) - - @test_utils.run_v2_only - def test_batched_input(self): - # need (maxval - minval) * rnd + minval = 0.6 - mock_factor = 0.6 - with test_utils.use_gpu(): - img = np.random.random((12, 8, 5, 3)) - layer = image_preprocessing.RandomWidth(.4) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_factor): - img_out = layer(img, training=True) - self.assertEqual(img_out.shape[2], 3) - - @test_utils.run_v2_only - def test_augment_image(self): - # need (maxval - minval) * rnd + minval = 0.6 - mock_factor = 0.6 - with test_utils.use_gpu(): - img = np.random.random((8, 5, 3)) - layer = image_preprocessing.RandomWidth(.4) - with tf.compat.v1.test.mock.patch.object( - layer._random_generator, 'random_uniform', return_value=mock_factor): - img_out = layer.augment_image( - img, transformation=layer.get_random_transformation(image=img)) - self.assertEqual(img_out.shape[1], 3) - - @test_utils.run_v2_only - def test_output_dtypes(self): - inputs = np.array([[[1], [2]], [[3], [4]]], dtype='float64') - layer = image_preprocessing.RandomWidth(.2) - self.assertAllEqual(layer(inputs).dtype, 'float32') - layer = image_preprocessing.RandomWidth(.2, dtype='uint8') - self.assertAllEqual(layer(inputs).dtype, 'uint8') + transform_matrix = np.asarray( + [[1.0, 0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0]] + ) + self._run_random_transform_with_mock( + transform_matrix, + expected_output, + mode="constant", + interpolation="nearest", + ) @test_combinations.run_all_keras_modes(always_skip_v1=True) -class WithLabelsTest(test_combinations.TestCase): - - @parameterized.named_parameters( - ('RandomZoom', image_preprocessing.RandomZoom, { - 'height_factor': 0.1 - }), - ('RandomBrightness', image_preprocessing.RandomBrightness, { - 'factor': 0.5 - }), - ('RandomContrast', image_preprocessing.RandomContrast, { - 'factor': 0.5 - }), - ('RandomRotation', image_preprocessing.RandomRotation, { - 'factor': 0.2 - }), - ) - def 
test_layer_with_labels(self, layer_cls, init_args): - layer = layer_cls(**init_args) - - img = tf.random.uniform( - shape=(3, 512, 512, 3), minval=0, maxval=1, dtype=tf.float32) - labels = tf.constant(([[1, 0, 0], [0, 0, 1], [0, 1, 0]]), dtype=tf.float32) - - inputs = {'images': img, 'labels': labels} - outputs = layer(inputs) - self.assertAllClose(labels, outputs["labels"]) +class RandomRotationTest(test_combinations.TestCase): + def _run_test(self, factor): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + kwargs = {"factor": factor} + with test_utils.use_gpu(): + test_utils.layer_test( + image_preprocessing.RandomRotation, + kwargs=kwargs, + input_shape=(num_samples, orig_height, orig_width, channels), + expected_output_shape=(None, orig_height, orig_width, channels), + ) + + @parameterized.named_parameters( + ("random_rotate_4", 0.4), + ("random_rotate_3", 0.3), + ("random_rotate_tuple_factor", (-0.5, 0.4)), + ) + def test_random_rotation(self, factor): + self._run_test(factor) + + def test_random_rotation_inference(self): + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = input_images + with test_utils.use_gpu(): + layer = image_preprocessing.RandomRotation(0.5) + actual_output = layer(input_images, training=False) + self.assertAllClose(expected_output, actual_output) + + def test_distribution_strategy(self): + """Tests that RandomRotation can be created within DistStrats.""" + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + with test_utils.use_gpu(): + strat = tf.distribute.MirroredStrategy(devices=["cpu", "gpu"]) + with strat.scope(): + layer = image_preprocessing.RandomRotation(0.5) + output = strat.run(lambda: layer(input_images, training=True)) + values = output.values + self.assertAllEqual(2, len(values)) + + @test_utils.run_v2_only + def test_config_with_custom_name(self): + layer = image_preprocessing.RandomRotation(0.5, name="image_preproc") + config = layer.get_config() + layer_1 = image_preprocessing.RandomRotation.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_unbatched_image(self): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype( + np.float32 + ) + # 180 rotation. 
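`RandomRotation`'s `factor` is a fraction of 2*pi, so pinning it with `factor=(0.5, 0.5)` forces a rotation of exactly pi (180 degrees); for a square grid that amounts to reversing both axes, which is where the expected grid below comes from. A quick numpy check of that arithmetic:

```python
import numpy as np

img = np.arange(25, dtype=np.float32).reshape(5, 5)
# A 180-degree rotation reverses both axes, so the first output row is the
# last input row reversed: [24, 23, 22, 21, 20].
print(img[::-1, ::-1])
```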
+ layer = image_preprocessing.RandomRotation(factor=(0.5, 0.5)) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [24, 23, 22, 21, 20], + [19, 18, 17, 16, 15], + [14, 13, 12, 11, 10], + [9, 8, 7, 6, 5], + [4, 3, 2, 1, 0], + ] + ).astype(np.float32) + expected_output = np.reshape(expected_output, (5, 5, 1)) + self.assertAllClose(expected_output, output_image) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.RandomRotation(0.5) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.RandomRotation(0.5, dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") @test_combinations.run_all_keras_modes(always_skip_v1=True) -class LearningPhaseTest(test_combinations.TestCase): - - def test_plain_call(self): - layer = image_preprocessing.RandomWidth(.5, seed=123) - shape = (12, 12, 3) - img = np.random.random((12,) + shape) - out = layer(img) # Default to training=True - self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) - - out = layer(img, training=True) - self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) - - out = layer(img, training=False) - self.assertEqual(tuple(int(i) for i in out.shape[1:]), shape) - - def test_call_in_container(self): - layer1 = image_preprocessing.RandomWidth(.5, seed=123) - layer2 = image_preprocessing.RandomHeight(.5, seed=123) - seq = sequential.Sequential([layer1, layer2]) - - shape = (12, 12, 3) - img = np.random.random((12,) + shape) - out = seq(img) # Default to training=True - self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) - - out = seq(img, training=True) - self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) - - out = seq(img, training=False) - self.assertEqual(tuple(int(i) for i in out.shape[1:]), shape) +class RandomZoomTest(test_combinations.TestCase): + def _run_test(self, height_factor, width_factor): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + kwargs = {"height_factor": height_factor, "width_factor": width_factor} + with test_utils.use_gpu(): + test_utils.layer_test( + image_preprocessing.RandomZoom, + kwargs=kwargs, + input_shape=(num_samples, orig_height, orig_width, channels), + expected_output_shape=(None, orig_height, orig_width, channels), + ) + + @parameterized.named_parameters( + ("random_zoom_4_by_6", -0.4, -0.6), + ("random_zoom_2_by_3", -0.2, -0.3), + ("random_zoom_tuple_factor", (-0.4, -0.5), (-0.2, -0.3)), + ) + def test_random_zoom_in(self, height_factor, width_factor): + self._run_test(height_factor, width_factor) + + @parameterized.named_parameters( + ("random_zoom_4_by_6", 0.4, 0.6), + ("random_zoom_2_by_3", 0.2, 0.3), + ("random_zoom_tuple_factor", (0.4, 0.5), (0.2, 0.3)), + ) + def test_random_zoom_out(self, height_factor, width_factor): + self._run_test(height_factor, width_factor) + + def test_random_zoom_in_numeric(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype( + dtype + ) + layer = image_preprocessing.RandomZoom( + (-0.5, -0.5), (-0.5, -0.5), interpolation="nearest" + ) + output_image = layer(np.expand_dims(input_image, axis=0)) + expected_output = np.asarray( + [ + [6, 7, 7, 8, 8], + [11, 12, 12, 13, 13], + [11, 12, 12, 13, 13], + [16, 17, 17, 18, 18], + [16, 17, 17, 18, 18], + ] + ).astype(dtype) + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + 
self.assertAllEqual(expected_output, output_image) + + def test_random_zoom_out_numeric(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype( + dtype + ) + layer = image_preprocessing.RandomZoom( + (0.5, 0.5), + (0.8, 0.8), + fill_mode="constant", + interpolation="nearest", + ) + output_image = layer(np.expand_dims(input_image, axis=0)) + expected_output = np.asarray( + [ + [0, 0, 0, 0, 0], + [0, 5, 7, 9, 0], + [0, 10, 12, 14, 0], + [0, 20, 22, 24, 0], + [0, 0, 0, 0, 0], + ] + ).astype(dtype) + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_zoom_out_numeric_preserve_aspect_ratio(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype( + dtype + ) + layer = image_preprocessing.RandomZoom( + (0.5, 0.5), fill_mode="constant", interpolation="nearest" + ) + output_image = layer(np.expand_dims(input_image, axis=0)) + expected_output = np.asarray( + [ + [0, 0, 0, 0, 0], + [0, 6, 7, 9, 0], + [0, 11, 12, 14, 0], + [0, 21, 22, 24, 0], + [0, 0, 0, 0, 0], + ] + ).astype(dtype) + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_zoom_inference(self): + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = input_images + with test_utils.use_gpu(): + layer = image_preprocessing.RandomZoom(0.5, 0.5) + actual_output = layer(input_images, training=False) + self.assertAllClose(expected_output, actual_output) + + @test_utils.run_v2_only + def test_config_with_custom_name(self): + layer = image_preprocessing.RandomZoom(0.5, 0.6, name="image_preproc") + config = layer.get_config() + layer_1 = image_preprocessing.RandomZoom.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_unbatched_image(self): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype( + np.int64 + ) + layer = image_preprocessing.RandomZoom( + (-0.5, -0.5), (-0.5, -0.5), interpolation="nearest" + ) + output_image = layer(input_image) + expected_output = np.asarray( + [ + [6, 7, 7, 8, 8], + [11, 12, 12, 13, 13], + [11, 12, 12, 13, 13], + [16, 17, 17, 18, 18], + [16, 17, 17, 18, 18], + ] + ).astype(np.int64) + expected_output = np.reshape(expected_output, (5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.RandomZoom(0.5, 0.5) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.RandomZoom(0.5, 0.5, dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") @test_combinations.run_all_keras_modes(always_skip_v1=True) -class DeterminismTest(test_combinations.TestCase): - - @parameterized.named_parameters( - ('random_contrast', - functools.partial(image_preprocessing.RandomContrast, factor=1.)), - ('random_crop', - functools.partial(image_preprocessing.RandomCrop, height=2, width=2)), - ('random_translation', - functools.partial(image_preprocessing.RandomTranslation, 0.3, 0.2)), - ('random_rotation', - functools.partial(image_preprocessing.RandomRotation, 0.5)), - ('random_zoom', functools.partial(image_preprocessing.RandomZoom, 0.2)), - ('random_height', functools.partial(image_preprocessing.RandomHeight, - 0.4)), - 
('random_width', functools.partial(image_preprocessing.RandomWidth, 0.3)), - ) - def test_seed_constructor_arg(self, layer_cls): - input_image = np.random.random((2, 5, 8, 3)).astype(np.float32) - - layer1 = layer_cls(seed=0.) - layer2 = layer_cls(seed=0.) - layer1_output = layer1(input_image) - layer2_output = layer2(input_image) - - self.assertAllClose(layer1_output.numpy().tolist(), - layer2_output.numpy().tolist()) - - -class RandomAddLayer(image_preprocessing.BaseImageAugmentationLayer): - - def __init__(self, value_range=(0., 1.0), fixed_value=None, **kwargs): - super().__init__(**kwargs) - self.value_range = value_range - self.fixed_value = fixed_value - - def get_random_transformation( - self, image=None, label=None, bounding_box=None): - if self.fixed_value: - return self.fixed_value - return self._random_generator.random_uniform( - [], minval=self.value_range[0], maxval=self.value_range[1]) - - def augment_image(self, image, transformation): - return image + transformation - - def augment_label(self, label, transformation): - return label + transformation - - -class VectorizeDisabledLayer(image_preprocessing.BaseImageAugmentationLayer): - - def __init__(self, **kwargs): - self.auto_vectorize = False - super().__init__(**kwargs) +class RandomHeightTest(test_combinations.TestCase): + def _run_test(self, factor): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + with test_utils.use_gpu(): + img = np.random.random( + (num_samples, orig_height, orig_width, channels) + ) + layer = image_preprocessing.RandomHeight(factor) + img_out = layer(img, training=True) + self.assertEqual(img_out.shape[0], 2) + self.assertEqual(img_out.shape[2], 8) + self.assertEqual(img_out.shape[3], 3) + + @parameterized.named_parameters( + ("random_height_4_by_6", (0.4, 0.6)), + ("random_height_3_by_2", (-0.3, 0.2)), + ("random_height_3", 0.3), + ) + def test_random_height_basic(self, factor): + self._run_test(factor) + + def test_valid_random_height(self): + # need (maxval - minval) * rnd + minval = 0.6 + mock_factor = 0.6 + with test_utils.use_gpu(): + img = np.random.random((12, 5, 8, 3)) + layer = image_preprocessing.RandomHeight(0.4) + with tf.compat.v1.test.mock.patch.object( + layer._random_generator, + "random_uniform", + return_value=mock_factor, + ): + img_out = layer(img, training=True) + self.assertEqual(img_out.shape[1], 3) + + def test_random_height_longer_numeric(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 6), (2, 3, 1)).astype( + dtype + ) + layer = image_preprocessing.RandomHeight(factor=(1.0, 1.0)) + # Return type of RandomHeight() is float32 + # if `interpolation` is not + # set to `ResizeMethod.NEAREST_NEIGHBOR`; + # cast `layer` to desired dtype. 
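The fractional rows in the expected output just below (`0.75`, `2.25`, ...) fall out of bilinear resizing with half-pixel centers when the 2-row input is stretched to 4 rows; a sketch of that arithmetic, assuming the half-pixel convention `tf.image.resize` uses by default:

```python
import numpy as np

src = np.arange(6, dtype=np.float32).reshape(2, 3)  # the 2x3 test input
out = np.empty((4, 3), dtype=np.float32)
for i in range(4):
    # Half-pixel-centers source coordinate for output row i, clipped into
    # range, then interpolated between the two nearest source rows.
    pos = float(np.clip((i + 0.5) * (2 / 4) - 0.5, 0.0, 1.0))
    lo = int(np.floor(pos))
    hi = min(lo + 1, 1)
    frac = pos - lo
    out[i] = (1 - frac) * src[lo] + frac * src[hi]
print(out)  # [[0, 1, 2], [0.75, 1.75, 2.75], [2.25, 3.25, 4.25], [3, 4, 5]]
```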
+ output_image = tf.cast( + layer(np.expand_dims(input_image, axis=0)), dtype=dtype + ) + # pyformat: disable + expected_output = np.asarray( + [ + [0, 1, 2], + [0.75, 1.75, 2.75], + [2.25, 3.25, 4.25], + [3, 4, 5], + ] + ).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 4, 3, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_height_shorter_numeric(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 8), (4, 2, 1)).astype( + dtype + ) + layer = image_preprocessing.RandomHeight( + factor=(-0.5, -0.5), interpolation="nearest" + ) + output_image = layer(np.expand_dims(input_image, axis=0)) + # pyformat: disable + expected_output = np.asarray([[2, 3], [6, 7]]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 2, 2, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_height_invalid_factor(self): + with self.assertRaises(ValueError): + image_preprocessing.RandomHeight((-1.5, 0.4)) + + def test_random_height_inference(self): + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = input_images + with test_utils.use_gpu(): + layer = image_preprocessing.RandomHeight(0.5) + actual_output = layer(input_images, training=False) + self.assertAllClose(expected_output, actual_output) + + @test_utils.run_v2_only + def test_config_with_custom_name(self): + layer = image_preprocessing.RandomHeight(0.5, name="image_preproc") + config = layer.get_config() + layer_1 = image_preprocessing.RandomHeight.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_unbatched_image(self): + # need (maxval - minval) * rnd + minval = 0.6 + mock_factor = 0.6 + with test_utils.use_gpu(): + img = np.random.random((5, 8, 3)) + layer = image_preprocessing.RandomHeight(0.4) + with tf.compat.v1.test.mock.patch.object( + layer._random_generator, + "random_uniform", + return_value=mock_factor, + ): + img_out = layer(img, training=True) + self.assertEqual(img_out.shape[0], 3) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.RandomHeight(0.2) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.RandomHeight(0.2, dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") @test_combinations.run_all_keras_modes(always_skip_v1=True) -class BaseImageAugmentationLayerTest(test_combinations.TestCase): - - def test_augment_single_image(self): - add_layer = RandomAddLayer(fixed_value=2.0) - image = np.random.random(size=(8, 8, 3)).astype('float32') - output = add_layer(image) - - self.assertAllClose(image + 2.0, output) - - def test_augment_dict_return_type(self): - add_layer = RandomAddLayer(fixed_value=2.0) - image = np.random.random(size=(8, 8, 3)).astype('float32') - output = add_layer({'images': image}) - - self.assertIsInstance(output, dict) - - def test_auto_vectorize_disabled(self): - vectorize_disabled_layer = VectorizeDisabledLayer() - self.assertFalse(vectorize_disabled_layer.auto_vectorize) - self.assertEqual(vectorize_disabled_layer._map_fn, tf.map_fn) - - @test_utils.run_v2_only - def test_augment_casts_dtypes(self): - add_layer = RandomAddLayer(fixed_value=2.0) - images = tf.ones((2, 8, 8, 3), dtype='uint8') - output = add_layer(images) - - self.assertAllClose(tf.ones((2, 8, 8, 3), dtype='float32') * 3.0, output) +class 
RandomWidthTest(test_combinations.TestCase): + def _run_test(self, factor): + np.random.seed(1337) + num_samples = 2 + orig_height = 5 + orig_width = 8 + channels = 3 + with test_utils.use_gpu(): + img = np.random.random( + (num_samples, orig_height, orig_width, channels) + ) + layer = image_preprocessing.RandomWidth(factor) + img_out = layer(img, training=True) + self.assertEqual(img_out.shape[0], 2) + self.assertEqual(img_out.shape[1], 5) + self.assertEqual(img_out.shape[3], 3) + + @parameterized.named_parameters( + ("random_width_4_by_6", (0.4, 0.6)), + ("random_width_3_by_2", (-0.3, 0.2)), + ("random_width_3", 0.3), + ) + def test_random_width_basic(self, factor): + self._run_test(factor) + + def test_valid_random_width(self): + # need (maxval - minval) * rnd + minval = 0.6 + mock_factor = 0.6 + with test_utils.use_gpu(): + img = np.random.random((12, 8, 5, 3)) + layer = image_preprocessing.RandomWidth(0.4) + with tf.compat.v1.test.mock.patch.object( + layer._random_generator, + "random_uniform", + return_value=mock_factor, + ): + img_out = layer(img, training=True) + self.assertEqual(img_out.shape[2], 3) + + def test_random_width_longer_numeric(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 6), (3, 2, 1)).astype( + dtype + ) + layer = image_preprocessing.RandomWidth(factor=(1.0, 1.0)) + # Return type of RandomWidth() is float32 + # if `interpolation` is not + # set to `ResizeMethod.NEAREST_NEIGHBOR`; + # cast `layer` to desired dtype. + output_image = tf.cast( + layer(np.expand_dims(input_image, axis=0)), dtype=dtype + ) + # pyformat: disable + expected_output = np.asarray( + [[0, 0.25, 0.75, 1], [2, 2.25, 2.75, 3], [4, 4.25, 4.75, 5]] + ).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 3, 4, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_width_shorter_numeric(self): + for dtype in (np.int64, np.float32): + with test_utils.use_gpu(): + input_image = np.reshape(np.arange(0, 8), (2, 4, 1)).astype( + dtype + ) + layer = image_preprocessing.RandomWidth( + factor=(-0.5, -0.5), interpolation="nearest" + ) + output_image = layer(np.expand_dims(input_image, axis=0)) + # pyformat: disable + expected_output = np.asarray([[1, 3], [5, 7]]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 2, 2, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_width_invalid_factor(self): + with self.assertRaises(ValueError): + image_preprocessing.RandomWidth((-1.5, 0.4)) + + def test_random_width_inference(self): + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + expected_output = input_images + with test_utils.use_gpu(): + layer = image_preprocessing.RandomWidth(0.5) + actual_output = layer(input_images, training=False) + self.assertAllClose(expected_output, actual_output) + + @test_utils.run_v2_only + def test_config_with_custom_name(self): + layer = image_preprocessing.RandomWidth(0.5, name="image_preproc") + config = layer.get_config() + layer_1 = image_preprocessing.RandomWidth.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + def test_unbatched_image(self): + # need (maxval - minval) * rnd + minval = 0.6 + mock_factor = 0.6 + with test_utils.use_gpu(): + img = np.random.random((8, 5, 3)) + layer = image_preprocessing.RandomWidth(0.4) + with tf.compat.v1.test.mock.patch.object( + layer._random_generator, + "random_uniform", + return_value=mock_factor, + ): + img_out = 
layer(img, training=True) + self.assertEqual(img_out.shape[1], 3) + + @test_utils.run_v2_only + def test_output_dtypes(self): + inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64") + layer = image_preprocessing.RandomWidth(0.2) + self.assertAllEqual(layer(inputs).dtype, "float32") + layer = image_preprocessing.RandomWidth(0.2, dtype="uint8") + self.assertAllEqual(layer(inputs).dtype, "uint8") - def test_augment_batch_images(self): - add_layer = RandomAddLayer() - images = np.random.random(size=(2, 8, 8, 3)).astype('float32') - output = add_layer(images) - diff = output - images - # Make sure the first image and second image get different augmentation - self.assertNotAllClose(diff[0], diff[1]) +@test_combinations.run_all_keras_modes(always_skip_v1=True) +class LearningPhaseTest(test_combinations.TestCase): + def test_plain_call(self): + layer = image_preprocessing.RandomWidth(0.5, seed=123) + shape = (12, 12, 3) + img = np.random.random((12,) + shape) + out = layer(img) # Defaults to training=True + self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) - def test_augment_image_and_label(self): - add_layer = RandomAddLayer(fixed_value=2.0) - image = np.random.random(size=(8, 8, 3)).astype('float32') - label = np.random.random(size=(1,)).astype('float32') + out = layer(img, training=True) + self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) - output = add_layer({'images': image, 'labels': label}) - expected_output = {'images': image + 2.0, 'labels': label + 2.0} - self.assertAllClose(output, expected_output) + out = layer(img, training=False) + self.assertEqual(tuple(int(i) for i in out.shape[1:]), shape) - def test_augment_image_and_target(self): - add_layer = RandomAddLayer(fixed_value=2.0) - image = np.random.random(size=(8, 8, 3)).astype('float32') - label = np.random.random(size=(1,)).astype('float32') + def test_call_in_container(self): + layer1 = image_preprocessing.RandomWidth(0.5, seed=123) + layer2 = image_preprocessing.RandomHeight(0.5, seed=123) + seq = sequential.Sequential([layer1, layer2]) - output = add_layer({'images': image, 'targets': label}) - expected_output = {'images': image + 2.0, 'targets': label + 2.0} - self.assertAllClose(output, expected_output) + shape = (12, 12, 3) + img = np.random.random((12,) + shape) + out = seq(img) # Defaults to training=True + self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) - def test_augment_batch_images_and_labels(self): - add_layer = RandomAddLayer() - images = np.random.random(size=(2, 8, 8, 3)).astype('float32') - labels = np.random.random(size=(2, 1)).astype('float32') - output = add_layer({'images': images, 'labels': labels}) + out = seq(img, training=True) + self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) - image_diff = output['images'] - images - label_diff = output['labels'] - labels - # Make sure the first image and second image get different augmentation - self.assertNotAllClose(image_diff[0], image_diff[1]) - self.assertNotAllClose(label_diff[0], label_diff[1]) + out = seq(img, training=False) + self.assertEqual(tuple(int(i) for i in out.shape[1:]), shape) -if __name__ == '__main__': - tf.test.main() +@test_combinations.run_all_keras_modes(always_skip_v1=True) +class DeterminismTest(test_combinations.TestCase): + @parameterized.named_parameters( + ("random_flip", image_preprocessing.RandomFlip), + ( + "random_contrast", + functools.partial(image_preprocessing.RandomContrast, factor=1.0), + ), + ( + "random_crop", + functools.partial( + 
image_preprocessing.RandomCrop, height=2, width=2 + ), + ), + ( + "random_translation", + functools.partial(image_preprocessing.RandomTranslation, 0.3, 0.2), + ), + ( + "random_rotation", + functools.partial(image_preprocessing.RandomRotation, 0.5), + ), + ("random_zoom", functools.partial(image_preprocessing.RandomZoom, 0.2)), + ( + "random_height", + functools.partial(image_preprocessing.RandomHeight, 0.4), + ), + ( + "random_width", + functools.partial(image_preprocessing.RandomWidth, 0.3), + ), + ) + def test_seed_constructor_arg(self, layer_cls): + input_image = np.random.random((2, 5, 8, 3)).astype(np.float32) + + layer1 = layer_cls(seed=0.0) + layer2 = layer_cls(seed=0.0) + layer1_output = layer1(input_image) + layer2_output = layer2(input_image) + + self.assertAllClose( + layer1_output.numpy().tolist(), layer2_output.numpy().tolist() + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/preprocessing/index_lookup.py b/keras/layers/preprocessing/index_lookup.py index 752f2c294bf6..4747b7ac206e 100644 --- a/keras/layers/preprocessing/index_lookup.py +++ b/keras/layers/preprocessing/index_lookup.py @@ -14,20 +14,21 @@ # ============================================================================== """Keras index lookup preprocessing layer.""" -# pylint: disable=g-classes-have-attributes - import collections +import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras.engine import base_layer_utils from keras.engine import base_preprocessing_layer from keras.layers.preprocessing import preprocessing_utils as utils -from keras.saving.saved_model import layer_serialization +from keras.saving.legacy.saved_model import layer_serialization from keras.utils import layer_utils from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.platform import tf_logging as logging INT = utils.INT @@ -41,770 +42,958 @@ class NullInitializer(tf.lookup.KeyValueTensorInitializer): - """A placeholder initializer for restoring this layer from a SavedModel.""" + """A placeholder initializer for restoring this layer from a SavedModel.""" - def __init__(self, key_dtype, value_dtype): - """Construct a table initializer object. + def __init__(self, key_dtype, value_dtype): + """Construct a table initializer object. - Args: - key_dtype: Type of the table keys. - value_dtype: Type of the table values. - """ - self._key_dtype = key_dtype - self._value_dtype = value_dtype + Args: + key_dtype: Type of the table keys. + value_dtype: Type of the table values. 
+ """ + self._key_dtype = key_dtype + self._value_dtype = value_dtype - @property - def key_dtype(self): - """The expected table key dtype.""" - return self._key_dtype + @property + def key_dtype(self): + """The expected table key dtype.""" + return self._key_dtype - @property - def value_dtype(self): - """The expected table value dtype.""" - return self._value_dtype + @property + def value_dtype(self): + """The expected table value dtype.""" + return self._value_dtype - def initialize(self, table): - """Returns the table initialization op.""" - pass + def initialize(self, table): + """Returns the table initialization op.""" + pass class VocabWeightHandler(base_layer_utils.TrackableWeightHandler): - """Adds the vocabulary as a layer weight during serialization.""" + """Adds the vocabulary as a layer weight during serialization.""" - def __init__(self, lookup_layer): - self._layer = lookup_layer - self._dtype = lookup_layer.vocabulary_dtype - self._distribute_strategy = tf.distribute.get_strategy() + def __init__(self, lookup_layer): + # Note that this class doesn't call super().__init__() in order to + # have customized behavior. The fileds like '_dtype' and + # '_distribute_strategy' are required by the parent class, as well as + # tf.distribute. See `strategy.extended.variable_created_in_scope` + self._layer = lookup_layer + self._dtype = lookup_layer.vocabulary_dtype + self._distribute_strategy = tf.distribute.get_strategy() - @property - def num_tensors(self): - return 1 + @property + def num_tensors(self): + return 1 - def set_weights(self, weights): - tokens = tf.convert_to_tensor(weights[0], self._dtype) - self._layer.lookup_table = self._layer._lookup_table_from_tokens(tokens) # pylint: disable=protected-access + def set_weights(self, weights): + tokens = tf.convert_to_tensor(weights[0], self._dtype) + self._layer.lookup_table = self._layer._lookup_table_from_tokens(tokens) - def get_tensors(self): - # Just save the non-config part of the vocab (no special tokens). - tokens = self._layer.get_vocabulary(include_special_tokens=False) - tokens = tf.convert_to_tensor(tokens, self._dtype) - return [tokens] + def get_tensors(self): + # Just save the non-config part of the vocab (no special tokens). + tokens = self._layer.get_vocabulary(include_special_tokens=False) + tokens = tf.convert_to_tensor(tokens, self._dtype) + return [tokens] class IndexLookup(base_preprocessing_layer.PreprocessingLayer): - """Maps values from a vocabulary to integer indices. - - This layer translates a set of arbitrary hashables into an integer output via - a table-based lookup, with optional out-of-vocabulary handling. This is the - basis layer for both IntegerLookup and StringLookup; it holds the common - logic but is not intended to be exported as part of the Keras API. - - Args: - max_tokens: The maximum size of the vocabulary for this layer. If None, - there is no cap on the size of the vocabulary. Note that this size - includes the OOV and mask tokens. - num_oov_indices: The number of out-of-vocabulary tokens to use. If this - value is more than 1, OOV inputs are hashed to determine their OOV value. - If this value is 0, OOV inputs will cause an error when calling the layer. - mask_token: A token that represents masked inputs. When `output_mode` is - `"int"`, the token is included in vocabulary and mapped to index 0. In - other output modes, the token will not appear in the vocabulary and - instances of the mask token in the input will be dropped. If set to None, - no mask term will be added. 
- oov_token: Only used when `invert` is True. The token to return for OOV - indices. - vocabulary: Optional. Either an array or a string path to a text file. If - passing an array, can pass a tuple, list, 1D numpy array, or 1D tensor - containing the vocbulary terms. If passing a file path, the file should - contain one line per term in the vocabulary. If this argument is set, - there is no need to `adapt` the layer. - vocabulary_dtype: The dtype of the vocabulary terms. For example, `"int64"` - or `"string"`. - idf_weights: Only valid when `output_mode` is `"tf_idf"`. A tuple, list, 1D - numpy array, or 1D tensor or the same length as the vocabulary, containing - the floating point inverse document frequency weights, which will be - multiplied by per sample term counts for the final `tf_idf` weight. If the - `vocabulary` argument is set, and `output_mode` is `"tf_idf"`, this - argument must be supplied. - invert: Only valid when `output_mode` is `"int"`. If True, this layer will - map indices to vocabulary items instead of mapping vocabulary items to - indices. Default to False. - output_mode: Specification for the output of the layer. Defaults to `"int"`. - Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or - `"tf_idf"` configuring the layer as follows: - - `"int"`: Return the raw integer indices of the input tokens. - - `"one_hot"`: Encodes each individual element in the input into an - array the same size as the vocabulary, containing a 1 at the element - index. If the last dimension is size 1, will encode on that dimension. - If the last dimension is not size 1, will append a new dimension for - the encoded output. - - `"multi_hot"`: Encodes each sample in the input into a single array - the same size as the vocabulary, containing a 1 for each vocabulary - term present in the sample. Treats the last dimension as the sample - dimension, if input shape is (..., sample_length), output shape will - be (..., num_tokens). - - `"count"`: As `"multi_hot"`, but the int array contains a count of the - number of times the token at that index appeared in the sample. - - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to - find the value in each token slot. - pad_to_max_tokens: Only valid when `output_mode` is `"multi_hot"`, - `"count"`, or `"tf_idf"`. If True, the output will have its feature axis - padded to `max_tokens` even if the number of unique tokens in the - vocabulary is less than max_tokens, resulting in a tensor of shape - [batch_size, max_tokens] regardless of vocabulary size. Defaults to False. - sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, `"count"` - and `"tf-idf"` output modes. If True, returns a `SparseTensor` instead of - a dense `Tensor`. Defaults to False. - """ - - def __init__(self, - max_tokens, - num_oov_indices, - mask_token, - oov_token, - vocabulary_dtype, - vocabulary=None, - idf_weights=None, - invert=False, - output_mode="int", - sparse=False, - pad_to_max_tokens=False, - **kwargs): - # If max_tokens is set, the value must be greater than 1 - otherwise we - # are creating a 0-element vocab, which doesn't make sense. - if max_tokens is not None and max_tokens <= 1: - raise ValueError(f"If set, `max_tokens` must be greater than 1. " - f"Received: max_tokens={max_tokens}") - - if pad_to_max_tokens and max_tokens is None: - raise ValueError(f"If pad_to_max_tokens is True, must set `max_tokens`. 
" - f"Received: max_tokens={max_tokens}") - - if num_oov_indices < 0: - raise ValueError(f"`num_oov_indices` must be greater than or equal to 0. " - f"Received: num_oov_indices={num_oov_indices}") - - # Support deprecated names for output_modes. - if output_mode == "binary": - output_mode = MULTI_HOT - if output_mode == "tf-idf": - output_mode = TF_IDF - # 'output_mode' must be one of (INT, ONE_HOT, MULTI_HOT, COUNT, TF_IDF) - layer_utils.validate_string_arg( - output_mode, - allowable_strings=(INT, ONE_HOT, MULTI_HOT, COUNT, TF_IDF), - layer_name=self.__class__.__name__, - arg_name="output_mode") - - if invert and output_mode != INT: - raise ValueError(f"`output_mode` must be `'int'` when `invert` is true. " - f"Received: output_mode={output_mode}") - - if sparse and output_mode == INT: - raise ValueError(f"`sparse` may only be true if `output_mode` is " - f"`'one_hot'`, `'multi_hot'`, `'count'` or `'tf_idf'`. " - f"Received: sparse={sparse} and " - f"output_mode={output_mode}") - - if idf_weights is not None and output_mode != TF_IDF: - raise ValueError(f"`idf_weights` should only be set if `output_mode` is " - f"`'tf_idf'`. Received: idf_weights={idf_weights} and " - f"output_mode={output_mode}") - - self.invert = invert - self.max_tokens = max_tokens - self.num_oov_indices = num_oov_indices - self.mask_token = mask_token - self.oov_token = oov_token - self.output_mode = output_mode - self.sparse = sparse - self.pad_to_max_tokens = pad_to_max_tokens - self.vocabulary_dtype = vocabulary_dtype - self._frozen_vocab_size = None - - self.input_vocabulary = vocabulary - self.input_idf_weights = idf_weights - # VocabularySavedModelSaver will clear the config vocabulary to restore the - # lookup table ops directly. We persist this hidden option to persist the - # fact that we have have a non-adaptable layer with a manually set vocab. - self._has_input_vocabulary = kwargs.pop("has_input_vocabulary", - (vocabulary is not None)) - - # Drop deprecated config options. - kwargs.pop("vocabulary_size", None) - kwargs.pop("has_static_table", None) - - # By default, output int64 when output_mode='int' and floats otherwise. - if "dtype" not in kwargs: - kwargs["dtype"] = tf.int64 if output_mode == INT else backend.floatx() - - super().__init__(**kwargs) - - # Check dtype only after base layer parses it; dtype parsing is complex. - if output_mode == INT and not tf.as_dtype(self.compute_dtype).is_integer: - input_dtype = kwargs["dtype"] - raise ValueError("When `output_mode='int'`, `dtype` should be an integer " - f"type. Received: dtype={input_dtype}") - - if invert: - self._key_dtype = self.dtype if output_mode == INT else tf.int64 - self._value_dtype = tf.as_dtype(self.vocabulary_dtype) - mask_key = 0 - mask_value = mask_token - self._default_value = self.oov_token - else: - self._key_dtype = tf.as_dtype(self.vocabulary_dtype) - self._value_dtype = self.dtype if output_mode == INT else tf.int64 - mask_key = mask_token - # Masks should map to 0 for int output and be dropped otherwise. Max ints - # will be dropped from the bincount op. - mask_value = 0 if self.output_mode == INT else self._value_dtype.max - if self.num_oov_indices == 0: - # If there are no OOV indices, we map OOV tokens to -1 and error out - # during call if we find a negative index. - self._default_value = -1 - elif self.num_oov_indices == 1: - # If there is only one OOV index, we can set that index as the default - # value of the index_lookup table. 
- self._default_value = self._oov_start_index() - else: - # If we have multiple OOV values, we need to do a further hashing step; - # to make this easier, we set the OOV value to -1. (This lets us do a - # vectorized add and cast to boolean to determine locations where we - # need to do extra hashing.) - self._default_value = -1 - if self.mask_token is not None: - self._mask_key = tf.convert_to_tensor(mask_key, self._key_dtype) - self._mask_value = tf.convert_to_tensor(mask_value, self._value_dtype) - - if self.output_mode == TF_IDF: - self.idf_weights = tf.Variable( - [0] * self._token_start_index(), - shape=(None,), - dtype=self.compute_dtype, - trainable=False) - self.idf_weights_const = self.idf_weights.value() - - if vocabulary is not None: - self.set_vocabulary(vocabulary, idf_weights) - else: - # When restoring from a keras SavedModel, the loading code will expect to - # find and restore a lookup_table attribute on the layer. This table needs - # to be uninitialized as a StaticHashTable cannot be initialized twice. - self.lookup_table = self._uninitialized_lookup_table() - - # Only set up adapt state if we did not receive a vocab on construction. - if not self._has_input_vocabulary: - # Add a custom weight handler to return the layers vocab as it's weight. - self._add_trackable(VocabWeightHandler(self), False) - # Set adapt state. - self.token_counts = tf.lookup.experimental.MutableHashTable( - key_dtype=vocabulary_dtype, value_dtype=tf.int64, default_value=0) - if self.output_mode == TF_IDF: - self.token_document_counts = tf.lookup.experimental.MutableHashTable( - key_dtype=vocabulary_dtype, value_dtype=tf.int64, default_value=0) - self.num_documents = tf.Variable(0, dtype=tf.int64, trainable=False) - - def compute_output_shape(self, input_shape): - if self.output_mode == INT: - return input_shape - depth = ( - self.max_tokens if self.pad_to_max_tokens else self._frozen_vocab_size) - return tf.TensorShape([input_shape[0], depth]) - - def compute_output_signature(self, input_spec): - output_shape = self.compute_output_shape(input_spec.shape.as_list()) - output_dtype = self.vocabulary_dtype if self.invert else self.compute_dtype - return tf.TensorSpec(shape=output_shape, dtype=output_dtype) - - def get_vocabulary(self, include_special_tokens=True): - """Returns the current vocabulary of the layer. + """Maps values from a vocabulary to integer indices. - Args: - include_special_tokens: If True, the returned vocabulary will include mask - and OOV tokens, and a term's index in the vocabulary will equal the - term's index when calling the layer. If False, the returned vocabulary - will not include any mask or OOV tokens. - """ - # The lookup table data will not be sorted, so we will create a inverted - # lookup here, and use that to lookup a range of indices [0, vocab_size). - if self.lookup_table.size() == 0: - vocab, indices = [], [] - else: - keys, values = self.lookup_table.export() - vocab, indices = (values, keys) if self.invert else (keys, values) - vocab, indices = (self._tensor_vocab_to_numpy(vocab), indices.numpy()) - lookup = collections.defaultdict(lambda: self.oov_token, - zip(indices, vocab)) - vocab = [lookup[x] for x in range(self.vocabulary_size())] - if self.mask_token is not None and self.output_mode == INT: - vocab[0] = self.mask_token - if not include_special_tokens: - vocab = vocab[self._token_start_index():] - return vocab - - def vocabulary_size(self): - """Gets the current size of the layer's vocabulary. 
- - Returns: - The integer size of the voculary, including optional mask and oov indices. - """ - return int(self.lookup_table.size().numpy()) + self._token_start_index() - - def vocab_size(self): - logging.warning("vocab_size is deprecated, please use vocabulary_size.") - return self.vocabulary_size() - - def get_config(self): - config = { - "invert": self.invert, - "max_tokens": self.max_tokens, - "num_oov_indices": self.num_oov_indices, - "oov_token": self.oov_token, - "mask_token": self.mask_token, - "output_mode": self.output_mode, - "sparse": self.sparse, - "pad_to_max_tokens": self.pad_to_max_tokens, - "vocabulary": utils.listify_tensors(self.input_vocabulary), - "vocabulary_dtype": self.vocabulary_dtype, - "idf_weights": utils.listify_tensors(self.input_idf_weights), - } - - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - def set_vocabulary(self, vocabulary, idf_weights=None): - """Sets vocabulary (and optionally document frequency) data for this layer. - - This method sets the vocabulary and idf weights for this layer directly, - instead of analyzing a dataset through `adapt`. It should be used whenever - the vocab (and optionally document frequency) information is already known. - If vocabulary data is already present in the layer, this method will replace - it. - - Args: - vocabulary: Either an array or a string path to a text file. If passing an - array, can pass a tuple, list, 1D numpy array, or 1D tensor containing - the vocbulary terms. If passing a file path, the file should contain one - line per term in the vocabulary. - idf_weights: A tuple, list, 1D numpy array, or 1D tensor of inverse - document frequency weights with equal length to vocabulary. Must be set - if `output_mode` is `"tf_idf"`. Should not be set otherwise. - - Raises: - ValueError: If there are too many inputs, the inputs do not match, or - input data is missing. - RuntimeError: If the vocabulary cannot be set when this function is - called. This happens when `"multi_hot"`, `"count"`, and `"tf_idf"` - modes, if `pad_to_max_tokens` is False and the layer itself has already - been called. - RuntimeError: If a tensor vocabulary is passed outside of eager execution. - """ - if self.output_mode != TF_IDF and idf_weights is not None: - raise ValueError(f"`idf_weights` should only be set if output_mode is " - f"`'tf_idf'`. Received: output_mode={self.output_mode} " - f"and idf_weights={idf_weights}") - - if isinstance(vocabulary, str): - if not tf.io.gfile.exists(vocabulary): - raise ValueError( - "Vocabulary file {} does not exist.".format(vocabulary)) - if self.output_mode == TF_IDF: - raise ValueError("output_mode `'tf_idf'` does not support loading a " - "vocabulary from file.") - self.lookup_table = self._lookup_table_from_file(vocabulary) - return - - if not tf.executing_eagerly() and (tf.is_tensor(vocabulary) or - tf.is_tensor(idf_weights)): - raise RuntimeError( - "Cannot set a tensor vocabulary on {} layer {} when not executing " - "eagerly. Create this layer or call `set_vocabulary` outside of " - "any `tf.function`s and with eager execution enabled.".format( - self.__class__.__name__, self.name)) - - # TODO(mattdangerw): for better performance we should rewrite this entire - # function to operate on tensors and convert vocabulary to a tensor here. 
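For callers, the simplest way to exercise the `set_vocabulary` path described above is through the public subclasses built on this base layer; a quick sketch with `StringLookup` and an invented vocabulary (index 0 is the default `"[UNK]"` OOV slot, since no mask token is set):

    import tensorflow as tf

    layer = tf.keras.layers.StringLookup(
        vocabulary=["earth", "wind", "and", "fire"]
    )
    print(layer(tf.constant(["fire", "and", "planet"])).numpy())  # [4 3 0]
    print(layer.get_vocabulary())
    # ['[UNK]', 'earth', 'wind', 'and', 'fire']
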
- if tf.is_tensor(vocabulary): - vocabulary = self._tensor_vocab_to_numpy(vocabulary) - elif isinstance(vocabulary, (list, tuple)): - vocabulary = np.array(vocabulary) - if tf.is_tensor(idf_weights): - idf_weights = idf_weights.numpy() - elif isinstance(idf_weights, (list, tuple)): - idf_weights = np.array(idf_weights) - - if vocabulary.size == 0: - raise ValueError( - "Cannot set an empty vocabulary, you passed {}.".format(vocabulary)) - - oov_start = self._oov_start_index() - token_start = self._token_start_index() - special_tokens = ( - [self.mask_token] * oov_start + [self.oov_token] * self.num_oov_indices) - found_special_tokens = np.array_equal( - special_tokens, vocabulary[:token_start]) - if found_special_tokens: - tokens = vocabulary[token_start:] - else: - tokens = vocabulary - - repeated_tokens = self._find_repeated_tokens(tokens) - if repeated_tokens: - raise ValueError("The passed vocabulary has at least one repeated " - "term. Please uniquify your dataset. The repeated terms " - "are {}".format(repeated_tokens)) - - if self.mask_token is not None and self.mask_token in tokens: - mask_index = np.argwhere(vocabulary == self.mask_token)[-1] - raise ValueError( - "Found reserved mask token at unexpected location in `vocabulary`. " - "Note that passed `vocabulary` does not need to include the OOV and " - "mask tokens. Either remove all mask and OOV tokens, or include them " - "only at the start of the vocabulary in precisely this order: " - f"{special_tokens}. Received: mask_token={self.mask_token} at " - f"vocabulary index {mask_index}") - # Only error out for oov_token when invert=True. When invert=False, - # oov_token is unused during lookup. - if self.oov_token is not None and self.invert and self.oov_token in tokens: - oov_index = np.argwhere(vocabulary == self.oov_token)[-1] - raise ValueError( - "Found reserved OOV token at unexpected location in `vocabulary`. " - "Note that passed `vocabulary` does not need to include the OOV and " - "mask tokens. Either remove all mask and OOV tokens, or include them " - "only at the start of the vocabulary in precisely this order: " - f"{special_tokens}. Received: oov_token={self.oov_token} at " - f"vocabulary index {oov_index}") - - new_vocab_size = token_start + len(tokens) - if self.max_tokens is not None and (new_vocab_size > self.max_tokens): - raise ValueError( - "Attempted to set a vocabulary larger than the maximum vocab size. " - "Passed vocab size is {}, max vocab size is {}.".format( - new_vocab_size, self.max_tokens)) - self.lookup_table = self._lookup_table_from_tokens(tokens) - - if self.output_mode == TF_IDF: - if idf_weights is None: - raise ValueError("`idf_weights` must be set if output_mode is TF_IDF") - if len(vocabulary) != len(idf_weights): - raise ValueError("`idf_weights` must be the same length as vocabulary. " - "len(idf_weights) is {}, len(vocabulary) is {}".format( - len(vocabulary), len(idf_weights))) - idf_weights = self._convert_to_ndarray(idf_weights) - if idf_weights.ndim != 1: - raise ValueError( - "TF-IDF data must be a 1-index array, but received {}".format( - type(idf_weights))) - - # If the passed vocabulary has no special tokens, we need to pad the front - # of idf_weights. We don't have real document frequencies for these tokens - # so we will use an average of all idf_weights passed in as a reasonable - # default. 
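A worked example of the padding arithmetic above, with invented sizes (`token_start=2`, `max_tokens=6`, `pad_to_max_tokens=True`): the mask/OOV slots get the mean weight as a stand-in, and the tail is zero-padded out to `max_tokens`.

    import numpy as np

    idf_weights = np.array([0.9, 0.7, 0.4])  # one weight per real token
    token_start = 2  # one mask slot + one OOV slot (invented)
    max_tokens = 6
    front_value = np.average(idf_weights)  # stand-in for mask/OOV weights
    back = max_tokens - token_start - len(idf_weights)
    padded = np.pad(idf_weights, (token_start, back), "constant",
                    constant_values=(front_value, 0))
    print(padded.round(3))  # [0.667 0.667 0.9   0.7   0.4   0.   ]
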
- if found_special_tokens: - front_padding = 0 - front_padding_value = 0 - else: - front_padding = token_start - front_padding_value = np.average(idf_weights) - # If pad_to_max_tokens is true, and max_tokens is greater than our total - # vocab size, we need to pad the back of idf_weights with zeros as well. - back_padding_value = 0 - if self.pad_to_max_tokens and self.max_tokens is not None: - back_padding = self.max_tokens - front_padding - len(idf_weights) - else: - back_padding = 0 - weights = np.pad( - idf_weights, (front_padding, back_padding), - "constant", - constant_values=(front_padding_value, back_padding_value)) - weights = tf.convert_to_tensor(weights, dtype=self.compute_dtype) - self.idf_weights.assign(weights) - self.idf_weights_const = self.idf_weights.value() - - def update_state(self, data): - if self._has_input_vocabulary: - raise ValueError( - "Cannot adapt {} layer after setting a static vocabulary via init " - "argument or `set_vocabulary`.".format(self.__class__.__name__)) - - data = utils.ensure_tensor(data, dtype=self.vocabulary_dtype) - if data.shape.rank == 0: - data = tf.expand_dims(data, 0) - if data.shape.rank == 1: - # Expand dims on axis 0 for tf-idf. A 1-d tensor is a single document. - data = tf.expand_dims(data, 0) - - tokens, counts = self._num_tokens(data) - self.token_counts.insert(tokens, counts + self.token_counts.lookup(tokens)) - - if self.output_mode == TF_IDF: - # Dedupe each row of our dataset. - deduped_doc_data = tf.map_fn(lambda x: tf.unique(x)[0], data) - # Flatten and count tokens. - tokens, doc_counts = self._num_tokens(deduped_doc_data) - self.token_document_counts.insert( - tokens, doc_counts + self.token_document_counts.lookup(tokens)) - if tf_utils.is_ragged(data): - self.num_documents.assign_add(data.nrows()) - else: - self.num_documents.assign_add(tf.shape(data, out_type=tf.int64)[0]) - - def finalize_state(self): - if self._has_input_vocabulary or tf.equal(self.token_counts.size(), 0): - # Finalize idf_weights to a const for call even if we don't need to - # compute a new vocabulary. - if self.output_mode == TF_IDF: - self.idf_weights_const = self.idf_weights.value() - return - - # Remove special tokens from our counts. - if self.mask_token is not None: - self.token_counts.remove( - tf.convert_to_tensor([self.mask_token], self.vocabulary_dtype)) - if self.oov_token is not None: - self.token_counts.remove( - tf.convert_to_tensor([self.oov_token], self.vocabulary_dtype)) - - tokens, counts = self.token_counts.export() - # To keep vocabs deterministic, we sort our tokens by count and break ties - # by sorting the tokens themselves. Tensorflow has no ops for sorting - # strings, so we need to use numpy for the sort. - sorted_indices = np.lexsort((tokens.numpy(), counts.numpy()))[::-1] - token_start = self._token_start_index() - if self.max_tokens: - max_learned_tokens = self.max_tokens - token_start - sorted_indices = sorted_indices[:max_learned_tokens] - tokens = tf.gather(tokens, sorted_indices) - self.lookup_table = self._lookup_table_from_tokens(tokens) - - if self.output_mode == TF_IDF: - token_document_counts = self.token_document_counts.lookup(tokens) - idf_weights = self._inverse_document_frequency(token_document_counts, - self.num_documents) - idf_weights = tf.cast(idf_weights, self.compute_dtype) - # Pad the front of idf_weights with the average idf weight for OOV tokens. - # We cannot compute the real idf weight of OOV in a single pass. 
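For reference, the "real" weights mentioned in the comment above come from the IDF formula implemented at the bottom of this class, log(1 + N / (1 + df)); a quick numeric check with invented document counts (N=100) shows rarer tokens getting larger weights. This is a sketch, not the layer's own code path.

    import tensorflow as tf

    num_documents = tf.constant(100.0, tf.float64)
    doc_counts = tf.constant([99.0, 9.0, 0.0], tf.float64)  # invented counts
    idf = tf.math.log(1 + num_documents / (1 + doc_counts))
    print(idf.numpy().round(3))  # [0.693 2.398 4.615]
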
- idf_weights = tf.pad( - idf_weights, [[self._token_start_index(), 0]], - constant_values=tf.reduce_mean(idf_weights)) - if self.pad_to_max_tokens and self.max_tokens is not None: - # Pad the back of idf_weights with zeros. - idf_weights = tf.pad( - idf_weights, [[0, self.max_tokens - tf.size(idf_weights)]], - constant_values=0) - self.idf_weights.assign(idf_weights) - self.idf_weights_const = self.idf_weights.value() - - # We call this here to save memory, now that we've built our vocabulary, we - # don't want to keep every token we've seen in separate lookup tables. - self.reset_state() - - def reset_state(self): # pylint: disable=method-hidden - if self._has_input_vocabulary: - return - - self.token_counts.remove(self.token_counts.export()[0]) - if self.output_mode == TF_IDF: - self.token_document_counts.remove(self.token_document_counts.export()[0]) - self.num_documents.assign(0) - - def call(self, inputs): - self._maybe_freeze_vocab_size() - - inputs = utils.ensure_tensor(inputs, dtype=self._key_dtype) - original_shape = inputs.shape - # Some ops will not handle scalar input, so uprank to rank 1. - if inputs.shape.rank == 0: - inputs = self._expand_dims(inputs, -1) - - if tf_utils.is_sparse(inputs): - lookups = tf.SparseTensor(inputs.indices, - self._lookup_dense(inputs.values), - inputs.dense_shape) - elif tf_utils.is_ragged(inputs): - lookups = tf.ragged.map_flat_values(self._lookup_dense, inputs) - else: - lookups = self._lookup_dense(inputs) - - if self.output_mode == INT: - # If we received a scalar input, downrank back to a scalar. - if original_shape.rank == 0: - lookups = tf.squeeze(lookups, -1) - return lookups - - depth = ( - self.max_tokens if self.pad_to_max_tokens else self._frozen_vocab_size) - idf_weights = self.idf_weights_const if self.output_mode == TF_IDF else None - return utils.encode_categorical_inputs( - lookups, - output_mode=self.output_mode, - depth=depth, - dtype=self.compute_dtype, - sparse=self.sparse, - idf_weights=idf_weights) - - def _lookup_dense(self, inputs): - """Lookup table values for a dense Tensor, handling masking and OOV.""" - # When executing eagerly and tracing keras.Inputs, do not call lookup. This - # is critical for restoring SavedModel, which will first trace layer.call - # and then attempt to restore the table. We need the table to be - # uninitialized for the restore to work, but calling the table uninitialized - # would error. - if tf.executing_eagerly() and backend.is_keras_tensor(inputs): - lookups = tf.zeros_like(inputs, dtype=self._value_dtype) - else: - lookups = self.lookup_table.lookup(inputs) - - if self.mask_token is not None: - mask_locations = tf.equal(inputs, self._mask_key) - lookups = tf.where(mask_locations, self._mask_value, lookups) - - if self.invert: - return lookups - - lookup_checks = [] - - if self.num_oov_indices == 0: - # If we have zero oov indices, we need to check for oov inputs. - oov_indices = tf.where(tf.equal(lookups, -1)) - oov_inputs = tf.gather_nd(inputs, oov_indices) - msg = tf.strings.format( - "When `num_oov_indices=0` all inputs should be in vocabulary, " - "found OOV values {}, consider setting `num_oov_indices=1`.", - (oov_inputs,)) - assertion = tf.Assert(tf.equal(tf.size(oov_indices), 0), [msg]) - lookup_checks.append(assertion) - elif self.num_oov_indices > 1: - # If we have multiple oov indices, we need a further hashing step. 
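The hashing step named in the comment above (and implemented in the lines that follow) can be sketched standalone; the bucket values in the trailing comment are illustrative, since they depend on the fingerprint hash:

    import tensorflow as tf

    num_oov_indices = 2
    oov_start = 1  # assumes a mask token and output_mode="int"
    inputs = tf.constant(["michigan", "saturn"])
    # Deterministic, non-cryptographic hash into num_oov_indices buckets.
    buckets = tf.strings.to_hash_bucket_fast(
        inputs, num_buckets=num_oov_indices
    )
    print((buckets + oov_start).numpy())  # e.g. [2 1]
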
- if self._key_dtype.is_integer: - oov_indices = tf.math.floormod(inputs, self.num_oov_indices) - else: - oov_indices = tf.strings.to_hash_bucket_fast( - inputs, num_buckets=self.num_oov_indices) - oov_indices = oov_indices + self._oov_start_index() - oov_locations = tf.equal(lookups, self._default_value) - lookups = tf.where(oov_locations, oov_indices, lookups) - - with tf.control_dependencies(lookup_checks): - return tf.identity(lookups) - - def _uninitialized_lookup_table(self): - with tf.init_scope(): - initializer = NullInitializer(self._key_dtype, self._value_dtype) - return tf.lookup.StaticHashTable(initializer, self._default_value) - - def _lookup_table_from_tokens(self, tokens): - with tf.init_scope(): - token_start = self._token_start_index() - token_end = token_start + tf.size(tokens) - indices_dtype = self._key_dtype if self.invert else self._value_dtype - indices = tf.range(token_start, token_end, dtype=indices_dtype) - keys, values = (indices, tokens) if self.invert else (tokens, indices) - initializer = tf.lookup.KeyValueTensorInitializer(keys, values, - self._key_dtype, - self._value_dtype) - return tf.lookup.StaticHashTable(initializer, self._default_value) - - def _lookup_table_from_file(self, filename): - if self.invert: - key_index = tf.lookup.TextFileIndex.LINE_NUMBER - value_index = tf.lookup.TextFileIndex.WHOLE_LINE - else: - key_index = tf.lookup.TextFileIndex.WHOLE_LINE - value_index = tf.lookup.TextFileIndex.LINE_NUMBER - with tf.init_scope(): - initializer = tf.lookup.TextFileInitializer( - filename=filename, - key_dtype=self._key_dtype, - key_index=key_index, - value_dtype=self._value_dtype, - value_index=value_index, - value_index_offset=self._token_start_index()) - return tf.lookup.StaticHashTable(initializer, self._default_value) - - def _convert_to_ndarray(self, x): - return np.array(x) if isinstance(x, (list, tuple)) else x - - def _expand_dims(self, inputs, axis): - if tf_utils.is_sparse(inputs): - return tf.sparse.expand_dims(inputs, axis) - else: - return tf.expand_dims(inputs, axis) - - def _oov_start_index(self): - return 1 if self.mask_token is not None and self.output_mode == INT else 0 - - def _token_start_index(self): - return self._oov_start_index() + self.num_oov_indices - - def _maybe_freeze_vocab_size(self): - if self.output_mode == INT or self.pad_to_max_tokens: - return - with tf.init_scope(): - if not tf.executing_eagerly(): - raise RuntimeError( - "When using `output_mode={}` eager execution must be enabled." - .format(self.output_mode)) - new_vocab_size = self.vocabulary_size() - if new_vocab_size == self._token_start_index(): - raise RuntimeError( - "When using `output_mode={}` and `pad_to_max_tokens=False`, you " - "must set the layer's vocabulary before calling it. Either pass " - "a `vocabulary` argument to the layer, or call `adapt` with some " - "sample data.".format(self.output_mode)) - elif (self._frozen_vocab_size is not None and - new_vocab_size != self._frozen_vocab_size): - raise RuntimeError( - "When using `output_mode={}` and `pad_to_max_tokens=False`, the " - "vocabulary size cannot be changed after the layer is called. 
" - "Vocab size is {}, new vocab size is {}".format( - self.output_mode, self._frozen_vocab_size, new_vocab_size)) - self._frozen_vocab_size = new_vocab_size - - def _find_repeated_tokens(self, vocabulary): - """Return all repeated tokens in a vocabulary.""" - vocabulary_set = set(vocabulary) - if len(vocabulary) != len(vocabulary_set): - return [ - item for item, count in collections.Counter(vocabulary).items() - if count > 1 - ] - else: - return [] - - def _num_tokens(self, data): - """Count the number of tokens in a ragged, sparse or dense tensor.""" - if tf_utils.is_sparse(data): - flat_values = data.values - elif tf_utils.is_ragged(data): - flat_values = data.flat_values - else: - flat_values = tf.reshape(data, [-1]) - tokens, _, counts = tf.unique_with_counts(flat_values, out_idx=tf.int64) - return tokens, counts - - def _inverse_document_frequency(self, token_document_counts, num_documents): - """Computes the inverse-document-frequency (IDF) component of "tf_idf". - - Uses the default weighting scheme described in - https://en.wikipedia.org/wiki/Tf%E2%80%93idf. + This layer translates a set of arbitrary hashables into an integer output + via a table-based lookup, with optional out-of-vocabulary handling. This is + the basis layer for both IntegerLookup and StringLookup; it holds the common + logic but is not intended to be exported as part of the Keras API. Args: - token_document_counts: An array of the # of documents each token appears - in. - num_documents: An int representing the total number of documents - - Returns: - An array of "inverse document frequency" weights. + max_tokens: The maximum size of the vocabulary for this layer. If None, + there is no cap on the size of the vocabulary. Note that this size + includes the OOV and mask tokens. + num_oov_indices: The number of out-of-vocabulary tokens to use. If this + value is more than 1, OOV inputs are hashed to determine their OOV + value. If this value is 0, OOV inputs will cause an error when calling + the layer. + mask_token: A token that represents masked inputs. When `output_mode` is + `"int"`, the token is included in vocabulary and mapped to index 0. In + other output modes, the token will not appear in the vocabulary and + instances of the mask token in the input will be dropped. If set to + None, no mask term will be added. + oov_token: Only used when `invert` is True. The token to return for OOV + indices. + vocabulary: Optional. Either an array or a string path to a text file. If + passing an array, can pass a tuple, list, 1D numpy array, or 1D tensor + containing the vocbulary terms. If passing a file path, the file should + contain one line per term in the vocabulary. If this argument is set, + there is no need to `adapt` the layer. + vocabulary_dtype: The dtype of the vocabulary terms. For example, + `"int64"` or `"string"`. + idf_weights: Only valid when `output_mode` is `"tf_idf"`. A tuple, list, + 1D numpy array, or 1D tensor or the same length as the vocabulary, + containing the floating point inverse document frequency weights, which + will be multiplied by per sample term counts for the final `tf_idf` + weight. If the `vocabulary` argument is set, and `output_mode` is + `"tf_idf"`, this argument must be supplied. + invert: Only valid when `output_mode` is `"int"`. If True, this layer will + map indices to vocabulary items instead of mapping vocabulary items to + indices. Defaults to `False`. + output_mode: Specification for the output of the layer. 
Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"` + configuring the layer as follows: + - `"int"`: Return the raw integer indices of the input tokens. + - `"one_hot"`: Encodes each individual element in the input into an + array the same size as the vocabulary, containing a 1 at the element + index. If the last dimension is size 1, will encode on that + dimension. If the last dimension is not size 1, will append a new + dimension for the encoded output. + - `"multi_hot"`: Encodes each sample in the input into a single array + the same size as the vocabulary, containing a 1 for each vocabulary + term present in the sample. Treats the last dimension as the sample + dimension, if input shape is (..., sample_length), output shape will + be (..., num_tokens). + - `"count"`: As `"multi_hot"`, but the int array contains a count of + the number of times the token at that index appeared in the sample. + - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to + find the value in each token slot. + Defaults to `"int"`. + pad_to_max_tokens: Only valid when `output_mode` is `"multi_hot"`, + `"count"`, or `"tf_idf"`. If True, the output will have its feature axis + padded to `max_tokens` even if the number of unique tokens in the + vocabulary is less than max_tokens, resulting in a tensor of shape + [batch_size, max_tokens] regardless of vocabulary size. Defaults to + False. + sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, `"count"` + and `"tf-idf"` output modes. If True, returns a `SparseTensor` instead + of a dense `Tensor`. Defaults to `False`. """ - return tf.math.log(1 + num_documents / (1 + token_document_counts)) - - @property - def _trackable_saved_model_saver(self): - return layer_serialization.VocabularySavedModelSaver(self) - # Override points for IntegerLookup and StringLookup. - def _tensor_vocab_to_numpy(self, vocabulary): - """Converts a tensor vocabulary to a numpy vocabulary.""" - return vocabulary.numpy() + def __init__( + self, + max_tokens, + num_oov_indices, + mask_token, + oov_token, + vocabulary_dtype, + vocabulary=None, + idf_weights=None, + invert=False, + output_mode="int", + sparse=False, + pad_to_max_tokens=False, + **kwargs, + ): + # If max_tokens is set, the value must be greater than 1 - otherwise we + # are creating a 0-element vocab, which doesn't make sense. + if max_tokens is not None and max_tokens <= 1: + raise ValueError( + "If set, `max_tokens` must be greater than 1. " + f"Received: max_tokens={max_tokens}" + ) + + if pad_to_max_tokens and max_tokens is None: + raise ValueError( + "If pad_to_max_tokens is True, must set `max_tokens`. " + f"Received: max_tokens={max_tokens}" + ) + + if num_oov_indices < 0: + raise ValueError( + "`num_oov_indices` must be greater than or equal to 0. " + f"Received: num_oov_indices={num_oov_indices}" + ) + + # Support deprecated names for output_modes. + if output_mode == "binary": + output_mode = MULTI_HOT + if output_mode == "tf-idf": + output_mode = TF_IDF + # 'output_mode' must be one of (INT, ONE_HOT, MULTI_HOT, COUNT, TF_IDF) + layer_utils.validate_string_arg( + output_mode, + allowable_strings=(INT, ONE_HOT, MULTI_HOT, COUNT, TF_IDF), + layer_name=self.__class__.__name__, + arg_name="output_mode", + ) + + if invert and output_mode != INT: + raise ValueError( + "`output_mode` must be `'int'` when `invert` is true. 
" + f"Received: output_mode={output_mode}" + ) + + if sparse and output_mode == INT: + raise ValueError( + "`sparse` may only be true if `output_mode` is " + "`'one_hot'`, `'multi_hot'`, `'count'` or `'tf_idf'`. " + f"Received: sparse={sparse} and " + f"output_mode={output_mode}" + ) + + if idf_weights is not None and output_mode != TF_IDF: + raise ValueError( + "`idf_weights` should only be set if `output_mode` is " + f"`'tf_idf'`. Received: idf_weights={idf_weights} and " + f"output_mode={output_mode}" + ) + + self.invert = invert + self.max_tokens = max_tokens + self.num_oov_indices = num_oov_indices + self.mask_token = mask_token + self.oov_token = oov_token + self.output_mode = output_mode + self.sparse = sparse + self.pad_to_max_tokens = pad_to_max_tokens + self.vocabulary_dtype = vocabulary_dtype + self._frozen_vocab_size = kwargs.pop("vocabulary_size", None) + + self.input_vocabulary = vocabulary + self.input_idf_weights = idf_weights + # VocabularySavedModelSaver will clear the config vocabulary to restore + # the lookup table ops directly. We persist this hidden option to + # persist the fact that we have have a non-adaptable layer with a + # manually set vocab. + self._has_input_vocabulary = kwargs.pop( + "has_input_vocabulary", (vocabulary is not None) + ) + + # Drop deprecated config options. + kwargs.pop("has_static_table", None) + + # By default, output int64 when output_mode='int' and floats otherwise. + if "dtype" not in kwargs: + kwargs["dtype"] = ( + tf.int64 if output_mode == INT else backend.floatx() + ) + + super().__init__(**kwargs) + + # Check dtype only after base layer parses it; dtype parsing is complex. + if ( + output_mode == INT + and not tf.as_dtype(self.compute_dtype).is_integer + ): + input_dtype = kwargs["dtype"] + raise ValueError( + "When `output_mode='int'`, `dtype` should be an integer " + f"type. Received: dtype={input_dtype}" + ) + + if invert: + self._key_dtype = self.dtype if output_mode == INT else tf.int64 + self._value_dtype = tf.as_dtype(self.vocabulary_dtype) + mask_key = 0 + mask_value = mask_token + self._default_value = self.oov_token + else: + self._key_dtype = tf.as_dtype(self.vocabulary_dtype) + self._value_dtype = self.dtype if output_mode == INT else tf.int64 + mask_key = mask_token + # Masks should map to 0 for int output and be dropped otherwise. Max + # ints will be dropped from the bincount op. + mask_value = 0 if self.output_mode == INT else self._value_dtype.max + if self.num_oov_indices == 0: + # If there are no OOV indices, we map OOV tokens to -1 and error + # out during call if we find a negative index. + self._default_value = -1 + elif self.num_oov_indices == 1: + # If there is only one OOV index, we can set that index as the + # default value of the index_lookup table. + self._default_value = self._oov_start_index() + else: + # If we have multiple OOV values, we need to do a further + # hashing step; to make this easier, we set the OOV value to -1. + # (This lets us do a vectorized add and cast to boolean to + # determine locations where we need to do extra hashing.) 
+ self._default_value = -1 + if self.mask_token is not None: + self._mask_key = tf.convert_to_tensor(mask_key, self._key_dtype) + self._mask_value = tf.convert_to_tensor( + mask_value, self._value_dtype + ) + + if self.output_mode == TF_IDF: + self.idf_weights = tf.Variable( + [0] * self._token_start_index(), + shape=(None,), + dtype=self.compute_dtype, + trainable=False, + ) + self.idf_weights_const = self.idf_weights.value() + + if vocabulary is not None: + self.set_vocabulary(vocabulary, idf_weights) + else: + # When restoring from a keras SavedModel, the loading code will + # expect to find and restore a lookup_table attribute on the layer. + # This table needs to be uninitialized as a StaticHashTable cannot + # be initialized twice. + self.lookup_table = self._uninitialized_lookup_table() + + # Only set up adapt state if we did not receive a vocab on construction. + if not self._has_input_vocabulary: + # Add custom weight handler to return the layer's vocab as a weight. + self._add_trackable(VocabWeightHandler(self), False) + # Set adapt state. + self.token_counts = tf.lookup.experimental.MutableHashTable( + key_dtype=vocabulary_dtype, + value_dtype=tf.int64, + default_value=0, + ) + if self.output_mode == TF_IDF: + self.token_document_counts = ( + tf.lookup.experimental.MutableHashTable( + key_dtype=vocabulary_dtype, + value_dtype=tf.int64, + default_value=0, + ) + ) + self.num_documents = tf.Variable( + 0, dtype=tf.int64, trainable=False + ) + + def compute_output_shape(self, input_shape): + if self.output_mode == INT: + return input_shape + depth = ( + self.max_tokens + if self.pad_to_max_tokens + else self._frozen_vocab_size + ) + return tf.TensorShape([input_shape[0], depth]) + + def compute_output_signature(self, input_spec): + output_shape = self.compute_output_shape(input_spec.shape.as_list()) + output_dtype = ( + self.vocabulary_dtype if self.invert else self.compute_dtype + ) + return tf.TensorSpec(shape=output_shape, dtype=output_dtype) + + def get_vocabulary(self, include_special_tokens=True): + """Returns the current vocabulary of the layer. + + Args: + include_special_tokens: If True, the returned vocabulary will include + mask and OOV tokens, and a term's index in the vocabulary will equal + the term's index when calling the layer. If False, the returned + vocabulary will not include any mask or OOV tokens. + """ + # The lookup table data will not be sorted, so we will create a inverted + # lookup here, and use that to lookup a range of indices [0, + # vocab_size). + if self.lookup_table.size() == 0: + vocab, indices = [], [] + else: + keys, values = self.lookup_table.export() + vocab, indices = (values, keys) if self.invert else (keys, values) + vocab, indices = ( + self._tensor_vocab_to_numpy(vocab), + indices.numpy(), + ) + lookup = collections.defaultdict( + lambda: self.oov_token, zip(indices, vocab) + ) + vocab = [lookup[x] for x in range(self.vocabulary_size())] + if self.mask_token is not None and self.output_mode == INT: + vocab[0] = self.mask_token + if not include_special_tokens: + vocab = vocab[self._token_start_index() :] + return vocab + + def vocabulary_size(self): + """Gets the current size of the layer's vocabulary. + + Returns: + The integer size of the vocabulary, including optional mask and oov + indices. 
+ """ + if tf.executing_eagerly(): + return ( + int(self.lookup_table.size().numpy()) + + self._token_start_index() + ) + else: + return self.lookup_table.size() + self._token_start_index() + + def vocab_size(self): + logging.warning("vocab_size is deprecated, please use vocabulary_size.") + return self.vocabulary_size() + + def get_config(self): + config = { + "invert": self.invert, + "max_tokens": self.max_tokens, + "num_oov_indices": self.num_oov_indices, + "oov_token": self.oov_token, + "mask_token": self.mask_token, + "output_mode": self.output_mode, + "sparse": self.sparse, + "pad_to_max_tokens": self.pad_to_max_tokens, + "vocabulary_dtype": self.vocabulary_dtype, + "idf_weights": utils.listify_tensors(self.input_idf_weights), + "vocabulary": utils.listify_tensors(self.input_vocabulary), + "vocabulary_size": self._frozen_vocab_size, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def _record_vocabulary_size(self): + self._ensure_vocab_size_unchanged() + with tf.init_scope(): + self._frozen_vocab_size = self.vocabulary_size() + + def set_vocabulary(self, vocabulary, idf_weights=None): + """Sets vocabulary (and optionally document frequency) for this layer. + + This method sets the vocabulary and idf weights for this layer directly, + instead of analyzing a dataset through `adapt`. It should be used + whenever the vocab (and optionally document frequency) information is + already known. If vocabulary data is already present in the layer, this + method will replace it. + + Args: + vocabulary: Either an array or a string path to a text file. If + passing an array, can pass a tuple, list, 1D numpy array, or 1D + tensor containing the vocbulary terms. If passing a file path, the + file should contain one line per term in the vocabulary. + idf_weights: A tuple, list, 1D numpy array, or 1D tensor of inverse + document frequency weights with equal length to vocabulary. Must be + set if `output_mode` is `"tf_idf"`. Should not be set otherwise. + + Raises: + ValueError: If there are too many inputs, the inputs do not match, or + input data is missing. + RuntimeError: If the vocabulary cannot be set when this function is + called. This happens when `"multi_hot"`, `"count"`, and `"tf_idf"` + modes, if `pad_to_max_tokens` is False and the layer itself has + already been called. + RuntimeError: If a tensor vocabulary is passed outside of eager + execution. + """ + if self.output_mode == TF_IDF: + if idf_weights is None: + raise ValueError( + "`idf_weights` must be set if output_mode is TF_IDF" + ) + elif idf_weights is not None: + raise ValueError( + "`idf_weights` should only be set if output_mode is " + f"`'tf_idf'`. Received: output_mode={self.output_mode} " + f"and idf_weights={idf_weights}" + ) + + if isinstance(vocabulary, str): + if not tf.io.gfile.exists(vocabulary): + raise ValueError( + f"Vocabulary file {vocabulary} does not exist." + ) + if self.output_mode == TF_IDF: + raise ValueError( + "output_mode `'tf_idf'` does not support loading a " + "vocabulary from file." + ) + self.lookup_table = self._lookup_table_from_file(vocabulary) + self._record_vocabulary_size() + return + + if not tf.executing_eagerly() and ( + tf.is_tensor(vocabulary) or tf.is_tensor(idf_weights) + ): + raise RuntimeError( + "Cannot set a tensor vocabulary on {} layer {} when not " + "executing eagerly. 
Create this layer or call `set_vocabulary` " + "outside of any `tf.function`s and with eager execution " + "enabled.".format(self.__class__.__name__, self.name) + ) + + # TODO(mattdangerw): for better performance we should rewrite this + # entire function to operate on tensors and convert vocabulary to a + # tensor here. + if tf.is_tensor(vocabulary): + vocabulary = self._tensor_vocab_to_numpy(vocabulary) + elif isinstance(vocabulary, (list, tuple)): + vocabulary = np.array(vocabulary) + if tf.is_tensor(idf_weights): + idf_weights = idf_weights.numpy() + elif isinstance(idf_weights, (list, tuple)): + idf_weights = np.array(idf_weights) + + if vocabulary.size == 0: + raise ValueError( + f"Cannot set an empty vocabulary, you passed {vocabulary}." + ) + + oov_start = self._oov_start_index() + token_start = self._token_start_index() + special_tokens = [self.mask_token] * oov_start + [ + self.oov_token + ] * self.num_oov_indices + found_special_tokens = np.array_equal( + special_tokens, vocabulary[:token_start] + ) + if found_special_tokens: + tokens = vocabulary[token_start:] + else: + tokens = vocabulary + + repeated_tokens = self._find_repeated_tokens(tokens) + if repeated_tokens: + raise ValueError( + "The passed vocabulary has at least one repeated " + "term. Please uniquify your dataset. The repeated terms " + "are {}".format(repeated_tokens) + ) + + if self.mask_token is not None and self.mask_token in tokens: + mask_index = np.argwhere(vocabulary == self.mask_token)[-1] + raise ValueError( + "Found reserved mask token at unexpected location in " + "`vocabulary`. Note that passed `vocabulary` does not need to " + "include the OOV and mask tokens. Either remove all mask and " + "OOV tokens, or include them only at the start of the " + f"vocabulary in precisely this order: {special_tokens}. " + f"Received: mask_token={self.mask_token} at " + f"vocabulary index {mask_index}" + ) + # Only error out for oov_token when invert=True. When invert=False, + # oov_token is unused during lookup. + if ( + self.oov_token is not None + and self.invert + and self.oov_token in tokens + ): + oov_index = np.argwhere(vocabulary == self.oov_token)[-1] + raise ValueError( + "Found reserved OOV token at unexpected location in " + "`vocabulary`. Note that passed `vocabulary` does not need to " + "include the OOV and mask tokens. Either remove all mask and " + "OOV tokens, or include them only at the start of the " + f"vocabulary in precisely this order: {special_tokens}. " + f"Received: oov_token={self.oov_token} at " + f"vocabulary index {oov_index}" + ) + + new_vocab_size = token_start + len(tokens) + if self.max_tokens is not None and (new_vocab_size > self.max_tokens): + raise ValueError( + "Attempted to set a vocabulary larger than the maximum vocab " + "size. Passed vocab size is {}, max vocab size is {}.".format( + new_vocab_size, self.max_tokens + ) + ) + self.lookup_table = self._lookup_table_from_tokens(tokens) + self._record_vocabulary_size() + + if self.output_mode == TF_IDF and idf_weights is not False: + if len(vocabulary) != len(idf_weights): + raise ValueError( + "`idf_weights` must be the same length as vocabulary. 
" + "len(idf_weights) is {}, len(vocabulary) is {}".format( + len(vocabulary), len(idf_weights) + ) + ) + idf_weights = self._convert_to_ndarray(idf_weights) + if idf_weights.ndim != 1: + raise ValueError( + "TF-IDF data must be a 1-index array, " + "but received {}".format(type(idf_weights)) + ) + + # If the passed vocabulary has no special tokens, we need to pad the + # front of idf_weights. We don't have real document frequencies for + # these tokens so we will use an average of all idf_weights passed + # in as a reasonable default. + if found_special_tokens: + front_padding = 0 + front_padding_value = 0 + else: + front_padding = token_start + front_padding_value = np.average(idf_weights) + # If pad_to_max_tokens is true, and max_tokens is greater than our + # total vocab size, we need to pad the back of idf_weights with + # zeros as well. + back_padding_value = 0 + if self.pad_to_max_tokens and self.max_tokens is not None: + back_padding = ( + self.max_tokens - front_padding - len(idf_weights) + ) + else: + back_padding = 0 + weights = np.pad( + idf_weights, + (front_padding, back_padding), + "constant", + constant_values=(front_padding_value, back_padding_value), + ) + weights = tf.convert_to_tensor(weights, dtype=self.compute_dtype) + self.idf_weights.assign(weights) + self.idf_weights_const = self.idf_weights.value() + + def update_state(self, data): + if self._has_input_vocabulary: + raise ValueError( + "Cannot adapt {} layer after setting a static vocabulary via " + "init argument " + "or `set_vocabulary`.".format(self.__class__.__name__) + ) + + data = utils.ensure_tensor(data, dtype=self.vocabulary_dtype) + if data.shape.rank == 0: + data = tf.expand_dims(data, 0) + if data.shape.rank == 1: + # Expand dims on axis 0 for tf-idf. A 1-d tensor is a single + # document. + data = tf.expand_dims(data, 0) + + tokens, counts = self._num_tokens(data) + self.token_counts.insert( + tokens, counts + self.token_counts.lookup(tokens) + ) + + if self.output_mode == TF_IDF: + # Dedupe each row of our dataset. + deduped_doc_data = tf.map_fn(lambda x: tf.unique(x)[0], data) + # Flatten and count tokens. + tokens, doc_counts = self._num_tokens(deduped_doc_data) + self.token_document_counts.insert( + tokens, doc_counts + self.token_document_counts.lookup(tokens) + ) + if tf_utils.is_ragged(data): + self.num_documents.assign_add(data.nrows()) + else: + self.num_documents.assign_add( + tf.shape(data, out_type=tf.int64)[0] + ) + + def finalize_state(self): + if self._has_input_vocabulary or tf.equal(self.token_counts.size(), 0): + # Finalize idf_weights to a const for call even if we don't need to + # compute a new vocabulary. + if self.output_mode == TF_IDF: + self.idf_weights_const = self.idf_weights.value() + self._record_vocabulary_size() + return + + # Remove special tokens from our counts. + if self.mask_token is not None: + self.token_counts.remove( + tf.convert_to_tensor([self.mask_token], self.vocabulary_dtype) + ) + if self.oov_token is not None: + self.token_counts.remove( + tf.convert_to_tensor([self.oov_token], self.vocabulary_dtype) + ) + + tokens, counts = self.token_counts.export() + # To keep vocabs deterministic, we sort our tokens by count and break + # ties by sorting the tokens themselves. Tensorflow has no ops for + # sorting strings, so we need to use numpy for the sort. 
+ sorted_indices = np.lexsort((tokens.numpy(), counts.numpy()))[::-1] + token_start = self._token_start_index() + if self.max_tokens: + max_learned_tokens = self.max_tokens - token_start + sorted_indices = sorted_indices[:max_learned_tokens] + tokens = tf.gather(tokens, sorted_indices) + self.lookup_table = self._lookup_table_from_tokens(tokens) + + if self.output_mode == TF_IDF: + token_document_counts = self.token_document_counts.lookup(tokens) + idf_weights = self._inverse_document_frequency( + token_document_counts, self.num_documents + ) + idf_weights = tf.cast(idf_weights, self.compute_dtype) + # Pad the front of idf_weights with the average idf weight for OOV + # tokens. We cannot compute the real idf weight of OOV in a single + # pass. + idf_weights = tf.pad( + idf_weights, + [[self._token_start_index(), 0]], + constant_values=tf.reduce_mean(idf_weights), + ) + if self.pad_to_max_tokens and self.max_tokens is not None: + # Pad the back of idf_weights with zeros. + idf_weights = tf.pad( + idf_weights, + [[0, self.max_tokens - tf.size(idf_weights)]], + constant_values=0, + ) + self.idf_weights.assign(idf_weights) + self.idf_weights_const = self.idf_weights.value() + + # We call this here to save memory, now that we've built our vocabulary, + # we don't want to keep every token we've seen in separate lookup + # tables. + self.reset_state() + self._record_vocabulary_size() + + def reset_state(self): + if self._has_input_vocabulary: + return + + self.token_counts.remove(self.token_counts.export()[0]) + if self.output_mode == TF_IDF: + self.token_document_counts.remove( + self.token_document_counts.export()[0] + ) + self.num_documents.assign(0) + + def call(self, inputs): + self._ensure_known_vocab_size() + + inputs = utils.ensure_tensor(inputs, dtype=self._key_dtype) + original_shape = inputs.shape + # Some ops will not handle scalar input, so uprank to rank 1. + if inputs.shape.rank == 0: + inputs = self._expand_dims(inputs, -1) + + if tf_utils.is_sparse(inputs): + lookups = tf.SparseTensor( + inputs.indices, + self._lookup_dense(inputs.values), + inputs.dense_shape, + ) + elif tf_utils.is_ragged(inputs): + lookups = tf.ragged.map_flat_values(self._lookup_dense, inputs) + else: + lookups = self._lookup_dense(inputs) + + if self.output_mode == INT: + # If we received a scalar input, downrank back to a scalar. + if original_shape.rank == 0: + lookups = tf.squeeze(lookups, -1) + return lookups + + depth = ( + self.max_tokens + if self.pad_to_max_tokens + else self._frozen_vocab_size + ) + idf_weights = ( + self.idf_weights_const if self.output_mode == TF_IDF else None + ) + return utils.encode_categorical_inputs( + lookups, + output_mode=self.output_mode, + depth=depth, + dtype=self.compute_dtype, + sparse=self.sparse, + idf_weights=idf_weights, + ) + + def _lookup_dense(self, inputs): + """Lookup table values for a dense Tensor, handling masking and OOV.""" + # When executing eagerly and tracing keras.Input objects, + # do not call lookup. + # This is critical for restoring SavedModel, which will first trace + # layer.call and then attempt to restore the table. We need the table to + # be uninitialized for the restore to work, but calling the table + # uninitialized would error. 
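The ragged branch of `call` above relies on tf.ragged.map_flat_values, which applies the dense lookup to the flat values and reuses the row partitions; a self-contained sketch with an invented two-token table:

    import tensorflow as tf

    table = tf.lookup.StaticHashTable(
        tf.lookup.KeyValueTensorInitializer(
            tf.constant(["a", "b"]), tf.constant([2, 3], tf.int64)
        ),
        default_value=1,
    )
    ragged = tf.ragged.constant([["a", "b", "a"], ["b"]])
    print(tf.ragged.map_flat_values(table.lookup, ragged))
    # <tf.RaggedTensor [[2, 3, 2], [3]]>
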
+ if tf.executing_eagerly() and backend.is_keras_tensor(inputs): + lookups = tf.zeros_like(inputs, dtype=self._value_dtype) + else: + lookups = self.lookup_table.lookup(inputs) + + if self.mask_token is not None: + mask_locations = tf.equal(inputs, self._mask_key) + lookups = tf.where(mask_locations, self._mask_value, lookups) + + if self.invert: + return lookups + + lookup_checks = [] + + if self.num_oov_indices == 0: + # If we have zero oov indices, we need to check for oov inputs. + oov_indices = tf.where(tf.equal(lookups, -1)) + oov_inputs = tf.gather_nd(inputs, oov_indices) + msg = tf.strings.format( + "When `num_oov_indices=0` all inputs should be in vocabulary, " + "found OOV values {}, consider setting `num_oov_indices=1`.", + (oov_inputs,), + ) + assertion = tf.Assert(tf.equal(tf.size(oov_indices), 0), [msg]) + lookup_checks.append(assertion) + elif self.num_oov_indices > 1: + # If we have multiple oov indices, we need a further hashing step. + if self._key_dtype.is_integer: + oov_indices = tf.math.floormod(inputs, self.num_oov_indices) + else: + oov_indices = tf.strings.to_hash_bucket_fast( + inputs, num_buckets=self.num_oov_indices + ) + oov_indices = oov_indices + self._oov_start_index() + oov_locations = tf.equal(lookups, self._default_value) + lookups = tf.where(oov_locations, oov_indices, lookups) + + with tf.control_dependencies(lookup_checks): + return tf.identity(lookups) + + def save_own_variables(self, store): + if self.output_mode == TF_IDF: + store["idf_weights"] = self.idf_weights_const.numpy() + + def load_own_variables(self, store): + if self.output_mode == TF_IDF: + self.idf_weights.assign(store["idf_weights"]) + self.idf_weights_const = self.idf_weights.value() + + def save_assets(self, dir_path): + if self.input_vocabulary: + # Vocab saved in config. + # TODO: consider unifying both paths. + return + vocabulary = self.get_vocabulary(include_special_tokens=True) + vocabulary_filepath = tf.io.gfile.join(dir_path, "vocabulary.txt") + with open(vocabulary_filepath, "w") as f: + f.write("\n".join([str(w) for w in vocabulary])) + + def load_assets(self, dir_path): + if self.input_vocabulary: + # Vocab saved in config. + # TODO: consider unifying both paths. + return + vocabulary_filepath = tf.io.gfile.join(dir_path, "vocabulary.txt") + # TODO: fix bug with include_special_tokens and set reload from file. 
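The asset format written above is one term per line. Note the asymmetry: `save_assets` includes the special tokens because `load_assets` reloads through `set_vocabulary`, whereas a file passed directly as the `vocabulary` constructor argument should contain only real tokens, since `_lookup_table_from_file` below offsets line numbers by the special-token count. A hypothetical sketch of the constructor path (path and tokens invented):

    import tensorflow as tf

    vocab_path = "/tmp/vocab.txt"  # hypothetical location
    with open(vocab_path, "w") as f:
        f.write("earth\nwind")  # real tokens only, one per line

    layer = tf.keras.layers.StringLookup(
        mask_token="", oov_token="[OOV]", vocabulary=vocab_path
    )
    print(layer(tf.constant(["wind", "mars"])).numpy())  # [3 1]
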
+ with open(vocabulary_filepath, "r") as f: + lines = f.read().split("\n") + if tf.as_dtype(self.vocabulary_dtype) == tf.string: + values = [str(line) for line in lines] + else: + values = [int(line) for line in lines] + if self.output_mode == TF_IDF: + self.set_vocabulary(values, idf_weights=False) + else: + self.set_vocabulary(values) + + def _uninitialized_lookup_table(self): + with tf.init_scope(): + initializer = NullInitializer(self._key_dtype, self._value_dtype) + return tf.lookup.StaticHashTable(initializer, self._default_value) + + def _lookup_table_from_tokens(self, tokens): + with tf.init_scope(): + token_start = self._token_start_index() + token_end = token_start + tf.size(tokens) + indices_dtype = ( + self._key_dtype if self.invert else self._value_dtype + ) + indices = tf.range(token_start, token_end, dtype=indices_dtype) + keys, values = ( + (indices, tokens) if self.invert else (tokens, indices) + ) + initializer = tf.lookup.KeyValueTensorInitializer( + keys, values, self._key_dtype, self._value_dtype + ) + return tf.lookup.StaticHashTable(initializer, self._default_value) + + def _lookup_table_from_file(self, filename): + if self.invert: + key_index = tf.lookup.TextFileIndex.LINE_NUMBER + value_index = tf.lookup.TextFileIndex.WHOLE_LINE + else: + key_index = tf.lookup.TextFileIndex.WHOLE_LINE + value_index = tf.lookup.TextFileIndex.LINE_NUMBER + with tf.init_scope(): + initializer = tf.lookup.TextFileInitializer( + filename=filename, + key_dtype=self._key_dtype, + key_index=key_index, + value_dtype=self._value_dtype, + value_index=value_index, + value_index_offset=self._token_start_index(), + ) + return tf.lookup.StaticHashTable(initializer, self._default_value) + + def _convert_to_ndarray(self, x): + return np.array(x) if isinstance(x, (list, tuple)) else x + + def _expand_dims(self, inputs, axis): + if tf_utils.is_sparse(inputs): + return tf.sparse.expand_dims(inputs, axis) + else: + return tf.expand_dims(inputs, axis) + + def _oov_start_index(self): + return ( + 1 if self.mask_token is not None and self.output_mode == INT else 0 + ) + + def _token_start_index(self): + return self._oov_start_index() + self.num_oov_indices + + def _ensure_known_vocab_size(self): + if self.output_mode == INT or self.pad_to_max_tokens: + return + if self._frozen_vocab_size is None: + raise RuntimeError( + f"When using `output_mode={self.output_mode}` " + "and `pad_to_max_tokens=False`, " + "you must set the layer's vocabulary before calling it. Either " + "pass a `vocabulary` argument to the layer, or call `adapt` " + "with some sample data.".format(self.output_mode) + ) + + def _ensure_vocab_size_unchanged(self): + if self.output_mode == INT or self.pad_to_max_tokens: + return + + with tf.init_scope(): + new_vocab_size = self.vocabulary_size() + + if ( + self._frozen_vocab_size is not None + and new_vocab_size != self._frozen_vocab_size + ): + raise RuntimeError( + f"When using `output_mode={self.output_mode}` " + "and `pad_to_max_tokens=False`, " + "the vocabulary size cannot be changed after the layer is " + f"called. 
Old vocab size is {self._frozen_vocab_size}, " + f"new vocab size is {new_vocab_size}" + ) + + def _find_repeated_tokens(self, vocabulary): + """Return all repeated tokens in a vocabulary.""" + vocabulary_set = set(vocabulary) + if len(vocabulary) != len(vocabulary_set): + return [ + item + for item, count in collections.Counter(vocabulary).items() + if count > 1 + ] + else: + return [] + + def _num_tokens(self, data): + """Count the number of tokens in a ragged, sparse or dense tensor.""" + if tf_utils.is_sparse(data): + flat_values = data.values + elif tf_utils.is_ragged(data): + flat_values = data.flat_values + else: + flat_values = tf.reshape(data, [-1]) + tokens, _, counts = tf.unique_with_counts(flat_values, out_idx=tf.int64) + return tokens, counts + + def _inverse_document_frequency(self, token_document_counts, num_documents): + """Computes the inverse-document-frequency (IDF) component of "tf_idf". + + Uses the default weighting scheme described in + https://en.wikipedia.org/wiki/Tf%E2%80%93idf. + + Args: + token_document_counts: An array of the # of documents each token + appears in. + num_documents: An int representing the total number of documents + + Returns: + An array of "inverse document frequency" weights. + """ + return tf.math.log(1 + num_documents / (1 + token_document_counts)) + + @property + def _trackable_saved_model_saver(self): + return layer_serialization.VocabularySavedModelSaver(self) + + # Override points for IntegerLookup and StringLookup. + def _tensor_vocab_to_numpy(self, vocabulary): + """Converts a tensor vocabulary to a numpy vocabulary.""" + return vocabulary.numpy() diff --git a/keras/layers/preprocessing/index_lookup_distribution_test.py b/keras/layers/preprocessing/index_lookup_distribution_test.py index a7942b3dcc6e..eb9790b75734 100644 --- a/keras/layers/preprocessing/index_lookup_distribution_test.py +++ b/keras/layers/preprocessing/index_lookup_distribution_test.py @@ -15,9 +15,11 @@ """Distribution tests for keras.layers.preprocessing.index_lookup.""" - import os +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras import backend from keras.distribute import strategy_combinations @@ -25,128 +27,174 @@ from keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils + +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) @test_utils.run_v2_only @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( - strategy=strategy_combinations.all_strategies + - strategy_combinations.multi_worker_mirrored_strategies + - strategy_combinations.parameter_server_strategies_single_worker + - strategy_combinations.parameter_server_strategies_multi_worker, - mode=["eager"])) + strategy=strategy_combinations.all_strategies + + strategy_combinations.multi_worker_mirrored_strategies + + strategy_combinations.parameter_server_strategies_single_worker + + strategy_combinations.parameter_server_strategies_multi_worker, + mode=["eager"], + ) +) class IndexLookupDistributionTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def _write_to_temp_file(self, file_name, vocab_list): - vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") - with tf.io.gfile.GFile(vocab_path, "w") as writer: - for vocab in vocab_list: 
- writer.write(vocab + "\n") - writer.flush() - writer.close() - return vocab_path - - def test_strategy(self, strategy): - if (backend.is_tpu_strategy(strategy) and - not tf_test_utils.is_mlir_bridge_enabled()): - self.skipTest("TPU tests require MLIR bridge") - - vocab_data = [[ - "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and", - "and", "fire" - ]] - vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( - 2, drop_remainder=True) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - tf.config.set_soft_device_placement(True) - - with strategy.scope(): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.adapt(vocab_dataset) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - model.compile(loss="mse") - output_dataset = model.predict(input_dataset) - self.assertAllEqual(expected_output, output_dataset) - - def test_strategy_with_file(self, strategy): - if (backend.is_tpu_strategy(strategy) and - not tf_test_utils.is_mlir_bridge_enabled()): - self.skipTest("TPU tests require MLIR bridge") - - vocab_data = ["earth", "wind", "and", "fire"] - vocab_file = self._write_to_temp_file("temp", vocab_data) - - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( - 2, drop_remainder=True) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - tf.config.set_soft_device_placement(True) - - with strategy.scope(): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - vocabulary=vocab_file) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - model.compile(loss="mse") - output_dataset = model.predict(input_dataset) - self.assertAllEqual(expected_output, output_dataset) - - def test_tpu_with_multiple_oov(self, strategy): - # TODO(b/180614455): remove this check when MLIR bridge is always enabled. 
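Stripped of the distribution strategy, the adapt flow these tests exercise reduces to a few lines; the vocabulary contents are invented, and the resulting order follows the frequency-then-token sort described earlier:

    import tensorflow as tf

    layer = tf.keras.layers.StringLookup(mask_token="", oov_token="[OOV]")
    dataset = tf.data.Dataset.from_tensors([["earth", "earth", "wind", "and"]])
    layer.adapt(dataset)
    print(layer.get_vocabulary())
    # ['', '[OOV]', 'earth', 'wind', 'and']
    print(layer(tf.constant(["wind", "michigan"])).numpy())  # [3 1]
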
- if backend.is_tpu_strategy(strategy): - self.skipTest("This test needs MLIR bridge on TPU.") - - vocab_data = [[ - "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and", - "and", "fire" - ]] - vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( - 2, drop_remainder=True) - expected_output = [[3, 4, 5, 6], [6, 5, 3, 1]] - - tf.config.set_soft_device_placement(True) - - with strategy.scope(): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=2, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.adapt(vocab_dataset) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_dataset) - self.assertAllEqual(expected_output, output_dataset) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def _write_to_temp_file(self, file_name, vocab_list): + vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") + with tf.io.gfile.GFile(vocab_path, "w") as writer: + for vocab in vocab_list: + writer.write(vocab + "\n") + writer.flush() + writer.close() + return vocab_path + + def test_strategy(self, strategy): + if ( + backend.is_tpu_strategy(strategy) + and not tf_test_utils.is_mlir_bridge_enabled() + ): + self.skipTest("TPU tests require MLIR bridge") + + vocab_data = [ + [ + "earth", + "earth", + "earth", + "earth", + "wind", + "wind", + "wind", + "and", + "and", + "fire", + ] + ] + vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( + 2, drop_remainder=True + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + tf.config.set_soft_device_placement(True) + + with strategy.scope(): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.adapt(vocab_dataset) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + model.compile(loss="mse") + output_dataset = model.predict(input_dataset) + self.assertAllEqual(expected_output, output_dataset) + + def test_strategy_with_file(self, strategy): + if ( + backend.is_tpu_strategy(strategy) + and not tf_test_utils.is_mlir_bridge_enabled() + ): + self.skipTest("TPU tests require MLIR bridge") + + vocab_data = ["earth", "wind", "and", "fire"] + vocab_file = self._write_to_temp_file("temp", vocab_data) + + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( + 2, drop_remainder=True + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + tf.config.set_soft_device_placement(True) + + with strategy.scope(): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_file, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + model.compile(loss="mse") + 
output_dataset = model.predict(input_dataset) + self.assertAllEqual(expected_output, output_dataset) + + def test_tpu_with_multiple_oov(self, strategy): + # TODO(b/180614455): remove this check when MLIR bridge is always + # enabled. + if backend.is_tpu_strategy(strategy): + self.skipTest("This test needs MLIR bridge on TPU.") + + vocab_data = [ + [ + "earth", + "earth", + "earth", + "earth", + "wind", + "wind", + "wind", + "and", + "and", + "fire", + ] + ] + vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( + 2, drop_remainder=True + ) + expected_output = [[3, 4, 5, 6], [6, 5, 3, 1]] + + tf.config.set_soft_device_placement(True) + + with strategy.scope(): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=2, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.adapt(vocab_dataset) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_dataset) + self.assertAllEqual(expected_output, output_dataset) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/layers/preprocessing/index_lookup_test.py b/keras/layers/preprocessing/index_lookup_test.py index 9b3ef9687d36..ca488eb4c54e 100644 --- a/keras/layers/preprocessing/index_lookup_test.py +++ b/keras/layers/preprocessing/index_lookup_test.py @@ -20,6 +20,8 @@ import random import string +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized import keras @@ -27,2201 +29,988 @@ from keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.utils.generic_utils import CustomObjectScope -import numpy as np -import tensorflow.compat.v2 as tf +from keras.utils import CustomObjectScope def zip_and_sort(weight_values): - keys, values = weight_values - return sorted(zip(keys, values), key=lambda x: x[1]) + keys, values = weight_values + return sorted(zip(keys, values), key=lambda x: x[1]) def _get_end_to_end_test_cases(): - test_cases = ( - { - "testcase_name": - "test_strings_soft_vocab_cap", - # Create an array where 'earth' is the most frequent term, followed by - # 'wind', then 'and', then 'fire'. This ensures that the vocab - # accumulator is sorting by frequency. - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"], - ["and"], ["earth"], ["michigan"]]), - "kwargs": { - "max_tokens": None, - "num_oov_indices": 1, - "mask_token": "", - "oov_token": "[OOV]", - "vocabulary_dtype": tf.string, - }, - "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]], - "input_dtype": - tf.string - }, - { - "testcase_name": - "test_inverse_strings_soft_vocab_cap", - # Create an array where 'earth' is the most frequent term, followed by - # 'wind', then 'and', then 'fire'. This ensures that the vocab - # accumulator is sorting by frequency. 
- "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([[2], [3], [4], [1], [1], [4], [2], [5]]), - "kwargs": { - "max_tokens": None, - "num_oov_indices": 1, - "mask_token": "", - "oov_token": "[OOV]", - "vocabulary_dtype": tf.string, - "invert": True - }, - "expected_output": - np.array([[b"earth"], [b"wind"], [b"and"], [b"[OOV]"], [b"[OOV]"], - [b"and"], [b"earth"], [b"fire"]]), - "input_dtype": - tf.int64 - }, - { - "testcase_name": - "test_strings_with_special_tokens", - # Mask and oov values in the vocab data should be dropped, and mapped - # to 0 and 1 respectively when calling the layer. - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - [""], [""], [""], ["[OOV]"], ["[OOV]"], ["[OOV]"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], [""], ["wind"], ["[OOV]"], ["and"], [""], - ["fire"], ["and"], ["[OOV]"], ["michigan"]]), - "kwargs": { - "max_tokens": None, - "num_oov_indices": 1, - "mask_token": "", - "oov_token": "[OOV]", - "vocabulary_dtype": tf.string, - }, - "expected_output": [[2], [0], [3], [1], [4], [0], [5], [4], [1], [1]], - "input_dtype": - tf.string - }, - { - "testcase_name": - "test_ints_soft_vocab_cap", - # Create an array where 1138 is the most frequent term, followed by - # 1729, then 725, then 42. This ensures that the vocab accumulator - # is sorting by frequency. - "vocab_data": - np.array([[42], [1138], [1138], [1138], [1138], [1729], [1729], - [1729], [725], [725]], - dtype=np.int64), - "input_data": - np.array([[1138], [1729], [725], [42], [42], [725], [1138], [4]], - dtype=np.int64), - "kwargs": { - "max_tokens": None, - "num_oov_indices": 1, - "mask_token": 0, - "oov_token": -1, - "vocabulary_dtype": tf.int64, - }, - "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]], - "input_dtype": - tf.int64 - }, - { - "testcase_name": - "test_ints_with_special_tokens", - # Mask and oov values in the vocab data should be dropped, and mapped - # to 0 and 1 respectively when calling the layer. - "vocab_data": - np.array([[42], [1138], [1138], [1138], [1138], [0], [0], [0], - [-1], [-1], [-1], [1729], [1729], [1729], [725], [725]], - dtype=np.int64), - "input_data": - np.array([[1138], [0], [1729], [-1], [725], [0], [42], [725], - [-1], [4]], - dtype=np.int64), - "kwargs": { - "max_tokens": None, - "num_oov_indices": 1, - "mask_token": 0, - "oov_token": -1, - "vocabulary_dtype": tf.int64, - }, - "expected_output": [[2], [0], [3], [1], [4], [0], [5], [4], [1], [1]], - "input_dtype": - tf.int64 - }, - { - "testcase_name": - "test_strings_hard_vocab_cap", - # Create an array where 'earth' is the most frequent term, followed by - # 'wind', then 'and', then 'fire'. This ensures that the vocab - # accumulator is sorting by frequency. - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"], - ["and"], ["earth"], ["michigan"]]), - "kwargs": { - "max_tokens": 5, - "num_oov_indices": 1, - "mask_token": "", - "oov_token": "[OOV]", - "vocabulary_dtype": tf.string, - }, - "expected_output": [[2], [3], [4], [1], [1], [4], [2], [1]], - "input_dtype": - tf.string - }, - { - "testcase_name": - "test_inverse_strings_hard_vocab_cap", - # Create an array where 'earth' is the most frequent term, followed by - # 'wind', then 'and', then 'fire'. 
This ensures that the vocab - # accumulator is sorting by frequency. - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([[2], [3], [4], [1], [1], [4], [2], [5]]), - "kwargs": { - "max_tokens": 5, - "num_oov_indices": 1, - "mask_token": "", - "oov_token": "[OOV]", - "vocabulary_dtype": tf.string, - "invert": True - }, - "expected_output": - np.array([[b"earth"], [b"wind"], [b"and"], [b"[OOV]"], [b"[OOV]"], - [b"and"], [b"earth"], [b"[OOV]"]]), - "input_dtype": - tf.int64 - }, - { - "testcase_name": - "test_ints_hard_vocab_cap", - # Create an array where 1138 is the most frequent term, followed by - # 1729, then 725, then 42. This ensures that the vocab accumulator - # is sorting by frequency. - "vocab_data": - np.array([[42], [1138], [1138], [1138], [1138], [1729], [1729], - [1729], [725], [725]], - dtype=np.int64), - "input_data": - np.array([[1138], [1729], [725], [42], [42], [725], [1138], [4]], - dtype=np.int64), - "kwargs": { - "max_tokens": 5, - "num_oov_indices": 1, - "mask_token": 0, - "oov_token": -1, - "vocabulary_dtype": tf.int64, - }, - "expected_output": [[2], [3], [4], [1], [1], [4], [2], [1]], - "input_dtype": - tf.int64 - }, - { - "testcase_name": - "test_ints_tf_idf_output", - "vocab_data": - np.array([[42], [1138], [1138], [1138], [1138], [1729], [1729], - [1729], [725], [725]]), - "input_data": - np.array([[1138], [1729], [725], [42], [42], [725], [1138], [4]]), - "kwargs": { - "max_tokens": 5, - "pad_to_max_tokens": True, - "num_oov_indices": 1, - "mask_token": 0, - "oov_token": -1, - "output_mode": index_lookup.TF_IDF, - "vocabulary_dtype": tf.int64, - }, - "expected_output": [[0, 1.098612, 0, 0, 0], [0, 0, 1.252763, 0, 0], - [0, 0, 0, 1.466337, 0], [0, 0, 0, 0, 1.7917595], - [0, 0, 0, 0, 1.7917595], [0, 0, 0, 1.4663371, 0], - [0, 1.098612, 0, 0, 0], [1.402368, 0, 0, 0, 0]], - "input_dtype": - tf.int64 - }, - { - "testcase_name": - "test_strings_tf_idf_output", - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"], - ["and"], ["earth"], ["michigan"]]), - "kwargs": { - "max_tokens": 5, - "pad_to_max_tokens": True, - "num_oov_indices": 1, - "mask_token": "", - "oov_token": "[OOV]", - "output_mode": index_lookup.TF_IDF, - "vocabulary_dtype": tf.string, - }, - "expected_output": [[0, 1.098612, 0, 0, 0], [0, 0, 1.252763, 0, 0], - [0, 0, 0, 1.466337, 0], [0, 0, 0, 0, 1.7917595], - [0, 0, 0, 0, 1.7917595], [0, 0, 0, 1.4663371, 0], - [0, 1.098612, 0, 0, 0], [1.402368, 0, 0, 0, 0]], - "input_dtype": - tf.string - }, - ) - - crossed_test_cases = [] - # Cross above test cases with use_dataset in (True, False) - for use_dataset in (True, False): - for case in test_cases: - case = case.copy() - if use_dataset: - case["testcase_name"] = case["testcase_name"] + "_with_dataset" - case["use_dataset"] = use_dataset - crossed_test_cases.append(case) - - return crossed_test_cases + test_cases = ( + { + "testcase_name": "test_strings_soft_vocab_cap", + # Create an array where 'earth' is the most frequent term, followed + # by 'wind', then 'and', then 'fire'. This ensures that the vocab + # accumulator is sorting by frequency. 
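# A minimal sketch (editorial annotation, not a line of this patch) of the
# index assignment the frequency-sorting comment above implies once adapt()
# has run on this data:
#   0 -> ""       (mask_token)
#   1 -> "[OOV]"  (oov_token)
#   2 -> "earth"  (4 occurrences)
#   3 -> "wind"   (3)
#   4 -> "and"    (2)
#   5 -> "fire"   (1)
# This is why expected_output below maps "earth" -> 2 and the unseen token
# "michigan" -> 1.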
+ "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + ["wind"], + ["and"], + ["fire"], + ["fire"], + ["and"], + ["earth"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": None, + "num_oov_indices": 1, + "mask_token": "", + "oov_token": "[OOV]", + "vocabulary_dtype": tf.string, + }, + "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]], + "input_dtype": tf.string, + }, + { + "testcase_name": "test_inverse_strings_soft_vocab_cap", + # Create an array where 'earth' is the most frequent term, followed + # by 'wind', then 'and', then 'fire'. This ensures that the vocab + # accumulator is sorting by frequency. + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array([[2], [3], [4], [1], [1], [4], [2], [5]]), + "kwargs": { + "max_tokens": None, + "num_oov_indices": 1, + "mask_token": "", + "oov_token": "[OOV]", + "vocabulary_dtype": tf.string, + "invert": True, + }, + "expected_output": np.array( + [ + [b"earth"], + [b"wind"], + [b"and"], + [b"[OOV]"], + [b"[OOV]"], + [b"and"], + [b"earth"], + [b"fire"], + ] + ), + "input_dtype": tf.int64, + }, + { + "testcase_name": "test_strings_with_special_tokens", + # Mask and oov values in the vocab data should be dropped, and + # mapped to 0 and 1 respectively when calling the layer. + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + [""], + [""], + [""], + ["[OOV]"], + ["[OOV]"], + ["[OOV]"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + [""], + ["wind"], + ["[OOV]"], + ["and"], + [""], + ["fire"], + ["and"], + ["[OOV]"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": None, + "num_oov_indices": 1, + "mask_token": "", + "oov_token": "[OOV]", + "vocabulary_dtype": tf.string, + }, + "expected_output": [ + [2], + [0], + [3], + [1], + [4], + [0], + [5], + [4], + [1], + [1], + ], + "input_dtype": tf.string, + }, + { + "testcase_name": "test_ints_soft_vocab_cap", + # Create an array where 1138 is the most frequent term, followed by + # 1729, then 725, then 42. This ensures that the vocab accumulator + # is sorting by frequency. + "vocab_data": np.array( + [ + [42], + [1138], + [1138], + [1138], + [1138], + [1729], + [1729], + [1729], + [725], + [725], + ], + dtype=np.int64, + ), + "input_data": np.array( + [[1138], [1729], [725], [42], [42], [725], [1138], [4]], + dtype=np.int64, + ), + "kwargs": { + "max_tokens": None, + "num_oov_indices": 1, + "mask_token": 0, + "oov_token": -1, + "vocabulary_dtype": tf.int64, + }, + "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]], + "input_dtype": tf.int64, + }, + { + "testcase_name": "test_ints_with_special_tokens", + # Mask and oov values in the vocab data should be dropped, and + # mapped to 0 and 1 respectively when calling the layer. 
+ "vocab_data": np.array( + [ + [42], + [1138], + [1138], + [1138], + [1138], + [0], + [0], + [0], + [-1], + [-1], + [-1], + [1729], + [1729], + [1729], + [725], + [725], + ], + dtype=np.int64, + ), + "input_data": np.array( + [[1138], [0], [1729], [-1], [725], [0], [42], [725], [-1], [4]], + dtype=np.int64, + ), + "kwargs": { + "max_tokens": None, + "num_oov_indices": 1, + "mask_token": 0, + "oov_token": -1, + "vocabulary_dtype": tf.int64, + }, + "expected_output": [ + [2], + [0], + [3], + [1], + [4], + [0], + [5], + [4], + [1], + [1], + ], + "input_dtype": tf.int64, + }, + { + "testcase_name": "test_strings_hard_vocab_cap", + # Create an array where 'earth' is the most frequent term, followed + # by 'wind', then 'and', then 'fire'. This ensures that the vocab + # accumulator is sorting by frequency. + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + ["wind"], + ["and"], + ["fire"], + ["fire"], + ["and"], + ["earth"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": 5, + "num_oov_indices": 1, + "mask_token": "", + "oov_token": "[OOV]", + "vocabulary_dtype": tf.string, + }, + "expected_output": [[2], [3], [4], [1], [1], [4], [2], [1]], + "input_dtype": tf.string, + }, + { + "testcase_name": "test_inverse_strings_hard_vocab_cap", + # Create an array where 'earth' is the most frequent term, followed + # by 'wind', then 'and', then 'fire'. This ensures that the vocab + # accumulator is sorting by frequency. + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array([[2], [3], [4], [1], [1], [4], [2], [5]]), + "kwargs": { + "max_tokens": 5, + "num_oov_indices": 1, + "mask_token": "", + "oov_token": "[OOV]", + "vocabulary_dtype": tf.string, + "invert": True, + }, + "expected_output": np.array( + [ + [b"earth"], + [b"wind"], + [b"and"], + [b"[OOV]"], + [b"[OOV]"], + [b"and"], + [b"earth"], + [b"[OOV]"], + ] + ), + "input_dtype": tf.int64, + }, + { + "testcase_name": "test_ints_hard_vocab_cap", + # Create an array where 1138 is the most frequent term, followed by + # 1729, then 725, then 42. This ensures that the vocab accumulator + # is sorting by frequency. 
+ "vocab_data": np.array( + [ + [42], + [1138], + [1138], + [1138], + [1138], + [1729], + [1729], + [1729], + [725], + [725], + ], + dtype=np.int64, + ), + "input_data": np.array( + [[1138], [1729], [725], [42], [42], [725], [1138], [4]], + dtype=np.int64, + ), + "kwargs": { + "max_tokens": 5, + "num_oov_indices": 1, + "mask_token": 0, + "oov_token": -1, + "vocabulary_dtype": tf.int64, + }, + "expected_output": [[2], [3], [4], [1], [1], [4], [2], [1]], + "input_dtype": tf.int64, + }, + { + "testcase_name": "test_ints_tf_idf_output", + "vocab_data": np.array( + [ + [42], + [1138], + [1138], + [1138], + [1138], + [1729], + [1729], + [1729], + [725], + [725], + ] + ), + "input_data": np.array( + [[1138], [1729], [725], [42], [42], [725], [1138], [4]] + ), + "kwargs": { + "max_tokens": 5, + "pad_to_max_tokens": True, + "num_oov_indices": 1, + "mask_token": 0, + "oov_token": -1, + "output_mode": index_lookup.TF_IDF, + "vocabulary_dtype": tf.int64, + }, + "expected_output": [ + [0, 1.098612, 0, 0, 0], + [0, 0, 1.252763, 0, 0], + [0, 0, 0, 1.466337, 0], + [0, 0, 0, 0, 1.7917595], + [0, 0, 0, 0, 1.7917595], + [0, 0, 0, 1.4663371, 0], + [0, 1.098612, 0, 0, 0], + [1.402368, 0, 0, 0, 0], + ], + "input_dtype": tf.int64, + }, + { + "testcase_name": "test_strings_tf_idf_output", + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + ["wind"], + ["and"], + ["fire"], + ["fire"], + ["and"], + ["earth"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": 5, + "pad_to_max_tokens": True, + "num_oov_indices": 1, + "mask_token": "", + "oov_token": "[OOV]", + "output_mode": index_lookup.TF_IDF, + "vocabulary_dtype": tf.string, + }, + "expected_output": [ + [0, 1.098612, 0, 0, 0], + [0, 0, 1.252763, 0, 0], + [0, 0, 0, 1.466337, 0], + [0, 0, 0, 0, 1.7917595], + [0, 0, 0, 0, 1.7917595], + [0, 0, 0, 1.4663371, 0], + [0, 1.098612, 0, 0, 0], + [1.402368, 0, 0, 0, 0], + ], + "input_dtype": tf.string, + }, + ) + + crossed_test_cases = [] + # Cross above test cases with use_dataset in (True, False) + for use_dataset in (True, False): + for case in test_cases: + case = case.copy() + if use_dataset: + case["testcase_name"] = case["testcase_name"] + "_with_dataset" + case["use_dataset"] = use_dataset + crossed_test_cases.append(case) + + return crossed_test_cases @test_combinations.run_all_keras_modes(always_skip_v1=True) -class IndexLookupLayerTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - @parameterized.named_parameters(*_get_end_to_end_test_cases()) - def test_layer_end_to_end_with_adapt(self, vocab_data, input_data, kwargs, - use_dataset, expected_output, - input_dtype): - cls = index_lookup.IndexLookup - if "invert" in kwargs and kwargs["invert"]: - expected_output_dtype = kwargs["vocabulary_dtype"] - elif "output_mode" in kwargs and kwargs["output_mode"] != index_lookup.INT: - expected_output_dtype = tf.float32 - else: - expected_output_dtype = tf.int64 - - input_shape = input_data.shape - - if use_dataset: - # Keras APIs expect batched datasets. - # TODO(rachelim): `model.predict` predicts the result on each - # dataset batch separately, then tries to concatenate the results - # together. When the results have different shapes on the non-concat - # axis (which can happen in the output_mode = INT case for - # IndexLookup), the concatenation fails. 
In real use cases, this may - # not be an issue because users are likely to pipe the preprocessing layer - # into other keras layers instead of predicting it directly. A workaround - # for these unit tests is to have the dataset only contain one batch, so - # no concatenation needs to happen with the result. For consistency with - # numpy input, we should make `predict` join differently shaped results - # together sensibly, with 0 padding. - input_data = tf.data.Dataset.from_tensor_slices(input_data).batch( - input_shape[0]) - vocab_data = tf.data.Dataset.from_tensor_slices(vocab_data).batch( - input_shape[0]) - - with CustomObjectScope({"IndexLookup": cls}): - output_data = test_utils.layer_test( - cls, - kwargs=kwargs, - input_shape=input_shape, - input_data=input_data, - input_dtype=input_dtype, - expected_output_dtype=expected_output_dtype, - validate_training=False, - adapt_data=vocab_data) - if "invert" in kwargs and kwargs["invert"]: - self.assertAllEqual(expected_output, output_data) - else: - self.assertAllClose(expected_output, output_data) +class IndexLookupLayerTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + @parameterized.named_parameters(*_get_end_to_end_test_cases()) + def test_layer_end_to_end_with_adapt( + self, + vocab_data, + input_data, + kwargs, + use_dataset, + expected_output, + input_dtype, + ): + cls = index_lookup.IndexLookup + if "invert" in kwargs and kwargs["invert"]: + expected_output_dtype = kwargs["vocabulary_dtype"] + elif ( + "output_mode" in kwargs + and kwargs["output_mode"] != index_lookup.INT + ): + expected_output_dtype = tf.float32 + else: + expected_output_dtype = tf.int64 + + input_shape = input_data.shape + + if use_dataset: + # Keras APIs expect batched datasets. + # TODO(rachelim): `model.predict` predicts the result on each + # dataset batch separately, then tries to concatenate the results + # together. When the results have different shapes on the non-concat + # axis (which can happen in the output_mode = INT case for + # IndexLookup), the concatenation fails. In real use cases, this may + # not be an issue because users are likely to pipe the preprocessing + # layer into other keras layers instead of predicting it directly. A + # workaround for these unit tests is to have the dataset only + # contain one batch, so no concatenation needs to happen with the + # result. For consistency with numpy input, we should make `predict` + # join differently shaped results together sensibly, with 0 padding. 
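# Illustration (editorial annotation with hypothetical shapes, not a line
# of this patch): in INT output mode two dataset batches can yield lookups
# of shape (2, 3) and (2, 4), which predict() cannot concatenate along the
# batch axis. Batching with the full length via .batch(input_shape[0]) just
# below produces a single batch, so no concatenation is ever attempted.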
+ input_data = tf.data.Dataset.from_tensor_slices(input_data).batch( + input_shape[0] + ) + vocab_data = tf.data.Dataset.from_tensor_slices(vocab_data).batch( + input_shape[0] + ) + + with CustomObjectScope({"IndexLookup": cls}): + output_data = test_utils.layer_test( + cls, + kwargs=kwargs, + input_shape=input_shape, + input_data=input_data, + input_dtype=input_dtype, + expected_output_dtype=expected_output_dtype, + validate_training=False, + adapt_data=vocab_data, + ) + if "invert" in kwargs and kwargs["invert"]: + self.assertAllEqual(expected_output, output_data) + else: + self.assertAllClose(expected_output, output_data) @test_combinations.run_all_keras_modes(always_skip_v1=True) class CategoricalEncodingInputTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_sparse_string_input(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = tf.SparseTensor( - indices=[[0, 0], [1, 2]], - values=["fire", "michigan"], - dense_shape=[3, 4]) - - expected_indices = [[0, 0], [1, 2]] - expected_values = [5, 1] - expected_dense_shape = [3, 4] - - input_data = keras.Input(shape=(None,), dtype=tf.string, sparse=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array, steps=1) - self.assertAllEqual(expected_indices, output_data.indices) - self.assertAllEqual(expected_values, output_data.values) - self.assertAllEqual(expected_dense_shape, output_data.dense_shape) - - def test_sparse_int_input(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.SparseTensor( - indices=[[0, 0], [1, 2]], - values=np.array([13, 32], dtype=np.int64), - dense_shape=[3, 4]) - - expected_indices = [[0, 0], [1, 2]] - expected_values = [5, 1] - expected_dense_shape = [3, 4] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - vocabulary_dtype=tf.int64, - num_oov_indices=1, - mask_token=0, - oov_token=-1) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array, steps=1) - self.assertAllEqual(expected_indices, output_data.indices) - self.assertAllEqual(expected_values, output_data.values) - self.assertAllEqual(expected_dense_shape, output_data.dense_shape) - - def test_ragged_string_input(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = tf.ragged.constant( - [["earth", "wind", "fire"], ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string, ragged=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_ragged_int_input(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.ragged.constant([[10, 11, 13], [13, 12, 10, 42]], - dtype=np.int64) - expected_output = [[2, 3, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), 
dtype=tf.int64, ragged=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - vocabulary_dtype=tf.int64, - num_oov_indices=1, - mask_token=0, - oov_token=-1) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int32_input_with_int64_keys(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.ragged.constant([[10, 11, 13], [13, 12, 10, 42]], - dtype=np.int32) - expected_output = [[2, 3, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.int32, ragged=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - vocabulary_dtype=tf.int64, - num_oov_indices=1, - mask_token=0, - oov_token=-1) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_sparse_string_input(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=["fire", "michigan"], + dense_shape=[3, 4], + ) + + expected_indices = [[0, 0], [1, 2]] + expected_values = [5, 1] + expected_dense_shape = [3, 4] + + input_data = keras.Input(shape=(None,), dtype=tf.string, sparse=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array, steps=1) + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_sparse_int_input(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=np.array([13, 32], dtype=np.int64), + dense_shape=[3, 4], + ) + + expected_indices = [[0, 0], [1, 2]] + expected_values = [5, 1] + expected_dense_shape = [3, 4] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + vocabulary_dtype=tf.int64, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array, steps=1) + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_ragged_string_input(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = tf.ragged.constant( + [["earth", "wind", "fire"], ["fire", "and", "earth", "michigan"]] + ) + expected_output = [[2, 3, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string, ragged=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = 
model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_ragged_int_input(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.ragged.constant( + [[10, 11, 13], [13, 12, 10, 42]], dtype=np.int64 + ) + expected_output = [[2, 3, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, ragged=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + vocabulary_dtype=tf.int64, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int32_input_with_int64_keys(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.ragged.constant( + [[10, 11, 13], [13, 12, 10, 42]], dtype=np.int32 + ) + expected_output = [[2, 3, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.int32, ragged=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + vocabulary_dtype=tf.int64, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) @test_combinations.run_all_keras_modes(always_skip_v1=True) class CategoricalEncodingMultiOOVTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_sparse_string_input_multi_bucket(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = tf.SparseTensor( - indices=[[0, 0], [1, 2]], values=["fire", "ohio"], dense_shape=[3, 4]) - - expected_indices = [[0, 0], [1, 2]] - expected_values = [6, 2] - expected_dense_shape = [3, 4] - - input_data = keras.Input(shape=(None,), dtype=tf.string, sparse=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=2, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array, steps=1) - self.assertAllEqual(expected_indices, output_data.indices) - self.assertAllEqual(expected_values, output_data.values) - self.assertAllEqual(expected_dense_shape, output_data.dense_shape) - - def test_sparse_int_input_multi_bucket(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.SparseTensor( - indices=[[0, 0], [1, 2]], - values=np.array([13, 133], dtype=np.int64), - dense_shape=[3, 4]) - - expected_indices = [[0, 0], [1, 2]] - expected_values = [6, 2] - expected_dense_shape = [3, 4] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - vocabulary_dtype=tf.int64, - num_oov_indices=2, - mask_token=0, - oov_token=-1) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array, steps=1) - self.assertAllEqual(expected_indices, output_data.indices) - self.assertAllEqual(expected_values, output_data.values) - self.assertAllEqual(expected_dense_shape, output_data.dense_shape) - - def test_ragged_string_input_multi_bucket(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = 
tf.ragged.constant([["earth", "wind", "fire"], - ["fire", "and", "earth", "ohio"]]) - expected_output = [[3, 4, 6], [6, 5, 3, 2]] - - input_data = keras.Input(shape=(None,), dtype=tf.string, ragged=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=2, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_ragged_int_input_multi_bucket(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.ragged.constant([[10, 11, 13], [13, 12, 10, 133]], - dtype=np.int64) - expected_output = [[3, 4, 6], [6, 5, 3, 2]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, ragged=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - vocabulary_dtype=tf.int64, - num_oov_indices=2, - mask_token=0, - oov_token=-1) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_sparse_string_input_multi_bucket(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=["fire", "ohio"], + dense_shape=[3, 4], + ) + expected_indices = [[0, 0], [1, 2]] + expected_values = [6, 2] + expected_dense_shape = [3, 4] -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class CategoricalEncodingAdaptTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_sparse_adapt(self): - vocab_data = tf.SparseTensor( - indices=[[0, 0], [0, 1], [1, 2]], - values=["michigan", "fire", "michigan"], - dense_shape=[3, 4]) - vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) - - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.adapt(vocab_dataset) - expected_vocabulary = ["", "[OOV]", "michigan", "fire"] - self.assertAllEqual(expected_vocabulary, layer.get_vocabulary()) - - def test_ragged_adapt(self): - vocab_data = tf.ragged.constant([["michigan"], - ["fire", "michigan"]]) - vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) - - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.adapt(vocab_dataset) - expected_vocabulary = ["", "[OOV]", "michigan", "fire"] - self.assertAllEqual(expected_vocabulary, layer.get_vocabulary()) - - def test_sparse_int_input(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.SparseTensor( - indices=[[0, 0], [1, 2]], - values=np.array([13, 32], dtype=np.int64), - dense_shape=[3, 4]) - - expected_indices = [[0, 0], [1, 2]] - expected_values = [5, 1] - expected_dense_shape = [3, 4] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - vocabulary_dtype=tf.int64, - num_oov_indices=1, - mask_token=0, - oov_token=-1) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array, steps=1) - 
self.assertAllEqual(expected_indices, output_data.indices) - self.assertAllEqual(expected_values, output_data.values) - self.assertAllEqual(expected_dense_shape, output_data.dense_shape) - - def test_ragged_string_input(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = tf.ragged.constant( - [["earth", "wind", "fire"], ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string, ragged=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_ragged_int_input(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.ragged.constant([[10, 11, 13], [13, 12, 10, 42]], - dtype=np.int64) - expected_output = [[2, 3, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, ragged=True) - layer = index_lookup.IndexLookup( - max_tokens=None, - vocabulary_dtype=tf.int64, - num_oov_indices=1, - mask_token=0, - oov_token=-1) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_single_string_generator_dataset(self): - - def word_gen(): - for _ in itertools.count(1): - yield "".join(random.choice(string.ascii_letters) for i in range(2)) - - ds = tf.data.Dataset.from_generator(word_gen, tf.string, - tf.TensorShape([])) - batched_ds = ds.take(2) - input_t = keras.Input(shape=(), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=10, - num_oov_indices=0, - mask_token=None, - oov_token=None, - vocabulary_dtype=tf.string) - _ = layer(input_t) - layer.adapt(batched_ds) + input_data = keras.Input(shape=(None,), dtype=tf.string, sparse=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=2, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array, steps=1) + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_sparse_int_input_multi_bucket(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=np.array([13, 133], dtype=np.int64), + dense_shape=[3, 4], + ) + + expected_indices = [[0, 0], [1, 2]] + expected_values = [6, 2] + expected_dense_shape = [3, 4] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + vocabulary_dtype=tf.int64, + num_oov_indices=2, + mask_token=0, + oov_token=-1, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array, steps=1) + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, 
output_data.dense_shape) + def test_ragged_string_input_multi_bucket(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = tf.ragged.constant( + [["earth", "wind", "fire"], ["fire", "and", "earth", "ohio"]] + ) + expected_output = [[3, 4, 6], [6, 5, 3, 2]] -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class IndexLookupOutputTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def _write_to_temp_file(self, file_name, vocab_list): - vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") - with tf.io.gfile.GFile(vocab_path, "w") as writer: - for vocab in vocab_list: - writer.write(vocab + "\n") - writer.flush() - writer.close() - return vocab_path - - @parameterized.product( - rank=[0, 1, 2], - # Check lists, numpy arrays, tensors, and objects convertable to tensor. - data_fn=[None, np.array, tf.constant, preprocessing_test_utils.ArrayLike] - ) - def test_input_types(self, rank, data_fn): - input_data = vocab = ["earth", "wind", "and", "fire"] - expected_output = [2, 3, 4, 5] - if rank == 0: - input_data = input_data[0] - expected_output = expected_output[0] - elif rank == 2: - input_data = [input_data] - expected_output = [expected_output] - if data_fn is not None: - input_data = data_fn(input_data) - input_shape = [] if rank == 0 else [None] - - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary=vocab, - vocabulary_dtype=tf.string) - output_data = layer(input_data) - self.assertAllEqual(expected_output, output_data) - - # Again in a keras.Model - inputs = keras.Input(shape=input_shape, dtype=tf.string) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - output_data = model(tf.constant(input_data)) - self.assertAllEqual(expected_output, output_data) - - def test_int_output_shape(self): - input_data = keras.Input(batch_size=16, shape=(4,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=2, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - int_data = layer(input_data) - self.assertAllEqual(int_data.shape.as_list(), [16, 4]) - - @parameterized.named_parameters( - ("int32", tf.int32), - ("int64", tf.int64), - ) - def test_int_output_dtype(self, dtype): - input_data = keras.Input(batch_size=16, shape=(4,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=2, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - dtype=dtype) - int_data = layer(input_data) - self.assertAllEqual(int_data.dtype, dtype) - - def test_int_output_float_dtype_fails(self): - with self.assertRaisesRegex(ValueError, "`dtype` should be an integer"): - index_lookup.IndexLookup( - max_tokens=2, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - dtype=tf.float32) - - def test_int_output_no_reserved_zero(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token=None, - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - 
self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_no_oov(self): - vocab_data = ["earth", "wind", "and", "fire"] - valid_input = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", ""]]) - invalid_input = np.array([["earth", "wind", "and", "michigan"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=0, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(valid_input) - self.assertAllEqual(expected_output, output_data) - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - "found OOV values.*michigan"): - _ = model.predict(invalid_input) - - def test_int_output_no_oov_ragged(self): - vocab_data = ["earth", "wind", "and", "fire"] - valid_input = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", ""]]) - invalid_input = np.array([["earth", "wind", "and", "michigan"], - ["fire", "and", "earth", "michigan"]]) - valid_input = tf.RaggedTensor.from_tensor(valid_input) - invalid_input = tf.RaggedTensor.from_tensor(invalid_input) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=0, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(valid_input) - self.assertAllEqual(expected_output, output_data) - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - "found OOV values.*michigan"): - _ = model.predict(invalid_input) - - def test_int_output_no_oov_sparse(self): - vocab_data = ["earth", "wind", "and", "fire"] - valid_input = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", ""]]) - invalid_input = np.array([["earth", "wind", "and", "michigan"], - ["fire", "and", "earth", "michigan"]]) - valid_input = tf.sparse.from_dense(valid_input) - invalid_input = tf.sparse.from_dense(invalid_input) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=0, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(valid_input) - self.assertAllEqual(expected_output, - tf.sparse.to_dense(output_data)) - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - "found OOV values.*michigan"): - _ = model.predict(invalid_input) - - def test_int_output_explicit_vocab(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - 
output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_one_hot_output_hard_maximum(self): - """Check binary output when pad_to_max_tokens=True.""" - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array(["earth", "wind", "and", "fire", "michigan", ""]) - expected_output = [ - [0, 1, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0], - [0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 1, 0], - [1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - ] - - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=6, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.ONE_HOT, - pad_to_max_tokens=True, - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - binary_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=binary_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_one_hot_output_soft_maximum(self): - """Check binary output when pad_to_max_tokens=False.""" - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array(["earth", "wind", "and", "fire", "michigan", ""]) - expected_output = [ - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - ] - - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.ONE_HOT, - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - binary_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=binary_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_one_hot_output_rank_zero_no_oov(self): - """Check binary output when pad_to_max_tokens=False.""" - vocab_data = ["earth", "wind", "and", "fire"] - input_data = tf.constant("earth") - expected_output = [1, 0, 0, 0] - - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=0, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.ONE_HOT, - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - output_data = layer(input_data) - self.assertAllEqual(expected_output, output_data) - - def test_one_hot_output_shape(self): - inputs = keras.Input(batch_size=16, shape=(1,), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=["earth"], - max_tokens=2, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.ONE_HOT, - vocabulary_dtype=tf.string) - outputs = layer(inputs) - self.assertAllEqual(outputs.shape.as_list(), [16, 2]) - - @parameterized.product( - sparse=[True, False], - adapt=[True, False], - pad_to_max=[True, False], - mode=["multi_hot", "count", "tf_idf"], - dtype=[tf.float32, tf.float64], - ) - def test_binned_output(self, sparse, adapt, pad_to_max, mode, dtype): - """Check "multi_hot", "count", and "tf_idf" output.""" - # Adapt breaks ties with sort order. - vocab_data = ["wind", "fire", "earth", "and"] - # IDF weight for a term in 1 out of 1 document is log(1 + 1/2). 
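# Worked check (editorial annotation, not a line of this patch): the
# constants in these tests are consistent with the weighting
#     idf = math.log(1 + num_docs / (1 + doc_freq))
# e.g. math.log(1 + 1 / (1 + 1)) == math.log(1.5) for idf_data below, and
# math.log(1 + 10 / (1 + 4)) == math.log(3) ~= 1.098612 in the earlier
# TF-IDF cases; the OOV weight 1.402368 there is the mean of the four
# in-vocab idf values.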
- idf_data = [math.log(1.5)] * 4 - input_data = np.array([["and", "earth", "fire", "and", ""], - ["michigan", "wind", "and", "ohio", ""]]) - - if mode == "count": - expected_output = np.array([ - [0, 0, 1, 1, 2], - [2, 1, 0, 0, 1], - ]) - elif mode == "tf_idf": - expected_output = np.array([ - [0, 0, 1, 1, 2], - [2, 1, 0, 0, 1], - ]) * math.log(1.5) - else: - expected_output = np.array([ - [0, 0, 1, 1, 1], - [1, 1, 0, 0, 1], - ]) - expected_output_shape = [None, 5] - if pad_to_max: - expected_output = np.concatenate((expected_output, [[0], [0]]), axis=1) - expected_output_shape = [None, 6] - - inputs = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=6, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - output_mode=mode, - pad_to_max_tokens=pad_to_max, - vocabulary_dtype=tf.string, - sparse=sparse, - vocabulary=None if adapt else vocab_data, - idf_weights=None if adapt or mode != "tf_idf" else idf_data, - dtype=dtype) - if adapt: - layer.adapt(vocab_data) - outputs = layer(inputs) - model = keras.Model(inputs, outputs) - output_data = model.predict(input_data) - if sparse: - output_data = tf.sparse.to_dense(output_data) - # Check output data. - self.assertAllClose(expected_output, output_data) - # Check symbolic output shape. - self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) - # Check output dtype. - self.assertAllEqual(dtype, output_data.dtype) - - def test_multi_hot_output_no_oov(self): - """Check multi hot output when num_oov_indices=0.""" - vocab_data = ["earth", "wind", "and", "fire"] - valid_input = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", ""]]) - invalid_input = np.array([["earth", "wind", "and", "michigan"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [ - [1, 1, 1, 1, 0], - [1, 0, 1, 1, 0], - ] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=5, - num_oov_indices=0, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.MULTI_HOT, - pad_to_max_tokens=True, - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - binary_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=binary_data) - output_data = model.predict(valid_input) - self.assertAllEqual(expected_output, output_data) - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - "found OOV values.*michigan"): - _ = model.predict(invalid_input) - - def test_multi_hot_output_hard_maximum_multiple_adapts(self): - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - adapt_data = ["earth", "earth", "earth", "earth", "wind", "wind", "wind"] - first_expected_output = [ - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0], - ] - second_adapt_data = [ - "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and", - "and", "fire" - ] - second_expected_output = [ - [0, 1, 1, 1, 0], - [1, 1, 0, 1, 0], - ] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=5, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.MULTI_HOT, - pad_to_max_tokens=True, - vocabulary_dtype=tf.string) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - # Test the first adapt - layer.adapt(adapt_data) - first_output = model.predict(input_array) - # Test the second adapt - layer.adapt(second_adapt_data) - # We need to recompile the model to retrace our call graph. 
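# Background (editorial annotation, not a line of this patch): predict()
# caches a traced function on first use, and the second adapt() call above
# changes the layer's lookup table, so the compile() just below resets that
# cache and forces the next predict() to retrace against the re-adapted
# vocabulary.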
- model.compile() - second_output = model.predict(input_array) - self.assertAllEqual(first_expected_output, first_output) - self.assertAllEqual(second_expected_output, second_output) - - def test_int_output_file_vocab(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 0, 2, 1]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=vocab_file, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_non_int_output_file_vocab_in_tf_function(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = tf.constant( - [["earth", "wind", "and", "fire", ""], - ["fire", "and", "earth", "michigan", ""]], - dtype=tf.string) - - expected_output = [ - [0, 1, 1, 1, 1], - [1, 1, 0, 1, 1], - ] - vocab_file = self._write_to_temp_file("temp", vocab_data) - - @tf.function - def compute(data): - layer = index_lookup.IndexLookup( - vocabulary=vocab_file, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.MULTI_HOT, - vocabulary_dtype=tf.string) - return layer(data) - - output_dataset = compute(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_file_vocab_and_list_vocab_identical_attrs(self): - vocab_data = ["earth", "wind", "and", "fire"] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - file_layer = index_lookup.IndexLookup( - vocabulary=vocab_file, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - - list_layer = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - - expected_vocab = ["", "[OOV]", "earth", "wind", "and", "fire"] - self.assertAllEqual(expected_vocab, list_layer.get_vocabulary()) - expected_vocab_size = 6 - self.assertAllEqual(expected_vocab_size, list_layer.vocabulary_size()) - self.assertAllEqual(list_layer.get_vocabulary(), - file_layer.get_vocabulary()) - self.assertAllEqual(list_layer.vocabulary_size(), - file_layer.vocabulary_size()) - - def test_file_vocab_and_list_vocab_identical_attrs_multi_oov(self): - vocab_data = ["earth", "wind", "and", "fire"] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - file_layer = index_lookup.IndexLookup( - vocabulary=vocab_file, - max_tokens=None, - num_oov_indices=2, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - - list_layer = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=2, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - - expected_vocab = ["", "[OOV]", "[OOV]", "earth", "wind", "and", "fire"] - self.assertAllEqual(expected_vocab, list_layer.get_vocabulary()) - expected_vocab_size = 7 - self.assertAllEqual(expected_vocab_size, list_layer.vocabulary_size()) - self.assertAllEqual(list_layer.get_vocabulary(), - file_layer.get_vocabulary()) - self.assertAllEqual(list_layer.vocabulary_size(), - file_layer.vocabulary_size()) - - def 
test_file_vocab_and_list_vocab_identical_attrs_no_mask(self): - vocab_data = ["earth", "wind", "and", "fire"] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - file_layer = index_lookup.IndexLookup( - vocabulary=vocab_file, - max_tokens=None, - num_oov_indices=2, - mask_token=None, - oov_token="[OOV]", - vocabulary_dtype=tf.string) - - list_layer = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=2, - mask_token=None, - oov_token="[OOV]", - vocabulary_dtype=tf.string) - - expected_vocab = ["[OOV]", "[OOV]", "earth", "wind", "and", "fire"] - self.assertAllEqual(expected_vocab, list_layer.get_vocabulary()) - expected_vocab_size = 6 - self.assertAllEqual(expected_vocab_size, list_layer.vocabulary_size()) - self.assertAllEqual(list_layer.get_vocabulary(), - file_layer.get_vocabulary()) - self.assertAllEqual(list_layer.vocabulary_size(), - file_layer.vocabulary_size()) - - def test_int_output_file_vocab_no_mask(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "", "earth", "michigan"]]) - expected_output = [[1, 2, 3, 4], [4, 0, 1, 0]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=vocab_file, - max_tokens=None, - mask_token=None, - num_oov_indices=1, - oov_token="[OOV]", - vocabulary_dtype=tf.string) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_file_vocab_no_oov_or_mask(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "wind", "earth", "and"]]) - expected_output = [[0, 1, 2, 3], [3, 1, 0, 2]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=vocab_file, - max_tokens=None, - mask_token=None, - num_oov_indices=0, - oov_token=None, - vocabulary_dtype=tf.string) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_file_vocab_inversion(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([[1, 2, 3, 4], [4, 0, 1, 0]]) - expected_output = [["earth", "wind", "and", "fire"], - ["fire", "[OOV]", "earth", "[OOV]"]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - idata = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=vocab_file, - max_tokens=None, - mask_token=None, - num_oov_indices=1, - oov_token="[OOV]", - vocabulary_dtype=tf.string) - _ = layer(idata) - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - - invert_layer = index_lookup.IndexLookup( - vocabulary=layer.get_vocabulary(), - max_tokens=None, - oov_token="[OOV]", - mask_token=None, - num_oov_indices=1, - invert=True, - vocabulary_dtype=tf.string) - int_data = invert_layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_int_file_vocab(self): - vocab_data = ["10", "20", "30", "40"] - input_array = np.array([[10, 20, 30, 40], [40, 0, 
10, 42]]) - expected_output = [[2, 3, 4, 5], [5, 0, 2, 1]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = index_lookup.IndexLookup( - vocabulary=vocab_file, - max_tokens=None, - num_oov_indices=1, - mask_token=0, - oov_token=-1, - vocabulary_dtype=tf.int64) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_dataset_map_output(self): - vocab_data = ["earth", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=0, - mask_token=None, - oov_token="[OOV]", - vocabulary=vocab_data, - vocabulary_dtype=tf.string) - ds = tf.data.Dataset.from_tensor_slices([["earth"], ["wind"], ["and"]]) - ds = ds.map(layer) - self.assertAllEqual(list(ds.as_numpy_iterator()), [[0], [1], [2]]) - - def test_dataset_map_output_layer_created_in_function(self): - vocab_data = ["earth", "wind", "and", "fire"] - - def apply_lookup(data): - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=0, - mask_token=None, - oov_token="[OOV]", - vocabulary=vocab_data, - vocabulary_dtype=tf.string) - return layer(data) - - ds = tf.data.Dataset.from_tensor_slices([["earth"], ["wind"], ["and"]]) - ds = ds.map(apply_lookup) - self.assertAllEqual(list(ds.as_numpy_iterator()), [[0], [1], [2]]) + input_data = keras.Input(shape=(None,), dtype=tf.string, ragged=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=2, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + def test_ragged_int_input_multi_bucket(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.ragged.constant( + [[10, 11, 13], [13, 12, 10, 133]], dtype=np.int64 + ) + expected_output = [[3, 4, 6], [6, 5, 3, 2]] -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class IndexLookupVocabularyTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest - ): - - def test_int_output_explicit_vocab(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_explicit_vocab_with_special_tokens(self): - vocab_data = ["", "[OOV]", "earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - int_data = layer(input_data) - model = 
keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_get_vocabulary_no_special_tokens(self): - vocab_data = ["", "[OOV]", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=5, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - returned_vocab = layer.get_vocabulary(include_special_tokens=False) - self.assertAllEqual(returned_vocab, ["wind", "and", "fire"]) - self.assertAllEqual(layer.vocabulary_size(), 5) - - def test_vocab_multi_oov(self): - vocab_data = ["", "[OOV]", "[OOV]", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=2, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(returned_vocab, vocab_data) - - def test_vocab_multi_oov_not_present(self): - vocab_data = ["wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=10, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(returned_vocab, - [""] + ["[OOV]"] * 10 + ["wind", "and", "fire"]) - - def test_vocab_with_max_cap(self): - vocab_data = ["", "[OOV]", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=5, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(vocab_data, returned_vocab) - self.assertAllEqual(layer.vocabulary_size(), 5) - - def test_int_vocab_with_max_cap(self): - vocab_data = [0, -1, 42, 1276, 1138] - layer = index_lookup.IndexLookup( - max_tokens=5, - num_oov_indices=1, - mask_token=0, - oov_token=-1, - vocabulary_dtype=tf.int64) - layer.set_vocabulary(vocab_data) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(vocab_data, returned_vocab) - self.assertAllEqual(layer.vocabulary_size(), 5) - - def test_vocab_with_multiple_oov_indices(self): - vocab_data = ["", "[OOV]", "[OOV]", "[OOV]", "wind"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=3, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(vocab_data, returned_vocab) - - def test_int_vocab_with_multiple_oov_indices(self): - vocab_data = [0, -1, -1, -1, 42] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=3, - mask_token=0, - oov_token=-1, - vocabulary_dtype=tf.int64) - layer.set_vocabulary(vocab_data) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(vocab_data, returned_vocab) - - def test_non_unique_vocab_fails(self): - vocab_data = ["earth", "wind", "and", "fire", "fire"] - with self.assertRaisesRegex(ValueError, "repeated term.*fire"): - _ = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - - def test_vocab_with_repeated_element_fails(self): - vocab_data = ["earth", "earth", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - with self.assertRaisesRegex(ValueError, "repeated 
term.*earth"): - layer.set_vocabulary(vocab_data) - - def test_vocab_with_reserved_oov_element_and_invert_true_fails(self): - vocab_data = ["earth", "test", "[OOV]", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - invert=True, - vocabulary_dtype=tf.string) - with self.assertRaisesRegex(ValueError, "reserved OOV"): - layer.set_vocabulary(vocab_data) - - def test_vocab_with_reserved_mask_element_fails(self): - vocab_data = ["earth", "mask_token", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="mask_token", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - with self.assertRaisesRegex(ValueError, "reserved mask"): - layer.set_vocabulary(vocab_data) - - def test_vocab_size_changed_pad_to_max_false_fails(self): - vocab_data = ["earth", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - pad_to_max_tokens=False, - output_mode=index_lookup.MULTI_HOT, - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - # Calling the layer should lock the vocabulary size. - _ = layer([["earth"]]) - layer.set_vocabulary(vocab_data[:2]) - with self.assertRaisesRegex(RuntimeError, - "vocabulary size cannot be changed"): - # Calling the layer again should cause an error. - _ = layer([["earth"]]) - - def test_vocab_with_idf_weights_non_tfidf_output_fails(self): - vocab_data = ["earth", "wind", "and", "fire"] - weight_data = [1, 1, 1, 1, 1] - with self.assertRaisesRegex(ValueError, - "`idf_weights` should only be set if"): - index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.MULTI_HOT, - vocabulary_dtype=tf.string, - vocabulary=vocab_data, - idf_weights=weight_data) - - def test_vocab_with_idf_weights_length_mismatch_fails(self): - vocab_data = ["earth", "wind", "and", "fire"] - weight_data = [1, 1, 1, 1, 1] # too long - with self.assertRaisesRegex( - ValueError, "`idf_weights` must be the same length as vocab"): - index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.TF_IDF, - vocabulary_dtype=tf.string, - vocabulary=vocab_data, - idf_weights=weight_data) - - def test_vocab_without_idf_weights_tfidf_output_fails(self): - vocab_data = ["earth", "wind", "and", "fire"] - with self.assertRaisesRegex( - ValueError, "`idf_weights` must be set if output_mode is TF_IDF"): - index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - output_mode=index_lookup.TF_IDF, - vocabulary_dtype=tf.string, - vocabulary=vocab_data) - - def test_non_unique_int_vocab_fails(self): - vocab_data = [12, 13, 14, 15, 15] - with self.assertRaisesRegex(ValueError, "repeated term.*15"): - _ = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=1, - mask_token=0, - oov_token=-1, - vocabulary_dtype=tf.int64) - - def test_int_vocab_with_reserved_oov_element_and_invert_true_fails(self): - vocab_data = [14, 38, -1, 34, 3, 84] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token=0, - oov_token=-1, - invert=True, - vocabulary_dtype=tf.int64) - with self.assertRaisesRegex(ValueError, "reserved OOV"): - layer.set_vocabulary(vocab_data) - - def test_int_vocab_with_reserved_mask_element_fails(self): - vocab_data = [125, 0, 3, 4, 94] - 
layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token=0, - oov_token=-1, - vocabulary_dtype=tf.int64) - with self.assertRaisesRegex(ValueError, "reserved mask"): - layer.set_vocabulary(vocab_data) - - def test_no_vocab_file_string_fails(self): - with self.assertRaisesRegex(ValueError, "non_existent_file"): - _ = index_lookup.IndexLookup( - vocabulary="non_existent_file", - max_tokens=None, - num_oov_indices=1, - mask_token=0, - oov_token=-1, - vocabulary_dtype=tf.int64) + input_data = keras.Input(shape=(None,), dtype=tf.int64, ragged=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + vocabulary_dtype=tf.int64, + num_oov_indices=2, + mask_token=0, + oov_token=-1, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) @test_combinations.run_all_keras_modes(always_skip_v1=True) -class IndexLookupInverseVocabularyTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_int_output_explicit_vocab(self): - vocab_data = ["", "[OOV]", "earth", "wind", "and", "fire"] - input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 1]]) - expected_output = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "[OOV]"]]) - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - invert=True) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_vocab_with_max_cap(self): - vocab_data = ["", "[OOV]", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=5, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - invert=True) - layer.set_vocabulary(vocab_data) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(vocab_data, returned_vocab) - - def test_int_vocab_with_max_cap(self): - vocab_data = [0, -1, 42, 1276, 1138] - layer = index_lookup.IndexLookup( - max_tokens=5, - num_oov_indices=1, - mask_token=0, - oov_token=-1, - vocabulary_dtype=tf.int64, - invert=True) - layer.set_vocabulary(vocab_data) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(vocab_data, returned_vocab) - - def test_non_unique_vocab_fails(self): - vocab_data = ["earth", "wind", "and", "fire", "fire"] - with self.assertRaisesRegex(ValueError, "repeated term.*fire"): - _ = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - invert=True) - - def test_non_int_output_fails(self): - with self.assertRaisesRegex(ValueError, "`output_mode` must be `'int'`"): - _ = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - output_mode=index_lookup.COUNT, - invert=True) - - def test_vocab_with_repeated_element_fails(self): - vocab_data = ["earth", "earth", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - invert=True) - with self.assertRaisesRegex(ValueError, "repeated term.*earth"): - 
layer.set_vocabulary(vocab_data) - - def test_vocab_with_reserved_mask_element_fails(self): - vocab_data = ["earth", "mask_token", "wind", "and", "fire"] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="mask_token", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - invert=True) - with self.assertRaisesRegex(ValueError, "reserved mask"): - layer.set_vocabulary(vocab_data) - - def test_non_unique_int_vocab_fails(self): - vocab_data = [12, 13, 14, 15, 15] - with self.assertRaisesRegex(ValueError, "repeated term.*15"): - _ = index_lookup.IndexLookup( - vocabulary=vocab_data, - max_tokens=None, - num_oov_indices=1, - mask_token=0, - oov_token=-1, - vocabulary_dtype=tf.int64, - invert=True) - - def test_int_vocab_with_repeated_element_fails(self): - vocab_data = [11, 11, 34, 23, 124] - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token=0, - oov_token=-1, - vocabulary_dtype=tf.int64, - invert=True) - with self.assertRaisesRegex(ValueError, "repeated term.*11"): - layer.set_vocabulary(vocab_data) +class CategoricalEncodingAdaptTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_sparse_adapt(self): + vocab_data = tf.SparseTensor( + indices=[[0, 0], [0, 1], [1, 2]], + values=["michigan", "fire", "michigan"], + dense_shape=[3, 4], + ) + vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) + + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.adapt(vocab_dataset) + expected_vocabulary = ["", "[OOV]", "michigan", "fire"] + self.assertAllEqual(expected_vocabulary, layer.get_vocabulary()) + def test_ragged_adapt(self): + vocab_data = tf.ragged.constant([["michigan"], ["fire", "michigan"]]) + vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class IndexLookupErrorTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_too_long_vocab_fails_in_single_setting(self): - vocab_data = ["earth", "wind", "and", "fire"] - - layer = index_lookup.IndexLookup( - max_tokens=4, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - with self.assertRaisesRegex(ValueError, - "vocabulary larger than the maximum vocab"): - layer.set_vocabulary(vocab_data) - - def test_zero_max_tokens_fails(self): - with self.assertRaisesRegex(ValueError, "max_tokens"): - _ = index_lookup.IndexLookup( - max_tokens=0, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.adapt(vocab_dataset) + expected_vocabulary = ["", "[OOV]", "michigan", "fire"] + self.assertAllEqual(expected_vocabulary, layer.get_vocabulary()) + + def test_sparse_int_input(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=np.array([13, 32], dtype=np.int64), + dense_shape=[3, 4], + ) + + expected_indices = [[0, 0], [1, 2]] + expected_values = [5, 1] + expected_dense_shape = [3, 4] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + vocabulary_dtype=tf.int64, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + ) + layer.set_vocabulary(vocab_data) + 
int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array, steps=1) + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_ragged_string_input(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = tf.ragged.constant( + [["earth", "wind", "fire"], ["fire", "and", "earth", "michigan"]] + ) + expected_output = [[2, 3, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string, ragged=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_ragged_int_input(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.ragged.constant( + [[10, 11, 13], [13, 12, 10, 42]], dtype=np.int64 + ) + expected_output = [[2, 3, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, ragged=True) + layer = index_lookup.IndexLookup( + max_tokens=None, + vocabulary_dtype=tf.int64, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_single_string_generator_dataset(self): + def word_gen(): + for _ in itertools.count(1): + yield "".join( + random.choice(string.ascii_letters) for i in range(2) + ) + + ds = tf.data.Dataset.from_generator( + word_gen, tf.string, tf.TensorShape([]) + ) + batched_ds = ds.take(2) + input_t = keras.Input(shape=(), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=10, + num_oov_indices=0, + mask_token=None, + oov_token=None, + vocabulary_dtype=tf.string, + ) + _ = layer(input_t) + layer.adapt(batched_ds) @test_combinations.run_all_keras_modes(always_skip_v1=True) -class IndexLookupSavingTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def _write_to_temp_file(self, file_name, vocab_list): - vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") - with tf.io.gfile.GFile(vocab_path, "w") as writer: - for vocab in vocab_list: - writer.write(vocab + "\n") - writer.flush() - writer.close() - return vocab_path - - def test_vocabulary_persistence_across_saving(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(output_dataset, expected_output) - - # Save the model to disk. 
- output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - - loaded_model = keras.models.load_model( - output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup}) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_dataset = loaded_model.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - def test_vocabulary_persistence_file_across_cloning(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - vocab_file = self._write_to_temp_file("temp", vocab_data) - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - vocabulary=vocab_file) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(output_dataset, expected_output) - - # Clone the model and set weights. - new_model = keras.models.clone_model(model) - new_model.set_weights(model.get_weights()) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, new_model) - - # Validate correctness of the new model. - new_output_dataset = new_model.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - def test_persistence_file_vocabs_tf_save_tf_load(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - vocabulary=vocab_file) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(output_dataset, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - tf.saved_model.save(obj=model, export_dir=output_path) - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - - loaded_model = tf.saved_model.load(output_path) - f = loaded_model.signatures["serving_default"] - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. 
- new_output_dataset = f(tf.constant(input_array))["index_lookup"] - self.assertAllEqual(new_output_dataset, expected_output) - - def test_vocabulary_persistence_file_vocab_keras_save_tf_load(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - vocabulary=vocab_file) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(output_dataset, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - - loaded_model = tf.saved_model.load(output_path) - f = loaded_model.signatures["serving_default"] - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_dataset = f(tf.constant(input_array))["index_lookup"] - self.assertAllEqual(new_output_dataset, expected_output) - - def test_persistence_file_vocab_keras_save_keras_load(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - vocabulary=vocab_file) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(output_dataset, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - tf.io.gfile.remove(vocab_file) - - loaded_model = keras.models.load_model( - output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup}) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_dataset = loaded_model.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - # Try re-saving the layer. This simulates saving a layer contained at - # a hub Module. - input_data_2 = keras.Input(shape=(None,), dtype=tf.string) - output_2 = loaded_model(input_data_2) - model_2 = keras.Model(inputs=input_data_2, outputs=output_2) - new_output_dataset = model_2.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - # Save the model to disk. 
- output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model_2") - model_2.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - - loaded_model = keras.models.load_model( - output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup}) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_dataset = loaded_model.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - def test_persistence_file_vocab_keras_save_keras_load_tf_save_tf_load(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - vocabulary=vocab_file) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(output_dataset, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - tf.io.gfile.remove(vocab_file) - - loaded_model = keras.models.load_model( - output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup}) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_dataset = loaded_model.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - # Try re-saving the layer. This simulates saving a layer contained at - # a hub Module. - input_data_2 = keras.Input(shape=(None,), dtype=tf.string) - output_2 = loaded_model(input_data_2) - model_2 = keras.Model(inputs=input_data_2, outputs=output_2) - new_output_dataset = model_2.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model_2") - tf.saved_model.save(model_2, output_path) - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - - loaded_model = tf.saved_model.load(output_path) - f = loaded_model.signatures["serving_default"] - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. 
- new_output_dataset = f(tf.constant(input_array))["model"] - self.assertAllEqual(new_output_dataset, expected_output) - - def test_persistence_file_vocab_keras_save_keras_load_keras_save_keras_load( - self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - vocab_file = self._write_to_temp_file("temp", vocab_data) - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = index_lookup.IndexLookup( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - vocabulary=vocab_file) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(output_dataset, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - tf.io.gfile.remove(vocab_file) - - loaded_model = keras.models.load_model( - output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup}) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_dataset = loaded_model.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - # Try re-saving the layer. This simulates saving a layer contained at - # a hub Module. - input_data_2 = keras.Input(shape=(None,), dtype=tf.string) - output_2 = loaded_model(input_data_2) - model_2 = keras.Model(inputs=input_data_2, outputs=output_2) - new_output_dataset = model_2.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model_2") - model_2.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - - loaded_model = keras.models.load_model( - output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup}) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_dataset = model_2.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - def test_static_table_config_weight_data_transfer_succeeds(self): - vocab_data = ["earth", "wind", "and", "fire"] - vocab_file = self._write_to_temp_file("temp", vocab_data) - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - # Build and validate a golden model. 
- layer_cls = index_lookup.IndexLookup - layer = layer_cls( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - vocabulary=vocab_file) - config = layer.get_config() - weights = layer.get_weights() - - layer = layer_cls.from_config(config) - layer.set_weights(weights) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - output = layer(input_data) - model = keras.Model(inputs=input_data, outputs=output) - - new_output_dataset = model.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) - - def test_sparse_output_across_saving(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - - expected_output = [[0., 1., 1., 1., 1.], [1., 1., 0., 1., 1.]] - - layer_cls = index_lookup.IndexLookup - layer = layer_cls( - max_tokens=None, - num_oov_indices=1, - mask_token="", - oov_token="[OOV]", - vocabulary_dtype=tf.string, - vocabulary=vocab_data, - output_mode="multi_hot", - sparse=True) - config = layer.get_config() - layer = layer_cls.from_config(config) - - output = layer(input_array) - self.assertIsInstance(output, tf.SparseTensor) - self.assertAllEqual(tf.sparse.to_dense(output), expected_output) - - -class EagerExecutionDisabled(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_lookup(self): - # We need this test for model_to_estimator followed by export_saved_model, - # which will call the layer in a legacy session. This could also happen - # directly if a user calls disable_v2_behavior or disable_eager_execution. - with tf.compat.v1.Session(): - with test_utils.run_eagerly_scope(False): +class IndexLookupOutputTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def _write_to_temp_file(self, file_name, vocab_list): + vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") + with tf.io.gfile.GFile(vocab_path, "w") as writer: + for vocab in vocab_list: + writer.write(vocab + "\n") + writer.flush() + writer.close() + return vocab_path + + @parameterized.product( + rank=[0, 1, 2], + # Check lists, numpy arrays, tensors, and objects convertible to tensor. 
+ data_fn=[ + None, + np.array, + tf.constant, + preprocessing_test_utils.ArrayLike, + ], + ) + def test_input_types(self, rank, data_fn): + input_data = vocab = ["earth", "wind", "and", "fire"] + expected_output = [2, 3, 4, 5] + if rank == 0: + input_data = input_data[0] + expected_output = expected_output[0] + elif rank == 2: + input_data = [input_data] + expected_output = [expected_output] + if data_fn is not None: + input_data = data_fn(input_data) + input_shape = [] if rank == 0 else [None] + + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary=vocab, + vocabulary_dtype=tf.string, + ) + output_data = layer(input_data) + self.assertAllEqual(expected_output, output_data) + + # Again in a keras.Model + inputs = keras.Input(shape=input_shape, dtype=tf.string) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + output_data = model(tf.constant(input_data)) + self.assertAllEqual(expected_output, output_data) + + def test_int_output_shape(self): + input_data = keras.Input(batch_size=16, shape=(4,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=2, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + int_data = layer(input_data) + self.assertAllEqual(int_data.shape.as_list(), [16, 4]) + + @parameterized.named_parameters( + ("int32", tf.int32), + ("int64", tf.int64), + ) + def test_int_output_dtype(self, dtype): + input_data = keras.Input(batch_size=16, shape=(4,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=2, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + dtype=dtype, + ) + int_data = layer(input_data) + self.assertAllEqual(int_data.dtype, dtype) + + def test_int_output_float_dtype_fails(self): + with self.assertRaisesRegex(ValueError, "`dtype` should be an integer"): + index_lookup.IndexLookup( + max_tokens=2, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + dtype=tf.float32, + ) + + def test_int_output_no_reserved_zero(self): vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array(["earth", "wind", "and", "fire"]) - expected_output = [1, 2, 3, 4] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] input_data = keras.Input(shape=(None,), dtype=tf.string) layer = index_lookup.IndexLookup( @@ -2230,17 +1019,1771 @@ def test_lookup(self): mask_token=None, oov_token="[OOV]", vocabulary_dtype=tf.string, - vocabulary=vocab_data) + ) + layer.set_vocabulary(vocab_data) int_data = layer(input_data) model = keras.Model(inputs=input_data, outputs=int_data) - # In a TF1 session the user will need to make sure all tables are - # initialized themselves. 
- tf.compat.v1.tables_initializer().run() - output_dataset = model(input_array) - self.assertAllEqual(output_dataset, expected_output) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_no_oov(self): + vocab_data = ["earth", "wind", "and", "fire"] + valid_input = np.array( + [["earth", "wind", "and", "fire"], ["fire", "and", "earth", ""]] + ) + invalid_input = np.array( + [ + ["earth", "wind", "and", "michigan"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=0, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(valid_input) + self.assertAllEqual(expected_output, output_data) + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, "found OOV values.*michigan" + ): + _ = model.predict(invalid_input) + + def test_int_output_no_oov_ragged(self): + vocab_data = ["earth", "wind", "and", "fire"] + valid_input = np.array( + [["earth", "wind", "and", "fire"], ["fire", "and", "earth", ""]] + ) + invalid_input = np.array( + [ + ["earth", "wind", "and", "michigan"], + ["fire", "and", "earth", "michigan"], + ] + ) + valid_input = tf.RaggedTensor.from_tensor(valid_input) + invalid_input = tf.RaggedTensor.from_tensor(invalid_input) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=0, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(valid_input) + self.assertAllEqual(expected_output, output_data) + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, "found OOV values.*michigan" + ): + _ = model.predict(invalid_input) + + def test_int_output_no_oov_sparse(self): + vocab_data = ["earth", "wind", "and", "fire"] + valid_input = np.array( + [["earth", "wind", "and", "fire"], ["fire", "and", "earth", ""]] + ) + invalid_input = np.array( + [ + ["earth", "wind", "and", "michigan"], + ["fire", "and", "earth", "michigan"], + ] + ) + valid_input = tf.sparse.from_dense(valid_input) + invalid_input = tf.sparse.from_dense(invalid_input) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=0, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(valid_input) + self.assertAllEqual(expected_output, tf.sparse.to_dense(output_data)) + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, "found OOV values.*michigan" + ): + _ = model.predict(invalid_input) + + def test_int_output_explicit_vocab(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) 
+ layer = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_one_hot_output_hard_maximum(self): + """Check one-hot output when pad_to_max_tokens=True.""" + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array(["earth", "wind", "and", "fire", "michigan", ""]) + expected_output = [ + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=6, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.ONE_HOT, + pad_to_max_tokens=True, + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + binary_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=binary_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_one_hot_output_soft_maximum(self): + """Check one-hot output when pad_to_max_tokens=False.""" + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array(["earth", "wind", "and", "fire", "michigan", ""]) + expected_output = [ + [0, 1, 0, 0, 0], + [0, 0, 1, 0, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ] + + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.ONE_HOT, + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + binary_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=binary_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_one_hot_output_rank_zero_no_oov(self): + """Check one-hot output for a rank-0 input with no OOV indices.""" + vocab_data = ["earth", "wind", "and", "fire"] + input_data = tf.constant("earth") + expected_output = [1, 0, 0, 0] + + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=0, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.ONE_HOT, + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + output_data = layer(input_data) + self.assertAllEqual(expected_output, output_data) + + def test_one_hot_output_shape(self): + inputs = keras.Input(batch_size=16, shape=(1,), dtype=tf.string) + layer = index_lookup.IndexLookup( + vocabulary=["earth"], + max_tokens=2, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.ONE_HOT, + vocabulary_dtype=tf.string, + ) + outputs = layer(inputs) + self.assertAllEqual(outputs.shape.as_list(), [16, 2]) + + @parameterized.product( + sparse=[True, False], + adapt=[True, False], + pad_to_max=[True, False], + mode=["multi_hot", "count", "tf_idf"], + dtype=[tf.float32, tf.float64], + ) + def test_binned_output(self, sparse, adapt, pad_to_max, mode, dtype): + """Check "multi_hot", "count", and "tf_idf" output.""" + # Adapt breaks ties with sort order. + vocab_data = ["wind", "fire", "earth", "and"] + # IDF weight for a term in 1 out of 1 document is log(1 + 1/2), + # i.e. log(1 + num_documents / (1 + doc_count)). 
+ idf_data = [math.log(1.5)] * 4 + input_data = np.array( + [ + ["and", "earth", "fire", "and", ""], + ["michigan", "wind", "and", "ohio", ""], + ] + ) + + if mode == "count": + expected_output = np.array( + [ + [0, 0, 1, 1, 2], + [2, 1, 0, 0, 1], + ] + ) + elif mode == "tf_idf": + expected_output = np.array( + [ + [0, 0, 1, 1, 2], + [2, 1, 0, 0, 1], + ] + ) * math.log(1.5) + else: + expected_output = np.array( + [ + [0, 0, 1, 1, 1], + [1, 1, 0, 0, 1], + ] + ) + expected_output_shape = [None, 5] + if pad_to_max: + expected_output = np.concatenate( + (expected_output, [[0], [0]]), axis=1 + ) + expected_output_shape = [None, 6] + + inputs = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=6, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + output_mode=mode, + pad_to_max_tokens=pad_to_max, + vocabulary_dtype=tf.string, + sparse=sparse, + vocabulary=None if adapt else vocab_data, + idf_weights=None if adapt or mode != "tf_idf" else idf_data, + dtype=dtype, + ) + if adapt: + layer.adapt(vocab_data) + outputs = layer(inputs) + model = keras.Model(inputs, outputs) + output_data = model.predict(input_data) + if sparse: + output_data = tf.sparse.to_dense(output_data) + # Check output data. + self.assertAllClose(expected_output, output_data) + # Check symbolic output shape. + self.assertAllEqual(expected_output_shape, outputs.shape.as_list()) + # Check output dtype. + self.assertAllEqual(dtype, output_data.dtype) + + def test_multi_hot_output_no_oov(self): + """Check multi hot output when num_oov_indices=0.""" + vocab_data = ["earth", "wind", "and", "fire"] + valid_input = np.array( + [["earth", "wind", "and", "fire"], ["fire", "and", "earth", ""]] + ) + invalid_input = np.array( + [ + ["earth", "wind", "and", "michigan"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [ + [1, 1, 1, 1, 0], + [1, 0, 1, 1, 0], + ] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=5, + num_oov_indices=0, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.MULTI_HOT, + pad_to_max_tokens=True, + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + binary_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=binary_data) + output_data = model.predict(valid_input) + self.assertAllEqual(expected_output, output_data) + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, "found OOV values.*michigan" + ): + _ = model.predict(invalid_input) + + def test_multi_hot_output_hard_maximum_multiple_adapts(self): + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + adapt_data = [ + "earth", + "earth", + "earth", + "earth", + "wind", + "wind", + "wind", + ] + first_expected_output = [ + [1, 1, 1, 0, 0], + [1, 1, 0, 0, 0], + ] + second_adapt_data = [ + "earth", + "earth", + "earth", + "earth", + "wind", + "wind", + "wind", + "and", + "and", + "fire", + ] + second_expected_output = [ + [0, 1, 1, 1, 0], + [1, 1, 0, 1, 0], + ] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=5, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.MULTI_HOT, + pad_to_max_tokens=True, + vocabulary_dtype=tf.string, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + # Test the first adapt + layer.adapt(adapt_data) + first_output = model.predict(input_array) + # Test 
the second adapt + layer.adapt(second_adapt_data) + # We need to recompile the model to retrace our call graph. + model.compile() + second_output = model.predict(input_array) + self.assertAllEqual(first_expected_output, first_output) + self.assertAllEqual(second_expected_output, second_output) + + def test_int_output_file_vocab(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 0, 2, 1]] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + vocabulary=vocab_file, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_non_int_output_file_vocab_in_tf_function(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = tf.constant( + [ + ["earth", "wind", "and", "fire", ""], + ["fire", "and", "earth", "michigan", ""], + ], + dtype=tf.string, + ) + + expected_output = [ + [0, 1, 1, 1, 1], + [1, 1, 0, 1, 1], + ] + vocab_file = self._write_to_temp_file("temp", vocab_data) + + @tf.function + def compute(data): + layer = index_lookup.IndexLookup( + vocabulary=vocab_file, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.MULTI_HOT, + vocabulary_dtype=tf.string, + ) + return layer(data) + + output_dataset = compute(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_file_vocab_and_list_vocab_identical_attrs(self): + vocab_data = ["earth", "wind", "and", "fire"] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + file_layer = index_lookup.IndexLookup( + vocabulary=vocab_file, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + + list_layer = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + + expected_vocab = ["", "[OOV]", "earth", "wind", "and", "fire"] + self.assertAllEqual(expected_vocab, list_layer.get_vocabulary()) + expected_vocab_size = 6 + self.assertAllEqual(expected_vocab_size, list_layer.vocabulary_size()) + self.assertAllEqual( + list_layer.get_vocabulary(), file_layer.get_vocabulary() + ) + self.assertAllEqual( + list_layer.vocabulary_size(), file_layer.vocabulary_size() + ) + + def test_file_vocab_and_list_vocab_identical_attrs_multi_oov(self): + vocab_data = ["earth", "wind", "and", "fire"] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + file_layer = index_lookup.IndexLookup( + vocabulary=vocab_file, + max_tokens=None, + num_oov_indices=2, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + + list_layer = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=2, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + + expected_vocab = ["", "[OOV]", "[OOV]", "earth", "wind", "and", "fire"] + self.assertAllEqual(expected_vocab, list_layer.get_vocabulary()) + expected_vocab_size = 7 + self.assertAllEqual(expected_vocab_size, list_layer.vocabulary_size()) + self.assertAllEqual( + 
list_layer.get_vocabulary(), file_layer.get_vocabulary() + ) + self.assertAllEqual( + list_layer.vocabulary_size(), file_layer.vocabulary_size() + ) + + def test_file_vocab_and_list_vocab_identical_attrs_no_mask(self): + vocab_data = ["earth", "wind", "and", "fire"] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + file_layer = index_lookup.IndexLookup( + vocabulary=vocab_file, + max_tokens=None, + num_oov_indices=2, + mask_token=None, + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + + list_layer = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=2, + mask_token=None, + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + + expected_vocab = ["[OOV]", "[OOV]", "earth", "wind", "and", "fire"] + self.assertAllEqual(expected_vocab, list_layer.get_vocabulary()) + expected_vocab_size = 6 + self.assertAllEqual(expected_vocab_size, list_layer.vocabulary_size()) + self.assertAllEqual( + list_layer.get_vocabulary(), file_layer.get_vocabulary() + ) + self.assertAllEqual( + list_layer.vocabulary_size(), file_layer.vocabulary_size() + ) + + def test_int_output_file_vocab_no_mask(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "", "earth", "michigan"], + ] + ) + expected_output = [[1, 2, 3, 4], [4, 0, 1, 0]] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + vocabulary=vocab_file, + max_tokens=None, + mask_token=None, + num_oov_indices=1, + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_file_vocab_no_oov_or_mask(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [["earth", "wind", "and", "fire"], ["fire", "wind", "earth", "and"]] + ) + expected_output = [[0, 1, 2, 3], [3, 1, 0, 2]] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + vocabulary=vocab_file, + max_tokens=None, + mask_token=None, + num_oov_indices=0, + oov_token=None, + vocabulary_dtype=tf.string, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_file_vocab_inversion(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array([[1, 2, 3, 4], [4, 0, 1, 0]]) + expected_output = [ + ["earth", "wind", "and", "fire"], + ["fire", "[OOV]", "earth", "[OOV]"], + ] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + idata = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + vocabulary=vocab_file, + max_tokens=None, + mask_token=None, + num_oov_indices=1, + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + _ = layer(idata) + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + + invert_layer = index_lookup.IndexLookup( + vocabulary=layer.get_vocabulary(), + max_tokens=None, + oov_token="[OOV]", + mask_token=None, + num_oov_indices=1, + invert=True, + vocabulary_dtype=tf.string, + ) + int_data = invert_layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = 
model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_int_file_vocab(self): + vocab_data = ["10", "20", "30", "40"] + input_array = np.array([[10, 20, 30, 40], [40, 0, 10, 42]]) + expected_output = [[2, 3, 4, 5], [5, 0, 2, 1]] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = index_lookup.IndexLookup( + vocabulary=vocab_file, + max_tokens=None, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + vocabulary_dtype=tf.int64, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_dataset_map_output(self): + vocab_data = ["earth", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=0, + mask_token=None, + oov_token="[OOV]", + vocabulary=vocab_data, + vocabulary_dtype=tf.string, + ) + ds = tf.data.Dataset.from_tensor_slices([["earth"], ["wind"], ["and"]]) + ds = ds.map(layer) + self.assertAllEqual(list(ds.as_numpy_iterator()), [[0], [1], [2]]) + + def test_dataset_map_output_layer_created_in_function(self): + vocab_data = ["earth", "wind", "and", "fire"] + + def apply_lookup(data): + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=0, + mask_token=None, + oov_token="[OOV]", + vocabulary=vocab_data, + vocabulary_dtype=tf.string, + ) + return layer(data) + + ds = tf.data.Dataset.from_tensor_slices([["earth"], ["wind"], ["and"]]) + ds = ds.map(apply_lookup) + self.assertAllEqual(list(ds.as_numpy_iterator()), [[0], [1], [2]]) + + +@test_combinations.run_all_keras_modes(always_skip_v1=True) +class IndexLookupVocabularyTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_int_output_explicit_vocab(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_explicit_vocab_with_special_tokens(self): + vocab_data = ["", "[OOV]", "earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_get_vocabulary_no_special_tokens(self): + vocab_data = ["", "[OOV]", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=5, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + 
returned_vocab = layer.get_vocabulary(include_special_tokens=False) + self.assertAllEqual(returned_vocab, ["wind", "and", "fire"]) + self.assertAllEqual(layer.vocabulary_size(), 5) + + def test_vocab_multi_oov(self): + vocab_data = ["", "[OOV]", "[OOV]", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=2, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(returned_vocab, vocab_data) + + def test_vocab_multi_oov_not_present(self): + vocab_data = ["wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=10, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual( + returned_vocab, [""] + ["[OOV]"] * 10 + ["wind", "and", "fire"] + ) + + def test_vocab_with_max_cap(self): + vocab_data = ["", "[OOV]", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=5, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + self.assertAllEqual(layer.vocabulary_size(), 5) + + def test_int_vocab_with_max_cap(self): + vocab_data = [0, -1, 42, 1276, 1138] + layer = index_lookup.IndexLookup( + max_tokens=5, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + vocabulary_dtype=tf.int64, + ) + layer.set_vocabulary(vocab_data) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + self.assertAllEqual(layer.vocabulary_size(), 5) + + def test_vocab_with_multiple_oov_indices(self): + vocab_data = ["", "[OOV]", "[OOV]", "[OOV]", "wind"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=3, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + + def test_int_vocab_with_multiple_oov_indices(self): + vocab_data = [0, -1, -1, -1, 42] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=3, + mask_token=0, + oov_token=-1, + vocabulary_dtype=tf.int64, + ) + layer.set_vocabulary(vocab_data) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + + def test_non_unique_vocab_fails(self): + vocab_data = ["earth", "wind", "and", "fire", "fire"] + with self.assertRaisesRegex(ValueError, "repeated term.*fire"): + _ = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + + def test_vocab_with_repeated_element_fails(self): + vocab_data = ["earth", "earth", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + with self.assertRaisesRegex(ValueError, "repeated term.*earth"): + layer.set_vocabulary(vocab_data) + + def test_vocab_with_reserved_oov_element_and_invert_true_fails(self): + vocab_data = ["earth", "test", "[OOV]", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + invert=True, + vocabulary_dtype=tf.string, + ) + with self.assertRaisesRegex(ValueError, 
"reserved OOV"): + layer.set_vocabulary(vocab_data) + + def test_vocab_with_reserved_mask_element_fails(self): + vocab_data = ["earth", "mask_token", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="mask_token", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + with self.assertRaisesRegex(ValueError, "reserved mask"): + layer.set_vocabulary(vocab_data) + + def test_vocab_size_changed_pad_to_max_false_fails(self): + vocab_data = ["earth", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + pad_to_max_tokens=False, + output_mode=index_lookup.MULTI_HOT, + vocabulary_dtype=tf.string, + ) + layer.set_vocabulary(vocab_data) + # Calling the layer should lock the vocabulary size. + _ = layer([["earth"]]) + with self.assertRaisesRegex( + RuntimeError, "vocabulary size cannot be changed" + ): + layer.set_vocabulary(vocab_data[:2]) + + def test_vocab_with_idf_weights_non_tfidf_output_fails(self): + vocab_data = ["earth", "wind", "and", "fire"] + weight_data = [1, 1, 1, 1, 1] + with self.assertRaisesRegex( + ValueError, "`idf_weights` should only be set if" + ): + index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.MULTI_HOT, + vocabulary_dtype=tf.string, + vocabulary=vocab_data, + idf_weights=weight_data, + ) + + def test_vocab_with_idf_weights_length_mismatch_fails(self): + vocab_data = ["earth", "wind", "and", "fire"] + weight_data = [1, 1, 1, 1, 1] # too long + with self.assertRaisesRegex( + ValueError, "`idf_weights` must be the same length as vocab" + ): + index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.TF_IDF, + vocabulary_dtype=tf.string, + vocabulary=vocab_data, + idf_weights=weight_data, + ) + + def test_vocab_without_idf_weights_tfidf_output_fails(self): + vocab_data = ["earth", "wind", "and", "fire"] + with self.assertRaisesRegex( + ValueError, "`idf_weights` must be set if output_mode is TF_IDF" + ): + index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + output_mode=index_lookup.TF_IDF, + vocabulary_dtype=tf.string, + vocabulary=vocab_data, + ) + + def test_non_unique_int_vocab_fails(self): + vocab_data = [12, 13, 14, 15, 15] + with self.assertRaisesRegex(ValueError, "repeated term.*15"): + _ = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + vocabulary_dtype=tf.int64, + ) + + def test_int_vocab_with_reserved_oov_element_and_invert_true_fails(self): + vocab_data = [14, 38, -1, 34, 3, 84] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + invert=True, + vocabulary_dtype=tf.int64, + ) + with self.assertRaisesRegex(ValueError, "reserved OOV"): + layer.set_vocabulary(vocab_data) + + def test_int_vocab_with_reserved_mask_element_fails(self): + vocab_data = [125, 0, 3, 4, 94] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + vocabulary_dtype=tf.int64, + ) + with self.assertRaisesRegex(ValueError, "reserved mask"): + layer.set_vocabulary(vocab_data) + + def test_no_vocab_file_string_fails(self): + with self.assertRaisesRegex(ValueError, "non_existent_file"): + _ = index_lookup.IndexLookup( + vocabulary="non_existent_file", + 
max_tokens=None, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + vocabulary_dtype=tf.int64, + ) + + +@test_combinations.run_all_keras_modes(always_skip_v1=True) +class IndexLookupInverseVocabularyTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_int_output_explicit_vocab(self): + vocab_data = ["", "[OOV]", "earth", "wind", "and", "fire"] + input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 1]]) + expected_output = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "[OOV]"], + ] + ) + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + invert=True, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_vocab_with_max_cap(self): + vocab_data = ["", "[OOV]", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=5, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + invert=True, + ) + layer.set_vocabulary(vocab_data) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + + def test_int_vocab_with_max_cap(self): + vocab_data = [0, -1, 42, 1276, 1138] + layer = index_lookup.IndexLookup( + max_tokens=5, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + vocabulary_dtype=tf.int64, + invert=True, + ) + layer.set_vocabulary(vocab_data) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + + def test_non_unique_vocab_fails(self): + vocab_data = ["earth", "wind", "and", "fire", "fire"] + with self.assertRaisesRegex(ValueError, "repeated term.*fire"): + _ = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + invert=True, + ) + + def test_non_int_output_fails(self): + with self.assertRaisesRegex( + ValueError, "`output_mode` must be `'int'`" + ): + _ = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + output_mode=index_lookup.COUNT, + invert=True, + ) + + def test_vocab_with_repeated_element_fails(self): + vocab_data = ["earth", "earth", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + invert=True, + ) + with self.assertRaisesRegex(ValueError, "repeated term.*earth"): + layer.set_vocabulary(vocab_data) + + def test_vocab_with_reserved_mask_element_fails(self): + vocab_data = ["earth", "mask_token", "wind", "and", "fire"] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="mask_token", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + invert=True, + ) + with self.assertRaisesRegex(ValueError, "reserved mask"): + layer.set_vocabulary(vocab_data) + + def test_non_unique_int_vocab_fails(self): + vocab_data = [12, 13, 14, 15, 15] + with self.assertRaisesRegex(ValueError, "repeated term.*15"): + _ = index_lookup.IndexLookup( + vocabulary=vocab_data, + max_tokens=None, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + vocabulary_dtype=tf.int64, + invert=True, + ) + + def 
test_int_vocab_with_repeated_element_fails(self): + vocab_data = [11, 11, 34, 23, 124] + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token=0, + oov_token=-1, + vocabulary_dtype=tf.int64, + invert=True, + ) + with self.assertRaisesRegex(ValueError, "repeated term.*11"): + layer.set_vocabulary(vocab_data) + + +@test_combinations.run_all_keras_modes(always_skip_v1=True) +class IndexLookupErrorTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_too_long_vocab_fails_in_single_setting(self): + vocab_data = ["earth", "wind", "and", "fire"] + + layer = index_lookup.IndexLookup( + max_tokens=4, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + with self.assertRaisesRegex( + ValueError, "vocabulary larger than the maximum vocab" + ): + layer.set_vocabulary(vocab_data) + + def test_zero_max_tokens_fails(self): + with self.assertRaisesRegex(ValueError, "max_tokens"): + _ = index_lookup.IndexLookup( + max_tokens=0, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + ) + + +@test_combinations.run_all_keras_modes(always_skip_v1=True) +class IndexLookupSavingTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def _write_to_temp_file(self, file_name, vocab_list): + vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") + with tf.io.gfile.GFile(vocab_path, "w") as writer: + for vocab in vocab_list: + writer.write(vocab + "\n") + writer.flush() + writer.close() + return vocab_path + + def test_vocabulary_persistence_across_saving(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + vocab_file = self._write_to_temp_file("temp", vocab_data) + + # Build and validate a golden model. + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_file, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(output_dataset, expected_output) + + with self.subTest("keras_v3"): + # Save the model to disk. + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_model.keras" + ) + model.save(output_path, save_format="keras_v3") + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IndexLookup": index_lookup.IndexLookup}, + ) + + # Ensure that the loaded model is unique + # (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + with self.subTest("savedmodel"): + # Save the model to disk. + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_model" + ) + model.save(output_path, save_format="tf") + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. 
+ keras.backend.clear_session() + tf.io.gfile.remove(vocab_file) + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IndexLookup": index_lookup.IndexLookup}, + ) + + # Ensure that the loaded model is unique + # (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + def test_vocabulary_persistence_file_across_cloning(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + vocab_file = self._write_to_temp_file("temp", vocab_data) + + # Build and validate a golden model. + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_file, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(output_dataset, expected_output) + + # Clone the model and set weights. + new_model = keras.models.clone_model(model) + new_model.set_weights(model.get_weights()) + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, new_model) + + # Validate correctness of the new model. + new_output_dataset = new_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + def test_persistence_file_vocabs_tf_save_tf_load(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + # Build and validate a golden model. + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_file, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(output_dataset, expected_output) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") + tf.saved_model.save(obj=model, export_dir=output_path) + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + keras.backend.clear_session() + + loaded_model = tf.saved_model.load(output_path) + f = loaded_model.signatures["serving_default"] + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = f(tf.constant(input_array))["index_lookup"] + self.assertAllEqual(new_output_dataset, expected_output) + + def test_vocabulary_persistence_file_vocab_keras_save_tf_load(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + # Build and validate a golden model. 
+ input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_file, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(output_dataset, expected_output) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") + model.save(output_path, save_format="tf") + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + keras.backend.clear_session() + + loaded_model = tf.saved_model.load(output_path) + f = loaded_model.signatures["serving_default"] + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = f(tf.constant(input_array))["index_lookup"] + self.assertAllEqual(new_output_dataset, expected_output) + + def test_persistence_file_vocab_keras_save_keras_load(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + # Build and validate a golden model. + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_file, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(output_dataset, expected_output) + + with self.subTest("keras_v3"): + # Save the model to disk. + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_model.keras" + ) + model.save(output_path, save_format="keras_v3") + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IndexLookup": index_lookup.IndexLookup}, + ) + + # Ensure that the loaded model is unique + # (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + # Try re-saving the layer. This simulates saving a layer + # contained at a hub Module. + input_data_2 = keras.Input(shape=(None,), dtype=tf.string) + output_2 = loaded_model(input_data_2) + model_2 = keras.Model(inputs=input_data_2, outputs=output_2) + new_output_dataset = model_2.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + # Save the model to disk. + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_model_2.keras" + ) + model_2.save(output_path, save_format="keras_v3") + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IndexLookup": index_lookup.IndexLookup}, + ) + + # Ensure that the loaded model is unique + # (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + with self.subTest("saved_model"): + # Save the model to disk. 
+ output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_model" + ) + model.save(output_path, save_format="tf") + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + keras.backend.clear_session() + tf.io.gfile.remove(vocab_file) + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IndexLookup": index_lookup.IndexLookup}, + ) + + # Ensure that the loaded model is unique + # (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + # Try re-saving the layer. This simulates saving a layer + # contained at a hub Module. + input_data_2 = keras.Input(shape=(None,), dtype=tf.string) + output_2 = loaded_model(input_data_2) + model_2 = keras.Model(inputs=input_data_2, outputs=output_2) + new_output_dataset = model_2.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + # Save the model to disk. + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_model_2" + ) + model_2.save(output_path, save_format="tf") + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + keras.backend.clear_session() + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IndexLookup": index_lookup.IndexLookup}, + ) + + # Ensure that the loaded model is unique + # (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + def test_persistence_file_vocab_keras_save_keras_load_tf_save_tf_load(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + # Build and validate a golden model. + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_file, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(output_dataset, expected_output) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") + model.save(output_path, save_format="tf") + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + keras.backend.clear_session() + tf.io.gfile.remove(vocab_file) + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IndexLookup": index_lookup.IndexLookup}, + ) + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + # Try re-saving the layer. This simulates saving a layer contained at + # a hub Module. 
+ input_data_2 = keras.Input(shape=(None,), dtype=tf.string) + output_2 = loaded_model(input_data_2) + model_2 = keras.Model(inputs=input_data_2, outputs=output_2) + new_output_dataset = model_2.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + # Save the model to disk. + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_model_2" + ) + tf.saved_model.save(model_2, output_path) + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + keras.backend.clear_session() + + loaded_model = tf.saved_model.load(output_path) + f = loaded_model.signatures["serving_default"] + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = f(tf.constant(input_array))["model"] + self.assertAllEqual(new_output_dataset, expected_output) + + def test_persistence_file_vocab_keras_save_keras_load_keras_save_keras_load( + self, + ): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + vocab_file = self._write_to_temp_file("temp", vocab_data) + + # Build and validate a golden model. + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_file, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(output_dataset, expected_output) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") + model.save(output_path, save_format="tf") + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + keras.backend.clear_session() + tf.io.gfile.remove(vocab_file) + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IndexLookup": index_lookup.IndexLookup}, + ) + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + # Try re-saving the layer. This simulates saving a layer contained at + # a hub Module. + input_data_2 = keras.Input(shape=(None,), dtype=tf.string) + output_2 = loaded_model(input_data_2) + model_2 = keras.Model(inputs=input_data_2, outputs=output_2) + new_output_dataset = model_2.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + # Save the model to disk. + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_model_2" + ) + model_2.save(output_path, save_format="tf") + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + keras.backend.clear_session() + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IndexLookup": index_lookup.IndexLookup}, + ) + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. 
+ new_output_dataset = model_2.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + def test_static_table_config_weight_data_transfer_succeeds(self): + vocab_data = ["earth", "wind", "and", "fire"] + vocab_file = self._write_to_temp_file("temp", vocab_data) + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + # Build and validate a golden model. + layer_cls = index_lookup.IndexLookup + layer = layer_cls( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_file, + ) + config = layer.get_config() + weights = layer.get_weights() + + layer = layer_cls.from_config(config) + layer.set_weights(weights) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + output = layer(input_data) + model = keras.Model(inputs=input_data, outputs=output) + + new_output_dataset = model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + def test_sparse_output_across_saving(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + + expected_output = [[0.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 0.0, 1.0, 1.0]] + + layer_cls = index_lookup.IndexLookup + layer = layer_cls( + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_data, + output_mode="multi_hot", + sparse=True, + ) + config = layer.get_config() + layer = layer_cls.from_config(config) + + output = layer(input_array) + self.assertIsInstance(output, tf.SparseTensor) + self.assertAllEqual(tf.sparse.to_dense(output), expected_output) + + +class EagerExecutionDisabled( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_lookup(self): + # We need this test for model_to_estimator followed by + # export_saved_model, which will call the layer in a legacy session. + # This could also happen directly if a user calls disable_v2_behavior or + # disable_eager_execution. + with tf.compat.v1.Session(): + with test_utils.run_eagerly_scope(False): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array(["earth", "wind", "and", "fire"]) + expected_output = [1, 2, 3, 4] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = index_lookup.IndexLookup( + max_tokens=None, + num_oov_indices=1, + mask_token=None, + oov_token="[OOV]", + vocabulary_dtype=tf.string, + vocabulary=vocab_data, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + # In a TF1 session the user will need to make sure all tables + # are initialized themselves. + tf.compat.v1.tables_initializer().run() + output_dataset = model(input_array) + self.assertAllEqual(output_dataset, expected_output) if __name__ == "__main__": - # IndexLookup is only exported as a TF2 API. - tf.compat.v1.enable_v2_behavior() - tf.test.main() + # IndexLookup is only exported as a TF2 API. 
+ tf.compat.v1.enable_v2_behavior() + tf.test.main() diff --git a/keras/layers/preprocessing/integer_lookup.py b/keras/layers/preprocessing/integer_lookup.py index b24c32daa78f..78601201f63f 100644 --- a/keras/layers/preprocessing/integer_lookup.py +++ b/keras/layers/preprocessing/integer_lookup.py @@ -14,13 +14,14 @@ # ============================================================================== """Keras string lookup preprocessing layer.""" -# pylint: disable=g-classes-have-attributes +import numpy as np +import tensorflow.compat.v2 as tf from keras.engine import base_preprocessing_layer from keras.layers.preprocessing import index_lookup -import numpy as np -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export @@ -28,405 +29,435 @@ @keras_export( "keras.layers.IntegerLookup", "keras.layers.experimental.preprocessing.IntegerLookup", - v1=[]) + v1=[], +) class IntegerLookup(index_lookup.IndexLookup): - """A preprocessing layer which maps integer features to contiguous ranges. - - This layer maps a set of arbitrary integer input tokens into indexed - integer output via a table-based vocabulary lookup. The layer's output indices - will be contiguously arranged up to the maximum vocab size, even if the input - tokens are non-continguous or unbounded. The layer supports multiple options - for encoding the output via `output_mode`, and has optional support for - out-of-vocabulary (OOV) tokens and masking. - - The vocabulary for the layer must be either supplied on construction or - learned via `adapt()`. During `adapt()`, the layer will analyze a data set, - determine the frequency of individual integer tokens, and create a vocabulary - from them. If the vocabulary is capped in size, the most frequent tokens will - be used to create the vocabulary and all others will be treated as OOV. - - There are two possible output modes for the layer. - When `output_mode` is `"int"`, - input integers are converted to their index in the vocabulary (an integer). - When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, input integers - are encoded into an array where each dimension corresponds to an element in - the vocabulary. - - The vocabulary can optionally contain a mask token as well as an OOV token - (which can optionally occupy multiple indices in the vocabulary, as set - by `num_oov_indices`). - The position of these tokens in the vocabulary is fixed. When `output_mode` is - `"int"`, the vocabulary will begin with the mask token at index 0, followed by - OOV indices, followed by the rest of the vocabulary. When `output_mode` is - `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will begin with OOV - indices and instances of the mask token will be dropped. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - max_tokens: Maximum size of the vocabulary for this layer. This should only - be specified when adapting the vocabulary or when setting - `pad_to_max_tokens=True`. If None, there is no cap on the size of the - vocabulary. Note that this size includes the OOV and mask tokens. Defaults - to None. - num_oov_indices: The number of out-of-vocabulary tokens to use. If this - value is more than 1, OOV inputs are modulated to determine their OOV - value. If this value is 0, OOV inputs will cause an error when calling the - layer. Defaults to 1. 
- mask_token: An integer token that represents masked inputs. When - `output_mode` is `"int"`, the token is included in vocabulary and mapped - to index 0. In other output modes, the token will not appear in the - vocabulary and instances of the mask token in the input will be dropped. - If set to None, no mask term will be added. Defaults to None. - oov_token: Only used when `invert` is True. The token to return for OOV - indices. Defaults to -1. - vocabulary: Optional. Either an array of integers or a string path to a text - file. If passing an array, can pass a tuple, list, 1D numpy array, or 1D - tensor containing the integer vocbulary terms. If passing a file path, the - file should contain one line per term in the vocabulary. If this argument - is set, there is no need to `adapt()` the layer. - vocabulary_dtype: The dtype of the vocabulary terms, for example - `"int64"` or `"int32"`. Defaults to `"int64"`. - idf_weights: Only valid when `output_mode` is `"tf_idf"`. A tuple, list, 1D - numpy array, or 1D tensor or the same length as the vocabulary, containing - the floating point inverse document frequency weights, which will be - multiplied by per sample term counts for the final `tf_idf` weight. If the - `vocabulary` argument is set, and `output_mode` is `"tf_idf"`, this - argument must be supplied. - invert: Only valid when `output_mode` is `"int"`. If True, this layer will - map indices to vocabulary items instead of mapping vocabulary items to - indices. Default to False. - output_mode: Specification for the output of the layer. Defaults to `"int"`. - Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or - `"tf_idf"` configuring the layer as follows: - - `"int"`: Return the vocabulary indices of the input tokens. - - `"one_hot"`: Encodes each individual element in the input into an - array the same size as the vocabulary, containing a 1 at the element - index. If the last dimension is size 1, will encode on that dimension. - If the last dimension is not size 1, will append a new dimension for - the encoded output. - - `"multi_hot"`: Encodes each sample in the input into a single array - the same size as the vocabulary, containing a 1 for each vocabulary - term present in the sample. Treats the last dimension as the sample - dimension, if input shape is (..., sample_length), output shape will - be (..., num_tokens). - - `"count"`: As `"multi_hot"`, but the int array contains a count of the - number of times the token at that index appeared in the sample. - - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to - find the value in each token slot. - For `"int"` output, any shape of input and output is supported. For all - other output modes, currently only output up to rank 2 is supported. - pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`, - `"count"`, or `"tf_idf"`. If True, the output will have its feature axis - padded to `max_tokens` even if the number of unique tokens in the - vocabulary is less than max_tokens, resulting in a tensor of shape - [batch_size, max_tokens] regardless of vocabulary size. Defaults to False. - sparse: Boolean. Only applicable when `output_mode` is `"multi_hot"`, - `"count"`, or `"tf_idf"`. If True, returns a `SparseTensor` instead of a - dense `Tensor`. Defaults to False. - - Examples: - - **Creating a lookup layer with a known vocabulary** - - This example creates a lookup layer with a pre-existing vocabulary. 
- - >>> vocab = [12, 36, 1138, 42] - >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]]) # Note OOV tokens - >>> layer = tf.keras.layers.IntegerLookup(vocabulary=vocab) - >>> layer(data) - - - **Creating a lookup layer with an adapted vocabulary** - - This example creates a lookup layer and generates the vocabulary by analyzing - the dataset. - - >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]]) - >>> layer = tf.keras.layers.IntegerLookup() - >>> layer.adapt(data) - >>> layer.get_vocabulary() - [-1, 42, 1138, 1000, 36, 12] - - Note that the OOV token -1 have been added to the vocabulary. The remaining - tokens are sorted by frequency (42, which has 2 occurrences, is first) then - by inverse sort order. - - >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]]) - >>> layer = tf.keras.layers.IntegerLookup() - >>> layer.adapt(data) - >>> layer(data) - - - - **Lookups with multiple OOV indices** - - This example demonstrates how to use a lookup layer with multiple OOV indices. - When a layer is created with more than one OOV index, any OOV tokens are - hashed into the number of OOV buckets, distributing OOV tokens in a - deterministic fashion across the set. - - >>> vocab = [12, 36, 1138, 42] - >>> data = tf.constant([[12, 1138, 42], [37, 1000, 36]]) - >>> layer = tf.keras.layers.IntegerLookup(vocabulary=vocab, num_oov_indices=2) - >>> layer(data) - - - Note that the output for OOV token 37 is 1, while the output for OOV token - 1000 is 0. The in-vocab terms have their output index increased by 1 from - earlier examples (12 maps to 2, etc) in order to make space for the extra OOV - token. - - **One-hot output** - - Configure the layer with `output_mode='one_hot'`. Note that the first - `num_oov_indices` dimensions in the ont_hot encoding represent OOV values. - - >>> vocab = [12, 36, 1138, 42] - >>> data = tf.constant([12, 36, 1138, 42, 7]) # Note OOV tokens - >>> layer = tf.keras.layers.IntegerLookup( - ... vocabulary=vocab, output_mode='one_hot') - >>> layer(data) - - - **Multi-hot output** - - Configure the layer with `output_mode='multi_hot'`. Note that the first - `num_oov_indices` dimensions in the multi_hot encoding represent OOV tokens - - >>> vocab = [12, 36, 1138, 42] - >>> data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]]) # Note OOV tokens - >>> layer = tf.keras.layers.IntegerLookup( - ... vocabulary=vocab, output_mode='multi_hot') - >>> layer(data) - - - **Token count output** - - Configure the layer with `output_mode='count'`. As with multi_hot output, the - first `num_oov_indices` dimensions in the output represent OOV tokens. - - >>> vocab = [12, 36, 1138, 42] - >>> data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]]) # Note OOV tokens - >>> layer = tf.keras.layers.IntegerLookup( - ... vocabulary=vocab, output_mode='count') - >>> layer(data) - - - **TF-IDF output** - - Configure the layer with `output_mode='tf_idf'`. As with multi_hot output, the - first `num_oov_indices` dimensions in the output represent OOV tokens. - - Each token bin will output `token_count * idf_weight`, where the idf weights - are the inverse document frequency weights per token. These should be provided - along with the vocabulary. Note that the `idf_weight` for OOV tokens will - default to the average of all idf weights passed in. - - >>> vocab = [12, 36, 1138, 42] - >>> idf_weights = [0.25, 0.75, 0.6, 0.4] - >>> data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]]) # Note OOV tokens - >>> layer = tf.keras.layers.IntegerLookup( - ... 
output_mode='tf_idf', vocabulary=vocab, idf_weights=idf_weights) - >>> layer(data) - - - To specify the idf weights for oov tokens, you will need to pass the entire - vocabularly including the leading oov token. - - >>> vocab = [-1, 12, 36, 1138, 42] - >>> idf_weights = [0.9, 0.25, 0.75, 0.6, 0.4] - >>> data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]]) # Note OOV tokens - >>> layer = tf.keras.layers.IntegerLookup( - ... output_mode='tf_idf', vocabulary=vocab, idf_weights=idf_weights) - >>> layer(data) - - - When adapting the layer in tf_idf mode, each input sample will be considered a - document, and idf weight per token will be calculated as - `log(1 + num_documents / (1 + token_document_count))`. - - **Inverse lookup** - - This example demonstrates how to map indices to tokens using this layer. (You - can also use `adapt()` with `inverse=True`, but for simplicity we'll pass the - vocab in this example.) - - >>> vocab = [12, 36, 1138, 42] - >>> data = tf.constant([[1, 3, 4], [4, 0, 2]]) - >>> layer = tf.keras.layers.IntegerLookup(vocabulary=vocab, invert=True) - >>> layer(data) - - - Note that the first index correspond to the oov token by default. - - - **Forward and inverse lookup pairs** - - This example demonstrates how to use the vocabulary of a standard lookup - layer to create an inverse lookup layer. - - >>> vocab = [12, 36, 1138, 42] - >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]]) - >>> layer = tf.keras.layers.IntegerLookup(vocabulary=vocab) - >>> i_layer = tf.keras.layers.IntegerLookup( - ... vocabulary=layer.get_vocabulary(), invert=True) - >>> int_data = layer(data) - >>> i_layer(int_data) - - - In this example, the input token 1000 resulted in an output of -1, since - 1000 was not in the vocabulary - it got represented as an OOV, and all OOV - tokens are returned as -1 in the inverse layer. Also, note that for the - inverse to work, you must have already set the forward layer vocabulary - either directly or via `adapt()` before calling `get_vocabulary()`. - """ - - def __init__(self, - max_tokens=None, - num_oov_indices=1, - mask_token=None, - oov_token=-1, - vocabulary=None, - vocabulary_dtype="int64", - idf_weights=None, - invert=False, - output_mode="int", - sparse=False, - pad_to_max_tokens=False, - **kwargs): - if not tf.dtypes.as_dtype(vocabulary_dtype).is_integer: - raise ValueError("`vocabulary_dtype` must be an integer dtype. " - f"Received: {vocabulary_dtype}") - - # Legacy versions of the IntegerLookup layer set layer dtype to int64, - # instead of the output type. If we see this and output mode is not "int", - # clear the setting so we don't switch types for old SavedModels. - if output_mode != "int" and "dtype" in kwargs and ( - kwargs["dtype"] == tf.int64 or kwargs["dtype"] == "int64"): - del kwargs["dtype"] - - # Support deprecated args for this layer. - if "max_values" in kwargs: - logging.log_first_n(logging.WARN, - "max_values is deprecated, use max_tokens instead.", - 1) - max_tokens = kwargs["max_values"] - del kwargs["max_values"] - if "mask_value" in kwargs: - logging.log_first_n(logging.WARN, - "mask_value is deprecated, use mask_token instead.", - 1) - mask_token = kwargs["mask_value"] - del kwargs["mask_value"] - if "oov_value" in kwargs: - logging.log_first_n(logging.WARN, - "oov_value is deprecated, use oov_token instead.", 1) - oov_token = kwargs["oov_value"] - del kwargs["oov_value"] - - # If max_tokens is set, the token must be greater than 1 - otherwise we - # are creating a 0-element vocab, which doesn't make sense. 
- if max_tokens is not None and max_tokens <= 1: - raise ValueError( - f"If `max_tokens` is set for `IntegerLookup`, it must be " - f"greater than 1. Received: max_tokens={max_tokens}.") - - if num_oov_indices < 0: - raise ValueError( - f"The value of `num_oov_indices` argument for `IntegerLookup` " - f"must >= 0. Received num_oov_indices=" - f"{num_oov_indices}.") - - # Make sure mask and oov are of the dtype we want. - mask_token = None if mask_token is None else np.int64(mask_token) - oov_token = None if oov_token is None else np.int64(oov_token) - - super().__init__( - max_tokens=max_tokens, - num_oov_indices=num_oov_indices, - mask_token=mask_token, - oov_token=oov_token, - vocabulary=vocabulary, - vocabulary_dtype=vocabulary_dtype, - idf_weights=idf_weights, - invert=invert, - output_mode=output_mode, - sparse=sparse, - pad_to_max_tokens=pad_to_max_tokens, - **kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell("IntegerLookup").set(True) - - # We override this method solely to generate a docstring. - def adapt(self, data, batch_size=None, steps=None): - """Computes a vocabulary of interger terms from tokens in a dataset. - - Calling `adapt()` on an `IntegerLookup` layer is an alternative to passing - in a precomputed vocabulary on construction via the `vocabulary` argument. - An `IntegerLookup` layer should always be either adapted over a dataset or - supplied with a vocabulary. - - During `adapt()`, the layer will build a vocabulary of all integer tokens - seen in the dataset, sorted by occurance count, with ties broken by sort - order of the tokens (high to low). At the end of `adapt()`, if `max_tokens` - is set, the voculary wil be truncated to `max_tokens` size. For example, - adapting a layer with `max_tokens=1000` will compute the 1000 most frequent - tokens occurring in the input dataset. If `output_mode='tf-idf'`, `adapt()` - will also learn the document frequencies of each token in the input dataset. - - In order to make `StringLookup` efficient in any distribution context, the - vocabulary is kept static with respect to any compiled `tf.Graph`s that - call the layer. As a consequence, if the layer is adapted a second time, - any models using the layer should be re-compiled. For more information - see `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`. - - `adapt()` is meant only as a single machine utility to compute layer state. - To analyze a dataset that cannot fit on a single machine, see - [Tensorflow Transform](https://www.tensorflow.org/tfx/transform/get_started) - for a multi-machine, map-reduce solution. - - Arguments: - data: The data to train on. It can be passed either as a - `tf.data.Dataset`, or as a numpy array. - batch_size: Integer or `None`. - Number of samples per state update. - If unspecified, `batch_size` will default to 32. - Do not specify the `batch_size` if your data is in the - form of datasets, generators, or `keras.utils.Sequence` instances - (since they generate batches). - steps: Integer or `None`. - Total number of steps (batches of samples) - When training with input tensors such as - TensorFlow data tensors, the default `None` is equal to - the number of samples in your dataset divided by - the batch size, or 1 if that cannot be determined. If x is a - `tf.data` dataset, and 'steps' is None, the epoch will run until - the input dataset is exhausted. When passing an infinitely - repeating dataset, you must specify the `steps` argument. This - argument is not supported with array inputs. 
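Reviewer note on the docstring text being reflowed below: both the old and new wording describe the same index layout for `output_mode="int"`, namely mask token at index 0 (when one is set), then `num_oov_indices` OOV slots, then the vocabulary. As a quick sanity check, here is a minimal sketch of that layout, assuming TensorFlow 2.x with the layer exported as `tf.keras.layers.IntegerLookup` (the vocabulary values are illustrative only):

import tensorflow as tf

# Mask token 0 should land at index 0, the two OOV buckets at indices
# 1-2, and the vocabulary [10, 20, 30, 40] at indices 3-6.
layer = tf.keras.layers.IntegerLookup(
    vocabulary=[10, 20, 30, 40],
    mask_token=0,
    num_oov_indices=2,
)
print(layer.get_vocabulary())  # expected: [0, -1, -1, 10, 20, 30, 40]

# OOV inputs (e.g. 99 and 123) are modulated across the two OOV buckets,
# so each maps deterministically to index 1 or 2.
print(layer(tf.constant([[10, 20, 99], [0, 40, 123]])))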
+ """A preprocessing layer which maps integer features to contiguous ranges. + + This layer maps a set of arbitrary integer input tokens into indexed integer + output via a table-based vocabulary lookup. The layer's output indices will + be contiguously arranged up to the maximum vocab size, even if the input + tokens are non-continguous or unbounded. The layer supports multiple options + for encoding the output via `output_mode`, and has optional support for + out-of-vocabulary (OOV) tokens and masking. + + The vocabulary for the layer must be either supplied on construction or + learned via `adapt()`. During `adapt()`, the layer will analyze a data set, + determine the frequency of individual integer tokens, and create a + vocabulary from them. If the vocabulary is capped in size, the most frequent + tokens will be used to create the vocabulary and all others will be treated + as OOV. + + There are two possible output modes for the layer. When `output_mode` is + `"int"`, input integers are converted to their index in the vocabulary (an + integer). When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, + input integers are encoded into an array where each dimension corresponds to + an element in the vocabulary. + + The vocabulary can optionally contain a mask token as well as an OOV token + (which can optionally occupy multiple indices in the vocabulary, as set + by `num_oov_indices`). + The position of these tokens in the vocabulary is fixed. When `output_mode` + is `"int"`, the vocabulary will begin with the mask token at index 0, + followed by OOV indices, followed by the rest of the vocabulary. When + `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will + begin with OOV indices and instances of the mask token will be dropped. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Args: + max_tokens: Maximum size of the vocabulary for this layer. This should + only be specified when adapting the vocabulary or when setting + `pad_to_max_tokens=True`. If None, there is no cap on the size of the + vocabulary. Note that this size includes the OOV and mask tokens. + Defaults to `None`. + num_oov_indices: The number of out-of-vocabulary tokens to use. If this + value is more than 1, OOV inputs are modulated to determine their OOV + value. If this value is 0, OOV inputs will cause an error when calling + the layer. Defaults to `1`. + mask_token: An integer token that represents masked inputs. When + `output_mode` is `"int"`, the token is included in vocabulary and mapped + to index 0. In other output modes, the token will not appear in the + vocabulary and instances of the mask token in the input will be dropped. + If set to None, no mask term will be added. Defaults to `None`. + oov_token: Only used when `invert` is True. The token to return for OOV + indices. Defaults to `-1`. + vocabulary: Optional. Either an array of integers or a string path to a + text file. If passing an array, can pass a tuple, list, 1D numpy array, + or 1D tensor containing the integer vocbulary terms. If passing a file + path, the file should contain one line per term in the vocabulary. If + this argument is set, there is no need to `adapt()` the layer. + vocabulary_dtype: The dtype of the vocabulary terms, for example + `"int64"` or `"int32"`. Defaults to `"int64"`. + idf_weights: Only valid when `output_mode` is `"tf_idf"`. 
A tuple, list, + 1D numpy array, or 1D tensor of the same length as the vocabulary, + containing the floating point inverse document frequency weights, which + will be multiplied by per sample term counts for the final `tf_idf` + weight. If the `vocabulary` argument is set, and `output_mode` is + `"tf_idf"`, this argument must be supplied. + invert: Only valid when `output_mode` is `"int"`. If True, this layer will + map indices to vocabulary items instead of mapping vocabulary items to + indices. Defaults to `False`. + output_mode: Specification for the output of the layer. Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"` + configuring the layer as follows: + - `"int"`: Return the vocabulary indices of the input tokens. + - `"one_hot"`: Encodes each individual element in the input into an + array the same size as the vocabulary, containing a 1 at the element + index. If the last dimension is size 1, will encode on that + dimension. If the last dimension is not size 1, will append a new + dimension for the encoded output. + - `"multi_hot"`: Encodes each sample in the input into a single array + the same size as the vocabulary, containing a 1 for each vocabulary + term present in the sample. Treats the last dimension as the sample + dimension, if input shape is (..., sample_length), output shape will + be (..., num_tokens). + - `"count"`: As `"multi_hot"`, but the int array contains a count of + the number of times the token at that index appeared in the sample. + - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to + find the value in each token slot. + For `"int"` output, any shape of input and output is supported. For all + other output modes, currently only output up to rank 2 is supported. + Defaults to `"int"`. + pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`, + `"count"`, or `"tf_idf"`. If True, the output will have its feature axis + padded to `max_tokens` even if the number of unique tokens in the + vocabulary is less than max_tokens, resulting in a tensor of shape + [batch_size, max_tokens] regardless of vocabulary size. Defaults to + `False`. + sparse: Boolean. Only applicable when `output_mode` is `"multi_hot"`, + `"count"`, or `"tf_idf"`. If True, returns a `SparseTensor` instead of a + dense `Tensor`. Defaults to `False`. + + Examples: + + **Creating a lookup layer with a known vocabulary** + + This example creates a lookup layer with a pre-existing vocabulary. + + >>> vocab = [12, 36, 1138, 42] + >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]]) # Note OOV tokens + >>> layer = tf.keras.layers.IntegerLookup(vocabulary=vocab) + >>> layer(data) + <tf.Tensor: shape=(2, 3), dtype=int64, numpy= + array([[1, 3, 4], + [4, 0, 2]])> + + **Creating a lookup layer with an adapted vocabulary** + + This example creates a lookup layer and generates the vocabulary by + analyzing the dataset. + + >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]]) + >>> layer = tf.keras.layers.IntegerLookup() + >>> layer.adapt(data) + >>> layer.get_vocabulary() + [-1, 42, 1138, 1000, 36, 12] + + Note that the OOV token -1 has been added to the vocabulary. The remaining + tokens are sorted by frequency (42, which has 2 occurrences, is first) then + by inverse sort order. + + >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]]) + >>> layer = tf.keras.layers.IntegerLookup() + >>> layer.adapt(data) + >>> layer(data) + <tf.Tensor: shape=(2, 3), dtype=int64, numpy= + array([[5, 2, 1], + [1, 3, 4]])> + + **Lookups with multiple OOV + indices** + + This example demonstrates how to use a lookup layer with multiple OOV + indices.
When a layer is created with more than one OOV index, any OOV + tokens are hashed into the number of OOV buckets, distributing OOV tokens in + a deterministic fashion across the set. + + >>> vocab = [12, 36, 1138, 42] + >>> data = tf.constant([[12, 1138, 42], [37, 1000, 36]]) + >>> layer = tf.keras.layers.IntegerLookup( + ... vocabulary=vocab, num_oov_indices=2) + >>> layer(data) + <tf.Tensor: shape=(2, 3), dtype=int64, numpy= + array([[2, 4, 5], + [1, 0, 3]])> + + Note that the output for OOV token 37 is 1, while the output for OOV token + 1000 is 0. The in-vocab terms have their output index increased by 1 from + earlier examples (12 maps to 2, etc) in order to make space for the extra + OOV token. + + **One-hot output** + + Configure the layer with `output_mode='one_hot'`. Note that the first + `num_oov_indices` dimensions in the one_hot encoding represent OOV values. + + >>> vocab = [12, 36, 1138, 42] + >>> data = tf.constant([12, 36, 1138, 42, 7]) # Note OOV tokens + >>> layer = tf.keras.layers.IntegerLookup( + ... vocabulary=vocab, output_mode='one_hot') + >>> layer(data) + <tf.Tensor: shape=(5, 5), dtype=float32, numpy= + array([[0., 1., 0., 0., 0.], + [0., 0., 1., 0., 0.], + [0., 0., 0., 1., 0.], + [0., 0., 0., 0., 1.], + [1., 0., 0., 0., 0.]], dtype=float32)> + + **Multi-hot output** + + Configure the layer with `output_mode='multi_hot'`. Note that the first + `num_oov_indices` dimensions in the multi_hot encoding represent OOV tokens. + + >>> vocab = [12, 36, 1138, 42] + >>> data = tf.constant([[12, 1138, 42, 42], + ... [42, 7, 36, 7]]) # Note OOV tokens + >>> layer = tf.keras.layers.IntegerLookup( + ... vocabulary=vocab, output_mode='multi_hot') + >>> layer(data) + <tf.Tensor: shape=(2, 5), dtype=float32, numpy= + array([[0., 1., 0., 1., 1.], + [1., 0., 1., 0., 1.]], dtype=float32)> + + **Token count output** + + Configure the layer with `output_mode='count'`. As with multi_hot output, + the first `num_oov_indices` dimensions in the output represent OOV tokens. + + >>> vocab = [12, 36, 1138, 42] + >>> data = tf.constant([[12, 1138, 42, 42], + ... [42, 7, 36, 7]]) # Note OOV tokens + >>> layer = tf.keras.layers.IntegerLookup( + ... vocabulary=vocab, output_mode='count') + >>> layer(data) + <tf.Tensor: shape=(2, 5), dtype=float32, numpy= + array([[0., 1., 0., 1., 2.], + [2., 0., 1., 0., 1.]], dtype=float32)> + + **TF-IDF output** + + Configure the layer with `output_mode='tf_idf'`. As with multi_hot output, + the first `num_oov_indices` dimensions in the output represent OOV tokens. + + Each token bin will output `token_count * idf_weight`, where the idf weights + are the inverse document frequency weights per token. These should be + provided along with the vocabulary. Note that the `idf_weight` for OOV + tokens will default to the average of all idf weights passed in. + + >>> vocab = [12, 36, 1138, 42] + >>> idf_weights = [0.25, 0.75, 0.6, 0.4] + >>> data = tf.constant([[12, 1138, 42, 42], + ... [42, 7, 36, 7]]) # Note OOV tokens + >>> layer = tf.keras.layers.IntegerLookup( + ... output_mode='tf_idf', vocabulary=vocab, idf_weights=idf_weights) + >>> layer(data) + <tf.Tensor: shape=(2, 5), dtype=float32, numpy= + array([[0. , 0.25, 0. , 0.6 , 0.8 ], + [1. , 0. , 0.75, 0. , 0.4 ]], dtype=float32)> + + To specify the idf weights for oov tokens, you will need to pass the entire + vocabulary including the leading oov token. + + >>> vocab = [-1, 12, 36, 1138, 42] + >>> idf_weights = [0.9, 0.25, 0.75, 0.6, 0.4] + >>> data = tf.constant([[12, 1138, 42, 42], + ... [42, 7, 36, 7]]) # Note OOV tokens + >>> layer = tf.keras.layers.IntegerLookup( + ... output_mode='tf_idf', vocabulary=vocab, idf_weights=idf_weights) + >>> layer(data) + <tf.Tensor: shape=(2, 5), dtype=float32, numpy= + array([[0. , 0.25, 0. , 0.6 , 0.8 ], + [1.8 , 0. , 0.75, 0. , 0.4 ]], dtype=float32)> + + When adapting the layer in tf_idf mode, each input sample will be considered + a document, and idf weight per token will be calculated as + `log(1 + num_documents / (1 + token_document_count))`. + + **Inverse lookup** + + This example demonstrates how to map indices to tokens using this layer. + (You can also use `adapt()` with `invert=True`, but for simplicity we'll + pass the vocab in this example.)
+
+ >>> vocab = [12, 36, 1138, 42]
+ >>> data = tf.constant([[1, 3, 4], [4, 0, 2]])
+ >>> layer = tf.keras.layers.IntegerLookup(vocabulary=vocab, invert=True)
+ >>> layer(data)
+
+
+ Note that the first index corresponds to the OOV token by default.
+
+
+ **Forward and inverse lookup pairs**
+
+ This example demonstrates how to use the vocabulary of a standard lookup
+ layer to create an inverse lookup layer.
+
+ >>> vocab = [12, 36, 1138, 42]
+ >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
+ >>> layer = tf.keras.layers.IntegerLookup(vocabulary=vocab)
+ >>> i_layer = tf.keras.layers.IntegerLookup(
+ ... vocabulary=layer.get_vocabulary(), invert=True)
+ >>> int_data = layer(data)
+ >>> i_layer(int_data)
+
+
+ In this example, the input token 1000 resulted in an output of -1, since
+ 1000 was not in the vocabulary; it was represented as an OOV token, and all
+ OOV tokens are returned as -1 in the inverse layer. Also, note that for the
+ inverse to work, you must have already set the forward layer vocabulary
+ either directly or via `adapt()` before calling `get_vocabulary()`.
"""
- super().adapt(data, batch_size=batch_size, steps=steps)
+
+ def __init__(
+ self,
+ max_tokens=None,
+ num_oov_indices=1,
+ mask_token=None,
+ oov_token=-1,
+ vocabulary=None,
+ vocabulary_dtype="int64",
+ idf_weights=None,
+ invert=False,
+ output_mode="int",
+ sparse=False,
+ pad_to_max_tokens=False,
+ **kwargs,
+ ):
+ if not tf.dtypes.as_dtype(vocabulary_dtype).is_integer:
+ raise ValueError(
+ "`vocabulary_dtype` must be an integer dtype. "
+ f"Received: {vocabulary_dtype}"
+ )
+
+ # Legacy versions of the IntegerLookup layer set layer dtype to int64,
+ # instead of the output type. If we see this and output mode is not
+ # "int", clear the setting so we don't switch types for old SavedModels.
+ if (
+ output_mode != "int"
+ and "dtype" in kwargs
+ and (kwargs["dtype"] == tf.int64 or kwargs["dtype"] == "int64")
+ ):
+ del kwargs["dtype"]
+
+ # Support deprecated args for this layer.
+ if "max_values" in kwargs:
+ logging.log_first_n(
+ logging.WARN,
+ "max_values is deprecated, use max_tokens instead.",
+ 1,
+ )
+ max_tokens = kwargs["max_values"]
+ del kwargs["max_values"]
+ if "mask_value" in kwargs:
+ logging.log_first_n(
+ logging.WARN,
+ "mask_value is deprecated, use mask_token instead.",
+ 1,
+ )
+ mask_token = kwargs["mask_value"]
+ del kwargs["mask_value"]
+ if "oov_value" in kwargs:
+ logging.log_first_n(
+ logging.WARN,
+ "oov_value is deprecated, use oov_token instead.",
+ 1,
+ )
+ oov_token = kwargs["oov_value"]
+ del kwargs["oov_value"]
+
+ # If max_tokens is set, the value must be greater than 1; otherwise we
+ # are creating a 0-element vocab, which doesn't make sense.
+ if max_tokens is not None and max_tokens <= 1:
+ raise ValueError(
+ "If `max_tokens` is set for `IntegerLookup`, it must be "
+ f"greater than 1. Received: max_tokens={max_tokens}."
+ )
+
+ if num_oov_indices < 0:
+ raise ValueError(
+ "The value of the `num_oov_indices` argument for "
+ "`IntegerLookup` must be >= 0. Received num_oov_indices="
+ f"{num_oov_indices}."
+ )
+
+ # Make sure mask and oov are of the dtype we want.
+ mask_token = None if mask_token is None else np.int64(mask_token)
+ oov_token = None if oov_token is None else np.int64(oov_token)
+
+ super().__init__(
+ max_tokens=max_tokens,
+ num_oov_indices=num_oov_indices,
+ mask_token=mask_token,
+ oov_token=oov_token,
+ vocabulary=vocabulary,
+ vocabulary_dtype=vocabulary_dtype,
+ idf_weights=idf_weights,
+ invert=invert,
+ output_mode=output_mode,
+ sparse=sparse,
+ pad_to_max_tokens=pad_to_max_tokens,
+ **kwargs,
+ )
+ base_preprocessing_layer.keras_kpl_gauge.get_cell("IntegerLookup").set(
+ True
+ )
+
+ # We override this method solely to generate a docstring.
+ def adapt(self, data, batch_size=None, steps=None):
+ """Computes a vocabulary of integer terms from tokens in a dataset.
+
+ Calling `adapt()` on an `IntegerLookup` layer is an alternative to
+ passing in a precomputed vocabulary on construction via the
+ `vocabulary` argument. An `IntegerLookup` layer should always be either
+ adapted over a dataset or supplied with a vocabulary.
+
+ During `adapt()`, the layer will build a vocabulary of all integer
+ tokens seen in the dataset, sorted by occurrence count, with ties broken
+ by sort order of the tokens (high to low). At the end of `adapt()`, if
+ `max_tokens` is set, the vocabulary will be truncated to `max_tokens`
+ size. For example, adapting a layer with `max_tokens=1000` will compute
+ the 1000 most frequent tokens occurring in the input dataset. If
+ `output_mode='tf_idf'`, `adapt()` will also learn the document
+ frequencies of each token in the input dataset.
+
+ In order to make `IntegerLookup` efficient in any distribution context,
+ the vocabulary is kept static with respect to any compiled `tf.Graph`s
+ that call the layer. As a consequence, if the layer is adapted a second
+ time, any models using the layer should be re-compiled. For more
+ information see
+ `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`.
+
+ `adapt()` is meant only as a single-machine utility to compute layer
+ state. To analyze a dataset that cannot fit on a single machine, see
+ [Tensorflow Transform](
+ https://www.tensorflow.org/tfx/transform/get_started) for a
+ multi-machine, map-reduce solution.
+
+ Arguments:
+ data: The data to train on. It can be passed either as a
+ `tf.data.Dataset`, or as a numpy array.
+ batch_size: Integer or `None`.
+ Number of samples per state update.
+ If unspecified, `batch_size` will default to 32.
+ Do not specify the `batch_size` if your data is in the
+ form of datasets, generators, or `keras.utils.Sequence` instances
+ (since they generate batches).
+ steps: Integer or `None`.
+ Total number of steps (batches of samples).
+ When training with input tensors such as
+ TensorFlow data tensors, the default `None` is equal to
+ the number of samples in your dataset divided by
+ the batch size, or 1 if that cannot be determined. If `data` is a
+ `tf.data` dataset, and `steps` is None, the epoch will run until
+ the input dataset is exhausted. When passing an infinitely
+ repeating dataset, you must specify the `steps` argument. This
+ argument is not supported with array inputs.
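As a minimal sketch of the adapt-then-truncate behavior described above (not taken from the patch; `max_tokens=4` is an illustrative choice, and the expected vocabulary follows from the frequency-then-descending-value ordering):

import numpy as np
import tensorflow as tf

# 42 occurs twice; every other token occurs once.
data = np.array([[12, 1138, 42], [42, 1000, 36]], dtype=np.int64)
layer = tf.keras.layers.IntegerLookup(max_tokens=4)
layer.adapt(data)
# OOV token first, then 42 (most frequent), then ties broken by
# descending token value, truncated to max_tokens=4.
print(layer.get_vocabulary())  # expected: [-1, 42, 1138, 1000]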
+ """ + super().adapt(data, batch_size=batch_size, steps=steps) diff --git a/keras/layers/preprocessing/integer_lookup_test.py b/keras/layers/preprocessing/integer_lookup_test.py index 17f29b77a9bf..4a06475880cb 100644 --- a/keras/layers/preprocessing/integer_lookup_test.py +++ b/keras/layers/preprocessing/integer_lookup_test.py @@ -14,605 +14,673 @@ # ============================================================================== """Tests for Keras text vectorization preprocessing layer.""" -import tensorflow.compat.v2 as tf - import gc import itertools import os import random -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils from keras.layers.preprocessing import integer_lookup from keras.layers.preprocessing import preprocessing_test_utils +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils def _get_end_to_end_test_cases(): - test_cases = ( - { - "testcase_name": - "test_ints_soft_vocab_cap", - # Create an array where 1138 is the most frequent term, followed by - # 1729, then 725, then 42. This ensures that the vocab accumulator - # is sorting by frequency. - "vocab_data": - np.array([[42], [1138], [1138], [1138], [1138], [1729], [1729], - [1729], [725], [725]], - dtype=np.int64), - "input_data": - np.array([[1138], [1729], [725], [42], [42], [725], [1138], [4]], - dtype=np.int64), - "kwargs": { - "max_tokens": None, - "dtype": tf.int64, - }, - "expected_output": [[1], [2], [3], [4], [4], [3], [1], [0]], - "input_dtype": - tf.int64 - },) - - crossed_test_cases = [] - # Cross above test cases with use_dataset in (True, False) - for use_dataset in (True, False): - for case in test_cases: - case = case.copy() - if use_dataset: - case["testcase_name"] = case["testcase_name"] + "_with_dataset" - case["use_dataset"] = use_dataset - crossed_test_cases.append(case) - - return crossed_test_cases + test_cases = ( + { + "testcase_name": "test_ints_soft_vocab_cap", + # Create an array where 1138 is the most frequent term, followed by + # 1729, then 725, then 42. This ensures that the vocab accumulator + # is sorting by frequency. 
+ "vocab_data": np.array( + [ + [42], + [1138], + [1138], + [1138], + [1138], + [1729], + [1729], + [1729], + [725], + [725], + ], + dtype=np.int64, + ), + "input_data": np.array( + [[1138], [1729], [725], [42], [42], [725], [1138], [4]], + dtype=np.int64, + ), + "kwargs": { + "max_tokens": None, + "dtype": tf.int64, + }, + "expected_output": [[1], [2], [3], [4], [4], [3], [1], [0]], + "input_dtype": tf.int64, + }, + ) + + crossed_test_cases = [] + # Cross above test cases with use_dataset in (True, False) + for use_dataset in (True, False): + for case in test_cases: + case = case.copy() + if use_dataset: + case["testcase_name"] = case["testcase_name"] + "_with_dataset" + case["use_dataset"] = use_dataset + crossed_test_cases.append(case) + + return crossed_test_cases @test_combinations.run_all_keras_modes(always_skip_v1=True) -class IntegerLookupLayerTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - @parameterized.named_parameters(*_get_end_to_end_test_cases()) - def test_layer_end_to_end_with_adapt(self, vocab_data, input_data, kwargs, - use_dataset, expected_output, - input_dtype): - cls = integer_lookup.IntegerLookup - expected_output_dtype = tf.int64 - input_shape = input_data.shape - - if use_dataset: - # Keras APIs expect batched datasets. - # TODO(rachelim): `model.predict` predicts the result on each - # dataset batch separately, then tries to concatenate the results - # together. When the results have different shapes on the non-concat - # axis (which can happen in the output_mode = INT case for - # IntegerLookup), the concatenation fails. In real use cases, this may - # not be an issue because users are likely to pipe the preprocessing layer - # into other keras layers instead of predicting it directly. A workaround - # for these unit tests is to have the dataset only contain one batch, so - # no concatenation needs to happen with the result. For consistency with - # numpy input, we should make `predict` join differently shaped results - # together sensibly, with 0 padding. - input_data = tf.data.Dataset.from_tensor_slices(input_data).batch( - input_shape[0]) - vocab_data = tf.data.Dataset.from_tensor_slices(vocab_data).batch( - input_shape[0]) - - output_data = test_utils.layer_test( - cls, - kwargs=kwargs, - input_shape=input_shape, - input_data=input_data, - input_dtype=input_dtype, - expected_output_dtype=expected_output_dtype, - validate_training=False, - adapt_data=vocab_data) - self.assertAllClose(expected_output, output_data) - - def test_layer_with_list_input(self): - vocab = [12, 36, 1138, 42] - data = [[12, 1138, 42], [42, 1000, 36]] # Note OOV tokens - layer = integer_lookup.IntegerLookup(vocabulary=vocab) - output = layer(data) - expected_output = np.array([[1, 3, 4], [4, 0, 2]]) - self.assertEqual(output.numpy().tolist(), expected_output.tolist()) +class IntegerLookupLayerTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + @parameterized.named_parameters(*_get_end_to_end_test_cases()) + def test_layer_end_to_end_with_adapt( + self, + vocab_data, + input_data, + kwargs, + use_dataset, + expected_output, + input_dtype, + ): + cls = integer_lookup.IntegerLookup + expected_output_dtype = tf.int64 + input_shape = input_data.shape + + if use_dataset: + # Keras APIs expect batched datasets. + # TODO(rachelim): `model.predict` predicts the result on each + # dataset batch separately, then tries to concatenate the results + # together. 
When the results have different shapes on the non-concat + # axis (which can happen in the output_mode = INT case for + # IntegerLookup), the concatenation fails. In real use cases, this + # may not be an issue because users are likely to pipe the + # preprocessing layer into other keras layers instead of predicting + # it directly. A workaround for these unit tests is to have the + # dataset only contain one batch, so no concatenation needs to + # happen with the result. For consistency with numpy input, we + # should make `predict` join differently shaped results together + # sensibly, with 0 padding. + input_data = tf.data.Dataset.from_tensor_slices(input_data).batch( + input_shape[0] + ) + vocab_data = tf.data.Dataset.from_tensor_slices(vocab_data).batch( + input_shape[0] + ) + + output_data = test_utils.layer_test( + cls, + kwargs=kwargs, + input_shape=input_shape, + input_data=input_data, + input_dtype=input_dtype, + expected_output_dtype=expected_output_dtype, + validate_training=False, + adapt_data=vocab_data, + ) + self.assertAllClose(expected_output, output_data) + + def test_layer_with_list_input(self): + vocab = [12, 36, 1138, 42] + data = [[12, 1138, 42], [42, 1000, 36]] # Note OOV tokens + layer = integer_lookup.IntegerLookup(vocabulary=vocab) + output = layer(data) + expected_output = np.array([[1, 3, 4], [4, 0, 2]]) + self.assertEqual(output.numpy().tolist(), expected_output.tolist()) @test_combinations.run_all_keras_modes(always_skip_v1=True) class CategoricalEncodingInputTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_sparse_int_input(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.SparseTensor( - indices=[[0, 0], [1, 2]], - values=np.array([13, 32], dtype=np.int64), - dense_shape=[3, 4]) - - expected_indices = [[0, 0], [1, 2]] - expected_values = [4, 0] - expected_dense_shape = [3, 4] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) - layer = integer_lookup.IntegerLookup(max_tokens=None) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array, steps=1) - self.assertAllEqual(expected_indices, output_data.indices) - self.assertAllEqual(expected_values, output_data.values) - self.assertAllEqual(expected_dense_shape, output_data.dense_shape) - - def test_ragged_int_input(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.ragged.constant([[10, 11, 13], [13, 12, 10, 42]], - dtype=np.int64) - expected_output = [[1, 2, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, ragged=True) - layer = integer_lookup.IntegerLookup(max_tokens=None) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_sparse_int_input(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=np.array([13, 32], dtype=np.int64), + dense_shape=[3, 4], + ) + + expected_indices = [[0, 0], [1, 2]] + expected_values = [4, 0] + expected_dense_shape = [3, 4] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) + layer = integer_lookup.IntegerLookup(max_tokens=None) + 
layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array, steps=1) + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_ragged_int_input(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.ragged.constant( + [[10, 11, 13], [13, 12, 10, 42]], dtype=np.int64 + ) + expected_output = [[1, 2, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, ragged=True) + layer = integer_lookup.IntegerLookup(max_tokens=None) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) @test_combinations.run_all_keras_modes(always_skip_v1=True) class CategoricalEncodingMultiOOVTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_sparse_int_input_multi_bucket(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.SparseTensor( - indices=[[0, 0], [1, 2]], - values=np.array([13, 133], dtype=np.int64), - dense_shape=[3, 4]) - - expected_indices = [[0, 0], [1, 2]] - expected_values = [6, 2] - expected_dense_shape = [3, 4] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) - layer = integer_lookup.IntegerLookup( - max_tokens=None, - dtype=tf.int64, - num_oov_indices=2, - mask_token=0, - oov_token=-1) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array, steps=1) - self.assertAllEqual(expected_indices, output_data.indices) - self.assertAllEqual(expected_values, output_data.values) - self.assertAllEqual(expected_dense_shape, output_data.dense_shape) - - def test_ragged_int_input_multi_bucket(self): - vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = tf.ragged.constant([[10, 11, 13], [13, 12, 10, 133]], - dtype=np.int64) - expected_output = [[2, 3, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64, ragged=True) - layer = integer_lookup.IntegerLookup(max_tokens=None, num_oov_indices=2) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_sparse_int_input_multi_bucket(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=np.array([13, 133], dtype=np.int64), + dense_shape=[3, 4], + ) + + expected_indices = [[0, 0], [1, 2]] + expected_values = [6, 2] + expected_dense_shape = [3, 4] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True) + layer = integer_lookup.IntegerLookup( + max_tokens=None, + dtype=tf.int64, + num_oov_indices=2, + mask_token=0, + oov_token=-1, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array, steps=1) + self.assertAllEqual(expected_indices, output_data.indices) + 
self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_ragged_int_input_multi_bucket(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = tf.ragged.constant( + [[10, 11, 13], [13, 12, 10, 133]], dtype=np.int64 + ) + expected_output = [[2, 3, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64, ragged=True) + layer = integer_lookup.IntegerLookup(max_tokens=None, num_oov_indices=2) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) @test_combinations.run_all_keras_modes(always_skip_v1=True) class CategoricalEncodingAdaptTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_sparse_adapt(self): - vocab_data = tf.SparseTensor( - indices=[[0, 0], [0, 1], [1, 2]], - values=[203, 1729, 203], - dense_shape=[3, 4]) - vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) - - layer = integer_lookup.IntegerLookup() - layer.adapt(vocab_dataset) - expected_vocabulary = [-1, 203, 1729] - self.assertAllEqual(expected_vocabulary, layer.get_vocabulary()) - - def test_ragged_adapt(self): - vocab_data = tf.ragged.constant([[203], [1729, 203]]) - vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) - - layer = integer_lookup.IntegerLookup() - layer.adapt(vocab_dataset) - expected_vocabulary = [-1, 203, 1729] - self.assertAllEqual(expected_vocabulary, layer.get_vocabulary()) - - def test_single_int_generator_dataset(self): - - def word_gen(): - for _ in itertools.count(1): - yield random.randint(0, 100) - - ds = tf.data.Dataset.from_generator(word_gen, tf.int64, tf.TensorShape([])) - batched_ds = ds.take(2) - input_t = keras.Input(shape=(), dtype=tf.int64) - layer = integer_lookup.IntegerLookup( - max_tokens=10, num_oov_indices=0, mask_token=None, oov_token=None) - _ = layer(input_t) - layer.adapt(batched_ds) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_sparse_adapt(self): + vocab_data = tf.SparseTensor( + indices=[[0, 0], [0, 1], [1, 2]], + values=[203, 1729, 203], + dense_shape=[3, 4], + ) + vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) + + layer = integer_lookup.IntegerLookup() + layer.adapt(vocab_dataset) + expected_vocabulary = [-1, 203, 1729] + self.assertAllEqual(expected_vocabulary, layer.get_vocabulary()) + + def test_ragged_adapt(self): + vocab_data = tf.ragged.constant([[203], [1729, 203]]) + vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) + + layer = integer_lookup.IntegerLookup() + layer.adapt(vocab_dataset) + expected_vocabulary = [-1, 203, 1729] + self.assertAllEqual(expected_vocabulary, layer.get_vocabulary()) + + def test_single_int_generator_dataset(self): + def word_gen(): + for _ in itertools.count(1): + yield random.randint(0, 100) + + ds = tf.data.Dataset.from_generator( + word_gen, tf.int64, tf.TensorShape([]) + ) + batched_ds = ds.take(2) + input_t = keras.Input(shape=(), dtype=tf.int64) + layer = integer_lookup.IntegerLookup( + max_tokens=10, num_oov_indices=0, mask_token=None, oov_token=None + ) + _ = layer(input_t) + layer.adapt(batched_ds) @test_combinations.run_all_keras_modes(always_skip_v1=True) -class IntegerLookupOutputTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_int_output(self): - vocab_data = [42, 
1138, 725, 1729] - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup() - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_output_shape(self): - input_data = keras.Input(shape=(4,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup(max_tokens=2, num_oov_indices=1) - int_data = layer(input_data) - self.assertAllEqual(int_data.shape[1:], input_data.shape[1:]) - - def test_int_output_with_mask(self): - vocab_data = [42, 1138, 725, 1729] - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup(max_tokens=None, mask_token=0) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_explicit_vocab(self): - vocab_data = [42, 1138, 725, 1729] - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup( - vocabulary=vocab_data, - max_tokens=None, - ) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_explicit_vocab_with_special_tokens(self): - vocab_data = [0, -1, 42, 1138, 725, 1729] - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup( - vocabulary=vocab_data, - max_tokens=None, - mask_token=0, - ) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_no_oov(self): - vocab_data = [42, 1138, 725, 1729] - valid_input = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 0]]) - invalid_input = np.array([[42, 1138, 725, 203], [1729, 725, 42, 203]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup( - vocabulary=vocab_data, mask_token=0, num_oov_indices=0) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(valid_input) - self.assertAllEqual(expected_output, output_data) - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - "found OOV values.*203"): - _ = model.predict(invalid_input) - - def test_inverse_output(self): - vocab_data = [-1, 42, 1138, 725, 1729] - input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 0]]) - expected_output = np.array([[42, 1138, 725, 1729], [1729, 725, 42, -1]]) - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup(invert=True) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = 
keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_forward_backward_explicit_vocab(self): - vocab_data = [42, 1138, 725, 1729] - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = np.array([[42, 1138, 725, 1729], [1729, 725, 42, -1]]) - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup(vocabulary=vocab_data) - inverse_layer = integer_lookup.IntegerLookup( - vocabulary=vocab_data, invert=True) - int_data = layer(input_data) - inverse_data = inverse_layer(int_data) - model = keras.Model(inputs=input_data, outputs=inverse_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_forward_backward_adapted_vocab(self): - adapt_data = [42, 1138, 725, 1729] - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = np.array([[42, 1138, 725, 1729], [1729, 725, 42, -1]]) - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup() - layer.adapt(adapt_data) - inverse_layer = integer_lookup.IntegerLookup( - vocabulary=layer.get_vocabulary(), invert=True) - int_data = layer(input_data) - inverse_data = inverse_layer(int_data) - model = keras.Model(inputs=input_data, outputs=inverse_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) +class IntegerLookupOutputTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_int_output(self): + vocab_data = [42, 1138, 725, 1729] + input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup() + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_output_shape(self): + input_data = keras.Input(shape=(4,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup(max_tokens=2, num_oov_indices=1) + int_data = layer(input_data) + self.assertAllEqual(int_data.shape[1:], input_data.shape[1:]) + + def test_int_output_with_mask(self): + vocab_data = [42, 1138, 725, 1729] + input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup(max_tokens=None, mask_token=0) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_explicit_vocab(self): + vocab_data = [42, 1138, 725, 1729] + input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup( + vocabulary=vocab_data, + max_tokens=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def 
test_int_output_explicit_vocab_with_special_tokens(self): + vocab_data = [0, -1, 42, 1138, 725, 1729] + input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup( + vocabulary=vocab_data, + max_tokens=None, + mask_token=0, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_no_oov(self): + vocab_data = [42, 1138, 725, 1729] + valid_input = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 0]]) + invalid_input = np.array([[42, 1138, 725, 203], [1729, 725, 42, 203]]) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup( + vocabulary=vocab_data, mask_token=0, num_oov_indices=0 + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(valid_input) + self.assertAllEqual(expected_output, output_data) + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, "found OOV values.*203" + ): + _ = model.predict(invalid_input) + + def test_inverse_output(self): + vocab_data = [-1, 42, 1138, 725, 1729] + input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 0]]) + expected_output = np.array([[42, 1138, 725, 1729], [1729, 725, 42, -1]]) + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup(invert=True) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_forward_backward_explicit_vocab(self): + vocab_data = [42, 1138, 725, 1729] + input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) + expected_output = np.array([[42, 1138, 725, 1729], [1729, 725, 42, -1]]) + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup(vocabulary=vocab_data) + inverse_layer = integer_lookup.IntegerLookup( + vocabulary=vocab_data, invert=True + ) + int_data = layer(input_data) + inverse_data = inverse_layer(int_data) + model = keras.Model(inputs=input_data, outputs=inverse_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_forward_backward_adapted_vocab(self): + adapt_data = [42, 1138, 725, 1729] + input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) + expected_output = np.array([[42, 1138, 725, 1729], [1729, 725, 42, -1]]) + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup() + layer.adapt(adapt_data) + inverse_layer = integer_lookup.IntegerLookup( + vocabulary=layer.get_vocabulary(), invert=True + ) + int_data = layer(input_data) + inverse_data = inverse_layer(int_data) + model = keras.Model(inputs=input_data, outputs=inverse_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) @test_combinations.run_all_keras_modes(always_skip_v1=True) class IntegerLookupVocabularyTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def _write_to_temp_file(self, file_name, vocab_list): - vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") - 
with tf.io.gfile.GFile(vocab_path, "w") as writer: - for vocab in vocab_list: - writer.write(str(vocab) + "\n") - writer.flush() - writer.close() - return vocab_path - - def test_int_output_explicit_vocab(self): - vocab_data = [42, 1138, 725, 1729] - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup(vocabulary=vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_no_vocab(self): - with self.assertRaisesRegex(RuntimeError, - "you must set the layer's vocabulary"): - layer = integer_lookup.IntegerLookup(output_mode="binary") - layer([[1]]) - - def test_one_hot_output(self): - vocab_data = [2, 3, 4, 5] - input_array = np.array([2, 3, 4, 5, 6]) - expected_output = [ - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - ] - - input_data = keras.Input(shape=(1,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup( - vocabulary=vocab_data, output_mode="one_hot") - res = layer(input_data) - model = keras.Model(inputs=input_data, outputs=res) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_multi_hot_output(self): - vocab_data = [2, 3, 4, 5] - input_array = np.array([[2, 2, 3, 4], [0, 1, 5, 2]]) - expected_output = [[0, 1, 1, 1, 0], [1, 1, 0, 0, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup( - vocabulary=vocab_data, output_mode="multi_hot") - res = layer(input_data) - model = keras.Model(inputs=input_data, outputs=res) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_count_output(self): - vocab_data = [2, 3, 4, 5] - input_array = np.array([[2, 2, 3, 4], [0, 1, 5, 6]]) - expected_output = [[0, 2, 1, 1, 0], [3, 0, 0, 0, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup( - vocabulary=vocab_data, output_mode="count") - res = layer(input_data) - model = keras.Model(inputs=input_data, outputs=res) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_sparse_output(self): - vocab_data = [2, 3, 4, 5] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup( - vocabulary=vocab_data, output_mode="multi_hot", sparse=True) - res = layer(input_data) - self.assertTrue(res.__class__.__name__, "SparseKerasTensor") - - def test_get_vocab_returns_int(self): - vocab_data = [42, 1138, 725, 1729] - expected_vocab = [-1, 42, 1138, 725, 1729] - layer = integer_lookup.IntegerLookup(vocabulary=vocab_data) - layer_vocab = layer.get_vocabulary() - self.assertAllEqual(expected_vocab, layer_vocab) - self.assertIsInstance(layer_vocab[0], np.int64) - - def test_int_output_explicit_vocab_from_file(self): - vocab_list = [42, 1138, 725, 1729] - vocab_path = self._write_to_temp_file("vocab_file", vocab_list) - - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup(vocabulary=vocab_path) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - 
output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_inverted_vocab_from_file(self): - vocab_list = [42, 1138, 725, 1729] - vocab_path = self._write_to_temp_file("vocab_file", vocab_list) - - input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 0]]) - expected_output = [[42, 1138, 725, 1729], [1729, 725, 42, -1]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup(vocabulary=vocab_path, invert=True) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_inverted_vocab_from_file_with_mask(self): - vocab_list = [42, 1138, 725, 1729] - vocab_path = self._write_to_temp_file("vocab_file", vocab_list) - - input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 0]]) - expected_output = [[42, 1138, 725, 1729], [1729, 725, 42, -10]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup( - vocabulary=vocab_path, invert=True, mask_value=-10) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_explicit_vocab_from_file_via_setter(self): - vocab_list = [42, 1138, 725, 1729] - vocab_path = self._write_to_temp_file("vocab_file", vocab_list) - - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup() - layer.set_vocabulary(vocab_path) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_non_unique_vocab_fails(self): - vocab_data = [42, 1138, 725, 1729, 1729] - with self.assertRaisesRegex(ValueError, ".*repeated term.*1729.*"): - _ = integer_lookup.IntegerLookup(vocabulary=vocab_data) - - def test_non_unique_vocab_from_file_fails(self): - vocab_list = [42, 1138, 725, 1729, 42] - vocab_path = self._write_to_temp_file("repeat_vocab_file", vocab_list) - with self.assertRaisesRegex( - tf.errors.FailedPreconditionError, - ".*HashTable has different value for same key.*42.*"): - _ = integer_lookup.IntegerLookup(vocabulary=vocab_path) - - def test_tensor_vocab(self): - vocab_data = [-1, 42, 1138, 725, 1729] - vocab_tensor = tf.constant(vocab_data, tf.int64) - layer = integer_lookup.IntegerLookup(vocabulary=vocab_tensor) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(vocab_data, returned_vocab) - self.assertAllEqual(layer.vocabulary_size(), 5) - fn = tf.function(lambda: layer.set_vocabulary(vocab_tensor)) - with self.assertRaisesRegex(RuntimeError, "Cannot set a tensor vocabulary"): - fn() + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def _write_to_temp_file(self, file_name, vocab_list): + vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") + with tf.io.gfile.GFile(vocab_path, "w") as writer: + for vocab in vocab_list: + writer.write(str(vocab) + "\n") + writer.flush() + writer.close() + return vocab_path + + def test_int_output_explicit_vocab(self): + vocab_data = [42, 1138, 725, 1729] + input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) + expected_output 
= [[1, 2, 3, 4], [4, 3, 1, 0]]
+
+ input_data = keras.Input(shape=(None,), dtype=tf.int64)
+ layer = integer_lookup.IntegerLookup(vocabulary=vocab_data)
+ int_data = layer(input_data)
+ model = keras.Model(inputs=input_data, outputs=int_data)
+ output_dataset = model.predict(input_array)
+ self.assertAllEqual(expected_output, output_dataset)
+
+ def test_no_vocab(self):
+ with self.assertRaisesRegex(
+ RuntimeError, "you must set the layer's vocabulary"
+ ):
+ layer = integer_lookup.IntegerLookup(output_mode="binary")
+ layer([[1]])
+
+ def test_one_hot_output(self):
+ vocab_data = [2, 3, 4, 5]
+ input_array = np.array([2, 3, 4, 5, 6])
+ expected_output = [
+ [0, 1, 0, 0, 0],
+ [0, 0, 1, 0, 0],
+ [0, 0, 0, 1, 0],
+ [0, 0, 0, 0, 1],
+ [1, 0, 0, 0, 0],
+ ]
+
+ input_data = keras.Input(shape=(1,), dtype=tf.int64)
+ layer = integer_lookup.IntegerLookup(
+ vocabulary=vocab_data, output_mode="one_hot"
+ )
+ res = layer(input_data)
+ model = keras.Model(inputs=input_data, outputs=res)
+ output_data = model.predict(input_array)
+ self.assertAllEqual(expected_output, output_data)
+
+ def test_multi_hot_output(self):
+ vocab_data = [2, 3, 4, 5]
+ input_array = np.array([[2, 2, 3, 4], [0, 1, 5, 2]])
+ expected_output = [[0, 1, 1, 1, 0], [1, 1, 0, 0, 1]]
+
+ input_data = keras.Input(shape=(None,), dtype=tf.int64)
+ layer = integer_lookup.IntegerLookup(
+ vocabulary=vocab_data, output_mode="multi_hot"
+ )
+ res = layer(input_data)
+ model = keras.Model(inputs=input_data, outputs=res)
+ output_data = model.predict(input_array)
+ self.assertAllEqual(expected_output, output_data)
+
+ def test_count_output(self):
+ vocab_data = [2, 3, 4, 5]
+ input_array = np.array([[2, 2, 3, 4], [0, 1, 5, 6]])
+ expected_output = [[0, 2, 1, 1, 0], [3, 0, 0, 0, 1]]
+
+ input_data = keras.Input(shape=(None,), dtype=tf.int64)
+ layer = integer_lookup.IntegerLookup(
+ vocabulary=vocab_data, output_mode="count"
+ )
+ res = layer(input_data)
+ model = keras.Model(inputs=input_data, outputs=res)
+ output_data = model.predict(input_array)
+ self.assertAllEqual(expected_output, output_data)
+
+ def test_sparse_output(self):
+ vocab_data = [2, 3, 4, 5]
+
+ input_data = keras.Input(shape=(None,), dtype=tf.int64)
+ layer = integer_lookup.IntegerLookup(
+ vocabulary=vocab_data, output_mode="multi_hot", sparse=True
+ )
+ res = layer(input_data)
+ self.assertEqual(res.__class__.__name__, "SparseKerasTensor")
+
+ def test_get_vocab_returns_int(self):
+ vocab_data = [42, 1138, 725, 1729]
+ expected_vocab = [-1, 42, 1138, 725, 1729]
+ layer = integer_lookup.IntegerLookup(vocabulary=vocab_data)
+ layer_vocab = layer.get_vocabulary()
+ self.assertAllEqual(expected_vocab, layer_vocab)
+ self.assertIsInstance(layer_vocab[0], np.int64)
+
+ def test_int_output_explicit_vocab_from_file(self):
+ vocab_list = [42, 1138, 725, 1729]
+ vocab_path = self._write_to_temp_file("vocab_file", vocab_list)
+
+ input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
+ expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
+
+ input_data = keras.Input(shape=(None,), dtype=tf.int64)
+ layer = integer_lookup.IntegerLookup(vocabulary=vocab_path)
+ int_data = layer(input_data)
+ model = keras.Model(inputs=input_data, outputs=int_data)
+ output_dataset = model.predict(input_array)
+ self.assertAllEqual(expected_output, output_dataset)
+
+ def test_int_output_inverted_vocab_from_file(self):
+ vocab_list = [42, 1138, 725, 1729]
+ vocab_path = self._write_to_temp_file("vocab_file", vocab_list)
+
+ input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 0]])
+ 
expected_output = [[42, 1138, 725, 1729], [1729, 725, 42, -1]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup(vocabulary=vocab_path, invert=True) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_inverted_vocab_from_file_with_mask(self): + vocab_list = [42, 1138, 725, 1729] + vocab_path = self._write_to_temp_file("vocab_file", vocab_list) + + input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 0]]) + expected_output = [[42, 1138, 725, 1729], [1729, 725, 42, -10]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup( + vocabulary=vocab_path, invert=True, mask_value=-10 + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_explicit_vocab_from_file_via_setter(self): + vocab_list = [42, 1138, 725, 1729] + vocab_path = self._write_to_temp_file("vocab_file", vocab_list) + + input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup() + layer.set_vocabulary(vocab_path) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_non_unique_vocab_fails(self): + vocab_data = [42, 1138, 725, 1729, 1729] + with self.assertRaisesRegex(ValueError, ".*repeated term.*1729.*"): + _ = integer_lookup.IntegerLookup(vocabulary=vocab_data) + + def test_non_unique_vocab_from_file_fails(self): + vocab_list = [42, 1138, 725, 1729, 42] + vocab_path = self._write_to_temp_file("repeat_vocab_file", vocab_list) + with self.assertRaisesRegex( + tf.errors.FailedPreconditionError, + ".*HashTable has different value for same key.*42.*", + ): + _ = integer_lookup.IntegerLookup(vocabulary=vocab_path) + + def test_tensor_vocab(self): + vocab_data = [-1, 42, 1138, 725, 1729] + vocab_tensor = tf.constant(vocab_data, tf.int64) + layer = integer_lookup.IntegerLookup(vocabulary=vocab_tensor) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + self.assertAllEqual(layer.vocabulary_size(), 5) + fn = tf.function(lambda: layer.set_vocabulary(vocab_tensor)) + with self.assertRaisesRegex( + RuntimeError, "Cannot set a tensor vocabulary" + ): + fn() @test_combinations.run_all_keras_modes(always_skip_v1=True) -class IntegerLookupErrorTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_too_long_vocab_fails_in_single_setting(self): - vocab_data = [42, 1138, 725, 1729] +class IntegerLookupErrorTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_too_long_vocab_fails_in_single_setting(self): + vocab_data = [42, 1138, 725, 1729] - layer = integer_lookup.IntegerLookup(max_tokens=4, num_oov_indices=1) - with self.assertRaisesRegex(ValueError, - "vocabulary larger than the maximum vocab.*"): - layer.set_vocabulary(vocab_data) + layer = integer_lookup.IntegerLookup(max_tokens=4, num_oov_indices=1) + with self.assertRaisesRegex( + ValueError, "vocabulary larger than the maximum vocab.*" + ): + 
layer.set_vocabulary(vocab_data) - def test_zero_max_tokens_fails(self): - with self.assertRaisesRegex(ValueError, ".*max_tokens.*"): - _ = integer_lookup.IntegerLookup(max_tokens=0, num_oov_indices=1) + def test_zero_max_tokens_fails(self): + with self.assertRaisesRegex(ValueError, ".*max_tokens.*"): + _ = integer_lookup.IntegerLookup(max_tokens=0, num_oov_indices=1) @test_combinations.run_all_keras_modes(always_skip_v1=True) -class IntegerLookupSavingTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def tearDown(self): - keras.backend.clear_session() - gc.collect() - super(IntegerLookupSavingTest, self).tearDown() - - def test_vocabulary_persistence_across_saving(self): - vocab_data = [42, 1138, 725, 1729] - input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = integer_lookup.IntegerLookup(max_tokens=None, num_oov_indices=1) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(output_dataset, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - # TODO(b/149526183): Can't clear session when TF2 is disabled. - if tf.__internal__.tf2.enabled(): - keras.backend.clear_session() - - loaded_model = keras.models.load_model( - output_path, - custom_objects={"IntegerLookup": integer_lookup.IntegerLookup}) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_dataset = loaded_model.predict(input_array) - self.assertAllEqual(new_output_dataset, expected_output) +class IntegerLookupSavingTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def tearDown(self): + keras.backend.clear_session() + gc.collect() + super(IntegerLookupSavingTest, self).tearDown() + + def test_vocabulary_persistence_across_saving(self): + vocab_data = [42, 1138, 725, 1729] + input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]]) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + # Build and validate a golden model. + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = integer_lookup.IntegerLookup(max_tokens=None, num_oov_indices=1) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(output_dataset, expected_output) + + with self.subTest("keras_v3"): + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "TF2 must be enabled to use the new `.keras` saving." + ) + + # Save the model to disk. + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_model.keras" + ) + model.save(output_path, save_format="keras_v3") + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IntegerLookup": integer_lookup.IntegerLookup}, + ) + + # Ensure that the loaded model is unique + # (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. 
+ new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) + + with self.subTest("savedmodel"): + # Save the model to disk. + output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_model" + ) + model.save(output_path, save_format="tf") + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + # TODO(b/149526183): Can't clear session when TF2 is disabled. + if tf.__internal__.tf2.enabled(): + keras.backend.clear_session() + + loaded_model = keras.models.load_model( + output_path, + custom_objects={"IntegerLookup": integer_lookup.IntegerLookup}, + ) + + # Ensure that the loaded model is unique + # (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_dataset = loaded_model.predict(input_array) + self.assertAllEqual(new_output_dataset, expected_output) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/normalization.py b/keras/layers/preprocessing/normalization.py index 52b25ed56651..c105877d8d64 100644 --- a/keras/layers/preprocessing/normalization.py +++ b/keras/layers/preprocessing/normalization.py @@ -14,335 +14,380 @@ # ============================================================================== """Normalization preprocessing layer.""" -# pylint: disable=g-classes-have-attributes +import numpy as np +import tensorflow.compat.v2 as tf from keras import backend from keras.engine import base_preprocessing_layer from keras.layers.preprocessing import preprocessing_utils as utils -import numpy as np -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Normalization', - 'keras.layers.experimental.preprocessing.Normalization') +@keras_export( + "keras.layers.Normalization", + "keras.layers.experimental.preprocessing.Normalization", +) class Normalization(base_preprocessing_layer.PreprocessingLayer): - """A preprocessing layer which normalizes continuous features. - - This layer will shift and scale inputs into a distribution centered around - 0 with standard deviation 1. It accomplishes this by precomputing the mean and - variance of the data, and calling `(input - mean) / sqrt(var)` at runtime. - - The mean and variance values for the layer must be either supplied on - construction or learned via `adapt()`. `adapt()` will compute the mean and - variance of the data and store them as the layer's weights. `adapt()` should - be called before `fit()`, `evaluate()`, or `predict()`. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - axis: Integer, tuple of integers, or None. The axis or axes that should - have a separate mean and variance for each index in the shape. For - example, if shape is `(None, 5)` and `axis=1`, the layer will track 5 - separate mean and variance values for the last axis. If `axis` is set to - `None`, the layer will normalize all elements in the input by a scalar - mean and variance. Defaults to -1, where the last axis of the input is - assumed to be a feature dimension and is normalized per index. Note that - in the specific case of batched scalar inputs where the only axis is the - batch axis, the default will normalize each index in the batch - separately. In this case, consider passing `axis=None`. 
- mean: The mean value(s) to use during normalization. The passed value(s) - will be broadcast to the shape of the kept axes above; if the value(s) - cannot be broadcast, an error will be raised when this layer's `build()` - method is called. - variance: The variance value(s) to use during normalization. The passed - value(s) will be broadcast to the shape of the kept axes above; if the - value(s) cannot be broadcast, an error will be raised when this layer's - `build()` method is called. - invert: If True, this layer will apply the inverse transformation - to its inputs: it would turn a normalized input back into its - original form. - - Examples: - - Calculate a global mean and variance by analyzing the dataset in `adapt()`. - - >>> adapt_data = np.array([1., 2., 3., 4., 5.], dtype='float32') - >>> input_data = np.array([1., 2., 3.], dtype='float32') - >>> layer = tf.keras.layers.Normalization(axis=None) - >>> layer.adapt(adapt_data) - >>> layer(input_data) - - - Calculate a mean and variance for each index on the last axis. - - >>> adapt_data = np.array([[0., 7., 4.], - ... [2., 9., 6.], - ... [0., 7., 4.], - ... [2., 9., 6.]], dtype='float32') - >>> input_data = np.array([[0., 7., 4.]], dtype='float32') - >>> layer = tf.keras.layers.Normalization(axis=-1) - >>> layer.adapt(adapt_data) - >>> layer(input_data) - - - Pass the mean and variance directly. - - >>> input_data = np.array([[1.], [2.], [3.]], dtype='float32') - >>> layer = tf.keras.layers.Normalization(mean=3., variance=2.) - >>> layer(input_data) - - - Use the layer to de-normalize inputs (after adapting the layer). - - >>> adapt_data = np.array([[0., 7., 4.], - ... [2., 9., 6.], - ... [0., 7., 4.], - ... [2., 9., 6.]], dtype='float32') - >>> input_data = np.array([[1., 2., 3.]], dtype='float32') - >>> layer = tf.keras.layers.Normalization(axis=-1, invert=True) - >>> layer.adapt(adapt_data) - >>> layer(input_data) - - """ - - def __init__(self, axis=-1, mean=None, variance=None, invert=False, **kwargs): - super().__init__(**kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell('Normalization').set(True) - - # Standardize `axis` to a tuple. - if axis is None: - axis = () - elif isinstance(axis, int): - axis = (axis,) - else: - axis = tuple(axis) - self.axis = axis - - # Set `mean` and `variance` if passed. - if isinstance(mean, tf.Variable): - raise ValueError('Normalization does not support passing a Variable ' - 'for the `mean` init arg.') - if isinstance(variance, tf.Variable): - raise ValueError('Normalization does not support passing a Variable ' - 'for the `variance` init arg.') - if (mean is not None) != (variance is not None): - raise ValueError( - 'When setting values directly, both `mean` and `variance` ' - 'must be set. Got mean: {} and variance: {}'.format(mean, variance)) - self.input_mean = mean - self.input_variance = variance - self.invert = invert - - def build(self, input_shape): - super().build(input_shape) - - if (isinstance(input_shape, (list, tuple)) and - all(isinstance(shape, tf.TensorShape) for shape in input_shape)): - raise ValueError('Normalization only accepts a single input. If you are ' - 'passing a python list or tuple as a single input, ' - 'please convert to a numpy array or `tf.Tensor`.') - - input_shape = tf.TensorShape(input_shape).as_list() - ndim = len(input_shape) - - if any(a < -ndim or a >= ndim for a in self.axis): - raise ValueError('All `axis` values must be in the range [-ndim, ndim). 
' - 'Found ndim: `{}`, axis: {}'.format(ndim, self.axis)) - - # Axes to be kept, replacing negative values with positive equivalents. - # Sorted to avoid transposing axes. - self._keep_axis = sorted([d if d >= 0 else d + ndim for d in self.axis]) - # All axes to be kept should have known shape. - for d in self._keep_axis: - if input_shape[d] is None: - raise ValueError( - 'All `axis` values to be kept must have known shape. Got axis: {}, ' - 'input shape: {}, with unknown axis at index: {}'.format( - self.axis, input_shape, d)) - # Axes to be reduced. - self._reduce_axis = [d for d in range(ndim) if d not in self._keep_axis] - # 1 if an axis should be reduced, 0 otherwise. - self._reduce_axis_mask = [ - 0 if d in self._keep_axis else 1 for d in range(ndim) - ] - # Broadcast any reduced axes. - self._broadcast_shape = [ - input_shape[d] if d in self._keep_axis else 1 for d in range(ndim) - ] - mean_and_var_shape = tuple(input_shape[d] for d in self._keep_axis) - - if self.input_mean is None: - self.adapt_mean = self.add_weight( - name='mean', - shape=mean_and_var_shape, - dtype=self.compute_dtype, - initializer='zeros', - trainable=False) - self.adapt_variance = self.add_weight( - name='variance', - shape=mean_and_var_shape, - dtype=self.compute_dtype, - initializer='ones', - trainable=False) - self.count = self.add_weight( - name='count', - shape=(), - dtype=tf.int64, - initializer='zeros', - trainable=False) - self.finalize_state() - else: - # In the no adapt case, make constant tensors for mean and variance with - # proper broadcast shape for use during call. - mean = self.input_mean * np.ones(mean_and_var_shape) - variance = self.input_variance * np.ones(mean_and_var_shape) - mean = tf.reshape(mean, self._broadcast_shape) - variance = tf.reshape(variance, self._broadcast_shape) - self.mean = tf.cast(mean, self.compute_dtype) - self.variance = tf.cast(variance, self.compute_dtype) - - # We override this method solely to generate a docstring. - def adapt(self, data, batch_size=None, steps=None): - """Computes the mean and variance of values in a dataset. - - Calling `adapt()` on a `Normalization` layer is an alternative to passing in - `mean` and `variance` arguments during layer construction. A `Normalization` - layer should always either be adapted over a dataset or passed `mean` and - `variance`. - - During `adapt()`, the layer will compute a `mean` and `variance` separately - for each position in each axis specified by the `axis` argument. To - calculate a single `mean` and `variance` over the input data, simply pass - `axis=None`. - - In order to make `Normalization` efficient in any distribution context, the - computed mean and variance are kept static with respect to any compiled - `tf.Graph`s that call the layer. As a consequence, if the layer is adapted a - second time, any models using the layer should be re-compiled. For more - information see - `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`. - - `adapt()` is meant only as a single machine utility to compute layer state. - To analyze a dataset that cannot fit on a single machine, see - [Tensorflow Transform](https://www.tensorflow.org/tfx/transform/get_started) - for a multi-machine, map-reduce solution. - - Arguments: - data: The data to train on. It can be passed either as a - `tf.data.Dataset`, or as a numpy array. - batch_size: Integer or `None`. - Number of samples per state update. - If unspecified, `batch_size` will default to 32. 
- Do not specify the `batch_size` if your data is in the - form of datasets, generators, or `keras.utils.Sequence` instances - (since they generate batches). - steps: Integer or `None`. - Total number of steps (batches of samples) - When training with input tensors such as - TensorFlow data tensors, the default `None` is equal to - the number of samples in your dataset divided by - the batch size, or 1 if that cannot be determined. If x is a - `tf.data` dataset, and 'steps' is None, the epoch will run until - the input dataset is exhausted. When passing an infinitely - repeating dataset, you must specify the `steps` argument. This - argument is not supported with array inputs. + """A preprocessing layer which normalizes continuous features. + + This layer will shift and scale inputs into a distribution centered around + 0 with standard deviation 1. It accomplishes this by precomputing the mean + and variance of the data, and calling `(input - mean) / sqrt(var)` at + runtime. + + The mean and variance values for the layer must be either supplied on + construction or learned via `adapt()`. `adapt()` will compute the mean and + variance of the data and store them as the layer's weights. `adapt()` should + be called before `fit()`, `evaluate()`, or `predict()`. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Args: + axis: Integer, tuple of integers, or None. The axis or axes that should + have a separate mean and variance for each index in the shape. For + example, if shape is `(None, 5)` and `axis=1`, the layer will track 5 + separate mean and variance values for the last axis. If `axis` is set + to `None`, the layer will normalize all elements in the input by a + scalar mean and variance. When `-1` the last axis of the + input is assumed to be a feature dimension and is normalized per + index. Note that in the specific case of batched scalar inputs where + the only axis is the batch axis, the default will normalize each index + in the batch separately. In this case, consider passing `axis=None`. + Defaults to `-1`. + mean: The mean value(s) to use during normalization. The passed value(s) + will be broadcast to the shape of the kept axes above; if the value(s) + cannot be broadcast, an error will be raised when this layer's + `build()` method is called. + variance: The variance value(s) to use during normalization. The passed + value(s) will be broadcast to the shape of the kept axes above; if the + value(s) cannot be broadcast, an error will be raised when this + layer's `build()` method is called. + invert: If True, this layer will apply the inverse transformation + to its inputs: it would turn a normalized input back into its + original form. + + Examples: + + Calculate a global mean and variance by analyzing the dataset in `adapt()`. + + >>> adapt_data = np.array([1., 2., 3., 4., 5.], dtype='float32') + >>> input_data = np.array([1., 2., 3.], dtype='float32') + >>> layer = tf.keras.layers.Normalization(axis=None) + >>> layer.adapt(adapt_data) + >>> layer(input_data) + + + Calculate a mean and variance for each index on the last axis. + + >>> adapt_data = np.array([[0., 7., 4.], + ... [2., 9., 6.], + ... [0., 7., 4.], + ... [2., 9., 6.]], dtype='float32') + >>> input_data = np.array([[0., 7., 4.]], dtype='float32') + >>> layer = tf.keras.layers.Normalization(axis=-1) + >>> layer.adapt(adapt_data) + >>> layer(input_data) + + + Pass the mean and variance directly. 
+ + >>> input_data = np.array([[1.], [2.], [3.]], dtype='float32') + >>> layer = tf.keras.layers.Normalization(mean=3., variance=2.) + >>> layer(input_data) + + + Use the layer to de-normalize inputs (after adapting the layer). + + >>> adapt_data = np.array([[0., 7., 4.], + ... [2., 9., 6.], + ... [0., 7., 4.], + ... [2., 9., 6.]], dtype='float32') + >>> input_data = np.array([[1., 2., 3.]], dtype='float32') + >>> layer = tf.keras.layers.Normalization(axis=-1, invert=True) + >>> layer.adapt(adapt_data) + >>> layer(input_data) + """ - super().adapt(data, batch_size=batch_size, steps=steps) - - def update_state(self, data): - if self.input_mean is not None: - raise ValueError( - 'Cannot `adapt` a Normalization layer that is initialized with ' - 'static `mean` and `variance`, you passed mean {} and variance {}.' - .format(self.input_mean, self.input_variance)) - - if not self.built: - raise RuntimeError('`build` must be called before `update_state`.') - - data = self._standardize_inputs(data) - data = tf.cast(data, self.adapt_mean.dtype) - batch_mean, batch_variance = tf.nn.moments(data, axes=self._reduce_axis) - batch_shape = tf.shape(data, out_type=self.count.dtype) - if self._reduce_axis: - batch_reduce_shape = tf.gather(batch_shape, self._reduce_axis) - batch_count = tf.reduce_prod(batch_reduce_shape) - else: - batch_count = 1 - - total_count = batch_count + self.count - batch_weight = ( - tf.cast(batch_count, dtype=self.compute_dtype) / - tf.cast(total_count, dtype=self.compute_dtype)) - existing_weight = 1. - batch_weight - - total_mean = self.adapt_mean * existing_weight + batch_mean * batch_weight - # The variance is computed using the lack-of-fit sum of squares - # formula (see https://en.wikipedia.org/wiki/Lack-of-fit_sum_of_squares). - total_variance = ((self.adapt_variance + - (self.adapt_mean - total_mean)**2) * existing_weight + - (batch_variance + - (batch_mean - total_mean)**2) * batch_weight) - self.adapt_mean.assign(total_mean) - self.adapt_variance.assign(total_variance) - self.count.assign(total_count) - - def reset_state(self): # pylint: disable=method-hidden - if self.input_mean is not None or not self.built: - return - - self.adapt_mean.assign(tf.zeros_like(self.adapt_mean)) - self.adapt_variance.assign(tf.ones_like(self.adapt_variance)) - self.count.assign(tf.zeros_like(self.count)) - - def finalize_state(self): - if self.input_mean is not None or not self.built: - return - - # In the adapt case, we make constant tensors for mean and variance with - # proper broadcast shape and dtype each time `finalize_state` is called. 
- self.mean = tf.reshape(self.adapt_mean, self._broadcast_shape) - self.mean = tf.cast(self.mean, self.compute_dtype) - self.variance = tf.reshape(self.adapt_variance, self._broadcast_shape) - self.variance = tf.cast(self.variance, self.compute_dtype) - - def call(self, inputs): - inputs = self._standardize_inputs(inputs) - # The base layer automatically casts floating-point inputs, but we - # explicitly cast here to also allow integer inputs to be passed - inputs = tf.cast(inputs, self.compute_dtype) - if self.invert: - return ((inputs + self.mean) * - tf.maximum(tf.sqrt(self.variance), backend.epsilon())) - else: - return ((inputs - self.mean) / - tf.maximum(tf.sqrt(self.variance), backend.epsilon())) - - def compute_output_shape(self, input_shape): - return input_shape - - def compute_output_signature(self, input_spec): - return input_spec - - def get_config(self): - config = super().get_config() - config.update({ - 'axis': self.axis, - 'mean': utils.listify_tensors(self.input_mean), - 'variance': utils.listify_tensors(self.input_variance), - }) - return config - - def _standardize_inputs(self, inputs): - inputs = tf.convert_to_tensor(inputs) - if inputs.dtype != self.compute_dtype: - inputs = tf.cast(inputs, self.compute_dtype) - return inputs + + def __init__( + self, axis=-1, mean=None, variance=None, invert=False, **kwargs + ): + super().__init__(**kwargs) + base_preprocessing_layer.keras_kpl_gauge.get_cell("Normalization").set( + True + ) + + # Standardize `axis` to a tuple. + if axis is None: + axis = () + elif isinstance(axis, int): + axis = (axis,) + else: + axis = tuple(axis) + self.axis = axis + + # Set `mean` and `variance` if passed. + if isinstance(mean, tf.Variable): + raise ValueError( + "Normalization does not support passing a Variable " + "for the `mean` init arg." + ) + if isinstance(variance, tf.Variable): + raise ValueError( + "Normalization does not support passing a Variable " + "for the `variance` init arg." + ) + if (mean is not None) != (variance is not None): + raise ValueError( + "When setting values directly, both `mean` and `variance` " + "must be set. Got mean: {} and variance: {}".format( + mean, variance + ) + ) + self.input_mean = mean + self.input_variance = variance + self.invert = invert + + def build(self, input_shape): + super().build(input_shape) + + if isinstance(input_shape, (list, tuple)) and all( + isinstance(shape, tf.TensorShape) for shape in input_shape + ): + raise ValueError( + "Normalization only accepts a single input. If you are " + "passing a python list or tuple as a single input, " + "please convert to a numpy array or `tf.Tensor`." + ) + + input_shape = tf.TensorShape(input_shape).as_list() + ndim = len(input_shape) + + if any(a < -ndim or a >= ndim for a in self.axis): + raise ValueError( + "All `axis` values must be in the range [-ndim, ndim). " + "Found ndim: `{}`, axis: {}".format(ndim, self.axis) + ) + + # Axes to be kept, replacing negative values with positive equivalents. + # Sorted to avoid transposing axes. + self._keep_axis = sorted([d if d >= 0 else d + ndim for d in self.axis]) + # All axes to be kept should have known shape. + for d in self._keep_axis: + if input_shape[d] is None: + raise ValueError( + "All `axis` values to be kept must have known shape. " + "Got axis: {}, " + "input shape: {}, with unknown axis at index: {}".format( + self.axis, input_shape, d + ) + ) + # Axes to be reduced. + self._reduce_axis = [d for d in range(ndim) if d not in self._keep_axis] + # 1 if an axis should be reduced, 0 otherwise. 
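To make the axis bookkeeping in `build()` concrete, here is a small standalone sketch (plain Python over a hypothetical `input_shape`; the variable names mirror the `_keep_axis`, `_reduce_axis`, and `_broadcast_shape` attributes above).

# Rank-3 input with an unknown batch dimension, normalizing over axis=(1, 2).
input_shape = [None, 2, 3]
axis = (1, 2)
ndim = len(input_shape)

# Negative axes become their positive equivalents, sorted to avoid transposes.
keep_axis = sorted(d if d >= 0 else d + ndim for d in axis)
# Statistics are reduced over every dimension that is not kept.
reduce_axis = [d for d in range(ndim) if d not in keep_axis]
# Kept dims keep their size; reduced dims broadcast as size 1.
broadcast_shape = [input_shape[d] if d in keep_axis else 1 for d in range(ndim)]

print(keep_axis)        # [1, 2]
print(reduce_axis)      # [0] -- statistics are averaged over the batch
print(broadcast_shape)  # [1, 2, 3] -- shape the stored mean/variance reshape to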
+ self._reduce_axis_mask = [ + 0 if d in self._keep_axis else 1 for d in range(ndim) + ] + # Broadcast any reduced axes. + self._broadcast_shape = [ + input_shape[d] if d in self._keep_axis else 1 for d in range(ndim) + ] + mean_and_var_shape = tuple(input_shape[d] for d in self._keep_axis) + + if self.input_mean is None: + self.adapt_mean = self.add_weight( + name="mean", + shape=mean_and_var_shape, + dtype=self.compute_dtype, + initializer="zeros", + trainable=False, + ) + self.adapt_variance = self.add_weight( + name="variance", + shape=mean_and_var_shape, + dtype=self.compute_dtype, + initializer="ones", + trainable=False, + ) + self.count = self.add_weight( + name="count", + shape=(), + dtype=tf.int64, + initializer="zeros", + trainable=False, + ) + self.finalize_state() + else: + # In the no adapt case, make constant tensors for mean and variance + # with proper broadcast shape for use during call. + mean = self.input_mean * np.ones(mean_and_var_shape) + variance = self.input_variance * np.ones(mean_and_var_shape) + mean = tf.reshape(mean, self._broadcast_shape) + variance = tf.reshape(variance, self._broadcast_shape) + self.mean = tf.cast(mean, self.compute_dtype) + self.variance = tf.cast(variance, self.compute_dtype) + + # We override this method solely to generate a docstring. + def adapt(self, data, batch_size=None, steps=None): + """Computes the mean and variance of values in a dataset. + + Calling `adapt()` on a `Normalization` layer is an alternative to + passing in `mean` and `variance` arguments during layer construction. A + `Normalization` layer should always either be adapted over a dataset or + passed `mean` and `variance`. + + During `adapt()`, the layer will compute a `mean` and `variance` + separately for each position in each axis specified by the `axis` + argument. To calculate a single `mean` and `variance` over the input + data, simply pass `axis=None`. + + In order to make `Normalization` efficient in any distribution context, + the computed mean and variance are kept static with respect to any + compiled `tf.Graph`s that call the layer. As a consequence, if the layer + is adapted a second time, any models using the layer should be + re-compiled. For more information see + `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`. + + `adapt()` is meant only as a single machine utility to compute layer + state. To analyze a dataset that cannot fit on a single machine, see + [Tensorflow Transform]( + https://www.tensorflow.org/tfx/transform/get_started) + for a multi-machine, map-reduce solution. + + Arguments: + data: The data to train on. It can be passed either as a + `tf.data.Dataset`, or as a numpy array. + batch_size: Integer or `None`. + Number of samples per state update. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of datasets, generators, or `keras.utils.Sequence` instances + (since they generate batches). + steps: Integer or `None`. + Total number of steps (batches of samples) + When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of samples in your dataset divided by + the batch size, or 1 if that cannot be determined. If x is a + `tf.data` dataset, and 'steps' is None, the epoch will run until + the input dataset is exhausted. When passing an infinitely + repeating dataset, you must specify the `steps` argument. This + argument is not supported with array inputs. 
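As a usage sketch of the `adapt()` contract documented here (a minimal example with made-up values, standard TF 2.x assumed):

import numpy as np
import tensorflow as tf

layer = tf.keras.layers.Normalization(axis=-1)

# adapt() accepts a batched tf.data.Dataset; statistics are accumulated
# batch by batch via the running update in update_state() below.
ds = tf.data.Dataset.from_tensor_slices(
    np.array([[1.0], [2.0], [3.0], [4.0]], dtype="float32")
).batch(2)
layer.adapt(ds)

# An infinitely repeating dataset requires an explicit `steps` argument.
layer.adapt(ds.repeat(), steps=2)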
+ """ + super().adapt(data, batch_size=batch_size, steps=steps) + + def update_state(self, data): + if self.input_mean is not None: + raise ValueError( + "Cannot `adapt` a Normalization layer that is initialized with " + "static `mean` and `variance`, " + "you passed mean {} and variance {}.".format( + self.input_mean, self.input_variance + ) + ) + + if not self.built: + raise RuntimeError("`build` must be called before `update_state`.") + + data = self._standardize_inputs(data) + data = tf.cast(data, self.adapt_mean.dtype) + batch_mean, batch_variance = tf.nn.moments(data, axes=self._reduce_axis) + batch_shape = tf.shape(data, out_type=self.count.dtype) + if self._reduce_axis: + batch_reduce_shape = tf.gather(batch_shape, self._reduce_axis) + batch_count = tf.reduce_prod(batch_reduce_shape) + else: + batch_count = 1 + + total_count = batch_count + self.count + batch_weight = tf.cast(batch_count, dtype=self.compute_dtype) / tf.cast( + total_count, dtype=self.compute_dtype + ) + existing_weight = 1.0 - batch_weight + + total_mean = ( + self.adapt_mean * existing_weight + batch_mean * batch_weight + ) + # The variance is computed using the lack-of-fit sum of squares + # formula (see + # https://en.wikipedia.org/wiki/Lack-of-fit_sum_of_squares). + total_variance = ( + self.adapt_variance + (self.adapt_mean - total_mean) ** 2 + ) * existing_weight + ( + batch_variance + (batch_mean - total_mean) ** 2 + ) * batch_weight + self.adapt_mean.assign(total_mean) + self.adapt_variance.assign(total_variance) + self.count.assign(total_count) + + def reset_state(self): + if self.input_mean is not None or not self.built: + return + + self.adapt_mean.assign(tf.zeros_like(self.adapt_mean)) + self.adapt_variance.assign(tf.ones_like(self.adapt_variance)) + self.count.assign(tf.zeros_like(self.count)) + + def finalize_state(self): + if self.input_mean is not None or not self.built: + return + + # In the adapt case, we make constant tensors for mean and variance with + # proper broadcast shape and dtype each time `finalize_state` is called. + self.mean = tf.reshape(self.adapt_mean, self._broadcast_shape) + self.mean = tf.cast(self.mean, self.compute_dtype) + self.variance = tf.reshape(self.adapt_variance, self._broadcast_shape) + self.variance = tf.cast(self.variance, self.compute_dtype) + + def call(self, inputs): + inputs = self._standardize_inputs(inputs) + # The base layer automatically casts floating-point inputs, but we + # explicitly cast here to also allow integer inputs to be passed + inputs = tf.cast(inputs, self.compute_dtype) + if self.invert: + return self.mean + ( + inputs * tf.maximum(tf.sqrt(self.variance), backend.epsilon()) + ) + else: + return (inputs - self.mean) / tf.maximum( + tf.sqrt(self.variance), backend.epsilon() + ) + + def compute_output_shape(self, input_shape): + return input_shape + + def compute_output_signature(self, input_spec): + return input_spec + + def get_config(self): + config = super().get_config() + config.update( + { + "axis": self.axis, + "invert": self.invert, + "mean": utils.listify_tensors(self.input_mean), + "variance": utils.listify_tensors(self.input_variance), + } + ) + return config + + def _standardize_inputs(self, inputs): + inputs = tf.convert_to_tensor(inputs) + if inputs.dtype != self.compute_dtype: + inputs = tf.cast(inputs, self.compute_dtype) + return inputs + + def load_own_variables(self, store): + # Ensure that we call finalize_state after variable loading. 
+ super().load_own_variables(store) + self.finalize_state() diff --git a/keras/layers/preprocessing/normalization_distribution_test.py b/keras/layers/preprocessing/normalization_distribution_test.py index 3562aaba3e58..3d8e08aacf44 100644 --- a/keras/layers/preprocessing/normalization_distribution_test.py +++ b/keras/layers/preprocessing/normalization_distribution_test.py @@ -15,110 +15,145 @@ """Distribution tests for keras.layers.preprocessing.normalization.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.distribute import strategy_combinations from keras.layers.preprocessing import normalization from keras.layers.preprocessing import preprocessing_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf def _get_layer_computation_test_cases(): - test_cases = ({ - "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32), - "axis": -1, - "test_data": np.array([[1.], [2.], [3.]], np.float32), - "expected": np.array([[-1.414214], [-.707107], [0]], np.float32), - "testcase_name": "2d_single_element" - }, { - "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32), - "axis": None, - "test_data": np.array([[1.], [2.], [3.]], np.float32), - "expected": np.array([[-1.414214], [-.707107], [0]], np.float32), - "testcase_name": "2d_single_element_none_axis" - }, { - "adapt_data": np.array([[1., 2., 3., 4., 5.]], dtype=np.float32), - "axis": None, - "test_data": np.array([[1.], [2.], [3.]], np.float32), - "expected": np.array([[-1.414214], [-.707107], [0]], np.float32), - "testcase_name": "2d_single_element_none_axis_flat_data" - }, { - "adapt_data": - np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]], - np.float32), - "axis": - 1, - "test_data": - np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]], - np.float32), - "expected": - np.array([[[-1.549193, -0.774597, 0.], [-1.549193, -0.774597, 0.]], - [[0., 0.774597, 1.549193], [0., 0.774597, 1.549193]]], - np.float32), - "testcase_name": - "3d_internal_axis" - }, { - "adapt_data": - np.array( - [[[1., 0., 3.], [2., 3., 4.]], [[3., -1., 5.], [4., 5., 8.]]], - np.float32), - "axis": (1, 2), - "test_data": - np.array( - [[[3., 1., -1.], [2., 5., 4.]], [[3., 0., 5.], [2., 5., 8.]]], - np.float32), - "expected": - np.array( - [[[1., 3., -5.], [-1., 1., -1.]], [[1., 1., 1.], [-1., 1., 1.]]], - np.float32), - "testcase_name": - "3d_multiple_axis" - }) + test_cases = ( + { + "adapt_data": np.array( + [[1.0], [2.0], [3.0], [4.0], [5.0]], dtype=np.float32 + ), + "axis": -1, + "test_data": np.array([[1.0], [2.0], [3.0]], np.float32), + "expected": np.array([[-1.414214], [-0.707107], [0]], np.float32), + "testcase_name": "2d_single_element", + }, + { + "adapt_data": np.array( + [[1.0], [2.0], [3.0], [4.0], [5.0]], dtype=np.float32 + ), + "axis": None, + "test_data": np.array([[1.0], [2.0], [3.0]], np.float32), + "expected": np.array([[-1.414214], [-0.707107], [0]], np.float32), + "testcase_name": "2d_single_element_none_axis", + }, + { + "adapt_data": np.array( + [[1.0, 2.0, 3.0, 4.0, 5.0]], dtype=np.float32 + ), + "axis": None, + "test_data": np.array([[1.0], [2.0], [3.0]], np.float32), + "expected": np.array([[-1.414214], [-0.707107], [0]], np.float32), + "testcase_name": "2d_single_element_none_axis_flat_data", + }, + { + "adapt_data": np.array( + [ + [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], + [[3.0, 4.0, 5.0], [4.0, 5.0, 6.0]], + ], + np.float32, + ), + 
"axis": 1, + "test_data": np.array( + [ + [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], + [[3.0, 4.0, 5.0], [4.0, 5.0, 6.0]], + ], + np.float32, + ), + "expected": np.array( + [ + [[-1.549193, -0.774597, 0.0], [-1.549193, -0.774597, 0.0]], + [[0.0, 0.774597, 1.549193], [0.0, 0.774597, 1.549193]], + ], + np.float32, + ), + "testcase_name": "3d_internal_axis", + }, + { + "adapt_data": np.array( + [ + [[1.0, 0.0, 3.0], [2.0, 3.0, 4.0]], + [[3.0, -1.0, 5.0], [4.0, 5.0, 8.0]], + ], + np.float32, + ), + "axis": (1, 2), + "test_data": np.array( + [ + [[3.0, 1.0, -1.0], [2.0, 5.0, 4.0]], + [[3.0, 0.0, 5.0], [2.0, 5.0, 8.0]], + ], + np.float32, + ), + "expected": np.array( + [ + [[1.0, 3.0, -5.0], [-1.0, 1.0, -1.0]], + [[1.0, 1.0, 1.0], [-1.0, 1.0, 1.0]], + ], + np.float32, + ), + "testcase_name": "3d_multiple_axis", + }, + ) - crossed_test_cases = [] - # Cross above test cases with use_dataset in (True, False) - for use_dataset in (True, False): - for case in test_cases: - case = case.copy() - if use_dataset: - case["testcase_name"] = case["testcase_name"] + "_with_dataset" - case["use_dataset"] = use_dataset - crossed_test_cases.append(case) + crossed_test_cases = [] + # Cross above test cases with use_dataset in (True, False) + for use_dataset in (True, False): + for case in test_cases: + case = case.copy() + if use_dataset: + case["testcase_name"] = case["testcase_name"] + "_with_dataset" + case["use_dataset"] = use_dataset + crossed_test_cases.append(case) - return crossed_test_cases + return crossed_test_cases @test_utils.run_v2_only @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.times( tf.__internal__.test.combinations.combine( - strategy=strategy_combinations.all_strategies + - strategy_combinations.multi_worker_mirrored_strategies + - strategy_combinations.parameter_server_strategies_single_worker + - strategy_combinations.parameter_server_strategies_multi_worker, - mode=["eager"]), _get_layer_computation_test_cases())) -class NormalizationTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_layer_computation(self, strategy, adapt_data, axis, test_data, - use_dataset, expected): - input_shape = tuple([None for _ in range(test_data.ndim - 1)]) - if use_dataset: - # Keras APIs expect batched datasets - adapt_data = tf.data.Dataset.from_tensor_slices(adapt_data).batch(2) - test_data = tf.data.Dataset.from_tensor_slices(test_data).batch(2) + strategy=strategy_combinations.all_strategies + + strategy_combinations.multi_worker_mirrored_strategies + + strategy_combinations.parameter_server_strategies_single_worker + + strategy_combinations.parameter_server_strategies_multi_worker, + mode=["eager"], + ), + _get_layer_computation_test_cases(), + ) +) +class NormalizationTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_layer_computation( + self, strategy, adapt_data, axis, test_data, use_dataset, expected + ): + input_shape = tuple([None for _ in range(test_data.ndim - 1)]) + if use_dataset: + # Keras APIs expect batched datasets + adapt_data = tf.data.Dataset.from_tensor_slices(adapt_data).batch(2) + test_data = tf.data.Dataset.from_tensor_slices(test_data).batch(2) - with strategy.scope(): - input_data = keras.Input(shape=input_shape) - layer = normalization.Normalization(axis=axis) - layer.adapt(adapt_data) - output = layer(input_data) - model = keras.Model(input_data, output) - output_data = model.predict(test_data) - self.assertAllClose(expected, output_data) + with 
strategy.scope(): + input_data = keras.Input(shape=input_shape) + layer = normalization.Normalization(axis=axis) + layer.adapt(adapt_data) + output = layer(input_data) + model = keras.Model(input_data, output) + output_data = model.predict(test_data) + self.assertAllClose(expected, output_data) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/layers/preprocessing/normalization_test.py b/keras/layers/preprocessing/normalization_test.py index 856cb8959338..d948f34d38fa 100644 --- a/keras/layers/preprocessing/normalization_test.py +++ b/keras/layers/preprocessing/normalization_test.py @@ -14,420 +14,530 @@ # ============================================================================== """Tests for keras.layers.preprocessing.normalization.""" -import tensorflow.compat.v2 as tf - import os -from absl.testing import parameterized - import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils from keras.layers.preprocessing import normalization from keras.layers.preprocessing import preprocessing_test_utils from keras.mixed_precision import policy +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils def _get_layer_computation_test_cases(): - test_cases = ({ - "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32), - "axis": -1, - "test_data": np.array([[1.], [2.], [3.]], np.float32), - "expected": np.array([[-1.414214], [-.707107], [0]], np.float32), - "testcase_name": "2d_single_element" - }, { - "adapt_data": np.array([[1], [2], [3], [4], [5]], dtype=np.int32), - "axis": -1, - "test_data": np.array([[1], [2], [3]], np.int32), - "expected": np.array([[-1.414214], [-.707107], [0]], np.float32), - "testcase_name": "2d_int_data" - }, { - "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32), - "axis": None, - "test_data": np.array([[1.], [2.], [3.]], np.float32), - "expected": np.array([[-1.414214], [-.707107], [0]], np.float32), - "testcase_name": "2d_single_element_none_axis" - }, { - "adapt_data": np.array([[1., 2., 3., 4., 5.]], dtype=np.float32), - "axis": None, - "test_data": np.array([[1.], [2.], [3.]], np.float32), - "expected": np.array([[-1.414214], [-.707107], [0]], np.float32), - "testcase_name": "2d_single_element_none_axis_flat_data" - }, { - "adapt_data": - np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]], - np.float32), - "axis": - 1, - "test_data": - np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]], - np.float32), - "expected": - np.array([[[-1.549193, -0.774597, 0.], [-1.549193, -0.774597, 0.]], - [[0., 0.774597, 1.549193], [0., 0.774597, 1.549193]]], - np.float32), - "testcase_name": - "3d_internal_axis" - }, { - "adapt_data": - np.array( - [[[1., 0., 3.], [2., 3., 4.]], [[3., -1., 5.], [4., 5., 8.]]], - np.float32), - "axis": (1, 2), - "test_data": - np.array( - [[[3., 1., -1.], [2., 5., 4.]], [[3., 0., 5.], [2., 5., 8.]]], - np.float32), - "expected": - np.array( - [[[1., 3., -5.], [-1., 1., -1.]], [[1., 1., 1.], [-1., 1., 1.]]], - np.float32), - "testcase_name": - "3d_multiple_axis" - }, { - "adapt_data": - np.zeros((3, 4)), - "axis": -1, - "test_data": - np.zeros((3, 4)), - "expected": - np.zeros((3, 4)), - "testcase_name": - "zero_variance" - }) - - crossed_test_cases = [] - # Cross above test 
cases with use_dataset in (True, False) - for use_dataset in (True, False): - for case in test_cases: - case = case.copy() - if use_dataset: - case["testcase_name"] = case["testcase_name"] + "_with_dataset" - case["use_dataset"] = use_dataset - crossed_test_cases.append(case) - - return crossed_test_cases + test_cases = ( + { + "adapt_data": np.array( + [[1.0], [2.0], [3.0], [4.0], [5.0]], dtype=np.float32 + ), + "axis": -1, + "test_data": np.array([[1.0], [2.0], [3.0]], np.float32), + "expected": np.array([[-1.414214], [-0.707107], [0]], np.float32), + "testcase_name": "2d_single_element", + }, + { + "adapt_data": np.array([[1], [2], [3], [4], [5]], dtype=np.int32), + "axis": -1, + "test_data": np.array([[1], [2], [3]], np.int32), + "expected": np.array([[-1.414214], [-0.707107], [0]], np.float32), + "testcase_name": "2d_int_data", + }, + { + "adapt_data": np.array( + [[1.0], [2.0], [3.0], [4.0], [5.0]], dtype=np.float32 + ), + "axis": None, + "test_data": np.array([[1.0], [2.0], [3.0]], np.float32), + "expected": np.array([[-1.414214], [-0.707107], [0]], np.float32), + "testcase_name": "2d_single_element_none_axis", + }, + { + "adapt_data": np.array( + [[1.0, 2.0, 3.0, 4.0, 5.0]], dtype=np.float32 + ), + "axis": None, + "test_data": np.array([[1.0], [2.0], [3.0]], np.float32), + "expected": np.array([[-1.414214], [-0.707107], [0]], np.float32), + "testcase_name": "2d_single_element_none_axis_flat_data", + }, + { + "adapt_data": np.array( + [ + [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], + [[3.0, 4.0, 5.0], [4.0, 5.0, 6.0]], + ], + np.float32, + ), + "axis": 1, + "test_data": np.array( + [ + [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], + [[3.0, 4.0, 5.0], [4.0, 5.0, 6.0]], + ], + np.float32, + ), + "expected": np.array( + [ + [[-1.549193, -0.774597, 0.0], [-1.549193, -0.774597, 0.0]], + [[0.0, 0.774597, 1.549193], [0.0, 0.774597, 1.549193]], + ], + np.float32, + ), + "testcase_name": "3d_internal_axis", + }, + { + "adapt_data": np.array( + [ + [[1.0, 0.0, 3.0], [2.0, 3.0, 4.0]], + [[3.0, -1.0, 5.0], [4.0, 5.0, 8.0]], + ], + np.float32, + ), + "axis": (1, 2), + "test_data": np.array( + [ + [[3.0, 1.0, -1.0], [2.0, 5.0, 4.0]], + [[3.0, 0.0, 5.0], [2.0, 5.0, 8.0]], + ], + np.float32, + ), + "expected": np.array( + [ + [[1.0, 3.0, -5.0], [-1.0, 1.0, -1.0]], + [[1.0, 1.0, 1.0], [-1.0, 1.0, 1.0]], + ], + np.float32, + ), + "testcase_name": "3d_multiple_axis", + }, + { + "adapt_data": np.zeros((3, 4)), + "axis": -1, + "test_data": np.zeros((3, 4)), + "expected": np.zeros((3, 4)), + "testcase_name": "zero_variance", + }, + ) + + crossed_test_cases = [] + # Cross above test cases with use_dataset in (True, False) + for use_dataset in (True, False): + for case in test_cases: + case = case.copy() + if use_dataset: + case["testcase_name"] = case["testcase_name"] + "_with_dataset" + case["use_dataset"] = use_dataset + crossed_test_cases.append(case) + + return crossed_test_cases @test_combinations.run_all_keras_modes -class NormalizationTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_broadcasting_during_direct_setting(self): - layer = normalization.Normalization(axis=-1, mean=[1.0], variance=[1.0]) - output = layer(np.array([[1., 2.]])) - expected_output = [[0., 1.]] - self.assertAllClose(output, expected_output) - self.assertAllClose(layer.get_weights(), []) - - def test_broadcasting_during_direct_setting_with_tensors(self): - if not tf.executing_eagerly(): - self.skipTest("Only supported in TF2.") - - layer = normalization.Normalization( - axis=-1, - 
mean=tf.constant([1.0]), - variance=tf.constant([1.0])) - output = layer(np.array([[1., 2.]])) - expected_output = [[0., 1.]] - self.assertAllClose(output, expected_output) - self.assertAllClose(layer.get_weights(), []) - - def test_1d_data(self): - data = np.array([0., 2., 0., 2.]) - layer = normalization.Normalization(mean=1.0, variance=1.0) - output = layer(data) - self.assertListEqual(output.shape.as_list(), [4]) - self.assertAllClose(output, [-1, 1, -1, 1]) - - def test_0d_data(self): - layer = normalization.Normalization(axis=None, mean=1.0, variance=1.0) - output = layer(0.) - self.assertListEqual(output.shape.as_list(), []) - self.assertAllClose(output, -1) - - def test_broadcasting_during_direct_setting_with_variables_fails(self): - with self.assertRaisesRegex(ValueError, "passing a Variable"): - _ = normalization.Normalization( - axis=-1, - mean=tf.Variable([1.0]), - variance=tf.Variable([2.0])) - - def test_keeping_an_unknown_axis_fails(self): - layer = normalization.Normalization(axis=-1) - with self.assertRaisesRegex(ValueError, "axis.*must have known shape"): - layer.build([None]) - - @parameterized.parameters( - # Out of bounds - {"axis": 3}, - {"axis": -4}, - # In a tuple - {"axis": (1, 3)}, - {"axis": (1, -4)}, - ) - def test_bad_axis_fail_build(self, axis): - layer = normalization.Normalization(axis=axis) - with self.assertRaisesRegex(ValueError, "in the range"): - layer.build([None, 2, 3]) - - def test_list_input(self): - with self.assertRaisesRegex( - ValueError, ("Normalization only accepts a single input. If you are " - "passing a python list or tuple as a single input, " - "please convert to a numpy array or `tf.Tensor`.")): - normalization.Normalization()([1, 2, 3]) - - def test_scalar_input(self): - with self.assertRaisesRegex(ValueError, - "axis.*values must be in the range"): - normalization.Normalization()(1) - - def test_output_dtype(self): - if not tf.__internal__.tf2.enabled(): - self.skipTest("set_global_policy only supported in TF2.") - # Output should respect an explicit dtype, and default to the global policy. 
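To see the dtype rule this test pins down outside the test harness, a short sketch (TF2 assumed; the global policy is restored at the end so the example is side-effect free):

import tensorflow as tf

tf.keras.mixed_precision.set_global_policy("float64")
x = tf.keras.Input(batch_size=16, shape=(1,))

# An explicit dtype overrides the global policy...
layer16 = tf.keras.layers.Normalization(mean=1.0, variance=1.0, dtype="float16")
print(layer16(x).dtype)  # float16

# ...otherwise the layer computes in the global policy's dtype.
layer64 = tf.keras.layers.Normalization(mean=1.0, variance=1.0)
print(layer64(x).dtype)  # float64

tf.keras.mixed_precision.set_global_policy("float32")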
- policy.set_global_policy("float64") - input_data = keras.Input(batch_size=16, shape=(1,)) - layer = normalization.Normalization(mean=1.0, variance=1.0, dtype="float16") - output = layer(input_data) - self.assertAllEqual(output.dtype, tf.float16) - layer = normalization.Normalization(mean=1.0, variance=1.0) - output = layer(input_data) - self.assertAllEqual(output.dtype, tf.float64) - - def test_invert(self): - data = np.array([0., 2., 0., 2.]) - norm = normalization.Normalization(mean=1.0, variance=1.0) - inv_norm = normalization.Normalization(mean=1.0, variance=1.0, invert=True) - output = norm(data) - output2 = inv_norm(output) - self.assertListEqual(output2.shape.as_list(), [4]) - self.assertAllClose(output2, [0., 2., 0., 2.]) - - @test_utils.run_v2_only - def test_invert_adapt(self): - input_data = [[0.], [2.], [0.], [2.]] - norm = keras.layers.Normalization(axis=-1) - norm.adapt(input_data) - inv_norm = keras.layers.Normalization(axis=-1, invert=True) - inv_norm.adapt(input_data) - output = norm(input_data) - output2 = inv_norm(output) - self.assertAllClose(input_data, output2) +class NormalizationTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_broadcasting_during_direct_setting(self): + layer = normalization.Normalization(axis=-1, mean=[1.0], variance=[1.0]) + output = layer(np.array([[1.0, 2.0]])) + expected_output = [[0.0, 1.0]] + self.assertAllClose(output, expected_output) + self.assertAllClose(layer.get_weights(), []) + + def test_broadcasting_during_direct_setting_with_tensors(self): + if not tf.executing_eagerly(): + self.skipTest("Only supported in TF2.") + + layer = normalization.Normalization( + axis=-1, mean=tf.constant([1.0]), variance=tf.constant([1.0]) + ) + output = layer(np.array([[1.0, 2.0]])) + expected_output = [[0.0, 1.0]] + self.assertAllClose(output, expected_output) + self.assertAllClose(layer.get_weights(), []) + + def test_1d_data(self): + data = np.array([0.0, 2.0, 0.0, 2.0]) + layer = normalization.Normalization(mean=1.0, variance=1.0) + output = layer(data) + self.assertListEqual(output.shape.as_list(), [4]) + self.assertAllClose(output, [-1, 1, -1, 1]) + + def test_0d_data(self): + layer = normalization.Normalization(axis=None, mean=1.0, variance=1.0) + output = layer(0.0) + self.assertListEqual(output.shape.as_list(), []) + self.assertAllClose(output, -1) + + def test_broadcasting_during_direct_setting_with_variables_fails(self): + with self.assertRaisesRegex(ValueError, "passing a Variable"): + _ = normalization.Normalization( + axis=-1, mean=tf.Variable([1.0]), variance=tf.Variable([2.0]) + ) + + def test_keeping_an_unknown_axis_fails(self): + layer = normalization.Normalization(axis=-1) + with self.assertRaisesRegex(ValueError, "axis.*must have known shape"): + layer.build([None]) + + @parameterized.parameters( + # Out of bounds + {"axis": 3}, + {"axis": -4}, + # In a tuple + {"axis": (1, 3)}, + {"axis": (1, -4)}, + ) + def test_bad_axis_fail_build(self, axis): + layer = normalization.Normalization(axis=axis) + with self.assertRaisesRegex(ValueError, "in the range"): + layer.build([None, 2, 3]) + + def test_list_input(self): + with self.assertRaisesRegex( + ValueError, + "Normalization only accepts a single input. 
If you are " + "passing a python list or tuple as a single input, " + "please convert to a numpy array or `tf.Tensor`.", + ): + normalization.Normalization()([1, 2, 3]) + + def test_scalar_input(self): + with self.assertRaisesRegex( + ValueError, "axis.*values must be in the range" + ): + normalization.Normalization()(1) + + def test_output_dtype(self): + if not tf.__internal__.tf2.enabled(): + self.skipTest("set_global_policy only supported in TF2.") + # Output should respect an explicit dtype, and default to the global + # policy. + policy.set_global_policy("float64") + input_data = keras.Input(batch_size=16, shape=(1,)) + layer = normalization.Normalization( + mean=1.0, variance=1.0, dtype="float16" + ) + output = layer(input_data) + self.assertAllEqual(output.dtype, tf.float16) + layer = normalization.Normalization(mean=1.0, variance=1.0) + output = layer(input_data) + self.assertAllEqual(output.dtype, tf.float64) + + def test_invert(self): + input_data = np.array([0.0, 4.0, 0.0, 4.0]) + norm = normalization.Normalization(mean=2.0, variance=4.0) + inv_norm = normalization.Normalization( + mean=2.0, variance=4.0, invert=True + ) + output = norm(input_data) + output2 = inv_norm(output) + self.assertListEqual(output2.shape.as_list(), [4]) + self.assertAllClose(input_data, output2) + + @test_utils.run_v2_only + def test_invert_adapt(self): + input_data = [[0.0], [4.0], [0.0], [4.0]] + norm = keras.layers.Normalization(axis=-1) + norm.adapt(input_data) + inv_norm = keras.layers.Normalization(axis=-1, invert=True) + inv_norm.adapt(input_data) + output = norm(input_data) + output2 = inv_norm(output) + self.assertListEqual(output2.shape.as_list(), [4, 1]) + self.assertAllClose(input_data, output2) @test_combinations.run_all_keras_modes(always_skip_v1=True) -class NormalizationAdaptTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_layer_api_compatibility(self): - cls = normalization.Normalization - output_data = test_utils.layer_test( - cls, - kwargs={"axis": -1}, - input_shape=(None, 3), - input_data=np.array([[3, 1, 2], [6, 5, 4]], dtype=np.float32), - validate_training=False, - adapt_data=np.array([[1, 2, 1], [2, 3, 4], [1, 2, 1], [2, 3, 4]])) - expected = np.array([[3., -3., -0.33333333], [9., 5., 1.]]) - self.assertAllClose(expected, output_data) - - @parameterized.named_parameters(*_get_layer_computation_test_cases()) - def test_layer_computation(self, adapt_data, axis, test_data, use_dataset, - expected): - input_shape = tuple([test_data.shape[i] for i in range(1, test_data.ndim)]) - if use_dataset: - # Keras APIs expect batched datasets - adapt_data = tf.data.Dataset.from_tensor_slices(adapt_data).batch( - test_data.shape[0] // 2) - test_data = tf.data.Dataset.from_tensor_slices(test_data).batch( - test_data.shape[0] // 2) - - layer = normalization.Normalization(axis=axis) - layer.adapt(adapt_data) - - input_data = keras.Input(shape=input_shape) - output = layer(input_data) - model = keras.Model(input_data, output) - model._run_eagerly = test_utils.should_run_eagerly() - output_data = model.predict(test_data) - self.assertAllClose(expected, output_data) - - def test_1d_unbatched_adapt(self): - ds = tf.data.Dataset.from_tensor_slices([ - [2., 0., 2., 0.], - [0., 2., 0., 2.], - ]) - layer = normalization.Normalization(axis=-1) - layer.adapt(ds) - output_ds = ds.map(layer) - self.assertAllClose( - list(output_ds.as_numpy_iterator()), [ - [1., -1., 1., -1.], - [-1., 1., -1., 1.], - ]) - - def test_0d_unbatched_adapt(self): - ds = 
tf.data.Dataset.from_tensor_slices([2., 0., 2., 0.]) - layer = normalization.Normalization(axis=None) - layer.adapt(ds) - output_ds = ds.map(layer) - self.assertAllClose(list(output_ds.as_numpy_iterator()), [1., -1., 1., -1.]) - - @parameterized.parameters( - # Results should be identical no matter how the axes are specified (3d). - {"axis": (1, 2)}, - {"axis": (2, 1)}, - {"axis": (1, -1)}, - {"axis": (-1, 1)}, - ) - def test_axis_permutations(self, axis): - layer = normalization.Normalization(axis=axis) - # data.shape = [2, 2, 3] - data = np.array([[[0., 1., 2.], [0., 2., 6.]], - [[2., 3., 4.], [3., 6., 10.]]]) - expect = np.array([[[-1., -1., -1.], [-1., -1., -1.]], - [[1., 1., 1.], [1., 1., 1.]]]) - layer.adapt(data) - self.assertAllClose(expect, layer(data)) - - def test_model_summary_after_layer_adapt(self): - data = np.array([[[0., 1., 2.], [0., 2., 6.]], - [[2., 3., 4.], [3., 6., 10.]]]) - layer = normalization.Normalization(axis=-1) - layer.adapt(data) - model = keras.Sequential( - [layer, - keras.layers.Dense(64, activation="relu"), - keras.layers.Dense(1)]) - model.summary() - - def test_multiple_adapts(self): - first_adapt = [[0], [2], [0], [2]] - second_adapt = [[2], [4], [2], [4]] - predict_input = [[2], [2]] - expected_first_output = [[1], [1]] - expected_second_output = [[-1], [-1]] - - inputs = keras.Input(shape=(1,), dtype=tf.int32) - layer = normalization.Normalization(axis=-1) - layer.adapt(first_adapt) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - - actual_output = model.predict(predict_input) - self.assertAllClose(actual_output, expected_first_output) - - # Re-adapt the layer on new inputs. - layer.adapt(second_adapt) - # Re-compile the model. - model.compile() - # `predict` should now use the new model state. - actual_output = model.predict(predict_input) - self.assertAllClose(actual_output, expected_second_output) - - @parameterized.parameters( - {"adapted": True}, - {"adapted": False}, - ) - def test_saved_model_tf(self, adapted): - input_data = [[0.], [2.], [0.], [2.]] - expected_output = [[-1.], [1.], [-1.], [1.]] - - inputs = keras.Input(shape=(1,), dtype=tf.float32) - if adapted: - layer = normalization.Normalization(axis=-1) - layer.adapt(input_data) - else: - layer = normalization.Normalization(mean=1., variance=1.) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - - output_data = model.predict(input_data) - self.assertAllClose(output_data, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_saved_model") - tf.saved_model.save(model, output_path) - loaded_model = tf.saved_model.load(output_path) - f = loaded_model.signatures["serving_default"] - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_data = f(tf.constant(input_data))["normalization"] - self.assertAllClose(new_output_data, expected_output) - - @parameterized.product( - save_format=["tf", "h5"], - adapt=[True, False], - ) - def test_saved_model_keras(self, save_format, adapt): - input_data = [[0.], [2.], [0.], [2.]] - expected_output = [[-1.], [1.], [-1.], [1.]] - - cls = normalization.Normalization - inputs = keras.Input(shape=(1,), dtype=tf.float32) - if adapt: - layer = cls(axis=-1) - layer.adapt(input_data) - else: - layer = cls(mean=1., variance=1.) 
- outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - - output_data = model.predict(input_data) - self.assertAllClose(output_data, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format=format) - loaded_model = keras.models.load_model( - output_path, custom_objects={"Normalization": cls}) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. - new_output_data = loaded_model.predict(input_data) - self.assertAllClose(new_output_data, expected_output) - - @parameterized.parameters( - {"adapted": True}, - {"adapted": False}, - ) - def test_saved_weights_keras(self, adapted): - input_data = [[0.], [2.], [0.], [2.]] - expected_output = [[-1.], [1.], [-1.], [1.]] - - cls = normalization.Normalization - inputs = keras.Input(shape=(1,), dtype=tf.float32) - if adapted: - layer = cls(axis=-1) - layer.adapt(input_data) - else: - layer = cls(mean=1., variance=1.) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - - output_data = model.predict(input_data) - self.assertAllClose(output_data, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_weights") - model.save_weights(output_path, save_format="tf") - new_model = keras.Model.from_config( - model.get_config(), custom_objects={"Normalization": cls}) - new_model.load_weights(output_path) - - # Validate correctness of the new model. - new_output_data = new_model.predict(input_data) - self.assertAllClose(new_output_data, expected_output) +class NormalizationAdaptTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_layer_api_compatibility(self): + cls = normalization.Normalization + output_data = test_utils.layer_test( + cls, + kwargs={"axis": -1}, + input_shape=(None, 3), + input_data=np.array([[3, 1, 2], [6, 5, 4]], dtype=np.float32), + validate_training=False, + adapt_data=np.array([[1, 2, 1], [2, 3, 4], [1, 2, 1], [2, 3, 4]]), + ) + expected = np.array([[3.0, -3.0, -0.33333333], [9.0, 5.0, 1.0]]) + self.assertAllClose(expected, output_data) + + @parameterized.named_parameters(*_get_layer_computation_test_cases()) + def test_layer_computation( + self, adapt_data, axis, test_data, use_dataset, expected + ): + input_shape = tuple( + [test_data.shape[i] for i in range(1, test_data.ndim)] + ) + if use_dataset: + # Keras APIs expect batched datasets + adapt_data = tf.data.Dataset.from_tensor_slices(adapt_data).batch( + test_data.shape[0] // 2 + ) + test_data = tf.data.Dataset.from_tensor_slices(test_data).batch( + test_data.shape[0] // 2 + ) + + layer = normalization.Normalization(axis=axis) + layer.adapt(adapt_data) + + input_data = keras.Input(shape=input_shape) + output = layer(input_data) + model = keras.Model(input_data, output) + model._run_eagerly = test_utils.should_run_eagerly() + output_data = model.predict(test_data) + self.assertAllClose(expected, output_data) + + def test_1d_unbatched_adapt(self): + ds = tf.data.Dataset.from_tensor_slices( + [ + [2.0, 0.0, 2.0, 0.0], + [0.0, 2.0, 0.0, 2.0], + ] + ) + layer = normalization.Normalization(axis=-1) + layer.adapt(ds) + output_ds = ds.map(layer) + self.assertAllClose( + list(output_ds.as_numpy_iterator()), + [ + [1.0, -1.0, 1.0, -1.0], + [-1.0, 1.0, -1.0, 1.0], + ], + ) + + def test_0d_unbatched_adapt(self): + ds = 
tf.data.Dataset.from_tensor_slices([2.0, 0.0, 2.0, 0.0]) + layer = normalization.Normalization(axis=None) + layer.adapt(ds) + output_ds = ds.map(layer) + self.assertAllClose( + list(output_ds.as_numpy_iterator()), [1.0, -1.0, 1.0, -1.0] + ) + + @parameterized.parameters( + # Results should be identical no matter how the axes are specified (3d). + {"axis": (1, 2)}, + {"axis": (2, 1)}, + {"axis": (1, -1)}, + {"axis": (-1, 1)}, + ) + def test_axis_permutations(self, axis): + layer = normalization.Normalization(axis=axis) + # data.shape = [2, 2, 3] + data = np.array( + [ + [[0.0, 1.0, 2.0], [0.0, 2.0, 6.0]], + [[2.0, 3.0, 4.0], [3.0, 6.0, 10.0]], + ] + ) + expect = np.array( + [ + [[-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0]], + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + ] + ) + layer.adapt(data) + self.assertAllClose(expect, layer(data)) + + def test_model_summary_after_layer_adapt(self): + data = np.array( + [ + [[0.0, 1.0, 2.0], [0.0, 2.0, 6.0]], + [[2.0, 3.0, 4.0], [3.0, 6.0, 10.0]], + ] + ) + layer = normalization.Normalization(axis=-1) + layer.adapt(data) + model = keras.Sequential( + [ + layer, + keras.layers.Dense(64, activation="relu"), + keras.layers.Dense(1), + ] + ) + model.summary() + + def test_multiple_adapts(self): + first_adapt = [[0], [2], [0], [2]] + second_adapt = [[2], [4], [2], [4]] + predict_input = [[2], [2]] + expected_first_output = [[1], [1]] + expected_second_output = [[-1], [-1]] + + inputs = keras.Input(shape=(1,), dtype=tf.int32) + layer = normalization.Normalization(axis=-1) + layer.adapt(first_adapt) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + actual_output = model.predict(predict_input) + self.assertAllClose(actual_output, expected_first_output) + + # Re-adapt the layer on new inputs. + layer.adapt(second_adapt) + # Re-compile the model. + model.compile() + # `predict` should now use the new model state. + actual_output = model.predict(predict_input) + self.assertAllClose(actual_output, expected_second_output) + + @parameterized.parameters( + {"adapted": True}, + {"adapted": False}, + ) + def test_saving_tf(self, adapted): + input_data = [[0.0], [2.0], [0.0], [2.0]] + expected_output = [[-1.0], [1.0], [-1.0], [1.0]] + + inputs = keras.Input(shape=(1,), dtype=tf.float32) + if adapted: + layer = normalization.Normalization(axis=-1) + layer.adapt(input_data) + else: + layer = normalization.Normalization(mean=1.0, variance=1.0) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + output_data = model.predict(input_data) + self.assertAllClose(output_data, expected_output) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_saved_model") + tf.saved_model.save(model, output_path) + loaded_model = tf.saved_model.load(output_path) + f = loaded_model.signatures["serving_default"] + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. 
+ new_output_data = f(tf.constant(input_data))["normalization"] + self.assertAllClose(new_output_data, expected_output) + + @parameterized.product( + save_format=["tf", "h5", "keras_v3"], + adapt=[True, False], + ) + def test_saving_keras(self, save_format, adapt): + input_data = [[0.0], [2.0], [0.0], [2.0]] + expected_output = [[-1.0], [1.0], [-1.0], [1.0]] + + cls = normalization.Normalization + inputs = keras.Input(shape=(1,), dtype=tf.float32) + if adapt: + layer = cls(axis=-1) + layer.adapt(input_data) + else: + layer = cls(mean=1.0, variance=1.0) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + output_data = model.predict(input_data) + self.assertAllClose(output_data, expected_output) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_model") + if save_format == "keras_v3": + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "TF2 must be enabled to use the new `.keras` saving." + ) + output_path += ".keras" + model.save(output_path, save_format=save_format) + loaded_model = keras.models.load_model( + output_path, custom_objects={"Normalization": cls} + ) + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_data = loaded_model.predict(input_data) + self.assertAllClose(new_output_data, expected_output) + + @parameterized.product( + save_format=["tf", "h5", "keras_v3"], + adapt=[True, False], + ) + def test_saving_keras_invert(self, save_format, adapt): + expected_output = [[0.0], [2.0], [0.0], [2.0]] + input_data = [[-1.0], [1.0], [-1.0], [1.0]] + + cls = normalization.Normalization + inputs = keras.Input(shape=(1,), dtype=tf.float32) + if adapt: + layer = cls(axis=-1, invert=True) + layer.adapt(expected_output) + else: + layer = cls(mean=1.0, variance=1.0, invert=True) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + output_data = model.predict(input_data) + self.assertAllClose(output_data, expected_output) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_model_invert") + if save_format == "keras_v3": + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "TF2 must be enabled to use the new `.keras` saving." + ) + output_path += ".keras" + model.save(output_path, save_format=save_format) + loaded_model = keras.models.load_model( + output_path, custom_objects={"Normalization": cls} + ) + + # Ensure that the loaded model is unique (so that the save/load is real) + self.assertIsNot(model, loaded_model) + + # Validate correctness of the new model. + new_output_data = loaded_model.predict(input_data) + self.assertAllClose(new_output_data, expected_output) + + @parameterized.parameters( + {"adapted": True}, + {"adapted": False}, + ) + def test_saved_weights_keras(self, adapted): + input_data = [[0.0], [2.0], [0.0], [2.0]] + expected_output = [[-1.0], [1.0], [-1.0], [1.0]] + + cls = normalization.Normalization + inputs = keras.Input(shape=(1,), dtype=tf.float32) + if adapted: + layer = cls(axis=-1) + layer.adapt(input_data) + else: + layer = cls(mean=1.0, variance=1.0) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + + output_data = model.predict(input_data) + self.assertAllClose(output_data, expected_output) + + # Save the model to disk. 
+ output_path = os.path.join( + self.get_temp_dir(), "tf_keras_saved_weights" + ) + model.save_weights(output_path, save_format="tf") + new_model = keras.Model.from_config( + model.get_config(), custom_objects={"Normalization": cls} + ) + new_model.load_weights(output_path) + + # Validate correctness of the new model. + new_output_data = new_model.predict(input_data) + self.assertAllClose(new_output_data, expected_output) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/preprocessing_stage.py b/keras/layers/preprocessing/preprocessing_stage.py index 2247f13b7aa3..035f18c16b6f 100644 --- a/keras/layers/preprocessing/preprocessing_stage.py +++ b/keras/layers/preprocessing/preprocessing_stage.py @@ -14,10 +14,9 @@ # ============================================================================== """Preprocessing stage.""" +import numpy as np import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes -import numpy as np from keras.engine import base_preprocessing_layer from keras.engine import functional from keras.engine import sequential @@ -25,243 +24,259 @@ # Sequential methods should take precedence. -class PreprocessingStage(sequential.Sequential, - base_preprocessing_layer.PreprocessingLayer): - """A sequential preprocessing stage. - - This preprocessing stage wraps a list of preprocessing layers into a - Sequential-like object that enables you to `adapt()` the whole list via - a single `adapt()` call on the preprocessing stage. - - Args: - layers: List of layers. Can include layers that aren't preprocessing layers. - name: String. Optional name for the preprocessing stage object. - """ +class PreprocessingStage( + sequential.Sequential, base_preprocessing_layer.PreprocessingLayer +): + """A sequential preprocessing stage. - def adapt(self, data, reset_state=True): - """Adapt the state of the layers of the preprocessing stage to the data. + This preprocessing stage wraps a list of preprocessing layers into a + Sequential-like object that enables you to `adapt()` the whole list via + a single `adapt()` call on the preprocessing stage. Args: - data: A batched Dataset object, or a NumPy array, or an EagerTensor. - Data to be iterated over to adapt the state of the layers in this - preprocessing stage. - reset_state: Whether this call to `adapt` should reset the state of - the layers in this preprocessing stage. + layers: List of layers. Can include layers that aren't preprocessing + layers. + name: String. Optional name for the preprocessing stage object. """ - if not isinstance( - data, (tf.data.Dataset, np.ndarray, tf.__internal__.EagerTensor)): - raise ValueError( - f'`adapt()` requires a batched Dataset, an EagerTensor, or a Numpy ' - f'array as input. Received data={data}') - if isinstance(data, tf.data.Dataset): - # Validate the datasets to try and ensure we haven't been passed one with - # infinite size. That would cause an infinite loop here. - if tf_utils.dataset_is_infinite(data): - raise ValueError( - 'The dataset passed to `adapt()` has an infinite number of ' - 'elements. Please use dataset.take(...) to make the number ' - 'of elements finite.') - - for current_layer_index in range(0, len(self.layers)): - if not hasattr(self.layers[current_layer_index], 'adapt'): - # Skip any layer that does not need adapting. - continue - def map_fn(x): - """Maps `PreprocessingStage` inputs to inputs at `current_layer_index`. 
+ def adapt(self, data, reset_state=True): + """Adapt the state of the layers of the preprocessing stage to the data. Args: - x: Batch of inputs seen in entry of the `PreprocessingStage` instance. - - Returns: - Batch of inputs to be processed by layer - `self.layers[current_layer_index]` + data: A batched Dataset object, or a NumPy array, or an EagerTensor. + Data to be iterated over to adapt the state of the layers in this + preprocessing stage. + reset_state: Whether this call to `adapt` should reset the state of + the layers in this preprocessing stage. """ - if current_layer_index == 0: # pylint: disable=cell-var-from-loop - return x - for i in range(current_layer_index): # pylint: disable=cell-var-from-loop - x = self.layers[i](x) - return x - - if isinstance(data, tf.data.Dataset): - current_layer_data = data.map(map_fn) - else: - current_layer_data = map_fn(data) - self.layers[current_layer_index].adapt(current_layer_data, - reset_state=reset_state) + if not isinstance( + data, (tf.data.Dataset, np.ndarray, tf.__internal__.EagerTensor) + ): + raise ValueError( + "`adapt()` requires a batched Dataset, an EagerTensor, or a " + f"Numpy array as input. Received data={data}" + ) + if isinstance(data, tf.data.Dataset): + # Validate the datasets to try and ensure we haven't been passed one + # with infinite size. That would cause an infinite loop here. + if tf_utils.dataset_is_infinite(data): + raise ValueError( + "The dataset passed to `adapt()` has an infinite number of " + "elements. Please use dataset.take(...) to make the number " + "of elements finite." + ) + + for current_layer_index in range(0, len(self.layers)): + if not hasattr(self.layers[current_layer_index], "adapt"): + # Skip any layer that does not need adapting. + continue + + def map_fn(x): + """Maps this object's inputs to those at current_layer_index. + + Args: + x: Batch of inputs seen in entry of the `PreprocessingStage` + instance. + + Returns: + Batch of inputs to be processed by layer + `self.layers[current_layer_index]` + """ + if current_layer_index == 0: + return x + for i in range(current_layer_index): + x = self.layers[i](x) + return x + + if isinstance(data, tf.data.Dataset): + current_layer_data = data.map(map_fn) + else: + current_layer_data = map_fn(data) + self.layers[current_layer_index].adapt( + current_layer_data, reset_state=reset_state + ) # Functional methods should take precedence. -class FunctionalPreprocessingStage(functional.Functional, - base_preprocessing_layer.PreprocessingLayer): - """A functional preprocessing stage. - - This preprocessing stage wraps a graph of preprocessing layers into a - Functional-like object that enables you to `adapt()` the whole graph via - a single `adapt()` call on the preprocessing stage. - - Preprocessing stage is not a complete model, so it cannot be called with - `fit()`. However, it is possible to add regular layers that may be trainable - to a preprocessing stage. - - A functional preprocessing stage is created in the same way as `Functional` - models. A stage can be instantiated by passing two arguments to - `__init__`. The first argument is the `keras.Input` Tensors that represent - the inputs to the stage. The second argument specifies the output - tensors that represent the outputs of this stage. Both arguments can be a - nested structure of tensors. - - Example: - - >>> inputs = {'x2': tf.keras.Input(shape=(5,)), - ... 
'x1': tf.keras.Input(shape=(1,))} - >>> norm_layer = tf.keras.layers.experimental.preprocessing.Normalization() - >>> y = norm_layer(inputs['x2']) - >>> y, z = tf.keras.layers.Lambda(lambda x: (x, x))(inputs['x1']) - >>> outputs = [inputs['x1'], [y, z]] - >>> stage = FunctionalPreprocessingStage(inputs, outputs) - - Args: - inputs: An input tensor (must be created via `tf.keras.Input()`), or a list, - a dict, or a nested structure of input tensors. - outputs: An output tensor, or a list, a dict or a nested structure of output - tensors. - name: String, optional. Name of the preprocessing stage. - """ - - def fit(self, *args, **kwargs): - raise ValueError( - 'Preprocessing stage is not a complete model, and hence should not be ' - '`fit`. Instead, you may feed data to `adapt` the stage to set ' - 'appropriate states of the layers in the stage.') - - def adapt(self, data, reset_state=True): - """Adapt the state of the layers of the preprocessing stage to the data. +class FunctionalPreprocessingStage( + functional.Functional, base_preprocessing_layer.PreprocessingLayer +): + """A functional preprocessing stage. - Args: - data: A batched Dataset object, a NumPy array, an EagerTensor, or a list, - dict or nested structure of Numpy Arrays or EagerTensors. The elements - of Dataset object need to conform with inputs of the stage. The first - dimension of NumPy arrays or EagerTensors are understood to be batch - dimension. Data to be iterated over to adapt the state of the layers in - this preprocessing stage. - reset_state: Whether this call to `adapt` should reset the state of the - layers in this preprocessing stage. - - Examples: - - >>> # For a stage with dict input - >>> inputs = {'x2': tf.keras.Input(shape=(5,)), - ... 'x1': tf.keras.Input(shape=(1,))} - >>> outputs = [inputs['x1'], inputs['x2']] - >>> stage = FunctionalPreprocessingStage(inputs, outputs) - >>> ds = tf.data.Dataset.from_tensor_slices({'x1': tf.ones((4,5)), - ... 'x2': tf.ones((4,1))}) - >>> sorted(ds.element_spec.items()) # Check element_spec - [('x1', TensorSpec(shape=(5,), dtype=tf.float32, name=None)), - ('x2', TensorSpec(shape=(1,), dtype=tf.float32, name=None))] - >>> stage.adapt(ds) - >>> data_np = {'x1': np.ones((4, 5)), 'x2': np.ones((4, 1))} - >>> stage.adapt(data_np) + This preprocessing stage wraps a graph of preprocessing layers into a + Functional-like object that enables you to `adapt()` the whole graph via + a single `adapt()` call on the preprocessing stage. - """ - if not isinstance(data, tf.data.Dataset): - data = self._flatten_to_reference_inputs(data) - if any(not isinstance(datum, (np.ndarray, tf.__internal__.EagerTensor)) - for datum in data): - raise ValueError( - '`adapt()` requires a batched Dataset, a list of EagerTensors ' - 'or Numpy arrays as input, got {}'.format(type(data))) - ds_input = [ - tf.data.Dataset.from_tensor_slices(x).batch(1) for x in data - ] - - if isinstance(data, tf.data.Dataset): - # Validate the datasets to try and ensure we haven't been passed one with - # infinite size. That would cause an infinite loop here. - if tf_utils.dataset_is_infinite(data): - raise ValueError( - 'The dataset passed to `adapt()` has an infinite number of ' - 'elements. Please use dataset.take(...) to make the number ' - 'of elements finite.') - # Unzip dataset object to a list of single input dataset. - ds_input = _unzip_dataset(data) + Preprocessing stage is not a complete model, so it cannot be called with + `fit()`. 
However, it is possible to add regular layers that may be trainable
+    to a preprocessing stage.
-    # Dictionary mapping reference tensors to datasets
-    ds_dict = {}
-    tensor_usage_count = self._tensor_usage_count
-    for x, y in zip(self.inputs, ds_input):
-      x_id = str(id(x))
-      ds_dict[x_id] = [y] * tensor_usage_count[x_id]
+    A functional preprocessing stage is created in the same way as `Functional`
+    models. A stage can be instantiated by passing two arguments to
+    `__init__`. The first argument is the `keras.Input` Tensors that represent
+    the inputs to the stage. The second argument specifies the output
+    tensors that represent the outputs of this stage. Both arguments can be a
+    nested structure of tensors.
-    nodes_by_depth = self._nodes_by_depth
-    depth_keys = sorted(nodes_by_depth.keys(), reverse=True)
+    Example:
-    def build_map_fn(node, args, kwargs):
-      if not isinstance(args.element_spec, tuple):
-
-        def map_fn(*x):
-          return tf.nest.flatten(node.layer(*x, **kwargs))
-      else:
-
-        def map_fn(*x):
-          return tf.nest.flatten(node.layer(x, **kwargs))
-
-      return map_fn
-
-    for depth in depth_keys:
-      for node in nodes_by_depth[depth]:
-        # Input node
-        if node.is_input:
-          continue
+    >>> inputs = {'x2': tf.keras.Input(shape=(5,)),
+    ...           'x1': tf.keras.Input(shape=(1,))}
+    >>> norm_layer = tf.keras.layers.Normalization()
+    >>> y = norm_layer(inputs['x2'])
+    >>> y, z = tf.keras.layers.Lambda(lambda x: (x, x))(inputs['x1'])
+    >>> outputs = [inputs['x1'], [y, z]]
+    >>> stage = FunctionalPreprocessingStage(inputs, outputs)
-        # Node with input not computed yet
-        if any(t_id not in ds_dict for t_id in node.flat_input_ids):
-          continue
+    Args:
+      inputs: An input tensor (must be created via `tf.keras.Input()`), or a
+        list, a dict, or a nested structure of input tensors.
+      outputs: An output tensor, or a list, a dict or a nested structure of
+        output tensors.
+      name: String, optional. Name of the preprocessing stage.
+    """
-        args, kwargs = node.map_arguments(ds_dict)
-        args = tf.data.Dataset.zip(tf.__internal__.nest.list_to_tuple(*args))
+    def fit(self, *args, **kwargs):
+        raise ValueError(
+            "Preprocessing stage is not a complete model, and hence should not "
+            "be `fit`. Instead, you may feed data to `adapt` the stage to set "
+            "appropriate states of the layers in the stage."
+        )
-        if node.layer.stateful and hasattr(node.layer, 'adapt'):
-          node.layer.adapt(args, reset_state=reset_state)
+    def adapt(self, data, reset_state=True):
+        """Adapt the state of the layers of the preprocessing stage to the data.
-        map_fn = build_map_fn(node, args, kwargs)
-        outputs = args.map(map_fn)
-        outputs = _unzip_dataset(outputs)
+        Args:
+          data: A batched Dataset object, a NumPy array, an EagerTensor, or a
+            list, dict or nested structure of NumPy arrays or EagerTensors.
+            The elements of a Dataset object need to conform with the inputs
+            of the stage. The first dimension of NumPy arrays or EagerTensors
+            is understood to be the batch dimension. Data to be iterated over
+            to adapt the state of the layers in this preprocessing stage.
+          reset_state: Whether this call to `adapt` should reset the state of
+            the layers in this preprocessing stage.
+
+        Examples:
+
+        >>> # For a stage with dict input
+        >>> inputs = {'x2': tf.keras.Input(shape=(5,)),
+        ...           'x1': tf.keras.Input(shape=(1,))}
+        >>> outputs = [inputs['x1'], inputs['x2']]
+        >>> stage = FunctionalPreprocessingStage(inputs, outputs)
+        >>> ds = tf.data.Dataset.from_tensor_slices({'x1': tf.ones((4,5)),
+        ...
'x2': tf.ones((4,1))}) + >>> sorted(ds.element_spec.items()) # Check element_spec + [('x1', TensorSpec(shape=(5,), dtype=tf.float32, name=None)), + ('x2', TensorSpec(shape=(1,), dtype=tf.float32, name=None))] + >>> stage.adapt(ds) + >>> data_np = {'x1': np.ones((4, 5)), 'x2': np.ones((4, 1))} + >>> stage.adapt(data_np) - # Update ds_dict. - for x_id, y in zip(node.flat_output_ids, outputs): - ds_dict[x_id] = [y] * tensor_usage_count[x_id] + """ + if not isinstance(data, tf.data.Dataset): + data = self._flatten_to_reference_inputs(data) + if any( + not isinstance(datum, (np.ndarray, tf.__internal__.EagerTensor)) + for datum in data + ): + raise ValueError( + "`adapt()` requires a batched Dataset, a list of " + f"EagerTensors or Numpy arrays as input, got {type(data)}" + ) + ds_input = [ + tf.data.Dataset.from_tensor_slices(x).batch(1) for x in data + ] + + if isinstance(data, tf.data.Dataset): + # Validate the datasets to try and ensure we haven't been passed one + # with infinite size. That would cause an infinite loop here. + if tf_utils.dataset_is_infinite(data): + raise ValueError( + "The dataset passed to `adapt()` has an infinite number of " + "elements. Please use dataset.take(...) to make the number " + "of elements finite." + ) + # Unzip dataset object to a list of single input dataset. + ds_input = _unzip_dataset(data) + + # Dictionary mapping reference tensors to datasets + ds_dict = {} + tensor_usage_count = self._tensor_usage_count + for x, y in zip(self.inputs, ds_input): + x_id = str(id(x)) + ds_dict[x_id] = [y] * tensor_usage_count[x_id] + + nodes_by_depth = self._nodes_by_depth + depth_keys = sorted(nodes_by_depth.keys(), reverse=True) + + def build_map_fn(node, args, kwargs): + if not isinstance(args.element_spec, tuple): + + def map_fn(*x): + return tf.nest.flatten(node.layer(*x, **kwargs)) + + else: + + def map_fn(*x): + return tf.nest.flatten(node.layer(x, **kwargs)) + + return map_fn + + for depth in depth_keys: + for node in nodes_by_depth[depth]: + # Input node + if node.is_input: + continue + + # Node with input not computed yet + if any(t_id not in ds_dict for t_id in node.flat_input_ids): + continue + + args, kwargs = node.map_arguments(ds_dict) + args = tf.data.Dataset.zip( + tf.__internal__.nest.list_to_tuple(*args) + ) + + if node.layer.stateful and hasattr(node.layer, "adapt"): + node.layer.adapt(args, reset_state=reset_state) + + map_fn = build_map_fn(node, args, kwargs) + outputs = args.map(map_fn) + outputs = _unzip_dataset(outputs) + + # Update ds_dict. + for x_id, y in zip(node.flat_output_ids, outputs): + ds_dict[x_id] = [y] * tensor_usage_count[x_id] def _unzip_dataset(ds): - """Unzip dataset into a list of single element datasets. + """Unzip dataset into a list of single element datasets. - Args: - ds: A Dataset object. + Args: + ds: A Dataset object. - Returns: - A list of Dataset object, each correspond to one of the `element_spec` of - the input Dataset object. + Returns: + A list of Dataset object, each correspond to one of the `element_spec` of + the input Dataset object. 
- Example: + Example: - >>> ds1 = tf.data.Dataset.from_tensor_slices([1, 2, 3]) - >>> ds2 = tf.data.Dataset.from_tensor_slices([4, 5, 6]) - >>> ds_zipped_tuple = tf.data.Dataset.zip((ds1, ds2)) - >>> ds_unzipped_tuple = _unzip_dataset(ds_zipped_tuple) - >>> ds_zipped_dict = tf.data.Dataset.zip({'ds1': ds1, 'ds2': ds2}) - >>> ds_unzipped_dict = _unzip_dataset(ds_zipped_dict) + >>> ds1 = tf.data.Dataset.from_tensor_slices([1, 2, 3]) + >>> ds2 = tf.data.Dataset.from_tensor_slices([4, 5, 6]) + >>> ds_zipped_tuple = tf.data.Dataset.zip((ds1, ds2)) + >>> ds_unzipped_tuple = _unzip_dataset(ds_zipped_tuple) + >>> ds_zipped_dict = tf.data.Dataset.zip({'ds1': ds1, 'ds2': ds2}) + >>> ds_unzipped_dict = _unzip_dataset(ds_zipped_dict) - Then the two elements of `ds_unzipped_tuple` and `ds_unzipped_dict` are both - the same as `ds1` and `ds2`. - """ - element_count = len(tf.nest.flatten(ds.element_spec)) - ds_unzipped = [] - for i in range(element_count): + Then the two elements of `ds_unzipped_tuple` and `ds_unzipped_dict` are both + the same as `ds1` and `ds2`. + """ + element_count = len(tf.nest.flatten(ds.element_spec)) + ds_unzipped = [] + for i in range(element_count): - def map_fn(*x, j=i): - return tf.nest.flatten(x)[j] + def map_fn(*x, j=i): + return tf.nest.flatten(x)[j] - ds_unzipped.append(ds.map(map_fn)) - return ds_unzipped + ds_unzipped.append(ds.map(map_fn)) + return ds_unzipped diff --git a/keras/layers/preprocessing/preprocessing_stage_functional_test.py b/keras/layers/preprocessing/preprocessing_stage_functional_test.py index 12fd94b0c9b5..897c1d48ec64 100644 --- a/keras/layers/preprocessing/preprocessing_stage_functional_test.py +++ b/keras/layers/preprocessing/preprocessing_stage_functional_test.py @@ -14,12 +14,11 @@ # ============================================================================== """Functional preprocessing stage tests.""" -import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes - import time + import numpy as np -from keras.testing_infra import test_combinations +import tensorflow.compat.v2 as tf + from keras.engine import base_preprocessing_layer from keras.engine.input_layer import Input from keras.layers import convolutional @@ -29,411 +28,421 @@ from keras.layers.preprocessing import normalization from keras.layers.preprocessing import preprocessing_stage from keras.layers.preprocessing import preprocessing_test_utils +from keras.testing_infra import test_combinations class PL(base_preprocessing_layer.PreprocessingLayer): + def __init__(self, **kwargs): + self.adapt_time = None + self.adapt_count = 0 + super().__init__(**kwargs) - def __init__(self, **kwargs): - self.adapt_time = None - self.adapt_count = 0 - super().__init__(**kwargs) - - def adapt(self, data, reset_state=True): - self.adapt_time = time.time() - self.adapt_count += 1 + def adapt(self, data, reset_state=True): + self.adapt_time = time.time() + self.adapt_count += 1 - def call(self, inputs): - return inputs + 1 + def call(self, inputs): + return inputs + 1 class PLMerge(PL): - - def call(self, inputs): - return inputs[0] + inputs[1] + def call(self, inputs): + return inputs[0] + inputs[1] class PLSplit(PL): - - def call(self, inputs): - return inputs + 1, inputs - 1 + def call(self, inputs): + return inputs + 1, inputs - 1 @test_combinations.run_all_keras_modes(always_skip_v1=True) -class PreprocessingStageTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_adapt_preprocessing_stage_with_single_input_output(self): - - x = 
Input(shape=(3,)) - - l0 = PL() - y = l0(x) - - l1 = PL() - z = l1(y) - - stage = preprocessing_stage.FunctionalPreprocessingStage(x, z) - stage.compile() - - # Test with NumPy array - one_array = np.ones((4, 3), dtype='float32') - stage.adapt(one_array) - self.assertEqual(l0.adapt_count, 1) - self.assertEqual(l1.adapt_count, 1) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - - # Check call - z = stage(tf.ones((4, 3), dtype='float32')) - self.assertAllClose(z, np.ones((4, 3), dtype='float32') + 2.) - - # Test with dataset - adapt_data = tf.data.Dataset.from_tensor_slices(one_array) - adapt_data = adapt_data.batch(2) # 5 batches of 2 samples - - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 2) - self.assertEqual(l1.adapt_count, 2) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - - # Test error with bad data - with self.assertRaisesRegex(ValueError, 'requires a '): - stage.adapt(None) - - # Disallow calling fit - with self.assertRaisesRegex(ValueError, 'Preprocessing stage'): - stage.fit(None) - - def test_adapt_preprocessing_stage_with_list_input(self): - - x0 = Input(shape=(3,)) - x1 = Input(shape=(3,)) - x2 = Input(shape=(3,)) - - l0 = PLMerge() - y = l0([x0, x1]) - - l1 = PLMerge() - y = l1([y, x2]) - - l2 = PLSplit() - z, y = l2(y) - - stage = preprocessing_stage.FunctionalPreprocessingStage([x0, x1, x2], - [y, z]) - stage.compile() - - # Test with NumPy array - one_array = np.ones((4, 3), dtype='float32') - stage.adapt([one_array, one_array, one_array]) - self.assertEqual(l0.adapt_count, 1) - self.assertEqual(l1.adapt_count, 1) - self.assertEqual(l2.adapt_count, 1) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - self.assertLessEqual(l1.adapt_time, l2.adapt_time) - - # Check call - y, z = stage([ - tf.ones((4, 3), dtype='float32'), - tf.ones((4, 3), dtype='float32'), - tf.ones((4, 3), dtype='float32') - ]) - self.assertAllClose(y, np.ones((4, 3), dtype='float32') + 1.) - self.assertAllClose(z, np.ones((4, 3), dtype='float32') + 3.) - - # Test with dataset - adapt_data = tf.data.Dataset.from_tensor_slices( - (one_array, one_array, one_array)) - adapt_data = adapt_data.batch(2) # 5 batches of 2 samples - - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 2) - self.assertEqual(l1.adapt_count, 2) - self.assertEqual(l2.adapt_count, 2) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - self.assertLessEqual(l1.adapt_time, l2.adapt_time) - - # Test error with bad data - with self.assertRaisesRegex(ValueError, 'requires a '): - stage.adapt(None) - - def test_adapt_preprocessing_stage_with_dict_input(self): - x0 = Input(shape=(3,), name='x0') - x1 = Input(shape=(4,), name='x1') - x2 = Input(shape=(3, 5), name='x2') - - # dimension will mismatch if x1 incorrectly placed. 
- x1_sum = core.Lambda( - lambda x: tf.reduce_sum(x, axis=-1, keepdims=True))( - x1) - x2_sum = core.Lambda(lambda x: tf.reduce_sum(x, axis=-1))(x2) - - l0 = PLMerge() - y = l0([x0, x1_sum]) - - l1 = PLMerge() - y = l1([y, x2_sum]) - - l2 = PLSplit() - z, y = l2(y) - stage = preprocessing_stage.FunctionalPreprocessingStage( - { - 'x2': x2, - 'x0': x0, - 'x1': x1 - }, [y, z]) - stage.compile() - - # Test with dict of NumPy array - one_array0 = np.ones((4, 3), dtype='float32') - one_array1 = np.ones((4, 4), dtype='float32') - one_array2 = np.ones((4, 3, 5), dtype='float32') - adapt_data = {'x1': one_array1, 'x0': one_array0, 'x2': one_array2} - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 1) - self.assertEqual(l1.adapt_count, 1) - self.assertEqual(l2.adapt_count, 1) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - self.assertLessEqual(l1.adapt_time, l2.adapt_time) - - # Check call - y, z = stage({ - 'x1': tf.constant(one_array1), - 'x2': tf.constant(one_array2), - 'x0': tf.constant(one_array0) - }) - self.assertAllClose(y, np.zeros((4, 3), dtype='float32') + 9.) - self.assertAllClose(z, np.zeros((4, 3), dtype='float32') + 11.) - - # Test with list of NumPy array - adapt_data = [one_array0, one_array1, one_array2] - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 2) - self.assertEqual(l1.adapt_count, 2) - self.assertEqual(l2.adapt_count, 2) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - self.assertLessEqual(l1.adapt_time, l2.adapt_time) - - # Test with flattened dataset - adapt_data = tf.data.Dataset.from_tensor_slices( - (one_array0, one_array1, one_array2)) - adapt_data = adapt_data.batch(2) # 5 batches of 2 samples - - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 3) - self.assertEqual(l1.adapt_count, 3) - self.assertEqual(l2.adapt_count, 3) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - self.assertLessEqual(l1.adapt_time, l2.adapt_time) - - # Test with dataset in dict shape - adapt_data = tf.data.Dataset.from_tensor_slices({ - 'x0': one_array0, - 'x2': one_array2, - 'x1': one_array1 - }) - adapt_data = adapt_data.batch(2) # 5 batches of 2 samples - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 4) - self.assertEqual(l1.adapt_count, 4) - self.assertEqual(l2.adapt_count, 4) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - self.assertLessEqual(l1.adapt_time, l2.adapt_time) - - # Test error with bad data - with self.assertRaisesRegex(ValueError, 'requires a '): - stage.adapt(None) - - def test_adapt_preprocessing_stage_with_dict_output(self): - x = Input(shape=(3,), name='x') - - l0 = PLSplit() - y0, y1 = l0(x) - - l1 = PLSplit() - z0, z1 = l1(y0) - stage = preprocessing_stage.FunctionalPreprocessingStage({'x': x}, { - 'y1': y1, - 'z1': z1, - 'y0': y0, - 'z0': z0 - }) - stage.compile() - - # Test with NumPy array - one_array = np.ones((4, 3), dtype='float32') - adapt_data = {'x': one_array} - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 1) - self.assertEqual(l1.adapt_count, 1) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - - # Check call - outputs = stage({'x': tf.constant(one_array)}) - self.assertEqual(set(outputs.keys()), {'y0', 'y1', 'z0', 'z1'}) - self.assertAllClose(outputs['y0'], np.ones((4, 3), dtype='float32') + 1.) - self.assertAllClose(outputs['y1'], np.ones((4, 3), dtype='float32') - 1.) - self.assertAllClose(outputs['z0'], np.ones((4, 3), dtype='float32') + 2.) 
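The constants in the dict-output assertions here follow from `PLSplit.call`, which returns `(inputs + 1, inputs - 1)`: the second `PLSplit` is applied to the first output of the first one, so on an all-ones batch the four outputs are `x + 1`, `x - 1`, `x + 2`, and `x`. A minimal standalone NumPy sketch of that arithmetic (illustrative only; the array shape is taken from the test):

import numpy as np

x = np.ones((4, 3), dtype="float32")
y0, y1 = x + 1.0, x - 1.0    # l0 = PLSplit(): (x + 1, x - 1)
z0, z1 = y0 + 1.0, y0 - 1.0  # l1 = PLSplit() on y0: (x + 2, x)
assert (y0 == 2.0).all() and (y1 == 0.0).all()
assert (z0 == 3.0).all() and (z1 == 1.0).all()  # i.e. ones + 2 and ones

The same layer-by-layer arithmetic accounts for the `+ 9.` and `+ 11.` constants in the dict-input test above, where the summed `x1` and `x2` branches add 4 and 5 to the all-ones `x0` before the final `PLSplit` shifts the result by plus and minus one.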
- self.assertAllClose(outputs['z1'], np.ones((4, 3), dtype='float32')) - - def test_preprocessing_stage_with_nested_input(self): - # Test with NumPy array - x0 = Input(shape=(3,)) - x1 = Input(shape=(3,)) - x2 = Input(shape=(3,)) - - l0 = PLMerge() - y = l0([x0, x1]) - - l1 = PLMerge() - y = l1([y, x2]) - - l2 = PLSplit() - z, y = l2(y) - - stage = preprocessing_stage.FunctionalPreprocessingStage([x0, [x1, x2]], - [y, z]) - stage.compile() - one_array = np.ones((4, 3), dtype='float32') - stage.adapt([one_array, [one_array, one_array]]) - self.assertEqual(l0.adapt_count, 1) - self.assertEqual(l1.adapt_count, 1) - self.assertEqual(l2.adapt_count, 1) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - self.assertLessEqual(l1.adapt_time, l2.adapt_time) - - # Check call - y, z = stage([ - tf.ones((4, 3), dtype='float32'), - [ - tf.ones((4, 3), dtype='float32'), - tf.ones((4, 3), dtype='float32') +class PreprocessingStageTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_adapt_preprocessing_stage_with_single_input_output(self): + + x = Input(shape=(3,)) + + l0 = PL() + y = l0(x) + + l1 = PL() + z = l1(y) + + stage = preprocessing_stage.FunctionalPreprocessingStage(x, z) + stage.compile() + + # Test with NumPy array + one_array = np.ones((4, 3), dtype="float32") + stage.adapt(one_array) + self.assertEqual(l0.adapt_count, 1) + self.assertEqual(l1.adapt_count, 1) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + + # Check call + z = stage(tf.ones((4, 3), dtype="float32")) + self.assertAllClose(z, np.ones((4, 3), dtype="float32") + 2.0) + + # Test with dataset + adapt_data = tf.data.Dataset.from_tensor_slices(one_array) + adapt_data = adapt_data.batch(2) # 5 batches of 2 samples + + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 2) + self.assertEqual(l1.adapt_count, 2) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + + # Test error with bad data + with self.assertRaisesRegex(ValueError, "requires a "): + stage.adapt(None) + + # Disallow calling fit + with self.assertRaisesRegex(ValueError, "Preprocessing stage"): + stage.fit(None) + + def test_adapt_preprocessing_stage_with_list_input(self): + + x0 = Input(shape=(3,)) + x1 = Input(shape=(3,)) + x2 = Input(shape=(3,)) + + l0 = PLMerge() + y = l0([x0, x1]) + + l1 = PLMerge() + y = l1([y, x2]) + + l2 = PLSplit() + z, y = l2(y) + + stage = preprocessing_stage.FunctionalPreprocessingStage( + [x0, x1, x2], [y, z] + ) + stage.compile() + + # Test with NumPy array + one_array = np.ones((4, 3), dtype="float32") + stage.adapt([one_array, one_array, one_array]) + self.assertEqual(l0.adapt_count, 1) + self.assertEqual(l1.adapt_count, 1) + self.assertEqual(l2.adapt_count, 1) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + self.assertLessEqual(l1.adapt_time, l2.adapt_time) + + # Check call + y, z = stage( + [ + tf.ones((4, 3), dtype="float32"), + tf.ones((4, 3), dtype="float32"), + tf.ones((4, 3), dtype="float32"), + ] + ) + self.assertAllClose(y, np.ones((4, 3), dtype="float32") + 1.0) + self.assertAllClose(z, np.ones((4, 3), dtype="float32") + 3.0) + + # Test with dataset + adapt_data = tf.data.Dataset.from_tensor_slices( + (one_array, one_array, one_array) + ) + adapt_data = adapt_data.batch(2) # 5 batches of 2 samples + + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 2) + self.assertEqual(l1.adapt_count, 2) + self.assertEqual(l2.adapt_count, 2) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + self.assertLessEqual(l1.adapt_time, l2.adapt_time) + + # Test error 
with bad data + with self.assertRaisesRegex(ValueError, "requires a "): + stage.adapt(None) + + def test_adapt_preprocessing_stage_with_dict_input(self): + x0 = Input(shape=(3,), name="x0") + x1 = Input(shape=(4,), name="x1") + x2 = Input(shape=(3, 5), name="x2") + + # dimension will mismatch if x1 incorrectly placed. + x1_sum = core.Lambda( + lambda x: tf.reduce_sum(x, axis=-1, keepdims=True) + )(x1) + x2_sum = core.Lambda(lambda x: tf.reduce_sum(x, axis=-1))(x2) + + l0 = PLMerge() + y = l0([x0, x1_sum]) + + l1 = PLMerge() + y = l1([y, x2_sum]) + + l2 = PLSplit() + z, y = l2(y) + stage = preprocessing_stage.FunctionalPreprocessingStage( + {"x2": x2, "x0": x0, "x1": x1}, [y, z] + ) + stage.compile() + + # Test with dict of NumPy array + one_array0 = np.ones((4, 3), dtype="float32") + one_array1 = np.ones((4, 4), dtype="float32") + one_array2 = np.ones((4, 3, 5), dtype="float32") + adapt_data = {"x1": one_array1, "x0": one_array0, "x2": one_array2} + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 1) + self.assertEqual(l1.adapt_count, 1) + self.assertEqual(l2.adapt_count, 1) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + self.assertLessEqual(l1.adapt_time, l2.adapt_time) + + # Check call + y, z = stage( + { + "x1": tf.constant(one_array1), + "x2": tf.constant(one_array2), + "x0": tf.constant(one_array0), + } + ) + self.assertAllClose(y, np.zeros((4, 3), dtype="float32") + 9.0) + self.assertAllClose(z, np.zeros((4, 3), dtype="float32") + 11.0) + + # Test with list of NumPy array + adapt_data = [one_array0, one_array1, one_array2] + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 2) + self.assertEqual(l1.adapt_count, 2) + self.assertEqual(l2.adapt_count, 2) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + self.assertLessEqual(l1.adapt_time, l2.adapt_time) + + # Test with flattened dataset + adapt_data = tf.data.Dataset.from_tensor_slices( + (one_array0, one_array1, one_array2) + ) + adapt_data = adapt_data.batch(2) # 5 batches of 2 samples + + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 3) + self.assertEqual(l1.adapt_count, 3) + self.assertEqual(l2.adapt_count, 3) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + self.assertLessEqual(l1.adapt_time, l2.adapt_time) + + # Test with dataset in dict shape + adapt_data = tf.data.Dataset.from_tensor_slices( + {"x0": one_array0, "x2": one_array2, "x1": one_array1} + ) + adapt_data = adapt_data.batch(2) # 5 batches of 2 samples + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 4) + self.assertEqual(l1.adapt_count, 4) + self.assertEqual(l2.adapt_count, 4) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + self.assertLessEqual(l1.adapt_time, l2.adapt_time) + + # Test error with bad data + with self.assertRaisesRegex(ValueError, "requires a "): + stage.adapt(None) + + def test_adapt_preprocessing_stage_with_dict_output(self): + x = Input(shape=(3,), name="x") + + l0 = PLSplit() + y0, y1 = l0(x) + + l1 = PLSplit() + z0, z1 = l1(y0) + stage = preprocessing_stage.FunctionalPreprocessingStage( + {"x": x}, {"y1": y1, "z1": z1, "y0": y0, "z0": z0} + ) + stage.compile() + + # Test with NumPy array + one_array = np.ones((4, 3), dtype="float32") + adapt_data = {"x": one_array} + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 1) + self.assertEqual(l1.adapt_count, 1) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + + # Check call + outputs = stage({"x": tf.constant(one_array)}) + self.assertEqual(set(outputs.keys()), {"y0", "y1", "z0", "z1"}) + self.assertAllClose( + 
outputs["y0"], np.ones((4, 3), dtype="float32") + 1.0 + ) + self.assertAllClose( + outputs["y1"], np.ones((4, 3), dtype="float32") - 1.0 + ) + self.assertAllClose( + outputs["z0"], np.ones((4, 3), dtype="float32") + 2.0 + ) + self.assertAllClose(outputs["z1"], np.ones((4, 3), dtype="float32")) + + def test_preprocessing_stage_with_nested_input(self): + # Test with NumPy array + x0 = Input(shape=(3,)) + x1 = Input(shape=(3,)) + x2 = Input(shape=(3,)) + + l0 = PLMerge() + y = l0([x0, x1]) + + l1 = PLMerge() + y = l1([y, x2]) + + l2 = PLSplit() + z, y = l2(y) + + stage = preprocessing_stage.FunctionalPreprocessingStage( + [x0, [x1, x2]], [y, z] + ) + stage.compile() + one_array = np.ones((4, 3), dtype="float32") + stage.adapt([one_array, [one_array, one_array]]) + self.assertEqual(l0.adapt_count, 1) + self.assertEqual(l1.adapt_count, 1) + self.assertEqual(l2.adapt_count, 1) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + self.assertLessEqual(l1.adapt_time, l2.adapt_time) + + # Check call + y, z = stage( + [ + tf.ones((4, 3), dtype="float32"), + [ + tf.ones((4, 3), dtype="float32"), + tf.ones((4, 3), dtype="float32"), + ], + ] + ) + self.assertAllClose(y, np.ones((4, 3), dtype="float32") + 1.0) + self.assertAllClose(z, np.ones((4, 3), dtype="float32") + 3.0) + + # Test with dataset + adapt_data = tf.data.Dataset.from_tensor_slices( + (one_array, (one_array, one_array)) + ) + adapt_data = adapt_data.batch(2) # 5 batches of 2 samples + + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 2) + self.assertEqual(l1.adapt_count, 2) + self.assertEqual(l2.adapt_count, 2) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + self.assertLessEqual(l1.adapt_time, l2.adapt_time) + + # Test error with bad data + with self.assertRaisesRegex(ValueError, "requires a "): + stage.adapt(None) + + def test_include_layers_with_dict_input(self): + class PLMergeDict(PLMerge): + def call(self, inputs): + return inputs["a"] + inputs["b"] + + x0 = Input(shape=(3,)) + x1 = Input(shape=(3,)) + + l0 = PLMergeDict() + y = l0({"a": x0, "b": x1}) + + l1 = PLSplit() + z, y = l1(y) + + stage = preprocessing_stage.FunctionalPreprocessingStage( + [x0, x1], [y, z] + ) + stage.compile() + + one_array = np.ones((4, 3), dtype="float32") + adapt_data = tf.data.Dataset.from_tensor_slices((one_array, one_array)) + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 1) + self.assertEqual(l1.adapt_count, 1) + self.assertLessEqual(l0.adapt_time, l1.adapt_time) + + # Check call + y, z = stage( + [tf.ones((4, 3), dtype="float32"), tf.ones((4, 3), dtype="float32")] + ) + self.assertAllClose(y, np.ones((4, 3), dtype="float32")) + self.assertAllClose(z, np.ones((4, 3), dtype="float32") + 2.0) + + def test_include_layers_with_nested_input(self): + class PLMergeNest(PLMerge): + def call(self, inputs): + a = inputs[0] + b = inputs[1][0] + c = inputs[1][1] + return a + b + c + + x0 = Input(shape=(3,)) + x1 = Input(shape=(3,)) + x2 = Input(shape=(3,)) + + l0 = PLMergeNest() + y = l0([x0, [x1, x2]]) + + stage = preprocessing_stage.FunctionalPreprocessingStage( + [x0, x1, x2], y + ) + stage.compile() + + one_array = np.ones((4, 3), dtype="float32") + adapt_data = tf.data.Dataset.from_tensor_slices((one_array,) * 3) + stage.adapt(adapt_data) + self.assertEqual(l0.adapt_count, 1) + + # Check call + y = stage( + [ + tf.ones((4, 3), dtype="float32"), + tf.ones((4, 3), dtype="float32"), + tf.ones((4, 3), dtype="float32"), + ] + ) + self.assertAllClose(y, np.ones((4, 3), dtype="float32") + 2.0) + + def 
test_mixing_preprocessing_and_regular_layers(self): + x0 = Input(shape=(10, 10, 3)) + x1 = Input(shape=(10, 10, 3)) + x2 = Input(shape=(10, 10, 3)) + + y0 = merging.Add()([x0, x1]) + y1 = image_preprocessing.CenterCrop(8, 8)(x2) + y1 = convolutional.ZeroPadding2D(padding=1)(y1) + + z = merging.Add()([y0, y1]) + z = normalization.Normalization()(z) + z = convolutional.Conv2D(4, 3)(z) + + stage = preprocessing_stage.FunctionalPreprocessingStage( + [x0, x1, x2], z + ) + + data = [ + np.ones((12, 10, 10, 3), dtype="float32"), + np.ones((12, 10, 10, 3), dtype="float32"), + np.ones((12, 10, 10, 3), dtype="float32"), ] - ]) - self.assertAllClose(y, np.ones((4, 3), dtype='float32') + 1.) - self.assertAllClose(z, np.ones((4, 3), dtype='float32') + 3.) - - # Test with dataset - adapt_data = tf.data.Dataset.from_tensor_slices( - (one_array, (one_array, one_array))) - adapt_data = adapt_data.batch(2) # 5 batches of 2 samples - - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 2) - self.assertEqual(l1.adapt_count, 2) - self.assertEqual(l2.adapt_count, 2) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - self.assertLessEqual(l1.adapt_time, l2.adapt_time) - - # Test error with bad data - with self.assertRaisesRegex(ValueError, 'requires a '): - stage.adapt(None) - - def test_include_layers_with_dict_input(self): - - class PLMergeDict(PLMerge): - - def call(self, inputs): - return inputs['a'] + inputs['b'] - - x0 = Input(shape=(3,)) - x1 = Input(shape=(3,)) - - l0 = PLMergeDict() - y = l0({'a': x0, 'b': x1}) - - l1 = PLSplit() - z, y = l1(y) - - stage = preprocessing_stage.FunctionalPreprocessingStage([x0, x1], [y, z]) - stage.compile() - - one_array = np.ones((4, 3), dtype='float32') - adapt_data = tf.data.Dataset.from_tensor_slices((one_array, one_array)) - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 1) - self.assertEqual(l1.adapt_count, 1) - self.assertLessEqual(l0.adapt_time, l1.adapt_time) - - # Check call - y, z = stage([ - tf.ones((4, 3), dtype='float32'), - tf.ones((4, 3), dtype='float32') - ]) - self.assertAllClose(y, np.ones((4, 3), dtype='float32')) - self.assertAllClose(z, np.ones((4, 3), dtype='float32') + 2.) - - def test_include_layers_with_nested_input(self): - - class PLMergeNest(PLMerge): - - def call(self, inputs): - a = inputs[0] - b = inputs[1][0] - c = inputs[1][1] - return a + b + c - - x0 = Input(shape=(3,)) - x1 = Input(shape=(3,)) - x2 = Input(shape=(3,)) - - l0 = PLMergeNest() - y = l0([x0, [x1, x2]]) - - stage = preprocessing_stage.FunctionalPreprocessingStage([x0, x1, x2], y) - stage.compile() - - one_array = np.ones((4, 3), dtype='float32') - adapt_data = tf.data.Dataset.from_tensor_slices((one_array,) * 3) - stage.adapt(adapt_data) - self.assertEqual(l0.adapt_count, 1) - - # Check call - y = stage([ - tf.ones((4, 3), dtype='float32'), - tf.ones((4, 3), dtype='float32'), - tf.ones((4, 3), dtype='float32') - ]) - self.assertAllClose(y, np.ones((4, 3), dtype='float32') + 2.) 
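The target shape `(12, 8, 8, 4)` used in the `fit`/`evaluate` calls below follows from standard Keras shape rules: `CenterCrop(8, 8)` maps `(10, 10, 3)` to `(8, 8, 3)`, `ZeroPadding2D(padding=1)` restores `(10, 10, 3)` so the second `Add` lines up with `y0`, and `Conv2D(4, 3)` with its default `"valid"` padding yields `(8, 8, 4)`. A minimal standalone sketch of the same shape flow (using the public `tf.keras` layer names rather than this file's internal module aliases):

import tensorflow as tf

x = tf.keras.Input(shape=(10, 10, 3))
y = tf.keras.layers.CenterCrop(8, 8)(x)          # -> (None, 8, 8, 3)
y = tf.keras.layers.ZeroPadding2D(padding=1)(y)  # -> (None, 10, 10, 3)
y = tf.keras.layers.Normalization()(y)           # shape-preserving
y = tf.keras.layers.Conv2D(4, 3)(y)              # "valid" conv -> (None, 8, 8, 4)
print(y.shape)  # (None, 8, 8, 4)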
- - def test_mixing_preprocessing_and_regular_layers(self): - x0 = Input(shape=(10, 10, 3)) - x1 = Input(shape=(10, 10, 3)) - x2 = Input(shape=(10, 10, 3)) - - y0 = merging.Add()([x0, x1]) - y1 = image_preprocessing.CenterCrop(8, 8)(x2) - y1 = convolutional.ZeroPadding2D(padding=1)(y1) - - z = merging.Add()([y0, y1]) - z = normalization.Normalization()(z) - z = convolutional.Conv2D(4, 3)(z) - - stage = preprocessing_stage.FunctionalPreprocessingStage([x0, x1, x2], z) - - data = [ - np.ones((12, 10, 10, 3), dtype='float32'), - np.ones((12, 10, 10, 3), dtype='float32'), - np.ones((12, 10, 10, 3), dtype='float32') - ] - - stage.adapt(data) - _ = stage(data) - stage.compile('rmsprop', 'mse') - with self.assertRaisesRegex(ValueError, 'Preprocessing stage'): - stage.fit(data, np.ones((12, 8, 8, 4))) - - ds_x0 = tf.data.Dataset.from_tensor_slices(np.ones((12, 10, 10, 3))) - ds_x1 = tf.data.Dataset.from_tensor_slices(np.ones((12, 10, 10, 3))) - ds_x2 = tf.data.Dataset.from_tensor_slices(np.ones((12, 10, 10, 3))) - ds_x = tf.data.Dataset.zip((ds_x0, ds_x1, ds_x2)) - ds_y = tf.data.Dataset.from_tensor_slices(np.ones((12, 8, 8, 4))) - dataset = tf.data.Dataset.zip((ds_x, ds_y)).batch(4) - - with self.assertRaisesRegex(ValueError, 'Preprocessing stage'): - stage.fit(dataset) - _ = stage.evaluate(data, np.ones((12, 8, 8, 4))) - _ = stage.predict(data) - - -if __name__ == '__main__': - tf.test.main() + + stage.adapt(data) + _ = stage(data) + stage.compile("rmsprop", "mse") + with self.assertRaisesRegex(ValueError, "Preprocessing stage"): + stage.fit(data, np.ones((12, 8, 8, 4))) + + ds_x0 = tf.data.Dataset.from_tensor_slices(np.ones((12, 10, 10, 3))) + ds_x1 = tf.data.Dataset.from_tensor_slices(np.ones((12, 10, 10, 3))) + ds_x2 = tf.data.Dataset.from_tensor_slices(np.ones((12, 10, 10, 3))) + ds_x = tf.data.Dataset.zip((ds_x0, ds_x1, ds_x2)) + ds_y = tf.data.Dataset.from_tensor_slices(np.ones((12, 8, 8, 4))) + dataset = tf.data.Dataset.zip((ds_x, ds_y)).batch(4) + + with self.assertRaisesRegex(ValueError, "Preprocessing stage"): + stage.fit(dataset) + _ = stage.evaluate(data, np.ones((12, 8, 8, 4))) + _ = stage.predict(data) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/preprocessing/preprocessing_stage_test.py b/keras/layers/preprocessing/preprocessing_stage_test.py index b8bfe2692c59..5d183d841648 100644 --- a/keras/layers/preprocessing/preprocessing_stage_test.py +++ b/keras/layers/preprocessing/preprocessing_stage_test.py @@ -14,70 +14,73 @@ # ============================================================================== """Preprocessing stage tests.""" -import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes - import time + import numpy as np -from keras.testing_infra import test_combinations +import tensorflow.compat.v2 as tf + from keras.engine import base_preprocessing_layer from keras.layers.preprocessing import preprocessing_stage from keras.layers.preprocessing import preprocessing_test_utils +from keras.testing_infra import test_combinations @test_combinations.run_all_keras_modes(always_skip_v1=True) class PreprocessingStageTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_adapt(self): - - class PL(base_preprocessing_layer.PreprocessingLayer): - - def __init__(self, **kwargs): - self.adapt_time = None - self.adapt_count = 0 - super().__init__(**kwargs) - - def adapt(self, data, reset_state=True): - self.adapt_time = time.time() - self.adapt_count += 1 - - def call(self, inputs): - return 
inputs + 1. - - # Test with NumPy array - stage = preprocessing_stage.PreprocessingStage([ - PL(), - PL(), - PL(), - ]) - stage.adapt(np.ones((3, 4))) - self.assertEqual(stage.layers[0].adapt_count, 1) - self.assertEqual(stage.layers[1].adapt_count, 1) - self.assertEqual(stage.layers[2].adapt_count, 1) - self.assertLessEqual(stage.layers[0].adapt_time, stage.layers[1].adapt_time) - self.assertLessEqual(stage.layers[1].adapt_time, stage.layers[2].adapt_time) - - # Check call - y = stage(tf.ones((3, 4))) - self.assertAllClose(y, np.ones((3, 4)) + 3.) - - # Test with dataset - adapt_data = tf.data.Dataset.from_tensor_slices(np.ones((3, 10))) - adapt_data = adapt_data.batch(2) # 5 batches of 2 samples - - stage.adapt(adapt_data) - self.assertEqual(stage.layers[0].adapt_count, 2) - self.assertEqual(stage.layers[1].adapt_count, 2) - self.assertEqual(stage.layers[2].adapt_count, 2) - self.assertLess(stage.layers[0].adapt_time, stage.layers[1].adapt_time) - self.assertLess(stage.layers[1].adapt_time, stage.layers[2].adapt_time) - - # Test error with bad data - with self.assertRaisesRegex(ValueError, 'requires a '): - stage.adapt(None) - - -if __name__ == '__main__': - tf.test.main() + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_adapt(self): + class PL(base_preprocessing_layer.PreprocessingLayer): + def __init__(self, **kwargs): + self.adapt_time = None + self.adapt_count = 0 + super().__init__(**kwargs) + + def adapt(self, data, reset_state=True): + self.adapt_time = time.time() + self.adapt_count += 1 + + def call(self, inputs): + return inputs + 1.0 + + # Test with NumPy array + stage = preprocessing_stage.PreprocessingStage( + [ + PL(), + PL(), + PL(), + ] + ) + stage.adapt(np.ones((3, 4))) + self.assertEqual(stage.layers[0].adapt_count, 1) + self.assertEqual(stage.layers[1].adapt_count, 1) + self.assertEqual(stage.layers[2].adapt_count, 1) + self.assertLessEqual( + stage.layers[0].adapt_time, stage.layers[1].adapt_time + ) + self.assertLessEqual( + stage.layers[1].adapt_time, stage.layers[2].adapt_time + ) + + # Check call + y = stage(tf.ones((3, 4))) + self.assertAllClose(y, np.ones((3, 4)) + 3.0) + + # Test with dataset + adapt_data = tf.data.Dataset.from_tensor_slices(np.ones((3, 10))) + adapt_data = adapt_data.batch(2) # 5 batches of 2 samples + + stage.adapt(adapt_data) + self.assertEqual(stage.layers[0].adapt_count, 2) + self.assertEqual(stage.layers[1].adapt_count, 2) + self.assertEqual(stage.layers[2].adapt_count, 2) + self.assertLess(stage.layers[0].adapt_time, stage.layers[1].adapt_time) + self.assertLess(stage.layers[1].adapt_time, stage.layers[2].adapt_time) + + # Test error with bad data + with self.assertRaisesRegex(ValueError, "requires a "): + stage.adapt(None) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/preprocessing/preprocessing_test_utils.py b/keras/layers/preprocessing/preprocessing_test_utils.py index ae5366c1a4ae..8862241e4f1b 100644 --- a/keras/layers/preprocessing/preprocessing_test_utils.py +++ b/keras/layers/preprocessing/preprocessing_test_utils.py @@ -15,156 +15,189 @@ """Tests utils for preprocessing layers.""" import collections + import numpy as np import tensorflow.compat.v2 as tf class ArrayLike: + def __init__(self, values): + self.values = values - def __init__(self, values): - self.values = values - - def __array__(self): - return np.array(self.values) + def __array__(self): + return np.array(self.values) class PreprocessingLayerTest(tf.test.TestCase): - """Base test class for 
preprocessing layer API validation.""" - # TODO(b/137303934): Consider incorporating something like this Close vs All - # behavior into core tf.test.TestCase. - - def assertAllCloseOrEqual(self, a, b, msg=None): - """Asserts that elements are close (if numeric) or equal (if string).""" - if a is None or b is None: - self.assertAllEqual(a, b, msg=msg) - elif isinstance(a, (list, tuple)): - self.assertEqual(len(a), len(b)) - for a_value, b_value in zip(a, b): - self.assertAllCloseOrEqual(a_value, b_value, msg=msg) - elif isinstance(a, collections.abc.Mapping): - self.assertEqual(len(a), len(b)) - for key, a_value in a.items(): - b_value = b[key] - error_message = "{} ({})".format(msg, key) if msg else None - self.assertAllCloseOrEqual(a_value, b_value, error_message) - elif (isinstance(a, float) or - hasattr(a, "dtype") and np.issubdtype(a.dtype, np.number)): - self.assertAllClose(a, b, msg=msg) - else: - self.assertAllEqual(a, b, msg=msg) - - def assert_extracted_output_equal(self, combiner, acc1, acc2, msg=None): - data_1 = combiner.extract(acc1) - data_2 = combiner.extract(acc2) - self.assertAllCloseOrEqual(data_1, data_2, msg=msg) - - # This is an injection seam so that tests like TextVectorizationTest can - # define their own methods for asserting that accumulators are equal. - compare_accumulators = assertAllCloseOrEqual - - def validate_accumulator_computation(self, combiner, data, expected): - """Validate that various combinations of compute and merge are identical.""" - if len(data) < 4: - raise AssertionError( - f"Data must have at least 4 elements. Received " - f"len(data)={len(data)}.") - data_0 = np.array([data[0]]) - data_1 = np.array([data[1]]) - data_2 = np.array(data[2:]) - - single_compute = combiner.compute(data) - - all_merge = combiner.merge([ - combiner.compute(data_0), - combiner.compute(data_1), - combiner.compute(data_2) - ]) - - self.compare_accumulators( - single_compute, - all_merge, - msg="Sharding data should not change the data output.") - - unordered_all_merge = combiner.merge([ - combiner.compute(data_1), - combiner.compute(data_2), - combiner.compute(data_0) - ]) - self.compare_accumulators( - all_merge, - unordered_all_merge, - msg="The order of merge arguments should not change the data " - "output.") - - hierarchical_merge = combiner.merge([ - combiner.compute(data_1), - combiner.merge([combiner.compute(data_2), - combiner.compute(data_0)]) - ]) - self.compare_accumulators( - all_merge, - hierarchical_merge, - msg="Nesting merge arguments should not change the data output.") - - nested_compute = combiner.compute( - data_0, combiner.compute(data_1, combiner.compute(data_2))) - self.compare_accumulators( - all_merge, - nested_compute, - msg="Nesting compute arguments should not change the data output.") - - mixed_compute = combiner.merge([ - combiner.compute(data_0), - combiner.compute(data_1, combiner.compute(data_2)) - ]) - self.compare_accumulators( - all_merge, - mixed_compute, - msg="Mixing merge and compute calls should not change the data " - "output.") - - single_merge = combiner.merge([ - combiner.merge([combiner.compute(data_0)]), - combiner.compute(data_1, combiner.compute(data_2)) - ]) - self.compare_accumulators( - all_merge, - single_merge, - msg="Calling merge with a data length of 1 should not change the data " - "output.") - - self.compare_accumulators( - expected, - all_merge, - msg="Calculated accumulators " - "did not match expected accumulator.") - - def validate_accumulator_extract(self, combiner, data, expected): - """Validate that 
the expected results of computing and extracting.""" - acc = combiner.compute(data) - extracted_data = combiner.extract(acc) - self.assertAllCloseOrEqual(expected, extracted_data) - - def validate_accumulator_extract_and_restore(self, combiner, data, expected): - """Validate that the extract<->restore loop loses no data.""" - acc = combiner.compute(data) - extracted_data = combiner.extract(acc) - restored_acc = combiner.restore(extracted_data) - self.assert_extracted_output_equal(combiner, acc, restored_acc) - self.assertAllCloseOrEqual(expected, combiner.extract(restored_acc)) - - def validate_accumulator_serialize_and_deserialize(self, combiner, data, - expected): - """Validate that the serialize<->deserialize loop loses no data.""" - acc = combiner.compute(data) - serialized_data = combiner.serialize(acc) - deserialized_data = combiner.deserialize(serialized_data) - self.compare_accumulators(acc, deserialized_data) - self.compare_accumulators(expected, deserialized_data) - - def validate_accumulator_uniqueness(self, combiner, data): - """Validate that every call to compute creates a unique accumulator.""" - acc = combiner.compute(data) - acc2 = combiner.compute(data) - self.assertIsNot(acc, acc2) - self.compare_accumulators(acc, acc2) + """Base test class for preprocessing layer API validation.""" + + # TODO(b/137303934): Consider incorporating something like this Close vs All + # behavior into core tf.test.TestCase. + + def assertAllCloseOrEqual(self, a, b, msg=None): + """Asserts that elements are close (if numeric) or equal (if string).""" + if a is None or b is None: + self.assertAllEqual(a, b, msg=msg) + elif isinstance(a, (list, tuple)): + self.assertEqual(len(a), len(b)) + for a_value, b_value in zip(a, b): + self.assertAllCloseOrEqual(a_value, b_value, msg=msg) + elif isinstance(a, collections.abc.Mapping): + self.assertEqual(len(a), len(b)) + for key, a_value in a.items(): + b_value = b[key] + error_message = f"{msg} ({key})" if msg else None + self.assertAllCloseOrEqual(a_value, b_value, error_message) + elif ( + isinstance(a, float) + or hasattr(a, "dtype") + and np.issubdtype(a.dtype, np.number) + ): + self.assertAllClose(a, b, msg=msg) + else: + self.assertAllEqual(a, b, msg=msg) + + def assert_extracted_output_equal(self, combiner, acc1, acc2, msg=None): + data_1 = combiner.extract(acc1) + data_2 = combiner.extract(acc2) + self.assertAllCloseOrEqual(data_1, data_2, msg=msg) + + # This is an injection seam so that tests like TextVectorizationTest can + # define their own methods for asserting that accumulators are equal. + compare_accumulators = assertAllCloseOrEqual + + def validate_accumulator_computation(self, combiner, data, expected): + """Validate that various combinations of compute and merge are + identical.""" + if len(data) < 4: + raise AssertionError( + "Data must have at least 4 elements. Received " + f"len(data)={len(data)}." 
+ ) + data_0 = np.array([data[0]]) + data_1 = np.array([data[1]]) + data_2 = np.array(data[2:]) + + single_compute = combiner.compute(data) + + all_merge = combiner.merge( + [ + combiner.compute(data_0), + combiner.compute(data_1), + combiner.compute(data_2), + ] + ) + + self.compare_accumulators( + single_compute, + all_merge, + msg="Sharding data should not change the data output.", + ) + + unordered_all_merge = combiner.merge( + [ + combiner.compute(data_1), + combiner.compute(data_2), + combiner.compute(data_0), + ] + ) + self.compare_accumulators( + all_merge, + unordered_all_merge, + msg=( + "The order of merge arguments should not change the data " + "output." + ), + ) + + hierarchical_merge = combiner.merge( + [ + combiner.compute(data_1), + combiner.merge( + [combiner.compute(data_2), combiner.compute(data_0)] + ), + ] + ) + self.compare_accumulators( + all_merge, + hierarchical_merge, + msg="Nesting merge arguments should not change the data output.", + ) + + nested_compute = combiner.compute( + data_0, combiner.compute(data_1, combiner.compute(data_2)) + ) + self.compare_accumulators( + all_merge, + nested_compute, + msg="Nesting compute arguments should not change the data output.", + ) + + mixed_compute = combiner.merge( + [ + combiner.compute(data_0), + combiner.compute(data_1, combiner.compute(data_2)), + ] + ) + self.compare_accumulators( + all_merge, + mixed_compute, + msg=( + "Mixing merge and compute calls should not change the data " + "output." + ), + ) + + single_merge = combiner.merge( + [ + combiner.merge([combiner.compute(data_0)]), + combiner.compute(data_1, combiner.compute(data_2)), + ] + ) + self.compare_accumulators( + all_merge, + single_merge, + msg=( + "Calling merge with a data length of 1 should not change " + "the data output." 
+            ),
+        )
+
+        self.compare_accumulators(
+            expected,
+            all_merge,
+            msg="Calculated accumulators did not match expected accumulator.",
+        )
+
+    def validate_accumulator_extract(self, combiner, data, expected):
+        """Validate the expected results of computing and extracting."""
+        acc = combiner.compute(data)
+        extracted_data = combiner.extract(acc)
+        self.assertAllCloseOrEqual(expected, extracted_data)
+
+    def validate_accumulator_extract_and_restore(
+        self, combiner, data, expected
+    ):
+        """Validate that the extract<->restore loop loses no data."""
+        acc = combiner.compute(data)
+        extracted_data = combiner.extract(acc)
+        restored_acc = combiner.restore(extracted_data)
+        self.assert_extracted_output_equal(combiner, acc, restored_acc)
+        self.assertAllCloseOrEqual(expected, combiner.extract(restored_acc))
+
+    def validate_accumulator_serialize_and_deserialize(
+        self, combiner, data, expected
+    ):
+        """Validate that the serialize<->deserialize loop loses no data."""
+        acc = combiner.compute(data)
+        serialized_data = combiner.serialize(acc)
+        deserialized_data = combiner.deserialize(serialized_data)
+        self.compare_accumulators(acc, deserialized_data)
+        self.compare_accumulators(expected, deserialized_data)
+
+    def validate_accumulator_uniqueness(self, combiner, data):
+        """Validate that every call to compute creates a unique accumulator."""
+        acc = combiner.compute(data)
+        acc2 = combiner.compute(data)
+        self.assertIsNot(acc, acc2)
+        self.compare_accumulators(acc, acc2)
diff --git a/keras/layers/preprocessing/preprocessing_utils.py b/keras/layers/preprocessing/preprocessing_utils.py
index 4c60721d7235..b0f7cc94555e 100644
--- a/keras/layers/preprocessing/preprocessing_utils.py
+++ b/keras/layers/preprocessing/preprocessing_utils.py
@@ -14,10 +14,11 @@
# ==============================================================================
"""Utils for preprocessing layers."""

-from keras.utils import tf_utils
import numpy as np
import tensorflow.compat.v2 as tf

+from keras.utils import tf_utils
+
INT = "int"
ONE_HOT = "one_hot"
MULTI_HOT = "multi_hot"
@@ -26,128 +27,140 @@


def ensure_tensor(inputs, dtype=None):
-  """Ensures the input is a Tensor, SparseTensor or RaggedTensor."""
-  if not isinstance(inputs, (tf.Tensor, tf.RaggedTensor, tf.SparseTensor)):
-    inputs = tf.convert_to_tensor(inputs, dtype)
-  if dtype is not None and inputs.dtype != dtype:
-    inputs = tf.cast(inputs, dtype)
-  return inputs
+    """Ensures the input is a Tensor, SparseTensor or RaggedTensor."""
+    if not isinstance(inputs, (tf.Tensor, tf.RaggedTensor, tf.SparseTensor)):
+        inputs = tf.convert_to_tensor(inputs, dtype)
+    if dtype is not None and inputs.dtype != dtype:
+        inputs = tf.cast(inputs, dtype)
+    return inputs


def listify_tensors(x):
-  """Convert any tensors or numpy arrays to lists for config serialization."""
-  if tf.is_tensor(x):
-    x = x.numpy()
-  if isinstance(x, np.ndarray):
-    x = x.tolist()
-  return x
+    """Convert any tensors or numpy arrays to lists for config serialization."""
+    if tf.is_tensor(x):
+        x = x.numpy()
+    if isinstance(x, np.ndarray):
+        x = x.tolist()
+    return x


def sparse_bincount(inputs, depth, binary_output, dtype, count_weights=None):
-  """Apply binary or count encoding to an input and return a sparse tensor."""
-  result = tf.sparse.bincount(
-      inputs,
-      weights=count_weights,
-      minlength=depth,
-      maxlength=depth,
-      axis=-1,
-      binary_output=binary_output)
-  result = tf.cast(result, dtype)
-  if inputs.shape.rank == 1:
-    output_shape = (depth,)
-  else:
-    batch_size = tf.shape(result)[0]
-    output_shape =
(batch_size, depth) - result = tf.SparseTensor( - indices=result.indices, values=result.values, dense_shape=output_shape) - return result + """Apply binary or count encoding to an input and return a sparse tensor.""" + result = tf.sparse.bincount( + inputs, + weights=count_weights, + minlength=depth, + maxlength=depth, + axis=-1, + binary_output=binary_output, + ) + result = tf.cast(result, dtype) + if inputs.shape.rank == 1: + output_shape = (depth,) + else: + batch_size = tf.shape(result)[0] + output_shape = (batch_size, depth) + result = tf.SparseTensor( + indices=result.indices, values=result.values, dense_shape=output_shape + ) + return result def dense_bincount(inputs, depth, binary_output, dtype, count_weights=None): - """Apply binary or count encoding to an input.""" - result = tf.math.bincount( - inputs, - weights=count_weights, - minlength=depth, - maxlength=depth, - dtype=dtype, - axis=-1, - binary_output=binary_output) - if inputs.shape.rank == 1: - result.set_shape(tf.TensorShape((depth,))) - else: - batch_size = inputs.shape.as_list()[0] - result.set_shape(tf.TensorShape((batch_size, depth))) - return result + """Apply binary or count encoding to an input.""" + result = tf.math.bincount( + inputs, + weights=count_weights, + minlength=depth, + maxlength=depth, + dtype=dtype, + axis=-1, + binary_output=binary_output, + ) + if inputs.shape.rank == 1: + result.set_shape(tf.TensorShape((depth,))) + else: + batch_size = inputs.shape.as_list()[0] + result.set_shape(tf.TensorShape((batch_size, depth))) + return result def expand_dims(inputs, axis): - """Expand dims on sparse, ragged, or dense tensors.""" - if tf_utils.is_sparse(inputs): - return tf.sparse.expand_dims(inputs, axis) - else: - return tf.expand_dims(inputs, axis) - - -def encode_categorical_inputs(inputs, - output_mode, - depth, - dtype="float32", - sparse=False, - count_weights=None, - idf_weights=None): - """Encodes categoical inputs according to output_mode.""" - if output_mode == INT: - return tf.identity(tf.cast(inputs, dtype)) - - original_shape = inputs.shape - # In all cases, we should uprank scalar input to a single sample. - if inputs.shape.rank == 0: - inputs = expand_dims(inputs, -1) - # One hot will unprank only if the final output dimension is not already 1. - if output_mode == ONE_HOT: - if inputs.shape[-1] != 1: - inputs = expand_dims(inputs, -1) - - # TODO(b/190445202): remove output rank restriction. - if inputs.shape.rank > 2: - raise ValueError( - f"When output_mode is not `'int'`, maximum supported output rank is 2. " - f"Received output_mode {output_mode} and input shape {original_shape}, " - f"which would result in output rank {inputs.shape.rank}.") - - binary_output = output_mode in (MULTI_HOT, ONE_HOT) - if sparse: - bincounts = sparse_bincount(inputs, depth, binary_output, dtype, - count_weights) - else: - bincounts = dense_bincount(inputs, depth, binary_output, dtype, - count_weights) - - if output_mode != TF_IDF: - return bincounts - - if idf_weights is None: - raise ValueError( - f"When output mode is `'tf_idf'`, idf_weights must be provided. 
" - f"Received: output_mode={output_mode} and idf_weights={idf_weights}") - - if sparse: - value_weights = tf.gather(idf_weights, bincounts.indices[:, -1]) - return tf.SparseTensor(bincounts.indices, - value_weights * bincounts.values, - bincounts.dense_shape) - else: - return tf.multiply(bincounts, idf_weights) + """Expand dims on sparse, ragged, or dense tensors.""" + if tf_utils.is_sparse(inputs): + return tf.sparse.expand_dims(inputs, axis) + else: + return tf.expand_dims(inputs, axis) + + +def encode_categorical_inputs( + inputs, + output_mode, + depth, + dtype="float32", + sparse=False, + count_weights=None, + idf_weights=None, +): + """Encodes categoical inputs according to output_mode.""" + if output_mode == INT: + return tf.identity(tf.cast(inputs, dtype)) + + original_shape = inputs.shape + # In all cases, we should uprank scalar input to a single sample. + if inputs.shape.rank == 0: + inputs = expand_dims(inputs, -1) + # One hot will unprank only if the final output dimension is not already 1. + if output_mode == ONE_HOT: + if inputs.shape[-1] != 1: + inputs = expand_dims(inputs, -1) + + # TODO(b/190445202): remove output rank restriction. + if inputs.shape.rank > 2: + raise ValueError( + "When output_mode is not `'int'`, maximum supported output rank " + f"is 2. Received output_mode {output_mode} and input shape " + f"{original_shape}, " + f"which would result in output rank {inputs.shape.rank}." + ) + + binary_output = output_mode in (MULTI_HOT, ONE_HOT) + if sparse: + bincounts = sparse_bincount( + inputs, depth, binary_output, dtype, count_weights + ) + else: + bincounts = dense_bincount( + inputs, depth, binary_output, dtype, count_weights + ) + + if output_mode != TF_IDF: + return bincounts + + if idf_weights is None: + raise ValueError( + "When output mode is `'tf_idf'`, idf_weights must be provided. 
" + f"Received: output_mode={output_mode} and idf_weights={idf_weights}" + ) + + if sparse: + value_weights = tf.gather(idf_weights, bincounts.indices[:, -1]) + return tf.SparseTensor( + bincounts.indices, + value_weights * bincounts.values, + bincounts.dense_shape, + ) + else: + return tf.multiply(bincounts, idf_weights) def compute_shape_for_encode_categorical(shape, output_mode, depth): - """Computes the output shape of `encode_categorical_inputs`.""" - if output_mode == INT: - return tf.TensorShape(shape) - if not shape: - return tf.TensorShape([depth]) - if output_mode == ONE_HOT and shape[-1] != 1: - return tf.TensorShape(shape + [depth]) - else: - return tf.TensorShape(shape[:-1] + [depth]) + """Computes the output shape of `encode_categorical_inputs`.""" + if output_mode == INT: + return tf.TensorShape(shape) + if not shape: + return tf.TensorShape([depth]) + if output_mode == ONE_HOT and shape[-1] != 1: + return tf.TensorShape(shape + [depth]) + else: + return tf.TensorShape(shape[:-1] + [depth]) diff --git a/keras/layers/preprocessing/preprocessing_utils_test.py b/keras/layers/preprocessing/preprocessing_utils_test.py index 2394f59d5169..5e48a0ca19ff 100644 --- a/keras/layers/preprocessing/preprocessing_utils_test.py +++ b/keras/layers/preprocessing/preprocessing_utils_test.py @@ -14,112 +14,121 @@ # ============================================================================== """Tests for preprocessing utils.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras.layers.preprocessing import preprocessing_utils from keras.testing_infra import test_combinations -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes(always_skip_v1=True) class ListifyTensorsTest(test_combinations.TestCase): + def test_tensor_input(self): + inputs = tf.constant([0, 1, 2, 3, 4]) + outputs = preprocessing_utils.listify_tensors(inputs) + self.assertAllEqual([0, 1, 2, 3, 4], outputs) + self.assertIsInstance(outputs, list) - def test_tensor_input(self): - inputs = tf.constant([0, 1, 2, 3, 4]) - outputs = preprocessing_utils.listify_tensors(inputs) - self.assertAllEqual([0, 1, 2, 3, 4], outputs) - self.assertIsInstance(outputs, list) - - def test_numpy_input(self): - inputs = np.array([0, 1, 2, 3, 4]) - outputs = preprocessing_utils.listify_tensors(inputs) - self.assertAllEqual([0, 1, 2, 3, 4], outputs) - self.assertIsInstance(outputs, list) + def test_numpy_input(self): + inputs = np.array([0, 1, 2, 3, 4]) + outputs = preprocessing_utils.listify_tensors(inputs) + self.assertAllEqual([0, 1, 2, 3, 4], outputs) + self.assertIsInstance(outputs, list) @test_combinations.run_all_keras_modes class EncodeCategoricalInputsTest(test_combinations.TestCase): - - def test_int_encoding(self): - inputs = tf.constant([0, 1, 2]) - outputs = preprocessing_utils.encode_categorical_inputs( - inputs, output_mode='int', depth=4) - self.assertAllEqual([0, 1, 2], outputs) - - @parameterized.named_parameters( - ('sparse', True), - ('dense', False), - ) - def test_one_hot_encoding(self, sparse): - inputs = tf.constant([0, 1, 2]) - outputs = preprocessing_utils.encode_categorical_inputs( - inputs, output_mode='one_hot', depth=4, sparse=sparse) - if sparse: - outputs = tf.sparse.to_dense(outputs) - self.assertAllEqual([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]], outputs) - - @parameterized.named_parameters( - ('sparse', True), - ('dense', False), - ) - def test_multi_hot_encoding(self, sparse): - inputs = tf.constant([0, 1, 2]) - outputs = 
preprocessing_utils.encode_categorical_inputs( - inputs, output_mode='multi_hot', depth=4, sparse=sparse) - if sparse: - outputs = tf.sparse.to_dense(outputs) - self.assertAllEqual([1, 1, 1, 0], outputs) - - @parameterized.named_parameters( - ('sparse', True), - ('dense', False), - ) - def test_count_encoding(self, sparse): - inputs = tf.constant([0, 1, 1, 2, 2, 2]) - outputs = preprocessing_utils.encode_categorical_inputs( - inputs, output_mode='count', depth=4, sparse=sparse) - if sparse: - outputs = tf.sparse.to_dense(outputs) - self.assertAllEqual([1, 2, 3, 0], outputs) - - @parameterized.named_parameters( - ('sparse', True), - ('dense', False), - ) - def test_tf_idf_encoding(self, sparse): - inputs = tf.constant([0, 1, 1, 2, 2, 2]) - outputs = preprocessing_utils.encode_categorical_inputs( - inputs, - output_mode='tf_idf', - depth=4, - sparse=sparse, - idf_weights=[0.1, 1.0, 10.0, 0]) - if sparse: - outputs = tf.sparse.to_dense(outputs) - self.assertAllClose([.1, 2, 30, 0], outputs) - - def test_output_dtype(self): - inputs = tf.constant([0, 1, 2], dtype=tf.dtypes.int32) - outputs = preprocessing_utils.encode_categorical_inputs( - inputs, output_mode='int', depth=4, dtype=tf.dtypes.int64) - self.assertAllEqual(outputs.dtype, tf.dtypes.int64) - outputs = preprocessing_utils.encode_categorical_inputs( - inputs, output_mode='one_hot', depth=4, dtype=tf.dtypes.float64) - self.assertAllEqual(outputs.dtype, tf.dtypes.float64) - - def test_rank_3_output_fails(self): - inputs = tf.constant([[[0]], [[1]], [[2]]]) - with self.assertRaisesRegex(ValueError, - 'maximum supported output rank is 2'): - preprocessing_utils.encode_categorical_inputs(inputs, 'multi_hot', 4, - 'float32') - - def test_tf_idf_output_with_no_weights_fails(self): - inputs = tf.constant([0, 1, 2]) - with self.assertRaisesRegex(ValueError, 'idf_weights must be provided'): - preprocessing_utils.encode_categorical_inputs(inputs, 'tf_idf', 4, - 'float32') - - -if __name__ == '__main__': - tf.test.main() + def test_int_encoding(self): + inputs = tf.constant([0, 1, 2]) + outputs = preprocessing_utils.encode_categorical_inputs( + inputs, output_mode="int", depth=4 + ) + self.assertAllEqual([0, 1, 2], outputs) + + @parameterized.named_parameters( + ("sparse", True), + ("dense", False), + ) + def test_one_hot_encoding(self, sparse): + inputs = tf.constant([0, 1, 2]) + outputs = preprocessing_utils.encode_categorical_inputs( + inputs, output_mode="one_hot", depth=4, sparse=sparse + ) + if sparse: + outputs = tf.sparse.to_dense(outputs) + self.assertAllEqual([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]], outputs) + + @parameterized.named_parameters( + ("sparse", True), + ("dense", False), + ) + def test_multi_hot_encoding(self, sparse): + inputs = tf.constant([0, 1, 2]) + outputs = preprocessing_utils.encode_categorical_inputs( + inputs, output_mode="multi_hot", depth=4, sparse=sparse + ) + if sparse: + outputs = tf.sparse.to_dense(outputs) + self.assertAllEqual([1, 1, 1, 0], outputs) + + @parameterized.named_parameters( + ("sparse", True), + ("dense", False), + ) + def test_count_encoding(self, sparse): + inputs = tf.constant([0, 1, 1, 2, 2, 2]) + outputs = preprocessing_utils.encode_categorical_inputs( + inputs, output_mode="count", depth=4, sparse=sparse + ) + if sparse: + outputs = tf.sparse.to_dense(outputs) + self.assertAllEqual([1, 2, 3, 0], outputs) + + @parameterized.named_parameters( + ("sparse", True), + ("dense", False), + ) + def test_tf_idf_encoding(self, sparse): + inputs = tf.constant([0, 1, 1, 2, 2, 2]) + outputs = 
preprocessing_utils.encode_categorical_inputs( + inputs, + output_mode="tf_idf", + depth=4, + sparse=sparse, + idf_weights=[0.1, 1.0, 10.0, 0], + ) + if sparse: + outputs = tf.sparse.to_dense(outputs) + self.assertAllClose([0.1, 2, 30, 0], outputs) + + def test_output_dtype(self): + inputs = tf.constant([0, 1, 2], dtype=tf.dtypes.int32) + outputs = preprocessing_utils.encode_categorical_inputs( + inputs, output_mode="int", depth=4, dtype=tf.dtypes.int64 + ) + self.assertAllEqual(outputs.dtype, tf.dtypes.int64) + outputs = preprocessing_utils.encode_categorical_inputs( + inputs, output_mode="one_hot", depth=4, dtype=tf.dtypes.float64 + ) + self.assertAllEqual(outputs.dtype, tf.dtypes.float64) + + def test_rank_3_output_fails(self): + inputs = tf.constant([[[0]], [[1]], [[2]]]) + with self.assertRaisesRegex( + ValueError, "maximum supported output rank is 2" + ): + preprocessing_utils.encode_categorical_inputs( + inputs, "multi_hot", 4, "float32" + ) + + def test_tf_idf_output_with_no_weights_fails(self): + inputs = tf.constant([0, 1, 2]) + with self.assertRaisesRegex(ValueError, "idf_weights must be provided"): + preprocessing_utils.encode_categorical_inputs( + inputs, "tf_idf", 4, "float32" + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/preprocessing/string_lookup.py b/keras/layers/preprocessing/string_lookup.py index b0fd1f01cc6c..5bf7389b8539 100644 --- a/keras/layers/preprocessing/string_lookup.py +++ b/keras/layers/preprocessing/string_lookup.py @@ -14,388 +14,404 @@ # ============================================================================== """Keras string lookup preprocessing layer.""" +import numpy as np import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes -import numpy as np from keras.engine import base_preprocessing_layer from keras.layers.preprocessing import index_lookup + +# isort: off from tensorflow.python.util.tf_export import keras_export @keras_export( "keras.layers.StringLookup", "keras.layers.experimental.preprocessing.StringLookup", - v1=[]) + v1=[], +) class StringLookup(index_lookup.IndexLookup): - """A preprocessing layer which maps string features to integer indices. - - This layer translates a set of arbitrary strings into integer output via a - table-based vocabulary lookup. This layer will perform no splitting or - transformation of input strings. For a layer than can split and tokenize - natural language, see the `TextVectorization` layer. - - The vocabulary for the layer must be either supplied on construction or - learned via `adapt()`. During `adapt()`, the layer will analyze a data set, - determine the frequency of individual strings tokens, and create a vocabulary - from them. If the vocabulary is capped in size, the most frequent tokens will - be used to create the vocabulary and all others will be treated as - out-of-vocabulary (OOV). - - There are two possible output modes for the layer. - When `output_mode` is `"int"`, - input strings are converted to their index in the vocabulary (an integer). - When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, input strings - are encoded into an array where each dimension corresponds to an element in - the vocabulary. - - The vocabulary can optionally contain a mask token as well as an OOV token - (which can optionally occupy multiple indices in the vocabulary, as set - by `num_oov_indices`). - The position of these tokens in the vocabulary is fixed. 
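# A minimal sketch of the fixed index layout described above, using
# hypothetical tokens and assuming a TF 2.x runtime: with output_mode="int",
# index 0 holds the mask token (if set), the next num_oov_indices slots hold
# OOV, and the supplied vocabulary follows.
import tensorflow as tf

layer = tf.keras.layers.StringLookup(
    vocabulary=["earth", "wind"], mask_token="", num_oov_indices=1
)
print(layer.get_vocabulary())  # ['', '[UNK]', 'earth', 'wind']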
When `output_mode` is - `"int"`, the vocabulary will begin with the mask token (if set), followed by - OOV indices, followed by the rest of the vocabulary. When `output_mode` is - `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will begin with OOV - indices and instances of the mask token will be dropped. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - max_tokens: Maximum size of the vocabulary for this layer. This should only - be specified when adapting the vocabulary or when setting - `pad_to_max_tokens=True`. If None, there is no cap on the size of the - vocabulary. Note that this size includes the OOV and mask tokens. Defaults - to None. - num_oov_indices: The number of out-of-vocabulary tokens to use. If this - value is more than 1, OOV inputs are hashed to determine their OOV value. - If this value is 0, OOV inputs will cause an error when calling the layer. - Defaults to 1. - mask_token: A token that represents masked inputs. When `output_mode` is - `"int"`, the token is included in vocabulary and mapped to index 0. In - other output modes, the token will not appear in the vocabulary and - instances of the mask token in the input will be dropped. If set to None, - no mask term will be added. Defaults to `None`. - oov_token: Only used when `invert` is True. The token to return for OOV - indices. Defaults to `"[UNK]"`. - vocabulary: Optional. Either an array of strings or a string path to a text - file. If passing an array, can pass a tuple, list, 1D numpy array, or 1D - tensor containing the string vocbulary terms. If passing a file path, the - file should contain one line per term in the vocabulary. If this argument - is set, there is no need to `adapt()` the layer. - idf_weights: Only valid when `output_mode` is `"tf_idf"`. A tuple, list, 1D - numpy array, or 1D tensor or the same length as the vocabulary, containing - the floating point inverse document frequency weights, which will be - multiplied by per sample term counts for the final `tf_idf` weight. If the - `vocabulary` argument is set, and `output_mode` is `"tf_idf"`, this - argument must be supplied. - invert: Only valid when `output_mode` is `"int"`. If True, this layer will - map indices to vocabulary items instead of mapping vocabulary items to - indices. Default to False. - output_mode: Specification for the output of the layer. Defaults to `"int"`. - Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or - `"tf_idf"` configuring the layer as follows: - - `"int"`: Return the raw integer indices of the input tokens. - - `"one_hot"`: Encodes each individual element in the input into an - array the same size as the vocabulary, containing a 1 at the element - index. If the last dimension is size 1, will encode on that dimension. - If the last dimension is not size 1, will append a new dimension for - the encoded output. - - `"multi_hot"`: Encodes each sample in the input into a single array - the same size as the vocabulary, containing a 1 for each vocabulary - term present in the sample. Treats the last dimension as the sample - dimension, if input shape is (..., sample_length), output shape will - be (..., num_tokens). - - `"count"`: As `"multi_hot"`, but the int array contains a count of the - number of times the token at that index appeared in the sample. - - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to - find the value in each token slot. 
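# A short sketch tying the output modes above back to the bincount-based
# helper refactored earlier in this diff; token ids are hypothetical and the
# patched preprocessing_utils module is assumed importable.
import tensorflow.compat.v2 as tf
from keras.layers.preprocessing import preprocessing_utils

token_ids = tf.constant([0, 1, 1, 3])  # indices already produced by a lookup
for mode in ("multi_hot", "count"):
    out = preprocessing_utils.encode_categorical_inputs(
        token_ids, output_mode=mode, depth=4
    )
    print(mode, out.numpy())  # multi_hot: [1. 1. 0. 1.], count: [1. 2. 0. 1.]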
- For `"int"` output, any shape of input and output is supported. For all - other output modes, currently only output up to rank 2 is supported. - pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`, - `"count"`, or `"tf_idf"`. If True, the output will have its feature axis - padded to `max_tokens` even if the number of unique tokens in the - vocabulary is less than max_tokens, resulting in a tensor of shape - [batch_size, max_tokens] regardless of vocabulary size. Defaults to False. - sparse: Boolean. Only applicable when `output_mode` is `"multi_hot"`, - `"count"`, or `"tf_idf"`. If True, returns a `SparseTensor` instead of a - dense `Tensor`. Defaults to False. - - Examples: - - **Creating a lookup layer with a known vocabulary** - - This example creates a lookup layer with a pre-existing vocabulary. - - >>> vocab = ["a", "b", "c", "d"] - >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]]) - >>> layer = tf.keras.layers.StringLookup(vocabulary=vocab) - >>> layer(data) - - - **Creating a lookup layer with an adapted vocabulary** - - This example creates a lookup layer and generates the vocabulary by analyzing - the dataset. - - >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]]) - >>> layer = tf.keras.layers.StringLookup() - >>> layer.adapt(data) - >>> layer.get_vocabulary() - ['[UNK]', 'd', 'z', 'c', 'b', 'a'] - - Note that the OOV token `"[UNK]"` has been added to the vocabulary. - The remaining tokens are sorted by frequency - (`"d"`, which has 2 occurrences, is first) then by inverse sort order. - - >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]]) - >>> layer = tf.keras.layers.StringLookup() - >>> layer.adapt(data) - >>> layer(data) - - - **Lookups with multiple OOV indices** - - This example demonstrates how to use a lookup layer with multiple OOV indices. - When a layer is created with more than one OOV index, any OOV values are - hashed into the number of OOV buckets, distributing OOV values in a - deterministic fashion across the set. - - >>> vocab = ["a", "b", "c", "d"] - >>> data = tf.constant([["a", "c", "d"], ["m", "z", "b"]]) - >>> layer = tf.keras.layers.StringLookup(vocabulary=vocab, num_oov_indices=2) - >>> layer(data) - - - Note that the output for OOV value 'm' is 0, while the output for OOV value - 'z' is 1. The in-vocab terms have their output index increased by 1 from - earlier examples (a maps to 2, etc) in order to make space for the extra OOV - value. - - **One-hot output** - - Configure the layer with `output_mode='one_hot'`. Note that the first - `num_oov_indices` dimensions in the ont_hot encoding represent OOV values. - - >>> vocab = ["a", "b", "c", "d"] - >>> data = tf.constant(["a", "b", "c", "d", "z"]) - >>> layer = tf.keras.layers.StringLookup( - ... vocabulary=vocab, output_mode='one_hot') - >>> layer(data) - - - **Multi-hot output** - - Configure the layer with `output_mode='multi_hot'`. Note that the first - `num_oov_indices` dimensions in the multi_hot encoding represent OOV values. - - >>> vocab = ["a", "b", "c", "d"] - >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]]) - >>> layer = tf.keras.layers.StringLookup( - ... vocabulary=vocab, output_mode='multi_hot') - >>> layer(data) - - - **Token count output** - - Configure the layer with `output_mode='count'`. As with multi_hot output, the - first `num_oov_indices` dimensions in the output represent OOV values. 
- - >>> vocab = ["a", "b", "c", "d"] - >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]]) - >>> layer = tf.keras.layers.StringLookup( - ... vocabulary=vocab, output_mode='count') - >>> layer(data) - - - **TF-IDF output** - - Configure the layer with `output_mode="tf_idf"`. As with multi_hot output, the - first `num_oov_indices` dimensions in the output represent OOV values. - - Each token bin will output `token_count * idf_weight`, where the idf weights - are the inverse document frequency weights per token. These should be provided - along with the vocabulary. Note that the `idf_weight` for OOV values will - default to the average of all idf weights passed in. - - >>> vocab = ["a", "b", "c", "d"] - >>> idf_weights = [0.25, 0.75, 0.6, 0.4] - >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]]) - >>> layer = tf.keras.layers.StringLookup(output_mode="tf_idf") - >>> layer.set_vocabulary(vocab, idf_weights=idf_weights) - >>> layer(data) - - - To specify the idf weights for oov values, you will need to pass the entire - vocabularly including the leading oov token. - - >>> vocab = ["[UNK]", "a", "b", "c", "d"] - >>> idf_weights = [0.9, 0.25, 0.75, 0.6, 0.4] - >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]]) - >>> layer = tf.keras.layers.StringLookup(output_mode="tf_idf") - >>> layer.set_vocabulary(vocab, idf_weights=idf_weights) - >>> layer(data) - - - When adapting the layer in `"tf_idf"` mode, each input sample will be - considered a document, and IDF weight per token will be calculated as - `log(1 + num_documents / (1 + token_document_count))`. - - **Inverse lookup** - - This example demonstrates how to map indices to strings using this layer. (You - can also use `adapt()` with `inverse=True`, but for simplicity we'll pass the - vocab in this example.) - - >>> vocab = ["a", "b", "c", "d"] - >>> data = tf.constant([[1, 3, 4], [4, 0, 2]]) - >>> layer = tf.keras.layers.StringLookup(vocabulary=vocab, invert=True) - >>> layer(data) - - - Note that the first index correspond to the oov token by default. - - - **Forward and inverse lookup pairs** - - This example demonstrates how to use the vocabulary of a standard lookup - layer to create an inverse lookup layer. - - >>> vocab = ["a", "b", "c", "d"] - >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]]) - >>> layer = tf.keras.layers.StringLookup(vocabulary=vocab) - >>> i_layer = tf.keras.layers.StringLookup(vocabulary=vocab, invert=True) - >>> int_data = layer(data) - >>> i_layer(int_data) - - - In this example, the input value `"z"` resulted in an output of `"[UNK]"`, - since 1000 was not in the vocabulary - it got represented as an OOV, and all - OOV values are returned as `"[UNK]"` in the inverse layer. Also, note that - for the inverse to work, you must have already set the forward layer - vocabulary either directly or via `adapt()` before calling `get_vocabulary()`. - """ - - def __init__(self, - max_tokens=None, - num_oov_indices=1, - mask_token=None, - oov_token="[UNK]", - vocabulary=None, - idf_weights=None, - encoding=None, - invert=False, - output_mode="int", - sparse=False, - pad_to_max_tokens=False, - **kwargs): - # Legacy versions of the StringLookup layer set layer dtype to string, - # instead of the output type. If we see this, clear it. 
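# A hedged sketch of the legacy-dtype handling implemented just below: a
# dtype="string" kwarg left over from old saved configs is dropped, so the
# layer dtype reflects the output type. The printed value is indicative only.
import tensorflow as tf

layer = tf.keras.layers.StringLookup(vocabulary=["a"], dtype="string")
print(layer.dtype)  # the output dtype (e.g. "int64"), not "string"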
- if "dtype" in kwargs and (kwargs["dtype"] == tf.string or - kwargs["dtype"] == "string"): - del kwargs["dtype"] - - if encoding is None: - encoding = "utf-8" - - self.encoding = encoding - - super().__init__( - max_tokens=max_tokens, - num_oov_indices=num_oov_indices, - mask_token=mask_token, - oov_token=oov_token, - vocabulary=vocabulary, - vocabulary_dtype=tf.string, - idf_weights=idf_weights, - invert=invert, - output_mode=output_mode, - sparse=sparse, - pad_to_max_tokens=pad_to_max_tokens, - **kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell("StringLookup").set(True) - - def get_config(self): - config = {"encoding": self.encoding} - base_config = super().get_config() - # There is only one valid dtype for strings, so we don't expose this. - del base_config["vocabulary_dtype"] - return dict(list(base_config.items()) + list(config.items())) - - # We override this method solely to generate a docstring. - def adapt(self, data, batch_size=None, steps=None): - """Computes a vocabulary of string terms from tokens in a dataset. - - Calling `adapt()` on a `StringLookup` layer is an alternative to passing in - a precomputed vocabulary on construction via the `vocabulary` argument. A - `StringLookup` layer should always be either adapted over a dataset or - supplied with a vocabulary. - - During `adapt()`, the layer will build a vocabulary of all string tokens - seen in the dataset, sorted by occurance count, with ties broken by sort - order of the tokens (high to low). At the end of `adapt()`, if `max_tokens` - is set, the voculary wil be truncated to `max_tokens` size. For example, - adapting a layer with `max_tokens=1000` will compute the 1000 most frequent - tokens occurring in the input dataset. If `output_mode='tf-idf'`, `adapt()` - will also learn the document frequencies of each token in the input dataset. - - In order to make `StringLookup` efficient in any distribution context, the - vocabulary is kept static with respect to any compiled `tf.Graph`s that - call the layer. As a consequence, if the layer is adapted a second time, - any models using the layer should be re-compiled. For more information - see `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`. - - `adapt()` is meant only as a single machine utility to compute layer state. - To analyze a dataset that cannot fit on a single machine, see - [Tensorflow Transform](https://www.tensorflow.org/tfx/transform/get_started) - for a multi-machine, map-reduce solution. - - Arguments: - data: The data to train on. It can be passed either as a - `tf.data.Dataset`, or as a numpy array. - batch_size: Integer or `None`. - Number of samples per state update. - If unspecified, `batch_size` will default to 32. - Do not specify the `batch_size` if your data is in the - form of datasets, generators, or `keras.utils.Sequence` instances - (since they generate batches). - steps: Integer or `None`. - Total number of steps (batches of samples) - When training with input tensors such as - TensorFlow data tensors, the default `None` is equal to - the number of samples in your dataset divided by - the batch size, or 1 if that cannot be determined. If x is a - `tf.data` dataset, and 'steps' is None, the epoch will run until - the input dataset is exhausted. When passing an infinitely - repeating dataset, you must specify the `steps` argument. This - argument is not supported with array inputs. + """A preprocessing layer which maps string features to integer indices. 
+ + This layer translates a set of arbitrary strings into integer output via a + table-based vocabulary lookup. This layer will perform no splitting or + transformation of input strings. For a layer that can split and tokenize + natural language, see the `tf.keras.layers.TextVectorization` layer. + + The vocabulary for the layer must be either supplied on construction or + learned via `adapt()`. During `adapt()`, the layer will analyze a data set, + determine the frequency of individual string tokens, and create a + vocabulary from them. If the vocabulary is capped in size, the most frequent + tokens will be used to create the vocabulary and all others will be treated + as out-of-vocabulary (OOV). + + There are two possible output modes for the layer. + When `output_mode` is `"int"`, + input strings are converted to their index in the vocabulary (an integer). + When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, input strings + are encoded into an array where each dimension corresponds to an element in + the vocabulary. + + The vocabulary can optionally contain a mask token as well as an OOV token + (which can optionally occupy multiple indices in the vocabulary, as set + by `num_oov_indices`). + The position of these tokens in the vocabulary is fixed. When `output_mode` + is `"int"`, the vocabulary will begin with the mask token (if set), followed + by OOV indices, followed by the rest of the vocabulary. When `output_mode` + is `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will begin with + OOV indices and instances of the mask token will be dropped. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Args: + max_tokens: Maximum size of the vocabulary for this layer. This should + only be specified when adapting the vocabulary or when setting + `pad_to_max_tokens=True`. If None, there is no cap on the size of the + vocabulary. Note that this size includes the OOV and mask tokens. + Defaults to `None`. + num_oov_indices: The number of out-of-vocabulary tokens to use. If this + value is more than 1, OOV inputs are hashed to determine their OOV + value. If this value is 0, OOV inputs will cause an error when calling + the layer. Defaults to `1`. + mask_token: A token that represents masked inputs. When `output_mode` is + `"int"`, the token is included in the vocabulary and mapped to index 0. In + other output modes, the token will not appear in the vocabulary and + instances of the mask token in the input will be dropped. If set to + None, no mask term will be added. Defaults to `None`. + oov_token: Only used when `invert` is True. The token to return for OOV + indices. Defaults to `"[UNK]"`. + vocabulary: Optional. Either an array of strings or a string path to a + text file. If passing an array, can pass a tuple, list, 1D numpy array, + or 1D tensor containing the string vocabulary terms. If passing a file + path, the file should contain one line per term in the vocabulary. If + this argument is set, there is no need to `adapt()` the layer. + idf_weights: Only valid when `output_mode` is `"tf_idf"`. A tuple, list, + 1D numpy array, or 1D tensor of the same length as the vocabulary, + containing the floating point inverse document frequency weights, which + will be multiplied by per sample term counts for the final `tf_idf` + weight. If the `vocabulary` argument is set, and `output_mode` is + `"tf_idf"`, this argument must be supplied. 
+ invert: Only valid when `output_mode` is `"int"`. If True, this layer will + map indices to vocabulary items instead of mapping vocabulary items to + indices. Defaults to `False`. + output_mode: Specification for the output of the layer. Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"` + configuring the layer as follows: + - `"int"`: Return the raw integer indices of the input tokens. + - `"one_hot"`: Encodes each individual element in the input into an + array the same size as the vocabulary, containing a 1 at the element + index. If the last dimension is size 1, will encode on that + dimension. If the last dimension is not size 1, will append a new + dimension for the encoded output. + - `"multi_hot"`: Encodes each sample in the input into a single array + the same size as the vocabulary, containing a 1 for each vocabulary + term present in the sample. Treats the last dimension as the sample + dimension, if input shape is (..., sample_length), output shape will + be (..., num_tokens). + - `"count"`: As `"multi_hot"`, but the int array contains a count of + the number of times the token at that index appeared in the sample. + - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to + find the value in each token slot. + For `"int"` output, any shape of input and output is supported. For all + other output modes, currently only output up to rank 2 is supported. + Defaults to `"int"`. + pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`, + `"count"`, or `"tf_idf"`. If True, the output will have its feature axis + padded to `max_tokens` even if the number of unique tokens in the + vocabulary is less than max_tokens, resulting in a tensor of shape + [batch_size, max_tokens] regardless of vocabulary size. Defaults to + `False`. + sparse: Boolean. Only applicable when `output_mode` is `"multi_hot"`, + `"count"`, or `"tf_idf"`. If True, returns a `SparseTensor` instead of a + dense `Tensor`. Defaults to `False`. + encoding: Optional. The text encoding to use to interpret the input + strings. Defaults to `"utf-8"`. + + Examples: + + **Creating a lookup layer with a known vocabulary** + + This example creates a lookup layer with a pre-existing vocabulary. + + >>> vocab = ["a", "b", "c", "d"] + >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]]) + >>> layer = tf.keras.layers.StringLookup(vocabulary=vocab) + >>> layer(data) + + + **Creating a lookup layer with an adapted vocabulary** + + This example creates a lookup layer and generates the vocabulary by + analyzing the dataset. + + >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]]) + >>> layer = tf.keras.layers.StringLookup() + >>> layer.adapt(data) + >>> layer.get_vocabulary() + ['[UNK]', 'd', 'z', 'c', 'b', 'a'] + + Note that the OOV token `"[UNK]"` has been added to the vocabulary. + The remaining tokens are sorted by frequency + (`"d"`, which has 2 occurrences, is first) then by inverse sort order. + + >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]]) + >>> layer = tf.keras.layers.StringLookup() + >>> layer.adapt(data) + >>> layer(data) + + + **Lookups with multiple OOV indices** + + This example demonstrates how to use a lookup layer with multiple OOV + indices. When a layer is created with more than one OOV index, any OOV + values are hashed into the number of OOV buckets, distributing OOV values in + a deterministic fashion across the set. 
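# A small sketch of the `sparse` and `pad_to_max_tokens` arguments documented
# above, with hypothetical shapes: the feature axis is padded out to
# max_tokens and a tf.SparseTensor is returned instead of a dense Tensor.
import tensorflow as tf

layer = tf.keras.layers.StringLookup(
    vocabulary=["a", "b"],
    output_mode="multi_hot",
    sparse=True,
    pad_to_max_tokens=True,
    max_tokens=10,
)
out = layer(tf.constant([["a", "b"]]))
print(type(out).__name__, out.dense_shape.numpy())  # SparseTensor [ 1 10]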
+ + >>> vocab = ["a", "b", "c", "d"] + >>> data = tf.constant([["a", "c", "d"], ["m", "z", "b"]]) + >>> layer = tf.keras.layers.StringLookup(vocabulary=vocab, + ... num_oov_indices=2) + >>> layer(data) + + + Note that the output for OOV value 'm' is 0, while the output for OOV value + 'z' is 1. The in-vocab terms have their output index increased by 1 from + earlier examples (a maps to 2, etc) in order to make space for the extra OOV + value. + + **One-hot output** + + Configure the layer with `output_mode='one_hot'`. Note that the first + `num_oov_indices` dimensions in the one_hot encoding represent OOV values. + + >>> vocab = ["a", "b", "c", "d"] + >>> data = tf.constant(["a", "b", "c", "d", "z"]) + >>> layer = tf.keras.layers.StringLookup( + ... vocabulary=vocab, output_mode='one_hot') + >>> layer(data) + + + **Multi-hot output** + + Configure the layer with `output_mode='multi_hot'`. Note that the first + `num_oov_indices` dimensions in the multi_hot encoding represent OOV values. + + >>> vocab = ["a", "b", "c", "d"] + >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]]) + >>> layer = tf.keras.layers.StringLookup( + ... vocabulary=vocab, output_mode='multi_hot') + >>> layer(data) + + + **Token count output** + + Configure the layer with `output_mode='count'`. As with multi_hot output, + the first `num_oov_indices` dimensions in the output represent OOV values. + + >>> vocab = ["a", "b", "c", "d"] + >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]]) + >>> layer = tf.keras.layers.StringLookup( + ... vocabulary=vocab, output_mode='count') + >>> layer(data) + + + **TF-IDF output** + + Configure the layer with `output_mode="tf_idf"`. As with multi_hot output, + the first `num_oov_indices` dimensions in the output represent OOV values. + + Each token bin will output `token_count * idf_weight`, where the idf weights + are the inverse document frequency weights per token. These should be + provided along with the vocabulary. Note that the `idf_weight` for OOV + values will default to the average of all idf weights passed in. + + >>> vocab = ["a", "b", "c", "d"] + >>> idf_weights = [0.25, 0.75, 0.6, 0.4] + >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]]) + >>> layer = tf.keras.layers.StringLookup(output_mode="tf_idf") + >>> layer.set_vocabulary(vocab, idf_weights=idf_weights) + >>> layer(data) + + + To specify the idf weights for oov values, you will need to pass the entire + vocabulary including the leading oov token. + + >>> vocab = ["[UNK]", "a", "b", "c", "d"] + >>> idf_weights = [0.9, 0.25, 0.75, 0.6, 0.4] + >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]]) + >>> layer = tf.keras.layers.StringLookup(output_mode="tf_idf") + >>> layer.set_vocabulary(vocab, idf_weights=idf_weights) + >>> layer(data) + + + When adapting the layer in `"tf_idf"` mode, each input sample will be + considered a document, and IDF weight per token will be calculated as + `log(1 + num_documents / (1 + token_document_count))`. + + **Inverse lookup** + + This example demonstrates how to map indices to strings using this layer. + (You can also use `adapt()` with `invert=True`, but for simplicity we'll + pass the vocab in this example.) + + >>> vocab = ["a", "b", "c", "d"] + >>> data = tf.constant([[1, 3, 4], [4, 0, 2]]) + >>> layer = tf.keras.layers.StringLookup(vocabulary=vocab, invert=True) + >>> layer(data) + + + Note that the first index corresponds to the oov token by default. 
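# The adapt()-time "tf_idf" weighting quoted above, worked by hand on a
# hypothetical two-document corpus; only the documented formula
# log(1 + num_documents / (1 + token_document_count)) is assumed.
import numpy as np

docs = [["a", "c"], ["a", "a", "b"]]  # each sample counts as one document
num_documents = len(docs)
doc_count = {t: sum(t in d for d in docs) for t in ("a", "b", "c")}
idf = {t: np.log(1 + num_documents / (1 + c)) for t, c in doc_count.items()}
print(idf)  # "a" is in both documents -> log(1 + 2/3); "b" and "c" -> log(2)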
+ + + **Forward and inverse lookup pairs** + + This example demonstrates how to use the vocabulary of a standard lookup + layer to create an inverse lookup layer. + + >>> vocab = ["a", "b", "c", "d"] + >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]]) + >>> layer = tf.keras.layers.StringLookup(vocabulary=vocab) + >>> i_layer = tf.keras.layers.StringLookup(vocabulary=vocab, invert=True) + >>> int_data = layer(data) + >>> i_layer(int_data) + + + In this example, the input value `"z"` resulted in an output of `"[UNK]"`, + since `"z"` was not in the vocabulary - it got represented as an OOV, and all + OOV values are returned as `"[UNK]"` in the inverse layer. Also, note that + for the inverse to work, you must have already set the forward layer + vocabulary either directly or via `adapt()` before calling + `get_vocabulary()`. """ + + def __init__( + self, + max_tokens=None, + num_oov_indices=1, + mask_token=None, + oov_token="[UNK]", + vocabulary=None, + idf_weights=None, + encoding="utf-8", + invert=False, + output_mode="int", + sparse=False, + pad_to_max_tokens=False, + **kwargs + ): + # Legacy versions of the StringLookup layer set layer dtype to string, + # instead of the output type. If we see this, clear it. + if "dtype" in kwargs and ( + kwargs["dtype"] == tf.string or kwargs["dtype"] == "string" + ): + del kwargs["dtype"] + + self.encoding = encoding + + super().__init__( + max_tokens=max_tokens, + num_oov_indices=num_oov_indices, + mask_token=mask_token, + oov_token=oov_token, + vocabulary=vocabulary, + vocabulary_dtype=tf.string, + idf_weights=idf_weights, + invert=invert, + output_mode=output_mode, + sparse=sparse, + pad_to_max_tokens=pad_to_max_tokens, + **kwargs + ) + base_preprocessing_layer.keras_kpl_gauge.get_cell("StringLookup").set( + True + ) + + def get_config(self): + config = {"encoding": self.encoding} + base_config = super().get_config() + # There is only one valid dtype for strings, so we don't expose this. + del base_config["vocabulary_dtype"] + return dict(list(base_config.items()) + list(config.items())) + + # We override this method solely to generate a docstring. + def adapt(self, data, batch_size=None, steps=None): + """Computes a vocabulary of string terms from tokens in a dataset. + + Calling `adapt()` on a `StringLookup` layer is an alternative to passing + in a precomputed vocabulary on construction via the `vocabulary` + argument. A `StringLookup` layer should always be either adapted over a + dataset or supplied with a vocabulary. + + During `adapt()`, the layer will build a vocabulary of all string tokens + seen in the dataset, sorted by occurrence count, with ties broken by + sort order of the tokens (high to low). At the end of `adapt()`, if + `max_tokens` is set, the vocabulary will be truncated to `max_tokens` + size. For example, adapting a layer with `max_tokens=1000` will compute + the 1000 most frequent tokens occurring in the input dataset. If + `output_mode='tf_idf'`, `adapt()` will also learn the document + frequencies of each token in the input dataset. + + In order to make `StringLookup` efficient in any distribution context, + the vocabulary is kept static with respect to any compiled `tf.Graph`s + that call the layer. 
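# A minimal sketch of the static-vocabulary caveat just described, with
# hypothetical data: the vocabulary is computed once per adapt() call, so
# graphs compiled against the old table should be rebuilt after re-adapting.
import numpy as np
import tensorflow as tf

layer = tf.keras.layers.StringLookup()
layer.adapt(np.array(["a", "b", "b"]))
print(layer.get_vocabulary())  # ['[UNK]', 'b', 'a']

layer.adapt(np.array(["c", "c", "a"]))  # second adapt: re-compile any users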
As a consequence, if the layer is adapted a second + time, any models using the layer should be re-compiled. For more + information see + `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`. + + `adapt()` is meant only as a single machine utility to compute layer + state. To analyze a dataset that cannot fit on a single machine, see + [Tensorflow Transform]( + https://www.tensorflow.org/tfx/transform/get_started) for a + multi-machine, map-reduce solution. + + Arguments: + data: The data to train on. It can be passed either as a + `tf.data.Dataset`, or as a numpy array. + batch_size: Integer or `None`. + Number of samples per state update. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of datasets, generators, or `keras.utils.Sequence` instances + (since they generate batches). + steps: Integer or `None`. + Total number of steps (batches of samples) + When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of samples in your dataset divided by + the batch size, or 1 if that cannot be determined. If x is a + `tf.data` dataset, and 'steps' is None, the epoch will run until + the input dataset is exhausted. When passing an infinitely + repeating dataset, you must specify the `steps` argument. This + argument is not supported with array inputs. + """ + super().adapt(data, batch_size=batch_size, steps=steps) + + # Overridden methods from IndexLookup. + def _tensor_vocab_to_numpy(self, vocabulary): + vocabulary = vocabulary.numpy() + return np.array( + [tf.compat.as_text(x, self.encoding) for x in vocabulary] + ) diff --git a/keras/layers/preprocessing/string_lookup_test.py b/keras/layers/preprocessing/string_lookup_test.py index 17ead71db055..0fac8cf28f1d 100644 --- a/keras/layers/preprocessing/string_lookup_test.py +++ b/keras/layers/preprocessing/string_lookup_test.py @@ -14,377 +14,515 @@ # ============================================================================== """Tests for Keras text vectorization preprocessing layer.""" -import tensorflow.compat.v2 as tf - import os -from absl.testing import parameterized + import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils from keras.layers.preprocessing import preprocessing_test_utils from keras.layers.preprocessing import string_lookup +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils def _get_end_to_end_test_cases(): - test_cases = ( - { - "testcase_name": "test_strings_soft_vocab_cap", - # Create an array where 'earth' is the most frequent term, followed by - # 'wind', then 'and', then 'fire'. This ensures that the vocab - # accumulator is sorting by frequency. 
- "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"], - ["and"], ["earth"], ["michigan"]]), - "kwargs": { - "max_tokens": None, - }, - "expected_output": [[1], [2], [3], [4], [4], [3], [1], [0]], - "input_dtype": - tf.string - }, - ) - - crossed_test_cases = [] - # Cross above test cases with use_dataset in (True, False) - for use_dataset in (True, False): - for case in test_cases: - case = case.copy() - if use_dataset: - case["testcase_name"] = case["testcase_name"] + "_with_dataset" - case["use_dataset"] = use_dataset - crossed_test_cases.append(case) - - return crossed_test_cases + test_cases = ( + { + "testcase_name": "test_strings_soft_vocab_cap", + # Create an array where 'earth' is the most frequent term, followed + # by 'wind', then 'and', then 'fire'. This ensures that the vocab + # accumulator is sorting by frequency. + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + ["wind"], + ["and"], + ["fire"], + ["fire"], + ["and"], + ["earth"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": None, + }, + "expected_output": [[1], [2], [3], [4], [4], [3], [1], [0]], + "input_dtype": tf.string, + }, + ) + + crossed_test_cases = [] + # Cross above test cases with use_dataset in (True, False) + for use_dataset in (True, False): + for case in test_cases: + case = case.copy() + if use_dataset: + case["testcase_name"] = case["testcase_name"] + "_with_dataset" + case["use_dataset"] = use_dataset + crossed_test_cases.append(case) + + return crossed_test_cases @test_combinations.run_all_keras_modes(always_skip_v1=True) -class StringLookupLayerTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - @parameterized.named_parameters(*_get_end_to_end_test_cases()) - def test_layer_end_to_end_with_adapt(self, vocab_data, input_data, kwargs, - use_dataset, expected_output, - input_dtype): - cls = string_lookup.StringLookup - expected_output_dtype = tf.int64 - input_shape = input_data.shape - - if use_dataset: - # Keras APIs expect batched datasets. - # TODO(rachelim): `model.predict` predicts the result on each - # dataset batch separately, then tries to concatenate the results - # together. When the results have different shapes on the non-concat - # axis (which can happen in the output_mode = INT case for - # StringLookup), the concatenation fails. In real use cases, this may - # not be an issue because users are likely to pipe the preprocessing layer - # into other keras layers instead of predicting it directly. A workaround - # for these unit tests is to have the dataset only contain one batch, so - # no concatenation needs to happen with the result. For consistency with - # numpy input, we should make `predict` join differently shaped results - # together sensibly, with 0 padding. 
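# The single-batch workaround from the TODO above, shown in isolation with
# hypothetical data: batching the whole array as one batch avoids
# concatenating differently shaped per-batch predictions.
import numpy as np
import tensorflow as tf

input_data = np.array([["a"], ["b"], ["c"]])
ds = tf.data.Dataset.from_tensor_slices(input_data).batch(input_data.shape[0])
print(next(iter(ds)).shape)  # (3, 1): the dataset yields exactly one batch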
- input_data = tf.data.Dataset.from_tensor_slices(input_data).batch( - input_shape[0]) - vocab_data = tf.data.Dataset.from_tensor_slices(vocab_data).batch( - input_shape[0]) - - output_data = test_utils.layer_test( - cls, - kwargs=kwargs, - input_shape=input_shape, - input_data=input_data, - input_dtype=input_dtype, - expected_output_dtype=expected_output_dtype, - validate_training=False, - adapt_data=vocab_data) - self.assertAllClose(expected_output, output_data) +class StringLookupLayerTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + @parameterized.named_parameters(*_get_end_to_end_test_cases()) + def test_layer_end_to_end_with_adapt( + self, + vocab_data, + input_data, + kwargs, + use_dataset, + expected_output, + input_dtype, + ): + cls = string_lookup.StringLookup + expected_output_dtype = tf.int64 + input_shape = input_data.shape + + if use_dataset: + # Keras APIs expect batched datasets. + # TODO(rachelim): `model.predict` predicts the result on each + # dataset batch separately, then tries to concatenate the results + # together. When the results have different shapes on the non-concat + # axis (which can happen in the output_mode = INT case for + # StringLookup), the concatenation fails. In real use cases, this + # may not be an issue because users are likely to pipe the + # preprocessing layer into other keras layers instead of predicting + # it directly. A workaround for these unit tests is to have the + # dataset only contain one batch, so no concatenation needs to + # happen with the result. For consistency with numpy input, we + # should make `predict` join differently shaped results together + # sensibly, with 0 padding. + input_data = tf.data.Dataset.from_tensor_slices(input_data).batch( + input_shape[0] + ) + vocab_data = tf.data.Dataset.from_tensor_slices(vocab_data).batch( + input_shape[0] + ) + + output_data = test_utils.layer_test( + cls, + kwargs=kwargs, + input_shape=input_shape, + input_data=input_data, + input_dtype=input_dtype, + expected_output_dtype=expected_output_dtype, + validate_training=False, + adapt_data=vocab_data, + ) + self.assertAllClose(expected_output, output_data) @test_combinations.run_all_keras_modes(always_skip_v1=True) -class StringLookupVocabularyTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest - ): - - def _write_to_temp_file(self, file_name, vocab_list): - vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") - with tf.io.gfile.GFile(vocab_path, "w") as writer: - for vocab in vocab_list: - writer.write(vocab + "\n") - writer.flush() - writer.close() - return vocab_path - - def test_int_output_explicit_vocab(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup(vocabulary=vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_int_output_explicit_vocab_with_special_tokens(self): - vocab_data = ["", "[UNK]", "earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = 
string_lookup.StringLookup(vocabulary=vocab_data, mask_token="") - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_int_output_no_oov(self): - vocab_data = ["earth", "wind", "and", "fire"] - valid_input = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", ""]]) - invalid_input = np.array([["earth", "wind", "and", "michigan"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup( - vocabulary=vocab_data, mask_token="", num_oov_indices=0) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(valid_input) - self.assertAllEqual(expected_output, output_data) - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - "found OOV values.*michigan"): - _ = model.predict(invalid_input) - - def test_no_vocab(self): - with self.assertRaisesRegex(RuntimeError, - "you must set the layer's vocabulary"): - layer = string_lookup.StringLookup(output_mode="binary") - layer([["a"]]) - - def test_one_hot_output(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array(["earth", "wind", "and", "fire", "michigan"]) - expected_output = [ - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - ] - - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = string_lookup.StringLookup( - vocabulary=vocab_data, output_mode="one_hot") - res = layer(input_data) - model = keras.Model(inputs=input_data, outputs=res) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_multi_hot_output(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[0, 1, 1, 1, 1], [1, 1, 0, 1, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup( - vocabulary=vocab_data, output_mode="multi_hot") - res = layer(input_data) - model = keras.Model(inputs=input_data, outputs=res) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_count_output(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "earth", "fire", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[0, 2, 0, 0, 2], [1, 1, 0, 1, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup( - vocabulary=vocab_data, output_mode="count") - res = layer(input_data) - model = keras.Model(inputs=input_data, outputs=res) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_sparse_output(self): - vocab_data = ["earth", "wind", "and", "fire"] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup( - vocabulary=vocab_data, output_mode="multi_hot", sparse=True) - res = layer(input_data) - self.assertTrue(res.__class__.__name__, "SparseKerasTensor") - - def test_get_vocab_returns_str(self): - vocab_data = ["earth", "wind", "and", "fire"] - expected_vocab = ["[UNK]", "earth", "wind", "and", "fire"] - layer = string_lookup.StringLookup(vocabulary=vocab_data) - layer_vocab = layer.get_vocabulary() - 
self.assertAllEqual(expected_vocab, layer_vocab) - self.assertIsInstance(layer_vocab[0], str) - - inverse_layer = string_lookup.StringLookup( - vocabulary=layer.get_vocabulary(), invert=True) - layer_vocab = inverse_layer.get_vocabulary() - self.assertAllEqual(expected_vocab, layer_vocab) - self.assertIsInstance(layer_vocab[0], str) - - def test_int_output_explicit_vocab_from_file(self): - vocab_list = ["earth", "wind", "and", "fire"] - vocab_path = self._write_to_temp_file("vocab_file", vocab_list) - - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup(vocabulary=vocab_path) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_int_output_explicit_vocab_from_file_via_setter(self): - vocab_list = ["earth", "wind", "and", "fire"] - vocab_path = self._write_to_temp_file("vocab_file", vocab_list) - - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup() - layer.set_vocabulary(vocab_path) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_non_unique_vocab_fails(self): - vocab_data = ["earth", "wind", "and", "fire", "fire"] - with self.assertRaisesRegex(ValueError, ".*repeated term.*fire.*"): - _ = string_lookup.StringLookup(vocabulary=vocab_data) - - def test_non_unique_vocab_from_file_fails(self): - vocab_list = ["earth", "wind", "and", "fire", "earth"] - vocab_path = self._write_to_temp_file("repeat_vocab_file", vocab_list) - with self.assertRaisesRegex( - tf.errors.FailedPreconditionError, - "HashTable has different value for same key.*earth"): - _ = string_lookup.StringLookup(vocabulary=vocab_path) - - def test_inverse_layer(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 0]]) - expected_output = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", ""]]) - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = string_lookup.StringLookup( - vocabulary=vocab_data, invert=True, mask_token="") - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_inverse_layer_from_file(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 0]]) - expected_output = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "[UNK]"]]) - vocab_path = self._write_to_temp_file("vocab_file", vocab_data) - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = string_lookup.StringLookup(vocabulary=vocab_path, invert=True) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_inverse_layer_from_file_with_mask(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([[2, 3, 4, 5], [5, 
4, 2, 0]]) - expected_output = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "[M]"]]) - vocab_path = self._write_to_temp_file("vocab_file", vocab_data) - - input_data = keras.Input(shape=(None,), dtype=tf.int64) - layer = string_lookup.StringLookup( - vocabulary=vocab_path, invert=True, mask_token="[M]") - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_forward_backward_explicit_vocab(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "[UNK]"]]) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup(vocabulary=vocab_data) - invert_layer = string_lookup.StringLookup( - vocabulary=vocab_data, invert=True) - int_data = layer(input_data) - out_data = invert_layer(int_data) - model = keras.Model(inputs=input_data, outputs=out_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_forward_backward_adapted_vocab(self): - adapt_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "[UNK]"]]) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = string_lookup.StringLookup() - layer.adapt(adapt_data) - invert_layer = string_lookup.StringLookup( - vocabulary=layer.get_vocabulary(), invert=True) - int_data = layer(input_data) - out_data = invert_layer(int_data) - model = keras.Model(inputs=input_data, outputs=out_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_ragged_string_input_multi_bucket(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = tf.ragged.constant([["earth", "wind", "fire"], - ["fire", "and", "earth", "ohio"]]) - expected_output = [[2, 3, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string, ragged=True) - layer = string_lookup.StringLookup(num_oov_indices=2) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_data = model.predict(input_array) - self.assertAllEqual(expected_output, output_data) - - def test_tensor_vocab(self): - vocab_data = ["[UNK]", "wind", "and", "fire"] - vocab_tensor = tf.constant(vocab_data) - layer = string_lookup.StringLookup(vocabulary=vocab_tensor) - returned_vocab = layer.get_vocabulary() - self.assertAllEqual(vocab_data, returned_vocab) - self.assertAllEqual(layer.vocabulary_size(), 4) - fn = tf.function(lambda: layer.set_vocabulary(vocab_tensor)) - with self.assertRaisesRegex(RuntimeError, "Cannot set a tensor vocabulary"): - fn() +class StringLookupVocabularyTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def _write_to_temp_file(self, file_name, vocab_list): + vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") + with tf.io.gfile.GFile(vocab_path, "w") as writer: + for vocab in vocab_list: + writer.write(vocab + "\n") + writer.flush() + writer.close() + return vocab_path + + def test_int_output_explicit_vocab(self): + vocab_data = ["earth", "wind", "and", 
"fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup(vocabulary=vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_int_output_explicit_vocab_with_special_tokens(self): + vocab_data = ["", "[UNK]", "earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup(vocabulary=vocab_data, mask_token="") + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_int_output_no_oov(self): + vocab_data = ["earth", "wind", "and", "fire"] + valid_input = np.array( + [["earth", "wind", "and", "fire"], ["fire", "and", "earth", ""]] + ) + invalid_input = np.array( + [ + ["earth", "wind", "and", "michigan"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup( + vocabulary=vocab_data, mask_token="", num_oov_indices=0 + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(valid_input) + self.assertAllEqual(expected_output, output_data) + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, "found OOV values.*michigan" + ): + _ = model.predict(invalid_input) + + def test_no_vocab(self): + with self.assertRaisesRegex( + RuntimeError, "you must set the layer's vocabulary" + ): + layer = string_lookup.StringLookup(output_mode="binary") + layer([["a"]]) + + def test_one_hot_output(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array(["earth", "wind", "and", "fire", "michigan"]) + expected_output = [ + [0, 1, 0, 0, 0], + [0, 0, 1, 0, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + ] + + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = string_lookup.StringLookup( + vocabulary=vocab_data, output_mode="one_hot" + ) + res = layer(input_data) + model = keras.Model(inputs=input_data, outputs=res) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_multi_hot_output(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[0, 1, 1, 1, 1], [1, 1, 0, 1, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup( + vocabulary=vocab_data, output_mode="multi_hot" + ) + res = layer(input_data) + model = keras.Model(inputs=input_data, outputs=res) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_count_output(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "earth", "fire", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[0, 2, 0, 0, 2], [1, 1, 0, 1, 1]] + + 
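To make the expected `count` outputs asserted above concrete, here is a minimal standalone sketch (using the public `tf.keras.layers.StringLookup` API rather than the test-internal imports) reproducing the arithmetic: column 0 is the OOV slot, and each remaining column counts occurrences of one vocabulary term.

```python
import numpy as np
import tensorflow as tf

vocab = ["earth", "wind", "and", "fire"]
layer = tf.keras.layers.StringLookup(vocabulary=vocab, output_mode="count")

batch = np.array(
    [["earth", "earth", "fire", "fire"], ["fire", "and", "earth", "michigan"]]
)
# Column 0 counts OOV hits ("michigan"); columns 1..4 count the vocab terms,
# giving [[0, 2, 0, 0, 2], [1, 1, 0, 1, 1]] as in the test above.
print(layer(batch).numpy())
```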
input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup( + vocabulary=vocab_data, output_mode="count" + ) + res = layer(input_data) + model = keras.Model(inputs=input_data, outputs=res) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_sparse_output(self): + vocab_data = ["earth", "wind", "and", "fire"] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup( + vocabulary=vocab_data, output_mode="multi_hot", sparse=True + ) + res = layer(input_data) + self.assertEqual(res.__class__.__name__, "SparseKerasTensor") + + def test_get_vocab_returns_str(self): + vocab_data = ["earth", "wind", "and", "fire"] + expected_vocab = ["[UNK]", "earth", "wind", "and", "fire"] + layer = string_lookup.StringLookup(vocabulary=vocab_data) + layer_vocab = layer.get_vocabulary() + self.assertAllEqual(expected_vocab, layer_vocab) + self.assertIsInstance(layer_vocab[0], str) + + inverse_layer = string_lookup.StringLookup( + vocabulary=layer.get_vocabulary(), invert=True + ) + layer_vocab = inverse_layer.get_vocabulary() + self.assertAllEqual(expected_vocab, layer_vocab) + self.assertIsInstance(layer_vocab[0], str) + + def test_int_output_explicit_vocab_from_file(self): + vocab_list = ["earth", "wind", "and", "fire"] + vocab_path = self._write_to_temp_file("vocab_file", vocab_list) + + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup(vocabulary=vocab_path) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_int_output_explicit_vocab_from_file_via_setter(self): + vocab_list = ["earth", "wind", "and", "fire"] + vocab_path = self._write_to_temp_file("vocab_file", vocab_list) + + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup() + layer.set_vocabulary(vocab_path) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_non_unique_vocab_fails(self): + vocab_data = ["earth", "wind", "and", "fire", "fire"] + with self.assertRaisesRegex(ValueError, ".*repeated term.*fire.*"): + _ = string_lookup.StringLookup(vocabulary=vocab_data) + + def test_non_unique_vocab_from_file_fails(self): + vocab_list = ["earth", "wind", "and", "fire", "earth"] + vocab_path = self._write_to_temp_file("repeat_vocab_file", vocab_list) + with self.assertRaisesRegex( + tf.errors.FailedPreconditionError, + "HashTable has different value for same key.*earth", + ): + _ = string_lookup.StringLookup(vocabulary=vocab_path) + + def test_inverse_layer(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 0]]) + expected_output = np.array( + [["earth", "wind", "and", "fire"], ["fire", "and", "earth", ""]] + ) + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = string_lookup.StringLookup( + vocabulary=vocab_data, invert=True, mask_token="" + ) + int_data =
layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_inverse_layer_from_file(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 0]]) + expected_output = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "[UNK]"], + ] + ) + vocab_path = self._write_to_temp_file("vocab_file", vocab_data) + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = string_lookup.StringLookup(vocabulary=vocab_path, invert=True) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_inverse_layer_from_file_with_mask(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 0]]) + expected_output = np.array( + [["earth", "wind", "and", "fire"], ["fire", "and", "earth", "[M]"]] + ) + vocab_path = self._write_to_temp_file("vocab_file", vocab_data) + + input_data = keras.Input(shape=(None,), dtype=tf.int64) + layer = string_lookup.StringLookup( + vocabulary=vocab_path, invert=True, mask_token="[M]" + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_forward_backward_explicit_vocab(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "[UNK]"], + ] + ) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup(vocabulary=vocab_data) + invert_layer = string_lookup.StringLookup( + vocabulary=vocab_data, invert=True + ) + int_data = layer(input_data) + out_data = invert_layer(int_data) + model = keras.Model(inputs=input_data, outputs=out_data) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_forward_backward_adapted_vocab(self): + adapt_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "[UNK]"], + ] + ) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup() + layer.adapt(adapt_data) + invert_layer = string_lookup.StringLookup( + vocabulary=layer.get_vocabulary(), invert=True + ) + int_data = layer(input_data) + out_data = invert_layer(int_data) + model = keras.Model(inputs=input_data, outputs=out_data) + output_data = model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_ragged_string_input_multi_bucket(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = tf.ragged.constant( + [["earth", "wind", "fire"], ["fire", "and", "earth", "ohio"]] + ) + expected_output = [[2, 3, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string, ragged=True) + layer = string_lookup.StringLookup(num_oov_indices=2) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_data = 
model.predict(input_array) + self.assertAllEqual(expected_output, output_data) + + def test_tensor_vocab(self): + vocab_data = ["[UNK]", "wind", "and", "fire"] + vocab_tensor = tf.constant(vocab_data) + layer = string_lookup.StringLookup(vocabulary=vocab_tensor) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + self.assertAllEqual(layer.vocabulary_size(), 4) + fn = tf.function(lambda: layer.set_vocabulary(vocab_tensor)) + with self.assertRaisesRegex( + RuntimeError, "Cannot set a tensor vocabulary" + ): + fn() + + @test_utils.run_v2_only() + def test_saving_v3(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array(["earth", "wind", "and", "fire"]) + + # First, with a static vocabulary. + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup(vocabulary=vocab_data) + output = layer(input_data) + model = keras.Model(inputs=input_data, outputs=output) + ref_output = model.predict(input_array) + temp_dir = self.get_temp_dir() + model_path = os.path.join(temp_dir, "mymodel.keras") + model.save(model_path, save_format="keras_v3") + model = keras.models.load_model(model_path) + output = model.predict(input_array) + self.assertAllEqual(output, ref_output) + + # Second, with adapt(). + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup() + layer.adapt(vocab_data) + output = layer(input_data) + model = keras.Model(inputs=input_data, outputs=output) + ref_output = model.predict(input_array) + model.save(model_path, save_format="keras_v3", overwrite=True) + model = keras.models.load_model(model_path) + output = model.predict(input_array) + self.assertAllEqual(output, ref_output) + + # Test TF-IDF + adapt(). + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = string_lookup.StringLookup(output_mode="tf_idf") + layer.adapt(vocab_data) + output = layer(input_data) + model = keras.Model(inputs=input_data, outputs=output) + ref_output = model.predict(input_array) + model.save(model_path, save_format="keras_v3", overwrite=True) + model = keras.models.load_model(model_path) + output = model.predict(input_array) + self.assertAllEqual(output, ref_output) + if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/preprocessing/text_vectorization.py b/keras/layers/preprocessing/text_vectorization.py index 80c77fece698..cd65e21bec4b 100644 --- a/keras/layers/preprocessing/text_vectorization.py +++ b/keras/layers/preprocessing/text_vectorization.py @@ -14,18 +14,20 @@ # ============================================================================== """Keras text vectorization preprocessing layer.""" -# pylint: disable=g-classes-have-attributes +import numpy as np +import tensorflow.compat.v2 as tf from keras import backend from keras.engine import base_preprocessing_layer from keras.layers.preprocessing import preprocessing_utils as utils from keras.layers.preprocessing import string_lookup -from keras.saving.saved_model import layer_serialization +from keras.saving.legacy.saved_model import layer_serialization +from keras.saving.serialization_lib import deserialize_keras_object from keras.utils import layer_utils from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.util.tf_export import keras_export LOWER_AND_STRIP_PUNCTUATION = "lower_and_strip_punctuation" @@ -49,545 +51,633 @@ @keras_export( "keras.layers.TextVectorization", 
"keras.layers.experimental.preprocessing.TextVectorization", - v1=[]) + v1=[], +) class TextVectorization(base_preprocessing_layer.PreprocessingLayer): - """A preprocessing layer which maps text features to integer sequences. - - This layer has basic options for managing text in a Keras model. It transforms - a batch of strings (one example = one string) into either a list of token - indices (one example = 1D tensor of integer token indices) or a dense - representation (one example = 1D tensor of float values representing data - about the example's tokens). This layer is meant to handle natural language - inputs. To handle simple string inputs (categorical strings or pre-tokenized - strings) see `tf.keras.layers.StringLookup`. - - The vocabulary for the layer must be either supplied on construction or - learned via `adapt()`. When this layer is adapted, it will analyze the - dataset, determine the frequency of individual string values, and create a - vocabulary from them. This vocabulary can have unlimited size or be capped, - depending on the configuration options for this layer; if there are more - unique values in the input than the maximum vocabulary size, the most frequent - terms will be used to create the vocabulary. - - The processing of each example contains the following steps: - - 1. Standardize each example (usually lowercasing + punctuation stripping) - 2. Split each example into substrings (usually words) - 3. Recombine substrings into tokens (usually ngrams) - 4. Index tokens (associate a unique int value with each token) - 5. Transform each example using this index, either into a vector of ints or - a dense float vector. - - Some notes on passing callables to customize splitting and normalization for - this layer: - - 1. Any callable can be passed to this Layer, but if you want to serialize - this object you should only pass functions that are registered Keras - serializables (see `tf.keras.utils.register_keras_serializable` for more - details). - 2. When using a custom callable for `standardize`, the data received - by the callable will be exactly as passed to this layer. The callable - should return a tensor of the same shape as the input. - 3. When using a custom callable for `split`, the data received by the - callable will have the 1st dimension squeezed out - instead of - `[["string to split"], ["another string to split"]]`, the Callable will - see `["string to split", "another string to split"]`. The callable should - return a Tensor with the first dimension containing the split tokens - - in this example, we should see something like `[["string", "to", - "split"], ["another", "string", "to", "split"]]`. This makes the callable - site natively compatible with `tf.strings.split()`. - - For an overview and full list of preprocessing layers, see the preprocessing - [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - max_tokens: Maximum size of the vocabulary for this layer. This should only - be specified when adapting a vocabulary or when setting - `pad_to_max_tokens=True`. Note that this vocabulary - contains 1 OOV token, so the effective number of tokens is `(max_tokens - - 1 - (1 if output_mode == "int" else 0))`. - standardize: Optional specification for standardization to apply to the - input text. Values can be: - - `None`: No standardization. - - `"lower_and_strip_punctuation"`: Text will be lowercased and all - punctuation removed. - - `"lower"`: Text will be lowercased. - - `"strip_punctuation"`: All punctuation will be removed. 
- - Callable: Inputs will passed to the callable function, which should - standardized and returned. - split: Optional specification for splitting the input text. Values can be: - - `None`: No splitting. - - `"whitespace"`: Split on whitespace. - - `"character"`: Split on each unicode character. - - Callable: Standardized inputs will passed to the callable function, - which should split and returned. - ngrams: Optional specification for ngrams to create from the possibly-split - input text. Values can be None, an integer or tuple of integers; passing - an integer will create ngrams up to that integer, and passing a tuple of - integers will create ngrams for the specified values in the tuple. Passing - None means that no ngrams will be created. - output_mode: Optional specification for the output of the layer. Values can - be `"int"`, `"multi_hot"`, `"count"` or `"tf_idf"`, configuring the layer - as follows: - - `"int"`: Outputs integer indices, one integer index per split string - token. When `output_mode == "int"`, 0 is reserved for masked - locations; this reduces the vocab size to - `max_tokens - 2` instead of `max_tokens - 1`. - - `"multi_hot"`: Outputs a single int array per batch, of either - vocab_size or max_tokens size, containing 1s in all elements where the - token mapped to that index exists at least once in the batch item. - - `"count"`: Like `"multi_hot"`, but the int array contains a count of - the number of times the token at that index appeared in the - batch item. - - `"tf_idf"`: Like `"multi_hot"`, but the TF-IDF algorithm is applied to - find the value in each token slot. - For `"int"` output, any shape of input and output is supported. For all - other output modes, currently only rank 1 inputs (and rank 2 outputs after - splitting) are supported. - output_sequence_length: Only valid in INT mode. If set, the output will have - its time dimension padded or truncated to exactly `output_sequence_length` - values, resulting in a tensor of shape - `(batch_size, output_sequence_length)` regardless of how many tokens - resulted from the splitting step. Defaults to None. - pad_to_max_tokens: Only valid in `"multi_hot"`, `"count"`, and `"tf_idf"` - modes. If True, the output will have its feature axis padded to - `max_tokens` even if the number of unique tokens in the vocabulary is less - than max_tokens, resulting in a tensor of shape `(batch_size, max_tokens)` - regardless of vocabulary size. Defaults to False. - vocabulary: Optional. Either an array of strings or a string path to a text - file. If passing an array, can pass a tuple, list, 1D numpy array, or 1D - tensor containing the string vocbulary terms. If passing a file path, the - file should contain one line per term in the vocabulary. If this argument - is set, there is no need to `adapt()` the layer. - idf_weights: Only valid when `output_mode` is `"tf_idf"`. A tuple, list, 1D - numpy array, or 1D tensor or the same length as the vocabulary, containing - the floating point inverse document frequency weights, which will be - multiplied by per sample term counts for the final `tf_idf` weight. If the - `vocabulary` argument is set, and `output_mode` is `"tf_idf"`, this - argument must be supplied. - ragged: Boolean. Only applicable to `"int"` output mode. If True, returns a - `RaggedTensor` instead of a dense `Tensor`, where each sequence may have a - different length after string splitting. Defaults to False. - sparse: Boolean. Only applicable to `"multi_hot"`, `"count"`, and - `"tf_idf"` output modes. 
If True, returns a `SparseTensor` instead of a - dense `Tensor`. Defaults to False. - - Example: - - This example instantiates a `TextVectorization` layer that lowercases text, - splits on whitespace, strips punctuation, and outputs integer vocab indices. - - >>> text_dataset = tf.data.Dataset.from_tensor_slices(["foo", "bar", "baz"]) - >>> max_features = 5000 # Maximum vocab size. - >>> max_len = 4 # Sequence length to pad the outputs to. - >>> - >>> # Create the layer. - >>> vectorize_layer = tf.keras.layers.TextVectorization( - ... max_tokens=max_features, - ... output_mode='int', - ... output_sequence_length=max_len) - >>> - >>> # Now that the vocab layer has been created, call `adapt` on the text-only - >>> # dataset to create the vocabulary. You don't have to batch, but for large - >>> # datasets this means we're not keeping spare copies of the dataset. - >>> vectorize_layer.adapt(text_dataset.batch(64)) - >>> - >>> # Create the model that uses the vectorize text layer - >>> model = tf.keras.models.Sequential() - >>> - >>> # Start by creating an explicit input layer. It needs to have a shape of - >>> # (1,) (because we need to guarantee that there is exactly one string - >>> # input per batch), and the dtype needs to be 'string'. - >>> model.add(tf.keras.Input(shape=(1,), dtype=tf.string)) - >>> - >>> # The first layer in our model is the vectorization layer. After this - >>> # layer, we have a tensor of shape (batch_size, max_len) containing vocab - >>> # indices. - >>> model.add(vectorize_layer) - >>> - >>> # Now, the model can map strings to integers, and you can add an embedding - >>> # layer to map these integers to learned embeddings. - >>> input_data = [["foo qux bar"], ["qux baz"]] - >>> model.predict(input_data) - array([[2, 1, 4, 0], - [1, 3, 0, 0]]) - - Example: - - This example instantiates a `TextVectorization` layer by passing a list - of vocabulary terms to the layer's `__init__()` method. - - >>> vocab_data = ["earth", "wind", "and", "fire"] - >>> max_len = 4 # Sequence length to pad the outputs to. - >>> - >>> # Create the layer, passing the vocab directly. You can also pass the - >>> # vocabulary arg a path to a file containing one vocabulary word per - >>> # line. - >>> vectorize_layer = tf.keras.layers.TextVectorization( - ... max_tokens=max_features, - ... output_mode='int', - ... output_sequence_length=max_len, - ... vocabulary=vocab_data) - >>> - >>> # Because we've passed the vocabulary directly, we don't need to adapt - >>> # the layer - the vocabulary is already set. The vocabulary contains the - >>> # padding token ('') and OOV token ('[UNK]') as well as the passed tokens. - >>> vectorize_layer.get_vocabulary() - ['', '[UNK]', 'earth', 'wind', 'and', 'fire'] - - """ - - def __init__(self, - max_tokens=None, - standardize="lower_and_strip_punctuation", - split="whitespace", - ngrams=None, - output_mode="int", - output_sequence_length=None, - pad_to_max_tokens=False, - vocabulary=None, - idf_weights=None, - sparse=False, - ragged=False, - **kwargs): - - # This layer only applies to string processing, and so should only have - # a dtype of 'string'. - if "dtype" in kwargs and kwargs["dtype"] != tf.string: - raise ValueError( - f"`TextVectorization` may only have a dtype of string. 
" - f"Received dtype: {kwargs['dtype']}.") - elif "dtype" not in kwargs: - kwargs["dtype"] = tf.string - - # 'standardize' must be one of - # (None, LOWER_AND_STRIP_PUNCTUATION, LOWER, STRIP_PUNCTUATION, callable) - layer_utils.validate_string_arg( - standardize, - allowable_strings=(LOWER_AND_STRIP_PUNCTUATION, LOWER, - STRIP_PUNCTUATION), - layer_name="TextVectorization", - arg_name="standardize", - allow_none=True, - allow_callables=True) - - # 'split' must be one of (None, WHITESPACE, CHARACTER, callable) - layer_utils.validate_string_arg( - split, - allowable_strings=(WHITESPACE, CHARACTER), - layer_name="TextVectorization", - arg_name="split", - allow_none=True, - allow_callables=True) - - # Support deprecated names for output_modes. - if output_mode == "binary": - output_mode = MULTI_HOT - if output_mode == "tf-idf": - output_mode = TF_IDF - # 'output_mode' must be one of (None, INT, COUNT, MULTI_HOT, TF_IDF) - layer_utils.validate_string_arg( - output_mode, - allowable_strings=(INT, COUNT, MULTI_HOT, TF_IDF), - layer_name="TextVectorization", - arg_name="output_mode", - allow_none=True) - - # 'ngrams' must be one of (None, int, tuple(int)) - if not (ngrams is None or - isinstance(ngrams, int) or - isinstance(ngrams, tuple) and - all(isinstance(item, int) for item in ngrams)): - raise ValueError(f"`ngrams` must be None, an integer, or a tuple of " - f"integers. Received: ngrams={ngrams}") - - # 'output_sequence_length' must be one of (None, int) and is only - # set if output_mode is INT. - if (output_mode == INT and not (isinstance(output_sequence_length, int) or - (output_sequence_length is None))): - raise ValueError(f"`output_sequence_length` must be either None or an " - f"integer when `output_mode` is 'int'. Received: " - f"output_sequence_length={output_sequence_length}") - - if output_mode != INT and output_sequence_length is not None: - raise ValueError( - f"`output_sequence_length` must not be set if `output_mode` is not " - f"'int'. Received output_sequence_length={output_sequence_length}.") - - if ragged and output_mode != INT: - raise ValueError(f"`ragged` must not be true if `output_mode` is " - f"`'int'`. Received: ragged={ragged} and " - f"output_mode={output_mode}") - - if ragged and output_sequence_length is not None: - raise ValueError(f"`output_sequence_length` must not be set if ragged " - f"is True. Received: ragged={ragged} and " - f"output_sequence_length={output_sequence_length}") - - self._max_tokens = max_tokens - self._standardize = standardize - self._split = split - self._ngrams_arg = ngrams - if isinstance(ngrams, int): - self._ngrams = tuple(range(1, ngrams + 1)) - else: - self._ngrams = ngrams - self._ragged = ragged - - self._output_mode = output_mode - self._output_sequence_length = output_sequence_length - - # VocabularySavedModelSaver will clear the config vocabulary to restore the - # lookup table ops directly. We persist this hidden option to persist the - # fact that we have have a non-adaptable layer with a manually set vocab. - self._has_input_vocabulary = kwargs.pop("has_input_vocabulary", - (vocabulary is not None)) - - # Drop deprecated config options. 
- kwargs.pop("vocabulary_size", None) - - super().__init__(**kwargs) - base_preprocessing_layer.keras_kpl_gauge.get_cell("TextVectorization").set( - True) - - self._lookup_layer = string_lookup.StringLookup( - max_tokens=max_tokens, - vocabulary=vocabulary, - idf_weights=idf_weights, - pad_to_max_tokens=pad_to_max_tokens, - mask_token="", - output_mode=output_mode if output_mode is not None else INT, - sparse=sparse, - has_input_vocabulary=self._has_input_vocabulary) - - def compute_output_shape(self, input_shape): - if self._output_mode == INT: - return tf.TensorShape([input_shape[0], self._output_sequence_length]) - - if self._split is None: - if len(input_shape) <= 1: - input_shape = tuple(input_shape) + (1,) - else: - input_shape = tuple(input_shape) + (None,) - return self._lookup_layer.compute_output_shape(input_shape) - - def compute_output_signature(self, input_spec): - output_shape = self.compute_output_shape(input_spec.shape.as_list()) - output_dtype = (tf.int64 if self._output_mode == INT - else backend.floatx()) - return tf.TensorSpec(shape=output_shape, dtype=output_dtype) - - # We override this method solely to generate a docstring. - def adapt(self, data, batch_size=None, steps=None): - """Computes a vocabulary of string terms from tokens in a dataset. - - Calling `adapt()` on a `TextVectorization` layer is an alternative to - passing in a precomputed vocabulary on construction via the `vocabulary` - argument. A `TextVectorization` layer should always be either adapted over a - dataset or supplied with a vocabulary. - - During `adapt()`, the layer will build a vocabulary of all string tokens - seen in the dataset, sorted by occurance count, with ties broken by sort - order of the tokens (high to low). At the end of `adapt()`, if `max_tokens` - is set, the vocabulary wil be truncated to `max_tokens` size. For example, - adapting a layer with `max_tokens=1000` will compute the 1000 most frequent - tokens occurring in the input dataset. If `output_mode='tf-idf'`, `adapt()` - will also learn the document frequencies of each token in the input dataset. - - In order to make `TextVectorization` efficient in any distribution context, - the vocabulary is kept static with respect to any compiled `tf.Graph`s that - call the layer. As a consequence, if the layer is adapted a second time, - any models using the layer should be re-compiled. For more information - see `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`. - - `adapt()` is meant only as a single machine utility to compute layer state. - To analyze a dataset that cannot fit on a single machine, see - [Tensorflow Transform](https://www.tensorflow.org/tfx/transform/get_started) - for a multi-machine, map-reduce solution. - - Arguments: - data: The data to train on. It can be passed either as a - `tf.data.Dataset`, or as a numpy array. - batch_size: Integer or `None`. - Number of samples per state update. - If unspecified, `batch_size` will default to 32. - Do not specify the `batch_size` if your data is in the - form of datasets, generators, or `keras.utils.Sequence` instances - (since they generate batches). - steps: Integer or `None`. - Total number of steps (batches of samples) - When training with input tensors such as - TensorFlow data tensors, the default `None` is equal to - the number of samples in your dataset divided by - the batch size, or 1 if that cannot be determined. If x is a - `tf.data` dataset, and 'steps' is None, the epoch will run until - the input dataset is exhausted. 
When passing an infinitely - repeating dataset, you must specify the `steps` argument. This - argument is not supported with array inputs. - """ - super().adapt(data, batch_size=batch_size, steps=steps) - - def update_state(self, data): - self._lookup_layer.update_state(self._preprocess(data)) - - def finalize_state(self): - self._lookup_layer.finalize_state() - - def reset_state(self): # pylint: disable=method-hidden - self._lookup_layer.reset_state() - - def get_vocabulary(self, include_special_tokens=True): - """Returns the current vocabulary of the layer. + """A preprocessing layer which maps text features to integer sequences. + + This layer has basic options for managing text in a Keras model. It + transforms a batch of strings (one example = one string) into either a list + of token indices (one example = 1D tensor of integer token indices) or a + dense representation (one example = 1D tensor of float values representing + data about the example's tokens). This layer is meant to handle natural + language inputs. To handle simple string inputs (categorical strings or + pre-tokenized strings) see `tf.keras.layers.StringLookup`. + + The vocabulary for the layer must be either supplied on construction or + learned via `adapt()`. When this layer is adapted, it will analyze the + dataset, determine the frequency of individual string values, and create a + vocabulary from them. This vocabulary can have unlimited size or be capped, + depending on the configuration options for this layer; if there are more + unique values in the input than the maximum vocabulary size, the most + frequent terms will be used to create the vocabulary. + + The processing of each example contains the following steps: + + 1. Standardize each example (usually lowercasing + punctuation stripping) + 2. Split each example into substrings (usually words) + 3. Recombine substrings into tokens (usually ngrams) + 4. Index tokens (associate a unique int value with each token) + 5. Transform each example using this index, either into a vector of ints or + a dense float vector. + + Some notes on passing callables to customize splitting and normalization for + this layer: + + 1. Any callable can be passed to this Layer, but if you want to serialize + this object you should only pass functions that are registered Keras + serializables (see `tf.keras.saving.register_keras_serializable` for more + details). + 2. When using a custom callable for `standardize`, the data received + by the callable will be exactly as passed to this layer. The callable + should return a tensor of the same shape as the input. + 3. When using a custom callable for `split`, the data received by the + callable will have the 1st dimension squeezed out - instead of + `[["string to split"], ["another string to split"]]`, the Callable will + see `["string to split", "another string to split"]`. The callable should + return a Tensor with the first dimension containing the split tokens - + in this example, we should see something like `[["string", "to", + "split"], ["another", "string", "to", "split"]]`. This makes the callable + site natively compatible with `tf.strings.split()`. + + For an overview and full list of preprocessing layers, see the preprocessing + [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers). Args: - include_special_tokens: If True, the returned vocabulary will include - the padding and OOV tokens, and a term's index in the vocabulary will - equal the term's index when calling the layer. 
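As a quick illustration of the special-token behavior documented here, this sketch against the public `tf.keras.layers.TextVectorization` API shows `get_vocabulary()` with and without the padding and OOV slots (the expected output matches the docstring example in this patch):

```python
import tensorflow as tf

layer = tf.keras.layers.TextVectorization(
    vocabulary=["earth", "wind", "and", "fire"]
)
# With special tokens, list positions match the indices the layer emits.
print(layer.get_vocabulary())
# ['', '[UNK]', 'earth', 'wind', 'and', 'fire']
print(layer.get_vocabulary(include_special_tokens=False))
# ['earth', 'wind', 'and', 'fire']
```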
If False, the returned - vocabulary will not include any padding or OOV tokens. - """ - return self._lookup_layer.get_vocabulary(include_special_tokens) - - def vocabulary_size(self): - """Gets the current size of the layer's vocabulary. + max_tokens: Maximum size of the vocabulary for this layer. This should + only be specified when adapting a vocabulary or when setting + `pad_to_max_tokens=True`. Note that this vocabulary + contains 1 OOV token, so the effective number of tokens is + `(max_tokens - 1 - (1 if output_mode == "int" else 0))`. + standardize: Optional specification for standardization to apply to the + input text. Values can be: + - `None`: No standardization. + - `"lower_and_strip_punctuation"`: Text will be lowercased and all + punctuation removed. + - `"lower"`: Text will be lowercased. + - `"strip_punctuation"`: All punctuation will be removed. + - Callable: Inputs will be passed to the callable function, which should + be standardized and returned. + split: Optional specification for splitting the input text. Values can be: + - `None`: No splitting. + - `"whitespace"`: Split on whitespace. + - `"character"`: Split on each unicode character. + - Callable: Standardized inputs will be passed to the callable function, + which should be split and returned. + ngrams: Optional specification for ngrams to create from the + possibly-split input text. Values can be None, an integer or tuple of + integers; passing an integer will create ngrams up to that integer, and + passing a tuple of integers will create ngrams for the specified values + in the tuple. Passing None means that no ngrams will be created. + output_mode: Optional specification for the output of the layer. Values + can be `"int"`, `"multi_hot"`, `"count"` or `"tf_idf"`, configuring the + layer as follows: + - `"int"`: Outputs integer indices, one integer index per split string + token. When `output_mode == "int"`, 0 is reserved for masked + locations; this reduces the vocab size to + `max_tokens - 2` instead of `max_tokens - 1`. + - `"multi_hot"`: Outputs a single int array per batch, of either + vocab_size or max_tokens size, containing 1s in all elements where + the token mapped to that index exists at least once in the batch + item. + - `"count"`: Like `"multi_hot"`, but the int array contains a count of + the number of times the token at that index appeared in the + batch item. + - `"tf_idf"`: Like `"multi_hot"`, but the TF-IDF algorithm is applied + to find the value in each token slot. + For `"int"` output, any shape of input and output is supported. For all + other output modes, currently only rank 1 inputs (and rank 2 outputs + after splitting) are supported. + output_sequence_length: Only valid in INT mode. If set, the output will + have its time dimension padded or truncated to exactly + `output_sequence_length` values, resulting in a tensor of shape + `(batch_size, output_sequence_length)` regardless of how many tokens + resulted from the splitting step. Defaults to `None`. + pad_to_max_tokens: Only valid in `"multi_hot"`, `"count"`, and `"tf_idf"` + modes. If True, the output will have its feature axis padded to + `max_tokens` even if the number of unique tokens in the vocabulary is + less than max_tokens, resulting in a tensor of shape `(batch_size, + max_tokens)` regardless of vocabulary size. Defaults to `False`. + vocabulary: Optional. Either an array of strings or a string path to a + text file.
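The callable contract for `standardize` and `split` described in these Args can be sketched as follows (hypothetical helper names, not part of the patch): a `standardize` callable sees the raw input and must return a tensor of the same shape, while a `split` callable sees a 1D batch of strings and returns ragged tokens, like `tf.strings.split`.

```python
import tensorflow as tf

def strip_html(x):
    # Same-shape in, same-shape out, as the `standardize` contract requires.
    return tf.strings.regex_replace(tf.strings.lower(x), "<[^>]*>", "")

def comma_split(x):
    # Receives a 1D tensor of strings; returns a RaggedTensor of tokens.
    return tf.strings.split(x, sep=",")

layer = tf.keras.layers.TextVectorization(
    standardize=strip_html, split=comma_split
)
layer.adapt(["foo,<b>bar</b>", "bar,baz"])
print(layer(["bar,foo"]))
```

Note that only registered Keras serializables survive saving, per the docstring's note above.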
If passing an array, can pass a tuple, list, 1D numpy array, + or 1D tensor containing the string vocabulary terms. If passing a file + path, the file should contain one line per term in the vocabulary. If + this argument is set, there is no need to `adapt()` the layer. + idf_weights: Only valid when `output_mode` is `"tf_idf"`. A tuple, list, + 1D numpy array, or 1D tensor of the same length as the vocabulary, + containing the floating point inverse document frequency weights, which + will be multiplied by per sample term counts for the final `tf_idf` + weight. If the `vocabulary` argument is set, and `output_mode` is + `"tf_idf"`, this argument must be supplied. + ragged: Boolean. Only applicable to `"int"` output mode. If True, returns + a `RaggedTensor` instead of a dense `Tensor`, where each sequence may + have a different length after string splitting. Defaults to `False`. + sparse: Boolean. Only applicable to `"multi_hot"`, `"count"`, and + `"tf_idf"` output modes. If True, returns a `SparseTensor` instead of a + dense `Tensor`. Defaults to `False`. + encoding: Optional. The text encoding to use to interpret the input + strings. Defaults to `"utf-8"`. + + Example: + + This example instantiates a `TextVectorization` layer that lowercases text, + splits on whitespace, strips punctuation, and outputs integer vocab indices. + + >>> text_dataset = tf.data.Dataset.from_tensor_slices(["foo", "bar", "baz"]) + >>> max_features = 5000 # Maximum vocab size. + >>> max_len = 4 # Sequence length to pad the outputs to. + >>> + >>> # Create the layer. + >>> vectorize_layer = tf.keras.layers.TextVectorization( + ... max_tokens=max_features, + ... output_mode='int', + ... output_sequence_length=max_len) + >>> + >>> # Now that the vocab layer has been created, call `adapt` on the + >>> # text-only dataset to create the vocabulary. You don't have to batch, + >>> # but for large datasets this means we're not keeping spare copies of + >>> # the dataset. + >>> vectorize_layer.adapt(text_dataset.batch(64)) + >>> + >>> # Create the model that uses the vectorize text layer + >>> model = tf.keras.models.Sequential() + >>> + >>> # Start by creating an explicit input layer. It needs to have a shape of + >>> # (1,) (because we need to guarantee that there is exactly one string + >>> # input per batch), and the dtype needs to be 'string'. + >>> model.add(tf.keras.Input(shape=(1,), dtype=tf.string)) + >>> + >>> # The first layer in our model is the vectorization layer. After this + >>> # layer, we have a tensor of shape (batch_size, max_len) containing + >>> # vocab indices. + >>> model.add(vectorize_layer) + >>> + >>> # Now, the model can map strings to integers, and you can add an + >>> # embedding layer to map these integers to learned embeddings. + >>> input_data = [["foo qux bar"], ["qux baz"]] + >>> model.predict(input_data) + array([[2, 1, 4, 0], + [1, 3, 0, 0]]) + + Example: + + This example instantiates a `TextVectorization` layer by passing a list + of vocabulary terms to the layer's `__init__()` method. + + >>> vocab_data = ["earth", "wind", "and", "fire"] + >>> max_len = 4 # Sequence length to pad the outputs to. + >>> + >>> # Create the layer, passing the vocab directly. You can also pass the + >>> # vocabulary arg a path to a file containing one vocabulary word per + >>> # line. + >>> vectorize_layer = tf.keras.layers.TextVectorization( + ... max_tokens=max_features, + ... output_mode='int', + ... output_sequence_length=max_len, + ... 
vocabulary=vocab_data) + >>> + >>> # Because we've passed the vocabulary directly, we don't need to adapt + >>> # the layer - the vocabulary is already set. The vocabulary contains the + >>> # padding token ('') and OOV token ('[UNK]') as well as the passed + >>> # tokens. + >>> vectorize_layer.get_vocabulary() + ['', '[UNK]', 'earth', 'wind', 'and', 'fire'] - Returns: - The integer size of the vocabulary, including optional mask and - OOV indices. """ - return self._lookup_layer.vocabulary_size() - - def get_config(self): - vocab = self._lookup_layer.input_vocabulary - idf_weights = self._lookup_layer.input_idf_weights - config = { - "max_tokens": self._lookup_layer.max_tokens, - "standardize": self._standardize, - "split": self._split, - "ngrams": self._ngrams_arg, - "output_mode": self._output_mode, - "output_sequence_length": self._output_sequence_length, - "pad_to_max_tokens": self._lookup_layer.pad_to_max_tokens, - "sparse": self._lookup_layer.sparse, - "ragged": self._ragged, - "vocabulary": utils.listify_tensors(vocab), - "idf_weights": utils.listify_tensors(idf_weights), - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - def set_vocabulary(self, vocabulary, idf_weights=None): - """Sets vocabulary (and optionally document frequency) data for this layer. - - This method sets the vocabulary and idf weights for this layer directly, - instead of analyzing a dataset through 'adapt'. It should be used whenever - the vocab (and optionally document frequency) information is already known. - If vocabulary data is already present in the layer, this method will replace - it. - Args: - vocabulary: Either an array or a string path to a text file. If passing an - array, can pass a tuple, list, 1D numpy array, or 1D tensor containing - the vocbulary terms. If passing a file path, the file should contain one - line per term in the vocabulary. - idf_weights: A tuple, list, 1D numpy array, or 1D tensor of inverse - document frequency weights with equal length to vocabulary. Must be set - if `output_mode` is `"tf_idf"`. Should not be set otherwise. - - Raises: - ValueError: If there are too many inputs, the inputs do not match, or - input data is missing. - RuntimeError: If the vocabulary cannot be set when this function is - called. This happens when `"multi_hot"`, `"count"`, and "tf_idf" modes, - if `pad_to_max_tokens` is False and the layer itself has already been - called. - """ - self._lookup_layer.set_vocabulary(vocabulary, idf_weights=idf_weights) - - def _preprocess(self, inputs): - inputs = utils.ensure_tensor(inputs, dtype=tf.string) - if self._standardize in (LOWER, LOWER_AND_STRIP_PUNCTUATION): - inputs = tf.strings.lower(inputs) - if self._standardize in (STRIP_PUNCTUATION, LOWER_AND_STRIP_PUNCTUATION): - inputs = tf.strings.regex_replace(inputs, DEFAULT_STRIP_REGEX, "") - if callable(self._standardize): - inputs = self._standardize(inputs) - - if self._split is not None: - # If we are splitting, we validate that the 1st axis is of dimension 1 and - # so can be squeezed out. We do this here instead of after splitting for - # performance reasons - it's more expensive to squeeze a ragged tensor. - if inputs.shape.rank > 1: - if inputs.shape[-1] != 1: - raise ValueError( - "When using `TextVectorization` to tokenize strings, the input " - "rank must be 1 or the last shape dimension must be 1. 
Received: " - f"inputs.shape={inputs.shape} with rank={inputs.shape.rank}") + def __init__( + self, + max_tokens=None, + standardize="lower_and_strip_punctuation", + split="whitespace", + ngrams=None, + output_mode="int", + output_sequence_length=None, + pad_to_max_tokens=False, + vocabulary=None, + idf_weights=None, + sparse=False, + ragged=False, + encoding="utf-8", + **kwargs, + ): + + # This layer only applies to string processing, and so should only have + # a dtype of 'string'. + if "dtype" in kwargs and kwargs["dtype"] != tf.string: + raise ValueError( + "`TextVectorization` may only have a dtype of string. " + f"Received dtype: {kwargs['dtype']}." + ) + elif "dtype" not in kwargs: + kwargs["dtype"] = tf.string + + # 'standardize' must be one of + # (None, LOWER_AND_STRIP_PUNCTUATION, LOWER, STRIP_PUNCTUATION, + # callable) + layer_utils.validate_string_arg( + standardize, + allowable_strings=( + LOWER_AND_STRIP_PUNCTUATION, + LOWER, + STRIP_PUNCTUATION, + ), + layer_name="TextVectorization", + arg_name="standardize", + allow_none=True, + allow_callables=True, + ) + + # 'split' must be one of (None, WHITESPACE, CHARACTER, callable) + layer_utils.validate_string_arg( + split, + allowable_strings=(WHITESPACE, CHARACTER), + layer_name="TextVectorization", + arg_name="split", + allow_none=True, + allow_callables=True, + ) + + # Support deprecated names for output_modes. + if output_mode == "binary": + output_mode = MULTI_HOT + if output_mode == "tf-idf": + output_mode = TF_IDF + # 'output_mode' must be one of (None, INT, COUNT, MULTI_HOT, TF_IDF) + layer_utils.validate_string_arg( + output_mode, + allowable_strings=(INT, COUNT, MULTI_HOT, TF_IDF), + layer_name="TextVectorization", + arg_name="output_mode", + allow_none=True, + ) + + # 'ngrams' must be one of (None, int, tuple(int)) + if not ( + ngrams is None + or isinstance(ngrams, int) + or isinstance(ngrams, tuple) + and all(isinstance(item, int) for item in ngrams) + ): + raise ValueError( + "`ngrams` must be None, an integer, or a tuple of " + f"integers. Received: ngrams={ngrams}" + ) + + # 'output_sequence_length' must be one of (None, int) and is only + # set if output_mode is INT. + if output_mode == INT and not ( + isinstance(output_sequence_length, int) + or (output_sequence_length is None) + ): + raise ValueError( + "`output_sequence_length` must be either None or an " + "integer when `output_mode` is 'int'. Received: " + f"output_sequence_length={output_sequence_length}" + ) + + if output_mode != INT and output_sequence_length is not None: + raise ValueError( + "`output_sequence_length` must not be set if `output_mode` is " + "not 'int'. " + f"Received output_sequence_length={output_sequence_length}." + ) + + if ragged and output_mode != INT: + raise ValueError( + "`ragged` must not be true if `output_mode` is " + f"`'int'`. Received: ragged={ragged} and " + f"output_mode={output_mode}" + ) + + if ragged and output_sequence_length is not None: + raise ValueError( + "`output_sequence_length` must not be set if ragged " + f"is True. 
Received: ragged={ragged} and " + f"output_sequence_length={output_sequence_length}" + ) + + self._max_tokens = max_tokens + self._standardize = standardize + self._split = split + self._ngrams_arg = ngrams + if isinstance(ngrams, int): + self._ngrams = tuple(range(1, ngrams + 1)) + else: + self._ngrams = ngrams + self._ragged = ragged + + self._output_mode = output_mode + self._output_sequence_length = output_sequence_length + self._encoding = encoding + + # VocabularySavedModelSaver will clear the config vocabulary to restore + # the lookup table ops directly. We persist this hidden option to + # persist the fact that we have a non-adaptable layer with a + # manually set vocab. + self._has_input_vocabulary = kwargs.pop( + "has_input_vocabulary", (vocabulary is not None) + ) + + vocabulary_size = kwargs.pop("vocabulary_size", None) + + super().__init__(**kwargs) + base_preprocessing_layer.keras_kpl_gauge.get_cell( + "TextVectorization" + ).set(True) + + self._lookup_layer = string_lookup.StringLookup( + max_tokens=max_tokens, + vocabulary=vocabulary, + idf_weights=idf_weights, + pad_to_max_tokens=pad_to_max_tokens, + mask_token="", + output_mode=output_mode if output_mode is not None else INT, + sparse=sparse, + has_input_vocabulary=self._has_input_vocabulary, + encoding=encoding, + vocabulary_size=vocabulary_size, + ) + + def compute_output_shape(self, input_shape): + if self._output_mode == INT: + return tf.TensorShape( + [input_shape[0], self._output_sequence_length] + ) + + if self._split is None: + if len(input_shape) <= 1: + input_shape = tuple(input_shape) + (1,) else: - inputs = tf.squeeze(inputs, axis=-1) - if self._split == WHITESPACE: - # This treats multiple whitespaces as one whitespace, and strips leading - # and trailing whitespace. - inputs = tf.strings.split(inputs) - elif self._split == CHARACTER: - inputs = tf.strings.unicode_split(inputs, "UTF-8") - elif callable(self._split): - inputs = self._split(inputs) - else: - raise ValueError( - ("%s is not a supported splitting." - "TextVectorization supports the following options " - "for `split`: None, 'whitespace', or a Callable.") % self._split) - - # Note that 'inputs' here can be either ragged or dense depending on the - # configuration choices for this Layer. The strings.ngrams op, however, does - # support both ragged and dense inputs. - if self._ngrams is not None: - inputs = tf.strings.ngrams( - inputs, ngram_width=self._ngrams, separator=" ") - - return inputs - - def call(self, inputs): - if isinstance(inputs, (list, tuple, np.ndarray)): - inputs = tf.convert_to_tensor(inputs) - - inputs = self._preprocess(inputs) - - # If we're not doing any output processing, return right away. - if self._output_mode is None: - return inputs - - lookup_data = self._lookup_layer(inputs) - - # For any non-int output, we can return directly from the underlying layer. - if self._output_mode != INT: - return lookup_data - - if self._ragged: - return lookup_data - - # If we have a ragged tensor, we can pad during the conversion to dense. - if tf_utils.is_ragged(lookup_data): - shape = lookup_data.shape.as_list() - # If output sequence length is None, to_tensor will pad the last dimension - # to the bounding shape of the ragged dimension. - shape[-1] = self._output_sequence_length - return lookup_data.to_tensor(default_value=0, shape=shape) - - # If we have a dense tensor, we need to pad/trim directly. - if self._output_sequence_length is not None: - # Maybe trim the output.
- lookup_data = lookup_data[..., :self._output_sequence_length] - - # Maybe pad the output. We need to be careful to use dynamic shape here as - required_space_to_batch_paddings requires a fully known shape. - shape = tf.shape(lookup_data) - padded_shape = tf.concat((shape[:-1], [self._output_sequence_length]), 0) - padding, _ = tf.required_space_to_batch_paddings(shape, padded_shape) - return tf.pad(lookup_data, padding) - - return lookup_data - - @property - def _trackable_saved_model_saver(self): - return layer_serialization.VocabularySavedModelSaver(self) + input_shape = tuple(input_shape) + (None,) + return self._lookup_layer.compute_output_shape(input_shape) + + def compute_output_signature(self, input_spec): + output_shape = self.compute_output_shape(input_spec.shape.as_list()) + output_dtype = ( + tf.int64 if self._output_mode == INT else backend.floatx() + ) + return tf.TensorSpec(shape=output_shape, dtype=output_dtype) + + # We override this method solely to generate a docstring. + def adapt(self, data, batch_size=None, steps=None): + """Computes a vocabulary of string terms from tokens in a dataset. + + Calling `adapt()` on a `TextVectorization` layer is an alternative to + passing in a precomputed vocabulary on construction via the `vocabulary` + argument. A `TextVectorization` layer should always be either adapted + over a dataset or supplied with a vocabulary. + + During `adapt()`, the layer will build a vocabulary of all string tokens + seen in the dataset, sorted by occurrence count, with ties broken by + sort order of the tokens (high to low). At the end of `adapt()`, if + `max_tokens` is set, the vocabulary will be truncated to `max_tokens` + size. For example, adapting a layer with `max_tokens=1000` will compute + the 1000 most frequent tokens occurring in the input dataset. If + `output_mode='tf-idf'`, `adapt()` will also learn the document + frequencies of each token in the input dataset. + + In order to make `TextVectorization` efficient in any distribution + context, the vocabulary is kept static with respect to any compiled + `tf.Graph`s that call the layer. As a consequence, if the layer is + adapted a second time, any models using the layer should be re-compiled. + For more information see + `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`. + + `adapt()` is meant only as a single machine utility to compute layer + state. To analyze a dataset that cannot fit on a single machine, see + [Tensorflow Transform]( + https://www.tensorflow.org/tfx/transform/get_started) for a + multi-machine, map-reduce solution. + + Arguments: + data: The data to train on. It can be passed either as a + `tf.data.Dataset`, or as a numpy array. + batch_size: Integer or `None`. + Number of samples per state update. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of datasets, generators, or `keras.utils.Sequence` instances + (since they generate batches). + steps: Integer or `None`. + Total number of steps (batches of samples) + When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of samples in your dataset divided by + the batch size, or 1 if that cannot be determined. If x is a + `tf.data` dataset, and 'steps' is None, the epoch will run until + the input dataset is exhausted. When passing an infinitely + repeating dataset, you must specify the `steps` argument. This + argument is not supported with array inputs.
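A minimal sketch of the `adapt()` workflow this docstring describes, on a small in-memory dataset (batching is optional but recommended by the layer's own docs for large datasets):

```python
import tensorflow as tf

text_dataset = tf.data.Dataset.from_tensor_slices(
    ["earth wind and fire", "fire and earth", "wind wind wind"]
)
layer = tf.keras.layers.TextVectorization(max_tokens=1000)
layer.adapt(text_dataset.batch(2))
# Most frequent tokens come first, after the '' and '[UNK]' special slots.
print(layer.get_vocabulary())
```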
+ """ + super().adapt(data, batch_size=batch_size, steps=steps) + + def update_state(self, data): + self._lookup_layer.update_state(self._preprocess(data)) + + def finalize_state(self): + self._lookup_layer.finalize_state() + + def reset_state(self): + self._lookup_layer.reset_state() + + def get_vocabulary(self, include_special_tokens=True): + """Returns the current vocabulary of the layer. + + Args: + include_special_tokens: If True, the returned vocabulary will include + the padding and OOV tokens, and a term's index in the vocabulary + will equal the term's index when calling the layer. If False, the + returned vocabulary will not include any padding or OOV tokens. + """ + return self._lookup_layer.get_vocabulary(include_special_tokens) + + def vocabulary_size(self): + """Gets the current size of the layer's vocabulary. + + Returns: + The integer size of the vocabulary, including optional mask and + OOV indices. + """ + return self._lookup_layer.vocabulary_size() + + def get_config(self): + config = { + "max_tokens": self._lookup_layer.max_tokens, + "standardize": self._standardize, + "split": self._split, + "ngrams": self._ngrams_arg, + "output_mode": self._output_mode, + "output_sequence_length": self._output_sequence_length, + "pad_to_max_tokens": self._lookup_layer.pad_to_max_tokens, + "sparse": self._lookup_layer.sparse, + "ragged": self._ragged, + "vocabulary": utils.listify_tensors( + self._lookup_layer.input_vocabulary + ), + "idf_weights": utils.listify_tensors( + self._lookup_layer.input_idf_weights + ), + "encoding": self._encoding, + "vocabulary_size": self.vocabulary_size(), + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if config["standardize"] not in ( + LOWER_AND_STRIP_PUNCTUATION, + LOWER, + STRIP_PUNCTUATION, + ): + config["standardize"] = deserialize_keras_object( + config["standardize"] + ) + if config["split"] not in (WHITESPACE, CHARACTER): + config["split"] = deserialize_keras_object(config["split"]) + return cls(**config) + + def set_vocabulary(self, vocabulary, idf_weights=None): + """Sets vocabulary (and optionally document frequency) for this layer. + + This method sets the vocabulary and idf weights for this layer directly, + instead of analyzing a dataset through 'adapt'. It should be used + whenever the vocab (and optionally document frequency) information is + already known. If vocabulary data is already present in the layer, this + method will replace it. + + Args: + vocabulary: Either an array or a string path to a text file. If + passing an array, can pass a tuple, list, 1D numpy array, or 1D + tensor containing the vocbulary terms. If passing a file path, the + file should contain one line per term in the vocabulary. + idf_weights: A tuple, list, 1D numpy array, or 1D tensor of inverse + document frequency weights with equal length to vocabulary. Must be + set if `output_mode` is `"tf_idf"`. Should not be set otherwise. + + Raises: + ValueError: If there are too many inputs, the inputs do not match, or + input data is missing. + RuntimeError: If the vocabulary cannot be set when this function is + called. This happens when `"multi_hot"`, `"count"`, and "tf_idf" + modes, if `pad_to_max_tokens` is False and the layer itself has + already been called. 
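A sketch of the direct-setting path documented here, with made-up idf values (the weights must align one-to-one with the vocabulary terms when `output_mode="tf_idf"`):

```python
import tensorflow as tf

layer = tf.keras.layers.TextVectorization(output_mode="tf_idf")
layer.set_vocabulary(
    ["earth", "wind", "and", "fire"],
    idf_weights=[0.7, 0.5, 0.3, 0.2],  # illustrative values only
)
# Each output column is the per-sample term count scaled by its idf weight.
print(layer(["earth earth wind"]))
```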
+ """ + self._lookup_layer.set_vocabulary(vocabulary, idf_weights=idf_weights) + + def _preprocess(self, inputs): + inputs = utils.ensure_tensor(inputs, dtype=tf.string) + if self._standardize in (LOWER, LOWER_AND_STRIP_PUNCTUATION): + inputs = tf.strings.lower(inputs) + if self._standardize in ( + STRIP_PUNCTUATION, + LOWER_AND_STRIP_PUNCTUATION, + ): + inputs = tf.strings.regex_replace(inputs, DEFAULT_STRIP_REGEX, "") + if callable(self._standardize): + inputs = self._standardize(inputs) + + if self._split is not None: + # If we are splitting, we validate that the 1st axis is of dimension + # 1 and so can be squeezed out. We do this here instead of after + # splitting for performance reasons - it's more expensive to squeeze + # a ragged tensor. + if inputs.shape.rank > 1: + if inputs.shape[-1] != 1: + raise ValueError( + "When using `TextVectorization` to tokenize strings, " + "the input rank must be 1 or the last shape dimension " + f"must be 1. Received: inputs.shape={inputs.shape} " + f"with rank={inputs.shape.rank}" + ) + else: + inputs = tf.squeeze(inputs, axis=-1) + if self._split == WHITESPACE: + # This treats multiple whitespaces as one whitespace, and strips + # leading and trailing whitespace. + inputs = tf.strings.split(inputs) + elif self._split == CHARACTER: + inputs = tf.strings.unicode_split(inputs, "UTF-8") + elif callable(self._split): + inputs = self._split(inputs) + else: + raise ValueError( + "%s is not a supported splitting." + "TextVectorization supports the following options " + "for `split`: None, 'whitespace', or a Callable." + % self._split + ) + + # Note that 'inputs' here can be either ragged or dense depending on the + # configuration choices for this Layer. The strings.ngrams op, however, + # does support both ragged and dense inputs. + if self._ngrams is not None: + inputs = tf.strings.ngrams( + inputs, ngram_width=self._ngrams, separator=" " + ) + + return inputs + + def call(self, inputs): + if isinstance(inputs, (list, tuple, np.ndarray)): + inputs = tf.convert_to_tensor(inputs) + + inputs = self._preprocess(inputs) + + # If we're not doing any output processing, return right away. + if self._output_mode is None: + return inputs + + lookup_data = self._lookup_layer(inputs) + + # For any non-int output, we can return directly from the underlying + # layer. + if self._output_mode != INT: + return lookup_data + + if self._ragged: + return lookup_data + + # If we have a ragged tensor, we can pad during the conversion to dense. + if tf_utils.is_ragged(lookup_data): + shape = lookup_data.shape.as_list() + # If output sequence length is None, to_tensor will pad the last + # dimension to the bounding shape of the ragged dimension. + shape[-1] = self._output_sequence_length + return lookup_data.to_tensor(default_value=0, shape=shape) + + # If we have a dense tensor, we need to pad/trim directly. + if self._output_sequence_length is not None: + # Maybe trim the output. + lookup_data = lookup_data[..., : self._output_sequence_length] + + # Maybe pad the output. We need to be careful to use dynamic shape + # here as required_space_to_batch_paddings requires a fully known + # shape. 
+ shape = tf.shape(lookup_data) + padded_shape = tf.concat( + (shape[:-1], [self._output_sequence_length]), 0 + ) + padding, _ = tf.required_space_to_batch_paddings( + shape, padded_shape + ) + return tf.pad(lookup_data, padding) + + return lookup_data + + @property + def _trackable_saved_model_saver(self): + return layer_serialization.VocabularySavedModelSaver(self) + + def save_own_variables(self, store): + self._lookup_layer.save_own_variables(store) + + def load_own_variables(self, store): + self._lookup_layer.load_own_variables(store) + + def save_assets(self, dir_path): + self._lookup_layer.save_assets(dir_path) + + def load_assets(self, dir_path): + self._lookup_layer.load_assets(dir_path) diff --git a/keras/layers/preprocessing/text_vectorization_distribution_test.py b/keras/layers/preprocessing/text_vectorization_distribution_test.py index 30c171f1d5fb..94087acacbac 100644 --- a/keras/layers/preprocessing/text_vectorization_distribution_test.py +++ b/keras/layers/preprocessing/text_vectorization_distribution_test.py @@ -15,6 +15,8 @@ """Distribution tests for keras.layers.preprocessing.text_vectorization.""" +import numpy as np +import tensorflow.compat.v2 as tf import keras from keras import backend @@ -23,84 +25,113 @@ from keras.layers.preprocessing import text_vectorization from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils + +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) @test_utils.run_v2_only @tf.__internal__.distribute.combinations.generate( tf.__internal__.test.combinations.combine( - strategy=strategy_combinations.all_strategies + - strategy_combinations.multi_worker_mirrored_strategies + - strategy_combinations.parameter_server_strategies_single_worker + - strategy_combinations.parameter_server_strategies_multi_worker, - mode=["eager"])) + strategy=strategy_combinations.all_strategies + + strategy_combinations.multi_worker_mirrored_strategies + + strategy_combinations.parameter_server_strategies_single_worker + + strategy_combinations.parameter_server_strategies_multi_worker, + mode=["eager"], + ) +) class TextVectorizationDistributionTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_distribution_strategy_output(self, strategy): - if (backend.is_tpu_strategy(strategy) and - not tf_test_utils.is_mlir_bridge_enabled()): - self.skipTest("TPU tests require MLIR bridge") - - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( - 2, drop_remainder=True) - - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - tf.config.set_soft_device_placement(True) - - with strategy.scope(): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT, - vocabulary=vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - output_dataset = model.predict(input_dataset) - self.assertAllEqual(expected_output, output_dataset) - - def test_distribution_strategy_output_with_adapt(self, strategy): - # TODO(b/180614455): remove this check when MLIR bridge is always enabled. 
- if backend.is_tpu_strategy(strategy): - self.skipTest("This test needs MLIR bridge on TPU.") - - vocab_data = [[ - "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and", - "and", "fire" - ]] - vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( - 2, drop_remainder=True) - - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - tf.config.set_soft_device_placement(True) - - with strategy.scope(): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT) - layer.adapt(vocab_dataset) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - output_dataset = model.predict(input_dataset) - self.assertAllEqual(expected_output, output_dataset) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_distribution_strategy_output(self, strategy): + if ( + backend.is_tpu_strategy(strategy) + and not tf_test_utils.is_mlir_bridge_enabled() + ): + self.skipTest("TPU tests require MLIR bridge") + + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( + 2, drop_remainder=True + ) + + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + tf.config.set_soft_device_placement(True) + + with strategy.scope(): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + vocabulary=vocab_data, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + output_dataset = model.predict(input_dataset) + self.assertAllEqual(expected_output, output_dataset) + + def test_distribution_strategy_output_with_adapt(self, strategy): + # TODO(b/180614455): remove this check when MLIR bridge is always + # enabled. 
+ if backend.is_tpu_strategy(strategy): + self.skipTest("This test needs MLIR bridge on TPU.") + + vocab_data = [ + [ + "earth", + "earth", + "earth", + "earth", + "wind", + "wind", + "wind", + "and", + "and", + "fire", + ] + ] + vocab_dataset = tf.data.Dataset.from_tensors(vocab_data) + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + input_dataset = tf.data.Dataset.from_tensor_slices(input_array).batch( + 2, drop_remainder=True + ) + + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + tf.config.set_soft_device_placement(True) + + with strategy.scope(): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + ) + layer.adapt(vocab_dataset) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + output_dataset = model.predict(input_dataset) + self.assertAllEqual(expected_output, output_dataset) + if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/layers/preprocessing/text_vectorization_test.py b/keras/layers/preprocessing/text_vectorization_test.py index 9b615c9a0d25..9a4b85c16d6e 100644 --- a/keras/layers/preprocessing/text_vectorization_test.py +++ b/keras/layers/preprocessing/text_vectorization_test.py @@ -14,1903 +14,2491 @@ # ============================================================================== """Tests for Keras text vectorization preprocessing layer.""" -import tensorflow.compat.v2 as tf - import gc import os -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras from keras import backend -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils from keras.layers import convolutional from keras.layers import core from keras.layers.preprocessing import preprocessing_test_utils from keras.layers.preprocessing import text_vectorization -from keras.utils import generic_utils +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import register_keras_serializable def _get_end_to_end_test_cases(): - test_cases = ( - { - "testcase_name": - "test_simple_tokens_int_mode", - # Create an array where 'earth' is the most frequent term, followed by - # 'wind', then 'and', then 'fire'. This ensures that the vocab - # is sorting by frequency. - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"], - ["and"], ["earth"], ["michigan"]]), - "kwargs": { - "max_tokens": None, - "standardize": None, - "split": None, - "output_mode": text_vectorization.INT - }, - "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]], - }, - { - "testcase_name": - "test_simple_tokens_int_mode_hard_cap", - # Create an array where 'earth' is the most frequent term, followed by - # 'wind', then 'and', then 'fire'. This ensures that the vocab - # is sorting by frequency. 
- "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"], - ["and"], ["earth"], ["michigan"]]), - "kwargs": { - "max_tokens": 6, - "standardize": None, - "split": None, - "output_mode": text_vectorization.INT - }, - "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]], - }, - { - "testcase_name": - "test_special_tokens_int_mode", - # Mask tokens in the vocab data should be ignored, and mapped to 0 in - # from the input data. - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - [""], [""], [""], ["[UNK]"], ["[UNK]"], ["[UNK]"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], [""], ["wind"], ["[UNK]"], ["and"], [""], - ["fire"], ["and"], ["[UNK]"], ["michigan"]]), - "kwargs": { - "max_tokens": None, - "standardize": None, - "split": None, - "output_mode": text_vectorization.INT - }, - "expected_output": [[2], [0], [3], [1], [4], [0], [5], [4], [1], [1]], - }, - { - "testcase_name": - "test_documents_int_mode", - "vocab_data": - np.array([["fire earth earth"], ["earth earth"], ["wind wind"], - ["and wind and"]]), - "input_data": - np.array([["earth wind and"], ["fire fire"], ["and earth"], - ["michigan"]]), - "kwargs": { - "max_tokens": None, - "standardize": None, - "split": text_vectorization.WHITESPACE, - "output_mode": text_vectorization.INT - }, - "expected_output": [[2, 3, 4], [5, 5, 0], [4, 2, 0], [1, 0, 0]], - }, - { - "testcase_name": - "test_documents_1d_input_int_mode", - "vocab_data": - np.array([ - "fire earth earth", "earth earth", "wind wind", "and wind and" - ]), - "input_data": - np.array([["earth wind and"], ["fire fire"], ["and earth"], - ["michigan"]]), - "kwargs": { - "max_tokens": None, - "standardize": None, - "split": text_vectorization.WHITESPACE, - "output_mode": text_vectorization.INT - }, - "expected_output": [[2, 3, 4], [5, 5, 0], [4, 2, 0], [1, 0, 0]], - }, - { - "testcase_name": - "test_simple_tokens_binary_mode", - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"], - ["and"], ["earth"], ["michigan"]]), - "kwargs": { - "max_tokens": 5, - "pad_to_max_tokens": True, - "standardize": None, - "split": None, - "output_mode": text_vectorization.MULTI_HOT - }, - "expected_output": [[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], [0, 0, 0, 0, 1], [0, 0, 0, 1, 0], - [0, 1, 0, 0, 0], [1, 0, 0, 0, 0]], - }, - { - "testcase_name": - "test_documents_binary_mode", - "vocab_data": - np.array([["fire earth earth"], ["earth earth"], ["wind wind"], - ["and wind and"]]), - "input_data": - np.array([["earth wind"], ["and"], ["fire fire"], - ["earth michigan"]]), - "kwargs": { - "max_tokens": 5, - "pad_to_max_tokens": True, - "standardize": None, - "split": text_vectorization.WHITESPACE, - "output_mode": text_vectorization.MULTI_HOT - }, - "expected_output": [[0, 1, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1], - [1, 1, 0, 0, 0]], - }, - { - "testcase_name": - "test_simple_tokens_count_mode", - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"], - ["and"], ["earth"], ["michigan"]]), - "kwargs": { - "max_tokens": 5, - 
"pad_to_max_tokens": True, - "standardize": None, - "split": None, - "output_mode": text_vectorization.COUNT - }, - "expected_output": [[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], [0, 0, 0, 0, 1], [0, 0, 0, 1, 0], - [0, 1, 0, 0, 0], [1, 0, 0, 0, 0]], - }, - { - "testcase_name": - "test_documents_count_mode", - "vocab_data": - np.array([["fire earth earth"], ["earth earth"], ["wind wind"], - ["and wind and"]]), - "input_data": - np.array([["earth wind"], ["and"], ["fire fire"], - ["earth michigan"]]), - "kwargs": { - "max_tokens": 5, - "pad_to_max_tokens": True, - "standardize": None, - "split": text_vectorization.WHITESPACE, - "output_mode": text_vectorization.COUNT - }, - "expected_output": [[0, 1, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 2], - [1, 1, 0, 0, 0]], - }, - { - "testcase_name": - "test_tokens_idf_mode", - "vocab_data": - np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"], - ["wind"], ["wind"], ["wind"], ["and"], ["and"]]), - "input_data": - np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"], - ["and"], ["earth"], ["michigan"]]), - "kwargs": { - "max_tokens": 5, - "pad_to_max_tokens": True, - "standardize": None, - "split": None, - "output_mode": text_vectorization.TF_IDF - }, - "expected_output": [[0, 1.098612, 0, 0, 0], [0, 0, 1.252763, 0, 0], - [0, 0, 0, 1.466337, 0], [0, 0, 0, 0, 1.7917595], - [0, 0, 0, 0, 1.7917595], [0, 0, 0, 1.4663371, 0], - [0, 1.098612, 0, 0, 0], [1.402368, 0, 0, 0, 0]], - }, - { - "testcase_name": - "test_documents_idf_mode", - "vocab_data": - np.array([["fire earth earth"], ["earth earth"], ["wind wind"], - ["and wind and"]]), - "input_data": - np.array([["earth wind"], ["and"], ["fire fire"], - ["earth michigan"]]), - "kwargs": { - "max_tokens": 5, - "pad_to_max_tokens": True, - "standardize": None, - "split": text_vectorization.WHITESPACE, - "output_mode": text_vectorization.TF_IDF - }, - "expected_output": [[0., 0.847298, 0.847298, 0., 0.], - [0., 0., 0., 1.098612, 0.], - [0., 0., 0., 0., 2.197225], - [0.972955, 0.847298, 0., 0., 0.]], - }, - ) - - crossed_test_cases = [] - # Cross above test cases with use_dataset in (True, False) - for use_dataset in (True, False): - for case in test_cases: - case = case.copy() - if use_dataset: - case["testcase_name"] = case["testcase_name"] + "_with_dataset" - case["use_dataset"] = use_dataset - crossed_test_cases.append(case) - - return crossed_test_cases + test_cases = ( + { + "testcase_name": "test_simple_tokens_int_mode", + # Create an array where 'earth' is the most frequent term, followed + # by 'wind', then 'and', then 'fire'. This ensures that the vocab is + # sorting by frequency. + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + ["wind"], + ["and"], + ["fire"], + ["fire"], + ["and"], + ["earth"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": None, + "standardize": None, + "split": None, + "output_mode": text_vectorization.INT, + }, + "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]], + }, + { + "testcase_name": "test_simple_tokens_int_mode_hard_cap", + # Create an array where 'earth' is the most frequent term, followed + # by 'wind', then 'and', then 'fire'. This ensures that the vocab is + # sorting by frequency. 
+ "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + ["wind"], + ["and"], + ["fire"], + ["fire"], + ["and"], + ["earth"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": 6, + "standardize": None, + "split": None, + "output_mode": text_vectorization.INT, + }, + "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]], + }, + { + "testcase_name": "test_special_tokens_int_mode", + # Mask tokens in the vocab data should be ignored, and mapped to 0 + # in from the input data. + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + [""], + [""], + [""], + ["[UNK]"], + ["[UNK]"], + ["[UNK]"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + [""], + ["wind"], + ["[UNK]"], + ["and"], + [""], + ["fire"], + ["and"], + ["[UNK]"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": None, + "standardize": None, + "split": None, + "output_mode": text_vectorization.INT, + }, + "expected_output": [ + [2], + [0], + [3], + [1], + [4], + [0], + [5], + [4], + [1], + [1], + ], + }, + { + "testcase_name": "test_documents_int_mode", + "vocab_data": np.array( + [ + ["fire earth earth"], + ["earth earth"], + ["wind wind"], + ["and wind and"], + ] + ), + "input_data": np.array( + [["earth wind and"], ["fire fire"], ["and earth"], ["michigan"]] + ), + "kwargs": { + "max_tokens": None, + "standardize": None, + "split": text_vectorization.WHITESPACE, + "output_mode": text_vectorization.INT, + }, + "expected_output": [[2, 3, 4], [5, 5, 0], [4, 2, 0], [1, 0, 0]], + }, + { + "testcase_name": "test_documents_1d_input_int_mode", + "vocab_data": np.array( + ["fire earth earth", "earth earth", "wind wind", "and wind and"] + ), + "input_data": np.array( + [["earth wind and"], ["fire fire"], ["and earth"], ["michigan"]] + ), + "kwargs": { + "max_tokens": None, + "standardize": None, + "split": text_vectorization.WHITESPACE, + "output_mode": text_vectorization.INT, + }, + "expected_output": [[2, 3, 4], [5, 5, 0], [4, 2, 0], [1, 0, 0]], + }, + { + "testcase_name": "test_simple_tokens_binary_mode", + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + ["wind"], + ["and"], + ["fire"], + ["fire"], + ["and"], + ["earth"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": 5, + "pad_to_max_tokens": True, + "standardize": None, + "split": None, + "output_mode": text_vectorization.MULTI_HOT, + }, + "expected_output": [ + [0, 1, 0, 0, 0], + [0, 0, 1, 0, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 1], + [0, 0, 0, 0, 1], + [0, 0, 0, 1, 0], + [0, 1, 0, 0, 0], + [1, 0, 0, 0, 0], + ], + }, + { + "testcase_name": "test_documents_binary_mode", + "vocab_data": np.array( + [ + ["fire earth earth"], + ["earth earth"], + ["wind wind"], + ["and wind and"], + ] + ), + "input_data": np.array( + [["earth wind"], ["and"], ["fire fire"], ["earth michigan"]] + ), + "kwargs": { + "max_tokens": 5, + "pad_to_max_tokens": True, + "standardize": None, + "split": text_vectorization.WHITESPACE, + "output_mode": text_vectorization.MULTI_HOT, + }, + "expected_output": [ + [0, 1, 1, 0, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 1], + [1, 1, 0, 0, 0], + ], + }, + { + "testcase_name": "test_simple_tokens_count_mode", + "vocab_data": np.array( + [ + ["fire"], + 
["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + ["wind"], + ["and"], + ["fire"], + ["fire"], + ["and"], + ["earth"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": 5, + "pad_to_max_tokens": True, + "standardize": None, + "split": None, + "output_mode": text_vectorization.COUNT, + }, + "expected_output": [ + [0, 1, 0, 0, 0], + [0, 0, 1, 0, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 1], + [0, 0, 0, 0, 1], + [0, 0, 0, 1, 0], + [0, 1, 0, 0, 0], + [1, 0, 0, 0, 0], + ], + }, + { + "testcase_name": "test_documents_count_mode", + "vocab_data": np.array( + [ + ["fire earth earth"], + ["earth earth"], + ["wind wind"], + ["and wind and"], + ] + ), + "input_data": np.array( + [["earth wind"], ["and"], ["fire fire"], ["earth michigan"]] + ), + "kwargs": { + "max_tokens": 5, + "pad_to_max_tokens": True, + "standardize": None, + "split": text_vectorization.WHITESPACE, + "output_mode": text_vectorization.COUNT, + }, + "expected_output": [ + [0, 1, 1, 0, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 2], + [1, 1, 0, 0, 0], + ], + }, + { + "testcase_name": "test_tokens_idf_mode", + "vocab_data": np.array( + [ + ["fire"], + ["earth"], + ["earth"], + ["earth"], + ["earth"], + ["wind"], + ["wind"], + ["wind"], + ["and"], + ["and"], + ] + ), + "input_data": np.array( + [ + ["earth"], + ["wind"], + ["and"], + ["fire"], + ["fire"], + ["and"], + ["earth"], + ["michigan"], + ] + ), + "kwargs": { + "max_tokens": 5, + "pad_to_max_tokens": True, + "standardize": None, + "split": None, + "output_mode": text_vectorization.TF_IDF, + }, + "expected_output": [ + [0, 1.098612, 0, 0, 0], + [0, 0, 1.252763, 0, 0], + [0, 0, 0, 1.466337, 0], + [0, 0, 0, 0, 1.7917595], + [0, 0, 0, 0, 1.7917595], + [0, 0, 0, 1.4663371, 0], + [0, 1.098612, 0, 0, 0], + [1.402368, 0, 0, 0, 0], + ], + }, + { + "testcase_name": "test_documents_idf_mode", + "vocab_data": np.array( + [ + ["fire earth earth"], + ["earth earth"], + ["wind wind"], + ["and wind and"], + ] + ), + "input_data": np.array( + [["earth wind"], ["and"], ["fire fire"], ["earth michigan"]] + ), + "kwargs": { + "max_tokens": 5, + "pad_to_max_tokens": True, + "standardize": None, + "split": text_vectorization.WHITESPACE, + "output_mode": text_vectorization.TF_IDF, + }, + "expected_output": [ + [0.0, 0.847298, 0.847298, 0.0, 0.0], + [0.0, 0.0, 0.0, 1.098612, 0.0], + [0.0, 0.0, 0.0, 0.0, 2.197225], + [0.972955, 0.847298, 0.0, 0.0, 0.0], + ], + }, + ) + + crossed_test_cases = [] + # Cross above test cases with use_dataset in (True, False) + for use_dataset in (True, False): + for case in test_cases: + case = case.copy() + if use_dataset: + case["testcase_name"] = case["testcase_name"] + "_with_dataset" + case["use_dataset"] = use_dataset + crossed_test_cases.append(case) + + return crossed_test_cases @test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) -class TextVectorizationLayerTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest - ): - - @parameterized.named_parameters(*_get_end_to_end_test_cases()) - def test_layer_end_to_end_with_adapt(self, vocab_data, input_data, kwargs, - use_dataset, expected_output): - cls = text_vectorization.TextVectorization - if kwargs.get("output_mode") == text_vectorization.INT: - expected_output_dtype = tf.int64 - else: - expected_output_dtype = tf.float32 - input_shape = input_data.shape - - if use_dataset: - # Keras APIs expect batched datasets. 
- # TODO(rachelim): `model.predict` predicts the result on each - # dataset batch separately, then tries to concatenate the results - # together. When the results have different shapes on the non-concat - # axis (which can happen in the output_mode = INT case for - # TextVectorization), the concatenation fails. In real use cases, this may - # not be an issue because users are likely to pipe the preprocessing layer - # into other keras layers instead of predicting it directly. A workaround - # for these unit tests is to have the dataset only contain one batch, so - # no concatenation needs to happen with the result. For consistency with - # numpy input, we should make `predict` join differently shaped results - # together sensibly, with 0 padding. - input_data = tf.data.Dataset.from_tensor_slices(input_data).batch( - input_shape[0]) - vocab_data = tf.data.Dataset.from_tensor_slices(vocab_data).batch( - input_shape[0]) - - output_data = test_utils.layer_test( - cls, - kwargs=kwargs, - input_shape=input_shape, - input_data=input_data, - input_dtype=tf.string, - expected_output_dtype=expected_output_dtype, - validate_training=False, - adapt_data=vocab_data) - self.assertAllClose(expected_output, output_data) - - @parameterized.product( - rank=[0, 1, 2], - # Check lists, numpy arrays, tensors, and objects convertable to tensor. - data_fn=[None, np.array, tf.constant, preprocessing_test_utils.ArrayLike] - ) - def test_input_types(self, rank, data_fn): - input_data = "earth wind and fire" - expected_output = [2, 3, 4, 5] - if rank == 1: - input_data = [input_data] - expected_output = [expected_output] - elif rank == 2: - input_data = [[input_data]] - expected_output = [expected_output] - if data_fn is not None: - input_data = data_fn(input_data) - input_shape = [] if rank == 0 else [1] - - layer = text_vectorization.TextVectorization( - vocabulary=["earth", "wind", "and", "fire"]) - output_data = layer(input_data) - self.assertAllEqual(output_data, expected_output) - - # Again in a keras.Model - inputs = keras.Input(shape=input_shape, dtype=tf.string) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - output_data = model(tf.constant(input_data)) - self.assertAllEqual(output_data, expected_output) - - @parameterized.named_parameters([ - { - "testcase_name": "ragged_tensor1", - "input_data": [[["0 a b"], ["c d"]], [["e a"], ["b c d"]], [["f"]]], - "expected_output": [[[1, 2, 3], [4, 5]], [[6, 2], [3, 4, 5]], [[7]]], - }, - { - "testcase_name": "ragged_tensor2", - "input_data": [[["0 a b"], [""]], [], [["e a"], ["b c d"]], [["f"]]], - "expected_output": [[[1, 2, 3], []], [], [[6, 2], [3, 4, 5]], [[7]]], - }, - ]) - def test_ragged_input_and_ragged_output(self, input_data, expected_output): - input_data = tf.ragged.constant(input_data, inner_shape=(1,)) - layer = text_vectorization.TextVectorization( - vocabulary=["a", "b", "c", "d", "e", "f"], ragged=True) - output_data = layer(input_data) - self.assertAllEqual(output_data, expected_output) - - # Again in a keras.Model - inputs = keras.Input(shape=(1,), dtype=tf.string) - outputs = layer(inputs) - model = keras.Model(inputs=inputs, outputs=outputs) - output_data = model.predict(input_data) - self.assertAllEqual(output_data, expected_output) - - def test_scalar_input_int_mode_no_len_limit(self): - vocab_data = [ - "fire earth earth", "earth earth", "wind wind", "and wind and" - ] - input_data = "earth wind and fire fire and earth michigan" - layer = text_vectorization.TextVectorization() - layer.adapt(vocab_data) - out 
= layer(input_data) - self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1]) - layer.set_vocabulary(["earth", "wind", "and", "fire"]) - out = layer(input_data) - self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1]) - - def test_scalar_input_int_mode_trim_to_len_limit(self): - vocab_data = [ - "fire earth earth", "earth earth", "wind wind", "and wind and" - ] - input_data = "earth wind and fire fire and earth michigan" - layer = text_vectorization.TextVectorization(output_sequence_length=3) - layer.adapt(vocab_data) - out = layer(input_data) - self.assertAllClose(out.numpy(), [2, 3, 4]) - layer.set_vocabulary(["earth", "wind", "and", "fire"]) - out = layer(input_data) - self.assertAllClose(out.numpy(), [2, 3, 4]) - - def test_scalar_input_int_pad_to_len_limit(self): - vocab_data = [ - "fire earth earth", "earth earth", "wind wind", "and wind and" - ] - input_data = "earth wind and fire fire and earth michigan" - layer = text_vectorization.TextVectorization(output_sequence_length=10) - layer.adapt(vocab_data) - out = layer(input_data) - self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1, 0, 0]) - layer.set_vocabulary(["earth", "wind", "and", "fire"]) - out = layer(input_data) - self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1, 0, 0]) - - def test_dataset_of_single_strings(self): - vocab_data = ["two two two", "two three three", "three four four five"] - input_data = ["two three", "four five"] - vocab_ds = tf.data.Dataset.from_tensor_slices(vocab_data) # unbatched - input_ds = tf.data.Dataset.from_tensor_slices(input_data) # unbatched - layer = text_vectorization.TextVectorization() - layer.adapt(vocab_ds) - out = input_ds.map(layer) - self.assertAllClose(list(out.as_numpy_iterator()), [[2, 3], [4, 5]]) - - def test_dataset_of_single_strings_with_output_sequence(self): - vocab_data = ["two two two", "two three three", "three four four five"] - input_data = ["two three", "four five"] - vocab_ds = tf.data.Dataset.from_tensor_slices(vocab_data) # unbatched - input_ds = tf.data.Dataset.from_tensor_slices(input_data) # unbatched - layer = text_vectorization.TextVectorization(output_sequence_length=3) - layer.adapt(vocab_ds) - out = input_ds.map(layer) - self.assertAllClose(list(out.as_numpy_iterator()), [[2, 3, 0], [4, 5, 0]]) - - @parameterized.named_parameters( - { - "testcase_name": "1d", - "data": ["0", "a", "b", "c", "d", "e", "a", "b", "c", "d", "f"], - "expected": [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1] - }, - { - "testcase_name": "2d", - "data": [["0", "a", "b", "c", "d"], ["e", "a", "b", "c", "d"], ["f"]], - "expected": [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 0, 0, 0, 0]] - }, - { - "testcase_name": - "3d", - "data": [[["0", "a", "b"], ["c", "d"]], [["e", "a"], ["b", "c", "d"]], - [["f"]]], - "expected": [[[1, 2, 3], [4, 5, 0]], [[1, 2, 0], [3, 4, 5]], - [[1, 0, 0], [0, 0, 0]]] - }, - ) - def test_layer_dimensionality_handling(self, data, expected): - vocab = ["a", "b", "c", "d"] - vectorization = text_vectorization.TextVectorization( - max_tokens=None, standardize=None, split=None, pad_to_max_tokens=False) - vectorization.set_vocabulary(vocab) - output = vectorization(tf.ragged.constant(data)) - self.assertAllEqual(expected, output) - - @parameterized.named_parameters( - { - "testcase_name": "1d", - "data": ["0 a b c d e a b c d f"], - "expected": [[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1]] - }, - { - "testcase_name": - "3d", - "data": [[["0 a b"], ["c d"]], [["e a"], ["b c d"]], [["f"]]], - "expected": [[[1, 2, 3], [4, 5, 0]], [[1, 2, 0], [3, 4, 5]], - [[1, 0, 0], [0, 0, 0]]] - 
}, - ) - def test_layer_dimensionality_handling_with_split(self, data, expected): - vocab = ["a", "b", "c", "d"] - vectorization = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - pad_to_max_tokens=False) - vectorization.set_vocabulary(vocab) - output = vectorization(tf.ragged.constant(data, inner_shape=(1,))) - self.assertAllEqual(expected, output) +class TextVectorizationLayerTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + @parameterized.named_parameters(*_get_end_to_end_test_cases()) + def test_layer_end_to_end_with_adapt( + self, vocab_data, input_data, kwargs, use_dataset, expected_output + ): + cls = text_vectorization.TextVectorization + if kwargs.get("output_mode") == text_vectorization.INT: + expected_output_dtype = tf.int64 + else: + expected_output_dtype = tf.float32 + input_shape = input_data.shape + + if use_dataset: + # Keras APIs expect batched datasets. + # TODO(rachelim): `model.predict` predicts the result on each + # dataset batch separately, then tries to concatenate the results + # together. When the results have different shapes on the non-concat + # axis (which can happen in the output_mode = INT case for + # TextVectorization), the concatenation fails. In real use cases, + # this may not be an issue because users are likely to pipe the + # preprocessing layer into other keras layers instead of predicting + # it directly. A workaround for these unit tests is to have the + # dataset only contain one batch, so no concatenation needs to + # happen with the result. For consistency with numpy input, we + # should make `predict` join differently shaped results together + # sensibly, with 0 padding. + input_data = tf.data.Dataset.from_tensor_slices(input_data).batch( + input_shape[0] + ) + vocab_data = tf.data.Dataset.from_tensor_slices(vocab_data).batch( + input_shape[0] + ) + + output_data = test_utils.layer_test( + cls, + kwargs=kwargs, + input_shape=input_shape, + input_data=input_data, + input_dtype=tf.string, + expected_output_dtype=expected_output_dtype, + validate_training=False, + adapt_data=vocab_data, + ) + self.assertAllClose(expected_output, output_data) + + @parameterized.product( + rank=[0, 1, 2], + # Check lists, numpy arrays, tensors, and objects convertible to tensor.
+ data_fn=[ + None, + np.array, + tf.constant, + preprocessing_test_utils.ArrayLike, + ], + ) + def test_input_types(self, rank, data_fn): + input_data = "earth wind and fire" + expected_output = [2, 3, 4, 5] + if rank == 1: + input_data = [input_data] + expected_output = [expected_output] + elif rank == 2: + input_data = [[input_data]] + expected_output = [expected_output] + if data_fn is not None: + input_data = data_fn(input_data) + input_shape = [] if rank == 0 else [1] + + layer = text_vectorization.TextVectorization( + vocabulary=["earth", "wind", "and", "fire"] + ) + output_data = layer(input_data) + self.assertAllEqual(output_data, expected_output) + + # Again in a keras.Model + inputs = keras.Input(shape=input_shape, dtype=tf.string) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + output_data = model(tf.constant(input_data)) + self.assertAllEqual(output_data, expected_output) + + @parameterized.named_parameters( + [ + { + "testcase_name": "ragged_tensor1", + "input_data": [ + [["0 a b"], ["c d"]], + [["e a"], ["b c d"]], + [["f"]], + ], + "expected_output": [ + [[1, 2, 3], [4, 5]], + [[6, 2], [3, 4, 5]], + [[7]], + ], + }, + { + "testcase_name": "ragged_tensor2", + "input_data": [ + [["0 a b"], [""]], + [], + [["e a"], ["b c d"]], + [["f"]], + ], + "expected_output": [ + [[1, 2, 3], []], + [], + [[6, 2], [3, 4, 5]], + [[7]], + ], + }, + ] + ) + def test_ragged_input_and_ragged_output(self, input_data, expected_output): + input_data = tf.ragged.constant(input_data, inner_shape=(1,)) + layer = text_vectorization.TextVectorization( + vocabulary=["a", "b", "c", "d", "e", "f"], ragged=True + ) + output_data = layer(input_data) + self.assertAllEqual(output_data, expected_output) + + # Again in a keras.Model + inputs = keras.Input(shape=(1,), dtype=tf.string) + outputs = layer(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + output_data = model.predict(input_data) + self.assertAllEqual(output_data, expected_output) + + def test_scalar_input_int_mode_no_len_limit(self): + vocab_data = [ + "fire earth earth", + "earth earth", + "wind wind", + "and wind and", + ] + input_data = "earth wind and fire fire and earth michigan" + layer = text_vectorization.TextVectorization() + layer.adapt(vocab_data) + out = layer(input_data) + self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1]) + layer.set_vocabulary(["earth", "wind", "and", "fire"]) + out = layer(input_data) + self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1]) + + def test_scalar_input_int_mode_trim_to_len_limit(self): + vocab_data = [ + "fire earth earth", + "earth earth", + "wind wind", + "and wind and", + ] + input_data = "earth wind and fire fire and earth michigan" + layer = text_vectorization.TextVectorization(output_sequence_length=3) + layer.adapt(vocab_data) + out = layer(input_data) + self.assertAllClose(out.numpy(), [2, 3, 4]) + layer.set_vocabulary(["earth", "wind", "and", "fire"]) + out = layer(input_data) + self.assertAllClose(out.numpy(), [2, 3, 4]) + + def test_scalar_input_int_pad_to_len_limit(self): + vocab_data = [ + "fire earth earth", + "earth earth", + "wind wind", + "and wind and", + ] + input_data = "earth wind and fire fire and earth michigan" + layer = text_vectorization.TextVectorization(output_sequence_length=10) + layer.adapt(vocab_data) + out = layer(input_data) + self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1, 0, 0]) + layer.set_vocabulary(["earth", "wind", "and", "fire"]) + out = layer(input_data) + self.assertAllClose(out.numpy(), [2, 3, 
4, 5, 5, 4, 2, 1, 0, 0]) + + def test_dataset_of_single_strings(self): + vocab_data = ["two two two", "two three three", "three four four five"] + input_data = ["two three", "four five"] + vocab_ds = tf.data.Dataset.from_tensor_slices(vocab_data) # unbatched + input_ds = tf.data.Dataset.from_tensor_slices(input_data) # unbatched + layer = text_vectorization.TextVectorization() + layer.adapt(vocab_ds) + out = input_ds.map(layer) + self.assertAllClose(list(out.as_numpy_iterator()), [[2, 3], [4, 5]]) + + def test_dataset_of_single_strings_with_output_sequence(self): + vocab_data = ["two two two", "two three three", "three four four five"] + input_data = ["two three", "four five"] + vocab_ds = tf.data.Dataset.from_tensor_slices(vocab_data) # unbatched + input_ds = tf.data.Dataset.from_tensor_slices(input_data) # unbatched + layer = text_vectorization.TextVectorization(output_sequence_length=3) + layer.adapt(vocab_ds) + out = input_ds.map(layer) + self.assertAllClose( + list(out.as_numpy_iterator()), [[2, 3, 0], [4, 5, 0]] + ) + + @parameterized.named_parameters( + { + "testcase_name": "1d", + "data": ["0", "a", "b", "c", "d", "e", "a", "b", "c", "d", "f"], + "expected": [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1], + }, + { + "testcase_name": "2d", + "data": [ + ["0", "a", "b", "c", "d"], + ["e", "a", "b", "c", "d"], + ["f"], + ], + "expected": [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 0, 0, 0, 0]], + }, + { + "testcase_name": "3d", + "data": [ + [["0", "a", "b"], ["c", "d"]], + [["e", "a"], ["b", "c", "d"]], + [["f"]], + ], + "expected": [ + [[1, 2, 3], [4, 5, 0]], + [[1, 2, 0], [3, 4, 5]], + [[1, 0, 0], [0, 0, 0]], + ], + }, + ) + def test_layer_dimensionality_handling(self, data, expected): + vocab = ["a", "b", "c", "d"] + vectorization = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + pad_to_max_tokens=False, + ) + vectorization.set_vocabulary(vocab) + output = vectorization(tf.ragged.constant(data)) + self.assertAllEqual(expected, output) + + @parameterized.named_parameters( + { + "testcase_name": "1d", + "data": ["0 a b c d e a b c d f"], + "expected": [[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1]], + }, + { + "testcase_name": "3d", + "data": [[["0 a b"], ["c d"]], [["e a"], ["b c d"]], [["f"]]], + "expected": [ + [[1, 2, 3], [4, 5, 0]], + [[1, 2, 0], [3, 4, 5]], + [[1, 0, 0], [0, 0, 0]], + ], + }, + ) + def test_layer_dimensionality_handling_with_split(self, data, expected): + vocab = ["a", "b", "c", "d"] + vectorization = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=text_vectorization.WHITESPACE, + pad_to_max_tokens=False, + ) + vectorization.set_vocabulary(vocab) + output = vectorization(tf.ragged.constant(data, inner_shape=(1,))) + self.assertAllEqual(expected, output) @test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) class TextVectorizationPreprocessingTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def _write_to_temp_file(self, file_name, vocab_list): - vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") - with tf.io.gfile.GFile(vocab_path, "w") as writer: - for vocab in vocab_list: - writer.write(vocab + "\n") - writer.flush() - writer.close() - return vocab_path - - def test_summary_before_adapt(self): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=10, - pad_to_max_tokens=True, - standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION, - split=None, - 
ngrams=None, - output_mode=text_vectorization.TF_IDF) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - # We are testing that model.summary() can be called without erroring out. - # (b/145726907) - model.summary() - - @parameterized.parameters([list, np.array, tf.constant, tf.ragged.constant]) - def test_lower_and_strip_punctuation(self, data_fn): - input_array = data_fn([["Earth", "wInD", "aNd", "firE"], - ["fire|", "an<>d", "{earth}", "michigan@%$"]]) - expected_output = data_fn([[b"earth", b"wind", b"and", b"fire"], - [b"fire", b"and", b"earth", b"michigan"]]) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION, - split=None, - ngrams=None, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - @parameterized.parameters([list, np.array, tf.constant, tf.ragged.constant]) - def test_strip_punctuation(self, data_fn): - input_array = data_fn([["Earth", "wInD", "aNd", "firE"], - ["fire|", "an<>d", "{earth}", "michigan@%$"]]) - expected_output = data_fn([[b"Earth", b"wInD", b"aNd", b"firE"], - [b"fire", b"and", b"earth", b"michigan"]]) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=text_vectorization.STRIP_PUNCTUATION, - split=None, - ngrams=None, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - @parameterized.parameters([list, np.array, tf.constant, tf.ragged.constant]) - def test_lower(self, data_fn): - input_array = data_fn([["Earth", "wInD", "aNd", "firE"], - ["fire|", "an<>d", "{earth}", "michigan@$"]]) - expected_output = data_fn([[b"earth", b"wind", b"and", b"fire"], - [b"fire|", b"an<>d", b"{earth}", b"michigan@$"]]) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=text_vectorization.LOWER, - split=None, - ngrams=None, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_custom_normalization(self): - input_array = np.array([["Earth", "wInD", "aNd", "firE"], - ["fire|", "an<>d", "{earth}", "michigan@%$"]]) - expected_output = np.array( - [[b"earth", b"wind", b"and", b"fire"], - [b"fire|", b"an<>d", b"{earth}", b"michigan@%$"]]) - - custom_standardization = tf.strings.lower - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=custom_standardization, - split=None, - ngrams=None, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_whitespace_splitting(self): - input_array = np.array([["earth wind and fire"], - ["\tfire\tand\nearth michigan "]]) - expected_output = [[b"earth", b"wind", b"and", b"fire"], - [b"fire", b"and", b"earth", b"michigan"]] - - input_data = keras.Input(shape=(1,), 
dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - ngrams=None, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_character_splitting(self): - input_array = np.array([["earthwind"], - ["and fire"]]) - expected_output = [[b"e", b"a", b"r", b"t", b"h", b"w", b"i", b"n", b"d"], - [b"a", b"n", b"d", b" ", b"f", b"i", b"r", b"e"]] - - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=text_vectorization.CHARACTER, - ngrams=None, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_custom_string_splitting(self): - input_array = np.array([["earth>wind>and fire"], - ["\tfire>and\nearth>michigan"]]) - expected_output = [[b"earth", b"wind", b"and fire"], - [b"\tfire", b"and\nearth", b"michigan"]] - - custom_split = lambda x: tf.strings.split(x, sep=">") - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=custom_split, - ngrams=None, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_single_ngram_value_ragged_inputs(self): - input_array = tf.ragged.constant([["earth", "wind", "and", "fire"], - ["fire", "and", "earth"]]) - # pyformat: disable - expected_output = [[b"earth", b"wind", b"and", b"fire", - b"earth wind", b"wind and", b"and fire", - b"earth wind and", b"wind and fire"], - [b"fire", b"and", b"earth", - b"fire and", b"and earth", - b"fire and earth"]] - # pyformat: enable - - input_data = keras.Input(shape=(None,), ragged=True, dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - ngrams=3, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_single_ngram_value(self): - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - # pyformat: disable - expected_output = [[b"earth", b"wind", b"and", b"fire", - b"earth wind", b"wind and", b"and fire", - b"earth wind and", b"wind and fire"], - [b"fire", b"and", b"earth", b"michigan", - b"fire and", b"and earth", b"earth michigan", - b"fire and earth", b"and earth michigan"]] - # pyformat: enable - - input_data = keras.Input(shape=(4,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - ngrams=3, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_multiple_ngram_values(self): - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - # pyformat: disable - expected_output = [[b"earth wind", b"wind and", 
b"and fire", - b"earth wind and", b"wind and fire"], - [b"fire and", b"and earth", b"earth michigan", - b"fire and earth", b"and earth michigan"]] - # pyformat: enable - - input_data = keras.Input(shape=(4,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - ngrams=(2, 3), - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_string_multiple_preprocessing_steps(self): - input_array = np.array([["earth wInD and firE"], - ["\tfire\tand\nearth!! michig@n "]]) - expected_output = [[ - b"earth", - b"wind", - b"and", - b"fire", - b"earth wind", - b"wind and", - b"and fire", - ], - [ - b"fire", - b"and", - b"earth", - b"michign", - b"fire and", - b"and earth", - b"earth michign", - ]] - - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION, - split=text_vectorization.WHITESPACE, - ngrams=2, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_string_splitting_with_non_1d_array_fails(self): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - vocabulary=["a"], - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - output_mode=None) - with self.assertRaisesRegex(ValueError, "last shape dimension must be 1"): - _ = layer(input_data) - - def test_string_splitting_with_non_1d_raggedarray_fails(self): - input_data = keras.Input(shape=(None,), ragged=True, dtype=tf.string) - layer = text_vectorization.TextVectorization( - vocabulary=["a"], - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - output_mode=None) - with self.assertRaisesRegex(ValueError, "last shape dimension must be 1"): - _ = layer(input_data) - - def test_standardization_with_invalid_standardize_arg(self): - with self.assertRaisesRegex(ValueError, "Unkown value for `standardize`"): - text_vectorization.TextVectorization( - vocabulary=["a"], standardize="unsupported") - - def test_splitting_with_invalid_split_arg(self): - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization(vocabulary=["a"]) - layer._split = "unsupported" - with self.assertRaisesRegex(ValueError, ".*is not a supported splitting.*"): - _ = layer(input_data) - - def test_vocab_setting_via_init(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT, - vocabulary=vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_vocab_setting_via_init_file(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", 
"michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - vocab_path = self._write_to_temp_file("vocab_file", vocab_data) - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT, - vocabulary=vocab_path) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_vocab_setting_via_setter(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - vocab_path = self._write_to_temp_file("vocab_file", vocab_data) - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT) - layer.set_vocabulary(vocab_path) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_vocab_setting_with_oov_via_setter(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - vocab_path = self._write_to_temp_file("vocab_file", vocab_data) - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT) - layer.set_vocabulary(vocab_path) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def _write_to_temp_file(self, file_name, vocab_list): + vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt") + with tf.io.gfile.GFile(vocab_path, "w") as writer: + for vocab in vocab_list: + writer.write(vocab + "\n") + writer.flush() + writer.close() + return vocab_path + + def test_summary_before_adapt(self): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=10, + pad_to_max_tokens=True, + standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION, + split=None, + ngrams=None, + output_mode=text_vectorization.TF_IDF, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + # We are testing that model.summary() can be called without erroring + # out. 
(b/145726907) + model.summary() + + @parameterized.parameters([list, np.array, tf.constant, tf.ragged.constant]) + def test_lower_and_strip_punctuation(self, data_fn): + input_array = data_fn( + [ + ["Earth", "wInD", "aNd", "firE"], + ["fire|", "an<>d", "{earth}", "michigan@%$"], + ] + ) + expected_output = data_fn( + [ + [b"earth", b"wind", b"and", b"fire"], + [b"fire", b"and", b"earth", b"michigan"], + ] + ) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION, + split=None, + ngrams=None, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + @parameterized.parameters([list, np.array, tf.constant, tf.ragged.constant]) + def test_strip_punctuation(self, data_fn): + input_array = data_fn( + [ + ["Earth", "wInD", "aNd", "firE"], + ["fire|", "an<>d", "{earth}", "michigan@%$"], + ] + ) + expected_output = data_fn( + [ + [b"Earth", b"wInD", b"aNd", b"firE"], + [b"fire", b"and", b"earth", b"michigan"], + ] + ) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=text_vectorization.STRIP_PUNCTUATION, + split=None, + ngrams=None, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + @parameterized.parameters([list, np.array, tf.constant, tf.ragged.constant]) + def test_lower(self, data_fn): + input_array = data_fn( + [ + ["Earth", "wInD", "aNd", "firE"], + ["fire|", "an<>d", "{earth}", "michigan@$"], + ] + ) + expected_output = data_fn( + [ + [b"earth", b"wind", b"and", b"fire"], + [b"fire|", b"an<>d", b"{earth}", b"michigan@$"], + ] + ) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=text_vectorization.LOWER, + split=None, + ngrams=None, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_custom_normalization(self): + input_array = np.array( + [ + ["Earth", "wInD", "aNd", "firE"], + ["fire|", "an<>d", "{earth}", "michigan@%$"], + ] + ) + expected_output = np.array( + [ + [b"earth", b"wind", b"and", b"fire"], + [b"fire|", b"an<>d", b"{earth}", b"michigan@%$"], + ] + ) + + custom_standardization = tf.strings.lower + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=custom_standardization, + split=None, + ngrams=None, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_whitespace_splitting(self): + input_array = np.array( + [["earth wind and fire"], ["\tfire\tand\nearth michigan "]] + ) + expected_output = [ + [b"earth", b"wind", b"and", b"fire"], + [b"fire", b"and", b"earth", b"michigan"], + ] + + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + 
standardize=None, + split=text_vectorization.WHITESPACE, + ngrams=None, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_character_splitting(self): + input_array = np.array([["earthwind"], ["and fire"]]) + expected_output = [ + [b"e", b"a", b"r", b"t", b"h", b"w", b"i", b"n", b"d"], + [b"a", b"n", b"d", b" ", b"f", b"i", b"r", b"e"], + ] + + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=text_vectorization.CHARACTER, + ngrams=None, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_custom_string_splitting(self): + input_array = np.array( + [["earth>wind>and fire"], ["\tfire>and\nearth>michigan"]] + ) + expected_output = [ + [b"earth", b"wind", b"and fire"], + [b"\tfire", b"and\nearth", b"michigan"], + ] + + custom_split = lambda x: tf.strings.split(x, sep=">") + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=custom_split, + ngrams=None, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_single_ngram_value_ragged_inputs(self): + input_array = tf.ragged.constant( + [["earth", "wind", "and", "fire"], ["fire", "and", "earth"]] + ) + # pyformat: disable + expected_output = [ + [ + b"earth", + b"wind", + b"and", + b"fire", + b"earth wind", + b"wind and", + b"and fire", + b"earth wind and", + b"wind and fire", + ], + [ + b"fire", + b"and", + b"earth", + b"fire and", + b"and earth", + b"fire and earth", + ], + ] + # pyformat: enable + + input_data = keras.Input(shape=(None,), ragged=True, dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + ngrams=3, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_single_ngram_value(self): + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + # pyformat: disable + expected_output = [ + [ + b"earth", + b"wind", + b"and", + b"fire", + b"earth wind", + b"wind and", + b"and fire", + b"earth wind and", + b"wind and fire", + ], + [ + b"fire", + b"and", + b"earth", + b"michigan", + b"fire and", + b"and earth", + b"earth michigan", + b"fire and earth", + b"and earth michigan", + ], + ] + # pyformat: enable + + input_data = keras.Input(shape=(4,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + ngrams=3, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_multiple_ngram_values(self): + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + # 
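The `split` modes covered by these tests likewise map roughly onto plain `tf.strings` ops: whitespace splitting, per-character splitting, and an arbitrary callable (the tests pass `lambda x: tf.strings.split(x, sep=">")`). A rough sketch, illustrative only:

import tensorflow as tf

print(tf.strings.split(tf.constant(["earth wind and fire"])))           # ~ "whitespace"
print(tf.strings.unicode_split(tf.constant(["and fire"]), "UTF-8"))     # ~ "character": keeps the space
print(tf.strings.split(tf.constant(["earth>wind>and fire"]), sep=">"))  # ~ custom callable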
pyformat: disable + expected_output = [ + [ + b"earth wind", + b"wind and", + b"and fire", + b"earth wind and", + b"wind and fire", + ], + [ + b"fire and", + b"and earth", + b"earth michigan", + b"fire and earth", + b"and earth michigan", + ], + ] + # pyformat: enable + + input_data = keras.Input(shape=(4,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + ngrams=(2, 3), + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_string_multiple_preprocessing_steps(self): + input_array = np.array( + [["earth wInD and firE"], ["\tfire\tand\nearth!! michig@n "]] + ) + expected_output = [ + [ + b"earth", + b"wind", + b"and", + b"fire", + b"earth wind", + b"wind and", + b"and fire", + ], + [ + b"fire", + b"and", + b"earth", + b"michign", + b"fire and", + b"and earth", + b"earth michign", + ], + ] + + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION, + split=text_vectorization.WHITESPACE, + ngrams=2, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_string_splitting_with_non_1d_array_fails(self): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + vocabulary=["a"], + max_tokens=None, + standardize=None, + split=text_vectorization.WHITESPACE, + output_mode=None, + ) + with self.assertRaisesRegex( + ValueError, "last shape dimension must be 1" + ): + _ = layer(input_data) + + def test_string_splitting_with_non_1d_raggedarray_fails(self): + input_data = keras.Input(shape=(None,), ragged=True, dtype=tf.string) + layer = text_vectorization.TextVectorization( + vocabulary=["a"], + max_tokens=None, + standardize=None, + split=text_vectorization.WHITESPACE, + output_mode=None, + ) + with self.assertRaisesRegex( + ValueError, "last shape dimension must be 1" + ): + _ = layer(input_data) + + def test_standardization_with_invalid_standardize_arg(self): + with self.assertRaisesRegex( + ValueError, "Unkown value for `standardize`" + ): + text_vectorization.TextVectorization( + vocabulary=["a"], standardize="unsupported" + ) + + def test_splitting_with_invalid_split_arg(self): + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization(vocabulary=["a"]) + layer._split = "unsupported" + with self.assertRaisesRegex( + ValueError, ".*is not a supported splitting.*" + ): + _ = layer(input_data) + + def test_vocab_setting_via_init(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + vocabulary=vocab_data, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def 
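The ngram expectations above encode a specific ordering: for each example, all 1-grams, then all 2-grams, then all 3-grams, with `ngrams=3` shorthand for orders (1, 2, 3) and `ngrams=(2, 3)` dropping the unigrams. A pure-Python sketch of that ordering (the layer computes it with ragged ops internally):

def ngram_expand(tokens, orders):
    """Mimics the output ordering asserted in the tests: grams grouped by order."""
    out = []
    for n in orders:
        out.extend(" ".join(tokens[i:i + n]) for i in range(len(tokens) - n + 1))
    return out

print(ngram_expand(["earth", "wind", "and", "fire"], (1, 2, 3)))
# ['earth', 'wind', 'and', 'fire', 'earth wind', 'wind and', 'and fire',
#  'earth wind and', 'wind and fire']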
test_vocab_setting_via_init_file(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + vocab_path = self._write_to_temp_file("vocab_file", vocab_data) + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + vocabulary=vocab_path, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_vocab_setting_via_setter(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + vocab_path = self._write_to_temp_file("vocab_file", vocab_data) + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + ) + layer.set_vocabulary(vocab_path) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_vocab_setting_with_oov_via_setter(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + vocab_path = self._write_to_temp_file("vocab_file", vocab_data) + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + ) + layer.set_vocabulary(vocab_path) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) @test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) class TextVectorizationDistributionTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_distribution_strategy_output(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - strategy = tf.distribute.OneDeviceStrategy("/cpu:0") - with strategy.scope(): - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_distribution_strategy_output(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 
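The expected indices in all of these vocab tests follow from one rule: in "int" mode, index 0 is the padding token "" and index 1 is the OOV token "[UNK]", so user tokens start at 2 and unknown inputs map to 1. A minimal sketch with the public layer:

import tensorflow as tf

layer = tf.keras.layers.TextVectorization(
    standardize=None, split=None, output_mode="int"
)
layer.set_vocabulary(["earth", "wind", "and", "fire"])
print(layer.get_vocabulary())  # ['', '[UNK]', 'earth', 'wind', 'and', 'fire']
print(layer(tf.constant([["fire", "and", "earth", "michigan"]])))  # [[5 4 2 1]], OOV -> 1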
1]] + + strategy = tf.distribute.OneDeviceStrategy("/cpu:0") + with strategy.scope(): + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) @test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) class TextVectorizationOutputTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_int_output(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_densifies_with_zeros(self): - vocab_data = ["earth", "wind", "and", "fire"] - # Create an input array that has 5 elements in the first example and 4 in - # the second. This should output a 2x5 tensor with a padding value in the - # second example. - input_array = np.array([["earth wind and also fire"], - ["fire and earth michigan"]]) - expected_output = [[2, 3, 4, 1, 5], [5, 4, 2, 1, 0]] - - # This test doesn't explicitly set an output shape, so the 2nd dimension - # should stay 'None'. - expected_output_shape = [None, None] - - # The input shape here is explicitly 1 because we're tokenizing. - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - output_mode=text_vectorization.INT) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_ragged(self): - vocab_data = ["earth", "wind", "and", "fire"] - # Create an input array that has 5 elements in the first example and 4 in - # the second. - input_array = np.array([["earth wind and also fire"], - ["fire and earth michigan"]]) - expected_output = tf.ragged.constant([[2, 3, 4, 1, 5], [5, 4, 2, 1]]) - expected_output_shape = [None, None] - - # The input shape here is explicitly 1 because we're tokenizing. 
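The distribution test only checks that building and running the layer inside a strategy scope gives the same answer as the unscoped case. A condensed sketch of the same setup:

import tensorflow as tf

strategy = tf.distribute.OneDeviceStrategy("/cpu:0")
with strategy.scope():
    inputs = tf.keras.Input(shape=(None,), dtype=tf.string)
    layer = tf.keras.layers.TextVectorization(
        standardize=None, split=None, output_mode="int"
    )
    layer.set_vocabulary(["earth", "wind", "and", "fire"])
    model = tf.keras.Model(inputs, layer(inputs))
print(model.predict(tf.constant([["earth", "wind", "and", "fire"]])))  # [[2 3 4 5]]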
- input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - output_mode=text_vectorization.INT, - ragged=True) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_densifies_with_zeros_and_pads(self): - vocab_data = ["earth", "wind", "and", "fire"] - # Create an input array that has 5 elements in the first example and 4 in - # the second. This should output a 2x6 tensor with a padding value in the - # second example, since output_sequence_length is set to 6. - input_array = np.array([["earth wind and also fire"], - ["fire and earth michigan"]]) - expected_output = [[2, 3, 4, 1, 5, 0], [5, 4, 2, 1, 0, 0]] - - output_sequence_length = 6 - expected_output_shape = [None, output_sequence_length] - - # The input shape here is explicitly 1 because we're tokenizing. - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - output_mode=text_vectorization.INT, - output_sequence_length=output_sequence_length) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_densifies_with_zeros_and_strips(self): - vocab_data = ["earth", "wind", "and", "fire"] - # Create an input array that has 5 elements in the first example and 4 in - # the second. This should output a 2x3 tensor with a padding value in the - # second example, since output_sequence_length is set to 3. - input_array = np.array([["earth wind and also fire"], - ["fire and earth michigan"]]) - expected_output = [[2, 3, 4], [5, 4, 2]] - output_sequence_length = 3 - expected_output_shape = [None, output_sequence_length] - - # The input shape here is explicitly 1 because we're tokenizing. - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - output_mode=text_vectorization.INT, - output_sequence_length=output_sequence_length) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_int_output_dynamically_strips_and_pads(self): - vocab_data = ["earth", "wind", "and", "fire"] - # Create an input array that has 5 elements in the first example and 4 in - # the second. This should output a 2x3 tensor with a padding value in the - # second example, since output_sequence_length is set to 3. - input_array = np.array([["earth wind and also fire"], - ["fire and earth michigan"]]) - expected_output = [[2, 3, 4], [5, 4, 2]] - output_sequence_length = 3 - expected_output_shape = [None, output_sequence_length] - - # The input shape here is explicitly 1 because we're tokenizing. 
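`ragged=True` is the one knob in these int-output tests that changes the container rather than the values: rows keep their own lengths instead of being densified with the padding index 0. A short sketch, assuming a TF version where the layer supports ragged output:

import tensorflow as tf

layer = tf.keras.layers.TextVectorization(
    standardize=None, split="whitespace", output_mode="int", ragged=True
)
layer.set_vocabulary(["earth", "wind", "and", "fire"])
out = layer(tf.constant([["earth wind and also fire"], ["fire and earth michigan"]]))
print(out)  # <tf.RaggedTensor [[2, 3, 4, 1, 5], [5, 4, 2, 1]]>, no trailing zeros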
- input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - output_mode=text_vectorization.INT, - output_sequence_length=output_sequence_length) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - # Create an input array that has 1 element in the first example and 2 in - # the second. This should output a 2x3 tensor with a padding value in the - # second example, since output_sequence_length is set to 3. - input_array_2 = np.array([["wind"], ["fire and"]]) - expected_output_2 = [[3, 0, 0], [5, 4, 0]] - output_dataset = model.predict(input_array_2) - self.assertAllEqual(expected_output_2, output_dataset) - - @parameterized.parameters( - {"sparse": True}, - {"sparse": False}, - ) - def test_multi_hot_output_hard_maximum(self, sparse): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - - # pyformat: disable - expected_output = [[0, 1, 1, 1, 0, 0], - [1, 1, 0, 1, 0, 0]] - # pyformat: enable - max_tokens = 6 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=max_tokens, - standardize=None, - split=None, - output_mode=text_vectorization.MULTI_HOT, - pad_to_max_tokens=True, - sparse=sparse) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - if sparse: - expected_output = tf.sparse.from_dense(tf.constant(expected_output)) - self.assertAllEqual(expected_output.indices, output_dataset.indices) - self.assertAllEqual(expected_output.values, output_dataset.values) - else: - self.assertAllEqual(expected_output, output_dataset) - - @parameterized.parameters( - {"sparse": True}, - {"sparse": False}, - ) - def test_multi_hot_output_soft_maximum(self, sparse): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - - # pyformat: disable - expected_output = [[0, 1, 1, 1, 0], - [1, 1, 0, 1, 0]] - # pyformat: enable - max_tokens = 5 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=10, - standardize=None, - split=None, - output_mode=text_vectorization.MULTI_HOT, - pad_to_max_tokens=False, - sparse=sparse) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - if sparse: - expected_output = tf.sparse.from_dense(tf.constant(expected_output)) - self.assertAllEqual(expected_output.indices, output_dataset.indices) - self.assertAllEqual(expected_output.values, output_dataset.values) - else: - self.assertAllEqual(expected_output, output_dataset) - - def test_multi_hot_output_hard_maximum_set_vocabulary_after_build(self): - vocab_data = ["earth", 
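`output_sequence_length` makes the trim/pad behavior explicit: every row comes out exactly that long, longer token streams are truncated, shorter ones padded with 0, and this holds per batch at predict time. A sketch of both directions:

import tensorflow as tf

layer = tf.keras.layers.TextVectorization(
    standardize=None, split="whitespace", output_mode="int",
    output_sequence_length=3,
)
layer.set_vocabulary(["earth", "wind", "and", "fire"])
print(layer(tf.constant([["earth wind and also fire"]])))  # [[2 3 4]]: trimmed to 3
print(layer(tf.constant([["fire and"]])))                  # [[5 4 0]]: padded to 3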
"wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - - # pyformat: disable - expected_output = [[0, 1, 1, 1, 0], - [1, 1, 0, 1, 0]] - # pyformat: enable - max_tokens = 5 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=max_tokens, - standardize=None, - split=None, - output_mode=text_vectorization.MULTI_HOT, - pad_to_max_tokens=True) - int_data = layer(input_data) - layer.set_vocabulary(vocab_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_multi_hot_output_hard_maximum_adapt_after_build(self): - vocab_data = np.array([ - "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and", - "and", "fire" - ]) - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - - # pyformat: disable - expected_output = [[0, 1, 1, 1, 0], - [1, 1, 0, 1, 0]] - # pyformat: enable - max_tokens = 5 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=max_tokens, - standardize=None, - split=None, - output_mode=text_vectorization.MULTI_HOT, - pad_to_max_tokens=True) - int_data = layer(input_data) - layer.adapt(vocab_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_multi_hot_output_hard_maximum_multiple_adapts(self): - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - adapt_data = ["earth", "earth", "earth", "earth", "wind", "wind", "wind"] - first_expected_output = [ - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0], - ] - second_adapt_data = [ - "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and", - "and", "fire" - ] - second_expected_output = [ - [0, 1, 1, 1, 0], - [1, 1, 0, 1, 0], - ] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=5, - standardize=None, - split=None, - output_mode=text_vectorization.MULTI_HOT, - pad_to_max_tokens=True) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - # Test the first adapt - layer.adapt(adapt_data) - first_output = model.predict(input_array) - # Test the second adapt - layer.adapt(second_adapt_data) - # We need to recompile the model to retrace our call graph. 
- model.compile() - second_output = model.predict(input_array) - self.assertAllEqual(first_expected_output, first_output) - self.assertAllEqual(second_expected_output, second_output) - - def test_multi_hot_output_soft_maximum_set_state_after_build(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - - # pyformat: disable - expected_output = [[0, 1, 1, 1, 0], - [1, 1, 0, 1, 0]] - # pyformat: enable - max_tokens = 5 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=10, - standardize=None, - split=None, - output_mode=text_vectorization.MULTI_HOT, - pad_to_max_tokens=False) - layer.build(input_data.shape) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_vocab_size_changed_pad_to_max_false_fails(self): - vocab_data = ["earth", "wind", "and", "fire"] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.MULTI_HOT, - pad_to_max_tokens=False) - layer.adapt(vocab_data) - _ = layer(input_data) - - layer.set_vocabulary(vocab_data[:2]) - with self.assertRaisesRegex(RuntimeError, - "vocabulary size cannot be changed"): - _ = layer(input_data) - - def test_count_output_hard_maximum(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - - # pyformat: disable - expected_output = [[0, 2, 1, 1, 0, 0], - [2, 1, 0, 1, 0, 0]] - # pyformat: enable - max_tokens = 6 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=6, - standardize=None, - split=None, - output_mode=text_vectorization.COUNT, - pad_to_max_tokens=True) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - def test_count_output_soft_maximum(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - - # pyformat: disable - expected_output = [[0, 2, 1, 1, 0], - [2, 1, 0, 1, 0]] - # pyformat: enable - max_tokens = 5 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=10, - standardize=None, - split=None, - output_mode=text_vectorization.COUNT, - pad_to_max_tokens=False) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - @parameterized.named_parameters( - ("sparse", True), - ("dense", False), - ) - def 
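The hard/soft-maximum pairs in these tests reduce to how the multi-hot width is chosen: `pad_to_max_tokens=True` pins it to `max_tokens`, while `False` lets it equal the actual vocabulary size (the user tokens plus one OOV slot at index 0; the non-"int" output modes reserve no padding token). A sketch of the two widths:

import tensorflow as tf

vocab = ["earth", "wind", "and", "fire"]
doc = tf.constant([["ohio", "and", "earth", "michigan"]])  # two OOV tokens

soft = tf.keras.layers.TextVectorization(
    standardize=None, split=None, output_mode="multi_hot",
    max_tokens=10, pad_to_max_tokens=False,
)
soft.set_vocabulary(vocab)
print(soft(doc))  # [[1. 1. 0. 1. 0.]]: width 5 = 4 tokens + OOV slot

hard = tf.keras.layers.TextVectorization(
    standardize=None, split=None, output_mode="multi_hot",
    max_tokens=6, pad_to_max_tokens=True,
)
hard.set_vocabulary(vocab)
print(hard(doc))  # [[1. 1. 0. 1. 0. 0.]]: width pinned at max_tokens=6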
test_tfidf_output_hard_maximum(self, sparse): - vocab_data = ["earth", "wind", "and", "fire"] - # OOV idf weight (bucket 0) should 0.5, the average of passed weights. - idf_weights = [.4, .25, .75, .6] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "fire", "earth", "michigan"]]) - - # pyformat: disable - # pylint: disable=bad-whitespace - expected_output = [[ 0, .8, .25, .75, 0, 0], - [ 1, .4, 0, 0, .6, 0]] - # pylint: enable=bad-whitespace - # pyformat: enable - max_tokens = 6 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=6, - standardize=None, - split=None, - output_mode=text_vectorization.TF_IDF, - pad_to_max_tokens=True, - sparse=sparse, - vocabulary=vocab_data, - idf_weights=idf_weights) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - if sparse: - output_dataset = tf.sparse.to_dense(output_dataset) - self.assertAllClose(expected_output, output_dataset) - - @parameterized.named_parameters( - ("sparse", True), - ("dense", False), - ) - def test_tfidf_output_soft_maximum(self, sparse): - vocab_data = ["earth", "wind", "and", "fire"] - # OOV idf weight (bucket 0) should 0.5, the average of passed weights. - idf_weights = [.4, .25, .75, .6] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "fire", "earth", "michigan"]]) - - # pyformat: disable - # pylint: disable=bad-whitespace - expected_output = [[ 0, .8, .25, .75, 0], - [ 1, .4, 0, 0, .6]] - # pylint: enable=bad-whitespace - # pyformat: enable - max_tokens = 5 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=10, - standardize=None, - split=None, - output_mode=text_vectorization.TF_IDF, - pad_to_max_tokens=False, - sparse=sparse, - vocabulary=vocab_data, - idf_weights=idf_weights) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - if sparse: - output_dataset = tf.sparse.to_dense(output_dataset) - self.assertAllClose(expected_output, output_dataset) - - @parameterized.named_parameters( - ("sparse", True), - ("dense", False), - ) - def test_tfidf_output_set_oov_weight(self, sparse): - vocab_data = ["[UNK]", "earth", "wind", "and", "fire"] - idf_weights = [.1, .4, .25, .75, .6] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "fire", "earth", "michigan"]]) - - # pyformat: disable - # pylint: disable=bad-whitespace - expected_output = [[ 0, .8, .25, .75, 0], - [ .2, .4, 0, 0, .6]] - # pylint: enable=bad-whitespace - # pyformat: enable - max_tokens = 5 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=10, - standardize=None, - split=None, - output_mode=text_vectorization.TF_IDF, - pad_to_max_tokens=False, - sparse=sparse, - vocabulary=vocab_data, - idf_weights=idf_weights) - int_data = layer(input_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - if sparse: - output_dataset 
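The TF-IDF expectations are easiest to verify by hand with the two rules these tests encode: each output slot is count(token) * idf_weight, and the OOV slot's weight defaults to the mean of the supplied `idf_weights` unless "[UNK]" is listed explicitly in the vocabulary (as in test_tfidf_output_set_oov_weight, where it is pinned to 0.1). Checking the expected row for ["ohio", "fire", "earth", "michigan"]:

idf = {"earth": 0.4, "wind": 0.25, "and": 0.75, "fire": 0.6}
oov_idf = sum(idf.values()) / len(idf)  # 0.5, the default OOV weight

doc = ["ohio", "fire", "earth", "michigan"]  # "ohio" and "michigan" are OOV
oov_count = sum(1 for t in doc if t not in idf)
print(oov_count * oov_idf)                # 1.0 -> bucket 0 of the expected output
print(doc.count("earth") * idf["earth"])  # 0.4
print(doc.count("fire") * idf["fire"])    # 0.6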
= tf.sparse.to_dense(output_dataset) - self.assertAllClose(expected_output, output_dataset) - - def test_accept_1D_input(self): - input_array = np.array(["earth wind and fire", - "fire and earth michigan"]) - layer = text_vectorization.TextVectorization( - standardize=None, split=None, output_mode="int") - layer.adapt(input_array) - _ = layer(input_array) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_int_output(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_densifies_with_zeros(self): + vocab_data = ["earth", "wind", "and", "fire"] + # Create an input array that has 5 elements in the first example and 4 + # in the second. This should output a 2x5 tensor with a padding value in + # the second example. + input_array = np.array( + [["earth wind and also fire"], ["fire and earth michigan"]] + ) + expected_output = [[2, 3, 4, 1, 5], [5, 4, 2, 1, 0]] + + # This test doesn't explicitly set an output shape, so the 2nd dimension + # should stay 'None'. + expected_output_shape = [None, None] + + # The input shape here is explicitly 1 because we're tokenizing. + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=text_vectorization.WHITESPACE, + output_mode=text_vectorization.INT, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_ragged(self): + vocab_data = ["earth", "wind", "and", "fire"] + # Create an input array that has 5 elements in the first example and 4 + # in the second. + input_array = np.array( + [["earth wind and also fire"], ["fire and earth michigan"]] + ) + expected_output = tf.ragged.constant([[2, 3, 4, 1, 5], [5, 4, 2, 1]]) + expected_output_shape = [None, None] + + # The input shape here is explicitly 1 because we're tokenizing. + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=text_vectorization.WHITESPACE, + output_mode=text_vectorization.INT, + ragged=True, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_densifies_with_zeros_and_pads(self): + vocab_data = ["earth", "wind", "and", "fire"] + # Create an input array that has 5 elements in the first example and 4 + # in the second. 
This should output a 2x6 tensor with a padding value in + # the second example, since output_sequence_length is set to 6. + input_array = np.array( + [["earth wind and also fire"], ["fire and earth michigan"]] + ) + expected_output = [[2, 3, 4, 1, 5, 0], [5, 4, 2, 1, 0, 0]] + + output_sequence_length = 6 + expected_output_shape = [None, output_sequence_length] + + # The input shape here is explicitly 1 because we're tokenizing. + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=text_vectorization.WHITESPACE, + output_mode=text_vectorization.INT, + output_sequence_length=output_sequence_length, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_densifies_with_zeros_and_strips(self): + vocab_data = ["earth", "wind", "and", "fire"] + # Create an input array that has 5 elements in the first example and 4 + # in the second. This should output a 2x3 tensor with a padding value in + # the second example, since output_sequence_length is set to 3. + input_array = np.array( + [["earth wind and also fire"], ["fire and earth michigan"]] + ) + expected_output = [[2, 3, 4], [5, 4, 2]] + output_sequence_length = 3 + expected_output_shape = [None, output_sequence_length] + + # The input shape here is explicitly 1 because we're tokenizing. + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=text_vectorization.WHITESPACE, + output_mode=text_vectorization.INT, + output_sequence_length=output_sequence_length, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_int_output_dynamically_strips_and_pads(self): + vocab_data = ["earth", "wind", "and", "fire"] + # Create an input array that has 5 elements in the first example and 4 + # in the second. This should output a 2x3 tensor with a padding value in + # the second example, since output_sequence_length is set to 3. + input_array = np.array( + [["earth wind and also fire"], ["fire and earth michigan"]] + ) + expected_output = [[2, 3, 4], [5, 4, 2]] + output_sequence_length = 3 + expected_output_shape = [None, output_sequence_length] + + # The input shape here is explicitly 1 because we're tokenizing. + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=text_vectorization.WHITESPACE, + output_mode=text_vectorization.INT, + output_sequence_length=output_sequence_length, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + # Create an input array that has 1 element in the first example and 2 in + # the second. 
This should output a 2x3 tensor with a padding value in + # the second example, since output_sequence_length is set to 3. + input_array_2 = np.array([["wind"], ["fire and"]]) + expected_output_2 = [[3, 0, 0], [5, 4, 0]] + output_dataset = model.predict(input_array_2) + self.assertAllEqual(expected_output_2, output_dataset) + + @parameterized.parameters( + {"sparse": True}, + {"sparse": False}, + ) + def test_multi_hot_output_hard_maximum(self, sparse): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + + # pyformat: disable + expected_output = [[0, 1, 1, 1, 0, 0], [1, 1, 0, 1, 0, 0]] + # pyformat: enable + max_tokens = 6 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=max_tokens, + standardize=None, + split=None, + output_mode=text_vectorization.MULTI_HOT, + pad_to_max_tokens=True, + sparse=sparse, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + if sparse: + expected_output = tf.sparse.from_dense(tf.constant(expected_output)) + self.assertAllEqual(expected_output.indices, output_dataset.indices) + self.assertAllEqual(expected_output.values, output_dataset.values) + else: + self.assertAllEqual(expected_output, output_dataset) + + @parameterized.parameters( + {"sparse": True}, + {"sparse": False}, + ) + def test_multi_hot_output_soft_maximum(self, sparse): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + + # pyformat: disable + expected_output = [[0, 1, 1, 1, 0], [1, 1, 0, 1, 0]] + # pyformat: enable + max_tokens = 5 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=10, + standardize=None, + split=None, + output_mode=text_vectorization.MULTI_HOT, + pad_to_max_tokens=False, + sparse=sparse, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + if sparse: + expected_output = tf.sparse.from_dense(tf.constant(expected_output)) + self.assertAllEqual(expected_output.indices, output_dataset.indices) + self.assertAllEqual(expected_output.values, output_dataset.values) + else: + self.assertAllEqual(expected_output, output_dataset) + + def test_multi_hot_output_hard_maximum_set_vocabulary_after_build(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + + # pyformat: disable + expected_output = [[0, 1, 1, 1, 0], [1, 1, 0, 1, 0]] + # pyformat: enable + max_tokens = 5 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=max_tokens, + standardize=None, + split=None, + output_mode=text_vectorization.MULTI_HOT, + pad_to_max_tokens=True, + ) + int_data = layer(input_data) + layer.set_vocabulary(vocab_data) + 
self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_multi_hot_output_hard_maximum_adapt_after_build(self): + vocab_data = np.array( + [ + "earth", + "earth", + "earth", + "earth", + "wind", + "wind", + "wind", + "and", + "and", + "fire", + ] + ) + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + + # pyformat: disable + expected_output = [[0, 1, 1, 1, 0], [1, 1, 0, 1, 0]] + # pyformat: enable + max_tokens = 5 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=max_tokens, + standardize=None, + split=None, + output_mode=text_vectorization.MULTI_HOT, + pad_to_max_tokens=True, + ) + int_data = layer(input_data) + layer.adapt(vocab_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_multi_hot_output_hard_maximum_multiple_adapts(self): + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + adapt_data = [ + "earth", + "earth", + "earth", + "earth", + "wind", + "wind", + "wind", + ] + first_expected_output = [ + [1, 1, 1, 0, 0], + [1, 1, 0, 0, 0], + ] + second_adapt_data = [ + "earth", + "earth", + "earth", + "earth", + "wind", + "wind", + "wind", + "and", + "and", + "fire", + ] + second_expected_output = [ + [0, 1, 1, 1, 0], + [1, 1, 0, 1, 0], + ] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=5, + standardize=None, + split=None, + output_mode=text_vectorization.MULTI_HOT, + pad_to_max_tokens=True, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + # Test the first adapt + layer.adapt(adapt_data) + first_output = model.predict(input_array) + # Test the second adapt + layer.adapt(second_adapt_data) + # We need to recompile the model to retrace our call graph. 
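Two behaviors worth noting from the adapt-based variants: `adapt` orders the learned vocabulary by descending token frequency, and because re-adapting swaps the lookup table, a model that has already traced `predict` must be recompiled so the call graph is retraced (hence the `model.compile()` between the two adapts in the test). A sketch of the frequency ordering:

import numpy as np
import tensorflow as tf

layer = tf.keras.layers.TextVectorization(
    standardize=None, split=None, output_mode="int"
)
layer.adapt(np.array(["earth"] * 4 + ["wind"] * 3 + ["and"] * 2 + ["fire"]))
# The most frequent token gets the lowest non-reserved index.
print(layer.get_vocabulary())  # ['', '[UNK]', 'earth', 'wind', 'and', 'fire']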
+ model.compile() + second_output = model.predict(input_array) + self.assertAllEqual(first_expected_output, first_output) + self.assertAllEqual(second_expected_output, second_output) + + def test_multi_hot_output_soft_maximum_set_state_after_build(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + + # pyformat: disable + expected_output = [[0, 1, 1, 1, 0], [1, 1, 0, 1, 0]] + # pyformat: enable + max_tokens = 5 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=10, + standardize=None, + split=None, + output_mode=text_vectorization.MULTI_HOT, + pad_to_max_tokens=False, + ) + layer.build(input_data.shape) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_vocab_size_changed_pad_to_max_false_fails(self): + vocab_data = ["earth", "wind", "and", "fire"] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.MULTI_HOT, + pad_to_max_tokens=False, + ) + layer.adapt(vocab_data) + _ = layer(input_data) + + with self.assertRaisesRegex( + RuntimeError, "vocabulary size cannot be changed" + ): + layer.set_vocabulary(vocab_data[:2]) + + def test_count_output_hard_maximum(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + + # pyformat: disable + expected_output = [[0, 2, 1, 1, 0, 0], [2, 1, 0, 1, 0, 0]] + # pyformat: enable + max_tokens = 6 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=6, + standardize=None, + split=None, + output_mode=text_vectorization.COUNT, + pad_to_max_tokens=True, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_count_output_soft_maximum(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + + # pyformat: disable + expected_output = [[0, 2, 1, 1, 0], [2, 1, 0, 1, 0]] + # pyformat: enable + max_tokens = 5 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=10, + standardize=None, + split=None, + output_mode=text_vectorization.COUNT, + pad_to_max_tokens=False, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + @parameterized.named_parameters( + ("sparse", True), + ("dense", False), 
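"count" mode follows the same index layout as multi-hot but accumulates occurrences instead of clamping to 1, with OOV hits accumulating in slot 0. A minimal sketch:

import tensorflow as tf

layer = tf.keras.layers.TextVectorization(
    standardize=None, split=None, output_mode="count",
    max_tokens=10, pad_to_max_tokens=False,
)
layer.set_vocabulary(["earth", "wind", "and", "fire"])
# "earth" appears twice -> 2.0 in its slot; no OOV tokens -> 0.0 in slot 0.
print(layer(tf.constant([["earth", "wind", "and", "earth"]])))  # [[0. 2. 1. 1. 0.]]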
+ ) + def test_tfidf_output_hard_maximum(self, sparse): + vocab_data = ["earth", "wind", "and", "fire"] + # OOV idf weight (bucket 0) should be 0.5, the average of passed weights. + idf_weights = [0.4, 0.25, 0.75, 0.6] + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "fire", "earth", "michigan"], + ] + ) + + # pyformat: disable + + expected_output = [[0, 0.8, 0.25, 0.75, 0, 0], [1, 0.4, 0, 0, 0.6, 0]] + + # pyformat: enable + max_tokens = 6 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=6, + standardize=None, + split=None, + output_mode=text_vectorization.TF_IDF, + pad_to_max_tokens=True, + sparse=sparse, + vocabulary=vocab_data, + idf_weights=idf_weights, + ) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + if sparse: + output_dataset = tf.sparse.to_dense(output_dataset) + self.assertAllClose(expected_output, output_dataset) + + @parameterized.named_parameters( + ("sparse", True), + ("dense", False), + ) + def test_tfidf_output_soft_maximum(self, sparse): + vocab_data = ["earth", "wind", "and", "fire"] + # OOV idf weight (bucket 0) should be 0.5, the average of passed weights. + idf_weights = [0.4, 0.25, 0.75, 0.6] + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "fire", "earth", "michigan"], + ] + ) + + # pyformat: disable + + expected_output = [[0, 0.8, 0.25, 0.75, 0], [1, 0.4, 0, 0, 0.6]] + + # pyformat: enable + max_tokens = 5 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=10, + standardize=None, + split=None, + output_mode=text_vectorization.TF_IDF, + pad_to_max_tokens=False, + sparse=sparse, + vocabulary=vocab_data, + idf_weights=idf_weights, + ) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + if sparse: + output_dataset = tf.sparse.to_dense(output_dataset) + self.assertAllClose(expected_output, output_dataset) + + @parameterized.named_parameters( + ("sparse", True), + ("dense", False), + ) + def test_tfidf_output_set_oov_weight(self, sparse): + vocab_data = ["[UNK]", "earth", "wind", "and", "fire"] + idf_weights = [0.1, 0.4, 0.25, 0.75, 0.6] + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "fire", "earth", "michigan"], + ] + ) + + # pyformat: disable + + expected_output = [[0, 0.8, 0.25, 0.75, 0], [0.2, 0.4, 0, 0, 0.6]] + + # pyformat: enable + max_tokens = 5 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=10, + standardize=None, + split=None, + output_mode=text_vectorization.TF_IDF, + pad_to_max_tokens=False, + sparse=sparse, + vocabulary=vocab_data, + idf_weights=idf_weights, + ) + int_data = layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + if sparse: + output_dataset = tf.sparse.to_dense(output_dataset) + self.assertAllClose(expected_output, output_dataset) + + def
test_accept_1D_input(self): + input_array = np.array( + ["earth wind and fire", "fire and earth michigan"] + ) + layer = text_vectorization.TextVectorization( + standardize=None, split=None, output_mode="int" + ) + layer.adapt(input_array) + _ = layer(input_array) @test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) class TextVectorizationModelBuildingTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - @parameterized.named_parameters( - { - "testcase_name": "count_hard_max", - "pad_to_max_tokens": True, - "output_mode": text_vectorization.COUNT - }, { - "testcase_name": "count_soft_max", - "pad_to_max_tokens": False, - "output_mode": text_vectorization.COUNT - }, { - "testcase_name": "binary_hard_max", - "pad_to_max_tokens": True, - "output_mode": text_vectorization.MULTI_HOT - }, { - "testcase_name": "binary_soft_max", - "pad_to_max_tokens": False, - "output_mode": text_vectorization.MULTI_HOT - }, { - "testcase_name": "tfidf_hard_max", - "pad_to_max_tokens": True, - "output_mode": text_vectorization.TF_IDF - }, { - "testcase_name": "tfidf_soft_max", - "pad_to_max_tokens": False, - "output_mode": text_vectorization.TF_IDF - }) - def test_end_to_end_bagged_modeling(self, output_mode, pad_to_max_tokens): - vocab_data = ["earth", "wind", "and", "fire"] - if output_mode == text_vectorization.TF_IDF: - idf_weights = [.5, .25, .2, .125] - else: - idf_weights = None - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=10, - standardize=None, - split=None, - output_mode=output_mode, - pad_to_max_tokens=pad_to_max_tokens, - vocabulary=vocab_data, - idf_weights=idf_weights) - - int_data = layer(input_data) - float_data = backend.cast(int_data, dtype="float32") - output_data = core.Dense(64)(float_data) - model = keras.Model(inputs=input_data, outputs=output_data) - _ = model.predict(input_array) - - def test_end_to_end_vocab_modeling(self): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth wind and also fire"], - ["fire and earth michigan"]]) - output_sequence_length = 6 - max_tokens = 5 - - # The input shape here is explicitly 1 because we're tokenizing. 
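The model-building tests boil down to two wiring patterns: "int" output feeds sequence models via an Embedding, while the bag modes (multi_hot/count/tf_idf) can be cast to float and fed to Dense directly. A condensed end-to-end sketch of the sequence path (layer sizes follow the test; Embedding's `input_dim` must cover the vocabulary plus the two reserved indices):

import tensorflow as tf

vocab = ["earth", "wind", "and", "fire"]
inputs = tf.keras.Input(shape=(1,), dtype=tf.string)  # shape (1,): one string per example
ids = tf.keras.layers.TextVectorization(
    standardize=None, split="whitespace", output_mode="int",
    output_sequence_length=6, vocabulary=vocab,
)(inputs)
x = tf.keras.layers.Embedding(input_dim=len(vocab) + 2, output_dim=32)(ids)
outputs = tf.keras.layers.Conv1D(250, 3, padding="valid", activation="relu")(x)
model = tf.keras.Model(inputs, outputs)
print(model.predict(tf.constant([["earth wind and also fire"]])).shape)  # (1, 4, 250)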
- input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=text_vectorization.WHITESPACE, - output_mode=text_vectorization.INT, - output_sequence_length=output_sequence_length) - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - embedded_data = core.Embedding( - input_dim=max_tokens + 1, output_dim=32)( - int_data) - output_data = convolutional.Conv1D( - 250, 3, padding="valid", activation="relu", strides=1)( - embedded_data) - - model = keras.Model(inputs=input_data, outputs=output_data) - _ = model.predict(input_array) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + @parameterized.named_parameters( + { + "testcase_name": "count_hard_max", + "pad_to_max_tokens": True, + "output_mode": text_vectorization.COUNT, + }, + { + "testcase_name": "count_soft_max", + "pad_to_max_tokens": False, + "output_mode": text_vectorization.COUNT, + }, + { + "testcase_name": "binary_hard_max", + "pad_to_max_tokens": True, + "output_mode": text_vectorization.MULTI_HOT, + }, + { + "testcase_name": "binary_soft_max", + "pad_to_max_tokens": False, + "output_mode": text_vectorization.MULTI_HOT, + }, + { + "testcase_name": "tfidf_hard_max", + "pad_to_max_tokens": True, + "output_mode": text_vectorization.TF_IDF, + }, + { + "testcase_name": "tfidf_soft_max", + "pad_to_max_tokens": False, + "output_mode": text_vectorization.TF_IDF, + }, + ) + def test_end_to_end_bagged_modeling(self, output_mode, pad_to_max_tokens): + vocab_data = ["earth", "wind", "and", "fire"] + if output_mode == text_vectorization.TF_IDF: + idf_weights = [0.5, 0.25, 0.2, 0.125] + else: + idf_weights = None + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=10, + standardize=None, + split=None, + output_mode=output_mode, + pad_to_max_tokens=pad_to_max_tokens, + vocabulary=vocab_data, + idf_weights=idf_weights, + ) + + int_data = layer(input_data) + float_data = backend.cast(int_data, dtype="float32") + output_data = core.Dense(64)(float_data) + model = keras.Model(inputs=input_data, outputs=output_data) + _ = model.predict(input_array) + + def test_end_to_end_vocab_modeling(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [["earth wind and also fire"], ["fire and earth michigan"]] + ) + output_sequence_length = 6 + max_tokens = 5 + + # The input shape here is explicitly 1 because we're tokenizing. 
+ input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=text_vectorization.WHITESPACE, + output_mode=text_vectorization.INT, + output_sequence_length=output_sequence_length, + ) + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + embedded_data = core.Embedding(input_dim=max_tokens + 1, output_dim=32)( + int_data + ) + output_data = convolutional.Conv1D( + 250, 3, padding="valid", activation="relu", strides=1 + )(embedded_data) + + model = keras.Model(inputs=input_data, outputs=output_data) + _ = model.predict(input_array) @test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) -class TextVectorizationVocbularyTest( +class TextVectorizationVocabularyTest( test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest, ): + def test_get_vocabulary(self): + vocab = ["earth", "wind", "and", "fire"] - def test_get_vocabulary(self): - vocab = ["earth", "wind", "and", "fire"] + layer = text_vectorization.TextVectorization(vocabulary=vocab) + self.assertAllEqual( + layer.get_vocabulary(), + ["", "[UNK]", "earth", "wind", "and", "fire"], + ) - layer = text_vectorization.TextVectorization(vocabulary=vocab) - self.assertAllEqual(layer.get_vocabulary(), - ["", "[UNK]", "earth", "wind", "and", "fire"]) + def test_get_vocabulary_adapt(self): + vocab = np.array( + [["earth earth earth earth wind wind wind and and fire"]] + ) - def test_get_vocabulary_adapt(self): - vocab = np.array([["earth earth earth earth wind wind wind and and fire"]]) + layer = text_vectorization.TextVectorization() + layer.adapt(vocab) + self.assertAllEqual( + layer.get_vocabulary(), + ["", "[UNK]", "earth", "wind", "and", "fire"], + ) - layer = text_vectorization.TextVectorization() - layer.adapt(vocab) - self.assertAllEqual(layer.get_vocabulary(), - ["", "[UNK]", "earth", "wind", "and", "fire"]) + def test_get_vocabulary_no_special_tokens(self): + vocab = ["earth", "wind", "and", "fire"] - def test_get_vocabulary_no_special_tokens(self): - vocab = ["earth", "wind", "and", "fire"] - - layer = text_vectorization.TextVectorization(vocabulary=vocab) - self.assertAllEqual( - layer.get_vocabulary(include_special_tokens=False), - ["earth", "wind", "and", "fire"]) + layer = text_vectorization.TextVectorization(vocabulary=vocab) + self.assertAllEqual( + layer.get_vocabulary(include_special_tokens=False), + ["earth", "wind", "and", "fire"], + ) @test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) -class TextVectorizationErrorTest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest - ): - - def test_too_long_vocab_fails_in_single_setting(self): - vocab_data = ["earth", "wind", "and", "fire"] - - layer = text_vectorization.TextVectorization( - max_tokens=4, - standardize=None, - split=None, - output_mode=text_vectorization.INT) - with self.assertRaisesRegex(ValueError, - "vocabulary larger than the maximum vocab.*"): - layer.set_vocabulary(vocab_data) - - def test_setting_vocab_without_idf_weights_fails_in_tfidf_mode(self): - vocab_data = ["earth", "wind", "and", "fire"] - - with self.assertRaisesRegex( - ValueError, "`idf_weights` must be set if output_mode is TF_IDF"): - text_vectorization.TextVectorization( - max_tokens=5, - standardize=None, - split=None, - output_mode=text_vectorization.TF_IDF, - vocabulary=vocab_data) - - def test_idf_weights_length_mismatch_fails(self): - vocab_data = ["earth", "wind", "and", "fire"] - 
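`get_vocabulary` is the one accessor the vocabulary tests above rely on: by default it includes the two special tokens in index order, and `include_special_tokens=False` returns just the user-supplied or learned tokens. In short:

import numpy as np
import tensorflow as tf

layer = tf.keras.layers.TextVectorization()
layer.adapt(np.array([["earth earth earth earth wind wind wind and and fire"]]))
print(layer.get_vocabulary())
# ['', '[UNK]', 'earth', 'wind', 'and', 'fire']
print(layer.get_vocabulary(include_special_tokens=False))
# ['earth', 'wind', 'and', 'fire']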
idf_weights = [1, 2, 3] - with self.assertRaisesRegex( - ValueError, "`idf_weights` must be the same length as vocab"): - text_vectorization.TextVectorization( - max_tokens=5, - standardize=None, - split=None, - output_mode=text_vectorization.TF_IDF, - vocabulary=vocab_data, - idf_weights=idf_weights) - - def test_set_tfidf_in_non_tfidf_fails(self): - vocab_data = ["earth", "wind", "and", "fire"] - idf_weights = [1, 2, 3, 4] - with self.assertRaisesRegex(ValueError, - "`idf_weights` should only be set if"): - text_vectorization.TextVectorization( - max_tokens=5, - standardize=None, - split=None, - output_mode=text_vectorization.MULTI_HOT, - vocabulary=vocab_data, - idf_weights=idf_weights) - - def test_zero_max_tokens_fails(self): - with self.assertRaisesRegex(ValueError, "max_tokens.*"): - _ = text_vectorization.TextVectorization(max_tokens=0) - - def test_non_string_dtype_fails(self): - with self.assertRaisesRegex(ValueError, "dtype of string.*"): - _ = text_vectorization.TextVectorization(dtype=tf.int64) - - def test_unknown_standardize_arg_fails(self): - with self.assertRaisesRegex(ValueError, - "`standardize` arg.*unsupported_value"): - _ = text_vectorization.TextVectorization(standardize="unsupported_value") - - def test_unknown_split_arg_fails(self): - with self.assertRaisesRegex(ValueError, "`split` arg.*unsupported_value"): - _ = text_vectorization.TextVectorization(split="unsupported_value") - - def test_unknown_output_mode_arg_fails(self): - with self.assertRaisesRegex(ValueError, - "`output_mode` arg.*unsupported_value"): - _ = text_vectorization.TextVectorization(output_mode="unsupported_value") - - def test_unknown_ngrams_arg_fails(self): - with self.assertRaisesRegex(ValueError, "ngrams.*unsupported_value"): - _ = text_vectorization.TextVectorization(ngrams="unsupported_value") - - def test_float_ngrams_arg_fails(self): - with self.assertRaisesRegex(ValueError, "ngrams.*2.9"): - _ = text_vectorization.TextVectorization(ngrams=2.9) - - def test_float_tuple_ngrams_arg_fails(self): - with self.assertRaisesRegex(ValueError, "ngrams.*(1.3, 2.9)"): - _ = text_vectorization.TextVectorization(ngrams=(1.3, 2.9)) - - def test_non_int_output_sequence_length_dtype_fails(self): - with self.assertRaisesRegex(ValueError, "output_sequence_length.*2.0"): - _ = text_vectorization.TextVectorization( - output_mode="int", output_sequence_length=2.0) - - def test_non_none_output_sequence_length_fails_if_output_mode_not_int(self): - with self.assertRaisesRegex(ValueError, - "`output_sequence_length` must not be set"): - _ = text_vectorization.TextVectorization( - output_mode="count", output_sequence_length=2) - - def test_non_none_output_sequence_length_fails_if_ragged_true(self): - with self.assertRaisesRegex(ValueError, - "`output_sequence_length` must not be set"): - _ = text_vectorization.TextVectorization( - ragged=True, output_sequence_length=2) - - def test_ragged_true_fails_if_output_mode_not_int(self): - with self.assertRaisesRegex(ValueError, "`ragged` must not be true if"): - _ = text_vectorization.TextVectorization( - ragged=True, output_mode=text_vectorization.MULTI_HOT) - - def test_sparse_true_fails_if_output_mode_is_int(self): - with self.assertRaisesRegex(ValueError, "`sparse` may only be true if"): - _ = text_vectorization.TextVectorization( - sparse=True, output_mode=text_vectorization.INT) +class TextVectorizationErrorTest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_too_long_vocab_fails_in_single_setting(self): + vocab_data = 
["earth", "wind", "and", "fire"] + + layer = text_vectorization.TextVectorization( + max_tokens=4, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + ) + with self.assertRaisesRegex( + ValueError, "vocabulary larger than the maximum vocab.*" + ): + layer.set_vocabulary(vocab_data) + + def test_setting_vocab_without_idf_weights_fails_in_tfidf_mode(self): + vocab_data = ["earth", "wind", "and", "fire"] + + with self.assertRaisesRegex( + ValueError, "`idf_weights` must be set if output_mode is TF_IDF" + ): + text_vectorization.TextVectorization( + max_tokens=5, + standardize=None, + split=None, + output_mode=text_vectorization.TF_IDF, + vocabulary=vocab_data, + ) + + def test_idf_weights_length_mismatch_fails(self): + vocab_data = ["earth", "wind", "and", "fire"] + idf_weights = [1, 2, 3] + with self.assertRaisesRegex( + ValueError, "`idf_weights` must be the same length as vocab" + ): + text_vectorization.TextVectorization( + max_tokens=5, + standardize=None, + split=None, + output_mode=text_vectorization.TF_IDF, + vocabulary=vocab_data, + idf_weights=idf_weights, + ) + + def test_set_tfidf_in_non_tfidf_fails(self): + vocab_data = ["earth", "wind", "and", "fire"] + idf_weights = [1, 2, 3, 4] + with self.assertRaisesRegex( + ValueError, "`idf_weights` should only be set if" + ): + text_vectorization.TextVectorization( + max_tokens=5, + standardize=None, + split=None, + output_mode=text_vectorization.MULTI_HOT, + vocabulary=vocab_data, + idf_weights=idf_weights, + ) + + def test_zero_max_tokens_fails(self): + with self.assertRaisesRegex(ValueError, "max_tokens.*"): + _ = text_vectorization.TextVectorization(max_tokens=0) + + def test_non_string_dtype_fails(self): + with self.assertRaisesRegex(ValueError, "dtype of string.*"): + _ = text_vectorization.TextVectorization(dtype=tf.int64) + + def test_unknown_standardize_arg_fails(self): + with self.assertRaisesRegex( + ValueError, "`standardize` arg.*unsupported_value" + ): + _ = text_vectorization.TextVectorization( + standardize="unsupported_value" + ) + + def test_unknown_split_arg_fails(self): + with self.assertRaisesRegex( + ValueError, "`split` arg.*unsupported_value" + ): + _ = text_vectorization.TextVectorization(split="unsupported_value") + + def test_unknown_output_mode_arg_fails(self): + with self.assertRaisesRegex( + ValueError, "`output_mode` arg.*unsupported_value" + ): + _ = text_vectorization.TextVectorization( + output_mode="unsupported_value" + ) + + def test_unknown_ngrams_arg_fails(self): + with self.assertRaisesRegex(ValueError, "ngrams.*unsupported_value"): + _ = text_vectorization.TextVectorization(ngrams="unsupported_value") + + def test_float_ngrams_arg_fails(self): + with self.assertRaisesRegex(ValueError, "ngrams.*2.9"): + _ = text_vectorization.TextVectorization(ngrams=2.9) + + def test_float_tuple_ngrams_arg_fails(self): + with self.assertRaisesRegex(ValueError, "ngrams.*(1.3, 2.9)"): + _ = text_vectorization.TextVectorization(ngrams=(1.3, 2.9)) + + def test_non_int_output_sequence_length_dtype_fails(self): + with self.assertRaisesRegex(ValueError, "output_sequence_length.*2.0"): + _ = text_vectorization.TextVectorization( + output_mode="int", output_sequence_length=2.0 + ) + + def test_non_none_output_sequence_length_fails_if_output_mode_not_int(self): + with self.assertRaisesRegex( + ValueError, "`output_sequence_length` must not be set" + ): + _ = text_vectorization.TextVectorization( + output_mode="count", output_sequence_length=2 + ) + + def 
test_non_none_output_sequence_length_fails_if_ragged_true(self): + with self.assertRaisesRegex( + ValueError, "`output_sequence_length` must not be set" + ): + _ = text_vectorization.TextVectorization( + ragged=True, output_sequence_length=2 + ) + + def test_ragged_true_fails_if_output_mode_not_int(self): + with self.assertRaisesRegex(ValueError, "`ragged` must not be true if"): + _ = text_vectorization.TextVectorization( + ragged=True, output_mode=text_vectorization.MULTI_HOT + ) + + def test_sparse_true_fails_if_output_mode_is_int(self): + with self.assertRaisesRegex(ValueError, "`sparse` may only be true if"): + _ = text_vectorization.TextVectorization( + sparse=True, output_mode=text_vectorization.INT + ) # Custom functions for the custom callable serialization test. Declared here # to avoid multiple registrations from run_all_keras_modes(). -@generic_utils.register_keras_serializable(package="Test") +@register_keras_serializable(package="Test") def custom_standardize_fn(x): - return tf.strings.lower(x) + return tf.strings.lower(x) -@generic_utils.register_keras_serializable(package="Test") +@register_keras_serializable(package="Test") def custom_split_fn(x): - return tf.strings.split(x, sep=">") + return tf.strings.split(x, sep=">") @test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) class TextVectorizationSavingTest( - test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def tearDown(self): - keras.backend.clear_session() - gc.collect() - super(TextVectorizationSavingTest, self).tearDown() - - @parameterized.parameters( - {"init_vocab": True}, - {"init_vocab": False}, - ) - def test_saving(self, init_vocab): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - vocabulary = vocab_data if init_vocab else None - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT, - vocabulary=vocabulary) - if not init_vocab: - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - - model.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - - loaded_model = keras.models.load_model(output_path) - self.assertAllEqual(loaded_model.predict(input_array), expected_output) - - @parameterized.parameters( - {"init_vocab": True}, - {"init_vocab": False}, - ) - def test_saving_when_nested(self, init_vocab): - vocab_data = ["earth", "wind", "and", "fire"] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - # Build and validate a golden model. 
- input_data = keras.Input(shape=(None,), dtype=tf.string) - vocabulary = vocab_data if init_vocab else None - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT, - vocabulary=vocabulary) - if not init_vocab: - layer.set_vocabulary(vocab_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - outer_input = keras.Input(shape=(None,), dtype=tf.string) - outer_output = model(outer_input) - outer_model = keras.Model(inputs=outer_input, outputs=outer_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - outer_model.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - - loaded_model = keras.models.load_model(output_path) - self.assertAllEqual(loaded_model.predict(input_array), expected_output) - - def test_saving_when_adapted(self): - adapt_data = [ - "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and", - "and", "fire" - ] - input_array = np.array([["earth", "wind", "and", "fire"], - ["fire", "and", "earth", "michigan"]]) - expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=None, - split=None, - output_mode=text_vectorization.INT) - layer.adapt(adapt_data) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - - model.save(output_path, save_format="tf") - - # Delete the session and graph to ensure that the loaded model is generated - # from scratch. - keras.backend.clear_session() - - loaded_model = keras.models.load_model(output_path) - self.assertAllEqual(loaded_model.predict(input_array), expected_output) - - def test_saving_with_tfidf(self): - vocab_data = ["earth", "wind", "and", "fire"] - # OOV idf weight (bucket 0) should 0.5, the average of passed weights. - idf_weights = [.4, .25, .75, .6] - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "fire", "earth", "michigan"]]) - - # pyformat: disable - # pylint: disable=bad-whitespace - expected_output = [[ 0, .8, .25, .75, 0], - [ 1, .4, 0, 0, .6]] - vocab_data = ["earth", "wind", "and", "fire"] - # pylint: enable=bad-whitespace - # pyformat: enable - - # Build and validate a golden model. - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=5, - standardize=None, - split=None, - output_mode=text_vectorization.TF_IDF) - layer.set_vocabulary(vocab_data, idf_weights=idf_weights) - - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllClose(output_dataset, expected_output) - - # Save the model to disk. - output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") - model.save(output_path, save_format="tf") - loaded_model = keras.models.load_model(output_path) - - # Ensure that the loaded model is unique (so that the save/load is real) - self.assertIsNot(model, loaded_model) - - # Validate correctness of the new model. 
- new_output_dataset = loaded_model.predict(input_array) - self.assertAllClose(new_output_dataset, expected_output) - - def test_serialization_with_custom_callables(self): - input_array = np.array([["earth>wind>and Fire"], - ["\tfire>And\nearth>michigan"]]) - expected_output = [[b"earth", b"wind", b"and fire"], - [b"\tfire", b"and\nearth", b"michigan"]] - - input_data = keras.Input(shape=(1,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=None, - standardize=custom_standardize_fn, - split=custom_split_fn, - ngrams=None, - output_mode=None) - int_data = layer(input_data) - model = keras.Model(inputs=input_data, outputs=int_data) - output_dataset = model.predict(input_array) - self.assertAllEqual(expected_output, output_dataset) - - serialized_model_data = model.get_config() - new_model = keras.Model.from_config(serialized_model_data) - new_output_dataset = new_model.predict(input_array) - self.assertAllEqual(expected_output, new_output_dataset) + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def tearDown(self): + keras.backend.clear_session() + gc.collect() + super(TextVectorizationSavingTest, self).tearDown() + + @parameterized.parameters( + {"init_vocab": True}, + {"init_vocab": False}, + ) + def test_saving(self, init_vocab): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + # Build and validate a golden model. + input_data = keras.Input(shape=(None,), dtype=tf.string) + vocabulary = vocab_data if init_vocab else None + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + vocabulary=vocabulary, + ) + if not init_vocab: + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + # Save the model to disk. + output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") + + model.save(output_path, save_format="tf") + + # Delete the session and graph to ensure that the loaded model is + # generated from scratch. + keras.backend.clear_session() + + loaded_model = keras.models.load_model(output_path) + self.assertAllEqual(loaded_model.predict(input_array), expected_output) + + @parameterized.parameters( + {"init_vocab": True}, + {"init_vocab": False}, + ) + def test_saving_when_nested(self, init_vocab): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array( + [ + ["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"], + ] + ) + expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]] + + # Build and validate a golden model. + input_data = keras.Input(shape=(None,), dtype=tf.string) + vocabulary = vocab_data if init_vocab else None + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=None, + split=None, + output_mode=text_vectorization.INT, + vocabulary=vocabulary, + ) + if not init_vocab: + layer.set_vocabulary(vocab_data) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + + outer_input = keras.Input(shape=(None,), dtype=tf.string) + outer_output = model(outer_input) + outer_model = keras.Model(inputs=outer_input, outputs=outer_output) + + # Save the model to disk. 
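(For context on what these saving tests guard: the layer's vocabulary is lookup-table state rather than a trainable weight, yet it must survive a SavedModel round trip. A minimal sketch, with an illustrative path:)

    import tensorflow as tf

    layer = tf.keras.layers.TextVectorization(
        vocabulary=["earth", "wind", "and", "fire"]
    )
    inputs = tf.keras.Input(shape=(1,), dtype=tf.string)
    model = tf.keras.Model(inputs, layer(inputs))
    model.save("/tmp/tv_saved_model", save_format="tf")

    # The reloaded layer should serve the same vocabulary without adapt().
    reloaded = tf.keras.models.load_model("/tmp/tv_saved_model")
    print(reloaded.layers[1].get_vocabulary())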
+ output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
+ outer_model.save(output_path, save_format="tf")
+
+ # Delete the session and graph to ensure that the loaded model is
+ # generated from scratch.
+ keras.backend.clear_session()
+
+ loaded_model = keras.models.load_model(output_path)
+ self.assertAllEqual(loaded_model.predict(input_array), expected_output)
+
+ def test_saving_when_adapted(self):
+ adapt_data = [
+ "earth",
+ "earth",
+ "earth",
+ "earth",
+ "wind",
+ "wind",
+ "wind",
+ "and",
+ "and",
+ "fire",
+ ]
+ input_array = np.array(
+ [
+ ["earth", "wind", "and", "fire"],
+ ["fire", "and", "earth", "michigan"],
+ ]
+ )
+ expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
+
+ # Build and validate a golden model.
+ input_data = keras.Input(shape=(None,), dtype=tf.string)
+ layer = text_vectorization.TextVectorization(
+ max_tokens=None,
+ standardize=None,
+ split=None,
+ output_mode=text_vectorization.INT,
+ )
+ layer.adapt(adapt_data)
+ int_data = layer(input_data)
+ model = keras.Model(inputs=input_data, outputs=int_data)
+
+ # Save the model to disk.
+ output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
+
+ model.save(output_path, save_format="tf")
+
+ # Delete the session and graph to ensure that the loaded model is
+ # generated from scratch.
+ keras.backend.clear_session()
+
+ loaded_model = keras.models.load_model(output_path)
+ self.assertAllEqual(loaded_model.predict(input_array), expected_output)
+
+ def test_saving_with_tfidf(self):
+ vocab_data = ["earth", "wind", "and", "fire"]
+ # OOV idf weight (bucket 0) should be 0.5, the average of the passed
+ # weights.
+ idf_weights = [0.4, 0.25, 0.75, 0.6]
+ input_array = np.array(
+ [
+ ["earth", "wind", "and", "earth"],
+ ["ohio", "fire", "earth", "michigan"],
+ ]
+ )
+
+ # pyformat: disable
+
+ expected_output = [[0, 0.8, 0.25, 0.75, 0], [1, 0.4, 0, 0, 0.6]]
+ vocab_data = ["earth", "wind", "and", "fire"]
+
+ # pyformat: enable
+
+ # Build and validate a golden model.
+ input_data = keras.Input(shape=(None,), dtype=tf.string)
+ layer = text_vectorization.TextVectorization(
+ max_tokens=5,
+ standardize=None,
+ split=None,
+ output_mode=text_vectorization.TF_IDF,
+ )
+ layer.set_vocabulary(vocab_data, idf_weights=idf_weights)
+
+ int_data = layer(input_data)
+ model = keras.Model(inputs=input_data, outputs=int_data)
+ output_dataset = model.predict(input_array)
+ self.assertAllClose(output_dataset, expected_output)
+
+ # Save the model to disk.
+ output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
+ model.save(output_path, save_format="tf")
+ loaded_model = keras.models.load_model(output_path)
+
+ # Ensure that the loaded model is unique (so that the save/load is real).
+ self.assertIsNot(model, loaded_model)
+
+ # Validate correctness of the new model.
+ new_output_dataset = loaded_model.predict(input_array) + self.assertAllClose(new_output_dataset, expected_output) + + def test_serialization_with_custom_callables(self): + input_array = np.array( + [["earth>wind>and Fire"], ["\tfire>And\nearth>michigan"]] + ) + expected_output = [ + [b"earth", b"wind", b"and fire"], + [b"\tfire", b"and\nearth", b"michigan"], + ] + + input_data = keras.Input(shape=(1,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=None, + standardize=custom_standardize_fn, + split=custom_split_fn, + ngrams=None, + output_mode=None, + ) + int_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=int_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + serialized_model_data = model.get_config() + new_model = keras.Model.from_config(serialized_model_data) + new_output_dataset = new_model.predict(input_array) + self.assertAllEqual(expected_output, new_output_dataset) + + @test_utils.run_v2_only() + def test_saving_v3(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array(["earth, wind, and fire"]) + + # First, with a static vocabulary. + input_data = keras.Input(shape=(), dtype=tf.string) + layer = text_vectorization.TextVectorization(vocabulary=vocab_data) + output = layer(input_data) + model = keras.Model(inputs=input_data, outputs=output) + ref_output = model.predict(input_array) + temp_dir = self.get_temp_dir() + model_path = os.path.join(temp_dir, "mymodel.keras") + model.save(model_path, save_format="keras_v3") + model = keras.models.load_model(model_path) + output = model.predict(input_array) + self.assertAllEqual(output, ref_output) + + # Second, with adapt(). + input_data = keras.Input(shape=(), dtype=tf.string) + layer = text_vectorization.TextVectorization() + layer.adapt(vocab_data) + output = layer(input_data) + model = keras.Model(inputs=input_data, outputs=output) + ref_output = model.predict(input_array) + model.save(model_path, save_format="keras_v3", overwrite=True) + model = keras.models.load_model(model_path) + output = model.predict(input_array) + self.assertAllEqual(output, ref_output) + + # Test TF-IDF + adapt(). 
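(The registered custom callables used in the serialization test above are what make that config round trip possible; a condensed sketch of the mechanism with illustrative names, before the TF-IDF case continues below:)

    import tensorflow as tf
    from tensorflow import keras

    @keras.utils.register_keras_serializable(package="Demo")
    def lower_standardize(x):
        return tf.strings.lower(x)

    @keras.utils.register_keras_serializable(package="Demo")
    def gt_split(x):
        return tf.strings.split(x, sep=">")

    layer = keras.layers.TextVectorization(
        standardize=lower_standardize, split=gt_split
    )
    # get_config() records the callables under their registered names
    # ("Demo>lower_standardize"), so from_config() can resolve them
    # without a custom_objects dict.
    restored = keras.layers.TextVectorization.from_config(layer.get_config())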
+ input_data = keras.Input(shape=(), dtype=tf.string) + layer = text_vectorization.TextVectorization(output_mode="tf_idf") + layer.adapt(vocab_data) + output = layer(input_data) + model = keras.Model(inputs=input_data, outputs=output) + ref_output = model.predict(input_array) + model.save(model_path, save_format="keras_v3", overwrite=True) + model = keras.models.load_model(model_path) + output = model.predict(input_array) + self.assertAllEqual(output, ref_output) @test_utils.run_v2_only @test_combinations.run_all_keras_modes(always_skip_v1=True) -class TextVectorizationE2ETest(test_combinations.TestCase, - preprocessing_test_utils.PreprocessingLayerTest): - - def test_keras_vocab_trimming_example(self): - vocab_data = np.array([ - "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and", - "and", "fire" - ]) - input_array = np.array([["earth", "wind", "and", "earth"], - ["ohio", "and", "earth", "michigan"]]) - - # pyformat: disable - expected_output = [[1, 2, 1], - [3, 1, 0]] - # pyformat: enable - max_tokens = 3 - expected_output_shape = [None, max_tokens] - - input_data = keras.Input(shape=(None,), dtype=tf.string) - layer = text_vectorization.TextVectorization( - max_tokens=max_tokens, - standardize=None, - split=None, - output_mode=text_vectorization.COUNT, - pad_to_max_tokens=True) - int_data = layer(input_data) - layer.adapt(vocab_data) - self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) - model = keras.Model(input_data, int_data) - output = model.predict(input_array) - self.assertAllEqual(expected_output, output) +class TextVectorizationE2ETest( + test_combinations.TestCase, preprocessing_test_utils.PreprocessingLayerTest +): + def test_keras_vocab_trimming_example(self): + vocab_data = np.array( + [ + "earth", + "earth", + "earth", + "earth", + "wind", + "wind", + "wind", + "and", + "and", + "fire", + ] + ) + input_array = np.array( + [ + ["earth", "wind", "and", "earth"], + ["ohio", "and", "earth", "michigan"], + ] + ) + + # pyformat: disable + expected_output = [[1, 2, 1], [3, 1, 0]] + # pyformat: enable + max_tokens = 3 + expected_output_shape = [None, max_tokens] + + input_data = keras.Input(shape=(None,), dtype=tf.string) + layer = text_vectorization.TextVectorization( + max_tokens=max_tokens, + standardize=None, + split=None, + output_mode=text_vectorization.COUNT, + pad_to_max_tokens=True, + ) + int_data = layer(input_data) + layer.adapt(vocab_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + model = keras.Model(input_data, int_data) + output = model.predict(input_array) + self.assertAllEqual(expected_output, output) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/regularization/BUILD b/keras/layers/regularization/BUILD index c49cb80ed4b7..ac9a829414ae 100644 --- a/keras/layers/regularization/BUILD +++ b/keras/layers/regularization/BUILD @@ -1,15 +1,17 @@ # Description: # Contains the Keras regularization layers. 
+# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/py/tensorflow_gnn:__subpackages__", "//third_party/tensorflow/python/distribute:__pkg__", "//third_party/tensorflow/python/feature_column:__pkg__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/trackable:__pkg__", "//third_party/tensorflow/tools/pip_package:__pkg__", "//third_party/tensorflow_models/official/projects/residual_mobilenet/modeling/backbones:__pkg__", ], diff --git a/keras/layers/regularization/__init__.py b/keras/layers/regularization/__init__.py index 8718c8985ace..60e910e8ef62 100644 --- a/keras/layers/regularization/__init__.py +++ b/keras/layers/regularization/__init__.py @@ -13,14 +13,15 @@ # limitations under the License. # ============================================================================== """Keras regularization layers.""" -# pylint: disable=g-bad-import-order + +from keras.layers.regularization.activity_regularization import ( + ActivityRegularization, +) +from keras.layers.regularization.alpha_dropout import AlphaDropout from keras.layers.regularization.dropout import Dropout +from keras.layers.regularization.gaussian_dropout import GaussianDropout +from keras.layers.regularization.gaussian_noise import GaussianNoise from keras.layers.regularization.spatial_dropout1d import SpatialDropout1D from keras.layers.regularization.spatial_dropout2d import SpatialDropout2D from keras.layers.regularization.spatial_dropout3d import SpatialDropout3D -from keras.layers.regularization.gaussian_dropout import GaussianDropout -from keras.layers.regularization.gaussian_noise import GaussianNoise -from keras.layers.regularization.activity_regularization import ActivityRegularization -from keras.layers.regularization.alpha_dropout import AlphaDropout - diff --git a/keras/layers/regularization/activity_regularization.py b/keras/layers/regularization/activity_regularization.py index 520b526e4978..977b7d24e56c 100644 --- a/keras/layers/regularization/activity_regularization.py +++ b/keras/layers/regularization/activity_regularization.py @@ -13,41 +13,44 @@ # limitations under the License. # ============================================================================== """Contains the ActivityRegularization layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import regularizers from keras.engine.base_layer import Layer + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ActivityRegularization') +@keras_export("keras.layers.ActivityRegularization") class ActivityRegularization(Layer): - """Layer that applies an update to the cost function based input activity. - - Args: - l1: L1 regularization factor (positive float). - l2: L2 regularization factor (positive float). - - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - Output shape: - Same shape as input. 
- """ - - def __init__(self, l1=0., l2=0., **kwargs): - super().__init__( - activity_regularizer=regularizers.L1L2(l1=l1, l2=l2), **kwargs) - self.supports_masking = True - self.l1 = l1 - self.l2 = l2 - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = {'l1': self.l1, 'l2': self.l2} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Layer that applies an update to the cost function based input activity. + + Args: + l1: L1 regularization factor (positive float). + l2: L2 regularization factor (positive float). + + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + + Output shape: + Same shape as input. + """ + + def __init__(self, l1=0.0, l2=0.0, **kwargs): + super().__init__( + activity_regularizer=regularizers.L1L2(l1=l1, l2=l2), **kwargs + ) + self.supports_masking = True + self.l1 = l1 + self.l2 = l2 + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = {"l1": self.l1, "l2": self.l2} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/regularization/activity_regularization_test.py b/keras/layers/regularization/activity_regularization_test.py index 47475ff70b57..a98d57cc0382 100644 --- a/keras/layers/regularization/activity_regularization_test.py +++ b/keras/layers/regularization/activity_regularization_test.py @@ -14,21 +14,22 @@ # ============================================================================== """Tests for activity regularization layer.""" -import keras -from keras.testing_infra import test_combinations import numpy as np import tensorflow.compat.v2 as tf +import keras +from keras.testing_infra import test_combinations + @test_combinations.run_all_keras_modes class ActivityRegularizationTest(test_combinations.TestCase): + def test_activity_regularization(self): + layer = keras.layers.ActivityRegularization(l1=0.1) + layer(keras.backend.variable(np.ones((2, 4)))) + self.assertEqual(1, len(layer.losses)) + config = layer.get_config() + self.assertEqual(config.pop("l1"), 0.1) - def test_activity_regularization(self): - layer = keras.layers.ActivityRegularization(l1=0.1) - layer(keras.backend.variable(np.ones((2, 4)))) - self.assertEqual(1, len(layer.losses)) - config = layer.get_config() - self.assertEqual(config.pop('l1'), 0.1) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/regularization/alpha_dropout.py b/keras/layers/regularization/alpha_dropout.py index f9d5287b5e6a..5c00ab347243 100644 --- a/keras/layers/regularization/alpha_dropout.py +++ b/keras/layers/regularization/alpha_dropout.py @@ -13,88 +13,92 @@ # limitations under the License. # ============================================================================== """Contains the AlphaDropout layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine import base_layer from keras.utils import tf_utils -import tensorflow.compat.v2 as tf - +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.AlphaDropout') +@keras_export("keras.layers.AlphaDropout") class AlphaDropout(base_layer.BaseRandomLayer): - """Applies Alpha Dropout to the input. 
-
- Alpha Dropout is a `Dropout` that keeps mean and variance of inputs
- to their original values, in order to ensure the self-normalizing property
- even after this dropout.
- Alpha Dropout fits well to Scaled Exponential Linear Units
- by randomly setting activations to the negative saturation value.
-
- Args:
- rate: float, drop probability (as with `Dropout`).
- The multiplicative noise will have
- standard deviation `sqrt(rate / (1 - rate))`.
- seed: Integer, optional random seed to enable deterministic behavior.
-
- Call arguments:
- inputs: Input tensor (of any rank).
- training: Python boolean indicating whether the layer should behave in
- training mode (adding dropout) or in inference mode (doing nothing).
-
- Input shape:
- Arbitrary. Use the keyword argument `input_shape`
- (tuple of integers, does not include the samples axis)
- when using this layer as the first layer in a model.
-
- Output shape:
- Same shape as input.
- """
-
- def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
- super().__init__(seed=seed, **kwargs)
- self.rate = rate
- self.noise_shape = noise_shape
- self.seed = seed
- self.supports_masking = True
-
- def _get_noise_shape(self, inputs):
- return self.noise_shape if self.noise_shape else tf.shape(inputs)
-
- def call(self, inputs, training=None):
- if 0. < self.rate < 1.:
- noise_shape = self._get_noise_shape(inputs)
-
- def dropped_inputs(inputs=inputs, rate=self.rate): # pylint: disable=missing-docstring
- alpha = 1.6732632423543772848170429916717
- scale = 1.0507009873554804934193349852946
- alpha_p = -alpha * scale
-
- kept_idx = tf.greater_equal(
- self._random_generator.random_uniform(noise_shape), rate)
- kept_idx = tf.cast(kept_idx, inputs.dtype)
-
- # Get affine transformation params
- a = ((1 - rate) * (1 + rate * alpha_p**2))**-0.5
- b = -a * alpha_p * rate
-
- # Apply mask
- x = inputs * kept_idx + alpha_p * (1 - kept_idx)
-
- # Do affine transformation
- return a * x + b
-
- return backend.in_train_phase(dropped_inputs, inputs, training=training)
- return inputs
-
- def get_config(self):
- config = {'rate': self.rate, 'seed': self.seed}
- base_config = super().get_config()
- return dict(list(base_config.items()) + list(config.items()))
-
- @tf_utils.shape_type_conversion
- def compute_output_shape(self, input_shape):
- return input_shape
+ """Applies Alpha Dropout to the input.
+
+ Alpha Dropout is a `Dropout` that keeps the mean and variance of its
+ inputs at their original values, in order to preserve the
+ self-normalizing property even after this dropout.
+ Alpha Dropout fits well with Scaled Exponential Linear Units
+ by randomly setting activations to the negative saturation value.
+
+ Args:
+ rate: float, drop probability (as with `Dropout`).
+ The multiplicative noise will have
+ standard deviation `sqrt(rate / (1 - rate))`.
+ seed: Integer, optional random seed to enable deterministic behavior.
+
+ Call arguments:
+ inputs: Input tensor (of any rank).
+ training: Python boolean indicating whether the layer should behave in
+ training mode (adding dropout) or in inference mode (doing nothing).
+
+ Input shape:
+ Arbitrary. Use the keyword argument `input_shape`
+ (tuple of integers, does not include the samples axis)
+ when using this layer as the first layer in a model.
+
+ Output shape:
+ Same shape as input.
+ """ + + def __init__(self, rate, noise_shape=None, seed=None, **kwargs): + super().__init__(seed=seed, **kwargs) + self.rate = rate + self.noise_shape = noise_shape + self.seed = seed + self.supports_masking = True + + def _get_noise_shape(self, inputs): + return self.noise_shape if self.noise_shape else tf.shape(inputs) + + def call(self, inputs, training=None): + if 0.0 < self.rate < 1.0: + noise_shape = self._get_noise_shape(inputs) + + def dropped_inputs(inputs=inputs, rate=self.rate): + alpha = 1.6732632423543772848170429916717 + scale = 1.0507009873554804934193349852946 + alpha_p = -alpha * scale + + kept_idx = tf.greater_equal( + self._random_generator.random_uniform(noise_shape), rate + ) + kept_idx = tf.cast(kept_idx, inputs.dtype) + + # Get affine transformation params + a = ((1 - rate) * (1 + rate * alpha_p**2)) ** -0.5 + b = -a * alpha_p * rate + + # Apply mask + x = inputs * kept_idx + alpha_p * (1 - kept_idx) + + # Do affine transformation + return a * x + b + + return backend.in_train_phase( + dropped_inputs, inputs, training=training + ) + return inputs + + def get_config(self): + config = {"rate": self.rate, "seed": self.seed} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras/layers/regularization/alpha_dropout_test.py b/keras/layers/regularization/alpha_dropout_test.py index d7d8c1230062..b466acf4fe86 100644 --- a/keras/layers/regularization/alpha_dropout_test.py +++ b/keras/layers/regularization/alpha_dropout_test.py @@ -14,41 +14,46 @@ # ============================================================================== """Tests for alpha dropout layer.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class AlphaDropoutTest(test_combinations.TestCase): - - def test_AlphaDropout(self): - test_utils.layer_test( - keras.layers.AlphaDropout, kwargs={'rate': 0.2}, input_shape=(3, 2, 3)) - - def _make_model(self, dtype): - assert dtype in (tf.float32, tf.float64) - model = keras.Sequential() - model.add(keras.layers.Dense(8, input_shape=(32,), dtype=dtype)) - layer = keras.layers.AlphaDropout(0.5, dtype=dtype) - model.add(layer) - return model - - def _train_model(self, dtype): - model = self._make_model(dtype) - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((8, 32)), np.zeros((8, 8))) - - def test_alpha_dropout_float32(self): - self._train_model(tf.float32) - - def test_alpha_dropout_float64(self): - self._train_model(tf.float64) - -if __name__ == '__main__': - tf.test.main() + def test_AlphaDropout(self): + test_utils.layer_test( + keras.layers.AlphaDropout, + kwargs={"rate": 0.2}, + input_shape=(3, 2, 3), + ) + + def _make_model(self, dtype): + assert dtype in (tf.float32, tf.float64) + model = keras.Sequential() + model.add(keras.layers.Dense(8, input_shape=(32,), dtype=dtype)) + layer = keras.layers.AlphaDropout(0.5, dtype=dtype) + model.add(layer) + return model + + def _train_model(self, dtype): + model = self._make_model(dtype) + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((8, 32)), np.zeros((8, 8))) + + def 
test_alpha_dropout_float32(self): + self._train_model(tf.float32) + + def test_alpha_dropout_float64(self): + self._train_model(tf.float64) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/regularization/dropout.py b/keras/layers/regularization/dropout.py index dbfa82d6fecd..17374afcdf3b 100644 --- a/keras/layers/regularization/dropout.py +++ b/keras/layers/regularization/dropout.py @@ -13,113 +13,123 @@ # limitations under the License. # ============================================================================== """Contains the Dropout layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + +import numbers + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine import base_layer from keras.utils import control_flow_util -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Dropout') +@keras_export("keras.layers.Dropout") class Dropout(base_layer.BaseRandomLayer): - """Applies Dropout to the input. - - The Dropout layer randomly sets input units to 0 with a frequency of `rate` - at each step during training time, which helps prevent overfitting. - Inputs not set to 0 are scaled up by 1/(1 - rate) such that the sum over - all inputs is unchanged. - - Note that the Dropout layer only applies when `training` is set to True - such that no values are dropped during inference. When using `model.fit`, - `training` will be appropriately set to True automatically, and in other - contexts, you can set the kwarg explicitly to True when calling the layer. - - (This is in contrast to setting `trainable=False` for a Dropout layer. - `trainable` does not affect the layer's behavior, as Dropout does - not have any variables/weights that can be frozen during training.) - - >>> tf.random.set_seed(0) - >>> layer = tf.keras.layers.Dropout(.2, input_shape=(2,)) - >>> data = np.arange(10).reshape(5, 2).astype(np.float32) - >>> print(data) - [[0. 1.] - [2. 3.] - [4. 5.] - [6. 7.] - [8. 9.]] - >>> outputs = layer(data, training=True) - >>> print(outputs) - tf.Tensor( - [[ 0. 1.25] - [ 2.5 3.75] - [ 5. 6.25] - [ 7.5 8.75] - [10. 0. ]], shape=(5, 2), dtype=float32) - - Args: - rate: Float between 0 and 1. Fraction of the input units to drop. - noise_shape: 1D integer tensor representing the shape of the - binary dropout mask that will be multiplied with the input. - For instance, if your inputs have shape - `(batch_size, timesteps, features)` and - you want the dropout mask to be the same for all timesteps, - you can use `noise_shape=(batch_size, 1, features)`. - seed: A Python integer to use as random seed. - - Call arguments: - inputs: Input tensor (of any rank). - training: Python boolean indicating whether the layer should behave in - training mode (adding dropout) or in inference mode (doing nothing). 
- """ - - def __init__(self, rate, noise_shape=None, seed=None, **kwargs): - super().__init__(seed=seed, **kwargs) - if isinstance(rate, (int, float)) and not 0 <= rate <= 1: - raise ValueError(f'Invalid value {rate} received for ' - f'`rate`, expected a value between 0 and 1.') - self.rate = rate - self.noise_shape = noise_shape - self.seed = seed - self.supports_masking = True - - def build(self, input_shape): - self._random_generator._maybe_init() # pylint: disable=protected-access - - def _get_noise_shape(self, inputs): - # Subclasses of `Dropout` may implement `_get_noise_shape(self, inputs)`, - # which will override `self.noise_shape`, and allows for custom noise - # shapes with dynamically sized inputs. - if self.noise_shape is None: - return None - - concrete_inputs_shape = tf.shape(inputs) - noise_shape = [] - for i, value in enumerate(self.noise_shape): - noise_shape.append(concrete_inputs_shape[i] if value is None else value) - return tf.convert_to_tensor(noise_shape) - - def call(self, inputs, training=None): - if training is None: - training = backend.learning_phase() - - def dropped_inputs(): - return self._random_generator.dropout( - inputs, self.rate, noise_shape=self._get_noise_shape(inputs)) - - output = control_flow_util.smart_cond(training, dropped_inputs, - lambda: tf.identity(inputs)) - return output - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'rate': self.rate, - 'noise_shape': self.noise_shape, - 'seed': self.seed - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Applies Dropout to the input. + + The Dropout layer randomly sets input units to 0 with a frequency of `rate` + at each step during training time, which helps prevent overfitting. + Inputs not set to 0 are scaled up by 1/(1 - rate) such that the sum over + all inputs is unchanged. + + Note that the Dropout layer only applies when `training` is set to True + such that no values are dropped during inference. When using `model.fit`, + `training` will be appropriately set to True automatically, and in other + contexts, you can set the kwarg explicitly to True when calling the layer. + + (This is in contrast to setting `trainable=False` for a Dropout layer. + `trainable` does not affect the layer's behavior, as Dropout does + not have any variables/weights that can be frozen during training.) + + >>> tf.random.set_seed(0) + >>> layer = tf.keras.layers.Dropout(.2, input_shape=(2,)) + >>> data = np.arange(10).reshape(5, 2).astype(np.float32) + >>> print(data) + [[0. 1.] + [2. 3.] + [4. 5.] + [6. 7.] + [8. 9.]] + >>> outputs = layer(data, training=True) + >>> print(outputs) + tf.Tensor( + [[ 0. 1.25] + [ 2.5 3.75] + [ 5. 6.25] + [ 7.5 8.75] + [10. 0. ]], shape=(5, 2), dtype=float32) + + Args: + rate: Float between 0 and 1. Fraction of the input units to drop. + noise_shape: 1D integer tensor representing the shape of the + binary dropout mask that will be multiplied with the input. + For instance, if your inputs have shape + `(batch_size, timesteps, features)` and + you want the dropout mask to be the same for all timesteps, + you can use `noise_shape=(batch_size, 1, features)`. + seed: A Python integer to use as random seed. + + Call arguments: + inputs: Input tensor (of any rank). + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (doing nothing). 
+ """ + + def __init__(self, rate, noise_shape=None, seed=None, **kwargs): + super().__init__(seed=seed, **kwargs) + if isinstance(rate, (int, float)) and not 0 <= rate <= 1: + raise ValueError( + f"Invalid value {rate} received for " + "`rate`, expected a value between 0 and 1." + ) + self.rate = rate + self.noise_shape = noise_shape + self.seed = seed + self.supports_masking = True + + def _get_noise_shape(self, inputs): + # Subclasses of `Dropout` may implement `_get_noise_shape(self, + # inputs)`, which will override `self.noise_shape`, and allows for + # custom noise shapes with dynamically sized inputs. + if self.noise_shape is None: + return None + + concrete_inputs_shape = tf.shape(inputs) + noise_shape = [] + for i, value in enumerate(self.noise_shape): + noise_shape.append( + concrete_inputs_shape[i] if value is None else value + ) + return tf.convert_to_tensor(noise_shape) + + def call(self, inputs, training=None): + if isinstance(self.rate, numbers.Real) and self.rate == 0: + return tf.identity(inputs) + + if training is None: + training = backend.learning_phase() + + def dropped_inputs(): + return self._random_generator.dropout( + inputs, self.rate, noise_shape=self._get_noise_shape(inputs) + ) + + output = control_flow_util.smart_cond( + training, dropped_inputs, lambda: tf.identity(inputs) + ) + return output + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + "rate": self.rate, + "noise_shape": self.noise_shape, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/regularization/dropout_test.py b/keras/layers/regularization/dropout_test.py index 19fdb1e50ab4..2239338b8af4 100644 --- a/keras/layers/regularization/dropout_test.py +++ b/keras/layers/regularization/dropout_test.py @@ -16,78 +16,148 @@ import os +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class DropoutTest(test_combinations.TestCase): + def test_dropout(self): + test_utils.layer_test( + keras.layers.Dropout, kwargs={"rate": 0.5}, input_shape=(3, 2) + ) + + test_utils.layer_test( + keras.layers.Dropout, + kwargs={"rate": 0.5, "noise_shape": [3, 1]}, + input_shape=(3, 2), + ) + + def test_dropout_supports_masking(self): + dropout = keras.layers.Dropout(0.5) + self.assertEqual(True, dropout.supports_masking) + + def test_dropout_partial_noise_shape(self): + inputs = keras.Input(shape=(5, 10)) + layer = keras.layers.Dropout(0.5, noise_shape=(None, 1, None)) + outputs = layer(inputs) + model = keras.Model(inputs, outputs) + out = model(np.ones((20, 5, 10)), training=True) + out_np = keras.backend.get_value(out) + # Test that dropout mask is shared across second dim. + self.assertAllClose(out_np[:, 0, :], out_np[:, 1, :]) + + @test_utils.run_v2_only + def test_dropout_with_zero_rate(self): + inputs = np.ones((20, 5, 10)) + dropout = keras.layers.Dropout(0.0, force_generator=True) + dropout.build((20, 5, 10)) + # Make sure we don't use the RNG when the dropout rate is 0 + # (for performance). 
+ rng_state_var = tf.constant(
+ dropout._random_generator._generator._state_var
+ )
+ output = dropout(inputs, training=True)
+ self.assertAllClose(inputs, output)
+ self.assertAllClose(
+ rng_state_var, dropout._random_generator._generator._state_var
+ )
+
+ def test_dropout_with_saving(self):
+ inputs = keras.Input(shape=(5, 10))
+ layer = keras.layers.Dropout(0.5, force_generator=True)
+ outputs = layer(inputs)
+ model = keras.Model(inputs, outputs)
+ train = model(np.ones((20, 5, 10)), training=True)
+ predict = model(np.ones((20, 5, 10)))
+ # Make sure the weights from tf.random.Generator are not present in
+ # the model, which would cause weight-loading issues for existing
+ # application models if they contain a dropout layer.
+ self.assertEmpty(layer.get_weights())
+ self.assertEmpty(model.get_weights())
+
+ # Make sure the layer actually applies dropout when training
+ self.assertNotAllClose(train, predict)
+
+ with self.subTest("savedmodel"):
+ model.save(
+ os.path.join(self.get_temp_dir(), "savedmodel"),
+ save_format="tf",
+ )
+ loaded_model = keras.models.load_model(
+ os.path.join(self.get_temp_dir(), "savedmodel")
+ )
+ predict2 = loaded_model(np.ones((20, 5, 10)))
+
+ self.assertAllClose(predict, predict2)
+ # Make sure the model produces different dropout values after loading
+ train2 = loaded_model(np.ones((20, 5, 10)), training=True)
+ self.assertNotAllClose(train, train2)
+ self.assertIsNotNone(loaded_model.layers[1]._random_generator)
+
+ with self.subTest("keras_v3"):
+ if not tf.__internal__.tf2.enabled():
+ self.skipTest(
+ "TF2 must be enabled to use the new `.keras` saving."
+ )
+ model.save(
+ os.path.join(self.get_temp_dir(), "model.keras"),
+ save_format="keras_v3",
+ )
+ loaded_model = keras.models.load_model(
+ os.path.join(self.get_temp_dir(), "model.keras")
+ )
+ predict2 = loaded_model(np.ones((20, 5, 10)))
+
+ self.assertAllClose(predict, predict2)
+ # Make sure the model produces different dropout values after loading
+ train2 = loaded_model(np.ones((20, 5, 10)), training=True)
+ self.assertNotAllClose(train, train2)
+ self.assertIsNotNone(loaded_model.layers[1]._random_generator)
+
+ with self.subTest("checkpoint"):
+ # Also make sure the checkpoint doesn't contain any variable from
+ # the dropout layer, to preserve backward compatibility.
+ checkpoint = tf.train.Checkpoint(model)
+ save_path = checkpoint.save(
+ os.path.join(self.get_temp_dir(), "checkpoint")
+ )
+ checkpoint_var_names = [
+ name_value_tuple[0]
+ for name_value_tuple in tf.train.list_variables(save_path)
+ ]
+ for name in checkpoint_var_names:
+ self.assertNotIn("dropout", name)
+
+ # Make sure the checkpoint can be loaded
+ clone_model = keras.models.clone_model(model)
+ checkpoint = tf.train.Checkpoint(clone_model)
+ status = checkpoint.restore(
+ os.path.join(self.get_temp_dir(), "checkpoint-1")
+ )
+ self.assertTrue(status.assert_consumed())
+ self.assertTrue(status.assert_existing_objects_matched())
+ # Make sure the output is different from the original model, since
+ # the StateVar is not preserved.
+ train3 = clone_model(np.ones((20, 5, 10)), training=True) + self.assertNotAllClose(train3, train2) + + @test_utils.run_v2_only + def test_state_variable_name(self): + inputs = keras.Input(shape=(5, 10)) + layer = keras.layers.Dropout( + 0.5, force_generator=True, name="dropout_layer" + ) + layer(inputs) + self.assertEqual( + layer._random_generator._generator._state_var.name, + "dropout_layer/StateVar:0", + ) + - def test_dropout(self): - test_utils.layer_test( - keras.layers.Dropout, kwargs={'rate': 0.5}, input_shape=(3, 2)) - - test_utils.layer_test( - keras.layers.Dropout, - kwargs={ - 'rate': 0.5, - 'noise_shape': [3, 1] - }, - input_shape=(3, 2)) - - def test_dropout_supports_masking(self): - dropout = keras.layers.Dropout(0.5) - self.assertEqual(True, dropout.supports_masking) - - def test_dropout_partial_noise_shape(self): - inputs = keras.Input(shape=(5, 10)) - layer = keras.layers.Dropout(0.5, noise_shape=(None, 1, None)) - outputs = layer(inputs) - model = keras.Model(inputs, outputs) - out = model(np.ones((20, 5, 10)), training=True) - out_np = keras.backend.get_value(out) - # Test that dropout mask is shared across second dim. - self.assertAllClose(out_np[:, 0, :], out_np[:, 1, :]) - - def test_dropout_with_savemodel(self): - inputs = keras.Input(shape=(5, 10)) - layer = keras.layers.Dropout(0.5, force_generator=True) - outputs = layer(inputs) - model = keras.Model(inputs, outputs) - train = model(np.ones((20, 5, 10)), training=True) - predict = model(np.ones((20, 5, 10))) - # Make sure the weights from tf.random.Generator is not present in the model - # which will cause weight loading issue for existing application models if - # it contains dropout layer. - self.assertEmpty(layer.get_weights()) - self.assertEmpty(model.get_weights()) - - # Make sure the layer does dropout value when training - self.assertNotAllClose(train, predict) - - model.save(os.path.join(self.get_temp_dir(), 'savedmodel'), - save_format='tf') - loaded_model = keras.models.load_model( - os.path.join(self.get_temp_dir(), 'savedmodel')) - predict2 = loaded_model(np.ones((20, 5, 10))) - - self.assertAllClose(predict, predict2) - # Make sure the model dropout different value after loading - train2 = loaded_model(np.ones((20, 5, 10)), training=True) - self.assertNotAllClose(train, train2) - self.assertIsNotNone(loaded_model.layers[1]._random_generator) - - # Also make sure the checkpoint doesn't contain any variable from the - # dropout layer, to keep the backward compatibility. - checkpoint = tf.train.Checkpoint(model) - save_path = checkpoint.save(os.path.join(self.get_temp_dir(), 'checkpoint')) - checkpoint_var_names = [name_value_tuple[0] for name_value_tuple in - tf.train.list_variables(save_path)] - for name in checkpoint_var_names: - self.assertNotIn('dropout', name) - -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/regularization/gaussian_dropout.py b/keras/layers/regularization/gaussian_dropout.py index 1ff92e8923c0..9e9d442bbe87 100644 --- a/keras/layers/regularization/gaussian_dropout.py +++ b/keras/layers/regularization/gaussian_dropout.py @@ -13,69 +13,71 @@ # limitations under the License. 
# ============================================================================== """Contains the GaussianDropout layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import -from keras import backend -from keras.engine import base_layer -from keras.utils import tf_utils import numpy as np import tensorflow.compat.v2 as tf +from keras import backend +from keras.engine import base_layer +from keras.utils import tf_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.GaussianDropout') +@keras_export("keras.layers.GaussianDropout") class GaussianDropout(base_layer.BaseRandomLayer): - """Apply multiplicative 1-centered Gaussian noise. - - As it is a regularization layer, it is only active at training time. - - Args: - rate: Float, drop probability (as with `Dropout`). - The multiplicative noise will have - standard deviation `sqrt(rate / (1 - rate))`. - seed: Integer, optional random seed to enable deterministic behavior. - - Call arguments: - inputs: Input tensor (of any rank). - training: Python boolean indicating whether the layer should behave in - training mode (adding dropout) or in inference mode (doing nothing). - - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - Output shape: - Same shape as input. - """ - - def __init__(self, rate, seed=None, **kwargs): - super().__init__(seed=seed, **kwargs) - self.supports_masking = True - self.rate = rate - self.seed = seed - - def call(self, inputs, training=None): - if 0 < self.rate < 1: - - def noised(): - stddev = np.sqrt(self.rate / (1.0 - self.rate)) - return inputs * self._random_generator.random_normal( - shape=tf.shape(inputs), - mean=1.0, - stddev=stddev, - dtype=inputs.dtype) - - return backend.in_train_phase(noised, inputs, training=training) - return inputs - - def get_config(self): - config = {'rate': self.rate, 'seed': self.seed} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - return input_shape + """Apply multiplicative 1-centered Gaussian noise. + + As it is a regularization layer, it is only active at training time. + + Args: + rate: Float, drop probability (as with `Dropout`). + The multiplicative noise will have + standard deviation `sqrt(rate / (1 - rate))`. + seed: Integer, optional random seed to enable deterministic behavior. + + Call arguments: + inputs: Input tensor (of any rank). + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (doing nothing). + + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + + Output shape: + Same shape as input. 
+ """ + + def __init__(self, rate, seed=None, **kwargs): + super().__init__(seed=seed, **kwargs) + self.supports_masking = True + self.rate = rate + self.seed = seed + + def call(self, inputs, training=None): + if 0 < self.rate < 1: + + def noised(): + stddev = np.sqrt(self.rate / (1.0 - self.rate)) + return inputs * self._random_generator.random_normal( + shape=tf.shape(inputs), + mean=1.0, + stddev=stddev, + dtype=inputs.dtype, + ) + + return backend.in_train_phase(noised, inputs, training=training) + return inputs + + def get_config(self): + config = {"rate": self.rate, "seed": self.seed} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras/layers/regularization/gaussian_dropout_test.py b/keras/layers/regularization/gaussian_dropout_test.py index a961c838926a..b50d348e2548 100644 --- a/keras/layers/regularization/gaussian_dropout_test.py +++ b/keras/layers/regularization/gaussian_dropout_test.py @@ -14,43 +14,46 @@ # ============================================================================== """Tests for gaussian dropout layer.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class NoiseLayersTest(test_combinations.TestCase): - - def test_GaussianDropout(self): - test_utils.layer_test( - keras.layers.GaussianDropout, - kwargs={'rate': 0.5}, - input_shape=(3, 2, 3)) - - def _make_model(self, dtype): - assert dtype in (tf.float32, tf.float64) - model = keras.Sequential() - model.add(keras.layers.Dense(8, input_shape=(32,), dtype=dtype)) - layer = keras.layers.GaussianDropout(0.1, dtype=dtype) - model.add(layer) - return model - - def _train_model(self, dtype): - model = self._make_model(dtype) - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((8, 32)), np.zeros((8, 8))) - - def test_gaussian_dropout_float32(self): - self._train_model(tf.float32) - - def test_gaussian_dropout_float64(self): - self._train_model(tf.float64) - -if __name__ == '__main__': - tf.test.main() + def test_GaussianDropout(self): + test_utils.layer_test( + keras.layers.GaussianDropout, + kwargs={"rate": 0.5}, + input_shape=(3, 2, 3), + ) + + def _make_model(self, dtype): + assert dtype in (tf.float32, tf.float64) + model = keras.Sequential() + model.add(keras.layers.Dense(8, input_shape=(32,), dtype=dtype)) + layer = keras.layers.GaussianDropout(0.1, dtype=dtype) + model.add(layer) + return model + + def _train_model(self, dtype): + model = self._make_model(dtype) + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((8, 32)), np.zeros((8, 8))) + + def test_gaussian_dropout_float32(self): + self._train_model(tf.float32) + + def test_gaussian_dropout_float64(self): + self._train_model(tf.float64) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/regularization/gaussian_noise.py b/keras/layers/regularization/gaussian_noise.py index 32386ac09e21..f88e3a3c4a2d 100644 --- a/keras/layers/regularization/gaussian_noise.py +++ b/keras/layers/regularization/gaussian_noise.py @@ -13,68 +13,69 @@ # limitations under the License. 
# ==============================================================================
"""Contains the GaussianNoise layer."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
+
+
+import tensorflow.compat.v2 as tf

from keras import backend
from keras.engine import base_layer
from keras.utils import tf_utils
-import tensorflow.compat.v2 as tf
-
+# isort: off
from tensorflow.python.util.tf_export import keras_export


-@keras_export('keras.layers.GaussianNoise')
+@keras_export("keras.layers.GaussianNoise")
class GaussianNoise(base_layer.BaseRandomLayer):
-  """Apply additive zero-centered Gaussian noise.
-
-  This is useful to mitigate overfitting
-  (you could see it as a form of random data augmentation).
-  Gaussian Noise (GS) is a natural choice as corruption process
-  for real valued inputs.
-
-  As it is a regularization layer, it is only active at training time.
-
-  Args:
-    stddev: Float, standard deviation of the noise distribution.
-    seed: Integer, optional random seed to enable deterministic behavior.
-
-  Call arguments:
-    inputs: Input tensor (of any rank).
-    training: Python boolean indicating whether the layer should behave in
-      training mode (adding noise) or in inference mode (doing nothing).
-
-  Input shape:
-    Arbitrary. Use the keyword argument `input_shape`
-    (tuple of integers, does not include the samples axis)
-    when using this layer as the first layer in a model.
-
-  Output shape:
-    Same shape as input.
-  """
-
-  def __init__(self, stddev, seed=None, **kwargs):
-    super().__init__(seed=seed, **kwargs)
-    self.supports_masking = True
-    self.stddev = stddev
-    self.seed = seed
-
-  def call(self, inputs, training=None):
-
-    def noised():
-      return inputs + self._random_generator.random_normal(
-          shape=tf.shape(inputs),
-          mean=0.,
-          stddev=self.stddev,
-          dtype=inputs.dtype)
-
-    return backend.in_train_phase(noised, inputs, training=training)
-
-  def get_config(self):
-    config = {'stddev': self.stddev, 'seed': self.seed}
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-  @tf_utils.shape_type_conversion
-  def compute_output_shape(self, input_shape):
-    return input_shape
+    """Apply additive zero-centered Gaussian noise.
+
+    This is useful to mitigate overfitting
+    (you could see it as a form of random data augmentation).
+    Gaussian Noise (GN) is a natural choice as a corruption process
+    for real-valued inputs.
+
+    As it is a regularization layer, it is only active at training time.
+
+    Args:
+        stddev: Float, standard deviation of the noise distribution.
+        seed: Integer, optional random seed to enable deterministic behavior.
+
+    Call arguments:
+        inputs: Input tensor (of any rank).
+        training: Python boolean indicating whether the layer should behave in
+            training mode (adding noise) or in inference mode (doing nothing).
+
+    Input shape:
+        Arbitrary. Use the keyword argument `input_shape`
+        (tuple of integers, does not include the samples axis)
+        when using this layer as the first layer in a model.
+
+    Output shape:
+        Same shape as input.
+ """ + + def __init__(self, stddev, seed=None, **kwargs): + super().__init__(seed=seed, **kwargs) + self.supports_masking = True + self.stddev = stddev + self.seed = seed + + def call(self, inputs, training=None): + def noised(): + return inputs + self._random_generator.random_normal( + shape=tf.shape(inputs), + mean=0.0, + stddev=self.stddev, + dtype=inputs.dtype, + ) + + return backend.in_train_phase(noised, inputs, training=training) + + def get_config(self): + config = {"stddev": self.stddev, "seed": self.seed} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras/layers/regularization/gaussian_noise_test.py b/keras/layers/regularization/gaussian_noise_test.py index 3ac051240cf4..b67084e053f2 100644 --- a/keras/layers/regularization/gaussian_noise_test.py +++ b/keras/layers/regularization/gaussian_noise_test.py @@ -14,43 +14,46 @@ # ============================================================================== """Tests for gaussian noise layer.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class NoiseLayersTest(test_combinations.TestCase): - - def test_GaussianNoise(self): - test_utils.layer_test( - keras.layers.GaussianNoise, - kwargs={'stddev': 1.}, - input_shape=(3, 2, 3)) - - def _make_model(self, dtype): - assert dtype in (tf.float32, tf.float64) - model = keras.Sequential() - model.add(keras.layers.Dense(8, input_shape=(32,), dtype=dtype)) - layer = keras.layers.GaussianNoise(0.0003, dtype=dtype) - model.add(layer) - return model - - def _train_model(self, dtype): - model = self._make_model(dtype) - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((8, 32)), np.zeros((8, 8))) - - def test_gaussian_noise_float32(self): - self._train_model(tf.float32) - - def test_gaussian_noise_float64(self): - self._train_model(tf.float64) - -if __name__ == '__main__': - tf.test.main() + def test_GaussianNoise(self): + test_utils.layer_test( + keras.layers.GaussianNoise, + kwargs={"stddev": 1.0}, + input_shape=(3, 2, 3), + ) + + def _make_model(self, dtype): + assert dtype in (tf.float32, tf.float64) + model = keras.Sequential() + model.add(keras.layers.Dense(8, input_shape=(32,), dtype=dtype)) + layer = keras.layers.GaussianNoise(0.0003, dtype=dtype) + model.add(layer) + return model + + def _train_model(self, dtype): + model = self._make_model(dtype) + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((8, 32)), np.zeros((8, 8))) + + def test_gaussian_noise_float32(self): + self._train_model(tf.float32) + + def test_gaussian_noise_float64(self): + self._train_model(tf.float64) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/regularization/spatial_dropout1d.py b/keras/layers/regularization/spatial_dropout1d.py index 29dabc95ac72..7a3672c9d295 100644 --- a/keras/layers/regularization/spatial_dropout1d.py +++ b/keras/layers/regularization/spatial_dropout1d.py @@ -13,45 +13,47 @@ # limitations under the License. 
# ============================================================================== """Contains the SpatialDropout1D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras.engine.input_spec import InputSpec from keras.layers.regularization.dropout import Dropout -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.SpatialDropout1D') +@keras_export("keras.layers.SpatialDropout1D") class SpatialDropout1D(Dropout): - """Spatial 1D version of Dropout. - - This version performs the same function as Dropout, however, it drops - entire 1D feature maps instead of individual elements. If adjacent frames - within feature maps are strongly correlated (as is normally the case in - early convolution layers) then regular dropout will not regularize the - activations and will otherwise just result in an effective learning rate - decrease. In this case, SpatialDropout1D will help promote independence - between feature maps and should be used instead. - - Args: - rate: Float between 0 and 1. Fraction of the input units to drop. - Call arguments: - inputs: A 3D tensor. - training: Python boolean indicating whether the layer should behave in - training mode (adding dropout) or in inference mode (doing nothing). - Input shape: - 3D tensor with shape: `(samples, timesteps, channels)` - Output shape: Same as input. - References: - [Efficient Object Localization Using Convolutional - Networks](https://arxiv.org/abs/1411.4280) - """ - - def __init__(self, rate, **kwargs): - super().__init__(rate, **kwargs) - self.input_spec = InputSpec(ndim=3) - - def _get_noise_shape(self, inputs): - input_shape = tf.shape(inputs) - noise_shape = (input_shape[0], 1, input_shape[2]) - return noise_shape + """Spatial 1D version of Dropout. + + This version performs the same function as Dropout, however, it drops + entire 1D feature maps instead of individual elements. If adjacent frames + within feature maps are strongly correlated (as is normally the case in + early convolution layers) then regular dropout will not regularize the + activations and will otherwise just result in an effective learning rate + decrease. In this case, SpatialDropout1D will help promote independence + between feature maps and should be used instead. + + Args: + rate: Float between 0 and 1. Fraction of the input units to drop. + Call arguments: + inputs: A 3D tensor. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (doing nothing). + Input shape: + 3D tensor with shape: `(samples, timesteps, channels)` + Output shape: Same as input. + References: - [Efficient Object Localization Using Convolutional + Networks](https://arxiv.org/abs/1411.4280) + """ + + def __init__(self, rate, **kwargs): + super().__init__(rate, **kwargs) + self.input_spec = InputSpec(ndim=3) + + def _get_noise_shape(self, inputs): + input_shape = tf.shape(inputs) + noise_shape = (input_shape[0], 1, input_shape[2]) + return noise_shape diff --git a/keras/layers/regularization/spatial_dropout2d.py b/keras/layers/regularization/spatial_dropout2d.py index ec6b84806033..e913c132c682 100644 --- a/keras/layers/regularization/spatial_dropout2d.py +++ b/keras/layers/regularization/spatial_dropout2d.py @@ -13,63 +13,67 @@ # limitations under the License. 
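The `_get_noise_shape` override above is the whole mechanism: returning `(batch, 1, channels)` makes the dropout mask broadcast over timesteps, so a dropped channel is zeroed across the entire sequence. An illustrative check, assuming the public `tf.keras` API:

import numpy as np
import tensorflow as tf

layer = tf.keras.layers.SpatialDropout1D(0.5)
out = layer(np.ones((2, 3, 4)), training=True).numpy()
# Each (sample, channel) column is either all zero or all kept (and rescaled).
for sample in out:
    for ch in range(sample.shape[-1]):
        col = sample[:, ch]
        assert (col == 0).all() or (col != 0).all()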
# ==============================================================================
"""Contains the SpatialDropout2D layer."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
+
+
+import tensorflow.compat.v2 as tf

from keras import backend
from keras.engine.input_spec import InputSpec
from keras.layers.regularization.dropout import Dropout
-import tensorflow.compat.v2 as tf

+# isort: off
from tensorflow.python.util.tf_export import keras_export


-@keras_export('keras.layers.SpatialDropout2D')
+@keras_export("keras.layers.SpatialDropout2D")
class SpatialDropout2D(Dropout):
-  """Spatial 2D version of Dropout.
+    """Spatial 2D version of Dropout.

-  This version performs the same function as Dropout, however, it drops
-  entire 2D feature maps instead of individual elements. If adjacent pixels
-  within feature maps are strongly correlated (as is normally the case in
-  early convolution layers) then regular dropout will not regularize the
-  activations and will otherwise just result in an effective learning rate
-  decrease. In this case, SpatialDropout2D will help promote independence
-  between feature maps and should be used instead.
+    This version performs the same function as Dropout, however, it drops
+    entire 2D feature maps instead of individual elements. If adjacent pixels
+    within feature maps are strongly correlated (as is normally the case in
+    early convolution layers) then regular dropout will not regularize the
+    activations and will otherwise just result in an effective learning rate
+    decrease. In this case, SpatialDropout2D will help promote independence
+    between feature maps and should be used instead.

-  Args:
-    rate: Float between 0 and 1. Fraction of the input units to drop.
-    data_format: 'channels_first' or 'channels_last'. In 'channels_first' mode,
-      the channels dimension (the depth) is at index 1, in 'channels_last' mode
-      is it at index 3. It defaults to the `image_data_format` value found in
-      your Keras config file at `~/.keras/keras.json`. If you never set it, then
-      it will be "channels_last".
-  Call arguments:
-    inputs: A 4D tensor.
-    training: Python boolean indicating whether the layer should behave in
-      training mode (adding dropout) or in inference mode (doing nothing).
-  Input shape:
-    4D tensor with shape: `(samples, channels, rows, cols)` if
-      data_format='channels_first'
-    or 4D tensor with shape: `(samples, rows, cols, channels)` if
-      data_format='channels_last'.
-  Output shape: Same as input.
-  References: - [Efficient Object Localization Using Convolutional
-      Networks](https://arxiv.org/abs/1411.4280)
-  """
+    Args:
+        rate: Float between 0 and 1. Fraction of the input units to drop.
+        data_format: 'channels_first' or 'channels_last'. In 'channels_first'
+            mode, the channels dimension (the depth) is at index 1, in
+            'channels_last' mode it is at index 3. When unspecified, uses
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json` (if exists) else 'channels_last'.
+            Defaults to 'channels_last'.
+    Call arguments:
+        inputs: A 4D tensor.
+        training: Python boolean indicating whether the layer should behave in
+            training mode (adding dropout) or in inference mode (doing nothing).
+    Input shape:
+        4D tensor with shape: `(samples, channels, rows, cols)` if
+            data_format='channels_first'
+        or 4D tensor with shape: `(samples, rows, cols, channels)` if
+            data_format='channels_last'.
+    Output shape: Same as input.
+ References: - [Efficient Object Localization Using Convolutional + Networks](https://arxiv.org/abs/1411.4280) + """ - def __init__(self, rate, data_format=None, **kwargs): - super().__init__(rate, **kwargs) - if data_format is None: - data_format = backend.image_data_format() - if data_format not in {'channels_last', 'channels_first'}: - raise ValueError( - f'`data_format` must be "channels_last" or "channels_first". ' - f'Received: data_format={data_format}.') - self.data_format = data_format - self.input_spec = InputSpec(ndim=4) + def __init__(self, rate, data_format=None, **kwargs): + super().__init__(rate, **kwargs) + if data_format is None: + data_format = backend.image_data_format() + if data_format not in {"channels_last", "channels_first"}: + raise ValueError( + '`data_format` must be "channels_last" or "channels_first". ' + f"Received: data_format={data_format}." + ) + self.data_format = data_format + self.input_spec = InputSpec(ndim=4) - def _get_noise_shape(self, inputs): - input_shape = tf.shape(inputs) - if self.data_format == 'channels_first': - return (input_shape[0], input_shape[1], 1, 1) - elif self.data_format == 'channels_last': - return (input_shape[0], 1, 1, input_shape[3]) + def _get_noise_shape(self, inputs): + input_shape = tf.shape(inputs) + if self.data_format == "channels_first": + return (input_shape[0], input_shape[1], 1, 1) + elif self.data_format == "channels_last": + return (input_shape[0], 1, 1, input_shape[3]) diff --git a/keras/layers/regularization/spatial_dropout3d.py b/keras/layers/regularization/spatial_dropout3d.py index 792a2c5b703b..d7dff8724e0b 100644 --- a/keras/layers/regularization/spatial_dropout3d.py +++ b/keras/layers/regularization/spatial_dropout3d.py @@ -13,63 +13,67 @@ # limitations under the License. # ============================================================================== """Contains the SpatialDropout3D layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.input_spec import InputSpec from keras.layers.regularization.dropout import Dropout -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.SpatialDropout3D') +@keras_export("keras.layers.SpatialDropout3D") class SpatialDropout3D(Dropout): - """Spatial 3D version of Dropout. + """Spatial 3D version of Dropout. - This version performs the same function as Dropout, however, it drops - entire 3D feature maps instead of individual elements. If adjacent voxels - within feature maps are strongly correlated (as is normally the case in - early convolution layers) then regular dropout will not regularize the - activations and will otherwise just result in an effective learning rate - decrease. In this case, SpatialDropout3D will help promote independence - between feature maps and should be used instead. + This version performs the same function as Dropout, however, it drops + entire 3D feature maps instead of individual elements. If adjacent voxels + within feature maps are strongly correlated (as is normally the case in + early convolution layers) then regular dropout will not regularize the + activations and will otherwise just result in an effective learning rate + decrease. In this case, SpatialDropout3D will help promote independence + between feature maps and should be used instead. - Args: - rate: Float between 0 and 1. Fraction of the input units to drop. 
-    data_format: 'channels_first' or 'channels_last'. In 'channels_first' mode,
-      the channels dimension (the depth) is at index 1, in 'channels_last' mode
-      is it at index 4. It defaults to the `image_data_format` value found in
-      your Keras config file at `~/.keras/keras.json`. If you never set it, then
-      it will be "channels_last".
-  Call arguments:
-    inputs: A 5D tensor.
-    training: Python boolean indicating whether the layer should behave in
-      training mode (adding dropout) or in inference mode (doing nothing).
-  Input shape:
-    5D tensor with shape: `(samples, channels, dim1, dim2, dim3)` if
-      data_format='channels_first'
-    or 5D tensor with shape: `(samples, dim1, dim2, dim3, channels)` if
-      data_format='channels_last'.
-  Output shape: Same as input.
-  References: - [Efficient Object Localization Using Convolutional
-      Networks](https://arxiv.org/abs/1411.4280)
-  """
+    Args:
+        rate: Float between 0 and 1. Fraction of the input units to drop.
+        data_format: 'channels_first' or 'channels_last'. In 'channels_first'
+            mode, the channels dimension (the depth) is at index 1, in
+            'channels_last' mode it is at index 4. When unspecified, uses
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json` (if exists) else 'channels_last'.
+            Defaults to 'channels_last'.
+    Call arguments:
+        inputs: A 5D tensor.
+        training: Python boolean indicating whether the layer should behave in
+            training mode (adding dropout) or in inference mode (doing nothing).
+    Input shape:
+        5D tensor with shape: `(samples, channels, dim1, dim2, dim3)` if
+            data_format='channels_first'
+        or 5D tensor with shape: `(samples, dim1, dim2, dim3, channels)` if
+            data_format='channels_last'.
+    Output shape: Same as input.
+    References: - [Efficient Object Localization Using Convolutional
+        Networks](https://arxiv.org/abs/1411.4280)
+    """

-  def __init__(self, rate, data_format=None, **kwargs):
-    super().__init__(rate, **kwargs)
-    if data_format is None:
-      data_format = backend.image_data_format()
-    if data_format not in {'channels_last', 'channels_first'}:
-      raise ValueError(
-          f'`data_format` must be "channels_last" or "channels_first". '
-          f'Received: data_format={data_format}.')
-    self.data_format = data_format
-    self.input_spec = InputSpec(ndim=5)
+    def __init__(self, rate, data_format=None, **kwargs):
+        super().__init__(rate, **kwargs)
+        if data_format is None:
+            data_format = backend.image_data_format()
+        if data_format not in {"channels_last", "channels_first"}:
+            raise ValueError(
+                '`data_format` must be "channels_last" or "channels_first". '
+                f"Received: data_format={data_format}."
+ ) + self.data_format = data_format + self.input_spec = InputSpec(ndim=5) - def _get_noise_shape(self, inputs): - input_shape = tf.shape(inputs) - if self.data_format == 'channels_first': - return (input_shape[0], input_shape[1], 1, 1, 1) - elif self.data_format == 'channels_last': - return (input_shape[0], 1, 1, 1, input_shape[4]) + def _get_noise_shape(self, inputs): + input_shape = tf.shape(inputs) + if self.data_format == "channels_first": + return (input_shape[0], input_shape[1], 1, 1, 1) + elif self.data_format == "channels_last": + return (input_shape[0], 1, 1, 1, input_shape[4]) diff --git a/keras/layers/regularization/spatial_dropout_test.py b/keras/layers/regularization/spatial_dropout_test.py index 1b4ec6f12c98..66ac40ec242d 100644 --- a/keras/layers/regularization/spatial_dropout_test.py +++ b/keras/layers/regularization/spatial_dropout_test.py @@ -14,48 +14,48 @@ # ============================================================================== """Tests for spatial dropout layers.""" +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class SpacialDropoutTest(test_combinations.TestCase): - - def test_spatial_dropout_1d(self): - test_utils.layer_test( - keras.layers.SpatialDropout1D, - kwargs={'rate': 0.5}, - input_shape=(2, 3, 4)) - - def test_spatial_dropout_2d(self): - test_utils.layer_test( - keras.layers.SpatialDropout2D, - kwargs={'rate': 0.5}, - input_shape=(2, 3, 4, 5)) - - test_utils.layer_test( - keras.layers.SpatialDropout2D, - kwargs={ - 'rate': 0.5, - 'data_format': 'channels_first' - }, - input_shape=(2, 3, 4, 5)) - - def test_spatial_dropout_3d(self): - test_utils.layer_test( - keras.layers.SpatialDropout3D, - kwargs={'rate': 0.5}, - input_shape=(2, 3, 4, 4, 5)) - - test_utils.layer_test( - keras.layers.SpatialDropout3D, - kwargs={ - 'rate': 0.5, - 'data_format': 'channels_first' - }, - input_shape=(2, 3, 4, 4, 5)) - -if __name__ == '__main__': - tf.test.main() + def test_spatial_dropout_1d(self): + test_utils.layer_test( + keras.layers.SpatialDropout1D, + kwargs={"rate": 0.5}, + input_shape=(2, 3, 4), + ) + + def test_spatial_dropout_2d(self): + test_utils.layer_test( + keras.layers.SpatialDropout2D, + kwargs={"rate": 0.5}, + input_shape=(2, 3, 4, 5), + ) + + test_utils.layer_test( + keras.layers.SpatialDropout2D, + kwargs={"rate": 0.5, "data_format": "channels_first"}, + input_shape=(2, 3, 4, 5), + ) + + def test_spatial_dropout_3d(self): + test_utils.layer_test( + keras.layers.SpatialDropout3D, + kwargs={"rate": 0.5}, + input_shape=(2, 3, 4, 4, 5), + ) + + test_utils.layer_test( + keras.layers.SpatialDropout3D, + kwargs={"rate": 0.5, "data_format": "channels_first"}, + input_shape=(2, 3, 4, 4, 5), + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/reshaping/BUILD b/keras/layers/reshaping/BUILD index 0fd9bdb8d927..2f7e2a73d8e6 100644 --- a/keras/layers/reshaping/BUILD +++ b/keras/layers/reshaping/BUILD @@ -1,18 +1,20 @@ # Description: # Contains the Keras reshaping layers. 
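The 2D and 3D spatial dropout layers above differ from the 1D case only in which axes the noise shape collapses, with the kept channel axis following `data_format`. A sketch of the channels_last 2D case, again assuming the public `tf.keras` API with illustrative shapes:

import numpy as np
import tensorflow as tf

# Noise shape (batch, 1, 1, channels): entire feature maps drop together.
layer = tf.keras.layers.SpatialDropout2D(0.5, data_format="channels_last")
out = layer(np.ones((2, 3, 4, 5)), training=True).numpy()
for sample in out:
    for ch in range(sample.shape[-1]):
        fmap = sample[:, :, ch]
        assert (fmap == 0).all() or (fmap != 0).all()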
+# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "cuda_py_test") # buildifier: disable=same-origin-load load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:__subpackages__", "//third_party/tensorflow/python/distribute:__pkg__", "//third_party/tensorflow/python/feature_column:__pkg__", "//third_party/tensorflow/python/keras:__subpackages__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/trackable:__pkg__", "//third_party/tensorflow/tools/pip_package:__pkg__", "//third_party/tensorflow_models/official/projects/residual_mobilenet/modeling/backbones:__pkg__", ], diff --git a/keras/layers/reshaping/cropping1d.py b/keras/layers/reshaping/cropping1d.py index 5c4068b892c7..2eb632e38d0a 100644 --- a/keras/layers/reshaping/cropping1d.py +++ b/keras/layers/reshaping/cropping1d.py @@ -13,77 +13,85 @@ # limitations under the License. # ============================================================================== """Keras cropping layer for 1D input.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Cropping1D') +@keras_export("keras.layers.Cropping1D") class Cropping1D(Layer): - """Cropping layer for 1D input (e.g. temporal sequence). - - It crops along the time dimension (axis 1). - - Examples: - - >>> input_shape = (2, 3, 2) - >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) - >>> print(x) - [[[ 0 1] - [ 2 3] - [ 4 5]] - [[ 6 7] - [ 8 9] - [10 11]]] - >>> y = tf.keras.layers.Cropping1D(cropping=1)(x) - >>> print(y) - tf.Tensor( - [[[2 3]] - [[8 9]]], shape=(2, 1, 2), dtype=int64) - - Args: - cropping: Int or tuple of int (length 2) - How many units should be trimmed off at the beginning and end of - the cropping dimension (axis 1). - If a single int is provided, the same value will be used for both. - - Input shape: - 3D tensor with shape `(batch_size, axis_to_crop, features)` - - Output shape: - 3D tensor with shape `(batch_size, cropped_axis, features)` - """ - - def __init__(self, cropping=(1, 1), **kwargs): - super().__init__(**kwargs) - self.cropping = conv_utils.normalize_tuple( - cropping, 2, 'cropping', allow_zero=True) - self.input_spec = InputSpec(ndim=3) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if input_shape[1] is not None: - length = input_shape[1] - self.cropping[0] - self.cropping[1] - else: - length = None - return tf.TensorShape([input_shape[0], length, input_shape[2]]) - - def call(self, inputs): - if inputs.shape[1] is not None and sum(self.cropping) >= inputs.shape[1]: - raise ValueError('cropping parameter of Cropping layer must be ' - 'greater than the input shape. Received: inputs.shape=' - f'{inputs.shape}, and cropping={self.cropping}') - if self.cropping[1] == 0: - return inputs[:, self.cropping[0]:, :] - else: - return inputs[:, self.cropping[0]:-self.cropping[1], :] - - def get_config(self): - config = {'cropping': self.cropping} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Cropping layer for 1D input (e.g. temporal sequence). 
+
+    It crops along the time dimension (axis 1).
+
+    Examples:
+
+    >>> input_shape = (2, 3, 2)
+    >>> x = np.arange(np.prod(input_shape)).reshape(input_shape)
+    >>> print(x)
+    [[[ 0  1]
+      [ 2  3]
+      [ 4  5]]
+     [[ 6  7]
+      [ 8  9]
+      [10 11]]]
+    >>> y = tf.keras.layers.Cropping1D(cropping=1)(x)
+    >>> print(y)
+    tf.Tensor(
+      [[[2 3]]
+       [[8 9]]], shape=(2, 1, 2), dtype=int64)
+
+    Args:
+        cropping: Int or tuple of int (length 2)
+            How many units should be trimmed off at the beginning and end of
+            the cropping dimension (axis 1).
+            If a single int is provided, the same value will be used for both.
+
+    Input shape:
+        3D tensor with shape `(batch_size, axis_to_crop, features)`
+
+    Output shape:
+        3D tensor with shape `(batch_size, cropped_axis, features)`
+    """
+
+    def __init__(self, cropping=(1, 1), **kwargs):
+        super().__init__(**kwargs)
+        self.cropping = conv_utils.normalize_tuple(
+            cropping, 2, "cropping", allow_zero=True
+        )
+        self.input_spec = InputSpec(ndim=3)
+
+    def compute_output_shape(self, input_shape):
+        input_shape = tf.TensorShape(input_shape).as_list()
+        if input_shape[1] is not None:
+            length = input_shape[1] - self.cropping[0] - self.cropping[1]
+        else:
+            length = None
+        return tf.TensorShape([input_shape[0], length, input_shape[2]])
+
+    def call(self, inputs):
+        if (
+            inputs.shape[1] is not None
+            and sum(self.cropping) >= inputs.shape[1]
+        ):
+            raise ValueError(
+                "cropping parameter of Cropping layer must be "
+                "less than the input shape. Received: inputs.shape="
+                f"{inputs.shape}, and cropping={self.cropping}"
+            )
+        if self.cropping[1] == 0:
+            return inputs[:, self.cropping[0] :, :]
+        else:
+            return inputs[:, self.cropping[0] : -self.cropping[1], :]
+
+    def get_config(self):
+        config = {"cropping": self.cropping}
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/keras/layers/reshaping/cropping2d.py b/keras/layers/reshaping/cropping2d.py
index 72cedb846936..118de07ee54e 100644
--- a/keras/layers/reshaping/cropping2d.py
+++ b/keras/layers/reshaping/cropping2d.py
@@ -13,152 +13,207 @@ # limitations under the License.
# ==============================================================================
"""Keras cropping layer for 2D input."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
+
+
+import tensorflow.compat.v2 as tf

from keras.engine.base_layer import Layer
from keras.engine.input_spec import InputSpec
from keras.utils import conv_utils
-import tensorflow.compat.v2 as tf

+# isort: off
from tensorflow.python.util.tf_export import keras_export


-@keras_export('keras.layers.Cropping2D')
+@keras_export("keras.layers.Cropping2D")
class Cropping2D(Layer):
-  """Cropping layer for 2D input (e.g. picture).
-
-  It crops along spatial dimensions, i.e. height and width.
-
-  Examples:
-
-  >>> input_shape = (2, 28, 28, 3)
-  >>> x = np.arange(np.prod(input_shape)).reshape(input_shape)
-  >>> y = tf.keras.layers.Cropping2D(cropping=((2, 2), (4, 4)))(x)
-  >>> print(y.shape)
-  (2, 24, 20, 3)
-
-  Args:
-    cropping: Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints.
-      - If int: the same symmetric cropping
-        is applied to height and width.
-      - If tuple of 2 ints:
-        interpreted as two different
-        symmetric cropping values for height and width:
-        `(symmetric_height_crop, symmetric_width_crop)`.
-      - If tuple of 2 tuples of 2 ints:
-        interpreted as
-        `((top_crop, bottom_crop), (left_crop, right_crop))`
-    data_format: A string,
-      one of `channels_last` (default) or `channels_first`.
- The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch_size, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - Input shape: - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, rows, cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch_size, channels, rows, cols)` - - Output shape: - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, cropped_rows, cropped_cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch_size, channels, cropped_rows, cropped_cols)` - """ - - def __init__(self, cropping=((0, 0), (0, 0)), data_format=None, **kwargs): - super().__init__(**kwargs) - self.data_format = conv_utils.normalize_data_format(data_format) - if isinstance(cropping, int): - self.cropping = ((cropping, cropping), (cropping, cropping)) - elif hasattr(cropping, '__len__'): - if len(cropping) != 2: - raise ValueError('`cropping` should have two elements. ' - f'Received: {cropping}.') - height_cropping = conv_utils.normalize_tuple( - cropping[0], 2, '1st entry of cropping', allow_zero=True) - width_cropping = conv_utils.normalize_tuple( - cropping[1], 2, '2nd entry of cropping', allow_zero=True) - self.cropping = (height_cropping, width_cropping) - else: - raise ValueError('`cropping` should be either an int, ' - 'a tuple of 2 ints ' - '(symmetric_height_crop, symmetric_width_crop), ' - 'or a tuple of 2 tuples of 2 ints ' - '((top_crop, bottom_crop), (left_crop, right_crop)). ' - f'Received: {cropping}.') - self.input_spec = InputSpec(ndim=4) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - # pylint: disable=invalid-unary-operand-type - if self.data_format == 'channels_first': - return tf.TensorShape([ - input_shape[0], input_shape[1], - input_shape[2] - self.cropping[0][0] - self.cropping[0][1] - if input_shape[2] else None, - input_shape[3] - self.cropping[1][0] - self.cropping[1][1] - if input_shape[3] else None - ]) - else: - return tf.TensorShape([ - input_shape[0], - input_shape[1] - self.cropping[0][0] - self.cropping[0][1] - if input_shape[1] else None, - input_shape[2] - self.cropping[1][0] - self.cropping[1][1] - if input_shape[2] else None, input_shape[3] - ]) - # pylint: enable=invalid-unary-operand-type - - def call(self, inputs): - # pylint: disable=invalid-unary-operand-type - if self.data_format == 'channels_first': - if ((inputs.shape[2] is not None and - sum(self.cropping[0]) >= inputs.shape[2]) or - (inputs.shape[3] is not None and - sum(self.cropping[1]) >= inputs.shape[3])): - raise ValueError('Argument `cropping` must be ' - 'greater than the input shape. 
Received: inputs.shape=' - f'{inputs.shape}, and cropping={self.cropping}') - if self.cropping[0][1] == self.cropping[1][1] == 0: - return inputs[:, :, self.cropping[0][0]:, self.cropping[1][0]:] - elif self.cropping[0][1] == 0: - return inputs[:, :, self.cropping[0][0]:, self.cropping[1][0]: - -self.cropping[1][1]] - elif self.cropping[1][1] == 0: - return inputs[:, :, self.cropping[0][0]:-self.cropping[0][1], - self.cropping[1][0]:] - return inputs[:, :, self.cropping[0][0]:-self.cropping[0][1], - self.cropping[1][0]:-self.cropping[1][1]] - else: - if ((inputs.shape[1] is not None and - sum(self.cropping[0]) >= inputs.shape[1]) or - (inputs.shape[2] is not None and - sum(self.cropping[1]) >= inputs.shape[2])): - raise ValueError('Argument `cropping` must be ' - 'greater than the input shape. Received: inputs.shape=' - f'{inputs.shape}, and cropping={self.cropping}') - if self.cropping[0][1] == self.cropping[1][1] == 0: - return inputs[:, self.cropping[0][0]:, self.cropping[1][0]:, :] - elif self.cropping[0][1] == 0: - return inputs[:, self.cropping[0][0]:, self.cropping[1][0]: - -self.cropping[1][1], :] - elif self.cropping[1][1] == 0: - return inputs[:, self.cropping[0][0]:-self.cropping[0][1], - self.cropping[1][0]:, :] - return inputs[:, self.cropping[0][0]:-self.cropping[0][1], self.cropping[ - 1][0]:-self.cropping[1][1], :] # pylint: disable=invalid-unary-operand-type - # pylint: enable=invalid-unary-operand-type - - def get_config(self): - config = {'cropping': self.cropping, 'data_format': self.data_format} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Cropping layer for 2D input (e.g. picture). + + It crops along spatial dimensions, i.e. height and width. + + Examples: + + >>> input_shape = (2, 28, 28, 3) + >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + >>> y = tf.keras.layers.Cropping2D(cropping=((2, 2), (4, 4)))(x) + >>> print(y.shape) + (2, 24, 20, 3) + + Args: + cropping: Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. + - If int: the same symmetric cropping + is applied to height and width. + - If tuple of 2 ints: + interpreted as two different + symmetric cropping values for height and width: + `(symmetric_height_crop, symmetric_width_crop)`. + - If tuple of 2 tuples of 2 ints: + interpreted as + `((top_crop, bottom_crop), (left_crop, right_crop))` + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch_size, channels, height, width)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. 
+
+    Input shape:
+        4D tensor with shape:
+        - If `data_format` is `"channels_last"`:
+            `(batch_size, rows, cols, channels)`
+        - If `data_format` is `"channels_first"`:
+            `(batch_size, channels, rows, cols)`
+
+    Output shape:
+        4D tensor with shape:
+        - If `data_format` is `"channels_last"`:
+            `(batch_size, cropped_rows, cropped_cols, channels)`
+        - If `data_format` is `"channels_first"`:
+            `(batch_size, channels, cropped_rows, cropped_cols)`
+    """
+
+    def __init__(self, cropping=((0, 0), (0, 0)), data_format=None, **kwargs):
+        super().__init__(**kwargs)
+        self.data_format = conv_utils.normalize_data_format(data_format)
+        if isinstance(cropping, int):
+            self.cropping = ((cropping, cropping), (cropping, cropping))
+        elif hasattr(cropping, "__len__"):
+            if len(cropping) != 2:
+                raise ValueError(
+                    "`cropping` should have two elements. "
+                    f"Received: {cropping}."
+                )
+            height_cropping = conv_utils.normalize_tuple(
+                cropping[0], 2, "1st entry of cropping", allow_zero=True
+            )
+            width_cropping = conv_utils.normalize_tuple(
+                cropping[1], 2, "2nd entry of cropping", allow_zero=True
+            )
+            self.cropping = (height_cropping, width_cropping)
+        else:
+            raise ValueError(
+                "`cropping` should be either an int, "
+                "a tuple of 2 ints "
+                "(symmetric_height_crop, symmetric_width_crop), "
+                "or a tuple of 2 tuples of 2 ints "
+                "((top_crop, bottom_crop), (left_crop, right_crop)). "
+                f"Received: {cropping}."
+            )
+        self.input_spec = InputSpec(ndim=4)
+
+    def compute_output_shape(self, input_shape):
+        input_shape = tf.TensorShape(input_shape).as_list()
+
+        if self.data_format == "channels_first":
+            return tf.TensorShape(
+                [
+                    input_shape[0],
+                    input_shape[1],
+                    input_shape[2] - self.cropping[0][0] - self.cropping[0][1]
+                    if input_shape[2]
+                    else None,
+                    input_shape[3] - self.cropping[1][0] - self.cropping[1][1]
+                    if input_shape[3]
+                    else None,
+                ]
+            )
+        else:
+            return tf.TensorShape(
+                [
+                    input_shape[0],
+                    input_shape[1] - self.cropping[0][0] - self.cropping[0][1]
+                    if input_shape[1]
+                    else None,
+                    input_shape[2] - self.cropping[1][0] - self.cropping[1][1]
+                    if input_shape[2]
+                    else None,
+                    input_shape[3],
+                ]
+            )
+
+    def call(self, inputs):
+
+        if self.data_format == "channels_first":
+            if (
+                inputs.shape[2] is not None
+                and sum(self.cropping[0]) >= inputs.shape[2]
+            ) or (
+                inputs.shape[3] is not None
+                and sum(self.cropping[1]) >= inputs.shape[3]
+            ):
+                raise ValueError(
+                    "Argument `cropping` must be "
+                    "less than the input shape. Received: inputs.shape="
+                    f"{inputs.shape}, and cropping={self.cropping}"
+                )
+            if self.cropping[0][1] == self.cropping[1][1] == 0:
+                return inputs[
+                    :, :, self.cropping[0][0] :, self.cropping[1][0] :
+                ]
+            elif self.cropping[0][1] == 0:
+                return inputs[
+                    :,
+                    :,
+                    self.cropping[0][0] :,
+                    self.cropping[1][0] : -self.cropping[1][1],
+                ]
+            elif self.cropping[1][1] == 0:
+                return inputs[
+                    :,
+                    :,
+                    self.cropping[0][0] : -self.cropping[0][1],
+                    self.cropping[1][0] :,
+                ]
+            return inputs[
+                :,
+                :,
+                self.cropping[0][0] : -self.cropping[0][1],
+                self.cropping[1][0] : -self.cropping[1][1],
+            ]
+        else:
+            if (
+                inputs.shape[1] is not None
+                and sum(self.cropping[0]) >= inputs.shape[1]
+            ) or (
+                inputs.shape[2] is not None
+                and sum(self.cropping[1]) >= inputs.shape[2]
+            ):
+                raise ValueError(
+                    "Argument `cropping` must be "
+                    "less than the input shape. 
Received: inputs.shape=" + f"{inputs.shape}, and cropping={self.cropping}" + ) + if self.cropping[0][1] == self.cropping[1][1] == 0: + return inputs[ + :, self.cropping[0][0] :, self.cropping[1][0] :, : + ] + elif self.cropping[0][1] == 0: + return inputs[ + :, + self.cropping[0][0] :, + self.cropping[1][0] : -self.cropping[1][1], + :, + ] + elif self.cropping[1][1] == 0: + return inputs[ + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] :, + :, + ] + return inputs[ + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] : -self.cropping[1][1], + :, + ] + + def get_config(self): + config = {"cropping": self.cropping, "data_format": self.data_format} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/cropping3d.py b/keras/layers/reshaping/cropping3d.py index 775c4a32f6a8..a7d1a933e7ca 100644 --- a/keras/layers/reshaping/cropping3d.py +++ b/keras/layers/reshaping/cropping3d.py @@ -13,193 +13,301 @@ # limitations under the License. # ============================================================================== """Keras cropping layer for 3D input.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Cropping3D') +@keras_export("keras.layers.Cropping3D") class Cropping3D(Layer): - """Cropping layer for 3D data (e.g. spatial or spatio-temporal). - - Examples: - - >>> input_shape = (2, 28, 28, 10, 3) - >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) - >>> y = tf.keras.layers.Cropping3D(cropping=(2, 4, 2))(x) - >>> print(y.shape) - (2, 24, 20, 6, 3) - - Args: - cropping: Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints. - - If int: the same symmetric cropping - is applied to depth, height, and width. - - If tuple of 3 ints: interpreted as two different - symmetric cropping values for depth, height, and width: - `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`. - - If tuple of 3 tuples of 2 ints: interpreted as - `((left_dim1_crop, right_dim1_crop), (left_dim2_crop, - right_dim2_crop), (left_dim3_crop, right_dim3_crop))` - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". 
- - Input shape: - 5D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, first_axis_to_crop, second_axis_to_crop, third_axis_to_crop, - depth)` - - If `data_format` is `"channels_first"`: - `(batch_size, depth, first_axis_to_crop, second_axis_to_crop, - third_axis_to_crop)` - - Output shape: - 5D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, first_cropped_axis, second_cropped_axis, third_cropped_axis, - depth)` - - If `data_format` is `"channels_first"`: - `(batch_size, depth, first_cropped_axis, second_cropped_axis, - third_cropped_axis)` - """ - - def __init__(self, - cropping=((1, 1), (1, 1), (1, 1)), - data_format=None, - **kwargs): - super().__init__(**kwargs) - self.data_format = conv_utils.normalize_data_format(data_format) - if isinstance(cropping, int): - self.cropping = ((cropping, cropping), (cropping, cropping), (cropping, - cropping)) - elif hasattr(cropping, '__len__'): - if len(cropping) != 3: - raise ValueError('`cropping` should have 3 elements. ' - f'Received: {cropping}.') - dim1_cropping = conv_utils.normalize_tuple( - cropping[0], 2, '1st entry of cropping', allow_zero=True) - dim2_cropping = conv_utils.normalize_tuple( - cropping[1], 2, '2nd entry of cropping', allow_zero=True) - dim3_cropping = conv_utils.normalize_tuple( - cropping[2], 2, '3rd entry of cropping', allow_zero=True) - self.cropping = (dim1_cropping, dim2_cropping, dim3_cropping) - else: - raise ValueError( - '`cropping` should be either an int, ' - 'a tuple of 3 ints ' - '(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop), ' - 'or a tuple of 3 tuples of 2 ints ' - '((left_dim1_crop, right_dim1_crop),' - ' (left_dim2_crop, right_dim2_crop),' - ' (left_dim3_crop, right_dim2_crop)). ' - f'Received: {cropping}.') - self.input_spec = InputSpec(ndim=5) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - # pylint: disable=invalid-unary-operand-type - if self.data_format == 'channels_first': - if input_shape[2] is not None: - dim1 = input_shape[2] - self.cropping[0][0] - self.cropping[0][1] - else: - dim1 = None - if input_shape[3] is not None: - dim2 = input_shape[3] - self.cropping[1][0] - self.cropping[1][1] - else: - dim2 = None - if input_shape[4] is not None: - dim3 = input_shape[4] - self.cropping[2][0] - self.cropping[2][1] - else: - dim3 = None - return tf.TensorShape( - [input_shape[0], input_shape[1], dim1, dim2, dim3]) - elif self.data_format == 'channels_last': - if input_shape[1] is not None: - dim1 = input_shape[1] - self.cropping[0][0] - self.cropping[0][1] - else: - dim1 = None - if input_shape[2] is not None: - dim2 = input_shape[2] - self.cropping[1][0] - self.cropping[1][1] - else: - dim2 = None - if input_shape[3] is not None: - dim3 = input_shape[3] - self.cropping[2][0] - self.cropping[2][1] - else: - dim3 = None - return tf.TensorShape( - [input_shape[0], dim1, dim2, dim3, input_shape[4]]) - # pylint: enable=invalid-unary-operand-type - - def call(self, inputs): - # pylint: disable=invalid-unary-operand-type - if self.data_format == 'channels_first': - if self.cropping[0][1] == self.cropping[1][1] == self.cropping[2][1] == 0: - return inputs[:, :, self.cropping[0][0]:, self.cropping[1][0]:, - self.cropping[2][0]:] - elif self.cropping[0][1] == self.cropping[1][1] == 0: - return inputs[:, :, self.cropping[0][0]:, self.cropping[1][0]:, - self.cropping[2][0]:-self.cropping[2][1]] - elif self.cropping[1][1] == self.cropping[2][1] == 0: - return inputs[:, :, 
self.cropping[0][0]:-self.cropping[0][1],
-                      self.cropping[1][0]:, self.cropping[2][0]:]
-      elif self.cropping[0][1] == self.cropping[2][1] == 0:
-        return inputs[:, :, self.cropping[0][0]:, self.cropping[1][0]:
-                      -self.cropping[1][1], self.cropping[2][0]:]
-      elif self.cropping[0][1] == 0:
-        return inputs[:, :, self.cropping[0][0]:, self.cropping[1][
-            0]:-self.cropping[1][1], self.cropping[2][0]:-self.cropping[2][1]]
-      elif self.cropping[1][1] == 0:
-        return inputs[:, :, self.cropping[0][0]:-self.cropping[0][1], self.
-                      cropping[1][0]:, self.cropping[2][0]:-self.cropping[2][1]]
-      elif self.cropping[2][1] == 0:
-        return inputs[:, :, self.cropping[0][0]:-self.cropping[0][1], self.
-                      cropping[1][0]:-self.cropping[1][1], self.cropping[2][0]:]
-      return inputs[:, :, self.cropping[0][0]:-self.cropping[0][1],
-                    self.cropping[1][0]:-self.cropping[1][1], self.cropping[2][
-                        0]:-self.cropping[2][1]]
-    else:
-      if self.cropping[0][1] == self.cropping[1][1] == self.cropping[2][1] == 0:
-        return inputs[:, self.cropping[0][0]:, self.cropping[1][0]:,
-                      self.cropping[2][0]:, :]
-      elif self.cropping[0][1] == self.cropping[1][1] == 0:
-        return inputs[:, self.cropping[0][0]:, self.cropping[1][0]:,
-                      self.cropping[2][0]:-self.cropping[2][1], :]
-      elif self.cropping[1][1] == self.cropping[2][1] == 0:
-        return inputs[:, self.cropping[0][0]:-self.cropping[0][1],
-                      self.cropping[1][0]:, self.cropping[2][0]:, :]
-      elif self.cropping[0][1] == self.cropping[2][1] == 0:
-        return inputs[:, self.cropping[0][0]:, self.cropping[1][0]:
-                      -self.cropping[1][1], self.cropping[2][0]:, :]
-      elif self.cropping[0][1] == 0:
-        return inputs[:, self.cropping[0][0]:, self.cropping[1][
-            0]:-self.cropping[1][1], self.cropping[2][0]:
-                      -self.cropping[2][1], :]
-      elif self.cropping[1][1] == 0:
-        return inputs[:, self.cropping[0][
-            0]:-self.cropping[0][1], self.cropping[1][0]:, self.cropping[2][0]:
-                      -self.cropping[2][1], :]
-      elif self.cropping[2][1] == 0:
-        return inputs[:, self.cropping[0][0]:-self.cropping[0][1],
-                      self.cropping[1][0]:-self.cropping[1][1], self.cropping[
-                          2][0]:, :]
-      return inputs[:, self.cropping[0][0]:-self.cropping[0][1], self.cropping[
-          1][0]:-self.cropping[1][1], self.cropping[2][0]:  # pylint: disable=invalid-unary-operand-type
-                    -self.cropping[2][1], :]  # pylint: disable=invalid-unary-operand-type
-    # pylint: enable=invalid-unary-operand-type
-
-  def get_config(self):
-    config = {'cropping': self.cropping, 'data_format': self.data_format}
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(config.items()))
+    """Cropping layer for 3D data (e.g. spatial or spatio-temporal).
+
+    Examples:
+
+    >>> input_shape = (2, 28, 28, 10, 3)
+    >>> x = np.arange(np.prod(input_shape)).reshape(input_shape)
+    >>> y = tf.keras.layers.Cropping3D(cropping=(2, 4, 2))(x)
+    >>> print(y.shape)
+    (2, 24, 20, 6, 3)
+
+    Args:
+        cropping: Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints.
+        - If int: the same symmetric cropping
+            is applied to depth, height, and width.
+        - If tuple of 3 ints: interpreted as three different
+            symmetric cropping values for depth, height, and width:
+            `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`.
+        - If tuple of 3 tuples of 2 ints: interpreted as
+            `((left_dim1_crop, right_dim1_crop), (left_dim2_crop,
+            right_dim2_crop), (left_dim3_crop, right_dim3_crop))`
+        data_format: A string,
+            one of `channels_last` (default) or `channels_first`.
+            The ordering of the dimensions in the inputs.
+            `channels_last` corresponds to inputs with shape
+            `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+            while `channels_first` corresponds to inputs with shape
+            `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
+            When unspecified, uses
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json` (if exists) else 'channels_last'.
+            Defaults to 'channels_last'.
+
+    Input shape:
+        5D tensor with shape:
+        - If `data_format` is `"channels_last"`:
+            `(batch_size, first_axis_to_crop, second_axis_to_crop,
+            third_axis_to_crop, depth)`
+        - If `data_format` is `"channels_first"`:
+            `(batch_size, depth, first_axis_to_crop, second_axis_to_crop,
+            third_axis_to_crop)`
+
+    Output shape:
+        5D tensor with shape:
+        - If `data_format` is `"channels_last"`:
+            `(batch_size, first_cropped_axis, second_cropped_axis,
+            third_cropped_axis, depth)`
+        - If `data_format` is `"channels_first"`:
+            `(batch_size, depth, first_cropped_axis, second_cropped_axis,
+            third_cropped_axis)`
+    """
+
+    def __init__(
+        self, cropping=((1, 1), (1, 1), (1, 1)), data_format=None, **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.data_format = conv_utils.normalize_data_format(data_format)
+        if isinstance(cropping, int):
+            self.cropping = (
+                (cropping, cropping),
+                (cropping, cropping),
+                (cropping, cropping),
+            )
+        elif hasattr(cropping, "__len__"):
+            if len(cropping) != 3:
+                raise ValueError(
+                    f"`cropping` should have 3 elements. Received: {cropping}."
+                )
+            dim1_cropping = conv_utils.normalize_tuple(
+                cropping[0], 2, "1st entry of cropping", allow_zero=True
+            )
+            dim2_cropping = conv_utils.normalize_tuple(
+                cropping[1], 2, "2nd entry of cropping", allow_zero=True
+            )
+            dim3_cropping = conv_utils.normalize_tuple(
+                cropping[2], 2, "3rd entry of cropping", allow_zero=True
+            )
+            self.cropping = (dim1_cropping, dim2_cropping, dim3_cropping)
+        else:
+            raise ValueError(
+                "`cropping` should be either an int, "
+                "a tuple of 3 ints "
+                "(symmetric_dim1_crop, symmetric_dim2_crop, "
+                "symmetric_dim3_crop), "
+                "or a tuple of 3 tuples of 2 ints "
+                "((left_dim1_crop, right_dim1_crop),"
+                " (left_dim2_crop, right_dim2_crop),"
+                " (left_dim3_crop, right_dim3_crop)). "
+                f"Received: {cropping}."
+ ) + self.input_spec = InputSpec(ndim=5) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + + if self.data_format == "channels_first": + if input_shape[2] is not None: + dim1 = ( + input_shape[2] - self.cropping[0][0] - self.cropping[0][1] + ) + else: + dim1 = None + if input_shape[3] is not None: + dim2 = ( + input_shape[3] - self.cropping[1][0] - self.cropping[1][1] + ) + else: + dim2 = None + if input_shape[4] is not None: + dim3 = ( + input_shape[4] - self.cropping[2][0] - self.cropping[2][1] + ) + else: + dim3 = None + return tf.TensorShape( + [input_shape[0], input_shape[1], dim1, dim2, dim3] + ) + elif self.data_format == "channels_last": + if input_shape[1] is not None: + dim1 = ( + input_shape[1] - self.cropping[0][0] - self.cropping[0][1] + ) + else: + dim1 = None + if input_shape[2] is not None: + dim2 = ( + input_shape[2] - self.cropping[1][0] - self.cropping[1][1] + ) + else: + dim2 = None + if input_shape[3] is not None: + dim3 = ( + input_shape[3] - self.cropping[2][0] - self.cropping[2][1] + ) + else: + dim3 = None + return tf.TensorShape( + [input_shape[0], dim1, dim2, dim3, input_shape[4]] + ) + + def call(self, inputs): + + if self.data_format == "channels_first": + if ( + self.cropping[0][1] + == self.cropping[1][1] + == self.cropping[2][1] + == 0 + ): + return inputs[ + :, + :, + self.cropping[0][0] :, + self.cropping[1][0] :, + self.cropping[2][0] :, + ] + elif self.cropping[0][1] == self.cropping[1][1] == 0: + return inputs[ + :, + :, + self.cropping[0][0] :, + self.cropping[1][0] :, + self.cropping[2][0] : -self.cropping[2][1], + ] + elif self.cropping[1][1] == self.cropping[2][1] == 0: + return inputs[ + :, + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] :, + self.cropping[2][0] :, + ] + elif self.cropping[0][1] == self.cropping[2][1] == 0: + return inputs[ + :, + :, + self.cropping[0][0] :, + self.cropping[1][0] : -self.cropping[1][1], + self.cropping[2][0] :, + ] + elif self.cropping[0][1] == 0: + return inputs[ + :, + :, + self.cropping[0][0] :, + self.cropping[1][0] : -self.cropping[1][1], + self.cropping[2][0] : -self.cropping[2][1], + ] + elif self.cropping[1][1] == 0: + return inputs[ + :, + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] :, + self.cropping[2][0] : -self.cropping[2][1], + ] + elif self.cropping[2][1] == 0: + return inputs[ + :, + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] : -self.cropping[1][1], + self.cropping[2][0] :, + ] + return inputs[ + :, + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] : -self.cropping[1][1], + self.cropping[2][0] : -self.cropping[2][1], + ] + else: + if ( + self.cropping[0][1] + == self.cropping[1][1] + == self.cropping[2][1] + == 0 + ): + return inputs[ + :, + self.cropping[0][0] :, + self.cropping[1][0] :, + self.cropping[2][0] :, + :, + ] + elif self.cropping[0][1] == self.cropping[1][1] == 0: + return inputs[ + :, + self.cropping[0][0] :, + self.cropping[1][0] :, + self.cropping[2][0] : -self.cropping[2][1], + :, + ] + elif self.cropping[1][1] == self.cropping[2][1] == 0: + return inputs[ + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] :, + self.cropping[2][0] :, + :, + ] + elif self.cropping[0][1] == self.cropping[2][1] == 0: + return inputs[ + :, + self.cropping[0][0] :, + self.cropping[1][0] : -self.cropping[1][1], + self.cropping[2][0] :, + :, + ] + elif self.cropping[0][1] == 0: + return inputs[ + :, + self.cropping[0][0] :, + 
self.cropping[1][0] : -self.cropping[1][1], + self.cropping[2][0] : -self.cropping[2][1], + :, + ] + elif self.cropping[1][1] == 0: + return inputs[ + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] :, + self.cropping[2][0] : -self.cropping[2][1], + :, + ] + elif self.cropping[2][1] == 0: + return inputs[ + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] : -self.cropping[1][1], + self.cropping[2][0] :, + :, + ] + return inputs[ + :, + self.cropping[0][0] : -self.cropping[0][1], + self.cropping[1][0] : -self.cropping[1][1], + self.cropping[2][0] : -self.cropping[2][1], + :, + ] + + def get_config(self): + config = {"cropping": self.cropping, "data_format": self.data_format} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/cropping_test.py b/keras/layers/reshaping/cropping_test.py index 5b6a7d22b5da..69f7a28003d0 100644 --- a/keras/layers/reshaping/cropping_test.py +++ b/keras/layers/reshaping/cropping_test.py @@ -14,161 +14,199 @@ # ============================================================================== """Tests for cropping layers.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class CroppingTest(test_combinations.TestCase): + def test_cropping_1d(self): + num_samples = 2 + time_length = 4 + input_len_dim1 = 2 + inputs = np.random.rand(num_samples, time_length, input_len_dim1) - def test_cropping_1d(self): - num_samples = 2 - time_length = 4 - input_len_dim1 = 2 - inputs = np.random.rand(num_samples, time_length, input_len_dim1) - - with self.cached_session(): - test_utils.layer_test( - keras.layers.Cropping1D, - kwargs={'cropping': (1, 1)}, - input_shape=inputs.shape) - - # test incorrect use - with self.assertRaises(ValueError): - keras.layers.Cropping1D(cropping=(1, 1, 1)) - with self.assertRaises(ValueError): - keras.layers.Cropping1D(cropping=None) - with self.assertRaises(ValueError): - input_layer = keras.layers.Input( - shape=(num_samples, time_length, input_len_dim1)) - keras.layers.Cropping1D(cropping=(2, 3))(input_layer) - - def test_cropping_2d(self): - num_samples = 2 - stack_size = 2 - input_len_dim1 = 9 - input_len_dim2 = 9 - cropping = ((2, 2), (3, 3)) - - for data_format in ['channels_first', 'channels_last']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, input_len_dim1, - input_len_dim2) - else: - inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2, - stack_size) - with self.cached_session(): - # basic test - test_utils.layer_test( - keras.layers.Cropping2D, - kwargs={'cropping': cropping, - 'data_format': data_format}, - input_shape=inputs.shape) - # correctness test - layer = keras.layers.Cropping2D( - cropping=cropping, data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() - else: - np_output = keras.backend.eval(output) - # compare with numpy - if data_format == 'channels_first': - expected_out = inputs[:, :, cropping[0][0]:-cropping[0][1], cropping[ - 1][0]:-cropping[1][1]] - else: - expected_out = inputs[:, cropping[0][0]:-cropping[0][1], cropping[1][ - 0]:-cropping[1][1], :] - np.testing.assert_allclose(np_output, expected_out) - - for data_format in 
['channels_first', 'channels_last']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, input_len_dim1, - input_len_dim2) - else: - inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2, - stack_size) - # another correctness test (no cropping) - with self.cached_session(): - cropping = ((0, 0), (0, 0)) - layer = keras.layers.Cropping2D( - cropping=cropping, data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() - else: - np_output = keras.backend.eval(output) - # compare with input - np.testing.assert_allclose(np_output, inputs) - - # test incorrect use - with self.assertRaises(ValueError): - keras.layers.Cropping2D(cropping=(1, 1, 1)) - with self.assertRaises(ValueError): - keras.layers.Cropping2D(cropping=None) - with self.assertRaises(ValueError): - input_layer = keras.layers.Input( - shape=(num_samples, input_len_dim1, input_len_dim2, stack_size)) - keras.layers.Cropping2D(cropping=((5, 4), (3, 4)))(input_layer) - - def test_cropping_3d(self): - num_samples = 2 - stack_size = 2 - input_len_dim1 = 8 - input_len_dim2 = 8 - input_len_dim3 = 8 - croppings = [((2, 2), (1, 1), (2, 3)), 3, (0, 1, 1)] - - for cropping in croppings: - for data_format in ['channels_last', 'channels_first']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, input_len_dim1, - input_len_dim2, input_len_dim3) - else: - inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2, - input_len_dim3, stack_size) - # basic test with self.cached_session(): - test_utils.layer_test( - keras.layers.Cropping3D, - kwargs={'cropping': cropping, - 'data_format': data_format}, - input_shape=inputs.shape) - - if len(croppings) == 3 and len(croppings[0]) == 2: - # correctness test - with self.cached_session(): - layer = keras.layers.Cropping3D( - cropping=cropping, data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() + test_utils.layer_test( + keras.layers.Cropping1D, + kwargs={"cropping": (1, 1)}, + input_shape=inputs.shape, + ) + + # test incorrect use + with self.assertRaises(ValueError): + keras.layers.Cropping1D(cropping=(1, 1, 1)) + with self.assertRaises(ValueError): + keras.layers.Cropping1D(cropping=None) + with self.assertRaises(ValueError): + input_layer = keras.layers.Input( + shape=(num_samples, time_length, input_len_dim1) + ) + keras.layers.Cropping1D(cropping=(2, 3))(input_layer) + + def test_cropping_2d(self): + num_samples = 2 + stack_size = 2 + input_len_dim1 = 9 + input_len_dim2 = 9 + cropping = ((2, 2), (3, 3)) + + for data_format in ["channels_first", "channels_last"]: + if data_format == "channels_first": + inputs = np.random.rand( + num_samples, stack_size, input_len_dim1, input_len_dim2 + ) else: - np_output = keras.backend.eval(output) - # compare with numpy - if data_format == 'channels_first': - expected_out = inputs[:, :, - cropping[0][0]:-cropping[0][1], - cropping[1][0]:-cropping[1][1], - cropping[2][0]:-cropping[2][1]] + inputs = np.random.rand( + num_samples, input_len_dim1, input_len_dim2, stack_size + ) + with self.cached_session(): + # basic test + test_utils.layer_test( + keras.layers.Cropping2D, + kwargs={"cropping": cropping, "data_format": data_format}, + input_shape=inputs.shape, + ) + # correctness test + layer = keras.layers.Cropping2D( + cropping=cropping, data_format=data_format + ) 
+ layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + # compare with numpy + if data_format == "channels_first": + expected_out = inputs[ + :, + :, + cropping[0][0] : -cropping[0][1], + cropping[1][0] : -cropping[1][1], + ] + else: + expected_out = inputs[ + :, + cropping[0][0] : -cropping[0][1], + cropping[1][0] : -cropping[1][1], + :, + ] + np.testing.assert_allclose(np_output, expected_out) + + for data_format in ["channels_first", "channels_last"]: + if data_format == "channels_first": + inputs = np.random.rand( + num_samples, stack_size, input_len_dim1, input_len_dim2 + ) else: - expected_out = inputs[:, - cropping[0][0]:-cropping[0][1], - cropping[1][0]:-cropping[1][1], - cropping[2][0]:-cropping[2][1], :] - np.testing.assert_allclose(np_output, expected_out) - - # test incorrect use - with self.assertRaises(ValueError): - keras.layers.Cropping3D(cropping=(1, 1)) - with self.assertRaises(ValueError): - keras.layers.Cropping3D(cropping=None) - -if __name__ == '__main__': - tf.test.main() + inputs = np.random.rand( + num_samples, input_len_dim1, input_len_dim2, stack_size + ) + # another correctness test (no cropping) + with self.cached_session(): + cropping = ((0, 0), (0, 0)) + layer = keras.layers.Cropping2D( + cropping=cropping, data_format=data_format + ) + layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + # compare with input + np.testing.assert_allclose(np_output, inputs) + + # test incorrect use + with self.assertRaises(ValueError): + keras.layers.Cropping2D(cropping=(1, 1, 1)) + with self.assertRaises(ValueError): + keras.layers.Cropping2D(cropping=None) + with self.assertRaises(ValueError): + input_layer = keras.layers.Input( + shape=(num_samples, input_len_dim1, input_len_dim2, stack_size) + ) + keras.layers.Cropping2D(cropping=((5, 4), (3, 4)))(input_layer) + + def test_cropping_3d(self): + num_samples = 2 + stack_size = 2 + input_len_dim1 = 8 + input_len_dim2 = 8 + input_len_dim3 = 8 + croppings = [((2, 2), (1, 1), (2, 3)), 3, (0, 1, 1)] + + for cropping in croppings: + for data_format in ["channels_last", "channels_first"]: + if data_format == "channels_first": + inputs = np.random.rand( + num_samples, + stack_size, + input_len_dim1, + input_len_dim2, + input_len_dim3, + ) + else: + inputs = np.random.rand( + num_samples, + input_len_dim1, + input_len_dim2, + input_len_dim3, + stack_size, + ) + # basic test + with self.cached_session(): + test_utils.layer_test( + keras.layers.Cropping3D, + kwargs={ + "cropping": cropping, + "data_format": data_format, + }, + input_shape=inputs.shape, + ) + + if len(croppings) == 3 and len(croppings[0]) == 2: + # correctness test + with self.cached_session(): + layer = keras.layers.Cropping3D( + cropping=cropping, data_format=data_format + ) + layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + # compare with numpy + if data_format == "channels_first": + expected_out = inputs[ + :, + :, + cropping[0][0] : -cropping[0][1], + cropping[1][0] : -cropping[1][1], + cropping[2][0] : -cropping[2][1], + ] + else: + expected_out = inputs[ + :, + cropping[0][0] : -cropping[0][1], + cropping[1][0] : -cropping[1][1], + cropping[2][0] : -cropping[2][1], + :, + ] + 
np.testing.assert_allclose(np_output, expected_out) + + # test incorrect use + with self.assertRaises(ValueError): + keras.layers.Cropping3D(cropping=(1, 1)) + with self.assertRaises(ValueError): + keras.layers.Cropping3D(cropping=None) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/reshaping/flatten.py b/keras/layers/reshaping/flatten.py index 8dc1d246d68d..51d3a4fe2a49 100644 --- a/keras/layers/reshaping/flatten.py +++ b/keras/layers/reshaping/flatten.py @@ -13,103 +13,110 @@ # limitations under the License. # ============================================================================== """Contains the flatten layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import functools import operator +import numpy as np +import tensorflow.compat.v2 as tf + from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import numpy as np -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Flatten') +@keras_export("keras.layers.Flatten") class Flatten(Layer): - """Flattens the input. Does not affect the batch size. - - Note: If inputs are shaped `(batch,)` without a feature axis, then - flattening adds an extra channel dimension and output shape is `(batch, 1)`. - - Args: - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, ..., channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, ...)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - Example: - - >>> model = tf.keras.Sequential() - >>> model.add(tf.keras.layers.Conv2D(64, 3, 3, input_shape=(3, 32, 32))) - >>> model.output_shape - (None, 1, 10, 64) - - >>> model.add(Flatten()) - >>> model.output_shape - (None, 640) - - """ - - def __init__(self, data_format=None, **kwargs): - super().__init__(**kwargs) - self.data_format = conv_utils.normalize_data_format(data_format) - self.input_spec = InputSpec(min_ndim=1) - self._channels_first = self.data_format == 'channels_first' - - def call(self, inputs): - if self._channels_first: - rank = inputs.shape.rank - if rank and rank > 1: - # Switch to channels-last format. - permutation = [0] - permutation.extend(range(2, rank)) - permutation.append(1) - inputs = tf.transpose(inputs, perm=permutation) - - if tf.executing_eagerly(): - # Full static shape is guaranteed to be available. - # Performance: Using `constant_op` is much faster than passing a list. - flattened_shape = tf.constant([inputs.shape[0], -1]) - return tf.reshape(inputs, flattened_shape) - else: - input_shape = inputs.shape - rank = input_shape.rank - if rank == 1: - return tf.expand_dims(inputs, axis=1) - else: - batch_dim = tf.compat.dimension_value(input_shape[0]) - non_batch_dims = input_shape[1:] - # Reshape in a way that preserves as much shape info as possible. - if non_batch_dims.is_fully_defined(): - last_dim = int(functools.reduce(operator.mul, non_batch_dims)) - flattened_shape = tf.constant([-1, last_dim]) - elif batch_dim is not None: - flattened_shape = tf.constant([int(batch_dim), -1]) + """Flattens the input. Does not affect the batch size. 
+ + Note: If inputs are shaped `(batch,)` without a feature axis, then + flattening adds an extra channel dimension and output shape is `(batch, 1)`. + + Args: + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, ..., channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, ...)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + + Example: + + >>> model = tf.keras.Sequential() + >>> model.add(tf.keras.layers.Conv2D(64, 3, 3, input_shape=(3, 32, 32))) + >>> model.output_shape + (None, 1, 10, 64) + + >>> model.add(Flatten()) + >>> model.output_shape + (None, 640) + + """ + + def __init__(self, data_format=None, **kwargs): + super().__init__(**kwargs) + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(min_ndim=1) + self._channels_first = self.data_format == "channels_first" + + def call(self, inputs): + if self._channels_first: + rank = inputs.shape.rank + if rank and rank > 1: + # Switch to channels-last format. + permutation = [0] + permutation.extend(range(2, rank)) + permutation.append(1) + inputs = tf.transpose(inputs, perm=permutation) + + if tf.executing_eagerly(): + # Full static shape is guaranteed to be available. + # Performance: Using `constant_op` is much faster than passing a + # list. + flattened_shape = tf.constant([inputs.shape[0], -1]) + return tf.reshape(inputs, flattened_shape) + else: + input_shape = inputs.shape + rank = input_shape.rank + if rank == 1: + return tf.expand_dims(inputs, axis=1) + else: + batch_dim = tf.compat.dimension_value(input_shape[0]) + non_batch_dims = input_shape[1:] + # Reshape in a way that preserves as much shape info as + # possible. 
+ if non_batch_dims.is_fully_defined(): + last_dim = int( + functools.reduce(operator.mul, non_batch_dims) + ) + flattened_shape = tf.constant([-1, last_dim]) + elif batch_dim is not None: + flattened_shape = tf.constant([int(batch_dim), -1]) + else: + flattened_shape = [tf.shape(inputs)[0], -1] + return tf.reshape(inputs, flattened_shape) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if not input_shape: + output_shape = tf.TensorShape([1]) else: - flattened_shape = [tf.shape(inputs)[0], -1] - return tf.reshape(inputs, flattened_shape) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if not input_shape: - output_shape = tf.TensorShape([1]) - else: - output_shape = [input_shape[0]] - if np.all(input_shape[1:]): - output_shape += [np.prod(input_shape[1:], dtype=int)] - else: - output_shape += [None] - return tf.TensorShape(output_shape) - - def get_config(self): - config = super().get_config() - config.update({'data_format': self.data_format}) - return config + output_shape = [input_shape[0]] + if np.all(input_shape[1:]): + output_shape += [np.prod(input_shape[1:], dtype=int)] + else: + output_shape += [None] + return tf.TensorShape(output_shape) + + def get_config(self): + config = super().get_config() + config.update({"data_format": self.data_format}) + return config diff --git a/keras/layers/reshaping/flatten_test.py b/keras/layers/reshaping/flatten_test.py index f6a343bcb798..92127afffe29 100644 --- a/keras/layers/reshaping/flatten_test.py +++ b/keras/layers/reshaping/flatten_test.py @@ -14,41 +14,46 @@ # ============================================================================== """Tests for flatten layer.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class FlattenTest(test_combinations.TestCase): - - def test_flatten(self): - test_utils.layer_test( - keras.layers.Flatten, kwargs={}, input_shape=(3, 2, 4)) - - # Test channels_first - inputs = np.random.random((10, 3, 5, 5)).astype('float32') - outputs = test_utils.layer_test( - keras.layers.Flatten, - kwargs={'data_format': 'channels_first'}, - input_data=inputs) - target_outputs = np.reshape( - np.transpose(inputs, (0, 2, 3, 1)), (-1, 5 * 5 * 3)) - self.assertAllClose(outputs, target_outputs) - - def test_flatten_scalar_channels(self): - test_utils.layer_test(keras.layers.Flatten, kwargs={}, input_shape=(3,)) - - # Test channels_first - inputs = np.random.random((10,)).astype('float32') - outputs = test_utils.layer_test( - keras.layers.Flatten, - kwargs={'data_format': 'channels_first'}, - input_data=inputs) - target_outputs = np.expand_dims(inputs, -1) - self.assertAllClose(outputs, target_outputs) - -if __name__ == '__main__': - tf.test.main() + def test_flatten(self): + test_utils.layer_test( + keras.layers.Flatten, kwargs={}, input_shape=(3, 2, 4) + ) + + # Test channels_first + inputs = np.random.random((10, 3, 5, 5)).astype("float32") + outputs = test_utils.layer_test( + keras.layers.Flatten, + kwargs={"data_format": "channels_first"}, + input_data=inputs, + ) + target_outputs = np.reshape( + np.transpose(inputs, (0, 2, 3, 1)), (-1, 5 * 5 * 3) + ) + self.assertAllClose(outputs, target_outputs) + + def test_flatten_scalar_channels(self): + test_utils.layer_test(keras.layers.Flatten, kwargs={}, 
input_shape=(3,)) + + # Test channels_first + inputs = np.random.random((10,)).astype("float32") + outputs = test_utils.layer_test( + keras.layers.Flatten, + kwargs={"data_format": "channels_first"}, + input_data=inputs, + ) + target_outputs = np.expand_dims(inputs, -1) + self.assertAllClose(outputs, target_outputs) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/reshaping/permute.py b/keras/layers/reshaping/permute.py index 96767a1a944d..590815e9a8e6 100644 --- a/keras/layers/reshaping/permute.py +++ b/keras/layers/reshaping/permute.py @@ -13,70 +13,73 @@ # limitations under the License. # ============================================================================== """Contains the Permute layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import copy +import tensorflow.compat.v2 as tf + from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Permute') +@keras_export("keras.layers.Permute") class Permute(Layer): - """Permutes the dimensions of the input according to a given pattern. - - Useful e.g. connecting RNNs and convnets. - - Example: - - ```python - model = Sequential() - model.add(Permute((2, 1), input_shape=(10, 64))) - # now: model.output_shape == (None, 64, 10) - # note: `None` is the batch dimension - ``` - - Args: - dims: Tuple of integers. Permutation pattern does not include the - samples dimension. Indexing starts at 1. - For instance, `(2, 1)` permutes the first and second dimensions - of the input. - - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - Output shape: - Same as the input shape, but with the dimensions re-ordered according - to the specified pattern. - """ - - def __init__(self, dims, **kwargs): - super().__init__(**kwargs) - self.dims = tuple(dims) - if sorted(dims) != list(range(1, len(dims) + 1)): - raise ValueError( - 'Invalid permutation argument `dims` for Permute Layer. ' - 'The set of indices in `dims` must be consecutive and start from 1. ' - f'Received dims={dims}') - self.input_spec = InputSpec(ndim=len(self.dims) + 1) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - output_shape = copy.copy(input_shape) - for i, dim in enumerate(self.dims): - target_dim = input_shape[dim] - output_shape[i + 1] = target_dim - return tf.TensorShape(output_shape) - - def call(self, inputs): - return tf.transpose(inputs, perm=(0,) + self.dims) - - def get_config(self): - config = {'dims': self.dims} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Permutes the dimensions of the input according to a given pattern. + + Useful e.g. connecting RNNs and convnets. + + Example: + + ```python + model = Sequential() + model.add(Permute((2, 1), input_shape=(10, 64))) + # now: model.output_shape == (None, 64, 10) + # note: `None` is the batch dimension + ``` + + Args: + dims: Tuple of integers. Permutation pattern does not include the + samples dimension. Indexing starts at 1. + For instance, `(2, 1)` permutes the first and second dimensions + of the input. + + Input shape: + Arbitrary. 
Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + + Output shape: + Same as the input shape, but with the dimensions re-ordered according + to the specified pattern. + """ + + def __init__(self, dims, **kwargs): + super().__init__(**kwargs) + self.dims = tuple(dims) + if sorted(dims) != list(range(1, len(dims) + 1)): + raise ValueError( + "Invalid permutation argument `dims` for Permute Layer. " + "The set of indices in `dims` must be consecutive and start " + f"from 1. Received dims={dims}" + ) + self.input_spec = InputSpec(ndim=len(self.dims) + 1) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + output_shape = copy.copy(input_shape) + for i, dim in enumerate(self.dims): + target_dim = input_shape[dim] + output_shape[i + 1] = target_dim + return tf.TensorShape(output_shape) + + def call(self, inputs): + return tf.transpose(inputs, perm=(0,) + self.dims) + + def get_config(self): + config = {"dims": self.dims} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/permute_test.py b/keras/layers/reshaping/permute_test.py index 4145a6439e3c..1a9e6564c8de 100644 --- a/keras/layers/reshaping/permute_test.py +++ b/keras/layers/reshaping/permute_test.py @@ -14,33 +14,40 @@ # ============================================================================== """Tests for Keras permute layer.""" +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf - @test_combinations.run_all_keras_modes class PermuteTest(test_combinations.TestCase): - - def test_permute(self): - test_utils.layer_test( - keras.layers.Permute, kwargs={'dims': (2, 1)}, input_shape=(3, 2, 4)) - - def test_permute_errors_on_invalid_starting_dims_index(self): - with self.assertRaisesRegex(ValueError, r'Invalid permutation .*dims.*'): - test_utils.layer_test( - keras.layers.Permute, - kwargs={'dims': (0, 1, 2)}, - input_shape=(3, 2, 4)) - - def test_permute_errors_on_invalid_set_of_dims_indices(self): - with self.assertRaisesRegex(ValueError, r'Invalid permutation .*dims.*'): - test_utils.layer_test( - keras.layers.Permute, - kwargs={'dims': (1, 4, 2)}, - input_shape=(3, 2, 4)) - -if __name__ == '__main__': - tf.test.main() + def test_permute(self): + test_utils.layer_test( + keras.layers.Permute, kwargs={"dims": (2, 1)}, input_shape=(3, 2, 4) + ) + + def test_permute_errors_on_invalid_starting_dims_index(self): + with self.assertRaisesRegex( + ValueError, r"Invalid permutation .*dims.*" + ): + test_utils.layer_test( + keras.layers.Permute, + kwargs={"dims": (0, 1, 2)}, + input_shape=(3, 2, 4), + ) + + def test_permute_errors_on_invalid_set_of_dims_indices(self): + with self.assertRaisesRegex( + ValueError, r"Invalid permutation .*dims.*" + ): + test_utils.layer_test( + keras.layers.Permute, + kwargs={"dims": (1, 4, 2)}, + input_shape=(3, 2, 4), + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/reshaping/repeat_vector.py b/keras/layers/reshaping/repeat_vector.py index db3e4cff7ace..46dcb89e1541 100644 --- a/keras/layers/reshaping/repeat_vector.py +++ b/keras/layers/reshaping/repeat_vector.py @@ -13,53 +13,57 @@ # limitations under the License. 
# ============================================================================== """Contains the RepeatVector layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.RepeatVector') +@keras_export("keras.layers.RepeatVector") class RepeatVector(Layer): - """Repeats the input n times. + """Repeats the input n times. - Example: + Example: - ```python - model = Sequential() - model.add(Dense(32, input_dim=32)) - # now: model.output_shape == (None, 32) - # note: `None` is the batch dimension + ```python + model = Sequential() + model.add(Dense(32, input_dim=32)) + # now: model.output_shape == (None, 32) + # note: `None` is the batch dimension - model.add(RepeatVector(3)) - # now: model.output_shape == (None, 3, 32) - ``` + model.add(RepeatVector(3)) + # now: model.output_shape == (None, 3, 32) + ``` - Args: - n: Integer, repetition factor. - Input shape: 2D tensor of shape `(num_samples, features)`. - Output shape: 3D tensor of shape `(num_samples, n, features)`. - """ + Args: + n: Integer, repetition factor. + Input shape: 2D tensor of shape `(num_samples, features)`. + Output shape: 3D tensor of shape `(num_samples, n, features)`. + """ - def __init__(self, n, **kwargs): - super().__init__(**kwargs) - self.n = n - if not isinstance(n, int): - raise TypeError(f'Expected an integer value for `n`, got {type(n)}.') - self.input_spec = InputSpec(ndim=2) + def __init__(self, n, **kwargs): + super().__init__(**kwargs) + self.n = n + if not isinstance(n, int): + raise TypeError( + f"Expected an integer value for `n`, got {type(n)}." 
+ ) + self.input_spec = InputSpec(ndim=2) - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - return tf.TensorShape([input_shape[0], self.n, input_shape[1]]) + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + return tf.TensorShape([input_shape[0], self.n, input_shape[1]]) - def call(self, inputs): - return backend.repeat(inputs, self.n) + def call(self, inputs): + return backend.repeat(inputs, self.n) - def get_config(self): - config = {'n': self.n} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = {"n": self.n} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/repeat_vector_test.py b/keras/layers/reshaping/repeat_vector_test.py index 62e567f6c478..f307f308f74c 100644 --- a/keras/layers/reshaping/repeat_vector_test.py +++ b/keras/layers/reshaping/repeat_vector_test.py @@ -14,26 +14,27 @@ # ============================================================================== """Tests for repeat vector layer.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np - -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class RepeatVectorTest(test_combinations.TestCase): + def test_repeat_vector(self): + test_utils.layer_test( + keras.layers.RepeatVector, kwargs={"n": 3}, input_shape=(3, 2) + ) - def test_repeat_vector(self): - test_utils.layer_test( - keras.layers.RepeatVector, kwargs={'n': 3}, input_shape=(3, 2)) + def test_numpy_inputs(self): + if tf.executing_eagerly(): + layer = keras.layers.RepeatVector(2) + x = np.ones((10, 10)) + self.assertAllEqual(np.ones((10, 2, 10)), layer(x)) - def test_numpy_inputs(self): - if tf.executing_eagerly(): - layer = keras.layers.RepeatVector(2) - x = np.ones((10, 10)) - self.assertAllEqual(np.ones((10, 2, 10)), layer(x)) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/reshaping/reshape.py b/keras/layers/reshaping/reshape.py index ba2636340dbb..83bfccf61a24 100644 --- a/keras/layers/reshaping/reshape.py +++ b/keras/layers/reshaping/reshape.py @@ -13,125 +13,136 @@ # limitations under the License. # ============================================================================== """Contains the Reshape layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import -from keras.engine.base_layer import Layer + import numpy as np import tensorflow.compat.v2 as tf +from keras.engine.base_layer import Layer + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Reshape') +@keras_export("keras.layers.Reshape") class Reshape(Layer): - """Layer that reshapes inputs into the given shape. - - Input shape: - Arbitrary, although all dimensions in the input shape must be known/fixed. - Use the keyword argument `input_shape` (tuple of integers, does not include - the samples/batch size axis) when using this layer as the first layer - in a model. - - Output shape: - `(batch_size,) + target_shape` - - Example: - - >>> # as first layer in a Sequential model - >>> model = tf.keras.Sequential() - >>> model.add(tf.keras.layers.Reshape((3, 4), input_shape=(12,))) - >>> # model.output_shape == (None, 3, 4), `None` is the batch size. 
- >>> model.output_shape - (None, 3, 4) - - >>> # as intermediate layer in a Sequential model - >>> model.add(tf.keras.layers.Reshape((6, 2))) - >>> model.output_shape - (None, 6, 2) - - >>> # also supports shape inference using `-1` as dimension - >>> model.add(tf.keras.layers.Reshape((-1, 2, 2))) - >>> model.output_shape - (None, 3, 2, 2) - """ - - def __init__(self, target_shape, **kwargs): - """Creates a `tf.keras.layers.Reshape` layer instance. - - Args: - target_shape: Target shape. Tuple of integers, does not include the - samples dimension (batch size). - **kwargs: Any additional layer keyword arguments. + """Layer that reshapes inputs into the given shape. + + Input shape: + Arbitrary, although all dimensions in the input shape must be known/fixed. + Use the keyword argument `input_shape` (tuple of integers, does not + include the samples/batch size axis) when using this layer as the first + layer in a model. + + Output shape: + `(batch_size,) + target_shape` + + Example: + + >>> # as first layer in a Sequential model + >>> model = tf.keras.Sequential() + >>> model.add(tf.keras.layers.Reshape((3, 4), input_shape=(12,))) + >>> # model.output_shape == (None, 3, 4), `None` is the batch size. + >>> model.output_shape + (None, 3, 4) + + >>> # as intermediate layer in a Sequential model + >>> model.add(tf.keras.layers.Reshape((6, 2))) + >>> model.output_shape + (None, 6, 2) + + >>> # also supports shape inference using `-1` as dimension + >>> model.add(tf.keras.layers.Reshape((-1, 2, 2))) + >>> model.output_shape + (None, 3, 2, 2) """ - super().__init__(**kwargs) - self.target_shape = tuple(target_shape) - - def _fix_unknown_dimension(self, input_shape, output_shape): - """Find and replace a missing dimension in an output shape. - This is a near direct port of the internal Numpy function - `_fix_unknown_dimension` in `numpy/core/src/multiarray/shape.c` - - Args: - input_shape: Shape of array being reshaped - output_shape: Desired shape of the array with at most a single -1 which - indicates a dimension that should be derived from the input shape. - - Returns: - The new output shape with a -1 replaced with its computed value. - - Raises: - ValueError: If the total array size of the output_shape is - different than the input_shape, or more than one unknown dimension - is specified. - """ - output_shape = list(output_shape) - msg = ('total size of new array must be unchanged, ' - 'input_shape = {}, output_shape = {}'.format(input_shape, - output_shape)) - - known, unknown = 1, None - for index, dim in enumerate(output_shape): - if dim < 0: - if unknown is None: - unknown = index + def __init__(self, target_shape, **kwargs): + """Creates a `tf.keras.layers.Reshape` layer instance. + + Args: + target_shape: Target shape. Tuple of integers, does not include the + samples dimension (batch size). + **kwargs: Any additional layer keyword arguments. + """ + super().__init__(**kwargs) + self.target_shape = tuple(target_shape) + + def _fix_unknown_dimension(self, input_shape, output_shape): + """Find and replace a missing dimension in an output shape. + + This is a near direct port of the internal Numpy function + `_fix_unknown_dimension` in `numpy/core/src/multiarray/shape.c` + + Args: + input_shape: Shape of array being reshaped + output_shape: Desired shape of the array with at most a single -1 + which indicates a dimension that should be derived from the input + shape. + + Returns: + The new output shape with a -1 replaced with its computed value. 
+
+        Raises:
+            ValueError: If the total array size of the output_shape is
+            different than the input_shape, or more than one unknown dimension
+            is specified.
+        """
+        output_shape = list(output_shape)
+        msg = (
+            "total size of new array must be unchanged, "
+            "input_shape = {}, output_shape = {}".format(
+                input_shape, output_shape
+            )
+        )
+
+        known, unknown = 1, None
+        for index, dim in enumerate(output_shape):
+            if dim < 0:
+                if unknown is None:
+                    unknown = index
+                else:
+                    raise ValueError(
+                        "There must be at most one unknown dimension in "
+                        f"output_shape. Received: output_shape={output_shape}."
+                    )
+            else:
+                known *= dim
+
+        original = np.prod(input_shape, dtype=int)
+        if unknown is not None:
+            if known == 0 or original % known != 0:
+                raise ValueError(msg)
+            output_shape[unknown] = original // known
+        elif original != known:
+            raise ValueError(msg)
+        return output_shape
+
+    def compute_output_shape(self, input_shape):
+        input_shape = tf.TensorShape(input_shape).as_list()
+        if None in input_shape[1:]:
+            output_shape = [input_shape[0]]
+            # input shape (partially) unknown? replace -1's with None's
+            output_shape += tuple(
+                s if s != -1 else None for s in self.target_shape
+            )
         else:
-          raise ValueError(
-              f'There must be at most one unknown dimension in output_shape. '
-              f'Received: output_shape={output_shape}.')
-      else:
-        known *= dim
-
-    original = np.prod(input_shape, dtype=int)
-    if unknown is not None:
-      if known == 0 or original % known != 0:
-        raise ValueError(msg)
-      output_shape[unknown] = original // known
-    elif original != known:
-      raise ValueError(msg)
-    return output_shape
-
-  def compute_output_shape(self, input_shape):
-    input_shape = tf.TensorShape(input_shape).as_list()
-    if None in input_shape[1:]:
-      output_shape = [input_shape[0]]
-      # input shape (partially) unknown? replace -1's with None's
-      output_shape += tuple(s if s != -1 else None for s in self.target_shape)
-    else:
-      output_shape = [input_shape[0]]
-      output_shape += self._fix_unknown_dimension(input_shape[1:],
-                                                  self.target_shape)
-    return tf.TensorShape(output_shape)
-
-  def call(self, inputs):
-    result = tf.reshape(inputs, (tf.shape(inputs)[0],) + self.target_shape)
-    if not tf.executing_eagerly():
-      # Set the static shape for the result since it might lost during array_ops
-      # reshape, eg, some `None` dim in the result could be inferred.
-      result.set_shape(self.compute_output_shape(inputs.shape))
-    return result
-
-  def get_config(self):
-    config = {'target_shape': self.target_shape}
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(config.items()))
+            output_shape = [input_shape[0]]
+            output_shape += self._fix_unknown_dimension(
+                input_shape[1:], self.target_shape
+            )
+        return tf.TensorShape(output_shape)
+
+    def call(self, inputs):
+        result = tf.reshape(inputs, (tf.shape(inputs)[0],) + self.target_shape)
+        if not tf.executing_eagerly():
+            # Set the static shape for the result since it might be lost
+            # during array_ops reshape, e.g., some `None` dim in the result
+            # could be inferred.
+            result.set_shape(self.compute_output_shape(inputs.shape))
+        return result
+
+    def get_config(self):
+        config = {"target_shape": self.target_shape}
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
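The `-1` handling in `_fix_unknown_dimension` above follows NumPy's reshape rule: the single unknown axis is inferred as `total_size // known`, and any remainder raises the "total size of new array must be unchanged" error. A minimal sketch of the layer-level behavior, assuming a standard TF 2.x install:

```python
import tensorflow as tf

x = tf.zeros((2, 12))  # batch of 2 samples, 12 features each

# The batch axis is excluded; -1 is solved from the remaining 12 elements.
y = tf.keras.layers.Reshape((3, -1))(x)
print(y.shape)  # (2, 3, 4), since 12 // 3 == 4

# Reshape((5, -1)) would raise instead, because 12 % 5 != 0.
```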
diff --git a/keras/layers/reshaping/reshape_test.py b/keras/layers/reshaping/reshape_test.py
index 8e66b4d3b948..0c9d89f737a2 100644
--- a/keras/layers/reshaping/reshape_test.py
+++ b/keras/layers/reshaping/reshape_test.py
@@ -14,42 +14,46 @@
 # ==============================================================================
 """Tests for reshape layer."""
 
+import tensorflow.compat.v2 as tf
+
 import keras
 from keras.testing_infra import test_combinations
 from keras.testing_infra import test_utils
-import tensorflow.compat.v2 as tf
-
 
 @test_combinations.run_all_keras_modes
 class ReshapeTest(test_combinations.TestCase):
-
-  def test_reshape(self):
-    test_utils.layer_test(
-        keras.layers.Reshape,
-        kwargs={'target_shape': (8, 1)},
-        input_shape=(3, 2, 4))
-
-    test_utils.layer_test(
-        keras.layers.Reshape,
-        kwargs={'target_shape': (-1, 1)},
-        input_shape=(3, 2, 4))
-
-    test_utils.layer_test(
-        keras.layers.Reshape,
-        kwargs={'target_shape': (1, -1)},
-        input_shape=(3, 2, 4))
-
-    test_utils.layer_test(
-        keras.layers.Reshape,
-        kwargs={'target_shape': (-1, 1)},
-        input_shape=(None, None, 2))
-
-  def test_reshape_set_static_shape(self):
-    input_layer = keras.Input(batch_shape=(1, None))
-    reshaped = keras.layers.Reshape((1, 100))(input_layer)
-    # Make sure the batch dim is not lost after array_ops.reshape.
-    self.assertEqual(reshaped.shape, [1, 1, 100])
-
-if __name__ == '__main__':
-  tf.test.main()
+    def test_reshape(self):
+        test_utils.layer_test(
+            keras.layers.Reshape,
+            kwargs={"target_shape": (8, 1)},
+            input_shape=(3, 2, 4),
+        )
+
+        test_utils.layer_test(
+            keras.layers.Reshape,
+            kwargs={"target_shape": (-1, 1)},
+            input_shape=(3, 2, 4),
+        )
+
+        test_utils.layer_test(
+            keras.layers.Reshape,
+            kwargs={"target_shape": (1, -1)},
+            input_shape=(3, 2, 4),
+        )
+
+        test_utils.layer_test(
+            keras.layers.Reshape,
+            kwargs={"target_shape": (-1, 1)},
+            input_shape=(None, None, 2),
+        )
+
+    def test_reshape_set_static_shape(self):
+        input_layer = keras.Input(batch_shape=(1, None))
+        reshaped = keras.layers.Reshape((1, 100))(input_layer)
+        # Make sure the batch dim is not lost after array_ops.reshape.
+        self.assertEqual(reshaped.shape, [1, 1, 100])
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/keras/layers/reshaping/up_sampling1d.py b/keras/layers/reshaping/up_sampling1d.py
index b5853cc867c3..56b75ef23d2d 100644
--- a/keras/layers/reshaping/up_sampling1d.py
+++ b/keras/layers/reshaping/up_sampling1d.py
@@ -13,68 +13,72 @@
 # limitations under the License.
 # ==============================================================================
 """Keras upsampling layer for 1D inputs."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
+
+
+import tensorflow.compat.v2 as tf
 
 from keras import backend
 from keras.engine.base_layer import Layer
 from keras.engine.input_spec import InputSpec
-import tensorflow.compat.v2 as tf
 
+# isort: off
 from tensorflow.python.util.tf_export import keras_export
 
 
-@keras_export('keras.layers.UpSampling1D')
+@keras_export("keras.layers.UpSampling1D")
 class UpSampling1D(Layer):
-  """Upsampling layer for 1D inputs.
-
-  Repeats each temporal step `size` times along the time axis.
- - Examples: - - >>> input_shape = (2, 2, 3) - >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) - >>> print(x) - [[[ 0 1 2] - [ 3 4 5]] - [[ 6 7 8] - [ 9 10 11]]] - >>> y = tf.keras.layers.UpSampling1D(size=2)(x) - >>> print(y) - tf.Tensor( + """Upsampling layer for 1D inputs. + + Repeats each temporal step `size` times along the time axis. + + Examples: + + >>> input_shape = (2, 2, 3) + >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + >>> print(x) [[[ 0 1 2] - [ 0 1 2] - [ 3 4 5] [ 3 4 5]] [[ 6 7 8] - [ 6 7 8] - [ 9 10 11] - [ 9 10 11]]], shape=(2, 4, 3), dtype=int64) - - Args: - size: Integer. Upsampling factor. - - Input shape: - 3D tensor with shape: `(batch_size, steps, features)`. - - Output shape: - 3D tensor with shape: `(batch_size, upsampled_steps, features)`. - """ - - def __init__(self, size=2, **kwargs): - super().__init__(**kwargs) - self.size = int(size) - self.input_spec = InputSpec(ndim=3) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - size = self.size * input_shape[1] if input_shape[1] is not None else None - return tf.TensorShape([input_shape[0], size, input_shape[2]]) - - def call(self, inputs): - output = backend.repeat_elements(inputs, self.size, axis=1) - return output - - def get_config(self): - config = {'size': self.size} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + [ 9 10 11]]] + >>> y = tf.keras.layers.UpSampling1D(size=2)(x) + >>> print(y) + tf.Tensor( + [[[ 0 1 2] + [ 0 1 2] + [ 3 4 5] + [ 3 4 5]] + [[ 6 7 8] + [ 6 7 8] + [ 9 10 11] + [ 9 10 11]]], shape=(2, 4, 3), dtype=int64) + + Args: + size: Integer. Upsampling factor. + + Input shape: + 3D tensor with shape: `(batch_size, steps, features)`. + + Output shape: + 3D tensor with shape: `(batch_size, upsampled_steps, features)`. + """ + + def __init__(self, size=2, **kwargs): + super().__init__(**kwargs) + self.size = int(size) + self.input_spec = InputSpec(ndim=3) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + size = ( + self.size * input_shape[1] if input_shape[1] is not None else None + ) + return tf.TensorShape([input_shape[0], size, input_shape[2]]) + + def call(self, inputs): + output = backend.repeat_elements(inputs, self.size, axis=1) + return output + + def get_config(self): + config = {"size": self.size} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/up_sampling2d.py b/keras/layers/reshaping/up_sampling2d.py index cf2513092931..ccfd2a6cff0f 100644 --- a/keras/layers/reshaping/up_sampling2d.py +++ b/keras/layers/reshaping/up_sampling2d.py @@ -13,128 +13,135 @@ # limitations under the License. # ============================================================================== """Keras upsampling layer for 2D inputs.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +from keras.utils import image_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.UpSampling2D') +@keras_export("keras.layers.UpSampling2D") class UpSampling2D(Layer): - """Upsampling layer for 2D inputs. 
- - Repeats the rows and columns of the data - by `size[0]` and `size[1]` respectively. - - Examples: - - >>> input_shape = (2, 2, 1, 3) - >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) - >>> print(x) - [[[[ 0 1 2]] - [[ 3 4 5]]] - [[[ 6 7 8]] - [[ 9 10 11]]]] - >>> y = tf.keras.layers.UpSampling2D(size=(1, 2))(x) - >>> print(y) - tf.Tensor( - [[[[ 0 1 2] - [ 0 1 2]] - [[ 3 4 5] - [ 3 4 5]]] - [[[ 6 7 8] - [ 6 7 8]] - [[ 9 10 11] - [ 9 10 11]]]], shape=(2, 2, 2, 3), dtype=int64) - - Args: - size: Int, or tuple of 2 integers. - The upsampling factors for rows and columns. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch_size, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - interpolation: A string, one of `"area"`, `"bicubic"`, `"bilinear"`, - `"gaussian"`, `"lanczos3"`, `"lanczos5"`, `"mitchellcubic"`, `"nearest"`. - - Input shape: - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, rows, cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch_size, channels, rows, cols)` - - Output shape: - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, upsampled_rows, upsampled_cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch_size, channels, upsampled_rows, upsampled_cols)` - """ - - def __init__(self, - size=(2, 2), - data_format=None, - interpolation='nearest', - **kwargs): - super().__init__(**kwargs) - self.data_format = conv_utils.normalize_data_format(data_format) - self.size = conv_utils.normalize_tuple(size, 2, 'size') - interpolations = { - 'area': tf.image.ResizeMethod.AREA, - 'bicubic': tf.image.ResizeMethod.BICUBIC, - 'bilinear': tf.image.ResizeMethod.BILINEAR, - 'gaussian': tf.image.ResizeMethod.GAUSSIAN, - 'lanczos3': tf.image.ResizeMethod.LANCZOS3, - 'lanczos5': tf.image.ResizeMethod.LANCZOS5, - 'mitchellcubic': tf.image.ResizeMethod.MITCHELLCUBIC, - 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, - } - interploations_list = '"' + '", "'.join(interpolations.keys()) + '"' - if interpolation not in interpolations: - raise ValueError('`interpolation` argument should be one of: ' - f'{interploations_list}. 
Received: "{interpolation}".') - self.interpolation = interpolation - self.input_spec = InputSpec(ndim=4) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - height = self.size[0] * input_shape[ - 2] if input_shape[2] is not None else None - width = self.size[1] * input_shape[ - 3] if input_shape[3] is not None else None - return tf.TensorShape( - [input_shape[0], input_shape[1], height, width]) - else: - height = self.size[0] * input_shape[ - 1] if input_shape[1] is not None else None - width = self.size[1] * input_shape[ - 2] if input_shape[2] is not None else None - return tf.TensorShape( - [input_shape[0], height, width, input_shape[3]]) - - def call(self, inputs): - return backend.resize_images( - inputs, self.size[0], self.size[1], self.data_format, - interpolation=self.interpolation) - - def get_config(self): - config = { - 'size': self.size, - 'data_format': self.data_format, - 'interpolation': self.interpolation - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Upsampling layer for 2D inputs. + + Repeats the rows and columns of the data + by `size[0]` and `size[1]` respectively. + + Examples: + + >>> input_shape = (2, 2, 1, 3) + >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + >>> print(x) + [[[[ 0 1 2]] + [[ 3 4 5]]] + [[[ 6 7 8]] + [[ 9 10 11]]]] + >>> y = tf.keras.layers.UpSampling2D(size=(1, 2))(x) + >>> print(y) + tf.Tensor( + [[[[ 0 1 2] + [ 0 1 2]] + [[ 3 4 5] + [ 3 4 5]]] + [[[ 6 7 8] + [ 6 7 8]] + [[ 9 10 11] + [ 9 10 11]]]], shape=(2, 2, 2, 3), dtype=int64) + + Args: + size: Int, or tuple of 2 integers. + The upsampling factors for rows and columns. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch_size, channels, height, width)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + interpolation: A string, one of `"area"`, `"bicubic"`, `"bilinear"`, + `"gaussian"`, `"lanczos3"`, `"lanczos5"`, `"mitchellcubic"`, + `"nearest"`. 
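Of the interpolation modes listed above, only `"nearest"` reproduces the pure pixel-repeat shown in the doctest; the other modes interpolate rather than repeat. A quick comparison sketch, assuming a standard TF 2.x install:

```python
import tensorflow as tf

x = tf.reshape(tf.constant([[1.0, 2.0], [3.0, 4.0]]), (1, 2, 2, 1))

nearest = tf.keras.layers.UpSampling2D(size=2)(x)  # default: "nearest"
bilinear = tf.keras.layers.UpSampling2D(size=2, interpolation="bilinear")(x)

print(tf.reshape(nearest, (4, 4)).numpy())   # only the original 4 values
print(tf.reshape(bilinear, (4, 4)).numpy())  # interpolated in-between values
```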
+ + Input shape: + 4D tensor with shape: + - If `data_format` is `"channels_last"`: + `(batch_size, rows, cols, channels)` + - If `data_format` is `"channels_first"`: + `(batch_size, channels, rows, cols)` + + Output shape: + 4D tensor with shape: + - If `data_format` is `"channels_last"`: + `(batch_size, upsampled_rows, upsampled_cols, channels)` + - If `data_format` is `"channels_first"`: + `(batch_size, channels, upsampled_rows, upsampled_cols)` + """ + + def __init__( + self, size=(2, 2), data_format=None, interpolation="nearest", **kwargs + ): + super().__init__(**kwargs) + self.data_format = conv_utils.normalize_data_format(data_format) + self.size = conv_utils.normalize_tuple(size, 2, "size") + self.interpolation = image_utils.get_interpolation(interpolation) + self.input_spec = InputSpec(ndim=4) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_first": + height = ( + self.size[0] * input_shape[2] + if input_shape[2] is not None + else None + ) + width = ( + self.size[1] * input_shape[3] + if input_shape[3] is not None + else None + ) + return tf.TensorShape( + [input_shape[0], input_shape[1], height, width] + ) + else: + height = ( + self.size[0] * input_shape[1] + if input_shape[1] is not None + else None + ) + width = ( + self.size[1] * input_shape[2] + if input_shape[2] is not None + else None + ) + return tf.TensorShape( + [input_shape[0], height, width, input_shape[3]] + ) + + def call(self, inputs): + return backend.resize_images( + inputs, + self.size[0], + self.size[1], + self.data_format, + interpolation=self.interpolation, + ) + + def get_config(self): + config = { + "size": self.size, + "data_format": self.data_format, + "interpolation": self.interpolation, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/up_sampling3d.py b/keras/layers/reshaping/up_sampling3d.py index 72ed748c2dd8..9482ea1b530c 100644 --- a/keras/layers/reshaping/up_sampling3d.py +++ b/keras/layers/reshaping/up_sampling3d.py @@ -13,93 +13,119 @@ # limitations under the License. # ============================================================================== """Keras upsampling layer for 3D inputs.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.UpSampling3D') +@keras_export("keras.layers.UpSampling3D") class UpSampling3D(Layer): - """Upsampling layer for 3D inputs. - - Repeats the 1st, 2nd and 3rd dimensions - of the data by `size[0]`, `size[1]` and `size[2]` respectively. - - Examples: - - >>> input_shape = (2, 1, 2, 1, 3) - >>> x = tf.constant(1, shape=input_shape) - >>> y = tf.keras.layers.UpSampling3D(size=2)(x) - >>> print(y.shape) - (2, 2, 4, 2, 3) - - Args: - size: Int, or tuple of 3 integers. - The upsampling factors for dim1, dim2 and dim3. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. 
- `channels_last` corresponds to inputs with shape - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - Input shape: - 5D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, dim1, dim2, dim3, channels)` - - If `data_format` is `"channels_first"`: - `(batch_size, channels, dim1, dim2, dim3)` - - Output shape: - 5D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, upsampled_dim1, upsampled_dim2, upsampled_dim3, channels)` - - If `data_format` is `"channels_first"`: - `(batch_size, channels, upsampled_dim1, upsampled_dim2, upsampled_dim3)` - """ - - def __init__(self, size=(2, 2, 2), data_format=None, **kwargs): - self.data_format = conv_utils.normalize_data_format(data_format) - self.size = conv_utils.normalize_tuple(size, 3, 'size') - self.input_spec = InputSpec(ndim=5) - super().__init__(**kwargs) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - dim1 = self.size[0] * input_shape[ - 2] if input_shape[2] is not None else None - dim2 = self.size[1] * input_shape[ - 3] if input_shape[3] is not None else None - dim3 = self.size[2] * input_shape[ - 4] if input_shape[4] is not None else None - return tf.TensorShape( - [input_shape[0], input_shape[1], dim1, dim2, dim3]) - else: - dim1 = self.size[0] * input_shape[ - 1] if input_shape[1] is not None else None - dim2 = self.size[1] * input_shape[ - 2] if input_shape[2] is not None else None - dim3 = self.size[2] * input_shape[ - 3] if input_shape[3] is not None else None - return tf.TensorShape( - [input_shape[0], dim1, dim2, dim3, input_shape[4]]) - - def call(self, inputs): - return backend.resize_volumes( - inputs, self.size[0], self.size[1], self.size[2], self.data_format) - - def get_config(self): - config = {'size': self.size, 'data_format': self.data_format} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Upsampling layer for 3D inputs. + + Repeats the 1st, 2nd and 3rd dimensions + of the data by `size[0]`, `size[1]` and `size[2]` respectively. + + Examples: + + >>> input_shape = (2, 1, 2, 1, 3) + >>> x = tf.constant(1, shape=input_shape) + >>> y = tf.keras.layers.UpSampling3D(size=2)(x) + >>> print(y.shape) + (2, 2, 4, 2, 3) + + Args: + size: Int, or tuple of 3 integers. + The upsampling factors for dim1, dim2 and dim3. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + while `channels_first` corresponds to inputs with shape + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. 
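Each spatial axis is simply multiplied by its factor, which is the arithmetic `compute_output_shape` implements below; a one-line sanity check, assuming a standard TF 2.x install (shapes hypothetical):

```python
import tensorflow as tf

x = tf.zeros((2, 3, 4, 5, 16))  # channels_last: (batch, d1, d2, d3, channels)
y = tf.keras.layers.UpSampling3D(size=(2, 1, 3))(x)
print(y.shape)  # (2, 6, 4, 15, 16)
```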
+ + Input shape: + 5D tensor with shape: + - If `data_format` is `"channels_last"`: + `(batch_size, dim1, dim2, dim3, channels)` + - If `data_format` is `"channels_first"`: + `(batch_size, channels, dim1, dim2, dim3)` + + Output shape: + 5D tensor with shape: + - If `data_format` is `"channels_last"`: + `(batch_size, upsampled_dim1, upsampled_dim2, upsampled_dim3, + channels)` + - If `data_format` is `"channels_first"`: + `(batch_size, channels, upsampled_dim1, upsampled_dim2, + upsampled_dim3)` + """ + + def __init__(self, size=(2, 2, 2), data_format=None, **kwargs): + self.data_format = conv_utils.normalize_data_format(data_format) + self.size = conv_utils.normalize_tuple(size, 3, "size") + self.input_spec = InputSpec(ndim=5) + super().__init__(**kwargs) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_first": + dim1 = ( + self.size[0] * input_shape[2] + if input_shape[2] is not None + else None + ) + dim2 = ( + self.size[1] * input_shape[3] + if input_shape[3] is not None + else None + ) + dim3 = ( + self.size[2] * input_shape[4] + if input_shape[4] is not None + else None + ) + return tf.TensorShape( + [input_shape[0], input_shape[1], dim1, dim2, dim3] + ) + else: + dim1 = ( + self.size[0] * input_shape[1] + if input_shape[1] is not None + else None + ) + dim2 = ( + self.size[1] * input_shape[2] + if input_shape[2] is not None + else None + ) + dim3 = ( + self.size[2] * input_shape[3] + if input_shape[3] is not None + else None + ) + return tf.TensorShape( + [input_shape[0], dim1, dim2, dim3, input_shape[4]] + ) + + def call(self, inputs): + return backend.resize_volumes( + inputs, self.size[0], self.size[1], self.size[2], self.data_format + ) + + def get_config(self): + config = {"size": self.size, "data_format": self.data_format} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/up_sampling_test.py b/keras/layers/reshaping/up_sampling_test.py index 2716f902c252..70ed79e6328e 100644 --- a/keras/layers/reshaping/up_sampling_test.py +++ b/keras/layers/reshaping/up_sampling_test.py @@ -15,164 +15,244 @@ """Tests for up-sampling layers.""" +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) -@tf_test_utils.for_all_test_methods(tf_test_utils.disable_xla, - 'align_corners=False not supported by XLA') +@tf_test_utils.for_all_test_methods( + tf_test_utils.disable_xla, "align_corners=False not supported by XLA" +) @test_combinations.run_all_keras_modes class UpSamplingTest(test_combinations.TestCase): + def test_upsampling_1d(self): + with self.cached_session(): + test_utils.layer_test( + keras.layers.UpSampling1D, + kwargs={"size": 2}, + input_shape=(3, 5, 4), + ) + + def test_upsampling_2d(self): + num_samples = 2 + stack_size = 2 + input_num_row = 11 + input_num_col = 12 + + for data_format in ["channels_first", "channels_last"]: + if data_format == "channels_first": + inputs = np.random.rand( + num_samples, stack_size, input_num_row, input_num_col + ) + else: + inputs = np.random.rand( + num_samples, input_num_row, input_num_col, stack_size + ) + + # basic test + with self.cached_session(): + 
test_utils.layer_test( + keras.layers.UpSampling2D, + kwargs={"size": (2, 2), "data_format": data_format}, + input_shape=inputs.shape, + ) + + for length_row in [2]: + for length_col in [2, 3]: + layer = keras.layers.UpSampling2D( + size=(length_row, length_col), + data_format=data_format, + ) + layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + if data_format == "channels_first": + assert ( + np_output.shape[2] == length_row * input_num_row + ) + assert ( + np_output.shape[3] == length_col * input_num_col + ) + else: # tf + assert ( + np_output.shape[1] == length_row * input_num_row + ) + assert ( + np_output.shape[2] == length_col * input_num_col + ) + + # compare with numpy + if data_format == "channels_first": + expected_out = np.repeat(inputs, length_row, axis=2) + expected_out = np.repeat( + expected_out, length_col, axis=3 + ) + else: # tf + expected_out = np.repeat(inputs, length_row, axis=1) + expected_out = np.repeat( + expected_out, length_col, axis=2 + ) - def test_upsampling_1d(self): - with self.cached_session(): - test_utils.layer_test( - keras.layers.UpSampling1D, kwargs={'size': 2}, input_shape=(3, 5, 4)) - - def test_upsampling_2d(self): - num_samples = 2 - stack_size = 2 - input_num_row = 11 - input_num_col = 12 - - for data_format in ['channels_first', 'channels_last']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, input_num_row, - input_num_col) - else: - inputs = np.random.rand(num_samples, input_num_row, input_num_col, - stack_size) - - # basic test - with self.cached_session(): - test_utils.layer_test( - keras.layers.UpSampling2D, - kwargs={'size': (2, 2), - 'data_format': data_format}, - input_shape=inputs.shape) - - for length_row in [2]: - for length_col in [2, 3]: - layer = keras.layers.UpSampling2D( - size=(length_row, length_col), data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() + np.testing.assert_allclose(np_output, expected_out) + + def test_upsampling_2d_bilinear(self): + num_samples = 2 + stack_size = 2 + input_num_row = 11 + input_num_col = 12 + for data_format in ["channels_first", "channels_last"]: + if data_format == "channels_first": + inputs = np.random.rand( + num_samples, stack_size, input_num_row, input_num_col + ) else: - np_output = keras.backend.eval(output) - if data_format == 'channels_first': - assert np_output.shape[2] == length_row * input_num_row - assert np_output.shape[3] == length_col * input_num_col - else: # tf - assert np_output.shape[1] == length_row * input_num_row - assert np_output.shape[2] == length_col * input_num_col - - # compare with numpy - if data_format == 'channels_first': - expected_out = np.repeat(inputs, length_row, axis=2) - expected_out = np.repeat(expected_out, length_col, axis=3) - else: # tf - expected_out = np.repeat(inputs, length_row, axis=1) - expected_out = np.repeat(expected_out, length_col, axis=2) - - np.testing.assert_allclose(np_output, expected_out) - - def test_upsampling_2d_bilinear(self): - num_samples = 2 - stack_size = 2 - input_num_row = 11 - input_num_col = 12 - for data_format in ['channels_first', 'channels_last']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, input_num_row, - input_num_col) - else: - inputs = np.random.rand(num_samples, input_num_row, input_num_col, 
- stack_size) - - test_utils.layer_test(keras.layers.UpSampling2D, - kwargs={'size': (2, 2), - 'data_format': data_format, - 'interpolation': 'bilinear'}, - input_shape=inputs.shape) - - if not tf.executing_eagerly(): - for length_row in [2]: - for length_col in [2, 3]: - layer = keras.layers.UpSampling2D( - size=(length_row, length_col), - data_format=data_format) - layer.build(inputs.shape) - outputs = layer(keras.backend.variable(inputs)) - np_output = keras.backend.eval(outputs) - if data_format == 'channels_first': - self.assertEqual(np_output.shape[2], length_row * input_num_row) - self.assertEqual(np_output.shape[3], length_col * input_num_col) + inputs = np.random.rand( + num_samples, input_num_row, input_num_col, stack_size + ) + + test_utils.layer_test( + keras.layers.UpSampling2D, + kwargs={ + "size": (2, 2), + "data_format": data_format, + "interpolation": "bilinear", + }, + input_shape=inputs.shape, + ) + + if not tf.executing_eagerly(): + for length_row in [2]: + for length_col in [2, 3]: + layer = keras.layers.UpSampling2D( + size=(length_row, length_col), + data_format=data_format, + ) + layer.build(inputs.shape) + outputs = layer(keras.backend.variable(inputs)) + np_output = keras.backend.eval(outputs) + if data_format == "channels_first": + self.assertEqual( + np_output.shape[2], length_row * input_num_row + ) + self.assertEqual( + np_output.shape[3], length_col * input_num_col + ) + else: + self.assertEqual( + np_output.shape[1], length_row * input_num_row + ) + self.assertEqual( + np_output.shape[2], length_col * input_num_col + ) + + def test_upsampling_3d(self): + num_samples = 2 + stack_size = 2 + input_len_dim1 = 10 + input_len_dim2 = 11 + input_len_dim3 = 12 + + for data_format in ["channels_first", "channels_last"]: + if data_format == "channels_first": + inputs = np.random.rand( + num_samples, + stack_size, + input_len_dim1, + input_len_dim2, + input_len_dim3, + ) else: - self.assertEqual(np_output.shape[1], length_row * input_num_row) - self.assertEqual(np_output.shape[2], length_col * input_num_col) - - def test_upsampling_3d(self): - num_samples = 2 - stack_size = 2 - input_len_dim1 = 10 - input_len_dim2 = 11 - input_len_dim3 = 12 - - for data_format in ['channels_first', 'channels_last']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, input_len_dim1, - input_len_dim2, input_len_dim3) - else: - inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2, - input_len_dim3, stack_size) - - # basic test - with self.cached_session(): - test_utils.layer_test( - keras.layers.UpSampling3D, - kwargs={'size': (2, 2, 2), - 'data_format': data_format}, - input_shape=inputs.shape) - - for length_dim1 in [2, 3]: - for length_dim2 in [2]: - for length_dim3 in [3]: - layer = keras.layers.UpSampling3D( - size=(length_dim1, length_dim2, length_dim3), - data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() - else: - np_output = keras.backend.eval(output) - if data_format == 'channels_first': - assert np_output.shape[2] == length_dim1 * input_len_dim1 - assert np_output.shape[3] == length_dim2 * input_len_dim2 - assert np_output.shape[4] == length_dim3 * input_len_dim3 - else: # tf - assert np_output.shape[1] == length_dim1 * input_len_dim1 - assert np_output.shape[2] == length_dim2 * input_len_dim2 - assert np_output.shape[3] == length_dim3 * input_len_dim3 - - # compare with numpy - if data_format == 'channels_first': - 
expected_out = np.repeat(inputs, length_dim1, axis=2) - expected_out = np.repeat(expected_out, length_dim2, axis=3) - expected_out = np.repeat(expected_out, length_dim3, axis=4) - else: # tf - expected_out = np.repeat(inputs, length_dim1, axis=1) - expected_out = np.repeat(expected_out, length_dim2, axis=2) - expected_out = np.repeat(expected_out, length_dim3, axis=3) - - np.testing.assert_allclose(np_output, expected_out) - -if __name__ == '__main__': - tf.test.main() + inputs = np.random.rand( + num_samples, + input_len_dim1, + input_len_dim2, + input_len_dim3, + stack_size, + ) + + # basic test + with self.cached_session(): + test_utils.layer_test( + keras.layers.UpSampling3D, + kwargs={"size": (2, 2, 2), "data_format": data_format}, + input_shape=inputs.shape, + ) + + for length_dim1 in [2, 3]: + for length_dim2 in [2]: + for length_dim3 in [3]: + layer = keras.layers.UpSampling3D( + size=(length_dim1, length_dim2, length_dim3), + data_format=data_format, + ) + layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + if data_format == "channels_first": + assert ( + np_output.shape[2] + == length_dim1 * input_len_dim1 + ) + assert ( + np_output.shape[3] + == length_dim2 * input_len_dim2 + ) + assert ( + np_output.shape[4] + == length_dim3 * input_len_dim3 + ) + else: # tf + assert ( + np_output.shape[1] + == length_dim1 * input_len_dim1 + ) + assert ( + np_output.shape[2] + == length_dim2 * input_len_dim2 + ) + assert ( + np_output.shape[3] + == length_dim3 * input_len_dim3 + ) + + # compare with numpy + if data_format == "channels_first": + expected_out = np.repeat( + inputs, length_dim1, axis=2 + ) + expected_out = np.repeat( + expected_out, length_dim2, axis=3 + ) + expected_out = np.repeat( + expected_out, length_dim3, axis=4 + ) + else: # tf + expected_out = np.repeat( + inputs, length_dim1, axis=1 + ) + expected_out = np.repeat( + expected_out, length_dim2, axis=2 + ) + expected_out = np.repeat( + expected_out, length_dim3, axis=3 + ) + + np.testing.assert_allclose(np_output, expected_out) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/reshaping/zero_padding1d.py b/keras/layers/reshaping/zero_padding1d.py index 68d11d994661..591e5d92172d 100644 --- a/keras/layers/reshaping/zero_padding1d.py +++ b/keras/layers/reshaping/zero_padding1d.py @@ -13,79 +13,82 @@ # limitations under the License. # ============================================================================== """Keras zero-padding layer for 1D input.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ZeroPadding1D') +@keras_export("keras.layers.ZeroPadding1D") class ZeroPadding1D(Layer): - """Zero-padding layer for 1D input (e.g. temporal sequence). + """Zero-padding layer for 1D input (e.g. temporal sequence). 
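The upsampling tests above all reduce to one invariant: integer-factor upsampling equals `np.repeat` along each spatial axis. A standalone sketch of that equivalence, assuming a channels_last input (this is not code from the test file):

```python
import numpy as np
import tensorflow as tf

x = np.random.rand(2, 3, 4, 5, 2).astype("float32")  # (batch, d1, d2, d3, ch)
y = tf.keras.layers.UpSampling3D(size=(2, 2, 2))(x).numpy()

# Repeating each spatial axis by its factor reproduces the layer exactly.
expected = np.repeat(x, 2, axis=1)
expected = np.repeat(expected, 2, axis=2)
expected = np.repeat(expected, 2, axis=3)
np.testing.assert_allclose(y, expected)
```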
- Examples: + Examples: - >>> input_shape = (2, 2, 3) - >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) - >>> print(x) - [[[ 0 1 2] - [ 3 4 5]] - [[ 6 7 8] - [ 9 10 11]]] - >>> y = tf.keras.layers.ZeroPadding1D(padding=2)(x) - >>> print(y) - tf.Tensor( - [[[ 0 0 0] - [ 0 0 0] - [ 0 1 2] - [ 3 4 5] - [ 0 0 0] - [ 0 0 0]] - [[ 0 0 0] - [ 0 0 0] - [ 6 7 8] - [ 9 10 11] - [ 0 0 0] - [ 0 0 0]]], shape=(2, 6, 3), dtype=int64) + >>> input_shape = (2, 2, 3) + >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + >>> print(x) + [[[ 0 1 2] + [ 3 4 5]] + [[ 6 7 8] + [ 9 10 11]]] + >>> y = tf.keras.layers.ZeroPadding1D(padding=2)(x) + >>> print(y) + tf.Tensor( + [[[ 0 0 0] + [ 0 0 0] + [ 0 1 2] + [ 3 4 5] + [ 0 0 0] + [ 0 0 0]] + [[ 0 0 0] + [ 0 0 0] + [ 6 7 8] + [ 9 10 11] + [ 0 0 0] + [ 0 0 0]]], shape=(2, 6, 3), dtype=int64) - Args: - padding: Int, or tuple of int (length 2), or dictionary. - - If int: - How many zeros to add at the beginning and end of - the padding dimension (axis 1). - - If tuple of int (length 2): - How many zeros to add at the beginning and the end of - the padding dimension (`(left_pad, right_pad)`). + Args: + padding: Int, or tuple of int (length 2). + - If int: + How many zeros to add at the beginning and end of + the padding dimension (axis 1). + - If tuple of int (length 2): + How many zeros to add at the beginning and the end of + the padding dimension (`(left_pad, right_pad)`). - Input shape: - 3D tensor with shape `(batch_size, axis_to_pad, features)` + Input shape: + 3D tensor with shape `(batch_size, axis_to_pad, features)` - Output shape: - 3D tensor with shape `(batch_size, padded_axis, features)` - """ + Output shape: + 3D tensor with shape `(batch_size, padded_axis, features)` + """ - def __init__(self, padding=1, **kwargs): - super().__init__(**kwargs) - self.padding = conv_utils.normalize_tuple( - padding, 2, 'padding', allow_zero=True) - self.input_spec = InputSpec(ndim=3) + def __init__(self, padding=1, **kwargs): + super().__init__(**kwargs) + self.padding = conv_utils.normalize_tuple( + padding, 2, "padding", allow_zero=True + ) + self.input_spec = InputSpec(ndim=3) - def compute_output_shape(self, input_shape): - if input_shape[1] is not None: - length = input_shape[1] + self.padding[0] + self.padding[1] - else: - length = None - return tf.TensorShape([input_shape[0], length, input_shape[2]]) + def compute_output_shape(self, input_shape): + if input_shape[1] is not None: + length = input_shape[1] + self.padding[0] + self.padding[1] + else: + length = None + return tf.TensorShape([input_shape[0], length, input_shape[2]]) - def call(self, inputs): - return backend.temporal_padding(inputs, padding=self.padding) + def call(self, inputs): + return backend.temporal_padding(inputs, padding=self.padding) - def get_config(self): - config = {'padding': self.padding} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = {"padding": self.padding} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/zero_padding2d.py b/keras/layers/reshaping/zero_padding2d.py index 7b5584f0afc4..a4e4c3e6fb57 100644 --- a/keras/layers/reshaping/zero_padding2d.py +++ b/keras/layers/reshaping/zero_padding2d.py @@ -13,137 +13,144 @@ # limitations under the License. 
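Before the 2D variant below, the 1D layer just shown is easy to check end to end. A quick sketch with an asymmetric `(left_pad, right_pad)` tuple (values chosen arbitrarily):

```python
import numpy as np
import tensorflow as tf

x = np.ones((2, 5, 3))  # (batch, steps, features)
y = tf.keras.layers.ZeroPadding1D(padding=(1, 2))(x).numpy()

print(y.shape)           # (2, 8, 3): 1 step added before, 2 after
print(y[0, 0].tolist())  # [0.0, 0.0, 0.0]: left pad
print(y[0, -1].tolist()) # [0.0, 0.0, 0.0]: right pad
```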
# ============================================================================== """Keras zero-padding layer for 2D input.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ZeroPadding2D') +@keras_export("keras.layers.ZeroPadding2D") class ZeroPadding2D(Layer): - """Zero-padding layer for 2D input (e.g. picture). - - This layer can add rows and columns of zeros - at the top, bottom, left and right side of an image tensor. - - Examples: - - >>> input_shape = (1, 1, 2, 2) - >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) - >>> print(x) - [[[[0 1] - [2 3]]]] - >>> y = tf.keras.layers.ZeroPadding2D(padding=1)(x) - >>> print(y) - tf.Tensor( - [[[[0 0] - [0 0] - [0 0] - [0 0]] - [[0 0] - [0 1] - [2 3] - [0 0]] - [[0 0] - [0 0] - [0 0] - [0 0]]]], shape=(1, 3, 4, 2), dtype=int64) - - Args: - padding: Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. - - If int: the same symmetric padding - is applied to height and width. - - If tuple of 2 ints: - interpreted as two different - symmetric padding values for height and width: - `(symmetric_height_pad, symmetric_width_pad)`. - - If tuple of 2 tuples of 2 ints: - interpreted as - `((top_pad, bottom_pad), (left_pad, right_pad))` - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch_size, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch_size, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - Input shape: - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, rows, cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch_size, channels, rows, cols)` - - Output shape: - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch_size, padded_rows, padded_cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch_size, channels, padded_rows, padded_cols)` - """ - - def __init__(self, padding=(1, 1), data_format=None, **kwargs): - super().__init__(**kwargs) - self.data_format = conv_utils.normalize_data_format(data_format) - if isinstance(padding, int): - self.padding = ((padding, padding), (padding, padding)) - elif hasattr(padding, '__len__'): - if len(padding) != 2: - raise ValueError('`padding` should have two elements. ' - f'Received: {padding}.') - height_padding = conv_utils.normalize_tuple( - padding[0], 2, '1st entry of padding', allow_zero=True) - width_padding = conv_utils.normalize_tuple( - padding[1], 2, '2nd entry of padding', allow_zero=True) - self.padding = (height_padding, width_padding) - else: - raise ValueError('`padding` should be either an int, ' - 'a tuple of 2 ints ' - '(symmetric_height_pad, symmetric_width_pad), ' - 'or a tuple of 2 tuples of 2 ints ' - '((top_pad, bottom_pad), (left_pad, right_pad)). 
' - f'Received: {padding}.') - self.input_spec = InputSpec(ndim=4) - - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - if input_shape[2] is not None: - rows = input_shape[2] + self.padding[0][0] + self.padding[0][1] - else: - rows = None - if input_shape[3] is not None: - cols = input_shape[3] + self.padding[1][0] + self.padding[1][1] - else: - cols = None - return tf.TensorShape( - [input_shape[0], input_shape[1], rows, cols]) - elif self.data_format == 'channels_last': - if input_shape[1] is not None: - rows = input_shape[1] + self.padding[0][0] + self.padding[0][1] - else: - rows = None - if input_shape[2] is not None: - cols = input_shape[2] + self.padding[1][0] + self.padding[1][1] - else: - cols = None - return tf.TensorShape( - [input_shape[0], rows, cols, input_shape[3]]) - - def call(self, inputs): - return backend.spatial_2d_padding( - inputs, padding=self.padding, data_format=self.data_format) - - def get_config(self): - config = {'padding': self.padding, 'data_format': self.data_format} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Zero-padding layer for 2D input (e.g. picture). + + This layer can add rows and columns of zeros + at the top, bottom, left and right side of an image tensor. + + Examples: + + >>> input_shape = (1, 1, 2, 2) + >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + >>> print(x) + [[[[0 1] + [2 3]]]] + >>> y = tf.keras.layers.ZeroPadding2D(padding=1)(x) + >>> print(y) + tf.Tensor( + [[[[0 0] + [0 0] + [0 0] + [0 0]] + [[0 0] + [0 1] + [2 3] + [0 0]] + [[0 0] + [0 0] + [0 0] + [0 0]]]], shape=(1, 3, 4, 2), dtype=int64) + + Args: + padding: Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. + - If int: the same symmetric padding + is applied to height and width. + - If tuple of 2 ints: + interpreted as two different + symmetric padding values for height and width: + `(symmetric_height_pad, symmetric_width_pad)`. + - If tuple of 2 tuples of 2 ints: + interpreted as + `((top_pad, bottom_pad), (left_pad, right_pad))` + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch_size, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch_size, channels, height, width)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + + Input shape: + 4D tensor with shape: + - If `data_format` is `"channels_last"`: + `(batch_size, rows, cols, channels)` + - If `data_format` is `"channels_first"`: + `(batch_size, channels, rows, cols)` + + Output shape: + 4D tensor with shape: + - If `data_format` is `"channels_last"`: + `(batch_size, padded_rows, padded_cols, channels)` + - If `data_format` is `"channels_first"`: + `(batch_size, channels, padded_rows, padded_cols)` + """ + + def __init__(self, padding=(1, 1), data_format=None, **kwargs): + super().__init__(**kwargs) + self.data_format = conv_utils.normalize_data_format(data_format) + if isinstance(padding, int): + self.padding = ((padding, padding), (padding, padding)) + elif hasattr(padding, "__len__"): + if len(padding) != 2: + raise ValueError( + f"`padding` should have two elements. Received: {padding}." 
+ ) + height_padding = conv_utils.normalize_tuple( + padding[0], 2, "1st entry of padding", allow_zero=True + ) + width_padding = conv_utils.normalize_tuple( + padding[1], 2, "2nd entry of padding", allow_zero=True + ) + self.padding = (height_padding, width_padding) + else: + raise ValueError( + "`padding` should be either an int, " + "a tuple of 2 ints " + "(symmetric_height_pad, symmetric_width_pad), " + "or a tuple of 2 tuples of 2 ints " + "((top_pad, bottom_pad), (left_pad, right_pad)). " + f"Received: {padding}." + ) + self.input_spec = InputSpec(ndim=4) + + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_first": + if input_shape[2] is not None: + rows = input_shape[2] + self.padding[0][0] + self.padding[0][1] + else: + rows = None + if input_shape[3] is not None: + cols = input_shape[3] + self.padding[1][0] + self.padding[1][1] + else: + cols = None + return tf.TensorShape([input_shape[0], input_shape[1], rows, cols]) + elif self.data_format == "channels_last": + if input_shape[1] is not None: + rows = input_shape[1] + self.padding[0][0] + self.padding[0][1] + else: + rows = None + if input_shape[2] is not None: + cols = input_shape[2] + self.padding[1][0] + self.padding[1][1] + else: + cols = None + return tf.TensorShape([input_shape[0], rows, cols, input_shape[3]]) + + def call(self, inputs): + return backend.spatial_2d_padding( + inputs, padding=self.padding, data_format=self.data_format + ) + + def get_config(self): + config = {"padding": self.padding, "data_format": self.data_format} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/zero_padding3d.py b/keras/layers/reshaping/zero_padding3d.py index 214bf6355593..147118afd52e 100644 --- a/keras/layers/reshaping/zero_padding3d.py +++ b/keras/layers/reshaping/zero_padding3d.py @@ -13,138 +13,152 @@ # limitations under the License. # ============================================================================== """Keras zero-padding layer for 3D input.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.utils import conv_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ZeroPadding3D') +@keras_export("keras.layers.ZeroPadding3D") class ZeroPadding3D(Layer): - """Zero-padding layer for 3D data (spatial or spatio-temporal). + """Zero-padding layer for 3D data (spatial or spatio-temporal). - Examples: + Examples: - >>> input_shape = (1, 1, 2, 2, 3) - >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) - >>> y = tf.keras.layers.ZeroPadding3D(padding=2)(x) - >>> print(y.shape) - (1, 5, 6, 6, 3) + >>> input_shape = (1, 1, 2, 2, 3) + >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + >>> y = tf.keras.layers.ZeroPadding3D(padding=2)(x) + >>> print(y.shape) + (1, 5, 6, 6, 3) - Args: - padding: Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints. - - If int: the same symmetric padding - is applied to height and width. - - If tuple of 3 ints: - interpreted as two different - symmetric padding values for height and width: - `(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)`. 
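For the 2D layer whose hunk ends above, the `((top_pad, bottom_pad), (left_pad, right_pad))` form is the one that trips people up; a short sketch of the resulting shape (public API only):

```python
import numpy as np
import tensorflow as tf

x = np.ones((1, 2, 2, 1))  # (batch, rows, cols, channels)
y = tf.keras.layers.ZeroPadding2D(padding=((1, 2), (3, 4)))(x)

# rows: 2 + 1 + 2 = 5, cols: 2 + 3 + 4 = 9
print(y.shape)  # (1, 5, 9, 1)
```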
-    - If tuple of 3 tuples of 2 ints:
-      interpreted as
-      `((left_dim1_pad, right_dim1_pad), (left_dim2_pad,
-        right_dim2_pad), (left_dim3_pad, right_dim3_pad))`
-    data_format: A string,
-      one of `channels_last` (default) or `channels_first`.
-      The ordering of the dimensions in the inputs.
-      `channels_last` corresponds to inputs with shape
-      `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
-      while `channels_first` corresponds to inputs with shape
-      `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
-      It defaults to the `image_data_format` value found in your
-      Keras config file at `~/.keras/keras.json`.
-      If you never set it, then it will be "channels_last".
+    Args:
+        padding: Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints.
+            - If int: the same symmetric padding
+                is applied to all three spatial dimensions.
+            - If tuple of 3 ints:
+                interpreted as three different
+                symmetric padding values for the three spatial dimensions:
+                `(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)`.
+            - If tuple of 3 tuples of 2 ints:
+                interpreted as
+                `((left_dim1_pad, right_dim1_pad), (left_dim2_pad,
+                right_dim2_pad), (left_dim3_pad, right_dim3_pad))`
+        data_format: A string,
+            one of `channels_last` (default) or `channels_first`.
+            The ordering of the dimensions in the inputs.
+            `channels_last` corresponds to inputs with shape
+            `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+            while `channels_first` corresponds to inputs with shape
+            `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
+            When unspecified, uses
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json` (if exists) else 'channels_last'.
+            Defaults to 'channels_last'.

-  Input shape:
-    5D tensor with shape:
-    - If `data_format` is `"channels_last"`:
-      `(batch_size, first_axis_to_pad, second_axis_to_pad, third_axis_to_pad,
-        depth)`
-    - If `data_format` is `"channels_first"`:
-      `(batch_size, depth, first_axis_to_pad, second_axis_to_pad,
+    Input shape:
+        5D tensor with shape:
+        - If `data_format` is `"channels_last"`:
+            `(batch_size, first_axis_to_pad, second_axis_to_pad,
+            third_axis_to_pad, depth)`
+        - If `data_format` is `"channels_first"`:
+            `(batch_size, depth, first_axis_to_pad, second_axis_to_pad,
          third_axis_to_pad)`

-  Output shape:
-    5D tensor with shape:
-    - If `data_format` is `"channels_last"`:
-      `(batch_size, first_padded_axis, second_padded_axis, third_axis_to_pad,
-        depth)`
-    - If `data_format` is `"channels_first"`:
-      `(batch_size, depth, first_padded_axis, second_padded_axis,
-        third_axis_to_pad)`
-  """
+    Output shape:
+        5D tensor with shape:
+        - If `data_format` is `"channels_last"`:
+            `(batch_size, first_padded_axis, second_padded_axis,
+            third_axis_to_pad, depth)`
+        - If `data_format` is `"channels_first"`:
+            `(batch_size, depth, first_padded_axis, second_padded_axis,
+            third_axis_to_pad)`
+    """

-  def __init__(self, padding=(1, 1, 1), data_format=None, **kwargs):
-    super().__init__(**kwargs)
-    self.data_format = conv_utils.normalize_data_format(data_format)
-    if isinstance(padding, int):
-      self.padding = ((padding, padding), (padding, padding), (padding,
-                                                               padding))
-    elif hasattr(padding, '__len__'):
-      if len(padding) != 3:
-        raise ValueError('`padding` should have 3 elements. '
-                         f'Received: {padding}.')
-      dim1_padding = conv_utils.normalize_tuple(
-          padding[0], 2, '1st entry of padding', allow_zero=True)
-      dim2_padding = conv_utils.normalize_tuple(
-          padding[1], 2, '2nd entry of padding', allow_zero=True)
-      dim3_padding = conv_utils.normalize_tuple(
-          padding[2], 2, '3rd entry of padding', allow_zero=True)
-      self.padding = (dim1_padding, dim2_padding, dim3_padding)
-    else:
-      raise ValueError(
-          '`padding` should be either an int, '
-          'a tuple of 3 ints '
-          '(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad), '
-          'or a tuple of 3 tuples of 2 ints '
-          '((left_dim1_pad, right_dim1_pad),'
-          ' (left_dim2_pad, right_dim2_pad),'
-          ' (left_dim3_pad, right_dim2_pad)). '
-          f'Received: {padding}.')
-    self.input_spec = InputSpec(ndim=5)
+    def __init__(self, padding=(1, 1, 1), data_format=None, **kwargs):
+        super().__init__(**kwargs)
+        self.data_format = conv_utils.normalize_data_format(data_format)
+        if isinstance(padding, int):
+            self.padding = (
+                (padding, padding),
+                (padding, padding),
+                (padding, padding),
+            )
+        elif hasattr(padding, "__len__"):
+            if len(padding) != 3:
+                raise ValueError(
+                    f"`padding` should have 3 elements. Received: {padding}."
+                )
+            dim1_padding = conv_utils.normalize_tuple(
+                padding[0], 2, "1st entry of padding", allow_zero=True
+            )
+            dim2_padding = conv_utils.normalize_tuple(
+                padding[1], 2, "2nd entry of padding", allow_zero=True
+            )
+            dim3_padding = conv_utils.normalize_tuple(
+                padding[2], 2, "3rd entry of padding", allow_zero=True
+            )
+            self.padding = (dim1_padding, dim2_padding, dim3_padding)
+        else:
+            raise ValueError(
+                "`padding` should be either an int, "
+                "a tuple of 3 ints "
+                "(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad), "
+                "or a tuple of 3 tuples of 2 ints "
+                "((left_dim1_pad, right_dim1_pad),"
+                " (left_dim2_pad, right_dim2_pad),"
+                " (left_dim3_pad, right_dim3_pad)). "
+                f"Received: {padding}."
+ ) + self.input_spec = InputSpec(ndim=5) - def compute_output_shape(self, input_shape): - input_shape = tf.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - if input_shape[2] is not None: - dim1 = input_shape[2] + self.padding[0][0] + self.padding[0][1] - else: - dim1 = None - if input_shape[3] is not None: - dim2 = input_shape[3] + self.padding[1][0] + self.padding[1][1] - else: - dim2 = None - if input_shape[4] is not None: - dim3 = input_shape[4] + self.padding[2][0] + self.padding[2][1] - else: - dim3 = None - return tf.TensorShape( - [input_shape[0], input_shape[1], dim1, dim2, dim3]) - elif self.data_format == 'channels_last': - if input_shape[1] is not None: - dim1 = input_shape[1] + self.padding[0][0] + self.padding[0][1] - else: - dim1 = None - if input_shape[2] is not None: - dim2 = input_shape[2] + self.padding[1][0] + self.padding[1][1] - else: - dim2 = None - if input_shape[3] is not None: - dim3 = input_shape[3] + self.padding[2][0] + self.padding[2][1] - else: - dim3 = None - return tf.TensorShape( - [input_shape[0], dim1, dim2, dim3, input_shape[4]]) + def compute_output_shape(self, input_shape): + input_shape = tf.TensorShape(input_shape).as_list() + if self.data_format == "channels_first": + if input_shape[2] is not None: + dim1 = input_shape[2] + self.padding[0][0] + self.padding[0][1] + else: + dim1 = None + if input_shape[3] is not None: + dim2 = input_shape[3] + self.padding[1][0] + self.padding[1][1] + else: + dim2 = None + if input_shape[4] is not None: + dim3 = input_shape[4] + self.padding[2][0] + self.padding[2][1] + else: + dim3 = None + return tf.TensorShape( + [input_shape[0], input_shape[1], dim1, dim2, dim3] + ) + elif self.data_format == "channels_last": + if input_shape[1] is not None: + dim1 = input_shape[1] + self.padding[0][0] + self.padding[0][1] + else: + dim1 = None + if input_shape[2] is not None: + dim2 = input_shape[2] + self.padding[1][0] + self.padding[1][1] + else: + dim2 = None + if input_shape[3] is not None: + dim3 = input_shape[3] + self.padding[2][0] + self.padding[2][1] + else: + dim3 = None + return tf.TensorShape( + [input_shape[0], dim1, dim2, dim3, input_shape[4]] + ) - def call(self, inputs): - return backend.spatial_3d_padding( - inputs, padding=self.padding, data_format=self.data_format) + def call(self, inputs): + return backend.spatial_3d_padding( + inputs, padding=self.padding, data_format=self.data_format + ) - def get_config(self): - config = {'padding': self.padding, 'data_format': self.data_format} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = {"padding": self.padding, "data_format": self.data_format} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/reshaping/zero_padding_test.py b/keras/layers/reshaping/zero_padding_test.py index 0896cd01afa4..4e997658d791 100644 --- a/keras/layers/reshaping/zero_padding_test.py +++ b/keras/layers/reshaping/zero_padding_test.py @@ -14,246 +14,327 @@ # ============================================================================== """Tests for zero-padding layers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class 
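To make the shape arithmetic in `compute_output_shape` above concrete: each spatial output dim is the input dim plus its two one-sided pads, and the batch and channel axes pass through. A quick sketch (public API; the padding values are arbitrary):

```python
import tensorflow as tf

layer = tf.keras.layers.ZeroPadding3D(padding=((1, 2), (3, 4), (0, 2)))
shape = layer.compute_output_shape((None, 4, 5, 3, 2))  # channels_last

# dim1: 4+1+2=7, dim2: 5+3+4=12, dim3: 3+0+2=5
print(shape)  # (None, 7, 12, 5, 2)
```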
ZeroPaddingTest(test_combinations.TestCase): + def test_zero_padding_1d(self): + num_samples = 2 + input_dim = 2 + num_steps = 5 + shape = (num_samples, num_steps, input_dim) + inputs = np.ones(shape) - def test_zero_padding_1d(self): - num_samples = 2 - input_dim = 2 - num_steps = 5 - shape = (num_samples, num_steps, input_dim) - inputs = np.ones(shape) + with self.cached_session(): + # basic test + test_utils.layer_test( + keras.layers.ZeroPadding1D, + kwargs={"padding": 2}, + input_shape=inputs.shape, + ) + test_utils.layer_test( + keras.layers.ZeroPadding1D, + kwargs={"padding": (1, 2)}, + input_shape=inputs.shape, + ) - with self.cached_session(): - # basic test - test_utils.layer_test( - keras.layers.ZeroPadding1D, - kwargs={'padding': 2}, - input_shape=inputs.shape) - test_utils.layer_test( - keras.layers.ZeroPadding1D, - kwargs={'padding': (1, 2)}, - input_shape=inputs.shape) + # correctness test + layer = keras.layers.ZeroPadding1D(padding=2) + layer.build(shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + for offset in [0, 1, -1, -2]: + np.testing.assert_allclose(np_output[:, offset, :], 0.0) + np.testing.assert_allclose(np_output[:, 2:-2, :], 1.0) - # correctness test - layer = keras.layers.ZeroPadding1D(padding=2) - layer.build(shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() - else: - np_output = keras.backend.eval(output) - for offset in [0, 1, -1, -2]: - np.testing.assert_allclose(np_output[:, offset, :], 0.) - np.testing.assert_allclose(np_output[:, 2:-2, :], 1.) + layer = keras.layers.ZeroPadding1D(padding=(1, 2)) + layer.build(shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + for left_offset in [0]: + np.testing.assert_allclose(np_output[:, left_offset, :], 0.0) + for right_offset in [-1, -2]: + np.testing.assert_allclose(np_output[:, right_offset, :], 0.0) + np.testing.assert_allclose(np_output[:, 1:-2, :], 1.0) + layer.get_config() - layer = keras.layers.ZeroPadding1D(padding=(1, 2)) - layer.build(shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() - else: - np_output = keras.backend.eval(output) - for left_offset in [0]: - np.testing.assert_allclose(np_output[:, left_offset, :], 0.) - for right_offset in [-1, -2]: - np.testing.assert_allclose(np_output[:, right_offset, :], 0.) - np.testing.assert_allclose(np_output[:, 1:-2, :], 1.) 
-      layer.get_config()
+            # test incorrect use
+            with self.assertRaises(ValueError):
+                keras.layers.ZeroPadding1D(padding=(1, 1, 1))
+            with self.assertRaises(ValueError):
+                keras.layers.ZeroPadding1D(padding=None)

-    # test incorrect use
-    with self.assertRaises(ValueError):
-      keras.layers.ZeroPadding1D(padding=(1, 1, 1))
-    with self.assertRaises(ValueError):
-      keras.layers.ZeroPadding1D(padding=None)
+    @parameterized.named_parameters(
+        ("channels_first", "channels_first"), ("channels_last", "channels_last")
+    )
+    def test_zero_padding_2d(self, data_format):
+        num_samples = 2
+        stack_size = 2
+        input_num_row = 4
+        input_num_col = 5
+        if data_format == "channels_first":
+            inputs = np.ones(
+                (num_samples, stack_size, input_num_row, input_num_col)
+            )
+        elif data_format == "channels_last":
+            inputs = np.ones(
+                (num_samples, input_num_row, input_num_col, stack_size)
+            )

-  @parameterized.named_parameters(('channels_first', 'channels_first'),
-                                  ('channels_last', 'channels_last'))
-  def test_zero_padding_2d(self, data_format):
-    num_samples = 2
-    stack_size = 2
-    input_num_row = 4
-    input_num_col = 5
-    if data_format == 'channels_first':
-      inputs = np.ones((num_samples, stack_size, input_num_row, input_num_col))
-    elif data_format == 'channels_last':
-      inputs = np.ones((num_samples, input_num_row, input_num_col, stack_size))
+        # basic test
+        with self.cached_session():
+            test_utils.layer_test(
+                keras.layers.ZeroPadding2D,
+                kwargs={"padding": (2, 2), "data_format": data_format},
+                input_shape=inputs.shape,
+            )
+            test_utils.layer_test(
+                keras.layers.ZeroPadding2D,
+                kwargs={
+                    "padding": ((1, 2), (3, 4)),
+                    "data_format": data_format,
+                },
+                input_shape=inputs.shape,
+            )

-    # basic test
-    with self.cached_session():
-      test_utils.layer_test(
-          keras.layers.ZeroPadding2D,
-          kwargs={
-              'padding': (2, 2),
-              'data_format': data_format
-          },
-          input_shape=inputs.shape)
-      test_utils.layer_test(
-          keras.layers.ZeroPadding2D,
-          kwargs={
-              'padding': ((1, 2), (3, 4)),
-              'data_format': data_format
-          },
-          input_shape=inputs.shape)
+        # correctness test
+        with self.cached_session():
+            layer = keras.layers.ZeroPadding2D(
+                padding=(2, 2), data_format=data_format
+            )
+            layer.build(inputs.shape)
+            output = layer(keras.backend.variable(inputs))
+            if tf.executing_eagerly():
+                np_output = output.numpy()
+            else:
+                np_output = keras.backend.eval(output)
+            if data_format == "channels_last":
+                for offset in [0, 1, -1, -2]:
+                    np.testing.assert_allclose(np_output[:, offset, :, :], 0.0)
+                    np.testing.assert_allclose(np_output[:, :, offset, :], 0.0)
+                np.testing.assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.0)
+            elif data_format == "channels_first":
+                for offset in [0, 1, -1, -2]:
+                    np.testing.assert_allclose(np_output[:, :, offset, :], 0.0)
+                    np.testing.assert_allclose(np_output[:, :, :, offset], 0.0)
+                np.testing.assert_allclose(np_output[:, :, 2:-2, 2:-2], 1.0)

-    # correctness test
-    with self.cached_session():
-      layer = keras.layers.ZeroPadding2D(
-          padding=(2, 2), data_format=data_format)
-      layer.build(inputs.shape)
-      output = layer(keras.backend.variable(inputs))
-      if tf.executing_eagerly():
-        np_output = output.numpy()
-      else:
-        np_output = keras.backend.eval(output)
-      if data_format == 'channels_last':
-        for offset in [0, 1, -1, -2]:
-          np.testing.assert_allclose(np_output[:, offset, :, :], 0.)
-          np.testing.assert_allclose(np_output[:, :, offset, :], 0.)
-        np.testing.assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.)
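A note on the channels_first interior check corrected above: with `data_format="channels_first"` the spatial axes are 2 and 3, so slicing `[:, 2:-2, 2:-2, :]` cuts into the size-2 channel axis and asserts over an empty array, which passes vacuously. A tiny NumPy sketch of the axis bookkeeping:

```python
import numpy as np

x = np.ones((2, 2, 4, 5))  # (batch, channels, rows, cols)
padded = np.pad(x, ((0, 0), (0, 0), (2, 2), (2, 2)))

print(padded.shape)                    # (2, 2, 8, 9)
print(padded[:, 2:-2, 2:-2, :].size)   # 0: empty slice, vacuous assert
print(padded[:, :, 2:-2, 2:-2].mean()) # 1.0: the actual interior
```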
- elif data_format == 'channels_first': - for offset in [0, 1, -1, -2]: - np.testing.assert_allclose(np_output[:, :, offset, :], 0.) - np.testing.assert_allclose(np_output[:, :, :, offset], 0.) - np.testing.assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.) + layer = keras.layers.ZeroPadding2D( + padding=((1, 2), (3, 4)), data_format=data_format + ) + layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + if data_format == "channels_last": + for top_offset in [0]: + np.testing.assert_allclose( + np_output[:, top_offset, :, :], 0.0 + ) + for bottom_offset in [-1, -2]: + np.testing.assert_allclose( + np_output[:, bottom_offset, :, :], 0.0 + ) + for left_offset in [0, 1, 2]: + np.testing.assert_allclose( + np_output[:, :, left_offset, :], 0.0 + ) + for right_offset in [-1, -2, -3, -4]: + np.testing.assert_allclose( + np_output[:, :, right_offset, :], 0.0 + ) + np.testing.assert_allclose(np_output[:, 1:-2, 3:-4, :], 1.0) + elif data_format == "channels_first": + for top_offset in [0]: + np.testing.assert_allclose( + np_output[:, :, top_offset, :], 0.0 + ) + for bottom_offset in [-1, -2]: + np.testing.assert_allclose( + np_output[:, :, bottom_offset, :], 0.0 + ) + for left_offset in [0, 1, 2]: + np.testing.assert_allclose( + np_output[:, :, :, left_offset], 0.0 + ) + for right_offset in [-1, -2, -3, -4]: + np.testing.assert_allclose( + np_output[:, :, :, right_offset], 0.0 + ) + np.testing.assert_allclose(np_output[:, :, 1:-2, 3:-4], 1.0) - layer = keras.layers.ZeroPadding2D( - padding=((1, 2), (3, 4)), data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() - else: - np_output = keras.backend.eval(output) - if data_format == 'channels_last': - for top_offset in [0]: - np.testing.assert_allclose(np_output[:, top_offset, :, :], 0.) - for bottom_offset in [-1, -2]: - np.testing.assert_allclose(np_output[:, bottom_offset, :, :], 0.) - for left_offset in [0, 1, 2]: - np.testing.assert_allclose(np_output[:, :, left_offset, :], 0.) - for right_offset in [-1, -2, -3, -4]: - np.testing.assert_allclose(np_output[:, :, right_offset, :], 0.) - np.testing.assert_allclose(np_output[:, 1:-2, 3:-4, :], 1.) - elif data_format == 'channels_first': - for top_offset in [0]: - np.testing.assert_allclose(np_output[:, :, top_offset, :], 0.) - for bottom_offset in [-1, -2]: - np.testing.assert_allclose(np_output[:, :, bottom_offset, :], 0.) - for left_offset in [0, 1, 2]: - np.testing.assert_allclose(np_output[:, :, :, left_offset], 0.) - for right_offset in [-1, -2, -3, -4]: - np.testing.assert_allclose(np_output[:, :, :, right_offset], 0.) - np.testing.assert_allclose(np_output[:, :, 1:-2, 3:-4], 1.) 
+ # test incorrect use + with self.assertRaises(ValueError): + keras.layers.ZeroPadding2D(padding=(1, 1, 1)) + with self.assertRaises(ValueError): + keras.layers.ZeroPadding2D(padding=None) - # test incorrect use - with self.assertRaises(ValueError): - keras.layers.ZeroPadding2D(padding=(1, 1, 1)) - with self.assertRaises(ValueError): - keras.layers.ZeroPadding2D(padding=None) + @parameterized.named_parameters( + ("channels_first", "channels_first"), ("channels_last", "channels_last") + ) + def test_zero_padding_3d(self, data_format): + num_samples = 2 + stack_size = 2 + input_len_dim1 = 4 + input_len_dim2 = 5 + input_len_dim3 = 3 - @parameterized.named_parameters(('channels_first', 'channels_first'), - ('channels_last', 'channels_last')) - def test_zero_padding_3d(self, data_format): - num_samples = 2 - stack_size = 2 - input_len_dim1 = 4 - input_len_dim2 = 5 - input_len_dim3 = 3 + if data_format == "channels_first": + inputs = np.ones( + ( + num_samples, + stack_size, + input_len_dim1, + input_len_dim2, + input_len_dim3, + ) + ) + elif data_format == "channels_last": + inputs = np.ones( + ( + num_samples, + input_len_dim1, + input_len_dim2, + input_len_dim3, + stack_size, + ) + ) - if data_format == 'channels_first': - inputs = np.ones((num_samples, stack_size, input_len_dim1, input_len_dim2, - input_len_dim3)) - elif data_format == 'channels_last': - inputs = np.ones((num_samples, input_len_dim1, input_len_dim2, - input_len_dim3, stack_size)) + with self.cached_session(): + # basic test + test_utils.layer_test( + keras.layers.ZeroPadding3D, + kwargs={"padding": (2, 2, 2), "data_format": data_format}, + input_shape=inputs.shape, + ) + test_utils.layer_test( + keras.layers.ZeroPadding3D, + kwargs={ + "padding": ((1, 2), (3, 4), (0, 2)), + "data_format": data_format, + }, + input_shape=inputs.shape, + ) - with self.cached_session(): - # basic test - test_utils.layer_test( - keras.layers.ZeroPadding3D, - kwargs={ - 'padding': (2, 2, 2), - 'data_format': data_format - }, - input_shape=inputs.shape) - test_utils.layer_test( - keras.layers.ZeroPadding3D, - kwargs={ - 'padding': ((1, 2), (3, 4), (0, 2)), - 'data_format': data_format - }, - input_shape=inputs.shape) + with self.cached_session(): + # correctness test + layer = keras.layers.ZeroPadding3D( + padding=(2, 2, 2), data_format=data_format + ) + layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + if data_format == "channels_last": + for offset in [0, 1, -1, -2]: + np.testing.assert_allclose( + np_output[:, offset, :, :, :], 0.0 + ) + np.testing.assert_allclose( + np_output[:, :, offset, :, :], 0.0 + ) + np.testing.assert_allclose( + np_output[:, :, :, offset, :], 0.0 + ) + np.testing.assert_allclose( + np_output[:, 2:-2, 2:-2, 2:-2, :], 1.0 + ) + elif data_format == "channels_first": + for offset in [0, 1, -1, -2]: + np.testing.assert_allclose( + np_output[:, :, offset, :, :], 0.0 + ) + np.testing.assert_allclose( + np_output[:, :, :, offset, :], 0.0 + ) + np.testing.assert_allclose( + np_output[:, :, :, :, offset], 0.0 + ) + np.testing.assert_allclose( + np_output[:, :, 2:-2, 2:-2, 2:-2], 1.0 + ) - with self.cached_session(): - # correctness test - layer = keras.layers.ZeroPadding3D( - padding=(2, 2, 2), data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() - else: - np_output = 
keras.backend.eval(output) - if data_format == 'channels_last': - for offset in [0, 1, -1, -2]: - np.testing.assert_allclose(np_output[:, offset, :, :, :], 0.) - np.testing.assert_allclose(np_output[:, :, offset, :, :], 0.) - np.testing.assert_allclose(np_output[:, :, :, offset, :], 0.) - np.testing.assert_allclose(np_output[:, 2:-2, 2:-2, 2:-2, :], 1.) - elif data_format == 'channels_first': - for offset in [0, 1, -1, -2]: - np.testing.assert_allclose(np_output[:, :, offset, :, :], 0.) - np.testing.assert_allclose(np_output[:, :, :, offset, :], 0.) - np.testing.assert_allclose(np_output[:, :, :, :, offset], 0.) - np.testing.assert_allclose(np_output[:, :, 2:-2, 2:-2, 2:-2], 1.) + layer = keras.layers.ZeroPadding3D( + padding=((1, 2), (3, 4), (0, 2)), data_format=data_format + ) + layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if tf.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + if data_format == "channels_last": + for offset in [0]: + np.testing.assert_allclose( + np_output[:, offset, :, :, :], 0.0 + ) + for offset in [-1, -2]: + np.testing.assert_allclose( + np_output[:, offset, :, :, :], 0.0 + ) + for offset in [0, 1, 2]: + np.testing.assert_allclose( + np_output[:, :, offset, :, :], 0.0 + ) + for offset in [-1, -2, -3, -4]: + np.testing.assert_allclose( + np_output[:, :, offset, :, :], 0.0 + ) + for offset in [-1, -2]: + np.testing.assert_allclose( + np_output[:, :, :, offset, :], 0.0 + ) + np.testing.assert_allclose( + np_output[:, 1:-2, 3:-4, 0:-2, :], 1.0 + ) + elif data_format == "channels_first": + for offset in [0]: + np.testing.assert_allclose( + np_output[:, :, offset, :, :], 0.0 + ) + for offset in [-1, -2]: + np.testing.assert_allclose( + np_output[:, :, offset, :, :], 0.0 + ) + for offset in [0, 1, 2]: + np.testing.assert_allclose( + np_output[:, :, :, offset, :], 0.0 + ) + for offset in [-1, -2, -3, -4]: + np.testing.assert_allclose( + np_output[:, :, :, offset, :], 0.0 + ) + for offset in [-1, -2]: + np.testing.assert_allclose( + np_output[:, :, :, :, offset], 0.0 + ) + np.testing.assert_allclose( + np_output[:, :, 1:-2, 3:-4, 0:-2], 1.0 + ) - layer = keras.layers.ZeroPadding3D( - padding=((1, 2), (3, 4), (0, 2)), data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if tf.executing_eagerly(): - np_output = output.numpy() - else: - np_output = keras.backend.eval(output) - if data_format == 'channels_last': - for offset in [0]: - np.testing.assert_allclose(np_output[:, offset, :, :, :], 0.) - for offset in [-1, -2]: - np.testing.assert_allclose(np_output[:, offset, :, :, :], 0.) - for offset in [0, 1, 2]: - np.testing.assert_allclose(np_output[:, :, offset, :, :], 0.) - for offset in [-1, -2, -3, -4]: - np.testing.assert_allclose(np_output[:, :, offset, :, :], 0.) - for offset in [-1, -2]: - np.testing.assert_allclose(np_output[:, :, :, offset, :], 0.) - np.testing.assert_allclose(np_output[:, 1:-2, 3:-4, 0:-2, :], 1.) - elif data_format == 'channels_first': - for offset in [0]: - np.testing.assert_allclose(np_output[:, :, offset, :, :], 0.) - for offset in [-1, -2]: - np.testing.assert_allclose(np_output[:, :, offset, :, :], 0.) - for offset in [0, 1, 2]: - np.testing.assert_allclose(np_output[:, :, :, offset, :], 0.) - for offset in [-1, -2, -3, -4]: - np.testing.assert_allclose(np_output[:, :, :, offset, :], 0.) - for offset in [-1, -2]: - np.testing.assert_allclose(np_output[:, :, :, :, offset], 0.) 
- np.testing.assert_allclose(np_output[:, :, 1:-2, 3:-4, 0:-2], 1.) + # test incorrect use + with self.assertRaises(ValueError): + keras.layers.ZeroPadding3D(padding=(1, 1)) + with self.assertRaises(ValueError): + keras.layers.ZeroPadding3D(padding=None) - # test incorrect use - with self.assertRaises(ValueError): - keras.layers.ZeroPadding3D(padding=(1, 1)) - with self.assertRaises(ValueError): - keras.layers.ZeroPadding3D(padding=None) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/BUILD b/keras/layers/rnn/BUILD index ccbb9690a242..5b7ca0279f40 100644 --- a/keras/layers/rnn/BUILD +++ b/keras/layers/rnn/BUILD @@ -1,12 +1,14 @@ # Description: # Contains the Keras recurrent layers. +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "cuda_py_test") # buildifier: disable=same-origin-load load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/tensorflow_models/official/projects/residual_mobilenet/modeling/backbones:__pkg__", @@ -180,7 +182,7 @@ py_library( "//keras:backend", "//keras/engine:base_layer", "//keras/engine:input_spec", - "//keras/saving/saved_model", + "//keras/saving/legacy/saved_model", "//keras/utils:generic_utils", ], ) @@ -396,6 +398,9 @@ cuda_py_test( srcs = ["gru_lstm_test.py"], python_version = "PY3", shard_count = 2, + tags = [ + "no_oss", # TODO(b/277925387) + ], deps = [ ":gru", ":lstm", @@ -414,7 +419,9 @@ cuda_py_test( srcs = ["gru_test.py"], python_version = "PY3", shard_count = 12, - tags = ["no_rocm"], + tags = [ + "no_oss", # TODO(b/277925387) + ], deps = [ ":gru_lstm_utils", "//:expect_absl_installed", @@ -501,7 +508,6 @@ tf_py_test( python_version = "PY3", shard_count = 12, tags = [ - "no_rocm", "notsan", # TODO(b/170870794) ], deps = [ @@ -544,7 +550,6 @@ tf_py_test( srcs = ["conv_lstm_test.py"], python_version = "PY3", shard_count = 8, - tags = ["no_rocm"], deps = [ "//:expect_absl_installed", "//:expect_numpy_installed", @@ -562,6 +567,7 @@ cuda_py_test( python_version = "PY3", shard_count = 4, tags = [ + "no_oss", # TODO(b/277925387) "no_windows_gpu", ], deps = [ @@ -569,7 +575,7 @@ cuda_py_test( "//:expect_numpy_installed", "//:expect_tensorflow_installed", "//keras", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/testing_infra:test_combinations", "//keras/testing_infra:test_utils", ], diff --git a/keras/layers/rnn/__init__.py b/keras/layers/rnn/__init__.py index 3b6587d9edcd..a2438fc7d105 100644 --- a/keras/layers/rnn/__init__.py +++ b/keras/layers/rnn/__init__.py @@ -13,59 +13,61 @@ # limitations under the License. # ============================================================================== """Keras recurrent layers.""" -# pylint: disable=g-bad-import-order,g-direct-tensorflow-import,disable=g-import-not-at-top import tensorflow.compat.v2 as tf +from keras.layers.rnn.abstract_rnn_cell import AbstractRNNCell + # Recurrent layers. 
 from keras.layers.rnn.base_rnn import RNN
-from keras.layers.rnn.abstract_rnn_cell import AbstractRNNCell
-from keras.layers.rnn.stacked_rnn_cells import StackedRNNCells
-from keras.layers.rnn.simple_rnn import SimpleRNNCell
 from keras.layers.rnn.simple_rnn import SimpleRNN
+from keras.layers.rnn.simple_rnn import SimpleRNNCell
+from keras.layers.rnn.stacked_rnn_cells import StackedRNNCells

 if tf.__internal__.tf2.enabled():
-  from keras.layers.rnn.gru import GRU
-  from keras.layers.rnn.gru import GRUCell
-  from keras.layers.rnn.lstm import LSTM
-  from keras.layers.rnn.lstm import LSTMCell
-  from keras.layers.rnn.gru_v1 import GRU as GRUV1
-  from keras.layers.rnn.gru_v1 import GRUCell as GRUCellV1
-  from keras.layers.rnn.lstm_v1 import LSTM as LSTMV1
-  from keras.layers.rnn.lstm_v1 import LSTMCell as LSTMCellV1
-  GRUV2 = GRU
-  GRUCellV2 = GRUCell
-  LSTMV2 = LSTM
-  LSTMCellV2 = LSTMCell
+    from keras.layers.rnn.gru import GRU
+    from keras.layers.rnn.gru import GRUCell
+    from keras.layers.rnn.gru_v1 import GRU as GRUV1
+    from keras.layers.rnn.gru_v1 import GRUCell as GRUCellV1
+    from keras.layers.rnn.lstm import LSTM
+    from keras.layers.rnn.lstm import LSTMCell
+    from keras.layers.rnn.lstm_v1 import LSTM as LSTMV1
+    from keras.layers.rnn.lstm_v1 import LSTMCell as LSTMCellV1
+
+    GRUV2 = GRU
+    GRUCellV2 = GRUCell
+    LSTMV2 = LSTM
+    LSTMCellV2 = LSTMCell
 else:
-  from keras.layers.rnn.gru_v1 import GRU
-  from keras.layers.rnn.gru_v1 import GRUCell
-  from keras.layers.rnn.lstm_v1 import LSTM
-  from keras.layers.rnn.lstm_v1 import LSTMCell
-  from keras.layers.rnn.gru import GRU as GRUV2
-  from keras.layers.rnn.gru import GRUCell as GRUCellV2
-  from keras.layers.rnn.lstm import LSTM as LSTMV2
-  from keras.layers.rnn.lstm import LSTMCell as LSTMCellV2
-  GRUV1 = GRU
-  GRUCellV1 = GRUCell
-  LSTMV1 = LSTM
-  LSTMCellV1 = LSTMCell
+    from keras.layers.rnn.gru import GRU as GRUV2
+    from keras.layers.rnn.gru import GRUCell as GRUCellV2
+    from keras.layers.rnn.gru_v1 import GRU
+    from keras.layers.rnn.gru_v1 import GRUCell
+    from keras.layers.rnn.lstm import LSTM as LSTMV2
+    from keras.layers.rnn.lstm import LSTMCell as LSTMCellV2
+    from keras.layers.rnn.lstm_v1 import LSTM
+    from keras.layers.rnn.lstm_v1 import LSTMCell

-# Convolutional-recurrent layers.
-from keras.layers.rnn.conv_lstm1d import ConvLSTM1D
-from keras.layers.rnn.conv_lstm2d import ConvLSTM2D
-from keras.layers.rnn.conv_lstm3d import ConvLSTM3D
-
-# cuDNN recurrent layers.
-from keras.layers.rnn.cudnn_lstm import CuDNNLSTM
-from keras.layers.rnn.cudnn_gru import CuDNNGRU
+    GRUV1 = GRU
+    GRUCellV1 = GRUCell
+    LSTMV1 = LSTM
+    LSTMCellV1 = LSTMCell

 # Wrapper functions.
 from keras.layers.rnn.base_wrapper import Wrapper
 from keras.layers.rnn.bidirectional import Bidirectional
-from keras.layers.rnn.time_distributed import TimeDistributed

 # RNN Cell wrappers.
 from keras.layers.rnn.cell_wrappers import DeviceWrapper
 from keras.layers.rnn.cell_wrappers import DropoutWrapper
 from keras.layers.rnn.cell_wrappers import ResidualWrapper
+
+# Convolutional-recurrent layers.
+from keras.layers.rnn.conv_lstm1d import ConvLSTM1D
+from keras.layers.rnn.conv_lstm2d import ConvLSTM2D
+from keras.layers.rnn.conv_lstm3d import ConvLSTM3D
+
+# cuDNN recurrent layers.
+from keras.layers.rnn.cudnn_gru import CuDNNGRU
+from keras.layers.rnn.cudnn_lstm import CuDNNLSTM +from keras.layers.rnn.time_distributed import TimeDistributed diff --git a/keras/layers/rnn/abstract_rnn_cell.py b/keras/layers/rnn/abstract_rnn_cell.py index 0ae557fc40ec..d097947a21e5 100644 --- a/keras/layers/rnn/abstract_rnn_cell.py +++ b/keras/layers/rnn/abstract_rnn_cell.py @@ -13,101 +13,103 @@ # limitations under the License. # ============================================================================== """Base class for RNN cells.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras.engine import base_layer from keras.layers.rnn import rnn_utils +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.AbstractRNNCell') +@keras_export("keras.layers.AbstractRNNCell") class AbstractRNNCell(base_layer.Layer): - """Abstract object representing an RNN cell. - - See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) - for details about the usage of RNN API. - - This is the base class for implementing RNN cells with custom behavior. - - Every `RNNCell` must have the properties below and implement `call` with - the signature `(output, next_state) = call(input, state)`. - - Examples: - - ```python - class MinimalRNNCell(AbstractRNNCell): - - def __init__(self, units, **kwargs): - self.units = units - super(MinimalRNNCell, self).__init__(**kwargs) - - @property - def state_size(self): - return self.units - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = backend.dot(inputs, self.kernel) - output = h + backend.dot(prev_output, self.recurrent_kernel) - return output, output - ``` - - This definition of cell differs from the definition used in the literature. - In the literature, 'cell' refers to an object with a single scalar output. - This definition refers to a horizontal array of such units. - - An RNN cell, in the most abstract setting, is anything that has - a state and performs some operation that takes a matrix of inputs. - This operation results in an output matrix with `self.output_size` columns. - If `self.state_size` is an integer, this operation also results in a new - state matrix with `self.state_size` columns. If `self.state_size` is a - (possibly nested tuple of) TensorShape object(s), then it should return a - matching structure of Tensors having shape `[batch_size].concatenate(s)` - for each `s` in `self.batch_size`. - """ - - def call(self, inputs, states): - """The function that contains the logic for one RNN step calculation. - - Args: - inputs: the input tensor, which is a slide from the overall RNN input by - the time dimension (usually the second dimension). - states: the state tensor from previous step, which has the same shape - as `(batch, state_size)`. In the case of timestep 0, it will be the - initial state user specified, or zero filled tensor otherwise. - - Returns: - A tuple of two tensors: - 1. output tensor for the current timestep, with size `output_size`. - 2. state tensor for next step, which has the shape of `state_size`. + """Abstract object representing an RNN cell. + + See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + for details about the usage of RNN API. 
+ + This is the base class for implementing RNN cells with custom behavior. + + Every `RNNCell` must have the properties below and implement `call` with + the signature `(output, next_state) = call(input, state)`. + + Examples: + + ```python + class MinimalRNNCell(AbstractRNNCell): + + def __init__(self, units, **kwargs): + self.units = units + super(MinimalRNNCell, self).__init__(**kwargs) + + @property + def state_size(self): + return self.units + + def build(self, input_shape): + self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = backend.dot(inputs, self.kernel) + output = h + backend.dot(prev_output, self.recurrent_kernel) + return output, output + ``` + + This definition of cell differs from the definition used in the literature. + In the literature, 'cell' refers to an object with a single scalar output. + This definition refers to a horizontal array of such units. + + An RNN cell, in the most abstract setting, is anything that has + a state and performs some operation that takes a matrix of inputs. + This operation results in an output matrix with `self.output_size` columns. + If `self.state_size` is an integer, this operation also results in a new + state matrix with `self.state_size` columns. If `self.state_size` is a + (possibly nested tuple of) TensorShape object(s), then it should return a + matching structure of Tensors having shape `[batch_size].concatenate(s)` + for each `s` in `self.state_size`. """ - raise NotImplementedError - - @property - def state_size(self): - """size(s) of state(s) used by this cell. - - It can be represented by an Integer, a TensorShape or a tuple of Integers - or TensorShapes. - """ - raise NotImplementedError - - @property - def output_size(self): - """Integer or TensorShape: size of outputs produced by this cell.""" - raise NotImplementedError - def get_initial_state(self, inputs=None, batch_size=None, dtype=None): - return rnn_utils.generate_zero_filled_state_for_cell( - self, inputs, batch_size, dtype) + def call(self, inputs, states): + """The function that contains the logic for one RNN step calculation. + + Args: + inputs: the input tensor, which is a slice from the overall RNN input + along the time dimension (usually the second dimension). + states: the state tensor from previous step, which has the same shape + as `(batch, state_size)`. In the case of timestep 0, it will be the + initial state user specified, or zero-filled tensor otherwise. + + Returns: + A tuple of two tensors: + 1. output tensor for the current timestep, with size `output_size`. + 2. state tensor for next step, which has the shape of `state_size`. + """ + raise NotImplementedError + + @property + def state_size(self): + """size(s) of state(s) used by this cell. + + It can be represented by an Integer, a TensorShape or a tuple of + Integers or TensorShapes.
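As a rough usage sketch of the cell contract documented above (an aside, not part of this patch; the stand-in cell class, unit count, and shapes are illustrative assumptions): a cell exposing `state_size`, `output_size`, and a `call` returning `(output, new_states)` is driven across timesteps by `keras.layers.RNN`, and the default `get_initial_state` supplies zero-filled states via `generate_zero_filled_state_for_cell`.

```python
import tensorflow as tf

# Any cell satisfying the documented contract works; SimpleRNNCell is used
# here purely as a stand-in.
cell = tf.keras.layers.SimpleRNNCell(4)
layer = tf.keras.layers.RNN(cell)      # drives cell.call() over timesteps
outputs = layer(tf.zeros((2, 3, 5)))   # (batch, time, features) -> (2, 4)

# Default zero-filled initial state(s), matching state_size.
init_state = cell.get_initial_state(batch_size=2, dtype=tf.float32)
```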
+ """ + raise NotImplementedError + + @property + def output_size(self): + """Integer or TensorShape: size of outputs produced by this cell.""" + raise NotImplementedError + + def get_initial_state(self, inputs=None, batch_size=None, dtype=None): + return rnn_utils.generate_zero_filled_state_for_cell( + self, inputs, batch_size, dtype + ) diff --git a/keras/layers/rnn/base_conv_lstm.py b/keras/layers/rnn/base_conv_lstm.py index ef753cc94acb..b3280d5ac63b 100644 --- a/keras/layers/rnn/base_conv_lstm.py +++ b/keras/layers/rnn/base_conv_lstm.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Base class for N-D convolutional LSTM layers.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras import activations from keras import backend @@ -24,577 +26,617 @@ from keras.layers.rnn.base_conv_rnn import ConvRNN from keras.layers.rnn.dropout_rnn_cell_mixin import DropoutRNNCellMixin from keras.utils import conv_utils -import tensorflow.compat.v2 as tf class ConvLSTMCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer): - """Cell class for the ConvLSTM layer. - - Args: - rank: Integer, rank of the convolution, e.g. "2" for 2D convolutions. - filters: Integer, the dimensionality of the output space (i.e. the number of - output filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - dimensions of the convolution window. - strides: An integer or tuple/list of n integers, specifying the strides of - the convolution. Specifying any stride value != 1 is incompatible with - specifying any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means no - padding. `"same"` results in padding evenly to the left/right or up/down - of the input such that output has the same height/width dimension as the - input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - It defaults to the `image_data_format` value found in your Keras config - file at `~/.keras/keras.json`. If you never set it, then it will be - "channels_last". - dilation_rate: An integer or tuple/list of n integers, specifying the - dilation rate to use for dilated convolution. Currently, specifying any - `dilation_rate` value != 1 is incompatible with specifying any `strides` - value != 1. - activation: Activation function to use. If you don't specify anything, no - activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use for the recurrent step. - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, used for - the linear transformation of the inputs. - recurrent_initializer: Initializer for the `recurrent_kernel` weights - matrix, used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at - initialization. Use in combination with `bias_initializer="zeros"`. This - is recommended in [Jozefowicz et al., 2015]( - http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. 
- kernel_constraint: Constraint function applied to the `kernel` weights - matrix. - recurrent_constraint: Constraint function applied to the `recurrent_kernel` - weights matrix. - bias_constraint: Constraint function applied to the bias vector. - dropout: Float between 0 and 1. Fraction of the units to drop for the linear - transformation of the inputs. - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for - the linear transformation of the recurrent state. - Call arguments: - inputs: A (2+ `rank`)D tensor. - states: List of state tensors corresponding to the previous timestep. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. Only relevant when `dropout` or - `recurrent_dropout` is used. - """ - - def __init__(self, - rank, - filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0.0, - recurrent_dropout=0.0, - **kwargs): - super().__init__(**kwargs) - self.rank = rank - if self.rank > 3: - raise ValueError(f'Rank {rank} convolutions are not currently ' - f'implemented. Received: rank={rank}') - self.filters = filters - self.kernel_size = conv_utils.normalize_tuple(kernel_size, self.rank, - 'kernel_size') - self.strides = conv_utils.normalize_tuple( - strides, self.rank, 'strides', allow_zero=True) - self.padding = conv_utils.normalize_padding(padding) - self.data_format = conv_utils.normalize_data_format(data_format) - self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, self.rank, - 'dilation_rate') - self.activation = activations.get(activation) - self.recurrent_activation = activations.get(recurrent_activation) - self.use_bias = use_bias - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.unit_forget_bias = unit_forget_bias - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.dropout = min(1.0, max(0.0, dropout)) - self.recurrent_dropout = min(1.0, max(0.0, recurrent_dropout)) - self.state_size = (self.filters, self.filters) - - def build(self, input_shape): - - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis] is None: - raise ValueError( - 'The channel dimension of the inputs (last axis) should be defined. ' - f'Found None. 
Full input shape received: input_shape={input_shape}') - input_dim = input_shape[channel_axis] - self.kernel_shape = self.kernel_size + (input_dim, self.filters * 4) - recurrent_kernel_shape = self.kernel_size + (self.filters, self.filters * 4) - - self.kernel = self.add_weight( - shape=self.kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.recurrent_kernel = self.add_weight( - shape=recurrent_kernel_shape, - initializer=self.recurrent_initializer, - name='recurrent_kernel', - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - - if self.use_bias: - if self.unit_forget_bias: - - def bias_initializer(_, *args, **kwargs): - return backend.concatenate([ - self.bias_initializer((self.filters,), *args, **kwargs), - initializers.get('ones')((self.filters,), *args, **kwargs), - self.bias_initializer((self.filters * 2,), *args, **kwargs), - ]) - else: - bias_initializer = self.bias_initializer - self.bias = self.add_weight( - shape=(self.filters * 4,), - name='bias', - initializer=bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - self.built = True - - def call(self, inputs, states, training=None): - h_tm1 = states[0] # previous memory state - c_tm1 = states[1] # previous carry state - - # dropout matrices for input units - dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4) - # dropout matrices for recurrent units - rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( - h_tm1, training, count=4) - - if 0 < self.dropout < 1.: - inputs_i = inputs * dp_mask[0] - inputs_f = inputs * dp_mask[1] - inputs_c = inputs * dp_mask[2] - inputs_o = inputs * dp_mask[3] - else: - inputs_i = inputs - inputs_f = inputs - inputs_c = inputs - inputs_o = inputs - - if 0 < self.recurrent_dropout < 1.: - h_tm1_i = h_tm1 * rec_dp_mask[0] - h_tm1_f = h_tm1 * rec_dp_mask[1] - h_tm1_c = h_tm1 * rec_dp_mask[2] - h_tm1_o = h_tm1 * rec_dp_mask[3] - else: - h_tm1_i = h_tm1 - h_tm1_f = h_tm1 - h_tm1_c = h_tm1 - h_tm1_o = h_tm1 - - (kernel_i, kernel_f, kernel_c, kernel_o) = tf.split( - self.kernel, 4, axis=self.rank + 1) - (recurrent_kernel_i, recurrent_kernel_f, recurrent_kernel_c, - recurrent_kernel_o) = tf.split( - self.recurrent_kernel, 4, axis=self.rank + 1) - - if self.use_bias: - bias_i, bias_f, bias_c, bias_o = tf.split(self.bias, 4) - else: - bias_i, bias_f, bias_c, bias_o = None, None, None, None - - x_i = self.input_conv(inputs_i, kernel_i, bias_i, padding=self.padding) - x_f = self.input_conv(inputs_f, kernel_f, bias_f, padding=self.padding) - x_c = self.input_conv(inputs_c, kernel_c, bias_c, padding=self.padding) - x_o = self.input_conv(inputs_o, kernel_o, bias_o, padding=self.padding) - h_i = self.recurrent_conv(h_tm1_i, recurrent_kernel_i) - h_f = self.recurrent_conv(h_tm1_f, recurrent_kernel_f) - h_c = self.recurrent_conv(h_tm1_c, recurrent_kernel_c) - h_o = self.recurrent_conv(h_tm1_o, recurrent_kernel_o) - - i = self.recurrent_activation(x_i + h_i) - f = self.recurrent_activation(x_f + h_f) - c = f * c_tm1 + i * self.activation(x_c + h_c) - o = self.recurrent_activation(x_o + h_o) - h = o * self.activation(c) - return h, [h, c] - - @property - def _conv_func(self): - if self.rank == 1: - return backend.conv1d - if self.rank == 2: - return backend.conv2d - if self.rank == 3: - return backend.conv3d - - def input_conv(self, x, w, b=None, padding='valid'): - conv_out = self._conv_func( - x, - w, - 
strides=self.strides, - padding=padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) - if b is not None: - conv_out = backend.bias_add(conv_out, b, data_format=self.data_format) - return conv_out - - def recurrent_conv(self, x, w): - strides = conv_utils.normalize_tuple( - 1, self.rank, 'strides', allow_zero=True) - conv_out = self._conv_func( - x, w, strides=strides, padding='same', data_format=self.data_format) - return conv_out - - def get_config(self): - config = { - 'filters': + """Cell class for the ConvLSTM layer. + + Args: + rank: Integer, rank of the convolution, e.g. "2" for 2D convolutions. + filters: Integer, the dimensionality of the output space (i.e. the number + of output filters in the convolution). + kernel_size: An integer or tuple/list of n integers, specifying the + dimensions of the convolution window. + strides: An integer or tuple/list of n integers, specifying the strides of + the convolution. Specifying any stride value != 1 is incompatible with + specifying any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means + no padding. `"same"` results in padding evenly to the left/right or + up/down of the input such that output has the same height/width + dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + dilation_rate: An integer or tuple/list of n integers, specifying the + dilation rate to use for dilated convolution. Currently, specifying any + `dilation_rate` value != 1 is incompatible with specifying any `strides` + value != 1. + activation: Activation function to use. If you don't specify anything, no + activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use for the recurrent step. + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, used for + the linear transformation of the inputs. + recurrent_initializer: Initializer for the `recurrent_kernel` weights + matrix, used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate + at initialization. Use in combination with `bias_initializer="zeros"`. + This is recommended in [Jozefowicz et al., 2015]( + http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. + recurrent_regularizer: Regularizer function applied to the + `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. + recurrent_constraint: Constraint function applied to the + `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + dropout: Float between 0 and 1. Fraction of the units to drop for the + linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + for the linear transformation of the recurrent state. + Call arguments: + inputs: A (2+ `rank`)D tensor. + states: List of state tensors corresponding to the previous timestep. 
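For orientation (an aside, not part of this patch; the filter count and shapes below are illustrative assumptions), the rank-2 public wrapper built on this cell consumes 5-D sequences shaped `(batch, time, rows, cols, channels)` under `channels_last`:

```python
import tensorflow as tf

x = tf.random.normal((4, 10, 32, 32, 3))   # (batch, time, rows, cols, ch)
layer = tf.keras.layers.ConvLSTM2D(
    filters=8, kernel_size=3, padding="same", return_sequences=True)
y = layer(x)   # -> (4, 10, 32, 32, 8); "same" padding preserves rows/cols
```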
+ training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. Only relevant when `dropout` or + `recurrent_dropout` is used. + """ + + def __init__( + self, + rank, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + activation="tanh", + recurrent_activation="hard_sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + **kwargs, + ): + super().__init__(**kwargs) + self.rank = rank + if self.rank > 3: + raise ValueError( + f"Rank {rank} convolutions are not currently " + f"implemented. Received: rank={rank}" + ) + self.filters = filters + self.kernel_size = conv_utils.normalize_tuple( + kernel_size, self.rank, "kernel_size" + ) + self.strides = conv_utils.normalize_tuple( + strides, self.rank, "strides", allow_zero=True + ) + self.padding = conv_utils.normalize_padding(padding) + self.data_format = conv_utils.normalize_data_format(data_format) + self.dilation_rate = conv_utils.normalize_tuple( + dilation_rate, self.rank, "dilation_rate" + ) + self.activation = activations.get(activation) + self.recurrent_activation = activations.get(recurrent_activation) + self.use_bias = use_bias + + self.kernel_initializer = initializers.get(kernel_initializer) + self.recurrent_initializer = initializers.get(recurrent_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.unit_forget_bias = unit_forget_bias + + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.recurrent_regularizer = regularizers.get(recurrent_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + + self.kernel_constraint = constraints.get(kernel_constraint) + self.recurrent_constraint = constraints.get(recurrent_constraint) + self.bias_constraint = constraints.get(bias_constraint) + + self.dropout = min(1.0, max(0.0, dropout)) + self.recurrent_dropout = min(1.0, max(0.0, recurrent_dropout)) + self.state_size = (self.filters, self.filters) + + def build(self, input_shape): + super().build(input_shape) + if self.data_format == "channels_first": + channel_axis = 1 + else: + channel_axis = -1 + if input_shape[channel_axis] is None: + raise ValueError( + "The channel dimension of the inputs (last axis) should be " + "defined. Found None. 
Full input shape received: " + f"input_shape={input_shape}" + ) + input_dim = input_shape[channel_axis] + self.kernel_shape = self.kernel_size + (input_dim, self.filters * 4) + recurrent_kernel_shape = self.kernel_size + ( self.filters, - 'kernel_size': - self.kernel_size, - 'strides': - self.strides, - 'padding': - self.padding, - 'data_format': - self.data_format, - 'dilation_rate': - self.dilation_rate, - 'activation': - activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'unit_forget_bias': - self.unit_forget_bias, - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + self.filters * 4, + ) + + self.kernel = self.add_weight( + shape=self.kernel_shape, + initializer=self.kernel_initializer, + name="kernel", + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + self.recurrent_kernel = self.add_weight( + shape=recurrent_kernel_shape, + initializer=self.recurrent_initializer, + name="recurrent_kernel", + regularizer=self.recurrent_regularizer, + constraint=self.recurrent_constraint, + ) + + if self.use_bias: + if self.unit_forget_bias: + + def bias_initializer(_, *args, **kwargs): + return backend.concatenate( + [ + self.bias_initializer( + (self.filters,), *args, **kwargs + ), + initializers.get("ones")( + (self.filters,), *args, **kwargs + ), + self.bias_initializer( + (self.filters * 2,), *args, **kwargs + ), + ] + ) + + else: + bias_initializer = self.bias_initializer + self.bias = self.add_weight( + shape=(self.filters * 4,), + name="bias", + initializer=bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + else: + self.bias = None + self.built = True + + def call(self, inputs, states, training=None): + h_tm1 = states[0] # previous memory state + c_tm1 = states[1] # previous carry state + + # dropout matrices for input units + dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4) + # dropout matrices for recurrent units + rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( + h_tm1, training, count=4 + ) + + if 0 < self.dropout < 1.0: + inputs_i = inputs * dp_mask[0] + inputs_f = inputs * dp_mask[1] + inputs_c = inputs * dp_mask[2] + inputs_o = inputs * dp_mask[3] + else: + inputs_i = inputs + inputs_f = inputs + inputs_c = inputs + inputs_o = inputs + + if 0 < self.recurrent_dropout < 1.0: + h_tm1_i = h_tm1 * rec_dp_mask[0] + h_tm1_f = h_tm1 * rec_dp_mask[1] + h_tm1_c = h_tm1 * rec_dp_mask[2] + h_tm1_o = h_tm1 * rec_dp_mask[3] + else: + h_tm1_i = h_tm1 + h_tm1_f = h_tm1 + h_tm1_c = h_tm1 + h_tm1_o = h_tm1 + + (kernel_i, kernel_f, kernel_c, kernel_o) = tf.split( + self.kernel, 4, axis=self.rank + 1 + ) + ( + 
recurrent_kernel_i, + recurrent_kernel_f, + recurrent_kernel_c, + recurrent_kernel_o, + ) = tf.split(self.recurrent_kernel, 4, axis=self.rank + 1) + + if self.use_bias: + bias_i, bias_f, bias_c, bias_o = tf.split(self.bias, 4) + else: + bias_i, bias_f, bias_c, bias_o = None, None, None, None + + x_i = self.input_conv(inputs_i, kernel_i, bias_i, padding=self.padding) + x_f = self.input_conv(inputs_f, kernel_f, bias_f, padding=self.padding) + x_c = self.input_conv(inputs_c, kernel_c, bias_c, padding=self.padding) + x_o = self.input_conv(inputs_o, kernel_o, bias_o, padding=self.padding) + h_i = self.recurrent_conv(h_tm1_i, recurrent_kernel_i) + h_f = self.recurrent_conv(h_tm1_f, recurrent_kernel_f) + h_c = self.recurrent_conv(h_tm1_c, recurrent_kernel_c) + h_o = self.recurrent_conv(h_tm1_o, recurrent_kernel_o) + + i = self.recurrent_activation(x_i + h_i) + f = self.recurrent_activation(x_f + h_f) + c = f * c_tm1 + i * self.activation(x_c + h_c) + o = self.recurrent_activation(x_o + h_o) + h = o * self.activation(c) + return h, [h, c] + + @property + def _conv_func(self): + if self.rank == 1: + return backend.conv1d + if self.rank == 2: + return backend.conv2d + if self.rank == 3: + return backend.conv3d + + def input_conv(self, x, w, b=None, padding="valid"): + conv_out = self._conv_func( + x, + w, + strides=self.strides, + padding=padding, + data_format=self.data_format, + dilation_rate=self.dilation_rate, + ) + if b is not None: + conv_out = backend.bias_add( + conv_out, b, data_format=self.data_format + ) + return conv_out + + def recurrent_conv(self, x, w): + strides = conv_utils.normalize_tuple( + 1, self.rank, "strides", allow_zero=True + ) + conv_out = self._conv_func( + x, w, strides=strides, padding="same", data_format=self.data_format + ) + return conv_out + + def get_config(self): + config = { + "filters": self.filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "padding": self.padding, + "data_format": self.data_format, + "dilation_rate": self.dilation_rate, + "activation": activations.serialize(self.activation), + "recurrent_activation": activations.serialize( + self.recurrent_activation + ), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "unit_forget_bias": self.unit_forget_bias, + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) class ConvLSTM(ConvRNN): - """Abstract N-D Convolutional LSTM layer (used as implementation base). - - Similar to an LSTM layer, but the input transformations - and recurrent transformations are both convolutional. - - Args: - rank: Integer, rank of the convolution, e.g. "2" for 2D convolutions. - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). 
- kernel_size: An integer or tuple/list of n integers, specifying the - dimensions of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the strides of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, time, ..., channels)` - while `channels_first` corresponds to - inputs with shape `(batch, time, channels, ...)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - activation: Activation function to use. - By default hyperbolic tangent activation function is applied - (`tanh(x)`). - recurrent_activation: Activation function to use - for the recurrent step. - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - unit_forget_bias: Boolean. - If True, add 1 to the bias of the forget gate at initialization. - Use in combination with `bias_initializer="zeros"`. - This is recommended in [Jozefowicz et al., 2015]( - http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to. - kernel_constraint: Constraint function applied to - the `kernel` weights matrix. - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. (default False) - return_state: Boolean Whether to return the last state - in addition to the output. (default False) - go_backwards: Boolean (default False). - If True, process the input sequence backwards. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. 
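A small sketch of the `return_state` behavior described in this docstring (an aside, not part of this patch; shapes are illustrative assumptions): with `return_state=True` the layer returns the output plus the final hidden and carry states, mirroring the `return h, [h, c]` contract of the cell's `call` above.

```python
import tensorflow as tf

layer = tf.keras.layers.ConvLSTM2D(
    filters=8, kernel_size=3, padding="same", return_state=True)
out, state_h, state_c = layer(tf.zeros((2, 5, 16, 16, 1)))
# out, state_h and state_c each have shape (2, 16, 16, 8); with
# return_sequences=False, out is the same tensor as state_h.
```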
- """ - - def __init__(self, - rank, - filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - dropout=0.0, - recurrent_dropout=0.0, - **kwargs): - cell = ConvLSTMCell( - rank=rank, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - unit_forget_bias=unit_forget_bias, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - dtype=kwargs.get('dtype')) - super().__init__( + """Abstract N-D Convolutional LSTM layer (used as implementation base). + + Similar to an LSTM layer, but the input transformations + and recurrent transformations are both convolutional. + + Args: + rank: Integer, rank of the convolution, e.g. "2" for 2D convolutions. + filters: Integer, the dimensionality of the output space + (i.e. the number of output filters in the convolution). + kernel_size: An integer or tuple/list of n integers, specifying the + dimensions of the convolution window. + strides: An integer or tuple/list of n integers, + specifying the strides of the convolution. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, time, ..., channels)` + while `channels_first` corresponds to + inputs with shape `(batch, time, channels, ...)`. + When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + dilation_rate: An integer or tuple/list of n integers, specifying + the dilation rate to use for dilated convolution. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any `strides` value != 1. + activation: Activation function to use. + By default hyperbolic tangent activation function is applied + (`tanh(x)`). + recurrent_activation: Activation function to use + for the recurrent step. + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. 
+ recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + unit_forget_bias: Boolean. + If True, add 1 to the bias of the forget gate at initialization. + Use in combination with `bias_initializer="zeros"`. + This is recommended in [Jozefowicz et al., 2015]( + http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix. + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to the output of + the layer (its "activation"). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix. + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + return_sequences: Boolean. Whether to return the last output + in the output sequence, or the full sequence. (default False) + return_state: Boolean. Whether to return the last state + in addition to the output. (default False) + go_backwards: Boolean (default False). + If True, process the input sequence backwards. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + """ + + def __init__( + self, rank, - cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - - def call(self, inputs, mask=None, training=None, initial_state=None): - return super().call( - inputs, mask=mask, training=training, initial_state=initial_state) - - @property - def filters(self): - return self.cell.filters - - @property - def kernel_size(self): - return self.cell.kernel_size - - @property - def strides(self): - return self.cell.strides - - @property - def padding(self): - return self.cell.padding - - @property - def data_format(self): - return self.cell.data_format - - @property - def dilation_rate(self): - return self.cell.dilation_rate - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def unit_forget_bias(self): - return self.cell.unit_forget_bias - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - -
@property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - def get_config(self): - config = {'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'dilation_rate': self.dilation_rate, - 'activation': activations.serialize(self.activation), - 'recurrent_activation': activations.serialize( - self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize( - self.kernel_initializer), - 'recurrent_initializer': initializers.serialize( - self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'unit_forget_bias': self.unit_forget_bias, - 'kernel_regularizer': regularizers.serialize( - self.kernel_regularizer), - 'recurrent_regularizer': regularizers.serialize( - self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': regularizers.serialize( - self.activity_regularizer), - 'kernel_constraint': constraints.serialize( - self.kernel_constraint), - 'recurrent_constraint': constraints.serialize( - self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout} - base_config = super().get_config() - del base_config['cell'] - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - return cls(**config) + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + activation="tanh", + recurrent_activation="hard_sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + dropout=0.0, + recurrent_dropout=0.0, + **kwargs, + ): + cell = ConvLSTMCell( + rank=rank, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + unit_forget_bias=unit_forget_bias, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + name="conv_lstm_cell", + dtype=kwargs.get("dtype"), + ) + super().__init__( + rank, + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + **kwargs, + ) + self.activity_regularizer = regularizers.get(activity_regularizer) + + def call(self, inputs, mask=None, training=None, initial_state=None): + return super().call( + inputs, mask=mask, training=training, initial_state=initial_state + ) + + @property + def filters(self): + 
return self.cell.filters + + @property + def kernel_size(self): + return self.cell.kernel_size + + @property + def strides(self): + return self.cell.strides + + @property + def padding(self): + return self.cell.padding + + @property + def data_format(self): + return self.cell.data_format + + @property + def dilation_rate(self): + return self.cell.dilation_rate + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def unit_forget_bias(self): + return self.cell.unit_forget_bias + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + def get_config(self): + config = { + "filters": self.filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "padding": self.padding, + "data_format": self.data_format, + "dilation_rate": self.dilation_rate, + "activation": activations.serialize(self.activation), + "recurrent_activation": activations.serialize( + self.recurrent_activation + ), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "unit_forget_bias": self.unit_forget_bias, + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + } + base_config = super().get_config() + del base_config["cell"] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/keras/layers/rnn/base_conv_rnn.py b/keras/layers/rnn/base_conv_rnn.py index 86f2babe950b..bdeef1155cd4 100644 --- a/keras/layers/rnn/base_conv_rnn.py +++ b/keras/layers/rnn/base_conv_rnn.py @@ -13,7 +13,10 @@ # limitations under the License. 
# ============================================================================== """Base class for convolutional-recurrent layers.""" -# pylint: disable=g-classes-have-attributes + + +import numpy as np +import tensorflow.compat.v2 as tf from keras import backend from keras.engine import base_layer @@ -22,369 +25,413 @@ from keras.utils import conv_utils from keras.utils import generic_utils from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf class ConvRNN(RNN): - """N-Dimensional Base class for convolutional-recurrent layers. - - Args: - rank: Integer, rank of the convolution, e.g. "2" for 2D convolutions. - cell: A RNN cell instance. A RNN cell is a class that has: - a - `call(input_at_t, states_at_t)` method, returning `(output_at_t, - states_at_t_plus_1)`. The call method of the cell can also take the - optional argument `constants`, see section "Note on passing external - constants" below. - a `state_size` attribute. This can be a single integer - (single state) in which case it is the number of channels of the recurrent - state (which should be the same as the number of channels of the cell - output). This can also be a list/tuple of integers (one size per state). - In this case, the first entry (`state_size[0]`) should be the same as the - size of the cell output. - return_sequences: Boolean. Whether to return the last output. in the output - sequence, or the full sequence. - return_state: Boolean. Whether to return the last state in addition to the - output. - go_backwards: Boolean (default False). If True, process the input sequence - backwards and return the reversed sequence. - stateful: Boolean (default False). If True, the last state for each sample - at index i in a batch will be used as initial state for the sample of - index i in the following batch. - input_shape: Use this argument to specify the shape of the input when this - layer is the first one in a model. - Call arguments: - inputs: A (2 + `rank`)D tensor. - mask: Binary tensor of shape `(samples, timesteps)` indicating whether a - given timestep should be masked. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the cell - when calling it. This is for use with cells that use dropout. - initial_state: List of initial state tensors to be passed to the first call - of the cell. - constants: List of constant tensors to be passed to the cell at each - timestep. - Input shape: - (3 + `rank`)D tensor with shape: `(samples, timesteps, channels, - img_dimensions...)` - if data_format='channels_first' or shape: `(samples, timesteps, - img_dimensions..., channels)` if data_format='channels_last'. - Output shape: - - If `return_state`: a list of tensors. The first tensor is the output. The - remaining tensors are the last states, - each (2 + `rank`)D tensor with shape: `(samples, filters, + """N-Dimensional Base class for convolutional-recurrent layers. + + Args: + rank: Integer, rank of the convolution, e.g. "2" for 2D convolutions. + cell: A RNN cell instance. A RNN cell is a class that has: - a + `call(input_at_t, states_at_t)` method, returning `(output_at_t, + states_at_t_plus_1)`. The call method of the cell can also take the + optional argument `constants`, see section "Note on passing external + constants" below. - a `state_size` attribute. 
This can be a single + integer (single state) in which case it is the number of channels of the + recurrent state (which should be the same as the number of channels of + the cell output). This can also be a list/tuple of integers (one size + per state). In this case, the first entry (`state_size[0]`) should be + the same as the size of the cell output. + return_sequences: Boolean. Whether to return the last output in the + output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state in addition to the + output. + go_backwards: Boolean (default False). If True, process the input sequence + backwards and return the reversed sequence. + stateful: Boolean (default False). If True, the last state for each sample + at index i in a batch will be used as initial state for the sample of + index i in the following batch. + input_shape: Use this argument to specify the shape of the input when this + layer is the first one in a model. + Call arguments: + inputs: A (2 + `rank`)D tensor. + mask: Binary tensor of shape `(samples, timesteps)` indicating whether a + given timestep should be masked. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. This argument is passed to the cell + when calling it. This is for use with cells that use dropout. + initial_state: List of initial state tensors to be passed to the first + call of the cell. + constants: List of constant tensors to be passed to the cell at each + timestep. + Input shape: + (3 + `rank`)D tensor with shape: `(samples, timesteps, channels, + img_dimensions...)` + if data_format='channels_first' or shape: `(samples, timesteps, + img_dimensions..., channels)` if data_format='channels_last'. + Output shape: + - If `return_state`: a list of tensors. The first tensor is the output. + The remaining tensors are the last states, + each (2 + `rank`)D tensor with shape: `(samples, filters, + new_img_dimensions...)` if data_format='channels_first' + or shape: `(samples, new_img_dimensions..., filters)` if + data_format='channels_last'. img_dimension values might have changed + due to padding. + - If `return_sequences`: (3 + `rank`)D tensor with shape: `(samples, + timesteps, filters, new_img_dimensions...)` if + data_format='channels_first' + or shape: `(samples, timesteps, new_img_dimensions..., filters)` if + data_format='channels_last'. + - Else, (2 + `rank`)D tensor with shape: `(samples, filters, new_img_dimensions...)` if data_format='channels_first' - or shape: `(samples, new_img_dimensions..., filters)` if - data_format='channels_last'. img_dimension values might have changed due - to padding. - - If `return_sequences`: (3 + `rank`)D tensor with shape: `(samples, - timesteps, filters, new_img_dimensions...)` if - data_format='channels_first' - or shape: `(samples, timesteps, new_img_dimensions..., filters)` if - data_format='channels_last'. - - Else, (2 + `rank`)D tensor with shape: `(samples, filters, - new_img_dimensions...)` if data_format='channels_first' - or shape: `(samples, new_img_dimensions..., filters)` if - data_format='channels_last'. - Masking: This layer supports masking for input data with a variable number of - timesteps. - Note on using statefulness in RNNs: You can set RNN layers to be 'stateful', - which means that the states computed for the samples in one batch will be - reused as initial states for the samples in the next batch. This assumes a - one-to-one mapping between samples in different successive batches.
- To enable statefulness: - Specify `stateful=True` in the layer constructor. - - Specify a fixed batch size for your model, by passing - - If sequential model: `batch_input_shape=(...)` to the first layer in - your model. - - If functional model with 1 or more Input layers: `batch_shape=(...)` - to all the first layers in your model. This is the expected shape of - your inputs *including the batch size*. It should be a tuple of - integers, e.g. `(32, 10, 100, 100, 32)`. for rank 2 convolution Note - that the image dimensions should be specified too. - Specify - `shuffle=False` when calling fit(). To reset the states of your - model, call `.reset_states()` on either a specific layer, or on your - entire model. - Note on specifying the initial state of RNNs: You can specify the initial - state of RNN layers symbolically by calling them with the keyword argument - `initial_state`. The value of `initial_state` should be a tensor or list of - tensors representing the initial state of the RNN layer. You can specify the - initial state of RNN layers numerically by calling `reset_states` with the - keyword argument `states`. The value of `states` should be a numpy array or - list of numpy arrays representing the initial state of the RNN layer. - Note on passing external constants to RNNs: You can pass "external" constants - to the cell using the `constants` keyword argument of `RNN.__call__` (as - well as `RNN.call`) method. This requires that the `cell.call` method - accepts the same keyword argument `constants`. Such constants can be used to - condition the cell transformation on additional static inputs (not changing - over time), a.k.a. an attention mechanism. - """ - - def __init__(self, - rank, - cell, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - **kwargs): - if unroll: - raise TypeError( - 'Unrolling is not possible with convolutional RNNs. ' - f'Received: unroll={unroll}') - if isinstance(cell, (list, tuple)): - # The StackedConvRNN3DCells isn't implemented yet. - raise TypeError('It is not possible at the moment to' - 'stack convolutional cells. Only pass a single cell ' - 'instance as the `cell` argument. 
Received: ' - f'cell={cell}') - super().__init__(cell, return_sequences, return_state, - go_backwards, stateful, unroll, **kwargs) - self.rank = rank - self.input_spec = [InputSpec(ndim=rank + 3)] - self.states = None - self._num_constants = None - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - - cell = self.cell - if cell.data_format == 'channels_first': - img_dims = input_shape[3:] - elif cell.data_format == 'channels_last': - img_dims = input_shape[2:-1] - - norm_img_dims = tuple([ - conv_utils.conv_output_length( # pylint: disable=g-complex-comprehension - img_dims[idx], - cell.kernel_size[idx], - padding=cell.padding, - stride=cell.strides[idx], - dilation=cell.dilation_rate[idx]) for idx in range(len(img_dims)) - ]) - - if cell.data_format == 'channels_first': - output_shape = input_shape[:2] + (cell.filters,) + norm_img_dims - elif cell.data_format == 'channels_last': - output_shape = input_shape[:2] + norm_img_dims + (cell.filters,) - - if not self.return_sequences: - output_shape = output_shape[:1] + output_shape[2:] - - if self.return_state: - output_shape = [output_shape] - if cell.data_format == 'channels_first': - output_shape += [ - (input_shape[0], cell.filters) + norm_img_dims for _ in range(2) - ] - elif cell.data_format == 'channels_last': - output_shape += [(input_shape[0],) + norm_img_dims + (cell.filters,) - for _ in range(2)] - return output_shape - - @tf_utils.shape_type_conversion - def build(self, input_shape): - # Note input_shape will be list of shapes of initial states and - # constants if these are passed in __call__. - if self._num_constants is not None: - constants_shape = input_shape[-self._num_constants:] # pylint: disable=invalid-unary-operand-type - else: - constants_shape = None - - if isinstance(input_shape, list): - input_shape = input_shape[0] - - batch_size = input_shape[0] if self.stateful else None - self.input_spec[0] = InputSpec( - shape=(batch_size, None) + input_shape[2:self.rank + 3]) - - # allow cell (if layer) to build before we set or validate state_spec - if isinstance(self.cell, base_layer.Layer): - step_input_shape = (input_shape[0],) + input_shape[2:] - if constants_shape is not None: - self.cell.build([step_input_shape] + constants_shape) - else: - self.cell.build(step_input_shape) - - # set or validate state_spec - if hasattr(self.cell.state_size, '__len__'): - state_size = list(self.cell.state_size) - else: - state_size = [self.cell.state_size] - - if self.state_spec is not None: - # initial_state was passed in call, check compatibility - if self.cell.data_format == 'channels_first': - ch_dim = 1 - elif self.cell.data_format == 'channels_last': - ch_dim = self.rank + 1 - if [spec.shape[ch_dim] for spec in self.state_spec] != state_size: - raise ValueError( - 'An `initial_state` was passed that is not compatible with ' - '`cell.state_size`. Received state shapes ' - f'{[spec.shape for spec in self.state_spec]}. 
' - f'However `cell.state_size` is {self.cell.state_size}') - else: - img_dims = tuple((None for _ in range(self.rank))) - if self.cell.data_format == 'channels_first': - self.state_spec = [ - InputSpec(shape=(None, dim) + img_dims) for dim in state_size - ] - elif self.cell.data_format == 'channels_last': - self.state_spec = [ - InputSpec(shape=(None,) + img_dims + (dim,)) for dim in state_size - ] - if self.stateful: - self.reset_states() - self.built = True - - def get_initial_state(self, inputs): - # (samples, timesteps, img_dims..., filters) - initial_state = backend.zeros_like(inputs) - # (samples, img_dims..., filters) - initial_state = backend.sum(initial_state, axis=1) - shape = list(self.cell.kernel_shape) - shape[-1] = self.cell.filters - initial_state = self.cell.input_conv(initial_state, - tf.zeros(tuple(shape), - initial_state.dtype), - padding=self.cell.padding) - - if hasattr(self.cell.state_size, '__len__'): - return [initial_state for _ in self.cell.state_size] - else: - return [initial_state] - - def call(self, - inputs, - mask=None, - training=None, - initial_state=None, - constants=None): - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. - inputs, initial_state, constants = self._process_inputs( - inputs, initial_state, constants) - - if isinstance(mask, list): - mask = mask[0] - timesteps = backend.int_shape(inputs)[1] - - kwargs = {} - if generic_utils.has_arg(self.cell.call, 'training'): - kwargs['training'] = training - - if constants: - if not generic_utils.has_arg(self.cell.call, 'constants'): - raise ValueError( - f'RNN cell {self.cell} does not support constants. ' - f'Received: constants={constants}') - - def step(inputs, states): - constants = states[-self._num_constants:] # pylint: disable=invalid-unary-operand-type - states = states[:-self._num_constants] # pylint: disable=invalid-unary-operand-type - return self.cell.call(inputs, states, constants=constants, **kwargs) - else: - def step(inputs, states): - return self.cell.call(inputs, states, **kwargs) - - last_output, outputs, states = backend.rnn( - step, + or shape: `(samples, new_img_dimensions..., filters)` if + data_format='channels_last'. + Masking: This layer supports masking for input data with a variable number + of timesteps. + Note on using statefulness in RNNs: You can set RNN layers to be 'stateful', + which means that the states computed for the samples in one batch will be + reused as initial states for the samples in the next batch. This assumes a + one-to-one mapping between samples in different successive batches. + To enable statefulness: - Specify `stateful=True` in the layer + constructor. + - Specify a fixed batch size for your model, by passing + - If sequential model: `batch_input_shape=(...)` to the first layer + in your model. + - If functional model with 1 or more Input layers: + `batch_shape=(...)` to all the first layers in your model. This is + the expected shape of your inputs *including the batch size*. It + should be a tuple of integers, e.g. `(32, 10, 100, 100, 32)` for + rank 2 convolution. Note that the image dimensions should be + specified too. - Specify `shuffle=False` when calling fit(). To + reset the states of your model, call `.reset_states()` on either a + specific layer, or on your entire model. + Note on specifying the initial state of RNNs: You can specify the initial + state of RNN layers symbolically by calling them with the keyword argument + `initial_state`. 
The value of `initial_state` should be a tensor or list + of tensors representing the initial state of the RNN layer. You can + specify the initial state of RNN layers numerically by calling + `reset_states` with the keyword argument `states`. The value of `states` + should be a numpy array or list of numpy arrays representing the initial + state of the RNN layer. + Note on passing external constants to RNNs: You can pass "external" + constants to the cell using the `constants` keyword argument of the + `RNN.__call__` (as well as `RNN.call`) method. This requires that the + `cell.call` method accepts the same keyword argument `constants`. Such + constants can be used to condition the cell transformation on additional + static inputs (not changing over time), a.k.a. an attention mechanism. + """ + + def __init__( + self, + rank, + cell, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + **kwargs, + ): + if unroll: + raise TypeError( + "Unrolling is not possible with convolutional RNNs. " + f"Received: unroll={unroll}" + ) + if isinstance(cell, (list, tuple)): + # The StackedConvRNN3DCells isn't implemented yet. + raise TypeError( + "It is not possible at the moment to " + "stack convolutional cells. Only pass a single cell " + "instance as the `cell` argument. Received: " + f"cell={cell}" + ) + super().__init__( + cell, + return_sequences, + return_state, + go_backwards, + stateful, + unroll, + **kwargs, + ) + self.rank = rank + self.input_spec = [InputSpec(ndim=rank + 3)] + self.states = None + self._num_constants = None + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + if isinstance(input_shape, list): + input_shape = input_shape[0] + + cell = self.cell + if cell.data_format == "channels_first": + img_dims = input_shape[3:] + elif cell.data_format == "channels_last": + img_dims = input_shape[2:-1] + + norm_img_dims = tuple( + [ + conv_utils.conv_output_length( + img_dims[idx], + cell.kernel_size[idx], + padding=cell.padding, + stride=cell.strides[idx], + dilation=cell.dilation_rate[idx], + ) + for idx in range(len(img_dims)) + ] + ) + + if cell.data_format == "channels_first": + output_shape = input_shape[:2] + (cell.filters,) + norm_img_dims + elif cell.data_format == "channels_last": + output_shape = input_shape[:2] + norm_img_dims + (cell.filters,) + + if not self.return_sequences: + output_shape = output_shape[:1] + output_shape[2:] + + if self.return_state: + output_shape = [output_shape] + if cell.data_format == "channels_first": + output_shape += [ + (input_shape[0], cell.filters) + norm_img_dims + for _ in range(2) + ] + elif cell.data_format == "channels_last": + output_shape += [ + (input_shape[0],) + norm_img_dims + (cell.filters,) + for _ in range(2) + ] + return output_shape + + @tf_utils.shape_type_conversion + def build(self, input_shape): + # Note input_shape will be list of shapes of initial states and + # constants if these are passed in __call__. 
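To make the statefulness contract described in the docstring above concrete, here is a minimal sketch using the public `ConvLSTM2D` layer, which builds on this base class; the filter count, kernel size, and input shape are illustrative assumptions, not values taken from this diff.

```python
# A minimal sketch of the stateful workflow described in the docstring
# above; shapes and layer hyperparameters are illustrative assumptions.
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential(
    [
        tf.keras.layers.ConvLSTM2D(
            filters=8,
            kernel_size=3,
            stateful=True,
            # Stateful RNNs require a fixed batch size, passed via
            # `batch_input_shape` on the first layer.
            batch_input_shape=(4, 10, 32, 32, 1),  # (batch, time, h, w, ch)
        )
    ]
)

x = np.random.rand(4, 10, 32, 32, 1).astype("float32")
y = model(x)  # final states are kept and reused by the next call
print(y.shape)  # (4, 30, 30, 8) with the default 'valid' padding

model.reset_states()  # clear the state between independent sequences
```

`shuffle=False` matters only when training with `fit()`, since shuffling would break the one-to-one correspondence between samples in successive batches that statefulness assumes.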
+ if self._num_constants is not None: + constants_shape = input_shape[-self._num_constants :] + else: + constants_shape = None + + if isinstance(input_shape, list): + input_shape = input_shape[0] + + batch_size = input_shape[0] if self.stateful else None + self.input_spec[0] = InputSpec( + shape=(batch_size, None) + input_shape[2 : self.rank + 3] + ) + + # allow cell (if layer) to build before we set or validate state_spec + if isinstance(self.cell, base_layer.Layer): + step_input_shape = (input_shape[0],) + input_shape[2:] + if constants_shape is not None: + self.cell.build([step_input_shape] + constants_shape) + else: + self.cell.build(step_input_shape) + + # set or validate state_spec + if hasattr(self.cell.state_size, "__len__"): + state_size = list(self.cell.state_size) + else: + state_size = [self.cell.state_size] + + if self.state_spec is not None: + # initial_state was passed in call, check compatibility + if self.cell.data_format == "channels_first": + ch_dim = 1 + elif self.cell.data_format == "channels_last": + ch_dim = self.rank + 1 + if [spec.shape[ch_dim] for spec in self.state_spec] != state_size: + raise ValueError( + "An `initial_state` was passed that is not compatible with " + "`cell.state_size`. Received state shapes " + f"{[spec.shape for spec in self.state_spec]}. " + f"However `cell.state_size` is {self.cell.state_size}" + ) + else: + img_dims = tuple((None for _ in range(self.rank))) + if self.cell.data_format == "channels_first": + self.state_spec = [ + InputSpec(shape=(None, dim) + img_dims) + for dim in state_size + ] + elif self.cell.data_format == "channels_last": + self.state_spec = [ + InputSpec(shape=(None,) + img_dims + (dim,)) + for dim in state_size + ] + if self.stateful: + self.reset_states() + self.built = True + + def get_initial_state(self, inputs): + # (samples, timesteps, img_dims..., filters) + initial_state = backend.zeros_like(inputs) + # (samples, img_dims..., filters) + initial_state = backend.sum(initial_state, axis=1) + shape = list(self.cell.kernel_shape) + shape[-1] = self.cell.filters + initial_state = self.cell.input_conv( + initial_state, + tf.zeros(tuple(shape), initial_state.dtype), + padding=self.cell.padding, + ) + + if hasattr(self.cell.state_size, "__len__"): + return [initial_state for _ in self.cell.state_size] + else: + return [initial_state] + + def call( + self, inputs, - initial_state, - constants=constants, - go_backwards=self.go_backwards, - mask=mask, - input_length=timesteps, - return_all_outputs=self.return_sequences) - if self.stateful: - updates = [ - backend.update(self_state, state) - for self_state, state in zip(self.states, states) - ] - self.add_update(updates) - - if self.return_sequences: - output = outputs - else: - output = last_output - - if self.return_state: - if not isinstance(states, (list, tuple)): - states = [states] - else: - states = list(states) - return [output] + states - return output - - def reset_states(self, states=None): - if not self.stateful: - raise AttributeError('Layer must be stateful.') - input_shape = self.input_spec[0].shape - state_shape = self.compute_output_shape(input_shape) - if self.return_state: - state_shape = state_shape[0] - if self.return_sequences: - state_shape = state_shape[:1].concatenate(state_shape[2:]) - if None in state_shape: - raise ValueError('If a RNN is stateful, it needs to know ' - 'its batch size. 
Specify the batch size ' - 'of your input tensors: \n' - '- If using a Sequential model, ' - 'specify the batch size by passing ' - 'a `batch_input_shape` ' - 'argument to your first layer.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a ' - '`batch_shape` argument to your Input layer.\n' - 'The same thing goes for the number of rows and ' - 'columns.') - - # helper function - def get_tuple_shape(nb_channels): - result = list(state_shape) - if self.cell.data_format == 'channels_first': - result[1] = nb_channels - elif self.cell.data_format == 'channels_last': - result[self.rank + 1] = nb_channels - else: - raise KeyError( - 'Cell data format must be one of ' - '{"channels_first", "channels_last"}. Received: ' - f'cell.data_format={self.cell.data_format}') - return tuple(result) - - # initialize state if None - if self.states[0] is None: - if hasattr(self.cell.state_size, '__len__'): - self.states = [backend.zeros(get_tuple_shape(dim)) - for dim in self.cell.state_size] - else: - self.states = [backend.zeros(get_tuple_shape(self.cell.state_size))] - elif states is None: - if hasattr(self.cell.state_size, '__len__'): - for state, dim in zip(self.states, self.cell.state_size): - backend.set_value(state, np.zeros(get_tuple_shape(dim))) - else: - backend.set_value(self.states[0], - np.zeros(get_tuple_shape(self.cell.state_size))) - else: - if not isinstance(states, (list, tuple)): - states = [states] - if len(states) != len(self.states): - raise ValueError( - f'Layer {self.name} expects {len(self.states)} states, ' - f'but it received {len(states)} state values. ' - f'States received: {states}') - for index, (value, state) in enumerate(zip(states, self.states)): - if hasattr(self.cell.state_size, '__len__'): - dim = self.cell.state_size[index] + mask=None, + training=None, + initial_state=None, + constants=None, + ): + # note that the .build() method of subclasses MUST define + # self.input_spec and self.state_spec with complete input shapes. + inputs, initial_state, constants = self._process_inputs( + inputs, initial_state, constants + ) + + if isinstance(mask, list): + mask = mask[0] + timesteps = backend.int_shape(inputs)[1] + + kwargs = {} + if generic_utils.has_arg(self.cell.call, "training"): + kwargs["training"] = training + + if constants: + if not generic_utils.has_arg(self.cell.call, "constants"): + raise ValueError( + f"RNN cell {self.cell} does not support constants. 
" + f"Received: constants={constants}" + ) + + def step(inputs, states): + constants = states[-self._num_constants :] + states = states[: -self._num_constants] + return self.cell.call( + inputs, states, constants=constants, **kwargs + ) + + else: + + def step(inputs, states): + return self.cell.call(inputs, states, **kwargs) + + last_output, outputs, states = backend.rnn( + step, + inputs, + initial_state, + constants=constants, + go_backwards=self.go_backwards, + mask=mask, + input_length=timesteps, + return_all_outputs=self.return_sequences, + ) + if self.stateful: + updates = [ + backend.update(self_state, state) + for self_state, state in zip(self.states, states) + ] + self.add_update(updates) + + if self.return_sequences: + output = outputs + else: + output = last_output + + if self.return_state: + if not isinstance(states, (list, tuple)): + states = [states] + else: + states = list(states) + return [output] + states + return output + + def reset_states(self, states=None): + if not self.stateful: + raise AttributeError("Layer must be stateful.") + input_shape = self.input_spec[0].shape + state_shape = self.compute_output_shape(input_shape) + if self.return_state: + state_shape = state_shape[0] + if self.return_sequences: + state_shape = state_shape[:1].concatenate(state_shape[2:]) + if None in state_shape: + raise ValueError( + "If a RNN is stateful, it needs to know " + "its batch size. Specify the batch size " + "of your input tensors: \n" + "- If using a Sequential model, " + "specify the batch size by passing " + "a `batch_input_shape` " + "argument to your first layer.\n" + "- If using the functional API, specify " + "the time dimension by passing a " + "`batch_shape` argument to your Input layer.\n" + "The same thing goes for the number of rows and " + "columns." + ) + + # helper function + def get_tuple_shape(nb_channels): + result = list(state_shape) + if self.cell.data_format == "channels_first": + result[1] = nb_channels + elif self.cell.data_format == "channels_last": + result[self.rank + 1] = nb_channels + else: + raise KeyError( + "Cell data format must be one of " + '{"channels_first", "channels_last"}. Received: ' + f"cell.data_format={self.cell.data_format}" + ) + return tuple(result) + + # initialize state if None + if self.states[0] is None: + if hasattr(self.cell.state_size, "__len__"): + self.states = [ + backend.zeros(get_tuple_shape(dim)) + for dim in self.cell.state_size + ] + else: + self.states = [ + backend.zeros(get_tuple_shape(self.cell.state_size)) + ] + elif states is None: + if hasattr(self.cell.state_size, "__len__"): + for state, dim in zip(self.states, self.cell.state_size): + backend.set_value(state, np.zeros(get_tuple_shape(dim))) + else: + backend.set_value( + self.states[0], + np.zeros(get_tuple_shape(self.cell.state_size)), + ) else: - dim = self.cell.state_size - if value.shape != get_tuple_shape(dim): - raise ValueError( - f'State {index} is incompatible with layer {self.name}: ' - f'expected shape={get_tuple_shape(dim)}, ' - f'found shape={value.shape}') - backend.set_value(state, value) + if not isinstance(states, (list, tuple)): + states = [states] + if len(states) != len(self.states): + raise ValueError( + f"Layer {self.name} expects {len(self.states)} states, " + f"but it received {len(states)} state values. 
" + f"States received: {states}" + ) + for index, (value, state) in enumerate(zip(states, self.states)): + if hasattr(self.cell.state_size, "__len__"): + dim = self.cell.state_size[index] + else: + dim = self.cell.state_size + if value.shape != get_tuple_shape(dim): + raise ValueError( + "State {index} is incompatible with layer " + f"{self.name}: expected shape={get_tuple_shape(dim)}, " + f"found shape={value.shape}" + ) + backend.set_value(state, value) diff --git a/keras/layers/rnn/base_cudnn_rnn.py b/keras/layers/rnn/base_cudnn_rnn.py index 197dfdae787e..96426fc72e2a 100644 --- a/keras/layers/rnn/base_cudnn_rnn.py +++ b/keras/layers/rnn/base_cudnn_rnn.py @@ -13,133 +13,138 @@ # limitations under the License. # ============================================================================== """Base class for recurrent layers backed by cuDNN.""" -# pylint: disable=g-classes-have-attributes + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.input_spec import InputSpec from keras.layers.rnn.base_rnn import RNN -import tensorflow.compat.v2 as tf class _CuDNNRNN(RNN): - """Private base class for CuDNNGRU and CuDNNLSTM layers. - - Args: - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - time_major: Boolean (default False). If true, the inputs and outputs will be - in shape `(timesteps, batch, ...)`, whereas in the False case, it will - be `(batch, timesteps, ...)`. - """ - - def __init__(self, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - time_major=False, - **kwargs): - # We invoke the base layer's initializer directly here because we do not - # want to create RNN cell instance. - super(RNN, self).__init__(**kwargs) # pylint: disable=bad-super-call - self.return_sequences = return_sequences - self.return_state = return_state - self.go_backwards = go_backwards - self.stateful = stateful - self.time_major = time_major - self.supports_masking = False - self.input_spec = [InputSpec(ndim=3)] - if hasattr(self.cell.state_size, '__len__'): - state_size = self.cell.state_size - else: - state_size = [self.cell.state_size] - self.state_spec = [InputSpec(shape=(None, dim)) for dim in state_size] - self.constants_spec = None - self._states = None - self._num_constants = 0 - self._vector_shape = tf.constant([-1]) - - def call(self, inputs, mask=None, training=None, initial_state=None): - if isinstance(mask, list): - mask = mask[0] - if mask is not None: - raise ValueError('Masking is not supported for CuDNN RNNs.') - - # input shape: `(samples, time (padded with zeros), input_dim)` - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. 
- if isinstance(inputs, list): - initial_state = inputs[1:] - inputs = inputs[0] - elif initial_state is not None: - pass - elif self.stateful: - initial_state = self.states - else: - initial_state = self.get_initial_state(inputs) - - if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + str(len(initial_state)) + - ' initial states.') - - if self.go_backwards: - # Reverse time axis. - inputs = backend.reverse(inputs, 1) - output, states = self._process_batch(inputs, initial_state) - - if self.stateful: - updates = [ - tf.compat.v1.assign(self_state, state) - for self_state, state in zip(self.states, states) - ] - self.add_update(updates) - - if self.return_state: - return [output] + states - else: - return output - - def get_config(self): - config = { - 'return_sequences': self.return_sequences, - 'return_state': self.return_state, - 'go_backwards': self.go_backwards, - 'stateful': self.stateful, - 'time_major': self.time_major, - } - base_config = super( # pylint: disable=bad-super-call - RNN, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - return cls(**config) - - @property - def trainable_weights(self): - if self.trainable and self.built: - return [self.kernel, self.recurrent_kernel, self.bias] - return [] - - @property - def non_trainable_weights(self): - if not self.trainable and self.built: - return [self.kernel, self.recurrent_kernel, self.bias] - return [] - - @property - def losses(self): - return super(RNN, self).losses # pylint: disable=bad-super-call - - def get_losses_for(self, inputs=None): - return super( # pylint: disable=bad-super-call - RNN, self).get_losses_for(inputs=inputs) + """Private base class for CuDNNGRU and CuDNNLSTM layers. + + Args: + return_sequences: Boolean. Whether to return the last output + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + time_major: Boolean (default False). If true, the inputs and outputs will + be in shape `(timesteps, batch, ...)`, whereas in the False case, it + will be `(batch, timesteps, ...)`. + """ + + def __init__( + self, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + time_major=False, + **kwargs + ): + # We invoke the base layer's initializer directly here because we do not + # want to create RNN cell instance. 
+ super(RNN, self).__init__(**kwargs) + self.return_sequences = return_sequences + self.return_state = return_state + self.go_backwards = go_backwards + self.stateful = stateful + self.time_major = time_major + self.supports_masking = False + self.input_spec = [InputSpec(ndim=3)] + if hasattr(self.cell.state_size, "__len__"): + state_size = self.cell.state_size + else: + state_size = [self.cell.state_size] + self.state_spec = [InputSpec(shape=(None, dim)) for dim in state_size] + self.constants_spec = None + self._states = None + self._num_constants = 0 + self._vector_shape = tf.constant([-1]) + + def call(self, inputs, mask=None, training=None, initial_state=None): + if isinstance(mask, list): + mask = mask[0] + if mask is not None: + raise ValueError("Masking is not supported for CuDNN RNNs.") + + # input shape: `(samples, time (padded with zeros), input_dim)` + # note that the .build() method of subclasses MUST define + # self.input_spec and self.state_spec with complete input shapes. + if isinstance(inputs, list): + initial_state = inputs[1:] + inputs = inputs[0] + elif initial_state is not None: + pass + elif self.stateful: + initial_state = self.states + else: + initial_state = self.get_initial_state(inputs) + + if len(initial_state) != len(self.states): + raise ValueError( + "Layer has " + + str(len(self.states)) + + " states but was passed " + + str(len(initial_state)) + + " initial states." + ) + + if self.go_backwards: + # Reverse time axis. + inputs = backend.reverse(inputs, 1) + output, states = self._process_batch(inputs, initial_state) + + if self.stateful: + updates = [ + tf.compat.v1.assign(self_state, state) + for self_state, state in zip(self.states, states) + ] + self.add_update(updates) + + if self.return_state: + return [output] + states + else: + return output + + def get_config(self): + config = { + "return_sequences": self.return_sequences, + "return_state": self.return_state, + "go_backwards": self.go_backwards, + "stateful": self.stateful, + "time_major": self.time_major, + } + base_config = super(RNN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + return cls(**config) + + @property + def trainable_weights(self): + if self.trainable and self.built: + return [self.kernel, self.recurrent_kernel, self.bias] + return [] + + @property + def non_trainable_weights(self): + if not self.trainable and self.built: + return [self.kernel, self.recurrent_kernel, self.bias] + return [] + + @property + def losses(self): + return super(RNN, self).losses + + def get_losses_for(self, inputs=None): + return super(RNN, self).get_losses_for(inputs=inputs) diff --git a/keras/layers/rnn/base_rnn.py b/keras/layers/rnn/base_rnn.py index 36bab3653f57..350dcd1dd60e 100644 --- a/keras/layers/rnn/base_rnn.py +++ b/keras/layers/rnn/base_rnn.py @@ -13,850 +13,967 @@ # limitations under the License. 
# ============================================================================== """Base class for recurrent layers.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import collections +import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras.engine import base_layer from keras.engine.input_spec import InputSpec from keras.layers.rnn import rnn_utils from keras.layers.rnn.dropout_rnn_cell_mixin import DropoutRNNCellMixin from keras.layers.rnn.stacked_rnn_cells import StackedRNNCells -from keras.saving.saved_model import layer_serialization +from keras.saving import serialization_lib +from keras.saving.legacy.saved_model import layer_serialization from keras.utils import generic_utils -import numpy as np -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export from tensorflow.tools.docs import doc_controls -@keras_export('keras.layers.RNN') +@keras_export("keras.layers.RNN") class RNN(base_layer.Layer): - """Base class for recurrent layers. - - See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) - for details about the usage of RNN API. - - Args: - cell: A RNN cell instance or a list of RNN cell instances. - A RNN cell is a class that has: - - A `call(input_at_t, states_at_t)` method, returning - `(output_at_t, states_at_t_plus_1)`. The call method of the - cell can also take the optional argument `constants`, see - section "Note on passing external constants" below. - - A `state_size` attribute. This can be a single integer - (single state) in which case it is the size of the recurrent - state. This can also be a list/tuple of integers (one size per state). - The `state_size` can also be TensorShape or tuple/list of - TensorShape, to represent high dimension state. - - A `output_size` attribute. This can be a single integer or a - TensorShape, which represent the shape of the output. For backward - compatible reason, if this attribute is not available for the - cell, the value will be inferred by the first element of the - `state_size`. - - A `get_initial_state(inputs=None, batch_size=None, dtype=None)` - method that creates a tensor meant to be fed to `call()` as the - initial state, if the user didn't specify any initial state via other - means. The returned initial state should have a shape of - [batch_size, cell.state_size]. The cell might choose to create a - tensor full of zeros, or full of other values based on the cell's - implementation. - `inputs` is the input tensor to the RNN layer, which should - contain the batch size as its shape[0], and also dtype. Note that - the shape[0] might be `None` during the graph construction. Either - the `inputs` or the pair of `batch_size` and `dtype` are provided. - `batch_size` is a scalar tensor that represents the batch size - of the inputs. `dtype` is `tf.DType` that represents the dtype of - the inputs. - For backward compatibility, if this method is not implemented - by the cell, the RNN layer will create a zero filled tensor with the - size of [batch_size, cell.state_size]. - In the case that `cell` is a list of RNN cell instances, the cells - will be stacked on top of each other in the RNN, resulting in an - efficient stacked RNN. - return_sequences: Boolean (default `False`). Whether to return the last - output in the output sequence, or the full sequence. - return_state: Boolean (default `False`). Whether to return the last state - in addition to the output. - go_backwards: Boolean (default `False`). 
- If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default `False`). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default `False`). - If True, the network will be unrolled, else a symbolic loop will be used. - Unrolling can speed-up a RNN, although it tends to be more - memory-intensive. Unrolling is only suitable for short sequences. - time_major: The shape format of the `inputs` and `outputs` tensors. - If True, the inputs and outputs will be in shape - `(timesteps, batch, ...)`, whereas in the False case, it will be - `(batch, timesteps, ...)`. Using `time_major = True` is a bit more - efficient because it avoids transposes at the beginning and end of the - RNN calculation. However, most TensorFlow data is batch-major, so by - default this function accepts input and emits output in batch-major - form. - zero_output_for_mask: Boolean (default `False`). - Whether the output should use zeros for the masked timesteps. Note that - this field is only used when `return_sequences` is True and mask is - provided. It can useful if you want to reuse the raw output sequence of - the RNN without interference from the masked timesteps, eg, merging - bidirectional RNNs. - - Call arguments: - inputs: Input tensor. - mask: Binary tensor of shape `[batch_size, timesteps]` indicating whether - a given timestep should be masked. An individual `True` entry indicates - that the corresponding timestep should be utilized, while a `False` - entry indicates that the corresponding timestep should be ignored. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the cell - when calling it. This is for use with cells that use dropout. - initial_state: List of initial state tensors to be passed to the first - call of the cell. - constants: List of constant tensors to be passed to the cell at each - timestep. - - Input shape: - N-D tensor with shape `[batch_size, timesteps, ...]` or - `[timesteps, batch_size, ...]` when time_major is True. - - Output shape: - - If `return_state`: a list of tensors. The first tensor is - the output. The remaining tensors are the last states, - each with shape `[batch_size, state_size]`, where `state_size` could - be a high dimension tensor shape. - - If `return_sequences`: N-D tensor with shape - `[batch_size, timesteps, output_size]`, where `output_size` could - be a high dimension tensor shape, or - `[timesteps, batch_size, output_size]` when `time_major` is True. - - Else, N-D tensor with shape `[batch_size, output_size]`, where - `output_size` could be a high dimension tensor shape. - - Masking: - This layer supports masking for input data with a variable number - of timesteps. To introduce masks to your data, - use an [tf.keras.layers.Embedding] layer with the `mask_zero` parameter - set to `True`. - - Note on using statefulness in RNNs: - You can set RNN layers to be 'stateful', which means that the states - computed for the samples in one batch will be reused as initial states - for the samples in the next batch. This assumes a one-to-one mapping - between samples in different successive batches. - - To enable statefulness: - - Specify `stateful=True` in the layer constructor. - - Specify a fixed batch size for your model, by passing - If sequential model: - `batch_input_shape=(...)` to the first layer in your model. 
- Else for functional model with 1 or more Input layers: - `batch_shape=(...)` to all the first layers in your model. - This is the expected shape of your inputs - *including the batch size*. - It should be a tuple of integers, e.g. `(32, 10, 100)`. - - Specify `shuffle=False` when calling `fit()`. - - To reset the states of your model, call `.reset_states()` on either - a specific layer, or on your entire model. - - Note on specifying the initial state of RNNs: - You can specify the initial state of RNN layers symbolically by - calling them with the keyword argument `initial_state`. The value of - `initial_state` should be a tensor or list of tensors representing - the initial state of the RNN layer. - - You can specify the initial state of RNN layers numerically by - calling `reset_states` with the keyword argument `states`. The value of - `states` should be a numpy array or list of numpy arrays representing - the initial state of the RNN layer. - - Note on passing external constants to RNNs: - You can pass "external" constants to the cell using the `constants` - keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This - requires that the `cell.call` method accepts the same keyword argument - `constants`. Such constants can be used to condition the cell - transformation on additional static inputs (not changing over time), - a.k.a. an attention mechanism. - - Examples: - - ```python - # First, let's define a RNN Cell, as a layer subclass. - - class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = backend.dot(inputs, self.kernel) - output = h + backend.dot(prev_output, self.recurrent_kernel) - return output, [output] - - # Let's use this cell in a RNN layer: - - cell = MinimalRNNCell(32) - x = keras.Input((None, 5)) - layer = RNN(cell) - y = layer(x) - - # Here's how to use the cell to build a stacked RNN: - - cells = [MinimalRNNCell(32), MinimalRNNCell(64)] - x = keras.Input((None, 5)) - layer = RNN(cells) - y = layer(x) - ``` - """ - - def __init__(self, - cell, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - time_major=False, - **kwargs): - if isinstance(cell, (list, tuple)): - cell = StackedRNNCells(cell) - if 'call' not in dir(cell): - raise ValueError('Argument `cell` should have a `call` method. ' - f'The RNN was passed: cell={cell}') - if 'state_size' not in dir(cell): - raise ValueError('The RNN cell should have a `state_size` attribute ' - '(tuple of integers, one integer per RNN state). ' - f'Received: cell={cell}') - # If True, the output for masked timestep will be zeros, whereas in the - # False case, output from previous timestep is returned for masked timestep. 
- self.zero_output_for_mask = kwargs.pop('zero_output_for_mask', False) - - if 'input_shape' not in kwargs and ( - 'input_dim' in kwargs or 'input_length' in kwargs): - input_shape = (kwargs.pop('input_length', None), - kwargs.pop('input_dim', None)) - kwargs['input_shape'] = input_shape - - super().__init__(**kwargs) - self.cell = cell - self.return_sequences = return_sequences - self.return_state = return_state - self.go_backwards = go_backwards - self.stateful = stateful - self.unroll = unroll - self.time_major = time_major - - self.supports_masking = True - # The input shape is unknown yet, it could have nested tensor inputs, and - # the input spec will be the list of specs for nested inputs, the structure - # of the input_spec will be the same as the input. - self.input_spec = None - self.state_spec = None - self._states = None - self.constants_spec = None - self._num_constants = 0 - - if stateful: - if tf.distribute.has_strategy(): - raise ValueError('Stateful RNNs (created with `stateful=True`) ' - 'are not yet supported with tf.distribute.Strategy.') - - @property - def _use_input_spec_as_call_signature(self): - if self.unroll: - # When the RNN layer is unrolled, the time step shape cannot be unknown. - # The input spec does not define the time step (because this layer can be - # called with any time step value, as long as it is not None), so it - # cannot be used as the call function signature when saving to SavedModel. - return False - return super()._use_input_spec_as_call_signature - - @property - def states(self): - if self._states is None: - state = tf.nest.map_structure(lambda _: None, self.cell.state_size) - return state if tf.nest.is_nested(self.cell.state_size) else [state] - return self._states - - @states.setter - # Automatic tracking catches "self._states" which adds an extra weight and - # breaks HDF5 checkpoints. - @tf.__internal__.tracking.no_automatic_dependency_tracking - def states(self, states): - self._states = states - - def compute_output_shape(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - # Check whether the input shape contains any nested shapes. It could be - # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from numpy - # inputs. - try: - input_shape = tf.TensorShape(input_shape) - except (ValueError, TypeError): - # A nested tensor input - input_shape = tf.nest.flatten(input_shape)[0] - - batch = input_shape[0] - time_step = input_shape[1] - if self.time_major: - batch, time_step = time_step, batch - - if rnn_utils.is_multiple_state(self.cell.state_size): - state_size = self.cell.state_size - else: - state_size = [self.cell.state_size] - - def _get_output_shape(flat_output_size): - output_dim = tf.TensorShape(flat_output_size).as_list() - if self.return_sequences: + """Base class for recurrent layers. + + See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + for details about the usage of RNN API. + + Args: + cell: A RNN cell instance or a list of RNN cell instances. + A RNN cell is a class that has: + - A `call(input_at_t, states_at_t)` method, returning + `(output_at_t, states_at_t_plus_1)`. The call method of the + cell can also take the optional argument `constants`, see + section "Note on passing external constants" below. + - A `state_size` attribute. This can be a single integer + (single state) in which case it is the size of the recurrent + state. This can also be a list/tuple of integers (one size per state). 
+ The `state_size` can also be TensorShape or tuple/list of + TensorShape, to represent high dimension state. + - An `output_size` attribute. This can be a single integer or a + TensorShape, which represents the shape of the output. For backward + compatibility reasons, if this attribute is not available for the + cell, the value will be inferred from the first element of the + `state_size`. + - A `get_initial_state(inputs=None, batch_size=None, dtype=None)` + method that creates a tensor meant to be fed to `call()` as the + initial state, if the user didn't specify any initial state via other + means. The returned initial state should have a shape of + [batch_size, cell.state_size]. The cell might choose to create a + tensor full of zeros, or full of other values based on the cell's + implementation. + `inputs` is the input tensor to the RNN layer, which should + contain the batch size as its shape[0], and also dtype. Note that + the shape[0] might be `None` during the graph construction. Either + the `inputs` or the pair of `batch_size` and `dtype` are provided. + `batch_size` is a scalar tensor that represents the batch size + of the inputs. `dtype` is `tf.DType` that represents the dtype of + the inputs. + For backward compatibility, if this method is not implemented + by the cell, the RNN layer will create a zero filled tensor with the + size of [batch_size, cell.state_size]. + In the case that `cell` is a list of RNN cell instances, the cells + will be stacked on top of each other in the RNN, resulting in an + efficient stacked RNN. + return_sequences: Boolean (default `False`). Whether to return the last + output in the output sequence, or the full sequence. + return_state: Boolean (default `False`). Whether to return the last state + in addition to the output. + go_backwards: Boolean (default `False`). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default `False`). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default `False`). + If True, the network will be unrolled, else a symbolic loop will be + used. Unrolling can speed up a RNN, although it tends to be more + memory-intensive. Unrolling is only suitable for short sequences. + time_major: The shape format of the `inputs` and `outputs` tensors. + If True, the inputs and outputs will be in shape + `(timesteps, batch, ...)`, whereas in the False case, it will be + `(batch, timesteps, ...)`. Using `time_major = True` is a bit more + efficient because it avoids transposes at the beginning and end of the + RNN calculation. However, most TensorFlow data is batch-major, so by + default this function accepts input and emits output in batch-major + form. + zero_output_for_mask: Boolean (default `False`). + Whether the output should use zeros for the masked timesteps. Note that + this field is only used when `return_sequences` is True and mask is + provided. It can be useful if you want to reuse the raw output sequence + of the RNN without interference from the masked timesteps, e.g. + merging bidirectional RNNs. + + Call arguments: + inputs: Input tensor. + mask: Binary tensor of shape `[batch_size, timesteps]` indicating whether + a given timestep should be masked. An individual `True` entry indicates + that the corresponding timestep should be utilized, while a `False` + entry indicates that the corresponding timestep should be ignored. 
+ training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. This argument is passed to the cell + when calling it. This is for use with cells that use dropout. + initial_state: List of initial state tensors to be passed to the first + call of the cell. + constants: List of constant tensors to be passed to the cell at each + timestep. + + Input shape: + N-D tensor with shape `[batch_size, timesteps, ...]` or + `[timesteps, batch_size, ...]` when time_major is True. + + Output shape: + - If `return_state`: a list of tensors. The first tensor is + the output. The remaining tensors are the last states, + each with shape `[batch_size, state_size]`, where `state_size` could + be a high dimension tensor shape. + - If `return_sequences`: N-D tensor with shape + `[batch_size, timesteps, output_size]`, where `output_size` could + be a high dimension tensor shape, or + `[timesteps, batch_size, output_size]` when `time_major` is True. + - Else, N-D tensor with shape `[batch_size, output_size]`, where + `output_size` could be a high dimension tensor shape. + + Masking: + This layer supports masking for input data with a variable number + of timesteps. To introduce masks to your data, + use an [tf.keras.layers.Embedding] layer with the `mask_zero` parameter + set to `True`. + + Note on using statefulness in RNNs: + You can set RNN layers to be 'stateful', which means that the states + computed for the samples in one batch will be reused as initial states + for the samples in the next batch. This assumes a one-to-one mapping + between samples in different successive batches. + + To enable statefulness: + - Specify `stateful=True` in the layer constructor. + - Specify a fixed batch size for your model, by passing + If sequential model: + `batch_input_shape=(...)` to the first layer in your model. + Else for functional model with 1 or more Input layers: + `batch_shape=(...)` to all the first layers in your model. + This is the expected shape of your inputs + *including the batch size*. + It should be a tuple of integers, e.g. `(32, 10, 100)`. + - Specify `shuffle=False` when calling `fit()`. + + To reset the states of your model, call `.reset_states()` on either + a specific layer, or on your entire model. + + Note on specifying the initial state of RNNs: + You can specify the initial state of RNN layers symbolically by + calling them with the keyword argument `initial_state`. The value of + `initial_state` should be a tensor or list of tensors representing + the initial state of the RNN layer. + + You can specify the initial state of RNN layers numerically by + calling `reset_states` with the keyword argument `states`. The value of + `states` should be a numpy array or list of numpy arrays representing + the initial state of the RNN layer. + + Note on passing external constants to RNNs: + You can pass "external" constants to the cell using the `constants` + keyword argument of the `RNN.__call__` (as well as `RNN.call`) method. + This requires that the `cell.call` method accepts the same keyword + argument `constants`. Such constants can be used to condition the cell + transformation on additional static inputs (not changing over time), + a.k.a. an attention mechanism. + + Examples: + + ```python + import keras + from keras.layers import RNN + from keras import backend + + # First, let's define a RNN Cell, as a layer subclass. 
+ class MinimalRNNCell(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(MinimalRNNCell, self).__init__(**kwargs) + + def build(self, input_shape): + self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = backend.dot(inputs, self.kernel) + output = h + backend.dot(prev_output, self.recurrent_kernel) + return output, [output] + + # Let's use this cell in a RNN layer: + + cell = MinimalRNNCell(32) + x = keras.Input((None, 5)) + layer = RNN(cell) + y = layer(x) + + # Here's how to use the cell to build a stacked RNN: + + cells = [MinimalRNNCell(32), MinimalRNNCell(64)] + x = keras.Input((None, 5)) + layer = RNN(cells) + y = layer(x) + ``` + """ + + def __init__( + self, + cell, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + time_major=False, + **kwargs, + ): + if isinstance(cell, (list, tuple)): + cell = StackedRNNCells(cell) + if "call" not in dir(cell): + raise ValueError( + "Argument `cell` should have a `call` method. " + f"The RNN was passed: cell={cell}" + ) + if "state_size" not in dir(cell): + raise ValueError( + "The RNN cell should have a `state_size` attribute " + "(tuple of integers, one integer per RNN state). " + f"Received: cell={cell}" + ) + # If True, the output for masked timestep will be zeros, whereas in the + # False case, output from previous timestep is returned for masked + # timestep. + self.zero_output_for_mask = kwargs.pop("zero_output_for_mask", False) + + if "input_shape" not in kwargs and ( + "input_dim" in kwargs or "input_length" in kwargs + ): + input_shape = ( + kwargs.pop("input_length", None), + kwargs.pop("input_dim", None), + ) + kwargs["input_shape"] = input_shape + + super().__init__(**kwargs) + self.cell = cell + self.return_sequences = return_sequences + self.return_state = return_state + self.go_backwards = go_backwards + self.stateful = stateful + self.unroll = unroll + self.time_major = time_major + + self.supports_masking = True + # The input shape is unknown yet, it could have nested tensor inputs, + # and the input spec will be the list of specs for nested inputs, the + # structure of the input_spec will be the same as the input. + self.input_spec = None + self.state_spec = None + self._states = None + self.constants_spec = None + self._num_constants = 0 + + if stateful: + if tf.distribute.has_strategy(): + raise ValueError( + "Stateful RNNs (created with `stateful=True`) " + "are not yet supported with tf.distribute.Strategy." + ) + + @property + def _use_input_spec_as_call_signature(self): + if self.unroll: + # When the RNN layer is unrolled, the time step shape cannot be + # unknown. The input spec does not define the time step (because + # this layer can be called with any time step value, as long as it + # is not None), so it cannot be used as the call function signature + # when saving to SavedModel. 
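A short sketch of the cell-wrapping behaviour in `__init__` above: when `cell` is a list or tuple, it is wrapped in `StackedRNNCells`, so a stacked RNN can be built by passing several cells directly. Cell sizes below are illustrative assumptions.

```python
# Sketch: a list of cells passed to RNN is wrapped in StackedRNNCells,
# per the isinstance check in __init__ above. Cell sizes are illustrative.
import tensorflow as tf

cells = [tf.keras.layers.LSTMCell(32), tf.keras.layers.LSTMCell(64)]
layer = tf.keras.layers.RNN(cells)  # same as RNN(StackedRNNCells(cells))

x = tf.random.normal((2, 10, 8))  # (batch, timesteps, features)
print(layer(x).shape)  # (2, 64): the output size of the last cell
```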
+ return False + return super()._use_input_spec_as_call_signature + + @property + def states(self): + if self._states is None: + state = tf.nest.map_structure(lambda _: None, self.cell.state_size) + return state if tf.nest.is_nested(self.cell.state_size) else [state] + return self._states + + @states.setter + # Automatic tracking catches "self._states" which adds an extra weight and + # breaks HDF5 checkpoints. + @tf.__internal__.tracking.no_automatic_dependency_tracking + def states(self, states): + self._states = states + + def compute_output_shape(self, input_shape): + if isinstance(input_shape, list): + input_shape = input_shape[0] + # Check whether the input shape contains any nested shapes. It could be + # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from + # numpy inputs. + try: + input_shape = tf.TensorShape(input_shape) + except (ValueError, TypeError): + # A nested tensor input + input_shape = tf.nest.flatten(input_shape)[0] + + batch = input_shape[0] + time_step = input_shape[1] if self.time_major: - output_shape = tf.TensorShape( - [time_step, batch] + output_dim) + batch, time_step = time_step, batch + + if rnn_utils.is_multiple_state(self.cell.state_size): + state_size = self.cell.state_size + else: + state_size = [self.cell.state_size] + + def _get_output_shape(flat_output_size): + output_dim = tf.TensorShape(flat_output_size).as_list() + if self.return_sequences: + if self.time_major: + output_shape = tf.TensorShape( + [time_step, batch] + output_dim + ) + else: + output_shape = tf.TensorShape( + [batch, time_step] + output_dim + ) + else: + output_shape = tf.TensorShape([batch] + output_dim) + return output_shape + + if getattr(self.cell, "output_size", None) is not None: + # cell.output_size could be nested structure. + output_shape = tf.nest.flatten( + tf.nest.map_structure(_get_output_shape, self.cell.output_size) + ) + output_shape = ( + output_shape[0] if len(output_shape) == 1 else output_shape + ) else: - output_shape = tf.TensorShape( - [batch, time_step] + output_dim) - else: - output_shape = tf.TensorShape([batch] + output_dim) - return output_shape - - if getattr(self.cell, 'output_size', None) is not None: - # cell.output_size could be nested structure. - output_shape = tf.nest.flatten(tf.nest.map_structure( - _get_output_shape, self.cell.output_size)) - output_shape = output_shape[0] if len(output_shape) == 1 else output_shape - else: - # Note that state_size[0] could be a tensor_shape or int. - output_shape = _get_output_shape(state_size[0]) - - if self.return_state: - def _get_state_shape(flat_state): - state_shape = [batch] + tf.TensorShape(flat_state).as_list() - return tf.TensorShape(state_shape) - state_shape = tf.nest.map_structure(_get_state_shape, state_size) - return generic_utils.to_list(output_shape) + tf.nest.flatten(state_shape) - else: - return output_shape - - def compute_mask(self, inputs, mask): - # Time step masks must be the same for each input. - # This is because the mask for an RNN is of size [batch, time_steps, 1], - # and specifies which time steps should be skipped, and a time step - # must be skipped for all inputs. - # TODO(scottzhu): Should we accept multiple different masks? 
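To illustrate the shape rules that `compute_output_shape` above encodes (`return_sequences` keeps the time axis; `return_state` appends the final states), a brief sketch with illustrative sizes:

```python
# Sketch of the output-shape rules implemented above; sizes illustrative.
import tensorflow as tf

x = tf.random.normal((4, 7, 3))  # (batch, timesteps, features)

seq = tf.keras.layers.RNN(
    tf.keras.layers.SimpleRNNCell(5), return_sequences=True
)(x)
print(seq.shape)  # (4, 7, 5): one output per timestep

out, state = tf.keras.layers.RNN(
    tf.keras.layers.SimpleRNNCell(5), return_state=True
)(x)
print(out.shape, state.shape)  # (4, 5) (4, 5): last output plus final state
```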
- mask = tf.nest.flatten(mask)[0] - output_mask = mask if self.return_sequences else None - if self.return_state: - state_mask = [None for _ in self.states] - return [output_mask] + state_mask - else: - return output_mask - - def build(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - # The input_shape here could be a nest structure. - - # do the tensor_shape to shapes here. The input could be single tensor, or a - # nested structure of tensors. - def get_input_spec(shape): - """Convert input shape to InputSpec.""" - if isinstance(shape, tf.TensorShape): - input_spec_shape = shape.as_list() - else: - input_spec_shape = list(shape) - batch_index, time_step_index = (1, 0) if self.time_major else (0, 1) - if not self.stateful: - input_spec_shape[batch_index] = None - input_spec_shape[time_step_index] = None - return InputSpec(shape=tuple(input_spec_shape)) - - def get_step_input_shape(shape): - if isinstance(shape, tf.TensorShape): - shape = tuple(shape.as_list()) - # remove the timestep from the input_shape - return shape[1:] if self.time_major else (shape[0],) + shape[2:] - - def get_state_spec(shape): - state_spec_shape = tf.TensorShape(shape).as_list() - # append batch dim - state_spec_shape = [None] + state_spec_shape - return InputSpec(shape=tuple(state_spec_shape)) - - # Check whether the input shape contains any nested shapes. It could be - # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from numpy - # inputs. - try: - input_shape = tf.TensorShape(input_shape) - except (ValueError, TypeError): - # A nested tensor input - pass - - if not tf.nest.is_nested(input_shape): - # This indicates the there is only one input. - if self.input_spec is not None: - self.input_spec[0] = get_input_spec(input_shape) - else: - self.input_spec = [get_input_spec(input_shape)] - step_input_shape = get_step_input_shape(input_shape) - else: - if self.input_spec is not None: - self.input_spec[0] = tf.nest.map_structure(get_input_spec, input_shape) - else: - self.input_spec = generic_utils.to_list( - tf.nest.map_structure(get_input_spec, input_shape)) - step_input_shape = tf.nest.map_structure(get_step_input_shape, - input_shape) - - # allow cell (if layer) to build before we set or validate state_spec. - if isinstance(self.cell, base_layer.Layer) and not self.cell.built: - with backend.name_scope(self.cell.name): - self.cell.build(step_input_shape) - self.cell.built = True - - # set or validate state_spec - if rnn_utils.is_multiple_state(self.cell.state_size): - state_size = list(self.cell.state_size) - else: - state_size = [self.cell.state_size] - - if self.state_spec is not None: - # initial_state was passed in call, check compatibility - self._validate_state_spec(state_size, self.state_spec) - else: - if tf.nest.is_nested(state_size): - self.state_spec = tf.nest.map_structure(get_state_spec, state_size) - else: - self.state_spec = [ - InputSpec(shape=[None] + tf.TensorShape(dim).as_list()) - for dim in state_size - ] - # ensure the generated state_spec is correct. - self._validate_state_spec(state_size, self.state_spec) - if self.stateful: - self.reset_states() - self.built = True - - @staticmethod - def _validate_state_spec(cell_state_sizes, init_state_specs): - """Validate the state spec between the initial_state and the state_size. + # Note that state_size[0] could be a tensor_shape or int. + output_shape = _get_output_shape(state_size[0]) - Args: - cell_state_sizes: list, the `state_size` attribute from the cell. 
- init_state_specs: list, the `state_spec` from the initial_state that is - passed in `call()`. + if self.return_state: - Raises: - ValueError: When initial state spec is not compatible with the state size. - """ - validation_error = ValueError( - 'An `initial_state` was passed that is not compatible with ' - '`cell.state_size`. Received `state_spec`={}; ' - 'however `cell.state_size` is ' - '{}'.format(init_state_specs, cell_state_sizes)) - flat_cell_state_sizes = tf.nest.flatten(cell_state_sizes) - flat_state_specs = tf.nest.flatten(init_state_specs) - - if len(flat_cell_state_sizes) != len(flat_state_specs): - raise validation_error - for cell_state_spec, cell_state_size in zip(flat_state_specs, - flat_cell_state_sizes): - if not tf.TensorShape( - # Ignore the first axis for init_state which is for batch - cell_state_spec.shape[1:]).is_compatible_with( - tf.TensorShape(cell_state_size)): - raise validation_error - - @doc_controls.do_not_doc_inheritable - def get_initial_state(self, inputs): - get_initial_state_fn = getattr(self.cell, 'get_initial_state', None) - - if tf.nest.is_nested(inputs): - # The input are nested sequences. Use the first element in the seq to get - # batch size and dtype. - inputs = tf.nest.flatten(inputs)[0] - - input_shape = tf.shape(inputs) - batch_size = input_shape[1] if self.time_major else input_shape[0] - dtype = inputs.dtype - if get_initial_state_fn: - init_state = get_initial_state_fn( - inputs=None, batch_size=batch_size, dtype=dtype) - else: - init_state = rnn_utils.generate_zero_filled_state( - batch_size, self.cell.state_size, dtype) - # Keras RNN expect the states in a list, even if it's a single state tensor. - if not tf.nest.is_nested(init_state): - init_state = [init_state] - # Force the state to be a list in case it is a namedtuple eg LSTMStateTuple. - return list(init_state) - - def __call__(self, inputs, initial_state=None, constants=None, **kwargs): - inputs, initial_state, constants = rnn_utils.standardize_args( - inputs, initial_state, constants, self._num_constants) - - if initial_state is None and constants is None: - return super().__call__(inputs, **kwargs) - - # If any of `initial_state` or `constants` are specified and are Keras - # tensors, then add them to the inputs and temporarily modify the - # input_spec to include them. - - additional_inputs = [] - additional_specs = [] - if initial_state is not None: - additional_inputs += initial_state - self.state_spec = tf.nest.map_structure( - lambda s: InputSpec(shape=backend.int_shape(s)), initial_state) - additional_specs += self.state_spec - if constants is not None: - additional_inputs += constants - self.constants_spec = [ - InputSpec(shape=backend.int_shape(constant)) for constant in constants - ] - self._num_constants = len(constants) - additional_specs += self.constants_spec - # additional_inputs can be empty if initial_state or constants are provided - # but empty (e.g. the cell is stateless). - flat_additional_inputs = tf.nest.flatten(additional_inputs) - is_keras_tensor = backend.is_keras_tensor( - flat_additional_inputs[0]) if flat_additional_inputs else True - for tensor in flat_additional_inputs: - if backend.is_keras_tensor(tensor) != is_keras_tensor: - raise ValueError( - 'The initial state or constants of an RNN layer cannot be ' - 'specified via a mix of Keras tensors and non-Keras tensors ' - '(a "Keras tensor" is a tensor that was returned by a Keras layer ' - ' or by `Input` during Functional model construction). 
' - f'Received: initial_state={initial_state}, constants={constants}') - - if is_keras_tensor: - # Compute the full input spec, including state and constants - full_input = [inputs] + additional_inputs - if self.built: - # Keep the input_spec since it has been populated in build() method. - full_input_spec = self.input_spec + additional_specs - else: - # The original input_spec is None since there could be a nested tensor - # input. Update the input_spec to match the inputs. - full_input_spec = generic_utils.to_list( - tf.nest.map_structure(lambda _: None, inputs)) + additional_specs - # Perform the call with temporarily replaced input_spec - self.input_spec = full_input_spec - output = super().__call__(full_input, **kwargs) - # Remove the additional_specs from input spec and keep the rest. It is - # important to keep since the input spec was populated by build(), and - # will be reused in the stateful=True. - self.input_spec = self.input_spec[:-len(additional_specs)] - return output - else: - if initial_state is not None: - kwargs['initial_state'] = initial_state - if constants is not None: - kwargs['constants'] = constants - return super().__call__(inputs, **kwargs) - - def call(self, - inputs, - mask=None, - training=None, - initial_state=None, - constants=None): - # The input should be dense, padded with zeros. If a ragged input is fed - # into the layer, it is padded and the row lengths are used for masking. - inputs, row_lengths = backend.convert_inputs_if_ragged(inputs) - is_ragged_input = (row_lengths is not None) - self._validate_args_if_ragged(is_ragged_input, mask) - - inputs, initial_state, constants = self._process_inputs( - inputs, initial_state, constants) - - self._maybe_reset_cell_dropout_mask(self.cell) - if isinstance(self.cell, StackedRNNCells): - for cell in self.cell.cells: - self._maybe_reset_cell_dropout_mask(cell) - - if mask is not None: - # Time step masks must be the same for each input. - # TODO(scottzhu): Should we accept multiple different masks? - mask = tf.nest.flatten(mask)[0] - - if tf.nest.is_nested(inputs): - # In the case of nested input, use the first element for shape check. - input_shape = backend.int_shape(tf.nest.flatten(inputs)[0]) - else: - input_shape = backend.int_shape(inputs) - timesteps = input_shape[0] if self.time_major else input_shape[1] - if self.unroll and timesteps is None: - raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined. \n' - '- If using a Sequential model, ' - 'specify the time dimension by passing ' - 'an `input_shape` or `batch_input_shape` ' - 'argument to your first layer. If your ' - 'first layer is an Embedding, you can ' - 'also use the `input_length` argument.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a `shape` ' - 'or `batch_shape` argument to your Input layer.') - - kwargs = {} - if generic_utils.has_arg(self.cell.call, 'training'): - kwargs['training'] = training - - # TF RNN cells expect single tensor as state instead of list wrapped tensor. - is_tf_rnn_cell = getattr(self.cell, '_is_tf_rnn_cell', None) is not None - # Use the __call__ function for callable objects, eg layers, so that it - # will have the proper name scopes for the ops, etc. - cell_call_fn = self.cell.__call__ if callable(self.cell) else self.cell.call - if constants: - if not generic_utils.has_arg(self.cell.call, 'constants'): - raise ValueError( - f'RNN cell {self.cell} does not support constants. 
' - f'Received: constants={constants}') - - def step(inputs, states): - constants = states[-self._num_constants:] # pylint: disable=invalid-unary-operand-type - states = states[:-self._num_constants] # pylint: disable=invalid-unary-operand-type - - states = states[0] if len(states) == 1 and is_tf_rnn_cell else states - output, new_states = cell_call_fn( - inputs, states, constants=constants, **kwargs) - if not tf.nest.is_nested(new_states): - new_states = [new_states] - return output, new_states - else: - - def step(inputs, states): - states = states[0] if len(states) == 1 and is_tf_rnn_cell else states - output, new_states = cell_call_fn(inputs, states, **kwargs) - if not tf.nest.is_nested(new_states): - new_states = [new_states] - return output, new_states - last_output, outputs, states = backend.rnn( - step, + def _get_state_shape(flat_state): + state_shape = [batch] + tf.TensorShape(flat_state).as_list() + return tf.TensorShape(state_shape) + + state_shape = tf.nest.map_structure(_get_state_shape, state_size) + return generic_utils.to_list(output_shape) + tf.nest.flatten( + state_shape + ) + else: + return output_shape + + def compute_mask(self, inputs, mask): + # Time step masks must be the same for each input. + # This is because the mask for an RNN is of size [batch, time_steps, 1], + # and specifies which time steps should be skipped, and a time step + # must be skipped for all inputs. + # TODO(scottzhu): Should we accept multiple different masks? + mask = tf.nest.flatten(mask)[0] + output_mask = mask if self.return_sequences else None + if self.return_state: + state_mask = [None for _ in self.states] + return [output_mask] + state_mask + else: + return output_mask + + def build(self, input_shape): + if isinstance(input_shape, list): + input_shape = input_shape[0] + # The input_shape here could be a nest structure. + + # do the tensor_shape to shapes here. The input could be single tensor, + # or a nested structure of tensors. + def get_input_spec(shape): + """Convert input shape to InputSpec.""" + if isinstance(shape, tf.TensorShape): + input_spec_shape = shape.as_list() + else: + input_spec_shape = list(shape) + batch_index, time_step_index = (1, 0) if self.time_major else (0, 1) + if not self.stateful: + input_spec_shape[batch_index] = None + input_spec_shape[time_step_index] = None + return InputSpec(shape=tuple(input_spec_shape)) + + def get_step_input_shape(shape): + if isinstance(shape, tf.TensorShape): + shape = tuple(shape.as_list()) + # remove the timestep from the input_shape + return shape[1:] if self.time_major else (shape[0],) + shape[2:] + + def get_state_spec(shape): + state_spec_shape = tf.TensorShape(shape).as_list() + # append batch dim + state_spec_shape = [None] + state_spec_shape + return InputSpec(shape=tuple(state_spec_shape)) + + # Check whether the input shape contains any nested shapes. It could be + # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from + # numpy inputs. + try: + input_shape = tf.TensorShape(input_shape) + except (ValueError, TypeError): + # A nested tensor input + pass + + if not tf.nest.is_nested(input_shape): + # This indicates the there is only one input. 
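# --- Illustrative aside; not part of this patch. A minimal sketch, assuming
# only the public tf.keras API: the InputSpec that build() derives above
# leaves the batch and time axes free unless the layer is stateful.
import tensorflow as tf

layer = tf.keras.layers.SimpleRNN(3)
layer.build(tf.TensorShape([8, 10, 4]))  # (batch, time, features)
print(tuple(layer.input_spec[0].shape))  # (None, None, 4): batch/time free
# --- end aside ---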
+ if self.input_spec is not None: + self.input_spec[0] = get_input_spec(input_shape) + else: + self.input_spec = [get_input_spec(input_shape)] + step_input_shape = get_step_input_shape(input_shape) + else: + if self.input_spec is not None: + self.input_spec[0] = tf.nest.map_structure( + get_input_spec, input_shape + ) + else: + self.input_spec = generic_utils.to_list( + tf.nest.map_structure(get_input_spec, input_shape) + ) + step_input_shape = tf.nest.map_structure( + get_step_input_shape, input_shape + ) + + # allow cell (if layer) to build before we set or validate state_spec. + if isinstance(self.cell, base_layer.Layer) and not self.cell.built: + with backend.name_scope(self.cell.name): + self.cell.build(step_input_shape) + self.cell.built = True + + # set or validate state_spec + if rnn_utils.is_multiple_state(self.cell.state_size): + state_size = list(self.cell.state_size) + else: + state_size = [self.cell.state_size] + + if self.state_spec is not None: + # initial_state was passed in call, check compatibility + self._validate_state_spec(state_size, self.state_spec) + else: + if tf.nest.is_nested(state_size): + self.state_spec = tf.nest.map_structure( + get_state_spec, state_size + ) + else: + self.state_spec = [ + InputSpec(shape=[None] + tf.TensorShape(dim).as_list()) + for dim in state_size + ] + # ensure the generated state_spec is correct. + self._validate_state_spec(state_size, self.state_spec) + if self.stateful: + self.reset_states() + super().build(input_shape) + + @staticmethod + def _validate_state_spec(cell_state_sizes, init_state_specs): + """Validate the state spec between the initial_state and the state_size. + + Args: + cell_state_sizes: list, the `state_size` attribute from the cell. + init_state_specs: list, the `state_spec` from the initial_state that + is passed in `call()`. + + Raises: + ValueError: When initial state spec is not compatible with the state + size. + """ + validation_error = ValueError( + "An `initial_state` was passed that is not compatible with " + "`cell.state_size`. Received `state_spec`={}; " + "however `cell.state_size` is " + "{}".format(init_state_specs, cell_state_sizes) + ) + flat_cell_state_sizes = tf.nest.flatten(cell_state_sizes) + flat_state_specs = tf.nest.flatten(init_state_specs) + + if len(flat_cell_state_sizes) != len(flat_state_specs): + raise validation_error + for cell_state_spec, cell_state_size in zip( + flat_state_specs, flat_cell_state_sizes + ): + if not tf.TensorShape( + # Ignore the first axis for init_state which is for batch + cell_state_spec.shape[1:] + ).is_compatible_with(tf.TensorShape(cell_state_size)): + raise validation_error + + @doc_controls.do_not_doc_inheritable + def get_initial_state(self, inputs): + get_initial_state_fn = getattr(self.cell, "get_initial_state", None) + + if tf.nest.is_nested(inputs): + # The input are nested sequences. Use the first element in the seq + # to get batch size and dtype. + inputs = tf.nest.flatten(inputs)[0] + + input_shape = tf.shape(inputs) + batch_size = input_shape[1] if self.time_major else input_shape[0] + dtype = inputs.dtype + if get_initial_state_fn: + init_state = get_initial_state_fn( + inputs=None, batch_size=batch_size, dtype=dtype + ) + else: + init_state = rnn_utils.generate_zero_filled_state( + batch_size, self.cell.state_size, dtype + ) + # Keras RNN expect the states in a list, even if it's a single state + # tensor. 
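# --- Illustrative aside; not part of this patch. A minimal sketch, assuming
# only the public tf.keras API: the zero-filled default used when the cell
# has no custom initial state, and the single-tensor case that the
# list-wrapping above exists for.
import tensorflow as tf

cell = tf.keras.layers.SimpleRNNCell(5)
state = cell.get_initial_state(batch_size=3, dtype=tf.float32)
state = state if isinstance(state, (list, tuple)) else [state]
print([s.shape.as_list() for s in state])  # [[3, 5]]
# --- end aside ---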
+ if not tf.nest.is_nested(init_state): + init_state = [init_state] + # Force the state to be a list in case it is a namedtuple eg + # LSTMStateTuple. + return list(init_state) + + def __call__(self, inputs, initial_state=None, constants=None, **kwargs): + inputs, initial_state, constants = rnn_utils.standardize_args( + inputs, initial_state, constants, self._num_constants + ) + + if initial_state is None and constants is None: + return super().__call__(inputs, **kwargs) + + # If any of `initial_state` or `constants` are specified and are Keras + # tensors, then add them to the inputs and temporarily modify the + # input_spec to include them. + + additional_inputs = [] + additional_specs = [] + if initial_state is not None: + additional_inputs += initial_state + self.state_spec = tf.nest.map_structure( + lambda s: InputSpec(shape=backend.int_shape(s)), initial_state + ) + additional_specs += self.state_spec + if constants is not None: + additional_inputs += constants + self.constants_spec = [ + InputSpec(shape=backend.int_shape(constant)) + for constant in constants + ] + self._num_constants = len(constants) + additional_specs += self.constants_spec + # additional_inputs can be empty if initial_state or constants are + # provided but empty (e.g. the cell is stateless). + flat_additional_inputs = tf.nest.flatten(additional_inputs) + is_keras_tensor = ( + backend.is_keras_tensor(flat_additional_inputs[0]) + if flat_additional_inputs + else True + ) + for tensor in flat_additional_inputs: + if backend.is_keras_tensor(tensor) != is_keras_tensor: + raise ValueError( + "The initial state or constants of an RNN layer cannot be " + "specified via a mix of Keras tensors and non-Keras " + 'tensors (a "Keras tensor" is a tensor that was returned ' + "by a Keras layer or by `Input` during Functional " + "model construction). Received: " + f"initial_state={initial_state}, constants={constants}" + ) + + if is_keras_tensor: + # Compute the full input spec, including state and constants + full_input = [inputs] + additional_inputs + if self.built: + # Keep the input_spec since it has been populated in build() + # method. + full_input_spec = self.input_spec + additional_specs + else: + # The original input_spec is None since there could be a nested + # tensor input. Update the input_spec to match the inputs. + full_input_spec = ( + generic_utils.to_list( + tf.nest.map_structure(lambda _: None, inputs) + ) + + additional_specs + ) + # Perform the call with temporarily replaced input_spec + self.input_spec = full_input_spec + output = super().__call__(full_input, **kwargs) + # Remove the additional_specs from input spec and keep the rest. It + # is important to keep since the input spec was populated by + # build(), and will be reused in the stateful=True. 
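# --- Illustrative aside; not part of this patch. A minimal sketch, assuming
# only the public tf.keras API: a symbolic initial_state is folded into the
# layer's inputs by the __call__ logic above, so it becomes an ordinary
# model input.
import numpy as np
import tensorflow as tf

x = tf.keras.Input((None, 4))
s = tf.keras.Input((8,))
y = tf.keras.layers.RNN(tf.keras.layers.GRUCell(8))(x, initial_state=s)
model = tf.keras.Model([x, s], y)
print(model.predict([np.zeros((2, 5, 4)), np.ones((2, 8))]).shape)  # (2, 8)
# --- end aside ---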
+ self.input_spec = self.input_spec[: -len(additional_specs)] + return output + else: + if initial_state is not None: + kwargs["initial_state"] = initial_state + if constants is not None: + kwargs["constants"] = constants + return super().__call__(inputs, **kwargs) + + def call( + self, inputs, - initial_state, - constants=constants, - go_backwards=self.go_backwards, - mask=mask, - unroll=self.unroll, - input_length=row_lengths if row_lengths is not None else timesteps, - time_major=self.time_major, - zero_output_for_mask=self.zero_output_for_mask, - return_all_outputs=self.return_sequences) - - if self.stateful: - updates = [ - tf.compat.v1.assign(self_state, tf.cast(state, self_state.dtype)) - for self_state, state in zip( - tf.nest.flatten(self.states), tf.nest.flatten(states)) - ] - self.add_update(updates) - - if self.return_sequences: - output = backend.maybe_convert_to_ragged( - is_ragged_input, outputs, row_lengths, go_backwards=self.go_backwards) - else: - output = last_output - - if self.return_state: - if not isinstance(states, (list, tuple)): - states = [states] - else: - states = list(states) - return generic_utils.to_list(output) + states - else: - return output - - def _process_inputs(self, inputs, initial_state, constants): - # input shape: `(samples, time (padded with zeros), input_dim)` - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. - if (isinstance(inputs, collections.abc.Sequence) - and not isinstance(inputs, tuple)): - # get initial_state from full input spec - # as they could be copied to multiple GPU. - if not self._num_constants: - initial_state = inputs[1:] - else: - initial_state = inputs[1:-self._num_constants] - constants = inputs[-self._num_constants:] - if len(initial_state) == 0: - initial_state = None - inputs = inputs[0] - - if self.stateful: - if initial_state is not None: - # When layer is stateful and initial_state is provided, check if the - # recorded state is same as the default value (zeros). Use the recorded - # state if it is not same as the default. - non_zero_count = tf.add_n([tf.math.count_nonzero(s) - for s in tf.nest.flatten(self.states)]) - # Set strict = True to keep the original structure of the state. - initial_state = tf.compat.v1.cond(non_zero_count > 0, - true_fn=lambda: self.states, - false_fn=lambda: initial_state, - strict=True) - else: - initial_state = self.states - initial_state = tf.nest.map_structure( - # When the layer has a inferred dtype, use the dtype from the cell. - lambda v: tf.cast(v, self.compute_dtype or self.cell.compute_dtype), - initial_state - ) - elif initial_state is None: - initial_state = self.get_initial_state(inputs) - - if len(initial_state) != len(self.states): - raise ValueError(f'Layer has {len(self.states)} ' - f'states but was passed {len(initial_state)} initial ' - f'states. Received: initial_state={initial_state}') - return inputs, initial_state, constants - - def _validate_args_if_ragged(self, is_ragged_input, mask): - if not is_ragged_input: - return - - if mask is not None: - raise ValueError(f'The mask that was passed in was {mask}, which ' - 'cannot be applied to RaggedTensor inputs. Please ' - 'make sure that there is no mask injected by upstream ' - 'layers.') - if self.unroll: - raise ValueError('The input received contains RaggedTensors and does ' - 'not support unrolling. 
Disable unrolling by passing ' - '`unroll=False` in the RNN Layer constructor.') - - def _maybe_reset_cell_dropout_mask(self, cell): - if isinstance(cell, DropoutRNNCellMixin): - cell.reset_dropout_mask() - cell.reset_recurrent_dropout_mask() - - def reset_states(self, states=None): - """Reset the recorded states for the stateful RNN layer. - - Can only be used when RNN layer is constructed with `stateful` = `True`. - Args: - states: Numpy arrays that contains the value for the initial state, which - will be feed to cell at the first time step. When the value is None, - zero filled numpy array will be created based on the cell state size. - - Raises: - AttributeError: When the RNN layer is not stateful. - ValueError: When the batch size of the RNN layer is unknown. - ValueError: When the input numpy array is not compatible with the RNN - layer state, either size wise or dtype wise. - """ - if not self.stateful: - raise AttributeError('Layer must be stateful.') - spec_shape = None - if self.input_spec is not None: - spec_shape = tf.nest.flatten(self.input_spec[0])[0].shape - if spec_shape is None: - # It is possible to have spec shape to be None, eg when construct a RNN - # with a custom cell, or standard RNN layers (LSTM/GRU) which we only know - # it has 3 dim input, but not its full shape spec before build(). - batch_size = None - else: - batch_size = spec_shape[1] if self.time_major else spec_shape[0] - if not batch_size: - raise ValueError('If a RNN is stateful, it needs to know ' - 'its batch size. Specify the batch size ' - 'of your input tensors: \n' - '- If using a Sequential model, ' - 'specify the batch size by passing ' - 'a `batch_input_shape` ' - 'argument to your first layer.\n' - '- If using the functional API, specify ' - 'the batch size by passing a ' - '`batch_shape` argument to your Input layer.') - # initialize state if None - if tf.nest.flatten(self.states)[0] is None: - if getattr(self.cell, 'get_initial_state', None): - flat_init_state_values = tf.nest.flatten(self.cell.get_initial_state( - inputs=None, batch_size=batch_size, - # Use variable_dtype instead of compute_dtype, since the state is - # stored in a variable - dtype=self.variable_dtype or backend.floatx())) - else: - flat_init_state_values = tf.nest.flatten( - rnn_utils.generate_zero_filled_state( - batch_size, self.cell.state_size, self.variable_dtype or - backend.floatx())) - flat_states_variables = tf.nest.map_structure( - backend.variable, flat_init_state_values) - self.states = tf.nest.pack_sequence_as(self.cell.state_size, - flat_states_variables) - if not tf.nest.is_nested(self.states): - self.states = [self.states] - elif states is None: - for state, size in zip(tf.nest.flatten(self.states), - tf.nest.flatten(self.cell.state_size)): - backend.set_value( - state, - np.zeros([batch_size] + tf.TensorShape(size).as_list())) - else: - flat_states = tf.nest.flatten(self.states) - flat_input_states = tf.nest.flatten(states) - if len(flat_input_states) != len(flat_states): - raise ValueError(f'Layer {self.name} expects {len(flat_states)} ' - f'states, but it received {len(flat_input_states)} ' - f'state values. 
States received: {states}') - set_value_tuples = [] - for i, (value, state) in enumerate(zip(flat_input_states, - flat_states)): - if value.shape != state.shape: - raise ValueError( - f'State {i} is incompatible with layer {self.name}: ' - f'expected shape={(batch_size, state)} ' - f'but found shape={value.shape}') - set_value_tuples.append((state, value)) - backend.batch_set_value(set_value_tuples) - - def get_config(self): - config = { - 'return_sequences': self.return_sequences, - 'return_state': self.return_state, - 'go_backwards': self.go_backwards, - 'stateful': self.stateful, - 'unroll': self.unroll, - 'time_major': self.time_major - } - if self._num_constants: - config['num_constants'] = self._num_constants - if self.zero_output_for_mask: - config['zero_output_for_mask'] = self.zero_output_for_mask - - config['cell'] = generic_utils.serialize_keras_object(self.cell) - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - from keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top - cell = deserialize_layer(config.pop('cell'), custom_objects=custom_objects) - num_constants = config.pop('num_constants', 0) - layer = cls(cell, **config) - layer._num_constants = num_constants # pylint: disable=protected-access - return layer - - @property - def _trackable_saved_model_saver(self): - return layer_serialization.RNNSavedModelSaver(self) + mask=None, + training=None, + initial_state=None, + constants=None, + ): + # The input should be dense, padded with zeros. If a ragged input is fed + # into the layer, it is padded and the row lengths are used for masking. + inputs, row_lengths = backend.convert_inputs_if_ragged(inputs) + is_ragged_input = row_lengths is not None + self._validate_args_if_ragged(is_ragged_input, mask) + + inputs, initial_state, constants = self._process_inputs( + inputs, initial_state, constants + ) + + self._maybe_reset_cell_dropout_mask(self.cell) + if isinstance(self.cell, StackedRNNCells): + for cell in self.cell.cells: + self._maybe_reset_cell_dropout_mask(cell) + + if mask is not None: + # Time step masks must be the same for each input. + # TODO(scottzhu): Should we accept multiple different masks? + mask = tf.nest.flatten(mask)[0] + + if tf.nest.is_nested(inputs): + # In the case of nested input, use the first element for shape + # check. + input_shape = backend.int_shape(tf.nest.flatten(inputs)[0]) + else: + input_shape = backend.int_shape(inputs) + timesteps = input_shape[0] if self.time_major else input_shape[1] + if self.unroll and timesteps is None: + raise ValueError( + "Cannot unroll a RNN if the " + "time dimension is undefined. \n" + "- If using a Sequential model, " + "specify the time dimension by passing " + "an `input_shape` or `batch_input_shape` " + "argument to your first layer. If your " + "first layer is an Embedding, you can " + "also use the `input_length` argument.\n" + "- If using the functional API, specify " + "the time dimension by passing a `shape` " + "or `batch_shape` argument to your Input layer." + ) + + kwargs = {} + if generic_utils.has_arg(self.cell.call, "training"): + kwargs["training"] = training + + # TF RNN cells expect single tensor as state instead of list wrapped + # tensor. + is_tf_rnn_cell = getattr(self.cell, "_is_tf_rnn_cell", None) is not None + # Use the __call__ function for callable objects, eg layers, so that it + # will have the proper name scopes for the ops, etc. 
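# --- Illustrative aside; not part of this patch. A minimal sketch, assuming
# only the public tf.keras API: a RaggedTensor batch is densified and its
# row lengths become the mask, as convert_inputs_if_ragged arranges above;
# combining ragged inputs with unroll=True raises the ValueError shown.
import tensorflow as tf

ragged = tf.ragged.constant(
    [[[1.0], [2.0], [3.0]],  # length-3 sequence
     [[4.0]]],               # length-1 sequence, padded internally
    ragged_rank=1,
)
print(tf.keras.layers.SimpleRNN(2)(ragged).shape)  # (2, 2)
# --- end aside ---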
+ cell_call_fn = ( + self.cell.__call__ if callable(self.cell) else self.cell.call + ) + if constants: + if not generic_utils.has_arg(self.cell.call, "constants"): + raise ValueError( + f"RNN cell {self.cell} does not support constants. " + f"Received: constants={constants}" + ) + + def step(inputs, states): + constants = states[-self._num_constants :] + states = states[: -self._num_constants] + + states = ( + states[0] if len(states) == 1 and is_tf_rnn_cell else states + ) + output, new_states = cell_call_fn( + inputs, states, constants=constants, **kwargs + ) + if not tf.nest.is_nested(new_states): + new_states = [new_states] + return output, new_states + + else: + + def step(inputs, states): + states = ( + states[0] if len(states) == 1 and is_tf_rnn_cell else states + ) + output, new_states = cell_call_fn(inputs, states, **kwargs) + if not tf.nest.is_nested(new_states): + new_states = [new_states] + return output, new_states + + last_output, outputs, states = backend.rnn( + step, + inputs, + initial_state, + constants=constants, + go_backwards=self.go_backwards, + mask=mask, + unroll=self.unroll, + input_length=row_lengths if row_lengths is not None else timesteps, + time_major=self.time_major, + zero_output_for_mask=self.zero_output_for_mask, + return_all_outputs=self.return_sequences, + ) + + if self.stateful: + updates = [ + tf.compat.v1.assign( + self_state, tf.cast(state, self_state.dtype) + ) + for self_state, state in zip( + tf.nest.flatten(self.states), tf.nest.flatten(states) + ) + ] + self.add_update(updates) + + if self.return_sequences: + output = backend.maybe_convert_to_ragged( + is_ragged_input, + outputs, + row_lengths, + go_backwards=self.go_backwards, + ) + else: + output = last_output + + if self.return_state: + if not isinstance(states, (list, tuple)): + states = [states] + else: + states = list(states) + return generic_utils.to_list(output) + states + else: + return output + + def _process_inputs(self, inputs, initial_state, constants): + # input shape: `(samples, time (padded with zeros), input_dim)` + # note that the .build() method of subclasses MUST define + # self.input_spec and self.state_spec with complete input shapes. + if isinstance(inputs, collections.abc.Sequence) and not isinstance( + inputs, tuple + ): + # get initial_state from full input spec + # as they could be copied to multiple GPU. + if not self._num_constants: + initial_state = inputs[1:] + else: + initial_state = inputs[1 : -self._num_constants] + constants = inputs[-self._num_constants :] + if len(initial_state) == 0: + initial_state = None + inputs = inputs[0] + + if self.stateful: + if initial_state is not None: + # When layer is stateful and initial_state is provided, check if + # the recorded state is same as the default value (zeros). Use + # the recorded state if it is not same as the default. + non_zero_count = tf.add_n( + [ + tf.math.count_nonzero(s) + for s in tf.nest.flatten(self.states) + ] + ) + # Set strict = True to keep the original structure of the state. + initial_state = tf.compat.v1.cond( + non_zero_count > 0, + true_fn=lambda: self.states, + false_fn=lambda: initial_state, + strict=True, + ) + else: + initial_state = self.states + initial_state = tf.nest.map_structure( + # When the layer has a inferred dtype, use the dtype from the + # cell. 
+ lambda v: tf.cast( + v, self.compute_dtype or self.cell.compute_dtype + ), + initial_state, + ) + elif initial_state is None: + initial_state = self.get_initial_state(inputs) + + if len(initial_state) != len(self.states): + raise ValueError( + f"Layer has {len(self.states)} " + f"states but was passed {len(initial_state)} initial " + f"states. Received: initial_state={initial_state}" + ) + return inputs, initial_state, constants + + def _validate_args_if_ragged(self, is_ragged_input, mask): + if not is_ragged_input: + return + + if mask is not None: + raise ValueError( + f"The mask that was passed in was {mask}, which " + "cannot be applied to RaggedTensor inputs. Please " + "make sure that there is no mask injected by upstream " + "layers." + ) + if self.unroll: + raise ValueError( + "The input received contains RaggedTensors and does " + "not support unrolling. Disable unrolling by passing " + "`unroll=False` in the RNN Layer constructor." + ) + + def _maybe_reset_cell_dropout_mask(self, cell): + if isinstance(cell, DropoutRNNCellMixin): + cell.reset_dropout_mask() + cell.reset_recurrent_dropout_mask() + + def reset_states(self, states=None): + """Reset the recorded states for the stateful RNN layer. + + Can only be used when RNN layer is constructed with `stateful` = `True`. + Args: + states: Numpy arrays that contains the value for the initial state, + which will be feed to cell at the first time step. When the value is + None, zero filled numpy array will be created based on the cell + state size. + + Raises: + AttributeError: When the RNN layer is not stateful. + ValueError: When the batch size of the RNN layer is unknown. + ValueError: When the input numpy array is not compatible with the RNN + layer state, either size wise or dtype wise. + """ + if not self.stateful: + raise AttributeError("Layer must be stateful.") + spec_shape = None + if self.input_spec is not None: + spec_shape = tf.nest.flatten(self.input_spec[0])[0].shape + if spec_shape is None: + # It is possible to have spec shape to be None, eg when construct a + # RNN with a custom cell, or standard RNN layers (LSTM/GRU) which we + # only know it has 3 dim input, but not its full shape spec before + # build(). + batch_size = None + else: + batch_size = spec_shape[1] if self.time_major else spec_shape[0] + if not batch_size: + raise ValueError( + "If a RNN is stateful, it needs to know " + "its batch size. Specify the batch size " + "of your input tensors: \n" + "- If using a Sequential model, " + "specify the batch size by passing " + "a `batch_input_shape` " + "argument to your first layer.\n" + "- If using the functional API, specify " + "the batch size by passing a " + "`batch_shape` argument to your Input layer." 
+ ) + # initialize state if None + if tf.nest.flatten(self.states)[0] is None: + if getattr(self.cell, "get_initial_state", None): + flat_init_state_values = tf.nest.flatten( + self.cell.get_initial_state( + inputs=None, + batch_size=batch_size, + # Use variable_dtype instead of compute_dtype, since the + # state is stored in a variable + dtype=self.variable_dtype or backend.floatx(), + ) + ) + else: + flat_init_state_values = tf.nest.flatten( + rnn_utils.generate_zero_filled_state( + batch_size, + self.cell.state_size, + self.variable_dtype or backend.floatx(), + ) + ) + flat_states_variables = tf.nest.map_structure( + lambda v: backend.variable(v, v.dtype), flat_init_state_values + ) + self.states = tf.nest.pack_sequence_as( + self.cell.state_size, flat_states_variables + ) + if not tf.nest.is_nested(self.states): + self.states = [self.states] + elif states is None: + for state, size in zip( + tf.nest.flatten(self.states), + tf.nest.flatten(self.cell.state_size), + ): + backend.set_value( + state, + np.zeros([batch_size] + tf.TensorShape(size).as_list()), + ) + else: + flat_states = tf.nest.flatten(self.states) + flat_input_states = tf.nest.flatten(states) + if len(flat_input_states) != len(flat_states): + raise ValueError( + f"Layer {self.name} expects {len(flat_states)} " + f"states, but it received {len(flat_input_states)} " + f"state values. States received: {states}" + ) + set_value_tuples = [] + for i, (value, state) in enumerate( + zip(flat_input_states, flat_states) + ): + if value.shape != state.shape: + raise ValueError( + f"State {i} is incompatible with layer {self.name}: " + f"expected shape={(batch_size, state)} " + f"but found shape={value.shape}" + ) + set_value_tuples.append((state, value)) + backend.batch_set_value(set_value_tuples) + + def get_config(self): + config = { + "return_sequences": self.return_sequences, + "return_state": self.return_state, + "go_backwards": self.go_backwards, + "stateful": self.stateful, + "unroll": self.unroll, + "time_major": self.time_major, + } + if self._num_constants: + config["num_constants"] = self._num_constants + if self.zero_output_for_mask: + config["zero_output_for_mask"] = self.zero_output_for_mask + + config["cell"] = serialization_lib.serialize_keras_object(self.cell) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + from keras.layers import deserialize as deserialize_layer + + cell = deserialize_layer( + config.pop("cell"), custom_objects=custom_objects + ) + num_constants = config.pop("num_constants", 0) + layer = cls(cell, **config) + layer._num_constants = num_constants + return layer + + @property + def _trackable_saved_model_saver(self): + return layer_serialization.RNNSavedModelSaver(self) diff --git a/keras/layers/rnn/base_rnn_test.py b/keras/layers/rnn/base_rnn_test.py index a010879bb656..7b0182a15cb2 100644 --- a/keras/layers/rnn/base_rnn_test.py +++ b/keras/layers/rnn/base_rnn_test.py @@ -20,7 +20,10 @@ import collections +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.engine import base_layer_utils from keras.layers.rnn import gru @@ -29,1914 +32,2136 @@ from keras.layers.rnn import lstm_v1 from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.utils import generic_utils -import numpy as np -import tensorflow.compat.v2 as tf - -from tensorflow.python.training.tracking 
import util as trackable_util +# isort: off +from tensorflow.python.checkpoint import ( + checkpoint as trackable_util, +) # Used for nested input/output/state RNN test. -NestedInput = collections.namedtuple('NestedInput', ['t1', 't2']) -NestedState = collections.namedtuple('NestedState', ['s1', 's2']) +NestedInput = collections.namedtuple("NestedInput", ["t1", "t2"]) +NestedState = collections.namedtuple("NestedState", ["s1", "s2"]) @test_combinations.run_all_keras_modes class RNNTest(test_combinations.TestCase): + def test_minimal_rnn_cell_non_layer(self): + class MinimalRNNCell: + def __init__(self, units, input_dim): + self.units = units + self.state_size = units + self.kernel = keras.backend.variable( + np.random.random((input_dim, units)) + ) + + def call(self, inputs, states): + prev_output = states[0] + output = keras.backend.dot(inputs, self.kernel) + prev_output + return output, [output] + + # Basic test case. + cell = MinimalRNNCell(32, 5) + x = keras.Input((None, 5)) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. + cells = [ + MinimalRNNCell(8, 5), + MinimalRNNCell(32, 8), + MinimalRNNCell(32, 32), + ] + layer = keras.layers.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + def test_minimal_rnn_cell_non_layer_multiple_states(self): + class MinimalRNNCell: + def __init__(self, units, input_dim): + self.units = units + self.state_size = (units, units) + self.kernel = keras.backend.variable( + np.random.random((input_dim, units)) + ) + + def call(self, inputs, states): + prev_output_1 = states[0] + prev_output_2 = states[1] + output = keras.backend.dot(inputs, self.kernel) + output += prev_output_1 + output -= prev_output_2 + return output, [output * 2, output * 3] + + # Basic test case. + cell = MinimalRNNCell(32, 5) + x = keras.Input((None, 5)) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. 
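# --- Illustrative aside; not part of this test. A minimal sketch, assuming
# only the public tf.keras API: a list of cells passed to keras.layers.RNN
# is wrapped in StackedRNNCells, so each cell feeds the next and the last
# cell sets the output width.
import tensorflow as tf

stacked = tf.keras.layers.RNN(
    [tf.keras.layers.SimpleRNNCell(8), tf.keras.layers.SimpleRNNCell(32)]
)
assert isinstance(stacked.cell, tf.keras.layers.StackedRNNCells)
print(stacked.cell.output_size)  # 32
# --- end aside ---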
+ cells = [ + MinimalRNNCell(8, 5), + MinimalRNNCell(16, 8), + MinimalRNNCell(32, 16), + ] + layer = keras.layers.RNN(cells) + self.assertEqual(layer.cell.state_size, ((8, 8), (16, 16), (32, 32))) + self.assertEqual(layer.cell.output_size, 32) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + def test_minimal_rnn_cell_layer(self): + class MinimalRNNCell(keras.layers.Layer): + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super().__init__(**kwargs) + + def build(self, input_shape): + self.kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="uniform", + name="kernel", + ) + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer="uniform", + name="recurrent_kernel", + ) + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = keras.backend.dot(inputs, self.kernel) + output = h + keras.backend.dot( + prev_output, self.recurrent_kernel + ) + return output, [output] + + def get_config(self): + config = {"units": self.units} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + # Test basic case. + x = keras.Input((None, 5)) + cell = MinimalRNNCell(32) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope({"MinimalRNNCell": MinimalRNNCell}): + layer = keras.layers.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # Test stacking. + cells = [MinimalRNNCell(8), MinimalRNNCell(12), MinimalRNNCell(32)] + layer = keras.layers.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacked RNN serialization. 
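# --- Illustrative aside; not part of this test. A minimal sketch, assuming
# only the public tf.keras API: built-in cells round-trip through
# get_config/from_config with no CustomObjectScope; only custom cells such
# as MinimalRNNCell need their class registered for deserialization.
import tensorflow as tf

layer = tf.keras.layers.RNN(
    tf.keras.layers.SimpleRNNCell(7), go_backwards=True
)
clone = tf.keras.layers.RNN.from_config(layer.get_config())
print(clone.go_backwards, clone.cell.units)  # True 7
# --- end aside ---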
+ x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope({"MinimalRNNCell": MinimalRNNCell}): + layer = keras.layers.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + def test_minimal_rnn_cell_abstract_rnn_cell(self): + class MinimalRNNCell(keras.layers.AbstractRNNCell): + def __init__(self, units, **kwargs): + self.units = units + super().__init__(**kwargs) + + @property + def state_size(self): + return self.units + + def build(self, input_shape): + self.kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="uniform", + name="kernel", + ) + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer="uniform", + name="recurrent_kernel", + ) + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = keras.backend.dot(inputs, self.kernel) + output = h + keras.backend.dot( + prev_output, self.recurrent_kernel + ) + return output, output + + @property + def output_size(self): + return self.units + + cell = MinimalRNNCell(32) + x = keras.Input((None, 5)) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. + cells = [MinimalRNNCell(8), MinimalRNNCell(16), MinimalRNNCell(32)] + layer = keras.layers.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + def test_rnn_with_time_major(self): + batch = 10 + time_step = 5 + embedding_dim = 4 + units = 3 + + # Test basic case. + x = keras.Input((time_step, embedding_dim)) + time_major_x = keras.layers.Lambda( + lambda t: tf.transpose(t, [1, 0, 2]) + )(x) + layer = keras.layers.SimpleRNN( + units, time_major=True, return_sequences=True + ) + self.assertEqual( + layer.compute_output_shape( + (time_step, None, embedding_dim) + ).as_list(), + [time_step, None, units], + ) + y = layer(time_major_x) + self.assertEqual(layer.output_shape, (time_step, None, units)) + + y = keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))(y) + + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + np.zeros((batch, time_step, embedding_dim)), + np.zeros((batch, time_step, units)), + ) + + # Test stacking. + x = keras.Input((time_step, embedding_dim)) + time_major_x = keras.layers.Lambda( + lambda t: tf.transpose(t, [1, 0, 2]) + )(x) + cell_units = [10, 8, 6] + cells = [keras.layers.SimpleRNNCell(cell_units[i]) for i in range(3)] + layer = keras.layers.RNN(cells, time_major=True, return_sequences=True) + y = layer(time_major_x) + self.assertEqual(layer.output_shape, (time_step, None, cell_units[-1])) + + y = keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))(y) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + np.zeros((batch, time_step, embedding_dim)), + np.zeros((batch, time_step, cell_units[-1])), + ) + + # Test masking. 
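# --- Illustrative aside; not part of this test. A minimal sketch, assuming
# only the public tf.keras API (plus the eager `_keras_mask` attribute that
# layer outputs carry): Masking flags all-zero timesteps, and the RNN's
# compute_mask propagates that mask so the flagged steps are skipped.
import numpy as np
import tensorflow as tf

x = np.array([[[1.0], [2.0], [0.0]]], dtype="float32")  # last step = padding
masked = tf.keras.layers.Masking(mask_value=0.0)(x)
print(masked._keras_mask.numpy())  # [[ True  True False]]
# --- end aside ---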
+ x = keras.Input((time_step, embedding_dim)) + time_major = keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))( + x + ) + mask = keras.layers.Masking()(time_major) + rnn = keras.layers.SimpleRNN( + units, time_major=True, return_sequences=True + )(mask) + y = keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))(rnn) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + np.zeros((batch, time_step, embedding_dim)), + np.zeros((batch, time_step, units)), + ) + + # Test layer output + x = keras.Input((time_step, embedding_dim)) + rnn_1 = keras.layers.SimpleRNN(units, return_sequences=True) + y = rnn_1(x) + + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + np.zeros((batch, time_step, embedding_dim)), + np.zeros((batch, time_step, units)), + ) + + x_np = np.random.random((batch, time_step, embedding_dim)) + y_np_1 = model.predict(x_np) + + time_major = keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))( + x + ) + rnn_2 = keras.layers.SimpleRNN( + units, time_major=True, return_sequences=True + ) + y_2 = rnn_2(time_major) + y_2 = keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))(y_2) + + model_2 = keras.models.Model(x, y_2) + rnn_2.set_weights(rnn_1.get_weights()) + + y_np_2 = model_2.predict(x_np) + self.assertAllClose(y_np_1, y_np_2, atol=1e-4) + + def test_rnn_cell_with_constants_layer(self): + # Test basic case. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + cell = RNNCellWithConstants(32, constant_size=3) + layer = keras.layers.RNN(cell) + y = layer(x, constants=c) + + model = keras.models.Model([x, c], y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], np.zeros((6, 32)) + ) + + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + custom_objects = {"RNNCellWithConstants": RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # test flat list inputs. + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer([x, c]) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_3, atol=1e-4) + + # Test stacking. + cells = [ + gru.GRUCell(8), + RNNCellWithConstants(12, constant_size=3), + RNNCellWithConstants(32, constant_size=3), + ] + layer = keras.layers.RNN(cells) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], np.zeros((6, 32)) + ) + + # Test GRUCell reset_after property. 
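# --- Illustrative aside; not part of this test. A minimal sketch, assuming
# only the public tf.keras API: reset_after=True applies the reset gate
# after the recurrent matmul and gives the cell separate input and
# recurrent biases (the cuDNN-compatible variant).
import tensorflow as tf

cell = tf.keras.layers.GRUCell(4, reset_after=True)
cell.build(tf.TensorShape([None, 3]))
print(cell.bias.shape)  # (2, 12): two bias sets of 3 * units each
# --- end aside ---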
+ x = keras.Input((None, 5)) + c = keras.Input((3,)) + cells = [gru.GRUCell(32, reset_after=True)] + layer = keras.layers.RNN(cells) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], np.zeros((6, 32)) + ) + + # Test stacked RNN serialization + x_np = np.random.random((6, 5, 5)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + def test_rnn_cell_with_non_keras_constants(self): + # Test basic case. + x = keras.Input((None, 5)) + c = tf.zeros([6, 3], dtype=tf.float32) + cell = RNNCellWithConstants(32, constant_size=3) + layer = keras.layers.RNN(cell) + y = layer(x, constants=c) + + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. + cells = [ + gru.GRUCell(8), + RNNCellWithConstants(12, constant_size=3), + RNNCellWithConstants(32, constant_size=3), + ] + layer = keras.layers.RNN(cells) + y = layer(x, constants=c) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + def test_rnn_cell_with_constants_layer_passing_initial_state(self): + # Test basic case. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + s = keras.Input((32,)) + cell = RNNCellWithConstants(32, constant_size=3) + layer = keras.layers.RNN(cell) + y = layer(x, initial_state=s, constants=c) + model = keras.models.Model([x, s, c], y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))], + np.zeros((6, 32)), + ) + + # Test basic case serialization. 
+ x_np = np.random.random((6, 5, 5)) + s_np = np.random.random((6, 32)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, s_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + custom_objects = {"RNNCellWithConstants": RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer(x, initial_state=s, constants=c) + model = keras.models.Model([x, s, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, s_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # verify that state is used + y_np_2_different_s = model.predict([x_np, s_np + 10.0, c_np]) + with self.assertRaises(AssertionError): + self.assertAllClose(y_np, y_np_2_different_s, atol=1e-4) + + # test flat list inputs + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer([x, s, c]) + model = keras.models.Model([x, s, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, s_np, c_np]) + self.assertAllClose(y_np, y_np_3, atol=1e-4) + + def test_rnn_cell_with_non_keras_constants_and_initial_state(self): + # Test basic case. + x = keras.Input((None, 5)) + c = tf.zeros([6, 3], dtype=tf.float32) + s = tf.zeros([6, 32], dtype=tf.float32) + cell = RNNCellWithConstants(32, constant_size=3) + layer = keras.layers.RNN(cell) + y = layer(x, initial_state=s, constants=c) + + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. + cells = [ + gru.GRUCell(8), + RNNCellWithConstants(12, constant_size=3), + RNNCellWithConstants(32, constant_size=3), + ] + layer = keras.layers.RNN(cells) + s = [ + tf.zeros([6, 8], dtype=tf.float32), + tf.zeros([6, 12], dtype=tf.float32), + tf.zeros([6, 32], dtype=tf.float32), + ] + y = layer(x, initial_state=s, constants=c) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + def test_stacked_rnn_attributes(self): + if tf.executing_eagerly(): + self.skipTest("reduce_sum is not available in eager mode.") + + cells = [keras.layers.LSTMCell(1), keras.layers.LSTMCell(1)] + layer = keras.layers.RNN(cells) + layer.build((None, None, 1)) + + # Test weights + self.assertEqual(len(layer.trainable_weights), 6) + cells[0].trainable = False + self.assertEqual(len(layer.trainable_weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 3) + + # Test `get_losses_for` and `losses` + x = keras.Input((None, 1)) + loss_1 = tf.reduce_sum(x) + loss_2 = tf.reduce_sum(cells[0].kernel) + cells[0].add_loss(loss_1, inputs=x) + cells[0].add_loss(loss_2) + self.assertEqual(len(layer.losses), 2) + self.assertEqual(layer.get_losses_for(None), [loss_2]) + self.assertEqual(layer.get_losses_for(x), [loss_1]) + + # Test `updates` + cells = [keras.layers.LSTMCell(1), keras.layers.LSTMCell(1)] + layer = keras.layers.RNN(cells) + x = keras.Input((None, 1)) + _ = layer(x) + + update_1 = tf.compat.v1.assign_add( + cells[0].kernel, x[0, 0, 0] * cells[0].kernel + ) + update_2 = tf.compat.v1.assign_add( + cells[0].kernel, tf.ones_like(cells[0].kernel) + ) + # TODO(b/128682878): Remove when RNNCells are __call__'d. 
+ with base_layer_utils.call_context().enter(layer, x, True, None): + cells[0].add_update(update_1) + cells[0].add_update(update_2) + self.assertEqual(len(layer.updates), 2) + + def test_rnn_dynamic_trainability(self): + layer_class = keras.layers.SimpleRNN + embedding_dim = 4 + units = 3 + + layer = layer_class(units) + layer.build((None, None, embedding_dim)) + self.assertEqual(len(layer.weights), 3) + self.assertEqual(len(layer.trainable_weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 0) + layer.trainable = False + self.assertEqual(len(layer.weights), 3) + self.assertEqual(len(layer.trainable_weights), 0) + self.assertEqual(len(layer.non_trainable_weights), 3) + layer.trainable = True + self.assertEqual(len(layer.weights), 3) + self.assertEqual(len(layer.trainable_weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 0) + + @parameterized.parameters( + [keras.layers.SimpleRNN, keras.layers.GRU, keras.layers.LSTM] + ) + def test_rnn_cell_trainability(self, layer_cls): + # https://github.com/tensorflow/tensorflow/issues/32369. + layer = layer_cls(3, trainable=False) + self.assertFalse(layer.cell.trainable) + + layer.trainable = True + self.assertTrue(layer.cell.trainable) + + def test_state_reuse_with_dropout(self): + layer_class = keras.layers.SimpleRNN + embedding_dim = 4 + units = 3 + timesteps = 2 + num_samples = 2 + + input1 = keras.Input( + batch_shape=(num_samples, timesteps, embedding_dim) + ) + layer = layer_class( + units, return_state=True, return_sequences=True, dropout=0.2 + ) + state = layer(input1)[1:] + + input2 = keras.Input( + batch_shape=(num_samples, timesteps, embedding_dim) + ) + output = layer_class(units)(input2, initial_state=state) + model = keras.Model([input1, input2], output) + + inputs = [ + np.random.random((num_samples, timesteps, embedding_dim)), + np.random.random((num_samples, timesteps, embedding_dim)), + ] + model.predict(inputs) + + def test_builtin_and_custom_rnn_cell_serialization(self): + @keras.utils.register_keras_serializable(package="TestOnly") + class CustomRNNCell(keras.layers.Layer): + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super().__init__(**kwargs) + + def build(self, input_shape): + self.kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="uniform", + name="kernel", + ) + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer="uniform", + name="recurrent_kernel", + ) + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = keras.backend.dot(inputs, self.kernel) + output = h + keras.backend.dot( + prev_output, self.recurrent_kernel + ) + return output, [output] + + def get_config(self): + config = {"units": self.units} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + for cell_class in [ + keras.layers.SimpleRNNCell, + keras.layers.GRUCell, + keras.layers.LSTMCell, + CustomRNNCell, + ]: + # Test basic case. + x = keras.Input((None, 5)) + cell = cell_class(32) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + + # Test basic case serialization. 
+ x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + layer = keras.layers.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # Test stacking. + cells = [cell_class(8), cell_class(12), cell_class(32)] + layer = keras.layers.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + + # Test stacked RNN serialization. + x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + layer = keras.layers.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + layer=[ + keras.layers.SimpleRNN, + gru_v1.GRU, + lstm_v1.LSTM, + gru.GRU, + lstm.LSTM, + ], + unroll=[True, False], + ) + ) + def test_rnn_dropout(self, layer, unroll): + rnn_layer = layer(3, dropout=0.1, recurrent_dropout=0.1, unroll=unroll) + if not unroll: + x = keras.Input((None, 5)) + else: + x = keras.Input((5, 5)) + y = rnn_layer(x) + model = keras.models.Model(x, y) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + x_np = np.random.random((6, 5, 5)) + y_np = np.random.random((6, 3)) + model.train_on_batch(x_np, y_np) + + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + cell=[ + keras.layers.SimpleRNNCell, + keras.layers.GRUCell, + keras.layers.LSTMCell, + ], + unroll=[True, False], + ) + ) + def test_stacked_rnn_dropout(self, cell, unroll): + cells = [ + cell(3, dropout=0.1, recurrent_dropout=0.1), + cell(3, dropout=0.1, recurrent_dropout=0.1), + ] + layer = keras.layers.RNN(cells, unroll=unroll) + + if not unroll: + x = keras.Input((None, 5)) + else: + x = keras.Input((5, 5)) + y = layer(x) + model = keras.models.Model(x, y) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + x_np = np.random.random((6, 5, 5)) + y_np = np.random.random((6, 3)) + model.train_on_batch(x_np, y_np) + + def test_dropout_mask_reuse(self): + # The layer is created with recurrent_initializer = zero, so that the + # the recurrent state won't affect the output. By doing this, we can + # verify the output and see if the same mask is applied to for each + # timestep. 
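# --- Illustrative aside; not part of this test. A minimal sketch, assuming
# the DropoutRNNCellMixin methods available on built-in cells: one dropout
# mask is cached per forward pass, so every timestep sees the same mask
# until the RNN layer resets it between calls, which is what the assertions
# below rely on.
import tensorflow as tf

cell = tf.keras.layers.SimpleRNNCell(3, dropout=0.5)
mask_a = cell.get_dropout_mask_for_cell(tf.ones((2, 4)), training=True)
mask_b = cell.get_dropout_mask_for_cell(tf.ones((2, 4)), training=True)
print(bool(tf.reduce_all(mask_a == mask_b)))  # True: cached mask is reused
cell.reset_dropout_mask()  # what _maybe_reset_cell_dropout_mask invokes
# --- end aside ---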
+ layer_1 = keras.layers.SimpleRNN( + 3, + dropout=0.5, + kernel_initializer="ones", + recurrent_initializer="zeros", + return_sequences=True, + unroll=True, + ) + layer_2 = keras.layers.RNN( + keras.layers.SimpleRNNCell( + 3, + dropout=0.5, + kernel_initializer="ones", + recurrent_initializer="zeros", + ), + return_sequences=True, + unroll=True, + ) + layer_3 = keras.layers.RNN( + [ + keras.layers.SimpleRNNCell( + 3, + dropout=0.5, + kernel_initializer="ones", + recurrent_initializer="zeros", + ), + keras.layers.SimpleRNNCell( + 3, + dropout=0.5, + kernel_initializer="ones", + recurrent_initializer="zeros", + ), + ], + return_sequences=True, + unroll=True, + ) + + def verify(rnn_layer): + inputs = tf.constant(1.0, shape=(6, 2, 5)) + out = rnn_layer(inputs, training=True) + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.global_variables_initializer()) + batch_1 = self.evaluate(out) + batch_1_t0, batch_1_t1 = batch_1[:, 0, :], batch_1[:, 1, :] + self.assertAllClose(batch_1_t0, batch_1_t1) + + # This simulate the layer called with multiple batches in eager mode + if tf.executing_eagerly(): + out2 = rnn_layer(inputs, training=True) + else: + out2 = out + batch_2 = self.evaluate(out2) + batch_2_t0, batch_2_t1 = batch_2[:, 0, :], batch_2[:, 1, :] + self.assertAllClose(batch_2_t0, batch_2_t1) + + # Also validate that different dropout is used by between batches. + self.assertNotAllClose(batch_1_t0, batch_2_t0) + self.assertNotAllClose(batch_1_t1, batch_2_t1) + + for l in [layer_1, layer_2, layer_3]: + verify(l) + + def test_stacked_rnn_compute_output_shape(self): + cells = [keras.layers.LSTMCell(3), keras.layers.LSTMCell(6)] + embedding_dim = 4 + timesteps = 2 + layer = keras.layers.RNN( + cells, return_state=True, return_sequences=True + ) + output_shape = layer.compute_output_shape( + (None, timesteps, embedding_dim) + ) + expected_output_shape = [ + (None, timesteps, 6), + (None, 3), + (None, 3), + (None, 6), + (None, 6), + ] + self.assertEqual( + [tuple(o.as_list()) for o in output_shape], expected_output_shape + ) + + # Test reverse_state_order = True for stacked cell. 
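# --- Illustrative aside; not part of this test. A minimal sketch, assuming
# only the public tf.keras API: reverse_state_order=True makes
# StackedRNNCells report the last cell's states first, matching the
# reordered shapes expected below.
import tensorflow as tf

cells = [tf.keras.layers.LSTMCell(3), tf.keras.layers.LSTMCell(6)]
stacked = tf.keras.layers.StackedRNNCells(cells, reverse_state_order=True)
print(tf.nest.flatten(stacked.state_size))  # [6, 6, 3, 3]
# --- end aside ---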
+ stacked_cell = keras.layers.StackedRNNCells( + cells, reverse_state_order=True + ) + layer = keras.layers.RNN( + stacked_cell, return_state=True, return_sequences=True + ) + output_shape = layer.compute_output_shape( + (None, timesteps, embedding_dim) + ) + expected_output_shape = [ + (None, timesteps, 6), + (None, 6), + (None, 6), + (None, 3), + (None, 3), + ] + self.assertEqual( + [tuple(o.as_list()) for o in output_shape], expected_output_shape + ) + + def test_stacked_rnn_with_training_param(self): + # See https://github.com/tensorflow/tensorflow/issues/32586 + + class CellWrapper(keras.layers.AbstractRNNCell): + def __init__(self, cell): + super().__init__() + self.cell = cell + + @property + def state_size(self): + return self.cell.state_size + + @property + def output_size(self): + return self.cell.output_size + + def build(self, input_shape): + self.cell.build(input_shape) + self.built = True + + def get_initial_state( + self, inputs=None, batch_size=None, dtype=None + ): + return self.cell.get_initial_state( + inputs=inputs, batch_size=batch_size, dtype=dtype + ) + + def call(self, inputs, states, training=None, **kwargs): + assert training is not None + return self.cell(inputs, states=states, training=training) + + cell = keras.layers.LSTMCell(32) + cell = CellWrapper(cell) + cell = keras.layers.StackedRNNCells([cell]) + + rnn = keras.layers.RNN(cell) + inputs = np.ones((8, 4, 16), dtype=np.float32) + rnn(inputs, training=True) + + def test_stacked_rnn_with_nested_cell(self): + batch = 10 + t = 5 + i1, i2, i3 = 3, 4, 5 + o11, o12, o13 = 2, 3, 4 + o21, o22, o23 = 4, 5, 6 + + # test 1: use_tuple=False + cells = [NestedCell(o11, o12, o13), NestedCell(o21, o22, o23)] + rnn = keras.layers.RNN(cells, return_sequences=True, return_state=True) + + input_1 = keras.Input((t, i1)) + input_2 = keras.Input((t, i2, i3)) + + output1, output2, state1, state2 = rnn((input_1, input_2)) + s11, s12 = state1 + s21, s22 = state2 + + self.assertEqual(output1.shape.as_list(), [None, t, o21]) + self.assertEqual(output2.shape.as_list(), [None, t, o22, o23]) + self.assertEqual(s11.shape.as_list(), [None, o11]) + self.assertEqual(s12.shape.as_list(), [None, o12, o13]) + self.assertEqual(s21.shape.as_list(), [None, o21]) + self.assertEqual(s22.shape.as_list(), [None, o22, o23]) + + model = keras.models.Model([input_1, input_2], [output1, output2]) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((batch, t, i1)), np.zeros((batch, t, i2, i3))], + [np.zeros((batch, t, o21)), np.zeros((batch, t, o22, o23))], + ) + self.assertEqual( + model.output_shape, [(None, t, o21), (None, t, o22, o23)] + ) + + # test 2: use_tuple=True + cells = [ + NestedCell(o11, o12, o13, use_tuple=True), + NestedCell(o21, o22, o23), + ] + + rnn = keras.layers.RNN(cells, return_sequences=True, return_state=True) + + input_1 = keras.Input((t, i1)) + input_2 = keras.Input((t, i2, i3)) + + output1, output2, state1, state2 = rnn( + NestedInput(t1=input_1, t2=input_2) + ) + s11, s12 = state1 + s21, s22 = state2 + + self.assertEqual(output1.shape.as_list(), [None, t, o21]) + self.assertEqual(output2.shape.as_list(), [None, t, o22, o23]) + self.assertEqual(s11.shape.as_list(), [None, o11]) + self.assertEqual(s12.shape.as_list(), [None, o12, o13]) + self.assertEqual(s21.shape.as_list(), [None, o21]) + self.assertEqual(s22.shape.as_list(), [None, o22, o23]) + + model = keras.models.Model([input_1, input_2], [output1, output2]) + model.compile( + 
optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((batch, t, i1)), np.zeros((batch, t, i2, i3))], + [np.zeros((batch, t, o21)), np.zeros((batch, t, o22, o23))], + ) + self.assertEqual( + model.output_shape, [(None, t, o21), (None, t, o22, o23)] + ) + + def test_trackable_dependencies(self): + rnn = keras.layers.SimpleRNN + x = np.random.random((2, 2, 2)) + y = np.random.random((2, 2)) + model = keras.models.Sequential() + model.add(rnn(2)) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, epochs=1, batch_size=1) + + # check whether the model variables are present in the + # trackable list of objects + checkpointed_objects = { + id(o) for o in trackable_util.list_objects(model) + } + for v in model.variables: + self.assertIn(id(v), checkpointed_objects) + + def test_high_dimension_RNN(self): + # Basic test case. + unit_a = 10 + unit_b = 20 + input_a = 5 + input_b = 10 + batch = 32 + time_step = 4 + + cell = Minimal2DRNNCell(unit_a, unit_b) + x = keras.Input((None, input_a, input_b)) + layer = keras.layers.RNN(cell) + y = layer(x) + + self.assertEqual(cell.state_size.as_list(), [unit_a, unit_b]) + + if not tf.executing_eagerly(): + init_state = layer.get_initial_state(x) + self.assertEqual(len(init_state), 1) + self.assertEqual( + init_state[0].shape.as_list(), [None, unit_a, unit_b] + ) + + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + np.zeros((batch, time_step, input_a, input_b)), + np.zeros((batch, unit_a, unit_b)), + ) + self.assertEqual(model.output_shape, (None, unit_a, unit_b)) + + # Test stacking. + cells = [ + Minimal2DRNNCell(unit_a, unit_b), + Minimal2DRNNCell(unit_a * 2, unit_b * 2), + Minimal2DRNNCell(unit_a * 4, unit_b * 4), + ] + layer = keras.layers.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + np.zeros((batch, time_step, input_a, input_b)), + np.zeros((batch, unit_a * 4, unit_b * 4)), + ) + self.assertEqual(model.output_shape, (None, unit_a * 4, unit_b * 4)) + + def test_high_dimension_RNN_with_init_state(self): + unit_a = 10 + unit_b = 20 + input_a = 5 + input_b = 10 + batch = 32 + time_step = 4 + + # Basic test case. + cell = Minimal2DRNNCell(unit_a, unit_b) + x = keras.Input((None, input_a, input_b)) + s = keras.Input((unit_a, unit_b)) + layer = keras.layers.RNN(cell) + y = layer(x, initial_state=s) + + model = keras.models.Model([x, s], y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [ + np.zeros((batch, time_step, input_a, input_b)), + np.zeros((batch, unit_a, unit_b)), + ], + np.zeros((batch, unit_a, unit_b)), + ) + self.assertEqual(model.output_shape, (None, unit_a, unit_b)) + + # Bad init state shape. 
+ bad_shape_a = unit_a * 2 + bad_shape_b = unit_b * 2 + cell = Minimal2DRNNCell(unit_a, unit_b) + x = keras.Input((None, input_a, input_b)) + s = keras.Input((bad_shape_a, bad_shape_b)) + layer = keras.layers.RNN(cell) + with self.assertRaisesWithPredicateMatch( + ValueError, "however `cell.state_size` is" + ): + layer(x, initial_state=s) + + def test_inconsistent_output_state_size(self): + batch = 32 + time_step = 4 + state_size = 5 + input_size = 6 + cell = PlusOneRNNCell(state_size) + x = keras.Input((None, input_size)) + layer = keras.layers.RNN(cell) + y = layer(x) + + self.assertEqual(cell.state_size, state_size) + if not tf.executing_eagerly(): + init_state = layer.get_initial_state(x) + self.assertEqual(len(init_state), 1) + self.assertEqual(init_state[0].shape.as_list(), [None, state_size]) + + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + np.zeros((batch, time_step, input_size)), + np.zeros((batch, input_size)), + ) + self.assertEqual(model.output_shape, (None, input_size)) + + def test_get_initial_state(self): + cell = keras.layers.SimpleRNNCell(5) + with self.assertRaisesRegex( + ValueError, "batch_size and dtype cannot be None" + ): + cell.get_initial_state(None, None, None) + + if not tf.executing_eagerly(): + inputs = keras.Input((None, 10)) + initial_state = cell.get_initial_state(inputs, None, None) + self.assertEqual(initial_state.shape.as_list(), [None, 5]) + self.assertEqual(initial_state.dtype, inputs.dtype) + + batch = tf.shape(inputs)[0] + dtype = inputs.dtype + initial_state = cell.get_initial_state(None, batch, dtype) + self.assertEqual(initial_state.shape.as_list(), [None, 5]) + self.assertEqual(initial_state.dtype, inputs.dtype) + else: + batch = 8 + inputs = np.random.random((batch, 10)) + initial_state = cell.get_initial_state(inputs, None, None) + self.assertEqual(initial_state.shape.as_list(), [8, 5]) + self.assertEqual(initial_state.dtype, inputs.dtype) + + dtype = inputs.dtype + initial_state = cell.get_initial_state(None, batch, dtype) + self.assertEqual(initial_state.shape.as_list(), [batch, 5]) + self.assertEqual(initial_state.dtype, inputs.dtype) + + @parameterized.parameters([True, False]) + def test_nested_input_output(self, stateful): + batch = 10 + t = 5 + i1, i2, i3 = 3, 4, 5 + o1, o2, o3 = 2, 3, 4 + + cell = NestedCell(o1, o2, o3) + rnn = keras.layers.RNN(cell, stateful=stateful) + + batch_size = batch if stateful else None + input_1 = keras.Input((t, i1), batch_size=batch_size) + input_2 = keras.Input((t, i2, i3), batch_size=batch_size) + + outputs = rnn((input_1, input_2)) + + self.assertEqual(len(outputs), 2) + self.assertEqual(outputs[0].shape.as_list(), [batch_size, o1]) + self.assertEqual(outputs[1].shape.as_list(), [batch_size, o2, o3]) + + model = keras.models.Model((input_1, input_2), outputs) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((batch, t, i1)), np.zeros((batch, t, i2, i3))], + [np.zeros((batch, o1)), np.zeros((batch, o2, o3))], + ) + self.assertEqual( + model.output_shape, [(batch_size, o1), (batch_size, o2, o3)] + ) + + cell = NestedCell(o1, o2, o3, use_tuple=True) + + rnn = keras.layers.RNN(cell, stateful=stateful) + + input_1 = keras.Input((t, i1), batch_size=batch_size) + input_2 = keras.Input((t, i2, i3), batch_size=batch_size) + + outputs = rnn(NestedInput(t1=input_1, t2=input_2)) + + 
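`NestedInput` and `NestedState`, used throughout these tests, are defined earlier in the test file (outside this hunk); they are presumably plain namedtuples, which `tf.nest` flattens structurally, so an RNN cell can consume them like any other nested input. A hypothetical equivalent definition:

    import collections

    # tf.nest.flatten(NestedInput(t1=a, t2=b)) -> [a, b]
    NestedInput = collections.namedtuple("NestedInput", ["t1", "t2"])
    NestedState = collections.namedtuple("NestedState", ["s1", "s2"])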
self.assertEqual(len(outputs), 2) + self.assertEqual(outputs[0].shape.as_list(), [batch_size, o1]) + self.assertEqual(outputs[1].shape.as_list(), [batch_size, o2, o3]) + + model = keras.models.Model([input_1, input_2], outputs) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((batch, t, i1)), np.zeros((batch, t, i2, i3))], + [np.zeros((batch, o1)), np.zeros((batch, o2, o3))], + ) + self.assertEqual( + model.output_shape, [(batch_size, o1), (batch_size, o2, o3)] + ) + + def test_nested_input_output_with_state(self): + batch = 10 + t = 5 + i1, i2, i3 = 3, 4, 5 + o1, o2, o3 = 2, 3, 4 + + cell = NestedCell(o1, o2, o3) + rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True) + + input_1 = keras.Input((t, i1)) + input_2 = keras.Input((t, i2, i3)) + + output1, output2, s1, s2 = rnn((input_1, input_2)) + + self.assertEqual(output1.shape.as_list(), [None, t, o1]) + self.assertEqual(output2.shape.as_list(), [None, t, o2, o3]) + self.assertEqual(s1.shape.as_list(), [None, o1]) + self.assertEqual(s2.shape.as_list(), [None, o2, o3]) + + model = keras.models.Model([input_1, input_2], [output1, output2]) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((batch, t, i1)), np.zeros((batch, t, i2, i3))], + [np.zeros((batch, t, o1)), np.zeros((batch, t, o2, o3))], + ) + self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)]) + + cell = NestedCell(o1, o2, o3, use_tuple=True) + + rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True) + + input_1 = keras.Input((t, i1)) + input_2 = keras.Input((t, i2, i3)) + + output1, output2, s1, s2 = rnn(NestedInput(t1=input_1, t2=input_2)) + + self.assertEqual(output1.shape.as_list(), [None, t, o1]) + self.assertEqual(output2.shape.as_list(), [None, t, o2, o3]) + self.assertEqual(s1.shape.as_list(), [None, o1]) + self.assertEqual(s2.shape.as_list(), [None, o2, o3]) + + model = keras.models.Model([input_1, input_2], [output1, output2]) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [np.zeros((batch, t, i1)), np.zeros((batch, t, i2, i3))], + [np.zeros((batch, t, o1)), np.zeros((batch, t, o2, o3))], + ) + self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)]) + + def test_nest_input_output_with_init_state(self): + batch = 10 + t = 5 + i1, i2, i3 = 3, 4, 5 + o1, o2, o3 = 2, 3, 4 + + cell = NestedCell(o1, o2, o3) + rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True) + + input_1 = keras.Input((t, i1)) + input_2 = keras.Input((t, i2, i3)) + init_s1 = keras.Input((o1,)) + init_s2 = keras.Input((o2, o3)) + + output1, output2, s1, s2 = rnn( + (input_1, input_2), initial_state=(init_s1, init_s2) + ) + + self.assertEqual(output1.shape.as_list(), [None, t, o1]) + self.assertEqual(output2.shape.as_list(), [None, t, o2, o3]) + self.assertEqual(s1.shape.as_list(), [None, o1]) + self.assertEqual(s2.shape.as_list(), [None, o2, o3]) + + model = keras.models.Model( + [input_1, input_2, init_s1, init_s2], [output1, output2] + ) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + [ + np.zeros((batch, t, i1)), + np.zeros((batch, t, i2, i3)), + np.zeros((batch, o1)), + np.zeros((batch, o2, o3)), + ], + [np.zeros((batch, t, o1)), np.zeros((batch, t, o2, o3))], + ) + 
+        self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)])
+
+        cell = NestedCell(o1, o2, o3, use_tuple=True)
+
+        rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True)
+
+        input_1 = keras.Input((t, i1))
+        input_2 = keras.Input((t, i2, i3))
+        init_s1 = keras.Input((o1,))
+        init_s2 = keras.Input((o2, o3))
+        init_state = NestedState(s1=init_s1, s2=init_s2)
+
+        output1, output2, s1, s2 = rnn(
+            NestedInput(t1=input_1, t2=input_2), initial_state=init_state
+        )
+
+        self.assertEqual(output1.shape.as_list(), [None, t, o1])
+        self.assertEqual(output2.shape.as_list(), [None, t, o2, o3])
+        self.assertEqual(s1.shape.as_list(), [None, o1])
+        self.assertEqual(s2.shape.as_list(), [None, o2, o3])
+
+        model = keras.models.Model(
+            [input_1, input_2, init_s1, init_s2], [output1, output2]
+        )
+        model.compile(
+            optimizer="rmsprop",
+            loss="mse",
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+        model.train_on_batch(
+            [
+                np.zeros((batch, t, i1)),
+                np.zeros((batch, t, i2, i3)),
+                np.zeros((batch, o1)),
+                np.zeros((batch, o2, o3)),
+            ],
+            [np.zeros((batch, t, o1)), np.zeros((batch, t, o2, o3))],
+        )
+        self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)])
+
+    def test_masking_rnn_with_output_and_states(self):
+        class Cell(keras.layers.Layer):
+            def __init__(self):
+                self.state_size = None
+                self.output_size = None
+                super().__init__()
+
+            def build(self, input_shape):
+                self.state_size = input_shape[-1]
+                self.output_size = input_shape[-1]
+
+            def call(self, inputs, states):
+                return inputs, [s + 1 for s in states]
+
+        x = keras.Input((3, 1), name="x")
+        x_masked = keras.layers.Masking()(x)
+        s_0 = keras.Input((1,), name="s_0")
+        y, s = keras.layers.RNN(Cell(), return_state=True)(
+            x_masked, initial_state=s_0
+        )
+        model = keras.models.Model([x, s_0], [y, s])
+        model.compile(
+            optimizer="rmsprop",
+            loss="mse",
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+
+        # The last time step is masked.
+        x_np = np.array([[[1.0], [2.0], [0.0]]])
+        s_0_np = np.array([[10.0]])
+        y_np, s_np = model.predict([x_np, s_0_np])
+
+        # 1 is added to the initial state twice.
+        self.assertAllClose(s_np, s_0_np + 2)
+        # Expect the last output to be the same as the last output before
+        # masking.
+        self.assertAllClose(y_np, x_np[:, 1, :])
+
+    def test_zero_output_for_masking(self):
+        for unroll in [True, False]:
+            cell = keras.layers.SimpleRNNCell(5)
+            x = keras.Input((5, 5))
+            mask = keras.layers.Masking()
+            layer = keras.layers.RNN(
+                cell,
+                return_sequences=True,
+                zero_output_for_mask=True,
+                unroll=unroll,
+            )
+            masked_input = mask(x)
+            y = layer(masked_input)
+            model = keras.models.Model(x, y)
+            model.compile(
+                optimizer="rmsprop",
+                loss="mse",
+                run_eagerly=test_utils.should_run_eagerly(),
+            )
+
+            np_x = np.ones((6, 5, 5))
+            result_1 = model.predict(np_x)
+
+            # Set timesteps 4 and 5 of the last record to zero (masked).
+            np_x[5, 3:] = 0
+            result_2 = model.predict(np_x)
+
+            # Expect result_2 to have the same output, except at timesteps
+            # 4 and 5 of the last record.
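What `zero_output_for_mask=True` changes, sketched on a toy array (illustrative values, not from the test): without the flag, masked steps of a return_sequences output carry the last valid output forward; with it, masked steps are zeroed, which is exactly why result_1 is zeroed below before comparing:

    import numpy as np

    seq = np.array([[[0.1], [0.2], [0.2]]])  # default: step 3 repeats step 2
    mask = np.array([[True, True, False]])   # final timestep is masked
    zeroed = seq * mask[..., None]           # the zero_output_for_mask view
    # zeroed -> [[[0.1], [0.2], [0.0]]]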
+ result_1[5, 3:] = 0 + self.assertAllClose(result_1, result_2) + + def test_unroll_single_step(self): + """Even if the time dimension is only one, we should be able to + unroll.""" + cell = keras.layers.SimpleRNNCell(5) + x = keras.Input((1, 5)) + layer = keras.layers.RNN(cell, return_sequences=True, unroll=True) + y = layer(x) + model = keras.models.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + + np_x = np.ones((6, 1, 5)) + result = model.predict(np_x) + self.assertEqual((6, 1, 5), result.shape) + + def test_unroll_zero_step(self): + """If the time dimension is None, we should fail to unroll.""" + cell = keras.layers.SimpleRNNCell(5) + x = keras.Input((None, 5)) + layer = keras.layers.RNN(cell, return_sequences=True, unroll=True) + with self.assertRaisesRegex(ValueError, "Cannot unroll a RNN.*"): + layer(x) + + def test_full_input_spec(self): + # See https://github.com/tensorflow/tensorflow/issues/25985 + inputs = keras.layers.Input(batch_shape=(1, 1, 1)) + state_h = keras.layers.Input(batch_shape=(1, 1)) + state_c = keras.layers.Input(batch_shape=(1, 1)) + states = [state_h, state_c] + decoder_out = keras.layers.LSTM(1, stateful=True)( + inputs, initial_state=states + ) + model = keras.Model([inputs, state_h, state_c], decoder_out) + output1 = model.predict( + [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))] + ) + output2 = model.predict( + [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))] + ) + model.reset_states() + output3 = model.predict( + [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))] + ) + self.assertAllClose(output1, output3) + self.assertNotAllClose(output1, output2) + + def test_reset_states(self): + # See https://github.com/tensorflow/tensorflow/issues/25852 + with self.assertRaisesRegex( + ValueError, "it needs to know its batch size" + ): + simple_rnn = keras.layers.SimpleRNN(1, stateful=True) + simple_rnn.reset_states() + + with self.assertRaisesRegex( + ValueError, "it needs to know its batch size" + ): + cell = Minimal2DRNNCell(1, 2) + custom_rnn = keras.layers.RNN(cell, stateful=True) + custom_rnn.reset_states() + + @parameterized.parameters( + [ + keras.layers.SimpleRNNCell, + keras.layers.GRUCell, + keras.layers.LSTMCell, + ] + ) + def test_stateful_rnn_with_stacking(self, cell): + # See https://github.com/tensorflow/tensorflow/issues/28614. + batch = 12 + timesteps = 10 + input_dim = 8 + output_dim = 64 + cells = [cell(32), cell(64)] + x = keras.Input(batch_shape=(batch, None, input_dim)) + layer = keras.layers.RNN(cells, stateful=True) + y = layer(x) + + model = keras.Model(x, y) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + np.zeros((batch, timesteps, input_dim)), + np.zeros((batch, output_dim)), + ) + model.predict(np.ones((batch, timesteps, input_dim))) + + model.reset_states() + model.predict(np.ones((batch, timesteps, input_dim))) + + new_states = tf.nest.map_structure( + lambda s: np.ones((batch, s)), layer.cell.state_size + ) + layer.reset_states(new_states) + model.predict(np.ones((batch, timesteps, input_dim))) + + def test_stateful_rnn_with_initial_state(self): + # See https://github.com/tensorflow/tensorflow/issues/32299. 
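The next test leans on standard stateful-RNN semantics: a stateful layer carries the final state of one predict() call into the next, until reset_states() zeroes it again. A minimal self-contained illustration (hypothetical shapes and units, not the test's):

    import numpy as np
    from tensorflow import keras

    inputs = keras.Input(batch_shape=(2, 3, 5))
    outputs = keras.layers.GRU(4, stateful=True, return_sequences=True)(inputs)
    model = keras.Model(inputs, outputs)

    x = np.ones((2, 3, 5), dtype="float32")
    a = model.predict(x)  # starts from an all-zeros state
    b = model.predict(x)  # starts from a's final state, so it differs
    model.reset_states()  # state back to zeros
    c = model.predict(x)  # matches the first call
    assert np.allclose(a, c) and not np.allclose(a, b)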
+        batch = 12
+        timesteps = 1
+        input_dim = 8
+        output_dim = 16
+
+        test_inputs = np.full((batch, timesteps, input_dim), 0.5)
+
+        def make_model(stateful=False, with_initial_state=False):
+            input_layer = keras.Input(shape=(None, input_dim), batch_size=batch)
+            if with_initial_state:
+                initial_states = keras.backend.constant(
+                    np.ones((batch, output_dim))
+                )
+            else:
+                initial_states = None
+            rnn_output = keras.layers.GRU(
+                units=output_dim, return_sequences=True, stateful=stateful
+            )(input_layer, initial_state=initial_states)
+            model = keras.Model(input_layer, rnn_output)
+            model.compile(
+                optimizer="rmsprop",
+                loss="mse",
+                run_eagerly=test_utils.should_run_eagerly(),
+            )
+            return model
+
+        # Define a model with a constant state initialization.
+        model = make_model(stateful=True, with_initial_state=True)
+        layer_weights = model.layers[1].get_weights()
+
+        model.reset_states()
+        predict_1 = model.predict(test_inputs)
+        predict_2 = model.predict(test_inputs)
+
+        model.reset_states()
+        predict_3 = model.predict(test_inputs)
+
+        # predict_1 and predict_2 should differ since batch 2 uses the
+        # state from batch 1 as its initial state.
+        self.assertNotAllClose(predict_1, predict_2)
+        self.assertAllClose(predict_1, predict_3)
+
+        # Create a new model with the same weights but without initial
+        # states. Make sure its predictions differ from those of the model
+        # with a non-zero initial state.
+        model_2 = make_model(stateful=True, with_initial_state=False)
+        model_2.layers[1].set_weights(layer_weights)
+
+        model_2.reset_states()
+        predict_4 = model_2.predict(test_inputs)
+        predict_5 = model_2.predict(test_inputs)
+        self.assertNotAllClose(predict_1, predict_4)
+        self.assertNotAllClose(predict_4, predict_5)
+
+        # Create models with stateful=False, and make sure they handle
+        # init state correctly.
+ model_3 = make_model(stateful=False, with_initial_state=True) + model_3.layers[1].set_weights(layer_weights) + + model_3.reset_states() + predict_6 = model_3.predict(test_inputs) + predict_7 = model_3.predict(test_inputs) + self.assertAllClose(predict_1, predict_6) + self.assertAllClose(predict_6, predict_7) + + def test_stateful_rnn_with_customized_get_initial_state(self): + class TestCell(keras.layers.AbstractRNNCell): + state_size = 1 + output_size = 2 + + def get_initial_state( + self, inputs=None, batch_size=None, dtype=None + ): + return np.ones((batch_size, 1), dtype=dtype) + + def call(self, inputs, states): + return inputs, states + + layer = keras.layers.RNN(TestCell(), stateful=True, return_state=True) + inputs = keras.Input(shape=(10, 2), batch_size=4) + model = keras.Model(inputs, layer(inputs)) + x = np.ones((4, 10, 2), dtype=np.float32) + output, state = model.predict(x) + self.assertAllClose(output, np.ones((4, 2))) + self.assertAllClose(state, np.ones((4, 1))) + + def test_stateful_rnn_with_customized_dtype(self): + class TestCell(keras.layers.AbstractRNNCell): + state_size = 1 + output_size = 2 + + def get_initial_state( + self, inputs=None, batch_size=None, dtype=None + ): + return np.ones((batch_size, 1), dtype=np.float16) + + def call(self, inputs, states): + return inputs, states + + layer = keras.layers.RNN(TestCell(), stateful=True, return_state=True) + inputs = keras.Input(shape=(10, 2), batch_size=4) + model = keras.Model(inputs, layer(inputs)) + x = np.ones((4, 10, 2), dtype=np.float16) + output, state = model.predict(x) + self.assertAllClose(output, np.ones((4, 2), dtype=np.float16)) + self.assertAllClose(state, np.ones((4, 1), dtype=np.float16)) + + def test_input_dim_length(self): + simple_rnn = keras.layers.SimpleRNN(5, input_length=10, input_dim=8) + self.assertEqual(simple_rnn._batch_input_shape, (None, 10, 8)) + + simple_rnn = keras.layers.SimpleRNN(5, input_dim=8) + self.assertEqual(simple_rnn._batch_input_shape, (None, None, 8)) + + simple_rnn = keras.layers.SimpleRNN(5, input_length=10) + self.assertEqual(simple_rnn._batch_input_shape, (None, 10, None)) + + @parameterized.parameters( + [ + keras.layers.SimpleRNNCell, + keras.layers.GRUCell, + keras.layers.LSTMCell, + ] + ) + def test_state_spec_with_stack_cell(self, cell): + # See https://github.com/tensorflow/tensorflow/issues/27817 for more + # detail. 
+ batch = 12 + timesteps = 10 + input_dim = 8 + output_dim = 8 + + def create_cell(): + return [cell(output_dim), cell(output_dim), cell(output_dim)] + + inputs = keras.Input((timesteps, input_dim)) + encoder_output = keras.layers.RNN(create_cell(), return_state=True)( + inputs + ) + + states = encoder_output[1:] + + decoder_output = keras.layers.RNN(create_cell())( + inputs, initial_state=states + ) + + model = keras.models.Model(inputs, decoder_output) + model.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch( + np.zeros((batch, timesteps, input_dim)), + np.zeros((batch, output_dim)), + ) + model.predict(np.ones((batch, timesteps, input_dim))) + + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + layer=[ + keras.layers.SimpleRNN, + gru_v1.GRU, + lstm_v1.LSTM, + gru.GRU, + lstm.LSTM, + ] + ) + ) + def test_rnn_with_ragged_input(self, layer): + ragged_data = tf.ragged.constant( + [ + [[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 2.0, 3.0, 1.0, 1.0]], + [[2.0, 4.0, 1.0, 3.0, 1.0]], + [ + [2.0, 3.0, 4.0, 1.0, 5.0], + [2.0, 3.0, 1.0, 1.0, 1.0], + [1.0, 2.0, 3.0, 4.0, 5.0], + ], + ], + ragged_rank=1, + ) + label_data = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1]]) + + # Test results in feed forward + np.random.seed(100) + rnn_layer = layer(4, activation="sigmoid") + + x_ragged = keras.Input(shape=(None, 5), ragged=True) + y_ragged = rnn_layer(x_ragged) + model = keras.models.Model(x_ragged, y_ragged) + output_ragged = model.predict(ragged_data, steps=1) + + x_dense = keras.Input(shape=(3, 5)) + masking = keras.layers.Masking()(x_dense) + y_dense = rnn_layer(masking) + model_2 = keras.models.Model(x_dense, y_dense) + dense_data = ragged_data.to_tensor() + output_dense = model_2.predict(dense_data, steps=1) + + self.assertAllClose(output_dense, output_ragged) + + # Test results with go backwards + np.random.seed(200) + back_rnn_layer = layer(8, go_backwards=True, activation="sigmoid") + + x_ragged = keras.Input(shape=(None, 5), ragged=True) + y_ragged = back_rnn_layer(x_ragged) + model = keras.models.Model(x_ragged, y_ragged) + output_ragged = model.predict(ragged_data, steps=1) + + x_dense = keras.Input(shape=(3, 5)) + masking = keras.layers.Masking()(x_dense) + y_dense = back_rnn_layer(masking) + model_2 = keras.models.Model(x_dense, y_dense) + dense_data = ragged_data.to_tensor() + output_dense = model_2.predict(dense_data, steps=1) + + self.assertAllClose(output_dense, output_ragged) + + # Test densification of the ragged input + dense_tensor, row_lengths = keras.backend.convert_inputs_if_ragged( + ragged_data + ) + self.assertAllClose(dense_data, dense_tensor) + + # Test optional params, all should work except unrolling + inputs = keras.Input(shape=(None, 5), dtype=tf.float32, ragged=True) + custom_rnn_layer = layer( + 3, zero_output_for_mask=True, dropout=0.1, use_bias=True + ) + outputs = custom_rnn_layer(inputs) + model = keras.models.Model(inputs, outputs) + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch(ragged_data, label_data) + + # Test stateful and full shape specification + inputs = keras.Input( + shape=(None, 5), batch_size=3, dtype=tf.float32, ragged=True + ) + stateful_rnn_layer = layer(3, stateful=True) + outputs = stateful_rnn_layer(inputs) + model = keras.models.Model(inputs, outputs) + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + 
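The ragged/dense comparisons in this test hinge on the to_tensor() / from_tensor(..., lengths=...) round trip: densification zero-pads each row to the longest sequence, and supplying the original row lengths recovers the ragged layout. A small standalone check (toy values, not the test's data):

    import tensorflow as tf

    rt = tf.ragged.constant([[1.0, 2.0], [3.0]])
    dense = rt.to_tensor()  # [[1., 2.], [3., 0.]] (zero-padded)
    back = tf.RaggedTensor.from_tensor(dense, lengths=rt.row_lengths())
    assert back.to_list() == rt.to_list()  # round-trips exactly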
+        model.train_on_batch(ragged_data, label_data)
+
+        # Must raise an error when unroll is set to True.
+        unroll_rnn_layer = layer(3, unroll=True)
+        with self.assertRaisesRegex(
+            ValueError, "The input received contains RaggedTensors *"
+        ):
+            unroll_rnn_layer(inputs)
+
+        # Check that return_sequences outputs are correct.
+        np.random.seed(100)
+        returning_rnn_layer = layer(4, return_sequences=True)
+
+        x_ragged = keras.Input(shape=(None, 5), ragged=True)
+        y_ragged = returning_rnn_layer(x_ragged)
+        model = keras.models.Model(x_ragged, y_ragged)
+        output_ragged = model.predict(ragged_data, steps=1)
+        self.assertAllClose(output_ragged.ragged_rank, ragged_data.ragged_rank)
+        self.assertAllClose(output_ragged.row_splits, ragged_data.row_splits)
+
+        x_dense = keras.Input(shape=(3, 5))
+        masking = keras.layers.Masking()(x_dense)
+        y_dense = returning_rnn_layer(masking)
+        model_2 = keras.models.Model(x_dense, y_dense)
+        dense_data = ragged_data.to_tensor()
+        output_dense = model_2.predict(dense_data, steps=1)
+        # Convert the output here to ragged for value comparison.
+        output_dense = tf.RaggedTensor.from_tensor(
+            output_dense, lengths=row_lengths
+        )
+        self.assertAllClose(output_ragged, output_dense)
+
+        # Check that return_sequences and go_backwards outputs are correct.
+        np.random.seed(100)
+        returning_rnn_layer = layer(4, go_backwards=True, return_sequences=True)
+
+        x_ragged = keras.Input(shape=(None, 5), ragged=True)
+        y_ragged = returning_rnn_layer(x_ragged)
+        model = keras.models.Model(x_ragged, y_ragged)
+        output_ragged = model.predict(ragged_data, steps=1)
+        self.assertAllClose(output_ragged.ragged_rank, ragged_data.ragged_rank)
+        self.assertAllClose(output_ragged.row_splits, ragged_data.row_splits)
+
+        x_dense = keras.Input(shape=(3, 5))
+        masking = keras.layers.Masking()(x_dense)
+        y_dense = returning_rnn_layer(masking)
+        model_2 = keras.models.Model(x_dense, y_dense)
+        dense_data = ragged_data.to_tensor()
+        output_dense = model_2.predict(dense_data, steps=1)
+
+        # Note that the raw outputs for dense and ragged input will differ
+        # when go_backwards=True. Consider the following input
+        # [[a, b, 0], [c, 0, 0], [d, e, f]] where the 0s are masked values.
+        # The dense output will be [[0, b, a], [0, 0, c], [f, e, d]] since
+        # it processes the whole sequence from the end.
+        # The ragged output will be [[b, a], [c], [f, e, d]] since it
+        # simply ignores the 0s. And if we densify the ragged output, 0s
+        # are by default appended at the end (rather than at the
+        # beginning), which makes the output [[b, a, 0], [c, 0, 0], [f, e, d]].
+        # With this, we need to verify that
+        # reverse(ragged_output.to_tensor()) == reverse(dense_output).
+        output_dense = keras.backend.reverse(output_dense, [1])
+        output_dense = tf.RaggedTensor.from_tensor(
+            output_dense, lengths=row_lengths
+        )
+
+        self.assertAllClose(
+            keras.backend.reverse(output_ragged, [1]), output_dense
+        )
+
+    def test_stateless_rnn_cell(self):
+        class StatelessCell(keras.layers.Layer):
+            def __init__(self):
+                self.state_size = ((), [], ())
+                self.output_size = None
+                super().__init__()
+
+            def build(self, input_shape):
+                self.output_size = input_shape[-1]
+
+            def call(self, inputs, states):
+                return inputs, states
+
+        x = keras.Input((None, 5))
+        cell = StatelessCell()
+        initial_state = tf.nest.map_structure(lambda t: None, cell.state_size)
+        layer = keras.layers.RNN(cell)
+        y = layer(x, initial_state=initial_state)
+        model = keras.models.Model(x, y)
+        model.compile(
+            optimizer="rmsprop",
+            loss="mse",
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+        model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 5)))
+
+    @parameterized.parameters(
+        [keras.layers.SimpleRNN, gru_v1.GRU, lstm_v1.LSTM, gru.GRU, lstm.LSTM]
+    )
+    def test_for_enable_caching_device_for_layer(self, layer_cls):
+        expected_caching_device = (
+            tf.compat.v1.executing_eagerly_outside_functions()
+        )
+        layer = layer_cls(1)
+        self.assertEqual(
+            layer.cell._enable_caching_device, expected_caching_device
+        )
+
+        # Make sure the config only appears when a non-default value is used.
+        config = layer.get_config()
+        self.assertNotIn("enable_caching_device", config)
+
+        non_default_value = not expected_caching_device
+        layer = layer_cls(1, enable_caching_device=non_default_value)
+        self.assertEqual(layer.cell._enable_caching_device, non_default_value)
+        config = layer.get_config()
+        self.assertEqual(config["enable_caching_device"], non_default_value)
+
+    @parameterized.parameters(
+        [
+            keras.layers.SimpleRNNCell,
+            gru_v1.GRUCell,
+            lstm_v1.LSTMCell,
+            gru.GRUCell,
+            lstm.LSTMCell,
+        ]
+    )
+    def test_for_enable_caching_device_for_cell(self, cell_cls):
+        expected_caching_device = (
+            tf.compat.v1.executing_eagerly_outside_functions()
+        )
+        cell = cell_cls(1)
+        self.assertEqual(cell._enable_caching_device, expected_caching_device)
-  def test_minimal_rnn_cell_non_layer(self):
-
-    class MinimalRNNCell:
+        # Make sure the config only appears when a non-default value is used.
+        config = cell.get_config()
+        self.assertNotIn("enable_caching_device", config)
-      def __init__(self, units, input_dim):
-        self.units = units
-        self.state_size = units
-        self.kernel = keras.backend.variable(
-            np.random.random((input_dim, units)))
+        non_default_value = not expected_caching_device
+        cell = cell_cls(1, enable_caching_device=non_default_value)
+        self.assertEqual(cell._enable_caching_device, non_default_value)
+        config = cell.get_config()
+        self.assertEqual(config["enable_caching_device"], non_default_value)
-      def call(self, inputs, states):
-        prev_output = states[0]
-        output = keras.backend.dot(inputs, self.kernel) + prev_output
-        return output, [output]
-    # Basic test case.
-    cell = MinimalRNNCell(32, 5)
-    x = keras.Input((None, 5))
-    layer = keras.layers.RNN(cell)
-    y = layer(x)
-    model = keras.models.Model(x, y)
-    model.compile(
-        optimizer='rmsprop',
-        loss='mse',
-        run_eagerly=test_utils.should_run_eagerly())
-    model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
-
-    # Test stacking.
- cells = [MinimalRNNCell(8, 5), - MinimalRNNCell(32, 8), - MinimalRNNCell(32, 32)] - layer = keras.layers.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - def test_minimal_rnn_cell_non_layer_multiple_states(self): - - class MinimalRNNCell: - - def __init__(self, units, input_dim): - self.units = units - self.state_size = (units, units) - self.kernel = keras.backend.variable( - np.random.random((input_dim, units))) - - def call(self, inputs, states): - prev_output_1 = states[0] - prev_output_2 = states[1] - output = keras.backend.dot(inputs, self.kernel) - output += prev_output_1 - output -= prev_output_2 - return output, [output * 2, output * 3] - - # Basic test case. - cell = MinimalRNNCell(32, 5) - x = keras.Input((None, 5)) - layer = keras.layers.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. - cells = [MinimalRNNCell(8, 5), - MinimalRNNCell(16, 8), - MinimalRNNCell(32, 16)] - layer = keras.layers.RNN(cells) - self.assertEqual(layer.cell.state_size, ((8, 8), (16, 16), (32, 32))) - self.assertEqual(layer.cell.output_size, 32) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - def test_minimal_rnn_cell_layer(self): - - class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): +class RNNCellWithConstants(keras.layers.Layer): + def __init__(self, units, constant_size, **kwargs): self.units = units self.state_size = units + self.constant_size = constant_size super().__init__(**kwargs) - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') + def build(self, input_shape): + self.input_kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="uniform", + name="kernel", + ) self.recurrent_kernel = self.add_weight( shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') + initializer="uniform", + name="recurrent_kernel", + ) + self.constant_kernel = self.add_weight( + shape=(self.constant_size, self.units), + initializer="uniform", + name="constant_kernel", + ) self.built = True - def call(self, inputs, states): - prev_output = states[0] - h = keras.backend.dot(inputs, self.kernel) - output = h + keras.backend.dot(prev_output, self.recurrent_kernel) + def call(self, inputs, states, constants): + [prev_output] = states + [constant] = constants + h_input = keras.backend.dot(inputs, self.input_kernel) + h_state = keras.backend.dot(prev_output, self.recurrent_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) + output = h_input + h_state + h_const return output, [output] - def get_config(self): - config = {'units': self.units} + def get_config(self): + config = {"units": self.units, "constant_size": self.constant_size} base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) - # Test basic case. 
- x = keras.Input((None, 5)) - cell = MinimalRNNCell(32) - layer = keras.layers.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - with generic_utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): - layer = keras.layers.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - # Test stacking. - cells = [MinimalRNNCell(8), - MinimalRNNCell(12), - MinimalRNNCell(32)] - layer = keras.layers.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacked RNN serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - with generic_utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): - layer = keras.layers.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - def test_minimal_rnn_cell_abstract_rnn_cell(self): - - class MinimalRNNCell(keras.layers.AbstractRNNCell): - - def __init__(self, units, **kwargs): - self.units = units - super().__init__(**kwargs) - - @property - def state_size(self): - return self.units - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = keras.backend.dot(inputs, self.kernel) - output = h + keras.backend.dot(prev_output, self.recurrent_kernel) - return output, output - - @property - def output_size(self): - return self.units - - cell = MinimalRNNCell(32) - x = keras.Input((None, 5)) - layer = keras.layers.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. - cells = [MinimalRNNCell(8), - MinimalRNNCell(16), - MinimalRNNCell(32)] - layer = keras.layers.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - def test_rnn_with_time_major(self): - batch = 10 - time_step = 5 - embedding_dim = 4 - units = 3 - - # Test basic case. 
- x = keras.Input((time_step, embedding_dim)) - time_major_x = keras.layers.Lambda( - lambda t: tf.transpose(t, [1, 0, 2]))(x) - layer = keras.layers.SimpleRNN( - units, time_major=True, return_sequences=True) - self.assertEqual( - layer.compute_output_shape((time_step, None, - embedding_dim)).as_list(), - [time_step, None, units]) - y = layer(time_major_x) - self.assertEqual(layer.output_shape, (time_step, None, units)) - - y = keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))(y) - - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, time_step, embedding_dim)), - np.zeros((batch, time_step, units))) - - # Test stacking. - x = keras.Input((time_step, embedding_dim)) - time_major_x = keras.layers.Lambda( - lambda t: tf.transpose(t, [1, 0, 2]))(x) - cell_units = [10, 8, 6] - cells = [keras.layers.SimpleRNNCell(cell_units[i]) for i in range(3)] - layer = keras.layers.RNN(cells, time_major=True, return_sequences=True) - y = layer(time_major_x) - self.assertEqual(layer.output_shape, (time_step, None, cell_units[-1])) - - y = keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))(y) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, time_step, embedding_dim)), - np.zeros((batch, time_step, cell_units[-1]))) - - # Test masking. - x = keras.Input((time_step, embedding_dim)) - time_major = keras.layers.Lambda( - lambda t: tf.transpose(t, [1, 0, 2]))(x) - mask = keras.layers.Masking()(time_major) - rnn = keras.layers.SimpleRNN( - units, time_major=True, return_sequences=True)(mask) - y = keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))(rnn) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, time_step, embedding_dim)), - np.zeros((batch, time_step, units))) - - # Test layer output - x = keras.Input((time_step, embedding_dim)) - rnn_1 = keras.layers.SimpleRNN(units, return_sequences=True) - y = rnn_1(x) - - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, time_step, embedding_dim)), - np.zeros((batch, time_step, units))) - - x_np = np.random.random((batch, time_step, embedding_dim)) - y_np_1 = model.predict(x_np) - - time_major = keras.layers.Lambda( - lambda t: tf.transpose(t, [1, 0, 2]))(x) - rnn_2 = keras.layers.SimpleRNN( - units, time_major=True, return_sequences=True) - y_2 = rnn_2(time_major) - y_2 = keras.layers.Lambda( - lambda t: tf.transpose(t, [1, 0, 2]))(y_2) - - model_2 = keras.models.Model(x, y_2) - rnn_2.set_weights(rnn_1.get_weights()) - - y_np_2 = model_2.predict(x_np) - self.assertAllClose(y_np_1, y_np_2, atol=1e-4) - - def test_rnn_cell_with_constants_layer(self): - # Test basic case. - x = keras.Input((None, 5)) - c = keras.Input((3,)) - cell = RNNCellWithConstants(32, constant_size=3) - layer = keras.layers.RNN(cell) - y = layer(x, constants=c) - - model = keras.models.Model([x, c], y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 3))], - np.zeros((6, 32)) - ) - # Test basic case serialization. 
- x_np = np.random.random((6, 5, 5)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.RNN.from_config(config.copy()) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, c_np]) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - # test flat list inputs. - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.RNN.from_config(config.copy()) - y = layer([x, c]) - model = keras.models.Model([x, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, c_np]) - self.assertAllClose(y_np, y_np_3, atol=1e-4) - - # Test stacking. - cells = [gru.GRUCell(8), - RNNCellWithConstants(12, constant_size=3), - RNNCellWithConstants(32, constant_size=3)] - layer = keras.layers.RNN(cells) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 3))], - np.zeros((6, 32)) - ) - - # Test GRUCell reset_after property. - x = keras.Input((None, 5)) - c = keras.Input((3,)) - cells = [gru.GRUCell(32, reset_after=True)] - layer = keras.layers.RNN(cells) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 3))], - np.zeros((6, 32)) - ) +class Minimal2DRNNCell(keras.layers.Layer): + """The minimal 2D RNN cell is a simple combination of 2 1-D RNN cell. - # Test stacked RNN serialization - x_np = np.random.random((6, 5, 5)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.RNN.from_config(config.copy()) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, c_np]) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - def test_rnn_cell_with_non_keras_constants(self): - # Test basic case. - x = keras.Input((None, 5)) - c = tf.zeros([6, 3], dtype=tf.float32) - cell = RNNCellWithConstants(32, constant_size=3) - layer = keras.layers.RNN(cell) - y = layer(x, constants=c) - - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. - cells = [gru.GRUCell(8), - RNNCellWithConstants(12, constant_size=3), - RNNCellWithConstants(32, constant_size=3)] - layer = keras.layers.RNN(cells) - y = layer(x, constants=c) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - def test_rnn_cell_with_constants_layer_passing_initial_state(self): - # Test basic case. 
- x = keras.Input((None, 5)) - c = keras.Input((3,)) - s = keras.Input((32,)) - cell = RNNCellWithConstants(32, constant_size=3) - layer = keras.layers.RNN(cell) - y = layer(x, initial_state=s, constants=c) - model = keras.models.Model([x, s, c], y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))], - np.zeros((6, 32)) - ) + Both internal state and output have 2 dimensions and are orthogonal + between each other. + """ - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - s_np = np.random.random((6, 32)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, s_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.RNN.from_config(config.copy()) - y = layer(x, initial_state=s, constants=c) - model = keras.models.Model([x, s, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, s_np, c_np]) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - # verify that state is used - y_np_2_different_s = model.predict([x_np, s_np + 10., c_np]) - with self.assertRaises(AssertionError): - self.assertAllClose(y_np, y_np_2_different_s, atol=1e-4) - - # test flat list inputs - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.RNN.from_config(config.copy()) - y = layer([x, s, c]) - model = keras.models.Model([x, s, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, s_np, c_np]) - self.assertAllClose(y_np, y_np_3, atol=1e-4) - - def test_rnn_cell_with_non_keras_constants_and_initial_state(self): - # Test basic case. - x = keras.Input((None, 5)) - c = tf.zeros([6, 3], dtype=tf.float32) - s = tf.zeros([6, 32], dtype=tf.float32) - cell = RNNCellWithConstants(32, constant_size=3) - layer = keras.layers.RNN(cell) - y = layer(x, initial_state=s, constants=c) - - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. 
- cells = [gru.GRUCell(8), - RNNCellWithConstants(12, constant_size=3), - RNNCellWithConstants(32, constant_size=3)] - layer = keras.layers.RNN(cells) - s = [tf.zeros([6, 8], dtype=tf.float32), - tf.zeros([6, 12], dtype=tf.float32), - tf.zeros([6, 32], dtype=tf.float32)] - y = layer(x, initial_state=s, constants=c) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - def test_stacked_rnn_attributes(self): - if tf.executing_eagerly(): - self.skipTest('reduce_sum is not available in eager mode.') - - cells = [keras.layers.LSTMCell(1), - keras.layers.LSTMCell(1)] - layer = keras.layers.RNN(cells) - layer.build((None, None, 1)) - - # Test weights - self.assertEqual(len(layer.trainable_weights), 6) - cells[0].trainable = False - self.assertEqual(len(layer.trainable_weights), 3) - self.assertEqual(len(layer.non_trainable_weights), 3) - - # Test `get_losses_for` and `losses` - x = keras.Input((None, 1)) - loss_1 = tf.reduce_sum(x) - loss_2 = tf.reduce_sum(cells[0].kernel) - cells[0].add_loss(loss_1, inputs=x) - cells[0].add_loss(loss_2) - self.assertEqual(len(layer.losses), 2) - self.assertEqual(layer.get_losses_for(None), [loss_2]) - self.assertEqual(layer.get_losses_for(x), [loss_1]) - - # Test `updates` - cells = [keras.layers.LSTMCell(1), - keras.layers.LSTMCell(1)] - layer = keras.layers.RNN(cells) - x = keras.Input((None, 1)) - _ = layer(x) - - update_1 = tf.compat.v1.assign_add(cells[0].kernel, - x[0, 0, 0] * cells[0].kernel) - update_2 = tf.compat.v1.assign_add(cells[0].kernel, - tf.ones_like(cells[0].kernel)) - # TODO(b/128682878): Remove when RNNCells are __call__'d. - with base_layer_utils.call_context().enter(layer, x, True, None): - cells[0].add_update(update_1) - cells[0].add_update(update_2) - self.assertEqual(len(layer.updates), 2) - - def test_rnn_dynamic_trainability(self): - layer_class = keras.layers.SimpleRNN - embedding_dim = 4 - units = 3 - - layer = layer_class(units) - layer.build((None, None, embedding_dim)) - self.assertEqual(len(layer.weights), 3) - self.assertEqual(len(layer.trainable_weights), 3) - self.assertEqual(len(layer.non_trainable_weights), 0) - layer.trainable = False - self.assertEqual(len(layer.weights), 3) - self.assertEqual(len(layer.trainable_weights), 0) - self.assertEqual(len(layer.non_trainable_weights), 3) - layer.trainable = True - self.assertEqual(len(layer.weights), 3) - self.assertEqual(len(layer.trainable_weights), 3) - self.assertEqual(len(layer.non_trainable_weights), 0) - - @parameterized.parameters( - [keras.layers.SimpleRNN, keras.layers.GRU, keras.layers.LSTM]) - def test_rnn_cell_trainability(self, layer_cls): - # https://github.com/tensorflow/tensorflow/issues/32369. 
- layer = layer_cls(3, trainable=False) - self.assertFalse(layer.cell.trainable) - - layer.trainable = True - self.assertTrue(layer.cell.trainable) - - def test_state_reuse_with_dropout(self): - layer_class = keras.layers.SimpleRNN - embedding_dim = 4 - units = 3 - timesteps = 2 - num_samples = 2 - - input1 = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim)) - layer = layer_class(units, - return_state=True, - return_sequences=True, - dropout=0.2) - state = layer(input1)[1:] - - input2 = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim)) - output = layer_class(units)(input2, initial_state=state) - model = keras.Model([input1, input2], output) - - inputs = [np.random.random((num_samples, timesteps, embedding_dim)), - np.random.random((num_samples, timesteps, embedding_dim))] - model.predict(inputs) - - def test_builtin_and_custom_rnn_cell_serialization(self): - - @keras.utils.generic_utils.register_keras_serializable(package='TestOnly') - class CustomRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units + def __init__(self, unit_a, unit_b, **kwargs): + self.unit_a = unit_a + self.unit_b = unit_b + self.state_size = tf.TensorShape([unit_a, unit_b]) + self.output_size = tf.TensorShape([unit_a, unit_b]) super().__init__(**kwargs) - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') + def build(self, input_shape): + input_a = input_shape[-2] + input_b = input_shape[-1] + self.kernel = self.add_weight( + shape=(input_a, input_b, self.unit_a, self.unit_b), + initializer="uniform", + name="kernel", + ) + self.recurring_kernel = self.add_weight( + shape=(self.unit_a, self.unit_b, self.unit_a, self.unit_b), + initializer="uniform", + name="recurring_kernel", + ) + self.bias = self.add_weight( + shape=(self.unit_a, self.unit_b), initializer="uniform", name="bias" + ) self.built = True - def call(self, inputs, states): + def call(self, inputs, states): prev_output = states[0] - h = keras.backend.dot(inputs, self.kernel) - output = h + keras.backend.dot(prev_output, self.recurrent_kernel) + h = tf.einsum("bij,ijkl->bkl", inputs, self.kernel) + h += tf.expand_dims(self.bias, axis=0) + output = h + tf.einsum( + "bij,ijkl->bkl", prev_output, self.recurring_kernel + ) return output, [output] - def get_config(self): - config = {'units': self.units} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - for cell_class in [keras.layers.SimpleRNNCell, - keras.layers.GRUCell, - keras.layers.LSTMCell, - CustomRNNCell]: - # Test basic case. - x = keras.Input((None, 5)) - cell = cell_class(32) - layer = keras.layers.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - layer = keras.layers.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - # Test stacking. 
- cells = [cell_class(8), - cell_class(12), - cell_class(32)] - layer = keras.layers.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - # Test stacked RNN serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - layer = keras.layers.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - layer=[ - keras.layers.SimpleRNN, gru_v1.GRU, lstm_v1.LSTM, gru.GRU, - lstm.LSTM - ], - unroll=[True, False])) - def test_rnn_dropout(self, layer, unroll): - rnn_layer = layer(3, dropout=0.1, recurrent_dropout=0.1, unroll=unroll) - if not unroll: - x = keras.Input((None, 5)) - else: - x = keras.Input((5, 5)) - y = rnn_layer(x) - model = keras.models.Model(x, y) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - x_np = np.random.random((6, 5, 5)) - y_np = np.random.random((6, 3)) - model.train_on_batch(x_np, y_np) - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - cell=[keras.layers.SimpleRNNCell, keras.layers.GRUCell, - keras.layers.LSTMCell], - unroll=[True, False])) - def test_stacked_rnn_dropout(self, cell, unroll): - cells = [cell(3, dropout=0.1, recurrent_dropout=0.1), - cell(3, dropout=0.1, recurrent_dropout=0.1)] - layer = keras.layers.RNN(cells, unroll=unroll) - - if not unroll: - x = keras.Input((None, 5)) - else: - x = keras.Input((5, 5)) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - x_np = np.random.random((6, 5, 5)) - y_np = np.random.random((6, 3)) - model.train_on_batch(x_np, y_np) - - def test_dropout_mask_reuse(self): - # The layer is created with recurrent_initializer = zero, so that the - # the recurrent state won't affect the output. By doing this, we can verify - # the output and see if the same mask is applied to for each timestep. 
- layer_1 = keras.layers.SimpleRNN(3, - dropout=0.5, - kernel_initializer='ones', - recurrent_initializer='zeros', - return_sequences=True, - unroll=True) - layer_2 = keras.layers.RNN( - keras.layers.SimpleRNNCell(3, - dropout=0.5, - kernel_initializer='ones', - recurrent_initializer='zeros'), - return_sequences=True, - unroll=True) - layer_3 = keras.layers.RNN( - [keras.layers.SimpleRNNCell(3, - dropout=0.5, - kernel_initializer='ones', - recurrent_initializer='zeros'), - keras.layers.SimpleRNNCell(3, - dropout=0.5, - kernel_initializer='ones', - recurrent_initializer='zeros') - ], - return_sequences=True, - unroll=True) - - def verify(rnn_layer): - inputs = tf.constant(1.0, shape=(6, 2, 5)) - out = rnn_layer(inputs, training=True) - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.global_variables_initializer()) - batch_1 = self.evaluate(out) - batch_1_t0, batch_1_t1 = batch_1[:, 0, :], batch_1[:, 1, :] - self.assertAllClose(batch_1_t0, batch_1_t1) - - # This simulates the layer being called with multiple batches in eager mode - if tf.executing_eagerly(): - out2 = rnn_layer(inputs, training=True) - else: - out2 = out - batch_2 = self.evaluate(out2) - batch_2_t0, batch_2_t1 = batch_2[:, 0, :], batch_2[:, 1, :] - self.assertAllClose(batch_2_t0, batch_2_t1) - - # Also validate that a different dropout mask is used between batches. - self.assertNotAllClose(batch_1_t0, batch_2_t0) - self.assertNotAllClose(batch_1_t1, batch_2_t1) - - for l in [layer_1, layer_2, layer_3]: - verify(l) - - def test_stacked_rnn_compute_output_shape(self): - cells = [keras.layers.LSTMCell(3), - keras.layers.LSTMCell(6)] - embedding_dim = 4 - timesteps = 2 - layer = keras.layers.RNN(cells, return_state=True, return_sequences=True) - output_shape = layer.compute_output_shape((None, timesteps, embedding_dim)) - expected_output_shape = [(None, timesteps, 6), - (None, 3), - (None, 3), - (None, 6), - (None, 6)] - self.assertEqual( - [tuple(o.as_list()) for o in output_shape], - expected_output_shape) - - # Test reverse_state_order = True for stacked cell.
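For context on the assertions that follow: `reverse_state_order=True` only changes the order in which `StackedRNNCells` reports its state tensors, not which cell produces the sequence output. A small sketch of the flipped state shapes, assuming eager TF 2.x:

```python
import tensorflow as tf

# With reverse_state_order=True, StackedRNNCells exposes the state of
# the last cell first, flipping the order of the returned state
# tensors (the test below asserts the corresponding shapes).
cells = [tf.keras.layers.LSTMCell(3), tf.keras.layers.LSTMCell(6)]
stacked = tf.keras.layers.StackedRNNCells(cells, reverse_state_order=True)
rnn = tf.keras.layers.RNN(stacked, return_state=True, return_sequences=True)
outputs = rnn(tf.zeros((2, 4, 5)))
# outputs[0] is the sequence output from the last cell; the remaining
# entries are states, ordered last-cell-first due to reverse_state_order.
print([o.shape for o in outputs])
```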
- stacked_cell = keras.layers.StackedRNNCells( - cells, reverse_state_order=True) - layer = keras.layers.RNN( - stacked_cell, return_state=True, return_sequences=True) - output_shape = layer.compute_output_shape((None, timesteps, embedding_dim)) - expected_output_shape = [(None, timesteps, 6), - (None, 6), - (None, 6), - (None, 3), - (None, 3)] - self.assertEqual( - [tuple(o.as_list()) for o in output_shape], - expected_output_shape) - - def test_stacked_rnn_with_training_param(self): - # See https://github.com/tensorflow/tensorflow/issues/32586 - - class CellWrapper(keras.layers.AbstractRNNCell): - - def __init__(self, cell): - super().__init__() - self.cell = cell - - @property - def state_size(self): - return self.cell.state_size - - @property - def output_size(self): - return self.cell.output_size - - def build(self, input_shape): - self.cell.build(input_shape) - self.built = True - - def get_initial_state(self, inputs=None, batch_size=None, dtype=None): - return self.cell.get_initial_state( - inputs=inputs, batch_size=batch_size, dtype=dtype) - - def call(self, inputs, states, training=None, **kwargs): - assert training is not None - return self.cell(inputs, states=states, training=training) - - cell = keras.layers.LSTMCell(32) - cell = CellWrapper(cell) - cell = keras.layers.StackedRNNCells([cell]) - - rnn = keras.layers.RNN(cell) - inputs = np.ones((8, 4, 16), dtype=np.float32) - rnn(inputs, training=True) - - def test_stacked_rnn_with_nested_cell(self): - batch = 10 - t = 5 - i1, i2, i3 = 3, 4, 5 - o11, o12, o13 = 2, 3, 4 - o21, o22, o23 = 4, 5, 6 - - # test 1: use_tuple=False - cells = [NestedCell(o11, o12, o13), NestedCell(o21, o22, o23)] - rnn = keras.layers.RNN(cells, return_sequences=True, return_state=True) - - input_1 = keras.Input((t, i1)) - input_2 = keras.Input((t, i2, i3)) - - output1, output2, state1, state2 = rnn((input_1, input_2)) - s11, s12 = state1 - s21, s22 = state2 - - self.assertEqual(output1.shape.as_list(), [None, t, o21]) - self.assertEqual(output2.shape.as_list(), [None, t, o22, o23]) - self.assertEqual(s11.shape.as_list(), [None, o11]) - self.assertEqual(s12.shape.as_list(), [None, o12, o13]) - self.assertEqual(s21.shape.as_list(), [None, o21]) - self.assertEqual(s22.shape.as_list(), [None, o22, o23]) - - model = keras.models.Model([input_1, input_2], [output1, output2]) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((batch, t, i1)), - np.zeros((batch, t, i2, i3))], - [np.zeros((batch, t, o21)), - np.zeros((batch, t, o22, o23))]) - self.assertEqual(model.output_shape, [(None, t, o21), (None, t, o22, o23)]) - - # test 2: use_tuple=True - cells = [ - NestedCell(o11, o12, o13, use_tuple=True), - NestedCell(o21, o22, o23) - ] - - rnn = keras.layers.RNN(cells, return_sequences=True, return_state=True) - - input_1 = keras.Input((t, i1)) - input_2 = keras.Input((t, i2, i3)) - - output1, output2, state1, state2 = rnn(NestedInput(t1=input_1, t2=input_2)) - s11, s12 = state1 - s21, s22 = state2 - - self.assertEqual(output1.shape.as_list(), [None, t, o21]) - self.assertEqual(output2.shape.as_list(), [None, t, o22, o23]) - self.assertEqual(s11.shape.as_list(), [None, o11]) - self.assertEqual(s12.shape.as_list(), [None, o12, o13]) - self.assertEqual(s21.shape.as_list(), [None, o21]) - self.assertEqual(s22.shape.as_list(), [None, o22, o23]) - - model = keras.models.Model([input_1, input_2], [output1, output2]) - model.compile( - optimizer='rmsprop', - loss='mse', - 
run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((batch, t, i1)), - np.zeros((batch, t, i2, i3))], - [np.zeros((batch, t, o21)), - np.zeros((batch, t, o22, o23))]) - self.assertEqual(model.output_shape, [(None, t, o21), (None, t, o22, o23)]) - - def test_trackable_dependencies(self): - rnn = keras.layers.SimpleRNN - x = np.random.random((2, 2, 2)) - y = np.random.random((2, 2)) - model = keras.models.Sequential() - model.add(rnn(2)) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, batch_size=1) - - # check whether the model variables are present in the - # trackable list of objects - checkpointed_objects = {id(o) for o in trackable_util.list_objects(model)} - for v in model.variables: - self.assertIn(id(v), checkpointed_objects) - - def test_high_dimension_RNN(self): - # Basic test case. - unit_a = 10 - unit_b = 20 - input_a = 5 - input_b = 10 - batch = 32 - time_step = 4 - - cell = Minimal2DRNNCell(unit_a, unit_b) - x = keras.Input((None, input_a, input_b)) - layer = keras.layers.RNN(cell) - y = layer(x) - - self.assertEqual(cell.state_size.as_list(), [unit_a, unit_b]) - - if not tf.executing_eagerly(): - init_state = layer.get_initial_state(x) - self.assertEqual(len(init_state), 1) - self.assertEqual(init_state[0].shape.as_list(), [None, unit_a, unit_b]) - - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, time_step, input_a, input_b)), - np.zeros((batch, unit_a, unit_b))) - self.assertEqual(model.output_shape, (None, unit_a, unit_b)) - - # Test stacking. - cells = [ - Minimal2DRNNCell(unit_a, unit_b), - Minimal2DRNNCell(unit_a * 2, unit_b * 2), - Minimal2DRNNCell(unit_a * 4, unit_b * 4) - ] - layer = keras.layers.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, time_step, input_a, input_b)), - np.zeros((batch, unit_a * 4, unit_b * 4))) - self.assertEqual(model.output_shape, (None, unit_a * 4, unit_b * 4)) - - def test_high_dimension_RNN_with_init_state(self): - unit_a = 10 - unit_b = 20 - input_a = 5 - input_b = 10 - batch = 32 - time_step = 4 - - # Basic test case. - cell = Minimal2DRNNCell(unit_a, unit_b) - x = keras.Input((None, input_a, input_b)) - s = keras.Input((unit_a, unit_b)) - layer = keras.layers.RNN(cell) - y = layer(x, initial_state=s) - - model = keras.models.Model([x, s], y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch([ - np.zeros((batch, time_step, input_a, input_b)), - np.zeros((batch, unit_a, unit_b)) - ], np.zeros((batch, unit_a, unit_b))) - self.assertEqual(model.output_shape, (None, unit_a, unit_b)) - - # Bad init state shape. 
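The "bad init state shape" case below leans on `RNN` validating `initial_state` against `cell.state_size` at call time. A sketch of the same failure with a built-in cell; the exact error wording is an assumption, as the test only matches a fragment of it:

```python
from tensorflow import keras

# An `initial_state` whose shape disagrees with `cell.state_size`
# should be rejected when the layer is called.
cell = keras.layers.SimpleRNNCell(5)  # state_size == 5
x = keras.Input((None, 8))
bad_state = keras.Input((7,))         # deliberately the wrong width
layer = keras.layers.RNN(cell)
try:
    layer(x, initial_state=bad_state)
except ValueError as e:
    print(e)  # expected to mention `cell.state_size`
```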
- bad_shape_a = unit_a * 2 - bad_shape_b = unit_b * 2 - cell = Minimal2DRNNCell(unit_a, unit_b) - x = keras.Input((None, input_a, input_b)) - s = keras.Input((bad_shape_a, bad_shape_b)) - layer = keras.layers.RNN(cell) - with self.assertRaisesWithPredicateMatch(ValueError, - 'however `cell.state_size` is'): - layer(x, initial_state=s) - - def test_inconsistent_output_state_size(self): - batch = 32 - time_step = 4 - state_size = 5 - input_size = 6 - cell = PlusOneRNNCell(state_size) - x = keras.Input((None, input_size)) - layer = keras.layers.RNN(cell) - y = layer(x) - - self.assertEqual(cell.state_size, state_size) - if not tf.executing_eagerly(): - init_state = layer.get_initial_state(x) - self.assertEqual(len(init_state), 1) - self.assertEqual(init_state[0].shape.as_list(), [None, state_size]) - - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, time_step, input_size)), - np.zeros((batch, input_size))) - self.assertEqual(model.output_shape, (None, input_size)) - - def test_get_initial_state(self): - cell = keras.layers.SimpleRNNCell(5) - with self.assertRaisesRegex(ValueError, - 'batch_size and dtype cannot be None'): - cell.get_initial_state(None, None, None) - - if not tf.executing_eagerly(): - inputs = keras.Input((None, 10)) - initial_state = cell.get_initial_state(inputs, None, None) - self.assertEqual(initial_state.shape.as_list(), [None, 5]) - self.assertEqual(initial_state.dtype, inputs.dtype) - - batch = tf.shape(inputs)[0] - dtype = inputs.dtype - initial_state = cell.get_initial_state(None, batch, dtype) - self.assertEqual(initial_state.shape.as_list(), [None, 5]) - self.assertEqual(initial_state.dtype, inputs.dtype) - else: - batch = 8 - inputs = np.random.random((batch, 10)) - initial_state = cell.get_initial_state(inputs, None, None) - self.assertEqual(initial_state.shape.as_list(), [8, 5]) - self.assertEqual(initial_state.dtype, inputs.dtype) - - dtype = inputs.dtype - initial_state = cell.get_initial_state(None, batch, dtype) - self.assertEqual(initial_state.shape.as_list(), [batch, 5]) - self.assertEqual(initial_state.dtype, inputs.dtype) - - @parameterized.parameters([True, False]) - def test_nested_input_output(self, stateful): - batch = 10 - t = 5 - i1, i2, i3 = 3, 4, 5 - o1, o2, o3 = 2, 3, 4 - - cell = NestedCell(o1, o2, o3) - rnn = keras.layers.RNN(cell, stateful=stateful) - - batch_size = batch if stateful else None - input_1 = keras.Input((t, i1), batch_size=batch_size) - input_2 = keras.Input((t, i2, i3), batch_size=batch_size) - - outputs = rnn((input_1, input_2)) - - self.assertEqual(len(outputs), 2) - self.assertEqual(outputs[0].shape.as_list(), [batch_size, o1]) - self.assertEqual(outputs[1].shape.as_list(), [batch_size, o2, o3]) - - model = keras.models.Model((input_1, input_2), outputs) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((batch, t, i1)), np.zeros((batch, t, i2, i3))], - [np.zeros((batch, o1)), np.zeros((batch, o2, o3))]) - self.assertEqual(model.output_shape, [(batch_size, o1), - (batch_size, o2, o3)]) - - cell = NestedCell(o1, o2, o3, use_tuple=True) - - rnn = keras.layers.RNN(cell, stateful=stateful) - - input_1 = keras.Input((t, i1), batch_size=batch_size) - input_2 = keras.Input((t, i2, i3), batch_size=batch_size) - - outputs = rnn(NestedInput(t1=input_1, t2=input_2)) - - self.assertEqual(len(outputs), 2) - 
self.assertEqual(outputs[0].shape.as_list(), [batch_size, o1]) - self.assertEqual(outputs[1].shape.as_list(), [batch_size, o2, o3]) - - model = keras.models.Model([input_1, input_2], outputs) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((batch, t, i1)), - np.zeros((batch, t, i2, i3))], - [np.zeros((batch, o1)), np.zeros((batch, o2, o3))]) - self.assertEqual(model.output_shape, [(batch_size, o1), - (batch_size, o2, o3)]) - - def test_nested_input_output_with_state(self): - batch = 10 - t = 5 - i1, i2, i3 = 3, 4, 5 - o1, o2, o3 = 2, 3, 4 - - cell = NestedCell(o1, o2, o3) - rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True) - - input_1 = keras.Input((t, i1)) - input_2 = keras.Input((t, i2, i3)) - - output1, output2, s1, s2 = rnn((input_1, input_2)) - - self.assertEqual(output1.shape.as_list(), [None, t, o1]) - self.assertEqual(output2.shape.as_list(), [None, t, o2, o3]) - self.assertEqual(s1.shape.as_list(), [None, o1]) - self.assertEqual(s2.shape.as_list(), [None, o2, o3]) - - model = keras.models.Model([input_1, input_2], [output1, output2]) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((batch, t, i1)), - np.zeros((batch, t, i2, i3))], - [np.zeros((batch, t, o1)), - np.zeros((batch, t, o2, o3))]) - self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)]) - - cell = NestedCell(o1, o2, o3, use_tuple=True) - - rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True) - - input_1 = keras.Input((t, i1)) - input_2 = keras.Input((t, i2, i3)) - - output1, output2, s1, s2 = rnn(NestedInput(t1=input_1, t2=input_2)) - - self.assertEqual(output1.shape.as_list(), [None, t, o1]) - self.assertEqual(output2.shape.as_list(), [None, t, o2, o3]) - self.assertEqual(s1.shape.as_list(), [None, o1]) - self.assertEqual(s2.shape.as_list(), [None, o2, o3]) - - model = keras.models.Model([input_1, input_2], [output1, output2]) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((batch, t, i1)), - np.zeros((batch, t, i2, i3))], - [np.zeros((batch, t, o1)), - np.zeros((batch, t, o2, o3))]) - self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)]) - - def test_nest_input_output_with_init_state(self): - batch = 10 - t = 5 - i1, i2, i3 = 3, 4, 5 - o1, o2, o3 = 2, 3, 4 - - cell = NestedCell(o1, o2, o3) - rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True) - - input_1 = keras.Input((t, i1)) - input_2 = keras.Input((t, i2, i3)) - init_s1 = keras.Input((o1,)) - init_s2 = keras.Input((o2, o3)) - - output1, output2, s1, s2 = rnn((input_1, input_2), - initial_state=(init_s1, init_s2)) - - self.assertEqual(output1.shape.as_list(), [None, t, o1]) - self.assertEqual(output2.shape.as_list(), [None, t, o2, o3]) - self.assertEqual(s1.shape.as_list(), [None, o1]) - self.assertEqual(s2.shape.as_list(), [None, o2, o3]) - - model = keras.models.Model([input_1, input_2, init_s1, init_s2], - [output1, output2]) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((batch, t, i1)), - np.zeros((batch, t, i2, i3)), - np.zeros((batch, o1)), - np.zeros((batch, o2, o3))], - [np.zeros((batch, t, o1)), - np.zeros((batch, t, o2, o3))]) - self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)]) - - cell = 
NestedCell(o1, o2, o3, use_tuple=True) - - rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True) - - input_1 = keras.Input((t, i1)) - input_2 = keras.Input((t, i2, i3)) - init_s1 = keras.Input((o1,)) - init_s2 = keras.Input((o2, o3)) - init_state = NestedState(s1=init_s1, s2=init_s2) - - output1, output2, s1, s2 = rnn(NestedInput(t1=input_1, t2=input_2), - initial_state=init_state) - - self.assertEqual(output1.shape.as_list(), [None, t, o1]) - self.assertEqual(output2.shape.as_list(), [None, t, o2, o3]) - self.assertEqual(s1.shape.as_list(), [None, o1]) - self.assertEqual(s2.shape.as_list(), [None, o2, o3]) - - model = keras.models.Model([input_1, input_2, init_s1, init_s2], - [output1, output2]) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - [np.zeros((batch, t, i1)), - np.zeros((batch, t, i2, i3)), - np.zeros((batch, o1)), - np.zeros((batch, o2, o3))], - [np.zeros((batch, t, o1)), - np.zeros((batch, t, o2, o3))]) - self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)]) - - def test_masking_rnn_with_output_and_states(self): - - class Cell(keras.layers.Layer): - - def __init__(self): - self.state_size = None - self.output_size = None - super().__init__() - - def build(self, input_shape): - self.state_size = input_shape[-1] - self.output_size = input_shape[-1] - - def call(self, inputs, states): - return inputs, [s + 1 for s in states] - - x = keras.Input((3, 1), name='x') - x_masked = keras.layers.Masking()(x) - s_0 = keras.Input((1,), name='s_0') - y, s = keras.layers.RNN( - Cell(), return_state=True)(x_masked, initial_state=s_0) - model = keras.models.Model([x, s_0], [y, s]) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - # the last time step is masked - x_np = np.array([[[1.], [2.], [0.]]]) - s_0_np = np.array([[10.]]) - y_np, s_np = model.predict([x_np, s_0_np]) - - # 1 is added to the initial state two times - self.assertAllClose(s_np, s_0_np + 2) - # Expect the last output to be the same as the last output before masking - self.assertAllClose(y_np, x_np[:, 1, :]) - - def test_zero_output_for_masking(self): - - for unroll in [True, False]: - cell = keras.layers.SimpleRNNCell(5) - x = keras.Input((5, 5)) - mask = keras.layers.Masking() - layer = keras.layers.RNN( - cell, return_sequences=True, zero_output_for_mask=True, unroll=unroll) - masked_input = mask(x) - y = layer(masked_input) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - np_x = np.ones((6, 5, 5)) - result_1 = model.predict(np_x) - - # set time steps 4 and 5 of the last record to zero (masked). - np_x[5, 3:] = 0 - result_2 = model.predict(np_x) - - # expect result_2 to have the same output, except at time steps 4 and 5 - # of the last record.
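A condensed sketch of what `zero_output_for_mask` buys, assuming tf.keras and the default `Masking` mask value of zero: fully masked timesteps come back as all-zero rows rather than repeating the last valid output.

```python
import numpy as np
import tensorflow as tf

# With a Masking layer in front and zero_output_for_mask=True,
# fully-masked timesteps yield all-zero output rows instead of
# carrying the last unmasked output forward.
inputs = tf.keras.Input((5, 5))
masked = tf.keras.layers.Masking()(inputs)
rnn = tf.keras.layers.RNN(
    tf.keras.layers.SimpleRNNCell(5),
    return_sequences=True,
    zero_output_for_mask=True,
)
model = tf.keras.Model(inputs, rnn(masked))

x = np.ones((1, 5, 5))
x[0, 3:] = 0.0  # mask out the last two timesteps
y = model.predict(x)
print(y[0, 3:])  # expected: all zeros for the masked steps
```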
- result_1[5, 3:] = 0 - self.assertAllClose(result_1, result_2) - - def test_unroll_single_step(self): - """Even if the time dimension is only one, we should be able to unroll.""" - cell = keras.layers.SimpleRNNCell(5) - x = keras.Input((1, 5)) - layer = keras.layers.RNN(cell, return_sequences=True, unroll=True) - y = layer(x) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - np_x = np.ones((6, 1, 5)) - result = model.predict(np_x) - self.assertEqual((6, 1, 5), result.shape) - - def test_unroll_zero_step(self): - """If the time dimension is None, we should fail to unroll.""" - cell = keras.layers.SimpleRNNCell(5) - x = keras.Input((None, 5)) - layer = keras.layers.RNN(cell, return_sequences=True, unroll=True) - with self.assertRaisesRegex(ValueError, 'Cannot unroll a RNN.*'): - layer(x) - - def test_full_input_spec(self): - # See https://github.com/tensorflow/tensorflow/issues/25985 - inputs = keras.layers.Input(batch_shape=(1, 1, 1)) - state_h = keras.layers.Input(batch_shape=(1, 1)) - state_c = keras.layers.Input(batch_shape=(1, 1)) - states = [state_h, state_c] - decoder_out = keras.layers.LSTM(1, stateful=True)( - inputs, - initial_state=states - ) - model = keras.Model([inputs, state_h, state_c], decoder_out) - output1 = model.predict( - [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))]) - output2 = model.predict( - [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))]) - model.reset_states() - output3 = model.predict( - [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))]) - self.assertAllClose(output1, output3) - self.assertNotAllClose(output1, output2) - - def test_reset_states(self): - # See https://github.com/tensorflow/tensorflow/issues/25852 - with self.assertRaisesRegex(ValueError, 'it needs to know its batch size'): - simple_rnn = keras.layers.SimpleRNN(1, stateful=True) - simple_rnn.reset_states() - - with self.assertRaisesRegex(ValueError, 'it needs to know its batch size'): - cell = Minimal2DRNNCell(1, 2) - custom_rnn = keras.layers.RNN(cell, stateful=True) - custom_rnn.reset_states() - - @parameterized.parameters( - [keras.layers.SimpleRNNCell, keras.layers.GRUCell, keras.layers.LSTMCell]) - def test_stateful_rnn_with_stacking(self, cell): - # See https://github.com/tensorflow/tensorflow/issues/28614. - batch = 12 - timesteps = 10 - input_dim = 8 - output_dim = 64 - cells = [cell(32), cell(64)] - x = keras.Input(batch_shape=(batch, None, input_dim)) - layer = keras.layers.RNN(cells, stateful=True) - y = layer(x) - - model = keras.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, timesteps, input_dim)), - np.zeros((batch, output_dim))) - model.predict(np.ones((batch, timesteps, input_dim))) - - model.reset_states() - model.predict(np.ones((batch, timesteps, input_dim))) - - new_states = tf.nest.map_structure(lambda s: np.ones((batch, s)), - layer.cell.state_size) - layer.reset_states(new_states) - model.predict(np.ones((batch, timesteps, input_dim))) - - def test_stateful_rnn_with_initial_state(self): - # See https://github.com/tensorflow/tensorflow/issues/32299. 
- batch = 12 - timesteps = 1 - input_dim = 8 - output_dim = 16 - - test_inputs = np.full((batch, timesteps, input_dim), 0.5) - - def make_model(stateful=False, with_initial_state=False): - input_layer = keras.Input(shape=(None, input_dim), batch_size=batch) - if with_initial_state: - initial_states = keras.backend.constant(np.ones((batch, output_dim))) - else: - initial_states = None - rnn_output = keras.layers.GRU( - units=output_dim, return_sequences=True, stateful=stateful)( - input_layer, initial_state=initial_states) - model = keras.Model(input_layer, rnn_output) - model.compile( - optimizer='rmsprop', loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - return model - - # Define a model with a constant state initialization - model = make_model(stateful=True, with_initial_state=True) - layer_weights = model.layers[1].get_weights() - - model.reset_states() - predict_1 = model.predict(test_inputs) - predict_2 = model.predict(test_inputs) - - model.reset_states() - predict_3 = model.predict(test_inputs) - - # predict 1 and 2 should be different since the batch 2 should use the state - # from batch 1 as the initial state. - self.assertNotAllClose(predict_1, predict_2) - self.assertAllClose(predict_1, predict_3) - - # Create a new model with same weights but without initial states. Make sure - # the predict value is different from the model with non-zero initial state. - model_2 = make_model(stateful=True, with_initial_state=False) - model_2.layers[1].set_weights(layer_weights) - - model_2.reset_states() - predict_4 = model_2.predict(test_inputs) - predict_5 = model_2.predict(test_inputs) - self.assertNotAllClose(predict_1, predict_4) - self.assertNotAllClose(predict_4, predict_5) - - # Create models with stateful=False, and make sure they handle init state - # correctly. - model_3 = make_model(stateful=False, with_initial_state=True) - model_3.layers[1].set_weights(layer_weights) - - model_3.reset_states() - predict_6 = model_3.predict(test_inputs) - predict_7 = model_3.predict(test_inputs) - self.assertAllClose(predict_1, predict_6) - self.assertAllClose(predict_6, predict_7) - - def test_stateful_rnn_with_customized_get_initial_state(self): - - class TestCell(keras.layers.AbstractRNNCell): - - state_size = 1 - output_size = 2 - - def get_initial_state(self, inputs=None, batch_size=None, dtype=None): - return np.ones((batch_size, 1), dtype=dtype) - - def call(self, inputs, states): - return inputs, states - - layer = keras.layers.RNN(TestCell(), stateful=True, return_state=True) - inputs = keras.Input(shape=(10, 2), batch_size=4) - model = keras.Model(inputs, layer(inputs)) - x = np.ones((4, 10, 2), dtype=np.float32) - output, state = model.predict(x) - self.assertAllClose(output, np.ones((4, 2))) - self.assertAllClose(state, np.ones((4, 1))) - - def test_input_dim_length(self): - simple_rnn = keras.layers.SimpleRNN(5, input_length=10, input_dim=8) - self.assertEqual(simple_rnn._batch_input_shape, (None, 10, 8)) - - simple_rnn = keras.layers.SimpleRNN(5, input_dim=8) - self.assertEqual(simple_rnn._batch_input_shape, (None, None, 8)) - - simple_rnn = keras.layers.SimpleRNN(5, input_length=10) - self.assertEqual(simple_rnn._batch_input_shape, (None, 10, None)) - - @parameterized.parameters( - [keras.layers.SimpleRNNCell, keras.layers.GRUCell, keras.layers.LSTMCell]) - def test_state_spec_with_stack_cell(self, cell): - # See https://github.com/tensorflow/tensorflow/issues/27817 for more detail. 
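The test below exercises the encoder/decoder pattern from that issue: the states returned by an RNN built on a stack of cells are fed as `initial_state` to a second, identically-shaped stacked RNN. A minimal sketch of the pattern, assuming tf.keras; the cell sizes here are arbitrary:

```python
import numpy as np
from tensorflow import keras

# One state tensor per stacked cell comes back from the encoder; the
# decoder's stack has matching state sizes, so the states can be
# passed straight through as its initial_state.
def make_cells():
    return [keras.layers.GRUCell(8) for _ in range(3)]

inputs = keras.Input((10, 8))
encoder_out = keras.layers.RNN(make_cells(), return_state=True)(inputs)
states = encoder_out[1:]  # one state tensor per stacked cell
decoded = keras.layers.RNN(make_cells())(inputs, initial_state=states)
model = keras.Model(inputs, decoded)
model.predict(np.ones((2, 10, 8)))
```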
- batch = 12 - timesteps = 10 - input_dim = 8 - output_dim = 8 - - def create_cell(): - return [cell(output_dim), - cell(output_dim), - cell(output_dim)] - - inputs = keras.Input((timesteps, input_dim)) - encoder_output = keras.layers.RNN(create_cell(), return_state=True)(inputs) - - states = encoder_output[1:] - - decoder_output = keras.layers.RNN( - create_cell())(inputs, initial_state=states) - - model = keras.models.Model(inputs, decoder_output) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, timesteps, input_dim)), - np.zeros((batch, output_dim))) - model.predict(np.ones((batch, timesteps, input_dim))) - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name(layer=[ - keras.layers.SimpleRNN, gru_v1.GRU, lstm_v1.LSTM, gru.GRU, lstm.LSTM - ])) - def test_rnn_with_ragged_input(self, layer): - ragged_data = tf.ragged.constant( - [[[1., 1., 1., 1., 1.], [1., 2., 3., 1., 1.]], - [[2., 4., 1., 3., 1.]], - [[2., 3., 4., 1., 5.], [2., 3., 1., 1., 1.], [1., 2., 3., 4., 5.]]], - ragged_rank=1) - label_data = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1]]) - - # Test results in feed forward - np.random.seed(100) - rnn_layer = layer(4, activation='sigmoid') - - x_ragged = keras.Input(shape=(None, 5), ragged=True) - y_ragged = rnn_layer(x_ragged) - model = keras.models.Model(x_ragged, y_ragged) - output_ragged = model.predict(ragged_data, steps=1) - - x_dense = keras.Input(shape=(3, 5)) - masking = keras.layers.Masking()(x_dense) - y_dense = rnn_layer(masking) - model_2 = keras.models.Model(x_dense, y_dense) - dense_data = ragged_data.to_tensor() - output_dense = model_2.predict(dense_data, steps=1) - - self.assertAllClose(output_dense, output_ragged) - - # Test results with go backwards - np.random.seed(200) - back_rnn_layer = layer(8, go_backwards=True, activation='sigmoid') - - x_ragged = keras.Input(shape=(None, 5), ragged=True) - y_ragged = back_rnn_layer(x_ragged) - model = keras.models.Model(x_ragged, y_ragged) - output_ragged = model.predict(ragged_data, steps=1) - - x_dense = keras.Input(shape=(3, 5)) - masking = keras.layers.Masking()(x_dense) - y_dense = back_rnn_layer(masking) - model_2 = keras.models.Model(x_dense, y_dense) - dense_data = ragged_data.to_tensor() - output_dense = model_2.predict(dense_data, steps=1) - - self.assertAllClose(output_dense, output_ragged) - - # Test densification of the ragged input - dense_tensor, row_lengths = keras.backend.convert_inputs_if_ragged( - ragged_data) - self.assertAllClose(dense_data, dense_tensor) - - # Test optional params, all should work except unrolling - inputs = keras.Input(shape=(None, 5), dtype=tf.float32, ragged=True) - custom_rnn_layer = layer( - 3, zero_output_for_mask=True, dropout=0.1, use_bias=True) - outputs = custom_rnn_layer(inputs) - model = keras.models.Model(inputs, outputs) - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(ragged_data, label_data) - - # Test stateful and full shape specification - inputs = keras.Input( - shape=(None, 5), batch_size=3, dtype=tf.float32, ragged=True) - stateful_rnn_layer = layer(3, stateful=True) - outputs = stateful_rnn_layer(inputs) - model = keras.models.Model(inputs, outputs) - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(ragged_data, label_data) - - # Must raise error when unroll is set to True - unroll_rnn_layer = 
layer(3, unroll=True) - with self.assertRaisesRegex(ValueError, - 'The input received contains RaggedTensors *'): - unroll_rnn_layer(inputs) - - # Check that the return_sequences outputs are correct - np.random.seed(100) - returning_rnn_layer = layer(4, return_sequences=True) - - x_ragged = keras.Input(shape=(None, 5), ragged=True) - y_ragged = returning_rnn_layer(x_ragged) - model = keras.models.Model(x_ragged, y_ragged) - output_ragged = model.predict(ragged_data, steps=1) - self.assertAllClose(output_ragged.ragged_rank, ragged_data.ragged_rank) - self.assertAllClose(output_ragged.row_splits, ragged_data.row_splits) - - x_dense = keras.Input(shape=(3, 5)) - masking = keras.layers.Masking()(x_dense) - y_dense = returning_rnn_layer(masking) - model_2 = keras.models.Model(x_dense, y_dense) - dense_data = ragged_data.to_tensor() - output_dense = model_2.predict(dense_data, steps=1) - # Convert the output here to ragged for value comparison - output_dense = tf.RaggedTensor.from_tensor( - output_dense, lengths=row_lengths) - self.assertAllClose(output_ragged, output_dense) - - # Check that the return_sequences and go_backwards outputs are correct - np.random.seed(100) - returning_rnn_layer = layer(4, go_backwards=True, return_sequences=True) - - x_ragged = keras.Input(shape=(None, 5), ragged=True) - y_ragged = returning_rnn_layer(x_ragged) - model = keras.models.Model(x_ragged, y_ragged) - output_ragged = model.predict(ragged_data, steps=1) - self.assertAllClose(output_ragged.ragged_rank, ragged_data.ragged_rank) - self.assertAllClose(output_ragged.row_splits, ragged_data.row_splits) - - x_dense = keras.Input(shape=(3, 5)) - masking = keras.layers.Masking()(x_dense) - y_dense = returning_rnn_layer(masking) - model_2 = keras.models.Model(x_dense, y_dense) - dense_data = ragged_data.to_tensor() - output_dense = model_2.predict(dense_data, steps=1) - - # Note that the raw output for dense and ragged input when go_backwards=True - # will be different. Consider the following input - # [[a, b, 0], [c, 0, 0], [d, e, f]] where 0s are masked values. - # The dense output will be [[0, b, a], [0, 0, c], [f, e, d]] since it will - # process the whole sequence from the end. - # While the ragged output will be [[b, a], [c], [f, e, d]] since it just - # ignores the 0s. And if we densify the ragged output, it will by default - # insert 0s at the end (rather than at the beginning), which makes the - # output [[b, a, 0], [c, 0, 0], [f, e, d]].
With this, we need to verify that - # reverse(ragged_output.to_tensor()) == reverse(dense_output) - output_dense = keras.backend.reverse(output_dense, [1]) - output_dense = tf.RaggedTensor.from_tensor( - output_dense, lengths=row_lengths) - - self.assertAllClose(keras.backend.reverse(output_ragged, [1]), output_dense) - - def test_stateless_rnn_cell(self): - - class StatelessCell(keras.layers.Layer): - - def __init__(self): - self.state_size = ((), [], ()) - self.output_size = None - super().__init__() - - def build(self, input_shape): - self.output_size = input_shape[-1] - - def call(self, inputs, states): - return inputs, states - - x = keras.Input((None, 5)) - cell = StatelessCell() - initial_state = tf.nest.map_structure(lambda t: None, cell.state_size) - layer = keras.layers.RNN(cell) - y = layer(x, initial_state=initial_state) - model = keras.models.Model(x, y) - model.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 5))) - - @parameterized.parameters( - [keras.layers.SimpleRNN, gru_v1.GRU, lstm_v1.LSTM, gru.GRU, lstm.LSTM]) - def test_for_enable_caching_device_for_layer(self, layer_cls): - expected_caching_device = tf.compat.v1.executing_eagerly_outside_functions() - layer = layer_cls(1) - self.assertEqual(layer.cell._enable_caching_device, expected_caching_device) - - # Make sure the config only appears when a non-default value is used. - config = layer.get_config() - self.assertNotIn('enable_caching_device', config) - - non_default_value = not expected_caching_device - layer = layer_cls(1, enable_caching_device=non_default_value) - self.assertEqual(layer.cell._enable_caching_device, non_default_value) - config = layer.get_config() - self.assertEqual(config['enable_caching_device'], non_default_value) - - @parameterized.parameters( - [keras.layers.SimpleRNNCell, gru_v1.GRUCell, lstm_v1.LSTMCell, - gru.GRUCell, lstm.LSTMCell]) - def test_for_enable_caching_device_for_cell(self, cell_cls): - expected_caching_device = tf.compat.v1.executing_eagerly_outside_functions() - cell = cell_cls(1) - self.assertEqual(cell._enable_caching_device, expected_caching_device) - - # Make sure the config only appears when a non-default value is used.
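A compact restatement of what these two tests assert, assuming an eager TF 2.x script and layers that accept `enable_caching_device` (the RNN layers parameterized above do):

```python
import tensorflow as tf
from tensorflow import keras

# The `enable_caching_device` entry shows up in get_config() only
# when a non-default value was passed in.
default = tf.compat.v1.executing_eagerly_outside_functions()

layer = keras.layers.LSTM(1)
assert "enable_caching_device" not in layer.get_config()

layer = keras.layers.LSTM(1, enable_caching_device=not default)
assert layer.get_config()["enable_caching_device"] == (not default)
```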
- config = cell.get_config() - self.assertNotIn('enable_caching_device', config) - - non_default_value = not expected_caching_device - cell = cell_cls(1, enable_caching_device=non_default_value) - self.assertEqual(cell._enable_caching_device, non_default_value) - config = cell.get_config() - self.assertEqual(config['enable_caching_device'], non_default_value) - - -class RNNCellWithConstants(keras.layers.Layer): - - def __init__(self, units, constant_size, **kwargs): - self.units = units - self.state_size = units - self.constant_size = constant_size - super().__init__(**kwargs) - - def build(self, input_shape): - self.input_kernel = self.add_weight( - shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.constant_kernel = self.add_weight( - shape=(self.constant_size, self.units), - initializer='uniform', - name='constant_kernel') - self.built = True - - def call(self, inputs, states, constants): - [prev_output] = states - [constant] = constants - h_input = keras.backend.dot(inputs, self.input_kernel) - h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(constant, self.constant_kernel) - output = h_input + h_state + h_const - return output, [output] - - def get_config(self): - config = {'units': self.units, 'constant_size': self.constant_size} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Minimal2DRNNCell(keras.layers.Layer): - """The minimal 2D RNN cell is a simple combination of 2 1-D RNN cell. - - Both internal state and output have 2 dimensions and are orthogonal - between each other. - """ - - def __init__(self, unit_a, unit_b, **kwargs): - self.unit_a = unit_a - self.unit_b = unit_b - self.state_size = tf.TensorShape([unit_a, unit_b]) - self.output_size = tf.TensorShape([unit_a, unit_b]) - super().__init__(**kwargs) - - def build(self, input_shape): - input_a = input_shape[-2] - input_b = input_shape[-1] - self.kernel = self.add_weight( - shape=(input_a, input_b, self.unit_a, self.unit_b), - initializer='uniform', - name='kernel') - self.recurring_kernel = self.add_weight( - shape=(self.unit_a, self.unit_b, self.unit_a, self.unit_b), - initializer='uniform', - name='recurring_kernel') - self.bias = self.add_weight( - shape=(self.unit_a, self.unit_b), initializer='uniform', name='bias') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = tf.einsum('bij,ijkl->bkl', inputs, self.kernel) - h += tf.expand_dims(self.bias, axis=0) - output = h + tf.einsum('bij,ijkl->bkl', prev_output, self.recurring_kernel) - return output, [output] - class PlusOneRNNCell(keras.layers.Layer): - """Add one to the input and state. + """Add one to the input and state. - This cell is used for testing state_size and output_size. - """ + This cell is used for testing state_size and output_size. 
+ """ - def __init__(self, num_unit, **kwargs): - self.state_size = num_unit - super().__init__(**kwargs) + def __init__(self, num_unit, **kwargs): + self.state_size = num_unit + super().__init__(**kwargs) - def build(self, input_shape): - self.output_size = input_shape[-1] + def build(self, input_shape): + self.output_size = input_shape[-1] - def call(self, inputs, states): - return inputs + 1, [states[0] + 1] + def call(self, inputs, states): + return inputs + 1, [states[0] + 1] class NestedCell(keras.layers.Layer): - - def __init__(self, unit_1, unit_2, unit_3, use_tuple=False, **kwargs): - self.unit_1 = unit_1 - self.unit_2 = unit_2 - self.unit_3 = unit_3 - self.use_tuple = use_tuple - super().__init__(**kwargs) - # A nested state. - if use_tuple: - self.state_size = NestedState( - s1=unit_1, s2=tf.TensorShape([unit_2, unit_3])) - else: - self.state_size = (unit_1, tf.TensorShape([unit_2, unit_3])) - self.output_size = (unit_1, tf.TensorShape([unit_2, unit_3])) - - def build(self, inputs_shape): - # expect input_shape to contain 2 items, [(batch, i1), (batch, i2, i3)] - if self.use_tuple: - input_1 = inputs_shape.t1[1] - input_2, input_3 = inputs_shape.t2[1:] - else: - input_1 = inputs_shape[0][1] - input_2, input_3 = inputs_shape[1][1:] - - self.kernel_1 = self.add_weight( - shape=(input_1, self.unit_1), initializer='uniform', name='kernel_1') - self.kernel_2_3 = self.add_weight( - shape=(input_2, input_3, self.unit_2, self.unit_3), - initializer='uniform', - name='kernel_2_3') - - def call(self, inputs, states): - # inputs should be in [(batch, input_1), (batch, input_2, input_3)] - # state should be in shape [(batch, unit_1), (batch, unit_2, unit_3)] - flatten_inputs = tf.nest.flatten(inputs) - s1, s2 = states - - output_1 = tf.matmul(flatten_inputs[0], self.kernel_1) - output_2_3 = tf.einsum('bij,ijkl->bkl', flatten_inputs[1], self.kernel_2_3) - state_1 = s1 + output_1 - state_2_3 = s2 + output_2_3 - - output = [output_1, output_2_3] - new_states = NestedState(s1=state_1, s2=state_2_3) - - return output, new_states - - -if __name__ == '__main__': - tf.test.main() + def __init__(self, unit_1, unit_2, unit_3, use_tuple=False, **kwargs): + self.unit_1 = unit_1 + self.unit_2 = unit_2 + self.unit_3 = unit_3 + self.use_tuple = use_tuple + super().__init__(**kwargs) + # A nested state. 
+ if use_tuple: + self.state_size = NestedState( + s1=unit_1, s2=tf.TensorShape([unit_2, unit_3]) + ) + else: + self.state_size = (unit_1, tf.TensorShape([unit_2, unit_3])) + self.output_size = (unit_1, tf.TensorShape([unit_2, unit_3])) + + def build(self, inputs_shape): + # expect input_shape to contain 2 items, [(batch, i1), (batch, i2, i3)] + if self.use_tuple: + input_1 = inputs_shape.t1[1] + input_2, input_3 = inputs_shape.t2[1:] + else: + input_1 = inputs_shape[0][1] + input_2, input_3 = inputs_shape[1][1:] + + self.kernel_1 = self.add_weight( + shape=(input_1, self.unit_1), initializer="uniform", name="kernel_1" + ) + self.kernel_2_3 = self.add_weight( + shape=(input_2, input_3, self.unit_2, self.unit_3), + initializer="uniform", + name="kernel_2_3", + ) + + def call(self, inputs, states): + # inputs should be in [(batch, input_1), (batch, input_2, input_3)] + # state should be in shape [(batch, unit_1), (batch, unit_2, unit_3)] + flatten_inputs = tf.nest.flatten(inputs) + s1, s2 = states + + output_1 = tf.matmul(flatten_inputs[0], self.kernel_1) + output_2_3 = tf.einsum( + "bij,ijkl->bkl", flatten_inputs[1], self.kernel_2_3 + ) + state_1 = s1 + output_1 + state_2_3 = s2 + output_2_3 + + output = [output_1, output_2_3] + new_states = NestedState(s1=state_1, s2=state_2_3) + + return output, new_states + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/base_wrapper.py b/keras/layers/rnn/base_wrapper.py index 24c40007f76c..6058d85fa59b 100644 --- a/keras/layers/rnn/base_wrapper.py +++ b/keras/layers/rnn/base_wrapper.py @@ -16,56 +16,77 @@ Wrappers are layers that augment the functionality of another layer. """ -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import copy from keras.engine.base_layer import Layer -from keras.utils import generic_utils +from keras.saving import serialization_lib +from keras.saving.legacy import serialization as legacy_serialization +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Wrapper') +@keras_export("keras.layers.Wrapper") class Wrapper(Layer): - """Abstract wrapper base class. + """Abstract wrapper base class. + + Wrappers take another layer and augment it in various ways. + Do not use this class as a layer, it is only an abstract base class. + Two usable wrappers are the `TimeDistributed` and `Bidirectional` wrappers. - Wrappers take another layer and augment it in various ways. - Do not use this class as a layer, it is only an abstract base class. - Two usable wrappers are the `TimeDistributed` and `Bidirectional` wrappers. + Args: + layer: The layer to be wrapped. + """ - Args: - layer: The layer to be wrapped. - """ + def __init__(self, layer, **kwargs): + try: + assert isinstance(layer, Layer) + except Exception: + raise ValueError( + f"Layer {layer} supplied to wrapper is" + " not a supported layer type. Please" + " ensure wrapped layer is a valid Keras layer." 
+ ) + self.layer = layer + super().__init__(**kwargs) - def __init__(self, layer, **kwargs): - assert isinstance(layer, Layer) - self.layer = layer - super().__init__(**kwargs) + def build(self, input_shape=None): + if not self.layer.built: + self.layer.build(input_shape) + self.layer.built = True + self.built = True - def build(self, input_shape=None): - if not self.layer.built: - self.layer.build(input_shape) - self.layer.built = True - self.built = True + @property + def activity_regularizer(self): + if hasattr(self.layer, "activity_regularizer"): + return self.layer.activity_regularizer + else: + return None - @property - def activity_regularizer(self): - if hasattr(self.layer, 'activity_regularizer'): - return self.layer.activity_regularizer - else: - return None + def get_config(self): + try: + config = { + "layer": serialization_lib.serialize_keras_object(self.layer) + } + except TypeError: # Case of incompatible custom wrappers + config = { + "layer": legacy_serialization.serialize_keras_object(self.layer) + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) - def get_config(self): - config = {'layer': generic_utils.serialize_keras_object(self.layer)} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + @classmethod + def from_config(cls, config, custom_objects=None): + from keras.layers import deserialize as deserialize_layer - @classmethod - def from_config(cls, config, custom_objects=None): - from keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top - # Avoid mutating the input dict - config = copy.deepcopy(config) - layer = deserialize_layer( - config.pop('layer'), custom_objects=custom_objects) - return cls(layer, **config) + # Avoid mutating the input dict + config = copy.deepcopy(config) + use_legacy_format = "module" not in config + layer = deserialize_layer( + config.pop("layer"), + custom_objects=custom_objects, + use_legacy_format=use_legacy_format, + ) + return cls(layer, **config) diff --git a/keras/layers/rnn/base_wrapper_test.py b/keras/layers/rnn/base_wrapper_test.py index d7d5cbf2f4aa..cd019a5f77a0 100644 --- a/keras/layers/rnn/base_wrapper_test.py +++ b/keras/layers/rnn/base_wrapper_test.py @@ -14,31 +14,31 @@ # ============================================================================== """Tests for the Wrapper base class.""" +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras -import tensorflow.compat.v2 as tf class ExampleWrapper(keras.layers.Wrapper): - """Simple Wrapper subclass.""" + """Simple Wrapper subclass.""" - def call(self, inputs, *args, **kwargs): - return self.layer(inputs, *args, **kwargs) + def call(self, inputs, *args, **kwargs): + return self.layer(inputs, *args, **kwargs) class WrapperTest(parameterized.TestCase): + def test_wrapper_from_config_no_mutation(self): + wrapper = ExampleWrapper(keras.layers.Dense(1)) + config = wrapper.get_config() + config_copy = config.copy() + self.assertEqual(config, config_copy) - def test_wrapper_from_config_no_mutation(self): - wrapper = ExampleWrapper(keras.layers.Dense(1)) - config = wrapper.get_config() - config_copy = config.copy() - self.assertEqual(config, config_copy) - - wrapper_from_config = ExampleWrapper.from_config(config) - new_config = wrapper_from_config.get_config() - self.assertEqual(new_config, config) - self.assertEqual(new_config, config_copy) + wrapper_from_config = ExampleWrapper.from_config(config) + 
new_config = wrapper_from_config.get_config() + self.assertEqual(new_config, config) + self.assertEqual(new_config, config_copy) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/bidirectional.py b/keras/layers/rnn/bidirectional.py index 70c32d2e0692..3a2d30536b42 100644 --- a/keras/layers/rnn/bidirectional.py +++ b/keras/layers/rnn/bidirectional.py @@ -13,455 +13,521 @@ # limitations under the License. # ============================================================================== """Bidirectional wrapper for RNNs.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import copy +import tensorflow.compat.v2 as tf + from keras import backend from keras.engine.base_layer import Layer from keras.engine.input_spec import InputSpec from keras.layers.rnn import rnn_utils from keras.layers.rnn.base_wrapper import Wrapper +from keras.saving import serialization_lib from keras.utils import generic_utils from keras.utils import tf_inspect from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.Bidirectional') +@keras_export("keras.layers.Bidirectional") class Bidirectional(Wrapper): - """Bidirectional wrapper for RNNs. - - Args: - layer: `keras.layers.RNN` instance, such as `keras.layers.LSTM` or - `keras.layers.GRU`. It could also be a `keras.layers.Layer` instance - that meets the following criteria: - 1. Be a sequence-processing layer (accepts 3D+ inputs). - 2. Have a `go_backwards`, `return_sequences` and `return_state` - attribute (with the same semantics as for the `RNN` class). - 3. Have an `input_spec` attribute. - 4. Implement serialization via `get_config()` and `from_config()`. - Note that the recommended way to create new RNN layers is to write a - custom RNN cell and use it with `keras.layers.RNN`, instead of - subclassing `keras.layers.Layer` directly. - - When the `returns_sequences` is true, the output of the masked timestep - will be zero regardless of the layer's original `zero_output_for_mask` - value. - merge_mode: Mode by which outputs of the forward and backward RNNs will be - combined. One of {'sum', 'mul', 'concat', 'ave', None}. If None, the - outputs will not be combined, they will be returned as a list. Default - value is 'concat'. - backward_layer: Optional `keras.layers.RNN`, or `keras.layers.Layer` - instance to be used to handle backwards input processing. - If `backward_layer` is not provided, the layer instance passed as the - `layer` argument will be used to generate the backward layer - automatically. - Note that the provided `backward_layer` layer should have properties - matching those of the `layer` argument, in particular it should have the - same values for `stateful`, `return_states`, `return_sequences`, etc. - In addition, `backward_layer` and `layer` should have different - `go_backwards` argument values. - A `ValueError` will be raised if these requirements are not met. - - Call arguments: - The call arguments for this layer are the same as those of the wrapped RNN - layer. - Beware that when passing the `initial_state` argument during the call of - this layer, the first half in the list of elements in the `initial_state` - list will be passed to the forward RNN call and the last half in the list - of elements will be passed to the backward RNN call. - - Raises: - ValueError: - 1. If `layer` or `backward_layer` is not a `Layer` instance. - 2. 
In case of invalid `merge_mode` argument. - 3. If `backward_layer` has mismatched properties compared to `layer`. - - Examples: - - ```python - model = Sequential() - model.add(Bidirectional(LSTM(10, return_sequences=True), input_shape=(5, 10))) - model.add(Bidirectional(LSTM(10))) - model.add(Dense(5)) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - - # With custom backward layer - model = Sequential() - forward_layer = LSTM(10, return_sequences=True) - backward_layer = LSTM(10, activation='relu', return_sequences=True, - go_backwards=True) - model.add(Bidirectional(forward_layer, backward_layer=backward_layer, - input_shape=(5, 10))) - model.add(Dense(5)) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - ``` - """ - - def __init__(self, - layer, - merge_mode='concat', - weights=None, - backward_layer=None, - **kwargs): - if not isinstance(layer, Layer): - raise ValueError( - 'Please initialize `Bidirectional` layer with a ' - f'`tf.keras.layers.Layer` instance. Received: {layer}') - if backward_layer is not None and not isinstance(backward_layer, Layer): - raise ValueError( - '`backward_layer` need to be a `tf.keras.layers.Layer` instance. ' - f'Received: {backward_layer}') - if merge_mode not in ['sum', 'mul', 'ave', 'concat', None]: - raise ValueError(f'Invalid merge mode. Received: {merge_mode}. ' - 'Merge mode should be one of ' - '{"sum", "mul", "ave", "concat", None}') - # We don't want to track `layer` since we're already tracking the two copies - # of it we actually run. - self._setattr_tracking = False - super().__init__(layer, **kwargs) - self._setattr_tracking = True - - # Recreate the forward layer from the original layer config, so that it will - # not carry over any state from the layer. - self.forward_layer = self._recreate_layer_from_config(layer) - - if backward_layer is None: - self.backward_layer = self._recreate_layer_from_config( - layer, go_backwards=True) - else: - self.backward_layer = backward_layer - # Keep the custom backward layer config, so that we can save it later. The - # layer's name might be updated below with prefix 'backward_', and we want - # to preserve the original config. - self._backward_layer_config = generic_utils.serialize_keras_object( - backward_layer) - - self.forward_layer._name = 'forward_' + self.forward_layer.name - self.backward_layer._name = 'backward_' + self.backward_layer.name - - self._verify_layer_config() - - def force_zero_output_for_mask(layer): - # Force the zero_output_for_mask to be True if returning sequences. 
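The helper above pins `zero_output_for_mask` to `return_sequences` on both inner copies, so masked timesteps contribute zeros from both directions and the merge stays well defined. A quick sketch of the resulting attribute state, assuming tf.keras:

```python
from tensorflow import keras

# After wrapping, both inner copies of a sequence-returning RNN have
# zero_output_for_mask forced to True (it tracks return_sequences).
bidi = keras.layers.Bidirectional(keras.layers.LSTM(4, return_sequences=True))
print(bidi.forward_layer.zero_output_for_mask)   # True
print(bidi.backward_layer.zero_output_for_mask)  # True
```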
- if getattr(layer, 'zero_output_for_mask', None) is not None: - layer.zero_output_for_mask = layer.return_sequences - - force_zero_output_for_mask(self.forward_layer) - force_zero_output_for_mask(self.backward_layer) - - self.merge_mode = merge_mode - if weights: - nw = len(weights) - self.forward_layer.initial_weights = weights[:nw // 2] - self.backward_layer.initial_weights = weights[nw // 2:] - self.stateful = layer.stateful - self.return_sequences = layer.return_sequences - self.return_state = layer.return_state - self.supports_masking = True - self._trainable = True - self._num_constants = 0 - self.input_spec = layer.input_spec - - @property - def _use_input_spec_as_call_signature(self): - return self.layer._use_input_spec_as_call_signature # pylint: disable=protected-access - - def _verify_layer_config(self): - """Ensure the forward and backward layers have valid common property.""" - if self.forward_layer.go_backwards == self.backward_layer.go_backwards: - raise ValueError( - 'Forward layer and backward layer should have different ' - '`go_backwards` value.' - f'forward_layer.go_backwards = {self.forward_layer.go_backwards},' - f'backward_layer.go_backwards = {self.backward_layer.go_backwards}') - - common_attributes = ('stateful', 'return_sequences', 'return_state') - for a in common_attributes: - forward_value = getattr(self.forward_layer, a) - backward_value = getattr(self.backward_layer, a) - if forward_value != backward_value: - raise ValueError( - 'Forward layer and backward layer are expected to have the same ' - f'value for attribute "{a}", got "{forward_value}" for forward ' - f'layer and "{backward_value}" for backward layer') - - def _recreate_layer_from_config(self, layer, go_backwards=False): - # When recreating the layer from its config, it is possible that the layer - # is a RNN layer that contains custom cells. In this case we inspect the - # layer and pass the custom cell class as part of the `custom_objects` - # argument when calling `from_config`. - # See https://github.com/tensorflow/tensorflow/issues/26581 for more detail. 
- config = layer.get_config() - if go_backwards: - config['go_backwards'] = not config['go_backwards'] - if 'custom_objects' in tf_inspect.getfullargspec( - layer.__class__.from_config).args: - custom_objects = {} - cell = getattr(layer, 'cell', None) - if cell is not None: - custom_objects[cell.__class__.__name__] = cell.__class__ - # For StackedRNNCells - stacked_cells = getattr(cell, 'cells', []) - for c in stacked_cells: - custom_objects[c.__class__.__name__] = c.__class__ - return layer.__class__.from_config(config, custom_objects=custom_objects) - else: - return layer.__class__.from_config(config) - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - output_shape = self.forward_layer.compute_output_shape(input_shape) - if self.return_state: - state_shape = tf_utils.convert_shapes(output_shape[1:], to_tuples=False) - output_shape = tf_utils.convert_shapes(output_shape[0], to_tuples=False) - else: - output_shape = tf_utils.convert_shapes(output_shape, to_tuples=False) - - if self.merge_mode == 'concat': - output_shape = output_shape.as_list() - output_shape[-1] *= 2 - output_shape = tf.TensorShape(output_shape) - elif self.merge_mode is None: - output_shape = [output_shape, copy.copy(output_shape)] - - if self.return_state: - if self.merge_mode is None: - return output_shape + state_shape + copy.copy(state_shape) - return [output_shape] + state_shape + copy.copy(state_shape) - return output_shape - - def __call__(self, inputs, initial_state=None, constants=None, **kwargs): - """`Bidirectional.__call__` implements the same API as the wrapped `RNN`.""" - inputs, initial_state, constants = rnn_utils.standardize_args( - inputs, initial_state, constants, self._num_constants) - - if isinstance(inputs, list): - if len(inputs) > 1: - initial_state = inputs[1:] - inputs = inputs[0] - - if initial_state is None and constants is None: - return super().__call__(inputs, **kwargs) - - # Applies the same workaround as in `RNN.__call__` - additional_inputs = [] - additional_specs = [] - if initial_state is not None: - # Check if `initial_state` can be split into half - num_states = len(initial_state) - if num_states % 2 > 0: - raise ValueError( - 'When passing `initial_state` to a Bidirectional RNN, ' - 'the state should be a list containing the states of ' - 'the underlying RNNs. 
' - f'Received: {initial_state}') - - kwargs['initial_state'] = initial_state - additional_inputs += initial_state - state_specs = tf.nest.map_structure( - lambda state: InputSpec(shape=backend.int_shape(state)), - initial_state) - self.forward_layer.state_spec = state_specs[:num_states // 2] - self.backward_layer.state_spec = state_specs[num_states // 2:] - additional_specs += state_specs - if constants is not None: - kwargs['constants'] = constants - additional_inputs += constants - constants_spec = [InputSpec(shape=backend.int_shape(constant)) - for constant in constants] - self.forward_layer.constants_spec = constants_spec - self.backward_layer.constants_spec = constants_spec - additional_specs += constants_spec - - self._num_constants = len(constants) - self.forward_layer._num_constants = self._num_constants - self.backward_layer._num_constants = self._num_constants - - is_keras_tensor = backend.is_keras_tensor( - tf.nest.flatten(additional_inputs)[0]) - for tensor in tf.nest.flatten(additional_inputs): - if backend.is_keras_tensor(tensor) != is_keras_tensor: - raise ValueError('The initial state of a Bidirectional' - ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors' - ' (a "Keras tensor" is a tensor that was' - ' returned by a Keras layer, or by `Input`)') - - if is_keras_tensor: - # Compute the full input spec, including state - full_input = [inputs] + additional_inputs - # The original input_spec is None since there could be a nested tensor - # input. Update the input_spec to match the inputs. - full_input_spec = [None for _ in range(len(tf.nest.flatten(inputs))) - ] + additional_specs - # Removing kwargs since the value are passed with input list. - kwargs['initial_state'] = None - kwargs['constants'] = None - - # Perform the call with temporarily replaced input_spec - original_input_spec = self.input_spec - self.input_spec = full_input_spec - output = super().__call__(full_input, **kwargs) - self.input_spec = original_input_spec - return output - else: - return super().__call__(inputs, **kwargs) - - def call(self, - inputs, - training=None, - mask=None, - initial_state=None, - constants=None): - """`Bidirectional.call` implements the same API as the wrapped `RNN`.""" - kwargs = {} - if generic_utils.has_arg(self.layer.call, 'training'): - kwargs['training'] = training - if generic_utils.has_arg(self.layer.call, 'mask'): - kwargs['mask'] = mask - if generic_utils.has_arg(self.layer.call, 'constants'): - kwargs['constants'] = constants - - if generic_utils.has_arg(self.layer.call, 'initial_state'): - if isinstance(inputs, list) and len(inputs) > 1: - # initial_states are keras tensors, which means they are passed in - # together with inputs as list. The initial_states need to be split into - # forward and backward section, and be feed to layers accordingly. - forward_inputs = [inputs[0]] - backward_inputs = [inputs[0]] - pivot = (len(inputs) - self._num_constants) // 2 + 1 - # add forward initial state - forward_inputs += inputs[1:pivot] - if not self._num_constants: - # add backward initial state - backward_inputs += inputs[pivot:] + """Bidirectional wrapper for RNNs. + + Args: + layer: `keras.layers.RNN` instance, such as `keras.layers.LSTM` or + `keras.layers.GRU`. It could also be a `keras.layers.Layer` instance + that meets the following criteria: + 1. Be a sequence-processing layer (accepts 3D+ inputs). + 2. Have a `go_backwards`, `return_sequences` and `return_state` + attribute (with the same semantics as for the `RNN` class). + 3. 
Have an `input_spec` attribute.
+        4. Implement serialization via `get_config()` and `from_config()`.
+        Note that the recommended way to create new RNN layers is to write a
+        custom RNN cell and use it with `keras.layers.RNN`, instead of
+        subclassing `keras.layers.Layer` directly.
+        - When `return_sequences` is true, the output of the masked
+        timestep will be zero regardless of the layer's original
+        `zero_output_for_mask` value.
+      merge_mode: Mode by which outputs of the forward and backward RNNs will be
+        combined. One of {'sum', 'mul', 'concat', 'ave', None}. If None, the
+        outputs will not be combined, they will be returned as a list. Default
+        value is 'concat'.
+      backward_layer: Optional `keras.layers.RNN`, or `keras.layers.Layer`
+        instance to be used to handle backwards input processing.
+        If `backward_layer` is not provided, the layer instance passed as the
+        `layer` argument will be used to generate the backward layer
+        automatically.
+        Note that the provided `backward_layer` should have properties
+        matching those of the `layer` argument, in particular it should have the
+        same values for `stateful`, `return_state`, `return_sequences`, etc.
+        In addition, `backward_layer` and `layer` should have different
+        `go_backwards` argument values.
+        A `ValueError` will be raised if these requirements are not met.
+
+    Call arguments:
+      The call arguments for this layer are the same as those of the wrapped RNN
+      layer.
+      Beware that when passing the `initial_state` argument during the call of
+      this layer, the first half of the elements in the `initial_state` list
+      will be passed to the forward RNN call and the last half will be passed
+      to the backward RNN call.
+
+    Raises:
+      ValueError:
+        1. If `layer` or `backward_layer` is not a `Layer` instance.
+        2. In case of invalid `merge_mode` argument.
+        3. If `backward_layer` has mismatched properties compared to `layer`.
+
+    Examples:
+
+    ```python
+    model = Sequential()
+    model.add(Bidirectional(LSTM(10, return_sequences=True),
+                            input_shape=(5, 10)))
+    model.add(Bidirectional(LSTM(10)))
+    model.add(Dense(5))
+    model.add(Activation('softmax'))
+    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
+
+    # With custom backward layer
+    model = Sequential()
+    forward_layer = LSTM(10, return_sequences=True)
+    backward_layer = LSTM(10, activation='relu', return_sequences=True,
+                          go_backwards=True)
+    model.add(Bidirectional(forward_layer, backward_layer=backward_layer,
+                            input_shape=(5, 10)))
+    model.add(Dense(5))
+    model.add(Activation('softmax'))
+    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
+    ```
+    """
+
+    def __init__(
+        self,
+        layer,
+        merge_mode="concat",
+        weights=None,
+        backward_layer=None,
+        **kwargs,
+    ):
+        if not isinstance(layer, Layer):
+            raise ValueError(
+                "Please initialize `Bidirectional` layer with a "
+                f"`tf.keras.layers.Layer` instance. Received: {layer}"
+            )
+        if backward_layer is not None and not isinstance(backward_layer, Layer):
+            raise ValueError(
+                "`backward_layer` needs to be a `tf.keras.layers.Layer` "
+                f"instance. Received: {backward_layer}"
+            )
+        if merge_mode not in ["sum", "mul", "ave", "concat", None]:
+            raise ValueError(
+                f"Invalid merge mode. Received: {merge_mode}. "
+                "Merge mode should be one of "
+                '{"sum", "mul", "ave", "concat", None}'
+            )
+        # We don't want to track `layer` since we're already tracking the two
+        # copies of it we actually run.
+        self._setattr_tracking = False
+        super().__init__(layer, **kwargs)
+        self._setattr_tracking = True
+
+        # Recreate the forward layer from the original layer config, so that it
+        # will not carry over any state from the layer.
+        self.forward_layer = self._recreate_layer_from_config(layer)
+
+        if backward_layer is None:
+            self.backward_layer = self._recreate_layer_from_config(
+                layer, go_backwards=True
+            )
+        else:
+            self.backward_layer = backward_layer
+
+            # Keep the custom backward layer config, so that we can save it
+            # later. The layer's name might be updated below with prefix
+            # 'backward_', and we want to preserve the original config.
+            self._backward_layer_config = (
+                serialization_lib.serialize_keras_object(backward_layer)
+            )
+
+        self.forward_layer._name = "forward_" + self.forward_layer.name
+        self.backward_layer._name = "backward_" + self.backward_layer.name
+
+        self._verify_layer_config()
+
+        def force_zero_output_for_mask(layer):
+            # Force the zero_output_for_mask to be True if returning sequences.
+            if getattr(layer, "zero_output_for_mask", None) is not None:
+                layer.zero_output_for_mask = layer.return_sequences
+
+        force_zero_output_for_mask(self.forward_layer)
+        force_zero_output_for_mask(self.backward_layer)
+
+        self.merge_mode = merge_mode
+        if weights:
+            nw = len(weights)
+            self.forward_layer.initial_weights = weights[: nw // 2]
+            self.backward_layer.initial_weights = weights[nw // 2 :]
+        self.stateful = layer.stateful
+        self.return_sequences = layer.return_sequences
+        self.return_state = layer.return_state
+        self.supports_masking = True
+        self._trainable = kwargs.get("trainable", layer.trainable)
+        self._num_constants = 0
+        self.input_spec = layer.input_spec
+
+    @property
+    def _use_input_spec_as_call_signature(self):
+        return self.layer._use_input_spec_as_call_signature
+
+    def _verify_layer_config(self):
+        """Ensure the forward and backward layers have valid common properties."""
+        if self.forward_layer.go_backwards == self.backward_layer.go_backwards:
+            raise ValueError(
+                "Forward layer and backward layer should have different "
+                "`go_backwards` value. "
+                "forward_layer.go_backwards = "
+                f"{self.forward_layer.go_backwards}, "
+                "backward_layer.go_backwards = "
+                f"{self.backward_layer.go_backwards}"
+            )
+
+        common_attributes = ("stateful", "return_sequences", "return_state")
+        for a in common_attributes:
+            forward_value = getattr(self.forward_layer, a)
+            backward_value = getattr(self.backward_layer, a)
+            if forward_value != backward_value:
+                raise ValueError(
+                    "Forward layer and backward layer are expected to have "
+                    f'the same value for attribute "{a}", got '
+                    f'"{forward_value}" for forward layer and '
+                    f'"{backward_value}" for backward layer'
+                )
+
+    def _recreate_layer_from_config(self, layer, go_backwards=False):
+        # When recreating the layer from its config, it is possible that the
+        # layer is an RNN layer that contains custom cells. In this case we
+        # inspect the layer and pass the custom cell class as part of the
+        # `custom_objects` argument when calling `from_config`. See
+        # https://github.com/tensorflow/tensorflow/issues/26581 for more detail.
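As an aside, the pattern the comment above describes can be sketched outside the wrapper. This is a minimal illustration, assuming a hypothetical `MinimalCell` subclass that is not part of this change:

```python
from tensorflow import keras


class MinimalCell(keras.layers.SimpleRNNCell):
    """Hypothetical custom cell, used only for this illustration."""


layer = keras.layers.RNN(MinimalCell(3), go_backwards=False)
config = layer.get_config()
config["go_backwards"] = not config["go_backwards"]

# `from_config` cannot resolve `MinimalCell` on its own, so the class is
# supplied via `custom_objects`, mirroring `_recreate_layer_from_config`.
clone = keras.layers.RNN.from_config(
    config, custom_objects={"MinimalCell": MinimalCell}
)
assert clone.go_backwards
```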
+ config = layer.get_config() + if go_backwards: + config["go_backwards"] = not config["go_backwards"] + if ( + "custom_objects" + in tf_inspect.getfullargspec(layer.__class__.from_config).args + ): + custom_objects = {} + cell = getattr(layer, "cell", None) + if cell is not None: + custom_objects[cell.__class__.__name__] = cell.__class__ + # For StackedRNNCells + stacked_cells = getattr(cell, "cells", []) + for c in stacked_cells: + custom_objects[c.__class__.__name__] = c.__class__ + return layer.__class__.from_config( + config, custom_objects=custom_objects + ) + else: + return layer.__class__.from_config(config) + + @tf_utils.shape_type_conversion + def compute_output_shape(self, input_shape): + output_shape = self.forward_layer.compute_output_shape(input_shape) + if self.return_state: + state_shape = tf_utils.convert_shapes( + output_shape[1:], to_tuples=False + ) + output_shape = tf_utils.convert_shapes( + output_shape[0], to_tuples=False + ) + else: + output_shape = tf_utils.convert_shapes( + output_shape, to_tuples=False + ) + + if self.merge_mode == "concat": + output_shape = output_shape.as_list() + output_shape[-1] *= 2 + output_shape = tf.TensorShape(output_shape) + elif self.merge_mode is None: + output_shape = [output_shape, copy.copy(output_shape)] + + if self.return_state: + if self.merge_mode is None: + return output_shape + state_shape + copy.copy(state_shape) + return [output_shape] + state_shape + copy.copy(state_shape) + return output_shape + + def __call__(self, inputs, initial_state=None, constants=None, **kwargs): + """`Bidirectional.__call__` implements the same API as the wrapped + `RNN`.""" + inputs, initial_state, constants = rnn_utils.standardize_args( + inputs, initial_state, constants, self._num_constants + ) + + if isinstance(inputs, list): + if len(inputs) > 1: + initial_state = inputs[1:] + inputs = inputs[0] + + if initial_state is None and constants is None: + return super().__call__(inputs, **kwargs) + + # Applies the same workaround as in `RNN.__call__` + additional_inputs = [] + additional_specs = [] + if initial_state is not None: + # Check if `initial_state` can be split into half + num_states = len(initial_state) + if num_states % 2 > 0: + raise ValueError( + "When passing `initial_state` to a Bidirectional RNN, " + "the state should be a list containing the states of " + "the underlying RNNs. 
" + f"Received: {initial_state}" + ) + + kwargs["initial_state"] = initial_state + additional_inputs += initial_state + state_specs = tf.nest.map_structure( + lambda state: InputSpec(shape=backend.int_shape(state)), + initial_state, + ) + self.forward_layer.state_spec = state_specs[: num_states // 2] + self.backward_layer.state_spec = state_specs[num_states // 2 :] + additional_specs += state_specs + if constants is not None: + kwargs["constants"] = constants + additional_inputs += constants + constants_spec = [ + InputSpec(shape=backend.int_shape(constant)) + for constant in constants + ] + self.forward_layer.constants_spec = constants_spec + self.backward_layer.constants_spec = constants_spec + additional_specs += constants_spec + + self._num_constants = len(constants) + self.forward_layer._num_constants = self._num_constants + self.backward_layer._num_constants = self._num_constants + + is_keras_tensor = backend.is_keras_tensor( + tf.nest.flatten(additional_inputs)[0] + ) + for tensor in tf.nest.flatten(additional_inputs): + if backend.is_keras_tensor(tensor) != is_keras_tensor: + raise ValueError( + "The initial state of a Bidirectional" + " layer cannot be specified with a mix of" + " Keras tensors and non-Keras tensors" + ' (a "Keras tensor" is a tensor that was' + " returned by a Keras layer, or by `Input`)" + ) + + if is_keras_tensor: + # Compute the full input spec, including state + full_input = [inputs] + additional_inputs + # The original input_spec is None since there could be a nested + # tensor input. Update the input_spec to match the inputs. + full_input_spec = [ + None for _ in range(len(tf.nest.flatten(inputs))) + ] + additional_specs + # Removing kwargs since the value are passed with input list. + kwargs["initial_state"] = None + kwargs["constants"] = None + + # Perform the call with temporarily replaced input_spec + original_input_spec = self.input_spec + self.input_spec = full_input_spec + output = super().__call__(full_input, **kwargs) + self.input_spec = original_input_spec + return output + else: + return super().__call__(inputs, **kwargs) + + def call( + self, + inputs, + training=None, + mask=None, + initial_state=None, + constants=None, + ): + """`Bidirectional.call` implements the same API as the wrapped `RNN`.""" + kwargs = {} + if generic_utils.has_arg(self.layer.call, "training"): + kwargs["training"] = training + if generic_utils.has_arg(self.layer.call, "mask"): + kwargs["mask"] = mask + if generic_utils.has_arg(self.layer.call, "constants"): + kwargs["constants"] = constants + + if generic_utils.has_arg(self.layer.call, "initial_state"): + if isinstance(inputs, list) and len(inputs) > 1: + # initial_states are keras tensors, which means they are passed + # in together with inputs as list. The initial_states need to be + # split into forward and backward section, and be feed to layers + # accordingly. 
+ forward_inputs = [inputs[0]] + backward_inputs = [inputs[0]] + pivot = (len(inputs) - self._num_constants) // 2 + 1 + # add forward initial state + forward_inputs += inputs[1:pivot] + if not self._num_constants: + # add backward initial state + backward_inputs += inputs[pivot:] + else: + # add backward initial state + backward_inputs += inputs[pivot : -self._num_constants] + # add constants for forward and backward layers + forward_inputs += inputs[-self._num_constants :] + backward_inputs += inputs[-self._num_constants :] + forward_state, backward_state = None, None + if "constants" in kwargs: + kwargs["constants"] = None + elif initial_state is not None: + # initial_states are not keras tensors, eg eager tensor from np + # array. They are only passed in from kwarg initial_state, and + # should be passed to forward/backward layer via kwarg + # initial_state as well. + forward_inputs, backward_inputs = inputs, inputs + half = len(initial_state) // 2 + forward_state = initial_state[:half] + backward_state = initial_state[half:] + else: + forward_inputs, backward_inputs = inputs, inputs + forward_state, backward_state = None, None + + y = self.forward_layer( + forward_inputs, initial_state=forward_state, **kwargs + ) + y_rev = self.backward_layer( + backward_inputs, initial_state=backward_state, **kwargs + ) + else: + y = self.forward_layer(inputs, **kwargs) + y_rev = self.backward_layer(inputs, **kwargs) + + if self.return_state: + states = y[1:] + y_rev[1:] + y = y[0] + y_rev = y_rev[0] + + if self.return_sequences: + time_dim = ( + 0 if getattr(self.forward_layer, "time_major", False) else 1 + ) + y_rev = backend.reverse(y_rev, time_dim) + if self.merge_mode == "concat": + output = backend.concatenate([y, y_rev]) + elif self.merge_mode == "sum": + output = y + y_rev + elif self.merge_mode == "ave": + output = (y + y_rev) / 2 + elif self.merge_mode == "mul": + output = y * y_rev + elif self.merge_mode is None: + output = [y, y_rev] + else: + raise ValueError( + "Unrecognized value for `merge_mode`. " + f"Received: {self.merge_mode}" + 'Expected values are ["concat", "sum", "ave", "mul"]' + ) + + if self.return_state: + if self.merge_mode is None: + return output + states + return [output] + states + return output + + def reset_states(self, states=None): + if not self.stateful: + raise AttributeError("Layer must be stateful.") + + if states is None: + self.forward_layer.reset_states() + self.backward_layer.reset_states() + else: + if not isinstance(states, (list, tuple)): + raise ValueError( + "Unrecognized value for `states`. " + "Expected `states` to be list or tuple. 
" + f"Received: {states}" + ) + + half = len(states) // 2 + self.forward_layer.reset_states(states[:half]) + self.backward_layer.reset_states(states[half:]) + + def build(self, input_shape): + with backend.name_scope(self.forward_layer.name): + self.forward_layer.build(input_shape) + with backend.name_scope(self.backward_layer.name): + self.backward_layer.build(input_shape) + self.built = True + + def compute_mask(self, inputs, mask): + if isinstance(mask, list): + mask = mask[0] + if self.return_sequences: + if not self.merge_mode: + output_mask = [mask, mask] + else: + output_mask = mask else: - # add backward initial state - backward_inputs += inputs[pivot:-self._num_constants] - # add constants for forward and backward layers - forward_inputs += inputs[-self._num_constants:] - backward_inputs += inputs[-self._num_constants:] - forward_state, backward_state = None, None - if 'constants' in kwargs: - kwargs['constants'] = None - elif initial_state is not None: - # initial_states are not keras tensors, eg eager tensor from np array. - # They are only passed in from kwarg initial_state, and should be passed - # to forward/backward layer via kwarg initial_state as well. - forward_inputs, backward_inputs = inputs, inputs - half = len(initial_state) // 2 - forward_state = initial_state[:half] - backward_state = initial_state[half:] - else: - forward_inputs, backward_inputs = inputs, inputs - forward_state, backward_state = None, None - - y = self.forward_layer(forward_inputs, - initial_state=forward_state, **kwargs) - y_rev = self.backward_layer(backward_inputs, - initial_state=backward_state, **kwargs) - else: - y = self.forward_layer(inputs, **kwargs) - y_rev = self.backward_layer(inputs, **kwargs) - - if self.return_state: - states = y[1:] + y_rev[1:] - y = y[0] - y_rev = y_rev[0] - - if self.return_sequences: - time_dim = 0 if getattr(self.forward_layer, 'time_major', False) else 1 - y_rev = backend.reverse(y_rev, time_dim) - if self.merge_mode == 'concat': - output = backend.concatenate([y, y_rev]) - elif self.merge_mode == 'sum': - output = y + y_rev - elif self.merge_mode == 'ave': - output = (y + y_rev) / 2 - elif self.merge_mode == 'mul': - output = y * y_rev - elif self.merge_mode is None: - output = [y, y_rev] - else: - raise ValueError( - f'Unrecognized value for `merge_mode`. 
Received: {self.merge_mode}' - 'Expected values are ["concat", "sum", "ave", "mul"]') - - if self.return_state: - if self.merge_mode is None: - return output + states - return [output] + states - return output - - def reset_states(self): - self.forward_layer.reset_states() - self.backward_layer.reset_states() - - def build(self, input_shape): - with backend.name_scope(self.forward_layer.name): - self.forward_layer.build(input_shape) - with backend.name_scope(self.backward_layer.name): - self.backward_layer.build(input_shape) - self.built = True - - def compute_mask(self, inputs, mask): - if isinstance(mask, list): - mask = mask[0] - if self.return_sequences: - if not self.merge_mode: - output_mask = [mask, mask] - else: - output_mask = mask - else: - output_mask = [None, None] if not self.merge_mode else None - - if self.return_state: - states = self.forward_layer.states - state_mask = [None for _ in states] - if isinstance(output_mask, list): - return output_mask + state_mask * 2 - return [output_mask] + state_mask * 2 - return output_mask - - @property - def constraints(self): - constraints = {} - if hasattr(self.forward_layer, 'constraints'): - constraints.update(self.forward_layer.constraints) - constraints.update(self.backward_layer.constraints) - return constraints - - def get_config(self): - config = {'merge_mode': self.merge_mode} - if self._num_constants: - config['num_constants'] = self._num_constants - - if hasattr(self, '_backward_layer_config'): - config['backward_layer'] = self._backward_layer_config - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - # Instead of updating the input, create a copy and use that. - config = copy.deepcopy(config) - num_constants = config.pop('num_constants', 0) - # Handle forward layer instantiation (as would parent class). - from keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top - config['layer'] = deserialize_layer( - config['layer'], custom_objects=custom_objects) - # Handle (optional) backward layer instantiation. - backward_layer_config = config.pop('backward_layer', None) - if backward_layer_config is not None: - backward_layer = deserialize_layer( - backward_layer_config, custom_objects=custom_objects) - config['backward_layer'] = backward_layer - # Instantiate the wrapper, adjust it and return it. 
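For reference, the `get_config`/`from_config` pair shown here supports a plain round trip through the public API. A minimal sketch (names and sizes are arbitrary):

```python
from tensorflow import keras

# Round-trip a Bidirectional layer through its config, as `get_config`
# and `from_config` are designed to support.
layer = keras.layers.Bidirectional(keras.layers.LSTM(4), merge_mode="sum")
config = layer.get_config()
clone = keras.layers.Bidirectional.from_config(config)

assert clone.merge_mode == "sum"
assert clone.forward_layer.name.startswith("forward_")
```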
- layer = cls(**config) - layer._num_constants = num_constants # pylint: disable=protected-access - return layer + output_mask = [None, None] if not self.merge_mode else None + + if self.return_state: + states = self.forward_layer.states + state_mask = [None for _ in states] + if isinstance(output_mask, list): + return output_mask + state_mask * 2 + return [output_mask] + state_mask * 2 + return output_mask + + @property + def constraints(self): + constraints = {} + if hasattr(self.forward_layer, "constraints"): + constraints.update(self.forward_layer.constraints) + constraints.update(self.backward_layer.constraints) + return constraints + + def get_config(self): + config = {"merge_mode": self.merge_mode} + if self._num_constants: + config["num_constants"] = self._num_constants + + if hasattr(self, "_backward_layer_config"): + config["backward_layer"] = self._backward_layer_config + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + # Instead of updating the input, create a copy and use that. + config = copy.deepcopy(config) + num_constants = config.pop("num_constants", 0) + # Handle forward layer instantiation (as would parent class). + from keras.layers import deserialize as deserialize_layer + + config["layer"] = deserialize_layer( + config["layer"], custom_objects=custom_objects + ) + # Handle (optional) backward layer instantiation. + backward_layer_config = config.pop("backward_layer", None) + if backward_layer_config is not None: + backward_layer = deserialize_layer( + backward_layer_config, custom_objects=custom_objects + ) + config["backward_layer"] = backward_layer + # Instantiate the wrapper, adjust it and return it. + layer = cls(**config) + layer._num_constants = num_constants + return layer diff --git a/keras/layers/rnn/bidirectional_test.py b/keras/layers/rnn/bidirectional_test.py index 29df473f3fe2..cc97f2c1b91f 100644 --- a/keras/layers/rnn/bidirectional_test.py +++ b/keras/layers/rnn/bidirectional_test.py @@ -17,922 +17,1117 @@ import copy +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.engine import base_layer_utils from keras.layers import core from keras.layers.rnn.cell_wrappers import ResidualWrapper from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.utils import generic_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_util -from tensorflow.python.training.tracking import util as trackable_util +# isort: off +from tensorflow.python.checkpoint import ( + checkpoint as trackable_util, +) +from tensorflow.python.framework import ( + test_util as tf_test_util, +) class _RNNCellWithConstants(keras.layers.Layer): - - def __init__(self, units, constant_size, **kwargs): - self.units = units - self.state_size = units - self.constant_size = constant_size - super().__init__(**kwargs) - - def build(self, input_shape): - self.input_kernel = self.add_weight( - shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.constant_kernel = self.add_weight( - shape=(self.constant_size, self.units), - initializer='uniform', - name='constant_kernel') - self.built = True - - def call(self, inputs, states, constants): - 
[prev_output] = states - [constant] = constants - h_input = keras.backend.dot(inputs, self.input_kernel) - h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(constant, self.constant_kernel) - output = h_input + h_state + h_const - return output, [output] - - def get_config(self): - config = {'units': self.units, 'constant_size': self.constant_size} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def __init__(self, units, constant_size, **kwargs): + self.units = units + self.state_size = units + self.constant_size = constant_size + super().__init__(**kwargs) + + def build(self, input_shape): + self.input_kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="uniform", + name="kernel", + ) + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer="uniform", + name="recurrent_kernel", + ) + self.constant_kernel = self.add_weight( + shape=(self.constant_size, self.units), + initializer="uniform", + name="constant_kernel", + ) + self.built = True + + def call(self, inputs, states, constants): + [prev_output] = states + [constant] = constants + h_input = keras.backend.dot(inputs, self.input_kernel) + h_state = keras.backend.dot(prev_output, self.recurrent_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) + output = h_input + h_state + h_const + return output, [output] + + def get_config(self): + config = {"units": self.units, "constant_size": self.constant_size} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) class _ResidualLSTMCell(keras.layers.LSTMCell): - - def call(self, inputs, states, training=None): - output, states = super().call(inputs, states) - return output + inputs, states + def call(self, inputs, states, training=None): + output, states = super().call(inputs, states) + return output + inputs, states class _AddOneCell(keras.layers.AbstractRNNCell): - """Increments inputs and state by one on each call.""" + """Increments inputs and state by one on each call.""" - @property - def state_size(self): - return 1 + @property + def state_size(self): + return 1 - @property - def output_size(self): - return 1 + @property + def output_size(self): + return 1 - def call(self, inputs, state): - inputs = tf.reduce_mean(inputs, axis=1, keepdims=True) - outputs = inputs + 1.0 - state = tf.nest.map_structure(lambda t: t + 1.0, state) - return outputs, state + def call(self, inputs, state): + inputs = tf.reduce_mean(inputs, axis=1, keepdims=True) + outputs = inputs + 1.0 + state = tf.nest.map_structure(lambda t: t + 1.0, state) + return outputs, state -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class BidirectionalTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.parameters(["sum", "concat", "ave", "mul"]) + def test_bidirectional(self, mode): + rnn = keras.layers.SimpleRNN + samples = 2 + dim = 2 + timesteps = 2 + output_dim = 2 + with self.cached_session(): + x = np.random.random((samples, timesteps, dim)) + target_dim = 2 * output_dim if mode == "concat" else output_dim + y = np.random.random((samples, target_dim)) + + # test with Sequential model + model = keras.models.Sequential() + model.add( + keras.layers.Bidirectional( + rnn(output_dim), + merge_mode=mode, + input_shape=(timesteps, dim), + ) + ) + model.compile(optimizer="rmsprop", 
loss="mse") + model.fit(x, y, epochs=1, batch_size=1) + + # check whether the model variables are present in the + # trackable list of objects + checkpointed_object_ids = { + id(o) for o in trackable_util.list_objects(model) + } + for v in model.variables: + self.assertIn(id(v), checkpointed_object_ids) + + # test compute output shape + ref_shape = model.layers[-1].output.shape + shape = model.layers[-1].compute_output_shape( + (None, timesteps, dim) + ) + self.assertListEqual(shape.as_list(), ref_shape.as_list()) + + # test config + model.get_config() + model = keras.models.model_from_json(model.to_json()) + model.summary() + + def test_bidirectional_invalid_init(self): + x = tf.constant(np.zeros((1, 1)).astype("float32")) + with self.assertRaisesRegex( + ValueError, + "Please initialize `Bidirectional` layer with a " + "`tf.keras.layers.Layer` instance.", + ): + keras.layers.Bidirectional(x) + + def test_bidirectional_weight_loading(self): + rnn = keras.layers.SimpleRNN + samples = 2 + dim = 2 + timesteps = 2 + output_dim = 2 + with self.cached_session(): + x = np.random.random((samples, timesteps, dim)) + model = keras.models.Sequential() + model.add( + keras.layers.Bidirectional( + rnn(output_dim), input_shape=(timesteps, dim) + ) + ) + y_ref = model.predict(x) + weights = model.layers[-1].get_weights() + model.layers[-1].set_weights(weights) + y = model.predict(x) + self.assertAllClose(y, y_ref) + + def test_bidirectional_stacked(self): + # test stacked bidirectional layers + rnn = keras.layers.SimpleRNN + samples = 2 + dim = 2 + timesteps = 2 + output_dim = 2 + mode = "sum" + + with self.cached_session(): + x = np.random.random((samples, timesteps, dim)) + target_dim = 2 * output_dim if mode == "concat" else output_dim + y = np.random.random((samples, target_dim)) + + model = keras.models.Sequential() + model.add( + keras.layers.Bidirectional( + rnn(output_dim, return_sequences=True), + merge_mode=mode, + input_shape=(timesteps, dim), + ) + ) + model.add( + keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode) + ) + model.compile(loss="mse", optimizer="sgd") + model.fit(x, y, epochs=1, batch_size=1) + + # test with functional API + inputs = keras.layers.Input((timesteps, dim)) + output = keras.layers.Bidirectional( + rnn(output_dim), merge_mode=mode + )(inputs) + model = keras.models.Model(inputs, output) + model.compile(loss="mse", optimizer="sgd") + model.fit(x, y, epochs=1, batch_size=1) + + def test_bidirectional_statefulness(self): + # Bidirectional and stateful + def run_test(): + rnn = keras.layers.SimpleRNN + samples = 2 + dim = 2 + timesteps = 2 + output_dim = 2 + mode = "sum" + + with self.cached_session(): + x = np.random.random((samples, timesteps, dim)) + target_dim = 2 * output_dim if mode == "concat" else output_dim + y = np.random.random((samples, target_dim)) + + inputs = keras.layers.Input(batch_shape=(1, timesteps, dim)) + bidi_rnn = keras.layers.Bidirectional( + rnn(output_dim, stateful=True), merge_mode=mode + ) + self.assertTrue(bidi_rnn.stateful) + output = bidi_rnn(inputs) + model = keras.models.Model(inputs, output) + + y_1 = model.predict(x, batch_size=1) + model.reset_states() + y_2 = model.predict(x, batch_size=1) + + self.assertAllClose(y_1, y_2) + + model.compile(loss="mse", optimizer="sgd") + model.fit(x, y, epochs=1, batch_size=1) + + if tf.executing_eagerly(): + run_test() + else: + tf_test_util.enable_output_all_intermediates(run_test)() + + @parameterized.parameters(["sum", "mul", "ave", "concat", None]) + def 
test_Bidirectional_merged_value(self, merge_mode):
+        rnn = keras.layers.LSTM
+        samples = 2
+        dim = 5
+        timesteps = 3
+        units = 3
+        x = [np.random.rand(samples, timesteps, dim)]
+
+        with self.cached_session():
+            if merge_mode == "sum":
+                merge_func = lambda y, y_rev: y + y_rev
+            elif merge_mode == "mul":
+                merge_func = lambda y, y_rev: y * y_rev
+            elif merge_mode == "ave":
+                merge_func = lambda y, y_rev: (y + y_rev) / 2
+            elif merge_mode == "concat":
+                merge_func = lambda y, y_rev: np.concatenate(
+                    (y, y_rev), axis=-1
+                )
+            else:
+                merge_func = lambda y, y_rev: [y, y_rev]
+
+            # basic case
+            inputs = keras.Input((timesteps, dim))
+            layer = keras.layers.Bidirectional(
+                rnn(units, return_sequences=True), merge_mode=merge_mode
+            )
+            f_merged = keras.backend.function([inputs], _to_list(layer(inputs)))
+            f_forward = keras.backend.function(
+                [inputs], [layer.forward_layer(inputs)]
+            )
+            f_backward = keras.backend.function(
+                [inputs],
+                [keras.backend.reverse(layer.backward_layer(inputs), 1)],
+            )
+
+            y_merged = f_merged(x)
+            y_expected = _to_list(merge_func(f_forward(x)[0], f_backward(x)[0]))
+            assert len(y_merged) == len(y_expected)
+            for x1, x2 in zip(y_merged, y_expected):
+                self.assertAllClose(x1, x2, atol=1e-5)
+
+            # test return_state
+            inputs = keras.Input((timesteps, dim))
+            layer = keras.layers.Bidirectional(
+                rnn(units, return_state=True), merge_mode=merge_mode
+            )
+            f_merged = keras.backend.function([inputs], layer(inputs))
+            f_forward = keras.backend.function(
+                [inputs], layer.forward_layer(inputs)
+            )
+            f_backward = keras.backend.function(
+                [inputs], layer.backward_layer(inputs)
+            )
+            n_states = len(layer.layer.states)
+
+            y_merged = f_merged(x)
+            y_forward = f_forward(x)
+            y_backward = f_backward(x)
+            y_expected = _to_list(merge_func(y_forward[0], y_backward[0]))
+            assert len(y_merged) == len(y_expected) + n_states * 2
+            for x1, x2 in zip(y_merged, y_expected):
+                self.assertAllClose(x1, x2, atol=1e-5)
+
+            y_merged = y_merged[-n_states * 2 :]
+            y_forward = y_forward[-n_states:]
+            y_backward = y_backward[-n_states:]
+            for state_birnn, state_inner in zip(
+                y_merged, y_forward + y_backward
+            ):
+                self.assertAllClose(state_birnn, state_inner, atol=1e-5)
+
+    @parameterized.parameters([True, False])
+    def test_Bidirectional_with_time_major_input(self, time_major):
+        batch_size, time, input_dim = 2, 3, 1
+        inputs = tf.zeros((batch_size, time, input_dim))
+        # length is [1 2]. Within the batch, the first element has 1 step, and
+        # the second element has 2 steps.
+        lengths = tf.range(1, 1 + batch_size)
+        mask = tf.sequence_mask(lengths, maxlen=time, dtype=tf.float32)
+
+        forward_cell = _AddOneCell(name="forward")
+        backward_cell = _AddOneCell(name="backward")
+
+        layer = keras.layers.Bidirectional(
+            layer=keras.layers.RNN(
+                forward_cell, time_major=time_major, return_sequences=True
+            ),
+            backward_layer=keras.layers.RNN(
+                backward_cell,
+                time_major=time_major,
+                return_sequences=True,
+                go_backwards=True,
+            ),
+        )
+
+        # Switch to time-major.
+        if time_major:
+            inputs = tf.transpose(inputs, [1, 0, 2])
+            mask = tf.transpose(mask, [1, 0])
+
+        keras_outputs = layer(inputs, mask=mask)
+        if time_major:
+            keras_outputs = tf.transpose(keras_outputs, [1, 0, 2])
+
+        # expect the first element in the batch to have 1 step and the second
+        # element to have 2 steps.
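The `tf.sequence_mask` call above turns the per-sequence lengths into a step mask. A standalone sketch of the same construction, assuming eager execution:

```python
import tensorflow as tf

# lengths [1, 2] mark one valid step for the first sequence and two for
# the second, matching the comment above.
lengths = tf.range(1, 3)
mask = tf.sequence_mask(lengths, maxlen=3, dtype=tf.float32)
print(mask.numpy())
# [[1. 0. 0.]
#  [1. 1. 0.]]
```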
+ expected_result = np.array( + [ + [[1.0, 1.0], [0.0, 0.0], [0.0, 0.0]], + [[1.0, 1.0], [1.0, 1.0], [0.0, 0.0]], + ] + ) + self.assertAllClose(expected_result, keras_outputs) + + def test_Bidirectional_dropout(self): + rnn = keras.layers.LSTM + samples = 2 + dim = 5 + timesteps = 3 + units = 3 + merge_mode = "sum" + x = [np.random.rand(samples, timesteps, dim)] + + with self.cached_session(): + inputs = keras.Input((timesteps, dim)) + wrapped = keras.layers.Bidirectional( + rnn(units, dropout=0.2, recurrent_dropout=0.2), + merge_mode=merge_mode, + ) + outputs = _to_list(wrapped(inputs, training=True)) + + inputs = keras.Input((timesteps, dim)) + wrapped = keras.layers.Bidirectional( + rnn(units, dropout=0.2, return_state=True), + merge_mode=merge_mode, + ) + outputs = _to_list(wrapped(inputs)) + + model = keras.Model(inputs, outputs) + y1 = _to_list(model.predict(x)) + y2 = _to_list(model.predict(x)) + for x1, x2 in zip(y1, y2): + self.assertAllClose(x1, x2, atol=1e-5) + + def test_Bidirectional_state_reuse(self): + rnn = keras.layers.LSTM + samples = 2 + dim = 5 + timesteps = 3 + units = 3 + + with self.cached_session(): + input1 = keras.layers.Input((timesteps, dim)) + layer = keras.layers.Bidirectional( + rnn(units, return_state=True, return_sequences=True) + ) + state = layer(input1)[1:] + + # test passing invalid initial_state: passing a tensor + input2 = keras.layers.Input((timesteps, dim)) + with self.assertRaises(ValueError): + keras.layers.Bidirectional(rnn(units))( + input2, initial_state=state[0] + ) + + # test valid usage: passing a list + output = keras.layers.Bidirectional(rnn(units))( + input2, initial_state=state + ) + model = keras.models.Model([input1, input2], output) + assert len(model.layers) == 4 + assert isinstance(model.layers[-1].input, list) + inputs = [ + np.random.rand(samples, timesteps, dim), + np.random.rand(samples, timesteps, dim), + ] + model.predict(inputs) + + def test_Bidirectional_state_reuse_with_np_input(self): + # See https://github.com/tensorflow/tensorflow/issues/28761 for more + # detail. + rnn = keras.layers.LSTM + samples = 2 + dim = 5 + timesteps = 3 + units = 3 + + with self.cached_session(): + input1 = np.random.rand(samples, timesteps, dim).astype(np.float32) + layer = keras.layers.Bidirectional( + rnn(units, return_state=True, return_sequences=True) + ) + state = layer(input1)[1:] + + input2 = np.random.rand(samples, timesteps, dim).astype(np.float32) + keras.layers.Bidirectional(rnn(units))(input2, initial_state=state) + + def test_Bidirectional_trainable(self): + # test layers that need learning_phase to be set + with self.cached_session(): + x = keras.layers.Input(shape=(3, 2)) + layer = keras.layers.Bidirectional(keras.layers.SimpleRNN(3)) + _ = layer(x) + assert len(layer.trainable_weights) == 6 + layer.trainable = False + assert not layer.trainable_weights + layer.trainable = True + assert len(layer.trainable_weights) == 6 + + def test_Bidirectional_updates(self): + if tf.executing_eagerly(): + self.skipTest("layer.updates is only available in graph mode.") + + with self.cached_session(): + x = keras.layers.Input(shape=(3, 2)) + x_reachable_update = x * x + layer = keras.layers.Bidirectional(keras.layers.SimpleRNN(3)) + _ = layer(x) + assert not layer.updates + # TODO(b/128684069): Remove when Wrapper sublayers are __call__'d. 
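The state-reuse pattern exercised by `test_Bidirectional_state_reuse` above can be shown in isolation. A minimal sketch with arbitrary shapes: the first half of the returned state list seeds the forward RNN, the second half the backward one.

```python
import keras

inp1 = keras.Input((3, 5))
inp2 = keras.Input((3, 5))

layer = keras.layers.Bidirectional(
    keras.layers.LSTM(3, return_state=True, return_sequences=True)
)
# Output is [sequences, fwd_h, fwd_c, bwd_h, bwd_c]; keep the 4 states.
states = layer(inp1)[1:]

# The first half ([fwd_h, fwd_c]) seeds the forward LSTM, the second
# half the backward one.
output = keras.layers.Bidirectional(keras.layers.LSTM(3))(
    inp2, initial_state=states
)
model = keras.Model([inp1, inp2], output)
```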
+ with base_layer_utils.call_context().enter(layer, x, True, None): + layer.forward_layer.add_update(x_reachable_update) + layer.forward_layer.add_update(1) + layer.backward_layer.add_update(x_reachable_update) + layer.backward_layer.add_update(1) + assert len(layer.updates) == 4 + + def test_Bidirectional_losses(self): + x = keras.layers.Input(shape=(3, 2)) + layer = keras.layers.Bidirectional( + keras.layers.SimpleRNN( + 3, + kernel_regularizer="l1", + bias_regularizer="l1", + activity_regularizer="l1", + ) + ) + _ = layer(x) + assert len(layer.losses) == 6 + + loss = x * x + layer.forward_layer.add_loss(loss) + layer.backward_layer.add_loss(loss) + assert len(layer.losses) == 8 + + def test_Bidirectional_with_constants(self): + with self.cached_session(): + # Test basic case. + x = keras.Input((5, 5)) + c = keras.Input((3,)) + cell = _RNNCellWithConstants(32, 3) + custom_objects = {"_RNNCellWithConstants": _RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.Bidirectional(keras.layers.RNN(cell)) + y = layer(x, constants=c) + model = keras.Model([x, c], y) + model.compile(optimizer="rmsprop", loss="mse") + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], np.zeros((6, 64)) + ) + + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.Bidirectional.from_config( + copy.deepcopy(config) + ) + y = layer(x, constants=c) + model = keras.Model([x, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # Test flat list inputs + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.Bidirectional.from_config( + copy.deepcopy(config) + ) + y = layer([x, c]) + model = keras.Model([x, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_3, atol=1e-4) + + def test_Bidirectional_with_constants_layer_passing_initial_state(self): + with self.cached_session(): + # Test basic case. + x = keras.Input((5, 5)) + c = keras.Input((3,)) + s_for = keras.Input((32,)) + s_bac = keras.Input((32,)) + cell = _RNNCellWithConstants(32, 3) + custom_objects = {"_RNNCellWithConstants": _RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.Bidirectional(keras.layers.RNN(cell)) + y = layer(x, initial_state=[s_for, s_bac], constants=c) + model = keras.Model([x, s_for, s_bac, c], y) + model.compile(optimizer="rmsprop", loss="mse") + model.train_on_batch( + [ + np.zeros((6, 5, 5)), + np.zeros((6, 32)), + np.zeros((6, 32)), + np.zeros((6, 3)), + ], + np.zeros((6, 64)), + ) + + # Test basic case serialization. 
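For context, the `constants` kwarg exercised in this test routes an extra tensor unchanged into every step of both directions. A minimal functional-API sketch, reusing the `_RNNCellWithConstants` helper defined earlier in this file:

```python
import numpy as np
import keras

# Assumes the `_RNNCellWithConstants` cell defined at the top of this file.
x = keras.Input((5, 5))
c = keras.Input((3,))
layer = keras.layers.Bidirectional(
    keras.layers.RNN(_RNNCellWithConstants(32, 3))
)
y = layer(x, constants=c)  # `c` is fed to both forward and backward RNNs

model = keras.Model([x, c], y)
out = model.predict([np.zeros((6, 5, 5)), np.zeros((6, 3))])
print(out.shape)  # (6, 64) with the default "concat" merge
```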
+ x_np = np.random.random((6, 5, 5)) + s_fw_np = np.random.random((6, 32)) + s_bk_np = np.random.random((6, 32)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, s_fw_np, s_bk_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.Bidirectional.from_config( + copy.deepcopy(config) + ) + y = layer(x, initial_state=[s_for, s_bac], constants=c) + model = keras.Model([x, s_for, s_bac, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, s_fw_np, s_bk_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # Verify that state is used + y_np_2_different_s = model.predict( + [x_np, s_fw_np + 10.0, s_bk_np + 10.0, c_np] + ) + assert np.mean(y_np - y_np_2_different_s) != 0 + + # Test flat list inputs + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.Bidirectional.from_config( + copy.deepcopy(config) + ) + y = layer([x, s_for, s_bac, c]) + model = keras.Model([x, s_for, s_bac, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, s_fw_np, s_bk_np, c_np]) + self.assertAllClose(y_np, y_np_3, atol=1e-4) + + @parameterized.parameters([keras.layers.LSTM, keras.layers.GRU]) + def test_Bidirectional_output_shape(self, rnn): + input_shape = [None, 2, 1] + num_state = 4 if rnn == keras.layers.LSTM else 2 + + wrapper = keras.layers.Bidirectional(rnn(3)) + output_shape = wrapper.compute_output_shape(input_shape) + self.assertEqual(output_shape.as_list(), [None, 6]) + + wrapper = keras.layers.Bidirectional(rnn(3, return_state=True)) + output_shape = wrapper.compute_output_shape(input_shape) + # 1 for output and the rest for forward and backward states + self.assertLen(output_shape, 1 + num_state) + self.assertEqual(output_shape[0].as_list(), [None, 6]) + for shape in output_shape[1:]: + self.assertEqual(shape.as_list(), [None, 3]) + + wrapper = keras.layers.Bidirectional( + rnn(3, return_state=True), merge_mode=None + ) + output_shape = wrapper.compute_output_shape(input_shape) + # 1 for forward output and 1 for backward output, and the rest for + # states + self.assertLen(output_shape, 2 + num_state) + for shape in output_shape: + self.assertEqual(shape.as_list(), [None, 3]) + + def test_Bidirectional_output_shape_return_types(self): + class TestLayer(keras.layers.SimpleRNN): + def call(self, inputs): + return tf.concat([inputs, inputs], axis=-1) + + def compute_output_shape(self, input_shape): + output_shape = tf.TensorShape(input_shape).as_list() + output_shape[-1] = output_shape[-1] * 2 + return tf.TensorShape(output_shape) + + class TestListLayer(TestLayer): + def compute_output_shape(self, input_shape): + shape = super().compute_output_shape(input_shape) + return shape.as_list() + + class TestTupleLayer(TestLayer): + def compute_output_shape(self, input_shape): + shape = super().compute_output_shape(input_shape) + return tuple(shape.as_list()) + + # Layers can specify output shape as list/tuple/TensorShape + test_layers = [TestLayer, TestListLayer, TestTupleLayer] + for layer in test_layers: + input_layer = keras.layers.Bidirectional(layer(1)) + inputs = keras.backend.placeholder(shape=(None, 2, 4)) + output = input_layer(inputs) + self.assertEqual(output.shape.as_list(), [None, 2, 16]) + self.assertEqual( + input_layer.compute_output_shape([None, 2, 4]).as_list(), + [None, 2, 16], + ) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." 
+ ), + ) + def test_Bidirectional_last_output_with_masking(self): + rnn = keras.layers.LSTM + samples = 2 + dim = 5 + timesteps = 3 + units = 3 + merge_mode = "concat" + x = np.random.rand(samples, timesteps, dim) + # clear the first record's timestep 2. Last output should be same as + # state, not zeroed. + x[0, 2] = 0 + + with self.cached_session(): + inputs = keras.Input((timesteps, dim)) + masked_inputs = keras.layers.Masking()(inputs) + wrapped = keras.layers.Bidirectional( + rnn(units, return_state=True), merge_mode=merge_mode + ) + outputs = _to_list(wrapped(masked_inputs, training=True)) + self.assertLen(outputs, 5) + self.assertEqual(outputs[0].shape.as_list(), [None, units * 2]) + + model = keras.Model(inputs, outputs) + y = _to_list(model.predict(x)) + self.assertLen(y, 5) + self.assertAllClose(y[0], np.concatenate([y[1], y[3]], axis=1)) + + @parameterized.parameters([keras.layers.LSTM, keras.layers.GRU]) + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." + ), + ) + def test_Bidirectional_sequence_output_with_masking(self, rnn): + samples = 2 + dim = 5 + timesteps = 3 + units = 3 + merge_mode = "concat" + x = np.random.rand(samples, timesteps, dim) + # clear the first record's timestep 2, and expect the output of timestep + # 2 is also 0s. + x[0, 2] = 0 + + with self.cached_session(): + inputs = keras.Input((timesteps, dim)) + masked_inputs = keras.layers.Masking()(inputs) + wrapped = keras.layers.Bidirectional( + rnn(units, return_sequences=True), merge_mode=merge_mode + ) + outputs = _to_list(wrapped(masked_inputs, training=True)) + self.assertLen(outputs, 1) + self.assertEqual( + outputs[0].shape.as_list(), [None, timesteps, units * 2] + ) + + model = keras.Model(inputs, outputs) + y = _to_list(model.predict(x)) + self.assertLen(y, 1) + self.assertAllClose(y[0][0, 2], np.zeros(units * 2)) + + @parameterized.parameters(["sum", "concat"]) + def test_custom_backward_layer(self, mode): + rnn = keras.layers.SimpleRNN + samples = 2 + dim = 2 + timesteps = 2 + output_dim = 2 - @parameterized.parameters(['sum', 'concat', 'ave', 'mul']) - def test_bidirectional(self, mode): - rnn = keras.layers.SimpleRNN - samples = 2 - dim = 2 - timesteps = 2 - output_dim = 2 - with self.cached_session(): - x = np.random.random((samples, timesteps, dim)) - target_dim = 2 * output_dim if mode == 'concat' else output_dim - y = np.random.random((samples, target_dim)) - - # test with Sequential model - model = keras.models.Sequential() - model.add( - keras.layers.Bidirectional( - rnn(output_dim), merge_mode=mode, input_shape=(timesteps, dim))) - model.compile(optimizer='rmsprop', loss='mse') - model.fit(x, y, epochs=1, batch_size=1) - - # check whether the model variables are present in the - # trackable list of objects - checkpointed_object_ids = { - id(o) for o in trackable_util.list_objects(model) - } - for v in model.variables: - self.assertIn(id(v), checkpointed_object_ids) - - # test compute output shape - ref_shape = model.layers[-1].output.shape - shape = model.layers[-1].compute_output_shape( - (None, timesteps, dim)) - self.assertListEqual(shape.as_list(), ref_shape.as_list()) - - # test config - model.get_config() - model = keras.models.model_from_json(model.to_json()) - model.summary() - - def test_bidirectional_invalid_init(self): - x = tf.constant(np.zeros((1, 1)).astype('float32')) - with self.assertRaisesRegex( - ValueError, - 'Please initialize `Bidirectional` layer with a ' - 
'`tf.keras.layers.Layer` instance.'): - keras.layers.Bidirectional(x) - - def test_bidirectional_weight_loading(self): - rnn = keras.layers.SimpleRNN - samples = 2 - dim = 2 - timesteps = 2 - output_dim = 2 - with self.cached_session(): - x = np.random.random((samples, timesteps, dim)) - model = keras.models.Sequential() - model.add( - keras.layers.Bidirectional( - rnn(output_dim), input_shape=(timesteps, dim))) - y_ref = model.predict(x) - weights = model.layers[-1].get_weights() - model.layers[-1].set_weights(weights) - y = model.predict(x) - self.assertAllClose(y, y_ref) - - def test_bidirectional_stacked(self): - # test stacked bidirectional layers - rnn = keras.layers.SimpleRNN - samples = 2 - dim = 2 - timesteps = 2 - output_dim = 2 - mode = 'sum' - - with self.cached_session(): - x = np.random.random((samples, timesteps, dim)) - target_dim = 2 * output_dim if mode == 'concat' else output_dim - y = np.random.random((samples, target_dim)) - - model = keras.models.Sequential() - model.add( - keras.layers.Bidirectional( - rnn(output_dim, return_sequences=True), - merge_mode=mode, - input_shape=(timesteps, dim))) - model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode)) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - # test with functional API - inputs = keras.layers.Input((timesteps, dim)) - output = keras.layers.Bidirectional( - rnn(output_dim), merge_mode=mode)(inputs) - model = keras.models.Model(inputs, output) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - def test_bidirectional_statefulness(self): - # Bidirectional and stateful - def run_test(): - rnn = keras.layers.SimpleRNN - samples = 2 - dim = 2 - timesteps = 2 - output_dim = 2 - mode = 'sum' - - with self.cached_session(): x = np.random.random((samples, timesteps, dim)) - target_dim = 2 * output_dim if mode == 'concat' else output_dim + target_dim = 2 * output_dim if mode == "concat" else output_dim y = np.random.random((samples, target_dim)) + forward_layer = rnn(output_dim) + backward_layer = rnn(output_dim, go_backwards=True) + + # test with Sequential model + model = keras.models.Sequential() + model.add( + keras.layers.Bidirectional( + forward_layer, + merge_mode=mode, + backward_layer=backward_layer, + input_shape=(timesteps, dim), + ) + ) + model.compile(optimizer="rmsprop", loss="mse") + model.fit(x, y, epochs=1, batch_size=1) - inputs = keras.layers.Input(batch_shape=(1, timesteps, dim)) - bidi_rnn = keras.layers.Bidirectional( - rnn(output_dim, stateful=True), merge_mode=mode) - self.assertTrue(bidi_rnn.stateful) - output = bidi_rnn(inputs) - model = keras.models.Model(inputs, output) - - y_1 = model.predict(x, batch_size=1) + # check whether the model variables are present in the + # trackable list of objects + checkpointed_object_ids = { + id(o) for o in trackable_util.list_objects(model) + } + for v in model.variables: + self.assertIn(id(v), checkpointed_object_ids) + + # test compute output shape + ref_shape = model.layers[-1].output.shape + shape = model.layers[-1].compute_output_shape((None, timesteps, dim)) + self.assertListEqual(shape.as_list(), ref_shape.as_list()) + + # test config + model.get_config() + model = keras.models.model_from_json(model.to_json()) + model.summary() + + def test_custom_backward_layer_error_check(self): + rnn = keras.layers.LSTM + units = 2 + + forward_layer = rnn(units) + backward_layer = rnn(units) + + with self.assertRaisesRegex( + ValueError, "should have different 
`go_backwards` value." + ): + keras.layers.Bidirectional( + forward_layer, + merge_mode="concat", + backward_layer=backward_layer, + ) + + for attr in ("stateful", "return_sequences", "return_state"): + kwargs = {attr: True} + backward_layer = rnn(units, go_backwards=True, **kwargs) + with self.assertRaisesRegex( + ValueError, + 'expected to have the same value for attribute "' + attr, + ): + keras.layers.Bidirectional( + forward_layer, + merge_mode="concat", + backward_layer=backward_layer, + ) + + def test_custom_backward_layer_serialization(self): + rnn = keras.layers.LSTM + units = 2 + + forward_layer = rnn(units) + backward_layer = rnn(units, go_backwards=True) + layer = keras.layers.Bidirectional( + forward_layer, merge_mode="concat", backward_layer=backward_layer + ) + config = layer.get_config() + layer_from_config = keras.layers.Bidirectional.from_config(config) + new_config = layer_from_config.get_config() + self.assertDictEqual(config, new_config) + + def test_rnn_layer_name(self): + rnn = keras.layers.LSTM + units = 2 + + layer = keras.layers.Bidirectional(rnn(units, name="rnn")) + config = layer.get_config() + + self.assertEqual(config["layer"]["config"]["name"], "rnn") + + layer_from_config = keras.layers.Bidirectional.from_config(config) + self.assertEqual(layer_from_config.forward_layer.name, "forward_rnn") + self.assertEqual(layer_from_config.backward_layer.name, "backward_rnn") + + def test_custom_backward_rnn_layer_name(self): + rnn = keras.layers.LSTM + units = 2 + + forward_layer = rnn(units) + backward_layer = rnn(units, go_backwards=True) + layer = keras.layers.Bidirectional( + forward_layer, merge_mode="concat", backward_layer=backward_layer + ) + config = layer.get_config() + + self.assertEqual(config["layer"]["config"]["name"], "lstm") + self.assertEqual(config["backward_layer"]["config"]["name"], "lstm_1") + + layer_from_config = keras.layers.Bidirectional.from_config(config) + self.assertEqual(layer_from_config.forward_layer.name, "forward_lstm") + self.assertEqual( + layer_from_config.backward_layer.name, "backward_lstm_1" + ) + + def test_rnn_with_customized_cell(self): + batch = 20 + dim = 5 + timesteps = 3 + units = 5 + merge_mode = "sum" + + cell = _ResidualLSTMCell(units) + forward_layer = keras.layers.RNN(cell) + inputs = keras.Input((timesteps, dim)) + bidirectional_rnn = keras.layers.Bidirectional( + forward_layer, merge_mode=merge_mode + ) + outputs = _to_list(bidirectional_rnn(inputs)) + + model = keras.Model(inputs, outputs) + model.compile(optimizer="rmsprop", loss="mse") + model.fit( + np.random.random((batch, timesteps, dim)), + np.random.random((batch, units)), + epochs=1, + batch_size=10, + ) + + def test_rnn_with_customized_cell_stacking(self): + batch = 20 + dim = 5 + timesteps = 3 + units = 5 + merge_mode = "sum" + + cell = [_ResidualLSTMCell(units), _ResidualLSTMCell(units)] + forward_layer = keras.layers.RNN(cell) + inputs = keras.Input((timesteps, dim)) + bidirectional_rnn = keras.layers.Bidirectional( + forward_layer, merge_mode=merge_mode + ) + outputs = _to_list(bidirectional_rnn(inputs)) + + model = keras.Model(inputs, outputs) + model.compile(optimizer="rmsprop", loss="mse") + model.fit( + np.random.random((batch, timesteps, dim)), + np.random.random((batch, units)), + epochs=1, + batch_size=10, + ) + + @test_utils.run_v2_only + def test_wrapped_rnn_cell(self): + # See https://github.com/tensorflow/tensorflow/issues/26581. 
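A quick sketch of the naming behavior that `test_rnn_layer_name` above asserts: the wrapper recreates the inner layer and prefixes the copies' names.

```python
import keras

bidi = keras.layers.Bidirectional(keras.layers.LSTM(2, name="rnn"))
print(bidi.forward_layer.name)   # forward_rnn
print(bidi.backward_layer.name)  # backward_rnn
```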
+ batch = 20 + dim = 5 + timesteps = 3 + units = 5 + merge_mode = "sum" + + cell = keras.layers.LSTMCell(units) + cell = ResidualWrapper(cell) + rnn = keras.layers.RNN(cell) + + inputs = keras.Input((timesteps, dim)) + wrapped = keras.layers.Bidirectional(rnn, merge_mode=merge_mode) + outputs = _to_list(wrapped(inputs)) + + model = keras.Model(inputs, outputs) + model.compile(optimizer="rmsprop", loss="mse") + model.fit( + np.random.random((batch, timesteps, dim)), + np.random.random((batch, units)), + epochs=1, + batch_size=10, + ) + + @parameterized.parameters(["ave", "concat", "mul"]) + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm RNN does not support ragged tensors yet." + ), + ) + def test_Bidirectional_ragged_input(self, merge_mode): + np.random.seed(100) + rnn = keras.layers.LSTM + units = 3 + x = tf.ragged.constant( + [ + [[1, 1, 1], [1, 1, 1]], + [[1, 1, 1]], + [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]], + [[1, 1, 1], [1, 1, 1], [1, 1, 1]], + ], + ragged_rank=1, + ) + x = tf.cast(x, "float32") + + with self.cached_session(): + if merge_mode == "ave": + merge_func = lambda y, y_rev: (y + y_rev) / 2 + elif merge_mode == "concat": + merge_func = lambda y, y_rev: tf.concat((y, y_rev), axis=-1) + elif merge_mode == "mul": + merge_func = lambda y, y_rev: (y * y_rev) + + inputs = keras.Input( + shape=(None, 3), batch_size=4, dtype="float32", ragged=True + ) + layer = keras.layers.Bidirectional( + rnn(units, return_sequences=True), merge_mode=merge_mode + ) + f_merged = keras.backend.function([inputs], layer(inputs)) + f_forward = keras.backend.function( + [inputs], layer.forward_layer(inputs) + ) + + # TODO(kaftan): after KerasTensor refactor TF op layers should work + # with many composite tensors, and this shouldn't need to be a + # lambda layer. 
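The ragged-input test above builds its batch with `tf.ragged.constant` and `ragged_rank=1`. In isolation, assuming eager execution:

```python
import tensorflow as tf

# A batch of 2 sequences with 1 and 2 timesteps of 3 features each.
x = tf.ragged.constant(
    [[[1, 1, 1]], [[1, 1, 1], [1, 1, 1]]], ragged_rank=1
)
x = tf.cast(x, "float32")
print(x.shape)  # (2, None, 3)
```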
+ reverse_layer = core.Lambda(tf.reverse, arguments=dict(axis=[1])) + f_backward = keras.backend.function( + [inputs], reverse_layer(layer.backward_layer(inputs)) + ) + + y_merged = f_merged(x) + y_expected = merge_func( + convert_ragged_tensor_value(f_forward(x)), + convert_ragged_tensor_value(f_backward(x)), + ) + + y_merged = convert_ragged_tensor_value(y_merged) + self.assertAllClose(y_merged.flat_values, y_expected.flat_values) + + def test_Bidirectional_nested_state_reuse(self): + if not tf.executing_eagerly(): + self.skipTest("Only test eager mode.") + x = tf.random.normal([4, 8, 16]) + layer = keras.layers.Bidirectional( + keras.layers.RNN( + [keras.layers.LSTMCell(5), keras.layers.LSTMCell(5)], + return_sequences=True, + return_state=True, + ) + ) + y = layer(x) + self.assertAllClose(layer([x] + y[1:]), layer(x, initial_state=y[1:])) + + def test_full_input_spec(self): + # See https://github.com/tensorflow/tensorflow/issues/38403 + inputs = keras.layers.Input(batch_shape=(1, 1, 1)) + fw_state = keras.layers.Input(batch_shape=(1, 1)) + bw_state = keras.layers.Input(batch_shape=(1, 1)) + states = [fw_state, bw_state] + bidirectional_rnn = keras.layers.Bidirectional( + keras.layers.SimpleRNN(1, stateful=True) + ) + + rnn_output = bidirectional_rnn(inputs, initial_state=states) + model = keras.Model([inputs, fw_state, bw_state], rnn_output) + output1 = model.predict( + [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))] + ) + output2 = model.predict( + [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))] + ) model.reset_states() - y_2 = model.predict(x, batch_size=1) + output3 = model.predict( + [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))] + ) + self.assertAllClose(output1, output3) + self.assertNotAllClose(output1, output2) - self.assertAllClose(y_1, y_2) + def test_reset_states(self): + ref_state = np.random.rand(1, 3).astype(np.float32) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) + # build model + inp = keras.Input(batch_shape=[1, 2, 3]) - if tf.executing_eagerly(): - run_test() - else: - tf_test_util.enable_output_all_intermediates(run_test)() - - @parameterized.parameters(['sum', 'mul', 'ave', 'concat', None]) - def test_Bidirectional_merged_value(self, merge_mode): - rnn = keras.layers.LSTM - samples = 2 - dim = 5 - timesteps = 3 - units = 3 - x = [np.random.rand(samples, timesteps, dim)] - - with self.cached_session(): - if merge_mode == 'sum': - merge_func = lambda y, y_rev: y + y_rev - elif merge_mode == 'mul': - merge_func = lambda y, y_rev: y * y_rev - elif merge_mode == 'ave': - merge_func = lambda y, y_rev: (y + y_rev) / 2 - elif merge_mode == 'concat': - merge_func = lambda y, y_rev: np.concatenate((y, y_rev), axis=-1) - else: - merge_func = lambda y, y_rev: [y, y_rev] - - # basic case - inputs = keras.Input((timesteps, dim)) - layer = keras.layers.Bidirectional( - rnn(units, return_sequences=True), merge_mode=merge_mode) - f_merged = keras.backend.function([inputs], _to_list(layer(inputs))) - f_forward = keras.backend.function([inputs], - [layer.forward_layer(inputs)]) - f_backward = keras.backend.function( - [inputs], - [keras.backend.reverse(layer.backward_layer(inputs), 1)]) - - y_merged = f_merged(x) - y_expected = _to_list(merge_func(f_forward(x)[0], f_backward(x)[0])) - assert len(y_merged) == len(y_expected) - for x1, x2 in zip(y_merged, y_expected): - self.assertAllClose(x1, x2, atol=1e-5) - - # test return_state - inputs = keras.Input((timesteps, dim)) - layer = keras.layers.Bidirectional( - rnn(units, 
return_state=True), merge_mode=merge_mode) - f_merged = keras.backend.function([inputs], layer(inputs)) - f_forward = keras.backend.function([inputs], - layer.forward_layer(inputs)) - f_backward = keras.backend.function([inputs], - layer.backward_layer(inputs)) - n_states = len(layer.layer.states) - - y_merged = f_merged(x) - y_forward = f_forward(x) - y_backward = f_backward(x) - y_expected = _to_list(merge_func(y_forward[0], y_backward[0])) - assert len(y_merged) == len(y_expected) + n_states * 2 - for x1, x2 in zip(y_merged, y_expected): - self.assertAllClose(x1, x2, atol=1e-5) - - y_merged = y_merged[-n_states * 2:] - y_forward = y_forward[-n_states:] - y_backward = y_backward[-n_states:] - for state_birnn, state_inner in zip(y_merged, y_forward + y_backward): - self.assertAllClose(state_birnn, state_inner, atol=1e-5) - - @parameterized.parameters([True, False]) - def test_Bidirectional_with_time_major_input(self, time_major): - batch_size, time, input_dim = 2, 3, 1 - inputs = tf.zeros((batch_size, time, input_dim)) - # length is [1 2]. Within the batch, the first element has 1 step, and the - # second element as 2 steps. - lengths = tf.range(1, 1 + batch_size) - mask = tf.sequence_mask(lengths, maxlen=time, dtype=tf.float32) - - forward_cell = _AddOneCell(name='forward') - backward_cell = _AddOneCell(name='backward') - - layer = keras.layers.Bidirectional( - layer=keras.layers.RNN( - forward_cell, time_major=time_major, return_sequences=True), - backward_layer=keras.layers.RNN( - backward_cell, time_major=time_major, return_sequences=True, - go_backwards=True)) - - # Switch to time-major. - if time_major: - inputs = tf.transpose(inputs, [1, 0, 2]) - mask = tf.transpose(mask, [1, 0]) - - keras_outputs = layer(inputs, mask=mask) - if time_major: - keras_outputs = tf.transpose(keras_outputs, [1, 0, 2]) - - # expect the first element in batch has 1 step and second element in batch - # has 2 steps. 
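The expected array below follows directly from this mask: `_AddOneCell` turns each valid (zero) input step into 1.0, and masked steps stay 0. A standalone sketch of the mask itself, using the same lengths:

import tensorflow as tf

# lengths [1, 2] over maxlen=3: the first batch element has one valid
# step, the second has two.
mask = tf.sequence_mask(tf.constant([1, 2]), maxlen=3, dtype=tf.float32)
# [[1., 0., 0.],
#  [1., 1., 0.]]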
- expected_result = np.array([[[1., 1.], [0., 0.], [0., 0.]], - [[1., 1.], [1., 1.], [0., 0.]]]) - self.assertAllClose(expected_result, keras_outputs) - - def test_Bidirectional_dropout(self): - rnn = keras.layers.LSTM - samples = 2 - dim = 5 - timesteps = 3 - units = 3 - merge_mode = 'sum' - x = [np.random.rand(samples, timesteps, dim)] - - with self.cached_session(): - inputs = keras.Input((timesteps, dim)) - wrapped = keras.layers.Bidirectional( - rnn(units, dropout=0.2, recurrent_dropout=0.2), merge_mode=merge_mode) - outputs = _to_list(wrapped(inputs, training=True)) - - inputs = keras.Input((timesteps, dim)) - wrapped = keras.layers.Bidirectional( - rnn(units, dropout=0.2, return_state=True), merge_mode=merge_mode) - outputs = _to_list(wrapped(inputs)) - - model = keras.Model(inputs, outputs) - y1 = _to_list(model.predict(x)) - y2 = _to_list(model.predict(x)) - for x1, x2 in zip(y1, y2): - self.assertAllClose(x1, x2, atol=1e-5) - - def test_Bidirectional_state_reuse(self): - rnn = keras.layers.LSTM - samples = 2 - dim = 5 - timesteps = 3 - units = 3 - - with self.cached_session(): - input1 = keras.layers.Input((timesteps, dim)) - layer = keras.layers.Bidirectional( - rnn(units, return_state=True, return_sequences=True)) - state = layer(input1)[1:] - - # test passing invalid initial_state: passing a tensor - input2 = keras.layers.Input((timesteps, dim)) - with self.assertRaises(ValueError): - keras.layers.Bidirectional(rnn(units))(input2, initial_state=state[0]) - - # test valid usage: passing a list - output = keras.layers.Bidirectional(rnn(units))(input2, - initial_state=state) - model = keras.models.Model([input1, input2], output) - assert len(model.layers) == 4 - assert isinstance(model.layers[-1].input, list) - inputs = [np.random.rand(samples, timesteps, dim), - np.random.rand(samples, timesteps, dim)] - model.predict(inputs) - - def test_Bidirectional_state_reuse_with_np_input(self): - # See https://github.com/tensorflow/tensorflow/issues/28761 for more detail. - rnn = keras.layers.LSTM - samples = 2 - dim = 5 - timesteps = 3 - units = 3 - - with self.cached_session(): - input1 = np.random.rand(samples, timesteps, dim).astype(np.float32) - layer = keras.layers.Bidirectional( - rnn(units, return_state=True, return_sequences=True)) - state = layer(input1)[1:] - - input2 = np.random.rand(samples, timesteps, dim).astype(np.float32) - keras.layers.Bidirectional(rnn(units))(input2, initial_state=state) - - def test_Bidirectional_trainable(self): - # test layers that need learning_phase to be set - with self.cached_session(): - x = keras.layers.Input(shape=(3, 2)) - layer = keras.layers.Bidirectional(keras.layers.SimpleRNN(3)) - _ = layer(x) - assert len(layer.trainable_weights) == 6 - layer.trainable = False - assert not layer.trainable_weights - layer.trainable = True - assert len(layer.trainable_weights) == 6 - - def test_Bidirectional_updates(self): - if tf.executing_eagerly(): - self.skipTest('layer.updates is only available in graph mode.') - - with self.cached_session(): - x = keras.layers.Input(shape=(3, 2)) - x_reachable_update = x * x - layer = keras.layers.Bidirectional(keras.layers.SimpleRNN(3)) - _ = layer(x) - assert not layer.updates - # TODO(b/128684069): Remove when Wrapper sublayers are __call__'d. 
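On the trainable assertions above: a `SimpleRNN` owns a kernel, a recurrent kernel, and a bias, and `Bidirectional` keeps independent forward and backward copies, hence six trainable weights; flipping `trainable` on the wrapper reaches both copies. A standalone restatement (sketch):

from tensorflow import keras

x = keras.Input((3, 2))
layer = keras.layers.Bidirectional(keras.layers.SimpleRNN(3))
_ = layer(x)
assert len(layer.trainable_weights) == 6  # 3 weights x 2 directions
layer.trainable = False
assert not layer.trainable_weights  # the flag propagates to both inner layers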
- with base_layer_utils.call_context().enter(layer, x, True, None): - layer.forward_layer.add_update(x_reachable_update) - layer.forward_layer.add_update(1) - layer.backward_layer.add_update(x_reachable_update) - layer.backward_layer.add_update(1) - assert len(layer.updates) == 4 - - def test_Bidirectional_losses(self): - x = keras.layers.Input(shape=(3, 2)) - layer = keras.layers.Bidirectional( - keras.layers.SimpleRNN( - 3, - kernel_regularizer='l1', - bias_regularizer='l1', - activity_regularizer='l1')) - _ = layer(x) - assert len(layer.losses) == 6 - - loss = x * x - layer.forward_layer.add_loss(loss) - layer.backward_layer.add_loss(loss) - assert len(layer.losses) == 8 - - def test_Bidirectional_with_constants(self): - with self.cached_session(): - # Test basic case. - x = keras.Input((5, 5)) - c = keras.Input((3,)) - cell = _RNNCellWithConstants(32, 3) - custom_objects = {'_RNNCellWithConstants': _RNNCellWithConstants} - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.Bidirectional(keras.layers.RNN(cell)) - y = layer(x, constants=c) - model = keras.Model([x, c], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 3))], - np.zeros((6, 64)) - ) - - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.Bidirectional.from_config(copy.deepcopy(config)) - y = layer(x, constants=c) - model = keras.Model([x, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, c_np]) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - # Test flat list inputs - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.Bidirectional.from_config(copy.deepcopy(config)) - y = layer([x, c]) - model = keras.Model([x, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, c_np]) - self.assertAllClose(y_np, y_np_3, atol=1e-4) - - def test_Bidirectional_with_constants_layer_passing_initial_state(self): - with self.cached_session(): - # Test basic case. - x = keras.Input((5, 5)) - c = keras.Input((3,)) - s_for = keras.Input((32,)) - s_bac = keras.Input((32,)) - cell = _RNNCellWithConstants(32, 3) - custom_objects = {'_RNNCellWithConstants': _RNNCellWithConstants} - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.Bidirectional(keras.layers.RNN(cell)) - y = layer(x, initial_state=[s_for, s_bac], constants=c) - model = keras.Model([x, s_for, s_bac, c], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, 5)), - np.zeros((6, 32)), - np.zeros((6, 32)), - np.zeros((6, 3))], - np.zeros((6, 64)) - ) - - # Test basic case serialization. 
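The serialization passage that follows uses the recipe this file repeats: capture `get_config()` and the weights, rebuild via `from_config` (under a `CustomObjectScope`, since `_RNNCellWithConstants` is a custom cell), restore the weights through the enclosing model, and compare predictions. A stripped-down round-trip with a stock cell (sketch; shapes arbitrary):

import copy
from tensorflow import keras

layer = keras.layers.Bidirectional(keras.layers.RNN(keras.layers.LSTMCell(3)))
config = layer.get_config()
clone = keras.layers.Bidirectional.from_config(copy.deepcopy(config))
# Weights live in the enclosing model: round-trips restore them with
# model.get_weights() / model.set_weights(), as the test below does.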
- x_np = np.random.random((6, 5, 5)) - s_fw_np = np.random.random((6, 32)) - s_bk_np = np.random.random((6, 32)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, s_fw_np, s_bk_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.Bidirectional.from_config(copy.deepcopy(config)) - y = layer(x, initial_state=[s_for, s_bac], constants=c) - model = keras.Model([x, s_for, s_bac, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, s_fw_np, s_bk_np, c_np]) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - # Verify that state is used - y_np_2_different_s = model.predict( - [x_np, s_fw_np + 10., s_bk_np + 10., c_np]) - assert np.mean(y_np - y_np_2_different_s) != 0 - - # Test flat list inputs - with generic_utils.CustomObjectScope(custom_objects): - layer = keras.layers.Bidirectional.from_config(copy.deepcopy(config)) - y = layer([x, s_for, s_bac, c]) - model = keras.Model([x, s_for, s_bac, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, s_fw_np, s_bk_np, c_np]) - self.assertAllClose(y_np, y_np_3, atol=1e-4) - - @parameterized.parameters([keras.layers.LSTM, keras.layers.GRU]) - def test_Bidirectional_output_shape(self, rnn): - input_shape = [None, 2, 1] - num_state = 4 if rnn == keras.layers.LSTM else 2 - - wrapper = keras.layers.Bidirectional(rnn(3)) - output_shape = wrapper.compute_output_shape(input_shape) - self.assertEqual(output_shape.as_list(), [None, 6]) - - wrapper = keras.layers.Bidirectional(rnn(3, return_state=True)) - output_shape = wrapper.compute_output_shape(input_shape) - # 1 for output and the rest for forward and backward states - self.assertLen(output_shape, 1 + num_state) - self.assertEqual(output_shape[0].as_list(), [None, 6]) - for shape in output_shape[1:]: - self.assertEqual(shape.as_list(), [None, 3]) - - wrapper = keras.layers.Bidirectional(rnn(3, return_state=True), - merge_mode=None) - output_shape = wrapper.compute_output_shape(input_shape) - # 1 for forward output and 1 for backward output, and the rest for states - self.assertLen(output_shape, 2 + num_state) - for shape in output_shape: - self.assertEqual(shape.as_list(), [None, 3]) - - def test_Bidirectional_output_shape_return_types(self): - - class TestLayer(keras.layers.SimpleRNN): - - def call(self, inputs): - return tf.concat([inputs, inputs], axis=-1) - - def compute_output_shape(self, input_shape): - output_shape = tf.TensorShape(input_shape).as_list() - output_shape[-1] = output_shape[-1] * 2 - return tf.TensorShape(output_shape) - - class TestListLayer(TestLayer): - - def compute_output_shape(self, input_shape): - shape = super().compute_output_shape(input_shape) - return shape.as_list() - - class TestTupleLayer(TestLayer): - - def compute_output_shape(self, input_shape): - shape = super().compute_output_shape(input_shape) - return tuple(shape.as_list()) - - # Layers can specify output shape as list/tuple/TensorShape - test_layers = [TestLayer, TestListLayer, TestTupleLayer] - for layer in test_layers: - input_layer = keras.layers.Bidirectional(layer(1)) - inputs = keras.backend.placeholder(shape=(None, 2, 4)) - output = input_layer(inputs) - self.assertEqual(output.shape.as_list(), [None, 2, 16]) - self.assertEqual( - input_layer.compute_output_shape([None, 2, 4]).as_list(), - [None, 2, 16]) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def 
test_Bidirectional_last_output_with_masking(self): - rnn = keras.layers.LSTM - samples = 2 - dim = 5 - timesteps = 3 - units = 3 - merge_mode = 'concat' - x = np.random.rand(samples, timesteps, dim) - # clear the first record's timestep 2. Last output should be same as state, - # not zeroed. - x[0, 2] = 0 - - with self.cached_session(): - inputs = keras.Input((timesteps, dim)) - masked_inputs = keras.layers.Masking()(inputs) - wrapped = keras.layers.Bidirectional( - rnn(units, return_state=True), merge_mode=merge_mode) - outputs = _to_list(wrapped(masked_inputs, training=True)) - self.assertLen(outputs, 5) - self.assertEqual(outputs[0].shape.as_list(), [None, units * 2]) - - model = keras.Model(inputs, outputs) - y = _to_list(model.predict(x)) - self.assertLen(y, 5) - self.assertAllClose(y[0], np.concatenate([y[1], y[3]], axis=1)) - - @parameterized.parameters([keras.layers.LSTM, keras.layers.GRU]) - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def test_Bidirectional_sequence_output_with_masking(self, rnn): - samples = 2 - dim = 5 - timesteps = 3 - units = 3 - merge_mode = 'concat' - x = np.random.rand(samples, timesteps, dim) - # clear the first record's timestep 2, and expect the output of timestep 2 - # is also 0s. - x[0, 2] = 0 - - with self.cached_session(): - inputs = keras.Input((timesteps, dim)) - masked_inputs = keras.layers.Masking()(inputs) - wrapped = keras.layers.Bidirectional( - rnn(units, return_sequences=True), - merge_mode=merge_mode) - outputs = _to_list(wrapped(masked_inputs, training=True)) - self.assertLen(outputs, 1) - self.assertEqual(outputs[0].shape.as_list(), [None, timesteps, units * 2]) - - model = keras.Model(inputs, outputs) - y = _to_list(model.predict(x)) - self.assertLen(y, 1) - self.assertAllClose(y[0][0, 2], np.zeros(units * 2)) - - @parameterized.parameters(['sum', 'concat']) - def test_custom_backward_layer(self, mode): - rnn = keras.layers.SimpleRNN - samples = 2 - dim = 2 - timesteps = 2 - output_dim = 2 - - x = np.random.random((samples, timesteps, dim)) - target_dim = 2 * output_dim if mode == 'concat' else output_dim - y = np.random.random((samples, target_dim)) - forward_layer = rnn(output_dim) - backward_layer = rnn(output_dim, go_backwards=True) - - # test with Sequential model - model = keras.models.Sequential() - model.add( - keras.layers.Bidirectional( - forward_layer, - merge_mode=mode, - backward_layer=backward_layer, - input_shape=(timesteps, dim))) - model.compile(optimizer='rmsprop', loss='mse') - model.fit(x, y, epochs=1, batch_size=1) - - # check whether the model variables are present in the - # trackable list of objects - checkpointed_object_ids = { - id(o) for o in trackable_util.list_objects(model) - } - for v in model.variables: - self.assertIn(id(v), checkpointed_object_ids) - - # test compute output shape - ref_shape = model.layers[-1].output.shape - shape = model.layers[-1].compute_output_shape((None, timesteps, dim)) - self.assertListEqual(shape.as_list(), ref_shape.as_list()) - - # test config - model.get_config() - model = keras.models.model_from_json(model.to_json()) - model.summary() - - def test_custom_backward_layer_error_check(self): - rnn = keras.layers.LSTM - units = 2 - - forward_layer = rnn(units) - backward_layer = rnn(units) - - with self.assertRaisesRegex(ValueError, - 'should have different `go_backwards` value.'): - keras.layers.Bidirectional( - forward_layer, merge_mode='concat', backward_layer=backward_layer) 
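For contrast with the `ValueError` asserted above, and with the attribute-mismatch checks in the loop that follows, a valid pairing only needs `go_backwards=True` on the backward layer plus agreement on `stateful`, `return_sequences`, and `return_state` (sketch):

from tensorflow import keras

forward = keras.layers.LSTM(2)
backward = keras.layers.LSTM(2, go_backwards=True)
layer = keras.layers.Bidirectional(
    forward, merge_mode="concat", backward_layer=backward
)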
- - for attr in ('stateful', 'return_sequences', 'return_state'): - kwargs = {attr: True} - backward_layer = rnn(units, go_backwards=True, **kwargs) - with self.assertRaisesRegex( - ValueError, 'expected to have the same value for attribute "' + attr): - keras.layers.Bidirectional( - forward_layer, merge_mode='concat', backward_layer=backward_layer) - - def test_custom_backward_layer_serialization(self): - rnn = keras.layers.LSTM - units = 2 - - forward_layer = rnn(units) - backward_layer = rnn(units, go_backwards=True) - layer = keras.layers.Bidirectional( - forward_layer, merge_mode='concat', backward_layer=backward_layer) - config = layer.get_config() - layer_from_config = keras.layers.Bidirectional.from_config(config) - new_config = layer_from_config.get_config() - self.assertDictEqual(config, new_config) - - def test_rnn_layer_name(self): - rnn = keras.layers.LSTM - units = 2 - - layer = keras.layers.Bidirectional(rnn(units, name='rnn')) - config = layer.get_config() - - self.assertEqual(config['layer']['config']['name'], 'rnn') - - layer_from_config = keras.layers.Bidirectional.from_config(config) - self.assertEqual(layer_from_config.forward_layer.name, 'forward_rnn') - self.assertEqual(layer_from_config.backward_layer.name, 'backward_rnn') - - def test_custom_backward_rnn_layer_name(self): - rnn = keras.layers.LSTM - units = 2 - - forward_layer = rnn(units) - backward_layer = rnn(units, go_backwards=True) - layer = keras.layers.Bidirectional( - forward_layer, merge_mode='concat', backward_layer=backward_layer) - config = layer.get_config() - - self.assertEqual(config['layer']['config']['name'], 'lstm') - self.assertEqual(config['backward_layer']['config']['name'], 'lstm_1') - - layer_from_config = keras.layers.Bidirectional.from_config(config) - self.assertEqual(layer_from_config.forward_layer.name, 'forward_lstm') - self.assertEqual(layer_from_config.backward_layer.name, 'backward_lstm_1') - - def test_rnn_with_customized_cell(self): - batch = 20 - dim = 5 - timesteps = 3 - units = 5 - merge_mode = 'sum' - - cell = _ResidualLSTMCell(units) - forward_layer = keras.layers.RNN(cell) - inputs = keras.Input((timesteps, dim)) - bidirectional_rnn = keras.layers.Bidirectional( - forward_layer, merge_mode=merge_mode) - outputs = _to_list(bidirectional_rnn(inputs)) - - model = keras.Model(inputs, outputs) - model.compile(optimizer='rmsprop', loss='mse') - model.fit( - np.random.random((batch, timesteps, dim)), - np.random.random((batch, units)), - epochs=1, - batch_size=10) - - def test_rnn_with_customized_cell_stacking(self): - batch = 20 - dim = 5 - timesteps = 3 - units = 5 - merge_mode = 'sum' - - cell = [_ResidualLSTMCell(units), _ResidualLSTMCell(units)] - forward_layer = keras.layers.RNN(cell) - inputs = keras.Input((timesteps, dim)) - bidirectional_rnn = keras.layers.Bidirectional( - forward_layer, merge_mode=merge_mode) - outputs = _to_list(bidirectional_rnn(inputs)) - - model = keras.Model(inputs, outputs) - model.compile(optimizer='rmsprop', loss='mse') - model.fit( - np.random.random((batch, timesteps, dim)), - np.random.random((batch, units)), - epochs=1, - batch_size=10) - - @test_utils.run_v2_only - def test_wrapped_rnn_cell(self): - # See https://github.com/tensorflow/tensorflow/issues/26581. 
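The naming tests above pin down a detail worth restating: when a `Bidirectional` layer is rebuilt from its config, the inner layers come back with `forward_` / `backward_` prefixes on the configured name. Sketch:

from tensorflow import keras

layer = keras.layers.Bidirectional(keras.layers.LSTM(2, name="rnn"))
rebuilt = keras.layers.Bidirectional.from_config(layer.get_config())
assert rebuilt.forward_layer.name == "forward_rnn"
assert rebuilt.backward_layer.name == "backward_rnn"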
- batch = 20 - dim = 5 - timesteps = 3 - units = 5 - merge_mode = 'sum' - - cell = keras.layers.LSTMCell(units) - cell = ResidualWrapper(cell) - rnn = keras.layers.RNN(cell) - - inputs = keras.Input((timesteps, dim)) - wrapped = keras.layers.Bidirectional(rnn, merge_mode=merge_mode) - outputs = _to_list(wrapped(inputs)) - - model = keras.Model(inputs, outputs) - model.compile(optimizer='rmsprop', loss='mse') - model.fit( - np.random.random((batch, timesteps, dim)), - np.random.random((batch, units)), - epochs=1, - batch_size=10) - - @parameterized.parameters(['ave', 'concat', 'mul']) - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm RNN does not support ragged tensors yet.') - def test_Bidirectional_ragged_input(self, merge_mode): - np.random.seed(100) - rnn = keras.layers.LSTM - units = 3 - x = tf.ragged.constant( - [[[1, 1, 1], [1, 1, 1]], [[1, 1, 1]], - [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]], - [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], - ragged_rank=1) - x = tf.cast(x, 'float32') - - # pylint: disable=g-long-lambda - with self.cached_session(): - if merge_mode == 'ave': - merge_func = lambda y, y_rev: (y + y_rev) / 2 - elif merge_mode == 'concat': - merge_func = lambda y, y_rev: tf.concat( - (y, y_rev), axis=-1) - elif merge_mode == 'mul': - merge_func = lambda y, y_rev: (y * y_rev) - # pylint: enable=g-long-lambda - - inputs = keras.Input( - shape=(None, 3), batch_size=4, dtype='float32', ragged=True) - layer = keras.layers.Bidirectional( - rnn(units, return_sequences=True), merge_mode=merge_mode) - f_merged = keras.backend.function([inputs], layer(inputs)) - f_forward = keras.backend.function([inputs], - layer.forward_layer(inputs)) - - # TODO(kaftan): after KerasTensor refactor TF op layers should work - # with many composite tensors, and this shouldn't need to be a lambda - # layer. 
- reverse_layer = core.Lambda(tf.reverse, arguments=dict(axis=[1])) - f_backward = keras.backend.function( - [inputs], - reverse_layer(layer.backward_layer(inputs))) - - y_merged = f_merged(x) - y_expected = merge_func( - convert_ragged_tensor_value(f_forward(x)), - convert_ragged_tensor_value(f_backward(x))) - - y_merged = convert_ragged_tensor_value(y_merged) - self.assertAllClose(y_merged.flat_values, y_expected.flat_values) - - def test_Bidirectional_nested_state_reuse(self): - if not tf.executing_eagerly(): - self.skipTest('Only test eager mode.') - x = tf.random.normal([4, 8, 16]) - layer = keras.layers.Bidirectional( - keras.layers.RNN([keras.layers.LSTMCell(5), - keras.layers.LSTMCell(5)], - return_sequences=True, - return_state=True)) - y = layer(x) - self.assertAllClose(layer([x] + y[1:]), layer(x, initial_state=y[1:])) - - def test_full_input_spec(self): - # See https://github.com/tensorflow/tensorflow/issues/38403 - inputs = keras.layers.Input(batch_shape=(1, 1, 1)) - fw_state = keras.layers.Input(batch_shape=(1, 1)) - bw_state = keras.layers.Input(batch_shape=(1, 1)) - states = [fw_state, bw_state] - bidirectional_rnn = keras.layers.Bidirectional( - keras.layers.SimpleRNN(1, stateful=True)) - - rnn_output = bidirectional_rnn(inputs, initial_state=states) - model = keras.Model([inputs, fw_state, bw_state], rnn_output) - output1 = model.predict( - [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))]) - output2 = model.predict( - [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))]) - model.reset_states() - output3 = model.predict( - [np.ones((1, 1, 1)), np.ones((1, 1)), np.ones((1, 1))]) - self.assertAllClose(output1, output3) - self.assertNotAllClose(output1, output2) + stateful = keras.layers.SimpleRNN(units=3, stateful=True) + stateless = keras.layers.SimpleRNN(units=3, stateful=False) + bid_stateless = keras.layers.Bidirectional(stateless) + bid_stateful = keras.layers.Bidirectional(stateful) -def _to_list(ls): - if isinstance(ls, list): - return ls - else: - return [ls] + # required to correctly initialize the state in the layers + _ = keras.Model( + inp, + [ + bid_stateless(inp), + bid_stateful(inp), + ], + ) + with self.assertRaisesRegex( + AttributeError, + "Layer must be stateful.", + ): + bid_stateless.reset_states() -def convert_ragged_tensor_value(inputs): - if isinstance(inputs, tf.compat.v1.ragged.RaggedTensorValue): - flat_values = tf.convert_to_tensor( - value=inputs.flat_values, - name='flat_values') - return tf.RaggedTensor.from_nested_row_splits( - flat_values, inputs.nested_row_splits, validate=False) - return inputs + with self.assertRaisesRegex(AttributeError, "Layer must be stateful."): + bid_stateless.reset_states([]) + + bid_stateful.reset_states() + bid_stateful.reset_states([ref_state, ref_state]) + + with self.assertRaisesRegex( + ValueError, + "Unrecognized value for `states`. 
Expected `states` " + "to be list or tuple", + ): + bid_stateful.reset_states({}) + + def test_trainable_parameter_argument(self): + inp = keras.layers.Input([None, 3]) + + def test(fwd, bwd, **kwargs): + def _remove_from_dict(d, remove_key): + if isinstance(d, dict): + d.pop(remove_key, None) + for key in list(d.keys()): + _remove_from_dict(d[key], remove_key) + bid = keras.layers.Bidirectional(fwd, backward_layer=bwd, **kwargs) -if __name__ == '__main__': - tf.test.main() + model = keras.Model(inp, bid(inp)) + clone = keras.models.clone_model(model) + + # Comparison should exclude `build_config` + clone_config = _remove_from_dict(clone.get_config(), "build_config") + model_config = _remove_from_dict(model.get_config(), "build_config") + self.assertEqual(clone_config, model_config) + + # test fetching trainable from `layer` + fwd = keras.layers.SimpleRNN(units=3) + bwd = keras.layers.SimpleRNN(units=3, go_backwards=True) + + fwd.trainable = True + test(fwd, None) + + fwd.trainable = False + test(fwd, None) + + fwd.trainable = True + bwd.trainable = False + test(fwd, bwd) + + fwd.trainable = False + bwd.trainable = True + test(fwd, bwd) + + fwd.trainable = True + bwd.trainable = True + test(fwd, bwd) + + fwd.trainable = False + bwd.trainable = False + test(fwd, bwd) + + # test fetching trainable from `kwargs` + test(fwd, None, trainable=True) + test(fwd, None, trainable=False) + + +def _to_list(ls): + if isinstance(ls, list): + return ls + else: + return [ls] + + +def convert_ragged_tensor_value(inputs): + if isinstance(inputs, tf.compat.v1.ragged.RaggedTensorValue): + flat_values = tf.convert_to_tensor( + value=inputs.flat_values, name="flat_values" + ) + return tf.RaggedTensor.from_nested_row_splits( + flat_values, inputs.nested_row_splits, validate=False + ) + return inputs + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/cell_wrappers.py b/keras/layers/rnn/cell_wrappers.py index 61e97b9b85fc..596c5e16ae71 100644 --- a/keras/layers/rnn/cell_wrappers.py +++ b/keras/layers/rnn/cell_wrappers.py @@ -27,557 +27,675 @@ import types as python_types import warnings +import tensorflow.compat.v2 as tf + from keras.layers.rnn import lstm from keras.layers.rnn.abstract_rnn_cell import AbstractRNNCell +from keras.saving import serialization_lib from keras.utils import generic_utils from keras.utils import tf_inspect -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.deprecation import deprecated class _RNNCellWrapper(AbstractRNNCell): - """Base class for cells wrappers V2 compatibility. - - This class along with `rnn_cell_impl._RNNCellWrapperV1` allows to define - wrappers that are compatible with V1 and V2, and defines helper methods for - this purpose. - """ - - def __init__(self, cell, *args, **kwargs): - super().__init__(*args, **kwargs) - self.cell = cell - cell_call_spec = tf_inspect.getfullargspec(cell.call) - self._call_spec.expects_training_arg = (("training" - in cell_call_spec.args) or - (cell_call_spec.varkw is not None)) - - def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): - """Calls the wrapped cell and performs the wrapping logic. - - This method is called from the wrapper's `call` or `__call__` methods. - - Args: - inputs: A tensor with wrapped cell's input. - state: A tensor or tuple of tensors with wrapped cell's state. - cell_call_fn: Wrapped cell's method to use for step computation (cell's - `__call__` or 'call' method). 
- **kwargs: Additional arguments. - - Returns: - A pair containing: - - Output: A tensor with cell's output. - - New state: A tensor or tuple of tensors with new wrapped cell's state. - """ - raise NotImplementedError - - def call(self, inputs, state, **kwargs): - """Runs the RNN cell step computation. - - When `call` is being used, we assume that the wrapper object has been built, - and therefore the wrapped cells has been built via its `build` method and - its `call` method can be used directly. + """Base class for cells wrappers V2 compatibility. - This allows to use the wrapped cell and the non-wrapped cell equivalently - when using `call` and `build`. - - Args: - inputs: A tensor with wrapped cell's input. - state: A tensor or tuple of tensors with wrapped cell's state. - **kwargs: Additional arguments passed to the wrapped cell's `call`. - - Returns: - A pair containing: - - - Output: A tensor with cell's output. - - New state: A tensor or tuple of tensors with new wrapped cell's state. + This class along with `rnn_cell_impl._RNNCellWrapperV1` allows to define + wrappers that are compatible with V1 and V2, and defines helper methods for + this purpose. """ - return self._call_wrapped_cell( - inputs, state, cell_call_fn=self.cell.call, **kwargs) - - def build(self, inputs_shape): - """Builds the wrapped cell.""" - self.cell.build(inputs_shape) - self.built = True - - @property - def wrapped_cell(self): - return self.cell - - @property - def state_size(self): - return self.cell.state_size - - @property - def output_size(self): - return self.cell.output_size - - def zero_state(self, batch_size, dtype): - with tf.name_scope(type(self).__name__ + "ZeroState"): - return self.cell.zero_state(batch_size, dtype) - - def get_config(self): - config = { - "cell": { - "class_name": self.cell.__class__.__name__, - "config": self.cell.get_config() - }, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - config = config.copy() - from keras.layers.serialization import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top - cell = deserialize_layer(config.pop("cell"), custom_objects=custom_objects) - return cls(cell, **config) - + def __init__(self, cell, *args, **kwargs): + super().__init__(*args, **kwargs) + self.cell = cell + cell_call_spec = tf_inspect.getfullargspec(cell.call) + self._call_spec.expects_training_arg = ( + "training" in cell_call_spec.args + ) or (cell_call_spec.varkw is not None) + + def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): + """Calls the wrapped cell and performs the wrapping logic. + + This method is called from the wrapper's `call` or `__call__` methods. + + Args: + inputs: A tensor with wrapped cell's input. + state: A tensor or tuple of tensors with wrapped cell's state. + cell_call_fn: Wrapped cell's method to use for step computation + (cell's `__call__` or 'call' method). + **kwargs: Additional arguments. + + Returns: + A pair containing: + - Output: A tensor with cell's output. + - New state: A tensor or tuple of tensors with new wrapped cell's + state. + """ + raise NotImplementedError + + def call(self, inputs, state, **kwargs): + """Runs the RNN cell step computation. + + When `call` is being used, we assume that the wrapper object has been + built, and therefore the wrapped cells has been built via its `build` + method and its `call` method can be used directly. 
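A usage sketch of the delegation just described: because `build` and `call` forward to the wrapped cell, a wrapper drops into `keras.layers.RNN` exactly like the bare cell. `ResidualWrapper` is shown here; the residual add requires the input feature size to match the cell's units:

import tensorflow as tf
from tensorflow import keras

cell = keras.layers.SimpleRNNCell(4)
wrapped = tf.nn.RNNCellResidualWrapper(cell)  # output = cell output + input
layer = keras.layers.RNN(wrapped)
y = layer(tf.zeros([2, 3, 4]))  # feature size 4 == units, so the add is valid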
+ + This allows to use the wrapped cell and the non-wrapped cell + equivalently when using `call` and `build`. + + Args: + inputs: A tensor with wrapped cell's input. + state: A tensor or tuple of tensors with wrapped cell's state. + **kwargs: Additional arguments passed to the wrapped cell's `call`. + + Returns: + A pair containing: + + - Output: A tensor with cell's output. + - New state: A tensor or tuple of tensors with new wrapped cell's + state. + """ + return self._call_wrapped_cell( + inputs, state, cell_call_fn=self.cell.call, **kwargs + ) + + def build(self, inputs_shape): + """Builds the wrapped cell.""" + self.cell.build(inputs_shape) + self.built = True + + @property + def wrapped_cell(self): + return self.cell + + @property + def state_size(self): + return self.cell.state_size + + @property + def output_size(self): + return self.cell.output_size + + def zero_state(self, batch_size, dtype): + with tf.name_scope(type(self).__name__ + "ZeroState"): + return self.cell.zero_state(batch_size, dtype) + + def get_config(self): + config = { + "cell": { + "class_name": self.cell.__class__.__name__, + "config": self.cell.get_config(), + }, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + config = config.copy() + from keras.layers.serialization import deserialize as deserialize_layer + + cell = deserialize_layer( + config.pop("cell"), custom_objects=custom_objects + ) + return cls(cell, **config) + + +@deprecated(None, "Please use tf.keras.layers.RNN instead.") @tf_export("nn.RNNCellDropoutWrapper", v1=[]) class DropoutWrapper(_RNNCellWrapper): - """Operator adding dropout to inputs and outputs of the given cell.""" - - def __init__(self, - cell, - input_keep_prob=1.0, - output_keep_prob=1.0, - state_keep_prob=1.0, - variational_recurrent=False, - input_size=None, - dtype=None, - seed=None, - dropout_state_filter_visitor=None, - **kwargs): - """Create a cell with added input, state, and/or output dropout. - - If `variational_recurrent` is set to `True` (**NOT** the default behavior), - then the same dropout mask is applied at every step, as described in: - [A Theoretically Grounded Application of Dropout in Recurrent - Neural Networks. Y. Gal, Z. Ghahramani](https://arxiv.org/abs/1512.05287). - - Otherwise a different dropout mask is applied at every time step. - - Note, by default (unless a custom `dropout_state_filter` is provided), - the memory state (`c` component of any `LSTMStateTuple`) passing through - a `DropoutWrapper` is never modified. This behavior is described in the - above article. - - Args: - cell: an RNNCell, a projection to output_size is added to it. - input_keep_prob: unit Tensor or float between 0 and 1, input keep - probability; if it is constant and 1, no input dropout will be added. - output_keep_prob: unit Tensor or float between 0 and 1, output keep - probability; if it is constant and 1, no output dropout will be added. - state_keep_prob: unit Tensor or float between 0 and 1, output keep - probability; if it is constant and 1, no output dropout will be added. - State dropout is performed on the outgoing states of the cell. **Note** - the state components to which dropout is applied when `state_keep_prob` - is in `(0, 1)` are also determined by the argument - `dropout_state_filter_visitor` (e.g. by default dropout is never applied - to the `c` component of an `LSTMStateTuple`). - variational_recurrent: Python bool. 
If `True`, then the same dropout - pattern is applied across all time steps per run call. If this parameter - is set, `input_size` **must** be provided. - input_size: (optional) (possibly nested tuple of) `TensorShape` objects - containing the depth(s) of the input tensors expected to be passed in to - the `DropoutWrapper`. Required and used **iff** `variational_recurrent - = True` and `input_keep_prob < 1`. - dtype: (optional) The `dtype` of the input, state, and output tensors. - Required and used **iff** `variational_recurrent = True`. - seed: (optional) integer, the randomness seed. - dropout_state_filter_visitor: (optional), default: (see below). Function - that takes any hierarchical level of the state and returns a scalar or - depth=1 structure of Python booleans describing which terms in the state - should be dropped out. In addition, if the function returns `True`, - dropout is applied across this sublevel. If the function returns - `False`, dropout is not applied across this entire sublevel. - Default behavior: perform dropout on all terms except the memory (`c`) - state of `LSTMCellState` objects, and don't try to apply dropout to - `TensorArray` objects: ``` - def dropout_state_filter_visitor(s): - if isinstance(s, LSTMCellState): # Never perform dropout on the c - state. return LSTMCellState(c=False, h=True) - elif isinstance(s, TensorArray): return False return True ``` - **kwargs: dict of keyword arguments for base layer. - - Raises: - TypeError: if `cell` is not an `RNNCell`, or `keep_state_fn` is provided - but not `callable`. - ValueError: if any of the keep_probs are not between 0 and 1. - """ - if isinstance(cell, lstm.LSTMCell): - raise ValueError("keras LSTM cell does not work with DropoutWrapper. " - "Please use LSTMCell(dropout=x, recurrent_dropout=y) " - "instead.") - super().__init__(cell, dtype=dtype, **kwargs) - - if (dropout_state_filter_visitor is not None and - not callable(dropout_state_filter_visitor)): - raise TypeError("dropout_state_filter_visitor must be callable. " - f"Received: {dropout_state_filter_visitor}") - self._dropout_state_filter = ( - dropout_state_filter_visitor or _default_dropout_state_filter_visitor) - with tf.name_scope("DropoutWrapperInit"): - - def tensor_and_const_value(v): - tensor_value = tf.convert_to_tensor(v) - const_value = tf.get_static_value(tensor_value) - return (tensor_value, const_value) - - for prob, attr in [(input_keep_prob, "input_keep_prob"), - (state_keep_prob, "state_keep_prob"), - (output_keep_prob, "output_keep_prob")]: - tensor_prob, const_prob = tensor_and_const_value(prob) - if const_prob is not None: - if const_prob < 0 or const_prob > 1: - raise ValueError(f"Parameter {attr} must be between 0 and 1. " - f"Received {const_prob}") - setattr(self, "_%s" % attr, float(const_prob)) + """Operator adding dropout to inputs and outputs of the given cell.""" + + def __init__( + self, + cell, + input_keep_prob=1.0, + output_keep_prob=1.0, + state_keep_prob=1.0, + variational_recurrent=False, + input_size=None, + dtype=None, + seed=None, + dropout_state_filter_visitor=None, + **kwargs, + ): + """Create a cell with added input, state, and/or output dropout. + + If `variational_recurrent` is set to `True` (**NOT** the default + behavior), then the same dropout mask is applied at every step, as + described in: [A Theoretically Grounded Application of Dropout in + Recurrent Neural Networks. Y. Gal, Z. + Ghahramani](https://arxiv.org/abs/1512.05287). + + Otherwise a different dropout mask is applied at every time step. 
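A hedged construction sketch for the variational mode described above: because the masks are pre-sampled once per run, `dtype` must be supplied, and `input_size` as well whenever `input_keep_prob < 1` (state dropout is left at its default here):

import tensorflow as tf
from tensorflow import keras

wrapped = tf.nn.RNNCellDropoutWrapper(
    keras.layers.SimpleRNNCell(4),
    input_keep_prob=0.9,
    output_keep_prob=0.9,
    variational_recurrent=True,  # reuse one dropout mask across all steps
    input_size=tf.TensorShape([4]),  # required since input_keep_prob < 1
    dtype=tf.float32,  # required when variational_recurrent=True
    seed=7,
)
layer = keras.layers.RNN(wrapped)
y = layer(tf.zeros([2, 3, 4]))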
+ + Note, by default (unless a custom `dropout_state_filter` is provided), + the memory state (`c` component of any `LSTMStateTuple`) passing through + a `DropoutWrapper` is never modified. This behavior is described in the + above article. + + Args: + cell: an RNNCell, a projection to output_size is added to it. + input_keep_prob: unit Tensor or float between 0 and 1, input keep + probability; if it is constant and 1, no input dropout will be + added. + output_keep_prob: unit Tensor or float between 0 and 1, output keep + probability; if it is constant and 1, no output dropout will be + added. + state_keep_prob: unit Tensor or float between 0 and 1, output keep + probability; if it is constant and 1, no output dropout will be + added. State dropout is performed on the outgoing states of the + cell. **Note** the state components to which dropout is applied when + `state_keep_prob` is in `(0, 1)` are also determined by the argument + `dropout_state_filter_visitor` (e.g. by default dropout is never + applied to the `c` component of an `LSTMStateTuple`). + variational_recurrent: Python bool. If `True`, then the same dropout + pattern is applied across all time steps per run call. If this + parameter is set, `input_size` **must** be provided. + input_size: (optional) (possibly nested tuple of) `TensorShape` + objects containing the depth(s) of the input tensors expected to be + passed in to the `DropoutWrapper`. Required and used **iff** + `variational_recurrent = True` and `input_keep_prob < 1`. + dtype: (optional) The `dtype` of the input, state, and output tensors. + Required and used **iff** `variational_recurrent = True`. + seed: (optional) integer, the randomness seed. + dropout_state_filter_visitor: (optional), default: (see below). + Function that takes any hierarchical level of the state and returns + a scalar or depth=1 structure of Python booleans describing which + terms in the state should be dropped out. In addition, if the + function returns `True`, dropout is applied across this sublevel. + If the function returns `False`, dropout is not applied across this + entire sublevel. Default behavior: perform dropout on all terms + except the memory (`c`) state of `LSTMCellState` objects, and don't + try to apply dropout to + `TensorArray` objects: + ``` + def dropout_state_filter_visitor(s): + # Never perform dropout on the c state. + if isinstance(s, LSTMCellState): + return LSTMCellState(c=False, h=True) + elif isinstance(s, TensorArray): + return False + return True + ``` + **kwargs: dict of keyword arguments for base layer. + + Raises: + TypeError: if `cell` is not an `RNNCell`, or `keep_state_fn` is + provided but not `callable`. + ValueError: if any of the keep_probs are not between 0 and 1. + """ + if isinstance(cell, lstm.LSTMCell): + raise ValueError( + "keras LSTM cell does not work with DropoutWrapper. " + "Please use LSTMCell(dropout=x, recurrent_dropout=y) " + "instead." + ) + super().__init__(cell, dtype=dtype, **kwargs) + + if dropout_state_filter_visitor is not None and not callable( + dropout_state_filter_visitor + ): + raise TypeError( + "dropout_state_filter_visitor must be callable. 
" + f"Received: {dropout_state_filter_visitor}" + ) + self._dropout_state_filter = ( + dropout_state_filter_visitor + or _default_dropout_state_filter_visitor + ) + with tf.name_scope("DropoutWrapperInit"): + + def tensor_and_const_value(v): + tensor_value = tf.convert_to_tensor(v) + const_value = tf.get_static_value(tensor_value) + return (tensor_value, const_value) + + for prob, attr in [ + (input_keep_prob, "input_keep_prob"), + (state_keep_prob, "state_keep_prob"), + (output_keep_prob, "output_keep_prob"), + ]: + tensor_prob, const_prob = tensor_and_const_value(prob) + if const_prob is not None: + if const_prob < 0 or const_prob > 1: + raise ValueError( + f"Parameter {attr} must be between 0 and 1. " + f"Received {const_prob}" + ) + setattr(self, f"_{attr}", float(const_prob)) + else: + setattr(self, f"_{attr}", tensor_prob) + + # Set variational_recurrent, seed before running the code below + self._variational_recurrent = variational_recurrent + self._input_size = input_size + self._seed = seed + + self._recurrent_input_noise = None + self._recurrent_state_noise = None + self._recurrent_output_noise = None + + if variational_recurrent: + if dtype is None: + raise ValueError( + "When variational_recurrent=True, dtype must be provided" + ) + + def convert_to_batch_shape(s): + # Prepend a 1 for the batch dimension; for recurrent + # variational dropout we use the same dropout mask for all + # batch elements. + return tf.concat(([1], tf.TensorShape(s).as_list()), 0) + + def batch_noise(s, inner_seed): + shape = convert_to_batch_shape(s) + return tf.random.uniform(shape, seed=inner_seed, dtype=dtype) + + if ( + not isinstance(self._input_keep_prob, numbers.Real) + or self._input_keep_prob < 1.0 + ): + if input_size is None: + raise ValueError( + "When variational_recurrent=True and input_keep_prob < " + "1.0 or is unknown, input_size must be provided" + ) + self._recurrent_input_noise = _enumerated_map_structure_up_to( + input_size, + lambda i, s: batch_noise( + s, inner_seed=self._gen_seed("input", i) + ), + input_size, + ) + self._recurrent_state_noise = _enumerated_map_structure_up_to( + cell.state_size, + lambda i, s: batch_noise( + s, inner_seed=self._gen_seed("state", i) + ), + cell.state_size, + ) + self._recurrent_output_noise = _enumerated_map_structure_up_to( + cell.output_size, + lambda i, s: batch_noise( + s, inner_seed=self._gen_seed("output", i) + ), + cell.output_size, + ) + + def _gen_seed(self, salt_prefix, index): + if self._seed is None: + return None + salt = "%s_%d" % (salt_prefix, index) + string = (str(self._seed) + salt).encode("utf-8") + return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF + + def _variational_recurrent_dropout_value( + self, unused_index, value, noise, keep_prob + ): + """Performs dropout given the pre-calculated noise tensor.""" + # uniform [keep_prob, 1.0 + keep_prob) + random_tensor = keep_prob + noise + + # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) + binary_tensor = tf.floor(random_tensor) + ret = tf.divide(value, keep_prob) * binary_tensor + ret.set_shape(value.get_shape()) + return ret + + def _dropout( + self, + values, + salt_prefix, + recurrent_noise, + keep_prob, + shallow_filtered_substructure=None, + ): + """Decides whether to perform standard dropout or recurrent dropout.""" + + if shallow_filtered_substructure is None: + # Put something so we traverse the entire structure; inside the + # dropout function we check to see if leafs of this are bool or not. 
+ shallow_filtered_substructure = values + + if not self._variational_recurrent: + + def dropout(i, do_dropout, v): + if not isinstance(do_dropout, bool) or do_dropout: + return tf.nn.dropout( + v, + rate=1.0 - keep_prob, + seed=self._gen_seed(salt_prefix, i), + ) + else: + return v + + return _enumerated_map_structure_up_to( + shallow_filtered_substructure, + dropout, + *[shallow_filtered_substructure, values], + ) else: - setattr(self, "_%s" % attr, tensor_prob) - - # Set variational_recurrent, seed before running the code below - self._variational_recurrent = variational_recurrent - self._input_size = input_size - self._seed = seed - - self._recurrent_input_noise = None - self._recurrent_state_noise = None - self._recurrent_output_noise = None - - if variational_recurrent: - if dtype is None: - raise ValueError( - "When variational_recurrent=True, dtype must be provided") - - def convert_to_batch_shape(s): - # Prepend a 1 for the batch dimension; for recurrent - # variational dropout we use the same dropout mask for all - # batch elements. - return tf.concat(([1], tf.TensorShape(s).as_list()), 0) - - def batch_noise(s, inner_seed): - shape = convert_to_batch_shape(s) - return tf.random.uniform(shape, seed=inner_seed, dtype=dtype) - - if (not isinstance(self._input_keep_prob, numbers.Real) or - self._input_keep_prob < 1.0): - if input_size is None: - raise ValueError( - "When variational_recurrent=True and input_keep_prob < 1.0 or " - "is unknown, input_size must be provided") - self._recurrent_input_noise = _enumerated_map_structure_up_to( - input_size, - lambda i, s: batch_noise(s, inner_seed=self._gen_seed("input", i)), - input_size) - self._recurrent_state_noise = _enumerated_map_structure_up_to( - cell.state_size, - lambda i, s: batch_noise(s, inner_seed=self._gen_seed("state", i)), - cell.state_size) - self._recurrent_output_noise = _enumerated_map_structure_up_to( - cell.output_size, - lambda i, s: batch_noise(s, inner_seed=self._gen_seed("output", i)), - cell.output_size) - - def _gen_seed(self, salt_prefix, index): - if self._seed is None: - return None - salt = "%s_%d" % (salt_prefix, index) - string = (str(self._seed) + salt).encode("utf-8") - return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF - - def _variational_recurrent_dropout_value( - self, unused_index, value, noise, keep_prob): - """Performs dropout given the pre-calculated noise tensor.""" - # uniform [keep_prob, 1.0 + keep_prob) - random_tensor = keep_prob + noise - - # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) - binary_tensor = tf.floor(random_tensor) - ret = tf.divide(value, keep_prob) * binary_tensor - ret.set_shape(value.get_shape()) - return ret - - def _dropout(self, - values, - salt_prefix, - recurrent_noise, - keep_prob, - shallow_filtered_substructure=None): - """Decides whether to perform standard dropout or recurrent dropout.""" - - if shallow_filtered_substructure is None: - # Put something so we traverse the entire structure; inside the - # dropout function we check to see if leafs of this are bool or not. - shallow_filtered_substructure = values - - if not self._variational_recurrent: - - def dropout(i, do_dropout, v): - if not isinstance(do_dropout, bool) or do_dropout: - return tf.nn.dropout( - v, rate=1. 
- keep_prob, seed=self._gen_seed(salt_prefix, i)) - else: - return v - - return _enumerated_map_structure_up_to( - shallow_filtered_substructure, dropout, - *[shallow_filtered_substructure, values]) - else: - - def dropout(i, do_dropout, v, n): - if not isinstance(do_dropout, bool) or do_dropout: - return self._variational_recurrent_dropout_value(i, v, n, keep_prob) - else: - return v - - return _enumerated_map_structure_up_to( - shallow_filtered_substructure, dropout, - *[shallow_filtered_substructure, values, recurrent_noise]) - - def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): - """Runs the wrapped cell and applies dropout. - - Args: - inputs: A tensor with wrapped cell's input. - state: A tensor or tuple of tensors with wrapped cell's state. - cell_call_fn: Wrapped cell's method to use for step computation (cell's - `__call__` or 'call' method). - **kwargs: Additional arguments. - - Returns: - A pair containing: - - - Output: A tensor with cell's output. - - New state: A tensor or tuple of tensors with new wrapped cell's state. - """ - - def _should_dropout(p): - return (not isinstance(p, float)) or p < 1 - - if _should_dropout(self._input_keep_prob): - inputs = self._dropout(inputs, "input", self._recurrent_input_noise, - self._input_keep_prob) - output, new_state = cell_call_fn(inputs, state, **kwargs) - if _should_dropout(self._state_keep_prob): - # Identify which subsets of the state to perform dropout on and - # which ones to keep. - shallow_filtered_substructure = tf.__internal__.nest.get_traverse_shallow_structure( - self._dropout_state_filter, new_state) - new_state = self._dropout(new_state, "state", self._recurrent_state_noise, - self._state_keep_prob, - shallow_filtered_substructure) - if _should_dropout(self._output_keep_prob): - output = self._dropout(output, "output", self._recurrent_output_noise, - self._output_keep_prob) - return output, new_state - - def get_config(self): - """Returns the config of the dropout wrapper.""" - config = { - "input_keep_prob": self._input_keep_prob, - "output_keep_prob": self._output_keep_prob, - "state_keep_prob": self._state_keep_prob, - "variational_recurrent": self._variational_recurrent, - "input_size": self._input_size, - "seed": self._seed, - } - if self._dropout_state_filter != _default_dropout_state_filter_visitor: # pylint: disable=comparison-with-callable - function, function_type, function_module = _serialize_function_to_config( - self._dropout_state_filter) - config.update({"dropout_fn": function, - "dropout_fn_type": function_type, - "dropout_fn_module": function_module}) - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - if "dropout_fn" in config: - config = config.copy() - dropout_state_filter = _parse_config_to_function( - config, custom_objects, "dropout_fn", "dropout_fn_type", - "dropout_fn_module") - config.pop("dropout_fn") - config["dropout_state_filter_visitor"] = dropout_state_filter - return super(DropoutWrapper, cls).from_config( - config, custom_objects=custom_objects) - + def dropout(i, do_dropout, v, n): + if not isinstance(do_dropout, bool) or do_dropout: + return self._variational_recurrent_dropout_value( + i, v, n, keep_prob + ) + else: + return v + + return _enumerated_map_structure_up_to( + shallow_filtered_substructure, + dropout, + *[shallow_filtered_substructure, values, recurrent_noise], + ) + + def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): 
+ """Runs the wrapped cell and applies dropout. + + Args: + inputs: A tensor with wrapped cell's input. + state: A tensor or tuple of tensors with wrapped cell's state. + cell_call_fn: Wrapped cell's method to use for step computation + (cell's `__call__` or 'call' method). + **kwargs: Additional arguments. + + Returns: + A pair containing: + + - Output: A tensor with cell's output. + - New state: A tensor or tuple of tensors with new wrapped cell's + state. + """ + + def _should_dropout(p): + return (not isinstance(p, float)) or p < 1 + + if _should_dropout(self._input_keep_prob): + inputs = self._dropout( + inputs, + "input", + self._recurrent_input_noise, + self._input_keep_prob, + ) + output, new_state = cell_call_fn(inputs, state, **kwargs) + if _should_dropout(self._state_keep_prob): + # Identify which subsets of the state to perform dropout on and + # which ones to keep. + shallow_filtered_substructure = ( + tf.__internal__.nest.get_traverse_shallow_structure( + self._dropout_state_filter, new_state + ) + ) + new_state = self._dropout( + new_state, + "state", + self._recurrent_state_noise, + self._state_keep_prob, + shallow_filtered_substructure, + ) + if _should_dropout(self._output_keep_prob): + output = self._dropout( + output, + "output", + self._recurrent_output_noise, + self._output_keep_prob, + ) + return output, new_state + + def get_config(self): + """Returns the config of the dropout wrapper.""" + config = { + "input_keep_prob": self._input_keep_prob, + "output_keep_prob": self._output_keep_prob, + "state_keep_prob": self._state_keep_prob, + "variational_recurrent": self._variational_recurrent, + "input_size": self._input_size, + "seed": self._seed, + } + if self._dropout_state_filter != _default_dropout_state_filter_visitor: + ( + function, + function_type, + function_module, + ) = _serialize_function_to_config(self._dropout_state_filter) + config.update( + { + "dropout_fn": function, + "dropout_fn_type": function_type, + "dropout_fn_module": function_module, + } + ) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + if "dropout_fn" in config: + config = config.copy() + dropout_state_filter = _parse_config_to_function( + config, + custom_objects, + "dropout_fn", + "dropout_fn_type", + "dropout_fn_module", + ) + config.pop("dropout_fn") + config["dropout_state_filter_visitor"] = dropout_state_filter + return super(DropoutWrapper, cls).from_config( + config, custom_objects=custom_objects + ) + + +@deprecated(None, "Please use tf.keras.layers.RNN instead.") @tf_export("nn.RNNCellResidualWrapper", v1=[]) class ResidualWrapper(_RNNCellWrapper): - """RNNCell wrapper that ensures cell inputs are added to the outputs.""" - - def __init__(self, cell, residual_fn=None, **kwargs): - """Constructs a `ResidualWrapper` for `cell`. - - Args: - cell: An instance of `RNNCell`. - residual_fn: (Optional) The function to map raw cell inputs and raw cell - outputs to the actual cell outputs of the residual network. - Defaults to calling nest.map_structure on (lambda i, o: i + o), inputs - and outputs. - **kwargs: dict of keyword arguments for base layer. - """ - super().__init__(cell, **kwargs) - self._residual_fn = residual_fn - - def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): - """Run the cell and then apply the residual_fn on its inputs to its outputs. - - Args: - inputs: cell inputs. - state: cell state. 
- cell_call_fn: Wrapped cell's method to use for step computation (cell's - `__call__` or 'call' method). - **kwargs: Additional arguments passed to the wrapped cell's `call`. - - Returns: - Tuple of cell outputs and new state. - - Raises: - TypeError: If cell inputs and outputs have different structure (type). - ValueError: If cell inputs and outputs have different structure (value). - """ - outputs, new_state = cell_call_fn(inputs, state, **kwargs) - - # Ensure shapes match - def assert_shape_match(inp, out): - inp.get_shape().assert_is_compatible_with(out.get_shape()) - - def default_residual_fn(inputs, outputs): - tf.nest.assert_same_structure(inputs, outputs) - tf.nest.map_structure(assert_shape_match, inputs, outputs) - return tf.nest.map_structure(lambda inp, out: inp + out, inputs, outputs) - - res_outputs = (self._residual_fn or default_residual_fn)(inputs, outputs) - return (res_outputs, new_state) - - def get_config(self): - """Returns the config of the residual wrapper.""" - if self._residual_fn is not None: - function, function_type, function_module = _serialize_function_to_config( - self._residual_fn) - config = { - "residual_fn": function, - "residual_fn_type": function_type, - "residual_fn_module": function_module - } - else: - config = {} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - if "residual_fn" in config: - config = config.copy() - residual_function = _parse_config_to_function(config, custom_objects, - "residual_fn", - "residual_fn_type", - "residual_fn_module") - config["residual_fn"] = residual_function - return super(ResidualWrapper, cls).from_config( - config, custom_objects=custom_objects) - - + """RNNCell wrapper that ensures cell inputs are added to the outputs.""" + + def __init__(self, cell, residual_fn=None, **kwargs): + """Constructs a `ResidualWrapper` for `cell`. + + Args: + cell: An instance of `RNNCell`. + residual_fn: (Optional) The function to map raw cell inputs and raw + cell outputs to the actual cell outputs of the residual network. + Defaults to calling nest.map_structure on (lambda i, o: i + o), + inputs and outputs. + **kwargs: dict of keyword arguments for base layer. + """ + super().__init__(cell, **kwargs) + self._residual_fn = residual_fn + + def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): + """Run the cell and apply the residual_fn. + + Args: + inputs: cell inputs. + state: cell state. + cell_call_fn: Wrapped cell's method to use for step computation + (cell's `__call__` or 'call' method). + **kwargs: Additional arguments passed to the wrapped cell's `call`. + + Returns: + Tuple of cell outputs and new state. + + Raises: + TypeError: If cell inputs and outputs have different structure (type). + ValueError: If cell inputs and outputs have different structure + (value). 
+ """ + outputs, new_state = cell_call_fn(inputs, state, **kwargs) + + # Ensure shapes match + def assert_shape_match(inp, out): + inp.get_shape().assert_is_compatible_with(out.get_shape()) + + def default_residual_fn(inputs, outputs): + tf.nest.assert_same_structure(inputs, outputs) + tf.nest.map_structure(assert_shape_match, inputs, outputs) + return tf.nest.map_structure( + lambda inp, out: inp + out, inputs, outputs + ) + + res_outputs = (self._residual_fn or default_residual_fn)( + inputs, outputs + ) + return (res_outputs, new_state) + + def get_config(self): + """Returns the config of the residual wrapper.""" + if self._residual_fn is not None: + ( + function, + function_type, + function_module, + ) = _serialize_function_to_config(self._residual_fn) + config = { + "residual_fn": function, + "residual_fn_type": function_type, + "residual_fn_module": function_module, + } + else: + config = {} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + if "residual_fn" in config: + config = config.copy() + residual_function = _parse_config_to_function( + config, + custom_objects, + "residual_fn", + "residual_fn_type", + "residual_fn_module", + ) + config["residual_fn"] = residual_function + return super(ResidualWrapper, cls).from_config( + config, custom_objects=custom_objects + ) + + +@deprecated(None, "Please use tf.keras.layers.RNN instead.") @tf_export("nn.RNNCellDeviceWrapper", v1=[]) class DeviceWrapper(_RNNCellWrapper): - """Operator that ensures an RNNCell runs on a particular device.""" + """Operator that ensures an RNNCell runs on a particular device.""" - def __init__(self, cell, device, **kwargs): - """Construct a `DeviceWrapper` for `cell` with device `device`. + def __init__(self, cell, device, **kwargs): + """Construct a `DeviceWrapper` for `cell` with device `device`. - Ensures the wrapped `cell` is called with `tf.device(device)`. + Ensures the wrapped `cell` is called with `tf.device(device)`. - Args: - cell: An instance of `RNNCell`. - device: A device string or function, for passing to `tf.device`. - **kwargs: dict of keyword arguments for base layer. - """ - super().__init__(cell, **kwargs) - self._device = device + Args: + cell: An instance of `RNNCell`. + device: A device string or function, for passing to `tf.device`. + **kwargs: dict of keyword arguments for base layer. 
+ """ + super().__init__(cell, **kwargs) + self._device = device - def zero_state(self, batch_size, dtype): - with tf.name_scope(type(self).__name__ + "ZeroState"): - with tf.compat.v1.device(self._device): - return self.cell.zero_state(batch_size, dtype) + def zero_state(self, batch_size, dtype): + with tf.name_scope(type(self).__name__ + "ZeroState"): + with tf.compat.v1.device(self._device): + return self.cell.zero_state(batch_size, dtype) - def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): - """Run the cell on specified device.""" - with tf.compat.v1.device(self._device): - return cell_call_fn(inputs, state, **kwargs) + def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): + """Run the cell on specified device.""" + with tf.compat.v1.device(self._device): + return cell_call_fn(inputs, state, **kwargs) - def get_config(self): - config = {"device": self._device} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = {"device": self._device} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) def _serialize_function_to_config(function): - """Serialize the function for get_config().""" - if isinstance(function, python_types.LambdaType): - output = generic_utils.func_dump(function) - output_type = "lambda" - module = function.__module__ - elif callable(function): - output = function.__name__ - output_type = "function" - module = function.__module__ - else: - raise ValueError( - f"Unrecognized function type for input: {type(function)}") - - return output, output_type, module - - -def _parse_config_to_function(config, custom_objects, func_attr_name, - func_type_attr_name, module_attr_name): - """Reconstruct the function from the config.""" - globs = globals() - module = config.pop(module_attr_name, None) - if module in sys.modules: - globs.update(sys.modules[module].__dict__) - elif module is not None: - # Note: we don't know the name of the function if it's a lambda. - warnings.warn( - "{} is not loaded, but a layer uses it. " - "It may cause errors.".format(module), - UserWarning, - stacklevel=2) - if custom_objects: - globs.update(custom_objects) - function_type = config.pop(func_type_attr_name) - if function_type == "function": - # Simple lookup in custom objects - function = generic_utils.deserialize_keras_object( - config[func_attr_name], - custom_objects=custom_objects, - printable_module_name="function in wrapper") - elif function_type == "lambda": - # Unsafe deserialization from bytecode - function = generic_utils.func_load( - config[func_attr_name], globs=globs) - else: - raise TypeError( - f"Unknown function type received: {function_type}. 
" - "Expected types are ['function', 'lambda']") - return function + """Serialize the function for get_config().""" + if isinstance(function, python_types.LambdaType): + output = generic_utils.func_dump(function) + output_type = "lambda" + module = function.__module__ + elif callable(function): + output = function.__name__ + output_type = "function" + module = function.__module__ + else: + raise ValueError( + f"Unrecognized function type for input: {type(function)}" + ) + + return output, output_type, module + + +def _parse_config_to_function( + config, + custom_objects, + func_attr_name, + func_type_attr_name, + module_attr_name, +): + """Reconstruct the function from the config.""" + globs = globals() + module = config.pop(module_attr_name, None) + if module in sys.modules: + globs.update(sys.modules[module].__dict__) + elif module is not None: + # Note: we don't know the name of the function if it's a lambda. + warnings.warn( + "{} is not loaded, but a layer uses it. " + "It may cause errors.".format(module), + UserWarning, + stacklevel=2, + ) + if custom_objects: + globs.update(custom_objects) + function_type = config.pop(func_type_attr_name) + if function_type == "function": + # Simple lookup in custom objects + function = serialization_lib.deserialize_keras_object( + config[func_attr_name], + custom_objects=custom_objects, + printable_module_name="function in wrapper", + ) + elif function_type == "lambda": + if serialization_lib.in_safe_mode(): + raise ValueError( + "Requested the deserialization of a layer with a " + "Python `lambda` inside it. " + "This carries a potential risk of arbitrary code execution " + "and thus it is disallowed by default. If you trust the " + "source of the saved model, you can pass `safe_mode=False` to " + "the loading function in order to allow " + "`lambda` loading." + ) + # Unsafe deserialization from bytecode + function = generic_utils.func_load(config[func_attr_name], globs=globs) + else: + raise TypeError( + f"Unknown function type received: {function_type}. 
" + "Expected types are ['function', 'lambda']" + ) + return function def _default_dropout_state_filter_visitor(substate): - return not isinstance(substate, tf.TensorArray) + return not isinstance(substate, tf.TensorArray) def _enumerated_map_structure_up_to(shallow_structure, map_fn, *args, **kwargs): - ix = [0] + ix = [0] - def enumerated_fn(*inner_args, **inner_kwargs): - r = map_fn(ix[0], *inner_args, **inner_kwargs) - ix[0] += 1 - return r + def enumerated_fn(*inner_args, **inner_kwargs): + r = map_fn(ix[0], *inner_args, **inner_kwargs) + ix[0] += 1 + return r - return tf.__internal__.nest.map_structure_up_to(shallow_structure, - enumerated_fn, *args, - **kwargs) + return tf.__internal__.nest.map_structure_up_to( + shallow_structure, enumerated_fn, *args, **kwargs + ) diff --git a/keras/layers/rnn/cell_wrappers_test.py b/keras/layers/rnn/cell_wrappers_test.py index e5f3caa30438..e8683a7f2040 100644 --- a/keras/layers/rnn/cell_wrappers_test.py +++ b/keras/layers/rnn/cell_wrappers_test.py @@ -14,208 +14,223 @@ # ============================================================================== """Tests for RNN cell wrappers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras import layers from keras.layers.rnn import cell_wrappers from keras.layers.rnn import legacy_cells from keras.legacy_tf_layers import base as legacy_base_layer from keras.testing_infra import test_combinations from keras.utils import generic_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class RNNCellWrapperTest(tf.test.TestCase, parameterized.TestCase): - - def testResidualWrapper(self): - wrapper_type = cell_wrappers.ResidualWrapper - x = tf.convert_to_tensor( - np.array([[1., 1., 1.]]), dtype="float32") - m = tf.convert_to_tensor( - np.array([[0.1, 0.1, 0.1]]), dtype="float32") - base_cell = legacy_cells.GRUCell( - 3, kernel_initializer=tf.compat.v1.constant_initializer(0.5), - bias_initializer=tf.compat.v1.constant_initializer(0.5)) - g, m_new = base_cell(x, m) - wrapper_object = wrapper_type(base_cell) - self.assertDictEqual({"cell": base_cell}, - wrapper_object._trackable_children()) - wrapper_object.get_config() # Should not throw an error - - g_res, m_new_res = wrapper_object(x, m) - self.evaluate([tf.compat.v1.global_variables_initializer()]) - res = self.evaluate([g, g_res, m_new, m_new_res]) - # Residual connections - self.assertAllClose(res[1], res[0] + [1., 1., 1.]) - # States are left untouched - self.assertAllClose(res[2], res[3]) - - def testResidualWrapperWithSlice(self): - wrapper_type = cell_wrappers.ResidualWrapper - x = tf.convert_to_tensor( - np.array([[1., 1., 1., 1., 1.]]), dtype="float32") - m = tf.convert_to_tensor( - np.array([[0.1, 0.1, 0.1]]), dtype="float32") - base_cell = legacy_cells.GRUCell( - 3, kernel_initializer=tf.compat.v1.constant_initializer(0.5), - bias_initializer=tf.compat.v1.constant_initializer(0.5)) - g, m_new = base_cell(x, m) - - def residual_with_slice_fn(inp, out): - inp_sliced = tf.slice(inp, [0, 0], [-1, 3]) - return inp_sliced + out - - g_res, m_new_res = wrapper_type( - base_cell, residual_with_slice_fn)(x, m) - self.evaluate([tf.compat.v1.global_variables_initializer()]) - res_g, res_g_res, res_m_new, res_m_new_res = self.evaluate( - [g, g_res, m_new, m_new_res]) - # Residual connections - self.assertAllClose(res_g_res, res_g + [1., 1., 1.]) - # States are left untouched - self.assertAllClose(res_m_new, 
res_m_new_res) - - def testDeviceWrapper(self): - wrapper_type = cell_wrappers.DeviceWrapper - x = tf.zeros([1, 3]) - m = tf.zeros([1, 3]) - cell = legacy_cells.GRUCell(3) - wrapped_cell = wrapper_type(cell, "/cpu:0") - self.assertDictEqual({"cell": cell}, - wrapped_cell._trackable_children()) - wrapped_cell.get_config() # Should not throw an error - - outputs, _ = wrapped_cell(x, m) - self.assertIn("cpu:0", outputs.device.lower()) - - @parameterized.parameters( - [cell_wrappers.DropoutWrapper, cell_wrappers.ResidualWrapper]) - def testWrapperKerasStyle(self, wrapper): - """Tests if wrapper cell is instantiated in keras style scope.""" - wrapped_cell = wrapper(legacy_cells.BasicRNNCell(1)) - self.assertIsNone(getattr(wrapped_cell, "_keras_style", None)) - - @parameterized.parameters( - [cell_wrappers.DropoutWrapper, cell_wrappers.ResidualWrapper]) - def testWrapperWeights(self, wrapper): - """Tests that wrapper weights contain wrapped cells weights.""" - base_cell = layers.SimpleRNNCell(1, name="basic_rnn_cell") - rnn_cell = wrapper(base_cell) - rnn_layer = layers.RNN(rnn_cell) - inputs = tf.convert_to_tensor([[[1]]], dtype=tf.float32) - rnn_layer(inputs) - - wrapper_name = generic_utils.to_snake_case(wrapper.__name__) - expected_weights = ["rnn/" + wrapper_name + "/" + var for var in - ("kernel:0", "recurrent_kernel:0", "bias:0")] - self.assertLen(rnn_cell.weights, 3) - self.assertCountEqual([v.name for v in rnn_cell.weights], expected_weights) - self.assertCountEqual([v.name for v in rnn_cell.trainable_variables], - expected_weights) - self.assertCountEqual([v.name for v in rnn_cell.non_trainable_variables], - []) - self.assertCountEqual([v.name for v in rnn_cell.cell.weights], - expected_weights) - - @parameterized.parameters( - [cell_wrappers.DropoutWrapper, cell_wrappers.ResidualWrapper]) - def testWrapperV2Caller(self, wrapper): - """Tests that wrapper V2 is using the LayerRNNCell's caller.""" - - with legacy_base_layer.keras_style_scope(): - base_cell = legacy_cells.MultiRNNCell( - [legacy_cells.BasicRNNCell(1) for _ in range(2)]) - rnn_cell = wrapper(base_cell) - inputs = tf.convert_to_tensor([[1]], dtype=tf.float32) - state = tf.convert_to_tensor([[1]], dtype=tf.float32) - _ = rnn_cell(inputs, [state, state]) - weights = base_cell._cells[0].weights - self.assertLen(weights, expected_len=2) - self.assertTrue(all("_wrapper" in v.name for v in weights)) - - @parameterized.parameters( - [cell_wrappers.DropoutWrapper, cell_wrappers.ResidualWrapper]) - def testWrapperV2Build(self, wrapper): - cell = legacy_cells.LSTMCell(10) - wrapper = wrapper(cell) - wrapper.build((1,)) - self.assertTrue(cell.built) - - def testDeviceWrapperSerialization(self): - wrapper_cls = cell_wrappers.DeviceWrapper - cell = layers.LSTMCell(10) - wrapper = wrapper_cls(cell, "/cpu:0") - config = wrapper.get_config() - - reconstructed_wrapper = wrapper_cls.from_config(config) - self.assertDictEqual(config, reconstructed_wrapper.get_config()) - self.assertIsInstance(reconstructed_wrapper, wrapper_cls) - - def testResidualWrapperSerialization(self): - wrapper_cls = cell_wrappers.ResidualWrapper - cell = layers.LSTMCell(10) - wrapper = wrapper_cls(cell) - config = wrapper.get_config() - - reconstructed_wrapper = wrapper_cls.from_config(config) - self.assertDictEqual(config, reconstructed_wrapper.get_config()) - self.assertIsInstance(reconstructed_wrapper, wrapper_cls) - - wrapper = wrapper_cls(cell, residual_fn=lambda i, o: i + i + o) - config = wrapper.get_config() - - reconstructed_wrapper = 
wrapper_cls.from_config(config) - # Assert the reconstructed function will perform the math correctly. - self.assertEqual(reconstructed_wrapper._residual_fn(1, 2), 4) - - def residual_fn(inputs, outputs): - return inputs * 3 + outputs - - wrapper = wrapper_cls(cell, residual_fn=residual_fn) - config = wrapper.get_config() - - reconstructed_wrapper = wrapper_cls.from_config(config) - # Assert the reconstructed function will perform the math correctly. - self.assertEqual(reconstructed_wrapper._residual_fn(1, 2), 5) - - def testDropoutWrapperSerialization(self): - wrapper_cls = cell_wrappers.DropoutWrapper - cell = layers.GRUCell(10) - wrapper = wrapper_cls(cell) - config = wrapper.get_config() - - reconstructed_wrapper = wrapper_cls.from_config(config) - self.assertDictEqual(config, reconstructed_wrapper.get_config()) - self.assertIsInstance(reconstructed_wrapper, wrapper_cls) - - wrapper = wrapper_cls(cell, dropout_state_filter_visitor=lambda s: True) - config = wrapper.get_config() - - reconstructed_wrapper = wrapper_cls.from_config(config) - self.assertTrue(reconstructed_wrapper._dropout_state_filter(None)) - - def dropout_state_filter_visitor(unused_state): - return False - - wrapper = wrapper_cls( - cell, dropout_state_filter_visitor=dropout_state_filter_visitor) - config = wrapper.get_config() - - reconstructed_wrapper = wrapper_cls.from_config(config) - self.assertFalse(reconstructed_wrapper._dropout_state_filter(None)) - - def testDropoutWrapperWithKerasLSTMCell(self): - wrapper_cls = cell_wrappers.DropoutWrapper - cell = layers.LSTMCell(10) - - with self.assertRaisesRegex(ValueError, "does not work with "): - wrapper_cls(cell) - - cell = layers.LSTMCellV2(10) - with self.assertRaisesRegex(ValueError, "does not work with "): - wrapper_cls(cell) + def testResidualWrapper(self): + wrapper_type = cell_wrappers.ResidualWrapper + x = tf.convert_to_tensor(np.array([[1.0, 1.0, 1.0]]), dtype="float32") + m = tf.convert_to_tensor(np.array([[0.1, 0.1, 0.1]]), dtype="float32") + base_cell = legacy_cells.GRUCell( + 3, + kernel_initializer=tf.compat.v1.constant_initializer(0.5), + bias_initializer=tf.compat.v1.constant_initializer(0.5), + ) + g, m_new = base_cell(x, m) + wrapper_object = wrapper_type(base_cell) + self.assertDictEqual( + {"cell": base_cell}, wrapper_object._trackable_children() + ) + wrapper_object.get_config() # Should not throw an error + + g_res, m_new_res = wrapper_object(x, m) + self.evaluate([tf.compat.v1.global_variables_initializer()]) + res = self.evaluate([g, g_res, m_new, m_new_res]) + # Residual connections + self.assertAllClose(res[1], res[0] + [1.0, 1.0, 1.0]) + # States are left untouched + self.assertAllClose(res[2], res[3]) + + def testResidualWrapperWithSlice(self): + wrapper_type = cell_wrappers.ResidualWrapper + x = tf.convert_to_tensor( + np.array([[1.0, 1.0, 1.0, 1.0, 1.0]]), dtype="float32" + ) + m = tf.convert_to_tensor(np.array([[0.1, 0.1, 0.1]]), dtype="float32") + base_cell = legacy_cells.GRUCell( + 3, + kernel_initializer=tf.compat.v1.constant_initializer(0.5), + bias_initializer=tf.compat.v1.constant_initializer(0.5), + ) + g, m_new = base_cell(x, m) + + def residual_with_slice_fn(inp, out): + inp_sliced = tf.slice(inp, [0, 0], [-1, 3]) + return inp_sliced + out + + g_res, m_new_res = wrapper_type(base_cell, residual_with_slice_fn)(x, m) + self.evaluate([tf.compat.v1.global_variables_initializer()]) + res_g, res_g_res, res_m_new, res_m_new_res = self.evaluate( + [g, g_res, m_new, m_new_res] + ) + # Residual connections + self.assertAllClose(res_g_res, 
res_g + [1.0, 1.0, 1.0]) + # States are left untouched + self.assertAllClose(res_m_new, res_m_new_res) + + def testDeviceWrapper(self): + wrapper_type = cell_wrappers.DeviceWrapper + x = tf.zeros([1, 3]) + m = tf.zeros([1, 3]) + cell = legacy_cells.GRUCell(3) + wrapped_cell = wrapper_type(cell, "/cpu:0") + self.assertDictEqual({"cell": cell}, wrapped_cell._trackable_children()) + wrapped_cell.get_config() # Should not throw an error + + outputs, _ = wrapped_cell(x, m) + self.assertIn("cpu:0", outputs.device.lower()) + + @parameterized.parameters( + [cell_wrappers.DropoutWrapper, cell_wrappers.ResidualWrapper] + ) + def testWrapperKerasStyle(self, wrapper): + """Tests if wrapper cell is instantiated in keras style scope.""" + wrapped_cell = wrapper(legacy_cells.BasicRNNCell(1)) + self.assertIsNone(getattr(wrapped_cell, "_keras_style", None)) + + @parameterized.parameters( + [cell_wrappers.DropoutWrapper, cell_wrappers.ResidualWrapper] + ) + def testWrapperWeights(self, wrapper): + """Tests that wrapper weights contain wrapped cells weights.""" + base_cell = layers.SimpleRNNCell(1, name="basic_rnn_cell") + rnn_cell = wrapper(base_cell) + rnn_layer = layers.RNN(rnn_cell) + inputs = tf.convert_to_tensor([[[1]]], dtype=tf.float32) + rnn_layer(inputs) + + wrapper_name = generic_utils.to_snake_case(wrapper.__name__) + expected_weights = [ + "rnn/" + wrapper_name + "/" + var + for var in ("kernel:0", "recurrent_kernel:0", "bias:0") + ] + self.assertLen(rnn_cell.weights, 3) + self.assertCountEqual( + [v.name for v in rnn_cell.weights], expected_weights + ) + self.assertCountEqual( + [v.name for v in rnn_cell.trainable_variables], expected_weights + ) + self.assertCountEqual( + [v.name for v in rnn_cell.non_trainable_variables], [] + ) + self.assertCountEqual( + [v.name for v in rnn_cell.cell.weights], expected_weights + ) + + @parameterized.parameters( + [cell_wrappers.DropoutWrapper, cell_wrappers.ResidualWrapper] + ) + def testWrapperV2Caller(self, wrapper): + """Tests that wrapper V2 is using the LayerRNNCell's caller.""" + + with legacy_base_layer.keras_style_scope(): + base_cell = legacy_cells.MultiRNNCell( + [legacy_cells.BasicRNNCell(1) for _ in range(2)] + ) + rnn_cell = wrapper(base_cell) + inputs = tf.convert_to_tensor([[1]], dtype=tf.float32) + state = tf.convert_to_tensor([[1]], dtype=tf.float32) + _ = rnn_cell(inputs, [state, state]) + weights = base_cell._cells[0].weights + self.assertLen(weights, expected_len=2) + self.assertTrue(all("_wrapper" in v.name for v in weights)) + + @parameterized.parameters( + [cell_wrappers.DropoutWrapper, cell_wrappers.ResidualWrapper] + ) + def testWrapperV2Build(self, wrapper): + cell = legacy_cells.LSTMCell(10) + wrapper = wrapper(cell) + wrapper.build((1,)) + self.assertTrue(cell.built) + + def testDeviceWrapperSerialization(self): + wrapper_cls = cell_wrappers.DeviceWrapper + cell = layers.LSTMCell(10) + wrapper = wrapper_cls(cell, "/cpu:0") + config = wrapper.get_config() + + reconstructed_wrapper = wrapper_cls.from_config(config) + self.assertDictEqual(config, reconstructed_wrapper.get_config()) + self.assertIsInstance(reconstructed_wrapper, wrapper_cls) + + def testResidualWrapperSerialization(self): + wrapper_cls = cell_wrappers.ResidualWrapper + cell = layers.LSTMCell(10) + wrapper = wrapper_cls(cell) + config = wrapper.get_config() + + reconstructed_wrapper = wrapper_cls.from_config(config) + self.assertDictEqual(config, reconstructed_wrapper.get_config()) + self.assertIsInstance(reconstructed_wrapper, wrapper_cls) + + wrapper = 
wrapper_cls(cell, residual_fn=lambda i, o: i + i + o) + config = wrapper.get_config() + + reconstructed_wrapper = wrapper_cls.from_config(config) + # Assert the reconstructed function will perform the math correctly. + self.assertEqual(reconstructed_wrapper._residual_fn(1, 2), 4) + + def residual_fn(inputs, outputs): + return inputs * 3 + outputs + + wrapper = wrapper_cls(cell, residual_fn=residual_fn) + config = wrapper.get_config() + + reconstructed_wrapper = wrapper_cls.from_config(config) + # Assert the reconstructed function will perform the math correctly. + self.assertEqual(reconstructed_wrapper._residual_fn(1, 2), 5) + + def testDropoutWrapperSerialization(self): + wrapper_cls = cell_wrappers.DropoutWrapper + cell = layers.GRUCell(10) + wrapper = wrapper_cls(cell) + config = wrapper.get_config() + + reconstructed_wrapper = wrapper_cls.from_config(config) + self.assertDictEqual(config, reconstructed_wrapper.get_config()) + self.assertIsInstance(reconstructed_wrapper, wrapper_cls) + + wrapper = wrapper_cls(cell, dropout_state_filter_visitor=lambda s: True) + config = wrapper.get_config() + + reconstructed_wrapper = wrapper_cls.from_config(config) + self.assertTrue(reconstructed_wrapper._dropout_state_filter(None)) + + def dropout_state_filter_visitor(unused_state): + return False + + wrapper = wrapper_cls( + cell, dropout_state_filter_visitor=dropout_state_filter_visitor + ) + config = wrapper.get_config() + + reconstructed_wrapper = wrapper_cls.from_config(config) + self.assertFalse(reconstructed_wrapper._dropout_state_filter(None)) + + def testDropoutWrapperWithKerasLSTMCell(self): + wrapper_cls = cell_wrappers.DropoutWrapper + cell = layers.LSTMCell(10) + + with self.assertRaisesRegex(ValueError, "does not work with "): + wrapper_cls(cell) + + cell = layers.LSTMCellV2(10) + with self.assertRaisesRegex(ValueError, "does not work with "): + wrapper_cls(cell) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/rnn/conv_lstm1d.py b/keras/layers/rnn/conv_lstm1d.py index b86eb9a4c1b7..96d3c2837416 100644 --- a/keras/layers/rnn/conv_lstm1d.py +++ b/keras/layers/rnn/conv_lstm1d.py @@ -13,172 +13,177 @@ # limitations under the License. # ============================================================================== """1D Convolutional LSTM layer.""" -# pylint: disable=g-classes-have-attributes,disable=g-direct-tensorflow-import + from keras.layers.rnn.base_conv_lstm import ConvLSTM +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ConvLSTM1D') +@keras_export("keras.layers.ConvLSTM1D") class ConvLSTM1D(ConvLSTM): - """1D Convolutional LSTM. + """1D Convolutional LSTM. - Similar to an LSTM layer, but the input transformations - and recurrent transformations are both convolutional. + Similar to an LSTM layer, but the input transformations + and recurrent transformations are both convolutional. - Args: - filters: Integer, the dimensionality of the output space (i.e. the number of - output filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - dimensions of the convolution window. - strides: An integer or tuple/list of n integers, specifying the strides of - the convolution. Specifying any stride value != 1 is incompatible with - specifying any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means no - padding. 
`"same"` results in padding evenly to the left/right or up/down - of the input such that output has the same height/width dimension as the - input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. `channels_last` corresponds - to inputs with shape `(batch, time, ..., channels)` while `channels_first` - corresponds to inputs with shape `(batch, time, channels, ...)`. It - defaults to the `image_data_format` value found in your Keras config file - at `~/.keras/keras.json`. If you never set it, then it will be - "channels_last". - dilation_rate: An integer or tuple/list of n integers, specifying the - dilation rate to use for dilated convolution. Currently, specifying any - `dilation_rate` value != 1 is incompatible with specifying any `strides` - value != 1. - activation: Activation function to use. By default hyperbolic tangent - activation function is applied (`tanh(x)`). - recurrent_activation: Activation function to use for the recurrent step. - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, used for - the linear transformation of the inputs. - recurrent_initializer: Initializer for the `recurrent_kernel` weights - matrix, used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at - initialization. Use in combination with `bias_initializer="zeros"`. This - is recommended in [Jozefowicz et al., 2015]( + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of output filters in the convolution). + kernel_size: An integer or tuple/list of n integers, specifying the + dimensions of the convolution window. + strides: An integer or tuple/list of n integers, specifying the strides of + the convolution. Specifying any stride value != 1 is incompatible with + specifying any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means + no padding. `"same"` results in padding evenly to the left/right or + up/down of the input such that output has the same height/width + dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape `(batch, time, ..., + channels)` while `channels_first` corresponds to inputs with shape + `(batch, time, channels, ...)`. When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + dilation_rate: An integer or tuple/list of n integers, specifying the + dilation rate to use for dilated convolution. Currently, specifying any + `dilation_rate` value != 1 is incompatible with specifying any `strides` + value != 1. + activation: Activation function to use. By default hyperbolic tangent + activation function is applied (`tanh(x)`). + recurrent_activation: Activation function to use for the recurrent step. + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, used for + the linear transformation of the inputs. + recurrent_initializer: Initializer for the `recurrent_kernel` weights + matrix, used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. 
+    unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate
+      at initialization. Use in combination with `bias_initializer="zeros"`.
+      This is recommended in [Jozefowicz et al., 2015](
        http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
-    kernel_regularizer: Regularizer function applied to the `kernel` weights
-      matrix.
-    recurrent_regularizer: Regularizer function applied to the
-      `recurrent_kernel` weights matrix.
-    bias_regularizer: Regularizer function applied to the bias vector.
-    activity_regularizer: Regularizer function applied to.
-    kernel_constraint: Constraint function applied to the `kernel` weights
-      matrix.
-    recurrent_constraint: Constraint function applied to the `recurrent_kernel`
-      weights matrix.
-    bias_constraint: Constraint function applied to the bias vector.
-    return_sequences: Boolean. Whether to return the last output in the output
-      sequence, or the full sequence. (default False)
-    return_state: Boolean Whether to return the last state in addition to the
-      output. (default False)
-    go_backwards: Boolean (default False). If True, process the input sequence
-      backwards.
-    stateful: Boolean (default False). If True, the last state for each sample
-      at index i in a batch will be used as initial state for the sample of
-      index i in the following batch.
-    dropout: Float between 0 and 1. Fraction of the units to drop for the linear
-      transformation of the inputs.
-    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
-      the linear transformation of the recurrent state.
-  Call arguments:
-    inputs: A 4D tensor.
-    mask: Binary tensor of shape `(samples, timesteps)` indicating whether a
-      given timestep should be masked.
-    training: Python boolean indicating whether the layer should behave in
-      training mode or in inference mode. This argument is passed to the cell
-      when calling it. This is only relevant if `dropout` or `recurrent_dropout`
-      are set.
-    initial_state: List of initial state tensors to be passed to the first call
-      of the cell.
-  Input shape: - If data_format='channels_first'
-      4D tensor with shape: `(samples, time, channels, rows)` - If
-      data_format='channels_last'
-      4D tensor with shape: `(samples, time, rows, channels)`
-  Output shape:
-    - If `return_state`: a list of tensors. The first tensor is the output. The
-      remaining tensors are the last states,
-      each 3D tensor with shape: `(samples, filters, new_rows)` if
+    kernel_regularizer: Regularizer function applied to the `kernel` weights
+      matrix.
+    recurrent_regularizer: Regularizer function applied to the
+      `recurrent_kernel` weights matrix.
+    bias_regularizer: Regularizer function applied to the bias vector.
+    activity_regularizer: Regularizer function applied to the output of the
+      layer (its "activation").
+    kernel_constraint: Constraint function applied to the `kernel` weights
+      matrix.
+    recurrent_constraint: Constraint function applied to the
+      `recurrent_kernel` weights matrix.
+    bias_constraint: Constraint function applied to the bias vector.
+    return_sequences: Boolean. Whether to return the last output in the output
+      sequence, or the full sequence. (default False)
+    return_state: Boolean. Whether to return the last state in addition to the
+      output. (default False)
+    go_backwards: Boolean (default False). If True, process the input sequence
+      backwards.
+    stateful: Boolean (default False). If True, the last state for each sample
+      at index i in a batch will be used as initial state for the sample of
+      index i in the following batch.
+    dropout: Float between 0 and 1. Fraction of the units to drop for the
+      linear transformation of the inputs.
+    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
+      for the linear transformation of the recurrent state.
+  Call arguments:
+    inputs: A 4D tensor.
+    mask: Binary tensor of shape `(samples, timesteps)` indicating whether a
+      given timestep should be masked.
+    training: Python boolean indicating whether the layer should behave in
+      training mode or in inference mode. This argument is passed to the cell
+      when calling it. This is only relevant if `dropout` or
+      `recurrent_dropout` are set.
+    initial_state: List of initial state tensors to be passed to the first
+      call of the cell.
+  Input shape:
+    - If data_format='channels_first': 4D tensor with shape
+      `(samples, time, channels, rows)`
+    - If data_format='channels_last': 4D tensor with shape
+      `(samples, time, rows, channels)`
+  Output shape:
+    - If `return_state`: a list of tensors. The first tensor is the output.
+      The remaining tensors are the last states,
+      each 3D tensor with shape: `(samples, filters, new_rows)` if
+      data_format='channels_first'
+      or shape: `(samples, new_rows, filters)` if data_format='channels_last'.
+      `rows` values might have changed due to padding.
+    - If `return_sequences`: 4D tensor with shape: `(samples, timesteps,
+      filters, new_rows)` if data_format='channels_first'
+      or shape: `(samples, timesteps, new_rows, filters)` if
+      data_format='channels_last'.
+    - Else, 3D tensor with shape: `(samples, filters, new_rows)` if
      data_format='channels_first'
-      or shape: `(samples, new_rows, filters)` if data_format='channels_last'.
-      `rows` values might have changed due to padding.
-    - If `return_sequences`: 4D tensor with shape: `(samples, timesteps,
-      filters, new_rows)` if data_format='channels_first'
-      or shape: `(samples, timesteps, new_rows, filters)` if
-      data_format='channels_last'.
-    - Else, 3D tensor with shape: `(samples, filters, new_rows)` if
-      data_format='channels_first'
-      or shape: `(samples, new_rows, filters)` if data_format='channels_last'.
+      or shape: `(samples, new_rows, filters)` if data_format='channels_last'.
-  Raises:
-    ValueError: in case of invalid constructor arguments.
+  Raises:
+    ValueError: in case of invalid constructor arguments.
-  References:
-    - [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1)
-    (the current implementation does not include the feedback loop on the
-    cells output).
-  """
+  References:
+    - [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1)
+    (the current implementation does not include the feedback loop on the
+    cells output).
+ """ - def __init__(self, - filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - dropout=0.0, - recurrent_dropout=0.0, - **kwargs): - super().__init__( - rank=1, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - unit_forget_bias=unit_forget_bias, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - **kwargs) + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + activation="tanh", + recurrent_activation="hard_sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + dropout=0.0, + recurrent_dropout=0.0, + **kwargs + ): + super().__init__( + rank=1, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + unit_forget_bias=unit_forget_bias, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + **kwargs + ) diff --git a/keras/layers/rnn/conv_lstm2d.py b/keras/layers/rnn/conv_lstm2d.py index e559097dda4b..668c9da5e4a9 100644 --- a/keras/layers/rnn/conv_lstm2d.py +++ b/keras/layers/rnn/conv_lstm2d.py @@ -13,174 +13,179 @@ # limitations under the License. 
# ============================================================================== """2D Convolutional LSTM layer.""" -# pylint: disable=g-classes-have-attributes,disable=g-direct-tensorflow-import + from keras.layers.rnn.base_conv_lstm import ConvLSTM +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ConvLSTM2D') +@keras_export("keras.layers.ConvLSTM2D") class ConvLSTM2D(ConvLSTM): - """2D Convolutional LSTM. + """2D Convolutional LSTM. - Similar to an LSTM layer, but the input transformations - and recurrent transformations are both convolutional. + Similar to an LSTM layer, but the input transformations + and recurrent transformations are both convolutional. - Args: - filters: Integer, the dimensionality of the output space (i.e. the number of - output filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - dimensions of the convolution window. - strides: An integer or tuple/list of n integers, specifying the strides of - the convolution. Specifying any stride value != 1 is incompatible with - specifying any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means no - padding. `"same"` results in padding evenly to the left/right or up/down - of the input such that output has the same height/width dimension as the - input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. `channels_last` corresponds - to inputs with shape `(batch, time, ..., channels)` while `channels_first` - corresponds to inputs with shape `(batch, time, channels, ...)`. It - defaults to the `image_data_format` value found in your Keras config file - at `~/.keras/keras.json`. If you never set it, then it will be - "channels_last". - dilation_rate: An integer or tuple/list of n integers, specifying the - dilation rate to use for dilated convolution. Currently, specifying any - `dilation_rate` value != 1 is incompatible with specifying any `strides` - value != 1. - activation: Activation function to use. By default hyperbolic tangent - activation function is applied (`tanh(x)`). - recurrent_activation: Activation function to use for the recurrent step. - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, used for - the linear transformation of the inputs. - recurrent_initializer: Initializer for the `recurrent_kernel` weights - matrix, used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at - initialization. Use in combination with `bias_initializer="zeros"`. This - is recommended in [Jozefowicz et al., 2015]( + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of output filters in the convolution). + kernel_size: An integer or tuple/list of n integers, specifying the + dimensions of the convolution window. + strides: An integer or tuple/list of n integers, specifying the strides of + the convolution. Specifying any stride value != 1 is incompatible with + specifying any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means + no padding. `"same"` results in padding evenly to the left/right or + up/down of the input such that output has the same height/width + dimension as the input. 
+ data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape `(batch, time, ..., + channels)` while `channels_first` corresponds to inputs with shape + `(batch, time, channels, ...)`. When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + dilation_rate: An integer or tuple/list of n integers, specifying the + dilation rate to use for dilated convolution. Currently, specifying any + `dilation_rate` value != 1 is incompatible with specifying any `strides` + value != 1. + activation: Activation function to use. By default hyperbolic tangent + activation function is applied (`tanh(x)`). + recurrent_activation: Activation function to use for the recurrent step. + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, used for + the linear transformation of the inputs. + recurrent_initializer: Initializer for the `recurrent_kernel` weights + matrix, used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate + at initialization. Use in combination with `bias_initializer="zeros"`. + This is recommended in [Jozefowicz et al., 2015]( http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to. - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. - recurrent_constraint: Constraint function applied to the `recurrent_kernel` - weights matrix. - bias_constraint: Constraint function applied to the bias vector. - return_sequences: Boolean. Whether to return the last output in the output - sequence, or the full sequence. (default False) - return_state: Boolean Whether to return the last state in addition to the - output. (default False) - go_backwards: Boolean (default False). If True, process the input sequence - backwards. - stateful: Boolean (default False). If True, the last state for each sample - at index i in a batch will be used as initial state for the sample of - index i in the following batch. - dropout: Float between 0 and 1. Fraction of the units to drop for the linear - transformation of the inputs. - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for - the linear transformation of the recurrent state. - Call arguments: - inputs: A 5D tensor. - mask: Binary tensor of shape `(samples, timesteps)` indicating whether a - given timestep should be masked. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the cell - when calling it. This is only relevant if `dropout` or `recurrent_dropout` - are set. - initial_state: List of initial state tensors to be passed to the first call - of the cell. 
-  Input shape: - If data_format='channels_first'
-      5D tensor with shape: `(samples, time, channels, rows, cols)` - If
-      data_format='channels_last'
-      5D tensor with shape: `(samples, time, rows, cols, channels)`
-  Output shape:
-    - If `return_state`: a list of tensors. The first tensor is the output. The
-      remaining tensors are the last states,
-      each 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
+    kernel_regularizer: Regularizer function applied to the `kernel` weights
+      matrix.
+    recurrent_regularizer: Regularizer function applied to the
+      `recurrent_kernel` weights matrix.
+    bias_regularizer: Regularizer function applied to the bias vector.
+    activity_regularizer: Regularizer function applied to the output of the
+      layer (its "activation").
+    kernel_constraint: Constraint function applied to the `kernel` weights
+      matrix.
+    recurrent_constraint: Constraint function applied to the
+      `recurrent_kernel` weights matrix.
+    bias_constraint: Constraint function applied to the bias vector.
+    return_sequences: Boolean. Whether to return the last output in the output
+      sequence, or the full sequence. (default False)
+    return_state: Boolean. Whether to return the last state in addition to the
+      output. (default False)
+    go_backwards: Boolean (default False). If True, process the input sequence
+      backwards.
+    stateful: Boolean (default False). If True, the last state for each sample
+      at index i in a batch will be used as initial state for the sample of
+      index i in the following batch.
+    dropout: Float between 0 and 1. Fraction of the units to drop for the
+      linear transformation of the inputs.
+    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
+      for the linear transformation of the recurrent state.
+  Call arguments:
+    inputs: A 5D tensor.
+    mask: Binary tensor of shape `(samples, timesteps)` indicating whether a
+      given timestep should be masked.
+    training: Python boolean indicating whether the layer should behave in
+      training mode or in inference mode. This argument is passed to the cell
+      when calling it. This is only relevant if `dropout` or
+      `recurrent_dropout` are set.
+    initial_state: List of initial state tensors to be passed to the first
+      call of the cell.
+  Input shape:
+    - If data_format='channels_first': 5D tensor with shape
+      `(samples, time, channels, rows, cols)`
+    - If data_format='channels_last': 5D tensor with shape
+      `(samples, time, rows, cols, channels)`
+  Output shape:
+    - If `return_state`: a list of tensors. The first tensor is the output.
+      The remaining tensors are the last states,
+      each 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
+      data_format='channels_first'
+      or shape: `(samples, new_rows, new_cols, filters)` if
+      data_format='channels_last'. `rows` and `cols` values might have
+      changed due to padding.
+    - If `return_sequences`: 5D tensor with shape: `(samples, timesteps,
+      filters, new_rows, new_cols)` if data_format='channels_first'
+      or shape: `(samples, timesteps, new_rows, new_cols, filters)` if
+      data_format='channels_last'.
+    - Else, 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
      data_format='channels_first'
- - Else, 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if - data_format='channels_first' - or shape: `(samples, new_rows, new_cols, filters)` if - data_format='channels_last'. + or shape: `(samples, new_rows, new_cols, filters)` if + data_format='channels_last'. - Raises: - ValueError: in case of invalid constructor arguments. + Raises: + ValueError: in case of invalid constructor arguments. - References: - - [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1) - (the current implementation does not include the feedback loop on the - cells output). - """ + References: + - [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1) + (the current implementation does not include the feedback loop on the + cells output). + """ - def __init__(self, - filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - dropout=0.0, - recurrent_dropout=0.0, - **kwargs): - super().__init__( - rank=2, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - unit_forget_bias=unit_forget_bias, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - **kwargs) + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + activation="tanh", + recurrent_activation="hard_sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + dropout=0.0, + recurrent_dropout=0.0, + **kwargs + ): + super().__init__( + rank=2, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + unit_forget_bias=unit_forget_bias, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + 
activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + **kwargs + ) diff --git a/keras/layers/rnn/conv_lstm3d.py b/keras/layers/rnn/conv_lstm3d.py index 76e490dbc74b..1488faae72c5 100644 --- a/keras/layers/rnn/conv_lstm3d.py +++ b/keras/layers/rnn/conv_lstm3d.py @@ -13,174 +13,179 @@ # limitations under the License. # ============================================================================== """3D Convolutional LSTM layer.""" -# pylint: disable=g-classes-have-attributes,disable=g-direct-tensorflow-import + from keras.layers.rnn.base_conv_lstm import ConvLSTM +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.ConvLSTM3D') +@keras_export("keras.layers.ConvLSTM3D") class ConvLSTM3D(ConvLSTM): - """3D Convolutional LSTM. + """3D Convolutional LSTM. - Similar to an LSTM layer, but the input transformations - and recurrent transformations are both convolutional. + Similar to an LSTM layer, but the input transformations + and recurrent transformations are both convolutional. - Args: - filters: Integer, the dimensionality of the output space (i.e. the number of - output filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - dimensions of the convolution window. - strides: An integer or tuple/list of n integers, specifying the strides of - the convolution. Specifying any stride value != 1 is incompatible with - specifying any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means no - padding. `"same"` results in padding evenly to the left/right or up/down - of the input such that output has the same height/width dimension as the - input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. `channels_last` corresponds - to inputs with shape `(batch, time, ..., channels)` while `channels_first` - corresponds to inputs with shape `(batch, time, channels, ...)`. It - defaults to the `image_data_format` value found in your Keras config file - at `~/.keras/keras.json`. If you never set it, then it will be - "channels_last". - dilation_rate: An integer or tuple/list of n integers, specifying the - dilation rate to use for dilated convolution. Currently, specifying any - `dilation_rate` value != 1 is incompatible with specifying any `strides` - value != 1. - activation: Activation function to use. By default hyperbolic tangent - activation function is applied (`tanh(x)`). - recurrent_activation: Activation function to use for the recurrent step. - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, used for - the linear transformation of the inputs. - recurrent_initializer: Initializer for the `recurrent_kernel` weights - matrix, used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at - initialization. Use in combination with `bias_initializer="zeros"`. This - is recommended in [Jozefowicz et al., 2015]( + Args: + filters: Integer, the dimensionality of the output space (i.e. 
the number + of output filters in the convolution). + kernel_size: An integer or tuple/list of n integers, specifying the + dimensions of the convolution window. + strides: An integer or tuple/list of n integers, specifying the strides of + the convolution. Specifying any stride value != 1 is incompatible with + specifying any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means + no padding. `"same"` results in padding evenly to the left/right or + up/down of the input such that output has the same height/width + dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape `(batch, time, ..., + channels)` while `channels_first` corresponds to inputs with shape + `(batch, time, channels, ...)`. When unspecified, uses + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json` (if exists) else 'channels_last'. + Defaults to 'channels_last'. + dilation_rate: An integer or tuple/list of n integers, specifying the + dilation rate to use for dilated convolution. Currently, specifying any + `dilation_rate` value != 1 is incompatible with specifying any `strides` + value != 1. + activation: Activation function to use. By default hyperbolic tangent + activation function is applied (`tanh(x)`). + recurrent_activation: Activation function to use for the recurrent step. + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, used for + the linear transformation of the inputs. + recurrent_initializer: Initializer for the `recurrent_kernel` weights + matrix, used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate + at initialization. Use in combination with `bias_initializer="zeros"`. + This is recommended in [Jozefowicz et al., 2015]( http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to. - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. - recurrent_constraint: Constraint function applied to the `recurrent_kernel` - weights matrix. - bias_constraint: Constraint function applied to the bias vector. - return_sequences: Boolean. Whether to return the last output in the output - sequence, or the full sequence. (default False) - return_state: Boolean Whether to return the last state in addition to the - output. (default False) - go_backwards: Boolean (default False). If True, process the input sequence - backwards. - stateful: Boolean (default False). If True, the last state for each sample - at index i in a batch will be used as initial state for the sample of - index i in the following batch. - dropout: Float between 0 and 1. Fraction of the units to drop for the linear - transformation of the inputs. - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for - the linear transformation of the recurrent state. - Call arguments: - inputs: A 6D tensor. 
- mask: Binary tensor of shape `(samples, timesteps)` indicating whether a - given timestep should be masked. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the cell - when calling it. This is only relevant if `dropout` or `recurrent_dropout` - are set. - initial_state: List of initial state tensors to be passed to the first call - of the cell. - Input shape: - If data_format='channels_first' - 6D tensor with shape: `(samples, time, channels, rows, cols, depth)` - - If data_format='channels_last' - 5D tensor with shape: `(samples, time, rows, cols, depth, channels)` - Output shape: - - If `return_state`: a list of tensors. The first tensor is the output. The - remaining tensors are the last states, - each 5D tensor with shape: `(samples, filters, new_rows, new_cols, + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. + recurrent_regularizer: Regularizer function applied to the + `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation"). + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. + recurrent_constraint: Constraint function applied to the + `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + return_sequences: Boolean. Whether to return the last output in the output + sequence, or the full sequence. (default False) + return_state: Boolean. Whether to return the last state in addition to the + output. (default False) + go_backwards: Boolean (default False). If True, process the input sequence + backwards. + stateful: Boolean (default False). If True, the last state for each sample + at index i in a batch will be used as initial state for the sample of + index i in the following batch. + dropout: Float between 0 and 1. Fraction of the units to drop for the + linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + for the linear transformation of the recurrent state. + Call arguments: + inputs: A 6D tensor. + mask: Binary tensor of shape `(samples, timesteps)` indicating whether a + given timestep should be masked. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. This argument is passed to the cell + when calling it. This is only relevant if `dropout` or + `recurrent_dropout` are set. + initial_state: List of initial state tensors to be passed to the first + call of the cell. + Input shape: + - If data_format='channels_first' + 6D tensor with shape: `(samples, time, channels, rows, cols, depth)` + - If data_format='channels_last' + 6D tensor with shape: `(samples, time, rows, cols, depth, channels)` + Output shape: + - If `return_state`: a list of tensors. The first tensor is the output. + The remaining tensors are the last states, + each 5D tensor with shape: `(samples, filters, new_rows, new_cols, + new_depth)` if data_format='channels_first' + or shape: `(samples, new_rows, new_cols, new_depth, filters)` if + data_format='channels_last'. `rows`, `cols`, and `depth` values might + have changed due to padding. + - If `return_sequences`: 6D tensor with shape: `(samples, timesteps, + filters, new_rows, new_cols, new_depth)` if data_format='channels_first' + or shape: `(samples, timesteps, new_rows, new_cols, new_depth, filters)` + if data_format='channels_last'.
+ - Else, 5D tensor with shape: `(samples, filters, new_rows, new_cols, new_depth)` if data_format='channels_first' - or shape: `(samples, new_rows, new_cols, new_depth, filters)` if - data_format='channels_last'. `rows`, `cols`, and `depth` values might - have changed due to padding. - - If `return_sequences`: 6D tensor with shape: `(samples, timesteps, - filters, new_rows, new_cols, new_depth)` if data_format='channels_first' - or shape: `(samples, timesteps, new_rows, new_cols, new_depth, filters)` - if data_format='channels_last'. - - Else, 5D tensor with shape: `(samples, filters, new_rows, new_cols, - new_depth)` if data_format='channels_first' - or shape: `(samples, new_rows, new_cols, new_depth, filters)` if - data_format='channels_last'. + or shape: `(samples, new_rows, new_cols, new_depth, filters)` if + data_format='channels_last'. - Raises: - ValueError: in case of invalid constructor arguments. + Raises: + ValueError: in case of invalid constructor arguments. - References: - - [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1) - (the current implementation does not include the feedback loop on the - cells output). - """ + References: + - [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1) + (the current implementation does not include the feedback loop on the + cell's output). + """ - def __init__(self, - filters, - kernel_size, - strides=(1, 1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1, 1), - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - dropout=0.0, - recurrent_dropout=0.0, - **kwargs): - super().__init__( - rank=3, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - unit_forget_bias=unit_forget_bias, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - **kwargs) + def __init__( + self, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1, 1), + activation="tanh", + recurrent_activation="hard_sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + dropout=0.0, + recurrent_dropout=0.0,
+ **kwargs + ): + super().__init__( + rank=3, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + unit_forget_bias=unit_forget_bias, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + **kwargs + ) diff --git a/keras/layers/rnn/conv_lstm_test.py b/keras/layers/rnn/conv_lstm_test.py index 707d4b8b3b22..d8dfdeda2bfe 100644 --- a/keras/layers/rnn/conv_lstm_test.py +++ b/keras/layers/rnn/conv_lstm_test.py @@ -14,334 +14,406 @@ # ============================================================================== """Tests for convolutional recurrent layers.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class ConvLSTM1DTest(test_combinations.TestCase): - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - data_format=['channels_first', 'channels_last'], - return_sequences=[True, False])) - def test_conv_lstm(self, data_format, return_sequences): - num_row = 3 - filters = 3 - num_samples = 1 - input_channel = 2 - input_num_row = 5 - sequence_len = 2 - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, sequence_len, input_channel, - input_num_row) - else: - inputs = np.random.rand(num_samples, sequence_len, input_num_row, - input_channel) - - # test for return state: - x = keras.Input(batch_shape=inputs.shape) - kwargs = { - 'data_format': data_format, - 'return_sequences': return_sequences, - 'return_state': True, - 'stateful': True, - 'filters': filters, - 'kernel_size': num_row, - 'padding': 'valid', - } - layer = keras.layers.ConvLSTM1D(**kwargs) - layer.build(inputs.shape) - outputs = layer(x) - _, states = outputs[0], outputs[1:] - self.assertEqual(len(states), 2) - model = keras.models.Model(x, states[0]) - - state = model.predict(inputs) - - self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4) - - # test for output shape: - test_utils.layer_test( - keras.layers.ConvLSTM1D, - kwargs={ - 'data_format': data_format, - 'return_sequences': return_sequences, - 'filters': filters, - 'kernel_size': num_row, - 'padding': 'valid' - }, - input_shape=inputs.shape) + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + data_format=["channels_first", "channels_last"], + return_sequences=[True, False], + ) + ) + def test_conv_lstm(self, data_format, return_sequences): + num_row = 3 + filters = 3 + num_samples = 1 + input_channel = 2 + input_num_row = 5 + sequence_len = 2 + if data_format == "channels_first": + inputs = np.random.rand( + num_samples, sequence_len, input_channel, input_num_row + ) + else: + inputs = np.random.rand( + 
num_samples, sequence_len, input_num_row, input_channel + ) + + # test for return state: + x = keras.Input(batch_shape=inputs.shape) + kwargs = { + "data_format": data_format, + "return_sequences": return_sequences, + "return_state": True, + "stateful": True, + "filters": filters, + "kernel_size": num_row, + "padding": "valid", + } + layer = keras.layers.ConvLSTM1D(**kwargs) + layer.build(inputs.shape) + outputs = layer(x) + _, states = outputs[0], outputs[1:] + self.assertEqual(len(states), 2) + model = keras.models.Model(x, states[0]) + + state = model.predict(inputs) + + self.assertAllClose( + keras.backend.eval(layer.states[0]), state, atol=1e-4 + ) + + # test for output shape: + test_utils.layer_test( + keras.layers.ConvLSTM1D, + kwargs={ + "data_format": data_format, + "return_sequences": return_sequences, + "filters": filters, + "kernel_size": num_row, + "padding": "valid", + }, + input_shape=inputs.shape, + ) @test_combinations.run_all_keras_modes class ConvLSTM2DTest(test_combinations.TestCase): - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - data_format=['channels_first', 'channels_last'], - return_sequences=[True, False])) - def test_conv_lstm(self, data_format, return_sequences): - num_row = 3 - num_col = 3 - filters = 2 - num_samples = 1 - input_channel = 2 - input_num_row = 5 - input_num_col = 5 - sequence_len = 2 - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, sequence_len, - input_channel, - input_num_row, input_num_col) - else: - inputs = np.random.rand(num_samples, sequence_len, - input_num_row, input_num_col, - input_channel) - - # test for return state: - x = keras.Input(batch_shape=inputs.shape) - kwargs = {'data_format': data_format, - 'return_sequences': return_sequences, - 'return_state': True, - 'stateful': True, - 'filters': filters, - 'kernel_size': (num_row, num_col), - 'padding': 'valid'} - layer = keras.layers.ConvLSTM2D(**kwargs) - layer.build(inputs.shape) - outputs = layer(x) - _, states = outputs[0], outputs[1:] - self.assertEqual(len(states), 2) - model = keras.models.Model(x, states[0]) - state = model.predict(inputs) - - self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4) - - # test for output shape: - test_utils.layer_test( - keras.layers.ConvLSTM2D, - kwargs={'data_format': data_format, - 'return_sequences': return_sequences, - 'filters': filters, - 'kernel_size': (num_row, num_col), - 'padding': 'valid'}, - input_shape=inputs.shape) - - def test_conv_lstm_statefulness(self): - # Tests for statefulness - num_row = 3 - num_col = 3 - filters = 2 - num_samples = 1 - input_channel = 2 - input_num_row = 5 - input_num_col = 5 - sequence_len = 2 - inputs = np.random.rand(num_samples, sequence_len, - input_num_row, input_num_col, - input_channel) - - with self.cached_session(): - model = keras.models.Sequential() - kwargs = {'data_format': 'channels_last', - 'return_sequences': False, - 'filters': filters, - 'kernel_size': (num_row, num_col), - 'stateful': True, - 'batch_input_shape': inputs.shape, - 'padding': 'same'} - layer = keras.layers.ConvLSTM2D(**kwargs) - - model.add(layer) - model.compile(optimizer='sgd', loss='mse') - out1 = model.predict(np.ones_like(inputs)) - - # train once so that the states change - model.train_on_batch(np.ones_like(inputs), np.random.random(out1.shape)) - out2 = model.predict(np.ones_like(inputs)) - - # if the state is not reset, output should be different - self.assertNotEqual(out1.max(), out2.max()) - - # check that output 
changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones_like(inputs)) - self.assertNotEqual(out3.max(), out2.max()) - - # check that container-level reset_states() works - model.reset_states() - out4 = model.predict(np.ones_like(inputs)) - self.assertAllClose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones_like(inputs)) - self.assertNotEqual(out4.max(), out5.max()) - - def test_conv_lstm_regularizers(self): - # check regularizers - num_row = 3 - num_col = 3 - filters = 2 - num_samples = 1 - input_channel = 2 - input_num_row = 5 - input_num_col = 5 - sequence_len = 2 - inputs = np.random.rand(num_samples, sequence_len, - input_num_row, input_num_col, - input_channel) - - with self.cached_session(): - kwargs = {'data_format': 'channels_last', - 'return_sequences': False, - 'kernel_size': (num_row, num_col), - 'stateful': True, - 'filters': filters, - 'batch_input_shape': inputs.shape, - 'kernel_regularizer': keras.regularizers.L1L2(l1=0.01), - 'recurrent_regularizer': keras.regularizers.L1L2(l1=0.01), - 'activity_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'kernel_constraint': 'max_norm', - 'recurrent_constraint': 'max_norm', - 'bias_constraint': 'max_norm', - 'padding': 'same'} - - layer = keras.layers.ConvLSTM2D(**kwargs) - layer.build(inputs.shape) - self.assertEqual(len(layer.losses), 3) - layer(keras.backend.variable(np.ones(inputs.shape))) - self.assertEqual(len(layer.losses), 4) - - def test_conv_lstm_dropout(self): - # check dropout - with self.cached_session(): - test_utils.layer_test( - keras.layers.ConvLSTM2D, - kwargs={'data_format': 'channels_last', - 'return_sequences': False, - 'filters': 2, - 'kernel_size': (3, 3), - 'padding': 'same', - 'dropout': 0.1, - 'recurrent_dropout': 0.1}, - input_shape=(1, 2, 5, 5, 2)) - - def test_conv_lstm_cloning(self): - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.ConvLSTM2D(5, 3, input_shape=(None, 5, 5, 3))) - - test_inputs = np.random.random((2, 4, 5, 5, 3)) - reference_outputs = model.predict(test_inputs) - weights = model.get_weights() - - # Use a new graph to clone the model - with self.cached_session(): - clone = keras.models.clone_model(model) - clone.set_weights(weights) - - outputs = clone.predict(test_inputs) - self.assertAllClose(reference_outputs, outputs, atol=1e-5) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping the test as OOM occurred with 1 GB budget.') - def test_conv_lstm_with_initial_state(self): - num_samples = 32 - sequence_len = 5 - encoder_inputs = keras.layers.Input((None, 32, 32, 3)) - encoder = keras.layers.ConvLSTM2D( - filters=32, kernel_size=(3, 3), padding='same', - return_sequences=False, return_state=True) - _, state_h, state_c = encoder(encoder_inputs) - encoder_states = [state_h, state_c] - - decoder_inputs = keras.layers.Input((None, 32, 32, 4)) - decoder_lstm = keras.layers.ConvLSTM2D( - filters=32, kernel_size=(3, 3), padding='same', - return_sequences=False, return_state=False) - decoder_outputs = decoder_lstm(decoder_inputs, initial_state=encoder_states) - output = keras.layers.Conv2D( - 1, (3, 3), padding='same', activation='relu')(decoder_outputs) - model = keras.Model([encoder_inputs, decoder_inputs], output) - - model.compile( - optimizer='sgd', loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - x_1 = np.random.rand(num_samples, sequence_len, 32, 32, 3) 
- x_2 = np.random.rand(num_samples, sequence_len, 32, 32, 4) - y = np.random.rand(num_samples, 32, 32, 1) - model.fit([x_1, x_2], y) - - model.predict([x_1, x_2]) + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + data_format=["channels_first", "channels_last"], + return_sequences=[True, False], + ) + ) + def test_conv_lstm(self, data_format, return_sequences): + num_row = 3 + num_col = 3 + filters = 2 + num_samples = 1 + input_channel = 2 + input_num_row = 5 + input_num_col = 5 + sequence_len = 2 + if data_format == "channels_first": + inputs = np.random.rand( + num_samples, + sequence_len, + input_channel, + input_num_row, + input_num_col, + ) + else: + inputs = np.random.rand( + num_samples, + sequence_len, + input_num_row, + input_num_col, + input_channel, + ) + + # test for return state: + x = keras.Input(batch_shape=inputs.shape) + kwargs = { + "data_format": data_format, + "return_sequences": return_sequences, + "return_state": True, + "stateful": True, + "filters": filters, + "kernel_size": (num_row, num_col), + "padding": "valid", + } + layer = keras.layers.ConvLSTM2D(**kwargs) + layer.build(inputs.shape) + outputs = layer(x) + _, states = outputs[0], outputs[1:] + self.assertEqual(len(states), 2) + model = keras.models.Model(x, states[0]) + state = model.predict(inputs) + + self.assertAllClose( + keras.backend.eval(layer.states[0]), state, atol=1e-4 + ) + + # test for output shape: + test_utils.layer_test( + keras.layers.ConvLSTM2D, + kwargs={ + "data_format": data_format, + "return_sequences": return_sequences, + "filters": filters, + "kernel_size": (num_row, num_col), + "padding": "valid", + }, + input_shape=inputs.shape, + ) + + def test_conv_lstm_statefulness(self): + # Tests for statefulness + num_row = 3 + num_col = 3 + filters = 2 + num_samples = 1 + input_channel = 2 + input_num_row = 5 + input_num_col = 5 + sequence_len = 2 + inputs = np.random.rand( + num_samples, + sequence_len, + input_num_row, + input_num_col, + input_channel, + ) + + with self.cached_session(): + model = keras.models.Sequential() + kwargs = { + "data_format": "channels_last", + "return_sequences": False, + "filters": filters, + "kernel_size": (num_row, num_col), + "stateful": True, + "batch_input_shape": inputs.shape, + "padding": "same", + } + layer = keras.layers.ConvLSTM2D(**kwargs) + + model.add(layer) + model.compile(optimizer="sgd", loss="mse") + out1 = model.predict(np.ones_like(inputs)) + + # train once so that the states change + model.train_on_batch( + np.ones_like(inputs), np.random.random(out1.shape) + ) + out2 = model.predict(np.ones_like(inputs)) + + # if the state is not reset, output should be different + self.assertNotEqual(out1.max(), out2.max()) + + # check that output changes after states are reset + # (even though the model itself didn't change) + layer.reset_states() + out3 = model.predict(np.ones_like(inputs)) + self.assertNotEqual(out3.max(), out2.max()) + + # check that container-level reset_states() works + model.reset_states() + out4 = model.predict(np.ones_like(inputs)) + self.assertAllClose(out3, out4, atol=1e-5) + + # check that the call to `predict` updated the states + out5 = model.predict(np.ones_like(inputs)) + self.assertNotEqual(out4.max(), out5.max()) + + def test_conv_lstm_regularizers(self): + # check regularizers + num_row = 3 + num_col = 3 + filters = 2 + num_samples = 1 + input_channel = 2 + input_num_row = 5 + input_num_col = 5 + sequence_len = 2 + inputs = np.random.rand( + num_samples, + sequence_len, + 
input_num_row, + input_num_col, + input_channel, + ) + + with self.cached_session(): + kwargs = { + "data_format": "channels_last", + "return_sequences": False, + "kernel_size": (num_row, num_col), + "stateful": True, + "filters": filters, + "batch_input_shape": inputs.shape, + "kernel_regularizer": keras.regularizers.L1L2(l1=0.01), + "recurrent_regularizer": keras.regularizers.L1L2(l1=0.01), + "activity_regularizer": "l2", + "bias_regularizer": "l2", + "kernel_constraint": "max_norm", + "recurrent_constraint": "max_norm", + "bias_constraint": "max_norm", + "padding": "same", + } + + layer = keras.layers.ConvLSTM2D(**kwargs) + layer.build(inputs.shape) + self.assertEqual(len(layer.losses), 3) + layer(keras.backend.variable(np.ones(inputs.shape))) + self.assertEqual(len(layer.losses), 4) + + def test_conv_lstm_dropout(self): + # check dropout + with self.cached_session(): + test_utils.layer_test( + keras.layers.ConvLSTM2D, + kwargs={ + "data_format": "channels_last", + "return_sequences": False, + "filters": 2, + "kernel_size": (3, 3), + "padding": "same", + "dropout": 0.1, + "recurrent_dropout": 0.1, + }, + input_shape=(1, 2, 5, 5, 2), + ) + + def test_conv_lstm_cloning(self): + with self.cached_session(): + model = keras.models.Sequential() + model.add( + keras.layers.ConvLSTM2D(5, 3, input_shape=(None, 5, 5, 3)) + ) + + test_inputs = np.random.random((2, 4, 5, 5, 3)) + reference_outputs = model.predict(test_inputs) + weights = model.get_weights() + + # Use a new graph to clone the model + with self.cached_session(): + clone = keras.models.clone_model(model) + clone.set_weights(weights) + + outputs = clone.predict(test_inputs) + self.assertAllClose(reference_outputs, outputs, atol=1e-5) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message="Skipping the test as OOM occurred with 1 GB budget.", + ) + def test_conv_lstm_with_initial_state(self): + num_samples = 32 + sequence_len = 5 + encoder_inputs = keras.layers.Input((None, 32, 32, 3)) + encoder = keras.layers.ConvLSTM2D( + filters=32, + kernel_size=(3, 3), + padding="same", + return_sequences=False, + return_state=True, + ) + _, state_h, state_c = encoder(encoder_inputs) + encoder_states = [state_h, state_c] + + decoder_inputs = keras.layers.Input((None, 32, 32, 4)) + decoder_lstm = keras.layers.ConvLSTM2D( + filters=32, + kernel_size=(3, 3), + padding="same", + return_sequences=False, + return_state=False, + ) + decoder_outputs = decoder_lstm( + decoder_inputs, initial_state=encoder_states + ) + output = keras.layers.Conv2D( + 1, (3, 3), padding="same", activation="relu" + )(decoder_outputs) + model = keras.Model([encoder_inputs, decoder_inputs], output) + + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + x_1 = np.random.rand(num_samples, sequence_len, 32, 32, 3) + x_2 = np.random.rand(num_samples, sequence_len, 32, 32, 4) + y = np.random.rand(num_samples, 32, 32, 1) + model.fit([x_1, x_2], y) + + model.predict([x_1, x_2]) @test_combinations.run_all_keras_modes class ConvLSTM3DTest(test_combinations.TestCase): - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - data_format=['channels_first', 'channels_last'], - return_sequences=[True, False])) - def test_conv_lstm(self, data_format, return_sequences): - num_height = 3 - num_width = 3 - num_depth = 3 - filters = 3 - num_samples = 1 - input_channel = 2 - input_height = 5 - input_width = 5 - input_depth = 5 - sequence_len = 2 - if data_format == 
'channels_first': - inputs = np.random.rand(num_samples, sequence_len, input_channel, - input_height, input_width, input_depth) - else: - inputs = np.random.rand(num_samples, sequence_len, input_height, - input_width, input_depth, input_channel) - - # test for return state: - x = keras.Input(batch_shape=inputs.shape) - kwargs = { - 'data_format': data_format, - 'return_sequences': return_sequences, - 'return_state': True, - 'stateful': True, - 'filters': filters, - 'kernel_size': (num_height, num_width, num_depth), - 'padding': 'same' - } - layer = keras.layers.ConvLSTM3D(**kwargs) - layer.build(inputs.shape) - outputs = layer(x) - _, states = outputs[0], outputs[1:] - self.assertEqual(len(states), 2) - model = keras.models.Model(x, states[0]) - - state = model.predict(inputs) - - self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4) - - # test for output shape: - test_utils.layer_test( - keras.layers.ConvLSTM3D, - kwargs={ - 'data_format': data_format, - 'return_sequences': return_sequences, - 'filters': filters, - 'kernel_size': (num_height, num_width, num_depth), - 'padding': 'valid' - }, - input_shape=inputs.shape) - - -if __name__ == '__main__': - tf.test.main() + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + data_format=["channels_first", "channels_last"], + return_sequences=[True, False], + ) + ) + def test_conv_lstm(self, data_format, return_sequences): + num_height = 3 + num_width = 3 + num_depth = 3 + filters = 3 + num_samples = 1 + input_channel = 2 + input_height = 5 + input_width = 5 + input_depth = 5 + sequence_len = 2 + if data_format == "channels_first": + inputs = np.random.rand( + num_samples, + sequence_len, + input_channel, + input_height, + input_width, + input_depth, + ) + else: + inputs = np.random.rand( + num_samples, + sequence_len, + input_height, + input_width, + input_depth, + input_channel, + ) + + # test for return state: + x = keras.Input(batch_shape=inputs.shape) + kwargs = { + "data_format": data_format, + "return_sequences": return_sequences, + "return_state": True, + "stateful": True, + "filters": filters, + "kernel_size": (num_height, num_width, num_depth), + "padding": "same", + } + layer = keras.layers.ConvLSTM3D(**kwargs) + layer.build(inputs.shape) + outputs = layer(x) + _, states = outputs[0], outputs[1:] + self.assertEqual(len(states), 2) + model = keras.models.Model(x, states[0]) + + state = model.predict(inputs) + + self.assertAllClose( + keras.backend.eval(layer.states[0]), state, atol=1e-4 + ) + + # test for output shape: + test_utils.layer_test( + keras.layers.ConvLSTM3D, + kwargs={ + "data_format": data_format, + "return_sequences": return_sequences, + "filters": filters, + "kernel_size": (num_height, num_width, num_depth), + "padding": "valid", + }, + input_shape=inputs.shape, + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/cudnn_gru.py b/keras/layers/rnn/cudnn_gru.py index ead4431c3d64..45c7c91d53e3 100644 --- a/keras/layers/rnn/cudnn_gru.py +++ b/keras/layers/rnn/cudnn_gru.py @@ -13,195 +13,212 @@ # limitations under the License. 
# ============================================================================== """Fast GRU layer backed by cuDNN.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import collections +import tensorflow.compat.v2 as tf + from keras import constraints from keras import initializers from keras import regularizers from keras.layers.rnn import gru_lstm_utils from keras.layers.rnn.base_cudnn_rnn import _CuDNNRNN -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export(v1=['keras.layers.CuDNNGRU']) +@keras_export(v1=["keras.layers.CuDNNGRU"]) class CuDNNGRU(_CuDNNRNN): - """Fast GRU implementation backed by cuDNN. - - More information about cuDNN can be found on the [NVIDIA - developer website](https://developer.nvidia.com/cudnn). - Can only be run on GPU. - - Args: - units: Positive integer, dimensionality of the output space. - kernel_initializer: Initializer for the `kernel` weights matrix, used for - the linear transformation of the inputs. - recurrent_initializer: Initializer for the `recurrent_kernel` weights - matrix, used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation"). - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. - recurrent_constraint: Constraint function applied to the - `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. - return_sequences: Boolean. Whether to return the last output in the output - sequence, or the full sequence. - return_state: Boolean. Whether to return the last state in addition to the - output. - go_backwards: Boolean (default False). If True, process the input sequence - backwards and return the reversed sequence. - stateful: Boolean (default False). If True, the last state for each sample - at index i in a batch will be used as initial state for the sample of - index i in the following batch. 
- """ - - def __init__(self, - units, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - **kwargs): - self.units = units - cell_spec = collections.namedtuple('cell', 'state_size') - self._cell = cell_spec(state_size=self.units) - super().__init__( - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - **kwargs) - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - @property - def cell(self): - return self._cell - - def build(self, input_shape): - super().build(input_shape) - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_dim = int(input_shape[-1]) - - self.kernel = self.add_weight( - shape=(input_dim, self.units * 3), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units * 3), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - - self.bias = self.add_weight( - shape=(self.units * 6,), - name='bias', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - - self.built = True - - def _process_batch(self, inputs, initial_state): - if not self.time_major: - inputs = tf.transpose(inputs, perm=(1, 0, 2)) - input_h = initial_state[0] - input_h = tf.expand_dims(input_h, axis=0) - - params = gru_lstm_utils.canonical_to_params( - weights=[ - self.kernel[:, self.units:self.units * 2], - self.kernel[:, :self.units], - self.kernel[:, self.units * 2:], - self.recurrent_kernel[:, self.units:self.units * 2], - self.recurrent_kernel[:, :self.units], - self.recurrent_kernel[:, self.units * 2:], - ], - biases=[ - self.bias[self.units:self.units * 2], - self.bias[:self.units], - self.bias[self.units * 2:self.units * 3], - self.bias[self.units * 4:self.units * 5], - self.bias[self.units * 3:self.units * 4], - self.bias[self.units * 5:], - ], - shape=self._vector_shape) - - args = { - 'input': inputs, - 'input_h': input_h, - 'input_c': 0, - 'params': params, - 'is_training': True, - 'rnn_mode': 'gru', - } - - outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV2(**args) - - if self.stateful or self.return_state: - h = h[0] - if self.return_sequences: - if self.time_major: - output = outputs - else: - output = tf.transpose(outputs, perm=(1, 0, 2)) - else: - output = outputs[-1] - return output, [h] - - def get_config(self): - config = { - 'units': self.units, - 'kernel_initializer': 
initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Fast GRU implementation backed by cuDNN. + + More information about cuDNN can be found on the [NVIDIA + developer website](https://developer.nvidia.com/cudnn). + Can only be run on GPU. + + Args: + units: Positive integer, dimensionality of the output space. + kernel_initializer: Initializer for the `kernel` weights matrix, used + for the linear transformation of the inputs. + recurrent_initializer: Initializer for the `recurrent_kernel` weights + matrix, used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. + recurrent_regularizer: Regularizer function applied to the + `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation"). + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. + recurrent_constraint: Constraint function applied to the + `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + return_sequences: Boolean. Whether to return the last output in the + output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state in addition to + the output. + go_backwards: Boolean (default False). If True, process the input + sequence backwards and return the reversed sequence. + stateful: Boolean (default False). If True, the last state for each + sample at index i in a batch will be used as initial state for the + sample of index i in the following batch. 
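+ + Example (a minimal usage sketch, assuming a CUDA-enabled GPU and the + TF1 compat export path `tf.compat.v1.keras.layers.CuDNNGRU`; the layer + cannot run on CPU-only machines): + + ```python + import numpy as np + import tensorflow.compat.v1 as tf + + # 32 samples of 10 timesteps with 8 features each. + inputs = tf.keras.Input((10, 8)) + # Returns only the last output by default: shape (batch, 4). + outputs = tf.keras.layers.CuDNNGRU(4)(inputs) + model = tf.keras.Model(inputs, outputs) + model.predict(np.random.random((32, 10, 8))) + ```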
+ """ + + def __init__( + self, + units, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + **kwargs + ): + self.units = units + cell_spec = collections.namedtuple("cell", "state_size") + self._cell = cell_spec(state_size=self.units) + super().__init__( + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + **kwargs + ) + + self.kernel_initializer = initializers.get(kernel_initializer) + self.recurrent_initializer = initializers.get(recurrent_initializer) + self.bias_initializer = initializers.get(bias_initializer) + + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.recurrent_regularizer = regularizers.get(recurrent_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + + self.kernel_constraint = constraints.get(kernel_constraint) + self.recurrent_constraint = constraints.get(recurrent_constraint) + self.bias_constraint = constraints.get(bias_constraint) + + @property + def cell(self): + return self._cell + + def build(self, input_shape): + super().build(input_shape) + if isinstance(input_shape, list): + input_shape = input_shape[0] + input_dim = int(input_shape[-1]) + + self.kernel = self.add_weight( + shape=(input_dim, self.units * 3), + name="kernel", + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units * 3), + name="recurrent_kernel", + initializer=self.recurrent_initializer, + regularizer=self.recurrent_regularizer, + constraint=self.recurrent_constraint, + ) + + self.bias = self.add_weight( + shape=(self.units * 6,), + name="bias", + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + + self.built = True + + def _process_batch(self, inputs, initial_state): + if not self.time_major: + inputs = tf.transpose(inputs, perm=(1, 0, 2)) + input_h = initial_state[0] + input_h = tf.expand_dims(input_h, axis=0) + + params = gru_lstm_utils.canonical_to_params( + weights=[ + self.kernel[:, self.units : self.units * 2], + self.kernel[:, : self.units], + self.kernel[:, self.units * 2 :], + self.recurrent_kernel[:, self.units : self.units * 2], + self.recurrent_kernel[:, : self.units], + self.recurrent_kernel[:, self.units * 2 :], + ], + biases=[ + self.bias[self.units : self.units * 2], + self.bias[: self.units], + self.bias[self.units * 2 : self.units * 3], + self.bias[self.units * 4 : self.units * 5], + self.bias[self.units * 3 : self.units * 4], + self.bias[self.units * 5 :], + ], + shape=self._vector_shape, + ) + + args = { + "input": inputs, + "input_h": input_h, + "input_c": 0, + "params": params, + "is_training": True, + "rnn_mode": "gru", + } + + outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV2(**args) + + if self.stateful or self.return_state: + h = h[0] + if self.return_sequences: + if self.time_major: + output = outputs + else: + output = tf.transpose(outputs, perm=(1, 0, 2)) + else: + output = outputs[-1] + return output, [h] + + def get_config(self): + config = { + "units": self.units, + 
"kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/rnn/cudnn_lstm.py b/keras/layers/rnn/cudnn_lstm.py index dd37f357ff9b..69ae8e96af6b 100644 --- a/keras/layers/rnn/cudnn_lstm.py +++ b/keras/layers/rnn/cudnn_lstm.py @@ -13,218 +13,245 @@ # limitations under the License. # ============================================================================== """Fast LSTM layer backed by cuDNN.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import collections +import tensorflow.compat.v2 as tf + from keras import constraints from keras import initializers from keras import regularizers from keras.layers.rnn import gru_lstm_utils from keras.layers.rnn.base_cudnn_rnn import _CuDNNRNN -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export(v1=['keras.layers.CuDNNLSTM']) +@keras_export(v1=["keras.layers.CuDNNLSTM"]) class CuDNNLSTM(_CuDNNRNN): - """Fast LSTM implementation backed by cuDNN. - - More information about cuDNN can be found on the [NVIDIA - developer website](https://developer.nvidia.com/cudnn). - Can only be run on GPU. - - Args: - units: Positive integer, dimensionality of the output space. - kernel_initializer: Initializer for the `kernel` weights matrix, used for - the linear transformation of the inputs. - unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate - at initialization. Setting it to true will also force - `bias_initializer="zeros"`. This is recommended in [Jozefowicz et - al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) - recurrent_initializer: Initializer for the `recurrent_kernel` weights - matrix, used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation"). - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. - recurrent_constraint: Constraint function applied to the - `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. - return_sequences: Boolean. Whether to return the last output. in the - output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state in addition to the - output. - go_backwards: Boolean (default False). If True, process the input sequence - backwards and return the reversed sequence. 
- stateful: Boolean (default False). If True, the last state for each sample - at index i in a batch will be used as initial state for the sample of - index i in the following batch. - """ - - def __init__(self, - units, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - **kwargs): - self.units = units - cell_spec = collections.namedtuple('cell', 'state_size') - self._cell = cell_spec(state_size=(self.units, self.units)) - super().__init__( - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - **kwargs) - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.unit_forget_bias = unit_forget_bias - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - @property - def cell(self): - return self._cell - - def build(self, input_shape): - super().build(input_shape) - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_dim = int(input_shape[-1]) - - self.kernel = self.add_weight( - shape=(input_dim, self.units * 4), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units * 4), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - - if self.unit_forget_bias: - - def bias_initializer(_, *args, **kwargs): - return tf.concat([ - self.bias_initializer((self.units * 5,), *args, **kwargs), - tf.compat.v1.ones_initializer()((self.units,), *args, **kwargs), - self.bias_initializer((self.units * 2,), *args, **kwargs), - ], axis=0) - else: - bias_initializer = self.bias_initializer - self.bias = self.add_weight( - shape=(self.units * 8,), - name='bias', - initializer=bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - - self.built = True - - def _process_batch(self, inputs, initial_state): - if not self.time_major: - inputs = tf.transpose(inputs, perm=(1, 0, 2)) - input_h = initial_state[0] - input_c = initial_state[1] - input_h = tf.expand_dims(input_h, axis=0) - input_c = tf.expand_dims(input_c, axis=0) - - params = gru_lstm_utils.canonical_to_params( - weights=[ - self.kernel[:, :self.units], - self.kernel[:, self.units:self.units * 2], - self.kernel[:, self.units * 2:self.units * 3], - self.kernel[:, self.units * 3:], - self.recurrent_kernel[:, :self.units], - self.recurrent_kernel[:, self.units:self.units * 2], - self.recurrent_kernel[:, self.units * 2:self.units * 3], - self.recurrent_kernel[:, self.units * 3:], - ], - biases=[ - 
self.bias[:self.units], - self.bias[self.units:self.units * 2], - self.bias[self.units * 2:self.units * 3], - self.bias[self.units * 3:self.units * 4], - self.bias[self.units * 4:self.units * 5], - self.bias[self.units * 5:self.units * 6], - self.bias[self.units * 6:self.units * 7], - self.bias[self.units * 7:], - ], - shape=self._vector_shape) - - args = { - 'input': inputs, - 'input_h': input_h, - 'input_c': input_c, - 'params': params, - 'is_training': True, - } - - outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV2(**args) - - if self.stateful or self.return_state: - h = h[0] - c = c[0] - if self.return_sequences: - if self.time_major: - output = outputs - else: - output = tf.transpose(outputs, perm=(1, 0, 2)) - else: - output = outputs[-1] - return output, [h, c] - - def get_config(self): - config = { - 'units': self.units, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'unit_forget_bias': self.unit_forget_bias, - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Fast LSTM implementation backed by cuDNN. + + More information about cuDNN can be found on the [NVIDIA + developer website](https://developer.nvidia.com/cudnn). + Can only be run on GPU. + + Args: + units: Positive integer, dimensionality of the output space. + kernel_initializer: Initializer for the `kernel` weights matrix, used + for the linear transformation of the inputs. + unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate + at initialization. Setting it to True will also force + `bias_initializer="zeros"`. This is recommended in [Jozefowicz et + al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + recurrent_initializer: Initializer for the `recurrent_kernel` weights + matrix, used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. + recurrent_regularizer: Regularizer function applied to the + `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation"). + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. + recurrent_constraint: Constraint function applied to the + `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + return_sequences: Boolean. Whether to return the last output in the + output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state in addition to + the output. + go_backwards: Boolean (default False). If True, process the input + sequence backwards and return the reversed sequence. + stateful: Boolean (default False).
If True, the last state for each + sample at index i in a batch will be used as initial state for the + sample of index i in the following batch. + """ + + def __init__( + self, + units, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + **kwargs + ): + self.units = units + cell_spec = collections.namedtuple("cell", "state_size") + self._cell = cell_spec(state_size=(self.units, self.units)) + super().__init__( + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + **kwargs + ) + + self.kernel_initializer = initializers.get(kernel_initializer) + self.recurrent_initializer = initializers.get(recurrent_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.unit_forget_bias = unit_forget_bias + + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.recurrent_regularizer = regularizers.get(recurrent_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + + self.kernel_constraint = constraints.get(kernel_constraint) + self.recurrent_constraint = constraints.get(recurrent_constraint) + self.bias_constraint = constraints.get(bias_constraint) + + @property + def cell(self): + return self._cell + + def build(self, input_shape): + super().build(input_shape) + if isinstance(input_shape, list): + input_shape = input_shape[0] + input_dim = int(input_shape[-1]) + + self.kernel = self.add_weight( + shape=(input_dim, self.units * 4), + name="kernel", + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units * 4), + name="recurrent_kernel", + initializer=self.recurrent_initializer, + regularizer=self.recurrent_regularizer, + constraint=self.recurrent_constraint, + ) + + if self.unit_forget_bias: + + def bias_initializer(_, *args, **kwargs): + return tf.concat( + [ + self.bias_initializer( + (self.units * 5,), *args, **kwargs + ), + tf.compat.v1.ones_initializer()( + (self.units,), *args, **kwargs + ), + self.bias_initializer( + (self.units * 2,), *args, **kwargs + ), + ], + axis=0, + ) + + else: + bias_initializer = self.bias_initializer + self.bias = self.add_weight( + shape=(self.units * 8,), + name="bias", + initializer=bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + + self.built = True + + def _process_batch(self, inputs, initial_state): + if not self.time_major: + inputs = tf.transpose(inputs, perm=(1, 0, 2)) + input_h = initial_state[0] + input_c = initial_state[1] + input_h = tf.expand_dims(input_h, axis=0) + input_c = tf.expand_dims(input_c, axis=0) + + params = gru_lstm_utils.canonical_to_params( + weights=[ + self.kernel[:, : self.units], + self.kernel[:, self.units : self.units * 2], + self.kernel[:, self.units * 2 : self.units * 3], + self.kernel[:, self.units * 3 :], + self.recurrent_kernel[:, : self.units], + self.recurrent_kernel[:, self.units : self.units * 2], + self.recurrent_kernel[:, self.units * 2 : self.units * 3], + self.recurrent_kernel[:, self.units * 3 :], 
+ ], + biases=[ + self.bias[: self.units], + self.bias[self.units : self.units * 2], + self.bias[self.units * 2 : self.units * 3], + self.bias[self.units * 3 : self.units * 4], + self.bias[self.units * 4 : self.units * 5], + self.bias[self.units * 5 : self.units * 6], + self.bias[self.units * 6 : self.units * 7], + self.bias[self.units * 7 :], + ], + shape=self._vector_shape, + ) + + args = { + "input": inputs, + "input_h": input_h, + "input_c": input_c, + "params": params, + "is_training": True, + } + + outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV2(**args) + + if self.stateful or self.return_state: + h = h[0] + c = c[0] + if self.return_sequences: + if self.time_major: + output = outputs + else: + output = tf.transpose(outputs, perm=(1, 0, 2)) + else: + output = outputs[-1] + return output, [h, c] + + def get_config(self): + config = { + "units": self.units, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "unit_forget_bias": self.unit_forget_bias, + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/layers/rnn/cudnn_test.py b/keras/layers/rnn/cudnn_test.py index 8aac19766715..8e4a67c1e64e 100644 --- a/keras/layers/rnn/cudnn_test.py +++ b/keras/layers/rnn/cudnn_test.py @@ -14,479 +14,530 @@ # ============================================================================== """Tests for cudnn recurrent layers.""" -import tensorflow.compat.v2 as tf - import os import tempfile -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras -from tensorflow.python.framework import test_util as tf_test_utils +from keras.optimizers.legacy.rmsprop import RMSprop from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.optimizers.optimizer_v2.rmsprop import RMSprop + +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) @test_combinations.run_all_keras_modes class CuDNNTest(test_combinations.TestCase): - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM], - return_sequences=[True, False])) - @tf_test_utils.run_gpu_only - def test_cudnn_rnn_return_sequence(self, layer_class, return_sequences): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - test_utils.layer_test( - layer_class, - kwargs={'units': units, - 'return_sequences': return_sequences}, - input_shape=(num_samples, timesteps, input_size)) - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM], - go_backwards=[True, False])) - @tf_test_utils.run_gpu_only - def 
test_cudnn_rnn_go_backward(self, layer_class, go_backwards): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - test_utils.layer_test( - layer_class, - kwargs={'units': units, - 'go_backwards': go_backwards}, - input_shape=(num_samples, timesteps, input_size)) - - @parameterized.named_parameters( - ('cudnngru', keras.layers.CuDNNGRU), - ('cudnnlstm', keras.layers.CuDNNLSTM), - ) - @tf_test_utils.run_gpu_only - def test_return_state(self, layer_class): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1 - - inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size)) - layer = layer_class(units, return_state=True, stateful=True) - outputs = layer(inputs) - _, state = outputs[0], outputs[1:] - self.assertEqual(len(state), num_states) - model = keras.models.Model(inputs, state[0]) - model.run_eagerly = test_utils.should_run_eagerly() - - inputs = np.random.random((num_samples, timesteps, input_size)) - state = model.predict(inputs) - np.testing.assert_allclose( - keras.backend.eval(layer.states[0]), state, atol=1e-4) - - @parameterized.named_parameters( - ('cudnngru', keras.layers.CuDNNGRU), - ('cudnnlstm', keras.layers.CuDNNLSTM), - ) - @tf_test_utils.run_gpu_only - def test_time_major_input(self, layer_class): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - - model = keras.models.Sequential() - model.add( - keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))) - layer = layer_class(units, time_major=True, return_sequences=True) - model.add(layer) - model.add( - keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))) - model.compile(loss='categorical_crossentropy', - optimizer=RMSprop(learning_rate=0.001)) - model.fit( - np.ones((num_samples, timesteps, input_size)), - np.ones((num_samples, timesteps, units))) - out = model.predict(np.ones((num_samples, timesteps, input_size))) - self.assertEqual(out.shape, (num_samples, timesteps, units)) - - @parameterized.named_parameters( - ('cudnngru', keras.layers.CuDNNGRU), - ('cudnnlstm', keras.layers.CuDNNLSTM), - ) - @tf_test_utils.run_gpu_only - def test_specify_initial_state_keras_tensor(self, layer_class): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1 - - inputs = keras.Input((timesteps, input_size)) - initial_state = [keras.Input((units,)) for _ in range(num_states)] - layer = layer_class(units) - if len(initial_state) == 1: - output = layer(inputs, initial_state=initial_state[0]) - else: - output = layer(inputs, initial_state=initial_state) - self.assertTrue( - any(initial_state[0] is t - for t in layer._inbound_nodes[0].input_tensors)) - - model = keras.models.Model([inputs] + initial_state, output) - model.compile( - loss='categorical_crossentropy', - optimizer=RMSprop(learning_rate=0.001), - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.random.random((num_samples, timesteps, input_size)) - initial_state = [ - np.random.random((num_samples, units)) for _ in range(num_states) - ] - targets = np.random.random((num_samples, units)) - model.fit([inputs] + initial_state, targets) + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM], + return_sequences=[True, False], + ) + ) + @tf_test_utils.run_gpu_only + def test_cudnn_rnn_return_sequence(self, layer_class, return_sequences): + input_size = 10 + timesteps = 6 + units = 2 
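A quick CPU-friendly illustration of the two output shapes this parameterized test exercises, using the plain GRU layer as a stand-in for the GPU-only CuDNN layers (an assumption for illustration):

import numpy as np
import keras

x = np.ones((32, 6, 10), dtype="float32")
seq = keras.layers.GRU(2, return_sequences=True)(x)   # per-timestep outputs
last = keras.layers.GRU(2, return_sequences=False)(x)  # last output only
print(seq.shape, last.shape)  # (32, 6, 2) (32, 2)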
+ num_samples = 32 + test_utils.layer_test( + layer_class, + kwargs={"units": units, "return_sequences": return_sequences}, + input_shape=(num_samples, timesteps, input_size), + ) + + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM], + go_backwards=[True, False], + ) + ) + @tf_test_utils.run_gpu_only + def test_cudnn_rnn_go_backward(self, layer_class, go_backwards): + input_size = 10 + timesteps = 6 + units = 2 + num_samples = 32 + test_utils.layer_test( + layer_class, + kwargs={"units": units, "go_backwards": go_backwards}, + input_shape=(num_samples, timesteps, input_size), + ) + + @parameterized.named_parameters( + ("cudnngru", keras.layers.CuDNNGRU), + ("cudnnlstm", keras.layers.CuDNNLSTM), + ) + @tf_test_utils.run_gpu_only + def test_return_state(self, layer_class): + input_size = 10 + timesteps = 6 + units = 2 + num_samples = 32 + num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1 + + inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size)) + layer = layer_class(units, return_state=True, stateful=True) + outputs = layer(inputs) + _, state = outputs[0], outputs[1:] + self.assertEqual(len(state), num_states) + model = keras.models.Model(inputs, state[0]) + model.run_eagerly = test_utils.should_run_eagerly() + + inputs = np.random.random((num_samples, timesteps, input_size)) + state = model.predict(inputs) + np.testing.assert_allclose( + keras.backend.eval(layer.states[0]), state, atol=1e-4 + ) + + @parameterized.named_parameters( + ("cudnngru", keras.layers.CuDNNGRU), + ("cudnnlstm", keras.layers.CuDNNLSTM), + ) + @tf_test_utils.run_gpu_only + def test_time_major_input(self, layer_class): + input_size = 10 + timesteps = 6 + units = 2 + num_samples = 32 + + model = keras.models.Sequential() + model.add(keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))) + layer = layer_class(units, time_major=True, return_sequences=True) + model.add(layer) + model.add(keras.layers.Lambda(lambda t: tf.transpose(t, [1, 0, 2]))) + model.compile( + loss="categorical_crossentropy", + optimizer=RMSprop(learning_rate=0.001), + ) + model.fit( + np.ones((num_samples, timesteps, input_size)), + np.ones((num_samples, timesteps, units)), + ) + out = model.predict(np.ones((num_samples, timesteps, input_size))) + self.assertEqual(out.shape, (num_samples, timesteps, units)) + + @parameterized.named_parameters( + ("cudnngru", keras.layers.CuDNNGRU), + ("cudnnlstm", keras.layers.CuDNNLSTM), + ) + @tf_test_utils.run_gpu_only + def test_specify_initial_state_keras_tensor(self, layer_class): + input_size = 10 + timesteps = 6 + units = 2 + num_samples = 32 + num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1 + + inputs = keras.Input((timesteps, input_size)) + initial_state = [keras.Input((units,)) for _ in range(num_states)] + layer = layer_class(units) + if len(initial_state) == 1: + output = layer(inputs, initial_state=initial_state[0]) + else: + output = layer(inputs, initial_state=initial_state) + self.assertTrue( + any( + initial_state[0] is t + for t in layer._inbound_nodes[0].input_tensors + ) + ) + + model = keras.models.Model([inputs] + initial_state, output) + model.compile( + loss="categorical_crossentropy", + optimizer=RMSprop(learning_rate=0.001), + run_eagerly=test_utils.should_run_eagerly(), + ) + + inputs = np.random.random((num_samples, timesteps, input_size)) + initial_state = [ + np.random.random((num_samples, units)) for _ in range(num_states) + ] + targets = 
np.random.random((num_samples, units)) + model.fit([inputs] + initial_state, targets) class CuDNNGraphOnlyTest(test_combinations.TestCase): - - @parameterized.named_parameters( - ('cudnngru', keras.layers.CuDNNGRU), - ('cudnnlstm', keras.layers.CuDNNLSTM), - ) - @tf_test_utils.run_gpu_only - def test_regularizer(self, layer_class): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - with tf.Graph().as_default(): - layer = layer_class( - units, - return_sequences=False, - input_shape=(timesteps, input_size), - kernel_regularizer=keras.regularizers.l1(0.01), - recurrent_regularizer=keras.regularizers.l1(0.01), - bias_regularizer='l2') - layer.build((None, None, input_size)) - self.assertEqual(len(layer.losses), 3) - - layer = layer_class( - units, - return_sequences=False, - input_shape=(timesteps, input_size), - activity_regularizer='l2') - self.assertTrue(layer.activity_regularizer) - x = keras.backend.variable( - np.ones((num_samples, timesteps, input_size))) - layer(x) - self.assertEqual(len(layer.get_losses_for(x)), 1) - - @parameterized.named_parameters( - ('cudnngru', keras.layers.CuDNNGRU), - ('cudnnlstm', keras.layers.CuDNNLSTM), - ) - @tf_test_utils.run_gpu_only - @tf_test_utils.run_v1_only('b/120941292') - def test_statefulness(self, layer_class): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - - with self.cached_session(): - model = keras.models.Sequential() - model.add( - keras.layers.Embedding( - 10, - input_size, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = layer_class( - units, return_sequences=False, stateful=True, weights=None) - model.add(layer) - model.compile(optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - loss='mse') - out1 = model.predict(np.ones((num_samples, timesteps))) - self.assertEqual(out1.shape, (num_samples, units)) - - # train once so that the states change - model.train_on_batch( - np.ones((num_samples, timesteps)), np.ones((num_samples, units))) - out2 = model.predict(np.ones((num_samples, timesteps))) - - # if the state is not reset, output should be different - self.assertNotEqual(out1.max(), out2.max()) - - # check that output changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out2.max(), out3.max()) - - # check that container-level reset_states() works - model.reset_states() - out4 = model.predict(np.ones((num_samples, timesteps))) - self.assertAllClose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out4.max(), out5.max()) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + @parameterized.named_parameters( + ("cudnngru", keras.layers.CuDNNGRU), + ("cudnnlstm", keras.layers.CuDNNLSTM), + ) + @tf_test_utils.run_gpu_only + def test_regularizer(self, layer_class): + input_size = 10 + timesteps = 6 + units = 2 + num_samples = 32 + with tf.Graph().as_default(): + layer = layer_class( + units, + return_sequences=False, + input_shape=(timesteps, input_size), + kernel_regularizer=keras.regularizers.l1(0.01), + recurrent_regularizer=keras.regularizers.l1(0.01), + bias_regularizer="l2", + ) + layer.build((None, None, input_size)) + self.assertEqual(len(layer.losses), 3) + + layer = layer_class( + units, + return_sequences=False, + input_shape=(timesteps, input_size), + activity_regularizer="l2", + ) + 
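The regularizer bookkeeping asserted in this test can be reproduced without a GPU; a sketch using the plain LSTM layer as a stand-in (an assumption, since CuDNNLSTM requires CUDA):

import keras

layer = keras.layers.LSTM(
    2,
    kernel_regularizer=keras.regularizers.l1(0.01),
    recurrent_regularizer=keras.regularizers.l1(0.01),
    bias_regularizer="l2",
)
layer.build((None, None, 10))
print(len(layer.losses))  # 3: one penalty per regularized weight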
self.assertTrue(layer.activity_regularizer) + x = keras.backend.variable( + np.ones((num_samples, timesteps, input_size)) + ) + layer(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) + + @parameterized.named_parameters( + ("cudnngru", keras.layers.CuDNNGRU), + ("cudnnlstm", keras.layers.CuDNNLSTM), + ) + @tf_test_utils.run_gpu_only + @tf_test_utils.run_v1_only("b/120941292") + def test_statefulness(self, layer_class): + input_size = 10 + timesteps = 6 + units = 2 + num_samples = 32 + + with self.cached_session(): + model = keras.models.Sequential() + model.add( + keras.layers.Embedding( + 10, + input_size, + input_length=timesteps, + batch_input_shape=(num_samples, timesteps), + ) + ) + layer = layer_class( + units, return_sequences=False, stateful=True, weights=None + ) + model.add(layer) + model.compile( + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + loss="mse", + ) + out1 = model.predict(np.ones((num_samples, timesteps))) + self.assertEqual(out1.shape, (num_samples, units)) + + # train once so that the states change + model.train_on_batch( + np.ones((num_samples, timesteps)), np.ones((num_samples, units)) + ) + out2 = model.predict(np.ones((num_samples, timesteps))) + + # if the state is not reset, output should be different + self.assertNotEqual(out1.max(), out2.max()) + + # check that output changes after states are reset + # (even though the model itself didn't change) + layer.reset_states() + out3 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out2.max(), out3.max()) + + # check that container-level reset_states() works + model.reset_states() + out4 = model.predict(np.ones((num_samples, timesteps))) + self.assertAllClose(out3, out4, atol=1e-5) + + # check that the call to `predict` updated the states + out5 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out4.max(), out5.max()) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class CuDNNV1OnlyTest(test_combinations.TestCase): - - @tf_test_utils.run_gpu_only - def test_trainability(self): - input_size = 10 - units = 2 - for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: - layer = layer_class(units) - layer.build((None, None, input_size)) - self.assertEqual(len(layer.weights), 3) - self.assertEqual(len(layer.trainable_weights), 3) - self.assertEqual(len(layer.non_trainable_weights), 0) - layer.trainable = False - self.assertEqual(len(layer.weights), 3) - self.assertEqual(len(layer.non_trainable_weights), 3) - self.assertEqual(len(layer.trainable_weights), 0) - layer.trainable = True - self.assertEqual(len(layer.weights), 3) - self.assertEqual(len(layer.trainable_weights), 3) - self.assertEqual(len(layer.non_trainable_weights), 0) - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False], - bidirectional=[True, False], implementation=[1, 2], - model_nest_level=[1, 2], model_type=['seq', 'func'])) - @tf_test_utils.run_v1_only('b/120911602, b/112083752') - @tf_test_utils.run_gpu_only - def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn, - bidirectional, implementation, - model_nest_level, model_type): - input_size = 10 - timesteps = 6 - input_shape = (timesteps, input_size) - units = 2 - num_samples = 32 - inputs = np.random.random((num_samples, timesteps, input_size)) - - rnn_layer_kwargs = { - 'recurrent_activation': 'sigmoid', - # ensure biases are non-zero and properly converted - 'bias_initializer': 
'random_uniform', - 'implementation': implementation - } - if rnn_type == 'LSTM': - rnn_layer_class = keras.layers.LSTM - cudnn_rnn_layer_class = keras.layers.CuDNNLSTM - else: - rnn_layer_class = keras.layers.GRU - cudnn_rnn_layer_class = keras.layers.CuDNNGRU - rnn_layer_kwargs['reset_after'] = True - - layer = rnn_layer_class(units, **rnn_layer_kwargs) - if bidirectional: - layer = keras.layers.Bidirectional(layer) - - cudnn_layer = cudnn_rnn_layer_class(units) - if bidirectional: - cudnn_layer = keras.layers.Bidirectional(cudnn_layer) - - model = self._make_nested_model(input_shape, layer, model_nest_level, - model_type) - cudnn_model = self._make_nested_model(input_shape, cudnn_layer, - model_nest_level, model_type) - - if to_cudnn: - self._convert_model_weights(model, cudnn_model) - else: - self._convert_model_weights(cudnn_model, model) - - self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs), - atol=1e-4) - - def _make_nested_model(self, input_shape, layer, level=1, model_type='func'): - # example: make_nested_seq_model((1,), Dense(10), level=2).summary() - def make_nested_seq_model(input_shape, layer, level=1): - model = layer - for i in range(1, level + 1): - layers = [keras.layers.InputLayer(input_shape), - model] if (i == 1) else [model] - model = keras.models.Sequential(layers) - if i > 1: - model.build((None,) + input_shape) - return model - - # example: make_nested_func_model((1,), Dense(10), level=2).summary() - def make_nested_func_model(input_shape, layer, level=1): - model_input = keras.layers.Input(input_shape) - model = layer - for _ in range(level): - model = keras.models.Model(model_input, model(model_input)) - return model - - if model_type == 'func': - return make_nested_func_model(input_shape, layer, level) - elif model_type == 'seq': - return make_nested_seq_model(input_shape, layer, level) - - def _convert_model_weights(self, source_model, target_model): - _, fname = tempfile.mkstemp('.h5') - source_model.save_weights(fname) - target_model.load_weights(fname) - os.remove(fname) - - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False])) - @tf_test_utils.run_v1_only('b/120911602') - @tf_test_utils.run_gpu_only - def test_load_weights_between_noncudnn_rnn_time_distributed(self, rnn_type, - to_cudnn): - # Similar test as test_load_weights_between_noncudnn_rnn() but has different - # rank of input due to usage of TimeDistributed. Issue: #10356. 
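Both weight-conversion tests here rely on the same mechanism as `_convert_model_weights`: save to HDF5, then load into the other layer type, which converts the weights on load. A sketch of that round trip under those assumptions (the CuDNN target is commented out because it needs a GPU):

import os
import tempfile

import keras

source = keras.Sequential(
    [keras.layers.LSTM(2, recurrent_activation="sigmoid", input_shape=(6, 10))]
)
# target = keras.Sequential([keras.layers.CuDNNLSTM(2, input_shape=(6, 10))])
_, fname = tempfile.mkstemp(".h5")
source.save_weights(fname)
# target.load_weights(fname)  # weights are reordered/converted on load
os.remove(fname)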
- input_size = 10 - steps = 6 - timesteps = 6 - input_shape = (timesteps, steps, input_size) - units = 2 - num_samples = 32 - inputs = np.random.random((num_samples, timesteps, steps, input_size)) - - rnn_layer_kwargs = { - 'recurrent_activation': 'sigmoid', - # ensure biases are non-zero and properly converted - 'bias_initializer': 'random_uniform', - } - if rnn_type == 'LSTM': - rnn_layer_class = keras.layers.LSTM - cudnn_rnn_layer_class = keras.layers.CuDNNLSTM - else: - rnn_layer_class = keras.layers.GRU - cudnn_rnn_layer_class = keras.layers.CuDNNGRU - rnn_layer_kwargs['reset_after'] = True - - layer = rnn_layer_class(units, **rnn_layer_kwargs) - layer = keras.layers.TimeDistributed(layer) - - cudnn_layer = cudnn_rnn_layer_class(units) - cudnn_layer = keras.layers.TimeDistributed(cudnn_layer) - - model = self._make_nested_model(input_shape, layer) - cudnn_model = self._make_nested_model(input_shape, cudnn_layer) - - if to_cudnn: - self._convert_model_weights(model, cudnn_model) - else: - self._convert_model_weights(cudnn_model, model) - - self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs), - atol=1e-4) - - @tf_test_utils.run_gpu_only - def test_cudnnrnn_bidirectional(self): - rnn = keras.layers.CuDNNGRU - samples = 2 - dim = 2 - timesteps = 2 - output_dim = 2 - mode = 'concat' - - x = np.random.random((samples, timesteps, dim)) - target_dim = 2 * output_dim if mode == 'concat' else output_dim - y = np.random.random((samples, target_dim)) - - # test with Sequential model - model = keras.Sequential() - model.add( - keras.layers.Bidirectional( - rnn(output_dim), merge_mode=mode, input_shape=(None, dim))) - model.compile(loss='mse', optimizer='rmsprop') - model.fit(x, y, epochs=1, batch_size=1) - - # test config - model.get_config() - model = keras.models.model_from_json(model.to_json()) - model.summary() - - # test stacked bidirectional layers - model = keras.Sequential() - model.add( - keras.layers.Bidirectional( - rnn(output_dim, return_sequences=True), - merge_mode=mode, - input_shape=(None, dim))) - model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode)) - model.compile(loss='mse', optimizer=R'rmsprop') - model.fit(x, y, epochs=1, batch_size=1) - - # test with functional API - inputs = keras.Input((timesteps, dim)) - outputs = keras.layers.Bidirectional( - rnn(output_dim), merge_mode=mode)( - inputs) - model = keras.Model(inputs, outputs) - model.compile(loss='mse', optimizer=R'rmsprop') - model.fit(x, y, epochs=1, batch_size=1) - - # Bidirectional and stateful - inputs = keras.Input(batch_shape=(1, timesteps, dim)) - outputs = keras.layers.Bidirectional( - rnn(output_dim, stateful=True), merge_mode=mode)( - inputs) - model = keras.Model(inputs, outputs) - model.compile(loss='mse', optimizer='rmsprop') - model.fit(x, y, epochs=1, batch_size=1) - - @tf_test_utils.run_gpu_only - def test_preprocess_weights_for_loading_gru_incompatible(self): - """Test loading weights between incompatible layers. - - Should fail fast with an exception. 
- """ - input_shape = (3, 5) - - def gru(cudnn=False, **kwargs): - layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRUV1 - return layer_class(2, input_shape=input_shape, **kwargs) - - def get_layer_weights(layer): - layer.build(input_shape=input_shape) - return layer.get_weights() - - def assert_not_compatible(src, dest, message): - with self.assertRaises(ValueError) as ex: - keras.saving.hdf5_format.preprocess_weights_for_loading( - dest, - get_layer_weights(src)) - self.assertIn(message, str(ex.exception)) - - assert_not_compatible( - gru(), - gru(cudnn=True), - 'GRU(reset_after=False) is not compatible with CuDNNGRU') - assert_not_compatible( - gru(cudnn=True), - gru(), - 'CuDNNGRU is not compatible with GRU(reset_after=False)') - assert_not_compatible( - gru(), - gru(reset_after=True), - 'GRU(reset_after=False) is not compatible with ' - 'GRU(reset_after=True)') - assert_not_compatible( - gru(reset_after=True), - gru(), - 'GRU(reset_after=True) is not compatible with ' - 'GRU(reset_after=False)') - - -if __name__ == '__main__': - tf.test.main() + @tf_test_utils.run_gpu_only + def test_trainability(self): + input_size = 10 + units = 2 + for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: + layer = layer_class(units) + layer.build((None, None, input_size)) + self.assertEqual(len(layer.weights), 3) + self.assertEqual(len(layer.trainable_weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 0) + layer.trainable = False + self.assertEqual(len(layer.weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 3) + self.assertEqual(len(layer.trainable_weights), 0) + layer.trainable = True + self.assertEqual(len(layer.weights), 3) + self.assertEqual(len(layer.trainable_weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 0) + + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + rnn_type=["LSTM", "GRU"], + to_cudnn=[True, False], + bidirectional=[True, False], + implementation=[1, 2], + model_nest_level=[1, 2], + model_type=["seq", "func"], + ) + ) + @tf_test_utils.run_v1_only("b/120911602, b/112083752") + @tf_test_utils.run_gpu_only + def test_load_weights_between_noncudnn_rnn( + self, + rnn_type, + to_cudnn, + bidirectional, + implementation, + model_nest_level, + model_type, + ): + input_size = 10 + timesteps = 6 + input_shape = (timesteps, input_size) + units = 2 + num_samples = 32 + inputs = np.random.random((num_samples, timesteps, input_size)) + + rnn_layer_kwargs = { + "recurrent_activation": "sigmoid", + # ensure biases are non-zero and properly converted + "bias_initializer": "random_uniform", + "implementation": implementation, + } + if rnn_type == "LSTM": + rnn_layer_class = keras.layers.LSTM + cudnn_rnn_layer_class = keras.layers.CuDNNLSTM + else: + rnn_layer_class = keras.layers.GRU + cudnn_rnn_layer_class = keras.layers.CuDNNGRU + rnn_layer_kwargs["reset_after"] = True + + layer = rnn_layer_class(units, **rnn_layer_kwargs) + if bidirectional: + layer = keras.layers.Bidirectional(layer) + + cudnn_layer = cudnn_rnn_layer_class(units) + if bidirectional: + cudnn_layer = keras.layers.Bidirectional(cudnn_layer) + + model = self._make_nested_model( + input_shape, layer, model_nest_level, model_type + ) + cudnn_model = self._make_nested_model( + input_shape, cudnn_layer, model_nest_level, model_type + ) + + if to_cudnn: + self._convert_model_weights(model, cudnn_model) + else: + self._convert_model_weights(cudnn_model, model) + + self.assertAllClose( + model.predict(inputs), 
cudnn_model.predict(inputs), atol=1e-4 + ) + + def _make_nested_model( + self, input_shape, layer, level=1, model_type="func" + ): + # example: make_nested_seq_model((1,), Dense(10), level=2).summary() + def make_nested_seq_model(input_shape, layer, level=1): + model = layer + for i in range(1, level + 1): + layers = ( + [keras.layers.InputLayer(input_shape), model] + if (i == 1) + else [model] + ) + model = keras.models.Sequential(layers) + if i > 1: + model.build((None,) + input_shape) + return model + + # example: make_nested_func_model((1,), Dense(10), level=2).summary() + def make_nested_func_model(input_shape, layer, level=1): + model_input = keras.layers.Input(input_shape) + model = layer + for _ in range(level): + model = keras.models.Model(model_input, model(model_input)) + return model + + if model_type == "func": + return make_nested_func_model(input_shape, layer, level) + elif model_type == "seq": + return make_nested_seq_model(input_shape, layer, level) + + def _convert_model_weights(self, source_model, target_model): + _, fname = tempfile.mkstemp(".h5") + source_model.save_weights(fname) + target_model.load_weights(fname) + os.remove(fname) + + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + rnn_type=["LSTM", "GRU"], to_cudnn=[True, False] + ) + ) + @tf_test_utils.run_v1_only("b/120911602") + @tf_test_utils.run_gpu_only + def test_load_weights_between_noncudnn_rnn_time_distributed( + self, rnn_type, to_cudnn + ): + # Similar test as test_load_weights_between_noncudnn_rnn() but has + # different rank of input due to usage of TimeDistributed. Issue: + # #10356. + input_size = 10 + steps = 6 + timesteps = 6 + input_shape = (timesteps, steps, input_size) + units = 2 + num_samples = 32 + inputs = np.random.random((num_samples, timesteps, steps, input_size)) + + rnn_layer_kwargs = { + "recurrent_activation": "sigmoid", + # ensure biases are non-zero and properly converted + "bias_initializer": "random_uniform", + } + if rnn_type == "LSTM": + rnn_layer_class = keras.layers.LSTM + cudnn_rnn_layer_class = keras.layers.CuDNNLSTM + else: + rnn_layer_class = keras.layers.GRU + cudnn_rnn_layer_class = keras.layers.CuDNNGRU + rnn_layer_kwargs["reset_after"] = True + + layer = rnn_layer_class(units, **rnn_layer_kwargs) + layer = keras.layers.TimeDistributed(layer) + + cudnn_layer = cudnn_rnn_layer_class(units) + cudnn_layer = keras.layers.TimeDistributed(cudnn_layer) + + model = self._make_nested_model(input_shape, layer) + cudnn_model = self._make_nested_model(input_shape, cudnn_layer) + + if to_cudnn: + self._convert_model_weights(model, cudnn_model) + else: + self._convert_model_weights(cudnn_model, model) + + self.assertAllClose( + model.predict(inputs), cudnn_model.predict(inputs), atol=1e-4 + ) + + @tf_test_utils.run_gpu_only + def test_cudnnrnn_bidirectional(self): + rnn = keras.layers.CuDNNGRU + samples = 2 + dim = 2 + timesteps = 2 + output_dim = 2 + mode = "concat" + + x = np.random.random((samples, timesteps, dim)) + target_dim = 2 * output_dim if mode == "concat" else output_dim + y = np.random.random((samples, target_dim)) + + # test with Sequential model + model = keras.Sequential() + model.add( + keras.layers.Bidirectional( + rnn(output_dim), merge_mode=mode, input_shape=(None, dim) + ) + ) + model.compile(loss="mse", optimizer="rmsprop") + model.fit(x, y, epochs=1, batch_size=1) + + # test config + model.get_config() + model = keras.models.model_from_json(model.to_json()) + model.summary() + + # test stacked bidirectional layers + 
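A small sketch of the merge_mode="concat" doubling that `target_dim = 2 * output_dim` accounts for in the bidirectional test, with GRU standing in for CuDNNGRU (an assumption):

import numpy as np
import keras

model = keras.Sequential(
    [keras.layers.Bidirectional(keras.layers.GRU(2), input_shape=(None, 3))]
)
out = model.predict(np.ones((1, 4, 3), dtype="float32"))
print(out.shape)  # (1, 4): forward and backward outputs concatenated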
model = keras.Sequential() + model.add( + keras.layers.Bidirectional( + rnn(output_dim, return_sequences=True), + merge_mode=mode, + input_shape=(None, dim), + ) + ) + model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode)) + model.compile(loss="mse", optimizer=R"rmsprop") + model.fit(x, y, epochs=1, batch_size=1) + + # test with functional API + inputs = keras.Input((timesteps, dim)) + outputs = keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode)( + inputs + ) + model = keras.Model(inputs, outputs) + model.compile(loss="mse", optimizer=R"rmsprop") + model.fit(x, y, epochs=1, batch_size=1) + + # Bidirectional and stateful + inputs = keras.Input(batch_shape=(1, timesteps, dim)) + outputs = keras.layers.Bidirectional( + rnn(output_dim, stateful=True), merge_mode=mode + )(inputs) + model = keras.Model(inputs, outputs) + model.compile(loss="mse", optimizer="rmsprop") + model.fit(x, y, epochs=1, batch_size=1) + + @tf_test_utils.run_gpu_only + def test_preprocess_weights_for_loading_gru_incompatible(self): + """Test loading weights between incompatible layers. + + Should fail fast with an exception. + """ + input_shape = (3, 5) + + def gru(cudnn=False, **kwargs): + layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRUV1 + return layer_class(2, input_shape=input_shape, **kwargs) + + def get_layer_weights(layer): + layer.build(input_shape=input_shape) + return layer.get_weights() + + def assert_not_compatible(src, dest, message): + with self.assertRaises(ValueError) as ex: + keras.saving.legacy.hdf5_format.preprocess_weights_for_loading( + dest, get_layer_weights(src) + ) + self.assertIn(message, str(ex.exception)) + + assert_not_compatible( + gru(), + gru(cudnn=True), + "GRU(reset_after=False) is not compatible with CuDNNGRU", + ) + assert_not_compatible( + gru(cudnn=True), + gru(), + "CuDNNGRU is not compatible with GRU(reset_after=False)", + ) + assert_not_compatible( + gru(), + gru(reset_after=True), + "GRU(reset_after=False) is not compatible with " + "GRU(reset_after=True)", + ) + assert_not_compatible( + gru(reset_after=True), + gru(), + "GRU(reset_after=True) is not compatible with " + "GRU(reset_after=False)", + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/dropout_rnn_cell_mixin.py b/keras/layers/rnn/dropout_rnn_cell_mixin.py index 43c85271b479..d2ee109fc9ad 100644 --- a/keras/layers/rnn/dropout_rnn_cell_mixin.py +++ b/keras/layers/rnn/dropout_rnn_cell_mixin.py @@ -15,159 +15,165 @@ """Mixin holding dropout fields for RNN cells.""" -from keras import backend import tensorflow.compat.v2 as tf - from tensorflow.tools.docs import doc_controls +from keras import backend + @doc_controls.do_not_generate_docs class DropoutRNNCellMixin: - """Object that hold dropout related fields for RNN Cell. - - This class is not a standalone RNN cell. It suppose to be used with a RNN cell - by multiple inheritance. Any cell that mix with class should have following - fields: - dropout: a float number within range [0, 1). The ratio that the input - tensor need to dropout. - recurrent_dropout: a float number within range [0, 1). The ratio that the - recurrent state weights need to dropout. - _random_generator: A backend.RandomGenerator instance, which will be used - to produce outputs based on the inputs and dropout rate. - This object will create and cache created dropout masks, and reuse them for - the incoming data, so that the same mask is used for every batch input. 
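The caching contract this docstring describes can be observed on any cell that uses the mixin; a sketch with `LSTMCell` (chosen as an assumption; any dropout-capable cell works), run eagerly:

import tensorflow.compat.v2 as tf
import keras

cell = keras.layers.LSTMCell(4, dropout=0.5)
x = tf.ones((2, 8))
m1 = cell.get_dropout_mask_for_cell(x, training=True)
m2 = cell.get_dropout_mask_for_cell(x, training=True)
print(m1 is m2)  # True: the cached mask is reused across timesteps
cell.reset_dropout_mask()
print(cell.get_dropout_mask_for_cell(x, training=True) is m1)  # False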
- """ - - def __init__(self, *args, **kwargs): - self._create_non_trackable_mask_cache() - super().__init__(*args, **kwargs) - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def _create_non_trackable_mask_cache(self): - """Create the cache for dropout and recurrent dropout mask. - - Note that the following two masks will be used in "graph function" mode, - e.g. these masks are symbolic tensors. In eager mode, the `eager_*_mask` - tensors will be generated differently than in the "graph function" case, - and they will be cached. - - Also note that in graph mode, we still cache those masks only because the - RNN could be created with `unroll=True`. In that case, the `cell.call()` - function will be invoked multiple times, and we want to ensure same mask - is used every time. - - Also the caches are created without tracking. Since they are not picklable - by python when deepcopy, we don't want `layer._obj_reference_counts_dict` - to track it by default. - """ - self._dropout_mask_cache = backend.ContextValueCache( - self._create_dropout_mask) - self._recurrent_dropout_mask_cache = backend.ContextValueCache( - self._create_recurrent_dropout_mask) - - def reset_dropout_mask(self): - """Reset the cached dropout masks if any. - - This is important for the RNN layer to invoke this in it `call()` method so - that the cached mask is cleared before calling the `cell.call()`. The mask - should be cached across the timestep within the same batch, but shouldn't - be cached between batches. Otherwise it will introduce unreasonable bias - against certain index of data within the batch. + """Object that hold dropout related fields for RNN Cell. + + This class is not a standalone RNN cell. It suppose to be used with a RNN + cell by multiple inheritance. Any cell that mix with class should have + following fields: + dropout: a float number within range [0, 1). The ratio that the input + tensor need to dropout. + recurrent_dropout: a float number within range [0, 1). The ratio that the + recurrent state weights need to dropout. + _random_generator: A backend.RandomGenerator instance, which will be used + to produce outputs based on the inputs and dropout rate. + This object will create and cache created dropout masks, and reuse them for + the incoming data, so that the same mask is used for every batch input. """ - self._dropout_mask_cache.clear() - def reset_recurrent_dropout_mask(self): - """Reset the cached recurrent dropout masks if any. - - This is important for the RNN layer to invoke this in it call() method so - that the cached mask is cleared before calling the cell.call(). The mask - should be cached across the timestep within the same batch, but shouldn't - be cached between batches. Otherwise it will introduce unreasonable bias - against certain index of data within the batch. - """ - self._recurrent_dropout_mask_cache.clear() - - def _create_dropout_mask(self, inputs, training, count=1): - return _generate_dropout_mask( - self._random_generator, - tf.ones_like(inputs), - self.dropout, - training=training, - count=count) - - def _create_recurrent_dropout_mask(self, inputs, training, count=1): - return _generate_dropout_mask( - self._random_generator, - tf.ones_like(inputs), - self.recurrent_dropout, - training=training, - count=count) - - def get_dropout_mask_for_cell(self, inputs, training, count=1): - """Get the dropout mask for RNN cell's input. - - It will create mask based on context if there isn't any existing cached - mask. 
If a new mask is generated, it will update the cache in the cell. - - Args: - inputs: The input tensor whose shape will be used to generate dropout - mask. - training: Boolean tensor, whether its in training mode, dropout will be - ignored in non-training mode. - count: Int, how many dropout mask will be generated. It is useful for cell - that has internal weights fused together. - Returns: - List of mask tensor, generated or cached mask based on context. - """ - if self.dropout == 0: - return None - init_kwargs = dict(inputs=inputs, training=training, count=count) - return self._dropout_mask_cache.setdefault(kwargs=init_kwargs) - - def get_recurrent_dropout_mask_for_cell(self, inputs, training, count=1): - """Get the recurrent dropout mask for RNN cell. - - It will create mask based on context if there isn't any existing cached - mask. If a new mask is generated, it will update the cache in the cell. - - Args: - inputs: The input tensor whose shape will be used to generate dropout - mask. - training: Boolean tensor, whether its in training mode, dropout will be - ignored in non-training mode. - count: Int, how many dropout mask will be generated. It is useful for cell - that has internal weights fused together. - Returns: - List of mask tensor, generated or cached mask based on context. - """ - if self.recurrent_dropout == 0: - return None - init_kwargs = dict(inputs=inputs, training=training, count=count) - return self._recurrent_dropout_mask_cache.setdefault(kwargs=init_kwargs) - - def __getstate__(self): - # Used for deepcopy. The caching can't be pickled by python, since it will - # contain tensor and graph. - state = super().__getstate__() - state.pop('_dropout_mask_cache', None) - state.pop('_recurrent_dropout_mask_cache', None) - return state - - def __setstate__(self, state): - state['_dropout_mask_cache'] = backend.ContextValueCache( - self._create_dropout_mask) - state['_recurrent_dropout_mask_cache'] = backend.ContextValueCache( - self._create_recurrent_dropout_mask) - super().__setstate__(state) + def __init__(self, *args, **kwargs): + self._create_non_trackable_mask_cache() + super().__init__(*args, **kwargs) + + @tf.__internal__.tracking.no_automatic_dependency_tracking + def _create_non_trackable_mask_cache(self): + """Create the cache for dropout and recurrent dropout mask. + + Note that the following two masks will be used in "graph function" mode, + e.g. these masks are symbolic tensors. In eager mode, the `eager_*_mask` + tensors will be generated differently than in the "graph function" case, + and they will be cached. + + Also note that in graph mode, we still cache those masks only because + the RNN could be created with `unroll=True`. In that case, the + `cell.call()` function will be invoked multiple times, and we want to + ensure same mask is used every time. + + Also the caches are created without tracking. Since they are not + pickleable by python when deepcopy, we don't want + `layer._obj_reference_counts_dict` to track it by default. + """ + self._dropout_mask_cache = backend.ContextValueCache( + self._create_dropout_mask + ) + self._recurrent_dropout_mask_cache = backend.ContextValueCache( + self._create_recurrent_dropout_mask + ) + + def reset_dropout_mask(self): + """Reset the cached dropout masks if any. + + This is important for the RNN layer to invoke this in it `call()` method + so that the cached mask is cleared before calling the `cell.call()`. 
The + mask should be cached across the timestep within the same batch, but + shouldn't be cached between batches. Otherwise it will introduce + unreasonable bias against certain index of data within the batch. + """ + self._dropout_mask_cache.clear() + + def reset_recurrent_dropout_mask(self): + """Reset the cached recurrent dropout masks if any. + + This is important for the RNN layer to invoke this in it call() method + so that the cached mask is cleared before calling the cell.call(). The + mask should be cached across the timestep within the same batch, but + shouldn't be cached between batches. Otherwise it will introduce + unreasonable bias against certain index of data within the batch. + """ + self._recurrent_dropout_mask_cache.clear() + + def _create_dropout_mask(self, inputs, training, count=1): + return _generate_dropout_mask( + self._random_generator, + tf.ones_like(inputs), + self.dropout, + training=training, + count=count, + ) + + def _create_recurrent_dropout_mask(self, inputs, training, count=1): + return _generate_dropout_mask( + self._random_generator, + tf.ones_like(inputs), + self.recurrent_dropout, + training=training, + count=count, + ) + + def get_dropout_mask_for_cell(self, inputs, training, count=1): + """Get the dropout mask for RNN cell's input. + + It will create mask based on context if there isn't any existing cached + mask. If a new mask is generated, it will update the cache in the cell. + + Args: + inputs: The input tensor whose shape will be used to generate dropout + mask. + training: Boolean tensor, whether its in training mode, dropout will + be ignored in non-training mode. + count: Int, how many dropout mask will be generated. It is useful for + cell that has internal weights fused together. + Returns: + List of mask tensor, generated or cached mask based on context. + """ + if self.dropout == 0: + return None + init_kwargs = dict(inputs=inputs, training=training, count=count) + return self._dropout_mask_cache.setdefault(kwargs=init_kwargs) + + def get_recurrent_dropout_mask_for_cell(self, inputs, training, count=1): + """Get the recurrent dropout mask for RNN cell. + + It will create mask based on context if there isn't any existing cached + mask. If a new mask is generated, it will update the cache in the cell. + + Args: + inputs: The input tensor whose shape will be used to generate dropout + mask. + training: Boolean tensor, whether its in training mode, dropout will + be ignored in non-training mode. + count: Int, how many dropout mask will be generated. It is useful for + cell that has internal weights fused together. + Returns: + List of mask tensor, generated or cached mask based on context. + """ + if self.recurrent_dropout == 0: + return None + init_kwargs = dict(inputs=inputs, training=training, count=count) + return self._recurrent_dropout_mask_cache.setdefault(kwargs=init_kwargs) + + def __getstate__(self): + # Used for deepcopy. The caching can't be pickled by python, since it + # will contain tensor and graph. 
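This `__getstate__`/`__setstate__` pair is what lets dropout-capable cells survive `copy.deepcopy`: the un-picklable caches are dropped on the way out and rebuilt on the way in. A quick check (LSTMCell as an assumed example; exact deepcopy support may vary by version):

import copy

import keras

cell = keras.layers.LSTMCell(3, dropout=0.2)
clone = copy.deepcopy(cell)  # would fail if cached graph tensors were pickled
print(clone.dropout)  # 0.2; the mask caches were recreated, not copied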
+ state = super().__getstate__() + state.pop("_dropout_mask_cache", None) + state.pop("_recurrent_dropout_mask_cache", None) + return state + + def __setstate__(self, state): + state["_dropout_mask_cache"] = backend.ContextValueCache( + self._create_dropout_mask + ) + state["_recurrent_dropout_mask_cache"] = backend.ContextValueCache( + self._create_recurrent_dropout_mask + ) + super().__setstate__(state) def _generate_dropout_mask(generator, ones, rate, training=None, count=1): - def dropped_inputs(): - return generator.dropout(ones, rate) - - if count > 1: - return [ - backend.in_train_phase(dropped_inputs, ones, training=training) - for _ in range(count) - ] - return backend.in_train_phase(dropped_inputs, ones, training=training) + def dropped_inputs(): + return generator.dropout(ones, rate) + + if count > 1: + return [ + backend.in_train_phase(dropped_inputs, ones, training=training) + for _ in range(count) + ] + return backend.in_train_phase(dropped_inputs, ones, training=training) diff --git a/keras/layers/rnn/gru.py b/keras/layers/rnn/gru.py index 99a172c9bc9f..855b2561c29a 100644 --- a/keras/layers/rnn/gru.py +++ b/keras/layers/rnn/gru.py @@ -13,10 +13,12 @@ # limitations under the License. # ============================================================================== """Gated Recurrent Unit layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import uuid +import tensorflow.compat.v2 as tf + from keras import activations from keras import backend from keras import constraints @@ -29,1121 +31,1270 @@ from keras.layers.rnn.base_rnn import RNN from keras.layers.rnn.dropout_rnn_cell_mixin import DropoutRNNCellMixin from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export - RECURRENT_DROPOUT_WARNING_MSG = ( - 'RNN `implementation=2` is not supported when `recurrent_dropout` is set. ' - 'Using `implementation=1`.') + "RNN `implementation=2` is not supported when `recurrent_dropout` is set. " + "Using `implementation=1`." +) -@keras_export('keras.layers.GRUCell', v1=[]) +@keras_export("keras.layers.GRUCell", v1=[]) class GRUCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer): - """Cell class for the GRU layer. - - See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) - for details about the usage of RNN API. - - This class processes one step within the whole time sequence input, whereas - `tf.keras.layer.GRU` processes the whole sequence. - - For example: - - >>> inputs = tf.random.normal([32, 10, 8]) - >>> rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(4)) - >>> output = rnn(inputs) - >>> print(output.shape) - (32, 4) - >>> rnn = tf.keras.layers.RNN( - ... tf.keras.layers.GRUCell(4), - ... return_sequences=True, - ... return_state=True) - >>> whole_sequence_output, final_state = rnn(inputs) - >>> print(whole_sequence_output.shape) - (32, 10, 4) - >>> print(final_state.shape) - (32, 4) - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. Default: hyperbolic tangent - (`tanh`). If you pass None, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use for the recurrent step. - Default: sigmoid (`sigmoid`). If you pass `None`, no activation is - applied (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, (default `True`), whether the layer uses a bias vector. 
- kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. Default: - `glorot_uniform`. - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, used for the linear transformation of the recurrent state. - Default: `orthogonal`. - bias_initializer: Initializer for the bias vector. Default: `zeros`. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. Default: `None`. - bias_regularizer: Regularizer function applied to the bias vector. Default: - `None`. - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_constraint: Constraint function applied to the `recurrent_kernel` - weights matrix. Default: `None`. - bias_constraint: Constraint function applied to the bias vector. Default: - `None`. - dropout: Float between 0 and 1. Fraction of the units to drop for the - linear transformation of the inputs. Default: 0. - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for - the linear transformation of the recurrent state. Default: 0. - reset_after: GRU convention (whether to apply reset gate after or - before matrix multiplication). False = "before", - True = "after" (default and cuDNN compatible). - - Call arguments: - inputs: A 2D tensor, with shape of `[batch, feature]`. - states: A 2D tensor with shape of `[batch, units]`, which is the state from - the previous time step. For timestep 0, the initial state provided by user - will be feed to cell. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. Only relevant when `dropout` or - `recurrent_dropout` is used. - """ - - def __init__(self, - units, - activation='tanh', - recurrent_activation='sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - reset_after=True, - **kwargs): - if units < 0: - raise ValueError(f'Received an invalid value for argument `units`, ' - f'expected a positive integer, got {units}.') - # By default use cached variable under v2 mode, see b/143699808. 
- if tf.compat.v1.executing_eagerly_outside_functions(): - self._enable_caching_device = kwargs.pop('enable_caching_device', True) - else: - self._enable_caching_device = kwargs.pop('enable_caching_device', False) - super().__init__(**kwargs) - self.units = units - self.activation = activations.get(activation) - self.recurrent_activation = activations.get(recurrent_activation) - self.use_bias = use_bias - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.dropout = min(1., max(0., dropout)) - self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - - implementation = kwargs.pop('implementation', 2) - if self.recurrent_dropout != 0 and implementation != 1: - logging.debug(RECURRENT_DROPOUT_WARNING_MSG) - self.implementation = 1 - else: - self.implementation = implementation - self.reset_after = reset_after - self.state_size = self.units - self.output_size = self.units - - @tf_utils.shape_type_conversion - def build(self, input_shape): - input_dim = input_shape[-1] - default_caching_device = rnn_utils.caching_device(self) - self.kernel = self.add_weight( - shape=(input_dim, self.units * 3), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - caching_device=default_caching_device) - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units * 3), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint, - caching_device=default_caching_device) - - if self.use_bias: - if not self.reset_after: - bias_shape = (3 * self.units,) - else: - # separate biases for input and recurrent kernels - # Note: the shape is intentionally different from CuDNNGRU biases - # `(2 * 3 * self.units,)`, so that we can distinguish the classes - # when loading and converting saved weights. - bias_shape = (2, 3 * self.units) - self.bias = self.add_weight(shape=bias_shape, - name='bias', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - caching_device=default_caching_device) - else: - self.bias = None - self.built = True - - def call(self, inputs, states, training=None): - h_tm1 = states[0] if tf.nest.is_nested( - states) else states # previous memory - - dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=3) - rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( - h_tm1, training, count=3) - - if self.use_bias: - if not self.reset_after: - input_bias, recurrent_bias = self.bias, None - else: - input_bias, recurrent_bias = tf.unstack(self.bias) - - if self.implementation == 1: - if 0. 
< self.dropout < 1.: - inputs_z = inputs * dp_mask[0] - inputs_r = inputs * dp_mask[1] - inputs_h = inputs * dp_mask[2] - else: - inputs_z = inputs - inputs_r = inputs - inputs_h = inputs - - x_z = backend.dot(inputs_z, self.kernel[:, :self.units]) - x_r = backend.dot(inputs_r, self.kernel[:, self.units:self.units * 2]) - x_h = backend.dot(inputs_h, self.kernel[:, self.units * 2:]) - - if self.use_bias: - x_z = backend.bias_add(x_z, input_bias[:self.units]) - x_r = backend.bias_add(x_r, input_bias[self.units: self.units * 2]) - x_h = backend.bias_add(x_h, input_bias[self.units * 2:]) - - if 0. < self.recurrent_dropout < 1.: - h_tm1_z = h_tm1 * rec_dp_mask[0] - h_tm1_r = h_tm1 * rec_dp_mask[1] - h_tm1_h = h_tm1 * rec_dp_mask[2] - else: - h_tm1_z = h_tm1 - h_tm1_r = h_tm1 - h_tm1_h = h_tm1 - - recurrent_z = backend.dot(h_tm1_z, self.recurrent_kernel[:, :self.units]) - recurrent_r = backend.dot( - h_tm1_r, self.recurrent_kernel[:, self.units:self.units * 2]) - if self.reset_after and self.use_bias: - recurrent_z = backend.bias_add(recurrent_z, recurrent_bias[:self.units]) - recurrent_r = backend.bias_add( - recurrent_r, recurrent_bias[self.units:self.units * 2]) - - z = self.recurrent_activation(x_z + recurrent_z) - r = self.recurrent_activation(x_r + recurrent_r) - - # reset gate applied after/before matrix multiplication - if self.reset_after: - recurrent_h = backend.dot( - h_tm1_h, self.recurrent_kernel[:, self.units * 2:]) - if self.use_bias: - recurrent_h = backend.bias_add( - recurrent_h, recurrent_bias[self.units * 2:]) - recurrent_h = r * recurrent_h - else: - recurrent_h = backend.dot( - r * h_tm1_h, self.recurrent_kernel[:, self.units * 2:]) - - hh = self.activation(x_h + recurrent_h) - else: - if 0. < self.dropout < 1.: - inputs = inputs * dp_mask[0] - - # inputs projected by all gate matrices at once - matrix_x = backend.dot(inputs, self.kernel) - if self.use_bias: - # biases: bias_z_i, bias_r_i, bias_h_i - matrix_x = backend.bias_add(matrix_x, input_bias) - - x_z, x_r, x_h = tf.split(matrix_x, 3, axis=-1) + """Cell class for the GRU layer. + + See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + for details about the usage of RNN API. + + This class processes one step within the whole time sequence input, whereas + `tf.keras.layer.GRU` processes the whole sequence. + + For example: + + >>> inputs = tf.random.normal([32, 10, 8]) + >>> rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(4)) + >>> output = rnn(inputs) + >>> print(output.shape) + (32, 4) + >>> rnn = tf.keras.layers.RNN( + ... tf.keras.layers.GRUCell(4), + ... return_sequences=True, + ... return_state=True) + >>> whole_sequence_output, final_state = rnn(inputs) + >>> print(whole_sequence_output.shape) + (32, 10, 4) + >>> print(final_state.shape) + (32, 4) + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. Default: hyperbolic tangent + (`tanh`). If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use for the recurrent step. + Default: sigmoid (`sigmoid`). If you pass `None`, no activation is + applied (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, (default `True`), whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. Default: + `glorot_uniform`. 
+ recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, used for the linear transformation of the recurrent + state. Default: `orthogonal`. + bias_initializer: Initializer for the bias vector. Default: `zeros`. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_regularizer: Regularizer function applied to the + `recurrent_kernel` weights matrix. Default: `None`. + bias_regularizer: Regularizer function applied to the bias vector. + Default: `None`. + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_constraint: Constraint function applied to the + `recurrent_kernel` weights matrix. Default: `None`. + bias_constraint: Constraint function applied to the bias vector. Default: + `None`. + dropout: Float between 0 and 1. Fraction of the units to drop for the + linear transformation of the inputs. Default: 0. + recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + for the linear transformation of the recurrent state. Default: 0. + reset_after: GRU convention (whether to apply reset gate after or + before matrix multiplication). False = "before", + True = "after" (default and cuDNN compatible). + + Call arguments: + inputs: A 2D tensor, with shape of `[batch, feature]`. + states: A 2D tensor with shape of `[batch, units]`, which is the state + from the previous time step. For timestep 0, the initial state provided + by user will be feed to cell. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. Only relevant when `dropout` or + `recurrent_dropout` is used. + """ + + def __init__( + self, + units, + activation="tanh", + recurrent_activation="sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + reset_after=True, + **kwargs, + ): + if units <= 0: + raise ValueError( + "Received an invalid value for argument `units`, " + f"expected a positive integer, got {units}." + ) + # By default use cached variable under v2 mode, see b/143699808. 
+ if tf.compat.v1.executing_eagerly_outside_functions(): + self._enable_caching_device = kwargs.pop( + "enable_caching_device", True + ) + else: + self._enable_caching_device = kwargs.pop( + "enable_caching_device", False + ) + super().__init__(**kwargs) + self.units = units + self.activation = activations.get(activation) + self.recurrent_activation = activations.get(recurrent_activation) + self.use_bias = use_bias + + self.kernel_initializer = initializers.get(kernel_initializer) + self.recurrent_initializer = initializers.get(recurrent_initializer) + self.bias_initializer = initializers.get(bias_initializer) + + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.recurrent_regularizer = regularizers.get(recurrent_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + + self.kernel_constraint = constraints.get(kernel_constraint) + self.recurrent_constraint = constraints.get(recurrent_constraint) + self.bias_constraint = constraints.get(bias_constraint) + + self.dropout = min(1.0, max(0.0, dropout)) + self.recurrent_dropout = min(1.0, max(0.0, recurrent_dropout)) + + implementation = kwargs.pop("implementation", 2) + if self.recurrent_dropout != 0 and implementation != 1: + logging.debug(RECURRENT_DROPOUT_WARNING_MSG) + self.implementation = 1 + else: + self.implementation = implementation + self.reset_after = reset_after + self.state_size = self.units + self.output_size = self.units + + @tf_utils.shape_type_conversion + def build(self, input_shape): + super().build(input_shape) + input_dim = input_shape[-1] + default_caching_device = rnn_utils.caching_device(self) + self.kernel = self.add_weight( + shape=(input_dim, self.units * 3), + name="kernel", + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + caching_device=default_caching_device, + ) + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units * 3), + name="recurrent_kernel", + initializer=self.recurrent_initializer, + regularizer=self.recurrent_regularizer, + constraint=self.recurrent_constraint, + caching_device=default_caching_device, + ) - if self.reset_after: - # hidden state projected by all gate matrices at once - matrix_inner = backend.dot(h_tm1, self.recurrent_kernel) if self.use_bias: - matrix_inner = backend.bias_add(matrix_inner, recurrent_bias) - else: - # hidden state projected separately for update/reset and new - matrix_inner = backend.dot( - h_tm1, self.recurrent_kernel[:, :2 * self.units]) - - recurrent_z, recurrent_r, recurrent_h = tf.split( - matrix_inner, [self.units, self.units, -1], axis=-1) - - z = self.recurrent_activation(x_z + recurrent_z) - r = self.recurrent_activation(x_r + recurrent_r) - - if self.reset_after: - recurrent_h = r * recurrent_h - else: - recurrent_h = backend.dot( - r * h_tm1, self.recurrent_kernel[:, 2 * self.units:]) - - hh = self.activation(x_h + recurrent_h) - # previous and candidate state mixed by update gate - h = z * h_tm1 + (1 - z) * hh - new_state = [h] if tf.nest.is_nested(states) else h - return h, new_state - - def get_config(self): - config = { - 'units': self.units, - 'activation': activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 
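The constructor above silently downgrades `implementation=2` whenever `recurrent_dropout` is non-zero, logging RECURRENT_DROPOUT_WARNING_MSG at debug level; a quick check of that fallback:

import keras

cell = keras.layers.GRUCell(4, recurrent_dropout=0.1, implementation=2)
print(cell.implementation)  # 1: forced by the non-zero recurrent_dropout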
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout, - 'implementation': self.implementation, - 'reset_after': self.reset_after - } - config.update(rnn_utils.config_for_enable_caching_device(self)) - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + if not self.reset_after: + bias_shape = (3 * self.units,) + else: + # separate biases for input and recurrent kernels + # Note: the shape is intentionally different from CuDNNGRU + # biases `(2 * 3 * self.units,)`, so that we can distinguish the + # classes when loading and converting saved weights. + bias_shape = (2, 3 * self.units) + self.bias = self.add_weight( + shape=bias_shape, + name="bias", + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + caching_device=default_caching_device, + ) + else: + self.bias = None + self.built = True - def get_initial_state(self, inputs=None, batch_size=None, dtype=None): - return rnn_utils.generate_zero_filled_state_for_cell( - self, inputs, batch_size, dtype) + def call(self, inputs, states, training=None): + h_tm1 = ( + states[0] if tf.nest.is_nested(states) else states + ) # previous memory + dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=3) + rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( + h_tm1, training, count=3 + ) -@keras_export('keras.layers.GRU', v1=[]) + if self.use_bias: + if not self.reset_after: + input_bias, recurrent_bias = self.bias, None + else: + input_bias, recurrent_bias = tf.unstack(self.bias) + + if self.implementation == 1: + if 0.0 < self.dropout < 1.0: + inputs_z = inputs * dp_mask[0] + inputs_r = inputs * dp_mask[1] + inputs_h = inputs * dp_mask[2] + else: + inputs_z = inputs + inputs_r = inputs + inputs_h = inputs + + x_z = backend.dot(inputs_z, self.kernel[:, : self.units]) + x_r = backend.dot( + inputs_r, self.kernel[:, self.units : self.units * 2] + ) + x_h = backend.dot(inputs_h, self.kernel[:, self.units * 2 :]) + + if self.use_bias: + x_z = backend.bias_add(x_z, input_bias[: self.units]) + x_r = backend.bias_add( + x_r, input_bias[self.units : self.units * 2] + ) + x_h = backend.bias_add(x_h, input_bias[self.units * 2 :]) + + if 0.0 < self.recurrent_dropout < 1.0: + h_tm1_z = h_tm1 * rec_dp_mask[0] + h_tm1_r = h_tm1 * rec_dp_mask[1] + h_tm1_h = h_tm1 * rec_dp_mask[2] + else: + h_tm1_z = h_tm1 + h_tm1_r = h_tm1 + h_tm1_h = h_tm1 + + recurrent_z = backend.dot( + h_tm1_z, self.recurrent_kernel[:, : self.units] + ) + recurrent_r = backend.dot( + h_tm1_r, self.recurrent_kernel[:, self.units : self.units * 2] + ) + if self.reset_after and self.use_bias: + recurrent_z = backend.bias_add( + recurrent_z, recurrent_bias[: self.units] + ) + recurrent_r = backend.bias_add( + recurrent_r, recurrent_bias[self.units : self.units * 2] + ) + + z = self.recurrent_activation(x_z + recurrent_z) + r = self.recurrent_activation(x_r + recurrent_r) + + # reset gate applied after/before matrix multiplication + if self.reset_after: + recurrent_h = backend.dot( + h_tm1_h, self.recurrent_kernel[:, self.units * 2 :] + 
) + if self.use_bias: + recurrent_h = backend.bias_add( + recurrent_h, recurrent_bias[self.units * 2 :] + ) + recurrent_h = r * recurrent_h + else: + recurrent_h = backend.dot( + r * h_tm1_h, self.recurrent_kernel[:, self.units * 2 :] + ) + + hh = self.activation(x_h + recurrent_h) + else: + if 0.0 < self.dropout < 1.0: + inputs = inputs * dp_mask[0] + + # inputs projected by all gate matrices at once + matrix_x = backend.dot(inputs, self.kernel) + if self.use_bias: + # biases: bias_z_i, bias_r_i, bias_h_i + matrix_x = backend.bias_add(matrix_x, input_bias) + + x_z, x_r, x_h = tf.split(matrix_x, 3, axis=-1) + + if self.reset_after: + # hidden state projected by all gate matrices at once + matrix_inner = backend.dot(h_tm1, self.recurrent_kernel) + if self.use_bias: + matrix_inner = backend.bias_add( + matrix_inner, recurrent_bias + ) + else: + # hidden state projected separately for update/reset and new + matrix_inner = backend.dot( + h_tm1, self.recurrent_kernel[:, : 2 * self.units] + ) + + recurrent_z, recurrent_r, recurrent_h = tf.split( + matrix_inner, [self.units, self.units, -1], axis=-1 + ) + + z = self.recurrent_activation(x_z + recurrent_z) + r = self.recurrent_activation(x_r + recurrent_r) + + if self.reset_after: + recurrent_h = r * recurrent_h + else: + recurrent_h = backend.dot( + r * h_tm1, self.recurrent_kernel[:, 2 * self.units :] + ) + + hh = self.activation(x_h + recurrent_h) + # previous and candidate state mixed by update gate + h = z * h_tm1 + (1 - z) * hh + new_state = [h] if tf.nest.is_nested(states) else h + return h, new_state + + def get_config(self): + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "recurrent_activation": activations.serialize( + self.recurrent_activation + ), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + "implementation": self.implementation, + "reset_after": self.reset_after, + } + config.update(rnn_utils.config_for_enable_caching_device(self)) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def get_initial_state(self, inputs=None, batch_size=None, dtype=None): + return rnn_utils.generate_zero_filled_state_for_cell( + self, inputs, batch_size, dtype + ) + + +@keras_export("keras.layers.GRU", v1=[]) class GRU(DropoutRNNCellMixin, RNN, base_layer.BaseRandomLayer): - """Gated Recurrent Unit - Cho et al. 2014. - - See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) - for details about the usage of RNN API. - - Based on available runtime hardware and constraints, this layer - will choose different implementations (cuDNN-based or pure-TensorFlow) - to maximize the performance. 
If a GPU is available and all - the arguments to the layer meet the requirement of the cuDNN kernel - (see below for details), the layer will use a fast cuDNN implementation. - - The requirements to use the cuDNN implementation are: - - 1. `activation` == `tanh` - 2. `recurrent_activation` == `sigmoid` - 3. `recurrent_dropout` == 0 - 4. `unroll` is `False` - 5. `use_bias` is `True` - 6. `reset_after` is `True` - 7. Inputs, if use masking, are strictly right-padded. - 8. Eager execution is enabled in the outermost context. - - There are two variants of the GRU implementation. The default one is based on - [v3](https://arxiv.org/abs/1406.1078v3) and has reset gate applied to hidden - state before matrix multiplication. The other one is based on - [original](https://arxiv.org/abs/1406.1078v1) and has the order reversed. - - The second variant is compatible with CuDNNGRU (GPU-only) and allows - inference on CPU. Thus it has separate biases for `kernel` and - `recurrent_kernel`. To use this variant, set `reset_after=True` and - `recurrent_activation='sigmoid'`. - - For example: - - >>> inputs = tf.random.normal([32, 10, 8]) - >>> gru = tf.keras.layers.GRU(4) - >>> output = gru(inputs) - >>> print(output.shape) - (32, 4) - >>> gru = tf.keras.layers.GRU(4, return_sequences=True, return_state=True) - >>> whole_sequence_output, final_state = gru(inputs) - >>> print(whole_sequence_output.shape) - (32, 10, 4) - >>> print(final_state.shape) - (32, 4) - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - Default: hyperbolic tangent (`tanh`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step. - Default: sigmoid (`sigmoid`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, (default `True`), whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. Default: - `glorot_uniform`. - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, used for the linear transformation of the recurrent - state. Default: `orthogonal`. - bias_initializer: Initializer for the bias vector. Default: `zeros`. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. Default: `None`. - bias_regularizer: Regularizer function applied to the bias vector. Default: - `None`. - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation"). Default: `None`. - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_constraint: Constraint function applied to the `recurrent_kernel` - weights matrix. Default: `None`. - bias_constraint: Constraint function applied to the bias vector. Default: - `None`. - dropout: Float between 0 and 1. Fraction of the units to drop for the linear - transformation of the inputs. Default: 0. - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for - the linear transformation of the recurrent state. Default: 0. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. Default: `False`. - return_state: Boolean. 
Whether to return the last state in addition to the - output. Default: `False`. - go_backwards: Boolean (default `False`). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - time_major: The shape format of the `inputs` and `outputs` tensors. - If True, the inputs and outputs will be in shape - `[timesteps, batch, feature]`, whereas in the False case, it will be - `[batch, timesteps, feature]`. Using `time_major = True` is a bit more - efficient because it avoids transposes at the beginning and end of the - RNN calculation. However, most TensorFlow data is batch-major, so by - default this function accepts input and emits output in batch-major - form. - reset_after: GRU convention (whether to apply reset gate after or - before matrix multiplication). False = "before", - True = "after" (default and cuDNN compatible). - - Call arguments: - inputs: A 3D tensor, with shape `[batch, timesteps, feature]`. - mask: Binary tensor of shape `[samples, timesteps]` indicating whether - a given timestep should be masked (optional, defaults to `None`). - An individual `True` entry indicates that the corresponding timestep - should be utilized, while a `False` entry indicates that the - corresponding timestep should be ignored. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the cell - when calling it. This is only relevant if `dropout` or - `recurrent_dropout` is used (optional, defaults to `None`). - initial_state: List of initial state tensors to be passed to the first - call of the cell (optional, defaults to `None` which causes creation - of zero-filled initial state tensors). - """ - - def __init__(self, - units, - activation='tanh', - recurrent_activation='sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - time_major=False, - reset_after=True, - **kwargs): - # return_runtime is a flag for testing, which shows the real backend - # implementation chosen by grappler in graph mode. - self._return_runtime = kwargs.pop('return_runtime', False) - implementation = kwargs.pop('implementation', 2) - if implementation == 0: - logging.warning('`implementation=0` has been deprecated, ' - 'and now defaults to `implementation=2`.' - 'Please update your layer call.') - if 'enable_caching_device' in kwargs: - cell_kwargs = {'enable_caching_device': - kwargs.pop('enable_caching_device')} - else: - cell_kwargs = {} - cell = GRUCell( + """Gated Recurrent Unit - Cho et al. 2014. + + See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + for details about the usage of RNN API. 
+ + Based on available runtime hardware and constraints, this layer + will choose different implementations (cuDNN-based or pure-TensorFlow) + to maximize the performance. If a GPU is available and all + the arguments to the layer meet the requirement of the cuDNN kernel + (see below for details), the layer will use a fast cuDNN implementation. + + The requirements to use the cuDNN implementation are: + + 1. `activation` == `tanh` + 2. `recurrent_activation` == `sigmoid` + 3. `recurrent_dropout` == 0 + 4. `unroll` is `False` + 5. `use_bias` is `True` + 6. `reset_after` is `True` + 7. Inputs, if use masking, are strictly right-padded. + 8. Eager execution is enabled in the outermost context. + + There are two variants of the GRU implementation. The default one is based + on [v3](https://arxiv.org/abs/1406.1078v3) and has reset gate applied to + hidden state before matrix multiplication. The other one is based on + [original](https://arxiv.org/abs/1406.1078v1) and has the order reversed. + + The second variant is compatible with CuDNNGRU (GPU-only) and allows + inference on CPU. Thus it has separate biases for `kernel` and + `recurrent_kernel`. To use this variant, set `reset_after=True` and + `recurrent_activation='sigmoid'`. + + For example: + + >>> inputs = tf.random.normal([32, 10, 8]) + >>> gru = tf.keras.layers.GRU(4) + >>> output = gru(inputs) + >>> print(output.shape) + (32, 4) + >>> gru = tf.keras.layers.GRU(4, return_sequences=True, return_state=True) + >>> whole_sequence_output, final_state = gru(inputs) + >>> print(whole_sequence_output.shape) + (32, 10, 4) + >>> print(final_state.shape) + (32, 4) + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step. + Default: sigmoid (`sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, (default `True`), whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. Default: + `glorot_uniform`. + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, used for the linear transformation of the recurrent + state. Default: `orthogonal`. + bias_initializer: Initializer for the bias vector. Default: `zeros`. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_regularizer: Regularizer function applied to the + `recurrent_kernel` weights matrix. Default: `None`. + bias_regularizer: Regularizer function applied to the bias vector. + Default: `None`. + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation"). Default: `None`. + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_constraint: Constraint function applied to the + `recurrent_kernel` weights matrix. Default: `None`. + bias_constraint: Constraint function applied to the bias vector. Default: + `None`. + dropout: Float between 0 and 1. Fraction of the units to drop for the + linear transformation of the inputs. Default: 0. + recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + for the linear transformation of the recurrent state. Default: 0. + return_sequences: Boolean. 
Whether to return the last output + in the output sequence, or the full sequence. Default: `False`. + return_state: Boolean. Whether to return the last state in addition to the + output. Default: `False`. + go_backwards: Boolean (default `False`). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + time_major: The shape format of the `inputs` and `outputs` tensors. + If True, the inputs and outputs will be in shape + `[timesteps, batch, feature]`, whereas in the False case, it will be + `[batch, timesteps, feature]`. Using `time_major = True` is a bit more + efficient because it avoids transposes at the beginning and end of the + RNN calculation. However, most TensorFlow data is batch-major, so by + default this function accepts input and emits output in batch-major + form. + reset_after: GRU convention (whether to apply reset gate after or + before matrix multiplication). False = "before", + True = "after" (default and cuDNN compatible). + + Call arguments: + inputs: A 3D tensor, with shape `[batch, timesteps, feature]`. + mask: Binary tensor of shape `[samples, timesteps]` indicating whether + a given timestep should be masked (optional). + An individual `True` entry indicates that the corresponding timestep + should be utilized, while a `False` entry indicates that the + corresponding timestep should be ignored. Defaults to `None`. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. This argument is passed to the cell + when calling it. This is only relevant if `dropout` or + `recurrent_dropout` is used (optional). Defaults to `None`. + initial_state: List of initial state tensors to be passed to the first + call of the cell (optional, `None` causes creation + of zero-filled initial state tensors). Defaults to `None`. + """ + + def __init__( + self, units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=implementation, - reset_after=reset_after, - dtype=kwargs.get('dtype'), - trainable=kwargs.get('trainable', True), - **cell_kwargs) - super().__init__( - cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - time_major=time_major, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.input_spec = [InputSpec(ndim=3)] - - # GPU kernel uses following setting by default and not configurable. 
- self._could_use_gpu_kernel = ( - self.activation in (activations.tanh, tf.tanh) and - self.recurrent_activation in (activations.sigmoid, tf.sigmoid) and - recurrent_dropout == 0 and not unroll and use_bias and - reset_after and tf.compat.v1.executing_eagerly_outside_functions()) - if tf.config.list_logical_devices('GPU'): - # Only show the message when there is GPU available, user will not care - # about the cuDNN if there isn't any GPU. - if self._could_use_gpu_kernel: - logging.debug(gru_lstm_utils.CUDNN_AVAILABLE_MSG % self.name) - else: - logging.warning(gru_lstm_utils.CUDNN_NOT_AVAILABLE_MSG % self.name) - - if gru_lstm_utils.use_new_gru_lstm_impl(): - self._defun_wrapper = gru_lstm_utils.DefunWrapper( - time_major, go_backwards, 'gru') + activation="tanh", + recurrent_activation="sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + time_major=False, + reset_after=True, + **kwargs, + ): + # return_runtime is a flag for testing, which shows the real backend + # implementation chosen by grappler in graph mode. + self._return_runtime = kwargs.pop("return_runtime", False) + implementation = kwargs.pop("implementation", 2) + if implementation == 0: + logging.warning( + "`implementation=0` has been deprecated, " + "and now defaults to `implementation=2`." + "Please update your layer call." + ) + if "enable_caching_device" in kwargs: + cell_kwargs = { + "enable_caching_device": kwargs.pop("enable_caching_device") + } + else: + cell_kwargs = {} + cell = GRUCell( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=implementation, + reset_after=reset_after, + dtype=kwargs.get("dtype"), + trainable=kwargs.get("trainable", True), + name="gru_cell", + **cell_kwargs, + ) + super().__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + time_major=time_major, + **kwargs, + ) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.input_spec = [InputSpec(ndim=3)] + + # GPU kernel uses following setting by default and not configurable. + self._could_use_gpu_kernel = ( + self.activation in (activations.tanh, tf.tanh) + and self.recurrent_activation in (activations.sigmoid, tf.sigmoid) + and recurrent_dropout == 0 + and not unroll + and use_bias + and reset_after + and tf.compat.v1.executing_eagerly_outside_functions() + ) + if tf.config.list_logical_devices("GPU"): + # Only show the message when there is GPU available, user will not + # care about the cuDNN if there isn't any GPU. 
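+            # The log line below is informational only; the actual kernel is
+            # chosen per call, at run time, from the device placement and the
+            # inputs.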
+ if self._could_use_gpu_kernel: + logging.debug(gru_lstm_utils.CUDNN_AVAILABLE_MSG % self.name) + else: + logging.warning( + gru_lstm_utils.CUDNN_NOT_AVAILABLE_MSG % self.name + ) + + if gru_lstm_utils.use_new_gru_lstm_impl(): + self._defun_wrapper = gru_lstm_utils.DefunWrapper( + time_major, go_backwards, "gru" + ) + + def call(self, inputs, mask=None, training=None, initial_state=None): + # The input should be dense, padded with zeros. If a ragged input is fed + # into the layer, it is padded and the row lengths are used for masking. + inputs, row_lengths = backend.convert_inputs_if_ragged(inputs) + is_ragged_input = row_lengths is not None + self._validate_args_if_ragged(is_ragged_input, mask) + + # GRU does not support constants. Ignore it during process. + inputs, initial_state, _ = self._process_inputs( + inputs, initial_state, None + ) + + if isinstance(mask, list): + mask = mask[0] + + input_shape = backend.int_shape(inputs) + timesteps = input_shape[0] if self.time_major else input_shape[1] + + if not self._could_use_gpu_kernel: + kwargs = {"training": training} + self._maybe_reset_cell_dropout_mask(self.cell) + + def step(cell_inputs, cell_states): + return self.cell(cell_inputs, cell_states, **kwargs) + + last_output, outputs, states = backend.rnn( + step, + inputs, + initial_state, + constants=None, + go_backwards=self.go_backwards, + mask=mask, + unroll=self.unroll, + input_length=row_lengths + if row_lengths is not None + else timesteps, + time_major=self.time_major, + zero_output_for_mask=self.zero_output_for_mask, + return_all_outputs=self.return_sequences, + ) + # This is a dummy tensor for testing purpose. + runtime = gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_UNKNOWN) + else: + last_output, outputs, runtime, states = self._defun_gru_call( + inputs, initial_state, training, mask, row_lengths + ) + + if self.stateful: + updates = [ + tf.compat.v1.assign( + self.states[0], tf.cast(states[0], self.states[0].dtype) + ) + ] + self.add_update(updates) + + if self.return_sequences: + output = backend.maybe_convert_to_ragged( + is_ragged_input, + outputs, + row_lengths, + go_backwards=self.go_backwards, + ) + else: + output = last_output - def call(self, inputs, mask=None, training=None, initial_state=None): - # The input should be dense, padded with zeros. If a ragged input is fed - # into the layer, it is padded and the row lengths are used for masking. 
- inputs, row_lengths = backend.convert_inputs_if_ragged(inputs) - is_ragged_input = (row_lengths is not None) - self._validate_args_if_ragged(is_ragged_input, mask) + if self.return_state: + return [output] + list(states) + elif self._return_runtime: + return output, runtime + else: + return output + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + @property + def implementation(self): + return self.cell.implementation + + @property + def reset_after(self): + return self.cell.reset_after + + def get_config(self): + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "recurrent_activation": activations.serialize( + self.recurrent_activation + ), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + "implementation": self.implementation, + "reset_after": self.reset_after, + } + config.update(rnn_utils.config_for_enable_caching_device(self.cell)) + base_config = super().get_config() + del base_config["cell"] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if "implementation" in config and config["implementation"] == 0: + config["implementation"] = 1 + return cls(**config) + + def _defun_gru_call( + self, inputs, initial_state, training, mask, sequence_lengths + ): + # Use the new defun approach for backend implementation swap. + # Note that different implementations need to have same function + # signature, eg, the tensor parameters need to have same shape and + # dtypes. 
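+        # Dropout is pre-applied to the inputs below because the cuDNN kernel
+        # has no dropout support of its own; only the first of the three
+        # generated masks is needed, since a single dropped-out copy of the
+        # inputs feeds all three gates.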
+
+        self.reset_dropout_mask()
+        dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=3)
+        if dropout_mask is not None:
+            inputs = inputs * dropout_mask[0]
+
+        if gru_lstm_utils.use_new_gru_lstm_impl():
+            gru_kwargs = {
+                "inputs": inputs,
+                "init_h": gru_lstm_utils.read_variable_value(initial_state[0]),
+                "kernel": gru_lstm_utils.read_variable_value(self.cell.kernel),
+                "recurrent_kernel": gru_lstm_utils.read_variable_value(
+                    self.cell.recurrent_kernel
+                ),
+                "bias": gru_lstm_utils.read_variable_value(self.cell.bias),
+                "mask": mask,
+                "time_major": self.time_major,
+                "go_backwards": self.go_backwards,
+                "sequence_lengths": sequence_lengths,
+                "zero_output_for_mask": self.zero_output_for_mask,
+            }
+            (
+                last_output,
+                outputs,
+                new_h,
+                runtime,
+            ) = self._defun_wrapper.defun_layer(**gru_kwargs)
+        else:
+            gpu_gru_kwargs = {
+                "inputs": inputs,
+                "init_h": gru_lstm_utils.read_variable_value(initial_state[0]),
+                "kernel": gru_lstm_utils.read_variable_value(self.cell.kernel),
+                "recurrent_kernel": gru_lstm_utils.read_variable_value(
+                    self.cell.recurrent_kernel
+                ),
+                "bias": gru_lstm_utils.read_variable_value(self.cell.bias),
+                "mask": mask,
+                "time_major": self.time_major,
+                "go_backwards": self.go_backwards,
+                "sequence_lengths": sequence_lengths,
+                "return_sequences": self.return_sequences,
+            }
+            normal_gru_kwargs = gpu_gru_kwargs.copy()
+            normal_gru_kwargs.update(
+                {
+                    "zero_output_for_mask": self.zero_output_for_mask,
+                }
+            )
+
+            if tf.executing_eagerly():
+                device_type = gru_lstm_utils.get_context_device_type()
+                can_use_gpu = (
+                    # Either user specified GPU or unspecified but GPU is
+                    # available.
+                    (
+                        device_type == gru_lstm_utils.GPU_DEVICE_NAME
+                        or (
+                            device_type is None
+                            and tf.config.list_logical_devices("GPU")
+                        )
+                    )
+                    and (
+                        gru_lstm_utils.is_cudnn_supported_inputs(
+                            mask, self.time_major, sequence_lengths
+                        )
+                    )
+                )
+                # Under eager context, check the device placement and prefer
+                # the GPU implementation when it is available.
+                if can_use_gpu:
+                    last_output, outputs, new_h, runtime = gpu_gru(
+                        **gpu_gru_kwargs
+                    )
+                else:
+                    last_output, outputs, new_h, runtime = standard_gru(
+                        **normal_gru_kwargs
+                    )
+            else:
+                (
+                    last_output,
+                    outputs,
+                    new_h,
+                    runtime,
+                ) = gru_with_backend_selection(**normal_gru_kwargs)
+
+        states = [new_h]
+        return last_output, outputs, runtime, states
+
+
+def standard_gru(
+    inputs,
+    init_h,
+    kernel,
+    recurrent_kernel,
+    bias,
+    mask,
+    time_major,
+    go_backwards,
+    sequence_lengths,
+    zero_output_for_mask,
+    return_sequences,
+):
+    """GRU with standard kernel implementation.
+
+    This implementation can be run on all types of hardware.
+
+    This implementation lifts out all the layer weights and makes them
+    function parameters. It has the same number of tensor input params as the
+    cuDNN counterpart. The RNN step logic has been simplified, e.g. dropout
+    and masking are removed since the cuDNN implementation does not support
+    them.
+
+    Args:
+      inputs: Input tensor of GRU layer.
+      init_h: Initial state tensor for the cell output.
+      kernel: Weights for cell kernel.
+      recurrent_kernel: Weights for cell recurrent kernel.
+      bias: Weights for cell kernel bias and recurrent bias. The bias contains
+        the combined input_bias and recurrent_bias.
+      mask: Binary tensor of shape `(samples, timesteps)` indicating whether
+        a given timestep should be masked. An individual `True` entry
+        indicates that the corresponding timestep should be utilized, while a
+        `False` entry indicates that the corresponding timestep should be
+        ignored.
+      time_major: Boolean, whether the inputs are in the format of
+        [time, batch, feature] or [batch, time, feature].
+      go_backwards: Boolean (default False). If True, process the input
+        sequence backwards and return the reversed sequence.
+      sequence_lengths: The lengths of all sequences coming from a variable
+        length input, such as ragged tensors. If the input has a fixed
+        timestep size, this should be None.
+      zero_output_for_mask: Boolean, whether to output zero for masked
+        timesteps.
+      return_sequences: Boolean. If True, return the recurrent outputs for all
+        timesteps in the sequence. If False, only return the output for the
+        last timestep (which consumes less memory).
+
+    Returns:
+      last_output: output tensor for the last timestep, which has shape
+        [batch, units].
+      outputs:
+        - If `return_sequences=True`: output tensor for all timesteps,
+          which has shape [batch, time, units].
+        - Else, a tensor equal to `last_output` with shape [batch, 1, units]
+      state_0: the cell output, which has the same shape as init_h.
+      runtime: constant string tensor which indicates the real runtime
+        hardware. This value is for testing purposes and should not be used
+        by the user.
+    """
+    input_shape = backend.int_shape(inputs)
+    timesteps = input_shape[0] if time_major else input_shape[1]
-
-    # GRU does not support constants. Ignore it during process.
-    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)
+    input_bias, recurrent_bias = tf.unstack(bias)
-
-    if isinstance(mask, list):
-      mask = mask[0]
+    def step(cell_inputs, cell_states):
+        """Step function that will be used by Keras RNN backend."""
+        h_tm1 = cell_states[0]
-
-    input_shape = backend.int_shape(inputs)
-    timesteps = input_shape[0] if self.time_major else input_shape[1]
-
-    if not self._could_use_gpu_kernel:
-      kwargs = {'training': training}
-      self._maybe_reset_cell_dropout_mask(self.cell)
-
-      def step(cell_inputs, cell_states):
-        return self.cell(cell_inputs, cell_states, **kwargs)
-
-      last_output, outputs, states = backend.rnn(
-          step,
-          inputs,
-          initial_state,
-          constants=None,
-          go_backwards=self.go_backwards,
-          mask=mask,
-          unroll=self.unroll,
-          input_length=row_lengths if row_lengths is not None else timesteps,
-          time_major=self.time_major,
-          zero_output_for_mask=self.zero_output_for_mask,
-          return_all_outputs=self.return_sequences)
-      # This is a dummy tensor for testing purpose.
-      runtime = gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_UNKNOWN)
-    else:
-      last_output, outputs, runtime, states = self._defun_gru_call(
-          inputs, initial_state, training, mask, row_lengths)
+        # inputs projected by all gate matrices at once
+        matrix_x = backend.dot(cell_inputs, kernel)
+        matrix_x = backend.bias_add(matrix_x, input_bias)
-    if self.stateful:
-      updates = [tf.compat.v1.assign(self.states[0],
-                                     tf.cast(states[0], self.states[0].dtype))]
-      self.add_update(updates)
+        x_z, x_r, x_h = tf.split(matrix_x, 3, axis=1)
-    if self.return_sequences:
-      output = backend.maybe_convert_to_ragged(
-          is_ragged_input, outputs, row_lengths, go_backwards=self.go_backwards)
+        # hidden state projected by all gate matrices at once
+        matrix_inner = backend.dot(h_tm1, recurrent_kernel)
+        matrix_inner = backend.bias_add(matrix_inner, recurrent_bias)
+
+        recurrent_z, recurrent_r, recurrent_h = tf.split(
+            matrix_inner, 3, axis=1
+        )
+        z = tf.sigmoid(x_z + recurrent_z)
+        r = tf.sigmoid(x_r + recurrent_r)
+        hh = tf.tanh(x_h + r * recurrent_h)
+
+        # previous and candidate state mixed by update gate
+        h = z * h_tm1 + (1 - z) * hh
+        return h, [h]
+
+    last_output, outputs, new_states = backend.rnn(
+        step,
+        inputs,
+        [init_h],
+        constants=None,
+        unroll=False,
+        time_major=time_major,
+        mask=mask,
+        go_backwards=go_backwards,
+        input_length=sequence_lengths
+        if sequence_lengths is not None
+        else timesteps,
+        zero_output_for_mask=zero_output_for_mask,
+        return_all_outputs=return_sequences,
+    )
+    return (
+        last_output,
+        outputs,
+        new_states[0],
+        gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_CPU),
+    )
+
+
+def gpu_gru(
+    inputs,
+    init_h,
+    kernel,
+    recurrent_kernel,
+    bias,
+    mask,
+    time_major,
+    go_backwards,
+    sequence_lengths,
+    return_sequences,
+):
+    """GRU with cuDNN implementation which is only available for GPU."""
+    if mask is not None:
+        sequence_lengths = gru_lstm_utils.calculate_sequence_by_mask(
+            mask, time_major
+        )
+
+    if not time_major and sequence_lengths is None:
+        inputs = tf.transpose(inputs, perm=(1, 0, 2))
+        seq_axis, batch_axis = (0, 1)
     else:
-      output = last_output
-
-    if self.return_state:
-      return [output] + list(states)
-    elif self._return_runtime:
-      return output, runtime
+        seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+    # For init_h, cuDNN expects one more dim of num_layers before or after
+    # batch dim for time major or batch major inputs respectively
+    init_h = tf.expand_dims(init_h, axis=seq_axis)
+
+    weights = tf.split(kernel, 3, axis=1)
+    weights += tf.split(recurrent_kernel, 3, axis=1)
+    # Note that the bias was initialized as shape (2, 3 * units); flatten it
+    # into (6 * units).
+    bias = tf.split(backend.flatten(bias), 6)
+
+    if tf.sysconfig.get_build_info()["is_cuda_build"]:
+        # Note that the gate order for cuDNN is different from the canonical
+        # format: the canonical format is [z, r, h], whereas cuDNN's is
+        # [r, z, h]. The swaps need to be done for kernel, recurrent_kernel,
+        # input_bias, and recurrent_bias.
+        # z is update gate weights.
+        # r is reset gate weights.
+        # h is candidate ("new") state weights; a GRU has no output gate.
+        weights[0], weights[1] = weights[1], weights[0]
+        weights[3], weights[4] = weights[4], weights[3]
+        bias[0], bias[1] = bias[1], bias[0]
+        bias[3], bias[4] = bias[4], bias[3]
+
+    params = gru_lstm_utils.canonical_to_params(
+        weights=weights,
+        biases=bias,
+        shape=tf.constant([-1]),
+        transpose_weights=True,
+    )
+
+    if sequence_lengths is not None:
+        if go_backwards:
+            # Three reversals are required.
E.g., + # normal input = [1, 2, 3, 0, 0] # where 0 need to be masked + # reversed_input_to_cudnn = [3, 2, 1, 0, 0] + # output_from_cudnn = [6, 5, 4, 0, 0] + # expected_output = [0, 0, 6, 5 ,4] + inputs = tf.reverse_sequence( + inputs, + sequence_lengths, + seq_axis=seq_axis, + batch_axis=batch_axis, + ) + outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV3( + input=inputs, + input_h=init_h, + input_c=0, + params=params, + is_training=True, + rnn_mode="gru", + sequence_lengths=sequence_lengths, + time_major=time_major, + ) + if go_backwards: + outputs = tf.reverse_sequence( + outputs, + sequence_lengths, + seq_axis=seq_axis, + batch_axis=batch_axis, + ) + outputs = tf.reverse(outputs, axis=[seq_axis]) else: - return output - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - @property - def implementation(self): - return self.cell.implementation - - @property - def reset_after(self): - return self.cell.reset_after - - def get_config(self): - config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout, - 'implementation': - self.implementation, - 'reset_after': - self.reset_after + if go_backwards: + # Reverse axis 0 since the input is already convert to time major. 
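+            # A single reversal suffices here: without padding there is
+            # nothing to keep right-aligned, and running cuDNN on the
+            # reversed inputs already yields the reversed output sequence
+            # that `go_backwards` promises.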
+            inputs = tf.reverse(inputs, axis=[0])
+        outputs, h, _, _ = tf.raw_ops.CudnnRNN(
+            input=inputs,
+            input_h=init_h,
+            input_c=0,
+            params=params,
+            is_training=True,
+            rnn_mode="gru",
+        )
+
+    last_output = outputs[-1]
+    if not time_major and sequence_lengths is None and return_sequences:
+        outputs = tf.transpose(outputs, perm=[1, 0, 2])
+    h = tf.squeeze(h, axis=seq_axis)
+
+    # In the case of variable-length input, the cuDNN kernel fills zeros for
+    # the output, whereas the default Keras behavior is to carry over the
+    # output from t-1, so that in the `return_sequences=False` case the user
+    # gets the effective final output instead of just zeros at the last
+    # timestep. In order to mimic the default Keras behavior, we copy the
+    # final h state as the last_output, since it is numerically the same as
+    # the output.
+    if sequence_lengths is not None:
+        last_output = h
+
+    # Match CPU return format
+    if not return_sequences:
+        outputs = tf.expand_dims(last_output, axis=0 if time_major else 1)
+
+    return (
+        last_output,
+        outputs,
+        h,
+        gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_GPU),
+    )
+
+
+def gru_with_backend_selection(
+    inputs,
+    init_h,
+    kernel,
+    recurrent_kernel,
+    bias,
+    mask,
+    time_major,
+    go_backwards,
+    sequence_lengths,
+    zero_output_for_mask,
+    return_sequences,
+):
+    """Call the GRU with optimized backend kernel selection.
+
+    Under the hood, this function creates two TF functions: one with the most
+    generic kernel, which can run on all devices, and a second one with the
+    cuDNN-specific kernel, which can only run on GPU.
+
+    The first function will be called with the generic GRU params, while the
+    second function is not called, but only registered in the graph. Grappler
+    will do the proper graph rewrite and swap in the optimized TF function
+    based on the device placement.
+
+    Args:
+      inputs: Input tensor of GRU layer.
+      init_h: Initial state tensor for the cell output.
+      kernel: Weights for cell kernel.
+      recurrent_kernel: Weights for cell recurrent kernel.
+      bias: Weights for cell kernel bias and recurrent bias. Only recurrent
+        bias is used in this case.
+      mask: Boolean tensor for masking out steps within the sequence.
+        An individual `True` entry indicates that the corresponding timestep
+        should be utilized, while a `False` entry indicates that the
+        corresponding timestep should be ignored.
+      time_major: Boolean, whether the inputs are in the format of
+        [time, batch, feature] or [batch, time, feature].
+      go_backwards: Boolean (default False). If True, process the input
+        sequence backwards and return the reversed sequence.
+      sequence_lengths: The lengths of all sequences coming from a variable
+        length input, such as ragged tensors. If the input has a fixed
+        timestep size, this should be None.
+      zero_output_for_mask: Boolean, whether to output zero for masked
+        timesteps.
+      return_sequences: Boolean. If True, return the recurrent outputs for
+        all timesteps in the sequence. If False, only return the output for
+        the last timestep (which consumes less memory).
+
+    Returns:
+      List of output tensors, same as standard_gru.
+ """ + params = { + "inputs": inputs, + "init_h": init_h, + "kernel": kernel, + "recurrent_kernel": recurrent_kernel, + "bias": bias, + "mask": mask, + "time_major": time_major, + "go_backwards": go_backwards, + "sequence_lengths": sequence_lengths, + "zero_output_for_mask": zero_output_for_mask, + "return_sequences": return_sequences, } - config.update(rnn_utils.config_for_enable_caching_device(self.cell)) - base_config = super().get_config() - del base_config['cell'] - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - if 'implementation' in config and config['implementation'] == 0: - config['implementation'] = 1 - return cls(**config) - - def _defun_gru_call(self, inputs, initial_state, training, mask, - sequence_lengths): - # Use the new defun approach for backend implementation swap. - # Note that different implementations need to have same function - # signature, eg, the tensor parameters need to have same shape and dtypes. - - self.reset_dropout_mask() - dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=3) - if dropout_mask is not None: - inputs = inputs * dropout_mask[0] + + def gpu_gru_with_fallback( + inputs, + init_h, + kernel, + recurrent_kernel, + bias, + mask, + time_major, + go_backwards, + sequence_lengths, + zero_output_for_mask, + return_sequences, + ): + """Use cuDNN kernel when mask is none or strictly right padded.""" + + def cudnn_gru_fn(): + return gpu_gru( + inputs=inputs, + init_h=init_h, + kernel=kernel, + recurrent_kernel=recurrent_kernel, + bias=bias, + mask=mask, + time_major=time_major, + go_backwards=go_backwards, + sequence_lengths=sequence_lengths, + return_sequences=return_sequences, + ) + + def standard_gru_fn(): + return standard_gru( + inputs=inputs, + init_h=init_h, + kernel=kernel, + recurrent_kernel=recurrent_kernel, + bias=bias, + mask=mask, + time_major=time_major, + go_backwards=go_backwards, + sequence_lengths=sequence_lengths, + zero_output_for_mask=zero_output_for_mask, + return_sequences=return_sequences, + ) + + return tf.__internal__.smart_cond.smart_cond( + gru_lstm_utils.is_cudnn_supported_inputs( + mask, time_major, sequence_lengths + ), + true_fn=cudnn_gru_fn, + false_fn=standard_gru_fn, + ) if gru_lstm_utils.use_new_gru_lstm_impl(): - gru_kwargs = { - 'inputs': - inputs, - 'init_h': - gru_lstm_utils.read_variable_value(initial_state[0]), - 'kernel': - gru_lstm_utils.read_variable_value(self.cell.kernel), - 'recurrent_kernel': - gru_lstm_utils.read_variable_value(self.cell.recurrent_kernel), - 'bias': - gru_lstm_utils.read_variable_value(self.cell.bias), - 'mask': - mask, - 'time_major': - self.time_major, - 'go_backwards': - self.go_backwards, - 'sequence_lengths': - sequence_lengths, - 'zero_output_for_mask': - self.zero_output_for_mask - } - (last_output, outputs, new_h, - runtime) = self._defun_wrapper.defun_layer(**gru_kwargs) + # Chooses the implementation dynamically based on the running device. 
+ ( + last_output, + outputs, + new_h, + runtime, + ) = tf.__internal__.execute_fn_for_device( + { + gru_lstm_utils.CPU_DEVICE_NAME: lambda: standard_gru(**params), + gru_lstm_utils.GPU_DEVICE_NAME: lambda: gpu_gru_with_fallback( + **params + ), + }, + lambda: standard_gru(**params), + ) else: - gpu_gru_kwargs = { - 'inputs': - inputs, - 'init_h': - gru_lstm_utils.read_variable_value(initial_state[0]), - 'kernel': - gru_lstm_utils.read_variable_value(self.cell.kernel), - 'recurrent_kernel': - gru_lstm_utils.read_variable_value(self.cell.recurrent_kernel), - 'bias': - gru_lstm_utils.read_variable_value(self.cell.bias), - 'mask': - mask, - 'time_major': - self.time_major, - 'go_backwards': - self.go_backwards, - 'sequence_lengths': - sequence_lengths, - 'return_sequences': - self.return_sequences - } - normal_gru_kwargs = gpu_gru_kwargs.copy() - normal_gru_kwargs.update({ - 'zero_output_for_mask': self.zero_output_for_mask, - }) - - if tf.executing_eagerly(): - device_type = gru_lstm_utils.get_context_device_type() - can_use_gpu = ( - # Either user specified GPU or unspecified but GPU is available. - (device_type == gru_lstm_utils.GPU_DEVICE_NAME or - (device_type is None and tf.config.list_logical_devices('GPU'))) - and - (mask is None or - gru_lstm_utils.is_cudnn_supported_inputs(mask, self.time_major))) - # Under eager context, check the device placement and prefer the - if can_use_gpu: - last_output, outputs, new_h, runtime = gpu_gru(**gpu_gru_kwargs) - else: - last_output, outputs, new_h, runtime = standard_gru( - **normal_gru_kwargs) - else: - last_output, outputs, new_h, runtime = gru_with_backend_selection( - **normal_gru_kwargs) - - states = [new_h] - return last_output, outputs, runtime, states - - -def standard_gru(inputs, init_h, kernel, recurrent_kernel, bias, mask, - time_major, go_backwards, sequence_lengths, - zero_output_for_mask, return_sequences): - """GRU with standard kernel implementation. - - This implementation can be run on all types of hardware. - - This implementation lifts out all the layer weights and make them function - parameters. It has same number of tensor input params as the cuDNN - counterpart. The RNN step logic has been simplified, eg dropout and mask is - removed since cuDNN implementation does not support that. - - Args: - inputs: Input tensor of GRU layer. - init_h: Initial state tensor for the cell output. - kernel: Weights for cell kernel. - recurrent_kernel: Weights for cell recurrent kernel. - bias: Weights for cell kernel bias and recurrent bias. The bias contains the - combined input_bias and recurrent_bias. - mask: Binary tensor of shape `(samples, timesteps)` indicating whether - a given timestep should be masked. An individual `True` entry indicates - that the corresponding timestep should be utilized, while a `False` entry - indicates that the corresponding timestep should be ignored. - time_major: Boolean, whether the inputs are in the format of - [time, batch, feature] or [batch, time, feature]. - go_backwards: Boolean (default False). If True, process the input sequence - backwards and return the reversed sequence. - sequence_lengths: The lengths of all sequences coming from a variable length - input, such as ragged tensors. If the input has a fixed timestep size, - this should be None. - zero_output_for_mask: Boolean, whether to output zero for masked timestep. - return_sequences: Boolean. If True, return the recurrent outputs for all - timesteps in the sequence. 
If False, only return the output for the - last timestep (which consumes less memory). - - Returns: - last_output: output tensor for the last timestep, which has shape - [batch, units]. - outputs: - - If `return_sequences=True`: output tensor for all timesteps, - which has shape [batch, time, units]. - - Else, a tensor equal to `last_output` with shape [batch, 1, units] - state_0: the cell output, which has same shape as init_h. - runtime: constant string tensor which indicate real runtime hardware. This - value is for testing purpose and should be used by user. - """ - input_shape = backend.int_shape(inputs) - timesteps = input_shape[0] if time_major else input_shape[1] - - input_bias, recurrent_bias = tf.unstack(bias) - - def step(cell_inputs, cell_states): - """Step function that will be used by Keras RNN backend.""" - h_tm1 = cell_states[0] - - # inputs projected by all gate matrices at once - matrix_x = backend.dot(cell_inputs, kernel) - matrix_x = backend.bias_add(matrix_x, input_bias) - - x_z, x_r, x_h = tf.split(matrix_x, 3, axis=1) - - # hidden state projected by all gate matrices at once - matrix_inner = backend.dot(h_tm1, recurrent_kernel) - matrix_inner = backend.bias_add(matrix_inner, recurrent_bias) - - recurrent_z, recurrent_r, recurrent_h = tf.split(matrix_inner, 3, axis=1) - z = tf.sigmoid(x_z + recurrent_z) - r = tf.sigmoid(x_r + recurrent_r) - hh = tf.tanh(x_h + r * recurrent_h) - - # previous and candidate state mixed by update gate - h = z * h_tm1 + (1 - z) * hh - return h, [h] - - last_output, outputs, new_states = backend.rnn( - step, - inputs, [init_h], - constants=None, - unroll=False, - time_major=time_major, - mask=mask, - go_backwards=go_backwards, - input_length=sequence_lengths - if sequence_lengths is not None else timesteps, - zero_output_for_mask=zero_output_for_mask, - return_all_outputs=return_sequences) - return last_output, outputs, new_states[0], gru_lstm_utils.runtime( - gru_lstm_utils.RUNTIME_CPU) - - -def gpu_gru(inputs, init_h, kernel, recurrent_kernel, bias, mask, time_major, - go_backwards, sequence_lengths, return_sequences): - """GRU with cuDNN implementation which is only available for GPU.""" - if mask is not None: - sequence_lengths = gru_lstm_utils.calculate_sequence_by_mask( - mask, time_major) - - if not time_major and sequence_lengths is None: - inputs = tf.transpose(inputs, perm=(1, 0, 2)) - seq_axis, batch_axis = (0, 1) - else: - seq_axis, batch_axis = (0, 1) if time_major else (1, 0) - # For init_h, cuDNN expects one more dim of num_layers before or after batch - # dim for time major or batch major inputs respectively - init_h = tf.expand_dims(init_h, axis=seq_axis) - - weights = tf.split(kernel, 3, axis=1) - weights += tf.split(recurrent_kernel, 3, axis=1) - # Note that the bias was initialized as shape (2, 3 * units), flat it into - # (6 * units) - bias = tf.split(backend.flatten(bias), 6) - - if tf.sysconfig.get_build_info()['is_cuda_build']: - # Note that the gate order for cuDNN is different from the canonical format. - # canonical format is [z, r, h], whereas cuDNN is [r, z, h]. The swap need - # to be done for kernel, recurrent_kernel, input_bias, recurrent_bias. - # z is update gate weights. - # r is reset gate weights. - # h is output gate weights. 
- weights[0], weights[1] = weights[1], weights[0] - weights[3], weights[4] = weights[4], weights[3] - bias[0], bias[1] = bias[1], bias[0] - bias[3], bias[4] = bias[4], bias[3] - - params = gru_lstm_utils.canonical_to_params( - weights=weights, - biases=bias, - shape=tf.constant([-1]), - transpose_weights=True) - - if sequence_lengths is not None: - if go_backwards: - # Three reversals are required. E.g., - # normal input = [1, 2, 3, 0, 0] # where 0 need to be masked - # reversed_input_to_cudnn = [3, 2, 1, 0, 0] - # output_from_cudnn = [6, 5, 4, 0, 0] - # expected_output = [0, 0, 6, 5 ,4] - inputs = tf.reverse_sequence( - inputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis) - outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV3( - input=inputs, - input_h=init_h, - input_c=0, - params=params, - is_training=True, - rnn_mode='gru', - sequence_lengths=sequence_lengths, - time_major=time_major) - if go_backwards: - outputs = tf.reverse_sequence( - outputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis) - outputs = tf.reverse(outputs, axis=[seq_axis]) - else: - if go_backwards: - # Reverse axis 0 since the input is already convert to time major. - inputs = tf.reverse(inputs, axis=[0]) - outputs, h, _, _ = tf.raw_ops.CudnnRNN( - input=inputs, input_h=init_h, input_c=0, params=params, - is_training=True, rnn_mode='gru') - - last_output = outputs[-1] - if not time_major and sequence_lengths is None and return_sequences: - outputs = tf.transpose(outputs, perm=[1, 0, 2]) - h = tf.squeeze(h, axis=seq_axis) - - # In the case of variable length input, the cudnn kernel will fill zeros for - # the output, whereas the default keras behavior is to bring over the previous - # output for t-1, so that in the return_sequence=False case, user can quickly - # get the final effect output instead just 0s at the last timestep. - # In order to mimic the default keras behavior, we copy the final h state as - # the last_output, since it is numerically same as the output. - if sequence_lengths is not None: - last_output = h - - # Match CPU return format - if not return_sequences: - outputs = tf.expand_dims(last_output, axis=0 if time_major else 1) - - return last_output, outputs, h, gru_lstm_utils.runtime( - gru_lstm_utils.RUNTIME_GPU) - - -def gru_with_backend_selection(inputs, init_h, kernel, recurrent_kernel, bias, - mask, time_major, go_backwards, sequence_lengths, - zero_output_for_mask, return_sequences): - """Call the GRU with optimized backend kernel selection. - - Under the hood, this function will create two TF function, one with the most - generic kernel and can run on all device condition, and the second one with - cuDNN specific kernel, which can only run on GPU. - - The first function will be called with normal_lstm_params, while the second - function is not called, but only registered in the graph. The Grappler will - do the proper graph rewrite and swap the optimized TF function based on the - device placement. - - Args: - inputs: Input tensor of GRU layer. - init_h: Initial state tensor for the cell output. - kernel: Weights for cell kernel. - recurrent_kernel: Weights for cell recurrent kernel. - bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias - is used in this case. - mask: Boolean tensor for mask out the steps within sequence. - An individual `True` entry indicates that the corresponding timestep - should be utilized, while a `False` entry indicates that the corresponding - timestep should be ignored. 
- time_major: Boolean, whether the inputs are in the format of - [time, batch, feature] or [batch, time, feature]. - go_backwards: Boolean (default False). If True, process the input sequence - backwards and return the reversed sequence. - sequence_lengths: The lengths of all sequences coming from a variable length - input, such as ragged tensors. If the input has a fixed timestep size, - this should be None. - zero_output_for_mask: Boolean, whether to output zero for masked timestep. - return_sequences: Boolean. If True, return the recurrent outputs for all - timesteps in the sequence. If False, only return the output for the - last timestep (which consumes less memory). - - Returns: - List of output tensors, same as standard_gru. - """ - params = { - 'inputs': inputs, - 'init_h': init_h, - 'kernel': kernel, - 'recurrent_kernel': recurrent_kernel, - 'bias': bias, - 'mask': mask, - 'time_major': time_major, - 'go_backwards': go_backwards, - 'sequence_lengths': sequence_lengths, - 'zero_output_for_mask': zero_output_for_mask, - 'return_sequences': return_sequences, - } - - def gpu_gru_with_fallback(inputs, init_h, kernel, recurrent_kernel, bias, - mask, time_major, go_backwards, sequence_lengths, - zero_output_for_mask, return_sequences): - """Use cuDNN kernel when mask is none or strictly right padded.""" - if mask is None: - return gpu_gru( - inputs=inputs, - init_h=init_h, - kernel=kernel, - recurrent_kernel=recurrent_kernel, - bias=bias, - mask=mask, - time_major=time_major, - go_backwards=go_backwards, - sequence_lengths=sequence_lengths, - return_sequences=return_sequences) - - def cudnn_gru_fn(): - return gpu_gru( - inputs=inputs, - init_h=init_h, - kernel=kernel, - recurrent_kernel=recurrent_kernel, - bias=bias, - mask=mask, - time_major=time_major, - go_backwards=go_backwards, - sequence_lengths=sequence_lengths, - return_sequences=return_sequences) - - def standard_gru_fn(): - return standard_gru( - inputs=inputs, - init_h=init_h, - kernel=kernel, - recurrent_kernel=recurrent_kernel, - bias=bias, - mask=mask, - time_major=time_major, - go_backwards=go_backwards, - sequence_lengths=sequence_lengths, - zero_output_for_mask=zero_output_for_mask, - return_sequences=return_sequences) - - return tf.cond( - gru_lstm_utils.is_cudnn_supported_inputs(mask, time_major), - true_fn=cudnn_gru_fn, - false_fn=standard_gru_fn) - - if gru_lstm_utils.use_new_gru_lstm_impl(): - # Chooses the implementation dynamically based on the running device. - (last_output, outputs, new_h, - runtime) = tf.__internal__.execute_fn_for_device( - { - gru_lstm_utils.CPU_DEVICE_NAME: - lambda: standard_gru(**params), - gru_lstm_utils.GPU_DEVICE_NAME: - lambda: gpu_gru_with_fallback(**params) - }, lambda: standard_gru(**params)) - else: - # Each time a `tf.function` is called, we will give it a unique - # identifiable API name, so that Grappler won't get confused when it - # sees multiple GRU layers added into same graph, and it will be able - # to pair up the different implementations across them. - api_name = 'gru_' + str(uuid.uuid4()) - supportive_attribute = { - 'time_major': time_major, - 'go_backwards': go_backwards, - } - defun_standard_gru = gru_lstm_utils.generate_defun_backend( - api_name, gru_lstm_utils.CPU_DEVICE_NAME, standard_gru, - supportive_attribute) - defun_gpu_gru = gru_lstm_utils.generate_defun_backend( - api_name, gru_lstm_utils.GPU_DEVICE_NAME, gpu_gru_with_fallback, - supportive_attribute) - - # Call the normal GRU impl and register the cuDNN impl function. 
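One design constraint behind `gpu_gru_with_fallback` is worth noting: both branches of a `tf.cond` must return tensors with matching structure and dtypes, which is one reason `standard_gru` and `gpu_gru` share the exact `(last_output, outputs, state, runtime)` signature, runtime constant included. A stripped-down sketch of the same dispatch shape, with stand-in kernels:

```python
import tensorflow as tf

RUNTIME_CPU, RUNTIME_GPU = 1.0, 2.0  # mirrors the gru_lstm_utils constants

def fast_kernel(x):     # stand-in for the cuDNN path
    return tf.reduce_sum(x), tf.constant(RUNTIME_GPU)

def generic_kernel(x):  # stand-in for the generic path
    return tf.reduce_sum(x), tf.constant(RUNTIME_CPU)

x = tf.ones([2, 3])
ok = tf.constant(True)  # e.g. is_cudnn_supported_inputs(...)
# Both branches return the same (value, runtime) structure, so the kernel
# choice can be deferred until the condition is known at execution time.
value, runtime = tf.cond(ok, lambda: fast_kernel(x), lambda: generic_kernel(x))
```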
The - # grappler will kick in during session execution to optimize the graph. - last_output, outputs, new_h, runtime = defun_standard_gru(**params) - gru_lstm_utils.function_register(defun_gpu_gru, **params) - - return last_output, outputs, new_h, runtime + # Each time a `tf.function` is called, we will give it a unique + # identifiable API name, so that Grappler won't get confused when it + # sees multiple GRU layers added into same graph, and it will be able + # to pair up the different implementations across them. + api_name = "gru_" + str(uuid.uuid4()) + supportive_attribute = { + "time_major": time_major, + "go_backwards": go_backwards, + } + defun_standard_gru = gru_lstm_utils.generate_defun_backend( + api_name, + gru_lstm_utils.CPU_DEVICE_NAME, + standard_gru, + supportive_attribute, + ) + defun_gpu_gru = gru_lstm_utils.generate_defun_backend( + api_name, + gru_lstm_utils.GPU_DEVICE_NAME, + gpu_gru_with_fallback, + supportive_attribute, + ) + + # Call the normal GRU impl and register the cuDNN impl function. The + # grappler will kick in during session execution to optimize the graph. + last_output, outputs, new_h, runtime = defun_standard_gru(**params) + gru_lstm_utils.function_register(defun_gpu_gru, **params) + + return last_output, outputs, new_h, runtime diff --git a/keras/layers/rnn/gru_lstm_test.py b/keras/layers/rnn/gru_lstm_test.py index 33ed001f7de4..0c09541e605c 100644 --- a/keras/layers/rnn/gru_lstm_test.py +++ b/keras/layers/rnn/gru_lstm_test.py @@ -19,131 +19,161 @@ import os +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.layers.rnn import gru from keras.layers.rnn import lstm from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class RNNV2Test(test_combinations.TestCase): - - @parameterized.parameters([lstm.LSTM, gru.GRU]) - def test_device_placement(self, layer): - if not tf.test.is_gpu_available(): - self.skipTest('Need GPU for testing.') - vocab_size = 20 - embedding_dim = 10 - batch_size = 8 - timestep = 12 - units = 5 - x = np.random.randint(0, vocab_size, size=(batch_size, timestep)) - y = np.random.randint(0, vocab_size, size=(batch_size, timestep)) - - # Test when GPU is available but not used, the graph should be properly - # created with CPU ops. 
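The uuid-based `api_name` is what lets Grappler pair the two registered functions: both carry the same `api_implements` attribute and differ only in `api_preferred_device`. Roughly what `generate_defun_backend` produces, sketched with the public `tf.function` API used elsewhere in this diff (the attribute keys match the `_FUNCTION_*_ATTRIBUTE` constants; the bodies are placeholders):

```python
import uuid
import tensorflow as tf

api_name = "gru_" + str(uuid.uuid4())  # unique per layer instance

def cpu_impl(x):
    return x * 2.0  # stand-in for standard_gru

def gpu_impl(x):
    return x + x    # stand-in for gpu_gru_with_fallback

common = {"time_major": False, "go_backwards": False}
defun_cpu = tf.function(
    cpu_impl, autograph=False,
    experimental_attributes={"api_implements": api_name,
                             "api_preferred_device": "CPU", **common})
defun_gpu = tf.function(
    gpu_impl, autograph=False,
    experimental_attributes={"api_implements": api_name,
                             "api_preferred_device": "GPU", **common})
```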
- with test_utils.device(should_use_gpu=False): - model = keras.Sequential([ - keras.layers.Embedding(vocab_size, embedding_dim, - batch_input_shape=[batch_size, timestep]), - layer(units, return_sequences=True, stateful=True), - keras.layers.Dense(vocab_size) - ]) - model.compile( - optimizer='adam', - loss='sparse_categorical_crossentropy', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, shuffle=False) - - @parameterized.parameters([lstm.LSTM, gru.GRU]) - def test_reset_dropout_mask_between_batch(self, layer): - # See https://github.com/tensorflow/tensorflow/issues/29187 for more details - batch_size = 8 - timestep = 12 - embedding_dim = 10 - units = 5 - layer = layer(units, dropout=0.5, recurrent_dropout=0.5) - - inputs = np.random.random((batch_size, timestep, embedding_dim)).astype( - np.float32) - previous_dropout, previous_recurrent_dropout = None, None - - for _ in range(5): - layer(inputs, training=True) - dropout = layer.cell.get_dropout_mask_for_cell(inputs, training=True) - recurrent_dropout = layer.cell.get_recurrent_dropout_mask_for_cell( - inputs, training=True) - if previous_dropout is not None: - self.assertNotAllClose(self.evaluate(previous_dropout), - self.evaluate(dropout)) - previous_dropout = dropout - if previous_recurrent_dropout is not None: - self.assertNotAllClose(self.evaluate(previous_recurrent_dropout), - self.evaluate(recurrent_dropout)) - previous_recurrent_dropout = recurrent_dropout - - @parameterized.parameters([lstm.LSTM, gru.GRU]) - def test_recurrent_dropout_with_stateful_RNN(self, layer): - # See https://github.com/tensorflow/tensorflow/issues/27829 for details. - # The issue was caused by using inplace mul for a variable, which was a - # warning for RefVariable, but an error for ResourceVariable in 2.0 - keras.models.Sequential([ - layer(128, stateful=True, return_sequences=True, dropout=0.2, - batch_input_shape=[32, None, 5], recurrent_dropout=0.2) - ]) - - @parameterized.parameters([lstm.LSTM, gru.GRU]) - def test_recurrent_dropout_saved_model(self, layer): - if not tf.executing_eagerly(): - self.skipTest('v2-only test') - inputs = keras.Input(shape=(784, 3), name='digits') - x = layer(64, activation='relu', name='RNN', dropout=0.1)(inputs) - x = keras.layers.Dense(64, activation='relu', name='dense')(x) - outputs = keras.layers.Dense( - 10, activation='softmax', name='predictions')( - x) - model = keras.Model(inputs=inputs, outputs=outputs, name='3_layer') - model.save(os.path.join(self.get_temp_dir(), 'model'), save_format='tf') - - @parameterized.parameters([lstm.LSTM, gru.GRU]) - def test_ragged(self, layer): - vocab_size = 100 - inputs = tf.ragged.constant( - np.random.RandomState(0).randint(0, vocab_size, [128, 25])) - embedder = keras.layers.Embedding(input_dim=vocab_size, output_dim=16) - embedded_inputs = embedder(inputs) - layer = layer(32) - layer(embedded_inputs) - - @parameterized.parameters([lstm.LSTM, gru.GRU]) - @test_utils.run_v2_only - def test_compare_ragged_with_masks(self, layer): - vocab_size = 100 - timestep = 20 - units = 32 - embedder = keras.layers.Embedding(input_dim=vocab_size, output_dim=units) - layer = layer(units, return_sequences=True) - data = tf.constant( - np.random.RandomState(0).randint(0, vocab_size, [timestep, timestep])) - mask = tf.sequence_mask(tf.range(1, timestep + 1)) - data_ragged = tf.ragged.boolean_mask(data, mask) - - outputs = [] - devices = [test_utils.device(should_use_gpu=False)] - if tf.test.is_gpu_available(): - devices.append(test_utils.device(should_use_gpu=True)) 
- for device in devices: - with device: - outputs.append(tf.boolean_mask(layer(embedder(data), mask=mask), mask)) - outputs.append(layer(embedder(data_ragged)).values) - - for i in range(len(outputs) - 1): - self.assertAllClose(outputs[i], outputs[i + 1], atol=1e-4) - - -if __name__ == '__main__': - tf.test.main() + @parameterized.parameters([lstm.LSTM, gru.GRU]) + def test_device_placement(self, layer): + if not tf.test.is_gpu_available(): + self.skipTest("Need GPU for testing.") + vocab_size = 20 + embedding_dim = 10 + batch_size = 8 + timestep = 12 + units = 5 + x = np.random.randint(0, vocab_size, size=(batch_size, timestep)) + y = np.random.randint(0, vocab_size, size=(batch_size, timestep)) + + # Test when GPU is available but not used, the graph should be properly + # created with CPU ops. + with test_utils.device(should_use_gpu=False): + model = keras.Sequential( + [ + keras.layers.Embedding( + vocab_size, + embedding_dim, + batch_input_shape=[batch_size, timestep], + ), + layer(units, return_sequences=True, stateful=True), + keras.layers.Dense(vocab_size), + ] + ) + model.compile( + optimizer="adam", + loss="sparse_categorical_crossentropy", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, epochs=1, shuffle=False) + + @parameterized.parameters([lstm.LSTM, gru.GRU]) + def test_reset_dropout_mask_between_batch(self, layer): + # See https://github.com/tensorflow/tensorflow/issues/29187 for more + # details + batch_size = 8 + timestep = 12 + embedding_dim = 10 + units = 5 + layer = layer(units, dropout=0.5, recurrent_dropout=0.5) + + inputs = np.random.random((batch_size, timestep, embedding_dim)).astype( + np.float32 + ) + previous_dropout, previous_recurrent_dropout = None, None + + for _ in range(5): + layer(inputs, training=True) + dropout = layer.cell.get_dropout_mask_for_cell( + inputs, training=True + ) + recurrent_dropout = layer.cell.get_recurrent_dropout_mask_for_cell( + inputs, training=True + ) + if previous_dropout is not None: + self.assertNotAllClose( + self.evaluate(previous_dropout), self.evaluate(dropout) + ) + previous_dropout = dropout + if previous_recurrent_dropout is not None: + self.assertNotAllClose( + self.evaluate(previous_recurrent_dropout), + self.evaluate(recurrent_dropout), + ) + previous_recurrent_dropout = recurrent_dropout + + @parameterized.parameters([lstm.LSTM, gru.GRU]) + def test_recurrent_dropout_with_stateful_RNN(self, layer): + # See https://github.com/tensorflow/tensorflow/issues/27829 for details. 
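`test_compare_ragged_with_masks` leans on the fact that a dense batch plus a strictly right-padded mask encodes the same data as a ragged tensor. That equivalence is easy to see in isolation; the values below are made up:

```python
import numpy as np
import tensorflow as tf

data = tf.constant(np.arange(12).reshape(3, 4))
# Rows have lengths 1, 2, 3 -> a strictly right-padded mask.
mask = tf.sequence_mask([1, 2, 3], maxlen=4)

ragged = tf.ragged.boolean_mask(data, mask)
print(ragged)  # <tf.RaggedTensor [[0], [4, 5], [8, 9, 10]]>
# Selecting the valid positions from the dense batch yields the same values
# as the flat storage of the ragged tensor, which is what the test asserts
# (up to numerical tolerance) for the RNN outputs.
assert np.array_equal(tf.boolean_mask(data, mask).numpy(), ragged.values.numpy())
```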
+ # The issue was caused by using inplace mul for a variable, which was a + # warning for RefVariable, but an error for ResourceVariable in 2.0 + keras.models.Sequential( + [ + layer( + 128, + stateful=True, + return_sequences=True, + dropout=0.2, + batch_input_shape=[32, None, 5], + recurrent_dropout=0.2, + ) + ] + ) + + @parameterized.parameters([lstm.LSTM, gru.GRU]) + def test_recurrent_dropout_saved_model(self, layer): + if not tf.executing_eagerly(): + self.skipTest("v2-only test") + inputs = keras.Input(shape=(784, 3), name="digits") + x = layer(64, activation="relu", name="RNN", dropout=0.1)(inputs) + x = keras.layers.Dense(64, activation="relu", name="dense")(x) + outputs = keras.layers.Dense( + 10, activation="softmax", name="predictions" + )(x) + model = keras.Model(inputs=inputs, outputs=outputs, name="3_layer") + model.save(os.path.join(self.get_temp_dir(), "model"), save_format="tf") + + @parameterized.parameters([lstm.LSTM, gru.GRU]) + def test_ragged(self, layer): + vocab_size = 100 + inputs = tf.ragged.constant( + np.random.RandomState(0).randint(0, vocab_size, [128, 25]) + ) + embedder = keras.layers.Embedding(input_dim=vocab_size, output_dim=16) + embedded_inputs = embedder(inputs) + layer = layer(32) + layer(embedded_inputs) + + @parameterized.parameters([lstm.LSTM, gru.GRU]) + @test_utils.run_v2_only + def test_compare_ragged_with_masks(self, layer): + vocab_size = 100 + timestep = 20 + units = 32 + embedder = keras.layers.Embedding( + input_dim=vocab_size, output_dim=units + ) + layer = layer(units, return_sequences=True) + data = tf.constant( + np.random.RandomState(0).randint( + 0, vocab_size, [timestep, timestep] + ) + ) + mask = tf.sequence_mask(tf.range(1, timestep + 1)) + data_ragged = tf.ragged.boolean_mask(data, mask) + + outputs = [] + devices = [test_utils.device(should_use_gpu=False)] + if tf.test.is_gpu_available(): + devices.append(test_utils.device(should_use_gpu=True)) + for device in devices: + with device: + outputs.append( + tf.boolean_mask(layer(embedder(data), mask=mask), mask) + ) + outputs.append(layer(embedder(data_ragged)).values) + + for i in range(len(outputs) - 1): + self.assertAllClose(outputs[i], outputs[i + 1], atol=1e-4) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/gru_lstm_utils.py b/keras/layers/rnn/gru_lstm_utils.py index 1ddde291b219..d0f3208134e7 100644 --- a/keras/layers/rnn/gru_lstm_utils.py +++ b/keras/layers/rnn/gru_lstm_utils.py @@ -19,15 +19,15 @@ import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.eager.context import get_device_name - # The following string constants are used by Defun approach for unified backend # of LSTM and GRU. -_FUNCTION_API_NAME_ATTRIBUTE = 'api_implements' -_FUNCTION_DEVICE_ATTRIBUTE = 'api_preferred_device' -CPU_DEVICE_NAME = 'CPU' -GPU_DEVICE_NAME = 'GPU' +_FUNCTION_API_NAME_ATTRIBUTE = "api_implements" +_FUNCTION_DEVICE_ATTRIBUTE = "api_preferred_device" +CPU_DEVICE_NAME = "CPU" +GPU_DEVICE_NAME = "GPU" # The following number constants are used to represent the runtime of the defun # backend function. Since the CPU/GPU implementation are mathematically same, we @@ -37,211 +37,239 @@ RUNTIME_CPU = 1 RUNTIME_GPU = 2 -CUDNN_AVAILABLE_MSG = 'Layer %s will use cuDNN kernels when running on GPU.' -CUDNN_NOT_AVAILABLE_MSG = ('Layer %s will not use cuDNN kernels since it ' - 'doesn\'t meet the criteria. 
It will ' - 'use a generic GPU kernel as fallback when running ' - 'on GPU.') +CUDNN_AVAILABLE_MSG = "Layer %s will use cuDNN kernels when running on GPU." +CUDNN_NOT_AVAILABLE_MSG = ( + "Layer %s will not use cuDNN kernels since it " + "doesn't meet the criteria. It will " + "use a generic GPU kernel as fallback when running " + "on GPU." +) def use_new_gru_lstm_impl(): - return False + return False # TODO(b/169707691): The wrapper can be removed if TFLite doesn't need to rely # on supportive attributes from LSTM/GRU. class DefunWrapper: - """A wrapper with no deep copy of the Defun in LSTM/GRU layer.""" - - def __init__(self, time_major, go_backwards, layer_name): - self.time_major = time_major - self.go_backwards = go_backwards - self.layer_name = layer_name - if self.layer_name not in ['lstm', 'gru']: - raise ValueError('Defun wrapper only applies to LSTM and GRU layer, ' - 'but given {}'.format(self.layer_name)) - # The first two attributes are added to support TFLite use case. - supportive_attributes = { - 'time_major': self.time_major, - 'go_backwards': self.go_backwards, - _FUNCTION_API_NAME_ATTRIBUTE: self.layer_name + '_' + str(uuid.uuid4()) - } - if self.layer_name == 'lstm': - from keras.layers.rnn import lstm # pylint: disable=g-import-not-at-top - layer_func = lstm.lstm_with_backend_selection - else: - from keras.layers.rnn import gru # pylint: disable=g-import-not-at-top - layer_func = gru.gru_with_backend_selection - - self.defun_layer = tf.__internal__.function.defun_with_attributes( - layer_func, - attributes=supportive_attributes, - autograph=False) - - def __deepcopy__(self, memo): - new_wrapper = type(self)( - self.time_major, self.go_backwards, self.layer_name) - memo[id(self)] = new_wrapper - return new_wrapper + """A wrapper with no deep copy of the Defun in LSTM/GRU layer.""" + + def __init__(self, time_major, go_backwards, layer_name): + self.time_major = time_major + self.go_backwards = go_backwards + self.layer_name = layer_name + if self.layer_name not in ["lstm", "gru"]: + raise ValueError( + "Defun wrapper only applies to LSTM and GRU layer, " + "but given {}".format(self.layer_name) + ) + # The first two attributes are added to support TFLite use case. + supportive_attributes = { + "time_major": self.time_major, + "go_backwards": self.go_backwards, + _FUNCTION_API_NAME_ATTRIBUTE: self.layer_name + + "_" + + str(uuid.uuid4()), + } + if self.layer_name == "lstm": + from keras.layers.rnn import ( + lstm, + ) + + layer_func = lstm.lstm_with_backend_selection + else: + from keras.layers.rnn import ( + gru, + ) + + layer_func = gru.gru_with_backend_selection + + self.defun_layer = tf.function( + layer_func, + autograph=False, + experimental_attributes=supportive_attributes, + ) + + def __deepcopy__(self, memo): + new_wrapper = type(self)( + self.time_major, self.go_backwards, self.layer_name + ) + memo[id(self)] = new_wrapper + return new_wrapper def canonical_to_params(weights, biases, shape, transpose_weights=False): - """Utility function convert variable to cuDNN compatible parameter. + """Utility function convert variable to cuDNN compatible parameter. + + Note that Keras weights for kernels are different from the cuDNN format. + Eg.: - Note that Keras weights for kernels are different from the cuDNN format. 
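One detail worth calling out in `DefunWrapper` above: `__deepcopy__` never copies the traced function itself. It rebuilds a fresh wrapper from the three plain attributes and records it in `memo`, since a traced `tf.function` handle does not survive a deep copy. The same pattern in isolation, with a hypothetical wrapper class:

```python
import copy

class TracedOpWrapper:
    """Holds a non-copyable handle (e.g. a traced tf.function)."""

    def __init__(self, config):
        self.config = config
        self.handle = self._expensive_trace(config)  # not safely deep-copyable

    def _expensive_trace(self, config):
        return object()  # stand-in for a ConcreteFunction / tf.function

    def __deepcopy__(self, memo):
        # Rebuild from plain config instead of copying the traced handle;
        # memo[id(self)] keeps shared references consistent in the copy.
        new = type(self)(copy.deepcopy(self.config, memo))
        memo[id(self)] = new
        return new

layer = {"rnn": TracedOpWrapper({"time_major": False})}
clone = copy.deepcopy(layer)
assert clone["rnn"] is not layer["rnn"]
```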
Eg.: + ``` + Keras cuDNN + [[0, 1, 2], <---> [[0, 2, 4], + [3, 4, 5]] [1, 3, 5]] + ``` - ``` - Keras cuDNN - [[0, 1, 2], <---> [[0, 2, 4], - [3, 4, 5]] [1, 3, 5]] - ``` + If the input weights need to be in a unified format, then set + `transpose_weights=True` to convert the weights. - If the input weights need to be in a unified format, then set - `transpose_weights=True` to convert the weights. + Args: + weights: list of weights for the individual kernels and recurrent kernels. + biases: list of biases for individual gate. + shape: the shape for the converted variables that will be feed to cuDNN. + transpose_weights: boolean, whether to transpose the weights. - Args: - weights: list of weights for the individual kernels and recurrent kernels. - biases: list of biases for individual gate. - shape: the shape for the converted variables that will be feed to cuDNN. - transpose_weights: boolean, whether to transpose the weights. + Returns: + The converted weights that can be feed to cuDNN ops as param. + """ - Returns: - The converted weights that can be feed to cuDNN ops as param. - """ - def convert(w): - return tf.transpose(w) if transpose_weights else w + def convert(w): + return tf.transpose(w) if transpose_weights else w - weights = [tf.reshape(convert(x), shape) for x in weights] - biases = [tf.reshape(x, shape) for x in biases] - return tf.concat(weights + biases, axis=0) + weights = [tf.reshape(convert(x), shape) for x in weights] + biases = [tf.reshape(x, shape) for x in biases] + return tf.concat(weights + biases, axis=0) def is_sequence_right_padded(mask): - """Check the mask tensor and see if it right padded. + """Check the mask tensor and see if it right padded. - For cuDNN kernel, it uses the sequence length param to skip the tailing - timestep. If the data is left padded, or not a strict right padding (has - masked value in the middle of the sequence), then cuDNN kernel won't be work - properly in those cases. + For cuDNN kernel, it uses the sequence length param to skip the tailing + timestep. If the data is left padded, or not a strict right padding (has + masked value in the middle of the sequence), then cuDNN kernel won't be work + properly in those cases. - Left padded data: [[False, False, True, True, True]]. - Right padded data: [[True, True, True, False, False]]. - Mixture of mask/unmasked data: [[True, False, True, False, False]]. + Left padded data: [[False, False, True, True, True]]. + Right padded data: [[True, True, True, False, False]]. + Mixture of mask/unmasked data: [[True, False, True, False, False]]. - Note that for the mixed data example above, the actually data RNN should see - are those 2 Trues (index 0 and 2), the index 1 False should be ignored and not - pollute the internal states. + Note that for the mixed data example above, the actually data RNN should see + are those 2 Trues (index 0 and 2), the index 1 False should be ignored and + not pollute the internal states. - Args: - mask: the Boolean tensor with shape [batch, timestep] + Args: + mask: the Boolean tensor with shape [batch, timestep] - Returns: - boolean scalar tensor, whether the mask is strictly right padded. - """ - max_seq_length = tf.shape(mask)[1] - count_of_true = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1) - right_padded_mask = tf.sequence_mask( - count_of_true, maxlen=max_seq_length) - return tf.reduce_all(tf.equal(mask, right_padded_mask)) + Returns: + boolean scalar tensor, whether the mask is strictly right padded. 
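The check documented above (and implemented just below) reconstructs the mask that an ideally right-padded batch of the same lengths would have, then compares element-wise. Applying that logic to the docstring's three examples:

```python
import tensorflow as tf

def is_right_padded(mask):
    # Count the True entries per row, rebuild the ideal right-padded mask
    # of those lengths, and require an exact match.
    lengths = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)
    ideal = tf.sequence_mask(lengths, maxlen=tf.shape(mask)[1])
    return tf.reduce_all(tf.equal(mask, ideal))

print(is_right_padded(tf.constant([[True, True, True, False, False]])))   # True
print(is_right_padded(tf.constant([[False, False, True, True, True]])))   # False: left padded
print(is_right_padded(tf.constant([[True, False, True, False, False]])))  # False: hole in the middle
```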
+ """ + max_seq_length = tf.shape(mask)[1] + count_of_true = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1) + right_padded_mask = tf.sequence_mask(count_of_true, maxlen=max_seq_length) + return tf.reduce_all(tf.equal(mask, right_padded_mask)) def has_fully_masked_sequence(mask): - # See https://github.com/tensorflow/tensorflow/issues/33148 for more details. - # Cudnn kernel will error out if the input sequence contains any fully masked - # data. We walk around this issue by rerouting the computation to standard - # kernel, until the issue on cudnn side has been fixed. - # For a fully masked sequence, it will contain all Falses. To make it easy to - # check, we inverse the boolean, check if any of the sequence has all True. - return tf.reduce_any( - tf.reduce_all( - tf.logical_not(mask), - axis=1)) - - -def is_cudnn_supported_inputs(mask, time_major): - if time_major: - mask = tf.transpose(mask) - - return tf.logical_and( - is_sequence_right_padded(mask), - tf.logical_not(has_fully_masked_sequence(mask))) + # See https://github.com/tensorflow/tensorflow/issues/33148 for more + # details. Cudnn kernel will error out if the input sequence contains any + # fully masked data. We walk around this issue by rerouting the computation + # to standard kernel, until the issue on cudnn side has been fixed. For a + # fully masked sequence, it will contain all Falses. To make it easy to + # check, we inverse the boolean, check if any of the sequence has all True. + return tf.reduce_any(tf.reduce_all(tf.logical_not(mask), axis=1)) + + +def is_cudnn_supported_inputs(mask, time_major, sequence_lengths): + if tf.sysconfig.get_build_info()["is_rocm_build"]: + if (not time_major) and (sequence_lengths is not None): + return False + if mask is not None: + return tf.reduce_all(mask) + elif sequence_lengths is not None: + return tf.math.equal( + tf.reduce_min(sequence_lengths), tf.reduce_max(sequence_lengths) + ) + else: + return True + if mask is None: + return True + if time_major: + mask = tf.transpose(mask) + + return tf.logical_and( + is_sequence_right_padded(mask), + tf.logical_not(has_fully_masked_sequence(mask)), + ) def calculate_sequence_by_mask(mask, time_major): - """Calculate the sequence length tensor (1-D) based on the masking tensor. - - The masking tensor is a 2D boolean tensor with shape [batch, timestep]. For - any timestep that should be masked, the corresponding field will be False. - Consider the following example: - a = [[True, True, False, False], - [True, True, True, False]] - It is a (2, 4) tensor, and the corresponding sequence length result should be - 1D tensor with value [2, 3]. Note that the masking tensor must be right - padded that could be checked by, e.g., `is_sequence_right_padded()`. - - Args: - mask: Boolean tensor with shape [batch, timestep] or [timestep, batch] if - time_major=True. - time_major: Boolean, which indicates whether the mask is time major or batch - major. - Returns: - sequence_length: 1D int32 tensor. 
- """ - timestep_index = 0 if time_major else 1 - return tf.reduce_sum(tf.cast(mask, tf.int32), axis=timestep_index) - - -def generate_defun_backend(unique_api_name, preferred_device, func, - supportive_attributes): - function_attributes = { - _FUNCTION_API_NAME_ATTRIBUTE: unique_api_name, - _FUNCTION_DEVICE_ATTRIBUTE: preferred_device, - } - function_attributes.update(supportive_attributes) - return tf.__internal__.function.defun_with_attributes( - func=func, attributes=function_attributes, autograph=False) + """Calculate the sequence length tensor (1-D) based on the masking tensor. + + The masking tensor is a 2D boolean tensor with shape [batch, timestep]. For + any timestep that should be masked, the corresponding field will be False. + Consider the following example: + a = [[True, True, False, False], + [True, True, True, False]] + It is a (2, 4) tensor, and the corresponding sequence length result should + be 1D tensor with value [2, 3]. Note that the masking tensor must be right + padded that could be checked by, e.g., `is_sequence_right_padded()`. + + Args: + mask: Boolean tensor with shape [batch, timestep] or [timestep, batch] if + time_major=True. + time_major: Boolean, which indicates whether the mask is time major or + batch major. + Returns: + sequence_length: 1D int32 tensor. + """ + timestep_index = 0 if time_major else 1 + return tf.reduce_sum(tf.cast(mask, tf.int32), axis=timestep_index) + + +def generate_defun_backend( + unique_api_name, preferred_device, func, supportive_attributes +): + function_attributes = { + _FUNCTION_API_NAME_ATTRIBUTE: unique_api_name, + _FUNCTION_DEVICE_ATTRIBUTE: preferred_device, + } + function_attributes.update(supportive_attributes) + return tf.function( + func, autograph=False, experimental_attributes=function_attributes + ) def get_context_device_type(): - """Parse the current context and return the device type, eg CPU/GPU.""" - current_device = get_device_name() - if current_device is None: - return None - return tf.compat.v1.DeviceSpec.from_string(current_device).device_type + """Parse the current context and return the device type, eg CPU/GPU.""" + current_device = get_device_name() + if current_device is None: + return None + return tf.compat.v1.DeviceSpec.from_string(current_device).device_type def runtime(runtime_name): - with tf.device('/cpu:0'): - return tf.constant( - runtime_name, dtype=tf.float32, name='runtime') + with tf.device("/cpu:0"): + return tf.constant(runtime_name, dtype=tf.float32, name="runtime") def read_variable_value(v): - """Read the value of a variable if it is variable.""" - if isinstance(v, tf.Variable): - return v.read_value() - return v + """Read the value of a variable if it is variable.""" + if isinstance(v, tf.Variable): + return v.read_value() + return v def function_register(func, *args, **kwargs): - """Register a specialization of a `Function` into the graph. - - This won't actually call the function with the inputs, and only put the - function definition into graph. Register function with different input param - will result into multiple version of functions registered in graph. - - Args: - func: the `Function` instance that generated by a @defun - *args: input arguments for the Python function. - **kwargs: input keyword arguments for the Python function. - - Returns: - a `ConcreteFunction` object specialized to inputs and execution context. - - Raises: - ValueError: When the input function is not a defun wrapped python function. 
- """ - concrete_func = func.get_concrete_function(*args, **kwargs) - concrete_func.add_to_graph() - concrete_func.add_gradient_functions_to_graph() - return concrete_func + """Register a specialization of a `Function` into the graph. + + This won't actually call the function with the inputs, and only put the + function definition into graph. Register function with different input param + will result into multiple version of functions registered in graph. + + Args: + func: the `Function` instance that generated by a @defun + *args: input arguments for the Python function. + **kwargs: input keyword arguments for the Python function. + + Returns: + a `ConcreteFunction` object specialized to inputs and execution context. + + Raises: + ValueError: When the input function is not a defun wrapped python + function. + """ + concrete_func = func.get_concrete_function(*args, **kwargs) + concrete_func.add_to_graph() + concrete_func.add_gradient_functions_to_graph() + return concrete_func diff --git a/keras/layers/rnn/gru_test.py b/keras/layers/rnn/gru_test.py index 22ab1c98c1cb..241ad2c3181f 100644 --- a/keras/layers/rnn/gru_test.py +++ b/keras/layers/rnn/gru_test.py @@ -19,18 +19,21 @@ import os import shutil +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.layers.rnn import gru_lstm_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import np_utils -import numpy as np -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.core.protobuf import rewriter_config_pb2 -from tensorflow.python.framework import test_util as tf_test_util - +from tensorflow.python.framework import ( + test_util as tf_test_util, +) # Global config for grappler setting that is used for graph mode test. 
_rewrites = rewriter_config_pb2.RewriterConfig() @@ -40,903 +43,1015 @@ _config = tf.compat.v1.ConfigProto(graph_options=_graph_options) -@test_utils.run_all_without_tensor_float_32('RNN GRU can use TF32 on GPU') +@test_utils.run_all_without_tensor_float_32("RNN GRU can use TF32 on GPU") @test_combinations.run_all_keras_modes(config=_config) class GRUGraphRewriteTest(test_combinations.TestCase): - input_shape = 10 - output_shape = 8 - rnn_state_size = 8 - timestep = 4 - batch = 100 - epoch = 1 - - @parameterized.named_parameters( - ('non_tan_activation', 'relu', 'sigmoid', 0, False, True, True), - ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True, True), - ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True, True), - ('unroll', 'tanh', 'sigmoid', 0, True, True, True), - ('not_use_bias', 'tanh', 'sigmoid', 0, False, False, True), - ('not_reset_after', 'tanh', 'sigmoid', 0, False, True, False) - ) - @test_utils.run_v2_only - def test_could_use_defun_backend(self, activation, recurrent_activation, - recurrent_dropout, unroll, use_bias, - reset_after): - layer = keras.layers.GRU( - 1, - activation=activation, - recurrent_activation=recurrent_activation, - recurrent_dropout=recurrent_dropout, - unroll=unroll, - use_bias=use_bias, - reset_after=reset_after) - self.assertFalse(layer._could_use_gpu_kernel) - - @test_utils.run_v2_only - def test_use_on_default_activation_with_gpu_kernel(self): - layer = keras.layers.GRU(1, activation=tf.tanh) - self.assertTrue(layer._could_use_gpu_kernel) - - layer = keras.layers.GRU(1, recurrent_activation=tf.sigmoid) - self.assertTrue(layer._could_use_gpu_kernel) - - def test_keras_model_with_gru(self): - epoch = 10 - - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=self.batch, - test_samples=0, - input_shape=(self.timestep, self.input_shape), - num_classes=self.output_shape) - y_train = np_utils.to_categorical(y_train, self.output_shape) - - layer = keras.layers.GRU(self.rnn_state_size) - - inputs = keras.layers.Input( - shape=[self.timestep, self.input_shape], dtype=tf.float32) - - outputs = layer(inputs) - model = keras.models.Model(inputs, outputs) - model.compile('rmsprop', loss='mse') - model.fit(x_train, y_train, epochs=epoch) - model.evaluate(x_train, y_train) - model.predict(x_train) - - def test_dynamic_behavior_GRU(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer = keras.layers.GRU(units, input_shape=(None, embedding_dim)) - model = keras.models.Sequential() - model.add(layer) - model.compile(tf.compat.v1.train.GradientDescentOptimizer(0.001), 'mse') - x = np.random.random((num_samples, timesteps, embedding_dim)) - y = np.random.random((num_samples, units)) - model.train_on_batch(x, y) - - def test_stacking_GRU(self): - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.GRU(10, return_sequences=True, unroll=False)) - model.add(keras.layers.GRU(5, return_sequences=True, unroll=False)) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01)) - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - def test_from_config_GRU(self): - layer_class = keras.layers.GRU - for stateful in (False, True): - l1 = layer_class(units=1, stateful=stateful) - l2 = layer_class.from_config(l1.get_config()) - assert l1.get_config() == l2.get_config() - - @parameterized.named_parameters( - # 
test_name, use_bias, bias_initializer, activation - ('normal', True, 'zeros'), - ('no_bias', False, 'zeros'), - ('random_bias', True, 'random_uniform'), - ) - def test_gru_v2_model_save_load(self, use_bias, bias_initializer): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir) - h5_path = os.path.join(temp_dir, 'test.h5') - - batch = 10 - timestep = 3 - input_dim = 5 - units = 2 - - x = np.random.random((batch, timestep, input_dim)) - - def build_model(): - inputs = keras.layers.Input( - shape=[timestep, input_dim], dtype=tf.float32) - layer = keras.layers.GRU( - units, - use_bias=use_bias, - bias_initializer=bias_initializer) - output = layer(inputs) - return keras.models.Model(inputs, output), layer - - model, layer = build_model() - y_ref = model.predict(x) - model.save_weights(h5_path) - - cloned_model, new_layer = build_model() - cloned_model.load_weights(h5_path) - y = cloned_model.predict(x) - - self.assertAllClose(y, y_ref) - self.assertAllClose(layer.get_weights(), new_layer.get_weights()) - - def test_gru_v2_output_on_multiple_kernel(self): - x_train = np.random.random((self.batch, self.timestep, self.input_shape)) - - inputs = keras.layers.Input( - shape=[self.timestep, self.input_shape], dtype=tf.float32) - with test_utils.device(should_use_gpu=False): - layer = keras.layers.GRU(self.rnn_state_size) - output = layer(inputs) - cpu_model = keras.models.Model(inputs, output) - weights = cpu_model.get_weights() - y_1 = cpu_model.predict(x_train) - - with test_utils.device(should_use_gpu=True): - layer = keras.layers.GRU(self.rnn_state_size) - output = layer(inputs) - gpu_model = keras.models.Model(inputs, output) - gpu_model.set_weights(weights) - y_2 = gpu_model.predict(x_train) - - self.assertAllClose(y_1, y_2, rtol=1e-5, atol=1e-5) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def test_with_masking_layer_GRU(self): - layer_class = keras.layers.GRU - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.Masking(input_shape=(3, 4))) - model.add(layer_class(units=5, return_sequences=True, unroll=False)) - model.compile(loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.001)) - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def test_masking_with_stacking_GRU(self): - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.Masking(input_shape=(3, 4))) - model.add(keras.layers.GRU(10, return_sequences=True, unroll=False)) - model.add(keras.layers.GRU(5, return_sequences=True, unroll=False)) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01)) - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - def test_return_sequences_GRU(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.GRU, - kwargs={'units': units, - 'return_sequences': True}, - input_shape=(num_samples, timesteps, embedding_dim)) - - @tf.test.disable_with_predicate( - 
pred=tf.test.is_built_with_rocm, - skip_message='Double type is not yet supported in ROCm') - @test_utils.run_v2_only - def test_float64_GRU(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.GRU, - kwargs={'units': units, - 'return_sequences': True, - 'dtype': 'float64'}, - input_shape=(num_samples, timesteps, embedding_dim), - input_dtype='float64') - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def test_return_states_GRU(self): - layer_class = keras.layers.GRU - x = np.random.random((2, 3, 4)) - y = np.abs(np.random.random((2, 5))) - s = np.abs(np.random.random((2, 5))) - inputs = keras.layers.Input( - shape=[3, 4], dtype=tf.float32) - masked = keras.layers.Masking()(inputs) - outputs, states = layer_class(units=5, return_state=True)(masked) - - model = keras.models.Model(inputs, [outputs, states]) - model.compile(loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.001)) - model.fit(x, [y, s], epochs=1, batch_size=2, verbose=1) - - def test_dropout_GRU(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.GRU, - kwargs={'units': units, - 'dropout': 0.1, - 'recurrent_dropout': 0.1}, - input_shape=(num_samples, timesteps, embedding_dim)) - - def test_constraints_GRU(self): - embedding_dim = 4 - layer_class = keras.layers.GRU - k_constraint = keras.constraints.max_norm(0.01) - r_constraint = keras.constraints.max_norm(0.01) - b_constraint = keras.constraints.max_norm(0.01) - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_constraint=k_constraint, - recurrent_constraint=r_constraint, - bias_constraint=b_constraint) - layer.build((None, None, embedding_dim)) - self.assertEqual(layer.cell.kernel.constraint, k_constraint) - self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.cell.bias.constraint, b_constraint) - - @parameterized.parameters([0, 1, 2]) - def test_implementation_mode_GRU(self, implementation_mode): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.GRU, - kwargs={'units': units, - 'implementation': implementation_mode}, - input_shape=(num_samples, timesteps, embedding_dim)) - - def test_regularizers_GRU(self): - embedding_dim = 4 - layer_class = keras.layers.GRU - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_regularizer=keras.regularizers.l1(0.01), - recurrent_regularizer=keras.regularizers.l1(0.01), - bias_regularizer='l2', - activity_regularizer='l1') - layer.build((None, None, 2)) - self.assertEqual(len(layer.losses), 3) - - x = keras.backend.variable(np.ones((2, 3, 2))) - layer(x) - if tf.executing_eagerly(): - self.assertEqual(len(layer.losses), 4) - else: - self.assertEqual(len(layer.get_losses_for(x)), 1) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def test_statefulness_GRU(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer_class = keras.layers.GRU - model = keras.models.Sequential() - model.add( - keras.layers.Embedding( - 4, - embedding_dim, - mask_zero=True, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = 
layer_class( - units, return_sequences=False, stateful=True, weights=None) - model.add(layer) - model.compile( - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - out1 = model.predict(np.ones((num_samples, timesteps))) - self.assertEqual(out1.shape, (num_samples, units)) - - # train once so that the states change - model.train_on_batch( - np.ones((num_samples, timesteps)), np.ones((num_samples, units))) - out2 = model.predict(np.ones((num_samples, timesteps))) - - # if the state is not reset, output should be different - self.assertNotEqual(out1.max(), out2.max()) - - # check that output changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out2.max(), out3.max()) - - # check that container-level reset_states() works - model.reset_states() - out4 = model.predict(np.ones((num_samples, timesteps))) - np.testing.assert_allclose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out4.max(), out5.max()) - - # Check masking - layer.reset_states() - - left_padded_input = np.ones((num_samples, timesteps)) - left_padded_input[0, :1] = 0 - left_padded_input[1, :2] = 0 - out6 = model.predict(left_padded_input) - - layer.reset_states() - - right_padded_input = np.ones((num_samples, timesteps)) - right_padded_input[0, -1:] = 0 - right_padded_input[1, -2:] = 0 - out7 = model.predict(right_padded_input) - - layer.reset_states() - - mix_padded_input = np.ones((num_samples, timesteps)) - mix_padded_input[0, 1] = 0 - mix_padded_input[1, 0] = 0 - mix_padded_input[1, 2] = 0 - out8 = model.predict(mix_padded_input) - - self.assertAllClose(out7, out6, atol=1e-5) - self.assertAllClose(out8, out7, atol=1e-5) - - def test_stateful_GRU_training(self): - # See b/123587692 for more context. 
- vocab_size = 20 - embedding_dim = 10 - batch_size = 8 - timestep = 12 - units = 5 - x = np.random.randint(0, vocab_size, size=(batch_size, timestep)) - y = np.random.randint(0, vocab_size, size=(batch_size, timestep)) - - model = keras.Sequential([ - keras.layers.Embedding(vocab_size, embedding_dim, - batch_input_shape=[batch_size, timestep]), - keras.layers.GRU(units, return_sequences=True, stateful=True), - keras.layers.Dense(vocab_size) - ]) - model.compile( - optimizer='adam', - loss='sparse_categorical_crossentropy', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, shuffle=False) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - @test_utils.run_v2_only - def test_explicit_device_with_go_backward_and_mask(self): - batch_size = 8 - timestep = 7 - masksteps = 5 - units = 4 - - inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) - mask = np.ones((batch_size, timestep)).astype(np.bool) - mask[:, masksteps:] = 0 - - gru_layer = keras.layers.GRU( - units, return_sequences=True, go_backwards=True) - with test_utils.device(should_use_gpu=True): - outputs_masked = gru_layer(inputs, mask=tf.constant(mask)) - outputs_trimmed = gru_layer(inputs[:, :masksteps]) - self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed) - - @tf_test_util.enable_output_all_intermediates - def test_v1_session_behavior(self): - with tf.compat.v1.get_default_graph().as_default(): - # See b/139132348 for more details. - x = np.random.uniform(size=(100, 4, 8)) - y = np.random.uniform(size=(100, 1)) - dataset = tf.data.Dataset.from_tensor_slices( - (x, y)).shuffle(100).batch(32) - - inp = keras.layers.Input(shape=(4, 8)) - layer = keras.layers.GRU(1)(inp) - layer = keras.layers.Dense(1)(layer) - - model = keras.models.Model(inp, layer) - - model.compile(loss='mse', optimizer='sgd') - model.fit(dataset) - - def test_with_fully_masked_inputs(self): - num_samples = 8 - timestep = 5 - embedding_dim = 4 - vocab_size = 20 - units = 2 - - inputs = np.random.randint(0, vocab_size, size=(num_samples, timestep)) - # Set the first inputs to be fully zero. - inputs[0, :] = 0.0 - - model = keras.models.Sequential() - model.add( - keras.layers.Embedding( - vocab_size, - embedding_dim, - mask_zero=True, - input_length=timestep, - batch_input_shape=(num_samples, timestep))) - layer = keras.layers.GRU(units) - model.add(layer) - model.compile( - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - # Make sure it doesn't crash with cudnn kernel. - model.predict(inputs) - - # TODO (b/169895267): test with xla_gpu is disabled. - def test_deepcopy(self): - if not tf.executing_eagerly(): - self.skipTest('v2-only test') - original_layer = keras.layers.GRU(5) - copied_layer = copy.deepcopy(original_layer) - self.assertEqual(copied_layer.units, 5) - self.assertEqual(original_layer.get_config(), original_layer.get_config()) - - # Copy layer before layer call on inputs without weight initialization. - inputs = np.random.normal(size=[32, 10, 8]).astype(np.float32) - original_layer = keras.layers.GRU(4) - copied_layer = copy.deepcopy(original_layer) - outputs = original_layer(inputs) - copied_outputs = copied_layer(inputs) - self.assertNotAllClose( - self.evaluate(outputs), self.evaluate(copied_outputs)) - - # Copy layer after layer call on inputs with weight initialization. 
- original_layer = keras.layers.GRU(4) - outputs = original_layer(inputs) - copied_layer = copy.deepcopy(original_layer) - copied_outputs = copied_layer(inputs) - self.assertAllClose(self.evaluate(outputs), self.evaluate(copied_outputs)) - - def _test_runtime_with_model(self, model): - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=self.batch, - test_samples=0, - input_shape=(self.timestep, self.input_shape), - num_classes=self.output_shape) - y_train = np_utils.to_categorical(y_train, self.output_shape) - - model.compile( - optimizer='sgd', - loss=['categorical_crossentropy', None]) - - existing_loss = 0 - for _ in range(self.epoch): - history = model.fit(x_train, y_train) - loss_value = history.history['loss'][0] - - self.assertNotEqual(existing_loss, loss_value) - existing_loss = loss_value - - _, runtime_value = model.predict(x_train) - if tf.test.is_gpu_available(): - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU) - else: - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) - - @test_utils.run_v2_only - def test_GRU_runtime(self): - layer = keras.layers.GRU(self.rnn_state_size, return_runtime=True) - - inputs = keras.layers.Input( - shape=[self.timestep, self.input_shape], dtype=tf.float32) - - outputs, runtime = layer(inputs) - # Expand the runtime so that it is a 1D tensor instead of scalar. - # TF model does not work with scalar model output, specially during - # aggregation. - runtime = keras.layers.Lambda( - lambda x: tf.expand_dims(x, axis=-1))(runtime) - model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime]) - self._test_runtime_with_model(model) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - @test_utils.run_v2_only - def test_GRU_runtime_with_mask(self): - # Masking will affect which backend is selected based on whether the mask - # is strictly right padded. - layer = keras.layers.GRU(self.rnn_state_size, return_runtime=True) - - inputs = keras.layers.Input( - shape=[self.timestep, self.input_shape], dtype=tf.float32) - masked_inputs = keras.layers.Masking()(inputs) - - outputs, runtime = layer(masked_inputs) - # Expand the runtime so that it is a 1D tensor instead of scalar. - # TF model does not work with scalar model output, specially during - # aggregation. - runtime = keras.layers.Lambda( - lambda x: tf.expand_dims(x, axis=-1))(runtime) - model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime]) - - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=self.batch, - test_samples=0, - input_shape=(self.timestep, self.input_shape), - num_classes=self.output_shape) - y_train = np_utils.to_categorical(y_train, self.output_shape) - - model.compile( - optimizer='sgd', - loss=['categorical_crossentropy', None], - run_eagerly=test_utils.should_run_eagerly()) - - model.fit(x_train, y_train) - - # Verify unpadded data. 
- _, runtime_value = model.predict(x_train) - if tf.test.is_gpu_available(): - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU) - else: - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) - - # Update x/y to be right padded by setting the last timestep to 0 - x_train[:, -1, :] = 0 - y_train[:, -1] = 0 - _, runtime_value = model.predict(x_train) - if tf.test.is_gpu_available(): - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU) - else: - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) - - # Further update x/y to be mix padded (masks in the middle), and verify - # only cpu kernel can be selected. - x_train[:, -3, :] = 0 - y_train[:, -3] = 0 - _, runtime_value = model.predict(x_train) - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) - - @test_utils.run_v2_only - def test_GRU_runtime_with_cond(self): - # This test is to demonstrate the graph rewrite of grappler plugin under - # the condition that the function returns different number of internal - # states. - layer = keras.layers.GRU(self.rnn_state_size, return_runtime=True) - - inputs = keras.layers.Input( - shape=[self.timestep, self.input_shape], dtype=tf.float32) - - zeros = tf.zeros([self.batch, self.output_shape]) - dummy_runtime = gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_UNKNOWN) - a = tf.constant(0) - b = tf.constant(1) - # Will always run the GRU layer. - outputs, runtime = tf.cond( - tf.less(a, b), - lambda: layer(inputs), - lambda: (zeros, dummy_runtime)) - - # Expand the runtime so that it is a 1D tensor instead of scalar. - # TF model does not work with scalar model output, specially during - # aggregation. - runtime = keras.layers.Lambda( - lambda x: tf.expand_dims(x, axis=-1))(runtime) - model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime]) - self._test_runtime_with_model(model) - - -@test_utils.run_all_without_tensor_float_32('RNN GRU can use TF32 on GPU') + input_shape = 10 + output_shape = 8 + rnn_state_size = 8 + timestep = 4 + batch = 100 + epoch = 1 + + @parameterized.named_parameters( + ("non_tan_activation", "relu", "sigmoid", 0, False, True, True), + ("non_sigmoid_recur_activation", "tanh", "relu", 0, False, True, True), + ("use_recurrent_dropout", "tanh", "sigmoid", 0.1, False, True, True), + ("unroll", "tanh", "sigmoid", 0, True, True, True), + ("not_use_bias", "tanh", "sigmoid", 0, False, False, True), + ("not_reset_after", "tanh", "sigmoid", 0, False, True, False), + ) + @test_utils.run_v2_only + def test_could_use_defun_backend( + self, + activation, + recurrent_activation, + recurrent_dropout, + unroll, + use_bias, + reset_after, + ): + layer = keras.layers.GRU( + 1, + activation=activation, + recurrent_activation=recurrent_activation, + recurrent_dropout=recurrent_dropout, + unroll=unroll, + use_bias=use_bias, + reset_after=reset_after, + ) + self.assertFalse(layer._could_use_gpu_kernel) + + @test_utils.run_v2_only + def test_use_on_default_activation_with_gpu_kernel(self): + layer = keras.layers.GRU(1, activation=tf.tanh) + self.assertTrue(layer._could_use_gpu_kernel) + + layer = keras.layers.GRU(1, recurrent_activation=tf.sigmoid) + self.assertTrue(layer._could_use_gpu_kernel) + + def test_keras_model_with_gru(self): + epoch = 10 + + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=self.batch, + test_samples=0, + input_shape=(self.timestep, self.input_shape), + num_classes=self.output_shape, + ) + y_train = np_utils.to_categorical(y_train, self.output_shape) + + layer = 
keras.layers.GRU(self.rnn_state_size) + + inputs = keras.layers.Input( + shape=[self.timestep, self.input_shape], dtype=tf.float32 + ) + + outputs = layer(inputs) + model = keras.models.Model(inputs, outputs) + model.compile("rmsprop", loss="mse") + model.fit(x_train, y_train, epochs=epoch) + model.evaluate(x_train, y_train) + model.predict(x_train) + + def test_dynamic_behavior_GRU(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + layer = keras.layers.GRU(units, input_shape=(None, embedding_dim)) + model = keras.models.Sequential() + model.add(layer) + model.compile(tf.compat.v1.train.GradientDescentOptimizer(0.001), "mse") + x = np.random.random((num_samples, timesteps, embedding_dim)) + y = np.random.random((num_samples, units)) + model.train_on_batch(x, y) + + def test_stacking_GRU(self): + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.GRU(10, return_sequences=True, unroll=False)) + model.add(keras.layers.GRU(5, return_sequences=True, unroll=False)) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + ) + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) + + def test_from_config_GRU(self): + layer_class = keras.layers.GRU + for stateful in (False, True): + l1 = layer_class(units=1, stateful=stateful) + l2 = layer_class.from_config(l1.get_config()) + assert l1.get_config() == l2.get_config() + + @parameterized.named_parameters( + # test_name, use_bias, bias_initializer, activation + ("normal", True, "zeros"), + ("no_bias", False, "zeros"), + ("random_bias", True, "random_uniform"), + ) + def test_gru_v2_model_save_load(self, use_bias, bias_initializer): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir) + h5_path = os.path.join(temp_dir, "test.h5") + + batch = 10 + timestep = 3 + input_dim = 5 + units = 2 + + x = np.random.random((batch, timestep, input_dim)) + + def build_model(): + inputs = keras.layers.Input( + shape=[timestep, input_dim], dtype=tf.float32 + ) + layer = keras.layers.GRU( + units, use_bias=use_bias, bias_initializer=bias_initializer + ) + output = layer(inputs) + return keras.models.Model(inputs, output), layer + + model, layer = build_model() + y_ref = model.predict(x) + model.save_weights(h5_path) + + cloned_model, new_layer = build_model() + cloned_model.load_weights(h5_path) + y = cloned_model.predict(x) + + self.assertAllClose(y, y_ref) + self.assertAllClose(layer.get_weights(), new_layer.get_weights()) + + def test_gru_v2_output_on_multiple_kernel(self): + x_train = np.random.random( + (self.batch, self.timestep, self.input_shape) + ) + + inputs = keras.layers.Input( + shape=[self.timestep, self.input_shape], dtype=tf.float32 + ) + with test_utils.device(should_use_gpu=False): + layer = keras.layers.GRU(self.rnn_state_size) + output = layer(inputs) + cpu_model = keras.models.Model(inputs, output) + weights = cpu_model.get_weights() + y_1 = cpu_model.predict(x_train) + + with test_utils.device(should_use_gpu=True): + layer = keras.layers.GRU(self.rnn_state_size) + output = layer(inputs) + gpu_model = keras.models.Model(inputs, output) + gpu_model.set_weights(weights) + y_2 = gpu_model.predict(x_train) + + self.assertAllClose(y_1, y_2, rtol=1e-5, atol=1e-5) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded 
input yet." + ), + ) + def test_with_masking_layer_GRU(self): + layer_class = keras.layers.GRU + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.Masking(input_shape=(3, 4))) + model.add(layer_class(units=5, return_sequences=True, unroll=False)) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.001), + ) + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." + ), + ) + def test_masking_with_stacking_GRU(self): + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.Masking(input_shape=(3, 4))) + model.add(keras.layers.GRU(10, return_sequences=True, unroll=False)) + model.add(keras.layers.GRU(5, return_sequences=True, unroll=False)) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + ) + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) + + def test_return_sequences_GRU(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.GRU, + kwargs={"units": units, "return_sequences": True}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message="Double type is not yet supported in ROCm", + ) + @test_utils.run_v2_only + def test_float64_GRU(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.GRU, + kwargs={ + "units": units, + "return_sequences": True, + "dtype": "float64", + }, + input_shape=(num_samples, timesteps, embedding_dim), + input_dtype="float64", + ) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." 
+ ), + ) + def test_return_states_GRU(self): + layer_class = keras.layers.GRU + x = np.random.random((2, 3, 4)) + y = np.abs(np.random.random((2, 5))) + s = np.abs(np.random.random((2, 5))) + inputs = keras.layers.Input(shape=[3, 4], dtype=tf.float32) + masked = keras.layers.Masking()(inputs) + outputs, states = layer_class(units=5, return_state=True)(masked) + + model = keras.models.Model(inputs, [outputs, states]) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.001), + ) + model.fit(x, [y, s], epochs=1, batch_size=2, verbose=1) + + def test_dropout_GRU(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.GRU, + kwargs={"units": units, "dropout": 0.1, "recurrent_dropout": 0.1}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + def test_constraints_GRU(self): + embedding_dim = 4 + layer_class = keras.layers.GRU + k_constraint = keras.constraints.max_norm(0.01) + r_constraint = keras.constraints.max_norm(0.01) + b_constraint = keras.constraints.max_norm(0.01) + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_constraint=k_constraint, + recurrent_constraint=r_constraint, + bias_constraint=b_constraint, + ) + layer.build((None, None, embedding_dim)) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) + + @parameterized.parameters([0, 1, 2]) + def test_implementation_mode_GRU(self, implementation_mode): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.GRU, + kwargs={"units": units, "implementation": implementation_mode}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + def test_regularizers_GRU(self): + embedding_dim = 4 + layer_class = keras.layers.GRU + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_regularizer=keras.regularizers.l1(0.01), + recurrent_regularizer=keras.regularizers.l1(0.01), + bias_regularizer="l2", + activity_regularizer="l1", + ) + layer.build((None, None, 2)) + self.assertEqual(len(layer.losses), 3) + + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + if tf.executing_eagerly(): + self.assertEqual(len(layer.losses), 4) + else: + self.assertEqual(len(layer.get_losses_for(x)), 1) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." 
+        ),
+    )
+    def test_statefulness_GRU(self):
+        num_samples = 2
+        timesteps = 3
+        embedding_dim = 4
+        units = 2
+        layer_class = keras.layers.GRU
+        model = keras.models.Sequential()
+        model.add(
+            keras.layers.Embedding(
+                4,
+                embedding_dim,
+                mask_zero=True,
+                input_length=timesteps,
+                batch_input_shape=(num_samples, timesteps),
+            )
+        )
+        layer = layer_class(
+            units, return_sequences=False, stateful=True, weights=None
+        )
+        model.add(layer)
+        model.compile(
+            optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01),
+            loss="mse",
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+        out1 = model.predict(np.ones((num_samples, timesteps)))
+        self.assertEqual(out1.shape, (num_samples, units))
+
+        # train once so that the states change
+        model.train_on_batch(
+            np.ones((num_samples, timesteps)), np.ones((num_samples, units))
+        )
+        out2 = model.predict(np.ones((num_samples, timesteps)))
+
+        # if the state is not reset, output should be different
+        self.assertNotEqual(out1.max(), out2.max())
+
+        # check that output changes after states are reset
+        # (even though the model itself didn't change)
+        layer.reset_states()
+        out3 = model.predict(np.ones((num_samples, timesteps)))
+        self.assertNotEqual(out2.max(), out3.max())
+
+        # check that container-level reset_states() works
+        model.reset_states()
+        out4 = model.predict(np.ones((num_samples, timesteps)))
+        np.testing.assert_allclose(out3, out4, atol=1e-5)
+
+        # check that the call to `predict` updated the states
+        out5 = model.predict(np.ones((num_samples, timesteps)))
+        self.assertNotEqual(out4.max(), out5.max())
+
+        # Check masking
+        layer.reset_states()
+
+        left_padded_input = np.ones((num_samples, timesteps))
+        left_padded_input[0, :1] = 0
+        left_padded_input[1, :2] = 0
+        out6 = model.predict(left_padded_input)
+
+        layer.reset_states()
+
+        right_padded_input = np.ones((num_samples, timesteps))
+        right_padded_input[0, -1:] = 0
+        right_padded_input[1, -2:] = 0
+        out7 = model.predict(right_padded_input)
+
+        layer.reset_states()
+
+        mix_padded_input = np.ones((num_samples, timesteps))
+        mix_padded_input[0, 1] = 0
+        mix_padded_input[1, 0] = 0
+        mix_padded_input[1, 2] = 0
+        out8 = model.predict(mix_padded_input)
+
+        self.assertAllClose(out7, out6, atol=1e-5)
+        self.assertAllClose(out8, out7, atol=1e-5)
+
+    def test_stateful_GRU_training(self):
+        # See b/123587692 for more context.
+        vocab_size = 20
+        embedding_dim = 10
+        batch_size = 8
+        timestep = 12
+        units = 5
+        x = np.random.randint(0, vocab_size, size=(batch_size, timestep))
+        y = np.random.randint(0, vocab_size, size=(batch_size, timestep))
+
+        model = keras.Sequential(
+            [
+                keras.layers.Embedding(
+                    vocab_size,
+                    embedding_dim,
+                    batch_input_shape=[batch_size, timestep],
+                ),
+                keras.layers.GRU(units, return_sequences=True, stateful=True),
+                keras.layers.Dense(vocab_size),
+            ]
+        )
+        model.compile(
+            optimizer="adam",
+            loss="sparse_categorical_crossentropy",
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+        model.fit(x, y, epochs=1, shuffle=False)
+
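The two stateful tests above lean on the documented contract of `stateful=True` RNNs: each `predict` or `train_on_batch` call starts from the states left behind by the previous call, until `reset_states()` zeroes them. A minimal standalone sketch of that contract, with hypothetical sizes (illustrative only, not part of the patch):

import numpy as np
from tensorflow import keras  # assumes the TF-bundled Keras

batch, timesteps, features, units = 4, 3, 2, 5
model = keras.Sequential(
    [
        keras.layers.GRU(
            units,
            stateful=True,
            batch_input_shape=(batch, timesteps, features),
        )
    ]
)

x = np.ones((batch, timesteps, features), dtype="float32")
out1 = model.predict(x)  # starts from zero states and updates them
out2 = model.predict(x)  # continues from the states the first call left
# out1 and out2 differ (almost surely) because the carried-over states
# are nonzero for randomly initialized weights.

model.reset_states()  # states back to zero
out3 = model.predict(x)
np.testing.assert_allclose(out1, out3, atol=1e-5)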
+    @tf.test.disable_with_predicate(
+        pred=tf.test.is_built_with_rocm,
+        skip_message=(
+            "Skipping as ROCm MIOpen does not support padded input yet."
+        ),
+    )
+    @test_utils.run_v2_only
+    def test_explicit_device_with_go_backward_and_mask(self):
+        batch_size = 8
+        timestep = 7
+        masksteps = 5
+        units = 4
+
+        inputs = np.random.randn(batch_size, timestep, units).astype(
+            np.float32
+        )
+        mask = np.ones((batch_size, timestep)).astype(bool)
+        mask[:, masksteps:] = 0
+
+        gru_layer = keras.layers.GRU(
+            units, return_sequences=True, go_backwards=True
+        )
+        with test_utils.device(should_use_gpu=True):
+            outputs_masked = gru_layer(inputs, mask=tf.constant(mask))
+            outputs_trimmed = gru_layer(inputs[:, :masksteps])
+        self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
+
+    @tf_test_util.enable_output_all_intermediates
+    def test_v1_session_behavior(self):
+        with tf.compat.v1.get_default_graph().as_default():
+            # See b/139132348 for more details.
+            x = np.random.uniform(size=(100, 4, 8))
+            y = np.random.uniform(size=(100, 1))
+            dataset = (
+                tf.data.Dataset.from_tensor_slices((x, y))
+                .shuffle(100)
+                .batch(32)
+            )
+
+            inp = keras.layers.Input(shape=(4, 8))
+            layer = keras.layers.GRU(1)(inp)
+            layer = keras.layers.Dense(1)(layer)
+
+            model = keras.models.Model(inp, layer)
+
+            model.compile(loss="mse", optimizer="sgd")
+            model.fit(dataset)
+
+    def test_with_fully_masked_inputs(self):
+        num_samples = 8
+        timestep = 5
+        embedding_dim = 4
+        vocab_size = 20
+        units = 2
+
+        inputs = np.random.randint(0, vocab_size, size=(num_samples, timestep))
+        # Set the first inputs to be fully zero.
+        inputs[0, :] = 0.0
+
+        model = keras.models.Sequential()
+        model.add(
+            keras.layers.Embedding(
+                vocab_size,
+                embedding_dim,
+                mask_zero=True,
+                input_length=timestep,
+                batch_input_shape=(num_samples, timestep),
+            )
+        )
+        layer = keras.layers.GRU(units)
+        model.add(layer)
+        model.compile(
+            optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01),
+            loss="mse",
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+        # Make sure it doesn't crash with cudnn kernel.
+        model.predict(inputs)
+
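For context on `test_with_fully_masked_inputs`: `Embedding(mask_zero=True)` treats token id 0 as padding, so the all-zero first row above yields an all-False mask and the GRU receives a fully masked sequence. A small sketch of how that mask is derived (hypothetical vocabulary and sizes, not part of the patch):

import numpy as np
from tensorflow import keras  # assumes the TF-bundled Keras

emb = keras.layers.Embedding(20, 4, mask_zero=True)
token_ids = np.array([[0, 0, 0], [7, 3, 0]])  # first row fully padded
print(emb.compute_mask(token_ids).numpy())
# [[False False False]
#  [ True  True False]]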
+    # TODO (b/169895267): test with xla_gpu is disabled.
+    def test_deepcopy(self):
+        if not tf.executing_eagerly():
+            self.skipTest("v2-only test")
+        original_layer = keras.layers.GRU(5)
+        copied_layer = copy.deepcopy(original_layer)
+        self.assertEqual(copied_layer.units, 5)
+        self.assertEqual(
+            original_layer.get_config(), copied_layer.get_config()
+        )
+
+        # Copy layer before layer call on inputs without weight initialization.
+        inputs = np.random.normal(size=[32, 10, 8]).astype(np.float32)
+        original_layer = keras.layers.GRU(4)
+        copied_layer = copy.deepcopy(original_layer)
+        outputs = original_layer(inputs)
+        copied_outputs = copied_layer(inputs)
+        self.assertNotAllClose(
+            self.evaluate(outputs), self.evaluate(copied_outputs)
+        )
+
+        # Copy layer after layer call on inputs with weight initialization.
+        original_layer = keras.layers.GRU(4)
+        outputs = original_layer(inputs)
+        copied_layer = copy.deepcopy(original_layer)
+        copied_outputs = copied_layer(inputs)
+        self.assertAllClose(
+            self.evaluate(outputs), self.evaluate(copied_outputs)
+        )
+
+    def _test_runtime_with_model(self, model):
+        (x_train, y_train), _ = test_utils.get_test_data(
+            train_samples=self.batch,
+            test_samples=0,
+            input_shape=(self.timestep, self.input_shape),
+            num_classes=self.output_shape,
+        )
+        y_train = np_utils.to_categorical(y_train, self.output_shape)
+
+        model.compile(optimizer="sgd", loss=["categorical_crossentropy", None])
+
+        existing_loss = 0
+        for _ in range(self.epoch):
+            history = model.fit(x_train, y_train)
+            loss_value = history.history["loss"][0]
+
+            self.assertNotEqual(existing_loss, loss_value)
+            existing_loss = loss_value
+
+        _, runtime_value = model.predict(x_train)
+        if not tf.sysconfig.get_build_info()["is_rocm_build"]:
+            if tf.test.is_gpu_available():
+                self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU)
+            else:
+                self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU)
+
+    @test_utils.run_v2_only
+    def test_GRU_runtime(self):
+        layer = keras.layers.GRU(self.rnn_state_size, return_runtime=True)
+
+        inputs = keras.layers.Input(
+            shape=[self.timestep, self.input_shape], dtype=tf.float32
+        )
+
+        outputs, runtime = layer(inputs)
+        # Expand the runtime so that it is a 1D tensor instead of scalar.
+        # TF model does not work with scalar model output, especially during
+        # aggregation.
+        runtime = keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(
+            runtime
+        )
+        model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime])
+        self._test_runtime_with_model(model)
+
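The runtime probe wired up in `test_GRU_runtime` can also be read outside the test harness. A hedged sketch that mirrors the wiring above (`return_runtime=True` and the `gru_lstm_utils` constants are internal details of this codebase, and the sizes are hypothetical):

import numpy as np
import tensorflow.compat.v2 as tf

import keras
from keras.layers.rnn import gru_lstm_utils

inputs = keras.layers.Input(shape=[4, 10], dtype=tf.float32)
outputs, runtime = keras.layers.GRU(8, return_runtime=True)(inputs)
# Expand the scalar runtime so it can ride along as a regular model output.
runtime = keras.layers.Lambda(lambda t: tf.expand_dims(t, axis=-1))(runtime)
model = keras.models.Model(inputs, [outputs, runtime])

_, runtime_value = model.predict(np.zeros((2, 4, 10), dtype="float32"))
if runtime_value[0] == gru_lstm_utils.RUNTIME_GPU:
    print("cuDNN kernel was selected")
else:
    print("generic (CPU-compatible) kernel was selected")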
+    @tf.test.disable_with_predicate(
+        pred=tf.test.is_built_with_rocm,
+        skip_message=(
+            "Skipping as ROCm MIOpen does not support padded input yet."
+        ),
+    )
+    @test_utils.run_v2_only
+    def test_GRU_runtime_with_mask(self):
+        # Masking will affect which backend is selected based on whether the
+        # mask is strictly right padded.
+        layer = keras.layers.GRU(self.rnn_state_size, return_runtime=True)
+
+        inputs = keras.layers.Input(
+            shape=[self.timestep, self.input_shape], dtype=tf.float32
+        )
+        masked_inputs = keras.layers.Masking()(inputs)
+
+        outputs, runtime = layer(masked_inputs)
+        # Expand the runtime so that it is a 1D tensor instead of scalar.
+        # TF model does not work with scalar model output, especially during
+        # aggregation.
+        runtime = keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(
+            runtime
+        )
+        model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime])
+
+        (x_train, y_train), _ = test_utils.get_test_data(
+            train_samples=self.batch,
+            test_samples=0,
+            input_shape=(self.timestep, self.input_shape),
+            num_classes=self.output_shape,
+        )
+        y_train = np_utils.to_categorical(y_train, self.output_shape)
+
+        model.compile(
+            optimizer="sgd",
+            loss=["categorical_crossentropy", None],
+            run_eagerly=test_utils.should_run_eagerly(),
+        )
+
+        model.fit(x_train, y_train)
+
+        # Verify unpadded data.
+        _, runtime_value = model.predict(x_train)
+        if tf.test.is_gpu_available():
+            self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU)
+        else:
+            self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU)
+
+        # Update x/y to be right padded by setting the last timestep to 0.
+        x_train[:, -1, :] = 0
+        y_train[:, -1] = 0
+        _, runtime_value = model.predict(x_train)
+        if tf.test.is_gpu_available():
+            self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU)
+        else:
+            self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU)
+
+        # Further update x/y to be mix padded (masks in the middle), and
+        # verify that only the cpu kernel can be selected.
+        x_train[:, -3, :] = 0
+        y_train[:, -3] = 0
+        _, runtime_value = model.predict(x_train)
+        self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU)
+
+    @test_utils.run_v2_only
+    def test_GRU_runtime_with_cond(self):
+        # This test demonstrates the grappler plugin's graph rewrite under
+        # the condition that the function returns a different number of
+        # internal states.
+        layer = keras.layers.GRU(self.rnn_state_size, return_runtime=True)
+
+        inputs = keras.layers.Input(
+            shape=[self.timestep, self.input_shape], dtype=tf.float32
+        )
+
+        zeros = tf.zeros([self.batch, self.output_shape])
+        dummy_runtime = gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_UNKNOWN)
+        a = tf.constant(0)
+        b = tf.constant(1)
+        # Will always run the GRU layer.
+        outputs, runtime = tf.cond(
+            tf.less(a, b), lambda: layer(inputs), lambda: (zeros, dummy_runtime)
+        )
+
+        # Expand the runtime so that it is a 1D tensor instead of scalar.
+        # TF model does not work with scalar model output, especially during
+        # aggregation.
+        runtime = keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(
+            runtime
+        )
+        model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime])
+        self._test_runtime_with_model(model)
+
+
+@test_utils.run_all_without_tensor_float_32("RNN GRU can use TF32 on GPU")
class GRULayerGradientTapeTest(test_combinations.TestCase):
+    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
+    def test_in_tape(self):
+        with self.test_session(config=_config):
+            time_steps = 10
+            embedding_size = 11
+            gru_unit_size = 12
-  @test_combinations.generate(test_combinations.combine(mode=['eager']))
-  def test_in_tape(self):
-    with self.test_session(config=_config):
-      time_steps = 10
-      embedding_size = 11
-      gru_unit_size = 12
-
-      gru_layer = keras.layers.GRU(
-          gru_unit_size,
-          return_sequences=True,
-          return_state=True,
-          recurrent_activation='sigmoid',
-          recurrent_initializer='glorot_uniform')
+            gru_layer = keras.layers.GRU(
+                gru_unit_size,
+                return_sequences=True,
+                return_state=True,
+                recurrent_activation="sigmoid",
+                recurrent_initializer="glorot_uniform",
+            )
-      x = tf.random.uniform([1, time_steps, embedding_size])
-      y = tf.random.uniform([1, gru_unit_size])
+            x = tf.random.uniform([1, time_steps, embedding_size])
+            y = tf.random.uniform([1, gru_unit_size])
-      with tf.GradientTape() as tape:
-        hidden_state = tf.zeros([1, gru_unit_size], dtype=tf.float32)
-        _, state = gru_layer(x, initial_state=hidden_state)
+            with tf.GradientTape() as tape:
+                hidden_state = tf.zeros([1, gru_unit_size], dtype=tf.float32)
+                _, state = gru_layer(x, initial_state=hidden_state)
-        loss = tf.reduce_mean(tf.square(state - y))
+                loss = tf.reduce_mean(tf.square(state - y))
-      tape.gradient(loss, gru_layer.variables)
+            tape.gradient(loss, gru_layer.variables)


@test_combinations.run_all_keras_modes
class GRULayerTest(test_combinations.TestCase):
-
-  def
test_return_sequences_gru(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.GRU, - kwargs={'units': units, - 'return_sequences': True}, - input_shape=(num_samples, timesteps, embedding_dim)) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Double type is not yet supported in ROCm') - @test_utils.run_v2_only - def test_float64_gru(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.GRU, - kwargs={'units': units, - 'return_sequences': True, - 'dtype': 'float64'}, - input_shape=(num_samples, timesteps, embedding_dim), - input_dtype='float64') - - def test_dynamic_behavior_gru(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer = keras.layers.GRU(units, input_shape=(None, embedding_dim)) - model = keras.models.Sequential() - model.add(layer) - model.compile( - 'rmsprop', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - x = np.random.random((num_samples, timesteps, embedding_dim)) - y = np.random.random((num_samples, units)) - model.train_on_batch(x, y) - - def test_dropout_gru(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.GRU, - kwargs={'units': units, - 'dropout': 0.1, - 'recurrent_dropout': 0.1}, - input_shape=(num_samples, timesteps, embedding_dim)) - - def test_recurrent_dropout_with_implementation_restriction(self): - layer = keras.layers.GRU(2, recurrent_dropout=0.1, implementation=2) - # The implementation is force to 1 due to the limit of recurrent_dropout. - self.assertEqual(layer.implementation, 1) - - @parameterized.parameters([0, 1, 2]) - def test_implementation_mode_gru(self, implementation_mode): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.GRU, - kwargs={'units': units, - 'implementation': implementation_mode}, - input_shape=(num_samples, timesteps, embedding_dim)) - - def test_reset_after_gru(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=num_samples, - test_samples=0, - input_shape=(timesteps, embedding_dim), - num_classes=units) - y_train = np_utils.to_categorical(y_train, units) - - inputs = keras.layers.Input(shape=[timesteps, embedding_dim]) - gru_layer = keras.layers.GRU(units, - reset_after=True) - output = gru_layer(inputs) - gru_model = keras.models.Model(inputs, output) - gru_model.compile( - 'rmsprop', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - gru_model.fit(x_train, y_train) - gru_model.predict(x_train) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='MIOpen only supports packed input output') - def test_with_masking_layer_gru(self): - layer_class = keras.layers.GRU - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.Masking(input_shape=(3, 4))) - model.add(layer_class(units=5, return_sequences=True, unroll=False)) - model.compile( - loss='categorical_crossentropy', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='MIOpen only supports packed input output') - def 
test_statefulness_gru(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer_class = keras.layers.GRU - - model = keras.models.Sequential() - model.add( - keras.layers.Embedding( - 4, - embedding_dim, - mask_zero=True, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = layer_class( - units, return_sequences=False, stateful=True, weights=None) - model.add(layer) - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - out1 = model.predict(np.ones((num_samples, timesteps))) - self.assertEqual(out1.shape, (num_samples, units)) - - # train once so that the states change - model.train_on_batch( - np.ones((num_samples, timesteps)), np.ones((num_samples, units))) - out2 = model.predict(np.ones((num_samples, timesteps))) - - # if the state is not reset, output should be different - self.assertNotEqual(out1.max(), out2.max()) - - # check that output changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out2.max(), out3.max()) - - # check that container-level reset_states() works - model.reset_states() - out4 = model.predict(np.ones((num_samples, timesteps))) - np.testing.assert_allclose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out4.max(), out5.max()) - - # Check masking - layer.reset_states() - - left_padded_input = np.ones((num_samples, timesteps)) - left_padded_input[0, :1] = 0 - left_padded_input[1, :2] = 0 - out6 = model.predict(left_padded_input) - - layer.reset_states() - - right_padded_input = np.ones((num_samples, timesteps)) - right_padded_input[0, -1:] = 0 - right_padded_input[1, -2:] = 0 - out7 = model.predict(right_padded_input) - - np.testing.assert_allclose(out7, out6, atol=1e-5) - - def test_get_initial_states(self): - batch_size = 4 - cell = keras.layers.GRUCell(20) - initial_state = cell.get_initial_state( - batch_size=batch_size, dtype=tf.float32) - _, state = cell(np.ones((batch_size, 20), dtype=np.float32), initial_state) - self.assertEqual(state.shape, initial_state.shape) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_return_sequences_gru(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.GRU, + kwargs={"units": units, "return_sequences": True}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message="Double type is not yet supported in ROCm", + ) + @test_utils.run_v2_only + def test_float64_gru(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.GRU, + kwargs={ + "units": units, + "return_sequences": True, + "dtype": "float64", + }, + input_shape=(num_samples, timesteps, embedding_dim), + input_dtype="float64", + ) + + def test_dynamic_behavior_gru(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + layer = keras.layers.GRU(units, input_shape=(None, embedding_dim)) + model = keras.models.Sequential() + model.add(layer) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + x = np.random.random((num_samples, timesteps, embedding_dim)) + y = np.random.random((num_samples, units)) + model.train_on_batch(x, y) + + def 
test_dropout_gru(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.GRU, + kwargs={"units": units, "dropout": 0.1, "recurrent_dropout": 0.1}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + def test_recurrent_dropout_with_implementation_restriction(self): + layer = keras.layers.GRU(2, recurrent_dropout=0.1, implementation=2) + # The implementation is force to 1 due to the limit of + # recurrent_dropout. + self.assertEqual(layer.implementation, 1) + + @test_utils.run_v2_only + def test_dropout_variable_name(self): + layer = keras.layers.RNN( + keras.layers.GRUCell(2, dropout=0.1, force_generator=True) + ) + layer(np.random.random((2, 3, 4))) + self.assertEqual( + layer.cell._random_generator._generator._state_var.name, + "rnn/gru_cell/StateVar:0", + ) + + layer = keras.layers.GRU(2, dropout=0.1, force_generator=True) + layer(np.random.random((2, 3, 4))) + self.assertEqual( + layer._random_generator._generator._state_var.name, + "gru/StateVar:0", + ) + + @parameterized.parameters([0, 1, 2]) + def test_implementation_mode_gru(self, implementation_mode): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.GRU, + kwargs={"units": units, "implementation": implementation_mode}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + def test_reset_after_gru(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=num_samples, + test_samples=0, + input_shape=(timesteps, embedding_dim), + num_classes=units, + ) + y_train = np_utils.to_categorical(y_train, units) + + inputs = keras.layers.Input(shape=[timesteps, embedding_dim]) + gru_layer = keras.layers.GRU(units, reset_after=True) + output = gru_layer(inputs) + gru_model = keras.models.Model(inputs, output) + gru_model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + gru_model.fit(x_train, y_train) + gru_model.predict(x_train) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message="MIOpen only supports packed input output", + ) + def test_with_masking_layer_gru(self): + layer_class = keras.layers.GRU + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.Masking(input_shape=(3, 4))) + model.add(layer_class(units=5, return_sequences=True, unroll=False)) + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message="MIOpen only supports packed input output", + ) + def test_statefulness_gru(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + layer_class = keras.layers.GRU + + model = keras.models.Sequential() + model.add( + keras.layers.Embedding( + 4, + embedding_dim, + mask_zero=True, + input_length=timesteps, + batch_input_shape=(num_samples, timesteps), + ) + ) + layer = layer_class( + units, return_sequences=False, stateful=True, weights=None + ) + model.add(layer) + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + out1 = model.predict(np.ones((num_samples, timesteps))) + self.assertEqual(out1.shape, (num_samples, units)) + + # train once so 
that the states change + model.train_on_batch( + np.ones((num_samples, timesteps)), np.ones((num_samples, units)) + ) + out2 = model.predict(np.ones((num_samples, timesteps))) + + # if the state is not reset, output should be different + self.assertNotEqual(out1.max(), out2.max()) + + # check that output changes after states are reset + # (even though the model itself didn't change) + layer.reset_states() + out3 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out2.max(), out3.max()) + + # check that container-level reset_states() works + model.reset_states() + out4 = model.predict(np.ones((num_samples, timesteps))) + np.testing.assert_allclose(out3, out4, atol=1e-5) + + # check that the call to `predict` updated the states + out5 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out4.max(), out5.max()) + + # Check masking + layer.reset_states() + + left_padded_input = np.ones((num_samples, timesteps)) + left_padded_input[0, :1] = 0 + left_padded_input[1, :2] = 0 + out6 = model.predict(left_padded_input) + + layer.reset_states() + + right_padded_input = np.ones((num_samples, timesteps)) + right_padded_input[0, -1:] = 0 + right_padded_input[1, -2:] = 0 + out7 = model.predict(right_padded_input) + + np.testing.assert_allclose(out7, out6, atol=1e-5) + + def test_get_initial_states(self): + batch_size = 4 + cell = keras.layers.GRUCell(20) + initial_state = cell.get_initial_state( + batch_size=batch_size, dtype=tf.float32 + ) + _, state = cell( + np.ones((batch_size, 20), dtype=np.float32), initial_state + ) + self.assertEqual(state.shape, initial_state.shape) + + @test_utils.run_v2_only + def test_cloned_weight_names(self): + inp = keras.Input([None, 3]) + rnn = keras.layers.GRU(units=3) + model = keras.Model(inp, rnn(inp)) + clone = keras.models.clone_model(model) + + model_names = [x.name for x in model.weights] + clone_names = [x.name for x in clone.weights] + self.assertEqual(model_names, clone_names) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class GRULayerGenericTest(tf.test.TestCase): - - def test_constraints_gru(self): - embedding_dim = 4 - layer_class = keras.layers.GRU - k_constraint = keras.constraints.max_norm(0.01) - r_constraint = keras.constraints.max_norm(0.01) - b_constraint = keras.constraints.max_norm(0.01) - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_constraint=k_constraint, - recurrent_constraint=r_constraint, - bias_constraint=b_constraint) - layer.build((None, None, embedding_dim)) - self.assertEqual(layer.cell.kernel.constraint, k_constraint) - self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.cell.bias.constraint, b_constraint) - - def test_from_config_gru(self): - layer_class = keras.layers.GRU - for stateful in (False, True): - l1 = layer_class(units=1, stateful=stateful) - l2 = layer_class.from_config(l1.get_config()) - assert l1.get_config() == l2.get_config() - - def test_deep_copy_gru(self): - cell = keras.layers.GRUCell(5) - copied_cell = copy.deepcopy(cell) - self.assertEqual(copied_cell.units, 5) - self.assertEqual(cell.get_config(), copied_cell.get_config()) - - def test_regularizers_gru(self): - embedding_dim = 4 - layer_class = keras.layers.GRU - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_regularizer=keras.regularizers.l1(0.01), - recurrent_regularizer=keras.regularizers.l1(0.01), - 
bias_regularizer='l2', - activity_regularizer='l1') - layer.build((None, None, 2)) - self.assertLen(layer.losses, 3) - - x = keras.backend.variable(np.ones((2, 3, 2))) - layer(x) - if tf.executing_eagerly(): - self.assertLen(layer.losses, 4) - else: - self.assertLen(layer.get_losses_for(x), 1) - - -if __name__ == '__main__': - tf.test.main() + def test_constraints_gru(self): + embedding_dim = 4 + layer_class = keras.layers.GRU + k_constraint = keras.constraints.max_norm(0.01) + r_constraint = keras.constraints.max_norm(0.01) + b_constraint = keras.constraints.max_norm(0.01) + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_constraint=k_constraint, + recurrent_constraint=r_constraint, + bias_constraint=b_constraint, + ) + layer.build((None, None, embedding_dim)) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) + + def test_from_config_gru(self): + layer_class = keras.layers.GRU + for stateful in (False, True): + l1 = layer_class(units=1, stateful=stateful) + l2 = layer_class.from_config(l1.get_config()) + assert l1.get_config() == l2.get_config() + + def test_deep_copy_gru(self): + cell = keras.layers.GRUCell(5) + copied_cell = copy.deepcopy(cell) + self.assertEqual(copied_cell.units, 5) + self.assertEqual(cell.get_config(), copied_cell.get_config()) + + def test_regularizers_gru(self): + embedding_dim = 4 + layer_class = keras.layers.GRU + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_regularizer=keras.regularizers.l1(0.01), + recurrent_regularizer=keras.regularizers.l1(0.01), + bias_regularizer="l2", + activity_regularizer="l1", + ) + layer.build((None, None, 2)) + self.assertLen(layer.losses, 3) + + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + if tf.executing_eagerly(): + self.assertLen(layer.losses, 4) + else: + self.assertLen(layer.get_losses_for(x), 1) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/gru_v1.py b/keras/layers/rnn/gru_v1.py index eba9493c2f6f..f6b458c6f8f1 100644 --- a/keras/layers/rnn/gru_v1.py +++ b/keras/layers/rnn/gru_v1.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== """Gated Recurrent Unit V1 layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import activations from keras import constraints @@ -24,372 +24,381 @@ from keras.layers.rnn import rnn_utils from keras.layers.rnn.base_rnn import RNN +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export -@keras_export(v1=['keras.layers.GRUCell']) +@keras_export(v1=["keras.layers.GRUCell"]) class GRUCell(gru.GRUCell): - """Cell class for the GRU layer. - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - Default: hyperbolic tangent (`tanh`). - If you pass None, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step. - Default: hard sigmoid (`hard_sigmoid`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. 
- kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - kernel_constraint: Constraint function applied to - the `kernel` weights matrix. - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. - dropout: Float between 0 and 1. - Fraction of the units to drop for the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - reset_after: GRU convention (whether to apply reset gate after or - before matrix multiplication). False = "before" (default), - True = "after" (cuDNN compatible). - - Call arguments: - inputs: A 2D tensor. - states: List of state tensors corresponding to the previous timestep. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. Only relevant when `dropout` or - `recurrent_dropout` is used. - """ - - def __init__(self, - units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - reset_after=False, - **kwargs): - super().__init__( + """Cell class for the GRU layer. + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). + If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix. + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + kernel_constraint: Constraint function applied to + the `kernel` weights matrix. + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + dropout: Float between 0 and 1. Fraction of the units to drop for the + linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. 
+ Fraction of the units to drop for + the linear transformation of the recurrent state. + reset_after: GRU convention (whether to apply reset gate after or + before matrix multiplication). False = "before" (default), + True = "after" (cuDNN compatible). + + Call arguments: + inputs: A 2D tensor. + states: List of state tensors corresponding to the previous timestep. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. Only relevant when `dropout` or + `recurrent_dropout` is used. + """ + + def __init__( + self, units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=kwargs.pop('implementation', 1), - reset_after=reset_after, - **kwargs) - - -@keras_export(v1=['keras.layers.GRU']) + activation="tanh", + recurrent_activation="hard_sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + reset_after=False, + **kwargs + ): + super().__init__( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=kwargs.pop("implementation", 1), + reset_after=reset_after, + **kwargs + ) + + +@keras_export(v1=["keras.layers.GRU"]) class GRU(RNN): - """Gated Recurrent Unit - Cho et al. 2014. - - There are two variants. The default one is based on 1406.1078v3 and - has reset gate applied to hidden state before matrix multiplication. The - other one is based on original 1406.1078v1 and has the order reversed. - - The second variant is compatible with CuDNNGRU (GPU-only) and allows - inference on CPU. Thus it has separate biases for `kernel` and - `recurrent_kernel`. Use `'reset_after'=True` and - `recurrent_activation='sigmoid'`. - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - Default: hyperbolic tangent (`tanh`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step. - Default: hard sigmoid (`hard_sigmoid`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. 
- recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation").. - kernel_constraint: Constraint function applied to - the `kernel` weights matrix. - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - time_major: The shape format of the `inputs` and `outputs` tensors. - If True, the inputs and outputs will be in shape - `(timesteps, batch, ...)`, whereas in the False case, it will be - `(batch, timesteps, ...)`. Using `time_major = True` is a bit more - efficient because it avoids transposes at the beginning and end of the - RNN calculation. However, most TensorFlow data is batch-major, so by - default this function accepts input and emits output in batch-major - form. - reset_after: GRU convention (whether to apply reset gate after or - before matrix multiplication). False = "before" (default), - True = "after" (cuDNN compatible). - - Call arguments: - inputs: A 3D tensor. - mask: Binary tensor of shape `(samples, timesteps)` indicating whether - a given timestep should be masked. An individual `True` entry indicates - that the corresponding timestep should be utilized, while a `False` - entry indicates that the corresponding timestep should be ignored. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the cell - when calling it. This is only relevant if `dropout` or - `recurrent_dropout` is used. - initial_state: List of initial state tensors to be passed to the first - call of the cell. 
- """ - - def __init__(self, - units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - reset_after=False, - **kwargs): - implementation = kwargs.pop('implementation', 1) - if implementation == 0: - logging.warning('`implementation=0` has been deprecated, ' - 'and now defaults to `implementation=1`.' - 'Please update your layer call.') - if 'enable_caching_device' in kwargs: - cell_kwargs = {'enable_caching_device': - kwargs.pop('enable_caching_device')} - else: - cell_kwargs = {} - cell = GRUCell( + """Gated Recurrent Unit - Cho et al. 2014. + + There are two variants. The default one is based on 1406.1078v3 and + has reset gate applied to hidden state before matrix multiplication. The + other one is based on original 1406.1078v1 and has the order reversed. + + The second variant is compatible with CuDNNGRU (GPU-only) and allows + inference on CPU. Thus it has separate biases for `kernel` and + `recurrent_kernel`. Use `'reset_after'=True` and + `recurrent_activation='sigmoid'`. + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + recurrent_initializer: Initializer for the `recurrent_kernel` weights + matrix, used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix. + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation").. + kernel_constraint: Constraint function applied to + the `kernel` weights matrix. + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + return_sequences: Boolean. Whether to return the last output + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). 
If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + time_major: The shape format of the `inputs` and `outputs` tensors. + If True, the inputs and outputs will be in shape + `(timesteps, batch, ...)`, whereas in the False case, it will be + `(batch, timesteps, ...)`. Using `time_major = True` is a bit more + efficient because it avoids transposes at the beginning and end of the + RNN calculation. However, most TensorFlow data is batch-major, so by + default this function accepts input and emits output in batch-major + form. + reset_after: GRU convention (whether to apply reset gate after or + before matrix multiplication). False = "before" (default), + True = "after" (cuDNN compatible). + + Call arguments: + inputs: A 3D tensor. + mask: Binary tensor of shape `(samples, timesteps)` indicating whether + a given timestep should be masked. An individual `True` entry indicates + that the corresponding timestep should be utilized, while a `False` + entry indicates that the corresponding timestep should be ignored. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. This argument is passed to the cell + when calling it. This is only relevant if `dropout` or + `recurrent_dropout` is used. + initial_state: List of initial state tensors to be passed to the first + call of the cell. + """ + + def __init__( + self, units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=implementation, - reset_after=reset_after, - dtype=kwargs.get('dtype'), - trainable=kwargs.get('trainable', True), - **cell_kwargs) - super().__init__( - cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.input_spec = [InputSpec(ndim=3)] - - def call(self, inputs, mask=None, training=None, initial_state=None): - return super().call( - inputs, mask=mask, training=training, initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - 
- @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - @property - def implementation(self): - return self.cell.implementation - - @property - def reset_after(self): - return self.cell.reset_after - - def get_config(self): - config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout, - 'implementation': - self.implementation, - 'reset_after': - self.reset_after - } - config.update(rnn_utils.config_for_enable_caching_device(self.cell)) - base_config = super().get_config() - del base_config['cell'] - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - if 'implementation' in config and config['implementation'] == 0: - config['implementation'] = 1 - return cls(**config) + activation="tanh", + recurrent_activation="hard_sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + reset_after=False, + **kwargs + ): + implementation = kwargs.pop("implementation", 1) + if implementation == 0: + logging.warning( + "`implementation=0` has been deprecated, " + "and now defaults to `implementation=1`." + "Please update your layer call." 
+ ) + if "enable_caching_device" in kwargs: + cell_kwargs = { + "enable_caching_device": kwargs.pop("enable_caching_device") + } + else: + cell_kwargs = {} + cell = GRUCell( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=implementation, + reset_after=reset_after, + dtype=kwargs.get("dtype"), + trainable=kwargs.get("trainable", True), + name="gru_cell", + **cell_kwargs + ) + super().__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + **kwargs + ) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.input_spec = [InputSpec(ndim=3)] + + def call(self, inputs, mask=None, training=None, initial_state=None): + return super().call( + inputs, mask=mask, training=training, initial_state=initial_state + ) + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + @property + def implementation(self): + return self.cell.implementation + + @property + def reset_after(self): + return self.cell.reset_after + + def get_config(self): + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "recurrent_activation": activations.serialize( + self.recurrent_activation + ), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + 
self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + "implementation": self.implementation, + "reset_after": self.reset_after, + } + config.update(rnn_utils.config_for_enable_caching_device(self.cell)) + base_config = super().get_config() + del base_config["cell"] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if "implementation" in config and config["implementation"] == 0: + config["implementation"] = 1 + return cls(**config) diff --git a/keras/layers/rnn/gru_v1_test.py b/keras/layers/rnn/gru_v1_test.py index 88df22c88a1b..84f6e375f859 100644 --- a/keras/layers/rnn/gru_v1_test.py +++ b/keras/layers/rnn/gru_v1_test.py @@ -15,18 +15,17 @@ """Tests for GRU V1 layer.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized +from tensorflow.core.protobuf import rewriter_config_pb2 + import keras from keras.layers.rnn import gru from keras.layers.rnn import gru_v1 from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import np_utils -import numpy as np -import tensorflow.compat.v2 as tf - -from tensorflow.core.protobuf import rewriter_config_pb2 - # Global config for grappler setting that is used for graph mode test. _rewrites = rewriter_config_pb2.RewriterConfig() @@ -36,125 +35,136 @@ _config = tf.compat.v1.ConfigProto(graph_options=_graph_options) -@test_utils.run_all_without_tensor_float_32('RNN GRU can use TF32 on GPU') +@test_utils.run_all_without_tensor_float_32("RNN GRU can use TF32 on GPU") @test_combinations.run_all_keras_modes(config=_config) class GRUGraphRewriteTest(test_combinations.TestCase): - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - @test_utils.run_v2_only - def test_gru_feature_parity_v1_v2(self): - input_shape = 10 - rnn_state_size = 8 - timestep = 4 - batch = 20 - - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=rnn_state_size, - random_seed=87654321) - y_train = np_utils.to_categorical(y_train, rnn_state_size) - # For the last batch item of the test data, we filter out the last - # timestep to simulate the variable length sequence and masking test. 
- x_train[-2:, -1, :] = 0.0 - y_train[-2:] = 0 - - inputs = keras.layers.Input( - shape=[timestep, input_shape], dtype=tf.float32) - masked_input = keras.layers.Masking()(inputs) - gru_layer = gru_v1.GRU(rnn_state_size, - recurrent_activation='sigmoid', - reset_after=True) - output = gru_layer(masked_input) - gru_model = keras.models.Model(inputs, output) - weights = gru_model.get_weights() - y_1 = gru_model.predict(x_train) - gru_model.compile('rmsprop', 'mse') - gru_model.fit(x_train, y_train) - y_2 = gru_model.predict(x_train) - - with test_utils.device(should_use_gpu=True): - cudnn_layer = gru.GRU(rnn_state_size, - recurrent_activation='sigmoid', - reset_after=True) - cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input)) - cudnn_model.set_weights(weights) - y_3 = cudnn_model.predict(x_train) - cudnn_model.compile('rmsprop', 'mse') - cudnn_model.fit(x_train, y_train) - y_4 = cudnn_model.predict(x_train) - - self.assertAllClose(y_1, y_3, rtol=2e-5, atol=2e-5) - self.assertAllClose(y_2, y_4, rtol=2e-5, atol=2e-5) - - @parameterized.named_parameters( - # test_name, time_major, go_backwards - ('normal', False, False), - ('time_major', True, False), - ('go_backwards', False, True), - ('both', True, True), - ) - def test_time_major_and_go_backward_v1_v2(self, time_major, go_backwards): - input_shape = 10 - rnn_state_size = 8 - timestep = 4 - batch = 100 - - x_train = np.random.random((batch, timestep, input_shape)) - - def build_model(layer_cls): - inputs = keras.layers.Input( - shape=[timestep, input_shape], dtype=tf.float32) - layer = layer_cls(rnn_state_size, - recurrent_activation='sigmoid', - time_major=time_major, - return_sequences=True, - go_backwards=go_backwards, - reset_after=True) - if time_major: - converted_input = keras.layers.Lambda( - lambda t: tf.transpose(t, [1, 0, 2]))(inputs) - outputs = layer(converted_input) - outputs = keras.layers.Lambda( - lambda t: tf.transpose(t, [1, 0, 2]))(outputs) - else: - outputs = layer(inputs) - return keras.models.Model(inputs, outputs) - - gru_model = build_model(gru_v1.GRU) - y_ref = gru_model.predict(x_train) - weights = gru_model.get_weights() - - gru_v2_model = build_model(gru.GRU) - gru_v2_model.set_weights(weights) - y = gru_v2_model.predict(x_train) - - self.assertAllClose(y, y_ref) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - @test_utils.run_v2_only - def test_explicit_device_with_go_backward_and_mask_v1(self): - batch_size = 8 - timestep = 7 - masksteps = 5 - units = 4 - - inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) - mask = np.ones((batch_size, timestep)).astype(np.bool) - mask[:, masksteps:] = 0 - - gru_layer = gru_v1.GRU( - units, return_sequences=True, go_backwards=True) - with test_utils.device(should_use_gpu=True): - outputs_masked = gru_layer(inputs, mask=tf.constant(mask)) - outputs_trimmed = gru_layer(inputs[:, :masksteps]) - self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed) - - -if __name__ == '__main__': - tf.test.main() + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." 
+ ), + ) + @test_utils.run_v2_only + def test_gru_feature_parity_v1_v2(self): + input_shape = 10 + rnn_state_size = 8 + timestep = 4 + batch = 20 + + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=rnn_state_size, + random_seed=87654321, + ) + y_train = np_utils.to_categorical(y_train, rnn_state_size) + # For the last batch item of the test data, we filter out the last + # timestep to simulate the variable length sequence and masking test. + x_train[-2:, -1, :] = 0.0 + y_train[-2:] = 0 + + inputs = keras.layers.Input( + shape=[timestep, input_shape], dtype=tf.float32 + ) + masked_input = keras.layers.Masking()(inputs) + gru_layer = gru_v1.GRU( + rnn_state_size, recurrent_activation="sigmoid", reset_after=True + ) + output = gru_layer(masked_input) + gru_model = keras.models.Model(inputs, output) + weights = gru_model.get_weights() + y_1 = gru_model.predict(x_train) + gru_model.compile("rmsprop", "mse") + gru_model.fit(x_train, y_train) + y_2 = gru_model.predict(x_train) + + with test_utils.device(should_use_gpu=True): + cudnn_layer = gru.GRU( + rnn_state_size, recurrent_activation="sigmoid", reset_after=True + ) + cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input)) + cudnn_model.set_weights(weights) + y_3 = cudnn_model.predict(x_train) + cudnn_model.compile("rmsprop", "mse") + cudnn_model.fit(x_train, y_train) + y_4 = cudnn_model.predict(x_train) + + self.assertAllClose(y_1, y_3, rtol=2e-5, atol=2e-5) + self.assertAllClose(y_2, y_4, rtol=2e-5, atol=2e-5) + + @parameterized.named_parameters( + # test_name, time_major, go_backwards + ("normal", False, False), + ("time_major", True, False), + ("go_backwards", False, True), + ("both", True, True), + ) + def test_time_major_and_go_backward_v1_v2(self, time_major, go_backwards): + input_shape = 10 + rnn_state_size = 8 + timestep = 4 + batch = 100 + + x_train = np.random.random((batch, timestep, input_shape)) + + def build_model(layer_cls): + inputs = keras.layers.Input( + shape=[timestep, input_shape], dtype=tf.float32 + ) + layer = layer_cls( + rnn_state_size, + recurrent_activation="sigmoid", + time_major=time_major, + return_sequences=True, + go_backwards=go_backwards, + reset_after=True, + ) + if time_major: + converted_input = keras.layers.Lambda( + lambda t: tf.transpose(t, [1, 0, 2]) + )(inputs) + outputs = layer(converted_input) + outputs = keras.layers.Lambda( + lambda t: tf.transpose(t, [1, 0, 2]) + )(outputs) + else: + outputs = layer(inputs) + return keras.models.Model(inputs, outputs) + + gru_model = build_model(gru_v1.GRU) + y_ref = gru_model.predict(x_train) + weights = gru_model.get_weights() + + gru_v2_model = build_model(gru.GRU) + gru_v2_model.set_weights(weights) + y = gru_v2_model.predict(x_train) + + self.assertAllClose(y, y_ref) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." 
+ ), + ) + @test_utils.run_v2_only + def test_explicit_device_with_go_backward_and_mask_v1(self): + batch_size = 8 + timestep = 7 + masksteps = 5 + units = 4 + + inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) + mask = np.ones((batch_size, timestep)).astype(bool) + mask[:, masksteps:] = 0 + + gru_layer = gru_v1.GRU(units, return_sequences=True, go_backwards=True) + with test_utils.device(should_use_gpu=True): + outputs_masked = gru_layer(inputs, mask=tf.constant(mask)) + outputs_trimmed = gru_layer(inputs[:, :masksteps]) + self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/legacy_cell_wrappers.py b/keras/layers/rnn/legacy_cell_wrappers.py index 4847c73e1887..ebdbd399c63a 100644 --- a/keras/layers/rnn/legacy_cell_wrappers.py +++ b/keras/layers/rnn/legacy_cell_wrappers.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== """Module implementing the V1 version of RNN cell wrappers.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from __future__ import absolute_import from __future__ import division @@ -22,549 +22,647 @@ import hashlib import numbers +import tensorflow.compat.v2 as tf + from keras.layers.rnn.cell_wrappers import _enumerated_map_structure_up_to from keras.layers.rnn.cell_wrappers import _parse_config_to_function from keras.layers.rnn.cell_wrappers import _serialize_function_to_config from keras.layers.rnn.legacy_cells import RNNCell -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export from tensorflow.python.util.tf_export import tf_export - # This can be used with self.assertRaisesRegexp for assert_like_rnncell. ASSERT_LIKE_RNNCELL_ERROR_REGEXP = "is not an RNNCell" def _hasattr(obj, attr_name): - try: - getattr(obj, attr_name) - except AttributeError: - return False - else: - return True + try: + getattr(obj, attr_name) + except AttributeError: + return False + else: + return True def assert_like_rnncell(cell_name, cell): - """Raises a TypeError if cell is not like an RNNCell. - - NOTE: Do not rely on the error message (in particular in tests) which can be - subject to change to increase readability. Use - ASSERT_LIKE_RNNCELL_ERROR_REGEXP. - - Args: - cell_name: A string to give a meaningful error referencing to the name of - the functionargument. - cell: The object which should behave like an RNNCell. - - Raises: - TypeError: A human-friendly exception. - """ - conditions = [ - _hasattr(cell, "output_size"), - _hasattr(cell, "state_size"), - _hasattr(cell, "get_initial_state") or _hasattr(cell, "zero_state"), - callable(cell), - ] - errors = [ - "'output_size' property is missing", "'state_size' property is missing", - "either 'zero_state' or 'get_initial_state' method is required", - "is not callable" - ] - - if not all(conditions): - - errors = [error for error, cond in zip(errors, conditions) if not cond] - raise TypeError("The argument {!r} ({}) is not an RNNCell: {}.".format( - cell_name, cell, ", ".join(errors))) - - -class _RNNCellWrapperV1(RNNCell): - """Base class for cells wrappers V1 compatibility. - - This class along with `_RNNCellWrapperV2` allows to define cells wrappers that - are compatible with V1 and V2, and defines helper methods for this purpose. 
-  """
-
-  def __init__(self, cell, *args, **kwargs):
-    super().__init__(*args, **kwargs)
-    assert_like_rnncell("cell", cell)
-    self.cell = cell
-    if isinstance(cell, tf.__internal__.tracking.Trackable):
-      self._track_trackable(self.cell, name="cell")
-
-  def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs):
-    """Calls the wrapped cell and performs the wrapping logic.
+    """Raises a TypeError if cell is not like an RNNCell.

-    This method is called from the wrapper's `call` or `__call__` methods.
+    NOTE: Do not rely on the error message (in particular in tests) which can be
+    subject to change to increase readability. Use
+    ASSERT_LIKE_RNNCELL_ERROR_REGEXP.

     Args:
-      inputs: A tensor with wrapped cell's input.
-      state: A tensor or tuple of tensors with wrapped cell's state.
-      cell_call_fn: Wrapped cell's method to use for step computation (cell's
-        `__call__` or 'call' method).
-      **kwargs: Additional arguments.
-
-    Returns:
-      A pair containing:
-      - Output: A tensor with cell's output.
-      - New state: A tensor or tuple of tensors with new wrapped cell's state.
-    """
-    raise NotImplementedError
-
-  def __call__(self, inputs, state, scope=None):
-    """Runs the RNN cell step computation.
+      cell_name: A string to give a meaningful error referencing the name of
+        the function argument.
+      cell: The object which should behave like an RNNCell.

-    We assume that the wrapped RNNCell is being built within its `__call__`
-    method. We directly use the wrapped cell's `__call__` in the overridden
-    wrapper `__call__` method.
-
-    This allows to use the wrapped cell and the non-wrapped cell equivalently
-    when using `__call__`.
+    Raises:
+      TypeError: A human-friendly exception.
+    """
+    conditions = [
+        _hasattr(cell, "output_size"),
+        _hasattr(cell, "state_size"),
+        _hasattr(cell, "get_initial_state") or _hasattr(cell, "zero_state"),
+        callable(cell),
+    ]
+    errors = [
+        "'output_size' property is missing",
+        "'state_size' property is missing",
+        "either 'zero_state' or 'get_initial_state' method is required",
+        "is not callable",
+    ]
+
+    if not all(conditions):
+
+        errors = [error for error, cond in zip(errors, conditions) if not cond]
+        raise TypeError(
+            "The argument {!r} ({}) is not an RNNCell: {}.".format(
+                cell_name, cell, ", ".join(errors)
+            )
+        )

-    Args:
-      inputs: A tensor with wrapped cell's input.
-      state: A tensor or tuple of tensors with wrapped cell's state.
-      scope: VariableScope for the subgraph created in the wrapped cells'
-        `__call__`.
-    Returns:
-      A pair containing:
+class _RNNCellWrapperV1(RNNCell):
+    """Base class for cell wrappers, for V1 compatibility.

-    - Output: A tensor with cell's output.
-    - New state: A tensor or tuple of tensors with new wrapped cell's state.
+    This class along with `_RNNCellWrapperV2` allows defining cell wrappers
+    that are compatible with V1 and V2, and defines helper methods for this
+    purpose.
""" - return self._call_wrapped_cell( - inputs, state, cell_call_fn=self.cell.__call__, scope=scope) - - @property - def state_size(self): - return self.cell.state_size - - @property - def output_size(self): - return self.cell.output_size - - def zero_state(self, batch_size, dtype): - with tf.name_scope(type(self).__name__ + "ZeroState"): - return self.cell.zero_state(batch_size, dtype) - - def get_config(self): - config = { - "cell": { - "class_name": self.cell.__class__.__name__, - "config": self.cell.get_config() - }, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - config = config.copy() - cell = config.pop("cell") - try: - assert_like_rnncell("cell", cell) - return cls(cell, **config) - except TypeError: - raise ValueError("RNNCellWrapper cannot reconstruct the wrapped cell. " - "Please overwrite the cell in the config with a RNNCell " - "instance.") + + def __init__(self, cell, *args, **kwargs): + super().__init__(*args, **kwargs) + assert_like_rnncell("cell", cell) + self.cell = cell + if isinstance(cell, tf.__internal__.tracking.Trackable): + self._track_trackable(self.cell, name="cell") + + def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): + """Calls the wrapped cell and performs the wrapping logic. + + This method is called from the wrapper's `call` or `__call__` methods. + + Args: + inputs: A tensor with wrapped cell's input. + state: A tensor or tuple of tensors with wrapped cell's state. + cell_call_fn: Wrapped cell's method to use for step computation + (cell's `__call__` or 'call' method). + **kwargs: Additional arguments. + + Returns: + A pair containing: + - Output: A tensor with cell's output. + - New state: A tensor or tuple of tensors with new wrapped cell's + state. + """ + raise NotImplementedError + + def __call__(self, inputs, state, scope=None): + """Runs the RNN cell step computation. + + We assume that the wrapped RNNCell is being built within its `__call__` + method. We directly use the wrapped cell's `__call__` in the overridden + wrapper `__call__` method. + + This allows to use the wrapped cell and the non-wrapped cell + equivalently when using `__call__`. + + Args: + inputs: A tensor with wrapped cell's input. + state: A tensor or tuple of tensors with wrapped cell's state. + scope: VariableScope for the subgraph created in the wrapped cells' + `__call__`. + + Returns: + A pair containing: + + - Output: A tensor with cell's output. + - New state: A tensor or tuple of tensors with new wrapped cell's + state. 
+ """ + return self._call_wrapped_cell( + inputs, state, cell_call_fn=self.cell.__call__, scope=scope + ) + + @property + def state_size(self): + return self.cell.state_size + + @property + def output_size(self): + return self.cell.output_size + + def zero_state(self, batch_size, dtype): + with tf.name_scope(type(self).__name__ + "ZeroState"): + return self.cell.zero_state(batch_size, dtype) + + def get_config(self): + config = { + "cell": { + "class_name": self.cell.__class__.__name__, + "config": self.cell.get_config(), + }, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + config = config.copy() + cell = config.pop("cell") + try: + assert_like_rnncell("cell", cell) + return cls(cell, **config) + except TypeError: + raise ValueError( + "RNNCellWrapper cannot reconstruct the wrapped cell. " + "Please overwrite the cell in the config with a RNNCell " + "instance." + ) @keras_export(v1=["keras.__internal__.legacy.rnn_cell.DropoutWrapper"]) @tf_export(v1=["nn.rnn_cell.DropoutWrapper"]) class DropoutWrapper(_RNNCellWrapperV1): - """Operator adding dropout to inputs and outputs of the given cell.""" - - def __init__(self, - cell, - input_keep_prob=1.0, - output_keep_prob=1.0, - state_keep_prob=1.0, - variational_recurrent=False, - input_size=None, - dtype=None, - seed=None, - dropout_state_filter_visitor=None, - **kwargs): - """Create a cell with added input, state, and/or output dropout. - - If `variational_recurrent` is set to `True` (**NOT** the default behavior), - then the same dropout mask is applied at every step, as described in: - [A Theoretically Grounded Application of Dropout in Recurrent - Neural Networks. Y. Gal, Z. Ghahramani](https://arxiv.org/abs/1512.05287). - - Otherwise a different dropout mask is applied at every time step. - - Note, by default (unless a custom `dropout_state_filter` is provided), - the memory state (`c` component of any `LSTMStateTuple`) passing through - a `DropoutWrapper` is never modified. This behavior is described in the - above article. - - Args: - cell: an RNNCell, a projection to output_size is added to it. - input_keep_prob: unit Tensor or float between 0 and 1, input keep - probability; if it is constant and 1, no input dropout will be added. - output_keep_prob: unit Tensor or float between 0 and 1, output keep - probability; if it is constant and 1, no output dropout will be added. - state_keep_prob: unit Tensor or float between 0 and 1, output keep - probability; if it is constant and 1, no output dropout will be added. - State dropout is performed on the outgoing states of the cell. **Note** - the state components to which dropout is applied when `state_keep_prob` - is in `(0, 1)` are also determined by the argument - `dropout_state_filter_visitor` (e.g. by default dropout is never applied - to the `c` component of an `LSTMStateTuple`). - variational_recurrent: Python bool. If `True`, then the same dropout - pattern is applied across all time steps per run call. If this parameter - is set, `input_size` **must** be provided. - input_size: (optional) (possibly nested tuple of) `TensorShape` objects - containing the depth(s) of the input tensors expected to be passed in to - the `DropoutWrapper`. Required and used **iff** `variational_recurrent - = True` and `input_keep_prob < 1`. - dtype: (optional) The `dtype` of the input, state, and output tensors. - Required and used **iff** `variational_recurrent = True`. 
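What `variational_recurrent=True` buys, sketched in NumPy (illustrative only, not the wrapper's actual tensor code): one dropout mask is sampled per call and reused at every timestep, instead of a fresh mask per step.

import numpy as np

rng = np.random.default_rng(seed=0)
keep_prob, timesteps, features = 0.5, 3, 4

# Variational: sample once, reuse at every step (scaled by 1/keep_prob).
mask = (rng.random(features) < keep_prob) / keep_prob
variational = [mask for _ in range(timesteps)]

# Standard: an independent mask at every step.
standard = [
    (rng.random(features) < keep_prob) / keep_prob for _ in range(timesteps)
]

assert all((m == variational[0]).all() for m in variational)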
- seed: (optional) integer, the randomness seed. - dropout_state_filter_visitor: (optional), default: (see below). Function - that takes any hierarchical level of the state and returns a scalar or - depth=1 structure of Python booleans describing which terms in the state - should be dropped out. In addition, if the function returns `True`, - dropout is applied across this sublevel. If the function returns - `False`, dropout is not applied across this entire sublevel. - Default behavior: perform dropout on all terms except the memory (`c`) - state of `LSTMCellState` objects, and don't try to apply dropout to - `TensorArray` objects: ``` - def dropout_state_filter_visitor(s): - if isinstance(s, LSTMCellState): # Never perform dropout on the c - state. return LSTMCellState(c=False, h=True) - elif isinstance(s, TensorArray): return False return True ``` - **kwargs: dict of keyword arguments for base layer. - - Raises: - TypeError: if `cell` is not an `RNNCell`, or `keep_state_fn` is provided - but not `callable`. - ValueError: if any of the keep_probs are not between 0 and 1. - """ - super().__init__(cell, dtype=dtype, **kwargs) - - if (dropout_state_filter_visitor is not None and - not callable(dropout_state_filter_visitor)): - raise TypeError("dropout_state_filter_visitor must be callable. " - f"Received: {dropout_state_filter_visitor}") - self._dropout_state_filter = ( - dropout_state_filter_visitor or _default_dropout_state_filter_visitor) - with tf.name_scope("DropoutWrapperInit"): - - def tensor_and_const_value(v): - tensor_value = tf.convert_to_tensor(v) - const_value = tf.get_static_value(tensor_value) - return (tensor_value, const_value) - - for prob, attr in [(input_keep_prob, "input_keep_prob"), - (state_keep_prob, "state_keep_prob"), - (output_keep_prob, "output_keep_prob")]: - tensor_prob, const_prob = tensor_and_const_value(prob) - if const_prob is not None: - if const_prob < 0 or const_prob > 1: - raise ValueError(f"Parameter {attr} must be between 0 and 1. " - f"Received {const_prob}") - setattr(self, "_%s" % attr, float(const_prob)) + """Operator adding dropout to inputs and outputs of the given cell.""" + + def __init__( + self, + cell, + input_keep_prob=1.0, + output_keep_prob=1.0, + state_keep_prob=1.0, + variational_recurrent=False, + input_size=None, + dtype=None, + seed=None, + dropout_state_filter_visitor=None, + **kwargs, + ): + """Create a cell with added input, state, and/or output dropout. + + If `variational_recurrent` is set to `True` (**NOT** the default + behavior), then the same dropout mask is applied at every step, as + described in: [A Theoretically Grounded Application of Dropout in + Recurrent Neural Networks. Y. Gal, Z. + Ghahramani](https://arxiv.org/abs/1512.05287). + + Otherwise a different dropout mask is applied at every time step. + + Note, by default (unless a custom `dropout_state_filter` is provided), + the memory state (`c` component of any `LSTMStateTuple`) passing through + a `DropoutWrapper` is never modified. This behavior is described in the + above article. + + Args: + cell: an RNNCell, a projection to output_size is added to it. + input_keep_prob: unit Tensor or float between 0 and 1, input keep + probability; if it is constant and 1, no input dropout will be + added. + output_keep_prob: unit Tensor or float between 0 and 1, output keep + probability; if it is constant and 1, no output dropout will be + added. 
+          state_keep_prob: unit Tensor or float between 0 and 1, state keep
+            probability; if it is constant and 1, no state dropout will be
+            added. State dropout is performed on the outgoing states of the
+            cell. **Note** the state components to which dropout is applied when
+            `state_keep_prob` is in `(0, 1)` are also determined by the argument
+            `dropout_state_filter_visitor` (e.g. by default dropout is never
+            applied to the `c` component of an `LSTMStateTuple`).
+          variational_recurrent: Python bool. If `True`, then the same dropout
+            pattern is applied across all time steps per run call. If this
+            parameter is set, `input_size` **must** be provided.
+          input_size: (optional) (possibly nested tuple of) `TensorShape`
+            objects containing the depth(s) of the input tensors expected to be
+            passed in to the `DropoutWrapper`. Required and used **iff**
+            `variational_recurrent = True` and `input_keep_prob < 1`.
+          dtype: (optional) The `dtype` of the input, state, and output tensors.
+            Required and used **iff** `variational_recurrent = True`.
+          seed: (optional) integer, the randomness seed.
+          dropout_state_filter_visitor: (optional), default: (see below).
+            Function that takes any hierarchical level of the state and returns
+            a scalar or depth=1 structure of Python booleans describing which
+            terms in the state should be dropped out. In addition, if the
+            function returns `True`, dropout is applied across this sublevel.
+            If the function returns `False`, dropout is not applied across this
+            entire sublevel. Default behavior: perform dropout on all terms
+            except the memory (`c`) state of `LSTMCellState` objects, and don't
+            try to apply dropout to `TensorArray` objects:
+            ```
+            def dropout_state_filter_visitor(s):
+                # Never perform dropout on the c state.
+                if isinstance(s, LSTMCellState):
+                    return LSTMCellState(c=False, h=True)
+                elif isinstance(s, TensorArray):
+                    return False
+                return True
+            ```
+          **kwargs: dict of keyword arguments for base layer.
+
+        Raises:
+          TypeError: if `cell` is not an `RNNCell`, or `keep_state_fn` is
+            provided but not `callable`.
+          ValueError: if any of the keep_probs are not between 0 and 1.
+        """
+        super().__init__(cell, dtype=dtype, **kwargs)
+
+        if dropout_state_filter_visitor is not None and not callable(
+            dropout_state_filter_visitor
+        ):
+            raise TypeError(
+                "dropout_state_filter_visitor must be callable. "
+                f"Received: {dropout_state_filter_visitor}"
+            )
+        self._dropout_state_filter = (
+            dropout_state_filter_visitor
+            or _default_dropout_state_filter_visitor
+        )
+        with tf.name_scope("DropoutWrapperInit"):
+
+            def tensor_and_const_value(v):
+                tensor_value = tf.convert_to_tensor(v)
+                const_value = tf.get_static_value(tensor_value)
+                return (tensor_value, const_value)
+
+            for prob, attr in [
+                (input_keep_prob, "input_keep_prob"),
+                (state_keep_prob, "state_keep_prob"),
+                (output_keep_prob, "output_keep_prob"),
+            ]:
+                tensor_prob, const_prob = tensor_and_const_value(prob)
+                if const_prob is not None:
+                    if const_prob < 0 or const_prob > 1:
+                        raise ValueError(
+                            f"Parameter {attr} must be between 0 and 1. 
" + f"Received {const_prob}" + ) + setattr(self, f"_{attr}", float(const_prob)) + else: + setattr(self, f"_{attr}", tensor_prob) + + # Set variational_recurrent, seed before running the code below + self._variational_recurrent = variational_recurrent + self._input_size = input_size + self._seed = seed + + self._recurrent_input_noise = None + self._recurrent_state_noise = None + self._recurrent_output_noise = None + + if variational_recurrent: + if dtype is None: + raise ValueError( + "When variational_recurrent=True, dtype must be provided" + ) + + def convert_to_batch_shape(s): + # Prepend a 1 for the batch dimension; for recurrent + # variational dropout we use the same dropout mask for all + # batch elements. + return tf.concat(([1], tf.TensorShape(s).as_list()), 0) + + def batch_noise(s, inner_seed): + shape = convert_to_batch_shape(s) + return tf.random.uniform(shape, seed=inner_seed, dtype=dtype) + + if ( + not isinstance(self._input_keep_prob, numbers.Real) + or self._input_keep_prob < 1.0 + ): + if input_size is None: + raise ValueError( + "When variational_recurrent=True and input_keep_prob " + "< 1.0 or is unknown, input_size must be provided" + ) + self._recurrent_input_noise = _enumerated_map_structure_up_to( + input_size, + lambda i, s: batch_noise( + s, inner_seed=self._gen_seed("input", i) + ), + input_size, + ) + self._recurrent_state_noise = _enumerated_map_structure_up_to( + cell.state_size, + lambda i, s: batch_noise( + s, inner_seed=self._gen_seed("state", i) + ), + cell.state_size, + ) + self._recurrent_output_noise = _enumerated_map_structure_up_to( + cell.output_size, + lambda i, s: batch_noise( + s, inner_seed=self._gen_seed("output", i) + ), + cell.output_size, + ) + + def _gen_seed(self, salt_prefix, index): + if self._seed is None: + return None + salt = "%s_%d" % (salt_prefix, index) + string = (str(self._seed) + salt).encode("utf-8") + return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF + + @property + def wrapped_cell(self): + return self.cell + + def build(self, inputs_shape): + self.cell.build(inputs_shape) + self.built = True + + def _variational_recurrent_dropout_value( + self, unused_index, value, noise, keep_prob + ): + """Performs dropout given the pre-calculated noise tensor.""" + # uniform [keep_prob, 1.0 + keep_prob) + random_tensor = keep_prob + noise + + # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) + binary_tensor = tf.floor(random_tensor) + ret = tf.divide(value, keep_prob) * binary_tensor + ret.set_shape(value.get_shape()) + return ret + + def _dropout( + self, + values, + salt_prefix, + recurrent_noise, + keep_prob, + shallow_filtered_substructure=None, + ): + """Decides whether to perform standard dropout or recurrent dropout.""" + + if shallow_filtered_substructure is None: + # Put something so we traverse the entire structure; inside the + # dropout function we check to see if leafs of this are bool or not. 
+ shallow_filtered_substructure = values + + if not self._variational_recurrent: + + def dropout(i, do_dropout, v): + if not isinstance(do_dropout, bool) or do_dropout: + return tf.nn.dropout( + v, + rate=1.0 - keep_prob, + seed=self._gen_seed(salt_prefix, i), + ) + else: + return v + + return _enumerated_map_structure_up_to( + shallow_filtered_substructure, + dropout, + *[shallow_filtered_substructure, values], + ) else: - setattr(self, "_%s" % attr, tensor_prob) - - # Set variational_recurrent, seed before running the code below - self._variational_recurrent = variational_recurrent - self._input_size = input_size - self._seed = seed - - self._recurrent_input_noise = None - self._recurrent_state_noise = None - self._recurrent_output_noise = None - - if variational_recurrent: - if dtype is None: - raise ValueError( - "When variational_recurrent=True, dtype must be provided") - - def convert_to_batch_shape(s): - # Prepend a 1 for the batch dimension; for recurrent - # variational dropout we use the same dropout mask for all - # batch elements. - return tf.concat(([1], tf.TensorShape(s).as_list()), 0) - - def batch_noise(s, inner_seed): - shape = convert_to_batch_shape(s) - return tf.random.uniform(shape, seed=inner_seed, dtype=dtype) - - if (not isinstance(self._input_keep_prob, numbers.Real) or - self._input_keep_prob < 1.0): - if input_size is None: - raise ValueError( - "When variational_recurrent=True and input_keep_prob < 1.0 or " - "is unknown, input_size must be provided") - self._recurrent_input_noise = _enumerated_map_structure_up_to( - input_size, - lambda i, s: batch_noise(s, inner_seed=self._gen_seed("input", i)), - input_size) - self._recurrent_state_noise = _enumerated_map_structure_up_to( - cell.state_size, - lambda i, s: batch_noise(s, inner_seed=self._gen_seed("state", i)), - cell.state_size) - self._recurrent_output_noise = _enumerated_map_structure_up_to( - cell.output_size, - lambda i, s: batch_noise(s, inner_seed=self._gen_seed("output", i)), - cell.output_size) - - def _gen_seed(self, salt_prefix, index): - if self._seed is None: - return None - salt = "%s_%d" % (salt_prefix, index) - string = (str(self._seed) + salt).encode("utf-8") - return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF - - @property - def wrapped_cell(self): - return self.cell - - def build(self, inputs_shape): - self.cell.build(inputs_shape) - self.built = True - - def _variational_recurrent_dropout_value( - self, unused_index, value, noise, keep_prob): - """Performs dropout given the pre-calculated noise tensor.""" - # uniform [keep_prob, 1.0 + keep_prob) - random_tensor = keep_prob + noise - - # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) - binary_tensor = tf.floor(random_tensor) - ret = tf.divide(value, keep_prob) * binary_tensor - ret.set_shape(value.get_shape()) - return ret - - def _dropout(self, - values, - salt_prefix, - recurrent_noise, - keep_prob, - shallow_filtered_substructure=None): - """Decides whether to perform standard dropout or recurrent dropout.""" - - if shallow_filtered_substructure is None: - # Put something so we traverse the entire structure; inside the - # dropout function we check to see if leafs of this are bool or not. - shallow_filtered_substructure = values - - if not self._variational_recurrent: - - def dropout(i, do_dropout, v): - if not isinstance(do_dropout, bool) or do_dropout: - return tf.nn.dropout( - v, rate=1. 
- keep_prob, seed=self._gen_seed(salt_prefix, i)) - else: - return v - - return _enumerated_map_structure_up_to( - shallow_filtered_substructure, dropout, - *[shallow_filtered_substructure, values]) - else: - - def dropout(i, do_dropout, v, n): - if not isinstance(do_dropout, bool) or do_dropout: - return self._variational_recurrent_dropout_value(i, v, n, keep_prob) - else: - return v - - return _enumerated_map_structure_up_to( - shallow_filtered_substructure, dropout, - *[shallow_filtered_substructure, values, recurrent_noise]) - - def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): - """Runs the wrapped cell and applies dropout. - Args: - inputs: A tensor with wrapped cell's input. - state: A tensor or tuple of tensors with wrapped cell's state. - cell_call_fn: Wrapped cell's method to use for step computation (cell's - `__call__` or 'call' method). - **kwargs: Additional arguments. - - Returns: - A pair containing: - - - Output: A tensor with cell's output. - - New state: A tensor or tuple of tensors with new wrapped cell's state. - """ - - def _should_dropout(p): - return (not isinstance(p, float)) or p < 1 - - if _should_dropout(self._input_keep_prob): - inputs = self._dropout(inputs, "input", self._recurrent_input_noise, - self._input_keep_prob) - output, new_state = cell_call_fn(inputs, state, **kwargs) - if _should_dropout(self._state_keep_prob): - # Identify which subsets of the state to perform dropout on and - # which ones to keep. - shallow_filtered_substructure = tf.__internal__.nest.get_traverse_shallow_structure( - self._dropout_state_filter, new_state) - new_state = self._dropout(new_state, "state", self._recurrent_state_noise, - self._state_keep_prob, - shallow_filtered_substructure) - if _should_dropout(self._output_keep_prob): - output = self._dropout(output, "output", self._recurrent_output_noise, - self._output_keep_prob) - return output, new_state - - def get_config(self): - """Returns the config of the dropout wrapper.""" - config = { - "input_keep_prob": self._input_keep_prob, - "output_keep_prob": self._output_keep_prob, - "state_keep_prob": self._state_keep_prob, - "variational_recurrent": self._variational_recurrent, - "input_size": self._input_size, - "seed": self._seed, - } - if self._dropout_state_filter != _default_dropout_state_filter_visitor: # pylint: disable=comparison-with-callable - function, function_type, function_module = _serialize_function_to_config( - self._dropout_state_filter) - config.update({"dropout_fn": function, - "dropout_fn_type": function_type, - "dropout_fn_module": function_module}) - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - if "dropout_fn" in config: - config = config.copy() - dropout_state_filter = _parse_config_to_function( - config, custom_objects, "dropout_fn", "dropout_fn_type", - "dropout_fn_module") - config.pop("dropout_fn") - config["dropout_state_filter_visitor"] = dropout_state_filter - return super(DropoutWrapper, cls).from_config( - config, custom_objects=custom_objects) + def dropout(i, do_dropout, v, n): + if not isinstance(do_dropout, bool) or do_dropout: + return self._variational_recurrent_dropout_value( + i, v, n, keep_prob + ) + else: + return v + + return _enumerated_map_structure_up_to( + shallow_filtered_substructure, + dropout, + *[shallow_filtered_substructure, values, recurrent_noise], + ) + + def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): + 
"""Runs the wrapped cell and applies dropout. + + Args: + inputs: A tensor with wrapped cell's input. + state: A tensor or tuple of tensors with wrapped cell's state. + cell_call_fn: Wrapped cell's method to use for step computation + (cell's `__call__` or 'call' method). + **kwargs: Additional arguments. + + Returns: + A pair containing: + + - Output: A tensor with cell's output. + - New state: A tensor or tuple of tensors with new wrapped cell's + state. + """ + + def _should_dropout(p): + return (not isinstance(p, float)) or p < 1 + + if _should_dropout(self._input_keep_prob): + inputs = self._dropout( + inputs, + "input", + self._recurrent_input_noise, + self._input_keep_prob, + ) + output, new_state = cell_call_fn(inputs, state, **kwargs) + if _should_dropout(self._state_keep_prob): + # Identify which subsets of the state to perform dropout on and + # which ones to keep. + shallow_filtered_substructure = ( + tf.__internal__.nest.get_traverse_shallow_structure( + self._dropout_state_filter, new_state + ) + ) + new_state = self._dropout( + new_state, + "state", + self._recurrent_state_noise, + self._state_keep_prob, + shallow_filtered_substructure, + ) + if _should_dropout(self._output_keep_prob): + output = self._dropout( + output, + "output", + self._recurrent_output_noise, + self._output_keep_prob, + ) + return output, new_state + + def get_config(self): + """Returns the config of the dropout wrapper.""" + config = { + "input_keep_prob": self._input_keep_prob, + "output_keep_prob": self._output_keep_prob, + "state_keep_prob": self._state_keep_prob, + "variational_recurrent": self._variational_recurrent, + "input_size": self._input_size, + "seed": self._seed, + } + if self._dropout_state_filter != _default_dropout_state_filter_visitor: + ( + function, + function_type, + function_module, + ) = _serialize_function_to_config(self._dropout_state_filter) + config.update( + { + "dropout_fn": function, + "dropout_fn_type": function_type, + "dropout_fn_module": function_module, + } + ) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + if "dropout_fn" in config: + config = config.copy() + dropout_state_filter = _parse_config_to_function( + config, + custom_objects, + "dropout_fn", + "dropout_fn_type", + "dropout_fn_module", + ) + config.pop("dropout_fn") + config["dropout_state_filter_visitor"] = dropout_state_filter + return super(DropoutWrapper, cls).from_config( + config, custom_objects=custom_objects + ) @keras_export(v1=["keras.__internal__.legacy.rnn_cell.ResidualWrapper"]) @tf_export(v1=["nn.rnn_cell.ResidualWrapper"]) class ResidualWrapper(_RNNCellWrapperV1): - """RNNCell wrapper that ensures cell inputs are added to the outputs.""" - - def __init__(self, cell, residual_fn=None, **kwargs): - """Constructs a `ResidualWrapper` for `cell`. - - Args: - cell: An instance of `RNNCell`. - residual_fn: (Optional) The function to map raw cell inputs and raw cell - outputs to the actual cell outputs of the residual network. - Defaults to calling nest.map_structure on (lambda i, o: i + o), inputs - and outputs. - **kwargs: dict of keyword arguments for base layer. - """ - super().__init__(cell, **kwargs) - self._residual_fn = residual_fn - - def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): - """Run the cell and then apply the residual_fn on its inputs to its outputs. - - Args: - inputs: cell inputs. - state: cell state. 
- cell_call_fn: Wrapped cell's method to use for step computation (cell's - `__call__` or 'call' method). - **kwargs: Additional arguments passed to the wrapped cell's `call`. - - Returns: - Tuple of cell outputs and new state. - - Raises: - TypeError: If cell inputs and outputs have different structure (type). - ValueError: If cell inputs and outputs have different structure (value). - """ - outputs, new_state = cell_call_fn(inputs, state, **kwargs) - - # Ensure shapes match - def assert_shape_match(inp, out): - inp.get_shape().assert_is_compatible_with(out.get_shape()) - - def default_residual_fn(inputs, outputs): - tf.nest.assert_same_structure(inputs, outputs) - tf.nest.map_structure(assert_shape_match, inputs, outputs) - return tf.nest.map_structure(lambda inp, out: inp + out, inputs, outputs) - - res_outputs = (self._residual_fn or default_residual_fn)(inputs, outputs) - return (res_outputs, new_state) - - def get_config(self): - """Returns the config of the residual wrapper.""" - if self._residual_fn is not None: - function, function_type, function_module = _serialize_function_to_config( - self._residual_fn) - config = { - "residual_fn": function, - "residual_fn_type": function_type, - "residual_fn_module": function_module - } - else: - config = {} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - if "residual_fn" in config: - config = config.copy() - residual_function = _parse_config_to_function(config, custom_objects, - "residual_fn", - "residual_fn_type", - "residual_fn_module") - config["residual_fn"] = residual_function - return super(ResidualWrapper, cls).from_config( - config, custom_objects=custom_objects) + """RNNCell wrapper that ensures cell inputs are added to the outputs.""" + + def __init__(self, cell, residual_fn=None, **kwargs): + """Constructs a `ResidualWrapper` for `cell`. + + Args: + cell: An instance of `RNNCell`. + residual_fn: (Optional) The function to map raw cell inputs and raw + cell outputs to the actual cell outputs of the residual network. + Defaults to calling nest.map_structure on (lambda i, o: i + o), + inputs and outputs. + **kwargs: dict of keyword arguments for base layer. + """ + super().__init__(cell, **kwargs) + self._residual_fn = residual_fn + + def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): + """Run the cell and apply the residual_fn. + + Args: + inputs: cell inputs. + state: cell state. + cell_call_fn: Wrapped cell's method to use for step computation + (cell's `__call__` or 'call' method). + **kwargs: Additional arguments passed to the wrapped cell's `call`. + + Returns: + Tuple of cell outputs and new state. + + Raises: + TypeError: If cell inputs and outputs have different structure (type). + ValueError: If cell inputs and outputs have different structure + (value). 
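The default residual combination used when `residual_fn` is None, reduced to plain Python lists (a sketch of the behavior, not the actual tensor implementation):

def default_residual(inputs, outputs):
    # Mirrors nest.map_structure(lambda i, o: i + o, ...): shapes must match.
    assert len(inputs) == len(outputs)
    return [i + o for i, o in zip(inputs, outputs)]

assert default_residual([1.0, 2.0], [0.5, -0.5]) == [1.5, 1.5]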
+ """ + outputs, new_state = cell_call_fn(inputs, state, **kwargs) + + # Ensure shapes match + def assert_shape_match(inp, out): + inp.get_shape().assert_is_compatible_with(out.get_shape()) + + def default_residual_fn(inputs, outputs): + tf.nest.assert_same_structure(inputs, outputs) + tf.nest.map_structure(assert_shape_match, inputs, outputs) + return tf.nest.map_structure( + lambda inp, out: inp + out, inputs, outputs + ) + + res_outputs = (self._residual_fn or default_residual_fn)( + inputs, outputs + ) + return (res_outputs, new_state) + + def get_config(self): + """Returns the config of the residual wrapper.""" + if self._residual_fn is not None: + ( + function, + function_type, + function_module, + ) = _serialize_function_to_config(self._residual_fn) + config = { + "residual_fn": function, + "residual_fn_type": function_type, + "residual_fn_module": function_module, + } + else: + config = {} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + if "residual_fn" in config: + config = config.copy() + residual_function = _parse_config_to_function( + config, + custom_objects, + "residual_fn", + "residual_fn_type", + "residual_fn_module", + ) + config["residual_fn"] = residual_function + return super(ResidualWrapper, cls).from_config( + config, custom_objects=custom_objects + ) @keras_export(v1=["keras.__internal__.legacy.rnn_cell.DeviceWrapper"]) @tf_export(v1=["nn.rnn_cell.DeviceWrapper"]) class DeviceWrapper(_RNNCellWrapperV1): - """Operator that ensures an RNNCell runs on a particular device.""" + """Operator that ensures an RNNCell runs on a particular device.""" - def __init__(self, cell, device, **kwargs): - """Construct a `DeviceWrapper` for `cell` with device `device`. + def __init__(self, cell, device, **kwargs): + """Construct a `DeviceWrapper` for `cell` with device `device`. - Ensures the wrapped `cell` is called with `tf.device(device)`. + Ensures the wrapped `cell` is called with `tf.device(device)`. - Args: - cell: An instance of `RNNCell`. - device: A device string or function, for passing to `tf.device`. - **kwargs: dict of keyword arguments for base layer. - """ - super().__init__(cell, **kwargs) - self._device = device + Args: + cell: An instance of `RNNCell`. + device: A device string or function, for passing to `tf.device`. + **kwargs: dict of keyword arguments for base layer. 
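`DeviceWrapper`'s entire job is the `tf.device` pin around the wrapped cell's step and `zero_state`. The effect, assuming a TensorFlow 2 install running eagerly with a CPU device available:

import tensorflow.compat.v2 as tf

with tf.device("/CPU:0"):
    state = tf.zeros([8, 4])

# Eagerly-created tensors record where they were placed.
assert "CPU" in state.device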
+ """ + super().__init__(cell, **kwargs) + self._device = device - def zero_state(self, batch_size, dtype): - with tf.name_scope(type(self).__name__ + "ZeroState"): - with tf.compat.v1.device(self._device): - return self.cell.zero_state(batch_size, dtype) + def zero_state(self, batch_size, dtype): + with tf.name_scope(type(self).__name__ + "ZeroState"): + with tf.compat.v1.device(self._device): + return self.cell.zero_state(batch_size, dtype) - def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): - """Run the cell on specified device.""" - with tf.compat.v1.device(self._device): - return cell_call_fn(inputs, state, **kwargs) + def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): + """Run the cell on specified device.""" + with tf.compat.v1.device(self._device): + return cell_call_fn(inputs, state, **kwargs) - def get_config(self): - config = {"device": self._device} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = {"device": self._device} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) def _default_dropout_state_filter_visitor(substate): - from keras.layers.rnn.legacy_cells import LSTMStateTuple # pylint: disable=g-import-not-at-top - if isinstance(substate, LSTMStateTuple): - # Do not perform dropout on the memory state. - return LSTMStateTuple(c=False, h=True) - elif isinstance(substate, tf.TensorArray): - return False - return True + from keras.layers.rnn.legacy_cells import ( + LSTMStateTuple, + ) + + if isinstance(substate, LSTMStateTuple): + # Do not perform dropout on the memory state. + return LSTMStateTuple(c=False, h=True) + elif isinstance(substate, tf.TensorArray): + return False + return True diff --git a/keras/layers/rnn/legacy_cell_wrappers_test.py b/keras/layers/rnn/legacy_cell_wrappers_test.py index 8e04fad275fe..f9bf3040e70b 100644 --- a/keras/layers/rnn/legacy_cell_wrappers_test.py +++ b/keras/layers/rnn/legacy_cell_wrappers_test.py @@ -14,24 +14,27 @@ # ============================================================================== """Tests for RNN cell wrappers v1 implementation.""" +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras.layers.rnn import legacy_cell_wrappers from keras.layers.rnn import legacy_cells from keras.testing_infra import test_combinations -import tensorflow.compat.v2 as tf @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class RNNCellWrapperV1Test(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - legacy_cell_wrappers.DropoutWrapper, legacy_cell_wrappers.ResidualWrapper - ]) - def testWrapperKerasStyle(self, wrapper): - """Tests if wrapper cell is instantiated in keras style scope.""" - wrapped_cell = wrapper(legacy_cells.BasicRNNCell(1)) - self.assertFalse(wrapped_cell._keras_style) + @parameterized.parameters( + [ + legacy_cell_wrappers.DropoutWrapper, + legacy_cell_wrappers.ResidualWrapper, + ] + ) + def testWrapperKerasStyle(self, wrapper): + """Tests if wrapper cell is instantiated in keras style scope.""" + wrapped_cell = wrapper(legacy_cells.BasicRNNCell(1)) + self.assertFalse(wrapped_cell._keras_style) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/layers/rnn/legacy_cells.py b/keras/layers/rnn/legacy_cells.py index 05c601c460d6..ca2431cb67a9 100644 --- a/keras/layers/rnn/legacy_cells.py +++ b/keras/layers/rnn/legacy_cells.py @@ -20,7 +20,7 @@ 
Constructing multi-layer cells is supported by the class `MultiRNNCell`, or by calling the `rnn` ops several times. """ -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from __future__ import absolute_import from __future__ import division @@ -29,6 +29,8 @@ import collections import warnings +import tensorflow.compat.v2 as tf + from keras import activations from keras import backend from keras import initializers @@ -36,555 +38,605 @@ from keras.engine import input_spec from keras.legacy_tf_layers import base as base_layer from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export from tensorflow.python.util.tf_export import tf_export - _BIAS_VARIABLE_NAME = "bias" _WEIGHTS_VARIABLE_NAME = "kernel" def _hasattr(obj, attr_name): - try: - getattr(obj, attr_name) - except AttributeError: - return False - else: - return True + try: + getattr(obj, attr_name) + except AttributeError: + return False + else: + return True def _concat(prefix, suffix, static=False): - """Concat that enables int, Tensor, or TensorShape values. - - This function takes a size specification, which can be an integer, a - TensorShape, or a Tensor, and converts it into a concatenated Tensor - (if static = False) or a list of integers (if static = True). - - Args: - prefix: The prefix; usually the batch size (and/or time step size). - (TensorShape, int, or Tensor.) - suffix: TensorShape, int, or Tensor. - static: If `True`, return a python list with possibly unknown dimensions. - Otherwise return a `Tensor`. - - Returns: - shape: the concatenation of prefix and suffix. - - Raises: - ValueError: if `suffix` is not a scalar or vector (or TensorShape). - ValueError: if prefix or suffix was `None` and asked for dynamic - Tensors out. - """ - if isinstance(prefix, tf.Tensor): - p = prefix - p_static = tf.get_static_value(prefix) - if p.shape.ndims == 0: - p = tf.compat.v1.expand_dims(p, 0) - elif p.shape.ndims != 1: - raise ValueError( - "Prefix tensor must be either a scalar or vector, " - f"but received tensor: {p}") - else: - p = tf.TensorShape(prefix) - p_static = p.as_list() if p.ndims is not None else None - p = ( - tf.constant(p.as_list(), dtype=tf.int32) - if p.is_fully_defined() else None) - if isinstance(suffix, tf.Tensor): - s = suffix - s_static = tf.get_static_value(suffix) - if s.shape.ndims == 0: - s = tf.compat.v1.expand_dims(s, 0) - elif s.shape.ndims != 1: - raise ValueError("suffix tensor must be either a scalar or vector, " - f"but received tensor: {s}") - else: - s = tf.TensorShape(suffix) - s_static = s.as_list() if s.ndims is not None else None - s = ( - tf.constant(s.as_list(), dtype=tf.int32) - if s.is_fully_defined() else None) - - if static: - shape = tf.TensorShape(p_static).concatenate(s_static) - shape = shape.as_list() if shape.ndims is not None else None - else: - if p is None or s is None: - raise ValueError( - "Prefix or suffix can't be None. " - f"Received prefix = {prefix} and suffix = {suffix}") - shape = tf.concat((p, s), 0) - return shape + """Concat that enables int, Tensor, or TensorShape values. + + This function takes a size specification, which can be an integer, a + TensorShape, or a Tensor, and converts it into a concatenated Tensor + (if static = False) or a list of integers (if static = True). + + Args: + prefix: The prefix; usually the batch size (and/or time step size). + (TensorShape, int, or Tensor.) 
+ suffix: TensorShape, int, or Tensor. + static: If `True`, return a python list with possibly unknown dimensions. + Otherwise return a `Tensor`. + + Returns: + shape: the concatenation of prefix and suffix. + + Raises: + ValueError: if `suffix` is not a scalar or vector (or TensorShape). + ValueError: if prefix or suffix was `None` and asked for dynamic + Tensors out. + """ + if isinstance(prefix, tf.Tensor): + p = prefix + p_static = tf.get_static_value(prefix) + if p.shape.ndims == 0: + p = tf.compat.v1.expand_dims(p, 0) + elif p.shape.ndims != 1: + raise ValueError( + "Prefix tensor must be either a scalar or vector, " + f"but received tensor: {p}" + ) + else: + p = tf.TensorShape(prefix) + p_static = p.as_list() if p.ndims is not None else None + p = ( + tf.constant(p.as_list(), dtype=tf.int32) + if p.is_fully_defined() + else None + ) + if isinstance(suffix, tf.Tensor): + s = suffix + s_static = tf.get_static_value(suffix) + if s.shape.ndims == 0: + s = tf.compat.v1.expand_dims(s, 0) + elif s.shape.ndims != 1: + raise ValueError( + "suffix tensor must be either a scalar or vector, " + f"but received tensor: {s}" + ) + else: + s = tf.TensorShape(suffix) + s_static = s.as_list() if s.ndims is not None else None + s = ( + tf.constant(s.as_list(), dtype=tf.int32) + if s.is_fully_defined() + else None + ) + + if static: + shape = tf.TensorShape(p_static).concatenate(s_static) + shape = shape.as_list() if shape.ndims is not None else None + else: + if p is None or s is None: + raise ValueError( + "Prefix or suffix can't be None. " + f"Received prefix = {prefix} and suffix = {suffix}" + ) + shape = tf.concat((p, s), 0) + return shape def _zero_state_tensors(state_size, batch_size, dtype): - """Create tensors of zeros based on state_size, batch_size, and dtype.""" + """Create tensors of zeros based on state_size, batch_size, and dtype.""" - def get_state_shape(s): - """Combine s with batch_size to get a proper tensor shape.""" - c = _concat(batch_size, s) - size = tf.zeros(c, dtype=dtype) - if not tf.executing_eagerly(): - c_static = _concat(batch_size, s, static=True) - size.set_shape(c_static) - return size + def get_state_shape(s): + """Combine s with batch_size to get a proper tensor shape.""" + c = _concat(batch_size, s) + size = tf.zeros(c, dtype=dtype) + if not tf.executing_eagerly(): + c_static = _concat(batch_size, s, static=True) + size.set_shape(c_static) + return size - return tf.nest.map_structure(get_state_shape, state_size) + return tf.nest.map_structure(get_state_shape, state_size) @keras_export(v1=["keras.__internal__.legacy.rnn_cell.RNNCell"]) @tf_export(v1=["nn.rnn_cell.RNNCell"]) class RNNCell(base_layer.Layer): - """Abstract object representing an RNN cell. - - Every `RNNCell` must have the properties below and implement `call` with - the signature `(output, next_state) = call(input, state)`. The optional - third input argument, `scope`, is allowed for backwards compatibility - purposes; but should be left off for new subclasses. - - This definition of cell differs from the definition used in the literature. - In the literature, 'cell' refers to an object with a single scalar output. - This definition refers to a horizontal array of such units. - - An RNN cell, in the most abstract setting, is anything that has - a state and performs some operation that takes a matrix of inputs. - This operation results in an output matrix with `self.output_size` columns. 
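`_concat`, reformatted above, merges a batch prefix with a state suffix, either statically (a Python list) or dynamically (a `tf.concat` of int32 vectors). The static path boils down to a `TensorShape` concatenation, assuming TensorFlow is available:

import tensorflow.compat.v2 as tf

prefix = tf.TensorShape([32])      # batch dimension
suffix = tf.TensorShape([8])       # per-sample state size
shape = prefix.concatenate(suffix)
assert shape.as_list() == [32, 8]  # what zero_state allocates per state entry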
- If `self.state_size` is an integer, this operation also results in a new - state matrix with `self.state_size` columns. If `self.state_size` is a - (possibly nested tuple of) TensorShape object(s), then it should return a - matching structure of Tensors having shape `[batch_size].concatenate(s)` - for each `s` in `self.batch_size`. - """ - - def __init__(self, trainable=True, name=None, dtype=None, **kwargs): - super().__init__( - trainable=trainable, name=name, dtype=dtype, **kwargs) - # Attribute that indicates whether the cell is a TF RNN cell, due the slight - # difference between TF and Keras RNN cell. Notably the state is not wrapped - # in a list for TF cell where they are single tensor state, whereas keras - # cell will wrap the state into a list, and call() will have to unwrap them. - self._is_tf_rnn_cell = True - - def __call__(self, inputs, state, scope=None): - """Run this RNN cell on inputs, starting from the given state. + """Abstract object representing an RNN cell. + + Every `RNNCell` must have the properties below and implement `call` with + the signature `(output, next_state) = call(input, state)`. The optional + third input argument, `scope`, is allowed for backwards compatibility + purposes; but should be left off for new subclasses. + + This definition of cell differs from the definition used in the literature. + In the literature, 'cell' refers to an object with a single scalar output. + This definition refers to a horizontal array of such units. + + An RNN cell, in the most abstract setting, is anything that has + a state and performs some operation that takes a matrix of inputs. + This operation results in an output matrix with `self.output_size` columns. + If `self.state_size` is an integer, this operation also results in a new + state matrix with `self.state_size` columns. If `self.state_size` is a + (possibly nested tuple of) TensorShape object(s), then it should return a + matching structure of Tensors having shape `[batch_size].concatenate(s)` + for each `s` in `self.batch_size`. + """ - Args: - inputs: `2-D` tensor with shape `[batch_size, input_size]`. - state: if `self.state_size` is an integer, this should be a `2-D Tensor` - with shape `[batch_size, self.state_size]`. Otherwise, if - `self.state_size` is a tuple of integers, this should be a tuple with - shapes `[batch_size, s] for s in self.state_size`. - scope: VariableScope for the created subgraph; defaults to class name. + def __init__(self, trainable=True, name=None, dtype=None, **kwargs): + super().__init__(trainable=trainable, name=name, dtype=dtype, **kwargs) + # Attribute that indicates whether the cell is a TF RNN cell, due the + # slight difference between TF and Keras RNN cell. Notably the state is + # not wrapped in a list for TF cell where they are single tensor state, + # whereas keras cell will wrap the state into a list, and call() will + # have to unwrap them. + self._is_tf_rnn_cell = True + + def __call__(self, inputs, state, scope=None): + """Run this RNN cell on inputs, starting from the given state. + + Args: + inputs: `2-D` tensor with shape `[batch_size, input_size]`. + state: if `self.state_size` is an integer, this should be a + `2-D Tensor` with shape `[batch_size, self.state_size]`. Otherwise, + if `self.state_size` is a tuple of integers, this should be a tuple + with shapes `[batch_size, s] for s in self.state_size`. + scope: VariableScope for the created subgraph; None uses class name. + Defaults to `None`. 
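The contract documented above: every cell maps `(input, state)` to `(output, next_state)`. A toy cell honoring it (illustrative only, not a class from the diff):

class AccumulatorCell:
    state_size = 1
    output_size = 1

    def __call__(self, inputs, state):
        next_state = [s + x for s, x in zip(state, inputs)]
        return next_state, next_state  # output is the running sum

output, state = AccumulatorCell()([2.0], [1.0])
assert output == [3.0] and state == [3.0]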
+ + Returns: + A pair containing: + + - Output: A `2-D` tensor with shape `[batch_size, self.output_size]`. + - New state: Either a single `2-D` tensor, or a tuple of tensors + matching the arity and shapes of `state`. + """ + if scope is not None: + with tf.compat.v1.variable_scope( + scope, custom_getter=self._rnn_get_variable + ) as scope: + return super().__call__(inputs, state, scope=scope) + else: + scope_attrname = "rnncell_scope" + scope = getattr(self, scope_attrname, None) + if scope is None: + scope = tf.compat.v1.variable_scope( + tf.compat.v1.get_variable_scope(), + custom_getter=self._rnn_get_variable, + ) + setattr(self, scope_attrname, scope) + with scope: + return super().__call__(inputs, state) + + def _rnn_get_variable(self, getter, *args, **kwargs): + variable = getter(*args, **kwargs) + if tf.compat.v1.executing_eagerly_outside_functions(): + trainable = variable.trainable + else: + trainable = variable in tf.compat.v1.trainable_variables() or ( + base_layer_utils.is_split_variable(variable) + and list(variable)[0] in tf.compat.v1.trainable_variables() + ) + if trainable and all( + variable is not v for v in self._trainable_weights + ): + self._trainable_weights.append(variable) + elif not trainable and all( + variable is not v for v in self._non_trainable_weights + ): + self._non_trainable_weights.append(variable) + return variable + + @property + def state_size(self): + """size(s) of state(s) used by this cell. + + It can be represented by an Integer, a TensorShape or a tuple of + Integers or TensorShapes. + """ + raise NotImplementedError("Abstract method") + + @property + def output_size(self): + """Integer or TensorShape: size of outputs produced by this cell.""" + raise NotImplementedError("Abstract method") + + def build(self, _): + # This tells the parent Layer object that it's OK to call + # self.add_weight() inside the call() method. + pass + + def get_initial_state(self, inputs=None, batch_size=None, dtype=None): + if inputs is not None: + # Validate the given batch_size and dtype against inputs if + # provided. + inputs = tf.convert_to_tensor(inputs, name="inputs") + if batch_size is not None: + if tf.is_tensor(batch_size): + static_batch_size = tf.get_static_value( + batch_size, partial=True + ) + else: + static_batch_size = batch_size + if inputs.shape.dims[0].value != static_batch_size: + raise ValueError( + "batch size from input tensor is different from the " + "input param. Input tensor batch: " + f"{inputs.shape.dims[0].value}, " + f"batch_size: {batch_size}" + ) + + if dtype is not None and inputs.dtype != dtype: + raise ValueError( + "dtype from input tensor is different from the " + f"input param. Input tensor dtype: {inputs.dtype}, " + f"dtype: {dtype}" + ) + + batch_size = ( + inputs.shape.dims[0].value or tf.compat.v1.shape(inputs)[0] + ) + dtype = inputs.dtype + if batch_size is None or dtype is None: + raise ValueError( + "batch_size and dtype cannot be None while constructing " + f"initial state: batch_size={batch_size}, dtype={dtype}" + ) + return self.zero_state(batch_size, dtype) + + def zero_state(self, batch_size, dtype): + """Return zero-filled state tensor(s). + + Args: + batch_size: int, float, or unit Tensor representing the batch size. + dtype: the data type to use for the state. + + Returns: + If `state_size` is an int or TensorShape, then the return value is a + `N-D` tensor of shape `[batch_size, state_size]` filled with zeros. 
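For instance (a sketch assuming the compat-v1 endpoint): with an integer `state_size`, `zero_state` returns a single zero tensor, and `get_initial_state` infers `batch_size` and `dtype` from `inputs` before delegating to it:

```python
import tensorflow as tf

cell = tf.compat.v1.nn.rnn_cell.BasicRNNCell(64)        # state_size == 64
state = cell.zero_state(batch_size=8, dtype=tf.float32)
print(state.shape)                                      # (8, 64)

# Equivalent, inferring batch size and dtype from an input batch:
state = cell.get_initial_state(inputs=tf.zeros([8, 32]))
```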
+ + If `state_size` is a nested list or tuple, then the return value is + a nested list or tuple (of the same structure) of `2-D` tensors with + the shapes `[batch_size, s]` for each s in `state_size`. + """ + # Try to use the last cached zero_state. This is done to avoid + # recreating zeros, especially when eager execution is enabled. + state_size = self.state_size + is_eager = tf.executing_eagerly() + if is_eager and _hasattr(self, "_last_zero_state"): + ( + last_state_size, + last_batch_size, + last_dtype, + last_output, + ) = getattr(self, "_last_zero_state") + if ( + last_batch_size == batch_size + and last_dtype == dtype + and last_state_size == state_size + ): + return last_output + with backend.name_scope(type(self).__name__ + "ZeroState"): + output = _zero_state_tensors(state_size, batch_size, dtype) + if is_eager: + self._last_zero_state = (state_size, batch_size, dtype, output) + return output + + def get_config(self): + return super().get_config() + + @property + def _use_input_spec_as_call_signature(self): + # We do not store the shape information for the state argument in the + # call function for legacy RNN cells, so do not generate an input + # signature. + return False - Returns: - A pair containing: - - Output: A `2-D` tensor with shape `[batch_size, self.output_size]`. - - New state: Either a single `2-D` tensor, or a tuple of tensors matching - the arity and shapes of `state`. - """ - if scope is not None: - with tf.compat.v1.variable_scope( - scope, custom_getter=self._rnn_get_variable) as scope: - return super().__call__(inputs, state, scope=scope) - else: - scope_attrname = "rnncell_scope" - scope = getattr(self, scope_attrname, None) - if scope is None: - scope = tf.compat.v1.variable_scope( - tf.compat.v1.get_variable_scope(), - custom_getter=self._rnn_get_variable) - setattr(self, scope_attrname, scope) - with scope: - return super().__call__(inputs, state) - - def _rnn_get_variable(self, getter, *args, **kwargs): - variable = getter(*args, **kwargs) - if tf.compat.v1.executing_eagerly_outside_functions(): - trainable = variable.trainable - else: - trainable = ( - variable in tf.compat.v1.trainable_variables() or - (base_layer_utils.is_split_variable(variable) and - list(variable)[0] in tf.compat.v1.trainable_variables())) - if trainable and all(variable is not v for v in self._trainable_weights): - self._trainable_weights.append(variable) - elif not trainable and all( - variable is not v for v in self._non_trainable_weights): - self._non_trainable_weights.append(variable) - return variable - - @property - def state_size(self): - """size(s) of state(s) used by this cell. - - It can be represented by an Integer, a TensorShape or a tuple of Integers - or TensorShapes. +class LayerRNNCell(RNNCell): + """Subclass of RNNCells that act like proper `tf.Layer` objects. + + For backwards compatibility purposes, most `RNNCell` instances allow their + `call` methods to instantiate variables via `tf.compat.v1.get_variable`. + The underlying variable scope thus keeps track of any variables, and + returning cached versions. This is atypical of `tf.layer` objects, which + separate this part of layer building into a `build` method that is only + called once. + + Here we provide a subclass for `RNNCell` objects that act exactly as + `Layer` objects do. They must provide a `build` method and their + `call` methods do not access Variables `tf.compat.v1.get_variable`. 
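A hypothetical toy subclass illustrating the contract described above; a concrete cell only needs `state_size`, `output_size`, and `call` (this `AccumulatorCell` is illustrative, not part of the module):

```python
import tensorflow as tf

class AccumulatorCell(tf.compat.v1.nn.rnn_cell.RNNCell):
    """Toy cell whose output and next state are the running input sum."""

    def __init__(self, num_units):
        super().__init__()
        self._num_units = num_units

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def call(self, inputs, state):
        new_state = state + inputs   # both [batch_size, num_units]
        return new_state, new_state
```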
""" - raise NotImplementedError("Abstract method") - - @property - def output_size(self): - """Integer or TensorShape: size of outputs produced by this cell.""" - raise NotImplementedError("Abstract method") - - def build(self, _): - # This tells the parent Layer object that it's OK to call - # self.add_weight() inside the call() method. - pass - - def get_initial_state(self, inputs=None, batch_size=None, dtype=None): - if inputs is not None: - # Validate the given batch_size and dtype against inputs if provided. - inputs = tf.convert_to_tensor(inputs, name="inputs") - if batch_size is not None: - if tf.is_tensor(batch_size): - static_batch_size = tf.get_static_value( - batch_size, partial=True) - else: - static_batch_size = batch_size - if inputs.shape.dims[0].value != static_batch_size: - raise ValueError( - "batch size from input tensor is different from the " - f"input param. Input tensor batch: {inputs.shape.dims[0].value}, " - f"batch_size: {batch_size}") - - if dtype is not None and inputs.dtype != dtype: - raise ValueError( - "dtype from input tensor is different from the " - f"input param. Input tensor dtype: {inputs.dtype}, dtype: {dtype}") - batch_size = inputs.shape.dims[0].value or tf.compat.v1.shape(inputs)[0] - dtype = inputs.dtype - if batch_size is None or dtype is None: - raise ValueError( - "batch_size and dtype cannot be None while constructing initial " - f"state: batch_size={batch_size}, dtype={dtype}") - return self.zero_state(batch_size, dtype) + def __call__(self, inputs, state, scope=None, *args, **kwargs): + """Run this RNN cell on inputs, starting from the given state. + + Args: + inputs: `2-D` tensor with shape `[batch_size, input_size]`. + state: if `self.state_size` is an integer, this should be a `2-D + Tensor` with shape `[batch_size, self.state_size]`. Otherwise, if + `self.state_size` is a tuple of integers, this should be a tuple + with shapes `[batch_size, s] for s in self.state_size`. + scope: optional cell scope. + *args: Additional positional arguments. + **kwargs: Additional keyword arguments. + + Returns: + A pair containing: + + - Output: A `2-D` tensor with shape `[batch_size, self.output_size]`. + - New state: Either a single `2-D` tensor, or a tuple of tensors + matching the arity and shapes of `state`. + """ + # Bypass RNNCell's variable capturing semantics for LayerRNNCell. + # Instead, it is up to subclasses to provide a proper build + # method. See the class docstring for more details. + return base_layer.Layer.__call__( + self, inputs, state, scope=scope, *args, **kwargs + ) - def zero_state(self, batch_size, dtype): - """Return zero-filled state tensor(s). - Args: - batch_size: int, float, or unit Tensor representing the batch size. - dtype: the data type to use for the state. +@keras_export(v1=["keras.__internal__.legacy.rnn_cell.BasicRNNCell"]) +@tf_export(v1=["nn.rnn_cell.BasicRNNCell"]) +class BasicRNNCell(LayerRNNCell): + """The most basic RNN cell. - Returns: - If `state_size` is an int or TensorShape, then the return value is a - `N-D` tensor of shape `[batch_size, state_size]` filled with zeros. + Note that this cell is not optimized for performance. - If `state_size` is a nested list or tuple, then the return value is - a nested list or tuple (of the same structure) of `2-D` tensors with - the shapes `[batch_size, s]` for each s in `state_size`. + Args: + num_units: int, The number of units in the RNN cell. + activation: Nonlinearity to use. Default: `tanh`. 
It could also be string + that is within Keras activation function names. + reuse: (optional) Python boolean describing whether to reuse variables in + an existing scope. If not `True`, and the existing scope already has the + given variables, an error is raised. + name: String, the name of the layer. Layers with the same name will share + weights, but to avoid mistakes we require reuse=True in such cases. + dtype: Default dtype of the layer (default of `None` means use the type of + the first input). Required when `build` is called before `call`. + **kwargs: Dict, keyword named properties for common layer attributes, like + `trainable` etc when constructing the cell from configs of get_config(). """ - # Try to use the last cached zero_state. This is done to avoid recreating - # zeros, especially when eager execution is enabled. - state_size = self.state_size - is_eager = tf.executing_eagerly() - if is_eager and _hasattr(self, "_last_zero_state"): - (last_state_size, last_batch_size, last_dtype, - last_output) = getattr(self, "_last_zero_state") - if (last_batch_size == batch_size and last_dtype == dtype and - last_state_size == state_size): - return last_output - with backend.name_scope(type(self).__name__ + "ZeroState"): - output = _zero_state_tensors(state_size, batch_size, dtype) - if is_eager: - self._last_zero_state = (state_size, batch_size, dtype, output) - return output - - # TODO(b/134773139): Remove when contrib RNN cells implement `get_config` - def get_config(self): # pylint: disable=useless-super-delegation - return super().get_config() - - @property - def _use_input_spec_as_call_signature(self): - # We do not store the shape information for the state argument in the call - # function for legacy RNN cells, so do not generate an input signature. - return False + def __init__( + self, + num_units, + activation=None, + reuse=None, + name=None, + dtype=None, + **kwargs, + ): + warnings.warn( + "`tf.nn.rnn_cell.BasicRNNCell` is deprecated and will be " + "removed in a future version. This class " + "is equivalent as `tf.keras.layers.SimpleRNNCell`, " + "and will be replaced by that in Tensorflow 2.0.", + stacklevel=2, + ) + super().__init__(_reuse=reuse, name=name, dtype=dtype, **kwargs) + _check_supported_dtypes(self.dtype) + if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"): + logging.warning( + "%s: Note that this cell is not optimized for performance.", + self, + ) + + # Inputs must be 2-dimensional. + self.input_spec = input_spec.InputSpec(ndim=2) + + self._num_units = num_units + if activation: + self._activation = activations.get(activation) + else: + self._activation = tf.tanh -class LayerRNNCell(RNNCell): - """Subclass of RNNCells that act like proper `tf.Layer` objects. + @property + def state_size(self): + return self._num_units - For backwards compatibility purposes, most `RNNCell` instances allow their - `call` methods to instantiate variables via `tf.compat.v1.get_variable`. The - underlying - variable scope thus keeps track of any variables, and returning cached - versions. This is atypical of `tf.layer` objects, which separate this - part of layer building into a `build` method that is only called once. + @property + def output_size(self): + return self._num_units - Here we provide a subclass for `RNNCell` objects that act exactly as - `Layer` objects do. They must provide a `build` method and their - `call` methods do not access Variables `tf.compat.v1.get_variable`. 
- """ + @tf_utils.shape_type_conversion + def build(self, inputs_shape): + if inputs_shape[-1] is None: + raise ValueError( + "Expected inputs.shape[-1] to be known, " + f"received shape: {inputs_shape}" + ) + _check_supported_dtypes(self.dtype) + + input_depth = inputs_shape[-1] + self._kernel = self.add_weight( + _WEIGHTS_VARIABLE_NAME, + shape=[input_depth + self._num_units, self._num_units], + ) + self._bias = self.add_weight( + _BIAS_VARIABLE_NAME, + shape=[self._num_units], + initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype), + ) + + self.built = True + + def call(self, inputs, state): + """Most basic RNN: output = new_state = act(W * input + U * state + + B).""" + _check_rnn_cell_input_dtypes([inputs, state]) + gate_inputs = tf.matmul(tf.concat([inputs, state], 1), self._kernel) + gate_inputs = tf.nn.bias_add(gate_inputs, self._bias) + output = self._activation(gate_inputs) + return output, output + + def get_config(self): + config = { + "num_units": self._num_units, + "activation": activations.serialize(self._activation), + "reuse": self._reuse, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) - def __call__(self, inputs, state, scope=None, *args, **kwargs): - """Run this RNN cell on inputs, starting from the given state. - Args: - inputs: `2-D` tensor with shape `[batch_size, input_size]`. - state: if `self.state_size` is an integer, this should be a `2-D Tensor` - with shape `[batch_size, self.state_size]`. Otherwise, if - `self.state_size` is a tuple of integers, this should be a tuple with - shapes `[batch_size, s] for s in self.state_size`. - scope: optional cell scope. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. +@keras_export(v1=["keras.__internal__.legacy.rnn_cell.GRUCell"]) +@tf_export(v1=["nn.rnn_cell.GRUCell"]) +class GRUCell(LayerRNNCell): + """Gated Recurrent Unit cell. - Returns: - A pair containing: + Note that this cell is not optimized for performance. Please use + `tf.compat.v1.keras.layers.CuDNNGRU` for better performance on GPU, or + `tf.raw_ops.GRUBlockCell` for better performance on CPU. - - Output: A `2-D` tensor with shape `[batch_size, self.output_size]`. - - New state: Either a single `2-D` tensor, or a tuple of tensors matching - the arity and shapes of `state`. + Args: + num_units: int, The number of units in the GRU cell. + activation: Nonlinearity to use. Default: `tanh`. + reuse: (optional) Python boolean describing whether to reuse variables in + an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + kernel_initializer: (optional) The initializer to use for the weight and + projection matrices. + bias_initializer: (optional) The initializer to use for the bias. + name: String, the name of the layer. Layers with the same name will share + weights, but to avoid mistakes we require reuse=True in such cases. + dtype: Default dtype of the layer (default of `None` means use the type of + the first input). Required when `build` is called before `call`. + **kwargs: Dict, keyword named properties for common layer attributes, like + `trainable` etc when constructing the cell from configs of get_config(). 
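The one-step recurrence that `BasicRNNCell.call` above implements is `output = new_state = act(W @ [x; h] + b)`; a hypothetical NumPy sketch (all names illustrative):

```python
import numpy as np

def basic_rnn_step(x, h, kernel, bias, act=np.tanh):
    # kernel: [input_depth + num_units, num_units]; bias: [num_units]
    gate_inputs = np.concatenate([x, h], axis=1) @ kernel + bias
    output = act(gate_inputs)
    return output, output   # the output doubles as the next state
```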
+ References: Learning Phrase Representations using RNN Encoder Decoder + for Statistical Machine Translation: [Cho et al., 2014] + (https://aclanthology.coli.uni-saarland.de/papers/D14-1179/d14-1179) + ([pdf](http://emnlp2014.org/papers/pdf/EMNLP2014179.pdf)) """ - # Bypass RNNCell's variable capturing semantics for LayerRNNCell. - # Instead, it is up to subclasses to provide a proper build - # method. See the class docstring for more details. - return base_layer.Layer.__call__( - self, inputs, state, scope=scope, *args, **kwargs) + def __init__( + self, + num_units, + activation=None, + reuse=None, + kernel_initializer=None, + bias_initializer=None, + name=None, + dtype=None, + **kwargs, + ): + warnings.warn( + "`tf.nn.rnn_cell.GRUCell` is deprecated and will be removed " + "in a future version. This class " + "is equivalent as `tf.keras.layers.GRUCell`, " + "and will be replaced by that in Tensorflow 2.0.", + stacklevel=2, + ) + super().__init__(_reuse=reuse, name=name, dtype=dtype, **kwargs) + _check_supported_dtypes(self.dtype) + + if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"): + logging.warning( + "%s: Note that this cell is not optimized for performance. " + "Please use tf.compat.v1.keras.layers.CuDNNGRU for better " + "performance on GPU.", + self, + ) + # Inputs must be 2-dimensional. + self.input_spec = input_spec.InputSpec(ndim=2) + + self._num_units = num_units + if activation: + self._activation = activations.get(activation) + else: + self._activation = tf.tanh + self._kernel_initializer = initializers.get(kernel_initializer) + self._bias_initializer = initializers.get(bias_initializer) -@keras_export(v1=["keras.__internal__.legacy.rnn_cell.BasicRNNCell"]) -@tf_export(v1=["nn.rnn_cell.BasicRNNCell"]) -class BasicRNNCell(LayerRNNCell): - """The most basic RNN cell. - - Note that this cell is not optimized for performance. Please use - `tf.contrib.cudnn_rnn.CudnnRNNTanh` for better performance on GPU. - - Args: - num_units: int, The number of units in the RNN cell. - activation: Nonlinearity to use. Default: `tanh`. It could also be string - that is within Keras activation function names. - reuse: (optional) Python boolean describing whether to reuse variables in an - existing scope. If not `True`, and the existing scope already has the - given variables, an error is raised. - name: String, the name of the layer. Layers with the same name will share - weights, but to avoid mistakes we require reuse=True in such cases. - dtype: Default dtype of the layer (default of `None` means use the type of - the first input). Required when `build` is called before `call`. - **kwargs: Dict, keyword named properties for common layer attributes, like - `trainable` etc when constructing the cell from configs of get_config(). - """ - - def __init__(self, - num_units, - activation=None, - reuse=None, - name=None, - dtype=None, - **kwargs): - warnings.warn( - "`tf.nn.rnn_cell.BasicRNNCell` is deprecated and will be " - "removed in a future version. This class " - "is equivalent as `tf.keras.layers.SimpleRNNCell`, " - "and will be replaced by that in Tensorflow 2.0.", - stacklevel=2) - super().__init__( - _reuse=reuse, name=name, dtype=dtype, **kwargs) - _check_supported_dtypes(self.dtype) - if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"): - logging.warning( - "%s: Note that this cell is not optimized for performance. " - "Please use tf.contrib.cudnn_rnn.CudnnRNNTanh for better " - "performance on GPU.", self) - - # Inputs must be 2-dimensional. 
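A construction sketch for the GRU cell above (assuming the compat-v1 endpoint; per the deprecation warning, new code should use `tf.keras.layers.GRUCell`):

```python
import tensorflow as tf

cell = tf.compat.v1.nn.rnn_cell.GRUCell(num_units=64)
h0 = cell.zero_state(batch_size=8, dtype=tf.float32)
out, h1 = cell(tf.zeros([8, 32]), h0)  # out and h1 are the same [8, 64] tensor
```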
- self.input_spec = input_spec.InputSpec(ndim=2) - - self._num_units = num_units - if activation: - self._activation = activations.get(activation) - else: - self._activation = tf.tanh - - @property - def state_size(self): - return self._num_units - - @property - def output_size(self): - return self._num_units - - @tf_utils.shape_type_conversion - def build(self, inputs_shape): - if inputs_shape[-1] is None: - raise ValueError( - "Expected inputs.shape[-1] to be known, " - f"received shape: {inputs_shape}") - _check_supported_dtypes(self.dtype) - - input_depth = inputs_shape[-1] - self._kernel = self.add_weight( - _WEIGHTS_VARIABLE_NAME, - shape=[input_depth + self._num_units, self._num_units]) - self._bias = self.add_weight( - _BIAS_VARIABLE_NAME, - shape=[self._num_units], - initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype)) - - self.built = True - - def call(self, inputs, state): - """Most basic RNN: output = new_state = act(W * input + U * state + B).""" - _check_rnn_cell_input_dtypes([inputs, state]) - gate_inputs = tf.matmul( - tf.concat([inputs, state], 1), self._kernel) - gate_inputs = tf.nn.bias_add(gate_inputs, self._bias) - output = self._activation(gate_inputs) - return output, output - - def get_config(self): - config = { - "num_units": self._num_units, - "activation": activations.serialize(self._activation), - "reuse": self._reuse, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + @property + def state_size(self): + return self._num_units + @property + def output_size(self): + return self._num_units -@keras_export(v1=["keras.__internal__.legacy.rnn_cell.GRUCell"]) -@tf_export(v1=["nn.rnn_cell.GRUCell"]) -class GRUCell(LayerRNNCell): - """Gated Recurrent Unit cell. - - Note that this cell is not optimized for performance. Please use - `tf.contrib.cudnn_rnn.CudnnGRU` for better performance on GPU, or - `tf.contrib.rnn.GRUBlockCellV2` for better performance on CPU. - - Args: - num_units: int, The number of units in the GRU cell. - activation: Nonlinearity to use. Default: `tanh`. - reuse: (optional) Python boolean describing whether to reuse variables in an - existing scope. If not `True`, and the existing scope already has the - given variables, an error is raised. - kernel_initializer: (optional) The initializer to use for the weight and - projection matrices. - bias_initializer: (optional) The initializer to use for the bias. - name: String, the name of the layer. Layers with the same name will share - weights, but to avoid mistakes we require reuse=True in such cases. - dtype: Default dtype of the layer (default of `None` means use the type of - the first input). Required when `build` is called before `call`. - **kwargs: Dict, keyword named properties for common layer attributes, like - `trainable` etc when constructing the cell from configs of get_config(). - References: Learning Phrase Representations using RNN Encoder Decoder for - Statistical - Machine Translation: [Cho et al., 2014] - (https://aclanthology.coli.uni-saarland.de/papers/D14-1179/d14-1179) - ([pdf](http://emnlp2014.org/papers/pdf/EMNLP2014179.pdf)) - """ - - def __init__(self, - num_units, - activation=None, - reuse=None, - kernel_initializer=None, - bias_initializer=None, - name=None, - dtype=None, - **kwargs): - warnings.warn( - "`tf.nn.rnn_cell.GRUCell` is deprecated and will be removed " - "in a future version. 
This class " - "is equivalent as `tf.keras.layers.GRUCell`, " - "and will be replaced by that in Tensorflow 2.0.", - stacklevel=2) - super().__init__( - _reuse=reuse, name=name, dtype=dtype, **kwargs) - _check_supported_dtypes(self.dtype) - - if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"): - logging.warning( - "%s: Note that this cell is not optimized for performance. " - "Please use tf.contrib.cudnn_rnn.CudnnGRU for better " - "performance on GPU.", self) - # Inputs must be 2-dimensional. - self.input_spec = input_spec.InputSpec(ndim=2) - - self._num_units = num_units - if activation: - self._activation = activations.get(activation) - else: - self._activation = tf.tanh - self._kernel_initializer = initializers.get(kernel_initializer) - self._bias_initializer = initializers.get(bias_initializer) - - @property - def state_size(self): - return self._num_units - - @property - def output_size(self): - return self._num_units - - @tf_utils.shape_type_conversion - def build(self, inputs_shape): - if inputs_shape[-1] is None: - raise ValueError( - "Expected inputs.shape[-1] to be known, " - f"received shape: {inputs_shape}") - _check_supported_dtypes(self.dtype) - input_depth = inputs_shape[-1] - self._gate_kernel = self.add_weight( - "gates/%s" % _WEIGHTS_VARIABLE_NAME, - shape=[input_depth + self._num_units, 2 * self._num_units], - initializer=self._kernel_initializer) - self._gate_bias = self.add_weight( - "gates/%s" % _BIAS_VARIABLE_NAME, - shape=[2 * self._num_units], - initializer=(self._bias_initializer - if self._bias_initializer is not None else - tf.compat.v1.constant_initializer(1.0, dtype=self.dtype))) - self._candidate_kernel = self.add_weight( - "candidate/%s" % _WEIGHTS_VARIABLE_NAME, - shape=[input_depth + self._num_units, self._num_units], - initializer=self._kernel_initializer) - self._candidate_bias = self.add_weight( - "candidate/%s" % _BIAS_VARIABLE_NAME, - shape=[self._num_units], - initializer=(self._bias_initializer - if self._bias_initializer is not None else - tf.compat.v1.zeros_initializer(dtype=self.dtype))) - - self.built = True - - def call(self, inputs, state): - """Gated recurrent unit (GRU) with nunits cells.""" - _check_rnn_cell_input_dtypes([inputs, state]) - - gate_inputs = tf.matmul( - tf.concat([inputs, state], 1), self._gate_kernel) - gate_inputs = tf.nn.bias_add(gate_inputs, self._gate_bias) - - value = tf.sigmoid(gate_inputs) - r, u = tf.split(value=value, num_or_size_splits=2, axis=1) - - r_state = r * state - - candidate = tf.matmul( - tf.concat([inputs, r_state], 1), self._candidate_kernel) - candidate = tf.nn.bias_add(candidate, self._candidate_bias) - - c = self._activation(candidate) - new_h = u * state + (1 - u) * c - return new_h, new_h - - def get_config(self): - config = { - "num_units": self._num_units, - "kernel_initializer": initializers.serialize(self._kernel_initializer), - "bias_initializer": initializers.serialize(self._bias_initializer), - "activation": activations.serialize(self._activation), - "reuse": self._reuse, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + @tf_utils.shape_type_conversion + def build(self, inputs_shape): + if inputs_shape[-1] is None: + raise ValueError( + "Expected inputs.shape[-1] to be known, " + f"received shape: {inputs_shape}" + ) + _check_supported_dtypes(self.dtype) + input_depth = inputs_shape[-1] + self._gate_kernel = self.add_weight( + f"gates/{_WEIGHTS_VARIABLE_NAME}", + shape=[input_depth + self._num_units, 2 * 
self._num_units], + initializer=self._kernel_initializer, + ) + self._gate_bias = self.add_weight( + f"gates/{_BIAS_VARIABLE_NAME}", + shape=[2 * self._num_units], + initializer=( + self._bias_initializer + if self._bias_initializer is not None + else tf.compat.v1.constant_initializer(1.0, dtype=self.dtype) + ), + ) + self._candidate_kernel = self.add_weight( + f"candidate/{_WEIGHTS_VARIABLE_NAME}", + shape=[input_depth + self._num_units, self._num_units], + initializer=self._kernel_initializer, + ) + self._candidate_bias = self.add_weight( + f"candidate/{_BIAS_VARIABLE_NAME}", + shape=[self._num_units], + initializer=( + self._bias_initializer + if self._bias_initializer is not None + else tf.compat.v1.zeros_initializer(dtype=self.dtype) + ), + ) + + self.built = True + + def call(self, inputs, state): + """Gated recurrent unit (GRU) with nunits cells.""" + _check_rnn_cell_input_dtypes([inputs, state]) + + gate_inputs = tf.matmul( + tf.concat([inputs, state], 1), self._gate_kernel + ) + gate_inputs = tf.nn.bias_add(gate_inputs, self._gate_bias) + + value = tf.sigmoid(gate_inputs) + r, u = tf.split(value=value, num_or_size_splits=2, axis=1) + + r_state = r * state + + candidate = tf.matmul( + tf.concat([inputs, r_state], 1), self._candidate_kernel + ) + candidate = tf.nn.bias_add(candidate, self._candidate_bias) + + c = self._activation(candidate) + new_h = u * state + (1 - u) * c + return new_h, new_h + + def get_config(self): + config = { + "num_units": self._num_units, + "kernel_initializer": initializers.serialize( + self._kernel_initializer + ), + "bias_initializer": initializers.serialize(self._bias_initializer), + "activation": activations.serialize(self._activation), + "reuse": self._reuse, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) _LSTMStateTuple = collections.namedtuple("LSTMStateTuple", ("c", "h")) @@ -593,638 +645,714 @@ def get_config(self): @keras_export(v1=["keras.__internal__.legacy.rnn_cell.LSTMStateTuple"]) @tf_export(v1=["nn.rnn_cell.LSTMStateTuple"]) class LSTMStateTuple(_LSTMStateTuple): - """Tuple used by LSTM Cells for `state_size`, `zero_state`, and output state. + """Tuple used by LSTM Cells for `state_size`, `zero_state`, & output state. + + Stores two elements: `(c, h)`, in that order. Where `c` is the hidden state + and `h` is the output. - Stores two elements: `(c, h)`, in that order. Where `c` is the hidden state - and `h` is the output. + Only used when `state_is_tuple=True`. + """ - Only used when `state_is_tuple=True`. - """ - __slots__ = () + __slots__ = () - @property - def dtype(self): - (c, h) = self - if c.dtype != h.dtype: - raise TypeError("Inconsistent dtypes for internal state: " - f"{c.dtype} vs {h.dtype}") - return c.dtype + @property + def dtype(self): + (c, h) = self + if c.dtype != h.dtype: + raise TypeError( + "Inconsistent dtypes for internal state: " + f"{c.dtype} vs {h.dtype}" + ) + return c.dtype @keras_export(v1=["keras.__internal__.legacy.rnn_cell.BasicLSTMCell"]) @tf_export(v1=["nn.rnn_cell.BasicLSTMCell"]) class BasicLSTMCell(LayerRNNCell): - """DEPRECATED: Please use `tf.compat.v1.nn.rnn_cell.LSTMCell` instead. + """DEPRECATED: Please use `tf.compat.v1.nn.rnn_cell.LSTMCell` instead. - Basic LSTM recurrent network cell. + Basic LSTM recurrent network cell. - The implementation is based on + The implementation is based on - We add forget_bias (default: 1) to the biases of the forget gate in order to - reduce the scale of forgetting in the beginning of the training. 
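The gate arithmetic in `GRUCell.call` above is the standard GRU update; a hypothetical NumPy sketch of a single step (all names illustrative):

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x, h, gate_kernel, gate_bias, cand_kernel, cand_bias, act=np.tanh):
    gates = sigmoid(np.concatenate([x, h], axis=1) @ gate_kernel + gate_bias)
    r, u = np.split(gates, 2, axis=1)              # reset and update gates
    c = act(np.concatenate([x, r * h], axis=1) @ cand_kernel + cand_bias)
    new_h = u * h + (1.0 - u) * c                  # blend old state and candidate
    return new_h, new_h
```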
+ We add forget_bias (default: 1) to the biases of the forget gate in order to + reduce the scale of forgetting in the beginning of the training. - It does not allow cell clipping, a projection layer, and does not - use peep-hole connections: it is the basic baseline. + It does not allow cell clipping, a projection layer, and does not + use peep-hole connections: it is the basic baseline. - For advanced models, please use the full `tf.compat.v1.nn.rnn_cell.LSTMCell` - that follows. + For advanced models, please use the full `tf.compat.v1.nn.rnn_cell.LSTMCell` + that follows. - Note that this cell is not optimized for performance. Please use - `tf.contrib.cudnn_rnn.CudnnLSTM` for better performance on GPU, or - `tf.contrib.rnn.LSTMBlockCell` and `tf.contrib.rnn.LSTMBlockFusedCell` for - better performance on CPU. - """ + Note that this cell is not optimized for performance. Please use + `tf.compat.v1.keras.layers.CuDNNLSTM` for better performance on GPU, or + `tf.raw_ops.LSTMBlockCell` for better performance on CPU. + """ - def __init__(self, - num_units, - forget_bias=1.0, - state_is_tuple=True, - activation=None, - reuse=None, - name=None, - dtype=None, - **kwargs): - """Initialize the basic LSTM cell. + def __init__( + self, + num_units, + forget_bias=1.0, + state_is_tuple=True, + activation=None, + reuse=None, + name=None, + dtype=None, + **kwargs, + ): + """Initialize the basic LSTM cell. + + Args: + num_units: int, The number of units in the LSTM cell. + forget_bias: float, The bias added to forget gates (see above). Must + set to `0.0` manually when restoring from CudnnLSTM-trained + checkpoints. + state_is_tuple: If True, accepted and returned states are 2-tuples of + the `c_state` and `m_state`. If False, they are concatenated along + the column axis. The latter behavior will soon be deprecated. + activation: Activation function of the inner states. Default: `tanh`. + It could also be string that is within Keras activation function + names. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already + has the given variables, an error is raised. + name: String, the name of the layer. Layers with the same name will + share weights, but to avoid mistakes we require reuse=True in such + cases. + dtype: Default dtype of the layer (default of `None` means use the + type of the first input). Required when `build` is called before + `call`. + **kwargs: Dict, keyword named properties for common layer attributes, + like `trainable` etc when constructing the cell from configs of + get_config(). When restoring from CudnnLSTM-trained checkpoints, + must use `CudnnCompatibleLSTMCell` instead. + """ + warnings.warn( + "`tf.nn.rnn_cell.BasicLSTMCell` is deprecated and will be " + "removed in a future version. This class " + "is equivalent as `tf.keras.layers.LSTMCell`, " + "and will be replaced by that in Tensorflow 2.0.", + stacklevel=2, + ) + super().__init__(_reuse=reuse, name=name, dtype=dtype, **kwargs) + _check_supported_dtypes(self.dtype) + if not state_is_tuple: + logging.warning( + "%s: Using a concatenated state is slower and will soon be " + "deprecated. Use state_is_tuple=True.", + self, + ) + if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"): + logging.warning( + "%s: Note that this cell is not optimized for performance. " + "Please use tf.compat.v1.keras.layers.CuDNNLSTM for better " + "performance on GPU.", + self, + ) + + # Inputs must be 2-dimensional. 
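With the default `state_is_tuple=True`, the state of the cell below travels as the `LSTMStateTuple` defined above; a small usage sketch (assuming the compat-v1 endpoint):

```python
import tensorflow as tf

state = tf.compat.v1.nn.rnn_cell.LSTMStateTuple(
    c=tf.zeros([8, 64]),   # internal (cell) state
    h=tf.zeros([8, 64]),   # output state
)
print(state.dtype)         # float32; raises TypeError if c and h disagree
c, h = state               # unpacks like a plain (c, h) tuple
```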
+ self.input_spec = input_spec.InputSpec(ndim=2) + + self._num_units = num_units + self._forget_bias = forget_bias + self._state_is_tuple = state_is_tuple + if activation: + self._activation = activations.get(activation) + else: + self._activation = tf.tanh + + @property + def state_size(self): + return ( + LSTMStateTuple(self._num_units, self._num_units) + if self._state_is_tuple + else 2 * self._num_units + ) + + @property + def output_size(self): + return self._num_units + + @tf_utils.shape_type_conversion + def build(self, inputs_shape): + if inputs_shape[-1] is None: + raise ValueError( + "Expected inputs.shape[-1] to be known, " + f"received shape: {inputs_shape}" + ) + _check_supported_dtypes(self.dtype) + input_depth = inputs_shape[-1] + h_depth = self._num_units + self._kernel = self.add_weight( + _WEIGHTS_VARIABLE_NAME, + shape=[input_depth + h_depth, 4 * self._num_units], + ) + self._bias = self.add_weight( + _BIAS_VARIABLE_NAME, + shape=[4 * self._num_units], + initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype), + ) + + self.built = True + + def call(self, inputs, state): + """Long short-term memory cell (LSTM). + + Args: + inputs: `2-D` tensor with shape `[batch_size, input_size]`. + state: An `LSTMStateTuple` of state tensors, each shaped `[batch_size, + num_units]`, if `state_is_tuple` has been set to `True`. Otherwise, + a `Tensor` shaped `[batch_size, 2 * num_units]`. + + Returns: + A pair containing the new hidden state, and the new state (either a + `LSTMStateTuple` or a concatenated state, depending on + `state_is_tuple`). + """ + _check_rnn_cell_input_dtypes([inputs, state]) + + sigmoid = tf.sigmoid + one = tf.constant(1, dtype=tf.int32) + # Parameters of gates are concatenated into one multiply for efficiency. + if self._state_is_tuple: + c, h = state + else: + c, h = tf.split(value=state, num_or_size_splits=2, axis=one) - Args: - num_units: int, The number of units in the LSTM cell. - forget_bias: float, The bias added to forget gates (see above). Must set - to `0.0` manually when restoring from CudnnLSTM-trained checkpoints. - state_is_tuple: If True, accepted and returned states are 2-tuples of the - `c_state` and `m_state`. If False, they are concatenated along the - column axis. The latter behavior will soon be deprecated. - activation: Activation function of the inner states. Default: `tanh`. It - could also be string that is within Keras activation function names. - reuse: (optional) Python boolean describing whether to reuse variables in - an existing scope. If not `True`, and the existing scope already has - the given variables, an error is raised. - name: String, the name of the layer. Layers with the same name will share - weights, but to avoid mistakes we require reuse=True in such cases. - dtype: Default dtype of the layer (default of `None` means use the type of - the first input). Required when `build` is called before `call`. - **kwargs: Dict, keyword named properties for common layer attributes, like - `trainable` etc when constructing the cell from configs of get_config(). - When restoring from CudnnLSTM-trained checkpoints, must use - `CudnnCompatibleLSTMCell` instead. - """ - warnings.warn( - "`tf.nn.rnn_cell.BasicLSTMCell` is deprecated and will be " - "removed in a future version. 
This class " - "is equivalent as `tf.keras.layers.LSTMCell`, " - "and will be replaced by that in Tensorflow 2.0.", - stacklevel=2) - super().__init__( - _reuse=reuse, name=name, dtype=dtype, **kwargs) - _check_supported_dtypes(self.dtype) - if not state_is_tuple: - logging.warning( - "%s: Using a concatenated state is slower and will soon be " - "deprecated. Use state_is_tuple=True.", self) - if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"): - logging.warning( - "%s: Note that this cell is not optimized for performance. " - "Please use tf.contrib.cudnn_rnn.CudnnLSTM for better " - "performance on GPU.", self) - - # Inputs must be 2-dimensional. - self.input_spec = input_spec.InputSpec(ndim=2) - - self._num_units = num_units - self._forget_bias = forget_bias - self._state_is_tuple = state_is_tuple - if activation: - self._activation = activations.get(activation) - else: - self._activation = tf.tanh - - @property - def state_size(self): - return (LSTMStateTuple(self._num_units, self._num_units) - if self._state_is_tuple else 2 * self._num_units) - - @property - def output_size(self): - return self._num_units - - @tf_utils.shape_type_conversion - def build(self, inputs_shape): - if inputs_shape[-1] is None: - raise ValueError( - "Expected inputs.shape[-1] to be known, " - f"received shape: {inputs_shape}") - _check_supported_dtypes(self.dtype) - input_depth = inputs_shape[-1] - h_depth = self._num_units - self._kernel = self.add_weight( - _WEIGHTS_VARIABLE_NAME, - shape=[input_depth + h_depth, 4 * self._num_units]) - self._bias = self.add_weight( - _BIAS_VARIABLE_NAME, - shape=[4 * self._num_units], - initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype)) - - self.built = True - - def call(self, inputs, state): - """Long short-term memory cell (LSTM). + gate_inputs = tf.matmul(tf.concat([inputs, h], 1), self._kernel) + gate_inputs = tf.nn.bias_add(gate_inputs, self._bias) - Args: - inputs: `2-D` tensor with shape `[batch_size, input_size]`. - state: An `LSTMStateTuple` of state tensors, each shaped `[batch_size, - num_units]`, if `state_is_tuple` has been set to `True`. Otherwise, a - `Tensor` shaped `[batch_size, 2 * num_units]`. + # i = input_gate, j = new_input, f = forget_gate, o = output_gate + i, j, f, o = tf.split(value=gate_inputs, num_or_size_splits=4, axis=one) - Returns: - A pair containing the new hidden state, and the new state (either a - `LSTMStateTuple` or a concatenated state, depending on - `state_is_tuple`). - """ - _check_rnn_cell_input_dtypes([inputs, state]) + forget_bias_tensor = tf.constant(self._forget_bias, dtype=f.dtype) + # Note that using `add` and `multiply` instead of `+` and `*` gives a + # performance improvement. So using those at the cost of readability. + add = tf.add + multiply = tf.multiply + new_c = add( + multiply(c, sigmoid(add(f, forget_bias_tensor))), + multiply(sigmoid(i), self._activation(j)), + ) + new_h = multiply(self._activation(new_c), sigmoid(o)) - sigmoid = tf.sigmoid - one = tf.constant(1, dtype=tf.int32) - # Parameters of gates are concatenated into one multiply for efficiency. 
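The `new_c`/`new_h` arithmetic above is the textbook LSTM update with a forget-gate bias; a hypothetical NumPy sketch of one step (names illustrative):

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def basic_lstm_step(x, c, h, kernel, bias, forget_bias=1.0, act=np.tanh):
    z = np.concatenate([x, h], axis=1) @ kernel + bias
    i, j, f, o = np.split(z, 4, axis=1)   # input, new input, forget, output
    new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * act(j)
    new_h = act(new_c) * sigmoid(o)
    return new_h, (new_c, new_h)          # (output, (c, h) state)
```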
- if self._state_is_tuple: - c, h = state - else: - c, h = tf.split(value=state, num_or_size_splits=2, axis=one) - - gate_inputs = tf.matmul( - tf.concat([inputs, h], 1), self._kernel) - gate_inputs = tf.nn.bias_add(gate_inputs, self._bias) - - # i = input_gate, j = new_input, f = forget_gate, o = output_gate - i, j, f, o = tf.split( - value=gate_inputs, num_or_size_splits=4, axis=one) - - forget_bias_tensor = tf.constant(self._forget_bias, dtype=f.dtype) - # Note that using `add` and `multiply` instead of `+` and `*` gives a - # performance improvement. So using those at the cost of readability. - add = tf.add - multiply = tf.multiply - new_c = add( - multiply(c, sigmoid(add(f, forget_bias_tensor))), - multiply(sigmoid(i), self._activation(j))) - new_h = multiply(self._activation(new_c), sigmoid(o)) - - if self._state_is_tuple: - new_state = LSTMStateTuple(new_c, new_h) - else: - new_state = tf.concat([new_c, new_h], 1) - return new_h, new_state + if self._state_is_tuple: + new_state = LSTMStateTuple(new_c, new_h) + else: + new_state = tf.concat([new_c, new_h], 1) + return new_h, new_state - def get_config(self): - config = { - "num_units": self._num_units, - "forget_bias": self._forget_bias, - "state_is_tuple": self._state_is_tuple, - "activation": activations.serialize(self._activation), - "reuse": self._reuse, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def get_config(self): + config = { + "num_units": self._num_units, + "forget_bias": self._forget_bias, + "state_is_tuple": self._state_is_tuple, + "activation": activations.serialize(self._activation), + "reuse": self._reuse, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) @keras_export(v1=["keras.__internal__.legacy.rnn_cell.LSTMCell"]) @tf_export(v1=["nn.rnn_cell.LSTMCell"]) class LSTMCell(LayerRNNCell): - """Long short-term memory unit (LSTM) recurrent network cell. - - The default non-peephole implementation is based on (Gers et al., 1999). - The peephole implementation is based on (Sak et al., 2014). - - The class uses optional peep-hole connections, optional cell clipping, and - an optional projection layer. - - Note that this cell is not optimized for performance. Please use - `tf.contrib.cudnn_rnn.CudnnLSTM` for better performance on GPU, or - `tf.contrib.rnn.LSTMBlockCell` and `tf.contrib.rnn.LSTMBlockFusedCell` for - better performance on CPU. - References: - Long short-term memory recurrent neural network architectures for large - scale acoustic modeling: - [Sak et al., 2014] - (https://www.isca-speech.org/archive/interspeech_2014/i14_0338.html) - ([pdf] - (https://www.isca-speech.org/archive/archive_papers/interspeech_2014/i14_0338.pdf)) - Learning to forget: - [Gers et al., 1999] - (http://digital-library.theiet.org/content/conferences/10.1049/cp_19991218) - ([pdf](https://arxiv.org/pdf/1409.2329.pdf)) - Long Short-Term Memory: - [Hochreiter et al., 1997] - (https://www.mitpressjournals.org/doi/abs/10.1162/neco.1997.9.8.1735) - ([pdf](http://ml.jku.at/publications/older/3504.pdf)) - """ - - def __init__(self, - num_units, - use_peepholes=False, - cell_clip=None, - initializer=None, - num_proj=None, - proj_clip=None, - num_unit_shards=None, - num_proj_shards=None, - forget_bias=1.0, - state_is_tuple=True, - activation=None, - reuse=None, - name=None, - dtype=None, - **kwargs): - """Initialize the parameters for an LSTM cell. - - Args: - num_units: int, The number of units in the LSTM cell. 
- use_peepholes: bool, set True to enable diagonal/peephole connections. - cell_clip: (optional) A float value, if provided the cell state is clipped - by this value prior to the cell output activation. - initializer: (optional) The initializer to use for the weight and - projection matrices. - num_proj: (optional) int, The output dimensionality for the projection - matrices. If None, no projection is performed. - proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is - provided, then the projected values are clipped elementwise to within - `[-proj_clip, proj_clip]`. - num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a - variable_scope partitioner instead. - num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a - variable_scope partitioner instead. - forget_bias: Biases of the forget gate are initialized by default to 1 in - order to reduce the scale of forgetting at the beginning of the - training. Must set it manually to `0.0` when restoring from CudnnLSTM - trained checkpoints. - state_is_tuple: If True, accepted and returned states are 2-tuples of the - `c_state` and `m_state`. If False, they are concatenated along the - column axis. This latter behavior will soon be deprecated. - activation: Activation function of the inner states. Default: `tanh`. It - could also be string that is within Keras activation function names. - reuse: (optional) Python boolean describing whether to reuse variables in - an existing scope. If not `True`, and the existing scope already has - the given variables, an error is raised. - name: String, the name of the layer. Layers with the same name will share - weights, but to avoid mistakes we require reuse=True in such cases. - dtype: Default dtype of the layer (default of `None` means use the type of - the first input). Required when `build` is called before `call`. - **kwargs: Dict, keyword named properties for common layer attributes, like - `trainable` etc when constructing the cell from configs of get_config(). - When restoring from CudnnLSTM-trained checkpoints, use - `CudnnCompatibleLSTMCell` instead. + """Long short-term memory unit (LSTM) recurrent network cell. + + The default non-peephole implementation is based on (Gers et al., 1999). + The peephole implementation is based on (Sak et al., 2014). + + The class uses optional peep-hole connections, optional cell clipping, and + an optional projection layer. + + Note that this cell is not optimized for performance. Please use + `tf.compat.v1.keras.layers.CuDNNLSTM` for better performance on GPU, or + `tf.raw_ops.LSTMBlockCell` for better performance on CPU. + References: + Long short-term memory recurrent neural network architectures for large + scale acoustic modeling: + [Sak et al., 2014] + (https://www.isca-speech.org/archive/interspeech_2014/i14_0338.html) + ([pdf] + (https://www.isca-speech.org/archive/archive_papers/interspeech_2014/i14_0338.pdf)) + Learning to forget: + [Gers et al., 1999] + (http://digital-library.theiet.org/content/conferences/10.1049/cp_19991218) + ([pdf](https://arxiv.org/pdf/1409.2329.pdf)) + Long Short-Term Memory: + [Hochreiter et al., 1997] + (https://www.mitpressjournals.org/doi/abs/10.1162/neco.1997.9.8.1735) + ([pdf](http://ml.jku.at/publications/older/3504.pdf)) """ - warnings.warn( - "`tf.nn.rnn_cell.LSTMCell` is deprecated and will be " - "removed in a future version. 
This class " - "is equivalent as `tf.keras.layers.LSTMCell`, " - "and will be replaced by that in Tensorflow 2.0.", - stacklevel=2) - super().__init__( - _reuse=reuse, name=name, dtype=dtype, **kwargs) - _check_supported_dtypes(self.dtype) - if not state_is_tuple: - logging.warning( - "%s: Using a concatenated state is slower and will soon be " - "deprecated. Use state_is_tuple=True.", self) - if num_unit_shards is not None or num_proj_shards is not None: - logging.warning( - "%s: The num_unit_shards and proj_unit_shards parameters are " - "deprecated and will be removed in Jan 2017. " - "Use a variable scope with a partitioner instead.", self) - if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"): - logging.warning( - "%s: Note that this cell is not optimized for performance. " - "Please use tf.contrib.cudnn_rnn.CudnnLSTM for better " - "performance on GPU.", self) - - # Inputs must be 2-dimensional. - self.input_spec = input_spec.InputSpec(ndim=2) - - self._num_units = num_units - self._use_peepholes = use_peepholes - self._cell_clip = cell_clip - self._initializer = initializers.get(initializer) - self._num_proj = num_proj - self._proj_clip = proj_clip - self._num_unit_shards = num_unit_shards - self._num_proj_shards = num_proj_shards - self._forget_bias = forget_bias - self._state_is_tuple = state_is_tuple - if activation: - self._activation = activations.get(activation) - else: - self._activation = tf.tanh - - if num_proj: - self._state_size = ( - LSTMStateTuple(num_units, num_proj) if state_is_tuple else num_units + - num_proj) - self._output_size = num_proj - else: - self._state_size = ( - LSTMStateTuple(num_units, num_units) if state_is_tuple else 2 * - num_units) - self._output_size = num_units - - @property - def state_size(self): - return self._state_size - - @property - def output_size(self): - return self._output_size - - @tf_utils.shape_type_conversion - def build(self, inputs_shape): - if inputs_shape[-1] is None: - raise ValueError("Expected inputs.shape[-1] to be known, " - f"received shape: {inputs_shape}") - _check_supported_dtypes(self.dtype) - input_depth = inputs_shape[-1] - h_depth = self._num_units if self._num_proj is None else self._num_proj - maybe_partitioner = ( - tf.compat.v1.fixed_size_partitioner(self._num_unit_shards) - if self._num_unit_shards is not None else None) - self._kernel = self.add_weight( - _WEIGHTS_VARIABLE_NAME, - shape=[input_depth + h_depth, 4 * self._num_units], - initializer=self._initializer, - partitioner=maybe_partitioner) - if self.dtype is None: - initializer = tf.compat.v1.zeros_initializer - else: - initializer = tf.compat.v1.zeros_initializer(dtype=self.dtype) - self._bias = self.add_weight( - _BIAS_VARIABLE_NAME, - shape=[4 * self._num_units], - initializer=initializer) - if self._use_peepholes: - self._w_f_diag = self.add_weight( - "w_f_diag", shape=[self._num_units], initializer=self._initializer) - self._w_i_diag = self.add_weight( - "w_i_diag", shape=[self._num_units], initializer=self._initializer) - self._w_o_diag = self.add_weight( - "w_o_diag", shape=[self._num_units], initializer=self._initializer) - - if self._num_proj is not None: - maybe_proj_partitioner = ( - tf.compat.v1.fixed_size_partitioner(self._num_proj_shards) - if self._num_proj_shards is not None else None) - self._proj_kernel = self.add_weight( - "projection/%s" % _WEIGHTS_VARIABLE_NAME, - shape=[self._num_units, self._num_proj], - initializer=self._initializer, - partitioner=maybe_proj_partitioner) - - self.built = True - - def call(self, 
inputs, state): - """Run one step of LSTM. - Args: - inputs: input Tensor, must be 2-D, `[batch, input_size]`. - state: if `state_is_tuple` is False, this must be a state Tensor, `2-D, - [batch, state_size]`. If `state_is_tuple` is True, this must be a tuple - of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`. + def __init__( + self, + num_units, + use_peepholes=False, + cell_clip=None, + initializer=None, + num_proj=None, + proj_clip=None, + num_unit_shards=None, + num_proj_shards=None, + forget_bias=1.0, + state_is_tuple=True, + activation=None, + reuse=None, + name=None, + dtype=None, + **kwargs, + ): + """Initialize the parameters for an LSTM cell. + + Args: + num_units: int, The number of units in the LSTM cell. + use_peepholes: bool, set True to enable diagonal/peephole connections. + cell_clip: (optional) A float value, if provided the cell state is + clipped by this value prior to the cell output activation. + initializer: (optional) The initializer to use for the weight and + projection matrices. + num_proj: (optional) int, The output dimensionality for the projection + matrices. If None, no projection is performed. + proj_clip: (optional) A float value. If `num_proj > 0` and + `proj_clip` is provided, then the projected values are clipped + elementwise to within `[-proj_clip, proj_clip]`. + num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a + variable_scope partitioner instead. + num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a + variable_scope partitioner instead. + forget_bias: Biases of the forget gate are initialized by default to 1 + in order to reduce the scale of forgetting at the beginning of the + training. Must set it manually to `0.0` when restoring from + CudnnLSTM trained checkpoints. + state_is_tuple: If True, accepted and returned states are 2-tuples of + the `c_state` and `m_state`. If False, they are concatenated along + the column axis. This latter behavior will soon be deprecated. + activation: Activation function of the inner states. Default: `tanh`. + It could also be string that is within Keras activation function + names. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already + has the given variables, an error is raised. + name: String, the name of the layer. Layers with the same name will + share weights, but to avoid mistakes we require reuse=True in such + cases. + dtype: Default dtype of the layer (default of `None` means use the + type of the first input). Required when `build` is called before + `call`. + **kwargs: Dict, keyword named properties for common layer attributes, + like `trainable` etc when constructing the cell from configs of + get_config(). When restoring from CudnnLSTM-trained checkpoints, + use `CudnnCompatibleLSTMCell` instead. + """ + warnings.warn( + "`tf.nn.rnn_cell.LSTMCell` is deprecated and will be " + "removed in a future version. This class " + "is equivalent as `tf.keras.layers.LSTMCell`, " + "and will be replaced by that in Tensorflow 2.0.", + stacklevel=2, + ) + super().__init__(_reuse=reuse, name=name, dtype=dtype, **kwargs) + _check_supported_dtypes(self.dtype) + if not state_is_tuple: + logging.warning( + "%s: Using a concatenated state is slower and will soon be " + "deprecated. 
Use state_is_tuple=True.", + self, + ) + if num_unit_shards is not None or num_proj_shards is not None: + logging.warning( + "%s: The num_unit_shards and proj_unit_shards parameters are " + "deprecated and will be removed in Jan 2017. " + "Use a variable scope with a partitioner instead.", + self, + ) + if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"): + logging.warning( + "%s: Note that this cell is not optimized for performance. " + "Please use tf.compat.v1.keras.layers.CuDNNLSTM for better " + "performance on GPU.", + self, + ) + + # Inputs must be 2-dimensional. + self.input_spec = input_spec.InputSpec(ndim=2) + + self._num_units = num_units + self._use_peepholes = use_peepholes + self._cell_clip = cell_clip + self._initializer = initializers.get(initializer) + self._num_proj = num_proj + self._proj_clip = proj_clip + self._num_unit_shards = num_unit_shards + self._num_proj_shards = num_proj_shards + self._forget_bias = forget_bias + self._state_is_tuple = state_is_tuple + if activation: + self._activation = activations.get(activation) + else: + self._activation = tf.tanh + + if num_proj: + self._state_size = ( + LSTMStateTuple(num_units, num_proj) + if state_is_tuple + else num_units + num_proj + ) + self._output_size = num_proj + else: + self._state_size = ( + LSTMStateTuple(num_units, num_units) + if state_is_tuple + else 2 * num_units + ) + self._output_size = num_units + + @property + def state_size(self): + return self._state_size + + @property + def output_size(self): + return self._output_size + + @tf_utils.shape_type_conversion + def build(self, inputs_shape): + if inputs_shape[-1] is None: + raise ValueError( + "Expected inputs.shape[-1] to be known, " + f"received shape: {inputs_shape}" + ) + _check_supported_dtypes(self.dtype) + input_depth = inputs_shape[-1] + h_depth = self._num_units if self._num_proj is None else self._num_proj + maybe_partitioner = ( + tf.compat.v1.fixed_size_partitioner(self._num_unit_shards) + if self._num_unit_shards is not None + else None + ) + self._kernel = self.add_weight( + _WEIGHTS_VARIABLE_NAME, + shape=[input_depth + h_depth, 4 * self._num_units], + initializer=self._initializer, + partitioner=maybe_partitioner, + ) + if self.dtype is None: + initializer = tf.compat.v1.zeros_initializer + else: + initializer = tf.compat.v1.zeros_initializer(dtype=self.dtype) + self._bias = self.add_weight( + _BIAS_VARIABLE_NAME, + shape=[4 * self._num_units], + initializer=initializer, + ) + if self._use_peepholes: + self._w_f_diag = self.add_weight( + "w_f_diag", + shape=[self._num_units], + initializer=self._initializer, + ) + self._w_i_diag = self.add_weight( + "w_i_diag", + shape=[self._num_units], + initializer=self._initializer, + ) + self._w_o_diag = self.add_weight( + "w_o_diag", + shape=[self._num_units], + initializer=self._initializer, + ) + + if self._num_proj is not None: + maybe_proj_partitioner = ( + tf.compat.v1.fixed_size_partitioner(self._num_proj_shards) + if self._num_proj_shards is not None + else None + ) + self._proj_kernel = self.add_weight( + f"projection/{_WEIGHTS_VARIABLE_NAME}", + shape=[self._num_units, self._num_proj], + initializer=self._initializer, + partitioner=maybe_proj_partitioner, + ) + + self.built = True + + def call(self, inputs, state): + """Run one step of LSTM. + + Args: + inputs: input Tensor, must be 2-D, `[batch, input_size]`. + state: if `state_is_tuple` is False, this must be a state Tensor, + `2-D, [batch, state_size]`. 
+    def call(self, inputs, state):
+        """Run one step of LSTM.
+
+        Args:
+          inputs: input Tensor, must be 2-D, `[batch, input_size]`.
+          state: if `state_is_tuple` is False, this must be a state Tensor,
+            `2-D, [batch, state_size]`. If `state_is_tuple` is True, this must
+            be a tuple of state Tensors, both `2-D`, with column sizes
+            `c_state` and `m_state`.
+
+        Returns:
+          A tuple containing:
+
+          - A `2-D, [batch, output_dim]`, Tensor representing the output of
+            the LSTM after reading `inputs` when previous state was `state`.
+            Here output_dim is:
+              num_proj if num_proj was set,
+              num_units otherwise.
+          - Tensor(s) representing the new state of LSTM after reading
+            `inputs` when the previous state was `state`. Same type and
+            shape(s) as `state`.
+
+        Raises:
+          ValueError: If input size cannot be inferred from inputs via
+            static shape inference.
+        """
+        _check_rnn_cell_input_dtypes([inputs, state])
+
+        num_proj = self._num_units if self._num_proj is None else self._num_proj
+        sigmoid = tf.sigmoid
-    Returns:
-      A tuple containing:
+        if self._state_is_tuple:
+            (c_prev, m_prev) = state
+        else:
+            c_prev = tf.slice(state, [0, 0], [-1, self._num_units])
+            m_prev = tf.slice(state, [0, self._num_units], [-1, num_proj])
-    - A `2-D, [batch, output_dim]`, Tensor representing the output of the
-      LSTM after reading `inputs` when previous state was `state`.
-      Here output_dim is:
-         num_proj if num_proj was set,
-         num_units otherwise.
-    - Tensor(s) representing the new state of LSTM after reading `inputs` when
-      the previous state was `state`. Same type and shape(s) as `state`.
+        input_size = inputs.get_shape().with_rank(2).dims[1].value
+        if input_size is None:
+            raise ValueError(
+                "Could not infer input size from inputs.get_shape()[-1]. "
+                f"Received input shape: {inputs.get_shape()}"
+            )
+
+        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
+        lstm_matrix = tf.matmul(tf.concat([inputs, m_prev], 1), self._kernel)
+        lstm_matrix = tf.nn.bias_add(lstm_matrix, self._bias)
+
+        i, j, f, o = tf.split(value=lstm_matrix, num_or_size_splits=4, axis=1)
+        # Diagonal connections
+        if self._use_peepholes:
+            c = sigmoid(
+                f + self._forget_bias + self._w_f_diag * c_prev
+            ) * c_prev + sigmoid(
+                i + self._w_i_diag * c_prev
+            ) * self._activation(
+                j
+            )
+        else:
+            c = sigmoid(f + self._forget_bias) * c_prev + sigmoid(
+                i
+            ) * self._activation(j)
-    Raises:
-      ValueError: If input size cannot be inferred from inputs via
-        static shape inference.
-    """
-    _check_rnn_cell_input_dtypes([inputs, state])
+        if self._cell_clip is not None:
-    num_proj = self._num_units if self._num_proj is None else self._num_proj
-    sigmoid = tf.sigmoid
+            c = tf.clip_by_value(c, -self._cell_clip, self._cell_clip)
-    if self._state_is_tuple:
-      (c_prev, m_prev) = state
-    else:
-      c_prev = tf.slice(state, [0, 0], [-1, self._num_units])
-      m_prev = tf.slice(state, [0, self._num_units], [-1, num_proj])
-
-    input_size = inputs.get_shape().with_rank(2).dims[1].value
-    if input_size is None:
-      raise ValueError(
-          "Could not infer input size from inputs.get_shape()[-1]."
- f"Received input shape: {inputs.get_shape()}") - - # i = input_gate, j = new_input, f = forget_gate, o = output_gate - lstm_matrix = tf.matmul( - tf.concat([inputs, m_prev], 1), self._kernel) - lstm_matrix = tf.nn.bias_add(lstm_matrix, self._bias) - - i, j, f, o = tf.split( - value=lstm_matrix, num_or_size_splits=4, axis=1) - # Diagonal connections - if self._use_peepholes: - c = ( - sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev + - sigmoid(i + self._w_i_diag * c_prev) * self._activation(j)) - else: - c = ( - sigmoid(f + self._forget_bias) * c_prev + - sigmoid(i) * self._activation(j)) - - if self._cell_clip is not None: - # pylint: disable=invalid-unary-operand-type - c = tf.clip_by_value(c, -self._cell_clip, self._cell_clip) - # pylint: enable=invalid-unary-operand-type - if self._use_peepholes: - m = sigmoid(o + self._w_o_diag * c) * self._activation(c) - else: - m = sigmoid(o) * self._activation(c) - - if self._num_proj is not None: - m = tf.matmul(m, self._proj_kernel) - - if self._proj_clip is not None: - # pylint: disable=invalid-unary-operand-type - m = tf.clip_by_value(m, -self._proj_clip, self._proj_clip) - # pylint: enable=invalid-unary-operand-type - - new_state = ( - LSTMStateTuple(c, m) - if self._state_is_tuple else tf.concat([c, m], 1)) - return m, new_state - - def get_config(self): - config = { - "num_units": self._num_units, - "use_peepholes": self._use_peepholes, - "cell_clip": self._cell_clip, - "initializer": initializers.serialize(self._initializer), - "num_proj": self._num_proj, - "proj_clip": self._proj_clip, - "num_unit_shards": self._num_unit_shards, - "num_proj_shards": self._num_proj_shards, - "forget_bias": self._forget_bias, - "state_is_tuple": self._state_is_tuple, - "activation": activations.serialize(self._activation), - "reuse": self._reuse, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + if self._use_peepholes: + m = sigmoid(o + self._w_o_diag * c) * self._activation(c) + else: + m = sigmoid(o) * self._activation(c) + + if self._num_proj is not None: + m = tf.matmul(m, self._proj_kernel) + + if self._proj_clip is not None: + + m = tf.clip_by_value(m, -self._proj_clip, self._proj_clip) + + new_state = ( + LSTMStateTuple(c, m) + if self._state_is_tuple + else tf.concat([c, m], 1) + ) + return m, new_state + + def get_config(self): + config = { + "num_units": self._num_units, + "use_peepholes": self._use_peepholes, + "cell_clip": self._cell_clip, + "initializer": initializers.serialize(self._initializer), + "num_proj": self._num_proj, + "proj_clip": self._proj_clip, + "num_unit_shards": self._num_unit_shards, + "num_proj_shards": self._num_proj_shards, + "forget_bias": self._forget_bias, + "state_is_tuple": self._state_is_tuple, + "activation": activations.serialize(self._activation), + "reuse": self._reuse, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) @keras_export(v1=["keras.__internal__.legacy.rnn_cell.MultiRNNCell"]) @tf_export(v1=["nn.rnn_cell.MultiRNNCell"]) class MultiRNNCell(RNNCell): - """RNN cell composed sequentially of multiple simple cells. - - Example: - - ```python - num_units = [128, 64] - cells = [BasicLSTMCell(num_units=n) for n in num_units] - stacked_rnn_cell = MultiRNNCell(cells) - ``` - """ + """RNN cell composed sequentially of multiple simple cells. - def __init__(self, cells, state_is_tuple=True): - """Create a RNN cell composed sequentially of a number of RNNCells. 
+ Example: - Args: - cells: list of RNNCells that will be composed in this order. - state_is_tuple: If True, accepted and returned states are n-tuples, where - `n = len(cells)`. If False, the states are all concatenated along the - column axis. This latter behavior will soon be deprecated. - - Raises: - ValueError: if cells is empty (not allowed), or at least one of the cells - returns a state tuple but the flag `state_is_tuple` is `False`. + ```python + num_units = [128, 64] + cells = [BasicLSTMCell(num_units=n) for n in num_units] + stacked_rnn_cell = MultiRNNCell(cells) + ``` """ - logging.warning("`tf.nn.rnn_cell.MultiRNNCell` is deprecated. This class " - "is equivalent as `tf.keras.layers.StackedRNNCells`, " - "and will be replaced by that in Tensorflow 2.0.") - super().__init__() - if not cells: - raise ValueError("Must specify at least one cell for MultiRNNCell.") - if not tf.nest.is_nested(cells): - raise TypeError(f"cells must be a list or tuple, but received: {cells}.") - - if len(set(id(cell) for cell in cells)) < len(cells): - logging.log_first_n( - logging.WARN, "At least two cells provided to MultiRNNCell " - "are the same object and will share weights.", 1) - - self._cells = cells - for cell_number, cell in enumerate(self._cells): - # Add Trackable dependencies on these cells so their variables get - # saved with this object when using object-based saving. - if isinstance(cell, tf.__internal__.tracking.Trackable): - # TODO(allenl): Track down non-Trackable callers. - self._track_trackable(cell, name="cell-%d" % (cell_number,)) - self._state_is_tuple = state_is_tuple - if not state_is_tuple: - if any(tf.nest.is_nested(c.state_size) for c in self._cells): - raise ValueError( - "Some cells return tuples of states, but the flag " - "state_is_tuple is not set. " - f"State sizes are: {[c.state_size for c in self._cells]}") - - @property - def state_size(self): - if self._state_is_tuple: - return tuple(cell.state_size for cell in self._cells) - else: - return sum(cell.state_size for cell in self._cells) - - @property - def output_size(self): - return self._cells[-1].output_size - - def zero_state(self, batch_size, dtype): - with backend.name_scope(type(self).__name__ + "ZeroState"): - if self._state_is_tuple: - return tuple(cell.zero_state(batch_size, dtype) for cell in self._cells) - else: - # We know here that state_size of each cell is not a tuple and - # presumably does not contain TensorArrays or anything else fancy - return super().zero_state(batch_size, dtype) - - @property - def trainable_weights(self): - if not self.trainable: - return [] - weights = [] - for cell in self._cells: - if isinstance(cell, base_layer.Layer): - weights += cell.trainable_weights - return weights - - @property - def non_trainable_weights(self): - weights = [] - for cell in self._cells: - if isinstance(cell, base_layer.Layer): - weights += cell.non_trainable_weights - if not self.trainable: - trainable_weights = [] - for cell in self._cells: - if isinstance(cell, base_layer.Layer): - trainable_weights += cell.trainable_weights - return trainable_weights + weights - return weights - - def call(self, inputs, state): - """Run this multi-layer cell on inputs, starting from state.""" - cur_state_pos = 0 - cur_inp = inputs - new_states = [] - for i, cell in enumerate(self._cells): - with tf.compat.v1.variable_scope("cell_%d" % i): + + def __init__(self, cells, state_is_tuple=True): + """Create a RNN cell composed sequentially of a number of RNNCells. 
+ + Args: + cells: list of RNNCells that will be composed in this order. + state_is_tuple: If True, accepted and returned states are n-tuples, + where `n = len(cells)`. If False, the states are all concatenated + along the column axis. This latter behavior will soon be + deprecated. + + Raises: + ValueError: if cells is empty (not allowed), or at least one of the + cells returns a state tuple but the flag `state_is_tuple` is + `False`. + """ + logging.warning( + "`tf.nn.rnn_cell.MultiRNNCell` is deprecated. This class " + "is equivalent as `tf.keras.layers.StackedRNNCells`, " + "and will be replaced by that in Tensorflow 2.0." + ) + super().__init__() + if not cells: + raise ValueError("Must specify at least one cell for MultiRNNCell.") + if not tf.nest.is_nested(cells): + raise TypeError( + f"cells must be a list or tuple, but received: {cells}." + ) + + if len(set(id(cell) for cell in cells)) < len(cells): + logging.log_first_n( + logging.WARN, + "At least two cells provided to MultiRNNCell " + "are the same object and will share weights.", + 1, + ) + + self._cells = cells + for cell_number, cell in enumerate(self._cells): + # Add Trackable dependencies on these cells so their variables get + # saved with this object when using object-based saving. + if isinstance(cell, tf.__internal__.tracking.Trackable): + # TODO(allenl): Track down non-Trackable callers. + self._track_trackable(cell, name="cell-%d" % (cell_number,)) + self._state_is_tuple = state_is_tuple + if not state_is_tuple: + if any(tf.nest.is_nested(c.state_size) for c in self._cells): + raise ValueError( + "Some cells return tuples of states, but the flag " + "state_is_tuple is not set. " + f"State sizes are: {[c.state_size for c in self._cells]}" + ) + + @property + def state_size(self): if self._state_is_tuple: - if not tf.nest.is_nested(state): - raise ValueError( - f"Expected state to be a tuple of length {len(self.state_size)}" - f", but received: {state}") - cur_state = state[i] + return tuple(cell.state_size for cell in self._cells) else: - cur_state = tf.slice(state, [0, cur_state_pos], [-1, cell.state_size]) - cur_state_pos += cell.state_size - cur_inp, new_state = cell(cur_inp, cur_state) - new_states.append(new_state) - - new_states = ( - tuple(new_states) if self._state_is_tuple else tf.concat( - new_states, 1)) - - return cur_inp, new_states + return sum(cell.state_size for cell in self._cells) + + @property + def output_size(self): + return self._cells[-1].output_size + + def zero_state(self, batch_size, dtype): + with backend.name_scope(type(self).__name__ + "ZeroState"): + if self._state_is_tuple: + return tuple( + cell.zero_state(batch_size, dtype) for cell in self._cells + ) + else: + # We know here that state_size of each cell is not a tuple and + # presumably does not contain TensorArrays or anything else + # fancy + return super().zero_state(batch_size, dtype) + + @property + def trainable_weights(self): + if not self.trainable: + return [] + weights = [] + for cell in self._cells: + if isinstance(cell, base_layer.Layer): + weights += cell.trainable_weights + return weights + + @property + def non_trainable_weights(self): + weights = [] + for cell in self._cells: + if isinstance(cell, base_layer.Layer): + weights += cell.non_trainable_weights + if not self.trainable: + trainable_weights = [] + for cell in self._cells: + if isinstance(cell, base_layer.Layer): + trainable_weights += cell.trainable_weights + return trainable_weights + weights + return weights + + def call(self, inputs, state): + """Run this 
multi-layer cell on inputs, starting from state."""
+        cur_state_pos = 0
+        cur_inp = inputs
+        new_states = []
+        for i, cell in enumerate(self._cells):
+            with tf.compat.v1.variable_scope("cell_%d" % i):
+                if self._state_is_tuple:
+                    if not tf.nest.is_nested(state):
+                        raise ValueError(
+                            "Expected state to be a tuple of length "
+                            f"{len(self.state_size)}"
+                            f", but received: {state}"
+                        )
+                    cur_state = state[i]
+                else:
+                    cur_state = tf.slice(
+                        state, [0, cur_state_pos], [-1, cell.state_size]
+                    )
+                    cur_state_pos += cell.state_size
+                cur_inp, new_state = cell(cur_inp, cur_state)
+                new_states.append(new_state)
+
+        new_states = (
+            tuple(new_states)
+            if self._state_is_tuple
+            else tf.concat(new_states, 1)
+        )
+
+        return cur_inp, new_states


 def _check_rnn_cell_input_dtypes(inputs):
-  """Check whether the input tensors are with supported dtypes.
+    """Check whether the input tensors have supported dtypes.
-  Default RNN cells only support floats and complex as its dtypes since the
-  activation function (tanh and sigmoid) only allow those types. This function
-  will throw a proper error message if the inputs is not in a supported type.
+    Default RNN cells only support float and complex dtypes, since the
+    activation functions (tanh and sigmoid) only allow those types. This
+    function raises a descriptive error if an input is not of a supported
+    type.
-  Args:
-    inputs: tensor or nested structure of tensors that are feed to RNN cell as
-      input or state.
+    Args:
+      inputs: tensor or nested structure of tensors that are fed to the RNN
+        cell as input or state.
-  Raises:
-    ValueError: if any of the input tensor are not having dtypes of float or
-      complex.
-  """
-  for t in tf.nest.flatten(inputs):
-    _check_supported_dtypes(t.dtype)
+    Raises:
+      ValueError: if any of the input tensors does not have a float or
+        complex dtype.
+    """
+    for t in tf.nest.flatten(inputs):
+        _check_supported_dtypes(t.dtype)


 def _check_supported_dtypes(dtype):
-  if dtype is None:
-    return
-  dtype = tf.as_dtype(dtype)
-  if not (dtype.is_floating or dtype.is_complex):
-    raise ValueError("RNN cell only supports floating point inputs, "
-                     f"but received dtype: {dtype}")
+    if dtype is None:
+        return
+    dtype = tf.as_dtype(dtype)
+    if not (dtype.is_floating or dtype.is_complex):
+        raise ValueError(
+            "RNN cell only supports floating point inputs, "
+            f"but received dtype: {dtype}"
+        )
diff --git a/keras/layers/rnn/lstm.py b/keras/layers/rnn/lstm.py
index 30d08fbb5e53..47ae51f7e6a5 100644
--- a/keras/layers/rnn/lstm.py
+++ b/keras/layers/rnn/lstm.py
@@ -13,10 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 """Long Short-Term Memory layer."""
-# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import
+
 import uuid
+import tensorflow.compat.v2 as tf
+
 from keras import activations
 from keras import backend
 from keras import constraints
@@ -29,1158 +31,1314 @@
 from keras.layers.rnn.base_rnn import RNN
 from keras.layers.rnn.dropout_rnn_cell_mixin import DropoutRNNCellMixin
 from keras.utils import tf_utils
-import tensorflow.compat.v2 as tf
+# isort: off
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util.tf_export import keras_export
-
 RECURRENT_DROPOUT_WARNING_MSG = (
-    'RNN `implementation=2` is not supported when `recurrent_dropout` is set. '
-    'Using `implementation=1`.')
+    "RNN `implementation=2` is not supported when `recurrent_dropout` is set. "
+    "Using `implementation=1`."
+) -@keras_export('keras.layers.LSTMCell', v1=[]) +@keras_export("keras.layers.LSTMCell", v1=[]) class LSTMCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer): - """Cell class for the LSTM layer. - - See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) - for details about the usage of RNN API. - - This class processes one step within the whole time sequence input, whereas - `tf.keras.layer.LSTM` processes the whole sequence. - - For example: - - >>> inputs = tf.random.normal([32, 10, 8]) - >>> rnn = tf.keras.layers.RNN(tf.keras.layers.LSTMCell(4)) - >>> output = rnn(inputs) - >>> print(output.shape) - (32, 4) - >>> rnn = tf.keras.layers.RNN( - ... tf.keras.layers.LSTMCell(4), - ... return_sequences=True, - ... return_state=True) - >>> whole_seq_output, final_memory_state, final_carry_state = rnn(inputs) - >>> print(whole_seq_output.shape) - (32, 10, 4) - >>> print(final_memory_state.shape) - (32, 4) - >>> print(final_carry_state.shape) - (32, 4) - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. Default: hyperbolic tangent - (`tanh`). If you pass `None`, no activation is applied (ie. "linear" - activation: `a(x) = x`). - recurrent_activation: Activation function to use for the recurrent step. - Default: sigmoid (`sigmoid`). If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, (default `True`), whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, used for - the linear transformation of the inputs. Default: `glorot_uniform`. - recurrent_initializer: Initializer for the `recurrent_kernel` weights - matrix, used for the linear transformation of the recurrent state. - Default: `orthogonal`. - bias_initializer: Initializer for the bias vector. Default: `zeros`. - unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of - the forget gate at initialization. Setting it to true will also force - `bias_initializer="zeros"`. This is recommended in [Jozefowicz et - al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. Default: `None`. - bias_regularizer: Regularizer function applied to the bias vector. Default: - `None`. - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_constraint: Constraint function applied to the `recurrent_kernel` - weights matrix. Default: `None`. - bias_constraint: Constraint function applied to the bias vector. Default: - `None`. - dropout: Float between 0 and 1. Fraction of the units to drop for the linear - transformation of the inputs. Default: 0. - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for - the linear transformation of the recurrent state. Default: 0. - - Call arguments: - inputs: A 2D tensor, with shape of `[batch, feature]`. - states: List of 2 tensors that corresponding to the cell's units. Both of - them have shape `[batch, units]`, the first tensor is the memory state - from previous time step, the second tensor is the carry state from - previous time step. For timestep 0, the initial state provided by user - will be feed to cell. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. 
Only relevant when `dropout` or - `recurrent_dropout` is used. - """ - - def __init__(self, - units, - activation='tanh', - recurrent_activation='sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - **kwargs): - if units < 0: - raise ValueError(f'Received an invalid value for argument `units`, ' - f'expected a positive integer, got {units}.') - # By default use cached variable under v2 mode, see b/143699808. - if tf.compat.v1.executing_eagerly_outside_functions(): - self._enable_caching_device = kwargs.pop('enable_caching_device', True) - else: - self._enable_caching_device = kwargs.pop('enable_caching_device', False) - super().__init__(**kwargs) - self.units = units - self.activation = activations.get(activation) - self.recurrent_activation = activations.get(recurrent_activation) - self.use_bias = use_bias - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.unit_forget_bias = unit_forget_bias - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.dropout = min(1., max(0., dropout)) - self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - implementation = kwargs.pop('implementation', 2) - if self.recurrent_dropout != 0 and implementation != 1: - logging.debug(RECURRENT_DROPOUT_WARNING_MSG) - self.implementation = 1 - else: - self.implementation = implementation - self.state_size = [self.units, self.units] - self.output_size = self.units - - @tf_utils.shape_type_conversion - def build(self, input_shape): - default_caching_device = rnn_utils.caching_device(self) - input_dim = input_shape[-1] - self.kernel = self.add_weight( - shape=(input_dim, self.units * 4), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - caching_device=default_caching_device) - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units * 4), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint, - caching_device=default_caching_device) - - if self.use_bias: - if self.unit_forget_bias: - - def bias_initializer(_, *args, **kwargs): - return backend.concatenate([ - self.bias_initializer((self.units,), *args, **kwargs), - initializers.get('ones')((self.units,), *args, **kwargs), - self.bias_initializer((self.units * 2,), *args, **kwargs), - ]) - else: - bias_initializer = self.bias_initializer - self.bias = self.add_weight( - shape=(self.units * 4,), - name='bias', - initializer=bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - caching_device=default_caching_device) - else: - self.bias = None - self.built = True - - def _compute_carry_and_output(self, x, h_tm1, c_tm1): - """Computes carry and 
output using split kernels.""" - x_i, x_f, x_c, x_o = x - h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1 - i = self.recurrent_activation( - x_i + backend.dot(h_tm1_i, self.recurrent_kernel[:, :self.units])) - f = self.recurrent_activation(x_f + backend.dot( - h_tm1_f, self.recurrent_kernel[:, self.units:self.units * 2])) - c = f * c_tm1 + i * self.activation(x_c + backend.dot( - h_tm1_c, self.recurrent_kernel[:, self.units * 2:self.units * 3])) - o = self.recurrent_activation( - x_o + backend.dot(h_tm1_o, self.recurrent_kernel[:, self.units * 3:])) - return c, o - - def _compute_carry_and_output_fused(self, z, c_tm1): - """Computes carry and output using fused kernels.""" - z0, z1, z2, z3 = z - i = self.recurrent_activation(z0) - f = self.recurrent_activation(z1) - c = f * c_tm1 + i * self.activation(z2) - o = self.recurrent_activation(z3) - return c, o - - def call(self, inputs, states, training=None): - h_tm1 = states[0] # previous memory state - c_tm1 = states[1] # previous carry state - - dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4) - rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( - h_tm1, training, count=4) - - if self.implementation == 1: - if 0 < self.dropout < 1.: - inputs_i = inputs * dp_mask[0] - inputs_f = inputs * dp_mask[1] - inputs_c = inputs * dp_mask[2] - inputs_o = inputs * dp_mask[3] - else: - inputs_i = inputs - inputs_f = inputs - inputs_c = inputs - inputs_o = inputs - k_i, k_f, k_c, k_o = tf.split( - self.kernel, num_or_size_splits=4, axis=1) - x_i = backend.dot(inputs_i, k_i) - x_f = backend.dot(inputs_f, k_f) - x_c = backend.dot(inputs_c, k_c) - x_o = backend.dot(inputs_o, k_o) - if self.use_bias: - b_i, b_f, b_c, b_o = tf.split( - self.bias, num_or_size_splits=4, axis=0) - x_i = backend.bias_add(x_i, b_i) - x_f = backend.bias_add(x_f, b_f) - x_c = backend.bias_add(x_c, b_c) - x_o = backend.bias_add(x_o, b_o) - - if 0 < self.recurrent_dropout < 1.: - h_tm1_i = h_tm1 * rec_dp_mask[0] - h_tm1_f = h_tm1 * rec_dp_mask[1] - h_tm1_c = h_tm1 * rec_dp_mask[2] - h_tm1_o = h_tm1 * rec_dp_mask[3] - else: - h_tm1_i = h_tm1 - h_tm1_f = h_tm1 - h_tm1_c = h_tm1 - h_tm1_o = h_tm1 - x = (x_i, x_f, x_c, x_o) - h_tm1 = (h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o) - c, o = self._compute_carry_and_output(x, h_tm1, c_tm1) - else: - if 0. 
< self.dropout < 1.: - inputs = inputs * dp_mask[0] - z = backend.dot(inputs, self.kernel) - z += backend.dot(h_tm1, self.recurrent_kernel) - if self.use_bias: - z = backend.bias_add(z, self.bias) - - z = tf.split(z, num_or_size_splits=4, axis=1) - c, o = self._compute_carry_and_output_fused(z, c_tm1) - - h = o * self.activation(c) - return h, [h, c] - - def get_config(self): - config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'unit_forget_bias': - self.unit_forget_bias, - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout, - 'implementation': - self.implementation - } - config.update(rnn_utils.config_for_enable_caching_device(self)) - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Cell class for the LSTM layer. + + See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + for details about the usage of RNN API. + + This class processes one step within the whole time sequence input, whereas + `tf.keras.layer.LSTM` processes the whole sequence. + + For example: + + >>> inputs = tf.random.normal([32, 10, 8]) + >>> rnn = tf.keras.layers.RNN(tf.keras.layers.LSTMCell(4)) + >>> output = rnn(inputs) + >>> print(output.shape) + (32, 4) + >>> rnn = tf.keras.layers.RNN( + ... tf.keras.layers.LSTMCell(4), + ... return_sequences=True, + ... return_state=True) + >>> whole_seq_output, final_memory_state, final_carry_state = rnn(inputs) + >>> print(whole_seq_output.shape) + (32, 10, 4) + >>> print(final_memory_state.shape) + (32, 4) + >>> print(final_carry_state.shape) + (32, 4) + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. Default: hyperbolic tangent + (`tanh`). If you pass `None`, no activation is applied (ie. "linear" + activation: `a(x) = x`). + recurrent_activation: Activation function to use for the recurrent step. + Default: sigmoid (`sigmoid`). If you pass `None`, no activation is + applied (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, (default `True`), whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, used for + the linear transformation of the inputs. Default: `glorot_uniform`. + recurrent_initializer: Initializer for the `recurrent_kernel` weights + matrix, used for the linear transformation of the recurrent state. + Default: `orthogonal`. + bias_initializer: Initializer for the bias vector. Default: `zeros`. + unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of + the forget gate at initialization. Setting it to true will also force + `bias_initializer="zeros"`. 
This is recommended in [Jozefowicz et
+        al.](https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf)
+      kernel_regularizer: Regularizer function applied to the `kernel` weights
+        matrix. Default: `None`.
+      recurrent_regularizer: Regularizer function applied to
+        the `recurrent_kernel` weights matrix. Default: `None`.
+      bias_regularizer: Regularizer function applied to the bias vector.
+        Default: `None`.
+      kernel_constraint: Constraint function applied to the `kernel` weights
+        matrix. Default: `None`.
+      recurrent_constraint: Constraint function applied to the
+        `recurrent_kernel` weights matrix. Default: `None`.
+      bias_constraint: Constraint function applied to the bias vector.
+        Default: `None`.
+      dropout: Float between 0 and 1. Fraction of the units to drop for the
+        linear transformation of the inputs. Default: 0.
+      recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
+        for the linear transformation of the recurrent state. Default: 0.
+
+    Call arguments:
+      inputs: A 2D tensor, with shape of `[batch, feature]`.
+      states: List of 2 tensors corresponding to the cell's units. Both of
+        them have shape `[batch, units]`; the first tensor is the memory
+        state from the previous time step, and the second tensor is the carry
+        state from the previous time step. For timestep 0, the initial state
+        provided by the user will be fed to the cell.
+      training: Python boolean indicating whether the layer should behave in
+        training mode or in inference mode. Only relevant when `dropout` or
+        `recurrent_dropout` is used.
+    """
+
+    def __init__(
+        self,
+        units,
+        activation="tanh",
+        recurrent_activation="sigmoid",
+        use_bias=True,
+        kernel_initializer="glorot_uniform",
+        recurrent_initializer="orthogonal",
+        bias_initializer="zeros",
+        unit_forget_bias=True,
+        kernel_regularizer=None,
+        recurrent_regularizer=None,
+        bias_regularizer=None,
+        kernel_constraint=None,
+        recurrent_constraint=None,
+        bias_constraint=None,
+        dropout=0.0,
+        recurrent_dropout=0.0,
+        **kwargs,
+    ):
+        if units <= 0:
+            raise ValueError(
+                "Received an invalid value for argument `units`, "
+                f"expected a positive integer, got {units}."
+            )
+        # By default use cached variable under v2 mode, see b/143699808.
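+        # (Caching the variable reads lets every step of the recurrent loop
+        # reuse a single read of the kernel/bias instead of re-reading them
+        # each timestep; callers can opt out by passing
+        # `enable_caching_device=False`.)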
+ if tf.compat.v1.executing_eagerly_outside_functions(): + self._enable_caching_device = kwargs.pop( + "enable_caching_device", True + ) + else: + self._enable_caching_device = kwargs.pop( + "enable_caching_device", False + ) + super().__init__(**kwargs) + self.units = units + self.activation = activations.get(activation) + self.recurrent_activation = activations.get(recurrent_activation) + self.use_bias = use_bias + + self.kernel_initializer = initializers.get(kernel_initializer) + self.recurrent_initializer = initializers.get(recurrent_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.unit_forget_bias = unit_forget_bias + + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.recurrent_regularizer = regularizers.get(recurrent_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + + self.kernel_constraint = constraints.get(kernel_constraint) + self.recurrent_constraint = constraints.get(recurrent_constraint) + self.bias_constraint = constraints.get(bias_constraint) + + self.dropout = min(1.0, max(0.0, dropout)) + self.recurrent_dropout = min(1.0, max(0.0, recurrent_dropout)) + implementation = kwargs.pop("implementation", 2) + if self.recurrent_dropout != 0 and implementation != 1: + logging.debug(RECURRENT_DROPOUT_WARNING_MSG) + self.implementation = 1 + else: + self.implementation = implementation + self.state_size = [self.units, self.units] + self.output_size = self.units + + @tf_utils.shape_type_conversion + def build(self, input_shape): + super().build(input_shape) + default_caching_device = rnn_utils.caching_device(self) + input_dim = input_shape[-1] + self.kernel = self.add_weight( + shape=(input_dim, self.units * 4), + name="kernel", + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + caching_device=default_caching_device, + ) + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units * 4), + name="recurrent_kernel", + initializer=self.recurrent_initializer, + regularizer=self.recurrent_regularizer, + constraint=self.recurrent_constraint, + caching_device=default_caching_device, + ) + + if self.use_bias: + if self.unit_forget_bias: + + def bias_initializer(_, *args, **kwargs): + return backend.concatenate( + [ + self.bias_initializer( + (self.units,), *args, **kwargs + ), + initializers.get("ones")( + (self.units,), *args, **kwargs + ), + self.bias_initializer( + (self.units * 2,), *args, **kwargs + ), + ] + ) + + else: + bias_initializer = self.bias_initializer + self.bias = self.add_weight( + shape=(self.units * 4,), + name="bias", + initializer=bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + caching_device=default_caching_device, + ) + else: + self.bias = None + self.built = True + + def _compute_carry_and_output(self, x, h_tm1, c_tm1): + """Computes carry and output using split kernels.""" + x_i, x_f, x_c, x_o = x + h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1 + i = self.recurrent_activation( + x_i + backend.dot(h_tm1_i, self.recurrent_kernel[:, : self.units]) + ) + f = self.recurrent_activation( + x_f + + backend.dot( + h_tm1_f, self.recurrent_kernel[:, self.units : self.units * 2] + ) + ) + c = f * c_tm1 + i * self.activation( + x_c + + backend.dot( + h_tm1_c, + self.recurrent_kernel[:, self.units * 2 : self.units * 3], + ) + ) + o = self.recurrent_activation( + x_o + + backend.dot(h_tm1_o, self.recurrent_kernel[:, self.units * 3 :]) + ) + return c, o + + def 
_compute_carry_and_output_fused(self, z, c_tm1): + """Computes carry and output using fused kernels.""" + z0, z1, z2, z3 = z + i = self.recurrent_activation(z0) + f = self.recurrent_activation(z1) + c = f * c_tm1 + i * self.activation(z2) + o = self.recurrent_activation(z3) + return c, o + + def call(self, inputs, states, training=None): + h_tm1 = states[0] # previous memory state + c_tm1 = states[1] # previous carry state + + dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4) + rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( + h_tm1, training, count=4 + ) + + if self.implementation == 1: + if 0 < self.dropout < 1.0: + inputs_i = inputs * dp_mask[0] + inputs_f = inputs * dp_mask[1] + inputs_c = inputs * dp_mask[2] + inputs_o = inputs * dp_mask[3] + else: + inputs_i = inputs + inputs_f = inputs + inputs_c = inputs + inputs_o = inputs + k_i, k_f, k_c, k_o = tf.split( + self.kernel, num_or_size_splits=4, axis=1 + ) + x_i = backend.dot(inputs_i, k_i) + x_f = backend.dot(inputs_f, k_f) + x_c = backend.dot(inputs_c, k_c) + x_o = backend.dot(inputs_o, k_o) + if self.use_bias: + b_i, b_f, b_c, b_o = tf.split( + self.bias, num_or_size_splits=4, axis=0 + ) + x_i = backend.bias_add(x_i, b_i) + x_f = backend.bias_add(x_f, b_f) + x_c = backend.bias_add(x_c, b_c) + x_o = backend.bias_add(x_o, b_o) + + if 0 < self.recurrent_dropout < 1.0: + h_tm1_i = h_tm1 * rec_dp_mask[0] + h_tm1_f = h_tm1 * rec_dp_mask[1] + h_tm1_c = h_tm1 * rec_dp_mask[2] + h_tm1_o = h_tm1 * rec_dp_mask[3] + else: + h_tm1_i = h_tm1 + h_tm1_f = h_tm1 + h_tm1_c = h_tm1 + h_tm1_o = h_tm1 + x = (x_i, x_f, x_c, x_o) + h_tm1 = (h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o) + c, o = self._compute_carry_and_output(x, h_tm1, c_tm1) + else: + if 0.0 < self.dropout < 1.0: + inputs = inputs * dp_mask[0] + z = backend.dot(inputs, self.kernel) + z += backend.dot(h_tm1, self.recurrent_kernel) + if self.use_bias: + z = backend.bias_add(z, self.bias) + + z = tf.split(z, num_or_size_splits=4, axis=1) + c, o = self._compute_carry_and_output_fused(z, c_tm1) + + h = o * self.activation(c) + return h, [h, c] + + def get_config(self): + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "recurrent_activation": activations.serialize( + self.recurrent_activation + ), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "unit_forget_bias": self.unit_forget_bias, + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + "implementation": self.implementation, + } + config.update(rnn_utils.config_for_enable_caching_device(self)) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) - def get_initial_state(self, inputs=None, batch_size=None, dtype=None): - return list(rnn_utils.generate_zero_filled_state_for_cell( - self, inputs, batch_size, dtype)) + def get_initial_state(self, inputs=None, 
batch_size=None, dtype=None): + return list( + rnn_utils.generate_zero_filled_state_for_cell( + self, inputs, batch_size, dtype + ) + ) -@keras_export('keras.layers.LSTM', v1=[]) +@keras_export("keras.layers.LSTM", v1=[]) class LSTM(DropoutRNNCellMixin, RNN, base_layer.BaseRandomLayer): - """Long Short-Term Memory layer - Hochreiter 1997. - - See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) - for details about the usage of RNN API. - - Based on available runtime hardware and constraints, this layer - will choose different implementations (cuDNN-based or pure-TensorFlow) - to maximize the performance. If a GPU is available and all - the arguments to the layer meet the requirement of the cuDNN kernel - (see below for details), the layer will use a fast cuDNN implementation. - - The requirements to use the cuDNN implementation are: - - 1. `activation` == `tanh` - 2. `recurrent_activation` == `sigmoid` - 3. `recurrent_dropout` == 0 - 4. `unroll` is `False` - 5. `use_bias` is `True` - 6. Inputs, if use masking, are strictly right-padded. - 7. Eager execution is enabled in the outermost context. - - For example: - - >>> inputs = tf.random.normal([32, 10, 8]) - >>> lstm = tf.keras.layers.LSTM(4) - >>> output = lstm(inputs) - >>> print(output.shape) - (32, 4) - >>> lstm = tf.keras.layers.LSTM(4, return_sequences=True, return_state=True) - >>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs) - >>> print(whole_seq_output.shape) - (32, 10, 4) - >>> print(final_memory_state.shape) - (32, 4) - >>> print(final_carry_state.shape) - (32, 4) - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation - is applied (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use for the recurrent step. - Default: sigmoid (`sigmoid`). If you pass `None`, no activation is - applied (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean (default `True`), whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, used for - the linear transformation of the inputs. Default: `glorot_uniform`. - recurrent_initializer: Initializer for the `recurrent_kernel` weights - matrix, used for the linear transformation of the recurrent state. - Default: `orthogonal`. - bias_initializer: Initializer for the bias vector. Default: `zeros`. - unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of - the forget gate at initialization. Setting it to true will also force - `bias_initializer="zeros"`. This is recommended in [Jozefowicz et - al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. Default: `None`. - bias_regularizer: Regularizer function applied to the bias vector. Default: - `None`. - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation"). Default: `None`. - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_constraint: Constraint function applied to the `recurrent_kernel` - weights matrix. Default: `None`. - bias_constraint: Constraint function applied to the bias vector. Default: - `None`. - dropout: Float between 0 and 1. 
Fraction of the units to drop for the linear - transformation of the inputs. Default: 0. - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for - the linear transformation of the recurrent state. Default: 0. - return_sequences: Boolean. Whether to return the last output in the output - sequence, or the full sequence. Default: `False`. - return_state: Boolean. Whether to return the last state in addition to the - output. Default: `False`. - go_backwards: Boolean (default `False`). If True, process the input sequence - backwards and return the reversed sequence. - stateful: Boolean (default `False`). If True, the last state for each sample - at index i in a batch will be used as initial state for the sample of - index i in the following batch. - time_major: The shape format of the `inputs` and `outputs` tensors. - If True, the inputs and outputs will be in shape - `[timesteps, batch, feature]`, whereas in the False case, it will be - `[batch, timesteps, feature]`. Using `time_major = True` is a bit more - efficient because it avoids transposes at the beginning and end of the - RNN calculation. However, most TensorFlow data is batch-major, so by - default this function accepts input and emits output in batch-major - form. - unroll: Boolean (default `False`). If True, the network will be unrolled, - else a symbolic loop will be used. Unrolling can speed-up a RNN, although - it tends to be more memory-intensive. Unrolling is only suitable for short - sequences. - - Call arguments: - inputs: A 3D tensor with shape `[batch, timesteps, feature]`. - mask: Binary tensor of shape `[batch, timesteps]` indicating whether - a given timestep should be masked (optional, defaults to `None`). - An individual `True` entry indicates that the corresponding timestep - should be utilized, while a `False` entry indicates that the corresponding - timestep should be ignored. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the cell - when calling it. This is only relevant if `dropout` or - `recurrent_dropout` is used (optional, defaults to `None`). - initial_state: List of initial state tensors to be passed to the first - call of the cell (optional, defaults to `None` which causes creation - of zero-filled initial state tensors). - """ - - def __init__(self, - units, - activation='tanh', - recurrent_activation='sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - time_major=False, - unroll=False, - **kwargs): - # return_runtime is a flag for testing, which shows the real backend - # implementation chosen by grappler in graph mode. - self.return_runtime = kwargs.pop('return_runtime', False) - implementation = kwargs.pop('implementation', 2) - if implementation == 0: - logging.warning('`implementation=0` has been deprecated, ' - 'and now defaults to `implementation=1`.' 
- 'Please update your layer call.') - if 'enable_caching_device' in kwargs: - cell_kwargs = {'enable_caching_device': - kwargs.pop('enable_caching_device')} - else: - cell_kwargs = {} - cell = LSTMCell( + """Long Short-Term Memory layer - Hochreiter 1997. + + See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + for details about the usage of RNN API. + + Based on available runtime hardware and constraints, this layer + will choose different implementations (cuDNN-based or pure-TensorFlow) + to maximize the performance. If a GPU is available and all + the arguments to the layer meet the requirement of the cuDNN kernel + (see below for details), the layer will use a fast cuDNN implementation. + + The requirements to use the cuDNN implementation are: + + 1. `activation` == `tanh` + 2. `recurrent_activation` == `sigmoid` + 3. `recurrent_dropout` == 0 + 4. `unroll` is `False` + 5. `use_bias` is `True` + 6. Inputs, if use masking, are strictly right-padded. + 7. Eager execution is enabled in the outermost context. + + For example: + + >>> inputs = tf.random.normal([32, 10, 8]) + >>> lstm = tf.keras.layers.LSTM(4) + >>> output = lstm(inputs) + >>> print(output.shape) + (32, 4) + >>> lstm = tf.keras.layers.LSTM(4, return_sequences=True, return_state=True) + >>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs) + >>> print(whole_seq_output.shape) + (32, 10, 4) + >>> print(final_memory_state.shape) + (32, 4) + >>> print(final_carry_state.shape) + (32, 4) + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation + is applied (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use for the recurrent step. + Default: sigmoid (`sigmoid`). If you pass `None`, no activation is + applied (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean (default `True`), whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, used for + the linear transformation of the inputs. Default: `glorot_uniform`. + recurrent_initializer: Initializer for the `recurrent_kernel` weights + matrix, used for the linear transformation of the recurrent state. + Default: `orthogonal`. + bias_initializer: Initializer for the bias vector. Default: `zeros`. + unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of + the forget gate at initialization. Setting it to true will also force + `bias_initializer="zeros"`. This is recommended in [Jozefowicz et + al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_regularizer: Regularizer function applied to the + `recurrent_kernel` weights matrix. Default: `None`. + bias_regularizer: Regularizer function applied to the bias vector. + Default: `None`. + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation"). Default: `None`. + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_constraint: Constraint function applied to the + `recurrent_kernel` weights matrix. Default: `None`. + bias_constraint: Constraint function applied to the bias vector. Default: + `None`. + dropout: Float between 0 and 1. Fraction of the units to drop for the + linear transformation of the inputs. Default: 0. 
+ recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + for the linear transformation of the recurrent state. Default: 0. + return_sequences: Boolean. Whether to return the last output in the output + sequence, or the full sequence. Default: `False`. + return_state: Boolean. Whether to return the last state in addition to the + output. Default: `False`. + go_backwards: Boolean (default `False`). If True, process the input + sequence backwards and return the reversed sequence. + stateful: Boolean (default `False`). If True, the last state for each + sample at index i in a batch will be used as initial state for the sample + of index i in the following batch. + time_major: The shape format of the `inputs` and `outputs` tensors. + If True, the inputs and outputs will be in shape + `[timesteps, batch, feature]`, whereas in the False case, it will be + `[batch, timesteps, feature]`. Using `time_major = True` is a bit more + efficient because it avoids transposes at the beginning and end of the + RNN calculation. However, most TensorFlow data is batch-major, so by + default this function accepts input and emits output in batch-major + form. + unroll: Boolean (default `False`). If True, the network will be unrolled, + else a symbolic loop will be used. Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. Unrolling is only + suitable for short sequences. + + Call arguments: + inputs: A 3D tensor with shape `[batch, timesteps, feature]`. + mask: Binary tensor of shape `[batch, timesteps]` indicating whether + a given timestep should be masked (optional). + An individual `True` entry indicates that the corresponding timestep + should be utilized, while a `False` entry indicates that the + corresponding timestep should be ignored. Defaults to `None`. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. This argument is passed to the cell + when calling it. This is only relevant if `dropout` or + `recurrent_dropout` is used (optional). Defaults to `None`. + initial_state: List of initial state tensors to be passed to the first + call of the cell (optional, `None` causes creation + of zero-filled initial state tensors). Defaults to `None`. 
+ """ + + def __init__( + self, units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - unit_forget_bias=unit_forget_bias, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=implementation, - dtype=kwargs.get('dtype'), - trainable=kwargs.get('trainable', True), - **cell_kwargs) - super().__init__( - cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - time_major=time_major, - unroll=unroll, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.input_spec = [InputSpec(ndim=3)] - self.state_spec = [ - InputSpec(shape=(None, dim)) for dim in (self.units, self.units) - ] - self._could_use_gpu_kernel = ( - self.activation in (activations.tanh, tf.tanh) and - self.recurrent_activation in (activations.sigmoid, tf.sigmoid) and - recurrent_dropout == 0 and not unroll and use_bias and - tf.compat.v1.executing_eagerly_outside_functions()) - if tf.config.list_logical_devices('GPU'): - # Only show the message when there is GPU available, user will not care - # about the cuDNN if there isn't any GPU. - if self._could_use_gpu_kernel: - logging.debug(gru_lstm_utils.CUDNN_AVAILABLE_MSG % self.name) - else: - logging.warning(gru_lstm_utils.CUDNN_NOT_AVAILABLE_MSG % self.name) - - if gru_lstm_utils.use_new_gru_lstm_impl(): - self._defun_wrapper = gru_lstm_utils.DefunWrapper( - time_major, go_backwards, 'lstm') + activation="tanh", + recurrent_activation="sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + time_major=False, + unroll=False, + **kwargs, + ): + # return_runtime is a flag for testing, which shows the real backend + # implementation chosen by grappler in graph mode. + self.return_runtime = kwargs.pop("return_runtime", False) + implementation = kwargs.pop("implementation", 2) + if implementation == 0: + logging.warning( + "`implementation=0` has been deprecated, " + "and now defaults to `implementation=1`." + "Please update your layer call." 
+ ) + if "enable_caching_device" in kwargs: + cell_kwargs = { + "enable_caching_device": kwargs.pop("enable_caching_device") + } + else: + cell_kwargs = {} + cell = LSTMCell( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + unit_forget_bias=unit_forget_bias, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=implementation, + dtype=kwargs.get("dtype"), + trainable=kwargs.get("trainable", True), + name="lstm_cell", + **cell_kwargs, + ) + super().__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + time_major=time_major, + unroll=unroll, + **kwargs, + ) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.input_spec = [InputSpec(ndim=3)] + self.state_spec = [ + InputSpec(shape=(None, dim)) for dim in (self.units, self.units) + ] + self._could_use_gpu_kernel = ( + self.activation in (activations.tanh, tf.tanh) + and self.recurrent_activation in (activations.sigmoid, tf.sigmoid) + and recurrent_dropout == 0 + and not unroll + and use_bias + and tf.compat.v1.executing_eagerly_outside_functions() + ) + if tf.config.list_logical_devices("GPU"): + # Only show the message when there is GPU available, user will not + # care about the cuDNN if there isn't any GPU. + if self._could_use_gpu_kernel: + logging.debug(gru_lstm_utils.CUDNN_AVAILABLE_MSG % self.name) + else: + logging.warning( + gru_lstm_utils.CUDNN_NOT_AVAILABLE_MSG % self.name + ) + + if gru_lstm_utils.use_new_gru_lstm_impl(): + self._defun_wrapper = gru_lstm_utils.DefunWrapper( + time_major, go_backwards, "lstm" + ) + + def call(self, inputs, mask=None, training=None, initial_state=None): + # The input should be dense, padded with zeros. If a ragged input is fed + # into the layer, it is padded and the row lengths are used for masking. + inputs, row_lengths = backend.convert_inputs_if_ragged(inputs) + is_ragged_input = row_lengths is not None + self._validate_args_if_ragged(is_ragged_input, mask) + + # LSTM does not support constants. Ignore it during process. + inputs, initial_state, _ = self._process_inputs( + inputs, initial_state, None + ) + + if isinstance(mask, list): + mask = mask[0] + + input_shape = backend.int_shape(inputs) + timesteps = input_shape[0] if self.time_major else input_shape[1] + + if not self._could_use_gpu_kernel: + # Fall back to use the normal LSTM. + kwargs = {"training": training} + self._maybe_reset_cell_dropout_mask(self.cell) + + def step(inputs, states): + return self.cell(inputs, states, **kwargs) + + last_output, outputs, states = backend.rnn( + step, + inputs, + initial_state, + constants=None, + go_backwards=self.go_backwards, + mask=mask, + unroll=self.unroll, + input_length=row_lengths + if row_lengths is not None + else timesteps, + time_major=self.time_major, + zero_output_for_mask=self.zero_output_for_mask, + return_all_outputs=self.return_sequences, + ) + runtime = gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_UNKNOWN) + else: + # Use the new defun approach for backend implementation swap. 
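+            # Whether the cuDNN or the generic kernel actually runs is
+            # decided later: eagerly via the device-placement check below,
+            # or by grappler when running in graph mode.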
+ # Note that different implementations need to have same function + # signature, eg, the tensor parameters need to have same shape and + # dtypes. Since the cuDNN has an extra set of bias, those bias will + # be passed to both normal and cuDNN implementations. + self.reset_dropout_mask() + dropout_mask = self.get_dropout_mask_for_cell( + inputs, training, count=4 + ) + if dropout_mask is not None: + inputs = inputs * dropout_mask[0] + if gru_lstm_utils.use_new_gru_lstm_impl(): + lstm_kwargs = { + "inputs": inputs, + "init_h": gru_lstm_utils.read_variable_value( + initial_state[0] + ), + "init_c": gru_lstm_utils.read_variable_value( + initial_state[1] + ), + "kernel": gru_lstm_utils.read_variable_value( + self.cell.kernel + ), + "recurrent_kernel": gru_lstm_utils.read_variable_value( + self.cell.recurrent_kernel + ), + "bias": gru_lstm_utils.read_variable_value(self.cell.bias), + "mask": mask, + "time_major": self.time_major, + "go_backwards": self.go_backwards, + "sequence_lengths": row_lengths, + "zero_output_for_mask": self.zero_output_for_mask, + } + ( + last_output, + outputs, + new_h, + new_c, + runtime, + ) = self._defun_wrapper.defun_layer(**lstm_kwargs) + else: + gpu_lstm_kwargs = { + "inputs": inputs, + "init_h": gru_lstm_utils.read_variable_value( + initial_state[0] + ), + "init_c": gru_lstm_utils.read_variable_value( + initial_state[1] + ), + "kernel": gru_lstm_utils.read_variable_value( + self.cell.kernel + ), + "recurrent_kernel": gru_lstm_utils.read_variable_value( + self.cell.recurrent_kernel + ), + "bias": gru_lstm_utils.read_variable_value(self.cell.bias), + "mask": mask, + "time_major": self.time_major, + "go_backwards": self.go_backwards, + "sequence_lengths": row_lengths, + "return_sequences": self.return_sequences, + } + normal_lstm_kwargs = gpu_lstm_kwargs.copy() + normal_lstm_kwargs.update( + { + "zero_output_for_mask": self.zero_output_for_mask, + } + ) + + if tf.executing_eagerly(): + device_type = gru_lstm_utils.get_context_device_type() + can_use_gpu = ( + # Either user specified GPU or unspecified but GPU is + # available. + ( + device_type == gru_lstm_utils.GPU_DEVICE_NAME + or ( + device_type is None + and tf.config.list_logical_devices("GPU") + ) + ) + and gru_lstm_utils.is_cudnn_supported_inputs( + mask, self.time_major, row_lengths + ) + ) + # Under eager context, check the device placement and prefer + # the GPU implementation when GPU is available. + if can_use_gpu: + last_output, outputs, new_h, new_c, runtime = gpu_lstm( + **gpu_lstm_kwargs + ) + else: + ( + last_output, + outputs, + new_h, + new_c, + runtime, + ) = standard_lstm(**normal_lstm_kwargs) + else: + ( + last_output, + outputs, + new_h, + new_c, + runtime, + ) = lstm_with_backend_selection(**normal_lstm_kwargs) + + states = [new_h, new_c] + + if self.stateful: + updates = [ + tf.compat.v1.assign( + self_state, tf.cast(state, self_state.dtype) + ) + for self_state, state in zip(self.states, states) + ] + self.add_update(updates) + + if self.return_sequences: + output = backend.maybe_convert_to_ragged( + is_ragged_input, + outputs, + row_lengths, + go_backwards=self.go_backwards, + ) + else: + output = last_output - def call(self, inputs, mask=None, training=None, initial_state=None): - # The input should be dense, padded with zeros. If a ragged input is fed - # into the layer, it is padded and the row lengths are used for masking. 
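The ragged-input handling described at the top of `call` is easiest to see from the caller's side; a small sketch (the shapes in the comments are the expected ones):

import tensorflow as tf

# Two sequences of different lengths, ragged in the time dimension.
x = tf.ragged.constant(
    [[[1.0, 2.0], [3.0, 4.0]],
     [[5.0, 6.0]]],
    ragged_rank=1,
)
layer = tf.keras.layers.LSTM(3, return_sequences=True)
y = layer(x)
# The input is densified internally and the row lengths act as the mask;
# with return_sequences=True the output is converted back to ragged.
print(y.shape)  # (2, None, 3)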
- inputs, row_lengths = backend.convert_inputs_if_ragged(inputs) - is_ragged_input = (row_lengths is not None) - self._validate_args_if_ragged(is_ragged_input, mask) + if self.return_state: + return [output] + list(states) + elif self.return_runtime: + return output, runtime + else: + return output + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def unit_forget_bias(self): + return self.cell.unit_forget_bias + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + @property + def implementation(self): + return self.cell.implementation + + def get_config(self): + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "recurrent_activation": activations.serialize( + self.recurrent_activation + ), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "unit_forget_bias": self.unit_forget_bias, + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + "implementation": self.implementation, + } + config.update(rnn_utils.config_for_enable_caching_device(self.cell)) + base_config = super().get_config() + del base_config["cell"] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if "implementation" in config and config["implementation"] == 0: + config["implementation"] = 1 + return cls(**config) + + +def standard_lstm( + inputs, + init_h, + init_c, + kernel, + recurrent_kernel, + bias, + mask, + time_major, + go_backwards, + sequence_lengths, + zero_output_for_mask, + return_sequences, +): + """LSTM with standard kernel implementation. + + This implementation can be run on all types for hardware. 
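Stepping back to the `get_config`/`from_config` pair defined just above: because every hyperparameter property proxies to the wrapped cell, a config round trip reconstructs an equivalent layer, and the legacy `implementation=0` is upgraded on the way in. A sketch:

import tensorflow as tf

layer = tf.keras.layers.LSTM(4, dropout=0.1)
clone = tf.keras.layers.LSTM.from_config(layer.get_config())
assert clone.units == layer.units
assert clone.dropout == layer.dropout

# from_config rewrites the deprecated implementation=0 to 1.
cfg = layer.get_config()
cfg["implementation"] = 0
assert tf.keras.layers.LSTM.from_config(cfg).implementation == 1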
+
+    This implementation lifts out all the layer weights and makes them
+    function parameters. It has the same number of tensor input params as the
+    cuDNN counterpart. The RNN step logic has been simplified, e.g. dropout
+    and masking are removed since the cuDNN implementation does not support
+    them.
+
+    Note that the first half of the bias tensor should be ignored by this
+    impl. The cuDNN impl needs an extra set of input gate bias. In order to
+    make both functions take the same shape of parameters, that extra set of
+    bias is also fed here.
+
+    Args:
+        inputs: input tensor of LSTM layer.
+        init_h: initial state tensor for the cell output.
+        init_c: initial state tensor for the cell hidden state.
+        kernel: weights for cell kernel.
+        recurrent_kernel: weights for cell recurrent kernel.
+        bias: weights for cell kernel bias and recurrent bias. Only recurrent
+            bias is used in this case.
+        mask: Boolean tensor for masking out the steps within the sequence.
+            An individual `True` entry indicates that the corresponding
+            timestep should be utilized, while a `False` entry indicates that
+            the corresponding timestep should be ignored.
+        time_major: boolean, whether the inputs are in the format of
+            [time, batch, feature] or [batch, time, feature].
+        go_backwards: Boolean (default False). If True, process the input
+            sequence backwards and return the reversed sequence.
+        sequence_lengths: The lengths of all sequences coming from a variable
+            length input, such as ragged tensors. If the input has a fixed
+            timestep size, this should be None.
+        zero_output_for_mask: Boolean, whether to output zero for masked
+            timestep.
+        return_sequences: Boolean. If True, return the recurrent outputs for
+            all timesteps in the sequence. If False, only return the output
+            for the last timestep (which consumes less memory).
+
+    Returns:
+        last_output: output tensor for the last timestep, which has shape
+            [batch, units].
+        outputs:
+            - If `return_sequences=True`: output tensor for all timesteps,
+                which has shape [batch, time, units].
+            - Else, a tensor equal to `last_output` with shape
+                [batch, 1, units].
+        state_0: the cell output, which has same shape as init_h.
+        state_1: the cell hidden state, which has same shape as init_c.
+        runtime: constant string tensor which indicates real runtime
+            hardware. This value is for testing purposes and should not be
+            used by the user.
+    """
+    input_shape = backend.int_shape(inputs)
+    timesteps = input_shape[0] if time_major else input_shape[1]
-    # LSTM does not support constants. Ignore it during process.
-    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)
+
+    def step(cell_inputs, cell_states):
+        """Step function that will be used by Keras RNN backend."""
+        h_tm1 = cell_states[0]  # previous memory state
+        c_tm1 = cell_states[1]  # previous carry state
-    if isinstance(mask, list):
-      mask = mask[0]
+
+        z = backend.dot(cell_inputs, kernel)
+        z += backend.dot(h_tm1, recurrent_kernel)
+        z = backend.bias_add(z, bias)
-    input_shape = backend.int_shape(inputs)
-    timesteps = input_shape[0] if self.time_major else input_shape[1]
-
-    if not self._could_use_gpu_kernel:
-      # Fall back to use the normal LSTM.
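The `step` closure being assembled here (its gate computation continues below) is the whole cell math for the generic kernel. A NumPy transcription of the complete step may make the gate layout, `[i | f | c | o]` along the split axis, easier to follow; this is an illustrative sketch, not the code the backend runs:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x_t, h_tm1, c_tm1, kernel, recurrent_kernel, bias):
    # z stacks all four gate pre-activations, each of width `units`.
    z = x_t @ kernel + h_tm1 @ recurrent_kernel + bias
    z0, z1, z2, z3 = np.split(z, 4, axis=-1)
    i = sigmoid(z0)                   # input gate
    f = sigmoid(z1)                   # forget gate
    c = f * c_tm1 + i * np.tanh(z2)   # new carry state
    o = sigmoid(z3)                   # output gate
    h = o * np.tanh(c)                # new hidden/output state
    return h, [h, c]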
- kwargs = {'training': training} - self._maybe_reset_cell_dropout_mask(self.cell) - - def step(inputs, states): - return self.cell(inputs, states, **kwargs) - - last_output, outputs, states = backend.rnn( - step, - inputs, - initial_state, - constants=None, - go_backwards=self.go_backwards, - mask=mask, - unroll=self.unroll, - input_length=row_lengths if row_lengths is not None else timesteps, - time_major=self.time_major, - zero_output_for_mask=self.zero_output_for_mask, - return_all_outputs=self.return_sequences) - runtime = gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_UNKNOWN) - else: - # Use the new defun approach for backend implementation swap. - # Note that different implementations need to have same function - # signature, eg, the tensor parameters need to have same shape and dtypes. - # Since the cuDNN has an extra set of bias, those bias will be passed to - # both normal and cuDNN implementations. - self.reset_dropout_mask() - dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4) - if dropout_mask is not None: - inputs = inputs * dropout_mask[0] - if gru_lstm_utils.use_new_gru_lstm_impl(): - lstm_kwargs = { - 'inputs': - inputs, - 'init_h': - gru_lstm_utils.read_variable_value(initial_state[0]), - 'init_c': - gru_lstm_utils.read_variable_value(initial_state[1]), - 'kernel': - gru_lstm_utils.read_variable_value(self.cell.kernel), - 'recurrent_kernel': - gru_lstm_utils.read_variable_value(self.cell.recurrent_kernel), - 'bias': - gru_lstm_utils.read_variable_value(self.cell.bias), - 'mask': - mask, - 'time_major': - self.time_major, - 'go_backwards': - self.go_backwards, - 'sequence_lengths': - row_lengths, - 'zero_output_for_mask': - self.zero_output_for_mask, - } - (last_output, outputs, new_h, new_c, - runtime) = self._defun_wrapper.defun_layer(**lstm_kwargs) - else: - gpu_lstm_kwargs = { - 'inputs': - inputs, - 'init_h': - gru_lstm_utils.read_variable_value(initial_state[0]), - 'init_c': - gru_lstm_utils.read_variable_value(initial_state[1]), - 'kernel': - gru_lstm_utils.read_variable_value(self.cell.kernel), - 'recurrent_kernel': - gru_lstm_utils.read_variable_value(self.cell.recurrent_kernel), - 'bias': - gru_lstm_utils.read_variable_value(self.cell.bias), - 'mask': - mask, - 'time_major': - self.time_major, - 'go_backwards': - self.go_backwards, - 'sequence_lengths': - row_lengths, - 'return_sequences': - self.return_sequences - } - normal_lstm_kwargs = gpu_lstm_kwargs.copy() - normal_lstm_kwargs.update({ - 'zero_output_for_mask': self.zero_output_for_mask, - }) - - if tf.executing_eagerly(): - device_type = gru_lstm_utils.get_context_device_type() - can_use_gpu = ( - # Either user specified GPU or unspecified but GPU is available. - (device_type == gru_lstm_utils.GPU_DEVICE_NAME or - (device_type is None - and tf.config.list_logical_devices('GPU'))) and - (mask is None or - gru_lstm_utils.is_cudnn_supported_inputs(mask, self.time_major))) - # Under eager context, check the device placement and prefer the - # GPU implementation when GPU is available. 
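From the user's side, this eager-mode device preference (kept intact in the new code above) keys off the active device scope; roughly, as a sketch:

import tensorflow as tf

layer = tf.keras.layers.LSTM(4)
x = tf.random.normal([2, 5, 3])

# An explicit CPU scope pins the call to the generic standard_lstm kernel,
# even on a machine with a GPU.
with tf.device("/cpu:0"):
    y_cpu = layer(x)

# Outside the scope, gpu_lstm is preferred whenever a GPU is visible and
# the mask (if any) is cuDNN-compatible.
y = layer(x)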
- if can_use_gpu: - last_output, outputs, new_h, new_c, runtime = gpu_lstm( - **gpu_lstm_kwargs) - else: - last_output, outputs, new_h, new_c, runtime = standard_lstm( - **normal_lstm_kwargs) - else: - (last_output, outputs, new_h, new_c, - runtime) = lstm_with_backend_selection(**normal_lstm_kwargs) + z0, z1, z2, z3 = tf.split(z, 4, axis=1) - states = [new_h, new_c] + i = tf.sigmoid(z0) + f = tf.sigmoid(z1) + c = f * c_tm1 + i * tf.tanh(z2) + o = tf.sigmoid(z3) - if self.stateful: - updates = [ - tf.compat.v1.assign(self_state, tf.cast(state, self_state.dtype)) - for self_state, state in zip(self.states, states) - ] - self.add_update(updates) + h = o * tf.tanh(c) + return h, [h, c] - if self.return_sequences: - output = backend.maybe_convert_to_ragged( - is_ragged_input, outputs, row_lengths, go_backwards=self.go_backwards) + last_output, outputs, new_states = backend.rnn( + step, + inputs, + [init_h, init_c], + constants=None, + unroll=False, + time_major=time_major, + mask=mask, + go_backwards=go_backwards, + input_length=( + sequence_lengths if sequence_lengths is not None else timesteps + ), + zero_output_for_mask=zero_output_for_mask, + return_all_outputs=return_sequences, + ) + return ( + last_output, + outputs, + new_states[0], + new_states[1], + gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_CPU), + ) + + +def gpu_lstm( + inputs, + init_h, + init_c, + kernel, + recurrent_kernel, + bias, + mask, + time_major, + go_backwards, + sequence_lengths, + return_sequences, +): + """LSTM with either cuDNN or ROCm implementation which is only available for + GPU. + + Note that currently only right padded data is supported, or the result will + be polluted by the unmasked data which should be filtered. + + Args: + inputs: Input tensor of LSTM layer. + init_h: Initial state tensor for the cell output. + init_c: Initial state tensor for the cell hidden state. + kernel: Weights for cell kernel. + recurrent_kernel: Weights for cell recurrent kernel. + bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias + is used in this case. + mask: Boolean tensor for mask out the steps within sequence. An individual + `True` entry indicates that the corresponding timestep should be + utilized, while a `False` entry indicates that the corresponding + timestep should be ignored. + time_major: Boolean, whether the inputs are in the format of [time, batch, + feature] or [batch, time, feature]. + go_backwards: Boolean (default False). If True, process the input sequence + backwards and return the reversed sequence. + sequence_lengths: The lengths of all sequences coming from a variable + length input, such as ragged tensors. If the input has a fixed timestep + size, this should be None. + return_sequences: Boolean. If True, return the recurrent outputs for all + timesteps in the sequence. If False, only return the output for the + last timestep, matching the CPU function output format. + + Returns: + last_output: Output tensor for the last timestep, which has shape + [batch, units]. + outputs: + - If `return_sequences=True`: output tensor for all timesteps, + which has shape [batch, time, units]. + - Else, a tensor equal to `last_output` with shape [batch, 1, units] + state_0: The cell output, which has same shape as init_h. + state_1: The cell hidden state, which has same shape as init_c. + runtime: Constant string tensor which indicate real runtime hardware. This + value is for testing purpose and should not be used by user. 
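One detail of the function body that follows is worth seeing concretely: cuDNN keeps two bias sets (input-side and recurrent-side) that it sums, while the canonical cell keeps one fused bias, so `gpu_lstm` zero-fills the input-side set to make the two parameterizations match. A NumPy sketch of that equivalence:

import numpy as np

units = 3
keras_bias = np.random.rand(4 * units)  # fused [i | f | c | o] bias

# What full_bias = concat(zeros_like(bias), bias) builds below:
cudnn_bias = np.concatenate([np.zeros_like(keras_bias), keras_bias])
input_bias, recurrent_bias = np.split(cudnn_bias, 2)

# cuDNN adds the two sets, so the effective bias is unchanged.
np.testing.assert_allclose(input_bias + recurrent_bias, keras_bias)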
+ """ + if mask is not None: + sequence_lengths = gru_lstm_utils.calculate_sequence_by_mask( + mask, time_major + ) + + if not time_major and sequence_lengths is None: + inputs = tf.transpose(inputs, perm=(1, 0, 2)) + seq_axis, batch_axis = (0, 1) else: - output = last_output - - if self.return_state: - return [output] + list(states) - elif self.return_runtime: - return output, runtime + seq_axis, batch_axis = (0, 1) if time_major else (1, 0) + # For init_h and init_c, cuDNN expects one more dim of num_layers before or + # after batch dim for time major or batch major inputs respectively + init_h = tf.expand_dims(init_h, axis=seq_axis) + init_c = tf.expand_dims(init_c, axis=seq_axis) + + weights = tf.split(kernel, 4, axis=1) + weights += tf.split(recurrent_kernel, 4, axis=1) + # cuDNN has an extra set of bias for inputs, we disable them (setting to 0), + # so that mathematically it is same as the canonical LSTM implementation. + full_bias = tf.concat((tf.zeros_like(bias), bias), 0) + + if tf.sysconfig.get_build_info()["is_rocm_build"]: + # ROCm MIOpen's weight sequence for LSTM is different from both + # canonical and Cudnn format + # MIOpen: [i, f, o, c] Cudnn/Canonical: [i, f, c, o] + # i is input gate weights. + # f is forget gate weights. + # o is output gate weights. + # c is cell gate weights. + weights = [weights[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)] + # full_bias is a tensor of shape (8*n,) + full_bias = tf.split(full_bias, 8, axis=0) + full_bias = [full_bias[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)] + + params = gru_lstm_utils.canonical_to_params( + weights=weights, + biases=tf.split(full_bias, 8), + shape=tf.constant([-1]), + transpose_weights=True, + ) + + if sequence_lengths is not None: + if go_backwards: + # Three reversals are required. E.g., + # normal input = [1, 2, 3, 0, 0] # where 0 need to be masked + # reversed_input_to_cudnn = [3, 2, 1, 0, 0] + # output_from_cudnn = [6, 5, 4, 0, 0] + # expected_output = [0, 0, 6, 5 ,4] + inputs = tf.reverse_sequence( + inputs, + sequence_lengths, + seq_axis=seq_axis, + batch_axis=batch_axis, + ) + outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3( + input=inputs, + input_h=init_h, + input_c=init_c, + params=params, + is_training=True, + rnn_mode="lstm", + sequence_lengths=sequence_lengths, + time_major=time_major, + ) + if go_backwards: + outputs = tf.reverse_sequence( + outputs, + sequence_lengths, + seq_axis=seq_axis, + batch_axis=batch_axis, + ) + outputs = tf.reverse(outputs, axis=[seq_axis]) else: - return output - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def unit_forget_bias(self): - return self.cell.unit_forget_bias - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - 
@property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - @property - def implementation(self): - return self.cell.implementation - - def get_config(self): - config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'unit_forget_bias': - self.unit_forget_bias, - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout, - 'implementation': - self.implementation + # # Fill the array with shape [batch] with value of max timesteps. + # sequence_length = array_ops.fill([array_ops.shape(inputs)[1]], + # array_ops.shape(inputs)[0]) + if go_backwards: + # Reverse axis 0 since the input is already convert to time major. + inputs = tf.reverse(inputs, axis=[0]) + outputs, h, c, _ = tf.raw_ops.CudnnRNN( + input=inputs, + input_h=init_h, + input_c=init_c, + params=params, + is_training=True, + rnn_mode="lstm", + ) + + last_output = outputs[-1] + if not time_major and sequence_lengths is None and return_sequences: + outputs = tf.transpose(outputs, perm=[1, 0, 2]) + h = tf.squeeze(h, axis=seq_axis) + c = tf.squeeze(c, axis=seq_axis) + + # In the case of variable length input, the cudnn kernel will fill zeros for + # the output, whereas the default keras behavior is to bring over the + # previous output for t-1, so that in the return_sequence=False case, user + # can quickly get the final effect output instead just 0s at the last + # timestep. In order to mimic the default keras behavior, we copy the final + # h state as the last_output, since it is numerically same as the output. + if sequence_lengths is not None: + last_output = h + + # Match CPU return format + if not return_sequences: + outputs = tf.expand_dims(last_output, axis=0 if time_major else 1) + + return ( + last_output, + outputs, + h, + c, + gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_GPU), + ) + + +def lstm_with_backend_selection( + inputs, + init_h, + init_c, + kernel, + recurrent_kernel, + bias, + mask, + time_major, + go_backwards, + sequence_lengths, + zero_output_for_mask, + return_sequences, +): + """Call the LSTM with optimized backend kernel selection. + + Under the hood, this function will create two TF function, one with the most + generic kernel and can run on all device condition, and the second one with + cuDNN specific kernel, which can only run on GPU. + + The first function will be called with normal_lstm_params, while the second + function is not called, but only registered in the graph. 
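One observable hook into this selection machinery is the private `return_runtime` testing flag popped in `__init__` above; a sketch of how a test might see which kernel actually ran (not a public API, and only meaningful when executing eagerly):

import tensorflow as tf

layer = tf.keras.layers.LSTM(4, return_runtime=True)
x = tf.random.normal([2, 5, 3])
output, runtime = layer(x)
# `runtime` is a constant tensor encoding the kernel that executed
# (generic vs. cuDNN); per the docstrings here, it exists for tests only.
print(runtime)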
The Grappler will + do the proper graph rewrite and swap the optimized TF function based on the + device placement. + + Args: + inputs: Input tensor of LSTM layer. + init_h: Initial state tensor for the cell output. + init_c: Initial state tensor for the cell hidden state. + kernel: Weights for cell kernel. + recurrent_kernel: Weights for cell recurrent kernel. + bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias + is used in this case. + mask: Boolean tensor for mask out the steps within sequence. + An individual `True` entry indicates that the corresponding timestep + should be utilized, while a `False` entry indicates that the + corresponding timestep should be ignored. + time_major: Boolean, whether the inputs are in the format of + [time, batch, feature] or [batch, time, feature]. + go_backwards: Boolean (default False). If True, process the input sequence + backwards and return the reversed sequence. + sequence_lengths: The lengths of all sequences coming from a variable + length input, such as ragged tensors. If the input has a fixed timestep + size, this should be None. + zero_output_for_mask: Boolean, whether to output zero for masked timestep. + return_sequences: Boolean. If True, return the recurrent outputs for all + timesteps in the sequence. If False, only return the output for the + last timestep (which consumes less memory). + + Returns: + List of output tensors, same as standard_lstm. + """ + params = { + "inputs": inputs, + "init_h": init_h, + "init_c": init_c, + "kernel": kernel, + "recurrent_kernel": recurrent_kernel, + "bias": bias, + "mask": mask, + "time_major": time_major, + "go_backwards": go_backwards, + "sequence_lengths": sequence_lengths, + "zero_output_for_mask": zero_output_for_mask, + "return_sequences": return_sequences, } - config.update(rnn_utils.config_for_enable_caching_device(self.cell)) - base_config = super().get_config() - del base_config['cell'] - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - if 'implementation' in config and config['implementation'] == 0: - config['implementation'] = 1 - return cls(**config) - - -def standard_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask, - time_major, go_backwards, sequence_lengths, - zero_output_for_mask, return_sequences): - """LSTM with standard kernel implementation. - - This implementation can be run on all types for hardware. - - This implementation lifts out all the layer weights and make them function - parameters. It has same number of tensor input params as the cuDNN - counterpart. The RNN step logic has been simplified, eg dropout and mask is - removed since cuDNN implementation does not support that. - - Note that the first half of the bias tensor should be ignored by this impl. - The cuDNN impl need an extra set of input gate bias. In order to make the both - function take same shape of parameter, that extra set of bias is also feed - here. - - Args: - inputs: input tensor of LSTM layer. - init_h: initial state tensor for the cell output. - init_c: initial state tensor for the cell hidden state. - kernel: weights for cell kernel. - recurrent_kernel: weights for cell recurrent kernel. - bias: weights for cell kernel bias and recurrent bias. Only recurrent bias - is used in this case. - mask: Boolean tensor for mask out the steps within sequence. 
- An individual `True` entry indicates that the corresponding timestep - should be utilized, while a `False` entry indicates that the corresponding - timestep should be ignored. - time_major: boolean, whether the inputs are in the format of - [time, batch, feature] or [batch, time, feature]. - go_backwards: Boolean (default False). If True, process the input sequence - backwards and return the reversed sequence. - sequence_lengths: The lengths of all sequences coming from a variable length - input, such as ragged tensors. If the input has a fixed timestep size, - this should be None. - zero_output_for_mask: Boolean, whether to output zero for masked timestep. - return_sequences: Boolean. If True, return the recurrent outputs for all - timesteps in the sequence. If False, only return the output for the - last timestep (which consumes less memory). - - Returns: - last_output: output tensor for the last timestep, which has shape - [batch, units]. - outputs: - - If `return_sequences=True`: output tensor for all timesteps, - which has shape [batch, time, units]. - - Else, a tensor equal to `last_output` with shape [batch, 1, units] - state_0: the cell output, which has same shape as init_h. - state_1: the cell hidden state, which has same shape as init_c. - runtime: constant string tensor which indicate real runtime hardware. This - value is for testing purpose and should be used by user. - """ - input_shape = backend.int_shape(inputs) - timesteps = input_shape[0] if time_major else input_shape[1] - - def step(cell_inputs, cell_states): - """Step function that will be used by Keras RNN backend.""" - h_tm1 = cell_states[0] # previous memory state - c_tm1 = cell_states[1] # previous carry state - - z = backend.dot(cell_inputs, kernel) - z += backend.dot(h_tm1, recurrent_kernel) - z = backend.bias_add(z, bias) - - z0, z1, z2, z3 = tf.split(z, 4, axis=1) - - i = tf.sigmoid(z0) - f = tf.sigmoid(z1) - c = f * c_tm1 + i * tf.tanh(z2) - o = tf.sigmoid(z3) - - h = o * tf.tanh(c) - return h, [h, c] - - last_output, outputs, new_states = backend.rnn( - step, - inputs, [init_h, init_c], - constants=None, - unroll=False, - time_major=time_major, - mask=mask, - go_backwards=go_backwards, - input_length=(sequence_lengths - if sequence_lengths is not None else timesteps), - zero_output_for_mask=zero_output_for_mask, - return_all_outputs=return_sequences) - return (last_output, outputs, new_states[0], new_states[1], - gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_CPU)) - - -def gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask, - time_major, go_backwards, sequence_lengths, return_sequences): - """LSTM with either cuDNN or ROCm implementation which is only available for GPU. - - Note that currently only right padded data is supported, or the result will be - polluted by the unmasked data which should be filtered. - - Args: - inputs: Input tensor of LSTM layer. - init_h: Initial state tensor for the cell output. - init_c: Initial state tensor for the cell hidden state. - kernel: Weights for cell kernel. - recurrent_kernel: Weights for cell recurrent kernel. - bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias - is used in this case. - mask: Boolean tensor for mask out the steps within sequence. An individual - `True` entry indicates that the corresponding timestep should be utilized, - while a `False` entry indicates that the corresponding timestep should be - ignored. 
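These mask semantics interact with kernel choice: only a mask that is None or strictly right-padded keeps the cuDNN path, and anything else falls back to the generic kernel at runtime with the same numerics. A user-level sketch, where the all-zero trailing steps are what `Masking` turns into the mask:

import numpy as np
import tensorflow as tf

# Right-padded batch of shape (batch=2, time=4, features=1); the trailing
# zero steps become masked timesteps and keep the cuDNN kernel eligible.
x = np.array(
    [[1.0, 2.0, 0.0, 0.0],
     [1.0, 2.0, 3.0, 0.0]]
)[..., np.newaxis]
model = tf.keras.Sequential(
    [tf.keras.layers.Masking(), tf.keras.layers.LSTM(2)]
)
print(model(x).shape)  # (2, 2)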
- time_major: Boolean, whether the inputs are in the format of [time, batch, - feature] or [batch, time, feature]. - go_backwards: Boolean (default False). If True, process the input sequence - backwards and return the reversed sequence. - sequence_lengths: The lengths of all sequences coming from a variable length - input, such as ragged tensors. If the input has a fixed timestep size, - this should be None. - return_sequences: Boolean. If True, return the recurrent outputs for all - timesteps in the sequence. If False, only return the output for the - last timestep, matching the CPU function output format. - - Returns: - last_output: Output tensor for the last timestep, which has shape - [batch, units]. - outputs: - - If `return_sequences=True`: output tensor for all timesteps, - which has shape [batch, time, units]. - - Else, a tensor equal to `last_output` with shape [batch, 1, units] - state_0: The cell output, which has same shape as init_h. - state_1: The cell hidden state, which has same shape as init_c. - runtime: Constant string tensor which indicate real runtime hardware. This - value is for testing purpose and should not be used by user. - """ - if mask is not None: - sequence_lengths = gru_lstm_utils.calculate_sequence_by_mask( - mask, time_major) - - if not time_major and sequence_lengths is None: - inputs = tf.transpose(inputs, perm=(1, 0, 2)) - seq_axis, batch_axis = (0, 1) - else: - seq_axis, batch_axis = (0, 1) if time_major else (1, 0) - # For init_h and init_c, cuDNN expects one more dim of num_layers before or - # after batch dim for time major or batch major inputs respectively - init_h = tf.expand_dims(init_h, axis=seq_axis) - init_c = tf.expand_dims(init_c, axis=seq_axis) - - weights = tf.split(kernel, 4, axis=1) - weights += tf.split(recurrent_kernel, 4, axis=1) - # cuDNN has an extra set of bias for inputs, we disable them (setting to 0), - # so that mathematically it is same as the canonical LSTM implementation. - full_bias = tf.concat((tf.zeros_like(bias), bias), 0) - - if tf.sysconfig.get_build_info()['is_rocm_build']: - # ROCm MIOpen's weight sequence for LSTM is different from both canonical - # and Cudnn format - # MIOpen: [i, f, o, c] Cudnn/Canonical: [i, f, c, o] - # i is input gate weights. - # f is forget gate weights. - # o is output gate weights. - # c is cell gate weights. - weights = [weights[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)] - # full_bias is a tensor of shape (8*n,) - full_bias = tf.split(full_bias, 8, axis=0) - full_bias = [full_bias[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)] - - params = gru_lstm_utils.canonical_to_params( - weights=weights, - biases=tf.split(full_bias, 8), - shape=tf.constant([-1]), - transpose_weights=True) - - if sequence_lengths is not None: - if go_backwards: - # Three reversals are required. 
E.g., - # normal input = [1, 2, 3, 0, 0] # where 0 need to be masked - # reversed_input_to_cudnn = [3, 2, 1, 0, 0] - # output_from_cudnn = [6, 5, 4, 0, 0] - # expected_output = [0, 0, 6, 5 ,4] - inputs = tf.reverse_sequence( - inputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis) - outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3( - input=inputs, - input_h=init_h, - input_c=init_c, - params=params, - is_training=True, - rnn_mode='lstm', - sequence_lengths=sequence_lengths, - time_major=time_major) - if go_backwards: - outputs = tf.reverse_sequence( - outputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis) - outputs = tf.reverse(outputs, axis=[seq_axis]) - else: - # # Fill the array with shape [batch] with value of max timesteps. - # sequence_length = array_ops.fill([array_ops.shape(inputs)[1]], - # array_ops.shape(inputs)[0]) - if go_backwards: - # Reverse axis 0 since the input is already convert to time major. - inputs = tf.reverse(inputs, axis=[0]) - outputs, h, c, _ = tf.raw_ops.CudnnRNN( - input=inputs, input_h=init_h, input_c=init_c, params=params, - is_training=True, rnn_mode='lstm') - - last_output = outputs[-1] - if not time_major and sequence_lengths is None and return_sequences: - outputs = tf.transpose(outputs, perm=[1, 0, 2]) - h = tf.squeeze(h, axis=seq_axis) - c = tf.squeeze(c, axis=seq_axis) - - # In the case of variable length input, the cudnn kernel will fill zeros for - # the output, whereas the default keras behavior is to bring over the previous - # output for t-1, so that in the return_sequence=False case, user can quickly - # get the final effect output instead just 0s at the last timestep. - # In order to mimic the default keras behavior, we copy the final h state as - # the last_output, since it is numerically same as the output. - if sequence_lengths is not None: - last_output = h - - # Match CPU return format - if not return_sequences: - outputs = tf.expand_dims(last_output, axis=0 if time_major else 1) - - return last_output, outputs, h, c, gru_lstm_utils.runtime( - gru_lstm_utils.RUNTIME_GPU) - - -def lstm_with_backend_selection(inputs, init_h, init_c, kernel, - recurrent_kernel, bias, mask, time_major, - go_backwards, sequence_lengths, - zero_output_for_mask, return_sequences): - """Call the LSTM with optimized backend kernel selection. - - Under the hood, this function will create two TF function, one with the most - generic kernel and can run on all device condition, and the second one with - cuDNN specific kernel, which can only run on GPU. - - The first function will be called with normal_lstm_params, while the second - function is not called, but only registered in the graph. The Grappler will - do the proper graph rewrite and swap the optimized TF function based on the - device placement. - - Args: - inputs: Input tensor of LSTM layer. - init_h: Initial state tensor for the cell output. - init_c: Initial state tensor for the cell hidden state. - kernel: Weights for cell kernel. - recurrent_kernel: Weights for cell recurrent kernel. - bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias - is used in this case. - mask: Boolean tensor for mask out the steps within sequence. - An individual `True` entry indicates that the corresponding timestep - should be utilized, while a `False` entry indicates that the corresponding - timestep should be ignored. - time_major: Boolean, whether the inputs are in the format of - [time, batch, feature] or [batch, time, feature]. - go_backwards: Boolean (default False). 
If True, process the input sequence - backwards and return the reversed sequence. - sequence_lengths: The lengths of all sequences coming from a variable length - input, such as ragged tensors. If the input has a fixed timestep size, - this should be None. - zero_output_for_mask: Boolean, whether to output zero for masked timestep. - return_sequences: Boolean. If True, return the recurrent outputs for all - timesteps in the sequence. If False, only return the output for the - last timestep (which consumes less memory). - - Returns: - List of output tensors, same as standard_lstm. - """ - params = { - 'inputs': inputs, - 'init_h': init_h, - 'init_c': init_c, - 'kernel': kernel, - 'recurrent_kernel': recurrent_kernel, - 'bias': bias, - 'mask': mask, - 'time_major': time_major, - 'go_backwards': go_backwards, - 'sequence_lengths': sequence_lengths, - 'zero_output_for_mask': zero_output_for_mask, - 'return_sequences': return_sequences, - } - - def gpu_lstm_with_fallback(inputs, init_h, init_c, kernel, recurrent_kernel, - bias, mask, time_major, go_backwards, - sequence_lengths, zero_output_for_mask, - return_sequences): - """Use cuDNN kernel when mask is none or strictly right padded.""" - if mask is None: - return gpu_lstm( - inputs=inputs, - init_h=init_h, - init_c=init_c, - kernel=kernel, - recurrent_kernel=recurrent_kernel, - bias=bias, - mask=mask, - time_major=time_major, - go_backwards=go_backwards, - sequence_lengths=sequence_lengths, - return_sequences=return_sequences) - - def cudnn_lstm_fn(): - return gpu_lstm( - inputs=inputs, - init_h=init_h, - init_c=init_c, - kernel=kernel, - recurrent_kernel=recurrent_kernel, - bias=bias, - mask=mask, - time_major=time_major, - go_backwards=go_backwards, - sequence_lengths=sequence_lengths, - return_sequences=return_sequences) - - def stardard_lstm_fn(): - return standard_lstm( - inputs=inputs, - init_h=init_h, - init_c=init_c, - kernel=kernel, - recurrent_kernel=recurrent_kernel, - bias=bias, - mask=mask, - time_major=time_major, - go_backwards=go_backwards, - sequence_lengths=sequence_lengths, - zero_output_for_mask=zero_output_for_mask, - return_sequences=return_sequences) - - return tf.cond( - gru_lstm_utils.is_cudnn_supported_inputs(mask, time_major), - true_fn=cudnn_lstm_fn, - false_fn=stardard_lstm_fn) - - if gru_lstm_utils.use_new_gru_lstm_impl(): - # Chooses the implementation dynamically based on the running device. - (last_output, outputs, new_h, new_c, - runtime) = tf.__internal__.execute_fn_for_device( - { - gru_lstm_utils.CPU_DEVICE_NAME: - lambda: standard_lstm(**params), - gru_lstm_utils.GPU_DEVICE_NAME: - lambda: gpu_lstm_with_fallback(**params) - }, lambda: standard_lstm(**params)) - else: - # Each time a `tf.function` is called, we will give it a unique - # identifiable API name, so that Grappler won't get confused when it - # sees multiple LSTM layers added into same graph, and it will be able - # to pair up the different implementations across them. - api_name = 'lstm_' + str(uuid.uuid4()) - supportive_attribute = { - 'time_major': time_major, - 'go_backwards': go_backwards, - } - defun_standard_lstm = gru_lstm_utils.generate_defun_backend( - api_name, gru_lstm_utils.CPU_DEVICE_NAME, standard_lstm, - supportive_attribute) - defun_gpu_lstm = gru_lstm_utils.generate_defun_backend( - api_name, gru_lstm_utils.GPU_DEVICE_NAME, gpu_lstm_with_fallback, - supportive_attribute) - - # Call the normal LSTM impl and register the cuDNN impl function. The - # grappler will kick in during session execution to optimize the graph. 
- last_output, outputs, new_h, new_c, runtime = defun_standard_lstm(**params) - gru_lstm_utils.function_register(defun_gpu_lstm, **params) - - return last_output, outputs, new_h, new_c, runtime + + def gpu_lstm_with_fallback( + inputs, + init_h, + init_c, + kernel, + recurrent_kernel, + bias, + mask, + time_major, + go_backwards, + sequence_lengths, + zero_output_for_mask, + return_sequences, + ): + """Use cuDNN kernel when mask is none or strictly right padded.""" + + def cudnn_lstm_fn(): + return gpu_lstm( + inputs=inputs, + init_h=init_h, + init_c=init_c, + kernel=kernel, + recurrent_kernel=recurrent_kernel, + bias=bias, + mask=mask, + time_major=time_major, + go_backwards=go_backwards, + sequence_lengths=sequence_lengths, + return_sequences=return_sequences, + ) + + def stardard_lstm_fn(): + return standard_lstm( + inputs=inputs, + init_h=init_h, + init_c=init_c, + kernel=kernel, + recurrent_kernel=recurrent_kernel, + bias=bias, + mask=mask, + time_major=time_major, + go_backwards=go_backwards, + sequence_lengths=sequence_lengths, + zero_output_for_mask=zero_output_for_mask, + return_sequences=return_sequences, + ) + + return tf.__internal__.smart_cond.smart_cond( + gru_lstm_utils.is_cudnn_supported_inputs( + mask, time_major, sequence_lengths + ), + true_fn=cudnn_lstm_fn, + false_fn=stardard_lstm_fn, + ) + + if gru_lstm_utils.use_new_gru_lstm_impl(): + # Chooses the implementation dynamically based on the running device. + ( + last_output, + outputs, + new_h, + new_c, + runtime, + ) = tf.__internal__.execute_fn_for_device( + { + gru_lstm_utils.CPU_DEVICE_NAME: lambda: standard_lstm(**params), + gru_lstm_utils.GPU_DEVICE_NAME: lambda: gpu_lstm_with_fallback( + **params + ), + }, + lambda: standard_lstm(**params), + ) + else: + # Each time a `tf.function` is called, we will give it a unique + # identifiable API name, so that Grappler won't get confused when it + # sees multiple LSTM layers added into same graph, and it will be able + # to pair up the different implementations across them. + api_name = "lstm_" + str(uuid.uuid4()) + supportive_attribute = { + "time_major": time_major, + "go_backwards": go_backwards, + } + defun_standard_lstm = gru_lstm_utils.generate_defun_backend( + api_name, + gru_lstm_utils.CPU_DEVICE_NAME, + standard_lstm, + supportive_attribute, + ) + defun_gpu_lstm = gru_lstm_utils.generate_defun_backend( + api_name, + gru_lstm_utils.GPU_DEVICE_NAME, + gpu_lstm_with_fallback, + supportive_attribute, + ) + + # Call the normal LSTM impl and register the cuDNN impl function. The + # grappler will kick in during session execution to optimize the graph. 
+ last_output, outputs, new_h, new_c, runtime = defun_standard_lstm( + **params + ) + gru_lstm_utils.function_register(defun_gpu_lstm, **params) + + return last_output, outputs, new_h, new_c, runtime diff --git a/keras/layers/rnn/lstm_test.py b/keras/layers/rnn/lstm_test.py index fd208eeb9f57..e3e77dddae6b 100644 --- a/keras/layers/rnn/lstm_test.py +++ b/keras/layers/rnn/lstm_test.py @@ -19,18 +19,21 @@ import os import shutil +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.layers.rnn import gru_lstm_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import np_utils -import numpy as np -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.core.protobuf import rewriter_config_pb2 -from tensorflow.python.framework import test_util as tf_test_util - +from tensorflow.python.framework import ( + test_util as tf_test_util, +) # Global config for grappler setting that is used for graph mode test. _rewrites = rewriter_config_pb2.RewriterConfig() @@ -43,1229 +46,1384 @@ @test_combinations.run_all_keras_modes(config=_config) class LSTMGraphRewriteTest(test_combinations.TestCase): - input_shape = 10 - output_shape = 8 - rnn_state_size = 8 - timestep = 4 - batch = 100 - epoch = 1 - - @parameterized.named_parameters( - ('non_tan_activation', 'relu', 'sigmoid', 0, False, True), - ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True), - ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True), - ('unroll', 'tanh', 'sigmoid', 0, True, True), - ('not_use_bias', 'tanh', 'sigmoid', 0, False, False), - ) - @test_utils.run_v2_only - def test_could_use_defun_backend(self, activation, recurrent_activation, - recurrent_dropout, unroll, use_bias): - layer = keras.layers.LSTM( - 1, - activation=activation, - recurrent_activation=recurrent_activation, - recurrent_dropout=recurrent_dropout, - unroll=unroll, - use_bias=use_bias) - self.assertFalse(layer._could_use_gpu_kernel) - - @test_utils.run_v2_only - def test_use_on_default_activation_with_gpu_kernel(self): - layer = keras.layers.LSTM(1, activation=tf.tanh) - self.assertTrue(layer._could_use_gpu_kernel) - - layer = keras.layers.LSTM(1, recurrent_activation=tf.sigmoid) - self.assertTrue(layer._could_use_gpu_kernel) - - def test_static_shape_inference_LSTM(self): - # Github issue: 15165 - timesteps = 3 - embedding_dim = 4 - units = 2 - - model = keras.models.Sequential() - inputs = keras.layers.Dense( - embedding_dim, input_shape=(timesteps, embedding_dim)) - model.add(inputs) - layer = keras.layers.LSTM(units, return_sequences=True) - model.add(layer) - outputs = model.layers[-1].output - self.assertEqual(outputs.shape.as_list(), [None, timesteps, units]) - - def test_dynamic_behavior_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer = keras.layers.LSTM(units, input_shape=(None, embedding_dim)) - model = keras.models.Sequential() - model.add(layer) - model.compile(tf.compat.v1.train.GradientDescentOptimizer(0.001), 'mse') - x = np.random.random((num_samples, timesteps, embedding_dim)) - y = np.random.random((num_samples, units)) - model.train_on_batch(x, y) - - def test_stacking_LSTM(self): - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.LSTM(10, return_sequences=True, unroll=False)) - model.add(keras.layers.LSTM(5, 
return_sequences=True, unroll=False)) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01)) - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - def test_from_config_LSTM(self): - layer_class = keras.layers.LSTM - for stateful in (False, True): - l1 = layer_class(units=1, stateful=stateful) - l2 = layer_class.from_config(l1.get_config()) - assert l1.get_config() == l2.get_config() - - def test_specify_initial_state_keras_tensor(self): - num_states = 2 - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - # Test with Keras tensor - inputs = keras.Input((timesteps, embedding_dim)) - initial_state = [keras.Input((units,)) for _ in range(num_states)] - layer = keras.layers.LSTM(units) - if len(initial_state) == 1: - output = layer(inputs, initial_state=initial_state[0]) - else: - output = layer(inputs, initial_state=initial_state) - self.assertTrue( - any(initial_state[0] is t - for t in layer._inbound_nodes[0].input_tensors)) - - model = keras.models.Model([inputs] + initial_state, output) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01)) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [ - np.random.random((num_samples, units)) for _ in range(num_states) - ] - targets = np.random.random((num_samples, units)) - model.train_on_batch([inputs] + initial_state, targets) - - def test_specify_initial_state_non_keras_tensor(self): - num_states = 2 - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - # Test with non-Keras tensor - inputs = keras.Input((timesteps, embedding_dim)) - initial_state = [ - keras.backend.random_normal_variable((num_samples, units), 0, 1) - for _ in range(num_states) - ] - layer = keras.layers.LSTM(units) - output = layer(inputs, initial_state=initial_state) - - model = keras.models.Model(inputs, output) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01)) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - targets = np.random.random((num_samples, units)) - model.train_on_batch(inputs, targets) - - def test_reset_states_with_values(self): - num_states = 2 - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - layer = keras.layers.LSTM(units, stateful=True) - layer.build((num_samples, timesteps, embedding_dim)) - initial_weight_count = len(layer.weights) - layer.reset_states() - assert len(layer.states) == num_states - assert layer.states[0] is not None - self.assertAllClose( - keras.backend.eval(layer.states[0]), - np.zeros(keras.backend.int_shape(layer.states[0])), - atol=1e-4) - state_shapes = [keras.backend.int_shape(state) for state in layer.states] - values = [np.ones(shape) for shape in state_shapes] - if len(values) == 1: - values = values[0] - layer.reset_states(values) - self.assertAllClose( - keras.backend.eval(layer.states[0]), - np.ones(keras.backend.int_shape(layer.states[0])), - atol=1e-4) - - # Test with invalid data - with self.assertRaises(ValueError): - layer.reset_states([1] * (len(layer.states) + 1)) - - self.assertEqual(initial_weight_count, len(layer.weights)) - # Variables in "states" shouldn't show up in .weights - layer.states = tf.nest.map_structure(tf.Variable, values) - layer.reset_states() - self.assertEqual(initial_weight_count, len(layer.weights)) - - def test_specify_state_with_masking(self): - num_states = 2 - timesteps = 3 - 
embedding_dim = 4 - units = 3 - num_samples = 2 - - inputs = keras.Input((timesteps, embedding_dim)) - _ = keras.layers.Masking()(inputs) - initial_state = [keras.Input((units,)) for _ in range(num_states)] - output = keras.layers.LSTM(units)( - inputs, initial_state=initial_state) - - model = keras.models.Model([inputs] + initial_state, output) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01)) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [ - np.random.random((num_samples, units)) for _ in range(num_states) - ] - targets = np.random.random((num_samples, units)) - model.train_on_batch([inputs] + initial_state, targets) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def test_return_state(self): - num_states = 2 - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim)) - masked = keras.layers.Masking()(inputs) - layer = keras.layers.LSTM(units, return_state=True, stateful=True) - outputs = layer(masked) - state = outputs[1:] - assert len(state) == num_states - model = keras.models.Model(inputs, state[0]) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - state = model.predict(inputs) - self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4) - - def test_state_reuse(self): - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim)) - layer = keras.layers.LSTM( - units, return_state=True, return_sequences=True) - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - output = keras.layers.LSTM(units)(output, initial_state=state) - model = keras.models.Model(inputs, output) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - model.predict(inputs) - - def test_initial_states_as_other_inputs(self): - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - num_states = 2 - layer_class = keras.layers.LSTM - - # Test with Keras tensor - main_inputs = keras.Input((timesteps, embedding_dim)) - initial_state = [keras.Input((units,)) for _ in range(num_states)] - inputs = [main_inputs] + initial_state - - layer = layer_class(units) - output = layer(inputs) - self.assertTrue( - any(initial_state[0] is t - for t in layer._inbound_nodes[0].input_tensors)) - - model = keras.models.Model(inputs, output) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01)) - - main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [ - np.random.random((num_samples, units)) for _ in range(num_states) - ] - targets = np.random.random((num_samples, units)) - model.train_on_batch([main_inputs] + initial_state, targets) - - @parameterized.named_parameters(('v0', 0), ('v1', 1), ('v2', 2)) - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def test_implementation_mode_LSTM(self, implementation_mode): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.LSTM, - kwargs={ - 'units': units, - 'implementation': implementation_mode - }, - input_shape=(num_samples, timesteps, embedding_dim)) - - layer_class = keras.layers.LSTM - 
k_constraint = keras.constraints.max_norm(0.01) - r_constraint = keras.constraints.max_norm(0.01) - b_constraint = keras.constraints.max_norm(0.01) - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_constraint=k_constraint, - recurrent_constraint=r_constraint, - bias_constraint=b_constraint) - layer.build((None, None, embedding_dim)) - self.assertEqual(layer.cell.kernel.constraint, k_constraint) - self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.cell.bias.constraint, b_constraint) - - layer_class = keras.layers.LSTM - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.Masking(input_shape=(3, 4))) - model.add(layer_class(units=5, return_sequences=True, unroll=False)) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01)) - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def test_masking_with_stacking_LSTM(self): - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.Masking(input_shape=(3, 4))) - model.add(keras.layers.LSTM(10, return_sequences=True, unroll=False)) - model.add(keras.layers.LSTM(5, return_sequences=True, unroll=False)) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01)) - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - @parameterized.named_parameters( - # test_name, use_bias, bias_initializer, activation - ('normal', True, 'zeros'), - ('no_bias', False, 'zeros'), - ('random_bias', True, 'random_uniform'), - ) - def test_lstm_model_save_load(self, use_bias, bias_initializer): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir) - h5_path = os.path.join(temp_dir, 'test.h5') - - batch = 10 - timestep = 3 - input_dim = 5 - units = 2 - - x = np.random.random((batch, timestep, input_dim)) - - def build_model(): - inputs = keras.layers.Input( - shape=[timestep, input_dim], dtype=tf.float32) - layer = keras.layers.LSTM( - units, - use_bias=use_bias, - bias_initializer=bias_initializer) - output = layer(inputs) - return keras.models.Model(inputs, output), layer - - model, layer = build_model() - y_ref = model.predict(x) - model.save_weights(h5_path) - - cloned_model, new_layer = build_model() - cloned_model.load_weights(h5_path) - y = cloned_model.predict(x) - - self.assertAllClose(y, y_ref) - self.assertAllClose(layer.get_weights(), new_layer.get_weights()) - - def test_lstm_output_on_multiple_kernel(self): - x_train = np.random.random((self.batch, self.timestep, self.input_shape)) - - inputs = keras.layers.Input( - shape=[self.timestep, self.input_shape], dtype=tf.float32) - with test_utils.device(should_use_gpu=False): - layer = keras.layers.LSTM(self.rnn_state_size) - output = layer(inputs) - cpu_model = keras.models.Model(inputs, output) - weights = cpu_model.get_weights() - y_1 = cpu_model.predict(x_train) - - with test_utils.device(should_use_gpu=True): - layer = keras.layers.LSTM(self.rnn_state_size) - output = layer(inputs) - gpu_model = 
keras.models.Model(inputs, output) - gpu_model.set_weights(weights) - y_2 = gpu_model.predict(x_train) - - self.assertAllClose(y_1, y_2) - - def test_return_sequences_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.LSTM, - kwargs={ - 'units': units, - 'return_sequences': True - }, - input_shape=(num_samples, timesteps, embedding_dim)) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support float64 yet.') - @test_utils.run_v2_only - def test_float64_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.LSTM, - kwargs={ - 'units': units, - 'return_sequences': True, - 'dtype': 'float64' - }, - input_shape=(num_samples, timesteps, embedding_dim), - input_dtype='float64') - - def test_regularizers_LSTM(self): - embedding_dim = 4 - layer_class = keras.layers.LSTM - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_regularizer=keras.regularizers.l1(0.01), - recurrent_regularizer=keras.regularizers.l1(0.01), - bias_regularizer='l2', - activity_regularizer='l1') - layer.build((None, None, 2)) - self.assertEqual(len(layer.losses), 3) - x = keras.backend.variable(np.ones((2, 3, 2))) - layer(x) - if tf.executing_eagerly(): - self.assertEqual(len(layer.losses), 4) - else: - self.assertEqual(len(layer.get_losses_for(x)), 1) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - def test_statefulness_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer_class = keras.layers.LSTM - model = keras.models.Sequential() - model.add( - keras.layers.Embedding( - 4, - embedding_dim, - mask_zero=True, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = layer_class( - units, return_sequences=False, stateful=True, weights=None) - model.add(layer) - model.compile( - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - out1 = model.predict(np.ones((num_samples, timesteps))) - self.assertEqual(out1.shape, (num_samples, units)) - - # train once so that the states change - model.train_on_batch( - np.ones((num_samples, timesteps)), np.ones((num_samples, units))) - out2 = model.predict(np.ones((num_samples, timesteps))) - - # if the state is not reset, output should be different - self.assertNotEqual(out1.max(), out2.max()) - - # check that output changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out2.max(), out3.max()) - - # check that container-level reset_states() works - model.reset_states() - out4 = model.predict(np.ones((num_samples, timesteps))) - self.assertAllClose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out4.max(), out5.max()) - - # Check masking - layer.reset_states() - - left_padded_input = np.ones((num_samples, timesteps)) - left_padded_input[0, :1] = 0 - left_padded_input[1, :2] = 0 - out6 = model.predict(left_padded_input) - - layer.reset_states() - - right_padded_input = np.ones((num_samples, timesteps)) - right_padded_input[0, -1:] = 0 - right_padded_input[1, -2:] = 
0 - out7 = model.predict(right_padded_input) - - layer.reset_states() - - mix_padded_input = np.ones((num_samples, timesteps)) - mix_padded_input[0, 1] = 0 - mix_padded_input[1, 0] = 0 - mix_padded_input[1, 2] = 0 - out8 = model.predict(mix_padded_input) - - self.assertAllClose(out7, out6, atol=1e-5) - self.assertAllClose(out8, out7, atol=1e-5) - - def test_stateful_LSTM_training(self): - # See b/123587692 for more context. - vocab_size = 20 - embedding_dim = 10 - batch_size = 8 - timestep = 12 - units = 5 - x = np.random.randint(0, vocab_size, size=(batch_size, timestep)) - y = np.random.randint(0, vocab_size, size=(batch_size, timestep)) - - model = keras.Sequential([ - keras.layers.Embedding(vocab_size, embedding_dim, - batch_input_shape=[batch_size, timestep]), - keras.layers.LSTM(units, return_sequences=True, stateful=True), - keras.layers.Dense(vocab_size) - ]) - model.compile( - optimizer='adam', - loss='sparse_categorical_crossentropy', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, epochs=1, shuffle=False) - - def test_dropout_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.LSTM, - kwargs={ - 'units': units, - 'dropout': 0.1, - 'recurrent_dropout': 0.1 - }, - input_shape=(num_samples, timesteps, embedding_dim)) - - def test_bidirectional(self): - batch = 128 - timestep = 20 - vocab_size = 1000 - model = keras.Sequential([ - keras.layers.Embedding(vocab_size, 64), - keras.layers.Bidirectional(keras.layers.LSTM( - 64, return_sequences=True)), - keras.layers.Bidirectional(keras.layers.LSTM(32)), - keras.layers.Dense(64, activation='relu'), - keras.layers.Dense(1, activation='sigmoid') - ]) - - model.compile(loss='binary_crossentropy', - optimizer='adam', - metrics=['accuracy']) - - x = np.random.randint(0, vocab_size, size=(batch, timestep)) - y = np.random.randint(0, 1, size=(batch)) - model.fit(x, y, epochs=1, shuffle=False) - model.evaluate(x, y) - model.predict(x) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - @test_utils.run_v2_only - def test_explicit_device_with_go_backward_and_mask(self): - batch_size = 8 - timestep = 7 - masksteps = 5 - units = 4 - - inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) - mask = np.ones((batch_size, timestep)).astype(np.bool) - mask[:, masksteps:] = 0 - - lstm_layer = keras.layers.LSTM( - units, return_sequences=True, go_backwards=True) - with test_utils.device(should_use_gpu=True): - outputs_masked = lstm_layer(inputs, mask=tf.constant(mask)) - outputs_trimmed = lstm_layer(inputs[:, :masksteps]) - self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed) - - @tf_test_util.enable_output_all_intermediates - def test_v1_session_behavior(self): - with tf.compat.v1.get_default_graph().as_default(): - # See b/139132348 for more details. 
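An aside on the `Bidirectional` wrapper exercised in the test above: by default it concatenates the forward and backward passes, so the feature dimension doubles. A small shape sketch (assuming eager TF 2.x):

import numpy as np
from tensorflow import keras

x = np.random.random((2, 7, 8)).astype("float32")

seq = keras.layers.Bidirectional(
    keras.layers.LSTM(16, return_sequences=True)
)
last = keras.layers.Bidirectional(keras.layers.LSTM(16))

print(seq(x).shape)  # (2, 7, 32): per-step outputs, both directions
print(last(x).shape)  # (2, 32): final output only, still concatenated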
- x = np.random.uniform(size=(100, 4, 8)) - y = np.random.uniform(size=(100, 1)) - dataset = tf.data.Dataset.from_tensor_slices( - (x, y)).shuffle(100).batch(32) - - inp = keras.layers.Input(shape=(4, 8)) - layer = keras.layers.LSTM(1)(inp) - layer = keras.layers.Dense(1)(layer) - - model = keras.models.Model(inp, layer) - - model.compile(loss='mse', optimizer='sgd') - model.fit(dataset) - - def test_with_fully_masked_inputs(self): - num_samples = 8 - timestep = 5 - embedding_dim = 4 - vocab_size = 20 - units = 2 - - inputs = np.random.randint(0, vocab_size, size=(num_samples, timestep)) - # Set the first inputs to be fully zero. - inputs[0, :] = 0.0 - - model = keras.models.Sequential() - model.add( - keras.layers.Embedding( - vocab_size, - embedding_dim, - mask_zero=True, - input_length=timestep, - batch_input_shape=(num_samples, timestep))) - layer = keras.layers.LSTM(units) - model.add(layer) - model.compile( - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - # Make sure it doesn't crash with cudnn kernel. - model.predict(inputs) - - # TODO (b/169895267): test with xla_gpu is disabled. - def test_deepcopy(self): - if not tf.executing_eagerly(): - self.skipTest('v2-only test') - original_layer = keras.layers.LSTM(5) - copied_layer = copy.deepcopy(original_layer) - self.assertEqual(copied_layer.units, 5) - self.assertEqual(original_layer.get_config(), original_layer.get_config()) - - # Copy layer before layer call on inputs without weight initialization. - inputs = np.random.normal(size=[32, 10, 8]).astype(np.float32) - original_layer = keras.layers.LSTM(4) - copied_layer = copy.deepcopy(original_layer) - outputs = original_layer(inputs) - copied_outputs = copied_layer(inputs) - self.assertNotAllClose( - self.evaluate(outputs), self.evaluate(copied_outputs)) - - # Copy layer after layer call on inputs with weight initialization. - original_layer = keras.layers.LSTM(4) - outputs = original_layer(inputs) - copied_layer = copy.deepcopy(original_layer) - copied_outputs = copied_layer(inputs) - self.assertAllClose(self.evaluate(outputs), self.evaluate(copied_outputs)) - - def _test_runtime_with_model(self, model): - - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=self.batch, - test_samples=0, - input_shape=(self.timestep, self.input_shape), - num_classes=self.output_shape) - y_train = np_utils.to_categorical(y_train, self.output_shape) - - model.compile( - optimizer='sgd', - loss=['categorical_crossentropy', None], - run_eagerly=test_utils.should_run_eagerly()) - - existing_loss = 0 - for _ in range(self.epoch): - history = model.fit(x_train, y_train) - loss_value = history.history['loss'][0] - - self.assertNotEqual(existing_loss, loss_value) - existing_loss = loss_value - - _, runtime_value = model.predict(x_train) - if tf.test.is_gpu_available(): - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU) - else: - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) - - @test_utils.run_v2_only - def test_LSTM_runtime(self): - layer = keras.layers.LSTM(self.rnn_state_size, return_runtime=True) - - inputs = keras.layers.Input( - shape=[self.timestep, self.input_shape], dtype=tf.float32) - - outputs, runtime = layer(inputs) - # Expand the runtime so that it is a 1D tensor instead of scalar. - # TF model does not work with scalar model output, specially during - # aggregation. 
- runtime = keras.layers.Lambda( - lambda x: tf.expand_dims(x, axis=-1))(runtime) - model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime]) - self._test_runtime_with_model(model) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - @test_utils.run_v2_only - def test_LSTM_runtime_with_mask(self): - # Masking will affect which backend is selected based on whether the mask - # is strictly right padded. - layer = keras.layers.LSTM(self.rnn_state_size, return_runtime=True) - - inputs = keras.layers.Input( - shape=[self.timestep, self.input_shape], dtype=tf.float32) - masked_inputs = keras.layers.Masking()(inputs) - - outputs, runtime = layer(masked_inputs) - # Expand the runtime so that it is a 1D tensor instead of scalar. - # TF model does not work with scalar model output, specially during - # aggregation. - runtime = keras.layers.Lambda( - lambda x: tf.expand_dims(x, axis=-1))(runtime) - model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime]) - - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=self.batch, - test_samples=0, - input_shape=(self.timestep, self.input_shape), - num_classes=self.output_shape) - y_train = np_utils.to_categorical(y_train, self.output_shape) - - model.compile( - optimizer='sgd', - loss=['categorical_crossentropy', None], - run_eagerly=test_utils.should_run_eagerly()) - - model.fit(x_train, y_train) - - # Verify unpadded data. - _, runtime_value = model.predict(x_train) - if tf.test.is_gpu_available(): - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU) - else: - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) - - # Update x/y to be right padded by setting the last timestep to 0 - x_train[:, -1, :] = 0 - y_train[:, -1] = 0 - _, runtime_value = model.predict(x_train) - if tf.test.is_gpu_available(): - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU) - else: - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) - - # Further update x/y to be mix padded (masks in the middle), and verify - # only cpu kernel can be selected. - x_train[:, -3, :] = 0 - y_train[:, -3] = 0 - _, runtime_value = model.predict(x_train) - self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) - - @test_utils.run_v2_only - def test_LSTM_runtime_with_cond(self): - # This test is to demonstrate the graph rewrite of grappler plugin under - # the condition that the function returns different number of internal - # states. - layer = keras.layers.LSTM(self.rnn_state_size, return_runtime=True) - - inputs = keras.layers.Input( - shape=[self.timestep, self.input_shape], dtype=tf.float32) - - zeros = tf.zeros([self.batch, self.output_shape]) - dummy_runtime = gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_UNKNOWN) - a = tf.constant(0) - b = tf.constant(1) - # Will always run the lstm layer. - outputs, runtime = tf.cond( - tf.less(a, b), - lambda: layer(inputs), - lambda: (zeros, dummy_runtime)) - - # Expand the runtime so that it is a 1D tensor instead of scalar. - # TF model does not work with scalar model output, specially during - # aggregation. 
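The `expand_dims` comment recurring in these runtime tests is worth distilling: Keras cannot aggregate a rank-0 per-batch output across `predict` batches, so the scalar runtime value is lifted to shape `(1,)` before being exposed as a model output. The same pattern in isolation (a sketch, assuming eager TF 2.x):

import tensorflow as tf
from tensorflow import keras

scalar = tf.constant(1.0)  # rank-0, like the runtime value
lifted = keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(scalar)
print(lifted.shape)  # (1,): now a valid model output for aggregation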
- runtime = keras.layers.Lambda( - lambda x: tf.expand_dims(x, axis=-1))(runtime) - model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime]) - self._test_runtime_with_model(model) + input_shape = 10 + output_shape = 8 + rnn_state_size = 8 + timestep = 4 + batch = 100 + epoch = 1 + + @parameterized.named_parameters( + ("non_tan_activation", "relu", "sigmoid", 0, False, True), + ("non_sigmoid_recur_activation", "tanh", "relu", 0, False, True), + ("use_recurrent_dropout", "tanh", "sigmoid", 0.1, False, True), + ("unroll", "tanh", "sigmoid", 0, True, True), + ("not_use_bias", "tanh", "sigmoid", 0, False, False), + ) + @test_utils.run_v2_only + def test_could_use_defun_backend( + self, + activation, + recurrent_activation, + recurrent_dropout, + unroll, + use_bias, + ): + layer = keras.layers.LSTM( + 1, + activation=activation, + recurrent_activation=recurrent_activation, + recurrent_dropout=recurrent_dropout, + unroll=unroll, + use_bias=use_bias, + ) + self.assertFalse(layer._could_use_gpu_kernel) + + @test_utils.run_v2_only + def test_use_on_default_activation_with_gpu_kernel(self): + layer = keras.layers.LSTM(1, activation=tf.tanh) + self.assertTrue(layer._could_use_gpu_kernel) + + layer = keras.layers.LSTM(1, recurrent_activation=tf.sigmoid) + self.assertTrue(layer._could_use_gpu_kernel) + + def test_static_shape_inference_LSTM(self): + # GitHub issue: 15165 + timesteps = 3 + embedding_dim = 4 + units = 2 + + model = keras.models.Sequential() + inputs = keras.layers.Dense( + embedding_dim, input_shape=(timesteps, embedding_dim) + ) + model.add(inputs) + layer = keras.layers.LSTM(units, return_sequences=True) + model.add(layer) + outputs = model.layers[-1].output + self.assertEqual(outputs.shape.as_list(), [None, timesteps, units]) + + def test_dynamic_behavior_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + layer = keras.layers.LSTM(units, input_shape=(None, embedding_dim)) + model = keras.models.Sequential() + model.add(layer) + model.compile(tf.compat.v1.train.GradientDescentOptimizer(0.001), "mse") + x = np.random.random((num_samples, timesteps, embedding_dim)) + y = np.random.random((num_samples, units)) + model.train_on_batch(x, y) + + def test_stacking_LSTM(self): + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.LSTM(10, return_sequences=True, unroll=False)) + model.add(keras.layers.LSTM(5, return_sequences=True, unroll=False)) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + ) + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) + + def test_from_config_LSTM(self): + layer_class = keras.layers.LSTM + for stateful in (False, True): + l1 = layer_class(units=1, stateful=stateful) + l2 = layer_class.from_config(l1.get_config()) + assert l1.get_config() == l2.get_config() + + def test_specify_initial_state_keras_tensor(self): + num_states = 2 + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + # Test with Keras tensor + inputs = keras.Input((timesteps, embedding_dim)) + initial_state = [keras.Input((units,)) for _ in range(num_states)] + layer = keras.layers.LSTM(units) + if len(initial_state) == 1: + output = layer(inputs, initial_state=initial_state[0]) + else: + output = layer(inputs, initial_state=initial_state) + self.assertTrue( + any( + initial_state[0] is t + for t in 
layer._inbound_nodes[0].input_tensors + ) + ) + + model = keras.models.Model([inputs] + initial_state, output) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + ) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [ + np.random.random((num_samples, units)) for _ in range(num_states) + ] + targets = np.random.random((num_samples, units)) + model.train_on_batch([inputs] + initial_state, targets) + + def test_specify_initial_state_non_keras_tensor(self): + num_states = 2 + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + # Test with non-Keras tensor + inputs = keras.Input((timesteps, embedding_dim)) + initial_state = [ + keras.backend.random_normal_variable((num_samples, units), 0, 1) + for _ in range(num_states) + ] + layer = keras.layers.LSTM(units) + output = layer(inputs, initial_state=initial_state) + + model = keras.models.Model(inputs, output) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + ) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + targets = np.random.random((num_samples, units)) + model.train_on_batch(inputs, targets) + + def test_reset_states_with_values(self): + num_states = 2 + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + layer = keras.layers.LSTM(units, stateful=True) + layer.build((num_samples, timesteps, embedding_dim)) + initial_weight_count = len(layer.weights) + layer.reset_states() + assert len(layer.states) == num_states + assert layer.states[0] is not None + self.assertAllClose( + keras.backend.eval(layer.states[0]), + np.zeros(keras.backend.int_shape(layer.states[0])), + atol=1e-4, + ) + state_shapes = [ + keras.backend.int_shape(state) for state in layer.states + ] + values = [np.ones(shape) for shape in state_shapes] + if len(values) == 1: + values = values[0] + layer.reset_states(values) + self.assertAllClose( + keras.backend.eval(layer.states[0]), + np.ones(keras.backend.int_shape(layer.states[0])), + atol=1e-4, + ) + + # Test with invalid data + with self.assertRaises(ValueError): + layer.reset_states([1] * (len(layer.states) + 1)) + + self.assertEqual(initial_weight_count, len(layer.weights)) + # Variables in "states" shouldn't show up in .weights + layer.states = tf.nest.map_structure(tf.Variable, values) + layer.reset_states() + self.assertEqual(initial_weight_count, len(layer.weights)) + + def test_specify_state_with_masking(self): + num_states = 2 + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + inputs = keras.Input((timesteps, embedding_dim)) + _ = keras.layers.Masking()(inputs) + initial_state = [keras.Input((units,)) for _ in range(num_states)] + output = keras.layers.LSTM(units)(inputs, initial_state=initial_state) + + model = keras.models.Model([inputs] + initial_state, output) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + ) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [ + np.random.random((num_samples, units)) for _ in range(num_states) + ] + targets = np.random.random((num_samples, units)) + model.train_on_batch([inputs] + initial_state, targets) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." 
+ ), + ) + def test_return_state(self): + num_states = 2 + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + inputs = keras.Input( + batch_shape=(num_samples, timesteps, embedding_dim) + ) + masked = keras.layers.Masking()(inputs) + layer = keras.layers.LSTM(units, return_state=True, stateful=True) + outputs = layer(masked) + state = outputs[1:] + assert len(state) == num_states + model = keras.models.Model(inputs, state[0]) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + state = model.predict(inputs) + self.assertAllClose( + keras.backend.eval(layer.states[0]), state, atol=1e-4 + ) + + def test_state_reuse(self): + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + inputs = keras.Input( + batch_shape=(num_samples, timesteps, embedding_dim) + ) + layer = keras.layers.LSTM( + units, return_state=True, return_sequences=True + ) + outputs = layer(inputs) + output, state = outputs[0], outputs[1:] + output = keras.layers.LSTM(units)(output, initial_state=state) + model = keras.models.Model(inputs, output) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + model.predict(inputs) + + def test_initial_states_as_other_inputs(self): + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + num_states = 2 + layer_class = keras.layers.LSTM + + # Test with Keras tensor + main_inputs = keras.Input((timesteps, embedding_dim)) + initial_state = [keras.Input((units,)) for _ in range(num_states)] + inputs = [main_inputs] + initial_state + + layer = layer_class(units) + output = layer(inputs) + self.assertTrue( + any( + initial_state[0] is t + for t in layer._inbound_nodes[0].input_tensors + ) + ) + + model = keras.models.Model(inputs, output) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + ) + + main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [ + np.random.random((num_samples, units)) for _ in range(num_states) + ] + targets = np.random.random((num_samples, units)) + model.train_on_batch([main_inputs] + initial_state, targets) + + @parameterized.named_parameters(("v0", 0), ("v1", 1), ("v2", 2)) + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." 
+ ), + ) + def test_implementation_mode_LSTM(self, implementation_mode): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.LSTM, + kwargs={"units": units, "implementation": implementation_mode}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + layer_class = keras.layers.LSTM + k_constraint = keras.constraints.max_norm(0.01) + r_constraint = keras.constraints.max_norm(0.01) + b_constraint = keras.constraints.max_norm(0.01) + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_constraint=k_constraint, + recurrent_constraint=r_constraint, + bias_constraint=b_constraint, + ) + layer.build((None, None, embedding_dim)) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) + + layer_class = keras.layers.LSTM + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.Masking(input_shape=(3, 4))) + model.add(layer_class(units=5, return_sequences=True, unroll=False)) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + ) + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." + ), + ) + def test_masking_with_stacking_LSTM(self): + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.Masking(input_shape=(3, 4))) + model.add(keras.layers.LSTM(10, return_sequences=True, unroll=False)) + model.add(keras.layers.LSTM(5, return_sequences=True, unroll=False)) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + ) + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) + + @parameterized.named_parameters( + # test_name, use_bias, bias_initializer, activation + ("normal", True, "zeros"), + ("no_bias", False, "zeros"), + ("random_bias", True, "random_uniform"), + ) + def test_lstm_model_save_load(self, use_bias, bias_initializer): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir) + h5_path = os.path.join(temp_dir, "test.h5") + + batch = 10 + timestep = 3 + input_dim = 5 + units = 2 + + x = np.random.random((batch, timestep, input_dim)) + + def build_model(): + inputs = keras.layers.Input( + shape=[timestep, input_dim], dtype=tf.float32 + ) + layer = keras.layers.LSTM( + units, use_bias=use_bias, bias_initializer=bias_initializer + ) + output = layer(inputs) + return keras.models.Model(inputs, output), layer + + model, layer = build_model() + y_ref = model.predict(x) + model.save_weights(h5_path) + + cloned_model, new_layer = build_model() + cloned_model.load_weights(h5_path) + y = cloned_model.predict(x) + + self.assertAllClose(y, y_ref) + self.assertAllClose(layer.get_weights(), new_layer.get_weights()) + + def test_lstm_output_on_multiple_kernel(self): + x_train = np.random.random( + (self.batch, self.timestep, self.input_shape) + ) + + inputs = keras.layers.Input( + shape=[self.timestep, self.input_shape], dtype=tf.float32 + ) + with 
test_utils.device(should_use_gpu=False): + layer = keras.layers.LSTM(self.rnn_state_size) + output = layer(inputs) + cpu_model = keras.models.Model(inputs, output) + weights = cpu_model.get_weights() + y_1 = cpu_model.predict(x_train) + + with test_utils.device(should_use_gpu=True): + layer = keras.layers.LSTM(self.rnn_state_size) + output = layer(inputs) + gpu_model = keras.models.Model(inputs, output) + gpu_model.set_weights(weights) + y_2 = gpu_model.predict(x_train) + + self.assertAllClose(y_1, y_2) + + def test_return_sequences_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.LSTM, + kwargs={"units": units, "return_sequences": True}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message="Skipping as ROCm MIOpen does not support float64 yet.", + ) + @test_utils.run_v2_only + def test_float64_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.LSTM, + kwargs={ + "units": units, + "return_sequences": True, + "dtype": "float64", + }, + input_shape=(num_samples, timesteps, embedding_dim), + input_dtype="float64", + ) + + def test_regularizers_LSTM(self): + embedding_dim = 4 + layer_class = keras.layers.LSTM + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_regularizer=keras.regularizers.l1(0.01), + recurrent_regularizer=keras.regularizers.l1(0.01), + bias_regularizer="l2", + activity_regularizer="l1", + ) + layer.build((None, None, 2)) + self.assertEqual(len(layer.losses), 3) + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + if tf.executing_eagerly(): + self.assertEqual(len(layer.losses), 4) + else: + self.assertEqual(len(layer.get_losses_for(x)), 1) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." 
+ ), + ) + def test_statefulness_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + layer_class = keras.layers.LSTM + model = keras.models.Sequential() + model.add( + keras.layers.Embedding( + 4, + embedding_dim, + mask_zero=True, + input_length=timesteps, + batch_input_shape=(num_samples, timesteps), + ) + ) + layer = layer_class( + units, return_sequences=False, stateful=True, weights=None + ) + model.add(layer) + model.compile( + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + out1 = model.predict(np.ones((num_samples, timesteps))) + self.assertEqual(out1.shape, (num_samples, units)) + + # train once so that the states change + model.train_on_batch( + np.ones((num_samples, timesteps)), np.ones((num_samples, units)) + ) + out2 = model.predict(np.ones((num_samples, timesteps))) + + # if the state is not reset, output should be different + self.assertNotEqual(out1.max(), out2.max()) + + # check that output changes after states are reset + # (even though the model itself didn't change) + layer.reset_states() + out3 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out2.max(), out3.max()) + + # check that container-level reset_states() works + model.reset_states() + out4 = model.predict(np.ones((num_samples, timesteps))) + self.assertAllClose(out3, out4, atol=1e-5) + + # check that the call to `predict` updated the states + out5 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out4.max(), out5.max()) + + # Check masking + layer.reset_states() + + left_padded_input = np.ones((num_samples, timesteps)) + left_padded_input[0, :1] = 0 + left_padded_input[1, :2] = 0 + out6 = model.predict(left_padded_input) + + layer.reset_states() + + right_padded_input = np.ones((num_samples, timesteps)) + right_padded_input[0, -1:] = 0 + right_padded_input[1, -2:] = 0 + out7 = model.predict(right_padded_input) + + layer.reset_states() + + mix_padded_input = np.ones((num_samples, timesteps)) + mix_padded_input[0, 1] = 0 + mix_padded_input[1, 0] = 0 + mix_padded_input[1, 2] = 0 + out8 = model.predict(mix_padded_input) + + self.assertAllClose(out7, out6, atol=1e-5) + self.assertAllClose(out8, out7, atol=1e-5) + + def test_stateful_LSTM_training(self): + # See b/123587692 for more context. 
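The contract all the stateful tests in this hunk rely on: a `stateful=True` layer carries its final states into the next batch instead of re-zeroing them, which is also why a fixed `batch_input_shape` is required. A minimal sketch (assuming eager TF 2.x):

import numpy as np
from tensorflow import keras

model = keras.Sequential(
    [keras.layers.LSTM(3, stateful=True, batch_input_shape=(2, 4, 5))]
)
x = np.ones((2, 4, 5), dtype="float32")

out1 = model.predict(x)
out2 = model.predict(x)  # state was carried over, so this differs
model.reset_states()  # zero the carried state
out3 = model.predict(x)  # matches the first, fresh-state run
np.testing.assert_allclose(out1, out3, atol=1e-5)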
+ vocab_size = 20 + embedding_dim = 10 + batch_size = 8 + timestep = 12 + units = 5 + x = np.random.randint(0, vocab_size, size=(batch_size, timestep)) + y = np.random.randint(0, vocab_size, size=(batch_size, timestep)) + + model = keras.Sequential( + [ + keras.layers.Embedding( + vocab_size, + embedding_dim, + batch_input_shape=[batch_size, timestep], + ), + keras.layers.LSTM(units, return_sequences=True, stateful=True), + keras.layers.Dense(vocab_size), + ] + ) + model.compile( + optimizer="adam", + loss="sparse_categorical_crossentropy", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, epochs=1, shuffle=False) + + def test_dropout_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.LSTM, + kwargs={"units": units, "dropout": 0.1, "recurrent_dropout": 0.1}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + def test_bidirectional(self): + batch = 128 + timestep = 20 + vocab_size = 1000 + model = keras.Sequential( + [ + keras.layers.Embedding(vocab_size, 64), + keras.layers.Bidirectional( + keras.layers.LSTM(64, return_sequences=True) + ), + keras.layers.Bidirectional(keras.layers.LSTM(32)), + keras.layers.Dense(64, activation="relu"), + keras.layers.Dense(1, activation="sigmoid"), + ] + ) + + model.compile( + loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"] + ) + + x = np.random.randint(0, vocab_size, size=(batch, timestep)) + y = np.random.randint(0, 1, size=(batch)) + model.fit(x, y, epochs=1, shuffle=False) + model.evaluate(x, y) + model.predict(x) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." + ), + ) + @test_utils.run_v2_only + def test_explicit_device_with_go_backward_and_mask(self): + batch_size = 8 + timestep = 7 + masksteps = 5 + units = 4 + + inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) + mask = np.ones((batch_size, timestep)).astype(bool) + mask[:, masksteps:] = 0 + + lstm_layer = keras.layers.LSTM( + units, return_sequences=True, go_backwards=True + ) + with test_utils.device(should_use_gpu=True): + outputs_masked = lstm_layer(inputs, mask=tf.constant(mask)) + outputs_trimmed = lstm_layer(inputs[:, :masksteps]) + self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed) + + @tf_test_util.enable_output_all_intermediates + def test_v1_session_behavior(self): + with tf.compat.v1.get_default_graph().as_default(): + # See b/139132348 for more details. + x = np.random.uniform(size=(100, 4, 8)) + y = np.random.uniform(size=(100, 1)) + dataset = ( + tf.data.Dataset.from_tensor_slices((x, y)) + .shuffle(100) + .batch(32) + ) + + inp = keras.layers.Input(shape=(4, 8)) + layer = keras.layers.LSTM(1)(inp) + layer = keras.layers.Dense(1)(layer) + + model = keras.models.Model(inp, layer) + + model.compile(loss="mse", optimizer="sgd") + model.fit(dataset) + + def test_with_fully_masked_inputs(self): + num_samples = 8 + timestep = 5 + embedding_dim = 4 + vocab_size = 20 + units = 2 + + inputs = np.random.randint(0, vocab_size, size=(num_samples, timestep)) + # Set the first inputs to be fully zero. 
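This test leans on `mask_zero=True`, which treats token id 0 as padding: the embedding layer derives a boolean mask for downstream RNNs, and an all-zero row yields an all-`False` mask that the LSTM must tolerate without crashing. A small sketch (assuming eager TF 2.x):

import numpy as np
from tensorflow import keras

emb = keras.layers.Embedding(20, 4, mask_zero=True)
ids = np.array([[0, 0, 0], [3, 7, 0]])
mask = emb.compute_mask(ids)
# mask -> [[False, False, False], [True, True, False]]; the first row
# is fully masked, the case this test guards against.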
+ inputs[0, :] = 0.0 + + model = keras.models.Sequential() + model.add( + keras.layers.Embedding( + vocab_size, + embedding_dim, + mask_zero=True, + input_length=timestep, + batch_input_shape=(num_samples, timestep), + ) + ) + layer = keras.layers.LSTM(units) + model.add(layer) + model.compile( + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + # Make sure it doesn't crash with cudnn kernel. + model.predict(inputs) + + # TODO (b/169895267): test with xla_gpu is disabled. + def test_deepcopy(self): + if not tf.executing_eagerly(): + self.skipTest("v2-only test") + original_layer = keras.layers.LSTM(5) + copied_layer = copy.deepcopy(original_layer) + self.assertEqual(copied_layer.units, 5) + self.assertEqual( + original_layer.get_config(), copied_layer.get_config() + ) + + # Copy layer before layer call on inputs without weight initialization. + inputs = np.random.normal(size=[32, 10, 8]).astype(np.float32) + original_layer = keras.layers.LSTM(4) + copied_layer = copy.deepcopy(original_layer) + outputs = original_layer(inputs) + copied_outputs = copied_layer(inputs) + self.assertNotAllClose( + self.evaluate(outputs), self.evaluate(copied_outputs) + ) + + # Copy layer after layer call on inputs with weight initialization. + original_layer = keras.layers.LSTM(4) + outputs = original_layer(inputs) + copied_layer = copy.deepcopy(original_layer) + copied_outputs = copied_layer(inputs) + self.assertAllClose( + self.evaluate(outputs), self.evaluate(copied_outputs) + ) + + def _test_runtime_with_model(self, model): + + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=self.batch, + test_samples=0, + input_shape=(self.timestep, self.input_shape), + num_classes=self.output_shape, + ) + y_train = np_utils.to_categorical(y_train, self.output_shape) + + model.compile( + optimizer="sgd", + loss=["categorical_crossentropy", None], + run_eagerly=test_utils.should_run_eagerly(), + ) + + existing_loss = 0 + for _ in range(self.epoch): + history = model.fit(x_train, y_train) + loss_value = history.history["loss"][0] + + self.assertNotEqual(existing_loss, loss_value) + existing_loss = loss_value + + _, runtime_value = model.predict(x_train) + if not tf.sysconfig.get_build_info()["is_rocm_build"]: + if tf.test.is_gpu_available(): + self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU) + else: + self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) + + @test_utils.run_v2_only + def test_LSTM_runtime(self): + layer = keras.layers.LSTM(self.rnn_state_size, return_runtime=True) + + inputs = keras.layers.Input( + shape=[self.timestep, self.input_shape], dtype=tf.float32 + ) + + outputs, runtime = layer(inputs) + # Expand the runtime so that it is a 1D tensor instead of scalar. + # TF model does not work with scalar model output, especially during + # aggregation. + runtime = keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))( + runtime + ) + model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime]) + self._test_runtime_with_model(model) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." + ), + ) + @test_utils.run_v2_only + def test_LSTM_runtime_with_mask(self): + # Masking will affect which backend is selected based on whether the + # mask is strictly right padded. 
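The padding comment above is the crux of kernel selection in these runtime tests: only a strictly right-padded mask keeps the fused cuDNN path eligible, while any mid-sequence gap forces the generic kernel. A sketch of the two mask shapes involved (assuming TF 2.x; the runtime constants checked below come from the `gru_lstm_utils` module these tests import):

import tensorflow as tf

# Strictly right-padded: what tf.sequence_mask builds from lengths.
# This shape of mask keeps the fused (cuDNN) kernel selectable.
right_padded = tf.sequence_mask(tf.constant([5, 3]), maxlen=7)

# A gap in the middle is not right-padded, so only the generic kernel
# can run, regardless of available GPUs.
mixed = tf.constant(
    [
        [True, True, False, True, True, False, False],
        [True, True, True, True, True, False, False],
    ]
)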
+ layer = keras.layers.LSTM(self.rnn_state_size, return_runtime=True) + + inputs = keras.layers.Input( + shape=[self.timestep, self.input_shape], dtype=tf.float32 + ) + masked_inputs = keras.layers.Masking()(inputs) + + outputs, runtime = layer(masked_inputs) + # Expand the runtime so that it is a 1D tensor instead of scalar. + # TF model does not work with scalar model output, especially during + # aggregation. + runtime = keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))( + runtime + ) + model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime]) + + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=self.batch, + test_samples=0, + input_shape=(self.timestep, self.input_shape), + num_classes=self.output_shape, + ) + y_train = np_utils.to_categorical(y_train, self.output_shape) + + model.compile( + optimizer="sgd", + loss=["categorical_crossentropy", None], + run_eagerly=test_utils.should_run_eagerly(), + ) + + model.fit(x_train, y_train) + + # Verify unpadded data. + _, runtime_value = model.predict(x_train) + if tf.test.is_gpu_available(): + self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU) + else: + self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) + + # Update x/y to be right padded by setting the last timestep to 0 + x_train[:, -1, :] = 0 + y_train[:, -1] = 0 + _, runtime_value = model.predict(x_train) + if tf.test.is_gpu_available(): + self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU) + else: + self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) + + # Further update x/y to be mix padded (masks in the middle), and verify + # that only the cpu kernel can be selected. + x_train[:, -3, :] = 0 + y_train[:, -3] = 0 + _, runtime_value = model.predict(x_train) + self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU) + + @test_utils.run_v2_only + def test_LSTM_runtime_with_cond(self): + # This test demonstrates the graph rewrite by the grappler plugin under + # the condition that the function returns a different number of internal + # states. + layer = keras.layers.LSTM(self.rnn_state_size, return_runtime=True) + + inputs = keras.layers.Input( + shape=[self.timestep, self.input_shape], dtype=tf.float32 + ) + + zeros = tf.zeros([self.batch, self.output_shape]) + dummy_runtime = gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_UNKNOWN) + a = tf.constant(0) + b = tf.constant(1) + # Will always run the lstm layer. + outputs, runtime = tf.cond( + tf.less(a, b), lambda: layer(inputs), lambda: (zeros, dummy_runtime) + ) + + # Expand the runtime so that it is a 1D tensor instead of scalar. + # TF model does not work with scalar model output, especially during + # aggregation. 
+ runtime = keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))( + runtime + ) + model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime]) + self._test_runtime_with_model(model) @test_combinations.run_all_keras_modes class LSTMLayerTest(test_combinations.TestCase): - - def test_return_sequences_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.LSTM, - kwargs={'units': units, - 'return_sequences': True}, - input_shape=(num_samples, timesteps, embedding_dim)) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Double type is yet not supported in ROCm') - @test_utils.run_v2_only - def test_float64_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.LSTM, - kwargs={'units': units, - 'return_sequences': True, - 'dtype': 'float64'}, - input_shape=(num_samples, timesteps, embedding_dim), - input_dtype='float64') - - def test_static_shape_inference_LSTM(self): - # Github issue: 15165 - timesteps = 3 - embedding_dim = 4 - units = 2 - - model = keras.models.Sequential() - inputs = keras.layers.Dense(embedding_dim, - input_shape=(timesteps, embedding_dim)) - model.add(inputs) - layer = keras.layers.LSTM(units, return_sequences=True) - model.add(layer) - outputs = model.layers[-1].output - self.assertEqual(outputs.shape.as_list(), [None, timesteps, units]) - - def test_dynamic_behavior_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer = keras.layers.LSTM(units, input_shape=(None, embedding_dim)) - model = keras.models.Sequential() - model.add(layer) - model.compile( - 'rmsprop', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - x = np.random.random((num_samples, timesteps, embedding_dim)) - y = np.random.random((num_samples, units)) - model.train_on_batch(x, y) - - def test_dropout_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.LSTM, - kwargs={'units': units, - 'dropout': 0.1, - 'recurrent_dropout': 0.1}, - input_shape=(num_samples, timesteps, embedding_dim)) - - def test_recurrent_dropout_with_implementation_restriction(self): - layer = keras.layers.LSTM(2, recurrent_dropout=0.1, implementation=2) - # The implementation is force to 1 due to the limit of recurrent_dropout. 
- self.assertEqual(layer.implementation, 1) - - @parameterized.parameters([0, 1, 2]) - def test_implementation_mode_LSTM(self, implementation_mode): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.LSTM, - kwargs={'units': units, - 'implementation': implementation_mode}, - input_shape=(num_samples, timesteps, embedding_dim)) - - def test_constraints_LSTM(self): - embedding_dim = 4 - layer_class = keras.layers.LSTM - k_constraint = keras.constraints.max_norm(0.01) - r_constraint = keras.constraints.max_norm(0.01) - b_constraint = keras.constraints.max_norm(0.01) - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_constraint=k_constraint, - recurrent_constraint=r_constraint, - bias_constraint=b_constraint) - layer.build((None, None, embedding_dim)) - self.assertEqual(layer.cell.kernel.constraint, k_constraint) - self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.cell.bias.constraint, b_constraint) - - @parameterized.parameters([True, False]) - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input.') - def test_with_masking_layer_LSTM(self, unroll): - layer_class = keras.layers.LSTM - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.Masking(input_shape=(3, 4))) - model.add(layer_class(units=5, return_sequences=True, unroll=unroll)) - model.compile( - loss='categorical_crossentropy', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - @parameterized.parameters([True, False]) - def test_masking_with_stacking_LSTM(self, unroll): - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.Masking(input_shape=(3, 4))) - lstm_cells = [keras.layers.LSTMCell(10), keras.layers.LSTMCell(5)] - model.add(keras.layers.RNN( - lstm_cells, return_sequences=True, unroll=unroll)) - model.compile( - loss='categorical_crossentropy', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - def test_from_config_LSTM(self): - layer_class = keras.layers.LSTM - for stateful in (False, True): - l1 = layer_class(units=1, stateful=stateful) - l2 = layer_class.from_config(l1.get_config()) - assert l1.get_config() == l2.get_config() - - def test_deep_copy_LSTM(self): - cell = keras.layers.LSTMCell(5) - copied_cell = copy.deepcopy(cell) - self.assertEqual(copied_cell.units, 5) - self.assertEqual(cell.get_config(), copied_cell.get_config()) - - def test_specify_initial_state_keras_tensor(self): - num_states = 2 - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - # Test with Keras tensor - inputs = keras.Input((timesteps, embedding_dim)) - initial_state = [keras.Input((units,)) for _ in range(num_states)] - layer = keras.layers.LSTM(units) - if len(initial_state) == 1: - output = layer(inputs, initial_state=initial_state[0]) - else: - output = layer(inputs, initial_state=initial_state) - self.assertTrue( - any(initial_state[0] is t - for t in layer._inbound_nodes[0].input_tensors)) - - model = keras.models.Model([inputs] 
+ initial_state, output) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.AdamOptimizer(), - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.train_on_batch([inputs] + initial_state, targets) - - def test_specify_initial_state_non_keras_tensor(self): - num_states = 2 - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - # Test with non-Keras tensor - inputs = keras.Input((timesteps, embedding_dim)) - initial_state = [keras.backend.random_normal_variable( - (num_samples, units), 0, 1) - for _ in range(num_states)] - layer = keras.layers.LSTM(units) - output = layer(inputs, initial_state=initial_state) - - model = keras.models.Model(inputs, output) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.AdamOptimizer(), - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - targets = np.random.random((num_samples, units)) - model.train_on_batch(inputs, targets) - - def test_reset_states_with_values(self): - num_states = 2 - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - layer = keras.layers.LSTM(units, stateful=True) - layer.build((num_samples, timesteps, embedding_dim)) - layer.reset_states() - assert len(layer.states) == num_states - assert layer.states[0] is not None - self.assertAllClose( - keras.backend.eval(layer.states[0]), - np.zeros(keras.backend.int_shape(layer.states[0])), - atol=1e-4) - state_shapes = [keras.backend.int_shape(state) for state in layer.states] - values = [np.ones(shape) for shape in state_shapes] - if len(values) == 1: - values = values[0] - layer.reset_states(values) - self.assertAllClose( - keras.backend.eval(layer.states[0]), - np.ones(keras.backend.int_shape(layer.states[0])), - atol=1e-4) - - # Test with invalid data - with self.assertRaises(ValueError): - layer.reset_states([1] * (len(layer.states) + 1)) - - def test_specify_state_with_masking(self): - num_states = 2 - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - inputs = keras.Input((timesteps, embedding_dim)) - _ = keras.layers.Masking()(inputs) - initial_state = [keras.Input((units,)) for _ in range(num_states)] - output = keras.layers.LSTM(units)(inputs, initial_state=initial_state) - - model = keras.models.Model([inputs] + initial_state, output) - model.compile( - loss='categorical_crossentropy', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.train_on_batch([inputs] + initial_state, targets) - - def test_return_state(self): - num_states = 2 - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim)) - layer = keras.layers.LSTM(units, return_state=True, stateful=True) - outputs = layer(inputs) - state = outputs[1:] - assert len(state) == num_states - model = keras.models.Model(inputs, state[0]) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - state = model.predict(inputs) - self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4) - - def 
test_state_reuse(self): - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - - inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim)) - layer = keras.layers.LSTM(units, return_state=True, return_sequences=True) - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - output = keras.layers.LSTM(units)(output, initial_state=state) - model = keras.models.Model(inputs, output) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - outputs = model.predict(inputs) - - def test_initial_states_as_other_inputs(self): - timesteps = 3 - embedding_dim = 4 - units = 3 - num_samples = 2 - num_states = 2 - layer_class = keras.layers.LSTM - - # Test with Keras tensor - main_inputs = keras.Input((timesteps, embedding_dim)) - initial_state = [keras.Input((units,)) for _ in range(num_states)] - inputs = [main_inputs] + initial_state - - layer = layer_class(units) - output = layer(inputs) - self.assertTrue( - any(initial_state[0] is t - for t in layer._inbound_nodes[0].input_tensors)) - - model = keras.models.Model(inputs, output) - model.compile( - loss='categorical_crossentropy', - optimizer=tf.compat.v1.train.AdamOptimizer(), - run_eagerly=test_utils.should_run_eagerly()) - - main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.train_on_batch([main_inputs] + initial_state, targets) - - def test_regularizers_LSTM(self): - embedding_dim = 4 - layer_class = keras.layers.LSTM - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_regularizer=keras.regularizers.l1(0.01), - recurrent_regularizer=keras.regularizers.l1(0.01), - bias_regularizer='l2', - activity_regularizer='l1') - layer.build((None, None, 2)) - self.assertEqual(len(layer.losses), 3) - x = keras.backend.variable(np.ones((2, 3, 2))) - layer(x) - if tf.executing_eagerly(): - self.assertEqual(len(layer.losses), 4) - else: - self.assertEqual(len(layer.get_losses_for(x)), 1) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input.') - def test_statefulness_LSTM(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer_class = keras.layers.LSTM - model = keras.models.Sequential() - model.add( - keras.layers.Embedding( - 4, - embedding_dim, - mask_zero=True, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = layer_class( - units, return_sequences=False, stateful=True, weights=None) - model.add(layer) - model.compile( - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - out1 = model.predict(np.ones((num_samples, timesteps))) - self.assertEqual(out1.shape, (num_samples, units)) - - # train once so that the states change - model.train_on_batch( - np.ones((num_samples, timesteps)), np.ones((num_samples, units))) - out2 = model.predict(np.ones((num_samples, timesteps))) - - # if the state is not reset, output should be different - self.assertNotEqual(out1.max(), out2.max()) - - # check that output changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out2.max(), out3.max()) - - # check that container-level reset_states() works - 
model.reset_states() - out4 = model.predict(np.ones((num_samples, timesteps))) - self.assertAllClose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out4.max(), out5.max()) - - # Check masking - layer.reset_states() - - left_padded_input = np.ones((num_samples, timesteps)) - left_padded_input[0, :1] = 0 - left_padded_input[1, :2] = 0 - out6 = model.predict(left_padded_input) - - layer.reset_states() - - right_padded_input = np.ones((num_samples, timesteps)) - right_padded_input[0, -1:] = 0 - right_padded_input[1, -2:] = 0 - out7 = model.predict(right_padded_input) - - self.assertAllClose(out7, out6, atol=1e-5) - - -if __name__ == '__main__': - tf.test.main() + def test_return_sequences_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.LSTM, + kwargs={"units": units, "return_sequences": True}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message="Double type is not yet supported in ROCm", + ) + @test_utils.run_v2_only + def test_float64_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.LSTM, + kwargs={ + "units": units, + "return_sequences": True, + "dtype": "float64", + }, + input_shape=(num_samples, timesteps, embedding_dim), + input_dtype="float64", + ) + + def test_static_shape_inference_LSTM(self): + # GitHub issue: 15165 + timesteps = 3 + embedding_dim = 4 + units = 2 + + model = keras.models.Sequential() + inputs = keras.layers.Dense( + embedding_dim, input_shape=(timesteps, embedding_dim) + ) + model.add(inputs) + layer = keras.layers.LSTM(units, return_sequences=True) + model.add(layer) + outputs = model.layers[-1].output + self.assertEqual(outputs.shape.as_list(), [None, timesteps, units]) + + def test_dynamic_behavior_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + layer = keras.layers.LSTM(units, input_shape=(None, embedding_dim)) + model = keras.models.Sequential() + model.add(layer) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + x = np.random.random((num_samples, timesteps, embedding_dim)) + y = np.random.random((num_samples, units)) + model.train_on_batch(x, y) + + def test_dropout_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.LSTM, + kwargs={"units": units, "dropout": 0.1, "recurrent_dropout": 0.1}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + def test_recurrent_dropout_with_implementation_restriction(self): + layer = keras.layers.LSTM(2, recurrent_dropout=0.1, implementation=2) + # The implementation is forced to 1 due to the limit of + # recurrent_dropout. 
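Beyond the silent downgrade asserted below, non-zero `recurrent_dropout` is also one of the configurations (see `test_could_use_defun_backend` earlier in this hunk) that rules out the fused GPU kernel. A sketch relying on the same private `_could_use_gpu_kernel` flag these tests already inspect:

from tensorflow import keras

layer = keras.layers.LSTM(2, recurrent_dropout=0.1, implementation=2)
print(layer.implementation)  # 1: implementation=2 is silently downgraded
print(layer._could_use_gpu_kernel)  # False: generic kernel only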
+ self.assertEqual(layer.implementation, 1) + + @test_utils.run_v2_only + def test_dropout_variable_name(self): + layer = keras.layers.RNN( + keras.layers.LSTMCell(2, dropout=0.1, force_generator=True) + ) + layer(np.random.random((2, 3, 4))) + self.assertEqual( + layer.cell._random_generator._generator._state_var.name, + "rnn/lstm_cell/StateVar:0", + ) + + layer = keras.layers.LSTM(2, dropout=0.1, force_generator=True) + layer(np.random.random((2, 3, 4))) + self.assertEqual( + layer._random_generator._generator._state_var.name, + "lstm/StateVar:0", + ) + + @parameterized.parameters([0, 1, 2]) + def test_implementation_mode_LSTM(self, implementation_mode): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.LSTM, + kwargs={"units": units, "implementation": implementation_mode}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + def test_constraints_LSTM(self): + embedding_dim = 4 + layer_class = keras.layers.LSTM + k_constraint = keras.constraints.max_norm(0.01) + r_constraint = keras.constraints.max_norm(0.01) + b_constraint = keras.constraints.max_norm(0.01) + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_constraint=k_constraint, + recurrent_constraint=r_constraint, + bias_constraint=b_constraint, + ) + layer.build((None, None, embedding_dim)) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) + + @parameterized.parameters([True, False]) + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message="Skipping as ROCm MIOpen does not support padded input.", + ) + def test_with_masking_layer_LSTM(self, unroll): + layer_class = keras.layers.LSTM + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.Masking(input_shape=(3, 4))) + model.add(layer_class(units=5, return_sequences=True, unroll=unroll)) + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) + + @parameterized.parameters([True, False]) + def test_masking_with_stacking_LSTM(self, unroll): + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.Masking(input_shape=(3, 4))) + lstm_cells = [keras.layers.LSTMCell(10), keras.layers.LSTMCell(5)] + model.add( + keras.layers.RNN(lstm_cells, return_sequences=True, unroll=unroll) + ) + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) + + def test_from_config_LSTM(self): + layer_class = keras.layers.LSTM + for stateful in (False, True): + l1 = layer_class(units=1, stateful=stateful) + l2 = layer_class.from_config(l1.get_config()) + assert l1.get_config() == l2.get_config() + + def test_deep_copy_LSTM(self): + cell = keras.layers.LSTMCell(5) + copied_cell = copy.deepcopy(cell) + self.assertEqual(copied_cell.units, 5) + self.assertEqual(cell.get_config(), copied_cell.get_config()) + + def test_specify_initial_state_keras_tensor(self): + num_states = 2 
+ timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + # Test with Keras tensor + inputs = keras.Input((timesteps, embedding_dim)) + initial_state = [keras.Input((units,)) for _ in range(num_states)] + layer = keras.layers.LSTM(units) + if len(initial_state) == 1: + output = layer(inputs, initial_state=initial_state[0]) + else: + output = layer(inputs, initial_state=initial_state) + self.assertTrue( + any( + initial_state[0] is t + for t in layer._inbound_nodes[0].input_tensors + ) + ) + + model = keras.models.Model([inputs] + initial_state, output) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.AdamOptimizer(), + run_eagerly=test_utils.should_run_eagerly(), + ) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [ + np.random.random((num_samples, units)) for _ in range(num_states) + ] + targets = np.random.random((num_samples, units)) + model.train_on_batch([inputs] + initial_state, targets) + + def test_specify_initial_state_non_keras_tensor(self): + num_states = 2 + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + # Test with non-Keras tensor + inputs = keras.Input((timesteps, embedding_dim)) + initial_state = [ + keras.backend.random_normal_variable((num_samples, units), 0, 1) + for _ in range(num_states) + ] + layer = keras.layers.LSTM(units) + output = layer(inputs, initial_state=initial_state) + + model = keras.models.Model(inputs, output) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.AdamOptimizer(), + run_eagerly=test_utils.should_run_eagerly(), + ) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + targets = np.random.random((num_samples, units)) + model.train_on_batch(inputs, targets) + + def test_reset_states_with_values(self): + num_states = 2 + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + layer = keras.layers.LSTM(units, stateful=True) + layer.build((num_samples, timesteps, embedding_dim)) + layer.reset_states() + assert len(layer.states) == num_states + assert layer.states[0] is not None + self.assertAllClose( + keras.backend.eval(layer.states[0]), + np.zeros(keras.backend.int_shape(layer.states[0])), + atol=1e-4, + ) + state_shapes = [ + keras.backend.int_shape(state) for state in layer.states + ] + values = [np.ones(shape) for shape in state_shapes] + if len(values) == 1: + values = values[0] + layer.reset_states(values) + self.assertAllClose( + keras.backend.eval(layer.states[0]), + np.ones(keras.backend.int_shape(layer.states[0])), + atol=1e-4, + ) + + # Test with invalid data + with self.assertRaises(ValueError): + layer.reset_states([1] * (len(layer.states) + 1)) + + def test_specify_state_with_masking(self): + num_states = 2 + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + inputs = keras.Input((timesteps, embedding_dim)) + _ = keras.layers.Masking()(inputs) + initial_state = [keras.Input((units,)) for _ in range(num_states)] + output = keras.layers.LSTM(units)(inputs, initial_state=initial_state) + + model = keras.models.Model([inputs] + initial_state, output) + model.compile( + loss="categorical_crossentropy", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [ + np.random.random((num_samples, units)) for _ in range(num_states) + ] + targets = np.random.random((num_samples, units)) + model.train_on_batch([inputs] + initial_state, targets) + + def 
test_return_state(self): + num_states = 2 + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + inputs = keras.Input( + batch_shape=(num_samples, timesteps, embedding_dim) + ) + layer = keras.layers.LSTM(units, return_state=True, stateful=True) + outputs = layer(inputs) + state = outputs[1:] + assert len(state) == num_states + model = keras.models.Model(inputs, state[0]) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + state = model.predict(inputs) + self.assertAllClose( + keras.backend.eval(layer.states[0]), state, atol=1e-4 + ) + + def test_state_reuse(self): + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + + inputs = keras.Input( + batch_shape=(num_samples, timesteps, embedding_dim) + ) + layer = keras.layers.LSTM( + units, return_state=True, return_sequences=True + ) + outputs = layer(inputs) + output, state = outputs[0], outputs[1:] + output = keras.layers.LSTM(units)(output, initial_state=state) + model = keras.models.Model(inputs, output) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + outputs = model.predict(inputs) + + def test_initial_states_as_other_inputs(self): + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + num_states = 2 + layer_class = keras.layers.LSTM + + # Test with Keras tensor + main_inputs = keras.Input((timesteps, embedding_dim)) + initial_state = [keras.Input((units,)) for _ in range(num_states)] + inputs = [main_inputs] + initial_state + + layer = layer_class(units) + output = layer(inputs) + self.assertTrue( + any( + initial_state[0] is t + for t in layer._inbound_nodes[0].input_tensors + ) + ) + + model = keras.models.Model(inputs, output) + model.compile( + loss="categorical_crossentropy", + optimizer=tf.compat.v1.train.AdamOptimizer(), + run_eagerly=test_utils.should_run_eagerly(), + ) + + main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [ + np.random.random((num_samples, units)) for _ in range(num_states) + ] + targets = np.random.random((num_samples, units)) + model.train_on_batch([main_inputs] + initial_state, targets) + + def test_regularizers_LSTM(self): + embedding_dim = 4 + layer_class = keras.layers.LSTM + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_regularizer=keras.regularizers.l1(0.01), + recurrent_regularizer=keras.regularizers.l1(0.01), + bias_regularizer="l2", + activity_regularizer="l1", + ) + layer.build((None, None, 2)) + self.assertEqual(len(layer.losses), 3) + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + if tf.executing_eagerly(): + self.assertEqual(len(layer.losses), 4) + else: + self.assertEqual(len(layer.get_losses_for(x)), 1) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message="Skipping as ROCm MIOpen does not support padded input.", + ) + def test_statefulness_LSTM(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + layer_class = keras.layers.LSTM + model = keras.models.Sequential() + model.add( + keras.layers.Embedding( + 4, + embedding_dim, + mask_zero=True, + input_length=timesteps, + batch_input_shape=(num_samples, timesteps), + ) + ) + layer = layer_class( + units, return_sequences=False, stateful=True, weights=None + ) + model.add(layer) + model.compile( + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + out1 = model.predict(np.ones((num_samples, timesteps))) + 
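test_state_reuse above feeds the states returned by one LSTM into the initial_state of a second. A minimal encoder-style sketch of the same wiring (assuming the public tf.keras API; sizes are illustrative):

# Sketch: reuse the final (h, c) states of one LSTM as the initial state
# of the next. With return_state=True the layer returns [sequences, h, c].
from tensorflow import keras

inputs = keras.Input(batch_shape=(2, 3, 4))
seq, h, c = keras.layers.LSTM(3, return_sequences=True, return_state=True)(inputs)
outputs = keras.layers.LSTM(3)(seq, initial_state=[h, c])
model = keras.Model(inputs, outputs)
model.summary()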
self.assertEqual(out1.shape, (num_samples, units)) + + # train once so that the states change + model.train_on_batch( + np.ones((num_samples, timesteps)), np.ones((num_samples, units)) + ) + out2 = model.predict(np.ones((num_samples, timesteps))) + + # if the state is not reset, output should be different + self.assertNotEqual(out1.max(), out2.max()) + + # check that output changes after states are reset + # (even though the model itself didn't change) + layer.reset_states() + out3 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out2.max(), out3.max()) + + # check that container-level reset_states() works + model.reset_states() + out4 = model.predict(np.ones((num_samples, timesteps))) + self.assertAllClose(out3, out4, atol=1e-5) + + # check that the call to `predict` updated the states + out5 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out4.max(), out5.max()) + + # Check masking + layer.reset_states() + + left_padded_input = np.ones((num_samples, timesteps)) + left_padded_input[0, :1] = 0 + left_padded_input[1, :2] = 0 + out6 = model.predict(left_padded_input) + + layer.reset_states() + + right_padded_input = np.ones((num_samples, timesteps)) + right_padded_input[0, -1:] = 0 + right_padded_input[1, -2:] = 0 + out7 = model.predict(right_padded_input) + + self.assertAllClose(out7, out6, atol=1e-5) + + @test_utils.run_v2_only + def test_cloned_weight_names(self): + inp = keras.Input([None, 3]) + rnn = keras.layers.LSTM(units=3) + model = keras.Model(inp, rnn(inp)) + clone = keras.models.clone_model(model) + + model_names = [x.name for x in model.weights] + clone_names = [x.name for x in clone.weights] + self.assertEqual(model_names, clone_names) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/lstm_v1.py b/keras/layers/rnn/lstm_v1.py index d883879b12b9..78d4c700cbb6 100644 --- a/keras/layers/rnn/lstm_v1.py +++ b/keras/layers/rnn/lstm_v1.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== """Long Short-Term Memory V1 layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + from keras import activations from keras import constraints @@ -24,372 +24,381 @@ from keras.layers.rnn import rnn_utils from keras.layers.rnn.base_rnn import RNN +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export -@keras_export(v1=['keras.layers.LSTMCell']) +@keras_export(v1=["keras.layers.LSTMCell"]) class LSTMCell(lstm.LSTMCell): - """Cell class for the LSTM layer. - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - Default: hyperbolic tangent (`tanh`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step. - Default: hard sigmoid (`hard_sigmoid`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - unit_forget_bias: Boolean. 
- If True, add 1 to the bias of the forget gate at initialization. - Setting it to true will also force `bias_initializer="zeros"`. - This is recommended in [Jozefowicz et al., 2015]( - http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - kernel_constraint: Constraint function applied to - the `kernel` weights matrix. - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - - Call arguments: - inputs: A 2D tensor. - states: List of state tensors corresponding to the previous timestep. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. Only relevant when `dropout` or - `recurrent_dropout` is used. - """ - - def __init__(self, - units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - **kwargs): - super().__init__( + """Cell class for the LSTM layer. + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + unit_forget_bias: Boolean. + If True, add 1 to the bias of the forget gate at initialization. + Setting it to true will also force `bias_initializer="zeros"`. + This is recommended in [Jozefowicz et al., 2015]( + http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix. + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + kernel_constraint: Constraint function applied to + the `kernel` weights matrix. + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. 
+ Fraction of the units to drop for + the linear transformation of the recurrent state. + + Call arguments: + inputs: A 2D tensor. + states: List of state tensors corresponding to the previous timestep. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. Only relevant when `dropout` or + `recurrent_dropout` is used. + """ + + def __init__( + self, units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - unit_forget_bias=unit_forget_bias, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=kwargs.pop('implementation', 1), - **kwargs) - - -@keras_export(v1=['keras.layers.LSTM']) + activation="tanh", + recurrent_activation="hard_sigmoid", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + **kwargs + ): + super().__init__( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + unit_forget_bias=unit_forget_bias, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=kwargs.pop("implementation", 1), + **kwargs + ) + + +@keras_export(v1=["keras.layers.LSTM"]) class LSTM(RNN): - """Long Short-Term Memory layer - Hochreiter 1997. - - Note that this cell is not optimized for performance on GPU. Please use - `tf.compat.v1.keras.layers.CuDNNLSTM` for better performance on GPU. - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - Default: hyperbolic tangent (`tanh`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step. - Default: hard sigmoid (`hard_sigmoid`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs.. - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - bias_initializer: Initializer for the bias vector. - unit_forget_bias: Boolean. - If True, add 1 to the bias of the forget gate at initialization. - Setting it to true will also force `bias_initializer="zeros"`. - This is recommended in [Jozefowicz et al., 2015]( - http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). 
- kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix. - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - time_major: The shape format of the `inputs` and `outputs` tensors. - If True, the inputs and outputs will be in shape - `(timesteps, batch, ...)`, whereas in the False case, it will be - `(batch, timesteps, ...)`. Using `time_major = True` is a bit more - efficient because it avoids transposes at the beginning and end of the - RNN calculation. However, most TensorFlow data is batch-major, so by - default this function accepts input and emits output in batch-major - form. - - Call arguments: - inputs: A 3D tensor. - mask: Binary tensor of shape `(samples, timesteps)` indicating whether - a given timestep should be masked. An individual `True` entry indicates - that the corresponding timestep should be utilized, while a `False` - entry indicates that the corresponding timestep should be ignored. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the cell - when calling it. This is only relevant if `dropout` or - `recurrent_dropout` is used. - initial_state: List of initial state tensors to be passed to the first - call of the cell. - """ - - def __init__(self, - units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - **kwargs): - implementation = kwargs.pop('implementation', 1) - if implementation == 0: - logging.warning('`implementation=0` has been deprecated, ' - 'and now defaults to `implementation=1`.' 
- 'Please update your layer call.') - if 'enable_caching_device' in kwargs: - cell_kwargs = {'enable_caching_device': - kwargs.pop('enable_caching_device')} - else: - cell_kwargs = {} - cell = LSTMCell( + """Long Short-Term Memory layer - Hochreiter 1997. + + Note that this cell is not optimized for performance on GPU. Please use + `tf.compat.v1.keras.layers.CuDNNLSTM` for better performance on GPU. + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs.. + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + unit_forget_bias: Boolean. + If True, add 1 to the bias of the forget gate at initialization. + Setting it to true will also force `bias_initializer="zeros"`. + This is recommended in [Jozefowicz et al., 2015]( + http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix. + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation"). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix. + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + return_sequences: Boolean. Whether to return the last output + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + time_major: The shape format of the `inputs` and `outputs` tensors. + If True, the inputs and outputs will be in shape + `(timesteps, batch, ...)`, whereas in the False case, it will be + `(batch, timesteps, ...)`. Using `time_major = True` is a bit more + efficient because it avoids transposes at the beginning and end of the + RNN calculation. 
However, most TensorFlow data is batch-major, so by + default this function accepts input and emits output in batch-major + form. + + Call arguments: + inputs: A 3D tensor. + mask: Binary tensor of shape `(samples, timesteps)` indicating whether + a given timestep should be masked. An individual `True` entry indicates + that the corresponding timestep should be utilized, while a `False` + entry indicates that the corresponding timestep should be ignored. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. This argument is passed to the cell + when calling it. This is only relevant if `dropout` or + `recurrent_dropout` is used. + initial_state: List of initial state tensors to be passed to the first + call of the cell. + """ + + def __init__( + self, units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - unit_forget_bias=unit_forget_bias, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=implementation, - dtype=kwargs.get('dtype'), - trainable=kwargs.get('trainable', True), - **cell_kwargs) - super().__init__( - cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.input_spec = [InputSpec(ndim=3)] - - def call(self, inputs, mask=None, training=None, initial_state=None): - return super().call( - inputs, mask=mask, training=training, initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def unit_forget_bias(self): - return self.cell.unit_forget_bias - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - @property - def implementation(self): - return self.cell.implementation - - def get_config(self): - config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - 
initializers.serialize(self.kernel_initializer),
-        'recurrent_initializer':
-            initializers.serialize(self.recurrent_initializer),
-        'bias_initializer':
-            initializers.serialize(self.bias_initializer),
-        'unit_forget_bias':
-            self.unit_forget_bias,
-        'kernel_regularizer':
-            regularizers.serialize(self.kernel_regularizer),
-        'recurrent_regularizer':
-            regularizers.serialize(self.recurrent_regularizer),
-        'bias_regularizer':
-            regularizers.serialize(self.bias_regularizer),
-        'activity_regularizer':
-            regularizers.serialize(self.activity_regularizer),
-        'kernel_constraint':
-            constraints.serialize(self.kernel_constraint),
-        'recurrent_constraint':
-            constraints.serialize(self.recurrent_constraint),
-        'bias_constraint':
-            constraints.serialize(self.bias_constraint),
-        'dropout':
-            self.dropout,
-        'recurrent_dropout':
-            self.recurrent_dropout,
-        'implementation':
-            self.implementation
-    }
-    config.update(rnn_utils.config_for_enable_caching_device(self.cell))
-    base_config = super().get_config()
-    del base_config['cell']
-    return dict(list(base_config.items()) + list(config.items()))
-
-  @classmethod
-  def from_config(cls, config):
-    if 'implementation' in config and config['implementation'] == 0:
-      config['implementation'] = 1
-    return cls(**config)
+        activation="tanh",
+        recurrent_activation="hard_sigmoid",
+        use_bias=True,
+        kernel_initializer="glorot_uniform",
+        recurrent_initializer="orthogonal",
+        bias_initializer="zeros",
+        unit_forget_bias=True,
+        kernel_regularizer=None,
+        recurrent_regularizer=None,
+        bias_regularizer=None,
+        activity_regularizer=None,
+        kernel_constraint=None,
+        recurrent_constraint=None,
+        bias_constraint=None,
+        dropout=0.0,
+        recurrent_dropout=0.0,
+        return_sequences=False,
+        return_state=False,
+        go_backwards=False,
+        stateful=False,
+        unroll=False,
+        **kwargs
+    ):
+        implementation = kwargs.pop("implementation", 1)
+        if implementation == 0:
+            logging.warning(
+                "`implementation=0` has been deprecated, "
+                "and now defaults to `implementation=1`. "
+                "Please update your layer call."
+ ) + if "enable_caching_device" in kwargs: + cell_kwargs = { + "enable_caching_device": kwargs.pop("enable_caching_device") + } + else: + cell_kwargs = {} + cell = LSTMCell( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + unit_forget_bias=unit_forget_bias, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=implementation, + dtype=kwargs.get("dtype"), + trainable=kwargs.get("trainable", True), + name="lstm_cell", + **cell_kwargs + ) + super().__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + **kwargs + ) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.input_spec = [InputSpec(ndim=3)] + + def call(self, inputs, mask=None, training=None, initial_state=None): + return super().call( + inputs, mask=mask, training=training, initial_state=initial_state + ) + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def unit_forget_bias(self): + return self.cell.unit_forget_bias + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + @property + def implementation(self): + return self.cell.implementation + + def get_config(self): + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "recurrent_activation": activations.serialize( + self.recurrent_activation + ), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "unit_forget_bias": self.unit_forget_bias, + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": 
constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + "implementation": self.implementation, + } + config.update(rnn_utils.config_for_enable_caching_device(self.cell)) + base_config = super().get_config() + del base_config["cell"] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if "implementation" in config and config["implementation"] == 0: + config["implementation"] = 1 + return cls(**config) diff --git a/keras/layers/rnn/lstm_v1_test.py b/keras/layers/rnn/lstm_v1_test.py index 0cf6ffa0dd92..f1d539985dd8 100644 --- a/keras/layers/rnn/lstm_v1_test.py +++ b/keras/layers/rnn/lstm_v1_test.py @@ -17,20 +17,21 @@ import time +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.layers.rnn import lstm from keras.layers.rnn import lstm_v1 from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import np_utils -import numpy as np -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.platform import tf_logging as logging - # Global config for grappler setting that is used for graph mode test. _rewrites = rewriter_config_pb2.RewriterConfig() _rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON @@ -41,281 +42,330 @@ @test_combinations.run_all_keras_modes(config=_config) class LSTMGraphRewriteTest(test_combinations.TestCase): + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." + ), + ) + @test_utils.run_v2_only + def test_lstm_feature_parity_v1_v2(self): + input_shape = 10 + rnn_state_size = 8 + timestep = 4 + batch = 20 + + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=rnn_state_size, + random_seed=87654321, + ) + y_train = np_utils.to_categorical(y_train, rnn_state_size) + # For the last batch item of the test data, we filter out the last + # timestep to simulate the variable length sequence and masking test. 
+ x_train[-2:, -1, :] = 0.0 + y_train[-2:] = 0 + + inputs = keras.layers.Input( + shape=[timestep, input_shape], dtype=tf.float32 + ) + masked_input = keras.layers.Masking()(inputs) + lstm_layer = lstm_v1.LSTM( + rnn_state_size, recurrent_activation="sigmoid" + ) + output = lstm_layer(masked_input) + lstm_model = keras.models.Model(inputs, output) + weights = lstm_model.get_weights() + y_1 = lstm_model.predict(x_train) + lstm_model.compile("rmsprop", "mse") + lstm_model.fit(x_train, y_train) + y_2 = lstm_model.predict(x_train) + + with test_utils.device(should_use_gpu=True): + cudnn_layer = lstm.LSTM(rnn_state_size) + cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input)) + cudnn_model.set_weights(weights) + y_3 = cudnn_model.predict(x_train) + cudnn_model.compile("rmsprop", "mse") + cudnn_model.fit(x_train, y_train) + y_4 = cudnn_model.predict(x_train) + + self.assertAllClose(y_1, y_3, rtol=1e-5, atol=2e-5) + self.assertAllClose(y_2, y_4, rtol=1e-5, atol=2e-5) + + @parameterized.named_parameters( + # test_name, time_major, go_backwards + ("normal", False, False), + ("time_major", True, False), + ("go_backwards", False, True), + ("both", True, True), + ) + def test_time_major_and_go_backward_v1_v2(self, time_major, go_backwards): + input_shape = 10 + rnn_state_size = 8 + timestep = 4 + batch = 100 + + x_train = np.random.random((batch, timestep, input_shape)) + + def build_model(layer_cls): + inputs = keras.layers.Input( + shape=[timestep, input_shape], dtype=tf.float32 + ) + layer = layer_cls( + rnn_state_size, + recurrent_activation="sigmoid", + time_major=time_major, + return_sequences=True, + go_backwards=go_backwards, + ) + if time_major: + converted_input = keras.layers.Lambda( + lambda t: tf.transpose(t, [1, 0, 2]) + )(inputs) + outputs = layer(converted_input) + outputs = keras.layers.Lambda( + lambda t: tf.transpose(t, [1, 0, 2]) + )(outputs) + else: + outputs = layer(inputs) + return keras.models.Model(inputs, outputs) + + lstm_model = build_model(lstm_v1.LSTM) + y_ref = lstm_model.predict(x_train) + weights = lstm_model.get_weights() + + lstm_v2_model = build_model(lstm.LSTM) + lstm_v2_model.set_weights(weights) + y = lstm_v2_model.predict(x_train) + + self.assertAllClose(y, y_ref) + + input_shape = 10 + rnn_state_size = 8 + output_shape = 8 + timestep = 4 + batch = 100 + epoch = 10 + + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=batch, + test_samples=0, + input_shape=(timestep, input_shape), + num_classes=output_shape, + ) + y_train = np_utils.to_categorical(y_train, output_shape) + + layer = lstm.LSTM(rnn_state_size) + + inputs = keras.layers.Input( + shape=[timestep, input_shape], dtype=tf.float32 + ) - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - @test_utils.run_v2_only - def test_lstm_feature_parity_v1_v2(self): - input_shape = 10 - rnn_state_size = 8 - timestep = 4 - batch = 20 - - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=rnn_state_size, - random_seed=87654321) - y_train = np_utils.to_categorical(y_train, rnn_state_size) - # For the last batch item of the test data, we filter out the last - # timestep to simulate the variable length sequence and masking test. 
- x_train[-2:, -1, :] = 0.0 - y_train[-2:] = 0 - - inputs = keras.layers.Input( - shape=[timestep, input_shape], dtype=tf.float32) - masked_input = keras.layers.Masking()(inputs) - lstm_layer = lstm_v1.LSTM(rnn_state_size, recurrent_activation='sigmoid') - output = lstm_layer(masked_input) - lstm_model = keras.models.Model(inputs, output) - weights = lstm_model.get_weights() - y_1 = lstm_model.predict(x_train) - lstm_model.compile('rmsprop', 'mse') - lstm_model.fit(x_train, y_train) - y_2 = lstm_model.predict(x_train) - - with test_utils.device(should_use_gpu=True): - cudnn_layer = lstm.LSTM(rnn_state_size) - cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input)) - cudnn_model.set_weights(weights) - y_3 = cudnn_model.predict(x_train) - cudnn_model.compile('rmsprop', 'mse') - cudnn_model.fit(x_train, y_train) - y_4 = cudnn_model.predict(x_train) - - self.assertAllClose(y_1, y_3, rtol=1e-5, atol=2e-5) - self.assertAllClose(y_2, y_4, rtol=1e-5, atol=2e-5) - - @parameterized.named_parameters( - # test_name, time_major, go_backwards - ('normal', False, False), - ('time_major', True, False), - ('go_backwards', False, True), - ('both', True, True), - ) - def test_time_major_and_go_backward_v1_v2(self, time_major, go_backwards): - input_shape = 10 - rnn_state_size = 8 - timestep = 4 - batch = 100 - - x_train = np.random.random((batch, timestep, input_shape)) - - def build_model(layer_cls): - inputs = keras.layers.Input( - shape=[timestep, input_shape], dtype=tf.float32) - layer = layer_cls(rnn_state_size, - recurrent_activation='sigmoid', - time_major=time_major, - return_sequences=True, - go_backwards=go_backwards) - if time_major: - converted_input = keras.layers.Lambda( - lambda t: tf.transpose(t, [1, 0, 2]))(inputs) - outputs = layer(converted_input) - outputs = keras.layers.Lambda( - lambda t: tf.transpose(t, [1, 0, 2]))(outputs) - else: outputs = layer(inputs) - return keras.models.Model(inputs, outputs) - - lstm_model = build_model(lstm_v1.LSTM) - y_ref = lstm_model.predict(x_train) - weights = lstm_model.get_weights() - - lstm_v2_model = build_model(lstm.LSTM) - lstm_v2_model.set_weights(weights) - y = lstm_v2_model.predict(x_train) - - self.assertAllClose(y, y_ref) - - input_shape = 10 - rnn_state_size = 8 - output_shape = 8 - timestep = 4 - batch = 100 - epoch = 10 - - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=batch, - test_samples=0, - input_shape=(timestep, input_shape), - num_classes=output_shape) - y_train = np_utils.to_categorical(y_train, output_shape) - - layer = lstm.LSTM(rnn_state_size) - - inputs = keras.layers.Input( - shape=[timestep, input_shape], dtype=tf.float32) - - outputs = layer(inputs) - model = keras.models.Model(inputs, outputs) - model.compile('rmsprop', loss='mse') - model.fit(x_train, y_train, epochs=epoch) - model.evaluate(x_train, y_train) - model.predict(x_train) - - @tf.test.disable_with_predicate( - pred=tf.test.is_built_with_rocm, - skip_message='Skipping as ROCm MIOpen does not support padded input yet.') - @test_utils.run_v2_only - def test_explicit_device_with_go_backward_and_mask_v1(self): - batch_size = 8 - timestep = 7 - masksteps = 5 - units = 4 - - inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) - mask = np.ones((batch_size, timestep)).astype(np.bool) - mask[:, masksteps:] = 0 - - lstm_v1_layer = lstm_v1.LSTM( - units, return_sequences=True, go_backwards=True) - with test_utils.device(should_use_gpu=True): - outputs_masked_v1 = lstm_v1_layer(inputs, mask=tf.constant(mask)) - 
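The masking test here (old version above, reformatted version below) checks that masking padded steps is equivalent to trimming them away. A minimal forward-direction sketch of that invariant (assuming eager execution, a right-padded mask, and the public tf.keras API):

# Sketch: for right-padded inputs, outputs on the valid steps match the
# outputs obtained by simply trimming the padding off.
import numpy as np
import tensorflow as tf
from tensorflow import keras

batch, timestep, masksteps, units = 8, 7, 5, 4
x = np.random.randn(batch, timestep, units).astype(np.float32)
mask = np.ones((batch, timestep), dtype=bool)
mask[:, masksteps:] = False  # right padding

layer = keras.layers.LSTM(units, return_sequences=True)
masked = layer(x, mask=tf.constant(mask))
trimmed = layer(x[:, :masksteps])  # same weights, padding removed
np.testing.assert_allclose(masked[:, :masksteps], trimmed, atol=1e-5)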
outputs_trimmed_v1 = lstm_v1_layer(inputs[:, :masksteps]) - self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1) + model = keras.models.Model(inputs, outputs) + model.compile("rmsprop", loss="mse") + model.fit(x_train, y_train, epochs=epoch) + model.evaluate(x_train, y_train) + model.predict(x_train) + + @tf.test.disable_with_predicate( + pred=tf.test.is_built_with_rocm, + skip_message=( + "Skipping as ROCm MIOpen does not support padded input yet." + ), + ) + @test_utils.run_v2_only + def test_explicit_device_with_go_backward_and_mask_v1(self): + batch_size = 8 + timestep = 7 + masksteps = 5 + units = 4 + + inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) + mask = np.ones((batch_size, timestep)).astype(bool) + mask[:, masksteps:] = 0 + + lstm_v1_layer = lstm_v1.LSTM( + units, return_sequences=True, go_backwards=True + ) + with test_utils.device(should_use_gpu=True): + outputs_masked_v1 = lstm_v1_layer(inputs, mask=tf.constant(mask)) + outputs_trimmed_v1 = lstm_v1_layer(inputs[:, :masksteps]) + self.assertAllClose( + outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1 + ) class LSTMPerformanceTest(tf.test.Benchmark): + def _measure_performance(self, test_config, model, x_train, y_train): + batch = test_config["batch"] + epoch = test_config["epoch"] + warmup_epoch = test_config["warmup_epoch"] + + # warm up the model + model.fit(x_train, y_train, batch_size=batch, epochs=warmup_epoch) + start_time = time.time() + model.fit( + x_train, y_train, batch_size=batch, epochs=epoch - warmup_epoch + ) + end_time = time.time() + return (end_time - start_time) / (epoch - warmup_epoch) + + def _time_performance_run_cudnn_lstm(self, test_config, x_train, y_train): + # Get the performance number for standard Cudnn LSTM + input_shape = test_config["input_shape"] + rnn_state_size = test_config["rnn_state_size"] + timestep = test_config["timestep"] + + cudnn_lstm_layer = keras.layers.CuDNNLSTM(rnn_state_size) + inputs = keras.layers.Input( + shape=[timestep, input_shape], dtype=tf.float32 + ) + + outputs = cudnn_lstm_layer(inputs) + model = keras.models.Model(inputs, outputs) + model.compile("sgd", "mse") + + sec_per_epoch = self._measure_performance( + test_config, model, x_train, y_train + ) + logging.info( + "Average performance for %s per epoch is: %s", + "CuDNN LSTM", + sec_per_epoch, + ) + return sec_per_epoch + + def _time_performance_run_unifed_lstm_gpu( + self, test_config, x_train, y_train + ): + # Get performance number for lstm_v2 with grappler swap the impl + input_shape = test_config["input_shape"] + rnn_state_size = test_config["rnn_state_size"] + timestep = test_config["timestep"] + + layer = keras.layers.LSTM(rnn_state_size) + inputs = keras.layers.Input( + shape=[timestep, input_shape], dtype=tf.float32 + ) + + outputs = layer(inputs) + model = keras.models.Model(inputs, outputs) + model.compile("sgd", "mse") + + sec_per_epoch = self._measure_performance( + test_config, model, x_train, y_train + ) + logging.info( + "Average performance for %s per epoch is: %s", + "LSTM V2", + sec_per_epoch, + ) + return sec_per_epoch + + def _time_performance_run_normal_lstm(self, test_config, x_train, y_train): + # Get performance number for standard LSTM on GPU. 
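_measure_performance above runs a warm-up pass before timing the real epochs. The pattern, stripped to its essentials (a sketch, not the benchmark itself; verbose=0 is an addition here to keep timings clean):

# Sketch: per-epoch wall time with the warm-up excluded from the measurement.
import time

def seconds_per_epoch(model, x, y, batch=64, epochs=10, warmup=1):
    model.fit(x, y, batch_size=batch, epochs=warmup, verbose=0)  # warm up
    start = time.time()
    model.fit(x, y, batch_size=batch, epochs=epochs - warmup, verbose=0)
    return (time.time() - start) / (epochs - warmup)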
+ input_shape = test_config["input_shape"] + rnn_state_size = test_config["rnn_state_size"] + timestep = test_config["timestep"] + + layer = lstm_v1.LSTM(rnn_state_size) + inputs = keras.layers.Input( + shape=[timestep, input_shape], dtype=tf.float32 + ) - def _measure_performance(self, test_config, model, x_train, y_train): - batch = test_config['batch'] - epoch = test_config['epoch'] - warmup_epoch = test_config['warmup_epoch'] - - # warm up the model - model.fit(x_train, y_train, batch_size=batch, epochs=warmup_epoch) - start_time = time.time() - model.fit(x_train, y_train, batch_size=batch, epochs=epoch - warmup_epoch) - end_time = time.time() - return (end_time - start_time) / (epoch - warmup_epoch) - - def _time_performance_run_cudnn_lstm(self, test_config, x_train, y_train): - # Get the performance number for standard Cudnn LSTM - input_shape = test_config['input_shape'] - rnn_state_size = test_config['rnn_state_size'] - timestep = test_config['timestep'] - - cudnn_lstm_layer = keras.layers.CuDNNLSTM(rnn_state_size) - inputs = keras.layers.Input( - shape=[timestep, input_shape], dtype=tf.float32) - - outputs = cudnn_lstm_layer(inputs) - model = keras.models.Model(inputs, outputs) - model.compile('sgd', 'mse') - - sec_per_epoch = self._measure_performance( - test_config, model, x_train, y_train) - logging.info('Average performance for %s per epoch is: %s', - 'CuDNN LSTM', sec_per_epoch) - return sec_per_epoch - - def _time_performance_run_unifed_lstm_gpu( - self, test_config, x_train, y_train): - # Get performance number for lstm_v2 with grappler swap the impl - input_shape = test_config['input_shape'] - rnn_state_size = test_config['rnn_state_size'] - timestep = test_config['timestep'] - - layer = keras.layers.LSTM(rnn_state_size) - inputs = keras.layers.Input( - shape=[timestep, input_shape], dtype=tf.float32) - - outputs = layer(inputs) - model = keras.models.Model(inputs, outputs) - model.compile('sgd', 'mse') - - sec_per_epoch = self._measure_performance( - test_config, model, x_train, y_train) - logging.info('Average performance for %s per epoch is: %s', - 'LSTM V2', sec_per_epoch) - return sec_per_epoch - - def _time_performance_run_normal_lstm( - self, test_config, x_train, y_train): - # Get performance number for standard LSTM on GPU. - input_shape = test_config['input_shape'] - rnn_state_size = test_config['rnn_state_size'] - timestep = test_config['timestep'] - - layer = lstm_v1.LSTM(rnn_state_size) - inputs = keras.layers.Input( - shape=[timestep, input_shape], dtype=tf.float32) - - outputs = layer(inputs) - model = keras.models.Model(inputs, outputs) - model.compile('sgd', 'mse') - - sec_per_epoch = self._measure_performance( - test_config, model, x_train, y_train) - logging.info('Average performance for %s per epoch is: %s', - 'Normal LSTM', sec_per_epoch) - return sec_per_epoch - - def _benchmark_performance_with_standard_cudnn_impl(self): - if not tf.test.is_gpu_available(): - self.skipTest('performance test will only run on GPU') - - mode = 'eager' if tf.executing_eagerly() else 'graph' - batch = 64 - num_batch = 10 - test_config = { - 'input_shape': 128, - 'rnn_state_size': 64, - 'output_shape': 64, - 'timestep': 50, - 'batch': batch, - 'epoch': 20, - # The performance for warmup epoch is ignored. 
- 'warmup_epoch': 1, - } - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=(batch * num_batch), - test_samples=0, - input_shape=(test_config['timestep'], test_config['input_shape']), - num_classes=test_config['output_shape']) - y_train = np_utils.to_categorical(y_train, test_config['output_shape']) - - cudnn_sec_per_epoch = self._time_performance_run_cudnn_lstm( - test_config, x_train, y_train) - lstm_v2_sec_per_epoch = self._time_performance_run_unifed_lstm_gpu( - test_config, x_train, y_train) - normal_lstm_sec_per_epoch = self._time_performance_run_normal_lstm( - test_config, x_train, y_train) - - cudnn_vs_v2 = cudnn_sec_per_epoch / lstm_v2_sec_per_epoch - v2_vs_normal = normal_lstm_sec_per_epoch / lstm_v2_sec_per_epoch - - self.report_benchmark(name='keras_cudnn_lstm_' + mode, - wall_time=cudnn_sec_per_epoch, - iters=test_config['epoch'], - extras=test_config) - self.report_benchmark(name='keras_lstm_v2_' + mode, - wall_time=lstm_v2_sec_per_epoch, - iters=test_config['epoch'], - extras=test_config) - self.report_benchmark(name='keras_canonical_lstm_' + mode, - wall_time=normal_lstm_sec_per_epoch, - iters=test_config['epoch'], - extras=test_config) - - logging.info('Expect the performance of LSTM V2 is within 80% of ' - 'cuDNN LSTM, got {0:.2f}%'.format(cudnn_vs_v2 * 100)) - logging.info('Expect the performance of LSTM V2 is more than 5 times' - ' of normal LSTM, got {0:.2f}'.format(v2_vs_normal)) - - def benchmark_performance_graph(self): - with tf.compat.v1.get_default_graph().as_default(): - with tf.compat.v1.Session(config=_config): - self._benchmark_performance_with_standard_cudnn_impl() - - def benchmark_performance_eager(self): - with tf.__internal__.eager_context.eager_mode(): - self._benchmark_performance_with_standard_cudnn_impl() - - -if __name__ == '__main__': - tf.test.main() + outputs = layer(inputs) + model = keras.models.Model(inputs, outputs) + model.compile("sgd", "mse") + + sec_per_epoch = self._measure_performance( + test_config, model, x_train, y_train + ) + logging.info( + "Average performance for %s per epoch is: %s", + "Normal LSTM", + sec_per_epoch, + ) + return sec_per_epoch + + def _benchmark_performance_with_standard_cudnn_impl(self): + if not tf.test.is_gpu_available(): + self.skipTest("performance test will only run on GPU") + + mode = "eager" if tf.executing_eagerly() else "graph" + batch = 64 + num_batch = 10 + test_config = { + "input_shape": 128, + "rnn_state_size": 64, + "output_shape": 64, + "timestep": 50, + "batch": batch, + "epoch": 20, + # The performance for warmup epoch is ignored. 
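The graph-mode benchmark below runs inside a v1 session built from the module-level _config so that grappler's implementation selector can swap the LSTM v2 kernel for the cuDNN one. A sketch of how such a config is assembled, mirroring the module-level setup earlier in this file (the ConfigProto/GraphOptions wrapping is an assumption of the standard protobuf layout):

# Sketch: session config with grappler's implementation selector enabled.
import tensorflow.compat.v2 as tf
from tensorflow.core.protobuf import rewriter_config_pb2

rewrites = rewriter_config_pb2.RewriterConfig()
rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON
config = tf.compat.v1.ConfigProto(
    graph_options=tf.compat.v1.GraphOptions(rewrite_options=rewrites)
)
with tf.compat.v1.Graph().as_default():
    with tf.compat.v1.Session(config=config):
        pass  # build and fit models here; grappler may pick the cuDNN impl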
+ "warmup_epoch": 1, + } + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=(batch * num_batch), + test_samples=0, + input_shape=(test_config["timestep"], test_config["input_shape"]), + num_classes=test_config["output_shape"], + ) + y_train = np_utils.to_categorical(y_train, test_config["output_shape"]) + + cudnn_sec_per_epoch = self._time_performance_run_cudnn_lstm( + test_config, x_train, y_train + ) + lstm_v2_sec_per_epoch = self._time_performance_run_unifed_lstm_gpu( + test_config, x_train, y_train + ) + normal_lstm_sec_per_epoch = self._time_performance_run_normal_lstm( + test_config, x_train, y_train + ) + + cudnn_vs_v2 = cudnn_sec_per_epoch / lstm_v2_sec_per_epoch + v2_vs_normal = normal_lstm_sec_per_epoch / lstm_v2_sec_per_epoch + + self.report_benchmark( + name="keras_cudnn_lstm_" + mode, + wall_time=cudnn_sec_per_epoch, + iters=test_config["epoch"], + extras=test_config, + ) + self.report_benchmark( + name="keras_lstm_v2_" + mode, + wall_time=lstm_v2_sec_per_epoch, + iters=test_config["epoch"], + extras=test_config, + ) + self.report_benchmark( + name="keras_canonical_lstm_" + mode, + wall_time=normal_lstm_sec_per_epoch, + iters=test_config["epoch"], + extras=test_config, + ) + + logging.info( + "Expect the performance of LSTM V2 is within 80% of " + "cuDNN LSTM, got {0:.2f}%".format(cudnn_vs_v2 * 100) + ) + logging.info( + "Expect the performance of LSTM V2 is more than 5 times" + " of normal LSTM, got {0:.2f}".format(v2_vs_normal) + ) + + def benchmark_performance_graph(self): + with tf.compat.v1.get_default_graph().as_default(): + with tf.compat.v1.Session(config=_config): + self._benchmark_performance_with_standard_cudnn_impl() + + def benchmark_performance_eager(self): + with tf.__internal__.eager_context.eager_mode(): + self._benchmark_performance_with_standard_cudnn_impl() + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/rnn_utils.py b/keras/layers/rnn/rnn_utils.py index 28ba910100c6..c11bb3762fd5 100644 --- a/keras/layers/rnn/rnn_utils.py +++ b/keras/layers/rnn/rnn_utils.py @@ -13,171 +13,183 @@ # limitations under the License. # ============================================================================== """Utilities for RNN cells and layers.""" -# pylint: disable=protected-access -from keras.utils import control_flow_util + import tensorflow.compat.v2 as tf +from keras.utils import control_flow_util + +# isort: off from tensorflow.python.platform import tf_logging as logging def standardize_args(inputs, initial_state, constants, num_constants): - """Standardizes `__call__` to a single list of tensor inputs. - - When running a model loaded from a file, the input tensors - `initial_state` and `constants` can be passed to `RNN.__call__()` as part - of `inputs` instead of by the dedicated keyword arguments. This method - makes sure the arguments are separated and that `initial_state` and - `constants` are lists of tensors (or None). - - Args: - inputs: Tensor or list/tuple of tensors. which may include constants - and initial states. In that case `num_constant` must be specified. - initial_state: Tensor or list of tensors or None, initial states. - constants: Tensor or list of tensors or None, constant tensors. - num_constants: Expected number of constants (if constants are passed as - part of the `inputs` list. - - Returns: - inputs: Single tensor or tuple of tensors. - initial_state: List of tensors or None. - constants: List of tensors or None. 
- """ - if isinstance(inputs, list): - # There are several situations here: - # In the graph mode, __call__ will be only called once. The initial_state - # and constants could be in inputs (from file loading). - # In the eager mode, __call__ will be called twice, once during - # rnn_layer(inputs=input_t, constants=c_t, ...), and second time will be - # model.fit/train_on_batch/predict with real np data. In the second case, - # the inputs will contain initial_state and constants as eager tensor. - # - # For either case, the real input is the first item in the list, which - # could be a nested structure itself. Then followed by initial_states, which - # could be a list of items, or list of list if the initial_state is complex - # structure, and finally followed by constants which is a flat list. - assert initial_state is None and constants is None - if num_constants: - constants = inputs[-num_constants:] - inputs = inputs[:-num_constants] - if len(inputs) > 1: - initial_state = inputs[1:] - inputs = inputs[:1] - - if len(inputs) > 1: - inputs = tuple(inputs) - else: - inputs = inputs[0] - - def to_list_or_none(x): - if x is None or isinstance(x, list): - return x - if isinstance(x, tuple): - return list(x) - return [x] - - initial_state = to_list_or_none(initial_state) - constants = to_list_or_none(constants) - - return inputs, initial_state, constants + """Standardizes `__call__` to a single list of tensor inputs. + + When running a model loaded from a file, the input tensors + `initial_state` and `constants` can be passed to `RNN.__call__()` as part + of `inputs` instead of by the dedicated keyword arguments. This method + makes sure the arguments are separated and that `initial_state` and + `constants` are lists of tensors (or None). + + Args: + inputs: Tensor or list/tuple of tensors. which may include constants + and initial states. In that case `num_constant` must be specified. + initial_state: Tensor or list of tensors or None, initial states. + constants: Tensor or list of tensors or None, constant tensors. + num_constants: Expected number of constants (if constants are passed as + part of the `inputs` list. + + Returns: + inputs: Single tensor or tuple of tensors. + initial_state: List of tensors or None. + constants: List of tensors or None. + """ + if isinstance(inputs, list): + # There are several situations here: + # In the graph mode, __call__ will be only called once. The + # initial_state and constants could be in inputs (from file loading). + # In the eager mode, __call__ will be called twice, once during + # rnn_layer(inputs=input_t, constants=c_t, ...), and second time will be + # model.fit/train_on_batch/predict with real np data. In the second + # case, the inputs will contain initial_state and constants as eager + # tensor. + # + # For either case, the real input is the first item in the list, which + # could be a nested structure itself. Then followed by initial_states, + # which could be a list of items, or list of list if the initial_state + # is complex structure, and finally followed by constants which is a + # flat list. 
+ assert initial_state is None and constants is None + if num_constants: + constants = inputs[-num_constants:] + inputs = inputs[:-num_constants] + if len(inputs) > 1: + initial_state = inputs[1:] + inputs = inputs[:1] + + if len(inputs) > 1: + inputs = tuple(inputs) + else: + inputs = inputs[0] + + def to_list_or_none(x): + if x is None or isinstance(x, list): + return x + if isinstance(x, tuple): + return list(x) + return [x] + + initial_state = to_list_or_none(initial_state) + constants = to_list_or_none(constants) + + return inputs, initial_state, constants def is_multiple_state(state_size): - """Check whether the state_size contains multiple states.""" - return (hasattr(state_size, '__len__') and - not isinstance(state_size, tf.TensorShape)) + """Check whether the state_size contains multiple states.""" + return hasattr(state_size, "__len__") and not isinstance( + state_size, tf.TensorShape + ) def generate_zero_filled_state_for_cell(cell, inputs, batch_size, dtype): - if inputs is not None: - batch_size = tf.shape(inputs)[0] - dtype = inputs.dtype - return generate_zero_filled_state(batch_size, cell.state_size, dtype) + if inputs is not None: + batch_size = tf.shape(inputs)[0] + dtype = inputs.dtype + return generate_zero_filled_state(batch_size, cell.state_size, dtype) def generate_zero_filled_state(batch_size_tensor, state_size, dtype): - """Generate a zero filled tensor with shape [batch_size, state_size].""" - if batch_size_tensor is None or dtype is None: - raise ValueError( - 'batch_size and dtype cannot be None while constructing initial state. ' - f'Received: batch_size={batch_size_tensor}, dtype={dtype}') - - def create_zeros(unnested_state_size): - flat_dims = tf.TensorShape(unnested_state_size).as_list() - init_state_size = [batch_size_tensor] + flat_dims - return tf.zeros(init_state_size, dtype=dtype) - - if tf.nest.is_nested(state_size): - return tf.nest.map_structure(create_zeros, state_size) - else: - return create_zeros(state_size) + """Generate a zero filled tensor with shape [batch_size, state_size].""" + if batch_size_tensor is None or dtype is None: + raise ValueError( + "batch_size and dtype cannot be None while constructing initial " + f"state. Received: batch_size={batch_size_tensor}, dtype={dtype}" + ) + + def create_zeros(unnested_state_size): + flat_dims = tf.TensorShape(unnested_state_size).as_list() + init_state_size = [batch_size_tensor] + flat_dims + return tf.zeros(init_state_size, dtype=dtype) + + if tf.nest.is_nested(state_size): + return tf.nest.map_structure(create_zeros, state_size) + else: + return create_zeros(state_size) def caching_device(rnn_cell): - """Returns the caching device for the RNN variable. - - This is useful for distributed training, when variable is not located as same - device as the training worker. By enabling the device cache, this allows - worker to read the variable once and cache locally, rather than read it every - time step from remote when it is needed. - - Note that this is assuming the variable that cell needs for each time step is - having the same value in the forward path, and only gets updated in the - backprop. It is true for all the default cells (SimpleRNN, GRU, LSTM). If the - cell body relies on any variable that gets updated every time step, then - caching device will cause it to read the stall value. - - Args: - rnn_cell: the rnn cell instance. - """ - if tf.executing_eagerly(): - # caching_device is not supported in eager mode. 
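As a quick hypothetical aside (not part of the patch), the zero-state helper above can be exercised directly; a nested `state_size`, as in an LSTM-style cell, yields one `[batch, dim]` zero tensor per entry:

```python
import tensorflow.compat.v2 as tf

from keras.layers.rnn import rnn_utils

# Nested state_size (two state tensors, as in an LSTM cell): one zero tensor
# is created per entry. A scalar state_size would yield a single tensor.
states = rnn_utils.generate_zero_filled_state(
    batch_size_tensor=8, state_size=[16, 16], dtype=tf.float32
)
print([s.shape for s in states])  # [TensorShape([8, 16]), TensorShape([8, 16])]
```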
- return None - if not getattr(rnn_cell, '_enable_caching_device', False): - return None - # Don't set a caching device when running in a loop, since it is possible that - # train steps could be wrapped in a tf.while_loop. In that scenario caching - # prevents forward computations in loop iterations from re-reading the - # updated weights. - if control_flow_util.IsInWhileLoop(tf.compat.v1.get_default_graph()): - logging.warning( - 'Variable read device caching has been disabled because the ' - 'RNN is in tf.while_loop loop context, which will cause ' - 'reading stalled value in forward path. This could slow down ' - 'the training due to duplicated variable reads. Please ' - 'consider updating your code to remove tf.while_loop if possible.') - return None - if (rnn_cell._dtype_policy.compute_dtype != - rnn_cell._dtype_policy.variable_dtype): - logging.warning( - 'Variable read device caching has been disabled since it ' - 'doesn\'t work with the mixed precision API. This is ' - 'likely to cause a slowdown for RNN training due to ' - 'duplicated read of variable for each timestep, which ' - 'will be significant in a multi remote worker setting. ' - 'Please consider disabling mixed precision API if ' - 'the performance has been affected.') - return None - # Cache the value on the device that access the variable. - return lambda op: op.device
+ """Returns the caching device for the RNN variable. + + This is useful for distributed training, when the variable is not located + on the same device as the training worker. By enabling the device cache, this + allows the worker to read the variable once and cache it locally, rather than + reading it from the remote device at every time step. + + Note that this assumes the variable that the cell needs at each time step + has the same value in the forward path, and only gets updated in the + backprop. This holds for all the default cells (SimpleRNN, GRU, LSTM). If + the cell body relies on any variable that gets updated every time step, then + the caching device will cause it to read a stale value. + + Args: + rnn_cell: the RNN cell instance. + """ + if tf.executing_eagerly(): + # caching_device is not supported in eager mode. + return None + if not getattr(rnn_cell, "_enable_caching_device", False): + return None + # Don't set a caching device when running in a loop, since it is possible + # that train steps could be wrapped in a tf.while_loop. In that scenario + # caching prevents forward computations in loop iterations from re-reading + # the updated weights. + if control_flow_util.IsInWhileLoop(tf.compat.v1.get_default_graph()): + logging.warning( + "Variable read device caching has been disabled because the " + "RNN is in a tf.while_loop context, which would cause " + "a stale value to be read in the forward path. This could slow down " + "training due to duplicated variable reads. Please " + "consider updating your code to remove tf.while_loop if possible." + ) + return None + if ( + rnn_cell._dtype_policy.compute_dtype + != rnn_cell._dtype_policy.variable_dtype + ): + logging.warning( + "Variable read device caching has been disabled since it " + "doesn't work with the mixed precision API. This is " + "likely to cause a slowdown for RNN training due to " + "duplicated variable reads at each timestep, which " + "will be significant in a multi remote worker setting. " + "Please consider disabling the mixed precision API if " + "performance has been affected." + ) + return None + # Cache the value on the device that accesses the variable.
+ return lambda op: op.device
def config_for_enable_caching_device(rnn_cell): - """Return the dict config for RNN cell wrt to enable_caching_device field. - - Since enable_caching_device is a internal implementation detail for speed up - the RNN variable read when running on the multi remote worker setting, we - don't want this config to be serialized constantly in the JSON. We will only - serialize this field when a none default value is used to create the cell. - Args: - rnn_cell: the RNN cell for serialize. - - Returns: - A dict which contains the JSON config for enable_caching_device value or - empty dict if the enable_caching_device value is same as the default value. - """ - default_enable_caching_device = tf.compat.v1.executing_eagerly_outside_functions( - ) - if rnn_cell._enable_caching_device != default_enable_caching_device: - return {'enable_caching_device': rnn_cell._enable_caching_device} - return {}
+ """Return the dict config for the RNN cell wrt the enable_caching_device field. + + Since enable_caching_device is an internal implementation detail for speeding up + RNN variable reads when running in a multi remote worker setting, we + don't want this config to be serialized constantly in the JSON. We will only + serialize this field when a non-default value is used to create the cell. + + Args: + rnn_cell: the RNN cell to serialize. + + Returns: + A dict which contains the JSON config for the enable_caching_device value, or + an empty dict if the enable_caching_device value is the same as the default + value. + """ + default_enable_caching_device = ( + tf.compat.v1.executing_eagerly_outside_functions() + ) + if rnn_cell._enable_caching_device != default_enable_caching_device: + return {"enable_caching_device": rnn_cell._enable_caching_device} + return {}
diff --git a/keras/layers/rnn/simple_rnn.py b/keras/layers/rnn/simple_rnn.py index 74c1579422bb..97a2e94d761f 100644 --- a/keras/layers/rnn/simple_rnn.py +++ b/keras/layers/rnn/simple_rnn.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Fully connected RNN layer.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import activations from keras import backend @@ -26,467 +28,483 @@ from keras.layers.rnn.base_rnn import RNN from keras.layers.rnn.dropout_rnn_cell_mixin import DropoutRNNCellMixin from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export
-@keras_export('keras.layers.SimpleRNNCell') +@keras_export("keras.layers.SimpleRNNCell") class SimpleRNNCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer): - """Cell class for SimpleRNN. - - See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) - for details about the usage of RNN API. - - This class processes one step within the whole time sequence input, whereas - `tf.keras.layer.SimpleRNN` processes the whole sequence. - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - Default: hyperbolic tangent (`tanh`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, (default `True`), whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs.
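A brief hypothetical check (not part of the patch) of the serialization rule just defined: under the TF2 eager-outside-functions default the flag defaults to `True`, so only a non-default value shows up in the config:

```python
import keras
from keras.layers.rnn import rnn_utils

default_cell = keras.layers.SimpleRNNCell(4)
custom_cell = keras.layers.SimpleRNNCell(4, enable_caching_device=False)

# Only the cell built with a non-default value serializes the field.
print(rnn_utils.config_for_enable_caching_device(default_cell))  # {}
print(rnn_utils.config_for_enable_caching_device(custom_cell))
# {'enable_caching_device': False}
```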
Default: - `glorot_uniform`. - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, used for the linear transformation of the recurrent state. - Default: `orthogonal`. - bias_initializer: Initializer for the bias vector. Default: `zeros`. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. Default: `None`. - bias_regularizer: Regularizer function applied to the bias vector. Default: - `None`. - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_constraint: Constraint function applied to the `recurrent_kernel` - weights matrix. Default: `None`. - bias_constraint: Constraint function applied to the bias vector. Default: - `None`. - dropout: Float between 0 and 1. Fraction of the units to drop for the linear - transformation of the inputs. Default: 0. - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for - the linear transformation of the recurrent state. Default: 0. - - Call arguments: - inputs: A 2D tensor, with shape of `[batch, feature]`. - states: A 2D tensor with shape of `[batch, units]`, which is the state from - the previous time step. For timestep 0, the initial state provided by user - will be feed to cell. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. Only relevant when `dropout` or - `recurrent_dropout` is used. - - Examples: - - ```python - inputs = np.random.random([32, 10, 8]).astype(np.float32) - rnn = tf.keras.layers.RNN(tf.keras.layers.SimpleRNNCell(4)) - - output = rnn(inputs) # The output has shape `[32, 4]`. - - rnn = tf.keras.layers.RNN( - tf.keras.layers.SimpleRNNCell(4), - return_sequences=True, - return_state=True) - - # whole_sequence_output has shape `[32, 10, 4]`. - # final_state has shape `[32, 4]`. - whole_sequence_output, final_state = rnn(inputs) - ``` - """ - - def __init__(self, - units, - activation='tanh', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - **kwargs): - if units < 0: - raise ValueError(f'Received an invalid value for argument `units`, ' - f'expected a positive integer, got {units}.') - # By default use cached variable under v2 mode, see b/143699808. 
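Reading ahead to the cell's `call` method further down, a NumPy-only sketch (hypothetical, dropout omitted) of the one-step recurrence it implements:

```python
import numpy as np

def simple_rnn_step(x, prev_output, kernel, recurrent_kernel, bias):
    # output = tanh(x @ kernel + bias + prev_output @ recurrent_kernel),
    # mirroring SimpleRNNCell.call without the dropout masks.
    h = x @ kernel + bias
    return np.tanh(h + prev_output @ recurrent_kernel)

batch, features, units = 2, 4, 3
x = np.random.rand(batch, features).astype("float32")
prev = np.zeros((batch, units), dtype="float32")
kernel = np.random.rand(features, units).astype("float32")
recurrent_kernel = np.random.rand(units, units).astype("float32")
bias = np.zeros(units, dtype="float32")

print(simple_rnn_step(x, prev, kernel, recurrent_kernel, bias).shape)  # (2, 3)
```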
- if tf.compat.v1.executing_eagerly_outside_functions(): - self._enable_caching_device = kwargs.pop('enable_caching_device', True) - else: - self._enable_caching_device = kwargs.pop('enable_caching_device', False) - super().__init__(**kwargs) - self.units = units - self.activation = activations.get(activation) - self.use_bias = use_bias - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.dropout = min(1., max(0., dropout)) - self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_size = self.units - self.output_size = self.units - - @tf_utils.shape_type_conversion - def build(self, input_shape): - default_caching_device = rnn_utils.caching_device(self) - self.kernel = self.add_weight( - shape=(input_shape[-1], self.units), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - caching_device=default_caching_device) - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint, - caching_device=default_caching_device) - if self.use_bias: - self.bias = self.add_weight( - shape=(self.units,), - name='bias', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - caching_device=default_caching_device) - else: - self.bias = None - self.built = True - - def call(self, inputs, states, training=None): - prev_output = states[0] if tf.nest.is_nested(states) else states - dp_mask = self.get_dropout_mask_for_cell(inputs, training) - rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( - prev_output, training) - - if dp_mask is not None: - h = backend.dot(inputs * dp_mask, self.kernel) - else: - h = backend.dot(inputs, self.kernel) - if self.bias is not None: - h = backend.bias_add(h, self.bias) - - if rec_dp_mask is not None: - prev_output = prev_output * rec_dp_mask - output = h + backend.dot(prev_output, self.recurrent_kernel) - if self.activation is not None: - output = self.activation(output) - - new_state = [output] if tf.nest.is_nested(states) else output - return output, new_state - - def get_initial_state(self, inputs=None, batch_size=None, dtype=None): - return rnn_utils.generate_zero_filled_state_for_cell( - self, inputs, batch_size, dtype) - - def get_config(self): - config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 
'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout - } - config.update(rnn_utils.config_for_enable_caching_device(self)) - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.layers.SimpleRNN')
+ """Cell class for SimpleRNN. + + See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + for details about the usage of RNN API. + + This class processes one step within the whole time sequence input, whereas + `tf.keras.layer.SimpleRNN` processes the whole sequence. + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, (default `True`), whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. Default: + `glorot_uniform`. + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, used for the linear transformation of the recurrent + state. Default: `orthogonal`. + bias_initializer: Initializer for the bias vector. Default: `zeros`. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_regularizer: Regularizer function applied to the + `recurrent_kernel` weights matrix. Default: `None`. + bias_regularizer: Regularizer function applied to the bias vector. + Default: `None`. + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_constraint: Constraint function applied to the + `recurrent_kernel` weights matrix. Default: `None`. + bias_constraint: Constraint function applied to the bias vector. Default: + `None`. + dropout: Float between 0 and 1. Fraction of the units to drop for the + linear transformation of the inputs. Default: 0. + recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + for the linear transformation of the recurrent state. Default: 0. + + Call arguments: + inputs: A 2D tensor, with shape of `[batch, feature]`. + states: A 2D tensor with shape of `[batch, units]`, which is the state + from the previous time step. For timestep 0, the initial state provided + by the user will be fed to the cell. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. Only relevant when `dropout` or + `recurrent_dropout` is used. + + Examples: + + ```python + inputs = np.random.random([32, 10, 8]).astype(np.float32) + rnn = tf.keras.layers.RNN(tf.keras.layers.SimpleRNNCell(4)) + + output = rnn(inputs) # The output has shape `[32, 4]`. + + rnn = tf.keras.layers.RNN( + tf.keras.layers.SimpleRNNCell(4), + return_sequences=True, + return_state=True) + + # whole_sequence_output has shape `[32, 10, 4]`. + # final_state has shape `[32, 4]`.
+ whole_sequence_output, final_state = rnn(inputs) + ``` + """ + + def __init__( + self, + units, + activation="tanh", + use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + **kwargs, + ): + if units <= 0: + raise ValueError( + "Received an invalid value for argument `units`, " + f"expected a positive integer, got {units}." + ) + # By default use cached variable under v2 mode, see b/143699808. + if tf.compat.v1.executing_eagerly_outside_functions(): + self._enable_caching_device = kwargs.pop( + "enable_caching_device", True + ) + else: + self._enable_caching_device = kwargs.pop( + "enable_caching_device", False + ) + super().__init__(**kwargs) + self.units = units + self.activation = activations.get(activation) + self.use_bias = use_bias + + self.kernel_initializer = initializers.get(kernel_initializer) + self.recurrent_initializer = initializers.get(recurrent_initializer) + self.bias_initializer = initializers.get(bias_initializer) + + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.recurrent_regularizer = regularizers.get(recurrent_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + + self.kernel_constraint = constraints.get(kernel_constraint) + self.recurrent_constraint = constraints.get(recurrent_constraint) + self.bias_constraint = constraints.get(bias_constraint) + + self.dropout = min(1.0, max(0.0, dropout)) + self.recurrent_dropout = min(1.0, max(0.0, recurrent_dropout)) + self.state_size = self.units + self.output_size = self.units + + @tf_utils.shape_type_conversion + def build(self, input_shape): + super().build(input_shape) + default_caching_device = rnn_utils.caching_device(self) + self.kernel = self.add_weight( + shape=(input_shape[-1], self.units), + name="kernel", + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + caching_device=default_caching_device, + ) + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + name="recurrent_kernel", + initializer=self.recurrent_initializer, + regularizer=self.recurrent_regularizer, + constraint=self.recurrent_constraint, + caching_device=default_caching_device, + ) + if self.use_bias: + self.bias = self.add_weight( + shape=(self.units,), + name="bias", + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + caching_device=default_caching_device, + ) + else: + self.bias = None + self.built = True + + def call(self, inputs, states, training=None): + prev_output = states[0] if tf.nest.is_nested(states) else states + dp_mask = self.get_dropout_mask_for_cell(inputs, training) + rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( + prev_output, training + ) + + if dp_mask is not None: + h = backend.dot(inputs * dp_mask, self.kernel) + else: + h = backend.dot(inputs, self.kernel) + if self.bias is not None: + h = backend.bias_add(h, self.bias) + + if rec_dp_mask is not None: + prev_output = prev_output * rec_dp_mask + output = h + backend.dot(prev_output, self.recurrent_kernel) + if self.activation is not None: + output = self.activation(output) + + new_state = [output] if tf.nest.is_nested(states) else output + return output, new_state + + def get_initial_state(self, inputs=None, 
batch_size=None, dtype=None): + return rnn_utils.generate_zero_filled_state_for_cell( + self, inputs, batch_size, dtype + ) + + def get_config(self): + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + } + config.update(rnn_utils.config_for_enable_caching_device(self)) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.layers.SimpleRNN") class SimpleRNN(RNN): - """Fully-connected RNN where the output is to be fed back to input. - - See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) - for details about the usage of RNN API. - - Args: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - Default: hyperbolic tangent (`tanh`). - If you pass None, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, (default `True`), whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. Default: - `glorot_uniform`. - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, used for the linear transformation of the recurrent state. - Default: `orthogonal`. - bias_initializer: Initializer for the bias vector. Default: `zeros`. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_regularizer: Regularizer function applied to the - `recurrent_kernel` weights matrix. Default: `None`. - bias_regularizer: Regularizer function applied to the bias vector. Default: - `None`. - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation"). Default: `None`. - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. Default: `None`. - recurrent_constraint: Constraint function applied to the `recurrent_kernel` - weights matrix. Default: `None`. - bias_constraint: Constraint function applied to the bias vector. Default: - `None`. - dropout: Float between 0 and 1. - Fraction of the units to drop for the linear transformation of the inputs. - Default: 0. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for the linear transformation of the - recurrent state. Default: 0. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. Default: `False`. - return_state: Boolean. Whether to return the last state - in addition to the output. Default: `False` - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). 
If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - - Call arguments: - inputs: A 3D tensor, with shape `[batch, timesteps, feature]`. - mask: Binary tensor of shape `[batch, timesteps]` indicating whether - a given timestep should be masked. An individual `True` entry indicates - that the corresponding timestep should be utilized, while a `False` entry - indicates that the corresponding timestep should be ignored. - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the cell - when calling it. This is only relevant if `dropout` or - `recurrent_dropout` is used. - initial_state: List of initial state tensors to be passed to the first - call of the cell. - - Examples: - - ```python - inputs = np.random.random([32, 10, 8]).astype(np.float32) - simple_rnn = tf.keras.layers.SimpleRNN(4) - - output = simple_rnn(inputs) # The output has shape `[32, 4]`. - - simple_rnn = tf.keras.layers.SimpleRNN( - 4, return_sequences=True, return_state=True) - - # whole_sequence_output has shape `[32, 10, 4]`. - # final_state has shape `[32, 4]`. - whole_sequence_output, final_state = simple_rnn(inputs) - ``` - """ - - def __init__(self, - units, - activation='tanh', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - **kwargs): - if 'implementation' in kwargs: - kwargs.pop('implementation') - logging.warning('The `implementation` argument ' - 'in `SimpleRNN` has been deprecated. ' - 'Please remove it from your layer call.') - if 'enable_caching_device' in kwargs: - cell_kwargs = {'enable_caching_device': - kwargs.pop('enable_caching_device')} - else: - cell_kwargs = {} - cell = SimpleRNNCell( + """Fully-connected RNN where the output is to be fed back to input. + + See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + for details about the usage of RNN API. + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). + If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, (default `True`), whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. Default: + `glorot_uniform`. + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, used for the linear transformation of the recurrent + state. Default: `orthogonal`. + bias_initializer: Initializer for the bias vector. Default: `zeros`. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_regularizer: Regularizer function applied to the + `recurrent_kernel` weights matrix. Default: `None`. 
+ bias_regularizer: Regularizer function applied to the bias vector. + Default: `None`. + activity_regularizer: Regularizer function applied to the output of the + layer (its "activation"). Default: `None`. + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. Default: `None`. + recurrent_constraint: Constraint function applied to the + `recurrent_kernel` weights matrix. Default: `None`. + bias_constraint: Constraint function applied to the bias vector. Default: + `None`. + dropout: Float between 0 and 1. + Fraction of the units to drop for the linear transformation of the + inputs. Default: 0. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for the linear transformation of the + recurrent state. Default: 0. + return_sequences: Boolean. Whether to return the last output + in the output sequence, or the full sequence. Default: `False`. + return_state: Boolean. Whether to return the last state + in addition to the output. Default: `False`. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed up an RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + + Call arguments: + inputs: A 3D tensor, with shape `[batch, timesteps, feature]`. + mask: Binary tensor of shape `[batch, timesteps]` indicating whether + a given timestep should be masked. An individual `True` entry indicates + that the corresponding timestep should be utilized, while a `False` + entry indicates that the corresponding timestep should be ignored. + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. This argument is passed to the cell + when calling it. This is only relevant if `dropout` or + `recurrent_dropout` is used. + initial_state: List of initial state tensors to be passed to the first + call of the cell. + + Examples: + + ```python + inputs = np.random.random([32, 10, 8]).astype(np.float32) + simple_rnn = tf.keras.layers.SimpleRNN(4) + + output = simple_rnn(inputs) # The output has shape `[32, 4]`. + + simple_rnn = tf.keras.layers.SimpleRNN( + 4, return_sequences=True, return_state=True) + + # whole_sequence_output has shape `[32, 10, 4]`. + # final_state has shape `[32, 4]`.
+ whole_sequence_output, final_state = simple_rnn(inputs) + ``` + """ + + def __init__( + self, units, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - dtype=kwargs.get('dtype'), - trainable=kwargs.get('trainable', True), - **cell_kwargs) - super().__init__( - cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.input_spec = [InputSpec(ndim=3)] - - def call(self, inputs, mask=None, training=None, initial_state=None): - return super().call( - inputs, mask=mask, training=training, initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - def get_config(self): - config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout - } - base_config = super().get_config() - config.update(rnn_utils.config_for_enable_caching_device(self.cell)) - del base_config['cell'] - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - if 'implementation' in config: - config.pop('implementation') - return cls(**config) + activation="tanh", + 
use_bias=True, + kernel_initializer="glorot_uniform", + recurrent_initializer="orthogonal", + bias_initializer="zeros", + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + **kwargs, + ): + if "implementation" in kwargs: + kwargs.pop("implementation") + logging.warning( + "The `implementation` argument " + "in `SimpleRNN` has been deprecated. " + "Please remove it from your layer call." + ) + if "enable_caching_device" in kwargs: + cell_kwargs = { + "enable_caching_device": kwargs.pop("enable_caching_device") + } + else: + cell_kwargs = {} + cell = SimpleRNNCell( + units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + dtype=kwargs.get("dtype"), + trainable=kwargs.get("trainable", True), + name="simple_rnn_cell", + **cell_kwargs, + ) + super().__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + **kwargs, + ) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.input_spec = [InputSpec(ndim=3)] + + def call(self, inputs, mask=None, training=None, initial_state=None): + return super().call( + inputs, mask=mask, training=training, initial_state=initial_state + ) + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + def get_config(self): + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "recurrent_initializer": initializers.serialize( + self.recurrent_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "recurrent_regularizer": regularizers.serialize( + self.recurrent_regularizer + ), + 
"bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "recurrent_constraint": constraints.serialize( + self.recurrent_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + "dropout": self.dropout, + "recurrent_dropout": self.recurrent_dropout, + } + base_config = super().get_config() + config.update(rnn_utils.config_for_enable_caching_device(self.cell)) + del base_config["cell"] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if "implementation" in config: + config.pop("implementation") + return cls(**config) diff --git a/keras/layers/rnn/simple_rnn_test.py b/keras/layers/rnn/simple_rnn_test.py index 8901d363c540..9cd1a27668d7 100644 --- a/keras/layers/rnn/simple_rnn_test.py +++ b/keras/layers/rnn/simple_rnn_test.py @@ -14,12 +14,11 @@ # ============================================================================== """Tests for SimpleRNN layer.""" -import tensorflow.compat.v2 as tf - import copy -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras from keras.testing_infra import test_combinations @@ -28,206 +27,228 @@ @test_combinations.generate(test_combinations.keras_mode_combinations()) class SimpleRNNLayerTest(tf.test.TestCase, parameterized.TestCase): - - def test_return_sequences_SimpleRNN(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.SimpleRNN, - kwargs={'units': units, - 'return_sequences': True}, - input_shape=(num_samples, timesteps, embedding_dim)) - - @test_utils.run_v2_only - def test_float64_SimpleRNN(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.SimpleRNN, - kwargs={'units': units, - 'return_sequences': True, - 'dtype': 'float64'}, - input_shape=(num_samples, timesteps, embedding_dim), - input_dtype='float64') - - def test_dynamic_behavior_SimpleRNN(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer = keras.layers.SimpleRNN(units, input_shape=(None, embedding_dim)) - model = keras.models.Sequential() - model.add(layer) - model.compile('rmsprop', 'mse') - x = np.random.random((num_samples, timesteps, embedding_dim)) - y = np.random.random((num_samples, units)) - model.train_on_batch(x, y) - - def test_dropout_SimpleRNN(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - test_utils.layer_test( - keras.layers.SimpleRNN, - kwargs={'units': units, - 'dropout': 0.1, - 'recurrent_dropout': 0.1}, - input_shape=(num_samples, timesteps, embedding_dim)) - - def test_implementation_mode_SimpleRNN(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - for mode in [0, 1, 2]: - test_utils.layer_test( - keras.layers.SimpleRNN, - kwargs={'units': units, - 'implementation': mode}, - input_shape=(num_samples, timesteps, embedding_dim)) - - def test_constraints_SimpleRNN(self): - embedding_dim = 4 - layer_class = keras.layers.SimpleRNN - k_constraint = keras.constraints.max_norm(0.01) - r_constraint = keras.constraints.max_norm(0.01) - b_constraint = keras.constraints.max_norm(0.01) - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_constraint=k_constraint, - 
recurrent_constraint=r_constraint, - bias_constraint=b_constraint) - layer.build((None, None, embedding_dim)) - self.assertEqual(layer.cell.kernel.constraint, k_constraint) - self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.cell.bias.constraint, b_constraint) - - def test_with_masking_layer_SimpleRNN(self): - layer_class = keras.layers.SimpleRNN - inputs = np.random.random((2, 3, 4)) - targets = np.abs(np.random.random((2, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - model = keras.models.Sequential() - model.add(keras.layers.Masking(input_shape=(3, 4))) - model.add(layer_class(units=5, return_sequences=True, unroll=False)) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1) - - def test_from_config_SimpleRNN(self): - layer_class = keras.layers.SimpleRNN - for stateful in (False, True): - l1 = layer_class(units=1, stateful=stateful) - l2 = layer_class.from_config(l1.get_config()) - assert l1.get_config() == l2.get_config() - - def test_deep_copy_SimpleRNN(self): - cell = keras.layers.SimpleRNNCell(5) - copied_cell = copy.deepcopy(cell) - self.assertEqual(copied_cell.units, 5) - self.assertEqual(cell.get_config(), copied_cell.get_config()) - - def test_regularizers_SimpleRNN(self): - embedding_dim = 4 - layer_class = keras.layers.SimpleRNN - layer = layer_class( - 5, - return_sequences=False, - weights=None, - input_shape=(None, embedding_dim), - kernel_regularizer=keras.regularizers.l1(0.01), - recurrent_regularizer=keras.regularizers.l1(0.01), - bias_regularizer='l2', - activity_regularizer='l1') - layer.build((None, None, 2)) - self.assertLen(layer.losses, 3) - - x = keras.backend.variable(np.ones((2, 3, 2))) - layer(x) - if tf.executing_eagerly(): - self.assertLen(layer.losses, 4) - else: - self.assertLen(layer.get_losses_for(x), 1) - - def test_statefulness_SimpleRNN(self): - num_samples = 2 - timesteps = 3 - embedding_dim = 4 - units = 2 - layer_class = keras.layers.SimpleRNN - model = keras.models.Sequential() - model.add( - keras.layers.Embedding( - 4, - embedding_dim, - mask_zero=True, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = layer_class( - units, return_sequences=False, stateful=True, weights=None) - model.add(layer) - model.compile( - optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - out1 = model.predict(np.ones((num_samples, timesteps))) - self.assertEqual(out1.shape, (num_samples, units)) - - # train once so that the states change - model.train_on_batch( - np.ones((num_samples, timesteps)), np.ones((num_samples, units))) - out2 = model.predict(np.ones((num_samples, timesteps))) - - # if the state is not reset, output should be different - self.assertNotEqual(out1.max(), out2.max()) - - # check that output changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out2.max(), out3.max()) - - # check that container-level reset_states() works - model.reset_states() - out4 = model.predict(np.ones((num_samples, timesteps))) - np.testing.assert_allclose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones((num_samples, timesteps))) - self.assertNotEqual(out4.max(), out5.max()) - - # Check masking - layer.reset_states() - - left_padded_input = 
np.ones((num_samples, timesteps)) - left_padded_input[0, :1] = 0 - left_padded_input[1, :2] = 0 - out6 = model.predict(left_padded_input) - - layer.reset_states() - - right_padded_input = np.ones((num_samples, timesteps)) - right_padded_input[0, -1:] = 0 - right_padded_input[1, -2:] = 0 - out7 = model.predict(right_padded_input) - - np.testing.assert_allclose(out7, out6, atol=1e-5) - - def test_get_initial_states(self): - batch_size = 4 - cell = keras.layers.SimpleRNNCell(20) - initial_state = cell.get_initial_state( - batch_size=batch_size, dtype=tf.float32) - _, state = cell(np.ones((batch_size, 20), dtype=np.float32), initial_state) - self.assertEqual(state.shape, initial_state.shape) - - -if __name__ == '__main__': - tf.test.main() + def test_return_sequences_SimpleRNN(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.SimpleRNN, + kwargs={"units": units, "return_sequences": True}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + @test_utils.run_v2_only + def test_float64_SimpleRNN(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.SimpleRNN, + kwargs={ + "units": units, + "return_sequences": True, + "dtype": "float64", + }, + input_shape=(num_samples, timesteps, embedding_dim), + input_dtype="float64", + ) + + def test_dynamic_behavior_SimpleRNN(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + layer = keras.layers.SimpleRNN(units, input_shape=(None, embedding_dim)) + model = keras.models.Sequential() + model.add(layer) + model.compile("rmsprop", "mse") + x = np.random.random((num_samples, timesteps, embedding_dim)) + y = np.random.random((num_samples, units)) + model.train_on_batch(x, y) + + def test_dropout_SimpleRNN(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + test_utils.layer_test( + keras.layers.SimpleRNN, + kwargs={"units": units, "dropout": 0.1, "recurrent_dropout": 0.1}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + def test_implementation_mode_SimpleRNN(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + for mode in [0, 1, 2]: + test_utils.layer_test( + keras.layers.SimpleRNN, + kwargs={"units": units, "implementation": mode}, + input_shape=(num_samples, timesteps, embedding_dim), + ) + + def test_constraints_SimpleRNN(self): + embedding_dim = 4 + layer_class = keras.layers.SimpleRNN + k_constraint = keras.constraints.max_norm(0.01) + r_constraint = keras.constraints.max_norm(0.01) + b_constraint = keras.constraints.max_norm(0.01) + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_constraint=k_constraint, + recurrent_constraint=r_constraint, + bias_constraint=b_constraint, + ) + layer.build((None, None, embedding_dim)) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) + + def test_with_masking_layer_SimpleRNN(self): + layer_class = keras.layers.SimpleRNN + inputs = np.random.random((2, 3, 4)) + targets = np.abs(np.random.random((2, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + model = keras.models.Sequential() + model.add(keras.layers.Masking(input_shape=(3, 4))) + model.add(layer_class(units=5, return_sequences=True, unroll=False)) + model.compile(loss="categorical_crossentropy", optimizer="rmsprop") + model.fit(inputs, 
targets, epochs=1, batch_size=2, verbose=1) + + def test_from_config_SimpleRNN(self): + layer_class = keras.layers.SimpleRNN + for stateful in (False, True): + l1 = layer_class(units=1, stateful=stateful) + l2 = layer_class.from_config(l1.get_config()) + assert l1.get_config() == l2.get_config() + + def test_deep_copy_SimpleRNN(self): + cell = keras.layers.SimpleRNNCell(5) + copied_cell = copy.deepcopy(cell) + self.assertEqual(copied_cell.units, 5) + self.assertEqual(cell.get_config(), copied_cell.get_config()) + + def test_regularizers_SimpleRNN(self): + embedding_dim = 4 + layer_class = keras.layers.SimpleRNN + layer = layer_class( + 5, + return_sequences=False, + weights=None, + input_shape=(None, embedding_dim), + kernel_regularizer=keras.regularizers.l1(0.01), + recurrent_regularizer=keras.regularizers.l1(0.01), + bias_regularizer="l2", + activity_regularizer="l1", + ) + layer.build((None, None, 2)) + self.assertLen(layer.losses, 3) + + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + if tf.executing_eagerly(): + self.assertLen(layer.losses, 4) + else: + self.assertLen(layer.get_losses_for(x), 1) + + def test_statefulness_SimpleRNN(self): + num_samples = 2 + timesteps = 3 + embedding_dim = 4 + units = 2 + layer_class = keras.layers.SimpleRNN + model = keras.models.Sequential() + model.add( + keras.layers.Embedding( + 4, + embedding_dim, + mask_zero=True, + input_length=timesteps, + batch_input_shape=(num_samples, timesteps), + ) + ) + layer = layer_class( + units, return_sequences=False, stateful=True, weights=None + ) + model.add(layer) + model.compile( + optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01), + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + out1 = model.predict(np.ones((num_samples, timesteps))) + self.assertEqual(out1.shape, (num_samples, units)) + + # train once so that the states change + model.train_on_batch( + np.ones((num_samples, timesteps)), np.ones((num_samples, units)) + ) + out2 = model.predict(np.ones((num_samples, timesteps))) + + # if the state is not reset, output should be different + self.assertNotEqual(out1.max(), out2.max()) + + # check that output changes after states are reset + # (even though the model itself didn't change) + layer.reset_states() + out3 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out2.max(), out3.max()) + + # check that container-level reset_states() works + model.reset_states() + out4 = model.predict(np.ones((num_samples, timesteps))) + np.testing.assert_allclose(out3, out4, atol=1e-5) + + # check that the call to `predict` updated the states + out5 = model.predict(np.ones((num_samples, timesteps))) + self.assertNotEqual(out4.max(), out5.max()) + + # Check masking + layer.reset_states() + + left_padded_input = np.ones((num_samples, timesteps)) + left_padded_input[0, :1] = 0 + left_padded_input[1, :2] = 0 + out6 = model.predict(left_padded_input) + + layer.reset_states() + + right_padded_input = np.ones((num_samples, timesteps)) + right_padded_input[0, -1:] = 0 + right_padded_input[1, -2:] = 0 + out7 = model.predict(right_padded_input) + + np.testing.assert_allclose(out7, out6, atol=1e-5) + + def test_get_initial_states(self): + batch_size = 4 + cell = keras.layers.SimpleRNNCell(20) + initial_state = cell.get_initial_state( + batch_size=batch_size, dtype=tf.float32 + ) + _, state = cell( + np.ones((batch_size, 20), dtype=np.float32), initial_state + ) + self.assertEqual(state.shape, initial_state.shape) + + @test_utils.run_v2_only + def 
test_cloned_weight_names(self): + inp = keras.Input([None, 3]) + rnn = keras.layers.SimpleRNN(units=3) + model = keras.Model(inp, rnn(inp)) + clone = keras.models.clone_model(model) + + model_names = [x.name for x in model.weights] + clone_names = [x.name for x in clone.weights] + self.assertEqual(model_names, clone_names) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/rnn/stacked_rnn_cells.py b/keras/layers/rnn/stacked_rnn_cells.py index 2a5ab8cdab05..46bb3091f3fb 100644 --- a/keras/layers/rnn/stacked_rnn_cells.py +++ b/keras/layers/rnn/stacked_rnn_cells.py @@ -13,168 +13,205 @@ # limitations under the License. # ============================================================================== """Wrapper allowing a stack of RNN cells to behave as a single cell.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + import functools +import tensorflow.compat.v2 as tf + from keras import backend from keras.engine import base_layer from keras.layers.rnn import rnn_utils +from keras.saving import serialization_lib from keras.utils import generic_utils from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.StackedRNNCells') +@keras_export("keras.layers.StackedRNNCells") class StackedRNNCells(base_layer.Layer): - """Wrapper allowing a stack of RNN cells to behave as a single cell. - - Used to implement efficient stacked RNNs. - - Args: - cells: List of RNN cell instances. - - Examples: - - ```python - batch_size = 3 - sentence_max_length = 5 - n_features = 2 - new_shape = (batch_size, sentence_max_length, n_features) - x = tf.constant(np.reshape(np.arange(30), new_shape), dtype = tf.float32) - - rnn_cells = [tf.keras.layers.LSTMCell(128) for _ in range(2)] - stacked_lstm = tf.keras.layers.StackedRNNCells(rnn_cells) - lstm_layer = tf.keras.layers.RNN(stacked_lstm) - - result = lstm_layer(x) - ``` - """ - - def __init__(self, cells, **kwargs): - for cell in cells: - if 'call' not in dir(cell): - raise ValueError('All cells must have a `call` method. ' - f'Received cell without a `call` method: {cell}') - if 'state_size' not in dir(cell): - raise ValueError('All cells must have a `state_size` attribute. ' - f'Received cell without a `state_size`: {cell}') - self.cells = cells - # reverse_state_order determines whether the state size will be in a reverse - # order of the cells' state. User might want to set this to True to keep the - # existing behavior. This is only useful when use RNN(return_state=True) - # since the state will be returned as the same order of state_size. - self.reverse_state_order = kwargs.pop('reverse_state_order', False) - if self.reverse_state_order: - logging.warning('reverse_state_order=True in StackedRNNCells will soon ' - 'be deprecated. 
Please update the code to work with the ' - 'natural order of states if you rely on the RNN states, ' - 'eg RNN(return_state=True).') - super().__init__(**kwargs) - - @property - def state_size(self): - return tuple(c.state_size for c in - (self.cells[::-1] if self.reverse_state_order else self.cells)) - - @property - def output_size(self): - if getattr(self.cells[-1], 'output_size', None) is not None: - return self.cells[-1].output_size - elif rnn_utils.is_multiple_state(self.cells[-1].state_size): - return self.cells[-1].state_size[0] - else: - return self.cells[-1].state_size - - def get_initial_state(self, inputs=None, batch_size=None, dtype=None): - initial_states = [] - for cell in self.cells[::-1] if self.reverse_state_order else self.cells: - get_initial_state_fn = getattr(cell, 'get_initial_state', None) - if get_initial_state_fn: - initial_states.append(get_initial_state_fn( - inputs=inputs, batch_size=batch_size, dtype=dtype)) - else: - initial_states.append(rnn_utils.generate_zero_filled_state_for_cell( - cell, inputs, batch_size, dtype)) - - return tuple(initial_states) - - def call(self, inputs, states, constants=None, training=None, **kwargs): - # Recover per-cell states. - state_size = (self.state_size[::-1] - if self.reverse_state_order else self.state_size) - nested_states = tf.nest.pack_sequence_as(state_size, - tf.nest.flatten(states)) - - # Call the cells in order and store the returned states. - new_nested_states = [] - for cell, states in zip(self.cells, nested_states): - states = states if tf.nest.is_nested(states) else [states] - # TF cell does not wrap the state into list when there is only one state. - is_tf_rnn_cell = getattr(cell, '_is_tf_rnn_cell', None) is not None - states = states[0] if len(states) == 1 and is_tf_rnn_cell else states - if generic_utils.has_arg(cell.call, 'training'): - kwargs['training'] = training - else: - kwargs.pop('training', None) - # Use the __call__ function for callable objects, eg layers, so that it - # will have the proper name scopes for the ops, etc. 
-      cell_call_fn = cell.__call__ if callable(cell) else cell.call
-      if generic_utils.has_arg(cell.call, 'constants'):
-        inputs, states = cell_call_fn(inputs, states,
-                                      constants=constants, **kwargs)
-      else:
-        inputs, states = cell_call_fn(inputs, states, **kwargs)
-      new_nested_states.append(states)
-
-    return inputs, tf.nest.pack_sequence_as(state_size,
-                                            tf.nest.flatten(new_nested_states))
-
-  @tf_utils.shape_type_conversion
-  def build(self, input_shape):
-    if isinstance(input_shape, list):
-      input_shape = input_shape[0]
-
-    def get_batch_input_shape(batch_size, dim):
-      shape = tf.TensorShape(dim).as_list()
-      return tuple([batch_size] + shape)
-
-    for cell in self.cells:
-      if isinstance(cell, base_layer.Layer) and not cell.built:
-        with backend.name_scope(cell.name):
-          cell.build(input_shape)
-          cell.built = True
-      if getattr(cell, 'output_size', None) is not None:
-        output_dim = cell.output_size
-      elif rnn_utils.is_multiple_state(cell.state_size):
-        output_dim = cell.state_size[0]
-      else:
-        output_dim = cell.state_size
-      batch_size = tf.nest.flatten(input_shape)[0]
-      if tf.nest.is_nested(output_dim):
-        input_shape = tf.nest.map_structure(
-            functools.partial(get_batch_input_shape, batch_size), output_dim)
-        input_shape = tuple(input_shape)
-      else:
-        input_shape = tuple([batch_size] + tf.TensorShape(output_dim).as_list())
-    self.built = True
-
-  def get_config(self):
-    cells = []
-    for cell in self.cells:
-      cells.append(generic_utils.serialize_keras_object(cell))
-    config = {'cells': cells}
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    from keras.layers import deserialize as deserialize_layer  # pylint: disable=g-import-not-at-top
-    cells = []
-    for cell_config in config.pop('cells'):
-      cells.append(
-          deserialize_layer(cell_config, custom_objects=custom_objects))
-    return cls(cells, **config)
+    """Wrapper allowing a stack of RNN cells to behave as a single cell.
+
+    Used to implement efficient stacked RNNs.
+
+    Args:
+        cells: List of RNN cell instances.
+
+    Examples:
+
+    ```python
+    batch_size = 3
+    sentence_max_length = 5
+    n_features = 2
+    new_shape = (batch_size, sentence_max_length, n_features)
+    x = tf.constant(np.reshape(np.arange(30), new_shape), dtype=tf.float32)
+
+    rnn_cells = [tf.keras.layers.LSTMCell(128) for _ in range(2)]
+    stacked_lstm = tf.keras.layers.StackedRNNCells(rnn_cells)
+    lstm_layer = tf.keras.layers.RNN(stacked_lstm)
+
+    result = lstm_layer(x)
+    ```
+    """
+
+    def __init__(self, cells, **kwargs):
+        for cell in cells:
+            if "call" not in dir(cell):
+                raise ValueError(
+                    "All cells must have a `call` method. "
+                    f"Received cell without a `call` method: {cell}"
+                )
+            if "state_size" not in dir(cell):
+                raise ValueError(
+                    "All cells must have a `state_size` attribute. "
+                    f"Received cell without a `state_size`: {cell}"
+                )
+        self.cells = cells
+        # reverse_state_order determines whether the state sizes are listed
+        # in the reverse order of the cells' states. Users may want to set
+        # this to True to keep the existing behavior. This is only useful
+        # when using RNN(return_state=True), since the states are returned
+        # in the same order as state_size.
+        self.reverse_state_order = kwargs.pop("reverse_state_order", False)
+        if self.reverse_state_order:
+            logging.warning(
+                "reverse_state_order=True in StackedRNNCells will soon "
+                "be deprecated. 
Please update the code to work with the " + "natural order of states if you rely on the RNN states, " + "eg RNN(return_state=True)." + ) + super().__init__(**kwargs) + + @property + def state_size(self): + return tuple( + c.state_size + for c in ( + self.cells[::-1] if self.reverse_state_order else self.cells + ) + ) + + @property + def output_size(self): + if getattr(self.cells[-1], "output_size", None) is not None: + return self.cells[-1].output_size + elif rnn_utils.is_multiple_state(self.cells[-1].state_size): + return self.cells[-1].state_size[0] + else: + return self.cells[-1].state_size + + def get_initial_state(self, inputs=None, batch_size=None, dtype=None): + initial_states = [] + for cell in ( + self.cells[::-1] if self.reverse_state_order else self.cells + ): + get_initial_state_fn = getattr(cell, "get_initial_state", None) + if get_initial_state_fn: + initial_states.append( + get_initial_state_fn( + inputs=inputs, batch_size=batch_size, dtype=dtype + ) + ) + else: + initial_states.append( + rnn_utils.generate_zero_filled_state_for_cell( + cell, inputs, batch_size, dtype + ) + ) + + return tuple(initial_states) + + def call(self, inputs, states, constants=None, training=None, **kwargs): + # Recover per-cell states. + state_size = ( + self.state_size[::-1] + if self.reverse_state_order + else self.state_size + ) + nested_states = tf.nest.pack_sequence_as( + state_size, tf.nest.flatten(states) + ) + + # Call the cells in order and store the returned states. + new_nested_states = [] + for cell, states in zip(self.cells, nested_states): + states = states if tf.nest.is_nested(states) else [states] + # TF cell does not wrap the state into list when there is only one + # state. + is_tf_rnn_cell = getattr(cell, "_is_tf_rnn_cell", None) is not None + states = ( + states[0] if len(states) == 1 and is_tf_rnn_cell else states + ) + if generic_utils.has_arg(cell.call, "training"): + kwargs["training"] = training + else: + kwargs.pop("training", None) + # Use the __call__ function for callable objects, eg layers, so that + # it will have the proper name scopes for the ops, etc. 
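+            # Fall back to `cell.call` when the cell object itself is not
+            # callable; `constants` are forwarded below only if the cell's
+            # `call` signature accepts a `constants` argument.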
+ cell_call_fn = cell.__call__ if callable(cell) else cell.call + if generic_utils.has_arg(cell.call, "constants"): + inputs, states = cell_call_fn( + inputs, states, constants=constants, **kwargs + ) + else: + inputs, states = cell_call_fn(inputs, states, **kwargs) + new_nested_states.append(states) + + return inputs, tf.nest.pack_sequence_as( + state_size, tf.nest.flatten(new_nested_states) + ) + + @tf_utils.shape_type_conversion + def build(self, input_shape): + if isinstance(input_shape, list): + input_shape = input_shape[0] + + def get_batch_input_shape(batch_size, dim): + shape = tf.TensorShape(dim).as_list() + return tuple([batch_size] + shape) + + for cell in self.cells: + if isinstance(cell, base_layer.Layer) and not cell.built: + with backend.name_scope(cell.name): + cell.build(input_shape) + cell.built = True + if getattr(cell, "output_size", None) is not None: + output_dim = cell.output_size + elif rnn_utils.is_multiple_state(cell.state_size): + output_dim = cell.state_size[0] + else: + output_dim = cell.state_size + batch_size = tf.nest.flatten(input_shape)[0] + if tf.nest.is_nested(output_dim): + input_shape = tf.nest.map_structure( + functools.partial(get_batch_input_shape, batch_size), + output_dim, + ) + input_shape = tuple(input_shape) + else: + input_shape = tuple( + [batch_size] + tf.TensorShape(output_dim).as_list() + ) + self.built = True + + def get_config(self): + cells = [] + for cell in self.cells: + cells.append(serialization_lib.serialize_keras_object(cell)) + config = {"cells": cells} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + from keras.layers import deserialize as deserialize_layer + + cells = [] + for cell_config in config.pop("cells"): + cells.append( + deserialize_layer(cell_config, custom_objects=custom_objects) + ) + return cls(cells, **config) diff --git a/keras/layers/rnn/time_distributed.py b/keras/layers/rnn/time_distributed.py index f0a995afd8e0..27f28236394e 100644 --- a/keras/layers/rnn/time_distributed.py +++ b/keras/layers/rnn/time_distributed.py @@ -13,7 +13,9 @@ # limitations under the License. # ============================================================================== """Wrapper layer to apply every temporal slice of an input.""" -# pylint: disable=g-classes-have-attributes,g-direct-tensorflow-import + + +import tensorflow.compat.v2 as tf from keras import backend from keras.engine.base_layer import Layer @@ -22,306 +24,329 @@ from keras.utils import generic_utils from keras.utils import layer_utils from keras.utils import tf_utils -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.layers.TimeDistributed') +@keras_export("keras.layers.TimeDistributed") class TimeDistributed(Wrapper): - """This wrapper allows to apply a layer to every temporal slice of an input. - - Every input should be at least 3D, and the dimension of index one of the - first input will be considered to be the temporal dimension. - - Consider a batch of 32 video samples, where each sample is a 128x128 RGB image - with `channels_last` data format, across 10 timesteps. - The batch input shape is `(32, 10, 128, 128, 3)`. 
- - You can then use `TimeDistributed` to apply the same `Conv2D` layer to each - of the 10 timesteps, independently: - - >>> inputs = tf.keras.Input(shape=(10, 128, 128, 3)) - >>> conv_2d_layer = tf.keras.layers.Conv2D(64, (3, 3)) - >>> outputs = tf.keras.layers.TimeDistributed(conv_2d_layer)(inputs) - >>> outputs.shape - TensorShape([None, 10, 126, 126, 64]) - - Because `TimeDistributed` applies the same instance of `Conv2D` to each of the - timestamps, the same set of weights are used at each timestamp. - - Args: - layer: a `tf.keras.layers.Layer` instance. - - Call arguments: - inputs: Input tensor of shape (batch, time, ...) or nested tensors, - and each of which has shape (batch, time, ...). - training: Python boolean indicating whether the layer should behave in - training mode or in inference mode. This argument is passed to the - wrapped layer (only if the layer supports this argument). - mask: Binary tensor of shape `(samples, timesteps)` indicating whether - a given timestep should be masked. This argument is passed to the - wrapped layer (only if the layer supports this argument). - - Raises: - ValueError: If not initialized with a `tf.keras.layers.Layer` instance. - """ - - def __init__(self, layer, **kwargs): - if not isinstance(layer, Layer): - raise ValueError( - 'Please initialize `TimeDistributed` layer with a ' - f'`tf.keras.layers.Layer` instance. Received: {layer}') - super().__init__(layer, **kwargs) - self.supports_masking = True - - # It is safe to use the fast, reshape-based approach with all of our - # built-in Layers. - self._always_use_reshape = ( - layer_utils.is_builtin_layer(layer) and - not getattr(layer, 'stateful', False)) - - def _get_shape_tuple(self, init_tuple, tensor, start_idx, int_shape=None): - """Finds non-specific dimensions in the static shapes. - - The static shapes are replaced with the corresponding dynamic shapes of the - tensor. - Args: - init_tuple: a tuple, the first part of the output shape - tensor: the tensor from which to get the (static and dynamic) shapes - as the last part of the output shape - start_idx: int, which indicate the first dimension to take from - the static shape of the tensor - int_shape: an alternative static shape to take as the last part - of the output shape - Returns: - The new int_shape with the first part from init_tuple - and the last part from either `int_shape` (if provided) - or `tensor.shape`, where every `None` is replaced by - the corresponding dimension from `tf.shape(tensor)`. - """ - # replace all None in int_shape by backend.shape - if int_shape is None: - int_shape = backend.int_shape(tensor)[start_idx:] - if isinstance(int_shape, tf.TensorShape): - int_shape = int_shape.as_list() - if not any(not s for s in int_shape): - return init_tuple + tuple(int_shape) - shape = backend.shape(tensor) - int_shape = list(int_shape) - for i, s in enumerate(int_shape): - if not s: - int_shape[i] = shape[start_idx + i] - return init_tuple + tuple(int_shape) - - def _remove_timesteps(self, dims): - dims = dims.as_list() - return tf.TensorShape([dims[0]] + dims[2:]) - - def build(self, input_shape): - input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False) - input_dims = tf.nest.flatten( - tf.nest.map_structure(lambda x: x.ndims, input_shape)) - if any(dim < 3 for dim in input_dims): - raise ValueError( - '`TimeDistributed` Layer should be passed an `input_shape ` ' - f'with at least 3 dimensions, received: {input_shape}') - # Don't enforce the batch or time dimension. 
- self.input_spec = tf.nest.map_structure( - lambda x: InputSpec(shape=[None, None] + x.as_list()[2:]), input_shape) - child_input_shape = tf.nest.map_structure(self._remove_timesteps, - input_shape) - child_input_shape = tf_utils.convert_shapes(child_input_shape) - super().build(tuple(child_input_shape)) - self.built = True - - def compute_output_shape(self, input_shape): - input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False) - - child_input_shape = tf.nest.map_structure(self._remove_timesteps, - input_shape) - child_output_shape = self.layer.compute_output_shape(child_input_shape) - child_output_shape = tf_utils.convert_shapes( - child_output_shape, to_tuples=False) - timesteps = tf_utils.convert_shapes(input_shape) - timesteps = tf.nest.flatten(timesteps)[1] - - def insert_timesteps(dims): - dims = dims.as_list() - return tf.TensorShape([dims[0], timesteps] + dims[1:]) - - return tf.nest.map_structure(insert_timesteps, child_output_shape) - - def call(self, inputs, training=None, mask=None): - kwargs = {} - if generic_utils.has_arg(self.layer.call, 'training'): - kwargs['training'] = training - - input_shape = tf.nest.map_structure( - lambda x: tf.TensorShape(backend.int_shape(x)), inputs) - batch_size = tf_utils.convert_shapes(input_shape) - batch_size = tf.nest.flatten(batch_size)[0] - if batch_size and not self._always_use_reshape: - inputs, row_lengths = backend.convert_inputs_if_ragged(inputs) - is_ragged_input = row_lengths is not None - input_length = tf_utils.convert_shapes(input_shape) - input_length = tf.nest.flatten(input_length)[1] - - # batch size matters, use rnn-based implementation - def step(x, _): - output = self.layer(x, **kwargs) - return output, [] - - _, outputs, _ = backend.rnn( - step, - inputs, - initial_states=[], - input_length=row_lengths[0] if is_ragged_input else input_length, - mask=mask, - unroll=False) - # pylint: disable=g-long-lambda - y = tf.nest.map_structure( - lambda output: backend.maybe_convert_to_ragged( - is_ragged_input, output, row_lengths), outputs) - else: - # No batch size specified, therefore the layer will be able - # to process batches of any size. - # We can go with reshape-based implementation for performance. - is_ragged_input = tf.nest.map_structure( - lambda x: isinstance(x, tf.RaggedTensor), inputs) - is_ragged_input = tf.nest.flatten(is_ragged_input) - if all(is_ragged_input): - input_values = tf.nest.map_structure(lambda x: x.values, inputs) - input_row_lenghts = tf.nest.map_structure( - lambda x: x.nested_row_lengths()[0], inputs) - y = self.layer(input_values, **kwargs) - y = tf.nest.map_structure(tf.RaggedTensor.from_row_lengths, y, - input_row_lenghts) - elif any(is_ragged_input): - raise ValueError('All inputs has to be either ragged or not, ' - f'but not mixed. Received: {inputs}') - else: - input_length = tf_utils.convert_shapes(input_shape) - input_length = tf.nest.flatten(input_length)[1] - if not input_length: - input_length = tf.nest.map_structure(lambda x: tf.shape(x)[1], inputs) - input_length = generic_utils.to_list(tf.nest.flatten(input_length))[0] + """This wrapper allows to apply a layer to every temporal slice of an input. - inner_input_shape = tf.nest.map_structure( - lambda x: self._get_shape_tuple((-1,), x, 2), inputs) - # Shape: (num_samples * timesteps, ...). And track the - # transformation in self._input_map. - inputs = tf.__internal__.nest.map_structure_up_to( - inputs, tf.reshape, inputs, inner_input_shape) - # (num_samples * timesteps, ...) 
- if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None: - inner_mask_shape = self._get_shape_tuple((-1,), mask, 2) - kwargs['mask'] = backend.reshape(mask, inner_mask_shape) - - y = self.layer(inputs, **kwargs) - - # Shape: (num_samples, timesteps, ...) - output_shape = self.compute_output_shape(input_shape) - # pylint: disable=g-long-lambda - output_shape = tf.nest.map_structure( - lambda tensor, int_shape: self._get_shape_tuple( - (-1, input_length), tensor, 1, int_shape[2:]), y, output_shape) - y = tf.__internal__.nest.map_structure_up_to(y, tf.reshape, y, - output_shape) - if not tf.executing_eagerly(): - # Set the static shape for the result since it might be lost during - # array_ops reshape, eg, some `None` dim in the result could be - # inferred. - tf.__internal__.nest.map_structure_up_to( - y, lambda tensor, shape: tensor.set_shape(shape), y, - self.compute_output_shape(input_shape)) - - return y - - def compute_mask(self, inputs, mask=None): - """Computes an output mask tensor for Embedding layer. - - This is based on the inputs, mask, and the inner layer. - If batch size is specified: - Simply return the input `mask`. (An rnn-based implementation with - more than one rnn inputs is required but not supported in tf.keras yet.) - Otherwise we call `compute_mask` of the inner layer at each time step. - If the output mask at each time step is not `None`: - (E.g., inner layer is Masking or RNN) - Concatenate all of them and return the concatenation. - If the output mask at each time step is `None` and the input mask is not - `None`:(E.g., inner layer is Dense) - Reduce the input_mask to 2 dimensions and return it. - Otherwise (both the output mask and the input mask are `None`): - (E.g., `mask` is not used at all) - Return `None`. + Every input should be at least 3D, and the dimension of index one of the + first input will be considered to be the temporal dimension. - Args: - inputs: Tensor with shape [batch size, timesteps, ...] indicating the - input to TimeDistributed. If static shape information is available for - "batch size", `mask` is returned unmodified. - mask: Either None (indicating no masking) or a Tensor indicating the - input mask for TimeDistributed. The shape can be static or dynamic. - - Returns: - Either None (no masking), or a [batch size, timesteps, ...] Tensor with - an output mask for the TimeDistributed layer with the shape beyond the - second dimension being the value of the input mask shape(if the computed - output mask is none), an output mask with the shape beyond the first - dimension being the value of the mask shape(if mask is not None) or - output mask with the shape beyond the first dimension being the - value of the computed output shape. + Consider a batch of 32 video samples, where each sample is a 128x128 RGB + image with `channels_last` data format, across 10 timesteps. + The batch input shape is `(32, 10, 128, 128, 3)`. + You can then use `TimeDistributed` to apply the same `Conv2D` layer to each + of the 10 timesteps, independently: + + >>> inputs = tf.keras.Input(shape=(10, 128, 128, 3)) + >>> conv_2d_layer = tf.keras.layers.Conv2D(64, (3, 3)) + >>> outputs = tf.keras.layers.TimeDistributed(conv_2d_layer)(inputs) + >>> outputs.shape + TensorShape([None, 10, 126, 126, 64]) + + Because `TimeDistributed` applies the same instance of `Conv2D` to each of + the timestamps, the same set of weights are used at each timestamp. + + Args: + layer: a `tf.keras.layers.Layer` instance. 
+ + Call arguments: + inputs: Input tensor of shape (batch, time, ...) or nested tensors, + and each of which has shape (batch, time, ...). + training: Python boolean indicating whether the layer should behave in + training mode or in inference mode. This argument is passed to the + wrapped layer (only if the layer supports this argument). + mask: Binary tensor of shape `(samples, timesteps)` indicating whether + a given timestep should be masked. This argument is passed to the + wrapped layer (only if the layer supports this argument). + + Raises: + ValueError: If not initialized with a `tf.keras.layers.Layer` instance. """ - # cases need to call the layer.compute_mask when input_mask is None: - # Masking layer and Embedding layer with mask_zero - input_shape = tf.nest.map_structure( - lambda x: tf.TensorShape(backend.int_shape(x)), inputs) - input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False) - batch_size = tf_utils.convert_shapes(input_shape) - batch_size = tf.nest.flatten(batch_size)[0] - is_ragged_input = tf.nest.map_structure( - lambda x: isinstance(x, tf.RaggedTensor), inputs) - is_ragged_input = generic_utils.to_list(tf.nest.flatten(is_ragged_input)) - if batch_size and not self._always_use_reshape or any(is_ragged_input): - # batch size matters, we currently do not handle mask explicitly, or if - # the layer always uses reshape approach, or the input is a ragged tensor. - return mask - inner_mask = mask - if inner_mask is not None: - inner_mask_shape = self._get_shape_tuple((-1,), mask, 2) - inner_mask = backend.reshape(inner_mask, inner_mask_shape) - inner_input_shape = tf.nest.map_structure( - lambda tensor: self._get_shape_tuple((-1,), tensor, 2), inputs) - inner_inputs = tf.__internal__.nest.map_structure_up_to( - inputs, tf.reshape, inputs, inner_input_shape) - output_mask = self.layer.compute_mask(inner_inputs, inner_mask) - if output_mask is None: - if mask is None: - return None - # input_mask is not None, and output_mask is None: - # we should return a not-None mask - output_mask = mask - for _ in range(2, len(backend.int_shape(mask))): - output_mask = backend.any(output_mask, axis=-1) - else: - # output_mask is not None. We need to reshape it - input_length = tf_utils.convert_shapes(input_shape) - input_length = tf.nest.flatten(input_length)[1] - if not input_length: - input_length = tf.nest.map_structure(lambda x: backend.shape(x)[1], - inputs) - input_length = tf.nest.flatten(input_length)[0] - output_mask_int_shape = backend.int_shape(output_mask) - if output_mask_int_shape is None: - # if the output_mask does not have a static shape, - # its shape must be the same as mask's - if mask is not None: - output_mask_int_shape = backend.int_shape(mask) + + def __init__(self, layer, **kwargs): + if not isinstance(layer, Layer): + raise ValueError( + "Please initialize `TimeDistributed` layer with a " + f"`tf.keras.layers.Layer` instance. Received: {layer}" + ) + super().__init__(layer, **kwargs) + self.supports_masking = True + + # It is safe to use the fast, reshape-based approach with all of our + # built-in Layers. + self._always_use_reshape = layer_utils.is_builtin_layer( + layer + ) and not getattr(layer, "stateful", False) + + def _get_shape_tuple(self, init_tuple, tensor, start_idx): + """Finds non-specific dimensions in the static shapes. + + The static shapes are replaced with the corresponding dynamic shapes of + the tensor. 
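+
+        For example (illustrative): with `init_tuple=(-1,)` and
+        `start_idx=1`, a tensor of static shape `(None, 10, 2)` yields
+        `(-1, 10, 2)`; a `None` at or after `start_idx` would be replaced
+        by the corresponding entry of `tf.shape(tensor)`.
+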
+        Args:
+            init_tuple: a tuple, the first part of the output shape
+            tensor: the tensor from which to get the (static and dynamic)
+                shapes as the last part of the output shape
+            start_idx: int, which indicates the first dimension to take from
+                the static shape of the tensor
+        Returns:
+            The new shape with the first part from `init_tuple` and the last
+            part from `tensor.shape`, where every `None` is replaced by the
+            corresponding dimension from `tf.shape(tensor)`.
+        """
+        # Replace every `None` in the static shape with the corresponding
+        # dynamic dimension obtained from `backend.shape`.
+        int_shape = backend.int_shape(tensor)[start_idx:]
+        if not any(s is None for s in int_shape):
+            return init_tuple + int_shape
+        shape = backend.shape(tensor)
+        int_shape = list(int_shape)
+        for i, s in enumerate(int_shape):
+            if s is None:
+                int_shape[i] = shape[start_idx + i]
+        return init_tuple + tuple(int_shape)
+
+    def _remove_timesteps(self, dims):
+        dims = dims.as_list()
+        return tf.TensorShape([dims[0]] + dims[2:])
+
+    def build(self, input_shape):
+        input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
+        input_dims = tf.nest.flatten(
+            tf.nest.map_structure(lambda x: x.ndims, input_shape)
+        )
+        if any(dim < 3 for dim in input_dims):
+            raise ValueError(
+                "`TimeDistributed` Layer should be passed an `input_shape ` "
+                f"with at least 3 dimensions, received: {input_shape}"
+            )
+        # Don't enforce the batch or time dimension.
+        self.input_spec = tf.nest.map_structure(
+            lambda x: InputSpec(shape=[None, None] + x.as_list()[2:]),
+            input_shape,
+        )
+        child_input_shape = tf.nest.map_structure(
+            self._remove_timesteps, input_shape
+        )
+        child_input_shape = tf_utils.convert_shapes(child_input_shape)
+        super().build(tuple(child_input_shape))
+        self.built = True
+
+    def compute_output_shape(self, input_shape):
+        input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
+
+        child_input_shape = tf.nest.map_structure(
+            self._remove_timesteps, input_shape
+        )
+        child_output_shape = self.layer.compute_output_shape(child_input_shape)
+        child_output_shape = tf_utils.convert_shapes(
+            child_output_shape, to_tuples=False
+        )
+        timesteps = tf_utils.convert_shapes(input_shape)
+        timesteps = tf.nest.flatten(timesteps)[1]
+
+        def insert_timesteps(dims):
+            dims = dims.as_list()
+            return tf.TensorShape([dims[0], timesteps] + dims[1:])
+
+        return tf.nest.map_structure(insert_timesteps, child_output_shape)
+
+    def call(self, inputs, training=None, mask=None):
+        kwargs = {}
+        if generic_utils.has_arg(self.layer.call, "training"):
+            kwargs["training"] = training
+
+        input_shape = tf.nest.map_structure(
+            lambda x: tf.TensorShape(backend.int_shape(x)), inputs
+        )
+        batch_size = tf_utils.convert_shapes(input_shape)
+        batch_size = tf.nest.flatten(batch_size)[0]
+        if batch_size and not self._always_use_reshape:
+            inputs, row_lengths = backend.convert_inputs_if_ragged(inputs)
+            is_ragged_input = row_lengths is not None
+            input_length = tf_utils.convert_shapes(input_shape)
+            input_length = tf.nest.flatten(input_length)[1]
+
+            # batch size matters, use rnn-based implementation
+            def step(x, _):
+                output = self.layer(x, **kwargs)
+                return output, []
+
+            _, outputs, _ = backend.rnn(
+                step,
+                inputs,
+                initial_states=[],
+                input_length=row_lengths[0]
+                if is_ragged_input
+                else input_length,
+                mask=mask,
+                unroll=False,
+            )
+
+            y = tf.nest.map_structure(
+                lambda output: backend.maybe_convert_to_ragged(
+                    is_ragged_input, output, row_lengths
+                ),
+                outputs,
+            )
+        else:
+            # No batch size specified, therefore the layer will be able
+            # to process batches of any size. 
+            # We can go with the reshape-based implementation for
+            # performance.
+            is_ragged_input = tf.nest.map_structure(
+                lambda x: isinstance(x, tf.RaggedTensor), inputs
+            )
+            is_ragged_input = tf.nest.flatten(is_ragged_input)
+            if all(is_ragged_input):
+                input_values = tf.nest.map_structure(lambda x: x.values, inputs)
+                input_row_lengths = tf.nest.map_structure(
+                    lambda x: x.nested_row_lengths()[0], inputs
+                )
+                y = self.layer(input_values, **kwargs)
+                y = tf.nest.map_structure(
+                    tf.RaggedTensor.from_row_lengths, y, input_row_lengths
+                )
+            elif any(is_ragged_input):
+                raise ValueError(
+                    "All inputs have to be either ragged or not, "
+                    f"but not mixed. Received: {inputs}"
+                )
+            else:
+                input_length = tf_utils.convert_shapes(input_shape)
+                input_length = tf.nest.flatten(input_length)[1]
+                if not input_length:
+                    input_length = tf.nest.map_structure(
+                        lambda x: tf.shape(x)[1], inputs
+                    )
+                    input_length = generic_utils.to_list(
+                        tf.nest.flatten(input_length)
+                    )[0]
+
+                inner_input_shape = tf.nest.map_structure(
+                    lambda x: self._get_shape_tuple((-1,), x, 2), inputs
+                )
+                # Shape: (num_samples * timesteps, ...).
+                inputs = tf.__internal__.nest.map_structure_up_to(
+                    inputs, tf.reshape, inputs, inner_input_shape
+                )
+                # (num_samples * timesteps, ...)
+                if (
+                    generic_utils.has_arg(self.layer.call, "mask")
+                    and mask is not None
+                ):
+                    inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
+                    kwargs["mask"] = backend.reshape(mask, inner_mask_shape)
+
+                y = self.layer(inputs, **kwargs)
+
+                # Reconstruct the output shape by re-splitting the 0th
+                # dimension back into (num_samples, timesteps, ...).
+                # We use batch_size when available so that the 0th dimension
+                # is set in the static shape of the reshaped output.
+                reshape_batch_size = batch_size if batch_size else -1
+                output_shape = tf.nest.map_structure(
+                    lambda tensor: self._get_shape_tuple(
+                        (reshape_batch_size, input_length), tensor, 1
+                    ),
+                    y,
+                )
+                y = tf.__internal__.nest.map_structure_up_to(
+                    y, tf.reshape, y, output_shape
+                )
+
+        return y
+
+    def compute_mask(self, inputs, mask=None):
+        """Computes an output mask tensor for the TimeDistributed layer.
+
+        This is based on the inputs, mask, and the inner layer.
+        If batch size is specified:
+            Simply return the input `mask`. (An rnn-based implementation
+            with more than one RNN input is required but not supported in
+            tf.keras yet.)
+        Otherwise we call `compute_mask` of the inner layer at each time
+        step. If the output mask at each time step is not `None`
+        (e.g., the inner layer is Masking or an RNN), concatenate all of
+        them and return the concatenation. If the output mask at each time
+        step is `None` and the input mask is not `None` (e.g., the inner
+        layer is Dense), reduce the input mask to 2 dimensions and return
+        it. Otherwise (both the output mask and the input mask are `None`,
+        e.g., `mask` is not used at all), return `None`.
+
+        Args:
+            inputs: Tensor with shape [batch size, timesteps, ...]
+                indicating the input to TimeDistributed. If static shape
+                information is available for "batch size", `mask` is
+                returned unmodified.
+            mask: Either None (indicating no masking) or a Tensor indicating
+                the input mask for TimeDistributed. The shape can be static
+                or dynamic.
+
+        Returns:
+            Either None (no masking), or a [batch size, timesteps, ...] 
+            Tensor with an output mask for the TimeDistributed layer, with
+            the shape beyond the second dimension being the value of the
+            input mask shape (if the computed output mask is None), an
+            output mask with the shape beyond the first dimension being the
+            value of the mask shape (if mask is not None), or an output
+            mask with the shape beyond the first dimension being the value
+            of the computed output shape.
+        """
+        # Cases that need to call layer.compute_mask even when input_mask
+        # is None: a Masking layer, or an Embedding layer with mask_zero.
+        input_shape = tf.nest.map_structure(
+            lambda x: tf.TensorShape(backend.int_shape(x)), inputs
+        )
+        input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
+        batch_size = tf_utils.convert_shapes(input_shape)
+        batch_size = tf.nest.flatten(batch_size)[0]
+        is_ragged_input = tf.nest.map_structure(
+            lambda x: isinstance(x, tf.RaggedTensor), inputs
+        )
+        is_ragged_input = generic_utils.to_list(
+            tf.nest.flatten(is_ragged_input)
+        )
+        if batch_size and not self._always_use_reshape or any(is_ragged_input):
+            # The batch size matters and we do not currently handle the mask
+            # explicitly, or the layer always uses the reshape approach, or
+            # the input is a ragged tensor.
+            return mask
+        inner_mask = mask
+        if inner_mask is not None:
+            inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
+            inner_mask = backend.reshape(inner_mask, inner_mask_shape)
+        inner_input_shape = tf.nest.map_structure(
+            lambda tensor: self._get_shape_tuple((-1,), tensor, 2), inputs
+        )
+        inner_inputs = tf.__internal__.nest.map_structure_up_to(
+            inputs, tf.reshape, inputs, inner_input_shape
+        )
+        output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
+        if output_mask is None:
+            if mask is None:
+                return None
+            # input_mask is not None, and output_mask is None:
+            # we should return a not-None mask
+            output_mask = mask
+            for _ in range(2, len(backend.int_shape(mask))):
+                output_mask = backend.any(output_mask, axis=-1)
+        else:
+            # output_mask is not None. 
We need to reshape it + input_length = tf_utils.convert_shapes(input_shape) + input_length = tf.nest.flatten(input_length)[1] + if not input_length: + input_length = tf.nest.map_structure( + lambda x: backend.shape(x)[1], inputs + ) + input_length = tf.nest.flatten(input_length)[0] + reshape_batch_size = batch_size if batch_size else -1 + output_mask_shape = self._get_shape_tuple( + (reshape_batch_size, input_length), output_mask, 1 + ) + output_mask = backend.reshape(output_mask, output_mask_shape) + return output_mask diff --git a/keras/layers/rnn/time_distributed_test.py b/keras/layers/rnn/time_distributed_test.py index 74cce5b3a388..432fa3ad26f3 100644 --- a/keras/layers/rnn/time_distributed_test.py +++ b/keras/layers/rnn/time_distributed_test.py @@ -15,470 +15,560 @@ """Tests for TimeDistributed wrapper.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.training.tracking import util as trackable_util +# isort: off +from tensorflow.python.checkpoint import ( + checkpoint as trackable_util, +) class TimeDistributedTest(test_combinations.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_timedistributed_dense(self): - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(2), input_shape=(3, 4))) - model.compile(optimizer='rmsprop', loss='mse') - model.fit( - np.random.random((10, 3, 4)), - np.random.random((10, 3, 2)), - epochs=1, - batch_size=10) - - # test config - model.get_config() - - # check whether the model variables are present in the - # trackable list of objects - checkpointed_object_ids = { - id(o) for o in trackable_util.list_objects(model) - } - for v in model.variables: - self.assertIn(id(v), checkpointed_object_ids) - - def test_timedistributed_static_batch_size(self): - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(2), input_shape=(3, 4), batch_size=10)) - model.compile(optimizer='rmsprop', loss='mse') - model.fit( - np.random.random((10, 3, 4)), - np.random.random((10, 3, 2)), - epochs=1, - batch_size=10) - - def test_timedistributed_invalid_init(self): - x = tf.constant(np.zeros((1, 1)).astype('float32')) - with self.assertRaisesRegex( - ValueError, 'Please initialize `TimeDistributed` layer with a ' - '`tf.keras.layers.Layer` instance.'): - keras.layers.TimeDistributed(x) - - def test_timedistributed_conv2d(self): - with self.cached_session(): - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Conv2D(5, (2, 2), padding='same'), - input_shape=(2, 4, 4, 3))) - model.add(keras.layers.Activation('relu')) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - np.random.random((1, 2, 4, 4, 3)), np.random.random((1, 2, 4, 4, 5))) - - model = keras.models.model_from_json(model.to_json()) - model.summary() - - def test_timedistributed_stacked(self): - with self.cached_session(): - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(2), input_shape=(3, 4))) - model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) - model.add(keras.layers.Activation('relu')) - model.compile(optimizer='rmsprop', loss='mse') - - model.fit( - np.random.random((10, 3, 4)), - np.random.random((10, 
3, 3)), - epochs=1, - batch_size=10) - - def test_regularizers(self): - with self.cached_session(): - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(2, kernel_regularizer='l1', - activity_regularizer='l1'), - input_shape=(3, 4))) - model.add(keras.layers.Activation('relu')) - model.compile(optimizer='rmsprop', loss='mse') - self.assertEqual(len(model.losses), 2) - - def test_TimeDistributed_learning_phase(self): - with self.cached_session(): - # test layers that need learning_phase to be set - np.random.seed(1234) - x = keras.layers.Input(shape=(3, 2)) - y = keras.layers.TimeDistributed(keras.layers.Dropout(.999))( - x, training=True) - model = keras.models.Model(x, y) - y = model.predict(np.random.random((10, 3, 2))) - self.assertAllClose(np.mean(y), 0., atol=1e-1, rtol=1e-1) - - def test_TimeDistributed_batchnorm(self): - with self.cached_session(): - # test that wrapped BN updates still work. - model = keras.models.Sequential() - model.add(keras.layers.TimeDistributed( - keras.layers.BatchNormalization(center=True, scale=True), - name='bn', - input_shape=(10, 2))) - model.compile(optimizer='rmsprop', loss='mse') - # Assert that mean and variance are 0 and 1. - td = model.layers[0] - self.assertAllClose(td.get_weights()[2], np.array([0, 0])) - assert np.array_equal(td.get_weights()[3], np.array([1, 1])) - # Train - model.train_on_batch(np.random.normal(loc=2, scale=2, size=(1, 10, 2)), - np.broadcast_to(np.array([0, 1]), (1, 10, 2))) - # Assert that mean and variance changed. - assert not np.array_equal(td.get_weights()[2], np.array([0, 0])) - assert not np.array_equal(td.get_weights()[3], np.array([1, 1])) - - def test_TimeDistributed_trainable(self): - # test layers that need learning_phase to be set - x = keras.layers.Input(shape=(3, 2)) - layer = keras.layers.TimeDistributed(keras.layers.BatchNormalization()) - _ = layer(x) - self.assertEqual(len(layer.trainable_weights), 2) - layer.trainable = False - assert not layer.trainable_weights - layer.trainable = True - assert len(layer.trainable_weights) == 2 - - def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(self): - with self.cached_session(): - # test with unspecified shape and Embeddings with mask_zero - model = keras.models.Sequential() - model.add(keras.layers.TimeDistributed( - keras.layers.Embedding(5, 6, mask_zero=True), - input_shape=(None, None))) # N by t_1 by t_2 by 6 - model.add(keras.layers.TimeDistributed( - keras.layers.SimpleRNN(7, return_sequences=True))) - model.add(keras.layers.TimeDistributed( - keras.layers.SimpleRNN(8, return_sequences=False))) - model.add(keras.layers.SimpleRNN(1, return_sequences=False)) - model.compile(optimizer='rmsprop', loss='mse') - model_input = np.random.randint(low=1, high=5, size=(10, 3, 4), - dtype='int32') - for i in range(4): - model_input[i, i:, i:] = 0 - model.fit(model_input, - np.random.random((10, 1)), epochs=1, batch_size=10) - mask_outputs = [model.layers[0].compute_mask(model.input)] - for layer in model.layers[1:]: - mask_outputs.append(layer.compute_mask(layer.input, mask_outputs[-1])) - func = keras.backend.function([model.input], mask_outputs[:-1]) - mask_outputs_val = func([model_input]) - ref_mask_val_0 = model_input > 0 # embedding layer - ref_mask_val_1 = ref_mask_val_0 # first RNN layer - ref_mask_val_2 = np.any(ref_mask_val_1, axis=-1) # second RNN layer - ref_mask_val = [ref_mask_val_0, ref_mask_val_1, ref_mask_val_2] - for i in range(3): - self.assertAllEqual(mask_outputs_val[i], 
ref_mask_val[i]) - self.assertIs(mask_outputs[-1], None) # final layer - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_TimeDistributed_with_masking_layer(self): - # test with Masking layer - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Masking(mask_value=0.,), input_shape=(None, 4))) - model.add(keras.layers.TimeDistributed(keras.layers.Dense(5))) - model.compile(optimizer='rmsprop', loss='mse') - model_input = np.random.randint(low=1, high=5, size=(10, 3, 4)) - for i in range(4): - model_input[i, i:, :] = 0. - model.compile(optimizer='rmsprop', loss='mse') - model.fit(model_input, np.random.random((10, 3, 5)), epochs=1, batch_size=6) - mask_outputs = [model.layers[0].compute_mask(model.input)] - mask_outputs += [ - model.layers[1].compute_mask(model.layers[1].input, mask_outputs[-1]) - ] - func = keras.backend.function([model.input], mask_outputs) - mask_outputs_val = func([model_input]) - self.assertEqual((mask_outputs_val[0]).all(), model_input.all()) - self.assertEqual((mask_outputs_val[1]).all(), model_input.all()) - - def test_TimeDistributed_with_different_time_shapes(self): - time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5)) - ph_1 = keras.backend.placeholder(shape=(None, 10, 13)) - out_1 = time_dist(ph_1) - self.assertEqual(out_1.shape.as_list(), [None, 10, 5]) - - ph_2 = keras.backend.placeholder(shape=(None, 1, 13)) - out_2 = time_dist(ph_2) - self.assertEqual(out_2.shape.as_list(), [None, 1, 5]) - - ph_3 = keras.backend.placeholder(shape=(None, 1, 18)) - with self.assertRaisesRegex(ValueError, 'is incompatible with'): - time_dist(ph_3) - - def test_TimeDistributed_with_invalid_dimensions(self): - time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5)) - ph = keras.backend.placeholder(shape=(None, 10)) - with self.assertRaisesRegex( - ValueError, - '`TimeDistributed` Layer should be passed an `input_shape `'): - time_dist(ph) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_TimeDistributed_reshape(self): - - class NoReshapeLayer(keras.layers.Layer): - - def call(self, inputs): - return inputs - - # Built-in layers that aren't stateful use the reshape implementation. - td1 = keras.layers.TimeDistributed(keras.layers.Dense(5)) - self.assertTrue(td1._always_use_reshape) - - # Built-in layers that are stateful don't use the reshape implementation. - td2 = keras.layers.TimeDistributed( - keras.layers.RNN(keras.layers.SimpleRNNCell(10), stateful=True)) - self.assertFalse(td2._always_use_reshape) - - # Custom layers are not allowlisted for the fast reshape implementation. 
- td3 = keras.layers.TimeDistributed(NoReshapeLayer()) - self.assertFalse(td3._always_use_reshape) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_TimeDistributed_output_shape_return_types(self): - - class TestLayer(keras.layers.Layer): - - def call(self, inputs): - return tf.concat([inputs, inputs], axis=-1) - - def compute_output_shape(self, input_shape): - output_shape = tf.TensorShape(input_shape).as_list() - output_shape[-1] = output_shape[-1] * 2 - output_shape = tf.TensorShape(output_shape) - return output_shape - - class TestListLayer(TestLayer): - - def compute_output_shape(self, input_shape): - shape = super().compute_output_shape(input_shape) - return shape.as_list() - - class TestTupleLayer(TestLayer): - - def compute_output_shape(self, input_shape): - shape = super().compute_output_shape(input_shape) - return tuple(shape.as_list()) - - # Layers can specify output shape as list/tuple/TensorShape - test_layers = [TestLayer, TestListLayer, TestTupleLayer] - for layer in test_layers: - input_layer = keras.layers.TimeDistributed(layer()) - inputs = keras.backend.placeholder(shape=(None, 2, 4)) - output = input_layer(inputs) - self.assertEqual(output.shape.as_list(), [None, 2, 8]) - self.assertEqual( - input_layer.compute_output_shape([None, 2, 4]).as_list(), - [None, 2, 8]) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - # TODO(scottzhu): check why v1 session failed. - def test_TimeDistributed_with_mask_first_implementation(self): - np.random.seed(100) - rnn_layer = keras.layers.LSTM(4, return_sequences=True, stateful=True) - - data = np.array([[[[1.0], [1.0]], [[0.0], [1.0]]], - [[[1.0], [0.0]], [[1.0], [1.0]]], - [[[1.0], [0.0]], [[1.0], [1.0]]]]) - x = keras.layers.Input(shape=(2, 2, 1), batch_size=3) - x_masking = keras.layers.Masking()(x) - y = keras.layers.TimeDistributed(rnn_layer)(x_masking) - model_1 = keras.models.Model(x, y) - model_1.compile( - 'rmsprop', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - output_with_mask = model_1.predict(data, steps=1) - - y = keras.layers.TimeDistributed(rnn_layer)(x) - model_2 = keras.models.Model(x, y) - model_2.compile( - 'rmsprop', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - output = model_2.predict(data, steps=1) - - self.assertNotAllClose(output_with_mask, output, atol=1e-7) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters( - *test_utils.generate_combinations_with_testcase_name( - layer=[keras.layers.LSTM, - keras.layers.Dense])) - def test_TimeDistributed_with_ragged_input(self, layer): - if tf.executing_eagerly(): - self.skipTest('b/143103634') - np.random.seed(100) - layer = layer(4) - ragged_data = tf.ragged.constant( - [[[[1.0], [1.0]], [[2.0], [2.0]]], - [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]], - [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]]], - ragged_rank=1) - - x_ragged = keras.Input(shape=(None, 2, 1), dtype='float32', ragged=True) - y_ragged = keras.layers.TimeDistributed(layer)(x_ragged) - model_1 = keras.models.Model(x_ragged, y_ragged) - model_1._run_eagerly = test_utils.should_run_eagerly() - output_ragged = model_1.predict(ragged_data, steps=1) - - x_dense = keras.Input(shape=(None, 2, 1), dtype='float32') - masking = keras.layers.Masking()(x_dense) - y_dense = keras.layers.TimeDistributed(layer)(masking) - model_2 = keras.models.Model(x_dense, y_dense) - dense_data = ragged_data.to_tensor() - model_2._run_eagerly = test_utils.should_run_eagerly() - output_dense = 
model_2.predict(dense_data, steps=1) - - output_ragged = convert_ragged_tensor_value(output_ragged) - self.assertAllEqual(output_ragged.to_tensor(), output_dense) - - @test_combinations.run_all_keras_modes - def test_TimeDistributed_with_ragged_input_with_batch_size(self): - np.random.seed(100) - layer = keras.layers.Dense(16) - - ragged_data = tf.ragged.constant( - [[[[1.0], [1.0]], [[2.0], [2.0]]], - [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]], - [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]]], - ragged_rank=1) - - # Use the first implementation by specifying batch_size - x_ragged = keras.Input(shape=(None, 2, 1), batch_size=3, dtype='float32', - ragged=True) - y_ragged = keras.layers.TimeDistributed(layer)(x_ragged) - model_1 = keras.models.Model(x_ragged, y_ragged) - output_ragged = model_1.predict(ragged_data, steps=1) - - x_dense = keras.Input(shape=(None, 2, 1), batch_size=3, dtype='float32') - masking = keras.layers.Masking()(x_dense) - y_dense = keras.layers.TimeDistributed(layer)(masking) - model_2 = keras.models.Model(x_dense, y_dense) - dense_data = ragged_data.to_tensor() - output_dense = model_2.predict(dense_data, steps=1) - - output_ragged = convert_ragged_tensor_value(output_ragged) - self.assertAllEqual(output_ragged.to_tensor(), output_dense) - - def test_TimeDistributed_set_static_shape(self): - layer = keras.layers.TimeDistributed(keras.layers.Conv2D(16, (3, 3))) - inputs = keras.Input(batch_shape=(1, None, 32, 32, 1)) - outputs = layer(inputs) - # Make sure the batch dim is not lost after array_ops.reshape. - self.assertListEqual(outputs.shape.as_list(), [1, None, 30, 30, 16]) - - @test_combinations.run_all_keras_modes - def test_TimeDistributed_with_mimo(self): - dense_1 = keras.layers.Dense(8) - dense_2 = keras.layers.Dense(16) - - class TestLayer(keras.layers.Layer): - - def __init__(self): - super().__init__() - self.dense_1 = dense_1 - self.dense_2 = dense_2 - - def call(self, inputs): - return self.dense_1(inputs[0]), self.dense_2(inputs[1]) - - def compute_output_shape(self, input_shape): - output_shape_1 = self.dense_1.compute_output_shape(input_shape[0]) - output_shape_2 = self.dense_2.compute_output_shape(input_shape[1]) - return output_shape_1, output_shape_2 - - np.random.seed(100) - layer = TestLayer() - - data_1 = tf.constant([[[[1.0], [1.0]], [[2.0], [2.0]]], - [[[4.0], [4.0]], [[5.0], [5.0]]], - [[[7.0], [7.0]], [[8.0], [8.0]]]]) - - data_2 = tf.constant([[[[1.0], [1.0]], [[2.0], [2.0]]], - [[[4.0], [4.0]], [[5.0], [5.0]]], - [[[7.0], [7.0]], [[8.0], [8.0]]]]) - - x1 = keras.Input(shape=(None, 2, 1), dtype='float32') - x2 = keras.Input(shape=(None, 2, 1), dtype='float32') - y1, y2 = keras.layers.TimeDistributed(layer)([x1, x2]) - model_1 = keras.models.Model([x1, x2], [y1, y2]) - model_1.compile( - optimizer='rmsprop', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - output_1 = model_1.predict((data_1, data_2), steps=1) - - y1 = dense_1(x1) - y2 = dense_2(x2) - model_2 = keras.models.Model([x1, x2], [y1, y2]) - output_2 = model_2.predict((data_1, data_2), steps=1) - - self.assertAllClose(output_1, output_2) - - model_1.fit( - x=[np.random.random((10, 2, 2, 1)), - np.random.random((10, 2, 2, 1))], - y=[np.random.random((10, 2, 2, 8)), - np.random.random((10, 2, 2, 16))], - epochs=1, - batch_size=3) - - def test_TimeDistributed_Attention(self): - query_input = keras.layers.Input(shape=(None, 1, 10), dtype='float32') - value_input = keras.layers.Input(shape=(None, 4, 10), dtype='float32') - - # Query-value attention of shape 
[batch_size, Tq, filters]. - query_value_attention_seq = keras.layers.TimeDistributed( - keras.layers.Attention())([query_input, value_input]) - model = keras.models.Model([query_input, value_input], - query_value_attention_seq) - model.compile(optimizer='rmsprop', loss='mse') - model.fit( - [np.random.random((10, 8, 1, 10)), - np.random.random((10, 8, 4, 10))], - np.random.random((10, 8, 1, 10)), - epochs=1, - batch_size=10) - - # test config and serialization/deserialization - model.get_config() - model = keras.models.model_from_json(model.to_json()) - model.summary() + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_timedistributed_dense(self): + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Dense(2), input_shape=(3, 4) + ) + ) + model.compile(optimizer="rmsprop", loss="mse") + model.fit( + np.random.random((10, 3, 4)), + np.random.random((10, 3, 2)), + epochs=1, + batch_size=10, + ) + + # test config + model.get_config() + + # check whether the model variables are present in the + # trackable list of objects + checkpointed_object_ids = { + id(o) for o in trackable_util.list_objects(model) + } + for v in model.variables: + self.assertIn(id(v), checkpointed_object_ids) + + def test_timedistributed_static_batch_size(self): + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Dense(2), input_shape=(3, 4), batch_size=10 + ) + ) + model.compile(optimizer="rmsprop", loss="mse") + model.fit( + np.random.random((10, 3, 4)), + np.random.random((10, 3, 2)), + epochs=1, + batch_size=10, + ) + + def test_timedistributed_invalid_init(self): + x = tf.constant(np.zeros((1, 1)).astype("float32")) + with self.assertRaisesRegex( + ValueError, + "Please initialize `TimeDistributed` layer with a " + "`tf.keras.layers.Layer` instance.", + ): + keras.layers.TimeDistributed(x) + + def test_timedistributed_conv2d(self): + with self.cached_session(): + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Conv2D(5, (2, 2), padding="same"), + input_shape=(2, 4, 4, 3), + ) + ) + model.add(keras.layers.Activation("relu")) + model.compile(optimizer="rmsprop", loss="mse") + model.train_on_batch( + np.random.random((1, 2, 4, 4, 3)), + np.random.random((1, 2, 4, 4, 5)), + ) + + model = keras.models.model_from_json(model.to_json()) + model.summary() + + def test_timedistributed_stacked(self): + with self.cached_session(): + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Dense(2), input_shape=(3, 4) + ) + ) + model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) + model.add(keras.layers.Activation("relu")) + model.compile(optimizer="rmsprop", loss="mse") + + model.fit( + np.random.random((10, 3, 4)), + np.random.random((10, 3, 3)), + epochs=1, + batch_size=10, + ) + + def test_regularizers(self): + with self.cached_session(): + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Dense( + 2, kernel_regularizer="l1", activity_regularizer="l1" + ), + input_shape=(3, 4), + ) + ) + model.add(keras.layers.Activation("relu")) + model.compile(optimizer="rmsprop", loss="mse") + self.assertEqual(len(model.losses), 2) + + def test_TimeDistributed_learning_phase(self): + with self.cached_session(): + keras.utils.set_random_seed(0) + x = keras.layers.Input(shape=(3, 2)) + y = keras.layers.TimeDistributed(keras.layers.Dropout(0.999))( + x, training=True + ) + 
model = keras.models.Model(x, y) + y = model.predict(np.random.random((10, 3, 2))) + self.assertAllClose(np.mean(y), 0.0, atol=1e-1, rtol=1e-1) + + def test_TimeDistributed_batchnorm(self): + with self.cached_session(): + # test that wrapped BN updates still work. + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.BatchNormalization(center=True, scale=True), + name="bn", + input_shape=(10, 2), + ) + ) + model.compile(optimizer="rmsprop", loss="mse") + # Assert that mean and variance are 0 and 1. + td = model.layers[0] + self.assertAllClose(td.get_weights()[2], np.array([0, 0])) + assert np.array_equal(td.get_weights()[3], np.array([1, 1])) + # Train + model.train_on_batch( + np.random.normal(loc=2, scale=2, size=(1, 10, 2)), + np.broadcast_to(np.array([0, 1]), (1, 10, 2)), + ) + # Assert that mean and variance changed. + assert not np.array_equal(td.get_weights()[2], np.array([0, 0])) + assert not np.array_equal(td.get_weights()[3], np.array([1, 1])) + + def test_TimeDistributed_trainable(self): + # test layers that need learning_phase to be set + x = keras.layers.Input(shape=(3, 2)) + layer = keras.layers.TimeDistributed(keras.layers.BatchNormalization()) + _ = layer(x) + self.assertEqual(len(layer.trainable_weights), 2) + layer.trainable = False + assert not layer.trainable_weights + layer.trainable = True + assert len(layer.trainable_weights) == 2 + + def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(self): + with self.cached_session(): + # test with unspecified shape and Embeddings with mask_zero + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Embedding(5, 6, mask_zero=True), + input_shape=(None, None), + ) + ) # N by t_1 by t_2 by 6 + model.add( + keras.layers.TimeDistributed( + keras.layers.SimpleRNN(7, return_sequences=True) + ) + ) + model.add( + keras.layers.TimeDistributed( + keras.layers.SimpleRNN(8, return_sequences=False) + ) + ) + model.add(keras.layers.SimpleRNN(1, return_sequences=False)) + model.compile(optimizer="rmsprop", loss="mse") + model_input = np.random.randint( + low=1, high=5, size=(10, 3, 4), dtype="int32" + ) + for i in range(4): + model_input[i, i:, i:] = 0 + model.fit( + model_input, np.random.random((10, 1)), epochs=1, batch_size=10 + ) + mask_outputs = [model.layers[0].compute_mask(model.input)] + for layer in model.layers[1:]: + mask_outputs.append( + layer.compute_mask(layer.input, mask_outputs[-1]) + ) + func = keras.backend.function([model.input], mask_outputs[:-1]) + mask_outputs_val = func([model_input]) + ref_mask_val_0 = model_input > 0 # embedding layer + ref_mask_val_1 = ref_mask_val_0 # first RNN layer + ref_mask_val_2 = np.any(ref_mask_val_1, axis=-1) # second RNN layer + ref_mask_val = [ref_mask_val_0, ref_mask_val_1, ref_mask_val_2] + for i in range(3): + self.assertAllEqual(mask_outputs_val[i], ref_mask_val[i]) + self.assertIs(mask_outputs[-1], None) # final layer + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_TimeDistributed_with_masking_layer(self): + # test with Masking layer + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Masking( + mask_value=0.0, + ), + input_shape=(None, 4), + ) + ) + model.add(keras.layers.TimeDistributed(keras.layers.Dense(5))) + model.compile(optimizer="rmsprop", loss="mse") + model_input = np.random.randint(low=1, high=5, size=(10, 3, 4)) + for i in range(4): + model_input[i, i:, :] = 0.0 + 
model.compile(optimizer="rmsprop", loss="mse") + model.fit( + model_input, np.random.random((10, 3, 5)), epochs=1, batch_size=6 + ) + mask_outputs = [model.layers[0].compute_mask(model.input)] + mask_outputs += [ + model.layers[1].compute_mask( + model.layers[1].input, mask_outputs[-1] + ) + ] + func = keras.backend.function([model.input], mask_outputs) + mask_outputs_val = func([model_input]) + self.assertEqual((mask_outputs_val[0]).all(), model_input.all()) + self.assertEqual((mask_outputs_val[1]).all(), model_input.all()) + + def test_TimeDistributed_with_different_time_shapes(self): + time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5)) + ph_1 = keras.backend.placeholder(shape=(None, 10, 13)) + out_1 = time_dist(ph_1) + self.assertEqual(out_1.shape.as_list(), [None, 10, 5]) + + ph_2 = keras.backend.placeholder(shape=(None, 1, 13)) + out_2 = time_dist(ph_2) + self.assertEqual(out_2.shape.as_list(), [None, 1, 5]) + + ph_3 = keras.backend.placeholder(shape=(None, 1, 18)) + with self.assertRaisesRegex(ValueError, "is incompatible with"): + time_dist(ph_3) + + def test_TimeDistributed_with_invalid_dimensions(self): + time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5)) + ph = keras.backend.placeholder(shape=(None, 10)) + with self.assertRaisesRegex( + ValueError, + "`TimeDistributed` Layer should be passed an `input_shape `", + ): + time_dist(ph) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_TimeDistributed_reshape(self): + class NoReshapeLayer(keras.layers.Layer): + def call(self, inputs): + return inputs + + # Built-in layers that aren't stateful use the reshape implementation. + td1 = keras.layers.TimeDistributed(keras.layers.Dense(5)) + self.assertTrue(td1._always_use_reshape) + + # Built-in layers that are stateful don't use the reshape + # implementation. + td2 = keras.layers.TimeDistributed( + keras.layers.RNN(keras.layers.SimpleRNNCell(10), stateful=True) + ) + self.assertFalse(td2._always_use_reshape) + + # Custom layers are not allowlisted for the fast reshape implementation. 
+ td3 = keras.layers.TimeDistributed(NoReshapeLayer()) + self.assertFalse(td3._always_use_reshape) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + ("fully_defined", [3, 2, 4], [3, 2, 8]), + ("dynamic_batch_size", [None, 2, 4], [None, 2, 8]), + ("two_dynamic_dims", [None, None, 4], [None, None, 8]), + ("rank_only", [None, None, None], [None, None, None]), + ) + def test_TimeDistributed_output_shape_return_types( + self, input_shape, expected_output_shape + ): + class TestLayer(keras.layers.Layer): + def call(self, inputs): + return tf.concat([inputs, inputs], axis=-1) + + def compute_output_shape(self, input_shape): + output_shape = tf.TensorShape(input_shape).as_list() + if output_shape[-1] is not None: + output_shape[-1] = output_shape[-1] * 2 + output_shape = tf.TensorShape(output_shape) + return output_shape + + class TestListLayer(TestLayer): + def compute_output_shape(self, input_shape): + shape = super().compute_output_shape(input_shape) + return shape.as_list() + + class TestTupleLayer(TestLayer): + def compute_output_shape(self, input_shape): + shape = super().compute_output_shape(input_shape) + return tuple(shape.as_list()) + + # Layers can specify output shape as list/tuple/TensorShape + test_layers = [TestLayer, TestListLayer, TestTupleLayer] + for layer in test_layers: + input_layer = keras.layers.TimeDistributed(layer()) + inputs = keras.backend.placeholder(shape=input_shape) + output = input_layer(inputs) + self.assertEqual(output.shape.as_list(), expected_output_shape) + self.assertEqual( + input_layer.compute_output_shape(input_shape).as_list(), + expected_output_shape, + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + # TODO(scottzhu): check why v1 session failed. + def test_TimeDistributed_with_mask_first_implementation(self): + np.random.seed(100) + rnn_layer = keras.layers.LSTM(4, return_sequences=True, stateful=True) + + data = np.array( + [ + [[[1.0], [1.0]], [[0.0], [1.0]]], + [[[1.0], [0.0]], [[1.0], [1.0]]], + [[[1.0], [0.0]], [[1.0], [1.0]]], + ] + ) + x = keras.layers.Input(shape=(2, 2, 1), batch_size=3) + x_masking = keras.layers.Masking()(x) + y = keras.layers.TimeDistributed(rnn_layer)(x_masking) + model_1 = keras.models.Model(x, y) + model_1.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + output_with_mask = model_1.predict(data, steps=1) + + y = keras.layers.TimeDistributed(rnn_layer)(x) + model_2 = keras.models.Model(x, y) + model_2.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + output = model_2.predict(data, steps=1) + + self.assertNotAllClose(output_with_mask, output, atol=1e-7) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + *test_utils.generate_combinations_with_testcase_name( + layer=[keras.layers.LSTM, keras.layers.Dense] + ) + ) + def test_TimeDistributed_with_ragged_input(self, layer): + if tf.executing_eagerly(): + self.skipTest("b/143103634") + np.random.seed(100) + layer = layer(4) + ragged_data = tf.ragged.constant( + [ + [[[1.0], [1.0]], [[2.0], [2.0]]], + [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]], + [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]], + ], + ragged_rank=1, + ) + + x_ragged = keras.Input(shape=(None, 2, 1), dtype="float32", ragged=True) + y_ragged = keras.layers.TimeDistributed(layer)(x_ragged) + model_1 = keras.models.Model(x_ragged, y_ragged) + model_1._run_eagerly = test_utils.should_run_eagerly() + output_ragged = model_1.predict(ragged_data, steps=1) + + x_dense = 
keras.Input(shape=(None, 2, 1), dtype="float32") + masking = keras.layers.Masking()(x_dense) + y_dense = keras.layers.TimeDistributed(layer)(masking) + model_2 = keras.models.Model(x_dense, y_dense) + dense_data = ragged_data.to_tensor() + model_2._run_eagerly = test_utils.should_run_eagerly() + output_dense = model_2.predict(dense_data, steps=1) + + output_ragged = convert_ragged_tensor_value(output_ragged) + self.assertAllEqual(output_ragged.to_tensor(), output_dense) + + @test_combinations.run_all_keras_modes + def test_TimeDistributed_with_ragged_input_with_batch_size(self): + np.random.seed(100) + layer = keras.layers.Dense(16) + + ragged_data = tf.ragged.constant( + [ + [[[1.0], [1.0]], [[2.0], [2.0]]], + [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]], + [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]], + ], + ragged_rank=1, + ) + + # Use the first implementation by specifying batch_size + x_ragged = keras.Input( + shape=(None, 2, 1), batch_size=3, dtype="float32", ragged=True + ) + y_ragged = keras.layers.TimeDistributed(layer)(x_ragged) + model_1 = keras.models.Model(x_ragged, y_ragged) + output_ragged = model_1.predict(ragged_data, steps=1) + + x_dense = keras.Input(shape=(None, 2, 1), batch_size=3, dtype="float32") + masking = keras.layers.Masking()(x_dense) + y_dense = keras.layers.TimeDistributed(layer)(masking) + model_2 = keras.models.Model(x_dense, y_dense) + dense_data = ragged_data.to_tensor() + output_dense = model_2.predict(dense_data, steps=1) + + output_ragged = convert_ragged_tensor_value(output_ragged) + self.assertAllEqual(output_ragged.to_tensor(), output_dense) + + def test_TimeDistributed_set_static_shape(self): + layer = keras.layers.TimeDistributed(keras.layers.Conv2D(16, (3, 3))) + inputs = keras.Input(batch_shape=(1, None, 32, 32, 1)) + outputs = layer(inputs) + # Make sure the batch dim is not lost after array_ops.reshape. 
+ self.assertListEqual(outputs.shape.as_list(), [1, None, 30, 30, 16]) + + @test_combinations.run_all_keras_modes + def test_TimeDistributed_with_mimo(self): + dense_1 = keras.layers.Dense(8) + dense_2 = keras.layers.Dense(16) + + class TestLayer(keras.layers.Layer): + def __init__(self): + super().__init__() + self.dense_1 = dense_1 + self.dense_2 = dense_2 + + def call(self, inputs): + return self.dense_1(inputs[0]), self.dense_2(inputs[1]) + + def compute_output_shape(self, input_shape): + output_shape_1 = self.dense_1.compute_output_shape( + input_shape[0] + ) + output_shape_2 = self.dense_2.compute_output_shape( + input_shape[1] + ) + return output_shape_1, output_shape_2 + + np.random.seed(100) + layer = TestLayer() + + data_1 = tf.constant( + [ + [[[1.0], [1.0]], [[2.0], [2.0]]], + [[[4.0], [4.0]], [[5.0], [5.0]]], + [[[7.0], [7.0]], [[8.0], [8.0]]], + ] + ) + + data_2 = tf.constant( + [ + [[[1.0], [1.0]], [[2.0], [2.0]]], + [[[4.0], [4.0]], [[5.0], [5.0]]], + [[[7.0], [7.0]], [[8.0], [8.0]]], + ] + ) + + x1 = keras.Input(shape=(None, 2, 1), dtype="float32") + x2 = keras.Input(shape=(None, 2, 1), dtype="float32") + y1, y2 = keras.layers.TimeDistributed(layer)([x1, x2]) + model_1 = keras.models.Model([x1, x2], [y1, y2]) + model_1.compile( + optimizer="rmsprop", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + output_1 = model_1.predict((data_1, data_2), steps=1) + + y1 = dense_1(x1) + y2 = dense_2(x2) + model_2 = keras.models.Model([x1, x2], [y1, y2]) + output_2 = model_2.predict((data_1, data_2), steps=1) + + self.assertAllClose(output_1, output_2) + + model_1.fit( + x=[ + np.random.random((10, 2, 2, 1)), + np.random.random((10, 2, 2, 1)), + ], + y=[ + np.random.random((10, 2, 2, 8)), + np.random.random((10, 2, 2, 16)), + ], + epochs=1, + batch_size=3, + ) + + def test_TimeDistributed_Attention(self): + query_input = keras.layers.Input(shape=(None, 1, 10), dtype="float32") + value_input = keras.layers.Input(shape=(None, 4, 10), dtype="float32") + + # Query-value attention of shape [batch_size, Tq, filters]. 
+ query_value_attention_seq = keras.layers.TimeDistributed( + keras.layers.Attention() + )([query_input, value_input]) + model = keras.models.Model( + [query_input, value_input], query_value_attention_seq + ) + model.compile(optimizer="rmsprop", loss="mse") + model.fit( + [ + np.random.random((10, 8, 1, 10)), + np.random.random((10, 8, 4, 10)), + ], + np.random.random((10, 8, 1, 10)), + epochs=1, + batch_size=10, + ) + + # test config and serialization/deserialization + model.get_config() + model = keras.models.model_from_json(model.to_json()) + model.summary() def convert_ragged_tensor_value(inputs): - if isinstance(inputs, tf.compat.v1.ragged.RaggedTensorValue): - flat_values = tf.convert_to_tensor( - value=inputs.flat_values, - name='flat_values') - return tf.RaggedTensor.from_nested_row_splits( - flat_values, inputs.nested_row_splits, validate=False) - return inputs - - -if __name__ == '__main__': - tf.test.main() + if isinstance(inputs, tf.compat.v1.ragged.RaggedTensorValue): + flat_values = tf.convert_to_tensor( + value=inputs.flat_values, name="flat_values" + ) + return tf.RaggedTensor.from_nested_row_splits( + flat_values, inputs.nested_row_splits, validate=False + ) + return inputs + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/serialization.py b/keras/layers/serialization.py index f0f3b6629bfe..e35761b5b273 100644 --- a/keras/layers/serialization.py +++ b/keras/layers/serialization.py @@ -14,10 +14,10 @@ # ============================================================================== """Layer serialization/deserialization functions.""" +import threading + import tensorflow.compat.v2 as tf -# pylint: disable=g-bad-import-order,g-direct-tensorflow-import,unused-import,wildcard-import -import threading from keras.engine import base_layer from keras.engine import input_layer from keras.engine import input_spec @@ -27,199 +27,273 @@ from keras.layers import core from keras.layers import locally_connected from keras.layers import merging -from keras.layers import noise from keras.layers import pooling from keras.layers import regularization from keras.layers import reshaping from keras.layers import rnn -from keras.layers.rnn import cell_wrappers -from keras.layers.rnn import gru -from keras.layers.rnn import lstm from keras.layers.normalization import batch_normalization from keras.layers.normalization import batch_normalization_v1 +from keras.layers.normalization import group_normalization from keras.layers.normalization import layer_normalization from keras.layers.normalization import unit_normalization from keras.layers.preprocessing import category_encoding from keras.layers.preprocessing import discretization -from keras.layers.preprocessing import hashing from keras.layers.preprocessing import hashed_crossing +from keras.layers.preprocessing import hashing from keras.layers.preprocessing import image_preprocessing from keras.layers.preprocessing import integer_lookup -from keras.layers.preprocessing import normalization as preprocessing_normalization +from keras.layers.preprocessing import ( + normalization as preprocessing_normalization, +) from keras.layers.preprocessing import string_lookup from keras.layers.preprocessing import text_vectorization -from keras.saving.saved_model import json_utils +from keras.layers.rnn import cell_wrappers +from keras.layers.rnn import gru +from keras.layers.rnn import lstm +from keras.metrics import base_metric +from keras.saving import serialization_lib +from keras.saving.legacy import serialization as 
legacy_serialization +from keras.saving.legacy.saved_model import json_utils from keras.utils import generic_utils from keras.utils import tf_inspect as inspect + +# isort: off from tensorflow.python.util.tf_export import keras_export -ALL_MODULES = (base_layer, input_layer, activation, attention, convolutional, - core, locally_connected, merging, batch_normalization_v1, - layer_normalization, unit_normalization, pooling, - image_preprocessing, regularization, reshaping, rnn, hashing, - hashed_crossing, category_encoding, discretization, - integer_lookup, preprocessing_normalization, string_lookup, - text_vectorization) -ALL_V2_MODULES = (batch_normalization, layer_normalization, cell_wrappers, gru, - lstm) +ALL_MODULES = ( + base_layer, + input_layer, + activation, + attention, + convolutional, + core, + locally_connected, + merging, + batch_normalization_v1, + group_normalization, + layer_normalization, + unit_normalization, + pooling, + image_preprocessing, + regularization, + reshaping, + rnn, + hashing, + hashed_crossing, + category_encoding, + discretization, + integer_lookup, + preprocessing_normalization, + string_lookup, + text_vectorization, +) +ALL_V2_MODULES = ( + batch_normalization, + layer_normalization, + cell_wrappers, + gru, + lstm, +) # ALL_OBJECTS is meant to be a global mutable. Hence we need to make it # thread-local to avoid concurrent mutations. LOCAL = threading.local() def populate_deserializable_objects(): - """Populates dict ALL_OBJECTS with every built-in layer.""" - global LOCAL - if not hasattr(LOCAL, 'ALL_OBJECTS'): - LOCAL.ALL_OBJECTS = {} - LOCAL.GENERATED_WITH_V2 = None - - if LOCAL.ALL_OBJECTS and LOCAL.GENERATED_WITH_V2 == tf.__internal__.tf2.enabled( - ): - # Objects dict is already generated for the proper TF version: - # do nothing. - return + """Populates dict ALL_OBJECTS with every built-in layer.""" + global LOCAL + if not hasattr(LOCAL, "ALL_OBJECTS"): + LOCAL.ALL_OBJECTS = {} + LOCAL.GENERATED_WITH_V2 = None - LOCAL.ALL_OBJECTS = {} - LOCAL.GENERATED_WITH_V2 = tf.__internal__.tf2.enabled() + if ( + LOCAL.ALL_OBJECTS + and LOCAL.GENERATED_WITH_V2 == tf.__internal__.tf2.enabled() + ): + # Objects dict is already generated for the proper TF version: + # do nothing. + return - base_cls = base_layer.Layer - generic_utils.populate_dict_with_module_objects( - LOCAL.ALL_OBJECTS, - ALL_MODULES, - obj_filter=lambda x: inspect.isclass(x) and issubclass(x, base_cls)) + LOCAL.ALL_OBJECTS = {} + LOCAL.GENERATED_WITH_V2 = tf.__internal__.tf2.enabled() - # Overwrite certain V1 objects with V2 versions - if tf.__internal__.tf2.enabled(): + base_cls = base_layer.Layer generic_utils.populate_dict_with_module_objects( LOCAL.ALL_OBJECTS, - ALL_V2_MODULES, - obj_filter=lambda x: inspect.isclass(x) and issubclass(x, base_cls)) - - # These deserialization aliases are added for backward compatibility, - # as in TF 1.13, "BatchNormalizationV1" and "BatchNormalizationV2" - # were used as class name for v1 and v2 version of BatchNormalization, - # respectively. Here we explicitly convert them to their canonical names. - LOCAL.ALL_OBJECTS[ - 'BatchNormalizationV1'] = batch_normalization_v1.BatchNormalization - LOCAL.ALL_OBJECTS[ - 'BatchNormalizationV2'] = batch_normalization.BatchNormalization - - # Prevent circular dependencies. 
- from keras import models # pylint: disable=g-import-not-at-top - from keras.premade_models.linear import LinearModel # pylint: disable=g-import-not-at-top - from keras.premade_models.wide_deep import WideDeepModel # pylint: disable=g-import-not-at-top - from keras.feature_column.sequence_feature_column import SequenceFeatures # pylint: disable=g-import-not-at-top - - LOCAL.ALL_OBJECTS['Input'] = input_layer.Input - LOCAL.ALL_OBJECTS['InputSpec'] = input_spec.InputSpec - LOCAL.ALL_OBJECTS['Functional'] = models.Functional - LOCAL.ALL_OBJECTS['Model'] = models.Model - LOCAL.ALL_OBJECTS['SequenceFeatures'] = SequenceFeatures - LOCAL.ALL_OBJECTS['Sequential'] = models.Sequential - LOCAL.ALL_OBJECTS['LinearModel'] = LinearModel - LOCAL.ALL_OBJECTS['WideDeepModel'] = WideDeepModel - - if tf.__internal__.tf2.enabled(): - from keras.feature_column.dense_features_v2 import DenseFeatures # pylint: disable=g-import-not-at-top - LOCAL.ALL_OBJECTS['DenseFeatures'] = DenseFeatures - else: - from keras.feature_column.dense_features import DenseFeatures # pylint: disable=g-import-not-at-top - LOCAL.ALL_OBJECTS['DenseFeatures'] = DenseFeatures - - # Merging layers, function versions. - LOCAL.ALL_OBJECTS['add'] = merging.add - LOCAL.ALL_OBJECTS['subtract'] = merging.subtract - LOCAL.ALL_OBJECTS['multiply'] = merging.multiply - LOCAL.ALL_OBJECTS['average'] = merging.average - LOCAL.ALL_OBJECTS['maximum'] = merging.maximum - LOCAL.ALL_OBJECTS['minimum'] = merging.minimum - LOCAL.ALL_OBJECTS['concatenate'] = merging.concatenate - LOCAL.ALL_OBJECTS['dot'] = merging.dot - - -@keras_export('keras.layers.serialize') -def serialize(layer): - """Serializes a `Layer` object into a JSON-compatible representation. - - Args: - layer: The `Layer` object to serialize. - - Returns: - A JSON-serializable dict representing the object's config. - - Example: - - ```python - from pprint import pprint - model = tf.keras.models.Sequential() - model.add(tf.keras.Input(shape=(16,))) - model.add(tf.keras.layers.Dense(32, activation='relu')) - - pprint(tf.keras.layers.serialize(model)) - # prints the configuration of the model, as a dict. - """ - return generic_utils.serialize_keras_object(layer) - - -@keras_export('keras.layers.deserialize') -def deserialize(config, custom_objects=None): - """Instantiates a layer from a config dictionary. - - Args: - config: dict of the form {'class_name': str, 'config': dict} - custom_objects: dict mapping class names (or function names) of custom - (non-Keras) objects to class/functions - - Returns: - Layer instance (may be Model, Sequential, Network, Layer...) 
- - Example: - - ```python - # Configuration of Dense(32, activation='relu') - config = { - 'class_name': 'Dense', - 'config': { - 'activation': 'relu', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, - 'bias_regularizer': None, - 'dtype': 'float32', - 'kernel_constraint': None, - 'kernel_initializer': {'class_name': 'GlorotUniform', - 'config': {'seed': None}}, - 'kernel_regularizer': None, - 'name': 'dense', - 'trainable': True, - 'units': 32, - 'use_bias': True + ALL_MODULES, + obj_filter=lambda x: inspect.isclass(x) and issubclass(x, base_cls), + ) + + # Overwrite certain V1 objects with V2 versions + if tf.__internal__.tf2.enabled(): + generic_utils.populate_dict_with_module_objects( + LOCAL.ALL_OBJECTS, + ALL_V2_MODULES, + obj_filter=lambda x: inspect.isclass(x) and issubclass(x, base_cls), + ) + + # These deserialization aliases are added for backward compatibility, + # as in TF 1.13, "BatchNormalizationV1" and "BatchNormalizationV2" + # were used as class name for v1 and v2 version of BatchNormalization, + # respectively. Here we explicitly convert them to their canonical names. + LOCAL.ALL_OBJECTS[ + "BatchNormalizationV1" + ] = batch_normalization_v1.BatchNormalization + LOCAL.ALL_OBJECTS[ + "BatchNormalizationV2" + ] = batch_normalization.BatchNormalization + + # Prevent circular dependencies. + from keras import models + from keras.feature_column.sequence_feature_column import ( + SequenceFeatures, + ) + from keras.premade_models.linear import ( + LinearModel, + ) + from keras.premade_models.wide_deep import ( + WideDeepModel, + ) + + LOCAL.ALL_OBJECTS["Input"] = input_layer.Input + LOCAL.ALL_OBJECTS["InputSpec"] = input_spec.InputSpec + LOCAL.ALL_OBJECTS["Functional"] = models.Functional + LOCAL.ALL_OBJECTS["Model"] = models.Model + LOCAL.ALL_OBJECTS["SequenceFeatures"] = SequenceFeatures + LOCAL.ALL_OBJECTS["Sequential"] = models.Sequential + LOCAL.ALL_OBJECTS["LinearModel"] = LinearModel + LOCAL.ALL_OBJECTS["WideDeepModel"] = WideDeepModel + + if tf.__internal__.tf2.enabled(): + from keras.feature_column.dense_features_v2 import ( + DenseFeatures, + ) + + LOCAL.ALL_OBJECTS["DenseFeatures"] = DenseFeatures + else: + from keras.feature_column.dense_features import ( + DenseFeatures, + ) + + LOCAL.ALL_OBJECTS["DenseFeatures"] = DenseFeatures + + # Merging layers, function versions. + LOCAL.ALL_OBJECTS["add"] = merging.add + LOCAL.ALL_OBJECTS["subtract"] = merging.subtract + LOCAL.ALL_OBJECTS["multiply"] = merging.multiply + LOCAL.ALL_OBJECTS["average"] = merging.average + LOCAL.ALL_OBJECTS["maximum"] = merging.maximum + LOCAL.ALL_OBJECTS["minimum"] = merging.minimum + LOCAL.ALL_OBJECTS["concatenate"] = merging.concatenate + LOCAL.ALL_OBJECTS["dot"] = merging.dot + + +@keras_export("keras.layers.serialize") +def serialize(layer, use_legacy_format=False): + """Serializes a `Layer` object into a JSON-compatible representation. + + Args: + layer: The `Layer` object to serialize. + + Returns: + A JSON-serializable dict representing the object's config. + + Example: + + ```python + from pprint import pprint + model = tf.keras.models.Sequential() + model.add(tf.keras.Input(shape=(16,))) + model.add(tf.keras.layers.Dense(32, activation='relu')) + + pprint(tf.keras.layers.serialize(model)) + # prints the configuration of the model, as a dict. + ``` + """ + if isinstance(layer, base_metric.Metric): + raise ValueError( + f"Cannot serialize {layer} since it is a metric. 
" + "Please use the `keras.metrics.serialize()` and " + "`keras.metrics.deserialize()` APIs to serialize " + "and deserialize metrics." + ) + if use_legacy_format: + return legacy_serialization.serialize_keras_object(layer) + + return serialization_lib.serialize_keras_object(layer) + + +@keras_export("keras.layers.deserialize") +def deserialize(config, custom_objects=None, use_legacy_format=False): + """Instantiates a layer from a config dictionary. + + Args: + config: dict of the form {'class_name': str, 'config': dict} + custom_objects: dict mapping class names (or function names) of custom + (non-Keras) objects to class/functions + + Returns: + Layer instance (may be Model, Sequential, Network, Layer...) + + Example: + + ```python + # Configuration of Dense(32, activation='relu') + config = { + 'class_name': 'Dense', + 'config': { + 'activation': 'relu', + 'activity_regularizer': None, + 'bias_constraint': None, + 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, + 'bias_regularizer': None, + 'dtype': 'float32', + 'kernel_constraint': None, + 'kernel_initializer': {'class_name': 'GlorotUniform', + 'config': {'seed': None}}, + 'kernel_regularizer': None, + 'name': 'dense', + 'trainable': True, + 'units': 32, + 'use_bias': True + } } - } - dense_layer = tf.keras.layers.deserialize(config) - ``` - """ - populate_deserializable_objects() - return generic_utils.deserialize_keras_object( - config, - module_objects=LOCAL.ALL_OBJECTS, - custom_objects=custom_objects, - printable_module_name='layer') + dense_layer = tf.keras.layers.deserialize(config) + ``` + """ + populate_deserializable_objects() + if not config: + raise ValueError( + f"Cannot deserialize empty config. Received: config={config}" + ) + if use_legacy_format: + return legacy_serialization.deserialize_keras_object( + config, + module_objects=LOCAL.ALL_OBJECTS, + custom_objects=custom_objects, + printable_module_name="layer", + ) + + return serialization_lib.deserialize_keras_object( + config, + module_objects=LOCAL.ALL_OBJECTS, + custom_objects=custom_objects, + printable_module_name="layer", + ) def get_builtin_layer(class_name): - """Returns class if `class_name` is registered, else returns None.""" - if not hasattr(LOCAL, 'ALL_OBJECTS'): - populate_deserializable_objects() - return LOCAL.ALL_OBJECTS.get(class_name) + """Returns class if `class_name` is registered, else returns None.""" + if not hasattr(LOCAL, "ALL_OBJECTS"): + populate_deserializable_objects() + return LOCAL.ALL_OBJECTS.get(class_name) def deserialize_from_json(json_string, custom_objects=None): - """Instantiates a layer from a JSON string.""" - populate_deserializable_objects() - config = json_utils.decode_and_deserialize( - json_string, - module_objects=LOCAL.ALL_OBJECTS, - custom_objects=custom_objects) - return deserialize(config, custom_objects) + """Instantiates a layer from a JSON string.""" + populate_deserializable_objects() + config = json_utils.decode_and_deserialize( + json_string, + module_objects=LOCAL.ALL_OBJECTS, + custom_objects=custom_objects, + ) + return deserialize(config, custom_objects) diff --git a/keras/layers/serialization_test.py b/keras/layers/serialization_test.py index e71ebd5ead20..688466be0b74 100644 --- a/keras/layers/serialization_test.py +++ b/keras/layers/serialization_test.py @@ -15,154 +15,188 @@ """Tests for layer serialization utils.""" import tensorflow.compat.v2 as tf - from absl.testing import parameterized import keras -from keras.testing_infra import test_combinations +from keras.layers.normalization import 
batch_normalization as batchnorm_v2 +from keras.layers.normalization import batch_normalization_v1 as batchnorm_v1 from keras.layers.rnn import gru from keras.layers.rnn import gru_v1 from keras.layers.rnn import lstm from keras.layers.rnn import lstm_v1 -from keras.layers.normalization import batch_normalization as batchnorm_v2 -from keras.layers.normalization import batch_normalization_v1 as batchnorm_v1 +from keras.metrics import Mean +from keras.testing_infra import test_combinations class SerializableInt(int): + def __new__(cls, value): + return int.__new__(cls, value) - def __new__(cls, value): - return int.__new__(cls, value) + def get_config(self): + return {"value": int(self)} - def get_config(self): - return {'value': int(self)} + @classmethod + def from_config(cls, config): + return cls(**config) - @classmethod - def from_config(cls, config): - return cls(**config) - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class LayerSerializationTest(parameterized.TestCase, tf.test.TestCase): - - def test_serialize_deserialize(self): - layer = keras.layers.Dense( - 3, activation='relu', kernel_initializer='ones', bias_regularizer='l2') - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.activation, keras.activations.relu) - self.assertEqual(new_layer.bias_regularizer.__class__, - keras.regularizers.L2) - if tf.__internal__.tf2.enabled(): - self.assertEqual(new_layer.kernel_initializer.__class__, - keras.initializers.OnesV2) - else: - self.assertEqual(new_layer.kernel_initializer.__class__, - keras.initializers.Ones) - self.assertEqual(new_layer.units, 3) - - def test_implicit_serialize_deserialize_fails_without_object(self): - layer = keras.layers.Dense( - SerializableInt(3), - activation='relu', - kernel_initializer='ones', - bias_regularizer='l2') - config = keras.layers.serialize(layer) - # Because we're passing an unknown class here, deserialization should fail - # unless we add SerializableInt to the custom object dict. - with self.assertRaisesRegex(ValueError, - 'Unknown config_item: SerializableInt.*'): - _ = keras.layers.deserialize(config) - - def test_implicit_serialize_deserialize_succeeds_with_object(self): - layer = keras.layers.Dense( - SerializableInt(3), - activation='relu', - kernel_initializer='ones', - bias_regularizer='l2') - config = keras.layers.serialize(layer) - # Because we're passing an unknown class here, deserialization should fail - # unless we add SerializableInt to the custom object dict. 
- new_layer = keras.layers.deserialize( - config, custom_objects={'SerializableInt': SerializableInt}) - self.assertEqual(new_layer.activation, keras.activations.relu) - self.assertEqual(new_layer.bias_regularizer.__class__, - keras.regularizers.L2) - if tf.__internal__.tf2.enabled(): - self.assertEqual(new_layer.kernel_initializer.__class__, - keras.initializers.OnesV2) - else: - self.assertEqual(new_layer.kernel_initializer.__class__, - keras.initializers.Ones) - self.assertEqual(new_layer.units.__class__, SerializableInt) - self.assertEqual(new_layer.units, 3) - - @parameterized.parameters( - [batchnorm_v1.BatchNormalization, batchnorm_v2.BatchNormalization]) - def test_serialize_deserialize_batchnorm(self, batchnorm_layer): - layer = batchnorm_layer( - momentum=0.9, beta_initializer='zeros', gamma_regularizer='l2') - config = keras.layers.serialize(layer) - self.assertEqual(config['class_name'], 'BatchNormalization') - new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.momentum, 0.9) - if tf.__internal__.tf2.enabled(): - self.assertIsInstance(new_layer, batchnorm_v2.BatchNormalization) - self.assertEqual(new_layer.beta_initializer.__class__, - keras.initializers.ZerosV2) - else: - self.assertIsInstance(new_layer, batchnorm_v1.BatchNormalization) - self.assertEqual(new_layer.beta_initializer.__class__, - keras.initializers.Zeros) - self.assertEqual(new_layer.gamma_regularizer.__class__, - keras.regularizers.L2) - - @parameterized.parameters( - [batchnorm_v1.BatchNormalization, batchnorm_v2.BatchNormalization]) - def test_deserialize_batchnorm_backwards_compatibility(self, batchnorm_layer): - layer = batchnorm_layer( - momentum=0.9, beta_initializer='zeros', gamma_regularizer='l2') - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.momentum, 0.9) - if tf.__internal__.tf2.enabled(): - self.assertIsInstance(new_layer, batchnorm_v2.BatchNormalization) - self.assertEqual(new_layer.beta_initializer.__class__, - keras.initializers.ZerosV2) - else: - self.assertIsInstance(new_layer, batchnorm_v1.BatchNormalization) - self.assertEqual(new_layer.beta_initializer.__class__, - keras.initializers.Zeros) - self.assertEqual(new_layer.gamma_regularizer.__class__, - keras.regularizers.L2) - - @parameterized.parameters([lstm_v1.LSTM, lstm.LSTM]) - def test_serialize_deserialize_lstm(self, layer): - lstm_layer = layer(5, return_sequences=True) - config = keras.layers.serialize(lstm_layer) - self.assertEqual(config['class_name'], 'LSTM') - new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.units, 5) - self.assertEqual(new_layer.return_sequences, True) - if tf.__internal__.tf2.enabled(): - self.assertIsInstance(new_layer, lstm.LSTM) - else: - self.assertIsInstance(new_layer, lstm_v1.LSTM) - self.assertNotIsInstance(new_layer, lstm.LSTM) - - @parameterized.parameters([gru_v1.GRU, gru.GRU]) - def test_serialize_deserialize_gru(self, layer): - gru_layer = layer(5, return_sequences=True) - config = keras.layers.serialize(gru_layer) - self.assertEqual(config['class_name'], 'GRU') - new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.units, 5) - self.assertEqual(new_layer.return_sequences, True) - if tf.__internal__.tf2.enabled(): - self.assertIsInstance(new_layer, gru.GRU) - else: - self.assertIsInstance(new_layer, gru_v1.GRU) - self.assertNotIsInstance(new_layer, gru.GRU) - - -if __name__ == '__main__': - tf.test.main() + def test_serialize_deserialize(self): + layer = 
keras.layers.Dense( + 3, + activation="relu", + kernel_initializer="ones", + bias_regularizer="l2", + ) + config = keras.layers.serialize(layer) + new_layer = keras.layers.deserialize(config) + self.assertEqual(new_layer.activation, keras.activations.relu) + self.assertEqual( + new_layer.bias_regularizer.__class__, keras.regularizers.L2 + ) + if tf.__internal__.tf2.enabled(): + self.assertEqual( + new_layer.kernel_initializer.__class__, + keras.initializers.OnesV2, + ) + else: + self.assertEqual( + new_layer.kernel_initializer.__class__, keras.initializers.Ones + ) + self.assertEqual(new_layer.units, 3) + + def test_implicit_serialize_deserialize_fails_without_object(self): + # After discussion (rchao, nkovela) decided to exclude from new saving + if tf.__internal__.tf2.enabled(): + self.skipTest("Test excluded from new saving format.") + layer = keras.layers.Dense( + SerializableInt(3), + activation="relu", + kernel_initializer="ones", + bias_regularizer="l2", + ) + config = keras.layers.serialize(layer) + # Because we're passing an unknown class here, deserialization should + # fail unless we add SerializableInt to the custom object dict. + with self.assertRaisesRegex( + ValueError, "Unknown config_item: 'SerializableInt.*" + ): + _ = keras.layers.deserialize(config) + + def test_implicit_serialize_deserialize_succeeds_with_object(self): + layer = keras.layers.Dense( + SerializableInt(3), + activation="relu", + kernel_initializer="ones", + bias_regularizer="l2", + ) + config = keras.layers.serialize(layer) + # Because we're passing an unknown class here, deserialization should + # fail unless we add SerializableInt to the custom object dict. + new_layer = keras.layers.deserialize( + config, custom_objects={"SerializableInt": SerializableInt} + ) + self.assertEqual(new_layer.activation, keras.activations.relu) + self.assertEqual( + new_layer.bias_regularizer.__class__, keras.regularizers.L2 + ) + if tf.__internal__.tf2.enabled(): + self.assertEqual( + new_layer.kernel_initializer.__class__, + keras.initializers.OnesV2, + ) + else: + self.assertEqual( + new_layer.kernel_initializer.__class__, keras.initializers.Ones + ) + self.assertEqual(new_layer.units.__class__, SerializableInt) + self.assertEqual(new_layer.units, 3) + + @parameterized.parameters( + [batchnorm_v1.BatchNormalization, batchnorm_v2.BatchNormalization] + ) + def test_serialize_deserialize_batchnorm(self, batchnorm_layer): + layer = batchnorm_layer( + momentum=0.9, beta_initializer="zeros", gamma_regularizer="l2" + ) + config = keras.layers.serialize(layer) + self.assertEqual(config["class_name"], "BatchNormalization") + new_layer = keras.layers.deserialize(config) + self.assertEqual(new_layer.momentum, 0.9) + if tf.__internal__.tf2.enabled(): + self.assertIsInstance(new_layer, batchnorm_v2.BatchNormalization) + self.assertEqual( + new_layer.beta_initializer.__class__, keras.initializers.ZerosV2 + ) + else: + self.assertIsInstance(new_layer, batchnorm_v1.BatchNormalization) + self.assertEqual( + new_layer.beta_initializer.__class__, keras.initializers.Zeros + ) + self.assertEqual( + new_layer.gamma_regularizer.__class__, keras.regularizers.L2 + ) + + @parameterized.parameters( + [batchnorm_v1.BatchNormalization, batchnorm_v2.BatchNormalization] + ) + def test_deserialize_batchnorm_backwards_compatibility( + self, batchnorm_layer + ): + layer = batchnorm_layer( + momentum=0.9, beta_initializer="zeros", gamma_regularizer="l2" + ) + config = keras.layers.serialize(layer) + new_layer = keras.layers.deserialize(config) + 
self.assertEqual(new_layer.momentum, 0.9) + if tf.__internal__.tf2.enabled(): + self.assertIsInstance(new_layer, batchnorm_v2.BatchNormalization) + self.assertEqual( + new_layer.beta_initializer.__class__, keras.initializers.ZerosV2 + ) + else: + self.assertIsInstance(new_layer, batchnorm_v1.BatchNormalization) + self.assertEqual( + new_layer.beta_initializer.__class__, keras.initializers.Zeros + ) + self.assertEqual( + new_layer.gamma_regularizer.__class__, keras.regularizers.L2 + ) + + @parameterized.parameters([lstm_v1.LSTM, lstm.LSTM]) + def test_serialize_deserialize_lstm(self, layer): + lstm_layer = layer(5, return_sequences=True) + config = keras.layers.serialize(lstm_layer) + self.assertEqual(config["class_name"], "LSTM") + new_layer = keras.layers.deserialize(config) + self.assertEqual(new_layer.units, 5) + self.assertEqual(new_layer.return_sequences, True) + if tf.__internal__.tf2.enabled(): + self.assertIsInstance(new_layer, lstm.LSTM) + else: + self.assertIsInstance(new_layer, lstm_v1.LSTM) + self.assertNotIsInstance(new_layer, lstm.LSTM) + + @parameterized.parameters([gru_v1.GRU, gru.GRU]) + def test_serialize_deserialize_gru(self, layer): + gru_layer = layer(5, return_sequences=True) + config = keras.layers.serialize(gru_layer) + self.assertEqual(config["class_name"], "GRU") + new_layer = keras.layers.deserialize(config) + self.assertEqual(new_layer.units, 5) + self.assertEqual(new_layer.return_sequences, True) + if tf.__internal__.tf2.enabled(): + self.assertIsInstance(new_layer, gru.GRU) + else: + self.assertIsInstance(new_layer, gru_v1.GRU) + self.assertNotIsInstance(new_layer, gru.GRU) + + def test_serialize_metric_throws_error(self): + metric = Mean() + with self.assertRaisesRegex(ValueError, "since it is a metric."): + _ = keras.layers.serialize(metric) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/subclassed_layers_test.py b/keras/layers/subclassed_layers_test.py index 3adfa04d1e8a..de4ebeacaa1c 100644 --- a/keras/layers/subclassed_layers_test.py +++ b/keras/layers/subclassed_layers_test.py @@ -25,52 +25,53 @@ @test_combinations.run_all_keras_modes @test_combinations.run_with_all_model_types class SubclassedLayersTest(test_combinations.TestCase): - - def test_simple_build_with_constant(self): - - class BuildConstantLayer(keras.layers.Layer): - - def build(self, input_shape): - self.b = tf.convert_to_tensor(2.0) - - def call(self, inputs): - return self.b * inputs - - layer = BuildConstantLayer() - model = test_utils.get_model_from_layers( - [layer, keras.layers.Dense(1)], input_shape=(1,)) - - x = tf.convert_to_tensor([[3.0]]) - self.assertEqual( - tf_utils.is_symbolic_tensor(model(x)), not tf.executing_eagerly()) - self.assertEqual( - tf_utils.is_symbolic_tensor(layer(x)), not tf.executing_eagerly()) - self.assertAllClose(keras.backend.get_value(layer(x)), [[6.0]]) - - def test_build_with_derived_constant(self): - - class BuildDerivedConstantLayer(keras.layers.Layer): - - def build(self, input_shape): - a = tf.convert_to_tensor(1.0) - b = 2.0 * a - self.variable = tf.Variable(b) - self.constant = tf.convert_to_tensor(self.variable) - - def call(self, inputs): - return self.variable * self.constant * inputs - - layer = BuildDerivedConstantLayer() - model = test_utils.get_model_from_layers( - [layer, keras.layers.Dense(1)], input_shape=(1,)) - - x = tf.convert_to_tensor([[3.0]]) - self.assertEqual( - tf_utils.is_symbolic_tensor(model(x)), not tf.executing_eagerly()) - self.assertEqual( - tf_utils.is_symbolic_tensor(layer(x)), not 
tf.executing_eagerly()) - self.assertAllClose(keras.backend.get_value(layer(x)), [[12.0]]) - - -if __name__ == '__main__': - tf.test.main() + def test_simple_build_with_constant(self): + class BuildConstantLayer(keras.layers.Layer): + def build(self, input_shape): + self.b = tf.convert_to_tensor(2.0) + + def call(self, inputs): + return self.b * inputs + + layer = BuildConstantLayer() + model = test_utils.get_model_from_layers( + [layer, keras.layers.Dense(1)], input_shape=(1,) + ) + + x = tf.convert_to_tensor([[3.0]]) + self.assertEqual( + tf_utils.is_symbolic_tensor(model(x)), not tf.executing_eagerly() + ) + self.assertEqual( + tf_utils.is_symbolic_tensor(layer(x)), not tf.executing_eagerly() + ) + self.assertAllClose(keras.backend.get_value(layer(x)), [[6.0]]) + + def test_build_with_derived_constant(self): + class BuildDerivedConstantLayer(keras.layers.Layer): + def build(self, input_shape): + a = tf.convert_to_tensor(1.0) + b = 2.0 * a + self.variable = tf.Variable(b) + self.constant = tf.convert_to_tensor(self.variable) + + def call(self, inputs): + return self.variable * self.constant * inputs + + layer = BuildDerivedConstantLayer() + model = test_utils.get_model_from_layers( + [layer, keras.layers.Dense(1)], input_shape=(1,) + ) + + x = tf.convert_to_tensor([[3.0]]) + self.assertEqual( + tf_utils.is_symbolic_tensor(model(x)), not tf.executing_eagerly() + ) + self.assertEqual( + tf_utils.is_symbolic_tensor(layer(x)), not tf.executing_eagerly() + ) + self.assertAllClose(keras.backend.get_value(layer(x)), [[12.0]]) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/layers/tensorflow_op_layer_test.py b/keras/layers/tensorflow_op_layer_test.py index a42da122c6d3..6c0173c14bad 100644 --- a/keras/layers/tensorflow_op_layer_test.py +++ b/keras/layers/tensorflow_op_layer_test.py @@ -14,736 +14,761 @@ # ============================================================================== """Test for allowing TF ops to work with Keras Functional API.""" -import tensorflow.compat.v2 as tf - import time -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras +from keras.engine import keras_tensor +from keras.optimizers.legacy import adam +from keras.saving.legacy import model_config from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.engine import keras_tensor -from keras.optimizers.optimizer_v2 import adam -from keras.saving import model_config def _single_op_at_end(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - outputs = tf.nn.relu(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = keras.layers.Dense(10)(inputs) + outputs = tf.nn.relu(x) + return keras.Model(inputs, outputs) def _single_identity_op_at_end(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - outputs = tf.identity(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = keras.layers.Dense(10)(inputs) + outputs = tf.identity(x) + return keras.Model(inputs, outputs) def _multiple_ops_at_end(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - x = tf.nn.relu(x) - outputs = tf.nn.relu(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = keras.layers.Dense(10)(inputs) + x = tf.nn.relu(x) + outputs = tf.nn.relu(x) + return keras.Model(inputs, outputs) def _single_op_in_middle(): - inputs = keras.Input(shape=(10,)) - 
x = keras.layers.Dense(10)(inputs) - x = tf.nn.relu(x) - outputs = keras.layers.Dense(10)(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = keras.layers.Dense(10)(inputs) + x = tf.nn.relu(x) + outputs = keras.layers.Dense(10)(x) + return keras.Model(inputs, outputs) def _multiple_ops_in_middle(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - x = tf.nn.relu(x) - x = tf.nn.relu(x) - outputs = keras.layers.Dense(10)(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = keras.layers.Dense(10)(inputs) + x = tf.nn.relu(x) + x = tf.nn.relu(x) + outputs = keras.layers.Dense(10)(x) + return keras.Model(inputs, outputs) def _shape_op_inference(): - inputs = keras.Input(shape=(10,)) - x = tf.shape(inputs) - x = tf.ones(x) - assert x.shape.as_list() == [None, 10] - outputs = keras.layers.Dense(10)(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = tf.shape(inputs) + x = tf.ones(x) + assert x.shape.as_list() == [None, 10] + outputs = keras.layers.Dense(10)(x) + return keras.Model(inputs, outputs) def _shape_op_known_batch_size(): - inputs = keras.Input(batch_size=2, shape=(10,)) - x = tf.shape(inputs) - x = tf.ones(x) - assert x.shape.as_list() == [2, 10] - outputs = keras.layers.Dense(10)(x) - if tf.executing_eagerly(): - return keras.Model(inputs, outputs) - else: - # In V1 the op layer fails for some reason, - # but we don't have access to the test case to call - # self.skip_test in this util method - return keras.Model(inputs, inputs) + inputs = keras.Input(batch_size=2, shape=(10,)) + x = tf.shape(inputs) + x = tf.ones(x) + assert x.shape.as_list() == [2, 10] + outputs = keras.layers.Dense(10)(x) + if tf.executing_eagerly(): + return keras.Model(inputs, outputs) + else: + # In V1 the op layer fails for some reason, + # but we don't have access to the test case to call + # self.skip_test in this util method + return keras.Model(inputs, inputs) def _shape_op_slice_and_range(): - inputs = keras.Input(shape=(10,)) - batch_size = tf.shape(inputs)[0] - x = tf.range(batch_size * 2) - assert x.shape.as_list() == [None] - x = tf.reshape(x, (batch_size, 2)) - x = tf.cast(x, dtype='float32') - outputs = keras.layers.Dense(10)(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + batch_size = tf.shape(inputs)[0] + x = tf.range(batch_size * 2) + assert x.shape.as_list() == [None] + x = tf.reshape(x, (batch_size, 2)) + x = tf.cast(x, dtype="float32") + outputs = keras.layers.Dense(10)(x) + return keras.Model(inputs, outputs) def _shape_op_slice_and_range_known_dim(): - inputs = keras.Input(batch_size=2, shape=(10,)) - batch_size = tf.shape(inputs)[0] - x = tf.range(batch_size * 3) - assert x.shape.as_list() == [6] - x = tf.reshape(x, (batch_size, 3)) - x = tf.cast(x, dtype='float32') - outputs = keras.layers.Dense(10)(x) - if tf.executing_eagerly(): - return keras.Model(inputs, outputs) - else: - # In V1 the op layer fails for some reason, - # but we don't have access to the test case to call - # self.skip_test in this util method - return keras.Model(inputs, inputs) + inputs = keras.Input(batch_size=2, shape=(10,)) + batch_size = tf.shape(inputs)[0] + x = tf.range(batch_size * 3) + assert x.shape.as_list() == [6] + x = tf.reshape(x, (batch_size, 3)) + x = tf.cast(x, dtype="float32") + outputs = keras.layers.Dense(10)(x) + if tf.executing_eagerly(): + return keras.Model(inputs, outputs) + else: + # In V1 the op layer fails for some reason, + # but we don't have 
access to the test case to call + # self.skip_test in this util method + return keras.Model(inputs, inputs) def _int32_manipulation_too_big_for_shape(): - # This test verifies that the Keras Functional API - # won't crash when manipulating int32 tensors that are too large - # to represent shapes. - inputs = keras.Input(batch_size=2, shape=(10,)) - batch_size = tf.shape(inputs)[0] - num_features = 3 * 1024 * 16 - x = tf.range(batch_size * num_features, dtype='int32') - assert x.shape.as_list() == [inputs.shape[0] * num_features] - x = tf.reshape(x, (batch_size, num_features)) - x = tf.cast(x, dtype='float32') - outputs = keras.layers.Dense(10)(x) - if tf.executing_eagerly(): - return keras.Model(inputs, outputs) - else: - # In V1 the op layer fails for some reason, - # but we don't have access to the test case to call - # self.skip_test in this util method - return keras.Model(inputs, inputs) + # This test verifies that the Keras Functional API + # won't crash when manipulating int32 tensors that are too large + # to represent shapes. + inputs = keras.Input(batch_size=2, shape=(10,)) + batch_size = tf.shape(inputs)[0] + num_features = 3 * 1024 * 16 + x = tf.range(batch_size * num_features, dtype="int32") + assert x.shape.as_list() == [inputs.shape[0] * num_features] + x = tf.reshape(x, (batch_size, num_features)) + x = tf.cast(x, dtype="float32") + outputs = keras.layers.Dense(10)(x) + if tf.executing_eagerly(): + return keras.Model(inputs, outputs) + else: + # In V1 the op layer fails for some reason, + # but we don't have access to the test case to call + # self.skip_test in this util method + return keras.Model(inputs, inputs) def _int32_manipulation_at_max_shape_dims_limit(): - # This test verifies that the Keras Functional API - # won't crash when manipulating int32 tensors that are at the limit - # of the max tensor size Keras can try inferring values for. - inputs = keras.Input(batch_size=2, shape=(10,)) - batch_size = tf.shape(inputs)[0] - num_features = int(keras_tensor._MAX_TENSOR_RANK / int(inputs.shape[0])) - x = tf.range(batch_size * num_features, dtype='int32') - assert x.shape.as_list() == [keras_tensor._MAX_TENSOR_RANK] - - # Verify that a value was actually inferred for a tensor that *might* - # represent the shape, bying checking that a value in - # the range appears in the printed inferred value - if tf.compat.v1.executing_eagerly_outside_functions(): - assert str(keras_tensor._MAX_TENSOR_RANK - 1) in str(x) - - x = tf.reshape(x, (batch_size, num_features)) - x = tf.cast(x, dtype='float32') - outputs = keras.layers.Dense(10)(x) - if tf.executing_eagerly(): - return keras.Model(inputs, outputs) - else: - # In V1 the op layer fails for some reason, - # but we don't have access to the test case to call - # self.skip_test in this util method - return keras.Model(inputs, inputs) + # This test verifies that the Keras Functional API + # won't crash when manipulating int32 tensors that are at the limit + # of the max tensor size Keras can try inferring values for. 
+ inputs = keras.Input(batch_size=2, shape=(10,)) + batch_size = tf.shape(inputs)[0] + num_features = int(keras_tensor._MAX_TENSOR_RANK / int(inputs.shape[0])) + x = tf.range(batch_size * num_features, dtype="int32") + assert x.shape.as_list() == [keras_tensor._MAX_TENSOR_RANK] + + # Verify that a value was actually inferred for a tensor that *might* + # represent the shape, by checking that a value in + # the range appears in the printed inferred value + if tf.compat.v1.executing_eagerly_outside_functions(): + assert str(keras_tensor._MAX_TENSOR_RANK - 1) in str(x) + + x = tf.reshape(x, (batch_size, num_features)) + x = tf.cast(x, dtype="float32") + outputs = keras.layers.Dense(10)(x) + if tf.executing_eagerly(): + return keras.Model(inputs, outputs) + else: + # In V1 the op layer fails for some reason, + # but we don't have access to the test case to call + # self.skip_test in this util method + return keras.Model(inputs, inputs) def _single_standalone_branch(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - outputs = x * 2 - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = keras.layers.Dense(10)(inputs) + outputs = x * 2 + return keras.Model(inputs, outputs) def _single_op_with_attrs(): - inputs = keras.Input(shape=(10,)) - x = tf.reduce_mean(inputs, axis=1, keepdims=True) - outputs = keras.layers.Dense(10)(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = tf.reduce_mean(inputs, axis=1, keepdims=True) + outputs = keras.layers.Dense(10)(x) + return keras.Model(inputs, outputs) def _multiple_uses(): - inputs = keras.Input(shape=(10,)) - x = tf.reduce_mean(inputs, axis=1, keepdims=True) - x1 = keras.layers.Dense(10)(x) - x2 = keras.layers.Dense(10)(x) - outputs = x1 + x2 - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = tf.reduce_mean(inputs, axis=1, keepdims=True) + x1 = keras.layers.Dense(10)(x) + x2 = keras.layers.Dense(10)(x) + outputs = x1 + x2 + return keras.Model(inputs, outputs) def _op_with_tensor_list(): - inputs = keras.Input(shape=(10,)) - x = tf.concat([inputs, inputs], axis=1) - outputs = keras.layers.Dense(10)(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = tf.concat([inputs, inputs], axis=1) + outputs = keras.layers.Dense(10)(x) + return keras.Model(inputs, outputs) def _add_n(): - inputs = keras.Input(shape=(10,)) - outputs = tf.add_n([inputs, inputs, inputs]) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + outputs = tf.add_n([inputs, inputs, inputs]) + return keras.Model(inputs, outputs) def _reuse_op(): - inputs = keras.Input(shape=(10,)) - # This op needs to be checked multiple times.
+ x = tf.nn.relu(inputs) + y = keras.layers.Dense(10)(x) + x2 = x * 2 + y2 = keras.layers.Dense(10)(x2) + outputs = y + y2 + return keras.Model(inputs, outputs) def _float64_op(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10, dtype='float64')(inputs) - x = tf.nn.relu(x) - assert x.dtype == 'float64', 'x has dtype: %s' % x.dtype - outputs = keras.layers.Dense(10)(x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = keras.layers.Dense(10, dtype="float64")(inputs) + x = tf.nn.relu(x) + assert x.dtype == "float64", f"x has dtype: {x.dtype}" + outputs = keras.layers.Dense(10)(x) + return keras.Model(inputs, outputs) class MyAdd(keras.layers.Layer): - - def call(self, x, y): - return x + y + def call(self, x, y): + return x + y def _layer_with_tensor_arg(): - inputs = keras.Input(shape=(10,)) - x = inputs * 2 - outputs = MyAdd()(inputs, x) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + x = inputs * 2 + outputs = MyAdd()(inputs, x) + return keras.Model(inputs, outputs) class LayerWithLayer(keras.layers.Layer): + def build(self, input_shape): + self.bias = self.add_weight(name="bias", dtype="float32") + self.layer = keras.layers.Dense(10) - def build(self, input_shape): - self.bias = self.add_weight(name='bias', dtype='float32') - self.layer = keras.layers.Dense(10) - - def call(self, inputs): - inputs = inputs * self.bias - # Would throw an error if Keras History was created here. - return self.layer(inputs) + def call(self, inputs): + inputs = inputs * self.bias + # Would throw an error if Keras History was created here. + return self.layer(inputs) def _inner_layer(): - inputs = keras.Input(shape=(10,)) - outputs = LayerWithLayer()(inputs) - return keras.Model(inputs, outputs) + inputs = keras.Input(shape=(10,)) + outputs = LayerWithLayer()(inputs) + return keras.Model(inputs, outputs) def _reuse_ancillary_layer(): - inputs = (keras.Input(shape=(5,)), keras.Input(shape=(5,))) - base_model = keras.Sequential([ - keras.layers.Dense(3, input_shape=(5,)), - ]) - outputs = base_model(inputs[0]) - model = keras.Model(inputs, outputs) - # The second input is only involved in ancillary layers. - outputs_delta = outputs - base_model(0.5 * inputs[1]) - l2_loss = tf.reduce_mean( - tf.reduce_sum(tf.square(outputs_delta), -1)) - model.add_loss(l2_loss) - model.add_metric(l2_loss, aggregation='mean', name='l2_loss') - l1_loss = 0.01 * tf.reduce_mean( - tf.reduce_sum(tf.abs(outputs_delta), -1)) - model.add_loss(l1_loss) - model.add_metric(l1_loss, aggregation='mean', name='l1_loss') - return model + inputs = (keras.Input(shape=(5,)), keras.Input(shape=(5,))) + base_model = keras.Sequential( + [ + keras.layers.Dense(3, input_shape=(5,)), + ] + ) + outputs = base_model(inputs[0]) + model = keras.Model(inputs, outputs) + # The second input is only involved in ancillary layers. 
+ outputs_delta = outputs - base_model(0.5 * inputs[1]) + l2_loss = tf.reduce_mean(tf.reduce_sum(tf.square(outputs_delta), -1)) + model.add_loss(l2_loss) + model.add_metric(l2_loss, aggregation="mean", name="l2_loss") + l1_loss = 0.01 * tf.reduce_mean(tf.reduce_sum(tf.abs(outputs_delta), -1)) + model.add_loss(l1_loss) + model.add_metric(l1_loss, aggregation="mean", name="l1_loss") + return model @test_combinations.run_all_keras_modes() class AutoLambdaTest(test_combinations.TestCase): - - @parameterized.named_parameters( - ('single_op_at_end', _single_op_at_end), - ('single_identity_op_at_end', _single_identity_op_at_end), - ('multiple_ops_at_end', _multiple_ops_at_end), - ('single_op_in_middle', _single_op_in_middle), - ('multiple_ops_in_middle', _multiple_ops_in_middle), - ('shape_op_inference', _shape_op_inference), - ('shape_op_known_batch_size', _shape_op_known_batch_size), - ('shape_op_slice_and_range', _shape_op_slice_and_range), - ('shape_op_slice_and_range_known_dim', - _shape_op_slice_and_range_known_dim), - ('int32_manipulation_too_big_for_shape', - _int32_manipulation_too_big_for_shape), - ('int32_manipulation_at_max_shape_dims_limit', - _int32_manipulation_at_max_shape_dims_limit), - ('single_standalone_branch', _single_standalone_branch), - ('single_op_with_attrs', _single_op_with_attrs), - ('multiple_uses', _multiple_uses), - ('op_with_tensor_list', _op_with_tensor_list), - ('add_n', _add_n), - ('_reuse_op', _reuse_op), - ('_float64_op', _float64_op), - ('_inner_layer', _inner_layer), - ('_reuse_ancillary_layer', _reuse_ancillary_layer), - ('_layer_with_tensor_arg', _layer_with_tensor_arg), - ) - def test_autolambda(self, model_fn): - model = model_fn() - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - np_inputs = tf.nest.map_structure( - lambda x: np.ones((2,) + tuple(x.shape[1:]), 'float32'), model.inputs) - np_outputs = tf.nest.map_structure( - lambda x: np.ones((2,) + tuple(x.shape[1:]), 'float32'), model.outputs) - model.fit(np_inputs, np_outputs, batch_size=2) - model(np_inputs) # Test calling the model directly on inputs. - - new_model = keras.Model.from_config( - model.get_config(), - custom_objects={ - 'LayerWithLayer': LayerWithLayer, - 'MyAdd': MyAdd - }) - new_model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - new_model.fit(np_inputs, np_outputs, batch_size=2) - new_model(np_inputs) # Test calling the new model directly on inputs. - # Assert that metrics are preserved and in the right order. - self.assertAllEqual(model.metrics_names, new_model.metrics_names) - # Assert that layer names don't change. 
- self.assertAllEqual([layer.name for layer in model.layers], - [layer.name for layer in new_model.layers]) - - def test_stack_preserves_correct_shape(self): - ## Test stack([x]) - inp = keras.Input(shape=(), dtype='float32') - - out = tf.stack([inp]) - model = keras.Model( - inputs=inp, - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - x = tf.ones(shape=(4, 4)) - expected = tf.stack([x]) - self.assertAllEqual(expected.shape, (1, 4, 4)) - - self.assertAllEqual(model(x).shape, (1, 4, 4)) - self.assertAllEqual(model(x), expected) - - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(x).shape, (1, 4, 4)) - self.assertAllEqual(model(x), expected) - - ## Test stack(x) - inp = keras.Input(shape=(), dtype='float32') - - out = tf.stack(inp) - model = keras.Model( - inputs=inp, - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - x = tf.ones(shape=(4, 4)) - expected = tf.stack(x) - self.assertAllEqual(expected.shape, (4, 4)) - - self.assertAllEqual(model(x).shape, (4, 4)) - self.assertAllEqual(model(x), expected) - - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(x).shape, (4, 4)) - self.assertAllEqual(model(x), expected) - - def test_getitem_slice_with_step_only(self): - if not tf.executing_eagerly(): - self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(8,)) - slice_step = keras.Input(shape=(), dtype='int32') - - out = inp[..., ::slice_step[0]] - model = keras.Model( - inputs=[inp, slice_step], - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - batch_size = 7 - step = 3 - x = tf.stack([ - tf.range(8) for _ in range(batch_size)]) - args = [x, tf.constant(step, shape=(batch_size,))] - expected = tf.stack([ - tf.range(8)[::step] for _ in range(batch_size)]) - - if tf.compat.v1.executing_eagerly_outside_functions(): - self.assertIn('tf.__operators__.getitem', ( - x.name for x in model.layers)) - self.assertNotIn('tf.strided_slice', ( - x.name for x in model.layers)) - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - # Make sure it can be successfully saved and loaded - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - def test_getitem_slice_real_tensor(self): - if not tf.executing_eagerly(): - self.skipTest('Complex slicing like this fails in v1') - x = tf.range(10.0) - slice_stop = keras.Input(shape=(), dtype='int32') - - out = x[:slice_stop[0]] - model = keras.Model( - inputs=slice_stop, - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - batch_size = 7 - stop = 6 - args = tf.constant(stop, shape=(batch_size,)) - expected = x[:stop] - - if tf.compat.v1.executing_eagerly_outside_functions(): - self.assertIn('tf.__operators__.getitem', ( - x.name for x in model.layers)) - # TODO(b/161925288): Fix the dispatch triggering then uncomment: - # self.assertNotIn('tf.strided_slice', ( - # x.name for x in model.layers)) - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - config = model.get_config() - model = keras.Model.from_config(config) - - 
self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - def test_getitem_index_real_tensor(self): - if not tf.executing_eagerly(): - self.skipTest('Complex slicing like this fails in v1') - x = tf.range(10.0) - slice_stop = keras.Input(shape=(), dtype='int32') - - out = x[slice_stop[0]] - model = keras.Model( - inputs=slice_stop, - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - batch_size = 7 - index = 6 - args = tf.constant(index, shape=(batch_size,)) - expected = x[index] - - if tf.compat.v1.executing_eagerly_outside_functions(): - self.assertIn('tf.__operators__.getitem', ( - x.name for x in model.layers)) - # TODO(b/161925288): Fix the bug then uncomment: - # self.assertNotIn('tf.strided_slice', ( - # x.name for x in model.layers)) - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - # Make sure it can be successfully saved and loaded - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - def test_getitem_slice_with_stop_only(self): - if not tf.executing_eagerly(): - self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(8,)) - slice_stop = keras.Input(shape=(), dtype='int32') - - out = inp[:slice_stop[0]] - model = keras.Model( - inputs=[inp, slice_stop], - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - batch_size = 7 - stop = 6 - x = tf.stack([ - tf.range(8) for _ in range(batch_size)]) - args = [x, tf.constant(stop, shape=(batch_size,))] - expected = x[:stop] - - if tf.compat.v1.executing_eagerly_outside_functions(): - self.assertIn('tf.__operators__.getitem', ( - x.name for x in model.layers)) - self.assertNotIn('tf.strided_slice', ( - x.name for x in model.layers)) - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - # Make sure it can be successfully saved and loaded - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - def test_getitem_slice_with_stop_and_ellipsis_only(self): - if not tf.executing_eagerly(): - self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(8,)) - slice_stop = keras.Input(shape=(), dtype='int32') - - out = inp[..., :slice_stop[0]] - model = keras.Model( - inputs=[inp, slice_stop], - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - batch_size = 7 - stop = 6 - x = tf.stack([ - tf.range(8) for _ in range(batch_size)]) - args = [x, tf.constant(stop, shape=(batch_size,))] - expected = tf.stack([ - tf.range(8)[:stop] for _ in range(batch_size)]) - - if tf.compat.v1.executing_eagerly_outside_functions(): - self.assertIn('tf.__operators__.getitem', ( - x.name for x in model.layers)) - self.assertNotIn('tf.strided_slice', ( - x.name for x in model.layers)) - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - # Make sure it can be successfully saved and loaded - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(args), 
expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - def test_getitem_complex_slicing(self): - if not tf.executing_eagerly(): - self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(4, 3, 8)) - first_dim = keras.Input(shape=(), dtype='int32') - slice_start = keras.Input(shape=(), dtype='int32') - slice_stop = keras.Input(shape=(), dtype='int32') - slice_stride = keras.Input(shape=(), dtype='int32') - - out = inp[..., first_dim[0], slice_start[0]:slice_stop[0]:slice_stride[0]] - model = keras.Model( - inputs=[inp, first_dim, slice_start, slice_stop, slice_stride], - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - batch_size = 7 - start = 1 - stop = 6 - step = 2 - x = tf.stack([tf.stack([tf.stack([ - tf.range(8) - for _ in range(3)]) for _ in range(4)]) for _ in range(batch_size)]) - args = [x, + @parameterized.named_parameters( + ("single_op_at_end", _single_op_at_end), + ("single_identity_op_at_end", _single_identity_op_at_end), + ("multiple_ops_at_end", _multiple_ops_at_end), + ("single_op_in_middle", _single_op_in_middle), + ("multiple_ops_in_middle", _multiple_ops_in_middle), + ("shape_op_inference", _shape_op_inference), + ("shape_op_known_batch_size", _shape_op_known_batch_size), + ("shape_op_slice_and_range", _shape_op_slice_and_range), + ( + "shape_op_slice_and_range_known_dim", + _shape_op_slice_and_range_known_dim, + ), + ( + "int32_manipulation_too_big_for_shape", + _int32_manipulation_too_big_for_shape, + ), + ( + "int32_manipulation_at_max_shape_dims_limit", + _int32_manipulation_at_max_shape_dims_limit, + ), + ("single_standalone_branch", _single_standalone_branch), + ("single_op_with_attrs", _single_op_with_attrs), + ("multiple_uses", _multiple_uses), + ("op_with_tensor_list", _op_with_tensor_list), + ("add_n", _add_n), + ("_reuse_op", _reuse_op), + ("_float64_op", _float64_op), + ("_inner_layer", _inner_layer), + ("_reuse_ancillary_layer", _reuse_ancillary_layer), + ("_layer_with_tensor_arg", _layer_with_tensor_arg), + ) + def test_autolambda(self, model_fn): + model = model_fn() + model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + np_inputs = tf.nest.map_structure( + lambda x: np.ones((2,) + tuple(x.shape[1:]), "float32"), + model.inputs, + ) + np_outputs = tf.nest.map_structure( + lambda x: np.ones((2,) + tuple(x.shape[1:]), "float32"), + model.outputs, + ) + model.fit(np_inputs, np_outputs, batch_size=2) + model(np_inputs) # Test calling the model directly on inputs. + + new_model = keras.Model.from_config( + model.get_config(), + custom_objects={"LayerWithLayer": LayerWithLayer, "MyAdd": MyAdd}, + ) + new_model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + new_model.fit(np_inputs, np_outputs, batch_size=2) + new_model(np_inputs) # Test calling the new model directly on inputs. + # Assert that metrics are preserved and in the right order. + self.assertAllEqual(model.metrics_names, new_model.metrics_names) + # Assert that layer names don't change. 
+ self.assertAllEqual( + [layer.name for layer in model.layers], + [layer.name for layer in new_model.layers], + ) + + def test_stack_preserves_correct_shape(self): + ## Test stack([x]) + inp = keras.Input(shape=(), dtype="float32") + + out = tf.stack([inp]) + model = keras.Model(inputs=inp, outputs=out) + model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + x = tf.ones(shape=(4, 4)) + expected = tf.stack([x]) + self.assertAllEqual(expected.shape, (1, 4, 4)) + + self.assertAllEqual(model(x).shape, (1, 4, 4)) + self.assertAllEqual(model(x), expected) + + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(x).shape, (1, 4, 4)) + self.assertAllEqual(model(x), expected) + + ## Test stack(x) + inp = keras.Input(shape=(), dtype="float32") + + out = tf.stack(inp) + model = keras.Model(inputs=inp, outputs=out) + model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + + x = tf.ones(shape=(4, 4)) + expected = tf.stack(x) + self.assertAllEqual(expected.shape, (4, 4)) + + self.assertAllEqual(model(x).shape, (4, 4)) + self.assertAllEqual(model(x), expected) + + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(x).shape, (4, 4)) + self.assertAllEqual(model(x), expected) + + def test_getitem_slice_with_step_only(self): + if not tf.executing_eagerly(): + self.skipTest("Complex slicing like this fails in v1") + inp = keras.Input(shape=(8,)) + slice_step = keras.Input(shape=(), dtype="int32") + + out = inp[..., :: slice_step[0]] + model = keras.Model(inputs=[inp, slice_step], outputs=out) + model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + batch_size = 7 + step = 3 + x = tf.stack([tf.range(8) for _ in range(batch_size)]) + args = [x, tf.constant(step, shape=(batch_size,))] + expected = tf.stack([tf.range(8)[::step] for _ in range(batch_size)]) + + if tf.compat.v1.executing_eagerly_outside_functions(): + self.assertIn( + "tf.__operators__.getitem", (x.name for x in model.layers) + ) + self.assertNotIn("tf.strided_slice", (x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + def test_getitem_slice_real_tensor(self): + if not tf.executing_eagerly(): + self.skipTest("Complex slicing like this fails in v1") + x = tf.range(10.0) + slice_stop = keras.Input(shape=(), dtype="int32") + + out = x[: slice_stop[0]] + model = keras.Model(inputs=slice_stop, outputs=out) + model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + batch_size = 7 + stop = 6 + args = tf.constant(stop, shape=(batch_size,)) + expected = x[:stop] + + if tf.compat.v1.executing_eagerly_outside_functions(): + self.assertIn( + "tf.__operators__.getitem", (x.name for x in model.layers) + ) + # TODO(b/161925288): Fix the dispatch triggering then uncomment: + # self.assertNotIn('tf.strided_slice', ( + # x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + config = model.get_config() + model = keras.Model.from_config(config) + + 
self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + def test_getitem_index_real_tensor(self): + if not tf.executing_eagerly(): + self.skipTest("Complex slicing like this fails in v1") + x = tf.range(10.0) + slice_stop = keras.Input(shape=(), dtype="int32") + + out = x[slice_stop[0]] + model = keras.Model(inputs=slice_stop, outputs=out) + model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + batch_size = 7 + index = 6 + args = tf.constant(index, shape=(batch_size,)) + expected = x[index] + + if tf.compat.v1.executing_eagerly_outside_functions(): + self.assertIn( + "tf.__operators__.getitem", (x.name for x in model.layers) + ) + # TODO(b/161925288): Fix the bug then uncomment: + # self.assertNotIn('tf.strided_slice', ( + # x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + def test_getitem_slice_with_stop_only(self): + if not tf.executing_eagerly(): + self.skipTest("Complex slicing like this fails in v1") + inp = keras.Input(shape=(8,)) + slice_stop = keras.Input(shape=(), dtype="int32") + + out = inp[: slice_stop[0]] + model = keras.Model(inputs=[inp, slice_stop], outputs=out) + model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + batch_size = 7 + stop = 6 + x = tf.stack([tf.range(8) for _ in range(batch_size)]) + args = [x, tf.constant(stop, shape=(batch_size,))] + expected = x[:stop] + + if tf.compat.v1.executing_eagerly_outside_functions(): + self.assertIn( + "tf.__operators__.getitem", (x.name for x in model.layers) + ) + self.assertNotIn("tf.strided_slice", (x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + def test_getitem_slice_with_stop_and_ellipsis_only(self): + if not tf.executing_eagerly(): + self.skipTest("Complex slicing like this fails in v1") + inp = keras.Input(shape=(8,)) + slice_stop = keras.Input(shape=(), dtype="int32") + + out = inp[..., : slice_stop[0]] + model = keras.Model(inputs=[inp, slice_stop], outputs=out) + model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + batch_size = 7 + stop = 6 + x = tf.stack([tf.range(8) for _ in range(batch_size)]) + args = [x, tf.constant(stop, shape=(batch_size,))] + expected = tf.stack([tf.range(8)[:stop] for _ in range(batch_size)]) + + if tf.compat.v1.executing_eagerly_outside_functions(): + self.assertIn( + "tf.__operators__.getitem", (x.name for x in model.layers) + ) + self.assertNotIn("tf.strided_slice", (x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + 
self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + def test_getitem_complex_slicing(self): + if not tf.executing_eagerly(): + self.skipTest("Complex slicing like this fails in v1") + inp = keras.Input(shape=(4, 3, 8)) + first_dim = keras.Input(shape=(), dtype="int32") + slice_start = keras.Input(shape=(), dtype="int32") + slice_stop = keras.Input(shape=(), dtype="int32") + slice_stride = keras.Input(shape=(), dtype="int32") + + out = inp[ + ..., first_dim[0], slice_start[0] : slice_stop[0] : slice_stride[0] + ] + model = keras.Model( + inputs=[inp, first_dim, slice_start, slice_stop, slice_stride], + outputs=out, + ) + model.compile( + adam.Adam(0.001), "mse", run_eagerly=test_utils.should_run_eagerly() + ) + batch_size = 7 + start = 1 + stop = 6 + step = 2 + x = tf.stack( + [ + tf.stack( + [ + tf.stack([tf.range(8) for _ in range(3)]) + for _ in range(4) + ] + ) + for _ in range(batch_size) + ] + ) + args = [ + x, tf.constant(0, shape=(batch_size,)), tf.constant(start, shape=(batch_size,)), tf.constant(stop, shape=(batch_size,)), - tf.constant(step, shape=(batch_size,))] - # Slice the innermost dim. only grab one index from the second-to-innermost - # dim, removing that dim from the shape. - expected = tf.stack([tf.stack([ - tf.range(8)[start:stop:step] - for _ in range(4)]) for _ in range(batch_size)]) - - if tf.compat.v1.executing_eagerly_outside_functions(): - self.assertIn('tf.__operators__.getitem', ( - x.name for x in model.layers)) - self.assertNotIn('tf.strided_slice', ( - x.name for x in model.layers)) - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - # Make sure it can be successfully saved and loaded - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - def test_left_hand_numpy_multiplication(self): - x = np.asarray([3.0]) - inputs = keras.Input(shape=(4,)) - outputs = x * inputs - model = keras.Model(inputs, outputs) - ones = tf.ones((5, 4), dtype='float32') - self.assertAllEqual(model(ones), 3.0 * ones) - - def test_numerical_correctness_simple(self): - x = tf.convert_to_tensor([[-1., 0., -2., 1.]]) - inputs = keras.Input(shape=(4,)) - outputs = tf.nn.relu(inputs) - model = keras.Model(inputs, outputs) - y = self.evaluate(model(x)) - self.assertAllClose(y, [[0., 0., 0., 1.]]) - - def test_numerical_correctness_with_attrs(self): - x = tf.convert_to_tensor([[1.5, 1.5], [2.5, 3.5]]) - inputs = keras.Input(shape=(2,)) - outputs = tf.reduce_mean(inputs, axis=1) - model = keras.Model(inputs, outputs) - y = self.evaluate(model(x)) - self.assertAllClose(y, [1.5, 3.]) - - def test_numerical_correctness_serialization(self): - x = tf.convert_to_tensor([[-1., 0., -2., 1.]]) - inputs = keras.Input(shape=(4,)) - outputs = tf.nn.relu(inputs) - model1 = keras.Model(inputs, outputs) - y1 = self.evaluate(model1(x)) - model2 = keras.Model.from_config(model1.get_config()) - y2 = self.evaluate(model2(x)) - self.assertAllClose(y1, y2) - - def test_gradient_tape_in_function(self): - z = keras.Input((1,)) - x = tf.matmul(z, tf.constant(2.0, shape=(1, 1))) - x = tf.reduce_mean(x, axis=0, keepdims=True) - h = tf.nn.relu(x) - m = keras.Model(z, h) - - @tf.function() - def f(x): - with tf.GradientTape() as t: - t.watch(x) - z = m(x ** 2) - grads = t.gradient(z, x) - return grads - - 
self.assertAllEqual(f(tf.constant(10.0, shape=(1, 1))), - tf.constant(40.0, shape=(1, 1))) - - f = tf.function(f) - - self.assertAllEqual(f(tf.constant(10.0, shape=(1, 1))), - tf.constant(40.0, shape=(1, 1))) - - def test_no_tracking(self): - if not tf.executing_eagerly(): - x = tf.constant(1.0, shape=(10, 10)) - keras.layers.Dense(1)(x) - self.assertTrue(x._keras_history_checked) - - def test_timing_scales_linearly(self): - - def _construct_graph_of_size(size): - start = time.time() - x = keras.backend.placeholder(shape=(10, 4)) - - for _ in range(size): - x = keras.layers.Dense(4)(x) - x = tf.nn.relu(x) - - end = time.time() - return end - start - - size_50 = _construct_graph_of_size(50) - size_500 = _construct_graph_of_size(500) - - # Check construction time grows approx. linearly with size. - e = 3 # Fudge factor to prevent flakiness. - self.assertLess(size_500, (10 * e) * size_50) - - def test_built(self): - inputs = keras.Input(shape=(10,)) - outputs = tf.nn.relu(inputs) - model = keras.Model(inputs, outputs) - model.compile('sgd', 'mse') - for layer in model.layers: - self.assertTrue(layer.built) - # Test something that requires Layers to be built. - model.summary() - - def test_json_serialization(self): - inputs = keras.Input(shape=(4,), dtype='uint8') - outputs = tf.cast(inputs, 'float32') / 4. - model = model_config.model_from_json(keras.Model(inputs, outputs).to_json()) - self.assertAllEqual( - self.evaluate(model(np.array([0, 64, 128, 192], np.uint8))), - [0., 16., 32., 48.]) - model.summary() + tf.constant(step, shape=(batch_size,)), + ] + # Slice the innermost dim. only grab one index from the + # second-to-innermost dim, removing that dim from the shape. + expected = tf.stack( + [ + tf.stack([tf.range(8)[start:stop:step] for _ in range(4)]) + for _ in range(batch_size) + ] + ) + + if tf.compat.v1.executing_eagerly_outside_functions(): + self.assertIn( + "tf.__operators__.getitem", (x.name for x in model.layers) + ) + self.assertNotIn("tf.strided_slice", (x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + def test_left_hand_numpy_multiplication(self): + x = np.asarray([3.0]) + inputs = keras.Input(shape=(4,)) + outputs = x * inputs + model = keras.Model(inputs, outputs) + ones = tf.ones((5, 4), dtype="float32") + self.assertAllEqual(model(ones), 3.0 * ones) + + def test_numerical_correctness_simple(self): + x = tf.convert_to_tensor([[-1.0, 0.0, -2.0, 1.0]]) + inputs = keras.Input(shape=(4,)) + outputs = tf.nn.relu(inputs) + model = keras.Model(inputs, outputs) + y = self.evaluate(model(x)) + self.assertAllClose(y, [[0.0, 0.0, 0.0, 1.0]]) + + def test_numerical_correctness_with_attrs(self): + x = tf.convert_to_tensor([[1.5, 1.5], [2.5, 3.5]]) + inputs = keras.Input(shape=(2,)) + outputs = tf.reduce_mean(inputs, axis=1) + model = keras.Model(inputs, outputs) + y = self.evaluate(model(x)) + self.assertAllClose(y, [1.5, 3.0]) + + def test_numerical_correctness_serialization(self): + x = tf.convert_to_tensor([[-1.0, 0.0, -2.0, 1.0]]) + inputs = keras.Input(shape=(4,)) + outputs = tf.nn.relu(inputs) + model1 = keras.Model(inputs, outputs) + y1 = self.evaluate(model1(x)) + model2 = 
keras.Model.from_config(model1.get_config()) + y2 = self.evaluate(model2(x)) + self.assertAllClose(y1, y2) + + def test_gradient_tape_in_function(self): + z = keras.Input((1,)) + x = tf.matmul(z, tf.constant(2.0, shape=(1, 1))) + x = tf.reduce_mean(x, axis=0, keepdims=True) + h = tf.nn.relu(x) + m = keras.Model(z, h) + + @tf.function() + def f(x): + with tf.GradientTape() as t: + t.watch(x) + z = m(x**2) + grads = t.gradient(z, x) + return grads + + self.assertAllEqual( + f(tf.constant(10.0, shape=(1, 1))), tf.constant(40.0, shape=(1, 1)) + ) + + f = tf.function(f) + + self.assertAllEqual( + f(tf.constant(10.0, shape=(1, 1))), tf.constant(40.0, shape=(1, 1)) + ) + + def test_no_tracking(self): + if not tf.executing_eagerly(): + x = tf.constant(1.0, shape=(10, 10)) + keras.layers.Dense(1)(x) + self.assertTrue(x._keras_history_checked) + + def test_timing_scales_linearly(self): + def _construct_graph_of_size(size): + start = time.time() + x = keras.backend.placeholder(shape=(10, 4)) + + for _ in range(size): + x = keras.layers.Dense(4)(x) + x = tf.nn.relu(x) + + end = time.time() + return end - start + + size_50 = _construct_graph_of_size(50) + size_500 = _construct_graph_of_size(500) + + # Check construction time grows approx. linearly with size. + e = 3 # Fudge factor to prevent flakiness. + self.assertLess(size_500, (10 * e) * size_50) + + def test_built(self): + inputs = keras.Input(shape=(10,)) + outputs = tf.nn.relu(inputs) + model = keras.Model(inputs, outputs) + model.compile("sgd", "mse") + for layer in model.layers: + self.assertTrue(layer.built) + # Test something that requires Layers to be built. + model.summary() + + def test_json_serialization(self): + inputs = keras.Input(shape=(4,), dtype="uint8") + outputs = tf.cast(inputs, "float32") / 4.0 + model = model_config.model_from_json( + keras.Model(inputs, outputs).to_json() + ) + self.assertAllEqual( + self.evaluate(model(np.array([0, 64, 128, 192], np.uint8))), + [0.0, 16.0, 32.0, 48.0], + ) + model.summary() @test_combinations.run_all_keras_modes(always_skip_v1=True) class InputInEagerTest(test_combinations.TestCase): - """Tests ops on keras inputs in Eager runtime. + """Tests ops on keras inputs in Eager runtime. - Input returns graph/symbolic tensors in the Eager runtime (this - happens, for example, with tensors returned from Keras layers). These - should be routed to the graph-style branch of these ops (b/134715641) - """ + Input returns graph/symbolic tensors in the Eager runtime (this + happens, for example, with tensors returned from Keras layers). 
These + should be routed to the graph-style branch of these ops (b/134715641) + """ - def test_identity(self): - x = keras.Input(shape=(1,)) - ident = tf.identity(x) + def test_identity(self): + x = keras.Input(shape=(1,)) + ident = tf.identity(x) - # This is now a graph tensor, and should be able to continue in graphland - self.assertIn('Identity', ident.name) + # This is now a graph tensor, and should be able to continue in + # graphland + self.assertIn("Identity", ident.name) - def test_size(self): - x = keras.Input(shape=(3,)) - self.assertAllEqual(x.get_shape().as_list(), [None, 3]) - sz = tf.size(x) + def test_size(self): + x = keras.Input(shape=(3,)) + self.assertAllEqual(x.get_shape().as_list(), [None, 3]) + sz = tf.size(x) - # This is now a graph tensor, and should be able to continue in graphland - self.assertIn('Size', sz.name) + # This is now a graph tensor, and should be able to continue in + # graphland + self.assertIn("Size", sz.name) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/legacy_tf_layers/BUILD b/keras/legacy_tf_layers/BUILD index 9beaf00b237d..67a8950d6f5d 100644 --- a/keras/legacy_tf_layers/BUILD +++ b/keras/legacy_tf_layers/BUILD @@ -1,11 +1,14 @@ # Description: # Contains the legacy TF layers (internal TensorFlow version). +# Placeholder: load unaliased py_library + # buildifier: disable=same-origin-load load("@org_keras//keras:keras.bzl", "tf_py_test") load("@org_keras//keras:keras.bzl", "cuda_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//learning/brain/contrib:__subpackages__", diff --git a/keras/legacy_tf_layers/__init__.py b/keras/legacy_tf_layers/__init__.py index 11649ccd701b..0bb028307a4f 100644 --- a/keras/legacy_tf_layers/__init__.py +++ b/keras/legacy_tf_layers/__init__.py @@ -1,3 +1,3 @@ """Init file.""" -from keras.legacy_tf_layers import migration_utils # pylint: disable=unused-import +from keras.legacy_tf_layers import migration_utils diff --git a/keras/legacy_tf_layers/base.py b/keras/legacy_tf_layers/base.py index 40c0dbe244c2..fa2beea2f2d1 100644 --- a/keras/legacy_tf_layers/base.py +++ b/keras/legacy_tf_layers/base.py @@ -12,604 +12,658 @@ # See the License for the specific language governing permissions and # limitations under the License. 
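For readers skimming the test hunks above: what `AutoLambdaTest` and `InputInEagerTest` exercise is that raw TF ops applied to a symbolic `keras.Input` get auto-wrapped into layers. A minimal sketch of that behavior (illustrative only, not part of this diff; assumes TF 2.x):

```python
import tensorflow as tf
from tensorflow import keras

# A raw TF op applied to a symbolic Input is wrapped in a TFOpLambda layer,
# so the op becomes a node in the functional graph instead of running
# eagerly.
inputs = keras.Input(shape=(4,))
outputs = tf.nn.relu(inputs)  # recorded as a 'tf.nn.relu' layer
model = keras.Model(inputs, outputs)

print([layer.name for layer in model.layers])  # e.g. ['input_1', 'tf.nn.relu']
print(model(tf.constant([[-1.0, 0.0, -2.0, 1.0]])).numpy())  # [[0. 0. 0. 1.]]
```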
# ============================================================================= -# pylint: disable=g-classes-have-attributes + """Contains the base Layer class, from which all layers inherit.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import copy import warnings + +import tensorflow.compat.v2 as tf + from keras import backend -from keras.engine import base_layer_v1 as base_layer from keras.engine import base_layer_utils +from keras.engine import base_layer_v1 as base_layer from keras.legacy_tf_layers import variable_scope_shim from keras.mixed_precision import policy from keras.utils import tf_contextlib + +# isort: off from tensorflow.python.ops import variable_scope as vs from tensorflow.python.util.tf_export import keras_export -from tensorflow.python.util.tf_export import tf_export - _KERAS_STYLE_SCOPE = False @keras_export( - v1=['keras.__internal__.legacy.layers.experimental.keras_style_scope']) -@tf_export(v1=['layers.experimental.keras_style_scope']) + v1=["keras.__internal__.legacy.layers.experimental.keras_style_scope"] +) @tf_contextlib.contextmanager def keras_style_scope(): - """Use Keras-style variable management. + """Use Keras-style variable management. - All tf.layers and tf RNN cells created in this scope use Keras-style - variable management. Creating such layers with a scope= argument is - disallowed, and reuse=True is disallowed. + All tf.layers and tf RNN cells created in this scope use Keras-style + variable management. Creating such layers with a scope= argument is + disallowed, and reuse=True is disallowed. - The purpose of this scope is to allow users of existing layers to - slowly transition to a Keras layers API without breaking existing - functionality. + The purpose of this scope is to allow users of existing layers to + slowly transition to a Keras layers API without breaking existing + functionality. - One example of this is when using TensorFlow's RNN classes with Keras - Models or Networks. Because Keras models do not properly set variable - scopes, users of RNNs may either accidentally share scopes between two - different models, or get errors about variables that already exist. + One example of this is when using TensorFlow's RNN classes with Keras + Models or Networks. Because Keras models do not properly set variable + scopes, users of RNNs may either accidentally share scopes between two + different models, or get errors about variables that already exist. - Example: + Example: - ```python - class RNNModel(tf.keras.Model): + ```python + class RNNModel(tf.keras.Model): - def __init__(self, name): - super(RNNModel, self).__init__(name=name) - self.rnn = tf.compat.v1.nn.rnn_cell.MultiRNNCell( - [tf.compat.v1.nn.rnn_cell.LSTMCell(64) for _ in range(2)]) + def __init__(self, name): + super(RNNModel, self).__init__(name=name) + self.rnn = tf.compat.v1.nn.rnn_cell.MultiRNNCell( + [tf.compat.v1.nn.rnn_cell.LSTMCell(64) for _ in range(2)]) - def call(self, input, state): - return self.rnn(input, state) + def call(self, input, state): + return self.rnn(input, state) - model_1 = RNNModel("model_1") - model_2 = RNNModel("model_2") - - # OK - output_1, next_state_1 = model_1(input, state) - # Raises an error about trying to create an already existing variable. 
- output_2, next_state_2 = model_2(input, state) - ``` - - The solution is to wrap the model construction and execution in a keras-style - scope: - - ```python - with keras_style_scope(): model_1 = RNNModel("model_1") model_2 = RNNModel("model_2") - # model_1 and model_2 are guaranteed to create their own variables. + # OK output_1, next_state_1 = model_1(input, state) + # Raises an error about trying to create an already existing variable. output_2, next_state_2 = model_2(input, state) + ``` - The solution is to wrap the model construction and execution in a + keras-style scope: + + ```python + with keras_style_scope(): + model_1 = RNNModel("model_1") + model_2 = RNNModel("model_2") + + # model_1 and model_2 are guaranteed to create their own variables. + output_1, next_state_1 = model_1(input, state) + output_2, next_state_2 = model_2(input, state) - Yields: - A keras layer style scope. - """ - global _KERAS_STYLE_SCOPE - stack = _KERAS_STYLE_SCOPE - _KERAS_STYLE_SCOPE = True - try: - yield - finally: - _KERAS_STYLE_SCOPE = stack + assert len(model_1.weights) > 0 + assert len(model_2.weights) > 0 + assert(model_1.weights != model_2.weights) + ``` + + Yields: + A keras layer style scope. + """ + global _KERAS_STYLE_SCOPE + stack = _KERAS_STYLE_SCOPE + _KERAS_STYLE_SCOPE = True + try: + yield + finally: + _KERAS_STYLE_SCOPE = stack @keras_export( - v1=['keras.__internal__.legacy.layers.experimental.set_keras_style']) -@tf_export(v1=['layers.experimental.set_keras_style']) + v1=["keras.__internal__.legacy.layers.experimental.set_keras_style"] +) def set_keras_style(): - """Use Keras-style variable management. + """Use Keras-style variable management. - All tf.layers and tf RNN cells created after keras style ha been enabled - use Keras-style variable management. Creating such layers with a - scope= argument is disallowed, and reuse=True is disallowed. + All tf.layers and tf RNN cells created after keras style has been enabled + use Keras-style variable management. Creating such layers with a + scope= argument is disallowed, and reuse=True is disallowed. - The purpose of this function is to allow users of existing layers to - slowly transition to Keras layers API without breaking existing - functionality. + The purpose of this function is to allow users of existing layers to + slowly transition to Keras layers API without breaking existing + functionality. - For more details, see the documentation for `keras_style_scope`. + For more details, see the documentation for `keras_style_scope`. - Note, once keras style has been set, it is set globally for the entire - program and cannot be unset. + Note, once keras style has been set, it is set globally for the entire + program and cannot be unset. - Example: + Example: - ```python - set_keras_style() + ```python + set_keras_style() - model_1 = RNNModel(name="model_1") - model_2 = RNNModel(name="model_2") + model_1 = RNNModel(name="model_1") + model_2 = RNNModel(name="model_2") - # model_1 and model_2 are guaranteed to create their own variables. + # model_1 and model_2 are guaranteed to create their own variables.
+ output_1, next_state_1 = model_1(input, state) + output_2, next_state_2 = model_2(input, state) - assert len(model_1.weights) > 0 - assert len(model_2.weights) > 0 - assert(model_1.weights != model_2.weights) - ``` - """ - global _KERAS_STYLE_SCOPE - _KERAS_STYLE_SCOPE = True + assert len(model_1.weights) > 0 + assert len(model_2.weights) > 0 + assert(model_1.weights != model_2.weights) + ``` + """ + global _KERAS_STYLE_SCOPE + _KERAS_STYLE_SCOPE = True def _is_in_keras_style_scope(): - global _KERAS_STYLE_SCOPE - return _KERAS_STYLE_SCOPE + global _KERAS_STYLE_SCOPE + return _KERAS_STYLE_SCOPE -@keras_export(v1=['keras.__internal__.legacy.layers.Layer']) -@tf_export(v1=['layers.Layer']) +@keras_export(v1=["keras.__internal__.legacy.layers.Layer"]) class Layer(base_layer.Layer): - """Base layer class. - - It is considered legacy, and we recommend the use of `tf.keras.layers.Layer` - instead. - - Args: - trainable: Boolean, whether the layer's variables should be trainable. - name: String name of the layer. - dtype: Default dtype of the layer's weights (default of `None` means use the - type of the first input). - - Read-only properties: - name: The name of the layer (string). - dtype: Default dtype of the layer's weights (default of `None` means use the - type of the first input). - trainable_variables: List of trainable variables. - non_trainable_variables: List of non-trainable variables. - variables: List of all variables of this layer, trainable and - non-trainable. - updates: List of update ops of this layer. - losses: List of losses added by this layer. - trainable_weights: List of variables to be included in backprop. - non_trainable_weights: List of variables that should not be - included in backprop. - weights: The concatenation of the lists trainable_weights and - non_trainable_weights (in this order). - - Mutable properties: - trainable: Whether the layer should be trained (boolean). - input_spec: Optional (list of) `InputSpec` object(s) specifying the - constraints on inputs that can be accepted by the layer. - """ - - def __init__(self, trainable=True, name=None, dtype=None, - **kwargs): - # For backwards compatibility, legacy layers do not use `ResourceVariable` - # by default. - self._use_resource_variables = False - scope = kwargs.pop('_scope', None) - self._reuse = kwargs.pop('_reuse', None) - - # Avoid an incorrect lint error - self._trainable_weights = [] - self.built = False - - if dtype is None: - # Indicates to infer dtype from inputs. When the V2 dtype behavior is - # enabled, Keras layers default their dtype to floatx instead, so we pass - # an "_infer" policy to keep the old V1 behavior. 
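The `_infer` policy referenced in the `__init__` hunk here is what preserves the V1 dtype semantics for legacy layers. Roughly, as an illustrative sketch only (not part of the diff; assumes TF 2.x where `tf.compat.v1.layers` still works eagerly):

```python
import tensorflow.compat.v2 as tf
import tensorflow.compat.v1 as tf1

# Under the "_infer" policy, a legacy tf.layers layer takes its dtype from
# the first input, rather than defaulting to the float32 floatx the way
# V2 Keras layers do.
dense = tf1.layers.Dense(4)
y = dense(tf.ones((2, 3), dtype=tf.float64))
print(y.dtype)  # float64, not float32
```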
- dtype = policy.Policy('_infer') - - if 'autocast' not in kwargs: - kwargs['autocast'] = False - - # Mark that legacy layers should not be instrumented as Keras usage - self._disable_keras_instrumentation = True - - super().__init__(trainable=trainable, name=name, dtype=dtype, - **kwargs) - - if _is_in_keras_style_scope(): - if scope is not None: - raise ValueError( - 'scope argument not allowed when keras style layers are enabled, ' - 'but saw: {}'.format(scope)) - if self._reuse is not None: - raise ValueError( - 'reuse argument not allowed when keras style layers are enabled, ' - 'but saw: {}'.format(self._reuse)) - self._keras_style = True - else: - self._keras_style = False - - self._call_has_scope_arg = 'scope' in self._call_spec.arg_names - if scope: - with tf.compat.v1.variable_scope(scope) as captured_scope: - self._scope = captured_scope - else: - self._scope = None - self._current_scope = None - - def apply(self, *args, **kwargs): - return self(*args, **kwargs) - - # We no longer track graph in tf.layers layers. This property is only kept to - # maintain API backward compatibility. - @property - def graph(self): - warnings.warn( - '`Layer.graph` is deprecated and ' - 'will be removed in a future version. ' - 'Please stop using this property because tf.layers layers no ' - 'longer track their graph.', - stacklevel=2) - if tf.executing_eagerly(): - raise RuntimeError('Layer.graph not supported when executing eagerly.') - return None - - def _init_set_name(self, name): - # Determine layer name (non-unique). - if isinstance(name, tf.compat.v1.VariableScope): - base_name = name.name - self._name, _ = self._make_unique_name() - else: - base_name = name - self._name = name - if not name: - self._name, base_name = self._make_unique_name() - self._base_name = base_name - - def _make_unique_name(self, name_uid_map=None, avoid_names=None, - namespace='', zero_based=False): - base_name = base_layer.to_snake_case(self.__class__.__name__) - name = backend.unique_object_name( - base_name, - name_uid_map=name_uid_map, - avoid_names=avoid_names, - namespace=namespace, - zero_based=zero_based) - return (name, base_name) - - @property - def scope_name(self): - if not self._scope: - raise ValueError('No name available for layer scope because the layer "' + - self._name + '" has not been used yet. The scope name ' + - ' is determined the first time the layer instance is ' + - 'called. You must therefore call the layer before ' + - 'querying `scope_name`.') - return self._scope.name - - def add_loss(self, losses, inputs=None): - previous_losses_length = len(self._losses) - previous_callable_losses_length = len(self._callable_losses) - super().add_loss(losses, inputs=inputs) - if not tf.executing_eagerly(): - # TODO(fchollet): deprecate collection below. - new_losses = self._losses[previous_losses_length:] - new_callable_losses = self._callable_losses[ - previous_callable_losses_length:] - for regularizer in new_callable_losses: - loss_tensor = regularizer() - if loss_tensor is not None: - new_losses.append(loss_tensor) - _add_elements_to_collection( - new_losses, - tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - - def _name_scope(self): # pylint: disable=method-hidden - """Determines op naming for the Layer.""" - if self._keras_style: - return super()._name_scope() - return self._current_scope.original_name_scope - - def _set_scope(self, scope=None): - if self._scope is None: - # If constructed with _scope=None, lazy setting of scope. 
- if self._reuse: - with tf.compat.v1.variable_scope( - scope if scope is not None else self._base_name) as captured_scope: - self._scope = captured_scope - else: - with tf.compat.v1.variable_scope( - scope, default_name=self._base_name) as captured_scope: - self._scope = captured_scope - - def add_weight(self, - name, - shape, - dtype=None, - initializer=None, - regularizer=None, - trainable=None, - constraint=None, - use_resource=None, - synchronization=tf.VariableSynchronization.AUTO, - aggregation=tf.compat.v1.VariableAggregation.NONE, - partitioner=None, - **kwargs): - """Adds a new variable to the layer, or gets an existing one; returns it. + """Base layer class. - Args: - name: variable name. - shape: variable shape. - dtype: The type of the variable. Defaults to `self.dtype` or `float32`. - initializer: initializer instance (callable). - regularizer: regularizer instance (callable). - trainable: whether the variable should be part of the layer's - "trainable_variables" (e.g. variables, biases) - or "non_trainable_variables" (e.g. BatchNorm mean, stddev). - Note, if the current variable scope is marked as non-trainable - then this parameter is ignored and any added variables are also - marked as non-trainable. `trainable` defaults to `True` unless - `synchronization` is set to `ON_READ`. - constraint: constraint instance (callable). - use_resource: Whether to use `ResourceVariable`. - synchronization: Indicates when a distributed a variable will be - aggregated. Accepted values are constants defined in the class - `tf.VariableSynchronization`. By default the synchronization is set to - `AUTO` and the current `DistributionStrategy` chooses - when to synchronize. If `synchronization` is set to `ON_READ`, - `trainable` must not be set to `True`. - aggregation: Indicates how a distributed variable will be aggregated. - Accepted values are constants defined in the class - `tf.VariableAggregation`. - partitioner: (optional) partitioner instance (callable). If - provided, when the requested variable is created it will be split - into multiple partitions according to `partitioner`. In this case, - an instance of `PartitionedVariable` is returned. Available - partitioners include `tf.compat.v1.fixed_size_partitioner` and - `tf.compat.v1.variable_axis_size_partitioner`. For more details, see - the documentation of `tf.compat.v1.get_variable` and the "Variable - Partitioners and Sharding" section of the API guide. - **kwargs: Additional keyword arguments. - - Returns: - The created variable. Usually either a `Variable` or `ResourceVariable` - instance. If `partitioner` is not `None`, a `PartitionedVariable` - instance is returned. - - Raises: - RuntimeError: If called with partitioned variable regularization and - eager execution is enabled. - ValueError: When trainable has been set to True with synchronization - set as `ON_READ`. 
- """ - for kwarg in kwargs: - if kwarg != 'experimental_autocast': - raise TypeError('Unknown keyword argument:', kwarg) - if self._keras_style: - return super().add_weight( - name=name, - shape=shape, - dtype=dtype, - initializer=initializer, - regularizer=regularizer, - trainable=trainable and self.trainable, - constraint=constraint, - use_resource=use_resource, - synchronization=tf.VariableSynchronization.AUTO, - aggregation=tf.compat.v1.VariableAggregation.NONE, - partitioner=partitioner, - **kwargs) - - if synchronization == tf.VariableSynchronization.ON_READ: - if trainable: - raise ValueError( - 'Synchronization value can be set to ' - 'VariableSynchronization.ON_READ only for non-trainable variables. ' - 'You have specified trainable=True and ' - 'synchronization=VariableSynchronization.ON_READ.') - else: - # Set trainable to be false when variable is to be synced on read. - trainable = False - elif trainable is None: - trainable = True - - def _should_add_regularizer(variable, existing_variable_set): - if base_layer_utils.is_split_variable(variable): - for var in variable: - if var in existing_variable_set: - return False - return True - else: - return variable not in existing_variable_set - - init_graph = None - if not tf.executing_eagerly(): - default_graph = tf.compat.v1.get_default_graph() - if default_graph.building_function: - with tf.init_scope(): - # Retrieve the variables from the graph into which variables - # will be lifted; if initialization ops will be lifted into - # the eager context, then there is nothing to retrieve, since variable - # collections are not supported when eager execution is enabled. - if not tf.executing_eagerly(): - init_graph = tf.compat.v1.get_default_graph() - existing_variables = set(tf.compat.v1.global_variables()) - else: - # Initialization ops will not be lifted out of the default graph. - init_graph = default_graph - existing_variables = set(tf.compat.v1.global_variables()) - - if dtype is None: - dtype = self.dtype or tf.float32 - - self._set_scope(None) - reuse = self.built or self._reuse - prev_len_trainable = len(self._trainable_weights) - with tf.compat.v1.variable_scope( - self._scope, reuse=reuse, auxiliary_name_scope=False) as scope: - self._current_scope = scope - with backend.name_scope(self._name_scope()): # pylint: disable=not-callable - use_resource = (use_resource or - self._use_resource_variables or - scope.use_resource) - if initializer is None: - initializer = scope.initializer - variable = super().add_weight( - name, - shape, - dtype=tf.as_dtype(dtype), - initializer=initializer, - trainable=trainable and self.trainable, - constraint=constraint, - partitioner=partitioner, - use_resource=use_resource, - synchronization=synchronization, - aggregation=aggregation, - getter=tf.compat.v1.get_variable, - **kwargs) - - if regularizer: - if (tf.compat.v1.executing_eagerly_outside_functions() - or _should_add_regularizer(variable, existing_variables)): - self._handle_weight_regularization(name, variable, regularizer) - var_store = vs._get_default_variable_store() # pylint: disable=protected-access - # When the shim to get variable scope working in TF2 is used, - # We need to explicitly make the shim track the regularization - # losses as the collections will not be accessible. - if hasattr(var_store, 'add_regularizer'): - var_store.add_regularizer(variable, regularizer) - - if init_graph is not None: - # Handle edge case where a custom getter has overridden `trainable`. 
- # There is one known occurrence of this, in unit test - # testBasicRNNCellNotTrainable in - # contrib.rnn.python.kernel_tests.core_rnn_cell_test - with init_graph.as_default(): - trainable_variables = tf.compat.v1.trainable_variables() - if (trainable and self.trainable and - variable not in trainable_variables): - # A custom getter / variable scope overrode the trainable flag. - extra_trainable_vars = self._trainable_weights[prev_len_trainable:] - self._trainable_weights = self._trainable_weights[ - :prev_len_trainable] - self._non_trainable_weights += extra_trainable_vars - return variable - - def __call__(self, inputs, *args, **kwargs): - """Wraps `call`, applying pre- and post-processing steps. + It is considered legacy, and we recommend the use of `tf.keras.layers.Layer` + instead. Args: - inputs: input tensor(s). - *args: additional positional arguments to be passed to `self.call`. - **kwargs: additional keyword arguments to be passed to `self.call`. - **Note**: kwarg `scope` is reserved for use by the layer. - - Returns: - Output tensor(s). - - Note: - - If the layer's `call` method takes a `scope` keyword argument, - this argument will be automatically set to the current variable scope. - - If the layer's `call` method takes a `mask` argument (as some Keras - layers do), its default value will be set to the mask generated - for `inputs` by the previous layer (if `input` did come from - a layer that generated a corresponding mask, i.e. if it came from - a Keras layer with masking support. - - Raises: - ValueError: if the layer's `call` method returns None (an invalid value). + trainable: Boolean, whether the layer's variables should be trainable. + name: String name of the layer. + dtype: Default dtype of the layer's weights (default of `None` means use + the type of the first input). + + Read-only properties: + name: The name of the layer (string). + dtype: Default dtype of the layer's weights (default of `None` means use + the type of the first input). + trainable_variables: List of trainable variables. + non_trainable_variables: List of non-trainable variables. + variables: List of all variables of this layer, trainable and + non-trainable. + updates: List of update ops of this layer. + losses: List of losses added by this layer. + trainable_weights: List of variables to be included in backprop. + non_trainable_weights: List of variables that should not be + included in backprop. + weights: The concatenation of the lists trainable_weights and + non_trainable_weights (in this order). + + Mutable properties: + trainable: Whether the layer should be trained (boolean). + input_spec: Optional (list of) `InputSpec` object(s) specifying the + constraints on inputs that can be accepted by the layer. """ - scope = kwargs.pop('scope', None) - - if self._keras_style: - if scope is not None: - raise ValueError( - 'scope argument not allowed when keras style layers are enabled, ' - 'but saw: {}'.format(scope)) - return super().__call__(inputs, *args, **kwargs) - - self._set_scope(scope) - - if self.built: - try: - # Some classes which inherit from Layer do not use its constructor, so - # rather than initializing to None we check for an AttributeError. - scope_context_manager = self._always_reuse_variable_scope # pylint: disable=access-member-before-definition - except AttributeError: - scope_context_manager = None - - if scope_context_manager is None: - # From this point we will always set reuse=True, so create a "final" - # variable scope with this setting. 
We avoid re-creating variable scopes - # after this point as an optimization. - scope_context_manager = tf.compat.v1.variable_scope( - self._scope, reuse=True, auxiliary_name_scope=False) - - # Do not cache variable scopes if Eager mode is enabled. If Eager mode - # is enabled then we don't want to reuse scopes because the cached scope - # might be from a FuncGraph or Eager scope we are no longer in. - if not tf.compat.v1.executing_eagerly_outside_functions(): - self._always_reuse_variable_scope = scope_context_manager - else: - scope_context_manager = tf.compat.v1.variable_scope( - self._scope, reuse=self._reuse, auxiliary_name_scope=False) - - with scope_context_manager as scope: - self._current_scope = scope - - try: - call_has_scope_arg = self._call_has_scope_arg - except AttributeError: - self._call_spec.arg_names = variable_scope_shim.fn_args(self.call) - self._call_has_scope_arg = 'scope' in self._call_spec.arg_names - call_has_scope_arg = self._call_has_scope_arg - if call_has_scope_arg: - kwargs['scope'] = scope - - # Actually call layer - outputs = super().__call__(inputs, *args, **kwargs) - - if not tf.executing_eagerly(): - # Update global default collections. - _add_elements_to_collection(self.updates, tf.compat.v1.GraphKeys.UPDATE_OPS) - return outputs - - def __deepcopy__(self, memo): - no_copy = set(['_graph', '_thread_local', '_metrics_lock']) - shallow_copy = set(['_scope', '_always_reuse_variable_scope']) - cls = self.__class__ - result = cls.__new__(cls) - memo[id(self)] = result - for k, v in self.__dict__.items(): - if k in no_copy: - setattr(result, k, v) - elif k in shallow_copy: - setattr(result, k, copy.copy(v)) - elif base_layer.is_tensor_or_tensor_list(v): - setattr(result, k, v) - else: - setattr(result, k, copy.deepcopy(v, memo)) - return result - - def __setattr__(self, value, name): - # By-pass the automatic dependency tracking performed by the parent Layer. - super(tf.__internal__.tracking.Trackable, self).__setattr__(value, name) # pylint: disable=bad-super-call - - @property - def _is_legacy_layer(self): - """Used by keras to check compatibility. This should not be overridden.""" - return True + + def __init__(self, trainable=True, name=None, dtype=None, **kwargs): + # For backwards compatibility, legacy layers do not use + # `ResourceVariable` by default. + self._use_resource_variables = False + scope = kwargs.pop("_scope", None) + self._reuse = kwargs.pop("_reuse", None) + + # Avoid an incorrect lint error + self._trainable_weights = [] + self.built = False + + if dtype is None: + # Indicates to infer dtype from inputs. When the V2 dtype behavior + # is enabled, Keras layers default their dtype to floatx instead, so + # we pass an "_infer" policy to keep the old V1 behavior. 
+ dtype = policy.Policy("_infer") + + if "autocast" not in kwargs: + kwargs["autocast"] = False + + # Mark that legacy layers should not be instrumented as Keras usage + self._disable_keras_instrumentation = True + + super().__init__(trainable=trainable, name=name, dtype=dtype, **kwargs) + + if _is_in_keras_style_scope(): + if scope is not None: + raise ValueError( + "scope argument not allowed when keras style layers are " + "enabled, but saw: {}".format(scope) + ) + if self._reuse is not None: + raise ValueError( + "reuse argument not allowed when keras style layers are " + "enabled, but saw: {}".format(self._reuse) + ) + self._keras_style = True + else: + self._keras_style = False + + self._call_has_scope_arg = "scope" in self._call_spec.arg_names + if scope: + with tf.compat.v1.variable_scope(scope) as captured_scope: + self._scope = captured_scope + else: + self._scope = None + self._current_scope = None + + def apply(self, *args, **kwargs): + return self(*args, **kwargs) + + # We no longer track graph in tf.layers layers. This property is only kept + # to maintain API backward compatibility. + @property + def graph(self): + warnings.warn( + "`Layer.graph` is deprecated and " + "will be removed in a future version. " + "Please stop using this property because tf.layers layers no " + "longer track their graph.", + stacklevel=2, + ) + if tf.executing_eagerly(): + raise RuntimeError( + "Layer.graph not supported when executing eagerly." + ) + return None + + def _init_set_name(self, name): + # Determine layer name (non-unique). + if isinstance(name, tf.compat.v1.VariableScope): + base_name = name.name + self._name, _ = self._make_unique_name() + else: + base_name = name + self._name = name + if not name: + self._name, base_name = self._make_unique_name() + self._base_name = base_name + + def _make_unique_name( + self, + name_uid_map=None, + avoid_names=None, + namespace="", + zero_based=False, + ): + base_name = base_layer.to_snake_case(self.__class__.__name__) + name = backend.unique_object_name( + base_name, + name_uid_map=name_uid_map, + avoid_names=avoid_names, + namespace=namespace, + zero_based=zero_based, + ) + return (name, base_name) + + @property + def scope_name(self): + if not self._scope: + raise ValueError( + 'No name available for layer scope because the layer "' + + self._name + + '" has not been used yet. The scope name ' + + " is determined the first time the layer instance is " + + "called. You must therefore call the layer before " + + "querying `scope_name`." + ) + return self._scope.name + + def add_loss(self, losses, inputs=None): + previous_losses_length = len(self._losses) + previous_callable_losses_length = len(self._callable_losses) + super().add_loss(losses, inputs=inputs) + if not tf.executing_eagerly(): + # TODO(fchollet): deprecate collection below. + new_losses = self._losses[previous_losses_length:] + new_callable_losses = self._callable_losses[ + previous_callable_losses_length: + ] + for regularizer in new_callable_losses: + loss_tensor = regularizer() + if loss_tensor is not None: + new_losses.append(loss_tensor) + _add_elements_to_collection( + new_losses, tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + + def _name_scope(self): + """Determines op naming for the Layer.""" + if self._keras_style: + return super()._name_scope() + return self._current_scope.original_name_scope + + def _set_scope(self, scope=None): + if self._scope is None: + # If constructed with _scope=None, lazy setting of scope. 
+ if self._reuse: + with tf.compat.v1.variable_scope( + scope if scope is not None else self._base_name + ) as captured_scope: + self._scope = captured_scope + else: + with tf.compat.v1.variable_scope( + scope, default_name=self._base_name + ) as captured_scope: + self._scope = captured_scope + + def add_weight( + self, + name, + shape, + dtype=None, + initializer=None, + regularizer=None, + trainable=None, + constraint=None, + use_resource=None, + synchronization=tf.VariableSynchronization.AUTO, + aggregation=tf.compat.v1.VariableAggregation.NONE, + partitioner=None, + **kwargs + ): + """Adds a new variable to the layer, or gets an existing one; returns it. + + Args: + name: variable name. + shape: variable shape. + dtype: The type of the variable. Defaults to `self.dtype` or + `float32`. + initializer: initializer instance (callable). + regularizer: regularizer instance (callable). + trainable: whether the variable should be part of the layer's + "trainable_variables" (e.g. variables, biases) + or "non_trainable_variables" (e.g. BatchNorm mean, stddev). + Note, if the current variable scope is marked as non-trainable + then this parameter is ignored and any added variables are also + marked as non-trainable. `trainable` becomes `True` unless + `synchronization` is set to `ON_READ`. Defaults to `True`. + constraint: constraint instance (callable). + use_resource: Whether to use `ResourceVariable`. + synchronization: Indicates when a distributed variable will be + aggregated. Accepted values are constants defined in the class + `tf.VariableSynchronization`. By default the synchronization is set + to `AUTO` and the current `DistributionStrategy` chooses when to + synchronize. If `synchronization` is set to `ON_READ`, `trainable` + must not be set to `True`. + aggregation: Indicates how a distributed variable will be aggregated. + Accepted values are constants defined in the class + `tf.VariableAggregation`. + partitioner: (optional) partitioner instance (callable). If + provided, when the requested variable is created it will be split + into multiple partitions according to `partitioner`. In this case, + an instance of `PartitionedVariable` is returned. Available + partitioners include `tf.compat.v1.fixed_size_partitioner` and + `tf.compat.v1.variable_axis_size_partitioner`. For more details, + see the documentation of `tf.compat.v1.get_variable` and the + "Variable Partitioners and Sharding" section of the API guide. + **kwargs: Additional keyword arguments. + + Returns: + The created variable. Usually either a `Variable` or + `ResourceVariable` instance. If `partitioner` is not `None`, a + `PartitionedVariable` instance is returned. + + Raises: + RuntimeError: If called with partitioned variable regularization and + eager execution is enabled. + ValueError: When trainable has been set to True with synchronization + set as `ON_READ`.
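The `ValueError` documented in the Raises section is easy to trigger directly; this sketch mirrors `testInvalidTrainableSynchronizationCombination` further down in this diff:

```python
import tensorflow.compat.v2 as tf

layer = tf.compat.v1.layers.Layer(name="my_layer")
try:
    layer.add_weight(
        "v", [2, 2],
        initializer=tf.compat.v1.zeros_initializer(),
        synchronization=tf.VariableSynchronization.ON_READ,
        trainable=True)              # ON_READ implies non-trainable
except ValueError as e:
    print(e)                         # "Synchronization value can be set to ..."
```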
+ """ + for kwarg in kwargs: + if kwarg != "experimental_autocast": + raise TypeError("Unknown keyword argument:", kwarg) + if self._keras_style: + return super().add_weight( + name=name, + shape=shape, + dtype=dtype, + initializer=initializer, + regularizer=regularizer, + trainable=trainable and self.trainable, + constraint=constraint, + use_resource=use_resource, + synchronization=tf.VariableSynchronization.AUTO, + aggregation=tf.compat.v1.VariableAggregation.NONE, + partitioner=partitioner, + **kwargs + ) + + if synchronization == tf.VariableSynchronization.ON_READ: + if trainable: + raise ValueError( + "Synchronization value can be set to " + "VariableSynchronization.ON_READ only for non-trainable " + "variables. You have specified trainable=True and " + "synchronization=VariableSynchronization.ON_READ." + ) + else: + # Set trainable to be false when variable is to be synced on + # read. + trainable = False + elif trainable is None: + trainable = True + + def _should_add_regularizer(variable, existing_variable_set): + if base_layer_utils.is_split_variable(variable): + for var in variable: + if var in existing_variable_set: + return False + return True + else: + return variable not in existing_variable_set + + init_graph = None + if not tf.executing_eagerly(): + default_graph = tf.compat.v1.get_default_graph() + if default_graph.building_function: + with tf.init_scope(): + # Retrieve the variables from the graph into which variables + # will be lifted; if initialization ops will be lifted into + # the eager context, then there is nothing to retrieve, + # since variable collections are not supported when eager + # execution is enabled. + if not tf.executing_eagerly(): + init_graph = tf.compat.v1.get_default_graph() + existing_variables = set( + tf.compat.v1.global_variables() + ) + else: + # Initialization ops will not be lifted out of the default + # graph. + init_graph = default_graph + existing_variables = set(tf.compat.v1.global_variables()) + + if dtype is None: + dtype = self.dtype or tf.float32 + + self._set_scope(None) + reuse = self.built or self._reuse + prev_len_trainable = len(self._trainable_weights) + with tf.compat.v1.variable_scope( + self._scope, reuse=reuse, auxiliary_name_scope=False + ) as scope: + self._current_scope = scope + with backend.name_scope(self._name_scope()): + use_resource = ( + use_resource + or self._use_resource_variables + or scope.use_resource + ) + if initializer is None: + initializer = scope.initializer + variable = super().add_weight( + name, + shape, + dtype=tf.as_dtype(dtype), + initializer=initializer, + trainable=trainable and self.trainable, + constraint=constraint, + partitioner=partitioner, + use_resource=use_resource, + synchronization=synchronization, + aggregation=aggregation, + getter=tf.compat.v1.get_variable, + **kwargs + ) + + if regularizer: + if ( + tf.compat.v1.executing_eagerly_outside_functions() + or _should_add_regularizer(variable, existing_variables) + ): + self._handle_weight_regularization( + name, variable, regularizer + ) + var_store = vs._get_default_variable_store() + # When the shim to get variable scope working in TF2 is + # used, We need to explicitly make the shim track the + # regularization losses as the collections will not be + # accessible. + if hasattr(var_store, "add_regularizer"): + var_store.add_regularizer(variable, regularizer) + + if init_graph is not None: + # Handle edge case where a custom getter has overridden + # `trainable`. 
There is one known occurrence of this, in + # unit test testBasicRNNCellNotTrainable in + # contrib.rnn.python.kernel_tests.core_rnn_cell_test + with init_graph.as_default(): + trainable_variables = tf.compat.v1.trainable_variables() + if ( + trainable + and self.trainable + and variable not in trainable_variables + ): + # A custom getter / variable scope overrode the + # trainable flag. + extra_trainable_vars = self._trainable_weights[ + prev_len_trainable: + ] + self._trainable_weights = self._trainable_weights[ + :prev_len_trainable + ] + self._non_trainable_weights += extra_trainable_vars + return variable + + def __call__(self, inputs, *args, **kwargs): + """Wraps `call`, applying pre- and post-processing steps. + + Args: + inputs: input tensor(s). + *args: additional positional arguments to be passed to `self.call`. + **kwargs: additional keyword arguments to be passed to `self.call`. + **Note**: kwarg `scope` is reserved for use by the layer. + + Returns: + Output tensor(s). + + Note: + - If the layer's `call` method takes a `scope` keyword argument, this + argument will be automatically set to the current variable scope. + - If the layer's `call` method takes a `mask` argument (as some Keras + layers do), its default value will be set to the mask generated + for `inputs` by the previous layer (if `inputs` did come from + a layer that generated a corresponding mask, i.e. if it came from + a Keras layer with masking support). + + Raises: + ValueError: if the layer's `call` method returns None (an invalid + value). + """ + scope = kwargs.pop("scope", None) + + if self._keras_style: + if scope is not None: + raise ValueError( + "scope argument not allowed when keras style layers are " + "enabled, but saw: {}".format(scope) + ) + return super().__call__(inputs, *args, **kwargs) + + self._set_scope(scope) + + if self.built: + try: + # Some classes which inherit from Layer do not use its + # constructor, so rather than initializing to None we check for + # an AttributeError. + scope_context_manager = self._always_reuse_variable_scope + except AttributeError: + scope_context_manager = None + + if scope_context_manager is None: + # From this point we will always set reuse=True, so create a + # "final" variable scope with this setting. We avoid re-creating + # variable scopes after this point as an optimization. + scope_context_manager = tf.compat.v1.variable_scope( + self._scope, reuse=True, auxiliary_name_scope=False + ) + + # Do not cache variable scopes if Eager mode is enabled. If + # Eager mode is enabled then we don't want to reuse scopes + # because the cached scope might be from a FuncGraph or Eager + # scope we are no longer in. + if not tf.compat.v1.executing_eagerly_outside_functions(): + self._always_reuse_variable_scope = scope_context_manager + else: + scope_context_manager = tf.compat.v1.variable_scope( + self._scope, reuse=self._reuse, auxiliary_name_scope=False + ) + + with scope_context_manager as scope: + self._current_scope = scope + + try: + call_has_scope_arg = self._call_has_scope_arg + except AttributeError: + self._call_spec.arg_names = variable_scope_shim.fn_args( + self.call + ) + self._call_has_scope_arg = "scope" in self._call_spec.arg_names + call_has_scope_arg = self._call_has_scope_arg + if call_has_scope_arg: + kwargs["scope"] = scope + + # Actually call layer + outputs = super().__call__(inputs, *args, **kwargs) + + if not tf.executing_eagerly(): + # Update global default collections.
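The collection update below is what keeps graph-mode idioms such as fetching `UPDATE_OPS` working for legacy layers; a sketch of the classic batch-norm pattern (graph mode only, since collections are unsupported in eager):

```python
import tensorflow.compat.v2 as tf

with tf.Graph().as_default():
    x = tf.compat.v1.placeholder(tf.float32, (None, 4))
    bn = tf.compat.v1.layers.BatchNormalization()
    bn(x, training=True)
    # The moving-average updates were mirrored into the legacy collection
    # and must be run alongside the train op.
    updates = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
    assert updates
```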
+ _add_elements_to_collection( + self.updates, tf.compat.v1.GraphKeys.UPDATE_OPS + ) + return outputs + + def __deepcopy__(self, memo): + no_copy = set(["_graph", "_thread_local", "_metrics_lock"]) + shallow_copy = set(["_scope", "_always_reuse_variable_scope"]) + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + for k, v in self.__dict__.items(): + if k in no_copy: + setattr(result, k, v) + elif k in shallow_copy: + setattr(result, k, copy.copy(v)) + elif base_layer.is_tensor_or_tensor_list(v): + setattr(result, k, v) + else: + setattr(result, k, copy.deepcopy(v, memo)) + return result + + def __setattr__(self, value, name): + # By-pass the automatic dependency tracking performed by the parent + # Layer. + super(tf.__internal__.tracking.Trackable, self).__setattr__(value, name) + + @property + def _is_legacy_layer(self): + """Used by keras to check compatibility. This should not be + overridden.""" + return True def _add_elements_to_collection(elements, collection_list): - if tf.executing_eagerly(): - raise RuntimeError('Using collections from Layers not supported in Eager ' - 'mode. Tried to add %s to %s' % (elements, - collection_list)) - elements = tf.nest.flatten(elements) - collection_list = tf.nest.flatten(collection_list) - for name in collection_list: - collection = tf.compat.v1.get_collection_ref(name) - collection_set = {id(e) for e in collection} - for element in elements: - if id(element) not in collection_set: - collection.append(element) + if tf.executing_eagerly(): + raise RuntimeError( + "Using collections from Layers not supported in Eager " + "mode. Tried to add %s to %s" % (elements, collection_list) + ) + elements = tf.nest.flatten(elements) + collection_list = tf.nest.flatten(collection_list) + for name in collection_list: + collection = tf.compat.v1.get_collection_ref(name) + collection_set = {id(e) for e in collection} + for element in elements: + if id(element) not in collection_set: + collection.append(element) diff --git a/keras/legacy_tf_layers/base_test.py b/keras/legacy_tf_layers/base_test.py index a03e98c74631..e71403e8c680 100644 --- a/keras/legacy_tf_layers/base_test.py +++ b/keras/legacy_tf_layers/base_test.py @@ -18,694 +18,719 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import copy -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + from keras import backend -from keras.testing_infra import test_combinations from keras.engine import base_layer as keras_base_layer from keras.engine import input_spec from keras.legacy_tf_layers import base as base_tf_layers from keras.legacy_tf_layers import core as core_tf_layers +from keras.testing_infra import test_combinations class BaseLayerTest(tf.test.TestCase, parameterized.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testLayerProperties(self): - layer = base_tf_layers.Layer(name='my_layer') - self.assertEqual(layer.variables, []) - self.assertEqual(layer.trainable_variables, []) - self.assertEqual(layer.non_trainable_variables, []) - if not tf.executing_eagerly(): - # updates, losses only supported in GRAPH mode - self.assertEqual(layer.updates, []) - self.assertEqual(layer.losses, []) - self.assertEqual(layer.built, False) - layer = base_tf_layers.Layer(name='my_layer', trainable=False) - self.assertEqual(layer.trainable, False) - - # Assert that the layer was not instrumented as a 
Keras layer - self.assertFalse(layer._instrumented_keras_api) - - # Assert this was instrumented as a legacy layer - self.assertTrue( - keras_base_layer.keras_api_gauge.get_cell('legacy_layer').value()) - keras_base_layer.keras_api_gauge.get_cell('legacy_layer').set(False) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInt64Layer(self): - layer = base_tf_layers.Layer(name='my_layer', dtype='int64') - layer.add_weight('my_var', [2, 2]) - self.assertEqual(layer.name, 'my_layer') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testKerasStyleAddWeight(self): - keras_layer = keras_base_layer.Layer(name='keras_layer') - with backend.name_scope('foo'): - keras_variable = keras_layer.add_weight( - 'my_var', [2, 2], initializer=tf.compat.v1.zeros_initializer()) - self.assertEqual(keras_variable.name, 'foo/my_var:0') - - with backend.name_scope('baz'): - old_style_layer = base_tf_layers.Layer(name='my_layer') - # Test basic variable creation. - variable = old_style_layer.add_weight( - 'my_var', [2, 2], initializer=tf.compat.v1.zeros_initializer()) - self.assertEqual(variable.name, 'my_layer/my_var:0') - - with base_tf_layers.keras_style_scope(): - layer = base_tf_layers.Layer(name='my_layer') - # Assert that the layer was not instrumented as a Keras layer - self.assertFalse(layer._instrumented_keras_api) - # Test basic variable creation. - with backend.name_scope('bar'): - variable = layer.add_weight( - 'my_var', [2, 2], initializer=tf.compat.v1.zeros_initializer()) - self.assertEqual(variable.name, 'bar/my_var:0') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testAddWeight(self): - layer = base_tf_layers.Layer(name='my_layer') - - # Test basic variable creation. - variable = layer.add_weight( - 'my_var', [2, 2], initializer=tf.compat.v1.zeros_initializer()) - self.assertEqual(variable.name, 'my_layer/my_var:0') - self.assertEqual(layer.variables, [variable]) - self.assertEqual(layer.trainable_variables, [variable]) - self.assertEqual(layer.non_trainable_variables, []) - if not tf.executing_eagerly(): - self.assertEqual( - layer.variables, - tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)) - - # Test non-trainable variable creation. - # layer.add_variable should work even outside `build` and `call`. - variable_2 = layer.add_weight( - 'non_trainable_var', [2, 2], - initializer=tf.compat.v1.zeros_initializer(), - trainable=False) - self.assertEqual(layer.variables, [variable, variable_2]) - self.assertEqual(layer.trainable_variables, [variable]) - self.assertEqual(layer.non_trainable_variables, [variable_2]) - - if not tf.executing_eagerly(): - self.assertEqual( - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 1) - - regularizer = lambda x: tf.reduce_sum(x) * 1e-3 - _ = layer.add_weight( - 'reg_var', [2, 2], - initializer=tf.compat.v1.zeros_initializer(), - regularizer=regularizer) - self.assertEqual(len(layer.losses), 1) - - added_variable = [False] - - # Test that sync `ON_READ` variables are defaulted to be non-trainable. 
- variable_3 = layer.add_weight( - 'sync_on_read_var', [2, 2], - initializer=tf.compat.v1.zeros_initializer(), - synchronization=tf.VariableSynchronization.ON_READ, - aggregation=tf.compat.v1.VariableAggregation.SUM) - self.assertEqual(layer.non_trainable_variables, [variable_2, variable_3]) - - @tf.function - def function_adds_weight(): - if not added_variable[0]: - layer.add_weight( - 'reg_var_from_function', [2, 2], + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testLayerProperties(self): + layer = base_tf_layers.Layer(name="my_layer") + self.assertEqual(layer.variables, []) + self.assertEqual(layer.trainable_variables, []) + self.assertEqual(layer.non_trainable_variables, []) + if not tf.executing_eagerly(): + # updates, losses only supported in GRAPH mode + self.assertEqual(layer.updates, []) + self.assertEqual(layer.losses, []) + self.assertEqual(layer.built, False) + layer = base_tf_layers.Layer(name="my_layer", trainable=False) + self.assertEqual(layer.trainable, False) + + # Assert that the layer was not instrumented as a Keras layer + self.assertFalse(layer._instrumented_keras_api) + + # Assert this was instrumented as a legacy layer + self.assertTrue( + keras_base_layer.keras_api_gauge.get_cell("legacy_layer").value() + ) + keras_base_layer.keras_api_gauge.get_cell("legacy_layer").set(False) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInt64Layer(self): + layer = base_tf_layers.Layer(name="my_layer", dtype="int64") + layer.add_weight("my_var", [2, 2]) + self.assertEqual(layer.name, "my_layer") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testKerasStyleAddWeight(self): + keras_layer = keras_base_layer.Layer(name="keras_layer") + with backend.name_scope("foo"): + keras_variable = keras_layer.add_weight( + "my_var", [2, 2], initializer=tf.compat.v1.zeros_initializer() + ) + self.assertEqual(keras_variable.name, "foo/my_var:0") + + with backend.name_scope("baz"): + old_style_layer = base_tf_layers.Layer(name="my_layer") + # Test basic variable creation. + variable = old_style_layer.add_weight( + "my_var", [2, 2], initializer=tf.compat.v1.zeros_initializer() + ) + self.assertEqual(variable.name, "my_layer/my_var:0") + + with base_tf_layers.keras_style_scope(): + layer = base_tf_layers.Layer(name="my_layer") + # Assert that the layer was not instrumented as a Keras layer + self.assertFalse(layer._instrumented_keras_api) + # Test basic variable creation. + with backend.name_scope("bar"): + variable = layer.add_weight( + "my_var", [2, 2], initializer=tf.compat.v1.zeros_initializer() + ) + self.assertEqual(variable.name, "bar/my_var:0") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testAddWeight(self): + layer = base_tf_layers.Layer(name="my_layer") + + # Test basic variable creation. + variable = layer.add_weight( + "my_var", [2, 2], initializer=tf.compat.v1.zeros_initializer() + ) + self.assertEqual(variable.name, "my_layer/my_var:0") + self.assertEqual(layer.variables, [variable]) + self.assertEqual(layer.trainable_variables, [variable]) + self.assertEqual(layer.non_trainable_variables, []) + if not tf.executing_eagerly(): + self.assertEqual( + layer.variables, + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ), + ) + + # Test non-trainable variable creation. + # layer.add_variable should work even outside `build` and `call`. 
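Before the test resumes below, the same `add_weight` contract distilled outside the test harness (variables are routed to the trainable or non-trainable bucket at creation time):

```python
import tensorflow.compat.v2 as tf

layer = tf.compat.v1.layers.Layer(name="my_layer")
w = layer.add_weight(
    "w", [2, 2], initializer=tf.compat.v1.zeros_initializer())
u = layer.add_weight(
    "u", [2, 2], initializer=tf.compat.v1.zeros_initializer(),
    trainable=False)
assert layer.trainable_variables == [w]
assert layer.non_trainable_variables == [u]
```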
+ variable_2 = layer.add_weight( + "non_trainable_var", + [2, 2], initializer=tf.compat.v1.zeros_initializer(), - regularizer=regularizer) - added_variable[0] = True - - function_adds_weight() - self.assertEqual(len(layer.losses), 2) - - def testInvalidTrainableSynchronizationCombination(self): - layer = base_tf_layers.Layer(name='my_layer') - - with self.assertRaisesRegex( - ValueError, 'Synchronization value can be set to ' - 'VariableSynchronization.ON_READ only for non-trainable variables. ' - 'You have specified trainable=True and ' - 'synchronization=VariableSynchronization.ON_READ.'): - _ = layer.add_weight( - 'v', [2, 2], - initializer=tf.compat.v1.zeros_initializer(), - synchronization=tf.VariableSynchronization.ON_READ, - trainable=True) - - def testReusePartitionedVariablesAndRegularizers(self): - with tf.Graph().as_default(): - regularizer = lambda x: tf.reduce_sum(x) * 1e-3 - partitioner = tf.compat.v1.fixed_size_partitioner(3) - for reuse in [False, True]: - with tf.compat.v1.variable_scope( - tf.compat.v1.get_variable_scope(), - partitioner=partitioner, - reuse=reuse): - layer = base_tf_layers.Layer(name='my_layer') - _ = layer.add_weight( - 'reg_part_var', [4, 4], - initializer=tf.compat.v1.zeros_initializer(), - regularizer=regularizer) - self.assertEqual( - len(tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)), 3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testCall(self): - - class MyLayer(base_tf_layers.Layer): - - def call(self, inputs): - return tf.square(inputs) - - layer = MyLayer(name='my_layer') - inputs = tf.random.uniform((5,), seed=1) - outputs = layer(inputs) - self.assertEqual(layer.built, True) - if not tf.executing_eagerly(): - # op is only supported in GRAPH mode - self.assertEqual(outputs.op.name, 'my_layer/Square') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testDeepCopy(self): - - class MyLayer(base_tf_layers.Layer): - - def call(self, inputs): - return tf.square(inputs) - - layer = MyLayer(name='my_layer') - layer._private_tensor = tf.random.uniform(()) - inputs = tf.random.uniform((5,), seed=1) - outputs = layer(inputs) - self.assertEqual(layer.built, True) - if not tf.executing_eagerly(): - # op only supported in GRAPH mode. - self.assertEqual(outputs.op.name, 'my_layer/Square') - - layer_copy = copy.deepcopy(layer) - self.assertEqual(layer_copy.name, layer.name) - self.assertEqual(layer_copy._scope.name, layer._scope.name) - self.assertEqual(layer_copy._private_tensor, layer._private_tensor) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testScopeNaming(self): - - class PrivateLayer(base_tf_layers.Layer): - - def call(self, inputs): - return inputs - - inputs = tf.random.uniform((5,)) - default_layer = PrivateLayer() - _ = default_layer(inputs) - self.assertEqual(default_layer._scope.name, 'private_layer') - default_layer1 = PrivateLayer() - default_layer1(inputs) - self.assertEqual(default_layer1._scope.name, 'private_layer_1') - my_layer = PrivateLayer(name='my_layer') - my_layer(inputs) - self.assertEqual(my_layer._scope.name, 'my_layer') - my_layer1 = PrivateLayer(name='my_layer') - my_layer1(inputs) - self.assertEqual(my_layer1._scope.name, 'my_layer_1') - my_layer2 = PrivateLayer(name='my_layer') - my_layer2(inputs) - self.assertEqual(my_layer2._scope.name, 'my_layer_2') - # Name scope shouldn't affect names. 
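The scope names asserted in this test come from `tf.compat.v1.variable_scope` uniquifying its `default_name`, a mechanism independent of any surrounding name scope; roughly:

```python
import tensorflow.compat.v2 as tf

with tf.Graph().as_default():
    with tf.compat.v1.name_scope("some_name_scope"):
        with tf.compat.v1.variable_scope(None, default_name="my_layer") as vs:
            print(vs.name)   # 'my_layer': the name scope does not leak in
        with tf.compat.v1.variable_scope(None, default_name="my_layer") as vs:
            print(vs.name)   # 'my_layer_1': default_name is uniquified
```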
- with backend.name_scope('some_name_scope'): - default_layer2 = PrivateLayer() - default_layer2(inputs) - self.assertEqual(default_layer2._scope.name, 'private_layer_2') - my_layer3 = PrivateLayer(name='my_layer') - my_layer3(inputs) - self.assertEqual(my_layer3._scope.name, 'my_layer_3') - other_layer = PrivateLayer(name='other_layer') - other_layer(inputs) - self.assertEqual(other_layer._scope.name, 'other_layer') - # Variable scope gets added to scope names. - with tf.compat.v1.variable_scope('var_scope'): - default_layer_scoped = PrivateLayer() - default_layer_scoped(inputs) - self.assertEqual(default_layer_scoped._scope.name, - 'var_scope/private_layer') - my_layer_scoped = PrivateLayer(name='my_layer') - my_layer_scoped(inputs) - self.assertEqual(my_layer_scoped._scope.name, 'var_scope/my_layer') - my_layer_scoped1 = PrivateLayer(name='my_layer') - my_layer_scoped1(inputs) - self.assertEqual(my_layer_scoped1._scope.name, 'var_scope/my_layer_1') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInputSpecNdimCheck(self): - - class CustomerLayer(base_tf_layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = input_spec.InputSpec(ndim=2) - - def call(self, inputs): - return inputs - - layer = CustomerLayer() - with self.assertRaisesRegex(ValueError, r'expected ndim=2'): - layer(tf.constant([1])) - - # Note that we re-create the layer since in Eager mode, input spec checks - # only happen on first call. - # Works - layer = CustomerLayer() - layer(tf.constant([[1], [2]])) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInputSpecMinNdimCheck(self): - - class CustomLayer(base_tf_layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = input_spec.InputSpec(min_ndim=2) - - def call(self, inputs): - return inputs - - layer = CustomLayer() - with self.assertRaisesRegex(ValueError, r'expected min_ndim=2'): - layer(tf.constant([1])) - - # Works - layer = CustomLayer() - layer(tf.constant([[1], [2]])) - - layer = CustomLayer() - layer(tf.constant([[[1], [2]]])) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInputSpecMaxNdimCheck(self): - - class CustomerLayer(base_tf_layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = input_spec.InputSpec(max_ndim=2) - - def call(self, inputs): - return inputs - - layer = CustomerLayer() - with self.assertRaisesRegex(ValueError, r'expected max_ndim=2'): - layer(tf.constant([[[1], [2]]])) - - # Works - layer = CustomerLayer() - layer(tf.constant([1])) - - layer = CustomerLayer() - layer(tf.constant([[1], [2]])) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInputSpecDtypeCheck(self): - - class CustomerLayer(base_tf_layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = input_spec.InputSpec(dtype='float32') - - def call(self, inputs): - return inputs - - layer = CustomerLayer() - with self.assertRaisesRegex(ValueError, r'expected dtype=float32'): - layer(tf.constant(1, dtype=tf.int32)) - - # Works - layer = CustomerLayer() - layer(tf.constant(1.0, dtype=tf.float32)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInputSpecAxesCheck(self): - - class CustomerLayer(base_tf_layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = input_spec.InputSpec(axes={-1: 2}) - - def call(self, inputs): - return inputs - 
- layer = CustomerLayer() - with self.assertRaisesRegex(ValueError, r'expected axis'): - layer(tf.constant([1, 2, 3])) - - # Works - layer = CustomerLayer() - layer(tf.constant([1, 2])) - layer = CustomerLayer() - layer(tf.constant([[1, 2], [3, 4], [5, 6]])) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInputSpecShapeCheck(self): - - class CustomerLayer(base_tf_layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = input_spec.InputSpec(shape=(None, 3)) - - def call(self, inputs): - return inputs - - layer = CustomerLayer() - with self.assertRaisesRegex(ValueError, r'expected shape'): - layer(tf.constant([[1, 2]])) - - # Works - layer = CustomerLayer() - layer(tf.constant([[1, 2, 3], [4, 5, 6]])) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testNoInputSpec(self): - - class CustomerLayer(base_tf_layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = None - - def call(self, inputs): - return inputs + trainable=False, + ) + self.assertEqual(layer.variables, [variable, variable_2]) + self.assertEqual(layer.trainable_variables, [variable]) + self.assertEqual(layer.non_trainable_variables, [variable_2]) + + if not tf.executing_eagerly(): + self.assertEqual( + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ) + ), + 1, + ) + + regularizer = lambda x: tf.reduce_sum(x) * 1e-3 + _ = layer.add_weight( + "reg_var", + [2, 2], + initializer=tf.compat.v1.zeros_initializer(), + regularizer=regularizer, + ) + self.assertEqual(len(layer.losses), 1) - layer = CustomerLayer() - - layer(tf.constant(1)) - - # Works - if not tf.executing_eagerly(): - layer(tf.compat.v1.placeholder('int32')) - layer(tf.compat.v1.placeholder('int32', shape=(2, 3))) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_count_params(self): - dense = core_tf_layers.Dense(16) - dense.build((None, 4)) - self.assertEqual(dense.count_params(), 16 * 4 + 16) - - dense = core_tf_layers.Dense(16) - with self.assertRaises(ValueError): - dense.count_params() - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testDictInputOutput(self): - - class DictLayer(base_tf_layers.Layer): - - def call(self, inputs): - return {'l' + key: inputs[key] for key in inputs} - - layer = DictLayer() - if tf.executing_eagerly(): - i1 = tf.constant(3) - i2 = tf.constant(4.0) - result = layer({'abel': i1, 'ogits': i2}) - self.assertTrue(isinstance(result, dict)) - self.assertEqual(set(['label', 'logits']), set(result.keys())) - self.assertEqual(3, result['label'].numpy()) - self.assertEqual(4.0, result['logits'].numpy()) - else: - i1 = tf.compat.v1.placeholder('int32') - i2 = tf.compat.v1.placeholder('float32') - result = layer({'abel': i1, 'ogits': i2}) - self.assertTrue(isinstance(result, dict)) - self.assertEqual(set(['label', 'logits']), set(result.keys())) - - def testActivityRegularizer(self): - with tf.Graph().as_default(): - regularizer = tf.reduce_sum - layer = base_tf_layers.Layer(activity_regularizer=regularizer) - x = tf.compat.v1.placeholder('int32') - layer(x) - self.assertEqual(len(layer.get_losses_for(x)), 1) - - def testNameScopeIsConsistentWithVariableScope(self): - # Github issue 13429. 
- - class MyLayer(base_tf_layers.Layer): - - def build(self, input_shape): - self.my_var = self.add_weight('my_var', (), tf.float32) - self.built = True - - def call(self, inputs): - return tf.multiply(inputs, self.my_var, name='my_op') - - def _gen_layer(x, name=None): - layer = MyLayer(name=name) - out = layer(x) - return layer, out - - # unnamed layer - with tf.Graph().as_default(): - x = tf.compat.v1.placeholder(tf.float32, (), 'x') - layer, op = _gen_layer(x) - layer1, op1 = _gen_layer(op) - layer2, op2 = _gen_layer(op1) - - self.assertEqual(layer.my_var.name, 'my_layer/my_var:0') - self.assertEqual(op.name, 'my_layer/my_op:0') - self.assertEqual(layer1.my_var.name, 'my_layer_1/my_var:0') - self.assertEqual(op1.name, 'my_layer_1/my_op:0') - self.assertEqual(layer2.my_var.name, 'my_layer_2/my_var:0') - self.assertEqual(op2.name, 'my_layer_2/my_op:0') - # name starts from zero - with tf.Graph().as_default(): - x = tf.compat.v1.placeholder(tf.float32, (), 'x') - layer, op = _gen_layer(x, name='name') - layer1, op1 = _gen_layer(op, name='name_1') - layer2, op2 = _gen_layer(op1, name='name_2') - - self.assertEqual(layer.my_var.name, 'name/my_var:0') - self.assertEqual(op.name, 'name/my_op:0') - self.assertEqual(layer1.my_var.name, 'name_1/my_var:0') - self.assertEqual(op1.name, 'name_1/my_op:0') - self.assertEqual(layer2.my_var.name, 'name_2/my_var:0') - self.assertEqual(op2.name, 'name_2/my_op:0') - # name starts from one - with tf.Graph().as_default(): - x = tf.compat.v1.placeholder(tf.float32, (), 'x') - layer, op = _gen_layer(x, name='name_1') - layer1, op1 = _gen_layer(op, name='name_2') - layer2, op2 = _gen_layer(op1, name='name_3') - - self.assertEqual(layer.my_var.name, 'name_1/my_var:0') - self.assertEqual(op.name, 'name_1/my_op:0') - self.assertEqual(layer1.my_var.name, 'name_2/my_var:0') - self.assertEqual(op1.name, 'name_2/my_op:0') - self.assertEqual(layer2.my_var.name, 'name_3/my_var:0') - self.assertEqual(op2.name, 'name_3/my_op:0') - - def testVariablesAreLiftedFromFunctionBuildingGraphs(self): - class MyLayer(base_tf_layers.Layer): - - def build(self, input_shape): - self.my_var = self.add_weight('my_var', (), tf.float32) - self.built = True - - def call(self, inputs): - return inputs + added_variable = [False] - outer_graph = tf.compat.v1.get_default_graph() - function_building_graph = tf.Graph() - function_building_graph._building_function = True - with outer_graph.as_default(): - with function_building_graph.as_default(): - layer = MyLayer() - # Create a variable by invoking build through __call__ and assert that - # it is both tracked and lifted into the outer graph. 
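The lifting asserted in this test relies on `tf.init_scope` escaping function-building graphs, the same mechanism `add_weight` uses in base.py above; a rough graph-mode sketch (`_building_function` is poked directly, exactly as the test does, purely for illustration):

```python
import tensorflow.compat.v2 as tf

with tf.Graph().as_default() as outer_graph:
    fn_graph = tf.Graph()
    fn_graph._building_function = True   # pretend this is a FuncGraph
    with fn_graph.as_default():
        with tf.init_scope():
            # init_scope exits function-building graphs, so variable
            # initialization lands in the outer graph.
            assert tf.compat.v1.get_default_graph() is outer_graph
```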
- inputs = tf.compat.v1.placeholder(tf.float32, (), 'inputs') - layer(inputs) - self.assertEqual(len(layer.variables), 1) - self.assertEqual(len(layer.trainable_variables), 1) - self.assertEqual(layer.variables[0].graph, outer_graph) - - def testGetUpdateFor(self): - - class MyLayer(base_tf_layers.Layer): - - def build(self, input_shape): - self.a = self.add_weight('a', - (), - tf.float32, - trainable=False) - self.b = self.add_weight('b', - (), - tf.float32, - trainable=False) - self.add_update(tf.compat.v1.assign_add(self.a, 1., name='b_update')) - self.built = True - - def call(self, inputs): - self.add_update( - tf.compat.v1.assign_add(self.a, inputs, name='a_update')) - return inputs + 1 - - with tf.Graph().as_default(): - layer = MyLayer() - inputs = tf.compat.v1.placeholder(tf.float32, (), 'inputs') - intermediate_inputs = inputs + 1 - outputs = layer(intermediate_inputs) - - self.assertEqual(len(layer.updates), 2) - self.assertEqual(len(layer.get_updates_for(None)), 1) - self.assertEqual(len(layer.get_updates_for([inputs])), 1) - self.assertEqual(len(layer.get_updates_for([intermediate_inputs])), 1) - self.assertEqual(len(layer.get_updates_for([outputs])), 0) - - # Call same layer on new input, creating one more conditional update - inputs = tf.compat.v1.placeholder(tf.float32, (), 'inputs') - intermediate_inputs = inputs + 1 - outputs = layer(intermediate_inputs) - - self.assertEqual(len(layer.updates), 3) - self.assertEqual(len(layer.get_updates_for(None)), 1) - # Check that we are successfully filtering out irrelevant updates - self.assertEqual(len(layer.get_updates_for([inputs])), 1) - self.assertEqual(len(layer.get_updates_for([intermediate_inputs])), 1) - self.assertEqual(len(layer.get_updates_for([outputs])), 0) - - def testGetLossesFor(self): - - class MyLayer(base_tf_layers.Layer): - - def build(self, input_shape): - self.a = self.add_weight('a', - (), - tf.float32, - trainable=False) - self.b = self.add_weight('b', - (), - tf.float32, - trainable=False) - self.add_loss(self.a) - self.built = True - - def call(self, inputs): - self.add_loss(inputs, inputs=True) - return inputs + 1 - - with tf.Graph().as_default(): - layer = MyLayer() - inputs = tf.compat.v1.placeholder(tf.float32, (), 'inputs') - intermediate_inputs = inputs + 1 - outputs = layer(intermediate_inputs) - - self.assertEqual(len(layer.losses), 2) - self.assertEqual(len(layer.get_losses_for(None)), 1) - self.assertEqual(len(layer.get_losses_for([inputs])), 1) - self.assertEqual(len(layer.get_losses_for([intermediate_inputs])), 1) - self.assertEqual(len(layer.get_losses_for([outputs])), 0) - - # Call same layer on new input, creating one more conditional loss - inputs = tf.compat.v1.placeholder(tf.float32, (), 'inputs') - intermediate_inputs = inputs + 1 - outputs = layer(intermediate_inputs) - - self.assertEqual(len(layer.losses), 3) - self.assertEqual(len(layer.get_losses_for(None)), 1) - # Check that we are successfully filtering out irrelevant losses - self.assertEqual(len(layer.get_losses_for([inputs])), 1) - self.assertEqual(len(layer.get_losses_for([intermediate_inputs])), 1) - self.assertEqual(len(layer.get_losses_for([outputs])), 0) + # Test that sync `ON_READ` variables are defaulted to be non-trainable. 
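As the comment says, `ON_READ` variables default to non-trainable (see the `trainable = False` branch in `add_weight` above); standalone, the behavior checked by the next few assertions looks like:

```python
import tensorflow.compat.v2 as tf

layer = tf.compat.v1.layers.Layer(name="metrics_holder")
v = layer.add_weight(
    "total", [2],
    initializer=tf.compat.v1.zeros_initializer(),
    synchronization=tf.VariableSynchronization.ON_READ,
    aggregation=tf.compat.v1.VariableAggregation.SUM)
assert v in layer.non_trainable_variables   # trainable defaulted to False
```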
+ variable_3 = layer.add_weight( + "sync_on_read_var", + [2, 2], + initializer=tf.compat.v1.zeros_initializer(), + synchronization=tf.VariableSynchronization.ON_READ, + aggregation=tf.compat.v1.VariableAggregation.SUM, + ) + self.assertEqual( + layer.non_trainable_variables, [variable_2, variable_3] + ) + + @tf.function + def function_adds_weight(): + if not added_variable[0]: + layer.add_weight( + "reg_var_from_function", + [2, 2], + initializer=tf.compat.v1.zeros_initializer(), + regularizer=regularizer, + ) + added_variable[0] = True + + function_adds_weight() + self.assertEqual(len(layer.losses), 2) + + def testInvalidTrainableSynchronizationCombination(self): + layer = base_tf_layers.Layer(name="my_layer") + + with self.assertRaisesRegex( + ValueError, + "Synchronization value can be set to " + "VariableSynchronization.ON_READ only for non-trainable variables. " + "You have specified trainable=True and " + "synchronization=VariableSynchronization.ON_READ.", + ): + _ = layer.add_weight( + "v", + [2, 2], + initializer=tf.compat.v1.zeros_initializer(), + synchronization=tf.VariableSynchronization.ON_READ, + trainable=True, + ) + + def testReusePartitionedVariablesAndRegularizers(self): + with tf.Graph().as_default(): + regularizer = lambda x: tf.reduce_sum(x) * 1e-3 + partitioner = tf.compat.v1.fixed_size_partitioner(3) + for reuse in [False, True]: + with tf.compat.v1.variable_scope( + tf.compat.v1.get_variable_scope(), + partitioner=partitioner, + reuse=reuse, + ): + layer = base_tf_layers.Layer(name="my_layer") + _ = layer.add_weight( + "reg_part_var", + [4, 4], + initializer=tf.compat.v1.zeros_initializer(), + regularizer=regularizer, + ) + self.assertEqual( + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + ), + 3, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testCall(self): + class MyLayer(base_tf_layers.Layer): + def call(self, inputs): + return tf.square(inputs) + + layer = MyLayer(name="my_layer") + inputs = tf.random.uniform((5,), seed=1) + outputs = layer(inputs) + self.assertEqual(layer.built, True) + if not tf.executing_eagerly(): + # op is only supported in GRAPH mode + self.assertEqual(outputs.op.name, "my_layer/Square") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testDeepCopy(self): + class MyLayer(base_tf_layers.Layer): + def call(self, inputs): + return tf.square(inputs) + + layer = MyLayer(name="my_layer") + layer._private_tensor = tf.random.uniform(()) + inputs = tf.random.uniform((5,), seed=1) + outputs = layer(inputs) + self.assertEqual(layer.built, True) + if not tf.executing_eagerly(): + # op only supported in GRAPH mode. 
+ self.assertEqual(outputs.op.name, "my_layer/Square") + + layer_copy = copy.deepcopy(layer) + self.assertEqual(layer_copy.name, layer.name) + self.assertEqual(layer_copy._scope.name, layer._scope.name) + self.assertEqual(layer_copy._private_tensor, layer._private_tensor) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testScopeNaming(self): + class PrivateLayer(base_tf_layers.Layer): + def call(self, inputs): + return inputs + + inputs = tf.random.uniform((5,)) + default_layer = PrivateLayer() + _ = default_layer(inputs) + self.assertEqual(default_layer._scope.name, "private_layer") + default_layer1 = PrivateLayer() + default_layer1(inputs) + self.assertEqual(default_layer1._scope.name, "private_layer_1") + my_layer = PrivateLayer(name="my_layer") + my_layer(inputs) + self.assertEqual(my_layer._scope.name, "my_layer") + my_layer1 = PrivateLayer(name="my_layer") + my_layer1(inputs) + self.assertEqual(my_layer1._scope.name, "my_layer_1") + my_layer2 = PrivateLayer(name="my_layer") + my_layer2(inputs) + self.assertEqual(my_layer2._scope.name, "my_layer_2") + # Name scope shouldn't affect names. + with backend.name_scope("some_name_scope"): + default_layer2 = PrivateLayer() + default_layer2(inputs) + self.assertEqual(default_layer2._scope.name, "private_layer_2") + my_layer3 = PrivateLayer(name="my_layer") + my_layer3(inputs) + self.assertEqual(my_layer3._scope.name, "my_layer_3") + other_layer = PrivateLayer(name="other_layer") + other_layer(inputs) + self.assertEqual(other_layer._scope.name, "other_layer") + # Variable scope gets added to scope names. + with tf.compat.v1.variable_scope("var_scope"): + default_layer_scoped = PrivateLayer() + default_layer_scoped(inputs) + self.assertEqual( + default_layer_scoped._scope.name, "var_scope/private_layer" + ) + my_layer_scoped = PrivateLayer(name="my_layer") + my_layer_scoped(inputs) + self.assertEqual(my_layer_scoped._scope.name, "var_scope/my_layer") + my_layer_scoped1 = PrivateLayer(name="my_layer") + my_layer_scoped1(inputs) + self.assertEqual( + my_layer_scoped1._scope.name, "var_scope/my_layer_1" + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInputSpecNdimCheck(self): + class CustomerLayer(base_tf_layers.Layer): + def __init__(self): + super().__init__() + self.input_spec = input_spec.InputSpec(ndim=2) + + def call(self, inputs): + return inputs + + layer = CustomerLayer() + with self.assertRaisesRegex(ValueError, r"expected ndim=2"): + layer(tf.constant([1])) + + # Note that we re-create the layer since in Eager mode, input spec + # checks only happen on first call. 
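Because eager-mode input-spec validation happens only on the first call, each case below builds a fresh layer; condensed, the check being exercised is (with an illustrative `ndim=2` layer like `CustomerLayer`):

```python
import tensorflow.compat.v2 as tf
from keras.engine import input_spec

class TwoDOnly(tf.compat.v1.layers.Layer):
    def __init__(self):
        super().__init__()
        self.input_spec = input_spec.InputSpec(ndim=2)

    def call(self, inputs):
        return inputs

TwoDOnly()(tf.constant([[1], [2]]))          # ok: rank 2
try:
    TwoDOnly()(tf.constant([1]))             # rank 1
except ValueError as e:
    print(e)                                 # mentions "expected ndim=2"
```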
+ # Works + layer = CustomerLayer() + layer(tf.constant([[1], [2]])) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInputSpecMinNdimCheck(self): + class CustomLayer(base_tf_layers.Layer): + def __init__(self): + super().__init__() + self.input_spec = input_spec.InputSpec(min_ndim=2) + + def call(self, inputs): + return inputs + + layer = CustomLayer() + with self.assertRaisesRegex(ValueError, r"expected min_ndim=2"): + layer(tf.constant([1])) + + # Works + layer = CustomLayer() + layer(tf.constant([[1], [2]])) + + layer = CustomLayer() + layer(tf.constant([[[1], [2]]])) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInputSpecMaxNdimCheck(self): + class CustomerLayer(base_tf_layers.Layer): + def __init__(self): + super().__init__() + self.input_spec = input_spec.InputSpec(max_ndim=2) + + def call(self, inputs): + return inputs + + layer = CustomerLayer() + with self.assertRaisesRegex(ValueError, r"expected max_ndim=2"): + layer(tf.constant([[[1], [2]]])) + + # Works + layer = CustomerLayer() + layer(tf.constant([1])) + + layer = CustomerLayer() + layer(tf.constant([[1], [2]])) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInputSpecDtypeCheck(self): + class CustomerLayer(base_tf_layers.Layer): + def __init__(self): + super().__init__() + self.input_spec = input_spec.InputSpec(dtype="float32") + + def call(self, inputs): + return inputs + + layer = CustomerLayer() + with self.assertRaisesRegex(ValueError, r"expected dtype=float32"): + layer(tf.constant(1, dtype=tf.int32)) + + # Works + layer = CustomerLayer() + layer(tf.constant(1.0, dtype=tf.float32)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInputSpecAxesCheck(self): + class CustomerLayer(base_tf_layers.Layer): + def __init__(self): + super().__init__() + self.input_spec = input_spec.InputSpec(axes={-1: 2}) + + def call(self, inputs): + return inputs + + layer = CustomerLayer() + with self.assertRaisesRegex(ValueError, r"expected axis"): + layer(tf.constant([1, 2, 3])) + + # Works + layer = CustomerLayer() + layer(tf.constant([1, 2])) + layer = CustomerLayer() + layer(tf.constant([[1, 2], [3, 4], [5, 6]])) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInputSpecShapeCheck(self): + class CustomerLayer(base_tf_layers.Layer): + def __init__(self): + super().__init__() + self.input_spec = input_spec.InputSpec(shape=(None, 3)) + + def call(self, inputs): + return inputs + + layer = CustomerLayer() + with self.assertRaisesRegex(ValueError, r"expected shape"): + layer(tf.constant([[1, 2]])) + + # Works + layer = CustomerLayer() + layer(tf.constant([[1, 2, 3], [4, 5, 6]])) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNoInputSpec(self): + class CustomerLayer(base_tf_layers.Layer): + def __init__(self): + super().__init__() + self.input_spec = None + + def call(self, inputs): + return inputs + + layer = CustomerLayer() + + layer(tf.constant(1)) + + # Works + if not tf.executing_eagerly(): + layer(tf.compat.v1.placeholder("int32")) + layer(tf.compat.v1.placeholder("int32", shape=(2, 3))) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_count_params(self): + dense = core_tf_layers.Dense(16) + dense.build((None, 4)) + self.assertEqual(dense.count_params(), 16 * 4 + 16) + + dense = 
core_tf_layers.Dense(16) + with self.assertRaises(ValueError): + dense.count_params() + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testDictInputOutput(self): + class DictLayer(base_tf_layers.Layer): + def call(self, inputs): + return {"l" + key: inputs[key] for key in inputs} + + layer = DictLayer() + if tf.executing_eagerly(): + i1 = tf.constant(3) + i2 = tf.constant(4.0) + result = layer({"abel": i1, "ogits": i2}) + self.assertTrue(isinstance(result, dict)) + self.assertEqual(set(["label", "logits"]), set(result.keys())) + self.assertEqual(3, result["label"].numpy()) + self.assertEqual(4.0, result["logits"].numpy()) + else: + i1 = tf.compat.v1.placeholder("int32") + i2 = tf.compat.v1.placeholder("float32") + result = layer({"abel": i1, "ogits": i2}) + self.assertTrue(isinstance(result, dict)) + self.assertEqual(set(["label", "logits"]), set(result.keys())) + + def testActivityRegularizer(self): + with tf.Graph().as_default(): + regularizer = tf.reduce_sum + layer = base_tf_layers.Layer(activity_regularizer=regularizer) + x = tf.compat.v1.placeholder("int32") + layer(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) + + def testNameScopeIsConsistentWithVariableScope(self): + # GitHub issue 13429. + + class MyLayer(base_tf_layers.Layer): + def build(self, input_shape): + self.my_var = self.add_weight("my_var", (), tf.float32) + self.built = True + + def call(self, inputs): + return tf.multiply(inputs, self.my_var, name="my_op") + + def _gen_layer(x, name=None): + layer = MyLayer(name=name) + out = layer(x) + return layer, out + + # unnamed layer + with tf.Graph().as_default(): + x = tf.compat.v1.placeholder(tf.float32, (), "x") + layer, op = _gen_layer(x) + layer1, op1 = _gen_layer(op) + layer2, op2 = _gen_layer(op1) + + self.assertEqual(layer.my_var.name, "my_layer/my_var:0") + self.assertEqual(op.name, "my_layer/my_op:0") + self.assertEqual(layer1.my_var.name, "my_layer_1/my_var:0") + self.assertEqual(op1.name, "my_layer_1/my_op:0") + self.assertEqual(layer2.my_var.name, "my_layer_2/my_var:0") + self.assertEqual(op2.name, "my_layer_2/my_op:0") + # name starts from zero + with tf.Graph().as_default(): + x = tf.compat.v1.placeholder(tf.float32, (), "x") + layer, op = _gen_layer(x, name="name") + layer1, op1 = _gen_layer(op, name="name_1") + layer2, op2 = _gen_layer(op1, name="name_2") + + self.assertEqual(layer.my_var.name, "name/my_var:0") + self.assertEqual(op.name, "name/my_op:0") + self.assertEqual(layer1.my_var.name, "name_1/my_var:0") + self.assertEqual(op1.name, "name_1/my_op:0") + self.assertEqual(layer2.my_var.name, "name_2/my_var:0") + self.assertEqual(op2.name, "name_2/my_op:0") + # name starts from one + with tf.Graph().as_default(): + x = tf.compat.v1.placeholder(tf.float32, (), "x") + layer, op = _gen_layer(x, name="name_1") + layer1, op1 = _gen_layer(op, name="name_2") + layer2, op2 = _gen_layer(op1, name="name_3") + + self.assertEqual(layer.my_var.name, "name_1/my_var:0") + self.assertEqual(op.name, "name_1/my_op:0") + self.assertEqual(layer1.my_var.name, "name_2/my_var:0") + self.assertEqual(op1.name, "name_2/my_op:0") + self.assertEqual(layer2.my_var.name, "name_3/my_var:0") + self.assertEqual(op2.name, "name_3/my_op:0") + + def testVariablesAreLiftedFromFunctionBuildingGraphs(self): + class MyLayer(base_tf_layers.Layer): + def build(self, input_shape): + self.my_var = self.add_weight("my_var", (), tf.float32) + self.built = True + + def call(self, inputs): + return inputs + + outer_graph = 
tf.compat.v1.get_default_graph() + function_building_graph = tf.Graph() + function_building_graph._building_function = True + with outer_graph.as_default(): + with function_building_graph.as_default(): + layer = MyLayer() + # Create a variable by invoking build through __call__ and + # assert that it is both tracked and lifted into the outer + # graph. + inputs = tf.compat.v1.placeholder(tf.float32, (), "inputs") + layer(inputs) + self.assertEqual(len(layer.variables), 1) + self.assertEqual(len(layer.trainable_variables), 1) + self.assertEqual(layer.variables[0].graph, outer_graph) + + def testGetUpdateFor(self): + class MyLayer(base_tf_layers.Layer): + def build(self, input_shape): + self.a = self.add_weight("a", (), tf.float32, trainable=False) + self.b = self.add_weight("b", (), tf.float32, trainable=False) + self.add_update( + tf.compat.v1.assign_add(self.a, 1.0, name="b_update") + ) + self.built = True + + def call(self, inputs): + self.add_update( + tf.compat.v1.assign_add(self.a, inputs, name="a_update") + ) + return inputs + 1 + + with tf.Graph().as_default(): + layer = MyLayer() + inputs = tf.compat.v1.placeholder(tf.float32, (), "inputs") + intermediate_inputs = inputs + 1 + outputs = layer(intermediate_inputs) + + self.assertEqual(len(layer.updates), 2) + self.assertEqual(len(layer.get_updates_for(None)), 1) + self.assertEqual(len(layer.get_updates_for([inputs])), 1) + self.assertEqual( + len(layer.get_updates_for([intermediate_inputs])), 1 + ) + self.assertEqual(len(layer.get_updates_for([outputs])), 0) + + # Call same layer on new input, creating one more conditional update + inputs = tf.compat.v1.placeholder(tf.float32, (), "inputs") + intermediate_inputs = inputs + 1 + outputs = layer(intermediate_inputs) + + self.assertEqual(len(layer.updates), 3) + self.assertEqual(len(layer.get_updates_for(None)), 1) + # Check that we are successfully filtering out irrelevant updates + self.assertEqual(len(layer.get_updates_for([inputs])), 1) + self.assertEqual( + len(layer.get_updates_for([intermediate_inputs])), 1 + ) + self.assertEqual(len(layer.get_updates_for([outputs])), 0) + + def testGetLossesFor(self): + class MyLayer(base_tf_layers.Layer): + def build(self, input_shape): + self.a = self.add_weight("a", (), tf.float32, trainable=False) + self.b = self.add_weight("b", (), tf.float32, trainable=False) + self.add_loss(self.a) + self.built = True + + def call(self, inputs): + self.add_loss(inputs, inputs=True) + return inputs + 1 + + with tf.Graph().as_default(): + layer = MyLayer() + inputs = tf.compat.v1.placeholder(tf.float32, (), "inputs") + intermediate_inputs = inputs + 1 + outputs = layer(intermediate_inputs) + + self.assertEqual(len(layer.losses), 2) + self.assertEqual(len(layer.get_losses_for(None)), 1) + self.assertEqual(len(layer.get_losses_for([inputs])), 1) + self.assertEqual( + len(layer.get_losses_for([intermediate_inputs])), 1 + ) + self.assertEqual(len(layer.get_losses_for([outputs])), 0) + + # Call same layer on new input, creating one more conditional loss + inputs = tf.compat.v1.placeholder(tf.float32, (), "inputs") + intermediate_inputs = inputs + 1 + outputs = layer(intermediate_inputs) + + self.assertEqual(len(layer.losses), 3) + self.assertEqual(len(layer.get_losses_for(None)), 1) + # Check that we are successfully filtering out irrelevant losses + self.assertEqual(len(layer.get_losses_for([inputs])), 1) + self.assertEqual( + len(layer.get_losses_for([intermediate_inputs])), 1 + ) + self.assertEqual(len(layer.get_losses_for([outputs])), 0) class 
IdentityLayer(base_tf_layers.Layer): - """A layer returns the identity of it's input.""" + """A layer that returns the identity of its input.""" - def call(self, inputs): - return inputs + def call(self, inputs): + return inputs -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class DTypeTest(tf.test.TestCase, parameterized.TestCase): - - def _const(self, dtype): - return tf.constant(1, dtype=dtype) - - def test_dtype_inferred_from_input(self): - # Test with Tensor input - layer = IdentityLayer() - self.assertIsNone(layer.dtype) - layer(self._const('float64')) - self.assertEqual(layer.dtype, 'float64') - - # Test with Numpy input - layer = IdentityLayer() - self.assertIsNone(layer.dtype) - layer(np.array(1., dtype='float64')) - self.assertEqual(layer.dtype, 'float64') - - # Test with integer input - layer = IdentityLayer() - self.assertIsNone(layer.dtype) - layer(self._const('int32')) - self.assertEqual(layer.dtype, 'int32') - - # Test layer dtype doesn't change when passed a new dtype - layer = IdentityLayer() - self.assertIsNone(layer.dtype) - layer(self._const('float64')) - self.assertEqual(layer.dtype, 'float64') - layer(self._const('float16')) - self.assertEqual(layer.dtype, 'float64') - - # Test layer dtype inferred from first input - layer = IdentityLayer() - layer([self._const('float32'), self._const('float64')]) - self.assertEqual(layer.dtype, 'float32') - - def test_passing_dtype_to_constructor(self): - layer = IdentityLayer(dtype='float64') - layer(self._const('float32')) - self.assertEqual(layer.dtype, 'float64') - - layer = IdentityLayer(dtype='int32') - layer(self._const('float32')) - self.assertEqual(layer.dtype, 'int32') - - layer = IdentityLayer(dtype=tf.float64) - layer(self._const('float32')) - self.assertEqual(layer.dtype, 'float64') - - def test_inputs_not_casted(self): - layer = IdentityLayer(dtype='float32') - self.assertEqual(layer(self._const('float64')).dtype, 'float64') - - -if __name__ == '__main__': - tf.test.main() + def _const(self, dtype): + return tf.constant(1, dtype=dtype) + + def test_dtype_inferred_from_input(self): + # Test with Tensor input + layer = IdentityLayer() + self.assertIsNone(layer.dtype) + layer(self._const("float64")) + self.assertEqual(layer.dtype, "float64") + + # Test with Numpy input + layer = IdentityLayer() + self.assertIsNone(layer.dtype) + layer(np.array(1.0, dtype="float64")) + self.assertEqual(layer.dtype, "float64") + + # Test with integer input + layer = IdentityLayer() + self.assertIsNone(layer.dtype) + layer(self._const("int32")) + self.assertEqual(layer.dtype, "int32") + + # Test layer dtype doesn't change when passed a new dtype + layer = IdentityLayer() + self.assertIsNone(layer.dtype) + layer(self._const("float64")) + self.assertEqual(layer.dtype, "float64") + layer(self._const("float16")) + self.assertEqual(layer.dtype, "float64") + + # Test layer dtype inferred from first input + layer = IdentityLayer() + layer([self._const("float32"), self._const("float64")]) + self.assertEqual(layer.dtype, "float32") + + def test_passing_dtype_to_constructor(self): + layer = IdentityLayer(dtype="float64") + layer(self._const("float32")) + self.assertEqual(layer.dtype, "float64") + + layer = IdentityLayer(dtype="int32") + layer(self._const("float32")) + self.assertEqual(layer.dtype, "int32") + + layer = IdentityLayer(dtype=tf.float64) + layer(self._const("float32")) + self.assertEqual(layer.dtype, "float64") + + def
test_inputs_not_casted(self): + layer = IdentityLayer(dtype="float32") + self.assertEqual(layer(self._const("float64")).dtype, "float64") + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/legacy_tf_layers/convolutional.py b/keras/legacy_tf_layers/convolutional.py index 5eeb440ad7cf..735553e45a48 100644 --- a/keras/legacy_tf_layers/convolutional.py +++ b/keras/legacy_tf_layers/convolutional.py @@ -12,125 +12,273 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= -# pylint: disable=g-classes-have-attributes + """Contains the convolutional layer classes and their functional aliases.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import warnings +import tensorflow.compat.v2 as tf + from keras import layers as keras_layers from keras.legacy_tf_layers import base + +# isort: off from tensorflow.python.util.tf_export import keras_export -from tensorflow.python.util.tf_export import tf_export -@keras_export(v1=['keras.__internal__.legacy.layers.Conv1D']) -@tf_export(v1=['layers.Conv1D']) +@keras_export(v1=["keras.__internal__.legacy.layers.Conv1D"]) class Conv1D(keras_layers.Conv1D, base.Layer): - """1D convolution layer (e.g. temporal convolution). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. If `use_bias` is True (and a `bias_initializer` is provided), - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, specifying the - length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - dilation_rate: An integer or tuple/list of a single integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. 
- kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv1D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - conv = tf.compat.v1.layers.Conv1D(filters=3, kernel_size=3) - ``` - - After: - - ```python - conv = tf.keras.layers.Conv1D(filters=3, kernels_size=3) - ``` - @end_compatibility - """ - - def __init__(self, filters, - kernel_size, - strides=1, - padding='valid', - data_format='channels_last', - dilation_rate=1, - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super().__init__( + """1D convolution layer (e.g. temporal convolution). + + This layer creates a convolution kernel that is convolved + (actually cross-correlated) with the layer input to produce a tensor of + outputs. If `use_bias` is True (and a `bias_initializer` is provided), + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: An integer or tuple/list of a single integer, specifying the + length of the 1D convolution window. + strides: An integer or tuple/list of a single integer, + specifying the stride length of the convolution. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, length)`. + dilation_rate: An integer or tuple/list of a single integer, specifying + the dilation rate to use for dilated convolution. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any `strides` value != 1. 
+ activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv1D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + conv = tf.compat.v1.layers.Conv1D(filters=3, kernel_size=3) + ``` + + After: + + ```python + conv = tf.keras.layers.Conv1D(filters=3, kernel_size=3) + ``` + @end_compatibility + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format="channels_last", + dilation_rate=1, + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.conv1d"]) +def conv1d( + inputs, + filters, + kernel_size, + strides=1, + padding="valid", + data_format="channels_last", + dilation_rate=1, + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + reuse=None, +): + """Functional interface for 1D convolution (e.g. temporal convolution).
+ + This layer creates a convolution kernel that is convolved + (actually cross-correlated) with the layer input to produce a tensor of + outputs. If `use_bias` is True (and a `bias_initializer` is provided), + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + Args: + inputs: Tensor input. + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: An integer or tuple/list of a single integer, specifying the + length of the 1D convolution window. + strides: An integer or tuple/list of a single integer, + specifying the stride length of the convolution. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, length)`. + dilation_rate: An integer or tuple/list of a single integer, specifying + the dilation rate to use for dilated convolution. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any `strides` value != 1. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + reuse: Boolean, whether to reuse the weights of a previous layer + by the same name. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv1D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. 
+ + Before: + + ```python + y = tf.compat.v1.layers.conv1d(x, filters=3, kernel_size=3) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 1)) + y = tf.keras.layers.Conv1D(filters=3, kernel_size=3)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.conv1d` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.Conv1D` instead.", + stacklevel=2, + ) + layer = Conv1D( filters=filters, kernel_size=kernel_size, strides=strides, @@ -147,264 +295,278 @@ def __init__(self, filters, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, trainable=trainable, - name=name, **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.conv1d']) -@tf_export(v1=['layers.conv1d']) -def conv1d(inputs, - filters, - kernel_size, - strides=1, - padding='valid', - data_format='channels_last', - dilation_rate=1, - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - reuse=None): - """Functional interface for 1D convolution layer (e.g. temporal convolution). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. If `use_bias` is True (and a `bias_initializer` is provided), - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - Args: - inputs: Tensor input. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, specifying the - length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - dilation_rate: An integer or tuple/list of a single integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output.
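A note on the @compatibility guidance above: the `track_tf1_style_variables` decorator is what lets this functional `conv1d` run inside a TF2 layer without re-creating variables on every call. A minimal sketch of that pattern (the wrapper class name here is illustrative, not part of this codebase):

```python
import tensorflow as tf


class CompatConv1D(tf.keras.layers.Layer):  # hypothetical wrapper for illustration
    @tf.compat.v1.keras.utils.track_tf1_style_variables
    def call(self, inputs):
        # The legacy functional layer's variables are created once and
        # tracked by this Keras layer, so eager and tf.function both work.
        return tf.compat.v1.layers.conv1d(inputs, filters=3, kernel_size=3)


out = CompatConv1D()(tf.ones([2, 28, 1]))  # (batch, length, channels)
print(out.shape)  # (2, 26, 3) with the default "valid" padding
```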
- kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv1D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.conv1d(x, filters=3, kernel_size=3) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.Conv1D(filters=3, kernels_size=3)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.conv1d` is deprecated and ' - 'will be removed in a future version. ' - 'Please Use `tf.keras.layers.Conv1D` instead.', - stacklevel=2) - layer = Conv1D( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - _reuse=reuse, - _scope=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.Conv2D']) -@tf_export(v1=['layers.Conv2D']) + name=name, + _reuse=reuse, + _scope=name, + ) + return layer(inputs) + + +@keras_export(v1=["keras.__internal__.legacy.layers.Conv2D"]) class Conv2D(keras_layers.Conv2D, base.Layer): - """2D convolution layer (e.g. spatial convolution over images). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. If `use_bias` is True (and a `bias_initializer` is provided), - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. 
- Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - - dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv2D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. 
- - Before: - - ```python - conv = tf.compat.v1.layers.Conv2D(filters=3, kernel_size=3) - ``` - - After: - - ```python - conv = tf.keras.layers.Conv2D(filters=3, kernels_size=3) - ``` - @end_compatibility - """ - - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1), - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super().__init__( + """2D convolution layer (e.g. spatial convolution over images). + + This layer creates a convolution kernel that is convolved + (actually cross-correlated) with the layer input to produce a tensor of + outputs. If `use_bias` is True (and a `bias_initializer` is provided), + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: An integer or tuple/list of 2 integers, specifying the + height and width of the 2D convolution window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the convolution along the height and width. + Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + + dilation_rate: An integer or tuple/list of 2 integers, specifying + the dilation rate to use for dilated convolution. + Can be a single integer to specify the same value for + all spatial dimensions. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. 
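Since the Conv2D mapping in this hunk mirrors the Conv1D one, a quick output-shape check is often the fastest way to validate a migrated layer. A minimal sketch with illustrative shapes:

```python
import tensorflow as tf

# TF2 counterpart of tf.compat.v1.layers.Conv2D, as named in this docstring.
x = tf.keras.Input((28, 28, 1))
y = tf.keras.layers.Conv2D(filters=3, kernel_size=3)(x)
model = tf.keras.Model(x, y)
print(model.output_shape)  # (None, 26, 26, 3): "valid" padding shrinks 28 -> 26
```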
+ bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv2D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + conv = tf.compat.v1.layers.Conv2D(filters=3, kernel_size=3) + ``` + + After: + + ```python + conv = tf.keras.layers.Conv2D(filters=3, kernel_size=3) + ``` + @end_compatibility + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format="channels_last", + dilation_rate=(1, 1), + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.conv2d"]) +def conv2d( + inputs, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format="channels_last", + dilation_rate=(1, 1), + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + reuse=None, +): + """Functional interface for the 2D convolution layer. + + This layer creates a convolution kernel that is convolved + (actually cross-correlated) with the layer input to produce a tensor of + outputs. If `use_bias` is True (and a `bias_initializer` is provided), + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + Args: + inputs: Tensor input. + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: An integer or tuple/list of 2 integers, specifying the + height and width of the 2D convolution window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the convolution along the height and width. + Can be a single integer to specify the same value for + all spatial dimensions.
+ Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + + dilation_rate: An integer or tuple/list of 2 integers, specifying + the dilation rate to use for dilated convolution. + Can be a single integer to specify the same value for + all spatial dimensions. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + reuse: Boolean, whether to reuse the weights of a previous layer + by the same name. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv2D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.conv2d(x, filters=3, kernel_size=3) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.Conv2D(filters=3, kernel_size=3)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.conv2d` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.Conv2D` instead.", + stacklevel=2, + ) + layer = Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, @@ -421,272 +583,280 @@ def __init__(self, filters, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, trainable=trainable, - name=name, **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.conv2d']) -@tf_export(v1=['layers.conv2d']) -def conv2d(inputs, - filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1), - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - reuse=None): - """Functional interface for the 2D convolution layer. - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. If `use_bias` is True (and a `bias_initializer` is provided), - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - Args: - inputs: Tensor input. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - - dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights).
The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv2D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.conv2d(x, filters=3, kernel_size=3) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.Conv2D(filters=3, kernels_size=3)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.conv2d` is deprecated and ' - 'will be removed in a future version. ' - 'Please Use `tf.keras.layers.Conv2D` instead.', - stacklevel=2) - layer = Conv2D( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - _reuse=reuse, - _scope=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.Conv3D']) -@tf_export(v1=['layers.Conv3D']) + name=name, + _reuse=reuse, + _scope=name, + ) + return layer(inputs) + + +@keras_export(v1=["keras.__internal__.legacy.layers.Conv3D"]) class Conv3D(keras_layers.Conv3D, base.Layer): - """3D convolution layer (e.g. spatial convolution over volumes). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. If `use_bias` is True (and a `bias_initializer` is provided), - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution along the depth, - height and width. 
- Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - dilation_rate: An integer or tuple/list of 3 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv3D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - conv = tf.compat.v1.layers.Conv3D(filters=3, kernel_size=3) - ``` - - After: - - ```python - conv = tf.keras.layers.Conv3D(filters=3, kernels_size=3) - ``` - @end_compatibility - """ - - def __init__(self, filters, - kernel_size, - strides=(1, 1, 1), - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1, 1), - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super().__init__( + """3D convolution layer (e.g. 
spatial convolution over volumes). + + This layer creates a convolution kernel that is convolved + (actually cross-correlated) with the layer input to produce a tensor of + outputs. If `use_bias` is True (and a `bias_initializer` is provided), + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: An integer or tuple/list of 3 integers, specifying the + depth, height and width of the 3D convolution window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 3 integers, + specifying the strides of the convolution along the depth, + height and width. + Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, depth, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, depth, height, width)`. + dilation_rate: An integer or tuple/list of 3 integers, specifying + the dilation rate to use for dilated convolution. + Can be a single integer to specify the same value for + all spatial dimensions. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. 
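One point worth making explicit for Conv3D migrations: under `channels_last` the layer expects 5-D inputs of shape `(batch, depth, height, width, channels)`, one spatial axis more than Conv2D. A minimal sketch with illustrative shapes:

```python
import tensorflow as tf

x = tf.keras.Input((16, 28, 28, 1))  # (depth, height, width, channels)
y = tf.keras.layers.Conv3D(filters=3, kernel_size=3)(x)
model = tf.keras.Model(x, y)
print(model.output_shape)  # (None, 14, 26, 26, 3) with "valid" padding
```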
+ + The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv3D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + conv = tf.compat.v1.layers.Conv3D(filters=3, kernel_size=3) + ``` + + After: + + ```python + conv = tf.keras.layers.Conv3D(filters=3, kernel_size=3) + ``` + @end_compatibility + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format="channels_last", + dilation_rate=(1, 1, 1), + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.conv3d"]) +def conv3d( + inputs, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format="channels_last", + dilation_rate=(1, 1, 1), + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + reuse=None, +): + """Functional interface for the 3D convolution layer. + + This layer creates a convolution kernel that is convolved + (actually cross-correlated) with the layer input to produce a tensor of + outputs. If `use_bias` is True (and a `bias_initializer` is provided), + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + Args: + inputs: Tensor input. + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: An integer or tuple/list of 3 integers, specifying the + depth, height and width of the 3D convolution window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 3 integers, + specifying the strides of the convolution along the depth, + height and width. + Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, depth, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, depth, height, width)`.
+ dilation_rate: An integer or tuple/list of 3 integers, specifying + the dilation rate to use for dilated convolution. + Can be a single integer to specify the same value for + all spatial dimensions. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + reuse: Boolean, whether to reuse the weights of a previous layer + by the same name. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv3D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.conv3d(x, filters=3, kernel_size=3) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 28, 1)) + y = tf.keras.layers.Conv3D(filters=3, kernel_size=3)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.conv3d` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.Conv3D` instead.", + stacklevel=2, + ) + layer = Conv3D( filters=filters, kernel_size=kernel_size, strides=strides, @@ -703,279 +873,453 @@ def __init__(self, filters, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, trainable=trainable, - name=name, **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.conv3d']) -@tf_export(v1=['layers.conv3d']) -def conv3d(inputs, - filters, - kernel_size, - strides=(1, 1, 1), - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1, 1), - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - reuse=None): - """Functional interface for the 3D convolution layer. - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. If `use_bias` is True (and a `bias_initializer` is provided), - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - Args: - inputs: Tensor input. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution along the depth, - height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - dilation_rate: An integer or tuple/list of 3 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights).
The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Conv3D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.conv3d(x, filters=3, kernel_size=3) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.Conv3D(filters=3, kernels_size=3)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.conv3d` is deprecated and ' - 'will be removed in a future version. ' - 'Please Use `tf.keras.layers.Conv3D` instead.', - stacklevel=2) - layer = Conv3D( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - _reuse=reuse, - _scope=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.SeparableConv1D']) -@tf_export(v1=['layers.SeparableConv1D']) + name=name, + _reuse=reuse, + _scope=name, + ) + return layer(inputs) + + +@keras_export(v1=["keras.__internal__.legacy.layers.SeparableConv1D"]) class SeparableConv1D(keras_layers.SeparableConv1D, base.Layer): - """Depthwise separable 1D convolution. - - This layer performs a depthwise convolution that acts separately on - channels, followed by a pointwise convolution that mixes channels. - If `use_bias` is True and a bias initializer is provided, - it adds a bias vector to the output. - It then optionally applies an activation function to produce the final output. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A single integer specifying the spatial - dimensions of the filters. - strides: A single integer specifying the strides - of the convolution. - Specifying any `stride` value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. 
`"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - dilation_rate: A single integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. The total number of depthwise convolution output - channels will be equal to `num_filters_in * depth_multiplier`. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - depthwise_initializer: An initializer for the depthwise convolution kernel. - pointwise_initializer: An initializer for the pointwise convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - depthwise_regularizer: Optional regularizer for the depthwise - convolution kernel. - pointwise_regularizer: Optional regularizer for the pointwise - convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - depthwise_constraint: Optional projection function to be applied to the - depthwise kernel after being updated by an `Optimizer` (e.g. used for - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - pointwise_constraint: Optional projection function to be applied to the - pointwise kernel after being updated by an `Optimizer`. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.SeparableConv1D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. 
- - Before: - - ```python - conv = tf.compat.v1.layers.SeparableConv1D(filters=3, kernel_size=3) - ``` - - After: - - ```python - conv = tf.keras.layers.SeparableConv1D(filters=3, kernels_size=3) - ``` - @end_compatibility - """ - - def __init__(self, filters, - kernel_size, - strides=1, - padding='valid', - data_format='channels_last', - dilation_rate=1, - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer=None, - pointwise_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super().__init__( + """Depthwise separable 1D convolution. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. + It then optionally applies an activation function to produce the final + output. + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: A single integer specifying the spatial + dimensions of the filters. + strides: A single integer specifying the strides + of the convolution. + Specifying any `stride` value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, length)`. + dilation_rate: A single integer, specifying + the dilation rate to use for dilated convolution. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + depth_multiplier: The number of depthwise convolution output channels for + each input channel. The total number of depthwise convolution output + channels will be equal to `num_filters_in * depth_multiplier`. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + depthwise_initializer: An initializer for the depthwise convolution + kernel. + pointwise_initializer: An initializer for the pointwise convolution + kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. used for + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). 
Constraints are + not safe to use when doing asynchronous distributed training. + pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.SeparableConv1D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + conv = tf.compat.v1.layers.SeparableConv1D(filters=3, kernel_size=3) + ``` + + After: + + ```python + conv = tf.keras.layers.SeparableConv1D(filters=3, kernel_size=3) + ``` + @end_compatibility + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format="channels_last", + dilation_rate=1, + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer=None, + pointwise_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + depth_multiplier=depth_multiplier, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + pointwise_initializer=pointwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + pointwise_regularizer=pointwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + pointwise_constraint=pointwise_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.SeparableConv2D"]) +class SeparableConv2D(keras_layers.SeparableConv2D, base.Layer): + """Depthwise separable 2D convolution. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. It then optionally applies an + activation function to produce the final output. + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: A tuple or list of 2 integers specifying the spatial + dimensions of the filters. Can be a single integer to specify the same + value for all spatial dimensions. + strides: A tuple or list of 2 positive integers specifying the strides + of the convolution.
Can be a single integer to specify the same value + for all spatial dimensions. + Specifying any `stride` value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + + dilation_rate: An integer or tuple/list of 2 integers, specifying + the dilation rate to use for dilated convolution. + Can be a single integer to specify the same value for + all spatial dimensions. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + depth_multiplier: The number of depthwise convolution output channels for + each input channel. The total number of depthwise convolution output + channels will be equal to `num_filters_in * depth_multiplier`. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + depthwise_initializer: An initializer for the depthwise convolution + kernel. + pointwise_initializer: An initializer for the pointwise convolution + kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. used for + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.SeparableConv2D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. 
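A minimal runnable sketch (an editorial illustration, not part of the patch itself) of the `num_filters_in * depth_multiplier` bookkeeping described in the `SeparableConv2D` docstring above, using the public `tf.keras.layers.SeparableConv2D` API; the concrete shapes are invented example values:

```python
import tensorflow as tf

layer = tf.keras.layers.SeparableConv2D(
    filters=8, kernel_size=3, depth_multiplier=2
)
y = layer(tf.zeros((1, 28, 28, 4)))  # 4 input channels

# Depthwise stage: 4 input channels * depth_multiplier 2 = 8 intermediate
# channels; the 1x1 pointwise stage then mixes them into `filters` = 8.
print(layer.depthwise_kernel.shape)  # (3, 3, 4, 2)
print(layer.pointwise_kernel.shape)  # (1, 1, 8, 8)
print(y.shape)                       # (1, 26, 26, 8), "valid" padding
```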
+ + Before: + + ```python + conv = tf.compat.v1.layers.SeparableConv2D(filters=3, kernel_size=3) + ``` + + After: + + ```python + conv = tf.keras.layers.SeparableConv2D(filters=3, kernel_size=3) + ``` + @end_compatibility + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format="channels_last", + dilation_rate=(1, 1), + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer=None, + pointwise_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + depth_multiplier=depth_multiplier, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + pointwise_initializer=pointwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + pointwise_regularizer=pointwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + pointwise_constraint=pointwise_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.separable_conv1d"]) +def separable_conv1d( + inputs, + filters, + kernel_size, + strides=1, + padding="valid", + data_format="channels_last", + dilation_rate=1, + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer=None, + pointwise_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + reuse=None, +): + """Functional interface for the depthwise separable 1D convolution layer. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. It then optionally applies an + activation function to produce the final output. + + Args: + inputs: Input tensor. + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: A single integer specifying the spatial + dimensions of the filters. + strides: A single integer specifying the strides + of the convolution. + Specifying any `stride` value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, length)`.
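A minimal sketch (an editorial illustration, not part of the patch) of the `data_format` shapes just described for the 1D case; the dimensions are invented for illustration:

```python
import tensorflow as tf

# channels_last (the default): inputs are (batch, length, channels).
x = tf.zeros((2, 100, 3))
y = tf.keras.layers.SeparableConv1D(filters=6, kernel_size=5)(x)
print(y.shape)  # (2, 96, 6) -- "valid" padding: 100 - 5 + 1 = 96

# channels_first would instead expect (batch, channels, length),
# i.e. an input of shape (2, 3, 100) with data_format="channels_first".
```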
+ dilation_rate: A single integer, specifying + the dilation rate to use for dilated convolution. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + depth_multiplier: The number of depthwise convolution output channels for + each input channel. The total number of depthwise convolution output + channels will be equal to `num_filters_in * depth_multiplier`. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + depthwise_initializer: An initializer for the depthwise convolution + kernel. + pointwise_initializer: An initializer for the pointwise convolution + kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. used for + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + reuse: Boolean, whether to reuse the weights of a previous layer + by the same name. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.SeparableConv1D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.separable_conv1d(x, filters=3, kernel_size=3) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 1)) + y = tf.keras.layers.SeparableConv1D(filters=3, kernel_size=3)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.separable_conv1d` is deprecated and " + "will be removed in a future version.
" + "Please Use `tf.keras.layers.SeparableConv1D` instead.", + stacklevel=2, + ) + layer = SeparableConv1D( filters=filters, kernel_size=kernel_size, strides=strides, @@ -997,132 +1341,156 @@ def __init__(self, filters, bias_constraint=bias_constraint, trainable=trainable, name=name, - **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.SeparableConv2D']) -@tf_export(v1=['layers.SeparableConv2D']) -class SeparableConv2D(keras_layers.SeparableConv2D, base.Layer): - """Depthwise separable 2D convolution. - - This layer performs a depthwise convolution that acts separately on - channels, followed by a pointwise convolution that mixes channels. - If `use_bias` is True and a bias initializer is provided, - it adds a bias vector to the output. - It then optionally applies an activation function to produce the final output. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A tuple or list of 2 integers specifying the spatial - dimensions of the filters. Can be a single integer to specify the same - value for all spatial dimensions. - strides: A tuple or list of 2 positive integers specifying the strides - of the convolution. Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any `stride` value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - - dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. The total number of depthwise convolution output - channels will be equal to `num_filters_in * depth_multiplier`. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - depthwise_initializer: An initializer for the depthwise convolution kernel. - pointwise_initializer: An initializer for the pointwise convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - depthwise_regularizer: Optional regularizer for the depthwise - convolution kernel. - pointwise_regularizer: Optional regularizer for the pointwise - convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - depthwise_constraint: Optional projection function to be applied to the - depthwise kernel after being updated by an `Optimizer` (e.g. used for - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). 
Constraints are - not safe to use when doing asynchronous distributed training. - pointwise_constraint: Optional projection function to be applied to the - pointwise kernel after being updated by an `Optimizer`. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.SeparableConv2D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - conv = tf.compat.v1.layers.SeparableConv2D(filters=3, kernel_size=3) - ``` - - After: - - ```python - conv = tf.keras.layers.SeparableConv2D(filters=3, kernels_size=3) - ``` - @end_compatibility - """ - - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1), - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer=None, - pointwise_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super().__init__( + _reuse=reuse, + _scope=name, + ) + return layer(inputs) + + +@keras_export(v1=["keras.__internal__.legacy.layers.separable_conv2d"]) +def separable_conv2d( + inputs, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format="channels_last", + dilation_rate=(1, 1), + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer=None, + pointwise_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + reuse=None, +): + """Functional interface for the depthwise separable 2D convolution layer. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. It then optionally applies an + activation function to produce the final output. + + Args: + inputs: Input tensor. + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: A tuple or list of 2 integers specifying the spatial + dimensions of the filters. Can be a single integer to specify the same + value for all spatial dimensions. + strides: A tuple or list of 2 positive integers specifying the strides + of the convolution. Can be a single integer to specify the same value + for all spatial dimensions. 
Specifying any `stride` value != 1 is + incompatible with specifying any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + + dilation_rate: An integer or tuple/list of 2 integers, specifying + the dilation rate to use for dilated convolution. + Can be a single integer to specify the same value for + all spatial dimensions. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + depth_multiplier: The number of depthwise convolution output channels for + each input channel. The total number of depthwise convolution output + channels will be equal to `num_filters_in * depth_multiplier`. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + depthwise_initializer: An initializer for the depthwise convolution + kernel. + pointwise_initializer: An initializer for the pointwise convolution + kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. used for + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + reuse: Boolean, whether to reuse the weights of a previous layer + by the same name. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.SeparableConv2D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. 
+ + Before: + + ```python + y = tf.compat.v1.layers.separable_conv2d(x, filters=3, kernel_size=3) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.SeparableConv2D(filters=3, kernel_size=3)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.separable_conv2d` is deprecated and " + "will be removed in a future version. " + "Please Use `tf.keras.layers.SeparableConv2D` instead.", + stacklevel=2, + ) + layer = SeparableConv2D( filters=filters, kernel_size=kernel_size, strides=strides, @@ -1144,439 +1512,256 @@ def __init__(self, filters, bias_constraint=bias_constraint, trainable=trainable, name=name, - **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.separable_conv1d']) -@tf_export(v1=['layers.separable_conv1d']) -def separable_conv1d(inputs, - filters, - kernel_size, - strides=1, - padding='valid', - data_format='channels_last', - dilation_rate=1, - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer=None, - pointwise_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - reuse=None): - """Functional interface for the depthwise separable 1D convolution layer. - - This layer performs a depthwise convolution that acts separately on - channels, followed by a pointwise convolution that mixes channels. - If `use_bias` is True and a bias initializer is provided, - it adds a bias vector to the output. - It then optionally applies an activation function to produce the final output. - - Args: - inputs: Input tensor. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A single integer specifying the spatial - dimensions of the filters. - strides: A single integer specifying the strides - of the convolution. - Specifying any `stride` value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - dilation_rate: A single integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. The total number of depthwise convolution output - channels will be equal to `num_filters_in * depth_multiplier`. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - depthwise_initializer: An initializer for the depthwise convolution kernel.
- pointwise_initializer: An initializer for the pointwise convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - depthwise_regularizer: Optional regularizer for the depthwise - convolution kernel. - pointwise_regularizer: Optional regularizer for the pointwise - convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - depthwise_constraint: Optional projection function to be applied to the - depthwise kernel after being updated by an `Optimizer` (e.g. used for - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - pointwise_constraint: Optional projection function to be applied to the - pointwise kernel after being updated by an `Optimizer`. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.SeparableConv1D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.separable_conv1d(x, filters=3, kernel_size=3) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.SeparableConv1D(filters=3, kernels_size=3)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.separable_conv1d` is deprecated and ' - 'will be removed in a future version. 
' - 'Please Use `tf.keras.layers.SeparableConv1D` instead.', - stacklevel=2) - layer = SeparableConv1D( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - depth_multiplier=depth_multiplier, - activation=activation, - use_bias=use_bias, - depthwise_initializer=depthwise_initializer, - pointwise_initializer=pointwise_initializer, - bias_initializer=bias_initializer, - depthwise_regularizer=depthwise_regularizer, - pointwise_regularizer=pointwise_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - depthwise_constraint=depthwise_constraint, - pointwise_constraint=pointwise_constraint, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - _reuse=reuse, - _scope=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.separable_conv2d']) -@tf_export(v1=['layers.separable_conv2d']) -def separable_conv2d(inputs, - filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1), - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer=None, - pointwise_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - reuse=None): - """Functional interface for the depthwise separable 2D convolution layer. - - This layer performs a depthwise convolution that acts separately on - channels, followed by a pointwise convolution that mixes channels. - If `use_bias` is True and a bias initializer is provided, - it adds a bias vector to the output. - It then optionally applies an activation function to produce the final output. - - Args: - inputs: Input tensor. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A tuple or list of 2 integers specifying the spatial - dimensions of the filters. Can be a single integer to specify the same - value for all spatial dimensions. - strides: A tuple or list of 2 positive integers specifying the strides - of the convolution. Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any `stride` value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - - dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. 
The total number of depthwise convolution output - channels will be equal to `num_filters_in * depth_multiplier`. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - depthwise_initializer: An initializer for the depthwise convolution kernel. - pointwise_initializer: An initializer for the pointwise convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - depthwise_regularizer: Optional regularizer for the depthwise - convolution kernel. - pointwise_regularizer: Optional regularizer for the pointwise - convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - depthwise_constraint: Optional projection function to be applied to the - depthwise kernel after being updated by an `Optimizer` (e.g. used for - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - pointwise_constraint: Optional projection function to be applied to the - pointwise kernel after being updated by an `Optimizer`. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.SeparableConv2D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.separable_conv2d(x, filters=3, kernel_size=3) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.SeparableConv2D(filters=3, kernels_size=3)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.separable_conv2d` is deprecated and ' - 'will be removed in a future version. 
' - 'Please Use `tf.keras.layers.SeparableConv2D` instead.', - stacklevel=2) - layer = SeparableConv2D( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - depth_multiplier=depth_multiplier, - activation=activation, - use_bias=use_bias, - depthwise_initializer=depthwise_initializer, - pointwise_initializer=pointwise_initializer, - bias_initializer=bias_initializer, - depthwise_regularizer=depthwise_regularizer, - pointwise_regularizer=pointwise_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - depthwise_constraint=depthwise_constraint, - pointwise_constraint=pointwise_constraint, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - _reuse=reuse, - _scope=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.Conv2DTranspose']) -@tf_export(v1=['layers.Conv2DTranspose']) + _reuse=reuse, + _scope=name, + ) + return layer(inputs) + + +@keras_export(v1=["keras.__internal__.legacy.layers.Conv2DTranspose"]) class Conv2DTranspose(keras_layers.Conv2DTranspose, base.Layer): - """Transposed 2D convolution layer (sometimes called 2D Deconvolution). - - The need for transposed convolutions generally arises - from the desire to use a transformation going in the opposite direction - of a normal convolution, i.e., from something that has the shape of the - output of some convolution to something that has the shape of its input - while maintaining a connectivity pattern that is compatible with - said convolution. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A tuple or list of 2 positive integers specifying the spatial - dimensions of the filters. Can be a single integer to specify the same - value for all spatial dimensions. - strides: A tuple or list of 2 positive integers specifying the strides - of the convolution. Can be a single integer to specify the same value for - all spatial dimensions. - padding: one of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). 
Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.Conv2DTranspose`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - conv = tf.compat.v1.layers.Conv2DTranspose(filters=3, kernel_size=3) - ``` - - After: - - ```python - conv = tf.keras.layers.Conv2DTranspose(filters=3, kernels_size=3) - ``` - @end_compatibility - """ - - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format='channels_last', - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super().__init__( + """Transposed 2D convolution layer (sometimes called 2D Deconvolution). + + The need for transposed convolutions generally arises + from the desire to use a transformation going in the opposite direction + of a normal convolution, i.e., from something that has the shape of the + output of some convolution to something that has the shape of its input + while maintaining a connectivity pattern that is compatible with + said convolution. + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: A tuple or list of 2 positive integers specifying the spatial + dimensions of the filters. Can be a single integer to specify the same + value for all spatial dimensions. + strides: A tuple or list of 2 positive integers specifying the strides + of the convolution. Can be a single integer to specify the same value + for all spatial dimensions. + padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. 
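A small sketch (an editorial illustration, not part of the patch) of the shape-inverting behaviour the `Conv2DTranspose` docstring above describes; the shapes are example values:

```python
import tensorflow as tf

# A stride-2 "same" convolution halves the spatial dimensions...
x = tf.zeros((1, 28, 28, 3))
down = tf.keras.layers.Conv2D(16, 3, strides=2, padding="same")(x)
print(down.shape)  # (1, 14, 14, 16)

# ...and the matching Conv2DTranspose maps that shape back again.
up = tf.keras.layers.Conv2DTranspose(3, 3, strides=2, padding="same")(down)
print(up.shape)  # (1, 28, 28, 3)
```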
+ activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.Conv2DTranspose`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + conv = tf.compat.v1.layers.Conv2DTranspose(filters=3, kernel_size=3) + ``` + + After: + + ```python + conv = tf.keras.layers.Conv2DTranspose(filters=3, kernel_size=3) + ``` + @end_compatibility + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format="channels_last", + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.conv2d_transpose"]) +def conv2d_transpose( + inputs, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format="channels_last", + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + reuse=None, +): + """Functional interface for transposed 2D convolution layer. + + The need for transposed convolutions generally arises + from the desire to use a transformation going in the opposite direction + of a normal convolution, i.e., from something that has the shape of the + output of some convolution to something that has the shape of its input + while maintaining a connectivity pattern that is compatible with + said convolution. + + Args: + inputs: Input tensor. + filters: Integer, the dimensionality of the output space (i.e.
the number + of filters in the convolution). + kernel_size: A tuple or list of 2 positive integers specifying the spatial + dimensions of the filters. Can be a single integer to specify the same + value for all spatial dimensions. + strides: A tuple or list of 2 positive integers specifying the strides + of the convolution. Can be a single integer to specify the same value + for all spatial dimensions. + padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + activation: Activation function. Set it to `None` to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If `None`, the + default initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + reuse: Boolean, whether to reuse the weights of a previous layer + by the same name. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.Conv2DTranspose`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.conv2d_transpose(x, filters=3, kernel_size=3) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.Conv2DTranspose(filters=3, kernel_size=3)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.conv2d_transpose` is deprecated and " + "will be removed in a future version.
" + "Please Use `tf.keras.layers.Conv2DTranspose` instead.", + stacklevel=2, + ) + layer = Conv2DTranspose( filters=filters, kernel_size=kernel_size, strides=strides, @@ -1593,247 +1778,246 @@ def __init__(self, filters, bias_constraint=bias_constraint, trainable=trainable, name=name, - **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.conv2d_transpose']) -@tf_export(v1=['layers.conv2d_transpose']) -def conv2d_transpose(inputs, - filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format='channels_last', - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - reuse=None): - """Functional interface for transposed 2D convolution layer. - - The need for transposed convolutions generally arises - from the desire to use a transformation going in the opposite direction - of a normal convolution, i.e., from something that has the shape of the - output of some convolution to something that has the shape of its input - while maintaining a connectivity pattern that is compatible with - said convolution. - - Args: - inputs: Input tensor. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A tuple or list of 2 positive integers specifying the spatial - dimensions of the filters. Can be a single integer to specify the same - value for all spatial dimensions. - strides: A tuple or list of 2 positive integers specifying the strides - of the convolution. Can be a single integer to specify the same value for - all spatial dimensions. - padding: one of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - activation: Activation function. Set it to `None` to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If `None`, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. 
- reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.Conv2DTranspose`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.conv2d_transpose(x, filters=3, kernel_size=3) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.Conv2DTranspose(filters=3, kernels_size=3)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.conv2d_transpose` is deprecated and ' - 'will be removed in a future version. ' - 'Please Use `tf.keras.layers.Conv2DTranspose` instead.', - stacklevel=2) - layer = Conv2DTranspose( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - _reuse=reuse, - _scope=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.Conv3DTranspose']) -@tf_export(v1=['layers.Conv3DTranspose']) + _reuse=reuse, + _scope=name, + ) + return layer(inputs) + + +@keras_export(v1=["keras.__internal__.legacy.layers.Conv3DTranspose"]) class Conv3DTranspose(keras_layers.Conv3DTranspose, base.Layer): - """Transposed 3D convolution layer (sometimes called 3D Deconvolution). - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for all spatial - dimensions. - strides: An integer or tuple/list of 3 integers, specifying the strides - of the convolution along the depth, height and width. - Can be a single integer to specify the same value for all spatial - dimensions. - padding: One of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - activation: Activation function. Set it to `None` to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. 
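The `@compatibility(TF2)` blocks in this file repeatedly point at `tf.compat.v1.keras.utils.track_tf1_style_variables`. A minimal sketch of that pattern, following the TF2 migration guide (the `Upsampler` module and its shapes are illustrative assumptions):

```python
import tensorflow as tf

class Upsampler(tf.Module):
    # Decorating a method of a tf.Module (or Keras layer) lets variables
    # created by TF1-style layers inside it be tracked on the object,
    # which is what makes these legacy layers usable under eager
    # execution and `tf.function`.
    @tf.compat.v1.keras.utils.track_tf1_style_variables
    def __call__(self, x):
        return tf.compat.v1.layers.conv2d_transpose(
            x, filters=3, kernel_size=3
        )

up = Upsampler()
out = up(tf.ones([1, 8, 8, 4]))  # valid padding, stride 1: (1, 10, 10, 3)
print(len(up.variables))  # kernel + bias -> 2
```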
- kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If `None`, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.Conv3DTranspose`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - conv = tf.compat.v1.layers.Conv3DTranspose(filters=3, kernel_size=3) - ``` - - After: - - ```python - conv = tf.keras.layers.Conv3DTranspose(filters=3, kernels_size=3) - ``` - @end_compatibility - """ - - def __init__(self, - filters, - kernel_size, - strides=(1, 1, 1), - padding='valid', - data_format='channels_last', - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super().__init__( + """Transposed 3D convolution layer (sometimes called 3D Deconvolution). + + Args: + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: An integer or tuple/list of 3 integers, specifying the + depth, height and width of the 3D convolution window. + Can be a single integer to specify the same value for all spatial + dimensions. + strides: An integer or tuple/list of 3 integers, specifying the strides + of the convolution along the depth, height and width. + Can be a single integer to specify the same value for all spatial + dimensions. + padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, depth, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, depth, height, width)`. + activation: Activation function. 
Set it to `None` to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If `None`, the + default initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy API that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables`. + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.Conv3DTranspose`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + conv = tf.compat.v1.layers.Conv3DTranspose(filters=3, kernel_size=3) + ``` + + After: + + ```python + conv = tf.keras.layers.Conv3DTranspose(filters=3, kernel_size=3) + ``` + @end_compatibility + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format="channels_last", + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs + ): + super().__init__( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.conv3d_transpose"]) +def conv3d_transpose( + inputs, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format="channels_last", + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + reuse=None, +): + """Functional interface for transposed 3D convolution layer. + + Args: + inputs: Input tensor.
+ filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: A tuple or list of 3 positive integers specifying the spatial + dimensions of the filters. Can be a single integer to specify the same + value for all spatial dimensions. + strides: A tuple or list of 3 positive integers specifying the strides + of the convolution. Can be a single integer to specify the same value + for all spatial dimensions. + padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, depth, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, depth, height, width)`. + activation: Activation function. Set it to `None` to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If `None`, the default + initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + reuse: Boolean, whether to reuse the weights of a previous layer + by the same name. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy API that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables`. + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.Conv3DTranspose`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.conv3d_transpose(x, filters=3, kernel_size=3) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 28, 1)) + y = tf.keras.layers.Conv3DTranspose(filters=3, kernel_size=3)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.conv3d_transpose` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.Conv3DTranspose` instead.", + stacklevel=2, + ) + layer = Conv3DTranspose( filters=filters, kernel_size=kernel_size, strides=strides, @@ -1850,141 +2034,10 @@ def __init__(self, bias_constraint=bias_constraint, trainable=trainable, name=name, - **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.conv3d_transpose']) -@tf_export(v1=['layers.conv3d_transpose']) -def conv3d_transpose(inputs, - filters, - kernel_size, - strides=(1, 1, 1), - padding='valid', - data_format='channels_last', - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - reuse=None): - """Functional interface for transposed 3D convolution layer. - - Args: - inputs: Input tensor. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A tuple or list of 3 positive integers specifying the spatial - dimensions of the filters. Can be a single integer to specify the same - value for all spatial dimensions. - strides: A tuple or list of 3 positive integers specifying the strides - of the convolution. Can be a single integer to specify the same value for - all spatial dimensions. - padding: one of `"valid"` or `"same"` (case-insensitive). - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled.
- - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.Conv3DTranspose`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.conv3d_transpose(x, filters=3, kernel_size=3) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.Conv3DTranspose(filters=3, kernels_size=3)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.conv3d_transpose` is deprecated and ' - 'will be removed in a future version. ' - 'Please Use `tf.keras.layers.Conv3DTranspose` instead.', - stacklevel=2) - layer = Conv3DTranspose( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - _reuse=reuse, - _scope=name) - return layer(inputs) + _reuse=reuse, + _scope=name, + ) + return layer(inputs) # Aliases diff --git a/keras/legacy_tf_layers/convolutional_test.py b/keras/legacy_tf_layers/convolutional_test.py index 19d4a671048e..296aef07d981 100644 --- a/keras/legacy_tf_layers/convolutional_test.py +++ b/keras/legacy_tf_layers/convolutional_test.py @@ -18,1154 +18,1373 @@ from __future__ import division from __future__ import print_function +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np from keras.legacy_tf_layers import convolutional as conv_layers class ConvTest(tf.test.TestCase): + def testInvalidDataFormat(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + with self.assertRaisesRegex(ValueError, "data_format"): + conv_layers.conv2d(images, 32, 3, data_format="invalid") - def testInvalidDataFormat(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'data_format'): - conv_layers.conv2d(images, 32, 3, data_format='invalid') - - def testInvalidStrides(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.conv2d(images, 32, 3, strides=(1, 2, 3)) - - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.conv2d(images, 32, 3, strides=None) - - def testInvalidKernelSize(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.conv2d(images, 32, (1, 2, 3)) - - with self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.conv2d(images, 32, None) - - def testCreateConv2D(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) 
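The shape assertions in the reformatted tests below all reduce to standard convolution arithmetic: with `padding="valid"` a spatial dimension shrinks to `dim - kernel_size + 1`, while `padding="same"` with stride `s` yields `ceil(dim / s)`. A small sketch mirroring the test shapes (the shapes are assumptions for illustration):

```python
import tensorflow as tf

images = tf.random.uniform((5, 7, 9, 4))
valid = tf.keras.layers.Conv2D(32, 3)(images)
same = tf.keras.layers.Conv2D(32, 3, strides=2, padding="same")(images)
print(valid.shape)  # (5, 5, 7, 32): 7 - 3 + 1 = 5, 9 - 3 + 1 = 7
print(same.shape)   # (5, 4, 5, 32): ceil(7 / 2) = 4, ceil(9 / 2) = 5
```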
- layer = conv_layers.Conv2D(32, [3, 3], activation=tf.nn.relu) - output = layer(images) - if not tf.executing_eagerly(): - self.assertEqual(output.op.name, 'conv2d/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height - 2, width - 2, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testConv2DFloat16(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4), dtype='float16') - output = conv_layers.conv2d(images, 32, [3, 3], activation=tf.nn.relu) - self.assertListEqual(output.get_shape().as_list(), - [5, height - 2, width - 2, 32]) - - def testCreateConv2DIntegerKernelSize(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.Conv2D(32, 3) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height - 2, width - 2, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testCreateConv2DChannelsFirst(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, 4, height, width)) - layer = conv_layers.Conv2D(32, [3, 3], data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height - 2, width - 2]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testUnknownInputChannels(self): - with tf.Graph().as_default(): - images = tf.compat.v1.placeholder(tf.float32, (5, 7, 9, None)) - layer = conv_layers.Conv2D(32, [3, 3], activation=tf.nn.relu) - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. The input_shape received is'): - _ = layer(images) - - images = tf.compat.v1.placeholder(tf.float32, (5, None, 7, 9)) - layer = conv_layers.Conv2D(32, [3, 3], data_format='channels_first') - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. 
The input_shape received is'): - _ = layer(images) - - def testConv2DPaddingSame(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 32), seed=1) - layer = conv_layers.Conv2D(64, images.get_shape()[1:3], padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, height, width, 64]) - - def testCreateConvWithStrides(self): - height, width = 6, 8 - # Test strides tuple - images = tf.random.uniform((5, height, width, 3), seed=1) - layer = conv_layers.Conv2D(32, [3, 3], strides=(2, 2), padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width / 2, 32]) - - # Test strides integer - layer = conv_layers.Conv2D(32, [3, 3], strides=2, padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width / 2, 32]) - - # Test unequal strides - layer = conv_layers.Conv2D(32, [3, 3], strides=(2, 1), padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width, 32]) - - def testCreateConv1D(self): - width = 7 - data = tf.random.uniform((5, width, 4)) - layer = conv_layers.Conv1D(32, 3, activation=tf.nn.relu) - output = layer(data) - if not tf.executing_eagerly(): - self.assertEqual(output.op.name, 'conv1d/Relu') - self.assertListEqual(output.get_shape().as_list(), [5, width - 2, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testConv1DFloat16(self): - width = 7 - data = tf.random.uniform((5, width, 4), dtype='float16') - output = conv_layers.conv1d(data, 32, 3, activation=tf.nn.relu) - self.assertListEqual(output.get_shape().as_list(), [5, width - 2, 32]) - - def testCreateConv1DChannelsFirst(self): - with tf.Graph().as_default(): - width = 7 - data = tf.random.uniform((5, 4, width)) - layer = conv_layers.Conv1D(32, 3, data_format='channels_first') - output = layer(data) - self.assertListEqual(output.get_shape().as_list(), [5, 32, width - 2]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testUnknownInputChannelsConv1D(self): - with tf.Graph().as_default(): - data = tf.compat.v1.placeholder(tf.float32, (5, 4, None)) - layer = conv_layers.Conv1D(32, 3, activation=tf.nn.relu) - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. The input_shape received is'): - _ = layer(data) - - data = tf.compat.v1.placeholder(tf.float32, (5, None, 4)) - layer = conv_layers.Conv1D(32, 3, data_format='channels_first') - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. 
The input_shape received is'): - _ = layer(data) - - def testCreateConv3D(self): - depth, height, width = 6, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 4)) - layer = conv_layers.Conv3D(32, [3, 3, 3], activation=tf.nn.relu) - output = layer(volumes) - if not tf.executing_eagerly(): - self.assertEqual(output.op.name, 'conv3d/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, depth - 2, height - 2, width - 2, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testUnknownInputChannelsConv3D(self): - with tf.Graph().as_default(): - volumes = tf.compat.v1.placeholder(tf.float32, (5, 6, 7, 9, None)) - layer = conv_layers.Conv3D(32, [3, 3, 3], activation=tf.nn.relu) - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. The input_shape received is'): - _ = layer(volumes) - - def testConv2DKernelRegularizer(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.Conv2D(32, [3, 3], kernel_regularizer=reg) - layer(images) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testConv2DBiasRegularizer(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.Conv2D(32, [3, 3], bias_regularizer=reg) - layer(images) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testConv2DNoBias(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.Conv2D( - 32, [3, 3], activation=tf.nn.relu, use_bias=False) - output = layer(images) - if not tf.executing_eagerly(): - self.assertEqual(output.op.name, 'conv2d/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height - 2, width - 2, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) - self.assertEqual(layer.bias, None) - - def testDilatedConv2D(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.Conv2D(32, [3, 3], dilation_rate=3) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 1, 3, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - # Test tuple dilation rate - layer = conv_layers.Conv2D(32, [3, 3], dilation_rate=(1, 3)) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, height - 2, 3, 32]) - - def testFunctionalConv2DReuse(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.conv2d(images, 32, [3, 3], name='conv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - conv_layers.conv2d(images, 32, [3, 3], name='conv1', reuse=True) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - - def 
testFunctionalConv2DReuseFromScope(self): - with tf.Graph().as_default(): - with tf.compat.v1.variable_scope('scope'): + def testInvalidStrides(self): height, width = 7, 9 images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.conv2d(images, 32, [3, 3], name='conv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - with tf.compat.v1.variable_scope('scope', reuse=True): - conv_layers.conv2d(images, 32, [3, 3], name='conv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - - def testFunctionalConv2DInitializerFromScope(self): - with tf.Graph().as_default(), self.cached_session(): - with tf.compat.v1.variable_scope( - 'scope', initializer=tf.compat.v1.ones_initializer()): + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.conv2d(images, 32, 3, strides=(1, 2, 3)) + + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.conv2d(images, 32, 3, strides=None) + + def testInvalidKernelSize(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.conv2d(images, 32, (1, 2, 3)) + + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.conv2d(images, 32, None) + + def testCreateConv2D(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.Conv2D(32, [3, 3], activation=tf.nn.relu) + output = layer(images) + if not tf.executing_eagerly(): + self.assertEqual(output.op.name, "conv2d/Relu") + self.assertListEqual( + output.get_shape().as_list(), [5, height - 2, width - 2, 32] + ) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testConv2DFloat16(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4), dtype="float16") + output = conv_layers.conv2d(images, 32, [3, 3], activation=tf.nn.relu) + self.assertListEqual( + output.get_shape().as_list(), [5, height - 2, width - 2, 32] + ) + + def testCreateConv2DIntegerKernelSize(self): height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.Conv2D(32, 3) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height - 2, width - 2, 32] + ) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testCreateConv2DChannelsFirst(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, 4, height, width)) + layer = conv_layers.Conv2D(32, [3, 3], data_format="channels_first") + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, 32, height - 2, width - 2] + ) + self.assertListEqual( + layer.kernel.get_shape().as_list(), [3, 3, 4, 32] + ) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testUnknownInputChannels(self): + with tf.Graph().as_default(): + images = tf.compat.v1.placeholder(tf.float32, (5, 7, 9, None)) + layer = conv_layers.Conv2D(32, [3, 3], activation=tf.nn.relu) + with self.assertRaisesRegex( + ValueError, + "The channel dimension of the inputs " + "should be defined. 
The input_shape received is", + ): + _ = layer(images) + + images = tf.compat.v1.placeholder(tf.float32, (5, None, 7, 9)) + layer = conv_layers.Conv2D(32, [3, 3], data_format="channels_first") + with self.assertRaisesRegex( + ValueError, + "The channel dimension of the inputs " + "should be defined. The input_shape received is", + ): + _ = layer(images) + + def testConv2DPaddingSame(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 32), seed=1) + layer = conv_layers.Conv2D(64, images.get_shape()[1:3], padding="same") + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height, width, 64] + ) + + def testCreateConvWithStrides(self): + height, width = 6, 8 + # Test strides tuple images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.conv2d(images, 32, [3, 3], name='conv1') - weights = tf.compat.v1.trainable_variables() - # Check the names of weights in order. - self.assertTrue('kernel' in weights[0].name) - self.assertTrue('bias' in weights[1].name) - self.evaluate(tf.compat.v1.global_variables_initializer()) - weights = self.evaluate(weights) - # Check that the kernel weights got initialized to ones (from scope) - self.assertAllClose(weights[0], np.ones((3, 3, 3, 32))) - # Check that the bias still got initialized to zeros. - self.assertAllClose(weights[1], np.zeros((32))) - - def testFunctionalConv2DNoReuse(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.conv2d(images, 32, [3, 3]) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - conv_layers.conv2d(images, 32, [3, 3]) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 4) - - def testConstraints(self): - # Conv1D - k_constraint = lambda x: x / tf.reduce_sum(x) - b_constraint = lambda x: x / tf.reduce_max(x) - conv1d = conv_layers.Conv1D(2, 3, - kernel_constraint=k_constraint, - bias_constraint=b_constraint) - inputs = tf.random.uniform((5, 3, 5), seed=1) - conv1d(inputs) - self.assertEqual(conv1d.kernel_constraint, k_constraint) - self.assertEqual(conv1d.bias_constraint, b_constraint) - - # Conv2D - k_constraint = lambda x: x / tf.reduce_sum(x) - b_constraint = lambda x: x / tf.reduce_max(x) - conv2d = conv_layers.Conv2D(2, 3, - kernel_constraint=k_constraint, - bias_constraint=b_constraint) - inputs = tf.random.uniform((5, 3, 3, 5), seed=1) - conv2d(inputs) - self.assertEqual(conv2d.kernel_constraint, k_constraint) - self.assertEqual(conv2d.bias_constraint, b_constraint) - - # Conv3D - k_constraint = lambda x: x / tf.reduce_sum(x) - b_constraint = lambda x: x / tf.reduce_max(x) - conv3d = conv_layers.Conv3D(2, 3, - kernel_constraint=k_constraint, - bias_constraint=b_constraint) - inputs = tf.random.uniform((5, 3, 3, 3, 5), seed=1) - conv3d(inputs) - self.assertEqual(conv3d.kernel_constraint, k_constraint) - self.assertEqual(conv3d.bias_constraint, b_constraint) - - def testConv3DChannelsFirst(self): - # Test case for GitHub issue 15655 - with tf.Graph().as_default(): - images = tf.compat.v1.placeholder( - dtype=tf.float32, shape=[None, 1, 32, 32, 32]) - conv_layers.conv3d(images, 32, 9, data_format='channels_first') + layer = conv_layers.Conv2D(32, [3, 3], strides=(2, 2), padding="same") + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height / 2, width / 2, 32] + ) + + # Test strides integer + layer = conv_layers.Conv2D(32, [3, 3], strides=2, padding="same") + output = layer(images) + self.assertListEqual( + 
output.get_shape().as_list(), [5, height / 2, width / 2, 32] + ) + + # Test unequal strides + layer = conv_layers.Conv2D(32, [3, 3], strides=(2, 1), padding="same") + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height / 2, width, 32] + ) + + def testCreateConv1D(self): + width = 7 + data = tf.random.uniform((5, width, 4)) + layer = conv_layers.Conv1D(32, 3, activation=tf.nn.relu) + output = layer(data) + if not tf.executing_eagerly(): + self.assertEqual(output.op.name, "conv1d/Relu") + self.assertListEqual(output.get_shape().as_list(), [5, width - 2, 32]) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 4, 32]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testConv1DFloat16(self): + width = 7 + data = tf.random.uniform((5, width, 4), dtype="float16") + output = conv_layers.conv1d(data, 32, 3, activation=tf.nn.relu) + self.assertListEqual(output.get_shape().as_list(), [5, width - 2, 32]) + + def testCreateConv1DChannelsFirst(self): + with tf.Graph().as_default(): + width = 7 + data = tf.random.uniform((5, 4, width)) + layer = conv_layers.Conv1D(32, 3, data_format="channels_first") + output = layer(data) + self.assertListEqual( + output.get_shape().as_list(), [5, 32, width - 2] + ) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 4, 32]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testUnknownInputChannelsConv1D(self): + with tf.Graph().as_default(): + data = tf.compat.v1.placeholder(tf.float32, (5, 4, None)) + layer = conv_layers.Conv1D(32, 3, activation=tf.nn.relu) + with self.assertRaisesRegex( + ValueError, + "The channel dimension of the inputs " + "should be defined. The input_shape received is", + ): + _ = layer(data) + + data = tf.compat.v1.placeholder(tf.float32, (5, None, 4)) + layer = conv_layers.Conv1D(32, 3, data_format="channels_first") + with self.assertRaisesRegex( + ValueError, + "The channel dimension of the inputs " + "should be defined. The input_shape received is", + ): + _ = layer(data) + + def testCreateConv3D(self): + depth, height, width = 6, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 4)) + layer = conv_layers.Conv3D(32, [3, 3, 3], activation=tf.nn.relu) + output = layer(volumes) + if not tf.executing_eagerly(): + self.assertEqual(output.op.name, "conv3d/Relu") + self.assertListEqual( + output.get_shape().as_list(), + [5, depth - 2, height - 2, width - 2, 32], + ) + self.assertListEqual( + layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32] + ) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testUnknownInputChannelsConv3D(self): + with tf.Graph().as_default(): + volumes = tf.compat.v1.placeholder(tf.float32, (5, 6, 7, 9, None)) + layer = conv_layers.Conv3D(32, [3, 3, 3], activation=tf.nn.relu) + with self.assertRaisesRegex( + ValueError, + "The channel dimension of the inputs " + "should be defined. 
The input_shape received is", + ): + _ = layer(volumes) + + def testConv2DKernelRegularizer(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.Conv2D(32, [3, 3], kernel_regularizer=reg) + layer(images) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testConv2DBiasRegularizer(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.Conv2D(32, [3, 3], bias_regularizer=reg) + layer(images) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testConv2DNoBias(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.Conv2D( + 32, [3, 3], activation=tf.nn.relu, use_bias=False + ) + output = layer(images) + if not tf.executing_eagerly(): + self.assertEqual(output.op.name, "conv2d/Relu") + self.assertListEqual( + output.get_shape().as_list(), [5, height - 2, width - 2, 32] + ) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) + self.assertEqual(layer.bias, None) + + def testDilatedConv2D(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.Conv2D(32, [3, 3], dilation_rate=3) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 1, 3, 32]) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + # Test tuple dilation rate + layer = conv_layers.Conv2D(32, [3, 3], dilation_rate=(1, 3)) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height - 2, 3, 32] + ) + + def testFunctionalConv2DReuse(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.conv2d(images, 32, [3, 3], name="conv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + conv_layers.conv2d(images, 32, [3, 3], name="conv1", reuse=True) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + + def testFunctionalConv2DReuseFromScope(self): + with tf.Graph().as_default(): + with tf.compat.v1.variable_scope("scope"): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.conv2d(images, 32, [3, 3], name="conv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + with tf.compat.v1.variable_scope("scope", reuse=True): + conv_layers.conv2d(images, 32, [3, 3], name="conv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + + def testFunctionalConv2DInitializerFromScope(self): + with tf.Graph().as_default(), self.cached_session(): + with tf.compat.v1.variable_scope( + "scope", initializer=tf.compat.v1.ones_initializer() + ): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.conv2d(images, 32, [3, 3], name="conv1") + weights = tf.compat.v1.trainable_variables() + # Check 
the names of weights in order. + self.assertTrue("kernel" in weights[0].name) + self.assertTrue("bias" in weights[1].name) + self.evaluate(tf.compat.v1.global_variables_initializer()) + weights = self.evaluate(weights) + # Check that the kernel weights got initialized to ones (from + # scope) + self.assertAllClose(weights[0], np.ones((3, 3, 3, 32))) + # Check that the bias still got initialized to zeros. + self.assertAllClose(weights[1], np.zeros((32))) + + def testFunctionalConv2DNoReuse(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.conv2d(images, 32, [3, 3]) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + conv_layers.conv2d(images, 32, [3, 3]) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 4) + + def testConstraints(self): + # Conv1D + k_constraint = lambda x: x / tf.reduce_sum(x) + b_constraint = lambda x: x / tf.reduce_max(x) + conv1d = conv_layers.Conv1D( + 2, 3, kernel_constraint=k_constraint, bias_constraint=b_constraint + ) + inputs = tf.random.uniform((5, 3, 5), seed=1) + conv1d(inputs) + self.assertEqual(conv1d.kernel_constraint, k_constraint) + self.assertEqual(conv1d.bias_constraint, b_constraint) + + # Conv2D + k_constraint = lambda x: x / tf.reduce_sum(x) + b_constraint = lambda x: x / tf.reduce_max(x) + conv2d = conv_layers.Conv2D( + 2, 3, kernel_constraint=k_constraint, bias_constraint=b_constraint + ) + inputs = tf.random.uniform((5, 3, 3, 5), seed=1) + conv2d(inputs) + self.assertEqual(conv2d.kernel_constraint, k_constraint) + self.assertEqual(conv2d.bias_constraint, b_constraint) + + # Conv3D + k_constraint = lambda x: x / tf.reduce_sum(x) + b_constraint = lambda x: x / tf.reduce_max(x) + conv3d = conv_layers.Conv3D( + 2, 3, kernel_constraint=k_constraint, bias_constraint=b_constraint + ) + inputs = tf.random.uniform((5, 3, 3, 3, 5), seed=1) + conv3d(inputs) + self.assertEqual(conv3d.kernel_constraint, k_constraint) + self.assertEqual(conv3d.bias_constraint, b_constraint) + + def testConv3DChannelsFirst(self): + # Test case for GitHub issue 15655 + with tf.Graph().as_default(): + images = tf.compat.v1.placeholder( + dtype=tf.float32, shape=[None, 1, 32, 32, 32] + ) + conv_layers.conv3d(images, 32, 9, data_format="channels_first") class SeparableConv1DTest(tf.test.TestCase): + def testInvalidDataFormat(self): + length = 9 + data = tf.random.uniform((5, length, 3), seed=1) + with self.assertRaisesRegex(ValueError, "data_format"): + conv_layers.separable_conv1d(data, 32, 3, data_format="invalid") + + def testInvalidStrides(self): + length = 9 + data = tf.random.uniform((5, length, 3), seed=1) + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.separable_conv1d(data, 32, 3, strides=(1, 2)) + + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.separable_conv1d(data, 32, 3, strides=None) - def testInvalidDataFormat(self): - length = 9 - data = tf.random.uniform((5, length, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'data_format'): - conv_layers.separable_conv1d(data, 32, 3, data_format='invalid') - - def testInvalidStrides(self): - length = 9 - data = tf.random.uniform((5, length, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.separable_conv1d(data, 32, 3, strides=(1, 2)) - - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.separable_conv1d(data, 32, 3, strides=None) - - def testInvalidKernelSize(self): - length = 9 - data = tf.random.uniform((5, length, 3), seed=1) - 
with self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.separable_conv1d(data, 32, (1, 2)) - - with self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.separable_conv1d(data, 32, None) - - def testCreateSeparableConv1D(self): - length = 9 - data = tf.random.uniform((5, length, 4)) - layer = conv_layers.SeparableConv1D(32, 3, activation=tf.nn.relu) - output = layer(data) - if not tf.executing_eagerly(): - self.assertEqual(output.op.name, 'separable_conv1d/Relu') - self.assertEqual(output.get_shape().as_list(), [5, length - 2, 32]) - self.assertEqual(layer.depthwise_kernel.get_shape().as_list(), [3, 4, 1]) - self.assertEqual(layer.pointwise_kernel.get_shape().as_list(), [1, 4, 32]) - self.assertEqual(layer.bias.get_shape().as_list(), [32]) - - def testCreateSeparableConv1DDepthMultiplier(self): - length = 9 - data = tf.random.uniform((5, length, 4)) - layer = conv_layers.SeparableConv1D(32, 3, depth_multiplier=2) - output = layer(data) - self.assertEqual(output.get_shape().as_list(), [5, length - 2, 32]) - self.assertEqual(layer.depthwise_kernel.get_shape().as_list(), [3, 4, 2]) - self.assertEqual(layer.pointwise_kernel.get_shape().as_list(), [1, 8, 32]) - self.assertEqual(layer.bias.get_shape().as_list(), [32]) - - def testCreateSeparableConv1DChannelsFirst(self): - with tf.Graph().as_default(): - length = 9 - data = tf.random.uniform((5, 4, length)) - layer = conv_layers.SeparableConv1D(32, 3, data_format='channels_first') - output = layer(data) - self.assertEqual(output.get_shape().as_list(), [5, 32, length - 2]) - self.assertEqual(layer.depthwise_kernel.get_shape().as_list(), [3, 4, 1]) - self.assertEqual(layer.pointwise_kernel.get_shape().as_list(), [1, 4, 32]) - self.assertEqual(layer.bias.get_shape().as_list(), [32]) - - def testSeparableConv1DPaddingSame(self): - length = 9 - data = tf.random.uniform((5, length, 32), seed=1) - layer = conv_layers.SeparableConv1D( - 64, length, padding='same') - output = layer(data) - self.assertEqual(output.get_shape().as_list(), [5, length, 64]) - - def testCreateSeparableConv1DWithStrides(self): - length = 10 - data = tf.random.uniform((5, length, 3), seed=1) - layer = conv_layers.SeparableConv1D(32, 3, strides=2, padding='same') - output = layer(data) - self.assertEqual(output.get_shape().as_list(), [5, length // 2, 32]) - - def testCreateSeparableConv1DWithStridesChannelsFirst(self): - with tf.Graph().as_default(): - data_format = 'channels_first' - length = 10 - data = tf.random.uniform((5, 3, length), seed=1) - layer = conv_layers.SeparableConv1D( - 32, 3, strides=2, padding='same', data_format=data_format) - output = layer(data) - self.assertEqual(output.get_shape().as_list(), [5, 32, length // 2]) - - def testFunctionalConv1DReuse(self): - with tf.Graph().as_default(): - length = 10 - data = tf.random.uniform((5, length, 3), seed=1) - conv_layers.separable_conv1d(data, 32, 3, name='sepconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - conv_layers.separable_conv1d(data, 32, 3, name='sepconv1', reuse=True) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - - def testFunctionalConv1DReuseFromScope(self): - with tf.Graph().as_default(): - with tf.compat.v1.variable_scope('scope'): + def testInvalidKernelSize(self): + length = 9 + data = tf.random.uniform((5, length, 3), seed=1) + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.separable_conv1d(data, 32, (1, 2)) + + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.separable_conv1d(data, 32, 
None) + + def testCreateSeparableConv1D(self): + length = 9 + data = tf.random.uniform((5, length, 4)) + layer = conv_layers.SeparableConv1D(32, 3, activation=tf.nn.relu) + output = layer(data) + if not tf.executing_eagerly(): + self.assertEqual(output.op.name, "separable_conv1d/Relu") + self.assertEqual(output.get_shape().as_list(), [5, length - 2, 32]) + self.assertEqual( + layer.depthwise_kernel.get_shape().as_list(), [3, 4, 1] + ) + self.assertEqual( + layer.pointwise_kernel.get_shape().as_list(), [1, 4, 32] + ) + self.assertEqual(layer.bias.get_shape().as_list(), [32]) + + def testCreateSeparableConv1DDepthMultiplier(self): + length = 9 + data = tf.random.uniform((5, length, 4)) + layer = conv_layers.SeparableConv1D(32, 3, depth_multiplier=2) + output = layer(data) + self.assertEqual(output.get_shape().as_list(), [5, length - 2, 32]) + self.assertEqual( + layer.depthwise_kernel.get_shape().as_list(), [3, 4, 2] + ) + self.assertEqual( + layer.pointwise_kernel.get_shape().as_list(), [1, 8, 32] + ) + self.assertEqual(layer.bias.get_shape().as_list(), [32]) + + def testCreateSeparableConv1DChannelsFirst(self): + with tf.Graph().as_default(): + length = 9 + data = tf.random.uniform((5, 4, length)) + layer = conv_layers.SeparableConv1D( + 32, 3, data_format="channels_first" + ) + output = layer(data) + self.assertEqual(output.get_shape().as_list(), [5, 32, length - 2]) + self.assertEqual( + layer.depthwise_kernel.get_shape().as_list(), [3, 4, 1] + ) + self.assertEqual( + layer.pointwise_kernel.get_shape().as_list(), [1, 4, 32] + ) + self.assertEqual(layer.bias.get_shape().as_list(), [32]) + + def testSeparableConv1DPaddingSame(self): + length = 9 + data = tf.random.uniform((5, length, 32), seed=1) + layer = conv_layers.SeparableConv1D(64, length, padding="same") + output = layer(data) + self.assertEqual(output.get_shape().as_list(), [5, length, 64]) + + def testCreateSeparableConv1DWithStrides(self): length = 10 data = tf.random.uniform((5, length, 3), seed=1) - conv_layers.separable_conv1d(data, 32, 3, name='sepconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - with tf.compat.v1.variable_scope('scope', reuse=True): - conv_layers.separable_conv1d(data, 32, 3, name='sepconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - - def testFunctionalConv1DNoReuse(self): - with tf.Graph().as_default(): - length = 10 - data = tf.random.uniform((5, length, 3), seed=1) - conv_layers.separable_conv1d(data, 32, 3) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - conv_layers.separable_conv1d(data, 32, 3) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 6) - - def testSeparableConv1DDepthwiseRegularizer(self): - with tf.Graph().as_default(): - length = 9 - data = tf.random.uniform((5, length, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.SeparableConv1D(32, 3, depthwise_regularizer=reg) - layer(data) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testSeparableConv1DPointwiseRegularizer(self): - with tf.Graph().as_default(): - length = 9 - data = tf.random.uniform((5, length, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.SeparableConv1D(32, 3, pointwise_regularizer=reg) - layer(data) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - 
self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testSeparableConv1DBiasRegularizer(self): - with tf.Graph().as_default(): - length = 9 - data = tf.random.uniform((5, length, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.SeparableConv1D(32, 3, bias_regularizer=reg) - layer(data) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testSeparableConv1DNoBias(self): - with tf.Graph().as_default(): - length = 9 - data = tf.random.uniform((5, length, 4)) - layer = conv_layers.SeparableConv1D( - 32, 3, activation=tf.nn.relu, use_bias=False) - output = layer(data) - self.assertEqual(output.op.name, 'separable_conv1d/Relu') - self.assertEqual(layer.bias, None) - - def testConstraints(self): - d_constraint = lambda x: x / tf.reduce_sum(x) - p_constraint = lambda x: x / tf.reduce_sum(x) - b_constraint = lambda x: x / tf.reduce_max(x) - layer = conv_layers.SeparableConv1D(2, 3, - depthwise_constraint=d_constraint, - pointwise_constraint=p_constraint, - bias_constraint=b_constraint) - inputs = tf.random.uniform((5, 3, 5), seed=1) - layer(inputs) - self.assertEqual(layer.depthwise_constraint, d_constraint) - self.assertEqual(layer.pointwise_constraint, p_constraint) - self.assertEqual(layer.bias_constraint, b_constraint) + layer = conv_layers.SeparableConv1D(32, 3, strides=2, padding="same") + output = layer(data) + self.assertEqual(output.get_shape().as_list(), [5, length // 2, 32]) + + def testCreateSeparableConv1DWithStridesChannelsFirst(self): + with tf.Graph().as_default(): + data_format = "channels_first" + length = 10 + data = tf.random.uniform((5, 3, length), seed=1) + layer = conv_layers.SeparableConv1D( + 32, 3, strides=2, padding="same", data_format=data_format + ) + output = layer(data) + self.assertEqual(output.get_shape().as_list(), [5, 32, length // 2]) + + def testFunctionalConv1DReuse(self): + with tf.Graph().as_default(): + length = 10 + data = tf.random.uniform((5, length, 3), seed=1) + conv_layers.separable_conv1d(data, 32, 3, name="sepconv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + conv_layers.separable_conv1d( + data, 32, 3, name="sepconv1", reuse=True + ) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + + def testFunctionalConv1DReuseFromScope(self): + with tf.Graph().as_default(): + with tf.compat.v1.variable_scope("scope"): + length = 10 + data = tf.random.uniform((5, length, 3), seed=1) + conv_layers.separable_conv1d(data, 32, 3, name="sepconv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + with tf.compat.v1.variable_scope("scope", reuse=True): + conv_layers.separable_conv1d(data, 32, 3, name="sepconv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + + def testFunctionalConv1DNoReuse(self): + with tf.Graph().as_default(): + length = 10 + data = tf.random.uniform((5, length, 3), seed=1) + conv_layers.separable_conv1d(data, 32, 3) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + conv_layers.separable_conv1d(data, 32, 3) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 6) + + def testSeparableConv1DDepthwiseRegularizer(self): + with tf.Graph().as_default(): + length = 9 + data = tf.random.uniform((5, length, 
4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.SeparableConv1D( + 32, 3, depthwise_regularizer=reg + ) + layer(data) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testSeparableConv1DPointwiseRegularizer(self): + with tf.Graph().as_default(): + length = 9 + data = tf.random.uniform((5, length, 4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.SeparableConv1D( + 32, 3, pointwise_regularizer=reg + ) + layer(data) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testSeparableConv1DBiasRegularizer(self): + with tf.Graph().as_default(): + length = 9 + data = tf.random.uniform((5, length, 4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.SeparableConv1D(32, 3, bias_regularizer=reg) + layer(data) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testSeparableConv1DNoBias(self): + with tf.Graph().as_default(): + length = 9 + data = tf.random.uniform((5, length, 4)) + layer = conv_layers.SeparableConv1D( + 32, 3, activation=tf.nn.relu, use_bias=False + ) + output = layer(data) + self.assertEqual(output.op.name, "separable_conv1d/Relu") + self.assertEqual(layer.bias, None) + + def testConstraints(self): + d_constraint = lambda x: x / tf.reduce_sum(x) + p_constraint = lambda x: x / tf.reduce_sum(x) + b_constraint = lambda x: x / tf.reduce_max(x) + layer = conv_layers.SeparableConv1D( + 2, + 3, + depthwise_constraint=d_constraint, + pointwise_constraint=p_constraint, + bias_constraint=b_constraint, + ) + inputs = tf.random.uniform((5, 3, 5), seed=1) + layer(inputs) + self.assertEqual(layer.depthwise_constraint, d_constraint) + self.assertEqual(layer.pointwise_constraint, p_constraint) + self.assertEqual(layer.bias_constraint, b_constraint) class SeparableConv2DTest(tf.test.TestCase): + def testInvalidDataFormat(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + with self.assertRaisesRegex(ValueError, "data_format"): + conv_layers.separable_conv2d(images, 32, 3, data_format="invalid") - def testInvalidDataFormat(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'data_format'): - conv_layers.separable_conv2d(images, 32, 3, data_format='invalid') - - def testInvalidStrides(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.separable_conv2d(images, 32, 3, strides=(1, 2, 3)) - - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.separable_conv2d(images, 32, 3, strides=None) - - def testInvalidKernelSize(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.separable_conv2d(images, 32, (1, 2, 3)) - - with 
self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.separable_conv2d(images, 32, None) - - def testCreateSeparableConv2D(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.SeparableConv2D(32, [3, 3], activation=tf.nn.relu) - output = layer(images) - if not tf.executing_eagerly(): - self.assertEqual(output.op.name, 'separable_conv2d/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height - 2, width - 2, 32]) - self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), - [3, 3, 4, 1]) - self.assertListEqual(layer.pointwise_kernel.get_shape().as_list(), - [1, 1, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testCreateSeparableConv2DDepthMultiplier(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.SeparableConv2D(32, [3, 3], depth_multiplier=2) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height - 2, width - 2, 32]) - self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), - [3, 3, 4, 2]) - self.assertListEqual(layer.pointwise_kernel.get_shape().as_list(), - [1, 1, 8, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testCreateSeparableConv2DIntegerKernelSize(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.SeparableConv2D(32, 3) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height - 2, width - 2, 32]) - self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), - [3, 3, 4, 1]) - self.assertListEqual(layer.pointwise_kernel.get_shape().as_list(), - [1, 1, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testCreateSeparableConv2DChannelsFirst(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, 4, height, width)) - layer = conv_layers.SeparableConv2D( - 32, [3, 3], data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height - 2, width - 2]) - self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), - [3, 3, 4, 1]) - self.assertListEqual(layer.pointwise_kernel.get_shape().as_list(), - [1, 1, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testSeparableConv2DPaddingSame(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 32), seed=1) - layer = conv_layers.SeparableConv2D( - 64, images.get_shape()[1:3], padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, height, width, 64]) - - def testCreateSeparableConvWithStrides(self): - with tf.Graph().as_default(): - height, width = 6, 8 - # Test strides tuple - images = tf.random.uniform((5, height, width, 3), seed=1) - layer = conv_layers.SeparableConv2D( - 32, [3, 3], strides=(2, 2), padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width / 2, 32]) - - # Test strides integer - layer = conv_layers.SeparableConv2D(32, [3, 3], strides=2, padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width / 2, 32]) - - # Test unequal strides - layer = conv_layers.SeparableConv2D( - 32, [3, 3], strides=(2, 1), padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width, 32]) - - def 
testCreateSeparableConvWithStridesChannelsFirst(self): - with tf.Graph().as_default(): - data_format = 'channels_first' - height, width = 6, 8 - # Test strides tuple - images = tf.random.uniform((5, 3, height, width), seed=1) - layer = conv_layers.SeparableConv2D( - 32, [3, 3], strides=(2, 2), padding='same', data_format=data_format) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height / 2, width / 2]) - - # Test strides integer - layer = conv_layers.SeparableConv2D(32, [3, 3], strides=2, padding='same', - data_format=data_format) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height / 2, width / 2]) - - # Test unequal strides - layer = conv_layers.SeparableConv2D( - 32, [3, 3], strides=(2, 1), padding='same', data_format=data_format) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height / 2, width]) - - def testFunctionalConv2DReuse(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - conv_layers.separable_conv2d( - images, 32, [3, 3], name='sepconv1', reuse=True) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - - def testFunctionalConv2DReuseFromScope(self): - with tf.Graph().as_default(): - with tf.compat.v1.variable_scope('scope'): + def testInvalidStrides(self): height, width = 7, 9 images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - with tf.compat.v1.variable_scope('scope', reuse=True): - conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - - def testFunctionalConv2DInitializerFromScope(self): - with tf.Graph().as_default(), self.cached_session(): - with tf.compat.v1.variable_scope( - 'scope', initializer=tf.compat.v1.ones_initializer()): + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.separable_conv2d(images, 32, 3, strides=(1, 2, 3)) + + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.separable_conv2d(images, 32, 3, strides=None) + + def testInvalidKernelSize(self): height, width = 7, 9 images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') - weights = tf.compat.v1.trainable_variables() - # Check the names of weights in order. - self.assertTrue('depthwise_kernel' in weights[0].name) - self.assertTrue('pointwise_kernel' in weights[1].name) - self.assertTrue('bias' in weights[2].name) - self.evaluate(tf.compat.v1.global_variables_initializer()) - weights = self.evaluate(weights) - # Check that the kernel weights got initialized to ones (from scope) - self.assertAllClose(weights[0], np.ones((3, 3, 3, 1))) - self.assertAllClose(weights[1], np.ones((1, 1, 3, 32))) - # Check that the bias still got initialized to zeros. 
- self.assertAllClose(weights[2], np.zeros((32))) - - def testFunctionalConv2DNoReuse(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.separable_conv2d(images, 32, [3, 3]) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) - conv_layers.separable_conv2d(images, 32, [3, 3]) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 6) - - def testSeparableConv2DDepthwiseRegularizer(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.SeparableConv2D(32, [3, 3], depthwise_regularizer=reg) - layer(images) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testSeparableConv2DPointwiseRegularizer(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.SeparableConv2D(32, [3, 3], pointwise_regularizer=reg) - layer(images) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testSeparableConv2DBiasRegularizer(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.SeparableConv2D(32, [3, 3], bias_regularizer=reg) - layer(images) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testSeparableConv2DNoBias(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.SeparableConv2D( - 32, [3, 3], activation=tf.nn.relu, use_bias=False) - output = layer(images) - self.assertEqual(output.op.name, 'separable_conv2d/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height - 2, width - 2, 32]) - self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), - [3, 3, 4, 1]) - self.assertListEqual(layer.pointwise_kernel.get_shape().as_list(), - [1, 1, 4, 32]) - self.assertEqual(layer.bias, None) - - def testConstraints(self): - d_constraint = lambda x: x / tf.reduce_sum(x) - p_constraint = lambda x: x / tf.reduce_sum(x) - b_constraint = lambda x: x / tf.reduce_max(x) - layer = conv_layers.SeparableConv2D(2, 3, - depthwise_constraint=d_constraint, - pointwise_constraint=p_constraint, - bias_constraint=b_constraint) - inputs = tf.random.uniform((5, 3, 3, 5), seed=1) - layer(inputs) - self.assertEqual(layer.depthwise_constraint, d_constraint) - self.assertEqual(layer.pointwise_constraint, p_constraint) - self.assertEqual(layer.bias_constraint, b_constraint) + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.separable_conv2d(images, 32, (1, 2, 3)) + + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.separable_conv2d(images, 32, None) + + def 
testCreateSeparableConv2D(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.SeparableConv2D(32, [3, 3], activation=tf.nn.relu) + output = layer(images) + if not tf.executing_eagerly(): + self.assertEqual(output.op.name, "separable_conv2d/Relu") + self.assertListEqual( + output.get_shape().as_list(), [5, height - 2, width - 2, 32] + ) + self.assertListEqual( + layer.depthwise_kernel.get_shape().as_list(), [3, 3, 4, 1] + ) + self.assertListEqual( + layer.pointwise_kernel.get_shape().as_list(), [1, 1, 4, 32] + ) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testCreateSeparableConv2DDepthMultiplier(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.SeparableConv2D(32, [3, 3], depth_multiplier=2) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height - 2, width - 2, 32] + ) + self.assertListEqual( + layer.depthwise_kernel.get_shape().as_list(), [3, 3, 4, 2] + ) + self.assertListEqual( + layer.pointwise_kernel.get_shape().as_list(), [1, 1, 8, 32] + ) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testCreateSeparableConv2DIntegerKernelSize(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.SeparableConv2D(32, 3) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height - 2, width - 2, 32] + ) + self.assertListEqual( + layer.depthwise_kernel.get_shape().as_list(), [3, 3, 4, 1] + ) + self.assertListEqual( + layer.pointwise_kernel.get_shape().as_list(), [1, 1, 4, 32] + ) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testCreateSeparableConv2DChannelsFirst(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, 4, height, width)) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], data_format="channels_first" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, 32, height - 2, width - 2] + ) + self.assertListEqual( + layer.depthwise_kernel.get_shape().as_list(), [3, 3, 4, 1] + ) + self.assertListEqual( + layer.pointwise_kernel.get_shape().as_list(), [1, 1, 4, 32] + ) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testSeparableConv2DPaddingSame(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 32), seed=1) + layer = conv_layers.SeparableConv2D( + 64, images.get_shape()[1:3], padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height, width, 64] + ) + + def testCreateSeparableConvWithStrides(self): + with tf.Graph().as_default(): + height, width = 6, 8 + # Test strides tuple + images = tf.random.uniform((5, height, width, 3), seed=1) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=(2, 2), padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height / 2, width / 2, 32] + ) + + # Test strides integer + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=2, padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height / 2, width / 2, 32] + ) + + # Test unequal strides + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=(2, 1), padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height / 2, width, 32] + ) + + def 
testCreateSeparableConvWithStridesChannelsFirst(self): + with tf.Graph().as_default(): + data_format = "channels_first" + height, width = 6, 8 + # Test strides tuple + images = tf.random.uniform((5, 3, height, width), seed=1) + layer = conv_layers.SeparableConv2D( + 32, + [3, 3], + strides=(2, 2), + padding="same", + data_format=data_format, + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, 32, height / 2, width / 2] + ) + + # Test strides integer + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=2, padding="same", data_format=data_format + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, 32, height / 2, width / 2] + ) + + # Test unequal strides + layer = conv_layers.SeparableConv2D( + 32, + [3, 3], + strides=(2, 1), + padding="same", + data_format=data_format, + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, 32, height / 2, width] + ) + + def testFunctionalConv2DReuse(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.separable_conv2d(images, 32, [3, 3], name="sepconv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + conv_layers.separable_conv2d( + images, 32, [3, 3], name="sepconv1", reuse=True + ) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + + def testFunctionalConv2DReuseFromScope(self): + with tf.Graph().as_default(): + with tf.compat.v1.variable_scope("scope"): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.separable_conv2d( + images, 32, [3, 3], name="sepconv1" + ) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + with tf.compat.v1.variable_scope("scope", reuse=True): + conv_layers.separable_conv2d( + images, 32, [3, 3], name="sepconv1" + ) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + + def testFunctionalConv2DInitializerFromScope(self): + with tf.Graph().as_default(), self.cached_session(): + with tf.compat.v1.variable_scope( + "scope", initializer=tf.compat.v1.ones_initializer() + ): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.separable_conv2d( + images, 32, [3, 3], name="sepconv1" + ) + weights = tf.compat.v1.trainable_variables() + # Check the names of weights in order. + self.assertTrue("depthwise_kernel" in weights[0].name) + self.assertTrue("pointwise_kernel" in weights[1].name) + self.assertTrue("bias" in weights[2].name) + self.evaluate(tf.compat.v1.global_variables_initializer()) + weights = self.evaluate(weights) + # Check that the kernel weights got initialized to ones (from + # scope) + self.assertAllClose(weights[0], np.ones((3, 3, 3, 1))) + self.assertAllClose(weights[1], np.ones((1, 1, 3, 32))) + # Check that the bias still got initialized to zeros. 
+ self.assertAllClose(weights[2], np.zeros((32))) + + def testFunctionalConv2DNoReuse(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.separable_conv2d(images, 32, [3, 3]) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 3) + conv_layers.separable_conv2d(images, 32, [3, 3]) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 6) + + def testSeparableConv2DDepthwiseRegularizer(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], depthwise_regularizer=reg + ) + layer(images) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testSeparableConv2DPointwiseRegularizer(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], pointwise_regularizer=reg + ) + layer(images) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testSeparableConv2DBiasRegularizer(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], bias_regularizer=reg + ) + layer(images) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testSeparableConv2DNoBias(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], activation=tf.nn.relu, use_bias=False + ) + output = layer(images) + self.assertEqual(output.op.name, "separable_conv2d/Relu") + self.assertListEqual( + output.get_shape().as_list(), [5, height - 2, width - 2, 32] + ) + self.assertListEqual( + layer.depthwise_kernel.get_shape().as_list(), [3, 3, 4, 1] + ) + self.assertListEqual( + layer.pointwise_kernel.get_shape().as_list(), [1, 1, 4, 32] + ) + self.assertEqual(layer.bias, None) + + def testConstraints(self): + d_constraint = lambda x: x / tf.reduce_sum(x) + p_constraint = lambda x: x / tf.reduce_sum(x) + b_constraint = lambda x: x / tf.reduce_max(x) + layer = conv_layers.SeparableConv2D( + 2, + 3, + depthwise_constraint=d_constraint, + pointwise_constraint=p_constraint, + bias_constraint=b_constraint, + ) + inputs = tf.random.uniform((5, 3, 3, 5), seed=1) + layer(inputs) + self.assertEqual(layer.depthwise_constraint, d_constraint) + self.assertEqual(layer.pointwise_constraint, p_constraint) + self.assertEqual(layer.bias_constraint, b_constraint) class Conv2DTransposeTest(tf.test.TestCase): + def testInvalidDataFormat(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + with 
self.assertRaisesRegex(ValueError, "data_format"): + conv_layers.conv2d_transpose(images, 32, 3, data_format="invalid") - def testInvalidDataFormat(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'data_format'): - conv_layers.conv2d_transpose(images, 32, 3, data_format='invalid') - - def testInvalidStrides(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.conv2d_transpose(images, 32, 3, strides=(1, 2, 3)) - - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.conv2d_transpose(images, 32, 3, strides=None) - - def testInvalidKernelSize(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.conv2d_transpose(images, 32, (1, 2, 3)) - - with self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.conv2d_transpose(images, 32, None) - - def testCreateConv2DTranspose(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.Conv2DTranspose(32, [3, 3], activation=tf.nn.relu) - output = layer(images) - if not tf.executing_eagerly(): - self.assertEqual(output.op.name, 'conv2d_transpose/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height + 2, width + 2, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testConv2DTransposeFloat16(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4), dtype='float16') - output = conv_layers.conv2d_transpose(images, 32, [3, 3], - activation=tf.nn.relu) - self.assertListEqual(output.get_shape().as_list(), - [5, height + 2, width + 2, 32]) - - def testCreateConv2DTransposeIntegerKernelSize(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.Conv2DTranspose(32, 3) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height + 2, width + 2, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testCreateConv2DTransposeChannelsFirst(self): - height, width = 7, 9 - images = tf.random.uniform((5, 4, height, width)) - layer = conv_layers.Conv2DTranspose( - 32, [3, 3], data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height + 2, width + 2]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - - def testConv2DTransposePaddingSame(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 32), seed=1) - layer = conv_layers.Conv2DTranspose( - 64, images.get_shape()[1:3], padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, height, width, 64]) - - def testCreateConv2DTransposeWithStrides(self): - height, width = 6, 8 - # Test strides tuple - images = tf.random.uniform((5, height, width, 3), seed=1) - layer = conv_layers.Conv2DTranspose( - 32, [3, 3], strides=(2, 2), padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height * 2, width * 2, 32]) - - # Test strides integer - layer = conv_layers.Conv2DTranspose(32, [3, 3], strides=2, 
padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height * 2, width * 2, 32]) - - # Test unequal strides - layer = conv_layers.Conv2DTranspose( - 32, [3, 3], strides=(2, 1), padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height * 2, width, 32]) - - def testConv2DTransposeKernelRegularizer(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.Conv2DTranspose(32, [3, 3], kernel_regularizer=reg) - layer(images) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testConv2DTransposeBiasRegularizer(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.Conv2DTranspose(32, [3, 3], bias_regularizer=reg) - layer(images) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testConv2DTransposeNoBias(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = conv_layers.Conv2DTranspose( - 32, [3, 3], activation=tf.nn.relu, use_bias=False) - output = layer(images) - self.assertEqual(output.op.name, 'conv2d_transpose/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height + 2, width + 2, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) - self.assertEqual(layer.bias, None) - - def testFunctionalConv2DTransposeReuse(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - conv_layers.conv2d_transpose( - images, 32, [3, 3], name='deconv1', reuse=True) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - - def testFunctionalConv2DTransposeReuseFromScope(self): - with tf.Graph().as_default(): - with tf.compat.v1.variable_scope('scope'): + def testInvalidStrides(self): height, width = 7, 9 images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - with tf.compat.v1.variable_scope('scope', reuse=True): - conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - - def testFunctionalConv2DTransposeInitializerFromScope(self): - with tf.Graph().as_default(), self.cached_session(): - with tf.compat.v1.variable_scope( - 'scope', initializer=tf.compat.v1.ones_initializer()): + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.conv2d_transpose(images, 32, 3, strides=(1, 2, 3)) + + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.conv2d_transpose(images, 32, 3, strides=None) + + def testInvalidKernelSize(self): height, width = 7, 9 images = tf.random.uniform((5, height, width, 3), seed=1) - 
conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') - weights = tf.compat.v1.trainable_variables() - # Check the names of weights in order. - self.assertTrue('kernel' in weights[0].name) - self.assertTrue('bias' in weights[1].name) - self.evaluate(tf.compat.v1.global_variables_initializer()) - weights = self.evaluate(weights) - # Check that the kernel weights got initialized to ones (from scope) - self.assertAllClose(weights[0], np.ones((3, 3, 32, 3))) - # Check that the bias still got initialized to zeros. - self.assertAllClose(weights[1], np.zeros((32))) - - def testFunctionalConv2DTransposeNoReuse(self): - with tf.Graph().as_default(): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - conv_layers.conv2d_transpose(images, 32, [3, 3]) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - conv_layers.conv2d_transpose(images, 32, [3, 3]) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 4) - - def testConstraints(self): - k_constraint = lambda x: x / tf.reduce_sum(x) - b_constraint = lambda x: x / tf.reduce_max(x) - layer = conv_layers.Conv2DTranspose(2, 3, - kernel_constraint=k_constraint, - bias_constraint=b_constraint) - inputs = tf.random.uniform((5, 3, 3, 5), seed=1) - layer(inputs) - self.assertEqual(layer.kernel_constraint, k_constraint) - self.assertEqual(layer.bias_constraint, b_constraint) + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.conv2d_transpose(images, 32, (1, 2, 3)) + + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.conv2d_transpose(images, 32, None) + + def testCreateConv2DTranspose(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.Conv2DTranspose(32, [3, 3], activation=tf.nn.relu) + output = layer(images) + if not tf.executing_eagerly(): + self.assertEqual(output.op.name, "conv2d_transpose/Relu") + self.assertListEqual( + output.get_shape().as_list(), [5, height + 2, width + 2, 32] + ) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testConv2DTransposeFloat16(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4), dtype="float16") + output = conv_layers.conv2d_transpose( + images, 32, [3, 3], activation=tf.nn.relu + ) + self.assertListEqual( + output.get_shape().as_list(), [5, height + 2, width + 2, 32] + ) + + def testCreateConv2DTransposeIntegerKernelSize(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.Conv2DTranspose(32, 3) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height + 2, width + 2, 32] + ) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testCreateConv2DTransposeChannelsFirst(self): + height, width = 7, 9 + images = tf.random.uniform((5, 4, height, width)) + layer = conv_layers.Conv2DTranspose( + 32, [3, 3], data_format="channels_first" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, 32, height + 2, width + 2] + ) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + + def testConv2DTransposePaddingSame(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 32), seed=1) + layer = conv_layers.Conv2DTranspose( + 64, 
images.get_shape()[1:3], padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height, width, 64] + ) + + def testCreateConv2DTransposeWithStrides(self): + height, width = 6, 8 + # Test strides tuple + images = tf.random.uniform((5, height, width, 3), seed=1) + layer = conv_layers.Conv2DTranspose( + 32, [3, 3], strides=(2, 2), padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height * 2, width * 2, 32] + ) + + # Test strides integer + layer = conv_layers.Conv2DTranspose( + 32, [3, 3], strides=2, padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height * 2, width * 2, 32] + ) + + # Test unequal strides + layer = conv_layers.Conv2DTranspose( + 32, [3, 3], strides=(2, 1), padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height * 2, width, 32] + ) + + def testConv2DTransposeKernelRegularizer(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.Conv2DTranspose( + 32, [3, 3], kernel_regularizer=reg + ) + layer(images) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testConv2DTransposeBiasRegularizer(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.Conv2DTranspose( + 32, [3, 3], bias_regularizer=reg + ) + layer(images) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testConv2DTransposeNoBias(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = conv_layers.Conv2DTranspose( + 32, [3, 3], activation=tf.nn.relu, use_bias=False + ) + output = layer(images) + self.assertEqual(output.op.name, "conv2d_transpose/Relu") + self.assertListEqual( + output.get_shape().as_list(), [5, height + 2, width + 2, 32] + ) + self.assertListEqual( + layer.kernel.get_shape().as_list(), [3, 3, 32, 4] + ) + self.assertEqual(layer.bias, None) + + def testFunctionalConv2DTransposeReuse(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.conv2d_transpose(images, 32, [3, 3], name="deconv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + conv_layers.conv2d_transpose( + images, 32, [3, 3], name="deconv1", reuse=True + ) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + + def testFunctionalConv2DTransposeReuseFromScope(self): + with tf.Graph().as_default(): + with tf.compat.v1.variable_scope("scope"): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.conv2d_transpose(images, 32, [3, 3], name="deconv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + with tf.compat.v1.variable_scope("scope", reuse=True): + conv_layers.conv2d_transpose(images, 32, [3, 3], name="deconv1") 
+ self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + + def testFunctionalConv2DTransposeInitializerFromScope(self): + with tf.Graph().as_default(), self.cached_session(): + with tf.compat.v1.variable_scope( + "scope", initializer=tf.compat.v1.ones_initializer() + ): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.conv2d_transpose(images, 32, [3, 3], name="deconv1") + weights = tf.compat.v1.trainable_variables() + # Check the names of weights in order. + self.assertTrue("kernel" in weights[0].name) + self.assertTrue("bias" in weights[1].name) + self.evaluate(tf.compat.v1.global_variables_initializer()) + weights = self.evaluate(weights) + # Check that the kernel weights got initialized to ones (from + # scope) + self.assertAllClose(weights[0], np.ones((3, 3, 32, 3))) + # Check that the bias still got initialized to zeros. + self.assertAllClose(weights[1], np.zeros((32))) + + def testFunctionalConv2DTransposeNoReuse(self): + with tf.Graph().as_default(): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + conv_layers.conv2d_transpose(images, 32, [3, 3]) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + conv_layers.conv2d_transpose(images, 32, [3, 3]) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 4) + + def testConstraints(self): + k_constraint = lambda x: x / tf.reduce_sum(x) + b_constraint = lambda x: x / tf.reduce_max(x) + layer = conv_layers.Conv2DTranspose( + 2, 3, kernel_constraint=k_constraint, bias_constraint=b_constraint + ) + inputs = tf.random.uniform((5, 3, 3, 5), seed=1) + layer(inputs) + self.assertEqual(layer.kernel_constraint, k_constraint) + self.assertEqual(layer.bias_constraint, b_constraint) class Conv3DTransposeTest(tf.test.TestCase): + def testInvalidDataFormat(self): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) + with self.assertRaisesRegex(ValueError, "data_format"): + conv_layers.conv3d_transpose(volumes, 4, 3, data_format="invalid") + + def testInvalidStrides(self): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.conv3d_transpose(volumes, 4, 3, strides=(1, 2)) + + with self.assertRaisesRegex(ValueError, "strides"): + conv_layers.conv3d_transpose(volumes, 4, 3, strides=None) + + def testInvalidKernelSize(self): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.conv3d_transpose(volumes, 4, (1, 2)) + + with self.assertRaisesRegex(ValueError, "kernel_size"): + conv_layers.conv3d_transpose(volumes, 4, None) - def testInvalidDataFormat(self): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) - with self.assertRaisesRegex(ValueError, 'data_format'): - conv_layers.conv3d_transpose(volumes, 4, 3, data_format='invalid') - - def testInvalidStrides(self): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.conv3d_transpose(volumes, 4, 3, strides=(1, 2)) - - with self.assertRaisesRegex(ValueError, 'strides'): - conv_layers.conv3d_transpose(volumes, 4, 3, strides=None) - - def testInvalidKernelSize(self): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 
32), seed=1) - with self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.conv3d_transpose(volumes, 4, (1, 2)) - - with self.assertRaisesRegex(ValueError, 'kernel_size'): - conv_layers.conv3d_transpose(volumes, 4, None) - - def testCreateConv3DTranspose(self): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 32)) - layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], activation=tf.nn.relu) - output = layer(volumes) - if not tf.executing_eagerly(): - self.assertEqual(output.op.name, 'conv3d_transpose/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, depth + 2, height + 2, width + 2, 4]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [4]) - - def testCreateConv3DTransposeIntegerKernelSize(self): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 32)) - layer = conv_layers.Conv3DTranspose(4, 3) - output = layer(volumes) - self.assertListEqual(output.get_shape().as_list(), - [5, depth + 2, height + 2, width + 2, 4]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [4]) - - def testCreateConv3DTransposeChannelsFirst(self): - with tf.Graph().as_default(): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, 32, depth, height, width)) - layer = conv_layers.Conv3DTranspose( - 4, [3, 3, 3], data_format='channels_first') - output = layer(volumes) - self.assertListEqual(output.get_shape().as_list(), - [5, 4, depth + 2, height + 2, width + 2]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [4]) - - def testConv3DTransposePaddingSame(self): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 64), seed=1) - layer = conv_layers.Conv3DTranspose( - 32, volumes.get_shape()[1:4], padding='same') - output = layer(volumes) - self.assertListEqual(output.get_shape().as_list(), - [5, depth, height, width, 32]) - - def testCreateConv3DTransposeWithStrides(self): - depth, height, width = 4, 6, 8 - # Test strides tuple. - volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) - layer = conv_layers.Conv3DTranspose( - 4, [3, 3, 3], strides=(2, 2, 2), padding='same') - output = layer(volumes) - self.assertListEqual(output.get_shape().as_list(), - [5, depth * 2, height * 2, width * 2, 4]) - - # Test strides integer. - layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], strides=2, padding='same') - output = layer(volumes) - self.assertListEqual(output.get_shape().as_list(), - [5, depth * 2, height * 2, width * 2, 4]) - - # Test unequal strides. 
- layer = conv_layers.Conv3DTranspose( - 4, [3, 3, 3], strides=(2, 1, 1), padding='same') - output = layer(volumes) - self.assertListEqual(output.get_shape().as_list(), - [5, depth * 2, height, width, 4]) - - def testConv3DTransposeKernelRegularizer(self): - with tf.Graph().as_default(): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 32)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], kernel_regularizer=reg) - layer(volumes) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testConv3DTransposeBiasRegularizer(self): - with tf.Graph().as_default(): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 32)) - reg = lambda x: 0.1 * tf.reduce_sum(x) - layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], bias_regularizer=reg) - layer(volumes) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual( - self.evaluate(layer.losses), self.evaluate(loss_keys)) - - def testConv3DTransposeNoBias(self): - with tf.Graph().as_default(): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 32)) - layer = conv_layers.Conv3DTranspose( - 4, [3, 3, 3], activation=tf.nn.relu, use_bias=False) - output = layer(volumes) - self.assertEqual(output.op.name, 'conv3d_transpose/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, depth + 2, height + 2, width + 2, 4]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) - self.assertEqual(layer.bias, None) - - def testFunctionalConv3DTransposeReuse(self): - with tf.Graph().as_default(): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - conv_layers.conv3d_transpose( - volumes, 4, [3, 3, 3], name='deconv1', reuse=True) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - - def testFunctionalConv3DTransposeReuseFromScope(self): - with tf.Graph().as_default(): - with tf.compat.v1.variable_scope('scope'): + def testCreateConv3DTranspose(self): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 32)) + layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], activation=tf.nn.relu) + output = layer(volumes) + if not tf.executing_eagerly(): + self.assertEqual(output.op.name, "conv3d_transpose/Relu") + self.assertListEqual( + output.get_shape().as_list(), + [5, depth + 2, height + 2, width + 2, 4], + ) + self.assertListEqual( + layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32] + ) + self.assertListEqual(layer.bias.get_shape().as_list(), [4]) + + def testCreateConv3DTransposeIntegerKernelSize(self): depth, height, width = 5, 7, 9 - volumes = tf.random.uniform( - (5, depth, height, width, 32), seed=1) - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1') - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - with tf.compat.v1.variable_scope('scope', reuse=True): - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1') - 
self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - - def testFunctionalConv3DTransposeInitializerFromScope(self): - with tf.Graph().as_default(), self.cached_session(): - with tf.compat.v1.variable_scope( - 'scope', initializer=tf.compat.v1.ones_initializer()): + volumes = tf.random.uniform((5, depth, height, width, 32)) + layer = conv_layers.Conv3DTranspose(4, 3) + output = layer(volumes) + self.assertListEqual( + output.get_shape().as_list(), + [5, depth + 2, height + 2, width + 2, 4], + ) + self.assertListEqual( + layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32] + ) + self.assertListEqual(layer.bias.get_shape().as_list(), [4]) + + def testCreateConv3DTransposeChannelsFirst(self): + with tf.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, 32, depth, height, width)) + layer = conv_layers.Conv3DTranspose( + 4, [3, 3, 3], data_format="channels_first" + ) + output = layer(volumes) + self.assertListEqual( + output.get_shape().as_list(), + [5, 4, depth + 2, height + 2, width + 2], + ) + self.assertListEqual( + layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32] + ) + self.assertListEqual(layer.bias.get_shape().as_list(), [4]) + + def testConv3DTransposePaddingSame(self): depth, height, width = 5, 7, 9 - volumes = tf.random.uniform( - (5, depth, height, width, 32), seed=1) - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1') - weights = tf.compat.v1.trainable_variables() - # Check the names of weights in order. - self.assertTrue('kernel' in weights[0].name) - self.assertTrue('bias' in weights[1].name) - self.evaluate(tf.compat.v1.global_variables_initializer()) - weights = self.evaluate(weights) - # Check that the kernel weights got initialized to ones (from scope) - self.assertAllClose(weights[0], np.ones((3, 3, 3, 4, 32))) - # Check that the bias still got initialized to zeros. - self.assertAllClose(weights[1], np.zeros((4))) - - def testFunctionalConv3DTransposeNoReuse(self): - with tf.Graph().as_default(): - depth, height, width = 5, 7, 9 - volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3]) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3]) - self.assertEqual(len(tf.compat.v1.trainable_variables()), 4) - - def testConstraints(self): - k_constraint = lambda x: x / tf.reduce_sum(x) - b_constraint = lambda x: x / tf.reduce_max(x) - layer = conv_layers.Conv3DTranspose(2, 3, - kernel_constraint=k_constraint, - bias_constraint=b_constraint) - inputs = tf.random.uniform((5, 3, 3, 3, 5), seed=1) - layer(inputs) - self.assertEqual(layer.kernel_constraint, k_constraint) - self.assertEqual(layer.bias_constraint, b_constraint) - - -if __name__ == '__main__': - tf.test.main() + volumes = tf.random.uniform((5, depth, height, width, 64), seed=1) + layer = conv_layers.Conv3DTranspose( + 32, volumes.get_shape()[1:4], padding="same" + ) + output = layer(volumes) + self.assertListEqual( + output.get_shape().as_list(), [5, depth, height, width, 32] + ) + + def testCreateConv3DTransposeWithStrides(self): + depth, height, width = 4, 6, 8 + # Test strides tuple. + volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) + layer = conv_layers.Conv3DTranspose( + 4, [3, 3, 3], strides=(2, 2, 2), padding="same" + ) + output = layer(volumes) + self.assertListEqual( + output.get_shape().as_list(), + [5, depth * 2, height * 2, width * 2, 4], + ) + + # Test strides integer. 
+ layer = conv_layers.Conv3DTranspose( + 4, [3, 3, 3], strides=2, padding="same" + ) + output = layer(volumes) + self.assertListEqual( + output.get_shape().as_list(), + [5, depth * 2, height * 2, width * 2, 4], + ) + + # Test unequal strides. + layer = conv_layers.Conv3DTranspose( + 4, [3, 3, 3], strides=(2, 1, 1), padding="same" + ) + output = layer(volumes) + self.assertListEqual( + output.get_shape().as_list(), [5, depth * 2, height, width, 4] + ) + + def testConv3DTransposeKernelRegularizer(self): + with tf.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 32)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.Conv3DTranspose( + 4, [3, 3, 3], kernel_regularizer=reg + ) + layer(volumes) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testConv3DTransposeBiasRegularizer(self): + with tf.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 32)) + reg = lambda x: 0.1 * tf.reduce_sum(x) + layer = conv_layers.Conv3DTranspose( + 4, [3, 3, 3], bias_regularizer=reg + ) + layer(volumes) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys) + ) + + def testConv3DTransposeNoBias(self): + with tf.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 32)) + layer = conv_layers.Conv3DTranspose( + 4, [3, 3, 3], activation=tf.nn.relu, use_bias=False + ) + output = layer(volumes) + self.assertEqual(output.op.name, "conv3d_transpose/Relu") + self.assertListEqual( + output.get_shape().as_list(), + [5, depth + 2, height + 2, width + 2, 4], + ) + self.assertListEqual( + layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32] + ) + self.assertEqual(layer.bias, None) + + def testFunctionalConv3DTransposeReuse(self): + with tf.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) + conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name="deconv1") + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + conv_layers.conv3d_transpose( + volumes, 4, [3, 3, 3], name="deconv1", reuse=True + ) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + + def testFunctionalConv3DTransposeReuseFromScope(self): + with tf.Graph().as_default(): + with tf.compat.v1.variable_scope("scope"): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform( + (5, depth, height, width, 32), seed=1 + ) + conv_layers.conv3d_transpose( + volumes, 4, [3, 3, 3], name="deconv1" + ) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + with tf.compat.v1.variable_scope("scope", reuse=True): + conv_layers.conv3d_transpose( + volumes, 4, [3, 3, 3], name="deconv1" + ) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + + def testFunctionalConv3DTransposeInitializerFromScope(self): + with tf.Graph().as_default(), self.cached_session(): + with tf.compat.v1.variable_scope( + "scope", initializer=tf.compat.v1.ones_initializer() + ): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform( + (5, depth, 
height, width, 32), seed=1 + ) + conv_layers.conv3d_transpose( + volumes, 4, [3, 3, 3], name="deconv1" + ) + weights = tf.compat.v1.trainable_variables() + # Check the names of weights in order. + self.assertTrue("kernel" in weights[0].name) + self.assertTrue("bias" in weights[1].name) + self.evaluate(tf.compat.v1.global_variables_initializer()) + weights = self.evaluate(weights) + # Check that the kernel weights got initialized to ones (from + # scope) + self.assertAllClose(weights[0], np.ones((3, 3, 3, 4, 32))) + # Check that the bias still got initialized to zeros. + self.assertAllClose(weights[1], np.zeros((4))) + + def testFunctionalConv3DTransposeNoReuse(self): + with tf.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = tf.random.uniform((5, depth, height, width, 32), seed=1) + conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3]) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 2) + conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3]) + self.assertEqual(len(tf.compat.v1.trainable_variables()), 4) + + def testConstraints(self): + k_constraint = lambda x: x / tf.reduce_sum(x) + b_constraint = lambda x: x / tf.reduce_max(x) + layer = conv_layers.Conv3DTranspose( + 2, 3, kernel_constraint=k_constraint, bias_constraint=b_constraint + ) + inputs = tf.random.uniform((5, 3, 3, 3, 5), seed=1) + layer(inputs) + self.assertEqual(layer.kernel_constraint, k_constraint) + self.assertEqual(layer.bias_constraint, b_constraint) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/legacy_tf_layers/core.py b/keras/legacy_tf_layers/core.py index f4af5cfdfb65..b4111dc91343 100644 --- a/keras/legacy_tf_layers/core.py +++ b/keras/legacy_tf_layers/core.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= -# pylint: disable=g-classes-have-attributes + """Contains the core layers: Dense, Dropout. Also contains their functional aliases. @@ -21,133 +21,139 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import warnings +import tensorflow.compat.v2 as tf + from keras import layers as keras_layers from keras.legacy_tf_layers import base + +# isort: off from tensorflow.python.util.tf_export import keras_export -from tensorflow.python.util.tf_export import tf_export -@keras_export(v1=['keras.__internal__.legacy.layers.Dense']) -@tf_export(v1=['layers.Dense']) +@keras_export(v1=["keras.__internal__.legacy.layers.Dense"]) class Dense(keras_layers.Dense, base.Layer): - """Densely-connected layer class. - - This layer implements the operation: - `outputs = activation(inputs * kernel + bias)` - Where `activation` is the activation function passed as the `activation` - argument (if not `None`), `kernel` is a weights matrix created by the layer, - and `bias` is a bias vector created by the layer - (only if `use_bias` is `True`). - - Args: - units: Integer or Long, dimensionality of the output space. - activation: Activation function (callable). Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: Initializer function for the weight matrix. - If `None` (default), weights are initialized using the default - initializer used by `tf.compat.v1.get_variable`. - bias_initializer: Initializer function for the bias. - kernel_regularizer: Regularizer function for the weight matrix. 
- bias_regularizer: Regularizer function for the bias. - activity_regularizer: Regularizer function for the output. - kernel_constraint: An optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: An optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: String, the name of the layer. Layers with the same name will - share weights, but to avoid mistakes we require reuse=True in such cases. - _reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Properties: - units: Python integer, dimensionality of the output space. - activation: Activation function (callable). - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: Initializer instance (or name) for the kernel matrix. - bias_initializer: Initializer instance (or name) for the bias. - kernel_regularizer: Regularizer instance for the kernel matrix (callable) - bias_regularizer: Regularizer instance for the bias (callable). - activity_regularizer: Regularizer instance for the output (callable) - kernel_constraint: Constraint function for the kernel matrix. - bias_constraint: Constraint function for the bias. - kernel: Weight matrix (TensorFlow variable or tensor). - bias: Bias vector, if applicable (TensorFlow variable or tensor). - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Dense`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - dense = tf.compat.v1.layers.Dense(units=3) - ``` - - After: - - ```python - dense = tf.keras.layers.Dense(units=3) - ``` - - @end_compatibility - """ - - def __init__(self, units, - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=tf.compat.v1.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super().__init__(units=units, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.dense']) -@tf_export(v1=['layers.dense']) + """Densely-connected layer class. 
+ + This layer implements the operation: + `outputs = activation(inputs * kernel + bias)` + where `activation` is the activation function passed as the `activation` + argument (if not `None`), `kernel` is a weights matrix created by the layer, + and `bias` is a bias vector created by the layer + (only if `use_bias` is `True`). + + Args: + units: Integer or Long, dimensionality of the output space. + activation: Activation function (callable). Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: Initializer function for the weight matrix. + If `None` (default), weights are initialized using the default + initializer used by `tf.compat.v1.get_variable`. + bias_initializer: Initializer function for the bias. + kernel_regularizer: Regularizer function for the weight matrix. + bias_regularizer: Regularizer function for the bias. + activity_regularizer: Regularizer function for the output. + kernel_constraint: An optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: An optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: String, the name of the layer. Layers with the same name will + share weights, but to avoid mistakes we require reuse=True in such + cases. + _reuse: Boolean, whether to reuse the weights of a previous layer + by the same name. + + Properties: + units: Python integer, dimensionality of the output space. + activation: Activation function (callable). + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: Initializer instance (or name) for the kernel matrix. + bias_initializer: Initializer instance (or name) for the bias. + kernel_regularizer: Regularizer instance for the kernel matrix (callable). + bias_regularizer: Regularizer instance for the bias (callable). + activity_regularizer: Regularizer instance for the output (callable). + kernel_constraint: Constraint function for the kernel matrix. + bias_constraint: Constraint function for the bias. + kernel: Weight matrix (TensorFlow variable or tensor). + bias: Bias vector, if applicable (TensorFlow variable or tensor). + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is `tf.keras.layers.Dense`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name.
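The decorator named in the compatibility note is what bridges these legacy layers into TF2. A minimal sketch of the pattern from the model-mapping guide, assuming the decorator is applied to a Keras layer's `call`; the class name `CompatDense` is illustrative:

```python
import tensorflow as tf

class CompatDense(tf.keras.layers.Layer):
    @tf.compat.v1.keras.utils.track_tf1_style_variables
    def call(self, inputs):
        # The legacy layer creates its kernel/bias via get_variable; the
        # decorator captures them as this Keras layer's weights, so
        # repeated calls reuse the same variables.
        return tf.compat.v1.layers.dense(inputs, units=3, name="d")

layer = CompatDense()
layer(tf.ones([2, 4]))
print([v.shape.as_list() for v in layer.weights])  # e.g. [[4, 3], [3]]
```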
+ + Before: + + ```python + dense = tf.compat.v1.layers.Dense(units=3) + ``` + + After: + + ```python + dense = tf.keras.layers.Dense(units=3) + ``` + + @end_compatibility + """ + + def __init__( + self, + units, + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=tf.compat.v1.zeros_initializer(), + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs + ): + super().__init__( + units=units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.dense"]) def dense( - inputs, units, + inputs, + units, activation=None, use_bias=True, kernel_initializer=None, @@ -159,386 +165,389 @@ def dense( bias_constraint=None, trainable=True, name=None, - reuse=None): - """Functional interface for the densely-connected layer. - - This layer implements the operation: - `outputs = activation(inputs * kernel + bias)` - where `activation` is the activation function passed as the `activation` - argument (if not `None`), `kernel` is a weights matrix created by the layer, - and `bias` is a bias vector created by the layer - (only if `use_bias` is `True`). - - Args: - inputs: Tensor input. - units: Integer or Long, dimensionality of the output space. - activation: Activation function (callable). Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: Initializer function for the weight matrix. - If `None` (default), weights are initialized using the default - initializer used by `tf.compat.v1.get_variable`. - bias_initializer: Initializer function for the bias. - kernel_regularizer: Regularizer function for the weight matrix. - bias_regularizer: Regularizer function for the bias. - activity_regularizer: Regularizer function for the output. - kernel_constraint: An optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: An optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: String, the name of the layer. - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor the same shape as `inputs` except the last dimension is of - size `units`. - - Raises: - ValueError: if eager execution is enabled. 
- - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Dense`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.dense(x, units=3) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28,)) - y = tf.keras.layers.Dense(units=3)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - - """ - warnings.warn( - '`tf.layers.dense` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `tf.keras.layers.Dense` instead.', - stacklevel=2) - layer = Dense(units, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - _scope=name, - _reuse=reuse) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.Dropout']) -@tf_export(v1=['layers.Dropout']) + reuse=None, +): + """Functional interface for the densely-connected layer. + + This layer implements the operation: + `outputs = activation(inputs * kernel + bias)` + where `activation` is the activation function passed as the `activation` + argument (if not `None`), `kernel` is a weights matrix created by the layer, + and `bias` is a bias vector created by the layer + (only if `use_bias` is `True`). + + Args: + inputs: Tensor input. + units: Integer or Long, dimensionality of the output space. + activation: Activation function (callable). Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: Initializer function for the weight matrix. + If `None` (default), weights are initialized using the default + initializer used by `tf.compat.v1.get_variable`. + bias_initializer: Initializer function for the bias. + kernel_regularizer: Regularizer function for the weight matrix. + bias_regularizer: Regularizer function for the bias. + activity_regularizer: Regularizer function for the output. + kernel_constraint: An optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: An optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: String, the name of the layer. + reuse: Boolean, whether to reuse the weights of a previous layer + by the same name. 
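`reuse` only has meaning in graph mode, where a second call under the same name binds to the variables the first call created instead of raising. A graph-mode sketch of the sharing behaviour that the reuse tests in `core_test.py` below pin down:

```python
import tensorflow.compat.v1 as tf1

with tf1.Graph().as_default():
    x = tf1.random.uniform((5, 3))
    y1 = tf1.layers.dense(x, 2, name="shared")
    # Without reuse=True this second call would raise; with it, the
    # existing "shared/kernel" and "shared/bias" variables are reused.
    y2 = tf1.layers.dense(x, 2, name="shared", reuse=True)
    print(len(tf1.trainable_variables()))  # 2, not 4
```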
+ + Returns: + Output tensor the same shape as `inputs` except the last dimension is of + size `units`. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is `tf.keras.layers.Dense`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.dense(x, units=3) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28,)) + y = tf.keras.layers.Dense(units=3)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + + """ + warnings.warn( + "`tf.layers.dense` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.Dense` instead.", + stacklevel=2, + ) + layer = Dense( + units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + _scope=name, + _reuse=reuse, + ) + return layer(inputs) + + +@keras_export(v1=["keras.__internal__.legacy.layers.Dropout"]) class Dropout(keras_layers.Dropout, base.Layer): - """Applies Dropout to the input. - - Dropout consists in randomly setting a fraction `rate` of input units to 0 - at each update during training time, which helps prevent overfitting. - The units that are kept are scaled by `1 / (1 - rate)`, so that their - sum is unchanged at training time and inference time. - - Args: - rate: The dropout rate, between 0 and 1. E.g. `rate=0.1` would drop out - 10% of input units. - noise_shape: 1D tensor of type `int32` representing the shape of the - binary dropout mask that will be multiplied with the input. - For instance, if your inputs have shape - `(batch_size, timesteps, features)`, and you want the dropout mask - to be the same for all timesteps, you can use - `noise_shape=[batch_size, 1, features]`. - seed: A Python integer. Used to create random seeds. See - `tf.compat.v1.set_random_seed`. - for behavior. - name: The name of the layer (string). - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Dropout`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. 
- - Before: - - ```python - dropout = tf.compat.v1.layers.Dropout() - ``` - - After: - - ```python - dropout = tf.keras.layers.Dropout() - ``` - @end_compatibility - """ - - def __init__(self, rate=0.5, - noise_shape=None, - seed=None, - name=None, - **kwargs): - super().__init__(rate=rate, - noise_shape=noise_shape, - seed=seed, - name=name, - **kwargs) - - def call(self, inputs, training=False): - return super().call(inputs, training=training) - - -@keras_export(v1=['keras.__internal__.legacy.layers.dropout']) -@tf_export(v1=['layers.dropout']) -def dropout(inputs, - rate=0.5, - noise_shape=None, - seed=None, - training=False, - name=None): - """Applies Dropout to the input. - - Dropout consists in randomly setting a fraction `rate` of input units to 0 - at each update during training time, which helps prevent overfitting. - The units that are kept are scaled by `1 / (1 - rate)`, so that their - sum is unchanged at training time and inference time. - - Args: - inputs: Tensor input. - rate: The dropout rate, between 0 and 1. E.g. "rate=0.1" would drop out - 10% of input units. - noise_shape: 1D tensor of type `int32` representing the shape of the - binary dropout mask that will be multiplied with the input. - For instance, if your inputs have shape - `(batch_size, timesteps, features)`, and you want the dropout mask - to be the same for all timesteps, you can use - `noise_shape=[batch_size, 1, features]`. - seed: A Python integer. Used to create random seeds. See - `tf.compat.v1.set_random_seed` - for behavior. - training: Either a Python boolean, or a TensorFlow boolean scalar tensor - (e.g. a placeholder). Whether to return the output in training mode - (apply dropout) or in inference mode (return the input untouched). - name: The name of the layer (string). - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is `tf.keras.layers.Dropout`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.dropout(x) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.Dropout()(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.dropout` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `tf.keras.layers.Dropout` instead.', - stacklevel=2) - layer = Dropout(rate, noise_shape=noise_shape, seed=seed, name=name) - return layer(inputs, training=training) - - -@keras_export(v1=['keras.__internal__.legacy.layers.Flatten']) -@tf_export(v1=['layers.Flatten']) + """Applies Dropout to the input. + + Dropout consists in randomly setting a fraction `rate` of input units to 0 + at each update during training time, which helps prevent overfitting. + The units that are kept are scaled by `1 / (1 - rate)`, so that their + sum is unchanged at training time and inference time. 
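The `1 / (1 - rate)` scaling is easy to see directly: in training mode the surviving entries of an all-ones input come back not as 1 but as `1 / (1 - rate)`. A small sketch with the TF2 layer:

```python
import numpy as np
import tensorflow as tf

x = tf.ones((4, 8))
y = tf.keras.layers.Dropout(rate=0.25)(x, training=True)
# Kept units are scaled by 1 / (1 - 0.25) = 4/3, so the expected value
# of every output entry still matches the input.
print(np.unique(np.round(y.numpy(), 4)))  # approximately [0. 1.3333]
```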
+ + Args: + rate: The dropout rate, between 0 and 1. E.g. `rate=0.1` would drop out + 10% of input units. + noise_shape: 1D tensor of type `int32` representing the shape of the + binary dropout mask that will be multiplied with the input. + For instance, if your inputs have shape + `(batch_size, timesteps, features)`, and you want the dropout mask + to be the same for all timesteps, you can use + `noise_shape=[batch_size, 1, features]`. + seed: A Python integer. Used to create random seeds. See + `tf.compat.v1.set_random_seed` + for behavior. + name: The name of the layer (string). + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is `tf.keras.layers.Dropout`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + dropout = tf.compat.v1.layers.Dropout() + ``` + + After: + + ```python + dropout = tf.keras.layers.Dropout() + ``` + @end_compatibility + """ + + def __init__( + self, rate=0.5, noise_shape=None, seed=None, name=None, **kwargs + ): + # Force the rng type to be legacy stateful since the new stateful code + # path is not supported by the legacy layer. + super().__init__( + rate=rate, + noise_shape=noise_shape, + seed=seed, + name=name, + rng_type="legacy_stateful", + **kwargs + ) + + def call(self, inputs, training=False): + return super().call(inputs, training=training) + + +@keras_export(v1=["keras.__internal__.legacy.layers.dropout"]) +def dropout( + inputs, rate=0.5, noise_shape=None, seed=None, training=False, name=None +): + """Applies Dropout to the input. + + Dropout consists in randomly setting a fraction `rate` of input units to 0 + at each update during training time, which helps prevent overfitting. + The units that are kept are scaled by `1 / (1 - rate)`, so that their + sum is unchanged at training time and inference time. + + Args: + inputs: Tensor input. + rate: The dropout rate, between 0 and 1. E.g. `rate=0.1` would drop out + 10% of input units. + noise_shape: 1D tensor of type `int32` representing the shape of the + binary dropout mask that will be multiplied with the input. + For instance, if your inputs have shape + `(batch_size, timesteps, features)`, and you want the dropout mask + to be the same for all timesteps, you can use + `noise_shape=[batch_size, 1, features]`. + seed: A Python integer. Used to create random seeds. See + `tf.compat.v1.set_random_seed` + for behavior. + training: Either a Python boolean, or a TensorFlow boolean scalar tensor + (e.g. a placeholder). Whether to return the output in training mode + (apply dropout) or in inference mode (return the input untouched). + name: The name of the layer (string). + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras.
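The `noise_shape` broadcast described in the Args is what `testCustomNoiseShape` and `testDynamicNoiseShape` further down assert. A sketch with the TF2 layer, using a size-1 timestep axis so one mask is shared across all timesteps:

```python
import tensorflow as tf

inputs = tf.ones((5, 3, 2))  # (batch, timesteps, features)
layer = tf.keras.layers.Dropout(0.5, noise_shape=[5, 1, 2], seed=1)
out = layer(inputs, training=True)
# The mask is drawn once per (batch, feature) pair and broadcast over
# the timestep axis, so all timesteps share the same drop pattern.
print(bool(tf.reduce_all(out[:, 0, :] == out[:, 1, :])))  # True
```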
+ + The corresponding TensorFlow v2 layer is `tf.keras.layers.Dropout`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.dropout(x) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.Dropout()(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.dropout` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.Dropout` instead.", + stacklevel=2, + ) + layer = Dropout(rate, noise_shape=noise_shape, seed=seed, name=name) + return layer(inputs, training=training) + + +@keras_export(v1=["keras.__internal__.legacy.layers.Flatten"]) class Flatten(keras_layers.Flatten, base.Layer): - """Flattens an input tensor while preserving the batch axis (axis 0). + """Flattens an input tensor while preserving the batch axis (axis 0). + + Args: + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, ..., channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, ...)`. - Args: - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, ..., channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, ...)`. + Examples: - Examples: + ``` + x = tf.compat.v1.placeholder(shape=(None, 4, 4), dtype='float32') + y = Flatten()(x) + # now `y` has shape `(None, 16)` - ``` - x = tf.compat.v1.placeholder(shape=(None, 4, 4), dtype='float32') - y = Flatten()(x) - # now `y` has shape `(None, 16)` + x = tf.compat.v1.placeholder(shape=(None, 3, None), dtype='float32') + y = Flatten()(x) + # now `y` has shape `(None, None)` + ``` - x = tf.compat.v1.placeholder(shape=(None, 3, None), dtype='float32') - y = Flatten()(x) - # now `y` has shape `(None, None)` - ``` + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. + The corresponding TensorFlow v2 layer is `tf.keras.layers.Flatten`. - The corresponding TensorFlow v2 layer is `tf.keras.layers.Flatten`. + #### Structural Mapping to Native TF2 - #### Structural Mapping to Native TF2 + None of the supported arguments have changed name. - None of the supported arguments have changed name. 
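Flatten keeps axis 0 and multiplies the remaining dimensions together, so any unknown non-batch dimension makes the flattened size unknown as well. The docstring's placeholder examples translate to TF2 as:

```python
import tensorflow as tf

flatten = tf.keras.layers.Flatten()
print(flatten(tf.zeros((2, 4, 4))).shape)  # (2, 16)

# With an unknown non-batch dimension the static result is (None, None),
# matching the second placeholder example above.
print(flatten.compute_output_shape((None, 3, None)))  # (None, None)
```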
+ Before: - Before: + ```python + flatten = tf.compat.v1.layers.Flatten() + ``` - ```python - flatten = tf.compat.v1.layers.Flatten() - ``` + After: - After: + ```python + flatten = tf.keras.layers.Flatten() + ``` + @end_compatibility + """ - ```python - flatten = tf.keras.layers.Flatten() - ``` - @end_compatibility - """ - pass + pass -@keras_export(v1=['keras.__internal__.legacy.layers.flatten']) -@tf_export(v1=['layers.flatten']) -def flatten(inputs, name=None, data_format='channels_last'): - """Flattens an input tensor while preserving the batch axis (axis 0). +@keras_export(v1=["keras.__internal__.legacy.layers.flatten"]) +def flatten(inputs, name=None, data_format="channels_last"): + """Flattens an input tensor while preserving the batch axis (axis 0). - Args: - inputs: Tensor input. - name: The name of the layer (string). - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. + Args: + inputs: Tensor input. + name: The name of the layer (string). + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. - Returns: - Reshaped tensor. + Returns: + Reshaped tensor. - Examples: + Examples: - ``` - x = tf.compat.v1.placeholder(shape=(None, 4, 4), dtype='float32') - y = flatten(x) - # now `y` has shape `(None, 16)` + ``` + x = tf.compat.v1.placeholder(shape=(None, 4, 4), dtype='float32') + y = flatten(x) + # now `y` has shape `(None, 16)` - x = tf.compat.v1.placeholder(shape=(None, 3, None), dtype='float32') - y = flatten(x) - # now `y` has shape `(None, None)` - ``` + x = tf.compat.v1.placeholder(shape=(None, 3, None), dtype='float32') + y = flatten(x) + # now `y` has shape `(None, None)` + ``` - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. - The corresponding TensorFlow v2 layer is `tf.keras.layers.Flatten`. + The corresponding TensorFlow v2 layer is `tf.keras.layers.Flatten`. - #### Structural Mapping to Native TF2 + #### Structural Mapping to Native TF2 - None of the supported arguments have changed name. + None of the supported arguments have changed name. 
- Before: + Before: - ```python - y = tf.compat.v1.layers.flatten(x) - ``` + ```python + y = tf.compat.v1.layers.flatten(x) + ``` - After: + After: - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.Flatten()(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.flatten` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `tf.keras.layers.Flatten` instead.', - stacklevel=2) - layer = Flatten(name=name, data_format=data_format) - return layer(inputs) + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.Flatten()(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.flatten` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.Flatten` instead.", + stacklevel=2, + ) + layer = Flatten(name=name, data_format=data_format) + return layer(inputs) # Aliases diff --git a/keras/legacy_tf_layers/core_test.py b/keras/legacy_tf_layers/core_test.py index e945a89d1939..558aa823d4b4 100644 --- a/keras/legacy_tf_layers/core_test.py +++ b/keras/legacy_tf_layers/core_test.py @@ -18,548 +18,636 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import collections import platform -from absl.testing import parameterized import numpy as np -from tensorflow.python.framework import test_util as tf_test_utils -from keras.testing_infra import test_combinations +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + from keras.legacy_tf_layers import core as core_layers +from keras.testing_infra import test_combinations + +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) from tensorflow.python.ops import variable_scope class DenseTest(tf.test.TestCase, parameterized.TestCase): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testDenseProperties(self): + dense = core_layers.Dense(2, activation=tf.nn.relu, name="my_dense") + self.assertEqual(dense.units, 2) + self.assertEqual(dense.activation, tf.nn.relu) + self.assertEqual(dense.kernel_regularizer, None) + self.assertEqual(dense.bias_regularizer, None) + self.assertEqual(dense.activity_regularizer, None) + self.assertEqual(dense.use_bias, True) + + # Test auto-naming + dense = core_layers.Dense(2, activation=tf.nn.relu) + dense(tf.random.uniform((5, 2))) + self.assertEqual(dense.name, "dense_1") + dense = core_layers.Dense(2, activation=tf.nn.relu) + dense(tf.random.uniform((5, 2))) + self.assertEqual(dense.name, "dense_2") + + @tf_test_utils.run_deprecated_v1 + def testVariableInput(self): + with self.cached_session(): + v = tf.compat.v1.get_variable( + "X", initializer=tf.compat.v1.zeros_initializer(), shape=(1, 1) + ) + x = core_layers.Dense(1)(v) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllEqual(x, [[0.0]]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testCall(self): + dense = core_layers.Dense(2, activation=tf.nn.relu, name="my_dense") + inputs = tf.random.uniform((5, 4), seed=1) + outputs = dense(inputs) + self.assertListEqual([5, 2], outputs.get_shape().as_list()) + 
self.assertListEqual(dense.variables, [dense.kernel, dense.bias]) + self.assertListEqual( + dense.trainable_variables, [dense.kernel, dense.bias] + ) + self.assertListEqual(dense.non_trainable_variables, []) + if not tf.executing_eagerly(): + self.assertEqual( + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ) + ), + 2, + ) + self.assertEqual(dense.kernel.name, "my_dense/kernel:0") + self.assertEqual(dense.bias.name, "my_dense/bias:0") + + @tf_test_utils.assert_no_new_pyobjects_executing_eagerly + def testNoEagerLeak(self): + # Tests that repeatedly constructing and building a Layer does not leak + # Python objects. + inputs = tf.random.uniform((5, 4), seed=1) + core_layers.Dense(5)(inputs) + core_layers.Dense(2, activation=tf.nn.relu, name="my_dense")(inputs) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testCallTensorDot(self): + dense = core_layers.Dense(2, activation=tf.nn.relu, name="my_dense") + inputs = tf.random.uniform((5, 4, 3), seed=1) + outputs = dense(inputs) + self.assertListEqual([5, 4, 2], outputs.get_shape().as_list()) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNoBias(self): + dense = core_layers.Dense(2, use_bias=False, name="my_dense") + inputs = tf.random.uniform((5, 2), seed=1) + _ = dense(inputs) + self.assertListEqual(dense.variables, [dense.kernel]) + self.assertListEqual(dense.trainable_variables, [dense.kernel]) + self.assertListEqual(dense.non_trainable_variables, []) + if not tf.executing_eagerly(): + self.assertEqual( + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ) + ), + 1, + ) + self.assertEqual(dense.kernel.name, "my_dense/kernel:0") + self.assertEqual(dense.bias, None) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNonTrainable(self): + dense = core_layers.Dense(2, trainable=False, name="my_dense") + inputs = tf.random.uniform((5, 2), seed=1) + _ = dense(inputs) + self.assertListEqual(dense.variables, [dense.kernel, dense.bias]) + self.assertListEqual( + dense.non_trainable_variables, [dense.kernel, dense.bias] + ) + self.assertListEqual(dense.trainable_variables, []) + if not tf.executing_eagerly(): + self.assertEqual( + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ) + ), + 0, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testOutputShape(self): + dense = core_layers.Dense(7, activation=tf.nn.relu, name="my_dense") + inputs = tf.random.uniform((5, 3), seed=1) + outputs = dense(inputs) + self.assertEqual(outputs.get_shape().as_list(), [5, 7]) + + inputs = tf.random.uniform((5, 2, 3), seed=1) + outputs = dense(inputs) + self.assertEqual(outputs.get_shape().as_list(), [5, 2, 7]) + + inputs = tf.random.uniform((1, 2, 4, 3), seed=1) + outputs = dense(inputs) + self.assertEqual(outputs.get_shape().as_list(), [1, 2, 4, 7]) + + @tf_test_utils.run_deprecated_v1 + def testCallOnPlaceHolder(self): + inputs = tf.compat.v1.placeholder(dtype=tf.float32) + dense = core_layers.Dense(4, name="my_dense") + with self.assertRaises(ValueError): + dense(inputs) + + inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None]) + dense = core_layers.Dense(4, name="my_dense") + with self.assertRaises(ValueError): + dense(inputs) + + inputs = tf.compat.v1.placeholder( + dtype=tf.float32, shape=[None, None, None] + ) + dense = core_layers.Dense(4, 
name="my_dense") + with self.assertRaises(ValueError): + dense(inputs) + + inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, 3]) + dense = core_layers.Dense(4, name="my_dense") + dense(inputs) + + inputs = tf.compat.v1.placeholder( + dtype=tf.float32, shape=[None, None, 3] + ) + dense = core_layers.Dense(4, name="my_dense") + dense(inputs) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testActivation(self): + dense = core_layers.Dense(2, activation=tf.nn.relu, name="dense1") + inputs = tf.random.uniform((5, 3), seed=1) + outputs = dense(inputs) + if not tf.executing_eagerly(): + self.assertEqual(outputs.op.name, "dense1/Relu") - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testDenseProperties(self): - dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense') - self.assertEqual(dense.units, 2) - self.assertEqual(dense.activation, tf.nn.relu) - self.assertEqual(dense.kernel_regularizer, None) - self.assertEqual(dense.bias_regularizer, None) - self.assertEqual(dense.activity_regularizer, None) - self.assertEqual(dense.use_bias, True) - - # Test auto-naming - dense = core_layers.Dense(2, activation=tf.nn.relu) - dense(tf.random.uniform((5, 2))) - self.assertEqual(dense.name, 'dense_1') - dense = core_layers.Dense(2, activation=tf.nn.relu) - dense(tf.random.uniform((5, 2))) - self.assertEqual(dense.name, 'dense_2') - - @tf_test_utils.run_deprecated_v1 - def testVariableInput(self): - with self.cached_session(): - v = tf.compat.v1.get_variable( - 'X', initializer=tf.compat.v1.zeros_initializer(), shape=(1, 1)) - x = core_layers.Dense(1)(v) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllEqual(x, [[0.0]]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testCall(self): - dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense') - inputs = tf.random.uniform((5, 4), seed=1) - outputs = dense(inputs) - self.assertListEqual([5, 2], outputs.get_shape().as_list()) - self.assertListEqual(dense.variables, [dense.kernel, dense.bias]) - self.assertListEqual(dense.trainable_variables, - [dense.kernel, dense.bias]) - self.assertListEqual(dense.non_trainable_variables, []) - if not tf.executing_eagerly(): - self.assertEqual( - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 2) - self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') - self.assertEqual(dense.bias.name, 'my_dense/bias:0') - - @tf_test_utils.assert_no_new_pyobjects_executing_eagerly - def testNoEagerLeak(self): - # Tests that repeatedly constructing and building a Layer does not leak - # Python objects. 
- inputs = tf.random.uniform((5, 4), seed=1) - core_layers.Dense(5)(inputs) - core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')(inputs) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testCallTensorDot(self): - dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense') - inputs = tf.random.uniform((5, 4, 3), seed=1) - outputs = dense(inputs) - self.assertListEqual([5, 4, 2], outputs.get_shape().as_list()) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testNoBias(self): - dense = core_layers.Dense(2, use_bias=False, name='my_dense') - inputs = tf.random.uniform((5, 2), seed=1) - _ = dense(inputs) - self.assertListEqual(dense.variables, [dense.kernel]) - self.assertListEqual(dense.trainable_variables, [dense.kernel]) - self.assertListEqual(dense.non_trainable_variables, []) - if not tf.executing_eagerly(): - self.assertEqual( - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 1) - self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') - self.assertEqual(dense.bias, None) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testNonTrainable(self): - dense = core_layers.Dense(2, trainable=False, name='my_dense') - inputs = tf.random.uniform((5, 2), seed=1) - _ = dense(inputs) - self.assertListEqual(dense.variables, [dense.kernel, dense.bias]) - self.assertListEqual(dense.non_trainable_variables, - [dense.kernel, dense.bias]) - self.assertListEqual(dense.trainable_variables, []) - if not tf.executing_eagerly(): - self.assertEqual( - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 0) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testOutputShape(self): - dense = core_layers.Dense(7, activation=tf.nn.relu, name='my_dense') - inputs = tf.random.uniform((5, 3), seed=1) - outputs = dense(inputs) - self.assertEqual(outputs.get_shape().as_list(), [5, 7]) - - inputs = tf.random.uniform((5, 2, 3), seed=1) - outputs = dense(inputs) - self.assertEqual(outputs.get_shape().as_list(), [5, 2, 7]) - - inputs = tf.random.uniform((1, 2, 4, 3), seed=1) - outputs = dense(inputs) - self.assertEqual(outputs.get_shape().as_list(), [1, 2, 4, 7]) - - @tf_test_utils.run_deprecated_v1 - def testCallOnPlaceHolder(self): - inputs = tf.compat.v1.placeholder(dtype=tf.float32) - dense = core_layers.Dense(4, name='my_dense') - with self.assertRaises(ValueError): - dense(inputs) - - inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None]) - dense = core_layers.Dense(4, name='my_dense') - with self.assertRaises(ValueError): - dense(inputs) - - inputs = tf.compat.v1.placeholder( - dtype=tf.float32, shape=[None, None, None]) - dense = core_layers.Dense(4, name='my_dense') - with self.assertRaises(ValueError): - dense(inputs) - - inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, 3]) - dense = core_layers.Dense(4, name='my_dense') - dense(inputs) - - inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None, 3]) - dense = core_layers.Dense(4, name='my_dense') - dense(inputs) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testActivation(self): - dense = core_layers.Dense(2, activation=tf.nn.relu, name='dense1') - inputs = tf.random.uniform((5, 3), seed=1) - outputs = dense(inputs) - if not tf.executing_eagerly(): - self.assertEqual(outputs.op.name, 'dense1/Relu') - - dense = 
core_layers.Dense(2, name='dense2') - inputs = tf.random.uniform((5, 3), seed=1) - outputs = dense(inputs) - if not tf.executing_eagerly(): - self.assertEqual(outputs.op.name, 'dense2/BiasAdd') - - @tf_test_utils.run_deprecated_v1 - def testActivityRegularizer(self): - regularizer = lambda x: tf.reduce_sum(x) * 1e-3 - dense = core_layers.Dense( - 2, name='my_dense', activity_regularizer=regularizer) - inputs = tf.random.uniform((5, 3), seed=1) - _ = dense(inputs) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.assertListEqual(dense.losses, loss_keys) - - @tf_test_utils.run_deprecated_v1 - def testKernelRegularizer(self): - regularizer = lambda x: tf.reduce_sum(x) * 1e-3 - dense = core_layers.Dense( - 2, name='my_dense', kernel_regularizer=regularizer) - inputs = tf.random.uniform((5, 3), seed=1) - _ = dense(inputs) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in dense.variables]) - self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys)) - - @tf_test_utils.run_deprecated_v1 - def testKernelRegularizerWithReuse(self): - regularizer = lambda x: tf.reduce_sum(x) * 1e-3 - inputs = tf.random.uniform((5, 3), seed=1) - _ = core_layers.dense( - inputs, 2, name='my_dense', kernel_regularizer=regularizer) - self.assertEqual( - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)), 1) - _ = core_layers.dense( - inputs, 2, name='my_dense', kernel_regularizer=regularizer, reuse=True) - self.assertEqual( - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)), 1) - - @tf_test_utils.run_deprecated_v1 - def testBiasRegularizer(self): - regularizer = lambda x: tf.reduce_sum(x) * 1e-3 - dense = core_layers.Dense(2, name='my_dense', bias_regularizer=regularizer) - inputs = tf.random.uniform((5, 3), seed=1) - _ = dense(inputs) - loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in dense.variables]) - self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys)) - - @tf_test_utils.run_deprecated_v1 - def testFunctionalDense(self): - with self.cached_session(): - inputs = tf.random.uniform((5, 3), seed=1) - outputs = core_layers.dense( - inputs, 2, activation=tf.nn.relu, name='my_dense') - self.assertEqual( - len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 2) - self.assertEqual(outputs.op.name, 'my_dense/Relu') - - @tf_test_utils.run_deprecated_v1 - def testFunctionalDenseTwice(self): - inputs = tf.random.uniform((5, 3), seed=1) - core_layers.dense(inputs, 2) - vars1 = _get_variable_dict_from_varstore().values() - core_layers.dense(inputs, 2) - vars2 = _get_variable_dict_from_varstore().values() - self.assertEqual(len(vars1), 2) - self.assertEqual(len(vars2), 4) - - # TODO(alive): get this to work in eager mode. - def testFunctionalDenseTwiceReuse(self): - with self.cached_session(): - inputs = tf.random.uniform((5, 3), seed=1) - core_layers.dense(inputs, 2, name='my_dense') - vars1 = tf.compat.v1.trainable_variables() - core_layers.dense(inputs, 2, name='my_dense', reuse=True) - vars2 = tf.compat.v1.trainable_variables() - self.assertEqual(vars1, vars2) - - # TODO(alive): get this to work in eager mode. 
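The shape checks in `testCallTensorDot` and `testOutputShape` above all follow from one rule: for inputs of rank greater than 2, `Dense` contracts only the last axis (a tensordot), so `output_shape = input_shape[:-1] + [units]`. A sketch of that rule with the TF2 layer:

```python
import tensorflow as tf

dense = tf.keras.layers.Dense(7)
for shape in [(5, 3), (5, 2, 3), (1, 2, 4, 3)]:
    out = dense(tf.zeros(shape))
    # Only the trailing axis (size 3) is replaced by units=7; the same
    # (3, 7) kernel is reused for every input rank.
    assert out.shape.as_list() == list(shape)[:-1] + [7]
```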
- def testFunctionalDenseTwiceReuseFromScope(self): - with self.cached_session(): - with tf.compat.v1.variable_scope('scope'): + dense = core_layers.Dense(2, name="dense2") + inputs = tf.random.uniform((5, 3), seed=1) + outputs = dense(inputs) + if not tf.executing_eagerly(): + self.assertEqual(outputs.op.name, "dense2/BiasAdd") + + @tf_test_utils.run_deprecated_v1 + def testActivityRegularizer(self): + regularizer = lambda x: tf.reduce_sum(x) * 1e-3 + dense = core_layers.Dense( + 2, name="my_dense", activity_regularizer=regularizer + ) + inputs = tf.random.uniform((5, 3), seed=1) + _ = dense(inputs) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.assertListEqual(dense.losses, loss_keys) + + @tf_test_utils.run_deprecated_v1 + def testKernelRegularizer(self): + regularizer = lambda x: tf.reduce_sum(x) * 1e-3 + dense = core_layers.Dense( + 2, name="my_dense", kernel_regularizer=regularizer + ) inputs = tf.random.uniform((5, 3), seed=1) - core_layers.dense(inputs, 2, name='my_dense') - vars1 = tf.compat.v1.trainable_variables() - with tf.compat.v1.variable_scope('scope', reuse=True): - core_layers.dense(inputs, 2, name='my_dense') - vars2 = tf.compat.v1.trainable_variables() - self.assertEqual(vars1, vars2) - - @tf_test_utils.run_deprecated_v1 - def testFunctionalDenseInitializerFromScope(self): - with tf.compat.v1.variable_scope( - 'scope', - initializer=tf.compat.v1.ones_initializer()), self.cached_session(): - inputs = tf.random.uniform((5, 3), seed=1) - core_layers.dense(inputs, 2) - self.evaluate(tf.compat.v1.global_variables_initializer()) - weights = _get_variable_dict_from_varstore() - self.assertEqual(len(weights), 2) - # Check that the matrix weights got initialized to ones (from scope). - self.assertAllClose(weights['scope/dense/kernel'].read_value(), - np.ones((3, 2))) - # Check that the bias still got initialized to zeros. 
- self.assertAllClose(weights['scope/dense/bias'].read_value(), np.zeros( - (2))) - - def testFunctionalDenseWithCustomGetter(self): - called = [0] - - def custom_getter(getter, *args, **kwargs): - called[0] += 1 - return getter(*args, **kwargs) - - with tf.compat.v1.variable_scope('test', custom_getter=custom_getter): - inputs = tf.random.uniform((5, 3), seed=1) - core_layers.dense(inputs, 2) - self.assertEqual(called[0], 2) - - @tf_test_utils.run_deprecated_v1 - def testFunctionalDenseInScope(self): - with self.cached_session(): - with tf.compat.v1.variable_scope('test'): + _ = dense(inputs) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in dense.variables]) + self.assertAllEqual( + self.evaluate(dense.losses), self.evaluate(loss_keys) + ) + + @tf_test_utils.run_deprecated_v1 + def testKernelRegularizerWithReuse(self): + regularizer = lambda x: tf.reduce_sum(x) * 1e-3 inputs = tf.random.uniform((5, 3), seed=1) - core_layers.dense(inputs, 2, name='my_dense') - var_dict = _get_variable_dict_from_varstore() - var_key = 'test/my_dense/kernel' - self.assertEqual(var_dict[var_key].name, '%s:0' % var_key) - with tf.compat.v1.variable_scope('test1') as scope: + _ = core_layers.dense( + inputs, 2, name="my_dense", kernel_regularizer=regularizer + ) + self.assertEqual( + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + ), + 1, + ) + _ = core_layers.dense( + inputs, + 2, + name="my_dense", + kernel_regularizer=regularizer, + reuse=True, + ) + self.assertEqual( + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + ), + 1, + ) + + @tf_test_utils.run_deprecated_v1 + def testBiasRegularizer(self): + regularizer = lambda x: tf.reduce_sum(x) * 1e-3 + dense = core_layers.Dense( + 2, name="my_dense", bias_regularizer=regularizer + ) inputs = tf.random.uniform((5, 3), seed=1) - core_layers.dense(inputs, 2, name=scope) - var_dict = _get_variable_dict_from_varstore() - var_key = 'test1/kernel' - self.assertEqual(var_dict[var_key].name, '%s:0' % var_key) - with tf.compat.v1.variable_scope('test2'): + _ = dense(inputs) + loss_keys = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES + ) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in dense.variables]) + self.assertAllEqual( + self.evaluate(dense.losses), self.evaluate(loss_keys) + ) + + @tf_test_utils.run_deprecated_v1 + def testFunctionalDense(self): + with self.cached_session(): + inputs = tf.random.uniform((5, 3), seed=1) + outputs = core_layers.dense( + inputs, 2, activation=tf.nn.relu, name="my_dense" + ) + self.assertEqual( + len( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ) + ), + 2, + ) + self.assertEqual(outputs.op.name, "my_dense/Relu") + + @tf_test_utils.run_deprecated_v1 + def testFunctionalDenseTwice(self): inputs = tf.random.uniform((5, 3), seed=1) core_layers.dense(inputs, 2) - var_dict = _get_variable_dict_from_varstore() - var_key = 'test2/dense/kernel' - self.assertEqual(var_dict[var_key].name, '%s:0' % var_key) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testComputeOutputShape(self): - dense = core_layers.Dense(2, activation=tf.nn.relu, name='dense1') - ts = tf.TensorShape - # pylint: disable=protected-access - with self.assertRaises(ValueError): - dense.compute_output_shape(ts(None)) - with 
self.assertRaises(ValueError): - dense.compute_output_shape(ts([])) - with self.assertRaises(ValueError): - dense.compute_output_shape(ts([1])) - self.assertEqual( - [None, 2], - dense.compute_output_shape((None, 3)).as_list()) - self.assertEqual( - [None, 2], - dense.compute_output_shape(ts([None, 3])).as_list()) - self.assertEqual( - [None, 4, 2], - dense.compute_output_shape(ts([None, 4, 3])).as_list()) - # pylint: enable=protected-access - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testConstraints(self): - k_constraint = lambda x: x / tf.reduce_sum(x) - b_constraint = lambda x: x / tf.reduce_max(x) - dense = core_layers.Dense(2, - kernel_constraint=k_constraint, - bias_constraint=b_constraint) - inputs = tf.random.uniform((5, 3), seed=1) - dense(inputs) - self.assertEqual(dense.kernel_constraint, k_constraint) - self.assertEqual(dense.bias_constraint, b_constraint) + vars1 = _get_variable_dict_from_varstore().values() + core_layers.dense(inputs, 2) + vars2 = _get_variable_dict_from_varstore().values() + self.assertEqual(len(vars1), 2) + self.assertEqual(len(vars2), 4) + + # TODO(alive): get this to work in eager mode. + def testFunctionalDenseTwiceReuse(self): + with self.cached_session(): + inputs = tf.random.uniform((5, 3), seed=1) + core_layers.dense(inputs, 2, name="my_dense") + vars1 = tf.compat.v1.trainable_variables() + core_layers.dense(inputs, 2, name="my_dense", reuse=True) + vars2 = tf.compat.v1.trainable_variables() + self.assertEqual(vars1, vars2) + + # TODO(alive): get this to work in eager mode. + def testFunctionalDenseTwiceReuseFromScope(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("scope"): + inputs = tf.random.uniform((5, 3), seed=1) + core_layers.dense(inputs, 2, name="my_dense") + vars1 = tf.compat.v1.trainable_variables() + with tf.compat.v1.variable_scope("scope", reuse=True): + core_layers.dense(inputs, 2, name="my_dense") + vars2 = tf.compat.v1.trainable_variables() + self.assertEqual(vars1, vars2) + + @tf_test_utils.run_deprecated_v1 + def testFunctionalDenseInitializerFromScope(self): + with tf.compat.v1.variable_scope( + "scope", initializer=tf.compat.v1.ones_initializer() + ), self.cached_session(): + inputs = tf.random.uniform((5, 3), seed=1) + core_layers.dense(inputs, 2) + self.evaluate(tf.compat.v1.global_variables_initializer()) + weights = _get_variable_dict_from_varstore() + self.assertEqual(len(weights), 2) + # Check that the matrix weights got initialized to ones (from + # scope). + self.assertAllClose( + weights["scope/dense/kernel"].read_value(), np.ones((3, 2)) + ) + # Check that the bias still got initialized to zeros. 
+ self.assertAllClose( + weights["scope/dense/bias"].read_value(), np.zeros((2)) + ) + + def testFunctionalDenseWithCustomGetter(self): + called = [0] + + def custom_getter(getter, *args, **kwargs): + called[0] += 1 + return getter(*args, **kwargs) + + with tf.compat.v1.variable_scope("test", custom_getter=custom_getter): + inputs = tf.random.uniform((5, 3), seed=1) + core_layers.dense(inputs, 2) + self.assertEqual(called[0], 2) + + @tf_test_utils.run_deprecated_v1 + def testFunctionalDenseInScope(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("test"): + inputs = tf.random.uniform((5, 3), seed=1) + core_layers.dense(inputs, 2, name="my_dense") + var_dict = _get_variable_dict_from_varstore() + var_key = "test/my_dense/kernel" + self.assertEqual(var_dict[var_key].name, f"{var_key}:0") + with tf.compat.v1.variable_scope("test1") as scope: + inputs = tf.random.uniform((5, 3), seed=1) + core_layers.dense(inputs, 2, name=scope) + var_dict = _get_variable_dict_from_varstore() + var_key = "test1/kernel" + self.assertEqual(var_dict[var_key].name, f"{var_key}:0") + with tf.compat.v1.variable_scope("test2"): + inputs = tf.random.uniform((5, 3), seed=1) + core_layers.dense(inputs, 2) + var_dict = _get_variable_dict_from_varstore() + var_key = "test2/dense/kernel" + self.assertEqual(var_dict[var_key].name, f"{var_key}:0") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testComputeOutputShape(self): + dense = core_layers.Dense(2, activation=tf.nn.relu, name="dense1") + ts = tf.TensorShape + + with self.assertRaises(ValueError): + dense.compute_output_shape(ts(None)) + with self.assertRaises(ValueError): + dense.compute_output_shape(ts([])) + with self.assertRaises(ValueError): + dense.compute_output_shape(ts([1])) + self.assertEqual( + [None, 2], dense.compute_output_shape((None, 3)).as_list() + ) + self.assertEqual( + [None, 2], dense.compute_output_shape(ts([None, 3])).as_list() + ) + self.assertEqual( + [None, 4, 2], dense.compute_output_shape(ts([None, 4, 3])).as_list() + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testConstraints(self): + k_constraint = lambda x: x / tf.reduce_sum(x) + b_constraint = lambda x: x / tf.reduce_max(x) + dense = core_layers.Dense( + 2, kernel_constraint=k_constraint, bias_constraint=b_constraint + ) + inputs = tf.random.uniform((5, 3), seed=1) + dense(inputs) + self.assertEqual(dense.kernel_constraint, k_constraint) + self.assertEqual(dense.bias_constraint, b_constraint) def _get_variable_dict_from_varstore(): - var_dict = variable_scope._get_default_variable_store()._vars # pylint: disable=protected-access - sorted_var_dict = collections.OrderedDict( - sorted(var_dict.items(), key=lambda t: t[0])) - return sorted_var_dict + var_dict = variable_scope._get_default_variable_store()._vars + sorted_var_dict = collections.OrderedDict( + sorted(var_dict.items(), key=lambda t: t[0]) + ) + return sorted_var_dict class DropoutTest(tf.test.TestCase, parameterized.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testDropoutProperties(self): - dp = core_layers.Dropout(0.5, name='dropout') - self.assertEqual(dp.rate, 0.5) - self.assertEqual(dp.noise_shape, None) - dp(tf.ones(())) - self.assertEqual(dp.name, 'dropout') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testBooleanLearningPhase(self): - dp = core_layers.Dropout(0.5) - inputs = tf.ones((5, 3)) - 
dropped = dp(inputs, training=True) - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_output = self.evaluate(dropped) - self.assertAlmostEqual(0., np_output.min()) - dropped = dp(inputs, training=False) - np_output = self.evaluate(dropped) - self.assertAllClose(np.ones((5, 3)), np_output) - - @tf_test_utils.run_deprecated_v1 - def testDynamicLearningPhase(self): - with self.cached_session() as sess: - dp = core_layers.Dropout(0.5, seed=1) - inputs = tf.ones((5, 5)) - training = tf.compat.v1.placeholder(dtype='bool') - dropped = dp(inputs, training=training) - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_output = sess.run(dropped, feed_dict={training: True}) - self.assertAlmostEqual(0., np_output.min()) - np_output = sess.run(dropped, feed_dict={training: False}) - self.assertAllClose(np.ones((5, 5)), np_output) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testDynamicNoiseShape(self): - inputs = tf.ones((5, 3, 2)) - noise_shape = [None, 1, None] - dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1) - dropped = dp(inputs, training=True) - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_output = self.evaluate(dropped) - self.assertAlmostEqual(0., np_output.min()) - self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :]) - - def testCustomNoiseShape(self): - inputs = tf.ones((5, 3, 2)) - noise_shape = [5, 1, 2] - dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1) - dropped = dp(inputs, training=True) - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_output = self.evaluate(dropped) - self.assertAlmostEqual(0., np_output.min()) - self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :]) - - @tf_test_utils.run_deprecated_v1 - def testFunctionalDropout(self): - with self.cached_session(): - inputs = tf.ones((5, 5)) - dropped = core_layers.dropout(inputs, 0.5, training=True, seed=1) - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_output = self.evaluate(dropped) - self.assertAlmostEqual(0., np_output.min()) - dropped = core_layers.dropout(inputs, 0.5, training=False, seed=1) - np_output = self.evaluate(dropped) - self.assertAllClose(np.ones((5, 5)), np_output) - - @tf_test_utils.run_deprecated_v1 - def testDynamicRate(self): - with self.cached_session() as sess: - rate = tf.compat.v1.placeholder(dtype='float32', name='rate') - dp = core_layers.Dropout(rate, name='dropout') - inputs = tf.ones((5, 5)) - dropped = dp(inputs, training=True) - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_output = sess.run(dropped, feed_dict={rate: 0.5}) - self.assertAlmostEqual(0., np_output.min()) - np_output = sess.run(dropped, feed_dict={rate: 0.0}) - self.assertAllClose(np.ones((5, 5)), np_output) + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testDropoutProperties(self): + dp = core_layers.Dropout(0.5, name="dropout") + self.assertEqual(dp.rate, 0.5) + self.assertEqual(dp.noise_shape, None) + dp(tf.ones(())) + self.assertEqual(dp.name, "dropout") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBooleanLearningPhase(self): + dp = core_layers.Dropout(0.5) + inputs = tf.ones((5, 3)) + dropped = dp(inputs, training=True) + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_output = self.evaluate(dropped) + self.assertAlmostEqual(0.0, np_output.min()) + dropped = 
dp(inputs, training=False) + np_output = self.evaluate(dropped) + self.assertAllClose(np.ones((5, 3)), np_output) + + @tf_test_utils.run_deprecated_v1 + def testDynamicLearningPhase(self): + with self.cached_session() as sess: + dp = core_layers.Dropout(0.5, seed=1) + inputs = tf.ones((5, 5)) + training = tf.compat.v1.placeholder(dtype="bool") + dropped = dp(inputs, training=training) + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_output = sess.run(dropped, feed_dict={training: True}) + self.assertAlmostEqual(0.0, np_output.min()) + np_output = sess.run(dropped, feed_dict={training: False}) + self.assertAllClose(np.ones((5, 5)), np_output) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testDynamicNoiseShape(self): + inputs = tf.ones((5, 3, 2)) + noise_shape = [None, 1, None] + dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1) + dropped = dp(inputs, training=True) + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_output = self.evaluate(dropped) + self.assertAlmostEqual(0.0, np_output.min()) + self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :]) + + def testCustomNoiseShape(self): + inputs = tf.ones((5, 3, 2)) + noise_shape = [5, 1, 2] + dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1) + dropped = dp(inputs, training=True) + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_output = self.evaluate(dropped) + self.assertAlmostEqual(0.0, np_output.min()) + self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :]) + + @tf_test_utils.run_deprecated_v1 + def testFunctionalDropout(self): + with self.cached_session(): + inputs = tf.ones((5, 5)) + dropped = core_layers.dropout(inputs, 0.5, training=True, seed=1) + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_output = self.evaluate(dropped) + self.assertAlmostEqual(0.0, np_output.min()) + dropped = core_layers.dropout(inputs, 0.5, training=False, seed=1) + np_output = self.evaluate(dropped) + self.assertAllClose(np.ones((5, 5)), np_output) + + @tf_test_utils.run_deprecated_v1 + def testDynamicRate(self): + with self.cached_session() as sess: + rate = tf.compat.v1.placeholder(dtype="float32", name="rate") + dp = core_layers.Dropout(rate, name="dropout") + inputs = tf.ones((5, 5)) + dropped = dp(inputs, training=True) + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_output = sess.run(dropped, feed_dict={rate: 0.5}) + self.assertAlmostEqual(0.0, np_output.min()) + np_output = sess.run(dropped, feed_dict={rate: 0.0}) + self.assertAllClose(np.ones((5, 5)), np_output) class FlattenTest(tf.test.TestCase): - - @tf_test_utils.run_deprecated_v1 - def testCreateFlatten(self): - with self.cached_session() as sess: - x = tf.compat.v1.placeholder(shape=(None, 2, 3), dtype='float32') - y = core_layers.Flatten()(x) - np_output = sess.run(y, feed_dict={x: np.zeros((3, 2, 3))}) - self.assertEqual(list(np_output.shape), [3, 6]) - self.assertEqual(y.get_shape().as_list(), [None, 6]) - - x = tf.compat.v1.placeholder(shape=(1, 2, 3, 2), dtype='float32') - y = core_layers.Flatten()(x) - np_output = sess.run(y, feed_dict={x: np.zeros((1, 2, 3, 2))}) - self.assertEqual(list(np_output.shape), [1, 12]) - self.assertEqual(y.get_shape().as_list(), [1, 12]) - - def testComputeShape(self): - shape = core_layers.Flatten().compute_output_shape((1, 2, 3, 2)) - self.assertEqual(shape.as_list(), [1, 12]) - - shape = core_layers.Flatten().compute_output_shape((None, 3, 2)) - self.assertEqual(shape.as_list(), [None, 6]) - 
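The shape contract exercised by `testComputeShape` above is easy to check in isolation. A minimal sketch, assuming the public `tf.keras.layers.Flatten` implements the same contract as the legacy `core_layers.Flatten` under test: the batch dimension passes through unchanged, all remaining dimensions are multiplied out, and any unknown factor collapses the result to `None`.

```python
import tensorflow as tf

# Minimal sketch (assumes tf.keras.layers.Flatten matches the legacy layer):
# dim 0 is preserved, the rest are multiplied out, unknown dims become None.
layer = tf.keras.layers.Flatten()
print(layer.compute_output_shape((1, 2, 3, 2)).as_list())     # [1, 12]
print(layer.compute_output_shape((None, 3, 2)).as_list())     # [None, 6]
print(layer.compute_output_shape((None, 3, None)).as_list())  # [None, None]
```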
- shape = core_layers.Flatten().compute_output_shape((None, 3, None)) - self.assertEqual(shape.as_list(), [None, None]) - - @tf_test_utils.run_deprecated_v1 - def testDataFormat5d(self): - np_input_channels_last = np.arange( - 120, dtype='float32').reshape([1, 5, 4, 3, 2]) - - with self.test_session() as sess: - x = tf.compat.v1.placeholder(shape=(1, 5, 4, 3, 2), dtype='float32') - y = core_layers.Flatten(data_format='channels_last')(x) - np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last}) - - x = tf.compat.v1.placeholder(shape=(1, 2, 5, 4, 3), dtype='float32') - y = core_layers.Flatten(data_format='channels_first')(x) - np_input_channels_first = np.transpose(np_input_channels_last, - [0, 4, 1, 2, 3]) - np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first}) - - self.assertAllEqual(np_output_cl, np_output_cf) - - @tf_test_utils.run_deprecated_v1 - def testDataFormat4d(self): - np_input_channels_last = np.arange( - 24, dtype='float32').reshape([1, 4, 3, 2]) - - with self.test_session() as sess: - x = tf.compat.v1.placeholder(shape=(1, 4, 3, 2), dtype='float32') - y = core_layers.Flatten(data_format='channels_last')(x) - np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last}) - - x = tf.compat.v1.placeholder(shape=(1, 2, 4, 3), dtype='float32') - y = core_layers.Flatten(data_format='channels_first')(x) - np_input_channels_first = np.transpose(np_input_channels_last, - [0, 3, 1, 2]) - np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first}) - - self.assertAllEqual(np_output_cl, np_output_cf) - - @tf_test_utils.run_deprecated_v1 - def testFunctionalFlatten(self): - x = tf.compat.v1.placeholder(shape=(None, 2, 3), dtype='float32') - y = core_layers.flatten(x, name='flatten') - self.assertEqual(y.get_shape().as_list(), [None, 6]) - - @tf_test_utils.run_deprecated_v1 - def testFlatten0D(self): - x = tf.compat.v1.placeholder(shape=(None,), dtype='float32') - y = core_layers.Flatten()(x) - with self.cached_session() as sess: - np_output = sess.run(y, feed_dict={x: np.zeros((5,))}) - self.assertEqual(list(np_output.shape), [5, 1]) - self.assertEqual(y.shape.as_list(), [None, 1]) - - @tf_test_utils.run_deprecated_v1 - def testFlattenUnknownAxes(self): - with self.cached_session() as sess: - x = tf.compat.v1.placeholder(shape=(5, None, None), dtype='float32') - y = core_layers.Flatten()(x) - np_output = sess.run(y, feed_dict={x: np.zeros((5, 2, 3))}) - self.assertEqual(list(np_output.shape), [5, 6]) - self.assertEqual(y.get_shape().as_list(), [5, None]) - - x = tf.compat.v1.placeholder(shape=(5, None, 2), dtype='float32') - y = core_layers.Flatten()(x) - np_output = sess.run(y, feed_dict={x: np.zeros((5, 3, 2))}) - self.assertEqual(list(np_output.shape), [5, 6]) - self.assertEqual(y.get_shape().as_list(), [5, None]) - - @tf_test_utils.run_deprecated_v1 - def testFlattenLargeDim(self): - if any(platform.win32_ver()): - self.skipTest('values are truncated on windows causing test failures') - - x = tf.compat.v1.placeholder(shape=(None, 21316, 21316, 80), dtype='float32') - y = core_layers.Flatten()(x) - self.assertEqual(y.shape.as_list(), [None, 21316 * 21316 * 80]) - - @tf_test_utils.run_deprecated_v1 - def testFlattenLargeBatchDim(self): - batch_size = np.iinfo(np.int32).max + 10 - x = tf.compat.v1.placeholder( - shape=(batch_size, None, None, 1), dtype='float32') - y = core_layers.Flatten()(x) - self.assertEqual(y.shape.as_list(), [batch_size, None]) - - -if __name__ == '__main__': - tf.test.main() + @tf_test_utils.run_deprecated_v1 + def 
testCreateFlatten(self): + with self.cached_session() as sess: + x = tf.compat.v1.placeholder(shape=(None, 2, 3), dtype="float32") + y = core_layers.Flatten()(x) + np_output = sess.run(y, feed_dict={x: np.zeros((3, 2, 3))}) + self.assertEqual(list(np_output.shape), [3, 6]) + self.assertEqual(y.get_shape().as_list(), [None, 6]) + + x = tf.compat.v1.placeholder(shape=(1, 2, 3, 2), dtype="float32") + y = core_layers.Flatten()(x) + np_output = sess.run(y, feed_dict={x: np.zeros((1, 2, 3, 2))}) + self.assertEqual(list(np_output.shape), [1, 12]) + self.assertEqual(y.get_shape().as_list(), [1, 12]) + + def testComputeShape(self): + shape = core_layers.Flatten().compute_output_shape((1, 2, 3, 2)) + self.assertEqual(shape.as_list(), [1, 12]) + + shape = core_layers.Flatten().compute_output_shape((None, 3, 2)) + self.assertEqual(shape.as_list(), [None, 6]) + + shape = core_layers.Flatten().compute_output_shape((None, 3, None)) + self.assertEqual(shape.as_list(), [None, None]) + + @tf_test_utils.run_deprecated_v1 + def testDataFormat5d(self): + np_input_channels_last = np.arange(120, dtype="float32").reshape( + [1, 5, 4, 3, 2] + ) + + with self.test_session() as sess: + x = tf.compat.v1.placeholder(shape=(1, 5, 4, 3, 2), dtype="float32") + y = core_layers.Flatten(data_format="channels_last")(x) + np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last}) + + x = tf.compat.v1.placeholder(shape=(1, 2, 5, 4, 3), dtype="float32") + y = core_layers.Flatten(data_format="channels_first")(x) + np_input_channels_first = np.transpose( + np_input_channels_last, [0, 4, 1, 2, 3] + ) + np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first}) + + self.assertAllEqual(np_output_cl, np_output_cf) + + @tf_test_utils.run_deprecated_v1 + def testDataFormat4d(self): + np_input_channels_last = np.arange(24, dtype="float32").reshape( + [1, 4, 3, 2] + ) + + with self.test_session() as sess: + x = tf.compat.v1.placeholder(shape=(1, 4, 3, 2), dtype="float32") + y = core_layers.Flatten(data_format="channels_last")(x) + np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last}) + + x = tf.compat.v1.placeholder(shape=(1, 2, 4, 3), dtype="float32") + y = core_layers.Flatten(data_format="channels_first")(x) + np_input_channels_first = np.transpose( + np_input_channels_last, [0, 3, 1, 2] + ) + np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first}) + + self.assertAllEqual(np_output_cl, np_output_cf) + + @tf_test_utils.run_deprecated_v1 + def testFunctionalFlatten(self): + x = tf.compat.v1.placeholder(shape=(None, 2, 3), dtype="float32") + y = core_layers.flatten(x, name="flatten") + self.assertEqual(y.get_shape().as_list(), [None, 6]) + + @tf_test_utils.run_deprecated_v1 + def testFlatten0D(self): + x = tf.compat.v1.placeholder(shape=(None,), dtype="float32") + y = core_layers.Flatten()(x) + with self.cached_session() as sess: + np_output = sess.run(y, feed_dict={x: np.zeros((5,))}) + self.assertEqual(list(np_output.shape), [5, 1]) + self.assertEqual(y.shape.as_list(), [None, 1]) + + @tf_test_utils.run_deprecated_v1 + def testFlattenUnknownAxes(self): + with self.cached_session() as sess: + x = tf.compat.v1.placeholder(shape=(5, None, None), dtype="float32") + y = core_layers.Flatten()(x) + np_output = sess.run(y, feed_dict={x: np.zeros((5, 2, 3))}) + self.assertEqual(list(np_output.shape), [5, 6]) + self.assertEqual(y.get_shape().as_list(), [5, None]) + + x = tf.compat.v1.placeholder(shape=(5, None, 2), dtype="float32") + y = core_layers.Flatten()(x) + np_output = sess.run(y, feed_dict={x: 
np.zeros((5, 3, 2))}) + self.assertEqual(list(np_output.shape), [5, 6]) + self.assertEqual(y.get_shape().as_list(), [5, None]) + + @tf_test_utils.run_deprecated_v1 + def testFlattenLargeDim(self): + if any(platform.win32_ver()): + self.skipTest( + "values are truncated on windows causing test failures" + ) + + x = tf.compat.v1.placeholder( + shape=(None, 21316, 21316, 80), dtype="float32" + ) + y = core_layers.Flatten()(x) + self.assertEqual(y.shape.as_list(), [None, 21316 * 21316 * 80]) + + @tf_test_utils.run_deprecated_v1 + def testFlattenLargeBatchDim(self): + batch_size = np.iinfo(np.int32).max + 10 + x = tf.compat.v1.placeholder( + shape=(batch_size, None, None, 1), dtype="float32" + ) + y = core_layers.Flatten()(x) + self.assertEqual(y.shape.as_list(), [batch_size, None]) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/legacy_tf_layers/migration_utils.py b/keras/legacy_tf_layers/migration_utils.py index 8d9c43d5837d..e1467beb66c0 100644 --- a/keras/legacy_tf_layers/migration_utils.py +++ b/keras/legacy_tf_layers/migration_utils.py @@ -9,95 +9,108 @@ import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.util.tf_export import keras_export @keras_export(v1=["keras.utils.DeterministicRandomTestTool"]) class DeterministicRandomTestTool(object): - """DeterministicRandomTestTool is a testing tool. - - This tool is used to validate random number generation semantics match between - TF1.x graphs/sessions and eager execution. - - This is useful when you are migrating from TF 1.x to TF2 and need to make sure - your computation is still happening correctly along the way. See the - validating correctness migration guide for more info : - https://www.tensorflow.org/guide/migrate/validate_correctness - - The following DeterministicRandomTestTool object provides a context manager - scope() that can make stateful random operations use the same seed across both - TF1 graphs/sessions and eager execution,The tool provides two testing modes: - - constant which uses the same seed for every single operation no matter how - many times it has been called and, - - num_random_ops which uses the number of previously-observed stateful random - operations as the operation seed. - The num_random_ops mode serves as a more sensitive validation check than the - constant mode. It ensures that the random numbers initialization does not get - accidentaly reused.(for example if several weights take on the same - initializations), you can use the num_random_ops mode to avoid this. In the - num_random_ops mode, the generated random numbers will depend on the ordering - of random ops in the program. - - This applies both to the stateful random operations used for creating and - initializing variables, and to the stateful random operations used in - computation (such as for dropout layers). - """ - - def __init__(self, seed: int = 42, mode="constant"): - """Set mode to 'constant' or 'num_random_ops'. Defaults to 'constant'.""" - if mode not in {"constant", "num_random_ops"}: - raise ValueError("Mode arg must be 'constant' or 'num_random_ops'. 
" + - "Got: {}".format(mode)) - self.seed_implementation = sys.modules[tf.compat.v1.get_seed.__module__] - self._mode = mode - self._seed = seed - self.operation_seed = 0 - self._observed_seeds = set() - - @property - def operation_seed(self): - return self._operation_seed - - @operation_seed.setter - def operation_seed(self, value): - self._operation_seed = value - - def scope(self): - """set random seed.""" - - tf.random.set_seed(self._seed) - def _get_seed(_): - """Wraps TF get_seed to make deterministic random generation easier. - - This makes a variable's initialization (and calls that involve random - number generation) depend only on how many random number generations - were used in the scope so far, rather than on how many unrelated - operations the graph contains. - - Returns: - Random seed tuple. - """ - op_seed = self._operation_seed - if self._mode == "constant": - tf.random.set_seed(op_seed) - else: - if op_seed in self._observed_seeds: - raise ValueError( - "This `DeterministicRandomTestTool` object is trying to re-use the " - + "already-used operation seed {}. ".format(op_seed) + - "It cannot guarantee random numbers will match between eager " + - "and sessions when an operation seed is reused. " + - "You most likely set " + - "`operation_seed` explicitly but used a value that caused the " + - "naturally-incrementing operation seed sequences to overlap " + - "with an already-used seed.") - - self._observed_seeds.add(op_seed) - self._operation_seed += 1 - - return (self._seed, op_seed) - # mock.patch internal symbols to modify the behavior of TF APIs relying on - # them - - return tf.compat.v1.test.mock.patch.object( - self.seed_implementation, "get_seed", wraps=_get_seed) + """DeterministicRandomTestTool is a testing tool. + + This tool is used to validate random number generation semantics match + between TF1.x graphs/sessions and eager execution. + + This is useful when you are migrating from TF 1.x to TF2 and need to make + sure your computation is still happening correctly along the way. See the + validating correctness migration guide for more info: + https://www.tensorflow.org/guide/migrate/validate_correctness + + The following DeterministicRandomTestTool object provides a context manager + scope() that can make stateful random operations use the same seed across + both TF1 graphs/sessions and eager execution,The tool provides two testing + modes: + - constant which uses the same seed for every single operation no matter how + many times it has been called and, + - num_random_ops which uses the number of previously-observed stateful + random operations as the operation seed. + The num_random_ops mode serves as a more sensitive validation check than the + constant mode. It ensures that the random numbers initialization does not + get accidentaly reused.(for example if several weights take on the same + initializations), you can use the num_random_ops mode to avoid this. In the + num_random_ops mode, the generated random numbers will depend on the + ordering of random ops in the program. + + This applies both to the stateful random operations used for creating and + initializing variables, and to the stateful random operations used in + computation (such as for dropout layers). + + Args: + mode: Set mode to 'constant' or 'num_random_ops'. Defaults to + 'constant'. + seed: The random seed to use. 
+ """ + + def __init__(self, seed: int = 42, mode="constant"): + if mode not in {"constant", "num_random_ops"}: + raise ValueError( + "Mode arg must be 'constant' or 'num_random_ops'. " + + f"Got: {mode}" + ) + self.seed_implementation = sys.modules[tf.compat.v1.get_seed.__module__] + self._mode = mode + self._seed = seed + self.operation_seed = 0 + self._observed_seeds = set() + + @property + def operation_seed(self): + return self._operation_seed + + @operation_seed.setter + def operation_seed(self, value): + self._operation_seed = value + + def scope(self): + """set random seed.""" + + tf.random.set_seed(self._seed) + + def _get_seed(_): + """Wraps TF get_seed to make deterministic random generation easier. + + This makes a variable's initialization (and calls that involve + random number generation) depend only on how many random number + generations were used in the scope so far, rather than on how many + unrelated operations the graph contains. + + Returns: + Random seed tuple. + """ + op_seed = self._operation_seed + if self._mode == "constant": + tf.random.set_seed(op_seed) + else: + if op_seed in self._observed_seeds: + raise ValueError( + "This `DeterministicRandomTestTool` " + "object is trying to re-use the " + + f"already-used operation seed {op_seed}. " + + "It cannot guarantee random numbers will match " + + "between eager and sessions when an operation seed " + + "is reused. You most likely set " + + "`operation_seed` explicitly but used a value that " + + "caused the naturally-incrementing operation seed " + + "sequences to overlap with an already-used seed." + ) + + self._observed_seeds.add(op_seed) + self._operation_seed += 1 + + return (self._seed, op_seed) + + # mock.patch internal symbols to modify the behavior of TF APIs relying + # on them + + return tf.compat.v1.test.mock.patch.object( + self.seed_implementation, "get_seed", wraps=_get_seed + ) diff --git a/keras/legacy_tf_layers/migration_utils_test.py b/keras/legacy_tf_layers/migration_utils_test.py index 18c6e0242a01..3d024ceb2bdf 100644 --- a/keras/legacy_tf_layers/migration_utils_test.py +++ b/keras/legacy_tf_layers/migration_utils_test.py @@ -1,215 +1,206 @@ """Tests for migration_utils.""" -from keras.initializers import GlorotUniform as V2GlorotUniform -from keras.legacy_tf_layers import migration_utils import tensorflow as tf +from keras.legacy_tf_layers import migration_utils -class DeterministicRandomTestToolTest(tf.test.TestCase): - def test_constant_mode_no_seed(self): - """Test random tensor generation consistancy in constant mode. - - Verify that the random tensor generated without using the seed is - consistant between graph and eager mode - """ - - # Generate three random tensors to show how the stateful random number - # generation and glorot_uniform_initializer match between sessions and - # eager execution. 
- random_tool = migration_utils.DeterministicRandomTestTool() - with random_tool.scope(): - graph = tf.Graph() - with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess: - a = tf.compat.v1.random.uniform(shape=(3, 1)) - # adding additional computation/ops to the graph and ensuring consistant - # random number generation - a = a * 3 - b = tf.compat.v1.random.uniform(shape=(3, 3)) - b = b * 3 - c = tf.compat.v1.random.uniform(shape=(3, 3)) - c = c * 3 - d = tf.compat.v1.glorot_uniform_initializer()( - shape=(6, 6), dtype=tf.float32) - graph_a, graph_b, graph_c, graph_d = sess.run([a, b, c, d]) - - a = tf.compat.v2.random.uniform(shape=(3, 1)) - a = a * 3 - b = tf.compat.v2.random.uniform(shape=(3, 3)) - b = b * 3 - c = tf.compat.v2.random.uniform(shape=(3, 3)) - c = c * 3 - d = V2GlorotUniform()(shape=(6, 6), dtype=tf.float32) - # validate that the generated random tensors match - self.assertAllClose(graph_a, a) - self.assertAllClose(graph_b, b) - self.assertAllClose(graph_c, c) - self.assertAllClose(graph_d, d) - # In constant mode, because b and c were generated with the same seed within - # the same scope and have the same shape, they will have exactly the same - # values. - # validate that b and c are the same, also graph_b and graph_c - self.assertAllClose(b, c) - self.assertAllClose(graph_b, graph_c) - - def test_constant_mode_seed_argument(self): - """Test random tensor generation consistancy in constant mode. - - Verify that the random tensor generated by setting the global seeed - in the args is consistant between graph and eager mode. - """ - random_tool = migration_utils.DeterministicRandomTestTool() - with random_tool.scope(): - graph = tf.Graph() - with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess: - # adding additional computation/ops to the graph and ensuring consistant - # random number generation - a = tf.compat.v1.random.uniform(shape=(3, 1), seed=1234) - a = a * 3 - b = tf.compat.v1.random.uniform(shape=(3, 3), seed=1234) - b = b * 3 - c = tf.compat.v1.glorot_uniform_initializer(seed=1234)( - shape=(6, 6), dtype=tf.float32) - graph_a, graph_b, graph_c = sess.run([a, b, c]) - a = tf.compat.v2.random.uniform(shape=(3, 1), seed=1234) - a = a * 3 - b = tf.compat.v2.random.uniform(shape=(3, 3), seed=1234) - b = b * 3 - c = V2GlorotUniform(seed=1234)(shape=(6, 6), dtype=tf.float32) - - # validate that the generated random tensors match - self.assertAllClose(graph_a, a) - self.assertAllClose(graph_b, b) - self.assertAllClose(graph_c, c) - - def test_num_rand_ops(self): - """Test random tensor generation consistancy in num_random_ops mode. - - Verify that the random tensor generated without using the seed is - consistant between graph and eager mode. 
- Random tensor generated should be different based on random ops ordering - """ - random_tool = migration_utils.DeterministicRandomTestTool( - mode="num_random_ops") - with random_tool.scope(): - graph = tf.Graph() - with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess: - # adding additional computation/ops to the graph and ensuring consistant - # random number generation - a = tf.compat.v1.random.uniform(shape=(3, 1)) - a = a * 3 - b = tf.compat.v1.random.uniform(shape=(3, 3)) - b = b * 3 - c = tf.compat.v1.random.uniform(shape=(3, 3)) - c = c * 3 - d = tf.compat.v1.glorot_uniform_initializer()( - shape=(6, 6), dtype=tf.float32) - graph_a, graph_b, graph_c, graph_d = sess.run([a, b, c, d]) - - random_tool = migration_utils.DeterministicRandomTestTool( - mode="num_random_ops") - with random_tool.scope(): - a = tf.compat.v2.random.uniform(shape=(3, 1)) - a = a * 3 - b = tf.compat.v2.random.uniform(shape=(3, 3)) - b = b * 3 - c = tf.compat.v2.random.uniform(shape=(3, 3)) - c = c * 3 - d = V2GlorotUniform()(shape=(6, 6), dtype=tf.float32) - # validate that the generated random tensors match - self.assertAllClose(graph_a, a) - self.assertAllClose(graph_b, b) - self.assertAllClose(graph_c, c) - self.assertAllClose(graph_d, d) - # validate that the tensors differ based on ops ordering - self.assertNotAllClose(b, c) - self.assertNotAllClose(graph_b, graph_c) - - def test_num_rand_ops_program_order(self): - """Test random tensor generation consistancy in num_random_ops mode. - - validate that in this mode random number generation is sensitive to program - order, so the generated random tesnors should not match. - """ - random_tool = migration_utils.DeterministicRandomTestTool( - mode="num_random_ops") - with random_tool.scope(): - a = tf.random.uniform(shape=(3, 1)) - # adding additional computation/ops to the graph and ensuring consistant - # random number generation - a = a * 3 - b = tf.random.uniform(shape=(3, 3)) - b = b * 3 - - random_tool = migration_utils.DeterministicRandomTestTool( - mode="num_random_ops") - with random_tool.scope(): - b_prime = tf.random.uniform(shape=(3, 3)) - # adding additional computation/ops to the graph and ensuring consistant - # random number generation - b_prime = b_prime * 3 - a_prime = tf.random.uniform(shape=(3, 1)) - a_prime = a_prime * 3 - # validate that the tensors are different - self.assertNotAllClose(a, a_prime) - self.assertNotAllClose(b, b_prime) - - def test_num_rand_ops_operation_seed(self): - """Test random tensor generation consistancy in num_random_ops mode. - - validate if random number generation match across two different program - orders. - """ - random_tool = migration_utils.DeterministicRandomTestTool( - mode="num_random_ops") - with random_tool.scope(): - # operation seed = 0 - a = tf.random.uniform(shape=(3, 1)) - a = a * 3 - # operation seed = 1 - b = tf.random.uniform(shape=(3, 3)) - b = b * 3 - - random_tool = migration_utils.DeterministicRandomTestTool( - mode="num_random_ops") - with random_tool.scope(): - random_tool.operation_seed = 1 - b_prime = tf.random.uniform(shape=(3, 3)) - b_prime = b_prime * 3 - random_tool.operation_seed = 0 - a_prime = tf.random.uniform(shape=(3, 1)) - a_prime = a_prime * 3 - - self.assertAllClose(a, a_prime) - self.assertAllClose(b, b_prime) - - def test_num_rand_ops_disallow_repeated_ops_seed(self): - """Test random tensor generation consistancy in num_random_ops mode. - - validate if DeterministicRandomTestTool disallows reusing already-used - operation seeds. 
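The test being rewritten here checks the guard in `_get_seed`: once an operation seed has been consumed in `num_random_ops` mode, forcing it back via the `operation_seed` setter raises a `ValueError`. A minimal sketch of that failure mode, again using only the API from this diff:

```python
import tensorflow.compat.v2 as tf

from keras.legacy_tf_layers import migration_utils

tool = migration_utils.DeterministicRandomTestTool(mode="num_random_ops")
with tool.scope():
    a = tf.random.uniform(shape=(3, 1))  # consumes operation seed 0
    tool.operation_seed = 0  # force the next op to reuse seed 0
    try:
        tf.random.uniform(shape=(3, 1))
    except ValueError as err:
        print(err)  # reports the already-used operation seed 0
```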
- """ - random_tool = migration_utils.DeterministicRandomTestTool( - mode="num_random_ops") - with random_tool.scope(): - random_tool.operation_seed = 1 - b_prime = tf.random.uniform(shape=(3, 3)) - b_prime = b_prime * 3 - random_tool.operation_seed = 0 - a_prime = tf.random.uniform(shape=(3, 1)) - a_prime = a_prime * 3 - error_string = "An exception should have been raised before this" - error_raised = "An exception should have been raised before this" - try: - c = tf.random.uniform(shape=(3, 1)) - raise RuntimeError(error_string) - - except ValueError as err: - err_raised = err - - self.assertNotEqual(err_raised, error_string) +class DeterministicRandomTestToolTest(tf.test.TestCase): + def test_constant_mode_no_seed(self): + """Test random tensor generation consistancy in constant mode. + + Verify that the random tensor generated without using the seed is + consistant between graph and eager mode + """ + + # Generate three random tensors to show how the stateful random number + # generation match between sessions and eager execution. + random_tool = migration_utils.DeterministicRandomTestTool() + with random_tool.scope(): + graph = tf.Graph() + with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess: + a = tf.compat.v1.random.uniform(shape=(3, 1)) + # adding additional computation/ops to the graph and ensuring + # consistant random number generation + a = a * 3 + b = tf.compat.v1.random.uniform(shape=(3, 3)) + b = b * 3 + c = tf.compat.v1.random.uniform(shape=(3, 3)) + c = c * 3 + graph_a, graph_b, graph_c = sess.run([a, b, c]) + + a = tf.compat.v2.random.uniform(shape=(3, 1)) + a = a * 3 + b = tf.compat.v2.random.uniform(shape=(3, 3)) + b = b * 3 + c = tf.compat.v2.random.uniform(shape=(3, 3)) + c = c * 3 + # validate that the generated random tensors match + self.assertAllClose(graph_a, a) + self.assertAllClose(graph_b, b) + self.assertAllClose(graph_c, c) + # In constant mode, because b and c were generated with the same seed + # within the same scope and have the same shape, they will have exactly + # the same values. + # validate that b and c are the same, also graph_b and graph_c + self.assertAllClose(b, c) + self.assertAllClose(graph_b, graph_c) + + def test_constant_mode_seed_argument(self): + """Test random tensor generation consistancy in constant mode. + + Verify that the random tensor generated by setting the global seeed + in the args is consistant between graph and eager mode. + """ + random_tool = migration_utils.DeterministicRandomTestTool() + with random_tool.scope(): + graph = tf.Graph() + with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess: + # adding additional computation/ops to the graph and ensuring + # consistant random number generation + a = tf.compat.v1.random.uniform(shape=(3, 1), seed=1234) + a = a * 3 + b = tf.compat.v1.random.uniform(shape=(3, 3), seed=1234) + b = b * 3 + graph_a, graph_b = sess.run([a, b]) + a = tf.compat.v2.random.uniform(shape=(3, 1), seed=1234) + a = a * 3 + b = tf.compat.v2.random.uniform(shape=(3, 3), seed=1234) + b = b * 3 + + # validate that the generated random tensors match + self.assertAllClose(graph_a, a) + self.assertAllClose(graph_b, b) + + def test_num_rand_ops(self): + """Test random tensor generation consistancy in num_random_ops mode. + + Verify that the random tensor generated without using the seed is + consistant between graph and eager mode. 
+ Random tensor generated should be different based on random ops ordering + """ + random_tool = migration_utils.DeterministicRandomTestTool( + mode="num_random_ops" + ) + with random_tool.scope(): + graph = tf.Graph() + with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess: + # adding additional computation/ops to the graph and ensuring + # consistant random number generation + a = tf.compat.v1.random.uniform(shape=(3, 1)) + a = a * 3 + b = tf.compat.v1.random.uniform(shape=(3, 3)) + b = b * 3 + c = tf.compat.v1.random.uniform(shape=(3, 3)) + c = c * 3 + graph_a, graph_b, graph_c = sess.run([a, b, c]) + + random_tool = migration_utils.DeterministicRandomTestTool( + mode="num_random_ops" + ) + with random_tool.scope(): + a = tf.compat.v2.random.uniform(shape=(3, 1)) + a = a * 3 + b = tf.compat.v2.random.uniform(shape=(3, 3)) + b = b * 3 + c = tf.compat.v2.random.uniform(shape=(3, 3)) + c = c * 3 + # validate that the generated random tensors match + self.assertAllClose(graph_a, a) + self.assertAllClose(graph_b, b) + self.assertAllClose(graph_c, c) + # validate that the tensors differ based on ops ordering + self.assertNotAllClose(b, c) + self.assertNotAllClose(graph_b, graph_c) + + def test_num_rand_ops_program_order(self): + """Test random tensor generation consistancy in num_random_ops mode. + + validate that in this mode random number generation is sensitive to + program order, so the generated random tesnors should not match. + """ + random_tool = migration_utils.DeterministicRandomTestTool( + mode="num_random_ops" + ) + with random_tool.scope(): + a = tf.random.uniform(shape=(3, 1)) + # adding additional computation/ops to the graph and ensuring + # consistant random number generation + a = a * 3 + b = tf.random.uniform(shape=(3, 3)) + b = b * 3 + + random_tool = migration_utils.DeterministicRandomTestTool( + mode="num_random_ops" + ) + with random_tool.scope(): + b_prime = tf.random.uniform(shape=(3, 3)) + # adding additional computation/ops to the graph and ensuring + # consistant random number generation + b_prime = b_prime * 3 + a_prime = tf.random.uniform(shape=(3, 1)) + a_prime = a_prime * 3 + # validate that the tensors are different + self.assertNotAllClose(a, a_prime) + self.assertNotAllClose(b, b_prime) + + def test_num_rand_ops_operation_seed(self): + """Test random tensor generation consistancy in num_random_ops mode. + + validate if random number generation match across two different program + orders. + """ + random_tool = migration_utils.DeterministicRandomTestTool( + mode="num_random_ops" + ) + with random_tool.scope(): + # operation seed = 0 + a = tf.random.uniform(shape=(3, 1)) + a = a * 3 + # operation seed = 1 + b = tf.random.uniform(shape=(3, 3)) + b = b * 3 + + random_tool = migration_utils.DeterministicRandomTestTool( + mode="num_random_ops" + ) + with random_tool.scope(): + random_tool.operation_seed = 1 + b_prime = tf.random.uniform(shape=(3, 3)) + b_prime = b_prime * 3 + random_tool.operation_seed = 0 + a_prime = tf.random.uniform(shape=(3, 1)) + a_prime = a_prime * 3 + + self.assertAllClose(a, a_prime) + self.assertAllClose(b, b_prime) + + def test_num_rand_ops_disallow_repeated_ops_seed(self): + """Test random tensor generation consistancy in num_random_ops mode. + + validate if DeterministicRandomTestTool disallows reusing already-used + operation seeds. 
+ """ + random_tool = migration_utils.DeterministicRandomTestTool( + mode="num_random_ops" + ) + with random_tool.scope(): + random_tool.operation_seed = 1 + b_prime = tf.random.uniform(shape=(3, 3)) + b_prime = b_prime * 3 + random_tool.operation_seed = 0 + a_prime = tf.random.uniform(shape=(3, 1)) + a_prime = a_prime * 3 + error_string = "An exception should have been raised before this" + try: + tf.random.uniform(shape=(3, 1)) + raise RuntimeError(error_string) + + except ValueError as err: + err_raised = err + + self.assertNotEqual(err_raised, error_string) if __name__ == "__main__": - tf.test.main() - + tf.test.main() diff --git a/keras/legacy_tf_layers/normalization.py b/keras/legacy_tf_layers/normalization.py index 23d0652d34fa..c11f6457b2c1 100644 --- a/keras/legacy_tf_layers/normalization.py +++ b/keras/legacy_tf_layers/normalization.py @@ -12,198 +12,441 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= -# pylint: disable=g-classes-have-attributes + """Contains the normalization layer classes and their functional aliases.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import warnings +import tensorflow.compat.v2 as tf + from keras.layers.normalization import batch_normalization_v1 from keras.legacy_tf_layers import base + +# isort: off from tensorflow.python.util.tf_export import keras_export from tensorflow.python.util.tf_export import tf_export -@keras_export(v1=['keras.__internal__.legacy.layers.BatchNormalization']) -@tf_export(v1=['layers.BatchNormalization']) +@keras_export(v1=["keras.__internal__.legacy.layers.BatchNormalization"]) +@tf_export(v1=["layers.BatchNormalization"]) class BatchNormalization(batch_normalization_v1.BatchNormalization, base.Layer): - """Batch Normalization layer from (Ioffe et al., 2015). - - Keras APIs handle BatchNormalization updates to the moving_mean and - moving_variance as part of their `fit()` and `evaluate()` loops. However, if a - custom training loop is used with an instance of `Model`, these updates need - to be explicitly included. Here's a simple example of how it can be done: - - ```python - # model is an instance of Model that contains BatchNormalization layer. - update_ops = model.get_updates_for(None) + model.get_updates_for(features) - train_op = optimizer.minimize(loss) - train_op = tf.group([train_op, update_ops]) - ``` - - Args: - axis: An `int` or list of `int`, the axis or axes that should be normalized, - typically the features axis/axes. For instance, after a `Conv2D` layer - with `data_format="channels_first"`, set `axis=1`. If a list of axes is - provided, each axis in `axis` will be normalized + """Batch Normalization layer from (Ioffe et al., 2015). + + Keras APIs handle BatchNormalization updates to the moving_mean and + moving_variance as part of their `fit()` and `evaluate()` loops. However, if + a custom training loop is used with an instance of `Model`, these updates + need to be explicitly included. Here's a simple example of how it can be + done: + + ```python + # model is an instance of Model that contains BatchNormalization layer. 
+ update_ops = model.get_updates_for(None) + model.get_updates_for(features) + train_op = optimizer.minimize(loss) + train_op = tf.group([train_op, update_ops]) + ``` + + Args: + axis: An `int` or list of `int`, the axis or axes that should be + normalized, typically the features axis/axes. For instance, after a + `Conv2D` layer with `data_format="channels_first"`, set `axis=1`. If a + list of axes is provided, each axis in `axis` will be normalized simultaneously. Default is `-1` which uses the last axis. Note: when - using multi-axis batch norm, the `beta`, `gamma`, `moving_mean`, and - `moving_variance` variables are the same rank as the input Tensor, - with dimension size 1 in all reduced (non-axis) dimensions). - momentum: Momentum for the moving average. - epsilon: Small float added to variance to avoid dividing by zero. - center: If True, add offset of `beta` to normalized tensor. If False, `beta` - is ignored. - scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the - next layer is linear (also e.g. `nn.relu`), this can be disabled since the - scaling can be done by the next layer. - beta_initializer: Initializer for the beta weight. - gamma_initializer: Initializer for the gamma weight. - moving_mean_initializer: Initializer for the moving mean. - moving_variance_initializer: Initializer for the moving variance. - beta_regularizer: Optional regularizer for the beta weight. - gamma_regularizer: Optional regularizer for the gamma weight. - beta_constraint: An optional projection function to be applied to the `beta` - weight after being updated by an `Optimizer` (e.g. used to implement norm - constraints or value constraints for layer weights). The function must - take as input the unprojected variable and must return the projected - variable (which must have the same shape). Constraints are not safe to use - when doing asynchronous distributed training. - gamma_constraint: An optional projection function to be applied to the - `gamma` weight after being updated by an `Optimizer`. - renorm: Whether to use Batch Renormalization (Ioffe, 2017). This adds extra - variables during training. The inference is the same for either value of - this parameter. - renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to - scalar `Tensors` used to clip the renorm correction. The correction `(r, - d)` is used as `corrected_value = normalized_value * r + d`, with `r` - clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, - dmax are set to inf, 0, inf, respectively. - renorm_momentum: Momentum used to update the moving means and standard - deviations with renorm. Unlike `momentum`, this affects training and - should be neither too small (which would add noise) nor too large (which - would give stale estimates). Note that `momentum` is still applied to get - the means and variances for inference. - fused: if `None` or `True`, use a faster, fused implementation if possible. - If `False`, use the system recommended implementation. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). - virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`, - which means batch normalization is performed across the whole batch. When - `virtual_batch_size` is not `None`, instead perform "Ghost Batch - Normalization", which creates virtual sub-batches which are each - normalized separately (with shared gamma, beta, and moving statistics). 
- Must divide the actual batch size during execution. - adjustment: A function taking the `Tensor` containing the (dynamic) shape of - the input tensor and returning a pair (scale, bias) to apply to the - normalized values (before gamma and beta), only during training. For - example, if axis==-1, - `adjustment = lambda shape: ( - tf.random.uniform(shape[-1:], 0.93, 1.07), - tf.random.uniform(shape[-1:], -0.1, 0.1))` will scale the normalized - value by up to 7% up or down, then shift the result by up to 0.1 - (with independent scaling and bias for each feature but shared - across all examples), and finally apply gamma and/or beta. If - `None`, no adjustment is applied. Cannot be specified if - virtual_batch_size is specified. - name: A string, the name of the layer. - References: - Batch Normalization - Accelerating Deep Network Training by Reducing + using multi-axis batch norm, the `beta`, `gamma`, `moving_mean`, and + `moving_variance` variables are the same rank as the input Tensor, with + dimension size 1 in all reduced (non-axis) dimensions). + momentum: Momentum for the moving average. + epsilon: Small float added to variance to avoid dividing by zero. + center: If True, add offset of `beta` to normalized tensor. If False, + `beta` is ignored. + scale: If True, multiply by `gamma`. If False, `gamma` is not used. When + the next layer is linear (also e.g. `nn.relu`), this can be disabled + since the scaling can be done by the next layer. + beta_initializer: Initializer for the beta weight. + gamma_initializer: Initializer for the gamma weight. + moving_mean_initializer: Initializer for the moving mean. + moving_variance_initializer: Initializer for the moving variance. + beta_regularizer: Optional regularizer for the beta weight. + gamma_regularizer: Optional regularizer for the gamma weight. + beta_constraint: An optional projection function to be applied to the + `beta` weight after being updated by an `Optimizer` (e.g. used to + implement norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are not + safe to use when doing asynchronous distributed training. + gamma_constraint: An optional projection function to be applied to the + `gamma` weight after being updated by an `Optimizer`. + renorm: Whether to use Batch Renormalization (Ioffe, 2017). This adds + extra variables during training. The inference is the same for either + value of this parameter. + renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to + scalar `Tensors` used to clip the renorm correction. The correction `(r, + d)` is used as `corrected_value = normalized_value * r + d`, with `r` + clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, + dmax are set to inf, 0, inf, respectively. + renorm_momentum: Momentum used to update the moving means and standard + deviations with renorm. Unlike `momentum`, this affects training and + should be neither too small (which would add noise) nor too large (which + would give stale estimates). Note that `momentum` is still applied to + get the means and variances for inference. + fused: if `None` or `True`, use a faster, fused implementation if + possible. If `False`, use the system recommended implementation. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). + virtual_batch_size: An `int`. 
By default, `virtual_batch_size` is `None`, + which means batch normalization is performed across the whole batch. + When `virtual_batch_size` is not `None`, instead perform "Ghost Batch + Normalization", which creates virtual sub-batches which are each + normalized separately (with shared gamma, beta, and moving statistics). + Must divide the actual batch size during execution. + adjustment: A function taking the `Tensor` containing the (dynamic) shape + of the input tensor and returning a pair (scale, bias) to apply to the + normalized values (before gamma and beta), only during training. For + example, if axis==-1, + `adjustment = lambda shape: ( + tf.random.uniform(shape[-1:], 0.93, 1.07), + tf.random.uniform(shape[-1:], -0.1, 0.1))` will scale the normalized + value by up to 7% up or down, then shift the result by up to 0.1 + (with independent scaling and bias for each feature but shared + across all examples), and finally apply gamma and/or beta. If + `None`, no adjustment is applied. Cannot be specified if + virtual_batch_size is specified. + name: A string, the name of the layer. + References: + Batch Normalization - Accelerating Deep Network Training by Reducing + Internal Covariate Shift: + [Ioffe et al., 2015](http://proceedings.mlr.press/v37/ioffe15.html) + ([pdf](http://proceedings.mlr.press/v37/ioffe15.pdf)) + Batch Renormalization - Towards Reducing Minibatch Dependence in + Batch-Normalized Models: + [Ioffe, + 2017](http://papers.nips.cc/paper/6790-batch-renormalization-towards-reducing-minibatch-dependence-in-batch-normalized-models) + ([pdf](http://papers.nips.cc/paper/6790-batch-renormalization-towards-reducing-minibatch-dependence-in-batch-normalized-models.pdf)) + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.BatchNormalization`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. 
+
+    Before:
+
+    ```python
+    bn = tf.compat.v1.layers.BatchNormalization()
+    ```
+
+    After:
+
+    ```python
+    bn = tf.keras.layers.BatchNormalization()
+    ```
+
+    #### How to Map Arguments
+
+    TF1 Arg Name              | TF2 Arg Name              | Note
+    :------------------------ | :------------------------ | :---------------
+    `name`                    | `name`                    | Layer base class
+    `trainable`               | `trainable`               | Layer base class
+    `axis`                    | `axis`                    | -
+    `momentum`                | `momentum`                | -
+    `epsilon`                 | `epsilon`                 | -
+    `center`                  | `center`                  | -
+    `scale`                   | `scale`                   | -
+    `beta_initializer`        | `beta_initializer`        | -
+    `gamma_initializer`       | `gamma_initializer`       | -
+    `moving_mean_initializer` | `moving_mean_initializer` | -
+    `beta_regularizer`        | `beta_regularizer`        | -
+    `gamma_regularizer`       | `gamma_regularizer`       | -
+    `beta_constraint`         | `beta_constraint`         | -
+    `gamma_constraint`        | `gamma_constraint`        | -
+    `renorm`                  | Not supported             | -
+    `renorm_clipping`         | Not supported             | -
+    `renorm_momentum`         | Not supported             | -
+    `fused`                   | Not supported             | -
+    `virtual_batch_size`      | Not supported             | -
+    `adjustment`              | Not supported             | -
+
+    @end_compatibility
+    """
+
+    def __init__(
+        self,
+        axis=-1,
+        momentum=0.99,
+        epsilon=1e-3,
+        center=True,
+        scale=True,
+        beta_initializer=tf.compat.v1.zeros_initializer(),
+        gamma_initializer=tf.compat.v1.ones_initializer(),
+        moving_mean_initializer=tf.compat.v1.zeros_initializer(),
+        moving_variance_initializer=tf.compat.v1.ones_initializer(),
+        beta_regularizer=None,
+        gamma_regularizer=None,
+        beta_constraint=None,
+        gamma_constraint=None,
+        renorm=False,
+        renorm_clipping=None,
+        renorm_momentum=0.99,
+        fused=None,
+        trainable=True,
+        virtual_batch_size=None,
+        adjustment=None,
+        name=None,
+        **kwargs
+    ):
+        super().__init__(
+            axis=axis,
+            momentum=momentum,
+            epsilon=epsilon,
+            center=center,
+            scale=scale,
+            beta_initializer=beta_initializer,
+            gamma_initializer=gamma_initializer,
+            moving_mean_initializer=moving_mean_initializer,
+            moving_variance_initializer=moving_variance_initializer,
+            beta_regularizer=beta_regularizer,
+            gamma_regularizer=gamma_regularizer,
+            beta_constraint=beta_constraint,
+            gamma_constraint=gamma_constraint,
+            renorm=renorm,
+            renorm_clipping=renorm_clipping,
+            renorm_momentum=renorm_momentum,
+            fused=fused,
+            trainable=trainable,
+            virtual_batch_size=virtual_batch_size,
+            adjustment=adjustment,
+            name=name,
+            **kwargs
+        )
+
+    def call(self, inputs, training=False, mask=None):
+        return super().call(inputs, training=training, mask=mask)
+
+
+@keras_export(v1=["keras.__internal__.legacy.layers.batch_normalization"])
+@tf_export(v1=["layers.batch_normalization"])
+def batch_normalization(
+    inputs,
+    axis=-1,
+    momentum=0.99,
+    epsilon=1e-3,
+    center=True,
+    scale=True,
+    beta_initializer=tf.compat.v1.zeros_initializer(),
+    gamma_initializer=tf.compat.v1.ones_initializer(),
+    moving_mean_initializer=tf.compat.v1.zeros_initializer(),
+    moving_variance_initializer=tf.compat.v1.ones_initializer(),
+    beta_regularizer=None,
+    gamma_regularizer=None,
+    beta_constraint=None,
+    gamma_constraint=None,
+    training=False,
+    trainable=True,
+    name=None,
+    reuse=None,
+    renorm=False,
+    renorm_clipping=None,
+    renorm_momentum=0.99,
+    fused=None,
+    virtual_batch_size=None,
+    adjustment=None,
+):
+    """Functional interface for the batch normalization layer from
+    (Ioffe et al., 2015).
+
+    Note: when training, the moving_mean and moving_variance need to be
+    updated. By default the update ops are placed in
+    `tf.GraphKeys.UPDATE_OPS`, so they need to be executed alongside the
+    `train_op`.
Also, be sure to add any + batch_normalization ops before getting the update_ops collection. Otherwise, + update_ops will be empty, and training/inference will not work properly. For + example: + + ```python + x_norm = tf.compat.v1.layers.batch_normalization(x, training=training) + + # ... + + update_ops = tf.compat.v1.get_collection(tf.GraphKeys.UPDATE_OPS) + train_op = optimizer.minimize(loss) + train_op = tf.group([train_op, update_ops]) + ``` + + Args: + inputs: Tensor input. + axis: An `int`, the axis that should be normalized (typically the features + axis). For instance, after a `Convolution2D` layer with + `data_format="channels_first"`, set `axis=1` in `BatchNormalization`. + momentum: Momentum for the moving average. + epsilon: Small float added to variance to avoid dividing by zero. + center: If True, add offset of `beta` to normalized tensor. If False, + `beta` is ignored. + scale: If True, multiply by `gamma`. If False, `gamma` is not used. When + the next layer is linear (also e.g. `nn.relu`), this can be disabled + since the scaling can be done by the next layer. + beta_initializer: Initializer for the beta weight. + gamma_initializer: Initializer for the gamma weight. + moving_mean_initializer: Initializer for the moving mean. + moving_variance_initializer: Initializer for the moving variance. + beta_regularizer: Optional regularizer for the beta weight. + gamma_regularizer: Optional regularizer for the gamma weight. + beta_constraint: An optional projection function to be applied to the + `beta` weight after being updated by an `Optimizer` (e.g. used to + implement norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are not + safe to use when doing asynchronous distributed training. + gamma_constraint: An optional projection function to be applied to the + `gamma` weight after being updated by an `Optimizer`. + training: Either a Python boolean, or a TensorFlow boolean scalar tensor + (e.g. a placeholder). Whether to return the output in training mode + (normalized with statistics of the current batch) or in inference mode + (normalized with moving statistics). **NOTE**: make sure to set this + parameter correctly, or else your training/inference will not work + properly. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). + name: String, the name of the layer. + reuse: Boolean, whether to reuse the weights of a previous layer by the + same name. + renorm: Whether to use Batch Renormalization (Ioffe, 2017). This adds + extra variables during training. The inference is the same for either + value of this parameter. + renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to + scalar `Tensors` used to clip the renorm correction. The correction `(r, + d)` is used as `corrected_value = normalized_value * r + d`, with `r` + clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, + dmax are set to inf, 0, inf, respectively. + renorm_momentum: Momentum used to update the moving means and standard + deviations with renorm. Unlike `momentum`, this affects training and + should be neither too small (which would add noise) nor too large (which + would give stale estimates). Note that `momentum` is still applied to + get the means and variances for inference. 
+ fused: if `None` or `True`, use a faster, fused implementation if + possible. If `False`, use the system recommended implementation. + virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`, + which means batch normalization is performed across the whole batch. + When `virtual_batch_size` is not `None`, instead perform "Ghost Batch + Normalization", which creates virtual sub-batches which are each + normalized separately (with shared gamma, beta, and moving statistics). + Must divide the actual batch size during execution. + adjustment: A function taking the `Tensor` containing the (dynamic) shape + of the input tensor and returning a pair (scale, bias) to apply to the + normalized values (before gamma and beta), only during training. For + example, if axis==-1, + `adjustment = lambda shape: ( + tf.random.uniform(shape[-1:], 0.93, 1.07), + tf.random.uniform(shape[-1:], -0.1, 0.1))` will scale the normalized + value by up to 7% up or down, then shift the result by up to 0.1 + (with independent scaling and bias for each feature but shared + across all examples), and finally apply gamma and/or beta. If + `None`, no adjustment is applied. Cannot be specified if + virtual_batch_size is specified. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + References: + Batch Normalization - Accelerating Deep Network Training by Reducing Internal Covariate Shift: - [Ioffe et al., 2015](http://proceedings.mlr.press/v37/ioffe15.html) - ([pdf](http://proceedings.mlr.press/v37/ioffe15.pdf)) - Batch Renormalization - Towards Reducing Minibatch Dependence in + [Ioffe et al., 2015](http://proceedings.mlr.press/v37/ioffe15.html) + ([pdf](http://proceedings.mlr.press/v37/ioffe15.pdf)) + Batch Renormalization - Towards Reducing Minibatch Dependence in Batch-Normalized Models: - [Ioffe, + [Ioffe, 2017](http://papers.nips.cc/paper/6790-batch-renormalization-towards-reducing-minibatch-dependence-in-batch-normalized-models) - ([pdf](http://papers.nips.cc/paper/6790-batch-renormalization-towards-reducing-minibatch-dependence-in-batch-normalized-models.pdf)) - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.BatchNormalization`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. 
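
For the `virtual_batch_size` argument described above, "Ghost Batch
Normalization" amounts to reshaping the batch into virtual sub-batches and
normalizing each one separately. A NumPy sketch of the training-mode math,
mirroring the reference reshape used by `testGhostBN2Dims` later in this
patch; the helper name and shapes are illustrative.

```python
import numpy as np

def ghost_batch_norm(x, virtual_batch_size, epsilon=1e-3):
    # Reshape (batch, features) to (virtual_batch_size, n_ghosts, features)
    # and normalize over axis 0, as the test's expected values do.
    batch = x.shape[0]
    assert batch % virtual_batch_size == 0
    ghost = x.reshape(
        (virtual_batch_size, batch // virtual_batch_size) + x.shape[1:]
    )
    mean = ghost.mean(axis=0, keepdims=True)
    var = ghost.var(axis=0, keepdims=True)
    return ((ghost - mean) / np.sqrt(var + epsilon)).reshape(x.shape)

y = ghost_batch_norm(np.random.rand(6, 2), virtual_batch_size=3)
```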
-
-  Before:
-
-  ```python
-  bn = tf.compat.v1.layers.BatchNormalization()
-  ```
-
-  After:
-
-  ```python
-  bn = tf.keras.layers.BatchNormalization()
-  ```
-
-  #### How to Map Arguments
-
-  TF1 Arg Name              | TF2 Arg Name              | Note
-  :------------------------ | :------------------------ | :---------------
-  `name`                    | `name`                    | Layer base class
-  `trainable`               | `trainable`               | Layer base class
-  `axis`                    | `axis`                    | -
-  `momentum`                | `momentum`                | -
-  `epsilon`                 | `epsilon`                 | -
-  `center`                  | `center`                  | -
-  `scale`                   | `scale`                   | -
-  `beta_initializer`        | `beta_initializer`        | -
-  `gamma_initializer`       | `gamma_initializer`       | -
-  `moving_mean_initializer` | `moving_mean_initializer` | -
-  `beta_regularizer`        | `beta_regularizer'        | -
-  `gamma_regularizer`       | `gamma_regularizer'       | -
-  `beta_constraint`         | `beta_constraint'         | -
-  `gamma_constraint`        | `gamma_constraint'        | -
-  `renorm`                  | Not supported             | -
-  `renorm_clipping`         | Not supported             | -
-  `renorm_momentum`         | Not supported             | -
-  `fused`                   | Not supported             | -
-  `virtual_batch_size`      | Not supported             | -
-  `adjustment`              | Not supported             | -
-
-  @end_compatibility
-  """
-
-  def __init__(self,
-               axis=-1,
-               momentum=0.99,
-               epsilon=1e-3,
-               center=True,
-               scale=True,
-               beta_initializer=tf.compat.v1.zeros_initializer(),
-               gamma_initializer=tf.compat.v1.ones_initializer(),
-               moving_mean_initializer=tf.compat.v1.zeros_initializer(),
-               moving_variance_initializer=tf.compat.v1.ones_initializer(),
-               beta_regularizer=None,
-               gamma_regularizer=None,
-               beta_constraint=None,
-               gamma_constraint=None,
-               renorm=False,
-               renorm_clipping=None,
-               renorm_momentum=0.99,
-               fused=None,
-               trainable=True,
-               virtual_batch_size=None,
-               adjustment=None,
-               name=None,
-               **kwargs):
-    super().__init__(
+    ([pdf](http://papers.nips.cc/paper/6790-batch-renormalization-towards-reducing-minibatch-dependence-in-batch-normalized-models.pdf))
+
+    @compatibility(TF2)
+    This API is a legacy API that is only compatible with eager execution
+    and `tf.function` if you combine it with
+    `tf.compat.v1.keras.utils.track_tf1_style_variables`.
+
+    Please refer to [tf.layers model mapping section of the migration guide]
+    (https://www.tensorflow.org/guide/migrate/model_mapping)
+    to learn how to use your TensorFlow v1 model in TF2 with Keras.
+
+    The corresponding TensorFlow v2 layer is
+    `tf.keras.layers.BatchNormalization`.
+
+    The batch updating pattern with
+    `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used
+    in native TF2. Consult the `tf.keras.layers.BatchNormalization`
+    documentation for further information.
+
+    #### Structural Mapping to Native TF2
+
+    None of the supported arguments have changed name.
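
The `track_tf1_style_variables` route mentioned in the compatibility note is
what makes this legacy layer usable under eager execution and `tf.function`.
A minimal sketch, assuming a `tf.Module` wrapper; `BNModule` and the input
shape are invented for illustration.

```python
import tensorflow as tf

class BNModule(tf.Module):
    @tf.compat.v1.keras.utils.track_tf1_style_variables
    def __call__(self, x, training=False):
        # The decorator tracks the variables the legacy layer creates, so
        # repeated calls reuse them instead of failing in eager mode.
        return tf.compat.v1.layers.batch_normalization(
            x, momentum=0.9, training=training
        )

module = BNModule()
out = module(tf.ones([2, 4]), training=True)
```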
+
+    Before:
+
+    ```python
+    x_norm = tf.compat.v1.layers.batch_normalization(x)
+    ```
+
+    After:
+
+    To migrate code using TF1 functional layers use the [Keras Functional API]
+    (https://www.tensorflow.org/guide/keras/functional):
+
+    ```python
+    x = tf.keras.Input(shape=(28, 28, 1))
+    y = tf.keras.layers.BatchNormalization()(x)
+    model = tf.keras.Model(x, y)
+    ```
+
+    #### How to Map Arguments
+
+    TF1 Arg Name              | TF2 Arg Name              | Note
+    :------------------------ | :------------------------ | :---------------
+    `name`                    | `name`                    | Layer base class
+    `trainable`               | `trainable`               | Layer base class
+    `axis`                    | `axis`                    | -
+    `momentum`                | `momentum`                | -
+    `epsilon`                 | `epsilon`                 | -
+    `center`                  | `center`                  | -
+    `scale`                   | `scale`                   | -
+    `beta_initializer`        | `beta_initializer`        | -
+    `gamma_initializer`       | `gamma_initializer`       | -
+    `moving_mean_initializer` | `moving_mean_initializer` | -
+    `beta_regularizer`        | `beta_regularizer`        | -
+    `gamma_regularizer`       | `gamma_regularizer`       | -
+    `beta_constraint`         | `beta_constraint`         | -
+    `gamma_constraint`        | `gamma_constraint`        | -
+    `renorm`                  | Not supported             | -
+    `renorm_clipping`         | Not supported             | -
+    `renorm_momentum`         | Not supported             | -
+    `fused`                   | Not supported             | -
+    `virtual_batch_size`      | Not supported             | -
+    `adjustment`              | Not supported             | -
+
+    @end_compatibility
+    """
+    warnings.warn(
+        "`tf.layers.batch_normalization` is deprecated and "
+        "will be removed in a future version. "
+        "Please use `tf.keras.layers.BatchNormalization` instead. "
+        "In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` "
+        "should not be used (consult the `tf.keras.layers.BatchNormalization` "
+        "documentation).",
+        stacklevel=2,
+    )
+    layer = BatchNormalization(
         axis=axis,
         momentum=momentum,
         epsilon=epsilon,
@@ -225,242 +468,10 @@ def __init__(self,
         virtual_batch_size=virtual_batch_size,
         adjustment=adjustment,
         name=name,
-        **kwargs)
-
-  def call(self, inputs, training=False):
-    return super().call(inputs, training=training)
-
-
-@keras_export(v1=['keras.__internal__.legacy.layers.batch_normalization'])
-@tf_export(v1=['layers.batch_normalization'])
-def batch_normalization(inputs,
-                        axis=-1,
-                        momentum=0.99,
-                        epsilon=1e-3,
-                        center=True,
-                        scale=True,
-                        beta_initializer=tf.compat.v1.zeros_initializer(),
-                        gamma_initializer=tf.compat.v1.ones_initializer(),
-                        moving_mean_initializer=tf.compat.v1.zeros_initializer(),
-                        moving_variance_initializer=tf.compat.v1.ones_initializer(),
-                        beta_regularizer=None,
-                        gamma_regularizer=None,
-                        beta_constraint=None,
-                        gamma_constraint=None,
-                        training=False,
-                        trainable=True,
-                        name=None,
-                        reuse=None,
-                        renorm=False,
-                        renorm_clipping=None,
-                        renorm_momentum=0.99,
-                        fused=None,
-                        virtual_batch_size=None,
-                        adjustment=None):
-  """Functional interface for the batch normalization layer from_config(Ioffe et al., 2015).
-
-  Note: when training, the moving_mean and moving_variance need to be updated.
-  By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they
-  need to be executed alongside the `train_op`. Also, be sure to add any
-  batch_normalization ops before getting the update_ops collection. Otherwise,
-  update_ops will be empty, and training/inference will not work properly. For
-  example:
-
-  ```python
-    x_norm = tf.compat.v1.layers.batch_normalization(x, training=training)
-
-    # ...
-
-    update_ops = tf.compat.v1.get_collection(tf.GraphKeys.UPDATE_OPS)
-    train_op = optimizer.minimize(loss)
-    train_op = tf.group([train_op, update_ops])
-  ```
-
-  Args:
-    inputs: Tensor input.
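
In native TF2 the Keras layer updates its moving statistics as a side effect
of being called with `training=True`, so none of the `UPDATE_OPS` plumbing
carries over. A short end-to-end sketch of the functional-API replacement;
the random data is purely illustrative.

```python
import numpy as np
import tensorflow as tf

inputs = tf.keras.Input(shape=(28, 28, 1))
outputs = tf.keras.layers.BatchNormalization()(inputs)
model = tf.keras.Model(inputs, outputs)

# fit() calls the layer with training=True; moving_mean / moving_variance
# are updated automatically, with no collections or control dependencies.
model.compile(optimizer="sgd", loss="mse")
data = np.random.rand(8, 28, 28, 1).astype("float32")
model.fit(data, data, epochs=1, verbose=0)
```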
- axis: An `int`, the axis that should be normalized (typically the features - axis). For instance, after a `Convolution2D` layer with - `data_format="channels_first"`, set `axis=1` in `BatchNormalization`. - momentum: Momentum for the moving average. - epsilon: Small float added to variance to avoid dividing by zero. - center: If True, add offset of `beta` to normalized tensor. If False, `beta` - is ignored. - scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the - next layer is linear (also e.g. `nn.relu`), this can be disabled since the - scaling can be done by the next layer. - beta_initializer: Initializer for the beta weight. - gamma_initializer: Initializer for the gamma weight. - moving_mean_initializer: Initializer for the moving mean. - moving_variance_initializer: Initializer for the moving variance. - beta_regularizer: Optional regularizer for the beta weight. - gamma_regularizer: Optional regularizer for the gamma weight. - beta_constraint: An optional projection function to be applied to the `beta` - weight after being updated by an `Optimizer` (e.g. used to implement norm - constraints or value constraints for layer weights). The function must - take as input the unprojected variable and must return the projected - variable (which must have the same shape). Constraints are not safe to use - when doing asynchronous distributed training. - gamma_constraint: An optional projection function to be applied to the - `gamma` weight after being updated by an `Optimizer`. - training: Either a Python boolean, or a TensorFlow boolean scalar tensor - (e.g. a placeholder). Whether to return the output in training mode - (normalized with statistics of the current batch) or in inference mode - (normalized with moving statistics). **NOTE**: make sure to set this - parameter correctly, or else your training/inference will not work - properly. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). - name: String, the name of the layer. - reuse: Boolean, whether to reuse the weights of a previous layer by the same - name. - renorm: Whether to use Batch Renormalization (Ioffe, 2017). This adds extra - variables during training. The inference is the same for either value of - this parameter. - renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to - scalar `Tensors` used to clip the renorm correction. The correction `(r, - d)` is used as `corrected_value = normalized_value * r + d`, with `r` - clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, - dmax are set to inf, 0, inf, respectively. - renorm_momentum: Momentum used to update the moving means and standard - deviations with renorm. Unlike `momentum`, this affects training and - should be neither too small (which would add noise) nor too large (which - would give stale estimates). Note that `momentum` is still applied to get - the means and variances for inference. - fused: if `None` or `True`, use a faster, fused implementation if possible. - If `False`, use the system recommended implementation. - virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`, - which means batch normalization is performed across the whole batch. When - `virtual_batch_size` is not `None`, instead perform "Ghost Batch - Normalization", which creates virtual sub-batches which are each - normalized separately (with shared gamma, beta, and moving statistics). - Must divide the actual batch size during execution. 
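
Of the arguments listed here, `axis` is the one that most often needs
attention when migrating: for channels-first data the features axis is 1 in
both the TF1 and TF2 layers. A sketch with the TF2 layer; the shapes are
illustrative.

```python
import tensorflow as tf

# NCHW-style input: the channel axis is 1, so normalize along axis=1.
x = tf.keras.Input(shape=(3, 32, 32))
h = tf.keras.layers.Conv2D(8, 3, data_format="channels_first")(x)
y = tf.keras.layers.BatchNormalization(axis=1)(h)
model = tf.keras.Model(x, y)
```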
- adjustment: A function taking the `Tensor` containing the (dynamic) shape of - the input tensor and returning a pair (scale, bias) to apply to the - normalized values (before gamma and beta), only during training. For - example, if axis==-1, - `adjustment = lambda shape: ( - tf.random.uniform(shape[-1:], 0.93, 1.07), - tf.random.uniform(shape[-1:], -0.1, 0.1))` will scale the normalized - value by up to 7% up or down, then shift the result by up to 0.1 - (with independent scaling and bias for each feature but shared - across all examples), and finally apply gamma and/or beta. If - `None`, no adjustment is applied. Cannot be specified if - virtual_batch_size is specified. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - References: - Batch Normalization - Accelerating Deep Network Training by Reducing - Internal Covariate Shift: - [Ioffe et al., 2015](http://proceedings.mlr.press/v37/ioffe15.html) - ([pdf](http://proceedings.mlr.press/v37/ioffe15.pdf)) - Batch Renormalization - Towards Reducing Minibatch Dependence in - Batch-Normalized Models: - [Ioffe, - 2017](http://papers.nips.cc/paper/6790-batch-renormalization-towards-reducing-minibatch-dependence-in-batch-normalized-models) - ([pdf](http://papers.nips.cc/paper/6790-batch-renormalization-towards-reducing-minibatch-dependence-in-batch-normalized-models.pdf)) - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.BatchNormalization`. - - The batch updating pattern with - `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used in - native TF2. Consult the `tf.keras.layers.BatchNormalization` documentation - for further information. - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. 
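
The `adjustment` hook applies its `(scale, bias)` pair after normalization
but before `gamma` and `beta`, which is exactly the reference math asserted
in `testAdjustment` further down. A NumPy sketch; the helper name and inputs
are illustrative.

```python
import numpy as np

def bn_train_adjusted(x, adj_scale, adj_bias, gamma=2.0, beta=3.0, eps=1e-3):
    # Training-mode batch norm: normalize with batch statistics, apply the
    # adjustment, then gamma and beta -- matching the test's expected value.
    mean, var = x.mean(0), x.var(0)
    normed = (x - mean) / np.sqrt(var + eps)
    return (normed * adj_scale + adj_bias) * gamma + beta

x = np.random.rand(4, 3)
y = bn_train_adjusted(x, adj_scale=np.full(3, 1.1), adj_bias=np.full(3, 0.05))
```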
- - Before: - - ```python - x_norm = tf.compat.v1.layers.batch_normalization(x) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input(shape=(28, 28, 1),) - y = tf.keras.layers.BatchNormalization()(x) - model = tf.keras.Model(x, y) - ``` - #### How to Map Arguments - - TF1 Arg Name | TF2 Arg Name | Note - :------------------------ | :------------------------ | :--------------- - `name` | `name` | Layer base class - `trainable` | `trainable` | Layer base class - `axis` | `axis` | - - `momentum` | `momentum` | - - `epsilon` | `epsilon` | - - `center` | `center` | - - `scale` | `scale` | - - `beta_initializer` | `beta_initializer` | - - `gamma_initializer` | `gamma_initializer` | - - `moving_mean_initializer` | `moving_mean_initializer` | - - `beta_regularizer` | `beta_regularizer' | - - `gamma_regularizer` | `gamma_regularizer' | - - `beta_constraint` | `beta_constraint' | - - `gamma_constraint` | `gamma_constraint' | - - `renorm` | Not supported | - - `renorm_clipping` | Not supported | - - `renorm_momentum` | Not supported | - - `fused` | Not supported | - - `virtual_batch_size` | Not supported | - - `adjustment` | Not supported | - - - @end_compatibility - """ - warnings.warn( - '`tf.layers.batch_normalization` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `tf.keras.layers.BatchNormalization` instead. ' - 'In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` ' - 'should not be used (consult the `tf.keras.layers.BatchNormalization` ' - 'documentation).', - stacklevel=2) - layer = BatchNormalization( - axis=axis, - momentum=momentum, - epsilon=epsilon, - center=center, - scale=scale, - beta_initializer=beta_initializer, - gamma_initializer=gamma_initializer, - moving_mean_initializer=moving_mean_initializer, - moving_variance_initializer=moving_variance_initializer, - beta_regularizer=beta_regularizer, - gamma_regularizer=gamma_regularizer, - beta_constraint=beta_constraint, - gamma_constraint=gamma_constraint, - renorm=renorm, - renorm_clipping=renorm_clipping, - renorm_momentum=renorm_momentum, - fused=fused, - trainable=trainable, - virtual_batch_size=virtual_batch_size, - adjustment=adjustment, - name=name, - _reuse=reuse, - _scope=name) - return layer(inputs, training=training) + _reuse=reuse, + _scope=name, + ) + return layer(inputs, training=training) # Aliases diff --git a/keras/legacy_tf_layers/normalization_test.py b/keras/legacy_tf_layers/normalization_test.py index b0a55cc6a5b2..097b20b8555b 100644 --- a/keras/legacy_tf_layers/normalization_test.py +++ b/keras/legacy_tf_layers/normalization_test.py @@ -18,1422 +18,1660 @@ from __future__ import division from __future__ import print_function -import tensorflow.compat.v2 as tf - import os import numpy as np +import tensorflow.compat.v2 as tf -from tensorflow.core.protobuf import saver_pb2 -from tensorflow.python.framework import test_util as tf_test_utils from keras.legacy_tf_layers import convolutional as conv_layers from keras.legacy_tf_layers import normalization as normalization_layers +# isort: off +from tensorflow.core.protobuf import saver_pb2 +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) -@tf_test_utils.run_v1_only('b/120545219') -class BNTest(tf.test.TestCase): - def _simple_model(self, image, fused, freeze_mode): - output_channels, kernel_size = 2, 3 - conv = conv_layers.conv2d( - image, - output_channels, - kernel_size, 
- use_bias=False, - kernel_initializer=tf.compat.v1.ones_initializer()) - bn_layer = normalization_layers.BatchNormalization(fused=fused) - bn_layer._bessels_correction_test_only = False - training = not freeze_mode - bn = bn_layer(conv, training=training) - loss = tf.reduce_sum(tf.abs(bn)) - optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.01) - if not freeze_mode: - update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) - with tf.control_dependencies(update_ops): - train_op = optimizer.minimize(loss) - else: - train_op = optimizer.minimize(loss) - saver = tf.compat.v1.train.Saver(write_version=saver_pb2.SaverDef.V2) - return loss, train_op, saver - - def _train(self, - checkpoint_path, - shape, - use_gpu, - is_fused, - restore=False, - freeze_mode=False, - dtype=tf.float32): - tf.compat.v1.reset_default_graph() - graph = tf.compat.v1.get_default_graph() - with self.session(graph=graph, use_gpu=use_gpu) as sess: - image = tf.compat.v1.placeholder(dtype=dtype, shape=shape) - loss, train_op, saver = self._simple_model(image, is_fused, freeze_mode) - if restore: - saver.restore(sess, checkpoint_path) - else: - self.evaluate(tf.compat.v1.global_variables_initializer()) - np.random.seed(0) - for _ in range(2): - image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype) - sess.run([loss, train_op], feed_dict={image: image_val}) - if restore: - all_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) - all_vars_values = [var.eval() for var in all_vars] - return all_vars_values - else: - saver.save(sess, checkpoint_path) - - def _infer(self, checkpoint_path, image_val, shape, use_gpu, is_fused): - dtype = image_val.dtype - tf.compat.v1.reset_default_graph() - graph = tf.compat.v1.get_default_graph() - with self.session(graph=graph, use_gpu=use_gpu) as sess: - image = tf.compat.v1.placeholder(dtype=dtype, shape=shape) - loss, _, saver = self._simple_model(image, is_fused, True) - saver.restore(sess, checkpoint_path) - loss_val = sess.run(loss, feed_dict={image: image_val}) - return loss_val - - def _trainEvalSequence(self, dtype, train1_use_gpu, train2_use_gpu, - infer_use_gpu): - batch, height, width, input_channels = 2, 4, 5, 3 - shape = [batch, height, width, input_channels] - - # Not all characters in a dtype string representation are allowed in - # filenames in all operating systems. This map will sanitize these. 
- dtype_to_valid_fn = { - tf.float16: 'float16', - tf.float32: 'float32', - } - checkpoint = os.path.join( - self.get_temp_dir(), 'cp_%s_%s_%s_%s' % ( - dtype_to_valid_fn[dtype], train1_use_gpu, train2_use_gpu, - infer_use_gpu)) - - self._train( - checkpoint, +@tf_test_utils.run_v1_only("b/120545219") +class BNTest(tf.test.TestCase): + def _simple_model(self, image, fused, freeze_mode): + output_channels, kernel_size = 2, 3 + conv = conv_layers.conv2d( + image, + output_channels, + kernel_size, + use_bias=False, + kernel_initializer=tf.compat.v1.ones_initializer(), + ) + bn_layer = normalization_layers.BatchNormalization(fused=fused) + bn_layer._bessels_correction_test_only = False + training = not freeze_mode + bn = bn_layer(conv, training=training) + loss = tf.reduce_sum(tf.abs(bn)) + optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.01) + if not freeze_mode: + update_ops = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.UPDATE_OPS + ) + with tf.control_dependencies(update_ops): + train_op = optimizer.minimize(loss) + else: + train_op = optimizer.minimize(loss) + saver = tf.compat.v1.train.Saver(write_version=saver_pb2.SaverDef.V2) + return loss, train_op, saver + + def _train( + self, + checkpoint_path, shape, - use_gpu=train1_use_gpu, - is_fused=True, + use_gpu, + is_fused, restore=False, freeze_mode=False, - dtype=dtype) - - train_vars = self._train( - checkpoint, - shape, - use_gpu=train2_use_gpu, - is_fused=True, - restore=True, - freeze_mode=False, - dtype=dtype) - - np.random.seed(0) - image_val = np.random.rand(batch, height, width, input_channels).astype( - dtype.as_numpy_dtype) - loss_val = self._infer( - checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True) - - return train_vars, loss_val - - def testHalfPrecision(self): - ref_vars, ref_loss = self._trainEvalSequence( dtype=tf.float32, - train1_use_gpu=True, - train2_use_gpu=True, - infer_use_gpu=True) - - self.assertEqual(len(ref_vars), 5) - - for train1_use_gpu in [True, False]: - for train2_use_gpu in [True, False]: - for infer_use_gpu in [True, False]: - test_vars, test_loss = self._trainEvalSequence( - tf.float16, train1_use_gpu, train2_use_gpu, infer_use_gpu) - self.assertEqual(len(test_vars), 5) - for test_var, ref_var in zip(test_vars, ref_vars): - self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3) - self.assertAllClose(test_loss, ref_loss, rtol=1.e-3, atol=1.e-3) - - def _testCheckpoint(self, is_fused_checkpoint_a, is_fused_checkpoint_b, - use_gpu_checkpoint_a, use_gpu_checkpoint_b, - use_gpu_test_a, use_gpu_test_b, freeze_mode): - batch, height, width, input_channels = 2, 4, 5, 3 - shape = [batch, height, width, input_channels] - base_path = '%s_%s_%s_%s_%s_%s' % (is_fused_checkpoint_a, - is_fused_checkpoint_b, - use_gpu_checkpoint_a, - use_gpu_checkpoint_b, use_gpu_test_a, - use_gpu_test_b) - - checkpoint_path_a = os.path.join(self.get_temp_dir(), - 'checkpoint_a_%s' % base_path) - self._train( - checkpoint_path_a, - shape, - use_gpu_checkpoint_a, + ): + tf.compat.v1.reset_default_graph() + graph = tf.compat.v1.get_default_graph() + with self.session(graph=graph, use_gpu=use_gpu) as sess: + image = tf.compat.v1.placeholder(dtype=dtype, shape=shape) + loss, train_op, saver = self._simple_model( + image, is_fused, freeze_mode + ) + if restore: + saver.restore(sess, checkpoint_path) + else: + self.evaluate(tf.compat.v1.global_variables_initializer()) + np.random.seed(0) + for _ in range(2): + image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype) + sess.run([loss, 
train_op], feed_dict={image: image_val}) + if restore: + all_vars = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.GLOBAL_VARIABLES + ) + all_vars_values = [var.eval() for var in all_vars] + return all_vars_values + else: + saver.save(sess, checkpoint_path) + + def _infer(self, checkpoint_path, image_val, shape, use_gpu, is_fused): + dtype = image_val.dtype + tf.compat.v1.reset_default_graph() + graph = tf.compat.v1.get_default_graph() + with self.session(graph=graph, use_gpu=use_gpu) as sess: + image = tf.compat.v1.placeholder(dtype=dtype, shape=shape) + loss, _, saver = self._simple_model(image, is_fused, True) + saver.restore(sess, checkpoint_path) + loss_val = sess.run(loss, feed_dict={image: image_val}) + return loss_val + + def _trainEvalSequence( + self, dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu + ): + batch, height, width, input_channels = 2, 4, 5, 3 + shape = [batch, height, width, input_channels] + + # Not all characters in a dtype string representation are allowed in + # filenames in all operating systems. This map will sanitize these. + dtype_to_valid_fn = { + tf.float16: "float16", + tf.float32: "float32", + } + checkpoint = os.path.join( + self.get_temp_dir(), + "cp_%s_%s_%s_%s" + % ( + dtype_to_valid_fn[dtype], + train1_use_gpu, + train2_use_gpu, + infer_use_gpu, + ), + ) + + self._train( + checkpoint, + shape, + use_gpu=train1_use_gpu, + is_fused=True, + restore=False, + freeze_mode=False, + dtype=dtype, + ) + + train_vars = self._train( + checkpoint, + shape, + use_gpu=train2_use_gpu, + is_fused=True, + restore=True, + freeze_mode=False, + dtype=dtype, + ) + + np.random.seed(0) + image_val = np.random.rand(batch, height, width, input_channels).astype( + dtype.as_numpy_dtype + ) + loss_val = self._infer( + checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True + ) + + return train_vars, loss_val + + def testHalfPrecision(self): + ref_vars, ref_loss = self._trainEvalSequence( + dtype=tf.float32, + train1_use_gpu=True, + train2_use_gpu=True, + infer_use_gpu=True, + ) + + self.assertEqual(len(ref_vars), 5) + + for train1_use_gpu in [True, False]: + for train2_use_gpu in [True, False]: + for infer_use_gpu in [True, False]: + test_vars, test_loss = self._trainEvalSequence( + tf.float16, + train1_use_gpu, + train2_use_gpu, + infer_use_gpu, + ) + self.assertEqual(len(test_vars), 5) + for test_var, ref_var in zip(test_vars, ref_vars): + self.assertAllClose( + test_var, ref_var, rtol=1.0e-3, atol=1.0e-3 + ) + self.assertAllClose( + test_loss, ref_loss, rtol=1.0e-3, atol=1.0e-3 + ) + + def _testCheckpoint( + self, is_fused_checkpoint_a, - restore=False, - freeze_mode=freeze_mode) - checkpoint_path_b = os.path.join(self.get_temp_dir(), - 'checkpoint_b_%s' % base_path) - self._train( - checkpoint_path_b, - shape, - use_gpu_checkpoint_b, is_fused_checkpoint_b, - restore=False, - freeze_mode=freeze_mode) - - vars_fused = self._train( - checkpoint_path_a, - shape, + use_gpu_checkpoint_a, + use_gpu_checkpoint_b, use_gpu_test_a, - True, - restore=True, - freeze_mode=freeze_mode) - vars_nonfused = self._train( - checkpoint_path_b, - shape, use_gpu_test_b, - False, - restore=True, - freeze_mode=freeze_mode) - self.assertEqual(len(vars_fused), 5) - self.assertEqual(len(vars_nonfused), 5) - for var_fused, var_nonfused in zip(vars_fused, vars_nonfused): - self.assertAllClose(var_fused, var_nonfused, atol=1e-5) - - image_val = np.random.rand(batch, height, width, - input_channels).astype(np.float32) - loss_fused_val = self._infer(checkpoint_path_a, image_val, shape, 
- use_gpu_test_a, True) - loss_nonfused_val = self._infer(checkpoint_path_b, image_val, shape, - use_gpu_test_b, False) - self.assertAllClose(loss_fused_val, loss_nonfused_val, atol=1e-6, rtol=3e-4) - - def _testCheckpointCrossDevice(self, ckpt_a_fused, ckpt_a_use_gpu, - ckpt_b_fused, ckpt_b_use_gpu): - for use_gpu_test_a in [True, False]: - for use_gpu_test_b in [True, False]: - for freeze_mode in [True, False]: - self._testCheckpoint(ckpt_a_fused, ckpt_a_use_gpu, ckpt_b_fused, - ckpt_b_use_gpu, use_gpu_test_a, use_gpu_test_b, - freeze_mode) - - def testCheckpointFusedCPUAndFusedGPU(self): - self._testCheckpointCrossDevice(True, False, True, True) - - def testCheckpointFusedCPUAndFusedCPU(self): - self._testCheckpointCrossDevice(True, False, True, False) - - def testCheckpointFusedGPUAndFusedGPU(self): - self._testCheckpointCrossDevice(True, True, True, True) - - def testCheckpointNonFusedCPUAndNonFusedGPU(self): - self._testCheckpointCrossDevice(False, False, False, True) - - def testCheckpointNonFusedCPUAndNonFusedCPU(self): - self._testCheckpointCrossDevice(False, False, False, False) - - def testCheckpointNonFusedGPUAndNonFusedGPU(self): - self._testCheckpointCrossDevice(False, True, False, True) - - def testCheckpointNonFusedGPUAndFusedGPU(self): - self._testCheckpointCrossDevice(False, True, True, True) - - def testCheckpointNonFusedGPUAndFusedCPU(self): - self._testCheckpointCrossDevice(False, True, True, False) - - def testCheckpointNonFusedCPUAndFusedCPU(self): - self._testCheckpointCrossDevice(False, False, True, False) - - def testCreateBN(self): - # Call layer. - bn = normalization_layers.BatchNormalization(axis=1) - inputs = tf.random.uniform((5, 4, 3), seed=1) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - # Verify shape. - self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3]) - - # Verify layer attributes. - self.assertEqual(len(bn.updates), 2) - self.assertEqual(len(bn.variables), 4) - self.assertEqual(len(bn.trainable_variables), 2) - self.assertEqual(len(bn.non_trainable_variables), 2) - - # Test that updates were created and added to UPDATE_OPS. - self.assertEqual(len(bn.updates), 2) - self.assertListEqual( - tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS), bn.updates) - - # Test that weights were created and added to TRAINABLE_VARIABLES. - self.assertListEqual( - tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES), - bn.trainable_variables) - - def testCreateFusedBNFloat16(self): - # Call layer. - bn = normalization_layers.BatchNormalization(axis=1, fused=True) - inputs = tf.random.uniform( - (5, 4, 3, 3), seed=1, dtype=tf.float16) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - # Verify shape. - self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3, 3]) - - # Verify layer attributes. - self.assertEqual(len(bn.updates), 2) - self.assertEqual(len(bn.variables), 4) - self.assertEqual(len(bn.trainable_variables), 2) - self.assertEqual(len(bn.non_trainable_variables), 2) - for var in bn.variables: - self.assertTrue(var.dtype._is_ref_dtype) - - # Test that updates were created and added to UPDATE_OPS. - self.assertEqual(len(bn.updates), 2) - self.assertListEqual( - tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS), bn.updates) - - # Test that weights were created and added to TRAINABLE_VARIABLES. 
- self.assertListEqual( - tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES), - bn.trainable_variables) - - def test3DInputAxis1(self): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=1, epsilon=epsilon, momentum=0.9) - inputs = tf.Variable( - np.random.random((5, 4, 3)) + 100, dtype=tf.float32) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - np_gamma = np.reshape(np_gamma, (1, 4, 1)) - np_beta = np.reshape(np_beta, (1, 4, 1)) - - for _ in range(100): - np_output, _, _ = sess.run([outputs] + bn.updates, - feed_dict={training: True}) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. - moving_mean, moving_var = self.evaluate( - [bn.moving_mean, bn.moving_variance]) - np_inputs = self.evaluate(inputs) - mean = np.mean(np_inputs, axis=(0, 2)) - std = np.std(np_inputs, axis=(0, 2)) - variance = np.square(std) - self.assertAllClose(mean, moving_mean, atol=1e-2) - self.assertAllClose(variance, moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. - np_output = sess.run(outputs, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def test3DInputAxis2(self): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=2, epsilon=epsilon, momentum=0.9) - inputs = tf.Variable( - np.random.random((5, 4, 3)) + 100, dtype=tf.float32) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - np_gamma = np.reshape(np_gamma, (1, 1, 3)) - np_beta = np.reshape(np_beta, (1, 1, 3)) - for _ in range(100): - np_output, _, _ = sess.run([outputs] + bn.updates, - feed_dict={training: True}) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. - moving_mean, moving_var = self.evaluate( - [bn.moving_mean, bn.moving_variance]) - np_inputs = self.evaluate(inputs) - mean = np.mean(np_inputs, axis=(0, 1)) - std = np.std(np_inputs, axis=(0, 1)) - variance = np.square(std) - self.assertAllClose(mean, moving_mean, atol=1e-2) - self.assertAllClose(variance, moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. - np_output = sess.run(outputs, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. 
- normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def test4DInputAxis1(self): - if tf.test.is_gpu_available(cuda_only=True): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=1, epsilon=epsilon, momentum=0.9) - inputs = tf.Variable( - np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - with self.session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - np_gamma = np.reshape(np_gamma, (1, 4, 1, 1)) - np_beta = np.reshape(np_beta, (1, 4, 1, 1)) - for _ in range(100): - np_output, _, _ = sess.run( - [outputs] + bn.updates, feed_dict={training: True}) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. - moving_mean, moving_var = self.evaluate( - [bn.moving_mean, bn.moving_variance]) - np_inputs = self.evaluate(inputs) - mean = np.mean(np_inputs, axis=(0, 2, 3)) - std = np.std(np_inputs, axis=(0, 2, 3)) - variance = np.square(std) - self.assertAllClose(mean, moving_mean, atol=1e-2) - self.assertAllClose(variance, moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. - np_output = sess.run(outputs, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def test4DInputAxis2(self): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=2, epsilon=epsilon, momentum=0.9) - inputs = tf.Variable( - np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - np_gamma = np.reshape(np_gamma, (1, 1, 3, 1)) - np_beta = np.reshape(np_beta, (1, 1, 3, 1)) - for _ in range(100): - np_output, _, _ = sess.run([outputs] + bn.updates, - feed_dict={training: True}) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. - moving_mean, moving_var = self.evaluate( - [bn.moving_mean, bn.moving_variance]) - np_inputs = self.evaluate(inputs) - mean = np.mean(np_inputs, axis=(0, 1, 3)) - std = np.std(np_inputs, axis=(0, 1, 3)) - variance = np.square(std) - self.assertAllClose(mean, moving_mean, atol=1e-2) - self.assertAllClose(variance, moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. - np_output = sess.run(outputs, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. 
- normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def test4DInputAxis3(self): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=3, epsilon=epsilon, momentum=0.9) - inputs = tf.Variable( - np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) - np_beta = np.reshape(np_beta, (1, 1, 1, 6)) - for _ in range(100): - np_output, _, _ = sess.run([outputs] + bn.updates, - feed_dict={training: True}) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. - moving_mean, moving_var = self.evaluate( - [bn.moving_mean, bn.moving_variance]) - np_inputs = self.evaluate(inputs) - mean = np.mean(np_inputs, axis=(0, 1, 2)) - std = np.std(np_inputs, axis=(0, 1, 2)) - variance = np.square(std) - self.assertAllClose(mean, moving_mean, atol=1e-2) - self.assertAllClose(variance, moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. - np_output = sess.run(outputs, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def test4DInputAxis3Fused(self): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=3, epsilon=epsilon, momentum=0.9, fused=True) - inputs = tf.Variable( - np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) - np_beta = np.reshape(np_beta, (1, 1, 1, 6)) - for _ in range(100): - np_output, _, _ = sess.run( - [outputs] + bn.updates, feed_dict={training: True}) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. - moving_mean, moving_var = self.evaluate( - [bn.moving_mean, bn.moving_variance]) - np_inputs = self.evaluate(inputs) - mean = np.mean(np_inputs, axis=(0, 1, 2)) - std = np.std(np_inputs, axis=(0, 1, 2)) - variance = np.square(std) - self.assertAllClose(mean, moving_mean, atol=1e-2) - self.assertAllClose(variance, moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. - np_output = sess.run(outputs, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. 
- normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def test4DInputAxis1Fused(self): - if tf.test.is_gpu_available(cuda_only=True): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=1, epsilon=epsilon, momentum=0.9, fused=True) - inputs = tf.Variable( - np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - np_gamma = np.reshape(np_gamma, (1, 4, 1, 1)) - np_beta = np.reshape(np_beta, (1, 4, 1, 1)) - for _ in range(100): - np_output, _, _ = sess.run( - [outputs] + bn.updates, feed_dict={training: True}) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. - moving_mean, moving_var = self.evaluate( - [bn.moving_mean, bn.moving_variance]) - np_inputs = self.evaluate(inputs) - mean = np.mean(np_inputs, axis=(0, 2, 3)) - std = np.std(np_inputs, axis=(0, 2, 3)) - variance = np.square(std) - self.assertAllClose(mean, moving_mean, atol=1e-2) - self.assertAllClose(variance, moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. - np_output = sess.run(outputs, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def testNegativeAxis(self): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=-1, epsilon=epsilon, momentum=0.9) - inputs = tf.Variable( - np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) - np_beta = np.reshape(np_beta, (1, 1, 1, 6)) - for _ in range(100): - np_output, _, _ = sess.run([outputs] + bn.updates, - feed_dict={training: True}) - - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. - moving_mean, moving_var = self.evaluate( - [bn.moving_mean, bn.moving_variance]) - np_inputs = self.evaluate(inputs) - mean = np.mean(np_inputs, axis=(0, 1, 2)) - std = np.std(np_inputs, axis=(0, 1, 2)) - variance = np.square(std) - self.assertAllClose(mean, moving_mean, atol=1e-2) - self.assertAllClose(variance, moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. 
- np_output = sess.run(outputs, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def testBooleanLearningPhase(self): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=-1, epsilon=epsilon, momentum=0.9) - inputs = tf.Variable( - np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32) - outputs_training = bn(inputs, training=True) - outputs_infer = bn(inputs, training=False) - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) - np_beta = np.reshape(np_beta, (1, 1, 1, 6)) - for _ in range(100): - np_output, _, _ = sess.run([outputs_training] + bn.updates) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. - moving_mean, moving_var = self.evaluate( - [bn.moving_mean, bn.moving_variance]) - np_inputs = self.evaluate(inputs) - mean = np.mean(np_inputs, axis=(0, 1, 2)) - std = np.std(np_inputs, axis=(0, 1, 2)) - variance = np.square(std) - self.assertAllClose(mean, moving_mean, atol=1e-2) - self.assertAllClose(variance, moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. - np_output = self.evaluate(outputs_infer) - - # Verify that the axis is normalized during inference. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def testFunctionalNoReuse(self): - inputs = tf.Variable( - np.random.random((5, 4, 3, 6)), dtype=tf.float32) - epsilon = 1e-3 - training = tf.compat.v1.placeholder(dtype='bool') - outputs = normalization_layers.batch_norm( - inputs, - axis=-1, - momentum=0.9, - epsilon=epsilon, - training=training, - name='bn') - - updates = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) - all_vars = dict([(v.name, v) for v in tf.compat.v1.global_variables()]) - moving_mean = all_vars['bn/moving_mean:0'] - moving_variance = all_vars['bn/moving_variance:0'] - beta = all_vars['bn/beta:0'] - gamma = all_vars['bn/gamma:0'] - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - np_gamma, np_beta = self.evaluate([gamma, beta]) - np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) - np_beta = np.reshape(np_beta, (1, 1, 1, 6)) - for _ in range(100): - np_output, _, _ = sess.run([outputs] + updates, - feed_dict={training: True}) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. 
- np_moving_mean, np_moving_var = self.evaluate( - [moving_mean, moving_variance]) - np_inputs = self.evaluate(inputs) - np_mean = np.mean(np_inputs, axis=(0, 1, 2)) - np_std = np.std(np_inputs, axis=(0, 1, 2)) - np_variance = np.square(np_std) - self.assertAllClose(np_mean, np_moving_mean, atol=1e-2) - self.assertAllClose(np_variance, np_moving_var, atol=1e-2) - - # Test inference with placeholder learning phase. - np_output = sess.run(outputs, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def testFunctionalReuse(self): - inputs1 = tf.Variable( - np.random.random((5, 4, 3, 6)), dtype=tf.float32) - inputs2 = tf.Variable( - np.random.random((5, 4, 3, 6)), dtype=tf.float32) - epsilon = 1e-3 - training = tf.compat.v1.placeholder(dtype='bool') - _ = normalization_layers.batch_norm( - inputs1, - axis=-1, - momentum=0.9, - epsilon=epsilon, - training=training, - name='bn') - outputs2 = normalization_layers.batch_norm( - inputs2, - axis=-1, - momentum=0.9, - epsilon=epsilon, - training=training, - name='bn', - reuse=True) - - # Last 2 update ops - updates = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)[-2:] - all_vars = dict([(v.name, v) for v in tf.compat.v1.global_variables()]) - moving_mean = all_vars['bn/moving_mean:0'] - moving_variance = all_vars['bn/moving_variance:0'] - beta = all_vars['bn/beta:0'] - gamma = all_vars['bn/gamma:0'] - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - for _ in range(100): - np_output, _, _ = sess.run([outputs2] + updates, - feed_dict={training: True}) - - # Verify that the statistics are updated during training. - np_moving_mean, np_moving_var = self.evaluate( - [moving_mean, moving_variance]) - np_inputs = self.evaluate(inputs2) - np_mean = np.mean(np_inputs, axis=(0, 1, 2)) - np_std = np.std(np_inputs, axis=(0, 1, 2)) - np_variance = np.square(np_std) - self.assertAllClose(np_mean, np_moving_mean, atol=1e-2) - self.assertAllClose(np_variance, np_moving_var, atol=1e-2) - - # Verify that the axis is normalized during training. - np_gamma, np_beta = self.evaluate([gamma, beta]) - np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) - np_beta = np.reshape(np_beta, (1, 1, 1, 6)) - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Test inference with placeholder learning phase. - np_output = sess.run(outputs2, feed_dict={training: False}) - - # Verify that the axis is normalized during inference. 
- normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - def testFunctionalReuseFromScope(self): - inputs = tf.Variable( - np.random.random((5, 4, 3, 6)), dtype=tf.float32) - epsilon = 1e-3 - training = tf.compat.v1.placeholder(dtype='bool') - with tf.compat.v1.variable_scope('scope'): - _ = normalization_layers.batch_norm( - inputs, axis=-1, momentum=0.9, epsilon=epsilon, training=training) - self.assertEqual(len(tf.compat.v1.global_variables()), 5) - with tf.compat.v1.variable_scope('scope', reuse=True): - _ = normalization_layers.batch_norm( - inputs, axis=-1, momentum=0.9, epsilon=epsilon, training=training) - self.assertEqual(len(tf.compat.v1.global_variables()), 5) - - def testNoCenter(self): - bn = normalization_layers.BatchNormalization(axis=1, center=False) - inputs = tf.random.uniform((5, 4, 3), seed=1) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - # Verify shape. - self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3]) - - # Verify layer attributes. - self.assertEqual(len(bn.updates), 2) - self.assertEqual(len(bn.variables), 3) - self.assertEqual(len(bn.trainable_variables), 1) - self.assertEqual(len(bn.non_trainable_variables), 2) - - def testNoScale(self): - bn = normalization_layers.BatchNormalization(axis=1, scale=False) - inputs = tf.random.uniform((5, 4, 3), seed=1) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - # Verify shape. - self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3]) - - # Verify layer attributes. - self.assertEqual(len(bn.updates), 2) - self.assertEqual(len(bn.variables), 3) - self.assertEqual(len(bn.trainable_variables), 1) - self.assertEqual(len(bn.non_trainable_variables), 2) - - def testRegularizers(self): - reg = lambda x: 0.1 * tf.reduce_sum(x) - bn = normalization_layers.BatchNormalization(axis=1, beta_regularizer=reg) - inputs = tf.random.uniform((5, 4, 3), seed=1) - training = tf.compat.v1.placeholder(dtype='bool') - _ = bn(inputs, training=training) - self.assertEqual(len(bn.losses), 1) - - bn = normalization_layers.BatchNormalization(axis=1, gamma_regularizer=reg) - inputs = tf.random.uniform((5, 4, 3), seed=1) - training = tf.compat.v1.placeholder(dtype='bool') - _ = bn(inputs, training=training) - self.assertEqual(len(bn.losses), 1) - - def testConstraints(self): - g_constraint = lambda x: x / tf.reduce_sum(x) - b_constraint = lambda x: x / tf.reduce_max(x) - bn = normalization_layers.BatchNormalization(axis=1, - gamma_constraint=g_constraint, - beta_constraint=b_constraint) - inputs = tf.random.uniform((5, 4, 3), seed=1) - bn(inputs) - self.assertEqual(bn.gamma_constraint, g_constraint) - self.assertEqual(bn.beta_constraint, b_constraint) - - def testRenorm(self): - shape = (4, 3) - xt = tf.compat.v1.placeholder(tf.float32, shape) - momentum = 0.99 - renorm_momentum = 0.8 - rmax = 1.1 - rmin = 0.9 - dmax = 0.1 - gamma = 2. - beta = 3. - epsilon = 0.001 - bn = normalization_layers.BatchNormalization( - axis=1, - gamma_initializer=tf.compat.v1.constant_initializer(gamma), - beta_initializer=tf.compat.v1.constant_initializer(beta), - epsilon=epsilon, - momentum=momentum, - renorm=True, - renorm_clipping={'rmax': rmax, 'rmin': rmin, 'dmax': dmax}, - renorm_momentum=renorm_momentum) - training = tf.compat.v1.placeholder(tf.bool) - yt = bn(xt, training=training) - - moving_mean = 0. 
- moving_stddev = 1. - renorm_mean = 0. - renorm_stddev = 1. - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - for _ in range(5): - x = np.random.random(shape) - - mean = x.mean(0) - variance = x.var(0) - stddev = np.sqrt(variance + epsilon) - r = (stddev / renorm_stddev).clip(rmin, rmax) - d = ((mean - renorm_mean) / renorm_stddev).clip(-dmax, dmax) - y_train = ((x - mean) / stddev * r + d) * gamma + beta - renorm_mean += (mean - renorm_mean) * (1. - renorm_momentum) - renorm_stddev += (stddev - renorm_stddev) * (1. - renorm_momentum) - moving_mean += (mean - moving_mean) * (1. - momentum) - moving_stddev += (stddev - moving_stddev) * (1. - momentum) - - y_test = ((x - moving_mean) / - (moving_stddev * moving_stddev)**0.5 * gamma) + beta - - yt_val_train, _, _ = sess.run([yt] + bn.updates, - feed_dict={xt: x, training: True}) - yt_val_test, _, _ = sess.run([yt] + bn.updates, - feed_dict={xt: x, training: False}) - - self.assertAllClose(y_train, yt_val_train, atol=1e-5) - self.assertAllClose(y_test, yt_val_test, atol=1e-5) - - def testRenormNoClippingSameMomentumGivesSameTestTrain(self): - shape = (4, 3) - xt = tf.compat.v1.placeholder(tf.float32, shape) - momentum = 0.9 - renorm_momentum = 0.9 - gamma = 2. - beta = 3. - epsilon = 0.001 - bn = normalization_layers.BatchNormalization( - axis=1, - gamma_initializer=tf.compat.v1.constant_initializer(gamma), - beta_initializer=tf.compat.v1.constant_initializer(beta), - epsilon=epsilon, - momentum=momentum, - renorm=True, - renorm_clipping=None, - renorm_momentum=momentum) - training = tf.compat.v1.placeholder(tf.bool) - yt = bn(xt, training=training) - moving_mean = 0. - moving_stddev = 1. - renorm_mean = 0. - renorm_stddev = 1. - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - for step in range(6): - x = np.random.random(shape) - - mean = x.mean(0) - variance = x.var(0) - stddev = np.sqrt(variance + epsilon) - r = (stddev / renorm_stddev) - d = ((mean - renorm_mean) / renorm_stddev) - y_test = ((x - moving_mean) / - (moving_stddev * moving_stddev)**0.5 * gamma) + beta - y_train = ((x - mean) / stddev * r + d) * gamma + beta - renorm_mean += (mean - renorm_mean) * (1. - renorm_momentum) - renorm_stddev += (stddev - renorm_stddev) * (1. - renorm_momentum) - moving_mean += (mean - moving_mean) * (1. - momentum) - moving_stddev += (stddev - moving_stddev) * (1. - momentum) - - # Compute test values first, before the train mode updates the moving - # averages. - yt_val_test, _, _ = sess.run([yt] + bn.updates, - feed_dict={xt: x, training: False}) - yt_val_train, _, _ = sess.run([yt] + bn.updates, - feed_dict={xt: x, training: True}) - - # Due to initialization inconsistencies, values may not be identical - # on the first iteration (but shouldn't be different by much more than - # epsilon). After the first iteration they should be identical. - atol = epsilon * 1.5 if step == 0 else 1e-5 - self.assertAllClose(y_train, yt_val_train, atol=atol) - self.assertAllClose(y_test, yt_val_test, atol=atol) - self.assertAllClose(yt_val_train, yt_val_test, atol=atol) - - def testAdjustment(self): - shape = (4, 3) - xt = tf.compat.v1.placeholder(tf.float32, shape) - momentum = 0.99 - gamma = 2. - beta = 3. 
- epsilon = 0.001 - adjust_scale = tf.random.uniform(shape[-1:], 0.5, 1.5) - adjust_bias = tf.random.uniform(shape[-1:], -.2, .2) - bn = normalization_layers.BatchNormalization( - axis=1, - gamma_initializer=tf.compat.v1.constant_initializer(gamma), - beta_initializer=tf.compat.v1.constant_initializer(beta), - epsilon=epsilon, - momentum=momentum, - adjustment=lambda _: (adjust_scale, adjust_bias)) - training = tf.compat.v1.placeholder(tf.bool) - yt = bn(xt, training=training) - - moving_mean = 0. - moving_variance = 1. - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - for _ in range(5): - x = np.random.random(shape) - yt_val_train, adj_scale_val, adj_bias_val = sess.run( - [yt, adjust_scale, adjust_bias] + bn.updates, - feed_dict={xt: x, training: True})[:3] - yt_val_test = sess.run([yt] + bn.updates, - feed_dict={xt: x, training: False})[0] - - mean = x.mean(0) - variance = x.var(0) - y_train = (((x - mean) / (variance + epsilon) ** 0.5) * adj_scale_val + - adj_bias_val) * gamma + beta - moving_mean += (mean - moving_mean) * (1. - momentum) - moving_variance += (variance - moving_variance) * (1. - momentum) - - y_test = ((x - moving_mean) / (moving_variance + epsilon) ** 0.5 * - gamma) + beta - - self.assertAllClose(y_train, yt_val_train, atol=1e-5) - self.assertAllClose(y_test, yt_val_test, atol=1e-5) - - def testRenormWithAdjustment(self): - shape = (4, 3) - xt = tf.compat.v1.placeholder(tf.float32, shape) - momentum = 0.99 - renorm_momentum = 0.8 - rmax = 1.1 - rmin = 0.9 - dmax = 0.1 - gamma = 2. - beta = 3. - epsilon = 0.001 - adjust_scale = tf.random.uniform(shape[-1:], 0.5, 1.5) - adjust_bias = tf.random.uniform(shape[-1:], -.2, .2) - bn = normalization_layers.BatchNormalization( - axis=1, - gamma_initializer=tf.compat.v1.constant_initializer(gamma), - beta_initializer=tf.compat.v1.constant_initializer(beta), - epsilon=epsilon, - momentum=momentum, - renorm=True, - renorm_clipping={'rmax': rmax, 'rmin': rmin, 'dmax': dmax}, - renorm_momentum=renorm_momentum, - adjustment=lambda _: (adjust_scale, adjust_bias)) - training = tf.compat.v1.placeholder(tf.bool) - yt = bn(xt, training=training) - - moving_mean = 0. - moving_stddev = 1. - renorm_mean = 0. - renorm_stddev = 1. - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - for _ in range(5): - x = np.random.random(shape) - yt_val_train, adj_scale_val, adj_bias_val = sess.run( - [yt, adjust_scale, adjust_bias] + bn.updates, - feed_dict={xt: x, training: True})[:3] - yt_val_test = sess.run([yt] + bn.updates, - feed_dict={xt: x, training: False})[0] - - mean = x.mean(0) - variance = x.var(0) - stddev = np.sqrt(variance + epsilon) - r = (stddev / renorm_stddev).clip(rmin, rmax) - d = ((mean - renorm_mean) / renorm_stddev).clip(-dmax, dmax) - y_train = (((x - mean) / stddev * r + d) * adj_scale_val + - adj_bias_val) * gamma + beta - renorm_mean += (mean - renorm_mean) * (1. - renorm_momentum) - renorm_stddev += (stddev - renorm_stddev) * (1. - renorm_momentum) - moving_mean += (mean - moving_mean) * (1. - momentum) - moving_stddev += (stddev - moving_stddev) * (1. 
- momentum) - - y_test = ((x - moving_mean) / - (moving_stddev * moving_stddev)**0.5 * gamma) + beta - - self.assertAllClose(y_train, yt_val_train, atol=1e-5) - self.assertAllClose(y_test, yt_val_test, atol=1e-5) - - def testGhostBNNegativeVirtualBatch(self): - shape = [6, 5, 4, 3] - inp = tf.random.uniform(shape, seed=1) - - with self.assertRaises(ValueError): - normalization_layers.batch_normalization( - inp, virtual_batch_size=-1) - - def testGhostBNVirtualBatchFull(self): - shape = [6, 5, 4, 3] - inp = tf.random.uniform(shape, seed=1) - out1 = normalization_layers.batch_normalization(inp) - out2 = normalization_layers.batch_normalization( - inp, virtual_batch_size=6) - - self.assertListEqual( - out1.shape.as_list(), out2.shape.as_list()) - - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - - x = np.random.random(shape) - y1, y2 = sess.run([out1, out2], feed_dict={inp: x}) - - self.assertAllClose(y1, y2, atol=1e-5) - - def testGhostBNInputOutputShapesMatch(self): - shape = [6, 4, 3] - inp = tf.random.uniform(shape, seed=1) - out = normalization_layers.batch_normalization( - inp, virtual_batch_size=3) - self.assertListEqual(out.shape.as_list(), shape) - - def testGhostBNUnknownBatchSize(self): - np_shape = [10, 5, 4] - tf_shape = [None, 5, 4] - inp = tf.compat.v1.placeholder(tf.float32, tf_shape) - out = normalization_layers.batch_normalization( - inp, virtual_batch_size=2) - - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - - x = np.random.random(np_shape) - y = sess.run(out, feed_dict={inp: x}) - - self.assertListEqual(list(y.shape), np_shape) - - def testGhostBN2Dims(self): - shape = [6, 2] - virtual_batch_size = 3 - beta = 2. - gamma = 3. - momentum = 0.8 - epsilon = 1e-3 - moving_means = np.zeros([2, 2], dtype=np.float32) - moving_vars = np.ones([2, 2], dtype=np.float32) - - inp = tf.compat.v1.placeholder(tf.float32, shape) - is_training = tf.compat.v1.placeholder(tf.bool) - bn = normalization_layers.BatchNormalization( - momentum=momentum, - epsilon=epsilon, - beta_initializer=tf.compat.v1.constant_initializer(beta), - gamma_initializer=tf.compat.v1.constant_initializer(gamma), - virtual_batch_size=virtual_batch_size) - out = bn(inp, training=is_training) - ghost_shape = ([virtual_batch_size, - shape[0] // virtual_batch_size, - shape[1]]) - - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - for _ in range(5): - x = np.random.random(shape) - - sub_batched = np.reshape(x, ghost_shape) - means = np.mean(sub_batched, axis=0, keepdims=True) - variances = np.var(sub_batched, axis=0, keepdims=True) - - avg_means = np.mean(means, axis=1, keepdims=True) - avg_variances = np.mean(variances, axis=1, keepdims=True) - - moving_means = moving_means * momentum + avg_means * (1. - momentum) - moving_vars = moving_vars * momentum + avg_variances * (1. 
- momentum) - - y_train = ((sub_batched - means) / - (variances + epsilon) ** 0.5 * gamma) + beta - y_test = ((sub_batched - moving_means) / - (moving_vars + epsilon) ** 0.5 * gamma) + beta - - y_train = np.reshape(y_train, shape) - y_test = np.reshape(y_test, shape) - - y_val_train, _, _ = sess.run([out] + bn.updates, - feed_dict={inp: x, is_training: True}) - y_val_test = sess.run(out, feed_dict={inp: x, is_training: False}) - - self.assertAllClose(y_train, y_val_train, atol=1e-5) - self.assertAllClose(y_test, y_val_test, atol=1e-5) - - def testGhostBN4DimsAxis3(self): - shape = [6, 10, 10, 3] - virtual_batch_size = 2 - beta = 2. - gamma = 3. - momentum = 0.8 - epsilon = 1e-3 - moving_means = np.zeros([1, 1, 1, 1, 3], dtype=np.float32) - moving_vars = np.ones([1, 1, 1, 1, 3], dtype=np.float32) - - inp = tf.compat.v1.placeholder(tf.float32, shape) - is_training = tf.compat.v1.placeholder(tf.bool) - bn = normalization_layers.BatchNormalization( - axis=3, - momentum=momentum, - epsilon=epsilon, - beta_initializer=tf.compat.v1.constant_initializer(beta), - gamma_initializer=tf.compat.v1.constant_initializer(gamma), - virtual_batch_size=virtual_batch_size) - out = bn(inp, training=is_training) - ghost_shape = ([virtual_batch_size, shape[0] // virtual_batch_size] + - shape[1:]) - - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - for _ in range(5): - x = np.random.random(shape) - - sub_batched = np.reshape(x, ghost_shape) - means = np.mean(sub_batched, axis=(0, 2, 3), keepdims=True) - variances = np.var(sub_batched, axis=(0, 2, 3), keepdims=True) - - avg_means = np.mean(means, axis=1, keepdims=True) - avg_variances = np.mean(variances, axis=1, keepdims=True) - - moving_means = moving_means * momentum + avg_means * (1. - momentum) - moving_vars = moving_vars * momentum + avg_variances * (1. - momentum) - - y_train = ((sub_batched - means) / - (variances + epsilon) ** 0.5 * gamma) + beta - y_test = ((sub_batched - moving_means) / - (moving_vars + epsilon) ** 0.5 * gamma) + beta - - y_train = np.reshape(y_train, shape) - y_test = np.reshape(y_test, shape) - - y_val_train, _, _ = sess.run([out] + bn.updates, - feed_dict={inp: x, is_training: True}) - y_val_test = sess.run(out, feed_dict={inp: x, is_training: False}) - - self.assertAllClose(y_train, y_val_train, atol=1e-2) - self.assertAllClose(y_test, y_val_test, atol=1e-2) - - def testGhostBN4DimsAxis1(self): - shape = [6, 3, 10, 10] - virtual_batch_size = 2 - beta = 2. - gamma = 3. 
- momentum = 0.8 - epsilon = 1e-3 - moving_means = np.zeros([1, 1, 3, 1, 1], dtype=np.float32) - moving_vars = np.ones([1, 1, 3, 1, 1], dtype=np.float32) - - inp = tf.compat.v1.placeholder(tf.float32, shape) - is_training = tf.compat.v1.placeholder(tf.bool) - bn = normalization_layers.BatchNormalization( - axis=1, - momentum=momentum, - epsilon=epsilon, - beta_initializer=tf.compat.v1.constant_initializer(beta), - gamma_initializer=tf.compat.v1.constant_initializer(gamma), - virtual_batch_size=virtual_batch_size, - fused=False) # NCHW is unsupported by CPU fused batch norm - out = bn(inp, training=is_training) - ghost_shape = ([virtual_batch_size, shape[0] // virtual_batch_size] + - shape[1:]) - - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - for _ in range(5): - x = np.random.random(shape) - - sub_batched = np.reshape(x, ghost_shape) - means = np.mean(sub_batched, axis=(0, 3, 4), keepdims=True) - variances = np.var(sub_batched, axis=(0, 3, 4), keepdims=True) - - avg_means = np.mean(means, axis=1, keepdims=True) - avg_variances = np.mean(variances, axis=1, keepdims=True) - - moving_means = moving_means * momentum + avg_means * (1. - momentum) - moving_vars = moving_vars * momentum + avg_variances * (1. - momentum) - - y_train = ((sub_batched - means) / - (variances + epsilon) ** 0.5 * gamma) + beta - y_test = ((sub_batched - moving_means) / - (moving_vars + epsilon) ** 0.5 * gamma) + beta - - y_train = np.reshape(y_train, shape) - y_test = np.reshape(y_test, shape) - - y_val_train, _, _ = sess.run([out] + bn.updates, - feed_dict={inp: x, is_training: True}) - y_val_test = sess.run(out, feed_dict={inp: x, is_training: False}) - - self.assertAllClose(y_train, y_val_train, atol=1e-2) - self.assertAllClose(y_test, y_val_test, atol=1e-2) - - def testMultiAxisInvalid(self): - shape = [6, 5, 4, 3] - inp = tf.random.uniform(shape, seed=1) - - with self.assertRaises(ValueError): - normalization_layers.batch_normalization( - inp, axis=[1, 4]) # out of bounds - - with self.assertRaises(ValueError): - normalization_layers.batch_normalization( - inp, axis=[-5, 1]) # out of bounds - - with self.assertRaises(ValueError): - normalization_layers.batch_normalization( - inp, axis=[1, 2, 1]) # duplicate - - def test3DInputMultiAxis12(self): - epsilon = 1e-3 - bn = normalization_layers.BatchNormalization( - axis=[1, 2], epsilon=epsilon, momentum=0.9) - inputs = tf.Variable( - np.random.random((5, 4, 3)) + 100, dtype=tf.float32) - training = tf.compat.v1.placeholder(dtype='bool') - outputs = bn(inputs, training=training) - - with self.cached_session() as sess: - # Test training with placeholder learning phase. - self.evaluate(tf.compat.v1.global_variables_initializer()) - - np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) - - for _ in range(100): - np_output, _, _ = sess.run([outputs] + bn.updates, - feed_dict={training: True}) - # Verify that the axis is normalized during training. - normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta - self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1) - self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) - - # Verify that the statistics are updated during training. 
-      moving_mean, moving_var = self.evaluate(
-          [bn.moving_mean, bn.moving_variance])
-      np_inputs = self.evaluate(inputs)
-      mean = np.mean(np_inputs, axis=0, keepdims=True)
-      std = np.std(np_inputs, axis=0, keepdims=True)
-      variance = np.square(std)
-      self.assertAllClose(mean, moving_mean, atol=1e-2)
-      self.assertAllClose(variance, moving_var, atol=1e-2)
-
-      # Test inference with placeholder learning phase.
-      np_output = sess.run(outputs, feed_dict={training: False})
-
-      # Verify that the axis is normalized during inference.
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
-  def test5DInputMultiAxis123(self):
-    epsilon = 1e-3
-    bn = normalization_layers.BatchNormalization(
-        axis=[1, 2, 3], epsilon=epsilon, momentum=0.9)
-    inputs = tf.Variable(
-        np.random.random((5, 3, 4, 4, 3)) + 100, dtype=tf.float32)
-    training = tf.compat.v1.placeholder(dtype='bool')
-    outputs = bn(inputs, training=training)
-
-    with self.cached_session() as sess:
-      # Test training with placeholder learning phase.
-      self.evaluate(tf.compat.v1.global_variables_initializer())
-
-      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
-
-      for _ in range(100):
-        np_output, _, _ = sess.run([outputs] + bn.updates,
-                                   feed_dict={training: True})
-      # Verify that the axis is normalized during training.
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
-      # Verify that the statistics are updated during training.
-      moving_mean, moving_var = self.evaluate(
-          [bn.moving_mean, bn.moving_variance])
-      np_inputs = self.evaluate(inputs)
-      mean = np.mean(np_inputs, axis=(0, 4), keepdims=True)
-      std = np.std(np_inputs, axis=(0, 4), keepdims=True)
-      variance = np.square(std)
-      self.assertAllClose(mean, moving_mean, atol=1e-2)
-      self.assertAllClose(variance, moving_var, atol=1e-2)
-
-      # Test inference with placeholder learning phase.
-      np_output = sess.run(outputs, feed_dict={training: False})
-
-      # Verify that the axis is normalized during inference.
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
-  def testGhostBN5DimsMultiAxis14(self):
-    shape = [6, 3, 10, 10, 4]
-    virtual_batch_size = 3
-    beta = 2.
-    gamma = 3.
- momentum = 0.8 - epsilon = 1e-3 - moving_means = np.zeros([1, 1, 3, 1, 1, 4], dtype=np.float32) - moving_vars = np.ones([1, 1, 3, 1, 1, 4], dtype=np.float32) - - inp = tf.compat.v1.placeholder(tf.float32, shape) - is_training = tf.compat.v1.placeholder(tf.bool) - bn = normalization_layers.BatchNormalization( - axis=[1, 4], - momentum=momentum, - epsilon=epsilon, - beta_initializer=tf.compat.v1.constant_initializer(beta), - gamma_initializer=tf.compat.v1.constant_initializer(gamma), - virtual_batch_size=virtual_batch_size, - fused=False) - out = bn(inp, training=is_training) - ghost_shape = ([virtual_batch_size, shape[0] // virtual_batch_size] + - shape[1:]) - - with self.session() as sess: - self.evaluate(tf.compat.v1.global_variables_initializer()) - for _ in range(5): - x = np.random.random(shape) - - sub_batched = np.reshape(x, ghost_shape) - means = np.mean(sub_batched, axis=(0, 3, 4), keepdims=True) - variances = np.var(sub_batched, axis=(0, 3, 4), keepdims=True) - - avg_means = np.mean(means, axis=1, keepdims=True) - avg_variances = np.mean(variances, axis=1, keepdims=True) - - moving_means = moving_means * momentum + avg_means * (1. - momentum) - moving_vars = moving_vars * momentum + avg_variances * (1. - momentum) - - y_train = ((sub_batched - means) / - (variances + epsilon) ** 0.5 * gamma) + beta - y_test = ((sub_batched - moving_means) / - (moving_vars + epsilon) ** 0.5 * gamma) + beta - - y_train = np.reshape(y_train, shape) - y_test = np.reshape(y_test, shape) - - y_val_train, _, _ = sess.run([out] + bn.updates, - feed_dict={inp: x, is_training: True}) - y_val_test = sess.run(out, feed_dict={inp: x, is_training: False}) - - self.assertAllClose(y_train, y_val_train, atol=1e-2) - self.assertAllClose(y_test, y_val_test, atol=1e-2) - - -if __name__ == '__main__': - tf.test.main() + freeze_mode, + ): + batch, height, width, input_channels = 2, 4, 5, 3 + shape = [batch, height, width, input_channels] + base_path = "%s_%s_%s_%s_%s_%s" % ( + is_fused_checkpoint_a, + is_fused_checkpoint_b, + use_gpu_checkpoint_a, + use_gpu_checkpoint_b, + use_gpu_test_a, + use_gpu_test_b, + ) + + checkpoint_path_a = os.path.join( + self.get_temp_dir(), f"checkpoint_a_{base_path}" + ) + self._train( + checkpoint_path_a, + shape, + use_gpu_checkpoint_a, + is_fused_checkpoint_a, + restore=False, + freeze_mode=freeze_mode, + ) + checkpoint_path_b = os.path.join( + self.get_temp_dir(), f"checkpoint_b_{base_path}" + ) + self._train( + checkpoint_path_b, + shape, + use_gpu_checkpoint_b, + is_fused_checkpoint_b, + restore=False, + freeze_mode=freeze_mode, + ) + + vars_fused = self._train( + checkpoint_path_a, + shape, + use_gpu_test_a, + True, + restore=True, + freeze_mode=freeze_mode, + ) + vars_nonfused = self._train( + checkpoint_path_b, + shape, + use_gpu_test_b, + False, + restore=True, + freeze_mode=freeze_mode, + ) + self.assertEqual(len(vars_fused), 5) + self.assertEqual(len(vars_nonfused), 5) + for var_fused, var_nonfused in zip(vars_fused, vars_nonfused): + self.assertAllClose(var_fused, var_nonfused, atol=1e-5) + + image_val = np.random.rand(batch, height, width, input_channels).astype( + np.float32 + ) + loss_fused_val = self._infer( + checkpoint_path_a, image_val, shape, use_gpu_test_a, True + ) + loss_nonfused_val = self._infer( + checkpoint_path_b, image_val, shape, use_gpu_test_b, False + ) + self.assertAllClose( + loss_fused_val, loss_nonfused_val, atol=1e-6, rtol=3e-4 + ) + + def _testCheckpointCrossDevice( + self, ckpt_a_fused, ckpt_a_use_gpu, ckpt_b_fused, ckpt_b_use_gpu + ): + for 
use_gpu_test_a in [True, False]: + for use_gpu_test_b in [True, False]: + for freeze_mode in [True, False]: + self._testCheckpoint( + ckpt_a_fused, + ckpt_a_use_gpu, + ckpt_b_fused, + ckpt_b_use_gpu, + use_gpu_test_a, + use_gpu_test_b, + freeze_mode, + ) + + def testCheckpointFusedCPUAndFusedGPU(self): + self._testCheckpointCrossDevice(True, False, True, True) + + def testCheckpointFusedCPUAndFusedCPU(self): + self._testCheckpointCrossDevice(True, False, True, False) + + def testCheckpointFusedGPUAndFusedGPU(self): + self._testCheckpointCrossDevice(True, True, True, True) + + def testCheckpointNonFusedCPUAndNonFusedGPU(self): + self._testCheckpointCrossDevice(False, False, False, True) + + def testCheckpointNonFusedCPUAndNonFusedCPU(self): + self._testCheckpointCrossDevice(False, False, False, False) + + def testCheckpointNonFusedGPUAndNonFusedGPU(self): + self._testCheckpointCrossDevice(False, True, False, True) + + def testCheckpointNonFusedGPUAndFusedGPU(self): + self._testCheckpointCrossDevice(False, True, True, True) + + def testCheckpointNonFusedGPUAndFusedCPU(self): + self._testCheckpointCrossDevice(False, True, True, False) + + def testCheckpointNonFusedCPUAndFusedCPU(self): + self._testCheckpointCrossDevice(False, False, True, False) + + def testCreateBN(self): + # Call layer. + bn = normalization_layers.BatchNormalization(axis=1) + inputs = tf.random.uniform((5, 4, 3), seed=1) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + # Verify shape. + self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3]) + + # Verify layer attributes. + self.assertEqual(len(bn.updates), 2) + self.assertEqual(len(bn.variables), 4) + self.assertEqual(len(bn.trainable_variables), 2) + self.assertEqual(len(bn.non_trainable_variables), 2) + + # Test that updates were created and added to UPDATE_OPS. + self.assertEqual(len(bn.updates), 2) + self.assertListEqual( + tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS), + bn.updates, + ) + + # Test that weights were created and added to TRAINABLE_VARIABLES. + self.assertListEqual( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ), + bn.trainable_variables, + ) + + def testCreateFusedBNFloat16(self): + # Call layer. + bn = normalization_layers.BatchNormalization(axis=1, fused=True) + inputs = tf.random.uniform((5, 4, 3, 3), seed=1, dtype=tf.float16) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + # Verify shape. + self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3, 3]) + + # Verify layer attributes. + self.assertEqual(len(bn.updates), 2) + self.assertEqual(len(bn.variables), 4) + self.assertEqual(len(bn.trainable_variables), 2) + self.assertEqual(len(bn.non_trainable_variables), 2) + for var in bn.variables: + self.assertTrue(var.dtype._is_ref_dtype) + + # Test that updates were created and added to UPDATE_OPS. + self.assertEqual(len(bn.updates), 2) + self.assertListEqual( + tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS), + bn.updates, + ) + + # Test that weights were created and added to TRAINABLE_VARIABLES. 
+ self.assertListEqual( + tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES + ), + bn.trainable_variables, + ) + + def test3DInputAxis1(self): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=1, epsilon=epsilon, momentum=0.9 + ) + inputs = tf.Variable( + np.random.random((5, 4, 3)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + np_gamma = np.reshape(np_gamma, (1, 4, 1)) + np_beta = np.reshape(np_beta, (1, 4, 1)) + + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 2)) + std = np.std(np_inputs, axis=(0, 2)) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def test3DInputAxis2(self): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=2, epsilon=epsilon, momentum=0.9 + ) + inputs = tf.Variable( + np.random.random((5, 4, 3)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + np_gamma = np.reshape(np_gamma, (1, 1, 3)) + np_beta = np.reshape(np_beta, (1, 1, 3)) + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 1)) + std = np.std(np_inputs, axis=(0, 1)) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. 
+ normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def test4DInputAxis1(self): + if tf.test.is_gpu_available(cuda_only=True): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=1, epsilon=epsilon, momentum=0.9 + ) + inputs = tf.Variable( + np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + np_gamma = np.reshape(np_gamma, (1, 4, 1, 1)) + np_beta = np.reshape(np_beta, (1, 4, 1, 1)) + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. + normed_np_output = ( + (np_output - epsilon) * np_gamma + ) + np_beta + self.assertAlmostEqual( + np.mean(normed_np_output), 0.0, places=1 + ) + self.assertAlmostEqual( + np.std(normed_np_output), 1.0, places=1 + ) + + # Verify that the statistics are updated during training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 2, 3)) + std = np.std(np_inputs, axis=(0, 2, 3)) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def test4DInputAxis2(self): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=2, epsilon=epsilon, momentum=0.9 + ) + inputs = tf.Variable( + np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + np_gamma = np.reshape(np_gamma, (1, 1, 3, 1)) + np_beta = np.reshape(np_beta, (1, 1, 3, 1)) + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 1, 3)) + std = np.std(np_inputs, axis=(0, 1, 3)) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. 
+ np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def test4DInputAxis3(self): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=3, epsilon=epsilon, momentum=0.9 + ) + inputs = tf.Variable( + np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) + np_beta = np.reshape(np_beta, (1, 1, 1, 6)) + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 1, 2)) + std = np.std(np_inputs, axis=(0, 1, 2)) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def test4DInputAxis3Fused(self): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=3, epsilon=epsilon, momentum=0.9, fused=True + ) + inputs = tf.Variable( + np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) + np_beta = np.reshape(np_beta, (1, 1, 1, 6)) + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 1, 2)) + std = np.std(np_inputs, axis=(0, 1, 2)) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. 
+ np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def test4DInputAxis1Fused(self): + if tf.test.is_gpu_available(cuda_only=True): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=1, epsilon=epsilon, momentum=0.9, fused=True + ) + inputs = tf.Variable( + np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + np_gamma = np.reshape(np_gamma, (1, 4, 1, 1)) + np_beta = np.reshape(np_beta, (1, 4, 1, 1)) + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. + normed_np_output = ( + (np_output - epsilon) * np_gamma + ) + np_beta + self.assertAlmostEqual( + np.mean(normed_np_output), 0.0, places=1 + ) + self.assertAlmostEqual( + np.std(normed_np_output), 1.0, places=1 + ) + + # Verify that the statistics are updated during training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 2, 3)) + std = np.std(np_inputs, axis=(0, 2, 3)) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def testNegativeAxis(self): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=-1, epsilon=epsilon, momentum=0.9 + ) + inputs = tf.Variable( + np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) + np_beta = np.reshape(np_beta, (1, 1, 1, 6)) + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + + # Verify that the axis is normalized during training. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. 
+ moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 1, 2)) + std = np.std(np_inputs, axis=(0, 1, 2)) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def testBooleanLearningPhase(self): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=-1, epsilon=epsilon, momentum=0.9 + ) + inputs = tf.Variable( + np.random.random((5, 4, 3, 6)) + 100, dtype=tf.float32 + ) + outputs_training = bn(inputs, training=True) + outputs_infer = bn(inputs, training=False) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) + np_beta = np.reshape(np_beta, (1, 1, 1, 6)) + for _ in range(100): + np_output, _, _ = sess.run([outputs_training] + bn.updates) + # Verify that the axis is normalized during training. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=2) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 1, 2)) + std = np.std(np_inputs, axis=(0, 1, 2)) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = self.evaluate(outputs_infer) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def testFunctionalNoReuse(self): + inputs = tf.Variable(np.random.random((5, 4, 3, 6)), dtype=tf.float32) + epsilon = 1e-3 + training = tf.compat.v1.placeholder(dtype="bool") + outputs = normalization_layers.batch_norm( + inputs, + axis=-1, + momentum=0.9, + epsilon=epsilon, + training=training, + name="bn", + ) + + updates = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) + all_vars = {v.name: v for v in tf.compat.v1.global_variables()} + moving_mean = all_vars["bn/moving_mean:0"] + moving_variance = all_vars["bn/moving_variance:0"] + beta = all_vars["bn/beta:0"] + gamma = all_vars["bn/gamma:0"] + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + np_gamma, np_beta = self.evaluate([gamma, beta]) + np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) + np_beta = np.reshape(np_beta, (1, 1, 1, 6)) + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. 
+ normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. + np_moving_mean, np_moving_var = self.evaluate( + [moving_mean, moving_variance] + ) + np_inputs = self.evaluate(inputs) + np_mean = np.mean(np_inputs, axis=(0, 1, 2)) + np_std = np.std(np_inputs, axis=(0, 1, 2)) + np_variance = np.square(np_std) + self.assertAllClose(np_mean, np_moving_mean, atol=1e-2) + self.assertAllClose(np_variance, np_moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def testFunctionalReuse(self): + inputs1 = tf.Variable(np.random.random((5, 4, 3, 6)), dtype=tf.float32) + inputs2 = tf.Variable(np.random.random((5, 4, 3, 6)), dtype=tf.float32) + epsilon = 1e-3 + training = tf.compat.v1.placeholder(dtype="bool") + _ = normalization_layers.batch_norm( + inputs1, + axis=-1, + momentum=0.9, + epsilon=epsilon, + training=training, + name="bn", + ) + outputs2 = normalization_layers.batch_norm( + inputs2, + axis=-1, + momentum=0.9, + epsilon=epsilon, + training=training, + name="bn", + reuse=True, + ) + + # Last 2 update ops + updates = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.UPDATE_OPS + )[-2:] + all_vars = {v.name: v for v in tf.compat.v1.global_variables()} + moving_mean = all_vars["bn/moving_mean:0"] + moving_variance = all_vars["bn/moving_variance:0"] + beta = all_vars["bn/beta:0"] + gamma = all_vars["bn/gamma:0"] + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + for _ in range(100): + np_output, _, _ = sess.run( + [outputs2] + updates, feed_dict={training: True} + ) + + # Verify that the statistics are updated during training. + np_moving_mean, np_moving_var = self.evaluate( + [moving_mean, moving_variance] + ) + np_inputs = self.evaluate(inputs2) + np_mean = np.mean(np_inputs, axis=(0, 1, 2)) + np_std = np.std(np_inputs, axis=(0, 1, 2)) + np_variance = np.square(np_std) + self.assertAllClose(np_mean, np_moving_mean, atol=1e-2) + self.assertAllClose(np_variance, np_moving_var, atol=1e-2) + + # Verify that the axis is normalized during training. + np_gamma, np_beta = self.evaluate([gamma, beta]) + np_gamma = np.reshape(np_gamma, (1, 1, 1, 6)) + np_beta = np.reshape(np_beta, (1, 1, 1, 6)) + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=2) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs2, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. 
+ normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=2) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def testFunctionalReuseFromScope(self): + inputs = tf.Variable(np.random.random((5, 4, 3, 6)), dtype=tf.float32) + epsilon = 1e-3 + training = tf.compat.v1.placeholder(dtype="bool") + with tf.compat.v1.variable_scope("scope"): + _ = normalization_layers.batch_norm( + inputs, + axis=-1, + momentum=0.9, + epsilon=epsilon, + training=training, + ) + self.assertEqual(len(tf.compat.v1.global_variables()), 5) + with tf.compat.v1.variable_scope("scope", reuse=True): + _ = normalization_layers.batch_norm( + inputs, + axis=-1, + momentum=0.9, + epsilon=epsilon, + training=training, + ) + self.assertEqual(len(tf.compat.v1.global_variables()), 5) + + def testNoCenter(self): + bn = normalization_layers.BatchNormalization(axis=1, center=False) + inputs = tf.random.uniform((5, 4, 3), seed=1) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + # Verify shape. + self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3]) + + # Verify layer attributes. + self.assertEqual(len(bn.updates), 2) + self.assertEqual(len(bn.variables), 3) + self.assertEqual(len(bn.trainable_variables), 1) + self.assertEqual(len(bn.non_trainable_variables), 2) + + def testNoScale(self): + bn = normalization_layers.BatchNormalization(axis=1, scale=False) + inputs = tf.random.uniform((5, 4, 3), seed=1) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + # Verify shape. + self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3]) + + # Verify layer attributes. + self.assertEqual(len(bn.updates), 2) + self.assertEqual(len(bn.variables), 3) + self.assertEqual(len(bn.trainable_variables), 1) + self.assertEqual(len(bn.non_trainable_variables), 2) + + def testRegularizers(self): + reg = lambda x: 0.1 * tf.reduce_sum(x) + bn = normalization_layers.BatchNormalization( + axis=1, beta_regularizer=reg + ) + inputs = tf.random.uniform((5, 4, 3), seed=1) + training = tf.compat.v1.placeholder(dtype="bool") + _ = bn(inputs, training=training) + self.assertEqual(len(bn.losses), 1) + + bn = normalization_layers.BatchNormalization( + axis=1, gamma_regularizer=reg + ) + inputs = tf.random.uniform((5, 4, 3), seed=1) + training = tf.compat.v1.placeholder(dtype="bool") + _ = bn(inputs, training=training) + self.assertEqual(len(bn.losses), 1) + + def testConstraints(self): + g_constraint = lambda x: x / tf.reduce_sum(x) + b_constraint = lambda x: x / tf.reduce_max(x) + bn = normalization_layers.BatchNormalization( + axis=1, gamma_constraint=g_constraint, beta_constraint=b_constraint + ) + inputs = tf.random.uniform((5, 4, 3), seed=1) + bn(inputs) + self.assertEqual(bn.gamma_constraint, g_constraint) + self.assertEqual(bn.beta_constraint, b_constraint) + + def testRenorm(self): + shape = (4, 3) + xt = tf.compat.v1.placeholder(tf.float32, shape) + momentum = 0.99 + renorm_momentum = 0.8 + rmax = 1.1 + rmin = 0.9 + dmax = 0.1 + gamma = 2.0 + beta = 3.0 + epsilon = 0.001 + bn = normalization_layers.BatchNormalization( + axis=1, + gamma_initializer=tf.compat.v1.constant_initializer(gamma), + beta_initializer=tf.compat.v1.constant_initializer(beta), + epsilon=epsilon, + momentum=momentum, + renorm=True, + renorm_clipping={"rmax": rmax, "rmin": rmin, "dmax": dmax}, + renorm_momentum=renorm_momentum, + ) + training = tf.compat.v1.placeholder(tf.bool) + yt = bn(xt, 
training=training) + + moving_mean = 0.0 + moving_stddev = 1.0 + renorm_mean = 0.0 + renorm_stddev = 1.0 + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + for _ in range(5): + x = np.random.random(shape) + + mean = x.mean(0) + variance = x.var(0) + stddev = np.sqrt(variance + epsilon) + r = (stddev / renorm_stddev).clip(rmin, rmax) + d = ((mean - renorm_mean) / renorm_stddev).clip(-dmax, dmax) + y_train = ((x - mean) / stddev * r + d) * gamma + beta + renorm_mean += (mean - renorm_mean) * (1.0 - renorm_momentum) + renorm_stddev += (stddev - renorm_stddev) * ( + 1.0 - renorm_momentum + ) + moving_mean += (mean - moving_mean) * (1.0 - momentum) + moving_stddev += (stddev - moving_stddev) * (1.0 - momentum) + + y_test = ( + (x - moving_mean) + / (moving_stddev * moving_stddev) ** 0.5 + * gamma + ) + beta + + yt_val_train, _, _ = sess.run( + [yt] + bn.updates, feed_dict={xt: x, training: True} + ) + yt_val_test, _, _ = sess.run( + [yt] + bn.updates, feed_dict={xt: x, training: False} + ) + + self.assertAllClose(y_train, yt_val_train, atol=1e-5) + self.assertAllClose(y_test, yt_val_test, atol=1e-5) + + def testRenormNoClippingSameMomentumGivesSameTestTrain(self): + shape = (4, 3) + xt = tf.compat.v1.placeholder(tf.float32, shape) + momentum = 0.9 + renorm_momentum = 0.9 + gamma = 2.0 + beta = 3.0 + epsilon = 0.001 + bn = normalization_layers.BatchNormalization( + axis=1, + gamma_initializer=tf.compat.v1.constant_initializer(gamma), + beta_initializer=tf.compat.v1.constant_initializer(beta), + epsilon=epsilon, + momentum=momentum, + renorm=True, + renorm_clipping=None, + renorm_momentum=momentum, + ) + training = tf.compat.v1.placeholder(tf.bool) + yt = bn(xt, training=training) + moving_mean = 0.0 + moving_stddev = 1.0 + renorm_mean = 0.0 + renorm_stddev = 1.0 + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + for step in range(6): + x = np.random.random(shape) + + mean = x.mean(0) + variance = x.var(0) + stddev = np.sqrt(variance + epsilon) + r = stddev / renorm_stddev + d = (mean - renorm_mean) / renorm_stddev + y_test = ( + (x - moving_mean) + / (moving_stddev * moving_stddev) ** 0.5 + * gamma + ) + beta + y_train = ((x - mean) / stddev * r + d) * gamma + beta + renorm_mean += (mean - renorm_mean) * (1.0 - renorm_momentum) + renorm_stddev += (stddev - renorm_stddev) * ( + 1.0 - renorm_momentum + ) + moving_mean += (mean - moving_mean) * (1.0 - momentum) + moving_stddev += (stddev - moving_stddev) * (1.0 - momentum) + + # Compute test values first, before the train mode updates the + # moving averages. + yt_val_test, _, _ = sess.run( + [yt] + bn.updates, feed_dict={xt: x, training: False} + ) + yt_val_train, _, _ = sess.run( + [yt] + bn.updates, feed_dict={xt: x, training: True} + ) + + # Due to initialization inconsistencies, values may not be + # identical on the first iteration (but shouldn't be different + # by much more than epsilon). After the first iteration they + # should be identical. 
+ atol = epsilon * 1.5 if step == 0 else 1e-5 + self.assertAllClose(y_train, yt_val_train, atol=atol) + self.assertAllClose(y_test, yt_val_test, atol=atol) + self.assertAllClose(yt_val_train, yt_val_test, atol=atol) + + def testAdjustment(self): + shape = (4, 3) + xt = tf.compat.v1.placeholder(tf.float32, shape) + momentum = 0.99 + gamma = 2.0 + beta = 3.0 + epsilon = 0.001 + adjust_scale = tf.random.uniform(shape[-1:], 0.5, 1.5) + adjust_bias = tf.random.uniform(shape[-1:], -0.2, 0.2) + bn = normalization_layers.BatchNormalization( + axis=1, + gamma_initializer=tf.compat.v1.constant_initializer(gamma), + beta_initializer=tf.compat.v1.constant_initializer(beta), + epsilon=epsilon, + momentum=momentum, + adjustment=lambda _: (adjust_scale, adjust_bias), + ) + training = tf.compat.v1.placeholder(tf.bool) + yt = bn(xt, training=training) + + moving_mean = 0.0 + moving_variance = 1.0 + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + for _ in range(5): + x = np.random.random(shape) + yt_val_train, adj_scale_val, adj_bias_val = sess.run( + [yt, adjust_scale, adjust_bias] + bn.updates, + feed_dict={xt: x, training: True}, + )[:3] + yt_val_test = sess.run( + [yt] + bn.updates, feed_dict={xt: x, training: False} + )[0] + + mean = x.mean(0) + variance = x.var(0) + y_train = ( + ((x - mean) / (variance + epsilon) ** 0.5) * adj_scale_val + + adj_bias_val + ) * gamma + beta + moving_mean += (mean - moving_mean) * (1.0 - momentum) + moving_variance += (variance - moving_variance) * ( + 1.0 - momentum + ) + + y_test = ( + (x - moving_mean) + / (moving_variance + epsilon) ** 0.5 + * gamma + ) + beta + + self.assertAllClose(y_train, yt_val_train, atol=1e-5) + self.assertAllClose(y_test, yt_val_test, atol=1e-5) + + def testRenormWithAdjustment(self): + shape = (4, 3) + xt = tf.compat.v1.placeholder(tf.float32, shape) + momentum = 0.99 + renorm_momentum = 0.8 + rmax = 1.1 + rmin = 0.9 + dmax = 0.1 + gamma = 2.0 + beta = 3.0 + epsilon = 0.001 + adjust_scale = tf.random.uniform(shape[-1:], 0.5, 1.5) + adjust_bias = tf.random.uniform(shape[-1:], -0.2, 0.2) + bn = normalization_layers.BatchNormalization( + axis=1, + gamma_initializer=tf.compat.v1.constant_initializer(gamma), + beta_initializer=tf.compat.v1.constant_initializer(beta), + epsilon=epsilon, + momentum=momentum, + renorm=True, + renorm_clipping={"rmax": rmax, "rmin": rmin, "dmax": dmax}, + renorm_momentum=renorm_momentum, + adjustment=lambda _: (adjust_scale, adjust_bias), + ) + training = tf.compat.v1.placeholder(tf.bool) + yt = bn(xt, training=training) + + moving_mean = 0.0 + moving_stddev = 1.0 + renorm_mean = 0.0 + renorm_stddev = 1.0 + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + for _ in range(5): + x = np.random.random(shape) + yt_val_train, adj_scale_val, adj_bias_val = sess.run( + [yt, adjust_scale, adjust_bias] + bn.updates, + feed_dict={xt: x, training: True}, + )[:3] + yt_val_test = sess.run( + [yt] + bn.updates, feed_dict={xt: x, training: False} + )[0] + + mean = x.mean(0) + variance = x.var(0) + stddev = np.sqrt(variance + epsilon) + r = (stddev / renorm_stddev).clip(rmin, rmax) + d = ((mean - renorm_mean) / renorm_stddev).clip(-dmax, dmax) + y_train = ( + ((x - mean) / stddev * r + d) * adj_scale_val + adj_bias_val + ) * gamma + beta + renorm_mean += (mean - renorm_mean) * (1.0 - renorm_momentum) + renorm_stddev += (stddev - renorm_stddev) * ( + 1.0 - renorm_momentum + ) + moving_mean += (mean - moving_mean) * (1.0 - momentum) + moving_stddev += 
(stddev - moving_stddev) * (1.0 - momentum) + + y_test = ( + (x - moving_mean) + / (moving_stddev * moving_stddev) ** 0.5 + * gamma + ) + beta + + self.assertAllClose(y_train, yt_val_train, atol=1e-5) + self.assertAllClose(y_test, yt_val_test, atol=1e-5) + + def testGhostBNNegativeVirtualBatch(self): + shape = [6, 5, 4, 3] + inp = tf.random.uniform(shape, seed=1) + + with self.assertRaises(ValueError): + normalization_layers.batch_normalization(inp, virtual_batch_size=-1) + + def testGhostBNVirtualBatchFull(self): + shape = [6, 5, 4, 3] + inp = tf.random.uniform(shape, seed=1) + out1 = normalization_layers.batch_normalization(inp) + out2 = normalization_layers.batch_normalization( + inp, virtual_batch_size=6 + ) + + self.assertListEqual(out1.shape.as_list(), out2.shape.as_list()) + + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + + x = np.random.random(shape) + y1, y2 = sess.run([out1, out2], feed_dict={inp: x}) + + self.assertAllClose(y1, y2, atol=1e-5) + + def testGhostBNInputOutputShapesMatch(self): + shape = [6, 4, 3] + inp = tf.random.uniform(shape, seed=1) + out = normalization_layers.batch_normalization( + inp, virtual_batch_size=3 + ) + self.assertListEqual(out.shape.as_list(), shape) + + def testGhostBNUnknownBatchSize(self): + np_shape = [10, 5, 4] + tf_shape = [None, 5, 4] + inp = tf.compat.v1.placeholder(tf.float32, tf_shape) + out = normalization_layers.batch_normalization( + inp, virtual_batch_size=2 + ) + + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + + x = np.random.random(np_shape) + y = sess.run(out, feed_dict={inp: x}) + + self.assertListEqual(list(y.shape), np_shape) + + def testGhostBN2Dims(self): + shape = [6, 2] + virtual_batch_size = 3 + beta = 2.0 + gamma = 3.0 + momentum = 0.8 + epsilon = 1e-3 + moving_means = np.zeros([2, 2], dtype=np.float32) + moving_vars = np.ones([2, 2], dtype=np.float32) + + inp = tf.compat.v1.placeholder(tf.float32, shape) + is_training = tf.compat.v1.placeholder(tf.bool) + bn = normalization_layers.BatchNormalization( + momentum=momentum, + epsilon=epsilon, + beta_initializer=tf.compat.v1.constant_initializer(beta), + gamma_initializer=tf.compat.v1.constant_initializer(gamma), + virtual_batch_size=virtual_batch_size, + ) + out = bn(inp, training=is_training) + ghost_shape = [ + virtual_batch_size, + shape[0] // virtual_batch_size, + shape[1], + ] + + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + for _ in range(5): + x = np.random.random(shape) + + sub_batched = np.reshape(x, ghost_shape) + means = np.mean(sub_batched, axis=0, keepdims=True) + variances = np.var(sub_batched, axis=0, keepdims=True) + + avg_means = np.mean(means, axis=1, keepdims=True) + avg_variances = np.mean(variances, axis=1, keepdims=True) + + moving_means = moving_means * momentum + avg_means * ( + 1.0 - momentum + ) + moving_vars = moving_vars * momentum + avg_variances * ( + 1.0 - momentum + ) + + y_train = ( + (sub_batched - means) / (variances + epsilon) ** 0.5 * gamma + ) + beta + y_test = ( + (sub_batched - moving_means) + / (moving_vars + epsilon) ** 0.5 + * gamma + ) + beta + + y_train = np.reshape(y_train, shape) + y_test = np.reshape(y_test, shape) + + y_val_train, _, _ = sess.run( + [out] + bn.updates, feed_dict={inp: x, is_training: True} + ) + y_val_test = sess.run( + out, feed_dict={inp: x, is_training: False} + ) + + self.assertAllClose(y_train, y_val_train, atol=1e-5) + self.assertAllClose(y_test, y_val_test, 
atol=1e-5) + + def testGhostBN4DimsAxis3(self): + shape = [6, 10, 10, 3] + virtual_batch_size = 2 + beta = 2.0 + gamma = 3.0 + momentum = 0.8 + epsilon = 1e-3 + moving_means = np.zeros([1, 1, 1, 1, 3], dtype=np.float32) + moving_vars = np.ones([1, 1, 1, 1, 3], dtype=np.float32) + + inp = tf.compat.v1.placeholder(tf.float32, shape) + is_training = tf.compat.v1.placeholder(tf.bool) + bn = normalization_layers.BatchNormalization( + axis=3, + momentum=momentum, + epsilon=epsilon, + beta_initializer=tf.compat.v1.constant_initializer(beta), + gamma_initializer=tf.compat.v1.constant_initializer(gamma), + virtual_batch_size=virtual_batch_size, + ) + out = bn(inp, training=is_training) + ghost_shape = [ + virtual_batch_size, + shape[0] // virtual_batch_size, + ] + shape[1:] + + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + for _ in range(5): + x = np.random.random(shape) + + sub_batched = np.reshape(x, ghost_shape) + means = np.mean(sub_batched, axis=(0, 2, 3), keepdims=True) + variances = np.var(sub_batched, axis=(0, 2, 3), keepdims=True) + + avg_means = np.mean(means, axis=1, keepdims=True) + avg_variances = np.mean(variances, axis=1, keepdims=True) + + moving_means = moving_means * momentum + avg_means * ( + 1.0 - momentum + ) + moving_vars = moving_vars * momentum + avg_variances * ( + 1.0 - momentum + ) + + y_train = ( + (sub_batched - means) / (variances + epsilon) ** 0.5 * gamma + ) + beta + y_test = ( + (sub_batched - moving_means) + / (moving_vars + epsilon) ** 0.5 + * gamma + ) + beta + + y_train = np.reshape(y_train, shape) + y_test = np.reshape(y_test, shape) + + y_val_train, _, _ = sess.run( + [out] + bn.updates, feed_dict={inp: x, is_training: True} + ) + y_val_test = sess.run( + out, feed_dict={inp: x, is_training: False} + ) + + self.assertAllClose(y_train, y_val_train, atol=1e-2) + self.assertAllClose(y_test, y_val_test, atol=1e-2) + + def testGhostBN4DimsAxis1(self): + shape = [6, 3, 10, 10] + virtual_batch_size = 2 + beta = 2.0 + gamma = 3.0 + momentum = 0.8 + epsilon = 1e-3 + moving_means = np.zeros([1, 1, 3, 1, 1], dtype=np.float32) + moving_vars = np.ones([1, 1, 3, 1, 1], dtype=np.float32) + + inp = tf.compat.v1.placeholder(tf.float32, shape) + is_training = tf.compat.v1.placeholder(tf.bool) + bn = normalization_layers.BatchNormalization( + axis=1, + momentum=momentum, + epsilon=epsilon, + beta_initializer=tf.compat.v1.constant_initializer(beta), + gamma_initializer=tf.compat.v1.constant_initializer(gamma), + virtual_batch_size=virtual_batch_size, + fused=False, + ) # NCHW is unsupported by CPU fused batch norm + out = bn(inp, training=is_training) + ghost_shape = [ + virtual_batch_size, + shape[0] // virtual_batch_size, + ] + shape[1:] + + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + for _ in range(5): + x = np.random.random(shape) + + sub_batched = np.reshape(x, ghost_shape) + means = np.mean(sub_batched, axis=(0, 3, 4), keepdims=True) + variances = np.var(sub_batched, axis=(0, 3, 4), keepdims=True) + + avg_means = np.mean(means, axis=1, keepdims=True) + avg_variances = np.mean(variances, axis=1, keepdims=True) + + moving_means = moving_means * momentum + avg_means * ( + 1.0 - momentum + ) + moving_vars = moving_vars * momentum + avg_variances * ( + 1.0 - momentum + ) + + y_train = ( + (sub_batched - means) / (variances + epsilon) ** 0.5 * gamma + ) + beta + y_test = ( + (sub_batched - moving_means) + / (moving_vars + epsilon) ** 0.5 + * gamma + ) + beta + + y_train = 
np.reshape(y_train, shape) + y_test = np.reshape(y_test, shape) + + y_val_train, _, _ = sess.run( + [out] + bn.updates, feed_dict={inp: x, is_training: True} + ) + y_val_test = sess.run( + out, feed_dict={inp: x, is_training: False} + ) + + self.assertAllClose(y_train, y_val_train, atol=1e-2) + self.assertAllClose(y_test, y_val_test, atol=1e-2) + + def testMultiAxisInvalid(self): + shape = [6, 5, 4, 3] + inp = tf.random.uniform(shape, seed=1) + + with self.assertRaises(ValueError): + normalization_layers.batch_normalization( + inp, axis=[1, 4] + ) # out of bounds + + with self.assertRaises(ValueError): + normalization_layers.batch_normalization( + inp, axis=[-5, 1] + ) # out of bounds + + with self.assertRaises(ValueError): + normalization_layers.batch_normalization( + inp, axis=[1, 2, 1] + ) # duplicate + + def test3DInputMultiAxis12(self): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=[1, 2], epsilon=epsilon, momentum=0.9 + ) + inputs = tf.Variable( + np.random.random((5, 4, 3)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=0, keepdims=True) + std = np.std(np_inputs, axis=0, keepdims=True) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def test5DInputMultiAxis123(self): + epsilon = 1e-3 + bn = normalization_layers.BatchNormalization( + axis=[1, 2, 3], epsilon=epsilon, momentum=0.9 + ) + inputs = tf.Variable( + np.random.random((5, 3, 4, 4, 3)) + 100, dtype=tf.float32 + ) + training = tf.compat.v1.placeholder(dtype="bool") + outputs = bn(inputs, training=training) + + with self.cached_session() as sess: + # Test training with placeholder learning phase. + self.evaluate(tf.compat.v1.global_variables_initializer()) + + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + + for _ in range(100): + np_output, _, _ = sess.run( + [outputs] + bn.updates, feed_dict={training: True} + ) + # Verify that the axis is normalized during training. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + # Verify that the statistics are updated during training. 
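As a reference for what these assertions verify: during training the layer folds each batch's statistics into its moving estimates with an exponential moving average controlled by `momentum`. A minimal standalone NumPy sketch of that update rule (illustrative only, not the layer's actual implementation):

```python
import numpy as np

momentum = 0.9
moving_mean = np.zeros((1, 4, 3))
moving_var = np.ones((1, 4, 3))

for _ in range(100):
    batch = np.random.random((5, 4, 3)) + 100
    batch_mean = np.mean(batch, axis=0, keepdims=True)
    batch_var = np.var(batch, axis=0, keepdims=True)
    # Exponential moving average: the old estimate decays by `momentum`,
    # the current batch contributes the remaining (1 - momentum).
    moving_mean = moving_mean * momentum + batch_mean * (1.0 - momentum)
    moving_var = moving_var * momentum + batch_var * (1.0 - momentum)
```

After 100 steps the initial estimate contributes only `momentum ** 100` (about 3e-5), which is why the tests can loop 100 times and then compare the moving statistics against the batch statistics with `atol=1e-2`.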
+ moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance] + ) + np_inputs = self.evaluate(inputs) + mean = np.mean(np_inputs, axis=(0, 4), keepdims=True) + std = np.std(np_inputs, axis=(0, 4), keepdims=True) + variance = np.square(std) + self.assertAllClose(mean, moving_mean, atol=1e-2) + self.assertAllClose(variance, moving_var, atol=1e-2) + + # Test inference with placeholder learning phase. + np_output = sess.run(outputs, feed_dict={training: False}) + + # Verify that the axis is normalized during inference. + normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta + self.assertAlmostEqual(np.mean(normed_np_output), 0.0, places=1) + self.assertAlmostEqual(np.std(normed_np_output), 1.0, places=1) + + def testGhostBN5DimsMultiAxis14(self): + shape = [6, 3, 10, 10, 4] + virtual_batch_size = 3 + beta = 2.0 + gamma = 3.0 + momentum = 0.8 + epsilon = 1e-3 + moving_means = np.zeros([1, 1, 3, 1, 1, 4], dtype=np.float32) + moving_vars = np.ones([1, 1, 3, 1, 1, 4], dtype=np.float32) + + inp = tf.compat.v1.placeholder(tf.float32, shape) + is_training = tf.compat.v1.placeholder(tf.bool) + bn = normalization_layers.BatchNormalization( + axis=[1, 4], + momentum=momentum, + epsilon=epsilon, + beta_initializer=tf.compat.v1.constant_initializer(beta), + gamma_initializer=tf.compat.v1.constant_initializer(gamma), + virtual_batch_size=virtual_batch_size, + fused=False, + ) + out = bn(inp, training=is_training) + ghost_shape = [ + virtual_batch_size, + shape[0] // virtual_batch_size, + ] + shape[1:] + + with self.session() as sess: + self.evaluate(tf.compat.v1.global_variables_initializer()) + for _ in range(5): + x = np.random.random(shape) + + sub_batched = np.reshape(x, ghost_shape) + means = np.mean(sub_batched, axis=(0, 3, 4), keepdims=True) + variances = np.var(sub_batched, axis=(0, 3, 4), keepdims=True) + + avg_means = np.mean(means, axis=1, keepdims=True) + avg_variances = np.mean(variances, axis=1, keepdims=True) + + moving_means = moving_means * momentum + avg_means * ( + 1.0 - momentum + ) + moving_vars = moving_vars * momentum + avg_variances * ( + 1.0 - momentum + ) + + y_train = ( + (sub_batched - means) / (variances + epsilon) ** 0.5 * gamma + ) + beta + y_test = ( + (sub_batched - moving_means) + / (moving_vars + epsilon) ** 0.5 + * gamma + ) + beta + + y_train = np.reshape(y_train, shape) + y_test = np.reshape(y_test, shape) + + y_val_train, _, _ = sess.run( + [out] + bn.updates, feed_dict={inp: x, is_training: True} + ) + y_val_test = sess.run( + out, feed_dict={inp: x, is_training: False} + ) + + self.assertAllClose(y_train, y_val_train, atol=1e-2) + self.assertAllClose(y_test, y_val_test, atol=1e-2) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/legacy_tf_layers/pooling.py b/keras/legacy_tf_layers/pooling.py index 144bf12bbcda..71695d771612 100644 --- a/keras/legacy_tf_layers/pooling.py +++ b/keras/legacy_tf_layers/pooling.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
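Stepping back from the diff for a moment: the ghost ("virtual") batch normalization cases above all construct their expected outputs the same way. A hedged NumPy sketch of that reference computation for the rank-2 case, mirroring `testGhostBN2Dims` rather than the layer's real code:

```python
import numpy as np

def ghost_batch_norm(x, virtual_batch_size, epsilon=1e-3):
    # Split the real batch into sub-batches of `virtual_batch_size`
    # and normalize each sub-batch with its own statistics, exactly as
    # the testGhostBN* cases build their reference outputs.
    full, features = x.shape
    ghost = x.reshape(virtual_batch_size, full // virtual_batch_size, features)
    mean = ghost.mean(axis=0, keepdims=True)
    var = ghost.var(axis=0, keepdims=True)
    normed = (ghost - mean) / np.sqrt(var + epsilon)
    return normed.reshape(full, features)

x = np.random.random((6, 2))
y = ghost_batch_norm(x, virtual_batch_size=3)
print(y.shape)  # (6, 2)
```

For the higher-rank tests the same reshape happens first, and the reduction additionally spans the spatial axes; only the axes passed to `mean`/`var` change.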
# ============================================================================= -# pylint: disable=g-classes-have-attributes + """Contains the pooling layer classes and their functional aliases.""" from __future__ import absolute_import from __future__ import division @@ -22,878 +22,979 @@ from keras import layers as keras_layers from keras.legacy_tf_layers import base + +# isort: off from tensorflow.python.util.tf_export import keras_export -from tensorflow.python.util.tf_export import tf_export -@keras_export(v1=['keras.__internal__.legacy.layers.AveragePooling1D']) -@tf_export(v1=['layers.AveragePooling1D']) +@keras_export(v1=["keras.__internal__.legacy.layers.AveragePooling1D"]) class AveragePooling1D(keras_layers.AveragePooling1D, base.Layer): - """Average Pooling layer for 1D inputs. - - Args: - pool_size: An integer or tuple/list of a single integer, - representing the size of the pooling window. - strides: An integer or tuple/list of a single integer, specifying the - strides of the pooling operation. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.AveragePooling1D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - pooling = tf.compat.v1.layers.AveragePooling1D(pool_size=2, strides=2) - ``` - - After: - - ```python - pooling = tf.keras.layers.AveragePooling1D(pool_size=2, strides=2) - ``` - @end_compatibility - """ - - def __init__(self, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - if strides is None: - raise ValueError('Argument `strides` must not be None.') - super().__init__( + """Average Pooling layer for 1D inputs. + + Args: + pool_size: An integer or tuple/list of a single integer, + representing the size of the pooling window. + strides: An integer or tuple/list of a single integer, specifying the + strides of the pooling operation. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, length)`. + name: A string, the name of the layer. 
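The output length implied by these arguments follows standard pooling arithmetic: `floor((length - pool_size) / strides) + 1` for `'valid'` padding and `ceil(length / strides)` for `'same'`. A quick shape check with the TF2 counterpart layer, assuming TensorFlow 2.x is installed:

```python
import tensorflow as tf

x = tf.random.uniform((2, 7, 3))  # (batch, length, channels)

y_valid = tf.keras.layers.AveragePooling1D(pool_size=2, strides=2)(x)
print(y_valid.shape)  # (2, 3, 3): floor((7 - 2) / 2) + 1 = 3

y_same = tf.keras.layers.AveragePooling1D(
    pool_size=2, strides=2, padding="same"
)(x)
print(y_same.shape)  # (2, 4, 3): ceil(7 / 2) = 4
```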
+ + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.AveragePooling1D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + pooling = tf.compat.v1.layers.AveragePooling1D(pool_size=2, strides=2) + ``` + + After: + + ```python + pooling = tf.keras.layers.AveragePooling1D(pool_size=2, strides=2) + ``` + @end_compatibility + """ + + def __init__( + self, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, + **kwargs + ): + if strides is None: + raise ValueError("Argument `strides` must not be None.") + super().__init__( + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.average_pooling1d"]) +def average_pooling1d( + inputs, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, +): + """Average Pooling layer for 1D inputs. + + Args: + inputs: The tensor over which to pool. Must have rank 3. + pool_size: An integer or tuple/list of a single integer, + representing the size of the pooling window. + strides: An integer or tuple/list of a single integer, specifying the + strides of the pooling operation. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, length)`. + name: A string, the name of the layer. + + Returns: + The output tensor, of rank 3. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.AveragePooling1D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.average_pooling1d(x, pool_size=2, strides=2) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.AveragePooling1D(pool_size=2, strides=2)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.average_pooling1d` is deprecated and " + "will be removed in a future version. 
" + "Please use `tf.keras.layers.AveragePooling1D` instead.", + stacklevel=2, + ) + layer = AveragePooling1D( pool_size=pool_size, strides=strides, padding=padding, data_format=data_format, name=name, - **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.average_pooling1d']) -@tf_export(v1=['layers.average_pooling1d']) -def average_pooling1d(inputs, pool_size, strides, - padding='valid', data_format='channels_last', - name=None): - """Average Pooling layer for 1D inputs. - - Args: - inputs: The tensor over which to pool. Must have rank 3. - pool_size: An integer or tuple/list of a single integer, - representing the size of the pooling window. - strides: An integer or tuple/list of a single integer, specifying the - strides of the pooling operation. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - name: A string, the name of the layer. - - Returns: - The output tensor, of rank 3. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.AveragePooling1D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.average_pooling1d(x, pool_size=2, strides=2) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.AveragePooling1D(pool_size=2, strides=2)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.average_pooling1d` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `tf.keras.layers.AveragePooling1D` instead.', - stacklevel=2) - layer = AveragePooling1D(pool_size=pool_size, - strides=strides, - padding=padding, - data_format=data_format, - name=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.MaxPooling1D']) -@tf_export(v1=['layers.MaxPooling1D']) -class MaxPooling1D(keras_layers.MaxPooling1D, base.Layer): - """Max Pooling layer for 1D inputs. - - Args: - pool_size: An integer or tuple/list of a single integer, - representing the size of the pooling window. - strides: An integer or tuple/list of a single integer, specifying the - strides of the pooling operation. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - name: A string, the name of the layer. 
- - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` + ) + return layer(inputs) - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - The corresponding TensorFlow v2 layer is - `tf.keras.layers.MaxPooling1D`. - - - #### Structural Mapping to Native TF2 +@keras_export(v1=["keras.__internal__.legacy.layers.MaxPooling1D"]) +class MaxPooling1D(keras_layers.MaxPooling1D, base.Layer): + """Max Pooling layer for 1D inputs. + + Args: + pool_size: An integer or tuple/list of a single integer, + representing the size of the pooling window. + strides: An integer or tuple/list of a single integer, specifying the + strides of the pooling operation. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, length)`. + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.MaxPooling1D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + pooling = tf.compat.v1.layers.MaxPooling1D(pool_size=2, strides=2) + ``` + + After: + + ```python + pooling = tf.keras.layers.MaxPooling1D(pool_size=2, strides=2) + ``` + @end_compatibility + """ + + def __init__( + self, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, + **kwargs + ): + if strides is None: + raise ValueError("Argument `strides` must not be None.") + super().__init__( + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.max_pooling1d"]) +def max_pooling1d( + inputs, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, +): + """Max Pooling layer for 1D inputs. + + Args: + inputs: The tensor over which to pool. Must have rank 3. + pool_size: An integer or tuple/list of a single integer, + representing the size of the pooling window. + strides: An integer or tuple/list of a single integer, specifying the + strides of the pooling operation. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, one of `channels_last` (default) or + `channels_first`. The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, length)`. + name: A string, the name of the layer. + + Returns: + The output tensor, of rank 3. + + Raises: + ValueError: if eager execution is enabled. 
+ + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.MaxPooling1D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.max_pooling1d(x, pool_size=2, strides=2) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.MaxPooling1D(pool_size=2, strides=2)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.max_pooling1d` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.MaxPooling1D` instead.", + stacklevel=2, + ) + layer = MaxPooling1D( + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + name=name, + ) + return layer(inputs) - None of the supported arguments have changed name. - Before: +@keras_export(v1=["keras.__internal__.legacy.layers.AveragePooling2D"]) +class AveragePooling2D(keras_layers.AveragePooling2D, base.Layer): + """Average pooling layer for 2D inputs (e.g. images). + + Args: + pool_size: An integer or tuple/list of 2 integers: (pool_height, + pool_width) specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string. The ordering of the dimensions in the inputs. + `channels_last` (default) and `channels_first` are supported. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.AveragePooling2D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. 
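For intuition about what the 2-D pooling window actually computes, here is a tiny NumPy sketch of `'valid'` pooling over a single-channel image. This is illustrative only; the real layers dispatch to fused TF kernels:

```python
import numpy as np

def pool_2d(x, pool=2, stride=2, reduce=np.max):
    # 'valid' pooling: only full windows, no padding.
    h = (x.shape[0] - pool) // stride + 1
    w = (x.shape[1] - pool) // stride + 1
    out = np.empty((h, w), dtype=x.dtype)
    for i in range(h):
        for j in range(w):
            out[i, j] = reduce(
                x[i * stride : i * stride + pool, j * stride : j * stride + pool]
            )
    return out

x = np.arange(16.0).reshape(4, 4)
print(pool_2d(x, reduce=np.max))   # [[ 5.  7.] [13. 15.]]
print(pool_2d(x, reduce=np.mean))  # [[ 2.5  4.5] [10.5 12.5]]
```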
+ + Before: + + ```python + pooling = tf.compat.v1.layers.AveragePooling2D(pool_size=2, strides=2) + ``` + + After: + + ```python + pooling = tf.keras.layers.AveragePooling2D(pool_size=2, strides=2) + ``` + @end_compatibility + """ + + def __init__( + self, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, + **kwargs + ): + if strides is None: + raise ValueError("Argument `strides` must not be None.") + super().__init__( + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.average_pooling2d"]) +def average_pooling2d( + inputs, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, +): + """Average pooling layer for 2D inputs (e.g. images). + + Args: + inputs: The tensor over which to pool. Must have rank 4. + pool_size: An integer or tuple/list of 2 integers: (pool_height, + pool_width) specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string. The ordering of the dimensions in the inputs. + `channels_last` (default) and `channels_first` are supported. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + name: A string, the name of the layer. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.AveragePooling2D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.average_pooling2d(x, pool_size=2, strides=2) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.AveragePooling2D(pool_size=2, strides=2)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.average_pooling2d` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.AveragePooling2D` instead.", + stacklevel=2, + ) + layer = AveragePooling2D( + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + name=name, + ) + return layer(inputs) - ```python - pooling = tf.compat.v1.layers.MaxPooling1D(pool_size=2, strides=2) - ``` - After: +@keras_export(v1=["keras.__internal__.legacy.layers.MaxPooling2D"]) +class MaxPooling2D(keras_layers.MaxPooling2D, base.Layer): + """Max pooling layer for 2D inputs (e.g. images). 
+ + Args: + pool_size: An integer or tuple/list of 2 integers: (pool_height, + pool_width) specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string. The ordering of the dimensions in the inputs. + `channels_last` (default) and `channels_first` are supported. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.MaxPooling2D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + pooling = tf.compat.v1.layers.MaxPooling2D(pool_size=2, strides=2) + ``` + + After: + + ```python + pooling = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2) + ``` + @end_compatibility + """ + + def __init__( + self, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, + **kwargs + ): + if strides is None: + raise ValueError("Argument `strides` must not be None.") + super().__init__( + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.max_pooling2d"]) +def max_pooling2d( + inputs, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, +): + """Max pooling layer for 2D inputs (e.g. images). + + Args: + inputs: The tensor over which to pool. Must have rank 4. + pool_size: An integer or tuple/list of 2 integers: (pool_height, + pool_width) specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string. The ordering of the dimensions in the inputs. + `channels_last` (default) and `channels_first` are supported. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + name: A string, the name of the layer. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. 
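The class-based layer should produce the same values as the raw pooling op it wraps; a quick equivalence check with the TF2 layer, assuming TensorFlow 2.x:

```python
import tensorflow as tf

x = tf.random.uniform((1, 4, 4, 3))

layer_out = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)(x)
op_out = tf.nn.max_pool2d(x, ksize=2, strides=2, padding="VALID")

# Max pooling is an exact selection, so both paths should agree bitwise.
print(bool(tf.reduce_all(layer_out == op_out)))  # True
```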
+ + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.MaxPooling2D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.max_pooling2d(x, pool_size=2, strides=2) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.max_pooling2d` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.MaxPooling2D` instead.", + stacklevel=2, + ) + layer = MaxPooling2D( + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + name=name, + ) + return layer(inputs) - ```python - pooling = tf.keras.layers.MaxPooling1D(pool_size=2, strides=2) - ``` - @end_compatibility - """ - def __init__(self, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - if strides is None: - raise ValueError('Argument `strides` must not be None.') - super().__init__( +@keras_export(v1=["keras.__internal__.legacy.layers.AveragePooling3D"]) +class AveragePooling3D(keras_layers.AveragePooling3D, base.Layer): + """Average pooling layer for 3D inputs (e.g. volumes). + + Args: + pool_size: An integer or tuple/list of 3 integers: + (pool_depth, pool_height, pool_width) + specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 3 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string. The ordering of the dimensions in the inputs. + `channels_last` (default) and `channels_first` are supported. + `channels_last` corresponds to inputs with shape + `(batch, depth, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, depth, height, width)`. + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.AveragePooling3D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. 
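The 3-D variants behave exactly like their 2-D counterparts with one extra spatial axis. A short shape check with the TF2 layer, again assuming TensorFlow 2.x:

```python
import tensorflow as tf

# Volumes are (batch, depth, height, width, channels) in channels_last.
x = tf.random.uniform((2, 8, 8, 8, 1))
y = tf.keras.layers.AveragePooling3D(pool_size=2, strides=2)(x)
print(y.shape)  # (2, 4, 4, 4, 1): every spatial axis is halved
```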
+ + Before: + + ```python + pooling = tf.compat.v1.layers.AveragePooling3D(pool_size=2, strides=2) + ``` + + After: + + ```python + pooling = tf.keras.layers.AveragePooling3D(pool_size=2, strides=2) + ``` + @end_compatibility + """ + + def __init__( + self, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, + **kwargs + ): + if strides is None: + raise ValueError("Argument `strides` must not be None.") + super().__init__( + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.average_pooling3d"]) +def average_pooling3d( + inputs, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, +): + """Average pooling layer for 3D inputs (e.g. volumes). + + Args: + inputs: The tensor over which to pool. Must have rank 5. + pool_size: An integer or tuple/list of 3 integers: + (pool_depth, pool_height, pool_width) + specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 3 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string. The ordering of the dimensions in the inputs. + `channels_last` (default) and `channels_first` are supported. + `channels_last` corresponds to inputs with shape + `(batch, depth, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, depth, height, width)`. + name: A string, the name of the layer. + + Returns: + Output tensor. + + Raises: + ValueError: if eager execution is enabled. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.AveragePooling3D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + y = tf.compat.v1.layers.average_pooling3d(x, pool_size=2, strides=2) + ``` + + After: + + To migrate code using TF1 functional layers use the [Keras Functional API] + (https://www.tensorflow.org/guide/keras/functional): + + ```python + x = tf.keras.Input((28, 28, 1)) + y = tf.keras.layers.AveragePooling3D(pool_size=2, strides=2)(x) + model = tf.keras.Model(x, y) + ``` + @end_compatibility + """ + warnings.warn( + "`tf.layers.average_pooling3d` is deprecated and " + "will be removed in a future version. " + "Please use `tf.keras.layers.AveragePooling3D` instead.", + stacklevel=2, + ) + layer = AveragePooling3D( pool_size=pool_size, strides=strides, padding=padding, data_format=data_format, name=name, - **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.max_pooling1d']) -@tf_export(v1=['layers.max_pooling1d']) -def max_pooling1d(inputs, pool_size, strides, - padding='valid', data_format='channels_last', - name=None): - """Max Pooling layer for 1D inputs. - - Args: - inputs: The tensor over which to pool. Must have rank 3. 
- pool_size: An integer or tuple/list of a single integer, - representing the size of the pooling window. - strides: An integer or tuple/list of a single integer, specifying the - strides of the pooling operation. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - name: A string, the name of the layer. - - Returns: - The output tensor, of rank 3. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.MaxPooling1D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.max_pooling1d(x, pool_size=2, strides=2) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.MaxPooling1D(pool_size=2, strides=2)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.max_pooling1d` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `tf.keras.layers.MaxPooling1D` instead.', - stacklevel=2) - layer = MaxPooling1D(pool_size=pool_size, - strides=strides, - padding=padding, - data_format=data_format, - name=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.AveragePooling2D']) -@tf_export(v1=['layers.AveragePooling2D']) -class AveragePooling2D(keras_layers.AveragePooling2D, base.Layer): - """Average pooling layer for 2D inputs (e.g. images). - - Args: - pool_size: An integer or tuple/list of 2 integers: (pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string. The ordering of the dimensions in the inputs. - `channels_last` (default) and `channels_first` are supported. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. 
- - The corresponding TensorFlow v2 layer is - `tf.keras.layers.AveragePooling2D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - pooling = tf.compat.v1.layers.AveragePooling2D(pool_size=2, strides=2) - ``` - - After: - - ```python - pooling = tf.keras.layers.AveragePooling2D(pool_size=2, strides=2) - ``` - @end_compatibility - """ - - def __init__(self, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - if strides is None: - raise ValueError('Argument `strides` must not be None.') - super().__init__( - pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, name=name, **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.average_pooling2d']) -@tf_export(v1=['layers.average_pooling2d']) -def average_pooling2d(inputs, - pool_size, strides, - padding='valid', data_format='channels_last', - name=None): - """Average pooling layer for 2D inputs (e.g. images). - - Args: - inputs: The tensor over which to pool. Must have rank 4. - pool_size: An integer or tuple/list of 2 integers: (pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string. The ordering of the dimensions in the inputs. - `channels_last` (default) and `channels_first` are supported. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - name: A string, the name of the layer. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.AveragePooling2D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.average_pooling2d(x, pool_size=2, strides=2) - ``` + ) + return layer(inputs) - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.AveragePooling2D(pool_size=2, strides=2)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.average_pooling2d` is deprecated and ' - 'will be removed in a future version. 
' - 'Please use `tf.keras.layers.AveragePooling2D` instead.', - stacklevel=2) - layer = AveragePooling2D(pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, - name=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.MaxPooling2D']) -@tf_export(v1=['layers.MaxPooling2D']) -class MaxPooling2D(keras_layers.MaxPooling2D, base.Layer): - """Max pooling layer for 2D inputs (e.g. images). - - Args: - pool_size: An integer or tuple/list of 2 integers: (pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string. The ordering of the dimensions in the inputs. - `channels_last` (default) and `channels_first` are supported. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.MaxPooling2D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - pooling = tf.compat.v1.layers.MaxPooling2D(pool_size=2, strides=2) - ``` - - After: - - ```python - pooling = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2) - ``` - @end_compatibility - """ - - def __init__(self, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - if strides is None: - raise ValueError('Argument `strides` must not be None.') - super().__init__( - pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, name=name, **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.max_pooling2d']) -@tf_export(v1=['layers.max_pooling2d']) -def max_pooling2d(inputs, - pool_size, strides, - padding='valid', data_format='channels_last', - name=None): - """Max pooling layer for 2D inputs (e.g. images). - - Args: - inputs: The tensor over which to pool. Must have rank 4. - pool_size: An integer or tuple/list of 2 integers: (pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string. The ordering of the dimensions in the inputs. - `channels_last` (default) and `channels_first` are supported. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. 
- name: A string, the name of the layer. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.MaxPooling2D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.max_pooling2d(x, pool_size=2, strides=2) - ``` - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.max_pooling2d` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `tf.keras.layers.MaxPooling2D` instead.', - stacklevel=2) - layer = MaxPooling2D(pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, - name=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.AveragePooling3D']) -@tf_export(v1=['layers.AveragePooling3D']) -class AveragePooling3D(keras_layers.AveragePooling3D, base.Layer): - """Average pooling layer for 3D inputs (e.g. volumes). - - Args: - pool_size: An integer or tuple/list of 3 integers: - (pool_depth, pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string. The ordering of the dimensions in the inputs. - `channels_last` (default) and `channels_first` are supported. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.AveragePooling3D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. 
- - Before: - - ```python - pooling = tf.compat.v1.layers.AveragePooling3D(pool_size=2, strides=2) - ``` - - After: - - ```python - pooling = tf.keras.layers.AveragePooling3D(pool_size=2, strides=2) - ``` - @end_compatibility - """ - - def __init__(self, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - if strides is None: - raise ValueError('Argument `strides` must not be None.') - super().__init__( - pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, name=name, **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.average_pooling3d']) -@tf_export(v1=['layers.average_pooling3d']) -def average_pooling3d(inputs, - pool_size, strides, - padding='valid', data_format='channels_last', - name=None): - """Average pooling layer for 3D inputs (e.g. volumes). - - Args: - inputs: The tensor over which to pool. Must have rank 5. - pool_size: An integer or tuple/list of 3 integers: - (pool_depth, pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string. The ordering of the dimensions in the inputs. - `channels_last` (default) and `channels_first` are supported. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - name: A string, the name of the layer. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.AveragePooling3D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.average_pooling3d(x, pool_size=2, strides=2) - ``` - - After: - - To migrate code using TF1 functional layers use the [Keras Functional API] - (https://www.tensorflow.org/guide/keras/functional): - - ```python - x = tf.keras.Input((28, 28, 1)) - y = tf.keras.layers.AveragePooling3D(pool_size=2, strides=2)(x) - model = tf.keras.Model(x, y) - ``` - @end_compatibility - """ - warnings.warn( - '`tf.layers.average_pooling3d` is deprecated and ' - 'will be removed in a future version. ' - 'Please use `tf.keras.layers.AveragePooling3D` instead.', - stacklevel=2) - layer = AveragePooling3D(pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, - name=name) - return layer(inputs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.MaxPooling3D']) -@tf_export(v1=['layers.MaxPooling3D']) +@keras_export(v1=["keras.__internal__.legacy.layers.MaxPooling3D"]) class MaxPooling3D(keras_layers.MaxPooling3D, base.Layer): - """Max pooling layer for 3D inputs (e.g. volumes). 
- - Args: - pool_size: An integer or tuple/list of 3 integers: - (pool_depth, pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string. The ordering of the dimensions in the inputs. - `channels_last` (default) and `channels_first` are supported. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - name: A string, the name of the layer. - - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.MaxPooling3D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - pooling = tf.compat.v1.layers.MaxPooling3D(pool_size=2, strides=2) - ``` - - After: - - ```python - pooling = tf.keras.layers.MaxPooling3D(pool_size=2, strides=2) - ``` - @end_compatibility - """ - - def __init__(self, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - if strides is None: - raise ValueError('Argument `strides` must not be None.') - super().__init__( - pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, name=name, **kwargs) - - -@keras_export(v1=['keras.__internal__.legacy.layers.max_pooling3d']) -@tf_export(v1=['layers.max_pooling3d']) -def max_pooling3d(inputs, - pool_size, strides, - padding='valid', data_format='channels_last', - name=None): - """Max pooling layer for 3D inputs (e.g. - - volumes). - - Args: - inputs: The tensor over which to pool. Must have rank 5. - pool_size: An integer or tuple/list of 3 integers: (pool_depth, pool_height, - pool_width) specifying the size of the pooling window. Can be a single - integer to specify the same value for all spatial dimensions. - strides: An integer or tuple/list of 3 integers, specifying the strides of - the pooling operation. Can be a single integer to specify the same value - for all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string. The ordering of the dimensions in the inputs. - `channels_last` (default) and `channels_first` are supported. - `channels_last` corresponds to inputs with shape `(batch, depth, height, - width, channels)` while `channels_first` corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - name: A string, the name of the layer. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. 
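The `@compatibility(TF2)` blocks repeated throughout this file all prescribe the same recipe for keeping these legacy layers usable under TF2: call them inside a method decorated with `tf.compat.v1.keras.utils.track_tf1_style_variables`. A hedged sketch of that usage — `LegacyPoolWrapper` is a hypothetical name, and pooling layers carry no variables, so the decorator matters most for layers that do:

```python
import tensorflow as tf

class LegacyPoolWrapper(tf.keras.layers.Layer):
    @tf.compat.v1.keras.utils.track_tf1_style_variables
    def call(self, inputs):
        # TF1-style layer calls made here stay compatible with eager
        # execution and tf.function, per the migration guide linked above.
        return tf.compat.v1.layers.max_pooling3d(
            inputs, pool_size=2, strides=2
        )

y = LegacyPoolWrapper()(tf.random.uniform((1, 4, 4, 4, 3)))
print(y.shape)  # (1, 2, 2, 2, 3)
```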
- - - @compatibility(TF2) - This API is a legacy api that is only compatible with eager execution and - `tf.function` if you combine it with - `tf.compat.v1.keras.utils.track_tf1_style_variables` - - Please refer to [tf.layers model mapping section of the migration guide] - (https://www.tensorflow.org/guide/migrate/model_mapping) - to learn how to use your TensorFlow v1 model in TF2 with Keras. - - The corresponding TensorFlow v2 layer is - `tf.keras.layers.MaxPooling3D`. - - - #### Structural Mapping to Native TF2 - - None of the supported arguments have changed name. - - Before: - - ```python - y = tf.compat.v1.layers.max_pooling3d(x, pool_size=2, strides=2) - ``` + """Max pooling layer for 3D inputs (e.g. volumes). + + Args: + pool_size: An integer or tuple/list of 3 integers: + (pool_depth, pool_height, pool_width) + specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 3 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string. The ordering of the dimensions in the inputs. + `channels_last` (default) and `channels_first` are supported. + `channels_last` corresponds to inputs with shape + `(batch, depth, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, depth, height, width)`. + name: A string, the name of the layer. + + + @compatibility(TF2) + This API is a legacy api that is only compatible with eager execution and + `tf.function` if you combine it with + `tf.compat.v1.keras.utils.track_tf1_style_variables` + + Please refer to [tf.layers model mapping section of the migration guide] + (https://www.tensorflow.org/guide/migrate/model_mapping) + to learn how to use your TensorFlow v1 model in TF2 with Keras. + + The corresponding TensorFlow v2 layer is + `tf.keras.layers.MaxPooling3D`. + + + #### Structural Mapping to Native TF2 + + None of the supported arguments have changed name. + + Before: + + ```python + pooling = tf.compat.v1.layers.MaxPooling3D(pool_size=2, strides=2) + ``` + + After: + + ```python + pooling = tf.keras.layers.MaxPooling3D(pool_size=2, strides=2) + ``` + @end_compatibility + """ + + def __init__( + self, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, + **kwargs + ): + if strides is None: + raise ValueError("Argument `strides` must not be None.") + super().__init__( + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + name=name, + **kwargs + ) + + +@keras_export(v1=["keras.__internal__.legacy.layers.max_pooling3d"]) +def max_pooling3d( + inputs, + pool_size, + strides, + padding="valid", + data_format="channels_last", + name=None, +): + """Max pooling layer for 3D inputs (e.g. + + volumes). + + Args: + inputs: The tensor over which to pool. Must have rank 5. + pool_size: An integer or tuple/list of 3 integers: (pool_depth, + pool_height, pool_width) specifying the size of the pooling window. Can + be a single integer to specify the same value for all spatial + dimensions. + strides: An integer or tuple/list of 3 integers, specifying the strides of + the pooling operation. Can be a single integer to specify the same value + for all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. 
+        Case-insensitive.
+      data_format: A string. The ordering of the dimensions in the inputs.
+        `channels_last` (default) and `channels_first` are supported.
+        `channels_last` corresponds to inputs with shape `(batch, depth, height,
+        width, channels)` while `channels_first` corresponds to inputs with
+        shape `(batch, channels, depth, height, width)`.
+      name: A string, the name of the layer.
+
+    Returns:
+      Output tensor.
+
+    Raises:
+      ValueError: if eager execution is enabled.
+
+
+    @compatibility(TF2)
+    This API is a legacy API that is only compatible with eager execution and
+    `tf.function` if you combine it with
+    `tf.compat.v1.keras.utils.track_tf1_style_variables`
+
+    Please refer to [tf.layers model mapping section of the migration guide]
+    (https://www.tensorflow.org/guide/migrate/model_mapping)
+    to learn how to use your TensorFlow v1 model in TF2 with Keras.
+
+    The corresponding TensorFlow v2 layer is
+    `tf.keras.layers.MaxPooling3D`.
+
+
+    #### Structural Mapping to Native TF2
+
+    None of the supported arguments have changed name.
+
+    Before:
+
+    ```python
+    y = tf.compat.v1.layers.max_pooling3d(x, pool_size=2, strides=2)
+    ```
+
+    After:
+
+    To migrate code using TF1 functional layers use the [Keras Functional API]
+    (https://www.tensorflow.org/guide/keras/functional):
+
+    ```python
+    x = tf.keras.Input((28, 28, 28, 1))
+    y = tf.keras.layers.MaxPooling3D(pool_size=2, strides=2)(x)
+    model = tf.keras.Model(x, y)
+    ```
+    @end_compatibility
+    """
+    warnings.warn(
+        "`tf.layers.max_pooling3d` is deprecated and "
+        "will be removed in a future version. "
+        "Please use `tf.keras.layers.MaxPooling3D` instead.",
+        stacklevel=2,
+    )
+    layer = MaxPooling3D(
+        pool_size=pool_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        name=name,
+    )
+    return layer(inputs)
-  After:
-
-  To migrate code using TF1 functional layers use the [Keras Functional API]
-  (https://www.tensorflow.org/guide/keras/functional):
-
-  ```python
-  x = tf.keras.Input((28, 28, 1))
-  y = tf.keras.layers.MaxPooling3D(pool_size=2, strides=2)(x)
-  model = tf.keras.Model(x, y)
-  ```
-  @end_compatibility
-  """
-  warnings.warn(
-      '`tf.layers.max_pooling3d` is deprecated and '
-      'will be removed in a future version. 
' - 'Please use `tf.keras.layers.MaxPooling3D` instead.', - stacklevel=2) - layer = MaxPooling3D(pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, - name=name) - return layer(inputs) # Aliases diff --git a/keras/legacy_tf_layers/pooling_test.py b/keras/legacy_tf_layers/pooling_test.py index 6ded7d886b97..a60049897936 100644 --- a/keras/legacy_tf_layers/pooling_test.py +++ b/keras/legacy_tf_layers/pooling_test.py @@ -20,187 +20,213 @@ import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils from keras.legacy_tf_layers import pooling as pooling_layers +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) -class PoolingTest(tf.test.TestCase): - def testInvalidDataFormat(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'data_format'): - pooling_layers.max_pooling2d(images, 3, strides=2, data_format='invalid') - - def testInvalidStrides(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'strides'): - pooling_layers.max_pooling2d(images, 3, strides=(1, 2, 3)) - - with self.assertRaisesRegex(ValueError, 'strides'): - pooling_layers.max_pooling2d(images, 3, strides=None) - - def testInvalidPoolSize(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 3), seed=1) - with self.assertRaisesRegex(ValueError, 'pool_size'): - pooling_layers.max_pooling2d(images, (1, 2, 3), strides=2) - - with self.assertRaisesRegex(ValueError, 'pool_size'): - pooling_layers.max_pooling2d(images, None, strides=2) - - def testCreateMaxPooling2D(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = pooling_layers.MaxPooling2D([2, 2], strides=2) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 3, 4, 4]) - - def testCreateAveragePooling2D(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = pooling_layers.AveragePooling2D([2, 2], strides=2) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 3, 4, 4]) - - @tf_test_utils.run_deprecated_v1 - def testCreateMaxPooling2DChannelsFirst(self): - height, width = 7, 9 - images = tf.random.uniform((5, 2, height, width)) - layer = pooling_layers.MaxPooling2D([2, 2], - strides=1, - data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 2, 6, 8]) - - @tf_test_utils.run_deprecated_v1 - def testCreateAveragePooling2DChannelsFirst(self): - height, width = 5, 6 - images = tf.random.uniform((3, 4, height, width)) - layer = pooling_layers.AveragePooling2D((2, 2), - strides=(1, 1), - padding='valid', - data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [3, 4, 4, 5]) - - @tf_test_utils.run_deprecated_v1 - def testCreateAveragePooling2DChannelsFirstWithNoneBatch(self): - height, width = 5, 6 - images = tf.compat.v1.placeholder(dtype='float32', - shape=(None, 4, height, width)) - layer = pooling_layers.AveragePooling2D((2, 2), - strides=(1, 1), - padding='valid', - data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [None, 4, 4, 5]) - - def testCreateMaxPooling1D(self): - width = 7 - channels = 3 - images = tf.random.uniform((5, width, channels)) - layer = pooling_layers.MaxPooling1D(2, strides=2) - 
output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, width // 2, channels]) - - def testCreateAveragePooling1D(self): - width = 7 - channels = 3 - images = tf.random.uniform((5, width, channels)) - layer = pooling_layers.AveragePooling1D(2, strides=2) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, width // 2, channels]) - - def testCreateMaxPooling1DChannelsFirst(self): - width = 7 - channels = 3 - images = tf.random.uniform((5, channels, width)) - layer = pooling_layers.MaxPooling1D( - 2, strides=2, data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, channels, width // 2]) - - def testCreateAveragePooling1DChannelsFirst(self): - width = 7 - channels = 3 - images = tf.random.uniform((5, channels, width)) - layer = pooling_layers.AveragePooling1D( - 2, strides=2, data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, channels, width // 2]) - - def testCreateMaxPooling3D(self): - depth, height, width = 6, 7, 9 - images = tf.random.uniform((5, depth, height, width, 4)) - layer = pooling_layers.MaxPooling3D([2, 2, 2], strides=2) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 4, 4]) - - def testCreateAveragePooling3D(self): - depth, height, width = 6, 7, 9 - images = tf.random.uniform((5, depth, height, width, 4)) - layer = pooling_layers.AveragePooling3D([2, 2, 2], strides=2) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 4, 4]) - - def testMaxPooling3DChannelsFirst(self): - depth, height, width = 6, 7, 9 - images = tf.random.uniform((5, 2, depth, height, width)) - layer = pooling_layers.MaxPooling3D( - [2, 2, 2], strides=2, data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 2, 3, 3, 4]) - - def testAveragePooling3DChannelsFirst(self): - depth, height, width = 6, 7, 9 - images = tf.random.uniform((5, 2, depth, height, width)) - layer = pooling_layers.AveragePooling3D( - [2, 2, 2], strides=2, data_format='channels_first') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 2, 3, 3, 4]) - - def testCreateMaxPooling2DIntegerPoolSize(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4)) - layer = pooling_layers.MaxPooling2D(2, strides=2) - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 3, 4, 4]) - - def testMaxPooling2DPaddingSame(self): - height, width = 7, 9 - images = tf.random.uniform((5, height, width, 4), seed=1) - layer = pooling_layers.MaxPooling2D( - images.get_shape()[1:3], strides=2, padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), [5, 4, 5, 4]) - - def testCreatePooling2DWithStrides(self): - height, width = 6, 8 - # Test strides tuple - images = tf.random.uniform((5, height, width, 3), seed=1) - layer = pooling_layers.MaxPooling2D([2, 2], strides=(2, 2), padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width / 2, 3]) - - # Test strides integer - layer = pooling_layers.MaxPooling2D([2, 2], strides=2, padding='same') - output = layer(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width / 2, 3]) - - # Test unequal strides - layer = pooling_layers.MaxPooling2D([2, 2], strides=(2, 1), padding='same') - output = layer(images) - 
self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width, 3]) - - -if __name__ == '__main__': - tf.test.main() +class PoolingTest(tf.test.TestCase): + def testInvalidDataFormat(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + with self.assertRaisesRegex(ValueError, "data_format"): + pooling_layers.max_pooling2d( + images, 3, strides=2, data_format="invalid" + ) + + def testInvalidStrides(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + with self.assertRaisesRegex(ValueError, "strides"): + pooling_layers.max_pooling2d(images, 3, strides=(1, 2, 3)) + + with self.assertRaisesRegex(ValueError, "strides"): + pooling_layers.max_pooling2d(images, 3, strides=None) + + def testInvalidPoolSize(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 3), seed=1) + with self.assertRaisesRegex(ValueError, "pool_size"): + pooling_layers.max_pooling2d(images, (1, 2, 3), strides=2) + + with self.assertRaisesRegex(ValueError, "pool_size"): + pooling_layers.max_pooling2d(images, None, strides=2) + + def testCreateMaxPooling2D(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = pooling_layers.MaxPooling2D([2, 2], strides=2) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 3, 4, 4]) + + def testCreateAveragePooling2D(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = pooling_layers.AveragePooling2D([2, 2], strides=2) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 3, 4, 4]) + + @tf_test_utils.run_deprecated_v1 + def testCreateMaxPooling2DChannelsFirst(self): + height, width = 7, 9 + images = tf.random.uniform((5, 2, height, width)) + layer = pooling_layers.MaxPooling2D( + [2, 2], strides=1, data_format="channels_first" + ) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 2, 6, 8]) + + @tf_test_utils.run_deprecated_v1 + def testCreateAveragePooling2DChannelsFirst(self): + height, width = 5, 6 + images = tf.random.uniform((3, 4, height, width)) + layer = pooling_layers.AveragePooling2D( + (2, 2), + strides=(1, 1), + padding="valid", + data_format="channels_first", + ) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [3, 4, 4, 5]) + + @tf_test_utils.run_deprecated_v1 + def testCreateAveragePooling2DChannelsFirstWithNoneBatch(self): + height, width = 5, 6 + images = tf.compat.v1.placeholder( + dtype="float32", shape=(None, 4, height, width) + ) + layer = pooling_layers.AveragePooling2D( + (2, 2), + strides=(1, 1), + padding="valid", + data_format="channels_first", + ) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [None, 4, 4, 5]) + + def testCreateMaxPooling1D(self): + width = 7 + channels = 3 + images = tf.random.uniform((5, width, channels)) + layer = pooling_layers.MaxPooling1D(2, strides=2) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, width // 2, channels] + ) + + def testCreateAveragePooling1D(self): + width = 7 + channels = 3 + images = tf.random.uniform((5, width, channels)) + layer = pooling_layers.AveragePooling1D(2, strides=2) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, width // 2, channels] + ) + + def testCreateMaxPooling1DChannelsFirst(self): + width = 7 + channels = 3 + images = tf.random.uniform((5, channels, width)) + layer = pooling_layers.MaxPooling1D( + 
2, strides=2, data_format="channels_first" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, channels, width // 2] + ) + + def testCreateAveragePooling1DChannelsFirst(self): + width = 7 + channels = 3 + images = tf.random.uniform((5, channels, width)) + layer = pooling_layers.AveragePooling1D( + 2, strides=2, data_format="channels_first" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, channels, width // 2] + ) + + def testCreateMaxPooling3D(self): + depth, height, width = 6, 7, 9 + images = tf.random.uniform((5, depth, height, width, 4)) + layer = pooling_layers.MaxPooling3D([2, 2, 2], strides=2) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 4, 4]) + + def testCreateAveragePooling3D(self): + depth, height, width = 6, 7, 9 + images = tf.random.uniform((5, depth, height, width, 4)) + layer = pooling_layers.AveragePooling3D([2, 2, 2], strides=2) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 4, 4]) + + def testMaxPooling3DChannelsFirst(self): + depth, height, width = 6, 7, 9 + images = tf.random.uniform((5, 2, depth, height, width)) + layer = pooling_layers.MaxPooling3D( + [2, 2, 2], strides=2, data_format="channels_first" + ) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 2, 3, 3, 4]) + + def testAveragePooling3DChannelsFirst(self): + depth, height, width = 6, 7, 9 + images = tf.random.uniform((5, 2, depth, height, width)) + layer = pooling_layers.AveragePooling3D( + [2, 2, 2], strides=2, data_format="channels_first" + ) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 2, 3, 3, 4]) + + def testCreateMaxPooling2DIntegerPoolSize(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4)) + layer = pooling_layers.MaxPooling2D(2, strides=2) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 3, 4, 4]) + + def testMaxPooling2DPaddingSame(self): + height, width = 7, 9 + images = tf.random.uniform((5, height, width, 4), seed=1) + layer = pooling_layers.MaxPooling2D( + images.get_shape()[1:3], strides=2, padding="same" + ) + output = layer(images) + self.assertListEqual(output.get_shape().as_list(), [5, 4, 5, 4]) + + def testCreatePooling2DWithStrides(self): + height, width = 6, 8 + # Test strides tuple + images = tf.random.uniform((5, height, width, 3), seed=1) + layer = pooling_layers.MaxPooling2D( + [2, 2], strides=(2, 2), padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height / 2, width / 2, 3] + ) + + # Test strides integer + layer = pooling_layers.MaxPooling2D([2, 2], strides=2, padding="same") + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height / 2, width / 2, 3] + ) + + # Test unequal strides + layer = pooling_layers.MaxPooling2D( + [2, 2], strides=(2, 1), padding="same" + ) + output = layer(images) + self.assertListEqual( + output.get_shape().as_list(), [5, height / 2, width, 3] + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/legacy_tf_layers/variable_scope_shim.py b/keras/legacy_tf_layers/variable_scope_shim.py index b7ee69ac0396..5eaf3f2fc49e 100644 --- a/keras/legacy_tf_layers/variable_scope_shim.py +++ b/keras/legacy_tf_layers/variable_scope_shim.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================= -# pylint: disable=g-classes-have-attributes + """Contains a shim to allow using TF1 get_variable code in TF2.""" from __future__ import absolute_import from __future__ import division @@ -21,291 +21,158 @@ import contextlib import functools +import tensorflow.compat.v2 as tf + from keras.engine import base_layer from keras.utils import layer_utils from keras.utils import tf_inspect -import tensorflow.compat.v2 as tf +# isort: off from tensorflow.python.ops import variable_scope as vs from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export def as_shape(shape): - """Converts the given object to a TensorShape.""" - if isinstance(shape, tf.TensorShape): - return shape - else: - return tf.TensorShape(shape) + """Converts the given object to a TensorShape.""" + if isinstance(shape, tf.TensorShape): + return shape + else: + return tf.TensorShape(shape) def _is_callable_object(obj): - return hasattr(obj, "__call__") and tf_inspect.ismethod(obj.__call__) + return hasattr(obj, "__call__") and tf_inspect.ismethod(obj.__call__) def _has_kwargs(fn): - """Returns whether the passed callable has **kwargs in its signature. + """Returns whether the passed callable has **kwargs in its signature. - Args: - fn: Function, or function-like object (e.g., result of `functools.partial`). + Args: + fn: Function, or function-like object (e.g., result of + `functools.partial`). - Returns: - `bool`: if `fn` has **kwargs in its signature. + Returns: + `bool`: if `fn` has **kwargs in its signature. - Raises: - `TypeError`: If fn is not a Function, or function-like object. - """ - if isinstance(fn, functools.partial): - fn = fn.func - elif _is_callable_object(fn): - fn = fn.__call__ - elif not callable(fn): - raise TypeError( - "fn should be a function-like object, but is of type {}.".format( - type(fn))) - return tf_inspect.getfullargspec(fn).varkw is not None + Raises: + `TypeError`: If fn is not a Function, or function-like object. + """ + if isinstance(fn, functools.partial): + fn = fn.func + elif _is_callable_object(fn): + fn = fn.__call__ + elif not callable(fn): + raise TypeError( + f"fn should be a function-like object, but is of type {type(fn)}." + ) + return tf_inspect.getfullargspec(fn).varkw is not None def fn_args(fn): - """Get argument names for function-like object. - - Args: - fn: Function, or function-like object (e.g., result of `functools.partial`). - - Returns: - `tuple` of string argument names. - - Raises: - ValueError: if partial function has positionally bound arguments - """ - if isinstance(fn, functools.partial): - args = fn_args(fn.func) - args = [a for a in args[len(fn.args):] if a not in (fn.keywords or [])] - else: - if hasattr(fn, "__call__") and tf_inspect.ismethod(fn.__call__): - fn = fn.__call__ - args = tf_inspect.getfullargspec(fn).args - if _is_bound_method(fn) and args: - # If it's a bound method, it may or may not have a self/cls first - # argument; for example, self could be captured in *args. - # If it does have a positional argument, it is self/cls. - args.pop(0) - return tuple(args) + """Get argument names for function-like object. + + Args: + fn: Function, or function-like object (e.g., result of + `functools.partial`). + + Returns: + `tuple` of string argument names. 
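The argument-introspection helpers above drive the `custom_getter` dispatch later in this file (forwarding `constraint` only when the getter can accept it). A rough stdlib-only sketch of what `_has_kwargs` and `fn_args` compute, using `inspect` rather than the internal `tf_inspect` wrapper, so details may differ:

```python
import functools
import inspect

def f(a, b, constraint=None, **kwargs):
    return a

p = functools.partial(f, 1)

# **kwargs detection, as in `_has_kwargs`:
assert inspect.getfullargspec(p.func).varkw == "kwargs"

# Argument names left unbound by the partial, as in `fn_args`:
spec = inspect.getfullargspec(p.func)
remaining = [
    a for a in spec.args[len(p.args):] if a not in (p.keywords or {})
]
assert remaining == ["b", "constraint"]  # so `constraint` can be forwarded
```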
+ + Raises: + ValueError: if partial function has positionally bound arguments + """ + if isinstance(fn, functools.partial): + args = fn_args(fn.func) + args = [a for a in args[len(fn.args) :] if a not in (fn.keywords or [])] + else: + if hasattr(fn, "__call__") and tf_inspect.ismethod(fn.__call__): + fn = fn.__call__ + args = tf_inspect.getfullargspec(fn).args + if _is_bound_method(fn) and args: + # If it's a bound method, it may or may not have a self/cls first + # argument; for example, self could be captured in *args. + # If it does have a positional argument, it is self/cls. + args.pop(0) + return tuple(args) def _is_bound_method(fn): - _, fn = tf.__internal__.decorator.unwrap(fn) - return tf_inspect.ismethod(fn) and (fn.__self__ is not None) + _, fn = tf.__internal__.decorator.unwrap(fn) + return tf_inspect.ismethod(fn) and (fn.__self__ is not None) def validate_synchronization_aggregation_trainable( - synchronization, aggregation, trainable, name): - """Given user-provided variable properties, sets defaults and validates.""" - if aggregation is None: - aggregation = tf.compat.v1.VariableAggregation.NONE - else: - if not isinstance(aggregation, - (tf.compat.v1.VariableAggregation, - tf.VariableAggregation)): - try: - aggregation = tf.VariableAggregation(aggregation) - except ValueError: - raise ValueError( - "Invalid variable aggregation mode: {} for variable: {}".format( - aggregation, name)) - if synchronization is None: - synchronization = tf.VariableSynchronization.AUTO - else: - try: - synchronization = tf.VariableSynchronization(synchronization) - except ValueError: - raise ValueError( - "Invalid variable synchronization mode: {} for variable: {}".format( - synchronization, name)) - if trainable is None: - trainable = synchronization != tf.VariableSynchronization.ON_READ - return synchronization, aggregation, trainable + synchronization, aggregation, trainable, name +): + """Given user-provided variable properties, sets defaults and validates.""" + if aggregation is None: + aggregation = tf.compat.v1.VariableAggregation.NONE + else: + if not isinstance( + aggregation, + (tf.compat.v1.VariableAggregation, tf.VariableAggregation), + ): + try: + aggregation = tf.VariableAggregation(aggregation) + except ValueError: + raise ValueError( + "Invalid variable aggregation mode: {} " + "for variable: {}".format(aggregation, name) + ) + if synchronization is None: + synchronization = tf.VariableSynchronization.AUTO + else: + try: + synchronization = tf.VariableSynchronization(synchronization) + except ValueError: + raise ValueError( + "Invalid variable synchronization mode: {} " + "for variable: {}".format(synchronization, name) + ) + if trainable is None: + trainable = synchronization != tf.VariableSynchronization.ON_READ + return synchronization, aggregation, trainable class _EagerVariableStore(tf.Module): - """TF2-compatible VariableStore that avoids collections & tracks regularizers. - - New variable names and new variables can be created; all stored - variables are initialized with the initializer passed to __init__. - - All variables get created in `tf.init_scope.` to avoid a bad - interaction between `tf.function` `FuncGraph` internals, Keras - Functional Models, and TPUStrategy variable initialization. - - Also, it always acts as if reuse is set to either "TRUE" or - tf.compat.v1.AUTO_REUSE - - Attributes: - vars: a dictionary with string names (same as passed in GetVar) as keys and - the corresponding TensorFlow Variables as values. 
- regularizers: a dictionary with string names as keys and the corresponding - callables that return losses as values. - layers: a dictionary with string names as keys and the corresponding - nested keras layers as values. - """ - - def __init__(self): - """Create a variable store.""" - self._vars = {} # A dictionary of the stored TensorFlow variables. - self._regularizers = {} # A dict mapping var names to their regularizers. - self._layers = {} # A dictionary of stored keras layers. - self._store_eager_variables = True - - @contextlib.contextmanager - def scope(self): - with vs.with_variable_store(self): - yield - - def get_variable( - self, - name, - shape=None, - dtype=tf.float32, - initializer=None, - regularizer=None, - reuse=None, - trainable=None, - collections=None, - caching_device=None, - partitioner=None, - validate_shape=True, - use_resource=None, - custom_getter=None, - constraint=None, - synchronization=tf.VariableSynchronization.AUTO, - aggregation=tf.compat.v1.VariableAggregation.NONE): - """Gets an existing variable with these parameters or create a new one. - - If a variable with the given name is already stored, we return the stored - variable. Otherwise, we create a new one. - - Set `reuse` to `True` when you only want to reuse existing Variables. - Set `reuse` to None (the default) or tf.compat.v1.AUTO_REUSE when you want - variables to be created if they don't exist or returned if they do. - In this shim, `reuse` of `False` will be treated as auto-reuse. - - If initializer is `None` (the default), the default initializer passed in - the constructor is used. If that one is `None` too, we use a new - `glorot_uniform_initializer`. If initializer is a Tensor, we use - it as a value and derive the shape from the initializer. - - If a partitioner is provided, a `PartitionedVariable` is returned. - Accessing this object as a `Tensor` returns the shards concatenated along - the partition axis. - - Some useful partitioners are available. See, e.g., - `variable_axis_size_partitioner` and `min_max_variable_partitioner`. + """TF2-safe VariableStore that avoids collections & tracks regularizers. - Args: - name: The name of the new or existing variable. - shape: Shape of the new or existing variable. - dtype: Type of the new or existing variable (defaults to `DT_FLOAT`). - initializer: Initializer for the variable. - regularizer: A (Tensor -> Tensor or None) function; the result of applying - it on a newly created variable will be added to the collection - GraphKeys.REGULARIZATION_LOSSES and can be used for regularization. - reuse: a Boolean, None, or tf.AUTO_REUSE. Controls reuse or creation of - variables. When eager execution is enabled this argument is always - forced to be False. - trainable: If `True` also add the variable to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). `trainable` - defaults to `True`, unless `synchronization` is set to `ON_READ`, in - which case it defaults to `False`. - collections: List of graph collections keys to add the `Variable` to. - Defaults to `[GraphKeys.GLOBAL_VARIABLES]` (see `tf.Variable`). - caching_device: Optional device string or function describing where the - Variable should be cached for reading. Defaults to the Variable's - device. If not `None`, caches on another device. Typical use is to - cache on the device where the Ops using the `Variable` reside, to - deduplicate copying through `Switch` and other conditional statements. 
- partitioner: Optional callable that accepts a fully defined `TensorShape` - and dtype of the `Variable` to be created, and returns a list of - partitions for each axis (currently only one axis can be partitioned). - validate_shape: If False, allows the variable to be initialized with a - value of unknown shape. If True, the default, the shape of initial_value - must be known. - use_resource: If False, creates a regular Variable. If True, creates - instead an experimental ResourceVariable which has well-defined - semantics. Defaults to False (will later change to True). When eager - execution is enabled this argument is always forced to be true. - custom_getter: Callable that takes as a first argument the true getter, - and allows overwriting the internal get_variable method. The signature - of `custom_getter` should match that of this method, - but the most future-proof version will allow for changes: `def - custom_getter(getter, *args, **kwargs)`. Direct access to - all `get_variable` parameters is also allowed: `def - custom_getter(getter, name, *args, **kwargs)`. A simple identity - custom getter that simply creates variables with modified names is: - ```python - def custom_getter(getter, name, *args, **kwargs): return getter(name + - '_suffix', *args, **kwargs) ``` - constraint: An optional projection function to be applied to the variable - after being updated by an `Optimizer` (e.g. used to implement norm - constraints or value constraints for layer weights). The function must - take as input the unprojected Tensor representing the value of the - variable and return the Tensor for the projected value (which must have - the same shape). Constraints are not safe to use when doing asynchronous - distributed training. - synchronization: Indicates when a distributed a variable will be - aggregated. Accepted values are constants defined in the class - `tf.VariableSynchronization`. By default the synchronization is set to - `AUTO` and the current `DistributionStrategy` chooses when to - synchronize. - aggregation: Indicates how a distributed variable will be aggregated. - Accepted values are constants defined in the class - `tf.VariableAggregation`. + New variable names and new variables can be created; all stored + variables are initialized with the initializer passed to __init__. - Returns: - The created or existing `Variable` (or `PartitionedVariable`, if a - partitioner was used). + All variables get created in `tf.init_scope.` to avoid a bad + interaction between `tf.function` `FuncGraph` internals, Keras + Functional Models, and TPUStrategy variable initialization. - Raises: - ValueError: when creating a new variable and shape is not declared, - when reusing a variable and specifying a conflicting shape, - or when violating reuse during variable creation. - RuntimeError: when eager execution is enabled and not called from an - EagerVariableStore. + Also, it always acts as if reuse is set to either "TRUE" or + tf.compat.v1.AUTO_REUSE + + Attributes: + vars: a dictionary with string names (same as passed in GetVar) as keys + and the corresponding TensorFlow Variables as values. + regularizers: a dictionary with string names as keys and the corresponding + callables that return losses as values. + layers: a dictionary with string names as keys and the corresponding + nested keras layers as values. 
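A hedged usage sketch of the store described above (a private class, so this may change without notice): inside `store.scope()`, `tf.compat.v1.get_variable` routes through this store, and a second lookup under the same name returns the stored eager variable rather than creating a new one:

```python
import tensorflow.compat.v2 as tf
from keras.legacy_tf_layers import variable_scope_shim

store = variable_scope_shim._EagerVariableStore()
with store.scope():
    v1 = tf.compat.v1.get_variable(
        "w", shape=[2], initializer=tf.compat.v1.zeros_initializer()
    )
with store.scope():
    v2 = tf.compat.v1.get_variable("w", shape=[2])  # found, not recreated
assert v1 is v2
```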
""" - if custom_getter is not None and not callable(custom_getter): - raise ValueError("Passed a custom_getter which is not callable: %s" % - custom_getter) - - with tf.init_scope(): - if tf.executing_eagerly(): - # Variable creation and initialization takes place in `init_scope`s; - # as such, if an `init_scope` lifts us into the eager context, then we - # need to use `ResourceVariable`s. - use_resource = True - - # Note that it's fine to reuse eager variables whose initialization was - # lifted from a function-building graph into the eager context (that's why - # the following clause is not wrapped in an `init_scope`); lifted variables - # are tracked by the graph's `VariableStore`. - if not reuse: - reuse = tf.compat.v1.AUTO_REUSE - - # If a *_ref type is passed in an error would be triggered further down the - # stack. We prevent this using base_dtype to get a non-ref version of the - # type, before doing anything else. When _ref types are removed in favor of - # resources, this line can be removed. - try: - dtype = dtype.base_dtype - except AttributeError: - # .base_dtype not existing means that we will try and use the raw dtype - # which was passed in - this might be a NumPy type which is valid. - pass - - # This is the main logic of get_variable. However, custom_getter - # may override this logic. So we save it as a callable and pass - # it to custom_getter. - # Note: the parameters of _true_getter, and their documentation, match - # *exactly* item-for-item with the docstring of this method. - def _true_getter( # pylint: disable=missing-docstring + + def __init__(self): + """Create a variable store.""" + self._vars = {} # A dictionary of the stored TensorFlow variables. + self._regularizers = ( + {} + ) # A dict mapping var names to their regularizers. + self._layers = {} # A dictionary of stored keras layers. + self._store_eager_variables = True + + @contextlib.contextmanager + def scope(self): + with vs.with_variable_store(self): + yield + + def get_variable( + self, name, shape=None, dtype=tf.float32, @@ -313,699 +180,907 @@ def _true_getter( # pylint: disable=missing-docstring regularizer=None, reuse=None, trainable=None, - collections=None, # pylint: disable=unused-argument + collections=None, caching_device=None, partitioner=None, validate_shape=True, - use_resource=None, # pylint: disable=unused-argument + use_resource=None, + custom_getter=None, constraint=None, synchronization=tf.VariableSynchronization.AUTO, - aggregation=tf.compat.v1.VariableAggregation.NONE): - # Partitioned variable currently unsupported w/ the shim - if partitioner is not None: - raise ValueError( - "`partitioner` arg for `get_variable` is unsupported in TF2." - "File a bug if you need help. You passed %s" % partitioner) - - # Single variable case - if "%s/part_0" % name in self._vars: - raise ValueError( - "No partitioner was provided, but a partitioned version of the " - "variable was found: %s/part_0. Perhaps a variable of the same " - "name was already created with partitioning?" 
% name) - - return self._get_single_variable( - name=name, - shape=shape, - dtype=dtype, - initializer=initializer, - regularizer=regularizer, - reuse=reuse, - trainable=trainable, - caching_device=caching_device, - validate_shape=validate_shape, - constraint=constraint, - synchronization=synchronization, - aggregation=aggregation) - - synchronization, aggregation, trainable = ( - validate_synchronization_aggregation_trainable( - synchronization, aggregation, trainable, name)) - - if custom_getter is not None: - # Handle backwards compatibility with getter arguments that were added - # to the API after users started writing custom getters. - custom_getter_kwargs = { - "getter": _true_getter, - "name": name, - "shape": shape, - "dtype": dtype, - "initializer": initializer, - "regularizer": regularizer, - "reuse": reuse, - "trainable": trainable, - "collections": collections, - "caching_device": caching_device, - "partitioner": partitioner, - "validate_shape": validate_shape, - "use_resource": use_resource, - "synchronization": synchronization, - "aggregation": aggregation, - } - # `fn_args` and `has_kwargs` can handle functions, `functools.partial`, - # `lambda`. - if ("constraint" in fn_args(custom_getter) or - _has_kwargs(custom_getter)): - custom_getter_kwargs["constraint"] = constraint - return custom_getter(**custom_getter_kwargs) - else: - return _true_getter( - name, - shape=shape, - dtype=dtype, - initializer=initializer, - regularizer=regularizer, - reuse=reuse, - trainable=trainable, - collections=collections, - caching_device=caching_device, - partitioner=partitioner, - validate_shape=validate_shape, - use_resource=use_resource, - constraint=constraint, - synchronization=synchronization, - aggregation=aggregation) - - def _get_single_variable( - self, - name, - shape=None, - dtype=tf.float32, - initializer=None, - regularizer=None, - partition_info=None, - reuse=None, - trainable=None, - caching_device=None, - validate_shape=True, - constraint=None, - synchronization=tf.VariableSynchronization.AUTO, - aggregation=tf.compat.v1.VariableAggregation.NONE): - """Get or create a single Variable (e.g. - - a shard or entire variable). - - See the documentation of get_variable above (ignore partitioning components) - for details. + aggregation=tf.compat.v1.VariableAggregation.NONE, + ): + """Gets an existing variable with these parameters or create a new one. + + If a variable with the given name is already stored, we return the + stored variable. Otherwise, we create a new one. + + Set `reuse` to `True` when you only want to reuse existing Variables. + Set `reuse` to None (the default) or tf.compat.v1.AUTO_REUSE when you + want variables to be created if they don't exist or returned if they do. + In this shim, `reuse` of `False` will be treated as auto-reuse. + + If initializer is `None` (the default), the default initializer passed + in the constructor is used. If that one is `None` too, we use a new + `glorot_uniform_initializer`. If initializer is a Tensor, we use it as a + value and derive the shape from the initializer. + + If a partitioner is provided, a `PartitionedVariable` is returned. + Accessing this object as a `Tensor` returns the shards concatenated + along the partition axis. + + Some useful partitioners are available. See, e.g., + `variable_axis_size_partitioner` and `min_max_variable_partitioner`. + + Args: + name: The name of the new or existing variable. + shape: Shape of the new or existing variable. + dtype: Type of the new or existing variable. 
Defaults to `DT_FLOAT`.
+          initializer: Initializer for the variable.
+          regularizer: A (Tensor -> Tensor or None) function; the result of
+            applying it on a newly created variable will be added to the
+            collection GraphKeys.REGULARIZATION_LOSSES and can be used for
+            regularization.
+          reuse: a Boolean, None, or tf.AUTO_REUSE. Controls reuse or creation
+            of variables. When eager execution is enabled this argument is
+            always forced to be False.
+          trainable: If `True` also add the variable to the graph collection
+            `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). `trainable`
+            defaults to `True`, unless `synchronization` is set to `ON_READ`,
+            in which case it defaults to `False`.
+          collections: List of graph collections keys to add the `Variable` to.
+            Defaults to `[GraphKeys.GLOBAL_VARIABLES]` (see `tf.Variable`).
+          caching_device: Optional device string or function describing where
+            the Variable should be cached for reading. Defaults to `None`,
+            which means to use the Variable's device. If not `None`, caches on
+            another device. Typical use is to cache on the device where the
+            Ops using the `Variable` reside, to deduplicate copying through
+            `Switch` and other conditional statements.
+          partitioner: Optional callable that accepts a fully defined
+            `TensorShape` and dtype of the `Variable` to be created, and returns
+            a list of partitions for each axis (currently only one axis can be
+            partitioned).
+          validate_shape: If False, allows the variable to be initialized with a
+            value of unknown shape. If True, the default, the shape of
+            initial_value must be known.
+          use_resource: If False, creates a regular Variable. If True, creates
+            instead an experimental ResourceVariable which has well-defined
+            semantics. Defaults to `False` (this will later change to `True`).
+            When eager execution is enabled this argument is always forced to
+            be `True`.
+          custom_getter: Callable that takes as a first argument the true
+            getter, and allows overwriting the internal get_variable method. The
+            signature of `custom_getter` should match that of this method, but
+            the most future-proof version will allow for changes:
+            `def custom_getter(getter, *args, **kwargs)`.
+            Direct access to all `get_variable` parameters is also allowed:
+            `def custom_getter(getter, name, *args, **kwargs)`.
+            A simple identity custom getter that simply creates variables with
+            modified names is:
+            ```python
+            def custom_getter(getter, name, *args, **kwargs):
+              return getter(name + '_suffix', *args, **kwargs)
+            ```
+          constraint: An optional projection function to be applied to the
+            variable after being updated by an `Optimizer` (e.g. used to
+            implement norm constraints or value constraints for layer weights).
+            The function must take as input the unprojected Tensor representing
+            the value of the variable and return the Tensor for the projected
+            value (which must have the same shape). Constraints are not safe to
+            use when doing asynchronous distributed training.
+          synchronization: Indicates when a distributed variable will be
+            aggregated. Accepted values are constants defined in the class
+            `tf.VariableSynchronization`. By default the synchronization is set
+            to `AUTO` and the current `DistributionStrategy` chooses when to
+            synchronize.
+          aggregation: Indicates how a distributed variable will be aggregated.
+            Accepted values are constants defined in the class
+            `tf.VariableAggregation`.
+
+        Returns:
+          The created or existing `Variable` (or `PartitionedVariable`, if a
+          partitioner was used).
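A small, hedged illustration of the `custom_getter` contract documented above (`suffix_getter` is illustrative, not part of this change; run in graph mode so `get_variable` works outside the shim):

```python
import tensorflow.compat.v2 as tf

def suffix_getter(getter, name, *args, **kwargs):
    # Delegate to the true getter under a rewritten variable name.
    return getter(name + "_suffix", *args, **kwargs)

with tf.Graph().as_default():  # get_variable needs a graph or this shim
    v = tf.compat.v1.get_variable(
        "kernel", shape=[3], custom_getter=suffix_getter
    )
    assert v.name == "kernel_suffix:0"
```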
+ + Raises: + ValueError: when creating a new variable and shape is not declared, + when reusing a variable and specifying a conflicting shape, + or when violating reuse during variable creation. + RuntimeError: when eager execution is enabled and not called from an + EagerVariableStore. + """ + if custom_getter is not None and not callable(custom_getter): + raise ValueError( + f"Passed a custom_getter which is not callable: {custom_getter}" + ) + + with tf.init_scope(): + if tf.executing_eagerly(): + # Variable creation and initialization takes place in + # `init_scope`s; as such, if an `init_scope` lifts us into the + # eager context, then we need to use `ResourceVariable`s. + use_resource = True + + # Note that it's fine to reuse eager variables whose initialization was + # lifted from a function-building graph into the eager context (that's + # why the following clause is not wrapped in an `init_scope`); lifted + # variables are tracked by the graph's `VariableStore`. + if not reuse: + reuse = tf.compat.v1.AUTO_REUSE + + # If a *_ref type is passed in an error would be triggered further down + # the stack. We prevent this using base_dtype to get a non-ref version + # of the type, before doing anything else. When _ref types are removed + # in favor of resources, this line can be removed. + try: + dtype = dtype.base_dtype + except AttributeError: + # .base_dtype not existing means that we will try and use the raw + # dtype which was passed in - this might be a NumPy type which is + # valid. + pass + + # This is the main logic of get_variable. However, custom_getter + # may override this logic. So we save it as a callable and pass + # it to custom_getter. + # Note: the parameters of _true_getter, and their documentation, match + # *exactly* item-for-item with the docstring of this method. + def _true_getter( + name, + shape=None, + dtype=tf.float32, + initializer=None, + regularizer=None, + reuse=None, + trainable=None, + collections=None, + caching_device=None, + partitioner=None, + validate_shape=True, + use_resource=None, + constraint=None, + synchronization=tf.VariableSynchronization.AUTO, + aggregation=tf.compat.v1.VariableAggregation.NONE, + ): + # Partitioned variable currently unsupported w/ the shim + if partitioner is not None: + raise ValueError( + "`partitioner` arg for `get_variable` is unsupported in " + "TF2. File a bug if you need help. " + "You passed %s" % partitioner + ) + + # Single variable case + if f"{name}/part_0" in self._vars: + raise ValueError( + "No partitioner was provided, but a partitioned version of " + "the variable was found: %s/part_0. Perhaps a variable of " + "the same name was already created with " + "partitioning?" % name + ) + + return self._get_single_variable( + name=name, + shape=shape, + dtype=dtype, + initializer=initializer, + regularizer=regularizer, + reuse=reuse, + trainable=trainable, + caching_device=caching_device, + validate_shape=validate_shape, + constraint=constraint, + synchronization=synchronization, + aggregation=aggregation, + ) + + ( + synchronization, + aggregation, + trainable, + ) = validate_synchronization_aggregation_trainable( + synchronization, aggregation, trainable, name + ) + + if custom_getter is not None: + # Handle backwards compatibility with getter arguments that were + # added to the API after users started writing custom getters. 
+ custom_getter_kwargs = { + "getter": _true_getter, + "name": name, + "shape": shape, + "dtype": dtype, + "initializer": initializer, + "regularizer": regularizer, + "reuse": reuse, + "trainable": trainable, + "collections": collections, + "caching_device": caching_device, + "partitioner": partitioner, + "validate_shape": validate_shape, + "use_resource": use_resource, + "synchronization": synchronization, + "aggregation": aggregation, + } + # `fn_args` and `has_kwargs` can handle functions, + # `functools.partial`, `lambda`. + if "constraint" in fn_args(custom_getter) or _has_kwargs( + custom_getter + ): + custom_getter_kwargs["constraint"] = constraint + return custom_getter(**custom_getter_kwargs) + else: + return _true_getter( + name, + shape=shape, + dtype=dtype, + initializer=initializer, + regularizer=regularizer, + reuse=reuse, + trainable=trainable, + collections=collections, + caching_device=caching_device, + partitioner=partitioner, + validate_shape=validate_shape, + use_resource=use_resource, + constraint=constraint, + synchronization=synchronization, + aggregation=aggregation, + ) + + def _get_single_variable( + self, + name, + shape=None, + dtype=tf.float32, + initializer=None, + regularizer=None, + partition_info=None, + reuse=None, + trainable=None, + caching_device=None, + validate_shape=True, + constraint=None, + synchronization=tf.VariableSynchronization.AUTO, + aggregation=tf.compat.v1.VariableAggregation.NONE, + ): + """Get or create a single Variable (e.g. a shard or entire variable). + + See the documentation of get_variable above (ignore partitioning + components) for details. + + Args: + name: see get_variable. + shape: see get_variable. + dtype: see get_variable. + initializer: see get_variable. + regularizer: see get_variable. + partition_info: _PartitionInfo object. + reuse: see get_variable. + trainable: see get_variable. + caching_device: see get_variable. + validate_shape: see get_variable. + constraint: see get_variable. + synchronization: see get_variable. + aggregation: see get_variable. + + Returns: + A Variable. See documentation of get_variable above. + + Raises: + ValueError: See documentation of get_variable above. + """ + # Set to true if initializer is a constant. + initializing_from_value = False + if initializer is not None and not callable(initializer): + initializing_from_value = True + if shape is not None and initializing_from_value: + raise ValueError( + "If initializer is a constant, do not specify shape." + ) + + dtype = tf.as_dtype(dtype) + shape = as_shape(shape) + + if name in self._vars: + # Here we handle the case when returning an existing variable. + found_var = self._vars[name] + if not shape.is_compatible_with(found_var.get_shape()): + raise ValueError( + "Trying to share variable %s, but specified shape %s" + " and found shape %s." + % (name, shape, found_var.get_shape()) + ) + if not dtype.is_compatible_with(found_var.dtype): + dtype_str = dtype.name + found_type_str = found_var.dtype.name + raise ValueError( + "Trying to share variable %s, but specified dtype %s" + " and found dtype %s." % (name, dtype_str, found_type_str) + ) + return found_var + + # The code below handles only the case of creating a new variable. + if reuse is True: + raise ValueError( + "Variable %s does not exist, or was not created with " + "tf.get_variable(). Did you mean to set " + "reuse=tf.AUTO_REUSE in VarScope?" % name + ) + + # Create the tensor to initialize the variable with default value. 
+        if initializer is None:
+            (
+                initializer,
+                initializing_from_value,
+            ) = self._get_default_initializer(
+                name=name, shape=shape, dtype=dtype
+            )
+        # Enter an init scope when creating the initializer.
+        with tf.init_scope():
+            if initializing_from_value:
+                init_val = initializer
+                variable_dtype = None
+            else:
+                # Instantiate initializer if provided initializer is a type
+                # object.
+                if tf_inspect.isclass(initializer):
+                    initializer = initializer()
+                if shape.is_fully_defined():
+                    if (
+                        "partition_info"
+                        in tf_inspect.getargspec(initializer).args
+                    ):
+                        init_val = functools.partial(
+                            initializer,
+                            shape.as_list(),
+                            dtype=dtype,
+                            partition_info=partition_info,
+                        )
+                    else:
+                        init_val = functools.partial(
+                            initializer, shape.as_list(), dtype=dtype
+                        )
+                    variable_dtype = dtype.base_dtype
+                else:
+                    init_val = initializer
+                    variable_dtype = None
+
+        # Create the variable (always eagerly, as a workaround for a strange
+        # tpu / funcgraph / keras functional model interaction)
+        with tf.init_scope():
+            v = tf.Variable(
+                initial_value=init_val,
+                name=name,
+                trainable=trainable,
+                caching_device=caching_device,
+                dtype=variable_dtype,
+                validate_shape=validate_shape,
+                constraint=constraint,
+                synchronization=synchronization,
+                aggregation=aggregation,
+            )
+
+        self._vars[name] = v
+        logging.vlog(
+            1,
+            "Created variable %s with shape %s and init %s",
+            v.name,
+            format(shape),
+            initializer,
+        )
+
+        # Run the regularizer if requested and save the resulting loss.
+        if regularizer:
+            self.add_regularizer(v, regularizer)
+
+        return v
+
+    def get_or_create_layer(self, name, create_layer_method):
+        if name not in self._layers:
+            layer = create_layer_method()
+            self._layers[name] = layer
+            if isinstance(layer, base_layer.Layer):
+                self._regularizers[name] = lambda: tf.math.reduce_sum(
+                    layer.losses
+                )
+        return self._layers[name]
+
+    def add_regularizer(self, var, regularizer):
+        self._regularizers[var.name] = functools.partial(regularizer, var)
+
+    # Initialize variable when no initializer provided
+    def _get_default_initializer(self, name, shape=None, dtype=tf.float32):
+        """Provide a default initializer and a corresponding value.
+
+        Args:
+          name: see get_variable.
+          shape: see get_variable.
+          dtype: see get_variable.
+
+        Returns:
+          initializer and initializing_from_value. See get_variable above.
+
+        Raises:
+          ValueError: When giving unsupported dtype.
+        """
+        del shape
+        # If dtype is DT_FLOAT, provide a uniform unit scaling initializer
+        if dtype.is_floating:
+            initializer = tf.compat.v1.glorot_uniform_initializer()
+            initializing_from_value = False
+        # If dtype is DT_INT/DT_UINT, provide a default value `zero`
+        # If dtype is DT_BOOL, provide a default value `FALSE`
+        elif (
+            dtype.is_integer
+            or dtype.is_unsigned
+            or dtype.is_bool
+            or dtype == tf.string
+        ):
+            initializer = tf.compat.v1.zeros_initializer()
+            initializing_from_value = False
+        # NOTE: Do we need to support handling DT_STRING and DT_COMPLEX
+        # here?
+        else:
+            raise ValueError(
+                "An initializer for variable %s of %s is required"
+                % (name, dtype.base_dtype)
+            )
-    Args:
-      name: see get_variable.
-      shape: see get_variable.
-      dtype: see get_variable.
-      initializer: see get_variable.
-      regularizer: see get_variable.
-      partition_info: _PartitionInfo object.
-      reuse: see get_variable.
-      trainable: see get_variable.
-      caching_device: see get_variable.
-      validate_shape: see get_variable.
-      constraint: see get_variable.
-      synchronization: see get_variable.
-      aggregation: see get_variable.
+ return initializer, initializing_from_value - Returns: - A Variable. See documentation of get_variable above. - Raises: - ValueError: See documentation of get_variable above. - """ - # Set to true if initializer is a constant. - initializing_from_value = False - if initializer is not None and not callable(initializer): - initializing_from_value = True - if shape is not None and initializing_from_value: - raise ValueError("If initializer is a constant, do not specify shape.") - - dtype = tf.as_dtype(dtype) - shape = as_shape(shape) - - if name in self._vars: - # Here we handle the case when returning an existing variable. - found_var = self._vars[name] - if not shape.is_compatible_with(found_var.get_shape()): - raise ValueError("Trying to share variable %s, but specified shape %s" - " and found shape %s." % - (name, shape, found_var.get_shape())) - if not dtype.is_compatible_with(found_var.dtype): - dtype_str = dtype.name - found_type_str = found_var.dtype.name - raise ValueError("Trying to share variable %s, but specified dtype %s" - " and found dtype %s." % - (name, dtype_str, found_type_str)) - return found_var - - # The code below handles only the case of creating a new variable. - if reuse is True: # pylint: disable=g-bool-id-comparison - raise ValueError("Variable %s does not exist, or was not created with " - "tf.get_variable(). Did you mean to set " - "reuse=tf.AUTO_REUSE in VarScope?" % name) - - # Create the tensor to initialize the variable with default value. - if initializer is None: - initializer, initializing_from_value = self._get_default_initializer( - name=name, shape=shape, dtype=dtype) - # Enter an init scope when creating the initializer. - with tf.init_scope(): - if initializing_from_value: - init_val = initializer - variable_dtype = None - else: - # Instantiate initializer if provided initializer is a type object. - if tf_inspect.isclass(initializer): - initializer = initializer() - if shape.is_fully_defined(): - if "partition_info" in tf_inspect.getargspec(initializer).args: - init_val = functools.partial(initializer, - shape.as_list(), - dtype=dtype, - partition_info=partition_info) - else: - init_val = functools.partial(initializer, - shape.as_list(), dtype=dtype) - variable_dtype = dtype.base_dtype - else: - init_val = initializer - variable_dtype = None - - # Create the variable (Always eagerly as a workaround for a strange - # tpu / funcgraph / keras functional model interaction ) - with tf.init_scope(): - v = tf.Variable( - initial_value=init_val, - name=name, - trainable=trainable, - caching_device=caching_device, - dtype=variable_dtype, - validate_shape=validate_shape, - constraint=constraint, - synchronization=synchronization, - aggregation=aggregation) - - self._vars[name] = v - logging.vlog(1, "Created variable %s with shape %s and init %s", v.name, - format(shape), initializer) - - # Run the regularizer if requested and save the resulting loss. 
- if regularizer: - self.add_regularizer(v, regularizer) - - return v - - def get_or_create_layer(self, name, create_layer_method): - if name not in self._layers: - layer = create_layer_method() - self._layers[name] = layer - if isinstance(layer, base_layer.Layer): - self._regularizers[name] = lambda: tf.math.reduce_sum(layer.losses) - return self._layers[name] - - def add_regularizer(self, var, regularizer): - self._regularizers[var.name] = functools.partial(regularizer, var) - - # Initialize variable when no initializer provided - def _get_default_initializer(self, name, shape=None, dtype=tf.float32): - """Provide a default initializer and a corresponding value. +@keras_export(v1=["keras.utils.track_tf1_style_variables"]) +def track_tf1_style_variables(method): + """Wrap layer & module methods in this decorator to capture tf1-style + weights. + + Decorating a `tf.keras.Layer`'s or `tf.Module`'s methods with this + decorator will cause the layer/module to track weights created/used + via `tf.compat.v1.get_variable` (and by extension `tf.compat.v1.layers`) + inside the decorated method. + + In addition to tracking the weights themselves under the standard + `layer.variable`/`module.variable`/etc. properties, if the method belongs + to a `tf.keras.Layer` then any regularization losses specified via the + `get_variable` or `tf.compat.v1.layers` regularizer arguments will get + tracked by the layer under the standard `layer.losses` property. + + This tracking enables using large classes of TF1-style model-forward-pass + code inside of Keras layers or `tf.Modules` in TF2 with TF2 behaviors + enabled. + + Example of capturing tf.compat.v1.layer-based modeling code as a Keras + layer: + + ```python + class WrappedDoubleDenseLayer(tf.keras.layers.Layer): + + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + @tf.compat.v1.keras.utils.track_tf1_style_variables + def call(self, inputs): + with tf.compat.v1.variable_scope("double_dense_layer"): + out = tf.compat.v1.layers.dense( + inputs, self.units, name="dense_one", + kernel_initializer=tf.compat.v1.random_normal_initializer, + kernel_regularizer="l2") + out = tf.compat.v1.layers.dense( + out, self.units, name="dense_two", + kernel_initializer=tf.compat.v1.random_normal_initializer(), + kernel_regularizer="l2") + return out + + # Create a layer that can be used as a standard keras layer + layer = WrappedDoubleDenseLayer(10) + + # call the layer on inputs + layer(...) 
+ + # Variables created/used within the scope will be tracked by the layer + layer.weights + layer.trainable_variables + + # Regularization losses will be captured in layer.losses after a call, + # just like any other Keras layer + reg_losses = layer.losses + ``` + + Example of capturing tf.compat.v1.get_variable-based modeling code as + a Keras layer: + + ```python + class WrappedDoubleDenseLayer(tf.keras.layers.Layer): + + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + @tf.compat.v1.keras.utils.track_tf1_style_variables + def call(self, inputs): + out = inputs + with tf.compat.v1.variable_scope("double_dense_layer"): + with tf.compat.v1.variable_scope("dense_one"): + # The weights are created with a `regularizer`, + # so the layer should track their regularization losses + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=init_ops.ones_initializer(), + name="kernel") + bias = tf.compat.v1.get_variable( + shape=[self.units,], + initializer=init_ops.zeros_initializer(), + name="bias") + out = tf.compat.v1.math.matmul(out, kernel) + out = tf.compat.v1.nn.bias_add(out, bias) + with tf.compat.v1.variable_scope("dense_two"): + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=init_ops.ones_initializer(), + name="kernel") + bias = tf.compat.v1.get_variable( + shape=[self.units,], + initializer=init_ops.zeros_initializer(), + name="bias") + out = tf.compat.v1.math.matmul(out, kernel) + out = tf.compat.v1.nn.bias_add(out, bias) + return out + + # Create a layer that can be used as a standard keras layer + layer = WrappedDoubleDenseLayer(10) + + # call the layer on inputs + layer(...) + + # Variables created/used within the scope will be tracked by the layer + layer.weights + layer.trainable_variables + + # Regularization losses will be captured in layer.losses after a call, + # just like any other Keras layer + reg_losses = layer.losses + ``` + + Regularization losses: + Any regularizers specified in the `get_variable` calls or + `compat.v1.layer` creations will get captured if they occur in your + decorated method and the method belongs to a + `tf.keras.Layer`/`tf.keras.Module`. Regularization losses + are accessible in `layer.losses` after a call just like in a standard + Keras layer, and will be captured by any model that includes this layer. + Regularization losses attached to Keras layers/models set as attributes + of your layer will also get captured in the standard Keras regularization + loss tracking. + + (While Modules have no `losses` property, no-arg callables to compute + the regularization losses may be tracked as dict values in a private + `module._tf1_style_var_store._regularizers` property, but only for + `tf.compat.v1.layers` and `get_variable` weights and not for any other + nested Keras layers/tf.Modules) + + Variable scope / variable reuse: + variable-scope based reuse in your decorated method will be respected, + and work like variable-scope based reuse in TF1. + + Variable Names/Pre-trained checkpoint loading: + Variable naming from get_variable and `compat.v1.layer` layers will match + the TF1 names, so you should be able to re-use your old name-based + checkpoints. Variable naming for Keras layers/models or for variables + created by `tf.Variable` may change when going to eager execution. 
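A condensed, runnable variant of the two examples above (assumes TF 2.x, where keras exposes the decorator as `tf.compat.v1.keras.utils.track_tf1_style_variables`; `TinyDense` is illustrative):

```python
import tensorflow.compat.v2 as tf

class TinyDense(tf.keras.layers.Layer):
    @tf.compat.v1.keras.utils.track_tf1_style_variables
    def call(self, inputs):
        kernel = tf.compat.v1.get_variable(
            "kernel",
            shape=[inputs.shape[-1], 4],
            initializer=tf.compat.v1.ones_initializer(),
            regularizer=tf.keras.regularizers.L2(0.01),
        )
        return tf.matmul(inputs, kernel)

layer = TinyDense()
out = layer(tf.ones([2, 3]))
assert len(layer.weights) == 1  # `kernel` is tracked by the layer
assert len(layer.losses) == 1  # the L2 regularization loss is captured
```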
+ + Training Arg if you decorate `layer.call`: + Keras will pass a `training` arg to this layer if `call` contains + a `training` arg or a `**kwargs` varargs in its call signature, + similarly to how keras passes `training` to other layers in TF2 that have + similar signatures in their `call` implementations. + See more details in the docs + on `tf.keras.layers.Layer` to understand what will be passed and when. + Note: tf.compat.v1.layers are usually not called with `training=None`, + so the training arg to your decorated method might not feed through to them + unless you pass it to their calls explicitly. + + Caveats: + * TF2 will not prune unused variable updates (or unused outputs). You may + need to adjust your forward pass code to avoid computations or variable + updates that you don't intend to use. + * Avoid nesting variable creation in tf.function inside of + methods decorated with `track_tf1_style_variables`. + While the method may safely be used from inside a `tf.function`, using + a function inside of a decorated method may break the variable scoping. + * This decorator only adds implicit tracking for legacy tf1-style + get_variable / compat.v1.layers usage. + If you would like to use nested Keras layers/models + inside the decorated method, you need to + assign them as attributes of your layer so that Keras/Module's standard + object-oriented weights (and loss tracking for layers) will kick in. + See the intro to modules, layers, and models + [guide](https://www.tensorflow.org/guide/intro_to_modules) for more + info. As a backup, the `compat.v1.keras.utils.get_or_create_layer` + method will ease tracking nested keras model weights and losses for + existing TF1 code, but new code should use explicit tracking. Args: - name: see get_variable. - shape: see get_variable. - dtype: see get_variable. + method: The method to decorate. This should belong to a custom tf.Module, + tf.keras.layers.Layer, or tf.keras.Model. Returns: - initializer and initializing_from_value. See get_variable above. - - Raises: - ValueError: When giving unsupported dtype. + The decorated method. """ - del shape - # If dtype is DT_FLOAT, provide a uniform unit scaling initializer - if dtype.is_floating: - initializer = tf.compat.v1.glorot_uniform_initializer() - initializing_from_value = False - # If dtype is DT_INT/DT_UINT, provide a default value `zero` - # If dtype is DT_BOOL, provide a default value `FALSE` - elif (dtype.is_integer or dtype.is_unsigned or dtype.is_bool or - dtype == tf.string): - initializer = tf.compat.v1.zeros_initializer() - initializing_from_value = False - # NOTES:Do we need to support for handling DT_STRING and DT_COMPLEX here? - else: - raise ValueError("An initializer for variable %s of %s is required" % - (name, dtype.base_dtype)) - return initializer, initializing_from_value + def _method_wrapper(self, *args, **kwargs): + var_store = getattr(self, "_tf1_style_var_store", None) + if not var_store: + if not isinstance(self, tf.Module): + # Raise an error if you incorrectly decorate a method + # that is not a method of a Module, Layer, or Model: + raise ValueError( + "`@tf.compat.v1.keras.utils.track_tf1_style_variables`" + " must be applied to a method of a subclassed `tf.Module`, " + "`tf.keras.layers.Layer`, or `tf.keras.Model` and which " + "takes `self` as the first argument. 
But, the first " + "argument passed to the decorated method was {}, which " + "does not extend Module, Layer, or Model.".format(self) + ) + var_store = _EagerVariableStore() + self._tf1_style_var_store = var_store + + existing_regularized_variables = set(var_store._regularizers.keys()) + with var_store.scope(): + out = method(self, *args, **kwargs) + + # If this is a layer method, add the regularization losses + # to the layer for any newly-created regularized variables + if isinstance(self, base_layer.Layer): + for ( + var_name, + regularizer, + ) in var_store._regularizers.items(): + if var_name not in existing_regularized_variables: + self.add_loss(regularizer) + + return out + + return tf.__internal__.decorator.make_decorator( + target=method, decorator_func=_method_wrapper + ) -@keras_export(v1=["keras.utils.track_tf1_style_variables"]) -def track_tf1_style_variables(method): - """Wrap layer & module methods in this decorator to capture tf1-style weights. - - Decorating a `tf.keras.Layer`'s or `tf.Module`'s methods with this - decorator will cause the layer/module to track weights created/used - via `tf.compat.v1.get_variable` (and by extension `tf.compat.v1.layers`) - inside the decorated method. - - In addition to tracking the weights themselves under the standard - `layer.variable`/`module.variable`/etc. properties, if the method belongs - to a `tf.keras.Layer` then any regularization losses specified via the - `get_variable` or `tf.compat.v1.layers` regularizer arguments will get - tracked by the layer under the standard `layer.losses` property. - - This tracking enables using large classes of TF1-style model-forward-pass - code inside of Keras layers or `tf.Modules` in TF2 with TF2 behaviors enabled. - - Example of capturing tf.compat.v1.layer-based modeling code as a Keras layer: - - ```python - class WrappedDoubleDenseLayer(tf.keras.layers.Layer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - @tf.compat.v1.keras.utils.track_tf1_style_variables - def call(self, inputs): - with tf.compat.v1.variable_scope("double_dense_layer"): - out = tf.compat.v1.layers.dense( - inputs, self.units, name="dense_one", - kernel_initializer=tf.compat.v1.random_normal_initializer, - kernel_regularizer="l2") - out = tf.compat.v1.layers.dense( - out, self.units, name="dense_two", - kernel_initializer=tf.compat.v1.random_normal_initializer(), - kernel_regularizer="l2") - return out - - # Create a layer that can be used as a standard keras layer - layer = WrappedDoubleDenseLayer(10) - - # call the layer on inputs - layer(...) 
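Editor's note: the docstring above already carries two full examples; as a condensed sanity check of what the new `_method_wrapper` does across repeated calls, here is a minimal, hedged sketch. The class name `TF1DenseBlock` and the L2 weight are illustrative (not from this patch), and it assumes TF 2.x with the v1 compat API available:

```python
import tensorflow as tf


class TF1DenseBlock(tf.keras.layers.Layer):
    """Hypothetical layer whose forward pass uses tf1-style get_variable."""

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    @tf.compat.v1.keras.utils.track_tf1_style_variables
    def call(self, inputs):
        with tf.compat.v1.variable_scope("block"):
            kernel = tf.compat.v1.get_variable(
                "kernel",
                shape=[inputs.shape[-1], self.units],
                regularizer=tf.keras.regularizers.L2(1e-4),
            )
        return tf.matmul(inputs, kernel)


layer = TF1DenseBlock(4)
layer(tf.ones([2, 3]))  # first call creates "block/kernel" in the store
layer(tf.ones([2, 3]))  # later calls reuse the same store entry
assert len(layer.weights) == 1  # tracked once, not duplicated per call
assert len(layer.losses) == 1  # the L2 regularization loss is tracked
```

The regularizer diffing against `existing_regularized_variables` is what keeps the loss from being registered again on the second call.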
- - # Variables created/used within the scope will be tracked by the layer - layer.weights - layer.trainable_variables - - # Regularization losses will be captured in layer.losses after a call, - # just like any other Keras layer - reg_losses = layer.losses - ``` - - Example of capturing tf.compat.v1.get_variable-based modeling code as - a Keras layer: - - ```python - class WrappedDoubleDenseLayer(tf.keras.layers.Layer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - @tf.compat.v1.keras.utils.track_tf1_style_variables - def call(self, inputs): - out = inputs - with tf.compat.v1.variable_scope("double_dense_layer"): - with tf.compat.v1.variable_scope("dense_one"): - # The weights are created with a `regularizer`, - # so the layer should track their regularization losses - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=init_ops.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=init_ops.zeros_initializer(), - name="bias") - out = tf.compat.v1.math.matmul(out, kernel) - out = tf.compat.v1.nn.bias_add(out, bias) - with tf.compat.v1.variable_scope("dense_two"): - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=init_ops.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=init_ops.zeros_initializer(), - name="bias") - out = tf.compat.v1.math.matmul(out, kernel) - out = tf.compat.v1.nn.bias_add(out, bias) - return out - - # Create a layer that can be used as a standard keras layer - layer = WrappedDoubleDenseLayer(10) - - # call the layer on inputs - layer(...) - - # Variables created/used within the scope will be tracked by the layer - layer.weights - layer.trainable_variables - - # Regularization losses will be captured in layer.losses after a call, - # just like any other Keras layer - reg_losses = layer.losses - ``` - - Regularization losses: - Any regularizers specified in the `get_variable` calls or `compat.v1.layer` - creations will get captured if they occur in your decorated method - and the method belongs to a `tf.keras.Layer`/`tf.keras.Module`. - Regularization losses - are accessible in `layer.losses` after a call just like in a standard - Keras layer, and will be captured by any model that includes this layer. - Regularization losses attached to Keras layers/models set as attributes - of your layer will also get captured in the standard Keras regularization - loss tracking. - - (While Modules have no `losses` property, no-arg callables to compute - the regularization losses may be tracked as dict values in a private - `module._tf1_style_var_store._regularizers` property, but only for - `tf.compat.v1.layers` and `get_variable` weights and not for any other - nested Keras layers/tf.Modules) - - Variable scope / variable reuse: - variable-scope based reuse in your decorated method will be respected, - and work like variable-scope based reuse in TF1. - - Variable Names/Pre-trained checkpoint loading: - Variable naming from get_variable and `compat.v1.layer` layers will match - the TF1 names, so you should be able to re-use your old name-based - checkpoints. Variable naming for Keras layers/models or for variables - created by `tf.Variable` may change when going to eager execution. 
- - Training Arg if you decorate `layer.call`: - Keras will pass a `training` arg to this layer if `call` contains - a `training` arg or a `**kwargs` varargs in its call signature, - similarly to how keras passes `training` to other layers in TF2 that have - similar signatures in their `call` implementations. - See more details in the docs - on `tf.keras.layers.Layer` to understand what will be passed and when. - Note: tf.compat.v1.layers are usually not called with `training=None`, - so the training arg to `forward_pass` might not feed through to them - unless you pass it to their calls explicitly. - - Caveats: - * TF2 will not prune unused variable updates (or unused outputs). You may - need to adjust your forward pass code to avoid computations or variable - updates that you don't intend to use. - * Avoid Nesting variable creation in tf.function inside of - methods decorated with `track_tf1_style_variables` - While the method may safely be used from inside a `tf.function`, using - a function inside of a decorated method may break the variable scoping. - * This decorator only adds implicit tracking for legacy tf1-style - get_variable / compat.v1.layers usage. - If you would like to use nested Keras layers/models - inside the decorated method, you need to - assign them as attributes of your layer so that Keras/Module's standard - object-oriented weights (and loss tracking for layers) will kick in. - See the intro to modules, layers, and models - [guide](https://www.tensorflow.org/guide/intro_to_modules) for more info. - As a backup, the `compat.v1.keras.utils.get_or_create_layer` method will - ease tracking nested keras model weights and losses for existing TF1 code, - but new code should use explicit tracking. - - Args: - method: The method to decorate. This should belong to a custom tf.Module, - tf.keras.layers.Layer, or tf.keras.Model. - - Returns: - The decorated method. - """ - - def _method_wrapper(self, *args, **kwargs): - var_store = getattr(self, "_tf1_style_var_store", None) - if not var_store: - if not isinstance(self, tf.Module): - # Raise an error if you incorrectly decorate a method - # that is not a method of a Module, Layer, or Model: - raise ValueError( - "`@tf.compat.v1.keras.utils.track_tf1_layers_and_variables` must " - "be applied to a method of a subclassed `tf.Module`, " - "`tf.keras.layers.Layer`, or `tf.keras.Model` and which takes " - "`self` as the first argument. But, the first argument passed " - "to the decorated method was {}, which does not " - "extend Module, Layer, or Model.".format(self)) - var_store = _EagerVariableStore() - self._tf1_style_var_store = var_store # pylint: disable=protected-access - - existing_regularized_variables = set(var_store._regularizers.keys()) # pylint: disable=protected-access - with var_store.scope(): - out = method(self, *args, **kwargs) - - # If this is a layer method, add the regularization losses - # to the layer for any newly-created regularized variables - if isinstance(self, base_layer.Layer): - for var_name, regularizer in var_store._regularizers.items(): # pylint: disable=protected-access - if var_name not in existing_regularized_variables: - self.add_loss(regularizer) - - return out - - return tf.__internal__.decorator.make_decorator( - target=method, decorator_func=_method_wrapper) +class VariableScopeLayer(base_layer.Layer): + """Wrapper Layer to capture `compat.v1.get_variable` and `compat.v1.layers`. 
+ + This shim layer allows using large sets of TF1 model-forward-pass code as a + Keras layer that works in TF2 with TF2 behaviors enabled. It will capture + both weights and regularization losses of your forward-pass code. To use it, + override this class and put your TF1 model's forward pass inside your + implementation for `forward_pass`. (Unlike standard custom Keras layers, + do not override `call`.) + + Below are some examples, and then more details on the functionality of this + shim layer to wrap TF1 model forward passes. + + Example of capturing tf.compat.v1.layer-based modeling code as a Keras + layer: + + ```python + class WrappedDoubleDenseLayer(variable_scope_shim.VariableScopeLayer): + + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + def forward_pass(self, inputs): + with variable_scope.variable_scope("double_dense_layer"): + out = tf.compat.v1.layers.dense( + inputs, self.units, name="dense_one", + kernel_initializer=tf.compat.v1.random_normal_initializer, + kernel_regularizer="l2") + out = tf.compat.v1.layers.dense( + out, self.units, name="dense_two", + kernel_initializer=tf.compat.v1.random_normal_initializer(), + kernel_regularizer="l2") + return out + + # Create a layer that can be used as a standard keras layer + layer = WrappedDoubleDenseLayer(10) + + # call the layer on inputs + layer(...) + + # Variables created/used within the scope will be tracked by the layer + layer.weights + layer.trainable_variables + + # Regularization losses will be captured in layer.losses after a call, + # just like any other Keras layer + reg_losses = layer.losses + ``` + + Example of capturing tf.compat.v1.get_variable-based modeling code as + a Keras layer: + + ```python + class WrappedDoubleDenseLayer(variable_scope_shim.VariableScopeLayer): + + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + def forward_pass(self, inputs): + out = inputs + with tf.compat.v1.variable_scope("double_dense_layer"): + with tf.compat.v1.variable_scope("dense_one"): + # The weights are created with a `regularizer`, + # so the layer should track their regularization losses + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=init_ops.ones_initializer(), + name="kernel") + bias = tf.compat.v1.get_variable( + shape=[self.units,], + initializer=init_ops.zeros_initializer(), + name="bias") + out = tf.compat.v1.math.matmul(out, kernel) + out = tf.compat.v1.nn.bias_add(out, bias) + with tf.compat.v1.variable_scope("dense_two"): + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=init_ops.ones_initializer(), + name="kernel") + bias = tf.compat.v1.get_variable( + shape=[self.units,], + initializer=init_ops.zeros_initializer(), + name="bias") + out = tf.compat.v1.math.matmul(out, kernel) + out = tf.compat.v1.nn.bias_add(out, bias) + return out + + # Create a layer that can be used as a standard keras layer + layer = WrappedDoubleDenseLayer(10) + + # call the layer on inputs + layer(...) 
+ + # Variables created/used within the scope will be tracked by the layer + layer.weights + layer.trainable_variables + + # Regularization losses will be captured in layer.losses after a call, + # just like any other Keras layer + reg_losses = layer.losses + ``` + + Regularization losses: + Any regularizers specified in the `get_variable` calls or + `compat.v1.layer` creations will get captured by this wrapper layer. + Regularization losses are accessible in `layer.losses` after a call just + like in a standard Keras layer, and will be captured by any model that + includes this layer. Regularization losses attached to Keras + layers/models set as attributes of your layer will also get captured in + the standard Keras regularization loss tracking. + + Variable scope / variable reuse: + variable-scope based reuse in the `forward_pass` will be respected, + and work like variable-scope based reuse in TF1. + + Variable Names/Pre-trained checkpoint loading: + Variable naming from get_variable and `compat.v1.layer` layers will match + the TF1 names, so you should be able to re-use your old name-based + checkpoints. Variable naming for Keras layers/models or for variables + created by `tf.Variable` may change when going to eager execution. + + Training Arg in `forward_pass`: + Keras will pass a `training` arg to this layer if `forward_pass` contains + a `training` arg or a `**kwargs` varargs in its call signature, + similarly to how keras passes `training` to other layers in TF2 that have + similar signatures in their `call` implementations. + See more details in the docs + on `tf.keras.layers.Layer` to understand what will be passed and when. + Note: tf.compat.v1.layers are usually not called with `training=None`, + so the training arg to `forward_pass` might not feed through to them + unless you pass it to their calls explicitly. + + Call signature of the forward pass: + The semantics of the forward pass signature match the standard + Keras layer `call` signature, including how Keras decides when + to pass in a `training` arg, and the semantics applied to + the first positional arg in the call signature. + + Caveats: + * TF2 will not prune unused variable updates (or unused outputs). You may + need to adjust your forward pass code to avoid computations or variable + updates that you don't intend to use. (E.g. by adding a flag to the + `forward_pass` call signature and branching on it). + * Avoid nesting variable creation in tf.function inside of `forward_pass`. + While the layer may safely be used from inside a `tf.function`, using + a function inside of `forward_pass` will break the variable scoping. + * If you would like to nest Keras layers/models or other + `VariableScopeLayer`s directly in `forward_pass`, you need to + assign them as attributes of your layer so that Keras's standard + object-oriented weights and loss tracking will kick in. + See the intro to modules, layers, and models + [guide](https://www.tensorflow.org/guide/intro_to_modules) for more info. + """ + @property + @layer_utils.cached_per_instance + def _call_full_argspec(self): + # Argspec inspection is expensive and the call spec is used often, so it + # makes sense to cache the result. + return tf_inspect.getfullargspec(self.forward_pass) -class VariableScopeLayer(base_layer.Layer): - """Wrapper Layer to capture `compat.v1.get_variable` and `compat.v1.layers`. - - This shim layer allows using large sets of TF1 model-forward-pass code as a - Keras layer that works in TF2 with TF2 behaviors enabled.
It will capture - both weights and regularization losses of your forward-pass code. To use it, - override this class and put your TF1 model's forward pass inside your - implementation for `forward_pass`. (Unlike standard custom Keras layers, - do not override `call`.) - - Below are some examples, and then more details on the functionality of this - shim layer to wrap TF1 model forward passes. - - Example of capturing tf.compat.v1.layer-based modeling code as a Keras layer: - - ```python - class WrappedDoubleDenseLayer(variable_scope_shim.VariableScopeLayer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - def forward_pass(self, inputs): - with variable_scope.variable_scope("double_dense_layer"): - out = tf.compat.v1.layers.dense( - inputs, self.units, name="dense_one", - kernel_initializer=tf.compat.v1.random_normal_initializer, - kernel_regularizer="l2") - out = tf.compat.v1.layers.dense( - out, self.units, name="dense_two", - kernel_initializer=tf.compat.v1.random_normal_initializer(), - kernel_regularizer="l2") - return out - - # Create a layer that can be used as a standard keras layer - layer = WrappedDoubleDenseLayer(10) - - # call the layer on inputs - layer(...) - - # Variables created/used within the scope will be tracked by the layer - layer.weights - layer.trainable_variables - - # Regularization losses will be captured in layer.losses after a call, - # just like any other Keras layer - reg_losses = layer.losses - ``` - - Example of capturing tf.compat.v1.get_variable-based modeling code as - a Keras layer: - - ```python - class WrappedDoubleDenseLayer(variable_scope_shim.VariableScopeLayer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - def forward_pass(self, inputs): - out = inputs - with tf.compat.v1.variable_scope("double_dense_layer"): - with tf.compat.v1.variable_scope("dense_one"): - # The weights are created with a `regularizer`, - # so the layer should track their regularization losses - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=init_ops.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=init_ops.zeros_initializer(), - name="bias") - out = tf.compat.v1.math.matmul(out, kernel) - out = tf.compat.v1.nn.bias_add(out, bias) - with tf.compat.v1.variable_scope("dense_two"): - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=init_ops.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=init_ops.zeros_initializer(), - name="bias") - out = tf.compat.v1.math.matmul(out, kernel) - out = tf.compat.v1.nn.bias_add(out, bias) - return out - - # Create a layer that can be used as a standard keras layer - layer = WrappedDoubleDenseLayer(10) - - # call the layer on inputs - layer(...) - - # Variables created/used within the scope will be tracked by the layer - layer.weights - layer.trainable_variables - - # Regularization losses will be captured in layer.losses after a call, - # just like any other Keras layer - reg_losses = layer.losses - ``` - - Regularization losses: - Any regularizers specified in the `get_variable` calls or `compat.v1.layer` - creations will get captured by this wrapper layer. 
Regularization losses - are accessible in `layer.losses` after a call just like in a standard - Keras layer, and will be captured by any model that includes this layer. - Regularization losses attached to Keras layers/models set as attributes - of your layer will also get captured in the standard Keras regularization - loss tracking. - - Variable scope / variable reuse: - variable-scope based reuse in the `forward_pass` will be respected, - and work like variable-scope based reuse in TF1. - - Variable Names/Pre-trained checkpoint loading: - Variable naming from get_variable and `compat.v1.layer` layers will match - the TF1 names, so you should be able to re-use your old name-based - checkpoints. Variable naming for Keras layers/models or for variables - created by `tf.Variable` may change when going to eager execution. - - Training Arg in `forward_pass`: - Keras will pass a `training` arg to this layer if `forward_pass` contains - a `training` arg or a `**kwargs` varargs in its call signature, - similarly to how keras passes `training` to other layers in TF2 that have - similar signatures in their `call` implementations. - See more details in the docs - on `tf.keras.layers.Layer` to understand what will be passed and when. - Note: tf.compat.v1.layers are usually not called with `training=None`, - so the training arg to `forward_pass` might not feed through to them - unless you pass it to their calls explicitly. - - Call signature of the forward pass: - The semantics of the forward pass signature match the standard - Keras layer `call` signature, including how Keras decides when - to pass in a `training` arg., and the semantics applied to - the first positional arg in the call signature. - - Caveats: - * TF2 will not prune unused variable updates (or unused outputs). You may - need to adjust your forward pass code to avoid computations or variable - updates that you don't intend to use. (E.g. by adding a flag to the - `forward_pass` call signature and branching on it). - * Avoid Nesting variable creation in tf.function inside of `forward_pass` - While the layer may safely be used from inside a `tf.function`, using - a function inside of `forward_pass` will break the variable scoping. - * If you would like to nest Keras layers/models or other - `VariableScopeLayer`s directly in `forward_pass`, you need to - assign them as attributes of your layer so that Keras's standard - object-oriented weights and loss tracking will kick in. - See the intro to modules, layers, and models - [guide](https://www.tensorflow.org/guide/intro_to_modules) for more info - """ - - @property - @layer_utils.cached_per_instance - def _call_full_argspec(self): - # Argspec inspection is expensive and the call spec is used often, so it - # makes sense to cache the result. - return tf_inspect.getfullargspec(self.forward_pass) - - def forward_pass(self, *args, **kwargs): - """Implement this method. It should include your model forward pass.""" - raise NotImplementedError - - @track_tf1_style_variables - def call(self, *args, **kwargs): - return self.forward_pass(*args, **kwargs) + def forward_pass(self, *args, **kwargs): + """Implement this method. It should include your model forward pass.""" + raise NotImplementedError + + @track_tf1_style_variables + def call(self, *args, **kwargs): + return self.forward_pass(*args, **kwargs) @keras_export(v1=["keras.utils.get_or_create_layer"]) def get_or_create_layer(name, create_layer_method): - """Use this method to track nested keras models in a shim-decorated method. 
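Editor's note: since the `VariableScopeLayer` contract above (override `forward_pass`, never `call`) is easy to miss in the reflowed diff, here is a minimal, hedged sketch. The class name `TinyScaleShim` is illustrative, and it assumes the module path used by the test file in this patch:

```python
import tensorflow as tf

from keras.legacy_tf_layers import variable_scope_shim


class TinyScaleShim(variable_scope_shim.VariableScopeLayer):
    """Hypothetical shim layer: override `forward_pass`, not `call`."""

    def forward_pass(self, inputs):
        # The base class's `call` is already decorated with
        # track_tf1_style_variables, so this get_variable is captured.
        with tf.compat.v1.variable_scope("tiny"):
            scale = tf.compat.v1.get_variable(
                "scale",
                shape=[],
                initializer=tf.compat.v1.ones_initializer(),
            )
        return inputs * scale


layer = TinyScaleShim()
layer(tf.ones([2, 2]))  # first call creates "tiny/scale"
print([v.name for v in layer.weights])  # expected: ['tiny/scale:0']
```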
- - This method can be used within a `tf.keras.Layer`'s methods decorated by - the`track_tf1_style_variables` shim, to additionally track inner keras Model - objects created within the same method. The inner model's variables and losses - will be accessible via the outer model's `variables` and `losses` attributes. - - This enables tracking of inner keras models using TF2 behaviors, with minimal - changes to existing TF1-style code. - - Example: - - ```python - class NestedLayer(tf.keras.layers.Layer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - def build_model(self): - inp = tf.keras.Input(shape=(5, 5)) - dense_layer = tf.keras.layers.Dense( - 10, name="dense", kernel_regularizer="l2", - kernel_initializer=tf.compat.v1.ones_initializer()) - model = tf.keras.Model(inputs=inp, outputs=dense_layer(inp)) - return model - - @tf.compat.v1.keras.utils.track_tf1_style_variables - def call(self, inputs): - model = tf.compat.v1.keras.utils.get_or_create_layer( - "dense_model", self.build_model) - return model(inputs) - ``` - The inner model creation should be confined to its own zero-arg function, - which should be passed into this method. In TF1, this method will immediately - create and return the desired model, without any tracking. - - Args: - name: A name to give the nested layer to track. - create_layer_method: a Callable that takes no args and returns the nested - layer. - - Returns: - The created layer. - """ - store = vs._get_default_variable_store() # pylint: disable=protected-access - if not isinstance(store, _EagerVariableStore): - if not tf.compat.v1.executing_eagerly_outside_functions(): - # tf1 case; just create and return layer - return create_layer_method() - else: - raise ValueError( - "Tried to call get_or_create_layer in eager mode from a method not" - "decorated with @tf.compat.v1.keras.utils.track_tf1_style_variables.") - vs_name = tf.compat.v1.get_variable_scope().name - name = f"{vs_name}/{name}" - return store.get_or_create_layer(name, create_layer_method) + """Use this method to track nested keras models in a shim-decorated method. + + This method can be used within a `tf.keras.Layer`'s methods decorated by + the `track_tf1_style_variables` shim, to additionally track inner keras Model + objects created within the same method. The inner model's variables and + losses will be accessible via the outer model's `variables` and `losses` + attributes. + + This enables tracking of inner keras models using TF2 behaviors, with + minimal changes to existing TF1-style code. + + Example: + + ```python + class NestedLayer(tf.keras.layers.Layer): + + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + def build_model(self): + inp = tf.keras.Input(shape=(5, 5)) + dense_layer = tf.keras.layers.Dense( + 10, name="dense", kernel_regularizer="l2", + kernel_initializer=tf.compat.v1.ones_initializer()) + model = tf.keras.Model(inputs=inp, outputs=dense_layer(inp)) + return model + + @tf.compat.v1.keras.utils.track_tf1_style_variables + def call(self, inputs): + model = tf.compat.v1.keras.utils.get_or_create_layer( + "dense_model", self.build_model) + return model(inputs) + ``` + The inner model creation should be confined to its own zero-arg function, + which should be passed into this method. In TF1, this method will + immediately create and return the desired model, without any tracking. + + Args: + name: A name to give the nested layer to track.
+ create_layer_method: a Callable that takes no args and returns the nested + layer. + + Returns: + The created layer. + """ + store = vs._get_default_variable_store() + if not isinstance(store, _EagerVariableStore): + if not tf.compat.v1.executing_eagerly_outside_functions(): + # tf1 case; just create and return layer + return create_layer_method() + else: + raise ValueError( + "Tried to call get_or_create_layer in eager mode from a method " + "not decorated with " + "@tf.compat.v1.keras.utils.track_tf1_style_variables." + ) + vs_name = tf.compat.v1.get_variable_scope().name + name = f"{vs_name}/{name}" + return store.get_or_create_layer(name, create_layer_method) diff --git a/keras/legacy_tf_layers/variable_scope_shim_test.py b/keras/legacy_tf_layers/variable_scope_shim_test.py index 9de0dd48d47b..f593bdfa71d6 100644 --- a/keras/legacy_tf_layers/variable_scope_shim_test.py +++ b/keras/legacy_tf_layers/variable_scope_shim_test.py @@ -21,7 +21,10 @@ import gc import threading +import numpy +import tensorflow as tf from absl.testing import parameterized + from keras import models from keras import regularizers from keras.engine import base_layer @@ -32,1589 +35,1823 @@ from keras.legacy_tf_layers import variable_scope_shim from keras.testing_infra import test_combinations -import numpy -import tensorflow as tf - -from tensorflow.python.framework import test_util as tf_test_utils -from tensorflow.python.ops import variable_scope +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) +from tensorflow.python.ops import variable_scope def run_inside_wrap_function_in_eager_mode(graph_function): - """Decorator to execute the same graph code in eager and graph modes. + """Decorator to execute the same graph code in eager and graph modes. - In graph mode, we just execute the graph_function passed as argument. In eager - mode, we wrap the function using wrap_function and then execute the wrapped - result. + In graph mode, we just execute the graph_function passed as argument. In + eager mode, we wrap the function using wrap_function and then execute the + wrapped result. - Args: - graph_function: python function containing graph code to be wrapped + Args: + graph_function: python function containing graph code to be wrapped - Returns: - decorated function - """ - def wrap_and_execute(self): - store = variable_scope_shim._EagerVariableStore() - with variable_scope.with_variable_store(store): - # use the original function - graph_function(self) - return wrap_and_execute + Returns: + decorated function + """ + def wrap_and_execute(self): + store = variable_scope_shim._EagerVariableStore() + with variable_scope.with_variable_store(store): + # use the original function + graph_function(self) -class VariableScopeTest(tf.test.TestCase): + return wrap_and_execute - def tearDown(self): - gc.collect() - # This will only contain uncollectable garbage, i.e. reference cycles - # involving objects with __del__ defined. - self.assertEqual(0, len(gc.garbage)) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testGetVar(self): - vs = variable_scope._get_default_variable_store() - v = vs.get_variable("v", [1]) - v1 = vs.get_variable("v", [1]) - self.assertIs(v, v1) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testNameExists(self): - vs = variable_scope._get_default_variable_store() - # No check by default, so we can both create and get existing names.
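Editor's note: the `run_inside_wrap_function_in_eager_mode` harness above is the pattern every test in this file relies on. A minimal, hedged standalone sketch of the same pattern, condensed from `testGetVar`; it deliberately uses the private TF internals the tests themselves use (`_EagerVariableStore`, `variable_scope._get_default_variable_store`), which are not public API:

```python
from keras.legacy_tf_layers import variable_scope_shim
from tensorflow.python.ops import variable_scope

# Route tf1-style variable lookups through an eager-compatible store so
# the graph-style test body can run unchanged in eager mode.
store = variable_scope_shim._EagerVariableStore()
with variable_scope.with_variable_store(store):
    vs = variable_scope._get_default_variable_store()
    v = vs.get_variable("v", [1])
    v1 = vs.get_variable("v", [1])
    assert v is v1  # get-or-create semantics, as testGetVar asserts
```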
- v = vs.get_variable("v", [1]) - v1 = vs.get_variable("v", [1]) - self.assertIs(v, v1) - - self.assertIsNot(v, vs.get_variable("u", [1], reuse=False)) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testNamelessStore(self): - vs = variable_scope._get_default_variable_store() - vs.get_variable("v1", [2]) - vs.get_variable("v2", [2]) - expected_names = ["%s:0" % name for name in ["v1", "v2"]] - self.assertEqual( - set(expected_names), set(v.name for v in vs._vars.values())) - - # TODO(mihaimaruseac): Not converted to use wrap_function because of - # TypeError: Expected tf.group() expected Tensor arguments not 'None' with - # type '' - @tf_test_utils.run_in_graph_and_eager_modes - def testVarScopeInitializer(self): - init = tf.compat.v1.constant_initializer(0.3) - with tf.compat.v1.variable_scope("tower0") as tower: - with tf.compat.v1.variable_scope("foo", initializer=init): - v = tf.compat.v1.get_variable("v", []) - self.evaluate(tf.compat.v1.variables_initializer([v])) - self.assertAllClose(self.evaluate(v.value()), 0.3) - with tf.compat.v1.variable_scope(tower, initializer=init): - w = tf.compat.v1.get_variable("w", []) - self.evaluate(tf.compat.v1.variables_initializer([w])) - self.assertAllClose(self.evaluate(w.value()), 0.3) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarScopeConstraint(self): - constraint = lambda x: 0. * x - with tf.compat.v1.variable_scope("tower1") as tower: - with tf.compat.v1.variable_scope("foo", constraint=constraint): - v = tf.compat.v1.get_variable("v", []) - self.assertIsNotNone(v.constraint) - with tf.compat.v1.variable_scope(tower, constraint=constraint): - w = tf.compat.v1.get_variable("w", []) - self.assertIsNotNone(w.constraint) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarScopeDType(self): - with tf.compat.v1.variable_scope("tower2") as tower: - with tf.compat.v1.variable_scope("foo", dtype=tf.float16): - v = tf.compat.v1.get_variable("v", []) - self.assertEqual(v.dtype.base_dtype, tf.float16) - with tf.compat.v1.variable_scope(tower, dtype=tf.float16): - w = tf.compat.v1.get_variable("w", []) - self.assertEqual(w.dtype.base_dtype, tf.float16) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testInitFromNonTensorValue(self): - v = tf.compat.v1.get_variable("v4", initializer=4, dtype=tf.int32) - self.evaluate(tf.compat.v1.variables_initializer([v])) - self.assertAllClose(self.evaluate(v.value()), 4) - - w = tf.compat.v1.get_variable( - "w4", initializer=numpy.array([1, 2, 3]), dtype=tf.int64) - self.evaluate(tf.compat.v1.variables_initializer([w])) - self.assertAllClose(self.evaluate(w.value()), [1, 2, 3]) - - # A quirk to be revisited? 
- error = ValueError if tf.executing_eagerly() else TypeError - with self.assertRaises(error): - tf.compat.v1.get_variable("x4", initializer={}) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testInitFromNonInitializer(self): - # Test various dtypes with zeros initializer as following: - types = [ - tf.int8, tf.uint8, tf.int16, tf.uint16, tf.int32, - tf.int64, tf.bool - ] - - # Use different variable_name to distinguish various dtypes - for (i, dtype) in enumerate(types): - x = tf.compat.v1.get_variable( - name="xx%d" % i, shape=(3, 4), dtype=dtype) - y = tf.compat.v1.get_variable( - name="yy%d" % i, - shape=(3, 4), - dtype=dtype, - initializer=tf.compat.v1.zeros_initializer(dtype=dtype)) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllEqual(self.evaluate(x.value()), self.evaluate(y.value())) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarScopeRegularizer(self): - init = tf.compat.v1.constant_initializer(0.3) - - def regularizer1(v): - return tf.reduce_mean(v) + 0.1 - - def regularizer2(v): - return tf.reduce_mean(v) + 0.2 - - with tf.compat.v1.variable_scope( - "tower3", regularizer=regularizer1) as tower: - with tf.compat.v1.variable_scope("foo", initializer=init): - v = tf.compat.v1.get_variable("v", []) - self.evaluate(tf.compat.v1.variables_initializer([v])) - with tf.compat.v1.variable_scope(tower, initializer=init) as vs: - tf.compat.v1.get_variable("u", []) - vs.set_regularizer(regularizer2) - tf.compat.v1.get_variable("w", []) - # Next 3 variable not regularized to test disabling regularization. - tf.compat.v1.get_variable( - "x", [], regularizer=tf.compat.v1.no_regularizer) - with tf.compat.v1.variable_scope( - "baz", regularizer=tf.compat.v1.no_regularizer): - tf.compat.v1.get_variable("y", []) - vs.set_regularizer(tf.compat.v1.no_regularizer) - tf.compat.v1.get_variable("z", []) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testInitializeFromValue(self): - init = tf.constant(0.1) - w = tf.compat.v1.get_variable("v", initializer=init) - self.evaluate(tf.compat.v1.variables_initializer([w])) - self.assertAllClose(self.evaluate(w.value()), 0.1) - - with self.assertRaisesRegex(ValueError, "shape"): - # We disallow explicit shape specification when initializer is constant. - tf.compat.v1.get_variable("u", [1], initializer=init) - - with tf.compat.v1.variable_scope("foo", initializer=init): - # Constant initializer can be passed through scopes if needed. - v = tf.compat.v1.get_variable("v") - self.evaluate(tf.compat.v1.variables_initializer([v])) - self.assertAllClose(self.evaluate(v.value()), 0.1) - - # Check that non-float32 initializer creates a non-float32 variable. - init = tf.constant(1, dtype=tf.int32) - t = tf.compat.v1.get_variable("t", initializer=init) - self.assertEqual(t.dtype.base_dtype, tf.int32) - - # Raise error if `initializer` dtype and `dtype` are not identical. 
- with self.assertRaisesRegex(ValueError, "don't match"): - tf.compat.v1.get_variable("s", initializer=init, dtype=tf.float64) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarScopeGetOrCreateReuse(self): - with self.cached_session(): - - def test_value(value): - x = tf.constant(value) - with tf.compat.v1.variable_scope( - "testVarScopeGetOrCreateReuse_bar", - reuse=tf.compat.v1.AUTO_REUSE): - _ = tf.compat.v1.assign(tf.compat.v1.get_variable("var", []), x) - with tf.compat.v1.variable_scope( - "testVarScopeGetOrCreateReuse_bar", - reuse=tf.compat.v1.AUTO_REUSE): - _ = tf.compat.v1.get_variable("var", []) - self.assertEqual(value, self.evaluate(x)) - - test_value(42.) # Variable is created. - test_value(13.) # Variable is reused hereafter. - test_value(17.) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarScopeGetOrCreateReuseIgnoreFalse(self): - with self.cached_session(): - - def test_value(value): - x = tf.constant(value) - with tf.compat.v1.variable_scope( - "testVarScopeGetOrCreateReuse_bar", - reuse=False): - _ = tf.compat.v1.assign(tf.compat.v1.get_variable("var", []), x) - # We need to ignore reuse=False in the shim, because the - # code is expected to get rerun each time the user calls the shim. - with tf.compat.v1.variable_scope( - "testVarScopeGetOrCreateReuse_bar", - reuse=False): - _ = tf.compat.v1.get_variable("var", []) - self.assertEqual(value, self.evaluate(x)) - - test_value(42.) # Variable is created. - test_value(13.) # Variable is reused hereafter. - test_value(17.) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarOpScope(self): - with self.cached_session(): - with tf.name_scope("testVarOpScope1"): - with tf.compat.v1.variable_scope("tower", "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "tower/w:0") - - with tf.name_scope("testVarOpScope2"): - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "default/w:0") - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "default_1/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarOpScopeUniqueNamesInterleavedSubstringScopes(self): - with self.cached_session(): - with tf.compat.v1.variable_scope(None, "defaultScope1"): - with tf.compat.v1.variable_scope(None, "layer"): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, - "defaultScope1/layer/w:0") - with tf.compat.v1.variable_scope(None, "defaultScope1"): - with tf.compat.v1.variable_scope(None, "layer"): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, - "defaultScope1_1/layer/w:0") - with tf.compat.v1.variable_scope(None, "defaultScope"): - with tf.compat.v1.variable_scope(None, "layer"): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, - "defaultScope/layer/w:0") - with tf.compat.v1.variable_scope(None, "defaultScope1"): - with tf.compat.v1.variable_scope(None, "layer"): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, - "defaultScope1_2/layer/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarOpScopeUniqueNamesWithJump(self): - with self.cached_session(): - with tf.compat.v1.variable_scope("default") as default: - with tf.compat.v1.variable_scope(None, "layer"): - self.assertEqual( - 
tf.compat.v1.get_variable("w", []).name, "default/layer/w:0") - with tf.compat.v1.variable_scope(None, "layer"): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, - "default/layer_1/w:0") - with tf.compat.v1.variable_scope(default): - pass - # No matter the jump in the middle, unique numbering continues. - with tf.compat.v1.variable_scope(None, "layer"): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, - "default/layer_2/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarOpScopeReuse(self): - with self.cached_session(): - with tf.compat.v1.variable_scope("outer") as outer: - with tf.compat.v1.variable_scope("tower", "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/tower/w:0") - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - with tf.compat.v1.variable_scope(outer, reuse=True) as outer: - with tf.compat.v1.variable_scope("tower", "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/tower/w:0") - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarScopeGetVar(self): - with self.cached_session(): - with tf.compat.v1.variable_scope("root"): - with tf.compat.v1.variable_scope("towerA") as tower_a: - va = tf.compat.v1.get_variable("v", [1]) - self.assertEqual(va.name, "root/towerA/v:0") - - with tf.compat.v1.variable_scope(tower_a, reuse=True): - va2 = tf.compat.v1.get_variable("v", [1]) - self.assertIs(va2, va) - - with tf.compat.v1.variable_scope("towerB"): - vb = tf.compat.v1.get_variable("v", [1]) - self.assertEqual(vb.name, "root/towerB/v:0") - - with tf.compat.v1.variable_scope("towerA", reuse=True): - va2 = tf.compat.v1.get_variable("v", [1]) - self.assertIs(va2, va) - with tf.compat.v1.variable_scope("foo"): - with tf.compat.v1.variable_scope("bar"): - v = tf.compat.v1.get_variable("v", [1]) - self.assertEqual(v.name, "root/foo/bar/v:0") - with tf.compat.v1.variable_scope(tower_a, reuse=True): - va3 = tf.compat.v1.get_variable("v", [1]) - self.assertIs(va, va3) - - with self.assertRaises(ValueError) as exc: - with tf.compat.v1.variable_scope(tower_a, reuse=True): - tf.compat.v1.get_variable("v", [2]) # Different shape. - self.assertEqual("shape" in str(exc.exception), True) - - with self.assertRaises(ValueError) as exc: - with tf.compat.v1.variable_scope(tower_a, reuse=True): - tf.compat.v1.get_variable("v", [1], dtype=tf.int32) - self.assertEqual("dtype" in str(exc.exception), True) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarScopeOuterScope(self): - with self.cached_session(): - with tf.compat.v1.variable_scope("outer") as outer: - pass - with tf.compat.v1.variable_scope(outer): +class VariableScopeTest(tf.test.TestCase): + def tearDown(self): + gc.collect() + # This will only contain uncollectable garbage, i.e. reference cycles + # involving objects with __del__ defined. 
+ self.assertEqual(0, len(gc.garbage)) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testGetVar(self): + vs = variable_scope._get_default_variable_store() + v = vs.get_variable("v", [1]) + v1 = vs.get_variable("v", [1]) + self.assertIs(v, v1) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testNameExists(self): + vs = variable_scope._get_default_variable_store() + # No check by default, so we can both create and get existing names. + v = vs.get_variable("v", [1]) + v1 = vs.get_variable("v", [1]) + self.assertIs(v, v1) + + self.assertIsNot(v, vs.get_variable("u", [1], reuse=False)) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testNamelessStore(self): + vs = variable_scope._get_default_variable_store() + vs.get_variable("v1", [2]) + vs.get_variable("v2", [2]) + expected_names = [f"{name}:0" for name in ["v1", "v2"]] self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/w:0") - with tf.compat.v1.variable_scope("default"): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") + set(expected_names), set(v.name for v in vs._vars.values()) + ) + + # TODO(mihaimaruseac): Not converted to use wrap_function because of + # TypeError: Expected tf.group() expected Tensor arguments not 'None' with + # type '' + @tf_test_utils.run_in_graph_and_eager_modes + def testVarScopeInitializer(self): + init = tf.compat.v1.constant_initializer(0.3) + with tf.compat.v1.variable_scope("tower0") as tower: + with tf.compat.v1.variable_scope("foo", initializer=init): + v = tf.compat.v1.get_variable("v", []) + self.evaluate(tf.compat.v1.variables_initializer([v])) + self.assertAllClose(self.evaluate(v.value()), 0.3) + with tf.compat.v1.variable_scope(tower, initializer=init): + w = tf.compat.v1.get_variable("w", []) + self.evaluate(tf.compat.v1.variables_initializer([w])) + self.assertAllClose(self.evaluate(w.value()), 0.3) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarScopeConstraint(self): + constraint = lambda x: 0.0 * x + with tf.compat.v1.variable_scope("tower1") as tower: + with tf.compat.v1.variable_scope("foo", constraint=constraint): + v = tf.compat.v1.get_variable("v", []) + self.assertIsNotNone(v.constraint) + with tf.compat.v1.variable_scope(tower, constraint=constraint): + w = tf.compat.v1.get_variable("w", []) + self.assertIsNotNone(w.constraint) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarScopeDType(self): + with tf.compat.v1.variable_scope("tower2") as tower: + with tf.compat.v1.variable_scope("foo", dtype=tf.float16): + v = tf.compat.v1.get_variable("v", []) + self.assertEqual(v.dtype.base_dtype, tf.float16) + with tf.compat.v1.variable_scope(tower, dtype=tf.float16): + w = tf.compat.v1.get_variable("w", []) + self.assertEqual(w.dtype.base_dtype, tf.float16) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testInitFromNonTensorValue(self): + v = tf.compat.v1.get_variable("v4", initializer=4, dtype=tf.int32) + self.evaluate(tf.compat.v1.variables_initializer([v])) + self.assertAllClose(self.evaluate(v.value()), 4) - with tf.compat.v1.variable_scope(outer, reuse=True): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/w:0") - with tf.compat.v1.variable_scope("default", reuse=True): - self.assertEqual( - tf.compat.v1.get_variable("w", 
[]).name, "outer/default/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarScopeNestedOuterScope(self): - with self.cached_session(): - with tf.compat.v1.variable_scope("outer") as outer: - with tf.compat.v1.variable_scope(outer): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/w:0") - with tf.compat.v1.variable_scope("default"): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - with tf.compat.v1.variable_scope(outer, reuse=True): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/w:0") - with tf.compat.v1.variable_scope("default", reuse=True): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarOpScopeReuseParam(self): - with self.cached_session(): - with tf.compat.v1.variable_scope("outer") as outer: - with tf.compat.v1.variable_scope("tower", "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/tower/w:0") - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - with tf.compat.v1.variable_scope(outer) as outer: - with tf.compat.v1.variable_scope("tower", "default", reuse=True): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/tower/w:0") - outer.reuse_variables() - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarOpScopeReuseError(self): - with self.cached_session(): - with self.assertRaises(ValueError): - with tf.compat.v1.variable_scope(None, "default", reuse=True): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/tower/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarOpScopeOuterScope(self): - with self.cached_session(): - with tf.compat.v1.variable_scope("outer") as outer: - pass - with tf.compat.v1.variable_scope(outer, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/w:0") - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") + w = tf.compat.v1.get_variable( + "w4", initializer=numpy.array([1, 2, 3]), dtype=tf.int64 + ) + self.evaluate(tf.compat.v1.variables_initializer([w])) + self.assertAllClose(self.evaluate(w.value()), [1, 2, 3]) + + # A quirk to be revisited? 
+ error = ValueError if tf.executing_eagerly() else TypeError + with self.assertRaises(error): + tf.compat.v1.get_variable("x4", initializer={}) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testInitFromNonInitializer(self): + # Test various dtypes with zeros initializer as following: + types = [ + tf.int8, + tf.uint8, + tf.int16, + tf.uint16, + tf.int32, + tf.int64, + tf.bool, + ] + + # Use different variable_name to distinguish various dtypes + for i, dtype in enumerate(types): + x = tf.compat.v1.get_variable( + name="xx%d" % i, shape=(3, 4), dtype=dtype + ) + y = tf.compat.v1.get_variable( + name="yy%d" % i, + shape=(3, 4), + dtype=dtype, + initializer=tf.compat.v1.zeros_initializer(dtype=dtype), + ) - with tf.compat.v1.variable_scope(outer, "default", reuse=True): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/w:0") - outer.reuse_variables() - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVarOpScopeNestedOuterScope(self): - with self.cached_session(): - with tf.compat.v1.variable_scope("outer") as outer: - with tf.compat.v1.variable_scope(outer, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/w:0") - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - with tf.compat.v1.variable_scope(outer, "default", reuse=True): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/w:0") - with tf.compat.v1.variable_scope(None, "default", []): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testBasicWhenAuxiliaryNameScopeIsFalse(self): - with self.cached_session(): - with tf.compat.v1.variable_scope( - "scope", auxiliary_name_scope=False) as scope: - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "scope/w:0") - with tf.compat.v1.variable_scope(scope, auxiliary_name_scope=False): - self.assertEqual( - tf.compat.v1.get_variable("w1", []).name, "scope/w1:0") + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllEqual( + self.evaluate(x.value()), self.evaluate(y.value()) + ) - with tf.compat.v1.variable_scope("outer"): - with tf.compat.v1.variable_scope( - "inner", auxiliary_name_scope=False) as inner: - self.assertEqual(inner.original_name_scope, "outer/") - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/inner/w:0") - with tf.compat.v1.variable_scope( - inner, auxiliary_name_scope=False) as inner1: - self.assertEqual(inner1.original_name_scope, "outer/") - self.assertEqual( - tf.compat.v1.get_variable("w1", []).name, "outer/inner/w1:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testCreatedByDefaultNameWhenAuxiliaryNameScopeIsFalse(self): - with self.cached_session(): - with tf.compat.v1.variable_scope( - None, default_name="default", auxiliary_name_scope=False): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "default/w:0") + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarScopeRegularizer(self): + init = tf.compat.v1.constant_initializer(0.3) - with tf.compat.v1.variable_scope("outer"): - with 
tf.compat.v1.variable_scope( - None, default_name="default", - auxiliary_name_scope=False) as inner: - self.assertEqual(inner.original_name_scope, "outer/") - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/default/w:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testReenterRootScopeWhenAuxiliaryNameScopeIsFalse(self): - with self.cached_session(): - root_scope = tf.compat.v1.get_variable_scope() - with tf.compat.v1.variable_scope( - root_scope, auxiliary_name_scope=False): - self.assertEqual(tf.compat.v1.get_variable("w", []).name, "w:0") - - with tf.compat.v1.variable_scope("outer"): - with tf.compat.v1.variable_scope( - root_scope, auxiliary_name_scope=False) as inner: - self.assertEqual(inner.original_name_scope, "") - self.assertEqual(tf.compat.v1.get_variable("w1", []).name, "w1:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testAuxiliaryNameScopeIsInvalid(self): - with self.cached_session(): - with self.assertRaisesRegex(TypeError, "auxiliary_name_scope"): - with tf.compat.v1.variable_scope( - None, default_name="scope", auxiliary_name_scope="invalid"): - pass + def regularizer1(v): + return tf.reduce_mean(v) + 0.1 - with self.assertRaisesRegex(TypeError, "auxiliary_name_scope"): - with tf.compat.v1.variable_scope( - "scope", auxiliary_name_scope="invalid"): - pass + def regularizer2(v): + return tf.reduce_mean(v) + 0.2 - with tf.compat.v1.variable_scope("scope") as scope: - pass - with self.assertRaisesRegex(TypeError, "auxiliary_name_scope"): with tf.compat.v1.variable_scope( - scope, auxiliary_name_scope="invalid"): - pass - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testReuseScopeWithoutNameScopeCollision(self): - # Github issue: #13429 - with self.cached_session(): - with tf.compat.v1.variable_scope("outer"): - with tf.compat.v1.variable_scope("inner") as inner: - pass - - with tf.compat.v1.variable_scope( - inner, auxiliary_name_scope=False) as scope: - with tf.name_scope(scope.original_name_scope): - self.assertEqual( - tf.compat.v1.get_variable("w", []).name, "outer/inner/w:0") - - with tf.compat.v1.variable_scope("another"): - with tf.compat.v1.variable_scope( - inner, auxiliary_name_scope=False) as scope1: - with tf.name_scope(scope1.original_name_scope): - self.assertEqual( - tf.compat.v1.get_variable("w1", []).name, - "outer/inner/w1:0") - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testGetVarWithDevice(self): - g = tf.Graph() - varname_type = [] - - def device_func(op): - if op.type in ["Variable", "VariableV2", "VarHandleOp"]: - varname_type.append((op.name, op.get_attr("dtype"))) - return "/device:GPU:0" - - with g.as_default(): - with tf.compat.v1.device(device_func): - _ = tf.compat.v1.get_variable("x", (100, 200)) - _ = tf.compat.v1.get_variable( - "y", dtype=tf.int64, initializer=numpy.arange(73)) - self.assertEqual(varname_type[0], ("x", tf.float32)) - self.assertEqual(varname_type[1], ("y", tf.int64)) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testGetVariableWithRefDtype(self): - v = tf.compat.v1.get_variable("v", shape=[3, 4], dtype=tf.float32) - # Ensure it is possible to do get_variable with a _ref dtype passed in. 
-    _ = tf.compat.v1.get_variable("w", shape=[5, 6], dtype=v.dtype)
-
-  @tf_test_utils.run_in_graph_and_eager_modes
-  @run_inside_wrap_function_in_eager_mode
-  def testGetVariableWithInitializerWhichTakesNoArgs(self):
-    v = tf.compat.v1.get_variable("foo", initializer=lambda: [2])
-    self.assertEqual(v.name, "foo:0")
-
-  @tf_test_utils.run_in_graph_and_eager_modes
-  @run_inside_wrap_function_in_eager_mode
-  def testGetVariableWithInitializerWhichTakesOptionalArgs(self):
-    v = tf.compat.v1.get_variable("foo", initializer=lambda x=True: [2])
-    self.assertEqual(v.name, "foo:0")
-
-  @tf_test_utils.run_in_graph_and_eager_modes
-  @run_inside_wrap_function_in_eager_mode
-  def testTwoGraphs(self):
-
-    def f():
-      g1 = tf.Graph()
-      g2 = tf.Graph()
-      with g1.as_default():
-        with g2.as_default():
-          with tf.compat.v1.variable_scope("_"):
-            pass
-
-    self.assertRaisesRegex(ValueError,
-                           "'_' is not a valid (?:root )?scope name", f)
+            "tower3", regularizer=regularizer1
+        ) as tower:
+            with tf.compat.v1.variable_scope("foo", initializer=init):
+                v = tf.compat.v1.get_variable("v", [])
+                self.evaluate(tf.compat.v1.variables_initializer([v]))
+            with tf.compat.v1.variable_scope(tower, initializer=init) as vs:
+                tf.compat.v1.get_variable("u", [])
+                vs.set_regularizer(regularizer2)
+                tf.compat.v1.get_variable("w", [])
+                # Next 3 variables are not regularized, to test disabling
+                # regularization.
+                tf.compat.v1.get_variable(
+                    "x", [], regularizer=tf.compat.v1.no_regularizer
+                )
+                with tf.compat.v1.variable_scope(
+                    "baz", regularizer=tf.compat.v1.no_regularizer
+                ):
+                    tf.compat.v1.get_variable("y", [])
+                vs.set_regularizer(tf.compat.v1.no_regularizer)
+                tf.compat.v1.get_variable("z", [])
+
+    @tf_test_utils.run_in_graph_and_eager_modes
+    @run_inside_wrap_function_in_eager_mode
+    def testInitializeFromValue(self):
+        init = tf.constant(0.1)
+        w = tf.compat.v1.get_variable("v", initializer=init)
+        self.evaluate(tf.compat.v1.variables_initializer([w]))
+        self.assertAllClose(self.evaluate(w.value()), 0.1)
+
+        with self.assertRaisesRegex(ValueError, "shape"):
+            # We disallow explicit shape specification when initializer is
+            # constant.
+            tf.compat.v1.get_variable("u", [1], initializer=init)
+
+        with tf.compat.v1.variable_scope("foo", initializer=init):
+            # Constant initializer can be passed through scopes if needed.
+            v = tf.compat.v1.get_variable("v")
+            self.evaluate(tf.compat.v1.variables_initializer([v]))
+            self.assertAllClose(self.evaluate(v.value()), 0.1)
+
+        # Check that non-float32 initializer creates a non-float32 variable.
+        init = tf.constant(1, dtype=tf.int32)
+        t = tf.compat.v1.get_variable("t", initializer=init)
+        self.assertEqual(t.dtype.base_dtype, tf.int32)
+
+        # Raise error if `initializer` dtype and `dtype` are not identical.
+        with self.assertRaisesRegex(ValueError, "don't match"):
+            tf.compat.v1.get_variable("s", initializer=init, dtype=tf.float64)
+
+    @tf_test_utils.run_in_graph_and_eager_modes
+    @run_inside_wrap_function_in_eager_mode
+    def testVarScopeGetOrCreateReuse(self):
+        with self.cached_session():
+
+            def test_value(value):
+                x = tf.constant(value)
+                with tf.compat.v1.variable_scope(
+                    "testVarScopeGetOrCreateReuse_bar",
+                    reuse=tf.compat.v1.AUTO_REUSE,
+                ):
+                    _ = tf.compat.v1.assign(
+                        tf.compat.v1.get_variable("var", []), x
+                    )
+                with tf.compat.v1.variable_scope(
+                    "testVarScopeGetOrCreateReuse_bar",
+                    reuse=tf.compat.v1.AUTO_REUSE,
+                ):
+                    _ = tf.compat.v1.get_variable("var", [])
+                self.assertEqual(value, self.evaluate(x))
+
+            test_value(42.0)  # Variable is created.
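As an aside, the create-or-reuse contract that `test_value` exercises here can be sketched outside the test harness. A minimal graph-mode illustration; the scope and variable names below are ours, not the test's:

    import tensorflow as tf

    g = tf.Graph()
    with g.as_default():
        for _ in range(2):
            # First pass creates "bar/acc"; the second silently reuses it.
            with tf.compat.v1.variable_scope(
                "bar", reuse=tf.compat.v1.AUTO_REUSE
            ):
                tf.compat.v1.get_variable("acc", shape=[])
        # Two get_variable calls, but only one variable was ever created.
        assert len(g.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)) == 1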
+ test_value(13.0) # Variable is reused hereafter. + test_value(17.0) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarScopeGetOrCreateReuseIgnoreFalse(self): + with self.cached_session(): + + def test_value(value): + x = tf.constant(value) + with tf.compat.v1.variable_scope( + "testVarScopeGetOrCreateReuse_bar", reuse=False + ): + _ = tf.compat.v1.assign( + tf.compat.v1.get_variable("var", []), x + ) + # We need to ignore reuse=False in the shim, because the code is + # expected to get rerun each time the user calls the shim. + with tf.compat.v1.variable_scope( + "testVarScopeGetOrCreateReuse_bar", reuse=False + ): + _ = tf.compat.v1.get_variable("var", []) + self.assertEqual(value, self.evaluate(x)) + + test_value(42.0) # Variable is created. + test_value(13.0) # Variable is reused hereafter. + test_value(17.0) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarOpScope(self): + with self.cached_session(): + with tf.name_scope("testVarOpScope1"): + with tf.compat.v1.variable_scope("tower", "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "tower/w:0" + ) + + with tf.name_scope("testVarOpScope2"): + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "default/w:0" + ) + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "default_1/w:0" + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarOpScopeUniqueNamesInterleavedSubstringScopes(self): + with self.cached_session(): + with tf.compat.v1.variable_scope(None, "defaultScope1"): + with tf.compat.v1.variable_scope(None, "layer"): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "defaultScope1/layer/w:0", + ) + with tf.compat.v1.variable_scope(None, "defaultScope1"): + with tf.compat.v1.variable_scope(None, "layer"): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "defaultScope1_1/layer/w:0", + ) + with tf.compat.v1.variable_scope(None, "defaultScope"): + with tf.compat.v1.variable_scope(None, "layer"): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "defaultScope/layer/w:0", + ) + with tf.compat.v1.variable_scope(None, "defaultScope1"): + with tf.compat.v1.variable_scope(None, "layer"): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "defaultScope1_2/layer/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarOpScopeUniqueNamesWithJump(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("default") as default: + with tf.compat.v1.variable_scope(None, "layer"): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "default/layer/w:0", + ) + with tf.compat.v1.variable_scope(None, "layer"): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "default/layer_1/w:0", + ) + with tf.compat.v1.variable_scope(default): + pass + # No matter the jump in the middle, unique numbering continues. 
+ with tf.compat.v1.variable_scope(None, "layer"): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "default/layer_2/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarOpScopeReuse(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("outer") as outer: + with tf.compat.v1.variable_scope("tower", "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/tower/w:0", + ) + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + with tf.compat.v1.variable_scope(outer, reuse=True) as outer: + with tf.compat.v1.variable_scope("tower", "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/tower/w:0", + ) + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarScopeGetVar(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("root"): + with tf.compat.v1.variable_scope("towerA") as tower_a: + va = tf.compat.v1.get_variable("v", [1]) + self.assertEqual(va.name, "root/towerA/v:0") + + with tf.compat.v1.variable_scope(tower_a, reuse=True): + va2 = tf.compat.v1.get_variable("v", [1]) + self.assertIs(va2, va) + + with tf.compat.v1.variable_scope("towerB"): + vb = tf.compat.v1.get_variable("v", [1]) + self.assertEqual(vb.name, "root/towerB/v:0") + + with tf.compat.v1.variable_scope("towerA", reuse=True): + va2 = tf.compat.v1.get_variable("v", [1]) + self.assertIs(va2, va) + + with tf.compat.v1.variable_scope("foo"): + with tf.compat.v1.variable_scope("bar"): + v = tf.compat.v1.get_variable("v", [1]) + self.assertEqual(v.name, "root/foo/bar/v:0") + with tf.compat.v1.variable_scope(tower_a, reuse=True): + va3 = tf.compat.v1.get_variable("v", [1]) + self.assertIs(va, va3) + + with self.assertRaises(ValueError) as exc: + with tf.compat.v1.variable_scope(tower_a, reuse=True): + tf.compat.v1.get_variable("v", [2]) # Different shape. 
+ self.assertEqual("shape" in str(exc.exception), True) + + with self.assertRaises(ValueError) as exc: + with tf.compat.v1.variable_scope(tower_a, reuse=True): + tf.compat.v1.get_variable("v", [1], dtype=tf.int32) + self.assertEqual("dtype" in str(exc.exception), True) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarScopeOuterScope(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("outer") as outer: + pass + with tf.compat.v1.variable_scope(outer): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "outer/w:0" + ) + with tf.compat.v1.variable_scope("default"): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + with tf.compat.v1.variable_scope(outer, reuse=True): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "outer/w:0" + ) + with tf.compat.v1.variable_scope("default", reuse=True): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarScopeNestedOuterScope(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("outer") as outer: + with tf.compat.v1.variable_scope(outer): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "outer/w:0" + ) + with tf.compat.v1.variable_scope("default"): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + with tf.compat.v1.variable_scope(outer, reuse=True): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "outer/w:0" + ) + with tf.compat.v1.variable_scope("default", reuse=True): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarOpScopeReuseParam(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("outer") as outer: + with tf.compat.v1.variable_scope("tower", "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/tower/w:0", + ) + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + with tf.compat.v1.variable_scope(outer) as outer: + with tf.compat.v1.variable_scope( + "tower", "default", reuse=True + ): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/tower/w:0", + ) + outer.reuse_variables() + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarOpScopeReuseError(self): + with self.cached_session(): + with self.assertRaises(ValueError): + with tf.compat.v1.variable_scope(None, "default", reuse=True): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/tower/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarOpScopeOuterScope(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("outer") as outer: + pass + with tf.compat.v1.variable_scope(outer, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "outer/w:0" + ) + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + with 
tf.compat.v1.variable_scope(outer, "default", reuse=True): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "outer/w:0" + ) + outer.reuse_variables() + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVarOpScopeNestedOuterScope(self): + with self.cached_session(): + with tf.compat.v1.variable_scope("outer") as outer: + with tf.compat.v1.variable_scope(outer, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "outer/w:0" + ) + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + with tf.compat.v1.variable_scope(outer, "default", reuse=True): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "outer/w:0" + ) + with tf.compat.v1.variable_scope(None, "default", []): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testBasicWhenAuxiliaryNameScopeIsFalse(self): + with self.cached_session(): + with tf.compat.v1.variable_scope( + "scope", auxiliary_name_scope=False + ) as scope: + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "scope/w:0" + ) + with tf.compat.v1.variable_scope(scope, auxiliary_name_scope=False): + self.assertEqual( + tf.compat.v1.get_variable("w1", []).name, "scope/w1:0" + ) + + with tf.compat.v1.variable_scope("outer"): + with tf.compat.v1.variable_scope( + "inner", auxiliary_name_scope=False + ) as inner: + self.assertEqual(inner.original_name_scope, "outer/") + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/inner/w:0", + ) + with tf.compat.v1.variable_scope( + inner, auxiliary_name_scope=False + ) as inner1: + self.assertEqual(inner1.original_name_scope, "outer/") + self.assertEqual( + tf.compat.v1.get_variable("w1", []).name, + "outer/inner/w1:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testCreatedByDefaultNameWhenAuxiliaryNameScopeIsFalse(self): + with self.cached_session(): + with tf.compat.v1.variable_scope( + None, default_name="default", auxiliary_name_scope=False + ): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, "default/w:0" + ) + + with tf.compat.v1.variable_scope("outer"): + with tf.compat.v1.variable_scope( + None, default_name="default", auxiliary_name_scope=False + ) as inner: + self.assertEqual(inner.original_name_scope, "outer/") + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/default/w:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testReenterRootScopeWhenAuxiliaryNameScopeIsFalse(self): + with self.cached_session(): + root_scope = tf.compat.v1.get_variable_scope() + with tf.compat.v1.variable_scope( + root_scope, auxiliary_name_scope=False + ): + self.assertEqual(tf.compat.v1.get_variable("w", []).name, "w:0") + + with tf.compat.v1.variable_scope("outer"): + with tf.compat.v1.variable_scope( + root_scope, auxiliary_name_scope=False + ) as inner: + self.assertEqual(inner.original_name_scope, "") + self.assertEqual( + tf.compat.v1.get_variable("w1", []).name, "w1:0" + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def 
testAuxiliaryNameScopeIsInvalid(self): + with self.cached_session(): + with self.assertRaisesRegex(TypeError, "auxiliary_name_scope"): + with tf.compat.v1.variable_scope( + None, default_name="scope", auxiliary_name_scope="invalid" + ): + pass + + with self.assertRaisesRegex(TypeError, "auxiliary_name_scope"): + with tf.compat.v1.variable_scope( + "scope", auxiliary_name_scope="invalid" + ): + pass + + with tf.compat.v1.variable_scope("scope") as scope: + pass + with self.assertRaisesRegex(TypeError, "auxiliary_name_scope"): + with tf.compat.v1.variable_scope( + scope, auxiliary_name_scope="invalid" + ): + pass + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testReuseScopeWithoutNameScopeCollision(self): + # GitHub issue: #13429 + with self.cached_session(): + with tf.compat.v1.variable_scope("outer"): + with tf.compat.v1.variable_scope("inner") as inner: + pass + + with tf.compat.v1.variable_scope( + inner, auxiliary_name_scope=False + ) as scope: + with tf.name_scope(scope.original_name_scope): + self.assertEqual( + tf.compat.v1.get_variable("w", []).name, + "outer/inner/w:0", + ) + + with tf.compat.v1.variable_scope("another"): + with tf.compat.v1.variable_scope( + inner, auxiliary_name_scope=False + ) as scope1: + with tf.name_scope(scope1.original_name_scope): + self.assertEqual( + tf.compat.v1.get_variable("w1", []).name, + "outer/inner/w1:0", + ) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testGetVarWithDevice(self): + g = tf.Graph() + varname_type = [] + + def device_func(op): + if op.type in ["Variable", "VariableV2", "VarHandleOp"]: + varname_type.append((op.name, op.get_attr("dtype"))) + return "/device:GPU:0" + + with g.as_default(): + with tf.compat.v1.device(device_func): + _ = tf.compat.v1.get_variable("x", (100, 200)) + _ = tf.compat.v1.get_variable( + "y", dtype=tf.int64, initializer=numpy.arange(73) + ) + self.assertEqual(varname_type[0], ("x", tf.float32)) + self.assertEqual(varname_type[1], ("y", tf.int64)) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testGetVariableWithRefDtype(self): + v = tf.compat.v1.get_variable("v", shape=[3, 4], dtype=tf.float32) + # Ensure it is possible to do get_variable with a _ref dtype passed in. 
+ _ = tf.compat.v1.get_variable("w", shape=[5, 6], dtype=v.dtype) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testGetVariableWithInitializerWhichTakesNoArgs(self): + v = tf.compat.v1.get_variable("foo", initializer=lambda: [2]) + self.assertEqual(v.name, "foo:0") + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testGetVariableWithInitializerWhichTakesOptionalArgs(self): + v = tf.compat.v1.get_variable("foo", initializer=lambda x=True: [2]) + self.assertEqual(v.name, "foo:0") + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testTwoGraphs(self): + def f(): + g1 = tf.Graph() + g2 = tf.Graph() + with g1.as_default(): + with g2.as_default(): + with tf.compat.v1.variable_scope("_"): + pass + + self.assertRaisesRegex( + ValueError, "'_' is not a valid (?:root )?scope name", f + ) class VariableScopeWithCustomGetterTest(tf.test.TestCase): + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testNonCallableGetterFails(self): + with self.assertRaisesRegex( + ValueError, r"custom_getter .* not callable:" + ): + with tf.compat.v1.variable_scope("scope0", custom_getter=3): + tf.compat.v1.get_variable("name0") + with self.assertRaisesRegex( + ValueError, r"custom_getter .* not callable:" + ): + tf.compat.v1.get_variable("name0", custom_getter=3) + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testNoSideEffectsWithIdentityCustomGetter(self): + called = [0] + + def custom_getter(getter, *args, **kwargs): + called[0] += 1 + return getter(*args, **kwargs) - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testNonCallableGetterFails(self): - with self.assertRaisesRegex(ValueError, r"custom_getter .* not callable:"): - with tf.compat.v1.variable_scope("scope0", custom_getter=3): - tf.compat.v1.get_variable("name0") - with self.assertRaisesRegex(ValueError, r"custom_getter .* not callable:"): - tf.compat.v1.get_variable("name0", custom_getter=3) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testNoSideEffectsWithIdentityCustomGetter(self): - called = [0] - - def custom_getter(getter, *args, **kwargs): - called[0] += 1 - return getter(*args, **kwargs) - - with tf.compat.v1.variable_scope( - "scope", custom_getter=custom_getter) as scope: - v = tf.compat.v1.get_variable("v", [1]) - with tf.compat.v1.variable_scope(scope, reuse=True): - v2 = tf.compat.v1.get_variable("v", [1]) - with tf.compat.v1.variable_scope("new_scope") as new_scope: - v3 = tf.compat.v1.get_variable("v3", [1]) - with tf.compat.v1.variable_scope( - new_scope, reuse=True, custom_getter=custom_getter): - v4 = tf.compat.v1.get_variable("v3", [1]) - - self.assertIs(v, v2) - self.assertIs(v3, v4) - self.assertEqual(3, called[0]) # skipped one in the first new_scope - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testSynchronizationAndAggregationWithCustomGetter(self): - called = [0] - synchronization = tf.VariableSynchronization.AUTO - aggregation = tf.compat.v1.VariableAggregation.NONE - - def custom_getter(getter, *args, **kwargs): - called[0] += 1 - - # Verify synchronization and aggregation kwargs are as expected. 
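For orientation: a `custom_getter` is any callable that takes the default getter plus the `get_variable` keyword arguments and delegates to that getter, as the identity getter above does. A minimal sketch of the pattern; the logging list and names are illustrative only:

    import tensorflow as tf

    g = tf.Graph()
    with g.as_default():
        seen = []

        def logging_getter(getter, *args, **kwargs):
            seen.append(kwargs.get("name"))  # receives the full scoped name
            return getter(*args, **kwargs)   # delegate to the default getter

        with tf.compat.v1.variable_scope("scope", custom_getter=logging_getter):
            tf.compat.v1.get_variable("v", [1])
        assert seen == ["scope/v"]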
- self.assertEqual(kwargs["synchronization"], synchronization) - self.assertEqual(kwargs["aggregation"], aggregation) - return getter(*args, **kwargs) - - with tf.compat.v1.variable_scope("scope", custom_getter=custom_getter): - tf.compat.v1.get_variable("v", [1]) - self.assertEqual(1, called[0]) - - with tf.compat.v1.variable_scope("scope", custom_getter=custom_getter): - synchronization = tf.VariableSynchronization.ON_READ - aggregation = tf.compat.v1.VariableAggregation.MEAN - tf.compat.v1.get_variable( - "v1", [1], synchronization=synchronization, aggregation=aggregation) - - self.assertEqual(2, called[0]) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVariableCreator(self): - variable_names = [] - - def creator_a(next_creator, **kwargs): - variable_names.append(kwargs.get("name", "")) - return next_creator(**kwargs) - - def creator_b(next_creator, **kwargs): - kwargs["name"] = "forced_name" - return next_creator(**kwargs) - - with tf.variable_creator_scope(creator_a): - with tf.variable_creator_scope(creator_b): - tf.compat.v1.Variable(1.0, name="one_name") - - self.assertEqual(variable_names[0], "forced_name") - - called = [False] - - def creater_c(next_creator, **kwargs): - called[0] = True - self.assertEqual(kwargs["synchronization"], - tf.VariableSynchronization.ON_WRITE) - self.assertEqual(kwargs["aggregation"], - tf.compat.v1.VariableAggregation.MEAN) - return next_creator(**kwargs) - - with tf.variable_creator_scope(creater_c): - tf.compat.v1.get_variable( - "v", [], - synchronization=tf.VariableSynchronization.ON_WRITE, - aggregation=tf.compat.v1.VariableAggregation.MEAN) - self.assertTrue(called[0]) - - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testVariableCreatorNestingError(self): - - def creator(next_creator, **kwargs): - return next_creator(**kwargs) - - # Save the state so we can clean up at the end. - graph = tf.compat.v1.get_default_graph() - old_creator_stack = graph._variable_creator_stack - - try: - scope = tf.variable_creator_scope(creator) - scope.__enter__() - with tf.variable_creator_scope(creator): - with self.assertRaises(RuntimeError): - scope.__exit__(None, None, None) - finally: - graph._variable_creator_stack = old_creator_stack - - -class VariableScopeMultithreadedTest(tf.test.TestCase): + with tf.compat.v1.variable_scope( + "scope", custom_getter=custom_getter + ) as scope: + v = tf.compat.v1.get_variable("v", [1]) + with tf.compat.v1.variable_scope(scope, reuse=True): + v2 = tf.compat.v1.get_variable("v", [1]) + with tf.compat.v1.variable_scope("new_scope") as new_scope: + v3 = tf.compat.v1.get_variable("v3", [1]) + with tf.compat.v1.variable_scope( + new_scope, reuse=True, custom_getter=custom_getter + ): + v4 = tf.compat.v1.get_variable("v3", [1]) + + self.assertIs(v, v2) + self.assertIs(v3, v4) + self.assertEqual(3, called[0]) # skipped one in the first new_scope + + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testSynchronizationAndAggregationWithCustomGetter(self): + called = [0] + synchronization = tf.VariableSynchronization.AUTO + aggregation = tf.compat.v1.VariableAggregation.NONE + + def custom_getter(getter, *args, **kwargs): + called[0] += 1 + + # Verify synchronization and aggregation kwargs are as expected. 
+ self.assertEqual(kwargs["synchronization"], synchronization) + self.assertEqual(kwargs["aggregation"], aggregation) + return getter(*args, **kwargs) + + with tf.compat.v1.variable_scope("scope", custom_getter=custom_getter): + tf.compat.v1.get_variable("v", [1]) + self.assertEqual(1, called[0]) + + with tf.compat.v1.variable_scope("scope", custom_getter=custom_getter): + synchronization = tf.VariableSynchronization.ON_READ + aggregation = tf.compat.v1.VariableAggregation.MEAN + tf.compat.v1.get_variable( + "v1", + [1], + synchronization=synchronization, + aggregation=aggregation, + ) - @tf_test_utils.run_in_graph_and_eager_modes - @run_inside_wrap_function_in_eager_mode - def testReenterMainScope(self): + self.assertEqual(2, called[0]) - def thread_fn(graph, main_thread_scope): - with graph.as_default(): - # Variable created with main scope will have prefix "main". - with tf.compat.v1.variable_scope(main_thread_scope): - with tf.compat.v1.variable_scope("foo"): - v = tf.compat.v1.get_variable("v", []) - self.assertEqual("main/foo/v:0", v.name) + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVariableCreator(self): + variable_names = [] - # Variable created outside main scope will not have prefix "main". - with tf.compat.v1.variable_scope("bar"): - v = tf.compat.v1.get_variable("v", []) - self.assertEqual("bar/v:0", v.name) + def creator_a(next_creator, **kwargs): + variable_names.append(kwargs.get("name", "")) + return next_creator(**kwargs) - graph = tf.compat.v1.get_default_graph() - with tf.compat.v1.variable_scope("main") as main_thread_scope: - thread = threading.Thread( - target=thread_fn, args=(graph, main_thread_scope)) - thread.start() - thread.join() + def creator_b(next_creator, **kwargs): + kwargs["name"] = "forced_name" + return next_creator(**kwargs) + with tf.variable_creator_scope(creator_a): + with tf.variable_creator_scope(creator_b): + tf.compat.v1.Variable(1.0, name="one_name") -class CompatV1TemplateScaleByY(base_layer.Layer): + self.assertEqual(variable_names[0], "forced_name") - def __init__(self, **kwargs): - super().__init__(**kwargs) - def my_op(x, scalar_name): - var1 = tf.compat.v1.get_variable( - scalar_name, - shape=[], - regularizer=regularizers.L2(), - initializer=tf.compat.v1.constant_initializer(1.5)) - return x * var1 - self.scale_by_y = tf.compat.v1.make_template( - "scale_by_y", my_op, scalar_name="y") + called = [False] - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs): - with tf.compat.v1.variable_scope("foo"): - return self.scale_by_y(inputs) + def creater_c(next_creator, **kwargs): + called[0] = True + self.assertEqual( + kwargs["synchronization"], tf.VariableSynchronization.ON_WRITE + ) + self.assertEqual( + kwargs["aggregation"], tf.compat.v1.VariableAggregation.MEAN + ) + return next_creator(**kwargs) + + with tf.variable_creator_scope(creater_c): + tf.compat.v1.get_variable( + "v", + [], + synchronization=tf.VariableSynchronization.ON_WRITE, + aggregation=tf.compat.v1.VariableAggregation.MEAN, + ) + self.assertTrue(called[0]) + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testVariableCreatorNestingError(self): + def creator(next_creator, **kwargs): + return next_creator(**kwargs) -class VariableScopeModule(tf.Module): - """Module that uses the shim.""" + # Save the state so we can clean up at the end. 
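Variable creators, exercised just above, hook in one level below custom getters: each creator receives the next creator in the stack plus the raw `tf.Variable` constructor kwargs. A minimal eager-mode sketch of the renaming pattern from `creator_b`; the names are illustrative:

    import tensorflow as tf

    def renaming_creator(next_creator, **kwargs):
        kwargs["name"] = "forced_name"  # override whatever name was requested
        return next_creator(**kwargs)

    with tf.variable_creator_scope(renaming_creator):
        v = tf.Variable(1.0, name="one_name")
    assert v.name.startswith("forced_name")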
+ graph = tf.compat.v1.get_default_graph() + old_creator_stack = graph._variable_creator_stack - @variable_scope_shim.track_tf1_style_variables - def __call__(self, *args, **kwargs): - with self.name_scope: - return self.forward_pass(*args, **kwargs) + try: + scope = tf.variable_creator_scope(creator) + scope.__enter__() + with tf.variable_creator_scope(creator): + with self.assertRaises(RuntimeError): + scope.__exit__(None, None, None) + finally: + graph._variable_creator_stack = old_creator_stack - def get_compat_v1_regularization_losses(self): - """Dict w/ regularization losses from `get_variable`&`compat.v1.layers`.""" - return {name: regularizer() for name, regularizer - in self._tf1_style_var_store._regularizers.items()} # pylint: disable=protected-access +class VariableScopeMultithreadedTest(tf.test.TestCase): + @tf_test_utils.run_in_graph_and_eager_modes + @run_inside_wrap_function_in_eager_mode + def testReenterMainScope(self): + def thread_fn(graph, main_thread_scope): + with graph.as_default(): + # Variable created with main scope will have prefix "main". + with tf.compat.v1.variable_scope(main_thread_scope): + with tf.compat.v1.variable_scope("foo"): + v = tf.compat.v1.get_variable("v", []) + self.assertEqual("main/foo/v:0", v.name) + + # Variable created outside main scope will not have prefix + # "main". + with tf.compat.v1.variable_scope("bar"): + v = tf.compat.v1.get_variable("v", []) + self.assertEqual("bar/v:0", v.name) + + graph = tf.compat.v1.get_default_graph() + with tf.compat.v1.variable_scope("main") as main_thread_scope: + thread = threading.Thread( + target=thread_fn, args=(graph, main_thread_scope) + ) + thread.start() + thread.join() -@test_combinations.generate(test_combinations.combine(mode=["eager"])) -class TF1VariableScopeLayerTest(tf.test.TestCase, parameterized.TestCase): - def test_get_variable(self): - # Test the shim when using `get_variable` (and regularizers) directly - - class WrappedDenseLayer(base_layer.Layer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs, training=None): - out = inputs - with tf.compat.v1.variable_scope("dense_one"): - # The weights are created with a `regularizer`, - # so the layer should track their regularization losses - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=tf.compat.v1.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=tf.compat.v1.zeros_initializer(), - name="bias") - out = tf.matmul(out, kernel) - out = tf.nn.bias_add(out, bias) - with tf.compat.v1.variable_scope("nested_scope"): - with tf.compat.v1.variable_scope("dense_two"): - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=tf.compat.v1.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=tf.compat.v1.zeros_initializer(), - name="bias") - out = tf.matmul(out, kernel) - out = tf.nn.bias_add(out, bias) - return out - - layer = WrappedDenseLayer(10) - out = layer(tf.ones(shape=(5, 5))) - weights = {x.name: x for x in layer.variables} - - # Verify the correct output, regularization losses, + variables were made - self.assertEqual(weights.keys(), {"dense_one/bias:0", - "dense_one/kernel:0", - "nested_scope/dense_two/bias:0", - "nested_scope/dense_two/kernel:0"}) - 
self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) - self.assertAllEqual(tf.add_n(layer.losses), 1.5) - - # Verify reuse by updating the variables then re-running - weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) - weights["nested_scope/dense_two/kernel:0"].assign( - tf.ones(shape=(10, 10)) * 2) - out = layer(tf.ones(shape=(5, 5))) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 200) - self.assertAllEqual(tf.add_n(layer.losses), 6) - - def test_compat_v1_layer(self): - # Test the shim when using `compat.v1` layers - - class WrappedDenseLayer(base_layer.Layer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs, training=None): - out = core_layers.dense( - inputs, self.units, name="dense_one", - kernel_initializer=tf.compat.v1.ones_initializer(), - kernel_regularizer="l2") - with tf.compat.v1.variable_scope("nested_scope"): - out = core_layers.dense( - out, self.units, name="dense_two", - kernel_initializer=tf.compat.v1.ones_initializer(), - kernel_regularizer="l2") - return out - - layer = WrappedDenseLayer(10) - out = layer(tf.ones(shape=(5, 5))) - weights = {x.name: x for x in layer.variables} - - # Verify the correct output, losses, + variables were made - self.assertEqual(weights.keys(), {"dense_one/bias:0", - "dense_one/kernel:0", - "nested_scope/dense_two/bias:0", - "nested_scope/dense_two/kernel:0"}) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) - self.assertAllEqual(tf.add_n(layer.losses), 1.5) - - # Verify reuse by updating the variables then re-running - weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) - weights["nested_scope/dense_two/kernel:0"].assign( - tf.ones(shape=(10, 10)) * 2) - out = layer(tf.ones(shape=(5, 5))) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 200) - self.assertAllEqual(tf.add_n(layer.losses), 6) - - def test_shim_exporting(self): - - class WrappedDenseLayer(base_layer.Layer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs, training=None): - out = core_layers.dense( - inputs, - self.units, - name="dense_one", - kernel_initializer=tf.compat.v1.ones_initializer(), - kernel_regularizer="l2") - with tf.compat.v1.variable_scope("nested_scope"): - out = core_layers.dense( - out, - self.units, - name="dense_two", - kernel_initializer=tf.compat.v1.ones_initializer(), - kernel_regularizer="l2") - return out - - layer = WrappedDenseLayer(10) - layer(tf.ones(shape=(5, 5))) - - tmp_dir = self.get_temp_dir() - - # Try exporting the layer directly - tf.saved_model.save(layer, tmp_dir) - - # Try exporting the layer nested in a functional model - # This is where saving reflection gets tricky due to - # trying to replace the passed training arg in training=True - # and training=False modes - inp = input_layer_module.Input(shape=(5, 5)) - outs = layer(inp) - model = models.Model(inp, outs) - tf.saved_model.save(model, tmp_dir) - - def test_variable_store_scope_get_variable(self): - # Test the module shim when using `get_variable` (and regularizers) directly - - class WrappedDenseLayer(tf.Module): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - self._variable_store = variable_scope_shim._EagerVariableStore() - - def get_compat_v1_regularization_losses(self): - """Dict w/ regularization losses from 
`get_variable`.""" - return {name: regularizer() for name, regularizer - in self._variable_store._regularizers.items()} # pylint: disable=protected-access - - def __call__(self, inputs, training=None): - with self._variable_store.scope(): - out = inputs - with tf.compat.v1.variable_scope("dense_one"): - # The weights are created with a `regularizer`, - # so the layer should track their regularization losses - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=tf.compat.v1.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=tf.compat.v1.zeros_initializer(), - name="bias") - out = tf.matmul(out, kernel) - out = tf.nn.bias_add(out, bias) - with tf.compat.v1.variable_scope("nested_scope"): - with tf.compat.v1.variable_scope("dense_two"): - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=tf.compat.v1.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=tf.compat.v1.zeros_initializer(), - name="bias") - out = tf.matmul(out, kernel) - out = tf.nn.bias_add(out, bias) - return out - - layer = WrappedDenseLayer(10) - out = layer(tf.ones(shape=(5, 5))) - weights = {x.name: x for x in layer.variables} - - # Verify the correct output, regularization losses, + variables were made - self.assertEqual(weights.keys(), {"dense_one/bias:0", - "dense_one/kernel:0", - "nested_scope/dense_two/bias:0", - "nested_scope/dense_two/kernel:0"}) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) - self.assertAllEqual( - tf.add_n(layer.get_compat_v1_regularization_losses().values()), 1.5) - - # Verify reuse by updating the variables then re-running - weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) - weights["nested_scope/dense_two/kernel:0"].assign( - tf.ones(shape=(10, 10)) * 2) - out = layer(tf.ones(shape=(5, 5))) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 200) - self.assertAllEqual( - tf.add_n(layer.get_compat_v1_regularization_losses().values()), 6) - - def test_module_get_variable(self): - # Test the module shim when using `get_variable` (and regularizers) directly - - class WrappedDenseLayer(VariableScopeModule): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - def forward_pass(self, inputs, training=None): - out = inputs - with tf.compat.v1.variable_scope("dense_one"): - # The weights are created with a `regularizer`, - # so the layer should track their regularization losses - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=tf.compat.v1.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=tf.compat.v1.zeros_initializer(), - name="bias") - out = tf.matmul(out, kernel) - out = tf.nn.bias_add(out, bias) - with tf.compat.v1.variable_scope("nested_scope"): - with tf.compat.v1.variable_scope("dense_two"): - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(), - initializer=tf.compat.v1.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=tf.compat.v1.zeros_initializer(), - name="bias") - out = tf.matmul(out, kernel) - out = tf.nn.bias_add(out, bias) - return out - - layer = WrappedDenseLayer(10) - out = layer(tf.ones(shape=(5, 5))) - weights = 
{x.name: x for x in layer.variables} - - # Verify the correct output, regularization losses, + variables were made - self.assertEqual(weights.keys(), {"dense_one/bias:0", - "dense_one/kernel:0", - "nested_scope/dense_two/bias:0", - "nested_scope/dense_two/kernel:0"}) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) - self.assertAllEqual( - tf.add_n(layer.get_compat_v1_regularization_losses().values()), 1.5) - - # Verify reuse by updating the variables then re-running - weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) - weights["nested_scope/dense_two/kernel:0"].assign( - tf.ones(shape=(10, 10)) * 2) - out = layer(tf.ones(shape=(5, 5))) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 200) - self.assertAllEqual( - tf.add_n(layer.get_compat_v1_regularization_losses().values()), 6) - - def test_module_compat_v1_layer(self): - # Test the module shim when using `compat.v1` layers - - class WrappedDenseLayer(VariableScopeModule): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - def forward_pass(self, inputs, training=None): - out = core_layers.dense( - inputs, self.units, name="dense_one", - kernel_initializer=tf.compat.v1.ones_initializer(), - kernel_regularizer="l2") - with tf.compat.v1.variable_scope("nested_scope"): - out = core_layers.dense( - out, self.units, name="dense_two", - kernel_initializer=tf.compat.v1.ones_initializer(), - kernel_regularizer="l2") - return out - - layer = WrappedDenseLayer(10) - out = layer(tf.ones(shape=(5, 5))) - weights = {x.name: x for x in layer.variables} - - # Verify the correct output, losses, + variables were made - self.assertEqual(weights.keys(), {"dense_one/bias:0", - "dense_one/kernel:0", - "nested_scope/dense_two/bias:0", - "nested_scope/dense_two/kernel:0"}) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) - self.assertAllEqual(tf.add_n( - layer.get_compat_v1_regularization_losses().values()), 1.5) - - # Verify reuse by updating the variables then re-running - weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) - weights["nested_scope/dense_two/kernel:0"].assign( - tf.ones(shape=(10, 10)) * 2) - out = layer(tf.ones(shape=(5, 5))) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 200) - self.assertAllEqual(tf.add_n( - layer.get_compat_v1_regularization_losses().values()), 6) - - def test_shim_nesting(self): - # Test that nesting the shim in itself works - - class NestedLayer(base_layer.Layer): - - def __init__(self, units, name, *args, **kwargs): - super().__init__(*args, name=name, **kwargs) - self.units = units - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs): - out = inputs - with tf.compat.v1.variable_scope(self.name): - # The weights are created with a `regularizer`, - # so the layer should track their regularization losses - kernel = tf.compat.v1.get_variable( - shape=[out.shape[-1], self.units], - regularizer=regularizers.L2(1.0), - initializer=tf.compat.v1.ones_initializer(), - name="kernel") - bias = tf.compat.v1.get_variable( - shape=[self.units,], - initializer=tf.compat.v1.initializers.zeros, - name="bias") - out = tf.linalg.matmul(out, kernel) - out = tf.compat.v1.nn.bias_add(out, bias) - return out - - class WrappedDenseLayer(base_layer.Layer): - - def __init__(self, units, **kwargs): - super().__init__(**kwargs) - self.units = units - self.dense_layer_a = None - self.dense_layer_b = None - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs): - # Only create the nested 
tf.variable/module/layer/model if it has not - # already been created! - if not self.dense_layer_a: - self.dense_layer_a = NestedLayer(self.units * 2, "dense_one") - out = self.dense_layer_a(inputs) - if not self.dense_layer_b: - self.dense_layer_b = NestedLayer(self.units, "dense_two") - out = self.dense_layer_b(out) - return out - - layer = WrappedDenseLayer(5) - out = layer(tf.ones(shape=(1, 3))) - weights = {x.name: x for x in layer.variables} - - # Verify the correct output, losses, + variables were made - # (Specifically: no double-counting of any weights or reg. losses - # between nested components!) - self.assertEqual({var.name for var in layer.trainable_weights}, - {"dense_one/bias:0", - "dense_one/kernel:0", - "dense_two/bias:0", - "dense_two/kernel:0"}) - self.assertEqual({var.name for var in layer.dense_layer_a.weights}, - {"dense_one/bias:0", - "dense_one/kernel:0"}) - self.assertEqual({var.name for var in layer.dense_layer_b.weights}, - {"dense_two/bias:0", - "dense_two/kernel:0"}) - self.assertAllEqual(out, tf.ones(shape=(1, 5)) * 30) - self.assertAllEqual(tf.add_n(layer.dense_layer_a.losses), 30) - self.assertAllEqual(tf.add_n(layer.dense_layer_b.losses), 50) - self.assertAllEqual(tf.add_n(layer.losses), 80) - - # Verify reuse by updating the variables then re-running - weights["dense_one/kernel:0"].assign(tf.ones(shape=(3, 10)) * 2) - weights["dense_two/kernel:0"].assign( - tf.ones(shape=(10, 5)) * 2) - out = layer(tf.ones(shape=(1, 3))) - self.assertAllEqual(out, tf.ones(shape=(1, 5)) * 120) - self.assertAllEqual(tf.add_n(layer.losses), 320) - - def test_compat_v1_make_template_in_shim_eager(self): - # Test the shim when using `compat.v1.make_template` - # Verify it works correctly in eager - layer = CompatV1TemplateScaleByY() - for _ in range(3): - # Use multiple calls to verify that no new weights get created - self.assertAllEqual(layer(tf.ones(shape=(2, 3))), - tf.constant(1.5, shape=(2, 3))) - self.assertAllEqual({var.name: var.numpy() for var in layer.weights}, - {"foo/scale_by_y/y:0": 1.5}) - self.assertAllEqual(tf.add_n(layer.losses), - regularizers.L2()(layer.weights[0])) - - def test_compat_v1_make_template_in_shim_tf_function(self): - # Test the shim when using `compat.v1.make_template` - # Verify it works correctly in a tf.function - # when made outside the function - layer = CompatV1TemplateScaleByY() - - @tf.function - def foo(x): - return layer(x), tf.add_n(layer.losses) - - for _ in range(3): - # Use multiple calls to verify that no new weights get created - out, loss = foo(tf.ones(shape=(2, 3))) - self.assertAllEqual(out, tf.constant(1.5, shape=(2, 3))) - self.assertAllEqual(loss, regularizers.L2()(layer.weights[0])) - self.assertAllEqual({var.name: var.numpy() for var in layer.weights}, - {"foo/scale_by_y/y:0": 1.5}) - - def test_compat_v1_make_template_in_trace_in_shim(self): - # Test the shim when using `compat.v1.make_template` - # Verify it works correctly when the make_template/layer/shim - # is created on the first tf.function trace! 
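The `make_template` tests in this stretch all lean on a single property: a template binds its variables on the first call and every later call reuses them. A standalone graph-mode sketch, assuming nothing beyond `tf.compat.v1` (the op loosely mirrors `CompatV1TemplateScaleByY`):

    import tensorflow as tf

    def scale(x):
        y = tf.compat.v1.get_variable(
            "y", shape=[], initializer=tf.compat.v1.constant_initializer(1.5)
        )
        return x * y

    g = tf.Graph()
    with g.as_default():
        scale_by_y = tf.compat.v1.make_template("scale_by_y", scale)
        out1 = scale_by_y(tf.ones([2]))
        out2 = scale_by_y(tf.ones([2]))  # second call creates no new variable
        assert len(g.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)) == 1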
- layers = {} - @tf.function - def bar(x): - if "layer" not in layers: - layers["layer"] = CompatV1TemplateScaleByY() - layer = layers["layer"] - return layer(x), tf.add_n(layer.losses) - - for _ in range(3): - # Use multiple calls to verify that no new weights get created - out, loss = bar(tf.ones(shape=(2, 3))) - self.assertAllEqual(out, tf.constant(1.5, shape=(2, 3))) - self.assertAllEqual(loss, regularizers.L2()(layers["layer"].weights[0])) - self.assertAllEqual( - {var.name: var.numpy() for var in layers["layer"].weights}, - {"foo/scale_by_y/y:0": 1.5}) - - def test_only_track_get_variable(self): - # Test the shim does not try tracking or reusing variables - # that were not created by get_variable. These variables/modules/layers - # need to be tracked separately - - class WrappedDenseLayer(base_layer.Layer): - - def __init__(self, units, **kwargs): +class CompatV1TemplateScaleByY(base_layer.Layer): + def __init__(self, **kwargs): super().__init__(**kwargs) - self.units = units - self._dense_model = None - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs): - dense_layer = core.Dense( - self.units, name="dense", - kernel_initializer=tf.compat.v1.ones_initializer(), - kernel_regularizer="l2") - return dense_layer(inputs) - - layer = WrappedDenseLayer(10) - out = layer(tf.ones(shape=(5, 5))) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 5) - self.assertEmpty(layer.weights) - - def test_embedded_keras_model(self): - # Test the shim when embedding a Keras model inside of it - # And assigning the model to an attribute + def my_op(x, scalar_name): + var1 = tf.compat.v1.get_variable( + scalar_name, + shape=[], + regularizer=regularizers.L2(), + initializer=tf.compat.v1.constant_initializer(1.5), + ) + return x * var1 - class WrappedDenseLayer(base_layer.Layer): + self.scale_by_y = tf.compat.v1.make_template( + "scale_by_y", my_op, scalar_name="y" + ) - def __init__(self, units, **kwargs): - super().__init__(**kwargs) - self.units = units - self._dense_model = None - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs): - if not self._dense_model: - inp = input_layer_module.Input(shape=inputs.shape) - dense_layer = core.Dense( - self.units, name="dense", - kernel_initializer=tf.compat.v1.ones_initializer(), - kernel_regularizer="l2") - self._dense_model = training_module.Model( - inputs=inp, outputs=dense_layer(inp)) - return self._dense_model(inputs) - - layer = WrappedDenseLayer(10) - out = layer(tf.ones(shape=(5, 5))) - weights = {x.name: x for x in layer.variables} - - # Verify the correct output, losses, + variables were made - self.assertEqual(weights.keys(), {"dense/bias:0", - "dense/kernel:0"}) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 5) - self.assertAllEqual(tf.add_n(layer.losses), 0.5) - - # Verify reuse by updating the variables then re-running - weights["dense/kernel:0"].assign( - tf.ones(shape=(5, 10)) * 2) - out = layer(tf.ones(shape=(5, 5))) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 10) - self.assertAllEqual(tf.add_n(layer.losses), 2) - - def test_embedded_keras_model_in_module(self): - # Test the module shim when embedding a Keras model inside of it - # And assigning the model to an attribute - - class WrappedDenseLayer(VariableScopeModule): - - def __init__(self, units, **kwargs): - super().__init__(**kwargs) - self.units = units - self._dense_model = None - - def forward_pass(self, inputs): - if not self._dense_model: - inp = input_layer_module.Input(shape=inputs.shape) - dense_layer = core.Dense( - 
self.units, name="dense", - kernel_initializer=tf.compat.v1.ones_initializer(), - kernel_regularizer="l2") - self._dense_model = training_module.Model( - inputs=inp, outputs=dense_layer(inp)) - return self._dense_model(inputs) - - layer = WrappedDenseLayer(10) - out = layer(tf.ones(shape=(5, 5))) - weights = {x.name: x for x in layer.variables} - - # Verify the correct output, losses, + variables were made - self.assertEqual(weights.keys(), {"dense/bias:0", - "dense/kernel:0"}) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 5) - - # The module shim will only track regularization losses made by - # compat.v1.layers and compat.v1.get_variable. Other regularization - # losses must be tracked by separate user-created mechanisms. - self.assertEmpty(layer.get_compat_v1_regularization_losses()) - - # Verify reuse by updating the variables then re-running - weights["dense/kernel:0"].assign( - tf.ones(shape=(5, 10)) * 2) - out = layer(tf.ones(shape=(5, 5))) - self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 10) - - # The module shim will only track regularization losses made by - # compat.v1.layers and compat.v1.get_variable. Other regularization - # losses must be tracked by separate user-created mechanisms. - self.assertEmpty(layer.get_compat_v1_regularization_losses()) - - def test_training_arg(self): - # Test the shim when passing in a Keras `training` arg - - class TrainingCheckLayer(base_layer.Layer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs, training=None): - if training: - out = core_layers.dense(inputs, self.units, name="dense_training") - else: - out = core_layers.dense(inputs, self.units, name="dense_no_training") - return out - - layer = TrainingCheckLayer(10) - layer(tf.ones(shape=(5, 5)), training=True) - weights = {x.name: x for x in layer.variables} - - # Verify the correct variables were made - self.assertEqual(weights.keys(), - {"dense_training/bias:0", "dense_training/kernel:0"}) - - layer = TrainingCheckLayer(10) - layer(tf.ones(shape=(5, 5))) - weights = {x.name: x for x in layer.variables} - - # Verify the correct variables were made - self.assertEqual(weights.keys(), - {"dense_no_training/bias:0", "dense_no_training/kernel:0"}) - - def test_incorrect_decoration(self): - # Raise an error if you incorrectly decorate a method - # that is not a method of a Module, layer, or model: @variable_scope_shim.track_tf1_style_variables - def foo(x): - return x * 2 - - with self.assertRaisesRegex(ValueError, "does not extend"): - foo(tf.ones(shape=(4, 4))) - - -class GetOrCreateLayerTest(tf.test.TestCase, parameterized.TestCase): - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def test_get_or_create_layer_with_regularizer_eager(self): - - class NestedLayer(base_layer.Layer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - def build_model(self): - inp = input_layer_module.Input(shape=(5, 5)) - dense_layer = core.Dense( - 10, name="dense", kernel_regularizer="l2", - kernel_initializer=tf.compat.v1.ones_initializer()) - model = training_module.Model(inputs=inp, outputs=dense_layer(inp)) - return model - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs): - # enter a variable scope to check module key naming - with tf.compat.v1.variable_scope("test_scope"): - model = variable_scope_shim.get_or_create_layer( - "dense_model", 
self.build_model) - return model(inputs) - - layer = NestedLayer(10) - x = tf.ones(shape=(5, 5)) - - out1 = layer(tf.expand_dims(x, 0)) - - model1 = layer.submodules[0]._layers["test_scope/dense_model"] - - out2 = layer(tf.expand_dims(x, 0)) - # Verify model produces same output on successive calls with same input - self.assertAllEqual(out1, out2) - - # Verify the model used on subsequent calls is the same - model2 = layer.submodules[0]._layers["test_scope/dense_model"] - self.assertIs(model1, model2) - - # Verify that stored layer computes outputs and losses correctly - weights = {x.name: x for x in layer.variables} - self.assertEqual(weights.keys(), {"dense/bias:0", "dense/kernel:0"}) - self.assertAllEqual(out2, tf.ones(shape=(1, 5, 10)) * 5) - self.assertAllEqual(layer.losses, [0.5]) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def test_get_or_create_layer_no_regularizer_eager(self): - - class NestedLayer(base_layer.Layer): - - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - - def build_model(self): - inp = input_layer_module.Input(shape=(5, 5)) - dense_layer = core.Dense( - 10, name="dense", - kernel_initializer=tf.compat.v1.ones_initializer()) - model = training_module.Model(inputs=inp, outputs=dense_layer(inp)) - return model - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs): - # enter a variable scope to check module key naming - with tf.compat.v1.variable_scope("test_scope"): - model = variable_scope_shim.get_or_create_layer( - "dense_model", self.build_model) - return model(inputs) - - layer = NestedLayer(10) - x = tf.ones(shape=(5, 5)) - - out1 = layer(tf.expand_dims(x, 0)) - - model1 = layer.submodules[0]._layers["test_scope/dense_model"] - - out2 = layer(tf.expand_dims(x, 0)) - # Verify model produces same output on successive calls with same input - self.assertAllEqual(out1, out2) + def call(self, inputs): + with tf.compat.v1.variable_scope("foo"): + return self.scale_by_y(inputs) - # Verify the model used on subsequent calls is the same - model2 = layer.submodules[0]._layers["test_scope/dense_model"] - self.assertIs(model1, model2) - # Verify that stored layer computes outputs and losses correctly - weights = {x.name: x for x in layer.variables} - self.assertEqual(weights.keys(), {"dense/bias:0", "dense/kernel:0"}) - self.assertAllEqual(out2, tf.ones(shape=(1, 5, 10)) * 5) - self.assertAllEqual(layer.losses, [0.0]) +class VariableScopeModule(tf.Module): + """Module that uses the shim.""" - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def test_get_or_create_layer_tf_function(self): + @variable_scope_shim.track_tf1_style_variables + def __call__(self, *args, **kwargs): + with self.name_scope: + return self.forward_pass(*args, **kwargs) - class NestedLayer(base_layer.Layer): + def get_compat_v1_regularization_losses(self): + """Dict w/ regularization losses from + `get_variable`&`compat.v1.layers`.""" + return { + name: regularizer() + for name, regularizer in self._tf1_style_var_store._regularizers.items() # noqa: E501 + } - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units - def build_model(self): +@test_combinations.generate(test_combinations.combine(mode=["eager"])) +class TF1VariableScopeLayerTest(tf.test.TestCase, parameterized.TestCase): + def test_get_variable(self): + # Test the shim when using `get_variable` (and regularizers) directly + + class 
WrappedDenseLayer(base_layer.Layer): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs, training=None): + out = inputs + with tf.compat.v1.variable_scope("dense_one"): + # The weights are created with a `regularizer`, + # so the layer should track their regularization losses + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=tf.compat.v1.ones_initializer(), + name="kernel", + ) + bias = tf.compat.v1.get_variable( + shape=[ + self.units, + ], + initializer=tf.compat.v1.zeros_initializer(), + name="bias", + ) + out = tf.matmul(out, kernel) + out = tf.nn.bias_add(out, bias) + with tf.compat.v1.variable_scope("nested_scope"): + with tf.compat.v1.variable_scope("dense_two"): + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=tf.compat.v1.ones_initializer(), + name="kernel", + ) + bias = tf.compat.v1.get_variable( + shape=[ + self.units, + ], + initializer=tf.compat.v1.zeros_initializer(), + name="bias", + ) + out = tf.matmul(out, kernel) + out = tf.nn.bias_add(out, bias) + return out + + layer = WrappedDenseLayer(10) + out = layer(tf.ones(shape=(5, 5))) + weights = {x.name: x for x in layer.variables} + + # Verify the correct output, regularization losses, + variables were + # made + self.assertEqual( + weights.keys(), + { + "dense_one/bias:0", + "dense_one/kernel:0", + "nested_scope/dense_two/bias:0", + "nested_scope/dense_two/kernel:0", + }, + ) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) + self.assertAllEqual(tf.add_n(layer.losses), 1.5) + + # Verify reuse by updating the variables then re-running + weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) + weights["nested_scope/dense_two/kernel:0"].assign( + tf.ones(shape=(10, 10)) * 2 + ) + out = layer(tf.ones(shape=(5, 5))) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 200) + self.assertAllEqual(tf.add_n(layer.losses), 6) + + def test_compat_v1_layer(self): + # Test the shim when using `compat.v1` layers + + class WrappedDenseLayer(base_layer.Layer): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs, training=None): + out = core_layers.dense( + inputs, + self.units, + name="dense_one", + kernel_initializer=tf.compat.v1.ones_initializer(), + kernel_regularizer="l2", + ) + with tf.compat.v1.variable_scope("nested_scope"): + out = core_layers.dense( + out, + self.units, + name="dense_two", + kernel_initializer=tf.compat.v1.ones_initializer(), + kernel_regularizer="l2", + ) + return out + + layer = WrappedDenseLayer(10) + out = layer(tf.ones(shape=(5, 5))) + weights = {x.name: x for x in layer.variables} + + # Verify the correct output, losses, + variables were made + self.assertEqual( + weights.keys(), + { + "dense_one/bias:0", + "dense_one/kernel:0", + "nested_scope/dense_two/bias:0", + "nested_scope/dense_two/kernel:0", + }, + ) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) + self.assertAllEqual(tf.add_n(layer.losses), 1.5) + + # Verify reuse by updating the variables then re-running + weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) + weights["nested_scope/dense_two/kernel:0"].assign( + tf.ones(shape=(10, 10)) * 2 + ) + out = layer(tf.ones(shape=(5, 5))) + self.assertAllEqual(out, 
tf.ones(shape=(5, 10)) * 200) + self.assertAllEqual(tf.add_n(layer.losses), 6) + + def test_shim_exporting(self): + class WrappedDenseLayer(base_layer.Layer): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs, training=None): + out = core_layers.dense( + inputs, + self.units, + name="dense_one", + kernel_initializer=tf.compat.v1.ones_initializer(), + kernel_regularizer="l2", + ) + with tf.compat.v1.variable_scope("nested_scope"): + out = core_layers.dense( + out, + self.units, + name="dense_two", + kernel_initializer=tf.compat.v1.ones_initializer(), + kernel_regularizer="l2", + ) + return out + + layer = WrappedDenseLayer(10) + layer(tf.ones(shape=(5, 5))) + + tmp_dir = self.get_temp_dir() + + # Try exporting the layer directly + tf.saved_model.save(layer, tmp_dir) + + # Try exporting the layer nested in a functional model + # This is where saving reflection gets tricky due to + # trying to replace the passed training arg in training=True + # and training=False modes inp = input_layer_module.Input(shape=(5, 5)) - dense_layer = core.Dense( - 10, name="dense", kernel_regularizer="l2", + outs = layer(inp) + model = models.Model(inp, outs) + tf.saved_model.save(model, tmp_dir) + + def test_variable_store_scope_get_variable(self): + # Test the module shim when using `get_variable` (and regularizers) + # directly + + class WrappedDenseLayer(tf.Module): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + self._variable_store = variable_scope_shim._EagerVariableStore() + + def get_compat_v1_regularization_losses(self): + """Dict w/ regularization losses from `get_variable`.""" + return { + name: regularizer() + for name, regularizer in self._variable_store._regularizers.items() # noqa: E501 + } + + def __call__(self, inputs, training=None): + with self._variable_store.scope(): + out = inputs + with tf.compat.v1.variable_scope("dense_one"): + # The weights are created with a `regularizer`, + # so the layer should track their regularization losses + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=tf.compat.v1.ones_initializer(), + name="kernel", + ) + bias = tf.compat.v1.get_variable( + shape=[ + self.units, + ], + initializer=tf.compat.v1.zeros_initializer(), + name="bias", + ) + out = tf.matmul(out, kernel) + out = tf.nn.bias_add(out, bias) + with tf.compat.v1.variable_scope("nested_scope"): + with tf.compat.v1.variable_scope("dense_two"): + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=tf.compat.v1.ones_initializer(), + name="kernel", + ) + bias = tf.compat.v1.get_variable( + shape=[ + self.units, + ], + initializer=tf.compat.v1.zeros_initializer(), + name="bias", + ) + out = tf.matmul(out, kernel) + out = tf.nn.bias_add(out, bias) + return out + + layer = WrappedDenseLayer(10) + out = layer(tf.ones(shape=(5, 5))) + weights = {x.name: x for x in layer.variables} + + # Verify the correct output, regularization losses, + variables were + # made + self.assertEqual( + weights.keys(), + { + "dense_one/bias:0", + "dense_one/kernel:0", + "nested_scope/dense_two/bias:0", + "nested_scope/dense_two/kernel:0", + }, + ) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) + self.assertAllEqual( + tf.add_n(layer.get_compat_v1_regularization_losses().values()), 1.5 + ) + + # Verify 
reuse by updating the variables then re-running + weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) + weights["nested_scope/dense_two/kernel:0"].assign( + tf.ones(shape=(10, 10)) * 2 + ) + out = layer(tf.ones(shape=(5, 5))) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 200) + self.assertAllEqual( + tf.add_n(layer.get_compat_v1_regularization_losses().values()), 6 + ) + + def test_module_get_variable(self): + # Test the module shim when using `get_variable` (and regularizers) + # directly + + class WrappedDenseLayer(VariableScopeModule): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + def forward_pass(self, inputs, training=None): + out = inputs + with tf.compat.v1.variable_scope("dense_one"): + # The weights are created with a `regularizer`, + # so the layer should track their regularization losses + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=tf.compat.v1.ones_initializer(), + name="kernel", + ) + bias = tf.compat.v1.get_variable( + shape=[ + self.units, + ], + initializer=tf.compat.v1.zeros_initializer(), + name="bias", + ) + out = tf.matmul(out, kernel) + out = tf.nn.bias_add(out, bias) + with tf.compat.v1.variable_scope("nested_scope"): + with tf.compat.v1.variable_scope("dense_two"): + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(), + initializer=tf.compat.v1.ones_initializer(), + name="kernel", + ) + bias = tf.compat.v1.get_variable( + shape=[ + self.units, + ], + initializer=tf.compat.v1.zeros_initializer(), + name="bias", + ) + out = tf.matmul(out, kernel) + out = tf.nn.bias_add(out, bias) + return out + + layer = WrappedDenseLayer(10) + out = layer(tf.ones(shape=(5, 5))) + weights = {x.name: x for x in layer.variables} + + # Verify the correct output, regularization losses, + variables were + # made + self.assertEqual( + weights.keys(), + { + "dense_one/bias:0", + "dense_one/kernel:0", + "nested_scope/dense_two/bias:0", + "nested_scope/dense_two/kernel:0", + }, + ) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) + self.assertAllEqual( + tf.add_n(layer.get_compat_v1_regularization_losses().values()), 1.5 + ) + + # Verify reuse by updating the variables then re-running + weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) + weights["nested_scope/dense_two/kernel:0"].assign( + tf.ones(shape=(10, 10)) * 2 + ) + out = layer(tf.ones(shape=(5, 5))) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 200) + self.assertAllEqual( + tf.add_n(layer.get_compat_v1_regularization_losses().values()), 6 + ) + + def test_module_compat_v1_layer(self): + # Test the module shim when using `compat.v1` layers + + class WrappedDenseLayer(VariableScopeModule): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + def forward_pass(self, inputs, training=None): + out = core_layers.dense( + inputs, + self.units, + name="dense_one", + kernel_initializer=tf.compat.v1.ones_initializer(), + kernel_regularizer="l2", + ) + with tf.compat.v1.variable_scope("nested_scope"): + out = core_layers.dense( + out, + self.units, + name="dense_two", + kernel_initializer=tf.compat.v1.ones_initializer(), + kernel_regularizer="l2", + ) + return out + + layer = WrappedDenseLayer(10) + out = layer(tf.ones(shape=(5, 5))) + weights = {x.name: x for x in layer.variables} + + # Verify the correct output, losses, + variables were made + 
self.assertEqual( + weights.keys(), + { + "dense_one/bias:0", + "dense_one/kernel:0", + "nested_scope/dense_two/bias:0", + "nested_scope/dense_two/kernel:0", + }, + ) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 50) + self.assertAllEqual( + tf.add_n(layer.get_compat_v1_regularization_losses().values()), 1.5 + ) + + # Verify reuse by updating the variables then re-running + weights["dense_one/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) + weights["nested_scope/dense_two/kernel:0"].assign( + tf.ones(shape=(10, 10)) * 2 + ) + out = layer(tf.ones(shape=(5, 5))) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 200) + self.assertAllEqual( + tf.add_n(layer.get_compat_v1_regularization_losses().values()), 6 + ) + + def test_shim_nesting(self): + # Test that nesting the shim in itself works + + class NestedLayer(base_layer.Layer): + def __init__(self, units, name, *args, **kwargs): + super().__init__(*args, name=name, **kwargs) + self.units = units + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs): + out = inputs + with tf.compat.v1.variable_scope(self.name): + # The weights are created with a `regularizer`, + # so the layer should track their regularization losses + kernel = tf.compat.v1.get_variable( + shape=[out.shape[-1], self.units], + regularizer=regularizers.L2(1.0), + initializer=tf.compat.v1.ones_initializer(), + name="kernel", + ) + bias = tf.compat.v1.get_variable( + shape=[ + self.units, + ], + initializer=tf.compat.v1.initializers.zeros, + name="bias", + ) + out = tf.linalg.matmul(out, kernel) + out = tf.compat.v1.nn.bias_add(out, bias) + return out + + class WrappedDenseLayer(base_layer.Layer): + def __init__(self, units, **kwargs): + super().__init__(**kwargs) + self.units = units + self.dense_layer_a = None + self.dense_layer_b = None + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs): + # Only create the nested tf.variable/module/layer/model if it + # has not already been created! + if not self.dense_layer_a: + self.dense_layer_a = NestedLayer( + self.units * 2, "dense_one" + ) + out = self.dense_layer_a(inputs) + if not self.dense_layer_b: + self.dense_layer_b = NestedLayer(self.units, "dense_two") + out = self.dense_layer_b(out) + return out + + layer = WrappedDenseLayer(5) + out = layer(tf.ones(shape=(1, 3))) + weights = {x.name: x for x in layer.variables} + + # Verify the correct output, losses, + variables were made + # (Specifically: no double-counting of any weights or reg. losses + # between nested components!) 
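The nesting test above depends on the shim's core contract: variables created through `tf.compat.v1.get_variable` inside a decorated `call()` are captured by the layer on the first call and looked up again, rather than recreated, on every later call. A minimal standalone sketch of that contract, using the public `tf.compat.v1.keras.utils.track_tf1_style_variables` export of the decorator under test; the `ScaleByKernel` layer and the `scale` scope name are illustrative assumptions, not part of this diff:

```python
import tensorflow as tf

class ScaleByKernel(tf.keras.layers.Layer):
    """Hypothetical layer showing the capture-and-reuse contract."""

    @tf.compat.v1.keras.utils.track_tf1_style_variables
    def call(self, inputs):
        with tf.compat.v1.variable_scope("scale"):
            # Created on the first call, found and reused on later calls.
            kernel = tf.compat.v1.get_variable(
                "kernel",
                shape=[],
                initializer=tf.compat.v1.ones_initializer(),
            )
        return inputs * kernel

layer = ScaleByKernel()
layer(tf.ones([2, 3]))  # first call creates scale/kernel:0
layer(tf.ones([2, 3]))  # second call reuses it; no new variables appear
print([v.name for v in layer.weights])  # ['scale/kernel:0']
```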
+ self.assertEqual( + {var.name for var in layer.trainable_weights}, + { + "dense_one/bias:0", + "dense_one/kernel:0", + "dense_two/bias:0", + "dense_two/kernel:0", + }, + ) + self.assertEqual( + {var.name for var in layer.dense_layer_a.weights}, + {"dense_one/bias:0", "dense_one/kernel:0"}, + ) + self.assertEqual( + {var.name for var in layer.dense_layer_b.weights}, + {"dense_two/bias:0", "dense_two/kernel:0"}, + ) + self.assertAllEqual(out, tf.ones(shape=(1, 5)) * 30) + self.assertAllEqual(tf.add_n(layer.dense_layer_a.losses), 30) + self.assertAllEqual(tf.add_n(layer.dense_layer_b.losses), 50) + self.assertAllEqual(tf.add_n(layer.losses), 80) + + # Verify reuse by updating the variables then re-running + weights["dense_one/kernel:0"].assign(tf.ones(shape=(3, 10)) * 2) + weights["dense_two/kernel:0"].assign(tf.ones(shape=(10, 5)) * 2) + out = layer(tf.ones(shape=(1, 3))) + self.assertAllEqual(out, tf.ones(shape=(1, 5)) * 120) + self.assertAllEqual(tf.add_n(layer.losses), 320) + + def test_compat_v1_make_template_in_shim_eager(self): + # Test the shim when using `compat.v1.make_template` + # Verify it works correctly in eager + layer = CompatV1TemplateScaleByY() + for _ in range(3): + # Use multiple calls to verify that no new weights get created + self.assertAllEqual( + layer(tf.ones(shape=(2, 3))), tf.constant(1.5, shape=(2, 3)) ) - model = training_module.Model(inputs=inp, outputs=dense_layer(inp)) - return model - - @variable_scope_shim.track_tf1_style_variables - def call(self, inputs): - model = variable_scope_shim.get_or_create_layer( - "dense_model", self.build_model) - return model(inputs) - - layer = NestedLayer(10) - - @tf.function - def foo(x): - return layer(x), tf.add_n(layer.losses) - - # Verify inner model is reused - out1, loss1 = foo(tf.ones(shape=(5, 5))) - out2, loss2 = foo(tf.ones(shape=(5, 5))) - self.assertAllEqual(out1, out2) - self.assertAllEqual(loss1, loss2) - - @tf_test_utils.run_deprecated_v1 - def test_get_or_create_layer_graph(self): - - class NestedLayer(object): + self.assertAllEqual( + {var.name: var.numpy() for var in layer.weights}, + {"foo/scale_by_y/y:0": 1.5}, + ) + self.assertAllEqual( + tf.add_n(layer.losses), regularizers.L2()(layer.weights[0]) + ) + + def test_compat_v1_make_template_in_shim_tf_function(self): + # Test the shim when using `compat.v1.make_template` + # Verify it works correctly in a tf.function + # when made outside the function + layer = CompatV1TemplateScaleByY() + + @tf.function + def foo(x): + return layer(x), tf.add_n(layer.losses) + + for _ in range(3): + # Use multiple calls to verify that no new weights get created + out, loss = foo(tf.ones(shape=(2, 3))) + self.assertAllEqual(out, tf.constant(1.5, shape=(2, 3))) + self.assertAllEqual(loss, regularizers.L2()(layer.weights[0])) + self.assertAllEqual( + {var.name: var.numpy() for var in layer.weights}, + {"foo/scale_by_y/y:0": 1.5}, + ) + + def test_compat_v1_make_template_in_trace_in_shim(self): + # Test the shim when using `compat.v1.make_template` + # Verify it works correctly when the make_template/layer/shim + # is created on the first tf.function trace! 
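The next test (the `bar` function below) covers the trickiest template case: the layer, and with it the `tf.compat.v1.make_template`, is created during the first `tf.function` trace. For readers unfamiliar with templates, here is a hedged stand-in for the `CompatV1TemplateScaleByY` helper these tests rely on (the real helper is defined earlier in this test file and also attaches a regularizer, which this sketch omits):

```python
import tensorflow as tf

class TemplateScaleLayer(tf.keras.layers.Layer):
    """Illustrative stand-in, not the test helper itself."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        def scale_by_y(x):
            # get_variable inside a template shares `y` across calls.
            y = tf.compat.v1.get_variable(
                "y",
                shape=[],
                initializer=tf.compat.v1.constant_initializer(1.5),
            )
            return x * y

        self.scale_by_y = tf.compat.v1.make_template("scale_by_y", scale_by_y)

    @tf.compat.v1.keras.utils.track_tf1_style_variables
    def call(self, inputs):
        return self.scale_by_y(inputs)

layer = TemplateScaleLayer()
print(layer(tf.ones([2, 3])))  # every element scaled to 1.5
```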
+ layers = {} + + @tf.function + def bar(x): + if "layer" not in layers: + layers["layer"] = CompatV1TemplateScaleByY() + layer = layers["layer"] + return layer(x), tf.add_n(layer.losses) + + for _ in range(3): + # Use multiple calls to verify that no new weights get created + out, loss = bar(tf.ones(shape=(2, 3))) + self.assertAllEqual(out, tf.constant(1.5, shape=(2, 3))) + self.assertAllEqual( + loss, regularizers.L2()(layers["layer"].weights[0]) + ) + self.assertAllEqual( + {var.name: var.numpy() for var in layers["layer"].weights}, + {"foo/scale_by_y/y:0": 1.5}, + ) + + def test_only_track_get_variable(self): + # Test the shim does not try tracking or reusing variables + # that were not created by get_variable. These variables/modules/layers + # need to be tracked separately + + class WrappedDenseLayer(base_layer.Layer): + def __init__(self, units, **kwargs): + super().__init__(**kwargs) + self.units = units + self._dense_model = None + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs): + dense_layer = core.Dense( + self.units, + name="dense", + kernel_initializer=tf.compat.v1.ones_initializer(), + kernel_regularizer="l2", + ) + return dense_layer(inputs) + + layer = WrappedDenseLayer(10) + out = layer(tf.ones(shape=(5, 5))) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 5) + + self.assertEmpty(layer.weights) + + def test_embedded_keras_model(self): + # Test the shim when embedding a Keras model inside of it + # And assigning the model to an attribute + + class WrappedDenseLayer(base_layer.Layer): + def __init__(self, units, **kwargs): + super().__init__(**kwargs) + self.units = units + self._dense_model = None + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs): + if not self._dense_model: + inp = input_layer_module.Input(shape=inputs.shape) + dense_layer = core.Dense( + self.units, + name="dense", + kernel_initializer=tf.compat.v1.ones_initializer(), + kernel_regularizer="l2", + ) + self._dense_model = training_module.Model( + inputs=inp, outputs=dense_layer(inp) + ) + return self._dense_model(inputs) + + layer = WrappedDenseLayer(10) + out = layer(tf.ones(shape=(5, 5))) + weights = {x.name: x for x in layer.variables} + + # Verify the correct output, losses, + variables were made + self.assertEqual(weights.keys(), {"dense/bias:0", "dense/kernel:0"}) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 5) + self.assertAllEqual(tf.add_n(layer.losses), 0.5) + + # Verify reuse by updating the variables then re-running + weights["dense/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) + out = layer(tf.ones(shape=(5, 5))) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 10) + self.assertAllEqual(tf.add_n(layer.losses), 2) + + def test_embedded_keras_model_in_module(self): + # Test the module shim when embedding a Keras model inside of it + # And assigning the model to an attribute + + class WrappedDenseLayer(VariableScopeModule): + def __init__(self, units, **kwargs): + super().__init__(**kwargs) + self.units = units + self._dense_model = None + + def forward_pass(self, inputs): + if not self._dense_model: + inp = input_layer_module.Input(shape=inputs.shape) + dense_layer = core.Dense( + self.units, + name="dense", + kernel_initializer=tf.compat.v1.ones_initializer(), + kernel_regularizer="l2", + ) + self._dense_model = training_module.Model( + inputs=inp, outputs=dense_layer(inp) + ) + return self._dense_model(inputs) + + layer = WrappedDenseLayer(10) + out = layer(tf.ones(shape=(5, 5))) + weights = {x.name: x for x in layer.variables} + 
+ # Verify the correct output, losses, + variables were made + self.assertEqual(weights.keys(), {"dense/bias:0", "dense/kernel:0"}) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 5) + + # The module shim will only track regularization losses made by + # compat.v1.layers and compat.v1.get_variable. Other regularization + # losses must be tracked by separate user-created mechanisms. + self.assertEmpty(layer.get_compat_v1_regularization_losses()) + + # Verify reuse by updating the variables then re-running + weights["dense/kernel:0"].assign(tf.ones(shape=(5, 10)) * 2) + out = layer(tf.ones(shape=(5, 5))) + self.assertAllEqual(out, tf.ones(shape=(5, 10)) * 10) + + # The module shim will only track regularization losses made by + # compat.v1.layers and compat.v1.get_variable. Other regularization + # losses must be tracked by separate user-created mechanisms. + self.assertEmpty(layer.get_compat_v1_regularization_losses()) + + def test_training_arg(self): + # Test the shim when passing in a Keras `training` arg + + class TrainingCheckLayer(base_layer.Layer): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs, training=None): + if training: + out = core_layers.dense( + inputs, self.units, name="dense_training" + ) + else: + out = core_layers.dense( + inputs, self.units, name="dense_no_training" + ) + return out + + layer = TrainingCheckLayer(10) + layer(tf.ones(shape=(5, 5)), training=True) + weights = {x.name: x for x in layer.variables} + + # Verify the correct variables were made + self.assertEqual( + weights.keys(), {"dense_training/bias:0", "dense_training/kernel:0"} + ) - def __init__(self, units, *args, **kwargs): - super().__init__(*args, **kwargs) - self.units = units + layer = TrainingCheckLayer(10) + layer(tf.ones(shape=(5, 5))) + weights = {x.name: x for x in layer.variables} - def build_model(self): - inp = input_layer_module.Input(shape=(5, 5)) - dense_layer = core.Dense( - 10, name="dense", kernel_regularizer="l2", - kernel_initializer=tf.compat.v1.ones_initializer()) - model = training_module.Model(inputs=inp, outputs=dense_layer(inp)) - return model - - def __call__(self, inputs): - model = variable_scope_shim.get_or_create_layer( - "dense_model", self.build_model) - return model(inputs) + # Verify the correct variables were made + self.assertEqual( + weights.keys(), + {"dense_no_training/bias:0", "dense_no_training/kernel:0"}, + ) - with self.cached_session(): - layer = NestedLayer(10) - x = tf.ones(shape=(5, 5)) + def test_incorrect_decoration(self): + # Raise an error if you incorrectly decorate a method + # that is not a method of a Module, layer, or model: + @variable_scope_shim.track_tf1_style_variables + def foo(x): + return x * 2 - out1 = layer(tf.expand_dims(x, 0)) - self.evaluate(tf.compat.v1.global_variables_initializer()) + with self.assertRaisesRegex(ValueError, "does not extend"): + foo(tf.ones(shape=(4, 4))) - # verify output - self.assertEqual(out1.shape, tf.TensorShape([1, 5, 10])) - self.assertAllEqual(out1, tf.ones(shape=(1, 5, 10)) * 5) - # verify variables are tracked - weights = {var.name for var in tf.compat.v1.trainable_variables()} - self.assertEqual(weights, {"dense/bias:0", "dense/kernel:0"}) +class GetOrCreateLayerTest(tf.test.TestCase, parameterized.TestCase): + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_get_or_create_layer_with_regularizer_eager(self): + class 
NestedLayer(base_layer.Layer): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + def build_model(self): + inp = input_layer_module.Input(shape=(5, 5)) + dense_layer = core.Dense( + 10, + name="dense", + kernel_regularizer="l2", + kernel_initializer=tf.compat.v1.ones_initializer(), + ) + model = training_module.Model( + inputs=inp, outputs=dense_layer(inp) + ) + return model + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs): + # enter a variable scope to check module key naming + with tf.compat.v1.variable_scope("test_scope"): + model = variable_scope_shim.get_or_create_layer( + "dense_model", self.build_model + ) + return model(inputs) + + layer = NestedLayer(10) + x = tf.ones(shape=(5, 5)) + + out1 = layer(tf.expand_dims(x, 0)) + + model1 = layer.submodules[0]._layers["test_scope/dense_model"] + + out2 = layer(tf.expand_dims(x, 0)) + # Verify model produces same output on successive calls with same input + self.assertAllEqual(out1, out2) + + # Verify the model used on subsequent calls is the same + model2 = layer.submodules[0]._layers["test_scope/dense_model"] + self.assertIs(model1, model2) + + # Verify that stored layer computes outputs and losses correctly + weights = {x.name: x for x in layer.variables} + self.assertEqual(weights.keys(), {"dense/bias:0", "dense/kernel:0"}) + self.assertAllEqual(out2, tf.ones(shape=(1, 5, 10)) * 5) + self.assertAllEqual(layer.losses, [0.5]) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_get_or_create_layer_no_regularizer_eager(self): + class NestedLayer(base_layer.Layer): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + def build_model(self): + inp = input_layer_module.Input(shape=(5, 5)) + dense_layer = core.Dense( + 10, + name="dense", + kernel_initializer=tf.compat.v1.ones_initializer(), + ) + model = training_module.Model( + inputs=inp, outputs=dense_layer(inp) + ) + return model + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs): + # enter a variable scope to check module key naming + with tf.compat.v1.variable_scope("test_scope"): + model = variable_scope_shim.get_or_create_layer( + "dense_model", self.build_model + ) + return model(inputs) + + layer = NestedLayer(10) + x = tf.ones(shape=(5, 5)) + + out1 = layer(tf.expand_dims(x, 0)) + + model1 = layer.submodules[0]._layers["test_scope/dense_model"] + + out2 = layer(tf.expand_dims(x, 0)) + # Verify model produces same output on successive calls with same input + self.assertAllEqual(out1, out2) + + # Verify the model used on subsequent calls is the same + model2 = layer.submodules[0]._layers["test_scope/dense_model"] + self.assertIs(model1, model2) + + # Verify that stored layer computes outputs and losses correctly + weights = {x.name: x for x in layer.variables} + self.assertEqual(weights.keys(), {"dense/bias:0", "dense/kernel:0"}) + self.assertAllEqual(out2, tf.ones(shape=(1, 5, 10)) * 5) + self.assertAllEqual(layer.losses, [0.0]) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_get_or_create_layer_tf_function(self): + class NestedLayer(base_layer.Layer): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + def build_model(self): + inp = input_layer_module.Input(shape=(5, 5)) + dense_layer = core.Dense( + 10, + name="dense", + kernel_regularizer="l2", + ) + model = training_module.Model( + inputs=inp, 
outputs=dense_layer(inp) + ) + return model + + @variable_scope_shim.track_tf1_style_variables + def call(self, inputs): + model = variable_scope_shim.get_or_create_layer( + "dense_model", self.build_model + ) + return model(inputs) + + layer = NestedLayer(10) + + @tf.function + def foo(x): + return layer(x), tf.add_n(layer.losses) + + # Verify inner model is reused + out1, loss1 = foo(tf.ones(shape=(5, 5))) + out2, loss2 = foo(tf.ones(shape=(5, 5))) + self.assertAllEqual(out1, out2) + self.assertAllEqual(loss1, loss2) + + @tf_test_utils.run_deprecated_v1 + def test_get_or_create_layer_graph(self): + class NestedLayer(object): + def __init__(self, units, *args, **kwargs): + super().__init__(*args, **kwargs) + self.units = units + + def build_model(self): + inp = input_layer_module.Input(shape=(5, 5)) + dense_layer = core.Dense( + 10, + name="dense", + kernel_regularizer="l2", + kernel_initializer=tf.compat.v1.ones_initializer(), + ) + model = training_module.Model( + inputs=inp, outputs=dense_layer(inp) + ) + return model + + def __call__(self, inputs): + model = variable_scope_shim.get_or_create_layer( + "dense_model", self.build_model + ) + return model(inputs) + + with self.cached_session(): + layer = NestedLayer(10) + x = tf.ones(shape=(5, 5)) + + out1 = layer(tf.expand_dims(x, 0)) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # verify output + self.assertEqual(out1.shape, tf.TensorShape([1, 5, 10])) + self.assertAllEqual(out1, tf.ones(shape=(1, 5, 10)) * 5) + + # verify variables are tracked + weights = {var.name for var in tf.compat.v1.trainable_variables()} + self.assertEqual(weights, {"dense/bias:0", "dense/kernel:0"}) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/losses.py b/keras/losses.py index fbffc3984493..dc325e67963c 100644 --- a/keras/losses.py +++ b/keras/losses.py @@ -12,20 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=g-classes-have-attributes + """Built-in loss functions.""" import abc import functools +import warnings + +import tensorflow.compat.v2 as tf + from keras import backend -from keras.saving.experimental import saving_lib -from keras.utils import generic_utils +from keras.saving import saving_lib +from keras.saving.legacy import serialization as legacy_serialization +from keras.saving.serialization_lib import deserialize_keras_object +from keras.saving.serialization_lib import serialize_keras_object from keras.utils import losses_utils from keras.utils import tf_utils -from keras.utils.generic_utils import deserialize_keras_object -from keras.utils.generic_utils import serialize_keras_object -import tensorflow.compat.v2 as tf + +# isort: off from tensorflow.python.ops.ragged import ragged_map_ops from tensorflow.python.ops.ragged import ragged_util from tensorflow.python.util import dispatch @@ -33,2269 +38,2806 @@ from tensorflow.tools.docs import doc_controls -@keras_export('keras.losses.Loss') +@keras_export("keras.losses.Loss") class Loss: - """Loss base class. + """Loss base class. - To be implemented by subclasses: - * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`. + To be implemented by subclasses: + * `call()`: Contains the logic for loss calculation using `y_true`, + `y_pred`. 
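Before the docstring's own `MeanSquaredError` example below, a runnable sanity sketch of the subclass contract just stated, using only public `tf.keras` APIs (the `MeanSquaredErrorDemo` name is an illustrative assumption):

```python
import tensorflow as tf

class MeanSquaredErrorDemo(tf.keras.losses.Loss):
    def call(self, y_true, y_pred):
        # Convert explicitly: unlike the built-in wrappers, a bare Loss
        # subclass receives y_true/y_pred exactly as passed by the caller.
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)
        return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)

mse = MeanSquaredErrorDemo()
# Per-sample losses are [0.5, 0.5]; the default AUTO /
# SUM_OVER_BATCH_SIZE reduction averages them to 0.5.
print(mse([[0.0, 1.0], [0.0, 0.0]], [[1.0, 1.0], [1.0, 0.0]]).numpy())
```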
- Example subclass implementation: + Example subclass implementation: - ```python - class MeanSquaredError(Loss): + ```python + class MeanSquaredError(Loss): - def call(self, y_true, y_pred): - return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1) - ``` - - When used with `tf.distribute.Strategy`, outside of built-in training loops - such as `tf.keras` `compile` and `fit`, please use 'SUM' or 'NONE' reduction - types, and reduce losses explicitly in your training loop. Using 'AUTO' or - 'SUM_OVER_BATCH_SIZE' will raise an error. + def call(self, y_true, y_pred): + return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1) + ``` - Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for more - details on this. + When using a Loss under a `tf.distribute.Strategy`, except when passing it + to `Model.compile()` for use by `Model.fit()`, please use reduction + types 'SUM' or 'NONE', and reduce losses explicitly. Using 'AUTO' or + 'SUM_OVER_BATCH_SIZE' will raise an error when calling the Loss object + from a custom training loop or from user-defined code in `Layer.call()`. + Please see this custom training + [tutorial](https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details on this. + """ - You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like: + def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None): + """Initializes `Loss` class. + + Args: + reduction: Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the + reduction option will be determined by the usage context. For + almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When + used under a `tf.distribute.Strategy`, except via + `Model.compile()` and `Model.fit()`, using `AUTO` or + `SUM_OVER_BATCH_SIZE` will raise an error. Please see this + custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: Optional name for the instance. + """ + losses_utils.ReductionV2.validate(reduction) + self.reduction = reduction + self.name = name + # SUM_OVER_BATCH is only allowed in losses managed by `fit` or + # CannedEstimators. + self._allow_sum_over_batch_size = False + self._set_name_scope() + + def _set_name_scope(self): + """Creates a valid `name_scope` name.""" + if self.name is None: + self._name_scope = self.__class__.__name__.strip("_") + elif self.name == "": + self._name_scope = "lambda" + else: + # E.g. '_my_loss' => 'my_loss' + self._name_scope = self.name.strip("_") + + def __call__(self, y_true, y_pred, sample_weight=None): + """Invokes the `Loss` instance. + + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, + except sparse loss functions such as sparse categorical + crossentropy where shape = `[batch_size, d0, .. dN-1]` + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]` + sample_weight: Optional `sample_weight` acts as a coefficient for + the loss. If a scalar is provided, then the loss is simply + scaled by the given value. If `sample_weight` is a tensor of + size `[batch_size]`, then the total loss for each sample of the + batch is rescaled by the corresponding element in the + `sample_weight` vector. If the shape of `sample_weight` is + `[batch_size, d0, .. dN-1]` (or can be broadcasted to this + shape), then each loss element of `y_pred` is scaled by the + corresponding value of `sample_weight`.
(Note on `dN-1`: all loss + functions reduce by 1 dimension, usually axis=-1.) + + Returns: + Weighted loss float `Tensor`. If `reduction` is `NONE`, this has + shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. + (Note `dN-1` because all loss functions reduce by 1 dimension, + usually axis=-1.) + + Raises: + ValueError: If the shape of `sample_weight` is invalid. + """ + # If we are wrapping a lambda function, strip '<>' from the name as it + # is not accepted in a scope name. + graph_ctx = tf_utils.graph_context_for_symbolic_tensors( + y_true, y_pred, sample_weight + ) + with backend.name_scope(self._name_scope), graph_ctx: + if tf.executing_eagerly(): + call_fn = self.call + else: + call_fn = tf.__internal__.autograph.tf_convert( + self.call, tf.__internal__.autograph.control_status_ctx() + ) + + losses = call_fn(y_true, y_pred) + + in_mask = losses_utils.get_mask(y_pred) + out_mask = losses_utils.get_mask(losses) + + if in_mask is not None and out_mask is not None: + mask = in_mask & out_mask + elif in_mask is not None: + mask = in_mask + elif out_mask is not None: + mask = out_mask + else: + mask = None + + reduction = self._get_reduction() + sample_weight = losses_utils.apply_valid_mask( + losses, sample_weight, mask, reduction + ) + return losses_utils.compute_weighted_loss( + losses, sample_weight, reduction=reduction + ) + + @classmethod + def from_config(cls, config): + """Instantiates a `Loss` from its config (output of `get_config()`). + + Args: + config: Output of `get_config()`. + + Returns: + A `Loss` instance. + """ + return cls(**config) + + def get_config(self): + """Returns the config dictionary for a `Loss` instance.""" + return {"reduction": self.reduction, "name": self.name} + + @abc.abstractmethod + @doc_controls.for_subclass_implementers + def call(self, y_true, y_pred): + """Invokes the `Loss` instance. + + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, + except sparse loss functions such as sparse categorical + crossentropy where shape = `[batch_size, d0, .. dN-1]` + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]` + + Returns: + Loss values with the shape `[batch_size, d0, .. dN-1]`. + """ + raise NotImplementedError("Must be implemented in subclasses.") + + def _get_reduction(self): + """Handles `AUTO` reduction cases and returns the reduction value.""" + if ( + not self._allow_sum_over_batch_size + and tf.distribute.has_strategy() + and ( + self.reduction == losses_utils.ReductionV2.AUTO + or self.reduction + == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE + ) + ): + raise ValueError( + "Please use `tf.keras.losses.Reduction.SUM` or " + "`tf.keras.losses.Reduction.NONE` for loss reduction when " + "losses are used with `tf.distribute.Strategy`, " + "except for specifying losses in `Model.compile()` " + "for use by the built-in training loop `Model.fit()`.\n" + "Please see https://www.tensorflow.org/tutorials" + "/distribute/custom_training for more details." + ) + + if self.reduction == losses_utils.ReductionV2.AUTO: + return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE + return self.reduction + + +@keras_export("keras.__internal__.losses.LossFunctionWrapper", v1=[]) +class LossFunctionWrapper(Loss): + """Wraps a loss function in the `Loss` class.""" + + def __init__( + self, fn, reduction=losses_utils.ReductionV2.AUTO, name=None, **kwargs + ): + """Initializes `LossFunctionWrapper` class. + + Args: + fn: The loss function to wrap, with signature `fn(y_true, y_pred, + **kwargs)`.
+ reduction: Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the + reduction option will be determined by the usage context. For + almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When + used under a `tf.distribute.Strategy`, except via + `Model.compile()` and `Model.fit()`, using `AUTO` or + `SUM_OVER_BATCH_SIZE` will raise an error. Please see this + custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: Optional name for the instance. + **kwargs: The keyword arguments that are passed on to `fn`. + """ + super().__init__(reduction=reduction, name=name) + self.fn = fn + self._fn_kwargs = kwargs - ```python - with strategy.scope(): - loss_obj = tf.keras.losses.CategoricalCrossentropy( - reduction=tf.keras.losses.Reduction.NONE) - .... - loss = (tf.reduce_sum(loss_obj(labels, predictions)) * - (1. / global_batch_size)) - ``` - """ + def call(self, y_true, y_pred): + """Invokes the `LossFunctionWrapper` instance. + + Args: + y_true: Ground truth values. + y_pred: The predicted values. + + Returns: + Loss values per sample. + """ + if tf.is_tensor(y_pred) and tf.is_tensor(y_true): + y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( + y_pred, y_true + ) + + ag_fn = tf.__internal__.autograph.tf_convert( + self.fn, tf.__internal__.autograph.control_status_ctx() + ) + return ag_fn(y_true, y_pred, **self._fn_kwargs) + + def get_config(self): + config = {} + for k, v in self._fn_kwargs.items(): + config[k] = ( + backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v + ) + + if saving_lib.saving_v3_enabled(): + from keras.utils import get_registered_name + + config["fn"] = get_registered_name(self.fn) + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + """Instantiates a `Loss` from its config (output of `get_config()`). + + Args: + config: Output of `get_config()`. + + Returns: + A `keras.losses.Loss` instance. + """ + if saving_lib.saving_v3_enabled(): + fn_name = config.pop("fn", None) + if fn_name and cls is LossFunctionWrapper: + config["fn"] = get(fn_name) + return cls(**config) + + +@keras_export("keras.losses.MeanSquaredError") +class MeanSquaredError(LossFunctionWrapper): + """Computes the mean of squares of errors between labels and predictions. - def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None): - """Initializes `Loss` class. + `loss = mean(square(y_true - y_pred))` - Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. - """ - losses_utils.ReductionV2.validate(reduction) - self.reduction = reduction - self.name = name - # SUM_OVER_BATCH is only allowed in losses managed by `fit` or - # CannedEstimators. 
- self._allow_sum_over_batch_size = False - self._set_name_scope() - - def _set_name_scope(self): - """Creates a valid `name_scope` name.""" - if self.name is None: - self._name_scope = self.__class__.__name__ - elif self.name == '': - self._name_scope = 'lambda' - else: - # E.g. '_my_loss' => 'my_loss' - self._name_scope = self.name.strip('_') + Standalone usage: - def __call__(self, y_true, y_pred, sample_weight=None): - """Invokes the `Loss` instance. + >>> y_true = [[0., 1.], [0., 0.]] + >>> y_pred = [[1., 1.], [1., 0.]] + >>> # Using 'auto'/'sum_over_batch_size' reduction type. + >>> mse = tf.keras.losses.MeanSquaredError() + >>> mse(y_true, y_pred).numpy() + 0.5 - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except - sparse loss functions such as sparse categorical crossentropy where - shape = `[batch_size, d0, .. dN-1]` - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]` - sample_weight: Optional `sample_weight` acts as a coefficient for the - loss. If a scalar is provided, then the loss is simply scaled by the - given value. If `sample_weight` is a tensor of size `[batch_size]`, then - the total loss for each sample of the batch is rescaled by the - corresponding element in the `sample_weight` vector. If the shape of - `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted to - this shape), then each loss element of `y_pred` is scaled - by the corresponding value of `sample_weight`. (Note on`dN-1`: all loss - functions reduce by 1 dimension, usually axis=-1.) + >>> # Calling with 'sample_weight'. + >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() + 0.25 - Returns: - Weighted loss float `Tensor`. If `reduction` is `NONE`, this has - shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note `dN-1` - because all loss functions reduce by 1 dimension, usually axis=-1.) + >>> # Using 'sum' reduction type. + >>> mse = tf.keras.losses.MeanSquaredError( + ... reduction=tf.keras.losses.Reduction.SUM) + >>> mse(y_true, y_pred).numpy() + 1.0 - Raises: - ValueError: If the shape of `sample_weight` is invalid. - """ - # If we are wrapping a lambda function strip '<>' from the name as it is not - # accepted in scope name. - graph_ctx = tf_utils.graph_context_for_symbolic_tensors( - y_true, y_pred, sample_weight) - with backend.name_scope(self._name_scope), graph_ctx: - if tf.executing_eagerly(): - call_fn = self.call - else: - call_fn = tf.__internal__.autograph.tf_convert(self.call, tf.__internal__.autograph.control_status_ctx()) - losses = call_fn(y_true, y_pred) - return losses_utils.compute_weighted_loss( - losses, sample_weight, reduction=self._get_reduction()) - - @classmethod - def from_config(cls, config): - """Instantiates a `Loss` from its config (output of `get_config()`). + >>> # Using 'none' reduction type. + >>> mse = tf.keras.losses.MeanSquaredError( + ... reduction=tf.keras.losses.Reduction.NONE) + >>> mse(y_true, y_pred).numpy() + array([0.5, 0.5], dtype=float32) - Args: - config: Output of `get_config()`. + Usage with the `compile()` API: - Returns: - A `Loss` instance. + ```python + model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError()) + ``` """ - return cls(**config) - def get_config(self): - """Returns the config dictionary for a `Loss` instance.""" - return {'reduction': self.reduction, 'name': self.name} + def __init__( + self, reduction=losses_utils.ReductionV2.AUTO, name="mean_squared_error" + ): + """Initializes `MeanSquaredError` instance. 
+ + Args: + reduction: Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the + reduction option will be determined by the usage context. For + almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When + used under a `tf.distribute.Strategy`, except via + `Model.compile()` and `Model.fit()`, using `AUTO` or + `SUM_OVER_BATCH_SIZE` will raise an error. Please see this + custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: Optional name for the instance. Defaults to + 'mean_squared_error'. + """ + super().__init__(mean_squared_error, name=name, reduction=reduction) + + +@keras_export("keras.losses.MeanAbsoluteError") +class MeanAbsoluteError(LossFunctionWrapper): + """Computes the mean of absolute difference between labels and predictions. - @abc.abstractmethod - @doc_controls.for_subclass_implementers - def call(self, y_true, y_pred): - """Invokes the `Loss` instance. + `loss = mean(abs(y_true - y_pred))` - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except - sparse loss functions such as sparse categorical crossentropy where - shape = `[batch_size, d0, .. dN-1]` - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]` + Standalone usage: - Returns: - Loss values with the shape `[batch_size, d0, .. dN-1]`. - """ - raise NotImplementedError('Must be implemented in subclasses.') - - def _get_reduction(self): - """Handles `AUTO` reduction cases and returns the reduction value.""" - if (not self._allow_sum_over_batch_size and - tf.distribute.has_strategy() and - (self.reduction == losses_utils.ReductionV2.AUTO or - self.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)): - raise ValueError( - 'Please use `tf.keras.losses.Reduction.SUM` or ' - '`tf.keras.losses.Reduction.NONE` for loss reduction when losses are ' - 'used with `tf.distribute.Strategy` outside of the built-in training ' - 'loops. You can implement ' - '`tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` using global batch ' - 'size like:\n```\nwith strategy.scope():\n' - ' loss_obj = tf.keras.losses.CategoricalCrossentropy(' - 'reduction=tf.keras.losses.Reduction.NONE)\n....\n' - ' loss = tf.reduce_sum(loss_obj(labels, predictions)) * ' - '(1. / global_batch_size)\n```\nPlease see ' - 'https://www.tensorflow.org/tutorials/distribute/custom_training' - ' for more details.') - - if self.reduction == losses_utils.ReductionV2.AUTO: - return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE - return self.reduction + >>> y_true = [[0., 1.], [0., 0.]] + >>> y_pred = [[1., 1.], [1., 0.]] + >>> # Using 'auto'/'sum_over_batch_size' reduction type. + >>> mae = tf.keras.losses.MeanAbsoluteError() + >>> mae(y_true, y_pred).numpy() + 0.5 + >>> # Calling with 'sample_weight'. + >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() + 0.25 -class LossFunctionWrapper(Loss): - """Wraps a loss function in the `Loss` class.""" + >>> # Using 'sum' reduction type. + >>> mae = tf.keras.losses.MeanAbsoluteError( + ... reduction=tf.keras.losses.Reduction.SUM) + >>> mae(y_true, y_pred).numpy() + 1.0 - def __init__(self, - fn, - reduction=losses_utils.ReductionV2.AUTO, - name=None, - **kwargs): - """Initializes `LossFunctionWrapper` class. + >>> # Using 'none' reduction type. + >>> mae = tf.keras.losses.MeanAbsoluteError( + ... 
reduction=tf.keras.losses.Reduction.NONE) + >>> mae(y_true, y_pred).numpy() + array([0.5, 0.5], dtype=float32) - Args: - fn: The loss function to wrap, with signature `fn(y_true, y_pred, - **kwargs)`. - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. - **kwargs: The keyword arguments that are passed on to `fn`. + Usage with the `compile()` API: + + ```python + model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError()) + ``` """ - super().__init__(reduction=reduction, name=name) - self.fn = fn - self._fn_kwargs = kwargs - def call(self, y_true, y_pred): - """Invokes the `LossFunctionWrapper` instance. + def __init__( + self, + reduction=losses_utils.ReductionV2.AUTO, + name="mean_absolute_error", + ): + """Initializes `MeanAbsoluteError` instance. + + Args: + reduction: Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the + reduction option will be determined by the usage context. For + almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When + used under a `tf.distribute.Strategy`, except via + `Model.compile()` and `Model.fit()`, using `AUTO` or + `SUM_OVER_BATCH_SIZE` will raise an error. Please see this + custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: Optional name for the instance. Defaults to + 'mean_absolute_error'. + """ + super().__init__(mean_absolute_error, name=name, reduction=reduction) + + +@keras_export("keras.losses.MeanAbsolutePercentageError") +class MeanAbsolutePercentageError(LossFunctionWrapper): + """Computes the mean absolute percentage error between `y_true` & `y_pred`. - Args: - y_true: Ground truth values. - y_pred: The predicted values. + Formula: - Returns: - Loss values per sample. - """ - if tf.is_tensor(y_pred) and tf.is_tensor(y_true): - y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true) + `loss = 100 * abs((y_true - y_pred) / y_true)` - ag_fn = tf.__internal__.autograph.tf_convert(self.fn, tf.__internal__.autograph.control_status_ctx()) - return ag_fn(y_true, y_pred, **self._fn_kwargs) + Note that to avoid dividing by zero, a small epsilon value + is added to the denominator. - def get_config(self): - config = {} - for k, v in self._fn_kwargs.items(): - config[k] = backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v + Standalone usage: - if saving_lib._ENABLED: # pylint: disable=protected-access - config['fn'] = generic_utils.get_registered_name(self.fn) + >>> y_true = [[2., 1.], [2., 3.]] + >>> y_pred = [[1., 1.], [1., 0.]] + >>> # Using 'auto'/'sum_over_batch_size' reduction type. + >>> mape = tf.keras.losses.MeanAbsolutePercentageError() + >>> mape(y_true, y_pred).numpy() + 50. - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + >>> # Calling with 'sample_weight'. + >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() + 20. 
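A hedged numpy recomputation of the MAPE doctest values, to make the arithmetic explicit (the epsilon the docstring mentions is ignored here, since no true value is zero; the per-sample values also match the `Reduction.NONE` output quoted further down):

```python
import numpy as np

y_true = np.array([[2.0, 1.0], [2.0, 3.0]])
y_pred = np.array([[1.0, 1.0], [1.0, 0.0]])

# 100 * |y_true - y_pred| / y_true, averaged over the last axis:
#   sample 0: 100 * mean(0.5, 0.0) = 25.
#   sample 1: 100 * mean(0.5, 1.0) = 75.
per_sample = 100.0 * np.mean(np.abs(y_true - y_pred) / y_true, axis=-1)
print(per_sample)         # [25. 75.]
print(per_sample.mean())  # 50.0, the default reduction shown above
# With sample_weight=[0.7, 0.3]: (25*0.7 + 75*0.3) / batch_size = 20.
print((per_sample * np.array([0.7, 0.3])).sum() / 2)  # 20.0
```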
- @classmethod - def from_config(cls, config): - """Instantiates a `Loss` from its config (output of `get_config()`). + >>> # Using 'sum' reduction type. + >>> mape = tf.keras.losses.MeanAbsolutePercentageError( + ... reduction=tf.keras.losses.Reduction.SUM) + >>> mape(y_true, y_pred).numpy() + 100. - Args: - config: Output of `get_config()`. + >>> # Using 'none' reduction type. + >>> mape = tf.keras.losses.MeanAbsolutePercentageError( + ... reduction=tf.keras.losses.Reduction.NONE) + >>> mape(y_true, y_pred).numpy() + array([25., 75.], dtype=float32) - Returns: - A `keras.losses.Loss` instance. + Usage with the `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss=tf.keras.losses.MeanAbsolutePercentageError()) + ``` """ - if saving_lib._ENABLED: # pylint: disable=protected-access - fn_name = config.pop('fn', None) - if fn_name and cls is LossFunctionWrapper: - config['fn'] = get(fn_name) - return cls(**config) + def __init__( + self, + reduction=losses_utils.ReductionV2.AUTO, + name="mean_absolute_percentage_error", + ): + """Initializes `MeanAbsolutePercentageError` instance. + + Args: + reduction: Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the + reduction option will be determined by the usage context. For + almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When + used under a `tf.distribute.Strategy`, except via + `Model.compile()` and `Model.fit()`, using `AUTO` or + `SUM_OVER_BATCH_SIZE` will raise an error. Please see this + custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: Optional name for the instance. Defaults to + 'mean_absolute_percentage_error'. + """ + super().__init__( + mean_absolute_percentage_error, name=name, reduction=reduction + ) + + +@keras_export("keras.losses.MeanSquaredLogarithmicError") +class MeanSquaredLogarithmicError(LossFunctionWrapper): + """Computes the mean squared logarithmic error between `y_true` & `y_pred`. -@keras_export('keras.losses.MeanSquaredError') -class MeanSquaredError(LossFunctionWrapper): - """Computes the mean of squares of errors between labels and predictions. + `loss = square(log(y_true + 1.) - log(y_pred + 1.))` - `loss = square(y_true - y_pred)` + Standalone usage: - Standalone usage: + >>> y_true = [[0., 1.], [0., 0.]] + >>> y_pred = [[1., 1.], [1., 0.]] + >>> # Using 'auto'/'sum_over_batch_size' reduction type. + >>> msle = tf.keras.losses.MeanSquaredLogarithmicError() + >>> msle(y_true, y_pred).numpy() + 0.240 - >>> y_true = [[0., 1.], [0., 0.]] - >>> y_pred = [[1., 1.], [1., 0.]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> mse = tf.keras.losses.MeanSquaredError() - >>> mse(y_true, y_pred).numpy() - 0.5 + >>> # Calling with 'sample_weight'. + >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() + 0.120 - >>> # Calling with 'sample_weight'. - >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() - 0.25 + >>> # Using 'sum' reduction type. + >>> msle = tf.keras.losses.MeanSquaredLogarithmicError( + ... reduction=tf.keras.losses.Reduction.SUM) + >>> msle(y_true, y_pred).numpy() + 0.480 - >>> # Using 'sum' reduction type. - >>> mse = tf.keras.losses.MeanSquaredError( - ... reduction=tf.keras.losses.Reduction.SUM) - >>> mse(y_true, y_pred).numpy() - 1.0 + >>> # Using 'none' reduction type. + >>> msle = tf.keras.losses.MeanSquaredLogarithmicError( + ... 
reduction=tf.keras.losses.Reduction.NONE) + >>> msle(y_true, y_pred).numpy() + array([0.240, 0.240], dtype=float32) - >>> # Using 'none' reduction type. - >>> mse = tf.keras.losses.MeanSquaredError( - ... reduction=tf.keras.losses.Reduction.NONE) - >>> mse(y_true, y_pred).numpy() - array([0.5, 0.5], dtype=float32) + Usage with the `compile()` API: - Usage with the `compile()` API: + ```python + model.compile(optimizer='sgd', + loss=tf.keras.losses.MeanSquaredLogarithmicError()) + ``` + """ - ```python - model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError()) - ``` - """ + def __init__( + self, + reduction=losses_utils.ReductionV2.AUTO, + name="mean_squared_logarithmic_error", + ): + """Initializes `MeanSquaredLogarithmicError` instance. + + Args: + reduction: Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the + reduction option will be determined by the usage context. For + almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When + used under a `tf.distribute.Strategy`, except via + `Model.compile()` and `Model.fit()`, using `AUTO` or + `SUM_OVER_BATCH_SIZE` will raise an error. Please see this + custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: Optional name for the instance. Defaults to + 'mean_squared_logarithmic_error'. + """ + super().__init__( + mean_squared_logarithmic_error, name=name, reduction=reduction + ) + + +@keras_export("keras.losses.BinaryCrossentropy") +class BinaryCrossentropy(LossFunctionWrapper): + """Computes the cross-entropy loss between true labels and predicted labels. - def __init__(self, - reduction=losses_utils.ReductionV2.AUTO, - name='mean_squared_error'): - """Initializes `MeanSquaredError` instance. + Use this cross-entropy loss for binary (0 or 1) classification applications. + The loss function requires the following inputs: - Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. Defaults to 'mean_squared_error'. + - `y_true` (true label): This is either 0 or 1. + - `y_pred` (predicted value): This is the model's prediction, i.e, a single + floating-point value which either represents a + [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] + when `from_logits=True`) or a probability (i.e, value in [0., 1.] when + `from_logits=False`). + + **Recommended Usage:** (set `from_logits=True`) + + With `tf.keras` API: + + ```python + model.compile( + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + .... 
+ ) + ``` + + As a standalone function: + + >>> # Example 1: (batch_size = 1, number of samples = 4) + >>> y_true = [0, 1, 0, 0] + >>> y_pred = [-18.6, 0.51, 2.94, -12.8] + >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True) + >>> bce(y_true, y_pred).numpy() + 0.865 + + >>> # Example 2: (batch_size = 2, number of samples = 4) + >>> y_true = [[0, 1], [0, 0]] + >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]] + >>> # Using default 'auto'/'sum_over_batch_size' reduction type. + >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True) + >>> bce(y_true, y_pred).numpy() + 0.865 + >>> # Using 'sample_weight' attribute + >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() + 0.243 + >>> # Using 'sum' reduction` type. + >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True, + ... reduction=tf.keras.losses.Reduction.SUM) + >>> bce(y_true, y_pred).numpy() + 1.730 + >>> # Using 'none' reduction type. + >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True, + ... reduction=tf.keras.losses.Reduction.NONE) + >>> bce(y_true, y_pred).numpy() + array([0.235, 1.496], dtype=float32) + + **Default Usage:** (set `from_logits=False`) + + >>> # Make the following updates to the above "Recommended Usage" section + >>> # 1. Set `from_logits=False` + >>> tf.keras.losses.BinaryCrossentropy() # OR ...('from_logits=False') + >>> # 2. Update `y_pred` to use probabilities instead of logits + >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]] """ - super().__init__(mean_squared_error, name=name, reduction=reduction) + def __init__( + self, + from_logits=False, + label_smoothing=0.0, + axis=-1, + reduction=losses_utils.ReductionV2.AUTO, + name="binary_crossentropy", + ): + """Initializes `BinaryCrossentropy` instance. + + Args: + from_logits: Whether to interpret `y_pred` as a tensor of + [logit](https://en.wikipedia.org/wiki/Logit) values. By default, + we assume that `y_pred` contains probabilities (i.e., values in + [0, 1]). + label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. + When > 0, we compute the loss between the predicted labels and a + smoothed version of the true labels, where the smoothing + squeezes the labels towards 0.5. Larger values of + `label_smoothing` correspond to heavier smoothing. + axis: The axis along which to compute crossentropy (the features + axis). Defaults to -1. + reduction: Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the + reduction option will be determined by the usage context. For + almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When + used under a `tf.distribute.Strategy`, except via + `Model.compile()` and `Model.fit()`, using `AUTO` or + `SUM_OVER_BATCH_SIZE` will raise an error. Please see this + custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: Name for the op. Defaults to 'binary_crossentropy'. + """ + super().__init__( + binary_crossentropy, + name=name, + reduction=reduction, + from_logits=from_logits, + label_smoothing=label_smoothing, + axis=axis, + ) + self.from_logits = from_logits + + +@keras_export("keras.losses.BinaryFocalCrossentropy") +class BinaryFocalCrossentropy(LossFunctionWrapper): + """Computes focal cross-entropy loss between true labels and predictions. -@keras_export('keras.losses.MeanAbsoluteError') -class MeanAbsoluteError(LossFunctionWrapper): - """Computes the mean of absolute difference between labels and predictions. 
+
+
+@keras_export("keras.losses.BinaryFocalCrossentropy")
+class BinaryFocalCrossentropy(LossFunctionWrapper):
+    """Computes focal cross-entropy loss between true labels and predictions.
-@keras_export('keras.losses.MeanAbsoluteError')
-class MeanAbsoluteError(LossFunctionWrapper):
-  """Computes the mean of absolute difference between labels and predictions.
+
+    Binary cross-entropy loss is often used for binary (0 or 1) classification
+    tasks. The loss function requires the following inputs:
-
-  `loss = abs(y_true - y_pred)`
+
+    - `y_true` (true label): This is either 0 or 1.
+    - `y_pred` (predicted value): This is the model's prediction, i.e., a single
+      floating-point value which either represents a
+      [logit](https://en.wikipedia.org/wiki/Logit) (i.e., value in [-inf, inf]
+      when `from_logits=True`) or a probability (i.e., value in [0., 1.] when
+      `from_logits=False`).
-
-  Standalone usage:
+
+    According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
+    helps to apply a "focal factor" to down-weight easy examples and focus more
+    on hard examples. By default, the focal tensor is computed as follows:
-
-  >>> y_true = [[0., 1.], [0., 0.]]
-  >>> y_pred = [[1., 1.], [1., 0.]]
-  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
-  >>> mae = tf.keras.losses.MeanAbsoluteError()
-  >>> mae(y_true, y_pred).numpy()
-  0.5
+
+    `focal_factor = (1 - output) ** gamma` for class 1
+    `focal_factor = output ** gamma` for class 0
+    where `gamma` is a focusing parameter. When `gamma=0`, this function is
+    equivalent to the binary crossentropy loss.
-
-  >>> # Calling with 'sample_weight'.
-  >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
-  0.25
+
+    With the `compile()` API:
-
-  >>> # Using 'sum' reduction type.
-  >>> mae = tf.keras.losses.MeanAbsoluteError(
-  ...     reduction=tf.keras.losses.Reduction.SUM)
-  >>> mae(y_true, y_pred).numpy()
-  1.0
+
+    ```python
+    model.compile(
+        loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=2.0, from_logits=True),
+        ....
+    )
+    ```
+
+    As a standalone function:
+
+    >>> # Example 1: (batch_size = 1, number of samples = 4)
+    >>> y_true = [0, 1, 0, 0]
+    >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=2,
+    ...     from_logits=True)
+    >>> loss(y_true, y_pred).numpy()
+    0.691
+
+    >>> # Apply class weight
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
+    ...     apply_class_balancing=True, gamma=2, from_logits=True)
+    >>> loss(y_true, y_pred).numpy()
+    0.51
+
+    >>> # Example 2: (batch_size = 2, number of samples = 4)
+    >>> y_true = [[0, 1], [0, 0]]
+    >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
+    >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3,
+    ...     from_logits=True)
+    >>> loss(y_true, y_pred).numpy()
+    0.647
+
+    >>> # Apply class weight
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
+    ...     apply_class_balancing=True, gamma=3, from_logits=True)
+    >>> loss(y_true, y_pred).numpy()
+    0.482
+
+    >>> # Using 'sample_weight' attribute with focal effect
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3,
+    ...     from_logits=True)
+    >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
+    0.133
+
+    >>> # Apply class weight
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
+    ...     apply_class_balancing=True, gamma=3, from_logits=True)
+    >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
+    0.097
+
+    >>> # Using 'sum' reduction type.
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=4,
+    ...     from_logits=True,
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> loss(y_true, y_pred).numpy()
+    1.222
+
+    >>> # Apply class weight
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
+    ...     apply_class_balancing=True, gamma=4, from_logits=True,
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> loss(y_true, y_pred).numpy()
+    0.914
+
+    >>> # Using 'none' reduction type.
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
+    ...     gamma=5, from_logits=True,
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> loss(y_true, y_pred).numpy()
+    array([0.0017, 1.1561], dtype=float32)
+
+    >>> # Apply class weight
+    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
+    ...     apply_class_balancing=True, gamma=5, from_logits=True,
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> loss(y_true, y_pred).numpy()
+    array([0.0004, 0.8670], dtype=float32)
-
-  >>> # Using 'none' reduction type.
-  >>> mae = tf.keras.losses.MeanAbsoluteError(
-  ...     reduction=tf.keras.losses.Reduction.NONE)
-  >>> mae(y_true, y_pred).numpy()
-  array([0.5, 0.5], dtype=float32)
-
-  Usage with the `compile()` API:
+
+    Args:
+        apply_class_balancing: A bool, whether to apply weight balancing on the
+            binary classes 0 and 1.
+        alpha: A weight balancing factor for class 1, default is `0.25` as
+            mentioned in the reference [Lin et al., 2018](
+            https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is
+            `1.0 - alpha`.
+        gamma: A focusing parameter used to compute the focal factor, default is
+            `2.0` as mentioned in the reference
+            [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf).
+        from_logits: Whether to interpret `y_pred` as a tensor of
+            [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
+            assume that `y_pred` contains probabilities (i.e., values in
+            `[0, 1]`).
+        label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs.
+            When > `0`, we compute the loss between the predicted labels and a
+            smoothed version of the true labels, where the smoothing squeezes
+            the labels towards `0.5`. Larger values of `label_smoothing`
+            correspond to heavier smoothing.
+        axis: The axis along which to compute crossentropy (the features axis).
+            Defaults to `-1`.
+        reduction: Type of `tf.keras.losses.Reduction` to apply to
+            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
+            option will be determined by the usage context. For almost all cases
+            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
+            `tf.distribute.Strategy`, except via `Model.compile()` and
+            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
+            will raise an error. Please see this custom training [tutorial](
+            https://www.tensorflow.org/tutorials/distribute/custom_training)
+            for more details.
+        name: Name for the op. Defaults to 'binary_focal_crossentropy'.
+    """
-
-  ```python
-  model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError())
-  ```
-  """
+
+    def __init__(
+        self,
+        apply_class_balancing=False,
+        alpha=0.25,
+        gamma=2.0,
+        from_logits=False,
+        label_smoothing=0.0,
+        axis=-1,
+        reduction=losses_utils.ReductionV2.AUTO,
+        name="binary_focal_crossentropy",
+    ):
+        """Initializes `BinaryFocalCrossentropy` instance."""
+        super().__init__(
+            binary_focal_crossentropy,
+            apply_class_balancing=apply_class_balancing,
+            alpha=alpha,
+            gamma=gamma,
+            name=name,
+            reduction=reduction,
+            from_logits=from_logits,
+            label_smoothing=label_smoothing,
+            axis=axis,
+        )
+        self.from_logits = from_logits
+        self.apply_class_balancing = apply_class_balancing
+        self.alpha = alpha
+        self.gamma = gamma
+
+    def get_config(self):
+        config = {
+            "apply_class_balancing": self.apply_class_balancing,
+            "alpha": self.alpha,
+            "gamma": self.gamma,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
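The focal factor documented above multiplies the per-element cross-entropy before reduction, which is easy to check numerically. A sketch of the documented formula, with `label_smoothing` and `apply_class_balancing` assumed left at their defaults:

```python
import tensorflow as tf

y_true = tf.constant([[0., 1.], [0., 0.]])
logits = tf.constant([[-18.6, 0.51], [2.94, -12.8]])
gamma = 3.0

p = tf.sigmoid(logits)  # from_logits=True, so map logits to probabilities
bce = -(y_true * tf.math.log(p) + (1. - y_true) * tf.math.log(1. - p))
# focal_factor = (1 - output)**gamma for class 1, output**gamma for class 0.
focal = tf.where(y_true == 1.0, (1.0 - p) ** gamma, p ** gamma)
manual = tf.reduce_mean(focal * bce, axis=-1)

loss_fn = tf.keras.losses.BinaryFocalCrossentropy(
    gamma=gamma, from_logits=True,
    reduction=tf.keras.losses.Reduction.NONE)
# The two should track each other up to epsilon clipping.
print(manual.numpy())
print(loss_fn(y_true, logits).numpy())
```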
+ """ - ```python - model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError()) - ``` - """ + def __init__( + self, + apply_class_balancing=False, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1, + reduction=losses_utils.ReductionV2.AUTO, + name="binary_focal_crossentropy", + ): + """Initializes `BinaryFocalCrossentropy` instance.""" + super().__init__( + binary_focal_crossentropy, + apply_class_balancing=apply_class_balancing, + alpha=alpha, + gamma=gamma, + name=name, + reduction=reduction, + from_logits=from_logits, + label_smoothing=label_smoothing, + axis=axis, + ) + self.from_logits = from_logits + self.apply_class_balancing = apply_class_balancing + self.alpha = alpha + self.gamma = gamma + + def get_config(self): + config = { + "apply_class_balancing": self.apply_class_balancing, + "alpha": self.alpha, + "gamma": self.gamma, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.losses.CategoricalCrossentropy") +class CategoricalCrossentropy(LossFunctionWrapper): + """Computes the crossentropy loss between the labels and predictions. + + Use this crossentropy loss function when there are two or more label + classes. We expect labels to be provided in a `one_hot` representation. If + you want to provide labels as integers, please use + `SparseCategoricalCrossentropy` loss. There should be `# classes` floating + point values per feature. + + In the snippet below, there is `# classes` floating pointing values per + example. The shape of both `y_pred` and `y_true` are + `[batch_size, num_classes]`. + + Standalone usage: + + >>> y_true = [[0, 1, 0], [0, 0, 1]] + >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] + >>> # Using 'auto'/'sum_over_batch_size' reduction type. + >>> cce = tf.keras.losses.CategoricalCrossentropy() + >>> cce(y_true, y_pred).numpy() + 1.177 + + >>> # Calling with 'sample_weight'. + >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy() + 0.814 + + >>> # Using 'sum' reduction type. + >>> cce = tf.keras.losses.CategoricalCrossentropy( + ... reduction=tf.keras.losses.Reduction.SUM) + >>> cce(y_true, y_pred).numpy() + 2.354 + + >>> # Using 'none' reduction type. + >>> cce = tf.keras.losses.CategoricalCrossentropy( + ... reduction=tf.keras.losses.Reduction.NONE) + >>> cce(y_true, y_pred).numpy() + array([0.0513, 2.303], dtype=float32) + + Usage with the `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss=tf.keras.losses.CategoricalCrossentropy()) + ``` + """ - def __init__(self, - reduction=losses_utils.ReductionV2.AUTO, - name='mean_absolute_error'): - """Initializes `MeanAbsoluteError` instance. + def __init__( + self, + from_logits=False, + label_smoothing=0.0, + axis=-1, + reduction=losses_utils.ReductionV2.AUTO, + name="categorical_crossentropy", + ): + """Initializes `CategoricalCrossentropy` instance. + + Args: + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability + distribution. + label_smoothing: Float in [0, 1]. When > 0, label values are + smoothed, meaning the confidence on label values are relaxed. + For example, if `0.1`, use `0.1 / num_classes` for non-target + labels and `0.9 + 0.1 / num_classes` for target labels. + axis: The axis along which to compute crossentropy (the features + axis). Defaults to -1. + reduction: Type of `tf.keras.losses.Reduction` to apply to loss. + Default value is `AUTO`. 
`AUTO` indicates that the reduction
+                option will be determined by the usage context. For almost all
+                cases this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
+                `tf.distribute.Strategy`, except via `Model.compile()` and
+                `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
+                will raise an error. Please see this custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
+                Defaults to 'categorical_crossentropy'.
+        """
+        super().__init__(
+            categorical_crossentropy,
+            name=name,
+            reduction=reduction,
+            from_logits=from_logits,
+            label_smoothing=label_smoothing,
+            axis=axis,
+        )
+
+
+@keras_export("keras.losses.CategoricalFocalCrossentropy")
+class CategoricalFocalCrossentropy(LossFunctionWrapper):
+    """Computes the alpha balanced focal crossentropy loss.
+
+    Use this crossentropy loss function when there are two or more label
+    classes and if you want to handle class imbalance without using
+    `class_weights`. We expect labels to be provided in a `one_hot`
+    representation.
+
+    According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
+    helps to apply a focal factor to down-weight easy examples and focus more on
+    hard examples. The general formula for the focal loss (FL)
+    is as follows:
+
+    `FL(p_t) = −(1 − p_t)^gamma * log(p_t)`
+
+    where `p_t` is defined as follows:
+    `p_t = output if y_true == 1, else 1 - output`
+
+    `(1 − p_t)^gamma` is the `modulating_factor`, where `gamma` is a focusing
+    parameter. When `gamma` = 0, there is no focal effect on the cross entropy.
+    `gamma` reduces the importance given to simple examples in a smooth manner.
+
+    The authors use the alpha-balanced variant of focal loss (FL) in the paper:
+    `FL(p_t) = −alpha * (1 − p_t)^gamma * log(p_t)`
+
+    where `alpha` is the weight factor for the classes. If `alpha` = 1, the
+    loss won't be able to handle class imbalance properly as all
+    classes will have the same weight. This can be a constant or a list of
+    constants. If `alpha` is a list, it must have the same length as the number
+    of classes.
+
+    The formula above can be generalized to:
+    `FL(p_t) = alpha * (1 − p_t)^gamma * CrossEntropy(y_true, y_pred)`
+
+    where the minus sign comes from `CrossEntropy(y_true, y_pred)` (CE).
+
+    Extending this to the multi-class case is straightforward:
+    `FL(p_t) = alpha * (1 − p_t)^gamma * CategoricalCE(y_true, y_pred)`
+
+    In the snippet below, there are `# classes` floating point values per
+    example. The shape of both `y_pred` and `y_true` is
+    `[batch_size, num_classes]`.
+
+    Standalone usage:
+
+    >>> y_true = [[0., 1., 0.], [0., 0., 1.]]
+    >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
+    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
+    >>> cce = tf.keras.losses.CategoricalFocalCrossentropy()
+    >>> cce(y_true, y_pred).numpy()
+    0.23315276
+
+    >>> # Calling with 'sample_weight'.
+    >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
+    0.1632
+
+    >>> # Using 'sum' reduction type.
+    >>> cce = tf.keras.losses.CategoricalFocalCrossentropy(
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> cce(y_true, y_pred).numpy()
+    0.46631
+
+    >>> # Using 'none' reduction type.
+    >>> cce = tf.keras.losses.CategoricalFocalCrossentropy(
+    ...
reduction=tf.keras.losses.Reduction.NONE)
+    >>> cce(y_true, y_pred).numpy()
+    array([3.2058331e-05, 4.6627346e-01], dtype=float32)
+
+    Usage with the `compile()` API:
+    ```python
+    model.compile(optimizer='adam',
+                  loss=tf.keras.losses.CategoricalFocalCrossentropy())
+    ```
+
     Args:
-      reduction: Type of `tf.keras.losses.Reduction` to apply to
-        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-        option will be determined by the usage context. For almost all cases
-        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
-        `tf.distribute.Strategy`, outside of built-in training loops such as
-        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-        will raise an error. Please see this custom training [tutorial](
-        https://www.tensorflow.org/tutorials/distribute/custom_training) for
-        more details.
-      name: Optional name for the instance. Defaults to 'mean_absolute_error'.
+        alpha: A weight balancing factor for all classes, default is `0.25` as
+            mentioned in the reference. It can be a list of floats or a scalar.
+            In the multi-class case, alpha may be set by inverse class
+            frequency by using `compute_class_weight` from `sklearn.utils`.
+        gamma: A focusing parameter, default is `2.0` as mentioned in the
+            reference. It helps to gradually reduce the importance given to
+            simple (easy) examples in a smooth manner.
+        from_logits: Whether `y_pred` is expected to be a logits tensor. By
+            default, we consider that `y_pred` encodes a probability
+            distribution.
+        label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
+            meaning the confidence on label values is relaxed. For example, if
+            `0.1`, use `0.1 / num_classes` for non-target labels and
+            `0.9 + 0.1 / num_classes` for target labels.
+        axis: The axis along which to compute crossentropy (the features
+            axis). Defaults to -1.
+        reduction: Type of `tf.keras.losses.Reduction` to apply to
+            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
+            option will be determined by the usage context. For almost all cases
+            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
+            `tf.distribute.Strategy`, except via `Model.compile()` and
+            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
+            will raise an error. Please see this custom training [tutorial](
+            https://www.tensorflow.org/tutorials/distribute/custom_training)
+            for more details.
+        name: Optional name for the instance.
+            Defaults to 'categorical_focal_crossentropy'.
+    """
-    super().__init__(mean_absolute_error, name=name, reduction=reduction)
+
+    def __init__(
+        self,
+        alpha=0.25,
+        gamma=2.0,
+        from_logits=False,
+        label_smoothing=0.0,
+        axis=-1,
+        reduction=losses_utils.ReductionV2.AUTO,
+        name="categorical_focal_crossentropy",
+    ):
+        """Initializes `CategoricalFocalCrossentropy` instance."""
+        super().__init__(
+            categorical_focal_crossentropy,
+            alpha=alpha,
+            gamma=gamma,
+            name=name,
+            reduction=reduction,
+            from_logits=from_logits,
+            label_smoothing=label_smoothing,
+            axis=axis,
+        )
+        self.from_logits = from_logits
+        self.alpha = alpha
+        self.gamma = gamma
+
+    def get_config(self):
+        config = {
+            "alpha": self.alpha,
+            "gamma": self.gamma,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
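The generalized form `alpha * (1 − p_t)^gamma * CE` can be checked against the class defaults. A sketch, assuming probability inputs whose rows already sum to 1 (so the internal renormalization is a no-op):

```python
import tensorflow as tf

y_true = tf.constant([[0., 1., 0.], [0., 0., 1.]])
y_pred = tf.constant([[0.05, 0.95, 0.], [0.1, 0.8, 0.1]])
alpha, gamma = 0.25, 2.0

# p_t = output where y_true == 1; CE picks out -log(p_t) per example.
eps = tf.keras.backend.epsilon()
p = tf.clip_by_value(y_pred, eps, 1.0 - eps)
ce = -y_true * tf.math.log(p)
focal = alpha * (1.0 - p) ** gamma
manual = tf.reduce_sum(focal * ce, axis=-1)

cce = tf.keras.losses.CategoricalFocalCrossentropy(
    reduction=tf.keras.losses.Reduction.NONE)
# Expected to approximate [3.2058331e-05, 4.6627346e-01] from the doctest.
print(manual.numpy())
print(cce(y_true, y_pred).numpy())
```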
+
+
+@keras_export("keras.losses.SparseCategoricalCrossentropy")
+class SparseCategoricalCrossentropy(LossFunctionWrapper):
+    """Computes the crossentropy loss between the labels and predictions.
+
+    Use this crossentropy loss function when there are two or more label
+    classes. We expect labels to be provided as integers. If you want to
+    provide labels using `one-hot` representation, please use
+    `CategoricalCrossentropy` loss. There should be `# classes` floating point
+    values per feature for `y_pred` and a single floating point value per
+    feature for `y_true`.
+
+    In the snippet below, there is a single floating point value per example for
+    `y_true` and `# classes` floating point values per example for `y_pred`.
+    The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
+    `[batch_size, num_classes]`.
+
+    Standalone usage:
+
+    >>> y_true = [1, 2]
+    >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
+    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
+    >>> scce = tf.keras.losses.SparseCategoricalCrossentropy()
+    >>> scce(y_true, y_pred).numpy()
+    1.177
+
+    >>> # Calling with 'sample_weight'.
+    >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
+    0.814
+
+    >>> # Using 'sum' reduction type.
+    >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> scce(y_true, y_pred).numpy()
+    2.354
+
+    >>> # Using 'none' reduction type.
+    >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> scce(y_true, y_pred).numpy()
+    array([0.0513, 2.303], dtype=float32)
+
+    Usage with the `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd',
+                  loss=tf.keras.losses.SparseCategoricalCrossentropy())
+    ```
+    """
-@keras_export('keras.losses.MeanAbsolutePercentageError')
-class MeanAbsolutePercentageError(LossFunctionWrapper):
-  """Computes the mean absolute percentage error between `y_true` and `y_pred`.
+
+    def __init__(
+        self,
+        from_logits=False,
+        ignore_class=None,
+        reduction=losses_utils.ReductionV2.AUTO,
+        name="sparse_categorical_crossentropy",
+    ):
+        """Initializes `SparseCategoricalCrossentropy` instance.
+
+        Args:
+            from_logits: Whether `y_pred` is expected to be a logits tensor. By
+                default, we assume that `y_pred` encodes a probability
+                distribution.
+            ignore_class: Optional integer. The ID of a class to be ignored
+                during loss computation. This is useful, for example, in
+                segmentation problems featuring a "void" class (commonly -1 or
+                255) in segmentation maps.
+                By default (`ignore_class=None`), all classes are considered.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
+                Defaults to 'sparse_categorical_crossentropy'.
+        """
+        super().__init__(
+            sparse_categorical_crossentropy,
+            name=name,
+            reduction=reduction,
+            from_logits=from_logits,
+            ignore_class=ignore_class,
+        )
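As the docstring notes, the only difference from `CategoricalCrossentropy` is the label encoding; a short sketch making the equivalence concrete:

```python
import tensorflow as tf

y_true_int = [1, 2]                              # integer labels
y_true_onehot = tf.one_hot(y_true_int, depth=3)  # one-hot equivalent
y_pred = [[0.05, 0.95, 0.0], [0.1, 0.8, 0.1]]

scce = tf.keras.losses.SparseCategoricalCrossentropy()
cce = tf.keras.losses.CategoricalCrossentropy()

# Same data, two encodings: both should report ~1.177.
print(scce(y_true_int, y_pred).numpy())
print(cce(y_true_onehot, y_pred).numpy())
```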
+
+
+@keras_export("keras.losses.CosineSimilarity")
+class CosineSimilarity(LossFunctionWrapper):
+    """Computes the cosine similarity between labels and predictions.
+
+    Note that the loss is a number between -1 and 1: values closer to -1
+    indicate greater similarity, 0 indicates orthogonality, and values closer
+    to 1 indicate greater dissimilarity. This makes it usable as a loss
+    function in a setting where you try to maximize the proximity between
+    predictions and targets. If either `y_true` or `y_pred` is a zero vector,
+    cosine similarity will be 0 regardless of the proximity between
+    predictions and targets.
+
+    `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`
+
+    Standalone usage:
+
+    >>> y_true = [[0., 1.], [1., 1.]]
+    >>> y_pred = [[1., 0.], [1., 1.]]
+    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
+    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
+    >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
+    >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
+    >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
+    >>> # loss = -mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
+    >>> #      = -((0. + 0.) + (0.5 + 0.5)) / 2
+    >>> cosine_loss(y_true, y_pred).numpy()
+    -0.5
+
+    >>> # Calling with 'sample_weight'.
+    >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
+    -0.0999
+
+    >>> # Using 'sum' reduction type.
+    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> cosine_loss(y_true, y_pred).numpy()
+    -0.999
+
+    >>> # Using 'none' reduction type.
+    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> cosine_loss(y_true, y_pred).numpy()
+    array([-0., -0.999], dtype=float32)
+
+    Usage with the `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd',
+                  loss=tf.keras.losses.CosineSimilarity(axis=1))
+    ```
-
-  Formula:
+
+    Args:
+        axis: The axis along which the cosine similarity is computed
+            (the features axis). Defaults to -1.
+        reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
+            Default value is `AUTO`. `AUTO` indicates that the reduction option
+            will be determined by the usage context. For almost all cases this
+            defaults to `SUM_OVER_BATCH_SIZE`. When used under a
+            `tf.distribute.Strategy`, except via `Model.compile()` and
+            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an
+            error. Please see this custom training [tutorial](
+            https://www.tensorflow.org/tutorials/distribute/custom_training)
+            for more details.
+        name: Optional name for the instance. Defaults to 'cosine_similarity'.
+    """
+
+    def __init__(
+        self,
+        axis=-1,
+        reduction=losses_utils.ReductionV2.AUTO,
+        name="cosine_similarity",
+    ):
+        super().__init__(
+            cosine_similarity, reduction=reduction, name=name, axis=axis
+        )
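The `-0.5` result above follows directly from two `l2_normalize` calls; a sketch reproducing it:

```python
import tensorflow as tf

y_true = tf.constant([[0., 1.], [1., 1.]])
y_pred = tf.constant([[1., 0.], [1., 1.]])

# loss = -mean(sum(l2_norm(y_true) * l2_norm(y_pred), axis=1))
true_n = tf.math.l2_normalize(y_true, axis=1)
pred_n = tf.math.l2_normalize(y_pred, axis=1)
manual = -tf.reduce_mean(tf.reduce_sum(true_n * pred_n, axis=1))

cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
print(manual.numpy())                       # -0.5
print(cosine_loss(y_true, y_pred).numpy())  # -0.5
```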
-
-  `loss = 100 * abs((y_true - y_pred) / y_true)`
-
-  Note that to avoid dividing by zero, a small epsilon value
-  is added to the denominator.
+
+
+@keras_export("keras.losses.Hinge")
+class Hinge(LossFunctionWrapper):
+    """Computes the hinge loss between `y_true` & `y_pred`.
-
-  Standalone usage:
+
+    `loss = maximum(1 - y_true * y_pred, 0)`
-
-  >>> y_true = [[2., 1.], [2., 3.]]
-  >>> y_pred = [[1., 1.], [1., 0.]]
-  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
-  >>> mape = tf.keras.losses.MeanAbsolutePercentageError()
-  >>> mape(y_true, y_pred).numpy()
-  50.
+
+    `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
+    provided we will convert them to -1 or 1.
-
-  >>> # Calling with 'sample_weight'.
-  >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
-  20.
+
+    Standalone usage:
-
-  >>> # Using 'sum' reduction type.
-  >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
-  ...     reduction=tf.keras.losses.Reduction.SUM)
-  >>> mape(y_true, y_pred).numpy()
-  100.
+
+    >>> y_true = [[0., 1.], [0., 0.]]
+    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
+    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
+    >>> h = tf.keras.losses.Hinge()
+    >>> h(y_true, y_pred).numpy()
+    1.3
-
-  >>> # Using 'none' reduction type.
-  >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
-  ...     reduction=tf.keras.losses.Reduction.NONE)
-  >>> mape(y_true, y_pred).numpy()
-  array([25., 75.], dtype=float32)
+
+    >>> # Calling with 'sample_weight'.
+    >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
+    0.55
-
-  Usage with the `compile()` API:
+
+    >>> # Using 'sum' reduction type.
+    >>> h = tf.keras.losses.Hinge(
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> h(y_true, y_pred).numpy()
+    2.6
-
-  ```python
-  model.compile(optimizer='sgd',
-                loss=tf.keras.losses.MeanAbsolutePercentageError())
-  ```
-  """
+
+    >>> # Using 'none' reduction type.
+    >>> h = tf.keras.losses.Hinge(
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> h(y_true, y_pred).numpy()
+    array([1.1, 1.5], dtype=float32)
-
-  def __init__(self,
-               reduction=losses_utils.ReductionV2.AUTO,
-               name='mean_absolute_percentage_error'):
-    """Initializes `MeanAbsolutePercentageError` instance.
+
+    Usage with the `compile()` API:
-
-    Args:
-      reduction: Type of `tf.keras.losses.Reduction` to apply to
-        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-        option will be determined by the usage context. For almost all cases
-        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
-        `tf.distribute.Strategy`, outside of built-in training loops such as
-        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-        will raise an error. Please see this custom training [tutorial](
-        https://www.tensorflow.org/tutorials/distribute/custom_training) for
-        more details.
-      name: Optional name for the instance. Defaults to
-        'mean_absolute_percentage_error'.
+
+    ```python
+    model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge())
+    ```
+    """
-    super().__init__(
-        mean_absolute_percentage_error, name=name, reduction=reduction)
+
+    def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="hinge"):
+        """Initializes `Hinge` instance.
+
+        Args:
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'hinge'.
+        """
+        super().__init__(hinge, name=name, reduction=reduction)
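The 0/1-to--1/1 conversion mentioned in the docstring can be spelled out by hand; a sketch reproducing the per-sample values from the 'none' reduction example:

```python
import tensorflow as tf

y_true = tf.constant([[0., 1.], [0., 0.]])
y_pred = tf.constant([[0.6, 0.4], [0.4, 0.6]])

# 0/1 labels are first mapped to -1/1, as the docstring states.
t = 2.0 * y_true - 1.0
manual = tf.reduce_mean(tf.maximum(1.0 - t * y_pred, 0.0), axis=-1)

h = tf.keras.losses.Hinge(reduction=tf.keras.losses.Reduction.NONE)
print(manual.numpy())             # [1.1, 1.5]
print(h(y_true, y_pred).numpy())  # [1.1, 1.5]
```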
+
+
+@keras_export("keras.losses.SquaredHinge")
+class SquaredHinge(LossFunctionWrapper):
+    """Computes the squared hinge loss between `y_true` & `y_pred`.
-@keras_export('keras.losses.MeanSquaredLogarithmicError')
-class MeanSquaredLogarithmicError(LossFunctionWrapper):
-  """Computes the mean squared logarithmic error between `y_true` and `y_pred`.
+
+    `loss = square(maximum(1 - y_true * y_pred, 0))`
-
-  `loss = square(log(y_true + 1.) - log(y_pred + 1.))`
+
+    `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
+    provided we will convert them to -1 or 1.
-
-  Standalone usage:
+
+    Standalone usage:
-
-  >>> y_true = [[0., 1.], [0., 0.]]
-  >>> y_pred = [[1., 1.], [1., 0.]]
-  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
-  >>> msle = tf.keras.losses.MeanSquaredLogarithmicError()
-  >>> msle(y_true, y_pred).numpy()
-  0.240
+
+    >>> y_true = [[0., 1.], [0., 0.]]
+    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
+    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
+    >>> h = tf.keras.losses.SquaredHinge()
+    >>> h(y_true, y_pred).numpy()
+    1.86
-
-  >>> # Calling with 'sample_weight'.
-  >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
-  0.120
+
+    >>> # Calling with 'sample_weight'.
+    >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
+    0.73
-
-  >>> # Using 'sum' reduction type.
-  >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
-  ...     reduction=tf.keras.losses.Reduction.SUM)
-  >>> msle(y_true, y_pred).numpy()
-  0.480
+
+    >>> # Using 'sum' reduction type.
+    >>> h = tf.keras.losses.SquaredHinge(
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> h(y_true, y_pred).numpy()
+    3.72
-
-  >>> # Using 'none' reduction type.
-  >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
-  ...     reduction=tf.keras.losses.Reduction.NONE)
-  >>> msle(y_true, y_pred).numpy()
-  array([0.240, 0.240], dtype=float32)
+
+    >>> # Using 'none' reduction type.
+    >>> h = tf.keras.losses.SquaredHinge(
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> h(y_true, y_pred).numpy()
+    array([1.46, 2.26], dtype=float32)
-
-  Usage with the `compile()` API:
+
+    Usage with the `compile()` API:
-
-  ```python
-  model.compile(optimizer='sgd',
-                loss=tf.keras.losses.MeanSquaredLogarithmicError())
-  ```
-  """
+
+    ```python
+    model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge())
+    ```
+    """
-
-  def __init__(self,
-               reduction=losses_utils.ReductionV2.AUTO,
-               name='mean_squared_logarithmic_error'):
-    """Initializes `MeanSquaredLogarithmicError` instance.
+
+    def __init__(
+        self, reduction=losses_utils.ReductionV2.AUTO, name="squared_hinge"
+    ):
+        """Initializes `SquaredHinge` instance.
+
+        Args:
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'squared_hinge'.
+        """
+        super().__init__(squared_hinge, name=name, reduction=reduction)
+
+
+@keras_export("keras.losses.CategoricalHinge")
+class CategoricalHinge(LossFunctionWrapper):
+    """Computes the categorical hinge loss between `y_true` & `y_pred`.
-
-    Args:
-      reduction: Type of `tf.keras.losses.Reduction` to apply to
-        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-        option will be determined by the usage context. For almost all cases
-        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
-        `tf.distribute.Strategy`, outside of built-in training loops such as
-        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-        will raise an error. Please see this custom training [tutorial](
-        https://www.tensorflow.org/tutorials/distribute/custom_training) for
-        more details.
-      name: Optional name for the instance. Defaults to
-        'mean_squared_logarithmic_error'.
- """ - super().__init__( - mean_squared_logarithmic_error, name=name, reduction=reduction) + `loss = maximum(neg - pos + 1, 0)` + where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)` + Standalone usage: -@keras_export('keras.losses.BinaryCrossentropy') -class BinaryCrossentropy(LossFunctionWrapper): - """Computes the cross-entropy loss between true labels and predicted labels. - - Use this cross-entropy loss for binary (0 or 1) classification applications. - The loss function requires the following inputs: - - - `y_true` (true label): This is either 0 or 1. - - `y_pred` (predicted value): This is the model's prediction, i.e, a single - floating-point value which either represents a - [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] - when `from_logits=True`) or a probability (i.e, value in [0., 1.] when - `from_logits=False`). - - **Recommended Usage:** (set `from_logits=True`) - - With `tf.keras` API: - - ```python - model.compile( - loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), - .... - ) - ``` - - As a standalone function: - - >>> # Example 1: (batch_size = 1, number of samples = 4) - >>> y_true = [0, 1, 0, 0] - >>> y_pred = [-18.6, 0.51, 2.94, -12.8] - >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True) - >>> bce(y_true, y_pred).numpy() - 0.865 - - >>> # Example 2: (batch_size = 2, number of samples = 4) - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]] - >>> # Using default 'auto'/'sum_over_batch_size' reduction type. - >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True) - >>> bce(y_true, y_pred).numpy() - 0.865 - >>> # Using 'sample_weight' attribute - >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() - 0.243 - >>> # Using 'sum' reduction` type. - >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True, - ... reduction=tf.keras.losses.Reduction.SUM) - >>> bce(y_true, y_pred).numpy() - 1.730 - >>> # Using 'none' reduction type. - >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True, - ... reduction=tf.keras.losses.Reduction.NONE) - >>> bce(y_true, y_pred).numpy() - array([0.235, 1.496], dtype=float32) - - **Default Usage:** (set `from_logits=False`) - - >>> # Make the following updates to the above "Recommended Usage" section - >>> # 1. Set `from_logits=False` - >>> tf.keras.losses.BinaryCrossentropy() # OR ...('from_logits=False') - >>> # 2. Update `y_pred` to use probabilities instead of logits - >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]] - """ - - def __init__(self, - from_logits=False, - label_smoothing=0., - axis=-1, - reduction=losses_utils.ReductionV2.AUTO, - name='binary_crossentropy'): - """Initializes `BinaryCrossentropy` instance. + >>> y_true = [[0, 1], [0, 0]] + >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] + >>> # Using 'auto'/'sum_over_batch_size' reduction type. + >>> h = tf.keras.losses.CategoricalHinge() + >>> h(y_true, y_pred).numpy() + 1.4 - Args: - from_logits: Whether to interpret `y_pred` as a tensor of - [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we - assume that `y_pred` contains probabilities (i.e., values in [0, 1]). - label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0, - we compute the loss between the predicted labels and a smoothed version - of the true labels, where the smoothing squeezes the labels towards 0.5. - Larger values of `label_smoothing` correspond to heavier smoothing. - axis: The axis along which to compute crossentropy (the features axis). - Defaults to -1. 
-    Args:
-      from_logits: Whether to interpret `y_pred` as a tensor of
-        [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
-        assume that `y_pred` contains probabilities (i.e., values in [0, 1]).
-      label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0,
-        we compute the loss between the predicted labels and a smoothed version
-        of the true labels, where the smoothing squeezes the labels towards 0.5.
-        Larger values of `label_smoothing` correspond to heavier smoothing.
-      axis: The axis along which to compute crossentropy (the features axis).
-        Defaults to -1.
-      reduction: Type of `tf.keras.losses.Reduction` to apply to
-        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-        option will be determined by the usage context. For almost all cases
-        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
-        `tf.distribute.Strategy`, outside of built-in training loops such as
-        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-        will raise an error. Please see this custom training [tutorial](
-        https://www.tensorflow.org/tutorials/distribute/custom_training) for
-        more details.
-      name: Name for the op. Defaults to 'binary_crossentropy'.
+
+    >>> # Calling with 'sample_weight'.
+    >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
+    0.6
+
+    >>> # Using 'sum' reduction type.
+    >>> h = tf.keras.losses.CategoricalHinge(
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> h(y_true, y_pred).numpy()
+    2.8
+
+    >>> # Using 'none' reduction type.
+    >>> h = tf.keras.losses.CategoricalHinge(
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> h(y_true, y_pred).numpy()
+    array([1.2, 1.6], dtype=float32)
+
+    Usage with the `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge())
+    ```
+    """
-    super().__init__(
-        binary_crossentropy,
-        name=name,
-        reduction=reduction,
-        from_logits=from_logits,
-        label_smoothing=label_smoothing,
-        axis=axis)
-    self.from_logits = from_logits
+
+    def __init__(
+        self, reduction=losses_utils.ReductionV2.AUTO, name="categorical_hinge"
+    ):
+        """Initializes `CategoricalHinge` instance.
+
+        Args:
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
+                Defaults to 'categorical_hinge'.
+        """
+        super().__init__(categorical_hinge, name=name, reduction=reduction)
+
+
+@keras_export("keras.losses.Poisson")
+class Poisson(LossFunctionWrapper):
+    """Computes the Poisson loss between `y_true` & `y_pred`.
-@keras_export('keras.losses.BinaryFocalCrossentropy')
-class BinaryFocalCrossentropy(LossFunctionWrapper):
-  """Computes the focal cross-entropy loss between true labels and predictions.
-
-  Binary cross-entropy loss is often used for binary (0 or 1) classification
-  tasks. The loss function requires the following inputs:
-
-  - `y_true` (true label): This is either 0 or 1.
-  - `y_pred` (predicted value): This is the model's prediction, i.e, a single
-    floating-point value which either represents a
-    [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]
-    when `from_logits=True`) or a probability (i.e, value in `[0., 1.]` when
-    `from_logits=False`).
-
-  According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
-  helps to apply a "focal factor" to down-weight easy examples and focus more on
-  hard examples. By default, the focal tensor is computed as follows:
-
-  `focal_factor = (1 - output) ** gamma` for class 1
-  `focal_factor = output ** gamma` for class 0
-  where `gamma` is a focusing parameter. When `gamma=0`, this function is
-  equivalent to the binary crossentropy loss.
- - With the `compile()` API: - - ```python - model.compile( - loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=2.0, from_logits=True), - .... - ) - ``` - - As a standalone function: - - >>> # Example 1: (batch_size = 1, number of samples = 4) - >>> y_true = [0, 1, 0, 0] - >>> y_pred = [-18.6, 0.51, 2.94, -12.8] - >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=2, from_logits=True) - >>> loss(y_true, y_pred).numpy() - 0.691 - - >>> # Example 2: (batch_size = 2, number of samples = 4) - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]] - >>> # Using default 'auto'/'sum_over_batch_size' reduction type. - >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3, from_logits=True) - >>> loss(y_true, y_pred).numpy() - 0.647 - - >>> # Using 'sample_weight' attribute - >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() - 0.133 - - >>> # Using 'sum' reduction` type. - >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=4, from_logits=True, - ... reduction=tf.keras.losses.Reduction.SUM) - >>> loss(y_true, y_pred).numpy() - 1.222 - - >>> # Using 'none' reduction type. - >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=5, from_logits=True, - ... reduction=tf.keras.losses.Reduction.NONE) - >>> loss(y_true, y_pred).numpy() - array([0.0017 1.1561], dtype=float32) - - Args: - gamma: A focusing parameter used to compute the focal factor, default is - `2.0` as mentioned in the reference - [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf). - from_logits: Whether to interpret `y_pred` as a tensor of - [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we - assume that `y_pred` are probabilities (i.e., values in `[0, 1]`). - label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs. When > - `0`, we compute the loss between the predicted labels and a smoothed - version of the true labels, where the smoothing squeezes the labels - towards `0.5`. Larger values of `label_smoothing` correspond to heavier - smoothing. - axis: The axis along which to compute crossentropy (the features axis). - Defaults to `-1`. - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras`, `compile()` and `fit()`, using `SUM_OVER_BATCH_SIZE` or - `AUTO` will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Name for the op. Defaults to 'binary_focal_crossentropy'. 
- """ - - def __init__( - self, - gamma=2.0, - from_logits=False, - label_smoothing=0., - axis=-1, - reduction=losses_utils.ReductionV2.AUTO, - name='binary_focal_crossentropy', - ): - """Initializes `BinaryFocalCrossentropy` instance.""" - super().__init__( - binary_focal_crossentropy, - gamma=gamma, - name=name, - reduction=reduction, - from_logits=from_logits, - label_smoothing=label_smoothing, - axis=axis) - self.from_logits = from_logits - self.gamma = gamma + `loss = y_pred - y_true * log(y_pred)` - def get_config(self): - config = { - 'gamma': self.gamma, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + Standalone usage: + >>> y_true = [[0., 1.], [0., 0.]] + >>> y_pred = [[1., 1.], [0., 0.]] + >>> # Using 'auto'/'sum_over_batch_size' reduction type. + >>> p = tf.keras.losses.Poisson() + >>> p(y_true, y_pred).numpy() + 0.5 -@keras_export('keras.losses.CategoricalCrossentropy') -class CategoricalCrossentropy(LossFunctionWrapper): - """Computes the crossentropy loss between the labels and predictions. - - Use this crossentropy loss function when there are two or more label classes. - We expect labels to be provided in a `one_hot` representation. If you want to - provide labels as integers, please use `SparseCategoricalCrossentropy` loss. - There should be `# classes` floating point values per feature. - - In the snippet below, there is `# classes` floating pointing values per - example. The shape of both `y_pred` and `y_true` are - `[batch_size, num_classes]`. - - Standalone usage: - - >>> y_true = [[0, 1, 0], [0, 0, 1]] - >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> cce = tf.keras.losses.CategoricalCrossentropy() - >>> cce(y_true, y_pred).numpy() - 1.177 - - >>> # Calling with 'sample_weight'. - >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy() - 0.814 - - >>> # Using 'sum' reduction type. - >>> cce = tf.keras.losses.CategoricalCrossentropy( - ... reduction=tf.keras.losses.Reduction.SUM) - >>> cce(y_true, y_pred).numpy() - 2.354 - - >>> # Using 'none' reduction type. - >>> cce = tf.keras.losses.CategoricalCrossentropy( - ... reduction=tf.keras.losses.Reduction.NONE) - >>> cce(y_true, y_pred).numpy() - array([0.0513, 2.303], dtype=float32) - - Usage with the `compile()` API: - - ```python - model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalCrossentropy()) - ``` - """ - - def __init__(self, - from_logits=False, - label_smoothing=0., - axis=-1, - reduction=losses_utils.ReductionV2.AUTO, - name='categorical_crossentropy'): - """Initializes `CategoricalCrossentropy` instance. + >>> # Calling with 'sample_weight'. + >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() + 0.4 - Args: - from_logits: Whether `y_pred` is expected to be a logits tensor. By - default, we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, - meaning the confidence on label values are relaxed. For example, if - `0.1`, use `0.1 / num_classes` for non-target labels and - `0.9 + 0.1 / num_classes` for target labels. - axis: The axis along which to compute crossentropy (the features axis). - Defaults to -1. - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. 
When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. - Defaults to 'categorical_crossentropy'. - """ - super().__init__( - categorical_crossentropy, - name=name, - reduction=reduction, - from_logits=from_logits, - label_smoothing=label_smoothing, - axis=axis) + >>> # Using 'sum' reduction type. + >>> p = tf.keras.losses.Poisson( + ... reduction=tf.keras.losses.Reduction.SUM) + >>> p(y_true, y_pred).numpy() + 0.999 + >>> # Using 'none' reduction type. + >>> p = tf.keras.losses.Poisson( + ... reduction=tf.keras.losses.Reduction.NONE) + >>> p(y_true, y_pred).numpy() + array([0.999, 0.], dtype=float32) -@keras_export('keras.losses.SparseCategoricalCrossentropy') -class SparseCategoricalCrossentropy(LossFunctionWrapper): - """Computes the crossentropy loss between the labels and predictions. - - Use this crossentropy loss function when there are two or more label classes. - We expect labels to be provided as integers. If you want to provide labels - using `one-hot` representation, please use `CategoricalCrossentropy` loss. - There should be `# classes` floating point values per feature for `y_pred` - and a single floating point value per feature for `y_true`. - - In the snippet below, there is a single floating point value per example for - `y_true` and `# classes` floating pointing values per example for `y_pred`. - The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is - `[batch_size, num_classes]`. - - Standalone usage: - - >>> y_true = [1, 2] - >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> scce = tf.keras.losses.SparseCategoricalCrossentropy() - >>> scce(y_true, y_pred).numpy() - 1.177 - - >>> # Calling with 'sample_weight'. - >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy() - 0.814 - - >>> # Using 'sum' reduction type. - >>> scce = tf.keras.losses.SparseCategoricalCrossentropy( - ... reduction=tf.keras.losses.Reduction.SUM) - >>> scce(y_true, y_pred).numpy() - 2.354 - - >>> # Using 'none' reduction type. - >>> scce = tf.keras.losses.SparseCategoricalCrossentropy( - ... reduction=tf.keras.losses.Reduction.NONE) - >>> scce(y_true, y_pred).numpy() - array([0.0513, 2.303], dtype=float32) - - Usage with the `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss=tf.keras.losses.SparseCategoricalCrossentropy()) - ``` - """ - - def __init__(self, - from_logits=False, - reduction=losses_utils.ReductionV2.AUTO, - name='sparse_categorical_crossentropy'): - """Initializes `SparseCategoricalCrossentropy` instance. + Usage with the `compile()` API: - Args: - from_logits: Whether `y_pred` is expected to be a logits tensor. By - default, we assume that `y_pred` encodes a probability distribution. - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. 
Please see this custom training [tutorial](
-        https://www.tensorflow.org/tutorials/distribute/custom_training) for
-        more details.
-      name: Optional name for the instance. Defaults to
-        'sparse_categorical_crossentropy'.
+    ```python
+    model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson())
+    ```
+    """
-    super().__init__(
-        sparse_categorical_crossentropy,
-        name=name,
-        reduction=reduction,
-        from_logits=from_logits)
+
+    def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="poisson"):
+        """Initializes `Poisson` instance.
+
+        Args:
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'poisson'.
+        """
+        super().__init__(poisson, name=name, reduction=reduction)
+
+
+@keras_export("keras.losses.LogCosh")
+class LogCosh(LossFunctionWrapper):
+    """Computes the logarithm of the hyperbolic cosine of the prediction error.
-@keras_export('keras.losses.Hinge')
-class Hinge(LossFunctionWrapper):
-  """Computes the hinge loss between `y_true` and `y_pred`.
+
+    `logcosh = log((exp(x) + exp(-x))/2)`,
+    where x is the error `y_pred - y_true`.
-
-  `loss = maximum(1 - y_true * y_pred, 0)`
+
+    Standalone usage:
-
-  `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
-  provided we will convert them to -1 or 1.
+
+    >>> y_true = [[0., 1.], [0., 0.]]
+    >>> y_pred = [[1., 1.], [0., 0.]]
+    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
+    >>> l = tf.keras.losses.LogCosh()
+    >>> l(y_true, y_pred).numpy()
+    0.108
-
-  Standalone usage:
+
+    >>> # Calling with 'sample_weight'.
+    >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
+    0.087
-
-  >>> y_true = [[0., 1.], [0., 0.]]
-  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
-  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
-  >>> h = tf.keras.losses.Hinge()
-  >>> h(y_true, y_pred).numpy()
-  1.3
+
+    >>> # Using 'sum' reduction type.
+    >>> l = tf.keras.losses.LogCosh(
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> l(y_true, y_pred).numpy()
+    0.217
-
-  >>> # Calling with 'sample_weight'.
-  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
-  0.55
+
+    >>> # Using 'none' reduction type.
+    >>> l = tf.keras.losses.LogCosh(
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> l(y_true, y_pred).numpy()
+    array([0.217, 0.], dtype=float32)
-
-  >>> # Using 'sum' reduction type.
-  >>> h = tf.keras.losses.Hinge(
-  ...     reduction=tf.keras.losses.Reduction.SUM)
-  >>> h(y_true, y_pred).numpy()
-  2.6
+
+    Usage with the `compile()` API:
-
-  >>> # Using 'none' reduction type.
-  >>> h = tf.keras.losses.Hinge(
-  ...     reduction=tf.keras.losses.Reduction.NONE)
-  >>> h(y_true, y_pred).numpy()
-  array([1.1, 1.5], dtype=float32)
+
+    ```python
+    model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh())
+    ```
+    """
-
-  Usage with the `compile()` API:
+
+    def __init__(
+        self, reduction=losses_utils.ReductionV2.AUTO, name="log_cosh"
+    ):
+        """Initializes `LogCosh` instance.
+
+        Args:
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`.
`AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'log_cosh'.
+        """
+        super().__init__(log_cosh, name=name, reduction=reduction)
+
+
+@keras_export("keras.losses.KLDivergence")
+class KLDivergence(LossFunctionWrapper):
+    """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`.
-  def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='hinge'):
-    """Initializes `Hinge` instance.
+
+    `loss = y_true * log(y_true / y_pred)`
-
-    Args:
-      reduction: Type of `tf.keras.losses.Reduction` to apply to
-        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-        option will be determined by the usage context. For almost all cases
-        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
-        `tf.distribute.Strategy`, outside of built-in training loops such as
-        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-        will raise an error. Please see this custom training [tutorial](
-        https://www.tensorflow.org/tutorials/distribute/custom_training) for
-        more details.
-      name: Optional name for the instance. Defaults to 'hinge'.
-    """
-    super().__init__(hinge, name=name, reduction=reduction)
+
+    See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
+
+    Standalone usage:
+
+    >>> y_true = [[0, 1], [0, 0]]
+    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
+    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
+    >>> kl = tf.keras.losses.KLDivergence()
+    >>> kl(y_true, y_pred).numpy()
+    0.458
-@keras_export('keras.losses.SquaredHinge')
-class SquaredHinge(LossFunctionWrapper):
-  """Computes the squared hinge loss between `y_true` and `y_pred`.
+
+    >>> # Calling with 'sample_weight'.
+    >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
+    0.366
-
-  `loss = square(maximum(1 - y_true * y_pred, 0))`
+
+    >>> # Using 'sum' reduction type.
+    >>> kl = tf.keras.losses.KLDivergence(
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> kl(y_true, y_pred).numpy()
+    0.916
-
-  `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
-  provided we will convert them to -1 or 1.
+
+    >>> # Using 'none' reduction type.
+    >>> kl = tf.keras.losses.KLDivergence(
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> kl(y_true, y_pred).numpy()
+    array([0.916, -3.08e-06], dtype=float32)
-
-  Standalone usage:
+
+    Usage with the `compile()` API:
-
-  >>> y_true = [[0., 1.], [0., 0.]]
-  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
-  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
-  >>> h = tf.keras.losses.SquaredHinge()
-  >>> h(y_true, y_pred).numpy()
-  1.86
+
+    ```python
+    model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence())
+    ```
+    """
-
-  >>> # Calling with 'sample_weight'.
-  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
-  0.73
+
+    def __init__(
+        self, reduction=losses_utils.ReductionV2.AUTO, name="kl_divergence"
+    ):
+        """Initializes `KLDivergence` instance.
+
+        Args:
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context.
For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
+                Defaults to 'kl_divergence'.
+        """
+        super().__init__(kl_divergence, name=name, reduction=reduction)
+
+
+@keras_export("keras.losses.Huber")
+class Huber(LossFunctionWrapper):
+    """Computes the Huber loss between `y_true` & `y_pred`.
+
+    For each value x in `error = y_true - y_pred`:
+
+    ```
+    loss = 0.5 * x^2                  if |x| <= d
+    loss = 0.5 * d^2 + d * (|x| - d)  if |x| > d
+    ```
+    where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss
+
+    Standalone usage:
+
+    >>> y_true = [[0, 1], [0, 0]]
+    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
+    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
+    >>> h = tf.keras.losses.Huber()
+    >>> h(y_true, y_pred).numpy()
+    0.155
+
+    >>> # Calling with 'sample_weight'.
+    >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
+    0.09
+
+    >>> # Using 'sum' reduction type.
+    >>> h = tf.keras.losses.Huber(
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> h(y_true, y_pred).numpy()
+    0.31
+
+    >>> # Using 'none' reduction type.
+    >>> h = tf.keras.losses.Huber(
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> h(y_true, y_pred).numpy()
+    array([0.18, 0.13], dtype=float32)
+
+    Usage with the `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd', loss=tf.keras.losses.Huber())
+    ```
+    """
-
-  >>> # Using 'sum' reduction type.
-  >>> h = tf.keras.losses.SquaredHinge(
-  ...     reduction=tf.keras.losses.Reduction.SUM)
-  >>> h(y_true, y_pred).numpy()
-  3.72
+
+    def __init__(
+        self,
+        delta=1.0,
+        reduction=losses_utils.ReductionV2.AUTO,
+        name="huber_loss",
+    ):
+        """Initializes `Huber` instance.
+
+        Args:
+            delta: A float, the point where the Huber loss function changes from
+                a quadratic to linear.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'huber_loss'.
+        """
+        super().__init__(huber, name=name, reduction=reduction, delta=delta)
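The piecewise definition above can be checked element by element. A sketch with the example inputs, where every |error| <= delta so only the quadratic branch fires:

```python
import tensorflow as tf

y_true = tf.constant([[0., 1.], [0., 0.]])
y_pred = tf.constant([[0.6, 0.4], [0.4, 0.6]])
delta = 1.0

x = y_true - y_pred
quadratic = 0.5 * tf.square(x)
linear = 0.5 * delta**2 + delta * (tf.abs(x) - delta)
per_elem = tf.where(tf.abs(x) <= delta, quadratic, linear)
manual = tf.reduce_mean(per_elem, axis=-1)  # [0.18, 0.13]

h = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)
print(manual.numpy())
print(h(y_true, y_pred).numpy())
```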
- def __init__(self, - reduction=losses_utils.ReductionV2.AUTO, - name='squared_hinge'): - """Initializes `SquaredHinge` instance. + `loss = mean(square(y_true - y_pred), axis=-1)` + + Standalone usage: + + >>> y_true = np.random.randint(0, 2, size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> assert np.array_equal( + ... loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1)) Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. Defaults to 'squared_hinge'. + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + + Returns: + Mean squared error values. shape = `[batch_size, d0, .. dN-1]`. """ - super().__init__(squared_hinge, name=name, reduction=reduction) + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1) -@keras_export('keras.losses.CategoricalHinge') -class CategoricalHinge(LossFunctionWrapper): - """Computes the categorical hinge loss between `y_true` and `y_pred`. +def _ragged_tensor_apply_loss(loss_fn, y_true, y_pred, y_pred_extra_dim=False): + """Apply a loss function on a per batch basis. - `loss = maximum(neg - pos + 1, 0)` - where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)` + Args: + loss_fn: The loss function + y_true: truth values (RaggedTensor) + y_pred: predicted values (RaggedTensor) + y_pred_extra_dim: whether y_pred has an additional dimension compared to + y_true - Standalone usage: + Returns: + Loss-function result. A dense tensor if the output has a single + dimension (per-batch loss value); a ragged tensor otherwise. + """ - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> h = tf.keras.losses.CategoricalHinge() - >>> h(y_true, y_pred).numpy() - 1.4 + def rt_is_equiv_dense(rt): + """Returns true if this RaggedTensor has the same row_lengths across + + all ragged dimensions and thus can be converted to a dense tensor + without loss of information. + + Args: + rt: RaggedTensor. + """ + return tf.reduce_all( + [ + tf.equal( + tf.math.reduce_variance( + tf.cast(row_lens, backend.floatx()) + ), + tf.constant([0.0]), + ) + for row_lens in rt.nested_row_lengths() + ] + ) + + def _convert_to_dense(inputs): + return tuple( + rt.to_tensor() if isinstance(rt, tf.RaggedTensor) else rt + for rt in inputs + ) + + def _call_loss(inputs, ragged_output): + """Adapt the result to ragged or dense tensor according to the expected - >>> # Calling with 'sample_weight'. - >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() - 0.6 + output type. This is done so that all the return values of the map + operation have the same type. 
+ """ + r = loss_fn(*inputs) + if ragged_output and not isinstance(r, tf.RaggedTensor): + r = tf.RaggedTensor.from_tensor(r) + elif not ragged_output and isinstance(r, tf.RaggedTensor): + r = r.to_tensor() + return r + + def _wrapper(inputs, ragged_output): + _, y_pred = inputs + if isinstance(y_pred, tf.RaggedTensor): + return tf.cond( + rt_is_equiv_dense(y_pred), + lambda: _call_loss(_convert_to_dense(inputs), ragged_output), + lambda: _call_loss(inputs, ragged_output), + ) + + return loss_fn(*inputs) + + if not isinstance(y_true, tf.RaggedTensor): + return loss_fn(y_true, y_pred.to_tensor()) + + lshape = y_pred.shape.as_list()[1:-1] + if len(lshape) > 0: + spec = tf.RaggedTensorSpec(shape=lshape, dtype=y_pred.dtype) + else: + spec = tf.TensorSpec(shape=[], dtype=y_pred.dtype) - >>> # Using 'sum' reduction type. - >>> h = tf.keras.losses.CategoricalHinge( - ... reduction=tf.keras.losses.Reduction.SUM) - >>> h(y_true, y_pred).numpy() - 2.8 + nested_splits_list = [rt.nested_row_splits for rt in (y_true, y_pred)] + if y_pred_extra_dim: + # The last dimension of a categorical prediction may be ragged or not. + rdims = [len(slist) for slist in nested_splits_list] + if rdims[0] == rdims[1] - 1: + nested_splits_list[1] = nested_splits_list[1][:-1] - >>> # Using 'none' reduction type. - >>> h = tf.keras.losses.CategoricalHinge( - ... reduction=tf.keras.losses.Reduction.NONE) - >>> h(y_true, y_pred).numpy() - array([1.2, 1.6], dtype=float32) + map_fn = functools.partial(_wrapper, ragged_output=len(lshape) > 1) - Usage with the `compile()` API: + assertion_list = ragged_util.assert_splits_match(nested_splits_list) + with tf.control_dependencies(assertion_list): + return ragged_map_ops.map_fn(map_fn, elems=(y_true, y_pred), dtype=spec) - ```python - model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge()) - ``` - """ - def __init__(self, - reduction=losses_utils.ReductionV2.AUTO, - name='categorical_hinge'): - """Initializes `CategoricalHinge` instance. +@dispatch.dispatch_for_types(mean_squared_error, tf.RaggedTensor) +def _ragged_tensor_mse(y_true, y_pred): + """Implements support for handling RaggedTensors. Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. Defaults to 'categorical_hinge'. + y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: RaggedTensor predicted values. + shape = `[batch_size, d0, .. dN]`. + + Returns: + Mean squared error values. shape = `[batch_size, d0, .. dN-1]`. + When the number of dimensions of the batch feature vector [d0, .. dN] is + greater than one the return value is a RaggedTensor. Otherwise, a Dense + tensor with dimensions [batch_size] is returned. """ - super().__init__(categorical_hinge, name=name, reduction=reduction) + return _ragged_tensor_apply_loss(mean_squared_error, y_true, y_pred) -@keras_export('keras.losses.Poisson') -class Poisson(LossFunctionWrapper): - """Computes the Poisson loss between `y_true` and `y_pred`. 
+@keras_export( + "keras.metrics.mean_absolute_error", + "keras.metrics.mae", + "keras.metrics.MAE", + "keras.losses.mean_absolute_error", + "keras.losses.mae", + "keras.losses.MAE", +) +@tf.__internal__.dispatch.add_dispatch_support +def mean_absolute_error(y_true, y_pred): + """Computes the mean absolute error between labels and predictions. - `loss = y_pred - y_true * log(y_pred)` + `loss = mean(abs(y_true - y_pred), axis=-1)` - Standalone usage: + Standalone usage: - >>> y_true = [[0., 1.], [0., 0.]] - >>> y_pred = [[1., 1.], [0., 0.]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> p = tf.keras.losses.Poisson() - >>> p(y_true, y_pred).numpy() - 0.5 + >>> y_true = np.random.randint(0, 2, size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> assert np.array_equal( + ... loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1)) - >>> # Calling with 'sample_weight'. - >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() - 0.4 + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - >>> # Using 'sum' reduction type. - >>> p = tf.keras.losses.Poisson( - ... reduction=tf.keras.losses.Reduction.SUM) - >>> p(y_true, y_pred).numpy() - 0.999 + Returns: + Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`. + """ + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + return backend.mean(tf.abs(y_pred - y_true), axis=-1) - >>> # Using 'none' reduction type. - >>> p = tf.keras.losses.Poisson( - ... reduction=tf.keras.losses.Reduction.NONE) - >>> p(y_true, y_pred).numpy() - array([0.999, 0.], dtype=float32) - Usage with the `compile()` API: +@dispatch.dispatch_for_types(mean_absolute_error, tf.RaggedTensor) +def _ragged_tensor_mae(y_true, y_pred): + """RaggedTensor adapter for mean_absolute_error.""" + return _ragged_tensor_apply_loss(mean_absolute_error, y_true, y_pred) - ```python - model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson()) - ``` - """ - def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='poisson'): - """Initializes `Poisson` instance. +@keras_export( + "keras.metrics.mean_absolute_percentage_error", + "keras.metrics.mape", + "keras.metrics.MAPE", + "keras.losses.mean_absolute_percentage_error", + "keras.losses.mape", + "keras.losses.MAPE", +) +@tf.__internal__.dispatch.add_dispatch_support +def mean_absolute_percentage_error(y_true, y_pred): + """Computes the mean absolute percentage error between `y_true` & `y_pred`. - Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. Defaults to 'poisson'. 
- """ - super().__init__(poisson, name=name, reduction=reduction) + `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)` + Standalone usage: -@keras_export('keras.losses.LogCosh') -class LogCosh(LossFunctionWrapper): - """Computes the logarithm of the hyperbolic cosine of the prediction error. + >>> y_true = np.random.random(size=(2, 3)) + >>> y_true = np.maximum(y_true, 1e-7) # Prevent division by zero + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> assert np.array_equal( + ... loss.numpy(), + ... 100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1)) - `logcosh = log((exp(x) + exp(-x))/2)`, - where x is the error `y_pred - y_true`. + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - Standalone usage: + Returns: + Mean absolute percentage error values. shape = `[batch_size, d0, .. + dN-1]`. + """ + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + diff = tf.abs( + (y_true - y_pred) / backend.maximum(tf.abs(y_true), backend.epsilon()) + ) + return 100.0 * backend.mean(diff, axis=-1) - >>> y_true = [[0., 1.], [0., 0.]] - >>> y_pred = [[1., 1.], [0., 0.]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> l = tf.keras.losses.LogCosh() - >>> l(y_true, y_pred).numpy() - 0.108 - >>> # Calling with 'sample_weight'. - >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() - 0.087 +@dispatch.dispatch_for_types(mean_absolute_percentage_error, tf.RaggedTensor) +def _ragged_tensor_mape(y_true, y_pred): + """Support RaggedTensors.""" + return _ragged_tensor_apply_loss( + mean_absolute_percentage_error, y_true, y_pred + ) - >>> # Using 'sum' reduction type. - >>> l = tf.keras.losses.LogCosh( - ... reduction=tf.keras.losses.Reduction.SUM) - >>> l(y_true, y_pred).numpy() - 0.217 - >>> # Using 'none' reduction type. - >>> l = tf.keras.losses.LogCosh( - ... reduction=tf.keras.losses.Reduction.NONE) - >>> l(y_true, y_pred).numpy() - array([0.217, 0.], dtype=float32) +@keras_export( + "keras.metrics.mean_squared_logarithmic_error", + "keras.metrics.msle", + "keras.metrics.MSLE", + "keras.losses.mean_squared_logarithmic_error", + "keras.losses.msle", + "keras.losses.MSLE", +) +@tf.__internal__.dispatch.add_dispatch_support +def mean_squared_logarithmic_error(y_true, y_pred): + """Computes the mean squared logarithmic error between `y_true` & `y_pred`. - Usage with the `compile()` API: + `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)` - ```python - model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh()) - ``` - """ + Standalone usage: - def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='log_cosh'): - """Initializes `LogCosh` instance. + >>> y_true = np.random.randint(0, 2, size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> y_true = np.maximum(y_true, 1e-7) + >>> y_pred = np.maximum(y_pred, 1e-7) + >>> assert np.allclose( + ... loss.numpy(), + ... np.mean( + ... np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1)) Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. 
When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. Defaults to 'log_cosh'. - """ - super().__init__(log_cosh, name=name, reduction=reduction) + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + Returns: + Mean squared logarithmic error values. shape = `[batch_size, d0, .. + dN-1]`. + """ + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + first_log = tf.math.log(backend.maximum(y_pred, backend.epsilon()) + 1.0) + second_log = tf.math.log(backend.maximum(y_true, backend.epsilon()) + 1.0) + return backend.mean( + tf.math.squared_difference(first_log, second_log), axis=-1 + ) -@keras_export('keras.losses.KLDivergence') -class KLDivergence(LossFunctionWrapper): - """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`. - `loss = y_true * log(y_true / y_pred)` +@dispatch.dispatch_for_types(mean_squared_logarithmic_error, tf.RaggedTensor) +def _ragged_tensor_msle(y_true, y_pred): + """Implements support for handling RaggedTensors.""" + return _ragged_tensor_apply_loss( + mean_squared_logarithmic_error, y_true, y_pred + ) - See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence - Standalone usage: +def _maybe_convert_labels(y_true): + """Converts binary labels into -1/1.""" + are_zeros = tf.equal(y_true, 0) + are_ones = tf.equal(y_true, 1) + is_binary = tf.reduce_all(tf.logical_or(are_zeros, are_ones)) - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> kl = tf.keras.losses.KLDivergence() - >>> kl(y_true, y_pred).numpy() - 0.458 + def _convert_binary_labels(): + # Convert the binary labels to -1 or 1. + return 2.0 * y_true - 1.0 - >>> # Calling with 'sample_weight'. - >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() - 0.366 + updated_y_true = tf.__internal__.smart_cond.smart_cond( + is_binary, _convert_binary_labels, lambda: y_true + ) + return updated_y_true - >>> # Using 'sum' reduction type. - >>> kl = tf.keras.losses.KLDivergence( - ... reduction=tf.keras.losses.Reduction.SUM) - >>> kl(y_true, y_pred).numpy() - 0.916 - >>> # Using 'none' reduction type. - >>> kl = tf.keras.losses.KLDivergence( - ... reduction=tf.keras.losses.Reduction.NONE) - >>> kl(y_true, y_pred).numpy() - array([0.916, -3.08e-06], dtype=float32) +@keras_export("keras.metrics.squared_hinge", "keras.losses.squared_hinge") +@tf.__internal__.dispatch.add_dispatch_support +def squared_hinge(y_true, y_pred): + """Computes the squared hinge loss between `y_true` & `y_pred`. - Usage with the `compile()` API: + `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)` - ```python - model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence()) - ``` - """ + Standalone usage: - def __init__(self, - reduction=losses_utils.ReductionV2.AUTO, - name='kl_divergence'): - """Initializes `KLDivergence` instance. + >>> y_true = np.random.choice([-1, 1], size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> assert np.array_equal( + ... loss.numpy(), + ... np.mean(np.square(np.maximum(1. 
- y_true * y_pred, 0.)), axis=-1)) Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. Defaults to 'kl_divergence'. + y_true: The ground truth values. `y_true` values are expected to be -1 + or 1. If binary (0 or 1) labels are provided we will convert them to + -1 or 1. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + + Returns: + Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`. """ - super().__init__(kl_divergence, name=name, reduction=reduction) + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + y_true = _maybe_convert_labels(y_true) + return backend.mean( + tf.square(tf.maximum(1.0 - y_true * y_pred, 0.0)), axis=-1 + ) -@keras_export('keras.losses.Huber') -class Huber(LossFunctionWrapper): - """Computes the Huber loss between `y_true` and `y_pred`. - - For each value x in `error = y_true - y_pred`: - - ``` - loss = 0.5 * x^2 if |x| <= d - loss = 0.5 * d^2 + d * (|x| - d) if |x| > d - ``` - where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss - - Standalone usage: - - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> h = tf.keras.losses.Huber() - >>> h(y_true, y_pred).numpy() - 0.155 - - >>> # Calling with 'sample_weight'. - >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() - 0.09 - - >>> # Using 'sum' reduction type. - >>> h = tf.keras.losses.Huber( - ... reduction=tf.keras.losses.Reduction.SUM) - >>> h(y_true, y_pred).numpy() - 0.31 - - >>> # Using 'none' reduction type. - >>> h = tf.keras.losses.Huber( - ... reduction=tf.keras.losses.Reduction.NONE) - >>> h(y_true, y_pred).numpy() - array([0.18, 0.13], dtype=float32) - - Usage with the `compile()` API: - - ```python - model.compile(optimizer='sgd', loss=tf.keras.losses.Huber()) - ``` - """ - - def __init__(self, - delta=1.0, - reduction=losses_utils.ReductionV2.AUTO, - name='huber_loss'): - """Initializes `Huber` instance. +@keras_export("keras.metrics.hinge", "keras.losses.hinge") +@tf.__internal__.dispatch.add_dispatch_support +def hinge(y_true, y_pred): + """Computes the hinge loss between `y_true` & `y_pred`. + + `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)` + + Standalone usage: + + >>> y_true = np.random.choice([-1, 1], size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.hinge(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> assert np.array_equal( + ... loss.numpy(), + ... np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1)) Args: - delta: A float, the point where the Huber loss function changes from a - quadratic to linear. - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. 
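`_maybe_convert_labels` above is why the hinge family accepts either label convention: if every entry of `y_true` is 0 or 1, labels are remapped to -1/1 via `2 * y_true - 1` before the margin is computed. A quick illustration of the equivalence (assumes TensorFlow 2.x):

```python
import tensorflow as tf

y_pred = tf.constant([[0.6, -0.4], [0.3, 0.8]])
binary = tf.constant([[0.0, 1.0], [1.0, 0.0]])  # {0, 1} labels
signed = 2.0 * binary - 1.0                     # the same labels as {-1, +1}

# Both conventions give identical results, because the {0, 1} input is
# detected and remapped internally before the margin is computed.
print(tf.keras.losses.squared_hinge(binary, y_pred).numpy())
print(tf.keras.losses.squared_hinge(signed, y_pred).numpy())
```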
When used with - `tf.distribute.Strategy`, outside of built-in training loops such as - `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for - more details. - name: Optional name for the instance. Defaults to 'huber_loss'. + y_true: The ground truth values. `y_true` values are expected to be -1 + or 1. If binary (0 or 1) labels are provided we will convert them to + -1 or 1. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + + Returns: + Hinge loss values. shape = `[batch_size, d0, .. dN-1]`. """ - super().__init__(huber, name=name, reduction=reduction, delta=delta) + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + y_true = _maybe_convert_labels(y_true) + return backend.mean(tf.maximum(1.0 - y_true * y_pred, 0.0), axis=-1) -@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse', - 'keras.metrics.MSE', 'keras.losses.mean_squared_error', - 'keras.losses.mse', 'keras.losses.MSE') +@keras_export("keras.losses.categorical_hinge") @tf.__internal__.dispatch.add_dispatch_support -def mean_squared_error(y_true, y_pred): - """Computes the mean squared error between labels and predictions. +def categorical_hinge(y_true, y_pred): + """Computes the categorical hinge loss between `y_true` & `y_pred`. - After computing the squared distance between the inputs, the mean value over - the last dimension is returned. + `loss = maximum(neg - pos + 1, 0)` + where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)` - `loss = mean(square(y_true - y_pred), axis=-1)` + Standalone usage: - Standalone usage: + >>> y_true = np.random.randint(0, 3, size=(2,)) + >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> pos = np.sum(y_true * y_pred, axis=-1) + >>> neg = np.amax((1. - y_true) * y_pred, axis=-1) + >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.)) - >>> y_true = np.random.randint(0, 2, size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> assert np.array_equal( - ... loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1)) + Args: + y_true: The ground truth values. `y_true` values are expected to be + either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor). + y_pred: The predicted values. - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + Returns: + Categorical hinge loss values. + """ + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + pos = tf.reduce_sum(y_true * y_pred, axis=-1) + neg = tf.reduce_max((1.0 - y_true) * y_pred, axis=-1) + zero = tf.cast(0.0, y_pred.dtype) + return tf.maximum(neg - pos + 1.0, zero) - Returns: - Mean squared error values. shape = `[batch_size, d0, .. dN-1]`. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1) +@keras_export("keras.losses.huber", v1=[]) +@tf.__internal__.dispatch.add_dispatch_support +def huber(y_true, y_pred, delta=1.0): + """Computes Huber loss value. 
-def _ragged_tensor_apply_loss(loss_fn, y_true, y_pred, y_pred_extra_dim=False): - """Apply a loss function on a per batch basis. + For each value x in `error = y_true - y_pred`: - Args: - loss_fn: The loss function - y_true: truth values (RaggedTensor) - y_pred: predicted values (RaggedTensor) - y_pred_extra_dim: whether y_pred has an additional dimension compared to - y_true + ``` + loss = 0.5 * x^2 if |x| <= d + loss = d * |x| - 0.5 * d^2 if |x| > d + ``` + where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss - Returns: - Loss-function result. A dense tensor if the output has a single dimension - (per-batch loss value); a ragged tensor otherwise. - """ + Args: + y_true: tensor of true targets. + y_pred: tensor of predicted targets. + delta: A float, the point where the Huber loss function changes from a + quadratic to linear. - def rt_is_equiv_dense(rt): - """Returns true if this RaggedTensor has the same row_lenghts across + Returns: + Tensor with one scalar loss entry per sample. + """ + y_pred = tf.cast(y_pred, dtype=backend.floatx()) + y_true = tf.cast(y_true, dtype=backend.floatx()) + delta = tf.cast(delta, dtype=backend.floatx()) + error = tf.subtract(y_pred, y_true) + abs_error = tf.abs(error) + half = tf.convert_to_tensor(0.5, dtype=abs_error.dtype) + return backend.mean( + tf.where( + abs_error <= delta, + half * tf.square(error), + delta * abs_error - half * tf.square(delta), + ), + axis=-1, + ) - all ragged dimensions and thus can be converted to a dense tensor - without loss of information. + +@keras_export( + "keras.losses.log_cosh", + "keras.losses.logcosh", + "keras.metrics.log_cosh", + "keras.metrics.logcosh", +) +@tf.__internal__.dispatch.add_dispatch_support +def log_cosh(y_true, y_pred): + """Logarithm of the hyperbolic cosine of the prediction error. + + `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and + to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly + like the mean squared error, but will not be so strongly affected by the + occasional wildly incorrect prediction. + + Standalone usage: + + >>> y_true = np.random.random(size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.logcosh(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> x = y_pred - y_true + >>> assert np.allclose( + ... loss.numpy(), + ... np.mean(x + np.log(np.exp(-2. * x) + 1.) - tf.math.log(2.), + ... axis=-1), + ... atol=1e-5) Args: - rt: RaggedTensor. - """ - return tf.reduce_all([ - tf.equal( - tf.math.reduce_variance(tf.cast(row_lens, backend.floatx())), - tf.constant([0.])) for row_lens in rt.nested_row_lengths() - ]) + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - def _convert_to_dense(inputs): - return tuple( - rt.to_tensor() if isinstance(rt, tf.RaggedTensor) else rt - for rt in inputs) + Returns: + Logcosh error values. shape = `[batch_size, d0, .. dN-1]`. + """ + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) - def _call_loss(inputs, ragged_output): - """ Adapt the result to ragged or dense tensor according to the expected + def _logcosh(x): + return ( + x + tf.math.softplus(-2.0 * x) - tf.cast(tf.math.log(2.0), x.dtype) + ) - output type. This is done so that all the return values of the map - operation have the same type. 
- """ - r = loss_fn(*inputs) - if ragged_output and not isinstance(r, tf.RaggedTensor): - r = tf.RaggedTensor.from_tensor(r) - elif not ragged_output and isinstance(r, tf.RaggedTensor): - r = r.to_tensor() - return r - - def _wrapper(inputs, ragged_output): - _, y_pred = inputs - if isinstance(y_pred, tf.RaggedTensor): - return tf.cond( - rt_is_equiv_dense(y_pred), - lambda: _call_loss(_convert_to_dense(inputs), ragged_output), - lambda: _call_loss(inputs, ragged_output)) - - return loss_fn(*inputs) - - if not isinstance(y_true, tf.RaggedTensor): - return loss_fn(y_true, y_pred.to_tensor()) - - lshape = y_pred.shape.as_list()[1:-1] - if len(lshape) > 0: - spec = tf.RaggedTensorSpec(shape=lshape, dtype=y_pred.dtype) - else: - spec = tf.TensorSpec(shape=[], dtype=y_pred.dtype) - - nested_splits_list = [rt.nested_row_splits for rt in (y_true, y_pred)] - if y_pred_extra_dim: - # The last dimension of a categorical prediction may be ragged or not. - rdims = [len(slist) for slist in nested_splits_list] - if rdims[0] == rdims[1] - 1: - nested_splits_list[1] = nested_splits_list[1][:-1] - - map_fn = functools.partial(_wrapper, ragged_output=len(lshape) > 1) - - assertion_list = ragged_util.assert_splits_match(nested_splits_list) - with tf.control_dependencies(assertion_list): - return ragged_map_ops.map_fn(map_fn, elems=(y_true, y_pred), dtype=spec) + return backend.mean(_logcosh(y_pred - y_true), axis=-1) -@dispatch.dispatch_for_types(mean_squared_error, tf.RaggedTensor) -def _ragged_tensor_mse(y_true, y_pred): - """Implements support for handling RaggedTensors. +@keras_export( + "keras.metrics.categorical_crossentropy", + "keras.losses.categorical_crossentropy", +) +@tf.__internal__.dispatch.add_dispatch_support +def categorical_crossentropy( + y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 +): + """Computes the categorical crossentropy loss. - Args: - y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: RaggedTensor predicted values. shape = `[batch_size, d0, .. dN]`. + Standalone usage: - Returns: - Mean squared error values. shape = `[batch_size, d0, .. dN-1]`. - When the number of dimensions of the batch feature vector [d0, .. dN] is - greater than one the return value is a RaggedTensor. Otherwise a Dense - tensor with dimensions [batch_size] is returned. - """ - return _ragged_tensor_apply_loss(mean_squared_error, y_true, y_pred) + >>> y_true = [[0, 1, 0], [0, 0, 1]] + >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] + >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> loss.numpy() + array([0.0513, 2.303], dtype=float32) + Args: + y_true: Tensor of one-hot true targets. + y_pred: Tensor of predicted targets. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability distribution. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For + example, if `0.1`, use `0.1 / num_classes` for non-target labels + and `0.9 + 0.1 / num_classes` for target labels. + axis: Defaults to -1. The dimension along which the entropy is + computed. -@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae', - 'keras.metrics.MAE', 'keras.losses.mean_absolute_error', - 'keras.losses.mae', 'keras.losses.MAE') -@tf.__internal__.dispatch.add_dispatch_support -def mean_absolute_error(y_true, y_pred): - """Computes the mean absolute error between labels and predictions. + Returns: + Categorical crossentropy loss value. 
+ """ + if isinstance(axis, bool): + raise ValueError( + "`axis` must be of type `int`. " + f"Received: axis={axis} of type {type(axis)}" + ) + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) + + if y_pred.shape[-1] == 1: + warnings.warn( + "In loss categorical_crossentropy, expected " + "y_pred.shape to be (batch_size, num_classes) " + f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. " + "Consider using 'binary_crossentropy' if you only have 2 classes.", + SyntaxWarning, + stacklevel=2, + ) + + def _smooth_labels(): + num_classes = tf.cast(tf.shape(y_true)[axis], y_pred.dtype) + return y_true * (1.0 - label_smoothing) + ( + label_smoothing / num_classes + ) + + y_true = tf.__internal__.smart_cond.smart_cond( + label_smoothing, _smooth_labels, lambda: y_true + ) - `loss = mean(abs(y_true - y_pred), axis=-1)` + return backend.categorical_crossentropy( + y_true, y_pred, from_logits=from_logits, axis=axis + ) - Standalone usage: - >>> y_true = np.random.randint(0, 2, size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> assert np.array_equal( - ... loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1)) +@dispatch.dispatch_for_types(categorical_crossentropy, tf.RaggedTensor) +def _ragged_tensor_categorical_crossentropy( + y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 +): + """Implements support for handling RaggedTensors. - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + Args: + y_true: Tensor of one-hot true targets. + y_pred: Tensor of predicted targets. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability distribution. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For + example, if `0.1`, use `0.1 / num_classes` for non-target labels + and `0.9 + 0.1 / num_classes` for target labels. + axis: The axis along which to compute crossentropy (the features axis). + Defaults to -1. - Returns: - Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - return backend.mean(tf.abs(y_pred - y_true), axis=-1) + Returns: + Categorical crossentropy loss value. + Expected shape: (batch, sequence_len, n_classes) with sequence_len + being variable per batch. + Return shape: (batch, sequence_len). -@dispatch.dispatch_for_types(mean_absolute_error, tf.RaggedTensor) -def _ragged_tensor_mae(y_true, y_pred): - """RaggedTensor adapter for mean_absolute_error.""" - return _ragged_tensor_apply_loss(mean_absolute_error, y_true, y_pred) + When used by CategoricalCrossentropy() with the default reduction + (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the + number of elements independent of the batch. E.g. if the RaggedTensor + has 2 batches with [2, 1] values respectively the resulting loss is + the sum of the individual loss values divided by 3. 
+ """ + fn = functools.partial( + categorical_crossentropy, + from_logits=from_logits, + label_smoothing=label_smoothing, + axis=axis, + ) + return _ragged_tensor_apply_loss(fn, y_true, y_pred) -@keras_export('keras.metrics.mean_absolute_percentage_error', - 'keras.metrics.mape', 'keras.metrics.MAPE', - 'keras.losses.mean_absolute_percentage_error', - 'keras.losses.mape', 'keras.losses.MAPE') +@keras_export( + "keras.metrics.categorical_focal_crossentropy", + "keras.losses.categorical_focal_crossentropy", +) @tf.__internal__.dispatch.add_dispatch_support -def mean_absolute_percentage_error(y_true, y_pred): - """Computes the mean absolute percentage error between `y_true` and `y_pred`. - - `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)` - - Standalone usage: - - >>> y_true = np.random.random(size=(2, 3)) - >>> y_true = np.maximum(y_true, 1e-7) # Prevent division by zero - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> assert np.array_equal( - ... loss.numpy(), - ... 100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1)) - - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - - Returns: - Mean absolute percentage error values. shape = `[batch_size, d0, .. dN-1]`. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - diff = tf.abs( - (y_true - y_pred) / backend.maximum(tf.abs(y_true), - backend.epsilon())) - return 100. * backend.mean(diff, axis=-1) - - -@dispatch.dispatch_for_types(mean_absolute_percentage_error, - tf.RaggedTensor) -def _ragged_tensor_mape(y_true, y_pred): - """Support RaggedTensors.""" - return _ragged_tensor_apply_loss(mean_absolute_percentage_error, y_true, - y_pred) +def categorical_focal_crossentropy( + y_true, + y_pred, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1, +): + """Computes the categorical focal crossentropy loss. + Standalone usage: + >>> y_true = [[0, 1, 0], [0, 0, 1]] + >>> y_pred = [[0.05, 0.9, 0.05], [0.1, 0.85, 0.05]] + >>> loss = tf.keras.losses.categorical_focal_crossentropy(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> loss.numpy() + array([2.63401289e-04, 6.75912094e-01], dtype=float32) -@keras_export('keras.metrics.mean_squared_logarithmic_error', - 'keras.metrics.msle', 'keras.metrics.MSLE', - 'keras.losses.mean_squared_logarithmic_error', - 'keras.losses.msle', 'keras.losses.MSLE') -@tf.__internal__.dispatch.add_dispatch_support -def mean_squared_logarithmic_error(y_true, y_pred): - """Computes the mean squared logarithmic error between `y_true` and `y_pred`. - - `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)` - - Standalone usage: - - >>> y_true = np.random.randint(0, 2, size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> y_true = np.maximum(y_true, 1e-7) - >>> y_pred = np.maximum(y_pred, 1e-7) - >>> assert np.allclose( - ... loss.numpy(), - ... np.mean( - ... np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1)) - - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - - Returns: - Mean squared logarithmic error values. shape = `[batch_size, d0, .. dN-1]`. 
- """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - first_log = tf.math.log(backend.maximum(y_pred, backend.epsilon()) + 1.) - second_log = tf.math.log(backend.maximum(y_true, backend.epsilon()) + 1.) - return backend.mean( - tf.math.squared_difference(first_log, second_log), axis=-1) - - -@dispatch.dispatch_for_types(mean_squared_logarithmic_error, - tf.RaggedTensor) -def _ragged_tensor_msle(y_true, y_pred): - """Implements support for handling RaggedTensors.""" - return _ragged_tensor_apply_loss(mean_squared_logarithmic_error, y_true, - y_pred) + Args: + y_true: Tensor of one-hot true targets. + y_pred: Tensor of predicted targets. + alpha: A weight balancing factor for all classes, default is `0.25` as + mentioned in the reference. It can be a list of floats or a scalar. + In the multi-class case, alpha may be set by inverse class + frequency by using `compute_class_weight` from `sklearn.utils`. + gamma: A focusing parameter, default is `2.0` as mentioned in the + reference. It helps to gradually reduce the importance given to + simple examples in a smooth manner. When `gamma` = 0, there is + no focal effect on the categorical crossentropy. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability + distribution. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For + example, if `0.1`, use `0.1 / num_classes` for non-target labels + and `0.9 + 0.1 / num_classes` for target labels. + axis: Defaults to -1. The dimension along which the entropy is + computed. + Returns: + Categorical focal crossentropy loss value. + """ + if isinstance(axis, bool): + raise ValueError( + "`axis` must be of type `int`. " + f"Received: axis={axis} of type {type(axis)}" + ) + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) + + if y_pred.shape[-1] == 1: + warnings.warn( + "In loss categorical_focal_crossentropy, expected " + "y_pred.shape to be (batch_size, num_classes) " + f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. " + "Consider using 'binary_crossentropy' if you only have 2 classes.", + SyntaxWarning, + stacklevel=2, + ) + + def _smooth_labels(): + num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype) + return y_true * (1.0 - label_smoothing) + ( + label_smoothing / num_classes + ) + + y_true = tf.__internal__.smart_cond.smart_cond( + label_smoothing, _smooth_labels, lambda: y_true + ) -def _maybe_convert_labels(y_true): - """Converts binary labels into -1/1.""" - are_zeros = tf.equal(y_true, 0) - are_ones = tf.equal(y_true, 1) - is_binary = tf.reduce_all(tf.logical_or(are_zeros, are_ones)) + return backend.categorical_focal_crossentropy( + target=y_true, + output=y_pred, + alpha=alpha, + gamma=gamma, + from_logits=from_logits, + axis=axis, + ) - def _convert_binary_labels(): - # Convert the binary labels to -1 or 1. - return 2. * y_true - 1. - updated_y_true = tf.__internal__.smart_cond.smart_cond(is_binary, _convert_binary_labels, - lambda: y_true) - return updated_y_true +@dispatch.dispatch_for_types(categorical_focal_crossentropy, tf.RaggedTensor) +def _ragged_tensor_categorical_focal_crossentropy( + y_true, + y_pred, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1, +): + """Implements support for handling RaggedTensors. + Expected shape: (batch, sequence_len, n_classes) with sequence_len + being variable per batch. 
+ Return shape: (batch, sequence_len). + When used by CategoricalFocalCrossentropy() with the default reduction + (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the + number of elements independent of the batch. E.g. if the RaggedTensor + has 2 batches with [2, 1] values respectively the resulting loss is + the sum of the individual loss values divided by 3. -@keras_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge') -@tf.__internal__.dispatch.add_dispatch_support -def squared_hinge(y_true, y_pred): - """Computes the squared hinge loss between `y_true` and `y_pred`. + Args: + alpha: A weight balancing factor for all classes, default is `0.25` as + mentioned in the reference. It can be a list of floats or a scalar. + In the multi-class case, alpha may be set by inverse class + frequency by using `compute_class_weight` from `sklearn.utils`. + gamma: A focusing parameter, default is `2.0` as mentioned in the + reference. It helps to gradually reduce the importance given to + simple examples in a smooth manner. When `gamma` = 0, there is + no focal effect on the categorical crossentropy. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability distribution. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For + example, if `0.1`, use `0.1 / num_classes` for non-target labels + and `0.9 + 0.1 / num_classes` for target labels. + axis: Defaults to -1. The dimension along which the entropy is + computed. - `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)` + Returns: + Categorical focal crossentropy loss value. + """ + fn = functools.partial( + categorical_focal_crossentropy, + alpha=alpha, + gamma=gamma, + from_logits=from_logits, + label_smoothing=label_smoothing, + axis=axis, + ) + return _ragged_tensor_apply_loss(fn, y_true, y_pred) - Standalone usage: - >>> y_true = np.random.choice([-1, 1], size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> assert np.array_equal( - ... loss.numpy(), - ... np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1)) +@keras_export( + "keras.metrics.sparse_categorical_crossentropy", + "keras.losses.sparse_categorical_crossentropy", +) +@tf.__internal__.dispatch.add_dispatch_support +def sparse_categorical_crossentropy( + y_true, y_pred, from_logits=False, axis=-1, ignore_class=None +): + """Computes the sparse categorical crossentropy loss. + + Standalone usage: + + >>> y_true = [1, 2] + >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] + >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> loss.numpy() + array([0.0513, 2.303], dtype=float32) + + >>> y_true = [[[ 0, 2], + ... [-1, -1]], + ... [[ 0, 2], + ... [-1, -1]]] + >>> y_pred = [[[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]], + ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]], + ... [[[1.0, 0.0, 0.0], [0.0, 0.5, 0.5]], + ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]]] + >>> loss = tf.keras.losses.sparse_categorical_crossentropy( + ... y_true, y_pred, ignore_class=-1) + >>> loss.numpy() + array([[[2.3841855e-07, 2.3841855e-07], + [0.0000000e+00, 0.0000000e+00]], + [[2.3841855e-07, 6.9314730e-01], + [0.0000000e+00, 0.0000000e+00]]], dtype=float32) - Args: - y_true: The ground truth values. `y_true` values are expected to be -1 or 1. - If binary (0 or 1) labels are provided we will convert them to -1 or 1. - shape = `[batch_size, d0, .. 
dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + Args: + y_true: Ground truth values. + y_pred: The predicted values. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability distribution. + axis: Defaults to -1. The dimension along which the entropy is + computed. + ignore_class: Optional integer. The ID of a class to be ignored during + loss computation. This is useful, for example, in segmentation + problems featuring a "void" class (commonly -1 or 255) in + segmentation maps. By default (`ignore_class=None`), all classes are + considered. - Returns: - Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - y_true = _maybe_convert_labels(y_true) - return backend.mean( - tf.square(tf.maximum(1. - y_true * y_pred, 0.)), axis=-1) + Returns: + Sparse categorical crossentropy loss value. + """ + return backend.sparse_categorical_crossentropy( + y_true, + y_pred, + from_logits=from_logits, + ignore_class=ignore_class, + axis=axis, + ) -@keras_export('keras.metrics.hinge', 'keras.losses.hinge') -@tf.__internal__.dispatch.add_dispatch_support -def hinge(y_true, y_pred): - """Computes the hinge loss between `y_true` and `y_pred`. +@dispatch.dispatch_for_types(sparse_categorical_crossentropy, tf.RaggedTensor) +def _ragged_tensor_sparse_categorical_crossentropy( + y_true, y_pred, from_logits=False, axis=-1, ignore_class=None +): + """Implements support for handling RaggedTensors. - `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)` + Expected y_pred shape: (batch, sequence_len, n_classes) with sequence_len + being variable per batch. + Return shape: (batch, sequence_len). - Standalone usage: + When used by SparseCategoricalCrossentropy() with the default reduction + (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the + number of elements independent of the batch. E.g. if the RaggedTensor + has 2 batches with [2, 1] values respectively, the resulting loss is + the sum of the individual loss values divided by 3. + """ + fn = functools.partial( + sparse_categorical_crossentropy, + from_logits=from_logits, + ignore_class=ignore_class, + axis=axis, + ) + return _ragged_tensor_apply_loss(fn, y_true, y_pred, y_pred_extra_dim=True) - >>> y_true = np.random.choice([-1, 1], size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.hinge(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> assert np.array_equal( - ... loss.numpy(), - ... np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1)) - Args: - y_true: The ground truth values. `y_true` values are expected to be -1 or 1. - If binary (0 or 1) labels are provided they will be converted to -1 or 1. - shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. +@keras_export( + "keras.metrics.binary_crossentropy", "keras.losses.binary_crossentropy" +) +@tf.__internal__.dispatch.add_dispatch_support +def binary_crossentropy( + y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 +): + """Computes the binary crossentropy loss. - Returns: - Hinge loss values. shape = `[batch_size, d0, .. dN-1]`. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - y_true = _maybe_convert_labels(y_true) - return backend.mean(tf.maximum(1. 
- y_true * y_pred, 0.), axis=-1) + Standalone usage: + >>> y_true = [[0, 1], [0, 0]] + >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] + >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> loss.numpy() + array([0.916 , 0.714], dtype=float32) -@keras_export('keras.losses.categorical_hinge') -@tf.__internal__.dispatch.add_dispatch_support -def categorical_hinge(y_true, y_pred): - """Computes the categorical hinge loss between `y_true` and `y_pred`. - - `loss = maximum(neg - pos + 1, 0)` - where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)` - - Standalone usage: - - >>> y_true = np.random.randint(0, 3, size=(2,)) - >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> pos = np.sum(y_true * y_pred, axis=-1) - >>> neg = np.amax((1. - y_true) * y_pred, axis=-1) - >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.)) - - Args: - y_true: The ground truth values. `y_true` values are expected to be - either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor). - y_pred: The predicted values. - - Returns: - Categorical hinge loss values. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - pos = tf.reduce_sum(y_true * y_pred, axis=-1) - neg = tf.reduce_max((1. - y_true) * y_pred, axis=-1) - zero = tf.cast(0., y_pred.dtype) - return tf.maximum(neg - pos + 1., zero) - - -@keras_export('keras.losses.huber', v1=[]) -@tf.__internal__.dispatch.add_dispatch_support -def huber(y_true, y_pred, delta=1.0): - """Computes Huber loss value. - - For each value x in `error = y_true - y_pred`: - - ``` - loss = 0.5 * x^2 if |x| <= d - loss = d * |x| - 0.5 * d^2 if |x| > d - ``` - where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss - - Args: - y_true: tensor of true targets. - y_pred: tensor of predicted targets. - delta: A float, the point where the Huber loss function changes from a - quadratic to linear. - - Returns: - Tensor with one scalar loss entry per sample. - """ - y_pred = tf.cast(y_pred, dtype=backend.floatx()) - y_true = tf.cast(y_true, dtype=backend.floatx()) - delta = tf.cast(delta, dtype=backend.floatx()) - error = tf.subtract(y_pred, y_true) - abs_error = tf.abs(error) - half = tf.convert_to_tensor(0.5, dtype=abs_error.dtype) - return backend.mean( - tf.where(abs_error <= delta, half * tf.square(error), - delta * abs_error - half * tf.square(delta)), - axis=-1) - - -@keras_export('keras.losses.log_cosh', 'keras.losses.logcosh', - 'keras.metrics.log_cosh', 'keras.metrics.logcosh') -@tf.__internal__.dispatch.add_dispatch_support -def log_cosh(y_true, y_pred): - """Logarithm of the hyperbolic cosine of the prediction error. + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability distribution. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by + squeezing them towards 0.5. That is, using + `1. - 0.5 * label_smoothing` for the target class and + `0.5 * label_smoothing` for the non-target class. + axis: The axis along which the mean is computed. Defaults to -1. - `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and - to `abs(x) - log(2)` for large `x`.
This means that 'logcosh' works mostly - like the mean squared error, but will not be so strongly affected by the - occasional wildly incorrect prediction. + Returns: + Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`. + """ + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) - Standalone usage: + def _smooth_labels(): + return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing - >>> y_true = np.random.random(size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.logcosh(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> x = y_pred - y_true - >>> assert np.allclose( - ... loss.numpy(), - ... np.mean(x + np.log(np.exp(-2. * x) + 1.) - tf.math.log(2.), axis=-1), - ... atol=1e-5) + y_true = tf.__internal__.smart_cond.smart_cond( + label_smoothing, _smooth_labels, lambda: y_true + ) - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + return backend.mean( + backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits), + axis=axis, + ) - Returns: - Logcosh error values. shape = `[batch_size, d0, .. dN-1]`. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - def _logcosh(x): - return x + tf.math.softplus(-2. * x) - tf.cast( - tf.math.log(2.), x.dtype) +@dispatch.dispatch_for_types(binary_crossentropy, tf.RaggedTensor) +def _ragged_tensor_binary_crossentropy( + y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 +): + """Implements support for handling RaggedTensors. - return backend.mean(_logcosh(y_pred - y_true), axis=-1) + Args: + y_true: Tensor of one-hot true targets. + y_pred: Tensor of predicted targets. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability distribution. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For + example, if `0.1`, use `0.1 / num_classes` for non-target labels + and `0.9 + 0.1 / num_classes` for target labels. + axis: Axis along which to compute crossentropy. + Returns: + Binary crossentropy loss value. -@keras_export('keras.metrics.categorical_crossentropy', - 'keras.losses.categorical_crossentropy') -@tf.__internal__.dispatch.add_dispatch_support -def categorical_crossentropy(y_true, - y_pred, - from_logits=False, - label_smoothing=0., - axis=-1): - """Computes the categorical crossentropy loss. - - Standalone usage: - - >>> y_true = [[0, 1, 0], [0, 0, 1]] - >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] - >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> loss.numpy() - array([0.0513, 2.303], dtype=float32) - - Args: - y_true: Tensor of one-hot true targets. - y_pred: Tensor of predicted targets. - from_logits: Whether `y_pred` is expected to be a logits tensor. By default, - we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For - example, if `0.1`, use `0.1 / num_classes` for non-target labels - and `0.9 + 0.1 / num_classes` for target labels. - axis: Defaults to -1. The dimension along which the entropy is - computed. - - Returns: - Categorical crossentropy loss value. - """ - if isinstance(axis, bool): - raise ValueError( - f'`axis` must be of type `int`. 
Received: axis={axis} of type {type(axis)}' + Expected shape: (batch, sequence_len) with sequence_len being variable + per batch. + Return shape: (batch,); returns the per batch mean of the loss values. + + When used by BinaryCrossentropy() with the default reduction + (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over + the number of batches. + """ + fn = functools.partial( + binary_crossentropy, + from_logits=from_logits, + label_smoothing=label_smoothing, + axis=axis, ) - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) - - def _smooth_labels(): - num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype) - return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes) - - y_true = tf.__internal__.smart_cond.smart_cond(label_smoothing, _smooth_labels, - lambda: y_true) - - return backend.categorical_crossentropy( - y_true, y_pred, from_logits=from_logits, axis=axis) - - -@dispatch.dispatch_for_types(categorical_crossentropy, - tf.RaggedTensor) -def _ragged_tensor_categorical_crossentropy(y_true, - y_pred, - from_logits=False, - label_smoothing=0., - axis=-1): - """Implements support for handling RaggedTensors. - - Args: - y_true: Tensor of one-hot true targets. - y_pred: Tensor of predicted targets. - from_logits: Whether `y_pred` is expected to be a logits tensor. By default, - we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For - example, if `0.1`, use `0.1 / num_classes` for non-target labels - and `0.9 + 0.1 / num_classes` for target labels. - axis: The axis along which to compute crossentropy (the features axis). - Defaults to -1. - - Returns: - Categorical crossentropy loss value. - - Expected shape: (batch, sequence_len, n_classes) with sequence_len - being variable per batch. - Return shape: (batch, sequence_len). - - When used by CategoricalCrossentropy() with the default reduction - (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the - number of elements independent of the batch. E.g. if the RaggedTensor - has 2 batches with [2, 1] values respectively the resulting loss is - the sum of the individual loss values divided by 3. - """ - fn = functools.partial( - categorical_crossentropy, - from_logits=from_logits, - label_smoothing=label_smoothing, - axis=axis) - return _ragged_tensor_apply_loss(fn, y_true, y_pred) - - -@keras_export('keras.metrics.sparse_categorical_crossentropy', - 'keras.losses.sparse_categorical_crossentropy') -@tf.__internal__.dispatch.add_dispatch_support -def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1): - """Computes the sparse categorical crossentropy loss. - - Standalone usage: - - >>> y_true = [1, 2] - >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] - >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> loss.numpy() - array([0.0513, 2.303], dtype=float32) - - Args: - y_true: Ground truth values. - y_pred: The predicted values. - from_logits: Whether `y_pred` is expected to be a logits tensor. By default, - we assume that `y_pred` encodes a probability distribution. - axis: Defaults to -1. The dimension along which the entropy is - computed. - - Returns: - Sparse categorical crossentropy loss value. 
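The `ignore_class` argument gained by the replacement `sparse_categorical_crossentropy` above is aimed at segmentation-style targets, where a sentinel label (often -1 or 255) marks positions that should not contribute to the loss. A small usage sketch, assuming a TF/Keras version where the parameter is available:

```python
import tensorflow as tf

# Label -1 marks "void" positions that must not contribute to the loss.
y_true = tf.constant([0, 2, -1])
y_pred = tf.constant([[0.9, 0.05, 0.05],
                      [0.1, 0.1, 0.8],
                      [0.3, 0.4, 0.3]])

loss = tf.keras.losses.sparse_categorical_crossentropy(
    y_true, y_pred, ignore_class=-1
)
print(loss.numpy())  # ~[0.105, 0.223, 0.0]; the ignored entry is zeroed
```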
- """ - y_pred = tf.convert_to_tensor(y_pred) - - return backend.sparse_categorical_crossentropy( - y_true, y_pred, from_logits=from_logits, axis=axis) - - -@dispatch.dispatch_for_types(sparse_categorical_crossentropy, - tf.RaggedTensor) -def _ragged_tensor_sparse_categorical_crossentropy(y_true, - y_pred, - from_logits=False, - axis=-1): - """ Implements support for handling RaggedTensors. - - Expected y_pred shape: (batch, sequence_len, n_classes) with sequence_len - being variable per batch. - Return shape: (batch, sequence_len). - - When used by SparseCategoricalCrossentropy() with the default reduction - (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the - number of elements independent of the batch. E.g. if the RaggedTensor - has 2 batches with [2, 1] values respectively, the resulting loss is - the sum of the individual loss values divided by 3. - """ - fn = functools.partial( - sparse_categorical_crossentropy, from_logits=from_logits, axis=axis) - return _ragged_tensor_apply_loss(fn, y_true, y_pred, y_pred_extra_dim=True) - - -@keras_export('keras.metrics.binary_crossentropy', - 'keras.losses.binary_crossentropy') -@tf.__internal__.dispatch.add_dispatch_support -def binary_crossentropy(y_true, - y_pred, - from_logits=False, - label_smoothing=0., - axis=-1): - """Computes the binary crossentropy loss. - - Standalone usage: - - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] - >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> loss.numpy() - array([0.916 , 0.714], dtype=float32) - - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - from_logits: Whether `y_pred` is expected to be a logits tensor. By default, - we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by - squeezing them towards 0.5 That is, using `1. - 0.5 * label_smoothing` - for the target class and `0.5 * label_smoothing` for the non-target class. - axis: The axis along which the mean is computed. Defaults to -1. - - Returns: - Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) - - def _smooth_labels(): - return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing - - y_true = tf.__internal__.smart_cond.smart_cond(label_smoothing, _smooth_labels, - lambda: y_true) - - return backend.mean( - backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits), - axis=axis) + return _ragged_tensor_apply_loss(fn, y_true, y_pred) -@dispatch.dispatch_for_types(binary_crossentropy, tf.RaggedTensor) -def _ragged_tensor_binary_crossentropy(y_true, - y_pred, - from_logits=False, - label_smoothing=0., - axis=-1): - """Implements support for handling RaggedTensors. - - Args: - y_true: Tensor of one-hot true targets. - y_pred: Tensor of predicted targets. - from_logits: Whether `y_pred` is expected to be a logits tensor. By default, - we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For - example, if `0.1`, use `0.1 / num_classes` for non-target labels - and `0.9 + 0.1 / num_classes` for target labels. - axis: Axis along which to compute crossentropy. - - Returns: - Binary crossentropy loss value. 
- - Expected shape: (batch, sequence_len) with sequence_len being variable - per batch. - Return shape: (batch,); returns the per batch mean of the loss values. - - When used by BinaryCrossentropy() with the default reduction - (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over - the number of batches. - """ - fn = functools.partial( - binary_crossentropy, - from_logits=from_logits, - label_smoothing=label_smoothing, - axis=axis) - return _ragged_tensor_apply_loss(fn, y_true, y_pred) - - -@keras_export('keras.metrics.binary_focal_crossentropy', - 'keras.losses.binary_focal_crossentropy') +@keras_export( + "keras.metrics.binary_focal_crossentropy", + "keras.losses.binary_focal_crossentropy", +) @tf.__internal__.dispatch.add_dispatch_support def binary_focal_crossentropy( y_true, y_pred, + apply_class_balancing=False, + alpha=0.25, gamma=2.0, from_logits=False, - label_smoothing=0., + label_smoothing=0.0, axis=-1, ): - """Computes the binary focal crossentropy loss. - - According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it - helps to apply a focal factor to down-weight easy examples and focus more on - hard examples. By default, the focal tensor is computed as follows: - - `focal_factor = (1 - output)**gamma` for class 1 - `focal_factor = output**gamma` for class 0 - where `gamma` is a focusing parameter. When `gamma` = 0, this function is - equivalent to the binary crossentropy loss. - - Standalone usage: - - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] - >>> loss = tf.keras.losses.binary_focal_crossentropy(y_true, y_pred, gamma=2) - >>> assert loss.shape == (2,) - >>> loss.numpy() - array([0.330, 0.206], dtype=float32) - - Args: - y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`. - y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`. - gamma: A focusing parameter, default is `2.0` as mentioned in the reference. - from_logits: Whether `y_pred` is expected to be a logits tensor. By default, - we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in `[0, 1]`. If higher than 0 then smooth the labels - by squeezing them towards `0.5`, i.e., using `1. - 0.5 * label_smoothing` - for the target class and `0.5 * label_smoothing` for the non-target class. - axis: The axis along which the mean is computed. Defaults to `-1`. - - Returns: - Binary focal crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) - - def _smooth_labels(): - return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing - - y_true = tf.__internal__.smart_cond.smart_cond(label_smoothing, - _smooth_labels, lambda: y_true) - - return backend.mean( - backend.binary_focal_crossentropy( - target=y_true, - output=y_pred, - gamma=gamma, - from_logits=from_logits, - ), - axis=axis, - ) + """Computes the binary focal crossentropy loss. + + According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it + helps to apply a focal factor to down-weight easy examples and focus more on + hard examples. By default, the focal tensor is computed as follows: + + `focal_factor = (1 - output)**gamma` for class 1 + `focal_factor = output**gamma` for class 0 + where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal + effect on the binary crossentropy loss. 
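A numeric sketch of the focal factor just described, checked against the example values used in the docstring below (plain NumPy, not part of this change):

```python
import numpy as np

y_true = np.array([[0.0, 1.0], [0.0, 0.0]])
y_pred = np.array([[0.6, 0.4], [0.4, 0.6]])
gamma = 2.0

# Elementwise binary crossentropy.
bce = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
# focal_factor = (1 - output)**gamma for class 1, output**gamma for class 0.
focal = y_true * (1 - y_pred) ** gamma + (1 - y_true) * y_pred**gamma

print(np.mean(focal * bce, axis=-1))  # ~[0.330, 0.206]
```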
+ + If `apply_class_balancing == True`, this function also takes into account a + weight balancing factor for the binary classes 0 and 1 as follows: + + `weight = alpha` for class 1 (`target == 1`) + `weight = 1 - alpha` for class 0 + where `alpha` is a float in the range of `[0, 1]`. + + Standalone usage: + + >>> y_true = [[0, 1], [0, 0]] + >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] + >>> loss = tf.keras.losses.binary_focal_crossentropy(y_true, y_pred, + ... gamma=2) + >>> assert loss.shape == (2,) + >>> loss.numpy() + array([0.330, 0.206], dtype=float32) + + Args: + y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`. + y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`. + apply_class_balancing: A bool, whether to apply weight balancing on the + binary classes 0 and 1. + alpha: A weight balancing factor for class 1, default is `0.25` as + mentioned in the reference. The weight for class 0 is `1.0 - alpha`. + gamma: A focusing parameter, default is `2.0` as mentioned in the + reference. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability distribution. + label_smoothing: Float in `[0, 1]`. If higher than 0 then smooth the + labels by squeezing them towards `0.5`, i.e., using `1. - 0.5 * + label_smoothing` for the target class and `0.5 * label_smoothing` + for the non-target class. + axis: The axis along which the mean is computed. Defaults to `-1`. + + Returns: + Binary focal crossentropy loss value. + shape = `[batch_size, d0, .. dN-1]`. + """ + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) + + def _smooth_labels(): + return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing + + y_true = tf.__internal__.smart_cond.smart_cond( + label_smoothing, _smooth_labels, lambda: y_true + ) + + return backend.mean( + backend.binary_focal_crossentropy( + target=y_true, + output=y_pred, + apply_class_balancing=apply_class_balancing, + alpha=alpha, + gamma=gamma, + from_logits=from_logits, + ), + axis=axis, + ) @dispatch.dispatch_for_types(binary_focal_crossentropy, tf.RaggedTensor) def _ragged_tensor_binary_focal_crossentropy( y_true, y_pred, + apply_class_balancing=False, + alpha=0.25, gamma=2.0, from_logits=False, - label_smoothing=0., + label_smoothing=0.0, axis=-1, ): - """Implements support for handling RaggedTensors. - - Expected shape: `(batch, sequence_len)` with sequence_len being variable per - batch. - Return shape: `(batch,)`; returns the per batch mean of the loss values. - - When used by BinaryFocalCrossentropy() with the default reduction - (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over - the number of batches. - - Args: - y_true: Tensor of one-hot true targets. - y_pred: Tensor of predicted targets. - gamma: A focusing parameter, default is `2.0` as mentioned in the reference - [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf). - from_logits: Whether `y_pred` is expected to be a logits tensor. By default, - we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels. For - example, if `0.1`, use `0.1 / num_classes` for non-target labels - and `0.9 + 0.1 / num_classes` for target labels. - axis: Axis along which to compute crossentropy. - - Returns: - Binary focal crossentropy loss value. 
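Extending the sketch above with the class-balancing weight described here; the multiplication order (weight, then focal factor, then crossentropy) is an assumption about the backend computation and is not stated in this hunk:

```python
import numpy as np

y_true = np.array([[0.0, 1.0], [0.0, 0.0]])
y_pred = np.array([[0.6, 0.4], [0.4, 0.6]])
alpha, gamma = 0.25, 2.0

bce = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
focal = y_true * (1 - y_pred) ** gamma + (1 - y_true) * y_pred**gamma
# weight = alpha for class 1, (1 - alpha) for class 0 (assumed ordering).
weight = y_true * alpha + (1 - y_true) * (1 - alpha)

print(np.mean(weight * focal * bce, axis=-1))
```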
- """ - fn = functools.partial( - binary_focal_crossentropy, - gamma=gamma, - from_logits=from_logits, - label_smoothing=label_smoothing, - axis=axis, - ) - return _ragged_tensor_apply_loss(fn, y_true, y_pred) - - -@keras_export('keras.metrics.kl_divergence', - 'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld', - 'keras.metrics.KLD', 'keras.losses.kl_divergence', - 'keras.losses.kullback_leibler_divergence', 'keras.losses.kld', - 'keras.losses.KLD') + """Implements support for handling RaggedTensors. + + Expected shape: `(batch, sequence_len)` with sequence_len being variable per + batch. + Return shape: `(batch,)`; returns the per batch mean of the loss values. + + When used by BinaryFocalCrossentropy() with the default reduction + (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over + the number of batches. + + Args: + y_true: Tensor of one-hot true targets. + y_pred: Tensor of predicted targets. + apply_class_balancing: A bool, whether to apply weight balancing on the + binary classes 0 and 1. + alpha: A weight balancing factor for class 1, default is `0.25` as + mentioned in the reference [Lin et al., 2018]( + https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is + `1.0 - alpha`. + gamma: A focusing parameter, default is `2.0` as mentioned in the + reference. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability distribution. + label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels. For + example, if `0.1`, use `0.1 / num_classes` for non-target labels + and `0.9 + 0.1 / num_classes` for target labels. + axis: Axis along which to compute crossentropy. + + Returns: + Binary focal crossentropy loss value. + """ + fn = functools.partial( + binary_focal_crossentropy, + apply_class_balancing=apply_class_balancing, + alpha=alpha, + gamma=gamma, + from_logits=from_logits, + label_smoothing=label_smoothing, + axis=axis, + ) + return _ragged_tensor_apply_loss(fn, y_true, y_pred) + + +@keras_export( + "keras.metrics.kl_divergence", + "keras.metrics.kullback_leibler_divergence", + "keras.metrics.kld", + "keras.metrics.KLD", + "keras.losses.kl_divergence", + "keras.losses.kullback_leibler_divergence", + "keras.losses.kld", + "keras.losses.KLD", +) @tf.__internal__.dispatch.add_dispatch_support def kl_divergence(y_true, y_pred): - """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`. + """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`. - `loss = y_true * log(y_true / y_pred)` + `loss = y_true * log(y_true / y_pred)` - See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence + See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence - Standalone usage: + Standalone usage: - >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1) - >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1) - >>> assert np.array_equal( - ... 
loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1)) + >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1) + >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1) + >>> assert np.array_equal( + ... loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1)) - Args: - y_true: Tensor of true targets. - y_pred: Tensor of predicted targets. + Args: + y_true: Tensor of true targets. + y_pred: Tensor of predicted targets. - Returns: - A `Tensor` with loss. + Returns: + A `Tensor` with loss. - Raises: - TypeError: If `y_true` cannot be cast to the `y_pred.dtype`. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - y_true = backend.clip(y_true, backend.epsilon(), 1) - y_pred = backend.clip(y_pred, backend.epsilon(), 1) - return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1) + Raises: + TypeError: If `y_true` cannot be cast to the `y_pred.dtype`. + """ + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + y_true = backend.clip(y_true, backend.epsilon(), 1) + y_pred = backend.clip(y_pred, backend.epsilon(), 1) + return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1) -@keras_export('keras.metrics.poisson', 'keras.losses.poisson') +@keras_export("keras.metrics.poisson", "keras.losses.poisson") @tf.__internal__.dispatch.add_dispatch_support def poisson(y_true, y_pred): - """Computes the Poisson loss between y_true and y_pred. + """Computes the Poisson loss between y_true and y_pred. - The Poisson loss is the mean of the elements of the `Tensor` - `y_pred - y_true * log(y_pred)`. + The Poisson loss is the mean of the elements of the `Tensor` + `y_pred - y_true * log(y_pred)`. - Standalone usage: + Standalone usage: - >>> y_true = np.random.randint(0, 2, size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = tf.keras.losses.poisson(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> y_pred = y_pred + 1e-7 - >>> assert np.allclose( - ... loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1), - ... atol=1e-5) + >>> y_true = np.random.randint(0, 2, size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = tf.keras.losses.poisson(y_true, y_pred) + >>> assert loss.shape == (2,) + >>> y_pred = y_pred + 1e-7 + >>> assert np.allclose( + ... loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1), + ... atol=1e-5) - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - Returns: - Poisson loss value. shape = `[batch_size, d0, .. dN-1]`. + Returns: + Poisson loss value. shape = `[batch_size, d0, .. dN-1]`. - Raises: - InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes. - """ - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.cast(y_true, y_pred.dtype) - return backend.mean( - y_pred - y_true * tf.math.log(y_pred + backend.epsilon()), axis=-1) + Raises: + InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes. 
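A hand check of the Poisson formula quoted above; the `1e-7` stabilizer mirrors the docstring's own standalone usage (a sketch, not part of this change):

```python
import numpy as np
import tensorflow as tf

y_true = np.array([[1.0, 0.0, 1.0]])
y_pred = np.array([[0.7, 0.2, 0.9]])

# Mean of y_pred - y_true * log(y_pred), with the epsilon the backend adds.
manual = np.mean(y_pred - y_true * np.log(y_pred + 1e-7), axis=-1)
keras_loss = tf.keras.losses.poisson(y_true, y_pred)

np.testing.assert_allclose(keras_loss.numpy(), manual, atol=1e-5)
```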
+ """ + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, y_pred.dtype) + return backend.mean( + y_pred - y_true * tf.math.log(y_pred + backend.epsilon()), axis=-1 + ) @keras_export( - 'keras.losses.cosine_similarity', + "keras.losses.cosine_similarity", v1=[ - 'keras.metrics.cosine_proximity', - 'keras.metrics.cosine', - 'keras.losses.cosine_proximity', - 'keras.losses.cosine', - 'keras.losses.cosine_similarity', - ]) + "keras.metrics.cosine_proximity", + "keras.metrics.cosine", + "keras.losses.cosine_proximity", + "keras.losses.cosine", + "keras.losses.cosine_similarity", + ], +) @tf.__internal__.dispatch.add_dispatch_support def cosine_similarity(y_true, y_pred, axis=-1): - """Computes the cosine similarity between labels and predictions. + """Computes the cosine similarity between labels and predictions. - Note that it is a number between -1 and 1. When it is a negative number - between -1 and 0, 0 indicates orthogonality and values closer to -1 - indicate greater similarity. The values closer to 1 indicate greater - dissimilarity. This makes it usable as a loss function in a setting - where you try to maximize the proximity between predictions and - targets. If either `y_true` or `y_pred` is a zero vector, cosine - similarity will be 0 regardless of the proximity between predictions - and targets. + Note that it is a number between -1 and 1. When it is a negative number + between -1 and 0, 0 indicates orthogonality and values closer to -1 + indicate greater similarity. The values closer to 1 indicate greater + dissimilarity. This makes it usable as a loss function in a setting + where you try to maximize the proximity between predictions and + targets. If either `y_true` or `y_pred` is a zero vector, cosine + similarity will be 0 regardless of the proximity between predictions + and targets. - `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))` + `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))` - Standalone usage: + Standalone usage: - >>> y_true = [[0., 1.], [1., 1.], [1., 1.]] - >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]] - >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1) - >>> loss.numpy() - array([-0., -0.999, 0.999], dtype=float32) - - Args: - y_true: Tensor of true targets. - y_pred: Tensor of predicted targets. - axis: Axis along which to determine similarity. - - Returns: - Cosine similarity tensor. - """ - y_true = tf.linalg.l2_normalize(y_true, axis=axis) - y_pred = tf.linalg.l2_normalize(y_pred, axis=axis) - return -tf.reduce_sum(y_true * y_pred, axis=axis) + >>> y_true = [[0., 1.], [1., 1.], [1., 1.]] + >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]] + >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1) + >>> loss.numpy() + array([-0., -0.999, 0.999], dtype=float32) + Args: + y_true: Tensor of true targets. + y_pred: Tensor of predicted targets. + axis: Axis along which to determine similarity. -@keras_export('keras.losses.CosineSimilarity') -class CosineSimilarity(LossFunctionWrapper): - """Computes the cosine similarity between labels and predictions. - - Note that it is a number between -1 and 1. When it is a negative number - between -1 and 0, 0 indicates orthogonality and values closer to -1 - indicate greater similarity. The values closer to 1 indicate greater - dissimilarity. This makes it usable as a loss function in a setting - where you try to maximize the proximity between predictions and targets. 
- If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0 - regardless of the proximity between predictions and targets. - - `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))` - - Standalone usage: - - >>> y_true = [[0., 1.], [1., 1.]] - >>> y_pred = [[1., 0.], [1., 1.]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1) - >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]] - >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]] - >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]] - >>> # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1)) - >>> # = -((0. + 0.) + (0.5 + 0.5)) / 2 - >>> cosine_loss(y_true, y_pred).numpy() - -0.5 - - >>> # Calling with 'sample_weight'. - >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() - -0.0999 - - >>> # Using 'sum' reduction type. - >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1, - ... reduction=tf.keras.losses.Reduction.SUM) - >>> cosine_loss(y_true, y_pred).numpy() - -0.999 - - >>> # Using 'none' reduction type. - >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1, - ... reduction=tf.keras.losses.Reduction.NONE) - >>> cosine_loss(y_true, y_pred).numpy() - array([-0., -0.999], dtype=float32) - - Usage with the `compile()` API: - - ```python - model.compile(optimizer='sgd', loss=tf.keras.losses.CosineSimilarity(axis=1)) - ``` - - Args: - axis: The axis along which the cosine similarity is computed - (the features axis). Defaults to -1. - reduction: Type of `tf.keras.losses.Reduction` to apply to loss. - Default value is `AUTO`. `AUTO` indicates that the reduction option will - be determined by the usage context. For almost all cases this defaults to - `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside of - built-in training loops such as `tf.keras` `compile` and `fit`, using - `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an error. Please see this - custom training [tutorial] - (https://www.tensorflow.org/tutorials/distribute/custom_training) for more - details. - name: Optional name for the instance. - """ - - def __init__(self, - axis=-1, - reduction=losses_utils.ReductionV2.AUTO, - name='cosine_similarity'): - super().__init__( - cosine_similarity, reduction=reduction, name=name, axis=axis) + Returns: + Cosine similarity tensor. + """ + y_true = tf.linalg.l2_normalize(y_true, axis=axis) + y_pred = tf.linalg.l2_normalize(y_pred, axis=axis) + return -tf.reduce_sum(y_true * y_pred, axis=axis) # Aliases. @@ -2311,95 +2853,126 @@ def __init__(self, def is_categorical_crossentropy(loss): - result = ((isinstance(loss, CategoricalCrossentropy) or - (isinstance(loss, LossFunctionWrapper) and - loss.fn == categorical_crossentropy) or - (hasattr(loss, '__name__') and - loss.__name__ == 'categorical_crossentropy') or - (loss == 'categorical_crossentropy'))) - return result + result = ( + isinstance(loss, CategoricalCrossentropy) + or ( + isinstance(loss, LossFunctionWrapper) + and loss.fn == categorical_crossentropy + ) + or ( + hasattr(loss, "__name__") + and loss.__name__ == "categorical_crossentropy" + ) + or (loss == "categorical_crossentropy") + ) + return result -@keras_export('keras.losses.serialize') -def serialize(loss): - """Serializes loss function or `Loss` instance. +@keras_export("keras.losses.serialize") +def serialize(loss, use_legacy_format=False): + """Serializes loss function or `Loss` instance. - Args: - loss: A Keras `Loss` instance or a loss function. 
+ Args: + loss: A Keras `Loss` instance or a loss function. + use_legacy_format: Boolean, whether to use the legacy serialization + format. Defaults to `False`. - Returns: - Loss configuration dictionary. - """ - return serialize_keras_object(loss) + Returns: + Loss configuration dictionary. + """ + if loss is None: + return None + if not isinstance(loss, Loss): + warnings.warn( + "The `keras.losses.serialize()` API should only be used for " + "objects of type `keras.losses.Loss`. Found an instance of type " + f"{type(loss)}, which may lead to improper serialization." + ) + if use_legacy_format: + return legacy_serialization.serialize_keras_object(loss) + return serialize_keras_object(loss) + + +@keras_export("keras.losses.deserialize") +def deserialize(name, custom_objects=None, use_legacy_format=False): + """Deserializes a serialized loss class/function instance. + Args: + name: Loss configuration. + custom_objects: Optional dictionary mapping names (strings) to custom + objects (classes and functions) to be considered during + deserialization. + use_legacy_format: Boolean, whether to use the legacy serialization + format. Defaults to `False`. -@keras_export('keras.losses.deserialize') -def deserialize(name, custom_objects=None): - """Deserializes a serialized loss class/function instance. + Returns: + A Keras `Loss` instance or a loss function. + """ + if use_legacy_format: + return legacy_serialization.deserialize_keras_object( + name, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="loss function", + ) + return deserialize_keras_object( + name, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="loss function", + ) - Args: - name: Loss configuration. - custom_objects: Optional dictionary mapping names (strings) to custom - objects (classes and functions) to be considered during deserialization. - Returns: - A Keras `Loss` instance or a loss function. - """ - return deserialize_keras_object( - name, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='loss function') +@keras_export("keras.losses.get") +def get(identifier): + """Retrieves a Keras loss as a `function`/`Loss` class instance. + The `identifier` may be the string name of a loss function or `Loss` class. -@keras_export('keras.losses.get') -def get(identifier): - """Retrieves a Keras loss as a `function`/`Loss` class instance. - - The `identifier` may be the string name of a loss function or `Loss` class. - - >>> loss = tf.keras.losses.get("categorical_crossentropy") - >>> type(loss) - - >>> loss = tf.keras.losses.get("CategoricalCrossentropy") - >>> type(loss) - - - You can also specify `config` of the loss to this function by passing dict - containing `class_name` and `config` as an identifier. Also note that the - `class_name` must map to a `Loss` class - - >>> identifier = {"class_name": "CategoricalCrossentropy", - ... "config": {"from_logits": True}} - >>> loss = tf.keras.losses.get(identifier) - >>> type(loss) - - - Args: - identifier: A loss identifier. One of None or string name of a loss - function/class or loss configuration dictionary or a loss function or a - loss class instance. - - Returns: - A Keras loss as a `function`/ `Loss` class instance. - - Raises: - ValueError: If `identifier` cannot be interpreted. 
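The three identifier forms accepted by `get()` as documented above, in one usage sketch (not part of this change):

```python
import tensorflow as tf

loss_fn = tf.keras.losses.get("categorical_crossentropy")  # plain function
loss_obj = tf.keras.losses.get("CategoricalCrossentropy")  # Loss instance
loss_cfg = tf.keras.losses.get(
    {"class_name": "CategoricalCrossentropy", "config": {"from_logits": True}}
)

print(callable(loss_fn), type(loss_obj).__name__)  # True CategoricalCrossentropy
print(loss_cfg.get_config()["from_logits"])  # True
```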
- """ - if identifier is None: - return None - if isinstance(identifier, str): - identifier = str(identifier) - return deserialize(identifier) - if isinstance(identifier, dict): - return deserialize(identifier) - if callable(identifier): - return identifier - raise ValueError( - f'Could not interpret loss function identifier: {identifier}') + >>> loss = tf.keras.losses.get("categorical_crossentropy") + >>> type(loss) + + >>> loss = tf.keras.losses.get("CategoricalCrossentropy") + >>> type(loss) + + + You can also specify `config` of the loss to this function by passing dict + containing `class_name` and `config` as an identifier. Also note that the + `class_name` must map to a `Loss` class + + >>> identifier = {"class_name": "CategoricalCrossentropy", + ... "config": {"from_logits": True}} + >>> loss = tf.keras.losses.get(identifier) + >>> type(loss) + + + Args: + identifier: A loss identifier. One of None or string name of a loss + function/class or loss configuration dictionary or a loss function + or a loss class instance. + + Returns: + A Keras loss as a `function`/ `Loss` class instance. + + Raises: + ValueError: If `identifier` cannot be interpreted. + """ + if identifier is None: + return None + if isinstance(identifier, str): + identifier = str(identifier) + use_legacy_format = "module" not in identifier + return deserialize(identifier, use_legacy_format=use_legacy_format) + if isinstance(identifier, dict): + return deserialize(identifier) + if callable(identifier): + return identifier + raise ValueError( + f"Could not interpret loss function identifier: {identifier}" + ) LABEL_DTYPES_FOR_LOSSES = { - tf.compat.v1.losses.sparse_softmax_cross_entropy: 'int32', - sparse_categorical_crossentropy: 'int32' + tf.compat.v1.losses.sparse_softmax_cross_entropy: "int32", + sparse_categorical_crossentropy: "int32", } diff --git a/keras/losses_test.py b/keras/losses_test.py index 382c9b132a3c..ba4203483c96 100644 --- a/keras/losses_test.py +++ b/keras/losses_test.py @@ -14,2068 +14,2988 @@ # ============================================================================== """Tests for Keras loss functions.""" -import tensorflow.compat.v2 as tf +import warnings -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized -from tensorflow.python.autograph.impl import api as autograph from keras import activations from keras import backend -from keras.testing_infra import test_combinations from keras import losses +from keras.testing_infra import test_combinations from keras.utils import losses_utils +# isort: off +from tensorflow.python.autograph.impl import ( + api as autograph, +) + ALL_LOSSES = [ - losses.mean_squared_error, losses.mean_absolute_error, + losses.mean_squared_error, + losses.mean_absolute_error, losses.mean_absolute_percentage_error, - losses.mean_squared_logarithmic_error, losses.squared_hinge, losses.hinge, - losses.categorical_crossentropy, losses.binary_crossentropy, - losses.kl_divergence, losses.poisson, losses.cosine_similarity, - losses.log_cosh, losses.categorical_hinge + losses.mean_squared_logarithmic_error, + losses.squared_hinge, + losses.hinge, + losses.categorical_crossentropy, + losses.binary_crossentropy, + losses.kl_divergence, + losses.poisson, + losses.cosine_similarity, + losses.log_cosh, + losses.categorical_hinge, ] class KerasLossesTest(tf.test.TestCase, parameterized.TestCase): - - def test_objective_shapes_3d(self): - with self.cached_session(): - y_a = 
backend.variable(np.random.random((5, 6, 7))) - y_b = backend.variable(np.random.random((5, 6, 7))) - for obj in ALL_LOSSES: - objective_output = obj(y_a, y_b) - self.assertListEqual(objective_output.shape.as_list(), [5, 6]) - - def test_objective_shapes_2d(self): - with self.cached_session(): - y_a = backend.variable(np.random.random((6, 7))) - y_b = backend.variable(np.random.random((6, 7))) - for obj in ALL_LOSSES: - objective_output = obj(y_a, y_b) - self.assertListEqual(objective_output.shape.as_list(), [ - 6, - ]) - - def test_cce_one_hot(self): - with self.cached_session(): - y_a = backend.variable(np.random.randint(0, 7, (5, 6))) - y_b = backend.variable(np.random.random((5, 6, 7))) - objective_output = losses.sparse_categorical_crossentropy(y_a, y_b) - assert backend.eval(objective_output).shape == (5, 6) - - y_a = backend.variable(np.random.randint(0, 7, (6,))) - y_b = backend.variable(np.random.random((6, 7))) - objective_output = losses.sparse_categorical_crossentropy(y_a, y_b) - assert backend.eval(objective_output).shape == (6,) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_loss(self): - target = backend.variable(np.random.randint(0, 1, (5, 1))) - logits = backend.variable(np.random.random((5, 1))) - softmax_output = backend.softmax(logits) - output_from_logit = losses.categorical_crossentropy( - target, logits, from_logits=True) - output_from_softmax = losses.categorical_crossentropy( - target, softmax_output) - np.testing.assert_allclose( - backend.eval(output_from_logit), - backend.eval(output_from_softmax), - atol=1e-5) - - axis = 0 - output_from_logit_axis = losses.categorical_crossentropy( - target, logits, from_logits=True, axis=axis) - output_from_softmax_axis = losses.categorical_crossentropy( - target, softmax_output, axis=axis) - - np.testing.assert_allclose( - backend.eval(output_from_logit_axis), - backend.eval(output_from_softmax_axis), - atol=1e-5) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_loss_with_unknown_rank_tensor(self): - t = backend.placeholder() - p = backend.placeholder() - o = losses.categorical_crossentropy(t, p) - - t_val = tf.convert_to_tensor([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]) - p_val = tf.convert_to_tensor([[.9, .05, .05], [.05, .89, .06], - [.05, .01, .94]]) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.105, .116, .062], 1e-3) - - # from logits - p_val = tf.convert_to_tensor([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - o = losses.categorical_crossentropy(t, p, from_logits=True) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.002, 0, .17], 1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_sparse_categorical_crossentropy_loss(self): - target = backend.variable(np.random.randint(0, 1, (5, 1))) - logits = backend.variable(np.random.random((5, 1))) - softmax_output = backend.softmax(logits) - output_from_logit = losses.sparse_categorical_crossentropy( - target, logits, from_logits=True) - output_from_softmax = losses.sparse_categorical_crossentropy( - target, softmax_output) - np.testing.assert_allclose( - backend.eval(output_from_logit), - backend.eval(output_from_softmax), - atol=1e-5) - - @test_combinations.generate(test_combinations.combine(mode=['graph'])) - def 
test_sparse_categorical_crossentropy_loss_with_unknown_rank_tensor(self): - # This test only runs in graph because the TF op layer is not supported yet - # for sparse ops. - t = backend.placeholder() - p = backend.placeholder() - o = losses.sparse_categorical_crossentropy(t, p) - - t_val = tf.convert_to_tensor([0, 1, 2]) - p_val = tf.convert_to_tensor([[.9, .05, .05], [.05, .89, .06], - [.05, .01, .94]]) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.105, .116, .062], 1e-3) - - # from logits - p_val = tf.convert_to_tensor([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - o = losses.sparse_categorical_crossentropy(t, p, from_logits=True) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.002, 0, .17], 1e-3) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_sparse_categorical_crossentropy_with_float16(self): - # See https://github.com/keras-team/keras/issues/15012 for more details. - # we don't cast y_true to have same dtype as y_pred, since y_pred could be - # float16 which has a small upbound, and the casting could cause an - # underflow. The y_true will be used as int64 anyway. - - # create 2 observations with 2049 labels, since 2048 is the largest number - # for float16 - y_true = [0, 2049] - # should result in a loss close to 0 since predicting y_true perfectly - y_pred = np.zeros((2, 2050)) - y_pred[0][0] = 1 - y_pred[1][2049] = 1 - y_pred_16 = tf.convert_to_tensor(y_pred, dtype=tf.float16) - - # If we did a cast for y_true to float16 in SparseCategoricalCrossentropy, - # then the loss will not be zero. - scce = losses.SparseCategoricalCrossentropy() - self.assertAllClose(scce(y_true, y_pred_16).numpy(), 0.0, atol=1e-3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_crossentropy_loss(self): - target = backend.variable(np.random.randint(0, 1, (5, 1))) - logits = backend.variable(np.random.random((5, 1))) - sigmoid_output = backend.sigmoid(logits) - output_from_logit = losses.binary_crossentropy( - target, logits, from_logits=True) - output_from_sigmoid = losses.binary_crossentropy(target, sigmoid_output) - np.testing.assert_allclose( - backend.eval(output_from_logit), - backend.eval(output_from_sigmoid), - atol=1e-5) - - axis = 0 - output_from_logit_axis = losses.binary_crossentropy( - target, logits, from_logits=True, axis=axis) - output_from_sigmoid_axis = losses.binary_crossentropy( - target, sigmoid_output, axis=axis) - - np.testing.assert_allclose( - backend.eval(output_from_logit_axis), - backend.eval(output_from_sigmoid_axis), - atol=1e-5) - - def test_get_bce(self): - bce_fn = losses.get('bce') - self.assertEqual(bce_fn, losses.binary_crossentropy) - - def test_serialization(self): - fn = losses.get('mse') - config = losses.serialize(fn) - new_fn = losses.deserialize(config) - self.assertEqual(fn, new_fn) - - def test_categorical_hinge(self): - y_pred = backend.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) - y_true = backend.variable(np.array([[0, 1, 0], [1, 0, 0]])) - expected_loss = ((0.3 - 0.2 + 1) + (0.7 - 0.1 + 1)) / 2.0 - loss = backend.eval(losses.categorical_hinge(y_true, y_pred)) - self.assertAllClose(expected_loss, np.mean(loss)) - - def test_loss_wrapper(self): - loss_fn = losses.get('mse') - mse_obj = losses.LossFunctionWrapper(loss_fn, name=loss_fn.__name__) - - self.assertEqual(mse_obj.name, 'mean_squared_error') - self.assertEqual(mse_obj.reduction, 
losses_utils.ReductionV2.AUTO) - - y_true = tf.constant([[1., 9.], [2., 5.]]) - y_pred = tf.constant([[4., 8.], [12., 3.]]) - sample_weight = tf.constant([1.2, 0.5]) - loss = mse_obj(y_true, y_pred, sample_weight=sample_weight) - - # mse = [((4 - 1)^2 + (8 - 9)^2) / 2, ((12 - 2)^2 + (3 - 5)^2) / 2] - # mse = [5, 52] - # weighted_mse = [5 * 1.2, 52 * 0.5] = [6, 26] - # reduced_weighted_mse = (6 + 26) / 2 = - self.assertAllClose(self.evaluate(loss), 16, 1e-2) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_loss_wrapper_autograph(self): - # Test that functions with control flow wrapped in a LossFunctionWrapper - # get autographed when in a tf.function - def loss_fn(y_true, y_pred): - mse_loss_fn = losses.get('mse') - if tf.reduce_mean(y_true) > 0: - return mse_loss_fn(y_true, y_pred) - else: - return mse_loss_fn(y_true, y_pred) - - mse_obj = losses.LossFunctionWrapper(loss_fn) - - y_true = tf.constant([[1., 9.], [2., 5.]]) - y_pred = tf.constant([[4., 8.], [12., 3.]]) - sample_weight = tf.constant([1.2, 0.5]) - - @tf.function - def tf_functioned_loss_fn(y_true, y_pred, sample_weight=None): - return mse_obj(y_true, y_pred, sample_weight=sample_weight) - - loss = tf_functioned_loss_fn(y_true, y_pred, sample_weight=sample_weight) - - # mse = [((4 - 1)^2 + (8 - 9)^2) / 2, ((12 - 2)^2 + (3 - 5)^2) / 2] - # mse = [5, 52] - # weighted_mse = [5 * 1.2, 52 * 0.5] = [6, 26] - # reduced_weighted_mse = (6 + 26) / 2 = - self.assertAllClose(self.evaluate(loss), 16, 1e-2) - - def test_loss_wrapper_dtype(self): - # Make sure the loss wrapper doesn't cause any numerical precision loss - # during calculation. See https://github.com/keras-team/keras/issues/15791 - x = tf.convert_to_tensor([[2.1]], dtype=tf.float64) - y_true = tf.square(x) - y_pred = tf.convert_to_tensor([[3.68]], dtype=tf.float64) - - # TF loss - loss = losses.MeanSquaredError() - tf_loss = loss(y_pred, y_true) - - # manually computed loss in 64-bit - man_loss64 = tf.squeeze(tf.square(y_pred - y_true)) - - self.assertEqual(tf_loss.dtype, tf.float64) - # Make a smaller atol to ensure the float64 precision is hold. - self.assertAllClose(self.evaluate(tf_loss), self.evaluate(man_loss64), - atol=1e-8) - - def test_invalid_reduction(self): - with self.assertRaisesRegex(ValueError, 'Invalid Reduction Key: Foo.'): - losses.MeanSquaredError(reduction='Foo') - - mse_obj = losses.MeanSquaredError() - y = tf.constant([1]) - mse_obj.reduction = 'Bar' - with self.assertRaisesRegex(ValueError, 'Invalid Reduction Key: Bar.'): - mse_obj(y, y) - - def test_deserialization_error(self): - with self.assertRaisesRegex(ValueError, 'Could not interpret loss'): - losses.get(0) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_binary_crossentropy_uses_cached_logits(self): - logits = tf.constant([[-30., 30.]]) - y_pred = activations.sigmoid(logits) - self.assertTrue(hasattr(y_pred, '_keras_logits')) - y_true = tf.constant([[0., 1.]]) - loss = losses.binary_crossentropy(y_true, y_pred)[0] - # Check that logits are used. If y_pred is used directly, loss will - # collapse to 0 from underflow. - self.assertNotEqual(self.evaluate(loss), 0.) 
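Context for the cached-logits tests above and below: `keras.activations.sigmoid` attaches the original logits to its output (the `_keras_logits` attribute the tests assert on), so the loss can be recomputed from logits even when given probabilities. A sketch of why that matters at saturating logits (illustrative, not part of this change):

```python
import tensorflow as tf

logits = tf.constant([[-30.0, 30.0]])
y_true = tf.constant([[0.0, 1.0]])

# tf.sigmoid does NOT cache logits, so this path really uses probabilities,
# which saturate to 0. and 1. in float32.
probs = tf.sigmoid(logits)

from_probs = tf.keras.losses.binary_crossentropy(y_true, probs)
from_logits = tf.keras.losses.binary_crossentropy(y_true, logits, from_logits=True)

# The probability path can only reflect the clipping epsilon (~1e-7);
# the logits path recovers the true, much smaller loss (~1e-13).
print(from_probs.numpy(), from_logits.numpy())
```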
- - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_uses_cached_logits(self): - logits = tf.constant([[-5., 0., 5.]]) - y_pred = activations.softmax(logits) - self.assertTrue(hasattr(y_pred, '_keras_logits')) - y_true = tf.constant([[0., 0., 1.]]) - loss = losses.categorical_crossentropy(y_true, logits, from_logits=True)[0] - # Check that logits are used. If y_pred is used directly, loss will - # collapse to 0 from underflow. - self.assertNotEqual(self.evaluate(loss), 0.) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_sparse_categorical_crossentropy_uses_cached_logits(self): - logits = tf.constant([[-5., 0., 5.]]) - y_pred = activations.softmax(logits) - self.assertTrue(hasattr(y_pred, '_keras_logits')) - y_true = tf.constant([2]) - loss = losses.sparse_categorical_crossentropy( - y_true, logits, from_logits=True)[0] - # Check that logits are used. If y_pred is used directly, loss will - # collapse to 0 from underflow. - self.assertNotEqual(self.evaluate(loss), 0.) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_loss_not_autographed_in_eager(self): - - class MyLoss(losses.Loss): - - def call(self, y_true, y_pred): - return y_true - y_pred - - loss = MyLoss() - y_true = tf.constant([[0., 0., 0.]]) - y_pred = tf.constant([[1., 1., 1.]]) - - def tf_convert(fn, _): - assert False, 'Function should not be autographed.' - return fn - - with tf.compat.v1.test.mock.patch.object(autograph, 'tf_convert', - tf_convert): - loss(y_true, y_pred) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_objective_shapes_3d(self): + with self.cached_session(): + y_a = backend.variable(np.random.random((5, 6, 7))) + y_b = backend.variable(np.random.random((5, 6, 7))) + for obj in ALL_LOSSES: + objective_output = obj(y_a, y_b) + self.assertListEqual(objective_output.shape.as_list(), [5, 6]) + + def test_objective_shapes_2d(self): + with self.cached_session(): + y_a = backend.variable(np.random.random((6, 7))) + y_b = backend.variable(np.random.random((6, 7))) + for obj in ALL_LOSSES: + objective_output = obj(y_a, y_b) + self.assertListEqual( + objective_output.shape.as_list(), + [ + 6, + ], + ) + + def test_cce_one_hot(self): + with self.cached_session(): + y_a = backend.variable(np.random.randint(0, 7, (5, 6))) + y_b = backend.variable(np.random.random((5, 6, 7))) + objective_output = losses.sparse_categorical_crossentropy(y_a, y_b) + assert backend.eval(objective_output).shape == (5, 6) + + y_a = backend.variable(np.random.randint(0, 7, (6,))) + y_b = backend.variable(np.random.random((6, 7))) + objective_output = losses.sparse_categorical_crossentropy(y_a, y_b) + assert backend.eval(objective_output).shape == (6,) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_crossentropy_loss(self): + target = backend.variable(np.random.randint(0, 1, (5, 1))) + logits = backend.variable(np.random.random((5, 1))) + softmax_output = backend.softmax(logits) + output_from_logit = losses.categorical_crossentropy( + target, logits, from_logits=True + ) + output_from_softmax = losses.categorical_crossentropy( + target, softmax_output + ) + np.testing.assert_allclose( + backend.eval(output_from_logit), + backend.eval(output_from_softmax), + atol=1e-5, + ) + + axis = 0 + output_from_logit_axis = losses.categorical_crossentropy( + target, logits, from_logits=True, 
axis=axis + ) + output_from_softmax_axis = losses.categorical_crossentropy( + target, softmax_output, axis=axis + ) + + np.testing.assert_allclose( + backend.eval(output_from_logit_axis), + backend.eval(output_from_softmax_axis), + atol=1e-5, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_crossentropy_loss_with_unknown_rank_tensor(self): + t = backend.placeholder() + p = backend.placeholder() + o = losses.categorical_crossentropy(t, p) + + t_val = tf.convert_to_tensor( + [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] + ) + p_val = tf.convert_to_tensor( + [[0.9, 0.05, 0.05], [0.05, 0.89, 0.06], [0.05, 0.01, 0.94]] + ) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.105, 0.116, 0.062], 1e-3) + + # from logits + p_val = tf.convert_to_tensor( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + o = losses.categorical_crossentropy(t, p, from_logits=True) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.002, 0, 0.17], 1e-3) + + def test_categorical_crossentropy_loss_different_axis(self): + target = backend.variable(np.random.randint(0, 1, (5, 2, 3))) + logits = backend.variable(np.random.random((5, 2, 3))) + softmax_output = backend.softmax(logits) + axis = 1 + output_from_logit_axis = losses.categorical_crossentropy( + target, logits, from_logits=True, axis=axis + ) + output_from_softmax_axis = losses.categorical_crossentropy( + target, softmax_output, axis=axis + ) + np.testing.assert_allclose( + backend.eval(output_from_logit_axis), + backend.eval(output_from_softmax_axis), + atol=1e-5, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sparse_categorical_crossentropy_loss(self): + target = backend.variable(np.random.randint(0, 1, (5, 1))) + logits = backend.variable(np.random.random((5, 1))) + softmax_output = backend.softmax(logits) + output_from_logit = losses.sparse_categorical_crossentropy( + target, logits, from_logits=True + ) + output_from_softmax = losses.sparse_categorical_crossentropy( + target, softmax_output + ) + np.testing.assert_allclose( + backend.eval(output_from_logit), + backend.eval(output_from_softmax), + atol=1e-5, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sparse_categorical_crossentropy_loss_with_ignore_class(self): + ignore_class = 255 + target = backend.variable(np.random.randint(0, 1, (5, 1))) + logits = backend.variable(np.random.random((5, 1))) + softmax_output = backend.softmax(logits) + + _valid = tf.constant([[0], [1], [0], [1], [1]], target.dtype) + target.assign(target * _valid + (1 - _valid) * ignore_class) + + output_from_logit = losses.sparse_categorical_crossentropy( + target, logits, ignore_class=ignore_class, from_logits=True + ) + output_from_softmax = losses.sparse_categorical_crossentropy( + target, softmax_output, ignore_class=ignore_class + ) + + # expected_mask = [False, True, False, True, True] + # for o in (output_from_logit, output_from_softmax): + # mask = backend.eval(losses_utils.get_mask(o)) + # np.testing.assert_array_equal(mask, expected_mask) + + np.testing.assert_allclose( + backend.eval(output_from_logit), + backend.eval(output_from_softmax), + atol=1e-5, + ) + + @test_combinations.generate(test_combinations.combine(mode=["graph"])) + def test_sparse_categorical_crossentropy_loss_with_unknown_rank_tensor( + self, + ): + # This test only runs 
in graph because the TF op layer is not supported + # yet for sparse ops. + t = backend.placeholder() + p = backend.placeholder() + o = losses.sparse_categorical_crossentropy(t, p) + + t_val = tf.convert_to_tensor([0, 1, 2]) + p_val = tf.convert_to_tensor( + [[0.9, 0.05, 0.05], [0.05, 0.89, 0.06], [0.05, 0.01, 0.94]] + ) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.105, 0.116, 0.062], 1e-3) + + # from logits + p_val = tf.convert_to_tensor( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + o = losses.sparse_categorical_crossentropy(t, p, from_logits=True) + f = backend.function([t, p], o) + + result = f([t_val, p_val]) + self.assertArrayNear(result, [0.002, 0, 0.17], 1e-3) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_sparse_categorical_crossentropy_with_float16(self): + # See https://github.com/keras-team/keras/issues/15012 for more details. + # We don't cast y_true to have the same dtype as y_pred, since y_pred + # could be float16, which has a small upper bound, and the casting + # could cause an underflow. The y_true will be used as int64 anyway. + + # create 2 observations with label values up to 2049, the first integer + # that float16 cannot represent exactly + y_true = [0, 2049] + # should result in a loss close to 0 since predicting y_true perfectly + y_pred = np.zeros((2, 2050)) + y_pred[0][0] = 1 + y_pred[1][2049] = 1 + y_pred_16 = tf.convert_to_tensor(y_pred, dtype=tf.float16) + + # If we did a cast for y_true to float16 in + # SparseCategoricalCrossentropy, then the loss will not be zero. + scce = losses.SparseCategoricalCrossentropy() + self.assertAllClose(scce(y_true, y_pred_16).numpy(), 0.0, atol=1e-3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_binary_crossentropy_loss(self): + target = backend.variable(np.random.randint(0, 1, (5, 1))) + logits = backend.variable(np.random.random((5, 1))) + sigmoid_output = backend.sigmoid(logits) + output_from_logit = losses.binary_crossentropy( + target, logits, from_logits=True + ) + output_from_sigmoid = losses.binary_crossentropy(target, sigmoid_output) + np.testing.assert_allclose( + backend.eval(output_from_logit), + backend.eval(output_from_sigmoid), + atol=1e-5, + ) + + axis = 0 + output_from_logit_axis = losses.binary_crossentropy( + target, logits, from_logits=True, axis=axis + ) + output_from_sigmoid_axis = losses.binary_crossentropy( + target, sigmoid_output, axis=axis + ) + + np.testing.assert_allclose( + backend.eval(output_from_logit_axis), + backend.eval(output_from_sigmoid_axis), + atol=1e-5, + ) + + def test_get_bce(self): + bce_fn = losses.get("bce") + self.assertEqual(bce_fn, losses.binary_crossentropy) + + def test_serialization(self): + fn = losses.get("mse") + config = losses.serialize(fn) + new_fn = losses.deserialize(config) + self.assertEqual(fn, new_fn) + + def test_categorical_hinge(self): + y_pred = backend.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) + y_true = backend.variable(np.array([[0, 1, 0], [1, 0, 0]])) + expected_loss = ((0.3 - 0.2 + 1) + (0.7 - 0.1 + 1)) / 2.0 + loss = backend.eval(losses.categorical_hinge(y_true, y_pred)) + self.assertAllClose(expected_loss, np.mean(loss)) + + def test_loss_wrapper(self): + loss_fn = losses.get("mse") + mse_obj = losses.LossFunctionWrapper(loss_fn, name=loss_fn.__name__) + + self.assertEqual(mse_obj.name, "mean_squared_error") + self.assertEqual(mse_obj.reduction, losses_utils.ReductionV2.AUTO) + + y_true = tf.constant([[1.0, 
9.0], [2.0, 5.0]]) + y_pred = tf.constant([[4.0, 8.0], [12.0, 3.0]]) + sample_weight = tf.constant([1.2, 0.5]) + loss = mse_obj(y_true, y_pred, sample_weight=sample_weight) + + # mse = [((4 - 1)^2 + (8 - 9)^2) / 2, ((12 - 2)^2 + (3 - 5)^2) / 2] + # mse = [5, 52] + # weighted_mse = [5 * 1.2, 52 * 0.5] = [6, 26] + # reduced_weighted_mse = (6 + 26) / 2 = 16 + self.assertAllClose(self.evaluate(loss), 16, 1e-2) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_loss_wrapper_autograph(self): + # Test that functions with control flow wrapped in a LossFunctionWrapper + # get autographed when in a tf.function + def loss_fn(y_true, y_pred): + mse_loss_fn = losses.get("mse") + if tf.reduce_mean(y_true) > 0: + return mse_loss_fn(y_true, y_pred) + else: + return mse_loss_fn(y_true, y_pred) + + mse_obj = losses.LossFunctionWrapper(loss_fn) + + y_true = tf.constant([[1.0, 9.0], [2.0, 5.0]]) + y_pred = tf.constant([[4.0, 8.0], [12.0, 3.0]]) + sample_weight = tf.constant([1.2, 0.5]) + + @tf.function + def tf_functioned_loss_fn(y_true, y_pred, sample_weight=None): + return mse_obj(y_true, y_pred, sample_weight=sample_weight) + + loss = tf_functioned_loss_fn( + y_true, y_pred, sample_weight=sample_weight + ) + + # mse = [((4 - 1)^2 + (8 - 9)^2) / 2, ((12 - 2)^2 + (3 - 5)^2) / 2] + # mse = [5, 52] + # weighted_mse = [5 * 1.2, 52 * 0.5] = [6, 26] + # reduced_weighted_mse = (6 + 26) / 2 = 16 + self.assertAllClose(self.evaluate(loss), 16, 1e-2) + + def test_loss_wrapper_dtype(self): + # Make sure the loss wrapper doesn't cause any numerical precision loss + # during calculation. See + # https://github.com/keras-team/keras/issues/15791 + x = tf.convert_to_tensor([[2.1]], dtype=tf.float64) + y_true = tf.square(x) + y_pred = tf.convert_to_tensor([[3.68]], dtype=tf.float64) + + # TF loss + loss = losses.MeanSquaredError() + tf_loss = loss(y_pred, y_true) + + # manually computed loss in 64-bit + man_loss64 = tf.squeeze(tf.square(y_pred - y_true)) + + self.assertEqual(tf_loss.dtype, tf.float64) + # Make a smaller atol to ensure the float64 precision holds. + self.assertAllClose( + self.evaluate(tf_loss), self.evaluate(man_loss64), atol=1e-8 + ) + + def test_invalid_reduction(self): + with self.assertRaisesRegex(ValueError, "Invalid Reduction Key: Foo."): + losses.MeanSquaredError(reduction="Foo") + + mse_obj = losses.MeanSquaredError() + y = tf.constant([1]) + mse_obj.reduction = "Bar" + with self.assertRaisesRegex(ValueError, "Invalid Reduction Key: Bar."): + mse_obj(y, y) + + def test_deserialization_error(self): + with self.assertRaisesRegex(ValueError, "Could not interpret loss"): + losses.get(0) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_binary_crossentropy_uses_cached_logits(self): + logits = tf.constant([[-30.0, 30.0]]) + y_pred = activations.sigmoid(logits) + self.assertTrue(hasattr(y_pred, "_keras_logits")) + y_true = tf.constant([[0.0, 1.0]]) + loss = losses.binary_crossentropy(y_true, y_pred)[0] + # Check that logits are used. If y_pred is used directly, loss will + # collapse to 0 from underflow. 
+ self.assertNotEqual(self.evaluate(loss), 0.0) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_categorical_crossentropy_uses_cached_logits(self): + logits = tf.constant([[-5.0, 0.0, 5.0]]) + y_pred = activations.softmax(logits) + self.assertTrue(hasattr(y_pred, "_keras_logits")) + y_true = tf.constant([[0.0, 0.0, 1.0]]) + loss = losses.categorical_crossentropy( + y_true, logits, from_logits=True + )[0] + # Check that logits are used. If y_pred is used directly, loss will + # collapse to 0 from underflow. + self.assertNotEqual(self.evaluate(loss), 0.0) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sparse_categorical_crossentropy_uses_cached_logits(self): + logits = tf.constant([[-5.0, 0.0, 5.0]]) + y_pred = activations.softmax(logits) + self.assertTrue(hasattr(y_pred, "_keras_logits")) + y_true = tf.constant([2]) + loss = losses.sparse_categorical_crossentropy( + y_true, logits, from_logits=True + )[0] + # Check that logits are used. If y_pred is used directly, loss will + # collapse to 0 from underflow. + self.assertNotEqual(self.evaluate(loss), 0.0) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_loss_not_autographed_in_eager(self): + class MyLoss(losses.Loss): + def call(self, y_true, y_pred): + return y_true - y_pred + + loss = MyLoss() + y_true = tf.constant([[0.0, 0.0, 0.0]]) + y_pred = tf.constant([[1.0, 1.0, 1.0]]) + + def tf_convert(fn, _): + assert False, "Function should not be autographed." + return fn + + with tf.compat.v1.test.mock.patch.object( + autograph, "tf_convert", tf_convert + ): + loss(y_true, y_pred) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class MeanSquaredErrorTest(tf.test.TestCase): - - def test_config(self): - mse_obj = losses.MeanSquaredError( - reduction=losses_utils.ReductionV2.SUM, name='mse_1') - self.assertEqual(mse_obj.name, 'mse_1') - self.assertEqual(mse_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_all_correct_unweighted(self): - mse_obj = losses.MeanSquaredError() - y_true = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3)) - loss = mse_obj(y_true, y_true) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_unweighted(self): - mse_obj = losses.MeanSquaredError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mse_obj(y_true, y_pred) - self.assertAlmostEqual(self.evaluate(loss), 49.5, 3) - - def test_scalar_weighted(self): - mse_obj = losses.MeanSquaredError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mse_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), 113.85, 3) - - def test_sample_weighted(self): - mse_obj = losses.MeanSquaredError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = mse_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 767.8 / 6, 3) - - def test_ragged_tensors(self): - mse_obj = losses.MeanSquaredError() - - y_true = tf.ragged.constant([[1., 1., 9.], [2., 5.]]) - y_pred = tf.ragged.constant([[4., 1., 8.], [12., 3.]]) - sample_weight = tf.constant([1.2, 0.5]) - loss = mse_obj(y_true, y_pred, 
sample_weight=sample_weight) - - # mse = [((4 - 1)^2 + (8 - 9)^2) / 3, ((12 - 2)^2 + (3 - 5)^2) / 2] - # mse = [3.(3), 52] - # weighted_mse = [3.(3) * 1.2, 52 * 0.5] = [4, 26] - # reduced_weighted_mse = (4 + 26) / 2 = - self.assertAllClose(self.evaluate(loss), 15, 1e-2) - - def test_timestep_weighted(self): - mse_obj = losses.MeanSquaredError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3, 1), dtype=tf.float32) - sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) - loss = mse_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 587 / 6, 3) - - def test_zero_weighted(self): - mse_obj = losses.MeanSquaredError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mse_obj(y_true, y_pred, sample_weight=0) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_invalid_sample_weight(self): - mse_obj = losses.MeanSquaredError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3, 1)) - sample_weight = tf.constant([3, 6, 5, 0], shape=(2, 2)) - with self.assertRaisesRegex((ValueError, tf.errors.InvalidArgumentError), - (r'Incompatible shapes: \[2,3\] vs. \[2,2\]|' - 'Dimensions must be equal')): - mse_obj(y_true, y_pred, sample_weight=sample_weight) - - def test_no_reduction(self): - mse_obj = losses.MeanSquaredError(reduction=losses_utils.ReductionV2.NONE) - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mse_obj(y_true, y_pred, sample_weight=2.3) - loss = self.evaluate(loss) - self.assertArrayNear(loss, [84.3333, 143.3666], 1e-3) - - def test_sum_reduction(self): - mse_obj = losses.MeanSquaredError(reduction=losses_utils.ReductionV2.SUM) - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mse_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), 227.69998, 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + mse_obj = losses.MeanSquaredError( + reduction=losses_utils.ReductionV2.SUM, name="mse_1" + ) + self.assertEqual(mse_obj.name, "mse_1") + self.assertEqual(mse_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_all_correct_unweighted(self): + mse_obj = losses.MeanSquaredError() + y_true = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3)) + loss = mse_obj(y_true, y_true) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + mse_obj = losses.MeanSquaredError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mse_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 49.5, 3) + + def test_scalar_weighted(self): + mse_obj = losses.MeanSquaredError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mse_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 113.85, 3) + + def test_sample_weighted(self): + mse_obj = losses.MeanSquaredError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), 
dtype=tf.float32 + ) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = mse_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 767.8 / 6, 3) + + def test_ragged_tensors(self): + mse_obj = losses.MeanSquaredError() + + y_true = tf.ragged.constant([[1.0, 1.0, 9.0], [2.0, 5.0]]) + y_pred = tf.ragged.constant([[4.0, 1.0, 8.0], [12.0, 3.0]]) + sample_weight = tf.constant([1.2, 0.5]) + loss = mse_obj(y_true, y_pred, sample_weight=sample_weight) + + # mse = [((4 - 1)^2 + (8 - 9)^2) / 3, ((12 - 2)^2 + (3 - 5)^2) / 2] + # mse = [3.(3), 52] + # weighted_mse = [3.(3) * 1.2, 52 * 0.5] = [4, 26] + # reduced_weighted_mse = (4 + 26) / 2 = 15 + self.assertAllClose(self.evaluate(loss), 15, 1e-2) + + def test_timestep_weighted(self): + mse_obj = losses.MeanSquaredError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3, 1), dtype=tf.float32 + ) + sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) + loss = mse_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 587 / 6, 3) + + def test_zero_weighted(self): + mse_obj = losses.MeanSquaredError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mse_obj(y_true, y_pred, sample_weight=0) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_invalid_sample_weight(self): + mse_obj = losses.MeanSquaredError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) + y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3, 1)) + sample_weight = tf.constant([3, 6, 5, 0], shape=(2, 2)) + with self.assertRaisesRegex( + (ValueError, tf.errors.InvalidArgumentError), + ( + r"Incompatible shapes: \[2,3\] vs.
\[2,2\]|" + "Dimensions must be equal" + ), + ): + mse_obj(y_true, y_pred, sample_weight=sample_weight) + + def test_no_reduction(self): + mse_obj = losses.MeanSquaredError( + reduction=losses_utils.ReductionV2.NONE + ) + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mse_obj(y_true, y_pred, sample_weight=2.3) + loss = self.evaluate(loss) + self.assertArrayNear(loss, [84.3333, 143.3666], 1e-3) + + def test_sum_reduction(self): + mse_obj = losses.MeanSquaredError( + reduction=losses_utils.ReductionV2.SUM + ) + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mse_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 227.69998, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class MeanAbsoluteErrorTest(tf.test.TestCase): - - def test_config(self): - mae_obj = losses.MeanAbsoluteError( - reduction=losses_utils.ReductionV2.SUM, name='mae_1') - self.assertEqual(mae_obj.name, 'mae_1') - self.assertEqual(mae_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_all_correct_unweighted(self): - mae_obj = losses.MeanAbsoluteError() - y_true = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3)) - loss = mae_obj(y_true, y_true) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_unweighted(self): - mae_obj = losses.MeanAbsoluteError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mae_obj(y_true, y_pred) - self.assertAlmostEqual(self.evaluate(loss), 5.5, 3) - - def test_scalar_weighted(self): - mae_obj = losses.MeanAbsoluteError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mae_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), 12.65, 3) - - def test_sample_weighted(self): - mae_obj = losses.MeanAbsoluteError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = mae_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 81.4 / 6, 3) - - def test_timestep_weighted(self): - mae_obj = losses.MeanAbsoluteError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3, 1), dtype=tf.float32) - sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) - loss = mae_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 83 / 6, 3) - - def test_zero_weighted(self): - mae_obj = losses.MeanAbsoluteError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mae_obj(y_true, y_pred, sample_weight=0) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_invalid_sample_weight(self): - mae_obj = losses.MeanAbsoluteError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3, 1)) - sample_weight = tf.constant([3, 6, 5, 0], shape=(2, 2)) - with self.assertRaisesRegex((ValueError, tf.errors.InvalidArgumentError), - (r'Incompatible shapes: \[2,3\] vs. 
\[2,2\]|' - 'Dimensions must be equal')): - mae_obj(y_true, y_pred, sample_weight=sample_weight) - - def test_no_reduction(self): - mae_obj = losses.MeanAbsoluteError(reduction=losses_utils.ReductionV2.NONE) - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mae_obj(y_true, y_pred, sample_weight=2.3) - loss = self.evaluate(loss) - self.assertArrayNear(loss, [10.7333, 14.5666], 1e-3) - - def test_sum_reduction(self): - mae_obj = losses.MeanAbsoluteError(reduction=losses_utils.ReductionV2.SUM) - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mae_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), 25.29999, 3) - - def test_ragged_tensor(self): - mae_obj = losses.MeanAbsoluteError() - y_true = tf.ragged.constant([[1, 9, 2], [-5, -2]], dtype=tf.float32) - y_pred = tf.ragged.constant([[4, 8, 12], [8, 1]], dtype=tf.float32) - # loss = [14/3, 16/2] - sample_weight = tf.constant([1.2, 1.0], shape=(2, 1)) - loss = mae_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 6.8, 5) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + mae_obj = losses.MeanAbsoluteError( + reduction=losses_utils.ReductionV2.SUM, name="mae_1" + ) + self.assertEqual(mae_obj.name, "mae_1") + self.assertEqual(mae_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_all_correct_unweighted(self): + mae_obj = losses.MeanAbsoluteError() + y_true = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3)) + loss = mae_obj(y_true, y_true) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + mae_obj = losses.MeanAbsoluteError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mae_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 5.5, 3) + + def test_scalar_weighted(self): + mae_obj = losses.MeanAbsoluteError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mae_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 12.65, 3) + + def test_sample_weighted(self): + mae_obj = losses.MeanAbsoluteError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = mae_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 81.4 / 6, 3) + + def test_timestep_weighted(self): + mae_obj = losses.MeanAbsoluteError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3, 1), dtype=tf.float32 + ) + sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) + loss = mae_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 83 / 6, 3) + + def test_zero_weighted(self): + mae_obj = losses.MeanAbsoluteError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mae_obj(y_true, y_pred, sample_weight=0) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def 
test_invalid_sample_weight(self): + mae_obj = losses.MeanAbsoluteError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) + y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3, 1)) + sample_weight = tf.constant([3, 6, 5, 0], shape=(2, 2)) + with self.assertRaisesRegex( + (ValueError, tf.errors.InvalidArgumentError), + ( + r"Incompatible shapes: \[2,3\] vs. \[2,2\]|" + "Dimensions must be equal" + ), + ): + mae_obj(y_true, y_pred, sample_weight=sample_weight) + + def test_no_reduction(self): + mae_obj = losses.MeanAbsoluteError( + reduction=losses_utils.ReductionV2.NONE + ) + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mae_obj(y_true, y_pred, sample_weight=2.3) + loss = self.evaluate(loss) + self.assertArrayNear(loss, [10.7333, 14.5666], 1e-3) + + def test_sum_reduction(self): + mae_obj = losses.MeanAbsoluteError( + reduction=losses_utils.ReductionV2.SUM + ) + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mae_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 25.29999, 3) + + def test_ragged_tensor(self): + mae_obj = losses.MeanAbsoluteError() + y_true = tf.ragged.constant([[1, 9, 2], [-5, -2]], dtype=tf.float32) + y_pred = tf.ragged.constant([[4, 8, 12], [8, 1]], dtype=tf.float32) + # loss = [14/3, 16/2] + sample_weight = tf.constant([1.2, 1.0], shape=(2, 1)) + loss = mae_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 6.8, 5) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class MeanAbsolutePercentageErrorTest(tf.test.TestCase): - - def test_config(self): - mape_obj = losses.MeanAbsolutePercentageError( - reduction=losses_utils.ReductionV2.SUM, name='mape_1') - self.assertEqual(mape_obj.name, 'mape_1') - self.assertEqual(mape_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_all_correct_unweighted(self): - mape_obj = losses.MeanAbsolutePercentageError() - y_true = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mape_obj(y_true, y_true) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_unweighted(self): - mape_obj = losses.MeanAbsolutePercentageError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mape_obj(y_true, y_pred) - self.assertAlmostEqual(self.evaluate(loss), 211.8518, 3) - - def test_scalar_weighted(self): - mape_obj = losses.MeanAbsolutePercentageError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mape_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), 487.259, 3) - - def test_sample_weighted(self): - mape_obj = losses.MeanAbsolutePercentageError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = mape_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 422.8888, 3) - - def test_ragged_tensors(self): - mape_obj = losses.MeanAbsolutePercentageError() - y_true = tf.ragged.constant([[1, 9, 2], [-5, -2]]) - y_pred = tf.ragged.constant([[4, 8, 12], [8, 1]], dtype=tf.float32) - 
sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = mape_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 510.7222, 3) - - def test_timestep_weighted(self): - mape_obj = losses.MeanAbsolutePercentageError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3, 1), dtype=tf.float32) - sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) - loss = mape_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 694.4445, 3) - - def test_zero_weighted(self): - mape_obj = losses.MeanAbsolutePercentageError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mape_obj(y_true, y_pred, sample_weight=0) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_no_reduction(self): - mape_obj = losses.MeanAbsolutePercentageError( - reduction=losses_utils.ReductionV2.NONE) - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = mape_obj(y_true, y_pred, sample_weight=2.3) - loss = self.evaluate(loss) - self.assertArrayNear(loss, [621.8518, 352.6666], 1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + mape_obj = losses.MeanAbsolutePercentageError( + reduction=losses_utils.ReductionV2.SUM, name="mape_1" + ) + self.assertEqual(mape_obj.name, "mape_1") + self.assertEqual(mape_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_all_correct_unweighted(self): + mape_obj = losses.MeanAbsolutePercentageError() + y_true = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mape_obj(y_true, y_true) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + mape_obj = losses.MeanAbsolutePercentageError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mape_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 211.8518, 3) + + def test_scalar_weighted(self): + mape_obj = losses.MeanAbsolutePercentageError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mape_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 487.259, 3) + + def test_sample_weighted(self): + mape_obj = losses.MeanAbsolutePercentageError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = mape_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 422.8888, 3) + + def test_ragged_tensors(self): + mape_obj = losses.MeanAbsolutePercentageError() + y_true = tf.ragged.constant([[1, 9, 2], [-5, -2]]) + y_pred = tf.ragged.constant([[4, 8, 12], [8, 1]], dtype=tf.float32) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = mape_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 510.7222, 3) + + def test_timestep_weighted(self): + mape_obj = losses.MeanAbsolutePercentageError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], 
shape=(2, 3, 1), dtype=tf.float32 + ) + sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) + loss = mape_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 694.4445, 3) + + def test_zero_weighted(self): + mape_obj = losses.MeanAbsolutePercentageError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mape_obj(y_true, y_pred, sample_weight=0) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_no_reduction(self): + mape_obj = losses.MeanAbsolutePercentageError( + reduction=losses_utils.ReductionV2.NONE + ) + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = mape_obj(y_true, y_pred, sample_weight=2.3) + loss = self.evaluate(loss) + self.assertArrayNear(loss, [621.8518, 352.6666], 1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class MeanSquaredLogarithmicErrorTest(tf.test.TestCase): - - def test_config(self): - msle_obj = losses.MeanSquaredLogarithmicError( - reduction=losses_utils.ReductionV2.SUM, name='mape_1') - self.assertEqual(msle_obj.name, 'mape_1') - self.assertEqual(msle_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_unweighted(self): - msle_obj = losses.MeanSquaredLogarithmicError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = msle_obj(y_true, y_pred) - self.assertAlmostEqual(self.evaluate(loss), 1.4370, 3) - - def test_scalar_weighted(self): - msle_obj = losses.MeanSquaredLogarithmicError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = msle_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), 3.3051, 3) - - def test_sample_weighted(self): - msle_obj = losses.MeanSquaredLogarithmicError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = msle_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 3.7856, 3) - - def test_timestep_weighted(self): - msle_obj = losses.MeanSquaredLogarithmicError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3, 1), dtype=tf.float32) - sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) - loss = msle_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 2.6473, 3) - - def test_zero_weighted(self): - msle_obj = losses.MeanSquaredLogarithmicError() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = msle_obj(y_true, y_pred, sample_weight=0) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_ragged_tensors(self): - msle_obj = losses.MeanSquaredLogarithmicError() - y_true = tf.ragged.constant([[1, 9, 2], [-5, -2]]) - # log(max(y_true, 0) + 1): [[0.69314, 2.3025, 1.0986], [0., 0.]] - y_pred = tf.ragged.constant([[4, 8, 12], [8, 1]], dtype=tf.float32) - # log(max(y_pred, 0) + 1): [[1.6094, 2.1972, 2.5649], [2.1972, 0.6932]] - # per batch loss: [1.0002, 2.6541] - sample_weight = tf.constant([1.2, 3.4], 
shape=(2, 1)) - loss = msle_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 5.1121, 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + msle_obj = losses.MeanSquaredLogarithmicError( + reduction=losses_utils.ReductionV2.SUM, name="mape_1" + ) + self.assertEqual(msle_obj.name, "mape_1") + self.assertEqual(msle_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_unweighted(self): + msle_obj = losses.MeanSquaredLogarithmicError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = msle_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 1.4370, 3) + + def test_scalar_weighted(self): + msle_obj = losses.MeanSquaredLogarithmicError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = msle_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 3.3051, 3) + + def test_sample_weighted(self): + msle_obj = losses.MeanSquaredLogarithmicError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = msle_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 3.7856, 3) + + def test_timestep_weighted(self): + msle_obj = losses.MeanSquaredLogarithmicError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3, 1), dtype=tf.float32 + ) + sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) + loss = msle_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 2.6473, 3) + + def test_zero_weighted(self): + msle_obj = losses.MeanSquaredLogarithmicError() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = msle_obj(y_true, y_pred, sample_weight=0) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_ragged_tensors(self): + msle_obj = losses.MeanSquaredLogarithmicError() + y_true = tf.ragged.constant([[1, 9, 2], [-5, -2]]) + # log(max(y_true, 0) + 1): [[0.69314, 2.3025, 1.0986], [0., 0.]] + y_pred = tf.ragged.constant([[4, 8, 12], [8, 1]], dtype=tf.float32) + # log(max(y_pred, 0) + 1): [[1.6094, 2.1972, 2.5649], [2.1972, 0.6932]] + # per batch loss: [1.0002, 2.6541] + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = msle_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 5.1121, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class CosineSimilarityTest(tf.test.TestCase): - - def l2_norm(self, x, axis): - epsilon = 1e-12 - square_sum = np.sum(np.square(x), axis=axis, keepdims=True) - x_inv_norm = 1 / np.sqrt(np.maximum(square_sum, epsilon)) - return np.multiply(x, x_inv_norm) - - def setup(self, axis=1): - self.np_y_true = np.asarray([[1, 9, 2], [-5, -2, 6]], dtype=np.float32) - self.np_y_pred = np.asarray([[4, 8, 12], [8, 1, 3]], dtype=np.float32) - - y_true = self.l2_norm(self.np_y_true, axis) - y_pred = self.l2_norm(self.np_y_pred, axis) - self.expected_loss = np.sum(np.multiply(y_true, y_pred), axis=(axis,)) - - self.y_true = 
tf.constant(self.np_y_true) - self.y_pred = tf.constant(self.np_y_pred) - - def test_config(self): - cosine_obj = losses.CosineSimilarity( - axis=2, reduction=losses_utils.ReductionV2.SUM, name='cosine_loss') - self.assertEqual(cosine_obj.name, 'cosine_loss') - self.assertEqual(cosine_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_unweighted(self): - self.setup() - cosine_obj = losses.CosineSimilarity() - loss = cosine_obj(self.y_true, self.y_pred) - expected_loss = -np.mean(self.expected_loss) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_scalar_weighted(self): - self.setup() - cosine_obj = losses.CosineSimilarity() - sample_weight = 2.3 - loss = cosine_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - expected_loss = -np.mean(self.expected_loss * sample_weight) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_sample_weighted(self): - self.setup() - cosine_obj = losses.CosineSimilarity() - sample_weight = np.asarray([1.2, 3.4]) - loss = cosine_obj( - self.y_true, self.y_pred, sample_weight=tf.constant(sample_weight)) - expected_loss = -np.mean(self.expected_loss * sample_weight) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_timestep_weighted(self): - self.setup() - cosine_obj = losses.CosineSimilarity() - np_y_true = self.np_y_true.reshape((2, 3, 1)) - np_y_pred = self.np_y_pred.reshape((2, 3, 1)) - sample_weight = np.asarray([3, 6, 5, 0, 4, 2]).reshape((2, 3)) - - y_true = self.l2_norm(np_y_true, 2) - y_pred = self.l2_norm(np_y_pred, 2) - expected_loss = np.sum(np.multiply(y_true, y_pred), axis=(2,)) - - y_true = tf.constant(np_y_true) - y_pred = tf.constant(np_y_pred) - loss = cosine_obj(y_true, y_pred, sample_weight=tf.constant(sample_weight)) - - expected_loss = -np.mean(expected_loss * sample_weight) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_zero_weighted(self): - self.setup() - cosine_obj = losses.CosineSimilarity() - loss = cosine_obj(self.y_true, self.y_pred, sample_weight=0) - self.assertAlmostEqual(self.evaluate(loss), 0., 3) - - def test_axis(self): - self.setup(axis=1) - cosine_obj = losses.CosineSimilarity(axis=1) - loss = cosine_obj(self.y_true, self.y_pred) - expected_loss = -np.mean(self.expected_loss) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def l2_norm(self, x, axis): + epsilon = 1e-12 + square_sum = np.sum(np.square(x), axis=axis, keepdims=True) + x_inv_norm = 1 / np.sqrt(np.maximum(square_sum, epsilon)) + return np.multiply(x, x_inv_norm) + + def setup(self, axis=1): + self.np_y_true = np.asarray([[1, 9, 2], [-5, -2, 6]], dtype=np.float32) + self.np_y_pred = np.asarray([[4, 8, 12], [8, 1, 3]], dtype=np.float32) + + y_true = self.l2_norm(self.np_y_true, axis) + y_pred = self.l2_norm(self.np_y_pred, axis) + self.expected_loss = np.sum(np.multiply(y_true, y_pred), axis=(axis,)) + + self.y_true = tf.constant(self.np_y_true) + self.y_pred = tf.constant(self.np_y_pred) + + def test_config(self): + cosine_obj = losses.CosineSimilarity( + axis=2, reduction=losses_utils.ReductionV2.SUM, name="cosine_loss" + ) + self.assertEqual(cosine_obj.name, "cosine_loss") + self.assertEqual(cosine_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_unweighted(self): + self.setup() + cosine_obj = losses.CosineSimilarity() + loss = cosine_obj(self.y_true, self.y_pred) + expected_loss = -np.mean(self.expected_loss) + 
self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_scalar_weighted(self): + self.setup() + cosine_obj = losses.CosineSimilarity() + sample_weight = 2.3 + loss = cosine_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + expected_loss = -np.mean(self.expected_loss * sample_weight) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_sample_weighted(self): + self.setup() + cosine_obj = losses.CosineSimilarity() + sample_weight = np.asarray([1.2, 3.4]) + loss = cosine_obj( + self.y_true, self.y_pred, sample_weight=tf.constant(sample_weight) + ) + expected_loss = -np.mean(self.expected_loss * sample_weight) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_timestep_weighted(self): + self.setup() + cosine_obj = losses.CosineSimilarity() + np_y_true = self.np_y_true.reshape((2, 3, 1)) + np_y_pred = self.np_y_pred.reshape((2, 3, 1)) + sample_weight = np.asarray([3, 6, 5, 0, 4, 2]).reshape((2, 3)) + + y_true = self.l2_norm(np_y_true, 2) + y_pred = self.l2_norm(np_y_pred, 2) + expected_loss = np.sum(np.multiply(y_true, y_pred), axis=(2,)) + + y_true = tf.constant(np_y_true) + y_pred = tf.constant(np_y_pred) + loss = cosine_obj( + y_true, y_pred, sample_weight=tf.constant(sample_weight) + ) + + expected_loss = -np.mean(expected_loss * sample_weight) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_zero_weighted(self): + self.setup() + cosine_obj = losses.CosineSimilarity() + loss = cosine_obj(self.y_true, self.y_pred, sample_weight=0) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_axis(self): + self.setup(axis=1) + cosine_obj = losses.CosineSimilarity(axis=1) + loss = cosine_obj(self.y_true, self.y_pred) + expected_loss = -np.mean(self.expected_loss) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class BinaryCrossentropyTest(tf.test.TestCase): - - def test_config(self): - bce_obj = losses.BinaryCrossentropy( - reduction=losses_utils.ReductionV2.SUM, name='bce_1') - self.assertEqual(bce_obj.name, 'bce_1') - self.assertEqual(bce_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_all_correct_unweighted(self): - y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=tf.float32) - bce_obj = losses.BinaryCrossentropy() - loss = bce_obj(y_true, y_true) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - # Test with logits. - logits = tf.constant([[100.0, -100.0, -100.0], [-100.0, 100.0, -100.0], - [-100.0, -100.0, 100.0]]) - bce_obj = losses.BinaryCrossentropy(from_logits=True) - loss = bce_obj(y_true, logits) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_unweighted(self): - y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) - y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) - bce_obj = losses.BinaryCrossentropy() - loss = bce_obj(y_true, y_pred) - - # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 - # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) - # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] - - # Loss = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) - # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), - # -log(Y_MAX + EPSILON), -log(1)] - # = [0, 15.33, 0, 0] - # Reduced loss = 15.33 / 4 - - self.assertAlmostEqual(self.evaluate(loss), 3.833, 3) - - # Test with logits. 
- y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) - logits = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) - bce_obj = losses.BinaryCrossentropy(from_logits=True) - loss = bce_obj(y_true, logits) - - # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # = [((100 - 100 * 1 + log(1 + exp(-100))) + - # (0 + 100 * 0 + log(1 + exp(-100))) + - # (100 - 100 * 1 + log(1 + exp(-100))), - # ((100 - 100 * 0 + log(1 + exp(-100))) + - # (100 - 100 * 1 + log(1 + exp(-100))) + - # (0 + 100 * 1 + log(1 + exp(-100))))] - # = [(0 + 0 + 0) / 3, 200 / 3] - # Reduced loss = (0 + 66.666) / 2 - - self.assertAlmostEqual(self.evaluate(loss), 33.333, 3) - - def test_scalar_weighted(self): - bce_obj = losses.BinaryCrossentropy() - y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) - y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) - loss = bce_obj(y_true, y_pred, sample_weight=2.3) - - # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 - # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) - # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] - - # Loss = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) - # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), - # -log(Y_MAX + EPSILON), -log(1)] - # = [0, 15.33, 0, 0] - # Weighted loss = [0, 15.33 * 2.3, 0, 0] - # Reduced loss = 15.33 * 2.3 / 4 - - self.assertAlmostEqual(self.evaluate(loss), 8.817, 3) - - # Test with logits. - y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) - logits = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) - bce_obj = losses.BinaryCrossentropy(from_logits=True) - loss = bce_obj(y_true, logits, sample_weight=2.3) - - # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # Loss = [(0 + 0 + 0) / 3, 200 / 3] - # Weighted loss = [0 * 2.3, 66.666 * 2.3] - # Reduced loss = (0 + 66.666 * 2.3) / 2 - - self.assertAlmostEqual(self.evaluate(loss), 76.667, 3) - - def test_sample_weighted(self): - bce_obj = losses.BinaryCrossentropy() - y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) - y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = bce_obj(y_true, y_pred, sample_weight=sample_weight) - - # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 - # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) - # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] - - # Loss = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) - # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), - # -log(Y_MAX + EPSILON), -log(1)] - # = [0, 15.33, 0, 0] - # Reduced loss = 15.33 * 1.2 / 4 - - self.assertAlmostEqual(self.evaluate(loss), 4.6, 3) - - # Test with logits. 
- y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) - logits = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) - weights = tf.constant([4, 3]) - bce_obj = losses.BinaryCrossentropy(from_logits=True) - loss = bce_obj(y_true, logits, sample_weight=weights) - - # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # Loss = [(0 + 0 + 0)/3, 200 / 3] - # Weighted loss = [0 * 4, 66.666 * 3] - # Reduced loss = (0 + 66.666 * 3) / 2 - - self.assertAlmostEqual(self.evaluate(loss), 100, 3) - - def test_no_reduction(self): - y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) - logits = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) - bce_obj = losses.BinaryCrossentropy( - from_logits=True, reduction=losses_utils.ReductionV2.NONE) - loss = bce_obj(y_true, logits) - - # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # Loss = [(0 + 0 + 0)/3, (200)/3] - - self.assertAllClose((0., 66.6666), self.evaluate(loss), 3) - - def test_label_smoothing(self): - logits = tf.constant([[100.0, -100.0, -100.0]]) - y_true = tf.constant([[1, 0, 1]]) - label_smoothing = 0.1 - # Loss: max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # Label smoothing: z' = z * (1 - L) + 0.5L - # 1 = 1 - 0.5L - # 0 = 0.5L - # Applying the above two fns to the given input: - # (100 - 100 * (1 - 0.5 L) + 0 + - # 0 + 100 * (0.5 L) + 0 + - # 0 + 100 * (1 - 0.5 L) + 0) * (1/3) - # = (100 + 50L) * 1/3 - bce_obj = losses.BinaryCrossentropy( - from_logits=True, label_smoothing=label_smoothing) - loss = bce_obj(y_true, logits) - expected_value = (100.0 + 50.0 * label_smoothing) / 3.0 - self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) - - def test_label_smoothing_ndarray(self): - logits = np.asarray([[100.0, -100.0, -100.0]]) - y_true = np.asarray([[1, 0, 1]]) - label_smoothing = 0.1 - # Loss: max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # Label smoothing: z' = z * (1 - L) + 0.5L - # 1 = 1 - 0.5L - # 0 = 0.5L - # Applying the above two fns to the given input: - # (100 - 100 * (1 - 0.5 L) + 0 + - # 0 + 100 * (0.5 L) + 0 + - # 0 + 100 * (1 - 0.5 L) + 0) * (1/3) - # = (100 + 50L) * 1/3 - bce_obj = losses.BinaryCrossentropy( - from_logits=True, label_smoothing=label_smoothing) - loss = bce_obj(y_true, logits) - expected_value = (100.0 + 50.0 * label_smoothing) / 3.0 - self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) - - def test_ragged_tensors(self): - bce_obj = losses.BinaryCrossentropy() - y_true = tf.ragged.constant([[1, 0, 1], [0]]) - y_pred = tf.ragged.constant([[1, 1, 1], [0]], dtype=tf.float32) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = bce_obj(y_true, y_pred, sample_weight=sample_weight) - - # per batch loss = [ sum([0, 15.33, 0]) / 3, 0. ] - # = [ 5.11, 0] - # Reduced loss = 5.11 * 1.2 / 2 - - self.assertAlmostEqual(self.evaluate(loss), 3.0666, 3) - - # Test with logits. 
- y_true = tf.ragged.constant([[1, 0, 1], [0, 1]]) - logits = tf.ragged.constant([[100.0, -100.0, 100.0], [100.0, 100.0]]) - weights = tf.constant([4, 3]) - bce_obj = losses.BinaryCrossentropy(from_logits=True) - loss = bce_obj(y_true, logits, sample_weight=weights) - - # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # Loss = [(0 + 0 + 0)/3, 100 / 2] - # Weighted loss = [0 * 4, 50 * 3] - # Reduced loss = (0 + 50 * 3) / 2 - - self.assertAlmostEqual(self.evaluate(loss), 75., 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + bce_obj = losses.BinaryCrossentropy( + reduction=losses_utils.ReductionV2.SUM, name="bce_1" + ) + self.assertEqual(bce_obj.name, "bce_1") + self.assertEqual(bce_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_all_correct_unweighted(self): + y_true = tf.constant( + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=tf.float32 + ) + bce_obj = losses.BinaryCrossentropy() + loss = bce_obj(y_true, y_true) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + # Test with logits. + logits = tf.constant( + [ + [100.0, -100.0, -100.0], + [-100.0, 100.0, -100.0], + [-100.0, -100.0, 100.0], + ] + ) + bce_obj = losses.BinaryCrossentropy(from_logits=True) + loss = bce_obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) + bce_obj = losses.BinaryCrossentropy() + loss = bce_obj(y_true, y_pred) + + # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 + # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) + # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] + + # Loss = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) + # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), + # -log(Y_MAX + EPSILON), -log(1)] + # = [0, 15.33, 0, 0] + # Reduced loss = 15.33 / 4 + + self.assertAlmostEqual(self.evaluate(loss), 3.833, 3) + + # Test with logits. + y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) + logits = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) + bce_obj = losses.BinaryCrossentropy(from_logits=True) + loss = bce_obj(y_true, logits) + + # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # = [((100 - 100 * 1 + log(1 + exp(-100))) + + # (0 + 100 * 0 + log(1 + exp(-100))) + + # (100 - 100 * 1 + log(1 + exp(-100))), + # ((100 - 100 * 0 + log(1 + exp(-100))) + + # (100 - 100 * 1 + log(1 + exp(-100))) + + # (0 + 100 * 1 + log(1 + exp(-100))))] + # = [(0 + 0 + 0) / 3, 200 / 3] + # Reduced loss = (0 + 66.666) / 2 + + self.assertAlmostEqual(self.evaluate(loss), 33.333, 3) + + def test_scalar_weighted(self): + bce_obj = losses.BinaryCrossentropy() + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) + loss = bce_obj(y_true, y_pred, sample_weight=2.3) + + # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 + # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) + # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] + + # Loss = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) + # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), + # -log(Y_MAX + EPSILON), -log(1)] + # = [0, 15.33, 0, 0] + # Weighted loss = [0, 15.33 * 2.3, 0, 0] + # Reduced loss = 15.33 * 2.3 / 4 + + self.assertAlmostEqual(self.evaluate(loss), 8.817, 3) + + # Test with logits. 
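+ # (The closed form in the comments below is the numerically stable sigmoid + # cross-entropy, max(x, 0) - x * z + log(1 + exp(-abs(x))); with + # from_logits=True Keras delegates to + # tf.nn.sigmoid_cross_entropy_with_logits, which computes this expression + # directly.)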
+ y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) + logits = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) + bce_obj = losses.BinaryCrossentropy(from_logits=True) + loss = bce_obj(y_true, logits, sample_weight=2.3) + + # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # Loss = [(0 + 0 + 0) / 3, 200 / 3] + # Weighted loss = [0 * 2.3, 66.666 * 2.3] + # Reduced loss = (0 + 66.666 * 2.3) / 2 + + self.assertAlmostEqual(self.evaluate(loss), 76.667, 3) + + def test_sample_weighted(self): + bce_obj = losses.BinaryCrossentropy() + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = bce_obj(y_true, y_pred, sample_weight=sample_weight) + + # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 + # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) + # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] + + # Loss = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) + # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), + # -log(Y_MAX + EPSILON), -log(1)] + # = [0, 15.33, 0, 0] + # Reduced loss = 15.33 * 1.2 / 4 + + self.assertAlmostEqual(self.evaluate(loss), 4.6, 3) + + # Test with logits. + y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) + logits = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) + weights = tf.constant([4, 3]) + bce_obj = losses.BinaryCrossentropy(from_logits=True) + loss = bce_obj(y_true, logits, sample_weight=weights) + + # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # Loss = [(0 + 0 + 0)/3, 200 / 3] + # Weighted loss = [0 * 4, 66.666 * 3] + # Reduced loss = (0 + 66.666 * 3) / 2 + + self.assertAlmostEqual(self.evaluate(loss), 100, 3) + + def test_no_reduction(self): + y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) + logits = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) + bce_obj = losses.BinaryCrossentropy( + from_logits=True, reduction=losses_utils.ReductionV2.NONE + ) + loss = bce_obj(y_true, logits) + + # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # Loss = [(0 + 0 + 0)/3, (200)/3] + + self.assertAllClose((0.0, 66.6666), self.evaluate(loss), 3) + + def test_label_smoothing(self): + logits = tf.constant([[100.0, -100.0, -100.0]]) + y_true = tf.constant([[1, 0, 1]]) + label_smoothing = 0.1 + # Loss: max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # Label smoothing: z' = z * (1 - L) + 0.5L + # 1 = 1 - 0.5L + # 0 = 0.5L + # Applying the above two fns to the given input: + # (100 - 100 * (1 - 0.5 L) + 0 + + # 0 + 100 * (0.5 L) + 0 + + # 0 + 100 * (1 - 0.5 L) + 0) * (1/3) + # = (100 + 50L) * 1/3 + bce_obj = losses.BinaryCrossentropy( + from_logits=True, label_smoothing=label_smoothing + ) + loss = bce_obj(y_true, logits) + expected_value = (100.0 + 50.0 * label_smoothing) / 3.0 + self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) + + def test_label_smoothing_ndarray(self): + logits = np.asarray([[100.0, -100.0, -100.0]]) + y_true = np.asarray([[1, 0, 1]]) + label_smoothing = 0.1 + # Loss: max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # Label smoothing: z' = z * (1 - L) + 0.5L + # 1 = 1 - 0.5L + # 0 = 0.5L + # Applying the above two fns to the given input: + # (100 - 100 * (1 - 0.5 L) + 0 + + # 0 + 100 * (0.5 L) + 0 + + # 0 + 100 * (1 - 0.5 L) + 0) * (1/3) + # = (100 + 50L) * 1/3 + bce_obj = 
losses.BinaryCrossentropy( + from_logits=True, label_smoothing=label_smoothing + ) + loss = bce_obj(y_true, logits) + expected_value = (100.0 + 50.0 * label_smoothing) / 3.0 + self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) + + def test_ragged_tensors(self): + bce_obj = losses.BinaryCrossentropy() + y_true = tf.ragged.constant([[1, 0, 1], [0]]) + y_pred = tf.ragged.constant([[1, 1, 1], [0]], dtype=tf.float32) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = bce_obj(y_true, y_pred, sample_weight=sample_weight) + + # per batch loss = [ sum([0, 15.33, 0]) / 3, 0. ] + # = [ 5.11, 0] + # Reduced loss = 5.11 * 1.2 / 2 + + self.assertAlmostEqual(self.evaluate(loss), 3.0666, 3) + + # Test with logits. + y_true = tf.ragged.constant([[1, 0, 1], [0, 1]]) + logits = tf.ragged.constant([[100.0, -100.0, 100.0], [100.0, 100.0]]) + weights = tf.constant([4, 3]) + bce_obj = losses.BinaryCrossentropy(from_logits=True) + loss = bce_obj(y_true, logits, sample_weight=weights) + + # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # Loss = [(0 + 0 + 0)/3, 100 / 2] + # Weighted loss = [0 * 4, 50 * 3] + # Reduced loss = (0 + 50 * 3) / 2 + + self.assertAlmostEqual(self.evaluate(loss), 75.0, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class BinaryFocalCrossentropyTest(tf.test.TestCase): - - def test_config(self): - obj = losses.BinaryFocalCrossentropy(gamma=1.5, name='bfce_0') - self.assertEqual(obj.name, 'bfce_0') - self.assertAlmostEqual(obj.gamma, 1.5) - - obj_2 = losses.BinaryFocalCrossentropy.from_config(obj.get_config()) - self.assertEqual(obj_2.name, 'bfce_0') - self.assertAlmostEqual(obj_2.gamma, 1.5) - - def test_all_correct_unweighted(self): - y_true = tf.constant([ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1], - ], dtype=tf.float32) - obj = losses.BinaryFocalCrossentropy(gamma=1.5) - loss = obj(y_true, y_true) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - # Test with logits. - logits = tf.constant([ - [100.0, -100.0, -100.0], - [-100.0, 100.0, -100.0], - [-100.0, -100.0, 100.0], - ]) - obj = losses.BinaryFocalCrossentropy(gamma=2.0, from_logits=True) - loss = obj(y_true, logits) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_unweighted(self): - y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) - y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape([2, 2]) - obj = losses.BinaryFocalCrossentropy(gamma=2.0) - loss = obj(y_true, y_pred) - - # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, 0.8]] - # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] - - # bceLoss = -log(p_t) = [[0.105, 1.609] ,[0.357, 0.223]] - # focalLoss = focal bceLoss = [[0.001, 1.03], [0.032, 0.009]] - # Reduced loss = (0.001 + 1.03 + 0.032 + 0.009) / 4 = 0.268 - - self.assertAlmostEqual(self.evaluate(loss), 0.268, 3) - - # Test with logits. 
- y_true = tf.constant([[1, 1, 0], [0, 1, 0]], dtype=tf.float32) - logits = tf.constant([[1.5, -2.7, 2.9], [-3.8, 1.2, -4.5]]) - obj = losses.BinaryFocalCrossentropy(gamma=3.0, from_logits=True) - loss = obj(y_true, logits) - - # sigmoidal = sigmoid(logits) - # = [[0.8176, 0.063, 0.9478], [0.0219, 0.7685, 0.011]] - # p_t = y_true sigmoidal + (1 - y_true) (1 - sigmoidal) - # = [[0.8176, 0.063, 0.0522], [0.9781, 0.7685, 0.989]] - # focal = (1 - p_t) ** gamma - # = [[0.006, 0.823, 0.851], [0.00001, 0.0124, 0.000001]] - - # bceLoss = -log(p_t) - # = [[0.2014, 2.7646 , 2.9527], [0.0221, 0.2633, 0.01106]] - - # focalLoss = focal bceLoss - # = [[0.0012, 2.2743, 2.514], [0.0000002, 0.0033, 0.00000001]] - # Reduced loss = 0.799 - - self.assertAlmostEqual(self.evaluate(loss), 0.799, 3) - - def test_scalar_weighted(self): - y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) - y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape([2, 2]) - obj = losses.BinaryFocalCrossentropy(gamma=2.0) - loss = obj(y_true, y_pred, sample_weight=1.23) - - # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, 0.8]] - # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] - - # bceLoss = -log(p_t) = [[0.105, 1.609] ,[0.357, 0.223]] * sample_weight - # focalLoss = focal bceLoss - # = [[0.001, 1.03], [0.032, 0.009]] * sample_weight - # Reduced loss = (0.001 + 1.03 + 0.032 + 0.009) * 1.23 / 4 = 0.3296 - - self.assertAlmostEqual(self.evaluate(loss), 0.3296, 3) - - # Test with logits. - y_true = tf.constant([[1, 1, 0], [0, 1, 0]], dtype=tf.float32) - logits = tf.constant([[1.5, -2.7, 2.9], [-3.8, 1.2, -4.5]]) - obj = losses.BinaryFocalCrossentropy(gamma=3.0, from_logits=True) - loss = obj(y_true, logits, sample_weight=3.21) - - # sigmoidal = sigmoid(logits) - # = [[0.8176, 0.063, 0.9478], [0.0219, 0.7685, 0.011]] - # p_t = y_true sigmoidal + (1 - y_true) (1 - sigmoidal) - # = [[0.8176, 0.063, 0.0522], [0.9781, 0.7685, 0.989]] - # focal = (1 - p_t) ** gamma - # = [[0.006, 0.823, 0.851], [0.00001, 0.0124, 0.000001]] - - # bceLoss = -log(p_t) * sample_weight - # = [[0.2014, 2.7646 , 2.9527], [0.0221, 0.2633, 0.01106]] * sample_weight - - # focalLoss = focal * bceLoss = - # [[0.0012, 2.2743, 2.514], [0.0000002, 0.0033, 0.00000001]] * sample_weight - # Reduced loss = 0.799 * 3.21 = 2.565 - - self.assertAlmostEqual(self.evaluate(loss), 2.565, 3) - - def test_sample_weighted(self): - y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) - y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape([2, 2]) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - obj = losses.BinaryFocalCrossentropy(gamma=2.0) - loss = obj(y_true, y_pred, sample_weight=sample_weight) - - # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, 0.8]] - # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] - - # bceLoss = -log(p_t) * sample_weight - # = [[0.105, 1.609] ,[0.357, 0.223]] * sample_weight - # focalLoss = focal * bceLoss - # = [[0.001, 1.03], [0.032, 0.009]] * sample_weight - # = [[0.0012, 1.236], [0.1088, 0.0306]] - # Reduced loss = (0.0012 + 1.236 + 0.1088 + 0.0306) / 4 = 0.34415 - - self.assertAlmostEqual(self.evaluate(loss), 0.34415, 3) - - # Test with logits. 
- y_true = tf.constant([[1, 1, 0], [0, 1, 0]], dtype=tf.float32) - logits = tf.constant([[1.5, -2.7, 2.9], [-3.8, 1.2, -4.5]]) - obj = losses.BinaryFocalCrossentropy(gamma=3.0, from_logits=True) - loss = obj(y_true, logits, sample_weight=sample_weight) - - # sigmoidal = sigmoid(logits) - # = [[0.8176, 0.063, 0.9478], [0.0219, 0.7685, 0.011]] - # p_t = y_true sigmoidal + (1 - y_true) (1 - sigmoidal) - # = [[0.8176, 0.063, 0.0522], [0.9781, 0.7685, 0.989]] - # focal = (1 - p_t) ** gamma - # = [[0.006, 0.823, 0.851], [0.00001, 0.0124, 0.000001]] - - # bceLoss = -log(p_t) * sample_weight - # = [[0.2014, 2.7646 , 2.9527], [0.0221, 0.2633, 0.01106]] * sample_weight - - # focalLoss = focal * bceLoss = - # [[0.0012, 2.2743, 2.514], [0.0000002, 0.0033, 0.00000001]] * sample_weight - # focalLoss = [[0.00144, 2.72916, 3.0168], [6.8e-7, 0.01122, 3.4e-8]] - # Reduced loss = 0.799 - - self.assertAlmostEqual(self.evaluate(loss), 0.95977, 3) - - def test_no_reduction(self): - y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) - y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape([2, 2]) - obj = losses.BinaryFocalCrossentropy( - gamma=2.0, - reduction=losses_utils.ReductionV2.NONE, - ) - loss = obj(y_true, y_pred) - - # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, 0.8]] - # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] - - # bceLoss = -log(p_t) = [[0.105, 1.609] ,[0.357, 0.223]] - # focalLoss = focal bceLoss = [[0.001, 1.03], [0.032, 0.009]] - # Reduced loss = [(0.001 + 1.03) / 2, (0.032 + 0.009) / 2] - - self.assertAllClose(self.evaluate(loss), (0.5155, 0.0205), 3) - - def test_ragged_tensors(self): - y_true = tf.ragged.constant([[1, 0, 1], [0]]) - y_pred = tf.ragged.constant([[0.9, 0.8, 0.7], [0.2]]) - obj = losses.BinaryFocalCrossentropy(gamma=2.0) - loss = obj(y_true, y_pred) - - # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2, 0.7], [0.8]] - # focal = (1 - p_t) ** gamma = [[0.01, 0.64, 0.09], [0.04]] - - # bceLoss = -log(p_t) = [[0.105, 1.609, 0.357], [0.223]] - # focalLoss = focal bceLoss = [[0.001, 1.03, 0.032], [0.009]] - # Reduced loss = ((0.001 + 1.03 + 0.032) / 3 + 0.009) / 2 = 0.18166 - - self.assertAlmostEqual(self.evaluate(loss), 0.18166, 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + obj = losses.BinaryFocalCrossentropy(gamma=1.5, name="bfce_0") + self.assertEqual(obj.name, "bfce_0") + self.assertAlmostEqual(obj.gamma, 1.5) + + obj_2 = losses.BinaryFocalCrossentropy.from_config(obj.get_config()) + self.assertEqual(obj_2.name, "bfce_0") + self.assertAlmostEqual(obj_2.gamma, 1.5) + + def test_all_correct_unweighted(self): + y_true = tf.constant( + [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + ], + dtype=tf.float32, + ) + obj = losses.BinaryFocalCrossentropy(gamma=1.5) + loss = obj(y_true, y_true) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + # Test with logits. 
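+ # (With saturated logits of +/-100 that agree with y_true, sigmoid(logits) + # matches the labels to within float precision, so p_t ~= 1 everywhere; the + # focal factor (1 - p_t) ** gamma and the underlying BCE term then both + # vanish, which is why the expected loss is 0 for any gamma.)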
+ logits = tf.constant( + [ + [100.0, -100.0, -100.0], + [-100.0, 100.0, -100.0], + [-100.0, -100.0, 100.0], + ] + ) + obj = losses.BinaryFocalCrossentropy(gamma=2.0, from_logits=True) + loss = obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape( + [2, 2] + ) + obj = losses.BinaryFocalCrossentropy(gamma=2.0) + loss = obj(y_true, y_pred) + + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], + # [0.7, 0.8]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] + + # bceLoss = -log(p_t) = [[0.105, 1.609] ,[0.357, 0.223]] + # focalLoss = focal bceLoss = [[0.001, 1.03], [0.032, 0.009]] + # Reduced loss = (0.001 + 1.03 + 0.032 + 0.009) / 4 = 0.268 + + self.assertAlmostEqual(self.evaluate(loss), 0.268, 3) + + # Test with logits. + y_true = tf.constant([[1, 1, 0], [0, 1, 0]], dtype=tf.float32) + logits = tf.constant([[1.5, -2.7, 2.9], [-3.8, 1.2, -4.5]]) + obj = losses.BinaryFocalCrossentropy(gamma=3.0, from_logits=True) + loss = obj(y_true, logits) + + # sigmoidal = sigmoid(logits) + # = [[0.8176, 0.063, 0.9478], [0.0219, 0.7685, 0.011]] + # p_t = y_true sigmoidal + (1 - y_true) (1 - sigmoidal) + # = [[0.8176, 0.063, 0.0522], [0.9781, 0.7685, 0.989]] + # focal = (1 - p_t) ** gamma + # = [[0.006, 0.823, 0.851], [0.00001, 0.0124, 0.000001]] + + # bceLoss = -log(p_t) + # = [[0.2014, 2.7646 , 2.9527], [0.0221, 0.2633, 0.01106]] + + # focalLoss = focal bceLoss + # = [[0.0012, 2.2743, 2.514], [0.0000002, 0.0033, 0.00000001]] + # Reduced loss = 0.799 + + self.assertAlmostEqual(self.evaluate(loss), 0.799, 3) + + def test_scalar_weighted(self): + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape( + [2, 2] + ) + obj = losses.BinaryFocalCrossentropy(gamma=2.0) + loss = obj(y_true, y_pred, sample_weight=1.23) + + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], + # [0.7, 0.8]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] + + # bceLoss = -log(p_t) = [[0.105, 1.609] ,[0.357, 0.223]] * sample_weight + # focalLoss = focal bceLoss + # = [[0.001, 1.03], [0.032, 0.009]] * sample_weight + # Reduced loss = (0.001 + 1.03 + 0.032 + 0.009) * 1.23 / 4 = 0.3296 + + self.assertAlmostEqual(self.evaluate(loss), 0.3296, 3) + + # Test with logits. 
+ y_true = tf.constant([[1, 1, 0], [0, 1, 0]], dtype=tf.float32) + logits = tf.constant([[1.5, -2.7, 2.9], [-3.8, 1.2, -4.5]]) + obj = losses.BinaryFocalCrossentropy(gamma=3.0, from_logits=True) + loss = obj(y_true, logits, sample_weight=3.21) + + # sigmoidal = sigmoid(logits) + # = [[0.8176, 0.063, 0.9478], [0.0219, 0.7685, 0.011]] + # p_t = y_true sigmoidal + (1 - y_true) (1 - sigmoidal) + # = [[0.8176, 0.063, 0.0522], [0.9781, 0.7685, 0.989]] + # focal = (1 - p_t) ** gamma + # = [[0.006, 0.823, 0.851], [0.00001, 0.0124, 0.000001]] + + # bceLoss = -log(p_t) * sample_weight + # = [[0.2014, 2.7646 , 2.9527], [0.0221, 0.2633, 0.01106]] * + # sample_weight + + # focalLoss = focal * bceLoss = + # [[0.0012, 2.2743, 2.514], [0.0000002, 0.0033, 0.00000001]] * + # sample_weight + # Reduced loss = 0.799 * 3.21 = 2.565 + + self.assertAlmostEqual(self.evaluate(loss), 2.565, 3) + + def test_sample_weighted(self): + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape( + [2, 2] + ) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + obj = losses.BinaryFocalCrossentropy(gamma=2.0) + loss = obj(y_true, y_pred, sample_weight=sample_weight) + + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, + # 0.8]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] + + # bceLoss = -log(p_t) * sample_weight + # = [[0.105, 1.609] ,[0.357, 0.223]] * sample_weight + # focalLoss = focal * bceLoss + # = [[0.001, 1.03], [0.032, 0.009]] * sample_weight + # = [[0.0012, 1.236], [0.1088, 0.0306]] + # Reduced loss = (0.0012 + 1.236 + 0.1088 + 0.0306) / 4 = 0.34415 + + self.assertAlmostEqual(self.evaluate(loss), 0.34415, 3) + + # Test with logits. + y_true = tf.constant([[1, 1, 0], [0, 1, 0]], dtype=tf.float32) + logits = tf.constant([[1.5, -2.7, 2.9], [-3.8, 1.2, -4.5]]) + obj = losses.BinaryFocalCrossentropy(gamma=3.0, from_logits=True) + loss = obj(y_true, logits, sample_weight=sample_weight) + + # sigmoidal = sigmoid(logits) + # = [[0.8176, 0.063, 0.9478], [0.0219, 0.7685, 0.011]] + # p_t = y_true sigmoidal + (1 - y_true) (1 - sigmoidal) + # = [[0.8176, 0.063, 0.0522], [0.9781, 0.7685, 0.989]] + # focal = (1 - p_t) ** gamma + # = [[0.006, 0.823, 0.851], [0.00001, 0.0124, 0.000001]] + + # bceLoss = -log(p_t) * sample_weight + # = [[0.2014, 2.7646 , 2.9527], [0.0221, 0.2633, 0.01106]] * + # sample_weight + + # focalLoss = focal * bceLoss = + # [[0.0012, 2.2743, 2.514], [0.0000002, 0.0033, 0.00000001]] * + # sample_weight + # focalLoss = [[0.00144, 2.72916, 3.0168], [6.8e-7, 0.01122, 3.4e-8]] + # Reduced loss = 5.7586 / 6 = 0.95977 + + self.assertAlmostEqual(self.evaluate(loss), 0.95977, 3) + + def test_no_reduction(self): + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape( + [2, 2] + ) + obj = losses.BinaryFocalCrossentropy( + gamma=2.0, + reduction=losses_utils.ReductionV2.NONE, + ) + loss = obj(y_true, y_pred) + + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, + # 0.8]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] + + # bceLoss = -log(p_t) = [[0.105, 1.609] ,[0.357, 0.223]] + # focalLoss = focal bceLoss = [[0.001, 1.03], [0.032, 0.009]] + # Reduced loss = [(0.001 + 1.03) / 2, (0.032 + 0.009) / 2] + + self.assertAllClose(self.evaluate(loss), (0.5155, 0.0205), 3) + + def test_ragged_tensors(self): + y_true = tf.ragged.constant([[1, 0, 1], [0]]) + y_pred = tf.ragged.constant([[0.9, 0.8, 0.7], [0.2]]) + obj = 
losses.BinaryFocalCrossentropy(gamma=2.0) + loss = obj(y_true, y_pred) + + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2, 0.7], + # [0.8]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64, 0.09], [0.04]] + + # bceLoss = -log(p_t) = [[0.105, 1.609, 0.357], [0.223]] + # focalLoss = focal bceLoss = [[0.001, 1.03, 0.032], [0.009]] + # Reduced loss = ((0.001 + 1.03 + 0.032) / 3 + 0.009) / 2 = 0.18166 + + self.assertAlmostEqual(self.evaluate(loss), 0.18166, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class BinaryWeightedFocalCrossentropyTest(tf.test.TestCase): + def test_config(self): + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.1, + gamma=1.5, + name="bfce_0", + ) + self.assertTrue(obj.apply_class_balancing) + self.assertEqual(obj.name, "bfce_0") + self.assertAlmostEqual(obj.alpha, 0.1) + self.assertAlmostEqual(obj.gamma, 1.5) + + obj_2 = losses.BinaryFocalCrossentropy.from_config(obj.get_config()) + self.assertTrue(obj_2.apply_class_balancing) + self.assertEqual(obj_2.name, "bfce_0") + self.assertAlmostEqual(obj_2.alpha, 0.1) + self.assertAlmostEqual(obj_2.gamma, 1.5) + + def test_all_correct_unweighted(self): + y_true = tf.constant( + [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + ], + dtype=tf.float32, + ) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, gamma=1.5 + ) + loss = obj(y_true, y_true) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + # Test with logits. + logits = tf.constant( + [ + [100.0, -100.0, -100.0], + [-100.0, 100.0, -100.0], + [-100.0, -100.0, 100.0], + ] + ) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.3, + gamma=2.0, + from_logits=True, + ) + loss = obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape( + [2, 2] + ) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.4, + gamma=2.0, + ) + loss = obj(y_true, y_pred) + + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, + # 0.8]] + # alpha_weight = alpha y_true + (1 - alpha) (1 - y_true) + # = [[0.4, 0.6], [0.4, 0.6]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] + + # bceLoss = -log(p_t) = [[0.105, 1.609] ,[0.357, 0.223]] + # weightedfocalLoss = alpha_weight focal bceLoss + # = [[0.0004, 0.618], [0.0128, 0.0054]] + # Reduced loss = (0.0004 + 0.618 + 0.0128 + 0.0054) / 4 = 0.15915 + + self.assertAlmostEqual(self.evaluate(loss), 0.15915, 3) + + # Test with logits. 
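The apply_class_balancing variant only adds an alpha factor to the same product, so the 0.15915 expectation above can be checked the same way (standalone sketch, values copied from test_unweighted):

import numpy as np

y_true = np.array([[1.0, 0.0], [1.0, 0.0]])
y_pred = np.array([[0.9, 0.8], [0.7, 0.2]])
alpha, gamma = 0.4, 2.0

alpha_weight = alpha * y_true + (1 - alpha) * (1 - y_true)  # [[0.4, 0.6], [0.4, 0.6]]
p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
loss = alpha_weight * (1 - p_t) ** gamma * -np.log(p_t)
print(loss.mean())  # ~0.15915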
+ y_true = tf.constant([[1, 1, 0], [0, 1, 0]], dtype=tf.float32) + logits = tf.constant([[1.5, -2.7, 2.9], [-3.8, 1.2, -4.5]]) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.3, + gamma=3.0, + from_logits=True, + ) + loss = obj(y_true, logits) + + # alpha_weight = alpha y_true + (1 - alpha) (1 - y_true) + # = [[0.3, 0.3, 0.7], [0.7, 0.3, 0.7]] + # sigmoidal = sigmoid(logits) + # = [[0.8176, 0.063, 0.9478], [0.0219, 0.7685, 0.011]] + # p_t = y_true sigmoidal + (1 - y_true) (1 - sigmoidal) + # = [[0.8176, 0.063, 0.0522], [0.9781, 0.7685, 0.989]] + # focal = (1 - p_t) ** gamma + # = [[0.006, 0.823, 0.851], [0.00001, 0.0124, 0.000001]] + + # bceLoss = -log(p_t) + # = [[0.2014, 2.7646 , 2.9527], [0.0221, 0.2633, 0.01106]] + + # weightedfocalLoss = alpha_weight focal bceLoss + # = [[0.00036, 0.68229, 1.7598], [0.00000014, 0.00099, 0.000000007]] + # Reduced loss = 0.40724 + + self.assertAlmostEqual(self.evaluate(loss), 0.40724, 3) + + def test_scalar_weighted(self): + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape( + [2, 2] + ) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.6, + gamma=2.0, + ) + loss = obj(y_true, y_pred, sample_weight=1.23) + + # alpha_weight = alpha y_true + (1 - alpha) (1 - y_true) + # = [[0.6, 0.4], [0.6, 0.4]] + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, + # 0.8]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] + + # bceLoss = -log(p_t) = [[0.105, 1.609] ,[0.357, 0.223]] * sample_weight + # weightedfocalLoss = alpha_weight focal bceLoss + # = [[0.0006, 0.412], [0.0192, 0.0036]] * sample_weight + # Reduced loss = (0.0006 + 0.412 + 0.0192 + 0.0036) * 1.23 / 4 = 0.13388 + + self.assertAlmostEqual(self.evaluate(loss), 0.13388, 3) + + # Test with logits. 
+ y_true = tf.constant([[1, 1, 0], [0, 1, 0]], dtype=tf.float32) + logits = tf.constant([[1.5, -2.7, 2.9], [-3.8, 1.2, -4.5]]) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.2, + gamma=3.0, + from_logits=True, + ) + loss = obj(y_true, logits, sample_weight=3.21) + + # alpha_weight = alpha y_true + (1 - alpha) (1 - y_true) + # = [[0.2, 0.2, 0.8], [0.8, 0.2, 0.8]] + # sigmoidal = sigmoid(logits) + # = [[0.8176, 0.063, 0.9478], [0.0219, 0.7685, 0.011]] + # p_t = y_true sigmoidal + (1 - y_true) (1 - sigmoidal) + # = [[0.8176, 0.063, 0.0522], [0.9781, 0.7685, 0.989]] + # focal = (1 - p_t) ** gamma + # = [[0.006, 0.823, 0.851], [0.00001, 0.0124, 0.000001]] + + # bceLoss = -log(p_t) * sample_weight + # = [[0.2014, 2.7646 , 2.9527], [0.0221, 0.2633, 0.01106]] * + # sample_weight + + # weightedfocalLoss = alpha_weight * focal * bceLoss = + # [[0.00024, 0.45486, 2.0112], [0.00000016, 0.00066, 0.000000008]] * + # 3.21 + # Reduced loss = 0.41116 * 3.21 = 1.32 + + self.assertAlmostEqual(self.evaluate(loss), 1.32, 3) + + def test_sample_weighted(self): + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape( + [2, 2] + ) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.1, + gamma=2.0, + ) + loss = obj(y_true, y_pred, sample_weight=sample_weight) + + # alpha_weight = alpha y_true + (1 - alpha) (1 - y_true) + # = [[0.1, 0.9], [0.1, 0.9]] + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, + # 0.8]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] + + # bceLoss = -log(p_t) * sample_weight + # = [[0.105, 1.609] ,[0.357, 0.223]] * sample_weight + # focalLoss = alpha_weight * focal * bceLoss + # = [[0.0001, 0.927], [0.0032, 0.0081]] * sample_weight + # = [[0.00012, 1.1124], [0.01088, 0.02754]] + # Reduced loss = (0.00012 + 1.1124 + 0.01088 + 0.02754) / 4 = 0.2877 + + self.assertAlmostEqual(self.evaluate(loss), 0.2877, 3) + + # Test with logits. 
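A sample_weight of shape (2, 1) is broadcast across the class axis before the mean, which is why the reduction above still divides by 4. A standalone sketch of the 0.2877 case:

import numpy as np

y_true = np.array([[1.0, 0.0], [1.0, 0.0]])
y_pred = np.array([[0.9, 0.8], [0.7, 0.2]])
alpha, gamma = 0.1, 2.0
sample_weight = np.array([[1.2], [3.4]])  # one weight per row, broadcast per class

alpha_weight = alpha * y_true + (1 - alpha) * (1 - y_true)
p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
per_element = alpha_weight * (1 - p_t) ** gamma * -np.log(p_t)
print((per_element * sample_weight).mean())  # ~0.2877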
+ y_true = tf.constant([[1, 1, 0], [0, 1, 0]], dtype=tf.float32) + logits = tf.constant([[1.5, -2.7, 2.9], [-3.8, 1.2, -4.5]]) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.2, + gamma=3.0, + from_logits=True, + ) + loss = obj(y_true, logits, sample_weight=sample_weight) + + # sigmoidal = sigmoid(logits) + # = [[0.8176, 0.063, 0.9478], [0.0219, 0.7685, 0.011]] + # p_t = y_true sigmoidal + (1 - y_true) (1 - sigmoidal) + # = [[0.8176, 0.063, 0.0522], [0.9781, 0.7685, 0.989]] + # focal = (1 - p_t) ** gamma + # = [[0.006, 0.823, 0.851], [0.00001, 0.0124, 0.000001]] + + # alpha_weight = alpha y_true + (1 - alpha) (1 - y_true) + # = [[0.2, 0.2, 0.8], [0.8, 0.2, 0.8]] + + # bceLoss = -log(p_t) * sample_weight + # = [[0.2014, 2.7646 , 2.9527], [0.0221, 0.2633, 0.01106]] * + # sample_weight + + # focalLoss = alpha_weight * focal * bceLoss = + # [[0.00024, 0.45486, 2.0112], [1.6e-7, 6.6e-4, 8e-9]] * sample_weight + # focalLoss = [[0.000288, 0.5458, 2.41344], [5.44e-7, 2.244e-3, + # 2.72e-8]] + # Reduced loss = 0.49366 + + self.assertAlmostEqual(self.evaluate(loss), 0.49366, 3) + + def test_no_reduction(self): + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([0.9, 0.8, 0.7, 0.2], dtype=np.float32).reshape( + [2, 2] + ) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.6, + gamma=2.0, + reduction=losses_utils.ReductionV2.NONE, + ) + loss = obj(y_true, y_pred) + + # alpha_weight = alpha y_true + (1 - alpha) (1 - y_true) + # = [[0.6, 0.4], [0.6, 0.4]] + + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2], [0.7, + # 0.8]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64], [0.09, 0.04]] + + # bceLoss = -log(p_t) = [[0.105, 1.609] ,[0.357, 0.223]] + # focalLoss = alpha_weight focal bceLoss + # = [[0.0006, 0.412], [0.0192, 0.0036]] + # Reduced loss = [(0.0006 + 0.412) / 2, (0.0192 + 0.0036) / 2] + + self.assertAllClose(self.evaluate(loss), (0.2063, 0.0114), 3) + + def test_ragged_tensors(self): + y_true = tf.ragged.constant([[1, 0, 1], [0]]) + y_pred = tf.ragged.constant([[0.9, 0.8, 0.7], [0.2]]) + obj = losses.BinaryFocalCrossentropy( + apply_class_balancing=True, + alpha=0.1, + gamma=2.0, + ) + loss = obj(y_true, y_pred) + + # alpha_weight = alpha y_true + (1 - alpha) (1 - y_true) + # = [[0.1, 0.9, 0.1], [0.9]] + # p_t = y_true y_pred + (1 - y_true) (1 - y_pred) = [[0.9, 0.2, 0.7], + # [0.8]] + # focal = (1 - p_t) ** gamma = [[0.01, 0.64, 0.09], [0.04]] + + # bceLoss = -log(p_t) = [[0.105, 1.609, 0.357], [0.223]] + # focalLoss = alpha_weight focal bceLoss + # = [[0.0001, 0.927, 0.0032], [0.0081]] + # Reduced loss = ((0.0001 + 0.927 + 0.0032) / 3 + 0.0081) / 2 = 0.1591 + + self.assertAlmostEqual(self.evaluate(loss), 0.1591, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class CategoricalCrossentropyTest(tf.test.TestCase): - - def test_config(self): - cce_obj = losses.CategoricalCrossentropy( - reduction=losses_utils.ReductionV2.SUM, name='bce_1') - self.assertEqual(cce_obj.name, 'bce_1') - self.assertEqual(cce_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_all_correct_unweighted(self): - y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=tf.int64) - y_pred = tf.constant([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]], - dtype=tf.float32) - cce_obj = losses.CategoricalCrossentropy() - loss = cce_obj(y_true, y_pred) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - # Test with logits. 
- logits = tf.constant([[10., 0., 0.], [0., 10., 0.], [0., 0., 10.]]) - cce_obj = losses.CategoricalCrossentropy(from_logits=True) - loss = cce_obj(y_true, logits) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_unweighted(self): - cce_obj = losses.CategoricalCrossentropy() - y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - y_pred = tf.constant([[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]], - dtype=tf.float32) - loss = cce_obj(y_true, y_pred) - self.assertAlmostEqual(self.evaluate(loss), .3239, 3) - - # Test with logits. - logits = tf.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - cce_obj = losses.CategoricalCrossentropy(from_logits=True) - loss = cce_obj(y_true, logits) - self.assertAlmostEqual(self.evaluate(loss), .0573, 3) - - def test_scalar_weighted(self): - cce_obj = losses.CategoricalCrossentropy() - y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - y_pred = tf.constant([[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]], - dtype=tf.float32) - loss = cce_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), .7449, 3) - - # Test with logits. - logits = tf.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - cce_obj = losses.CategoricalCrossentropy(from_logits=True) - loss = cce_obj(y_true, logits, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), .1317, 3) - - def test_sample_weighted(self): - cce_obj = losses.CategoricalCrossentropy() - y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - y_pred = tf.constant([[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]], - dtype=tf.float32) - sample_weight = tf.constant([[1.2], [3.4], [5.6]], shape=(3, 1)) - loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 1.0696, 3) - - # Test with logits. 
- logits = tf.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - cce_obj = losses.CategoricalCrossentropy(from_logits=True) - loss = cce_obj(y_true, logits, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 0.31829, 3) - - def test_no_reduction(self): - y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - logits = tf.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - cce_obj = losses.CategoricalCrossentropy( - from_logits=True, reduction=losses_utils.ReductionV2.NONE) - loss = cce_obj(y_true, logits) - self.assertAllClose((0.001822, 0.000459, 0.169846), self.evaluate(loss), 3) - - def test_label_smoothing(self): - logits = tf.constant([[100.0, -100.0, -100.0]]) - y_true = tf.constant([[1, 0, 0]]) - label_smoothing = 0.1 - # Softmax Cross Entropy Loss: -\sum_i p_i \log q_i - # where for a softmax activation - # \log q_i = x_i - \log \sum_j \exp x_j - # = x_i - x_max - \log \sum_j \exp (x_j - x_max) - # For our activations, [100, -100, -100] - # \log ( exp(0) + exp(-200) + exp(-200) ) = 0 - # so our log softmaxes become: [0, -200, -200] - # Label smoothing: z' = z * (1 - L) + L/n - # 1 = 1 - L + L/n - # 0 = L/n - # Applying the above two fns to the given input: - # -0 * (1 - L + L/n) + 200 * L/n + 200 * L/n = 400 L/n - cce_obj = losses.CategoricalCrossentropy( - from_logits=True, label_smoothing=label_smoothing) - loss = cce_obj(y_true, logits) - expected_value = 400.0 * label_smoothing / 3.0 - self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) - - def test_label_smoothing_ndarray(self): - logits = np.asarray([[100.0, -100.0, -100.0]]) - y_true = np.asarray([[1, 0, 0]]) - label_smoothing = 0.1 - # Softmax Cross Entropy Loss: -\sum_i p_i \log q_i - # where for a softmax activation - # \log q_i = x_i - \log \sum_j \exp x_j - # = x_i - x_max - \log \sum_j \exp (x_j - x_max) - # For our activations, [100, -100, -100] - # \log ( exp(0) + exp(-200) + exp(-200) ) = 0 - # so our log softmaxes become: [0, -200, -200] - # Label smoothing: z' = z * (1 - L) + L/n - # 1 = 1 - L + L/n - # 0 = L/n - # Applying the above two fns to the given input: - # -0 * (1 - L + L/n) + 200 * L/n + 200 * L/n = 400 L/n - cce_obj = losses.CategoricalCrossentropy( - from_logits=True, label_smoothing=label_smoothing) - loss = cce_obj(y_true, logits) - expected_value = 400.0 * label_smoothing / 3.0 - self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) - - def test_shape_mismatch(self): - y_true = tf.constant([[0], [1], [2]]) - y_pred = tf.constant([[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]]) - - cce_obj = losses.CategoricalCrossentropy() - with self.assertRaisesRegex(ValueError, 'Shapes .+ are incompatible'): - cce_obj(y_true, y_pred) - - def test_ragged_tensors(self): - cce_obj = losses.CategoricalCrossentropy() - y_true = tf.ragged.constant([[[1, 0, 0], [0, 1, 0]], [[0, 0, 1]]]) - y_pred = tf.ragged.constant( - [[[.9, .05, .05], [.5, .89, .6]], [[.05, .01, .94]]], dtype=tf.float32) - # batch losses [[0.1054, 0.8047], [0.0619]] - sample_weight = tf.constant([[1.2], [3.4]], shape=(2, 1)) - loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) - # sum([0.1054, 0.8047, 0.0619]) / 3 - self.assertAlmostEqual(self.evaluate(loss), 0.4341, 3) - - # Test with logits. 
- logits = tf.ragged.constant([[[8., 1., 1.], [0., 9., 1.]], [[2., 3., 5.]]]) - cce_obj = losses.CategoricalCrossentropy(from_logits=True) - # batch losses [[0.0018, 0.0004], [0.1698]] - loss = cce_obj(y_true, logits, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 0.1934, 3) - - def test_ragged_tensors_ragged_sample_weights(self): - cce_obj = losses.CategoricalCrossentropy() - y_true = tf.ragged.constant([[[1, 0, 0], [0, 1, 0]], [[0, 0, 1]]]) - y_pred = tf.ragged.constant( - [[[.9, .05, .05], [.05, .89, .06]], [[.05, .01, .94]]], - dtype=tf.float32) - # batch losses [[0.1054, 0.1165], [0.0619]] - # Use independent weights for each batch element - sample_weight = tf.ragged.constant([[1.2, 3.4], [5.6]], dtype=tf.float32) - loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) - # sum([0.1054*1.2, 0.1165*3.4, 0.0619*5.6])/3 - self.assertAlmostEqual(self.evaluate(loss), 0.2897, 3) - - # Test with logits. - logits = tf.ragged.constant([[[8., 1., 1.], [0., 9., 1.]], [[2., 3., 5.]]]) - cce_obj = losses.CategoricalCrossentropy(from_logits=True) - # batch losses [[0.0018, 0.0004], [0.1698]] - # sum([0.0018*1.2, 0.0004*3.4, 0.1698*5.6]) / 3 - loss = cce_obj(y_true, logits, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 0.3181, 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + cce_obj = losses.CategoricalCrossentropy( + reduction=losses_utils.ReductionV2.SUM, name="bce_1" + ) + self.assertEqual(cce_obj.name, "bce_1") + self.assertEqual(cce_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_all_correct_unweighted(self): + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=tf.int64) + y_pred = tf.constant( + [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + dtype=tf.float32, + ) + cce_obj = losses.CategoricalCrossentropy() + loss = cce_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + # Test with logits. + logits = tf.constant( + [[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]] + ) + cce_obj = losses.CategoricalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + cce_obj = losses.CategoricalCrossentropy() + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]], + dtype=tf.float32, + ) + loss = cce_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 0.3239, 3) + + # Test with logits. + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.CategoricalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.0573, 3) + + def test_scalar_weighted(self): + cce_obj = losses.CategoricalCrossentropy() + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]], + dtype=tf.float32, + ) + loss = cce_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 0.7449, 3) + + # Test with logits. 
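For probability (non-logit) inputs, the backend first rescales each row of y_pred to sum to 1, which is why a row like [.5, .89, .6] is acceptable input. The 0.3239 expectation from test_unweighted above can be reproduced with a standalone sketch:

import numpy as np

y_true = np.array([0, 1, 2])  # class indices of the one-hot rows
y_pred = np.array([[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]])

probs = y_pred / y_pred.sum(axis=-1, keepdims=True)  # row 2 sums to 1.99
ce = -np.log(probs[np.arange(3), y_true])            # [0.1054, 0.8047, 0.0619]
print(ce.mean())  # ~0.3239; a scalar sample_weight of 2.3 just scales this to ~0.7449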
+ logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.CategoricalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 0.1317, 3) + + def test_sample_weighted(self): + cce_obj = losses.CategoricalCrossentropy() + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]], + dtype=tf.float32, + ) + sample_weight = tf.constant([[1.2], [3.4], [5.6]], shape=(3, 1)) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 1.0696, 3) + + # Test with logits. + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.CategoricalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.31829, 3) + + def test_no_reduction(self): + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.CategoricalCrossentropy( + from_logits=True, reduction=losses_utils.ReductionV2.NONE + ) + loss = cce_obj(y_true, logits) + self.assertAllClose( + (0.001822, 0.000459, 0.169846), self.evaluate(loss), 3 + ) + + def test_label_smoothing(self): + logits = tf.constant([[100.0, -100.0, -100.0]]) + y_true = tf.constant([[1, 0, 0]]) + label_smoothing = 0.1 + # Softmax Cross Entropy Loss: -\sum_i p_i \log q_i + # where for a softmax activation + # \log q_i = x_i - \log \sum_j \exp x_j + # = x_i - x_max - \log \sum_j \exp (x_j - x_max) + # For our activations, [100, -100, -100] + # \log ( exp(0) + exp(-200) + exp(-200) ) = 0 + # so our log softmaxes become: [0, -200, -200] + # Label smoothing: z' = z * (1 - L) + L/n + # 1 = 1 - L + L/n + # 0 = L/n + # Applying the above two fns to the given input: + # -0 * (1 - L + L/n) + 200 * L/n + 200 * L/n = 400 L/n + cce_obj = losses.CategoricalCrossentropy( + from_logits=True, label_smoothing=label_smoothing + ) + loss = cce_obj(y_true, logits) + expected_value = 400.0 * label_smoothing / 3.0 + self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) + + def test_label_smoothing_ndarray(self): + logits = np.asarray([[100.0, -100.0, -100.0]]) + y_true = np.asarray([[1, 0, 0]]) + label_smoothing = 0.1 + # Softmax Cross Entropy Loss: -\sum_i p_i \log q_i + # where for a softmax activation + # \log q_i = x_i - \log \sum_j \exp x_j + # = x_i - x_max - \log \sum_j \exp (x_j - x_max) + # For our activations, [100, -100, -100] + # \log ( exp(0) + exp(-200) + exp(-200) ) = 0 + # so our log softmaxes become: [0, -200, -200] + # Label smoothing: z' = z * (1 - L) + L/n + # 1 = 1 - L + L/n + # 0 = L/n + # Applying the above two fns to the given input: + # -0 * (1 - L + L/n) + 200 * L/n + 200 * L/n = 400 L/n + cce_obj = losses.CategoricalCrossentropy( + from_logits=True, label_smoothing=label_smoothing + ) + loss = cce_obj(y_true, logits) + expected_value = 400.0 * label_smoothing / 3.0 + self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) + + def test_shape_mismatch(self): + y_true = tf.constant([[0], [1], [2]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]] + ) + + cce_obj = losses.CategoricalCrossentropy() + with self.assertRaisesRegex(ValueError, "Shapes .+ are incompatible"): + cce_obj(y_true, y_pred) + + def test_ragged_tensors(self): + cce_obj = 
losses.CategoricalCrossentropy() + y_true = tf.ragged.constant([[[1, 0, 0], [0, 1, 0]], [[0, 0, 1]]]) + y_pred = tf.ragged.constant( + [[[0.9, 0.05, 0.05], [0.5, 0.89, 0.6]], [[0.05, 0.01, 0.94]]], + dtype=tf.float32, + ) + # batch losses [[0.1054, 0.8047], [0.0619]] + sample_weight = tf.constant([[1.2], [3.4]], shape=(2, 1)) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + # sum([0.1054*1.2, 0.8047*1.2, 0.0619*3.4]) / 3 + self.assertAlmostEqual(self.evaluate(loss), 0.4341, 3) + + # Test with logits. + logits = tf.ragged.constant( + [[[8.0, 1.0, 1.0], [0.0, 9.0, 1.0]], [[2.0, 3.0, 5.0]]] + ) + cce_obj = losses.CategoricalCrossentropy(from_logits=True) + # batch losses [[0.0018, 0.0004], [0.1698]] + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.1934, 3) + + def test_ragged_tensors_ragged_sample_weights(self): + cce_obj = losses.CategoricalCrossentropy() + y_true = tf.ragged.constant([[[1, 0, 0], [0, 1, 0]], [[0, 0, 1]]]) + y_pred = tf.ragged.constant( + [[[0.9, 0.05, 0.05], [0.05, 0.89, 0.06]], [[0.05, 0.01, 0.94]]], + dtype=tf.float32, + ) + # batch losses [[0.1054, 0.1165], [0.0619]] + # Use independent weights for each batch element + sample_weight = tf.ragged.constant( + [[1.2, 3.4], [5.6]], dtype=tf.float32 + ) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + # sum([0.1054*1.2, 0.1165*3.4, 0.0619*5.6])/3 + self.assertAlmostEqual(self.evaluate(loss), 0.2897, 3) + + # Test with logits. + logits = tf.ragged.constant( + [[[8.0, 1.0, 1.0], [0.0, 9.0, 1.0]], [[2.0, 3.0, 5.0]]] + ) + cce_obj = losses.CategoricalCrossentropy(from_logits=True) + # batch losses [[0.0018, 0.0004], [0.1698]] + # sum([0.0018*1.2, 0.0004*3.4, 0.1698*5.6]) / 3 + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.3181, 3) + + def test_binary_labels(self): + # raise a warning if the shapes of y_true and y_pred are all (None, 1). + # categorical_crossentropy shouldn't be used with binary labels. + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + cce_obj = losses.CategoricalCrossentropy() + cce_obj(tf.constant([[1.0], [0.0]]), tf.constant([[1.0], [1.0]])) + self.assertIs(w[-1].category, SyntaxWarning) + self.assertIn( + "In loss categorical_crossentropy, expected ", + str(w[-1].message), + ) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class CategoricalFocalCrossentropyTest(tf.test.TestCase): + def test_config(self): + cce_obj = losses.CategoricalFocalCrossentropy( + name="focal_cce", + reduction=losses_utils.ReductionV2.SUM, + alpha=0.25, + gamma=2.0, + ) + self.assertEqual(cce_obj.name, "focal_cce") + self.assertEqual(cce_obj.reduction, losses_utils.ReductionV2.SUM) + self.assertEqual(cce_obj.alpha, 0.25) + self.assertEqual(cce_obj.gamma, 2.0) + + # Test alpha as a list + cce_obj = losses.CategoricalFocalCrossentropy(alpha=[0.25, 0.5, 0.75]) + self.assertEqual(cce_obj.alpha, [0.25, 0.5, 0.75]) + + def test_all_correct_unweighted(self): + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=tf.int64) + y_pred = tf.constant( + [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + dtype=tf.float32, + ) + cce_obj = losses.CategoricalFocalCrossentropy(alpha=0.25, gamma=2.0) + loss = cce_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + # Test with logits. 
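The label-smoothing derivation in the CategoricalCrossentropyTest comments above collapses to 400 * L / n for logits [100, -100, -100]; a quick standalone check of the expected value:

import numpy as np

label_smoothing, n = 0.1, 3
y_smooth = np.array([1.0, 0.0, 0.0]) * (1 - label_smoothing) + label_smoothing / n
log_softmax = np.array([0.0, -200.0, -200.0])  # log-softmax of [100, -100, -100]
print(-np.sum(y_smooth * log_softmax))         # 13.333... == 400 * 0.1 / 3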
+ logits = tf.constant( + [[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]] + ) + cce_obj = losses.CategoricalFocalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + cce_obj = losses.CategoricalFocalCrossentropy() + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]], + dtype=tf.float32, + ) + loss = cce_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 0.02059, 3) + + # Test with logits. + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.CategoricalFocalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.000345, 3) + + def test_scalar_weighted(self): + cce_obj = losses.CategoricalFocalCrossentropy() + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]], + dtype=tf.float32, + ) + loss = cce_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 0.047368, 3) + + # Test with logits. + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.CategoricalFocalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 0.000794, 4) + + def test_sample_weighted(self): + cce_obj = losses.CategoricalFocalCrossentropy() + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]], + dtype=tf.float32, + ) + sample_weight = tf.constant([[1.2], [3.4], [5.6]], shape=(3, 1)) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.06987, 3) + + # Test with logits. 
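The categorical focal expectations above come from scaling each true-class crossentropy term by alpha * (1 - p)^gamma. Assuming the same row rescaling as plain categorical crossentropy, the 0.02059 value from test_unweighted can be reproduced with a standalone sketch (alpha=0.25 and gamma=2.0 are the defaults):

import numpy as np

y_true = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
y_pred = np.array([[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]])
alpha, gamma = 0.25, 2.0

probs = y_pred / y_pred.sum(axis=-1, keepdims=True)
per_sample = np.sum(alpha * (1 - probs) ** gamma * y_true * -np.log(probs), axis=-1)
print(per_sample.mean())  # ~0.02059; sample weights scale these three terms as usual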
+ logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.CategoricalFocalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.001933, 3) + + def test_no_reduction(self): + y_true = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.CategoricalFocalCrossentropy( + from_logits=True, reduction=losses_utils.ReductionV2.NONE + ) + loss = cce_obj(y_true, logits) + self.assertAllClose( + (1.5096224e-09, 2.4136547e-11, 1.0360638e-03), + self.evaluate(loss), + 3, + ) + + def test_label_smoothing(self): + logits = tf.constant([[4.9, -0.5, 2.05]]) + y_true = tf.constant([[1, 0, 0]]) + label_smoothing = 0.1 + + cce_obj = losses.CategoricalFocalCrossentropy( + from_logits=True, label_smoothing=label_smoothing + ) + loss = cce_obj(y_true, logits) + + expected_value = 0.06685 + self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) + + def test_label_smoothing_ndarray(self): + logits = np.asarray([[4.9, -0.5, 2.05]]) + y_true = np.asarray([[1, 0, 0]]) + label_smoothing = 0.1 + + cce_obj = losses.CategoricalFocalCrossentropy( + from_logits=True, label_smoothing=label_smoothing + ) + loss = cce_obj(y_true, logits) + + expected_value = 0.06685 + self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) + + def test_shape_mismatch(self): + y_true = tf.constant([[0], [1], [2]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]] + ) + + cce_obj = losses.CategoricalFocalCrossentropy() + with self.assertRaisesRegex(ValueError, "Shapes .+ are incompatible"): + cce_obj(y_true, y_pred) + + def test_ragged_tensors(self): + cce_obj = losses.CategoricalFocalCrossentropy() + y_true = tf.ragged.constant([[[1, 0, 0], [0, 1, 0]], [[0, 0, 1]]]) + y_pred = tf.ragged.constant( + [[[0.9, 0.05, 0.05], [0.5, 0.89, 0.6]], [[0.05, 0.01, 0.94]]], + dtype=tf.float32, + ) + # batch losses (plain crossentropy, before focal scaling) + # [[0.1054, 0.8047], [0.0619]] + sample_weight = tf.constant([[1.2], [3.4]], shape=(2, 1)) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + + self.assertAlmostEqual(self.evaluate(loss), 0.024754, 3) + + # Test with logits. + logits = tf.ragged.constant( + [[[8.0, 1.0, 1.0], [0.0, 9.0, 1.0]], [[2.0, 3.0, 5.0]]] + ) + cce_obj = losses.CategoricalFocalCrossentropy(from_logits=True) + + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.00117, 3) + + def test_ragged_tensors_ragged_sample_weights(self): + cce_obj = losses.CategoricalFocalCrossentropy() + y_true = tf.ragged.constant([[[1, 0, 0], [0, 1, 0]], [[0, 0, 1]]]) + y_pred = tf.ragged.constant( + [[[0.9, 0.05, 0.05], [0.05, 0.89, 0.06]], [[0.05, 0.01, 0.94]]], + dtype=tf.float32, + ) + sample_weight = tf.ragged.constant( + [[1.2, 3.4], [5.6]], dtype=tf.float32 + ) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.0006088, 4) + + # Test with logits. + logits = tf.ragged.constant( + [[[8.0, 1.0, 1.0], [0.0, 9.0, 1.0]], [[2.0, 3.0, 5.0]]] + ) + cce_obj = losses.CategoricalFocalCrossentropy(from_logits=True) + + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.001933, 3) + + def test_binary_labels(self): + # raise a warning if the shapes of y_true and y_pred are all (None, 1). + # categorical_focal_crossentropy shouldn't be used with binary labels. 
+ with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + cce_obj = losses.CategoricalFocalCrossentropy() + cce_obj(tf.constant([[1.0], [0.0]]), tf.constant([[1.0], [1.0]])) + self.assertIs(w[-1].category, SyntaxWarning) + self.assertIn( + "In loss categorical_focal_crossentropy, expected ", + str(w[-1].message), + ) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class SparseCategoricalCrossentropyTest(tf.test.TestCase): - - def test_config(self): - cce_obj = losses.SparseCategoricalCrossentropy( - reduction=losses_utils.ReductionV2.SUM, name='scc') - self.assertEqual(cce_obj.name, 'scc') - self.assertEqual(cce_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_all_correct_unweighted(self): - y_true = tf.constant([[0], [1], [2]], dtype=tf.int64) - y_pred = tf.constant([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]], - dtype=tf.float32) - cce_obj = losses.SparseCategoricalCrossentropy() - loss = cce_obj(y_true, y_pred) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - # Test with logits. - logits = tf.constant([[10., 0., 0.], [0., 10., 0.], [0., 0., 10.]]) - cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) - loss = cce_obj(y_true, logits) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_unweighted(self): - cce_obj = losses.SparseCategoricalCrossentropy() - y_true = tf.constant([0, 1, 2]) - y_pred = tf.constant([[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]], - dtype=tf.float32) - loss = cce_obj(y_true, y_pred) - self.assertAlmostEqual(self.evaluate(loss), .3239, 3) - - # Test with logits. - logits = tf.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) - loss = cce_obj(y_true, logits) - self.assertAlmostEqual(self.evaluate(loss), .0573, 3) - - def test_scalar_weighted(self): - cce_obj = losses.SparseCategoricalCrossentropy() - y_true = tf.constant([[0], [1], [2]]) - y_pred = tf.constant([[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]], - dtype=tf.float32) - loss = cce_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), .7449, 3) - - # Test with logits. - logits = tf.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) - loss = cce_obj(y_true, logits, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), .1317, 3) - - def test_sample_weighted(self): - cce_obj = losses.SparseCategoricalCrossentropy() - y_true = tf.constant([[0], [1], [2]]) - y_pred = tf.constant([[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]], - dtype=tf.float32) - sample_weight = tf.constant([[1.2], [3.4], [5.6]], shape=(3, 1)) - loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 1.0696, 3) - - # Test with logits. 
- logits = tf.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) - loss = cce_obj(y_true, logits, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 0.31829, 3) - - def test_no_reduction(self): - y_true = tf.constant([[0], [1], [2]]) - logits = tf.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - cce_obj = losses.SparseCategoricalCrossentropy( - from_logits=True, reduction=losses_utils.ReductionV2.NONE) - loss = cce_obj(y_true, logits) - self.assertAllClose((0.001822, 0.000459, 0.169846), self.evaluate(loss), 3) - - def test_non_tensor(self): - # Test case for GitHub issue 33394. - cce_obj = losses.SparseCategoricalCrossentropy() - y_true = [[0], [1], [2]] - y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] - loss = cce_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), .7449, 3) - - def test_ragged_tensors(self): - cce_obj = losses.SparseCategoricalCrossentropy() - y_true = tf.ragged.constant([[0, 1], [2]]) - y_pred = tf.ragged.constant( - [[[.9, .05, .05], [.5, .89, .6]], [[.05, .01, .94]]], dtype=tf.float32) - # batch losses [[0.1054, 0.8047], [0.0619]] - sample_weight = tf.constant([[1.2], [3.4]], shape=(2, 1)) - loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) - # sum([0.1054, 0.8047, 0.0619]) / 3 - self.assertAlmostEqual(self.evaluate(loss), 0.4341, 3) - - # Test with logits. - logits = tf.ragged.constant([[[8., 1., 1.], [0., 9., 1.]], [[2., 3., 5.]]]) - cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) - # batch losses [[0.0018, 0.0004], [0.1698]] - loss = cce_obj(y_true, logits, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 0.1934, 3) - - def test_ragged_tensors_rank_1(self): - cce_obj = losses.SparseCategoricalCrossentropy() - y_true = tf.ragged.constant([[0, 1], [2]]) - y_pred = tf.ragged.constant( - [[[.9, .05, .05], [.5, .89, .6]], [[.05, .01, .94]]], - ragged_rank=1, - dtype=tf.float32) - # batch losses [[0.1054, 0.8047], [0.0619]] - sample_weight = tf.constant([[1.2], [3.4]], shape=(2, 1)) - loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) - # sum([0.1054, 0.8047, 0.0619]) / 3 - self.assertAlmostEqual(self.evaluate(loss), 0.4341, 3) - - # Test with logits. 
- logits = tf.ragged.constant([[[8., 1., 1.], [0., 9., 1.]], [[2., 3., 5.]]], - ragged_rank=1) - cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) - # batch losses [[0.0018, 0.0004], [0.1698]] - loss = cce_obj(y_true, logits, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 0.1934, 3) - - def test_ragged_tensors_3d(self): - # shape [2, 1, None] - y_true = tf.ragged.constant([[[1, 1]], [[0]]]) - # shape [2, 1, None, 2] - y_pred = tf.ragged.constant([[[[0.1, 0.9], [0.1, 0.9]]], [[[0.9, 0.1]]]]) - cce_obj = losses.SparseCategoricalCrossentropy() - loss = cce_obj(y_true, y_pred) - self.assertAlmostEqual(self.evaluate(loss), 0.1054, 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + cce_obj = losses.SparseCategoricalCrossentropy( + reduction=losses_utils.ReductionV2.SUM, name="scc" + ) + self.assertEqual(cce_obj.name, "scc") + self.assertEqual(cce_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_all_correct_unweighted(self): + y_true = tf.constant([[0], [1], [2]], dtype=tf.int64) + y_pred = tf.constant( + [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + dtype=tf.float32, + ) + cce_obj = losses.SparseCategoricalCrossentropy() + loss = cce_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + # Test with logits. + logits = tf.constant( + [[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]] + ) + cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + cce_obj = losses.SparseCategoricalCrossentropy() + y_true = tf.constant([0, 1, 2]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]], + dtype=tf.float32, + ) + loss = cce_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 0.3239, 3) + + # Test with logits. + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.0573, 3) + + def test_unweighted_ignore_class(self): + cce_obj = losses.SparseCategoricalCrossentropy(ignore_class=-1) + y_true = tf.constant([0, 1, 2, -1]) + y_pred = tf.constant( + [ + [0.9, 0.05, 0.05], + [0.5, 0.89, 0.6], + [0.05, 0.01, 0.94], + [0.85, 0.14, 0.01], + ], + dtype=tf.float32, + ) + loss = cce_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 0.3239, 3) + + # Test with logits. 
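ignore_class drops the flagged positions from both the numerator and the denominator of the reduction, so the expected value above matches the three-sample test exactly. A standalone sketch:

import numpy as np

ignore_class = -1
y_true = np.array([0, 1, 2, -1])
y_pred = np.array([[0.9, 0.05, 0.05], [0.5, 0.89, 0.6],
                   [0.05, 0.01, 0.94], [0.85, 0.14, 0.01]])

valid = np.nonzero(y_true != ignore_class)[0]        # rows 0, 1, 2
probs = y_pred / y_pred.sum(axis=-1, keepdims=True)
ce = -np.log(probs[valid, y_true[valid]])
print(ce.sum() / len(valid))  # ~0.3239, identical to the unmasked 3-sample case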
+ logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0], [7.8, 2.0, 1.0]] + ) + cce_obj = losses.SparseCategoricalCrossentropy( + ignore_class=-1, from_logits=True + ) + loss = cce_obj(y_true, logits) + self.assertAlmostEqual(self.evaluate(loss), 0.0573, 3) + + def test_unweighted_ignore_class_for_segmentation(self): + cce_obj = losses.SparseCategoricalCrossentropy(ignore_class=-1) + y_true = tf.constant( + [[[0, 2], [-1, -1]], [[0, 2], [-1, -1]], [[0, 0], [0, 0]]] + ) + y_pred = tf.constant( + [ + [ + [[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]], + [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]], + ], + [ + [[1.0, 0.0, 0.0], [0.0, 0.5, 0.5]], + [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]], + ], + [ + [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0]], + [[0.1, 0.9, 0.0], [0.2, 0.8, 0.0]], + ], + ], + dtype=tf.float32, + ) + + # Expected loss values: + # [[0.0, 0.0], [0.0, 0.0]], + # [[0.0, 0.693148], [0.0, 0.0]], + # [[0.0, 0.0], [2.302585, 1.609438]], + + loss = cce_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 0.575646375, 3) + + # # Test with logits. + # logits = tf.constant( + # [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + # ) + # cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) + # loss = cce_obj(y_true, logits) + # self.assertAlmostEqual(self.evaluate(loss), 0.0573, 3) + + def test_scalar_weighted(self): + cce_obj = losses.SparseCategoricalCrossentropy() + y_true = tf.constant([[0], [1], [2]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]], + dtype=tf.float32, + ) + loss = cce_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 0.7449, 3) + + # Test with logits. + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 0.1317, 3) + + def test_sample_weighted(self): + cce_obj = losses.SparseCategoricalCrossentropy() + y_true = tf.constant([[0], [1], [2]]) + y_pred = tf.constant( + [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]], + dtype=tf.float32, + ) + sample_weight = tf.constant([[1.2], [3.4], [5.6]], shape=(3, 1)) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 1.0696, 3) + + # Test with logits. + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.31829, 3) + + def test_sample_weighted_ignore_class(self): + cce_obj = losses.SparseCategoricalCrossentropy(ignore_class=-1) + y_true = tf.constant([[0], [1], [2], [-1]]) + y_pred = tf.constant( + [ + [0.9, 0.05, 0.05], + [0.5, 0.89, 0.6], + [0.05, 0.01, 0.94], + [0.85, 0.14, 0.01], + ], + dtype=tf.float32, + ) + sample_weight = tf.constant([[1.2], [3.4], [5.6], [10.4]], shape=(4, 1)) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 1.0696, 3) + + # Test with logits. 
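With per-sample weights, the ignored row's weight never enters either side of the average, so the 10.4 weight above is irrelevant. A sketch of the 1.0696 expectation:

import numpy as np

y_true = np.array([0, 1, 2, -1])
y_pred = np.array([[0.9, 0.05, 0.05], [0.5, 0.89, 0.6],
                   [0.05, 0.01, 0.94], [0.85, 0.14, 0.01]])
sample_weight = np.array([1.2, 3.4, 5.6, 10.4])

valid = np.nonzero(y_true != -1)[0]
probs = y_pred / y_pred.sum(axis=-1, keepdims=True)
weighted_ce = -np.log(probs[valid, y_true[valid]]) * sample_weight[valid]
print(weighted_ce.sum() / len(valid))  # ~1.0696, same as the test without the -1 row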
+ logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0], [7.8, 2.0, 1.0]] + ) + cce_obj = losses.SparseCategoricalCrossentropy( + ignore_class=-1, from_logits=True + ) + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.31829, 3) + + def test_no_reduction(self): + y_true = tf.constant([[0], [1], [2]]) + logits = tf.constant( + [[8.0, 1.0, 1.0], [0.0, 9.0, 1.0], [2.0, 3.0, 5.0]] + ) + cce_obj = losses.SparseCategoricalCrossentropy( + from_logits=True, reduction=losses_utils.ReductionV2.NONE + ) + loss = cce_obj(y_true, logits) + self.assertAllClose( + (0.001822, 0.000459, 0.169846), self.evaluate(loss), 3 + ) + + def test_non_tensor(self): + # Test case for GitHub issue 33394. + cce_obj = losses.SparseCategoricalCrossentropy() + y_true = [[0], [1], [2]] + y_pred = [[0.9, 0.05, 0.05], [0.5, 0.89, 0.6], [0.05, 0.01, 0.94]] + loss = cce_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 0.7449, 3) + + def test_ragged_tensors(self): + cce_obj = losses.SparseCategoricalCrossentropy() + y_true = tf.ragged.constant([[0, 1], [2]]) + y_pred = tf.ragged.constant( + [[[0.9, 0.05, 0.05], [0.5, 0.89, 0.6]], [[0.05, 0.01, 0.94]]], + dtype=tf.float32, + ) + # batch losses [[0.1054, 0.8047], [0.0619]] + sample_weight = tf.constant([[1.2], [3.4]], shape=(2, 1)) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + # sum([0.1054*1.2, 0.8047*1.2, 0.0619*3.4]) / 3 + self.assertAlmostEqual(self.evaluate(loss), 0.4341, 3) + + # Test with logits. + logits = tf.ragged.constant( + [[[8.0, 1.0, 1.0], [0.0, 9.0, 1.0]], [[2.0, 3.0, 5.0]]] + ) + cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) + # batch losses [[0.0018, 0.0004], [0.1698]] + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.1934, 3) + + def test_ragged_tensors_rank_1(self): + cce_obj = losses.SparseCategoricalCrossentropy() + y_true = tf.ragged.constant([[0, 1], [2]]) + y_pred = tf.ragged.constant( + [[[0.9, 0.05, 0.05], [0.5, 0.89, 0.6]], [[0.05, 0.01, 0.94]]], + ragged_rank=1, + dtype=tf.float32, + ) + # batch losses [[0.1054, 0.8047], [0.0619]] + sample_weight = tf.constant([[1.2], [3.4]], shape=(2, 1)) + loss = cce_obj(y_true, y_pred, sample_weight=sample_weight) + # sum([0.1054*1.2, 0.8047*1.2, 0.0619*3.4]) / 3 + self.assertAlmostEqual(self.evaluate(loss), 0.4341, 3) + + # Test with logits. 
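For ragged inputs the (2, 1) sample_weight is expanded to one weight per ragged element, and the mean is taken over the total element count (3 here), which the sum([...]) comments above spell out. In miniature:

import numpy as np

batch_losses = np.array([0.1054, 0.8047, 0.0619])  # per-element CE from the comments
weights = np.array([1.2, 1.2, 3.4])                # row weight repeated per element
print((batch_losses * weights).sum() / 3)          # ~0.4341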
+ logits = tf.ragged.constant( + [[[8.0, 1.0, 1.0], [0.0, 9.0, 1.0]], [[2.0, 3.0, 5.0]]], + ragged_rank=1, + ) + cce_obj = losses.SparseCategoricalCrossentropy(from_logits=True) + # batch losses [[0.0018, 0.0004], [0.1698]] + loss = cce_obj(y_true, logits, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.1934, 3) + + def test_ragged_tensors_3d(self): + # shape [2, 1, None] + y_true = tf.ragged.constant([[[1, 1]], [[0]]]) + # shape [2, 1, None, 2] + y_pred = tf.ragged.constant( + [[[[0.1, 0.9], [0.1, 0.9]]], [[[0.9, 0.1]]]] + ) + cce_obj = losses.SparseCategoricalCrossentropy() + loss = cce_obj(y_true, y_pred) + self.assertAlmostEqual(self.evaluate(loss), 0.1054, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class HingeTest(tf.test.TestCase): - - def test_config(self): - hinge_obj = losses.Hinge( - reduction=losses_utils.ReductionV2.SUM, name='hinge_loss') - self.assertEqual(hinge_obj.name, 'hinge_loss') - self.assertEqual(hinge_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_unweighted(self): - hinge_obj = losses.Hinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]]) - - # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] - # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # loss = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] - # = [0.6, 0.4125] - # reduced loss = (0.6 + 0.4125) / 2 - - loss = hinge_obj(y_true, y_pred) - self.assertAllClose(0.506, self.evaluate(loss), atol=1e-3) - - def test_scalar_weighted(self): - hinge_obj = losses.Hinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]]) - - # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] - # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # loss = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] - # = [0.6, 0.4125] - # weighted_loss = [0.6 * 2.3, 0.4125 * 2.3] - # reduced loss = (0.6 + 0.4125) * 2.3 / 2 - - loss = hinge_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), 1.164, 3) - - # Verify we get the same output when the same input is given - loss_2 = hinge_obj(y_true, y_pred, sample_weight=2.3) - self.assertAllClose(self.evaluate(loss), self.evaluate(loss_2), 1e-3) - - def test_sample_weighted(self): - hinge_obj = losses.Hinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]]) - - # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] - # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # loss = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] - # = [0.6, 0.4125] - # weighted loss = [0.6 * 1.2, 0.4125 * 3.4] - # reduced loss = (0.6 * 1.2 + 0.4125 * 3.4) / 2 - - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = hinge_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(self.evaluate(loss), 1.061, 1e-3) - - def test_timestep_weighted(self): - hinge_obj = losses.Hinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]], 
shape=(2, 4, 1)) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]], - shape=(2, 4, 1)) - sample_weight = tf.constant([3, 6, 5, 0, 4, 2, 1, 3], shape=(2, 4)) - - # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[[-1], [1], [-1], [1]], [[-1], [-1], [1], [1]]] - # y_true * y_pred = [[[0.3], [0.2], [0.1], [1.6]], - # [[0.25], [1], [0.5], [0.6]]] - # 1 - y_true * y_pred = [[[0.7], [0.8], [0.9], [-0.6]], - # [[0.75], [0], [0.5], [0.4]]] - # loss = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, 0.4]] - # weighted loss = [[2.1, 4.8, 4.5, 0], [3, 0, 0.5, 1.2]] - # reduced loss = (2.1 + 4.8 + 4.5 + 0 + 3 + 0 + 0.5 + 1.2) / 8 - - loss = hinge_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(self.evaluate(loss), 2.012, 1e-3) - - def test_zero_weighted(self): - hinge_obj = losses.Hinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]]) - loss = hinge_obj(y_true, y_pred, sample_weight=0) - self.assertAllClose(self.evaluate(loss), 0., 1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + hinge_obj = losses.Hinge( + reduction=losses_utils.ReductionV2.SUM, name="hinge_loss" + ) + self.assertEqual(hinge_obj.name, "hinge_loss") + self.assertEqual(hinge_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_unweighted(self): + hinge_obj = losses.Hinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + + # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # loss = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] + # = [0.6, 0.4125] + # reduced loss = (0.6 + 0.4125) / 2 + + loss = hinge_obj(y_true, y_pred) + self.assertAllClose(0.506, self.evaluate(loss), atol=1e-3) + + def test_scalar_weighted(self): + hinge_obj = losses.Hinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + + # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # loss = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] + # = [0.6, 0.4125] + # weighted_loss = [0.6 * 2.3, 0.4125 * 2.3] + # reduced loss = (0.6 + 0.4125) * 2.3 / 2 + + loss = hinge_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 1.164, 3) + + # Verify we get the same output when the same input is given + loss_2 = hinge_obj(y_true, y_pred, sample_weight=2.3) + self.assertAllClose(self.evaluate(loss), self.evaluate(loss_2), 1e-3) + + def test_sample_weighted(self): + hinge_obj = losses.Hinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + + # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # loss = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] + # = [0.6, 0.4125] + # weighted loss = [0.6 * 1.2, 0.4125 * 3.4] + # reduced 
loss = (0.6 * 1.2 + 0.4125 * 3.4) / 2 + + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = hinge_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(self.evaluate(loss), 1.061, 1e-3) + + def test_timestep_weighted(self): + hinge_obj = losses.Hinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]], shape=(2, 4, 1)) + y_pred = tf.constant( + [[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]], shape=(2, 4, 1) + ) + sample_weight = tf.constant([3, 6, 5, 0, 4, 2, 1, 3], shape=(2, 4)) + + # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[[-1], [1], [-1], [1]], [[-1], [-1], [1], [1]]] + # y_true * y_pred = [[[0.3], [0.2], [0.1], [1.6]], + # [[0.25], [1], [0.5], [0.6]]] + # 1 - y_true * y_pred = [[[0.7], [0.8], [0.9], [-0.6]], + # [[0.75], [0], [0.5], [0.4]]] + # loss = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, 0.4]] + # weighted loss = [[2.1, 4.8, 4.5, 0], [3, 0, 0.5, 1.2]] + # reduced loss = (2.1 + 4.8 + 4.5 + 0 + 3 + 0 + 0.5 + 1.2) / 8 + + loss = hinge_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(self.evaluate(loss), 2.012, 1e-3) + + def test_zero_weighted(self): + hinge_obj = losses.Hinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + loss = hinge_obj(y_true, y_pred, sample_weight=0) + self.assertAllClose(self.evaluate(loss), 0.0, 1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class SquaredHingeTest(tf.test.TestCase): - - def test_config(self): - sq_hinge_obj = losses.SquaredHinge( - reduction=losses_utils.ReductionV2.SUM, name='sq_hinge_loss') - self.assertEqual(sq_hinge_obj.name, 'sq_hinge_loss') - self.assertEqual(sq_hinge_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_unweighted(self): - sq_hinge_obj = losses.SquaredHinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]]) - - # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] - # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, 0.4]] - # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 0.64, 0.81, 0], - # [0.5625, 0, 0.25, 0.16]] - # loss = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / 4] - # = [0.485, 0.2431] - # reduced loss = (0.485 + 0.2431) / 2 - - loss = sq_hinge_obj(y_true, y_pred) - self.assertAllClose(self.evaluate(loss), 0.364, 1e-3) - - def test_scalar_weighted(self): - sq_hinge_obj = losses.SquaredHinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]]) - - # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] - # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, 0.4]] - # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 0.64, 0.81, 0], - # [0.5625, 0, 0.25, 0.16]] - # loss = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / 4] - # = [0.485, 0.2431] - # weighted loss = [0.485 * 2.3, 0.2431 * 2.3] - # reduced loss = (0.485 + 0.2431) * 2.3 / 2 - - loss = sq_hinge_obj(y_true, y_pred, sample_weight=2.3) - 
self.assertAllClose(self.evaluate(loss), 0.837, 1e-3) - - # Verify we get the same output when the same input is given - loss_2 = sq_hinge_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) - - def test_sample_weighted(self): - sq_hinge_obj = losses.SquaredHinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]]) - - # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] - # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, 0.4]] - # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 0.64, 0.81, 0], - # [0.5625, 0, 0.25, 0.16]] - # loss = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / 4] - # = [0.485, 0.2431] - # weighted loss = [0.485 * 1.2, 0.2431 * 3.4] - # reduced loss = (0.485 * 1.2 + 0.2431 * 3.4) / 2 - - sample_weight = tf.constant([1.2, 3.4]) - loss = sq_hinge_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(self.evaluate(loss), 0.704, 1e-3) - - def test_timestep_weighted(self): - sq_hinge_obj = losses.SquaredHinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]], shape=(2, 4, 1)) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]], - shape=(2, 4, 1)) - sample_weight = tf.constant([3, 6, 5, 0, 4, 2, 1, 3], shape=(2, 4)) - - # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[[-1], [1], [-1], [1]], [[-1], [-1], [1], [1]]] - # y_true * y_pred = [[[0.3], [0.2], [0.1], [1.6]], - # [[0.25], [1], [0.5], [0.6]]] - # 1 - y_true * y_pred = [[[0.7], [0.8], [0.9], [-0.6]], - # [[0.75], [0], [0.5], [0.4]]] - # loss = [[0.49, 0.64, 0.81, 0], [0.5625, 0, 0.25, 0.16]] - # weighted loss = [[1.47, 3.84, 4.05, 0], [2.25, 0, 0.25, 0.48]] - # reduced loss = (1.47 + 3.84 + 4.05 + 0 + 2.25 + 0 + 0.25 + 0.48) / 8 - - loss = sq_hinge_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(self.evaluate(loss), 1.542, 1e-3) - - def test_zero_weighted(self): - sq_hinge_obj = losses.SquaredHinge() - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1., 0.5, 0.6]]) - loss = sq_hinge_obj(y_true, y_pred, sample_weight=0) - self.assertAllClose(self.evaluate(loss), 0., 1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + sq_hinge_obj = losses.SquaredHinge( + reduction=losses_utils.ReductionV2.SUM, name="sq_hinge_loss" + ) + self.assertEqual(sq_hinge_obj.name, "sq_hinge_loss") + self.assertEqual(sq_hinge_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_unweighted(self): + sq_hinge_obj = losses.SquaredHinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + + # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, + # 0.4]] + # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 0.64, 0.81, 0], + # [0.5625, 0, 0.25, 0.16]] + # loss = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / 4] + # = [0.485, 0.2431] + # reduced loss = 
(0.485 + 0.2431) / 2 + + loss = sq_hinge_obj(y_true, y_pred) + self.assertAllClose(self.evaluate(loss), 0.364, 1e-3) + + def test_scalar_weighted(self): + sq_hinge_obj = losses.SquaredHinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + + # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, + # 0.4]] + # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 0.64, 0.81, 0], + # [0.5625, 0, 0.25, 0.16]] + # loss = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / 4] + # = [0.485, 0.2431] + # weighted loss = [0.485 * 2.3, 0.2431 * 2.3] + # reduced loss = (0.485 + 0.2431) * 2.3 / 2 + + loss = sq_hinge_obj(y_true, y_pred, sample_weight=2.3) + self.assertAllClose(self.evaluate(loss), 0.837, 1e-3) + + # Verify we get the same output when the same input is given + loss_2 = sq_hinge_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) + + def test_sample_weighted(self): + sq_hinge_obj = losses.SquaredHinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + + # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, + # 0.4]] + # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 0.64, 0.81, 0], + # [0.5625, 0, 0.25, 0.16]] + # loss = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / 4] + # = [0.485, 0.2431] + # weighted loss = [0.485 * 1.2, 0.2431 * 3.4] + # reduced loss = (0.485 * 1.2 + 0.2431 * 3.4) / 2 + + sample_weight = tf.constant([1.2, 3.4]) + loss = sq_hinge_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(self.evaluate(loss), 0.704, 1e-3) + + def test_timestep_weighted(self): + sq_hinge_obj = losses.SquaredHinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]], shape=(2, 4, 1)) + y_pred = tf.constant( + [[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]], shape=(2, 4, 1) + ) + sample_weight = tf.constant([3, 6, 5, 0, 4, 2, 1, 3], shape=(2, 4)) + + # loss = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[[-1], [1], [-1], [1]], [[-1], [-1], [1], [1]]] + # y_true * y_pred = [[[0.3], [0.2], [0.1], [1.6]], + # [[0.25], [1], [0.5], [0.6]]] + # 1 - y_true * y_pred = [[[0.7], [0.8], [0.9], [-0.6]], + # [[0.75], [0], [0.5], [0.4]]] + # loss = [[0.49, 0.64, 0.81, 0], [0.5625, 0, 0.25, 0.16]] + # weighted loss = [[1.47, 3.84, 4.05, 0], [2.25, 0, 0.25, 0.48]] + # reduced loss = (1.47 + 3.84 + 4.05 + 0 + 2.25 + 0 + 0.25 + 0.48) / 8 + + loss = sq_hinge_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(self.evaluate(loss), 1.542, 1e-3) + + def test_zero_weighted(self): + sq_hinge_obj = losses.SquaredHinge() + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + loss = sq_hinge_obj(y_true, y_pred, sample_weight=0) + self.assertAllClose(self.evaluate(loss), 0.0, 1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", 
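For SquaredHinge the only change relative to Hinge is squaring the clipped margin before the per-sample mean. A minimal NumPy check of the values asserted above (illustrative only, not part of the patch):

```python
import numpy as np

y_true = np.array([[0, 1, 0, 1], [0, 0, 1, 1]], dtype=float)
y_pred = np.array([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]])

margin = np.maximum(0.0, 1.0 - (2.0 * y_true - 1.0) * y_pred)
per_sample = np.square(margin).mean(axis=-1)       # -> [0.485, 0.2431]

print(per_sample.mean())                           # ~0.364
print((per_sample * 2.3).mean())                   # ~0.837
print((per_sample * np.array([1.2, 3.4])).mean())  # ~0.704
```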
"eager"])) class CategoricalHingeTest(tf.test.TestCase): - - def test_config(self): - cat_hinge_obj = losses.CategoricalHinge( - reduction=losses_utils.ReductionV2.SUM, name='cat_hinge_loss') - self.assertEqual(cat_hinge_obj.name, 'cat_hinge_loss') - self.assertEqual(cat_hinge_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_unweighted(self): - cat_hinge_obj = losses.CategoricalHinge() - y_true = tf.constant([1, 9, 2, -5], shape=(2, 2)) - y_pred = tf.constant([4, 8, 12, 8], shape=(2, 2), dtype=tf.float32) - loss = cat_hinge_obj(y_true, y_pred) - - # pos = reduce_sum(y_true * y_pred) = [1*4+8*9, 12*2+8*-5] = [76, -16] - # neg = reduce_max((1. - y_true) * y_pred) = [[0, -64], [-12, 48]] = [0, 48] - # cat_hinge = max(0., neg - pos + 1.) = [0, 65] - # reduced_loss = (0 + 65)/2 = 32.5 - self.assertAlmostEqual(self.evaluate(loss), 32.5, 3) - - def test_scalar_weighted(self): - cat_hinge_obj = losses.CategoricalHinge() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = cat_hinge_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), 83.95, 3) - - # Verify we get the same output when the same input is given - loss_2 = cat_hinge_obj(y_true, y_pred, sample_weight=2.3) - self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) - - def test_sample_weighted(self): - cat_hinge_obj = losses.CategoricalHinge() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = cat_hinge_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 124.1, 3) - - def test_timestep_weighted(self): - cat_hinge_obj = losses.CategoricalHinge() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3, 1), dtype=tf.float32) - sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) - loss = cat_hinge_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 4.0, 3) - - def test_zero_weighted(self): - cat_hinge_obj = losses.CategoricalHinge() - y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - y_pred = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32) - loss = cat_hinge_obj(y_true, y_pred, sample_weight=0) - self.assertAlmostEqual(self.evaluate(loss), 0., 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def test_config(self): + cat_hinge_obj = losses.CategoricalHinge( + reduction=losses_utils.ReductionV2.SUM, name="cat_hinge_loss" + ) + self.assertEqual(cat_hinge_obj.name, "cat_hinge_loss") + self.assertEqual(cat_hinge_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_unweighted(self): + cat_hinge_obj = losses.CategoricalHinge() + y_true = tf.constant([1, 9, 2, -5], shape=(2, 2)) + y_pred = tf.constant([4, 8, 12, 8], shape=(2, 2), dtype=tf.float32) + loss = cat_hinge_obj(y_true, y_pred) + + # pos = reduce_sum(y_true * y_pred) = [1*4+8*9, 12*2+8*-5] = [76, -16] + # neg = reduce_max((1. - y_true) * y_pred) = [[0, -64], [-12, 48]] = [0, + # 48] + # cat_hinge = max(0., neg - pos + 1.) 
= [0, 65] + # reduced_loss = (0 + 65)/2 = 32.5 + self.assertAlmostEqual(self.evaluate(loss), 32.5, 3) + + def test_scalar_weighted(self): + cat_hinge_obj = losses.CategoricalHinge() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = cat_hinge_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), 83.95, 3) + + # Verify we get the same output when the same input is given + loss_2 = cat_hinge_obj(y_true, y_pred, sample_weight=2.3) + self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) + + def test_sample_weighted(self): + cat_hinge_obj = losses.CategoricalHinge() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = cat_hinge_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 124.1, 3) + + def test_timestep_weighted(self): + cat_hinge_obj = losses.CategoricalHinge() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3, 1)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3, 1), dtype=tf.float32 + ) + sample_weight = tf.constant([3, 6, 5, 0, 4, 2], shape=(2, 3)) + loss = cat_hinge_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 4.0, 3) + + def test_zero_weighted(self): + cat_hinge_obj = losses.CategoricalHinge() + y_true = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) + y_pred = tf.constant( + [4, 8, 12, 8, 1, 3], shape=(2, 3), dtype=tf.float32 + ) + loss = cat_hinge_obj(y_true, y_pred, sample_weight=0) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class LogCoshTest(tf.test.TestCase): - - def setup(self): - y_pred = np.asarray([1, 9, 2, -5, -2, 6]).reshape((2, 3)) - y_true = np.asarray([4, 8, 12, 8, 1, 3]).reshape((2, 3)) - - self.batch_size = 6 - error = y_pred - y_true - self.expected_losses = np.log((np.exp(error) + np.exp(-error)) / 2) - - self.y_pred = tf.constant(y_pred, dtype=tf.float32) - self.y_true = tf.constant(y_true) - - def test_config(self): - logcosh_obj = losses.LogCosh( - reduction=losses_utils.ReductionV2.SUM, name='logcosh_loss') - self.assertEqual(logcosh_obj.name, 'logcosh_loss') - self.assertEqual(logcosh_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_unweighted(self): - self.setup() - logcosh_obj = losses.LogCosh() - - loss = logcosh_obj(self.y_true, self.y_pred) - expected_loss = np.sum(self.expected_losses) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_scalar_weighted(self): - self.setup() - logcosh_obj = losses.LogCosh() - sample_weight = 2.3 - - loss = logcosh_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - expected_loss = sample_weight * np.sum( - self.expected_losses) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - # Verify we get the same output when the same input is given - loss_2 = logcosh_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) - - def test_sample_weighted(self): - self.setup() - logcosh_obj = losses.LogCosh() - - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = logcosh_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - - expected_loss = np.multiply( 
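The pos/neg comments in the CategoricalHinge test above are easy to misread because both reductions run over the class axis, not the batch axis. A small sketch with the same fixtures (plain NumPy, illustrative, outside the patch):

```python
import numpy as np

y_true = np.array([[1, 9], [2, -5]], dtype=float)
y_pred = np.array([[4, 8], [12, 8]], dtype=float)

pos = np.sum(y_true * y_pred, axis=-1)          # [76., -16.]
neg = np.max((1.0 - y_true) * y_pred, axis=-1)  # [0., 48.]
loss = np.maximum(0.0, neg - pos + 1.0)         # [0., 65.]
print(loss.mean())                              # 32.5, as asserted
```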
- self.expected_losses, - np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3))) - expected_loss = np.sum(expected_loss) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_timestep_weighted(self): - self.setup() - logcosh_obj = losses.LogCosh() - y_true = np.asarray([1, 9, 2, -5, -2, 6]).reshape(2, 3, 1) - y_pred = np.asarray([4, 8, 12, 8, 1, 3]).reshape(2, 3, 1) - error = y_pred - y_true - expected_losses = np.log((np.exp(error) + np.exp(-error)) / 2) - sample_weight = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3, 1)) - - y_pred = tf.constant(y_pred, dtype=tf.float32) - y_true = tf.constant(y_true) - loss = logcosh_obj( - y_true, y_pred, sample_weight=tf.constant(sample_weight, shape=(2, 3))) - expected_loss = np.sum(expected_losses * sample_weight) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_zero_weighted(self): - self.setup() - logcosh_obj = losses.LogCosh() - sample_weight = 0 - loss = logcosh_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 0., 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def setup(self): + y_pred = np.asarray([1, 9, 2, -5, -2, 6]).reshape((2, 3)) + y_true = np.asarray([4, 8, 12, 8, 1, 3]).reshape((2, 3)) + + self.batch_size = 6 + error = y_pred - y_true + self.expected_losses = np.log((np.exp(error) + np.exp(-error)) / 2) + + self.y_pred = tf.constant(y_pred, dtype=tf.float32) + self.y_true = tf.constant(y_true) + + def test_config(self): + logcosh_obj = losses.LogCosh( + reduction=losses_utils.ReductionV2.SUM, name="logcosh_loss" + ) + self.assertEqual(logcosh_obj.name, "logcosh_loss") + self.assertEqual(logcosh_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_unweighted(self): + self.setup() + logcosh_obj = losses.LogCosh() + + loss = logcosh_obj(self.y_true, self.y_pred) + expected_loss = np.sum(self.expected_losses) / self.batch_size + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_scalar_weighted(self): + self.setup() + logcosh_obj = losses.LogCosh() + sample_weight = 2.3 + + loss = logcosh_obj( + self.y_true, self.y_pred, sample_weight=sample_weight + ) + expected_loss = ( + sample_weight * np.sum(self.expected_losses) / self.batch_size + ) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + # Verify we get the same output when the same input is given + loss_2 = logcosh_obj( + self.y_true, self.y_pred, sample_weight=sample_weight + ) + self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) + + def test_sample_weighted(self): + self.setup() + logcosh_obj = losses.LogCosh() + + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = logcosh_obj( + self.y_true, self.y_pred, sample_weight=sample_weight + ) + + expected_loss = np.multiply( + self.expected_losses, + np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3)), + ) + expected_loss = np.sum(expected_loss) / self.batch_size + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_timestep_weighted(self): + self.setup() + logcosh_obj = losses.LogCosh() + y_true = np.asarray([1, 9, 2, -5, -2, 6]).reshape(2, 3, 1) + y_pred = np.asarray([4, 8, 12, 8, 1, 3]).reshape(2, 3, 1) + error = y_pred - y_true + expected_losses = np.log((np.exp(error) + np.exp(-error)) / 2) + sample_weight = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3, 1)) + + y_pred = tf.constant(y_pred, dtype=tf.float32) + y_true = tf.constant(y_true) + loss = 
logcosh_obj( + y_true, + y_pred, + sample_weight=tf.constant(sample_weight, shape=(2, 3)), + ) + expected_loss = ( + np.sum(expected_losses * sample_weight) / self.batch_size + ) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_zero_weighted(self): + self.setup() + logcosh_obj = losses.LogCosh() + sample_weight = 0 + loss = logcosh_obj( + self.y_true, self.y_pred, sample_weight=sample_weight + ) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class PoissonTest(tf.test.TestCase): - - def setup(self): - self.np_y_pred = np.asarray([1, 9, 2, 5, 2, 6]).reshape((2, 3)) - self.np_y_true = np.asarray([4, 8, 12, 8, 1, 3]).reshape((2, 3)) - - self.batch_size = 6 - self.expected_losses = self.np_y_pred - np.multiply(self.np_y_true, - np.log(self.np_y_pred)) - - self.y_pred = tf.constant(self.np_y_pred, dtype=tf.float32) - self.y_true = tf.constant(self.np_y_true) - - def test_config(self): - poisson_obj = losses.Poisson( - reduction=losses_utils.ReductionV2.SUM, name='poisson') - self.assertEqual(poisson_obj.name, 'poisson') - self.assertEqual(poisson_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_unweighted(self): - self.setup() - poisson_obj = losses.Poisson() - - loss = poisson_obj(self.y_true, self.y_pred) - expected_loss = np.sum(self.expected_losses) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_scalar_weighted(self): - self.setup() - poisson_obj = losses.Poisson() - sample_weight = 2.3 - loss = poisson_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - - expected_loss = sample_weight * np.sum( - self.expected_losses) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - # Verify we get the same output when the same input is given - loss_2 = poisson_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) - - def test_sample_weighted(self): - self.setup() - poisson_obj = losses.Poisson() - - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = poisson_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - - expected_loss = np.multiply( - self.expected_losses, - np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3))) - expected_loss = np.sum(expected_loss) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_timestep_weighted(self): - self.setup() - poisson_obj = losses.Poisson() - y_true = self.np_y_true.reshape(2, 3, 1) - y_pred = self.np_y_pred.reshape(2, 3, 1) - sample_weight = np.asarray([3, 6, 5, 0, 4, 2]).reshape(2, 3, 1) - expected_losses = y_pred - np.multiply(y_true, np.log(y_pred)) - - y_pred = tf.constant(y_pred, dtype=tf.float32) - y_true = tf.constant(y_true) - - loss = poisson_obj( - y_true, y_pred, sample_weight=tf.constant(sample_weight, shape=(2, 3))) - expected_loss = np.sum(expected_losses * sample_weight) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_zero_weighted(self): - self.setup() - poisson_obj = losses.Poisson() - loss = poisson_obj(self.y_true, self.y_pred, sample_weight=0) - self.assertAlmostEqual(self.evaluate(loss), 0., 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def setup(self): + self.np_y_pred = np.asarray([1, 9, 2, 5, 2, 6]).reshape((2, 3)) + 
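The expected_losses expression in LogCoshTest.setup is log(cosh(error)) written out in exponential form. A quick sanity sketch (NumPy, fixture values as above, illustrative only) confirming the two forms agree and showing the unweighted reduction:

```python
import numpy as np

y_pred = np.array([1, 9, 2, -5, -2, 6], dtype=float).reshape(2, 3)
y_true = np.array([4, 8, 12, 8, 1, 3], dtype=float).reshape(2, 3)

error = y_pred - y_true
losses_a = np.log((np.exp(error) + np.exp(-error)) / 2)  # as in setup()
losses_b = np.log(np.cosh(error))                        # equivalent form
assert np.allclose(losses_a, losses_b)

print(losses_a.sum() / error.size)  # unweighted reduction, batch_size = 6
```

Both closed forms overflow for large |error|; production implementations typically avoid this with the numerically stable rewrite error + softplus(-2 * error) - log(2).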
self.np_y_true = np.asarray([4, 8, 12, 8, 1, 3]).reshape((2, 3)) + + self.batch_size = 6 + self.expected_losses = self.np_y_pred - np.multiply( + self.np_y_true, np.log(self.np_y_pred) + ) + + self.y_pred = tf.constant(self.np_y_pred, dtype=tf.float32) + self.y_true = tf.constant(self.np_y_true) + + def test_config(self): + poisson_obj = losses.Poisson( + reduction=losses_utils.ReductionV2.SUM, name="poisson" + ) + self.assertEqual(poisson_obj.name, "poisson") + self.assertEqual(poisson_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_unweighted(self): + self.setup() + poisson_obj = losses.Poisson() + + loss = poisson_obj(self.y_true, self.y_pred) + expected_loss = np.sum(self.expected_losses) / self.batch_size + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_scalar_weighted(self): + self.setup() + poisson_obj = losses.Poisson() + sample_weight = 2.3 + loss = poisson_obj( + self.y_true, self.y_pred, sample_weight=sample_weight + ) + + expected_loss = ( + sample_weight * np.sum(self.expected_losses) / self.batch_size + ) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + # Verify we get the same output when the same input is given + loss_2 = poisson_obj( + self.y_true, self.y_pred, sample_weight=sample_weight + ) + self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) + + def test_sample_weighted(self): + self.setup() + poisson_obj = losses.Poisson() + + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = poisson_obj( + self.y_true, self.y_pred, sample_weight=sample_weight + ) + + expected_loss = np.multiply( + self.expected_losses, + np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3)), + ) + expected_loss = np.sum(expected_loss) / self.batch_size + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_timestep_weighted(self): + self.setup() + poisson_obj = losses.Poisson() + y_true = self.np_y_true.reshape(2, 3, 1) + y_pred = self.np_y_pred.reshape(2, 3, 1) + sample_weight = np.asarray([3, 6, 5, 0, 4, 2]).reshape(2, 3, 1) + expected_losses = y_pred - np.multiply(y_true, np.log(y_pred)) + + y_pred = tf.constant(y_pred, dtype=tf.float32) + y_true = tf.constant(y_true) + + loss = poisson_obj( + y_true, + y_pred, + sample_weight=tf.constant(sample_weight, shape=(2, 3)), + ) + expected_loss = ( + np.sum(expected_losses * sample_weight) / self.batch_size + ) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_zero_weighted(self): + self.setup() + poisson_obj = losses.Poisson() + loss = poisson_obj(self.y_true, self.y_pred, sample_weight=0) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class KLDivergenceTest(tf.test.TestCase): - - def setup(self): - self.np_y_pred = np.asarray([.4, .9, .12, .36, .3, .4]).reshape((2, 3)) - self.np_y_true = np.asarray([.5, .8, .12, .7, .43, .8]).reshape((2, 3)) - - self.batch_size = 2 - self.expected_losses = np.multiply(self.np_y_true, - np.log(self.np_y_true / self.np_y_pred)) - - self.y_pred = tf.constant(self.np_y_pred, dtype=tf.float32) - self.y_true = tf.constant(self.np_y_true) - - def test_config(self): - k_obj = losses.KLDivergence( - reduction=losses_utils.ReductionV2.SUM, name='kld') - self.assertEqual(k_obj.name, 'kld') - self.assertEqual(k_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_unweighted(self): - self.setup() - k_obj = losses.KLDivergence() - - 
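PoissonTest reduces the elementwise y_pred - y_true * log(y_pred) terms by the total element count (batch_size = 6 here), so a scalar sample weight scales the result linearly. Sketch (NumPy, fixture values from setup(), illustrative only):

```python
import numpy as np

y_pred = np.array([1, 9, 2, 5, 2, 6], dtype=float).reshape(2, 3)
y_true = np.array([4, 8, 12, 8, 1, 3], dtype=float).reshape(2, 3)

losses = y_pred - y_true * np.log(y_pred)
print(losses.sum() / losses.size)        # unweighted reduction
print(2.3 * losses.sum() / losses.size)  # scalar sample_weight = 2.3
```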
loss = k_obj(self.y_true, self.y_pred) - expected_loss = np.sum(self.expected_losses) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_scalar_weighted(self): - self.setup() - k_obj = losses.KLDivergence() - sample_weight = 2.3 - - loss = k_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - expected_loss = sample_weight * np.sum( - self.expected_losses) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - # Verify we get the same output when the same input is given - loss_2 = k_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) - - def test_sample_weighted(self): - self.setup() - k_obj = losses.KLDivergence() - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - loss = k_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - - expected_loss = np.multiply( - self.expected_losses, - np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape(2, 3)) - expected_loss = np.sum(expected_loss) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_timestep_weighted(self): - self.setup() - k_obj = losses.KLDivergence() - y_true = self.np_y_true.reshape(2, 3, 1) - y_pred = self.np_y_pred.reshape(2, 3, 1) - sample_weight = np.asarray([3, 6, 5, 0, 4, 2]).reshape(2, 3) - expected_losses = np.sum( - np.multiply(y_true, np.log(y_true / y_pred)), axis=-1) - - y_pred = tf.constant(y_pred, dtype=tf.float32) - y_true = tf.constant(y_true) - loss = k_obj(y_true, y_pred, sample_weight=tf.constant(sample_weight)) - - num_timesteps = 3 - expected_loss = np.sum(expected_losses * sample_weight) / ( - self.batch_size * num_timesteps) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_zero_weighted(self): - self.setup() - k_obj = losses.KLDivergence() - loss = k_obj(self.y_true, self.y_pred, sample_weight=0) - self.assertAlmostEqual(self.evaluate(loss), 0., 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + def setup(self): + self.np_y_pred = np.asarray([0.4, 0.9, 0.12, 0.36, 0.3, 0.4]).reshape( + (2, 3) + ) + self.np_y_true = np.asarray([0.5, 0.8, 0.12, 0.7, 0.43, 0.8]).reshape( + (2, 3) + ) + + self.batch_size = 2 + self.expected_losses = np.multiply( + self.np_y_true, np.log(self.np_y_true / self.np_y_pred) + ) + + self.y_pred = tf.constant(self.np_y_pred, dtype=tf.float32) + self.y_true = tf.constant(self.np_y_true) + + def test_config(self): + k_obj = losses.KLDivergence( + reduction=losses_utils.ReductionV2.SUM, name="kld" + ) + self.assertEqual(k_obj.name, "kld") + self.assertEqual(k_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_unweighted(self): + self.setup() + k_obj = losses.KLDivergence() + + loss = k_obj(self.y_true, self.y_pred) + expected_loss = np.sum(self.expected_losses) / self.batch_size + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_scalar_weighted(self): + self.setup() + k_obj = losses.KLDivergence() + sample_weight = 2.3 + + loss = k_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + expected_loss = ( + sample_weight * np.sum(self.expected_losses) / self.batch_size + ) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + # Verify we get the same output when the same input is given + loss_2 = k_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) + + def test_sample_weighted(self): + 
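Note that KLDivergenceTest uses batch_size = 2, not 6: the per-element y_true * log(y_true / y_pred) terms are summed over the class axis first and then averaged over the two samples. An equivalent NumPy sketch (illustrative, not part of the patch):

```python
import numpy as np

y_pred = np.array([0.4, 0.9, 0.12, 0.36, 0.3, 0.4]).reshape(2, 3)
y_true = np.array([0.5, 0.8, 0.12, 0.7, 0.43, 0.8]).reshape(2, 3)

kl_terms = y_true * np.log(y_true / y_pred)
per_sample = kl_terms.sum(axis=-1)  # sum over the class axis
print(per_sample.mean())            # == kl_terms.sum() / 2, as in the test
```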
self.setup() + k_obj = losses.KLDivergence() + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + loss = k_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + + expected_loss = np.multiply( + self.expected_losses, + np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape(2, 3), + ) + expected_loss = np.sum(expected_loss) / self.batch_size + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_timestep_weighted(self): + self.setup() + k_obj = losses.KLDivergence() + y_true = self.np_y_true.reshape(2, 3, 1) + y_pred = self.np_y_pred.reshape(2, 3, 1) + sample_weight = np.asarray([3, 6, 5, 0, 4, 2]).reshape(2, 3) + expected_losses = np.sum( + np.multiply(y_true, np.log(y_true / y_pred)), axis=-1 + ) + + y_pred = tf.constant(y_pred, dtype=tf.float32) + y_true = tf.constant(y_true) + loss = k_obj(y_true, y_pred, sample_weight=tf.constant(sample_weight)) + + num_timesteps = 3 + expected_loss = np.sum(expected_losses * sample_weight) / ( + self.batch_size * num_timesteps + ) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_zero_weighted(self): + self.setup() + k_obj = losses.KLDivergence() + loss = k_obj(self.y_true, self.y_pred, sample_weight=0) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class HuberLossTest(tf.test.TestCase): - - def huber_loss(self, y_true, y_pred, delta=1.0): - error = y_pred - y_true - abs_error = np.abs(error) - - quadratic = np.minimum(abs_error, delta) - linear = np.subtract(abs_error, quadratic) - return np.add( - np.multiply(0.5, np.multiply(quadratic, quadratic)), - np.multiply(delta, linear)) - - def setup(self, delta=1.0): - self.np_y_pred = np.asarray([.9, .2, .2, .8, .4, .6]).reshape((2, 3)) - self.np_y_true = np.asarray([1., 0., 1., 1., 0., 0.]).reshape((2, 3)) - - self.batch_size = 6 - self.expected_losses = self.huber_loss(self.np_y_true, self.np_y_pred, - delta) - - self.y_pred = tf.constant(self.np_y_pred) - self.y_true = tf.constant(self.np_y_true) - - def test_config(self): - h_obj = losses.Huber(reduction=losses_utils.ReductionV2.SUM, name='huber') - self.assertEqual(h_obj.name, 'huber') - self.assertEqual(h_obj.reduction, losses_utils.ReductionV2.SUM) - - def test_all_correct(self): - self.setup() - h_obj = losses.Huber() - loss = h_obj(self.y_true, self.y_true) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - - def test_unweighted(self): - self.setup() - h_obj = losses.Huber() - loss = h_obj(self.y_true, self.y_pred) - actual_loss = np.sum(self.expected_losses) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) - - def test_scalar_weighted(self): - self.setup() - h_obj = losses.Huber() - sample_weight = 2.3 - loss = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - actual_loss = sample_weight * np.sum(self.expected_losses) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) - - # Verify we get the same output when the same input is given - loss_2 = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) - - def test_sample_weighted(self): - self.setup() - h_obj = losses.Huber() - sample_weight = tf.constant((1.2, 3.4), shape=(2, 1)) - - loss = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - actual_loss = np.multiply( - self.expected_losses, - np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3))) - actual_loss = np.sum(actual_loss) / 
self.batch_size - self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) - - def test_timestep_weighted(self): - self.setup() - h_obj = losses.Huber() - y_pred = self.np_y_pred.reshape((2, 3, 1)) - y_true = self.np_y_true.reshape((2, 3, 1)) - expected_losses = self.huber_loss(y_true, y_pred) - - y_pred = tf.constant(y_pred) - y_true = tf.constant(y_true) - sample_weight = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3, 1)) - loss = h_obj( - y_true, y_pred, sample_weight=tf.constant(sample_weight, shape=(2, 3))) - actual_loss = np.multiply(expected_losses, sample_weight) - actual_loss = np.sum(actual_loss) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) - - def test_zero_weighted(self): - self.setup() - h_obj = losses.Huber() - sample_weight = 0 - loss = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - self.assertAlmostEqual(self.evaluate(loss), 0., 3) - - def test_non_default_delta(self): - self.setup(delta=0.8) - h_obj = losses.Huber(delta=0.8) - sample_weight = 2.3 - loss = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - actual_loss = sample_weight * np.sum(self.expected_losses) / self.batch_size - self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) - - def test_loss_with_non_default_dtype(self): - # Test case for GitHub issue: - # https://github.com/tensorflow/tensorflow/issues/39004 - self.setup() - h_obj = losses.Huber() - try: - backend.set_floatx('float64') - loss = h_obj(self.y_true, self.y_true) - self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) - finally: - backend.set_floatx('float32') + def huber_loss(self, y_true, y_pred, delta=1.0): + error = y_pred - y_true + abs_error = np.abs(error) + + quadratic = np.minimum(abs_error, delta) + linear = np.subtract(abs_error, quadratic) + return np.add( + np.multiply(0.5, np.multiply(quadratic, quadratic)), + np.multiply(delta, linear), + ) + + def setup(self, delta=1.0): + self.np_y_pred = np.asarray([0.9, 0.2, 0.2, 0.8, 0.4, 0.6]).reshape( + (2, 3) + ) + self.np_y_true = np.asarray([1.0, 0.0, 1.0, 1.0, 0.0, 0.0]).reshape( + (2, 3) + ) + + self.batch_size = 6 + self.expected_losses = self.huber_loss( + self.np_y_true, self.np_y_pred, delta + ) + + self.y_pred = tf.constant(self.np_y_pred) + self.y_true = tf.constant(self.np_y_true) + + def test_config(self): + h_obj = losses.Huber( + reduction=losses_utils.ReductionV2.SUM, name="huber" + ) + self.assertEqual(h_obj.name, "huber") + self.assertEqual(h_obj.reduction, losses_utils.ReductionV2.SUM) + + def test_all_correct(self): + self.setup() + h_obj = losses.Huber() + loss = h_obj(self.y_true, self.y_true) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_unweighted(self): + self.setup() + h_obj = losses.Huber() + loss = h_obj(self.y_true, self.y_pred) + actual_loss = np.sum(self.expected_losses) / self.batch_size + self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) + + def test_scalar_weighted(self): + self.setup() + h_obj = losses.Huber() + sample_weight = 2.3 + loss = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + actual_loss = ( + sample_weight * np.sum(self.expected_losses) / self.batch_size + ) + self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) + + # Verify we get the same output when the same input is given + loss_2 = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), self.evaluate(loss_2), 3) + + def test_sample_weighted(self): + self.setup() + h_obj = losses.Huber() + sample_weight = tf.constant((1.2, 
3.4), shape=(2, 1)) + + loss = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + actual_loss = np.multiply( + self.expected_losses, + np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3)), + ) + actual_loss = np.sum(actual_loss) / self.batch_size + self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) + + def test_timestep_weighted(self): + self.setup() + h_obj = losses.Huber() + y_pred = self.np_y_pred.reshape((2, 3, 1)) + y_true = self.np_y_true.reshape((2, 3, 1)) + expected_losses = self.huber_loss(y_true, y_pred) + + y_pred = tf.constant(y_pred) + y_true = tf.constant(y_true) + sample_weight = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3, 1)) + loss = h_obj( + y_true, + y_pred, + sample_weight=tf.constant(sample_weight, shape=(2, 3)), + ) + actual_loss = np.multiply(expected_losses, sample_weight) + actual_loss = np.sum(actual_loss) / self.batch_size + self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) + + def test_zero_weighted(self): + self.setup() + h_obj = losses.Huber() + sample_weight = 0 + loss = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + + def test_non_default_delta(self): + self.setup(delta=0.8) + h_obj = losses.Huber(delta=0.8) + sample_weight = 2.3 + loss = h_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + actual_loss = ( + sample_weight * np.sum(self.expected_losses) / self.batch_size + ) + self.assertAlmostEqual(self.evaluate(loss), actual_loss, 3) + + def test_loss_with_non_default_dtype(self): + # Test case for GitHub issue: + # https://github.com/tensorflow/tensorflow/issues/39004 + self.setup() + h_obj = losses.Huber() + try: + backend.set_floatx("float64") + loss = h_obj(self.y_true, self.y_true) + self.assertAlmostEqual(self.evaluate(loss), 0.0, 3) + finally: + backend.set_floatx("float32") class BinaryTruePositivesViaControlFlow(losses.Loss): + def __init__(self, reduction=losses_utils.ReductionV2.AUTO): + super().__init__(reduction=reduction) - def __init__(self, reduction=losses_utils.ReductionV2.AUTO): - super().__init__(reduction=reduction) - - def call(self, y_true, y_pred): - y_true = tf.cast(y_true, tf.bool) - y_pred = tf.cast(y_pred, tf.bool) + def call(self, y_true, y_pred): + y_true = tf.cast(y_true, tf.bool) + y_pred = tf.cast(y_pred, tf.bool) - result = tf.constant(0.0) - for i in range(len(y_true)): - for j in range(len(y_true[i])): - if y_true[i][j] and y_pred[i][j]: - result = result + 1 - return result + result = tf.constant(0.0) + for i in range(len(y_true)): + for j in range(len(y_true[i])): + if y_true[i][j] and y_pred[i][j]: + result = result + 1 + return result -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class CustomLossTest(tf.test.TestCase): - - def test_autograph(self): - y_true = tf.constant([[0, 0.9, 0, 1, 0], [0, 0, 1, 1, 1], [1, 1, 1, 1, 0], - [0, 0, 0, 0, 1.5]]) - y_pred = tf.constant([[0, 0, 1, 5, 0], [1, 1, 1, 1, 1], [0, 1, 0, 1, 0], - [1, 10, 1, 1, 1]]) - - @tf.function - def loss_fn(y_true, y_pred): - loss_obj = BinaryTruePositivesViaControlFlow() - return loss_obj(y_true, y_pred) - - loss = loss_fn(y_true, y_pred) - self.assertAllEqual( - self.evaluate(loss), - 7.0, - ) - - -if __name__ == '__main__': - tf.test.main() + def test_autograph(self): + y_true = tf.constant( + [ + [0, 0.9, 0, 1, 0], + [0, 0, 1, 1, 1], + [1, 1, 1, 1, 0], + [0, 0, 0, 0, 1.5], + ] + ) + y_pred = tf.constant( + [ + [0, 0, 1, 5, 
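The huber_loss helper in HuberLossTest is written in a quadratic-plus-linear form; it is algebraically identical to the usual piecewise definition of the Huber loss. The sketch below checks that equivalence (plain NumPy, illustrative, not part of the patch):

```python
import numpy as np

def huber_quadratic_linear(error, delta=1.0):
    # Form used by the test helper above.
    abs_error = np.abs(error)
    quadratic = np.minimum(abs_error, delta)
    linear = abs_error - quadratic
    return 0.5 * quadratic**2 + delta * linear

def huber_piecewise(error, delta=1.0):
    # Textbook piecewise form.
    abs_error = np.abs(error)
    return np.where(
        abs_error <= delta,
        0.5 * error**2,                      # quadratic near zero
        delta * abs_error - 0.5 * delta**2,  # linear in the tails
    )

error = np.linspace(-3.0, 3.0, 13)
assert np.allclose(huber_quadratic_linear(error), huber_piecewise(error))
assert np.allclose(
    huber_quadratic_linear(error, 0.8), huber_piecewise(error, 0.8)
)
```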
0], + [1, 1, 1, 1, 1], + [0, 1, 0, 1, 0], + [1, 10, 1, 1, 1], + ] + ) + + @tf.function + def loss_fn(y_true, y_pred): + loss_obj = BinaryTruePositivesViaControlFlow() + return loss_obj(y_true, y_pred) + + loss = loss_fn(y_true, y_pred) + self.assertAllEqual( + self.evaluate(loss), + 7.0, + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/BUILD b/keras/metrics/BUILD index e8d9911016da..6d259d9c8b23 100644 --- a/keras/metrics/BUILD +++ b/keras/metrics/BUILD @@ -16,9 +16,12 @@ # Description: # Contains the Keras metrics submodule. -load("@org_keras//keras:keras.bzl", "tf_py_test") +# Placeholder: load unaliased py_library +load("@org_keras//keras:keras.bzl", "cuda_py_test") +load("@org_keras//keras:keras.bzl", "tf_py_test") # buildifier: disable=same-origin-load package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/tensorflow/python/feature_column:__subpackages__", @@ -32,8 +35,15 @@ py_library( name = "metrics", srcs = [ "__init__.py", + "accuracy_metrics.py", "base_metric.py", - "metrics.py", + "confusion_metrics.py", + "f_score_metrics.py", + "hinge_metrics.py", + "iou_metrics.py", + "probabilistic_metrics.py", + "py_metric.py", + "regression_metrics.py", ], srcs_version = "PY3", deps = [ @@ -67,9 +77,9 @@ tf_py_test( ) tf_py_test( - name = "metrics_test", + name = "accuracy_metrics_test", size = "medium", - srcs = ["metrics_test.py"], + srcs = ["accuracy_metrics_test.py"], python_version = "PY3", shard_count = 4, deps = [ @@ -84,38 +94,122 @@ tf_py_test( ) tf_py_test( - name = "base_metric_test", + name = "confusion_metrics_test", size = "medium", - srcs = ["base_metric_test.py"], + srcs = ["confusion_metrics_test.py"], python_version = "PY3", shard_count = 4, deps = [ ":metrics", + "//:expect_absl_installed", "//:expect_numpy_installed", + "//:expect_scipy_installed", "//:expect_tensorflow_installed", "//keras", "//keras/layers", + "//keras/models", "//keras/testing_infra:test_combinations", "//keras/testing_infra:test_utils", + "//keras/utils:metrics_utils", ], ) tf_py_test( - name = "confusion_matrix_test", + name = "f_score_metrics_test", size = "medium", - srcs = ["confusion_matrix_test.py"], + srcs = ["f_score_metrics_test.py"], python_version = "PY3", shard_count = 4, deps = [ ":metrics", "//:expect_absl_installed", "//:expect_numpy_installed", - "//:expect_scipy_installed", "//:expect_tensorflow_installed", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", + ], +) + +tf_py_test( + name = "hinge_metrics_test", + size = "medium", + srcs = ["hinge_metrics_test.py"], + python_version = "PY3", + shard_count = 4, + deps = [ + ":metrics", + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//keras", "//keras/layers", - "//keras/models", "//keras/testing_infra:test_combinations", - "//keras/utils:metrics_utils", + "//keras/testing_infra:test_utils", + ], +) + +tf_py_test( + name = "iou_metrics_test", + size = "medium", + srcs = ["iou_metrics_test.py"], + python_version = "PY3", + shard_count = 4, + deps = [ + ":metrics", + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//keras", + "//keras/layers", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", + ], +) + +tf_py_test( + name = "probabilistic_metrics_test", + size = "medium", + srcs = ["probabilistic_metrics_test.py"], + python_version = "PY3", + shard_count = 4, + deps = [ + ":metrics", + "//:expect_numpy_installed", + 
"//:expect_tensorflow_installed", + "//keras", + "//keras/layers", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", + ], +) + +tf_py_test( + name = "regression_metrics_test", + size = "medium", + srcs = ["regression_metrics_test.py"], + python_version = "PY3", + shard_count = 4, + deps = [ + ":metrics", + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//keras", + "//keras/testing_infra:test_combinations", + ], +) + +tf_py_test( + name = "base_metric_test", + size = "medium", + srcs = ["base_metric_test.py"], + python_version = "PY3", + shard_count = 4, + deps = [ + ":metrics", + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//keras", + "//keras/layers", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", ], ) @@ -133,3 +227,21 @@ tf_py_test( "//keras/testing_infra:test_combinations", ], ) + +cuda_py_test( + name = "py_metric_test", + size = "medium", + srcs = ["py_metric_test.py"], + shard_count = 2, + tags = [ + "no_windows", + ], + deps = [ + ":metrics", + "//:expect_tensorflow_installed", + "//keras", + "//keras/layers", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", + ], +) diff --git a/keras/metrics/__init__.py b/keras/metrics/__init__.py index f9581f89038d..9cc4c770ad51 100644 --- a/keras/metrics/__init__.py +++ b/keras/metrics/__init__.py @@ -13,92 +13,111 @@ # limitations under the License. # ============================================================================== """All Keras metrics.""" -# pylint: disable=g-bad-import-order -from keras.utils.generic_utils import deserialize_keras_object -from keras.utils.generic_utils import serialize_keras_object +# isort: off +import warnings from tensorflow.python.util.tf_export import keras_export -# Base classes -from keras.metrics.base_metric import Metric -from keras.metrics.base_metric import Reduce -from keras.metrics.base_metric import Sum +# Base classes and utilities from keras.metrics.base_metric import Mean from keras.metrics.base_metric import MeanMetricWrapper from keras.metrics.base_metric import MeanTensor +from keras.metrics.base_metric import Metric +from keras.metrics.base_metric import Reduce +from keras.metrics.base_metric import Sum from keras.metrics.base_metric import SumOverBatchSize from keras.metrics.base_metric import SumOverBatchSizeMetricWrapper - -# Individual metric classes -from keras.metrics.metrics import MeanRelativeError -from keras.metrics.metrics import Accuracy -from keras.metrics.metrics import BinaryAccuracy -from keras.metrics.metrics import CategoricalAccuracy -from keras.metrics.metrics import SparseCategoricalAccuracy -from keras.metrics.metrics import TopKCategoricalAccuracy -from keras.metrics.metrics import SparseTopKCategoricalAccuracy -from keras.metrics.metrics import FalsePositives -from keras.metrics.metrics import FalseNegatives -from keras.metrics.metrics import TrueNegatives -from keras.metrics.metrics import TruePositives -from keras.metrics.metrics import Precision -from keras.metrics.metrics import Recall -from keras.metrics.metrics import SensitivityAtSpecificity -from keras.metrics.metrics import SpecificityAtSensitivity -from keras.metrics.metrics import PrecisionAtRecall -from keras.metrics.metrics import RecallAtPrecision -from keras.metrics.metrics import AUC -from keras.metrics.metrics import CosineSimilarity -from keras.metrics.metrics import MeanAbsoluteError -from keras.metrics.metrics import MeanAbsolutePercentageError -from 
keras.metrics.metrics import MeanSquaredError -from keras.metrics.metrics import MeanSquaredLogarithmicError -from keras.metrics.metrics import Hinge -from keras.metrics.metrics import SquaredHinge -from keras.metrics.metrics import CategoricalHinge -from keras.metrics.metrics import RootMeanSquaredError -from keras.metrics.metrics import LogCoshError -from keras.metrics.metrics import Poisson -from keras.metrics.metrics import KLDivergence -from keras.metrics.metrics import IoU -from keras.metrics.metrics import BinaryIoU -from keras.metrics.metrics import MeanIoU -from keras.metrics.metrics import OneHotIoU -from keras.metrics.metrics import OneHotMeanIoU -from keras.metrics.metrics import BinaryCrossentropy -from keras.metrics.metrics import CategoricalCrossentropy -from keras.metrics.metrics import SparseCategoricalCrossentropy - -from keras.metrics.metrics import _IoUBase -from keras.metrics.metrics import _ConfusionMatrixConditionCount -from keras.metrics.metrics import SensitivitySpecificityBase - -# Metric functions -from keras.metrics.metrics import accuracy -from keras.metrics.metrics import binary_accuracy -from keras.metrics.metrics import categorical_accuracy -from keras.metrics.metrics import sparse_categorical_accuracy -from keras.metrics.metrics import top_k_categorical_accuracy -from keras.metrics.metrics import sparse_top_k_categorical_accuracy -from keras.metrics.metrics import cosine_similarity -from keras.metrics.metrics import binary_crossentropy -from keras.metrics.metrics import categorical_crossentropy -from keras.metrics.metrics import categorical_hinge -from keras.metrics.metrics import hinge -from keras.metrics.metrics import squared_hinge -from keras.metrics.metrics import kullback_leibler_divergence -from keras.metrics.metrics import logcosh -from keras.metrics.metrics import mean_absolute_error -from keras.metrics.metrics import mean_absolute_percentage_error -from keras.metrics.metrics import mean_squared_error -from keras.metrics.metrics import mean_squared_logarithmic_error -from keras.metrics.metrics import poisson -from keras.metrics.metrics import sparse_categorical_crossentropy - -# Utilities from keras.metrics.base_metric import clone_metric from keras.metrics.base_metric import clone_metrics +from keras.saving.legacy import serialization as legacy_serialization +from keras.saving.serialization_lib import deserialize_keras_object +from keras.saving.serialization_lib import serialize_keras_object + +from keras.metrics.py_metric import PyMetric + +# Individual metric classes + +# Accuracy metrics +from keras.metrics.accuracy_metrics import Accuracy +from keras.metrics.accuracy_metrics import BinaryAccuracy +from keras.metrics.accuracy_metrics import CategoricalAccuracy +from keras.metrics.accuracy_metrics import SparseCategoricalAccuracy +from keras.metrics.accuracy_metrics import SparseTopKCategoricalAccuracy +from keras.metrics.accuracy_metrics import TopKCategoricalAccuracy + +from keras.metrics.accuracy_metrics import accuracy +from keras.metrics.accuracy_metrics import binary_accuracy +from keras.metrics.accuracy_metrics import categorical_accuracy +from keras.metrics.accuracy_metrics import sparse_categorical_accuracy +from keras.metrics.accuracy_metrics import sparse_top_k_categorical_accuracy +from keras.metrics.accuracy_metrics import top_k_categorical_accuracy + +# Probabilistic metrics +from keras.metrics.probabilistic_metrics import BinaryCrossentropy +from keras.metrics.probabilistic_metrics import CategoricalCrossentropy +from 
keras.metrics.probabilistic_metrics import KLDivergence +from keras.metrics.probabilistic_metrics import Poisson +from keras.metrics.probabilistic_metrics import SparseCategoricalCrossentropy + +from keras.metrics.probabilistic_metrics import binary_crossentropy +from keras.metrics.probabilistic_metrics import categorical_crossentropy +from keras.metrics.probabilistic_metrics import poisson +from keras.metrics.probabilistic_metrics import kullback_leibler_divergence +from keras.metrics.probabilistic_metrics import sparse_categorical_crossentropy + +# Regression metrics +from keras.metrics.regression_metrics import CosineSimilarity +from keras.metrics.regression_metrics import LogCoshError +from keras.metrics.regression_metrics import MeanAbsoluteError +from keras.metrics.regression_metrics import MeanAbsolutePercentageError +from keras.metrics.regression_metrics import MeanRelativeError +from keras.metrics.regression_metrics import MeanSquaredError +from keras.metrics.regression_metrics import MeanSquaredLogarithmicError +from keras.metrics.regression_metrics import RootMeanSquaredError +from keras.metrics.regression_metrics import R2Score + +from keras.metrics.regression_metrics import cosine_similarity +from keras.metrics.regression_metrics import logcosh +from keras.metrics.regression_metrics import mean_absolute_error +from keras.metrics.regression_metrics import mean_absolute_percentage_error +from keras.metrics.regression_metrics import mean_squared_error +from keras.metrics.regression_metrics import mean_squared_logarithmic_error + +# Confusion metrics +from keras.metrics.confusion_metrics import AUC +from keras.metrics.confusion_metrics import FalseNegatives +from keras.metrics.confusion_metrics import FalsePositives +from keras.metrics.confusion_metrics import Precision +from keras.metrics.confusion_metrics import PrecisionAtRecall +from keras.metrics.confusion_metrics import Recall +from keras.metrics.confusion_metrics import RecallAtPrecision +from keras.metrics.confusion_metrics import SensitivityAtSpecificity +from keras.metrics.confusion_metrics import SensitivitySpecificityBase +from keras.metrics.confusion_metrics import SpecificityAtSensitivity +from keras.metrics.confusion_metrics import TrueNegatives +from keras.metrics.confusion_metrics import TruePositives + +# F-Scores +from keras.metrics.f_score_metrics import FBetaScore +from keras.metrics.f_score_metrics import F1Score + +# IoU metrics +from keras.metrics.iou_metrics import BinaryIoU +from keras.metrics.iou_metrics import IoU +from keras.metrics.iou_metrics import MeanIoU +from keras.metrics.iou_metrics import OneHotIoU +from keras.metrics.iou_metrics import OneHotMeanIoU + +# Hinge metrics +from keras.metrics.hinge_metrics import CategoricalHinge +from keras.metrics.hinge_metrics import Hinge +from keras.metrics.hinge_metrics import SquaredHinge + +from keras.metrics.hinge_metrics import categorical_hinge +from keras.metrics.hinge_metrics import squared_hinge +from keras.metrics.hinge_metrics import hinge + # Aliases acc = ACC = accuracy bce = BCE = binary_crossentropy @@ -110,78 +129,96 @@ cosine_proximity = cosine_similarity -@keras_export('keras.metrics.serialize') -def serialize(metric): - """Serializes metric function or `Metric` instance. - - Args: - metric: A Keras `Metric` instance or a metric function. - - Returns: - Metric configuration dictionary. 
- """ - return serialize_keras_object(metric) - - -@keras_export('keras.metrics.deserialize') -def deserialize(config, custom_objects=None): - """Deserializes a serialized metric class/function instance. - - Args: - config: Metric configuration. - custom_objects: Optional dictionary mapping names (strings) to custom - objects (classes and functions) to be considered during deserialization. - - Returns: - A Keras `Metric` instance or a metric function. - """ - return deserialize_keras_object( - config, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='metric function') - - -@keras_export('keras.metrics.get') +@keras_export("keras.metrics.serialize") +def serialize(metric, use_legacy_format=False): + """Serializes metric function or `Metric` instance. + + Args: + metric: A Keras `Metric` instance or a metric function. + + Returns: + Metric configuration dictionary. + """ + if metric is None: + return None + if not isinstance(metric, Metric): + warnings.warn( + "The `keras.metrics.serialize()` API should only be used for " + "objects of type `keras.metrics.Metric`. Found an instance of " + f"type {type(metric)}, which may lead to improper serialization." + ) + if use_legacy_format: + return legacy_serialization.serialize_keras_object(metric) + return serialize_keras_object(metric) + + +@keras_export("keras.metrics.deserialize") +def deserialize(config, custom_objects=None, use_legacy_format=False): + """Deserializes a serialized metric class/function instance. + + Args: + config: Metric configuration. + custom_objects: Optional dictionary mapping names (strings) to custom + objects (classes and functions) to be considered during deserialization. + + Returns: + A Keras `Metric` instance or a metric function. + """ + if use_legacy_format: + return legacy_serialization.deserialize_keras_object( + config, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="metric function", + ) + return deserialize_keras_object( + config, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="metric function", + ) + + +@keras_export("keras.metrics.get") def get(identifier): - """Retrieves a Keras metric as a `function`/`Metric` class instance. - - The `identifier` may be the string name of a metric function or class. - - >>> metric = tf.keras.metrics.get("categorical_crossentropy") - >>> type(metric) - - >>> metric = tf.keras.metrics.get("CategoricalCrossentropy") - >>> type(metric) - - - You can also specify `config` of the metric to this function by passing dict - containing `class_name` and `config` as an identifier. Also note that the - `class_name` must map to a `Metric` class - - >>> identifier = {"class_name": "CategoricalCrossentropy", - ... "config": {"from_logits": True}} - >>> metric = tf.keras.metrics.get(identifier) - >>> type(metric) - - - Args: - identifier: A metric identifier. One of None or string name of a metric - function/class or metric configuration dictionary or a metric function or - a metric class instance - - Returns: - A Keras metric as a `function`/ `Metric` class instance. - - Raises: - ValueError: If `identifier` cannot be interpreted. - """ - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, str): - return deserialize(str(identifier)) - elif callable(identifier): - return identifier - else: - raise ValueError( - f'Could not interpret metric identifier: {identifier}') + """Retrieves a Keras metric as a `function`/`Metric` class instance. 
+ + The `identifier` may be the string name of a metric function or class. + + >>> metric = tf.keras.metrics.get("categorical_crossentropy") + >>> type(metric) + + >>> metric = tf.keras.metrics.get("CategoricalCrossentropy") + >>> type(metric) + + + You can also specify `config` of the metric to this function by passing dict + containing `class_name` and `config` as an identifier. Also note that the + `class_name` must map to a `Metric` class + + >>> identifier = {"class_name": "CategoricalCrossentropy", + ... "config": {"from_logits": True}} + >>> metric = tf.keras.metrics.get(identifier) + >>> type(metric) + + + Args: + identifier: A metric identifier. One of None or string name of a metric + function/class or metric configuration dictionary or a metric function + or a metric class instance + + Returns: + A Keras metric as a `function`/ `Metric` class instance. + + Raises: + ValueError: If `identifier` cannot be interpreted. + """ + if isinstance(identifier, dict): + use_legacy_format = "module" not in identifier + return deserialize(identifier, use_legacy_format=use_legacy_format) + elif isinstance(identifier, str): + return deserialize(str(identifier)) + elif callable(identifier): + return identifier + else: + raise ValueError(f"Could not interpret metric identifier: {identifier}") diff --git a/keras/metrics/accuracy_metrics.py b/keras/metrics/accuracy_metrics.py new file mode 100644 index 000000000000..98e130a8efc7 --- /dev/null +++ b/keras/metrics/accuracy_metrics.py @@ -0,0 +1,527 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Accuracy metrics.""" + +import tensorflow.compat.v2 as tf + +from keras import backend +from keras.dtensor import utils as dtensor_utils +from keras.metrics import base_metric +from keras.utils import metrics_utils + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@keras_export("keras.metrics.Accuracy") +class Accuracy(base_metric.MeanMetricWrapper): + """Calculates how often predictions equal labels. + + This metric creates two local variables, `total` and `count` that are used + to compute the frequency with which `y_pred` matches `y_true`. This + frequency is ultimately returned as `binary accuracy`: an idempotent + operation that simply divides `total` by `count`. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.Accuracy() + >>> m.update_state([[1], [2], [3], [4]], [[0], [2], [3], [4]]) + >>> m.result().numpy() + 0.75 + + >>> m.reset_state() + >>> m.update_state([[1], [2], [3], [4]], [[0], [2], [3], [4]], + ... 
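One detail of the rewritten get() above worth calling out: dict identifiers are routed to the legacy deserializer whenever they lack a "module" key, so configs produced by older serialize() calls keep resolving. A usage sketch (assumes a TF/Keras build containing this patch):

```python
import tensorflow as tf

# String identifiers resolve to a function or a Metric class by name.
fn = tf.keras.metrics.get("categorical_crossentropy")
obj = tf.keras.metrics.get("CategoricalCrossentropy")

# A dict config without a "module" key takes the legacy path shown above.
legacy = tf.keras.metrics.get(
    {"class_name": "CategoricalCrossentropy", "config": {"from_logits": True}}
)
print(type(fn), type(obj), type(legacy))
```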
sample_weight=[1, 1, 0, 0])
+    >>> m.result().numpy()
+    0.5
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd',
+                  loss='mse',
+                  metrics=[tf.keras.metrics.Accuracy()])
+    ```
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(self, name="accuracy", dtype=None):
+        super().__init__(accuracy, name, dtype=dtype)
+
+
+@keras_export("keras.metrics.BinaryAccuracy")
+class BinaryAccuracy(base_metric.MeanMetricWrapper):
+    """Calculates how often predictions match binary labels.
+
+    This metric creates two local variables, `total` and `count` that are used
+    to compute the frequency with which `y_pred` matches `y_true`. This
+    frequency is ultimately returned as `binary accuracy`: an idempotent
+    operation that simply divides `total` by `count`.
+
+    If `sample_weight` is `None`, weights default to 1.
+    Use `sample_weight` of 0 to mask values.
+
+    Args:
+      name: (Optional) string name of the metric instance.
+      dtype: (Optional) data type of the metric result.
+      threshold: (Optional) Float representing the threshold for deciding
+        whether prediction values are 1 or 0.
+
+    Standalone usage:
+
+    >>> m = tf.keras.metrics.BinaryAccuracy()
+    >>> m.update_state([[1], [1], [0], [0]], [[0.98], [1], [0], [0.6]])
+    >>> m.result().numpy()
+    0.75
+
+    >>> m.reset_state()
+    >>> m.update_state([[1], [1], [0], [0]], [[0.98], [1], [0], [0.6]],
+    ...                sample_weight=[1, 0, 0, 1])
+    >>> m.result().numpy()
+    0.5
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd',
+                  loss='mse',
+                  metrics=[tf.keras.metrics.BinaryAccuracy()])
+    ```
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(self, name="binary_accuracy", dtype=None, threshold=0.5):
+        super().__init__(
+            metrics_utils.binary_matches, name, dtype=dtype, threshold=threshold
+        )
+
+
+@keras_export("keras.metrics.CategoricalAccuracy")
+class CategoricalAccuracy(base_metric.MeanMetricWrapper):
+    """Calculates how often predictions match one-hot labels.
+
+    You can provide logits of classes as `y_pred`, since argmax of
+    logits and probabilities is the same.
+
+    This metric creates two local variables, `total` and `count` that are used
+    to compute the frequency with which `y_pred` matches `y_true`. This
+    frequency is ultimately returned as `categorical accuracy`: an idempotent
+    operation that simply divides `total` by `count`.
+
+    `y_pred` and `y_true` should be passed in as vectors of probabilities,
+    rather than as labels. If necessary, use `tf.one_hot` to expand `y_true`
+    as a vector.
+
+    If `sample_weight` is `None`, weights default to 1.
+    Use `sample_weight` of 0 to mask values.
+
+    Args:
+      name: (Optional) string name of the metric instance.
+      dtype: (Optional) data type of the metric result.
+
+    Standalone usage:
+
+    >>> m = tf.keras.metrics.CategoricalAccuracy()
+    >>> m.update_state([[0, 0, 1], [0, 1, 0]], [[0.1, 0.9, 0.8],
+    ...                 [0.05, 0.95, 0]])
+    >>> m.result().numpy()
+    0.5
+
+    >>> m.reset_state()
+    >>> m.update_state([[0, 0, 1], [0, 1, 0]], [[0.1, 0.9, 0.8],
+    ...                 [0.05, 0.95, 0]],
+    ...
sample_weight=[0.7, 0.3])
+    >>> m.result().numpy()
+    0.3
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(
+        optimizer='sgd',
+        loss='mse',
+        metrics=[tf.keras.metrics.CategoricalAccuracy()])
+    ```
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(self, name="categorical_accuracy", dtype=None):
+        super().__init__(
+            lambda y_true, y_pred: metrics_utils.sparse_categorical_matches(
+                tf.math.argmax(y_true, axis=-1), y_pred
+            ),
+            name,
+            dtype=dtype,
+        )
+
+
+@keras_export("keras.metrics.SparseCategoricalAccuracy")
+class SparseCategoricalAccuracy(base_metric.MeanMetricWrapper):
+    """Calculates how often predictions match integer labels.
+
+    ```python
+    acc = np.dot(sample_weight, np.equal(y_true, np.argmax(y_pred, axis=1)))
+    ```
+
+    You can provide logits of classes as `y_pred`, since argmax of
+    logits and probabilities is the same.
+
+    This metric creates two local variables, `total` and `count` that are used
+    to compute the frequency with which `y_pred` matches `y_true`. This
+    frequency is ultimately returned as `sparse categorical accuracy`: an
+    idempotent operation that simply divides `total` by `count`.
+
+    If `sample_weight` is `None`, weights default to 1.
+    Use `sample_weight` of 0 to mask values.
+
+    Args:
+      name: (Optional) string name of the metric instance.
+      dtype: (Optional) data type of the metric result.
+
+    Standalone usage:
+
+    >>> m = tf.keras.metrics.SparseCategoricalAccuracy()
+    >>> m.update_state([[2], [1]], [[0.1, 0.6, 0.3], [0.05, 0.95, 0]])
+    >>> m.result().numpy()
+    0.5
+
+    >>> m.reset_state()
+    >>> m.update_state([[2], [1]], [[0.1, 0.6, 0.3], [0.05, 0.95, 0]],
+    ...                sample_weight=[0.7, 0.3])
+    >>> m.result().numpy()
+    0.3
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(
+        optimizer='sgd',
+        loss='mse',
+        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
+    ```
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(self, name="sparse_categorical_accuracy", dtype=None):
+        super().__init__(
+            metrics_utils.sparse_categorical_matches, name, dtype=dtype
+        )
+
+
+_SPARSE_CATEGORICAL_UPDATE_STATE_DOCSTRING = """Accumulates metric statistics.
+
+For sparse categorical metrics, the shapes of `y_true` and `y_pred` are
+different.
+
+Args:
+  y_true: Ground truth label values. shape = `[batch_size, d0, .. dN-1]` or
+    shape = `[batch_size, d0, .. dN-1, 1]`.
+  y_pred: The predicted probability values. shape = `[batch_size, d0, .. dN]`.
+  sample_weight: Optional `sample_weight` acts as a
+    coefficient for the metric. If a scalar is provided, then the metric is
+    simply scaled by the given value. If `sample_weight` is a tensor of size
+    `[batch_size]`, then the metric for each sample of the batch is rescaled
+    by the corresponding element in the `sample_weight` vector. If the shape
+    of `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted
+    to this shape), then each metric element of `y_pred` is scaled by the
+    corresponding value of `sample_weight`. (Note on `dN-1`: all metric
+    functions reduce by 1 dimension, usually the last axis (-1)).
+
+Returns:
+  Update op.
+"""
+
+SparseCategoricalAccuracy.update_state.__doc__ = (
+    _SPARSE_CATEGORICAL_UPDATE_STATE_DOCSTRING
+)
+
+
+@keras_export("keras.metrics.TopKCategoricalAccuracy")
+class TopKCategoricalAccuracy(base_metric.MeanMetricWrapper):
+    """Computes how often targets are in the top `K` predictions.
+
+    Args:
+      k: (Optional) Number of top elements to look at for computing accuracy.
+        Defaults to `5`.
+      name: (Optional) string name of the metric instance.
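The two class docstrings above describe the same computation on two label encodings: integer labels for the sparse variant, one-hot labels for `CategoricalAccuracy`. A small sketch of that equivalence, assuming three classes and illustrative values:

```python
# Sketch: sparse vs. one-hot accuracy agree when labels are related by
# tf.one_hot. Values are illustrative.
import tensorflow as tf

y_sparse = tf.constant([2, 1])
y_onehot = tf.one_hot(y_sparse, depth=3)
y_pred = tf.constant([[0.1, 0.6, 0.3], [0.05, 0.95, 0.0]])

m1 = tf.keras.metrics.SparseCategoricalAccuracy()
m1.update_state(y_sparse, y_pred)

m2 = tf.keras.metrics.CategoricalAccuracy()
m2.update_state(y_onehot, y_pred)

assert m1.result().numpy() == m2.result().numpy()  # 0.5 for both
```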
+      dtype: (Optional) data type of the metric result.
+
+    Standalone usage:
+
+    >>> m = tf.keras.metrics.TopKCategoricalAccuracy(k=1)
+    >>> m.update_state([[0, 0, 1], [0, 1, 0]],
+    ...                [[0.1, 0.9, 0.8], [0.05, 0.95, 0]])
+    >>> m.result().numpy()
+    0.5
+
+    >>> m.reset_state()
+    >>> m.update_state([[0, 0, 1], [0, 1, 0]],
+    ...                [[0.1, 0.9, 0.8], [0.05, 0.95, 0]],
+    ...                sample_weight=[0.7, 0.3])
+    >>> m.result().numpy()
+    0.3
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd',
+                  loss='mse',
+                  metrics=[tf.keras.metrics.TopKCategoricalAccuracy()])
+    ```
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(self, k=5, name="top_k_categorical_accuracy", dtype=None):
+        super().__init__(
+            lambda yt, yp, k: metrics_utils.sparse_top_k_categorical_matches(
+                tf.math.argmax(yt, axis=-1), yp, k
+            ),
+            name,
+            dtype=dtype,
+            k=k,
+        )
+
+
+@keras_export("keras.metrics.SparseTopKCategoricalAccuracy")
+class SparseTopKCategoricalAccuracy(base_metric.MeanMetricWrapper):
+    """Computes how often integer targets are in the top `K` predictions.
+
+    Args:
+      k: (Optional) Number of top elements to look at for computing accuracy.
+        Defaults to `5`.
+      name: (Optional) string name of the metric instance.
+      dtype: (Optional) data type of the metric result.
+
+    Standalone usage:
+
+    >>> m = tf.keras.metrics.SparseTopKCategoricalAccuracy(k=1)
+    >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]])
+    >>> m.result().numpy()
+    0.5
+
+    >>> m.reset_state()
+    >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]],
+    ...                sample_weight=[0.7, 0.3])
+    >>> m.result().numpy()
+    0.3
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(
+        optimizer='sgd',
+        loss='mse',
+        metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
+    ```
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(
+        self, k=5, name="sparse_top_k_categorical_accuracy", dtype=None
+    ):
+        super().__init__(
+            metrics_utils.sparse_top_k_categorical_matches,
+            name,
+            dtype=dtype,
+            k=k,
+        )
+
+
+SparseTopKCategoricalAccuracy.update_state.__doc__ = (
+    _SPARSE_CATEGORICAL_UPDATE_STATE_DOCSTRING
+)
+
+
+def accuracy(y_true, y_pred):
+    [
+        y_pred,
+        y_true,
+    ], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values(
+        [y_pred, y_true]
+    )
+    y_true.shape.assert_is_compatible_with(y_pred.shape)
+    if y_true.dtype != y_pred.dtype:
+        y_pred = tf.cast(y_pred, y_true.dtype)
+    return tf.cast(tf.equal(y_true, y_pred), backend.floatx())
+
+
+@keras_export("keras.metrics.binary_accuracy")
+@tf.__internal__.dispatch.add_dispatch_support
+def binary_accuracy(y_true, y_pred, threshold=0.5):
+    """Calculates how often predictions match binary labels.
+
+    Standalone usage:
+    >>> y_true = [[1], [1], [0], [0]]
+    >>> y_pred = [[1], [1], [0], [0]]
+    >>> m = tf.keras.metrics.binary_accuracy(y_true, y_pred)
+    >>> assert m.shape == (4,)
+    >>> m.numpy()
+    array([1., 1., 1., 1.], dtype=float32)
+
+    Args:
+      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
+      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+      threshold: (Optional) Float representing the threshold for deciding
+        whether prediction values are 1 or 0.
+
+    Returns:
+      Binary accuracy values. shape = `[batch_size, d0, .. dN-1]`
+    """
+    # Note: calls metrics_utils.binary_matches with mean reduction. This
+    # maintains public facing binary_accuracy behavior and separates it from
+    # the vital behavior of the binary_matches method needed in backend
+    # dependencies.
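To make the `threshold` semantics concrete, here is a hand-computed mirror of `binary_accuracy` in NumPy. The strict `>` comparison is the assumption being illustrated; values are illustrative:

```python
# Sketch: predictions above the threshold count as class 1, then each
# sample's matches are averaged over the last axis.
import numpy as np
import tensorflow as tf

y_true = np.array([[1.0], [1.0], [0.0], [0.0]])
y_pred = np.array([[0.9], [0.6], [0.4], [0.8]])

manual = np.mean((y_pred > 0.7).astype(np.float32) == y_true, axis=-1)
keras_val = tf.keras.metrics.binary_accuracy(y_true, y_pred, threshold=0.7)

np.testing.assert_allclose(keras_val.numpy(), manual)  # [1., 0., 1., 0.]
```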
+
+    return tf.reduce_mean(
+        metrics_utils.binary_matches(y_true, y_pred, threshold), axis=-1
+    )
+
+
+@keras_export("keras.metrics.categorical_accuracy")
+@tf.__internal__.dispatch.add_dispatch_support
+def categorical_accuracy(y_true, y_pred):
+    """Calculates how often predictions match one-hot labels.
+
+    Standalone usage:
+    >>> y_true = [[0, 0, 1], [0, 1, 0]]
+    >>> y_pred = [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]
+    >>> m = tf.keras.metrics.categorical_accuracy(y_true, y_pred)
+    >>> assert m.shape == (2,)
+    >>> m.numpy()
+    array([0., 1.], dtype=float32)
+
+    You can provide logits of classes as `y_pred`, since argmax of
+    logits and probabilities is the same.
+
+    Args:
+      y_true: One-hot ground truth values.
+      y_pred: The prediction values.
+
+    Returns:
+      Categorical accuracy values.
+    """
+    # Note: wraps metrics_utils.sparse_categorical_matches. This separates
+    # public facing categorical_accuracy behavior from the vital behavior of
+    # the sparse_categorical_matches method needed in backend dependencies.
+
+    return metrics_utils.sparse_categorical_matches(
+        tf.math.argmax(y_true, axis=-1), y_pred
+    )
+
+
+@keras_export("keras.metrics.sparse_categorical_accuracy")
+@tf.__internal__.dispatch.add_dispatch_support
+def sparse_categorical_accuracy(y_true, y_pred):
+    """Calculates how often predictions match integer labels.
+
+    Standalone usage:
+    >>> y_true = [2, 1]
+    >>> y_pred = [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]
+    >>> m = tf.keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
+    >>> assert m.shape == (2,)
+    >>> m.numpy()
+    array([0., 1.], dtype=float32)
+
+    You can provide logits of classes as `y_pred`, since argmax of
+    logits and probabilities is the same.
+
+    Args:
+      y_true: Integer ground truth values.
+      y_pred: The prediction values.
+
+    Returns:
+      Sparse categorical accuracy values.
+    """
+    # Note: wraps the metrics_utils.sparse_categorical_matches method and
+    # checks for squeezing to align with expected public facing behavior.
+    # This separates public facing sparse_categorical_accuracy behavior from
+    # the vital behavior of the sparse_categorical_matches method needed in
+    # backend dependencies.
+
+    matches = metrics_utils.sparse_categorical_matches(y_true, y_pred)
+
+    # if shape is (num_samples, 1) squeeze
+    if matches.shape.ndims > 1 and matches.shape[-1] == 1:
+        matches = tf.squeeze(matches, [-1])
+
+    return matches
+
+
+@keras_export("keras.metrics.top_k_categorical_accuracy")
+@tf.__internal__.dispatch.add_dispatch_support
+def top_k_categorical_accuracy(y_true, y_pred, k=5):
+    """Computes how often targets are in the top `K` predictions.
+
+    Standalone usage:
+    >>> y_true = [[0, 0, 1], [0, 1, 0]]
+    >>> y_pred = [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]
+    >>> m = tf.keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=3)
+    >>> assert m.shape == (2,)
+    >>> m.numpy()
+    array([1., 1.], dtype=float32)
+
+    Args:
+      y_true: The ground truth values.
+      y_pred: The prediction values.
+      k: (Optional) Number of top elements to look at for computing accuracy.
+        Defaults to `5`.
+
+    Returns:
+      Top K categorical accuracy value.
+    """
+    # Note: wraps metrics_utils.sparse_top_k_categorical_matches. This
+    # separates public facing top_k_categorical_accuracy behavior from the
+    # vital behavior of the sparse_top_k_categorical_matches method needed in
+    # backend dependencies.
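A hand-rolled cross-check of the top-k reduction these functions perform. This sketch uses `np.argsort` for clarity; the library's in-top-k op also counts ties at the k-th value, which these illustrative inputs avoid:

```python
# Sketch: a sample counts as correct when its true class index is among
# the k largest predicted scores.
import numpy as np
import tensorflow as tf

y_true = np.array([[0, 0, 1], [0, 1, 0]], dtype=np.float32)
y_pred = np.array([[0.1, 0.9, 0.8], [0.05, 0.95, 0.0]], dtype=np.float32)

k = 2
topk = np.argsort(-y_pred, axis=-1)[:, :k]      # indices of the k largest
labels = np.argmax(y_true, axis=-1)
manual = np.array(
    [label in row for label, row in zip(labels, topk)], dtype=np.float32
)

keras_val = tf.keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=k)
np.testing.assert_allclose(keras_val.numpy(), manual)  # [1., 1.]
```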
+
+    return metrics_utils.sparse_top_k_categorical_matches(
+        tf.math.argmax(y_true, axis=-1), y_pred, k
+    )
+
+
+@keras_export("keras.metrics.sparse_top_k_categorical_accuracy")
+@tf.__internal__.dispatch.add_dispatch_support
+def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
+    """Computes how often integer targets are in the top `K` predictions.
+
+    Standalone usage:
+    >>> y_true = [2, 1]
+    >>> y_pred = [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]
+    >>> m = tf.keras.metrics.sparse_top_k_categorical_accuracy(
+    ...     y_true, y_pred, k=3)
+    >>> assert m.shape == (2,)
+    >>> m.numpy()
+    array([1., 1.], dtype=float32)
+
+    Args:
+      y_true: tensor of true targets.
+      y_pred: tensor of predicted targets.
+      k: (Optional) Number of top elements to look at for computing accuracy.
+        Defaults to `5`.
+
+    Returns:
+      Sparse top K categorical accuracy value.
+    """
+    # Note: wraps metrics_utils.sparse_top_k_categorical_matches. This
+    # separates public facing sparse_top_k_categorical_accuracy behavior from
+    # the vital behavior of the sparse_top_k_categorical_matches method
+    # needed in backend dependencies.
+
+    return metrics_utils.sparse_top_k_categorical_matches(y_true, y_pred, k)
diff --git a/keras/metrics/accuracy_metrics_test.py b/keras/metrics/accuracy_metrics_test.py
new file mode 100644
index 000000000000..a89ded8016cd
--- /dev/null
+++ b/keras/metrics/accuracy_metrics_test.py
@@ -0,0 +1,407 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for accuracy metrics."""
+
+import tensorflow.compat.v2 as tf
+
+from keras import Model
+from keras import layers
+from keras import metrics
+from keras.testing_infra import test_combinations
+
+
+@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"]))
+class AccuracyTest(tf.test.TestCase):
+    def test_accuracy(self):
+        acc_obj = metrics.Accuracy(name="my_acc")
+
+        # check config
+        self.assertEqual(acc_obj.name, "my_acc")
+        self.assertTrue(acc_obj.stateful)
+        self.assertEqual(len(acc_obj.variables), 2)
+        self.assertEqual(acc_obj.dtype, tf.float32)
+        self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables))
+
+        # verify that correct value is returned
+        update_op = acc_obj.update_state(
+            [[1], [2], [3], [4]], [[1], [2], [3], [4]]
+        )
+        self.evaluate(update_op)
+        result = self.evaluate(acc_obj.result())
+        self.assertEqual(result, 1)  # 4/4
+
+        # Check save and restore config
+        a2 = metrics.Accuracy.from_config(acc_obj.get_config())
+        self.assertEqual(a2.name, "my_acc")
+        self.assertTrue(a2.stateful)
+        self.assertEqual(len(a2.variables), 2)
+        self.assertEqual(a2.dtype, tf.float32)
+
+        # check with sample_weight
+        result_t = acc_obj([[2], [1]], [[2], [0]], sample_weight=[[0.5], [0.2]])
+        result = self.evaluate(result_t)
+        self.assertAlmostEqual(result, 0.96, 2)  # 4.5/4.7
+
+    def test_accuracy_ragged(self):
+        acc_obj = metrics.Accuracy(name="my_acc")
+        self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables))
+
+        # verify that correct value is returned
+        rt1 = tf.ragged.constant([[1], [2], [3], [4]])
+        rt2 = tf.ragged.constant([[1], [2], [3], [4]])
+        update_op = acc_obj.update_state(rt1, rt2)
+        self.evaluate(update_op)
+        result = self.evaluate(acc_obj.result())
+        self.assertEqual(result, 1)  # 4/4
+
+        # check with sample_weight
+        rt1 = tf.ragged.constant([[2], [1]])
+        rt2 = tf.ragged.constant([[2], [0]])
+        sw_ragged = tf.ragged.constant([[0.5], [0.2]])
+        result_t = acc_obj(rt1, rt2, sample_weight=sw_ragged)
+        result = self.evaluate(result_t)
+        self.assertAlmostEqual(result, 0.96, 2)  # 4.5/4.7
+
+    def test_binary_accuracy(self):
+        acc_obj = metrics.BinaryAccuracy(name="my_acc")
+
+        # check config
+        self.assertEqual(acc_obj.name, "my_acc")
+        self.assertTrue(acc_obj.stateful)
+        self.assertEqual(len(acc_obj.variables), 2)
+        self.assertEqual(acc_obj.dtype, tf.float32)
+        self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables))
+
+        # verify that correct value is returned
+        update_op = acc_obj.update_state([[1], [0]], [[1], [0]])
+        self.evaluate(update_op)
+        result = self.evaluate(acc_obj.result())
+        self.assertEqual(result, 1)  # 2/2
+
+        # check y_pred squeeze
+        update_op = acc_obj.update_state([[1], [1]], [[[1]], [[0]]])
+        self.evaluate(update_op)
+        result = self.evaluate(acc_obj.result())
+        self.assertAlmostEqual(result, 0.75, 2)  # 3/4
+
+        # check y_true squeeze
+        result_t = acc_obj([[[1]], [[1]]], [[1], [0]])
+        result = self.evaluate(result_t)
+        self.assertAlmostEqual(result, 0.67, 2)  # 4/6
+
+        # check with sample_weight
+        result_t = acc_obj([[1], [1]], [[1], [0]], [[0.5], [0.2]])
+        result = self.evaluate(result_t)
+        self.assertAlmostEqual(result, 0.67, 2)  # 4.5/6.7
+
+    def test_binary_accuracy_ragged(self):
+        acc_obj = metrics.BinaryAccuracy(name="my_acc")
+        self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables))
+
+        # verify that correct value is returned
+        rt1 = tf.ragged.constant([[1], [0]])
+        rt2 = tf.ragged.constant([[1], [0]])
+
update_op = acc_obj.update_state(rt1, rt2) + self.evaluate(update_op) + result = self.evaluate(acc_obj.result()) + self.assertEqual(result, 1) # 2/2 + + # check y_true squeeze only supported for dense tensors and is + # not supported by ragged tensor (different ranks). --> error + rt1 = tf.ragged.constant([[[1], [1]]]) + rt2 = tf.ragged.constant([[1], [0]]) + with self.assertRaises(ValueError): + result_t = acc_obj(rt1, rt2) + result = self.evaluate(result_t) + + def test_binary_accuracy_threshold(self): + acc_obj = metrics.BinaryAccuracy(threshold=0.7) + self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) + result_t = acc_obj([[1], [1], [0], [0]], [[0.9], [0.6], [0.4], [0.8]]) + result = self.evaluate(result_t) + self.assertAlmostEqual(result, 0.5, 2) + + def test_binary_accuracy_threshold_ragged(self): + acc_obj = metrics.BinaryAccuracy(threshold=0.7) + self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) + rt1 = tf.ragged.constant([[1], [1], [0], [0]]) + rt2 = tf.ragged.constant([[0.9], [0.6], [0.4], [0.8]]) + result_t = acc_obj(rt1, rt2) + result = self.evaluate(result_t) + self.assertAlmostEqual(result, 0.5, 2) + + def test_categorical_accuracy(self): + acc_obj = metrics.CategoricalAccuracy(name="my_acc") + + # check config + self.assertEqual(acc_obj.name, "my_acc") + self.assertTrue(acc_obj.stateful) + self.assertEqual(len(acc_obj.variables), 2) + self.assertEqual(acc_obj.dtype, tf.float32) + self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) + + # verify that correct value is returned + update_op = acc_obj.update_state( + [[0, 0, 1], [0, 1, 0]], [[0.1, 0.1, 0.8], [0.05, 0.95, 0]] + ) + self.evaluate(update_op) + result = self.evaluate(acc_obj.result()) + self.assertEqual(result, 1) # 2/2 + + # check with sample_weight + result_t = acc_obj( + [[0, 0, 1], [0, 1, 0]], + [[0.1, 0.1, 0.8], [0.05, 0, 0.95]], + [[0.5], [0.2]], + ) + result = self.evaluate(result_t) + self.assertAlmostEqual(result, 0.93, 2) # 2.5/2.7 + + def test_categorical_accuracy_ragged(self): + acc_obj = metrics.CategoricalAccuracy(name="my_acc") + self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) + + # verify that correct value is returned + rt1 = tf.ragged.constant([[0, 0, 1], [0, 1, 0]]) + rt2 = tf.ragged.constant([[0.1, 0.1, 0.8], [0.05, 0.95, 0]]) + update_op = acc_obj.update_state(rt1, rt2) + self.evaluate(update_op) + result = self.evaluate(acc_obj.result()) + self.assertEqual(result, 1) # 2/2 + + # check with sample_weight + rt1 = tf.ragged.constant([[0, 0, 1], [0, 1, 0]]) + rt2 = tf.ragged.constant([[0.1, 0.1, 0.8], [0.05, 0, 0.95]]) + sample_weight = tf.ragged.constant([[0.5], [0.2]]) + with self.assertRaises(tf.errors.InvalidArgumentError): + result_t = acc_obj(rt1, rt2, sample_weight) + result = self.evaluate(result_t) + + def test_sparse_categorical_accuracy(self): + acc_obj = metrics.SparseCategoricalAccuracy(name="my_acc") + + # check config + self.assertEqual(acc_obj.name, "my_acc") + self.assertTrue(acc_obj.stateful) + self.assertEqual(len(acc_obj.variables), 2) + self.assertEqual(acc_obj.dtype, tf.float32) + self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) + + # verify that correct value is returned + update_op = acc_obj.update_state( + [[2], [1]], [[0.1, 0.1, 0.8], [0.05, 0.95, 0]] + ) + self.evaluate(update_op) + result = self.evaluate(acc_obj.result()) + self.assertEqual(result, 1) # 2/2 + + # check with sample_weight + result_t = acc_obj( + [[2], [1]], [[0.1, 0.1, 0.8], [0.05, 0, 0.95]], [[0.5], [0.2]] + ) + 
result = self.evaluate(result_t)
+        self.assertAlmostEqual(result, 0.93, 2)  # 2.5/2.7
+
+    def test_sparse_categorical_accuracy_ragged(self):
+        acc_obj = metrics.SparseCategoricalAccuracy(name="my_acc")
+
+        # verify that correct value is returned
+        rt1 = tf.ragged.constant([[2], [1]])
+        rt2 = tf.ragged.constant([[0.1, 0.1, 0.8], [0.05, 0.95, 0]])
+
+        with self.assertRaises(tf.errors.InvalidArgumentError):
+            # sparse_categorical_accuracy is not supported for composite/ragged
+            # tensors.
+            update_op = acc_obj.update_state(rt1, rt2)
+            self.evaluate(update_op)
+
+    def test_sparse_categorical_accuracy_mismatched_dims(self):
+        acc_obj = metrics.SparseCategoricalAccuracy(name="my_acc")
+
+        # check config
+        self.assertEqual(acc_obj.name, "my_acc")
+        self.assertTrue(acc_obj.stateful)
+        self.assertEqual(len(acc_obj.variables), 2)
+        self.assertEqual(acc_obj.dtype, tf.float32)
+        self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables))
+
+        # verify that correct value is returned
+        update_op = acc_obj.update_state(
+            [2, 1], [[0.1, 0.1, 0.8], [0.05, 0.95, 0]]
+        )
+        self.evaluate(update_op)
+        result = self.evaluate(acc_obj.result())
+        self.assertEqual(result, 1)  # 2/2
+
+        # check with sample_weight
+        result_t = acc_obj(
+            [2, 1], [[0.1, 0.1, 0.8], [0.05, 0, 0.95]], [[0.5], [0.2]]
+        )
+        result = self.evaluate(result_t)
+        self.assertAlmostEqual(result, 0.93, 2)  # 2.5/2.7
+
+    def test_sparse_categorical_accuracy_mismatched_dims_dynamic(self):
+        with tf.compat.v1.get_default_graph().as_default(), self.cached_session() as sess:  # noqa: E501
+            acc_obj = metrics.SparseCategoricalAccuracy(name="my_acc")
+            self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables))
+
+            t = tf.compat.v1.placeholder(tf.float32)
+            p = tf.compat.v1.placeholder(tf.float32)
+            w = tf.compat.v1.placeholder(tf.float32)
+
+            result_t = acc_obj(t, p, w)
+            result = sess.run(
+                result_t,
+                feed_dict=(
+                    {
+                        t: [2, 1],
+                        p: [[0.1, 0.1, 0.8], [0.05, 0, 0.95]],
+                        w: [[0.5], [0.2]],
+                    }
+                ),
+            )
+            self.assertAlmostEqual(result, 0.71, 2)  # 0.5/0.7
+
+    def test_get_acc(self):
+        acc_fn = metrics.get("acc")
+        self.assertEqual(acc_fn, metrics.accuracy)
+
+
+@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"]))
+class TopKCategoricalAccuracyTest(tf.test.TestCase):
+    def test_config(self):
+        a_obj = metrics.TopKCategoricalAccuracy(name="topkca", dtype=tf.int32)
+        self.assertEqual(a_obj.name, "topkca")
+        self.assertEqual(a_obj._dtype, tf.int32)
+
+        a_obj2 = metrics.TopKCategoricalAccuracy.from_config(a_obj.get_config())
+        self.assertEqual(a_obj2.name, "topkca")
+        self.assertEqual(a_obj2._dtype, tf.int32)
+
+    def test_correctness(self):
+        a_obj = metrics.TopKCategoricalAccuracy()
+        self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables))
+        y_true = tf.constant([[0, 0, 1], [0, 1, 0]])
+        y_pred = tf.constant([[0.1, 0.9, 0.8], [0.05, 0.95, 0]])
+
+        result = a_obj(y_true, y_pred)
+        self.assertEqual(1, self.evaluate(result))  # both the samples match
+
+        # With `k` < 5.
+        a_obj = metrics.TopKCategoricalAccuracy(k=1)
+        self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables))
+        result = a_obj(y_true, y_pred)
+        self.assertEqual(0.5, self.evaluate(result))  # only sample #2 matches
+
+        # With `k` > 5.
+ y_true = tf.constant([[0, 0, 1, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0]]) + y_pred = tf.constant( + [[0.5, 0.9, 0.1, 0.7, 0.6, 0.5, 0.4], [0.05, 0.95, 0, 0, 0, 0, 0]] + ) + a_obj = metrics.TopKCategoricalAccuracy(k=6) + self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) + result = a_obj(y_true, y_pred) + self.assertEqual(0.5, self.evaluate(result)) # only 1 sample matches. + + def test_weighted(self): + a_obj = metrics.TopKCategoricalAccuracy(k=2) + self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) + y_true = tf.constant([[0, 1, 0], [1, 0, 0], [0, 0, 1]]) + y_pred = tf.constant([[0, 0.9, 0.1], [0, 0.9, 0.1], [0, 0.9, 0.1]]) + sample_weight = tf.constant((1.0, 0.0, 1.0)) + result = a_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(1.0, self.evaluate(result), atol=1e-5) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class SparseTopKCategoricalAccuracyTest(tf.test.TestCase): + def test_config(self): + a_obj = metrics.SparseTopKCategoricalAccuracy( + name="stopkca", dtype=tf.int32 + ) + self.assertEqual(a_obj.name, "stopkca") + self.assertEqual(a_obj._dtype, tf.int32) + + a_obj2 = metrics.SparseTopKCategoricalAccuracy.from_config( + a_obj.get_config() + ) + self.assertEqual(a_obj2.name, "stopkca") + self.assertEqual(a_obj2._dtype, tf.int32) + + def test_correctness(self): + a_obj = metrics.SparseTopKCategoricalAccuracy() + self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) + y_true = tf.constant([2, 1]) + y_pred = tf.constant([[0.1, 0.9, 0.8], [0.05, 0.95, 0]]) + + result = a_obj(y_true, y_pred) + self.assertEqual(1, self.evaluate(result)) # both the samples match + + # With `k` < 5. + a_obj = metrics.SparseTopKCategoricalAccuracy(k=1) + self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) + result = a_obj(y_true, y_pred) + self.assertEqual(0.5, self.evaluate(result)) # only sample #2 matches + + # With `k` > 5. + y_pred = tf.constant( + [[0.5, 0.9, 0.1, 0.7, 0.6, 0.5, 0.4], [0.05, 0.95, 0, 0, 0, 0, 0]] + ) + a_obj = metrics.SparseTopKCategoricalAccuracy(k=6) + self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) + result = a_obj(y_true, y_pred) + self.assertEqual(0.5, self.evaluate(result)) # only 1 sample matches. + + def test_weighted(self): + a_obj = metrics.SparseTopKCategoricalAccuracy(k=2) + self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) + y_true = tf.constant([1, 0, 2]) + y_pred = tf.constant([[0, 0.9, 0.1], [0, 0.9, 0.1], [0, 0.9, 0.1]]) + sample_weight = tf.constant((1.0, 0.0, 1.0)) + result = a_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(1.0, self.evaluate(result), atol=1e-5) + + def test_sparse_top_k_categorical_accuracy_mismatched_dims_dynamic(self): + + if not tf.compat.v1.executing_eagerly(): + # Test will fail in v1 graph mode since the metric is not a normal + # layer. It will aggregate the output by batch dim, which failed on + # v1 code. 
+ self.skipTest("v2 eager mode only") + + class AccLayer(layers.Layer): + def build(self, _): + self.acc = metrics.SparseTopKCategoricalAccuracy(k=1) + + def call(self, y_true, y_pred): + return self.acc(y_true, y_pred) + + label = layers.Input(shape=[1]) + predict = layers.Input(shape=[3]) + metric_result = AccLayer()(label, predict) + model = Model([label, predict], metric_result) + + result = model.predict( + [ + tf.constant([[2], [1]]), + tf.constant([[0.1, 0.1, 0.8], [0.05, 0, 0.95]]), + ], + steps=1, + ) + self.assertAllClose(result, 0.5) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/base_metric.py b/keras/metrics/base_metric.py index 2dbf91a387cd..7a56b4d13815 100644 --- a/keras/metrics/base_metric.py +++ b/keras/metrics/base_metric.py @@ -12,862 +12,982 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=g-classes-have-attributes -# pylint: disable=g-doc-return-or-yield """Base Metric classes.""" import abc -import copy import types import warnings +import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras.dtensor import dtensor_api as dtensor from keras.dtensor import utils as dtensor_utils from keras.engine import base_layer from keras.engine import base_layer_utils from keras.engine import keras_tensor -from keras.saving.saved_model import metric_serialization +from keras.saving.legacy.saved_model import metric_serialization from keras.utils import generic_utils from keras.utils import losses_utils from keras.utils import metrics_utils -from keras.utils.tf_utils import is_tensor_or_variable -import numpy as np -import tensorflow.compat.v2 as tf +from keras.utils import tf_utils +# isort: off from tensorflow.python.util.tf_export import keras_export from tensorflow.tools.docs import doc_controls -@keras_export('keras.metrics.Metric') +@keras_export("keras.metrics.Metric") class Metric(base_layer.Layer, metaclass=abc.ABCMeta): - """Encapsulates metric logic and state. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - **kwargs: Additional layer keywords arguments. - - Standalone usage: - - ```python - m = SomeMetric(...) - for input in ...: - m.update_state(input) - print('Final result: ', m.result().numpy()) - ``` - - Usage with `compile()` API: - - ```python - model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(64, activation='relu')) - model.add(tf.keras.layers.Dense(64, activation='relu')) - model.add(tf.keras.layers.Dense(10, activation='softmax')) - - model.compile(optimizer=tf.keras.optimizers.RMSprop(0.01), - loss=tf.keras.losses.CategoricalCrossentropy(), - metrics=[tf.keras.metrics.CategoricalAccuracy()]) - - data = np.random.random((1000, 32)) - labels = np.random.random((1000, 10)) - - dataset = tf.data.Dataset.from_tensor_slices((data, labels)) - dataset = dataset.batch(32) - - model.fit(dataset, epochs=10) - ``` - - To be implemented by subclasses: - * `__init__()`: All state variables should be created in this method by - calling `self.add_weight()` like: `self.var = self.add_weight(...)` - * `update_state()`: Has all updates to the state variables like: - self.var.assign_add(...). - * `result()`: Computes and returns a scalar value or a dict of scalar values - for the metric from the state variables. 
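The contract this docstring describes (accumulate with `update_state()`, read with `result()`, clear with `reset_state()`) can be exercised end to end with any built-in metric. A minimal sketch with illustrative values:

```python
# Sketch: a Metric is stateful across calls until explicitly reset.
import tensorflow as tf

mean = tf.keras.metrics.Mean()
for epoch_values in ([1.0, 3.0], [5.0, 7.0]):
    mean.reset_state()             # clear state between epochs
    for v in epoch_values:
        mean.update_state(v)       # accumulate within the epoch
    print(float(mean.result()))    # 2.0 after epoch 1, 6.0 after epoch 2
```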
- - Example subclass implementation: - - ```python - class BinaryTruePositives(tf.keras.metrics.Metric): - - def __init__(self, name='binary_true_positives', **kwargs): - super(BinaryTruePositives, self).__init__(name=name, **kwargs) - self.true_positives = self.add_weight(name='tp', initializer='zeros') - - def update_state(self, y_true, y_pred, sample_weight=None): - y_true = tf.cast(y_true, tf.bool) - y_pred = tf.cast(y_pred, tf.bool) - - values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True)) - values = tf.cast(values, self.dtype) - if sample_weight is not None: - sample_weight = tf.cast(sample_weight, self.dtype) - sample_weight = tf.broadcast_to(sample_weight, values.shape) - values = tf.multiply(values, sample_weight) - self.true_positives.assign_add(tf.reduce_sum(values)) - - def result(self): - return self.true_positives - ``` - """ - - def __init__(self, name=None, dtype=None, **kwargs): - super().__init__(name=name, dtype=dtype, **kwargs) - self.stateful = True # All metric layers are stateful. - self.built = True - if not base_layer_utils.v2_dtype_behavior_enabled(): - # We only do this when the V2 behavior is not enabled, as when it is - # enabled, the dtype already defaults to floatx. - self._dtype = (backend.floatx() if dtype is None - else tf.as_dtype(dtype).name) - - def __new__(cls, *args, **kwargs): - obj = super(Metric, cls).__new__(cls) - - # If `update_state` is not in eager/tf.function and it is not from a - # built-in metric, wrap it in `tf.function`. This is so that users writing - # custom metrics in v1 need not worry about control dependencies and - # return ops. - if (base_layer_utils.is_in_eager_or_tf_function() or - is_built_in(cls)): - obj_update_state = obj.update_state - - def update_state_fn(*args, **kwargs): - control_status = tf.__internal__.autograph.control_status_ctx() - ag_update_state = tf.__internal__.autograph.tf_convert( - obj_update_state, control_status) - return ag_update_state(*args, **kwargs) - else: - if isinstance(obj.update_state, tf.__internal__.function.Function): - update_state_fn = obj.update_state - else: - update_state_fn = tf.function(obj.update_state) - - obj.update_state = types.MethodType( - metrics_utils.update_state_wrapper(update_state_fn), obj) - - obj_result = obj.result - - def result_fn(*args, **kwargs): - control_status = tf.__internal__.autograph.control_status_ctx() - ag_result = tf.__internal__.autograph.tf_convert( - obj_result, control_status) - return ag_result(*args, **kwargs) - - obj.result = types.MethodType(metrics_utils.result_wrapper(result_fn), obj) - - return obj - - def __call__(self, *args, **kwargs): - """Accumulates statistics and then computes metric result value. + """Encapsulates metric logic and state. Args: - *args: - **kwargs: A mini-batch of inputs to the Metric, - passed on to `update_state()`. - - Returns: - The metric value tensor. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + **kwargs: Additional layer keywords arguments. + + Standalone usage: + + ```python + m = SomeMetric(...) 
+ for input in ...: + m.update_state(input) + print('Final result: ', m.result().numpy()) + ``` + + Usage with `compile()` API: + + ```python + model = tf.keras.Sequential() + model.add(tf.keras.layers.Dense(64, activation='relu')) + model.add(tf.keras.layers.Dense(64, activation='relu')) + model.add(tf.keras.layers.Dense(10, activation='softmax')) + + model.compile(optimizer=tf.keras.optimizers.RMSprop(0.01), + loss=tf.keras.losses.CategoricalCrossentropy(), + metrics=[tf.keras.metrics.CategoricalAccuracy()]) + + data = np.random.random((1000, 32)) + labels = np.random.random((1000, 10)) + + dataset = tf.data.Dataset.from_tensor_slices((data, labels)) + dataset = dataset.batch(32) + + model.fit(dataset, epochs=10) + ``` + + To be implemented by subclasses: + * `__init__()`: All state variables should be created in this method by + calling `self.add_weight()` like: `self.var = self.add_weight(...)` + * `update_state()`: Has all updates to the state variables like: + self.var.assign_add(...). + * `result()`: Computes and returns a scalar value or a dict of scalar values + for the metric from the state variables. + + Example subclass implementation: + + ```python + class BinaryTruePositives(tf.keras.metrics.Metric): + + def __init__(self, name='binary_true_positives', **kwargs): + super(BinaryTruePositives, self).__init__(name=name, **kwargs) + self.true_positives = self.add_weight(name='tp', initializer='zeros') + + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = tf.cast(y_true, tf.bool) + y_pred = tf.cast(y_pred, tf.bool) + + values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True)) + values = tf.cast(values, self.dtype) + if sample_weight is not None: + sample_weight = tf.cast(sample_weight, self.dtype) + sample_weight = tf.broadcast_to(sample_weight, values.shape) + values = tf.multiply(values, sample_weight) + self.true_positives.assign_add(tf.reduce_sum(values)) + + def result(self): + return self.true_positives + ``` """ - def replica_local_fn(*args, **kwargs): - """Updates the state of the metric in a replica-local context.""" - if any( - isinstance(arg, keras_tensor.KerasTensor) - for arg in tf.nest.flatten((args, kwargs))): - update_op = None - else: - update_op = self.update_state(*args, **kwargs) # pylint: disable=not-callable - update_ops = [] - if update_op is not None: - update_ops.append(update_op) - with tf.control_dependencies(update_ops): - result_t = self.result() # pylint: disable=not-callable - - # We are adding the metric object as metadata on the result tensor. - # This is required when we want to use a metric with `add_metric` API on - # a Model/Layer in graph mode. This metric instance will later be used - # to reset variable state after each epoch of training. 
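Calling a metric instance is shorthand for `update_state()` followed by `result()`, which is what `__call__` in this class implements. A small sketch using a built-in metric with illustrative values:

```python
# Sketch: the one-step and two-step forms are equivalent.
import tensorflow as tf

m = tf.keras.metrics.Sum()
value = m([1.0, 2.0])          # updates state, then returns the result
print(float(value))            # 3.0

m2 = tf.keras.metrics.Sum()
m2.update_state([1.0, 2.0])    # explicit two-step form
print(float(m2.result()))      # 3.0
```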
- # Example: - # model = Model() - # mean = Mean() - # model.add_metric(mean(values), name='mean') - result_t._metric_obj = self # pylint: disable=protected-access - return result_t - - from keras.distribute import distributed_training_utils # pylint:disable=g-import-not-at-top - return distributed_training_utils.call_replica_local_fn( - replica_local_fn, *args, **kwargs) - - def __str__(self): - args = ','.join(f'{k}={v}' for k, v in self.get_config().items()) - return f'{self.__class__.__name__}({args})' - - def __deepcopy__(self, memo): - result = type(self)(name=self.name, dtype=self.dtype) - memo[id(self)] = result - - for k, v in self.__dict__.items(): - if k in ['update_state', 'result']: - # `update_state` keeps a closure of `update_state_fn`, and deep - # copying it would result in copying that old reference. Avoid that. - # Likewise for `result`. - continue - if k in ['_obj_reference_counts_dict']: - # `Layer.__setattr__` attempts to flatten the - # `ObjectIdentityDictionary`, which can't be done since it stores - # heterogeneous instances. - tf.Module.__setattr__(result, k, copy.deepcopy(v, memo)) - elif k in ['_thread_local', '_metrics_lock']: - # Can't pickle _thread.lock objects. - setattr(result, k, v) - else: - setattr(result, k, copy.deepcopy(v, memo)) - - return result - - @property - def dtype(self): - return self._dtype - - def get_config(self): - """Returns the serializable config of the metric.""" - return {'name': self.name, 'dtype': self.dtype} - - def reset_state(self): - """Resets all of the metric state variables. - - This function is called between epochs/steps, - when a metric is evaluated during training. - """ - if not generic_utils.is_default(self.reset_states): - warnings.warn( - 'Metric %s implements a `reset_states()` method; rename it ' - 'to `reset_state()` (without the final "s"). The name ' - '`reset_states()` has been deprecated to improve API ' - 'consistency.' % (self.__class__.__name__,), - stacklevel=2) - return self.reset_states() - else: - backend.batch_set_value([(v, 0) for v in self.variables]) - - @abc.abstractmethod - def update_state(self, *args, **kwargs): - """Accumulates statistics for the metric. - - Note: This function is executed as a graph function in graph mode. - This means: - a) Operations on the same resource are executed in textual order. - This should make it easier to do things like add the updated - value of a variable to another, for example. - b) You don't need to worry about collecting the update ops to execute. - All update ops added to the graph by this function will be executed. - As a result, code should generally work the same way with graph or - eager execution. + def __init__(self, name=None, dtype=None, **kwargs): + super().__init__(name=name, dtype=dtype, **kwargs) + self.stateful = True # All metric layers are stateful. + self.built = True + if not base_layer_utils.v2_dtype_behavior_enabled(): + # We only do this when the V2 behavior is not enabled, as when it is + # enabled, the dtype already defaults to floatx. + self._dtype = ( + backend.floatx() if dtype is None else tf.as_dtype(dtype).name + ) + + def __new__(cls, *args, **kwargs): + obj = super(Metric, cls).__new__(cls) + + # If `update_state` is not in eager/tf.function and it is not from a + # built-in metric, wrap it in `tf.function`. This is so that users + # writing custom metrics in v1 need not worry about control dependencies + # and return ops. 
+ if base_layer_utils.is_in_eager_or_tf_function() or is_built_in(cls): + obj_update_state = obj.update_state + + def update_state_fn(*args, **kwargs): + control_status = tf.__internal__.autograph.control_status_ctx() + ag_update_state = tf.__internal__.autograph.tf_convert( + obj_update_state, control_status + ) + return ag_update_state(*args, **kwargs) - Args: - *args: - **kwargs: A mini-batch of inputs to the Metric. - """ - raise NotImplementedError('Must be implemented in subclasses.') - - def merge_state(self, metrics): - """Merges the state from one or more metrics. + else: + if isinstance(obj.update_state, tf.__internal__.function.Function): + update_state_fn = obj.update_state + else: + update_state_fn = tf.function(obj.update_state) + + obj.update_state = types.MethodType( + metrics_utils.update_state_wrapper(update_state_fn), obj + ) + + obj_result = obj.result + + def result_fn(*args, **kwargs): + control_status = tf.__internal__.autograph.control_status_ctx() + ag_result = tf.__internal__.autograph.tf_convert( + obj_result, control_status + ) + return ag_result(*args, **kwargs) + + obj.result = types.MethodType( + metrics_utils.result_wrapper(result_fn), obj + ) + + return obj + + def __call__(self, *args, **kwargs): + """Accumulates statistics and then computes metric result value. + + Args: + *args: + **kwargs: A mini-batch of inputs to the Metric, + passed on to `update_state()`. + + Returns: + The metric value tensor. + """ + + def replica_local_fn(*args, **kwargs): + """Updates the state of the metric in a replica-local context.""" + if any( + isinstance(arg, keras_tensor.KerasTensor) + for arg in tf.nest.flatten((args, kwargs)) + ): + update_op = None + else: + update_op = self.update_state(*args, **kwargs) + update_ops = [] + if update_op is not None: + update_ops.append(update_op) + with tf.control_dependencies(update_ops): + result_t = self.result() + + # If the metric object return a dictionary as a result, wrap it + # with our custom dict object so we can attach the metric object + # to it. + if isinstance(result_t, dict): + result_t = _MetricDict(**result_t) + + # We are adding the metric object as metadata on the result + # tensor. This is required when we want to use a metric with + # `add_metric` API on a Model/Layer in graph mode. This metric + # instance will later be used to reset variable state after each + # epoch of training. + # Example: + # model = Model() + # mean = Mean() + # model.add_metric(mean(values), name='mean') + result_t._metric_obj = self + return result_t + + from keras.distribute import ( + distributed_training_utils, + ) + + return distributed_training_utils.call_replica_local_fn( + replica_local_fn, *args, **kwargs + ) + + def __str__(self): + args = ",".join(f"{k}={v}" for k, v in self.get_config().items()) + return f"{self.__class__.__name__}({args})" + + def __deepcopy__(self, memo=None): + try: + new_self = self.from_config(self.get_config()) + except NotImplementedError as e: + raise NotImplementedError( + "Calling `__deepcopy__()` on a Keras metric " + "requires the metric to be serializable, " + "i.e. it should implement `get_config()`.\n\n" + f"Error encountered during serialization: [{e}]" + ) + # Note that metrics don't implement `build()` so their variables + # are readily available after instantiation. 
+ if self.weights: + new_self.set_weights(self.get_weights()) + memo[self] = new_self + return new_self + + @property + def dtype(self): + return self._dtype + + def get_config(self): + """Returns the serializable config of the metric.""" + return {"name": self.name, "dtype": self.dtype} + + def reset_state(self): + """Resets all of the metric state variables. + + This function is called between epochs/steps, + when a metric is evaluated during training. + """ + if not generic_utils.is_default(self.reset_states): + warnings.warn( + "Metric %s implements a `reset_states()` method; rename it " + 'to `reset_state()` (without the final "s"). The name ' + "`reset_states()` has been deprecated to improve API " + "consistency." % (self.__class__.__name__,), + stacklevel=2, + ) + return self.reset_states() + else: + backend.batch_set_value([(v, 0) for v in self.variables]) + + @abc.abstractmethod + def update_state(self, *args, **kwargs): + """Accumulates statistics for the metric. + + Note: This function is executed as a graph function in graph mode. + This means: + a) Operations on the same resource are executed in textual order. + This should make it easier to do things like add the updated + value of a variable to another, for example. + b) You don't need to worry about collecting the update ops to execute. + All update ops added to the graph by this function will be + executed. + As a result, code should generally work the same way with graph or + eager execution. + + Args: + *args: + **kwargs: A mini-batch of inputs to the Metric. + """ + raise NotImplementedError("Must be implemented in subclasses.") + + def merge_state(self, metrics): + """Merges the state from one or more metrics. + + This method can be used by distributed systems to merge the state + computed by different metric instances. Typically the state will be + stored in the form of the metric's weights. For example, a + tf.keras.metrics.Mean metric contains a list of two weight values: a + total and a count. If there were two instances of a + tf.keras.metrics.Accuracy that each independently aggregated partial + state for an overall accuracy calculation, these two metric's states + could be combined as follows: + + >>> m1 = tf.keras.metrics.Accuracy() + >>> _ = m1.update_state([[1], [2]], [[0], [2]]) + + >>> m2 = tf.keras.metrics.Accuracy() + >>> _ = m2.update_state([[3], [4]], [[3], [4]]) + + >>> m2.merge_state([m1]) + >>> m2.result().numpy() + 0.75 + + Args: + metrics: an iterable of metrics. The metrics must have compatible + state. + + Raises: + ValueError: If the provided iterable does not contain metrics matching + the metric's required specifications. + """ + assign_add_ops = [] + for metric in metrics: + if len(self.weights) != len(metric.weights): + raise ValueError( + f"Metric {metric} is not compatible with {self}" + ) + for weight, weight_to_add in zip(self.weights, metric.weights): + assign_add_ops.append(weight.assign_add(weight_to_add)) + return assign_add_ops + + @abc.abstractmethod + def result(self): + """Computes and returns the scalar metric value tensor or a dict of + scalars. + + Result computation is an idempotent operation that simply calculates the + metric value using the state variables. + + Returns: + A scalar tensor, or a dictionary of scalar tensors. 
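The `merge_state()` semantics shown above (element-wise `assign_add` over matching weight lists) can be checked with `Mean`, whose two weights are a running total and a count. A minimal sketch with illustrative values:

```python
# Sketch: merging partial state from two Mean metrics, as a distributed
# system might do with independently aggregated shards.
import tensorflow as tf

m1 = tf.keras.metrics.Mean()
m1.update_state([1.0, 3.0])    # total=4, count=2

m2 = tf.keras.metrics.Mean()
m2.update_state([5.0, 7.0])    # total=12, count=2

m2.merge_state([m1])           # total=16, count=4
print(float(m2.result()))      # 4.0
```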
+ """ + raise NotImplementedError("Must be implemented in subclasses.") + + ### For use by subclasses ### + @doc_controls.for_subclass_implementers + def add_weight( + self, + name, + shape=(), + aggregation=tf.VariableAggregation.SUM, + synchronization=tf.VariableSynchronization.ON_READ, + initializer=None, + dtype=None, + ): + """Adds state variable. Only for use by subclasses.""" + if tf.distribute.has_strategy(): + strategy = tf.distribute.get_strategy() + else: + strategy = None + + additional_kwargs = {} + + # TODO(b/120571621): Make `ON_READ` work with Keras metrics on TPU. + if backend.is_tpu_strategy(strategy): + synchronization = tf.VariableSynchronization.ON_WRITE + if getattr(self, "_mesh", None) is not None: + # When self._mesh is set, it means this metric is used for DTensor. + additional_kwargs = { + "layout": dtensor.Layout.replicated( + self._mesh, tf.TensorShape(shape).rank + ) + } + + if tf_utils.in_local_vars_context(): + # Metrics created within a remotely-executed tf.function during + # parameter server evaluation should use tf2 Variables, so that they + # can be local variables that are freely usable and mutable within + # the function, using the + # `experimental_enable_variable_lifting=False` argument. This + # supports a visitation guarantee for model evaluation. + def local_v2_var_creator( + initializer=None, dtype=None, shape=None, **kwargs + ): + init_val, var_dtype = base_layer_utils.infer_init_val_and_dtype( + initializer, dtype, shape + ) + v1_only_args = ["use_resource", "collections"] + for v1_arg in v1_only_args: + kwargs.pop(v1_arg, None) + kwargs["experimental_enable_variable_lifting"] = False + return tf.Variable( + initial_value=init_val, + dtype=var_dtype, + shape=shape, + **kwargs, + ) + + additional_kwargs["getter"] = local_v2_var_creator + + with tf_utils.maybe_init_scope(layer=self): + return super().add_weight( + name=name, + shape=shape, + dtype=self._dtype if dtype is None else dtype, + trainable=False, + initializer=initializer, + collections=[], + synchronization=synchronization, + aggregation=aggregation, + **additional_kwargs, + ) + + ### End: For use by subclasses ### + + @property + def trainable_weights(self): + # Overridden from Layer class to track submetric weights. + if self.trainable: + trainable_weights = self._trainable_weights + for m in self._metrics: + trainable_weights += m.trainable_weights + return self._dedup_weights(trainable_weights) + else: + return [] + + @property + def non_trainable_weights(self): + # Overridden from Layer class to track submetric weights. + if self.trainable: + non_trainable_weights = self._non_trainable_weights + for m in self._metrics: + non_trainable_weights += m.non_trainable_weights + else: + non_trainable_weights = ( + self._non_trainable_weights + self._trainable_weights + ) + for m in self._metrics: + non_trainable_weights += m.weights + return self._dedup_weights(non_trainable_weights) - This method can be used by distributed systems to merge the state computed - by different metric instances. Typically the state will be stored in the - form of the metric's weights. For example, a tf.keras.metrics.Mean metric - contains a list of two weight values: a total and a count. 
If there were two - instances of a tf.keras.metrics.Accuracy that each independently aggregated - partial state for an overall accuracy calculation, these two metric's states - could be combined as follows: + @property + def _trackable_saved_model_saver(self): + return metric_serialization.MetricSavedModelSaver(self) - >>> m1 = tf.keras.metrics.Accuracy() - >>> _ = m1.update_state([[1], [2]], [[0], [2]]) + @generic_utils.default + @doc_controls.do_not_generate_docs + def reset_states(self): + # Backwards compatibility alias of `reset_state`. New classes should + # only implement `reset_state`. + return self.reset_state() - >>> m2 = tf.keras.metrics.Accuracy() - >>> _ = m2.update_state([[3], [4]], [[3], [4]]) - >>> m2.merge_state([m1]) - >>> m2.result().numpy() - 0.75 +class Reduce(Metric): + """Encapsulates metrics that perform a reduce operation on the values. Args: - metrics: an iterable of metrics. The metrics must have compatible state. - - Raises: - ValueError: If the provided iterable does not contain metrics matching the - metric's required specifications. - """ - assign_add_ops = [] - for metric in metrics: - if len(self.weights) != len(metric.weights): - raise ValueError(f'Metric {metric} is not compatible with {self}') - for weight, weight_to_add in zip(self.weights, metric.weights): - assign_add_ops.append(weight.assign_add(weight_to_add)) - return assign_add_ops - - @abc.abstractmethod - def result(self): - """Computes and returns the scalar metric value tensor or a dict of scalars. - - Result computation is an idempotent operation that simply calculates the - metric value using the state variables. - - Returns: - A scalar tensor, or a dictionary of scalar tensors. + reduction: a `tf.keras.metrics.Reduction` enum value. + name: string name of the metric instance. + dtype: (Optional) data type of the metric result. """ - raise NotImplementedError('Must be implemented in subclasses.') - - ### For use by subclasses ### - @doc_controls.for_subclass_implementers - def add_weight( - self, - name, - shape=(), - aggregation=tf.VariableAggregation.SUM, - synchronization=tf.VariableSynchronization.ON_READ, - initializer=None, - dtype=None): - """Adds state variable. Only for use by subclasses.""" - if tf.distribute.has_strategy(): - strategy = tf.distribute.get_strategy() - else: - strategy = None - - # TODO(b/120571621): Make `ON_READ` work with Keras metrics on TPU. - if backend.is_tpu_strategy(strategy): - synchronization = tf.VariableSynchronization.ON_WRITE - if getattr(self, '_mesh', None) is not None: - # When self._mesh is set, it means this metric is used for DTensor. - additional_kwargs = { - 'layout': dtensor.Layout.replicated(self._mesh, - tf.TensorShape(shape).rank)} - else: - additional_kwargs = {} - - with tf.init_scope(): - return super().add_weight( - name=name, - shape=shape, - dtype=self._dtype if dtype is None else dtype, - trainable=False, - initializer=initializer, - collections=[], - synchronization=synchronization, - aggregation=aggregation, - **additional_kwargs) - - ### End: For use by subclasses ### - - @property - def trainable_weights(self): - # Overridden from Layer class to track submetric weights. - if self.trainable: - trainable_weights = self._trainable_weights - for m in self._metrics: - trainable_weights += m.trainable_weights - return self._dedup_weights(trainable_weights) - else: - return [] - - @property - def non_trainable_weights(self): - # Overridden from Layer class to track submetric weights. 
- if self.trainable: - non_trainable_weights = self._non_trainable_weights - for m in self._metrics: - non_trainable_weights += m.non_trainable_weights - else: - non_trainable_weights = ( - self._non_trainable_weights + self._trainable_weights) - for m in self._metrics: - non_trainable_weights += m.weights - return self._dedup_weights(non_trainable_weights) - - @property - def _trackable_saved_model_saver(self): - return metric_serialization.MetricSavedModelSaver(self) - - @generic_utils.default - @doc_controls.do_not_generate_docs - def reset_states(self): - # Backwards compatibility alias of `reset_state`. New classes should - # only implement `reset_state`. - return self.reset_state() - -class Reduce(Metric): - """Encapsulates metrics that perform a reduce operation on the values. - - Args: - reduction: a `tf.keras.metrics.Reduction` enum value. - name: string name of the metric instance. - dtype: (Optional) data type of the metric result. - """ - - def __init__(self, reduction, name, dtype=None): - super().__init__(name=name, dtype=dtype) - self.reduction = reduction - self.total = self.add_weight( - 'total', initializer='zeros') - if reduction in [metrics_utils.Reduction.SUM_OVER_BATCH_SIZE, - metrics_utils.Reduction.WEIGHTED_MEAN]: - self.count = self.add_weight( - 'count', initializer='zeros') - - def update_state(self, values, sample_weight=None): - """Accumulates statistics for computing the metric. + def __init__(self, reduction, name, dtype=None): + super().__init__(name=name, dtype=dtype) + self.reduction = reduction + self.total = self.add_weight("total", initializer="zeros") + if reduction in [ + metrics_utils.Reduction.SUM_OVER_BATCH_SIZE, + metrics_utils.Reduction.WEIGHTED_MEAN, + ]: + self.count = self.add_weight("count", initializer="zeros") + + def update_state(self, values, sample_weight=None): + """Accumulates statistics for computing the metric. + + Args: + values: Per-example value. + sample_weight: Optional weighting of each example. Defaults to `1`. + + Returns: + Update op. + """ + [ + values + ], sample_weight = metrics_utils.ragged_assert_compatible_and_get_flat_values( # noqa: E501 + [values], sample_weight + ) + try: + values = tf.cast(values, self._dtype) + except (ValueError, TypeError): + msg = ( + "The output of a metric function can only be a single Tensor. " + f"Received: {values}. " + ) + if isinstance(values, dict): + msg += ( + "To return a dict of values, implement a custom Metric " + "subclass." + ) + raise RuntimeError(msg) + if sample_weight is not None: + sample_weight = tf.cast(sample_weight, self._dtype) + # Update dimensions of weights to match with values if possible. + ( + values, + _, + sample_weight, + ) = losses_utils.squeeze_or_expand_dimensions( + values, sample_weight=sample_weight + ) + try: + # Broadcast weights if possible. + sample_weight = tf.__internal__.ops.broadcast_weights( + sample_weight, values + ) + except ValueError: + # Reduce values to same ndim as weight array + ndim = backend.ndim(values) + weight_ndim = backend.ndim(sample_weight) + if self.reduction == metrics_utils.Reduction.SUM: + values = tf.reduce_sum( + values, axis=list(range(weight_ndim, ndim)) + ) + else: + values = tf.reduce_mean( + values, axis=list(range(weight_ndim, ndim)) + ) + values = tf.multiply(values, sample_weight) + + value_sum = tf.reduce_sum(values) + with tf.control_dependencies([value_sum]): + update_total_op = self.total.assign_add(value_sum) + + # Exit early if the reduction doesn't have a denominator. 
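For the `WEIGHTED_MEAN` reduction handled above, the denominator accumulated into `count` is the sum of the sample weights, not the number of values. A small sketch with illustrative numbers:

```python
# Sketch: weighted mean = sum(w_i * v_i) / sum(w_i).
import tensorflow as tf

m = tf.keras.metrics.Mean()
m.update_state([2.0, 4.0], sample_weight=[0.25, 0.75])
# total = 2*0.25 + 4*0.75 = 3.5;  count = 0.25 + 0.75 = 1.0
print(float(m.result()))  # 3.5
```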
+ if self.reduction == metrics_utils.Reduction.SUM: + return update_total_op + + # Update `count` for reductions that require a denominator. + if self.reduction == metrics_utils.Reduction.SUM_OVER_BATCH_SIZE: + num_values = tf.cast(tf.size(values), self._dtype) + elif self.reduction == metrics_utils.Reduction.WEIGHTED_MEAN: + if sample_weight is None: + num_values = tf.cast(tf.size(values), self._dtype) + else: + num_values = tf.reduce_sum(sample_weight) + else: + raise NotImplementedError( + f'Reduction "{self.reduction}" not implemented. Expected ' + '"sum", "weighted_mean", or "sum_over_batch_size".' + ) - Args: - values: Per-example value. - sample_weight: Optional weighting of each example. Defaults to 1. + with tf.control_dependencies([update_total_op]): + return self.count.assign_add(num_values) - Returns: - Update op. - """ - [values], sample_weight = \ - metrics_utils.ragged_assert_compatible_and_get_flat_values( - [values], sample_weight) - try: - values = tf.cast(values, self._dtype) - except (ValueError, TypeError): - msg = ('The output of a metric function can only be a single Tensor. ' - f'Received: {values}. ') - if isinstance(values, dict): - msg += ('To return a dict of values, implement a custom Metric ' - 'subclass.') - raise RuntimeError(msg) - if sample_weight is not None: - sample_weight = tf.cast(sample_weight, self._dtype) - # Update dimensions of weights to match with values if possible. - values, _, sample_weight = losses_utils.squeeze_or_expand_dimensions( - values, sample_weight=sample_weight) - try: - # Broadcast weights if possible. - sample_weight = tf.__internal__.ops.broadcast_weights( - sample_weight, values) - except ValueError: - # Reduce values to same ndim as weight array - ndim = backend.ndim(values) - weight_ndim = backend.ndim(sample_weight) + def result(self): if self.reduction == metrics_utils.Reduction.SUM: - values = tf.reduce_sum( - values, axis=list(range(weight_ndim, ndim))) + return tf.identity(self.total) + elif self.reduction in [ + metrics_utils.Reduction.WEIGHTED_MEAN, + metrics_utils.Reduction.SUM_OVER_BATCH_SIZE, + ]: + return tf.math.divide_no_nan(self.total, self.count) else: - values = tf.reduce_mean( - values, axis=list(range(weight_ndim, ndim))) - values = tf.multiply(values, sample_weight) - - value_sum = tf.reduce_sum(values) - with tf.control_dependencies([value_sum]): - update_total_op = self.total.assign_add(value_sum) - - # Exit early if the reduction doesn't have a denominator. - if self.reduction == metrics_utils.Reduction.SUM: - return update_total_op - - # Update `count` for reductions that require a denominator. - if self.reduction == metrics_utils.Reduction.SUM_OVER_BATCH_SIZE: - num_values = tf.cast(tf.size(values), self._dtype) - elif self.reduction == metrics_utils.Reduction.WEIGHTED_MEAN: - if sample_weight is None: - num_values = tf.cast(tf.size(values), self._dtype) - else: - num_values = tf.reduce_sum(sample_weight) - else: - raise NotImplementedError( - f'Reduction "{self.reduction}" not implemented. 
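# --- Editor's aside (not part of this patch): the reduction logic above,
# exercised through the public subclasses defined below. With Reduction.SUM
# only `total` is updated; the mean-style reductions divide `total` by
# `count` using `divide_no_nan`, so a metric with no updates reports 0.0
# rather than failing on 0/0.
import tensorflow as tf

s = tf.keras.metrics.Sum()
s.update_state([1, 3, 5, 7], sample_weight=[1, 1, 0, 0])
print(s.result().numpy())  # 4.0: the weights zero out the last two values

m = tf.keras.metrics.Mean()
print(m.result().numpy())  # 0.0: divide_no_nan(total=0, count=0) == 0
# --- end of editor's aside ---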
Expected ' - '"sum", "weighted_mean", or "sum_over_batch_size".') - - with tf.control_dependencies([update_total_op]): - return self.count.assign_add(num_values) - - def result(self): - if self.reduction == metrics_utils.Reduction.SUM: - return tf.identity(self.total) - elif self.reduction in [ - metrics_utils.Reduction.WEIGHTED_MEAN, - metrics_utils.Reduction.SUM_OVER_BATCH_SIZE - ]: - return tf.math.divide_no_nan(self.total, self.count) - else: - raise NotImplementedError( - f'Reduction "{self.reduction}" not implemented. Expected ' - '"sum", "weighted_mean", or "sum_over_batch_size".') - - -@keras_export('keras.metrics.Sum') + raise NotImplementedError( + f'Reduction "{self.reduction}" not implemented. Expected ' + '"sum", "weighted_mean", or "sum_over_batch_size".' + ) + + +@keras_export("keras.metrics.Sum") class Sum(Reduce): - """Computes the (weighted) sum of the given values. + """Computes the (weighted) sum of the given values. - For example, if values is [1, 3, 5, 7] then the sum is 16. - If the weights were specified as [1, 1, 0, 0] then the sum would be 4. + For example, if values is [1, 3, 5, 7] then the sum is 16. + If the weights were specified as [1, 1, 0, 0] then the sum would be 4. - This metric creates one variable, `total`, that is used to compute the sum of - `values`. This is ultimately returned as `sum`. + This metric creates one variable, `total`, that is used to compute the sum + of `values`. This is ultimately returned as `sum`. - If `sample_weight` is `None`, weights default to 1. Use `sample_weight` of 0 - to mask values. + If `sample_weight` is `None`, weights default to 1. Use `sample_weight` of + 0 to mask values. - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. - Standalone usage: + Standalone usage: - >>> m = tf.keras.metrics.Sum() - >>> m.update_state([1, 3, 5, 7]) - >>> m.result().numpy() - 16.0 + >>> m = tf.keras.metrics.Sum() + >>> m.update_state([1, 3, 5, 7]) + >>> m.result().numpy() + 16.0 - Usage with `compile()` API: + Usage with `compile()` API: - ```python - model.add_metric(tf.keras.metrics.Sum(name='sum_1')(outputs)) - model.compile(optimizer='sgd', loss='mse') - ``` - """ + ```python + model.add_metric(tf.keras.metrics.Sum(name='sum_1')(outputs)) + model.compile(optimizer='sgd', loss='mse') + ``` + """ - @dtensor_utils.inject_mesh - def __init__(self, name='sum', dtype=None): - super().__init__(reduction=metrics_utils.Reduction.SUM, - name=name, dtype=dtype) + @dtensor_utils.inject_mesh + def __init__(self, name="sum", dtype=None): + super().__init__( + reduction=metrics_utils.Reduction.SUM, name=name, dtype=dtype + ) -@keras_export('keras.metrics.Mean') +@keras_export("keras.metrics.Mean") class Mean(Reduce): - """Computes the (weighted) mean of the given values. + """Computes the (weighted) mean of the given values. - For example, if values is [1, 3, 5, 7] then the mean is 4. - If the weights were specified as [1, 1, 0, 0] then the mean would be 2. + For example, if values is [1, 3, 5, 7] then the mean is 4. + If the weights were specified as [1, 1, 0, 0] then the mean would be 2. - This metric creates two variables, `total` and `count` that are used to - compute the average of `values`. This average is ultimately returned as `mean` - which is an idempotent operation that simply divides `total` by `count`. 
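# --- Editor's aside (not part of this patch): the state each reduction
# creates, per `Reduce.__init__` above. SUM keeps only `total`; the
# mean-style reductions also allocate a `count` denominator.
import tensorflow as tf

print(len(tf.keras.metrics.Sum().variables))   # 1: just `total`
print(len(tf.keras.metrics.Mean().variables))  # 2: `total` and `count`
# --- end of editor's aside ---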
+ This metric creates two variables, `total` and `count` that are used to + compute the average of `values`. This average is ultimately returned as + `mean` which is an idempotent operation that simply divides `total` by + `count`. - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. - Standalone usage: + Standalone usage: - >>> m = tf.keras.metrics.Mean() - >>> m.update_state([1, 3, 5, 7]) - >>> m.result().numpy() - 4.0 - >>> m.reset_state() - >>> m.update_state([1, 3, 5, 7], sample_weight=[1, 1, 0, 0]) - >>> m.result().numpy() - 2.0 + >>> m = tf.keras.metrics.Mean() + >>> m.update_state([1, 3, 5, 7]) + >>> m.result().numpy() + 4.0 + >>> m.reset_state() + >>> m.update_state([1, 3, 5, 7], sample_weight=[1, 1, 0, 0]) + >>> m.result().numpy() + 2.0 - Usage with `compile()` API: + Usage with `compile()` API: - ```python - model.add_metric(tf.keras.metrics.Mean(name='mean_1')(outputs)) - model.compile(optimizer='sgd', loss='mse') - ``` - """ + ```python + model.add_metric(tf.keras.metrics.Mean(name='mean_1')(outputs)) + model.compile(optimizer='sgd', loss='mse') + ``` + """ - @dtensor_utils.inject_mesh - def __init__(self, name='mean', dtype=None): - super().__init__( - reduction=metrics_utils.Reduction.WEIGHTED_MEAN, name=name, dtype=dtype) + @dtensor_utils.inject_mesh + def __init__(self, name="mean", dtype=None): + super().__init__( + reduction=metrics_utils.Reduction.WEIGHTED_MEAN, + name=name, + dtype=dtype, + ) -@keras_export('keras.metrics.MeanMetricWrapper') +@keras_export("keras.metrics.MeanMetricWrapper") class MeanMetricWrapper(Mean): - """Wraps a stateless metric function with the Mean metric. + """Wraps a stateless metric function with the Mean metric. - You could use this class to quickly build a mean metric from a function. The - function needs to have the signature `fn(y_true, y_pred)` and return a - per-sample loss array. `MeanMetricWrapper.result()` will return - the average metric value across all samples seen so far. + You could use this class to quickly build a mean metric from a function. The + function needs to have the signature `fn(y_true, y_pred)` and return a + per-sample loss array. `MeanMetricWrapper.result()` will return + the average metric value across all samples seen so far. - For example: + For example: - ```python - def accuracy(y_true, y_pred): - return tf.cast(tf.math.equal(y_true, y_pred), tf.float32) + ```python + def accuracy(y_true, y_pred): + return tf.cast(tf.math.equal(y_true, y_pred), tf.float32) - accuracy_metric = tf.keras.metrics.MeanMetricWrapper(fn=accuracy) + accuracy_metric = tf.keras.metrics.MeanMetricWrapper(fn=accuracy) - keras_model.compile(..., metrics=accuracy_metric) - ``` + keras_model.compile(..., metrics=accuracy_metric) + ``` - Args: - fn: The metric function to wrap, with signature `fn(y_true, y_pred, - **kwargs)`. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - **kwargs: Keyword arguments to pass on to `fn`. - """ + Args: + fn: The metric function to wrap, with signature `fn(y_true, y_pred, + **kwargs)`. + name: (Optional) string name of the metric instance. 
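# --- Editor's aside (not part of this patch): WEIGHTED_MEAN divides by the
# sum of the weights, not by the number of values.
import tensorflow as tf

m = tf.keras.metrics.Mean()
m.update_state([2.0, 4.0], sample_weight=[0.5, 0.5])
print(m.result().numpy())  # 3.0: total = 1.0 + 2.0, count = 0.5 + 0.5 = 1.0
# --- end of editor's aside ---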
+ dtype: (Optional) data type of the metric result. + **kwargs: Keyword arguments to pass on to `fn`. + """ - @dtensor_utils.inject_mesh - def __init__(self, fn, name=None, dtype=None, **kwargs): - super().__init__(name=name, dtype=dtype) - self._fn = fn - self._fn_kwargs = kwargs + @dtensor_utils.inject_mesh + def __init__(self, fn, name=None, dtype=None, **kwargs): + super().__init__(name=name, dtype=dtype) + self._fn = fn + self._fn_kwargs = kwargs - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates metric statistics. + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates metric statistics. + + `y_true` and `y_pred` should have the same shape. + + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + sample_weight: Optional `sample_weight` acts as a + coefficient for the metric. If a scalar is provided, then the metric + is simply scaled by the given value. If `sample_weight` is a tensor + of size `[batch_size]`, then the metric for each sample of the batch + is rescaled by the corresponding element in the `sample_weight` + vector. If the shape of `sample_weight` is `[batch_size, d0, .. + dN-1]` (or can be broadcasted to this shape), then each metric + element of `y_pred` is scaled by the corresponding value of + `sample_weight`. (Note on `dN-1`: all metric functions reduce by 1 + dimension, usually the last axis (-1)). + + Returns: + Update op. + """ + y_true = tf.cast(y_true, self._dtype) + y_pred = tf.cast(y_pred, self._dtype) + [ + y_true, + y_pred, + ], sample_weight = metrics_utils.ragged_assert_compatible_and_get_flat_values( # noqa: E501 + [y_true, y_pred], sample_weight + ) + y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( + y_pred, y_true + ) + + ag_fn = tf.__internal__.autograph.tf_convert( + self._fn, tf.__internal__.autograph.control_status_ctx() + ) + matches = ag_fn(y_true, y_pred, **self._fn_kwargs) + mask = losses_utils.get_mask(matches) + sample_weight = losses_utils.apply_valid_mask( + matches, sample_weight, mask, self.reduction + ) + return super().update_state(matches, sample_weight=sample_weight) + + def get_config(self): + config = { + k: backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v + for k, v in self._fn_kwargs.items() + } + + if type(self) is MeanMetricWrapper: + # Only include function argument when the object is a + # MeanMetricWrapper and not a subclass. + config["fn"] = self._fn + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + from keras.metrics import get + + # Note that while MeanMetricWrapper itself isn't public, objects of this + # class may be created and added to the model by calling model.compile. + fn = config.pop("fn", None) + if cls is MeanMetricWrapper: + return cls(get(fn), **config) + return super(MeanMetricWrapper, cls).from_config(config) + + +@keras_export("keras.metrics.MeanTensor") +class MeanTensor(Metric): + """Computes the element-wise (weighted) mean of the given tensors. - `y_true` and `y_pred` should have the same shape. + `MeanTensor` returns a tensor with the same shape of the input tensors. The + mean value is updated by keeping local variables `total` and `count`. The + `total` tracks the sum of the weighted values, and `count` stores the sum of + the weighted counts. Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. 
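# --- Editor's aside (not part of this patch): a usage sketch for
# `MeanMetricWrapper`. `my_abs_error` is a hypothetical stateless per-sample
# function with the required fn(y_true, y_pred) signature; the Mean machinery
# averages its output across all updates.
import tensorflow as tf

def my_abs_error(y_true, y_pred):
    # Returns one value per sample, as MeanMetricWrapper expects.
    return tf.abs(y_true - y_pred)

m = tf.keras.metrics.MeanMetricWrapper(fn=my_abs_error)
m.update_state([[0.0], [1.0]], [[1.0], [1.0]])
print(m.result().numpy())  # 0.5: per-sample errors [1.0, 0.0], averaged
# --- end of editor's aside ---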
- y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - sample_weight: Optional `sample_weight` acts as a - coefficient for the metric. If a scalar is provided, then the metric is - simply scaled by the given value. If `sample_weight` is a tensor of size - `[batch_size]`, then the metric for each sample of the batch is rescaled - by the corresponding element in the `sample_weight` vector. If the shape - of `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted - to this shape), then each metric element of `y_pred` is scaled by the - corresponding value of `sample_weight`. (Note on `dN-1`: all metric - functions reduce by 1 dimension, usually the last axis (-1)). - - Returns: - Update op. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + shape: (Optional) A list of integers, a tuple of integers, or a 1-D Tensor + of type int32. If not specified, the shape is inferred from the values + at the first call of update_state. + + Standalone usage: + + >>> m = tf.keras.metrics.MeanTensor() + >>> m.update_state([0, 1, 2, 3]) + >>> m.update_state([4, 5, 6, 7]) + >>> m.result().numpy() + array([2., 3., 4., 5.], dtype=float32) + + >>> m.update_state([12, 10, 8, 6], sample_weight= [0, 0.2, 0.5, 1]) + >>> m.result().numpy() + array([2. , 3.6363635, 4.8 , 5.3333335], dtype=float32) + + >>> m = tf.keras.metrics.MeanTensor(dtype=tf.float64, shape=(1, 4)) + >>> m.result().numpy() + array([[0., 0., 0., 0.]]) + >>> m.update_state([[0, 1, 2, 3]]) + >>> m.update_state([[4, 5, 6, 7]]) + >>> m.result().numpy() + array([[2., 3., 4., 5.]]) """ - y_true = tf.cast(y_true, self._dtype) - y_pred = tf.cast(y_pred, self._dtype) - [y_true, y_pred], sample_weight = ( - metrics_utils.ragged_assert_compatible_and_get_flat_values( - [y_true, y_pred], sample_weight)) - y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( - y_pred, y_true) - - ag_fn = tf.__internal__.autograph.tf_convert(self._fn, tf.__internal__.autograph.control_status_ctx()) - matches = ag_fn(y_true, y_pred, **self._fn_kwargs) - return super().update_state( - matches, sample_weight=sample_weight) - - def get_config(self): - config = {} - - if type(self) is MeanMetricWrapper: # pylint: disable=unidiomatic-typecheck - # Only include function argument when the object is a MeanMetricWrapper - # and not a subclass. - config['fn'] = self._fn - - for k, v in self._fn_kwargs.items(): - config[k] = backend.eval(v) if is_tensor_or_variable(v) else v - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - from keras.metrics import get # pylint: disable=g-import-not-at-top - # Note that while MeanMetricWrapper itself isn't public, objects of this - # class may be created and added to the model by calling model.compile. - fn = config.pop('fn', None) - if cls is MeanMetricWrapper: - return cls(get(fn), **config) - return super(MeanMetricWrapper, cls).from_config(config) - - -@keras_export('keras.metrics.MeanTensor') -class MeanTensor(Metric): - """Computes the element-wise (weighted) mean of the given tensors. - - `MeanTensor` returns a tensor with the same shape of the input tensors. The - mean value is updated by keeping local variables `total` and `count`. The - `total` tracks the sum of the weighted values, and `count` stores the sum of - the weighted counts. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. 
- shape: (Optional) A list of integers, a tuple of integers, or a 1-D Tensor - of type int32. If not specified, the shape is inferred from the values at - the first call of update_state. - - Standalone usage: - - >>> m = tf.keras.metrics.MeanTensor() - >>> m.update_state([0, 1, 2, 3]) - >>> m.update_state([4, 5, 6, 7]) - >>> m.result().numpy() - array([2., 3., 4., 5.], dtype=float32) - - >>> m.update_state([12, 10, 8, 6], sample_weight= [0, 0.2, 0.5, 1]) - >>> m.result().numpy() - array([2. , 3.6363635, 4.8 , 5.3333335], dtype=float32) - - >>> m = tf.keras.metrics.MeanTensor(dtype=tf.float64, shape=(1, 4)) - >>> m.result().numpy() - array([[0., 0., 0., 0.]]) - >>> m.update_state([[0, 1, 2, 3]]) - >>> m.update_state([[4, 5, 6, 7]]) - >>> m.result().numpy() - array([[2., 3., 4., 5.]]) - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='mean_tensor', dtype=None, shape=None): - super().__init__(name=name, dtype=dtype) - self._shape = None - self._total = None - self._count = None - self._built = False - if shape is not None: - self._build(shape) - - def _build(self, shape): - self._shape = tf.TensorShape(shape) - self._build_input_shape = self._shape - # Create new state variables - self._total = self.add_weight( - name='total', shape=shape, initializer='zeros') - self._count = self.add_weight( - name='count', shape=shape, initializer='zeros') - with tf.init_scope(): - if not tf.executing_eagerly(): - backend._initialize_variables(backend._get_session()) # pylint: disable=protected-access - self._built = True - - @property - def total(self): - return self._total if self._built else None - - @property - def count(self): - return self._count if self._built else None - - def update_state(self, values, sample_weight=None): - """Accumulates statistics for computing the element-wise mean. - Args: - values: Per-example value. - sample_weight: Optional weighting of each example. Defaults to 1. + @dtensor_utils.inject_mesh + def __init__(self, name="mean_tensor", dtype=None, shape=None): + super().__init__(name=name, dtype=dtype) + self._shape = None + self._total = None + self._count = None + self._built = False + if shape is not None: + self._build(shape) + + def _build(self, shape): + self._shape = tf.TensorShape(shape) + self._build_input_shape = self._shape + # Create new state variables + self._total = self.add_weight( + name="total", shape=shape, initializer="zeros" + ) + self._count = self.add_weight( + name="count", shape=shape, initializer="zeros" + ) + with tf.init_scope(): + if not tf.executing_eagerly(): + backend._initialize_variables(backend._get_session()) + self._built = True + + @property + def total(self): + return self._total if self._built else None + + @property + def count(self): + return self._count if self._built else None + + def update_state(self, values, sample_weight=None): + """Accumulates statistics for computing the element-wise mean. + + Args: + values: Per-example value. + sample_weight: Optional weighting of each example. Defaults to `1`. + + Returns: + Update op. + """ + values = tf.cast(values, self._dtype) + if not self._built: + self._build(values.shape) + elif values.shape != self._shape: + raise ValueError( + "MeanTensor input values must always have the same " + "shape. Expected shape (set during the first call): " + f"{self._shape}. " + f"Got: {values.shape}." 
+ ) + + num_values = tf.ones_like(values) + if sample_weight is not None: + sample_weight = tf.cast(sample_weight, self._dtype) + + # Update dimensions of weights to match with values if possible. + ( + values, + _, + sample_weight, + ) = losses_utils.squeeze_or_expand_dimensions( + values, sample_weight=sample_weight + ) + try: + # Broadcast weights if possible. + sample_weight = tf.__internal__.ops.broadcast_weights( + sample_weight, values + ) + except ValueError: + # Reduce values to same ndim as weight array + ndim = backend.ndim(values) + weight_ndim = backend.ndim(sample_weight) + values = tf.reduce_mean( + values, axis=list(range(weight_ndim, ndim)) + ) + + num_values = tf.multiply(num_values, sample_weight) + values = tf.multiply(values, sample_weight) + + update_total_op = self._total.assign_add(values) + with tf.control_dependencies([update_total_op]): + return self._count.assign_add(num_values) - Returns: - Update op. - """ - values = tf.cast(values, self._dtype) - if not self._built: - self._build(values.shape) - elif values.shape != self._shape: - raise ValueError( - 'MeanTensor input values must always have the same ' - f'shape. Expected shape (set during the first call): {self._shape}. ' - f'Got: {values.shape}.') - - num_values = tf.ones_like(values) - if sample_weight is not None: - sample_weight = tf.cast(sample_weight, self._dtype) - - # Update dimensions of weights to match with values if possible. - values, _, sample_weight = losses_utils.squeeze_or_expand_dimensions( - values, sample_weight=sample_weight) - try: - # Broadcast weights if possible. - sample_weight = tf.__internal__.ops.broadcast_weights( - sample_weight, values) - except ValueError: - # Reduce values to same ndim as weight array - ndim = backend.ndim(values) - weight_ndim = backend.ndim(sample_weight) - values = tf.reduce_mean( - values, axis=list(range(weight_ndim, ndim))) - - num_values = tf.multiply(num_values, sample_weight) - values = tf.multiply(values, sample_weight) - - update_total_op = self._total.assign_add(values) - with tf.control_dependencies([update_total_op]): - return self._count.assign_add(num_values) - - def result(self): - if not self._built: - raise ValueError( - 'MeanTensor does not have any value yet. Please call the MeanTensor ' - 'instance or use `.update_state(value)` before retrieving the result.' - ) - return tf.math.divide_no_nan(self.total, self.count) - - def reset_state(self): - if self._built: - backend.batch_set_value([ - (v, np.zeros(v.shape.as_list())) for v in self.variables - ]) + def result(self): + if not self._built: + raise ValueError( + "MeanTensor does not have any value yet. Please call the " + "MeanTensor instance or use `.update_state(value)` " + "before retrieving the result." + ) + return tf.math.divide_no_nan(self.total, self.count) + + def reset_state(self): + if self._built: + backend.batch_set_value( + [(v, np.zeros(v.shape.as_list())) for v in self.variables] + ) class SumOverBatchSize(Reduce): - """Computes the weighted sum over batch size of the given values. + """Computes the weighted sum over batch size of the given values. - For example, if values is [1, 3, 5, 7] then the metric value is 4. - If the weights were specified as [1, 1, 0, 0] then the value would be 1. + For example, if values is [1, 3, 5, 7] then the metric value is 4. + If the weights were specified as [1, 1, 0, 0] then the value would be 1. - This metric creates two variables, `total` and `count` that are used to - compute the average of `values`. 
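# --- Editor's aside (not part of this patch): the two guard rails in
# `MeanTensor` above. `result()` before any update raises, and the element
# shape is frozen by the first `update_state()` call.
import tensorflow as tf

m = tf.keras.metrics.MeanTensor()
try:
    m.result()
except ValueError as e:
    print(e)  # MeanTensor does not have any value yet. ...

m.update_state([0.0, 1.0])           # state is built with shape (2,)
try:
    m.update_state([0.0, 1.0, 2.0])  # shape (3,) no longer matches
except ValueError as e:
    print(e)  # MeanTensor input values must always have the same shape. ...
# --- end of editor's aside ---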
This average is ultimately returned as sum - over batch size which is an idempotent operation that simply divides `total` - by `count`. + This metric creates two variables, `total` and `count` that are used to + compute the average of `values`. This average is ultimately returned as sum + over batch size which is an idempotent operation that simply divides `total` + by `count`. - If `sample_weight` is `None`, weights default to 1. Use `sample_weight` of 0 - to mask values. - """ + If `sample_weight` is `None`, weights default to 1. Use `sample_weight` of + 0 to mask values. + """ - def __init__(self, name='sum_over_batch_size', dtype=None): - super().__init__( - reduction=metrics_utils.Reduction.SUM_OVER_BATCH_SIZE, - name=name, - dtype=dtype) + def __init__(self, name="sum_over_batch_size", dtype=None): + super().__init__( + reduction=metrics_utils.Reduction.SUM_OVER_BATCH_SIZE, + name=name, + dtype=dtype, + ) class SumOverBatchSizeMetricWrapper(SumOverBatchSize): - """Wraps a function with the `SumOverBatchSizeMetricWrapper` metric.""" - - def __init__(self, fn, name=None, dtype=None, **kwargs): - """Creates a `SumOverBatchSizeMetricWrapper` instance. - - Args: - fn: The metric function to wrap, with signature `fn(y_true, y_pred, - **kwargs)`. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - **kwargs: The keyword arguments that are passed on to `fn`. - """ - super().__init__(name=name, dtype=dtype) - self._fn = fn - self._fn_kwargs = kwargs - - def update_state(self, y_true, y_pred, sample_weight=None): - y_true = tf.cast(y_true, self._dtype) - y_pred = tf.cast(y_pred, self._dtype) - y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( - y_pred, y_true) - - ag_fn = tf.__internal__.autograph.tf_convert(self._fn, tf.__internal__.autograph.control_status_ctx()) - matches = ag_fn(y_true, y_pred, **self._fn_kwargs) - return super().update_state( - matches, sample_weight=sample_weight) + """Wraps a function with the `SumOverBatchSizeMetricWrapper` metric.""" + + def __init__(self, fn, name=None, dtype=None, **kwargs): + """Creates a `SumOverBatchSizeMetricWrapper` instance. + + Args: + fn: The metric function to wrap, with signature `fn(y_true, y_pred, + **kwargs)`. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + **kwargs: The keyword arguments that are passed on to `fn`. 
+ """ + super().__init__(name=name, dtype=dtype) + self._fn = fn + self._fn_kwargs = kwargs - def get_config(self): - config = {} - for k, v in self._fn_kwargs.items(): - config[k] = backend.eval(v) if is_tensor_or_variable(v) else v - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = tf.cast(y_true, self._dtype) + y_pred = tf.cast(y_pred, self._dtype) + y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( + y_pred, y_true + ) + + ag_fn = tf.__internal__.autograph.tf_convert( + self._fn, tf.__internal__.autograph.control_status_ctx() + ) + matches = ag_fn(y_true, y_pred, **self._fn_kwargs) + mask = losses_utils.get_mask(matches) + sample_weight = losses_utils.apply_valid_mask( + matches, sample_weight, mask, self.reduction + ) + return super().update_state(matches, sample_weight=sample_weight) + + def get_config(self): + config = { + k: backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v + for k, v in self._fn_kwargs.items() + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) def clone_metric(metric): - """Returns a clone of the metric if stateful, otherwise returns it as is.""" - if isinstance(metric, Metric): - with tf.init_scope(): - return metric.__class__.from_config(metric.get_config()) - return metric + """Returns a clone of the metric if stateful, otherwise returns it as is.""" + if isinstance(metric, Metric): + # Metrics created within a remotely-executed tf.function during + # parameter server evaluation should not be lifted out of the graph by + # `init_scope`. This way the metric variables can be local: freely + # usable and mutable within the function. This supports a visitation + # guarantee for model evaluation. 
+ if tf_utils.in_local_vars_context(): + return metric.__class__.from_config(metric.get_config()) + else: + with tf.init_scope(): + return metric.__class__.from_config(metric.get_config()) + return metric def clone_metrics(metrics): - """Clones the given metric list/dict.""" - return tf.nest.map_structure(clone_metric, metrics) + """Clones the given metric list/dict.""" + return tf.nest.map_structure(clone_metric, metrics) def is_built_in(cls): - return cls.__module__.startswith('.'.join(Metric.__module__.split('.')[:-1])) + return cls.__module__.startswith( + ".".join(Metric.__module__.split(".")[:-1]) + ) + + +class _MetricDict(dict): + """Wrapper for returned dictionary of metrics.""" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._metric_obj = None diff --git a/keras/metrics/base_metric_test.py b/keras/metrics/base_metric_test.py index 11ba02d0f3ca..d7287179f89f 100644 --- a/keras/metrics/base_metric_test.py +++ b/keras/metrics/base_metric_test.py @@ -17,727 +17,802 @@ import copy import os +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + +from keras import Model from keras import layers from keras import metrics -from keras import Model from keras.engine import base_layer from keras.engine import training as training_module from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class KerasSumTest(tf.test.TestCase, parameterized.TestCase): - - def test_sum(self): - with self.test_session(): - m = metrics.Sum(name='my_sum') - - # check config - self.assertEqual(m.name, 'my_sum') - self.assertTrue(m.stateful) - self.assertEqual(m.dtype, tf.float32) - self.assertLen(m.variables, 1) - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - - # check initial state - self.assertEqual(self.evaluate(m.total), 0) - - # check __call__() - self.assertEqual(self.evaluate(m(100)), 100) - self.assertEqual(self.evaluate(m.total), 100) - - # check update_state() and result() + state accumulation + tensor input - update_op = m.update_state(tf.convert_to_tensor([1, 5])) - self.evaluate(update_op) - self.assertAlmostEqual(self.evaluate(m.result()), 106) - self.assertEqual(self.evaluate(m.total), 106) # 100 + 1 + 5 - - # check reset_state() - m.reset_state() - self.assertEqual(self.evaluate(m.total), 0) - - def test_sum_with_sample_weight(self): - m = metrics.Sum(dtype=tf.float64) - self.assertEqual(m.dtype, tf.float64) - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - - # check scalar weight - result_t = m(100, sample_weight=0.5) - self.assertEqual(self.evaluate(result_t), 50) - self.assertEqual(self.evaluate(m.total), 50) - - # check weights not scalar and weights rank matches values rank - result_t = m([1, 5], sample_weight=[1, 0.2]) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 52., 4) # 50 + 1 + 5 * 0.2 - self.assertAlmostEqual(self.evaluate(m.total), 52., 4) - - # check weights broadcast - result_t = m([1, 2], sample_weight=0.5) - self.assertAlmostEqual(self.evaluate(result_t), 53.5, 1) # 52 + 0.5 + 1 - self.assertAlmostEqual(self.evaluate(m.total), 53.5, 1) - - # check weights squeeze - result_t = m([1, 5], sample_weight=[[1], [0.2]]) - self.assertAlmostEqual(self.evaluate(result_t), 55.5, 1) # 53.5 + 1 + 1 - 
self.assertAlmostEqual(self.evaluate(m.total), 55.5, 1) - - # check weights expand - result_t = m([[1], [5]], sample_weight=[1, 0.2]) - self.assertAlmostEqual(self.evaluate(result_t), 57.5, 2) # 55.5 + 1 + 1 - self.assertAlmostEqual(self.evaluate(m.total), 57.5, 1) - - # check values reduced to the dimensions of weight - result_t = m([[[1., 2.], [3., 2.], [0.5, 4.]]], sample_weight=[0.5]) - result = np.round(self.evaluate(result_t), decimals=2) - # result = (prev: 57.5) + 0.5 + 1 + 1.5 + 1 + 0.25 + 2 - self.assertAlmostEqual(result, 63.75, 2) - self.assertAlmostEqual(self.evaluate(m.total), 63.75, 2) - - def test_sum_graph_with_placeholder(self): - with tf.compat.v1.get_default_graph().as_default(), self.cached_session() as sess: - m = metrics.Sum() - v = tf.compat.v1.placeholder(tf.float32) - w = tf.compat.v1.placeholder(tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - - # check __call__() - result_t = m(v, sample_weight=w) - result = sess.run(result_t, feed_dict=({v: 100, w: 0.5})) - self.assertEqual(result, 50) - self.assertEqual(self.evaluate(m.total), 50) - - # check update_state() and result() - result = sess.run(result_t, feed_dict=({v: [1, 5], w: [1, 0.2]})) - self.assertAlmostEqual(result, 52., 2) # 50 + 1 + 5 * 0.2 - self.assertAlmostEqual(self.evaluate(m.total), 52., 2) - - def test_save_restore(self): - with self.test_session(): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt') - m = metrics.Sum() - checkpoint = tf.train.Checkpoint(sum=m) - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - - # update state - self.evaluate(m(100.)) - self.evaluate(m(200.)) - - # save checkpoint and then add an update - save_path = checkpoint.save(checkpoint_prefix) - self.evaluate(m(1000.)) - - # restore to the same checkpoint sum object (= 300) - checkpoint.restore(save_path).assert_consumed().run_restore_ops() - self.evaluate(m(300.)) - self.assertEqual(600., self.evaluate(m.result())) - - # restore to a different checkpoint sum object - restore_sum = metrics.Sum() - restore_checkpoint = tf.train.Checkpoint(sum=restore_sum) - status = restore_checkpoint.restore(save_path) - restore_update = restore_sum(300.) 
- status.assert_consumed().run_restore_ops() - self.evaluate(restore_update) - self.assertEqual(600., self.evaluate(restore_sum.result())) + def test_sum(self): + with self.test_session(): + m = metrics.Sum(name="my_sum") + + # check config + self.assertEqual(m.name, "my_sum") + self.assertTrue(m.stateful) + self.assertEqual(m.dtype, tf.float32) + self.assertLen(m.variables, 1) + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + + # check initial state + self.assertEqual(self.evaluate(m.total), 0) + + # check __call__() + self.assertEqual(self.evaluate(m(100)), 100) + self.assertEqual(self.evaluate(m.total), 100) + + # check update_state() and result() + state accumulation + tensor + # input + update_op = m.update_state(tf.convert_to_tensor([1, 5])) + self.evaluate(update_op) + self.assertAlmostEqual(self.evaluate(m.result()), 106) + self.assertEqual(self.evaluate(m.total), 106) # 100 + 1 + 5 + + # check reset_state() + m.reset_state() + self.assertEqual(self.evaluate(m.total), 0) + + def test_sum_with_sample_weight(self): + m = metrics.Sum(dtype=tf.float64) + self.assertEqual(m.dtype, tf.float64) + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + + # check scalar weight + result_t = m(100, sample_weight=0.5) + self.assertEqual(self.evaluate(result_t), 50) + self.assertEqual(self.evaluate(m.total), 50) + + # check weights not scalar and weights rank matches values rank + result_t = m([1, 5], sample_weight=[1, 0.2]) + result = self.evaluate(result_t) + self.assertAlmostEqual(result, 52.0, 4) # 50 + 1 + 5 * 0.2 + self.assertAlmostEqual(self.evaluate(m.total), 52.0, 4) + + # check weights broadcast + result_t = m([1, 2], sample_weight=0.5) + self.assertAlmostEqual(self.evaluate(result_t), 53.5, 1) # 52 + 0.5 + 1 + self.assertAlmostEqual(self.evaluate(m.total), 53.5, 1) + + # check weights squeeze + result_t = m([1, 5], sample_weight=[[1], [0.2]]) + self.assertAlmostEqual(self.evaluate(result_t), 55.5, 1) # 53.5 + 1 + 1 + self.assertAlmostEqual(self.evaluate(m.total), 55.5, 1) + + # check weights expand + result_t = m([[1], [5]], sample_weight=[1, 0.2]) + self.assertAlmostEqual(self.evaluate(result_t), 57.5, 2) # 55.5 + 1 + 1 + self.assertAlmostEqual(self.evaluate(m.total), 57.5, 1) + + # check values reduced to the dimensions of weight + result_t = m( + [[[1.0, 2.0], [3.0, 2.0], [0.5, 4.0]]], sample_weight=[0.5] + ) + result = np.round(self.evaluate(result_t), decimals=2) + # result = (prev: 57.5) + 0.5 + 1 + 1.5 + 1 + 0.25 + 2 + self.assertAlmostEqual(result, 63.75, 2) + self.assertAlmostEqual(self.evaluate(m.total), 63.75, 2) + + def test_sum_graph_with_placeholder(self): + with tf.compat.v1.get_default_graph().as_default(), self.cached_session() as sess: # noqa: E501 + m = metrics.Sum() + v = tf.compat.v1.placeholder(tf.float32) + w = tf.compat.v1.placeholder(tf.float32) + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + + # check __call__() + result_t = m(v, sample_weight=w) + result = sess.run(result_t, feed_dict=({v: 100, w: 0.5})) + self.assertEqual(result, 50) + self.assertEqual(self.evaluate(m.total), 50) + + # check update_state() and result() + result = sess.run(result_t, feed_dict=({v: [1, 5], w: [1, 0.2]})) + self.assertAlmostEqual(result, 52.0, 2) # 50 + 1 + 5 * 0.2 + self.assertAlmostEqual(self.evaluate(m.total), 52.0, 2) + + def test_save_restore(self): + with self.test_session(): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + m = metrics.Sum() + checkpoint = 
tf.train.Checkpoint(sum=m) + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + + # update state + self.evaluate(m(100.0)) + self.evaluate(m(200.0)) + + # save checkpoint and then add an update + save_path = checkpoint.save(checkpoint_prefix) + self.evaluate(m(1000.0)) + + # restore to the same checkpoint sum object (= 300) + checkpoint.restore(save_path).assert_consumed().run_restore_ops() + self.evaluate(m(300.0)) + self.assertEqual(600.0, self.evaluate(m.result())) + + # restore to a different checkpoint sum object + restore_sum = metrics.Sum() + restore_checkpoint = tf.train.Checkpoint(sum=restore_sum) + status = restore_checkpoint.restore(save_path) + restore_update = restore_sum(300.0) + status.assert_consumed().run_restore_ops() + self.evaluate(restore_update) + self.assertEqual(600.0, self.evaluate(restore_sum.result())) + + def test_init_scope_during_add_weight(self): + seen_variables = 0 + + def capture_variable_creation(next_creator_fn, **kwargs) -> tf.Variable: + nonlocal seen_variables + seen_variables += 1 + return tf.constant(seen_variables) + + @tf.function + def create_variables(): + # When this method is called in a graph context, any usage of + # `tf.init_scope` will bypass this variable creator scope, resulting + # in different behavior. + with tf.variable_creator_scope(capture_variable_creation): + return metrics.Sum().variables + + metric_variables = self.evaluate(create_variables()) + # The Sum metric contains a single `total` variable, which the creation + # scope has changed to a `1` tensor. + self.assertAllEqual([1], metric_variables) class MeanTest(test_combinations.TestCase): - # TODO(b/120949004): Re-enable garbage collection check - # @tf_test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - @test_combinations.run_all_keras_modes - def test_mean(self): - m = metrics.Mean(name='my_mean') - - # check config - self.assertEqual(m.name, 'my_mean') - self.assertTrue(m.stateful) - self.assertEqual(m.dtype, tf.float32) - self.assertEqual(len(m.variables), 2) - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - - # check initial state - self.assertEqual(self.evaluate(m.total), 0) - self.assertEqual(self.evaluate(m.count), 0) - - # check __call__() - self.assertEqual(self.evaluate(m(100)), 100) - self.assertEqual(self.evaluate(m.total), 100) - self.assertEqual(self.evaluate(m.count), 1) - - # check update_state() and result() + state accumulation + tensor input - update_op = m.update_state([ - tf.convert_to_tensor(1), - tf.convert_to_tensor(5) - ]) - self.evaluate(update_op) - self.assertAlmostEqual(self.evaluate(m.result()), 106 / 3, 2) - self.assertEqual(self.evaluate(m.total), 106) # 100 + 1 + 5 - self.assertEqual(self.evaluate(m.count), 3) - - # check reset_state() - m.reset_state() - self.assertEqual(self.evaluate(m.total), 0) - self.assertEqual(self.evaluate(m.count), 0) - - # Check save and restore config - m2 = metrics.Mean.from_config(m.get_config()) - self.assertEqual(m2.name, 'my_mean') - self.assertTrue(m2.stateful) - self.assertEqual(m2.dtype, tf.float32) - self.assertEqual(len(m2.variables), 2) - - @test_utils.run_v2_only - def test_function_wrapped_reset_state(self): - m = metrics.Mean(name='my_mean') - - # check reset_state in function. 
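# --- Editor's aside (not part of this patch): a standalone sketch of the
# mechanism test_init_scope_during_add_weight relies on. A
# tf.variable_creator_scope observes every variable created under it, so it
# can spy on (or replace) the state a metric builds in add_weight.
import tensorflow as tf

created = []

def spy_creator(next_creator, **kwargs):
    created.append(kwargs.get("name"))  # record the variable's name
    return next_creator(**kwargs)       # then create it normally

with tf.variable_creator_scope(spy_creator):
    v = tf.Variable(0.0, name="total")

print(created)  # ["total"]
# --- end of editor's aside ---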
- @tf.function - def reset_in_fn(): - m.reset_state() - return m.update_state(100) - - for _ in range(5): - self.evaluate(reset_in_fn()) - self.assertEqual(self.evaluate(m.count), 1) - - @test_combinations.run_all_keras_modes - def test_mean_with_sample_weight(self): - m = metrics.Mean(dtype=tf.float64) - self.assertEqual(m.dtype, tf.float64) - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - - # check scalar weight - result_t = m(100, sample_weight=0.5) - self.assertEqual(self.evaluate(result_t), 50 / 0.5) - self.assertEqual(self.evaluate(m.total), 50) - self.assertEqual(self.evaluate(m.count), 0.5) - - # check weights not scalar and weights rank matches values rank - result_t = m([1, 5], sample_weight=[1, 0.2]) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 52 / 1.7, 2) - self.assertAlmostEqual(self.evaluate(m.total), 52, 2) # 50 + 1 + 5 * 0.2 - self.assertAlmostEqual(self.evaluate(m.count), 1.7, 2) # 0.5 + 1.2 - - # check weights broadcast - result_t = m([1, 2], sample_weight=0.5) - self.assertAlmostEqual(self.evaluate(result_t), 53.5 / 2.7, 2) - self.assertAlmostEqual(self.evaluate(m.total), 53.5, 2) # 52 + 0.5 + 1 - self.assertAlmostEqual(self.evaluate(m.count), 2.7, 2) # 1.7 + 0.5 + 0.5 - - # check weights squeeze - result_t = m([1, 5], sample_weight=[[1], [0.2]]) - self.assertAlmostEqual(self.evaluate(result_t), 55.5 / 3.9, 2) - self.assertAlmostEqual(self.evaluate(m.total), 55.5, 2) # 53.5 + 1 + 1 - self.assertAlmostEqual(self.evaluate(m.count), 3.9, 2) # 2.7 + 1.2 - - # check weights expand - result_t = m([[1], [5]], sample_weight=[1, 0.2]) - self.assertAlmostEqual(self.evaluate(result_t), 57.5 / 5.1, 2) - self.assertAlmostEqual(self.evaluate(m.total), 57.5, 2) # 55.5 + 1 + 1 - self.assertAlmostEqual(self.evaluate(m.count), 5.1, 2) # 3.9 + 1.2 - - # check values reduced to the dimensions of weight - result_t = m([[[1., 2.], [3., 2.], [0.5, 4.]]], sample_weight=[0.5]) - result = np.round(self.evaluate(result_t), decimals=2) # 58.5 / 5.6 - self.assertEqual(result, 10.45) - self.assertEqual(np.round(self.evaluate(m.total), decimals=2), 58.54) - self.assertEqual(np.round(self.evaluate(m.count), decimals=2), 5.6) - - @test_combinations.run_all_keras_modes - def test_mean_graph_with_placeholder(self): - with tf.compat.v1.get_default_graph().as_default(), self.cached_session() as sess: - m = metrics.Mean() - v = tf.compat.v1.placeholder(tf.float32) - w = tf.compat.v1.placeholder(tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - - # check __call__() - result_t = m(v, sample_weight=w) - result = sess.run(result_t, feed_dict=({v: 100, w: 0.5})) - self.assertEqual(self.evaluate(m.total), 50) - self.assertEqual(self.evaluate(m.count), 0.5) - self.assertEqual(result, 50 / 0.5) - - # check update_state() and result() - result = sess.run(result_t, feed_dict=({v: [1, 5], w: [1, 0.2]})) - self.assertAlmostEqual(self.evaluate(m.total), 52, 2) # 50 + 1 + 5 * 0.2 - self.assertAlmostEqual(self.evaluate(m.count), 1.7, 2) # 0.5 + 1.2 - self.assertAlmostEqual(result, 52 / 1.7, 2) - - @test_combinations.run_all_keras_modes - def test_save_restore(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt') - m = metrics.Mean() - checkpoint = tf.train.Checkpoint(mean=m) - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - - # update state - self.evaluate(m(100.)) - self.evaluate(m(200.)) - - # save checkpoint and then add an update - save_path = 
checkpoint.save(checkpoint_prefix) - self.evaluate(m(1000.)) - - # restore to the same checkpoint mean object - checkpoint.restore(save_path).assert_consumed().run_restore_ops() - self.evaluate(m(300.)) - self.assertEqual(200., self.evaluate(m.result())) - - # restore to a different checkpoint mean object - restore_mean = metrics.Mean() - restore_checkpoint = tf.train.Checkpoint(mean=restore_mean) - status = restore_checkpoint.restore(save_path) - restore_update = restore_mean(300.) - status.assert_consumed().run_restore_ops() - self.evaluate(restore_update) - self.assertEqual(200., self.evaluate(restore_mean.result())) - self.assertEqual(3, self.evaluate(restore_mean.count)) - - @test_combinations.run_all_keras_modes - def test_multiple_instances(self): - m = metrics.Mean() - m2 = metrics.Mean() - - self.assertEqual(m.name, 'mean') - self.assertEqual(m2.name, 'mean') - - self.assertEqual([v.name for v in m.variables], - test_utils.get_expected_metric_variable_names( - ['total', 'count'])) - self.assertEqual([v.name for v in m2.variables], - test_utils.get_expected_metric_variable_names( - ['total', 'count'], name_suffix='_1')) - - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - self.evaluate(tf.compat.v1.variables_initializer(m2.variables)) - - # check initial state - self.assertEqual(self.evaluate(m.total), 0) - self.assertEqual(self.evaluate(m.count), 0) - self.assertEqual(self.evaluate(m2.total), 0) - self.assertEqual(self.evaluate(m2.count), 0) - - # check __call__() - self.assertEqual(self.evaluate(m(100)), 100) - self.assertEqual(self.evaluate(m.total), 100) - self.assertEqual(self.evaluate(m.count), 1) - self.assertEqual(self.evaluate(m2.total), 0) - self.assertEqual(self.evaluate(m2.count), 0) - - self.assertEqual(self.evaluate(m2([63, 10])), 36.5) - self.assertEqual(self.evaluate(m2.total), 73) - self.assertEqual(self.evaluate(m2.count), 2) - self.assertEqual(self.evaluate(m.result()), 100) - self.assertEqual(self.evaluate(m.total), 100) - self.assertEqual(self.evaluate(m.count), 1) - - @test_utils.run_v2_only - def test_deepcopy_of_metrics(self): - m = metrics.Mean(name='my_mean') - - m.reset_state() - m.update_state(100) - m_copied = copy.deepcopy(m) - m_copied.update_state(200) - - self.assertEqual(self.evaluate(m.result()), 100) - self.assertEqual(self.evaluate(m_copied.result()), 150) - - m.reset_state() - - self.assertEqual(self.evaluate(m.result()), 0) - self.assertEqual(self.evaluate(m_copied.result()), 150) + # TODO(b/120949004): Re-enable garbage collection check + # @tf_test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_combinations.run_all_keras_modes + def test_mean(self): + m = metrics.Mean(name="my_mean") + + # check config + self.assertEqual(m.name, "my_mean") + self.assertTrue(m.stateful) + self.assertEqual(m.dtype, tf.float32) + self.assertEqual(len(m.variables), 2) + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + + # check initial state + self.assertEqual(self.evaluate(m.total), 0) + self.assertEqual(self.evaluate(m.count), 0) + + # check __call__() + self.assertEqual(self.evaluate(m(100)), 100) + self.assertEqual(self.evaluate(m.total), 100) + self.assertEqual(self.evaluate(m.count), 1) + + # check update_state() and result() + state accumulation + tensor input + update_op = m.update_state( + [tf.convert_to_tensor(1), tf.convert_to_tensor(5)] + ) + self.evaluate(update_op) + self.assertAlmostEqual(self.evaluate(m.result()), 106 / 3, 2) + self.assertEqual(self.evaluate(m.total), 106) # 100 + 1 + 5 + 
self.assertEqual(self.evaluate(m.count), 3) + + # check reset_state() + m.reset_state() + self.assertEqual(self.evaluate(m.total), 0) + self.assertEqual(self.evaluate(m.count), 0) + + # Check save and restore config + m2 = metrics.Mean.from_config(m.get_config()) + self.assertEqual(m2.name, "my_mean") + self.assertTrue(m2.stateful) + self.assertEqual(m2.dtype, tf.float32) + self.assertEqual(len(m2.variables), 2) + + @test_utils.run_v2_only + def test_function_wrapped_reset_state(self): + m = metrics.Mean(name="my_mean") + + # check reset_state in function. + @tf.function + def reset_in_fn(): + m.reset_state() + m.update_state(100) + + for _ in range(5): + self.evaluate(reset_in_fn()) + self.assertEqual(self.evaluate(m.count), 1) + + @test_combinations.run_all_keras_modes + def test_mean_with_sample_weight(self): + m = metrics.Mean(dtype=tf.float64) + self.assertEqual(m.dtype, tf.float64) + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + + # check scalar weight + result_t = m(100, sample_weight=0.5) + self.assertEqual(self.evaluate(result_t), 50 / 0.5) + self.assertEqual(self.evaluate(m.total), 50) + self.assertEqual(self.evaluate(m.count), 0.5) + + # check weights not scalar and weights rank matches values rank + result_t = m([1, 5], sample_weight=[1, 0.2]) + result = self.evaluate(result_t) + self.assertAlmostEqual(result, 52 / 1.7, 2) + self.assertAlmostEqual( + self.evaluate(m.total), 52, 2 + ) # 50 + 1 + 5 * 0.2 + self.assertAlmostEqual(self.evaluate(m.count), 1.7, 2) # 0.5 + 1.2 + + # check weights broadcast + result_t = m([1, 2], sample_weight=0.5) + self.assertAlmostEqual(self.evaluate(result_t), 53.5 / 2.7, 2) + self.assertAlmostEqual(self.evaluate(m.total), 53.5, 2) # 52 + 0.5 + 1 + self.assertAlmostEqual( + self.evaluate(m.count), 2.7, 2 + ) # 1.7 + 0.5 + 0.5 + + # check weights squeeze + result_t = m([1, 5], sample_weight=[[1], [0.2]]) + self.assertAlmostEqual(self.evaluate(result_t), 55.5 / 3.9, 2) + self.assertAlmostEqual(self.evaluate(m.total), 55.5, 2) # 53.5 + 1 + 1 + self.assertAlmostEqual(self.evaluate(m.count), 3.9, 2) # 2.7 + 1.2 + + # check weights expand + result_t = m([[1], [5]], sample_weight=[1, 0.2]) + self.assertAlmostEqual(self.evaluate(result_t), 57.5 / 5.1, 2) + self.assertAlmostEqual(self.evaluate(m.total), 57.5, 2) # 55.5 + 1 + 1 + self.assertAlmostEqual(self.evaluate(m.count), 5.1, 2) # 3.9 + 1.2 + + # check values reduced to the dimensions of weight + result_t = m( + [[[1.0, 2.0], [3.0, 2.0], [0.5, 4.0]]], sample_weight=[0.5] + ) + result = np.round(self.evaluate(result_t), decimals=2) # 58.5 / 5.6 + self.assertEqual(result, 10.45) + self.assertEqual(np.round(self.evaluate(m.total), decimals=2), 58.54) + self.assertEqual(np.round(self.evaluate(m.count), decimals=2), 5.6) + + @test_combinations.run_all_keras_modes + def test_mean_graph_with_placeholder(self): + with tf.compat.v1.get_default_graph().as_default(), self.cached_session() as sess: # noqa: E501 + m = metrics.Mean() + v = tf.compat.v1.placeholder(tf.float32) + w = tf.compat.v1.placeholder(tf.float32) + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + + # check __call__() + result_t = m(v, sample_weight=w) + result = sess.run(result_t, feed_dict=({v: 100, w: 0.5})) + self.assertEqual(self.evaluate(m.total), 50) + self.assertEqual(self.evaluate(m.count), 0.5) + self.assertEqual(result, 50 / 0.5) + + # check update_state() and result() + result = sess.run(result_t, feed_dict=({v: [1, 5], w: [1, 0.2]})) + self.assertAlmostEqual( + self.evaluate(m.total), 52, 2 + ) # 50 
+ 1 + 5 * 0.2 + self.assertAlmostEqual(self.evaluate(m.count), 1.7, 2) # 0.5 + 1.2 + self.assertAlmostEqual(result, 52 / 1.7, 2) + + @test_combinations.run_all_keras_modes + def test_save_restore(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + m = metrics.Mean() + checkpoint = tf.train.Checkpoint(mean=m) + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + + # update state + self.evaluate(m(100.0)) + self.evaluate(m(200.0)) + + # save checkpoint and then add an update + save_path = checkpoint.save(checkpoint_prefix) + self.evaluate(m(1000.0)) + + # restore to the same checkpoint mean object + checkpoint.restore(save_path).assert_consumed().run_restore_ops() + self.evaluate(m(300.0)) + self.assertEqual(200.0, self.evaluate(m.result())) + + # restore to a different checkpoint mean object + restore_mean = metrics.Mean() + restore_checkpoint = tf.train.Checkpoint(mean=restore_mean) + status = restore_checkpoint.restore(save_path) + restore_update = restore_mean(300.0) + status.assert_consumed().run_restore_ops() + self.evaluate(restore_update) + self.assertEqual(200.0, self.evaluate(restore_mean.result())) + self.assertEqual(3, self.evaluate(restore_mean.count)) + + @test_combinations.run_all_keras_modes + def test_multiple_instances(self): + m = metrics.Mean() + m2 = metrics.Mean() + + self.assertEqual(m.name, "mean") + self.assertEqual(m2.name, "mean") + + self.assertEqual( + [v.name for v in m.variables], + test_utils.get_expected_metric_variable_names(["total", "count"]), + ) + self.assertEqual( + [v.name for v in m2.variables], + test_utils.get_expected_metric_variable_names( + ["total", "count"], name_suffix="_1" + ), + ) + + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + self.evaluate(tf.compat.v1.variables_initializer(m2.variables)) + + # check initial state + self.assertEqual(self.evaluate(m.total), 0) + self.assertEqual(self.evaluate(m.count), 0) + self.assertEqual(self.evaluate(m2.total), 0) + self.assertEqual(self.evaluate(m2.count), 0) + + # check __call__() + self.assertEqual(self.evaluate(m(100)), 100) + self.assertEqual(self.evaluate(m.total), 100) + self.assertEqual(self.evaluate(m.count), 1) + self.assertEqual(self.evaluate(m2.total), 0) + self.assertEqual(self.evaluate(m2.count), 0) + + self.assertEqual(self.evaluate(m2([63, 10])), 36.5) + self.assertEqual(self.evaluate(m2.total), 73) + self.assertEqual(self.evaluate(m2.count), 2) + self.assertEqual(self.evaluate(m.result()), 100) + self.assertEqual(self.evaluate(m.total), 100) + self.assertEqual(self.evaluate(m.count), 1) + + @test_utils.run_v2_only + def test_deepcopy_of_metrics(self): + m = metrics.Mean(name="my_mean") + + m.reset_state() + m.update_state(100) + m_copied = copy.deepcopy(m) + m_copied.update_state(200) + + self.assertEqual(self.evaluate(m.result()), 100) + self.assertEqual(self.evaluate(m_copied.result()), 150) + + m.reset_state() + + self.assertEqual(self.evaluate(m.result()), 0) + self.assertEqual(self.evaluate(m_copied.result()), 150) class MeanTensorTest(tf.test.TestCase, parameterized.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_config(self): - with self.test_session(): - m = metrics.MeanTensor(name='mean_by_element') - - # check config - self.assertEqual(m.name, 'mean_by_element') - self.assertTrue(m.stateful) - self.assertEqual(m.dtype, tf.float32) - self.assertEmpty(m.variables) - - with self.assertRaisesRegex(ValueError, 'does not 
have any value yet'): - m.result() - - self.evaluate(m([[3], [5], [3]])) - self.assertAllEqual(m._shape, [3, 1]) - - m2 = metrics.MeanTensor.from_config(m.get_config()) - self.assertEqual(m2.name, 'mean_by_element') - self.assertTrue(m2.stateful) - self.assertEqual(m2.dtype, tf.float32) - self.assertEmpty(m2.variables) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_unweighted(self): - with self.test_session(): - m = metrics.MeanTensor(dtype=tf.float64) - - # check __call__() - self.assertAllClose(self.evaluate(m([100, 40])), [100, 40]) - self.assertAllClose(self.evaluate(m.total), [100, 40]) - self.assertAllClose(self.evaluate(m.count), [1, 1]) - - # check update_state() and result() + state accumulation + tensor input - update_op = m.update_state([ - tf.convert_to_tensor(1), - tf.convert_to_tensor(5) - ]) - self.evaluate(update_op) - self.assertAllClose(self.evaluate(m.result()), [50.5, 22.5]) - self.assertAllClose(self.evaluate(m.total), [101, 45]) - self.assertAllClose(self.evaluate(m.count), [2, 2]) - - # check reset_state() - m.reset_state() - self.assertAllClose(self.evaluate(m.total), [0, 0]) - self.assertAllClose(self.evaluate(m.count), [0, 0]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_weighted(self): - with self.test_session(): - m = metrics.MeanTensor(dtype=tf.float64) - self.assertEqual(m.dtype, tf.float64) - - # check scalar weight - result_t = m([100, 30], sample_weight=0.5) - self.assertAllClose(self.evaluate(result_t), [100, 30]) - self.assertAllClose(self.evaluate(m.total), [50, 15]) - self.assertAllClose(self.evaluate(m.count), [0.5, 0.5]) - - # check weights not scalar and weights rank matches values rank - result_t = m([1, 5], sample_weight=[1, 0.2]) - result = self.evaluate(result_t) - self.assertAllClose(result, [51 / 1.5, 16 / 0.7], 2) - self.assertAllClose(self.evaluate(m.total), [51, 16]) - self.assertAllClose(self.evaluate(m.count), [1.5, 0.7]) - - # check weights broadcast - result_t = m([1, 2], sample_weight=0.5) - self.assertAllClose(self.evaluate(result_t), [51.5 / 2, 17 / 1.2]) - self.assertAllClose(self.evaluate(m.total), [51.5, 17]) - self.assertAllClose(self.evaluate(m.count), [2, 1.2]) - - # check weights squeeze - result_t = m([1, 5], sample_weight=[[1], [0.2]]) - self.assertAllClose(self.evaluate(result_t), [52.5 / 3, 18 / 1.4]) - self.assertAllClose(self.evaluate(m.total), [52.5, 18]) - self.assertAllClose(self.evaluate(m.count), [3, 1.4]) - - # check weights expand - m = metrics.MeanTensor(dtype=tf.float64) - self.evaluate(tf.compat.v1.variables_initializer(m.variables)) - result_t = m([[1], [5]], sample_weight=[1, 0.2]) - self.assertAllClose(self.evaluate(result_t), [[1], [5]]) - self.assertAllClose(self.evaluate(m.total), [[1], [1]]) - self.assertAllClose(self.evaluate(m.count), [[1], [0.2]]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_invalid_value_shape(self): - m = metrics.MeanTensor(dtype=tf.float64) - m([1]) - with self.assertRaisesRegex( - ValueError, 'MeanTensor input values must always have the same shape'): - m([1, 5]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_build_in_tf_function(self): - """Ensure that variables are created correctly in a tf function.""" - m = metrics.MeanTensor(dtype=tf.float64) - - @tf.function - def call_metric(x): - return m(x) - - with self.test_session(): - 
self.assertAllClose(self.evaluate(call_metric([100, 40])), [100, 40]) - self.assertAllClose(self.evaluate(m.total), [100, 40]) - self.assertAllClose(self.evaluate(m.count), [1, 1]) - self.assertAllClose(self.evaluate(call_metric([20, 2])), [60, 21]) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_in_keras_model(self): - class ModelWithMetric(Model): - - def __init__(self): - super().__init__() - self.dense1 = layers.Dense( - 3, activation='relu', kernel_initializer='ones') - self.dense2 = layers.Dense( - 1, activation='sigmoid', kernel_initializer='ones') - self.mean_tensor = metrics.MeanTensor() - - def call(self, x): - x = self.dense1(x) - x = self.dense2(x) - self.mean_tensor(self.dense1.kernel) - return x - - model = ModelWithMetric() - model.compile( - loss='mae', - optimizer='rmsprop', - run_eagerly=True) - - x = np.ones((100, 4)) - y = np.zeros((100, 1)) - model.evaluate(x, y, batch_size=50) - self.assertAllClose(self.evaluate(model.mean_tensor.result()), - np.ones((4, 3))) - self.assertAllClose(self.evaluate(model.mean_tensor.total), - np.full((4, 3), 2)) - self.assertAllClose(self.evaluate(model.mean_tensor.count), - np.full((4, 3), 2)) - - model.evaluate(x, y, batch_size=25) - self.assertAllClose(self.evaluate(model.mean_tensor.result()), - np.ones((4, 3))) - self.assertAllClose(self.evaluate(model.mean_tensor.total), - np.full((4, 3), 4)) - self.assertAllClose(self.evaluate(model.mean_tensor.count), - np.full((4, 3), 4)) + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_config(self): + with self.test_session(): + m = metrics.MeanTensor(name="mean_by_element") + + # check config + self.assertEqual(m.name, "mean_by_element") + self.assertTrue(m.stateful) + self.assertEqual(m.dtype, tf.float32) + self.assertEmpty(m.variables) + + with self.assertRaisesRegex( + ValueError, "does not have any value yet" + ): + m.result() + + self.evaluate(m([[3], [5], [3]])) + self.assertAllEqual(m._shape, [3, 1]) + + m2 = metrics.MeanTensor.from_config(m.get_config()) + self.assertEqual(m2.name, "mean_by_element") + self.assertTrue(m2.stateful) + self.assertEqual(m2.dtype, tf.float32) + self.assertEmpty(m2.variables) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_unweighted(self): + with self.test_session(): + m = metrics.MeanTensor(dtype=tf.float64) + + # check __call__() + self.assertAllClose(self.evaluate(m([100, 40])), [100, 40]) + self.assertAllClose(self.evaluate(m.total), [100, 40]) + self.assertAllClose(self.evaluate(m.count), [1, 1]) + + # check update_state() and result() + state accumulation + tensor + # input + update_op = m.update_state( + [tf.convert_to_tensor(1), tf.convert_to_tensor(5)] + ) + self.evaluate(update_op) + self.assertAllClose(self.evaluate(m.result()), [50.5, 22.5]) + self.assertAllClose(self.evaluate(m.total), [101, 45]) + self.assertAllClose(self.evaluate(m.count), [2, 2]) + + # check reset_state() + m.reset_state() + self.assertAllClose(self.evaluate(m.total), [0, 0]) + self.assertAllClose(self.evaluate(m.count), [0, 0]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_weighted(self): + with self.test_session(): + m = metrics.MeanTensor(dtype=tf.float64) + self.assertEqual(m.dtype, tf.float64) + + # check scalar weight + result_t = m([100, 30], sample_weight=0.5) + self.assertAllClose(self.evaluate(result_t), [100, 30]) + self.assertAllClose(self.evaluate(m.total), [50, 15]) + 
self.assertAllClose(self.evaluate(m.count), [0.5, 0.5]) + + # check weights not scalar and weights rank matches values rank + result_t = m([1, 5], sample_weight=[1, 0.2]) + result = self.evaluate(result_t) + self.assertAllClose(result, [51 / 1.5, 16 / 0.7], 2) + self.assertAllClose(self.evaluate(m.total), [51, 16]) + self.assertAllClose(self.evaluate(m.count), [1.5, 0.7]) + + # check weights broadcast + result_t = m([1, 2], sample_weight=0.5) + self.assertAllClose(self.evaluate(result_t), [51.5 / 2, 17 / 1.2]) + self.assertAllClose(self.evaluate(m.total), [51.5, 17]) + self.assertAllClose(self.evaluate(m.count), [2, 1.2]) + + # check weights squeeze + result_t = m([1, 5], sample_weight=[[1], [0.2]]) + self.assertAllClose(self.evaluate(result_t), [52.5 / 3, 18 / 1.4]) + self.assertAllClose(self.evaluate(m.total), [52.5, 18]) + self.assertAllClose(self.evaluate(m.count), [3, 1.4]) + + # check weights expand + m = metrics.MeanTensor(dtype=tf.float64) + self.evaluate(tf.compat.v1.variables_initializer(m.variables)) + result_t = m([[1], [5]], sample_weight=[1, 0.2]) + self.assertAllClose(self.evaluate(result_t), [[1], [5]]) + self.assertAllClose(self.evaluate(m.total), [[1], [1]]) + self.assertAllClose(self.evaluate(m.count), [[1], [0.2]]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_invalid_value_shape(self): + m = metrics.MeanTensor(dtype=tf.float64) + m([1]) + with self.assertRaisesRegex( + ValueError, + "MeanTensor input values must always have the same shape", + ): + m([1, 5]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_build_in_tf_function(self): + """Ensure that variables are created correctly in a tf function.""" + m = metrics.MeanTensor(dtype=tf.float64) + + @tf.function + def call_metric(x): + return m(x) + + with self.test_session(): + self.assertAllClose( + self.evaluate(call_metric([100, 40])), [100, 40] + ) + self.assertAllClose(self.evaluate(m.total), [100, 40]) + self.assertAllClose(self.evaluate(m.count), [1, 1]) + self.assertAllClose(self.evaluate(call_metric([20, 2])), [60, 21]) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_in_keras_model(self): + class ModelWithMetric(Model): + def __init__(self): + super().__init__() + self.dense1 = layers.Dense( + 3, activation="relu", kernel_initializer="ones" + ) + self.dense2 = layers.Dense( + 1, activation="sigmoid", kernel_initializer="ones" + ) + self.mean_tensor = metrics.MeanTensor() + + def call(self, x): + x = self.dense1(x) + x = self.dense2(x) + self.mean_tensor(self.dense1.kernel) + return x + + model = ModelWithMetric() + model.compile(loss="mae", optimizer="rmsprop", run_eagerly=True) + + x = np.ones((100, 4)) + y = np.zeros((100, 1)) + model.evaluate(x, y, batch_size=50) + self.assertAllClose( + self.evaluate(model.mean_tensor.result()), np.ones((4, 3)) + ) + self.assertAllClose( + self.evaluate(model.mean_tensor.total), np.full((4, 3), 2) + ) + self.assertAllClose( + self.evaluate(model.mean_tensor.count), np.full((4, 3), 2) + ) + + model.evaluate(x, y, batch_size=25) + self.assertAllClose( + self.evaluate(model.mean_tensor.result()), np.ones((4, 3)) + ) + self.assertAllClose( + self.evaluate(model.mean_tensor.total), np.full((4, 3), 4) + ) + self.assertAllClose( + self.evaluate(model.mean_tensor.count), np.full((4, 3), 4) + ) class BinaryTruePositives(metrics.Metric): + def __init__(self, name="binary_true_positives", **kwargs): + super().__init__(name=name, **kwargs) + 
self.true_positives = self.add_weight(name="tp", initializer="zeros") - def __init__(self, name='binary_true_positives', **kwargs): - super().__init__(name=name, **kwargs) - self.true_positives = self.add_weight(name='tp', initializer='zeros') + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = tf.cast(y_true, tf.bool) + y_pred = tf.cast(y_pred, tf.bool) - def update_state(self, y_true, y_pred, sample_weight=None): - y_true = tf.cast(y_true, tf.bool) - y_pred = tf.cast(y_pred, tf.bool) + values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True)) + values = tf.cast(values, self.dtype) + if sample_weight is not None: + sample_weight = tf.cast(sample_weight, dtype=self.dtype) + sample_weight = tf.__internal__.ops.broadcast_weights( + sample_weight, values + ) + values = tf.multiply(values, sample_weight) + self.true_positives.assign_add(tf.reduce_sum(values)) - values = tf.logical_and( - tf.equal(y_true, True), tf.equal(y_pred, True)) - values = tf.cast(values, self.dtype) - if sample_weight is not None: - sample_weight = tf.cast(sample_weight, dtype=self.dtype) - sample_weight = tf.__internal__.ops.broadcast_weights( - sample_weight, values) - values = tf.multiply(values, sample_weight) - self.true_positives.assign_add(tf.reduce_sum(values)) - - def result(self): - return self.true_positives + def result(self): + return self.true_positives class BinaryTruePositivesViaControlFlow(metrics.Metric): + def __init__(self, name="binary_true_positives", **kwargs): + super().__init__(name=name, **kwargs) + self.true_positives = self.add_weight(name="tp", initializer="zeros") - def __init__(self, name='binary_true_positives', **kwargs): - super().__init__(name=name, **kwargs) - self.true_positives = self.add_weight(name='tp', initializer='zeros') - - def update_state(self, y_true, y_pred, sample_weight=None): - y_true = tf.cast(y_true, tf.bool) - y_pred = tf.cast(y_pred, tf.bool) + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = tf.cast(y_true, tf.bool) + y_pred = tf.cast(y_pred, tf.bool) - for i in range(len(y_true)): - for j in range(len(y_true[i])): - if y_true[i][j] and y_pred[i][j]: - if sample_weight is None: - self.true_positives.assign_add(1) - else: - self.true_positives.assign_add(sample_weight[i][0]) + for i in range(len(y_true)): + for j in range(len(y_true[i])): + if y_true[i][j] and y_pred[i][j]: + if sample_weight is None: + self.true_positives.assign_add(1) + else: + self.true_positives.assign_add(sample_weight[i][0]) - def result(self): - if tf.constant(True): - return self.true_positives - return 0.0 + def result(self): + if tf.constant(True): + return self.true_positives + return 0.0 -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class CustomMetricsTest(tf.test.TestCase): - - def test_config(self): - btp_obj = BinaryTruePositives(name='btp', dtype=tf.int32) - self.assertEqual(btp_obj.name, 'btp') - self.assertEqual(btp_obj.dtype, tf.int32) - - # Check save and restore config - btp_obj2 = BinaryTruePositives.from_config(btp_obj.get_config()) - self.assertEqual(btp_obj2.name, 'btp') - self.assertEqual(btp_obj2.dtype, tf.int32) - - def test_unweighted(self): - btp_obj = BinaryTruePositives() - self.evaluate(tf.compat.v1.variables_initializer(btp_obj.variables)) - y_true = tf.constant([[0, 0.9, 0, 1, 0], [0, 0, 1, 1, 1], - [1, 1, 1, 1, 0], [0, 0, 0, 0, 1.5]]) - y_pred = tf.constant([[0, 0, 1, 5, 0], [1, 1, 1, 1, 1], - 
[0, 1, 0, 1, 0], [1, 10, 1, 1, 1]]) - - update_op = btp_obj.update_state(y_true, y_pred) # pylint: disable=assignment-from-no-return - self.evaluate(update_op) - result = btp_obj.result() - self.assertEqual(7, self.evaluate(result)) - - def test_weighted(self): - btp_obj = BinaryTruePositives() - self.evaluate(tf.compat.v1.variables_initializer(btp_obj.variables)) - y_true = tf.constant([[0, 0.9, 0, 1, 0], [0, 0, 1, 1, 1], - [1, 1, 1, 1, 0], [0, 0, 0, 0, 1.5]]) - y_pred = tf.constant([[0, 0, 1, 5, 0], [1, 1, 1, 1, 1], - [0, 1, 0, 1, 0], [1, 10, 1, 1, 1]]) - sample_weight = tf.constant([[1.], [1.5], [2.], [2.5]]) - result = btp_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertEqual(12, self.evaluate(result)) - - def test_autograph(self): - metric = BinaryTruePositivesViaControlFlow() - self.evaluate(tf.compat.v1.variables_initializer(metric.variables)) - y_true = tf.constant([[0, 0.9, 0, 1, 0], [0, 0, 1, 1, 1], - [1, 1, 1, 1, 0], [0, 0, 0, 0, 1.5]]) - y_pred = tf.constant([[0, 0, 1, 5, 0], [1, 1, 1, 1, 1], - [0, 1, 0, 1, 0], [1, 10, 1, 1, 1]]) - sample_weight = tf.constant([[1.], [1.5], [2.], [2.5]]) - - @tf.function - def compute_metric(y_true, y_pred, sample_weight): - metric(y_true, y_pred, sample_weight) - return metric.result() - - result = compute_metric(y_true, y_pred, sample_weight) - self.assertEqual(12, self.evaluate(result)) - - def test_metric_wrappers_autograph(self): - def metric_fn(y_true, y_pred): - x = tf.constant(0.0) - for i in range(len(y_true)): - for j in range(len(y_true[i])): - if tf.equal(y_true[i][j], y_pred[i][j]) and y_true[i][j] > 0: - x += 1.0 - return x - - mean_metric = metrics.MeanMetricWrapper(metric_fn) - sum_metric = metrics.SumOverBatchSizeMetricWrapper(metric_fn) - self.evaluate(tf.compat.v1.variables_initializer(mean_metric.variables)) - self.evaluate(tf.compat.v1.variables_initializer(sum_metric.variables)) - - y_true = tf.constant([[0, 0, 0, 1, 0], - [0, 0, 1, 1, 1], - [1, 1, 1, 1, 0], - [1, 1, 1, 0, 1]]) - y_pred = tf.constant([[0, 0, 1, 1, 0], - [1, 1, 1, 1, 1], - [0, 1, 0, 1, 0], - [1, 1, 1, 1, 1]]) - - @tf.function - def tf_functioned_metric_fn(metric, y_true, y_pred): - return metric(y_true, y_pred) - - metric_result = tf_functioned_metric_fn(mean_metric, y_true, y_pred) - self.assertAllClose(self.evaluate(metric_result), 10, 1e-2) - metric_result = tf_functioned_metric_fn(sum_metric, y_true, y_pred) - self.assertAllClose(self.evaluate(metric_result), 10, 1e-2) - - def test_metric_not_tracked_as_sublayer_in_layer(self): - - class MyLayer(base_layer.Layer): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.mean_obj = metrics.Mean(name='my_mean_obj') - - def call(self, x): - self.add_metric( - tf.reduce_sum(x), aggregation='mean', name='my_mean_tensor') - self.add_metric(self.mean_obj(x)) - return x - - layer = MyLayer() - x = np.ones((1, 1)) - layer(x) - self.assertLen(list(layer._flatten_layers(include_self=False)), 0) - self.assertLen(layer.metrics, 2) - - def test_metric_not_tracked_as_sublayer_in_model(self): - - class MyModel(training_module.Model): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.mean_obj = metrics.Mean(name='my_mean_obj') - - def call(self, x): - self.add_metric( - tf.reduce_sum(x), aggregation='mean', name='my_mean_tensor') - self.add_metric(self.mean_obj(x)) - return x - - model = MyModel() - x = np.ones((1, 1)) - model(x) - self.assertLen(list(model._flatten_layers(include_self=False)), 0) - self.assertLen(model.layers, 0) - self.assertLen(model.metrics, 2) - - def 
test_invalid_custom_metric_class_error_msg(self): - x = layers.Input(shape=(2,)) - y = layers.Dense(3)(x) - model = training_module.Model(x, y) - - class BadMetric(metrics.Metric): - - def update_state(self, y_true, y_pred, sample_weight=None): - return - - def result(self): - return - - with self.assertRaisesRegex(RuntimeError, - 'can only be a single'): - model.compile('sgd', - 'mse', - metrics=[BadMetric()]) - model.fit(np.ones((10, 2)), np.ones((10, 3))) - - def test_invalid_custom_metric_fn_error_msg(self): - x = layers.Input(shape=(2,)) - y = layers.Dense(3)(x) - model = training_module.Model(x, y) - - def bad_metric(y_true, y_pred, sample_weight=None): # pylint: disable=unused-argument - return None - - def dict_metric(y_true, y_pred, sample_weight=None): # pylint: disable=unused-argument - return {'value': 0.} - - with self.assertRaisesRegex(RuntimeError, - 'The output of a metric function can only be'): - model.compile('sgd', - 'mse', - metrics=[bad_metric]) - model.fit(np.ones((10, 2)), np.ones((10, 3))) - with self.assertRaisesRegex(RuntimeError, - 'To return a dict of values, implement'): - model.compile('sgd', - 'mse', - metrics=[dict_metric]) - model.fit(np.ones((10, 2)), np.ones((10, 3))) - - -if __name__ == '__main__': - tf.test.main() + def test_config(self): + btp_obj = BinaryTruePositives(name="btp", dtype=tf.int32) + self.assertEqual(btp_obj.name, "btp") + self.assertEqual(btp_obj.dtype, tf.int32) + + # Check save and restore config + btp_obj2 = BinaryTruePositives.from_config(btp_obj.get_config()) + self.assertEqual(btp_obj2.name, "btp") + self.assertEqual(btp_obj2.dtype, tf.int32) + + def test_unweighted(self): + btp_obj = BinaryTruePositives() + self.evaluate(tf.compat.v1.variables_initializer(btp_obj.variables)) + y_true = tf.constant( + [ + [0, 0.9, 0, 1, 0], + [0, 0, 1, 1, 1], + [1, 1, 1, 1, 0], + [0, 0, 0, 0, 1.5], + ] + ) + y_pred = tf.constant( + [ + [0, 0, 1, 5, 0], + [1, 1, 1, 1, 1], + [0, 1, 0, 1, 0], + [1, 10, 1, 1, 1], + ] + ) + + update_op = btp_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = btp_obj.result() + self.assertEqual(7, self.evaluate(result)) + + def test_weighted(self): + btp_obj = BinaryTruePositives() + self.evaluate(tf.compat.v1.variables_initializer(btp_obj.variables)) + y_true = tf.constant( + [ + [0, 0.9, 0, 1, 0], + [0, 0, 1, 1, 1], + [1, 1, 1, 1, 0], + [0, 0, 0, 0, 1.5], + ] + ) + y_pred = tf.constant( + [ + [0, 0, 1, 5, 0], + [1, 1, 1, 1, 1], + [0, 1, 0, 1, 0], + [1, 10, 1, 1, 1], + ] + ) + sample_weight = tf.constant([[1.0], [1.5], [2.0], [2.5]]) + result = btp_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertEqual(12, self.evaluate(result)) + + def test_autograph(self): + metric = BinaryTruePositivesViaControlFlow() + self.evaluate(tf.compat.v1.variables_initializer(metric.variables)) + y_true = tf.constant( + [ + [0, 0.9, 0, 1, 0], + [0, 0, 1, 1, 1], + [1, 1, 1, 1, 0], + [0, 0, 0, 0, 1.5], + ] + ) + y_pred = tf.constant( + [ + [0, 0, 1, 5, 0], + [1, 1, 1, 1, 1], + [0, 1, 0, 1, 0], + [1, 10, 1, 1, 1], + ] + ) + sample_weight = tf.constant([[1.0], [1.5], [2.0], [2.5]]) + + @tf.function + def compute_metric(y_true, y_pred, sample_weight): + metric(y_true, y_pred, sample_weight) + return metric.result() + + result = compute_metric(y_true, y_pred, sample_weight) + self.assertEqual(12, self.evaluate(result)) + + def test_metric_wrappers_autograph(self): + def metric_fn(y_true, y_pred): + x = tf.constant(0.0) + for i in range(len(y_true)): + for j in range(len(y_true[i])): + if ( + 
tf.equal(y_true[i][j], y_pred[i][j]) + and y_true[i][j] > 0 + ): + x += 1.0 + return x + + mean_metric = metrics.MeanMetricWrapper(metric_fn) + sum_metric = metrics.SumOverBatchSizeMetricWrapper(metric_fn) + self.evaluate(tf.compat.v1.variables_initializer(mean_metric.variables)) + self.evaluate(tf.compat.v1.variables_initializer(sum_metric.variables)) + + y_true = tf.constant( + [[0, 0, 0, 1, 0], [0, 0, 1, 1, 1], [1, 1, 1, 1, 0], [1, 1, 1, 0, 1]] + ) + y_pred = tf.constant( + [[0, 0, 1, 1, 0], [1, 1, 1, 1, 1], [0, 1, 0, 1, 0], [1, 1, 1, 1, 1]] + ) + + @tf.function + def tf_functioned_metric_fn(metric, y_true, y_pred): + return metric(y_true, y_pred) + + metric_result = tf_functioned_metric_fn(mean_metric, y_true, y_pred) + self.assertAllClose(self.evaluate(metric_result), 10, 1e-2) + metric_result = tf_functioned_metric_fn(sum_metric, y_true, y_pred) + self.assertAllClose(self.evaluate(metric_result), 10, 1e-2) + + def test_metric_not_tracked_as_sublayer_in_layer(self): + class MyLayer(base_layer.Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.mean_obj = metrics.Mean(name="my_mean_obj") + + def call(self, x): + self.add_metric( + tf.reduce_sum(x), aggregation="mean", name="my_mean_tensor" + ) + self.add_metric(self.mean_obj(x)) + return x + + layer = MyLayer() + x = np.ones((1, 1)) + layer(x) + self.assertLen(list(layer._flatten_layers(include_self=False)), 0) + self.assertLen(layer.metrics, 2) + + def test_metric_not_tracked_as_sublayer_in_model(self): + class MyModel(training_module.Model): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.mean_obj = metrics.Mean(name="my_mean_obj") + + def call(self, x): + self.add_metric( + tf.reduce_sum(x), aggregation="mean", name="my_mean_tensor" + ) + self.add_metric(self.mean_obj(x)) + return x + + model = MyModel() + x = np.ones((1, 1)) + model(x) + self.assertLen(list(model._flatten_layers(include_self=False)), 0) + self.assertLen(model.layers, 0) + self.assertLen(model.metrics, 2) + + def test_invalid_custom_metric_class_error_msg(self): + x = layers.Input(shape=(2,)) + y = layers.Dense(3)(x) + model = training_module.Model(x, y) + + class BadMetric(metrics.Metric): + def update_state(self, y_true, y_pred, sample_weight=None): + return + + def result(self): + return + + with self.assertRaisesRegex(RuntimeError, "can only be a single"): + model.compile("sgd", "mse", metrics=[BadMetric()]) + model.fit(np.ones((10, 2)), np.ones((10, 3))) + + def test_invalid_custom_metric_fn_error_msg(self): + x = layers.Input(shape=(2,)) + y = layers.Dense(3)(x) + model = training_module.Model(x, y) + + def bad_metric(y_true, y_pred, sample_weight=None): + return None + + def dict_metric(y_true, y_pred, sample_weight=None): + return {"value": 0.0} + + with self.assertRaisesRegex( + RuntimeError, "The output of a metric function can only be" + ): + model.compile("sgd", "mse", metrics=[bad_metric]) + model.fit(np.ones((10, 2)), np.ones((10, 3))) + with self.assertRaisesRegex( + RuntimeError, "To return a dict of values, implement" + ): + model.compile("sgd", "mse", metrics=[dict_metric]) + model.fit(np.ones((10, 2)), np.ones((10, 3))) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/confusion_matrix_test.py b/keras/metrics/confusion_matrix_test.py deleted file mode 100644 index cf8889218a3a..000000000000 --- a/keras/metrics/confusion_matrix_test.py +++ /dev/null @@ -1,1897 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
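
Note on the metrics_test.py hunk above: both custom test classes follow the standard Keras custom-metric contract, subclass metrics.Metric, create state with add_weight(), mutate it in update_state(), and read it in result(). A minimal self-contained sketch of that contract; the SumAbsError metric is illustrative only and not part of this patch:

import tensorflow as tf
from keras import metrics

class SumAbsError(metrics.Metric):
    # Illustrative only: accumulates total absolute error across updates.
    def __init__(self, name="sum_abs_error", **kwargs):
        super().__init__(name=name, **kwargs)
        self.total = self.add_weight(name="total", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        values = tf.abs(tf.cast(y_true, self.dtype) - tf.cast(y_pred, self.dtype))
        if sample_weight is not None:
            values = values * tf.cast(sample_weight, self.dtype)
        self.total.assign_add(tf.reduce_sum(values))

    def result(self):
        return self.total

m = SumAbsError()
m.update_state([1.0, 2.0], [1.5, 1.0])  # |1 - 1.5| + |2 - 1| = 1.5
assert abs(float(m.result()) - 1.5) < 1e-6
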
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Keras metrics functions.""" - -import tensorflow.compat.v2 as tf - -import json - -from absl.testing import parameterized -import numpy as np -from keras.testing_infra import test_combinations -from keras import layers -from keras import metrics -from keras import models -from keras.utils import metrics_utils -from tensorflow.python.platform import tf_logging - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class FalsePositivesTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - fp_obj = metrics.FalsePositives(name='my_fp', thresholds=[0.4, 0.9]) - self.assertEqual(fp_obj.name, 'my_fp') - self.assertLen(fp_obj.variables, 1) - self.assertEqual(fp_obj.thresholds, [0.4, 0.9]) - - # Check save and restore config - fp_obj2 = metrics.FalsePositives.from_config(fp_obj.get_config()) - self.assertEqual(fp_obj2.name, 'my_fp') - self.assertLen(fp_obj2.variables, 1) - self.assertEqual(fp_obj2.thresholds, [0.4, 0.9]) - - def test_unweighted(self): - fp_obj = metrics.FalsePositives() - self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) - - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - - update_op = fp_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = fp_obj.result() - self.assertAllClose(7., result) - - def test_weighted(self): - fp_obj = metrics.FalsePositives() - self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - sample_weight = tf.constant((1., 1.5, 2., 2.5)) - result = fp_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(14., self.evaluate(result)) - - def test_unweighted_with_thresholds(self): - fp_obj = metrics.FalsePositives(thresholds=[0.15, 0.5, 0.85]) - self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) - - y_pred = tf.constant(((0.9, 0.2, 0.8, 0.1), (0.2, 0.9, 0.7, 0.6), - (0.1, 0.2, 0.4, 0.3), (0, 1, 0.7, 0.3))) - y_true = tf.constant(((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), - (1, 1, 1, 1))) - - update_op = fp_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = fp_obj.result() - self.assertAllClose([7., 4., 2.], result) - - def test_weighted_with_thresholds(self): - fp_obj = metrics.FalsePositives(thresholds=[0.15, 0.5, 0.85]) - self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) - - y_pred = tf.constant(((0.9, 0.2, 0.8, 0.1), (0.2, 0.9, 0.7, 0.6), - (0.1, 0.2, 0.4, 0.3), (0, 1, 0.7, 0.3))) - y_true = tf.constant(((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), - (1, 1, 1, 1))) - sample_weight = ((1.0, 2.0, 3.0, 5.0), (7.0, 11.0, 13.0, 17.0), 
- (19.0, 23.0, 29.0, 31.0), (5.0, 15.0, 10.0, 0)) - - result = fp_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose([125., 42., 12.], self.evaluate(result)) - - def test_threshold_limit(self): - with self.assertRaisesRegex( - ValueError, - r'Threshold values must be in \[0, 1\]. Received: \[-1, 2\]'): - metrics.FalsePositives(thresholds=[-1, 0.5, 2]) - - with self.assertRaisesRegex( - ValueError, - r'Threshold values must be in \[0, 1\]. Received: \[None\]'): - metrics.FalsePositives(thresholds=[None]) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class FalseNegativesTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - fn_obj = metrics.FalseNegatives(name='my_fn', thresholds=[0.4, 0.9]) - self.assertEqual(fn_obj.name, 'my_fn') - self.assertLen(fn_obj.variables, 1) - self.assertEqual(fn_obj.thresholds, [0.4, 0.9]) - - # Check save and restore config - fn_obj2 = metrics.FalseNegatives.from_config(fn_obj.get_config()) - self.assertEqual(fn_obj2.name, 'my_fn') - self.assertLen(fn_obj2.variables, 1) - self.assertEqual(fn_obj2.thresholds, [0.4, 0.9]) - - def test_unweighted(self): - fn_obj = metrics.FalseNegatives() - self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) - - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - - update_op = fn_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = fn_obj.result() - self.assertAllClose(3., result) - - def test_weighted(self): - fn_obj = metrics.FalseNegatives() - self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - sample_weight = tf.constant((1., 1.5, 2., 2.5)) - result = fn_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(5., self.evaluate(result)) - - def test_unweighted_with_thresholds(self): - fn_obj = metrics.FalseNegatives(thresholds=[0.15, 0.5, 0.85]) - self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) - - y_pred = tf.constant(((0.9, 0.2, 0.8, 0.1), (0.2, 0.9, 0.7, 0.6), - (0.1, 0.2, 0.4, 0.3), (0, 1, 0.7, 0.3))) - y_true = tf.constant(((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), - (1, 1, 1, 1))) - - update_op = fn_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = fn_obj.result() - self.assertAllClose([1., 4., 6.], result) - - def test_weighted_with_thresholds(self): - fn_obj = metrics.FalseNegatives(thresholds=[0.15, 0.5, 0.85]) - self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) - - y_pred = tf.constant(((0.9, 0.2, 0.8, 0.1), (0.2, 0.9, 0.7, 0.6), - (0.1, 0.2, 0.4, 0.3), (0, 1, 0.7, 0.3))) - y_true = tf.constant(((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), - (1, 1, 1, 1))) - sample_weight = ((3.0,), (5.0,), (7.0,), (4.0,)) - - result = fn_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose([4., 16., 23.], self.evaluate(result)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class TrueNegativesTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - tn_obj = metrics.TrueNegatives(name='my_tn', thresholds=[0.4, 0.9]) - self.assertEqual(tn_obj.name, 'my_tn') - self.assertLen(tn_obj.variables, 1) - self.assertEqual(tn_obj.thresholds, [0.4, 
0.9]) - - # Check save and restore config - tn_obj2 = metrics.TrueNegatives.from_config(tn_obj.get_config()) - self.assertEqual(tn_obj2.name, 'my_tn') - self.assertLen(tn_obj2.variables, 1) - self.assertEqual(tn_obj2.thresholds, [0.4, 0.9]) - - def test_unweighted(self): - tn_obj = metrics.TrueNegatives() - self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) - - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - - update_op = tn_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = tn_obj.result() - self.assertAllClose(3., result) - - def test_weighted(self): - tn_obj = metrics.TrueNegatives() - self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - sample_weight = tf.constant((1., 1.5, 2., 2.5)) - result = tn_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(4., self.evaluate(result)) - - def test_unweighted_with_thresholds(self): - tn_obj = metrics.TrueNegatives(thresholds=[0.15, 0.5, 0.85]) - self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) - - y_pred = tf.constant(((0.9, 0.2, 0.8, 0.1), (0.2, 0.9, 0.7, 0.6), - (0.1, 0.2, 0.4, 0.3), (0, 1, 0.7, 0.3))) - y_true = tf.constant(((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), - (1, 1, 1, 1))) - - update_op = tn_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = tn_obj.result() - self.assertAllClose([2., 5., 7.], result) - - def test_weighted_with_thresholds(self): - tn_obj = metrics.TrueNegatives(thresholds=[0.15, 0.5, 0.85]) - self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) - - y_pred = tf.constant(((0.9, 0.2, 0.8, 0.1), (0.2, 0.9, 0.7, 0.6), - (0.1, 0.2, 0.4, 0.3), (0, 1, 0.7, 0.3))) - y_true = tf.constant(((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), - (1, 1, 1, 1))) - sample_weight = ((0.0, 2.0, 3.0, 5.0),) - - result = tn_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose([5., 15., 23.], self.evaluate(result)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class TruePositivesTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - tp_obj = metrics.TruePositives(name='my_tp', thresholds=[0.4, 0.9]) - self.assertEqual(tp_obj.name, 'my_tp') - self.assertLen(tp_obj.variables, 1) - self.assertEqual(tp_obj.thresholds, [0.4, 0.9]) - - # Check save and restore config - tp_obj2 = metrics.TruePositives.from_config(tp_obj.get_config()) - self.assertEqual(tp_obj2.name, 'my_tp') - self.assertLen(tp_obj2.variables, 1) - self.assertEqual(tp_obj2.thresholds, [0.4, 0.9]) - - def test_unweighted(self): - tp_obj = metrics.TruePositives() - self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) - - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - - update_op = tp_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = tp_obj.result() - self.assertAllClose(7., result) - - def test_weighted(self): - tp_obj = metrics.TruePositives() - self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 
0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - sample_weight = tf.constant((1., 1.5, 2., 2.5)) - result = tp_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(12., self.evaluate(result)) - - def test_unweighted_with_thresholds(self): - tp_obj = metrics.TruePositives(thresholds=[0.15, 0.5, 0.85]) - self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) - - y_pred = tf.constant(((0.9, 0.2, 0.8, 0.1), (0.2, 0.9, 0.7, 0.6), - (0.1, 0.2, 0.4, 0.3), (0, 1, 0.7, 0.3))) - y_true = tf.constant(((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), - (1, 1, 1, 1))) - - update_op = tp_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = tp_obj.result() - self.assertAllClose([6., 3., 1.], result) - - def test_weighted_with_thresholds(self): - tp_obj = metrics.TruePositives(thresholds=[0.15, 0.5, 0.85]) - self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) - - y_pred = tf.constant(((0.9, 0.2, 0.8, 0.1), (0.2, 0.9, 0.7, 0.6), - (0.1, 0.2, 0.4, 0.3), (0, 1, 0.7, 0.3))) - y_true = tf.constant(((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), - (1, 1, 1, 1))) - - result = tp_obj(y_true, y_pred, sample_weight=37.) - self.assertAllClose([222., 111., 37.], self.evaluate(result)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class PrecisionTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - p_obj = metrics.Precision( - name='my_precision', thresholds=[0.4, 0.9], top_k=15, class_id=12) - self.assertEqual(p_obj.name, 'my_precision') - self.assertLen(p_obj.variables, 2) - self.assertEqual([v.name for v in p_obj.variables], - ['true_positives:0', 'false_positives:0']) - self.assertEqual(p_obj.thresholds, [0.4, 0.9]) - self.assertEqual(p_obj.top_k, 15) - self.assertEqual(p_obj.class_id, 12) - - # Check save and restore config - p_obj2 = metrics.Precision.from_config(p_obj.get_config()) - self.assertEqual(p_obj2.name, 'my_precision') - self.assertLen(p_obj2.variables, 2) - self.assertEqual(p_obj2.thresholds, [0.4, 0.9]) - self.assertEqual(p_obj2.top_k, 15) - self.assertEqual(p_obj2.class_id, 12) - - def test_value_is_idempotent(self): - p_obj = metrics.Precision(thresholds=[0.3, 0.72]) - y_pred = tf.random.uniform(shape=(10, 3)) - y_true = tf.random.uniform(shape=(10, 3)) - update_op = p_obj.update_state(y_true, y_pred) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - - # Run several updates. - for _ in range(10): - self.evaluate(update_op) - - # Then verify idempotency. 
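
The idempotency pattern used in this and the following test classes can be reproduced in a few lines of eager code; a sketch, outside the graph/eager test harness used here:

import tensorflow as tf
from keras import metrics

p = metrics.Precision(thresholds=[0.3, 0.72])
p.update_state([1, 0, 1], [0.9, 0.6, 0.4])
first = p.result().numpy()
# result() only reads the accumulator variables; repeated calls return
# the same values until update_state() mutates the state again.
for _ in range(5):
    assert (p.result().numpy() == first).all()
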
- initial_precision = self.evaluate(p_obj.result()) - for _ in range(10): - self.assertArrayNear(initial_precision, self.evaluate(p_obj.result()), - 1e-3) - - def test_unweighted(self): - p_obj = metrics.Precision() - y_pred = tf.constant([1, 0, 1, 0], shape=(1, 4)) - y_true = tf.constant([0, 1, 1, 0], shape=(1, 4)) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - result = p_obj(y_true, y_pred) - self.assertAlmostEqual(0.5, self.evaluate(result)) - - def test_unweighted_all_incorrect(self): - p_obj = metrics.Precision(thresholds=[0.5]) - inputs = np.random.randint(0, 2, size=(100, 1)) - y_pred = tf.constant(inputs) - y_true = tf.constant(1 - inputs) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - result = p_obj(y_true, y_pred) - self.assertAlmostEqual(0, self.evaluate(result)) - - def test_weighted(self): - p_obj = metrics.Precision() - y_pred = tf.constant([[1, 0, 1, 0], [1, 0, 1, 0]]) - y_true = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]]) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - result = p_obj( - y_true, - y_pred, - sample_weight=tf.constant([[1, 2, 3, 4], [4, 3, 2, 1]])) - weighted_tp = 3.0 + 4.0 - weighted_positives = (1.0 + 3.0) + (4.0 + 2.0) - expected_precision = weighted_tp / weighted_positives - self.assertAlmostEqual(expected_precision, self.evaluate(result)) - - def test_div_by_zero(self): - p_obj = metrics.Precision() - y_pred = tf.constant([0, 0, 0, 0]) - y_true = tf.constant([0, 0, 0, 0]) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - result = p_obj(y_true, y_pred) - self.assertEqual(0, self.evaluate(result)) - - def test_unweighted_with_threshold(self): - p_obj = metrics.Precision(thresholds=[0.5, 0.7]) - y_pred = tf.constant([1, 0, 0.6, 0], shape=(1, 4)) - y_true = tf.constant([0, 1, 1, 0], shape=(1, 4)) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - result = p_obj(y_true, y_pred) - self.assertArrayNear([0.5, 0.], self.evaluate(result), 0) - - def test_weighted_with_threshold(self): - p_obj = metrics.Precision(thresholds=[0.5, 1.]) - y_true = tf.constant([[0, 1], [1, 0]], shape=(2, 2)) - y_pred = tf.constant([[1, 0], [0.6, 0]], - shape=(2, 2), - dtype=tf.float32) - weights = tf.constant([[4, 0], [3, 1]], - shape=(2, 2), - dtype=tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - result = p_obj(y_true, y_pred, sample_weight=weights) - weighted_tp = 0 + 3. - weighted_positives = (0 + 3.) + (4. + 0.) - expected_precision = weighted_tp / weighted_positives - self.assertArrayNear([expected_precision, 0], self.evaluate(result), 1e-3) - - def test_multiple_updates(self): - p_obj = metrics.Precision(thresholds=[0.5, 1.]) - y_true = tf.constant([[0, 1], [1, 0]], shape=(2, 2)) - y_pred = tf.constant([[1, 0], [0.6, 0]], - shape=(2, 2), - dtype=tf.float32) - weights = tf.constant([[4, 0], [3, 1]], - shape=(2, 2), - dtype=tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - update_op = p_obj.update_state(y_true, y_pred, sample_weight=weights) - for _ in range(2): - self.evaluate(update_op) - - weighted_tp = (0 + 3.) + (0 + 3.) - weighted_positives = ((0 + 3.) + (4. + 0.)) + ((0 + 3.) + (4. 
+ 0.)) - expected_precision = weighted_tp / weighted_positives - self.assertArrayNear([expected_precision, 0], self.evaluate(p_obj.result()), - 1e-3) - - def test_unweighted_top_k(self): - p_obj = metrics.Precision(top_k=3) - y_pred = tf.constant([0.2, 0.1, 0.5, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - result = p_obj(y_true, y_pred) - self.assertAlmostEqual(1. / 3, self.evaluate(result)) - - def test_weighted_top_k(self): - p_obj = metrics.Precision(top_k=3) - y_pred1 = tf.constant([0.2, 0.1, 0.4, 0, 0.2], shape=(1, 5)) - y_true1 = tf.constant([0, 1, 1, 0, 1], shape=(1, 5)) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - self.evaluate( - p_obj( - y_true1, - y_pred1, - sample_weight=tf.constant([[1, 4, 2, 3, 5]]))) - - y_pred2 = tf.constant([0.2, 0.6, 0.4, 0.2, 0.2], shape=(1, 5)) - y_true2 = tf.constant([1, 0, 1, 1, 1], shape=(1, 5)) - result = p_obj(y_true2, y_pred2, sample_weight=tf.constant(3)) - - tp = (2 + 5) + (3 + 3) - predicted_positives = (1 + 2 + 5) + (3 + 3 + 3) - expected_precision = tp / predicted_positives - self.assertAlmostEqual(expected_precision, self.evaluate(result)) - - def test_unweighted_class_id(self): - p_obj = metrics.Precision(class_id=2) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - - y_pred = tf.constant([0.2, 0.1, 0.6, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - result = p_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) - self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) - - y_pred = tf.constant([0.2, 0.1, 0, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - result = p_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) - self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) - - y_pred = tf.constant([0.2, 0.1, 0.6, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 0, 0, 0], shape=(1, 5)) - result = p_obj(y_true, y_pred) - self.assertAlmostEqual(0.5, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) - self.assertAlmostEqual(1, self.evaluate(p_obj.false_positives)) - - def test_unweighted_top_k_and_class_id(self): - p_obj = metrics.Precision(class_id=2, top_k=2) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - - y_pred = tf.constant([0.2, 0.6, 0.3, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - result = p_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) - self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) - - y_pred = tf.constant([1, 1, 0.9, 1, 1], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - result = p_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) - self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) - - def test_unweighted_top_k_and_threshold(self): - p_obj = metrics.Precision(thresholds=.7, top_k=2) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - - y_pred = tf.constant([0.2, 0.8, 0.6, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 1], shape=(1, 5)) - result = p_obj(y_true, y_pred) - 
self.assertAlmostEqual(1, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) - self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class RecallTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - r_obj = metrics.Recall( - name='my_recall', thresholds=[0.4, 0.9], top_k=15, class_id=12) - self.assertEqual(r_obj.name, 'my_recall') - self.assertLen(r_obj.variables, 2) - self.assertEqual([v.name for v in r_obj.variables], - ['true_positives:0', 'false_negatives:0']) - self.assertEqual(r_obj.thresholds, [0.4, 0.9]) - self.assertEqual(r_obj.top_k, 15) - self.assertEqual(r_obj.class_id, 12) - - # Check save and restore config - r_obj2 = metrics.Recall.from_config(r_obj.get_config()) - self.assertEqual(r_obj2.name, 'my_recall') - self.assertLen(r_obj2.variables, 2) - self.assertEqual(r_obj2.thresholds, [0.4, 0.9]) - self.assertEqual(r_obj2.top_k, 15) - self.assertEqual(r_obj2.class_id, 12) - - def test_value_is_idempotent(self): - r_obj = metrics.Recall(thresholds=[0.3, 0.72]) - y_pred = tf.random.uniform(shape=(10, 3)) - y_true = tf.random.uniform(shape=(10, 3)) - update_op = r_obj.update_state(y_true, y_pred) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - - # Run several updates. - for _ in range(10): - self.evaluate(update_op) - - # Then verify idempotency. - initial_recall = self.evaluate(r_obj.result()) - for _ in range(10): - self.assertArrayNear(initial_recall, self.evaluate(r_obj.result()), 1e-3) - - def test_unweighted(self): - r_obj = metrics.Recall() - y_pred = tf.constant([1, 0, 1, 0], shape=(1, 4)) - y_true = tf.constant([0, 1, 1, 0], shape=(1, 4)) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - result = r_obj(y_true, y_pred) - self.assertAlmostEqual(0.5, self.evaluate(result)) - - def test_unweighted_all_incorrect(self): - r_obj = metrics.Recall(thresholds=[0.5]) - inputs = np.random.randint(0, 2, size=(100, 1)) - y_pred = tf.constant(inputs) - y_true = tf.constant(1 - inputs) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - result = r_obj(y_true, y_pred) - self.assertAlmostEqual(0, self.evaluate(result)) - - def test_weighted(self): - r_obj = metrics.Recall() - y_pred = tf.constant([[1, 0, 1, 0], [0, 1, 0, 1]]) - y_true = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]]) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - result = r_obj( - y_true, - y_pred, - sample_weight=tf.constant([[1, 2, 3, 4], [4, 3, 2, 1]])) - weighted_tp = 3.0 + 1.0 - weighted_t = (2.0 + 3.0) + (4.0 + 1.0) - expected_recall = weighted_tp / weighted_t - self.assertAlmostEqual(expected_recall, self.evaluate(result)) - - def test_div_by_zero(self): - r_obj = metrics.Recall() - y_pred = tf.constant([0, 0, 0, 0]) - y_true = tf.constant([0, 0, 0, 0]) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - result = r_obj(y_true, y_pred) - self.assertEqual(0, self.evaluate(result)) - - def test_unweighted_with_threshold(self): - r_obj = metrics.Recall(thresholds=[0.5, 0.7]) - y_pred = tf.constant([1, 0, 0.6, 0], shape=(1, 4)) - y_true = tf.constant([0, 1, 1, 0], shape=(1, 4)) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - result = r_obj(y_true, y_pred) - self.assertArrayNear([0.5, 0.], self.evaluate(result), 0) - - def test_weighted_with_threshold(self): - r_obj = metrics.Recall(thresholds=[0.5, 1.]) - y_true = tf.constant([[0, 1], [1, 0]], 
shape=(2, 2)) - y_pred = tf.constant([[1, 0], [0.6, 0]], - shape=(2, 2), - dtype=tf.float32) - weights = tf.constant([[1, 4], [3, 2]], - shape=(2, 2), - dtype=tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - result = r_obj(y_true, y_pred, sample_weight=weights) - weighted_tp = 0 + 3. - weighted_positives = (0 + 3.) + (4. + 0.) - expected_recall = weighted_tp / weighted_positives - self.assertArrayNear([expected_recall, 0], self.evaluate(result), 1e-3) - - def test_multiple_updates(self): - r_obj = metrics.Recall(thresholds=[0.5, 1.]) - y_true = tf.constant([[0, 1], [1, 0]], shape=(2, 2)) - y_pred = tf.constant([[1, 0], [0.6, 0]], - shape=(2, 2), - dtype=tf.float32) - weights = tf.constant([[1, 4], [3, 2]], - shape=(2, 2), - dtype=tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - update_op = r_obj.update_state(y_true, y_pred, sample_weight=weights) - for _ in range(2): - self.evaluate(update_op) - - weighted_tp = (0 + 3.) + (0 + 3.) - weighted_positives = ((0 + 3.) + (4. + 0.)) + ((0 + 3.) + (4. + 0.)) - expected_recall = weighted_tp / weighted_positives - self.assertArrayNear([expected_recall, 0], self.evaluate(r_obj.result()), - 1e-3) - - def test_unweighted_top_k(self): - r_obj = metrics.Recall(top_k=3) - y_pred = tf.constant([0.2, 0.1, 0.5, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - result = r_obj(y_true, y_pred) - self.assertAlmostEqual(0.5, self.evaluate(result)) - - def test_weighted_top_k(self): - r_obj = metrics.Recall(top_k=3) - y_pred1 = tf.constant([0.2, 0.1, 0.4, 0, 0.2], shape=(1, 5)) - y_true1 = tf.constant([0, 1, 1, 0, 1], shape=(1, 5)) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - self.evaluate( - r_obj( - y_true1, - y_pred1, - sample_weight=tf.constant([[1, 4, 2, 3, 5]]))) - - y_pred2 = tf.constant([0.2, 0.6, 0.4, 0.2, 0.2], shape=(1, 5)) - y_true2 = tf.constant([1, 0, 1, 1, 1], shape=(1, 5)) - result = r_obj(y_true2, y_pred2, sample_weight=tf.constant(3)) - - tp = (2 + 5) + (3 + 3) - positives = (4 + 2 + 5) + (3 + 3 + 3 + 3) - expected_recall = tp / positives - self.assertAlmostEqual(expected_recall, self.evaluate(result)) - - def test_unweighted_class_id(self): - r_obj = metrics.Recall(class_id=2) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - - y_pred = tf.constant([0.2, 0.1, 0.6, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - result = r_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives)) - self.assertAlmostEqual(0, self.evaluate(r_obj.false_negatives)) - - y_pred = tf.constant([0.2, 0.1, 0, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - result = r_obj(y_true, y_pred) - self.assertAlmostEqual(0.5, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives)) - self.assertAlmostEqual(1, self.evaluate(r_obj.false_negatives)) - - y_pred = tf.constant([0.2, 0.1, 0.6, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 0, 0, 0], shape=(1, 5)) - result = r_obj(y_true, y_pred) - self.assertAlmostEqual(0.5, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives)) - self.assertAlmostEqual(1, self.evaluate(r_obj.false_negatives)) - - def test_unweighted_top_k_and_class_id(self): - r_obj = metrics.Recall(class_id=2, top_k=2) - 
self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - - y_pred = tf.constant([0.2, 0.6, 0.3, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - result = r_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives)) - self.assertAlmostEqual(0, self.evaluate(r_obj.false_negatives)) - - y_pred = tf.constant([1, 1, 0.9, 1, 1], shape=(1, 5)) - y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) - result = r_obj(y_true, y_pred) - self.assertAlmostEqual(0.5, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives)) - self.assertAlmostEqual(1, self.evaluate(r_obj.false_negatives)) - - def test_unweighted_top_k_and_threshold(self): - r_obj = metrics.Recall(thresholds=.7, top_k=2) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - - y_pred = tf.constant([0.2, 0.8, 0.6, 0, 0.2], shape=(1, 5)) - y_true = tf.constant([1, 1, 1, 0, 1], shape=(1, 5)) - result = r_obj(y_true, y_pred) - self.assertAlmostEqual(0.25, self.evaluate(result)) - self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives)) - self.assertAlmostEqual(3, self.evaluate(r_obj.false_negatives)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class SensitivityAtSpecificityTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - s_obj = metrics.SensitivityAtSpecificity( - 0.4, - num_thresholds=100, - class_id=12, - name='sensitivity_at_specificity_1') - self.assertEqual(s_obj.name, 'sensitivity_at_specificity_1') - self.assertLen(s_obj.variables, 4) - self.assertEqual(s_obj.specificity, 0.4) - self.assertEqual(s_obj.num_thresholds, 100) - self.assertEqual(s_obj.class_id, 12) - - # Check save and restore config - s_obj2 = metrics.SensitivityAtSpecificity.from_config(s_obj.get_config()) - self.assertEqual(s_obj2.name, 'sensitivity_at_specificity_1') - self.assertLen(s_obj2.variables, 4) - self.assertEqual(s_obj2.specificity, 0.4) - self.assertEqual(s_obj2.num_thresholds, 100) - self.assertEqual(s_obj.class_id, 12) - - def test_value_is_idempotent(self): - s_obj = metrics.SensitivityAtSpecificity(0.7) - y_pred = tf.random.uniform((10, 3), - maxval=1, - dtype=tf.float32, - seed=1) - y_true = tf.random.uniform((10, 3), - maxval=2, - dtype=tf.int64, - seed=1) - update_op = s_obj.update_state(y_true, y_pred) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - - # Run several updates. - for _ in range(10): - self.evaluate(update_op) - - # Then verify idempotency. 
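
SensitivityAtSpecificity, like the other metrics in this family, scans num_thresholds candidate thresholds and reports the best sensitivity among those whose specificity meets the target. A short eager sketch using the same values as test_unweighted_high_specificity below:

from keras import metrics

m = metrics.SensitivityAtSpecificity(0.8)
m.update_state([0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
               [0.0, 0.1, 0.2, 0.3, 0.4, 0.1, 0.45, 0.5, 0.8, 0.9])
print(float(m.result()))  # 0.8, matching that test's assertion
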
- initial_sensitivity = self.evaluate(s_obj.result()) - for _ in range(10): - self.assertAlmostEqual(initial_sensitivity, self.evaluate(s_obj.result()), - 1e-3) - - def test_unweighted_all_correct(self): - with self.test_session(): - s_obj = metrics.SensitivityAtSpecificity(0.7) - inputs = np.random.randint(0, 2, size=(100, 1)) - y_pred = tf.constant(inputs, dtype=tf.float32) - y_true = tf.constant(inputs) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - - def test_unweighted_high_specificity(self): - s_obj = metrics.SensitivityAtSpecificity(0.8) - pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.1, 0.45, 0.5, 0.8, 0.9] - label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.constant(label_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(0.8, self.evaluate(result)) - - def test_unweighted_low_specificity(self): - s_obj = metrics.SensitivityAtSpecificity(0.4) - pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26] - label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.constant(label_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(0.6, self.evaluate(result)) - - def test_unweighted_class_id(self): - s_obj = metrics.SpecificityAtSensitivity(0.4, class_id=2) - pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26] - label_values = [0, 0, 0, 0, 0, 2, 2, 2, 2, 2] - - y_pred = tf.transpose([pred_values] * 3) - y_true = tf.one_hot(label_values, depth=3) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(0.6, self.evaluate(result)) - - @parameterized.parameters([tf.bool, tf.int32, tf.float32]) - def test_weighted(self, label_dtype): - s_obj = metrics.SensitivityAtSpecificity(0.4) - pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26] - label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - weight_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.cast(label_values, dtype=label_dtype) - weights = tf.constant(weight_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred, sample_weight=weights) - self.assertAlmostEqual(0.675, self.evaluate(result)) - - def test_invalid_specificity(self): - with self.assertRaisesRegex( - ValueError, r'`specificity` must be in the range \[0, 1\].'): - metrics.SensitivityAtSpecificity(-1) - - def test_invalid_num_thresholds(self): - with self.assertRaisesRegex( - ValueError, 'Argument `num_thresholds` must be an integer > 0'): - metrics.SensitivityAtSpecificity(0.4, num_thresholds=-1) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class SpecificityAtSensitivityTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - s_obj = metrics.SpecificityAtSensitivity( - 0.4, - num_thresholds=100, - class_id=12, - name='specificity_at_sensitivity_1') - self.assertEqual(s_obj.name, 'specificity_at_sensitivity_1') - self.assertLen(s_obj.variables, 4) - self.assertEqual(s_obj.sensitivity, 0.4) - self.assertEqual(s_obj.num_thresholds, 100) - self.assertEqual(s_obj.class_id, 12) - - # Check save and restore config 
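
The get_config()/from_config() round trip checked throughout these tests is plain dict serialization; a standalone sketch (the name "spec_at_sens" is illustrative):

from keras import metrics

m = metrics.SpecificityAtSensitivity(0.4, num_thresholds=100, name="spec_at_sens")
config = m.get_config()  # a plain dict of constructor arguments
m2 = metrics.SpecificityAtSensitivity.from_config(config)
assert m2.sensitivity == 0.4 and m2.num_thresholds == 100
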
- s_obj2 = metrics.SpecificityAtSensitivity.from_config(s_obj.get_config()) - self.assertEqual(s_obj2.name, 'specificity_at_sensitivity_1') - self.assertLen(s_obj2.variables, 4) - self.assertEqual(s_obj2.sensitivity, 0.4) - self.assertEqual(s_obj2.num_thresholds, 100) - self.assertEqual(s_obj.class_id, 12) - - def test_value_is_idempotent(self): - s_obj = metrics.SpecificityAtSensitivity(0.7) - y_pred = tf.random.uniform((10, 3), - maxval=1, - dtype=tf.float32, - seed=1) - y_true = tf.random.uniform((10, 3), - maxval=2, - dtype=tf.int64, - seed=1) - update_op = s_obj.update_state(y_true, y_pred) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - - # Run several updates. - for _ in range(10): - self.evaluate(update_op) - - # Then verify idempotency. - initial_specificity = self.evaluate(s_obj.result()) - for _ in range(10): - self.assertAlmostEqual(initial_specificity, self.evaluate(s_obj.result()), - 1e-3) - - def test_unweighted_all_correct(self): - s_obj = metrics.SpecificityAtSensitivity(0.7) - inputs = np.random.randint(0, 2, size=(100, 1)) - y_pred = tf.constant(inputs, dtype=tf.float32) - y_true = tf.constant(inputs) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - - def test_unweighted_high_sensitivity(self): - s_obj = metrics.SpecificityAtSensitivity(1.0) - pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26] - label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.constant(label_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(0.2, self.evaluate(result)) - - def test_unweighted_low_sensitivity(self): - s_obj = metrics.SpecificityAtSensitivity(0.4) - pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26] - label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.constant(label_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(0.6, self.evaluate(result)) - - def test_unweighted_class_id(self): - s_obj = metrics.SpecificityAtSensitivity(0.4, class_id=2) - pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26] - label_values = [0, 0, 0, 0, 0, 2, 2, 2, 2, 2] - - y_pred = tf.transpose([pred_values] * 3) - y_true = tf.one_hot(label_values, depth=3) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(0.6, self.evaluate(result)) - - @parameterized.parameters([tf.bool, tf.int32, tf.float32]) - def test_weighted(self, label_dtype): - s_obj = metrics.SpecificityAtSensitivity(0.4) - pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26] - label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - weight_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.cast(label_values, dtype=label_dtype) - weights = tf.constant(weight_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred, sample_weight=weights) - self.assertAlmostEqual(0.4, self.evaluate(result)) - - def test_invalid_sensitivity(self): - with self.assertRaisesRegex( - ValueError, r'`sensitivity` must be in the range \[0, 1\].'): - metrics.SpecificityAtSensitivity(-1) - - def 
test_invalid_num_thresholds(self): - with self.assertRaisesRegex( - ValueError, 'Argument `num_thresholds` must be an integer > 0'): - metrics.SpecificityAtSensitivity(0.4, num_thresholds=-1) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class PrecisionAtRecallTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - s_obj = metrics.PrecisionAtRecall( - 0.4, num_thresholds=100, class_id=12, name='precision_at_recall_1') - self.assertEqual(s_obj.name, 'precision_at_recall_1') - self.assertLen(s_obj.variables, 4) - self.assertEqual(s_obj.recall, 0.4) - self.assertEqual(s_obj.num_thresholds, 100) - self.assertEqual(s_obj.class_id, 12) - - # Check save and restore config - s_obj2 = metrics.PrecisionAtRecall.from_config(s_obj.get_config()) - self.assertEqual(s_obj2.name, 'precision_at_recall_1') - self.assertLen(s_obj2.variables, 4) - self.assertEqual(s_obj2.recall, 0.4) - self.assertEqual(s_obj2.num_thresholds, 100) - self.assertEqual(s_obj.class_id, 12) - - def test_value_is_idempotent(self): - s_obj = metrics.PrecisionAtRecall(0.7) - y_pred = tf.random.uniform((10, 3), - maxval=1, - dtype=tf.float32, - seed=1) - y_true = tf.random.uniform((10, 3), - maxval=2, - dtype=tf.int64, - seed=1) - update_op = s_obj.update_state(y_true, y_pred) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - - # Run several updates. - for _ in range(10): - self.evaluate(update_op) - - # Then verify idempotency. - initial_precision = self.evaluate(s_obj.result()) - for _ in range(10): - self.assertAlmostEqual(initial_precision, self.evaluate(s_obj.result()), - 1e-3) - - def test_unweighted_all_correct(self): - s_obj = metrics.PrecisionAtRecall(0.7) - inputs = np.random.randint(0, 2, size=(100, 1)) - y_pred = tf.constant(inputs, dtype=tf.float32) - y_true = tf.constant(inputs) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - - def test_unweighted_high_recall(self): - s_obj = metrics.PrecisionAtRecall(0.8) - pred_values = [0.0, 0.1, 0.2, 0.5, 0.6, 0.2, 0.5, 0.6, 0.8, 0.9] - label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.constant(label_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - # For 0.5 < decision threshold < 0.6. - self.assertAlmostEqual(2.0/3, self.evaluate(result)) - - def test_unweighted_low_recall(self): - s_obj = metrics.PrecisionAtRecall(0.6) - pred_values = [0.0, 0.1, 0.2, 0.5, 0.6, 0.2, 0.5, 0.6, 0.8, 0.9] - label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.constant(label_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - # For 0.2 < decision threshold < 0.5. - self.assertAlmostEqual(0.75, self.evaluate(result)) - - def test_unweighted_class_id(self): - s_obj = metrics.PrecisionAtRecall(0.6, class_id=2) - pred_values = [0.0, 0.1, 0.2, 0.5, 0.6, 0.2, 0.5, 0.6, 0.8, 0.9] - label_values = [0, 0, 0, 0, 0, 2, 2, 2, 2, 2] - - y_pred = tf.transpose([pred_values] * 3) - y_true = tf.one_hot(label_values, depth=3) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - # For 0.2 < decision threshold < 0.5. 
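# Hedged cross-check of the 0.75 expected below, in plain numpy (np is
# already imported in this test file; the 200-point sweep only
# approximates the metric's internal thresholds, and all variable names
# here are ours). It mirrors the max-under-constraint search sketched by
# `_find_max_under_constraint` in the new confusion_metrics.py: report
# the best precision among thresholds whose recall still meets 0.6.
check_preds = np.array([0.0, 0.1, 0.2, 0.5, 0.6, 0.2, 0.5, 0.6, 0.8, 0.9])
check_labels = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])  # class 2, projected to binary
best_precision = 0.0
for t in np.linspace(0.0, 1.0, 200):
    tp = np.sum((check_preds >= t) & (check_labels == 1))
    fp = np.sum((check_preds >= t) & (check_labels == 0))
    if tp + fp > 0 and tp / 5.0 >= 0.6:  # recall constraint
        best_precision = max(best_precision, tp / float(tp + fp))
assert best_precision == 0.75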
- self.assertAlmostEqual(0.75, self.evaluate(result)) - - @parameterized.parameters([tf.bool, tf.int32, tf.float32]) - def test_weighted(self, label_dtype): - s_obj = metrics.PrecisionAtRecall(7.0/8) - pred_values = [0.0, 0.1, 0.2, 0.5, 0.6, 0.2, 0.5, 0.6, 0.8, 0.9] - label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - weight_values = [2, 1, 2, 1, 2, 1, 2, 2, 1, 2] - - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.cast(label_values, dtype=label_dtype) - weights = tf.constant(weight_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred, sample_weight=weights) - # For 0.0 < decision threshold < 0.2. - self.assertAlmostEqual(0.7, self.evaluate(result)) - - def test_invalid_sensitivity(self): - with self.assertRaisesRegex(ValueError, - r'`recall` must be in the range \[0, 1\].'): - metrics.PrecisionAtRecall(-1) - - def test_invalid_num_thresholds(self): - with self.assertRaisesRegex( - ValueError, 'Argument `num_thresholds` must be an integer > 0'): - metrics.PrecisionAtRecall(0.4, num_thresholds=-1) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class RecallAtPrecisionTest(tf.test.TestCase, parameterized.TestCase): - - def test_config(self): - s_obj = metrics.RecallAtPrecision( - 0.4, num_thresholds=100, class_id=12, name='recall_at_precision_1') - self.assertEqual(s_obj.name, 'recall_at_precision_1') - self.assertLen(s_obj.variables, 4) - self.assertEqual(s_obj.precision, 0.4) - self.assertEqual(s_obj.num_thresholds, 100) - self.assertEqual(s_obj.class_id, 12) - - # Check save and restore config - s_obj2 = metrics.RecallAtPrecision.from_config(s_obj.get_config()) - self.assertEqual(s_obj2.name, 'recall_at_precision_1') - self.assertLen(s_obj2.variables, 4) - self.assertEqual(s_obj2.precision, 0.4) - self.assertEqual(s_obj2.num_thresholds, 100) - self.assertEqual(s_obj.class_id, 12) - - def test_value_is_idempotent(self): - s_obj = metrics.RecallAtPrecision(0.7) - y_pred = tf.random.uniform((10, 3), - maxval=1, - dtype=tf.float32, - seed=1) - y_true = tf.random.uniform((10, 3), - maxval=2, - dtype=tf.int64, - seed=1) - update_op = s_obj.update_state(y_true, y_pred) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - - # Run several updates. - for _ in range(10): - self.evaluate(update_op) - - # Then verify idempotency. - initial_recall = self.evaluate(s_obj.result()) - for _ in range(10): - self.assertAlmostEqual(initial_recall, self.evaluate(s_obj.result()), - 1e-3) - - def test_unweighted_all_correct(self): - s_obj = metrics.RecallAtPrecision(0.7) - inputs = np.random.randint(0, 2, size=(100, 1)) - y_pred = tf.constant(inputs, dtype=tf.float32) - y_true = tf.constant(inputs) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - self.assertAlmostEqual(1, self.evaluate(result)) - - def test_unweighted_high_precision(self): - s_obj = metrics.RecallAtPrecision(0.75) - pred_values = [ - 0.05, 0.1, 0.2, 0.3, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.9, 0.95 - ] - label_values = [0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1] - # precisions: [1/2, 6/11, 1/2, 5/9, 5/8, 5/7, 2/3, 3/5, 3/5, 2/3, 1/2, 1]. - # recalls: [1, 1, 5/6, 5/6, 5/6, 5/6, 2/3, 1/2, 1/2, 1/3, 1/6, 1/6]. - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.constant(label_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - # The precision 0.75 can be reached at thresholds 0.4<=t<0.45. 
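# Hedged sketch reproducing the precision/recall lists in the comment
# above with plain numpy: each prediction value is used as a `>=`
# threshold (variable names here are ours, not the test's).
pv = np.array([0.05, 0.1, 0.2, 0.3, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.9, 0.95])
lv = np.array([0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1])
for t in pv:
    tp = np.sum((pv >= t) & (lv == 1))
    fp = np.sum((pv >= t) & (lv == 0))
    precision, recall = tp / float(tp + fp), tp / 6.0
    # e.g. t=0.35 yields precision 5/7 and recall 5/6, matching the lists.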
- self.assertAlmostEqual(0.5, self.evaluate(result)) - - def test_unweighted_low_precision(self): - s_obj = metrics.RecallAtPrecision(2.0 / 3) - pred_values = [ - 0.05, 0.1, 0.2, 0.3, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.9, 0.95 - ] - label_values = [0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1] - # precisions: [1/2, 6/11, 1/2, 5/9, 5/8, 5/7, 2/3, 3/5, 3/5, 2/3, 1/2, 1]. - # recalls: [1, 1, 5/6, 5/6, 5/6, 5/6, 2/3, 1/2, 1/2, 1/3, 1/6, 1/6]. - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.constant(label_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - # The precision 5/7 can be reached at thresholds 0.3<=t<0.35. - self.assertAlmostEqual(5. / 6, self.evaluate(result)) - - def test_unweighted_class_id(self): - s_obj = metrics.RecallAtPrecision(2.0 / 3, class_id=2) - pred_values = [ - 0.05, 0.1, 0.2, 0.3, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.9, 0.95 - ] - label_values = [0, 2, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2] - # precisions: [1/2, 6/11, 1/2, 5/9, 5/8, 5/7, 2/3, 3/5, 3/5, 2/3, 1/2, 1]. - # recalls: [1, 1, 5/6, 5/6, 5/6, 5/6, 2/3, 1/2, 1/2, 1/3, 1/6, 1/6]. - y_pred = tf.transpose([pred_values] * 3) - y_true = tf.one_hot(label_values, depth=3) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - # The precision 5/7 can be reached at thresholds 0.3<=t<0.35. - self.assertAlmostEqual(5. / 6, self.evaluate(result)) - - @parameterized.parameters([tf.bool, tf.int32, tf.float32]) - def test_weighted(self, label_dtype): - s_obj = metrics.RecallAtPrecision(0.75) - pred_values = [0.1, 0.2, 0.3, 0.5, 0.6, 0.9, 0.9] - label_values = [0, 1, 0, 0, 0, 1, 1] - weight_values = [1, 2, 1, 2, 1, 2, 1] - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.cast(label_values, dtype=label_dtype) - weights = tf.constant(weight_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred, sample_weight=weights) - self.assertAlmostEqual(0.6, self.evaluate(result)) - - def test_unachievable_precision(self): - s_obj = metrics.RecallAtPrecision(2.0 / 3) - pred_values = [0.1, 0.2, 0.3, 0.9] - label_values = [1, 1, 0, 0] - y_pred = tf.constant(pred_values, dtype=tf.float32) - y_true = tf.constant(label_values) - self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables)) - result = s_obj(y_true, y_pred) - # The highest possible precision is 1/2 which is below the required - # value, expect 0 recall.
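# Hedged numpy check of the comment above (variable names are ours): the
# top-scored example is a negative, so no threshold can push precision to
# the 2/3 target, and the metric's `_find_max_under_constraint` fallback
# then yields a recall of 0.0.
pv = np.array([0.1, 0.2, 0.3, 0.9])
lv = np.array([1, 1, 0, 0])
best = 0.0
for t in np.linspace(0.0, 1.0, 200):
    tp = np.sum((pv >= t) & (lv == 1))
    fp = np.sum((pv >= t) & (lv == 0))
    if tp + fp > 0:
        best = max(best, tp / float(tp + fp))
assert best == 0.5  # highest achievable precision, below the 2/3 target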
- self.assertAlmostEqual(0, self.evaluate(result)) - - def test_invalid_sensitivity(self): - with self.assertRaisesRegex(ValueError, - r'`precision` must be in the range \[0, 1\].'): - metrics.RecallAtPrecision(-1) - - def test_invalid_num_thresholds(self): - with self.assertRaisesRegex( - ValueError, 'Argument `num_thresholds` must be an integer > 0'): - metrics.RecallAtPrecision(0.4, num_thresholds=-1) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class AUCTest(tf.test.TestCase, parameterized.TestCase): - - def setup(self): - self.num_thresholds = 3 - self.y_pred = tf.constant([0, 0.5, 0.3, 0.9], dtype=tf.float32) - epsilon = 1e-12 - self.y_pred_logits = -tf.math.log(1.0 / (self.y_pred + epsilon) - 1.0) - self.y_true = tf.constant([0, 0, 1, 1]) - self.sample_weight = [1, 2, 3, 4] - - # threshold values are [0 - 1e-7, 0.5, 1 + 1e-7] - # y_pred when threshold = 0 - 1e-7 : [1, 1, 1, 1] - # y_pred when threshold = 0.5 : [0, 0, 0, 1] - # y_pred when threshold = 1 + 1e-7 : [0, 0, 0, 0] - - # without sample_weight: - # tp = np.sum([[0, 0, 1, 1], [0, 0, 0, 1], [0, 0, 0, 0]], axis=1) - # fp = np.sum([[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], axis=1) - # fn = np.sum([[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 1, 1]], axis=1) - # tn = np.sum([[0, 0, 0, 0], [1, 1, 0, 0], [1, 1, 0, 0]], axis=1) - - # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] - - # with sample_weight: - # tp = np.sum([[0, 0, 3, 4], [0, 0, 0, 4], [0, 0, 0, 0]], axis=1) - # fp = np.sum([[1, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], axis=1) - # fn = np.sum([[0, 0, 0, 0], [0, 0, 3, 0], [0, 0, 3, 4]], axis=1) - # tn = np.sum([[0, 0, 0, 0], [1, 2, 0, 0], [1, 2, 0, 0]], axis=1) - - # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3] - - def test_config(self): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=100, - curve='PR', - summation_method='majoring', - name='auc_1') - auc_obj.update_state(self.y_true, self.y_pred) - self.assertEqual(auc_obj.name, 'auc_1') - self.assertLen(auc_obj.variables, 4) - self.assertEqual(auc_obj.num_thresholds, 100) - self.assertEqual(auc_obj.curve, metrics_utils.AUCCurve.PR) - self.assertEqual(auc_obj.summation_method, - metrics_utils.AUCSummationMethod.MAJORING) - old_config = auc_obj.get_config() - self.assertNotIn('thresholds', old_config) - self.assertDictEqual(old_config, json.loads(json.dumps(old_config))) - - # Check save and restore config. 
- auc_obj2 = metrics.AUC.from_config(auc_obj.get_config()) - auc_obj2.update_state(self.y_true, self.y_pred) - self.assertEqual(auc_obj2.name, 'auc_1') - self.assertLen(auc_obj2.variables, 4) - self.assertEqual(auc_obj2.num_thresholds, 100) - self.assertEqual(auc_obj2.curve, metrics_utils.AUCCurve.PR) - self.assertEqual(auc_obj2.summation_method, - metrics_utils.AUCSummationMethod.MAJORING) - new_config = auc_obj2.get_config() - self.assertNotIn('thresholds', new_config) - self.assertDictEqual(old_config, new_config) - self.assertAllClose(auc_obj.thresholds, auc_obj2.thresholds) - - def test_config_manual_thresholds(self): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=None, - curve='PR', - summation_method='majoring', - name='auc_1', - thresholds=[0.3, 0.5]) - auc_obj.update_state(self.y_true, self.y_pred) - self.assertEqual(auc_obj.name, 'auc_1') - self.assertLen(auc_obj.variables, 4) - self.assertEqual(auc_obj.num_thresholds, 4) - self.assertAllClose(auc_obj.thresholds, [0.0, 0.3, 0.5, 1.0]) - self.assertEqual(auc_obj.curve, metrics_utils.AUCCurve.PR) - self.assertEqual(auc_obj.summation_method, - metrics_utils.AUCSummationMethod.MAJORING) - old_config = auc_obj.get_config() - self.assertDictEqual(old_config, json.loads(json.dumps(old_config))) - - # Check save and restore config. - auc_obj2 = metrics.AUC.from_config(auc_obj.get_config()) - auc_obj2.update_state(self.y_true, self.y_pred) - self.assertEqual(auc_obj2.name, 'auc_1') - self.assertLen(auc_obj2.variables, 4) - self.assertEqual(auc_obj2.num_thresholds, 4) - self.assertEqual(auc_obj2.curve, metrics_utils.AUCCurve.PR) - self.assertEqual(auc_obj2.summation_method, - metrics_utils.AUCSummationMethod.MAJORING) - new_config = auc_obj2.get_config() - self.assertDictEqual(old_config, new_config) - self.assertAllClose(auc_obj.thresholds, auc_obj2.thresholds) - - def test_value_is_idempotent(self): - self.setup() - auc_obj = metrics.AUC(num_thresholds=3) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - - # Run several updates. - update_op = auc_obj.update_state(self.y_true, self.y_pred) - for _ in range(10): - self.evaluate(update_op) - - # Then verify idempotency. 
- initial_auc = self.evaluate(auc_obj.result()) - for _ in range(10): - self.assertAllClose(initial_auc, self.evaluate(auc_obj.result()), 1e-3) - - def test_unweighted_all_correct(self): - self.setup() - auc_obj = metrics.AUC() - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_true) - self.assertEqual(self.evaluate(result), 1) - - def test_unweighted(self): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_pred) - - # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] - # recall = [2/2, 1/(1+1), 0] = [1, 0.5, 0] - # fp_rate = [2/2, 0, 0] = [1, 0, 0] - # heights = [(1 + 0.5)/2, (0.5 + 0)/2] = [0.75, 0.25] - # widths = [(1 - 0), (0 - 0)] = [1, 0] - expected_result = (0.75 * 1 + 0.25 * 0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_unweighted_from_logits(self): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, from_logits=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_pred_logits) - - # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] - # recall = [2/2, 1/(1+1), 0] = [1, 0.5, 0] - # fp_rate = [2/2, 0, 0] = [1, 0, 0] - # heights = [(1 + 0.5)/2, (0.5 + 0)/2] = [0.75, 0.25] - # widths = [(1 - 0), (0 - 0)] = [1, 0] - expected_result = (0.75 * 1 + 0.25 * 0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_manual_thresholds(self): - self.setup() - # Verify that when specified, thresholds are used instead of num_thresholds. - auc_obj = metrics.AUC(num_thresholds=2, thresholds=[0.5]) - self.assertEqual(auc_obj.num_thresholds, 3) - self.assertAllClose(auc_obj.thresholds, [0.0, 0.5, 1.0]) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_pred) - - # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] - # recall = [2/2, 1/(1+1), 0] = [1, 0.5, 0] - # fp_rate = [2/2, 0, 0] = [1, 0, 0] - # heights = [(1 + 0.5)/2, (0.5 + 0)/2] = [0.75, 0.25] - # widths = [(1 - 0), (0 - 0)] = [1, 0] - expected_result = (0.75 * 1 + 0.25 * 0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_weighted_roc_interpolation(self): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_pred, sample_weight=self.sample_weight) - - # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3] - # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0] - # fp_rate = [3/3, 0, 0] = [1, 0, 0] - # heights = [(1 + 0.571)/2, (0.571 + 0)/2] = [0.7855, 0.2855] - # widths = [(1 - 0), (0 - 0)] = [1, 0] - expected_result = (0.7855 * 1 + 0.2855 * 0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_weighted_roc_majoring(self): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=self.num_thresholds, summation_method='majoring') - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_pred, sample_weight=self.sample_weight) - - # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3] - # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0] - # fp_rate = [3/3, 0, 0] = [1, 0, 0] - # heights = [max(1, 0.571), max(0.571, 0)] = [1, 0.571] - # widths = [(1 - 0), (0 - 0)] = [1, 
0] - expected_result = (1 * 1 + 0.571 * 0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_weighted_roc_minoring(self): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=self.num_thresholds, summation_method='minoring') - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_pred, sample_weight=self.sample_weight) - - # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3] - # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0] - # fp_rate = [3/3, 0, 0] = [1, 0, 0] - # heights = [min(1, 0.571), min(0.571, 0)] = [0.571, 0] - # widths = [(1 - 0), (0 - 0)] = [1, 0] - expected_result = (0.571 * 1 + 0 * 0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_weighted_pr_majoring(self): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=self.num_thresholds, - curve='PR', - summation_method='majoring') - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_pred, sample_weight=self.sample_weight) - - # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3] - # precision = [7/(7+3), 4/4, 0] = [0.7, 1, 0] - # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0] - # heights = [max(0.7, 1), max(1, 0)] = [1, 1] - # widths = [(1 - 0.571), (0.571 - 0)] = [0.429, 0.571] - expected_result = (1 * 0.429 + 1 * 0.571) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_weighted_pr_minoring(self): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=self.num_thresholds, - curve='PR', - summation_method='minoring') - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_pred, sample_weight=self.sample_weight) - - # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3] - # precision = [7/(7+3), 4/4, 0] = [0.7, 1, 0] - # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0] - # heights = [min(0.7, 1), min(1, 0)] = [0.7, 0] - # widths = [(1 - 0.571), (0.571 - 0)] = [0.429, 0.571] - expected_result = (0.7 * 0.429 + 0 * 0.571) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_weighted_pr_interpolation(self): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, curve='PR') - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true, self.y_pred, sample_weight=self.sample_weight) - - # auc = (slope / Total Pos) * [dTP - intercept * log(Pb/Pa)] - - # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3] - # P = tp + fp = [10, 4, 0] - # dTP = [7-4, 4-0] = [3, 4] - # dP = [10-4, 4-0] = [6, 4] - # slope = dTP/dP = [0.5, 1] - # intercept = (TPa+(slope*Pa) = [(4 - 0.5*4), (0 - 1*0)] = [2, 0] - # (Pb/Pa) = (Pb/Pa) if Pb > 0 AND Pa > 0 else 1 = [10/4, 4/0] = [2.5, 1] - # auc * TotalPos = [(0.5 * (3 + 2 * log(2.5))), (1 * (4 + 0))] - # = [2.416, 4] - # auc = [2.416, 4]/(tp[1:]+fn[1:]) - expected_result = (2.416/7 + 4/7) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_invalid_num_thresholds(self): - with self.assertRaisesRegex( - ValueError, 'Argument `num_thresholds` must be an integer > 1'): - metrics.AUC(num_thresholds=-1) - - with self.assertRaisesRegex( - ValueError, 'Argument `num_thresholds` must be an integer > 1.'): - metrics.AUC(num_thresholds=1) - - def test_invalid_curve(self): - with self.assertRaisesRegex(ValueError, - 'Invalid AUC curve value: "Invalid".'): - metrics.AUC(curve='Invalid') - - def 
test_invalid_summation_method(self): - with self.assertRaisesRegex( - ValueError, 'Invalid AUC summation method value: "Invalid".'): - metrics.AUC(summation_method='Invalid') - - def test_extra_dims(self): - try: - from scipy import special # pylint: disable=g-import-not-at-top - self.setup() - logits = special.expit(-np.array([[[-10., 10., -10.], [10., -10., 10.]], - [[-12., 12., -12.], [12., -12., 12.]]], - dtype=np.float32)) - labels = np.array([[[1, 0, 0], [1, 0, 0]], [[0, 1, 1], [0, 1, 1]]], - dtype=np.int64) - auc_obj = metrics.AUC() - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(labels, logits) - self.assertEqual(self.evaluate(result), 0.5) - except ImportError as e: - tf_logging.warning('Cannot test special functions: %s' % str(e)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class MultiAUCTest(tf.test.TestCase, parameterized.TestCase): - - def setup(self): - self.num_thresholds = 5 - self.y_pred = tf.constant( - np.array([[0, 0.5, 0.3, 0.9], [0.1, 0.2, 0.3, 0.4]]).T, - dtype=tf.float32) - - epsilon = 1e-12 - self.y_pred_logits = -tf.math.log(1.0 / (self.y_pred + epsilon) - 1.0) - - self.y_true_good = tf.constant( - np.array([[0, 0, 1, 1], [0, 0, 1, 1]]).T) - self.y_true_bad = tf.constant( - np.array([[0, 0, 1, 1], [1, 1, 0, 0]]).T) - self.sample_weight = [1, 2, 3, 4] - - # threshold values are [0 - 1e-7, 0.25, 0.5, 0.75, 1 + 1e-7] - # y_pred when threshold = 0 - 1e-7 : [[1, 1, 1, 1], [1, 1, 1, 1]] - # y_pred when threshold = 0.25 : [[0, 1, 1, 1], [0, 0, 1, 1]] - # y_pred when threshold = 0.5 : [[0, 0, 0, 1], [0, 0, 0, 0]] - # y_pred when threshold = 0.75 : [[0, 0, 0, 1], [0, 0, 0, 0]] - # y_pred when threshold = 1 + 1e-7 : [[0, 0, 0, 0], [0, 0, 0, 0]] - - # for y_true_good, over thresholds: - # tp = [[2, 2, 1, 1, 0], [2, 2, 0, 0, 0]] - # fp = [[2, 1, 0, 0 , 0], [2, 0, 0 ,0, 0]] - # fn = [[0, 0, 1, 1, 2], [0, 0, 2, 2, 2]] - # tn = [[0, 1, 2, 2, 2], [0, 2, 2, 2, 2]] - - # tpr = [[1, 1, 0.5, 0.5, 0], [1, 1, 0, 0, 0]] - # fpr = [[1, 0.5, 0, 0, 0], [1, 0, 0, 0, 0]] - - # for y_true_bad: - # tp = [[2, 2, 1, 1, 0], [2, 0, 0, 0, 0]] - # fp = [[2, 1, 0, 0 , 0], [2, 2, 0 ,0, 0]] - # fn = [[0, 0, 1, 1, 2], [0, 2, 2, 2, 2]] - # tn = [[0, 1, 2, 2, 2], [0, 0, 2, 2, 2]] - - # tpr = [[1, 1, 0.5, 0.5, 0], [1, 0, 0, 0, 0]] - # fpr = [[1, 0.5, 0, 0, 0], [1, 1, 0, 0, 0]] - - # for y_true_good with sample_weights: - - # tp = [[7, 7, 4, 4, 0], [7, 7, 0, 0, 0]] - # fp = [[3, 2, 0, 0, 0], [3, 0, 0, 0, 0]] - # fn = [[0, 0, 3, 3, 7], [0, 0, 7, 7, 7]] - # tn = [[0, 1, 3, 3, 3], [0, 3, 3, 3, 3]] - - # tpr = [[1, 1, 0.57, 0.57, 0], [1, 1, 0, 0, 0]] - # fpr = [[1, 0.67, 0, 0, 0], [1, 0, 0, 0, 0]] - - def test_value_is_idempotent(self): - with self.test_session(): - self.setup() - auc_obj = metrics.AUC(num_thresholds=5, multi_label=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - - # Run several updates. - update_op = auc_obj.update_state(self.y_true_good, self.y_pred) - for _ in range(10): - self.evaluate(update_op) - - # Then verify idempotency. 
- initial_auc = self.evaluate(auc_obj.result()) - for _ in range(10): - self.assertAllClose(initial_auc, self.evaluate(auc_obj.result()), 1e-3) - - def test_unweighted_all_correct(self): - with self.test_session(): - self.setup() - auc_obj = metrics.AUC(multi_label=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_true_good) - self.assertEqual(self.evaluate(result), 1) - - def test_unweighted_all_correct_flat(self): - self.setup() - auc_obj = metrics.AUC(multi_label=False) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_true_good) - self.assertEqual(self.evaluate(result), 1) - - def test_unweighted(self): - with self.test_session(): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, - multi_label=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_pred) - - # tpr = [[1, 1, 0.5, 0.5, 0], [1, 1, 0, 0, 0]] - # fpr = [[1, 0.5, 0, 0, 0], [1, 0, 0, 0, 0]] - expected_result = (0.875 + 1.0) / 2.0 - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_unweighted_from_logits(self): - with self.test_session(): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=self.num_thresholds, - multi_label=True, - from_logits=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_pred_logits) - - # tpr = [[1, 1, 0.5, 0.5, 0], [1, 1, 0, 0, 0]] - # fpr = [[1, 0.5, 0, 0, 0], [1, 0, 0, 0, 0]] - expected_result = (0.875 + 1.0) / 2.0 - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_sample_weight_flat(self): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, multi_label=False) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_pred, sample_weight=[1, 2, 3, 4]) - - # tpr = [1, 1, 0.2857, 0.2857, 0] - # fpr = [1, 0.3333, 0, 0, 0] - expected_result = 1.0 - (0.3333 * (1.0 - 0.2857) / 2.0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_full_sample_weight_flat(self): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, multi_label=False) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - sw = np.arange(4 * 2) - sw = sw.reshape(4, 2) - result = auc_obj(self.y_true_good, self.y_pred, sample_weight=sw) - - # tpr = [1, 1, 0.2727, 0.2727, 0] - # fpr = [1, 0.3333, 0, 0, 0] - expected_result = 1.0 - (0.3333 * (1.0 - 0.2727) / 2.0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_label_weights(self): - with self.test_session(): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=self.num_thresholds, - multi_label=True, - label_weights=[0.75, 0.25]) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_pred) - - # tpr = [[1, 1, 0.5, 0.5, 0], [1, 1, 0, 0, 0]] - # fpr = [[1, 0.5, 0, 0, 0], [1, 0, 0, 0, 0]] - expected_result = (0.875 * 0.75 + 1.0 * 0.25) / (0.75 + 0.25) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_label_weights_flat(self): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=self.num_thresholds, - multi_label=False, - label_weights=[0.75, 0.25]) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_pred) - - 
# tpr = [1, 1, 0.375, 0.375, 0] - # fpr = [1, 0.375, 0, 0, 0] - expected_result = 1.0 - ((1.0 - 0.375) * 0.375 / 2.0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-2) - - def test_unweighted_flat(self): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, multi_label=False) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_pred) - - # tp = [4, 4, 1, 1, 0] - # fp = [4, 1, 0, 0, 0] - # fn = [0, 0, 3, 3, 4] - # tn = [0, 3, 4, 4, 4] - - # tpr = [1, 1, 0.25, 0.25, 0] - # fpr = [1, 0.25, 0, 0, 0] - expected_result = 1.0 - (3.0 / 32.0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_unweighted_flat_from_logits(self): - self.setup() - auc_obj = metrics.AUC( - num_thresholds=self.num_thresholds, multi_label=False, from_logits=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_pred_logits) - - # tp = [4, 4, 1, 1, 0] - # fp = [4, 1, 0, 0, 0] - # fn = [0, 0, 3, 3, 4] - # tn = [0, 3, 4, 4, 4] - - # tpr = [1, 1, 0.25, 0.25, 0] - # fpr = [1, 0.25, 0, 0, 0] - expected_result = 1.0 - (3.0 / 32.0) - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_manual_thresholds(self): - with self.test_session(): - self.setup() - # Verify that when specified, thresholds are used instead of - # num_thresholds. - auc_obj = metrics.AUC(num_thresholds=2, thresholds=[0.5], - multi_label=True) - self.assertEqual(auc_obj.num_thresholds, 3) - self.assertAllClose(auc_obj.thresholds, [0.0, 0.5, 1.0]) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj(self.y_true_good, self.y_pred) - - # tp = [[2, 1, 0], [2, 0, 0]] - # fp = [2, 0, 0], [2, 0, 0]] - # fn = [[0, 1, 2], [0, 2, 2]] - # tn = [[0, 2, 2], [0, 2, 2]] - - # tpr = [[1, 0.5, 0], [1, 0, 0]] - # fpr = [[1, 0, 0], [1, 0, 0]] - - # auc by slice = [0.75, 0.5] - expected_result = (0.75 + 0.5) / 2.0 - - self.assertAllClose(self.evaluate(result), expected_result, 1e-3) - - def test_weighted_roc_interpolation(self): - with self.test_session(): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, - multi_label=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - result = auc_obj( - self.y_true_good, self.y_pred, sample_weight=self.sample_weight) - - # tpr = [[1, 1, 0.57, 0.57, 0], [1, 1, 0, 0, 0]] - # fpr = [[1, 0.67, 0, 0, 0], [1, 0, 0, 0, 0]] - expected_result = 1.0 - 0.5 * 0.43 * 0.67 - self.assertAllClose(self.evaluate(result), expected_result, 1e-1) - - def test_pr_interpolation_unweighted(self): - with self.test_session(): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, curve='PR', - multi_label=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - good_result = auc_obj(self.y_true_good, self.y_pred) - with self.subTest(name='good'): - # PR AUCs are 0.917 and 1.0 respectively - self.assertAllClose(self.evaluate(good_result), (0.91667 + 1.0) / 2.0, - 1e-1) - bad_result = auc_obj(self.y_true_bad, self.y_pred) - with self.subTest(name='bad'): - # PR AUCs are 0.917 and 0.5 respectively - self.assertAllClose(self.evaluate(bad_result), (0.91667 + 0.5) / 2.0, - 1e-1) - - def test_pr_interpolation(self): - with self.test_session(): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, curve='PR', - multi_label=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - 
good_result = auc_obj(self.y_true_good, self.y_pred, - sample_weight=self.sample_weight) - # PR AUCs are 0.939 and 1.0 respectively - self.assertAllClose(self.evaluate(good_result), (0.939 + 1.0) / 2.0, - 1e-1) - - def test_keras_model_compiles(self): - inputs = layers.Input(shape=(10,)) - output = layers.Dense(3, activation='sigmoid')(inputs) - model = models.Model(inputs=inputs, outputs=output) - model.compile( - loss='binary_crossentropy', - metrics=[metrics.AUC(multi_label=True)] - ) - - def test_reset_state(self): - with self.test_session(): - self.setup() - auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, - multi_label=True) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - auc_obj(self.y_true_good, self.y_pred) - auc_obj.reset_state() - self.assertAllEqual(auc_obj.true_positives, np.zeros((5, 2))) - - -@test_combinations.generate(test_combinations.combine(mode=['eager'])) -class ThresholdsTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - metrics.TruePositives(), - metrics.TrueNegatives(), - metrics.FalsePositives(), - metrics.FalseNegatives(), - metrics.Precision(), - metrics.Recall(), - metrics.SensitivityAtSpecificity(0.5), - metrics.SpecificityAtSensitivity(0.5), - metrics.PrecisionAtRecall(0.5), - metrics.RecallAtPrecision(0.5), - metrics.AUC()]) - def test_with_default_thresholds(self, metric_obj): - # By default, the thresholds will be evenly distributed if there are more - # than 1. In case there is only 1 thresholds, then we expect - # _thresholds_distributed_evenly to be false. - expected = len(metric_obj.thresholds) > 1 - self.assertEqual(metric_obj._thresholds_distributed_evenly, expected) - - @parameterized.parameters([ - metrics.TruePositives, - metrics.TrueNegatives, - metrics.FalsePositives, - metrics.FalseNegatives, - metrics.Precision, - metrics.Recall]) - def test_with_manual_thresholds(self, metric_cls): - even_thresholds = [0.0, 0.25, 0.5, 0.75, 1.0] - metric_obj = metric_cls(thresholds=even_thresholds) - self.assertTrue(metric_obj._thresholds_distributed_evenly) - - uneven_thresholds = [0.0, 0.45, 1.0] - metric_obj = metric_cls(thresholds=uneven_thresholds) - self.assertFalse(metric_obj._thresholds_distributed_evenly) - - def test_manual_thresholds_auc(self): - # The AUC metric handles manual thresholds input differently (it will add - # 0.0 and 1.0 for user). 
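# Hedged illustration of that padding, consistent with
# test_config_manual_thresholds earlier in this file: three
# user-supplied thresholds become five once the 0.0 and 1.0 endpoints
# are added (`auc_demo` is our name, not part of the test).
auc_demo = metrics.AUC(thresholds=[0.25, 0.5, 0.75])
assert auc_demo.num_thresholds == 5
# auc_demo.thresholds is approximately [0.0, 0.25, 0.5, 0.75, 1.0]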
- even_thresholds = [0.25, 0.5, 0.75] - auc = metrics.AUC(thresholds=even_thresholds) - self.assertTrue(auc._thresholds_distributed_evenly) - - # Test for save model - cloned = metrics.AUC.from_config(auc.get_config()) - self.assertTrue(cloned._thresholds_distributed_evenly) - - uneven_thresholds = [0.45,] - auc = metrics.AUC(thresholds=uneven_thresholds) - self.assertFalse(auc._thresholds_distributed_evenly) - - cloned = metrics.AUC.from_config(auc.get_config()) - self.assertFalse(cloned._thresholds_distributed_evenly) - - @parameterized.parameters([ - metrics.TruePositives, - metrics.TrueNegatives, - metrics.FalsePositives, - metrics.FalseNegatives, - metrics.Precision, - metrics.Recall, - metrics.AUC]) - def test_even_thresholds_correctness(self, metric_cls): - with tf.compat.forward_compatibility_horizon(2021, 6, 9): - # make sure the old approach and new approach produce same result - # for evenly distributed thresholds - y_true = np.random.randint(2, size=(10,)) - y_pred = np.random.rand(10) - - even_thresholds = [0.0, 0.25, 0.5, 0.75, 1.0] - if metric_cls == metrics.AUC: - even_thresholds = even_thresholds[1:-1] - metric_obj = metric_cls(thresholds=even_thresholds) - metric_obj.update_state(y_true, y_pred) - result1 = metric_obj.result() - - metric_obj2 = metric_cls(thresholds=even_thresholds) - # Force to use the old approach - metric_obj2._thresholds_distributed_evenly = False - metric_obj2.update_state(y_true, y_pred) - result2 = metric_obj2.result() - - self.assertAllClose(result1, result2) - # Check all the variables are the same, eg tp, tn, fp, fn - for v1, v2 in zip(metric_obj.variables, metric_obj2.variables): - self.assertAllClose(v1, v2) - - @parameterized.parameters([ - metrics.SensitivityAtSpecificity, - metrics.SpecificityAtSensitivity, - metrics.PrecisionAtRecall, - metrics.RecallAtPrecision]) - def test_even_thresholds_correctness_2(self, metric_cls): - with tf.compat.forward_compatibility_horizon(2021, 6, 9): - y_true = np.random.randint(2, size=(10,)) - y_pred = np.random.rand(10) - - metric_obj = metric_cls(0.5) - metric_obj.update_state(y_true, y_pred) - result1 = metric_obj.result() - - metric_obj2 = metric_cls(0.5) - # Force to use the old approach - metric_obj2._thresholds_distributed_evenly = False - metric_obj2.update_state(y_true, y_pred) - result2 = metric_obj2.result() - - self.assertAllClose(result1, result2) - # Check all the variables are the same, eg tp, tn, fp, fn - for v1, v2 in zip(metric_obj.variables, metric_obj2.variables): - self.assertAllClose(v1, v2) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/metrics/confusion_metrics.py b/keras/metrics/confusion_metrics.py new file mode 100644 index 000000000000..75584ff795e1 --- /dev/null +++ b/keras/metrics/confusion_metrics.py @@ -0,0 +1,1707 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Confusion metrics, i.e. 
metrics based on True/False positives/negatives.""" + +import abc + +import numpy as np +import tensorflow.compat.v2 as tf + +from keras import activations +from keras import backend +from keras.dtensor import utils as dtensor_utils +from keras.metrics import base_metric +from keras.utils import metrics_utils +from keras.utils.generic_utils import to_list +from keras.utils.tf_utils import is_tensor_or_variable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +class _ConfusionMatrixConditionCount(base_metric.Metric): + """Calculates the number of the given confusion matrix condition. + + Args: + confusion_matrix_cond: One of `metrics_utils.ConfusionMatrix` conditions. + thresholds: (Optional) A float value or a python list/tuple of float + threshold values in [0, 1]. A threshold is compared with prediction + values to determine the truth value of predictions + (i.e., above the threshold is `true`, below is `false`). One metric + value is generated for each threshold value. Defaults to `0.5`. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + """ + + def __init__( + self, confusion_matrix_cond, thresholds=None, name=None, dtype=None + ): + super().__init__(name=name, dtype=dtype) + self._confusion_matrix_cond = confusion_matrix_cond + self.init_thresholds = thresholds + self.thresholds = metrics_utils.parse_init_thresholds( + thresholds, default_threshold=0.5 + ) + self._thresholds_distributed_evenly = ( + metrics_utils.is_evenly_distributed_thresholds(self.thresholds) + ) + self.accumulator = self.add_weight( + "accumulator", shape=(len(self.thresholds),), initializer="zeros" + ) + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates the metric statistics. + + Args: + y_true: The ground truth values. + y_pred: The predicted values. + sample_weight: Optional weighting of each example. Can + be a `Tensor` whose rank is either 0, or the same rank as `y_true`, + and must be broadcastable to `y_true`. Defaults to `1`. + + Returns: + Update op. + """ + return metrics_utils.update_confusion_matrix_variables( + {self._confusion_matrix_cond: self.accumulator}, + y_true, + y_pred, + thresholds=self.thresholds, + thresholds_distributed_evenly=self._thresholds_distributed_evenly, + sample_weight=sample_weight, + ) + + def result(self): + if len(self.thresholds) == 1: + result = self.accumulator[0] + else: + result = self.accumulator + return tf.convert_to_tensor(result) + + def reset_state(self): + backend.batch_set_value( + [(v, np.zeros(v.shape.as_list())) for v in self.variables] + ) + + def get_config(self): + config = {"thresholds": self.init_thresholds} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.metrics.FalsePositives") +class FalsePositives(_ConfusionMatrixConditionCount): + """Calculates the number of false positives. + + If `sample_weight` is given, calculates the sum of the weights of + false positives. This metric creates one local variable, `accumulator` + that is used to keep track of the number of false positives. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Args: + thresholds: (Optional) A float value, or a Python + list/tuple of float threshold values in [0, 1]. A threshold is compared + with prediction values to determine the truth value of predictions + (i.e., above the threshold is `true`, below is `false`). 
If used with a + loss function that sets `from_logits=True` (i.e. no sigmoid applied to + predictions), `thresholds` should be set to 0. One metric value is + generated for each threshold value. Defaults to `0.5`. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.FalsePositives() + >>> m.update_state([0, 1, 0, 0], [0, 0, 1, 1]) + >>> m.result().numpy() + 2.0 + + >>> m.reset_state() + >>> m.update_state([0, 1, 0, 0], [0, 0, 1, 1], sample_weight=[0, 0, 1, 0]) + >>> m.result().numpy() + 1.0 + + Usage with `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.FalsePositives()]) + ``` + + Usage with a loss with `from_logits=True`: + + ```python + model.compile(optimizer='adam', + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=[tf.keras.metrics.FalsePositives(thresholds=0)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, thresholds=None, name=None, dtype=None): + super().__init__( + confusion_matrix_cond=metrics_utils.ConfusionMatrix.FALSE_POSITIVES, + thresholds=thresholds, + name=name, + dtype=dtype, + ) + + +@keras_export("keras.metrics.FalseNegatives") +class FalseNegatives(_ConfusionMatrixConditionCount): + """Calculates the number of false negatives. + + If `sample_weight` is given, calculates the sum of the weights of + false negatives. This metric creates one local variable, `accumulator` + that is used to keep track of the number of false negatives. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Args: + thresholds: (Optional) A float value, or a Python + list/tuple of float threshold values in [0, 1]. A threshold is compared + with prediction values to determine the truth value of predictions + (i.e., above the threshold is `true`, below is `false`). If used with a + loss function that sets `from_logits=True` (i.e. no sigmoid applied to + predictions), `thresholds` should be set to 0. One metric value is + generated for each threshold value. Defaults to `0.5`. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.FalseNegatives() + >>> m.update_state([0, 1, 1, 1], [0, 1, 0, 0]) + >>> m.result().numpy() + 2.0 + + >>> m.reset_state() + >>> m.update_state([0, 1, 1, 1], [0, 1, 0, 0], sample_weight=[0, 0, 1, 0]) + >>> m.result().numpy() + 1.0 + + Usage with `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.FalseNegatives()]) + ``` + + Usage with a loss with `from_logits=True`: + + ```python + model.compile(optimizer='adam', + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=[tf.keras.metrics.FalseNegatives(thresholds=0)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, thresholds=None, name=None, dtype=None): + super().__init__( + confusion_matrix_cond=metrics_utils.ConfusionMatrix.FALSE_NEGATIVES, + thresholds=thresholds, + name=name, + dtype=dtype, + ) + + +@keras_export("keras.metrics.TrueNegatives") +class TrueNegatives(_ConfusionMatrixConditionCount): + """Calculates the number of true negatives. + + If `sample_weight` is given, calculates the sum of the weights of + true negatives. This metric creates one local variable, `accumulator` + that is used to keep track of the number of true negatives. 
+ + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Args: + thresholds: (Optional) A float value, or a Python + list/tuple of float threshold values in [0, 1]. A threshold is compared + with prediction values to determine the truth value of predictions + (i.e., above the threshold is `true`, below is `false`). If used with a + loss function that sets `from_logits=True` (i.e. no sigmoid applied to + predictions), `thresholds` should be set to 0. One metric value is + generated for each threshold value. Defaults to `0.5`. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.TrueNegatives() + >>> m.update_state([0, 1, 0, 0], [1, 1, 0, 0]) + >>> m.result().numpy() + 2.0 + + >>> m.reset_state() + >>> m.update_state([0, 1, 0, 0], [1, 1, 0, 0], sample_weight=[0, 0, 1, 0]) + >>> m.result().numpy() + 1.0 + + Usage with `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.TrueNegatives()]) + ``` + + Usage with a loss with `from_logits=True`: + + ```python + model.compile(optimizer='adam', + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=[tf.keras.metrics.TrueNegatives(thresholds=0)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, thresholds=None, name=None, dtype=None): + super().__init__( + confusion_matrix_cond=metrics_utils.ConfusionMatrix.TRUE_NEGATIVES, + thresholds=thresholds, + name=name, + dtype=dtype, + ) + + +@keras_export("keras.metrics.TruePositives") +class TruePositives(_ConfusionMatrixConditionCount): + """Calculates the number of true positives. + + If `sample_weight` is given, calculates the sum of the weights of + true positives. This metric creates one local variable, `true_positives` + that is used to keep track of the number of true positives. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Args: + thresholds: (Optional) A float value, or a Python + list/tuple of float threshold values in [0, 1]. A threshold is compared + with prediction values to determine the truth value of predictions + (i.e., above the threshold is `true`, below is `false`). If used with a + loss function that sets `from_logits=True` (i.e. no sigmoid applied to + predictions), `thresholds` should be set to 0. One metric value is + generated for each threshold value. Defaults to `0.5`. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. 
+ + Standalone usage: + + >>> m = tf.keras.metrics.TruePositives() + >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1]) + >>> m.result().numpy() + 2.0 + + >>> m.reset_state() + >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1], sample_weight=[0, 0, 1, 0]) + >>> m.result().numpy() + 1.0 + + Usage with `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.TruePositives()]) + ``` + + Usage with a loss with `from_logits=True`: + + ```python + model.compile(optimizer='adam', + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=[tf.keras.metrics.TruePositives(thresholds=0)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, thresholds=None, name=None, dtype=None): + super().__init__( + confusion_matrix_cond=metrics_utils.ConfusionMatrix.TRUE_POSITIVES, + thresholds=thresholds, + name=name, + dtype=dtype, + ) + + +@keras_export("keras.metrics.Precision") +class Precision(base_metric.Metric): + """Computes the precision of the predictions with respect to the labels. + + The metric creates two local variables, `true_positives` and + `false_positives` that are used to compute the precision. This value is + ultimately returned as `precision`, an idempotent operation that simply + divides `true_positives` by the sum of `true_positives` and + `false_positives`. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + If `top_k` is set, we'll calculate precision as how often on average a class + among the top-k classes with the highest predicted values of a batch entry + is correct and can be found in the label for that entry. + + If `class_id` is specified, we calculate precision by considering only the + entries in the batch for which `class_id` is above the threshold and/or in + the top-k highest predictions, and computing the fraction of them for which + `class_id` is indeed a correct label. + + Args: + thresholds: (Optional) A float value, or a Python list/tuple of float + threshold values in [0, 1]. A threshold is compared with prediction + values to determine the truth value of predictions (i.e., above the + threshold is `true`, below is `false`). If used with a loss function + that sets `from_logits=True` (i.e. no sigmoid applied to predictions), + `thresholds` should be set to 0. One metric value is generated for each + threshold value. If neither thresholds nor top_k are set, the default is + to calculate precision with `thresholds=0.5`. + top_k: (Optional) Unset by default. An int value specifying the top-k + predictions to consider when calculating precision. + class_id: (Optional) Integer class ID for which we want binary metrics. + This must be in the half-open interval `[0, num_classes)`, where + `num_classes` is the last dimension of predictions. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. 
+ + Standalone usage: + + >>> m = tf.keras.metrics.Precision() + >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1]) + >>> m.result().numpy() + 0.6666667 + + >>> m.reset_state() + >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1], sample_weight=[0, 0, 1, 0]) + >>> m.result().numpy() + 1.0 + + >>> # With top_k=2, it will calculate precision over y_true[:2] + >>> # and y_pred[:2] + >>> m = tf.keras.metrics.Precision(top_k=2) + >>> m.update_state([0, 0, 1, 1], [1, 1, 1, 1]) + >>> m.result().numpy() + 0.0 + + >>> # With top_k=4, it will calculate precision over y_true[:4] + >>> # and y_pred[:4] + >>> m = tf.keras.metrics.Precision(top_k=4) + >>> m.update_state([0, 0, 1, 1], [1, 1, 1, 1]) + >>> m.result().numpy() + 0.5 + + Usage with `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.Precision()]) + ``` + + Usage with a loss with `from_logits=True`: + + ```python + model.compile(optimizer='adam', + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=[tf.keras.metrics.Precision(thresholds=0)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, thresholds=None, top_k=None, class_id=None, name=None, dtype=None + ): + super().__init__(name=name, dtype=dtype) + self.init_thresholds = thresholds + self.top_k = top_k + self.class_id = class_id + + default_threshold = 0.5 if top_k is None else metrics_utils.NEG_INF + self.thresholds = metrics_utils.parse_init_thresholds( + thresholds, default_threshold=default_threshold + ) + self._thresholds_distributed_evenly = ( + metrics_utils.is_evenly_distributed_thresholds(self.thresholds) + ) + self.true_positives = self.add_weight( + "true_positives", shape=(len(self.thresholds),), initializer="zeros" + ) + self.false_positives = self.add_weight( + "false_positives", + shape=(len(self.thresholds),), + initializer="zeros", + ) + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates true positive and false positive statistics. + + Args: + y_true: The ground truth values, with the same dimensions as `y_pred`. + Will be cast to `bool`. + y_pred: The predicted values. Each element must be in the range + `[0, 1]`. + sample_weight: Optional weighting of each example. Can + be a `Tensor` whose rank is either 0, or the same rank as `y_true`, + and must be broadcastable to `y_true`. Defaults to `1`. + + Returns: + Update op. 
+ """ + return metrics_utils.update_confusion_matrix_variables( + { + metrics_utils.ConfusionMatrix.TRUE_POSITIVES: self.true_positives, # noqa: E501 + metrics_utils.ConfusionMatrix.FALSE_POSITIVES: self.false_positives, # noqa: E501 + }, + y_true, + y_pred, + thresholds=self.thresholds, + thresholds_distributed_evenly=self._thresholds_distributed_evenly, + top_k=self.top_k, + class_id=self.class_id, + sample_weight=sample_weight, + ) + + def result(self): + result = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_positives), + ) + return result[0] if len(self.thresholds) == 1 else result + + def reset_state(self): + num_thresholds = len(to_list(self.thresholds)) + backend.batch_set_value( + [ + (v, np.zeros((num_thresholds,))) + for v in (self.true_positives, self.false_positives) + ] + ) + + def get_config(self): + config = { + "thresholds": self.init_thresholds, + "top_k": self.top_k, + "class_id": self.class_id, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.metrics.Recall") +class Recall(base_metric.Metric): + """Computes the recall of the predictions with respect to the labels. + + This metric creates two local variables, `true_positives` and + `false_negatives`, that are used to compute the recall. This value is + ultimately returned as `recall`, an idempotent operation that simply divides + `true_positives` by the sum of `true_positives` and `false_negatives`. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + If `top_k` is set, recall will be computed as how often on average a class + among the labels of a batch entry is in the top-k predictions. + + If `class_id` is specified, we calculate recall by considering only the + entries in the batch for which `class_id` is in the label, and computing the + fraction of them for which `class_id` is above the threshold and/or in the + top-k predictions. + + Args: + thresholds: (Optional) A float value, or a Python list/tuple of float + threshold values in [0, 1]. A threshold is compared with prediction + values to determine the truth value of predictions (i.e., above the + threshold is `true`, below is `false`). If used with a loss function + that sets `from_logits=True` (i.e. no sigmoid applied to predictions), + `thresholds` should be set to 0. One metric value is generated for each + threshold value. If neither thresholds nor top_k are set, the default is + to calculate recall with `thresholds=0.5`. + top_k: (Optional) Unset by default. An int value specifying the top-k + predictions to consider when calculating recall. + class_id: (Optional) Integer class ID for which we want binary metrics. + This must be in the half-open interval `[0, num_classes)`, where + `num_classes` is the last dimension of predictions. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. 
+ + Standalone usage: + + >>> m = tf.keras.metrics.Recall() + >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1]) + >>> m.result().numpy() + 0.6666667 + + >>> m.reset_state() + >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1], sample_weight=[0, 0, 1, 0]) + >>> m.result().numpy() + 1.0 + + Usage with `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.Recall()]) + ``` + + Usage with a loss with `from_logits=True`: + + ```python + model.compile(optimizer='adam', + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=[tf.keras.metrics.Recall(thresholds=0)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, thresholds=None, top_k=None, class_id=None, name=None, dtype=None + ): + super().__init__(name=name, dtype=dtype) + self.init_thresholds = thresholds + self.top_k = top_k + self.class_id = class_id + + default_threshold = 0.5 if top_k is None else metrics_utils.NEG_INF + self.thresholds = metrics_utils.parse_init_thresholds( + thresholds, default_threshold=default_threshold + ) + self._thresholds_distributed_evenly = ( + metrics_utils.is_evenly_distributed_thresholds(self.thresholds) + ) + self.true_positives = self.add_weight( + "true_positives", shape=(len(self.thresholds),), initializer="zeros" + ) + self.false_negatives = self.add_weight( + "false_negatives", + shape=(len(self.thresholds),), + initializer="zeros", + ) + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates true positive and false negative statistics. + + Args: + y_true: The ground truth values, with the same dimensions as `y_pred`. + Will be cast to `bool`. + y_pred: The predicted values. Each element must be in the range + `[0, 1]`. + sample_weight: Optional weighting of each example. Can + be a `Tensor` whose rank is either 0, or the same rank as `y_true`, + and must be broadcastable to `y_true`. Defaults to `1`. + + Returns: + Update op. + """ + return metrics_utils.update_confusion_matrix_variables( + { + metrics_utils.ConfusionMatrix.TRUE_POSITIVES: self.true_positives, # noqa: E501 + metrics_utils.ConfusionMatrix.FALSE_NEGATIVES: self.false_negatives, # noqa: E501 + }, + y_true, + y_pred, + thresholds=self.thresholds, + thresholds_distributed_evenly=self._thresholds_distributed_evenly, + top_k=self.top_k, + class_id=self.class_id, + sample_weight=sample_weight, + ) + + def result(self): + result = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_negatives), + ) + return result[0] if len(self.thresholds) == 1 else result + + def reset_state(self): + num_thresholds = len(to_list(self.thresholds)) + backend.batch_set_value( + [ + (v, np.zeros((num_thresholds,))) + for v in (self.true_positives, self.false_negatives) + ] + ) + + def get_config(self): + config = { + "thresholds": self.init_thresholds, + "top_k": self.top_k, + "class_id": self.class_id, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class SensitivitySpecificityBase(base_metric.Metric, metaclass=abc.ABCMeta): + """Abstract base class for computing sensitivity and specificity. + + For additional information about specificity and sensitivity, see + [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). 
+ """ + + def __init__( + self, value, num_thresholds=200, class_id=None, name=None, dtype=None + ): + super().__init__(name=name, dtype=dtype) + if num_thresholds <= 0: + raise ValueError( + "Argument `num_thresholds` must be an integer > 0. " + f"Received: num_thresholds={num_thresholds}" + ) + self.value = value + self.class_id = class_id + self.true_positives = self.add_weight( + "true_positives", shape=(num_thresholds,), initializer="zeros" + ) + self.true_negatives = self.add_weight( + "true_negatives", shape=(num_thresholds,), initializer="zeros" + ) + self.false_positives = self.add_weight( + "false_positives", shape=(num_thresholds,), initializer="zeros" + ) + self.false_negatives = self.add_weight( + "false_negatives", shape=(num_thresholds,), initializer="zeros" + ) + + # Compute `num_thresholds` thresholds in [0, 1] + if num_thresholds == 1: + self.thresholds = [0.5] + self._thresholds_distributed_evenly = False + else: + thresholds = [ + (i + 1) * 1.0 / (num_thresholds - 1) + for i in range(num_thresholds - 2) + ] + self.thresholds = [0.0] + thresholds + [1.0] + self._thresholds_distributed_evenly = True + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates confusion matrix statistics. + + Args: + y_true: The ground truth values. + y_pred: The predicted values. + sample_weight: Optional weighting of each example. Can + be a `Tensor` whose rank is either 0, or the same rank as `y_true`, + and must be broadcastable to `y_true`. Defaults to `1`. + + Returns: + Update op. + """ + return metrics_utils.update_confusion_matrix_variables( + { + metrics_utils.ConfusionMatrix.TRUE_POSITIVES: self.true_positives, # noqa: E501 + metrics_utils.ConfusionMatrix.TRUE_NEGATIVES: self.true_negatives, # noqa: E501 + metrics_utils.ConfusionMatrix.FALSE_POSITIVES: self.false_positives, # noqa: E501 + metrics_utils.ConfusionMatrix.FALSE_NEGATIVES: self.false_negatives, # noqa: E501 + }, + y_true, + y_pred, + thresholds=self.thresholds, + thresholds_distributed_evenly=self._thresholds_distributed_evenly, + class_id=self.class_id, + sample_weight=sample_weight, + ) + + def reset_state(self): + num_thresholds = len(self.thresholds) + confusion_matrix_variables = ( + self.true_positives, + self.true_negatives, + self.false_positives, + self.false_negatives, + ) + backend.batch_set_value( + [ + (v, np.zeros((num_thresholds,))) + for v in confusion_matrix_variables + ] + ) + + def get_config(self): + config = {"class_id": self.class_id} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def _find_max_under_constraint(self, constrained, dependent, predicate): + """Returns the maximum of dependent_statistic that satisfies the + constraint. + + Args: + constrained: Over these values the constraint + is specified. A rank-1 tensor. + dependent: From these values the maximum that satiesfies the + constraint is selected. Values in this tensor and in + `constrained` are linked by having the same threshold at each + position, hence this tensor must have the same shape. + predicate: A binary boolean functor to be applied to arguments + `constrained` and `self.value`, e.g. `tf.greater`. + + Returns: + maximal dependent value, if no value satiesfies the constraint 0.0. 
+ """ + feasible = tf.where(predicate(constrained, self.value)) + feasible_exists = tf.greater(tf.size(feasible), 0) + max_dependent = tf.reduce_max(tf.gather(dependent, feasible)) + + return tf.where(feasible_exists, max_dependent, 0.0) + + +@keras_export("keras.metrics.SensitivityAtSpecificity") +class SensitivityAtSpecificity(SensitivitySpecificityBase): + """Computes best sensitivity where specificity is >= specified value. + + the sensitivity at a given specificity. + + `Sensitivity` measures the proportion of actual positives that are correctly + identified as such (tp / (tp + fn)). + `Specificity` measures the proportion of actual negatives that are correctly + identified as such (tn / (tn + fp)). + + This metric creates four local variables, `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` that are used to + compute the sensitivity at the given specificity. The threshold for the + given specificity value is computed and used to evaluate the corresponding + sensitivity. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + If `class_id` is specified, we calculate precision by considering only the + entries in the batch for which `class_id` is above the threshold + predictions, and computing the fraction of them for which `class_id` is + indeed a correct label. + + For additional information about specificity and sensitivity, see + [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). + + Args: + specificity: A scalar value in range `[0, 1]`. + num_thresholds: (Optional) The number of thresholds to + use for matching the given specificity. Defaults to `200`. + class_id: (Optional) Integer class ID for which we want binary metrics. + This must be in the half-open interval `[0, num_classes)`, where + `num_classes` is the last dimension of predictions. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.SensitivityAtSpecificity(0.5) + >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) + >>> m.result().numpy() + 0.5 + + >>> m.reset_state() + >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8], + ... sample_weight=[1, 1, 2, 2, 1]) + >>> m.result().numpy() + 0.333333 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.SensitivityAtSpecificity()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + specificity, + num_thresholds=200, + class_id=None, + name=None, + dtype=None, + ): + if specificity < 0 or specificity > 1: + raise ValueError( + "Argument `specificity` must be in the range [0, 1]. 
" + f"Received: specificity={specificity}" + ) + self.specificity = specificity + self.num_thresholds = num_thresholds + super().__init__( + specificity, + num_thresholds=num_thresholds, + class_id=class_id, + name=name, + dtype=dtype, + ) + + def result(self): + specificities = tf.math.divide_no_nan( + self.true_negatives, + tf.math.add(self.true_negatives, self.false_positives), + ) + sensitivities = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_negatives), + ) + return self._find_max_under_constraint( + specificities, sensitivities, tf.greater_equal + ) + + def get_config(self): + config = { + "num_thresholds": self.num_thresholds, + "specificity": self.specificity, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.metrics.SpecificityAtSensitivity") +class SpecificityAtSensitivity(SensitivitySpecificityBase): + """Computes best specificity where sensitivity is >= specified value. + + `Sensitivity` measures the proportion of actual positives that are correctly + identified as such (tp / (tp + fn)). + `Specificity` measures the proportion of actual negatives that are correctly + identified as such (tn / (tn + fp)). + + This metric creates four local variables, `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` that are used to + compute the specificity at the given sensitivity. The threshold for the + given sensitivity value is computed and used to evaluate the corresponding + specificity. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + If `class_id` is specified, we calculate precision by considering only the + entries in the batch for which `class_id` is above the threshold + predictions, and computing the fraction of them for which `class_id` is + indeed a correct label. + + For additional information about specificity and sensitivity, see + [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). + + Args: + sensitivity: A scalar value in range `[0, 1]`. + num_thresholds: (Optional) The number of thresholds to + use for matching the given sensitivity. Defaults to `200`. + class_id: (Optional) Integer class ID for which we want binary metrics. + This must be in the half-open interval `[0, num_classes)`, where + `num_classes` is the last dimension of predictions. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.SpecificityAtSensitivity(0.5) + >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) + >>> m.result().numpy() + 0.66666667 + + >>> m.reset_state() + >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8], + ... sample_weight=[1, 1, 2, 2, 2]) + >>> m.result().numpy() + 0.5 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.SpecificityAtSensitivity()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + sensitivity, + num_thresholds=200, + class_id=None, + name=None, + dtype=None, + ): + if sensitivity < 0 or sensitivity > 1: + raise ValueError( + "Argument `sensitivity` must be in the range [0, 1]. 
" + f"Received: sensitivity={sensitivity}" + ) + self.sensitivity = sensitivity + self.num_thresholds = num_thresholds + super().__init__( + sensitivity, + num_thresholds=num_thresholds, + class_id=class_id, + name=name, + dtype=dtype, + ) + + def result(self): + sensitivities = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_negatives), + ) + specificities = tf.math.divide_no_nan( + self.true_negatives, + tf.math.add(self.true_negatives, self.false_positives), + ) + return self._find_max_under_constraint( + sensitivities, specificities, tf.greater_equal + ) + + def get_config(self): + config = { + "num_thresholds": self.num_thresholds, + "sensitivity": self.sensitivity, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.metrics.PrecisionAtRecall") +class PrecisionAtRecall(SensitivitySpecificityBase): + """Computes best precision where recall is >= specified value. + + This metric creates four local variables, `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` that are used to + compute the precision at the given recall. The threshold for the given + recall value is computed and used to evaluate the corresponding precision. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + If `class_id` is specified, we calculate precision by considering only the + entries in the batch for which `class_id` is above the threshold + predictions, and computing the fraction of them for which `class_id` is + indeed a correct label. + + Args: + recall: A scalar value in range `[0, 1]`. + num_thresholds: (Optional) The number of thresholds to + use for matching the given recall. Defaults to `200`. + class_id: (Optional) Integer class ID for which we want binary metrics. + This must be in the half-open interval `[0, num_classes)`, where + `num_classes` is the last dimension of predictions. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.PrecisionAtRecall(0.5) + >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) + >>> m.result().numpy() + 0.5 + + >>> m.reset_state() + >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8], + ... sample_weight=[2, 2, 2, 1, 1]) + >>> m.result().numpy() + 0.33333333 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.PrecisionAtRecall(recall=0.8)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, recall, num_thresholds=200, class_id=None, name=None, dtype=None + ): + if recall < 0 or recall > 1: + raise ValueError( + "Argument `recall` must be in the range [0, 1]. 
" + f"Received: recall={recall}" + ) + self.recall = recall + self.num_thresholds = num_thresholds + super().__init__( + value=recall, + num_thresholds=num_thresholds, + class_id=class_id, + name=name, + dtype=dtype, + ) + + def result(self): + recalls = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_negatives), + ) + precisions = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_positives), + ) + return self._find_max_under_constraint( + recalls, precisions, tf.greater_equal + ) + + def get_config(self): + config = {"num_thresholds": self.num_thresholds, "recall": self.recall} + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.metrics.RecallAtPrecision") +class RecallAtPrecision(SensitivitySpecificityBase): + """Computes best recall where precision is >= specified value. + + For a given score-label-distribution the required precision might not + be achievable, in this case 0.0 is returned as recall. + + This metric creates four local variables, `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` that are used to + compute the recall at the given precision. The threshold for the given + precision value is computed and used to evaluate the corresponding recall. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + If `class_id` is specified, we calculate precision by considering only the + entries in the batch for which `class_id` is above the threshold + predictions, and computing the fraction of them for which `class_id` is + indeed a correct label. + + Args: + precision: A scalar value in range `[0, 1]`. + num_thresholds: (Optional) The number of thresholds to + use for matching the given precision. Defaults to `200`. + class_id: (Optional) Integer class ID for which we want binary metrics. + This must be in the half-open interval `[0, num_classes)`, where + `num_classes` is the last dimension of predictions. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.RecallAtPrecision(0.8) + >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) + >>> m.result().numpy() + 0.5 + + >>> m.reset_state() + >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9], + ... sample_weight=[1, 0, 0, 1]) + >>> m.result().numpy() + 1.0 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.RecallAtPrecision(precision=0.8)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + precision, + num_thresholds=200, + class_id=None, + name=None, + dtype=None, + ): + if precision < 0 or precision > 1: + raise ValueError( + "Argument `precision` must be in the range [0, 1]. 
" + f"Received: precision={precision}" + ) + self.precision = precision + self.num_thresholds = num_thresholds + super().__init__( + value=precision, + num_thresholds=num_thresholds, + class_id=class_id, + name=name, + dtype=dtype, + ) + + def result(self): + precisions = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_positives), + ) + recalls = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_negatives), + ) + return self._find_max_under_constraint( + precisions, recalls, tf.greater_equal + ) + + def get_config(self): + config = { + "num_thresholds": self.num_thresholds, + "precision": self.precision, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.metrics.AUC") +class AUC(base_metric.Metric): + """Approximates the AUC (Area under the curve) of the ROC or PR curves. + + The AUC (Area under the curve) of the ROC (Receiver operating + characteristic; default) or PR (Precision Recall) curves are quality + measures of binary classifiers. Unlike the accuracy, and like cross-entropy + losses, ROC-AUC and PR-AUC evaluate all the operational points of a model. + + This class approximates AUCs using a Riemann sum. During the metric + accumulation phrase, predictions are accumulated within predefined buckets + by value. The AUC is then computed by interpolating per-bucket averages. + These buckets define the evaluated operational points. + + This metric creates four local variables, `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` that are used to + compute the AUC. To discretize the AUC curve, a linearly spaced set of + thresholds is used to compute pairs of recall and precision values. The area + under the ROC-curve is therefore computed using the height of the recall + values by the false positive rate, while the area under the PR-curve is the + computed using the height of the precision values by the recall. + + This value is ultimately returned as `auc`, an idempotent operation that + computes the area under a discretized curve of precision versus recall + values (computed using the aforementioned variables). The `num_thresholds` + variable controls the degree of discretization with larger numbers of + thresholds more closely approximating the true AUC. The quality of the + approximation may vary dramatically depending on `num_thresholds`. The + `thresholds` parameter can be used to manually specify thresholds which + split the predictions more evenly. + + For a best approximation of the real AUC, `predictions` should be + distributed approximately uniformly in the range [0, 1] (if + `from_logits=False`). The quality of the AUC approximation may be poor if + this is not the case. Setting `summation_method` to 'minoring' or 'majoring' + can help quantify the error in the approximation by providing lower or upper + bound estimate of the AUC. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Args: + num_thresholds: (Optional) The number of thresholds to + use when discretizing the roc curve. Values must be > 1. + Defaults to `200`. + curve: (Optional) Specifies the name of the curve to be computed, 'ROC' + [default] or 'PR' for the Precision-Recall-curve. + summation_method: (Optional) Specifies the [Riemann summation method]( + https://en.wikipedia.org/wiki/Riemann_sum) used. 
+ 'interpolation' (default) applies mid-point summation scheme for + `ROC`. For PR-AUC, interpolates (true/false) positives but not the + ratio that is precision (see Davis & Goadrich 2006 for details); + 'minoring' applies left summation for increasing intervals and right + summation for decreasing intervals; 'majoring' does the opposite. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + thresholds: (Optional) A list of floating point values to use as the + thresholds for discretizing the curve. If set, the `num_thresholds` + parameter is ignored. Values should be in [0, 1]. Endpoint thresholds + equal to {-epsilon, 1+epsilon} for a small positive epsilon value will + be automatically included with these to correctly handle predictions + equal to exactly 0 or 1. + multi_label: boolean indicating whether multilabel data should be + treated as such, wherein AUC is computed separately for each label and + then averaged across labels, or (when False) if the data should be + flattened into a single label before AUC computation. In the latter + case, when multilabel data is passed to AUC, each label-prediction pair + is treated as an individual data point. Should be set to False for + multi-class data. + num_labels: (Optional) The number of labels, used when `multi_label` is + True. If `num_labels` is not specified, then state variables get created + on the first call to `update_state`. + label_weights: (Optional) list, array, or tensor of non-negative weights + used to compute AUCs for multilabel data. When `multi_label` is True, + the weights are applied to the individual label AUCs when they are + averaged to produce the multi-label AUC. When it's False, they are used + to weight the individual label predictions in computing the confusion + matrix on the flattened data. Note that this is unlike class_weights in + that class_weights weights the example depending on the value of its + label, whereas label_weights depends only on the index of that label + before flattening; therefore `label_weights` should not be used for + multi-class data. + from_logits: boolean indicating whether the predictions (`y_pred` in + `update_state`) are probabilities or sigmoid logits. As a rule of thumb, + when using a keras loss, the `from_logits` constructor argument of the + loss should match the AUC `from_logits` constructor argument. + + Standalone usage: + + >>> m = tf.keras.metrics.AUC(num_thresholds=3) + >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) + >>> # threshold values are [0 - 1e-7, 0.5, 1 + 1e-7] + >>> # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] + >>> # tp_rate = recall = [1, 0.5, 0], fp_rate = [1, 0, 0] + >>> # auc = ((((1+0.5)/2)*(1-0)) + (((0.5+0)/2)*(0-0))) = 0.75 + >>> m.result().numpy() + 0.75 + + >>> m.reset_state() + >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9], + ... sample_weight=[1, 0, 0, 1]) + >>> m.result().numpy() + 1.0 + + Usage with `compile()` API: + + ```python + # Reports the AUC of a model outputting a probability. + model.compile(optimizer='sgd', + loss=tf.keras.losses.BinaryCrossentropy(), + metrics=[tf.keras.metrics.AUC()]) + + # Reports the AUC of a model outputting a logit. 
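+    # `from_logits=True` tells the metric to pass `y_pred` through a
+    # sigmoid before bucketing, so it sees the same probabilities as the
+    # loss (see `update_state` below).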
+ model.compile(optimizer='sgd', + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=[tf.keras.metrics.AUC(from_logits=True)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + num_thresholds=200, + curve="ROC", + summation_method="interpolation", + name=None, + dtype=None, + thresholds=None, + multi_label=False, + num_labels=None, + label_weights=None, + from_logits=False, + ): + # Validate configurations. + if isinstance(curve, metrics_utils.AUCCurve) and curve not in list( + metrics_utils.AUCCurve + ): + raise ValueError( + f'Invalid `curve` argument value "{curve}". ' + f"Expected one of: {list(metrics_utils.AUCCurve)}" + ) + if isinstance( + summation_method, metrics_utils.AUCSummationMethod + ) and summation_method not in list(metrics_utils.AUCSummationMethod): + raise ValueError( + "Invalid `summation_method` " + f'argument value "{summation_method}". ' + f"Expected one of: {list(metrics_utils.AUCSummationMethod)}" + ) + + # Update properties. + self._init_from_thresholds = thresholds is not None + if thresholds is not None: + # If specified, use the supplied thresholds. + self.num_thresholds = len(thresholds) + 2 + thresholds = sorted(thresholds) + self._thresholds_distributed_evenly = ( + metrics_utils.is_evenly_distributed_thresholds( + np.array([0.0] + thresholds + [1.0]) + ) + ) + else: + if num_thresholds <= 1: + raise ValueError( + "Argument `num_thresholds` must be an integer > 1. " + f"Received: num_thresholds={num_thresholds}" + ) + + # Otherwise, linearly interpolate (num_thresholds - 2) thresholds in + # (0, 1). + self.num_thresholds = num_thresholds + thresholds = [ + (i + 1) * 1.0 / (num_thresholds - 1) + for i in range(num_thresholds - 2) + ] + self._thresholds_distributed_evenly = True + + # Add an endpoint "threshold" below zero and above one for either + # threshold method to account for floating point imprecisions. + self._thresholds = np.array( + [0.0 - backend.epsilon()] + thresholds + [1.0 + backend.epsilon()] + ) + + if isinstance(curve, metrics_utils.AUCCurve): + self.curve = curve + else: + self.curve = metrics_utils.AUCCurve.from_str(curve) + if isinstance(summation_method, metrics_utils.AUCSummationMethod): + self.summation_method = summation_method + else: + self.summation_method = metrics_utils.AUCSummationMethod.from_str( + summation_method + ) + super().__init__(name=name, dtype=dtype) + + # Handle multilabel arguments. + self.multi_label = multi_label + self.num_labels = num_labels + if label_weights is not None: + label_weights = tf.constant(label_weights, dtype=self.dtype) + tf.debugging.assert_non_negative( + label_weights, + message="All values of `label_weights` must be non-negative.", + ) + self.label_weights = label_weights + + else: + self.label_weights = None + + self._from_logits = from_logits + + self._built = False + if self.multi_label: + if num_labels: + shape = tf.TensorShape([None, num_labels]) + self._build(shape) + else: + if num_labels: + raise ValueError( + "`num_labels` is needed only when `multi_label` is True." + ) + self._build(None) + + @property + def thresholds(self): + """The thresholds used for evaluating AUC.""" + return list(self._thresholds) + + def _build(self, shape): + """Initialize TP, FP, TN, and FN tensors, given the shape of the + data.""" + if self.multi_label: + if shape.ndims != 2: + raise ValueError( + "`y_pred` must have rank 2 when `multi_label=True`. " + f"Found rank {shape.ndims}. 
" + f"Full shape received for `y_pred`: {shape}" + ) + self._num_labels = shape[1] + variable_shape = tf.TensorShape( + [self.num_thresholds, self._num_labels] + ) + else: + variable_shape = tf.TensorShape([self.num_thresholds]) + + self._build_input_shape = shape + # Create metric variables + self.true_positives = self.add_weight( + "true_positives", shape=variable_shape, initializer="zeros" + ) + self.true_negatives = self.add_weight( + "true_negatives", shape=variable_shape, initializer="zeros" + ) + self.false_positives = self.add_weight( + "false_positives", shape=variable_shape, initializer="zeros" + ) + self.false_negatives = self.add_weight( + "false_negatives", shape=variable_shape, initializer="zeros" + ) + + if self.multi_label: + with tf.init_scope(): + # This should only be necessary for handling v1 behavior. In v2, + # AUC should be initialized outside of any tf.functions, and + # therefore in eager mode. + if not tf.executing_eagerly(): + backend._initialize_variables(backend._get_session()) + + self._built = True + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates confusion matrix statistics. + + Args: + y_true: The ground truth values. + y_pred: The predicted values. + sample_weight: Optional weighting of each example. Can + be a `Tensor` whose rank is either 0, or the same rank as `y_true`, + and must be broadcastable to `y_true`. Defaults to `1`. + + Returns: + Update op. + """ + if not self._built: + self._build(tf.TensorShape(y_pred.shape)) + + if self.multi_label or (self.label_weights is not None): + # y_true should have shape (number of examples, number of labels). + shapes = [(y_true, ("N", "L"))] + if self.multi_label: + # TP, TN, FP, and FN should all have shape + # (number of thresholds, number of labels). + shapes.extend( + [ + (self.true_positives, ("T", "L")), + (self.true_negatives, ("T", "L")), + (self.false_positives, ("T", "L")), + (self.false_negatives, ("T", "L")), + ] + ) + if self.label_weights is not None: + # label_weights should be of length equal to the number of + # labels. + shapes.append((self.label_weights, ("L",))) + tf.debugging.assert_shapes( + shapes, message="Number of labels is not consistent." + ) + + # Only forward label_weights to update_confusion_matrix_variables when + # multi_label is False. Otherwise the averaging of individual label AUCs + # is handled in AUC.result + label_weights = None if self.multi_label else self.label_weights + + if self._from_logits: + y_pred = activations.sigmoid(y_pred) + + return metrics_utils.update_confusion_matrix_variables( + { + metrics_utils.ConfusionMatrix.TRUE_POSITIVES: self.true_positives, # noqa: E501 + metrics_utils.ConfusionMatrix.TRUE_NEGATIVES: self.true_negatives, # noqa: E501 + metrics_utils.ConfusionMatrix.FALSE_POSITIVES: self.false_positives, # noqa: E501 + metrics_utils.ConfusionMatrix.FALSE_NEGATIVES: self.false_negatives, # noqa: E501 + }, + y_true, + y_pred, + self._thresholds, + thresholds_distributed_evenly=self._thresholds_distributed_evenly, + sample_weight=sample_weight, + multi_label=self.multi_label, + label_weights=label_weights, + ) + + def interpolate_pr_auc(self): + """Interpolation formula inspired by section 4 of Davis & Goadrich 2006. 
+ + https://www.biostat.wisc.edu/~page/rocpr.pdf + + Note here we derive & use a closed formula not present in the paper + as follows: + + Precision = TP / (TP + FP) = TP / P + + Modeling all of TP (true positive), FP (false positive) and their sum + P = TP + FP (predicted positive) as varying linearly within each + interval [A, B] between successive thresholds, we get + + Precision slope = dTP / dP + = (TP_B - TP_A) / (P_B - P_A) + = (TP - TP_A) / (P - P_A) + Precision = (TP_A + slope * (P - P_A)) / P + + The area within the interval is (slope / total_pos_weight) times + + int_A^B{Precision.dP} = int_A^B{(TP_A + slope * (P - P_A)) * dP / P} + int_A^B{Precision.dP} = int_A^B{slope * dP + intercept * dP / P} + + where intercept = TP_A - slope * P_A = TP_B - slope * P_B, resulting in + + int_A^B{Precision.dP} = TP_B - TP_A + intercept * log(P_B / P_A) + + Bringing back the factor (slope / total_pos_weight) we'd put aside, we + get + + slope * [dTP + intercept * log(P_B / P_A)] / total_pos_weight + + where dTP == TP_B - TP_A. + + Note that when P_A == 0 the above calculation simplifies into + + int_A^B{Precision.dTP} = int_A^B{slope * dTP} = slope * (TP_B - TP_A) + + which is really equivalent to imputing constant precision throughout the + first bucket having >0 true positives. + + Returns: + pr_auc: an approximation of the area under the P-R curve. + """ + dtp = ( + self.true_positives[: self.num_thresholds - 1] + - self.true_positives[1:] + ) + p = tf.math.add(self.true_positives, self.false_positives) + dp = p[: self.num_thresholds - 1] - p[1:] + prec_slope = tf.math.divide_no_nan( + dtp, tf.maximum(dp, 0), name="prec_slope" + ) + intercept = self.true_positives[1:] - tf.multiply(prec_slope, p[1:]) + + safe_p_ratio = tf.where( + tf.logical_and(p[: self.num_thresholds - 1] > 0, p[1:] > 0), + tf.math.divide_no_nan( + p[: self.num_thresholds - 1], + tf.maximum(p[1:], 0), + name="recall_relative_ratio", + ), + tf.ones_like(p[1:]), + ) + + pr_auc_increment = tf.math.divide_no_nan( + prec_slope * (dtp + intercept * tf.math.log(safe_p_ratio)), + tf.maximum(self.true_positives[1:] + self.false_negatives[1:], 0), + name="pr_auc_increment", + ) + + if self.multi_label: + by_label_auc = tf.reduce_sum( + pr_auc_increment, name=self.name + "_by_label", axis=0 + ) + if self.label_weights is None: + # Evenly weighted average of the label AUCs. + return tf.reduce_mean(by_label_auc, name=self.name) + else: + # Weighted average of the label AUCs. + return tf.math.divide_no_nan( + tf.reduce_sum( + tf.multiply(by_label_auc, self.label_weights) + ), + tf.reduce_sum(self.label_weights), + name=self.name, + ) + else: + return tf.reduce_sum(pr_auc_increment, name="interpolate_pr_auc") + + def result(self): + if ( + self.curve == metrics_utils.AUCCurve.PR + and self.summation_method + == metrics_utils.AUCSummationMethod.INTERPOLATION + ): + # This use case is different and is handled separately. + return self.interpolate_pr_auc() + + # Set `x` and `y` values for the curves based on `curve` config. + recall = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_negatives), + ) + if self.curve == metrics_utils.AUCCurve.ROC: + fp_rate = tf.math.divide_no_nan( + self.false_positives, + tf.math.add(self.false_positives, self.true_negatives), + ) + x = fp_rate + y = recall + else: # curve == 'PR'. 
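+            # For the PR curve the axes flip relative to ROC: precision is
+            # plotted against recall, so the Riemann rectangles computed
+            # below integrate precision (y) over recall (x).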
+ precision = tf.math.divide_no_nan( + self.true_positives, + tf.math.add(self.true_positives, self.false_positives), + ) + x = recall + y = precision + + # Find the rectangle heights based on `summation_method`. + if ( + self.summation_method + == metrics_utils.AUCSummationMethod.INTERPOLATION + ): + # Note: the case ('PR', 'interpolation') has been handled above. + heights = (y[: self.num_thresholds - 1] + y[1:]) / 2.0 + elif self.summation_method == metrics_utils.AUCSummationMethod.MINORING: + heights = tf.minimum(y[: self.num_thresholds - 1], y[1:]) + # self.summation_method = metrics_utils.AUCSummationMethod.MAJORING: + else: + heights = tf.maximum(y[: self.num_thresholds - 1], y[1:]) + + # Sum up the areas of all the rectangles. + if self.multi_label: + riemann_terms = tf.multiply( + x[: self.num_thresholds - 1] - x[1:], heights + ) + by_label_auc = tf.reduce_sum( + riemann_terms, name=self.name + "_by_label", axis=0 + ) + + if self.label_weights is None: + # Unweighted average of the label AUCs. + return tf.reduce_mean(by_label_auc, name=self.name) + else: + # Weighted average of the label AUCs. + return tf.math.divide_no_nan( + tf.reduce_sum( + tf.multiply(by_label_auc, self.label_weights) + ), + tf.reduce_sum(self.label_weights), + name=self.name, + ) + else: + return tf.reduce_sum( + tf.multiply(x[: self.num_thresholds - 1] - x[1:], heights), + name=self.name, + ) + + def reset_state(self): + if self._built: + confusion_matrix_variables = ( + self.true_positives, + self.true_negatives, + self.false_positives, + self.false_negatives, + ) + if self.multi_label: + backend.batch_set_value( + [ + (v, np.zeros((self.num_thresholds, self._num_labels))) + for v in confusion_matrix_variables + ] + ) + else: + backend.batch_set_value( + [ + (v, np.zeros((self.num_thresholds,))) + for v in confusion_matrix_variables + ] + ) + + def get_config(self): + if is_tensor_or_variable(self.label_weights): + label_weights = backend.eval(self.label_weights) + else: + label_weights = self.label_weights + config = { + "num_thresholds": self.num_thresholds, + "curve": self.curve.value, + "summation_method": self.summation_method.value, + "multi_label": self.multi_label, + "num_labels": self.num_labels, + "label_weights": label_weights, + "from_logits": self._from_logits, + } + # optimization to avoid serializing a large number of generated + # thresholds + if self._init_from_thresholds: + # We remove the endpoint thresholds as an inverse of how the + # thresholds were initialized. This ensures that a metric + # initialized from this config has the same thresholds. + config["thresholds"] = self.thresholds[1:-1] + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras/metrics/confusion_metrics_test.py b/keras/metrics/confusion_metrics_test.py new file mode 100644 index 000000000000..a647e4efc67a --- /dev/null +++ b/keras/metrics/confusion_metrics_test.py @@ -0,0 +1,2739 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for confusion metrics.""" + +import json + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized +from tensorflow.python.platform import tf_logging + +from keras import backend +from keras import layers +from keras import metrics +from keras import models +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import metrics_utils + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class FalsePositivesTest(tf.test.TestCase, parameterized.TestCase): + def test_config(self): + fp_obj = metrics.FalsePositives(name="my_fp", thresholds=[0.4, 0.9]) + self.assertEqual(fp_obj.name, "my_fp") + self.assertLen(fp_obj.variables, 1) + self.assertEqual(fp_obj.thresholds, [0.4, 0.9]) + + # Check save and restore config + fp_obj2 = metrics.FalsePositives.from_config(fp_obj.get_config()) + self.assertEqual(fp_obj2.name, "my_fp") + self.assertLen(fp_obj2.variables, 1) + self.assertEqual(fp_obj2.thresholds, [0.4, 0.9]) + + def test_unweighted(self): + fp_obj = metrics.FalsePositives() + self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) + + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + + update_op = fp_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = fp_obj.result() + self.assertAllClose(7.0, result) + + def test_weighted(self): + fp_obj = metrics.FalsePositives() + self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + sample_weight = tf.constant((1.0, 1.5, 2.0, 2.5)) + result = fp_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(14.0, self.evaluate(result)) + + def test_unweighted_with_thresholds(self): + fp_obj = metrics.FalsePositives(thresholds=[0.15, 0.5, 0.85]) + self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) + + y_pred = tf.constant( + ( + (0.9, 0.2, 0.8, 0.1), + (0.2, 0.9, 0.7, 0.6), + (0.1, 0.2, 0.4, 0.3), + (0, 1, 0.7, 0.3), + ) + ) + y_true = tf.constant( + ((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), (1, 1, 1, 1)) + ) + + update_op = fp_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = fp_obj.result() + self.assertAllClose([7.0, 4.0, 2.0], result) + + def test_weighted_with_thresholds(self): + fp_obj = metrics.FalsePositives(thresholds=[0.15, 0.5, 0.85]) + self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) + + y_pred = tf.constant( + ( + (0.9, 0.2, 0.8, 0.1), + (0.2, 0.9, 0.7, 0.6), + (0.1, 0.2, 0.4, 0.3), + (0, 1, 0.7, 0.3), + ) + ) + y_true = tf.constant( + ((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), (1, 1, 1, 1)) + ) + sample_weight = ( + (1.0, 2.0, 3.0, 5.0), + (7.0, 11.0, 13.0, 17.0), + (19.0, 23.0, 29.0, 31.0), + (5.0, 15.0, 10.0, 0), + ) + + result = fp_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose([125.0, 42.0, 12.0], self.evaluate(result)) + + def test_threshold_limit(self): + with self.assertRaisesRegex( + ValueError, + r"Threshold values must be in \[0, 1\]. 
Received: \[-1, 2\]", + ): + metrics.FalsePositives(thresholds=[-1, 0.5, 2]) + + with self.assertRaisesRegex( + ValueError, + r"Threshold values must be in \[0, 1\]. Received: \[None\]", + ): + metrics.FalsePositives(thresholds=[None]) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class FalseNegativesTest(tf.test.TestCase, parameterized.TestCase): + def test_config(self): + fn_obj = metrics.FalseNegatives(name="my_fn", thresholds=[0.4, 0.9]) + self.assertEqual(fn_obj.name, "my_fn") + self.assertLen(fn_obj.variables, 1) + self.assertEqual(fn_obj.thresholds, [0.4, 0.9]) + + # Check save and restore config + fn_obj2 = metrics.FalseNegatives.from_config(fn_obj.get_config()) + self.assertEqual(fn_obj2.name, "my_fn") + self.assertLen(fn_obj2.variables, 1) + self.assertEqual(fn_obj2.thresholds, [0.4, 0.9]) + + def test_unweighted(self): + fn_obj = metrics.FalseNegatives() + self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) + + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + + update_op = fn_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = fn_obj.result() + self.assertAllClose(3.0, result) + + def test_weighted(self): + fn_obj = metrics.FalseNegatives() + self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + sample_weight = tf.constant((1.0, 1.5, 2.0, 2.5)) + result = fn_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(5.0, self.evaluate(result)) + + def test_unweighted_with_thresholds(self): + fn_obj = metrics.FalseNegatives(thresholds=[0.15, 0.5, 0.85]) + self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) + + y_pred = tf.constant( + ( + (0.9, 0.2, 0.8, 0.1), + (0.2, 0.9, 0.7, 0.6), + (0.1, 0.2, 0.4, 0.3), + (0, 1, 0.7, 0.3), + ) + ) + y_true = tf.constant( + ((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), (1, 1, 1, 1)) + ) + + update_op = fn_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = fn_obj.result() + self.assertAllClose([1.0, 4.0, 6.0], result) + + def test_weighted_with_thresholds(self): + fn_obj = metrics.FalseNegatives(thresholds=[0.15, 0.5, 0.85]) + self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) + + y_pred = tf.constant( + ( + (0.9, 0.2, 0.8, 0.1), + (0.2, 0.9, 0.7, 0.6), + (0.1, 0.2, 0.4, 0.3), + (0, 1, 0.7, 0.3), + ) + ) + y_true = tf.constant( + ((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), (1, 1, 1, 1)) + ) + sample_weight = ((3.0,), (5.0,), (7.0,), (4.0,)) + + result = fn_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose([4.0, 16.0, 23.0], self.evaluate(result)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class TrueNegativesTest(tf.test.TestCase, parameterized.TestCase): + def test_config(self): + tn_obj = metrics.TrueNegatives(name="my_tn", thresholds=[0.4, 0.9]) + self.assertEqual(tn_obj.name, "my_tn") + self.assertLen(tn_obj.variables, 1) + self.assertEqual(tn_obj.thresholds, [0.4, 0.9]) + + # Check save and restore config + tn_obj2 = metrics.TrueNegatives.from_config(tn_obj.get_config()) + self.assertEqual(tn_obj2.name, "my_tn") + self.assertLen(tn_obj2.variables, 1) + self.assertEqual(tn_obj2.thresholds, 
[0.4, 0.9]) + + def test_unweighted(self): + tn_obj = metrics.TrueNegatives() + self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) + + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + + update_op = tn_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = tn_obj.result() + self.assertAllClose(3.0, result) + + def test_weighted(self): + tn_obj = metrics.TrueNegatives() + self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + sample_weight = tf.constant((1.0, 1.5, 2.0, 2.5)) + result = tn_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(4.0, self.evaluate(result)) + + def test_unweighted_with_thresholds(self): + tn_obj = metrics.TrueNegatives(thresholds=[0.15, 0.5, 0.85]) + self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) + + y_pred = tf.constant( + ( + (0.9, 0.2, 0.8, 0.1), + (0.2, 0.9, 0.7, 0.6), + (0.1, 0.2, 0.4, 0.3), + (0, 1, 0.7, 0.3), + ) + ) + y_true = tf.constant( + ((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), (1, 1, 1, 1)) + ) + + update_op = tn_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = tn_obj.result() + self.assertAllClose([2.0, 5.0, 7.0], result) + + def test_weighted_with_thresholds(self): + tn_obj = metrics.TrueNegatives(thresholds=[0.15, 0.5, 0.85]) + self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) + + y_pred = tf.constant( + ( + (0.9, 0.2, 0.8, 0.1), + (0.2, 0.9, 0.7, 0.6), + (0.1, 0.2, 0.4, 0.3), + (0, 1, 0.7, 0.3), + ) + ) + y_true = tf.constant( + ((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), (1, 1, 1, 1)) + ) + sample_weight = ((0.0, 2.0, 3.0, 5.0),) + + result = tn_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose([5.0, 15.0, 23.0], self.evaluate(result)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class TruePositivesTest(tf.test.TestCase, parameterized.TestCase): + def test_config(self): + tp_obj = metrics.TruePositives(name="my_tp", thresholds=[0.4, 0.9]) + self.assertEqual(tp_obj.name, "my_tp") + self.assertLen(tp_obj.variables, 1) + self.assertEqual(tp_obj.thresholds, [0.4, 0.9]) + + # Check save and restore config + tp_obj2 = metrics.TruePositives.from_config(tp_obj.get_config()) + self.assertEqual(tp_obj2.name, "my_tp") + self.assertLen(tp_obj2.variables, 1) + self.assertEqual(tp_obj2.thresholds, [0.4, 0.9]) + + def test_unweighted(self): + tp_obj = metrics.TruePositives() + self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) + + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + + update_op = tp_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = tp_obj.result() + self.assertAllClose(7.0, result) + + def test_weighted(self): + tp_obj = metrics.TruePositives() + self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + sample_weight = 
tf.constant((1.0, 1.5, 2.0, 2.5)) + result = tp_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(12.0, self.evaluate(result)) + + def test_unweighted_with_thresholds(self): + tp_obj = metrics.TruePositives(thresholds=[0.15, 0.5, 0.85]) + self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) + + y_pred = tf.constant( + ( + (0.9, 0.2, 0.8, 0.1), + (0.2, 0.9, 0.7, 0.6), + (0.1, 0.2, 0.4, 0.3), + (0, 1, 0.7, 0.3), + ) + ) + y_true = tf.constant( + ((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), (1, 1, 1, 1)) + ) + + update_op = tp_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = tp_obj.result() + self.assertAllClose([6.0, 3.0, 1.0], result) + + def test_weighted_with_thresholds(self): + tp_obj = metrics.TruePositives(thresholds=[0.15, 0.5, 0.85]) + self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) + + y_pred = tf.constant( + ( + (0.9, 0.2, 0.8, 0.1), + (0.2, 0.9, 0.7, 0.6), + (0.1, 0.2, 0.4, 0.3), + (0, 1, 0.7, 0.3), + ) + ) + y_true = tf.constant( + ((0, 1, 1, 0), (1, 0, 0, 0), (0, 0, 0, 0), (1, 1, 1, 1)) + ) + + result = tp_obj(y_true, y_pred, sample_weight=37.0) + self.assertAllClose([222.0, 111.0, 37.0], self.evaluate(result)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class PrecisionTest(tf.test.TestCase, parameterized.TestCase): + def test_config(self): + p_obj = metrics.Precision( + name="my_precision", thresholds=[0.4, 0.9], top_k=15, class_id=12 + ) + self.assertEqual(p_obj.name, "my_precision") + self.assertLen(p_obj.variables, 2) + self.assertEqual( + [v.name for v in p_obj.variables], + ["true_positives:0", "false_positives:0"], + ) + self.assertEqual(p_obj.thresholds, [0.4, 0.9]) + self.assertEqual(p_obj.top_k, 15) + self.assertEqual(p_obj.class_id, 12) + + # Check save and restore config + p_obj2 = metrics.Precision.from_config(p_obj.get_config()) + self.assertEqual(p_obj2.name, "my_precision") + self.assertLen(p_obj2.variables, 2) + self.assertEqual(p_obj2.thresholds, [0.4, 0.9]) + self.assertEqual(p_obj2.top_k, 15) + self.assertEqual(p_obj2.class_id, 12) + + def test_value_is_idempotent(self): + p_obj = metrics.Precision(thresholds=[0.3, 0.72]) + y_pred = tf.random.uniform(shape=(10, 3)) + y_true = tf.random.uniform(shape=(10, 3)) + update_op = p_obj.update_state(y_true, y_pred) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + + # Run several updates. + for _ in range(10): + self.evaluate(update_op) + + # Then verify idempotency. 
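+        # `result()` must not mutate metric state: with no further
+        # `update_state` calls, repeated evaluations are expected to
+        # return the same value.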
+ initial_precision = self.evaluate(p_obj.result()) + for _ in range(10): + self.assertArrayNear( + initial_precision, self.evaluate(p_obj.result()), 1e-3 + ) + + def test_unweighted(self): + p_obj = metrics.Precision() + y_pred = tf.constant([1, 0, 1, 0], shape=(1, 4)) + y_true = tf.constant([0, 1, 1, 0], shape=(1, 4)) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + result = p_obj(y_true, y_pred) + self.assertAlmostEqual(0.5, self.evaluate(result)) + + def test_unweighted_all_incorrect(self): + p_obj = metrics.Precision(thresholds=[0.5]) + inputs = np.random.randint(0, 2, size=(100, 1)) + y_pred = tf.constant(inputs) + y_true = tf.constant(1 - inputs) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + result = p_obj(y_true, y_pred) + self.assertAlmostEqual(0, self.evaluate(result)) + + def test_weighted(self): + p_obj = metrics.Precision() + y_pred = tf.constant([[1, 0, 1, 0], [1, 0, 1, 0]]) + y_true = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]]) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + result = p_obj( + y_true, + y_pred, + sample_weight=tf.constant([[1, 2, 3, 4], [4, 3, 2, 1]]), + ) + weighted_tp = 3.0 + 4.0 + weighted_positives = (1.0 + 3.0) + (4.0 + 2.0) + expected_precision = weighted_tp / weighted_positives + self.assertAlmostEqual(expected_precision, self.evaluate(result)) + + def test_div_by_zero(self): + p_obj = metrics.Precision() + y_pred = tf.constant([0, 0, 0, 0]) + y_true = tf.constant([0, 0, 0, 0]) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + result = p_obj(y_true, y_pred) + self.assertEqual(0, self.evaluate(result)) + + def test_unweighted_with_threshold(self): + p_obj = metrics.Precision(thresholds=[0.5, 0.7]) + y_pred = tf.constant([1, 0, 0.6, 0], shape=(1, 4)) + y_true = tf.constant([0, 1, 1, 0], shape=(1, 4)) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + result = p_obj(y_true, y_pred) + self.assertArrayNear([0.5, 0.0], self.evaluate(result), 0) + + def test_weighted_with_threshold(self): + p_obj = metrics.Precision(thresholds=[0.5, 1.0]) + y_true = tf.constant([[0, 1], [1, 0]], shape=(2, 2)) + y_pred = tf.constant([[1, 0], [0.6, 0]], shape=(2, 2), dtype=tf.float32) + weights = tf.constant([[4, 0], [3, 1]], shape=(2, 2), dtype=tf.float32) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + result = p_obj(y_true, y_pred, sample_weight=weights) + weighted_tp = 0 + 3.0 + weighted_positives = (0 + 3.0) + (4.0 + 0.0) + expected_precision = weighted_tp / weighted_positives + self.assertArrayNear( + [expected_precision, 0], self.evaluate(result), 1e-3 + ) + + def test_multiple_updates(self): + p_obj = metrics.Precision(thresholds=[0.5, 1.0]) + y_true = tf.constant([[0, 1], [1, 0]], shape=(2, 2)) + y_pred = tf.constant([[1, 0], [0.6, 0]], shape=(2, 2), dtype=tf.float32) + weights = tf.constant([[4, 0], [3, 1]], shape=(2, 2), dtype=tf.float32) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + update_op = p_obj.update_state(y_true, y_pred, sample_weight=weights) + for _ in range(2): + self.evaluate(update_op) + + weighted_tp = (0 + 3.0) + (0 + 3.0) + weighted_positives = ((0 + 3.0) + (4.0 + 0.0)) + ( + (0 + 3.0) + (4.0 + 0.0) + ) + expected_precision = weighted_tp / weighted_positives + self.assertArrayNear( + [expected_precision, 0], self.evaluate(p_obj.result()), 1e-3 + ) + + def test_unweighted_top_k(self): + p_obj = metrics.Precision(top_k=3) + y_pred = tf.constant([0.2, 0.1, 0.5, 0, 0.2], shape=(1, 5)) + y_true = 
tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + result = p_obj(y_true, y_pred) + self.assertAlmostEqual(1.0 / 3, self.evaluate(result)) + + def test_weighted_top_k(self): + p_obj = metrics.Precision(top_k=3) + y_pred1 = tf.constant([0.2, 0.1, 0.4, 0, 0.2], shape=(1, 5)) + y_true1 = tf.constant([0, 1, 1, 0, 1], shape=(1, 5)) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + self.evaluate( + p_obj( + y_true1, y_pred1, sample_weight=tf.constant([[1, 4, 2, 3, 5]]) + ) + ) + + y_pred2 = tf.constant([0.2, 0.6, 0.4, 0.2, 0.2], shape=(1, 5)) + y_true2 = tf.constant([1, 0, 1, 1, 1], shape=(1, 5)) + result = p_obj(y_true2, y_pred2, sample_weight=tf.constant(3)) + + tp = (2 + 5) + (3 + 3) + predicted_positives = (1 + 2 + 5) + (3 + 3 + 3) + expected_precision = tp / predicted_positives + self.assertAlmostEqual(expected_precision, self.evaluate(result)) + + def test_unweighted_class_id(self): + p_obj = metrics.Precision(class_id=2) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + + y_pred = tf.constant([0.2, 0.1, 0.6, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) + result = p_obj(y_true, y_pred) + self.assertAlmostEqual(1, self.evaluate(result)) + self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) + self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) + + y_pred = tf.constant([0.2, 0.1, 0, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) + result = p_obj(y_true, y_pred) + self.assertAlmostEqual(1, self.evaluate(result)) + self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) + self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) + + y_pred = tf.constant([0.2, 0.1, 0.6, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 0, 0, 0], shape=(1, 5)) + result = p_obj(y_true, y_pred) + self.assertAlmostEqual(0.5, self.evaluate(result)) + self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) + self.assertAlmostEqual(1, self.evaluate(p_obj.false_positives)) + + def test_unweighted_top_k_and_class_id(self): + p_obj = metrics.Precision(class_id=2, top_k=2) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + + y_pred = tf.constant([0.2, 0.6, 0.3, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) + result = p_obj(y_true, y_pred) + self.assertAlmostEqual(1, self.evaluate(result)) + self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) + self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) + + y_pred = tf.constant([1, 1, 0.9, 1, 1], shape=(1, 5)) + y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) + result = p_obj(y_true, y_pred) + self.assertAlmostEqual(1, self.evaluate(result)) + self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) + self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) + + def test_unweighted_top_k_and_threshold(self): + p_obj = metrics.Precision(thresholds=0.7, top_k=2) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + + y_pred = tf.constant([0.2, 0.8, 0.6, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 1, 0, 1], shape=(1, 5)) + result = p_obj(y_true, y_pred) + self.assertAlmostEqual(1, self.evaluate(result)) + self.assertAlmostEqual(1, self.evaluate(p_obj.true_positives)) + self.assertAlmostEqual(0, self.evaluate(p_obj.false_positives)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class RecallTest(tf.test.TestCase, 
parameterized.TestCase): + def test_config(self): + r_obj = metrics.Recall( + name="my_recall", thresholds=[0.4, 0.9], top_k=15, class_id=12 + ) + self.assertEqual(r_obj.name, "my_recall") + self.assertLen(r_obj.variables, 2) + self.assertEqual( + [v.name for v in r_obj.variables], + ["true_positives:0", "false_negatives:0"], + ) + self.assertEqual(r_obj.thresholds, [0.4, 0.9]) + self.assertEqual(r_obj.top_k, 15) + self.assertEqual(r_obj.class_id, 12) + + # Check save and restore config + r_obj2 = metrics.Recall.from_config(r_obj.get_config()) + self.assertEqual(r_obj2.name, "my_recall") + self.assertLen(r_obj2.variables, 2) + self.assertEqual(r_obj2.thresholds, [0.4, 0.9]) + self.assertEqual(r_obj2.top_k, 15) + self.assertEqual(r_obj2.class_id, 12) + + def test_value_is_idempotent(self): + r_obj = metrics.Recall(thresholds=[0.3, 0.72]) + y_pred = tf.random.uniform(shape=(10, 3)) + y_true = tf.random.uniform(shape=(10, 3)) + update_op = r_obj.update_state(y_true, y_pred) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + + # Run several updates. + for _ in range(10): + self.evaluate(update_op) + + # Then verify idempotency. + initial_recall = self.evaluate(r_obj.result()) + for _ in range(10): + self.assertArrayNear( + initial_recall, self.evaluate(r_obj.result()), 1e-3 + ) + + def test_unweighted(self): + r_obj = metrics.Recall() + y_pred = tf.constant([1, 0, 1, 0], shape=(1, 4)) + y_true = tf.constant([0, 1, 1, 0], shape=(1, 4)) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + result = r_obj(y_true, y_pred) + self.assertAlmostEqual(0.5, self.evaluate(result)) + + def test_unweighted_all_incorrect(self): + r_obj = metrics.Recall(thresholds=[0.5]) + inputs = np.random.randint(0, 2, size=(100, 1)) + y_pred = tf.constant(inputs) + y_true = tf.constant(1 - inputs) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + result = r_obj(y_true, y_pred) + self.assertAlmostEqual(0, self.evaluate(result)) + + def test_weighted(self): + r_obj = metrics.Recall() + y_pred = tf.constant([[1, 0, 1, 0], [0, 1, 0, 1]]) + y_true = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]]) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + result = r_obj( + y_true, + y_pred, + sample_weight=tf.constant([[1, 2, 3, 4], [4, 3, 2, 1]]), + ) + weighted_tp = 3.0 + 1.0 + weighted_t = (2.0 + 3.0) + (4.0 + 1.0) + expected_recall = weighted_tp / weighted_t + self.assertAlmostEqual(expected_recall, self.evaluate(result)) + + def test_div_by_zero(self): + r_obj = metrics.Recall() + y_pred = tf.constant([0, 0, 0, 0]) + y_true = tf.constant([0, 0, 0, 0]) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + result = r_obj(y_true, y_pred) + self.assertEqual(0, self.evaluate(result)) + + def test_unweighted_with_threshold(self): + r_obj = metrics.Recall(thresholds=[0.5, 0.7]) + y_pred = tf.constant([1, 0, 0.6, 0], shape=(1, 4)) + y_true = tf.constant([0, 1, 1, 0], shape=(1, 4)) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + result = r_obj(y_true, y_pred) + self.assertArrayNear([0.5, 0.0], self.evaluate(result), 0) + + def test_weighted_with_threshold(self): + r_obj = metrics.Recall(thresholds=[0.5, 1.0]) + y_true = tf.constant([[0, 1], [1, 0]], shape=(2, 2)) + y_pred = tf.constant([[1, 0], [0.6, 0]], shape=(2, 2), dtype=tf.float32) + weights = tf.constant([[1, 4], [3, 2]], shape=(2, 2), dtype=tf.float32) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + result = r_obj(y_true, y_pred, 
sample_weight=weights) + weighted_tp = 0 + 3.0 + weighted_positives = (0 + 3.0) + (4.0 + 0.0) + expected_recall = weighted_tp / weighted_positives + self.assertArrayNear([expected_recall, 0], self.evaluate(result), 1e-3) + + def test_multiple_updates(self): + r_obj = metrics.Recall(thresholds=[0.5, 1.0]) + y_true = tf.constant([[0, 1], [1, 0]], shape=(2, 2)) + y_pred = tf.constant([[1, 0], [0.6, 0]], shape=(2, 2), dtype=tf.float32) + weights = tf.constant([[1, 4], [3, 2]], shape=(2, 2), dtype=tf.float32) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + update_op = r_obj.update_state(y_true, y_pred, sample_weight=weights) + for _ in range(2): + self.evaluate(update_op) + + weighted_tp = (0 + 3.0) + (0 + 3.0) + weighted_positives = ((0 + 3.0) + (4.0 + 0.0)) + ( + (0 + 3.0) + (4.0 + 0.0) + ) + expected_recall = weighted_tp / weighted_positives + self.assertArrayNear( + [expected_recall, 0], self.evaluate(r_obj.result()), 1e-3 + ) + + def test_unweighted_top_k(self): + r_obj = metrics.Recall(top_k=3) + y_pred = tf.constant([0.2, 0.1, 0.5, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + result = r_obj(y_true, y_pred) + self.assertAlmostEqual(0.5, self.evaluate(result)) + + def test_weighted_top_k(self): + r_obj = metrics.Recall(top_k=3) + y_pred1 = tf.constant([0.2, 0.1, 0.4, 0, 0.2], shape=(1, 5)) + y_true1 = tf.constant([0, 1, 1, 0, 1], shape=(1, 5)) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + self.evaluate( + r_obj( + y_true1, y_pred1, sample_weight=tf.constant([[1, 4, 2, 3, 5]]) + ) + ) + + y_pred2 = tf.constant([0.2, 0.6, 0.4, 0.2, 0.2], shape=(1, 5)) + y_true2 = tf.constant([1, 0, 1, 1, 1], shape=(1, 5)) + result = r_obj(y_true2, y_pred2, sample_weight=tf.constant(3)) + + tp = (2 + 5) + (3 + 3) + positives = (4 + 2 + 5) + (3 + 3 + 3 + 3) + expected_recall = tp / positives + self.assertAlmostEqual(expected_recall, self.evaluate(result)) + + def test_unweighted_class_id(self): + r_obj = metrics.Recall(class_id=2) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + + y_pred = tf.constant([0.2, 0.1, 0.6, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) + result = r_obj(y_true, y_pred) + self.assertAlmostEqual(1, self.evaluate(result)) + self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives)) + self.assertAlmostEqual(0, self.evaluate(r_obj.false_negatives)) + + y_pred = tf.constant([0.2, 0.1, 0, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) + result = r_obj(y_true, y_pred) + self.assertAlmostEqual(0.5, self.evaluate(result)) + self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives)) + self.assertAlmostEqual(1, self.evaluate(r_obj.false_negatives)) + + y_pred = tf.constant([0.2, 0.1, 0.6, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 0, 0, 0], shape=(1, 5)) + result = r_obj(y_true, y_pred) + self.assertAlmostEqual(0.5, self.evaluate(result)) + self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives)) + self.assertAlmostEqual(1, self.evaluate(r_obj.false_negatives)) + + def test_unweighted_top_k_and_class_id(self): + r_obj = metrics.Recall(class_id=2, top_k=2) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + + y_pred = tf.constant([0.2, 0.6, 0.3, 0, 0.2], shape=(1, 5)) + y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5)) + result = r_obj(y_true, y_pred) + self.assertAlmostEqual(1, self.evaluate(result)) + 
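# With top_k=2, only the two highest scores (indices 1 and 2) count
+ # as predicted positives; class_id=2 is among them and its label is
+ # positive, so tp = 1 and fn = 0 here.
+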
self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives))
+ self.assertAlmostEqual(0, self.evaluate(r_obj.false_negatives))
+
+ y_pred = tf.constant([1, 1, 0.9, 1, 1], shape=(1, 5))
+ y_true = tf.constant([0, 1, 1, 0, 0], shape=(1, 5))
+ result = r_obj(y_true, y_pred)
+ self.assertAlmostEqual(0.5, self.evaluate(result))
+ self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives))
+ self.assertAlmostEqual(1, self.evaluate(r_obj.false_negatives))
+
+ def test_unweighted_top_k_and_threshold(self):
+ r_obj = metrics.Recall(thresholds=0.7, top_k=2)
+ self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables))
+
+ y_pred = tf.constant([0.2, 0.8, 0.6, 0, 0.2], shape=(1, 5))
+ y_true = tf.constant([1, 1, 1, 0, 1], shape=(1, 5))
+ result = r_obj(y_true, y_pred)
+ self.assertAlmostEqual(0.25, self.evaluate(result))
+ self.assertAlmostEqual(1, self.evaluate(r_obj.true_positives))
+ self.assertAlmostEqual(3, self.evaluate(r_obj.false_negatives))
+
+
+@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"]))
+class SensitivityAtSpecificityTest(tf.test.TestCase, parameterized.TestCase):
+ def test_config(self):
+ s_obj = metrics.SensitivityAtSpecificity(
+ 0.4,
+ num_thresholds=100,
+ class_id=12,
+ name="sensitivity_at_specificity_1",
+ )
+ self.assertEqual(s_obj.name, "sensitivity_at_specificity_1")
+ self.assertLen(s_obj.variables, 4)
+ self.assertEqual(s_obj.specificity, 0.4)
+ self.assertEqual(s_obj.num_thresholds, 100)
+ self.assertEqual(s_obj.class_id, 12)
+
+ # Check save and restore config
+ s_obj2 = metrics.SensitivityAtSpecificity.from_config(
+ s_obj.get_config()
+ )
+ self.assertEqual(s_obj2.name, "sensitivity_at_specificity_1")
+ self.assertLen(s_obj2.variables, 4)
+ self.assertEqual(s_obj2.specificity, 0.4)
+ self.assertEqual(s_obj2.num_thresholds, 100)
+ self.assertEqual(s_obj2.class_id, 12)
+
+ def test_value_is_idempotent(self):
+ s_obj = metrics.SensitivityAtSpecificity(0.7)
+ y_pred = tf.random.uniform((10, 3), maxval=1, dtype=tf.float32, seed=1)
+ y_true = tf.random.uniform((10, 3), maxval=2, dtype=tf.int64, seed=1)
+ update_op = s_obj.update_state(y_true, y_pred)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+
+ # Run several updates.
+ for _ in range(10):
+ self.evaluate(update_op)
+
+ # Then verify idempotency.
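+ # result() should be a pure read of the accumulated confusion-matrix
+ # variables: evaluating it repeatedly without further update_state
+ # calls must keep returning the same value.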
+ initial_sensitivity = self.evaluate(s_obj.result())
+ for _ in range(10):
+ self.assertAlmostEqual(
+ initial_sensitivity, self.evaluate(s_obj.result()), 1e-3
+ )
+
+ def test_unweighted_all_correct(self):
+ with self.test_session():
+ s_obj = metrics.SensitivityAtSpecificity(0.7)
+ inputs = np.random.randint(0, 2, size=(100, 1))
+ y_pred = tf.constant(inputs, dtype=tf.float32)
+ y_true = tf.constant(inputs)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(1, self.evaluate(result))
+
+ def test_unweighted_high_specificity(self):
+ s_obj = metrics.SensitivityAtSpecificity(0.8)
+ pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.1, 0.45, 0.5, 0.8, 0.9]
+ label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.constant(label_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(0.8, self.evaluate(result))
+
+ def test_unweighted_low_specificity(self):
+ s_obj = metrics.SensitivityAtSpecificity(0.4)
+ pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26]
+ label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.constant(label_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(0.6, self.evaluate(result))
+
+ def test_unweighted_class_id(self):
+ s_obj = metrics.SensitivityAtSpecificity(0.4, class_id=2)
+ pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26]
+ label_values = [0, 0, 0, 0, 0, 2, 2, 2, 2, 2]
+
+ y_pred = tf.transpose([pred_values] * 3)
+ y_true = tf.one_hot(label_values, depth=3)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(0.6, self.evaluate(result))
+
+ @parameterized.parameters([tf.bool, tf.int32, tf.float32])
+ def test_weighted(self, label_dtype):
+ s_obj = metrics.SensitivityAtSpecificity(0.4)
+ pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26]
+ label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+ weight_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.cast(label_values, dtype=label_dtype)
+ weights = tf.constant(weight_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred, sample_weight=weights)
+ self.assertAlmostEqual(0.675, self.evaluate(result))
+
+ def test_invalid_specificity(self):
+ with self.assertRaisesRegex(
+ ValueError, r"`specificity` must be in the range \[0, 1\]."
+ ):
+ metrics.SensitivityAtSpecificity(-1)
+
+ def test_invalid_num_thresholds(self):
+ with self.assertRaisesRegex(
+ ValueError, "Argument `num_thresholds` must be an integer > 0"
+ ):
+ metrics.SensitivityAtSpecificity(0.4, num_thresholds=-1)
+
+
+@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"]))
+class SpecificityAtSensitivityTest(tf.test.TestCase, parameterized.TestCase):
+ def test_config(self):
+ s_obj = metrics.SpecificityAtSensitivity(
+ 0.4,
+ num_thresholds=100,
+ class_id=12,
+ name="specificity_at_sensitivity_1",
+ )
+ self.assertEqual(s_obj.name, "specificity_at_sensitivity_1")
+ self.assertLen(s_obj.variables, 4)
+ self.assertEqual(s_obj.sensitivity, 0.4)
+ self.assertEqual(s_obj.num_thresholds, 100)
+ self.assertEqual(s_obj.class_id, 12)
+
+ # Check save and restore config
+ s_obj2 = metrics.SpecificityAtSensitivity.from_config(
+ s_obj.get_config()
+ )
+ self.assertEqual(s_obj2.name, "specificity_at_sensitivity_1")
+ self.assertLen(s_obj2.variables, 4)
+ self.assertEqual(s_obj2.sensitivity, 0.4)
+ self.assertEqual(s_obj2.num_thresholds, 100)
+ self.assertEqual(s_obj2.class_id, 12)
+
+ def test_value_is_idempotent(self):
+ s_obj = metrics.SpecificityAtSensitivity(0.7)
+ y_pred = tf.random.uniform((10, 3), maxval=1, dtype=tf.float32, seed=1)
+ y_true = tf.random.uniform((10, 3), maxval=2, dtype=tf.int64, seed=1)
+ update_op = s_obj.update_state(y_true, y_pred)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+
+ # Run several updates.
+ for _ in range(10):
+ self.evaluate(update_op)
+
+ # Then verify idempotency.
+ initial_specificity = self.evaluate(s_obj.result())
+ for _ in range(10):
+ self.assertAlmostEqual(
+ initial_specificity, self.evaluate(s_obj.result()), 1e-3
+ )
+
+ def test_unweighted_all_correct(self):
+ s_obj = metrics.SpecificityAtSensitivity(0.7)
+ inputs = np.random.randint(0, 2, size=(100, 1))
+ y_pred = tf.constant(inputs, dtype=tf.float32)
+ y_true = tf.constant(inputs)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(1, self.evaluate(result))
+
+ def test_unweighted_high_sensitivity(self):
+ s_obj = metrics.SpecificityAtSensitivity(1.0)
+ pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26]
+ label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.constant(label_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(0.2, self.evaluate(result))
+
+ def test_unweighted_low_sensitivity(self):
+ s_obj = metrics.SpecificityAtSensitivity(0.4)
+ pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26]
+ label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.constant(label_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(0.6, self.evaluate(result))
+
+ def test_unweighted_class_id(self):
+ s_obj = metrics.SpecificityAtSensitivity(0.4, class_id=2)
+ pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26]
+ label_values = [0, 0, 0, 0, 0, 2, 2, 2, 2, 2]
+
+ y_pred = tf.transpose([pred_values] * 3)
+ y_true = tf.one_hot(label_values, depth=3)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(0.6, self.evaluate(result))
+
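+ # Rough arithmetic for the weighted test below (assuming the usual
+ # `pred > threshold` convention): just below t = 0.26, weighted
+ # tp = 9 + 10 = 19 of 40 positive weight (sensitivity 0.475 >= 0.4),
+ # while fp = 4 + 5 = 9 and tn = 6 of 15 negative weight, so the best
+ # achievable specificity is 6 / 15 = 0.4.
+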
@parameterized.parameters([tf.bool, tf.int32, tf.float32])
+ def test_weighted(self, label_dtype):
+ s_obj = metrics.SpecificityAtSensitivity(0.4)
+ pred_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26]
+ label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+ weight_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.cast(label_values, dtype=label_dtype)
+ weights = tf.constant(weight_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred, sample_weight=weights)
+ self.assertAlmostEqual(0.4, self.evaluate(result))
+
+ def test_invalid_sensitivity(self):
+ with self.assertRaisesRegex(
+ ValueError, r"`sensitivity` must be in the range \[0, 1\]."
+ ):
+ metrics.SpecificityAtSensitivity(-1)
+
+ def test_invalid_num_thresholds(self):
+ with self.assertRaisesRegex(
+ ValueError, "Argument `num_thresholds` must be an integer > 0"
+ ):
+ metrics.SpecificityAtSensitivity(0.4, num_thresholds=-1)
+
+
+@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"]))
+class PrecisionAtRecallTest(tf.test.TestCase, parameterized.TestCase):
+ def test_config(self):
+ s_obj = metrics.PrecisionAtRecall(
+ 0.4, num_thresholds=100, class_id=12, name="precision_at_recall_1"
+ )
+ self.assertEqual(s_obj.name, "precision_at_recall_1")
+ self.assertLen(s_obj.variables, 4)
+ self.assertEqual(s_obj.recall, 0.4)
+ self.assertEqual(s_obj.num_thresholds, 100)
+ self.assertEqual(s_obj.class_id, 12)
+
+ # Check save and restore config
+ s_obj2 = metrics.PrecisionAtRecall.from_config(s_obj.get_config())
+ self.assertEqual(s_obj2.name, "precision_at_recall_1")
+ self.assertLen(s_obj2.variables, 4)
+ self.assertEqual(s_obj2.recall, 0.4)
+ self.assertEqual(s_obj2.num_thresholds, 100)
+ self.assertEqual(s_obj2.class_id, 12)
+
+ def test_value_is_idempotent(self):
+ s_obj = metrics.PrecisionAtRecall(0.7)
+ y_pred = tf.random.uniform((10, 3), maxval=1, dtype=tf.float32, seed=1)
+ y_true = tf.random.uniform((10, 3), maxval=2, dtype=tf.int64, seed=1)
+ update_op = s_obj.update_state(y_true, y_pred)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+
+ # Run several updates.
+ for _ in range(10):
+ self.evaluate(update_op)
+
+ # Then verify idempotency.
+ initial_precision = self.evaluate(s_obj.result())
+ for _ in range(10):
+ self.assertAlmostEqual(
+ initial_precision, self.evaluate(s_obj.result()), 1e-3
+ )
+
+ def test_unweighted_all_correct(self):
+ s_obj = metrics.PrecisionAtRecall(0.7)
+ inputs = np.random.randint(0, 2, size=(100, 1))
+ y_pred = tf.constant(inputs, dtype=tf.float32)
+ y_true = tf.constant(inputs)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(1, self.evaluate(result))
+
+ def test_unweighted_high_recall(self):
+ s_obj = metrics.PrecisionAtRecall(0.8)
+ pred_values = [0.0, 0.1, 0.2, 0.5, 0.6, 0.2, 0.5, 0.6, 0.8, 0.9]
+ label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.constant(label_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ # For 0.2 < decision threshold < 0.5.
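+ # At such a threshold, tp = 4 (the positives scored 0.5, 0.6, 0.8,
+ # 0.9) and fp = 2 (the negatives scored 0.5, 0.6), giving recall
+ # 4/5 = 0.8 and precision 4/6 = 2/3.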
+ self.assertAlmostEqual(2.0 / 3, self.evaluate(result))
+
+ def test_unweighted_low_recall(self):
+ s_obj = metrics.PrecisionAtRecall(0.6)
+ pred_values = [0.0, 0.1, 0.2, 0.5, 0.6, 0.2, 0.5, 0.6, 0.8, 0.9]
+ label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.constant(label_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ # For 0.5 < decision threshold < 0.6.
+ self.assertAlmostEqual(0.75, self.evaluate(result))
+
+ def test_unweighted_class_id(self):
+ s_obj = metrics.PrecisionAtRecall(0.6, class_id=2)
+ pred_values = [0.0, 0.1, 0.2, 0.5, 0.6, 0.2, 0.5, 0.6, 0.8, 0.9]
+ label_values = [0, 0, 0, 0, 0, 2, 2, 2, 2, 2]
+
+ y_pred = tf.transpose([pred_values] * 3)
+ y_true = tf.one_hot(label_values, depth=3)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ # For 0.5 < decision threshold < 0.6.
+ self.assertAlmostEqual(0.75, self.evaluate(result))
+
+ @parameterized.parameters([tf.bool, tf.int32, tf.float32])
+ def test_weighted(self, label_dtype):
+ s_obj = metrics.PrecisionAtRecall(7.0 / 8)
+ pred_values = [0.0, 0.1, 0.2, 0.5, 0.6, 0.2, 0.5, 0.6, 0.8, 0.9]
+ label_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+ weight_values = [2, 1, 2, 1, 2, 1, 2, 2, 1, 2]
+
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.cast(label_values, dtype=label_dtype)
+ weights = tf.constant(weight_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred, sample_weight=weights)
+ # For 0.2 < decision threshold < 0.5.
+ self.assertAlmostEqual(0.7, self.evaluate(result))
+
+ def test_invalid_recall(self):
+ with self.assertRaisesRegex(
+ ValueError, r"`recall` must be in the range \[0, 1\]."
+ ):
+ metrics.PrecisionAtRecall(-1)
+
+ def test_invalid_num_thresholds(self):
+ with self.assertRaisesRegex(
+ ValueError, "Argument `num_thresholds` must be an integer > 0"
+ ):
+ metrics.PrecisionAtRecall(0.4, num_thresholds=-1)
+
+
+@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"]))
+class RecallAtPrecisionTest(tf.test.TestCase, parameterized.TestCase):
+ def test_config(self):
+ s_obj = metrics.RecallAtPrecision(
+ 0.4, num_thresholds=100, class_id=12, name="recall_at_precision_1"
+ )
+ self.assertEqual(s_obj.name, "recall_at_precision_1")
+ self.assertLen(s_obj.variables, 4)
+ self.assertEqual(s_obj.precision, 0.4)
+ self.assertEqual(s_obj.num_thresholds, 100)
+ self.assertEqual(s_obj.class_id, 12)
+
+ # Check save and restore config
+ s_obj2 = metrics.RecallAtPrecision.from_config(s_obj.get_config())
+ self.assertEqual(s_obj2.name, "recall_at_precision_1")
+ self.assertLen(s_obj2.variables, 4)
+ self.assertEqual(s_obj2.precision, 0.4)
+ self.assertEqual(s_obj2.num_thresholds, 100)
+ self.assertEqual(s_obj2.class_id, 12)
+
+ def test_value_is_idempotent(self):
+ s_obj = metrics.RecallAtPrecision(0.7)
+ y_pred = tf.random.uniform((10, 3), maxval=1, dtype=tf.float32, seed=1)
+ y_true = tf.random.uniform((10, 3), maxval=2, dtype=tf.int64, seed=1)
+ update_op = s_obj.update_state(y_true, y_pred)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+
+ # Run several updates.
+ for _ in range(10):
+ self.evaluate(update_op)
+
+ # Then verify idempotency.
+ initial_recall = self.evaluate(s_obj.result())
+ for _ in range(10):
+ self.assertAlmostEqual(
+ initial_recall, self.evaluate(s_obj.result()), 1e-3
+ )
+
+ def test_unweighted_all_correct(self):
+ s_obj = metrics.RecallAtPrecision(0.7)
+ inputs = np.random.randint(0, 2, size=(100, 1))
+ y_pred = tf.constant(inputs, dtype=tf.float32)
+ y_true = tf.constant(inputs)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ self.assertAlmostEqual(1, self.evaluate(result))
+
+ def test_unweighted_high_precision(self):
+ s_obj = metrics.RecallAtPrecision(0.75)
+ pred_values = [
+ 0.05,
+ 0.1,
+ 0.2,
+ 0.3,
+ 0.3,
+ 0.35,
+ 0.4,
+ 0.45,
+ 0.5,
+ 0.6,
+ 0.9,
+ 0.95,
+ ]
+ label_values = [0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1]
+ # precisions: [1/2, 6/11, 1/2, 5/9, 5/8, 5/7, 2/3, 3/5, 3/4, 2/3, 1/2,
+ # 1].
+ # recalls: [1, 1, 5/6, 5/6, 5/6, 5/6, 2/3, 1/2, 1/2, 1/3, 1/6,
+ # 1/6].
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.constant(label_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ # The precision 0.75 can be reached at thresholds 0.45<=t<0.5.
+ self.assertAlmostEqual(0.5, self.evaluate(result))
+
+ def test_unweighted_low_precision(self):
+ s_obj = metrics.RecallAtPrecision(2.0 / 3)
+ pred_values = [
+ 0.05,
+ 0.1,
+ 0.2,
+ 0.3,
+ 0.3,
+ 0.35,
+ 0.4,
+ 0.45,
+ 0.5,
+ 0.6,
+ 0.9,
+ 0.95,
+ ]
+ label_values = [0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1]
+ # precisions: [1/2, 6/11, 1/2, 5/9, 5/8, 5/7, 2/3, 3/5, 3/4, 2/3, 1/2,
+ # 1].
+ # recalls: [1, 1, 5/6, 5/6, 5/6, 5/6, 2/3, 1/2, 1/2, 1/3, 1/6,
+ # 1/6].
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.constant(label_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ # The precision 5/7 can be reached at thresholds 0.3<=t<0.35.
+ self.assertAlmostEqual(5.0 / 6, self.evaluate(result))
+
+ def test_unweighted_class_id(self):
+ s_obj = metrics.RecallAtPrecision(2.0 / 3, class_id=2)
+ pred_values = [
+ 0.05,
+ 0.1,
+ 0.2,
+ 0.3,
+ 0.3,
+ 0.35,
+ 0.4,
+ 0.45,
+ 0.5,
+ 0.6,
+ 0.9,
+ 0.95,
+ ]
+ label_values = [0, 2, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2]
+ # precisions: [1/2, 6/11, 1/2, 5/9, 5/8, 5/7, 2/3, 3/5, 3/4, 2/3, 1/2,
+ # 1].
+ # recalls: [1, 1, 5/6, 5/6, 5/6, 5/6, 2/3, 1/2, 1/2, 1/3, 1/6,
+ # 1/6].
+ y_pred = tf.transpose([pred_values] * 3)
+ y_true = tf.one_hot(label_values, depth=3)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ # The precision 5/7 can be reached at thresholds 0.3<=t<0.35.
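+ # At that point only the positive example scored 0.1 is missed:
+ # tp = 5 of 6 positives, so the reported recall is 5/6.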
+ self.assertAlmostEqual(5.0 / 6, self.evaluate(result))
+
+ @parameterized.parameters([tf.bool, tf.int32, tf.float32])
+ def test_weighted(self, label_dtype):
+ s_obj = metrics.RecallAtPrecision(0.75)
+ pred_values = [0.1, 0.2, 0.3, 0.5, 0.6, 0.9, 0.9]
+ label_values = [0, 1, 0, 0, 0, 1, 1]
+ weight_values = [1, 2, 1, 2, 1, 2, 1]
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.cast(label_values, dtype=label_dtype)
+ weights = tf.constant(weight_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred, sample_weight=weights)
+ self.assertAlmostEqual(0.6, self.evaluate(result))
+
+ def test_unachievable_precision(self):
+ s_obj = metrics.RecallAtPrecision(2.0 / 3)
+ pred_values = [0.1, 0.2, 0.3, 0.9]
+ label_values = [1, 1, 0, 0]
+ y_pred = tf.constant(pred_values, dtype=tf.float32)
+ y_true = tf.constant(label_values)
+ self.evaluate(tf.compat.v1.variables_initializer(s_obj.variables))
+ result = s_obj(y_true, y_pred)
+ # The highest possible precision is 1/2 which is below the required
+ # value, expect 0 recall.
+ self.assertAlmostEqual(0, self.evaluate(result))
+
+ def test_invalid_precision(self):
+ with self.assertRaisesRegex(
+ ValueError, r"`precision` must be in the range \[0, 1\]."
+ ):
+ metrics.RecallAtPrecision(-1)
+
+ def test_invalid_num_thresholds(self):
+ with self.assertRaisesRegex(
+ ValueError, "Argument `num_thresholds` must be an integer > 0"
+ ):
+ metrics.RecallAtPrecision(0.4, num_thresholds=-1)
+
+
+@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"]))
+class AUCTest(tf.test.TestCase, parameterized.TestCase):
+ def setup(self):
+ self.num_thresholds = 3
+ self.y_pred = tf.constant([0, 0.5, 0.3, 0.9], dtype=tf.float32)
+ self.y_pred_multi_label = tf.constant(
+ [[0.0, 0.4], [0.5, 0.7], [0.3, 0.2], [0.9, 0.3]], dtype=tf.float32
+ )
+ epsilon = 1e-12
+ self.y_pred_logits = -tf.math.log(1.0 / (self.y_pred + epsilon) - 1.0)
+ self.y_true = tf.constant([0, 0, 1, 1])
+ self.y_true_multi_label = tf.constant([[0, 0], [1, 1], [1, 1], [1, 0]])
+ self.sample_weight = [1, 2, 3, 4]
+
+ # threshold values are [0 - 1e-7, 0.5, 1 + 1e-7]
+ # y_pred when threshold = 0 - 1e-7 : [1, 1, 1, 1]
+ # y_pred when threshold = 0.5 : [0, 0, 0, 1]
+ # y_pred when threshold = 1 + 1e-7 : [0, 0, 0, 0]
+
+ # without sample_weight:
+ # tp = np.sum([[0, 0, 1, 1], [0, 0, 0, 1], [0, 0, 0, 0]], axis=1)
+ # fp = np.sum([[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], axis=1)
+ # fn = np.sum([[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 1, 1]], axis=1)
+ # tn = np.sum([[0, 0, 0, 0], [1, 1, 0, 0], [1, 1, 0, 0]], axis=1)
+
+ # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2]
+
+ # with sample_weight:
+ # tp = np.sum([[0, 0, 3, 4], [0, 0, 0, 4], [0, 0, 0, 0]], axis=1)
+ # fp = np.sum([[1, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], axis=1)
+ # fn = np.sum([[0, 0, 0, 0], [0, 0, 3, 0], [0, 0, 3, 4]], axis=1)
+ # tn = np.sum([[0, 0, 0, 0], [1, 2, 0, 0], [1, 2, 0, 0]], axis=1)
+
+ # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3]
+
+ def test_config(self):
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=100,
+ curve="PR",
+ summation_method="majoring",
+ name="auc_1",
+ dtype=tf.float64,
+ multi_label=True,
+ num_labels=2,
+ from_logits=True,
+ )
+ auc_obj.update_state(self.y_true_multi_label, self.y_pred_multi_label)
+ self.assertEqual(auc_obj.name, "auc_1")
+ self.assertEqual(auc_obj._dtype, tf.float64)
+ self.assertLen(auc_obj.variables, 4)
+ self.assertEqual(auc_obj.num_thresholds, 100)
+
self.assertEqual(auc_obj.curve, metrics_utils.AUCCurve.PR) + self.assertEqual( + auc_obj.summation_method, metrics_utils.AUCSummationMethod.MAJORING + ) + self.assertTrue(auc_obj.multi_label) + self.assertEqual(auc_obj.num_labels, 2) + self.assertTrue(auc_obj._from_logits) + old_config = auc_obj.get_config() + self.assertNotIn("thresholds", old_config) + self.assertDictEqual(old_config, json.loads(json.dumps(old_config))) + + # Check save and restore config. + auc_obj2 = metrics.AUC.from_config(auc_obj.get_config()) + auc_obj2.update_state(self.y_true_multi_label, self.y_pred_multi_label) + self.assertEqual(auc_obj2.name, "auc_1") + self.assertLen(auc_obj2.variables, 4) + self.assertEqual(auc_obj2.num_thresholds, 100) + self.assertEqual(auc_obj2.curve, metrics_utils.AUCCurve.PR) + self.assertEqual( + auc_obj2.summation_method, metrics_utils.AUCSummationMethod.MAJORING + ) + self.assertTrue(auc_obj2.multi_label) + self.assertEqual(auc_obj2.num_labels, 2) + self.assertTrue(auc_obj2._from_logits) + new_config = auc_obj2.get_config() + self.assertNotIn("thresholds", new_config) + self.assertDictEqual(old_config, new_config) + self.assertAllClose(auc_obj.thresholds, auc_obj2.thresholds) + + def test_config_manual_thresholds(self): + self.setup() + auc_obj = metrics.AUC( + num_thresholds=None, + curve="PR", + summation_method="majoring", + name="auc_1", + thresholds=[0.3, 0.5], + ) + auc_obj.update_state(self.y_true, self.y_pred) + self.assertEqual(auc_obj.name, "auc_1") + self.assertLen(auc_obj.variables, 4) + self.assertEqual(auc_obj.num_thresholds, 4) + self.assertAllClose(auc_obj.thresholds, [0.0, 0.3, 0.5, 1.0]) + self.assertEqual(auc_obj.curve, metrics_utils.AUCCurve.PR) + self.assertEqual( + auc_obj.summation_method, metrics_utils.AUCSummationMethod.MAJORING + ) + old_config = auc_obj.get_config() + self.assertDictEqual(old_config, json.loads(json.dumps(old_config))) + + # Check save and restore config. + auc_obj2 = metrics.AUC.from_config(auc_obj.get_config()) + auc_obj2.update_state(self.y_true, self.y_pred) + self.assertEqual(auc_obj2.name, "auc_1") + self.assertLen(auc_obj2.variables, 4) + self.assertEqual(auc_obj2.num_thresholds, 4) + self.assertEqual(auc_obj2.curve, metrics_utils.AUCCurve.PR) + self.assertEqual( + auc_obj2.summation_method, metrics_utils.AUCSummationMethod.MAJORING + ) + new_config = auc_obj2.get_config() + self.assertDictEqual(old_config, new_config) + self.assertAllClose(auc_obj.thresholds, auc_obj2.thresholds) + + def test_value_is_idempotent(self): + self.setup() + auc_obj = metrics.AUC(num_thresholds=3) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + + # Run several updates. + update_op = auc_obj.update_state(self.y_true, self.y_pred) + for _ in range(10): + self.evaluate(update_op) + + # Then verify idempotency. 
+ initial_auc = self.evaluate(auc_obj.result()) + for _ in range(10): + self.assertAllClose( + initial_auc, self.evaluate(auc_obj.result()), 1e-3 + ) + + def test_unweighted_all_correct(self): + self.setup() + auc_obj = metrics.AUC() + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(self.y_true, self.y_true) + self.assertEqual(self.evaluate(result), 1) + + def test_unweighted(self): + self.setup() + auc_obj = metrics.AUC(num_thresholds=self.num_thresholds) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(self.y_true, self.y_pred) + + # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] + # recall = [2/2, 1/(1+1), 0] = [1, 0.5, 0] + # fp_rate = [2/2, 0, 0] = [1, 0, 0] + # heights = [(1 + 0.5)/2, (0.5 + 0)/2] = [0.75, 0.25] + # widths = [(1 - 0), (0 - 0)] = [1, 0] + expected_result = 0.75 * 1 + 0.25 * 0 + self.assertAllClose(self.evaluate(result), expected_result, 1e-3) + + def test_unweighted_from_logits(self): + self.setup() + auc_obj = metrics.AUC( + num_thresholds=self.num_thresholds, from_logits=True + ) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(self.y_true, self.y_pred_logits) + + # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] + # recall = [2/2, 1/(1+1), 0] = [1, 0.5, 0] + # fp_rate = [2/2, 0, 0] = [1, 0, 0] + # heights = [(1 + 0.5)/2, (0.5 + 0)/2] = [0.75, 0.25] + # widths = [(1 - 0), (0 - 0)] = [1, 0] + expected_result = 0.75 * 1 + 0.25 * 0 + self.assertAllClose(self.evaluate(result), expected_result, 1e-3) + + def test_manual_thresholds(self): + self.setup() + # Verify that when specified, thresholds are used instead of + # num_thresholds. + auc_obj = metrics.AUC(num_thresholds=2, thresholds=[0.5]) + self.assertEqual(auc_obj.num_thresholds, 3) + self.assertAllClose(auc_obj.thresholds, [0.0, 0.5, 1.0]) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(self.y_true, self.y_pred) + + # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] + # recall = [2/2, 1/(1+1), 0] = [1, 0.5, 0] + # fp_rate = [2/2, 0, 0] = [1, 0, 0] + # heights = [(1 + 0.5)/2, (0.5 + 0)/2] = [0.75, 0.25] + # widths = [(1 - 0), (0 - 0)] = [1, 0] + expected_result = 0.75 * 1 + 0.25 * 0 + self.assertAllClose(self.evaluate(result), expected_result, 1e-3) + + def test_weighted_roc_interpolation(self): + self.setup() + auc_obj = metrics.AUC(num_thresholds=self.num_thresholds) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj( + self.y_true, self.y_pred, sample_weight=self.sample_weight + ) + + # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3] + # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0] + # fp_rate = [3/3, 0, 0] = [1, 0, 0] + # heights = [(1 + 0.571)/2, (0.571 + 0)/2] = [0.7855, 0.2855] + # widths = [(1 - 0), (0 - 0)] = [1, 0] + expected_result = 0.7855 * 1 + 0.2855 * 0 + self.assertAllClose(self.evaluate(result), expected_result, 1e-3) + + def test_weighted_roc_majoring(self): + self.setup() + auc_obj = metrics.AUC( + num_thresholds=self.num_thresholds, summation_method="majoring" + ) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj( + self.y_true, self.y_pred, sample_weight=self.sample_weight + ) + + # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3] + # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0] + # fp_rate = [3/3, 0, 0] = [1, 0, 0] + # heights = [max(1, 0.571), max(0.571, 0)] = [1, 0.571] + # widths = 
[(1 - 0), (0 - 0)] = [1, 0]
+ expected_result = 1 * 1 + 0.571 * 0
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-3)
+
+ def test_weighted_roc_minoring(self):
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=self.num_thresholds, summation_method="minoring"
+ )
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ result = auc_obj(
+ self.y_true, self.y_pred, sample_weight=self.sample_weight
+ )
+
+ # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3]
+ # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0]
+ # fp_rate = [3/3, 0, 0] = [1, 0, 0]
+ # heights = [min(1, 0.571), min(0.571, 0)] = [0.571, 0]
+ # widths = [(1 - 0), (0 - 0)] = [1, 0]
+ expected_result = 0.571 * 1 + 0 * 0
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-3)
+
+ def test_weighted_pr_majoring(self):
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=self.num_thresholds,
+ curve="PR",
+ summation_method="majoring",
+ )
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ result = auc_obj(
+ self.y_true, self.y_pred, sample_weight=self.sample_weight
+ )
+
+ # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3]
+ # precision = [7/(7+3), 4/4, 0] = [0.7, 1, 0]
+ # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0]
+ # heights = [max(0.7, 1), max(1, 0)] = [1, 1]
+ # widths = [(1 - 0.571), (0.571 - 0)] = [0.429, 0.571]
+ expected_result = 1 * 0.429 + 1 * 0.571
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-3)
+
+ def test_weighted_pr_minoring(self):
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=self.num_thresholds,
+ curve="PR",
+ summation_method="minoring",
+ )
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ result = auc_obj(
+ self.y_true, self.y_pred, sample_weight=self.sample_weight
+ )
+
+ # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3]
+ # precision = [7/(7+3), 4/4, 0] = [0.7, 1, 0]
+ # recall = [7/7, 4/(4+3), 0] = [1, 0.571, 0]
+ # heights = [min(0.7, 1), min(1, 0)] = [0.7, 0]
+ # widths = [(1 - 0.571), (0.571 - 0)] = [0.429, 0.571]
+ expected_result = 0.7 * 0.429 + 0 * 0.571
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-3)
+
+ def test_weighted_pr_interpolation(self):
+ self.setup()
+ auc_obj = metrics.AUC(num_thresholds=self.num_thresholds, curve="PR")
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ result = auc_obj(
+ self.y_true, self.y_pred, sample_weight=self.sample_weight
+ )
+
+ # auc = (slope / Total Pos) * [dTP - intercept * log(Pb/Pa)]
+
+ # tp = [7, 4, 0], fp = [3, 0, 0], fn = [0, 3, 7], tn = [0, 3, 3]
+ # P = tp + fp = [10, 4, 0]
+ # dTP = [7-4, 4-0] = [3, 4]
+ # dP = [10-4, 4-0] = [6, 4]
+ # slope = dTP/dP = [0.5, 1]
+ # intercept = TPb - (slope * Pb) = [(4 - 0.5*4), (0 - 1*0)] = [2, 0]
+ # (Pb/Pa) = (Pb/Pa) if Pb > 0 AND Pa > 0 else 1 = [10/4, 4/0] = [2.5, 1]
+ # auc * TotalPos = [(0.5 * (3 + 2 * log(2.5))), (1 * (4 + 0))]
+ # = [2.416, 4]
+ # auc = [2.416, 4]/(tp[1:]+fn[1:])
+ expected_result = 2.416 / 7 + 4 / 7
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-3)
+
+ def test_invalid_num_thresholds(self):
+ with self.assertRaisesRegex(
+ ValueError, "Argument `num_thresholds` must be an integer > 1"
+ ):
+ metrics.AUC(num_thresholds=-1)
+
+ with self.assertRaisesRegex(
+ ValueError, "Argument `num_thresholds` must be an integer > 1."
+ ):
+ metrics.AUC(num_thresholds=1)
+
+ def test_invalid_curve(self):
+ with self.assertRaisesRegex(
+ ValueError, 'Invalid AUC curve value: "Invalid".'
+ ): + metrics.AUC(curve="Invalid") + + def test_invalid_summation_method(self): + with self.assertRaisesRegex( + ValueError, 'Invalid AUC summation method value: "Invalid".' + ): + metrics.AUC(summation_method="Invalid") + + def test_extra_dims(self): + try: + from scipy import special + + self.setup() + logits = special.expit( + -np.array( + [ + [[-10.0, 10.0, -10.0], [10.0, -10.0, 10.0]], + [[-12.0, 12.0, -12.0], [12.0, -12.0, 12.0]], + ], + dtype=np.float32, + ) + ) + labels = np.array( + [[[1, 0, 0], [1, 0, 0]], [[0, 1, 1], [0, 1, 1]]], dtype=np.int64 + ) + auc_obj = metrics.AUC() + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(labels, logits) + self.assertEqual(self.evaluate(result), 0.5) + except ImportError as e: + tf_logging.warning(f"Cannot test special functions: {str(e)}") + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class MultiAUCTest(tf.test.TestCase, parameterized.TestCase): + def setup(self): + self.num_thresholds = 5 + self.y_pred = tf.constant( + np.array([[0, 0.5, 0.3, 0.9], [0.1, 0.2, 0.3, 0.4]]).T, + dtype=tf.float32, + ) + + epsilon = 1e-12 + self.y_pred_logits = -tf.math.log(1.0 / (self.y_pred + epsilon) - 1.0) + + self.y_true_good = tf.constant(np.array([[0, 0, 1, 1], [0, 0, 1, 1]]).T) + self.y_true_bad = tf.constant(np.array([[0, 0, 1, 1], [1, 1, 0, 0]]).T) + self.sample_weight = [1, 2, 3, 4] + + # threshold values are [0 - 1e-7, 0.25, 0.5, 0.75, 1 + 1e-7] + # y_pred when threshold = 0 - 1e-7 : [[1, 1, 1, 1], [1, 1, 1, 1]] + # y_pred when threshold = 0.25 : [[0, 1, 1, 1], [0, 0, 1, 1]] + # y_pred when threshold = 0.5 : [[0, 0, 0, 1], [0, 0, 0, 0]] + # y_pred when threshold = 0.75 : [[0, 0, 0, 1], [0, 0, 0, 0]] + # y_pred when threshold = 1 + 1e-7 : [[0, 0, 0, 0], [0, 0, 0, 0]] + + # for y_true_good, over thresholds: + # tp = [[2, 2, 1, 1, 0], [2, 2, 0, 0, 0]] + # fp = [[2, 1, 0, 0 , 0], [2, 0, 0 ,0, 0]] + # fn = [[0, 0, 1, 1, 2], [0, 0, 2, 2, 2]] + # tn = [[0, 1, 2, 2, 2], [0, 2, 2, 2, 2]] + + # tpr = [[1, 1, 0.5, 0.5, 0], [1, 1, 0, 0, 0]] + # fpr = [[1, 0.5, 0, 0, 0], [1, 0, 0, 0, 0]] + + # for y_true_bad: + # tp = [[2, 2, 1, 1, 0], [2, 0, 0, 0, 0]] + # fp = [[2, 1, 0, 0 , 0], [2, 2, 0 ,0, 0]] + # fn = [[0, 0, 1, 1, 2], [0, 2, 2, 2, 2]] + # tn = [[0, 1, 2, 2, 2], [0, 0, 2, 2, 2]] + + # tpr = [[1, 1, 0.5, 0.5, 0], [1, 0, 0, 0, 0]] + # fpr = [[1, 0.5, 0, 0, 0], [1, 1, 0, 0, 0]] + + # for y_true_good with sample_weights: + + # tp = [[7, 7, 4, 4, 0], [7, 7, 0, 0, 0]] + # fp = [[3, 2, 0, 0, 0], [3, 0, 0, 0, 0]] + # fn = [[0, 0, 3, 3, 7], [0, 0, 7, 7, 7]] + # tn = [[0, 1, 3, 3, 3], [0, 3, 3, 3, 3]] + + # tpr = [[1, 1, 0.57, 0.57, 0], [1, 1, 0, 0, 0]] + # fpr = [[1, 0.67, 0, 0, 0], [1, 0, 0, 0, 0]] + + def test_value_is_idempotent(self): + with self.test_session(): + self.setup() + auc_obj = metrics.AUC(num_thresholds=5, multi_label=True) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + + # Run several updates. + update_op = auc_obj.update_state(self.y_true_good, self.y_pred) + for _ in range(10): + self.evaluate(update_op) + + # Then verify idempotency. 
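+ # (With multi_label=True the metric keeps a confusion matrix per
+ # label and averages the per-label AUCs, so the repeated reads
+ # below must also agree.)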
+ initial_auc = self.evaluate(auc_obj.result()) + for _ in range(10): + self.assertAllClose( + initial_auc, self.evaluate(auc_obj.result()), 1e-3 + ) + + def test_unweighted_all_correct(self): + with self.test_session(): + self.setup() + auc_obj = metrics.AUC(multi_label=True) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(self.y_true_good, self.y_true_good) + self.assertEqual(self.evaluate(result), 1) + + def test_unweighted_all_correct_flat(self): + self.setup() + auc_obj = metrics.AUC(multi_label=False) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(self.y_true_good, self.y_true_good) + self.assertEqual(self.evaluate(result), 1) + + def test_unweighted(self): + with self.test_session(): + self.setup() + auc_obj = metrics.AUC( + num_thresholds=self.num_thresholds, multi_label=True + ) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(self.y_true_good, self.y_pred) + + # tpr = [[1, 1, 0.5, 0.5, 0], [1, 1, 0, 0, 0]] + # fpr = [[1, 0.5, 0, 0, 0], [1, 0, 0, 0, 0]] + expected_result = (0.875 + 1.0) / 2.0 + self.assertAllClose(self.evaluate(result), expected_result, 1e-3) + + def test_unweighted_from_logits(self): + with self.test_session(): + self.setup() + auc_obj = metrics.AUC( + num_thresholds=self.num_thresholds, + multi_label=True, + from_logits=True, + ) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(self.y_true_good, self.y_pred_logits) + + # tpr = [[1, 1, 0.5, 0.5, 0], [1, 1, 0, 0, 0]] + # fpr = [[1, 0.5, 0, 0, 0], [1, 0, 0, 0, 0]] + expected_result = (0.875 + 1.0) / 2.0 + self.assertAllClose(self.evaluate(result), expected_result, 1e-3) + + def test_sample_weight_flat(self): + self.setup() + auc_obj = metrics.AUC( + num_thresholds=self.num_thresholds, multi_label=False + ) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj( + self.y_true_good, self.y_pred, sample_weight=[1, 2, 3, 4] + ) + + # tpr = [1, 1, 0.2857, 0.2857, 0] + # fpr = [1, 0.3333, 0, 0, 0] + expected_result = 1.0 - (0.3333 * (1.0 - 0.2857) / 2.0) + self.assertAllClose(self.evaluate(result), expected_result, 1e-3) + + def test_full_sample_weight_flat(self): + self.setup() + auc_obj = metrics.AUC( + num_thresholds=self.num_thresholds, multi_label=False + ) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + sw = np.arange(4 * 2) + sw = sw.reshape(4, 2) + result = auc_obj(self.y_true_good, self.y_pred, sample_weight=sw) + + # tpr = [1, 1, 0.2727, 0.2727, 0] + # fpr = [1, 0.3333, 0, 0, 0] + expected_result = 1.0 - (0.3333 * (1.0 - 0.2727) / 2.0) + self.assertAllClose(self.evaluate(result), expected_result, 1e-3) + + def test_label_weights(self): + with self.test_session(): + self.setup() + auc_obj = metrics.AUC( + num_thresholds=self.num_thresholds, + multi_label=True, + label_weights=[0.75, 0.25], + ) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = auc_obj(self.y_true_good, self.y_pred) + + # tpr = [[1, 1, 0.5, 0.5, 0], [1, 1, 0, 0, 0]] + # fpr = [[1, 0.5, 0, 0, 0], [1, 0, 0, 0, 0]] + expected_result = (0.875 * 0.75 + 1.0 * 0.25) / (0.75 + 0.25) + self.assertAllClose(self.evaluate(result), expected_result, 1e-3) + + def test_label_weights_flat(self): + self.setup() + auc_obj = metrics.AUC( + num_thresholds=self.num_thresholds, + multi_label=False, + label_weights=[0.75, 0.25], + ) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + result = 
auc_obj(self.y_true_good, self.y_pred)
+
+ # tpr = [1, 1, 0.375, 0.375, 0]
+ # fpr = [1, 0.375, 0, 0, 0]
+ expected_result = 1.0 - ((1.0 - 0.375) * 0.375 / 2.0)
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-2)
+
+ def test_unweighted_flat(self):
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=self.num_thresholds, multi_label=False
+ )
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ result = auc_obj(self.y_true_good, self.y_pred)
+
+ # tp = [4, 4, 1, 1, 0]
+ # fp = [4, 1, 0, 0, 0]
+ # fn = [0, 0, 3, 3, 4]
+ # tn = [0, 3, 4, 4, 4]
+
+ # tpr = [1, 1, 0.25, 0.25, 0]
+ # fpr = [1, 0.25, 0, 0, 0]
+ expected_result = 1.0 - (3.0 / 32.0)
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-3)
+
+ def test_unweighted_flat_from_logits(self):
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=self.num_thresholds,
+ multi_label=False,
+ from_logits=True,
+ )
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ result = auc_obj(self.y_true_good, self.y_pred_logits)
+
+ # tp = [4, 4, 1, 1, 0]
+ # fp = [4, 1, 0, 0, 0]
+ # fn = [0, 0, 3, 3, 4]
+ # tn = [0, 3, 4, 4, 4]
+
+ # tpr = [1, 1, 0.25, 0.25, 0]
+ # fpr = [1, 0.25, 0, 0, 0]
+ expected_result = 1.0 - (3.0 / 32.0)
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-3)
+
+ def test_manual_thresholds(self):
+ with self.test_session():
+ self.setup()
+ # Verify that when specified, thresholds are used instead of
+ # num_thresholds.
+ auc_obj = metrics.AUC(
+ num_thresholds=2, thresholds=[0.5], multi_label=True
+ )
+ self.assertEqual(auc_obj.num_thresholds, 3)
+ self.assertAllClose(auc_obj.thresholds, [0.0, 0.5, 1.0])
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ result = auc_obj(self.y_true_good, self.y_pred)
+
+ # tp = [[2, 1, 0], [2, 0, 0]]
+ # fp = [[2, 0, 0], [2, 0, 0]]
+ # fn = [[0, 1, 2], [0, 2, 2]]
+ # tn = [[0, 2, 2], [0, 2, 2]]
+
+ # tpr = [[1, 0.5, 0], [1, 0, 0]]
+ # fpr = [[1, 0, 0], [1, 0, 0]]
+
+ # auc by slice = [0.75, 0.5]
+ expected_result = (0.75 + 0.5) / 2.0
+
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-3)
+
+ def test_weighted_roc_interpolation(self):
+ with self.test_session():
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=self.num_thresholds, multi_label=True
+ )
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ result = auc_obj(
+ self.y_true_good, self.y_pred, sample_weight=self.sample_weight
+ )
+
+ # tpr = [[1, 1, 0.57, 0.57, 0], [1, 1, 0, 0, 0]]
+ # fpr = [[1, 0.67, 0, 0, 0], [1, 0, 0, 0, 0]]
+ expected_result = 1.0 - 0.5 * 0.43 * 0.67
+ self.assertAllClose(self.evaluate(result), expected_result, 1e-1)
+
+ def test_pr_interpolation_unweighted(self):
+ with self.test_session():
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=self.num_thresholds, curve="PR", multi_label=True
+ )
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ good_result = auc_obj(self.y_true_good, self.y_pred)
+ with self.subTest(name="good"):
+ # PR AUCs are 0.917 and 1.0 respectively
+ self.assertAllClose(
+ self.evaluate(good_result), (0.91667 + 1.0) / 2.0, 1e-1
+ )
+ bad_result = auc_obj(self.y_true_bad, self.y_pred)
+ with self.subTest(name="bad"):
+ # PR AUCs are 0.917 and 0.5 respectively
+ self.assertAllClose(
+ self.evaluate(bad_result), (0.91667 + 0.5) / 2.0, 1e-1
+ )
+
+ def test_pr_interpolation(self):
+ with self.test_session():
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=self.num_thresholds, curve="PR", multi_label=True
+
)
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ good_result = auc_obj(
+ self.y_true_good, self.y_pred, sample_weight=self.sample_weight
+ )
+ # PR AUCs are 0.939 and 1.0 respectively
+ self.assertAllClose(
+ self.evaluate(good_result), (0.939 + 1.0) / 2.0, 1e-1
+ )
+
+ def test_keras_model_compiles(self):
+ inputs = layers.Input(shape=(10,))
+ output = layers.Dense(3, activation="sigmoid")(inputs)
+ model = models.Model(inputs=inputs, outputs=output)
+ model.compile(
+ loss="binary_crossentropy", metrics=[metrics.AUC(multi_label=True)]
+ )
+
+ def test_reset_state(self):
+ with self.test_session():
+ self.setup()
+ auc_obj = metrics.AUC(
+ num_thresholds=self.num_thresholds, multi_label=True
+ )
+ self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables))
+ auc_obj(self.y_true_good, self.y_pred)
+ auc_obj.reset_state()
+ self.assertAllEqual(auc_obj.true_positives, np.zeros((5, 2)))
+
+
+@test_combinations.generate(test_combinations.combine(mode=["eager"]))
+class ThresholdsTest(tf.test.TestCase, parameterized.TestCase):
+ @parameterized.parameters(
+ [
+ metrics.TruePositives(),
+ metrics.TrueNegatives(),
+ metrics.FalsePositives(),
+ metrics.FalseNegatives(),
+ metrics.Precision(),
+ metrics.Recall(),
+ metrics.SensitivityAtSpecificity(0.5),
+ metrics.SpecificityAtSensitivity(0.5),
+ metrics.PrecisionAtRecall(0.5),
+ metrics.RecallAtPrecision(0.5),
+ metrics.AUC(),
+ ]
+ )
+ def test_with_default_thresholds(self, metric_obj):
+ # By default, the thresholds will be evenly distributed if there are
+ # more than one. If there is only one threshold, we expect
+ # _thresholds_distributed_evenly to be false.
+ expected = len(metric_obj.thresholds) > 1
+ self.assertEqual(metric_obj._thresholds_distributed_evenly, expected)
+
+ @parameterized.parameters(
+ [
+ metrics.TruePositives,
+ metrics.TrueNegatives,
+ metrics.FalsePositives,
+ metrics.FalseNegatives,
+ metrics.Precision,
+ metrics.Recall,
+ ]
+ )
+ def test_with_manual_thresholds(self, metric_cls):
+ even_thresholds = [0.0, 0.25, 0.5, 0.75, 1.0]
+ metric_obj = metric_cls(thresholds=even_thresholds)
+ self.assertTrue(metric_obj._thresholds_distributed_evenly)
+
+ uneven_thresholds = [0.0, 0.45, 1.0]
+ metric_obj = metric_cls(thresholds=uneven_thresholds)
+ self.assertFalse(metric_obj._thresholds_distributed_evenly)
+
+ def test_manual_thresholds_auc(self):
+ # The AUC metric handles manual thresholds input differently (it will
+ # add 0.0 and 1.0 for the user).
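+ # With the endpoints added, [0.25, 0.5, 0.75] becomes
+ # [0.0, 0.25, 0.5, 0.75, 1.0], which is evenly spaced, while [0.45]
+ # becomes [0.0, 0.45, 1.0], which is not.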
+ even_thresholds = [0.25, 0.5, 0.75]
+ auc = metrics.AUC(thresholds=even_thresholds)
+ self.assertTrue(auc._thresholds_distributed_evenly)
+
+ # Test for save model
+ cloned = metrics.AUC.from_config(auc.get_config())
+ self.assertTrue(cloned._thresholds_distributed_evenly)
+
+ uneven_thresholds = [
+ 0.45,
+ ]
+ auc = metrics.AUC(thresholds=uneven_thresholds)
+ self.assertFalse(auc._thresholds_distributed_evenly)
+
+ cloned = metrics.AUC.from_config(auc.get_config())
+ self.assertFalse(cloned._thresholds_distributed_evenly)
+
+ @parameterized.parameters(
+ [
+ metrics.TruePositives,
+ metrics.TrueNegatives,
+ metrics.FalsePositives,
+ metrics.FalseNegatives,
+ metrics.Precision,
+ metrics.Recall,
+ metrics.AUC,
+ ]
+ )
+ def test_even_thresholds_correctness(self, metric_cls):
+ with tf.compat.forward_compatibility_horizon(2021, 6, 9):
+ # Make sure the old approach and the new approach produce the
+ # same result for evenly distributed thresholds.
+ y_true = np.random.randint(2, size=(10,))
+ y_pred = np.random.rand(10)
+
+ even_thresholds = [0.0, 0.25, 0.5, 0.75, 1.0]
+ if metric_cls == metrics.AUC:
+ even_thresholds = even_thresholds[1:-1]
+ metric_obj = metric_cls(thresholds=even_thresholds)
+ metric_obj.update_state(y_true, y_pred)
+ result1 = metric_obj.result()
+
+ metric_obj2 = metric_cls(thresholds=even_thresholds)
+ # Force use of the old approach.
+ metric_obj2._thresholds_distributed_evenly = False
+ metric_obj2.update_state(y_true, y_pred)
+ result2 = metric_obj2.result()
+
+ self.assertAllClose(result1, result2)
+ # Check that all the variables are the same, e.g. tp, tn, fp, fn.
+ for v1, v2 in zip(metric_obj.variables, metric_obj2.variables):
+ self.assertAllClose(v1, v2)
+
+ @parameterized.parameters(
+ [
+ metrics.SensitivityAtSpecificity,
+ metrics.SpecificityAtSensitivity,
+ metrics.PrecisionAtRecall,
+ metrics.RecallAtPrecision,
+ ]
+ )
+ def test_even_thresholds_correctness_2(self, metric_cls):
+ with tf.compat.forward_compatibility_horizon(2021, 6, 9):
+ y_true = np.random.randint(2, size=(10,))
+ y_pred = np.random.rand(10)
+
+ metric_obj = metric_cls(0.5)
+ metric_obj.update_state(y_true, y_pred)
+ result1 = metric_obj.result()
+
+ metric_obj2 = metric_cls(0.5)
+ # Force use of the old approach.
+ metric_obj2._thresholds_distributed_evenly = False
+ metric_obj2.update_state(y_true, y_pred)
+ result2 = metric_obj2.result()
+
+ self.assertAllClose(result1, result2)
+ # Check that all the variables are the same, e.g. tp, tn, fp, fn.
+ for v1, v2 in zip(metric_obj.variables, metric_obj2.variables):
+ self.assertAllClose(v1, v2)
+
+
+class BinaryTruePositives(metrics.Metric):
+ def __init__(self, name="binary_true_positives", **kwargs):
+ super().__init__(name=name, **kwargs)
+ self.true_positives = self.add_weight(name="tp", initializer="zeros")
+
+ def update_state(self, y_true, y_pred, sample_weight=None):
+ y_true = tf.cast(y_true, tf.bool)
+ y_pred = tf.cast(y_pred, tf.bool)
+
+ values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True))
+ values = tf.cast(values, self.dtype)
+ if sample_weight is not None:
+ sample_weight = tf.cast(sample_weight, dtype=self.dtype)
+ sample_weight = tf.__internal__.ops.broadcast_weights(
+ sample_weight, values
+ )
+ values = tf.multiply(values, sample_weight)
+ self.true_positives.assign_add(tf.reduce_sum(values))
+
+ def result(self):
+ return self.true_positives
+
+
+class BinaryTruePositivesViaControlFlow(metrics.Metric):
+ def __init__(self, name="binary_true_positives", **kwargs):
+ super().__init__(name=name, **kwargs)
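+ # Counts the same quantity as BinaryTruePositives above, but via
+ # Python loops and conditionals in update_state, to exercise
+ # control-flow-heavy metric updates.
+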
self.true_positives = self.add_weight(name="tp", initializer="zeros")
+
+ def update_state(self, y_true, y_pred, sample_weight=None):
+ y_true = tf.cast(y_true, tf.bool)
+ y_pred = tf.cast(y_pred, tf.bool)
+
+ for i in range(len(y_true)):
+ for j in range(len(y_true[i])):
+ if y_true[i][j] and y_pred[i][j]:
+ if sample_weight is None:
+ self.true_positives.assign_add(1)
+ else:
+ self.true_positives.assign_add(sample_weight[i][0])
+
+ def result(self):
+ if tf.constant(True):
+ return self.true_positives
+ return 0.0
+
+
+def _get_model(compile_metrics):
+ model_layers = [
+ layers.Dense(3, activation="relu", kernel_initializer="ones"),
+ layers.Dense(1, activation="sigmoid", kernel_initializer="ones"),
+ ]
+
+ model = test_utils.get_model_from_layers(model_layers, input_shape=(4,))
+ model.compile(
+ loss="mae",
+ metrics=compile_metrics,
+ optimizer="rmsprop",
+ run_eagerly=test_utils.should_run_eagerly(),
+ )
+ return model
+
+
+@test_combinations.run_with_all_model_types
+@test_combinations.run_all_keras_modes
+class ResetStatesTest(test_combinations.TestCase):
+ def test_reset_state_false_positives(self):
+ fp_obj = metrics.FalsePositives()
+ model = _get_model([fp_obj])
+ x = np.ones((100, 4))
+ y = np.zeros((100, 1))
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(fp_obj.accumulator), 100.0)
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(fp_obj.accumulator), 100.0)
+
+ def test_reset_state_false_negatives(self):
+ fn_obj = metrics.FalseNegatives()
+ model = _get_model([fn_obj])
+ x = np.zeros((100, 4))
+ y = np.ones((100, 1))
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(fn_obj.accumulator), 100.0)
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(fn_obj.accumulator), 100.0)
+
+ def test_reset_state_true_negatives(self):
+ tn_obj = metrics.TrueNegatives()
+ model = _get_model([tn_obj])
+ x = np.zeros((100, 4))
+ y = np.zeros((100, 1))
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(tn_obj.accumulator), 100.0)
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(tn_obj.accumulator), 100.0)
+
+ def test_reset_state_true_positives(self):
+ tp_obj = metrics.TruePositives()
+ model = _get_model([tp_obj])
+ x = np.ones((100, 4))
+ y = np.ones((100, 1))
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(tp_obj.accumulator), 100.0)
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(tp_obj.accumulator), 100.0)
+
+ def test_reset_state_precision(self):
+ p_obj = metrics.Precision()
+ model = _get_model([p_obj])
+ x = np.concatenate((np.ones((50, 4)), np.ones((50, 4))))
+ y = np.concatenate((np.ones((50, 1)), np.zeros((50, 1))))
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(p_obj.true_positives), 50.0)
+ self.assertEqual(self.evaluate(p_obj.false_positives), 50.0)
+ model.evaluate(x, y)
+ self.assertEqual(self.evaluate(p_obj.true_positives), 50.0)
+ self.assertEqual(self.evaluate(p_obj.false_positives), 50.0)
+
+ def test_precision_update_state_with_logits(self):
+ p_obj = metrics.Precision()
+ # Updating state with logits (values not in the range (0, 1)) should
+ # not raise an error.
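+ # (The default 0.5 threshold is compared directly against the raw
+ # values, so out-of-range inputs are thresholded like any others.)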
+ p_obj.update_state([-0.5, 0.5], [-2.0, 2.0]) + + def test_reset_state_recall(self): + r_obj = metrics.Recall() + model = _get_model([r_obj]) + x = np.concatenate((np.ones((50, 4)), np.zeros((50, 4)))) + y = np.concatenate((np.ones((50, 1)), np.ones((50, 1)))) + model.evaluate(x, y) + self.assertEqual(self.evaluate(r_obj.true_positives), 50.0) + self.assertEqual(self.evaluate(r_obj.false_negatives), 50.0) + model.evaluate(x, y) + self.assertEqual(self.evaluate(r_obj.true_positives), 50.0) + self.assertEqual(self.evaluate(r_obj.false_negatives), 50.0) + + def test_reset_state_sensitivity_at_specificity(self): + s_obj = metrics.SensitivityAtSpecificity(0.5, num_thresholds=1) + model = _get_model([s_obj]) + x = np.concatenate( + ( + np.ones((25, 4)), + np.zeros((25, 4)), + np.zeros((25, 4)), + np.ones((25, 4)), + ) + ) + y = np.concatenate( + ( + np.ones((25, 1)), + np.zeros((25, 1)), + np.ones((25, 1)), + np.zeros((25, 1)), + ) + ) + + for _ in range(2): + model.evaluate(x, y) + self.assertEqual(self.evaluate(s_obj.true_positives), 25.0) + self.assertEqual(self.evaluate(s_obj.false_positives), 25.0) + self.assertEqual(self.evaluate(s_obj.false_negatives), 25.0) + self.assertEqual(self.evaluate(s_obj.true_negatives), 25.0) + + def test_reset_state_specificity_at_sensitivity(self): + s_obj = metrics.SpecificityAtSensitivity(0.5, num_thresholds=1) + model = _get_model([s_obj]) + x = np.concatenate( + ( + np.ones((25, 4)), + np.zeros((25, 4)), + np.zeros((25, 4)), + np.ones((25, 4)), + ) + ) + y = np.concatenate( + ( + np.ones((25, 1)), + np.zeros((25, 1)), + np.ones((25, 1)), + np.zeros((25, 1)), + ) + ) + + for _ in range(2): + model.evaluate(x, y) + self.assertEqual(self.evaluate(s_obj.true_positives), 25.0) + self.assertEqual(self.evaluate(s_obj.false_positives), 25.0) + self.assertEqual(self.evaluate(s_obj.false_negatives), 25.0) + self.assertEqual(self.evaluate(s_obj.true_negatives), 25.0) + + def test_reset_state_precision_at_recall(self): + s_obj = metrics.PrecisionAtRecall(recall=0.5, num_thresholds=1) + model = _get_model([s_obj]) + x = np.concatenate( + ( + np.ones((25, 4)), + np.zeros((25, 4)), + np.zeros((25, 4)), + np.ones((25, 4)), + ) + ) + y = np.concatenate( + ( + np.ones((25, 1)), + np.zeros((25, 1)), + np.ones((25, 1)), + np.zeros((25, 1)), + ) + ) + + for _ in range(2): + model.evaluate(x, y) + self.assertEqual(self.evaluate(s_obj.true_positives), 25.0) + self.assertEqual(self.evaluate(s_obj.false_positives), 25.0) + self.assertEqual(self.evaluate(s_obj.false_negatives), 25.0) + self.assertEqual(self.evaluate(s_obj.true_negatives), 25.0) + + def test_reset_state_recall_at_precision(self): + s_obj = metrics.RecallAtPrecision(precision=0.5, num_thresholds=1) + model = _get_model([s_obj]) + x = np.concatenate( + ( + np.ones((25, 4)), + np.zeros((25, 4)), + np.zeros((25, 4)), + np.ones((25, 4)), + ) + ) + y = np.concatenate( + ( + np.ones((25, 1)), + np.zeros((25, 1)), + np.ones((25, 1)), + np.zeros((25, 1)), + ) + ) + + for _ in range(2): + model.evaluate(x, y) + self.assertEqual(self.evaluate(s_obj.true_positives), 25.0) + self.assertEqual(self.evaluate(s_obj.false_positives), 25.0) + self.assertEqual(self.evaluate(s_obj.false_negatives), 25.0) + self.assertEqual(self.evaluate(s_obj.true_negatives), 25.0) + + def test_reset_state_auc(self): + auc_obj = metrics.AUC(num_thresholds=3) + model = _get_model([auc_obj]) + x = np.concatenate( + ( + np.ones((25, 4)), + np.zeros((25, 4)), + np.zeros((25, 4)), + np.ones((25, 4)), + ) + ) + y = np.concatenate( + ( + np.ones((25, 1)), + 
np.zeros((25, 1)), + np.ones((25, 1)), + np.zeros((25, 1)), + ) + ) + + for _ in range(2): + model.evaluate(x, y) + self.assertEqual(self.evaluate(auc_obj.true_positives[1]), 25.0) + self.assertEqual(self.evaluate(auc_obj.false_positives[1]), 25.0) + self.assertEqual(self.evaluate(auc_obj.false_negatives[1]), 25.0) + self.assertEqual(self.evaluate(auc_obj.true_negatives[1]), 25.0) + + def test_reset_state_auc_from_logits(self): + auc_obj = metrics.AUC(num_thresholds=3, from_logits=True) + + model_layers = [ + layers.Dense(1, kernel_initializer="ones", use_bias=False) + ] + model = test_utils.get_model_from_layers(model_layers, input_shape=(4,)) + model.compile( + loss="mae", + metrics=[auc_obj], + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.concatenate( + ( + np.ones((25, 4)), + -np.ones((25, 4)), + -np.ones((25, 4)), + np.ones((25, 4)), + ) + ) + y = np.concatenate( + ( + np.ones((25, 1)), + np.zeros((25, 1)), + np.ones((25, 1)), + np.zeros((25, 1)), + ) + ) + + for _ in range(2): + model.evaluate(x, y) + self.assertEqual(self.evaluate(auc_obj.true_positives[1]), 25.0) + self.assertEqual(self.evaluate(auc_obj.false_positives[1]), 25.0) + self.assertEqual(self.evaluate(auc_obj.false_negatives[1]), 25.0) + self.assertEqual(self.evaluate(auc_obj.true_negatives[1]), 25.0) + + def test_reset_state_auc_manual_thresholds(self): + auc_obj = metrics.AUC(thresholds=[0.5]) + model = _get_model([auc_obj]) + x = np.concatenate( + ( + np.ones((25, 4)), + np.zeros((25, 4)), + np.zeros((25, 4)), + np.ones((25, 4)), + ) + ) + y = np.concatenate( + ( + np.ones((25, 1)), + np.zeros((25, 1)), + np.ones((25, 1)), + np.zeros((25, 1)), + ) + ) + + for _ in range(2): + model.evaluate(x, y) + self.assertEqual(self.evaluate(auc_obj.true_positives[1]), 25.0) + self.assertEqual(self.evaluate(auc_obj.false_positives[1]), 25.0) + self.assertEqual(self.evaluate(auc_obj.false_negatives[1]), 25.0) + self.assertEqual(self.evaluate(auc_obj.true_negatives[1]), 25.0) + + def test_reset_state_mean_iou(self): + m_obj = metrics.MeanIoU(num_classes=2) + model = _get_model([m_obj]) + x = np.asarray( + [[0, 0, 0, 0], [1, 1, 1, 1], [1, 0, 1, 0], [0, 1, 0, 1]], + dtype=np.float32, + ) + y = np.asarray([[0], [1], [1], [1]], dtype=np.float32) + model.evaluate(x, y) + self.assertArrayNear(self.evaluate(m_obj.total_cm)[0], [1, 0], 1e-1) + self.assertArrayNear(self.evaluate(m_obj.total_cm)[1], [3, 0], 1e-1) + model.evaluate(x, y) + self.assertArrayNear(self.evaluate(m_obj.total_cm)[0], [1, 0], 1e-1) + self.assertArrayNear(self.evaluate(m_obj.total_cm)[1], [3, 0], 1e-1) + + def test_reset_state_recall_float64(self): + # Test case for GitHub issue 36790. + try: + backend.set_floatx("float64") + r_obj = metrics.Recall() + model = _get_model([r_obj]) + x = np.concatenate((np.ones((50, 4)), np.zeros((50, 4)))) + y = np.concatenate((np.ones((50, 1)), np.ones((50, 1)))) + model.evaluate(x, y) + self.assertEqual(self.evaluate(r_obj.true_positives), 50.0) + self.assertEqual(self.evaluate(r_obj.false_negatives), 50.0) + model.evaluate(x, y) + self.assertEqual(self.evaluate(r_obj.true_positives), 50.0) + self.assertEqual(self.evaluate(r_obj.false_negatives), 50.0) + finally: + backend.set_floatx("float32") + + def test_function_wrapped_reset_state(self): + m = metrics.Mean(name="my_mean") + + # check reset_state in function. 
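A standalone sketch of the behavior the `tf.function`-wrapped check below exercises (editorial; eager mode and illustrative values assumed):

```python
import tensorflow as tf

# Calling reset_state() inside a tf.function must still zero the metric,
# so reset-then-update leaves exactly one accumulated sample, not N.
m = tf.keras.metrics.Mean()

@tf.function
def reset_then_update(value):
    m.reset_state()
    m.update_state(value)

for _ in range(5):
    reset_then_update(100.0)
assert int(m.count) == 1 and float(m.result()) == 100.0
```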
+ @tf.function + def reset_in_fn(): + m.reset_state() + m.update_state(100) + + for _ in range(5): + reset_in_fn() + if not tf.executing_eagerly(): + self.evaluate( + tf.compat.v1.get_default_graph().get_operations()[-1] + ) + self.assertEqual(self.evaluate(m.count), 1) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class MergeStateTest(test_combinations.TestCase): + def test_merge_state_incompatible_metrics(self): + with self.assertRaisesRegex( + ValueError, "Metric .* is not compatible with .*" + ): + obj1 = metrics.FalsePositives() + self.evaluate(tf.compat.v1.variables_initializer(obj1.variables)) + obj2 = metrics.Accuracy() + self.evaluate(tf.compat.v1.variables_initializer(obj2.variables)) + self.evaluate(obj1.merge_state([obj2])) + + def test_merge_state_accuracy(self): + a_objs = [] + for y_true, y_pred in zip( + [[[1], [2]], [[3], [4]]], [[[0], [2]], [[3], [4]]] + ): + a_obj = metrics.Accuracy() + a_objs.append(a_obj) + self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) + self.evaluate(a_obj.update_state(y_true, y_pred)) + self.evaluate(a_objs[0].merge_state(a_objs[1:])) + self.assertEqual(self.evaluate(a_objs[0].total), 3.0) + self.assertEqual(self.evaluate(a_objs[0].count), 4.0) + self.assertEqual(self.evaluate(a_objs[0].result()), 0.75) + + def test_merge_state_false_positives(self): + fp_objs = [] + for _ in range(4): + fp_obj = metrics.FalsePositives() + fp_objs.append(fp_obj) + self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) + y_true = np.zeros((25, 1)) + y_pred = np.ones((25, 1)) + self.evaluate(fp_obj.update_state(y_true, y_pred)) + self.evaluate(fp_objs[0].merge_state(fp_objs[1:])) + self.assertEqual(self.evaluate(fp_objs[0].accumulator), 100.0) + + def test_merge_state_false_negatives(self): + fn_objs = [] + for _ in range(4): + fn_obj = metrics.FalseNegatives() + fn_objs.append(fn_obj) + self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) + y_true = np.ones((25, 1)) + y_pred = np.zeros((25, 1)) + self.evaluate(fn_obj.update_state(y_true, y_pred)) + self.evaluate(fn_objs[0].merge_state(fn_objs[1:])) + self.assertEqual(self.evaluate(fn_objs[0].accumulator), 100.0) + + def test_merge_state_true_negatives(self): + tn_objs = [] + for _ in range(4): + tn_obj = metrics.TrueNegatives() + tn_objs.append(tn_obj) + self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) + y_true = np.zeros((25, 1)) + y_pred = np.zeros((25, 1)) + self.evaluate(tn_obj.update_state(y_true, y_pred)) + self.evaluate(tn_objs[0].merge_state(tn_objs[1:])) + self.assertEqual(self.evaluate(tn_objs[0].accumulator), 100.0) + + def test_merge_state_true_positives(self): + tp_objs = [] + for _ in range(4): + tp_obj = metrics.TruePositives() + tp_objs.append(tp_obj) + self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) + y_true = np.ones((25, 1)) + y_pred = np.ones((25, 1)) + self.evaluate(tp_obj.update_state(y_true, y_pred)) + self.evaluate(tp_objs[0].merge_state(tp_objs[1:])) + self.assertEqual(self.evaluate(tp_objs[0].accumulator), 100.0) + + def test_merge_state_precision(self): + p_objs = [] + for _ in range(5): + p_obj = metrics.Precision() + p_objs.append(p_obj) + self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) + y_true = np.concatenate((np.ones((10, 1)), np.zeros((10, 1)))) + y_pred = np.concatenate((np.ones((10, 1)), np.ones((10, 1)))) + self.evaluate(p_obj.update_state(y_true, y_pred)) + self.evaluate(p_objs[0].merge_state(p_objs[1:])) + 
self.assertEqual(self.evaluate(p_objs[0].true_positives), 50.0) + self.assertEqual(self.evaluate(p_objs[0].false_positives), 50.0) + + def test_merge_state_recall(self): + r_objs = [] + for _ in range(5): + r_obj = metrics.Recall() + r_objs.append(r_obj) + self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) + y_true = np.concatenate((np.ones((10, 1)), np.ones((10, 1)))) + y_pred = np.concatenate((np.ones((10, 1)), np.zeros((10, 1)))) + self.evaluate(r_obj.update_state(y_true, y_pred)) + self.evaluate(r_objs[0].merge_state(r_objs[1:])) + self.assertEqual(self.evaluate(r_objs[0].true_positives), 50.0) + self.assertEqual(self.evaluate(r_objs[0].false_negatives), 50.0) + + def test_merge_state_sensitivity_at_specificity(self): + sas_objs = [] + for _ in range(5): + sas_obj = metrics.SensitivityAtSpecificity(0.5, num_thresholds=1) + sas_objs.append(sas_obj) + self.evaluate(tf.compat.v1.variables_initializer(sas_obj.variables)) + y_true = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + np.zeros((5, 1)), + ) + ) + y_pred = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + ) + ) + self.evaluate(sas_obj.update_state(y_true, y_pred)) + self.evaluate(sas_objs[0].merge_state(sas_objs[1:])) + self.assertEqual(self.evaluate(sas_objs[0].true_positives), 25.0) + self.assertEqual(self.evaluate(sas_objs[0].false_positives), 25.0) + self.assertEqual(self.evaluate(sas_objs[0].false_negatives), 25.0) + self.assertEqual(self.evaluate(sas_objs[0].true_negatives), 25.0) + + def test_merge_state_specificity_at_sensitivity(self): + sas_objs = [] + for _ in range(5): + sas_obj = metrics.SpecificityAtSensitivity(0.5, num_thresholds=1) + sas_objs.append(sas_obj) + self.evaluate(tf.compat.v1.variables_initializer(sas_obj.variables)) + y_true = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + np.zeros((5, 1)), + ) + ) + y_pred = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + ) + ) + self.evaluate(sas_obj.update_state(y_true, y_pred)) + self.evaluate(sas_objs[0].merge_state(sas_objs[1:])) + self.assertEqual(self.evaluate(sas_objs[0].true_positives), 25.0) + self.assertEqual(self.evaluate(sas_objs[0].false_positives), 25.0) + self.assertEqual(self.evaluate(sas_objs[0].false_negatives), 25.0) + self.assertEqual(self.evaluate(sas_objs[0].true_negatives), 25.0) + + def test_merge_state_precision_at_recall(self): + par_objs = [] + for _ in range(5): + par_obj = metrics.PrecisionAtRecall(recall=0.5, num_thresholds=1) + par_objs.append(par_obj) + self.evaluate(tf.compat.v1.variables_initializer(par_obj.variables)) + y_true = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + np.zeros((5, 1)), + ) + ) + y_pred = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + ) + ) + self.evaluate(par_obj.update_state(y_true, y_pred)) + self.evaluate(par_objs[0].merge_state(par_objs[1:])) + self.assertEqual(self.evaluate(par_objs[0].true_positives), 25.0) + self.assertEqual(self.evaluate(par_objs[0].false_positives), 25.0) + self.assertEqual(self.evaluate(par_objs[0].false_negatives), 25.0) + self.assertEqual(self.evaluate(par_objs[0].true_negatives), 25.0) + + def test_merge_state_recall_at_precision(self): + rap_objs = [] + for _ in range(5): + rap_obj = metrics.RecallAtPrecision(precision=0.5, num_thresholds=1) + rap_objs.append(rap_obj) +
self.evaluate(tf.compat.v1.variables_initializer(rap_obj.variables)) + y_true = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + np.zeros((5, 1)), + ) + ) + y_pred = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + ) + ) + self.evaluate(rap_obj.update_state(y_true, y_pred)) + self.evaluate(rap_objs[0].merge_state(rap_objs[1:])) + self.assertEqual(self.evaluate(rap_objs[0].true_positives), 25.0) + self.assertEqual(self.evaluate(rap_objs[0].false_positives), 25.0) + self.assertEqual(self.evaluate(rap_objs[0].false_negatives), 25.0) + self.assertEqual(self.evaluate(rap_objs[0].true_negatives), 25.0) + + def test_merge_state_auc(self): + auc_objs = [] + for _ in range(5): + auc_obj = metrics.AUC(num_thresholds=3) + auc_objs.append(auc_obj) + self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) + y_true = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + np.zeros((5, 1)), + ) + ) + y_pred = np.concatenate( + ( + np.ones((5, 1)), + np.zeros((5, 1)), + np.zeros((5, 1)), + np.ones((5, 1)), + ) + ) + self.evaluate(auc_obj.update_state(y_true, y_pred)) + self.evaluate(auc_objs[0].merge_state(auc_objs[1:])) + self.assertEqual(self.evaluate(auc_objs[0].true_positives[1]), 25.0) + self.assertEqual(self.evaluate(auc_objs[0].false_positives[1]), 25.0) + self.assertEqual(self.evaluate(auc_objs[0].false_negatives[1]), 25.0) + self.assertEqual(self.evaluate(auc_objs[0].true_negatives[1]), 25.0) + + def test_merge_state_mean_iou(self): + m_objs = [] + for y_true, y_pred in zip( + [[0], [1], [1], [1]], [[0.5], [1.0], [1.0], [1.0]] + ): + m_obj = metrics.MeanIoU(num_classes=2) + m_objs.append(m_obj) + self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) + self.evaluate(m_obj.update_state(y_true, y_pred)) + self.evaluate(m_objs[0].merge_state(m_objs[1:])) + self.assertArrayNear(self.evaluate(m_objs[0].total_cm)[0], [1, 0], 1e-1) + self.assertArrayNear(self.evaluate(m_objs[0].total_cm)[1], [0, 3], 1e-1) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/f_score_metrics.py b/keras/metrics/f_score_metrics.py new file mode 100644 index 000000000000..3e59a0de0063 --- /dev/null +++ b/keras/metrics/f_score_metrics.py @@ -0,0 +1,323 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""F-Score metrics.""" + +import tensorflow.compat.v2 as tf +from tensorflow.python.util.tf_export import keras_export + +from keras.dtensor import utils as dtensor_utils +from keras.metrics import base_metric + + +# Adapted from TF-Addons implementation. +@keras_export("keras.metrics.FBetaScore") +class FBetaScore(base_metric.Metric): + """Computes F-Beta score. + + This is the weighted harmonic mean of precision and recall. + Its output range is `[0, 1]`. It works for both multi-class + and multi-label classification. 
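As a companion to the definition given just below, here is a hedged NumPy sketch of how per-class counts turn into the `None` / `"micro"` / `"macro"` / `"weighted"` variants; the function name and counts are illustrative, and this mirrors the formulas rather than the class's actual implementation:

```python
import numpy as np

def fbeta_from_counts(tp, fp, fn, beta=1.0, average=None):
    # Per-class true positive, false positive, and false negative counts.
    tp, fp, fn = (np.asarray(v, dtype=float) for v in (tp, fp, fn))
    if average == "micro":  # pool the counts over classes first
        tp, fp, fn = tp.sum(), fp.sum(), fn.sum()
    precision = tp / np.maximum(tp + fp, 1e-12)
    recall = tp / np.maximum(tp + fn, 1e-12)
    b2 = beta**2
    score = (1 + b2) * precision * recall / np.maximum(b2 * precision + recall, 1e-12)
    if average == "macro":  # unweighted mean over classes
        return float(score.mean())
    if average == "weighted":  # weighted by support (tp + fn per class)
        support = tp + fn
        return float((score * support).sum() / support.sum())
    return score  # None: per-class vector; "micro": pooled scalar

# Three classes with per-class F1 of 0.8, 0.5, and 2/3.
print(fbeta_from_counts([2, 1, 3], [1, 0, 2], [0, 2, 1], average="macro"))  # ~0.6556
```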
+ + It is defined as: + + ```python + b2 = beta ** 2 + f_beta_score = (1 + b2) * (precision * recall) / (precision * b2 + recall) + ``` + + Args: + average: Type of averaging to be performed across per-class results + in the multi-class case. + Acceptable values are `None`, `"micro"`, `"macro"` and + `"weighted"`. Default value is `None`. + If `None`, no averaging is performed and `result()` will return + the score for each class. + If `"micro"`, compute metrics globally by counting the total + true positives, false negatives and false positives. + If `"macro"`, compute metrics for each label, + and return their unweighted mean. + This does not take label imbalance into account. + If `"weighted"`, compute metrics for each label, + and return their average weighted by support + (the number of true instances for each label). + This alters `"macro"` to account for label imbalance. + It can result in an F-score that is not between precision and recall. + beta: Determines the weight given to recall + in the harmonic mean between precision and recall (see pseudocode + equation above). Default value is 1. + threshold: Elements of `y_pred` greater than `threshold` are + converted to 1, and the rest to 0. If `threshold` is + `None`, the argmax of `y_pred` is converted to 1, and the rest to 0. + name: Optional. String name of the metric instance. + dtype: Optional. Data type of the metric result. + + Returns: + F-Beta Score: float. + + Example: + + >>> metric = tf.keras.metrics.FBetaScore(beta=2.0, threshold=0.5) + >>> y_true = np.array([[1, 1, 1], + ... [1, 0, 0], + ... [1, 1, 0]], np.int32) + >>> y_pred = np.array([[0.2, 0.6, 0.7], + ... [0.2, 0.6, 0.6], + ... [0.6, 0.8, 0.0]], np.float32) + >>> metric.update_state(y_true, y_pred) + >>> result = metric.result() + >>> result.numpy() + array([0.3846154 , 0.90909094, 0.8333334 ], dtype=float32) + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + average=None, + beta=1.0, + threshold=None, + name="fbeta_score", + dtype=None, + ): + super().__init__(name=name, dtype=dtype) + + if average not in (None, "micro", "macro", "weighted"): + raise ValueError( + "Invalid `average` argument value. Expected one of: " + "{None, 'micro', 'macro', 'weighted'}. " + f"Received: average={average}" + ) + + if not isinstance(beta, float): + raise ValueError( + "Invalid `beta` argument value. " + "It should be a Python float. " + f"Received: beta={beta} of type '{type(beta)}'" + ) + if beta <= 0.0: + raise ValueError( + "Invalid `beta` argument value. " + "It should be > 0. " + f"Received: beta={beta}" + ) + + if threshold is not None: + if not isinstance(threshold, float): + raise ValueError( + "Invalid `threshold` argument value. " + "It should be a Python float. " + f"Received: threshold={threshold} " + f"of type '{type(threshold)}'" + ) + if threshold > 1.0 or threshold <= 0.0: + raise ValueError( + "Invalid `threshold` argument value. " + "It should satisfy 0 < threshold <= 1. " + f"Received: threshold={threshold}" + ) + + self.average = average + self.beta = beta + self.threshold = threshold + self.axis = None + self.built = False + + if self.average != "micro": + self.axis = 0 + + def build(self, y_true_shape, y_pred_shape): + if len(y_pred_shape) != 2 or len(y_true_shape) != 2: + raise ValueError( + "FBetaScore expects 2D inputs with shape " + "(batch_size, output_dim). Received input " + f"shapes: y_pred.shape={y_pred_shape} and " + f"y_true.shape={y_true_shape}."
+ ) + if y_pred_shape[-1] is None or y_true_shape[-1] is None: + raise ValueError( + "FBetaScore expects 2D inputs with shape " + "(batch_size, output_dim), with output_dim fully " + "defined (not None). Received input " + f"shapes: y_pred.shape={y_pred_shape} and " + f"y_true.shape={y_true_shape}." + ) + num_classes = y_pred_shape[-1] + if self.average != "micro": + init_shape = [num_classes] + else: + init_shape = [] + + def _add_zeros_weight(name): + return self.add_weight( + name, + shape=init_shape, + initializer="zeros", + dtype=self.dtype, + ) + + self.true_positives = _add_zeros_weight("true_positives") + self.false_positives = _add_zeros_weight("false_positives") + self.false_negatives = _add_zeros_weight("false_negatives") + self.intermediate_weights = _add_zeros_weight("intermediate_weights") + self.built = True + + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = tf.convert_to_tensor(y_true, dtype=self.dtype) + y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype) + if not self.built: + self.build(y_true.shape, y_pred.shape) + + if self.threshold is None: + threshold = tf.reduce_max(y_pred, axis=-1, keepdims=True) + # make sure [0, 0, 0] doesn't become [1, 1, 1] + # Use abs(x) > eps, instead of x != 0 to check for zero + y_pred = tf.logical_and(y_pred >= threshold, tf.abs(y_pred) > 1e-9) + else: + y_pred = y_pred > self.threshold + y_pred = tf.cast(y_pred, dtype=self.dtype) + + def _weighted_sum(val, sample_weight): + if sample_weight is not None: + val = tf.math.multiply(val, tf.expand_dims(sample_weight, 1)) + return tf.reduce_sum(val, axis=self.axis) + + self.true_positives.assign_add( + _weighted_sum(y_pred * y_true, sample_weight) + ) + self.false_positives.assign_add( + _weighted_sum(y_pred * (1 - y_true), sample_weight) + ) + self.false_negatives.assign_add( + _weighted_sum((1 - y_pred) * y_true, sample_weight) + ) + self.intermediate_weights.assign_add( + _weighted_sum(y_true, sample_weight) + ) + + def result(self): + precision = tf.math.divide_no_nan( + self.true_positives, self.true_positives + self.false_positives + ) + recall = tf.math.divide_no_nan( + self.true_positives, self.true_positives + self.false_negatives + ) + + mul_value = precision * recall + add_value = (tf.math.square(self.beta) * precision) + recall + mean = tf.math.divide_no_nan(mul_value, add_value) + f1_score = mean * (1 + tf.math.square(self.beta)) + + if self.average == "weighted": + weights = tf.math.divide_no_nan( + self.intermediate_weights, + tf.reduce_sum(self.intermediate_weights), + ) + f1_score = tf.reduce_sum(f1_score * weights) + + elif self.average is not None: # [micro, macro] + f1_score = tf.reduce_mean(f1_score) + + return f1_score + + def get_config(self): + """Returns the serializable config of the metric.""" + + config = { + "average": self.average, + "beta": self.beta, + "threshold": self.threshold, + } + + base_config = super().get_config() + return {**base_config, **config} + + def reset_state(self): + for v in self.variables: + v.assign(tf.zeros(v.shape, dtype=v.dtype)) + + +@keras_export("keras.metrics.F1Score") +class F1Score(FBetaScore): + r"""Computes F-1 Score. + + This is the harmonic mean of precision and recall. + Its output range is `[0, 1]`. It works for both multi-class + and multi-label classification. + + It is defined as: + + ```python + f1_score = 2 * (precision * recall) / (precision + recall) + ``` + + Args: + average: Type of averaging to be performed on data. + Acceptable values are `None`, `"micro"`, `"macro"` + and `"weighted"`. 
Default value is `None`. + If `None`, no averaging is performed and `result()` will return + the score for each class. + If `"micro"`, compute metrics globally by counting the total + true positives, false negatives and false positives. + If `"macro"`, compute metrics for each label, + and return their unweighted mean. + This does not take label imbalance into account. + If `"weighted"`, compute metrics for each label, + and return their average weighted by support + (the number of true instances for each label). + This alters `"macro"` to account for label imbalance. + It can result in an F-score that is not between precision and recall. + threshold: Elements of `y_pred` greater than `threshold` are + converted to 1, and the rest to 0. If `threshold` is + `None`, the argmax of `y_pred` is converted to 1, and the rest to 0. + name: Optional. String name of the metric instance. + dtype: Optional. Data type of the metric result. + + Returns: + F-1 Score: float. + + Example: + + >>> metric = tf.keras.metrics.F1Score(threshold=0.5) + >>> y_true = np.array([[1, 1, 1], + ... [1, 0, 0], + ... [1, 1, 0]], np.int32) + >>> y_pred = np.array([[0.2, 0.6, 0.7], + ... [0.2, 0.6, 0.6], + ... [0.6, 0.8, 0.0]], np.float32) + >>> metric.update_state(y_true, y_pred) + >>> result = metric.result() + >>> result.numpy() + array([0.5 , 0.8 , 0.6666667], dtype=float32) + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + average=None, + threshold=None, + name="f1_score", + dtype=None, + ): + super().__init__( + average=average, + beta=1.0, + threshold=threshold, + name=name, + dtype=dtype, + ) + + def get_config(self): + base_config = super().get_config() + del base_config["beta"] + return base_config diff --git a/keras/metrics/f_score_metrics_test.py b/keras/metrics/f_score_metrics_test.py new file mode 100644 index 000000000000..8854467ad8e5 --- /dev/null +++ b/keras/metrics/f_score_metrics_test.py @@ -0,0 +1,277 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== +"""Tests for F-score metrics.""" + +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.metrics import f_score_metrics +from keras.testing_infra import test_utils + + +@test_utils.run_v2_only +class FBetaScoreTest(parameterized.TestCase, tf.test.TestCase): + def _run_test( + self, + y_true, + y_pred, + sample_weights, + average, + beta, + threshold, + reference_result, + ): + y_true = tf.constant(y_true, dtype="float32") + y_pred = tf.constant(y_pred, dtype="float32") + fbeta = f_score_metrics.FBetaScore(average, beta, threshold) + fbeta.update_state(y_true, y_pred, sample_weights) + result = fbeta.result().numpy() + self.assertAllClose(result, reference_result, atol=1e-6) + + def test_config(self): + fbeta_obj = f_score_metrics.FBetaScore( + beta=0.5, threshold=0.3, average=None + ) + self.assertEqual(fbeta_obj.beta, 0.5) + self.assertEqual(fbeta_obj.average, None) + self.assertEqual(fbeta_obj.threshold, 0.3) + self.assertEqual(fbeta_obj.dtype, tf.float32) + + # Check save and restore config + fbeta_obj2 = f_score_metrics.FBetaScore.from_config( + fbeta_obj.get_config() + ) + self.assertEqual(fbeta_obj2.beta, 0.5) + self.assertEqual(fbeta_obj2.average, None) + self.assertEqual(fbeta_obj2.threshold, 0.3) + self.assertEqual(fbeta_obj2.dtype, tf.float32) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + average=["micro", "macro", "weighted"], beta=[0.5, 1.0, 2.0] + ) + ) + def test_fbeta_perfect_score(self, average, beta): + y_true = [[1, 1, 1], [1, 0, 0], [1, 1, 0]] + y_pred = [[0.7, 0.7, 0.7], [1, 0, 0], [0.9, 0.8, 0]] + self._run_test( + y_true, + y_pred, + None, + average=average, + beta=beta, + threshold=0.66, + reference_result=1.0, + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + average=["micro", "macro", "weighted"], beta=[0.5, 1.0, 2.0] + ) + ) + def test_fbeta_worst_score(self, average, beta): + y_true = [[0, 0, 0], [0, 1, 0], [0, 0, 1]] + y_pred = [[0.7, 0.7, 0.7], [1, 0, 0], [0.9, 0.8, 0]] + self._run_test( + y_true, + y_pred, + None, + average=average, + beta=beta, + threshold=0.66, + reference_result=0.0, + ) + + @parameterized.parameters( + # average, beta, result + (None, 0.5, [0.71428573, 0.5, 0.833334]), + (None, 1.0, [0.8, 0.5, 0.6666667]), + (None, 2.0, [0.9090904, 0.5, 0.555556]), + ("micro", 0.5, 0.6666667), + ("micro", 1.0, 0.6666667), + ("micro", 2.0, 0.6666667), + ("macro", 0.5, 0.6825397), + ("macro", 1.0, 0.6555555), + ("macro", 2.0, 0.6548822), + ("weighted", 0.5, 0.6825397), + ("weighted", 1.0, 0.6555555), + ("weighted", 2.0, 0.6548822), + ) + def test_fbeta_random_score(self, average, beta, result): + y_pred = [[0.7, 0.7, 0.7], [1, 0, 0], [0.9, 0.8, 0]] + y_true = [[0, 0, 1], [1, 1, 0], [1, 1, 1]] + self._run_test( + y_true, + y_pred, + None, + average=average, + beta=beta, + threshold=0.66, + reference_result=result, + ) + + @parameterized.parameters( + # average, beta, result + (None, 0.5, [0.9090904, 0.555556, 1.0]), + (None, 1.0, [0.8, 0.6666667, 1.0]), + (None, 2.0, [0.71428573, 0.833334, 1.0]), + ("micro", 0.5, 0.833334), + ("micro", 1.0, 0.833334), + ("micro", 2.0, 0.833334), + ("macro", 0.5, 0.821549), + ("macro", 1.0, 0.822222), + ("macro", 2.0, 0.849206), + ("weighted", 0.5, 0.880471), + ("weighted", 1.0, 0.844445), + ("weighted", 2.0, 0.829365), + ) + def test_fbeta_random_score_none(self, average, beta, result): + y_true = [ 
+ [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [1, 0, 0], + [1, 0, 0], + [0, 0, 1], + ] + y_pred = [ + [0.9, 0.1, 0], + [0.2, 0.6, 0.2], + [0, 0, 1], + [0.4, 0.3, 0.3], + [0, 0.9, 0.1], + [0, 0, 1], + ] + self._run_test( + y_true, + y_pred, + None, + average=average, + beta=beta, + threshold=None, + reference_result=result, + ) + + @parameterized.parameters( + # average, beta, sample_weights, result + (None, 0.5, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [0.909091, 0.555556, 1.0]), + (None, 0.5, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], [1.0, 0.0, 1.0]), + (None, 0.5, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], [0.9375, 0.714286, 1.0]), + (None, 1.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [0.8, 0.666667, 1.0]), + (None, 1.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], [1.0, 0.0, 1.0]), + (None, 1.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], [0.857143, 0.8, 1.0]), + (None, 2.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [0.714286, 0.833333, 1.0]), + (None, 2.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], [1.0, 0.0, 1.0]), + (None, 2.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], [0.789474, 0.909091, 1.0]), + ("micro", 0.5, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.833333), + ("micro", 0.5, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("micro", 0.5, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.9), + ("micro", 1.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.833333), + ("micro", 1.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("micro", 1.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.9), + ("micro", 2.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.833333), + ("micro", 2.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("micro", 2.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.9), + ("macro", 0.5, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.821549), + ("macro", 0.5, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 0.666667), + ("macro", 0.5, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.883929), + ("macro", 1.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.822222), + ("macro", 1.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 0.666667), + ("macro", 1.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.885714), + ("macro", 2.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.849206), + ("macro", 2.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 0.666667), + ("macro", 2.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.899522), + ("weighted", 0.5, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.880471), + ("weighted", 0.5, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("weighted", 0.5, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.917857), + ("weighted", 1.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.844444), + ("weighted", 1.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("weighted", 1.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.902857), + ("weighted", 2.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.829365), + ("weighted", 2.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("weighted", 2.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.897608), + ) + def test_fbeta_weighted_random_score_none( + self, average, beta, sample_weights, result + ): + y_true = [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [1, 0, 0], + [1, 0, 0], + [0, 0, 1], + ] + y_pred = [ + [0.9, 0.1, 0], + [0.2, 0.6, 0.2], + [0, 0, 1], + [0.4, 0.3, 0.3], + [0, 0.9, 0.1], + [0, 0, 1], + ] + self._run_test( + y_true, + y_pred, + sample_weights, + average=average, + beta=beta, + threshold=None, + reference_result=result, + ) + + +@test_utils.run_v2_only +class F1ScoreTest(tf.test.TestCase): + def test_config(self): + f1_obj = f_score_metrics.F1Score() + config = f1_obj.get_config() + self.assertNotIn("beta", config) + + # Check save and restore config + f1_obj = f_score_metrics.F1Score.from_config(config) + self.assertEqual(f1_obj.average, None) + self.assertEqual(f1_obj.dtype, tf.float32) + + def test_correctness(self): + f1 = f_score_metrics.F1Score() + fbeta = f_score_metrics.FBetaScore(beta=1.0) 
+ + y_true = [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [1, 0, 0], + [1, 0, 0], + [0, 0, 1], + ] + y_pred = [ + [0.9, 0.1, 0], + [0.2, 0.6, 0.2], + [0, 0, 1], + [0.4, 0.3, 0.3], + [0, 0.9, 0.1], + [0, 0, 1], + ] + + fbeta.update_state(y_true, y_pred) + f1.update_state(y_true, y_pred) + self.assertAllClose( + fbeta.result().numpy(), f1.result().numpy(), atol=1e-6 + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/hinge_metrics.py b/keras/metrics/hinge_metrics.py new file mode 100644 index 000000000000..ff49472c8f0d --- /dev/null +++ b/keras/metrics/hinge_metrics.py @@ -0,0 +1,136 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Hinge metrics.""" + +from keras.dtensor import utils as dtensor_utils +from keras.losses import categorical_hinge +from keras.losses import hinge +from keras.losses import squared_hinge +from keras.metrics import base_metric + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@keras_export("keras.metrics.Hinge") +class Hinge(base_metric.MeanMetricWrapper): + """Computes the hinge metric between `y_true` and `y_pred`. + + `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are + provided we will convert them to -1 or 1. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.Hinge() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) + >>> m.result().numpy() + 1.3 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 1.1 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', loss='mse', metrics=[tf.keras.metrics.Hinge()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="hinge", dtype=None): + super().__init__(hinge, name, dtype=dtype) + + +@keras_export("keras.metrics.SquaredHinge") +class SquaredHinge(base_metric.MeanMetricWrapper): + """Computes the squared hinge metric between `y_true` and `y_pred`. + + `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are + provided we will convert them to -1 or 1. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.SquaredHinge() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) + >>> m.result().numpy() + 1.86 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], + ... 
sample_weight=[1, 0]) + >>> m.result().numpy() + 1.46 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.SquaredHinge()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="squared_hinge", dtype=None): + super().__init__(squared_hinge, name, dtype=dtype) + + +@keras_export("keras.metrics.CategoricalHinge") +class CategoricalHinge(base_metric.MeanMetricWrapper): + """Computes the categorical hinge metric between `y_true` and `y_pred`. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.CategoricalHinge() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) + >>> m.result().numpy() + 1.4000001 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 1.2 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.CategoricalHinge()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="categorical_hinge", dtype=None): + super().__init__(categorical_hinge, name, dtype=dtype) diff --git a/keras/metrics/hinge_metrics_test.py b/keras/metrics/hinge_metrics_test.py new file mode 100644 index 000000000000..d5b093142102 --- /dev/null +++ b/keras/metrics/hinge_metrics_test.py @@ -0,0 +1,193 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Keras metrics.""" + +import tensorflow.compat.v2 as tf + +from keras import metrics +from keras.testing_infra import test_combinations + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class HingeTest(tf.test.TestCase): + def test_config(self): + hinge_obj = metrics.Hinge(name="hinge", dtype=tf.int32) + self.assertEqual(hinge_obj.name, "hinge") + self.assertEqual(hinge_obj._dtype, tf.int32) + + # Check save and restore config + hinge_obj2 = metrics.Hinge.from_config(hinge_obj.get_config()) + self.assertEqual(hinge_obj2.name, "hinge") + self.assertEqual(hinge_obj2._dtype, tf.int32) + + def test_unweighted(self): + hinge_obj = metrics.Hinge() + self.evaluate(tf.compat.v1.variables_initializer(hinge_obj.variables)) + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + + # metric = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # metric = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] + # = [0.6, 0.4125] + # reduced metric = (0.6 + 0.4125) / 2 + + update_op = hinge_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = hinge_obj.result() + self.assertAllClose(0.506, result, atol=1e-3) + + def test_weighted(self): + hinge_obj = metrics.Hinge() + self.evaluate(tf.compat.v1.variables_initializer(hinge_obj.variables)) + y_true = tf.constant([[-1, 1, -1, 1], [-1, -1, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + sample_weight = tf.constant([1.5, 2.0]) + + # metric = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # metric = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] + # = [0.6, 0.4125] + # weighted metric = [0.6 * 1.5, 0.4125 * 2] + # reduced metric = (0.6 * 1.5 + 0.4125 * 2) / (1.5 + 2) + + result = hinge_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(0.493, self.evaluate(result), atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class SquaredHingeTest(tf.test.TestCase): + def test_config(self): + sq_hinge_obj = metrics.SquaredHinge(name="sq_hinge", dtype=tf.int32) + self.assertEqual(sq_hinge_obj.name, "sq_hinge") + self.assertEqual(sq_hinge_obj._dtype, tf.int32) + + # Check save and restore config + sq_hinge_obj2 = metrics.SquaredHinge.from_config( + sq_hinge_obj.get_config() + ) + self.assertEqual(sq_hinge_obj2.name, "sq_hinge") + self.assertEqual(sq_hinge_obj2._dtype, tf.int32) + + def test_unweighted(self): + sq_hinge_obj = metrics.SquaredHinge() + self.evaluate( + tf.compat.v1.variables_initializer(sq_hinge_obj.variables) + ) + y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + + # metric = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, + # 0.4]] + # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 
0.64, 0.81, 0], + # [0.5625, 0, 0.25, 0.16]] + # metric = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / + # 4] + # = [0.485, 0.2431] + # reduced metric = (0.485 + 0.2431) / 2 + + update_op = sq_hinge_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = sq_hinge_obj.result() + self.assertAllClose(0.364, result, atol=1e-3) + + def test_weighted(self): + sq_hinge_obj = metrics.SquaredHinge() + self.evaluate( + tf.compat.v1.variables_initializer(sq_hinge_obj.variables) + ) + y_true = tf.constant([[-1, 1, -1, 1], [-1, -1, 1, 1]]) + y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]]) + sample_weight = tf.constant([1.5, 2.0]) + + # metric = max(0, 1-y_true * y_pred), where y_true is -1/1 + + # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] + # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] + # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, + # 0.4]] + # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 0.64, 0.81, 0], + # [0.5625, 0, 0.25, 0.16]] + # metric = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / + # 4] + # = [0.485, 0.2431] + # weighted metric = [0.485 * 1.5, 0.2431 * 2] + # reduced metric = (0.485 * 1.5 + 0.2431 * 2) / (1.5 + 2) + + result = sq_hinge_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(0.347, self.evaluate(result), atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class CategoricalHingeTest(tf.test.TestCase): + def test_config(self): + cat_hinge_obj = metrics.CategoricalHinge( + name="cat_hinge", dtype=tf.int32 + ) + self.assertEqual(cat_hinge_obj.name, "cat_hinge") + self.assertEqual(cat_hinge_obj._dtype, tf.int32) + + # Check save and restore config + cat_hinge_obj2 = metrics.CategoricalHinge.from_config( + cat_hinge_obj.get_config() + ) + self.assertEqual(cat_hinge_obj2.name, "cat_hinge") + self.assertEqual(cat_hinge_obj2._dtype, tf.int32) + + def test_unweighted(self): + cat_hinge_obj = metrics.CategoricalHinge() + self.evaluate( + tf.compat.v1.variables_initializer(cat_hinge_obj.variables) + ) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + + update_op = cat_hinge_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = cat_hinge_obj.result() + self.assertAllClose(0.5, result, atol=1e-5) + + def test_weighted(self): + cat_hinge_obj = metrics.CategoricalHinge() + self.evaluate( + tf.compat.v1.variables_initializer(cat_hinge_obj.variables) + ) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + sample_weight = tf.constant((1.0, 1.5, 2.0, 2.5)) + result = cat_hinge_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(0.5, self.evaluate(result), atol=1e-5) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/iou_metrics.py b/keras/metrics/iou_metrics.py new file mode 100644 index 000000000000..377ef8858f96 --- /dev/null +++ b/keras/metrics/iou_metrics.py @@ -0,0 +1,759 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""IoU metrics.""" + +from typing import List +from typing import Optional +from typing import Tuple +from typing import Union + +import numpy as np +import tensorflow.compat.v2 as tf + +from keras import backend +from keras.dtensor import utils as dtensor_utils +from keras.metrics import base_metric + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +class _IoUBase(base_metric.Metric): + """Computes the confusion matrix for Intersection-Over-Union metrics. + + Intersection-Over-Union is a common evaluation metric for semantic image + segmentation. + + For an individual class, the IoU metric is defined as follows: + + ``` + iou = true_positives / (true_positives + false_positives + false_negatives) + ``` + + From IoUs of individual classes, the MeanIoU can be computed as the mean of + the individual IoUs. + + To compute IoUs, the predictions are accumulated in a confusion matrix, + weighted by `sample_weight` and the metric is then calculated from it. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Args: + num_classes: The possible number of labels the prediction task can have. + This value must be provided, since a confusion matrix of size + `(num_classes, num_classes)` will be allocated. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + ignore_class: Optional integer. The ID of a class to be ignored during + metric computation. This is useful, for example, in segmentation + problems featuring a "void" class (commonly -1 or 255) in segmentation + maps. By default (`ignore_class=None`), all classes are considered. + sparse_y_true: Whether labels are encoded using integers or + dense floating point vectors. If `False`, the `tf.argmax` function + will be used to determine each sample's most likely associated label. + sparse_y_pred: Whether predictions are encoded using integers or + dense floating point vectors. If `False`, the `tf.argmax` function + will be used to determine each sample's most likely associated label. + axis: (Optional) -1 is the dimension containing the logits. + Defaults to `-1`. + """ + + def __init__( + self, + num_classes: int, + name: Optional[str] = None, + dtype: Optional[Union[str, tf.dtypes.DType]] = None, + ignore_class: Optional[int] = None, + sparse_y_true: bool = True, + sparse_y_pred: bool = True, + axis: int = -1, + ): + super().__init__(name=name, dtype=dtype) + self.num_classes = num_classes + self.ignore_class = ignore_class + self.sparse_y_true = sparse_y_true + self.sparse_y_pred = sparse_y_pred + self.axis = axis + + # Variable to accumulate the predictions in the confusion matrix. + self.total_cm = self.add_weight( + "total_confusion_matrix", + shape=(num_classes, num_classes), + initializer="zeros", + ) + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates the confusion matrix statistics. + + Args: + y_true: The ground truth values. + y_pred: The predicted values. 
+ sample_weight: Optional weighting of each example. Can + be a `Tensor` whose rank is either 0, or the same rank as `y_true`, + and must be broadcastable to `y_true`. Defaults to `1`. + + Returns: + Update op. + """ + + if not self.sparse_y_true: + y_true = tf.argmax(y_true, axis=self.axis) + if not self.sparse_y_pred: + y_pred = tf.argmax(y_pred, axis=self.axis) + + y_true = tf.cast(y_true, self._dtype) + y_pred = tf.cast(y_pred, self._dtype) + + # Flatten the input if its rank > 1. + if y_pred.shape.ndims > 1: + y_pred = tf.reshape(y_pred, [-1]) + + if y_true.shape.ndims > 1: + y_true = tf.reshape(y_true, [-1]) + + if sample_weight is not None: + sample_weight = tf.cast(sample_weight, self._dtype) + if sample_weight.shape.ndims > 1: + sample_weight = tf.reshape(sample_weight, [-1]) + + if self.ignore_class is not None: + ignore_class = tf.cast(self.ignore_class, y_true.dtype) + valid_mask = tf.not_equal(y_true, ignore_class) + y_true = y_true[valid_mask] + y_pred = y_pred[valid_mask] + if sample_weight is not None: + sample_weight = sample_weight[valid_mask] + + # Accumulate the prediction to current confusion matrix. + current_cm = tf.math.confusion_matrix( + y_true, + y_pred, + self.num_classes, + weights=sample_weight, + dtype=self._dtype, + ) + return self.total_cm.assign_add(current_cm) + + def reset_state(self): + backend.set_value( + self.total_cm, np.zeros((self.num_classes, self.num_classes)) + ) + + +@keras_export("keras.metrics.IoU") +class IoU(_IoUBase): + """Computes the Intersection-Over-Union metric for specific target classes. + + General definition and computation: + + Intersection-Over-Union is a common evaluation metric for semantic image + segmentation. + + For an individual class, the IoU metric is defined as follows: + + ``` + iou = true_positives / (true_positives + false_positives + false_negatives) + ``` + + To compute IoUs, the predictions are accumulated in a confusion matrix, + weighted by `sample_weight` and the metric is then calculated from it. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Note, this class first computes IoUs for all individual classes, then + returns the mean of IoUs for the classes that are specified by + `target_class_ids`. If `target_class_ids` has only one id value, the IoU of + that specific class is returned. + + Args: + num_classes: The possible number of labels the prediction task can have. + A confusion matrix of dimension = [num_classes, num_classes] will be + allocated to accumulate predictions from which the metric is calculated. + target_class_ids: A tuple or list of target class ids for which the metric + is returned. To compute IoU for a specific class, a list (or tuple) of a + single id value should be provided. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + ignore_class: Optional integer. The ID of a class to be ignored during + metric computation. This is useful, for example, in segmentation + problems featuring a "void" class (commonly -1 or 255) in segmentation + maps. By default (`ignore_class=None`), all classes are considered. + sparse_y_true: Whether labels are encoded using integers or + dense floating point vectors. If `False`, the `tf.argmax` function + will be used to determine each sample's most likely associated label. + sparse_y_pred: Whether predictions are encoded using integers or + dense floating point vectors. 
If `False`, the `tf.argmax` function + will be used to determine each sample's most likely associated label. + axis: (Optional) -1 is the dimension containing the logits. + Defaults to `-1`. + + Standalone usage: + + >>> # cm = [[1, 1], + >>> # [1, 1]] + >>> # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] + >>> # iou = true_positives / (sum_row + sum_col - true_positives)) + >>> # iou = [0.33, 0.33] + >>> m = tf.keras.metrics.IoU(num_classes=2, target_class_ids=[0]) + >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1]) + >>> m.result().numpy() + 0.33333334 + + >>> m.reset_state() + >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1], + ... sample_weight=[0.3, 0.3, 0.3, 0.1]) + >>> # cm = [[0.3, 0.3], + >>> # [0.3, 0.1]] + >>> # sum_row = [0.6, 0.4], sum_col = [0.6, 0.4], + >>> # true_positives = [0.3, 0.1] + >>> # iou = [0.33, 0.14] + >>> m.result().numpy() + 0.33333334 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.IoU(num_classes=2, target_class_ids=[0])]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + num_classes: int, + target_class_ids: Union[List[int], Tuple[int, ...]], + name: Optional[str] = None, + dtype: Optional[Union[str, tf.dtypes.DType]] = None, + ignore_class: Optional[int] = None, + sparse_y_true: bool = True, + sparse_y_pred: bool = True, + axis: int = -1, + ): + super().__init__( + name=name, + num_classes=num_classes, + ignore_class=ignore_class, + sparse_y_true=sparse_y_true, + sparse_y_pred=sparse_y_pred, + axis=axis, + dtype=dtype, + ) + if max(target_class_ids) >= num_classes: + raise ValueError( + f"Target class id {max(target_class_ids)} " + "is out of range, which is " + f"[{0}, {num_classes})." + ) + self.target_class_ids = list(target_class_ids) + + def result(self): + """Compute the intersection-over-union via the confusion matrix.""" + sum_over_row = tf.cast( + tf.reduce_sum(self.total_cm, axis=0), dtype=self._dtype + ) + sum_over_col = tf.cast( + tf.reduce_sum(self.total_cm, axis=1), dtype=self._dtype + ) + true_positives = tf.cast( + tf.linalg.tensor_diag_part(self.total_cm), dtype=self._dtype + ) + + # sum_over_row + sum_over_col = + # 2 * true_positives + false_positives + false_negatives. + denominator = sum_over_row + sum_over_col - true_positives + + # Only keep the target classes + true_positives = tf.gather(true_positives, self.target_class_ids) + denominator = tf.gather(denominator, self.target_class_ids) + + # If the denominator is 0, we need to ignore the class. + num_valid_entries = tf.reduce_sum( + tf.cast(tf.not_equal(denominator, 0), dtype=self._dtype) + ) + + iou = tf.math.divide_no_nan(true_positives, denominator) + + return tf.math.divide_no_nan( + tf.reduce_sum(iou, name="mean_iou"), num_valid_entries + ) + + def get_config(self): + config = { + "num_classes": self.num_classes, + "target_class_ids": self.target_class_ids, + "ignore_class": self.ignore_class, + "sparse_y_true": self.sparse_y_true, + "sparse_y_pred": self.sparse_y_pred, + "axis": self.axis, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.metrics.BinaryIoU") +class BinaryIoU(IoU): + """Computes the Intersection-Over-Union metric for class 0 and/or 1. + + General definition and computation: + + Intersection-Over-Union is a common evaluation metric for semantic image + segmentation. 
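Before the formula is restated below, a short editorial sketch of the confusion-matrix bookkeeping these IoU classes share; it reuses the 2x2 matrix from the `IoU` docstring above and is an illustration, not the class implementation:

```python
import numpy as np

def iou_from_cm(cm):
    # Rows are true classes, columns are predictions (as in
    # tf.math.confusion_matrix), so the diagonal holds TP, column sums
    # add FP, and row sums add FN.
    cm = np.asarray(cm, dtype=float)
    tp = np.diag(cm)
    fp = cm.sum(axis=0) - tp
    fn = cm.sum(axis=1) - tp
    denom = tp + fp + fn
    return np.where(denom > 0, tp / np.maximum(denom, 1e-12), 0.0)

print(iou_from_cm([[1, 1], [1, 1]]))  # -> [0.33333333 0.33333333]
```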
+ + For an individual class, the IoU metric is defined as follows: + + ``` + iou = true_positives / (true_positives + false_positives + false_negatives) + ``` + + To compute IoUs, the predictions are accumulated in a confusion matrix, + weighted by `sample_weight` and the metric is then calculated from it. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + This class can be used to compute IoUs for a binary classification task + where the predictions are provided as logits. First a `threshold` is applied + to the predicted values such that those that are below the `threshold` are + converted to class 0 and those that are above the `threshold` are converted + to class 1. + + IoUs for classes 0 and 1 are then computed, the mean of IoUs for the classes + that are specified by `target_class_ids` is returned. + + Note: with `threshold=0`, this metric has the same behavior as `IoU`. + + Args: + target_class_ids: A tuple or list of target class ids for which the metric + is returned. Options are `[0]`, `[1]`, or `[0, 1]`. With `[0]` (or + `[1]`), the IoU metric for class 0 (or class 1, respectively) is + returned. With `[0, 1]`, the mean of IoUs for the two classes is + returned. + threshold: A threshold that applies to the prediction logits to convert + them to either predicted class 0 if the logit is below `threshold` or + predicted class 1 if the logit is above `threshold`. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.3) + >>> m.update_state([0, 1, 0, 1], [0.1, 0.2, 0.4, 0.7]) + >>> m.result().numpy() + 0.33333334 + + >>> m.reset_state() + >>> m.update_state([0, 1, 0, 1], [0.1, 0.2, 0.4, 0.7], + ... sample_weight=[0.2, 0.3, 0.4, 0.1]) + >>> # cm = [[0.2, 0.4], + >>> # [0.3, 0.1]] + >>> # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], + >>> # true_positives = [0.2, 0.1] + >>> # iou = [0.222, 0.125] + >>> m.result().numpy() + 0.17361112 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.BinaryIoU(target_class_ids=[0], threshold=0.5)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + target_class_ids: Union[List[int], Tuple[int, ...]] = (0, 1), + threshold=0.5, + name=None, + dtype=None, + ): + + super().__init__( + num_classes=2, + target_class_ids=target_class_ids, + name=name, + dtype=dtype, + ) + self.threshold = threshold + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates the confusion matrix statistics. + + Before the confusion matrix is updated, the predicted values are + thresholded to be: + 0 for values that are smaller than the `threshold` + 1 for values that are larger or equal to the `threshold` + + Args: + y_true: The ground truth values. + y_pred: The predicted values. + sample_weight: Optional weighting of each example. Can + be a `Tensor` whose rank is either 0, or the same rank as `y_true`, + and must be broadcastable to `y_true`. Defaults to `1`. + + Returns: + Update op. 
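To make the thresholding step described above concrete, a brief illustration using the `threshold=0.3` value from the class docstring example (editorial, not part of the diff):

```python
import tensorflow as tf

# Predictions below the threshold map to class 0, the rest to class 1.
y_pred = tf.constant([0.1, 0.2, 0.4, 0.7])
print(tf.cast(y_pred >= 0.3, tf.float32).numpy())  # [0. 0. 1. 1.]
```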
+ """ + y_pred = tf.cast(y_pred, self._dtype) + y_pred = tf.cast(y_pred >= self.threshold, self._dtype) + return super().update_state(y_true, y_pred, sample_weight) + + def get_config(self): + return { + "target_class_ids": self.target_class_ids, + "threshold": self.threshold, + "name": self.name, + "dtype": self._dtype, + } + + +@keras_export("keras.metrics.MeanIoU") +class MeanIoU(IoU): + """Computes the mean Intersection-Over-Union metric. + + General definition and computation: + + Intersection-Over-Union is a common evaluation metric for semantic image + segmentation. + + For an individual class, the IoU metric is defined as follows: + + ``` + iou = true_positives / (true_positives + false_positives + false_negatives) + ``` + + To compute IoUs, the predictions are accumulated in a confusion matrix, + weighted by `sample_weight` and the metric is then calculated from it. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Note that this class first computes IoUs for all individual classes, then + returns the mean of these values. + + Args: + num_classes: The possible number of labels the prediction task can have. + This value must be provided, since a confusion matrix of dimension = + [num_classes, num_classes] will be allocated. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + ignore_class: Optional integer. The ID of a class to be ignored during + metric computation. This is useful, for example, in segmentation + problems featuring a "void" class (commonly -1 or 255) in segmentation + maps. By default (`ignore_class=None`), all classes are considered. + sparse_y_true: Whether labels are encoded using integers or + dense floating point vectors. If `False`, the `tf.argmax` function + will be used to determine each sample's most likely associated label. + sparse_y_pred: Whether predictions are encoded using integers or + dense floating point vectors. If `False`, the `tf.argmax` function + will be used to determine each sample's most likely associated label. + axis: (Optional) The dimension containing the logits. Defaults to `-1`. + + Standalone usage: + + >>> # cm = [[1, 1], + >>> # [1, 1]] + >>> # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] + >>> # iou = true_positives / (sum_row + sum_col - true_positives)) + >>> # result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2 = 0.33 + >>> m = tf.keras.metrics.MeanIoU(num_classes=2) + >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1]) + >>> m.result().numpy() + 0.33333334 + + >>> m.reset_state() + >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1], + ... 
+    >>> m.result().numpy()
+    0.23809525
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(
+        optimizer='sgd',
+        loss='mse',
+        metrics=[tf.keras.metrics.MeanIoU(num_classes=2)])
+    ```
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(
+        self,
+        num_classes: int,
+        name: Optional[str] = None,
+        dtype: Optional[Union[str, tf.dtypes.DType]] = None,
+        ignore_class: Optional[int] = None,
+        sparse_y_true: bool = True,
+        sparse_y_pred: bool = True,
+        axis: int = -1,
+    ):
+        target_class_ids = list(range(num_classes))
+        super().__init__(
+            name=name,
+            num_classes=num_classes,
+            target_class_ids=target_class_ids,
+            axis=axis,
+            dtype=dtype,
+            ignore_class=ignore_class,
+            sparse_y_true=sparse_y_true,
+            sparse_y_pred=sparse_y_pred,
+        )
+
+    def get_config(self):
+        return {
+            "num_classes": self.num_classes,
+            "name": self.name,
+            "dtype": self._dtype,
+            "ignore_class": self.ignore_class,
+            "sparse_y_true": self.sparse_y_true,
+            "sparse_y_pred": self.sparse_y_pred,
+            "axis": self.axis,
+        }
+
+
+@keras_export("keras.metrics.OneHotIoU")
+class OneHotIoU(IoU):
+    """Computes the Intersection-Over-Union metric for one-hot encoded labels.
+
+    General definition and computation:
+
+    Intersection-Over-Union is a common evaluation metric for semantic image
+    segmentation.
+
+    For an individual class, the IoU metric is defined as follows:
+
+    ```
+    iou = true_positives / (true_positives + false_positives + false_negatives)
+    ```
+
+    To compute IoUs, the predictions are accumulated in a confusion matrix,
+    weighted by `sample_weight`, and the metric is then calculated from it.
+
+    If `sample_weight` is `None`, weights default to 1.
+    Use `sample_weight` of 0 to mask values.
+
+    This class can be used to compute IoU for multi-class classification tasks
+    where the labels are one-hot encoded (the last axis should have one
+    dimension per class). Note that the predictions should also have the same
+    shape. To compute the IoU, first the labels and predictions are converted
+    back into integer format by taking the argmax over the class axis. Then
+    the same computation steps as for the base `IoU` class apply.
+
+    Note: if there is only one channel in the labels and predictions, this
+    class is the same as class `IoU`. In this case, use `IoU` instead.
+
+    Also, make sure that `num_classes` is equal to the number of classes in
+    the data, to avoid a "labels out of bound" error when the confusion matrix
+    is computed.
+
+    Args:
+      num_classes: The possible number of labels the prediction task can have.
+        A confusion matrix of shape `(num_classes, num_classes)` will be
+        allocated to accumulate predictions from which the metric is
+        calculated.
+      target_class_ids: A tuple or list of target class ids for which the
+        metric is returned. To compute IoU for a specific class, a list (or
+        tuple) of a single id value should be provided.
+      name: (Optional) string name of the metric instance.
+      dtype: (Optional) data type of the metric result.
+      ignore_class: Optional integer. The ID of a class to be ignored during
+        metric computation. This is useful, for example, in segmentation
+        problems featuring a "void" class (commonly -1 or 255) in segmentation
+        maps. By default (`ignore_class=None`), all classes are considered.
+      sparse_y_pred: Whether predictions are encoded using natural numbers or
+        probability distribution vectors. If `False`, the `tf.argmax` function
+        will be used to determine each sample's most likely associated label.
+      axis: (Optional) The dimension containing the logits. Defaults to `-1`.
+
+    Standalone usage:
+
+    >>> y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]])
+    >>> y_pred = tf.constant([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1],
+    ...                       [0.1, 0.4, 0.5]])
+    >>> sample_weight = [0.1, 0.2, 0.3, 0.4]
+    >>> m = tf.keras.metrics.OneHotIoU(num_classes=3, target_class_ids=[0, 2])
+    >>> m.update_state(
+    ...     y_true=y_true, y_pred=y_pred, sample_weight=sample_weight)
+    >>> # cm = [[0, 0, 0.2+0.4],
+    >>> #       [0.3, 0, 0],
+    >>> #       [0, 0, 0.1]]
+    >>> # sum_row = [0.3, 0, 0.7], sum_col = [0.6, 0.3, 0.1]
+    >>> # true_positives = [0, 0, 0.1]
+    >>> # single_iou = true_positives / (sum_row + sum_col - true_positives)
+    >>> # mean_iou = (0 / (0.3 + 0.6 - 0) + 0.1 / (0.7 + 0.1 - 0.1)) / 2
+    >>> m.result().numpy()
+    0.071
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(
+        optimizer='sgd',
+        loss='mse',
+        metrics=[tf.keras.metrics.OneHotIoU(
+            num_classes=3, target_class_ids=[1])])
+    ```
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(
+        self,
+        num_classes: int,
+        target_class_ids: Union[List[int], Tuple[int, ...]],
+        name=None,
+        dtype=None,
+        ignore_class: Optional[int] = None,
+        sparse_y_pred: bool = False,
+        axis: int = -1,
+    ):
+        super().__init__(
+            num_classes=num_classes,
+            target_class_ids=target_class_ids,
+            name=name,
+            dtype=dtype,
+            ignore_class=ignore_class,
+            sparse_y_true=False,
+            sparse_y_pred=sparse_y_pred,
+            axis=axis,
+        )
+
+    def get_config(self):
+        return {
+            "num_classes": self.num_classes,
+            "target_class_ids": self.target_class_ids,
+            "name": self.name,
+            "dtype": self._dtype,
+            "ignore_class": self.ignore_class,
+            "sparse_y_pred": self.sparse_y_pred,
+            "axis": self.axis,
+        }
+
+
+@keras_export("keras.metrics.OneHotMeanIoU")
+class OneHotMeanIoU(MeanIoU):
+    """Computes mean Intersection-Over-Union metric for one-hot encoded labels.
+
+    General definition and computation:
+
+    Intersection-Over-Union is a common evaluation metric for semantic image
+    segmentation.
+
+    For an individual class, the IoU metric is defined as follows:
+
+    ```
+    iou = true_positives / (true_positives + false_positives + false_negatives)
+    ```
+
+    To compute IoUs, the predictions are accumulated in a confusion matrix,
+    weighted by `sample_weight`, and the metric is then calculated from it.
+
+    If `sample_weight` is `None`, weights default to 1.
+    Use `sample_weight` of 0 to mask values.
+
+    This class can be used to compute the mean IoU for multi-class
+    classification tasks where the labels are one-hot encoded (the last axis
+    should have one dimension per class). Note that the predictions should
+    also have the same shape. To compute the mean IoU, first the labels and
+    predictions are converted back into integer format by taking the argmax
+    over the class axis. Then the same computation steps as for the base
+    `MeanIoU` class apply.
+
+    Note: if there is only one channel in the labels and predictions, this
+    class is the same as class `MeanIoU`. In this case, use `MeanIoU` instead.
+
+    Also, make sure that `num_classes` is equal to the number of classes in
+    the data, to avoid a "labels out of bound" error when the confusion matrix
+    is computed.
+
+    Args:
+      num_classes: The possible number of labels the prediction task can have.
+        A confusion matrix of shape `(num_classes, num_classes)` will be
+        allocated to accumulate predictions from which the metric is
+        calculated.
+      name: (Optional) string name of the metric instance.
+      dtype: (Optional) data type of the metric result.
+      ignore_class: Optional integer. The ID of a class to be ignored during
+        metric computation. This is useful, for example, in segmentation
+        problems featuring a "void" class (commonly -1 or 255) in segmentation
+        maps. By default (`ignore_class=None`), all classes are considered.
+      sparse_y_pred: Whether predictions are encoded using natural numbers or
+        probability distribution vectors. If `False`, the `tf.argmax` function
+        will be used to determine each sample's most likely associated label.
+      axis: (Optional) The dimension containing the logits. Defaults to `-1`.
+
+    Standalone usage:
+
+    >>> y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]])
+    >>> y_pred = tf.constant([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1],
+    ...                       [0.1, 0.4, 0.5]])
+    >>> sample_weight = [0.1, 0.2, 0.3, 0.4]
+    >>> m = tf.keras.metrics.OneHotMeanIoU(num_classes=3)
+    >>> m.update_state(
+    ...     y_true=y_true, y_pred=y_pred, sample_weight=sample_weight)
+    >>> # cm = [[0, 0, 0.2+0.4],
+    >>> #       [0.3, 0, 0],
+    >>> #       [0, 0, 0.1]]
+    >>> # sum_row = [0.3, 0, 0.7], sum_col = [0.6, 0.3, 0.1]
+    >>> # true_positives = [0, 0, 0.1]
+    >>> # single_iou = true_positives / (sum_row + sum_col - true_positives)
+    >>> # mean_iou = (0 + 0 + 0.1 / (0.7 + 0.1 - 0.1)) / 3
+    >>> m.result().numpy()
+    0.048
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(
+        optimizer='sgd',
+        loss='mse',
+        metrics=[tf.keras.metrics.OneHotMeanIoU(num_classes=3)])
+    ```
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(
+        self,
+        num_classes: int,
+        name: Optional[str] = None,
+        dtype: Optional[Union[str, tf.dtypes.DType]] = None,
+        ignore_class: Optional[int] = None,
+        sparse_y_pred: bool = False,
+        axis: int = -1,
+    ):
+        super().__init__(
+            num_classes=num_classes,
+            axis=axis,
+            name=name,
+            dtype=dtype,
+            ignore_class=ignore_class,
+            sparse_y_true=False,
+            sparse_y_pred=sparse_y_pred,
+        )
+
+    def get_config(self):
+        return {
+            "num_classes": self.num_classes,
+            "name": self.name,
+            "dtype": self._dtype,
+            "ignore_class": self.ignore_class,
+            "sparse_y_pred": self.sparse_y_pred,
+            "axis": self.axis,
+        }
diff --git a/keras/metrics/iou_metrics_test.py b/keras/metrics/iou_metrics_test.py
new file mode 100644
index 000000000000..a642abeeeffe
--- /dev/null
+++ b/keras/metrics/iou_metrics_test.py
@@ -0,0 +1,475 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== +"""Tests for Keras metrics.""" + +import tensorflow.compat.v2 as tf + +from keras import metrics +from keras.testing_infra import test_combinations + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class IoUTest(tf.test.TestCase): + def test_config(self): + obj = metrics.IoU( + num_classes=2, target_class_ids=[1, 0], name="iou_class_1_0" + ) + self.assertEqual(obj.name, "iou_class_1_0") + self.assertEqual(obj.num_classes, 2) + self.assertEqual(obj.target_class_ids, [1, 0]) + + obj2 = metrics.IoU.from_config(obj.get_config()) + self.assertEqual(obj2.name, "iou_class_1_0") + self.assertEqual(obj2.num_classes, 2) + self.assertEqual(obj2.target_class_ids, [1, 0]) + + def test_unweighted(self): + y_pred = [0, 1, 0, 1] + y_true = [0, 0, 1, 1] + + obj = metrics.IoU(num_classes=2, target_class_ids=[0, 1]) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + + result = obj(y_true, y_pred) + + # cm = [[1, 1], + # [1, 1]] + # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_weighted(self): + y_pred = tf.constant([0, 1, 0, 1], dtype=tf.float32) + y_true = tf.constant([0, 0, 1, 1]) + sample_weight = tf.constant([0.2, 0.3, 0.4, 0.1]) + + obj = metrics.IoU(num_classes=2, target_class_ids=[1, 0]) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + + result = obj(y_true, y_pred, sample_weight=sample_weight) + + # cm = [[0.2, 0.3], + # [0.4, 0.1]] + # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, + # 0.1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = ( + 0.1 / (0.4 + 0.5 - 0.1) + 0.2 / (0.6 + 0.5 - 0.2) + ) / 2 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_multi_dim_input(self): + y_pred = tf.constant([[0, 1], [0, 1]], dtype=tf.float32) + y_true = tf.constant([[0, 0], [1, 1]]) + sample_weight = tf.constant([[0.2, 0.3], [0.4, 0.1]]) + + obj = metrics.IoU(num_classes=2, target_class_ids=[0, 1]) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + + result = obj(y_true, y_pred, sample_weight=sample_weight) + + # cm = [[0.2, 0.3], + # [0.4, 0.1]] + # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, + # 0.1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = ( + 0.2 / (0.6 + 0.5 - 0.2) + 0.1 / (0.4 + 0.5 - 0.1) + ) / 2 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_zero_valid_entries(self): + obj = metrics.IoU(num_classes=2, target_class_ids=[0, 1]) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + self.assertAllClose(self.evaluate(obj.result()), 0, atol=1e-3) + + def test_zero_and_non_zero_entries(self): + y_pred = tf.constant([1], dtype=tf.float32) + y_true = tf.constant([1]) + + obj = metrics.IoU(num_classes=2, target_class_ids=[0, 1]) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + result = obj(y_true, y_pred) + + # cm = [[0, 0], + # [0, 1]] + # sum_row = [0, 1], sum_col = [0, 1], true_positives = [0, 1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = (1 / (1 + 1 - 1)) / 1 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + 
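[Editor's note] The expected values in these tests are worked out by hand from the confusion-matrix arithmetic spelled out in the comments. As a quick cross-check, here is a minimal NumPy sketch (editorial, not part of the patch; `iou_from_confusion_matrix` is a name of our choosing) that reproduces that arithmetic:

```python
import numpy as np


def iou_from_confusion_matrix(cm, target_class_ids):
    """Mean IoU over `target_class_ids` for a confusion matrix whose rows
    are true classes and whose columns are predicted classes, following
    iou = true_positives / (sum_row + sum_col - true_positives)."""
    cm = np.asarray(cm, dtype=np.float64)
    sum_row = cm.sum(axis=0)  # total weight predicted as each class
    sum_col = cm.sum(axis=1)  # total weight truly belonging to each class
    true_positives = np.diag(cm)
    denominator = sum_row + sum_col - true_positives
    iou = np.divide(
        true_positives,
        denominator,
        out=np.zeros_like(true_positives),
        where=denominator != 0,
    )
    return iou[list(target_class_ids)].mean()


# Mirrors `test_weighted` above, where cm = [[0.2, 0.3], [0.4, 0.1]]:
print(iou_from_confusion_matrix([[0.2, 0.3], [0.4, 0.1]], [0, 1]))
# ~0.17361, i.e. (0.2 / 0.9 + 0.1 / 0.8) / 2
```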
+@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"]))
+class BinaryIoUTest(tf.test.TestCase):
+    def test_config(self):
+        obj = metrics.BinaryIoU(
+            target_class_ids=[1, 0], threshold=0.1, name="iou_class_1_0"
+        )
+        self.assertEqual(obj.name, "iou_class_1_0")
+        self.assertAlmostEqual(obj.threshold, 0.1)
+        self.assertEqual(obj.target_class_ids, [1, 0])
+
+        obj2 = metrics.BinaryIoU.from_config(obj.get_config())
+        self.assertEqual(obj2.name, "iou_class_1_0")
+        self.assertAlmostEqual(obj2.threshold, 0.1)
+        self.assertEqual(obj2.target_class_ids, [1, 0])
+
+    def test_different_thresholds_weighted(self):
+        y_true = [0, 1, 0, 1]
+        y_pred = [0.1, 0.2, 0.4, 0.7]
+
+        sample_weight = tf.constant([0.2, 0.3, 0.4, 0.1])
+        # with threshold = 0.3, y_pred will be converted to [0, 0, 1, 1]
+        # cm = [[0.2, 0.4],
+        #       [0.3, 0.1]]
+        # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2,
+        # 0.1]
+        # iou = true_positives / (sum_row + sum_col - true_positives)
+        expected_result = (
+            0.2 / (0.6 + 0.5 - 0.2) + 0.1 / (0.4 + 0.5 - 0.1)
+        ) / 2
+        obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.3)
+        self.evaluate(tf.compat.v1.variables_initializer(obj.variables))
+        result = obj(y_true, y_pred, sample_weight=sample_weight)
+        self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3)
+
+        sample_weight = tf.constant([0.1, 0.2, 0.4, 0.3])
+        # with threshold = 0.5, y_pred will be converted to [0, 0, 0, 1]
+        # cm = [[0.1+0.4, 0],
+        #       [0.2, 0.3]]
+        # sum_row = [0.5, 0.5], sum_col = [0.7, 0.3], true_positives = [0.5,
+        # 0.3]
+        # iou = true_positives / (sum_row + sum_col - true_positives)
+        expected_result = (
+            0.5 / (0.5 + 0.7 - 0.5) + 0.3 / (0.5 + 0.3 - 0.3)
+        ) / 2
+        obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.5)
+        self.evaluate(tf.compat.v1.variables_initializer(obj.variables))
+        result = obj(y_true, y_pred, sample_weight=sample_weight)
+        self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3)
+
+    def test_different_thresholds_unweighted(self):
+        y_true = [0, 1, 0, 1]
+        y_pred = [0.1, 0.2, 0.4, 0.7]
+
+        # with threshold = 0.3, y_pred will be converted to [0, 0, 1, 1]
+        # cm = [[1, 1],
+        #       [1, 1]]
+        # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
+        # iou = true_positives / (sum_row + sum_col - true_positives)
+        expected_result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2
+        obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.3)
+        self.evaluate(tf.compat.v1.variables_initializer(obj.variables))
+        result = obj(y_true, y_pred)
+        self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3)
+
+        # with threshold = 0.5, y_pred will be converted to [0, 0, 0, 1]
+        # cm = [[2, 0],
+        #       [1, 1]]
+        # sum_row = [2, 2], sum_col = [3, 1], true_positives = [2, 1]
+        # iou = true_positives / (sum_row + sum_col - true_positives)
+        expected_result = (2 / (2 + 3 - 2) + 1 / (2 + 1 - 1)) / 2
+        obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.5)
+        self.evaluate(tf.compat.v1.variables_initializer(obj.variables))
+        result = obj(y_true, y_pred)
+        self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3)
+
+    def test_multi_dim_input(self):
+        y_true = tf.constant([[0, 1], [0, 1]], dtype=tf.float32)
+        y_pred = tf.constant([[0.1, 0.7], [0.9, 0.3]])
+        threshold = 0.4  # y_pred will become [[0, 1], [1, 0]]
+        sample_weight = tf.constant([[0.2, 0.3], [0.4, 0.1]])
+        # cm = [[0.2, 0.4],
+        #       [0.1, 0.3]]
+        # sum_row = [0.6, 0.4], sum_col = [0.3, 0.7], true_positives = [0.2,
+        # 0.3]
+        # iou = true_positives /
(sum_row + sum_col - true_positives)) + expected_result = ( + 0.2 / (0.6 + 0.3 - 0.2) + 0.3 / (0.4 + 0.7 - 0.3) + ) / 2 + obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=threshold) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + result = obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_zero_valid_entries(self): + obj = metrics.BinaryIoU(target_class_ids=[0, 1]) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + self.assertAllClose(self.evaluate(obj.result()), 0, atol=1e-3) + + def test_zero_and_non_zero_entries(self): + y_pred = tf.constant([0.6], dtype=tf.float32) + threshold = 0.5 + y_true = tf.constant([1]) + + obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=threshold) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + result = obj(y_true, y_pred) + + # cm = [[0, 0], + # [0, 1]] + # sum_row = [0, 1], sum_col = [0, 1], true_positives = [0, 1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = 1 / (1 + 1 - 1) + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class MeanIoUTest(tf.test.TestCase): + def test_config(self): + m_obj = metrics.MeanIoU(num_classes=2, name="mean_iou") + self.assertEqual(m_obj.name, "mean_iou") + self.assertEqual(m_obj.num_classes, 2) + + m_obj2 = metrics.MeanIoU.from_config(m_obj.get_config()) + self.assertEqual(m_obj2.name, "mean_iou") + self.assertEqual(m_obj2.num_classes, 2) + + def test_unweighted(self): + y_pred = [0, 1, 0, 1] + y_true = [0, 0, 1, 1] + + m_obj = metrics.MeanIoU(num_classes=2) + self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) + + result = m_obj(y_true, y_pred) + + # cm = [[1, 1], + # [1, 1]] + # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_unweighted_ignore_class_255(self): + y_pred = [0, 1, 1, 1] + y_true = [0, 1, 2, 255] + + m_obj = metrics.MeanIoU(num_classes=3, ignore_class=255) + self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) + + result = m_obj(y_true, y_pred) + + # cm = [[1, 0, 0], + # [0, 1, 0], + # [0, 1, 0]] + # sum_row = [1, 1, 1], sum_col = [1, 2, 0], true_positives = [1, 1, 0] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = ( + 1 / (1 + 1 - 1) + 1 / (2 + 1 - 1) + 0 / (0 + 1 - 0) + ) / 3 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_unweighted_ignore_class_1(self): + y_pred = [0, 1, 1, 1] + y_true = [0, 1, 2, -1] + + m_obj = metrics.MeanIoU(num_classes=3, ignore_class=-1) + self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) + + result = m_obj(y_true, y_pred) + + # cm = [[1, 0, 0], + # [0, 1, 0], + # [0, 1, 0]] + # sum_row = [1, 1, 1], sum_col = [1, 2, 0], true_positives = [1, 1, 0] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = ( + 1 / (1 + 1 - 1) + 1 / (2 + 1 - 1) + 0 / (0 + 1 - 0) + ) / 3 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_weighted(self): + y_pred = tf.constant([0, 1, 0, 1], dtype=tf.float32) + y_true = tf.constant([0, 0, 1, 1]) + sample_weight = tf.constant([0.2, 0.3, 0.4, 0.1]) + + m_obj 
= metrics.MeanIoU(num_classes=2) + self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) + + result = m_obj(y_true, y_pred, sample_weight=sample_weight) + + # cm = [[0.2, 0.3], + # [0.4, 0.1]] + # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, + # 0.1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = ( + 0.2 / (0.6 + 0.5 - 0.2) + 0.1 / (0.4 + 0.5 - 0.1) + ) / 2 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_weighted_ignore_class_1(self): + y_pred = tf.constant([0, 1, 0, 1], dtype=tf.float32) + y_true = tf.constant([0, 0, 1, -1]) + sample_weight = tf.constant([0.2, 0.3, 0.4, 0.1]) + + m_obj = metrics.MeanIoU(num_classes=2, ignore_class=-1) + self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) + + result = m_obj(y_true, y_pred, sample_weight=sample_weight) + + # cm = [[0.2, 0.3], + # [0.4, 0.0]] + # sum_row = [0.6, 0.3], sum_col = [0.5, 0.4], true_positives = [0.2, + # 0.0] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = ( + 0.2 / (0.6 + 0.5 - 0.2) + 0.0 / (0.3 + 0.4 - 0.0) + ) / 2 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_multi_dim_input(self): + y_pred = tf.constant([[0, 1], [0, 1]], dtype=tf.float32) + y_true = tf.constant([[0, 0], [1, 1]]) + sample_weight = tf.constant([[0.2, 0.3], [0.4, 0.1]]) + + m_obj = metrics.MeanIoU(num_classes=2) + self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) + + result = m_obj(y_true, y_pred, sample_weight=sample_weight) + + # cm = [[0.2, 0.3], + # [0.4, 0.1]] + # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, + # 0.1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = ( + 0.2 / (0.6 + 0.5 - 0.2) + 0.1 / (0.4 + 0.5 - 0.1) + ) / 2 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_zero_valid_entries(self): + m_obj = metrics.MeanIoU(num_classes=2) + self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) + self.assertAllClose(self.evaluate(m_obj.result()), 0, atol=1e-3) + + def test_zero_and_non_zero_entries(self): + y_pred = tf.constant([1], dtype=tf.float32) + y_true = tf.constant([1]) + + m_obj = metrics.MeanIoU(num_classes=2) + self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) + result = m_obj(y_true, y_pred) + + # cm = [[0, 0], + # [0, 1]] + # sum_row = [0, 1], sum_col = [0, 1], true_positives = [0, 1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = (0 + 1 / (1 + 1 - 1)) / 1 + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class OneHotIoUTest(tf.test.TestCase): + def test_unweighted(self): + y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) + # y_true will be converted to [2, 0, 1, 0] + y_pred = tf.constant( + [[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], [0.1, 0.4, 0.5]] + ) + # y_pred will be converted to [2, 2, 0, 2] + # cm = [[0, 0, 2], + # [1, 0, 0], + # [0, 0, 1] + # sum_row = [1, 0, 3], sum_col = [2, 1, 1], true_positives = [0, 0, 1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = (0 / (1 + 2 - 0) + 1 / (3 + 1 - 1)) / 2 + obj = metrics.OneHotIoU(num_classes=3, target_class_ids=[0, 2]) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + result = obj(y_true, y_pred) + 
self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_weighted(self): + y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) + # y_true will be converted to [2, 0, 1, 0] + y_pred = tf.constant( + [[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], [0.1, 0.4, 0.5]] + ) + # y_pred will be converted to [2, 2, 0, 2] + sample_weight = [0.1, 0.2, 0.3, 0.4] + # cm = [[0, 0, 0.2+0.4], + # [0.3, 0, 0], + # [0, 0, 0.1]] + # sum_row = [0.3, 0, 0.7], sum_col = [0.6, 0.3, 0.1] + # true_positives = [0, 0, 0.1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = (0 / (0.3 + 0.6 - 0) + 0.1 / (0.7 + 0.1 - 0.1)) / 2 + obj = metrics.OneHotIoU(num_classes=3, target_class_ids=[0, 2]) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + result = obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class OneHotMeanIoUTest(tf.test.TestCase): + def test_unweighted(self): + y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) + # y_true will be converted to [2, 0, 1, 0] + y_pred = tf.constant( + [[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], [0.1, 0.4, 0.5]] + ) + # y_pred will be converted to [2, 2, 0, 2] + # cm = [[0, 0, 2], + # [1, 0, 0], + # [0, 0, 1] + # sum_row = [1, 0, 3], sum_col = [2, 1, 1], true_positives = [0, 0, 1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = (0 + 0 + 1 / (3 + 1 - 1)) / 3 + obj = metrics.OneHotMeanIoU(num_classes=3) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + result = obj(y_true, y_pred) + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + def test_weighted(self): + y_true = tf.constant( + [ + [0, 0, 1], + [1, 0, 0], + [0, 1, 0], + [1, 0, 0], + [1, 0, 0], + ] + ) + # y_true will be converted to [2, 0, 1, 0, 0] + y_pred = tf.constant( + [ + [0.2, 0.3, 0.5], + [0.1, 0.2, 0.7], + [0.5, 0.3, 0.1], + [0.1, 0.4, 0.5], + [0.6, 0.2, 0.2], + ] + ) + # y_pred will be converted to [2, 2, 0, 2, 0] + sample_weight = [0.1, 0.2, 0.3, 0.3, 0.1] + # cm = [[0.1, 0, 0.2+0.3], + # [0.3, 0, 0], + # [0, 0, 0.1]] + # sum_row = [0.4, 0, 0.6], sum_col = [0.6, 0.3, 0.1] + # true_positives = [0.1, 0, 0.1] + # iou = true_positives / (sum_row + sum_col - true_positives)) + expected_result = ( + 0.1 / (0.4 + 0.6 - 0.1) + 0 + 0.1 / (0.6 + 0.1 - 0.1) + ) / 3 + obj = metrics.OneHotMeanIoU(num_classes=3) + self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) + result = obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/metrics.py b/keras/metrics/metrics.py deleted file mode 100644 index 18a114d28250..000000000000 --- a/keras/metrics/metrics.py +++ /dev/null @@ -1,3471 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# pylint: disable=g-classes-have-attributes -# pylint: disable=g-doc-return-or-yield -"""Built-in metrics.""" - -import abc -from typing import List, Tuple, Union - -from keras import activations -from keras import backend -from keras.dtensor import utils as dtensor_utils -from keras.losses import binary_crossentropy -from keras.losses import categorical_crossentropy -from keras.losses import categorical_hinge -from keras.losses import hinge -from keras.losses import kullback_leibler_divergence -from keras.losses import logcosh -from keras.losses import mean_absolute_error -from keras.losses import mean_absolute_percentage_error -from keras.losses import mean_squared_error -from keras.losses import mean_squared_logarithmic_error -from keras.losses import poisson -from keras.losses import sparse_categorical_crossentropy -from keras.losses import squared_hinge -from keras.metrics import base_metric -from keras.utils import losses_utils -from keras.utils import metrics_utils -from keras.utils.generic_utils import to_list -from keras.utils.tf_utils import is_tensor_or_variable -import numpy as np -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -@keras_export('keras.metrics.MeanRelativeError') -class MeanRelativeError(base_metric.Mean): - """Computes the mean relative error by normalizing with the given values. - - This metric creates two local variables, `total` and `count` that are used to - compute the mean relative error. This is weighted by `sample_weight`, and - it is ultimately returned as `mean_relative_error`: - an idempotent operation that simply divides `total` by `count`. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - normalizer: The normalizer values with same shape as predictions. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.MeanRelativeError(normalizer=[1, 3, 2, 3]) - >>> m.update_state([1, 3, 2, 3], [2, 4, 6, 8]) - - >>> # metric = mean(|y_pred - y_true| / normalizer) - >>> # = mean([1, 1, 4, 5] / [1, 3, 2, 3]) = mean([1, 1/3, 2, 5/3]) - >>> # = 5/4 = 1.25 - >>> m.result().numpy() - 1.25 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.MeanRelativeError(normalizer=[1, 3])]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, normalizer, name=None, dtype=None): - super().__init__(name=name, dtype=dtype) - normalizer = tf.cast(normalizer, self._dtype) - self.normalizer = normalizer - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates metric statistics. - - Args: - y_true: The ground truth values. - y_pred: The predicted values. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. 
- """ - y_true = tf.cast(y_true, self._dtype) - y_pred = tf.cast(y_pred, self._dtype) - [y_pred, y_true], sample_weight = \ - metrics_utils.ragged_assert_compatible_and_get_flat_values( - [y_pred, y_true], sample_weight) - y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( - y_pred, y_true) - - y_pred, self.normalizer = losses_utils.remove_squeezable_dimensions( - y_pred, self.normalizer) - y_pred.shape.assert_is_compatible_with(y_true.shape) - relative_errors = tf.math.divide_no_nan( - tf.abs(y_true - y_pred), self.normalizer) - - return super().update_state( - relative_errors, sample_weight=sample_weight) - - def get_config(self): - n = self.normalizer - config = {'normalizer': backend.eval(n) if is_tensor_or_variable(n) else n} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.metrics.Accuracy') -class Accuracy(base_metric.MeanMetricWrapper): - """Calculates how often predictions equal labels. - - This metric creates two local variables, `total` and `count` that are used to - compute the frequency with which `y_pred` matches `y_true`. This frequency is - ultimately returned as `binary accuracy`: an idempotent operation that simply - divides `total` by `count`. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.Accuracy() - >>> m.update_state([[1], [2], [3], [4]], [[0], [2], [3], [4]]) - >>> m.result().numpy() - 0.75 - - >>> m.reset_state() - >>> m.update_state([[1], [2], [3], [4]], [[0], [2], [3], [4]], - ... sample_weight=[1, 1, 0, 0]) - >>> m.result().numpy() - 0.5 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.Accuracy()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='accuracy', dtype=None): - super().__init__(accuracy, name, dtype=dtype) - - -@keras_export('keras.metrics.BinaryAccuracy') -class BinaryAccuracy(base_metric.MeanMetricWrapper): - """Calculates how often predictions match binary labels. - - This metric creates two local variables, `total` and `count` that are used to - compute the frequency with which `y_pred` matches `y_true`. This frequency is - ultimately returned as `binary accuracy`: an idempotent operation that simply - divides `total` by `count`. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - threshold: (Optional) Float representing the threshold for deciding - whether prediction values are 1 or 0. - - Standalone usage: - - >>> m = tf.keras.metrics.BinaryAccuracy() - >>> m.update_state([[1], [1], [0], [0]], [[0.98], [1], [0], [0.6]]) - >>> m.result().numpy() - 0.75 - - >>> m.reset_state() - >>> m.update_state([[1], [1], [0], [0]], [[0.98], [1], [0], [0.6]], - ... 
sample_weight=[1, 0, 0, 1]) - >>> m.result().numpy() - 0.5 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.BinaryAccuracy()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='binary_accuracy', dtype=None, threshold=0.5): - super().__init__( - metrics_utils.binary_matches, name, dtype=dtype, threshold=threshold) - - -@keras_export('keras.metrics.CategoricalAccuracy') -class CategoricalAccuracy(base_metric.MeanMetricWrapper): - """Calculates how often predictions match one-hot labels. - - You can provide logits of classes as `y_pred`, since argmax of - logits and probabilities are same. - - This metric creates two local variables, `total` and `count` that are used to - compute the frequency with which `y_pred` matches `y_true`. This frequency is - ultimately returned as `categorical accuracy`: an idempotent operation that - simply divides `total` by `count`. - - `y_pred` and `y_true` should be passed in as vectors of probabilities, rather - than as labels. If necessary, use `tf.one_hot` to expand `y_true` as a vector. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.CategoricalAccuracy() - >>> m.update_state([[0, 0, 1], [0, 1, 0]], [[0.1, 0.9, 0.8], - ... [0.05, 0.95, 0]]) - >>> m.result().numpy() - 0.5 - - >>> m.reset_state() - >>> m.update_state([[0, 0, 1], [0, 1, 0]], [[0.1, 0.9, 0.8], - ... [0.05, 0.95, 0]], - ... sample_weight=[0.7, 0.3]) - >>> m.result().numpy() - 0.3 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.CategoricalAccuracy()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='categorical_accuracy', dtype=None): - super().__init__( - lambda y_true, y_pred: metrics_utils.sparse_categorical_matches( # pylint: disable=g-long-lambda - tf.math.argmax(y_true, axis=-1), y_pred), - name, - dtype=dtype) - - -@keras_export('keras.metrics.SparseCategoricalAccuracy') -class SparseCategoricalAccuracy(base_metric.MeanMetricWrapper): - """Calculates how often predictions match integer labels. - - ```python - acc = np.dot(sample_weight, np.equal(y_true, np.argmax(y_pred, axis=1)) - ``` - - You can provide logits of classes as `y_pred`, since argmax of - logits and probabilities are same. - - This metric creates two local variables, `total` and `count` that are used to - compute the frequency with which `y_pred` matches `y_true`. This frequency is - ultimately returned as `sparse categorical accuracy`: an idempotent operation - that simply divides `total` by `count`. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.SparseCategoricalAccuracy() - >>> m.update_state([[2], [1]], [[0.1, 0.6, 0.3], [0.05, 0.95, 0]]) - >>> m.result().numpy() - 0.5 - - >>> m.reset_state() - >>> m.update_state([[2], [1]], [[0.1, 0.6, 0.3], [0.05, 0.95, 0]], - ... 
sample_weight=[0.7, 0.3]) - >>> m.result().numpy() - 0.3 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='sparse_categorical_accuracy', dtype=None): - super().__init__( - metrics_utils.sparse_categorical_matches, name, dtype=dtype) - - -_SPARSE_CATEGORICAL_UPDATE_STATE_DOCSTRING = """Accumulates metric statistics. - -For sparse categorical metrics, the shapes of `y_true` and `y_pred` are -different. - -Args: - y_true: Ground truth label values. shape = `[batch_size, d0, .. dN-1]` or - shape = `[batch_size, d0, .. dN-1, 1]`. - y_pred: The predicted probability values. shape = `[batch_size, d0, .. dN]`. - sample_weight: Optional `sample_weight` acts as a - coefficient for the metric. If a scalar is provided, then the metric is - simply scaled by the given value. If `sample_weight` is a tensor of size - `[batch_size]`, then the metric for each sample of the batch is rescaled - by the corresponding element in the `sample_weight` vector. If the shape - of `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted - to this shape), then each metric element of `y_pred` is scaled by the - corresponding value of `sample_weight`. (Note on `dN-1`: all metric - functions reduce by 1 dimension, usually the last axis (-1)). - -Returns: - Update op. -""" - -SparseCategoricalAccuracy.update_state.__doc__ = _SPARSE_CATEGORICAL_UPDATE_STATE_DOCSTRING - - -@keras_export('keras.metrics.TopKCategoricalAccuracy') -class TopKCategoricalAccuracy(base_metric.MeanMetricWrapper): - """Computes how often targets are in the top `K` predictions. - - Args: - k: (Optional) Number of top elements to look at for computing accuracy. - Defaults to 5. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.TopKCategoricalAccuracy(k=1) - >>> m.update_state([[0, 0, 1], [0, 1, 0]], - ... [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]) - >>> m.result().numpy() - 0.5 - - >>> m.reset_state() - >>> m.update_state([[0, 0, 1], [0, 1, 0]], - ... [[0.1, 0.9, 0.8], [0.05, 0.95, 0]], - ... sample_weight=[0.7, 0.3]) - >>> m.result().numpy() - 0.3 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.TopKCategoricalAccuracy()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, k=5, name='top_k_categorical_accuracy', dtype=None): - super().__init__( - lambda yt, yp, k: metrics_utils.sparse_top_k_categorical_matches( # pylint: disable=g-long-lambda - tf.math.argmax(yt, axis=-1), yp, k), - name, - dtype=dtype, - k=k) - - -@keras_export('keras.metrics.SparseTopKCategoricalAccuracy') -class SparseTopKCategoricalAccuracy(base_metric.MeanMetricWrapper): - """Computes how often integer targets are in the top `K` predictions. - - Args: - k: (Optional) Number of top elements to look at for computing accuracy. - Defaults to 5. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.SparseTopKCategoricalAccuracy(k=1) - >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]) - >>> m.result().numpy() - 0.5 - - >>> m.reset_state() - >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]], - ... 
sample_weight=[0.7, 0.3]) - >>> m.result().numpy() - 0.3 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, k=5, name='sparse_top_k_categorical_accuracy', dtype=None): - super().__init__( - metrics_utils.sparse_top_k_categorical_matches, name, dtype=dtype, k=k) - - -SparseTopKCategoricalAccuracy.update_state.__doc__ = _SPARSE_CATEGORICAL_UPDATE_STATE_DOCSTRING - - -class _ConfusionMatrixConditionCount(base_metric.Metric): - """Calculates the number of the given confusion matrix condition. - - Args: - confusion_matrix_cond: One of `metrics_utils.ConfusionMatrix` conditions. - thresholds: (Optional) Defaults to 0.5. A float value or a python list/tuple - of float threshold values in [0, 1]. A threshold is compared with - prediction values to determine the truth value of predictions (i.e., above - the threshold is `true`, below is `false`). One metric value is generated - for each threshold value. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - """ - - def __init__(self, - confusion_matrix_cond, - thresholds=None, - name=None, - dtype=None): - super().__init__(name=name, dtype=dtype) - self._confusion_matrix_cond = confusion_matrix_cond - self.init_thresholds = thresholds - self.thresholds = metrics_utils.parse_init_thresholds( - thresholds, default_threshold=0.5) - self._thresholds_distributed_evenly = ( - metrics_utils.is_evenly_distributed_thresholds(self.thresholds)) - self.accumulator = self.add_weight( - 'accumulator', - shape=(len(self.thresholds),), - initializer='zeros') - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates the metric statistics. - - Args: - y_true: The ground truth values. - y_pred: The predicted values. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. - """ - return metrics_utils.update_confusion_matrix_variables( - {self._confusion_matrix_cond: self.accumulator}, - y_true, - y_pred, - thresholds=self.thresholds, - thresholds_distributed_evenly=self._thresholds_distributed_evenly, - sample_weight=sample_weight) - - def result(self): - if len(self.thresholds) == 1: - result = self.accumulator[0] - else: - result = self.accumulator - return tf.convert_to_tensor(result) - - def reset_state(self): - backend.batch_set_value([ - (v, np.zeros(v.shape.as_list())) for v in self.variables - ]) - - def get_config(self): - config = {'thresholds': self.init_thresholds} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.metrics.FalsePositives') -class FalsePositives(_ConfusionMatrixConditionCount): - """Calculates the number of false positives. - - If `sample_weight` is given, calculates the sum of the weights of - false positives. This metric creates one local variable, `accumulator` - that is used to keep track of the number of false positives. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - thresholds: (Optional) Defaults to 0.5. A float value, or a Python - list/tuple of float threshold values in [0, 1]. 
A threshold is compared - with prediction values to determine the truth value of predictions - (i.e., above the threshold is `true`, below is `false`). If used with a - loss function that sets `from_logits=True` (i.e. no sigmoid applied to - predictions), `thresholds` should be set to 0. One metric value is - generated for each threshold value. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.FalsePositives() - >>> m.update_state([0, 1, 0, 0], [0, 0, 1, 1]) - >>> m.result().numpy() - 2.0 - - >>> m.reset_state() - >>> m.update_state([0, 1, 0, 0], [0, 0, 1, 1], sample_weight=[0, 0, 1, 0]) - >>> m.result().numpy() - 1.0 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.FalsePositives()]) - ``` - - Usage with a loss with `from_logits=True`: - - ```python - model.compile(optimizer='adam', - loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), - metrics=[tf.keras.metrics.FalsePositives(thresholds=0)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, thresholds=None, name=None, dtype=None): - super().__init__( - confusion_matrix_cond=metrics_utils.ConfusionMatrix.FALSE_POSITIVES, - thresholds=thresholds, - name=name, - dtype=dtype) - - -@keras_export('keras.metrics.FalseNegatives') -class FalseNegatives(_ConfusionMatrixConditionCount): - """Calculates the number of false negatives. - - If `sample_weight` is given, calculates the sum of the weights of - false negatives. This metric creates one local variable, `accumulator` - that is used to keep track of the number of false negatives. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - thresholds: (Optional) Defaults to 0.5. A float value, or a Python - list/tuple of float threshold values in [0, 1]. A threshold is compared - with prediction values to determine the truth value of predictions - (i.e., above the threshold is `true`, below is `false`). If used with a - loss function that sets `from_logits=True` (i.e. no sigmoid applied to - predictions), `thresholds` should be set to 0. One metric value is - generated for each threshold value. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.FalseNegatives() - >>> m.update_state([0, 1, 1, 1], [0, 1, 0, 0]) - >>> m.result().numpy() - 2.0 - - >>> m.reset_state() - >>> m.update_state([0, 1, 1, 1], [0, 1, 0, 0], sample_weight=[0, 0, 1, 0]) - >>> m.result().numpy() - 1.0 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.FalseNegatives()]) - ``` - - Usage with a loss with `from_logits=True`: - - ```python - model.compile(optimizer='adam', - loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), - metrics=[tf.keras.metrics.FalseNegatives(thresholds=0)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, thresholds=None, name=None, dtype=None): - super().__init__( - confusion_matrix_cond=metrics_utils.ConfusionMatrix.FALSE_NEGATIVES, - thresholds=thresholds, - name=name, - dtype=dtype) - - -@keras_export('keras.metrics.TrueNegatives') -class TrueNegatives(_ConfusionMatrixConditionCount): - """Calculates the number of true negatives. - - If `sample_weight` is given, calculates the sum of the weights of - true negatives. 
This metric creates one local variable, `accumulator` - that is used to keep track of the number of true negatives. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - thresholds: (Optional) Defaults to 0.5. A float value, or a Python - list/tuple of float threshold values in [0, 1]. A threshold is compared - with prediction values to determine the truth value of predictions - (i.e., above the threshold is `true`, below is `false`). If used with a - loss function that sets `from_logits=True` (i.e. no sigmoid applied to - predictions), `thresholds` should be set to 0. One metric value is - generated for each threshold value. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.TrueNegatives() - >>> m.update_state([0, 1, 0, 0], [1, 1, 0, 0]) - >>> m.result().numpy() - 2.0 - - >>> m.reset_state() - >>> m.update_state([0, 1, 0, 0], [1, 1, 0, 0], sample_weight=[0, 0, 1, 0]) - >>> m.result().numpy() - 1.0 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.TrueNegatives()]) - ``` - - Usage with a loss with `from_logits=True`: - - ```python - model.compile(optimizer='adam', - loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), - metrics=[tf.keras.metrics.TrueNegatives(thresholds=0)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, thresholds=None, name=None, dtype=None): - super().__init__( - confusion_matrix_cond=metrics_utils.ConfusionMatrix.TRUE_NEGATIVES, - thresholds=thresholds, - name=name, - dtype=dtype) - - -@keras_export('keras.metrics.TruePositives') -class TruePositives(_ConfusionMatrixConditionCount): - """Calculates the number of true positives. - - If `sample_weight` is given, calculates the sum of the weights of - true positives. This metric creates one local variable, `true_positives` - that is used to keep track of the number of true positives. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - thresholds: (Optional) Defaults to 0.5. A float value, or a Python - list/tuple of float threshold values in [0, 1]. A threshold is compared - with prediction values to determine the truth value of predictions - (i.e., above the threshold is `true`, below is `false`). If used with a - loss function that sets `from_logits=True` (i.e. no sigmoid applied to - predictions), `thresholds` should be set to 0. One metric value is - generated for each threshold value. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. 
- - Standalone usage: - - >>> m = tf.keras.metrics.TruePositives() - >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1]) - >>> m.result().numpy() - 2.0 - - >>> m.reset_state() - >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1], sample_weight=[0, 0, 1, 0]) - >>> m.result().numpy() - 1.0 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.TruePositives()]) - ``` - - Usage with a loss with `from_logits=True`: - - ```python - model.compile(optimizer='adam', - loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), - metrics=[tf.keras.metrics.TruePositives(thresholds=0)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, thresholds=None, name=None, dtype=None): - super().__init__( - confusion_matrix_cond=metrics_utils.ConfusionMatrix.TRUE_POSITIVES, - thresholds=thresholds, - name=name, - dtype=dtype) - - -@keras_export('keras.metrics.Precision') -class Precision(base_metric.Metric): - """Computes the precision of the predictions with respect to the labels. - - The metric creates two local variables, `true_positives` and `false_positives` - that are used to compute the precision. This value is ultimately returned as - `precision`, an idempotent operation that simply divides `true_positives` - by the sum of `true_positives` and `false_positives`. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - If `top_k` is set, we'll calculate precision as how often on average a class - among the top-k classes with the highest predicted values of a batch entry is - correct and can be found in the label for that entry. - - If `class_id` is specified, we calculate precision by considering only the - entries in the batch for which `class_id` is above the threshold and/or in the - top-k highest predictions, and computing the fraction of them for which - `class_id` is indeed a correct label. - - Args: - thresholds: (Optional) A float value, or a Python list/tuple of float - threshold values in [0, 1]. A threshold is compared with prediction - values to determine the truth value of predictions (i.e., above the - threshold is `true`, below is `false`). If used with a loss function that - sets `from_logits=True` (i.e. no sigmoid applied to predictions), - `thresholds` should be set to 0. One metric value is generated for each - threshold value. If neither thresholds nor top_k are set, the default is - to calculate precision with `thresholds=0.5`. - top_k: (Optional) Unset by default. An int value specifying the top-k - predictions to consider when calculating precision. - class_id: (Optional) Integer class ID for which we want binary metrics. - This must be in the half-open interval `[0, num_classes)`, where - `num_classes` is the last dimension of predictions. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. 
- - Standalone usage: - - >>> m = tf.keras.metrics.Precision() - >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1]) - >>> m.result().numpy() - 0.6666667 - - >>> m.reset_state() - >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1], sample_weight=[0, 0, 1, 0]) - >>> m.result().numpy() - 1.0 - - >>> # With top_k=2, it will calculate precision over y_true[:2] and y_pred[:2] - >>> m = tf.keras.metrics.Precision(top_k=2) - >>> m.update_state([0, 0, 1, 1], [1, 1, 1, 1]) - >>> m.result().numpy() - 0.0 - - >>> # With top_k=4, it will calculate precision over y_true[:4] and y_pred[:4] - >>> m = tf.keras.metrics.Precision(top_k=4) - >>> m.update_state([0, 0, 1, 1], [1, 1, 1, 1]) - >>> m.result().numpy() - 0.5 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.Precision()]) - ``` - - Usage with a loss with `from_logits=True`: - - ```python - model.compile(optimizer='adam', - loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), - metrics=[tf.keras.metrics.Precision(thresholds=0)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - thresholds=None, - top_k=None, - class_id=None, - name=None, - dtype=None): - super().__init__(name=name, dtype=dtype) - self.init_thresholds = thresholds - self.top_k = top_k - self.class_id = class_id - - default_threshold = 0.5 if top_k is None else metrics_utils.NEG_INF - self.thresholds = metrics_utils.parse_init_thresholds( - thresholds, default_threshold=default_threshold) - self._thresholds_distributed_evenly = ( - metrics_utils.is_evenly_distributed_thresholds(self.thresholds)) - self.true_positives = self.add_weight( - 'true_positives', - shape=(len(self.thresholds),), - initializer='zeros') - self.false_positives = self.add_weight( - 'false_positives', - shape=(len(self.thresholds),), - initializer='zeros') - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates true positive and false positive statistics. - - Args: - y_true: The ground truth values, with the same dimensions as `y_pred`. - Will be cast to `bool`. - y_pred: The predicted values. Each element must be in the range `[0, 1]`. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. - """ - return metrics_utils.update_confusion_matrix_variables( - { - metrics_utils.ConfusionMatrix.TRUE_POSITIVES: self.true_positives, - metrics_utils.ConfusionMatrix.FALSE_POSITIVES: self.false_positives - }, - y_true, - y_pred, - thresholds=self.thresholds, - thresholds_distributed_evenly=self._thresholds_distributed_evenly, - top_k=self.top_k, - class_id=self.class_id, - sample_weight=sample_weight) - - def result(self): - result = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_positives)) - return result[0] if len(self.thresholds) == 1 else result - - def reset_state(self): - num_thresholds = len(to_list(self.thresholds)) - backend.batch_set_value([(v, np.zeros((num_thresholds,))) - for v in (self.true_positives, - self.false_positives)]) - - def get_config(self): - config = { - 'thresholds': self.init_thresholds, - 'top_k': self.top_k, - 'class_id': self.class_id - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.metrics.Recall') -class Recall(base_metric.Metric): - """Computes the recall of the predictions with respect to the labels. 
- - This metric creates two local variables, `true_positives` and - `false_negatives`, that are used to compute the recall. This value is - ultimately returned as `recall`, an idempotent operation that simply divides - `true_positives` by the sum of `true_positives` and `false_negatives`. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - If `top_k` is set, recall will be computed as how often on average a class - among the labels of a batch entry is in the top-k predictions. - - If `class_id` is specified, we calculate recall by considering only the - entries in the batch for which `class_id` is in the label, and computing the - fraction of them for which `class_id` is above the threshold and/or in the - top-k predictions. - - Args: - thresholds: (Optional) A float value, or a Python list/tuple of float - threshold values in [0, 1]. A threshold is compared with prediction - values to determine the truth value of predictions (i.e., above the - threshold is `true`, below is `false`). If used with a loss function that - sets `from_logits=True` (i.e. no sigmoid applied to predictions), - `thresholds` should be set to 0. One metric value is generated for each - threshold value. If neither thresholds nor top_k are set, the default is - to calculate recall with `thresholds=0.5`. - top_k: (Optional) Unset by default. An int value specifying the top-k - predictions to consider when calculating recall. - class_id: (Optional) Integer class ID for which we want binary metrics. - This must be in the half-open interval `[0, num_classes)`, where - `num_classes` is the last dimension of predictions. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.Recall() - >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1]) - >>> m.result().numpy() - 0.6666667 - - >>> m.reset_state() - >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1], sample_weight=[0, 0, 1, 0]) - >>> m.result().numpy() - 1.0 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.Recall()]) - ``` - - Usage with a loss with `from_logits=True`: - - ```python - model.compile(optimizer='adam', - loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), - metrics=[tf.keras.metrics.Recall(thresholds=0)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - thresholds=None, - top_k=None, - class_id=None, - name=None, - dtype=None): - super().__init__(name=name, dtype=dtype) - self.init_thresholds = thresholds - self.top_k = top_k - self.class_id = class_id - - default_threshold = 0.5 if top_k is None else metrics_utils.NEG_INF - self.thresholds = metrics_utils.parse_init_thresholds( - thresholds, default_threshold=default_threshold) - self._thresholds_distributed_evenly = ( - metrics_utils.is_evenly_distributed_thresholds(self.thresholds)) - self.true_positives = self.add_weight( - 'true_positives', - shape=(len(self.thresholds),), - initializer='zeros') - self.false_negatives = self.add_weight( - 'false_negatives', - shape=(len(self.thresholds),), - initializer='zeros') - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates true positive and false negative statistics. - - Args: - y_true: The ground truth values, with the same dimensions as `y_pred`. - Will be cast to `bool`. - y_pred: The predicted values. Each element must be in the range `[0, 1]`. - sample_weight: Optional weighting of each example. 
Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. - """ - return metrics_utils.update_confusion_matrix_variables( - { - metrics_utils.ConfusionMatrix.TRUE_POSITIVES: self.true_positives, - metrics_utils.ConfusionMatrix.FALSE_NEGATIVES: self.false_negatives - }, - y_true, - y_pred, - thresholds=self.thresholds, - thresholds_distributed_evenly=self._thresholds_distributed_evenly, - top_k=self.top_k, - class_id=self.class_id, - sample_weight=sample_weight) - - def result(self): - result = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_negatives)) - return result[0] if len(self.thresholds) == 1 else result - - def reset_state(self): - num_thresholds = len(to_list(self.thresholds)) - backend.batch_set_value([(v, np.zeros((num_thresholds,))) - for v in (self.true_positives, - self.false_negatives)]) - - def get_config(self): - config = { - 'thresholds': self.init_thresholds, - 'top_k': self.top_k, - 'class_id': self.class_id - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class SensitivitySpecificityBase(base_metric.Metric, metaclass=abc.ABCMeta): - """Abstract base class for computing sensitivity and specificity. - - For additional information about specificity and sensitivity, see - [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). - """ - - def __init__(self, - value, - num_thresholds=200, - class_id=None, - name=None, - dtype=None): - super().__init__(name=name, dtype=dtype) - if num_thresholds <= 0: - raise ValueError( - 'Argument `num_thresholds` must be an integer > 0. ' - f'Received: num_thresholds={num_thresholds}') - self.value = value - self.class_id = class_id - self.true_positives = self.add_weight( - 'true_positives', - shape=(num_thresholds,), - initializer='zeros') - self.true_negatives = self.add_weight( - 'true_negatives', - shape=(num_thresholds,), - initializer='zeros') - self.false_positives = self.add_weight( - 'false_positives', - shape=(num_thresholds,), - initializer='zeros') - self.false_negatives = self.add_weight( - 'false_negatives', - shape=(num_thresholds,), - initializer='zeros') - - # Compute `num_thresholds` thresholds in [0, 1] - if num_thresholds == 1: - self.thresholds = [0.5] - self._thresholds_distributed_evenly = False - else: - thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) - for i in range(num_thresholds - 2)] - self.thresholds = [0.0] + thresholds + [1.0] - self._thresholds_distributed_evenly = True - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates confusion matrix statistics. - - Args: - y_true: The ground truth values. - y_pred: The predicted values. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. 
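(Aside: the threshold construction in `SensitivitySpecificityBase.__init__` above is easy to verify by hand. A minimal sketch of the same list comprehension for an assumed `num_thresholds=5`; no TensorFlow needed.)

```python
num_thresholds = 5

# (num_thresholds - 2) interior points, plus the 0.0 and 1.0 endpoints,
# mirroring the constructor above.
interior = [(i + 1) * 1.0 / (num_thresholds - 1)
            for i in range(num_thresholds - 2)]
thresholds = [0.0] + interior + [1.0]
print(thresholds)  # [0.0, 0.25, 0.5, 0.75, 1.0]
```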
- """ - return metrics_utils.update_confusion_matrix_variables( - { - metrics_utils.ConfusionMatrix.TRUE_POSITIVES: self.true_positives, - metrics_utils.ConfusionMatrix.TRUE_NEGATIVES: self.true_negatives, - metrics_utils.ConfusionMatrix.FALSE_POSITIVES: self.false_positives, - metrics_utils.ConfusionMatrix.FALSE_NEGATIVES: self.false_negatives, - }, - y_true, - y_pred, - thresholds=self.thresholds, - thresholds_distributed_evenly=self._thresholds_distributed_evenly, - class_id=self.class_id, - sample_weight=sample_weight) - - def reset_state(self): - num_thresholds = len(self.thresholds) - confusion_matrix_variables = (self.true_positives, self.true_negatives, - self.false_positives, self.false_negatives) - backend.batch_set_value([ - (v, np.zeros((num_thresholds,))) for v in confusion_matrix_variables - ]) - - def get_config(self): - config = {'class_id': self.class_id} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - def _find_max_under_constraint(self, constrained, dependent, predicate): - """Returns the maximum of dependent_statistic that satisfies the constraint. - - Args: - constrained: Over these values the constraint - is specified. A rank-1 tensor. - dependent: From these values the maximum that satiesfies the - constraint is selected. Values in this tensor and in - `constrained` are linked by having the same threshold at each - position, hence this tensor must have the same shape. - predicate: A binary boolean functor to be applied to arguments - `constrained` and `self.value`, e.g. `tf.greater`. - - Returns maximal dependent value, if no value satiesfies the constraint 0.0. - """ - feasible = tf.where(predicate(constrained, self.value)) - feasible_exists = tf.greater(tf.size(feasible), 0) - max_dependent = tf.reduce_max(tf.gather(dependent, feasible)) - - return tf.where(feasible_exists, max_dependent, 0.0) - - -@keras_export('keras.metrics.SensitivityAtSpecificity') -class SensitivityAtSpecificity(SensitivitySpecificityBase): - """Computes best sensitivity where specificity is >= specified value. - - the sensitivity at a given specificity. - - `Sensitivity` measures the proportion of actual positives that are correctly - identified as such (tp / (tp + fn)). - `Specificity` measures the proportion of actual negatives that are correctly - identified as such (tn / (tn + fp)). - - This metric creates four local variables, `true_positives`, `true_negatives`, - `false_positives` and `false_negatives` that are used to compute the - sensitivity at the given specificity. The threshold for the given specificity - value is computed and used to evaluate the corresponding sensitivity. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - If `class_id` is specified, we calculate precision by considering only the - entries in the batch for which `class_id` is above the threshold predictions, - and computing the fraction of them for which `class_id` is indeed a correct - label. - - For additional information about specificity and sensitivity, see - [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). - - Args: - specificity: A scalar value in range `[0, 1]`. - num_thresholds: (Optional) Defaults to 200. The number of thresholds to - use for matching the given specificity. - class_id: (Optional) Integer class ID for which we want binary metrics. - This must be in the half-open interval `[0, num_classes)`, where - `num_classes` is the last dimension of predictions. 
- name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.SensitivityAtSpecificity(0.5) - >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) - >>> m.result().numpy() - 0.5 - - >>> m.reset_state() - >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8], - ... sample_weight=[1, 1, 2, 2, 1]) - >>> m.result().numpy() - 0.333333 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.SensitivityAtSpecificity(specificity=0.5)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - specificity, - num_thresholds=200, - class_id=None, - name=None, - dtype=None): - if specificity < 0 or specificity > 1: - raise ValueError( - 'Argument `specificity` must be in the range [0, 1]. ' - f'Received: specificity={specificity}') - self.specificity = specificity - self.num_thresholds = num_thresholds - super().__init__( - specificity, - num_thresholds=num_thresholds, - class_id=class_id, - name=name, - dtype=dtype) - - def result(self): - specificities = tf.math.divide_no_nan( - self.true_negatives, - tf.math.add(self.true_negatives, self.false_positives)) - sensitivities = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_negatives)) - return self._find_max_under_constraint( - specificities, sensitivities, tf.greater_equal) - - def get_config(self): - config = { - 'num_thresholds': self.num_thresholds, - 'specificity': self.specificity - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.metrics.SpecificityAtSensitivity') -class SpecificityAtSensitivity(SensitivitySpecificityBase): - """Computes the best specificity where sensitivity is >= specified value. - - `Sensitivity` measures the proportion of actual positives that are correctly - identified as such (tp / (tp + fn)). - `Specificity` measures the proportion of actual negatives that are correctly - identified as such (tn / (tn + fp)). - - This metric creates four local variables, `true_positives`, `true_negatives`, - `false_positives` and `false_negatives` that are used to compute the - specificity at the given sensitivity. The threshold for the given sensitivity - value is computed and used to evaluate the corresponding specificity. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - If `class_id` is specified, the metric is calculated by considering only the - entries in the batch for which `class_id` is above the threshold in the - predictions, and computing the fraction of them for which `class_id` is - indeed a correct label. - - For additional information about specificity and sensitivity, see - [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). - - Args: - sensitivity: A scalar value in range `[0, 1]`. - num_thresholds: (Optional) Defaults to 200. The number of thresholds to - use for matching the given sensitivity. - class_id: (Optional) Integer class ID for which we want binary metrics. - This must be in the half-open interval `[0, num_classes)`, where - `num_classes` is the last dimension of predictions. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result.
- - Standalone usage: - - >>> m = tf.keras.metrics.SpecificityAtSensitivity(0.5) - >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) - >>> m.result().numpy() - 0.66666667 - - >>> m.reset_state() - >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8], - ... sample_weight=[1, 1, 2, 2, 2]) - >>> m.result().numpy() - 0.5 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.SpecificityAtSensitivity(sensitivity=0.5)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - sensitivity, - num_thresholds=200, - class_id=None, - name=None, - dtype=None): - if sensitivity < 0 or sensitivity > 1: - raise ValueError( - 'Argument `sensitivity` must be in the range [0, 1]. ' - f'Received: sensitivity={sensitivity}') - self.sensitivity = sensitivity - self.num_thresholds = num_thresholds - super().__init__( - sensitivity, - num_thresholds=num_thresholds, - class_id=class_id, - name=name, - dtype=dtype) - - def result(self): - sensitivities = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_negatives)) - specificities = tf.math.divide_no_nan( - self.true_negatives, - tf.math.add(self.true_negatives, self.false_positives)) - return self._find_max_under_constraint( - sensitivities, specificities, tf.greater_equal) - - def get_config(self): - config = { - 'num_thresholds': self.num_thresholds, - 'sensitivity': self.sensitivity - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.metrics.PrecisionAtRecall') -class PrecisionAtRecall(SensitivitySpecificityBase): - """Computes the best precision where recall is >= specified value. - - This metric creates four local variables, `true_positives`, `true_negatives`, - `false_positives` and `false_negatives` that are used to compute the - precision at the given recall. The threshold for the given recall - value is computed and used to evaluate the corresponding precision. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - If `class_id` is specified, we calculate precision by considering only the - entries in the batch for which `class_id` is above the threshold in the - predictions, and computing the fraction of them for which `class_id` is - indeed a correct label. - - Args: - recall: A scalar value in range `[0, 1]`. - num_thresholds: (Optional) Defaults to 200. The number of thresholds to - use for matching the given recall. - class_id: (Optional) Integer class ID for which we want binary metrics. - This must be in the half-open interval `[0, num_classes)`, where - `num_classes` is the last dimension of predictions. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.PrecisionAtRecall(0.5) - >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) - >>> m.result().numpy() - 0.5 - - >>> m.reset_state() - >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8], - ...
sample_weight=[2, 2, 2, 1, 1]) - >>> m.result().numpy() - 0.33333333 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.PrecisionAtRecall(recall=0.8)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - recall, - num_thresholds=200, - class_id=None, - name=None, - dtype=None): - if recall < 0 or recall > 1: - raise ValueError( - 'Argument `recall` must be in the range [0, 1]. ' - f'Received: recall={recall}') - self.recall = recall - self.num_thresholds = num_thresholds - super().__init__( - value=recall, - num_thresholds=num_thresholds, - class_id=class_id, - name=name, - dtype=dtype) - - def result(self): - recalls = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_negatives)) - precisions = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_positives)) - return self._find_max_under_constraint( - recalls, precisions, tf.greater_equal) - - def get_config(self): - config = {'num_thresholds': self.num_thresholds, 'recall': self.recall} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.metrics.RecallAtPrecision') -class RecallAtPrecision(SensitivitySpecificityBase): - """Computes the best recall where precision is >= specified value. - - For a given score-label distribution, the required precision might not - be achievable; in that case, 0.0 is returned as the recall. - - This metric creates four local variables, `true_positives`, `true_negatives`, - `false_positives` and `false_negatives` that are used to compute the - recall at the given precision. The threshold for the given precision - value is computed and used to evaluate the corresponding recall. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - If `class_id` is specified, we calculate precision by considering only the - entries in the batch for which `class_id` is above the threshold in the - predictions, and computing the fraction of them for which `class_id` is - indeed a correct label. - - Args: - precision: A scalar value in range `[0, 1]`. - num_thresholds: (Optional) Defaults to 200. The number of thresholds to - use for matching the given precision. - class_id: (Optional) Integer class ID for which we want binary metrics. - This must be in the half-open interval `[0, num_classes)`, where - `num_classes` is the last dimension of predictions. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.RecallAtPrecision(0.8) - >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) - >>> m.result().numpy() - 0.5 - - >>> m.reset_state() - >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9], - ... sample_weight=[1, 0, 0, 1]) - >>> m.result().numpy() - 1.0 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.RecallAtPrecision(precision=0.8)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - precision, - num_thresholds=200, - class_id=None, - name=None, - dtype=None): - if precision < 0 or precision > 1: - raise ValueError( - 'Argument `precision` must be in the range [0, 1]. 
' - f'Received: precision={precision}') - self.precision = precision - self.num_thresholds = num_thresholds - super().__init__( - value=precision, - num_thresholds=num_thresholds, - class_id=class_id, - name=name, - dtype=dtype) - - def result(self): - precisions = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_positives)) - recalls = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_negatives)) - return self._find_max_under_constraint( - precisions, recalls, tf.greater_equal) - - def get_config(self): - config = {'num_thresholds': self.num_thresholds, - 'precision': self.precision} - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.metrics.AUC') -class AUC(base_metric.Metric): - """Approximates the AUC (Area under the curve) of the ROC or PR curves. - - The AUC (Area under the curve) of the ROC (Receiver operating - characteristic; default) or PR (Precision Recall) curve is a quality measure - of binary classifiers. Unlike accuracy, and like cross-entropy - losses, ROC-AUC and PR-AUC evaluate all the operational points of a model. - - This class approximates AUCs using a Riemann sum. During the metric - accumulation phase, predictions are accumulated within predefined buckets - by value. The AUC is then computed by interpolating per-bucket averages. These - buckets define the evaluated operational points. - - This metric creates four local variables, `true_positives`, `true_negatives`, - `false_positives` and `false_negatives` that are used to compute the AUC. - To discretize the AUC curve, a linearly spaced set of thresholds is used to - compute pairs of recall and precision values. The area under the ROC-curve is - therefore computed using the height of the recall values by the false positive - rate, while the area under the PR-curve is computed using the height of - the precision values by the recall. - - This value is ultimately returned as `auc`, an idempotent operation that - computes the area under a discretized curve of precision versus recall values - (computed using the aforementioned variables). The `num_thresholds` variable - controls the degree of discretization with larger numbers of thresholds more - closely approximating the true AUC. The quality of the approximation may vary - dramatically depending on `num_thresholds`. The `thresholds` parameter can be - used to manually specify thresholds which split the predictions more evenly. - - For the best approximation of the real AUC, `predictions` should be - distributed approximately uniformly in the range [0, 1] (if - `from_logits=False`). The quality of the AUC approximation may be poor if - this is not the case. Setting `summation_method` to 'minoring' or 'majoring' - can help quantify the error in the approximation by providing lower or upper - bound estimates of the AUC. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - num_thresholds: (Optional) Defaults to 200. The number of thresholds to - use when discretizing the ROC curve. Values must be > 1. - curve: (Optional) Specifies the name of the curve to be computed, 'ROC' - [default] or 'PR' for the Precision-Recall-curve. - summation_method: (Optional) Specifies the [Riemann summation method]( - https://en.wikipedia.org/wiki/Riemann_sum) used. - 'interpolation' (default) applies the mid-point summation scheme for `ROC`.
- For PR-AUC, interpolates (true/false) positives but not the ratio that - is precision (see Davis & Goadrich 2006 for details); - 'minoring' applies left summation - for increasing intervals and right summation for decreasing intervals; - 'majoring' does the opposite. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - thresholds: (Optional) A list of floating point values to use as the - thresholds for discretizing the curve. If set, the `num_thresholds` - parameter is ignored. Values should be in [0, 1]. Endpoint thresholds - equal to {-epsilon, 1+epsilon} for a small positive epsilon value will - be automatically included with these to correctly handle predictions - equal to exactly 0 or 1. - multi_label: boolean indicating whether multilabel data should be - treated as such, wherein AUC is computed separately for each label and - then averaged across labels, or (when False) if the data should be - flattened into a single label before AUC computation. In the latter - case, when multilabel data is passed to AUC, each label-prediction pair - is treated as an individual data point. Should be set to False for - multi-class data. - num_labels: (Optional) The number of labels, used when `multi_label` is - True. If `num_labels` is not specified, then state variables get created - on the first call to `update_state`. - label_weights: (Optional) list, array, or tensor of non-negative weights - used to compute AUCs for multilabel data. When `multi_label` is True, - the weights are applied to the individual label AUCs when they are - averaged to produce the multi-label AUC. When it's False, they are used - to weight the individual label predictions in computing the confusion - matrix on the flattened data. Note that this is unlike class_weights in - that class_weights weights the example depending on the value of its - label, whereas label_weights depends only on the index of that label - before flattening; therefore `label_weights` should not be used for - multi-class data. - from_logits: boolean indicating whether the predictions (`y_pred` in - `update_state`) are probabilities or sigmoid logits. As a rule of thumb, - when using a keras loss, the `from_logits` constructor argument of the - loss should match the AUC `from_logits` constructor argument. - - Standalone usage: - - >>> m = tf.keras.metrics.AUC(num_thresholds=3) - >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) - >>> # threshold values are [0 - 1e-7, 0.5, 1 + 1e-7] - >>> # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] - >>> # tp_rate = recall = [1, 0.5, 0], fp_rate = [1, 0, 0] - >>> # auc = ((((1+0.5)/2)*(1-0)) + (((0.5+0)/2)*(0-0))) = 0.75 - >>> m.result().numpy() - 0.75 - - >>> m.reset_state() - >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9], - ... sample_weight=[1, 0, 0, 1]) - >>> m.result().numpy() - 1.0 - - Usage with `compile()` API: - - ```python - # Reports the AUC of a model outputting a probability. - model.compile(optimizer='sgd', - loss=tf.keras.losses.BinaryCrossentropy(), - metrics=[tf.keras.metrics.AUC()]) - - # Reports the AUC of a model outputting a logit. 
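# --- Illustrative aside (not part of the original example): the standalone
# doctest above can be re-derived with NumPy. The tp/fp/fn/tn arrays are
# copied from the doctest comments; 'interpolation' averages successive
# recall heights over the false-positive-rate steps.
import numpy as np
tp = np.array([2., 1., 0.]); fp = np.array([2., 0., 0.])
fn = np.array([0., 1., 2.]); tn = np.array([0., 2., 2.])
recall = tp / (tp + fn)                      # [1.0, 0.5, 0.0]
fp_rate = fp / (fp + tn)                     # [1.0, 0.0, 0.0]
heights = (recall[:-1] + recall[1:]) / 2.0   # mid-point summation
print(np.sum((fp_rate[:-1] - fp_rate[1:]) * heights))  # 0.75, as in the doctest
# --- End aside.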
- model.compile(optimizer='sgd', - loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), - metrics=[tf.keras.metrics.AUC(from_logits=True)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - num_thresholds=200, - curve='ROC', - summation_method='interpolation', - name=None, - dtype=None, - thresholds=None, - multi_label=False, - num_labels=None, - label_weights=None, - from_logits=False): - # Validate configurations. - if isinstance(curve, metrics_utils.AUCCurve) and curve not in list( - metrics_utils.AUCCurve): - raise ValueError( - f'Invalid `curve` argument value "{curve}". ' - f'Expected one of: {list(metrics_utils.AUCCurve)}') - if isinstance( - summation_method, - metrics_utils.AUCSummationMethod) and summation_method not in list( - metrics_utils.AUCSummationMethod): - raise ValueError( - f'Invalid `summation_method` argument value "{summation_method}". ' - f'Expected one of: {list(metrics_utils.AUCSummationMethod)}') - - # Update properties. - self._init_from_thresholds = thresholds is not None - if thresholds is not None: - # If specified, use the supplied thresholds. - self.num_thresholds = len(thresholds) + 2 - thresholds = sorted(thresholds) - self._thresholds_distributed_evenly = ( - metrics_utils.is_evenly_distributed_thresholds( - np.array([0.0] + thresholds + [1.0]))) - else: - if num_thresholds <= 1: - raise ValueError('Argument `num_thresholds` must be an integer > 1. ' - f'Received: num_thresholds={num_thresholds}') - - # Otherwise, linearly interpolate (num_thresholds - 2) thresholds in - # (0, 1). - self.num_thresholds = num_thresholds - thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) - for i in range(num_thresholds - 2)] - self._thresholds_distributed_evenly = True - - # Add an endpoint "threshold" below zero and above one for either - # threshold method to account for floating point imprecisions. - self._thresholds = np.array([0.0 - backend.epsilon()] + thresholds + - [1.0 + backend.epsilon()]) - - if isinstance(curve, metrics_utils.AUCCurve): - self.curve = curve - else: - self.curve = metrics_utils.AUCCurve.from_str(curve) - if isinstance(summation_method, metrics_utils.AUCSummationMethod): - self.summation_method = summation_method - else: - self.summation_method = metrics_utils.AUCSummationMethod.from_str( - summation_method) - super().__init__(name=name, dtype=dtype) - - # Handle multilabel arguments. - self.multi_label = multi_label - if label_weights is not None: - label_weights = tf.constant(label_weights, dtype=self.dtype) - tf.debugging.assert_non_negative( - label_weights, - message='All values of `label_weights` must be non-negative.') - self.label_weights = label_weights - - else: - self.label_weights = None - - self._from_logits = from_logits - - self._built = False - if self.multi_label: - if num_labels: - shape = tf.TensorShape([None, num_labels]) - self._build(shape) - else: - if num_labels: - raise ValueError( - '`num_labels` is needed only when `multi_label` is True.') - self._build(None) - - @property - def thresholds(self): - """The thresholds used for evaluating AUC.""" - return list(self._thresholds) - - def _build(self, shape): - """Initialize TP, FP, TN, and FN tensors, given the shape of the data.""" - if self.multi_label: - if shape.ndims != 2: - raise ValueError( - '`y_true` must have rank 2 when `multi_label=True`. ' - f'Found rank {shape.ndims}. 
' - f'Full shape received for `y_true`: {shape}') - self._num_labels = shape[1] - variable_shape = tf.TensorShape([self.num_thresholds, self._num_labels]) - else: - variable_shape = tf.TensorShape([self.num_thresholds]) - - self._build_input_shape = shape - # Create metric variables - self.true_positives = self.add_weight( - 'true_positives', - shape=variable_shape, - initializer='zeros') - self.true_negatives = self.add_weight( - 'true_negatives', - shape=variable_shape, - initializer='zeros') - self.false_positives = self.add_weight( - 'false_positives', - shape=variable_shape, - initializer='zeros') - self.false_negatives = self.add_weight( - 'false_negatives', - shape=variable_shape, - initializer='zeros') - - if self.multi_label: - with tf.init_scope(): - # This should only be necessary for handling v1 behavior. In v2, AUC - # should be initialized outside of any tf.functions, and therefore in - # eager mode. - if not tf.executing_eagerly(): - backend._initialize_variables(backend._get_session()) # pylint: disable=protected-access - - self._built = True - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates confusion matrix statistics. - - Args: - y_true: The ground truth values. - y_pred: The predicted values. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. - """ - if not self._built: - self._build(tf.TensorShape(y_pred.shape)) - - if self.multi_label or (self.label_weights is not None): - # y_true should have shape (number of examples, number of labels). - shapes = [ - (y_true, ('N', 'L')) - ] - if self.multi_label: - # TP, TN, FP, and FN should all have shape - # (number of thresholds, number of labels). - shapes.extend([(self.true_positives, ('T', 'L')), - (self.true_negatives, ('T', 'L')), - (self.false_positives, ('T', 'L')), - (self.false_negatives, ('T', 'L'))]) - if self.label_weights is not None: - # label_weights should be of length equal to the number of labels. - shapes.append((self.label_weights, ('L',))) - tf.debugging.assert_shapes( - shapes, message='Number of labels is not consistent.') - - # Only forward label_weights to update_confusion_matrix_variables when - # multi_label is False. Otherwise the averaging of individual label AUCs is - # handled in AUC.result - label_weights = None if self.multi_label else self.label_weights - - if self._from_logits: - y_pred = activations.sigmoid(y_pred) - - return metrics_utils.update_confusion_matrix_variables( - { - metrics_utils.ConfusionMatrix.TRUE_POSITIVES: - self.true_positives, - metrics_utils.ConfusionMatrix.TRUE_NEGATIVES: - self.true_negatives, - metrics_utils.ConfusionMatrix.FALSE_POSITIVES: - self.false_positives, - metrics_utils.ConfusionMatrix.FALSE_NEGATIVES: - self.false_negatives, - }, - y_true, - y_pred, - self._thresholds, - thresholds_distributed_evenly=self._thresholds_distributed_evenly, - sample_weight=sample_weight, - multi_label=self.multi_label, - label_weights=label_weights) - - def interpolate_pr_auc(self): - """Interpolation formula inspired by section 4 of Davis & Goadrich 2006. 
- - https://www.biostat.wisc.edu/~page/rocpr.pdf - - Note here we derive & use a closed formula not present in the paper - as follows: - - Precision = TP / (TP + FP) = TP / P - - Modeling all of TP (true positive), FP (false positive) and their sum - P = TP + FP (predicted positive) as varying linearly within each interval - [A, B] between successive thresholds, we get - - Precision slope = dTP / dP - = (TP_B - TP_A) / (P_B - P_A) - = (TP - TP_A) / (P - P_A) - Precision = (TP_A + slope * (P - P_A)) / P - - The area within the interval is (slope / total_pos_weight) times - - int_A^B{Precision.dP} = int_A^B{(TP_A + slope * (P - P_A)) * dP / P} - int_A^B{Precision.dP} = int_A^B{slope * dP + intercept * dP / P} - - where intercept = TP_A - slope * P_A = TP_B - slope * P_B, resulting in - - int_A^B{Precision.dP} = TP_B - TP_A + intercept * log(P_B / P_A) - - Bringing back the factor (slope / total_pos_weight) we'd put aside, we get - - slope * [dTP + intercept * log(P_B / P_A)] / total_pos_weight - - where dTP == TP_B - TP_A. - - Note that when P_A == 0 the above calculation simplifies into - - int_A^B{Precision.dTP} = int_A^B{slope * dTP} = slope * (TP_B - TP_A) - - which is really equivalent to imputing constant precision throughout the - first bucket having >0 true positives. - - Returns: - pr_auc: an approximation of the area under the P-R curve. - """ - dtp = self.true_positives[:self.num_thresholds - - 1] - self.true_positives[1:] - p = tf.math.add(self.true_positives, self.false_positives) - dp = p[:self.num_thresholds - 1] - p[1:] - prec_slope = tf.math.divide_no_nan( - dtp, tf.maximum(dp, 0), name='prec_slope') - intercept = self.true_positives[1:] - tf.multiply(prec_slope, p[1:]) - - safe_p_ratio = tf.where( - tf.logical_and(p[:self.num_thresholds - 1] > 0, p[1:] > 0), - tf.math.divide_no_nan( - p[:self.num_thresholds - 1], - tf.maximum(p[1:], 0), - name='recall_relative_ratio'), - tf.ones_like(p[1:])) - - pr_auc_increment = tf.math.divide_no_nan( - prec_slope * (dtp + intercept * tf.math.log(safe_p_ratio)), - tf.maximum(self.true_positives[1:] + self.false_negatives[1:], 0), - name='pr_auc_increment') - - if self.multi_label: - by_label_auc = tf.reduce_sum( - pr_auc_increment, name=self.name + '_by_label', axis=0) - if self.label_weights is None: - # Evenly weighted average of the label AUCs. - return tf.reduce_mean(by_label_auc, name=self.name) - else: - # Weighted average of the label AUCs. - return tf.math.divide_no_nan( - tf.reduce_sum( - tf.multiply(by_label_auc, self.label_weights)), - tf.reduce_sum(self.label_weights), - name=self.name) - else: - return tf.reduce_sum(pr_auc_increment, name='interpolate_pr_auc') - - def result(self): - if (self.curve == metrics_utils.AUCCurve.PR and - self.summation_method == metrics_utils.AUCSummationMethod.INTERPOLATION - ): - # This use case is different and is handled separately. - return self.interpolate_pr_auc() - - # Set `x` and `y` values for the curves based on `curve` config. - recall = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_negatives)) - if self.curve == metrics_utils.AUCCurve.ROC: - fp_rate = tf.math.divide_no_nan( - self.false_positives, - tf.math.add(self.false_positives, self.true_negatives)) - x = fp_rate - y = recall - else: # curve == 'PR'. - precision = tf.math.divide_no_nan( - self.true_positives, - tf.math.add(self.true_positives, self.false_positives)) - x = recall - y = precision - - # Find the rectangle heights based on `summation_method`. 
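(Aside: a hedged numeric check of the closed-form bucket area derived in `interpolate_pr_auc` above. All counts below are made-up assumptions for a single interval between two successive thresholds.)

```python
import numpy as np

# Accumulated counts at two successive thresholds: A (lower threshold, more
# predicted positives) and B (higher threshold).
tp_a, fp_a = 6.0, 6.0
tp_b, fp_b = 4.0, 2.0
total_pos = 8.0                        # TP + FN, constant across thresholds

p_a, p_b = tp_a + fp_a, tp_b + fp_b    # P = TP + FP (predicted positives)
slope = (tp_a - tp_b) / (p_a - p_b)    # dTP / dP
intercept = tp_b - slope * p_b
area = slope * ((tp_a - tp_b) + intercept * np.log(p_a / p_b)) / total_pos
print(area)  # ~0.141

# Crude sanity check: mean precision (0.5 at A, ~0.667 at B) times the recall
# change (0.25) gives ~0.146, close to the interpolated value above.
```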
- if self.summation_method == metrics_utils.AUCSummationMethod.INTERPOLATION: - # Note: the case ('PR', 'interpolation') has been handled above. - heights = (y[:self.num_thresholds - 1] + y[1:]) / 2. - elif self.summation_method == metrics_utils.AUCSummationMethod.MINORING: - heights = tf.minimum(y[:self.num_thresholds - 1], y[1:]) - else: # self.summation_method = metrics_utils.AUCSummationMethod.MAJORING: - heights = tf.maximum(y[:self.num_thresholds - 1], y[1:]) - - # Sum up the areas of all the rectangles. - if self.multi_label: - riemann_terms = tf.multiply(x[:self.num_thresholds - 1] - x[1:], heights) - by_label_auc = tf.reduce_sum( - riemann_terms, name=self.name + '_by_label', axis=0) - - if self.label_weights is None: - # Unweighted average of the label AUCs. - return tf.reduce_mean(by_label_auc, name=self.name) - else: - # Weighted average of the label AUCs. - return tf.math.divide_no_nan( - tf.reduce_sum( - tf.multiply(by_label_auc, self.label_weights)), - tf.reduce_sum(self.label_weights), - name=self.name) - else: - return tf.reduce_sum( - tf.multiply(x[:self.num_thresholds - 1] - x[1:], heights), - name=self.name) - - def reset_state(self): - if self._built: - confusion_matrix_variables = (self.true_positives, self.true_negatives, - self.false_positives, self.false_negatives) - if self.multi_label: - backend.batch_set_value( - [(v, np.zeros((self.num_thresholds, self._num_labels))) - for v in confusion_matrix_variables]) - else: - backend.batch_set_value([(v, np.zeros((self.num_thresholds,))) - for v in confusion_matrix_variables]) - - def get_config(self): - if is_tensor_or_variable(self.label_weights): - label_weights = backend.eval(self.label_weights) - else: - label_weights = self.label_weights - config = { - 'num_thresholds': self.num_thresholds, - 'curve': self.curve.value, - 'summation_method': self.summation_method.value, - 'multi_label': self.multi_label, - 'label_weights': label_weights - } - # optimization to avoid serializing a large number of generated thresholds - if self._init_from_thresholds: - # We remove the endpoint thresholds as an inverse of how the thresholds - # were initialized. This ensures that a metric initialized from this - # config has the same thresholds. - config['thresholds'] = self.thresholds[1:-1] - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.metrics.CosineSimilarity') -class CosineSimilarity(base_metric.MeanMetricWrapper): - """Computes the cosine similarity between the labels and predictions. - - `cosine similarity = (a . b) / ||a|| ||b||` - - See: [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity). - - This metric keeps the average cosine similarity between `predictions` and - `labels` over a stream of data. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - axis: (Optional) Defaults to -1. The dimension along which the cosine - similarity is computed. - - Standalone usage: - - >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]] - >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]] - >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]] - >>> # result = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1)) - >>> # = ((0. + 0.) 
+ (0.5 + 0.5)) / 2 - >>> m = tf.keras.metrics.CosineSimilarity(axis=1) - >>> m.update_state([[0., 1.], [1., 1.]], [[1., 0.], [1., 1.]]) - >>> m.result().numpy() - 0.49999997 - - >>> m.reset_state() - >>> m.update_state([[0., 1.], [1., 1.]], [[1., 0.], [1., 1.]], - ... sample_weight=[0.3, 0.7]) - >>> m.result().numpy() - 0.6999999 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.CosineSimilarity(axis=1)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='cosine_similarity', dtype=None, axis=-1): - super().__init__( - cosine_similarity, name, dtype=dtype, axis=axis) - - -@keras_export('keras.metrics.MeanAbsoluteError') -class MeanAbsoluteError(base_metric.MeanMetricWrapper): - """Computes the mean absolute error between the labels and predictions. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.MeanAbsoluteError() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) - >>> m.result().numpy() - 0.25 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 0.5 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.MeanAbsoluteError()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='mean_absolute_error', dtype=None): - super().__init__( - mean_absolute_error, name, dtype=dtype) - - -@keras_export('keras.metrics.MeanAbsolutePercentageError') -class MeanAbsolutePercentageError(base_metric.MeanMetricWrapper): - """Computes the mean absolute percentage error between `y_true` and `y_pred`. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.MeanAbsolutePercentageError() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) - >>> m.result().numpy() - 250000000.0 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 500000000.0 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.MeanAbsolutePercentageError()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='mean_absolute_percentage_error', dtype=None): - super().__init__( - mean_absolute_percentage_error, name, dtype=dtype) - - -@keras_export('keras.metrics.MeanSquaredError') -class MeanSquaredError(base_metric.MeanMetricWrapper): - """Computes the mean squared error between `y_true` and `y_pred`. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.MeanSquaredError() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) - >>> m.result().numpy() - 0.25 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], - ... 
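(Aside on the `MeanAbsolutePercentageError` doctest above: the huge result comes from zeros in `y_true`, since the denominator is clamped to `keras.backend.epsilon()` rather than dropped. A hedged sketch of that arithmetic:)

```python
import tensorflow as tf

y_true = tf.constant([[0., 1.], [0., 0.]])
y_pred = tf.constant([[1., 1.], [0., 0.]])
eps = tf.keras.backend.epsilon()  # 1e-7 by default
ape = 100.0 * tf.abs(y_pred - y_true) / tf.maximum(tf.abs(y_true), eps)
print(tf.reduce_mean(ape).numpy())  # ~2.5e8: the |1 - 0| / eps term dominates
```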
sample_weight=[1, 0]) - >>> m.result().numpy() - 0.5 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.MeanSquaredError()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='mean_squared_error', dtype=None): - super().__init__( - mean_squared_error, name, dtype=dtype) - - -@keras_export('keras.metrics.MeanSquaredLogarithmicError') -class MeanSquaredLogarithmicError(base_metric.MeanMetricWrapper): - """Computes the mean squared logarithmic error between `y_true` and `y_pred`. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.MeanSquaredLogarithmicError() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) - >>> m.result().numpy() - 0.12011322 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 0.24022643 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.MeanSquaredLogarithmicError()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='mean_squared_logarithmic_error', dtype=None): - super().__init__( - mean_squared_logarithmic_error, name, dtype=dtype) - - -@keras_export('keras.metrics.Hinge') -class Hinge(base_metric.MeanMetricWrapper): - """Computes the hinge metric between `y_true` and `y_pred`. - - `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are - provided we will convert them to -1 or 1. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.Hinge() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) - >>> m.result().numpy() - 1.3 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 1.1 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', loss='mse', metrics=[tf.keras.metrics.Hinge()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='hinge', dtype=None): - super().__init__(hinge, name, dtype=dtype) - - -@keras_export('keras.metrics.SquaredHinge') -class SquaredHinge(base_metric.MeanMetricWrapper): - """Computes the squared hinge metric between `y_true` and `y_pred`. - - `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are - provided we will convert them to -1 or 1. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.SquaredHinge() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) - >>> m.result().numpy() - 1.86 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 1.46 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.SquaredHinge()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='squared_hinge', dtype=None): - super().__init__(squared_hinge, name, dtype=dtype) - - -@keras_export('keras.metrics.CategoricalHinge') -class CategoricalHinge(base_metric.MeanMetricWrapper): - """Computes the categorical hinge metric between `y_true` and `y_pred`. 
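(Aside: the 0/1 to -1/1 label conversion described for the hinge metrics above can be checked directly. A minimal sketch reproducing the `Hinge` doctest value; the per-sample values in the comments are hand-computed.)

```python
import tensorflow as tf

y_true = tf.constant([[0., 1.], [0., 0.]])      # binary labels
y_pred = tf.constant([[0.6, 0.4], [0.4, 0.6]])
y_true_signed = 2.0 * y_true - 1.0              # [[-1, 1], [-1, -1]]
per_sample = tf.reduce_mean(
    tf.maximum(1.0 - y_true_signed * y_pred, 0.0), axis=-1)
print(per_sample.numpy())                  # expected [1.1, 1.5]
print(tf.reduce_mean(per_sample).numpy())  # expected 1.3, as in the doctest
```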
- - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.CategoricalHinge() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) - >>> m.result().numpy() - 1.4000001 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 1.2 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.CategoricalHinge()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='categorical_hinge', dtype=None): - super().__init__(categorical_hinge, name, dtype=dtype) - - -@keras_export('keras.metrics.RootMeanSquaredError') -class RootMeanSquaredError(base_metric.Mean): - """Computes root mean squared error metric between `y_true` and `y_pred`. - - Standalone usage: - - >>> m = tf.keras.metrics.RootMeanSquaredError() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) - >>> m.result().numpy() - 0.5 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 0.70710677 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.RootMeanSquaredError()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='root_mean_squared_error', dtype=None): - super().__init__(name, dtype=dtype) - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates root mean squared error statistics. - - Args: - y_true: The ground truth values. - y_pred: The predicted values. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. - """ - y_true = tf.cast(y_true, self._dtype) - y_pred = tf.cast(y_pred, self._dtype) - y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( - y_pred, y_true) - error_sq = tf.math.squared_difference(y_pred, y_true) - return super().update_state( - error_sq, sample_weight=sample_weight) - - def result(self): - return tf.sqrt(tf.math.divide_no_nan(self.total, self.count)) - - -@keras_export('keras.metrics.LogCoshError') -class LogCoshError(base_metric.MeanMetricWrapper): - """Computes the logarithm of the hyperbolic cosine of the prediction error. - - `logcosh = log((exp(x) + exp(-x))/2)`, where x is the error (y_pred - y_true) - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.LogCoshError() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) - >>> m.result().numpy() - 0.10844523 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 0.21689045 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.LogCoshError()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='logcosh', dtype=None): - super().__init__(logcosh, name, dtype=dtype) - - -@keras_export('keras.metrics.Poisson') -class Poisson(base_metric.MeanMetricWrapper): - """Computes the Poisson metric between `y_true` and `y_pred`. - - `metric = y_pred - y_true * log(y_pred)` - - Args: - name: (Optional) string name of the metric instance. 
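(Aside on `RootMeanSquaredError` above: since `result()` takes the square root of the streamed mean, it should match the square root of `MeanSquaredError` on the same stream. A minimal sketch:)

```python
import tensorflow as tf

y_true = [[0., 1.], [0., 0.]]
y_pred = [[1., 1.], [0., 0.]]

mse = tf.keras.metrics.MeanSquaredError()
mse.update_state(y_true, y_pred)
rmse = tf.keras.metrics.RootMeanSquaredError()
rmse.update_state(y_true, y_pred)
# sqrt of the streamed mean squared error equals the streamed RMSE:
print(mse.result().numpy() ** 0.5, rmse.result().numpy())  # both ~0.5
```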
- dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.Poisson() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) - >>> m.result().numpy() - 0.49999997 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 0.99999994 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.Poisson()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='poisson', dtype=None): - super().__init__(poisson, name, dtype=dtype) - - -@keras_export('keras.metrics.KLDivergence') -class KLDivergence(base_metric.MeanMetricWrapper): - """Computes Kullback-Leibler divergence metric between `y_true` and `y_pred`. - - `metric = y_true * log(y_true / y_pred)` - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.KLDivergence() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) - >>> m.result().numpy() - 0.45814306 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 0.9162892 - - Usage with `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.KLDivergence()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, name='kullback_leibler_divergence', dtype=None): - super().__init__( - kullback_leibler_divergence, name, dtype=dtype) - - -class _IoUBase(base_metric.Metric): - """Computes the confusion matrix for Intersection-Over-Union metrics. - - Intersection-Over-Union is a common evaluation metric for semantic image - segmentation. - - For an individual class, the IoU metric is defined as follows: - - ``` - iou = true_positives / (true_positives + false_positives + false_negatives) - ``` - - From IoUs of individual classes, the MeanIoU can be computed as the mean of - the individual IoUs. - - To compute IoUs, the predictions are accumulated in a confusion matrix, - weighted by `sample_weight` and the metric is then calculated from it. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Args: - num_classes: The possible number of labels the prediction task can have. - This value must be provided, since a confusion matrix of size - `(num_classes, num_classes)` will be allocated. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - """ - - def __init__(self, num_classes, name=None, dtype=None): - super().__init__(name=name, dtype=dtype) - self.num_classes = num_classes - - # Variable to accumulate the predictions in the confusion matrix. - self.total_cm = self.add_weight( - 'total_confusion_matrix', - shape=(num_classes, num_classes), - initializer='zeros') - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates the confusion matrix statistics. - - Args: - y_true: The ground truth values. - y_pred: The predicted values. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. - """ - - y_true = tf.cast(y_true, self._dtype) - y_pred = tf.cast(y_pred, self._dtype) - - # Flatten the input if its rank > 1. 
- if y_pred.shape.ndims > 1: - y_pred = tf.reshape(y_pred, [-1]) - - if y_true.shape.ndims > 1: - y_true = tf.reshape(y_true, [-1]) - - if sample_weight is not None: - sample_weight = tf.cast(sample_weight, self._dtype) - if sample_weight.shape.ndims > 1: - sample_weight = tf.reshape(sample_weight, [-1]) - - # Accumulate the prediction to current confusion matrix. - current_cm = tf.math.confusion_matrix( - y_true, - y_pred, - self.num_classes, - weights=sample_weight, - dtype=self._dtype) - return self.total_cm.assign_add(current_cm) - - def reset_state(self): - backend.set_value( - self.total_cm, np.zeros((self.num_classes, self.num_classes))) - - -@keras_export('keras.metrics.IoU') -class IoU(_IoUBase): - """Computes the Intersection-Over-Union metric for specific target classes. - - General definition and computation: - - Intersection-Over-Union is a common evaluation metric for semantic image - segmentation. - - For an individual class, the IoU metric is defined as follows: - - ``` - iou = true_positives / (true_positives + false_positives + false_negatives) - ``` - - To compute IoUs, the predictions are accumulated in a confusion matrix, - weighted by `sample_weight` and the metric is then calculated from it. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Note, this class first computes IoUs for all individual classes, then returns - the mean of IoUs for the classes that are specified by `target_class_ids`. If - `target_class_ids` has only one id value, the IoU of that specific class is - returned. - - Args: - num_classes: The possible number of labels the prediction task can have. - A confusion matrix of dimension = [num_classes, num_classes] will be - allocated to accumulate predictions from which the metric is calculated. - target_class_ids: A tuple or list of target class ids for which the metric - is returned. To compute IoU for a specific class, a list (or tuple) of a - single id value should be provided. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> # cm = [[1, 1], - >>> # [1, 1]] - >>> # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] - >>> # iou = true_positives / (sum_row + sum_col - true_positives)) - >>> # iou = [0.33, 0.33] - >>> m = tf.keras.metrics.IoU(num_classes=2, target_class_ids=[0]) - >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1]) - >>> m.result().numpy() - 0.33333334 - - >>> m.reset_state() - >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1], - ... 
sample_weight=[0.3, 0.3, 0.3, 0.1]) - >>> # cm = [[0.3, 0.3], - >>> # [0.3, 0.1]] - >>> # sum_row = [0.6, 0.4], sum_col = [0.6, 0.4], true_positives = [0.3, 0.1] - >>> # iou = [0.33, 0.14] - >>> m.result().numpy() - 0.33333334 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.IoU(num_classes=2, target_class_ids=[0])]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__( - self, - num_classes: int, - target_class_ids: Union[List[int], Tuple[int, ...]], - name=None, - dtype=None, - ): - super().__init__( - name=name, - num_classes=num_classes, - dtype=dtype, - ) - if max(target_class_ids) >= num_classes: - raise ValueError( - f'Target class id {max(target_class_ids)} is out of range, which is ' - f'[{0}, {num_classes}).') - self.target_class_ids = list(target_class_ids) - - def result(self): - """Compute the intersection-over-union via the confusion matrix.""" - sum_over_row = tf.cast( - tf.reduce_sum(self.total_cm, axis=0), dtype=self._dtype) - sum_over_col = tf.cast( - tf.reduce_sum(self.total_cm, axis=1), dtype=self._dtype) - true_positives = tf.cast( - tf.linalg.tensor_diag_part(self.total_cm), dtype=self._dtype) - - # sum_over_row + sum_over_col = - # 2 * true_positives + false_positives + false_negatives. - denominator = sum_over_row + sum_over_col - true_positives - - # Only keep the target classes - true_positives = tf.gather(true_positives, self.target_class_ids) - denominator = tf.gather(denominator, self.target_class_ids) - - # If the denominator is 0, we need to ignore the class. - num_valid_entries = tf.reduce_sum( - tf.cast(tf.not_equal(denominator, 0), dtype=self._dtype)) - - iou = tf.math.divide_no_nan(true_positives, denominator) - - return tf.math.divide_no_nan( - tf.reduce_sum(iou, name='mean_iou'), num_valid_entries) - - def get_config(self): - config = { - 'num_classes': self.num_classes, - 'target_class_ids': self.target_class_ids, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_export('keras.metrics.BinaryIoU') -class BinaryIoU(IoU): - """Computes the Intersection-Over-Union metric for class 0 and/or 1. - - General definition and computation: - - Intersection-Over-Union is a common evaluation metric for semantic image - segmentation. - - For an individual class, the IoU metric is defined as follows: - - ``` - iou = true_positives / (true_positives + false_positives + false_negatives) - ``` - - To compute IoUs, the predictions are accumulated in a confusion matrix, - weighted by `sample_weight` and the metric is then calculated from it. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - This class can be used to compute IoUs for a binary classification task where - the predictions are provided as logits. First a `threshold` is applied to the - predicted values such that those that are below the `threshold` are converted - to class 0 and those that are above the `threshold` are converted to class 1. - - IoUs for classes 0 and 1 are then computed, the mean of IoUs for the classes - that are specified by `target_class_ids` is returned. - - Note: with `threshold=0`, this metric has the same behavior as `IoU`. - - Args: - target_class_ids: A tuple or list of target class ids for which the metric - is returned. Options are `[0]`, `[1]`, or `[0, 1]`. With `[0]` (or `[1]`), - the IoU metric for class 0 (or class 1, respectively) is returned. 
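(Aside: pulling together `_IoUBase.update_state` and `IoU.result` above, a NumPy sketch that reproduces the doctest numbers: build the confusion matrix, then read the IoU off its diagonal. The expected values in the comments are hand-computed.)

```python
import numpy as np
import tensorflow as tf

y_true = tf.constant([0, 0, 1, 1])
y_pred = tf.constant([0, 1, 0, 1])
cm = tf.math.confusion_matrix(y_true, y_pred, num_classes=2)
cm = cm.numpy().astype(float)
# cm == [[1, 1],
#        [1, 1]]

sum_over_row = cm.sum(axis=0)   # as named in IoU.result above
sum_over_col = cm.sum(axis=1)
tp = np.diag(cm)
iou = tp / (sum_over_row + sum_over_col - tp)   # [1/3, 1/3]
print(iou[[0]].mean())  # target_class_ids=[0] -> 0.3333..., as in the doctest
```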
With - `[0, 1]`, the mean of IoUs for the two classes is returned. - threshold: A threshold that applies to the prediction logits to convert them - to either predicted class 0 if the logit is below `threshold` or predicted - class 1 if the logit is at or above `threshold`. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> m = tf.keras.metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.3) - >>> m.update_state([0, 1, 0, 1], [0.1, 0.2, 0.4, 0.7]) - >>> m.result().numpy() - 0.33333334 - - >>> m.reset_state() - >>> m.update_state([0, 1, 0, 1], [0.1, 0.2, 0.4, 0.7], - ... sample_weight=[0.2, 0.3, 0.4, 0.1]) - >>> # cm = [[0.2, 0.4], - >>> # [0.3, 0.1]] - >>> # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, 0.1] - >>> # iou = [0.222, 0.125] - >>> m.result().numpy() - 0.17361112 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.BinaryIoU(target_class_ids=[0], threshold=0.5)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__( - self, - target_class_ids: Union[List[int], Tuple[int, ...]] = (0, 1), - threshold=0.5, - name=None, - dtype=None, - ): - - super().__init__( - num_classes=2, - target_class_ids=target_class_ids, - name=name, - dtype=dtype, - ) - self.threshold = threshold - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates the confusion matrix statistics. - - Before the confusion matrix is updated, the predicted values are thresholded - to be: - 0 for values that are smaller than the `threshold` - 1 for values that are larger than or equal to the `threshold` - - Args: - y_true: The ground truth values. - y_pred: The predicted values. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. - """ - y_pred = tf.cast(y_pred, self._dtype) - y_pred = tf.cast(y_pred >= self.threshold, self._dtype) - return super().update_state(y_true, y_pred, sample_weight) - - def get_config(self): - return { - 'target_class_ids': self.target_class_ids, - 'threshold': self.threshold, - 'name': self.name, - 'dtype': self._dtype, - }
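The thresholding step above is easy to check by hand. Below is a minimal standalone sketch (plain TensorFlow, outside the metric class; variable names are illustrative only) that reproduces the confusion matrix behind the `0.33333334` doctest value:

```python
import tensorflow as tf

# Threshold the predictions the way BinaryIoU.update_state does, then
# accumulate an unweighted confusion matrix over the integer classes.
y_true = tf.constant([0, 1, 0, 1])
y_pred = tf.constant([0.1, 0.2, 0.4, 0.7])
y_pred_binary = tf.cast(y_pred >= 0.3, tf.int32)  # [0, 0, 1, 1]
cm = tf.math.confusion_matrix(y_true, y_pred_binary, num_classes=2)
print(cm.numpy())  # [[1 1] [1 1]] -> IoU = 1/3 for each class
```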
- - -@keras_export('keras.metrics.MeanIoU') -class MeanIoU(IoU): - """Computes the mean Intersection-Over-Union metric. - - General definition and computation: - - Intersection-Over-Union is a common evaluation metric for semantic image - segmentation. - - For an individual class, the IoU metric is defined as follows: - - ``` - iou = true_positives / (true_positives + false_positives + false_negatives) - ``` - - To compute IoUs, the predictions are accumulated in a confusion matrix, - weighted by `sample_weight` and the metric is then calculated from it. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - Note that this class first computes IoUs for all individual classes, then - returns the mean of these values. - - Args: - num_classes: The possible number of labels the prediction task can have. - This value must be provided, since a confusion matrix of dimension = - [num_classes, num_classes] will be allocated. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> # cm = [[1, 1], - >>> # [1, 1]] - >>> # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] - >>> # iou = true_positives / (sum_row + sum_col - true_positives) - >>> # result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2 = 0.33 - >>> m = tf.keras.metrics.MeanIoU(num_classes=2) - >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1]) - >>> m.result().numpy() - 0.33333334 - - >>> m.reset_state() - >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1], - ... sample_weight=[0.3, 0.3, 0.3, 0.1]) - >>> m.result().numpy() - 0.23809525 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.MeanIoU(num_classes=2)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, num_classes, name=None, dtype=None): - target_class_ids = list(range(num_classes)) - super().__init__( - name=name, - num_classes=num_classes, - target_class_ids=target_class_ids, - dtype=dtype, - ) - - def get_config(self): - return { - 'num_classes': self.num_classes, - 'name': self.name, - 'dtype': self._dtype, - }
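The `result()` computation shared by these classes reduces to a few array operations. A small NumPy sketch of the same arithmetic (assuming a pre-accumulated confusion matrix; the zero-denominator handling mirrors what `divide_no_nan` achieves):

```python
import numpy as np

# Confusion matrix from the MeanIoU doctest above.
cm = np.array([[1.0, 1.0],
               [1.0, 1.0]])
sum_over_row = cm.sum(axis=0)      # per-class predicted totals
sum_over_col = cm.sum(axis=1)      # per-class ground-truth totals
true_positives = np.diag(cm)
# Row and column sums both count the diagonal, so subtract it once.
denominator = sum_over_row + sum_over_col - true_positives
valid = denominator != 0
iou = np.where(valid, true_positives / np.where(valid, denominator, 1.0), 0.0)
print(iou)                      # [0.33333333 0.33333333]
print(iou.sum() / valid.sum())  # 0.3333..., the doctest's mean IoU
```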
- - -@keras_export('keras.metrics.OneHotIoU') -class OneHotIoU(IoU): - """Computes the Intersection-Over-Union metric for one-hot encoded labels. - - General definition and computation: - - Intersection-Over-Union is a common evaluation metric for semantic image - segmentation. - - For an individual class, the IoU metric is defined as follows: - - ``` - iou = true_positives / (true_positives + false_positives + false_negatives) - ``` - - To compute IoUs, the predictions are accumulated in a confusion matrix, - weighted by `sample_weight` and the metric is then calculated from it. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - This class can be used to compute IoU for multi-class classification tasks - where the labels are one-hot encoded (the last axis should have one dimension - per class). Note that the predictions should also have the same shape. To - compute the IoU, first the labels and predictions are converted back into - integer format by taking the argmax over the class axis. Then the same - computation steps as for the base `IoU` class apply. - - Note that if there is only one channel in the labels and predictions, this class - is the same as class `IoU`. In this case, use `IoU` instead. - - Also, make sure that `num_classes` is equal to the number of classes in the - data, to avoid a "labels out of bound" error when the confusion matrix is - computed. - - Args: - num_classes: The possible number of labels the prediction task can have. - A confusion matrix of shape `(num_classes, num_classes)` will be - allocated to accumulate predictions from which the metric is calculated. - target_class_ids: A tuple or list of target class ids for which the metric - is returned. To compute IoU for a specific class, a list (or tuple) of a - single id value should be provided. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) - >>> y_pred = tf.constant([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], - ... [0.1, 0.4, 0.5]]) - >>> sample_weight = [0.1, 0.2, 0.3, 0.4] - >>> m = tf.keras.metrics.OneHotIoU(num_classes=3, target_class_ids=[0, 2]) - >>> m.update_state(y_true=y_true, y_pred=y_pred, sample_weight=sample_weight) - >>> # cm = [[0, 0, 0.2+0.4], - >>> # [0.3, 0, 0], - >>> # [0, 0, 0.1]] - >>> # sum_row = [0.3, 0, 0.7], sum_col = [0.6, 0.3, 0.1] - >>> # true_positives = [0, 0, 0.1] - >>> # single_iou = true_positives / (sum_row + sum_col - true_positives) - >>> # mean_iou = (0 / (0.3 + 0.6 - 0) + 0.1 / (0.7 + 0.1 - 0.1)) / 2 - >>> m.result().numpy() - 0.071 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.OneHotIoU(num_classes=3, target_class_ids=[1])]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__( - self, - num_classes: int, - target_class_ids: Union[List[int], Tuple[int, ...]], - name=None, - dtype=None, - ): - super().__init__( - num_classes=num_classes, - target_class_ids=target_class_ids, - name=name, - dtype=dtype, - ) - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates the confusion matrix statistics. - - Args: - y_true: The ground truth values. - y_pred: The predicted values. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. - """ - # Take the argmax over the one-hot channels to recover integer class ids. - y_true = tf.argmax(y_true, axis=-1, output_type=tf.int32) - y_pred = tf.argmax(y_pred, axis=-1, output_type=tf.int32) - - return super().update_state(y_true, y_pred, sample_weight)
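The one-hot variants only add the argmax conversion shown in `update_state` above. A sketch that reproduces the weighted confusion matrix worked out in the doctest comments (the inputs are the doctest's own):

```python
import tensorflow as tf

# Convert one-hot labels and per-class scores to integer class ids, as
# OneHotIoU.update_state does, then accumulate the weighted matrix.
y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]])
y_pred = tf.constant([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7],
                      [0.5, 0.3, 0.1], [0.1, 0.4, 0.5]])
true_ids = tf.argmax(y_true, axis=-1)  # [2, 0, 1, 0]
pred_ids = tf.argmax(y_pred, axis=-1)  # [2, 2, 0, 2]
cm = tf.math.confusion_matrix(
    true_ids, pred_ids, num_classes=3,
    weights=tf.constant([0.1, 0.2, 0.3, 0.4]), dtype=tf.float32)
print(cm.numpy())  # [[0. 0. 0.6] [0.3 0. 0.] [0. 0. 0.1]]
```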
- - -@keras_export('keras.metrics.OneHotMeanIoU') -class OneHotMeanIoU(MeanIoU): - """Computes mean Intersection-Over-Union metric for one-hot encoded labels. - - General definition and computation: - - Intersection-Over-Union is a common evaluation metric for semantic image - segmentation. - - For an individual class, the IoU metric is defined as follows: - - ``` - iou = true_positives / (true_positives + false_positives + false_negatives) - ``` - - To compute IoUs, the predictions are accumulated in a confusion matrix, - weighted by `sample_weight` and the metric is then calculated from it. - - If `sample_weight` is `None`, weights default to 1. - Use `sample_weight` of 0 to mask values. - - This class can be used to compute the mean IoU for multi-class classification - tasks where the labels are one-hot encoded (the last axis should have one - dimension per class). Note that the predictions should also have the same - shape. To compute the mean IoU, first the labels and predictions are converted - back into integer format by taking the argmax over the class axis. Then the - same computation steps as for the base `MeanIoU` class apply. - - Note that if there is only one channel in the labels and predictions, this class - is the same as class `MeanIoU`. In this case, use `MeanIoU` instead. - - Also, make sure that `num_classes` is equal to the number of classes in the - data, to avoid a "labels out of bound" error when the confusion matrix is - computed. - - Args: - num_classes: The possible number of labels the prediction task can have. - A confusion matrix of shape `(num_classes, num_classes)` will be - allocated to accumulate predictions from which the metric is calculated. - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - - Standalone usage: - - >>> y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) - >>> y_pred = tf.constant([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], - ... [0.1, 0.4, 0.5]]) - >>> sample_weight = [0.1, 0.2, 0.3, 0.4] - >>> m = tf.keras.metrics.OneHotMeanIoU(num_classes=3) - >>> m.update_state(y_true=y_true, y_pred=y_pred, sample_weight=sample_weight) - >>> # cm = [[0, 0, 0.2+0.4], - >>> # [0.3, 0, 0], - >>> # [0, 0, 0.1]] - >>> # sum_row = [0.3, 0, 0.7], sum_col = [0.6, 0.3, 0.1] - >>> # true_positives = [0, 0, 0.1] - >>> # single_iou = true_positives / (sum_row + sum_col - true_positives) - >>> # mean_iou = (0 + 0 + 0.1 / (0.7 + 0.1 - 0.1)) / 3 - >>> m.result().numpy() - 0.048 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.OneHotMeanIoU(num_classes=3)]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__( - self, - num_classes: int, - name=None, - dtype=None, - ): - super().__init__( - num_classes=num_classes, - name=name, - dtype=dtype, - ) - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates the confusion matrix statistics. - - Args: - y_true: The ground truth values. - y_pred: The predicted values. - sample_weight: Optional weighting of each example. Defaults to 1. Can be a - `Tensor` whose rank is either 0, or the same rank as `y_true`, and must - be broadcastable to `y_true`. - - Returns: - Update op. - """ - # Take the argmax over the one-hot channels to recover integer class ids. - y_true = tf.argmax(y_true, axis=-1, output_type=tf.int32) - y_pred = tf.argmax(y_pred, axis=-1, output_type=tf.int32) - - return super().update_state(y_true, y_pred, sample_weight) - - -@keras_export('keras.metrics.BinaryCrossentropy') -class BinaryCrossentropy(base_metric.MeanMetricWrapper): - """Computes the crossentropy metric between the labels and predictions. - - This is the crossentropy metric class to be used when there are only two - label classes (0 and 1). - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - from_logits: (Optional) Whether output is expected to be a logits tensor. - By default, we consider that output encodes a probability distribution. - label_smoothing: (Optional) Float in [0, 1]. When > 0, label values are - smoothed, meaning the confidence on label values is relaxed. - e.g. `label_smoothing=0.2` means that we will use a value of `0.1` for - label `0` and `0.9` for label `1`. - - Standalone usage: - - >>> m = tf.keras.metrics.BinaryCrossentropy() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) - >>> m.result().numpy() - 0.81492424 - - >>> m.reset_state() - >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], - ... sample_weight=[1, 0]) - >>> m.result().numpy() - 0.9162905 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.BinaryCrossentropy()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - name='binary_crossentropy', - dtype=None, - from_logits=False, - label_smoothing=0): - super().__init__( - binary_crossentropy, - name, - dtype=dtype, - from_logits=from_logits, - label_smoothing=label_smoothing)
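The `0.81492424` doctest value above can be re-derived by hand. A NumPy sketch of the same unweighted mean (it skips the epsilon clipping the real metric applies to `y_pred`):

```python
import numpy as np

y_true = np.array([[0.0, 1.0], [0.0, 0.0]])
y_pred = np.array([[0.6, 0.4], [0.4, 0.6]])
# Elementwise binary crossentropy, then the mean over all entries.
bce = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
print(bce.mean(axis=-1))  # per-sample values: [0.916..., 0.713...]
print(bce.mean())         # ~0.8149, matching m.result() above
```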
- - -@keras_export('keras.metrics.CategoricalCrossentropy') -class CategoricalCrossentropy(base_metric.MeanMetricWrapper): - """Computes the crossentropy metric between the labels and predictions. - - This is the crossentropy metric class to be used when there are multiple - label classes (2 or more). Here we assume that labels are given as a `one_hot` - representation. e.g., when label values are [2, 0, 1], - `y_true` = [[0, 0, 1], [1, 0, 0], [0, 1, 0]]. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - from_logits: (Optional) Whether output is expected to be a logits tensor. - By default, we consider that output encodes a probability distribution. - label_smoothing: (Optional) Float in [0, 1]. When > 0, label values are - smoothed, meaning the confidence on label values is relaxed. e.g. - `label_smoothing=0.2` means that we will use a value of `0.1` for label - `0` and `0.9` for label `1`. - - Standalone usage: - - >>> # EPSILON = 1e-7, y = y_true, y` = y_pred - >>> # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) - >>> # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - >>> # xent = -sum(y * log(y'), axis = -1) - >>> # = -((log 0.95), (log 0.1)) - >>> # = [0.051, 2.302] - >>> # Reduced xent = (0.051 + 2.302) / 2 - >>> m = tf.keras.metrics.CategoricalCrossentropy() - >>> m.update_state([[0, 1, 0], [0, 0, 1]], - ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) - >>> m.result().numpy() - 1.1769392 - - >>> m.reset_state() - >>> m.update_state([[0, 1, 0], [0, 0, 1]], - ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]], - ... sample_weight=tf.constant([0.3, 0.7])) - >>> m.result().numpy() - 1.6271976 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.CategoricalCrossentropy()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - name='categorical_crossentropy', - dtype=None, - from_logits=False, - label_smoothing=0): - super().__init__( - categorical_crossentropy, - name, - dtype=dtype, - from_logits=from_logits, - label_smoothing=label_smoothing)
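The step-by-step comments in the doctest above translate directly into code. A sketch using the same EPSILON clipping the comments describe:

```python
import numpy as np

EPSILON = 1e-7
y_true = np.array([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
y_pred = np.clip(
    np.array([[0.05, 0.95, 0.0], [0.1, 0.8, 0.1]]), EPSILON, 1.0 - EPSILON)
# xent = -sum(y_true * log(y_pred), axis=-1), then the unweighted mean.
xent = -np.sum(y_true * np.log(y_pred), axis=-1)
print(xent)         # [0.0513, 2.3026]
print(xent.mean())  # ~1.1769, matching m.result() above
```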
- - -@keras_export('keras.metrics.SparseCategoricalCrossentropy') -class SparseCategoricalCrossentropy(base_metric.MeanMetricWrapper): - """Computes the crossentropy metric between the labels and predictions. - - Use this crossentropy metric when there are two or more label classes. - We expect labels to be provided as integers. If you want to provide labels - using `one-hot` representation, please use `CategoricalCrossentropy` metric. - There should be `# classes` floating point values per feature for `y_pred` - and a single floating point value per feature for `y_true`. - - In the snippet below, there is a single floating point value per example for - `y_true` and `# classes` floating point values per example for `y_pred`. - The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is - `[batch_size, num_classes]`. - - Args: - name: (Optional) string name of the metric instance. - dtype: (Optional) data type of the metric result. - from_logits: (Optional) Whether output is expected to be a logits tensor. - By default, we consider that output encodes a probability distribution. - axis: (Optional) Defaults to -1. The dimension along which the metric is - computed. - - Standalone usage: - - >>> # y_true = one_hot(y_true) = [[0, 1, 0], [0, 0, 1]] - >>> # logits = log(y_pred) - >>> # softmax = exp(logits) / sum(exp(logits), axis=-1) - >>> # softmax = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - >>> # xent = -sum(y * log(softmax), 1) - >>> # log(softmax) = [[-2.9957, -0.0513, -16.1181], - >>> # [-2.3026, -0.2231, -2.3026]] - >>> # y_true * log(softmax) = [[0, -0.0513, 0], [0, 0, -2.3026]] - >>> # xent = [0.0513, 2.3026] - >>> # Reduced xent = (0.0513 + 2.3026) / 2 - >>> m = tf.keras.metrics.SparseCategoricalCrossentropy() - >>> m.update_state([1, 2], - ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) - >>> m.result().numpy() - 1.1769392 - - >>> m.reset_state() - >>> m.update_state([1, 2], - ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]], - ... sample_weight=tf.constant([0.3, 0.7])) - >>> m.result().numpy() - 1.6271976 - - Usage with `compile()` API: - - ```python - model.compile( - optimizer='sgd', - loss='mse', - metrics=[tf.keras.metrics.SparseCategoricalCrossentropy()]) - ``` - """ - - @dtensor_utils.inject_mesh - def __init__(self, - name='sparse_categorical_crossentropy', - dtype=None, - from_logits=False, - axis=-1): - super().__init__( - sparse_categorical_crossentropy, - name, - dtype=dtype, - from_logits=from_logits, - axis=axis) - - -SparseCategoricalCrossentropy.update_state.__doc__ = _SPARSE_CATEGORICAL_UPDATE_STATE_DOCSTRING - - -def accuracy(y_true, y_pred): - [y_pred, y_true], _ = \ - metrics_utils.ragged_assert_compatible_and_get_flat_values( - [y_pred, y_true]) - y_true.shape.assert_is_compatible_with(y_pred.shape) - if y_true.dtype != y_pred.dtype: - y_pred = tf.cast(y_pred, y_true.dtype) - return tf.cast(tf.equal(y_true, y_pred), backend.floatx()) - - -@keras_export('keras.metrics.binary_accuracy') -@tf.__internal__.dispatch.add_dispatch_support -def binary_accuracy(y_true, y_pred, threshold=0.5): - """Calculates how often predictions match binary labels. - - Standalone usage: - >>> y_true = [[1], [1], [0], [0]] - >>> y_pred = [[1], [1], [0], [0]] - >>> m = tf.keras.metrics.binary_accuracy(y_true, y_pred) - >>> assert m.shape == (4,) - >>> m.numpy() - array([1., 1., 1., 1.], dtype=float32) - - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - threshold: (Optional) Float representing the threshold for deciding whether - prediction values are 1 or 0. - - Returns: - Binary accuracy values. shape = `[batch_size, d0, .. dN-1]` - """ - # Note: calls metrics_utils.binary_matches with mean reduction. This maintains - # public facing binary_accuracy behavior and separates it from the vital - # behavior of the binary_matches method needed in backend dependencies. - - return tf.reduce_mean( - metrics_utils.binary_matches(y_true, y_pred, threshold), axis=-1) - - -@keras_export('keras.metrics.categorical_accuracy') -@tf.__internal__.dispatch.add_dispatch_support -def categorical_accuracy(y_true, y_pred): - """Calculates how often predictions match one-hot labels. - - Standalone usage: - >>> y_true = [[0, 0, 1], [0, 1, 0]] - >>> y_pred = [[0.1, 0.9, 0.8], [0.05, 0.95, 0]] - >>> m = tf.keras.metrics.categorical_accuracy(y_true, y_pred) - >>> assert m.shape == (2,) - >>> m.numpy() - array([0., 1.], dtype=float32) - - You can provide logits of classes as `y_pred`, since argmax of - logits and probabilities are the same. - - Args: - y_true: One-hot ground truth values. - y_pred: The prediction values. - - Returns: - Categorical accuracy values. - """ - # Note: wraps metrics_utils.categorical_matches. This separates public facing - # categorical_accuracy behavior from the vital behavior of the - # categorical_matches method needed in backend dependencies. - - return metrics_utils.sparse_categorical_matches( - tf.math.argmax(y_true, axis=-1), y_pred)
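As the note above says, `categorical_accuracy` reduces to comparing argmaxes. A sketch of that equivalence on the doctest inputs (the real function routes through `metrics_utils` instead):

```python
import tensorflow as tf

y_true = tf.constant([[0.0, 0.0, 1.0], [0.0, 1.0, 0.0]])
y_pred = tf.constant([[0.1, 0.9, 0.8], [0.05, 0.95, 0.0]])
# A prediction matches when both argmaxes select the same class index.
matches = tf.cast(
    tf.equal(tf.argmax(y_true, axis=-1), tf.argmax(y_pred, axis=-1)),
    tf.float32)
print(matches.numpy())  # [0. 1.], matching the doctest above
```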
- """ - # Note: wraps metrics_utils.categorical_matches. This seperates public facing - # categorical_accuracy behavior from the vital behavior of the - # categorical_matches method needed in backend dependencies. - - return metrics_utils.sparse_categorical_matches( - tf.math.argmax(y_true, axis=-1), y_pred) - - -@keras_export('keras.metrics.sparse_categorical_accuracy') -@tf.__internal__.dispatch.add_dispatch_support -def sparse_categorical_accuracy(y_true, y_pred): - """Calculates how often predictions match integer labels. - - Standalone usage: - >>> y_true = [2, 1] - >>> y_pred = [[0.1, 0.9, 0.8], [0.05, 0.95, 0]] - >>> m = tf.keras.metrics.sparse_categorical_accuracy(y_true, y_pred) - >>> assert m.shape == (2,) - >>> m.numpy() - array([0., 1.], dtype=float32) - - You can provide logits of classes as `y_pred`, since argmax of - logits and probabilities are same. - - Args: - y_true: Integer ground truth values. - y_pred: The prediction values. - - Returns: - Sparse categorical accuracy values. - """ - # Note: wraps metrics_utils.sparse_categorical_matches method and checks for - # squeezing to align with expected public facing behavior. This seperates - # public facing sparse_categorical_accuracy behavior from the vital behavior - # of the sparse_categorical_matches method needed in backend dependencies. - - matches = metrics_utils.sparse_categorical_matches(y_true, y_pred) - - # if shape is (num_samples, 1) squeeze - if matches.shape.ndims > 1 and matches.shape[-1] == 1: - matches = tf.squeeze(matches, [-1]) - - return matches - - -@keras_export('keras.metrics.top_k_categorical_accuracy') -@tf.__internal__.dispatch.add_dispatch_support -def top_k_categorical_accuracy(y_true, y_pred, k=5): - """Computes how often targets are in the top `K` predictions. - - Standalone usage: - >>> y_true = [[0, 0, 1], [0, 1, 0]] - >>> y_pred = [[0.1, 0.9, 0.8], [0.05, 0.95, 0]] - >>> m = tf.keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=3) - >>> assert m.shape == (2,) - >>> m.numpy() - array([1., 1.], dtype=float32) - - Args: - y_true: The ground truth values. - y_pred: The prediction values. - k: (Optional) Number of top elements to look at for computing accuracy. - Defaults to 5. - - Returns: - Top K categorical accuracy value. - """ - # Note: wraps metrics_utils.top_k_categorical_matches. This seperates - # public facing top_k_categorical_accuracy behavior from the vital behavior - # of the top_k_categorical_matches method needed in backend dependencies. - - return metrics_utils.sparse_top_k_categorical_matches( - tf.math.argmax(y_true, axis=-1), y_pred, k) - - -@keras_export('keras.metrics.sparse_top_k_categorical_accuracy') -@tf.__internal__.dispatch.add_dispatch_support -def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5): - """Computes how often integer targets are in the top `K` predictions. - - Standalone usage: - >>> y_true = [2, 1] - >>> y_pred = [[0.1, 0.9, 0.8], [0.05, 0.95, 0]] - >>> m = tf.keras.metrics.sparse_top_k_categorical_accuracy( - ... y_true, y_pred, k=3) - >>> assert m.shape == (2,) - >>> m.numpy() - array([1., 1.], dtype=float32) - - Args: - y_true: tensor of true targets. - y_pred: tensor of predicted targets. - k: (Optional) Number of top elements to look at for computing accuracy. - Defaults to 5. - - Returns: - Sparse top K categorical accuracy value. - """ - # Note: wraps metrics_utils.sparse_top_k_categorical_matches. 
- - -@keras_export('keras.metrics.sparse_top_k_categorical_accuracy') -@tf.__internal__.dispatch.add_dispatch_support -def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5): - """Computes how often integer targets are in the top `K` predictions. - - Standalone usage: - >>> y_true = [2, 1] - >>> y_pred = [[0.1, 0.9, 0.8], [0.05, 0.95, 0]] - >>> m = tf.keras.metrics.sparse_top_k_categorical_accuracy( - ... y_true, y_pred, k=3) - >>> assert m.shape == (2,) - >>> m.numpy() - array([1., 1.], dtype=float32) - - Args: - y_true: tensor of true targets. - y_pred: tensor of predicted targets. - k: (Optional) Number of top elements to look at for computing accuracy. - Defaults to 5. - - Returns: - Sparse top K categorical accuracy value. - """ - # Note: wraps metrics_utils.sparse_top_k_categorical_matches. This separates - # public facing sparse_top_k_categorical_accuracy behavior from the vital - # behavior of the sparse_top_k_categorical_matches method needed in backend - # dependencies. - - return metrics_utils.sparse_top_k_categorical_matches(y_true, y_pred, k) - - -def cosine_similarity(y_true, y_pred, axis=-1): - """Computes the cosine similarity between labels and predictions. - - Args: - y_true: The ground truth values. - y_pred: The prediction values. - axis: (Optional) Defaults to -1. The dimension along which the cosine - similarity is computed. - - Returns: - Cosine similarity value. - """ - y_true = tf.linalg.l2_normalize(y_true, axis=axis) - y_pred = tf.linalg.l2_normalize(y_pred, axis=axis) - return tf.reduce_sum(y_true * y_pred, axis=axis) diff --git a/keras/metrics/metrics_correctness_test.py b/keras/metrics/metrics_correctness_test.py index a3566d39df8c..6532a151252f 100644 --- a/keras/metrics/metrics_correctness_test.py +++ b/keras/metrics/metrics_correctness_test.py @@ -14,700 +14,810 @@ # ============================================================================== """Tests metrics correctness using Keras model.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np -from keras.testing_infra import test_combinations from keras import layers from keras import losses from keras import metrics +from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import losses_utils def get_multi_io_model(): - inp_1 = layers.Input(shape=(1,), name='input_1') - inp_2 = layers.Input(shape=(1,), name='input_2') - x = layers.Dense(3, kernel_initializer='ones', trainable=False) - out_1 = layers.Dense( - 1, kernel_initializer='ones', name='output_1', trainable=False) - out_2 = layers.Dense( - 1, kernel_initializer='ones', name='output_2', trainable=False) + inp_1 = layers.Input(shape=(1,), name="input_1") + inp_2 = layers.Input(shape=(1,), name="input_2") + x = layers.Dense(3, kernel_initializer="ones", trainable=False) + out_1 = layers.Dense( + 1, kernel_initializer="ones", name="output_1", trainable=False + ) + out_2 = layers.Dense( + 1, kernel_initializer="ones", name="output_2", trainable=False + ) - branch_a = [inp_1, x, out_1] - branch_b = [inp_2, x, out_2] - return test_utils.get_multi_io_model(branch_a, branch_b) + branch_a = [inp_1, x, out_1] + branch_b = [inp_2, x, out_2] + return test_utils.get_multi_io_model(branch_a, branch_b) def custom_generator_multi_io(sample_weights=None): - batch_size = 2 - num_samples = 5 - inputs = np.asarray([[1.], [2.], [3.], [4.], [5.]]) - targets_1 = np.asarray([[2.], [4.], [6.], [8.], [10.]]) - targets_2 = np.asarray([[1.], [2.], [3.], [4.], [5.]]) - start = 0 - while True: - if start > num_samples: - start = 0 - end = start + batch_size - x = [inputs[start:end], inputs[start:end]] - y = [targets_1[start:end], targets_2[start:end]] - if sample_weights: - sw = tf.nest.map_structure(lambda w: w[start:end], sample_weights) - else: - sw = None - start = end - yield x, y, sw - - -@test_combinations.run_with_all_model_types(exclude_models=['sequential']) + batch_size = 2 + num_samples = 5 + inputs = np.asarray([[1.0], [2.0], [3.0], [4.0], [5.0]]) + targets_1 = np.asarray([[2.0], [4.0], [6.0], [8.0], [10.0]]) + targets_2 = np.asarray([[1.0], [2.0], [3.0], [4.0], [5.0]]) + start = 0 + while True: + if start > num_samples: + start = 0 + end = start + batch_size + x = [inputs[start:end], inputs[start:end]] + y = [targets_1[start:end],
targets_2[start:end]] + if sample_weights: + sw = tf.nest.map_structure(lambda w: w[start:end], sample_weights) + else: + sw = None + start = end + yield x, y, sw + + +@test_combinations.run_with_all_model_types(exclude_models=["sequential"]) @test_combinations.run_all_keras_modes(always_skip_v1=True) class TestMetricsCorrectnessMultiIO(test_combinations.TestCase): - - def _get_compiled_multi_io_model(self): - model = get_multi_io_model() - model.compile( - optimizer='rmsprop', - loss='mse', - metrics=[metrics.MeanSquaredError(name='mean_squared_error')], - weighted_metrics=[ - metrics.MeanSquaredError(name='mean_squared_error_2') - ], - run_eagerly=test_utils.should_run_eagerly()) - return model - - def setUp(self): - super(TestMetricsCorrectnessMultiIO, self).setUp() - self.x = np.asarray([[1.], [2.], [3.], [4.], [5.]]) - self.y1 = np.asarray([[2.], [4.], [6.], [8.], [10.]]) - self.y2 = np.asarray([[1.], [2.], [3.], [4.], [5.]]) - self.sample_weight_1 = np.asarray([2., 3., 4., 5., 6.]) - self.sample_weight_2 = np.asarray([3.5, 2.5, 1.5, 0.5, 3.]) - - # y_true_1 = [[2.], [4.], [6.], [8.], [10.]] - # y_pred_1 = [[3.], [6.], [9.], [12.], [15.]] - # y_true_2 = [[1.], [2.], [3.], [4.], [5.]] - # y_pred_2 = [[3.], [6.], [9.], [12.], [15.]] - - # Weighted metric `output_1`: - # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + - # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) + - # ((15 - 10)^2 * 6) - # = 280 - # Count = (2 + 3) + (4 + 5) + 6 = 20 - # Result = 14 - - # Weighted metric `output_2`: - # Total = ((3 - 1)^2 * 3.5 + (6 - 2)^2 * 2.5) + - # ((9 - 3)^2 * 1.5 + (12 - 4)^2 * 0.5) + - # (15 - 5)^2 * 3.0 - # = 440 - # Count = (3.5 + 2.5) + (1.5 + 0.5) + 3.0 = 11.0 - # Result = 40 - - # Loss `output_1` with weights: - # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + - # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) + - # ((15 - 10)^2 * 6) - # = 280 - # Count = 2 + 2 + 1 - # Result = 56 - - # Loss `output_1` without weights/Metric `output_1`: - # Total = ((3 - 2)^2 + (6 - 4)^2) + ((9 - 6)^2 + (12 - 8)^2) + (15 - 10)^2 - # = 55 - # Count = 2 + 2 + 1 - # Result = 11 - - # Loss `output_2` with weights: - # Total = ((3 - 1)^2 * 3.5 + (6 - 2)^2 * 2.5) + - # ((9 - 3)^2 * 1.5 + (12 - 4)^2 * 0.5) + - # (15 - 5)^2 * 3.0 - # = 440 - # Count = 2 + 2 + 1 - # Result = 88 - - # Loss `output_2` without weights/Metric `output_2`: - # Total = ((3 - 1)^2 + (6 - 2)^2) + ((9 - 3)^2 + (12 - 4)^2) + (15 - 5)^2 - # = 220 - # Count = 2 + 2 + 1 - # Result = 44 - - # Total loss with weights = 56 + 88 = 144 - # Total loss without weights = 11 + 44 = 55 - - self.wmse = 'mean_squared_error_2' - self.expected_fit_result_with_weights = { - 'output_1_mean_squared_error': [11, 11], - 'output_2_mean_squared_error': [44, 44], - 'output_1_' + self.wmse: [14, 14], - 'output_2_' + self.wmse: [40, 40], - 'loss': [144, 144], - 'output_1_loss': [56, 56], - 'output_2_loss': [88, 88], - } - - self.expected_fit_result_with_weights_output_2 = { - 'output_1_mean_squared_error': [11, 11], - 'output_2_mean_squared_error': [44, 44], - 'output_1_' + self.wmse: [11, 11], - 'output_2_' + self.wmse: [40, 40], - 'loss': [99, 99], - 'output_1_loss': [11, 11], - 'output_2_loss': [88, 88], - } - - self.expected_fit_result = { - 'output_1_mean_squared_error': [11, 11], - 'output_2_mean_squared_error': [44, 44], - 'output_1_' + self.wmse: [11, 11], - 'output_2_' + self.wmse: [44, 44], - 'loss': [55, 55], - 'output_1_loss': [11, 11], - 'output_2_loss': [44, 44], - } - - # In the order: 'loss', 'output_1_loss', 'output_2_loss', - # 'output_1_mean_squared_error', 
'output_1_mean_squared_error_2', - # 'output_2_mean_squared_error', 'output_2_mean_squared_error_2' - self.expected_batch_result_with_weights = [144, 56, 88, 11, 14, 44, 40] - self.expected_batch_result_with_weights_output_2 = [ - 99, 11, 88, 11, 11, 44, 40 - ] - self.expected_batch_result = [55, 11, 44, 11, 11, 44, 44] - - def test_fit(self): - model = self._get_compiled_multi_io_model() - history = model.fit([self.x, self.x], [self.y1, self.y2], - batch_size=2, - epochs=2, - shuffle=False) - for key, value in self.expected_fit_result.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_fit_with_sample_weight(self): - model = self._get_compiled_multi_io_model() - history = model.fit([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }, - batch_size=2, - epochs=2, - shuffle=False) - for key, value in self.expected_fit_result_with_weights.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - # Set weights for one output (use batch size). - history = model.fit([self.x, self.x], [self.y1, self.y2], - sample_weight={'output_2': self.sample_weight_2}, - batch_size=2, - epochs=2, - shuffle=False) - - for key, value in self.expected_fit_result_with_weights_output_2.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_eval(self): - model = self._get_compiled_multi_io_model() - eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], - batch_size=2) - self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) - - def test_eval_with_sample_weight(self): - model = self._get_compiled_multi_io_model() - eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], - batch_size=2, - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(eval_result, self.expected_batch_result_with_weights, - 1e-3) - - # Set weights for one output. - model = self._get_compiled_multi_io_model() - eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], - batch_size=2, - sample_weight={ - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(eval_result, - self.expected_batch_result_with_weights_output_2, 1e-3) - - # Verify that metric value is same with arbitrary weights and batch size. - x = np.random.random((50, 1)) - y = np.random.random((50, 1)) - w = np.random.random((50,)) - mse1 = model.evaluate([x, x], [y, y], sample_weight=[w, w], batch_size=5)[3] - mse2 = model.evaluate([x, x], [y, y], sample_weight=[w, w], - batch_size=10)[3] - self.assertAllClose(mse1, mse2, 1e-3) - - def test_train_on_batch(self): - model = self._get_compiled_multi_io_model() - result = model.train_on_batch([self.x, self.x], [self.y1, self.y2]) - self.assertAllClose(result, self.expected_batch_result, 1e-3) - - def test_train_on_batch_with_sample_weight(self): - model = self._get_compiled_multi_io_model() - result = model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) - - # Set weights for one output. 
- result = model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(result, - self.expected_batch_result_with_weights_output_2, 1e-3) - - def test_test_on_batch(self): - model = self._get_compiled_multi_io_model() - result = model.test_on_batch([self.x, self.x], [self.y1, self.y2]) - self.assertAllClose(result, self.expected_batch_result, 1e-3) - - def test_test_on_batch_with_sample_weight(self): - model = self._get_compiled_multi_io_model() - result = model.test_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) - - # Set weights for one output. - result = model.test_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(result, - self.expected_batch_result_with_weights_output_2, 1e-3) - - def test_fit_generator(self): - model = self._get_compiled_multi_io_model() - history = model.fit_generator( - custom_generator_multi_io(), steps_per_epoch=3, epochs=2) - for key, value in self.expected_fit_result.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_fit_generator_with_sample_weight(self): - model = self._get_compiled_multi_io_model() - history = model.fit_generator( - custom_generator_multi_io( - sample_weights=[self.sample_weight_1, self.sample_weight_2]), - steps_per_epoch=3, - epochs=2) - for key, value in self.expected_fit_result_with_weights.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - # Set weights for one output. - history = model.fit_generator( - custom_generator_multi_io( - sample_weights={'output_2': self.sample_weight_2}), - steps_per_epoch=3, - epochs=2) - for key, value in self.expected_fit_result_with_weights_output_2.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_eval_generator(self): - model = self._get_compiled_multi_io_model() - eval_result = model.evaluate_generator(custom_generator_multi_io(), steps=3) - self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) - - def test_eval_generator_with_sample_weight(self): - model = self._get_compiled_multi_io_model() - eval_result = model.evaluate_generator( - custom_generator_multi_io( - sample_weights=[self.sample_weight_1, self.sample_weight_2]), - steps=3) - self.assertAllClose(eval_result, self.expected_batch_result_with_weights, - 1e-3) - - # Set weights for one output. 
- eval_result = model.evaluate_generator( - custom_generator_multi_io( - sample_weights={'output_2': self.sample_weight_2}), - steps=3) - self.assertAllClose(eval_result, - self.expected_batch_result_with_weights_output_2, 1e-3) + def _get_compiled_multi_io_model(self): + model = get_multi_io_model() + model.compile( + optimizer="rmsprop", + loss="mse", + metrics=[metrics.MeanSquaredError(name="mean_squared_error")], + weighted_metrics=[ + metrics.MeanSquaredError(name="mean_squared_error_2") + ], + run_eagerly=test_utils.should_run_eagerly(), + ) + return model + + def setUp(self): + super(TestMetricsCorrectnessMultiIO, self).setUp() + self.x = np.asarray([[1.0], [2.0], [3.0], [4.0], [5.0]]) + self.y1 = np.asarray([[2.0], [4.0], [6.0], [8.0], [10.0]]) + self.y2 = np.asarray([[1.0], [2.0], [3.0], [4.0], [5.0]]) + self.sample_weight_1 = np.asarray([2.0, 3.0, 4.0, 5.0, 6.0]) + self.sample_weight_2 = np.asarray([3.5, 2.5, 1.5, 0.5, 3.0]) + + # y_true_1 = [[2.], [4.], [6.], [8.], [10.]] + # y_pred_1 = [[3.], [6.], [9.], [12.], [15.]] + # y_true_2 = [[1.], [2.], [3.], [4.], [5.]] + # y_pred_2 = [[3.], [6.], [9.], [12.], [15.]] + + # Weighted metric `output_1`: + # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + + # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) + + # ((15 - 10)^2 * 6) + # = 280 + # Count = (2 + 3) + (4 + 5) + 6 = 20 + # Result = 14 + + # Weighted metric `output_2`: + # Total = ((3 - 1)^2 * 3.5 + (6 - 2)^2 * 2.5) + + # ((9 - 3)^2 * 1.5 + (12 - 4)^2 * 0.5) + + # (15 - 5)^2 * 3.0 + # = 440 + # Count = (3.5 + 2.5) + (1.5 + 0.5) + 3.0 = 11.0 + # Result = 40 + + # Loss `output_1` with weights: + # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + + # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) + + # ((15 - 10)^2 * 6) + # = 280 + # Count = 2 + 2 + 1 + # Result = 56 + + # Loss `output_1` without weights/Metric `output_1`: + # Total = ((3 - 2)^2 + (6 - 4)^2) + ((9 - 6)^2 + \ + # (12 - 8)^2) + (15 - 10)^2 + # = 55 + # Count = 2 + 2 + 1 + # Result = 11 + + # Loss `output_2` with weights: + # Total = ((3 - 1)^2 * 3.5 + (6 - 2)^2 * 2.5) + + # ((9 - 3)^2 * 1.5 + (12 - 4)^2 * 0.5) + + # (15 - 5)^2 * 3.0 + # = 440 + # Count = 2 + 2 + 1 + # Result = 88 + + # Loss `output_2` without weights/Metric `output_2`: + # Total = ((3 - 1)^2 + (6 - 2)^2) + ((9 - 3)^2 + \ + # (12 - 4)^2) + (15 - 5)^2 + # = 220 + # Count = 2 + 2 + 1 + # Result = 44 + + # Total loss with weights = 56 + 88 = 144 + # Total loss without weights = 11 + 44 = 55 + + self.wmse = "mean_squared_error_2" + self.expected_fit_result_with_weights = { + "output_1_mean_squared_error": [11, 11], + "output_2_mean_squared_error": [44, 44], + "output_1_" + self.wmse: [14, 14], + "output_2_" + self.wmse: [40, 40], + "loss": [144, 144], + "output_1_loss": [56, 56], + "output_2_loss": [88, 88], + } + + self.expected_fit_result_with_weights_output_2 = { + "output_1_mean_squared_error": [11, 11], + "output_2_mean_squared_error": [44, 44], + "output_1_" + self.wmse: [11, 11], + "output_2_" + self.wmse: [40, 40], + "loss": [99, 99], + "output_1_loss": [11, 11], + "output_2_loss": [88, 88], + } + + self.expected_fit_result = { + "output_1_mean_squared_error": [11, 11], + "output_2_mean_squared_error": [44, 44], + "output_1_" + self.wmse: [11, 11], + "output_2_" + self.wmse: [44, 44], + "loss": [55, 55], + "output_1_loss": [11, 11], + "output_2_loss": [44, 44], + } + + # In the order: 'loss', 'output_1_loss', 'output_2_loss', + # 'output_1_mean_squared_error', 'output_1_mean_squared_error_2', + # 'output_2_mean_squared_error', 'output_2_mean_squared_error_2' + 
self.expected_batch_result_with_weights = [144, 56, 88, 11, 14, 44, 40] + self.expected_batch_result_with_weights_output_2 = [ + 99, + 11, + 88, + 11, + 11, + 44, + 40, + ] + self.expected_batch_result = [55, 11, 44, 11, 11, 44, 44] + + def test_fit(self): + model = self._get_compiled_multi_io_model() + history = model.fit( + [self.x, self.x], + [self.y1, self.y2], + batch_size=2, + epochs=2, + shuffle=False, + ) + for key, value in self.expected_fit_result.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_fit_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + history = model.fit( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + batch_size=2, + epochs=2, + shuffle=False, + ) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + # Set weights for one output (use batch size). + history = model.fit( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={"output_2": self.sample_weight_2}, + batch_size=2, + epochs=2, + shuffle=False, + ) + + for ( + key, + value, + ) in self.expected_fit_result_with_weights_output_2.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_eval(self): + model = self._get_compiled_multi_io_model() + eval_result = model.evaluate( + [self.x, self.x], [self.y1, self.y2], batch_size=2 + ) + self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + + def test_eval_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + eval_result = model.evaluate( + [self.x, self.x], + [self.y1, self.y2], + batch_size=2, + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + eval_result, self.expected_batch_result_with_weights, 1e-3 + ) + + # Set weights for one output. + model = self._get_compiled_multi_io_model() + eval_result = model.evaluate( + [self.x, self.x], + [self.y1, self.y2], + batch_size=2, + sample_weight={ + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + eval_result, self.expected_batch_result_with_weights_output_2, 1e-3 + ) + + # Verify that metric value is same with arbitrary weights and batch + # size. + x = np.random.random((50, 1)) + y = np.random.random((50, 1)) + w = np.random.random((50,)) + mse1 = model.evaluate( + [x, x], [y, y], sample_weight=[w, w], batch_size=5 + )[3] + mse2 = model.evaluate( + [x, x], [y, y], sample_weight=[w, w], batch_size=10 + )[3] + self.assertAllClose(mse1, mse2, 1e-3) + + def test_train_on_batch(self): + model = self._get_compiled_multi_io_model() + result = model.train_on_batch([self.x, self.x], [self.y1, self.y2]) + self.assertAllClose(result, self.expected_batch_result, 1e-3) + + def test_train_on_batch_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + result = model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights, 1e-3 + ) + + # Set weights for one output. 
+ result = model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights_output_2, 1e-3 + ) + + def test_test_on_batch(self): + model = self._get_compiled_multi_io_model() + result = model.test_on_batch([self.x, self.x], [self.y1, self.y2]) + self.assertAllClose(result, self.expected_batch_result, 1e-3) + + def test_test_on_batch_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + result = model.test_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights, 1e-3 + ) + + # Set weights for one output. + result = model.test_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights_output_2, 1e-3 + ) + + def test_fit_generator(self): + model = self._get_compiled_multi_io_model() + history = model.fit_generator( + custom_generator_multi_io(), steps_per_epoch=3, epochs=2 + ) + for key, value in self.expected_fit_result.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_fit_generator_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + history = model.fit_generator( + custom_generator_multi_io( + sample_weights=[self.sample_weight_1, self.sample_weight_2] + ), + steps_per_epoch=3, + epochs=2, + ) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + # Set weights for one output. + history = model.fit_generator( + custom_generator_multi_io( + sample_weights={"output_2": self.sample_weight_2} + ), + steps_per_epoch=3, + epochs=2, + ) + for ( + key, + value, + ) in self.expected_fit_result_with_weights_output_2.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_eval_generator(self): + model = self._get_compiled_multi_io_model() + eval_result = model.evaluate_generator( + custom_generator_multi_io(), steps=3 + ) + self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + + def test_eval_generator_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + eval_result = model.evaluate_generator( + custom_generator_multi_io( + sample_weights=[self.sample_weight_1, self.sample_weight_2] + ), + steps=3, + ) + self.assertAllClose( + eval_result, self.expected_batch_result_with_weights, 1e-3 + ) + + # Set weights for one output. 
+ eval_result = model.evaluate_generator( + custom_generator_multi_io( + sample_weights={"output_2": self.sample_weight_2} + ), + steps=3, + ) + self.assertAllClose( + eval_result, self.expected_batch_result_with_weights_output_2, 1e-3 + ) @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes(always_skip_v1=True) class TestMetricsCorrectnessSingleIO(test_combinations.TestCase): - - def _get_model(self): - x = layers.Dense(3, kernel_initializer='ones', trainable=False) - out = layers.Dense( - 1, kernel_initializer='ones', name='output', trainable=False) - model = test_utils.get_model_from_layers([x, out], input_shape=(1,)) - model.compile( - optimizer='rmsprop', - loss='mse', - metrics=[metrics.MeanSquaredError(name='mean_squared_error')], - weighted_metrics=[ - metrics.MeanSquaredError(name='mean_squared_error_2') - ], - run_eagerly=test_utils.should_run_eagerly()) - return model - - def _custom_generator(self, sample_weight=None): - batch_size = 2 - num_samples = 4 - x = np.asarray([[1.], [2.], [3.], [4.]]) - y = np.asarray([[2.], [4.], [6.], [8.]]) - w = sample_weight - i = 0 - - while True: - batch_index = i * batch_size % num_samples - i += 1 - start = batch_index - end = start + batch_size - yield x[start:end], y[start:end], None if w is None else w[start:end] - - def setUp(self): - super(TestMetricsCorrectnessSingleIO, self).setUp() - self.x = np.asarray([[1.], [2.], [3.], [4.]]) - self.y = np.asarray([[2.], [4.], [6.], [8.]]) - self.sample_weight = np.asarray([2., 3., 4., 5.]) - self.class_weight = {i: 1 for i in range(10)} - self.class_weight.update({2: 2, 4: 3, 6: 4, 8: 5}) - - # y_true = [[2.], [4.], [6.], [8.]], y_pred = [[3.], [6.], [9.], [12.]] - - # Metric: - # Total = ((3 - 2)^2 + (6 - 4)^2) + ((9 - 6)^2 + (12 - 8)^2) = 30, - # Count = 2 + 2 - # Result = 7.5 - - # Weighted metric: - # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + - # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) - # = 130 - # Count = (2 + 3) + (4 + 5) - # Result = 9.2857141 - - # Total loss with weights: - # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + - # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) - # = 130, - # Count = 2 + 2 - # Result = 32.5 - - # Total loss without weights: - # Total = ((3 - 2)^2 + (6 - 4)^2) + - # ((9 - 6)^2 + (12 - 8)^2) - # = 30, - # Count = 2 + 2 - # Result = 7.5 - - wmse = 'mean_squared_error_2' - - self.expected_fit_result_with_weights = { - 'mean_squared_error': [7.5, 7.5], - wmse: [9.286, 9.286], - 'loss': [32.5, 32.5] - } - - self.expected_fit_result = { - 'mean_squared_error': [7.5, 7.5], - wmse: [7.5, 7.5], - 'loss': [7.5, 7.5] - } - - # In the order: 'loss', 'mean_squared_error', 'mean_squared_error_2' - self.expected_batch_result_with_weights = [32.5, 7.5, 9.286] - self.expected_batch_result = [7.5, 7.5, 7.5] - - def test_fit(self): - model = self._get_model() - - history = model.fit( - self.x, - self.y, - batch_size=2, - epochs=2, - shuffle=False) - for key, value in self.expected_fit_result.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_fit_with_sample_weight(self): - model = self._get_model() - history = model.fit( - self.x, - self.y, - sample_weight=self.sample_weight, - batch_size=2, - epochs=2, - shuffle=False) - for key, value in self.expected_fit_result_with_weights.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_fit_with_class_weight(self): - model = self._get_model() - history = model.fit( - self.x, - self.y, - class_weight=self.class_weight, - batch_size=2, - epochs=2, - shuffle=False) - for key, value 
in self.expected_fit_result_with_weights.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_eval(self): - model = self._get_model() - eval_result = model.evaluate(self.x, self.y, batch_size=2) - self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) - - def test_eval_with_sample_weight(self): - model = self._get_model() - eval_result = model.evaluate( - self.x, self.y, batch_size=2, sample_weight=self.sample_weight) - self.assertAllClose(eval_result, self.expected_batch_result_with_weights, - 1e-3) - - # Verify that metric value is same with arbitrary weights and batch size. - x = np.random.random((50, 1)) - y = np.random.random((50, 1)) - w = np.random.random((50,)) - mse1 = model.evaluate(x, y, sample_weight=w, batch_size=5)[1] - mse2 = model.evaluate(x, y, sample_weight=w, batch_size=10)[1] - self.assertAllClose(mse1, mse2, 1e-3) - - def test_train_on_batch(self): - model = self._get_model() - result = model.train_on_batch(self.x, self.y) - self.assertAllClose(result, self.expected_batch_result, 1e-3) - - def test_train_on_batch_with_sample_weight(self): - model = self._get_model() - result = model.train_on_batch( - self.x, self.y, sample_weight=self.sample_weight) - self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) - - def test_train_on_batch_with_class_weight(self): - model = self._get_model() - result = model.train_on_batch( - self.x, self.y, class_weight=self.class_weight) - self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) - - def test_test_on_batch(self): - model = self._get_model() - result = model.test_on_batch(self.x, self.y) - self.assertAllClose(result, self.expected_batch_result, 1e-3) - - def test_test_on_batch_with_sample_weight(self): - model = self._get_model() - result = model.test_on_batch( - self.x, self.y, sample_weight=self.sample_weight) - self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) - - def test_fit_generator(self): - model = self._get_model() - history = model.fit_generator( - self._custom_generator(), steps_per_epoch=2, epochs=2) - for key, value in self.expected_fit_result.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_fit_generator_with_sample_weight(self): - model = self._get_model() - history = model.fit_generator( - self._custom_generator(sample_weight=self.sample_weight), - steps_per_epoch=2, - epochs=2) - for key, value in self.expected_fit_result_with_weights.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_fit_generator_with_class_weight(self): - model = self._get_model() - history = model.fit_generator( - self._custom_generator(), - steps_per_epoch=2, - epochs=2, - class_weight=self.class_weight) - for key, value in self.expected_fit_result_with_weights.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - def test_eval_generator(self): - model = self._get_model() - eval_result = model.evaluate_generator(self._custom_generator(), steps=2) - self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) - - def test_eval_generator_with_sample_weight(self): - model = self._get_model() - eval_result = model.evaluate_generator( - self._custom_generator(sample_weight=self.sample_weight), steps=2) - self.assertAllClose(eval_result, self.expected_batch_result_with_weights, - 1e-3) - - -@test_combinations.run_with_all_model_types(exclude_models=['sequential']) + def _get_model(self): + x = layers.Dense(3, kernel_initializer="ones", trainable=False) + out = 
layers.Dense( + 1, kernel_initializer="ones", name="output", trainable=False + ) + model = test_utils.get_model_from_layers([x, out], input_shape=(1,)) + model.compile( + optimizer="rmsprop", + loss="mse", + metrics=[metrics.MeanSquaredError(name="mean_squared_error")], + weighted_metrics=[ + metrics.MeanSquaredError(name="mean_squared_error_2") + ], + run_eagerly=test_utils.should_run_eagerly(), + ) + return model + + def _custom_generator(self, sample_weight=None): + batch_size = 2 + num_samples = 4 + x = np.asarray([[1.0], [2.0], [3.0], [4.0]]) + y = np.asarray([[2.0], [4.0], [6.0], [8.0]]) + w = sample_weight + i = 0 + + while True: + batch_index = i * batch_size % num_samples + i += 1 + start = batch_index + end = start + batch_size + yield x[start:end], y[start:end], None if w is None else w[ + start:end + ] + + def setUp(self): + super(TestMetricsCorrectnessSingleIO, self).setUp() + self.x = np.asarray([[1.0], [2.0], [3.0], [4.0]]) + self.y = np.asarray([[2.0], [4.0], [6.0], [8.0]]) + self.sample_weight = np.asarray([2.0, 3.0, 4.0, 5.0]) + self.class_weight = {i: 1 for i in range(10)} + self.class_weight.update({2: 2, 4: 3, 6: 4, 8: 5}) + + # y_true = [[2.], [4.], [6.], [8.]], y_pred = [[3.], [6.], [9.], [12.]] + + # Metric: + # Total = ((3 - 2)^2 + (6 - 4)^2) + ((9 - 6)^2 + (12 - 8)^2) = 30, + # Count = 2 + 2 + # Result = 7.5 + + # Weighted metric: + # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + + # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) + # = 130 + # Count = (2 + 3) + (4 + 5) + # Result = 9.2857141 + + # Total loss with weights: + # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + + # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) + # = 130, + # Count = 2 + 2 + # Result = 32.5 + + # Total loss without weights: + # Total = ((3 - 2)^2 + (6 - 4)^2) + + # ((9 - 6)^2 + (12 - 8)^2) + # = 30, + # Count = 2 + 2 + # Result = 7.5 + + wmse = "mean_squared_error_2" + + self.expected_fit_result_with_weights = { + "mean_squared_error": [7.5, 7.5], + wmse: [9.286, 9.286], + "loss": [32.5, 32.5], + } + + self.expected_fit_result = { + "mean_squared_error": [7.5, 7.5], + wmse: [7.5, 7.5], + "loss": [7.5, 7.5], + } + + # In the order: 'loss', 'mean_squared_error', 'mean_squared_error_2' + self.expected_batch_result_with_weights = [32.5, 7.5, 9.286] + self.expected_batch_result = [7.5, 7.5, 7.5] + + def test_fit(self): + model = self._get_model() + + history = model.fit( + self.x, self.y, batch_size=2, epochs=2, shuffle=False + ) + for key, value in self.expected_fit_result.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_fit_with_sample_weight(self): + model = self._get_model() + history = model.fit( + self.x, + self.y, + sample_weight=self.sample_weight, + batch_size=2, + epochs=2, + shuffle=False, + ) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_fit_with_class_weight(self): + model = self._get_model() + history = model.fit( + self.x, + self.y, + class_weight=self.class_weight, + batch_size=2, + epochs=2, + shuffle=False, + ) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_eval(self): + model = self._get_model() + eval_result = model.evaluate(self.x, self.y, batch_size=2) + self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + + def test_eval_with_sample_weight(self): + model = self._get_model() + eval_result = model.evaluate( + self.x, self.y, batch_size=2, sample_weight=self.sample_weight + ) + 
self.assertAllClose( + eval_result, self.expected_batch_result_with_weights, 1e-3 + ) + + # Verify that metric value is same with arbitrary weights and batch + # size. + x = np.random.random((50, 1)) + y = np.random.random((50, 1)) + w = np.random.random((50,)) + mse1 = model.evaluate(x, y, sample_weight=w, batch_size=5)[1] + mse2 = model.evaluate(x, y, sample_weight=w, batch_size=10)[1] + self.assertAllClose(mse1, mse2, 1e-3) + + def test_train_on_batch(self): + model = self._get_model() + result = model.train_on_batch(self.x, self.y) + self.assertAllClose(result, self.expected_batch_result, 1e-3) + + def test_train_on_batch_with_sample_weight(self): + model = self._get_model() + result = model.train_on_batch( + self.x, self.y, sample_weight=self.sample_weight + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights, 1e-3 + ) + + def test_train_on_batch_with_class_weight(self): + model = self._get_model() + result = model.train_on_batch( + self.x, self.y, class_weight=self.class_weight + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights, 1e-3 + ) + + def test_test_on_batch(self): + model = self._get_model() + result = model.test_on_batch(self.x, self.y) + self.assertAllClose(result, self.expected_batch_result, 1e-3) + + def test_test_on_batch_with_sample_weight(self): + model = self._get_model() + result = model.test_on_batch( + self.x, self.y, sample_weight=self.sample_weight + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights, 1e-3 + ) + + def test_fit_generator(self): + model = self._get_model() + history = model.fit_generator( + self._custom_generator(), steps_per_epoch=2, epochs=2 + ) + for key, value in self.expected_fit_result.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_fit_generator_with_sample_weight(self): + model = self._get_model() + history = model.fit_generator( + self._custom_generator(sample_weight=self.sample_weight), + steps_per_epoch=2, + epochs=2, + ) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_fit_generator_with_class_weight(self): + model = self._get_model() + history = model.fit_generator( + self._custom_generator(), + steps_per_epoch=2, + epochs=2, + class_weight=self.class_weight, + ) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_eval_generator(self): + model = self._get_model() + eval_result = model.evaluate_generator( + self._custom_generator(), steps=2 + ) + self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + + def test_eval_generator_with_sample_weight(self): + model = self._get_model() + eval_result = model.evaluate_generator( + self._custom_generator(sample_weight=self.sample_weight), steps=2 + ) + self.assertAllClose( + eval_result, self.expected_batch_result_with_weights, 1e-3 + ) + + +@test_combinations.run_with_all_model_types(exclude_models=["sequential"]) @test_combinations.run_all_keras_modes(always_skip_v1=True) -@parameterized.parameters([ - losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, - losses_utils.ReductionV2.AUTO, - losses_utils.ReductionV2.SUM -]) +@parameterized.parameters( + [ + losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + losses_utils.ReductionV2.AUTO, + losses_utils.ReductionV2.SUM, + ] +) class TestOutputLossMetrics(test_combinations.TestCase): - - def _get_compiled_multi_io_model(self, loss): - model = get_multi_io_model() - 
model.compile( - optimizer='rmsprop', - loss=loss, - run_eagerly=test_utils.should_run_eagerly()) - return model - - def setUp(self): - super(TestOutputLossMetrics, self).setUp() - self.x = np.asarray([[1.], [2.], [3.], [4.], [5.]]) - self.y1 = np.asarray([[2.], [4.], [6.], [8.], [10.]]) - self.y2 = np.asarray([[1.], [2.], [3.], [4.], [5.]]) - self.sample_weight_1 = np.asarray([2., 3., 4., 5., 6.]) - self.sample_weight_2 = np.asarray([3.5, 2.5, 1.5, 0.5, 3.]) - - # y_true_1 = [[2.], [4.], [6.], [8.], [10.]] - # y_pred_1 = [[3.], [6.], [9.], [12.], [15.]] - # y_true_2 = [[1.], [2.], [3.], [4.], [5.]] - # y_pred_2 = [[3.], [6.], [9.], [12.], [15.]] - - # Loss `output_1`: - # Per-sample weighted losses - # Batch 1 = [(3 - 2)^2 * 2, (6 - 4)^2 * 3)] = [2, 12] - # Batch 2 = [((9 - 6)^2 * 4, (12 - 8)^2 * 5)] = [36, 80] - # Batch 3 = [(15 - 10)^2 * 6] = [150] - - # Result (reduction=SUM) = ((2 + 12)*2 + (36 + 80)*2 + 150) / 5 = 82 - # Result (reduction=SUM_OVER_BATCH_SIZE/AUTO/NONE) = 280 / 5 = 56 - - # Loss `output_2`: - # Per-sample weighted losses - # Batch 1 = [(3 - 1)^2 * 3.5, (6 - 2)^2 * 2.5)] = [14, 40] - # Batch 2 = [(9 - 3)^2 * 1.5, (12 - 4)^2 * 0.5)] = [54, 32] - # Batch 3 = [(15 - 5)^2 * 3] = [300] - - # Result (reduction=SUM) = ((14 + 40)*2 + (54 + 32)*2 + 300) / 5 = 116 - # Result (reduction=SUM_OVER_BATCH_SIZE/AUTO/NONE) = 440 / 5 = 88 - - # When reduction is 'NONE' loss value that is passed to the optimizer will - # be vector loss but what is reported is a scalar, which is an average of - # all the values in all the batch vectors. - - # Total loss = Output_loss_1 + Output_loss_2 - - sum_over_batch_size_fit_result = { - 'loss': [144, 144], - 'output_1_loss': [56, 56], - 'output_2_loss': [88, 88], - } - - self.expected_fit_result = { - losses_utils.ReductionV2.NONE: - sum_over_batch_size_fit_result, - losses_utils.ReductionV2.SUM: { - 'loss': [198, 198], - 'output_1_loss': [82, 82], - 'output_2_loss': [116, 116], - }, - losses_utils.ReductionV2.AUTO: - sum_over_batch_size_fit_result, - losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE: - sum_over_batch_size_fit_result, - } - - # In the order: 'loss', 'output_1_loss', 'output_2_loss', - self.expected_batch_result = { - losses_utils.ReductionV2.NONE: [144, 56, 88], - losses_utils.ReductionV2.SUM: [198, 82, 116], - losses_utils.ReductionV2.AUTO: [144, 56, 88], - losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE: [144, 56, 88], - } - - # 2 + 12 + 36 + 80 + 150 = 280 - # 14 + 40 + 54 + 32 + 300 = 440 - self.expected_single_batch_result = [720, 280, 440] - - def test_fit(self, reduction): - model = self._get_compiled_multi_io_model( - loss=losses.MeanSquaredError(reduction=reduction)) - history = model.fit([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }, - batch_size=2, - epochs=2, - shuffle=False) - for key, value in self.expected_fit_result[reduction].items(): - self.assertAllClose(history.history[key], value) - - def test_eval(self, reduction): - model = self._get_compiled_multi_io_model( - loss=losses.MeanSquaredError(reduction=reduction)) - eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], - batch_size=2, - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(eval_result, self.expected_batch_result[reduction]) - - def test_train_on_batch(self, reduction): - model = self._get_compiled_multi_io_model( - loss=losses.MeanSquaredError(reduction=reduction)) - result = 
model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - - expected_values = self.expected_batch_result[reduction] - if reduction == losses_utils.ReductionV2.SUM: - expected_values = self.expected_single_batch_result - self.assertAllClose(result, expected_values) - - def test_test_on_batch(self, reduction): - model = self._get_compiled_multi_io_model( - loss=losses.MeanSquaredError(reduction=reduction)) - result = model.test_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - expected_values = self.expected_batch_result[reduction] - if reduction == losses_utils.ReductionV2.SUM: - expected_values = self.expected_single_batch_result - self.assertAllClose(result, expected_values) - - def test_fit_generator(self, reduction): - model = self._get_compiled_multi_io_model( - loss=losses.MeanSquaredError(reduction=reduction)) - history = model.fit_generator( - custom_generator_multi_io( - sample_weights=[self.sample_weight_1, self.sample_weight_2]), - steps_per_epoch=3, - epochs=2) - for key, value in self.expected_fit_result[reduction].items(): - self.assertAllClose(history.history[key], value) - - def test_eval_generator(self, reduction): - model = self._get_compiled_multi_io_model( - loss=losses.MeanSquaredError(reduction=reduction)) - eval_result = model.evaluate_generator( - custom_generator_multi_io( - sample_weights=[self.sample_weight_1, self.sample_weight_2]), - steps=3) - self.assertAllClose(eval_result, self.expected_batch_result[reduction]) - - -if __name__ == '__main__': - tf.test.main() + def _get_compiled_multi_io_model(self, loss): + model = get_multi_io_model() + model.compile( + optimizer="rmsprop", + loss=loss, + run_eagerly=test_utils.should_run_eagerly(), + ) + return model + + def setUp(self): + super(TestOutputLossMetrics, self).setUp() + self.x = np.asarray([[1.0], [2.0], [3.0], [4.0], [5.0]]) + self.y1 = np.asarray([[2.0], [4.0], [6.0], [8.0], [10.0]]) + self.y2 = np.asarray([[1.0], [2.0], [3.0], [4.0], [5.0]]) + self.sample_weight_1 = np.asarray([2.0, 3.0, 4.0, 5.0, 6.0]) + self.sample_weight_2 = np.asarray([3.5, 2.5, 1.5, 0.5, 3.0]) + + # y_true_1 = [[2.], [4.], [6.], [8.], [10.]] + # y_pred_1 = [[3.], [6.], [9.], [12.], [15.]] + # y_true_2 = [[1.], [2.], [3.], [4.], [5.]] + # y_pred_2 = [[3.], [6.], [9.], [12.], [15.]] + + # Loss `output_1`: + # Per-sample weighted losses + # Batch 1 = [(3 - 2)^2 * 2, (6 - 4)^2 * 3)] = [2, 12] + # Batch 2 = [((9 - 6)^2 * 4, (12 - 8)^2 * 5)] = [36, 80] + # Batch 3 = [(15 - 10)^2 * 6] = [150] + + # Result (reduction=SUM) = ((2 + 12)*2 + (36 + 80)*2 + 150) / 5 = 82 + # Result (reduction=SUM_OVER_BATCH_SIZE/AUTO/NONE) = 280 / 5 = 56 + + # Loss `output_2`: + # Per-sample weighted losses + # Batch 1 = [(3 - 1)^2 * 3.5, (6 - 2)^2 * 2.5)] = [14, 40] + # Batch 2 = [(9 - 3)^2 * 1.5, (12 - 4)^2 * 0.5)] = [54, 32] + # Batch 3 = [(15 - 5)^2 * 3] = [300] + + # Result (reduction=SUM) = ((14 + 40)*2 + (54 + 32)*2 + 300) / 5 = 116 + # Result (reduction=SUM_OVER_BATCH_SIZE/AUTO/NONE) = 440 / 5 = 88 + + # When reduction is 'NONE' loss value that is passed to the optimizer + # will be vector loss but what is reported is a scalar, which is an + # average of all the values in all the batch vectors. 
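To make the SUM figures above concrete: with batch_size=2 over five samples, each batch reports the sum of its per-sample weighted losses, and the value fit() logs for the epoch is the batch-size-weighted average of those batch sums. A plain-Python check for `output_1`, using the per-sample losses from the comment block:

# Per-sample weighted squared errors for `output_1`, split into batches of 2:
batches = [[2.0, 12.0], [36.0, 80.0], [150.0]]
num_samples = 5

# SUM: batch value = sum over the batch; epoch value = size-weighted average.
print(sum(sum(b) * len(b) for b in batches) / num_samples)  # 82.0

# SUM_OVER_BATCH_SIZE / AUTO / NONE: plain mean over all samples.
print(sum(sum(b) for b in batches) / num_samples)           # 56.0

The same arithmetic over the `output_2` losses [14, 40], [54, 32], [300] yields 116 and 88.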
+ + # Total loss = Output_loss_1 + Output_loss_2 + + sum_over_batch_size_fit_result = { + "loss": [144, 144], + "output_1_loss": [56, 56], + "output_2_loss": [88, 88], + } + + self.expected_fit_result = { + losses_utils.ReductionV2.NONE: sum_over_batch_size_fit_result, + losses_utils.ReductionV2.SUM: { + "loss": [198, 198], + "output_1_loss": [82, 82], + "output_2_loss": [116, 116], + }, + losses_utils.ReductionV2.AUTO: sum_over_batch_size_fit_result, + losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE: sum_over_batch_size_fit_result, # noqa: E501 + } + + # In the order: 'loss', 'output_1_loss', 'output_2_loss', + self.expected_batch_result = { + losses_utils.ReductionV2.NONE: [144, 56, 88], + losses_utils.ReductionV2.SUM: [198, 82, 116], + losses_utils.ReductionV2.AUTO: [144, 56, 88], + losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE: [144, 56, 88], + } + + # 2 + 12 + 36 + 80 + 150 = 280 + # 14 + 40 + 54 + 32 + 300 = 440 + self.expected_single_batch_result = [720, 280, 440] + + def test_fit(self, reduction): + model = self._get_compiled_multi_io_model( + loss=losses.MeanSquaredError(reduction=reduction) + ) + history = model.fit( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + batch_size=2, + epochs=2, + shuffle=False, + ) + for key, value in self.expected_fit_result[reduction].items(): + self.assertAllClose(history.history[key], value) + + def test_eval(self, reduction): + model = self._get_compiled_multi_io_model( + loss=losses.MeanSquaredError(reduction=reduction) + ) + eval_result = model.evaluate( + [self.x, self.x], + [self.y1, self.y2], + batch_size=2, + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose(eval_result, self.expected_batch_result[reduction]) + + def test_train_on_batch(self, reduction): + model = self._get_compiled_multi_io_model( + loss=losses.MeanSquaredError(reduction=reduction) + ) + result = model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + + expected_values = self.expected_batch_result[reduction] + if reduction == losses_utils.ReductionV2.SUM: + expected_values = self.expected_single_batch_result + self.assertAllClose(result, expected_values) + + def test_test_on_batch(self, reduction): + model = self._get_compiled_multi_io_model( + loss=losses.MeanSquaredError(reduction=reduction) + ) + result = model.test_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + expected_values = self.expected_batch_result[reduction] + if reduction == losses_utils.ReductionV2.SUM: + expected_values = self.expected_single_batch_result + self.assertAllClose(result, expected_values) + + def test_fit_generator(self, reduction): + model = self._get_compiled_multi_io_model( + loss=losses.MeanSquaredError(reduction=reduction) + ) + history = model.fit_generator( + custom_generator_multi_io( + sample_weights=[self.sample_weight_1, self.sample_weight_2] + ), + steps_per_epoch=3, + epochs=2, + ) + for key, value in self.expected_fit_result[reduction].items(): + self.assertAllClose(history.history[key], value) + + def test_eval_generator(self, reduction): + model = self._get_compiled_multi_io_model( + loss=losses.MeanSquaredError(reduction=reduction) + ) + eval_result = model.evaluate_generator( + custom_generator_multi_io( + 
sample_weights=[self.sample_weight_1, self.sample_weight_2] + ), + steps=3, + ) + self.assertAllClose(eval_result, self.expected_batch_result[reduction]) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/metrics_functional_test.py b/keras/metrics/metrics_functional_test.py index 76a3875051ff..c52a2f4cea25 100644 --- a/keras/metrics/metrics_functional_test.py +++ b/keras/metrics/metrics_functional_test.py @@ -14,137 +14,177 @@ # ============================================================================== """Tests for Keras metrics functions.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np from keras import backend -from keras.testing_infra import test_combinations from keras import metrics +from keras.testing_infra import test_combinations class KerasFunctionalMetricsTest(tf.test.TestCase, parameterized.TestCase): - - def test_metrics(self): - with self.cached_session(): - y_a = backend.variable(np.random.random((6, 7))) - y_b = backend.variable(np.random.random((6, 7))) - for metric in [metrics.binary_accuracy, metrics.categorical_accuracy]: - output = metric(y_a, y_b) - self.assertEqual(backend.eval(output).shape, (6,)) - - def test_sparse_categorical_accuracy_int(self): - with self.cached_session(): - metric = metrics.sparse_categorical_accuracy - y_true = backend.variable(np.random.randint(0, 7, (6,))) - y_pred = backend.variable(np.random.random((6, 7))) - self.assertEqual(backend.eval(metric(y_true, y_pred)).shape, (6,)) - - # Test correctness if the shape of y_true is (num_samples,) - y_true = backend.variable([1., 0., 0., 0.]) - y_pred = backend.variable( - [[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]]) - self.assertAllEqual( - backend.eval(metric(y_true, y_pred)), [0., 1., 1., 1.]) - - # Test correctness if the shape of y_true is (num_samples, 1) - y_true = backend.variable([[1.], [0.], [0.], [0.]]) - y_pred = backend.variable( - [[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]]) - self.assertAllEqual( - backend.eval(metric(y_true, y_pred)), [0., 1., 1., 1.]) - - # Test correctness if the shape of y_true is (batch_size, seq_length) and - # y_pred is (batch_size, seq_length, num_classes) - y_pred = backend.variable( - np.array([[[0.2, 0.3, 0.1], [0.1, 0.2, 0.7]], - [[0.3, 0.2, 0.1], [0.7, 0.2, 0.1]]])) - y_true = backend.variable(np.array([[1, 0], [1, 0]])) - self.assertAllEqual( - backend.eval(metric(y_true, y_pred)), [[1., 0.], [0., 1.]]) - - def test_sparse_categorical_accuracy_float(self): - with self.cached_session(): - metric = metrics.sparse_categorical_accuracy - y_true = backend.variable(np.random.random((6,))) - y_pred = backend.variable(np.random.random((6, 7))) - self.assertEqual(backend.eval(metric(y_true, y_pred)).shape, (6,)) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_sparse_categorical_accuracy_eager(self): - """Tests that ints passed in via Eager return results. See b/113504761.""" - metric = metrics.sparse_categorical_accuracy - y_true = np.arange(6).reshape([6, 1]) - y_pred = np.arange(36).reshape([6, 6]) - self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.]) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_sparse_categorical_accuracy_float_eager(self): - """Tests that floats passed in via Eager return results. 
See b/113504761.""" - metric = metrics.sparse_categorical_accuracy - y_true = np.arange(6, dtype=np.float32).reshape([6, 1]) - y_pred = np.arange(36).reshape([6, 6]) - self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.]) - - def test_sparse_top_k_categorical_accuracy(self): - with self.cached_session(): - # Test correctness if the shape of y_true is (num_samples, 1) - y_pred = backend.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) - y_true = backend.variable(np.array([[1], [0]])) - result = backend.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)) - self.assertEqual(np.mean(result), 1) - result = backend.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)) - self.assertEqual(np.mean(result), 0.5) - result = backend.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) - self.assertEqual(np.mean(result), 0.) - - # Test correctness if the shape of y_true is (num_samples,) - y_pred = backend.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) - y_true = backend.variable(np.array([1, 0])) - result = backend.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)) - self.assertEqual(np.mean(result), 1) - result = backend.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)) - self.assertEqual(np.mean(result), 0.5) - result = backend.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) - self.assertEqual(np.mean(result), 0.) - - # Test correctness if the shape of y_true is (batch_size, seq_length) and - # y_pred is (batch_size, seq_length, num_classes) - y_pred = backend.variable( - np.array([[[0.3, 0.2, 0.1], [0.1, 0.2, 0.7], [0.1, 0.2, 0.7]], - [[0.3, 0.2, 0.1], [0.1, 0.2, 0.7], [0.3, 0.2, 0.1]]])) - y_true = backend.variable(np.array([[1, 0, 0], [1, 0, 1]])) - result = backend.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)) - self.assertEqual(np.mean(result), 1) - result = backend.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)) - self.assertEqual(np.mean(result), 0.5) - result = backend.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) - self.assertEqual(np.mean(result), 0.) - - def test_top_k_categorical_accuracy(self): - with self.cached_session(): - y_pred = backend.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) - y_true = backend.variable(np.array([[0, 1, 0], [1, 0, 0]])) - result = backend.eval( - metrics.top_k_categorical_accuracy(y_true, y_pred, k=3)) - self.assertEqual(np.mean(result), 1) - result = backend.eval( - metrics.top_k_categorical_accuracy(y_true, y_pred, k=2)) - self.assertEqual(np.mean(result), 0.5) - result = backend.eval( - metrics.top_k_categorical_accuracy(y_true, y_pred, k=1)) - self.assertEqual(np.mean(result), 0.) 
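The 1 / 0.5 / 0 sequence asserted in the top-k tests above follows from rank membership alone: a prediction counts as correct when the true class appears among the k highest scores. A NumPy sketch reproducing the sequence for the same two rows (a re-derivation, not the Keras implementation):

import numpy as np

y_pred = np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])
true_class = np.array([1, 0])  # sparse labels for the same rows

for k in (3, 2, 1):
    topk = np.argsort(-y_pred, axis=1)[:, :k]  # indices of the k largest scores
    hits = [t in row for t, row in zip(true_class, topk)]
    print(k, np.mean(hits))  # k=3 -> 1.0, k=2 -> 0.5, k=1 -> 0.0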
- - -if __name__ == '__main__': - tf.test.main() + def test_metrics(self): + with self.cached_session(): + y_a = backend.variable(np.random.random((6, 7))) + y_b = backend.variable(np.random.random((6, 7))) + for metric in [ + metrics.binary_accuracy, + metrics.categorical_accuracy, + ]: + output = metric(y_a, y_b) + self.assertEqual(backend.eval(output).shape, (6,)) + + def test_sparse_categorical_accuracy_int(self): + with self.cached_session(): + metric = metrics.sparse_categorical_accuracy + y_true = backend.variable(np.random.randint(0, 7, (6,))) + y_pred = backend.variable(np.random.random((6, 7))) + self.assertEqual(backend.eval(metric(y_true, y_pred)).shape, (6,)) + + # Test correctness if the shape of y_true is (num_samples,) + y_true = backend.variable([1.0, 0.0, 0.0, 0.0]) + y_pred = backend.variable( + [[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]] + ) + self.assertAllEqual( + backend.eval(metric(y_true, y_pred)), [0.0, 1.0, 1.0, 1.0] + ) + + # Test correctness if the shape of y_true is (num_samples, 1) + y_true = backend.variable([[1.0], [0.0], [0.0], [0.0]]) + y_pred = backend.variable( + [[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]] + ) + self.assertAllEqual( + backend.eval(metric(y_true, y_pred)), [0.0, 1.0, 1.0, 1.0] + ) + + # Test correctness if the shape of y_true is (batch_size, + # seq_length) and y_pred is (batch_size, seq_length, num_classes) + y_pred = backend.variable( + np.array( + [ + [[0.2, 0.3, 0.1], [0.1, 0.2, 0.7]], + [[0.3, 0.2, 0.1], [0.7, 0.2, 0.1]], + ] + ) + ) + y_true = backend.variable(np.array([[1, 0], [1, 0]])) + self.assertAllEqual( + backend.eval(metric(y_true, y_pred)), [[1.0, 0.0], [0.0, 1.0]] + ) + + def test_sparse_categorical_accuracy_float(self): + with self.cached_session(): + metric = metrics.sparse_categorical_accuracy + y_true = backend.variable(np.random.random((6,))) + y_pred = backend.variable(np.random.random((6, 7))) + self.assertEqual(backend.eval(metric(y_true, y_pred)).shape, (6,)) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_sparse_categorical_accuracy_eager(self): + """Tests that ints passed in via Eager return results. See + b/113504761.""" + metric = metrics.sparse_categorical_accuracy + y_true = np.arange(6).reshape([6, 1]) + y_pred = np.arange(36).reshape([6, 6]) + self.assertAllEqual( + metric(y_true, y_pred), [0.0, 0.0, 0.0, 0.0, 0.0, 1.0] + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_sparse_categorical_accuracy_float_eager(self): + """Tests that floats passed in via Eager return results. 
See + b/113504761.""" + metric = metrics.sparse_categorical_accuracy + y_true = np.arange(6, dtype=np.float32).reshape([6, 1]) + y_pred = np.arange(36).reshape([6, 6]) + self.assertAllEqual( + metric(y_true, y_pred), [0.0, 0.0, 0.0, 0.0, 0.0, 1.0] + ) + + def test_sparse_top_k_categorical_accuracy(self): + with self.cached_session(): + # Test correctness if the shape of y_true is (num_samples, 1) + y_pred = backend.variable( + np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]) + ) + y_true = backend.variable(np.array([[1], [0]])) + result = backend.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3) + ) + self.assertEqual(np.mean(result), 1) + result = backend.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2) + ) + self.assertEqual(np.mean(result), 0.5) + result = backend.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1) + ) + self.assertEqual(np.mean(result), 0.0) + + # Test correctness if the shape of y_true is (num_samples,) + y_pred = backend.variable( + np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]) + ) + y_true = backend.variable(np.array([1, 0])) + result = backend.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3) + ) + self.assertEqual(np.mean(result), 1) + result = backend.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2) + ) + self.assertEqual(np.mean(result), 0.5) + result = backend.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1) + ) + self.assertEqual(np.mean(result), 0.0) + + # Test correctness if the shape of y_true is (batch_size, + # seq_length) and y_pred is (batch_size, seq_length, num_classes) + y_pred = backend.variable( + np.array( + [ + [[0.3, 0.2, 0.1], [0.1, 0.2, 0.7], [0.1, 0.2, 0.7]], + [[0.3, 0.2, 0.1], [0.1, 0.2, 0.7], [0.3, 0.2, 0.1]], + ] + ) + ) + y_true = backend.variable(np.array([[1, 0, 0], [1, 0, 1]])) + result = backend.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3) + ) + self.assertEqual(np.mean(result), 1) + result = backend.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2) + ) + self.assertEqual(np.mean(result), 0.5) + result = backend.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1) + ) + self.assertEqual(np.mean(result), 0.0) + + def test_top_k_categorical_accuracy(self): + with self.cached_session(): + y_pred = backend.variable( + np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]) + ) + y_true = backend.variable(np.array([[0, 1, 0], [1, 0, 0]])) + result = backend.eval( + metrics.top_k_categorical_accuracy(y_true, y_pred, k=3) + ) + self.assertEqual(np.mean(result), 1) + result = backend.eval( + metrics.top_k_categorical_accuracy(y_true, y_pred, k=2) + ) + self.assertEqual(np.mean(result), 0.5) + result = backend.eval( + metrics.top_k_categorical_accuracy(y_true, y_pred, k=1) + ) + self.assertEqual(np.mean(result), 0.0) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/metrics_test.py b/keras/metrics/metrics_test.py deleted file mode 100644 index 2597b2e41615..000000000000 --- a/keras/metrics/metrics_test.py +++ /dev/null @@ -1,2248 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Keras metrics.""" - -import json -import math - -from keras import backend -from keras import layers -from keras import metrics -from keras import Model -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class KerasAccuracyTest(tf.test.TestCase): - - def test_accuracy(self): - acc_obj = metrics.Accuracy(name='my_acc') - - # check config - self.assertEqual(acc_obj.name, 'my_acc') - self.assertTrue(acc_obj.stateful) - self.assertEqual(len(acc_obj.variables), 2) - self.assertEqual(acc_obj.dtype, tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - - # verify that correct value is returned - update_op = acc_obj.update_state([[1], [2], [3], [4]], [[1], [2], [3], [4]]) - self.evaluate(update_op) - result = self.evaluate(acc_obj.result()) - self.assertEqual(result, 1) # 2/2 - - # Check save and restore config - a2 = metrics.Accuracy.from_config(acc_obj.get_config()) - self.assertEqual(a2.name, 'my_acc') - self.assertTrue(a2.stateful) - self.assertEqual(len(a2.variables), 2) - self.assertEqual(a2.dtype, tf.float32) - - # check with sample_weight - result_t = acc_obj([[2], [1]], [[2], [0]], sample_weight=[[0.5], [0.2]]) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 0.96, 2) # 4.5/4.7 - - def test_accuracy_ragged(self): - acc_obj = metrics.Accuracy(name='my_acc') - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - - # verify that correct value is returned - rt1 = tf.ragged.constant([[1], [2], [3], [4]]) - rt2 = tf.ragged.constant([[1], [2], [3], [4]]) - update_op = acc_obj.update_state(rt1, rt2) - self.evaluate(update_op) - result = self.evaluate(acc_obj.result()) - self.assertEqual(result, 1) # 2/2 - - # check with sample_weight - rt1 = tf.ragged.constant([[2], [1]]) - rt2 = tf.ragged.constant([[2], [0]]) - sw_ragged = tf.ragged.constant([[0.5], [0.2]]) - result_t = acc_obj(rt1, rt2, sample_weight=sw_ragged) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 0.96, 2) # 4.5/4.7 - - def test_binary_accuracy(self): - acc_obj = metrics.BinaryAccuracy(name='my_acc') - - # check config - self.assertEqual(acc_obj.name, 'my_acc') - self.assertTrue(acc_obj.stateful) - self.assertEqual(len(acc_obj.variables), 2) - self.assertEqual(acc_obj.dtype, tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - - # verify that correct value is returned - update_op = acc_obj.update_state([[1], [0]], [[1], [0]]) - self.evaluate(update_op) - result = self.evaluate(acc_obj.result()) - self.assertEqual(result, 1) # 2/2 - - # check y_pred squeeze - update_op = acc_obj.update_state([[1], [1]], [[[1]], [[0]]]) - self.evaluate(update_op) - result = self.evaluate(acc_obj.result()) - self.assertAlmostEqual(result, 0.75, 2) # 3/4 - - # check y_true squeeze - result_t = acc_obj([[[1]], [[1]]], [[1], [0]]) - result = 
self.evaluate(result_t) - self.assertAlmostEqual(result, 0.67, 2) # 4/6 - - # check with sample_weight - result_t = acc_obj([[1], [1]], [[1], [0]], [[0.5], [0.2]]) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 0.67, 2) # 4.5/6.7 - - def test_binary_accuracy_ragged(self): - acc_obj = metrics.BinaryAccuracy(name='my_acc') - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - - # verify that correct value is returned - rt1 = tf.ragged.constant([[1], [0]]) - rt2 = tf.ragged.constant([[1], [0]]) - update_op = acc_obj.update_state(rt1, rt2) - self.evaluate(update_op) - result = self.evaluate(acc_obj.result()) - self.assertEqual(result, 1) # 2/2 - - # check y_true squeeze only supported for dense tensors and is - # not supported by ragged tensor (different ranks). --> error - rt1 = tf.ragged.constant([[[1], [1]]]) - rt2 = tf.ragged.constant([[1], [0]]) - with self.assertRaises(ValueError): - result_t = acc_obj(rt1, rt2) - result = self.evaluate(result_t) - - def test_binary_accuracy_threshold(self): - acc_obj = metrics.BinaryAccuracy(threshold=0.7) - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - result_t = acc_obj([[1], [1], [0], [0]], [[0.9], [0.6], [0.4], [0.8]]) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 0.5, 2) - - def test_binary_accuracy_threshold_ragged(self): - acc_obj = metrics.BinaryAccuracy(threshold=0.7) - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - rt1 = tf.ragged.constant([[1], [1], [0], [0]]) - rt2 = tf.ragged.constant([[0.9], [0.6], [0.4], [0.8]]) - result_t = acc_obj(rt1, rt2) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 0.5, 2) - - def test_categorical_accuracy(self): - acc_obj = metrics.CategoricalAccuracy(name='my_acc') - - # check config - self.assertEqual(acc_obj.name, 'my_acc') - self.assertTrue(acc_obj.stateful) - self.assertEqual(len(acc_obj.variables), 2) - self.assertEqual(acc_obj.dtype, tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - - # verify that correct value is returned - update_op = acc_obj.update_state([[0, 0, 1], [0, 1, 0]], - [[0.1, 0.1, 0.8], [0.05, 0.95, 0]]) - self.evaluate(update_op) - result = self.evaluate(acc_obj.result()) - self.assertEqual(result, 1) # 2/2 - - # check with sample_weight - result_t = acc_obj([[0, 0, 1], [0, 1, 0]], - [[0.1, 0.1, 0.8], [0.05, 0, 0.95]], [[0.5], [0.2]]) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 0.93, 2) # 2.5/2.7 - - def test_categorical_accuracy_ragged(self): - acc_obj = metrics.CategoricalAccuracy(name='my_acc') - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - - # verify that correct value is returned - rt1 = tf.ragged.constant([[0, 0, 1], [0, 1, 0]]) - rt2 = tf.ragged.constant([[0.1, 0.1, 0.8], [0.05, 0.95, 0]]) - update_op = acc_obj.update_state(rt1, rt2) - self.evaluate(update_op) - result = self.evaluate(acc_obj.result()) - self.assertEqual(result, 1) # 2/2 - - # check with sample_weight - rt1 = tf.ragged.constant([[0, 0, 1], [0, 1, 0]]) - rt2 = tf.ragged.constant([[0.1, 0.1, 0.8], [0.05, 0, 0.95]]) - sample_weight = tf.ragged.constant([[0.5], [0.2]]) - with self.assertRaises(tf.errors.InvalidArgumentError): - result_t = acc_obj(rt1, rt2, sample_weight) - result = self.evaluate(result_t) - - def test_sparse_categorical_accuracy(self): - acc_obj = metrics.SparseCategoricalAccuracy(name='my_acc') - - # check config - self.assertEqual(acc_obj.name, 'my_acc') - self.assertTrue(acc_obj.stateful) 
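The `stateful` flag asserted here explains several otherwise puzzling expectations in these accuracy tests (4.5/6.7, 2.5/2.7, ...): each metric object accumulates a weighted total and a count across calls, so a weighted result folds in the earlier perfect batch run on the same object. A minimal model of that bookkeeping (RunningAccuracy is an illustrative stand-in, not a Keras class):

class RunningAccuracy:
    """Total/count accumulator mirroring how the accuracy metrics update."""

    def __init__(self):
        self.total = 0.0  # weighted count of correct predictions
        self.count = 0.0  # sum of sample weights seen so far

    def update(self, correct, weights):
        for c, w in zip(correct, weights):
            self.total += c * w
            self.count += w
        return self.total / self.count


acc = RunningAccuracy()
print(acc.update([1, 1], [1.0, 1.0]))  # 1.0: first, unweighted batch (2/2)
print(acc.update([1, 0], [0.5, 0.2]))  # 2.5/2.7 ~ 0.926, as asserted above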
- self.assertEqual(len(acc_obj.variables), 2) - self.assertEqual(acc_obj.dtype, tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - - # verify that correct value is returned - update_op = acc_obj.update_state([[2], [1]], - [[0.1, 0.1, 0.8], [0.05, 0.95, 0]]) - self.evaluate(update_op) - result = self.evaluate(acc_obj.result()) - self.assertEqual(result, 1) # 2/2 - - # check with sample_weight - result_t = acc_obj([[2], [1]], [[0.1, 0.1, 0.8], [0.05, 0, 0.95]], - [[0.5], [0.2]]) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 0.93, 2) # 2.5/2.7 - - def test_sparse_categorical_accuracy_ragged(self): - acc_obj = metrics.SparseCategoricalAccuracy(name='my_acc') - - # verify that correct value is returned - rt1 = tf.ragged.constant([[2], [1]]) - rt2 = tf.ragged.constant([[0.1, 0.1, 0.8], [0.05, 0.95, 0]]) - - with self.assertRaises(tf.errors.InvalidArgumentError): - # sparse_categorical_accuracy is not supported for composite/ragged - # tensors. - update_op = acc_obj.update_state(rt1, rt2) - self.evaluate(update_op) - - def test_sparse_categorical_accuracy_mismatched_dims(self): - acc_obj = metrics.SparseCategoricalAccuracy(name='my_acc') - - # check config - self.assertEqual(acc_obj.name, 'my_acc') - self.assertTrue(acc_obj.stateful) - self.assertEqual(len(acc_obj.variables), 2) - self.assertEqual(acc_obj.dtype, tf.float32) - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - - # verify that correct value is returned - update_op = acc_obj.update_state([2, 1], [[0.1, 0.1, 0.8], [0.05, 0.95, 0]]) - self.evaluate(update_op) - result = self.evaluate(acc_obj.result()) - self.assertEqual(result, 1) # 2/2 - - # check with sample_weight - result_t = acc_obj([2, 1], [[0.1, 0.1, 0.8], [0.05, 0, 0.95]], - [[0.5], [0.2]]) - result = self.evaluate(result_t) - self.assertAlmostEqual(result, 0.93, 2) # 2.5/2.7 - - def test_sparse_categorical_accuracy_mismatched_dims_dynamic(self): - with tf.compat.v1.get_default_graph().as_default(), self.cached_session() as sess: - acc_obj = metrics.SparseCategoricalAccuracy(name='my_acc') - self.evaluate(tf.compat.v1.variables_initializer(acc_obj.variables)) - - t = tf.compat.v1.placeholder(tf.float32) - p = tf.compat.v1.placeholder(tf.float32) - w = tf.compat.v1.placeholder(tf.float32) - - result_t = acc_obj(t, p, w) - result = sess.run( - result_t, - feed_dict=({ - t: [2, 1], - p: [[0.1, 0.1, 0.8], [0.05, 0, 0.95]], - w: [[0.5], [0.2]] - })) - self.assertAlmostEqual(result, 0.71, 2) # 2.5/2.7 - - def test_get_acc(self): - acc_fn = metrics.get('acc') - self.assertEqual(acc_fn, metrics.accuracy) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class CosineSimilarityTest(tf.test.TestCase): - - def l2_norm(self, x, axis): - epsilon = 1e-12 - square_sum = np.sum(np.square(x), axis=axis, keepdims=True) - x_inv_norm = 1 / np.sqrt(np.maximum(square_sum, epsilon)) - return np.multiply(x, x_inv_norm) - - def setup(self, axis=1): - self.np_y_true = np.asarray([[1, 9, 2], [-5, -2, 6]], dtype=np.float32) - self.np_y_pred = np.asarray([[4, 8, 12], [8, 1, 3]], dtype=np.float32) - - y_true = self.l2_norm(self.np_y_true, axis) - y_pred = self.l2_norm(self.np_y_pred, axis) - self.expected_loss = np.sum(np.multiply(y_true, y_pred), axis=(axis,)) - - self.y_true = tf.constant(self.np_y_true) - self.y_pred = tf.constant(self.np_y_pred) - - def test_config(self): - cosine_obj = metrics.CosineSimilarity( - axis=2, name='my_cos', dtype=tf.int32) - self.assertEqual(cosine_obj.name, 
'my_cos') - self.assertEqual(cosine_obj._dtype, tf.int32) - - # Check save and restore config - cosine_obj2 = metrics.CosineSimilarity.from_config(cosine_obj.get_config()) - self.assertEqual(cosine_obj2.name, 'my_cos') - self.assertEqual(cosine_obj2._dtype, tf.int32) - - def test_unweighted(self): - self.setup() - cosine_obj = metrics.CosineSimilarity() - self.evaluate(tf.compat.v1.variables_initializer(cosine_obj.variables)) - loss = cosine_obj(self.y_true, self.y_pred) - expected_loss = np.mean(self.expected_loss) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_weighted(self): - self.setup() - cosine_obj = metrics.CosineSimilarity() - self.evaluate(tf.compat.v1.variables_initializer(cosine_obj.variables)) - sample_weight = np.asarray([1.2, 3.4]) - loss = cosine_obj( - self.y_true, - self.y_pred, - sample_weight=tf.constant(sample_weight)) - expected_loss = np.sum( - self.expected_loss * sample_weight) / np.sum(sample_weight) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - def test_axis(self): - self.setup(axis=1) - cosine_obj = metrics.CosineSimilarity(axis=1) - self.evaluate(tf.compat.v1.variables_initializer(cosine_obj.variables)) - loss = cosine_obj(self.y_true, self.y_pred) - expected_loss = np.mean(self.expected_loss) - self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class MeanAbsoluteErrorTest(tf.test.TestCase): - - def test_config(self): - mae_obj = metrics.MeanAbsoluteError(name='my_mae', dtype=tf.int32) - self.assertEqual(mae_obj.name, 'my_mae') - self.assertEqual(mae_obj._dtype, tf.int32) - - # Check save and restore config - mae_obj2 = metrics.MeanAbsoluteError.from_config(mae_obj.get_config()) - self.assertEqual(mae_obj2.name, 'my_mae') - self.assertEqual(mae_obj2._dtype, tf.int32) - - def test_unweighted(self): - mae_obj = metrics.MeanAbsoluteError() - self.evaluate(tf.compat.v1.variables_initializer(mae_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - - update_op = mae_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = mae_obj.result() - self.assertAllClose(0.5, result, atol=1e-5) - - def test_weighted(self): - mae_obj = metrics.MeanAbsoluteError() - self.evaluate(tf.compat.v1.variables_initializer(mae_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - sample_weight = tf.constant((1., 1.5, 2., 2.5)) - result = mae_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(0.54285, self.evaluate(result), atol=1e-5) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class MeanAbsolutePercentageErrorTest(tf.test.TestCase): - - def test_config(self): - mape_obj = metrics.MeanAbsolutePercentageError( - name='my_mape', dtype=tf.int32) - self.assertEqual(mape_obj.name, 'my_mape') - self.assertEqual(mape_obj._dtype, tf.int32) - - # Check save and restore config - mape_obj2 = metrics.MeanAbsolutePercentageError.from_config( - mape_obj.get_config()) - self.assertEqual(mape_obj2.name, 'my_mape') - self.assertEqual(mape_obj2._dtype, tf.int32) - - def test_unweighted(self): - mape_obj = metrics.MeanAbsolutePercentageError() - 
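The 35e7 this test asserts just below looks like a typo but is real: MAPE guards against division by zero by clipping |y_true| to backend.epsilon() (assumed here to be the 1e-7 default), so each of the seven positions where y_true is 0 and y_pred is 1 contributes 1e7 to the per-element relative error. A NumPy reconstruction under that assumption:

import numpy as np

eps = 1e-7  # backend.epsilon() default
y_true = np.array([(0, 1, 0, 1, 0), (0, 0, 1, 1, 1),
                   (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)], dtype=float)
y_pred = np.array([(0, 0, 1, 1, 0), (1, 1, 1, 1, 1),
                   (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)], dtype=float)

diff = np.abs(y_true - y_pred) / np.maximum(np.abs(y_true), eps)
print(100.0 * diff.mean())  # ~3.5e8, i.e. the 35e7 asserted below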
self.evaluate(tf.compat.v1.variables_initializer(mape_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - - update_op = mape_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = mape_obj.result() - self.assertAllClose(35e7, result, atol=1e-5) - - def test_weighted(self): - mape_obj = metrics.MeanAbsolutePercentageError() - self.evaluate(tf.compat.v1.variables_initializer(mape_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - sample_weight = tf.constant((1., 1.5, 2., 2.5)) - result = mape_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(40e7, self.evaluate(result), atol=1e-5) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class MeanSquaredErrorTest(tf.test.TestCase): - - def test_config(self): - mse_obj = metrics.MeanSquaredError(name='my_mse', dtype=tf.int32) - self.assertEqual(mse_obj.name, 'my_mse') - self.assertEqual(mse_obj._dtype, tf.int32) - - # Check save and restore config - mse_obj2 = metrics.MeanSquaredError.from_config(mse_obj.get_config()) - self.assertEqual(mse_obj2.name, 'my_mse') - self.assertEqual(mse_obj2._dtype, tf.int32) - - def test_unweighted(self): - mse_obj = metrics.MeanSquaredError() - self.evaluate(tf.compat.v1.variables_initializer(mse_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - - update_op = mse_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = mse_obj.result() - self.assertAllClose(0.5, result, atol=1e-5) - - def test_weighted(self): - mse_obj = metrics.MeanSquaredError() - self.evaluate(tf.compat.v1.variables_initializer(mse_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - sample_weight = tf.constant((1., 1.5, 2., 2.5)) - result = mse_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(0.54285, self.evaluate(result), atol=1e-5) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class MeanSquaredLogarithmicErrorTest(tf.test.TestCase): - - def test_config(self): - msle_obj = metrics.MeanSquaredLogarithmicError( - name='my_msle', dtype=tf.int32) - self.assertEqual(msle_obj.name, 'my_msle') - self.assertEqual(msle_obj._dtype, tf.int32) - - # Check save and restore config - msle_obj2 = metrics.MeanSquaredLogarithmicError.from_config( - msle_obj.get_config()) - self.assertEqual(msle_obj2.name, 'my_msle') - self.assertEqual(msle_obj2._dtype, tf.int32) - - def test_unweighted(self): - msle_obj = metrics.MeanSquaredLogarithmicError() - self.evaluate(tf.compat.v1.variables_initializer(msle_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - - update_op = msle_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = msle_obj.result() - self.assertAllClose(0.24022, result, atol=1e-5) - - def test_weighted(self): - msle_obj = 
metrics.MeanSquaredLogarithmicError() - self.evaluate(tf.compat.v1.variables_initializer(msle_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - sample_weight = tf.constant((1., 1.5, 2., 2.5)) - result = msle_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(0.26082, self.evaluate(result), atol=1e-5) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class HingeTest(tf.test.TestCase): - - def test_config(self): - hinge_obj = metrics.Hinge(name='hinge', dtype=tf.int32) - self.assertEqual(hinge_obj.name, 'hinge') - self.assertEqual(hinge_obj._dtype, tf.int32) - - # Check save and restore config - hinge_obj2 = metrics.Hinge.from_config(hinge_obj.get_config()) - self.assertEqual(hinge_obj2.name, 'hinge') - self.assertEqual(hinge_obj2._dtype, tf.int32) - - def test_unweighted(self): - hinge_obj = metrics.Hinge() - self.evaluate(tf.compat.v1.variables_initializer(hinge_obj.variables)) - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], - [-0.25, -1., 0.5, 0.6]]) - - # metric = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] - # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # metric = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] - # = [0.6, 0.4125] - # reduced metric = (0.6 + 0.4125) / 2 - - update_op = hinge_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = hinge_obj.result() - self.assertAllClose(0.506, result, atol=1e-3) - - def test_weighted(self): - hinge_obj = metrics.Hinge() - self.evaluate(tf.compat.v1.variables_initializer(hinge_obj.variables)) - y_true = tf.constant([[-1, 1, -1, 1], [-1, -1, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], - [-0.25, -1., 0.5, 0.6]]) - sample_weight = tf.constant([1.5, 2.]) - - # metric = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # metric = [(0.7 + 0.8 + 0.9 + 0) / 4, (0.75 + 0 + 0.5 + 0.4) / 4] - # = [0.6, 0.4125] - # weighted metric = [0.6 * 1.5, 0.4125 * 2] - # reduced metric = (0.6 * 1.5 + 0.4125 * 2) / (1.5 + 2) - - result = hinge_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(0.493, self.evaluate(result), atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class SquaredHingeTest(tf.test.TestCase): - - def test_config(self): - sq_hinge_obj = metrics.SquaredHinge(name='sq_hinge', dtype=tf.int32) - self.assertEqual(sq_hinge_obj.name, 'sq_hinge') - self.assertEqual(sq_hinge_obj._dtype, tf.int32) - - # Check save and restore config - sq_hinge_obj2 = metrics.SquaredHinge.from_config(sq_hinge_obj.get_config()) - self.assertEqual(sq_hinge_obj2.name, 'sq_hinge') - self.assertEqual(sq_hinge_obj2._dtype, tf.int32) - - def test_unweighted(self): - sq_hinge_obj = metrics.SquaredHinge() - self.evaluate(tf.compat.v1.variables_initializer(sq_hinge_obj.variables)) - y_true = tf.constant([[0, 1, 0, 1], [0, 0, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], - [-0.25, -1., 0.5, 0.6]]) - - # metric = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true = [[-1, 1, -1, 1], [-1, -1, 1, 1]] - # y_true * y_pred = 
[[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, 0.4]] - # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 0.64, 0.81, 0], - # [0.5625, 0, 0.25, 0.16]] - # metric = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / 4] - # = [0.485, 0.2431] - # reduced metric = (0.485 + 0.2431) / 2 - - update_op = sq_hinge_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = sq_hinge_obj.result() - self.assertAllClose(0.364, result, atol=1e-3) - - def test_weighted(self): - sq_hinge_obj = metrics.SquaredHinge() - self.evaluate(tf.compat.v1.variables_initializer(sq_hinge_obj.variables)) - y_true = tf.constant([[-1, 1, -1, 1], [-1, -1, 1, 1]]) - y_pred = tf.constant([[-0.3, 0.2, -0.1, 1.6], - [-0.25, -1., 0.5, 0.6]]) - sample_weight = tf.constant([1.5, 2.]) - - # metric = max(0, 1-y_true * y_pred), where y_true is -1/1 - - # y_true * y_pred = [[0.3, 0.2, 0.1, 1.6], [0.25, 1, 0.5, 0.6]] - # 1 - y_true * y_pred = [[0.7, 0.8, 0.9, -0.6], [0.75, 0, 0.5, 0.4]] - # max(0, 1 - y_true * y_pred) = [[0.7, 0.8, 0.9, 0], [0.75, 0, 0.5, 0.4]] - # squared(max(0, 1 - y_true * y_pred)) = [[0.49, 0.64, 0.81, 0], - # [0.5625, 0, 0.25, 0.16]] - # metric = [(0.49 + 0.64 + 0.81 + 0) / 4, (0.5625 + 0 + 0.25 + 0.16) / 4] - # = [0.485, 0.2431] - # weighted metric = [0.485 * 1.5, 0.2431 * 2] - # reduced metric = (0.485 * 1.5 + 0.2431 * 2) / (1.5 + 2) - - result = sq_hinge_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(0.347, self.evaluate(result), atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class CategoricalHingeTest(tf.test.TestCase): - - def test_config(self): - cat_hinge_obj = metrics.CategoricalHinge( - name='cat_hinge', dtype=tf.int32) - self.assertEqual(cat_hinge_obj.name, 'cat_hinge') - self.assertEqual(cat_hinge_obj._dtype, tf.int32) - - # Check save and restore config - cat_hinge_obj2 = metrics.CategoricalHinge.from_config( - cat_hinge_obj.get_config()) - self.assertEqual(cat_hinge_obj2.name, 'cat_hinge') - self.assertEqual(cat_hinge_obj2._dtype, tf.int32) - - def test_unweighted(self): - cat_hinge_obj = metrics.CategoricalHinge() - self.evaluate(tf.compat.v1.variables_initializer(cat_hinge_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - - update_op = cat_hinge_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = cat_hinge_obj.result() - self.assertAllClose(0.5, result, atol=1e-5) - - def test_weighted(self): - cat_hinge_obj = metrics.CategoricalHinge() - self.evaluate(tf.compat.v1.variables_initializer(cat_hinge_obj.variables)) - y_true = tf.constant(((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), - (1, 1, 1, 1, 0), (0, 0, 0, 0, 1))) - y_pred = tf.constant(((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), - (0, 1, 0, 1, 0), (1, 1, 1, 1, 1))) - sample_weight = tf.constant((1., 1.5, 2., 2.5)) - result = cat_hinge_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(0.5, self.evaluate(result), atol=1e-5) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class RootMeanSquaredErrorTest(tf.test.TestCase): - - def test_config(self): - rmse_obj = metrics.RootMeanSquaredError(name='rmse', dtype=tf.int32) - self.assertEqual(rmse_obj.name, 'rmse') - self.assertEqual(rmse_obj._dtype, tf.int32) - - 
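Stepping back to the hinge comments above: all four asserted values reduce to two NumPy expressions over the same max(0, 1 - y_true * y_pred) matrix. A quick re-derivation (plain NumPy, outside the Keras code path):

import numpy as np

y_true = np.array([[-1, 1, -1, 1], [-1, -1, 1, 1]], dtype=float)
y_pred = np.array([[-0.3, 0.2, -0.1, 1.6], [-0.25, -1.0, 0.5, 0.6]])
w = np.array([1.5, 2.0])

margin = np.maximum(0.0, 1.0 - y_true * y_pred)
hinge = margin.mean(axis=1)             # [0.6, 0.4125]
sq_hinge = (margin ** 2).mean(axis=1)   # [0.485, 0.243125]

print(hinge.mean(), (hinge * w).sum() / w.sum())        # ~0.506, ~0.493
print(sq_hinge.mean(), (sq_hinge * w).sum() / w.sum())  # ~0.364, ~0.347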
rmse_obj2 = metrics.RootMeanSquaredError.from_config(rmse_obj.get_config()) - self.assertEqual(rmse_obj2.name, 'rmse') - self.assertEqual(rmse_obj2._dtype, tf.int32) - - def test_unweighted(self): - rmse_obj = metrics.RootMeanSquaredError() - self.evaluate(tf.compat.v1.variables_initializer(rmse_obj.variables)) - y_true = tf.constant((2, 4, 6)) - y_pred = tf.constant((1, 3, 2)) - - update_op = rmse_obj.update_state(y_true, y_pred) - self.evaluate(update_op) - result = rmse_obj.result() - # error = [-1, -1, -4], square(error) = [1, 1, 16], mean = 18/3 = 6 - self.assertAllClose(math.sqrt(6), result, atol=1e-3) - - def test_weighted(self): - rmse_obj = metrics.RootMeanSquaredError() - self.evaluate(tf.compat.v1.variables_initializer(rmse_obj.variables)) - y_true = tf.constant((2, 4, 6, 8)) - y_pred = tf.constant((1, 3, 2, 3)) - sample_weight = tf.constant((0, 1, 0, 1)) - result = rmse_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(math.sqrt(13), self.evaluate(result), atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class TopKCategoricalAccuracyTest(tf.test.TestCase): - - def test_config(self): - a_obj = metrics.TopKCategoricalAccuracy(name='topkca', dtype=tf.int32) - self.assertEqual(a_obj.name, 'topkca') - self.assertEqual(a_obj._dtype, tf.int32) - - a_obj2 = metrics.TopKCategoricalAccuracy.from_config(a_obj.get_config()) - self.assertEqual(a_obj2.name, 'topkca') - self.assertEqual(a_obj2._dtype, tf.int32) - - def test_correctness(self): - a_obj = metrics.TopKCategoricalAccuracy() - self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) - y_true = tf.constant([[0, 0, 1], [0, 1, 0]]) - y_pred = tf.constant([[0.1, 0.9, 0.8], [0.05, 0.95, 0]]) - - result = a_obj(y_true, y_pred) - self.assertEqual(1, self.evaluate(result)) # both the samples match - - # With `k` < 5. - a_obj = metrics.TopKCategoricalAccuracy(k=1) - self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) - result = a_obj(y_true, y_pred) - self.assertEqual(0.5, self.evaluate(result)) # only sample #2 matches - - # With `k` > 5. - y_true = tf.constant([[0, 0, 1, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0]]) - y_pred = tf.constant([[0.5, 0.9, 0.1, 0.7, 0.6, 0.5, 0.4], - [0.05, 0.95, 0, 0, 0, 0, 0]]) - a_obj = metrics.TopKCategoricalAccuracy(k=6) - self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) - result = a_obj(y_true, y_pred) - self.assertEqual(0.5, self.evaluate(result)) # only 1 sample matches. 
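For the RootMeanSquaredError expectations a little further up, the weighted variant averages the squared errors by total weight before taking the root, so the 0/1 weights simply select samples two and four:

import numpy as np

sq3 = (np.array([1.0, 3.0, 2.0]) - np.array([2.0, 4.0, 6.0])) ** 2  # [1, 1, 16]
print(np.sqrt(sq3.mean()))                 # sqrt(6) ~ 2.449 (unweighted case)

sq4 = (np.array([1.0, 3.0, 2.0, 3.0]) - np.array([2.0, 4.0, 6.0, 8.0])) ** 2
w = np.array([0.0, 1.0, 0.0, 1.0])
print(np.sqrt((sq4 * w).sum() / w.sum()))  # sqrt((1 + 25) / 2) = sqrt(13)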
- - def test_weighted(self): - a_obj = metrics.TopKCategoricalAccuracy(k=2) - self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) - y_true = tf.constant([[0, 1, 0], [1, 0, 0], [0, 0, 1]]) - y_pred = tf.constant([[0, 0.9, 0.1], [0, 0.9, 0.1], [0, 0.9, 0.1]]) - sample_weight = tf.constant((1.0, 0.0, 1.0)) - result = a_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(1.0, self.evaluate(result), atol=1e-5) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class SparseTopKCategoricalAccuracyTest(tf.test.TestCase): - - def test_config(self): - a_obj = metrics.SparseTopKCategoricalAccuracy( - name='stopkca', dtype=tf.int32) - self.assertEqual(a_obj.name, 'stopkca') - self.assertEqual(a_obj._dtype, tf.int32) - - a_obj2 = metrics.SparseTopKCategoricalAccuracy.from_config( - a_obj.get_config()) - self.assertEqual(a_obj2.name, 'stopkca') - self.assertEqual(a_obj2._dtype, tf.int32) - - def test_correctness(self): - a_obj = metrics.SparseTopKCategoricalAccuracy() - self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) - y_true = tf.constant([2, 1]) - y_pred = tf.constant([[0.1, 0.9, 0.8], [0.05, 0.95, 0]]) - - result = a_obj(y_true, y_pred) - self.assertEqual(1, self.evaluate(result)) # both the samples match - - # With `k` < 5. - a_obj = metrics.SparseTopKCategoricalAccuracy(k=1) - self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) - result = a_obj(y_true, y_pred) - self.assertEqual(0.5, self.evaluate(result)) # only sample #2 matches - - # With `k` > 5. - y_pred = tf.constant([[0.5, 0.9, 0.1, 0.7, 0.6, 0.5, 0.4], - [0.05, 0.95, 0, 0, 0, 0, 0]]) - a_obj = metrics.SparseTopKCategoricalAccuracy(k=6) - self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) - result = a_obj(y_true, y_pred) - self.assertEqual(0.5, self.evaluate(result)) # only 1 sample matches. - - def test_weighted(self): - a_obj = metrics.SparseTopKCategoricalAccuracy(k=2) - self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) - y_true = tf.constant([1, 0, 2]) - y_pred = tf.constant([[0, 0.9, 0.1], [0, 0.9, 0.1], [0, 0.9, 0.1]]) - sample_weight = tf.constant((1.0, 0.0, 1.0)) - result = a_obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(1.0, self.evaluate(result), atol=1e-5) - - def test_sparse_top_k_categorical_accuracy_mismatched_dims_dynamic(self): - - if not tf.compat.v1.executing_eagerly(): - # Test will fail in v1 graph mode since the metric is not a normal layer. - # It will aggregate the output by batch dim, which failed on v1 code. 
- self.skipTest('v2 eager mode only') - - class AccLayer(layers.Layer): - - def build(self, _): - self.acc = metrics.SparseTopKCategoricalAccuracy(k=1) - - def call(self, y_true, y_pred): - return self.acc(y_true, y_pred) - - label = layers.Input(shape=[1]) - predict = layers.Input(shape=[3]) - metric_result = AccLayer()(label, predict) - model = Model([label, predict], metric_result) - - result = model.predict([tf.constant([[2], [1]]), - tf.constant([[0.1, 0.1, 0.8], [0.05, 0, 0.95]])], - steps=1) - self.assertAllClose(result, 0.5) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class LogCoshErrorTest(tf.test.TestCase): - - def setup(self): - y_pred = np.asarray([1, 9, 2, -5, -2, 6]).reshape((2, 3)) - y_true = np.asarray([4, 8, 12, 8, 1, 3]).reshape((2, 3)) - - self.batch_size = 6 - error = y_pred - y_true - self.expected_results = np.log((np.exp(error) + np.exp(-error)) / 2) - - self.y_pred = tf.constant(y_pred, dtype=tf.float32) - self.y_true = tf.constant(y_true) - - def test_config(self): - logcosh_obj = metrics.LogCoshError(name='logcosh', dtype=tf.int32) - self.assertEqual(logcosh_obj.name, 'logcosh') - self.assertEqual(logcosh_obj._dtype, tf.int32) - - def test_unweighted(self): - self.setup() - logcosh_obj = metrics.LogCoshError() - self.evaluate(tf.compat.v1.variables_initializer(logcosh_obj.variables)) - - update_op = logcosh_obj.update_state(self.y_true, self.y_pred) - self.evaluate(update_op) - result = logcosh_obj.result() - expected_result = np.sum(self.expected_results) / self.batch_size - self.assertAllClose(result, expected_result, atol=1e-3) - - def test_weighted(self): - self.setup() - logcosh_obj = metrics.LogCoshError() - self.evaluate(tf.compat.v1.variables_initializer(logcosh_obj.variables)) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - result = logcosh_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - - sample_weight = np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3)) - expected_result = np.multiply(self.expected_results, sample_weight) - expected_result = np.sum(expected_result) / np.sum(sample_weight) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class PoissonTest(tf.test.TestCase): - - def setup(self): - y_pred = np.asarray([1, 9, 2, 5, 2, 6]).reshape((2, 3)) - y_true = np.asarray([4, 8, 12, 8, 1, 3]).reshape((2, 3)) - - self.batch_size = 6 - self.expected_results = y_pred - np.multiply(y_true, np.log(y_pred)) - - self.y_pred = tf.constant(y_pred, dtype=tf.float32) - self.y_true = tf.constant(y_true) - - def test_config(self): - poisson_obj = metrics.Poisson(name='poisson', dtype=tf.int32) - self.assertEqual(poisson_obj.name, 'poisson') - self.assertEqual(poisson_obj._dtype, tf.int32) - - poisson_obj2 = metrics.Poisson.from_config(poisson_obj.get_config()) - self.assertEqual(poisson_obj2.name, 'poisson') - self.assertEqual(poisson_obj2._dtype, tf.int32) - - def test_unweighted(self): - self.setup() - poisson_obj = metrics.Poisson() - self.evaluate(tf.compat.v1.variables_initializer(poisson_obj.variables)) - - update_op = poisson_obj.update_state(self.y_true, self.y_pred) - self.evaluate(update_op) - result = poisson_obj.result() - expected_result = np.sum(self.expected_results) / self.batch_size - self.assertAllClose(result, expected_result, atol=1e-3) - - def test_weighted(self): - self.setup() - poisson_obj = metrics.Poisson() - 
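Both reference formulas in the LogCoshError and Poisson classes here are one-liners, and the tests spell them out in their numerically naive forms. A NumPy check of the two unweighted expectations (note the two tests use different y_pred arrays):

import numpy as np

y_true = np.array([4.0, 8.0, 12.0, 8.0, 1.0, 3.0])

err = np.array([1.0, 9.0, 2.0, -5.0, -2.0, 6.0]) - y_true  # logcosh y_pred
print(np.log((np.exp(err) + np.exp(-err)) / 2.0).mean())   # logcosh expected_result

y_pred = np.array([1.0, 9.0, 2.0, 5.0, 2.0, 6.0])          # poisson y_pred (positive)
print((y_pred - y_true * np.log(y_pred)).mean())           # poisson expected_result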
self.evaluate(tf.compat.v1.variables_initializer(poisson_obj.variables)) - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - - result = poisson_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - sample_weight = np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3)) - expected_result = np.multiply(self.expected_results, sample_weight) - expected_result = np.sum(expected_result) / np.sum(sample_weight) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class KLDivergenceTest(tf.test.TestCase): - - def setup(self): - y_pred = np.asarray([.4, .9, .12, .36, .3, .4]).reshape((2, 3)) - y_true = np.asarray([.5, .8, .12, .7, .43, .8]).reshape((2, 3)) - - self.batch_size = 2 - self.expected_results = np.multiply(y_true, np.log(y_true / y_pred)) - - self.y_pred = tf.constant(y_pred, dtype=tf.float32) - self.y_true = tf.constant(y_true) - - def test_config(self): - k_obj = metrics.KLDivergence(name='kld', dtype=tf.int32) - self.assertEqual(k_obj.name, 'kld') - self.assertEqual(k_obj._dtype, tf.int32) - - k_obj2 = metrics.KLDivergence.from_config(k_obj.get_config()) - self.assertEqual(k_obj2.name, 'kld') - self.assertEqual(k_obj2._dtype, tf.int32) - - def test_unweighted(self): - self.setup() - k_obj = metrics.KLDivergence() - self.evaluate(tf.compat.v1.variables_initializer(k_obj.variables)) - - update_op = k_obj.update_state(self.y_true, self.y_pred) - self.evaluate(update_op) - result = k_obj.result() - expected_result = np.sum(self.expected_results) / self.batch_size - self.assertAllClose(result, expected_result, atol=1e-3) - - def test_weighted(self): - self.setup() - k_obj = metrics.KLDivergence() - self.evaluate(tf.compat.v1.variables_initializer(k_obj.variables)) - - sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) - result = k_obj(self.y_true, self.y_pred, sample_weight=sample_weight) - - sample_weight = np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3)) - expected_result = np.multiply(self.expected_results, sample_weight) - expected_result = np.sum(expected_result) / (1.2 + 3.4) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class MeanRelativeErrorTest(tf.test.TestCase): - - def test_config(self): - normalizer = tf.constant([1, 3], dtype=tf.float32) - mre_obj = metrics.MeanRelativeError(normalizer=normalizer, name='mre') - self.assertEqual(mre_obj.name, 'mre') - self.assertArrayNear(self.evaluate(mre_obj.normalizer), [1, 3], 1e-1) - - mre_obj2 = metrics.MeanRelativeError.from_config(mre_obj.get_config()) - self.assertEqual(mre_obj2.name, 'mre') - self.assertArrayNear(self.evaluate(mre_obj2.normalizer), [1, 3], 1e-1) - - def test_unweighted(self): - np_y_pred = np.asarray([2, 4, 6, 8], dtype=np.float32) - np_y_true = np.asarray([1, 3, 2, 3], dtype=np.float32) - expected_error = np.mean( - np.divide(np.absolute(np_y_pred - np_y_true), np_y_true)) - - y_pred = tf.constant(np_y_pred, shape=(1, 4), dtype=tf.float32) - y_true = tf.constant(np_y_true, shape=(1, 4)) - - mre_obj = metrics.MeanRelativeError(normalizer=y_true) - self.evaluate(tf.compat.v1.variables_initializer(mre_obj.variables)) - - result = mre_obj(y_true, y_pred) - self.assertAllClose(self.evaluate(result), expected_error, atol=1e-3) - - def test_weighted(self): - np_y_pred = np.asarray([2, 4, 6, 8], dtype=np.float32) - np_y_true = np.asarray([1, 3, 2, 3], dtype=np.float32) - 
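In the weighted MeanRelativeError case being set up here, the sample weights defined just below sum to 1, so the expected value is simply the dot product of the per-sample relative errors with the weights:

import numpy as np

y_pred = np.array([2.0, 4.0, 6.0, 8.0])
y_true = np.array([1.0, 3.0, 2.0, 3.0])  # doubles as the normalizer
w = np.array([0.2, 0.3, 0.5, 0.0])

rel = np.abs(y_pred - y_true) / y_true   # [1, 1/3, 2, 5/3]
print((rel * w).sum())                   # 1.3; dividing by w.sum() == 1 changes nothing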
sample_weight = np.asarray([0.2, 0.3, 0.5, 0], dtype=np.float32) - rel_errors = np.divide(np.absolute(np_y_pred - np_y_true), np_y_true) - expected_error = np.sum(rel_errors * sample_weight) - - y_pred = tf.constant(np_y_pred, dtype=tf.float32) - y_true = tf.constant(np_y_true) - - mre_obj = metrics.MeanRelativeError(normalizer=y_true) - self.evaluate(tf.compat.v1.variables_initializer(mre_obj.variables)) - - result = mre_obj( - y_true, y_pred, sample_weight=tf.constant(sample_weight)) - self.assertAllClose(self.evaluate(result), expected_error, atol=1e-3) - - def test_zero_normalizer(self): - y_pred = tf.constant([2, 4], dtype=tf.float32) - y_true = tf.constant([1, 3]) - - mre_obj = metrics.MeanRelativeError(normalizer=tf.zeros_like(y_true)) - self.evaluate(tf.compat.v1.variables_initializer(mre_obj.variables)) - - result = mre_obj(y_true, y_pred) - self.assertEqual(self.evaluate(result), 0) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class IoUTest(tf.test.TestCase): - - def test_config(self): - obj = metrics.IoU( - num_classes=2, target_class_ids=[1, 0], name='iou_class_1_0') - self.assertEqual(obj.name, 'iou_class_1_0') - self.assertEqual(obj.num_classes, 2) - self.assertEqual(obj.target_class_ids, [1, 0]) - - obj2 = metrics.IoU.from_config(obj.get_config()) - self.assertEqual(obj2.name, 'iou_class_1_0') - self.assertEqual(obj2.num_classes, 2) - self.assertEqual(obj2.target_class_ids, [1, 0]) - - def test_unweighted(self): - y_pred = [0, 1, 0, 1] - y_true = [0, 0, 1, 1] - - obj = metrics.IoU(num_classes=2, target_class_ids=[0, 1]) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - - result = obj(y_true, y_pred) - - # cm = [[1, 1], - # [1, 1]] - # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2 - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_weighted(self): - y_pred = tf.constant([0, 1, 0, 1], dtype=tf.float32) - y_true = tf.constant([0, 0, 1, 1]) - sample_weight = tf.constant([0.2, 0.3, 0.4, 0.1]) - - obj = metrics.IoU(num_classes=2, target_class_ids=[1, 0]) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - - result = obj(y_true, y_pred, sample_weight=sample_weight) - - # cm = [[0.2, 0.3], - # [0.4, 0.1]] - # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, 0.1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0.1 / (0.4 + 0.5 - 0.1) + 0.2 / (0.6 + 0.5 - 0.2)) / 2 - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_multi_dim_input(self): - y_pred = tf.constant([[0, 1], [0, 1]], dtype=tf.float32) - y_true = tf.constant([[0, 0], [1, 1]]) - sample_weight = tf.constant([[0.2, 0.3], [0.4, 0.1]]) - - obj = metrics.IoU(num_classes=2, target_class_ids=[0, 1]) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - - result = obj(y_true, y_pred, sample_weight=sample_weight) - - # cm = [[0.2, 0.3], - # [0.4, 0.1]] - # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, 0.1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0.2 / (0.6 + 0.5 - 0.2) + 0.1 / (0.4 + 0.5 - 0.1)) / 2 - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_zero_valid_entries(self): - obj = metrics.IoU(num_classes=2, target_class_ids=[0, 1]) - 
self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - self.assertAllClose( - self.evaluate(obj.result()), 0, atol=1e-3) - - def test_zero_and_non_zero_entries(self): - y_pred = tf.constant([1], dtype=tf.float32) - y_true = tf.constant([1]) - - obj = metrics.IoU(num_classes=2, target_class_ids=[0, 1]) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred) - - # cm = [[0, 0], - # [0, 1]] - # sum_row = [0, 1], sum_col = [0, 1], true_positives = [0, 1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (1 / (1 + 1 - 1)) / 1 - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class BinaryIoUTest(tf.test.TestCase): - - def test_config(self): - obj = metrics.BinaryIoU( - target_class_ids=[1, 0], threshold=0.1, name='iou_class_1_0') - self.assertEqual(obj.name, 'iou_class_1_0') - self.assertAlmostEqual(obj.threshold, 0.1) - self.assertEqual(obj.target_class_ids, [1, 0]) - - obj2 = metrics.BinaryIoU.from_config(obj.get_config()) - self.assertEqual(obj.name, 'iou_class_1_0') - self.assertAlmostEqual(obj2.threshold, 0.1) - self.assertEqual(obj.target_class_ids, [1, 0]) - - def test_different_thresholds_weighted(self): - y_true = [0, 1, 0, 1] - y_pred = [0.1, 0.2, 0.4, 0.7] - - sample_weight = tf.constant([0.2, 0.3, 0.4, 0.1]) - # with threshold = 0.3, y_pred will be converted to [0, 0, 1, 1] - # cm = [[0.2, 0.4], - # [0.3, 0.1]] - # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, 0.1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0.2 / (0.6 + 0.5 - 0.2) + 0.1 / (0.4 + 0.5 - 0.1)) / 2 - obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.3) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - sample_weight = tf.constant([0.1, 0.2, 0.4, 0.3]) - # with threshold = 0.5, y_pred will be converted to [0, 0, 0, 1] - # cm = [[0.1+0.4, 0], - # [0.2, 0.3]] - # sum_row = [0.5, 0.5], sum_col = [0.7, 0.3], true_positives = [0.5, 0.3] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0.5 / (0.5 + 0.7 - 0.5) + 0.3 / (0.5 + 0.3 - 0.3)) / 2 - obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.5) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_different_thresholds_unweighted(self): - y_true = [0, 1, 0, 1] - y_pred = [0.1, 0.2, 0.4, 0.7] - - # with threshold = 0.3, y_pred will be converted to [0, 0, 1, 1] - # cm = [[1, 1], - # [1, 1]] - # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2 - obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.3) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - # with threshold = 0.5, y_pred will be converted to [0, 0, 0, 1] - # cm = [[2, 0], - # [1, 1]] - # sum_row = [2, 2], sum_col = [3, 1], true_positives = [2, 1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (2 / (2 + 
3 - 2) + 1 / (2 + 1 - 1)) / 2 - obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.5) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_multi_dim_input(self): - y_true = tf.constant([[0, 1], [0, 1]], dtype=tf.float32) - y_pred = tf.constant([[0.1, 0.7], [0.9, 0.3]]) - threshold = 0.4 # y_pred will become [[0, 1], [1, 0]] - sample_weight = tf.constant([[0.2, 0.3], [0.4, 0.1]]) - # cm = [[0.2, 0.4], - # [0.1, 0.3]] - # sum_row = [0.6, 0.4], sum_col = [0.3, 0.7], true_positives = [0.2, 0.3] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0.2 / (0.6 + 0.3 - 0.2) + 0.3 / (0.4 + 0.7 - 0.3)) / 2 - obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=threshold) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_zero_valid_entries(self): - obj = metrics.BinaryIoU(target_class_ids=[0, 1]) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - self.assertAllClose( - self.evaluate(obj.result()), 0, atol=1e-3) - - def test_zero_and_non_zero_entries(self): - y_pred = tf.constant([0.6], dtype=tf.float32) - threshold = 0.5 - y_true = tf.constant([1]) - - obj = metrics.BinaryIoU(target_class_ids=[0, 1], threshold=threshold) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred) - - # cm = [[0, 0], - # [0, 1]] - # sum_row = [0, 1], sum_col = [0, 1], true_positives = [0, 1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = 1 / (1 + 1 - 1) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class MeanIoUTest(tf.test.TestCase): - - def test_config(self): - m_obj = metrics.MeanIoU(num_classes=2, name='mean_iou') - self.assertEqual(m_obj.name, 'mean_iou') - self.assertEqual(m_obj.num_classes, 2) - - m_obj2 = metrics.MeanIoU.from_config(m_obj.get_config()) - self.assertEqual(m_obj2.name, 'mean_iou') - self.assertEqual(m_obj2.num_classes, 2) - - def test_unweighted(self): - y_pred = [0, 1, 0, 1] - y_true = [0, 0, 1, 1] - - m_obj = metrics.MeanIoU(num_classes=2) - self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) - - result = m_obj(y_true, y_pred) - - # cm = [[1, 1], - # [1, 1]] - # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2 - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_weighted(self): - y_pred = tf.constant([0, 1, 0, 1], dtype=tf.float32) - y_true = tf.constant([0, 0, 1, 1]) - sample_weight = tf.constant([0.2, 0.3, 0.4, 0.1]) - - m_obj = metrics.MeanIoU(num_classes=2) - self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) - - result = m_obj(y_true, y_pred, sample_weight=sample_weight) - - # cm = [[0.2, 0.3], - # [0.4, 0.1]] - # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, 0.1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0.2 / (0.6 + 0.5 - 0.2) + 0.1 / (0.4 + 0.5 - 0.1)) / 2 - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_multi_dim_input(self): - y_pred = 
tf.constant([[0, 1], [0, 1]], dtype=tf.float32) - y_true = tf.constant([[0, 0], [1, 1]]) - sample_weight = tf.constant([[0.2, 0.3], [0.4, 0.1]]) - - m_obj = metrics.MeanIoU(num_classes=2) - self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) - - result = m_obj(y_true, y_pred, sample_weight=sample_weight) - - # cm = [[0.2, 0.3], - # [0.4, 0.1]] - # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, 0.1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0.2 / (0.6 + 0.5 - 0.2) + 0.1 / (0.4 + 0.5 - 0.1)) / 2 - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_zero_valid_entries(self): - m_obj = metrics.MeanIoU(num_classes=2) - self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) - self.assertAllClose(self.evaluate(m_obj.result()), 0, atol=1e-3) - - def test_zero_and_non_zero_entries(self): - y_pred = tf.constant([1], dtype=tf.float32) - y_true = tf.constant([1]) - - m_obj = metrics.MeanIoU(num_classes=2) - self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) - result = m_obj(y_true, y_pred) - - # cm = [[0, 0], - # [0, 1]] - # sum_row = [0, 1], sum_col = [0, 1], true_positives = [0, 1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0 + 1 / (1 + 1 - 1)) / 1 - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class OneHotIoUTest(tf.test.TestCase): - - def test_unweighted(self): - y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) - # y_true will be converted to [2, 0, 1, 0] - y_pred = tf.constant([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], - [0.1, 0.4, 0.5]]) - # y_pred will be converted to [2, 2, 0, 2] - # cm = [[0, 0, 2], - # [1, 0, 0], - # [0, 0, 1] - # sum_row = [1, 0, 3], sum_col = [2, 1, 1], true_positives = [0, 0, 1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0 / (1 + 2 - 0) + 1 / (3 + 1 - 1)) / 2 - obj = metrics.OneHotIoU(num_classes=3, target_class_ids=[0, 2]) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_weighted(self): - y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) - # y_true will be converted to [2, 0, 1, 0] - y_pred = tf.constant([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], - [0.1, 0.4, 0.5]]) - # y_pred will be converted to [2, 2, 0, 2] - sample_weight = [0.1, 0.2, 0.3, 0.4] - # cm = [[0, 0, 0.2+0.4], - # [0.3, 0, 0], - # [0, 0, 0.1]] - # sum_row = [0.3, 0, 0.7], sum_col = [0.6, 0.3, 0.1] - # true_positives = [0, 0, 0.1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0 / (0.3 + 0.6 - 0) + 0.1 / (0.7 + 0.1 - 0.1)) / 2 - obj = metrics.OneHotIoU(num_classes=3, target_class_ids=[0, 2]) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class OneHotMeanIoUTest(tf.test.TestCase): - - def test_unweighted(self): - y_true = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) - # y_true will be converted to [2, 0, 1, 0] - y_pred = tf.constant([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], - [0.1, 0.4, 0.5]]) - # 
y_pred will be converted to [2, 2, 0, 2] - # cm = [[0, 0, 2], - # [1, 0, 0], - # [0, 0, 1] - # sum_row = [1, 0, 3], sum_col = [2, 1, 1], true_positives = [0, 0, 1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0 + 0 + 1 / (3 + 1 - 1)) / 3 - obj = metrics.OneHotMeanIoU(num_classes=3) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - def test_weighted(self): - y_true = tf.constant([ - [0, 0, 1], - [1, 0, 0], - [0, 1, 0], - [1, 0, 0], - [1, 0, 0], - ]) - # y_true will be converted to [2, 0, 1, 0, 0] - y_pred = tf.constant([ - [0.2, 0.3, 0.5], - [0.1, 0.2, 0.7], - [0.5, 0.3, 0.1], - [0.1, 0.4, 0.5], - [0.6, 0.2, 0.2], - ]) - # y_pred will be converted to [2, 2, 0, 2, 0] - sample_weight = [0.1, 0.2, 0.3, 0.3, 0.1] - # cm = [[0.1, 0, 0.2+0.3], - # [0.3, 0, 0], - # [0, 0, 0.1]] - # sum_row = [0.4, 0, 0.6], sum_col = [0.6, 0.3, 0.1] - # true_positives = [0.1, 0, 0.1] - # iou = true_positives / (sum_row + sum_col - true_positives)) - expected_result = (0.1 / (0.4 + 0.6 - 0.1) + 0 + 0.1 / - (0.6 + 0.1 - 0.1)) / 3 - obj = metrics.OneHotMeanIoU(num_classes=3) - self.evaluate(tf.compat.v1.variables_initializer(obj.variables)) - result = obj(y_true, y_pred, sample_weight=sample_weight) - self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class BinaryCrossentropyTest(tf.test.TestCase): - - def test_config(self): - bce_obj = metrics.BinaryCrossentropy( - name='bce', dtype=tf.int32, label_smoothing=0.2) - self.assertEqual(bce_obj.name, 'bce') - self.assertEqual(bce_obj._dtype, tf.int32) - - old_config = bce_obj.get_config() - self.assertAllClose(old_config['label_smoothing'], 0.2, 1e-3) - - # Check save and restore config - bce_obj2 = metrics.BinaryCrossentropy.from_config(old_config) - self.assertEqual(bce_obj2.name, 'bce') - self.assertEqual(bce_obj2._dtype, tf.int32) - new_config = bce_obj2.get_config() - self.assertDictEqual(old_config, new_config) - - def test_unweighted(self): - bce_obj = metrics.BinaryCrossentropy() - self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) - y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) - y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) - result = bce_obj(y_true, y_pred) - - # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 - # y` = clip_ops.clip_by_value(output, EPSILON, 1. 
- EPSILON) - # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] - - # Metric = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) - # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), - # -log(Y_MAX + EPSILON), -log(1)] - # = [(0 + 15.33) / 2, (0 + 0) / 2] - # Reduced metric = 7.665 / 2 - - self.assertAllClose(self.evaluate(result), 3.833, atol=1e-3) - - def test_unweighted_with_logits(self): - bce_obj = metrics.BinaryCrossentropy(from_logits=True) - self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) - - y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) - y_pred = tf.constant([[100.0, -100.0, 100.0], - [100.0, 100.0, -100.0]]) - result = bce_obj(y_true, y_pred) - - # Metric = max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # = [((100 - 100 * 1 + log(1 + exp(-100))) + - # (0 + 100 * 0 + log(1 + exp(-100))) + - # (100 - 100 * 1 + log(1 + exp(-100))), - # ((100 - 100 * 0 + log(1 + exp(-100))) + - # (100 - 100 * 1 + log(1 + exp(-100))) + - # (0 + 100 * 1 + log(1 + exp(-100))))] - # = [(0 + 0 + 0) / 3, 200 / 3] - # Reduced metric = (0 + 66.666) / 2 - - self.assertAllClose(self.evaluate(result), 33.333, atol=1e-3) - - def test_weighted(self): - bce_obj = metrics.BinaryCrossentropy() - self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) - y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) - y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) - sample_weight = tf.constant([1.5, 2.]) - result = bce_obj(y_true, y_pred, sample_weight=sample_weight) - - # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 - # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) - # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] - - # Metric = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) - # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), - # -log(Y_MAX + EPSILON), -log(1)] - # = [(0 + 15.33) / 2, (0 + 0) / 2] - # Weighted metric = [7.665 * 1.5, 0] - # Reduced metric = 7.665 * 1.5 / (1.5 + 2) - - self.assertAllClose(self.evaluate(result), 3.285, atol=1e-3) - - def test_weighted_from_logits(self): - bce_obj = metrics.BinaryCrossentropy(from_logits=True) - self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) - y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) - y_pred = tf.constant([[100.0, -100.0, 100.0], - [100.0, 100.0, -100.0]]) - sample_weight = tf.constant([2., 2.5]) - result = bce_obj(y_true, y_pred, sample_weight=sample_weight) - - # Metric = max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # = [(0 + 0 + 0) / 3, 200 / 3] - # Weighted metric = [0, 66.666 * 2.5] - # Reduced metric = 66.666 * 2.5 / (2 + 2.5) - - self.assertAllClose(self.evaluate(result), 37.037, atol=1e-3) - - def test_label_smoothing(self): - logits = tf.constant(((100., -100., -100.))) - y_true = tf.constant(((1, 0, 1))) - label_smoothing = 0.1 - # Metric: max(x, 0) - x * z + log(1 + exp(-abs(x))) - # (where x = logits and z = y_true) - # Label smoothing: z' = z * (1 - L) + 0.5L - # After label smoothing, label 1 becomes 1 - 0.5L - # label 0 becomes 0.5L - # Applying the above two fns to the given input: - # (100 - 100 * (1 - 0.5 L) + 0 + - # 0 + 100 * (0.5 L) + 0 + - # 0 + 100 * (1 - 0.5 L) + 0) * (1/3) - # = (100 + 50L) * 1/3 - bce_obj = metrics.BinaryCrossentropy( - from_logits=True, label_smoothing=label_smoothing) - self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) - result = bce_obj(y_true, logits) - expected_value = (100.0 + 50.0 * label_smoothing) / 3.0 - 
self.assertAllClose(expected_value, self.evaluate(result), atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class CategoricalCrossentropyTest(tf.test.TestCase): - - def test_config(self): - cce_obj = metrics.CategoricalCrossentropy( - name='cce', dtype=tf.int32, label_smoothing=0.2) - self.assertEqual(cce_obj.name, 'cce') - self.assertEqual(cce_obj._dtype, tf.int32) - - old_config = cce_obj.get_config() - self.assertAllClose(old_config['label_smoothing'], 0.2, 1e-3) - - # Check save and restore config - cce_obj2 = metrics.CategoricalCrossentropy.from_config(old_config) - self.assertEqual(cce_obj2.name, 'cce') - self.assertEqual(cce_obj2._dtype, tf.int32) - new_config = cce_obj2.get_config() - self.assertDictEqual(old_config, new_config) - - def test_unweighted(self): - cce_obj = metrics.CategoricalCrossentropy() - self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) - - y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) - y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) - result = cce_obj(y_true, y_pred) - - # EPSILON = 1e-7, y = y_true, y` = y_pred - # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) - # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - - # Metric = -sum(y * log(y'), axis = -1) - # = -((log 0.95), (log 0.1)) - # = [0.051, 2.302] - # Reduced metric = (0.051 + 2.302) / 2 - - self.assertAllClose(self.evaluate(result), 1.176, atol=1e-3) - - def test_unweighted_from_logits(self): - cce_obj = metrics.CategoricalCrossentropy(from_logits=True) - self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) - - y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) - logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) - result = cce_obj(y_true, logits) - - # softmax = exp(logits) / sum(exp(logits), axis=-1) - # xent = -sum(labels * log(softmax), 1) - - # exp(logits) = [[2.718, 8103.084, 1], [2.718, 2980.958, 2.718]] - # sum(exp(logits), axis=-1) = [8106.802, 2986.394] - # softmax = [[0.00033, 0.99954, 0.00012], [0.00091, 0.99817, 0.00091]] - # log(softmax) = [[-8.00045, -0.00045, -9.00045], - # [-7.00182, -0.00182, -7.00182]] - # labels * log(softmax) = [[0, -0.00045, 0], [0, 0, -7.00182]] - # xent = [0.00045, 7.00182] - # Reduced xent = (0.00045 + 7.00182) / 2 - - self.assertAllClose(self.evaluate(result), 3.5011, atol=1e-3) - - def test_weighted(self): - cce_obj = metrics.CategoricalCrossentropy() - self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) - - y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) - y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) - sample_weight = tf.constant([1.5, 2.]) - result = cce_obj(y_true, y_pred, sample_weight=sample_weight) - - # EPSILON = 1e-7, y = y_true, y` = y_pred - # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) - # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - - # Metric = -sum(y * log(y'), axis = -1) - # = -((log 0.95), (log 0.1)) - # = [0.051, 2.302] - # Weighted metric = [0.051 * 1.5, 2.302 * 2.] - # Reduced metric = (0.051 * 1.5 + 2.302 * 2.) 
/ 3.5 - - self.assertAllClose(self.evaluate(result), 1.338, atol=1e-3) - - def test_weighted_from_logits(self): - cce_obj = metrics.CategoricalCrossentropy(from_logits=True) - self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) - - y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) - logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) - sample_weight = tf.constant([1.5, 2.]) - result = cce_obj(y_true, logits, sample_weight=sample_weight) - - # softmax = exp(logits) / sum(exp(logits), axis=-1) - # xent = -sum(labels * log(softmax), 1) - # xent = [0.00045, 7.00182] - # weighted xent = [0.000675, 14.00364] - # Reduced xent = (0.000675 + 14.00364) / (1.5 + 2) - - self.assertAllClose(self.evaluate(result), 4.0012, atol=1e-3) - - def test_label_smoothing(self): - y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) - logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) - label_smoothing = 0.1 - - # Label smoothing: z' = z * (1 - L) + L/n, - # where L = label smoothing value and n = num classes - # Label value 1 becomes: 1 - L + L/n - # Label value 0 becomes: L/n - # y_true with label_smoothing = [[0.0333, 0.9333, 0.0333], - # [0.0333, 0.0333, 0.9333]] - - # softmax = exp(logits) / sum(exp(logits), axis=-1) - # xent = -sum(labels * log(softmax), 1) - # log(softmax) = [[-8.00045, -0.00045, -9.00045], - # [-7.00182, -0.00182, -7.00182]] - # labels * log(softmax) = [[-0.26641, -0.00042, -0.29971], - # [-0.23316, -0.00006, -6.53479]] - # xent = [0.56654, 6.76801] - # Reduced xent = (0.56654 + 6.76801) / 2 - - cce_obj = metrics.CategoricalCrossentropy( - from_logits=True, label_smoothing=label_smoothing) - self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) - loss = cce_obj(y_true, logits) - self.assertAllClose(self.evaluate(loss), 3.667, atol=1e-3) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class SparseCategoricalCrossentropyTest(tf.test.TestCase): - - def test_config(self): - scce_obj = metrics.SparseCategoricalCrossentropy( - name='scce', dtype=tf.int32) - self.assertEqual(scce_obj.name, 'scce') - self.assertEqual(scce_obj.dtype, tf.int32) - old_config = scce_obj.get_config() - self.assertDictEqual(old_config, json.loads(json.dumps(old_config))) - - # Check save and restore config - scce_obj2 = metrics.SparseCategoricalCrossentropy.from_config(old_config) - self.assertEqual(scce_obj2.name, 'scce') - self.assertEqual(scce_obj2.dtype, tf.int32) - new_config = scce_obj2.get_config() - self.assertDictEqual(old_config, new_config) - - def test_unweighted(self): - scce_obj = metrics.SparseCategoricalCrossentropy() - self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) - - y_true = np.asarray([1, 2]) - y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) - result = scce_obj(y_true, y_pred) - - # EPSILON = 1e-7, y = y_true, y` = y_pred - # y` = clip_ops.clip_by_value(output, EPSILON, 1. 
- EPSILON) - # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - # logits = log(y`) = [[-2.9957, -0.0513, -16.1181], - # [-2.3026, -0.2231, -2.3026]] - - # softmax = exp(logits) / sum(exp(logits), axis=-1) - # y = one_hot(y) = [[0, 1, 0], [0, 0, 1]] - # xent = -sum(y * log(softmax), 1) - - # exp(logits) = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - # sum(exp(logits), axis=-1) = [1, 1] - # softmax = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - # log(softmax) = [[-2.9957, -0.0513, -16.1181], - # [-2.3026, -0.2231, -2.3026]] - # y * log(softmax) = [[0, -0.0513, 0], [0, 0, -2.3026]] - # xent = [0.0513, 2.3026] - # Reduced xent = (0.0513 + 2.3026) / 2 - - self.assertAllClose(self.evaluate(result), 1.176, atol=1e-3) - - def test_unweighted_from_logits(self): - scce_obj = metrics.SparseCategoricalCrossentropy(from_logits=True) - self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) - - y_true = np.asarray([1, 2]) - logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) - result = scce_obj(y_true, logits) - - # softmax = exp(logits) / sum(exp(logits), axis=-1) - # y_true = one_hot(y_true) = [[0, 1, 0], [0, 0, 1]] - # xent = -sum(y_true * log(softmax), 1) - - # exp(logits) = [[2.718, 8103.084, 1], [2.718, 2980.958, 2.718]] - # sum(exp(logits), axis=-1) = [8106.802, 2986.394] - # softmax = [[0.00033, 0.99954, 0.00012], [0.00091, 0.99817, 0.00091]] - # log(softmax) = [[-8.00045, -0.00045, -9.00045], - # [-7.00182, -0.00182, -7.00182]] - # y_true * log(softmax) = [[0, -0.00045, 0], [0, 0, -7.00182]] - # xent = [0.00045, 7.00182] - # Reduced xent = (0.00045 + 7.00182) / 2 - - self.assertAllClose(self.evaluate(result), 3.5011, atol=1e-3) - - def test_weighted(self): - scce_obj = metrics.SparseCategoricalCrossentropy() - self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) - - y_true = np.asarray([1, 2]) - y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) - sample_weight = tf.constant([1.5, 2.]) - result = scce_obj(y_true, y_pred, sample_weight=sample_weight) - - # EPSILON = 1e-7, y = y_true, y` = y_pred - # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) - # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - # logits = log(y`) = [[-2.9957, -0.0513, -16.1181], - # [-2.3026, -0.2231, -2.3026]] - - # softmax = exp(logits) / sum(exp(logits), axis=-1) - # y = one_hot(y) = [[0, 1, 0], [0, 0, 1]] - # xent = -sum(y * log(softmax), 1) - - # exp(logits) = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - # sum(exp(logits), axis=-1) = [1, 1] - # softmax = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] - # log(softmax) = [[-2.9957, -0.0513, -16.1181], - # [-2.3026, -0.2231, -2.3026]] - # y * log(softmax) = [[0, -0.0513, 0], [0, 0, -2.3026]] - # xent = [0.0513, 2.3026] - # Weighted xent = [0.051 * 1.5, 2.302 * 2.] - # Reduced xent = (0.051 * 1.5 + 2.302 * 2.) 
/ 3.5 - - self.assertAllClose(self.evaluate(result), 1.338, atol=1e-3) - - def test_weighted_from_logits(self): - scce_obj = metrics.SparseCategoricalCrossentropy(from_logits=True) - self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) - - y_true = np.asarray([1, 2]) - logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) - sample_weight = tf.constant([1.5, 2.]) - result = scce_obj(y_true, logits, sample_weight=sample_weight) - - # softmax = exp(logits) / sum(exp(logits), axis=-1) - # y_true = one_hot(y_true) = [[0, 1, 0], [0, 0, 1]] - # xent = -sum(y_true * log(softmax), 1) - # xent = [0.00045, 7.00182] - # weighted xent = [0.000675, 14.00364] - # Reduced xent = (0.000675 + 14.00364) / (1.5 + 2) - - self.assertAllClose(self.evaluate(result), 4.0012, atol=1e-3) - - def test_axis(self): - scce_obj = metrics.SparseCategoricalCrossentropy(axis=0) - self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) - - y_true = np.asarray([1, 2]) - y_pred = np.asarray([[0.05, 0.1], [0.95, 0.8], [0, 0.1]]) - result = scce_obj(y_true, y_pred) - - # EPSILON = 1e-7, y = y_true, y` = y_pred - # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) - # y` = [[0.05, 0.1], [0.95, 0.8], [EPSILON, 0.1]] - # logits = log(y`) = [[-2.9957, -2.3026], - # [-0.0513, -0.2231], - # [-16.1181, -2.3026]] - - # softmax = exp(logits) / sum(exp(logits), axis=-1) - # y = one_hot(y) = [[0, 0], [1, 0], [0, 1]] - # xent = -sum(y * log(softmax), 1) - - # exp(logits) = [[0.05, 0.1], [0.95, 0.8], [EPSILON, 0.1]] - # sum(exp(logits)) = [1, 1] - # softmax = [[0.05, 0.1], [0.95, 0.8], [EPSILON, 0.1]] - # log(softmax) = [[-2.9957, -2.3026], - # [-0.0513, -0.2231], - # [-16.1181, -2.3026]] - # y * log(softmax) = [[0, 0], [-0.0513, 0], [0, -2.3026]] - # xent = [0.0513, 2.3026] - # Reduced xent = (0.0513 + 2.3026) / 2 - - self.assertAllClose(self.evaluate(result), 1.176, atol=1e-3) - - -class BinaryTruePositives(metrics.Metric): - - def __init__(self, name='binary_true_positives', **kwargs): - super().__init__(name=name, **kwargs) - self.true_positives = self.add_weight(name='tp', initializer='zeros') - - def update_state(self, y_true, y_pred, sample_weight=None): - y_true = tf.cast(y_true, tf.bool) - y_pred = tf.cast(y_pred, tf.bool) - - values = tf.logical_and( - tf.equal(y_true, True), tf.equal(y_pred, True)) - values = tf.cast(values, self.dtype) - if sample_weight is not None: - sample_weight = tf.cast(sample_weight, dtype=self.dtype) - sample_weight = tf.__internal__.ops.broadcast_weights( - sample_weight, values) - values = tf.multiply(values, sample_weight) - self.true_positives.assign_add(tf.reduce_sum(values)) - - def result(self): - return self.true_positives - - -class BinaryTruePositivesViaControlFlow(metrics.Metric): - - def __init__(self, name='binary_true_positives', **kwargs): - super().__init__(name=name, **kwargs) - self.true_positives = self.add_weight(name='tp', initializer='zeros') - - def update_state(self, y_true, y_pred, sample_weight=None): - y_true = tf.cast(y_true, tf.bool) - y_pred = tf.cast(y_pred, tf.bool) - - for i in range(len(y_true)): - for j in range(len(y_true[i])): - if y_true[i][j] and y_pred[i][j]: - if sample_weight is None: - self.true_positives.assign_add(1) - else: - self.true_positives.assign_add(sample_weight[i][0]) - - def result(self): - if tf.constant(True): - return self.true_positives - return 0.0 - - -def _get_model(compile_metrics): - model_layers = [ - layers.Dense(3, activation='relu', kernel_initializer='ones'), - layers.Dense(1, 
activation='sigmoid', kernel_initializer='ones')] - - model = test_utils.get_model_from_layers(model_layers, input_shape=(4,)) - model.compile( - loss='mae', - metrics=compile_metrics, - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - return model - - -@test_combinations.run_with_all_model_types -@test_combinations.run_all_keras_modes -class ResetStatesTest(test_combinations.TestCase): - - def test_reset_state_false_positives(self): - fp_obj = metrics.FalsePositives() - model = _get_model([fp_obj]) - x = np.ones((100, 4)) - y = np.zeros((100, 1)) - model.evaluate(x, y) - self.assertEqual(self.evaluate(fp_obj.accumulator), 100.) - model.evaluate(x, y) - self.assertEqual(self.evaluate(fp_obj.accumulator), 100.) - - def test_reset_state_false_negatives(self): - fn_obj = metrics.FalseNegatives() - model = _get_model([fn_obj]) - x = np.zeros((100, 4)) - y = np.ones((100, 1)) - model.evaluate(x, y) - self.assertEqual(self.evaluate(fn_obj.accumulator), 100.) - model.evaluate(x, y) - self.assertEqual(self.evaluate(fn_obj.accumulator), 100.) - - def test_reset_state_true_negatives(self): - tn_obj = metrics.TrueNegatives() - model = _get_model([tn_obj]) - x = np.zeros((100, 4)) - y = np.zeros((100, 1)) - model.evaluate(x, y) - self.assertEqual(self.evaluate(tn_obj.accumulator), 100.) - model.evaluate(x, y) - self.assertEqual(self.evaluate(tn_obj.accumulator), 100.) - - def test_reset_state_true_positives(self): - tp_obj = metrics.TruePositives() - model = _get_model([tp_obj]) - x = np.ones((100, 4)) - y = np.ones((100, 1)) - model.evaluate(x, y) - self.assertEqual(self.evaluate(tp_obj.accumulator), 100.) - model.evaluate(x, y) - self.assertEqual(self.evaluate(tp_obj.accumulator), 100.) - - def test_reset_state_precision(self): - p_obj = metrics.Precision() - model = _get_model([p_obj]) - x = np.concatenate((np.ones((50, 4)), np.ones((50, 4)))) - y = np.concatenate((np.ones((50, 1)), np.zeros((50, 1)))) - model.evaluate(x, y) - self.assertEqual(self.evaluate(p_obj.true_positives), 50.) - self.assertEqual(self.evaluate(p_obj.false_positives), 50.) - model.evaluate(x, y) - self.assertEqual(self.evaluate(p_obj.true_positives), 50.) - self.assertEqual(self.evaluate(p_obj.false_positives), 50.) - - def test_precision_update_state_with_logits(self): - p_obj = metrics.Precision() - # Update state with logits (not in range (0, 1)) should not an raise error. - p_obj.update_state([-0.5, 0.5], [-2., 2.]) - - def test_reset_state_recall(self): - r_obj = metrics.Recall() - model = _get_model([r_obj]) - x = np.concatenate((np.ones((50, 4)), np.zeros((50, 4)))) - y = np.concatenate((np.ones((50, 1)), np.ones((50, 1)))) - model.evaluate(x, y) - self.assertEqual(self.evaluate(r_obj.true_positives), 50.) - self.assertEqual(self.evaluate(r_obj.false_negatives), 50.) - model.evaluate(x, y) - self.assertEqual(self.evaluate(r_obj.true_positives), 50.) - self.assertEqual(self.evaluate(r_obj.false_negatives), 50.) - - def test_reset_state_sensitivity_at_specificity(self): - s_obj = metrics.SensitivityAtSpecificity(0.5, num_thresholds=1) - model = _get_model([s_obj]) - x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)), - np.ones((25, 4)))) - y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)), - np.zeros((25, 1)))) - - for _ in range(2): - model.evaluate(x, y) - self.assertEqual(self.evaluate(s_obj.true_positives), 25.) - self.assertEqual(self.evaluate(s_obj.false_positives), 25.) - self.assertEqual(self.evaluate(s_obj.false_negatives), 25.) 
- self.assertEqual(self.evaluate(s_obj.true_negatives), 25.) - - def test_reset_state_specificity_at_sensitivity(self): - s_obj = metrics.SpecificityAtSensitivity(0.5, num_thresholds=1) - model = _get_model([s_obj]) - x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)), - np.ones((25, 4)))) - y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)), - np.zeros((25, 1)))) - - for _ in range(2): - model.evaluate(x, y) - self.assertEqual(self.evaluate(s_obj.true_positives), 25.) - self.assertEqual(self.evaluate(s_obj.false_positives), 25.) - self.assertEqual(self.evaluate(s_obj.false_negatives), 25.) - self.assertEqual(self.evaluate(s_obj.true_negatives), 25.) - - def test_reset_state_precision_at_recall(self): - s_obj = metrics.PrecisionAtRecall(recall=0.5, num_thresholds=1) - model = _get_model([s_obj]) - x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)), - np.ones((25, 4)))) - y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)), - np.zeros((25, 1)))) - - for _ in range(2): - model.evaluate(x, y) - self.assertEqual(self.evaluate(s_obj.true_positives), 25.) - self.assertEqual(self.evaluate(s_obj.false_positives), 25.) - self.assertEqual(self.evaluate(s_obj.false_negatives), 25.) - self.assertEqual(self.evaluate(s_obj.true_negatives), 25.) - - def test_reset_state_recall_at_precision(self): - s_obj = metrics.RecallAtPrecision(precision=0.5, num_thresholds=1) - model = _get_model([s_obj]) - x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)), - np.ones((25, 4)))) - y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)), - np.zeros((25, 1)))) - - for _ in range(2): - model.evaluate(x, y) - self.assertEqual(self.evaluate(s_obj.true_positives), 25.) - self.assertEqual(self.evaluate(s_obj.false_positives), 25.) - self.assertEqual(self.evaluate(s_obj.false_negatives), 25.) - self.assertEqual(self.evaluate(s_obj.true_negatives), 25.) - - def test_reset_state_auc(self): - auc_obj = metrics.AUC(num_thresholds=3) - model = _get_model([auc_obj]) - x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)), - np.ones((25, 4)))) - y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)), - np.zeros((25, 1)))) - - for _ in range(2): - model.evaluate(x, y) - self.assertEqual(self.evaluate(auc_obj.true_positives[1]), 25.) - self.assertEqual(self.evaluate(auc_obj.false_positives[1]), 25.) - self.assertEqual(self.evaluate(auc_obj.false_negatives[1]), 25.) - self.assertEqual(self.evaluate(auc_obj.true_negatives[1]), 25.) - - def test_reset_state_auc_from_logits(self): - auc_obj = metrics.AUC(num_thresholds=3, from_logits=True) - - model_layers = [layers.Dense(1, kernel_initializer='ones', use_bias=False)] - model = test_utils.get_model_from_layers(model_layers, input_shape=(4,)) - model.compile( - loss='mae', - metrics=[auc_obj], - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - - x = np.concatenate((np.ones((25, 4)), -np.ones((25, 4)), -np.ones( - (25, 4)), np.ones((25, 4)))) - y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones( - (25, 1)), np.zeros((25, 1)))) - - for _ in range(2): - model.evaluate(x, y) - self.assertEqual(self.evaluate(auc_obj.true_positives[1]), 25.) - self.assertEqual(self.evaluate(auc_obj.false_positives[1]), 25.) - self.assertEqual(self.evaluate(auc_obj.false_negatives[1]), 25.) - self.assertEqual(self.evaluate(auc_obj.true_negatives[1]), 25.) 
- - def test_reset_state_auc_manual_thresholds(self): - auc_obj = metrics.AUC(thresholds=[0.5]) - model = _get_model([auc_obj]) - x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)), - np.ones((25, 4)))) - y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)), - np.zeros((25, 1)))) - - for _ in range(2): - model.evaluate(x, y) - self.assertEqual(self.evaluate(auc_obj.true_positives[1]), 25.) - self.assertEqual(self.evaluate(auc_obj.false_positives[1]), 25.) - self.assertEqual(self.evaluate(auc_obj.false_negatives[1]), 25.) - self.assertEqual(self.evaluate(auc_obj.true_negatives[1]), 25.) - - def test_reset_state_mean_iou(self): - m_obj = metrics.MeanIoU(num_classes=2) - model = _get_model([m_obj]) - x = np.asarray([[0, 0, 0, 0], [1, 1, 1, 1], [1, 0, 1, 0], [0, 1, 0, 1]], - dtype=np.float32) - y = np.asarray([[0], [1], [1], [1]], dtype=np.float32) - model.evaluate(x, y) - self.assertArrayNear(self.evaluate(m_obj.total_cm)[0], [1, 0], 1e-1) - self.assertArrayNear(self.evaluate(m_obj.total_cm)[1], [3, 0], 1e-1) - model.evaluate(x, y) - self.assertArrayNear(self.evaluate(m_obj.total_cm)[0], [1, 0], 1e-1) - self.assertArrayNear(self.evaluate(m_obj.total_cm)[1], [3, 0], 1e-1) - - def test_reset_state_recall_float64(self): - # Test case for GitHub issue 36790. - try: - backend.set_floatx('float64') - r_obj = metrics.Recall() - model = _get_model([r_obj]) - x = np.concatenate((np.ones((50, 4)), np.zeros((50, 4)))) - y = np.concatenate((np.ones((50, 1)), np.ones((50, 1)))) - model.evaluate(x, y) - self.assertEqual(self.evaluate(r_obj.true_positives), 50.) - self.assertEqual(self.evaluate(r_obj.false_negatives), 50.) - model.evaluate(x, y) - self.assertEqual(self.evaluate(r_obj.true_positives), 50.) - self.assertEqual(self.evaluate(r_obj.false_negatives), 50.) - finally: - backend.set_floatx('float32') - - def test_function_wrapped_reset_state(self): - m = metrics.Mean(name='my_mean') - - # check reset_state in function. - @tf.function - def reset_in_fn(): - m.reset_state() - return m.update_state(100) - - for _ in range(5): - self.evaluate(reset_in_fn()) - self.assertEqual(self.evaluate(m.count), 1) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class MergeStateTest(test_combinations.TestCase): - - def test_merge_state_incompatible_metrics(self): - with self.assertRaisesRegex(ValueError, - 'Metric .* is not compatible with .*'): - obj1 = metrics.FalsePositives() - self.evaluate(tf.compat.v1.variables_initializer(obj1.variables)) - obj2 = metrics.Accuracy() - self.evaluate(tf.compat.v1.variables_initializer(obj2.variables)) - self.evaluate(obj1.merge_state([obj2])) - - def test_merge_state_accuracy(self): - a_objs = [] - for y_true, y_pred in zip([[[1], [2]], [[3], [4]]], - [[[0], [2]], [[3], [4]]]): - a_obj = metrics.Accuracy() - a_objs.append(a_obj) - self.evaluate(tf.compat.v1.variables_initializer(a_obj.variables)) - self.evaluate(a_obj.update_state(y_true, y_pred)) - self.evaluate(a_objs[0].merge_state(a_objs[1:])) - self.assertEqual(self.evaluate(a_objs[0].total), 3.) - self.assertEqual(self.evaluate(a_objs[0].count), 4.) 
- self.assertEqual(self.evaluate(a_objs[0].result()), 0.75) - - def test_merge_state_false_positives(self): - fp_objs = [] - for _ in range(4): - fp_obj = metrics.FalsePositives() - fp_objs.append(fp_obj) - self.evaluate(tf.compat.v1.variables_initializer(fp_obj.variables)) - y_true = np.zeros((25, 1)) - y_pred = np.ones((25, 1)) - self.evaluate(fp_obj.update_state(y_true, y_pred)) - self.evaluate(fp_objs[0].merge_state(fp_objs[1:])) - self.assertEqual(self.evaluate(fp_objs[0].accumulator), 100.) - - def test_merge_state_false_negatives(self): - fn_objs = [] - for _ in range(4): - fn_obj = metrics.FalseNegatives() - fn_objs.append(fn_obj) - self.evaluate(tf.compat.v1.variables_initializer(fn_obj.variables)) - y_true = np.ones((25, 1)) - y_pred = np.zeros((25, 1)) - self.evaluate(fn_obj.update_state(y_true, y_pred)) - self.evaluate(fn_objs[0].merge_state(fn_objs[1:])) - self.assertEqual(self.evaluate(fn_objs[0].accumulator), 100.) - - def test_merge_state_true_negatives(self): - tn_objs = [] - for _ in range(4): - tn_obj = metrics.TrueNegatives() - tn_objs.append(tn_obj) - self.evaluate(tf.compat.v1.variables_initializer(tn_obj.variables)) - y_true = np.zeros((25, 1)) - y_pred = np.zeros((25, 1)) - self.evaluate(tn_obj.update_state(y_true, y_pred)) - self.evaluate(tn_objs[0].merge_state(tn_objs[1:])) - self.assertEqual(self.evaluate(tn_objs[0].accumulator), 100.) - - def test_merge_state_true_positives(self): - tp_objs = [] - for _ in range(4): - tp_obj = metrics.TruePositives() - tp_objs.append(tp_obj) - self.evaluate(tf.compat.v1.variables_initializer(tp_obj.variables)) - y_true = np.ones((25, 1)) - y_pred = np.ones((25, 1)) - self.evaluate(tp_obj.update_state(y_true, y_pred)) - self.evaluate(tp_objs[0].merge_state(tp_objs[1:])) - self.assertEqual(self.evaluate(tp_objs[0].accumulator), 100.) - - def test_merge_state_precision(self): - p_objs = [] - for _ in range(5): - p_obj = metrics.Precision() - p_objs.append(p_obj) - self.evaluate(tf.compat.v1.variables_initializer(p_obj.variables)) - y_true = np.concatenate((np.ones((10, 1)), np.zeros((10, 1)))) - y_pred = np.concatenate((np.ones((10, 1)), np.ones((10, 1)))) - self.evaluate(p_obj.update_state(y_true, y_pred)) - self.evaluate(p_objs[0].merge_state(p_objs[1:])) - self.assertEqual(self.evaluate(p_objs[0].true_positives), 50.) - self.assertEqual(self.evaluate(p_objs[0].false_positives), 50.) - - def test_merge_state_recall(self): - r_objs = [] - for _ in range(5): - r_obj = metrics.Recall() - r_objs.append(r_obj) - self.evaluate(tf.compat.v1.variables_initializer(r_obj.variables)) - y_true = np.concatenate((np.ones((10, 1)), np.ones((10, 1)))) - y_pred = np.concatenate((np.ones((10, 1)), np.zeros((10, 1)))) - self.evaluate(r_obj.update_state(y_true, y_pred)) - self.evaluate(r_objs[0].merge_state(r_objs[1:])) - self.assertEqual(self.evaluate(r_objs[0].true_positives), 50.) - self.assertEqual(self.evaluate(r_objs[0].false_negatives), 50.) 
- - def test_merge_state_sensitivity_at_specificity(self): - sas_objs = [] - for _ in range(5): - sas_obj = metrics.SensitivityAtSpecificity(0.5, num_thresholds=1) - sas_objs.append(sas_obj) - self.evaluate(tf.compat.v1.variables_initializer(sas_obj.variables)) - y_true = np.concatenate((np.ones((5, 1)), np.zeros((5, 1)), np.ones( - (5, 1)), np.zeros((5, 1)))) - y_pred = np.concatenate((np.ones((5, 1)), np.zeros( - (5, 1)), np.zeros((5, 1)), np.ones((5, 1)))) - self.evaluate(sas_obj.update_state(y_true, y_pred)) - self.evaluate(sas_objs[0].merge_state(sas_objs[1:])) - self.assertEqual(self.evaluate(sas_objs[0].true_positives), 25.) - self.assertEqual(self.evaluate(sas_objs[0].false_positives), 25.) - self.assertEqual(self.evaluate(sas_objs[0].false_negatives), 25.) - self.assertEqual(self.evaluate(sas_objs[0].true_negatives), 25.) - - def test_merge_state_specificity_at_sensitivity(self): - sas_objs = [] - for _ in range(5): - sas_obj = metrics.SpecificityAtSensitivity(0.5, num_thresholds=1) - sas_objs.append(sas_obj) - self.evaluate(tf.compat.v1.variables_initializer(sas_obj.variables)) - y_true = np.concatenate((np.ones((5, 1)), np.zeros((5, 1)), np.ones( - (5, 1)), np.zeros((5, 1)))) - y_pred = np.concatenate((np.ones((5, 1)), np.zeros( - (5, 1)), np.zeros((5, 1)), np.ones((5, 1)))) - self.evaluate(sas_obj.update_state(y_true, y_pred)) - self.evaluate(sas_objs[0].merge_state(sas_objs[1:])) - self.assertEqual(self.evaluate(sas_objs[0].true_positives), 25.) - self.assertEqual(self.evaluate(sas_objs[0].false_positives), 25.) - self.assertEqual(self.evaluate(sas_objs[0].false_negatives), 25.) - self.assertEqual(self.evaluate(sas_objs[0].true_negatives), 25.) - - def test_merge_state_precision_at_recall(self): - par_objs = [] - for _ in range(5): - par_obj = metrics.PrecisionAtRecall(recall=0.5, num_thresholds=1) - par_objs.append(par_obj) - self.evaluate(tf.compat.v1.variables_initializer(par_obj.variables)) - y_true = np.concatenate((np.ones((5, 1)), np.zeros((5, 1)), np.ones( - (5, 1)), np.zeros((5, 1)))) - y_pred = np.concatenate((np.ones((5, 1)), np.zeros( - (5, 1)), np.zeros((5, 1)), np.ones((5, 1)))) - self.evaluate(par_obj.update_state(y_true, y_pred)) - self.evaluate(par_objs[0].merge_state(par_objs[1:])) - self.assertEqual(self.evaluate(par_objs[0].true_positives), 25.) - self.assertEqual(self.evaluate(par_objs[0].false_positives), 25.) - self.assertEqual(self.evaluate(par_objs[0].false_negatives), 25.) - self.assertEqual(self.evaluate(par_objs[0].true_negatives), 25.) - - def test_merge_state_recall_at_precision(self): - rap_objs = [] - for _ in range(5): - rap_obj = metrics.PrecisionAtRecall(recall=0.5, num_thresholds=1) - rap_objs.append(rap_obj) - self.evaluate(tf.compat.v1.variables_initializer(rap_obj.variables)) - y_true = np.concatenate((np.ones((5, 1)), np.zeros((5, 1)), np.ones( - (5, 1)), np.zeros((5, 1)))) - y_pred = np.concatenate((np.ones((5, 1)), np.zeros( - (5, 1)), np.zeros((5, 1)), np.ones((5, 1)))) - self.evaluate(rap_obj.update_state(y_true, y_pred)) - self.evaluate(rap_objs[0].merge_state(rap_objs[1:])) - self.assertEqual(self.evaluate(rap_objs[0].true_positives), 25.) - self.assertEqual(self.evaluate(rap_objs[0].false_positives), 25.) - self.assertEqual(self.evaluate(rap_objs[0].false_negatives), 25.) - self.assertEqual(self.evaluate(rap_objs[0].true_negatives), 25.) 
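A note on the `[1]` indexing in the AUC test that follows: with `num_thresholds=3`, AUC keeps one confusion-matrix entry per threshold (approximately `[0, 0.5, 1]`), so index 1 corresponds to the usual 0.5 decision boundary. A quick eager-mode illustration with made-up inputs:

```python
import tensorflow as tf

# AUC stores per-threshold confusion-matrix variables; index 1 is the
# middle threshold (~0.5) when num_thresholds=3.
auc = tf.keras.metrics.AUC(num_thresholds=3)
auc.update_state([1, 0, 1, 0], [1.0, 0.0, 0.0, 1.0])
print(auc.true_positives.numpy())     # [2., 1., 0.], one entry per threshold
print(auc.true_positives.numpy()[1])  # 1.0: the TP count at the 0.5 cut-off
```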
- - def test_merge_state_auc(self): - auc_objs = [] - for _ in range(5): - auc_obj = metrics.AUC(num_thresholds=3) - auc_objs.append(auc_obj) - self.evaluate(tf.compat.v1.variables_initializer(auc_obj.variables)) - y_true = np.concatenate((np.ones((5, 1)), np.zeros((5, 1)), np.ones( - (5, 1)), np.zeros((5, 1)))) - y_pred = np.concatenate((np.ones((5, 1)), np.zeros( - (5, 1)), np.zeros((5, 1)), np.ones((5, 1)))) - self.evaluate(auc_obj.update_state(y_true, y_pred)) - self.evaluate(auc_objs[0].merge_state(auc_objs[1:])) - self.assertEqual(self.evaluate(auc_objs[0].true_positives[1]), 25.) - self.assertEqual(self.evaluate(auc_objs[0].false_positives[1]), 25.) - self.assertEqual(self.evaluate(auc_objs[0].false_negatives[1]), 25.) - self.assertEqual(self.evaluate(auc_objs[0].true_negatives[1]), 25.) - - def test_merge_state_mean_iou(self): - m_objs = [] - for y_true, y_pred in zip([[0], [1], [1], [1]], - [[0.5], [1.0], [1.0], [1.0]]): - m_obj = metrics.MeanIoU(num_classes=2) - m_objs.append(m_obj) - self.evaluate(tf.compat.v1.variables_initializer(m_obj.variables)) - self.evaluate(m_obj.update_state(y_true, y_pred)) - self.evaluate(m_objs[0].merge_state(m_objs[1:])) - self.assertArrayNear(self.evaluate(m_objs[0].total_cm)[0], [1, 0], 1e-1) - self.assertArrayNear(self.evaluate(m_objs[0].total_cm)[1], [0, 3], 1e-1) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/metrics/probabilistic_metrics.py b/keras/metrics/probabilistic_metrics.py new file mode 100644 index 000000000000..c2c8d4871d0b --- /dev/null +++ b/keras/metrics/probabilistic_metrics.py @@ -0,0 +1,346 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Probabilistic metrics (based on Entropy).""" + +from typing import Optional +from typing import Union + +import tensorflow.compat.v2 as tf + +from keras.dtensor import utils as dtensor_utils +from keras.losses import binary_crossentropy +from keras.losses import categorical_crossentropy +from keras.losses import kullback_leibler_divergence +from keras.losses import poisson +from keras.losses import sparse_categorical_crossentropy +from keras.metrics import base_metric + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@keras_export("keras.metrics.Poisson") +class Poisson(base_metric.MeanMetricWrapper): + """Computes the Poisson score between `y_true` and `y_pred`. + + 🐟 🐟 🐟 + + It is defined as: `poisson_score = y_pred - y_true * log(y_pred)`. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.Poisson() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + >>> m.result().numpy() + 0.49999997 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + ... 
sample_weight=[1, 0]) + >>> m.result().numpy() + 0.99999994 + + Usage with `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss='categorical_crossentropy', + metrics=[tf.keras.metrics.Poisson()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="poisson", dtype=None): + super().__init__(poisson, name, dtype=dtype) + + +@keras_export("keras.metrics.KLDivergence") +class KLDivergence(base_metric.MeanMetricWrapper): + """Computes the Kullback-Leibler divergence metric between `y_true` and + `y_pred`. + + `metric = y_true * log(y_true / y_pred)` + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.KLDivergence() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) + >>> m.result().numpy() + 0.45814306 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 0.9162892 + + Usage with `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss='categorical_crossentropy', + metrics=[tf.keras.metrics.KLDivergence()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="kullback_leibler_divergence", dtype=None): + super().__init__(kullback_leibler_divergence, name, dtype=dtype) + + +@keras_export("keras.metrics.BinaryCrossentropy") +class BinaryCrossentropy(base_metric.MeanMetricWrapper): + """Computes the crossentropy metric between the labels and predictions. + + This is the crossentropy metric class to be used when there are only two + label classes (0 and 1). + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + from_logits: (Optional) Whether output is expected to be a logits tensor. + By default, we consider that output encodes a probability distribution. + label_smoothing: (Optional) Float in [0, 1]. When > 0, label values are + smoothed, meaning the confidence on label values is relaxed. + e.g. `label_smoothing=0.2` means that we will use a value of `0.1` for + label `0` and `0.9` for label `1`. + + Standalone usage: + + >>> m = tf.keras.metrics.BinaryCrossentropy() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) + >>> m.result().numpy() + 0.81492424 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 0.9162905 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='binary_crossentropy', + metrics=[tf.keras.metrics.BinaryCrossentropy()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + name="binary_crossentropy", + dtype=None, + from_logits=False, + label_smoothing=0, + ): + super().__init__( + binary_crossentropy, + name, + dtype=dtype, + from_logits=from_logits, + label_smoothing=label_smoothing, + ) + + +@keras_export("keras.metrics.CategoricalCrossentropy") +class CategoricalCrossentropy(base_metric.MeanMetricWrapper): + """Computes the crossentropy metric between the labels and predictions. + + This is the crossentropy metric class to be used when there are multiple + label classes (2 or more). Here we assume that labels are given as a + `one_hot` representation. e.g., when label values are [2, 0, 1], + `y_true` = [[0, 0, 1], [1, 0, 0], [0, 1, 0]]. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result.
+ from_logits: (Optional) Whether output is expected to be a logits tensor. + By default, we consider that output encodes a probability distribution. + label_smoothing: (Optional) Float in [0, 1]. When > 0, label values are + smoothed, meaning the confidence on label values is relaxed. e.g. + `label_smoothing=0.2` means that we will use a value of `0.1` for label + `0` and `0.9` for label `1`. + axis: (Optional) The dimension along which entropy is + computed. Defaults to `-1`. + + Standalone usage: + + >>> # EPSILON = 1e-7, y = y_true, y` = y_pred + >>> # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) + >>> # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + >>> # xent = -sum(y * log(y'), axis = -1) + >>> # = -((log 0.95), (log 0.1)) + >>> # = [0.051, 2.302] + >>> # Reduced xent = (0.051 + 2.302) / 2 + >>> m = tf.keras.metrics.CategoricalCrossentropy() + >>> m.update_state([[0, 1, 0], [0, 0, 1]], + ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + >>> m.result().numpy() + 1.1769392 + + >>> m.reset_state() + >>> m.update_state([[0, 1, 0], [0, 0, 1]], + ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]], + ... sample_weight=tf.constant([0.3, 0.7])) + >>> m.result().numpy() + 1.6271976 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='categorical_crossentropy', + metrics=[tf.keras.metrics.CategoricalCrossentropy()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + name="categorical_crossentropy", + dtype=None, + from_logits=False, + label_smoothing=0, + axis=-1, + ): + super().__init__( + categorical_crossentropy, + name, + dtype=dtype, + from_logits=from_logits, + label_smoothing=label_smoothing, + axis=axis, + ) + + +@keras_export("keras.metrics.SparseCategoricalCrossentropy") +class SparseCategoricalCrossentropy(base_metric.MeanMetricWrapper): + """Computes the crossentropy metric between the labels and predictions. + + Use this crossentropy metric when there are two or more label classes. + We expect labels to be provided as integers. If you want to provide labels + using `one-hot` representation, please use `CategoricalCrossentropy` metric. + There should be `# classes` floating point values per feature for `y_pred` + and a single floating point value per feature for `y_true`. + + In the snippet below, there is a single floating point value per example for + `y_true` and `# classes` floating point values per example for `y_pred`. + The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is + `[batch_size, num_classes]`. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + from_logits: (Optional) Whether output is expected to be a logits tensor. + By default, we consider that output encodes a probability distribution. + ignore_class: Optional integer. The ID of a class to be ignored during + metric computation. This is useful, for example, in segmentation + problems featuring a "void" class (commonly -1 or 255) in segmentation + maps. By default (`ignore_class=None`), all classes are considered. + axis: (Optional) The dimension along which entropy is + computed. Defaults to `-1`.
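A concrete note on the `ignore_class` argument documented above, before the standalone-usage examples resume: labels equal to `ignore_class` are masked out of the running average, matching the "void"-class use case the docstring mentions. A hedged sketch of the intended semantics (illustrative only, not part of the patch; exact masking behavior is defined by the implementation above):

```python
import tensorflow as tf

# Hedged sketch: entries labeled with ignore_class are masked out.
m = tf.keras.metrics.SparseCategoricalCrossentropy(ignore_class=-1)
y_true = [1, 2, -1]                    # third entry is a void label
y_pred = [[0.05, 0.95, 0.00],
          [0.10, 0.80, 0.10],
          [0.33, 0.33, 0.34]]          # masked, contributes nothing
m.update_state(y_true, y_pred)
# Expected to average over the two kept entries only:
# (-log(0.95) - log(0.1)) / 2 ~ 1.177
print(m.result().numpy())
```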
+ + Standalone usage: + + >>> # y_true = one_hot(y_true) = [[0, 1, 0], [0, 0, 1]] + >>> # logits = log(y_pred) + >>> # softmax = exp(logits) / sum(exp(logits), axis=-1) + >>> # softmax = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + >>> # xent = -sum(y * log(softmax), 1) + >>> # log(softmax) = [[-2.9957, -0.0513, -16.1181], + >>> # [-2.3026, -0.2231, -2.3026]] + >>> # y_true * log(softmax) = [[0, -0.0513, 0], [0, 0, -2.3026]] + >>> # xent = [0.0513, 2.3026] + >>> # Reduced xent = (0.0513 + 2.3026) / 2 + >>> m = tf.keras.metrics.SparseCategoricalCrossentropy() + >>> m.update_state([1, 2], + ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + >>> m.result().numpy() + 1.1769392 + + >>> m.reset_state() + >>> m.update_state([1, 2], + ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]], + ... sample_weight=tf.constant([0.3, 0.7])) + >>> m.result().numpy() + 1.6271976 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='sparse_categorical_crossentropy', + metrics=[tf.keras.metrics.SparseCategoricalCrossentropy()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__( + self, + name: str = "sparse_categorical_crossentropy", + dtype: Optional[Union[str, tf.dtypes.DType]] = None, + from_logits: bool = False, + ignore_class: Optional[int] = None, + axis: int = -1, + ): + super().__init__( + sparse_categorical_crossentropy, + name, + dtype=dtype, + from_logits=from_logits, + ignore_class=ignore_class, + axis=axis, + ) + + +_SPARSE_CATEGORICAL_UPDATE_STATE_DOCSTRING = """Accumulates metric statistics. + +For sparse categorical metrics, the shapes of `y_true` and `y_pred` are +different. + +Args: + y_true: Ground truth label values. shape = `[batch_size, d0, .. dN-1]` or + shape = `[batch_size, d0, .. dN-1, 1]`. + y_pred: The predicted probability values. shape = `[batch_size, d0, .. dN]`. + sample_weight: Optional `sample_weight` acts as a + coefficient for the metric. If a scalar is provided, then the metric is + simply scaled by the given value. If `sample_weight` is a tensor of size + `[batch_size]`, then the metric for each sample of the batch is rescaled + by the corresponding element in the `sample_weight` vector. If the shape + of `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted + to this shape), then each metric element of `y_pred` is scaled by the + corresponding value of `sample_weight`. (Note on `dN-1`: all metric + functions reduce by 1 dimension, usually the last axis (-1)). + +Returns: + Update op. +""" + +SparseCategoricalCrossentropy.update_state.__doc__ = ( + _SPARSE_CATEGORICAL_UPDATE_STATE_DOCSTRING +) diff --git a/keras/metrics/probabilistic_metrics_test.py b/keras/metrics/probabilistic_metrics_test.py new file mode 100644 index 000000000000..0a2e8577d565 --- /dev/null +++ b/keras/metrics/probabilistic_metrics_test.py @@ -0,0 +1,567 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Keras metrics.""" + +import json + +import numpy as np +import tensorflow.compat.v2 as tf + +from keras import metrics +from keras.testing_infra import test_combinations + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class PoissonTest(tf.test.TestCase): + def setup(self): + y_pred = np.asarray([1, 9, 2, 5, 2, 6]).reshape((2, 3)) + y_true = np.asarray([4, 8, 12, 8, 1, 3]).reshape((2, 3)) + + self.batch_size = 6 + self.expected_results = y_pred - np.multiply(y_true, np.log(y_pred)) + + self.y_pred = tf.constant(y_pred, dtype=tf.float32) + self.y_true = tf.constant(y_true) + + def test_config(self): + poisson_obj = metrics.Poisson(name="poisson", dtype=tf.int32) + self.assertEqual(poisson_obj.name, "poisson") + self.assertEqual(poisson_obj._dtype, tf.int32) + + poisson_obj2 = metrics.Poisson.from_config(poisson_obj.get_config()) + self.assertEqual(poisson_obj2.name, "poisson") + self.assertEqual(poisson_obj2._dtype, tf.int32) + + def test_unweighted(self): + self.setup() + poisson_obj = metrics.Poisson() + self.evaluate(tf.compat.v1.variables_initializer(poisson_obj.variables)) + + update_op = poisson_obj.update_state(self.y_true, self.y_pred) + self.evaluate(update_op) + result = poisson_obj.result() + expected_result = np.sum(self.expected_results) / self.batch_size + self.assertAllClose(result, expected_result, atol=1e-3) + + def test_weighted(self): + self.setup() + poisson_obj = metrics.Poisson() + self.evaluate(tf.compat.v1.variables_initializer(poisson_obj.variables)) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + + result = poisson_obj( + self.y_true, self.y_pred, sample_weight=sample_weight + ) + sample_weight = np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape( + (2, 3) + ) + expected_result = np.multiply(self.expected_results, sample_weight) + expected_result = np.sum(expected_result) / np.sum(sample_weight) + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class KLDivergenceTest(tf.test.TestCase): + def setup(self): + y_pred = np.asarray([0.4, 0.9, 0.12, 0.36, 0.3, 0.4]).reshape((2, 3)) + y_true = np.asarray([0.5, 0.8, 0.12, 0.7, 0.43, 0.8]).reshape((2, 3)) + + self.batch_size = 2 + self.expected_results = np.multiply(y_true, np.log(y_true / y_pred)) + + self.y_pred = tf.constant(y_pred, dtype=tf.float32) + self.y_true = tf.constant(y_true) + + def test_config(self): + k_obj = metrics.KLDivergence(name="kld", dtype=tf.int32) + self.assertEqual(k_obj.name, "kld") + self.assertEqual(k_obj._dtype, tf.int32) + + k_obj2 = metrics.KLDivergence.from_config(k_obj.get_config()) + self.assertEqual(k_obj2.name, "kld") + self.assertEqual(k_obj2._dtype, tf.int32) + + def test_unweighted(self): + self.setup() + k_obj = metrics.KLDivergence() + self.evaluate(tf.compat.v1.variables_initializer(k_obj.variables)) + + update_op = k_obj.update_state(self.y_true, self.y_pred) + self.evaluate(update_op) + result = k_obj.result() + expected_result = np.sum(self.expected_results) / self.batch_size + self.assertAllClose(result, expected_result, atol=1e-3) + + def test_weighted(self): + self.setup() + k_obj = metrics.KLDivergence() + self.evaluate(tf.compat.v1.variables_initializer(k_obj.variables)) + + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + result = k_obj(self.y_true, self.y_pred, sample_weight=sample_weight) + + sample_weight = 
np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape( + (2, 3) + ) + expected_result = np.multiply(self.expected_results, sample_weight) + expected_result = np.sum(expected_result) / (1.2 + 3.4) + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class BinaryCrossentropyTest(tf.test.TestCase): + def test_config(self): + bce_obj = metrics.BinaryCrossentropy( + name="bce", dtype=tf.int32, label_smoothing=0.2 + ) + self.assertEqual(bce_obj.name, "bce") + self.assertEqual(bce_obj._dtype, tf.int32) + + old_config = bce_obj.get_config() + self.assertAllClose(old_config["label_smoothing"], 0.2, 1e-3) + + # Check save and restore config + bce_obj2 = metrics.BinaryCrossentropy.from_config(old_config) + self.assertEqual(bce_obj2.name, "bce") + self.assertEqual(bce_obj2._dtype, tf.int32) + new_config = bce_obj2.get_config() + self.assertDictEqual(old_config, new_config) + + def test_unweighted(self): + bce_obj = metrics.BinaryCrossentropy() + self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) + result = bce_obj(y_true, y_pred) + + # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 + # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) + # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] + + # Metric = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) + # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), + # -log(Y_MAX + EPSILON), -log(1)] + # = [(0 + 15.33) / 2, (0 + 0) / 2] + # Reduced metric = 7.665 / 2 + + self.assertAllClose(self.evaluate(result), 3.833, atol=1e-3) + + def test_unweighted_with_logits(self): + bce_obj = metrics.BinaryCrossentropy(from_logits=True) + self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) + + y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) + y_pred = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) + result = bce_obj(y_true, y_pred) + + # Metric = max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # = [((100 - 100 * 1 + log(1 + exp(-100))) + + # (0 + 100 * 0 + log(1 + exp(-100))) + + # (100 - 100 * 1 + log(1 + exp(-100))), + # ((100 - 100 * 0 + log(1 + exp(-100))) + + # (100 - 100 * 1 + log(1 + exp(-100))) + + # (0 + 100 * 1 + log(1 + exp(-100))))] + # = [(0 + 0 + 0) / 3, 200 / 3] + # Reduced metric = (0 + 66.666) / 2 + + self.assertAllClose(self.evaluate(result), 33.333, atol=1e-3) + + def test_weighted(self): + bce_obj = metrics.BinaryCrossentropy() + self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) + y_true = np.asarray([1, 0, 1, 0]).reshape([2, 2]) + y_pred = np.asarray([1, 1, 1, 0], dtype=np.float32).reshape([2, 2]) + sample_weight = tf.constant([1.5, 2.0]) + result = bce_obj(y_true, y_pred, sample_weight=sample_weight) + + # EPSILON = 1e-7, y = y_true, y` = y_pred, Y_MAX = 0.9999999 + # y` = clip_ops.clip_by_value(output, EPSILON, 1. 
- EPSILON) + # y` = [Y_MAX, Y_MAX, Y_MAX, EPSILON] + + # Metric = -(y log(y` + EPSILON) + (1 - y) log(1 - y` + EPSILON)) + # = [-log(Y_MAX + EPSILON), -log(1 - Y_MAX + EPSILON), + # -log(Y_MAX + EPSILON), -log(1)] + # = [(0 + 15.33) / 2, (0 + 0) / 2] + # Weighted metric = [7.665 * 1.5, 0] + # Reduced metric = 7.665 * 1.5 / (1.5 + 2) + + self.assertAllClose(self.evaluate(result), 3.285, atol=1e-3) + + def test_weighted_from_logits(self): + bce_obj = metrics.BinaryCrossentropy(from_logits=True) + self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) + y_true = tf.constant([[1, 0, 1], [0, 1, 1]]) + y_pred = tf.constant([[100.0, -100.0, 100.0], [100.0, 100.0, -100.0]]) + sample_weight = tf.constant([2.0, 2.5]) + result = bce_obj(y_true, y_pred, sample_weight=sample_weight) + + # Metric = max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # = [(0 + 0 + 0) / 3, 200 / 3] + # Weighted metric = [0, 66.666 * 2.5] + # Reduced metric = 66.666 * 2.5 / (2 + 2.5) + + self.assertAllClose(self.evaluate(result), 37.037, atol=1e-3) + + def test_label_smoothing(self): + logits = tf.constant(((100.0, -100.0, -100.0))) + y_true = tf.constant(((1, 0, 1))) + label_smoothing = 0.1 + # Metric: max(x, 0) - x * z + log(1 + exp(-abs(x))) + # (where x = logits and z = y_true) + # Label smoothing: z' = z * (1 - L) + 0.5L + # After label smoothing, label 1 becomes 1 - 0.5L + # label 0 becomes 0.5L + # Applying the above two fns to the given input: + # (100 - 100 * (1 - 0.5 L) + 0 + + # 0 + 100 * (0.5 L) + 0 + + # 0 + 100 * (1 - 0.5 L) + 0) * (1/3) + # = (100 + 50L) * 1/3 + bce_obj = metrics.BinaryCrossentropy( + from_logits=True, label_smoothing=label_smoothing + ) + self.evaluate(tf.compat.v1.variables_initializer(bce_obj.variables)) + result = bce_obj(y_true, logits) + expected_value = (100.0 + 50.0 * label_smoothing) / 3.0 + self.assertAllClose(expected_value, self.evaluate(result), atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class CategoricalCrossentropyTest(tf.test.TestCase): + def test_config(self): + cce_obj = metrics.CategoricalCrossentropy( + name="cce", dtype=tf.int32, label_smoothing=0.2 + ) + self.assertEqual(cce_obj.name, "cce") + self.assertEqual(cce_obj._dtype, tf.int32) + + old_config = cce_obj.get_config() + self.assertAllClose(old_config["label_smoothing"], 0.2, 1e-3) + + # Check save and restore config + cce_obj2 = metrics.CategoricalCrossentropy.from_config(old_config) + self.assertEqual(cce_obj2.name, "cce") + self.assertEqual(cce_obj2._dtype, tf.int32) + new_config = cce_obj2.get_config() + self.assertDictEqual(old_config, new_config) + + def test_unweighted(self): + cce_obj = metrics.CategoricalCrossentropy() + self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) + + y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) + y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + result = cce_obj(y_true, y_pred) + + # EPSILON = 1e-7, y = y_true, y` = y_pred + # y` = clip_ops.clip_by_value(output, EPSILON, 1. 
- EPSILON) + # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + + # Metric = -sum(y * log(y'), axis = -1) + # = -((log 0.95), (log 0.1)) + # = [0.051, 2.302] + # Reduced metric = (0.051 + 2.302) / 2 + + self.assertAllClose(self.evaluate(result), 1.176, atol=1e-3) + + def test_unweighted_from_logits(self): + cce_obj = metrics.CategoricalCrossentropy(from_logits=True) + self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) + + y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) + logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) + result = cce_obj(y_true, logits) + + # softmax = exp(logits) / sum(exp(logits), axis=-1) + # xent = -sum(labels * log(softmax), 1) + + # exp(logits) = [[2.718, 8103.084, 1], [2.718, 2980.958, 2.718]] + # sum(exp(logits), axis=-1) = [8106.802, 2986.394] + # softmax = [[0.00033, 0.99954, 0.00012], [0.00091, 0.99817, 0.00091]] + # log(softmax) = [[-8.00045, -0.00045, -9.00045], + # [-7.00182, -0.00182, -7.00182]] + # labels * log(softmax) = [[0, -0.00045, 0], [0, 0, -7.00182]] + # xent = [0.00045, 7.00182] + # Reduced xent = (0.00045 + 7.00182) / 2 + + self.assertAllClose(self.evaluate(result), 3.5011, atol=1e-3) + + def test_weighted(self): + cce_obj = metrics.CategoricalCrossentropy() + self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) + + y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) + y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + sample_weight = tf.constant([1.5, 2.0]) + result = cce_obj(y_true, y_pred, sample_weight=sample_weight) + + # EPSILON = 1e-7, y = y_true, y` = y_pred + # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) + # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + + # Metric = -sum(y * log(y'), axis = -1) + # = -((log 0.95), (log 0.1)) + # = [0.051, 2.302] + # Weighted metric = [0.051 * 1.5, 2.302 * 2.] + # Reduced metric = (0.051 * 1.5 + 2.302 * 2.) 
/ 3.5 + + self.assertAllClose(self.evaluate(result), 1.338, atol=1e-3) + + def test_weighted_from_logits(self): + cce_obj = metrics.CategoricalCrossentropy(from_logits=True) + self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) + + y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) + logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) + sample_weight = tf.constant([1.5, 2.0]) + result = cce_obj(y_true, logits, sample_weight=sample_weight) + + # softmax = exp(logits) / sum(exp(logits), axis=-1) + # xent = -sum(labels * log(softmax), 1) + # xent = [0.00045, 7.00182] + # weighted xent = [0.000675, 14.00364] + # Reduced xent = (0.000675 + 14.00364) / (1.5 + 2) + + self.assertAllClose(self.evaluate(result), 4.0012, atol=1e-3) + + def test_label_smoothing(self): + y_true = np.asarray([[0, 1, 0], [0, 0, 1]]) + logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) + label_smoothing = 0.1 + + # Label smoothing: z' = z * (1 - L) + L/n, + # where L = label smoothing value and n = num classes + # Label value 1 becomes: 1 - L + L/n + # Label value 0 becomes: L/n + # y_true with label_smoothing = [[0.0333, 0.9333, 0.0333], + # [0.0333, 0.0333, 0.9333]] + + # softmax = exp(logits) / sum(exp(logits), axis=-1) + # xent = -sum(labels * log(softmax), 1) + # log(softmax) = [[-8.00045, -0.00045, -9.00045], + # [-7.00182, -0.00182, -7.00182]] + # labels * log(softmax) = [[-0.26641, -0.00042, -0.29971], + # [-0.23316, -0.00006, -6.53479]] + # xent = [0.56654, 6.76801] + # Reduced xent = (0.56654 + 6.76801) / 2 + + cce_obj = metrics.CategoricalCrossentropy( + from_logits=True, label_smoothing=label_smoothing + ) + self.evaluate(tf.compat.v1.variables_initializer(cce_obj.variables)) + loss = cce_obj(y_true, logits) + self.assertAllClose(self.evaluate(loss), 3.667, atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class SparseCategoricalCrossentropyTest(tf.test.TestCase): + def test_config(self): + scce_obj = metrics.SparseCategoricalCrossentropy( + name="scce", dtype=tf.int32 + ) + self.assertEqual(scce_obj.name, "scce") + self.assertEqual(scce_obj.dtype, tf.int32) + old_config = scce_obj.get_config() + self.assertDictEqual(old_config, json.loads(json.dumps(old_config))) + + # Check save and restore config + scce_obj2 = metrics.SparseCategoricalCrossentropy.from_config( + old_config + ) + self.assertEqual(scce_obj2.name, "scce") + self.assertEqual(scce_obj2.dtype, tf.int32) + new_config = scce_obj2.get_config() + self.assertDictEqual(old_config, new_config) + + def test_unweighted(self): + scce_obj = metrics.SparseCategoricalCrossentropy() + self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) + + y_true = np.asarray([1, 2]) + y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + result = scce_obj(y_true, y_pred) + + # EPSILON = 1e-7, y = y_true, y` = y_pred + # y` = clip_ops.clip_by_value(output, EPSILON, 1. 
- EPSILON) + # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + # logits = log(y`) = [[-2.9957, -0.0513, -16.1181], + # [-2.3026, -0.2231, -2.3026]] + + # softmax = exp(logits) / sum(exp(logits), axis=-1) + # y = one_hot(y) = [[0, 1, 0], [0, 0, 1]] + # xent = -sum(y * log(softmax), 1) + + # exp(logits) = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + # sum(exp(logits), axis=-1) = [1, 1] + # softmax = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + # log(softmax) = [[-2.9957, -0.0513, -16.1181], + # [-2.3026, -0.2231, -2.3026]] + # y * log(softmax) = [[0, -0.0513, 0], [0, 0, -2.3026]] + # xent = [0.0513, 2.3026] + # Reduced xent = (0.0513 + 2.3026) / 2 + + self.assertAllClose(self.evaluate(result), 1.176, atol=1e-3) + + def test_unweighted_ignore_class(self): + scce_obj = metrics.SparseCategoricalCrossentropy(ignore_class=-1) + self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) + + y_true = np.asarray([-1, 2]) + y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + result = scce_obj(y_true, y_pred) + + self.assertAllClose(self.evaluate(result), 2.3026, atol=1e-3) + + def test_unweighted_from_logits(self): + scce_obj = metrics.SparseCategoricalCrossentropy(from_logits=True) + self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) + + y_true = np.asarray([1, 2]) + logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) + result = scce_obj(y_true, logits) + + # softmax = exp(logits) / sum(exp(logits), axis=-1) + # y_true = one_hot(y_true) = [[0, 1, 0], [0, 0, 1]] + # xent = -sum(y_true * log(softmax), 1) + + # exp(logits) = [[2.718, 8103.084, 1], [2.718, 2980.958, 2.718]] + # sum(exp(logits), axis=-1) = [8106.802, 2986.394] + # softmax = [[0.00033, 0.99954, 0.00012], [0.00091, 0.99817, 0.00091]] + # log(softmax) = [[-8.00045, -0.00045, -9.00045], + # [-7.00182, -0.00182, -7.00182]] + # y_true * log(softmax) = [[0, -0.00045, 0], [0, 0, -7.00182]] + # xent = [0.00045, 7.00182] + # Reduced xent = (0.00045 + 7.00182) / 2 + + self.assertAllClose(self.evaluate(result), 3.5011, atol=1e-3) + + def test_weighted(self): + scce_obj = metrics.SparseCategoricalCrossentropy() + self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) + + y_true = np.asarray([1, 2]) + y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + sample_weight = tf.constant([1.5, 2.0]) + result = scce_obj(y_true, y_pred, sample_weight=sample_weight) + + # EPSILON = 1e-7, y = y_true, y` = y_pred + # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) + # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + # logits = log(y`) = [[-2.9957, -0.0513, -16.1181], + # [-2.3026, -0.2231, -2.3026]] + + # softmax = exp(logits) / sum(exp(logits), axis=-1) + # y = one_hot(y) = [[0, 1, 0], [0, 0, 1]] + # xent = -sum(y * log(softmax), 1) + + # exp(logits) = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + # sum(exp(logits), axis=-1) = [1, 1] + # softmax = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + # log(softmax) = [[-2.9957, -0.0513, -16.1181], + # [-2.3026, -0.2231, -2.3026]] + # y * log(softmax) = [[0, -0.0513, 0], [0, 0, -2.3026]] + # xent = [0.0513, 2.3026] + # Weighted xent = [0.051 * 1.5, 2.302 * 2.] + # Reduced xent = (0.051 * 1.5 + 2.302 * 2.) 
/ 3.5 + + self.assertAllClose(self.evaluate(result), 1.338, atol=1e-3) + + def test_weighted_ignore_class(self): + scce_obj = metrics.SparseCategoricalCrossentropy(ignore_class=-1) + self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) + + y_true = np.asarray([1, 2, -1]) + y_pred = np.asarray([[0.05, 0.95, 0], [0.1, 0.8, 0.1], [0.1, 0.8, 0.1]]) + sample_weight = tf.constant([1.5, 2.0, 1.5]) + result = scce_obj(y_true, y_pred, sample_weight=sample_weight) + + self.assertAllClose(self.evaluate(result), 1.338, atol=1e-3) + + def test_weighted_from_logits(self): + scce_obj = metrics.SparseCategoricalCrossentropy(from_logits=True) + self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) + + y_true = np.asarray([1, 2]) + logits = np.asarray([[1, 9, 0], [1, 8, 1]], dtype=np.float32) + sample_weight = tf.constant([1.5, 2.0]) + result = scce_obj(y_true, logits, sample_weight=sample_weight) + + # softmax = exp(logits) / sum(exp(logits), axis=-1) + # y_true = one_hot(y_true) = [[0, 1, 0], [0, 0, 1]] + # xent = -sum(y_true * log(softmax), 1) + # xent = [0.00045, 7.00182] + # weighted xent = [0.000675, 14.00364] + # Reduced xent = (0.000675 + 14.00364) / (1.5 + 2) + + self.assertAllClose(self.evaluate(result), 4.0012, atol=1e-3) + + def test_axis(self): + scce_obj = metrics.SparseCategoricalCrossentropy(axis=0) + self.evaluate(tf.compat.v1.variables_initializer(scce_obj.variables)) + + y_true = np.asarray([1, 2]) + y_pred = np.asarray([[0.05, 0.1], [0.95, 0.8], [0, 0.1]]) + result = scce_obj(y_true, y_pred) + + # EPSILON = 1e-7, y = y_true, y` = y_pred + # y` = clip_ops.clip_by_value(output, EPSILON, 1. - EPSILON) + # y` = [[0.05, 0.1], [0.95, 0.8], [EPSILON, 0.1]] + # logits = log(y`) = [[-2.9957, -2.3026], + # [-0.0513, -0.2231], + # [-16.1181, -2.3026]] + + # softmax = exp(logits) / sum(exp(logits), axis=-1) + # y = one_hot(y) = [[0, 0], [1, 0], [0, 1]] + # xent = -sum(y * log(softmax), 1) + + # exp(logits) = [[0.05, 0.1], [0.95, 0.8], [EPSILON, 0.1]] + # sum(exp(logits)) = [1, 1] + # softmax = [[0.05, 0.1], [0.95, 0.8], [EPSILON, 0.1]] + # log(softmax) = [[-2.9957, -2.3026], + # [-0.0513, -0.2231], + # [-16.1181, -2.3026]] + # y * log(softmax) = [[0, 0], [-0.0513, 0], [0, -2.3026]] + # xent = [0.0513, 2.3026] + # Reduced xent = (0.0513 + 2.3026) / 2 + + self.assertAllClose(self.evaluate(result), 1.176, atol=1e-3) + + +class BinaryTruePositives(metrics.Metric): + def __init__(self, name="binary_true_positives", **kwargs): + super().__init__(name=name, **kwargs) + self.true_positives = self.add_weight(name="tp", initializer="zeros") + + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = tf.cast(y_true, tf.bool) + y_pred = tf.cast(y_pred, tf.bool) + + values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True)) + values = tf.cast(values, self.dtype) + if sample_weight is not None: + sample_weight = tf.cast(sample_weight, dtype=self.dtype) + sample_weight = tf.__internal__.ops.broadcast_weights( + sample_weight, values + ) + values = tf.multiply(values, sample_weight) + self.true_positives.assign_add(tf.reduce_sum(values)) + + def result(self): + return self.true_positives + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/py_metric.py b/keras/metrics/py_metric.py new file mode 100644 index 000000000000..e0718203119f --- /dev/null +++ b/keras/metrics/py_metric.py @@ -0,0 +1,191 @@ +# Copyright 2023 The Keras Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Base class for Python-based metrics."""
+
+import types
+
+import tensorflow.compat.v2 as tf
+from tensorflow.python.util.tf_export import keras_export
+
+from keras.metrics import base_metric
+
+
+@keras_export("keras.metrics.experimental.PyMetric", v1=[])
+class PyMetric(base_metric.Metric):
+    """Metric which runs in Python, compiled outside of the TensorFlow graph.
+
+    Args:
+      name: (Optional) string name of the PyMetric instance.
+      dtype: (Optional) data type of the PyMetric result.
+      **kwargs: Additional layer keyword arguments.
+
+    Usage of `PyMetric` is generally identical to `keras.metrics.Metric`.
+    It can be used in isolation, or in tandem with the `compile()` API. For
+    more information about the usage of `PyMetric`, see
+    `keras.metrics.Metric`.
+
+    Unlike regular metrics, `PyMetric` instances are outside-compiled
+    with respect to the TensorFlow graph during training or evaluation.
+    They have access to the same inputs as a standard in-graph metric, but
+    they run in a Python interpreter on the host CPU. Any data stored in a
+    `PyMetric` is located on the main memory of the host CPU, and any
+    TensorFlow ops used in a PyMetric are run eagerly on the host CPU.
+
+    As a result, `PyMetric` instances are generally not as performant
+    as in-graph metrics, and should only be used in cases where computing
+    the metric inside of the TensorFlow graph is either impossible
+    or prohibitively expensive.
+
+    **Note:** Due to the use of `tf.py_function`, PyMetrics
+    are incompatible with XLA and therefore TPUs.
+
+    Methods to be implemented by subclasses:
+
+    * `update_state()`: Handles updates to internal state variables
+    * `result()`: Computes and returns a scalar value for the metric from the
+      state variables.
+    * `reset_state()`: Resets all of the state variables of the metric.
+
+    This subclass implementation is similar to that of `keras.metrics.Metric`,
+    with three notable differences:
+
+    * Inputs to `update_state()` in a `PyMetric` are eager tensors, and both
+      `update_state()` and `result()` run outside of the TensorFlow graph,
+      executing any TensorFlow ops eagerly.
+    * `reset_state()` is also called at initialization time to initialize the
+      Python state of the metric.
+    * `result()` can only return a single scalar. It does not support returning
+      a dictionary of results like `keras.metrics.Metric`.
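+
+    As with any other metric, a `PyMetric` instance can be attached to a
+    model through `compile()`; a minimal sketch, assuming an existing
+    compiled-model workflow and using the `JaccardScore` subclass shown
+    below:
+
+    ```python
+    model.compile(optimizer='sgd', loss='mse',
+                  metrics=[JaccardScore()])
+    ```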
+ + Example subclass implementation using sklearn's Jaccard Score: + + ```python + from sklearn.metrics import jaccard_score + import tensorflow as tf + + class JaccardScore(tf.keras.metrics.experimental.PyMetric): + + def __init__(self, name='jaccard_score', **kwargs): + super().__init__(name=name, **kwargs) + + def update_state(self, y_true, y_pred, sample_weight=None): + self.jaccard_sum += jaccard_score(y_pred, y_true, average="macro") + self.count += 1 + + def reset_state(self): + self.jaccard_sum = 0. + self.count = 0. + + def result(self): + return self.jaccard_sum / self.count + ``` + """ + + def __init__(self, name=None, dtype=None, **kwargs): + super().__init__(name=name, dtype=dtype, **kwargs) + self.reset_state() + + def __new__(cls, *args, **kwargs): + obj = super(base_metric.Metric, cls).__new__(cls) + + # Wrap the update_state function in a py_function and scope it to /cpu:0 + obj_update_state = obj.update_state + + def update_state_on_cpu(y_true, y_pred, sample_weight=None): + with tf.device("/cpu:0"): + return obj_update_state(y_true, y_pred, sample_weight) + + obj.update_state_on_cpu = update_state_on_cpu + + def update_state_fn(self, y_true, y_pred, sample_weight=None): + eager_inputs = [y_true, y_pred] + if sample_weight is not None: + eager_inputs.append(sample_weight) + return tf.py_function( + func=self.update_state_on_cpu, inp=eager_inputs, Tout=[] + ) + + obj.update_state = types.MethodType(update_state_fn, obj) + + # Wrap the result function in a py_function and scope it to /cpu:0 + obj_result = obj.result + + def result_on_host_cpu(): + with tf.device("/cpu:0"): + return obj_result() + + obj.result_on_host_cpu = result_on_host_cpu + + def result_fn(self): + return tf.py_function( + self.result_on_host_cpu, inp=[], Tout=obj.dtype + ) + + obj.result = types.MethodType(result_fn, obj) + + return obj + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates statistics for the metric. + + **Note:** This function is executed outside of the TensorFlow graph + on the CPU host. + + This means: + + a) Inputs are eager tensors. + b) Any TensorFlow ops run in this method are run eagerly. + c) Any Tensors created are allocated to the CPU's main memory. + + Args: + y_true: Target output + y_pred: Predicted output + sample_weight: (Optional) weights for the individual samples in + `y_true` and `y_pred` + """ + raise NotImplementedError("Subclasses should implement `update_state`") + + def merge_state(self, metrics): + """Merges the state from one or more metrics. + + `PyMetric` instances that intend to support merging state must override + this method, as the default implementation + in `keras.metrics.Metric` does not apply to `PyMetric`. + """ + raise NotImplementedError("Subclasses should implement `merge_state`") + + def reset_state(self): + """Resets all of the metric state variables. + + This function is called between epochs when a metric is evaluated during + training. It's also called when the metric is initialized. + """ + raise NotImplementedError("Subclasses should implement `reset_state`") + + def result(self): + """Computes and returns the scalar metric value. + + **Note:** This function is executed outside of the TensorFlow graph + on the CPU host. This means any TensorFlow ops run in this method + are run eagerly. + + Result computation is an idempotent operation that simply calculates the + metric value using the state variables. + + Returns: + A Python scalar. 
+ """ + raise NotImplementedError("Subclasses should implement `result`") diff --git a/keras/metrics/py_metric_test.py b/keras/metrics/py_metric_test.py new file mode 100644 index 000000000000..d8f00d3a5109 --- /dev/null +++ b/keras/metrics/py_metric_test.py @@ -0,0 +1,145 @@ +# Copyright 2023 The Keras Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Keras PyMetric classes.""" + + +import tensorflow.compat.v2 as tf + +from keras import metrics +from keras.testing_infra import test_combinations + + +class KTrimmedMean(metrics.PyMetric): + """An example PyMetric which computes the trimmed mean of `y_pred`.""" + + def __init__(self, k=0.1, name="k_trimmed_mean", **kwargs): + super().__init__(name=name, **kwargs) + self.k = k + + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = y_true.numpy() + + if sample_weight is not None: + y_true *= sample_weight.numpy() + + # Insert y_pred into our values list (keeping the list sorted) + index = 0 + for i, element in enumerate(self.values): + if y_true > element: + index = i + break + self.values = self.values[:index] + [y_true] + self.values[index:] + + def reset_state(self): + self.values = [] + + def result(self): + k = int(self.k * len(self.values)) + return tf.reduce_mean(self.values[k:-k]) + + def get_config(self): + config = super().get_config() + config.update({"k": self.k}) + return config + + +class Mean(metrics.PyMetric): + """An example PyMetric which computes the mean of `y_pred`.""" + + def __init__(self, name="mean", **kwargs): + super().__init__(name=name, **kwargs) + + def update_state(self, y_true, y_pred, sample_weight=None): + self.values.append(y_true) + + def reset_state(self): + self.values = [] + + def result(self): + return tf.reduce_mean(tf.concat(self.values, axis=0)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class PyMetricsTest(tf.test.TestCase): + def test_config(self): + ktm_object = KTrimmedMean(name="ktm", k=0.2, dtype=tf.float16) + self.assertEqual(ktm_object.name, "ktm") + self.assertEqual(ktm_object.k, 0.2) + self.assertEqual(ktm_object.dtype, tf.float16) + + # Check save and restore config + ktm_object2 = KTrimmedMean.from_config(ktm_object.get_config()) + self.assertEqual(ktm_object2.name, "ktm") + self.assertEqual(ktm_object.k, 0.2) + self.assertEqual(ktm_object2.dtype, tf.float16) + + def test_unweighted(self): + ktm_object = KTrimmedMean(k=0.2) + + for y_true in [-100, -10, 1, 2, 3, 4, 5, 6, 14, 9001]: + self.evaluate( + ktm_object.update_state( + tf.constant(y_true, dtype=tf.float32), + y_pred=tf.constant(0, dtype=tf.float32), + ) + ) + + result = ktm_object.result() + self.assertEqual(3.5, self.evaluate(result)) + + def test_weighted(self): + ktm_object = KTrimmedMean(k=0.2) + + for y_true in [-100, -10, 1, 2, 3, 4, 5, 6, 14, 9001]: + self.evaluate( + ktm_object.update_state( + tf.constant(y_true, dtype=tf.float32), + 
y_pred=tf.constant(0, dtype=tf.float32), + sample_weight=tf.constant(2, dtype=tf.float32), + ) + ) + + result = ktm_object.result() + self.assertEqual(7, self.evaluate(result)) + + def test_state_stored_on_cpu_host(self): + with tf.device("/device:GPU:0"): + mean_obj = Mean() + + y_true_0 = tf.constant([0, 1, 2], dtype=tf.float32) + y_true_1 = tf.constant([3, 4], dtype=tf.float32) + self.evaluate( + mean_obj.update_state( + y_true=y_true_0, y_pred=tf.constant(0, dtype=tf.float32) + ) + ) + self.evaluate( + mean_obj.update_state( + y_true=y_true_1, y_pred=tf.constant(0, dtype=tf.float32) + ) + ) + + self.assertEqual(2, self.evaluate(mean_obj.result())) + + if not tf.executing_eagerly(): + self.assertEndsWith(y_true_0.device, "/device:GPU:0") + self.assertEndsWith(y_true_1.device, "/device:GPU:0") + + self.assertEndsWith(mean_obj.values[0].device, "/device:CPU:0") + self.assertEndsWith(mean_obj.values[1].device, "/device:CPU:0") + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/metrics/regression_metrics.py b/keras/metrics/regression_metrics.py new file mode 100644 index 000000000000..ccc4702f6039 --- /dev/null +++ b/keras/metrics/regression_metrics.py @@ -0,0 +1,626 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Regression metrics, e.g. MAE/MSE/etc.""" + +import warnings + +import tensorflow.compat.v2 as tf + +from keras import backend +from keras.dtensor import utils as dtensor_utils +from keras.losses import logcosh +from keras.losses import mean_absolute_error +from keras.losses import mean_absolute_percentage_error +from keras.losses import mean_squared_error +from keras.losses import mean_squared_logarithmic_error +from keras.metrics import base_metric +from keras.utils import losses_utils +from keras.utils import metrics_utils +from keras.utils.tf_utils import is_tensor_or_variable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@keras_export("keras.metrics.MeanRelativeError") +class MeanRelativeError(base_metric.Mean): + """Computes the mean relative error by normalizing with the given values. + + This metric creates two local variables, `total` and `count` that are used + to compute the mean relative error. This is weighted by `sample_weight`, and + it is ultimately returned as `mean_relative_error`: an idempotent operation + that simply divides `total` by `count`. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + + Args: + normalizer: The normalizer values with same shape as predictions. + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. 
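+
+    Conceptually the result is `mean(abs(y_true - y_pred) / normalizer)`;
+    a NumPy equivalent for the unweighted case (a sketch, not the actual
+    implementation):
+
+    ```python
+    import numpy as np
+
+    def mean_relative_error(y_true, y_pred, normalizer):
+        # Element-wise relative errors, then an unweighted mean.
+        errors = np.abs(np.asarray(y_true) - np.asarray(y_pred))
+        return np.mean(errors / np.asarray(normalizer))
+    ```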
+ + Standalone usage: + + >>> m = tf.keras.metrics.MeanRelativeError(normalizer=[1, 3, 2, 3]) + >>> m.update_state([1, 3, 2, 3], [2, 4, 6, 8]) + + >>> # metric = mean(|y_pred - y_true| / normalizer) + >>> # = mean([1, 1, 4, 5] / [1, 3, 2, 3]) = mean([1, 1/3, 2, 5/3]) + >>> # = 5/4 = 1.25 + >>> m.result().numpy() + 1.25 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.MeanRelativeError(normalizer=[1, 3])]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, normalizer, name=None, dtype=None): + super().__init__(name=name, dtype=dtype) + normalizer = tf.cast(normalizer, self._dtype) + self.normalizer = normalizer + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates metric statistics. + + Args: + y_true: The ground truth values. + y_pred: The predicted values. + sample_weight: Optional weighting of each example. Can + be a `Tensor` whose rank is either 0, or the same rank as `y_true`, + and must be broadcastable to `y_true`. Defaults to `1`. + + Returns: + Update op. + """ + y_true = tf.cast(y_true, self._dtype) + y_pred = tf.cast(y_pred, self._dtype) + [ + y_pred, + y_true, + ], sample_weight = metrics_utils.ragged_assert_compatible_and_get_flat_values( # noqa: E501 + [y_pred, y_true], sample_weight + ) + y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( + y_pred, y_true + ) + + y_pred, self.normalizer = losses_utils.remove_squeezable_dimensions( + y_pred, self.normalizer + ) + y_pred.shape.assert_is_compatible_with(y_true.shape) + relative_errors = tf.math.divide_no_nan( + tf.abs(y_true - y_pred), self.normalizer + ) + + return super().update_state( + relative_errors, sample_weight=sample_weight + ) + + def get_config(self): + n = self.normalizer + config = { + "normalizer": backend.eval(n) if is_tensor_or_variable(n) else n + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@keras_export("keras.metrics.CosineSimilarity") +class CosineSimilarity(base_metric.MeanMetricWrapper): + """Computes the cosine similarity between the labels and predictions. + + `cosine similarity = (a . b) / ||a|| ||b||` + + See: [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity). + + This metric keeps the average cosine similarity between `predictions` and + `labels` over a stream of data. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + axis: (Optional) The dimension along which the cosine + similarity is computed. Defaults to `-1`. + + Standalone usage: + + >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]] + >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]] + >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]] + >>> # result = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1)) + >>> # = ((0. + 0.) + (0.5 + 0.5)) / 2 + >>> m = tf.keras.metrics.CosineSimilarity(axis=1) + >>> m.update_state([[0., 1.], [1., 1.]], [[1., 0.], [1., 1.]]) + >>> m.result().numpy() + 0.49999997 + + >>> m.reset_state() + >>> m.update_state([[0., 1.], [1., 1.]], [[1., 0.], [1., 1.]], + ... 
sample_weight=[0.3, 0.7]) + >>> m.result().numpy() + 0.6999999 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.CosineSimilarity(axis=1)]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="cosine_similarity", dtype=None, axis=-1): + super().__init__(cosine_similarity, name, dtype=dtype, axis=axis) + + +@keras_export("keras.metrics.MeanAbsoluteError") +class MeanAbsoluteError(base_metric.MeanMetricWrapper): + """Computes the mean absolute error between the labels and predictions. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.MeanAbsoluteError() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + >>> m.result().numpy() + 0.25 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 0.5 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.MeanAbsoluteError()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="mean_absolute_error", dtype=None): + super().__init__(mean_absolute_error, name, dtype=dtype) + + +@keras_export("keras.metrics.MeanAbsolutePercentageError") +class MeanAbsolutePercentageError(base_metric.MeanMetricWrapper): + """Computes the mean absolute percentage error between `y_true` and + `y_pred`. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.MeanAbsolutePercentageError() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + >>> m.result().numpy() + 250000000.0 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 500000000.0 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.MeanAbsolutePercentageError()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="mean_absolute_percentage_error", dtype=None): + super().__init__(mean_absolute_percentage_error, name, dtype=dtype) + + +@keras_export("keras.metrics.MeanSquaredError") +class MeanSquaredError(base_metric.MeanMetricWrapper): + """Computes the mean squared error between `y_true` and `y_pred`. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.MeanSquaredError() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + >>> m.result().numpy() + 0.25 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 0.5 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.MeanSquaredError()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="mean_squared_error", dtype=None): + super().__init__(mean_squared_error, name, dtype=dtype) + + +@keras_export("keras.metrics.MeanSquaredLogarithmicError") +class MeanSquaredLogarithmicError(base_metric.MeanMetricWrapper): + """Computes the mean squared logarithmic error between `y_true` and + `y_pred`. + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. 
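+
+    The underlying computation is
+    `mean(square(log(1 + y_true) - log(1 + y_pred)))`; a NumPy sketch of
+    the unweighted case (ignoring the epsilon clipping that the real
+    implementation applies):
+
+    ```python
+    import numpy as np
+
+    def msle(y_true, y_pred):
+        # log1p(x) == log(1 + x); mean over all elements when unweighted.
+        return np.mean((np.log1p(y_true) - np.log1p(y_pred)) ** 2)
+    ```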
+ + Standalone usage: + + >>> m = tf.keras.metrics.MeanSquaredLogarithmicError() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + >>> m.result().numpy() + 0.12011322 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 0.24022643 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.MeanSquaredLogarithmicError()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="mean_squared_logarithmic_error", dtype=None): + super().__init__(mean_squared_logarithmic_error, name, dtype=dtype) + + +@keras_export("keras.metrics.RootMeanSquaredError") +class RootMeanSquaredError(base_metric.Mean): + """Computes root mean squared error metric between `y_true` and `y_pred`. + + Standalone usage: + + >>> m = tf.keras.metrics.RootMeanSquaredError() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + >>> m.result().numpy() + 0.5 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 0.70710677 + + Usage with `compile()` API: + + ```python + model.compile( + optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.RootMeanSquaredError()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="root_mean_squared_error", dtype=None): + super().__init__(name, dtype=dtype) + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates root mean squared error statistics. + + Args: + y_true: The ground truth values. + y_pred: The predicted values. + sample_weight: Optional weighting of each example. Can + be a `Tensor` whose rank is either 0, or the same rank as `y_true`, + and must be broadcastable to `y_true`. Defaults to `1`. + + Returns: + Update op. + """ + y_true = tf.cast(y_true, self._dtype) + y_pred = tf.cast(y_pred, self._dtype) + y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( + y_pred, y_true + ) + error_sq = tf.math.squared_difference(y_pred, y_true) + return super().update_state(error_sq, sample_weight=sample_weight) + + def result(self): + return tf.sqrt(tf.math.divide_no_nan(self.total, self.count)) + + +@keras_export("keras.metrics.LogCoshError") +class LogCoshError(base_metric.MeanMetricWrapper): + """Computes the logarithm of the hyperbolic cosine of the prediction error. + + `logcosh = log((exp(x) + exp(-x))/2)`, where x is the error (y_pred - + y_true) + + Args: + name: (Optional) string name of the metric instance. + dtype: (Optional) data type of the metric result. + + Standalone usage: + + >>> m = tf.keras.metrics.LogCoshError() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + >>> m.result().numpy() + 0.10844523 + + >>> m.reset_state() + >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + ... sample_weight=[1, 0]) + >>> m.result().numpy() + 0.21689045 + + Usage with `compile()` API: + + ```python + model.compile(optimizer='sgd', + loss='mse', + metrics=[tf.keras.metrics.LogCoshError()]) + ``` + """ + + @dtensor_utils.inject_mesh + def __init__(self, name="logcosh", dtype=None): + super().__init__(logcosh, name, dtype=dtype) + + +# Adapted from TF-Addons implementation (RSquare class). +@keras_export("keras.metrics.R2Score") +class R2Score(base_metric.Metric): + """Computes R2 score. + + This is also called the + [coefficient of + determination](https://en.wikipedia.org/wiki/Coefficient_of_determination). 
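+
+    In its standard (unweighted, unadjusted) form the score is
+    `R^2 = 1 - sum((y_true - y_pred)**2) / sum((y_true - mean(y_true))**2)`.
+    A NumPy sketch for a single output column:
+
+    ```python
+    import numpy as np
+
+    def r2(y_true, y_pred):
+        ss_res = np.sum((y_true - y_pred) ** 2)  # residual sum of squares
+        ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)  # total variation
+        return 1.0 - ss_res / ss_tot
+    ```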
+
+    It indicates how close the fitted regression line
+    is to ground-truth data.
+
+    - The highest score possible is 1.0. It indicates that the predictors
+      perfectly account for variation in the target.
+    - A score of 0.0 indicates that the predictors do not
+      account for variation in the target.
+    - It can also be negative if the model is worse than random.
+
+    This metric can also compute the "Adjusted R2" score.
+
+    Args:
+      class_aggregation: Specifies how to aggregate scores corresponding to
+        different output classes (or target dimensions),
+        i.e. different dimensions on the last axis of the predictions.
+        Equivalent to the `multioutput` argument in Scikit-Learn.
+        Should be one of
+        `None` (no aggregation), `"uniform_average"`,
+        `"variance_weighted_average"`.
+      num_regressors: Number of independent regressors used
+        (for the "Adjusted R2" score). `0` is the standard R2 score.
+        Defaults to `0`.
+      name: Optional. string name of the metric instance.
+      dtype: Optional. data type of the metric result.
+
+    Example:
+
+    >>> y_true = np.array([[1], [4], [3]], dtype=np.float32)
+    >>> y_pred = np.array([[2], [4], [4]], dtype=np.float32)
+    >>> metric = tf.keras.metrics.R2Score()
+    >>> metric.update_state(y_true, y_pred)
+    >>> result = metric.result()
+    >>> result.numpy()
+    0.57142854
+    """
+
+    @dtensor_utils.inject_mesh
+    def __init__(
+        self,
+        class_aggregation="uniform_average",
+        num_regressors=0,
+        name="r2_score",
+        dtype=None,
+    ):
+        super().__init__(name=name, dtype=dtype)
+
+        valid_class_aggregation_values = (
+            None,
+            "uniform_average",
+            "variance_weighted_average",
+        )
+        if class_aggregation not in valid_class_aggregation_values:
+            raise ValueError(
+                "Invalid value for argument `class_aggregation`. Expected "
+                f"one of {valid_class_aggregation_values}. "
+                f"Received: class_aggregation={class_aggregation}"
+            )
+        if num_regressors < 0:
+            raise ValueError(
+                "Invalid value for argument `num_regressors`. "
+                "Expected a value >= 0. "
+                f"Received: num_regressors={num_regressors}"
+            )
+        self.class_aggregation = class_aggregation
+        self.num_regressors = num_regressors
+        self.num_samples = self.add_weight(name="num_samples", dtype="int32")
+        self.built = False
+
+    def build(self, y_true_shape, y_pred_shape):
+        if len(y_pred_shape) != 2 or len(y_true_shape) != 2:
+            raise ValueError(
+                "R2Score expects 2D inputs with shape "
+                "(batch_size, output_dim). Received input "
+                f"shapes: y_pred.shape={y_pred_shape} and "
+                f"y_true.shape={y_true_shape}."
+            )
+        if y_pred_shape[-1] is None or y_true_shape[-1] is None:
+            raise ValueError(
+                "R2Score expects 2D inputs with shape "
+                "(batch_size, output_dim), with output_dim fully "
+                "defined (not None). Received input "
+                f"shapes: y_pred.shape={y_pred_shape} and "
+                f"y_true.shape={y_true_shape}."
+            )
+        num_classes = y_pred_shape[-1]
+        self.squared_sum = self.add_weight(
+            name="squared_sum",
+            shape=[num_classes],
+            initializer="zeros",
+        )
+        self.sum = self.add_weight(
+            name="sum",
+            shape=[num_classes],
+            initializer="zeros",
+        )
+        self.total_mse = self.add_weight(
+            name="residual",
+            shape=[num_classes],
+            initializer="zeros",
+        )
+        self.count = self.add_weight(
+            name="count",
+            shape=[num_classes],
+            initializer="zeros",
+        )
+        self.built = True
+
+    def update_state(self, y_true, y_pred, sample_weight=None):
+        y_true = tf.convert_to_tensor(y_true, dtype=self.dtype)
+        y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
+        if not self.built:
+            self.build(y_true.shape, y_pred.shape)
+
+        if sample_weight is None:
+            sample_weight = 1
+
+        sample_weight = tf.convert_to_tensor(sample_weight, dtype=self.dtype)
+        if sample_weight.shape.rank == 1:
+            # Make sure there's a features dimension
+            sample_weight = tf.expand_dims(sample_weight, axis=1)
+        sample_weight = tf.__internal__.ops.broadcast_weights(
+            weights=sample_weight, values=y_true
+        )
+
+        weighted_y_true = y_true * sample_weight
+        self.sum.assign_add(tf.reduce_sum(weighted_y_true, axis=0))
+        self.squared_sum.assign_add(
+            tf.reduce_sum(y_true * weighted_y_true, axis=0)
+        )
+        self.total_mse.assign_add(
+            tf.reduce_sum((y_true - y_pred) ** 2 * sample_weight, axis=0)
+        )
+        self.count.assign_add(tf.reduce_sum(sample_weight, axis=0))
+        self.num_samples.assign_add(tf.size(y_true))
+
+    def result(self):
+        mean = self.sum / self.count
+        total = self.squared_sum - self.sum * mean
+        raw_scores = 1 - (self.total_mse / total)
+        raw_scores = tf.where(tf.math.is_inf(raw_scores), 0.0, raw_scores)
+
+        if self.class_aggregation == "uniform_average":
+            r2_score = tf.reduce_mean(raw_scores)
+        elif self.class_aggregation == "variance_weighted_average":
+            weighted_sum = tf.reduce_sum(total * raw_scores)
+            sum_of_weights = tf.reduce_sum(total)
+            r2_score = weighted_sum / sum_of_weights
+        else:
+            r2_score = raw_scores
+
+        if self.num_regressors != 0:
+            if self.num_regressors > self.num_samples - 1:
+                warnings.warn(
+                    "More independent predictors than datapoints "
+                    "in adjusted R2 score. Falling back to standard R2 score.",
+                    stacklevel=2,
+                )
+            elif self.num_regressors == self.num_samples - 1:
+                warnings.warn(
+                    "Division by zero in Adjusted R2 score. "
+                    "Falling back to standard R2 score.",
+                    stacklevel=2,
+                )
+            else:
+                n = tf.cast(self.num_samples, dtype=tf.float32)
+                p = tf.cast(self.num_regressors, dtype=tf.float32)
+                num = tf.multiply(
+                    tf.subtract(1.0, r2_score), tf.subtract(n, 1.0)
+                )
+                den = tf.subtract(tf.subtract(n, p), 1.0)
+                r2_score = tf.subtract(1.0, tf.divide(num, den))
+        return r2_score
+
+    def reset_state(self):
+        for v in self.variables:
+            v.assign(tf.zeros(v.shape, dtype=v.dtype))
+
+    def get_config(self):
+        config = {
+            "class_aggregation": self.class_aggregation,
+            "num_regressors": self.num_regressors,
+        }
+        base_config = super().get_config()
+        return {**base_config, **config}
+
+
+def cosine_similarity(y_true, y_pred, axis=-1):
+    """Computes the cosine similarity between labels and predictions.
+
+    Args:
+      y_true: The ground truth values.
+      y_pred: The prediction values.
+      axis: (Optional) The dimension along which the cosine
+        similarity is computed. Defaults to `-1`.
+
+    Returns:
+      Cosine similarity value.
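+
+    For instance (illustrative values): orthogonal rows score 0 and
+    identical rows score 1:
+
+    ```python
+    cosine_similarity([[0., 1.], [1., 1.]], [[1., 0.], [1., 1.]])
+    # -> approximately [0., 1.]
+    ```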
+ """ + y_true = tf.linalg.l2_normalize(y_true, axis=axis) + y_pred = tf.linalg.l2_normalize(y_pred, axis=axis) + return tf.reduce_sum(y_true * y_pred, axis=axis) diff --git a/keras/metrics/regression_metrics_test.py b/keras/metrics/regression_metrics_test.py new file mode 100644 index 000000000000..57b1a8191d35 --- /dev/null +++ b/keras/metrics/regression_metrics_test.py @@ -0,0 +1,506 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Keras metrics.""" + +import math + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras import Input +from keras import metrics +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class CosineSimilarityTest(tf.test.TestCase): + def l2_norm(self, x, axis): + epsilon = 1e-12 + square_sum = np.sum(np.square(x), axis=axis, keepdims=True) + x_inv_norm = 1 / np.sqrt(np.maximum(square_sum, epsilon)) + return np.multiply(x, x_inv_norm) + + def setup(self, axis=1): + self.np_y_true = np.asarray([[1, 9, 2], [-5, -2, 6]], dtype=np.float32) + self.np_y_pred = np.asarray([[4, 8, 12], [8, 1, 3]], dtype=np.float32) + + y_true = self.l2_norm(self.np_y_true, axis) + y_pred = self.l2_norm(self.np_y_pred, axis) + self.expected_loss = np.sum(np.multiply(y_true, y_pred), axis=(axis,)) + + self.y_true = tf.constant(self.np_y_true) + self.y_pred = tf.constant(self.np_y_pred) + + def test_config(self): + cosine_obj = metrics.CosineSimilarity( + axis=2, name="my_cos", dtype=tf.int32 + ) + self.assertEqual(cosine_obj.name, "my_cos") + self.assertEqual(cosine_obj._dtype, tf.int32) + + # Check save and restore config + cosine_obj2 = metrics.CosineSimilarity.from_config( + cosine_obj.get_config() + ) + self.assertEqual(cosine_obj2.name, "my_cos") + self.assertEqual(cosine_obj2._dtype, tf.int32) + + def test_unweighted(self): + self.setup() + cosine_obj = metrics.CosineSimilarity() + self.evaluate(tf.compat.v1.variables_initializer(cosine_obj.variables)) + loss = cosine_obj(self.y_true, self.y_pred) + expected_loss = np.mean(self.expected_loss) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_weighted(self): + self.setup() + cosine_obj = metrics.CosineSimilarity() + self.evaluate(tf.compat.v1.variables_initializer(cosine_obj.variables)) + sample_weight = np.asarray([1.2, 3.4]) + loss = cosine_obj( + self.y_true, self.y_pred, sample_weight=tf.constant(sample_weight) + ) + expected_loss = np.sum(self.expected_loss * sample_weight) / np.sum( + sample_weight + ) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + def test_axis(self): + self.setup(axis=1) + cosine_obj = metrics.CosineSimilarity(axis=1) + self.evaluate(tf.compat.v1.variables_initializer(cosine_obj.variables)) + loss = cosine_obj(self.y_true, 
self.y_pred) + expected_loss = np.mean(self.expected_loss) + self.assertAlmostEqual(self.evaluate(loss), expected_loss, 3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class MeanAbsoluteErrorTest(tf.test.TestCase): + def test_config(self): + mae_obj = metrics.MeanAbsoluteError(name="my_mae", dtype=tf.int32) + self.assertEqual(mae_obj.name, "my_mae") + self.assertEqual(mae_obj._dtype, tf.int32) + + # Check save and restore config + mae_obj2 = metrics.MeanAbsoluteError.from_config(mae_obj.get_config()) + self.assertEqual(mae_obj2.name, "my_mae") + self.assertEqual(mae_obj2._dtype, tf.int32) + + def test_unweighted(self): + mae_obj = metrics.MeanAbsoluteError() + self.evaluate(tf.compat.v1.variables_initializer(mae_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + + update_op = mae_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = mae_obj.result() + self.assertAllClose(0.5, result, atol=1e-5) + + def test_weighted(self): + mae_obj = metrics.MeanAbsoluteError() + self.evaluate(tf.compat.v1.variables_initializer(mae_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + sample_weight = tf.constant((1.0, 1.5, 2.0, 2.5)) + result = mae_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(0.54285, self.evaluate(result), atol=1e-5) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class MeanAbsolutePercentageErrorTest(tf.test.TestCase): + def test_config(self): + mape_obj = metrics.MeanAbsolutePercentageError( + name="my_mape", dtype=tf.int32 + ) + self.assertEqual(mape_obj.name, "my_mape") + self.assertEqual(mape_obj._dtype, tf.int32) + + # Check save and restore config + mape_obj2 = metrics.MeanAbsolutePercentageError.from_config( + mape_obj.get_config() + ) + self.assertEqual(mape_obj2.name, "my_mape") + self.assertEqual(mape_obj2._dtype, tf.int32) + + def test_unweighted(self): + mape_obj = metrics.MeanAbsolutePercentageError() + self.evaluate(tf.compat.v1.variables_initializer(mape_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + + update_op = mape_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = mape_obj.result() + self.assertAllClose(35e7, result, atol=1e-5) + + def test_weighted(self): + mape_obj = metrics.MeanAbsolutePercentageError() + self.evaluate(tf.compat.v1.variables_initializer(mape_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + sample_weight = tf.constant((1.0, 1.5, 2.0, 2.5)) + result = mape_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(40e7, self.evaluate(result), atol=1e-5) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class MeanSquaredErrorTest(tf.test.TestCase): + def test_config(self): + mse_obj = metrics.MeanSquaredError(name="my_mse", dtype=tf.int32) + self.assertEqual(mse_obj.name, "my_mse") + 
self.assertEqual(mse_obj._dtype, tf.int32) + + # Check save and restore config + mse_obj2 = metrics.MeanSquaredError.from_config(mse_obj.get_config()) + self.assertEqual(mse_obj2.name, "my_mse") + self.assertEqual(mse_obj2._dtype, tf.int32) + + def test_unweighted(self): + mse_obj = metrics.MeanSquaredError() + self.evaluate(tf.compat.v1.variables_initializer(mse_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + + update_op = mse_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = mse_obj.result() + self.assertAllClose(0.5, result, atol=1e-5) + + def test_weighted(self): + mse_obj = metrics.MeanSquaredError() + self.evaluate(tf.compat.v1.variables_initializer(mse_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + sample_weight = tf.constant((1.0, 1.5, 2.0, 2.5)) + result = mse_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(0.54285, self.evaluate(result), atol=1e-5) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class MeanSquaredLogarithmicErrorTest(tf.test.TestCase): + def test_config(self): + msle_obj = metrics.MeanSquaredLogarithmicError( + name="my_msle", dtype=tf.int32 + ) + self.assertEqual(msle_obj.name, "my_msle") + self.assertEqual(msle_obj._dtype, tf.int32) + + # Check save and restore config + msle_obj2 = metrics.MeanSquaredLogarithmicError.from_config( + msle_obj.get_config() + ) + self.assertEqual(msle_obj2.name, "my_msle") + self.assertEqual(msle_obj2._dtype, tf.int32) + + def test_unweighted(self): + msle_obj = metrics.MeanSquaredLogarithmicError() + self.evaluate(tf.compat.v1.variables_initializer(msle_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + + update_op = msle_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = msle_obj.result() + self.assertAllClose(0.24022, result, atol=1e-5) + + def test_weighted(self): + msle_obj = metrics.MeanSquaredLogarithmicError() + self.evaluate(tf.compat.v1.variables_initializer(msle_obj.variables)) + y_true = tf.constant( + ((0, 1, 0, 1, 0), (0, 0, 1, 1, 1), (1, 1, 1, 1, 0), (0, 0, 0, 0, 1)) + ) + y_pred = tf.constant( + ((0, 0, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 0, 1, 0), (1, 1, 1, 1, 1)) + ) + sample_weight = tf.constant((1.0, 1.5, 2.0, 2.5)) + result = msle_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(0.26082, self.evaluate(result), atol=1e-5) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class RootMeanSquaredErrorTest(tf.test.TestCase): + def test_config(self): + rmse_obj = metrics.RootMeanSquaredError(name="rmse", dtype=tf.int32) + self.assertEqual(rmse_obj.name, "rmse") + self.assertEqual(rmse_obj._dtype, tf.int32) + + rmse_obj2 = metrics.RootMeanSquaredError.from_config( + rmse_obj.get_config() + ) + self.assertEqual(rmse_obj2.name, "rmse") + self.assertEqual(rmse_obj2._dtype, tf.int32) + + def test_unweighted(self): + rmse_obj = metrics.RootMeanSquaredError() + self.evaluate(tf.compat.v1.variables_initializer(rmse_obj.variables)) + y_true = tf.constant((2, 4, 6)) + y_pred = 
tf.constant((1, 3, 2)) + + update_op = rmse_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = rmse_obj.result() + # error = [-1, -1, -4], square(error) = [1, 1, 16], mean = 18/3 = 6 + self.assertAllClose(math.sqrt(6), result, atol=1e-3) + + def test_weighted(self): + rmse_obj = metrics.RootMeanSquaredError() + self.evaluate(tf.compat.v1.variables_initializer(rmse_obj.variables)) + y_true = tf.constant((2, 4, 6, 8)) + y_pred = tf.constant((1, 3, 2, 3)) + sample_weight = tf.constant((0, 1, 0, 1)) + result = rmse_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(math.sqrt(13), self.evaluate(result), atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class LogCoshErrorTest(tf.test.TestCase): + def setup(self): + y_pred = np.asarray([1, 9, 2, -5, -2, 6]).reshape((2, 3)) + y_true = np.asarray([4, 8, 12, 8, 1, 3]).reshape((2, 3)) + + self.batch_size = 6 + error = y_pred - y_true + self.expected_results = np.log((np.exp(error) + np.exp(-error)) / 2) + + self.y_pred = tf.constant(y_pred, dtype=tf.float32) + self.y_true = tf.constant(y_true) + + def test_config(self): + logcosh_obj = metrics.LogCoshError(name="logcosh", dtype=tf.int32) + self.assertEqual(logcosh_obj.name, "logcosh") + self.assertEqual(logcosh_obj._dtype, tf.int32) + + def test_unweighted(self): + self.setup() + logcosh_obj = metrics.LogCoshError() + self.evaluate(tf.compat.v1.variables_initializer(logcosh_obj.variables)) + + update_op = logcosh_obj.update_state(self.y_true, self.y_pred) + self.evaluate(update_op) + result = logcosh_obj.result() + expected_result = np.sum(self.expected_results) / self.batch_size + self.assertAllClose(result, expected_result, atol=1e-3) + + def test_weighted(self): + self.setup() + logcosh_obj = metrics.LogCoshError() + self.evaluate(tf.compat.v1.variables_initializer(logcosh_obj.variables)) + sample_weight = tf.constant([1.2, 3.4], shape=(2, 1)) + result = logcosh_obj( + self.y_true, self.y_pred, sample_weight=sample_weight + ) + + sample_weight = np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape( + (2, 3) + ) + expected_result = np.multiply(self.expected_results, sample_weight) + expected_result = np.sum(expected_result) / np.sum(sample_weight) + self.assertAllClose(self.evaluate(result), expected_result, atol=1e-3) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class MeanRelativeErrorTest(tf.test.TestCase): + def test_config(self): + normalizer = tf.constant([1, 3], dtype=tf.float32) + mre_obj = metrics.MeanRelativeError(normalizer=normalizer, name="mre") + self.assertEqual(mre_obj.name, "mre") + self.assertArrayNear(self.evaluate(mre_obj.normalizer), [1, 3], 1e-1) + + mre_obj2 = metrics.MeanRelativeError.from_config(mre_obj.get_config()) + self.assertEqual(mre_obj2.name, "mre") + self.assertArrayNear(self.evaluate(mre_obj2.normalizer), [1, 3], 1e-1) + + def test_unweighted(self): + np_y_pred = np.asarray([2, 4, 6, 8], dtype=np.float32) + np_y_true = np.asarray([1, 3, 2, 3], dtype=np.float32) + expected_error = np.mean( + np.divide(np.absolute(np_y_pred - np_y_true), np_y_true) + ) + + y_pred = tf.constant(np_y_pred, shape=(1, 4), dtype=tf.float32) + y_true = tf.constant(np_y_true, shape=(1, 4)) + + mre_obj = metrics.MeanRelativeError(normalizer=y_true) + self.evaluate(tf.compat.v1.variables_initializer(mre_obj.variables)) + + result = mre_obj(y_true, y_pred) + self.assertAllClose(self.evaluate(result), expected_error, atol=1e-3) + + def test_weighted(self): + np_y_pred = 
np.asarray([2, 4, 6, 8], dtype=np.float32) + np_y_true = np.asarray([1, 3, 2, 3], dtype=np.float32) + sample_weight = np.asarray([0.2, 0.3, 0.5, 0], dtype=np.float32) + rel_errors = np.divide(np.absolute(np_y_pred - np_y_true), np_y_true) + expected_error = np.sum(rel_errors * sample_weight) + + y_pred = tf.constant(np_y_pred, dtype=tf.float32) + y_true = tf.constant(np_y_true) + + mre_obj = metrics.MeanRelativeError(normalizer=y_true) + self.evaluate(tf.compat.v1.variables_initializer(mre_obj.variables)) + + result = mre_obj( + y_true, y_pred, sample_weight=tf.constant(sample_weight) + ) + self.assertAllClose(self.evaluate(result), expected_error, atol=1e-3) + + def test_zero_normalizer(self): + y_pred = tf.constant([2, 4], dtype=tf.float32) + y_true = tf.constant([1, 3]) + + mre_obj = metrics.MeanRelativeError(normalizer=tf.zeros_like(y_true)) + self.evaluate(tf.compat.v1.variables_initializer(mre_obj.variables)) + + result = mre_obj(y_true, y_pred) + self.assertEqual(self.evaluate(result), 0) + + +@test_utils.run_v2_only +class R2ScoreTest(parameterized.TestCase, tf.test.TestCase): + def _run_test( + self, + y_true, + y_pred, + sample_weights, + class_aggregation, + num_regressors, + reference_result, + ): + y_true = tf.constant(y_true, dtype="float32") + y_pred = tf.constant(y_pred, dtype="float32") + r2 = metrics.R2Score(class_aggregation, num_regressors) + r2.update_state(y_true, y_pred, sample_weights) + result = r2.result().numpy() + self.assertAllClose(result, reference_result, atol=1e-6) + + def test_config(self): + r2_obj = metrics.R2Score( + class_aggregation=None, + num_regressors=2, + ) + self.assertEqual(r2_obj.class_aggregation, None) + self.assertEqual(r2_obj.num_regressors, 2) + self.assertEqual(r2_obj.dtype, tf.float32) + + # Check save and restore config + r2_obj2 = metrics.R2Score.from_config(r2_obj.get_config()) + self.assertEqual(r2_obj2.class_aggregation, None) + self.assertEqual(r2_obj2.num_regressors, 2) + self.assertEqual(r2_obj2.dtype, tf.float32) + + @parameterized.parameters( + # class_aggregation, num_regressors, result + (None, 0, [0.37, -1.295, 0.565]), + ("uniform_average", 0, -0.12), + ("variance_weighted_average", 0, -0.12), + ) + def test_r2_sklearn_comparison( + self, class_aggregation, num_regressors, result + ): + y_true = [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0]] + y_pred = [[0.4, 0.5, 0.6], [0.1, 0.2, 0.3], [0.5, 0.8, 0.2]] + self._run_test( + y_true, + y_pred, + None, + class_aggregation=class_aggregation, + num_regressors=num_regressors, + reference_result=result, + ) + + @parameterized.parameters( + # class_aggregation, num_regressors, result + (None, 0, [0.17305559, -8.836666, -0.521]), + (None, 1, [0.054920673, -10.241904, -0.7382858]), + (None, 2, [-0.10259259, -12.115555, -1.0280001]), + ("uniform_average", 0, -3.0615367889404297), + ("uniform_average", 1, -3.641756534576416), + ("uniform_average", 2, -4.415382385253906), + ("variance_weighted_average", 0, -1.3710224628448486), + ("variance_weighted_average", 1, -1.7097399234771729), + ("variance_weighted_average", 2, -2.161363363265991), + ) + def test_r2_tfa_comparison(self, class_aggregation, num_regressors, result): + y_true = [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0]] + y_pred = [[0.4, 0.9, 1.6], [0.1, 1.2, 0.6], [1.5, 0.8, 0.6]] + sample_weights = [0.8, 0.1, 0.4] + self._run_test( + y_true, + y_pred, + sample_weights, + class_aggregation=class_aggregation, + num_regressors=num_regressors, + reference_result=result, + ) + + def test_errors(self): + # Bad 
class_aggregation value + with self.assertRaisesRegex( + ValueError, "Invalid value for argument `class_aggregation`" + ): + metrics.R2Score(class_aggregation="wrong") + + # Bad num_regressors value + with self.assertRaisesRegex( + ValueError, "Invalid value for argument `num_regressors`" + ): + metrics.R2Score(num_regressors=-1) + + # Bad input shape + with self.assertRaisesRegex(ValueError, "expects 2D inputs with shape"): + r2 = metrics.R2Score() + r2.update_state(tf.constant([0.0, 1.0]), tf.constant([0.0, 1.0])) + + with self.assertRaisesRegex( + ValueError, "with output_dim fully defined" + ): + r2 = metrics.R2Score() + r2.update_state(Input(shape=(None,)), tf.constant([[0.0], [1.0]])) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/mixed_precision/BUILD b/keras/mixed_precision/BUILD index b1e5162a1990..d29b508403e5 100644 --- a/keras/mixed_precision/BUILD +++ b/keras/mixed_precision/BUILD @@ -16,10 +16,12 @@ # Description: # Contains the Keras Mixed Precision API (TensorFlow version). +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "cuda_py_test") load("@org_keras//keras:keras.bzl", "tf_py_test") # buildifier: disable=same-origin-load package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ # TODO(scottzhu): Remove these two deps and convert the test to integration test. "//third_party/tensorflow/python/distribute:__pkg__", # For collective_all_reduce_strategy_test @@ -64,7 +66,7 @@ tf_py_test( ":policy", "//:expect_tensorflow_installed", "//keras", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/testing_infra:test_combinations", ], ) @@ -82,7 +84,6 @@ cuda_py_test( name = "device_compatibility_check_test", srcs = ["device_compatibility_check_test.py"], srcs_version = "PY3", - tfrt_enabled = True, deps = [ ":device_compatibility_check", "//:expect_tensorflow_installed", @@ -111,7 +112,8 @@ tf_py_test( ":autocast_variable", "//:expect_absl_installed", "//:expect_tensorflow_installed", - "//keras/optimizers/optimizer_v2", + "//keras/layers", + "//keras/optimizers/legacy:optimizers", ], ) @@ -122,7 +124,7 @@ py_library( deps = [ "//:expect_absl_installed", "//:expect_tensorflow_installed", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/utils:generic_utils", ], ) @@ -147,14 +149,13 @@ cuda_py_test( size = "small", srcs = ["mixed_precision_graph_rewrite_test.py"], python_version = "PY3", - tfrt_enabled = True, deps = [ ":loss_scale_optimizer", ":policy", "//:expect_absl_installed", "//:expect_tensorflow_installed", "//keras", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/testing_infra:test_combinations", "//keras/testing_infra:test_utils", ], diff --git a/keras/mixed_precision/__init__.py b/keras/mixed_precision/__init__.py index 62e8e80e3656..58c7cd9475f5 100644 --- a/keras/mixed_precision/__init__.py +++ b/keras/mixed_precision/__init__.py @@ -20,6 +20,6 @@ """ from keras.mixed_precision.loss_scale_optimizer import LossScaleOptimizer -from keras.mixed_precision.policy import global_policy from keras.mixed_precision.policy import Policy +from keras.mixed_precision.policy import global_policy from keras.mixed_precision.policy import set_global_policy diff --git a/keras/mixed_precision/autocast_variable.py b/keras/mixed_precision/autocast_variable.py index ec541edda0fe..eea3192b80fb 100644 --- a/keras/mixed_precision/autocast_variable.py +++ 
b/keras/mixed_precision/autocast_variable.py @@ -14,535 +14,611 @@ # ============================================================================== """Contains AutoCastVariable, a variable which automatically casts itself.""" +import threading +from typing import Optional + import tensorflow.compat.v2 as tf -import threading from keras.distribute import distributed_training_utils - # _autocast_dtype.dtype is the dtype AutoCastVariables should be cast to, or # None if AutoCastVariables should not be cast. _autocast_dtype = threading.local() def numpy_text(tensor, is_repr=False): - """Human readable representation of a tensor's numpy value.""" - if tensor.dtype.is_numpy_compatible: - # pylint: disable=protected-access - text = repr(tensor._numpy()) if is_repr else str(tensor._numpy()) - # pylint: enable=protected-access - else: - text = '<unprintable>' - if '\n' in text: - text = '\n' + text - return text - - -class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor): - """Variable that will cast itself to a different dtype in applicable contexts. - - This class wraps a floating-point `tf.Variable`. It emulates the variable - interface and delegates to the wrapped variable, but it additionally will cast - the wrapped variable under an `enable_auto_cast_variables(dtype)` context - manager. - - For example: + """Human readable representation of a tensor's numpy value.""" + if tensor.dtype.is_numpy_compatible: - >>> v = tf.Variable(1.0, dtype=tf.float32) - >>> v = AutoCastVariable(v) - >>> tf.identity(v).dtype - tf.float32 - >>> with enable_auto_cast_variables(tf.float16): - ... tf.identity(v).dtype - tf.float16 + text = repr(tensor._numpy()) if is_repr else str(tensor._numpy()) - The purpose of this class is to allow Keras layers to create variables in - float32, and automatically cast them to float16 or bfloat16 when the layer is - called. - """ - - def __init__(self, variable): - """Creates an AutoCastVariable instance. - - Args: - variable: A floating-point resource variable to wrap. - - Raises: - ValueError: If `variable` is not a floating-point resource variable - """ - if not isinstance(variable, tf.Variable): - raise ValueError('variable must be of type tf.ResourceVariable, but got: ' - '%s' % variable) - if not variable.dtype.is_floating: - raise ValueError('variable must be a floating point variable but has ' - 'type: %s' % variable.dtype.name) - self._variable = variable - # 'delegate' means AutoCastVariable.op return self._variable.op, which will - # raise an AttributeError in Eager (as intended). If set to any other value, - # AutoCastVariable.op returns that value instead, which is used to set the - # op attribute in AutoCastVariable.assign().
- self._op = 'delegate' - - def _should_cast(self): - """Returns True if this variable should be casted when accessed.""" - autocast_dtype = getattr(_autocast_dtype, 'dtype', None) - return autocast_dtype is not None and self.dtype != autocast_dtype - - @property - def dtype(self): - """The dtype of the underlying variable, before any casts are done.""" - return self._variable.dtype - - @property - def true_dtype(self): - """Deprecated alias of `dtype`.""" - return self._variable.dtype - - @property - def _cast_dtype(self): - dtype = getattr(_autocast_dtype, 'dtype', None) - return dtype or self._variable.dtype - - def value(self): - val = self._variable.value() - if not self._should_cast(): - return val - return tf.cast(val, self._cast_dtype) - - def read_value(self): - val = self._variable.read_value() - return tf.cast(val, self._cast_dtype) - - def sparse_read(self, indices, name=None): - """Reads the value of this variable sparsely, using `gather`.""" - val = self._variable.sparse_read(indices, name=name) - return tf.cast(val, self._cast_dtype) - - def gather_nd(self, indices, name=None): - """Gather slices of the variable into a Tensor.""" - val = self._variable.gather_nd(indices, name=name) - return tf.cast(val, self._cast_dtype) - - def __getattr__(self, name): - return getattr(self._variable, name) - - def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False): - """Converts this variable to a tensor.""" - if as_ref: - # This ValueError should not occur in practice since it is impossible to - # pass as_ref=True using public APIs. - raise ValueError('Cannot convert AutoCastVariable to a tensor if ' - 'as_ref=True is passed to convert_to_tensor') - if not self._should_cast(): - return tf.convert_to_tensor(self._variable, dtype=dtype, - name=name) - if dtype is not None and not dtype.is_compatible_with(self._cast_dtype): - raise ValueError( - 'Incompatible type conversion requested to type {!r} for ' - 'AutoCastVariable which is casted to type {!r}'.format( - dtype.name, self._cast_dtype.name)) - val = tf.convert_to_tensor( - self._variable, dtype=self._variable.dtype, name=name) - return tf.cast(val, self._cast_dtype) - - def _should_act_as_resource_variable(self): - """Pass resource_variable_ops.is_resource_variable check.""" - pass - - def __repr__(self): - if tf.executing_eagerly() and not self._in_graph_mode: - repr_str = ("<AutoCastVariable '{v.name}' shape={v.shape} " - "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}, " - "numpy={np_repr}>") - return repr_str.format( - v=self, np_repr=numpy_text(self.read_value(), is_repr=True)) - else: - repr_str = ("<AutoCastVariable '{v.name}' shape={v.shape} " - "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}>") - return repr_str.format(v=self) - - # Method delegations: We delegate the following methods to self._variable. - # Each of these methods simply calls the same method on self._variable. The - # base Variable raises NotImplementedError for most of these, so we must - # override them. - # - # We do not define the following methods from Variable for the following - # reasons: - # * 'count_up_to': This method only applies to int variables, which cannot - # be wrapped with an AutoCastVariable. - # * 'ref': Instead we inherit the definition from Variable. - # If we defined and delegated to Variable, the ref of an AutoCastVariable - # would be the same as the ref of the underlying variable, which would be - # strange as they are different Python objects.
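A quick illustration of the delegation contract described above (a sketch, not code from this diff; it assumes TensorFlow and this module are importable): structural queries such as `shape` and `trainable` are answered by the wrapped variable, while reads are cast only inside an autocast scope.

    import tensorflow.compat.v2 as tf

    from keras.mixed_precision import autocast_variable

    v = autocast_variable.create_autocast_variable(tf.Variable([1.0, 2.0]))
    assert v.trainable and v.shape == (2,)  # delegated to the inner variable
    with autocast_variable.enable_auto_cast_variables(tf.float16):
        assert v.read_value().dtype == tf.float16  # cast on read
    assert v.read_value().dtype == tf.float32  # no cast outside the scope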
- - def set_shape(self, shape): - return self._variable.set_shape(self, shape) - - @property - def trainable(self): - return self._variable.trainable - - @property - def synchronization(self): - return self._variable.synchronization - - @property - def aggregation(self): - return self._variable.aggregation - - def eval(self, session=None): - return self._variable.eval(session) - - def initialized_value(self): - return self._variable.initialized_value() - - @property - def initial_value(self): - return self._variable.initial_value - - @property - def constraint(self): - return self._variable.constraint - - def _apply_assign_update(self, - update_fn, - value, - use_locking=None, - name=None, - read_value=True): - # TODO(b/146181571): This logic can be simplified once - # DistributedVariable.assign returns a DistributedVariable. Currently for - # MirroredStrategy, it returns a Mirrored value. - if tf.compat.v1.executing_eagerly_outside_functions(): - assign_op = update_fn(value, use_locking, name, False) - if read_value: - # We create a new AutoCastVariable with the same underlying tf.Variable. - # The new AutoCastVariable is identical except the 'op' attribute is - # defined. This matches the behavior of tf.Variable.assign. - var = create_autocast_variable(self._variable) - var._op = assign_op # pylint:disable=protected-access - return var - return assign_op - - # Fallback to wrapping the returned variable in graph mode if possible - assign_var = update_fn(value, use_locking, name, read_value) - if read_value and tf.__internal__.ops.is_resource_variable(assign_var): - return create_autocast_variable(assign_var) - return assign_var - - def _apply_update(self, update_fn, *args, **kwargs): - update_var = update_fn(*args, **kwargs) - if tf.compat.v1.executing_eagerly_outside_functions(): - return self - - # Fallback to wrapping the returned variable in graph mode if possible - if tf.__internal__.ops.is_resource_variable(update_var): - return create_autocast_variable(update_var) - return update_var - - def assign(self, value, use_locking=None, name=None, read_value=True): - return self._apply_assign_update(self._variable.assign, value, use_locking, - name, read_value) - - def assign_add(self, delta, use_locking=None, name=None, read_value=True): - return self._apply_assign_update(self._variable.assign_add, delta, - use_locking, name, read_value) - - def assign_sub(self, delta, use_locking=None, name=None, read_value=True): - return self._apply_assign_update(self._variable.assign_sub, delta, - use_locking, name, read_value) - - def scatter_sub(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_sub, sparse_delta, - use_locking, name) - - def scatter_add(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_add, sparse_delta, - use_locking, name) - - def scatter_max(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_max, sparse_delta, - use_locking, name) - - def scatter_min(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_min, sparse_delta, - use_locking, name) - - def scatter_mul(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_mul, sparse_delta, - use_locking, name) - - def scatter_div(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_div, sparse_delta, - use_locking, name) - - def 
scatter_update(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_update, sparse_delta, - use_locking, name) - - def batch_scatter_update(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.batch_scatter_update, sparse_delta, - use_locking, name) - - def scatter_nd_sub(self, indices, updates, name=None): - return self._apply_update(self._variable.scatter_nd_sub, indices, updates, - name) - - def scatter_nd_add(self, indices, updates, name=None): - return self._apply_update(self._variable.scatter_nd_add, indices, updates, - name) - - def scatter_nd_update(self, indices, updates, name=None): - return self._apply_update(self._variable.scatter_nd_update, indices, - updates, name) - - def load(self, value, session=None): - return self._variable.load(value, session) - - @property - def name(self): - return self._variable.name - - @property - def _shared_name(self): - return self._variable._shared_name # pylint:disable=protected-access - - @property - def initializer(self): - return self._variable.initializer - - @property - def device(self): - return self._variable.device - - @property - def op(self): - if self._op == 'delegate': - return self._variable.op - return self._op - - def _as_graph_element(self): - graph_element = self._variable._as_graph_element() # pylint:disable=protected-access - if graph_element is None: - return self._op - return graph_element - - @property - def graph(self): - return self._variable.graph - - @property - def shape(self): - return self._variable.shape - - def get_shape(self): - return self._variable.get_shape() - - def _gather_saveables_for_checkpoint(self): - # By delegating this method to the wrapped variable, checkpoints with - # AutoCastVariables are identical to checkpoints with normal variables. - # Therefore models checkpointed with AutoCastVariables can be restored on - # models with normal variables, and vice versa. - return self._variable._gather_saveables_for_checkpoint() # pylint:disable=protected-access - - def _map_resources(self, save_options): - # By delegating this method to the wrapped variable, SavedModel with - # AutoCastVariables are identical to SavedModel with normal variables. - obj_map, resource_map = self._variable._map_resources(save_options) # pylint:disable=protected-access - obj_map[self] = obj_map[self._variable] - return obj_map, resource_map + text = "<unprintable>" + if "\n" in text: + text = "\n" + text + return text - # TODO(reedwm): Maybe encode the fact the variable is an AutoCastVariable in - # to_proto(). - def to_proto(self, export_scope=None): - return self._variable.to_proto(export_scope) - def from_proto(self, variable_def, import_scope=None): - return self._variable.from_proto(variable_def, import_scope) +class AutoCastVariableSpec(tf.types.experimental.TraceType): + """TraceType for AutoCastVariableSpec for tracing with tf.function. - # Delegate the private attributes _handle_name and _initializer_op to - # self._variable. SavedModel sets these attributes when loading a model. For - # example, it sets _handle_name here: - # https://github.com/tensorflow/tensorflow/blob/db26bd574fa95b5bdd53c08463dd19407cc0297e/tensorflow/python/keras/saving/saved_model/load.py#L211 - # We need to expose these attributes on AutoCastVariable as well for - # SavedModel to work properly. - # TODO(reedwm/kathywu): Find a better way to support SavedModel. Exposing - # private attributes is hacky and difficult to maintain.
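To make the checkpoint note above concrete, here is a small sketch (again not part of this diff; the checkpoint path is illustrative): because saveables are delegated to the wrapped variable, a checkpoint written through an AutoCastVariable restores into a plain `tf.Variable`, and vice versa.

    import tensorflow.compat.v2 as tf

    from keras.mixed_precision import autocast_variable

    v = autocast_variable.create_autocast_variable(tf.Variable(3.0))
    path = tf.train.Checkpoint(v=v).save("/tmp/acv_ckpt")  # illustrative path

    plain = tf.Variable(0.0)
    tf.train.Checkpoint(v=plain).restore(path)
    assert plain.numpy() == 3.0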
- @property - def _handle_name(self): - return self._variable._handle_name # pylint: disable=protected-access - - @_handle_name.setter - def _handle_name(self, handle_name): - self._variable._handle_name = handle_name # pylint: disable=protected-access - - @property - def _initializer_op(self): - return self._variable._initializer_op # pylint: disable=protected-access - - @_initializer_op.setter - def _initializer_op(self, initializer_op): - self._variable._initializer_op = initializer_op # pylint: disable=protected-access - - # Operator overloads: - # Note we only overload operators that support floating-point types, as - # non-float variables cannot be wrapped with an AutoCastVariable. - # Also note: We call read_value() instead of value(), because value() causes - # gradients not to work properly when TPUStrategy is used: b/143380936 - - def __add__(self, o): - return self.read_value() + o - - def __radd__(self, o): - return o + self.read_value() - - def __sub__(self, o): - return self.read_value() - o - - def __rsub__(self, o): - return o - self.read_value() - - def __mul__(self, o): - return self.read_value() * o - - def __rmul__(self, o): - return o * self.read_value() - - def __truediv__(self, o): - return self.read_value() / o + This class implements the Type for AutoCastVariable used in tracing. + """ - def __rtruediv__(self, o): - return o / self.read_value() + def __init__(self, value): + self._value = value - def __floordiv__(self, o): - return self.read_value() // o + def is_subtype_of(self, other) -> bool: + """If the other spec is the same as `self`, return True.""" + return self == other - def __rfloordiv__(self, o): - return o // self.read_value() + def most_specific_common_supertype(self, others): + """`self` is the common supertype if all input types match it.""" + return self if all(self == other for other in others) else None - def __mod__(self, o): - return self.read_value() % o + def placeholder_value(self, placeholder_context=None): + """Use the AutoCastVariable value itself as a placeholder.""" + return self._value - def __rmod__(self, o): - return o % self.read_value() + def _cast(self, value, _): + return value - def __lt__(self, o): - return self.read_value() < o + def _to_tensors(self, value): + return [] - def __le__(self, o): - return self.read_value() <= o + def __hash__(self) -> int: + return hash(id(self._value)) - def __gt__(self, o): - return self.read_value() > o + def __eq__(self, other) -> bool: + return self is other - def __ge__(self, o): - return self.read_value() >= o - def __getitem__(self, o): - return self.read_value()[o] +class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor): + """Variable that casts itself to a different dtype in applicable contexts. + + This class wraps a floating-point `tf.Variable`. It emulates the variable + interface and delegates to the wrapped variable, but it additionally will + cast the wrapped variable under an `enable_auto_cast_variables(dtype)` + context manager. + + For example: + + >>> v = tf.Variable(1.0, dtype=tf.float32) + >>> v = AutoCastVariable(v) + >>> tf.identity(v).dtype + tf.float32 + >>> with enable_auto_cast_variables(tf.float16): + ... tf.identity(v).dtype + tf.float16 + + The purpose of this class is to allow Keras layers to create variables in + float32, and automatically cast them to float16 or bfloat16 when the layer + is called. 
+ """ - def __pow__(self, o, modulo=None): - return pow(self.read_value(), o, modulo) + def __init__(self, variable): + """Creates an AutoCastVariable instance. + + Args: + variable: A floating-point resource variable to wrap. + + Raises: + ValueError: If `variable` is not a floating-point resource variable + """ + if not isinstance(variable, tf.Variable): + raise ValueError( + "variable must be of type tf.ResourceVariable, but got: %s" + % variable + ) + if not variable.dtype.is_floating: + raise ValueError( + "variable must be a floating point variable but has type: %s" + % variable.dtype.name + ) + self._variable = variable + # 'delegate' means AutoCastVariable.op return self._variable.op, which + # will raise an AttributeError in Eager (as intended). If set to any + # other value, AutoCastVariable.op returns that value instead, which is + # used to set the op attribute in AutoCastVariable.assign(). + self._op = "delegate" + + def _should_cast(self): + """Returns True if this variable should be casted when accessed.""" + autocast_dtype = getattr(_autocast_dtype, "dtype", None) + return autocast_dtype is not None and self.dtype != autocast_dtype + + @property + def dtype(self): + """The dtype of the underlying variable, before any casts are done.""" + return self._variable.dtype + + @property + def true_dtype(self): + """Deprecated alias of `dtype`.""" + return self._variable.dtype + + @property + def _cast_dtype(self): + dtype = getattr(_autocast_dtype, "dtype", None) + return dtype or self._variable.dtype + + def value(self): + val = self._variable.value() + if not self._should_cast(): + return val + return tf.cast(val, self._cast_dtype) + + def read_value(self): + val = self._variable.read_value() + return tf.cast(val, self._cast_dtype) + + def sparse_read(self, indices, name=None): + """Reads the value of this variable sparsely, using `gather`.""" + val = self._variable.sparse_read(indices, name=name) + return tf.cast(val, self._cast_dtype) + + def gather_nd(self, indices, name=None): + """Gather slices of the variable into a Tensor.""" + val = self._variable.gather_nd(indices, name=name) + return tf.cast(val, self._cast_dtype) + + def __getattr__(self, name): + return getattr(self._variable, name) + + def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False): + """Converts this variable to a tensor.""" + if as_ref: + # This ValueError should not occur in practice since it is + # impossible to pass as_ref=True using public APIs. 
+ raise ValueError( + "Cannot convert AutoCastVariable to a tensor if " + "as_ref=True is passed to convert_to_tensor" + ) + if not self._should_cast(): + return tf.convert_to_tensor(self._variable, dtype=dtype, name=name) + if dtype is not None and not dtype.is_compatible_with(self._cast_dtype): + raise ValueError( + "Incompatible type conversion requested to type {!r} for " + "AutoCastVariable which is casted to type {!r}".format( + dtype.name, self._cast_dtype.name + ) + ) + val = tf.convert_to_tensor( + self._variable, dtype=self._variable.dtype, name=name + ) + return tf.cast(val, self._cast_dtype) + + def __tf_tensor__( + self, + dtype: Optional[tf.dtypes.DType] = None, + name: Optional[str] = None, + ) -> tf.Tensor: + return self._dense_var_to_tensor(dtype=dtype, name=name) + + def _should_act_as_resource_variable(self): + """Pass resource_variable_ops.is_resource_variable check.""" + pass - def __rpow__(self, o): - return pow(o, self.read_value()) + def __repr__(self): + if tf.executing_eagerly() and not self._in_graph_mode: + repr_str = ( + "<AutoCastVariable '{v.name}' shape={v.shape} " + "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}, " + "numpy={np_repr}>" + ) + return repr_str.format( + v=self, np_repr=numpy_text(self.read_value(), is_repr=True) + ) + else: + repr_str = ( + "<AutoCastVariable '{v.name}' shape={v.shape} " + "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}>" + ) + return repr_str.format(v=self) + + # Method delegations: We delegate the following methods to self._variable. + # Each of these methods simply calls the same method on self._variable. The + # base Variable raises NotImplementedError for most of these, so we must + # override them. + # + # We do not define the following methods from Variable for the following + # reasons: + # * 'count_up_to': This method only applies to int variables, which cannot + # be wrapped with an AutoCastVariable. + # * 'ref': Instead we inherit the definition from Variable. + # If we defined and delegated to Variable, the ref of an + # AutoCastVariable would be the same as the ref of the underlying + # variable, which would be strange as they are different Python objects. + + def set_shape(self, shape): + return self._variable.set_shape(self, shape) + + @property + def trainable(self): + return self._variable.trainable + + @property + def synchronization(self): + return self._variable.synchronization + + @property + def aggregation(self): + return self._variable.aggregation + + def eval(self, session=None): + return self._variable.eval(session) + + def initialized_value(self): + return self._variable.initialized_value() + + @property + def initial_value(self): + return self._variable.initial_value + + @property + def constraint(self): + return self._variable.constraint + + def _apply_assign_update( + self, update_fn, value, use_locking=None, name=None, read_value=True + ): + # TODO(b/146181571): This logic can be simplified once + # DistributedVariable.assign returns a DistributedVariable. Currently + # for MirroredStrategy, it returns a Mirrored value. + if tf.compat.v1.executing_eagerly_outside_functions(): + assign_op = update_fn(value, use_locking, name, False) + if read_value: + # We create a new AutoCastVariable with the same underlying + # tf.Variable. The new AutoCastVariable is identical except the + # 'op' attribute is defined. This matches the behavior of + # tf.Variable.assign.
+ var = create_autocast_variable(self._variable) + var._op = assign_op + return var + return assign_op + + # Fallback to wrapping the returned variable in graph mode if possible + assign_var = update_fn(value, use_locking, name, read_value) + if read_value and tf.__internal__.ops.is_resource_variable(assign_var): + return create_autocast_variable(assign_var) + return assign_var + + def _apply_update(self, update_fn, *args, **kwargs): + update_var = update_fn(*args, **kwargs) + if tf.compat.v1.executing_eagerly_outside_functions(): + return self + + # Fallback to wrapping the returned variable in graph mode if possible + if tf.__internal__.ops.is_resource_variable(update_var): + return create_autocast_variable(update_var) + return update_var + + def assign(self, value, use_locking=None, name=None, read_value=True): + return self._apply_assign_update( + self._variable.assign, value, use_locking, name, read_value + ) + + def assign_add(self, delta, use_locking=None, name=None, read_value=True): + return self._apply_assign_update( + self._variable.assign_add, delta, use_locking, name, read_value + ) + + def assign_sub(self, delta, use_locking=None, name=None, read_value=True): + return self._apply_assign_update( + self._variable.assign_sub, delta, use_locking, name, read_value + ) + + def scatter_sub(self, sparse_delta, use_locking=False, name=None): + return self._apply_update( + self._variable.scatter_sub, sparse_delta, use_locking, name + ) + + def scatter_add(self, sparse_delta, use_locking=False, name=None): + return self._apply_update( + self._variable.scatter_add, sparse_delta, use_locking, name + ) + + def scatter_max(self, sparse_delta, use_locking=False, name=None): + return self._apply_update( + self._variable.scatter_max, sparse_delta, use_locking, name + ) + + def scatter_min(self, sparse_delta, use_locking=False, name=None): + return self._apply_update( + self._variable.scatter_min, sparse_delta, use_locking, name + ) + + def scatter_mul(self, sparse_delta, use_locking=False, name=None): + return self._apply_update( + self._variable.scatter_mul, sparse_delta, use_locking, name + ) + + def scatter_div(self, sparse_delta, use_locking=False, name=None): + return self._apply_update( + self._variable.scatter_div, sparse_delta, use_locking, name + ) + + def scatter_update(self, sparse_delta, use_locking=False, name=None): + return self._apply_update( + self._variable.scatter_update, sparse_delta, use_locking, name + ) + + def batch_scatter_update(self, sparse_delta, use_locking=False, name=None): + return self._apply_update( + self._variable.batch_scatter_update, sparse_delta, use_locking, name + ) + + def scatter_nd_sub(self, indices, updates, name=None): + return self._apply_update( + self._variable.scatter_nd_sub, indices, updates, name + ) + + def scatter_nd_add(self, indices, updates, name=None): + return self._apply_update( + self._variable.scatter_nd_add, indices, updates, name + ) + + def scatter_nd_update(self, indices, updates, name=None): + return self._apply_update( + self._variable.scatter_nd_update, indices, updates, name + ) + + def load(self, value, session=None): + return self._variable.load(value, session) + + @property + def name(self): + return self._variable.name + + @property + def _shared_name(self): + return self._variable._shared_name + + @property + def initializer(self): + return self._variable.initializer + + @property + def device(self): + return self._variable.device + + @property + def op(self): + if self._op == "delegate": + return self._variable.op + 
return self._op + + def _as_graph_element(self): + graph_element = self._variable._as_graph_element() + if graph_element is None: + return self._op + return graph_element + + @property + def graph(self): + return self._variable.graph + + @property + def shape(self): + return self._variable.shape + + def get_shape(self): + return self._variable.get_shape() + + def __tf_tracing_type__(self, context): + return AutoCastVariableSpec(self) + + def _gather_saveables_for_checkpoint(self): + # By delegating this method to the wrapped variable, checkpoints with + # AutoCastVariables are identical to checkpoints with normal variables. + # Therefore models checkpointed with AutoCastVariables can be restored + # on models with normal variables, and vice versa. + return self._variable._gather_saveables_for_checkpoint() + + def _export_to_saved_model_graph( + self, object_map, tensor_map, options, **kwargs + ): + # By delegating this method to the wrapped variable, SavedModel with + # AutoCastVariables are identical to SavedModel with normal variables. + resource_list = self._variable._export_to_saved_model_graph( + object_map, tensor_map, options, **kwargs + ) + object_map[self] = object_map[self._variable] + return resource_list + + # TODO(reedwm): Maybe encode the fact the variable is an AutoCastVariable in + # to_proto(). + def to_proto(self, export_scope=None): + return self._variable.to_proto(export_scope) + + def from_proto(self, variable_def, import_scope=None): + return self._variable.from_proto(variable_def, import_scope) + + # Delegate the private attributes _handle_name and _initializer_op to + # self._variable. SavedModel sets these attributes when loading a model. For + # example, it sets _handle_name here: + # https://github.com/tensorflow/tensorflow/blob/db26bd574fa95b5bdd53c08463dd19407cc0297e/tensorflow/python/keras/saving/saved_model/load.py#L211 + # We need to expose these attributes on AutoCastVariable as well for + # SavedModel to work properly. + # TODO(reedwm/kathywu): Find a better way to support SavedModel. Exposing + # private attributes is hacky and difficult to maintain. + @property + def _handle_name(self): + return self._variable._handle_name + + @_handle_name.setter + def _handle_name(self, handle_name): + self._variable._handle_name = handle_name + + @property + def _initializer_op(self): + return self._variable._initializer_op + + @_initializer_op.setter + def _initializer_op(self, initializer_op): + self._variable._initializer_op = initializer_op + + # Operator overloads: + # Note we only overload operators that support floating-point types, as + # non-float variables cannot be wrapped with an AutoCastVariable. 
+ # Also note: We call read_value() instead of value(), because value() causes + # gradients not to work properly when TPUStrategy is used: b/143380936 + + def __add__(self, o): + return self.read_value() + o + + def __radd__(self, o): + return o + self.read_value() + + def __sub__(self, o): + return self.read_value() - o + + def __rsub__(self, o): + return o - self.read_value() + + def __mul__(self, o): + return self.read_value() * o + + def __rmul__(self, o): + return o * self.read_value() + + def __truediv__(self, o): + return self.read_value() / o + + def __rtruediv__(self, o): + return o / self.read_value() + + def __floordiv__(self, o): + return self.read_value() // o + + def __rfloordiv__(self, o): + return o // self.read_value() + + def __mod__(self, o): + return self.read_value() % o + + def __rmod__(self, o): + return o % self.read_value() + + def __lt__(self, o): + return self.read_value() < o + + def __le__(self, o): + return self.read_value() <= o + + def __gt__(self, o): + return self.read_value() > o + + def __ge__(self, o): + return self.read_value() >= o + + def __getitem__(self, o): + return self.read_value()[o] - def __neg__(self): - return -self.read_value() # pylint: disable=invalid-unary-operand-type + def __pow__(self, o, modulo=None): + return pow(self.read_value(), o, modulo) + + def __rpow__(self, o): + return pow(o, self.read_value()) - def __abs__(self): - return abs(self.read_value()) + def __neg__(self): + return -self.read_value() - def __div__(self, o): - try: - return self.read_value().__div__(o) - except AttributeError: - # See https://docs.python.org/3/library/constants.html#NotImplemented - return NotImplemented + def __abs__(self): + return abs(self.read_value()) - def __rdiv__(self, o): - try: - return self.read_value().__rdiv__(o) - except AttributeError: - # See https://docs.python.org/3/library/constants.html#NotImplemented - return NotImplemented + def __div__(self, o): + try: + return self.read_value().__div__(o) + except AttributeError: + # See + # https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented - def __matmul__(self, o): - try: - return self.read_value().__matmul__(o) - except AttributeError: - # See https://docs.python.org/3/library/constants.html#NotImplemented - return NotImplemented + def __rdiv__(self, o): + try: + return self.read_value().__rdiv__(o) + except AttributeError: + # See + # https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented - def __rmatmul__(self, o): - try: - return self.read_value().__rmatmul__(o) - except AttributeError: - # See https://docs.python.org/3/library/constants.html#NotImplemented - return NotImplemented + def __matmul__(self, o): + try: + return self.read_value().__matmul__(o) + except AttributeError: + # See + # https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented - # pylint: enable=multiple-statements + def __rmatmul__(self, o): + try: + return self.read_value().__rmatmul__(o) + except AttributeError: + # See + # https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented -tf.register_tensor_conversion_function(AutoCastVariable, - AutoCastVariable._dense_var_to_tensor) # pylint:disable=protected-access +tf.register_tensor_conversion_function( + AutoCastVariable, AutoCastVariable._dense_var_to_tensor +) def create_autocast_variable(variable): - """Creates an AutoCastVariable that wraps another variable. + """Creates an AutoCastVariable that wraps another variable. 
- This typically just returns `AutoCastVariable(variable)`. But, if the variable - is a DistributedVariable or one of its subclasses, we instead dynamically - create a class that subclasses from both AutoCastVariable and - variable.__class__. This is so the returned variable will still pass - `isinstance(variable, variable.__class__)`, which is required for - DistributedVariables and its subclasses to work properly. + This typically just returns `AutoCastVariable(variable)`. But, if the + variable is a DistributedVariable or one of its subclasses, we instead + dynamically create a class that subclasses from both AutoCastVariable and + variable.__class__. This is so the returned variable will still pass + `isinstance(variable, variable.__class__)`, which is required for + DistributedVariables and its subclasses to work properly. - Args: - variable: A floating-point resource variable to wrap. + Args: + variable: A floating-point resource variable to wrap. - Returns: - An AutoCastVariable that wraps the variable. - """ - if not distributed_training_utils.is_distributed_variable(variable): - return AutoCastVariable(variable) + Returns: + An AutoCastVariable that wraps the variable. + """ + if not distributed_training_utils.is_distributed_variable(variable): + return AutoCastVariable(variable) - class AutoCastDistributedVariable(AutoCastVariable, variable.__class__): - """An AutoCastVariable that also subclasses from variable.__class__. + class AutoCastDistributedVariable(AutoCastVariable, variable.__class__): + """An AutoCastVariable that also subclasses from variable.__class__. - variable.__class__ is either a DistributedVariable or an - AggregatingVariable. - """ + variable.__class__ is either a DistributedVariable or an + AggregatingVariable. + """ - def __repr__(self): + def __repr__(self): - # pylint: disable=missing-format-attribute - return ('<AutoCastDistributedVariable dtype={v.dtype.name} ' - 'dtype_to_cast_to={v._cast_dtype.name} ' - 'inner_variable={v._variable}>' - ).format(v=self) - # pylint: enable=missing-format-attribute + return ( + "<AutoCastDistributedVariable dtype={v.dtype.name} " + "dtype_to_cast_to={v._cast_dtype.name} " + "inner_variable={v._variable}>" + ).format(v=self) - return AutoCastDistributedVariable(variable) + return AutoCastDistributedVariable(variable) -class enable_auto_cast_variables: # pylint:disable=invalid-name - """Context manager which enables the autocasting of `AutoCastVariable`s. +class enable_auto_cast_variables: + """Context manager which enables the autocasting of `AutoCastVariable`s. - Under this context manager, `AutoCastVariable`s will be cast to `dtype` if - `dtype` is floating-point. Otherwise, `AutoCastVariable`s will not be cast.
+ """ - __slots__ = ['_dtype', '_prev_dtype'] + __slots__ = ["_dtype", "_prev_dtype"] - def __init__(self, dtype): - if dtype and not dtype.is_floating: - dtype = None - self._dtype = dtype + def __init__(self, dtype): + if dtype and not dtype.is_floating: + dtype = None + self._dtype = dtype - def __enter__(self): - self._prev_dtype = getattr(_autocast_dtype, 'dtype', None) - _autocast_dtype.dtype = self._dtype + def __enter__(self): + self._prev_dtype = getattr(_autocast_dtype, "dtype", None) + _autocast_dtype.dtype = self._dtype - def __exit__(self, type_arg, value_arg, traceback_arg): - _autocast_dtype.dtype = self._prev_dtype + def __exit__(self, type_arg, value_arg, traceback_arg): + _autocast_dtype.dtype = self._prev_dtype diff --git a/keras/mixed_precision/autocast_variable_test.py b/keras/mixed_precision/autocast_variable_test.py index efd1314f7c92..1a6637b6fcc5 100644 --- a/keras/mixed_precision/autocast_variable_test.py +++ b/keras/mixed_precision/autocast_variable_test.py @@ -14,561 +14,647 @@ # ============================================================================== """Tests for AutoCastVariable.""" -import tensorflow.compat.v2 as tf - import os import threading -from absl.testing import parameterized import numpy as np -from keras.mixed_precision import autocast_variable -from keras.optimizers.optimizer_v2 import adadelta -from keras.optimizers.optimizer_v2 import adagrad -from keras.optimizers.optimizer_v2 import adam -from keras.optimizers.optimizer_v2 import adamax -from keras.optimizers.optimizer_v2 import ftrl -from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_v2 -from keras.optimizers.optimizer_v2 import nadam -from keras.optimizers.optimizer_v2 import rmsprop +import tensorflow.compat.v2 as tf +from absl.testing import parameterized -maybe_distribute = tf.__internal__.test.combinations.combine(distribution=[ - tf.__internal__.distribute.combinations.default_strategy, - tf.__internal__.distribute.combinations.mirrored_strategy_with_cpu_1_and_2 -]) +from keras.layers import Dense +from keras.mixed_precision import autocast_variable +from keras.optimizers.legacy import adadelta +from keras.optimizers.legacy import adagrad +from keras.optimizers.legacy import adam +from keras.optimizers.legacy import adamax +from keras.optimizers.legacy import ftrl +from keras.optimizers.legacy import gradient_descent as gradient_descent_v2 +from keras.optimizers.legacy import nadam +from keras.optimizers.legacy import rmsprop + +maybe_distribute = tf.__internal__.test.combinations.combine( + distribution=[ + tf.__internal__.distribute.combinations.default_strategy, + tf.__internal__.distribute.combinations.mirrored_strategy_with_two_cpus, # noqa: E501 + ] +) def get_var(val, dtype, name=None): - return tf.Variable(val, dtype=dtype, name=name) - - -def set_cpu_logical_devices_to_at_least(num): - """Create cpu logical devices of at least a given number.""" - physical_devices = tf.config.list_physical_devices('CPU') - if not physical_devices: - raise RuntimeError('No CPU found') - if len(physical_devices) >= num: - return - # By default each physical device corresponds to one logical device. We create - # multiple logical devices for the last physical device so that we have `num` - # logical devices. 
- num = num - len(physical_devices) + 1 - logical_devices = [] - for _ in range(num): - logical_devices.append(tf.config.LogicalDeviceConfiguration()) - # Create logical devices from the last device since sometimes the first GPU - # is the primary graphic card and may have less memory available. - tf.config.set_logical_device_configuration(physical_devices[-1], logical_devices) - - -@tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(mode=['graph', 'eager'])) -class AutoCastVariableTest(tf.test.TestCase, parameterized.TestCase): + return tf.Variable(val, dtype=dtype, name=name) - def setUp(self): - set_cpu_logical_devices_to_at_least(3) - super().setUp() - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_read(self, distribution): - with distribution.scope(): - x = get_var(1., tf.float32) - x = autocast_variable.create_autocast_variable(x) - self.evaluate(x.initializer) - - # outside of auto cast scope. - self.assertEqual(x.dtype, tf.float32) - self.assertEqual(x.value().dtype, tf.float32) - self.assertEqual(x.read_value().dtype, tf.float32) - self.assertEqual(tf.identity(x).dtype, tf.float32) - - # within auto cast scope of different dtype - with autocast_variable.enable_auto_cast_variables(tf.float16): - self.assertEqual(x.dtype, tf.float32) - self.assertEqual(x.value().dtype, tf.float16) - self.assertEqual(x.read_value().dtype, tf.float16) - self.assertEqual(tf.identity(x).dtype, tf.float16) - - # within auto cast scope of same dtype - with autocast_variable.enable_auto_cast_variables(tf.float32): - self.assertEqual(x.dtype, tf.float32) - self.assertEqual(x.value().dtype, tf.float32) - self.assertEqual(x.read_value().dtype, tf.float32) - self.assertEqual(tf.identity(x).dtype, tf.float32) - - def test_sparse_reads(self): - x = get_var([1., 2], tf.float32) - # DistributedVariables do not support sparse_read or gather_nd, so we pass - # distribute=False - x = autocast_variable.create_autocast_variable(x) - self.evaluate(x.initializer) - - self.assertEqual(x.sparse_read([0]).dtype, tf.float32) - self.assertEqual(x.gather_nd([0]).dtype, tf.float32) - - with autocast_variable.enable_auto_cast_variables(tf.float16): - self.assertEqual(x.sparse_read([0]).dtype, tf.float16) - self.assertEqual(x.gather_nd([0]).dtype, tf.float16) - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_read_nested_scopes(self, distribution): - with distribution.scope(): - x = get_var(1., tf.float32) - x = autocast_variable.create_autocast_variable(x) - self.evaluate(x.initializer) - - with autocast_variable.enable_auto_cast_variables(tf.float16): - self.assertEqual(x.read_value().dtype, tf.float16) - - with autocast_variable.enable_auto_cast_variables(tf.float32): - self.assertEqual(x.read_value().dtype, tf.float32) - - self.assertEqual(x.read_value().dtype, tf.float16) - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_dtype_is_not_string(self, distribution): - with distribution.scope(): - x = get_var(1., tf.float32) - x = autocast_variable.create_autocast_variable(x) - self.assertEqual(x.dtype, tf.float32) - self.assertIsInstance(x.dtype, tf.DType) - self.assertEqual(x.true_dtype, tf.float32) - self.assertIsInstance(x.true_dtype, tf.DType) - - dtype = tf.float16 - with autocast_variable.enable_auto_cast_variables(dtype): - self.assertEqual(x.dtype, tf.float32) - self.assertIsInstance(x.dtype, tf.DType) - self.assertEqual(x.true_dtype, tf.float32) - self.assertIsInstance(x.true_dtype, 
tf.DType) - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_method_delegations(self, distribution): - # Test AutoCastVariable correctly delegates Variable methods to the - # underlying variable. - with self.test_session(), distribution.scope(): - for read_dtype in (tf.float32, tf.float16): - if tf.distribute.has_strategy() and not tf.executing_eagerly(): - # MirroredVariable.assign will (incorrectly) return a Mirrored value - # instead of a MirroredVariable in graph mode. - # So we cannot properly wrap it in an AutoCastVariable. - evaluate = self.evaluate - else: - def evaluate(var): - self.assertIsInstance(var, autocast_variable.AutoCastVariable) - self.assertEqual(tf.identity(var).dtype, read_dtype) # pylint: disable=cell-var-from-loop - return self.evaluate(var) +@tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine(mode=["graph", "eager"]) +) +class AutoCastVariableTest(tf.test.TestCase, parameterized.TestCase): + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_read(self, distribution): + with distribution.scope(): + x = get_var(1.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + self.evaluate(x.initializer) - x = get_var(7., tf.float32) + # outside of auto cast scope. + self.assertEqual(x.dtype, tf.float32) + self.assertEqual(x.value().dtype, tf.float32) + self.assertEqual(x.read_value().dtype, tf.float32) + self.assertEqual(tf.identity(x).dtype, tf.float32) + + # within auto cast scope of different dtype + with autocast_variable.enable_auto_cast_variables(tf.float16): + self.assertEqual(x.dtype, tf.float32) + self.assertEqual(x.value().dtype, tf.float16) + self.assertEqual(x.read_value().dtype, tf.float16) + self.assertEqual(tf.identity(x).dtype, tf.float16) + + # within auto cast scope of same dtype + with autocast_variable.enable_auto_cast_variables(tf.float32): + self.assertEqual(x.dtype, tf.float32) + self.assertEqual(x.value().dtype, tf.float32) + self.assertEqual(x.read_value().dtype, tf.float32) + self.assertEqual(tf.identity(x).dtype, tf.float32) + + def test_sparse_reads(self): + x = get_var([1.0, 2], tf.float32) + # DistributedVariables do not support sparse_read or gather_nd, so we + # pass distribute=False x = autocast_variable.create_autocast_variable(x) - with autocast_variable.enable_auto_cast_variables(read_dtype): - self.evaluate(x.initializer) - self.assertEqual(self.evaluate(x.value()), 7) - self.assertEqual(self.evaluate(x.read_value()), 7) - self.assertTrue(x.trainable) - self.assertEqual(x.synchronization, x._variable.synchronization) - self.assertEqual(x.aggregation, x._variable.aggregation) - self.assertEqual(self.evaluate(x.initialized_value()), 7) - if not tf.executing_eagerly(): - if not tf.distribute.has_strategy(): - # These functions are not supported for DistributedVariables - x.load(9) - self.assertEqual(x.eval(), 9) - self.assertEqual(self.evaluate(x.initial_value), 7) - self.assertEqual(x.op, x._variable.op) - self.assertEqual(x.graph, x._variable.graph) - if not tf.distribute.has_strategy(): - # These attributes are not supported for DistributedVariables - self.assertIsNone(x.constraint) - self.assertEqual(x.initializer, x._variable.initializer) - self.assertEqual(evaluate(x.assign(8)), 8) - self.assertEqual(evaluate(x.assign_add(2)), 10) - self.assertEqual(evaluate(x.assign_sub(3)), 7) - self.assertEqual(x.name, x._variable.name) - self.assertEqual(x.device, x._variable.device) - self.assertEqual(x.shape, ()) - 
self.assertEqual(x.get_shape(), ()) - - if not tf.distribute.has_strategy(): - # Test scatter_* methods. These are not supported for - # DistributedVariables - x = get_var([7, 8], tf.float32) - x = autocast_variable.create_autocast_variable(x) - with autocast_variable.enable_auto_cast_variables(read_dtype): + self.evaluate(x.initializer) + + self.assertEqual(x.sparse_read([0]).dtype, tf.float32) + self.assertEqual(x.gather_nd([0]).dtype, tf.float32) + + with autocast_variable.enable_auto_cast_variables(tf.float16): + self.assertEqual(x.sparse_read([0]).dtype, tf.float16) + self.assertEqual(x.gather_nd([0]).dtype, tf.float16) + + def test_tf_function_with_variable_and_autocast_variable(self): + ones = tf.ones((2, 2)) + layer1 = Dense(2, dtype="float32") + layer2 = Dense(2, dtype="mixed_float16") + layer1(ones) + layer2(ones) + + @tf.function + def f(x): + return x + 1 + + self.assertEqual(f(layer1.kernel).dtype, tf.dtypes.float32) + self.assertEqual(f(layer2.kernel).dtype, tf.dtypes.float32) + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_read_nested_scopes(self, distribution): + with distribution.scope(): + x = get_var(1.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + self.evaluate(x.initializer) + + with autocast_variable.enable_auto_cast_variables(tf.float16): + self.assertEqual(x.read_value().dtype, tf.float16) + + with autocast_variable.enable_auto_cast_variables(tf.float32): + self.assertEqual(x.read_value().dtype, tf.float32) + + self.assertEqual(x.read_value().dtype, tf.float16) + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_dtype_is_not_string(self, distribution): + with distribution.scope(): + x = get_var(1.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + self.assertEqual(x.dtype, tf.float32) + self.assertIsInstance(x.dtype, tf.DType) + self.assertEqual(x.true_dtype, tf.float32) + self.assertIsInstance(x.true_dtype, tf.DType) + + dtype = tf.float16 + with autocast_variable.enable_auto_cast_variables(dtype): + self.assertEqual(x.dtype, tf.float32) + self.assertIsInstance(x.dtype, tf.DType) + self.assertEqual(x.true_dtype, tf.float32) + self.assertIsInstance(x.true_dtype, tf.DType) + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_method_delegations(self, distribution): + # Test AutoCastVariable correctly delegates Variable methods to the + # underlying variable. + with self.test_session(), distribution.scope(): + for read_dtype in (tf.float32, tf.float16): + if tf.distribute.has_strategy() and not tf.executing_eagerly(): + # MirroredVariable.assign will (incorrectly) return a + # Mirrored value instead of a MirroredVariable in graph + # mode. So we cannot properly wrap it in an + # AutoCastVariable. 
+ evaluate = self.evaluate + else: + + def evaluate(var): + self.assertIsInstance( + var, autocast_variable.AutoCastVariable + ) + self.assertEqual(tf.identity(var).dtype, read_dtype) + return self.evaluate(var) + + x = get_var(7.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + with autocast_variable.enable_auto_cast_variables(read_dtype): + self.evaluate(x.initializer) + self.assertEqual(self.evaluate(x.value()), 7) + self.assertEqual(self.evaluate(x.read_value()), 7) + self.assertTrue(x.trainable) + self.assertEqual( + x.synchronization, x._variable.synchronization + ) + self.assertEqual(x.aggregation, x._variable.aggregation) + self.assertEqual(self.evaluate(x.read_value()), 7) + if not tf.executing_eagerly(): + if not tf.distribute.has_strategy(): + # These functions are not supported for + # DistributedVariables + x.load(9) + self.assertEqual(x.eval(), 9) + self.assertEqual(self.evaluate(x.initial_value), 7) + self.assertEqual(x.op, x._variable.op) + self.assertEqual(x.graph, x._variable.graph) + if not tf.distribute.has_strategy(): + # These attributes are not supported for + # DistributedVariables + self.assertIsNone(x.constraint) + self.assertEqual(x.initializer, x._variable.initializer) + self.assertEqual(evaluate(x.assign(8)), 8) + self.assertEqual(evaluate(x.assign_add(2)), 10) + self.assertEqual(evaluate(x.assign_sub(3)), 7) + self.assertEqual(x.name, x._variable.name) + self.assertEqual(x.device, x._variable.device) + self.assertEqual(x.shape, ()) + self.assertEqual(x.get_shape(), ()) + + if not tf.distribute.has_strategy(): + # Test scatter_* methods. These are not supported for + # DistributedVariables + x = get_var([7, 8], tf.float32) + x = autocast_variable.create_autocast_variable(x) + with autocast_variable.enable_auto_cast_variables( + read_dtype + ): + self.evaluate(x.initializer) + self.assertAllEqual(self.evaluate(x.value()), [7, 8]) + + def slices(val, index): + return tf.IndexedSlices( + values=tf.constant(val, dtype=tf.float32), + indices=tf.constant(index, dtype=tf.int32), + dense_shape=tf.constant([2], dtype=tf.int32), + ) + + self.assertAllEqual( + evaluate(x.scatter_sub(slices(1.0, 0))), [6, 8] + ) + self.assertAllEqual( + evaluate(x.scatter_add(slices(1.0, 0))), [7, 8] + ) + self.assertAllEqual( + evaluate(x.scatter_max(slices(9.0, 1))), [7, 9] + ) + self.assertAllEqual( + evaluate(x.scatter_min(slices(8.0, 1))), [7, 8] + ) + self.assertAllEqual( + evaluate(x.scatter_mul(slices(2.0, 1))), [7, 16] + ) + self.assertAllEqual( + evaluate(x.scatter_div(slices(2.0, 1))), [7, 8] + ) + self.assertAllEqual( + evaluate(x.scatter_update(slices(4.0, 1))), [7, 4] + ) + self.assertAllEqual( + evaluate(x.scatter_nd_sub([[0], [1]], [1.0, 2.0])), + [6, 2], + ) + self.assertAllEqual( + evaluate(x.scatter_nd_add([[0], [1]], [1.0, 2.0])), + [7, 4], + ) + self.assertAllEqual( + evaluate( + x.scatter_nd_update([[0], [1]], [1.0, 2.0]) + ), + [1, 2], + ) + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_operator_overloads(self, distribution): + with distribution.scope(): + for read_dtype in (tf.float32, tf.float16): + x = get_var(7.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + with autocast_variable.enable_auto_cast_variables(read_dtype): + self.evaluate(x.initializer) + self.assertAlmostEqual(8, self.evaluate(x + 1)) + self.assertAlmostEqual(10, self.evaluate(3 + x)) + self.assertAlmostEqual(14, self.evaluate(x + x)) + self.assertAlmostEqual(5, self.evaluate(x - 2)) + self.assertAlmostEqual(6, 
self.evaluate(13 - x)) + self.assertAlmostEqual(0, self.evaluate(x - x)) + self.assertAlmostEqual(14, self.evaluate(x * 2)) + self.assertAlmostEqual(21, self.evaluate(3 * x)) + self.assertAlmostEqual(49, self.evaluate(x * x)) + self.assertAlmostEqual(3.5, self.evaluate(x / 2)) + self.assertAlmostEqual(1.5, self.evaluate(10.5 / x)) + self.assertAlmostEqual(3, self.evaluate(x // 2)) + self.assertAlmostEqual(2, self.evaluate(15 // x)) + if read_dtype == tf.float32: + # The "mod" operator does not support float16 + self.assertAlmostEqual(1, self.evaluate(x % 2)) + self.assertAlmostEqual(2, self.evaluate(16 % x)) + self.assertTrue(self.evaluate(x < 12)) + self.assertTrue(self.evaluate(x <= 12)) + self.assertFalse(self.evaluate(x > 12)) + self.assertFalse(self.evaluate(x >= 12)) + self.assertFalse(self.evaluate(12 < x)) + self.assertFalse(self.evaluate(12 <= x)) + self.assertTrue(self.evaluate(12 > x)) + self.assertTrue(self.evaluate(12 >= x)) + self.assertAlmostEqual( + 343, self.evaluate(pow(x, 3)), places=4 + ) + self.assertAlmostEqual( + 128, self.evaluate(pow(2, x)), places=4 + ) + self.assertAlmostEqual(-7, self.evaluate(-x)) + self.assertAlmostEqual(7, self.evaluate(abs(x))) + + x = get_var([7, 8, 9], tf.float32) + x = autocast_variable.create_autocast_variable(x) + self.evaluate(x.initializer) + self.assertEqual(self.evaluate(x[1]), 8) + if tf.__internal__.tf2.enabled() and tf.executing_eagerly(): + self.assertAllEqual( + x == [7.0, 8.0, 10.0], [True, True, False] + ) + self.assertAllEqual( + x != [7.0, 8.0, 10.0], [False, False, True] + ) + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_assign(self, distribution): + with distribution.scope(): + x = get_var(0.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + self.evaluate(x.initializer) + + # outside of auto cast scope. 
+ v1 = tf.constant(3.0, dtype=tf.float32) + v2 = tf.constant(3.0, dtype=tf.float16) + + def run_and_check(): + # Assign float32 values + self.assertAllClose(3.0, self.evaluate(x.assign(v1))) + self.assertAllClose(3.0 * 2, self.evaluate(x.assign_add(v1))) + self.assertAllClose(3.0, self.evaluate(x.assign_sub(v1))) + + # Attempt to assign float16 values + with self.assertRaisesRegex( + ValueError, + "conversion requested dtype float32 for Tensor with dtype " + "float16", + ): + self.evaluate(x.assign(v2)) + with self.assertRaisesRegex( + ValueError, + "conversion requested dtype float32 for Tensor with dtype " + "float16", + ): + self.evaluate(x.assign_add(v2)) + with self.assertRaisesRegex( + ValueError, + "conversion requested dtype float32 for Tensor with dtype " + "float16", + ): + self.evaluate(x.assign_sub(v2)) + + # Assign Python floats + self.assertAllClose(0.0, self.evaluate(x.assign(0.0))) + self.assertAllClose(3.0, self.evaluate(x.assign(3.0))) + self.assertAllClose(3.0 * 2, self.evaluate(x.assign_add(3.0))) + self.assertAllClose(3.0, self.evaluate(x.assign_sub(3.0))) + + # Assign multiple times + # This currently doesn't work in graph mode if a strategy is + # used + if not tf.distribute.has_strategy() or tf.executing_eagerly(): + assign = x.assign(1.0) + self.assertAllClose(1.0, self.evaluate(assign)) + self.assertAllClose(0.0, self.evaluate(assign.assign(0.0))) + assign_add = x.assign_add(3.0) + self.assertAllClose(3.0, self.evaluate(assign_add)) + self.assertAllClose( + 3.0 * 3, + self.evaluate(x.assign_add(3.0).assign_add(3.0)), + ) + self.assertAllClose(3.0 * 3, x) + assign_sub = x.assign_sub(3.0) + self.assertAllClose(3.0 * 2, self.evaluate(assign_sub)) + self.assertAllClose( + 0.0, self.evaluate(x.assign_sub(3.0).assign_sub(3.0)) + ) + + # Assign with read_value=False + self.assertIsNone( + self.evaluate(x.assign(1.0, read_value=False)) + ) + self.assertAllClose(1.0, self.evaluate(x)) + self.assertIsNone( + self.evaluate(x.assign_add(2.0, read_value=False)) + ) + self.assertAllClose(3.0, self.evaluate(x)) + self.assertIsNone( + self.evaluate(x.assign_sub(3.0, read_value=False)) + ) + self.assertAllClose(0.0, self.evaluate(x)) + + # Use the tf.assign functions instead of the var.assign methods. + self.assertAllClose( + 0.0, self.evaluate(tf.compat.v1.assign(x, 0.0)) + ) + self.assertAllClose( + 3.0, self.evaluate(tf.compat.v1.assign(x, 3.0)) + ) + self.assertAllClose( + 3.0 * 2, self.evaluate(tf.compat.v1.assign_add(x, 3.0)) + ) + self.assertAllClose( + 3.0, self.evaluate(tf.compat.v1.assign_sub(x, 3.0)) + ) + + run_and_check() + # reset x + self.evaluate(x.assign(0.0)) + # within auto cast scope. 
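# The invariant run_and_check() exercises just below: assignment always goes
# through the variable's true storage dtype (float32 here), even when reads
# are cast to float16 inside an autocast scope. The same dtype strictness can
# be seen on a plain tf.Variable (sketch; assumes TensorFlow 2.x with eager
# execution):
import tensorflow as tf

v = tf.Variable(0.0, dtype=tf.float32)
v.assign(tf.constant(3.0, dtype=tf.float32))  # OK: dtype matches storage
try:
    v.assign(tf.constant(3.0, dtype=tf.float16))  # mismatched dtype
except ValueError as e:
    print("float16 assign rejected:", e)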
+ with autocast_variable.enable_auto_cast_variables(tf.float16): + # assign still expect float32 value even if in float16 scope + run_and_check() + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_assign_tf_function(self, distribution): + if not tf.executing_eagerly(): + self.skipTest("Test is not compatible with graph mode") + + with distribution.scope(): + x = get_var(0.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + + @tf.function + def run_assign(): + return ( + x.assign(1.0) + .assign_add(3.0) + .assign_add(3.0) + .assign_sub(2.0) + ) + + with autocast_variable.enable_auto_cast_variables(tf.float16): + self.assertAllClose(5.0, self.evaluate(run_assign())) + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_op_attribute(self, distribution): + with distribution.scope(): + x = get_var(0.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + + # Variable.op raises an AttributeError in Eager mode and is an op in + # graph mode. Variable.assign(...).op is None in Eager mode and an + # op in Graph mode or a tf.function. We test this is also true of + # AutoCastVariable. + if tf.executing_eagerly(): + with self.assertRaises(AttributeError): + x.op + self.assertIsNone(x.assign(1.0).op) + self.assertIsNone(x.assign_add(1.0).op) + self.assertIsNone(x.assign_sub(1.0).op) + else: + self.assertIsNotNone(x.op) + self.assertIsNotNone(x.assign(1.0).op) + self.assertIsNotNone(x.assign_add(1.0).op) + self.assertIsNotNone(x.assign_sub(1.0).op) + + @tf.function + def func(): + self.assertIsNotNone(x.assign(1.0).op) + self.assertIsNotNone(x.assign_add(1.0).op) + self.assertIsNotNone(x.assign_sub(1.0).op) + + func() + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_tf_function_control_dependencies(self, distribution): + if not tf.executing_eagerly(): + self.skipTest("Test is not compatible with graph mode") + + with distribution.scope(): + x = get_var(0.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + + @tf.function + def func(): + update = x.assign_add(1.0) + with tf.control_dependencies([update]): + x.assign_add(1.0) + + func() + self.assertAllClose(2.0, self.evaluate(x)) + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_assign_stays_in_true_dtype(self, distribution): + with distribution.scope(): + x = get_var(1.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) self.evaluate(x.initializer) - self.assertAllEqual(self.evaluate(x.value()), [7, 8]) - - def slices(val, index): - return tf.IndexedSlices( - values=tf.constant(val, dtype=tf.float32), - indices=tf.constant(index, dtype=tf.int32), - dense_shape=tf.constant([2], dtype=tf.int32)) - - self.assertAllEqual(evaluate(x.scatter_sub(slices(1., 0))), [6, 8]) - self.assertAllEqual(evaluate(x.scatter_add(slices(1., 0))), [7, 8]) - self.assertAllEqual(evaluate(x.scatter_max(slices(9., 1))), [7, 9]) - self.assertAllEqual(evaluate(x.scatter_min(slices(8., 1))), [7, 8]) - self.assertAllEqual(evaluate(x.scatter_mul(slices(2., 1))), [7, 16]) - self.assertAllEqual(evaluate(x.scatter_div(slices(2., 1))), [7, 8]) - self.assertAllEqual( - evaluate(x.scatter_update(slices(4., 1))), [7, 4]) - self.assertAllEqual( - evaluate(x.scatter_nd_sub([[0], [1]], [1., 2.])), [6, 2]) - self.assertAllEqual( - evaluate(x.scatter_nd_add([[0], [1]], [1., 2.])), [7, 4]) - self.assertAllEqual( - evaluate(x.scatter_nd_update([[0], [1]], [1., 2.])), [1, 2]) - - 
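# Why the next test matters numerically: float16 has eps = 2**-10, so
# 1.0 + eps/2 rounds back to exactly 1.0 in half precision, while float32
# keeps the update. Quick NumPy check of that claim:
import numpy as np

small_val = np.finfo(np.float16).eps / 2  # 2**-11 ~= 0.000488
print(np.float16(1.0) + np.float16(small_val))  # 1.0 -- update lost in fp16
print(np.float32(1.0) + np.float32(small_val))  # 1.0004883 -- kept in fp32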
@tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_operator_overloads(self, distribution): - with distribution.scope(): - for read_dtype in (tf.float32, tf.float16): - x = get_var(7., tf.float32) + # small_val is a value such that 1.0 + small_val == 1.0 in fp16, but + # not in fp32 + small_val = np.finfo("float16").eps / 2 + small_tensor = tf.constant(small_val, dtype=tf.float32) + with autocast_variable.enable_auto_cast_variables(tf.float16): + # Variable should be increased, despite it appearing to be the + # same float16 value. + self.evaluate(x.assign(1.0 + small_tensor)) + self.assertEqual(1.0, self.evaluate(x.value())) + self.assertEqual(1.0 + small_val, self.evaluate(x)) + + self.evaluate(x.assign(1.0)) + with autocast_variable.enable_auto_cast_variables(tf.float16): + self.evaluate(x.assign_add(small_tensor)) + self.assertEqual(1.0, self.evaluate(x.value())) + self.assertEqual(1.0 + small_val, self.evaluate(x)) + + def test_thread_local_autocast_dtype(self): + x = get_var(1.0, tf.float32) x = autocast_variable.create_autocast_variable(x) - with autocast_variable.enable_auto_cast_variables(read_dtype): - self.evaluate(x.initializer) - self.assertAlmostEqual(8, self.evaluate(x + 1)) - self.assertAlmostEqual(10, self.evaluate(3 + x)) - self.assertAlmostEqual(14, self.evaluate(x + x)) - self.assertAlmostEqual(5, self.evaluate(x - 2)) - self.assertAlmostEqual(6, self.evaluate(13 - x)) - self.assertAlmostEqual(0, self.evaluate(x - x)) - self.assertAlmostEqual(14, self.evaluate(x * 2)) - self.assertAlmostEqual(21, self.evaluate(3 * x)) - self.assertAlmostEqual(49, self.evaluate(x * x)) - self.assertAlmostEqual(3.5, self.evaluate(x / 2)) - self.assertAlmostEqual(1.5, self.evaluate(10.5 / x)) - self.assertAlmostEqual(3, self.evaluate(x // 2)) - self.assertAlmostEqual(2, self.evaluate(15 // x)) - if read_dtype == tf.float32: - # The "mod" operator does not support float16 - self.assertAlmostEqual(1, self.evaluate(x % 2)) - self.assertAlmostEqual(2, self.evaluate(16 % x)) - self.assertTrue(self.evaluate(x < 12)) - self.assertTrue(self.evaluate(x <= 12)) - self.assertFalse(self.evaluate(x > 12)) - self.assertFalse(self.evaluate(x >= 12)) - self.assertFalse(self.evaluate(12 < x)) - self.assertFalse(self.evaluate(12 <= x)) - self.assertTrue(self.evaluate(12 > x)) - self.assertTrue(self.evaluate(12 >= x)) - self.assertAlmostEqual(343, self.evaluate(pow(x, 3)), places=4) - self.assertAlmostEqual(128, self.evaluate(pow(2, x)), places=4) - self.assertAlmostEqual(-7, self.evaluate(-x)) - self.assertAlmostEqual(7, self.evaluate(abs(x))) - - x = get_var([7, 8, 9], tf.float32) - x = autocast_variable.create_autocast_variable(x) - self.evaluate(x.initializer) - self.assertEqual(self.evaluate(x[1]), 8) - if tf.__internal__.tf2.enabled() and tf.executing_eagerly(): - self.assertAllEqual(x == [7., 8., 10.], [True, True, False]) - self.assertAllEqual(x != [7., 8., 10.], [False, False, True]) - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_assign(self, distribution): - with distribution.scope(): - x = get_var(0., tf.float32) - x = autocast_variable.create_autocast_variable(x) - self.evaluate(x.initializer) - - # outside of auto cast scope. - v1 = tf.constant(3., dtype=tf.float32) - v2 = tf.constant(3., dtype=tf.float16) - - def run_and_check(): - # Assign float32 values - self.assertAllClose(3., self.evaluate(x.assign(v1))) - self.assertAllClose(3. 
* 2, self.evaluate(x.assign_add(v1))) - self.assertAllClose(3., self.evaluate(x.assign_sub(v1))) - - # Attempt to assign float16 values - with self.assertRaisesRegex( - ValueError, - 'conversion requested dtype float32 for Tensor with dtype float16'): - self.evaluate(x.assign(v2)) - with self.assertRaisesRegex( - ValueError, - 'conversion requested dtype float32 for Tensor with dtype float16'): - self.evaluate(x.assign_add(v2)) - with self.assertRaisesRegex( - ValueError, - 'conversion requested dtype float32 for Tensor with dtype float16'): - self.evaluate(x.assign_sub(v2)) - - # Assign Python floats - self.assertAllClose(0., self.evaluate(x.assign(0.))) - self.assertAllClose(3., self.evaluate(x.assign(3.))) - self.assertAllClose(3. * 2, self.evaluate(x.assign_add(3.))) - self.assertAllClose(3., self.evaluate(x.assign_sub(3.))) - - # Assign multiple times - # This currently doesn't work in graph mode if a strategy is used - if not tf.distribute.has_strategy() or tf.executing_eagerly(): - assign = x.assign(1.) - self.assertAllClose(1., self.evaluate(assign)) - self.assertAllClose(0., self.evaluate(assign.assign(0.))) - assign_add = x.assign_add(3.) - self.assertAllClose(3., self.evaluate(assign_add)) - self.assertAllClose(3. * 3, - self.evaluate(x.assign_add(3.).assign_add(3.))) - self.assertAllClose(3. * 3, x) - assign_sub = x.assign_sub(3.) - self.assertAllClose(3. * 2, self.evaluate(assign_sub)) - self.assertAllClose(0., - self.evaluate(x.assign_sub(3.).assign_sub(3.))) - - # Assign with read_value=False - self.assertIsNone(self.evaluate(x.assign(1., read_value=False))) - self.assertAllClose(1., self.evaluate(x)) - self.assertIsNone(self.evaluate(x.assign_add(2., read_value=False))) - self.assertAllClose(3., self.evaluate(x)) - self.assertIsNone(self.evaluate(x.assign_sub(3., read_value=False))) - self.assertAllClose(0., self.evaluate(x)) - - # Use the tf.assign functions instead of the var.assign methods. - self.assertAllClose(0., self.evaluate(tf.compat.v1.assign(x, 0.))) - self.assertAllClose(3., self.evaluate(tf.compat.v1.assign(x, 3.))) - self.assertAllClose(3. * 2, - self.evaluate(tf.compat.v1.assign_add(x, 3.))) - self.assertAllClose(3., self.evaluate(tf.compat.v1.assign_sub(x, 3.))) - - run_and_check() - # reset x - self.evaluate(x.assign(0.)) - # within auto cast scope. - with autocast_variable.enable_auto_cast_variables(tf.float16): - # assign still expect float32 value even if in float16 scope - run_and_check() - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_assign_tf_function(self, distribution): - if not tf.executing_eagerly(): - self.skipTest('Test is not compatible with graph mode') - - with distribution.scope(): - x = get_var(0., tf.float32) - x = autocast_variable.create_autocast_variable(x) - - @tf.function - def run_assign(): - return x.assign(1.).assign_add(3.).assign_add(3.).assign_sub(2.) - - with autocast_variable.enable_auto_cast_variables(tf.float16): - self.assertAllClose(5., self.evaluate(run_assign())) - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_op_attribute(self, distribution): - with distribution.scope(): - x = get_var(0., tf.float32) - x = autocast_variable.create_autocast_variable(x) - - # Variable.op raises an AttributeError in Eager mode and is an op in graph - # mode. Variable.assign(...).op is None in Eager mode and an op in Graph - # mode or a tf.function. We test this is also true of AutoCastVariable. 
- if tf.executing_eagerly(): - with self.assertRaises(AttributeError): - x.op # pylint: disable=pointless-statement - self.assertIsNone(x.assign(1.0).op) - self.assertIsNone(x.assign_add(1.0).op) - self.assertIsNone(x.assign_sub(1.0).op) - else: - self.assertIsNotNone(x.op) - self.assertIsNotNone(x.assign(1.0).op) - self.assertIsNotNone(x.assign_add(1.0).op) - self.assertIsNotNone(x.assign_sub(1.0).op) - - @tf.function - def func(): - self.assertIsNotNone(x.assign(1.0).op) - self.assertIsNotNone(x.assign_add(1.0).op) - self.assertIsNotNone(x.assign_sub(1.0).op) - - func() - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_tf_function_control_dependencies(self, distribution): - if not tf.executing_eagerly(): - self.skipTest('Test is not compatible with graph mode') - - with distribution.scope(): - x = get_var(0., tf.float32) - x = autocast_variable.create_autocast_variable(x) - - @tf.function - def func(): - update = x.assign_add(1.) - with tf.control_dependencies([update]): - x.assign_add(1.) - - func() - self.assertAllClose(2., self.evaluate(x)) - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_assign_stays_in_true_dtype(self, distribution): - with distribution.scope(): - x = get_var(1., tf.float32) - x = autocast_variable.create_autocast_variable(x) - self.evaluate(x.initializer) - # small_val is a value such that 1.0 + small_val == 1.0 in fp16, but not - # in fp32 - small_val = np.finfo('float16').eps / 2 - small_tensor = tf.constant(small_val, dtype=tf.float32) - with autocast_variable.enable_auto_cast_variables(tf.float16): - # Variable should be increased, despite it appearing to be the same - # float16 value. - self.evaluate(x.assign(1. + small_tensor)) - self.assertEqual(1., self.evaluate(x.value())) - self.assertEqual(1. + small_val, self.evaluate(x)) - - self.evaluate(x.assign(1.)) - with autocast_variable.enable_auto_cast_variables(tf.float16): - self.evaluate(x.assign_add(small_tensor)) - self.assertEqual(1., self.evaluate(x.value())) - self.assertEqual(1. + small_val, self.evaluate(x)) - - def test_thread_local_autocast_dtype(self): - x = get_var(1., tf.float32) - x = autocast_variable.create_autocast_variable(x) - self.evaluate(x.initializer) - - with autocast_variable.enable_auto_cast_variables(tf.float16): - self.assertEqual(tf.identity(x).dtype, tf.float16) - - # New threads should not see the modified value of the autocast dtype. - var_dtype = None - def f(): - nonlocal var_dtype - var_dtype = x._cast_dtype - thread = threading.Thread(target=f) - thread.start() - thread.join() - self.assertEqual(var_dtype, tf.float32) - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_checkpoint(self, distribution): - with self.test_session(): - with distribution.scope(): - x = get_var(1., tf.float32) + self.evaluate(x.initializer) + + with autocast_variable.enable_auto_cast_variables(tf.float16): + self.assertEqual(tf.identity(x).dtype, tf.float16) + + # New threads should not see the modified value of the autocast + # dtype. 
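# threading.local gives every thread its own slot, which is the mechanism
# behind the assertion below: a freshly started thread sees no autocast dtype
# even while the main thread is inside a float16 scope. Standalone
# demonstration of that behavior:
import threading

tls = threading.local()
tls.dtype = "float16"  # set on the main thread only

seen = []


def read_dtype():
    seen.append(getattr(tls, "dtype", None))


worker = threading.Thread(target=read_dtype)
worker.start()
worker.join()
print(seen)  # [None] -- the new thread never saw the main thread's value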
+ var_dtype = None + + def f(): + nonlocal var_dtype + var_dtype = x._cast_dtype + + thread = threading.Thread(target=f) + thread.start() + thread.join() + self.assertEqual(var_dtype, tf.float32) + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_checkpoint(self, distribution): + with self.test_session(): + with distribution.scope(): + x = get_var(1.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + self.evaluate(x.initializer) + self.evaluate(x.assign(123.0)) + + checkpoint = tf.train.Checkpoint(x=x) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + save_path = checkpoint.save(prefix) + self.evaluate(x.assign(234.0)) + checkpoint.restore(save_path).assert_consumed().run_restore_ops() + self.assertEqual(self.evaluate(x), 123.0) + + @tf.__internal__.distribute.combinations.generate(maybe_distribute) + def test_invalid_wrapped_variable(self, distribution): + with distribution.scope(): + # Wrap a non-variable + with self.assertRaisesRegex(ValueError, "variable must be of type"): + x = tf.constant([1.0], dtype=tf.float32) + autocast_variable.create_autocast_variable(x) + + # Wrap a non-floating point variable + with self.assertRaisesRegex( + ValueError, "variable must be a floating point" + ): + x = get_var(1, tf.int32) + autocast_variable.create_autocast_variable(x) + + def test_repr(self): + # We do not test with DistributionStrategy because we do not want to + # rely on the exact __repr__ output of a DistributedVariable. + x = get_var(1.0, tf.float32, name="x") x = autocast_variable.create_autocast_variable(x) - self.evaluate(x.initializer) - self.evaluate(x.assign(123.)) - - checkpoint = tf.train.Checkpoint(x=x) - prefix = os.path.join(self.get_temp_dir(), 'ckpt') - save_path = checkpoint.save(prefix) - self.evaluate(x.assign(234.)) - checkpoint.restore(save_path).assert_consumed().run_restore_ops() - self.assertEqual(self.evaluate(x), 123.) - - @tf.__internal__.distribute.combinations.generate(maybe_distribute) - def test_invalid_wrapped_variable(self, distribution): - with distribution.scope(): - # Wrap a non-variable - with self.assertRaisesRegex(ValueError, 'variable must be of type'): - x = tf.constant([1.], dtype=tf.float32) - autocast_variable.create_autocast_variable(x) - - # Wrap a non-floating point variable - with self.assertRaisesRegex(ValueError, - 'variable must be a floating point'): - x = get_var(1, tf.int32) - autocast_variable.create_autocast_variable(x) - - def test_repr(self): - # We do not test with DistributionStrategy because we do not want to rely on - # the exact __repr__ output of a DistributedVariable. 
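# The checkpoint test above hinges on tf.train.Checkpoint treating the
# wrapper like its underlying variable: save at 123.0, overwrite, restore,
# and the saved value comes back. The same round trip with a plain variable
# (sketch, eager mode):
import os
import tempfile

import tensorflow as tf

v = tf.Variable(123.0)
ckpt = tf.train.Checkpoint(x=v)
save_path = ckpt.save(os.path.join(tempfile.mkdtemp(), "ckpt"))
v.assign(234.0)
ckpt.restore(save_path).assert_consumed()
print(v.numpy())  # 123.0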
-    x = get_var(1., tf.float32, name='x')
-    x = autocast_variable.create_autocast_variable(x)
-    if tf.executing_eagerly():
-      self.assertStartsWith(
-          repr(x),
-          "<AutoCastVariable 'x:0' shape=() dtype=float32 "
-          "dtype_to_cast_to=float32, numpy=")
-      with autocast_variable.enable_auto_cast_variables(tf.float16):
-        self.assertStartsWith(
-            repr(x),
-            "<AutoCastVariable 'x:0' shape=() dtype=float32 "
-            "dtype_to_cast_to=float16, numpy=")
-    else:
-      self.assertEqual(
-          repr(x),
-          "<AutoCastVariable 'x:0' shape=() dtype=float32 "
-          "dtype_to_cast_to=float32>"
-      )
-      with autocast_variable.enable_auto_cast_variables(tf.float16):
-        self.assertEqual(
-            repr(x),
-            "<AutoCastVariable 'x:0' shape=() dtype=float32 "
-            "dtype_to_cast_to=float16>"
-        )
+        if tf.executing_eagerly():
+            self.assertStartsWith(
+                repr(x),
+                "<AutoCastVariable 'x:0' shape=() dtype=float32 "
+                "dtype_to_cast_to=float32, numpy=",
+            )
+            with autocast_variable.enable_auto_cast_variables(tf.float16):
+                self.assertStartsWith(
+                    repr(x),
+                    "<AutoCastVariable 'x:0' shape=() dtype=float32 "
+                    "dtype_to_cast_to=float16, numpy=",
+                )
+        else:
+            self.assertEqual(
+                repr(x),
+                "<AutoCastVariable 'x:0' shape=() dtype=float32 "
+                "dtype_to_cast_to=float32>",
+            )
+            with autocast_variable.enable_auto_cast_variables(tf.float16):
+                self.assertEqual(
+                    repr(x),
+                    "<AutoCastVariable 'x:0' shape=() dtype=float32 "
+                    "dtype_to_cast_to=float16>",
+                )
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            distribution=[
+                tf.__internal__.distribute.combinations.mirrored_strategy_with_two_cpus,  # noqa: E501
+            ]
+        )
+    )
+    def test_repr_distributed(self, distribution):
+        with distribution.scope():
+            x = get_var(1.0, tf.float32)
+            x = autocast_variable.create_autocast_variable(x)
+            use_policy = getattr(
+                distribution.extended, "_use_var_policy", False
+            )
+            if use_policy:
+                self.assertRegex(
+                    repr(x).replace("\n", " "),
+                    "<AutoCastDistributedVariable dtype=float32 "
+                    "dtype_to_cast_to=float32 "
+                    "inner_variable=DistributedVariable.*>",
+                )
+            else:
+                self.assertRegex(
+                    repr(x).replace("\n", " "),
+                    "<AutoCastDistributedVariable dtype=float32 "
+                    "dtype_to_cast_to=float32 "
+                    "inner_variable=MirroredVariable.*>",
+                )
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            optimizer_class=[
+                adadelta.Adadelta,
+                adagrad.Adagrad,
+                adam.Adam,
+                adamax.Adamax,
+                ftrl.Ftrl,
+                gradient_descent_v2.SGD,
+                nadam.Nadam,
+                rmsprop.RMSprop,
+                tf.compat.v1.train.GradientDescentOptimizer,
+            ],
+            use_tf_function=[False, True],
+        )
-
-  def test_repr_distributed(self):
-    strategy = tf.distribute.MirroredStrategy(['/cpu:1', '/cpu:2'])
-    with strategy.scope():
-      x = get_var(1., tf.float32)
-      x = autocast_variable.create_autocast_variable(x)
-      use_policy = getattr(strategy.extended, '_use_var_policy', False)
-      if use_policy:
-        self.assertRegex(
-            repr(x).replace('\n', ' '),
-            '<AutoCastDistributedVariable dtype=float32 '
-            'dtype_to_cast_to=float32 '
-            'inner_variable=DistributedVariable.*>')
-      else:
-        self.assertRegex(
-            repr(x).replace('\n', ' '),
-            '<AutoCastDistributedVariable dtype=float32 '
-            'dtype_to_cast_to=float32 '
-            'inner_variable=MirroredVariable.*>')
-
-  @tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(
-      optimizer_class=[
-          adadelta.Adadelta,
-          adagrad.Adagrad,
-          adam.Adam,
-          adamax.Adamax,
-          ftrl.Ftrl,
-          gradient_descent_v2.SGD,
-          nadam.Nadam,
-          rmsprop.RMSprop,
-          tf.compat.v1.train.GradientDescentOptimizer
-      ],
-      use_tf_function=[False, True]))
-  def test_optimizer(self, optimizer_class, use_tf_function):
-    if use_tf_function and not tf.executing_eagerly():
-      self.skipTest('Test does not support graph mode with tf.function')
-    x = get_var(1., tf.float32)
-    x = autocast_variable.create_autocast_variable(x)
-    y = get_var(1., tf.float32)
-    opt = optimizer_class(learning_rate=1.)
-
-    def f():
-      # Minimize both the AutoCastVariable and the normal tf.Variable. Both
-      # variables should be updated to the same value.
-      op = opt.minimize(lambda: x + y, var_list=[x, y])
-      return None if tf.compat.v1.executing_eagerly_outside_functions() else op
-
-    if use_tf_function:
-      f = tf.function(f)
-
-    if tf.executing_eagerly():
-      f()
-    else:
-      op = f()
-      self.evaluate(tf.compat.v1.global_variables_initializer())
-      self.evaluate(op)
-    # Assert the AutoCastVariable has changed from its initial value
-    self.assertNotEqual(self.evaluate(x), 1.)
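# Arithmetic behind the SGD branch of test_optimizer: the loss is x + y, so
# d(loss)/dx = 1.0, and a single SGD step with learning_rate=1.0 moves x from
# 1.0 to exactly 0.0. Verifiable with a plain variable (sketch, eager mode):
import tensorflow as tf

x = tf.Variable(1.0)
opt = tf.keras.optimizers.SGD(learning_rate=1.0)
opt.minimize(lambda: x + 0.0, var_list=[x])
print(x.numpy())  # 0.0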
- # Assert AutoCastVariable is updated correctly by comparing it to the normal - # variable - self.assertAlmostEqual(self.evaluate(x), self.evaluate(y)) - if optimizer_class in (gradient_descent_v2.SGD, - tf.compat.v1.train.GradientDescentOptimizer): - # With SGD, the variables decreases by exactly 1 - self.assertEqual(self.evaluate(x), 0) - - -if __name__ == '__main__': - tf.test.main() + ) + def test_optimizer(self, optimizer_class, use_tf_function): + if use_tf_function and not tf.executing_eagerly(): + self.skipTest("Test does not support graph mode with tf.function") + x = get_var(1.0, tf.float32) + x = autocast_variable.create_autocast_variable(x) + y = get_var(1.0, tf.float32) + opt = optimizer_class(learning_rate=1.0) + + def f(): + # Minimize both the AutoCastVariable and the normal tf.Variable. + # Both variables should be updated to the same value. + op = opt.minimize(lambda: x + y, var_list=[x, y]) + return ( + None + if tf.compat.v1.executing_eagerly_outside_functions() + else op + ) + + if use_tf_function: + f = tf.function(f) + + if tf.executing_eagerly(): + f() + else: + op = f() + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(op) + # Assert the AutoCastVariable has changed from its initial value + self.assertNotEqual(self.evaluate(x), 1.0) + # Assert AutoCastVariable is updated correctly by comparing it to the + # normal variable + self.assertAlmostEqual(self.evaluate(x), self.evaluate(y)) + if optimizer_class in ( + gradient_descent_v2.SGD, + tf.compat.v1.train.GradientDescentOptimizer, + ): + # With SGD, the variables decreases by exactly 1 + self.assertEqual(self.evaluate(x), 0) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/mixed_precision/device_compatibility_check.py b/keras/mixed_precision/device_compatibility_check.py index 6f58e00bd386..477b61b562d8 100644 --- a/keras/mixed_precision/device_compatibility_check.py +++ b/keras/mixed_precision/device_compatibility_check.py @@ -14,134 +14,153 @@ # ============================================================================== """Contains function to log if devices are compatible with mixed precision.""" +import itertools + import tensorflow.compat.v2 as tf -import itertools +# isort: off from tensorflow.python.platform import tf_logging - -_COMPAT_CHECK_PREFIX = 'Mixed precision compatibility check (mixed_float16): ' -_COMPAT_CHECK_OK_PREFIX = _COMPAT_CHECK_PREFIX + 'OK' -_COMPAT_CHECK_WARNING_PREFIX = _COMPAT_CHECK_PREFIX + 'WARNING' +_COMPAT_CHECK_PREFIX = "Mixed precision compatibility check (mixed_float16): " +_COMPAT_CHECK_OK_PREFIX = _COMPAT_CHECK_PREFIX + "OK" +_COMPAT_CHECK_WARNING_PREFIX = _COMPAT_CHECK_PREFIX + "WARNING" _COMPAT_CHECK_WARNING_SUFFIX = ( - 'If you will use compatible GPU(s) not attached to this host, e.g. by ' - 'running a multi-worker model, you can ignore this warning. This message ' - 'will only be logged once') + "If you will use compatible GPU(s) not attached to this host, e.g. by " + "running a multi-worker model, you can ignore this warning. This message " + "will only be logged once" +) def _dedup_strings(device_strs): - """Groups together consecutive identical strings. - - For example, given: - ['GPU 1', 'GPU 2', 'GPU 2', 'GPU 3', 'GPU 3', 'GPU 3'] - This function returns: - ['GPU 1', 'GPU 2 (x2)', 'GPU 3 (x3)'] - - Args: - device_strs: A list of strings, each representing a device. - - Returns: - A copy of the input, but identical consecutive strings are merged into a - single string. 
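# _dedup_strings is a thin wrapper around itertools.groupby, which groups
# only *consecutive* equal items -- exactly the "(xN)" collapsing the
# docstring describes. Quick demonstration of the same logic:
import itertools

devices = ["GPU 1", "GPU 2", "GPU 2", "GPU 3", "GPU 3", "GPU 3"]
deduped = []
for name, group in itertools.groupby(devices):
    count = len(list(group))
    deduped.append(name if count == 1 else "%s (x%d)" % (name, count))
print(deduped)  # ['GPU 1', 'GPU 2 (x2)', 'GPU 3 (x3)']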
- """ - new_device_strs = [] - for device_str, vals in itertools.groupby(device_strs): - num = len(list(vals)) - if num == 1: - new_device_strs.append(device_str) - else: - new_device_strs.append('%s (x%d)' % (device_str, num)) - return new_device_strs + """Groups together consecutive identical strings. + + For example, given: + ['GPU 1', 'GPU 2', 'GPU 2', 'GPU 3', 'GPU 3', 'GPU 3'] + This function returns: + ['GPU 1', 'GPU 2 (x2)', 'GPU 3 (x3)'] + + Args: + device_strs: A list of strings, each representing a device. + + Returns: + A copy of the input, but identical consecutive strings are merged into a + single string. + """ + new_device_strs = [] + for device_str, vals in itertools.groupby(device_strs): + num = len(list(vals)) + if num == 1: + new_device_strs.append(device_str) + else: + new_device_strs.append("%s (x%d)" % (device_str, num)) + return new_device_strs def _log_device_compatibility_check(policy_name, gpu_details_list): - """Logs a compatibility check if the devices support the policy. - - Currently only logs for the policy mixed_float16. - - Args: - policy_name: The name of the dtype policy. - gpu_details_list: A list of dicts, one dict per GPU. Each dict - is the device details for a GPU, as returned by - `tf.config.experimental.get_device_details()`. - """ - if policy_name != 'mixed_float16': - # TODO(b/145686977): Log if the policy is 'mixed_bfloat16'. This requires - # checking if a TPU is available. - return - supported_device_strs = [] - unsupported_device_strs = [] - for details in gpu_details_list: - name = details.get('device_name', 'Unknown GPU') - cc = details.get('compute_capability') - if cc: - device_str = '%s, compute capability %s.%s' % (name, cc[0], cc[1]) - if cc >= (7, 0): - supported_device_strs.append(device_str) - else: - unsupported_device_strs.append(device_str) - else: - unsupported_device_strs.append( - name + ', no compute capability (probably not an Nvidia GPU)') - - if unsupported_device_strs: - warning_str = _COMPAT_CHECK_WARNING_PREFIX + '\n' - if supported_device_strs: - warning_str += ('Some of your GPUs may run slowly with dtype policy ' - 'mixed_float16 because they do not all have compute ' - 'capability of at least 7.0. Your GPUs:\n') - elif len(unsupported_device_strs) == 1: - warning_str += ('Your GPU may run slowly with dtype policy mixed_float16 ' - 'because it does not have compute capability of at least ' - '7.0. Your GPU:\n') + """Logs a compatibility check if the devices support the policy. + + Currently only logs for the policy mixed_float16. + + Args: + policy_name: The name of the dtype policy. + gpu_details_list: A list of dicts, one dict per GPU. Each dict + is the device details for a GPU, as returned by + `tf.config.experimental.get_device_details()`. + """ + if policy_name != "mixed_float16": + # TODO(b/145686977): Log if the policy is 'mixed_bfloat16'. This + # requires checking if a TPU is available. 
+ return + supported_device_strs = [] + unsupported_device_strs = [] + for details in gpu_details_list: + name = details.get("device_name", "Unknown GPU") + cc = details.get("compute_capability") + if cc: + device_str = f"{name}, compute capability {cc[0]}.{cc[1]}" + if cc >= (7, 0): + supported_device_strs.append(device_str) + else: + unsupported_device_strs.append(device_str) + else: + unsupported_device_strs.append( + name + ", no compute capability (probably not an Nvidia GPU)" + ) + + if unsupported_device_strs: + warning_str = _COMPAT_CHECK_WARNING_PREFIX + "\n" + if supported_device_strs: + warning_str += ( + "Some of your GPUs may run slowly with dtype policy " + "mixed_float16 because they do not all have compute " + "capability of at least 7.0. Your GPUs:\n" + ) + elif len(unsupported_device_strs) == 1: + warning_str += ( + "Your GPU may run slowly with dtype policy mixed_float16 " + "because it does not have compute capability of at least " + "7.0. Your GPU:\n" + ) + else: + warning_str += ( + "Your GPUs may run slowly with dtype policy " + "mixed_float16 because they do not have compute " + "capability of at least 7.0. Your GPUs:\n" + ) + for device_str in _dedup_strings( + supported_device_strs + unsupported_device_strs + ): + warning_str += " " + device_str + "\n" + warning_str += ( + "See https://developer.nvidia.com/cuda-gpus for a list of " + "GPUs and their compute capabilities.\n" + ) + warning_str += _COMPAT_CHECK_WARNING_SUFFIX + tf_logging.warning(warning_str) + elif not supported_device_strs: + tf_logging.warning( + "%s\n" + "The dtype policy mixed_float16 may run slowly because " + "this machine does not have a GPU. Only Nvidia GPUs with " + "compute capability of at least 7.0 run quickly with " + "mixed_float16.\n%s" + % (_COMPAT_CHECK_WARNING_PREFIX, _COMPAT_CHECK_WARNING_SUFFIX) + ) + elif len(supported_device_strs) == 1: + tf_logging.info( + "%s\n" + "Your GPU will likely run quickly with dtype policy " + "mixed_float16 as it has compute capability of at least " + "7.0. Your GPU: %s" + % (_COMPAT_CHECK_OK_PREFIX, supported_device_strs[0]) + ) else: - warning_str += ('Your GPUs may run slowly with dtype policy ' - 'mixed_float16 because they do not have compute ' - 'capability of at least 7.0. Your GPUs:\n') - for device_str in _dedup_strings(supported_device_strs + - unsupported_device_strs): - warning_str += ' ' + device_str + '\n' - warning_str += ('See https://developer.nvidia.com/cuda-gpus for a list of ' - 'GPUs and their compute capabilities.\n') - warning_str += _COMPAT_CHECK_WARNING_SUFFIX - tf_logging.warning(warning_str) - elif not supported_device_strs: - tf_logging.warning( - '%s\n' - 'The dtype policy mixed_float16 may run slowly because ' - 'this machine does not have a GPU. Only Nvidia GPUs with ' - 'compute capability of at least 7.0 run quickly with ' - 'mixed_float16.\n%s' % (_COMPAT_CHECK_WARNING_PREFIX, - _COMPAT_CHECK_WARNING_SUFFIX)) - elif len(supported_device_strs) == 1: - tf_logging.info('%s\n' - 'Your GPU will likely run quickly with dtype policy ' - 'mixed_float16 as it has compute capability of at least ' - '7.0. 
Your GPU: %s' % (_COMPAT_CHECK_OK_PREFIX, - supported_device_strs[0])) - else: - tf_logging.info('%s\n' - 'Your GPUs will likely run quickly with dtype policy ' - 'mixed_float16 as they all have compute capability of at ' - 'least 7.0' % _COMPAT_CHECK_OK_PREFIX) + tf_logging.info( + "%s\n" + "Your GPUs will likely run quickly with dtype policy " + "mixed_float16 as they all have compute capability of at " + "least 7.0" % _COMPAT_CHECK_OK_PREFIX + ) _logged_compatibility_check = False def log_device_compatibility_check(policy_name): - """Logs a compatibility check if the devices support the policy. - - Currently only logs for the policy mixed_float16. A log is shown only the - first time this function is called. - - Args: - policy_name: The name of the dtype policy. - """ - global _logged_compatibility_check - if _logged_compatibility_check: - return - _logged_compatibility_check = True - gpus = tf.config.list_physical_devices('GPU') - gpu_details_list = [tf.config.experimental.get_device_details(g) for g in gpus] - _log_device_compatibility_check(policy_name, gpu_details_list) + """Logs a compatibility check if the devices support the policy. + + Currently only logs for the policy mixed_float16. A log is shown only the + first time this function is called. + + Args: + policy_name: The name of the dtype policy. + """ + global _logged_compatibility_check + if _logged_compatibility_check: + return + _logged_compatibility_check = True + gpus = tf.config.list_physical_devices("GPU") + gpu_details_list = [ + tf.config.experimental.get_device_details(g) for g in gpus + ] + _log_device_compatibility_check(policy_name, gpu_details_list) diff --git a/keras/mixed_precision/device_compatibility_check_test.py b/keras/mixed_precision/device_compatibility_check_test.py index 5d58dbec1014..9b355e09b296 100644 --- a/keras/mixed_precision/device_compatibility_check_test.py +++ b/keras/mixed_precision/device_compatibility_check_test.py @@ -14,128 +14,151 @@ # ============================================================================== """Tests the device compatibility check.""" -import tensorflow.compat.v2 as tf - import re -from keras.testing_infra import test_combinations +import tensorflow.compat.v2 as tf + from keras.mixed_precision import device_compatibility_check +from keras.testing_infra import test_combinations + +# isort: off from tensorflow.python.platform import tf_logging def device_details(device_name, compute_capability=None): - details = {} - if device_name: - details['device_name'] = device_name - if compute_capability: - details['compute_capability'] = compute_capability - return details + details = {} + if device_name: + details["device_name"] = device_name + if compute_capability: + details["compute_capability"] = compute_capability + return details -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class DeviceCompatibilityCheckTest(tf.test.TestCase): - - def _test_compat_check(self, device_attr_list, should_warn, expected_regex, - policy_name='mixed_float16'): - with tf.compat.v1.test.mock.patch.object(tf_logging, 'warning') as mock_warn, \ - tf.compat.v1.test.mock.patch.object(tf_logging, 'info') as mock_info: - device_compatibility_check._log_device_compatibility_check( - policy_name, device_attr_list) - if should_warn: - self.assertRegex(mock_warn.call_args[0][0], expected_regex) - mock_info.assert_not_called() - else: - self.assertRegex(mock_info.call_args[0][0], 
expected_regex) - mock_warn.assert_not_called() - - def test_supported(self): - details_list = [device_details('GPU 1', (7, 1))] - regex = re.compile( - r'.*compatibility check \(mixed_float16\): OK\n' - r'Your GPU will likely run quickly with dtype policy mixed_float16 as ' - r'it has compute capability of at least 7.0. Your GPU: GPU 1, compute ' - r'capability 7.1', flags=re.MULTILINE) - self._test_compat_check(details_list, False, regex) - - details_list = [ - device_details('GPU 1', (7, 0)), - device_details('GPU 2', (7, 1)), - device_details('GPU 3', (8, 0)), - ] - regex = re.compile( - r'.*compatibility check \(mixed_float16\): OK\n' - r'Your GPUs will likely run quickly with dtype policy mixed_float16 as ' - r'they all have compute capability of at least 7.0', flags=re.MULTILINE) - self._test_compat_check(details_list, False, regex) - - def test_unsupported(self): - details_list = [ - device_details('GPU 1', (6, 0)) - ] - regex = re.compile( - r'.*compatibility check \(mixed_float16\): WARNING\n' - r'Your GPU may run slowly with dtype policy mixed_float16.*\n' - r' GPU 1, compute capability 6.0\n' - r'See.*', flags=re.MULTILINE) - self._test_compat_check(details_list, True, regex) - - details_list = [ - device_details(None) - ] - regex = re.compile( - r'.*compatibility check \(mixed_float16\): WARNING\n' - r'Your GPU may run slowly with dtype policy mixed_float16.*\n' - r' Unknown GPU, no compute capability \(probably not an Nvidia GPU\)\n' - r'See.*', flags=re.MULTILINE) - self._test_compat_check(details_list, True, regex) - - details_list = [ - device_details('GPU 1', (6, 0)), - device_details('GPU 2', (3, 10)), - ] - regex = re.compile( - r'.*compatibility check \(mixed_float16\): WARNING\n' - r'Your GPUs may run slowly with dtype policy mixed_float16.*\n' - r' GPU 1, compute capability 6.0\n' - r' GPU 2, compute capability 3.10\n' - r'See.*', flags=re.MULTILINE) - self._test_compat_check(details_list, True, regex) - - details_list = [ - device_details('GPU 1', (6, 0)), - device_details('GPU 1', (6, 0)), - device_details('GPU 1', (6, 0)), - device_details('GPU 2', (3, 10)), - ] - regex = re.compile( - r'.*compatibility check \(mixed_float16\): WARNING\n' - r'Your GPUs may run slowly with dtype policy mixed_float16.*\n' - r' GPU 1, compute capability 6.0 \(x3\)\n' - r' GPU 2, compute capability 3.10\n' - r'See.*', flags=re.MULTILINE) - self._test_compat_check(details_list, True, regex) - - details_list = [] - regex = re.compile( - r'.*compatibility check \(mixed_float16\): WARNING\n' - r'The dtype policy mixed_float16 may run slowly because this machine ' - r'does not have a GPU', flags=re.MULTILINE) - self._test_compat_check(details_list, True, regex) - - def test_mix_of_supported_and_unsupported(self): - details_list = [ - device_details('GPU 1', (7, 0)), - device_details('GPU 1', (7, 0)), - device_details('GPU 2', (6, 0)) - ] - regex = re.compile( - r'.*compatibility check \(mixed_float16\): WARNING\n' - r'Some of your GPUs may run slowly with dtype policy mixed_float16.*\n' - r' GPU 1, compute capability 7.0 \(x2\)\n' - r' GPU 2, compute capability 6.0\n' - r'See.*', flags=re.MULTILINE) - self._test_compat_check(details_list, True, regex) - - -if __name__ == '__main__': - tf.test.main() + def _test_compat_check( + self, + device_attr_list, + should_warn, + expected_regex, + policy_name="mixed_float16", + ): + with tf.compat.v1.test.mock.patch.object( + tf_logging, "warning" + ) as mock_warn, tf.compat.v1.test.mock.patch.object( + tf_logging, "info" + ) as mock_info: + 
device_compatibility_check._log_device_compatibility_check( + policy_name, device_attr_list + ) + if should_warn: + self.assertRegex(mock_warn.call_args[0][0], expected_regex) + mock_info.assert_not_called() + else: + self.assertRegex(mock_info.call_args[0][0], expected_regex) + mock_warn.assert_not_called() + + def test_supported(self): + details_list = [device_details("GPU 1", (7, 1))] + regex = re.compile( + r".*compatibility check \(mixed_float16\): OK\n" + r"Your GPU will likely run quickly with dtype policy mixed_float16 " + r"as it has compute capability of at least 7.0. Your GPU: GPU 1, " + r"compute capability 7.1", + flags=re.MULTILINE, + ) + self._test_compat_check(details_list, False, regex) + + details_list = [ + device_details("GPU 1", (7, 0)), + device_details("GPU 2", (7, 1)), + device_details("GPU 3", (8, 0)), + ] + regex = re.compile( + r".*compatibility check \(mixed_float16\): OK\n" + r"Your GPUs will likely run quickly with dtype policy " + r"mixed_float16 as they all have compute capability of " + r"at least 7.0", + flags=re.MULTILINE, + ) + self._test_compat_check(details_list, False, regex) + + def test_unsupported(self): + details_list = [device_details("GPU 1", (6, 0))] + regex = re.compile( + r".*compatibility check \(mixed_float16\): WARNING\n" + r"Your GPU may run slowly with dtype policy mixed_float16.*\n" + r" GPU 1, compute capability 6.0\n" + r"See.*", + flags=re.MULTILINE, + ) + self._test_compat_check(details_list, True, regex) + + details_list = [device_details(None)] + regex = re.compile( + r".*compatibility check \(mixed_float16\): WARNING\n" + r"Your GPU may run slowly with dtype policy mixed_float16.*\n" + r" Unknown GPU, no compute capability " + r"\(probably not an Nvidia GPU\)\nSee.*", + flags=re.MULTILINE, + ) + self._test_compat_check(details_list, True, regex) + + details_list = [ + device_details("GPU 1", (6, 0)), + device_details("GPU 2", (3, 10)), + ] + regex = re.compile( + r".*compatibility check \(mixed_float16\): WARNING\n" + r"Your GPUs may run slowly with dtype policy mixed_float16.*\n" + r" GPU 1, compute capability 6.0\n" + r" GPU 2, compute capability 3.10\n" + r"See.*", + flags=re.MULTILINE, + ) + self._test_compat_check(details_list, True, regex) + + details_list = [ + device_details("GPU 1", (6, 0)), + device_details("GPU 1", (6, 0)), + device_details("GPU 1", (6, 0)), + device_details("GPU 2", (3, 10)), + ] + regex = re.compile( + r".*compatibility check \(mixed_float16\): WARNING\n" + r"Your GPUs may run slowly with dtype policy mixed_float16.*\n" + r" GPU 1, compute capability 6.0 \(x3\)\n" + r" GPU 2, compute capability 3.10\n" + r"See.*", + flags=re.MULTILINE, + ) + self._test_compat_check(details_list, True, regex) + + details_list = [] + regex = re.compile( + r".*compatibility check \(mixed_float16\): WARNING\n" + r"The dtype policy mixed_float16 may run slowly because this " + r"machine does not have a GPU", + flags=re.MULTILINE, + ) + self._test_compat_check(details_list, True, regex) + + def test_mix_of_supported_and_unsupported(self): + details_list = [ + device_details("GPU 1", (7, 0)), + device_details("GPU 1", (7, 0)), + device_details("GPU 2", (6, 0)), + ] + regex = re.compile( + r".*compatibility check \(mixed_float16\): WARNING\n" + r"Some of your GPUs may run slowly with dtype policy " + r"mixed_float16.*\n GPU 1, compute capability 7.0 \(x2\)\n" + r" GPU 2, compute capability 6.0\n" + r"See.*", + flags=re.MULTILINE, + ) + self._test_compat_check(details_list, True, regex) + + +if __name__ == "__main__": + 
tf.test.main() diff --git a/keras/mixed_precision/layer_correctness_test.py b/keras/mixed_precision/layer_correctness_test.py index 02a012ba5241..274b4e186e7c 100644 --- a/keras/mixed_precision/layer_correctness_test.py +++ b/keras/mixed_precision/layer_correctness_test.py @@ -14,14 +14,12 @@ # ============================================================================== """Tests various Layer subclasses have correct outputs with mixed precision.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np -from keras.testing_infra import test_combinations + from keras import layers from keras import models -from keras.testing_infra import test_utils from keras.layers import activation from keras.layers import attention from keras.layers import convolutional @@ -31,240 +29,325 @@ from keras.layers import pooling from keras.layers import regularization from keras.layers import reshaping +from keras.layers.normalization import batch_normalization +from keras.layers.normalization import layer_normalization +from keras.layers.preprocessing import image_preprocessing +from keras.layers.preprocessing import normalization from keras.layers.rnn import bidirectional from keras.layers.rnn import conv_lstm2d -from keras.layers.rnn import simple_rnn from keras.layers.rnn import gru from keras.layers.rnn import gru_v1 from keras.layers.rnn import lstm from keras.layers.rnn import lstm_v1 +from keras.layers.rnn import simple_rnn from keras.layers.rnn import time_distributed -from keras.layers.normalization import batch_normalization -from keras.layers.normalization import layer_normalization -from keras.layers.preprocessing import image_preprocessing -from keras.layers.preprocessing import normalization from keras.mixed_precision import policy +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils def create_mirrored_strategy(): - # The test creates two virtual CPUs, and we use both of them to test with - # multiple devices. - return tf.distribute.MirroredStrategy(['cpu:0', 'cpu:1']) + # The test creates two virtual CPUs, and we use both of them to test with + # multiple devices. 
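# Background for the setUp below: a single physical CPU is split into two
# logical devices so MirroredStrategy can mirror variables across "devices"
# on any machine. Standalone version (must run before TensorFlow initializes
# its devices, e.g. at the top of a script):
import tensorflow as tf

cpus = tf.config.list_physical_devices("CPU")
tf.config.set_logical_device_configuration(
    cpus[0],
    [
        tf.config.LogicalDeviceConfiguration(),
        tf.config.LogicalDeviceConfiguration(),
    ],
)
strategy = tf.distribute.MirroredStrategy(["cpu:0", "cpu:1"])
print(strategy.num_replicas_in_sync)  # 2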
+ # pylint: disable=protected-access + tf.distribute.MirroredStrategy._collective_key_base += 1 + return tf.distribute.MirroredStrategy(["cpu:0", "cpu:1"]) def _create_normalization_layer_with_adapt(): - layer = normalization.Normalization() - layer.adapt(np.random.normal(size=(10, 4))) - return layer + layer = normalization.Normalization() + layer.adapt(np.random.normal(size=(10, 4))) + return layer def _create_normalization_layer_without_adapt(): - return normalization.Normalization( - mean=np.random.normal(size=(4,)), - variance=np.random.uniform(0.5, 2., size=(4,)) - ) + return normalization.Normalization( + mean=np.random.normal(size=(4,)), + variance=np.random.uniform(0.5, 2.0, size=(4,)), + ) @test_utils.run_v2_only class LayerCorrectnessTest(test_combinations.TestCase): + def setUp(self): + super().setUp() + # Set two virtual CPUs to test MirroredStrategy with multiple devices + cpus = tf.config.list_physical_devices("CPU") + tf.config.set_logical_device_configuration( + cpus[0], + [ + tf.config.LogicalDeviceConfiguration(), + tf.config.LogicalDeviceConfiguration(), + ], + ) + self.strategy = create_mirrored_strategy() - def setUp(self): - super().setUp() - # Set two virtual CPUs to test MirroredStrategy with multiple devices - cpus = tf.config.list_physical_devices('CPU') - tf.config.set_logical_device_configuration(cpus[0], [ - tf.config.LogicalDeviceConfiguration(), - tf.config.LogicalDeviceConfiguration(), - ]) - - def _create_model_from_layer(self, layer, input_shapes): - inputs = [layers.Input(batch_input_shape=s) for s in input_shapes] - if len(inputs) == 1: - inputs = inputs[0] - y = layer(inputs) - model = models.Model(inputs, y) - model.compile('sgd', 'mse') - return model + def _create_model_from_layer(self, layer, input_shapes): + inputs = [layers.Input(batch_input_shape=s) for s in input_shapes] + if len(inputs) == 1: + inputs = inputs[0] + y = layer(inputs) + model = models.Model(inputs, y) + model.compile("sgd", "mse") + return model - @parameterized.named_parameters( - ('LeakyReLU', activation.LeakyReLU, (2, 2)), - ('PReLU', activation.PReLU, (2, 2)), - ('ELU', activation.ELU, (2, 2)), - ('ThresholdedReLU', activation.ThresholdedReLU, (2, 2)), - ('Softmax', activation.Softmax, (2, 2)), - ('ReLU', activation.ReLU, (2, 2)), - ('Conv1D', lambda: convolutional.Conv1D(2, 2), (2, 2, 1)), - ('Conv2D', lambda: convolutional.Conv2D(2, 2), (2, 2, 2, 1)), - ('Conv3D', lambda: convolutional.Conv3D(2, 2), (2, 2, 2, 2, 1)), - ('Conv2DTranspose', lambda: convolutional.Conv2DTranspose(2, 2), - (2, 2, 2, 2)), - ('SeparableConv2D', lambda: convolutional.SeparableConv2D(2, 2), - (2, 2, 2, 1)), - ('DepthwiseConv2D', lambda: convolutional.DepthwiseConv2D(2, 2), - (2, 2, 2, 1)), - ('UpSampling2D', reshaping.UpSampling2D, (2, 2, 2, 1)), - ('ZeroPadding2D', reshaping.ZeroPadding2D, (2, 2, 2, 1)), - ('Cropping2D', reshaping.Cropping2D, (2, 3, 3, 1)), - ('ConvLSTM2D', - lambda: conv_lstm2d.ConvLSTM2D(4, kernel_size=(2, 2)), (4, 4, 4, 4, 4)), - ('Dense', lambda: core.Dense(2), (2, 2)), - ('Dropout', lambda: regularization.Dropout(0.5), (2, 2)), - ('SpatialDropout2D', - lambda: regularization.SpatialDropout2D(0.5), (2, 2, 2, 2)), - ('Activation', lambda: core.Activation('sigmoid'), (2, 2)), - ('Reshape', lambda: reshaping.Reshape((1, 4, 1)), (2, 2, 2)), - ('Permute', lambda: reshaping.Permute((2, 1)), (2, 2, 2)), - ('Attention', attention.Attention, [(2, 2, 3), (2, 3, 3), (2, 3, 3)]), - ('AdditiveAttention', attention.AdditiveAttention, [(2, 2, 3), - (2, 3, 3), - (2, 3, 3)]), - ('Embedding', 
lambda: core.Embedding(4, 4), - (2, 4), 2e-3, 2e-3, np.random.randint(4, size=(2, 4))), - ('LocallyConnected1D', lambda: locally_connected.LocallyConnected1D(2, 2), - (2, 2, 1)), - ('LocallyConnected2D', lambda: locally_connected.LocallyConnected2D(2, 2), - (2, 2, 2, 1)), - ('Add', merging.Add, [(2, 2), (2, 2)]), - ('Subtract', merging.Subtract, [(2, 2), (2, 2)]), - ('Multiply', merging.Multiply, [(2, 2), (2, 2)]), - ('Average', merging.Average, [(2, 2), (2, 2)]), - ('Maximum', merging.Maximum, [(2, 2), (2, 2)]), - ('Minimum', merging.Minimum, [(2, 2), (2, 2)]), - ('Concatenate', merging.Concatenate, [(2, 2), (2, 2)]), - ('Dot', lambda: merging.Dot(1), [(2, 2), (2, 2)]), - ('GaussianNoise', lambda: regularization.GaussianNoise(0.5), (2, 2)), - ('GaussianDropout', lambda: regularization.GaussianDropout(0.5), (2, 2)), - ('AlphaDropout', lambda: regularization.AlphaDropout(0.5), (2, 2)), - ('BatchNormalization', batch_normalization.BatchNormalization, - (2, 2), 1e-2, 1e-2), - ('LayerNormalization', layer_normalization.LayerNormalization, (2, 2)), - ('LayerNormalizationUnfused', - lambda: layer_normalization.LayerNormalization(axis=1), (2, 2, 2)), - ('MaxPooling2D', pooling.MaxPooling2D, (2, 2, 2, 1)), - ('AveragePooling2D', pooling.AveragePooling2D, (2, 2, 2, 1)), - ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D, (2, 2, 2, 1)), - ('GlobalAveragePooling2D', pooling.GlobalAveragePooling2D, (2, 2, 2, 1)), - ('SimpleRNN', lambda: simple_rnn.SimpleRNN(units=4), - (4, 4, 4), 1e-2, 1e-2), - ('SimpleRNN_stateful', - lambda: simple_rnn.SimpleRNN(units=4, stateful=True), - (4, 4, 4), 1e-2, 1e-2), - ('GRU', lambda: gru_v1.GRU(units=4), (4, 4, 4)), - ('LSTM', lambda: lstm_v1.LSTM(units=4), (4, 4, 4)), - ('GRUV2', lambda: gru.GRU(units=4), (4, 4, 4)), - ('GRUV2_stateful', lambda: gru.GRU(units=4, stateful=True), - (4, 4, 4)), - ('LSTMV2', lambda: lstm.LSTM(units=4), (4, 4, 4)), - ('LSTMV2_stateful', lambda: lstm.LSTM(units=4, stateful=True), - (4, 4, 4)), - ('TimeDistributed', - lambda: time_distributed.TimeDistributed(core.Dense(2)), (2, 2, 2)), - ('Bidirectional', - lambda: bidirectional.Bidirectional(simple_rnn.SimpleRNN(units=4)), - (2, 2, 2)), - ('AttentionLayerCausal', lambda: attention.Attention(causal=True), [ - (2, 2, 3), (2, 3, 3), (2, 3, 3) - ]), - ('AdditiveAttentionLayerCausal', - lambda: attention.AdditiveAttention(causal=True), [(2, 3, 4), - (2, 3, 4), - (2, 3, 4)]), - ('NormalizationAdapt', _create_normalization_layer_with_adapt, (4, 4)), - ('NormalizationNoAdapt', _create_normalization_layer_without_adapt, - (4, 4)), - ('Resizing', lambda: image_preprocessing.Resizing(3, 3), (2, 5, 5, 1)), - ('Rescaling', lambda: image_preprocessing.Rescaling(2., 1.), (6, 6)), - ('CenterCrop', lambda: image_preprocessing.CenterCrop(3, 3), - (2, 5, 5, 1)) - ) - def test_layer(self, f32_layer_fn, input_shape, rtol=2e-3, atol=2e-3, - input_data=None): - """Tests a layer by comparing the float32 and mixed precision weights. 
+ @parameterized.named_parameters( + ("LeakyReLU", activation.LeakyReLU, (2, 2)), + ("PReLU", activation.PReLU, (2, 2)), + ("ELU", activation.ELU, (2, 2)), + ("ThresholdedReLU", activation.ThresholdedReLU, (2, 2)), + ("Softmax", activation.Softmax, (2, 2)), + ("ReLU", activation.ReLU, (2, 2)), + ("Conv1D", lambda: convolutional.Conv1D(2, 2), (2, 2, 1)), + ("Conv2D", lambda: convolutional.Conv2D(2, 2), (2, 2, 2, 1)), + ("Conv3D", lambda: convolutional.Conv3D(2, 2), (2, 2, 2, 2, 1)), + ( + "Conv2DTranspose", + lambda: convolutional.Conv2DTranspose(2, 2), + (2, 2, 2, 2), + ), + ( + "SeparableConv2D", + lambda: convolutional.SeparableConv2D(2, 2), + (2, 2, 2, 1), + ), + ( + "DepthwiseConv2D", + lambda: convolutional.DepthwiseConv2D(2, 2), + (2, 2, 2, 1), + ), + ("UpSampling2D", reshaping.UpSampling2D, (2, 2, 2, 1)), + ("ZeroPadding2D", reshaping.ZeroPadding2D, (2, 2, 2, 1)), + ("Cropping2D", reshaping.Cropping2D, (2, 3, 3, 1)), + ( + "ConvLSTM2D", + lambda: conv_lstm2d.ConvLSTM2D(4, kernel_size=(2, 2)), + (4, 4, 4, 4, 4), + ), + ("Dense", lambda: core.Dense(2), (2, 2)), + ("Dropout", lambda: regularization.Dropout(0.5), (2, 2)), + ( + "SpatialDropout2D", + lambda: regularization.SpatialDropout2D(0.5), + (2, 2, 2, 2), + ), + ("Activation", lambda: core.Activation("sigmoid"), (2, 2)), + ("Reshape", lambda: reshaping.Reshape((1, 4, 1)), (2, 2, 2)), + ("Permute", lambda: reshaping.Permute((2, 1)), (2, 2, 2)), + ("Attention", attention.Attention, [(2, 2, 3), (2, 3, 3), (2, 3, 3)]), + ( + "AdditiveAttention", + attention.AdditiveAttention, + [(2, 2, 3), (2, 3, 3), (2, 3, 3)], + ), + ( + "Embedding", + lambda: core.Embedding(4, 4), + (2, 4), + 2e-3, + 2e-3, + np.random.randint(4, size=(2, 4)), + ), + ( + "LocallyConnected1D", + lambda: locally_connected.LocallyConnected1D(2, 2), + (2, 2, 1), + ), + ( + "LocallyConnected2D", + lambda: locally_connected.LocallyConnected2D(2, 2), + (2, 2, 2, 1), + ), + ("Add", merging.Add, [(2, 2), (2, 2)]), + ("Subtract", merging.Subtract, [(2, 2), (2, 2)]), + ("Multiply", merging.Multiply, [(2, 2), (2, 2)]), + ("Average", merging.Average, [(2, 2), (2, 2)]), + ("Maximum", merging.Maximum, [(2, 2), (2, 2)]), + ("Minimum", merging.Minimum, [(2, 2), (2, 2)]), + ("Concatenate", merging.Concatenate, [(2, 2), (2, 2)]), + ("Dot", lambda: merging.Dot(1), [(2, 2), (2, 2)]), + ("GaussianNoise", lambda: regularization.GaussianNoise(0.5), (2, 2)), + ( + "GaussianDropout", + lambda: regularization.GaussianDropout(0.5), + (2, 2), + ), + ("AlphaDropout", lambda: regularization.AlphaDropout(0.5), (2, 2)), + ( + "BatchNormalization", + batch_normalization.BatchNormalization, + (2, 2), + 1e-2, + 1e-2, + ), + ("LayerNormalization", layer_normalization.LayerNormalization, (2, 2)), + ( + "LayerNormalizationUnfused", + lambda: layer_normalization.LayerNormalization(axis=1), + (2, 2, 2), + ), + ("MaxPooling2D", pooling.MaxPooling2D, (2, 2, 2, 1)), + ("AveragePooling2D", pooling.AveragePooling2D, (2, 2, 2, 1)), + ("GlobalMaxPooling2D", pooling.GlobalMaxPooling2D, (2, 2, 2, 1)), + ( + "GlobalAveragePooling2D", + pooling.GlobalAveragePooling2D, + (2, 2, 2, 1), + ), + ( + "SimpleRNN", + lambda: simple_rnn.SimpleRNN(units=4), + (4, 4, 4), + 1e-2, + 1e-2, + ), + ( + "SimpleRNN_stateful", + lambda: simple_rnn.SimpleRNN(units=4, stateful=True), + (4, 4, 4), + 1e-2, + 1e-2, + ), + ("GRU", lambda: gru_v1.GRU(units=4), (4, 4, 4)), + ("LSTM", lambda: lstm_v1.LSTM(units=4), (4, 4, 4)), + ("GRUV2", lambda: gru.GRU(units=4), (4, 4, 4)), + ("GRUV2_stateful", lambda: gru.GRU(units=4, stateful=True), (4, 4, 
4)), + ("LSTMV2", lambda: lstm.LSTM(units=4), (4, 4, 4)), + ( + "LSTMV2_stateful", + lambda: lstm.LSTM(units=4, stateful=True), + (4, 4, 4), + ), + ( + "TimeDistributed", + lambda: time_distributed.TimeDistributed(core.Dense(2)), + (2, 2, 2), + ), + ( + "Bidirectional", + lambda: bidirectional.Bidirectional(simple_rnn.SimpleRNN(units=4)), + (2, 2, 2), + ), + ("NormalizationAdapt", _create_normalization_layer_with_adapt, (4, 4)), + ( + "NormalizationNoAdapt", + _create_normalization_layer_without_adapt, + (4, 4), + ), + ("Resizing", lambda: image_preprocessing.Resizing(3, 3), (2, 5, 5, 1)), + ("Rescaling", lambda: image_preprocessing.Rescaling(2.0, 1.0), (6, 6)), + ( + "CenterCrop", + lambda: image_preprocessing.CenterCrop(3, 3), + (2, 5, 5, 1), + ), + ) + def test_layer( + self, f32_layer_fn, input_shape, rtol=2e-3, atol=2e-3, input_data=None + ): + """Tests a layer by comparing the float32 and mixed precision weights. - A float32 layer, a mixed precision layer, and a distributed mixed precision - layer are run. The three layers are identical other than their dtypes and - distribution strategies. The outputs after predict() and weights after fit() - are asserted to be close. + A float32 layer, a mixed precision layer, and a distributed mixed + precision layer are run. The three layers are identical other than their + dtypes and distribution strategies. The outputs after predict() and + weights after fit() are asserted to be close. - Args: - f32_layer_fn: A function returning a float32 layer. The other two layers - will automatically be created from this - input_shape: The shape of the input to the layer, including the batch - dimension. Or a list of shapes if the layer takes multiple inputs. - rtol: The relative tolerance to be asserted. - atol: The absolute tolerance to be asserted. - input_data: A Numpy array with the data of the input. If None, input data - will be randomly generated - """ + Args: + f32_layer_fn: A function returning a float32 layer. The other two + layers will automatically be created from this. + input_shape: The shape of the input to the layer, including the batch + dimension. Or a list of shapes if the layer takes multiple inputs. + rtol: The relative tolerance to be asserted. + atol: The absolute tolerance to be asserted. + input_data: A Numpy array with the data of the input. If None, input + data will be randomly generated. 
+ """ - if f32_layer_fn == reshaping.ZeroPadding2D and tf.test.is_built_with_rocm(): - return - if isinstance(input_shape[0], int): - input_shapes = [input_shape] - else: - input_shapes = input_shape - strategy = create_mirrored_strategy() - f32_layer = f32_layer_fn() + if ( + f32_layer_fn == reshaping.ZeroPadding2D + and tf.test.is_built_with_rocm() + ): + return + if isinstance(input_shape[0], int): + input_shapes = [input_shape] + else: + input_shapes = input_shape + f32_layer = f32_layer_fn() - # Create the layers - assert f32_layer.dtype == f32_layer._compute_dtype == 'float32' - config = f32_layer.get_config() - config['dtype'] = policy.Policy('mixed_float16') - mp_layer = f32_layer.__class__.from_config(config) - distributed_mp_layer = f32_layer.__class__.from_config(config) + # Create the layers + assert f32_layer.dtype == f32_layer._compute_dtype == "float32" + config = f32_layer.get_config() + config["dtype"] = policy.Policy("mixed_float16") + mp_layer = f32_layer.__class__.from_config(config) + distributed_mp_layer = f32_layer.__class__.from_config(config) - # Compute per_replica_input_shapes for the distributed model - global_batch_size = input_shapes[0][0] - assert global_batch_size % strategy.num_replicas_in_sync == 0, ( - 'The number of replicas, %d, does not divide the global batch size of ' - '%d' % (strategy.num_replicas_in_sync, global_batch_size)) - per_replica_batch_size = ( - global_batch_size // strategy.num_replicas_in_sync) - per_replica_input_shapes = [(per_replica_batch_size,) + s[1:] - for s in input_shapes] + # Compute per_replica_input_shapes for the distributed model + global_batch_size = input_shapes[0][0] + assert global_batch_size % self.strategy.num_replicas_in_sync == 0, ( + "The number of replicas, %d, does not divide the global batch " + "size of %d" + % (self.strategy.num_replicas_in_sync, global_batch_size) + ) + per_replica_batch_size = ( + global_batch_size // self.strategy.num_replicas_in_sync + ) + per_replica_input_shapes = [ + (per_replica_batch_size,) + s[1:] for s in input_shapes + ] - # Create the models - f32_model = self._create_model_from_layer(f32_layer, input_shapes) - mp_model = self._create_model_from_layer(mp_layer, input_shapes) - with strategy.scope(): - distributed_mp_model = self._create_model_from_layer( - distributed_mp_layer, per_replica_input_shapes) + # Create the models + f32_model = self._create_model_from_layer(f32_layer, input_shapes) + mp_model = self._create_model_from_layer(mp_layer, input_shapes) + with self.strategy.scope(): + distributed_mp_model = self._create_model_from_layer( + distributed_mp_layer, per_replica_input_shapes + ) - # Set all model weights to the same values - f32_weights = f32_model.get_weights() - mp_model.set_weights(f32_weights) - distributed_mp_model.set_weights(f32_weights) + # Set all model weights to the same values + f32_weights = f32_model.get_weights() + mp_model.set_weights(f32_weights) + distributed_mp_model.set_weights(f32_weights) - # Generate input data - if input_data is None: - # Cast inputs to float16 to avoid measuring error from having f16 layers - # cast to float16. - input_data = [np.random.normal(size=s).astype('float16') - for s in input_shapes] - if len(input_data) == 1: - input_data = input_data[0] + # Generate input data + if input_data is None: + # Cast inputs to float16 to avoid measuring error from having f16 + # layers cast to float16. 
+ input_data = [ + np.random.normal(size=s).astype("float16") for s in input_shapes + ] + if len(input_data) == 1: + input_data = input_data[0] - # Assert all models have close outputs. - f32_output = f32_model.predict(input_data) - mp_output = mp_model.predict(input_data) - self.assertAllClose( - mp_output, f32_output, rtol=rtol, atol=atol) - self.assertAllClose( - distributed_mp_model.predict(input_data), f32_output, rtol=rtol, - atol=atol) + # Assert all models have close outputs. + f32_output = f32_model.predict(input_data) + mp_output = mp_model.predict(input_data) + self.assertAllClose(mp_output, f32_output, rtol=rtol, atol=atol) + self.assertAllClose( + distributed_mp_model.predict(input_data), + f32_output, + rtol=rtol, + atol=atol, + ) - # Run fit() on models - output = np.random.normal(size=f32_model.outputs[0].shape).astype('float16') - for model in f32_model, mp_model, distributed_mp_model: - model.fit(input_data, output, batch_size=global_batch_size) + # Run fit() on models + output = np.random.normal(size=f32_model.outputs[0].shape).astype( + "float16" + ) + for model in f32_model, mp_model, distributed_mp_model: + model.fit(input_data, output, batch_size=global_batch_size) - # Assert all models have close weights - f32_weights = f32_model.get_weights() - self.assertAllClose( - mp_model.get_weights(), f32_weights, rtol=rtol, atol=atol) - self.assertAllClose( - distributed_mp_model.get_weights(), f32_weights, rtol=rtol, atol=atol) + # Assert all models have close weights + f32_weights = f32_model.get_weights() + self.assertAllClose( + mp_model.get_weights(), f32_weights, rtol=rtol, atol=atol + ) + self.assertAllClose( + distributed_mp_model.get_weights(), + f32_weights, + rtol=rtol, + atol=atol, + ) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/mixed_precision/layer_test.py b/keras/mixed_precision/layer_test.py index 404649a99417..b45133d0a5ca 100644 --- a/keras/mixed_precision/layer_test.py +++ b/keras/mixed_precision/layer_test.py @@ -14,13 +14,12 @@ # ============================================================================== """Tests keras.layers.Layer works properly with mixed precision.""" -import tensorflow.compat.v2 as tf - import os -from absl.testing import parameterized import numpy as np -from keras.testing_infra import test_combinations +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + from keras import layers from keras import models from keras.engine import base_layer @@ -28,15 +27,16 @@ from keras.engine import input_spec from keras.mixed_precision import policy from keras.mixed_precision import test_util as mp_test_util -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import gradient_descent +from keras.testing_infra import test_combinations class MultiplyLayerWithFunction(mp_test_util.MultiplyLayer): - """Same as MultiplyLayer, but _multiply is decorated with a tf.function.""" + """Same as MultiplyLayer, but _multiply is decorated with a tf.function.""" - @tf.function - def _multiply(self, x, y): - return super()._multiply(x, y) + @tf.function + def _multiply(self, x, y): + return super()._multiply(x, y) # If called outside any strategy.scope() calls, this will return the default @@ -45,381 +45,464 @@ def _multiply(self, x, y): def create_mirrored_strategy(): - """Create a MirroredStrategy, using a GPU if it is available.""" - if tf.config.list_logical_devices('GPU'): - return tf.distribute.MirroredStrategy(['cpu:0', 'gpu:0']) 
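The heart of `test_layer()` above is cloning one float32 layer into a mixed-precision twin through its config. Distilled to a standalone sketch, with `Dense` standing in for the parameterized layer and no distribution strategy involved:

    import numpy as np
    from keras import layers
    from keras.mixed_precision import policy

    f32_layer = layers.Dense(2)
    config = f32_layer.get_config()
    config["dtype"] = policy.Policy("mixed_float16")
    mp_layer = layers.Dense.from_config(config)

    x = np.random.normal(size=(2, 2)).astype("float16")
    y = mp_layer(x)
    print(y.dtype)                # float16: the policy's compute dtype
    print(mp_layer.kernel.dtype)  # float32: variables stay full precision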
- else: - return tf.distribute.MirroredStrategy(['cpu:0']) + """Create a MirroredStrategy, using a GPU if it is available.""" + if tf.config.list_logical_devices("GPU"): + return tf.distribute.MirroredStrategy(["cpu:0", "gpu:0"]) + else: + return tf.distribute.MirroredStrategy(["cpu:0"]) def create_central_storage_strategy(): - """Create a CentralStorageStrategy, using a GPU if it is available.""" - compute_devices = ['cpu:0', 'gpu:0'] if ( - tf.config.list_logical_devices('GPU')) else ['cpu:0'] - return tf.distribute.experimental.CentralStorageStrategy( - compute_devices, parameter_device='cpu:0') + """Create a CentralStorageStrategy, using a GPU if it is available.""" + compute_devices = ( + ["cpu:0", "gpu:0"] + if (tf.config.list_logical_devices("GPU")) + else ["cpu:0"] + ) + return tf.distribute.experimental.CentralStorageStrategy( + compute_devices, parameter_device="cpu:0" + ) -TESTCASES = ({ - 'testcase_name': 'base', - 'strategy_fn': default_strategy_fn -}, { - 'testcase_name': 'distribute', - 'strategy_fn': create_mirrored_strategy -}) +TESTCASES = ( + {"testcase_name": "base", "strategy_fn": default_strategy_fn}, + {"testcase_name": "distribute", "strategy_fn": create_mirrored_strategy}, +) -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class LayerTest(test_combinations.TestCase): - """Test mixed precision with Keras layers.""" - - @parameterized.named_parameters(*TESTCASES) - def test_mixed_policies_(self, strategy_fn): - strategy = strategy_fn() - for dtype in 'float16', 'bfloat16': - x = tf.constant([1.]) - policy_name = 'mixed_' + dtype - with strategy.scope(), policy.policy_scope(policy_name): - layer = mp_test_util.MultiplyLayer(assert_type=dtype) - self.assertEqual(layer.dtype, tf.float32) - self.assertEqual(layer.dtype_policy.name, policy_name) - y = layer(x) - self.assertEqual(layer.v.dtype, tf.float32) - self.assertEqual(y.dtype, dtype) - self.assertEqual(layer.dtype_policy.name, policy_name) - self.assertIsInstance(layer.dtype_policy, policy.Policy) - self.assertEqual(layer.compute_dtype, dtype) - self.assertEqual(layer.dtype, tf.float32) - self.assertEqual(layer.variable_dtype, tf.float32) - self.assertEqual(layer.dtype_policy.name, policy_name) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(y), 1.) - - def test_layer_with_int_variable(self): - class LayerWithIntVar(base_layer.Layer): - - def build(self, _): - self.v = self.add_weight('v', dtype='int32', trainable=False) - - def call(self, inputs): - # Only float variables should be autocasted. This will fail if self.v is - # autocasted to float32 - return tf.cast(inputs, 'int32') + self.v - - x = tf.constant([1.]) - layer = LayerWithIntVar(dtype='mixed_float16') - self.assertEqual(layer(x).dtype, 'int32') - - @parameterized.named_parameters(*TESTCASES) - def test_layer_with_non_autocast_variable(self, strategy_fn): - x = tf.constant([1.]) - with strategy_fn().scope(): - with policy.policy_scope('mixed_float16'): - layer = mp_test_util.MultiplyLayerWithoutAutoCast( - assert_type=tf.float16) - y = layer(x) - self.assertEqual(layer.v.dtype, tf.float32) - self.assertEqual(y.dtype, tf.float16) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(y), 1.) 
- - @parameterized.named_parameters(*TESTCASES) - def test_layer_calling_tf_function(self, strategy_fn): - x = tf.constant([1.]) - with strategy_fn().scope(): - with policy.policy_scope('mixed_float16'): - layer = MultiplyLayerWithFunction(assert_type=tf.float16) - y = layer(x) - self.assertEqual(layer.v.dtype, tf.float32) - self.assertEqual(y.dtype, tf.float16) + """Test mixed precision with Keras layers.""" + + @parameterized.named_parameters(*TESTCASES) + def test_mixed_policies_(self, strategy_fn): + strategy = strategy_fn() + for dtype in "float16", "bfloat16": + x = tf.constant([1.0]) + policy_name = "mixed_" + dtype + with strategy.scope(), policy.policy_scope(policy_name): + layer = mp_test_util.MultiplyLayer(assert_type=dtype) + self.assertEqual(layer.dtype, tf.float32) + self.assertEqual(layer.dtype_policy.name, policy_name) + y = layer(x) + self.assertEqual(layer.v.dtype, tf.float32) + self.assertEqual(y.dtype, dtype) + self.assertEqual(layer.dtype_policy.name, policy_name) + self.assertIsInstance(layer.dtype_policy, policy.Policy) + self.assertEqual(layer.compute_dtype, dtype) + self.assertEqual(layer.dtype, tf.float32) + self.assertEqual(layer.variable_dtype, tf.float32) + self.assertEqual(layer.dtype_policy.name, policy_name) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(y), 1.0) + + def test_layer_with_int_variable(self): + class LayerWithIntVar(base_layer.Layer): + def build(self, _): + self.v = self.add_weight("v", dtype="int32", trainable=False) + + def call(self, inputs): + # Only float variables should be autocasted. This will fail if + # self.v is autocasted to float32 + return tf.cast(inputs, "int32") + self.v + + x = tf.constant([1.0]) + layer = LayerWithIntVar(dtype="mixed_float16") + self.assertEqual(layer(x).dtype, "int32") + + @parameterized.named_parameters(*TESTCASES) + def test_layer_with_non_autocast_variable(self, strategy_fn): + x = tf.constant([1.0]) + with strategy_fn().scope(): + with policy.policy_scope("mixed_float16"): + layer = mp_test_util.MultiplyLayerWithoutAutoCast( + assert_type=tf.float16 + ) + y = layer(x) + self.assertEqual(layer.v.dtype, tf.float32) + self.assertEqual(y.dtype, tf.float16) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(y), 1.0) + + @parameterized.named_parameters(*TESTCASES) + def test_layer_calling_tf_function(self, strategy_fn): + x = tf.constant([1.0]) + with strategy_fn().scope(): + with policy.policy_scope("mixed_float16"): + layer = MultiplyLayerWithFunction(assert_type=tf.float16) + y = layer(x) + self.assertEqual(layer.v.dtype, tf.float32) + self.assertEqual(y.dtype, tf.float16) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(y), 1.0) + + @parameterized.named_parameters(*TESTCASES) + def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn): + x = tf.constant([1.0]) + with strategy_fn().scope(): + with policy.policy_scope("mixed_float16"): + # Test on MultiplyLayer + layer = mp_test_util.MultiplyLayer( + assert_type=tf.float16, + regularizer=mp_test_util.IdentityRegularizer(), + ) + layer(x) + (regularizer_loss,) = layer.losses + self.assertEqual(regularizer_loss.dtype, tf.float32) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(regularizer_loss), 1.0) + + # Test on MultiplyLayerWithoutAutoCast + layer = mp_test_util.MultiplyLayerWithoutAutoCast( + assert_type=tf.float16, + regularizer=mp_test_util.IdentityRegularizer(), + ) + 
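The assertions in the reindented `test_mixed_policies_` above reduce to the following behavior, sketched here with `MultiplyLayer`'s defaults (no `assert_type`, no strategy scope):

    import tensorflow as tf
    from keras.mixed_precision import policy
    from keras.mixed_precision import test_util as mp_test_util

    with policy.policy_scope("mixed_float16"):
        layer = mp_test_util.MultiplyLayer()
    y = layer(tf.constant([1.0]))
    print(layer.compute_dtype)   # float16
    print(layer.variable_dtype)  # float32
    print(y.dtype)               # <dtype: 'float16'>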
layer(x) + (regularizer_loss,) = layer.losses + self.assertEqual(regularizer_loss.dtype, tf.float32) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(regularizer_loss), 1.0) + + @parameterized.named_parameters(*TESTCASES) + def test_passing_policy_to_layer(self, strategy_fn): + x = tf.constant([1.0], dtype=tf.float16) + with strategy_fn().scope(): + # Passing a Policy to 'dtype' sets the policy for that layer. + layer = mp_test_util.MultiplyLayer( + assert_type=tf.float16, dtype=policy.Policy("mixed_float16") + ) + # layer.dtype refers to the variable dtype + self.assertEqual(layer.dtype, tf.float32) + layer(x) + self.assertEqual(layer.v.dtype, tf.float32) + with policy.policy_scope("mixed_float16"): + # Passing a Policy to dtype overrides the global Policy + layer = mp_test_util.MultiplyLayer( + assert_type=tf.float64, dtype=policy.Policy("float64") + ) + self.assertEqual(layer.dtype_policy.name, "float64") + self.assertIsInstance(layer.dtype_policy, policy.Policy) + self.assertEqual(layer.compute_dtype, tf.float64) + self.assertEqual(layer.dtype, tf.float64) + self.assertEqual(layer.variable_dtype, tf.float64) + self.assertEqual(layer(x).dtype, tf.float64) + self.assertEqual(layer.v.dtype, tf.float64) + + @parameterized.named_parameters(*TESTCASES) + def test_gradient(self, strategy_fn): + x = tf.constant([1.0]) + with strategy_fn().scope() as strategy: + with policy.policy_scope("mixed_float16"): + layer = mp_test_util.MultiplyLayer(assert_type=tf.float16) + # Learning rate is small enough that if applied to a float16 + # variable, the variable will not change. So this tests the + # learning rate is not applied to a float16 value, but instead + # the float32 variable. + opt = gradient_descent.SGD(2**-14) + + def run_fn(): + with tf.GradientTape() as tape: + y = layer(x) + # Divide by num_replicas_in_sync, as the effective total + # loss is the sum of each of the replica's losses. + y /= strategy.num_replicas_in_sync + + grad = tape.gradient(y, layer.v) + return opt.apply_gradients([(grad, layer.v)]) + + op = strategy.experimental_run(run_fn) + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(op) + # The gradient with respective to the variable is 1. Since the + # variable is initialized with 1 and the learning rate is + # 2**-14, the new variable value should be: init_val - gradient + # * learning_rate, which is 1 - 1 * 2**-14 + self.assertEqual(self.evaluate(layer.v), 1 - 2**-14) + + def _test_checkpointing_layer_weights( + self, strategy_fn, mixed_prec_when_saving, mixed_prec_when_loading + ): + # In this test, we potentially save with mixed precision enabled and + # load with mixed precision disabled, or vice versa. This is possible + # because variables are float32 regardless of whether mixed precision is + # enabled. + save_policy = "mixed_float16" if mixed_prec_when_saving else "float32" + load_policy = "mixed_float16" if mixed_prec_when_loading else "float32" + save_input_dtype = "float16" if mixed_prec_when_saving else "float32" + load_input_dtype = "float16" if mixed_prec_when_loading else "float32" + + # Create a layer and save a checkpoint. 
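Why `test_gradient` above uses a learning rate of 2**-14: the gradient with respect to the variable is 1, so the update is exactly 2**-14, which survives when applied to a float32 variable but rounds away entirely in float16 arithmetic near 1.0. A quick numpy check (illustrative only):

    import numpy as np

    lr = 2.0**-14
    print(np.float32(1.0) - np.float32(lr))  # 0.9999390 (update survives)
    print(np.float16(1.0) - np.float16(lr))  # 1.0 (update is rounded away)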
+ x = tf.constant([1.0]) + with strategy_fn().scope(): + with policy.policy_scope(save_policy): + layer = mp_test_util.MultiplyLayer(assert_type=save_input_dtype) + layer(x) # Build layer + layer.set_weights([np.array(100.0)]) + self.assertEqual(self.evaluate(layer(x)), 100.0) + checkpoint = tf.train.Checkpoint(layer=layer) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + save_path = checkpoint.save(prefix) + + # Create a new layer and restore the checkpoint. + x = tf.constant([1.0]) + with strategy_fn().scope(): + with policy.policy_scope(load_policy): + layer = mp_test_util.MultiplyLayer(assert_type=load_input_dtype) + layer(x) # Build layer + layer.set_weights([np.array(200.0)]) + self.assertEqual(self.evaluate(layer(x)), 200.0) + checkpoint = tf.train.Checkpoint(layer=layer) + checkpoint.restore(save_path).assert_consumed().run_restore_ops() + self.assertEqual(layer.get_weights(), [100.0]) + self.assertEqual(self.evaluate(layer(x)), 100.0) + + @parameterized.named_parameters(*TESTCASES) + def test_checkpointing_layer_weights(self, strategy_fn): + with self.test_session(): + self._test_checkpointing_layer_weights( + strategy_fn, + mixed_prec_when_saving=True, + mixed_prec_when_loading=True, + ) + self._test_checkpointing_layer_weights( + strategy_fn, + mixed_prec_when_saving=True, + mixed_prec_when_loading=False, + ) + self._test_checkpointing_layer_weights( + strategy_fn, + mixed_prec_when_saving=False, + mixed_prec_when_loading=True, + ) + + @parameterized.named_parameters(*TESTCASES) + def test_config(self, strategy_fn): + x = tf.constant([1.0], dtype=tf.float16) + with strategy_fn().scope(): + for layer, dtype in ( + (mp_test_util.MultiplyLayer(), "float32"), + (mp_test_util.MultiplyLayer(dtype="float64"), "float64"), + ( + mp_test_util.MultiplyLayer(dtype=policy.Policy("float64")), + "float64", + ), + ): + config = layer.get_config() + self.assertEqual(config["dtype"], dtype) + self.assertIsInstance(config["dtype"], str) + layer = mp_test_util.MultiplyLayer.from_config(config) + self.assertEqual(layer.dtype, dtype) + self.assertEqual(layer(x).dtype, dtype) + self.assertEqual(layer.v.dtype, dtype) + + layer = mp_test_util.MultiplyLayer(dtype="mixed_float16") + config = layer.get_config() + if tf.__internal__.tf2.enabled(): + self.assertEqual( + config["dtype"], + { + "module": "keras.mixed_precision", + "class_name": "Policy", + "config": {"name": "mixed_float16"}, + "registered_name": None, + }, + ) + else: + self.assertEqual( + config["dtype"], + { + "class_name": "Policy", + "config": {"name": "mixed_float16"}, + }, + ) + layer = mp_test_util.MultiplyLayer.from_config(config) + self.assertEqual(layer.dtype, "float32") + self.assertEqual(layer(x).dtype, "float16") + self.assertEqual(layer.v.dtype, "float32") + config = layer.get_config() + if tf.__internal__.tf2.enabled(): + self.assertEqual( + config["dtype"], + { + "module": "keras.mixed_precision", + "class_name": "Policy", + "config": {"name": "mixed_float16"}, + "registered_name": None, + }, + ) + else: + self.assertEqual( + config["dtype"], + { + "class_name": "Policy", + "config": {"name": "mixed_float16"}, + }, + ) + + layer = mp_test_util.MultiplyLayer(dtype=policy.Policy("_infer")) + config = layer.get_config() + self.assertIsNone(config["dtype"]) + layer = mp_test_util.MultiplyLayer.from_config(config) + # If a layer is serialized with the "_infer" policy, when + # deserialized into TF 2 it will have the global policy instead of + # "_infer". 
This is because "_infer" is serialized into None, and + # passing dtype=None in TensorFlow 2 indicates to use the global + # policy. + self.assertEqual(layer.dtype, "float32") + self.assertEqual(layer(x).dtype, "float32") + self.assertEqual(layer.v.dtype, "float32") + + @parameterized.named_parameters(*TESTCASES) + def test_from_config_policy_v1(self, strategy_fn): + # Test that layers serialized in previous Keras versions with the + # now-deleted PolicyV1 can be deserialized. In such cases, the PolicyV1 + # will be converted to a Policy, since PolicyV1 no longer exists. Unlike + # Policy, PolicyV1 had a "loss_scale" field, which is silently dropped + # when deserialized. + x = tf.constant([1.0], dtype=tf.float16) + with strategy_fn().scope(): + layer = mp_test_util.MultiplyLayer(dtype="mixed_float16") + config = layer.get_config() + # Change the serialized dtype policy to a PolicyV1 + if tf.__internal__.tf2.enabled(): + config["dtype"] = { + "module": "keras.mixed_precision", + "class_name": "PolicyV1", + "config": {"name": "mixed_float16", "loss_scale": None}, + "registered_name": None, + } + else: + config["dtype"] = { + "class_name": "PolicyV1", + "config": {"name": "mixed_float16", "loss_scale": None}, + } + layer = mp_test_util.MultiplyLayer.from_config(config) + self.assertEqual(layer.dtype, "float32") + self.assertEqual(layer(x).dtype, "float16") + self.assertEqual(layer.v.dtype, "float32") + config = layer.get_config() + # The loss_scale is silently dropped + if tf.__internal__.tf2.enabled(): + self.assertEqual( + config["dtype"], + { + "module": "keras.mixed_precision", + "class_name": "Policy", + "config": {"name": "mixed_float16"}, + "registered_name": None, + }, + ) + else: + self.assertEqual( + config["dtype"], + { + "class_name": "Policy", + "config": {"name": "mixed_float16"}, + }, + ) + + layer = mp_test_util.MultiplyLayer(dtype="float64") + config = layer.get_config() + config["dtype"] = { + "class_name": "PolicyV1", + "config": { + "name": "float64", + "loss_scale": { + "class_name": "FixedLossScale", + "config": {"loss_scale_value": 2.0}, + }, + }, + } + layer = mp_test_util.MultiplyLayer.from_config(config) + self.assertEqual(layer.dtype, "float64") + self.assertEqual(layer(x).dtype, "float64") + self.assertEqual(layer.v.dtype, "float64") + config = layer.get_config() + self.assertEqual(config["dtype"], "float64") + + layer = mp_test_util.MultiplyLayer(dtype=policy.Policy("_infer")) + config = layer.get_config() + config["dtype"] = { + "class_name": "PolicyV1", + "config": { + "name": "_infer", + "loss_scale": { + "class_name": "FixedLossScale", + "config": {"loss_scale_value": 2.0}, + }, + }, + } + layer = mp_test_util.MultiplyLayer.from_config(config) + self.assertEqual(layer.dtype, None) + self.assertEqual(layer(x).dtype, "float16") + self.assertEqual(layer.v.dtype, "float16") + self.assertEqual(type(layer.dtype_policy), policy.Policy) + config = layer.get_config() + self.assertEqual(config["dtype"], "float16") + + def test_delete_variable(self): + layer = base_layer.Layer(dtype="mixed_float16") + layer.x = layer.add_weight("x") + self.assertEqual(layer.trainable_weights, [layer.x]) + del layer.x + self.assertEqual(layer.trainable_weights, []) + + def test_build_and_call_layer_in_function(self): + layer = mp_test_util.MultiplyLayer(dtype=policy.Policy("mixed_float16")) + + @tf.function + def f(): + return layer(1.0) + + y = f() self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(y), 1.) 
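`test_config` and `test_from_config_policy_v1` above pin down how a dtype policy round-trips through a layer config. Outside the tests, the round trip looks roughly like this; the exact dict shape depends on whether TF2 serialization is enabled, as the tests' two branches show:

    from keras.mixed_precision import test_util as mp_test_util

    layer = mp_test_util.MultiplyLayer(dtype="mixed_float16")
    config = layer.get_config()
    print(config["dtype"]["class_name"])  # Policy
    print(config["dtype"]["config"])      # {'name': 'mixed_float16'}

    restored = mp_test_util.MultiplyLayer.from_config(config)
    print(restored.dtype)          # float32: dtype reports the variable dtype
    print(restored.compute_dtype)  # float16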
- - @parameterized.named_parameters(*TESTCASES) - def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn): - x = tf.constant([1.]) - with strategy_fn().scope(): - with policy.policy_scope('mixed_float16'): - # Test on MultiplyLayer - layer = mp_test_util.MultiplyLayer( - assert_type=tf.float16, - regularizer=mp_test_util.IdentityRegularizer()) - layer(x) - (regularizer_loss,) = layer.losses - self.assertEqual(regularizer_loss.dtype, tf.float32) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(regularizer_loss), 1.) - - # Test on MultiplyLayerWithoutAutoCast - layer = mp_test_util.MultiplyLayerWithoutAutoCast( - assert_type=tf.float16, - regularizer=mp_test_util.IdentityRegularizer()) + self.assertEqual(y.dtype, "float16") + self.assertEqual(layer.v.dtype, "float32") + self.assertEqual(self.evaluate(y), 1.0) + + def test_unsupported_strategy(self): + strategy = create_central_storage_strategy() + with strategy.scope(), self.assertRaisesRegex( + ValueError, + "Mixed precision is not supported with the " + "tf.distribute.Strategy: CentralStorageStrategy.", + ): + mp_test_util.MultiplyLayer(dtype="mixed_float16") + # Non-mixed policies are fine + mp_test_util.MultiplyLayer(dtype=policy.Policy("float64")) + + def test_input_spec_dtype(self): + # Test the InputSpec's dtype is compared against the inputs before the + # layer casts them, not after. + layer = mp_test_util.MultiplyLayer(dtype="float64") + layer.input_spec = input_spec.InputSpec(dtype="float16") + + # Test passing Eager tensors + x = tf.ones((2, 2), dtype="float16") layer(x) - (regularizer_loss,) = layer.losses - self.assertEqual(regularizer_loss.dtype, tf.float32) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(regularizer_loss), 1.) - - @parameterized.named_parameters(*TESTCASES) - def test_passing_policy_to_layer(self, strategy_fn): - x = tf.constant([1.], dtype=tf.float16) - with strategy_fn().scope(): - # Passing a Policy to 'dtype' sets the policy for that layer. - layer = mp_test_util.MultiplyLayer( - assert_type=tf.float16, dtype=policy.Policy('mixed_float16')) - # layer.dtype refers to the variable dtype - self.assertEqual(layer.dtype, tf.float32) - layer(x) - self.assertEqual(layer.v.dtype, tf.float32) - with policy.policy_scope('mixed_float16'): - # Passing a Policy to dtype overrides the global Policy - layer = mp_test_util.MultiplyLayer( - assert_type=tf.float64, dtype=policy.Policy('float64')) - self.assertEqual(layer.dtype_policy.name, 'float64') - self.assertIsInstance(layer.dtype_policy, policy.Policy) - self.assertEqual(layer.compute_dtype, tf.float64) - self.assertEqual(layer.dtype, tf.float64) - self.assertEqual(layer.variable_dtype, tf.float64) - self.assertEqual(layer(x).dtype, tf.float64) - self.assertEqual(layer.v.dtype, tf.float64) - - @parameterized.named_parameters(*TESTCASES) - def test_gradient(self, strategy_fn): - x = tf.constant([1.]) - with strategy_fn().scope() as strategy: - with policy.policy_scope('mixed_float16'): - layer = mp_test_util.MultiplyLayer(assert_type=tf.float16) - # Learning rate is small enough that if applied to a float16 variable, - # the variable will not change. So this tests the learning rate is not - # applied to a float16 value, but instead the float32 variable. 
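`test_input_spec_dtype` above verifies that an `InputSpec` dtype is checked against the raw inputs, before the layer's autocast. In isolation (a sketch; the error text is matched loosely, just as the test's regex does):

    import tensorflow as tf
    from keras import layers
    from keras.engine import input_spec

    layer = layers.Dense(2, dtype="float64")
    layer.input_spec = input_spec.InputSpec(dtype="float16")

    layer(tf.ones((2, 2), dtype="float16"))  # passes: checked pre-cast
    try:
        layer(tf.ones((2, 2), dtype="float64"))
    except ValueError as e:
        print(e)  # ...expected dtype=float16, found dtype=float64...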
- opt = gradient_descent.SGD(2**-14) - - def run_fn(): - with tf.GradientTape() as tape: + x = tf.ones((2, 2), dtype="float64") + with self.assertRaisesRegex( + ValueError, "expected dtype=float16, found dtype=.*float64" + ): + layer(x) + + # Test passing symbolic tensors + x = layers.Input((2,), dtype="float16") + y = layer(x) + model = models.Model(x, y) + model(tf.ones((2, 2))) + + x = layers.Input((2,), dtype="float64") + with self.assertRaisesRegex( + ValueError, "expected dtype=float16, found dtype=.*float64" + ): + # In TF2, the error is only raised when the model is run y = layer(x) - # Divide by num_replicas_in_sync, as the effective total loss is the - # sum of each of the replica's losses. - y /= strategy.num_replicas_in_sync - - grad = tape.gradient(y, layer.v) - return opt.apply_gradients([(grad, layer.v)]) - - op = strategy.experimental_run(run_fn) - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(op) - # The gradient with respective to the variable is 1. Since the - # variable is initialized with 1 and the learning rate is 2**-14, the - # new variable value should be: init_val - gradient * learning_rate, - # which is 1 - 1 * 2**-14 - self.assertEqual(self.evaluate(layer.v), 1 - 2**-14) - - def _test_checkpointing_layer_weights(self, strategy_fn, - mixed_prec_when_saving, - mixed_prec_when_loading): - # In this test, we potentially save with mixed precision enabled and load - # with mixed precision disabled, or vice versa. This is possible because - # variables are float32 regardless of whether mixed precision is enabled. - save_policy = 'mixed_float16' if mixed_prec_when_saving else 'float32' - load_policy = 'mixed_float16' if mixed_prec_when_loading else 'float32' - save_input_dtype = 'float16' if mixed_prec_when_saving else 'float32' - load_input_dtype = 'float16' if mixed_prec_when_loading else 'float32' - - # Create a layer and save a checkpoint. - x = tf.constant([1.]) - with strategy_fn().scope(): - with policy.policy_scope(save_policy): - layer = mp_test_util.MultiplyLayer(assert_type=save_input_dtype) - layer(x) # Build layer - layer.set_weights([np.array(100.)]) - self.assertEqual(self.evaluate(layer(x)), 100.) - checkpoint = tf.train.Checkpoint(layer=layer) - prefix = os.path.join(self.get_temp_dir(), 'ckpt') - save_path = checkpoint.save(prefix) - - # Create a new layer and restore the checkpoint. - x = tf.constant([1.]) - with strategy_fn().scope(): - with policy.policy_scope(load_policy): - layer = mp_test_util.MultiplyLayer(assert_type=load_input_dtype) - layer(x) # Build layer - layer.set_weights([np.array(200.)]) - self.assertEqual(self.evaluate(layer(x)), 200.) - checkpoint = tf.train.Checkpoint(layer=layer) - checkpoint.restore(save_path).assert_consumed().run_restore_ops() - self.assertEqual(layer.get_weights(), [100.]) - self.assertEqual(self.evaluate(layer(x)), 100.) 
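The checkpointing test above works because variables are float32 regardless of the active policy, so a checkpoint written under `mixed_float16` restores cleanly under `float32` and vice versa. A minimal standalone illustration (the temp path and layer choice are mine, not the test's):

    import tensorflow as tf
    from keras import layers

    saved = layers.Dense(1, dtype="mixed_float16")
    saved.build((None, 1))
    path = tf.train.Checkpoint(layer=saved).save("/tmp/mp_ckpt")  # illustrative path

    restored = layers.Dense(1, dtype="float32")
    restored.build((None, 1))
    tf.train.Checkpoint(layer=restored).restore(path)
    print(restored.kernel.dtype)  # float32 either way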
- - @parameterized.named_parameters(*TESTCASES) - def test_checkpointing_layer_weights(self, strategy_fn): - with self.test_session(): - self._test_checkpointing_layer_weights( - strategy_fn, mixed_prec_when_saving=True, - mixed_prec_when_loading=True) - self._test_checkpointing_layer_weights( - strategy_fn, mixed_prec_when_saving=True, - mixed_prec_when_loading=False) - self._test_checkpointing_layer_weights( - strategy_fn, mixed_prec_when_saving=False, - mixed_prec_when_loading=True) - - @parameterized.named_parameters(*TESTCASES) - def test_config(self, strategy_fn): - x = tf.constant([1.], dtype=tf.float16) - with strategy_fn().scope(): - for layer, dtype in ( - (mp_test_util.MultiplyLayer(), 'float32'), - (mp_test_util.MultiplyLayer(dtype='float64'), 'float64'), - (mp_test_util.MultiplyLayer(dtype=policy.Policy('float64')), - 'float64')): - config = layer.get_config() - self.assertEqual(config['dtype'], dtype) - self.assertIsInstance(config['dtype'], str) - layer = mp_test_util.MultiplyLayer.from_config(config) - self.assertEqual(layer.dtype, dtype) - self.assertEqual(layer(x).dtype, dtype) - self.assertEqual(layer.v.dtype, dtype) - - layer = mp_test_util.MultiplyLayer(dtype='mixed_float16') - config = layer.get_config() - self.assertEqual(config['dtype'], - {'class_name': 'Policy', - 'config': {'name': 'mixed_float16'}}) - layer = mp_test_util.MultiplyLayer.from_config(config) - self.assertEqual(layer.dtype, 'float32') - self.assertEqual(layer(x).dtype, 'float16') - self.assertEqual(layer.v.dtype, 'float32') - config = layer.get_config() - self.assertEqual(config['dtype'], - {'class_name': 'Policy', - 'config': {'name': 'mixed_float16'}}) - - layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('_infer')) - config = layer.get_config() - self.assertIsNone(config['dtype']) - layer = mp_test_util.MultiplyLayer.from_config(config) - # If a layer is serialized with the "_infer" policy, when deserialized - # into TF 2 it will have the global policy instead of "_infer". This is - # because "_infer" is serialized into None, and passing dtype=None in - # TensorFlow 2 indicates to use the global policy. - self.assertEqual(layer.dtype, 'float32') - self.assertEqual(layer(x).dtype, 'float32') - self.assertEqual(layer.v.dtype, 'float32') - - @parameterized.named_parameters(*TESTCASES) - def test_from_config_policy_v1(self, strategy_fn): - # Test that layers serialized in previous Keras versions with the - # now-deleted PolicyV1 can be deserialized. In such cases, the PolicyV1 will - # be converted to a Policy, since PolicyV1 no longer exists. Unlike Policy, - # PolicyV1 had a "loss_scale" field, which is silently dropped when - # deserialized. 
- x = tf.constant([1.], dtype=tf.float16) - with strategy_fn().scope(): - - layer = mp_test_util.MultiplyLayer(dtype='mixed_float16') - config = layer.get_config() - # Change the serialized dtype policy to a PolicyV1 - config['dtype'] = {'class_name': 'PolicyV1', - 'config': {'name': 'mixed_float16', - 'loss_scale': None}} - layer = mp_test_util.MultiplyLayer.from_config(config) - self.assertEqual(layer.dtype, 'float32') - self.assertEqual(layer(x).dtype, 'float16') - self.assertEqual(layer.v.dtype, 'float32') - config = layer.get_config() - # The loss_scale is silently dropped - self.assertEqual(config['dtype'], - {'class_name': 'Policy', - 'config': {'name': 'mixed_float16'}}) - - layer = mp_test_util.MultiplyLayer(dtype='float64') - config = layer.get_config() - config['dtype'] = {'class_name': 'PolicyV1', - 'config': {'name': 'float64', - 'loss_scale': { - 'class_name': 'FixedLossScale', - 'config': {'loss_scale_value': 2.0}}}} - layer = mp_test_util.MultiplyLayer.from_config(config) - self.assertEqual(layer.dtype, 'float64') - self.assertEqual(layer(x).dtype, 'float64') - self.assertEqual(layer.v.dtype, 'float64') - config = layer.get_config() - self.assertEqual(config['dtype'], 'float64') - - layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('_infer')) - config = layer.get_config() - config['dtype'] = {'class_name': 'PolicyV1', - 'config': {'name': '_infer', - 'loss_scale': { - 'class_name': 'FixedLossScale', - 'config': {'loss_scale_value': 2.0}}}} - layer = mp_test_util.MultiplyLayer.from_config(config) - self.assertEqual(layer.dtype, None) - self.assertEqual(layer(x).dtype, 'float16') - self.assertEqual(layer.v.dtype, 'float16') - self.assertEqual(type(layer.dtype_policy), policy.Policy) - config = layer.get_config() - self.assertEqual(config['dtype'], 'float16') - - def test_delete_variable(self): - layer = base_layer.Layer(dtype='mixed_float16') - layer.x = layer.add_weight('x') - self.assertEqual(layer.trainable_weights, [layer.x]) - del layer.x - self.assertEqual(layer.trainable_weights, []) - - def test_build_and_call_layer_in_function(self): - layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('mixed_float16')) - @tf.function - def f(): - return layer(1.) - y = f() - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(y.dtype, 'float16') - self.assertEqual(layer.v.dtype, 'float32') - self.assertEqual(self.evaluate(y), 1.) - - def test_unsupported_strategy(self): - strategy = create_central_storage_strategy() - with strategy.scope(), self.assertRaisesRegex( - ValueError, 'Mixed precision is not supported with the ' - 'tf.distribute.Strategy: CentralStorageStrategy. Either ' - 'stop using mixed precision by removing the use of the ' - '"mixed_float16" policy or use a different Strategy, e.g. ' - 'a MirroredStrategy.'): - mp_test_util.MultiplyLayer(dtype='mixed_float16') - # Non-mixed policies are fine - mp_test_util.MultiplyLayer(dtype=policy.Policy('float64')) - - def test_input_spec_dtype(self): - # Test the InputSpec's dtype is compared against the inputs before the layer - # casts them, not after. 
- layer = mp_test_util.MultiplyLayer(dtype='float64') - layer.input_spec = input_spec.InputSpec(dtype='float16') - - # Test passing Eager tensors - x = tf.ones((2, 2), dtype='float16') - layer(x) - x = tf.ones((2, 2), dtype='float64') - with self.assertRaisesRegex( - ValueError, 'expected dtype=float16, found dtype=.*float64'): - layer(x) - - # Test passing symbolic tensors - x = layers.Input((2,), dtype='float16') - y = layer(x) - model = models.Model(x, y) - model(tf.ones((2, 2))) - - x = layers.Input((2,), dtype='float64') - with self.assertRaisesRegex( - ValueError, 'expected dtype=float16, found dtype=.*float64'): - # In TF2, the error is only raised when the model is run - y = layer(x) - model = models.Model(x, y) - model(tf.ones((2, 2))) - - -if __name__ == '__main__': - base_layer_utils.enable_v2_dtype_behavior() - tf.test.main() + model = models.Model(x, y) + model(tf.ones((2, 2))) + + +if __name__ == "__main__": + base_layer_utils.enable_v2_dtype_behavior() + tf.test.main() diff --git a/keras/mixed_precision/loss_scale_optimizer.py b/keras/mixed_precision/loss_scale_optimizer.py index dc35117eec13..4ea1b5d8d9c2 100644 --- a/keras/mixed_precision/loss_scale_optimizer.py +++ b/keras/mixed_precision/loss_scale_optimizer.py @@ -14,77 +14,88 @@ # ============================================================================== """Contains the loss scaling optimizer class.""" +import tensorflow.compat.v2 as tf + from keras import backend from keras import optimizers -from keras.optimizers.optimizer_experimental import optimizer as optimizer_experimental -from keras.optimizers.optimizer_v2 import optimizer_v2 -from keras.optimizers.optimizer_v2 import utils as optimizer_utils -from keras.utils import generic_utils - -import tensorflow.compat.v2 as tf +from keras.dtensor import utils as dtensor_utils +from keras.optimizers import optimizer +from keras.optimizers import utils as optimizer_utils +from keras.optimizers.legacy import optimizer_v2 +from keras.saving import serialization_lib -from tensorflow.python.keras.optimizer_v2 import optimizer_v2 as legacy_optimizer +# isort: off from tensorflow.python.platform import tf_logging from tensorflow.python.util.tf_export import keras_export class _UnwrapPreventer: - """Wrapper that DistributionStrategy will not unwrap. + """Wrapper that DistributionStrategy will not unwrap. - Typically, DistributionStrategy will unwrap values when going from a cross- - replica context to a replica context via `call_for_each_replica`. This class - is a wrapper that DistributionStrategy will not unwrap, so it can be used to - prevent it from unwrapping a value. + Typically, DistributionStrategy will unwrap values when going from a cross- + replica context to a replica context via `call_for_each_replica`. This class + is a wrapper that DistributionStrategy will not unwrap, so it can be used to + prevent it from unwrapping a value. 
- TODO(reedwm): Find/implement a better way of preventing values from being - unwrapped by DistributionStrategy - """ + TODO(reedwm): Find/implement a better way of preventing values from being + unwrapped by DistributionStrategy + """ - __slots__ = ['value'] + __slots__ = ["value"] - def __init__(self, value): - self.value = value + def __init__(self, value): + self.value = value def _is_all_finite(grads): - """Returns a scalar boolean tensor indicating if all gradients are finite.""" - is_finite_per_grad = [ - tf.reduce_all(tf.math.is_finite(g)) for g in grads if g is not None - ] - return tf.reduce_all(is_finite_per_grad) + """Returns a scalar boolean tensor indicating if all gradients are + finite.""" + + def raw_values(g): + return g.values if isinstance(g, tf.IndexedSlices) else g + + is_finite_per_grad = [ + tf.reduce_all(tf.math.is_finite(raw_values(g))) + for g in grads + if g is not None + ] + return tf.reduce_all(is_finite_per_grad) def _op_in_graph_mode(tensor): - """Returns the tensor's op in graph mode, or the tensor in eager mode. + """Returns the tensor's op in graph mode, or the tensor in eager mode. - This is useful because sometimes an op is needed in graph mode instead of a - tensor. In eager mode, there are no ops. + This is useful because sometimes an op is needed in graph mode instead of a + tensor. In eager mode, there are no ops. - Args: - tensor: A tensor. + Args: + tensor: A tensor. - Returns: - The tensor's op in graph mode. The tensor in eager mode. - """ - if tf.executing_eagerly(): - return tensor - return tensor.op + Returns: + The tensor's op in graph mode. The tensor in eager mode. + """ + if tf.executing_eagerly(): + return tensor + return tensor.op def _assign_if_finite(var, value): - """Assigns a value to a variable if the value is finite.""" - return tf.cond( - tf.math.is_finite(value), lambda: _op_in_graph_mode(var.assign(value)), - tf.no_op) + """Assigns a value to a variable if the value is finite.""" + return tf.cond( + tf.math.is_finite(value), + lambda: _op_in_graph_mode(var.assign(value)), + tf.no_op, + ) -def _maybe_warn_about_scaling(loss_has_been_scaled, - gradients_have_been_unscaled): - """Warn if the loss or gradients hasn't been scaled or unscaled.""" - if loss_has_been_scaled and gradients_have_been_unscaled: - return +def _maybe_warn_about_scaling( + loss_has_been_scaled, gradients_have_been_unscaled +): + """Warn if the loss or gradients hasn't been scaled or unscaled.""" + if loss_has_been_scaled and gradients_have_been_unscaled: + return - example_code = """ + example_code = """ with tf.GradientTape() as tape: loss = loss_fn() scaled_loss = opt.get_scaled_loss(loss) @@ -92,1310 +103,1514 @@ def _maybe_warn_about_scaling(loss_has_been_scaled, grads = opt.get_unscaled_gradients(scaled_grads) opt.apply_gradients([(grads, var)])""" - if not loss_has_been_scaled and not gradients_have_been_unscaled: - tf_logging.warning( - 'You forgot to call LossScaleOptimizer.get_scaled_loss() and ' - 'LossScaleOptimizer.get_unscaled_gradients() before calling ' - 'LossScaleOptimizer.apply_gradients(). This will likely result in ' - 'worse model quality, so please call them in the correct places! 
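The `example_code` snippet embedded in `_maybe_warn_about_scaling` above is the canonical custom-training-loop pattern. Fleshed out into a runnable sketch (the optimizer and variable are illustrative, and this assumes a TF build where the legacy optimizer namespace exists, as this patch presumes):

    import tensorflow as tf

    var = tf.Variable(1.0)
    opt = tf.keras.mixed_precision.LossScaleOptimizer(
        tf.keras.optimizers.legacy.SGD(0.1)
    )

    with tf.GradientTape() as tape:
        loss = var * var
        scaled_loss = opt.get_scaled_loss(loss)  # loss * current loss scale
    scaled_grads = tape.gradient(scaled_loss, [var])
    grads = opt.get_unscaled_gradients(scaled_grads)  # divide the scale out
    opt.apply_gradients(zip(grads, [var]))  # also updates the dynamic scale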
For ' - f'example:{example_code}\nFor more information, see ' - 'https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/LossScaleOptimizer' - ) - elif not loss_has_been_scaled: - tf_logging.warning( - 'You forgot to call LossScaleOptimizer.get_scaled_loss() before ' - 'calling LossScaleOptimizer.apply_gradients() (you did call ' - 'get_unscaled_gradients() however). This will likely result in worse ' - 'model quality, so please call get_scaled_loss() in the correct place! ' - f'For example:{example_code}\nFor more information, see ' - 'https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/LossScaleOptimizer' - ) - elif not gradients_have_been_unscaled: - tf_logging.warning( - 'You forgot to call LossScaleOptimizer.get_unscaled_gradients() ' - 'before calling LossScaleOptimizer.apply_gradients() (you did call ' - 'get_scaled_loss() however). This will likely result in worse ' - 'model quality, so please call get_unscaled_gradients() in the correct ' - f'place! For example:{example_code}\nFor more information, see ' - 'https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/LossScaleOptimizer' - ) + if not loss_has_been_scaled and not gradients_have_been_unscaled: + tf_logging.warning( + "You forgot to call LossScaleOptimizer.get_scaled_loss() and " + "LossScaleOptimizer.get_unscaled_gradients() before calling " + "LossScaleOptimizer.apply_gradients(). This will likely result in " + "worse model quality, so please call them in the correct places! " + f"For example:{example_code}\nFor more information, see " + "https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/LossScaleOptimizer" # noqa: E501 + ) + elif not loss_has_been_scaled: + tf_logging.warning( + "You forgot to call LossScaleOptimizer.get_scaled_loss() before " + "calling LossScaleOptimizer.apply_gradients() (you did call " + "get_unscaled_gradients() however). This will likely result in " + "worse model quality, so please call get_scaled_loss() in the " + f"correct place! For example:{example_code}\nFor more information, " + "see " + "https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/LossScaleOptimizer" # noqa: E501 + ) + elif not gradients_have_been_unscaled: + tf_logging.warning( + "You forgot to call LossScaleOptimizer.get_unscaled_gradients() " + "before calling LossScaleOptimizer.apply_gradients() (you did call " + "get_scaled_loss() however). This will likely result in worse " + "model quality, so please call get_unscaled_gradients() in the " + f"correct place! For example:{example_code}\nFor more information, " + "see " + "https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/LossScaleOptimizer" # noqa: E501 + ) class _DynamicLossScaleState(tf.__internal__.tracking.Trackable): - """The state of a dynamic loss scale.""" - - def __init__(self, - initial_loss_scale, - growth_steps, - multiplier): - """Creates the dynamic loss scale.""" - super().__init__() - self._initial_loss_scale = float(initial_loss_scale) - self._growth_steps = int(growth_steps) - self._multiplier = float(multiplier) - - self._weights = {} - self._current_loss_scale = self._add_weight( - name='current_loss_scale', - dtype=tf.float32, - initial_value=self._initial_loss_scale) - # The number of consecutive steps with finite gradients since the last - # nonfinite gradient or change in loss scale. The name is 'good_steps' for - # backwards compatibility with older checkpoints. 
- self._counter = self._add_weight( - name='good_steps', dtype=tf.int64, initial_value=0) - - def _add_weight(self, name, initial_value, dtype=None): - """Adds a weight to this loss scale. - - Args: - name: Variable name. - initial_value: The variable's initial value. - dtype: The type of the variable. - - Returns: - A variable. - - Raises: - RuntimeError: If a weight with `name` has already been added. - """ - variable = tf.Variable( - initial_value=initial_value, - name=name, - dtype=dtype, - trainable=False, - synchronization=tf.VariableSynchronization.AUTO, - # Set aggregation to NONE, as loss scaling variables should never be - # aggregated. - aggregation=tf.VariableAggregation.NONE) - if tf.executing_eagerly(): - graph_key = None - else: - graph = tf.compat.v1.get_default_graph() - graph_key = graph._graph_key # pylint: disable=protected-access - - key = (name, graph_key) - self._weights[key] = variable - self._handle_deferred_dependencies(name=name, trackable=variable) - backend.track_variable(variable) - return variable - - def _trackable_children(self, save_type='checkpoint', **kwargs): - """From Trackable. Gather graph-specific weights to save.""" - if tf.executing_eagerly(): - graph_key = None - else: - graph = tf.compat.v1.get_default_graph() - graph_key = graph._graph_key # pylint: disable=protected-access - weights = {} - for (name, g), v in sorted(self._weights.items(), key=lambda i: i[0][0]): - if g == graph_key: - weights[name] = v - weights.update( - super()._trackable_children(save_type, **kwargs)) - return weights - - def _lookup_dependency(self, name): - """From Trackable. Find a weight in the current graph.""" - unconditional = super()._lookup_dependency(name) - if unconditional is not None: - return unconditional - if tf.executing_eagerly(): - graph_key = None - else: - graph = tf.compat.v1.get_default_graph() - graph_key = graph._graph_key # pylint: disable=protected-access - return self._weights.get((name, graph_key), None) - - @property - def initial_loss_scale(self): - return self._initial_loss_scale - - @property - def growth_steps(self): - return self._growth_steps - - @property - def multiplier(self): - return self._multiplier - - @property - def current_loss_scale(self): - """Returns the current loss scale as a float32 `tf.Variable`.""" - return self._current_loss_scale - - @property - def counter(self): - """Returns the counter as a float32 `tf.Variable`.""" - return self._counter - - def __call__(self): - """Returns the current loss scale as a scalar `float32` tensor.""" - return tf.convert_to_tensor(self._current_loss_scale) - - def update(self, grads): - """Updates the value of the loss scale. - - Args: - grads: A nested structure of unscaled gradients, each which is an - all-reduced gradient of the loss with respect to a weight. - - Returns: - update_op: In eager mode, None. In graph mode, an op to update the loss - scale. - should_apply_gradients: Either a bool or a scalar boolean tensor. If - False, the caller should skip applying `grads` to the variables this - step. - """ - grads = tf.nest.flatten(grads) - if tf.distribute.has_strategy( - ) and tf.distribute.in_cross_replica_context(): - distribution = tf.distribute.get_strategy() - is_finite_per_replica = distribution.extended.call_for_each_replica( - _is_all_finite, args=(grads,)) - # Each replica computed the same `is_finite` value, since `grads` is - # all-reduced across replicas. Arbitrarily take `is_finite` from the first - # replica. 
- is_finite = ( - distribution.experimental_local_results(is_finite_per_replica)[0]) - else: - is_finite = _is_all_finite(grads) - - def update_if_finite_grads(): - """Update assuming the gradients are finite.""" - - def incr_loss_scale(): - new_loss_scale = self.current_loss_scale * self.multiplier - return tf.group( - _assign_if_finite(self.current_loss_scale, new_loss_scale), - self.counter.assign(0)) - - return tf.cond( - self.counter + 1 >= self.growth_steps, - incr_loss_scale, - lambda: _op_in_graph_mode(self.counter.assign_add(1))) - - def update_if_not_finite_grads(): - """Update assuming the gradients are nonfinite.""" - - new_loss_scale = tf.maximum( - self.current_loss_scale / self.multiplier, 1) - return tf.group( - self.counter.assign(0), - self.current_loss_scale.assign(new_loss_scale)) - - update_op = tf.cond(is_finite, - update_if_finite_grads, - update_if_not_finite_grads) - should_apply_gradients = is_finite - return update_op, should_apply_gradients + """The state of a dynamic loss scale.""" + + def __init__(self, initial_loss_scale, growth_steps, multiplier): + """Creates the dynamic loss scale.""" + super().__init__() + self._initial_loss_scale = float(initial_loss_scale) + self._growth_steps = int(growth_steps) + self._multiplier = float(multiplier) + + self._weights = {} + self._current_loss_scale = self._add_weight( + name="current_loss_scale", + dtype=tf.float32, + initial_value=self._initial_loss_scale, + ) + # The number of consecutive steps with finite gradients since the last + # nonfinite gradient or change in loss scale. The name is 'good_steps' + # for backwards compatibility with older checkpoints. + self._counter = self._add_weight( + name="good_steps", dtype=tf.int64, initial_value=0 + ) + + def _add_weight(self, name, initial_value, dtype=None): + """Adds a weight to this loss scale. + + Args: + name: Variable name. + initial_value: The variable's initial value. + dtype: The type of the variable. + + Returns: + A variable. + + Raises: + RuntimeError: If a weight with `name` has already been added. + """ + variable = tf.Variable( + initial_value=initial_value, + name=name, + dtype=dtype, + trainable=False, + synchronization=tf.VariableSynchronization.AUTO, + # Set aggregation to NONE, as loss scaling variables should never be + # aggregated. + aggregation=tf.VariableAggregation.NONE, + ) + if tf.executing_eagerly(): + graph_key = None + else: + graph = tf.compat.v1.get_default_graph() + graph_key = graph._graph_key + + key = (name, graph_key) + self._weights[key] = variable + self._handle_deferred_dependencies(name=name, trackable=variable) + backend.track_variable(variable) + return variable + + def _trackable_children(self, save_type="checkpoint", **kwargs): + """From Trackable. Gather graph-specific weights to save.""" + if tf.executing_eagerly(): + graph_key = None + else: + graph = tf.compat.v1.get_default_graph() + graph_key = graph._graph_key + weights = {} + for (name, g), v in sorted( + self._weights.items(), key=lambda i: i[0][0] + ): + if g == graph_key: + weights[name] = v + weights.update(super()._trackable_children(save_type, **kwargs)) + return weights + + def _lookup_dependency(self, name, cached_dependencies=None): + """From Trackable. 
Find a weight in the current graph.""" + if cached_dependencies is not None: + unconditional = cached_dependencies.get(name) + else: + unconditional = super()._lookup_dependency(name) + if unconditional is not None: + return unconditional + if tf.executing_eagerly(): + graph_key = None + else: + graph = tf.compat.v1.get_default_graph() + graph_key = graph._graph_key + return self._weights.get((name, graph_key), None) + + @property + def initial_loss_scale(self): + return self._initial_loss_scale + + @property + def growth_steps(self): + return self._growth_steps + + @property + def multiplier(self): + return self._multiplier + + @property + def current_loss_scale(self): + """Returns the current loss scale as a float32 `tf.Variable`.""" + return self._current_loss_scale + + @property + def counter(self): + """Returns the counter as an int64 `tf.Variable`.""" + return self._counter + + def __call__(self): + """Returns the current loss scale as a scalar `float32` tensor.""" + return tf.convert_to_tensor(self._current_loss_scale) + + def update(self, grads): + """Updates the value of the loss scale. + + Args: + grads: A nested structure of unscaled gradients, each of which is an + all-reduced gradient of the loss with respect to a weight. + + Returns: + update_op: In eager mode, None. In graph mode, an op to update the + loss scale. + should_apply_gradients: Either a bool or a scalar boolean tensor. If + False, the caller should skip applying `grads` to the variables this + step. + """ + grads = tf.nest.flatten(grads) + if ( + tf.distribute.has_strategy() + and tf.distribute.in_cross_replica_context() + ): + distribution = tf.distribute.get_strategy() + is_finite_per_replica = distribution.extended.call_for_each_replica( + _is_all_finite, args=(grads,) + ) + # Each replica computed the same `is_finite` value, since `grads` is + # all-reduced across replicas. Arbitrarily take `is_finite` from the + # first replica. + is_finite = distribution.experimental_local_results( + is_finite_per_replica + )[0] + else: + is_finite = _is_all_finite(grads) + + def update_if_finite_grads(): + """Update assuming the gradients are finite.""" + + def incr_loss_scale(): + new_loss_scale = self.current_loss_scale * self.multiplier + return tf.group( + _assign_if_finite(self.current_loss_scale, new_loss_scale), + self.counter.assign(0), + ) + + return tf.cond( + self.counter + 1 >= self.growth_steps, + incr_loss_scale, + lambda: _op_in_graph_mode(self.counter.assign_add(1)), + ) + + def update_if_not_finite_grads(): + """Update assuming the gradients are nonfinite.""" + + new_loss_scale = tf.maximum( + self.current_loss_scale / self.multiplier, 1 + ) + return tf.group( + self.counter.assign(0), + self.current_loss_scale.assign(new_loss_scale), + ) + + update_op = tf.cond( + is_finite, update_if_finite_grads, update_if_not_finite_grads + ) + should_apply_gradients = is_finite + return update_op, should_apply_gradients # See LossScaleOptimizer docstring for why this is so big -_DEFAULT_INITIAL_SCALE = 2 ** 15 +_DEFAULT_INITIAL_SCALE = 2**15 _DEFAULT_GROWTH_STEPS = 2000 # TODO(b/215389169): Delete this class after `OptimizerV2` is deprecated. class LossScaleOptimizerMetaclass(type): - """Metaclass that delegates LossScaleOptimizer instance creation. - - This metaclass causes a LossScaleOptimizer or LossScaleOptimizerV3 to be - created when a BaseLossScaleOptimizer is constructed.
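The `update()` method above reduces to a small state machine: grow the scale after `growth_steps` consecutive finite steps, shrink it (never below 1) on any nonfinite step, and reset the counter on every scale change. A dependency-free sketch of that rule (the real code additionally guards the grown scale with `_assign_if_finite` so it never overflows to Inf):

```
def simulate_update(scale, counter, growth_steps, multiplier, grads_finite):
    # Returns (new_scale, new_counter, should_apply_gradients).
    if grads_finite:
        if counter + 1 >= growth_steps:
            return scale * multiplier, 0, True   # grow and reset counter
        return scale, counter + 1, True          # keep counting
    return max(scale / multiplier, 1), 0, False  # shrink, reset, skip step

scale, counter = 2 ** 15, 0
scale, counter, apply_step = simulate_update(scale, counter, 2000, 2, False)
assert (scale, counter, apply_step) == (2 ** 14, 0, False)
```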
As a result, when a - user creates a loss scale optimizer with - `tf.keras.mixed_precision.LossScaleOptimizer(opt)`, either a - LossScaleOptimizer or LossScaleOptimizerV3 will be created, depending on the - type of `opt`. - """ - - def __call__(cls, inner_optimizer, *args, **kwargs): - if cls is not BaseLossScaleOptimizer: - return super(LossScaleOptimizerMetaclass, - cls).__call__(inner_optimizer, *args, **kwargs) - if isinstance(inner_optimizer, optimizer_v2.OptimizerV2): - return LossScaleOptimizer(inner_optimizer, *args, **kwargs) - elif isinstance(inner_optimizer, optimizer_experimental.Optimizer): - return LossScaleOptimizerV3(inner_optimizer, *args, **kwargs) - - # Raise TypeError because inner_optimizer is not an optimizer - msg = (f'"inner_optimizer" must be an instance of ' - f'`tf.keras.optimizers.Optimizer` or ' - f'`tf.keras.optimizers.experimental.Optimizer`, but got: ' - f'{inner_optimizer}.') - if isinstance(inner_optimizer, legacy_optimizer.OptimizerV2): - msg += (' Please make sure "inner_optimizer" is not an instance of ' - '`tensorflow.python.keras.optimizers`, which is ' - 'the legacy keras code and will be removed in future release. ' - 'Please use the tf.keras public API instead.') - raise TypeError(msg) - - -# TODO(b/215389169): Delete this class after `OptimizerV2` is deprecated. -# pylint: disable=g-classes-have-attributes -@keras_export('keras.mixed_precision.LossScaleOptimizer') -class BaseLossScaleOptimizer(metaclass=LossScaleOptimizerMetaclass): - """An optimizer that applies loss scaling to prevent numeric underflow. - - Loss scaling is a technique to prevent numeric underflow in intermediate - gradients when float16 is used. To prevent underflow, the loss is multiplied - (or "scaled") by a certain factor called the "loss scale", which causes - intermediate gradients to be scaled by the loss scale as well. The final - gradients are divided (or "unscaled") by the loss scale to bring them back to - their original value. - - `LossScaleOptimizer` wraps another optimizer and applies loss scaling to it. - By default, the loss scale is dynamically updated over time so you do not have - to choose the loss scale. The `minimize` method automatically scales the loss, - unscales the gradients, and updates the loss scale so all you have to do is - wrap your optimizer with a `LossScaleOptimizer` if you use `minimize`. For - example: - - >>> opt = tf.keras.optimizers.SGD(0.25) - >>> opt = tf.keras.mixed_precision.LossScaleOptimizer(opt) - >>> var = tf.Variable(1.) - >>> loss_fn = lambda: var ** 2 - >>> # 'minimize' applies loss scaling and updates the loss sale. - >>> opt.minimize(loss_fn, var_list=var) - >>> var.numpy() - 0.5 - - If a `tf.GradientTape` is used to compute gradients instead of `minimize`, you - must scale the loss and gradients manually. This can be done with the - `LossScaleOptimizer.get_scaled_loss` and - `LossScaleOptimizer.get_unscaled_gradients` methods. For example: - - >>> with tf.GradientTape() as tape: - ... loss = loss_fn() - ... scaled_loss = opt.get_scaled_loss(loss) - >>> scaled_grad = tape.gradient(scaled_loss, var) - >>> (grad,) = opt.get_unscaled_gradients([scaled_grad]) - >>> opt.apply_gradients([(grad, var)]) # Loss scale is updated here - >>> var.numpy() - 0.25 - - Warning: If you forget to call `get_scaled_loss` or `get_unscaled_gradients` - (or both) when using a `tf.GradientTape`, the model will likely converge to a - worse quality. Please make sure you call each function exactly once. 
- - When mixed precision with float16 is used, there is typically no risk of - underflow affecting model quality if loss scaling is properly used. See - [the mixed precision guide]( - https://www.tensorflow.org/guide/keras/mixed_precision) for more information - on how to use mixed precision. - - Args: - inner_optimizer: The `tf.keras.optimizers.Optimizer` or - `tf.keras.optimizers.experimental.Optimizer` instance to wrap. - dynamic: Bool indicating whether dynamic loss scaling is used. Defaults to - True. If True, the loss scale will be dynamically updated over time using - an algorithm that keeps the loss scale at approximately its optimal value. - If False, a single fixed loss scale is used and `initial_scale` must be - specified, which is used as the loss scale. Recommended to keep as True, - as choosing a fixed loss scale can be tricky. Currently, there is a small - performance overhead to dynamic loss scaling compared to fixed loss - scaling. - initial_scale: The initial loss scale. If `dynamic` is True, this defaults - to `2 ** 15`. If `dynamic` is False, this must be specified and acts as - the sole loss scale, as the loss scale does not change over time. When - dynamic loss scaling is used, is better for this to be a very high number, - because a loss scale that is too high gets lowered far more quickly than a - loss scale that is too low gets raised. - dynamic_growth_steps: With dynamic loss scaling, every - `dynamic_growth_steps` steps with finite gradients, the loss scale is - doubled. Defaults to 2000. If a nonfinite gradient is encountered, the - count is reset back to zero, gradients are skipped that step, and the loss - scale is halved. The count can be queried with - `LossScaleOptimizer.dynamic_counter`. This argument can only be specified - if `dynamic` is True. - - `LossScaleOptimizer` will occasionally skip applying gradients to the - variables, in which case the trainable variables will not change that step. - This is done because the dynamic loss scale will sometimes be raised too - high, causing overflow in the gradients. Typically, the first 2 to 15 steps of - the model are skipped as the initial loss scale is very high, but afterwards - steps will only be skipped on average 0.05% of the time (the fraction of steps - skipped is `1 / dynamic_growth_steps`). - - `LossScaleOptimizer` delegates all public `Optimizer` methods to the inner - optimizer. Additionally, in methods `minimize` and `get_gradients`, it scales - the loss and unscales the gradients. In methods `minimize` and - `apply_gradients`, it additionally updates the loss scale and skips applying - gradients if any gradient has a nonfinite value. - - ### Hyperparameters - - If wrapping a `tf.keras.optimizers.Optimizer`, hyperparameters can be accessed - and set on the LossScaleOptimizer, which will be delegated to the wrapped - optimizer. - - >>> opt = tf.keras.optimizers.Adam(beta_1=0.8, epsilon=1e-5) - >>> opt = tf.keras.mixed_precision.LossScaleOptimizer(opt) - >>> opt.beta_1 # Equivalent to `opt.inner_optimizer.beta_1` - 0.8 - >>> opt.beta_1 = 0.7 # Equivalent to `opt.inner_optimizer.beta_1 = 0.7` - >>> opt.beta_1 - 0.7 - >>> opt.inner_optimizer.beta_1 - 0.7 - - However, accessing or setting non-hyperparameters is not delegated to the - LossScaleOptimizer. In an Adam optimizer, `beta_1` is a hyperparameter but - `epsilon` is not, as the Adam optimizer only calls `Optimizer._set_hyper` on - `beta_1`. - - >>> opt.inner_optimizer.epsilon - 1e-5 - >>> opt.epsilon - Traceback (most recent call last): - ... 
- AttributeError: 'LossScaleOptimizer' object has no attribute 'epsilon' - >>> opt.epsilon = 1e-4 # This does NOT set epsilon on `opt.inner_optimizer` - >>> opt.inner_optimizer.epsilon - >>> 1e-5 - - In the above example, despite epsilon being set on the LossScaleOptimizer, the - old epsilon value will still be used when training as epsilon was not set on - the inner optimizer. - """ - - @property - def dynamic(self): - """Bool indicating whether dynamic loss scaling is used.""" - raise NotImplementedError - - @property - def loss_scale(self): - """The current loss scale as a float32 scalar tensor.""" - raise NotImplementedError - - @property - def dynamic_counter(self): - """The number of steps since the loss scale was last increased or decreased. - - This is None if `LossScaleOptimizer.dynamic` is False. - - The counter is incremented every step. Once it reaches - `LossScaleOptimizer.dynamic_growth_steps`, the loss scale will be doubled - and the counter will be reset back to zero. If nonfinite gradients are - encountered, the loss scale will be halved and the counter will be reset - back to zero. + """Metaclass that delegates LossScaleOptimizer instance creation. + + This metaclass causes a LossScaleOptimizer or LossScaleOptimizerV3 to be + created when a BaseLossScaleOptimizer is constructed. As a result, when a + user creates a loss scale optimizer with + `tf.keras.mixed_precision.LossScaleOptimizer(opt)`, either a + LossScaleOptimizer or LossScaleOptimizerV3 will be created, depending on the + type of `opt`. """ - raise NotImplementedError - @property - def initial_scale(self): - """The initial loss scale. - - If `LossScaleOptimizer.dynamic` is False, this is the same number as - `LossScaleOptimizer.loss_scale`, as the loss scale never changes. - """ - raise NotImplementedError + def __call__(cls, inner_optimizer, *args, **kwargs): + if cls is not BaseLossScaleOptimizer: + return super(LossScaleOptimizerMetaclass, cls).__call__( + inner_optimizer, *args, **kwargs + ) + if isinstance(inner_optimizer, optimizer_v2.OptimizerV2): + return LossScaleOptimizer(inner_optimizer, *args, **kwargs) + elif isinstance(inner_optimizer, optimizer.Optimizer): + return LossScaleOptimizerV3(inner_optimizer, *args, **kwargs) + + # Raise TypeError because inner_optimizer is not an optimizer + msg = ( + '"inner_optimizer" must be an instance of ' + "`tf.keras.optimizers.Optimizer` or " + "`tf.keras.optimizers.experimental.Optimizer`, but got: " + f"{inner_optimizer}." + ) + raise TypeError(msg) - @property - def dynamic_growth_steps(self): - """The number of steps it takes to increase the loss scale. - This is None if `LossScaleOptimizer.dynamic` is False. - - Every `dynamic_growth_steps` consecutive steps with finite gradients, the - loss scale is increased. - """ - raise NotImplementedError - - @property - def inner_optimizer(self): - """The optimizer that this LossScaleOptimizer is wrapping.""" - raise NotImplementedError - - def get_scaled_loss(self, loss): - """Scales the loss by the loss scale. +# TODO(b/215389169): Delete this class after `OptimizerV2` is deprecated. - This method is only needed if you compute gradients manually, e.g. with - `tf.GradientTape`. In that case, call this method to scale the loss before - passing the loss to `tf.GradientTape`. If you use - `LossScaleOptimizer.minimize` or `LossScaleOptimizer.get_gradients`, loss - scaling is automatically applied and this method is unneeded. - If this method is called, `get_unscaled_gradients` should also be called. 
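The dispatch performed by `LossScaleOptimizerMetaclass.__call__` above, reduced to a standalone sketch with stand-in types (`Base`, `WrapsInt`, `WrapsStr` are hypothetical, only to show the mechanism):

```
class _Dispatch(type):
    def __call__(cls, inner, *args, **kwargs):
        if cls is not Base:
            # Concrete subclasses instantiate normally.
            return super().__call__(inner, *args, **kwargs)
        if isinstance(inner, int):
            return WrapsInt(inner, *args, **kwargs)
        if isinstance(inner, str):
            return WrapsStr(inner, *args, **kwargs)
        raise TypeError(f"unsupported inner object: {inner!r}")

class Base(metaclass=_Dispatch):
    def __init__(self, inner):
        self.inner = inner

class WrapsInt(Base):
    pass

class WrapsStr(Base):
    pass

assert type(Base(3)) is WrapsInt
assert type(Base("x")) is WrapsStr
```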
- See the `tf.keras.mixed_precision.LossScaleOptimizer` doc for - an example. +@keras_export("keras.mixed_precision.LossScaleOptimizer") +class BaseLossScaleOptimizer(metaclass=LossScaleOptimizerMetaclass): + """An optimizer that applies loss scaling to prevent numeric underflow. + + Loss scaling is a technique to prevent numeric underflow in intermediate + gradients when float16 is used. To prevent underflow, the loss is multiplied + (or "scaled") by a certain factor called the "loss scale", which causes + intermediate gradients to be scaled by the loss scale as well. The final + gradients are divided (or "unscaled") by the loss scale to bring them back + to their original value. + + `LossScaleOptimizer` wraps another optimizer and applies loss scaling to it. + By default, the loss scale is dynamically updated over time so you do not + have to choose the loss scale. The `minimize` method automatically scales + the loss, unscales the gradients, and updates the loss scale so all you have + to do is wrap your optimizer with a `LossScaleOptimizer` if you use + `minimize`. For example: + + >>> opt = tf.keras.optimizers.experimental.SGD(0.25) + >>> opt = tf.keras.mixed_precision.LossScaleOptimizer(opt) + >>> var = tf.Variable(1.) + >>> loss_fn = lambda: var ** 2 + >>> # 'minimize' applies loss scaling and updates the loss scale. + >>> opt.minimize(loss_fn, var_list=[var]) + >>> var.numpy() + 0.5 + + If a `tf.GradientTape` is used to compute gradients instead of `minimize`, + you must scale the loss and gradients manually. This can be done with the + `LossScaleOptimizer.get_scaled_loss` and + `LossScaleOptimizer.get_unscaled_gradients` methods. For example: + + >>> with tf.GradientTape() as tape: + ... loss = loss_fn() + ... scaled_loss = opt.get_scaled_loss(loss) + >>> scaled_grad = tape.gradient(scaled_loss, var) + >>> (grad,) = opt.get_unscaled_gradients([scaled_grad]) + >>> opt.apply_gradients([(grad, var)]) # Loss scale is updated here + >>> var.numpy() + 0.25 + + Warning: If you forget to call `get_scaled_loss` or `get_unscaled_gradients` + (or both) when using a `tf.GradientTape`, the model will likely converge to + a worse quality. Please make sure you call each function exactly once. + + When mixed precision with float16 is used, there is typically no risk of + underflow affecting model quality if loss scaling is properly used. See + [the mixed precision guide]( + https://www.tensorflow.org/guide/keras/mixed_precision) for more information + on how to use mixed precision. Args: - loss: The loss, which will be multiplied by the loss scale. Can either be - a tensor or a callable returning a tensor. - - Returns: - `loss` multiplied by `LossScaleOptimizer.loss_scale`. + inner_optimizer: The `tf.keras.optimizers.Optimizer` or + `tf.keras.optimizers.experimental.Optimizer` instance to wrap. + dynamic: Bool indicating whether dynamic loss scaling is used. If `True`, + the loss scale will be dynamically updated over time using an algorithm + that keeps the loss scale at approximately its optimal value. If False, + a single fixed loss scale is used and `initial_scale` must be + specified, which is used as the loss scale. + Recommended to keep as True, as choosing a fixed loss scale can be + tricky. Currently, there is a small performance overhead to dynamic loss + scaling compared to fixed loss scaling. Defaults to `True`. + initial_scale: The initial loss scale. If `dynamic` is True, this defaults + to `2 ** 15`.
If `dynamic` is False, this must be specified and acts as + the sole loss scale, as the loss scale does not change over time. When + dynamic loss scaling is used, it is better for this to be a very high + number, because a loss scale that is too high gets lowered far more + quickly than a loss scale that is too low gets raised. + dynamic_growth_steps: With dynamic loss scaling, every + `dynamic_growth_steps` steps with finite gradients, the loss scale is + doubled. If a nonfinite gradient is encountered, the + count is reset back to zero, gradients are skipped that step, and the + loss scale is halved. The count can be queried with + `LossScaleOptimizer.dynamic_counter`. This argument can only be + specified if `dynamic` is True. Defaults to `2000`. + + `LossScaleOptimizer` will occasionally skip applying gradients to the + variables, in which case the trainable variables will not change that step. + This is done because the dynamic loss scale will sometimes be raised too + high, causing overflow in the gradients. Typically, the first 2 to 15 steps + of the model are skipped as the initial loss scale is very high, but + afterwards steps will only be skipped on average 0.05% of the time (the + fraction of steps skipped is `1 / dynamic_growth_steps`). + + `LossScaleOptimizer` delegates all public `Optimizer` methods to the inner + optimizer. Additionally, in methods `minimize` and `get_gradients`, it + scales the loss and unscales the gradients. In methods `minimize` and + `apply_gradients`, it additionally updates the loss scale and skips applying + gradients if any gradient has a nonfinite value. + + ### Hyperparameters + + If wrapping a `tf.keras.optimizers.Optimizer`, hyperparameters can be + accessed and set on the LossScaleOptimizer, which will be delegated to the + wrapped optimizer. + + >>> opt = tf.keras.optimizers.legacy.Adam(beta_1=0.8, epsilon=1e-5) + >>> opt = tf.keras.mixed_precision.LossScaleOptimizer(opt) + >>> opt.beta_1 # Equivalent to `opt.inner_optimizer.beta_1` + 0.8 + >>> opt.beta_1 = 0.7 # Equivalent to `opt.inner_optimizer.beta_1 = 0.7` + >>> opt.beta_1 + 0.7 + >>> opt.inner_optimizer.beta_1 + 0.7 + + However, accessing or setting non-hyperparameters is not delegated to the + LossScaleOptimizer. In an Adam optimizer, `beta_1` is a hyperparameter but + `epsilon` is not, as the Adam optimizer only calls `Optimizer._set_hyper` on + `beta_1`. + + >>> opt.inner_optimizer.epsilon + 1e-5 + >>> opt.epsilon + Traceback (most recent call last): + ... + AttributeError: 'LossScaleOptimizer' object has no attribute 'epsilon' + >>> opt.epsilon = 1e-4 # This does NOT set epsilon on `opt.inner_optimizer` + >>> opt.inner_optimizer.epsilon + 1e-5 + + In the above example, despite epsilon being set on the LossScaleOptimizer, + the old epsilon value will still be used when training as epsilon was not + set on the inner optimizer. """ - # Calls to this function would be delegated to `get_scaled_loss` - # of either `LossScaleOptimizer` or `LossScaleOptimizerV3`, depending on - # the type of `inner_optimizer`. - raise NotImplementedError - - def get_unscaled_gradients(self, grads): - """Unscales the gradients by the loss scale. - - This method is only needed if you compute gradients manually, e.g. with - `tf.GradientTape`. In that case, call this method to unscale the gradients - after computing them with `tf.GradientTape`. If you use - `LossScaleOptimizer.minimize` or `LossScaleOptimizer.get_gradients`, loss - scaling is automatically applied and this method is unneeded.
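A short usage sketch of the two modes the `dynamic` / `initial_scale` / `dynamic_growth_steps` arguments describe (assumes a TF build where `tf.keras.optimizers.experimental.SGD` exists, as in the doctest above):

```
import tensorflow as tf

# Dynamic scaling (the default): initial_scale and dynamic_growth_steps
# fall back to 2 ** 15 and 2000 respectively.
dyn = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.experimental.SGD())
assert dyn.dynamic and int(dyn.initial_scale) == 2 ** 15

# Fixed scaling: dynamic=False requires an explicit initial_scale; the
# scale then never changes and dynamic_counter stays None.
fixed = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.experimental.SGD(), dynamic=False, initial_scale=1024)
assert not fixed.dynamic and fixed.dynamic_counter is None
```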
- - If this method is called, `get_scaled_loss` should also be called. See - the `tf.keras.mixed_precision.LossScaleOptimizer` doc for an - example. - Args: - grads: A list of tensors, each which will be divided by the loss scale. - Can have None values, which are ignored. - - Returns: - A new list the same size as `grads`, where every non-None value in `grads` - is divided by `LossScaleOptimizer.loss_scale`. + @property + def dynamic(self): + """Bool indicating whether dynamic loss scaling is used.""" + raise NotImplementedError + + @property + def loss_scale(self): + """The current loss scale as a float32 scalar tensor.""" + raise NotImplementedError + + @property + def dynamic_counter(self): + """The number of steps since the loss scale was last increased or + decreased. + + This is None if `LossScaleOptimizer.dynamic` is False. + + The counter is incremented every step. Once it reaches + `LossScaleOptimizer.dynamic_growth_steps`, the loss scale will be + doubled and the counter will be reset back to zero. If nonfinite + gradients are encountered, the loss scale will be halved and the counter + will be reset back to zero. + """ + raise NotImplementedError + + @property + def initial_scale(self): + """The initial loss scale. + + If `LossScaleOptimizer.dynamic` is False, this is the same number as + `LossScaleOptimizer.loss_scale`, as the loss scale never changes. + """ + raise NotImplementedError + + @property + def dynamic_growth_steps(self): + """The number of steps it takes to increase the loss scale. + + This is None if `LossScaleOptimizer.dynamic` is False. + + Every `dynamic_growth_steps` consecutive steps with finite gradients, + the loss scale is increased. + """ + raise NotImplementedError + + @property + def inner_optimizer(self): + """The optimizer that this LossScaleOptimizer is wrapping.""" + raise NotImplementedError + + def get_scaled_loss(self, loss): + """Scales the loss by the loss scale. + + This method is only needed if you compute gradients manually, e.g. with + `tf.GradientTape`. In that case, call this method to scale the loss + before passing the loss to `tf.GradientTape`. If you use + `LossScaleOptimizer.minimize` or `LossScaleOptimizer.get_gradients`, + loss scaling is automatically applied and this method is unneeded. + + If this method is called, `get_unscaled_gradients` should also be + called. See the `tf.keras.mixed_precision.LossScaleOptimizer` doc for + an example. + + Args: + loss: The loss, which will be multiplied by the loss scale. Can either + be a tensor or a callable returning a tensor. + + Returns: + `loss` multiplied by `LossScaleOptimizer.loss_scale`. + """ + # Calls to this function would be delegated to `get_scaled_loss` + # of either `LossScaleOptimizer` or `LossScaleOptimizerV3`, depending on + # the type of `inner_optimizer`. + raise NotImplementedError + + def get_unscaled_gradients(self, grads): + """Unscales the gradients by the loss scale. + + This method is only needed if you compute gradients manually, e.g. with + `tf.GradientTape`. In that case, call this method to unscale the + gradients after computing them with `tf.GradientTape`. If you use + `LossScaleOptimizer.minimize` or `LossScaleOptimizer.get_gradients`, + loss scaling is automatically applied and this method is unneeded. + + If this method is called, `get_scaled_loss` should also be called. See + the `tf.keras.mixed_precision.LossScaleOptimizer` doc for an + example. + + Args: + grads: A list of tensors, each of which will be divided by the loss
scale. Can have None values, which are ignored. + + Returns: + A new list the same size as `grads`, where every non-None value in + `grads` is divided by `LossScaleOptimizer.loss_scale`. + """ + # Calls to this function would be delegated to `get_unscaled_gradients` + # of either `LossScaleOptimizer` or `LossScaleOptimizerV3`, depending on + # the type of `inner_optimizer`. + raise NotImplementedError + + +class LossScaleOptimizer( + tf.__internal__.tracking.DelegatingTrackableMixin, + optimizer_v2.OptimizerV2, + BaseLossScaleOptimizer, +): + """An optimizer that applies loss scaling to prevent numeric underflow.""" + + _HAS_AGGREGATE_GRAD = True + + def __init__( + self, + inner_optimizer, + dynamic=True, + initial_scale=None, + dynamic_growth_steps=None, + ): + if not isinstance(inner_optimizer, optimizer_v2.OptimizerV2): + if isinstance(inner_optimizer, optimizer.Optimizer): + # Give better error message if the new experimental optimizer is + # passed. + raise TypeError( + "You passed an instance of the new experimental " + "optimizer, `optimizer.Optimizer`, " + "to LossScaleOptimizer, but " + "only the classic optimizers subclassing from " + "`tf.keras.optimizers.Optimizer` can be passed. Please " + "use `loss_scale_optimizer.LossScaleOptimizerV3` " + "instead of " + "`tf.keras.mixed_precision.LossScaleOptimizer`, " + "as the former supports wrapping " + "instances of the new experimental optimizer. " + f"Got optimizer: {inner_optimizer}" + ) + msg = ( + '"inner_optimizer" must be an instance of ' + "`tf.keras.optimizers.Optimizer`, but got: %s. " + % inner_optimizer + ) + raise TypeError(msg) + if not isinstance(dynamic, bool): + # Catch errors if a user incorrectly passes a string or float to the + # second argument, as this was commonly done for the + # now-removed LossScaleOptimizerV1. + raise TypeError( + '"dynamic" argument to LossScaleOptimizer.__init__ must ' + "be a bool, but got: %r" % (dynamic,) + ) + if isinstance(inner_optimizer, LossScaleOptimizer): + raise TypeError( + "LossScaleOptimizer cannot wrap another " + "LossScaleOptimizer, but got: %s" % (inner_optimizer,) + ) + _raise_if_strategy_unsupported() + if getattr( + inner_optimizer, "_is_wrapped_by_loss_scale_optimizer", False + ): + # TODO(reedwm): Maybe support this. The difficulty is that LSO has + # the same checkpoint format as the inner optimizer, so multiple + # LSOs wrapping the same optimizer causes the checkpointing logic to + # become confused. + raise ValueError( + '"inner_optimizer" is already wrapped by a ' + "LossScaleOptimizer. An optimizer can only be wrapped " + "by a single LossScaleOptimizer" + ) + self._optimizer = inner_optimizer + self._optimizer._is_wrapped_by_loss_scale_optimizer = True + + # We don't call super().__init__, since we do not want to call + # OptimizerV2's constructor.
+ tf.__internal__.tracking.DelegatingTrackableMixin.__init__( + self, self._optimizer + ) + + if dynamic: + if initial_scale is None: + initial_scale = _DEFAULT_INITIAL_SCALE + if dynamic_growth_steps is None: + dynamic_growth_steps = _DEFAULT_GROWTH_STEPS + self._loss_scale = _DynamicLossScaleState( + initial_scale, dynamic_growth_steps, multiplier=2 + ) + self._track_trackable(self._loss_scale, "loss_scale") + else: + if initial_scale is None: + raise ValueError( + '"initial_scale" must be specified if "dynamic" is False' + ) + self._loss_scale = float(initial_scale) + if dynamic_growth_steps is not None: + raise ValueError( + '"dynamic_growth_steps" must be None if "dynamic" ' + "is False, but got: %s" % (dynamic_growth_steps,) + ) + + # Used to track whether get_scaled_loss() and get_unscaled_gradients() + # have been called + self._loss_has_been_scaled = False + self._gradients_have_been_unscaled = False + + # To support restoring TensorFlow 2.2 checkpoints. + self._track_trackable( + FakeOptimizerForRestoration(self._optimizer), "base_optimizer" + ) + + @property + def dynamic(self): + return isinstance(self._loss_scale, _DynamicLossScaleState) + + @property + def loss_scale(self): + if isinstance(self._loss_scale, _DynamicLossScaleState): + return tf.convert_to_tensor(self._loss_scale.current_loss_scale) + else: + return tf.convert_to_tensor(self._loss_scale) + + @property + def dynamic_counter(self): + if isinstance(self._loss_scale, _DynamicLossScaleState): + return self._loss_scale.counter + else: + return None + + @property + def initial_scale(self): + if isinstance(self._loss_scale, _DynamicLossScaleState): + return self._loss_scale.initial_loss_scale + else: + return self._loss_scale + + @property + def dynamic_growth_steps(self): + if isinstance(self._loss_scale, _DynamicLossScaleState): + return self._loss_scale.growth_steps + else: + return None + + @property + def inner_optimizer(self): + return self._optimizer + + def get_scaled_loss(self, loss): + self._loss_has_been_scaled = True + if callable(loss): + + def new_loss(): + loss_val = loss() + return loss_val * tf.cast(self.loss_scale, loss_val.dtype) + + return new_loss + else: + return loss * tf.cast(self.loss_scale, loss.dtype) + + def get_unscaled_gradients(self, grads): + self._gradients_have_been_unscaled = True + loss_scale_reciprocal = 1.0 / self.loss_scale + return [ + _multiply_gradient(g, loss_scale_reciprocal) + if g is not None + else None + for g in grads + ] + + def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): + tape = tf.GradientTape() if tape is None else tape + with tape: + loss = self.get_scaled_loss(loss) + grads_and_vars = self._optimizer._compute_gradients( + loss, var_list, grad_loss, tape=tape + ) + grads = [g for g, _ in grads_and_vars] + weights = [v for _, v in grads_and_vars] + unscaled_grads = self.get_unscaled_gradients(grads) + return list(zip(unscaled_grads, weights)) + + def get_gradients(self, loss, params): + loss = self.get_scaled_loss(loss) + grads = self._optimizer.get_gradients(loss, params) + return self.get_unscaled_gradients(grads) + + def _create_all_weights(self, var_list): + self._optimizer._create_all_weights(var_list) + + def apply_gradients( + self, grads_and_vars, name=None, experimental_aggregate_gradients=True + ): + if tf.distribute.in_cross_replica_context(): + raise ValueError( + "apply_gradients() must be called in a replica context." 
+ ) + # We check for the strategy here despite already checking in the + # constructor as frequently the optimizer is created outside the + # strategy's scope. + _raise_if_strategy_unsupported() + _maybe_warn_about_scaling( + self._loss_has_been_scaled, self._gradients_have_been_unscaled + ) + + grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) + if experimental_aggregate_gradients: + # We must aggregate the gradients here instead of in + # self.optimizer.apply_gradients, so that any NaN or Inf gradients + # are propagated to each replica. If any replica has a NaN or Inf + # gradient, they must all have a NaN or Inf gradient so that they + # all skip the step. + grads_and_vars = self._optimizer._transform_unaggregated_gradients( + grads_and_vars + ) + grads_and_vars = self._optimizer._aggregate_gradients( + grads_and_vars + ) + + grads_and_vars = tuple(grads_and_vars) + grads = [g for g, _ in grads_and_vars] + # We do not want DistributionStrategy to unwrap any MirroredVariables in + # grads_and_vars, because even in a replica context, the wrapped + # optimizer expects mirrored variables. So we wrap the variables with an + # _UnwrapPreventer, preventing DistributionStrategy from unwrapping the + # MirroredVariables. + wrapped_vars = _UnwrapPreventer([v for _, v in grads_and_vars]) + + def do_not_apply_fn(): + # Normally self._optimizer.iterations is incremented in + # self._optimizer.apply_gradients(). Since that is not called in + # this branch, we increment it here instead. + return self._optimizer.iterations.assign_add(1, read_value=False) + + def _if_should_apply_grads(grads): + if isinstance(self._loss_scale, _DynamicLossScaleState): + return self._loss_scale.update(grads) + else: + return (tf.no_op(), True) + + if tf.__internal__.distribute.strategy_supports_no_merge_call(): + loss_scale_update_op, should_apply_grads = _if_should_apply_grads( + grads + ) + + def apply_fn(): + return self._apply_gradients(grads, wrapped_vars, name) + + maybe_apply_op = tf.__internal__.smart_cond.smart_cond( + should_apply_grads, apply_fn, do_not_apply_fn + ) + return tf.group(maybe_apply_op, loss_scale_update_op) + + else: + + def _apply_gradients_cross_replica( + distribution, grads, wrapped_vars, name + ): + ( + loss_scale_update_op, + should_apply_grads, + ) = _if_should_apply_grads(grads) + + def apply_fn(): + return distribution.extended.call_for_each_replica( + self._apply_gradients, args=(grads, wrapped_vars, name) + ) + + # Note: We must call this cond() in a cross-replica context. + # DistributionStrategy does not support having a cond in a + # replica context with a branch that calls `merge_call`, and + # self._optimizer.apply_gradients calls `merge_call`. + maybe_apply_op = tf.__internal__.smart_cond.smart_cond( + should_apply_grads, apply_fn, do_not_apply_fn + ) + return tf.group(maybe_apply_op, loss_scale_update_op) + + return tf.distribute.get_replica_context().merge_call( + _apply_gradients_cross_replica, args=(grads, wrapped_vars, name) + ) + + def _apply_gradients(self, grads, wrapped_vars, name): + # Pass experimental_aggregate_gradients=False since LossScaleOptimizer + # already aggregated the gradients. + # TODO(reedwm): This will raise a fairly cryptic error message if + # self._optimizer.apply_gradients does not take + # experimental_aggregate_gradients. 
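An eager-mode sketch of the skip path that `do_not_apply_fn` and the `smart_cond` above implement: a nonfinite gradient halves the scale and leaves the variable untouched, while `iterations` still advances (fresh optimizer and default strategy assumed):

```
import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.legacy.SGD(1.0))
var = tf.Variable(1.0)

opt.apply_gradients([(tf.constant(float("nan")), var)])
assert var.numpy() == 1.0                  # update skipped
assert float(opt.loss_scale) == 2.0 ** 14  # 2 ** 15 halved
assert int(opt.iterations) == 1            # counter still incremented
```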
+ return self._optimizer.apply_gradients( + list(zip(grads, wrapped_vars.value)), + name=name, + experimental_aggregate_gradients=False, + ) + + def get_config(self): + serialized_optimizer = optimizers.serialize(self._optimizer) + return { + "inner_optimizer": serialized_optimizer, + "dynamic": self.dynamic, + "initial_scale": self.initial_scale, + "dynamic_growth_steps": self.dynamic_growth_steps, + } + + @classmethod + def from_config(cls, config, custom_objects=None): + config = config.copy() # Make a copy, since we mutate config + if "loss_scale" in config: + # If loss_scale is in config, we assume we are deserializing a + # LossScaleOptimizer from TF 2.3 or below. We convert the config so + # it can be deserialized in the current LossScaleOptimizer. + loss_scale = serialization_lib.deserialize_keras_object( + config.pop("loss_scale"), + module_objects={ + "FixedLossScale": tf.compat.v1.mixed_precision.FixedLossScale, # noqa: E501 + "DynamicLossScale": tf.compat.v1.mixed_precision.DynamicLossScale, # noqa: E501 + }, + printable_module_name="loss scale", + ) + + if isinstance( + loss_scale, tf.compat.v1.mixed_precision.FixedLossScale + ): + config["dynamic"] = False + config["initial_scale"] = loss_scale._loss_scale_value + elif isinstance( + loss_scale, tf.compat.v1.mixed_precision.DynamicLossScale + ): + config["dynamic"] = True + config["initial_scale"] = loss_scale.initial_loss_scale + config["dynamic_growth_steps"] = loss_scale.increment_period + if loss_scale.multiplier != 2: + raise ValueError( + "Cannot deserialize LossScaleOptimizer with a " + "DynamicLossScale whose multiplier is not 2. Got " + "DynamicLossScale: %s" % (loss_scale,) + ) + else: + raise ValueError( + "Serialized LossScaleOptimizers with a LossScale that is " + "neither a FixedLossScale nor a DynamicLossScale can no " + "longer be deserialized" + ) + config["inner_optimizer"] = config.pop("optimizer") + if isinstance(config["inner_optimizer"], optimizer_v2.OptimizerV2): + inner_optimizer = config["inner_optimizer"] + else: + inner_optimizer = optimizers.deserialize( + config["inner_optimizer"], + custom_objects=custom_objects, + use_legacy_optimizer=True, + ) + del config["inner_optimizer"] + return cls(inner_optimizer, **config) + + # Delegations: We delegate most OptimizerV2 methods to the wrapped optimizer + # below. 
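`get_config()` / `from_config()` above give a plain round-trip; a sketch (fixed-scale mode keeps the asserted values deterministic):

```
import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.legacy.SGD(), dynamic=False, initial_scale=256)
config = opt.get_config()
# The config carries the serialized inner optimizer plus the three
# loss-scale fields ("dynamic", "initial_scale", "dynamic_growth_steps").
clone = type(opt).from_config(config)
assert clone.dynamic is False
assert float(clone.initial_scale) == 256.0
```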
+ + @property + def iterations(self): + return self._optimizer.iterations + + @iterations.setter + def iterations(self, variable): + self._optimizer.iterations = variable + + def get_slot_names(self): + return self._optimizer.get_slot_names() + + def variables(self): + return self._optimizer.variables() + + @property + def weights(self): + return self._optimizer.weights + + def get_weights(self): + return self._optimizer.get_weights() + + def set_weights(self, weights): + return self._optimizer.set_weights(weights) + + @property + def clipnorm(self): + return self._optimizer.clipnorm + + @clipnorm.setter + def clipnorm(self, val): + self._optimizer.clipnorm = val + + @property + def global_clipnorm(self): + return self._optimizer.global_clipnorm + + @global_clipnorm.setter + def global_clipnorm(self, val): + self._optimizer.global_clipnorm = val + + @property + def clipvalue(self): + return self._optimizer.clipvalue + + @clipvalue.setter + def clipvalue(self, val): + self._optimizer.clipvalue = val + + def _aggregate_gradients(self, grads_and_vars): + return self._optimizer._aggregate_gradients(grads_and_vars) + + def _restore_slot_variable(self, slot_name, variable, slot_variable): + return self._optimizer._restore_slot_variable( + slot_name, + variable, + slot_variable, + ) + + def _create_or_restore_slot_variable( + self, slot_variable_position, slot_name, variable + ): + return self._optimizer._create_or_restore_slot_variable( + slot_variable_position, slot_name, variable + ) + + def get_slot(self, var, slot_name): + return self._optimizer.get_slot(var, slot_name) + + def add_slot(self, var, slot_name, initializer="zeros"): + return self._optimizer.add_slot(var, slot_name, initializer) + + def __getattribute__(self, name): + try: + return object.__getattribute__(self, name) + except AttributeError as e: + if name == "_optimizer" or name == "_hyper": + # Avoid infinite recursion + raise e + + # Delegate hyperparameter accesses to inner optimizer. + if name == "lr": + name = "learning_rate" + if name in self._optimizer._hyper: + return self._optimizer._get_hyper(name) + raise e + + def __dir__(self): + result = set(super().__dir__()) + if "_optimizer" in result: + result |= self._optimizer._hyper.keys() + if "learning_rate" in self._optimizer._hyper.keys(): + result.add("lr") + return list(result) + + def __setattr__(self, name, value): + if name == "lr": + name = "learning_rate" + # Delegate setting hyperparameter to inner optimizer if the attribute + # does not exist on the LossScaleOptimizer + try: + # We cannot check for the 'iterations' attribute as it cannot be set + # after it is accessed. + if name != "iterations": + object.__getattribute__(self, name) + has_attribute = True + except AttributeError: + has_attribute = False + if ( + name != "_optimizer" + and name in self._optimizer._hyper + and not has_attribute + ): + self._optimizer._set_hyper(name, value) + else: + super().__setattr__(name, value) + + # Explicitly delegate learning_rate. Normally hyperparameters are delegated + # in __getattribute__, but if a hyperparameter is not in + # self._optimizer._hyper (e.g. because self._optimizer itself wraps another + # optimizer), then it won't be delegated. Since learning_rate is a very + # commonly accessed hyperparameter, we delegate it here. 
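The `__getattribute__` / `__setattr__` overrides above implement transparent hyperparameter forwarding. The same idea in miniature, using `__getattr__` (which fires only on lookup misses) to keep the sketch short; `Inner` and `Wrapper` are stand-ins, not Keras classes:

```
class Inner:
    def __init__(self):
        self._hyper = {"learning_rate": 0.1, "beta_1": 0.9}

class Wrapper:
    def __init__(self, inner):
        self._inner = inner

    def __getattr__(self, name):
        # Called only when normal attribute lookup fails, mirroring the
        # fallthrough above; "lr" aliases "learning_rate".
        if name == "lr":
            name = "learning_rate"
        try:
            return self._inner._hyper[name]
        except KeyError:
            raise AttributeError(name) from None

w = Wrapper(Inner())
assert w.beta_1 == 0.9 and w.lr == 0.1
```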
+ @property + def learning_rate(self): + return self._optimizer.learning_rate + + @learning_rate.setter + def learning_rate(self, value): + self._optimizer.learning_rate = value + + @property + def lr(self): + return self._optimizer.learning_rate + + @lr.setter + def lr(self, value): + self._optimizer.lr = value + + # We do not override some OptimizerV2 methods. For each, we describe why we + # do not delegate them to self._optimizer: + # * get_updates: get_updates() calls get_gradients(). Since we override + # get_gradients(), we cannot delegate get_updates() to self._optimizer, + # otherwise the overridden get_gradients() method would not be called. + # Luckily, get_updates() does not access any OptimizerV2 fields, so + # inheriting the OptimizerV2 version works fine. + # * minimize: We don't delegate for a similar reason as get_updates(): it calls + # both self._compute_gradients() and self.apply_gradients(), and both need + # to have the LossScaleOptimizer version called. + + # TODO(reedwm): Maybe throw an error if mixed precision is used without this + # optimizer being used. + + +class LossScaleOptimizerV3( + tf.__internal__.tracking.DelegatingTrackableMixin, + optimizer.Optimizer, + BaseLossScaleOptimizer, +): + """An optimizer that applies loss scaling to prevent numeric underflow. + + This is a copy of the `mixed_precision.LossScaleOptimizer` class + defined above, except it subclasses and wraps the new experimental Optimizer + class instead of the `tf.keras.optimizers.Optimizer` class. Some of the + methods this class defines and calls are different compared to + LossScaleOptimizer due to the differences between the two Optimizer base + classes. Additionally, this class does not support the legacy graph mode, + but LossScaleOptimizer does. + + Since the new experimental Optimizer does not have a hyperparameter concept, + LossScaleOptimizerV3 does not delegate arbitrary hyperparameter accesses to + the inner optimizer, unlike LossScaleOptimizer. LossScaleOptimizerV3 does + delegate the "learning_rate" attribute, however. """ - # Calls to this function would be delegated to `get_unscaled_gradients` - # of either `LossScaleOptimizer` or `LossScaleOptimizerV3`, depending on - # the type of `inner_optimizer`. - raise NotImplementedError + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__( + self, + inner_optimizer, + dynamic=True, + initial_scale=None, + dynamic_growth_steps=None, + ): + if not isinstance(inner_optimizer, optimizer.Optimizer): + if isinstance(inner_optimizer, optimizer_v2.OptimizerV2): + # Give better error message if the OptimizerV2 class is passed + # instead of the new experimental optimizer. + raise TypeError( + "You passed a `tf.keras.optimizers.Optimizer` instance to " + "LossScaleOptimizerV3, but only the new experimental " + "optimizer defined in " + "keras/optimizer_experimental/optimizer.py can be " + "passed. Please use " + "`tf.keras.mixed_precision.LossScaleOptimizer` " + "instead of LossScaleOptimizerV3, as the former supports " + "`tf.keras.optimizers.Optimizer`s. Got optimizer: " + f"{inner_optimizer}" + ) + raise TypeError( + '"inner_optimizer" must be an instance of ' + f"Optimizer, but got: {inner_optimizer}." + ) + if not isinstance(dynamic, bool): + # Catch errors if a user incorrectly passes a string or float to the + # second argument, as this was commonly done for the + # now-removed LossScaleOptimizerV1.
+ raise TypeError( + '"dynamic" argument to LossScaleOptimizer.__init__ must ' + f"be a bool, but got: {repr(dynamic)}" + ) + if isinstance(inner_optimizer, LossScaleOptimizerV3): + raise TypeError( + "LossScaleOptimizer cannot wrap another " + f"LossScaleOptimizer, but got: {inner_optimizer}" + ) + _raise_if_strategy_unsupported() + if getattr( + inner_optimizer, "_is_wrapped_by_loss_scale_optimizer", False + ): + # TODO(reedwm): Maybe support this. The difficulty is that LSO has + # the same checkpoint format as the inner optimizer, so multiple + # LSOs wrapping the same optimizer causes the checkpointing logic to + # become confused. + raise ValueError( + '"inner_optimizer" is already wrapped by a ' + "LossScaleOptimizer. An optimizer can only be wrapped " + "by a single LossScaleOptimizer" + ) + self._optimizer = inner_optimizer + self._optimizer._is_wrapped_by_loss_scale_optimizer = True + + # We don't call super().__init__, since we do not want to call + # Optimizer's constructor. + tf.__internal__.tracking.DelegatingTrackableMixin.__init__( + self, self._optimizer + ) + + if dynamic: + if initial_scale is None: + initial_scale = _DEFAULT_INITIAL_SCALE + if dynamic_growth_steps is None: + dynamic_growth_steps = _DEFAULT_GROWTH_STEPS + self._loss_scale = _DynamicLossScaleState( + initial_scale, dynamic_growth_steps, multiplier=2 + ) + self._track_trackable(self._loss_scale, "loss_scale") + else: + if initial_scale is None: + raise ValueError( + '"initial_scale" must be specified if "dynamic" is False' + ) + self._loss_scale = float(initial_scale) + if dynamic_growth_steps is not None: + raise ValueError( + '"dynamic_growth_steps" must be None if "dynamic" ' + f"is False, but got: {dynamic_growth_steps}" + ) + + # Used to track whether get_scaled_loss() and get_unscaled_gradients() + # have been called + self._loss_has_been_scaled = False + self._gradients_have_been_unscaled = False + + @property + def dynamic(self): + return isinstance(self._loss_scale, _DynamicLossScaleState) + + @property + def loss_scale(self): + if isinstance(self._loss_scale, _DynamicLossScaleState): + return tf.convert_to_tensor(self._loss_scale.current_loss_scale) + else: + return tf.convert_to_tensor(self._loss_scale) + + @property + def dynamic_counter(self): + if isinstance(self._loss_scale, _DynamicLossScaleState): + return self._loss_scale.counter + else: + return None + + @property + def initial_scale(self): + if isinstance(self._loss_scale, _DynamicLossScaleState): + return self._loss_scale.initial_loss_scale + else: + return self._loss_scale + + @property + def dynamic_growth_steps(self): + if isinstance(self._loss_scale, _DynamicLossScaleState): + return self._loss_scale.growth_steps + else: + return None + + @property + def inner_optimizer(self): + return self._optimizer + + def get_scaled_loss(self, loss): + self._loss_has_been_scaled = True + if callable(loss): + + def new_loss(): + loss_val = loss() + return loss_val * tf.cast(self.loss_scale, loss_val.dtype) + + return new_loss + else: + return loss * tf.cast(self.loss_scale, loss.dtype) + + def get_unscaled_gradients(self, grads): + self._gradients_have_been_unscaled = True + loss_scale_reciprocal = 1.0 / self.loss_scale + return [ + _multiply_gradient(g, loss_scale_reciprocal) + if g is not None + else None + for g in grads + ] + + def compute_gradients(self, loss, var_list, tape=None): + tape = tf.GradientTape() if tape is None else tape + with tape: + loss = self.get_scaled_loss(loss) + grads_and_vars = self._optimizer.compute_gradients( + 
loss, var_list, tape=tape + ) + grads = [g for g, _ in grads_and_vars] + weights = [v for _, v in grads_and_vars] + unscaled_grads = self.get_unscaled_gradients(grads) + return list(zip(unscaled_grads, weights)) + + def apply_gradients( + self, grads_and_vars, skip_gradients_aggregation=False, **kwargs + ): + grads_and_vars = list(grads_and_vars) + grads, trainable_variables = zip(*grads_and_vars) + with tf.init_scope(): + # Lift variable creation to init scope to avoid environment + # issues. + self.build(trainable_variables) + if tf.distribute.in_cross_replica_context(): + raise ValueError( + "apply_gradients() must be called in a replica context." + ) + # We check for the strategy here despite already checking in the + # constructor as frequently the optimizer is created outside the + # strategy's scope. + _raise_if_strategy_unsupported() + _maybe_warn_about_scaling( + self._loss_has_been_scaled, self._gradients_have_been_unscaled + ) + + grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) + # `experimental_aggregate_gradients` is an arg in `apply_gradients` of + # v2 optimizer -- the reverse of `skip_gradients_aggregation`. + # We read it from kwargs for backward compatibility. + experimental_aggregate_gradients = kwargs.pop( + "experimental_aggregate_gradients", True + ) + run_with_dtensor = ( + # `_run_with_dtensor` is for dtensor based strategy scope, and + # `_mesh` is when user explicitly specify the mesh setting for + # optimizer. + self._optimizer._run_with_dtensor + or self._optimizer._mesh + ) + + if ( + not skip_gradients_aggregation + and experimental_aggregate_gradients + and not run_with_dtensor + ): + # We must aggregate the gradients here instead of in + # self.optimizer.apply_gradients, so that any NaN or Inf gradients + # are propagated to each replica. If any replica has a NaN or Inf + # gradient, they must all have a NaN or Inf gradient so that they + # all skip the step. + grads_and_vars = self._optimizer.aggregate_gradients(grads_and_vars) + + grads_and_vars = tuple(grads_and_vars) + grads = [g for g, _ in grads_and_vars] + # We do not want DistributionStrategy to unwrap any MirroredVariables in + # grads_and_vars, because even in a replica context, the wrapped + # optimizer expects mirrored variables. So we wrap the variables with an + # _UnwrapPreventer, preventing DistributionStrategy from unwrapping the + # MirroredVariables. + wrapped_vars = _UnwrapPreventer([v for _, v in grads_and_vars]) + + def do_not_apply_fn(): + # Normally self._optimizer.iterations is incremented in + # self._optimizer.apply_gradients(). Since that is not called in + # this branch, we increment it here instead. 
+ self._optimizer.iterations.assign_add(1, read_value=False) + + def _if_should_apply_grads(grads): + if isinstance(self._loss_scale, _DynamicLossScaleState): + _, should_apply_grad = self._loss_scale.update(grads) + return should_apply_grad + else: + return True + + if tf.__internal__.distribute.strategy_supports_no_merge_call(): + should_apply_grads = _if_should_apply_grads(grads) + + def apply_fn(): + return self._apply_gradients(grads, wrapped_vars) + + tf.__internal__.smart_cond.smart_cond( + should_apply_grads, apply_fn, do_not_apply_fn + ) + else: + + def _apply_gradients_cross_replica( + distribution, grads, wrapped_vars + ): + should_apply_grads = _if_should_apply_grads(grads) + + def apply_fn(): + distribution.extended.call_for_each_replica( + self._apply_gradients, args=(grads, wrapped_vars) + ) + + # Note: We must call this cond() in a cross-replica context. + # DistributionStrategy does not support having a cond in a + # replica context with a branch that calls `merge_call`, and + # self._optimizer.apply_gradients calls `merge_call`. + tf.__internal__.smart_cond.smart_cond( + should_apply_grads, apply_fn, do_not_apply_fn + ) + + tf.distribute.get_replica_context().merge_call( + _apply_gradients_cross_replica, args=(grads, wrapped_vars) + ) + + def _apply_gradients(self, grads, wrapped_vars): + # Pass skip_gradients_aggregation=True since LossScaleOptimizer + # already aggregated the gradients. + self._optimizer.apply_gradients( + list(zip(grads, wrapped_vars.value)), + skip_gradients_aggregation=True, + ) + + def get_config(self): + serialized_optimizer = optimizers.serialize(self._optimizer) + return { + "inner_optimizer": serialized_optimizer, + "dynamic": self.dynamic, + "initial_scale": self.initial_scale, + "dynamic_growth_steps": self.dynamic_growth_steps, + } + + @classmethod + def from_config(cls, config, custom_objects=None): + config = config.copy() # Make a copy, since we mutate config + if isinstance(config["inner_optimizer"], optimizer.Optimizer): + inner_optimizer = config["inner_optimizer"] + else: + inner_optimizer = optimizers.deserialize( + config["inner_optimizer"], + custom_objects=custom_objects, + use_legacy_optimizer=False, + ) + del config["inner_optimizer"] + return cls(inner_optimizer, **config) + + @property + def iterations(self): + return self._optimizer.iterations + + @iterations.setter + def iterations(self, variable): + self._optimizer.iterations = variable + + @property + def variables(self): + return self._optimizer.variables + + def build(self, var_list): + return self._optimizer.build(var_list) + + @property + def learning_rate(self): + return self._optimizer.learning_rate + + @learning_rate.setter + def learning_rate(self, learning_rate): + self._optimizer.learning_rate = learning_rate + + @property + def use_ema(self): + return self._optimizer.use_ema + + @use_ema.setter + def use_ema(self, use_ema): + self._optimizer.use_ema = use_ema + + @property + def ema_momentum(self): + return self._optimizer.ema_momentum + + @ema_momentum.setter + def ema_momentum(self, ema_momentum): + self._optimizer.ema_momentum = ema_momentum + + def finalize_variable_values(self, var_list): + self._optimizer.finalize_variable_values(var_list) -# pylint: disable=g-classes-have-attributes -class LossScaleOptimizer(tf.__internal__.tracking.DelegatingTrackableMixin, - optimizer_v2.OptimizerV2, BaseLossScaleOptimizer): - """An optimizer that applies loss scaling to prevent numeric underflow.""" - _HAS_AGGREGATE_GRAD = True +class 
FakeOptimizerForRestoration(tf.__internal__.tracking.Trackable): + """A fake optimizer used to support restoring TensorFlow 2.2 checkpoints. + + The checkpoint format for LossScaleOptimizers changed after TF 2.2. This + class exists to support restoring TF 2.2 checkpoints in newer versions of + TensorFlow. + + In TF 2.2, LossScaleOptimizer would track the wrapped optimizer by calling + the following in LossScaleOptimizer.__init__ + + ``` + self._track_trackable(self._optimizer, 'base_optimizer') + ``` + + This means a dependency from the LossScaleOptimizer to the wrapped optimizer + would be stored in the checkpoint. However now, the checkpoint format with a + LossScaleOptimizer is the same as the format without a LossScaleOptimizer, + except the loss scale is also stored. This means there is no dependency from + the LossScaleOptimizer to the wrapped optimizer. Instead, the + LossScaleOptimizer acts as if it is the wrapped optimizer, from a + checkpoint's perspective, by overriding all Trackable methods and delegating + them to the wrapped optimizer. + + To allow restoring TF 2.2 checkpoints, LossScaleOptimizer adds a dependency + on this class instead of the inner optimizer. When restored, this class will + instead restore the slot variables of the inner optimizer. Since this class + has no variables, it does not affect the checkpoint when saved. + """ - def __init__(self, inner_optimizer, dynamic=True, initial_scale=None, - dynamic_growth_steps=None): - if not isinstance(inner_optimizer, optimizer_v2.OptimizerV2): - if isinstance(inner_optimizer, optimizer_experimental.Optimizer): - # Give better error message if the new experimental optimizer is passed. - raise TypeError( - f'You passed an instance of the new experimental optimizer, ' - f'`optimizer_experimental.Optimizer`, to LossScaleOptimizer, but ' - f'only the classic optimizers subclassing from ' - f'`tf.keras.optimizers.Optimizer` can be passed. Please use ' - f'`loss_scale_optimizer.LossScaleOptimizerV3` instead of ' - f'`tf.keras.mixed_precision.LossScaleOptimizer`, as the former ' - f'supports wrapping instances of the new experimental optimizer. ' - f'Got optimizer: {inner_optimizer}') - msg = ('"inner_optimizer" must be an instance of ' - '`tf.keras.optimizers.Optimizer`, but got: %s. ' % inner_optimizer) - if isinstance(inner_optimizer, legacy_optimizer.OptimizerV2): - msg += ('Please make sure "inner_optimizer" is not an instance of ' - '`tensorflow.python.keras.optimizers`, which is ' - 'the legacy keras code and will be removed in future release. ' - 'Please use the tf.keras public API instead.') - raise TypeError(msg) - if not isinstance(dynamic, bool): - # Catch errors if a user incorrectly passes a string or float to the - # second argument argument, as this was commonly done for the now-removed - # LossScaleOptimizerV1. - raise TypeError('"dynamic" argument to LossScaleOptimizer.__init__ must ' - 'be a bool, but got: %r' % (dynamic,)) - if isinstance(inner_optimizer, LossScaleOptimizer): - raise TypeError('LossScaleOptimizer cannot wrap another ' - 'LossScaleOptimizer, but got: %s' % (inner_optimizer,)) - _raise_if_strategy_unsupported() - if getattr(inner_optimizer, '_is_wrapped_by_loss_scale_optimizer', False): - # TODO(reedwm): Maybe support this. The difficulty is that LSO has the - # same checkpoint format as the inner optimizer, so multiple LSOs wrapping - # the same optimizer causes the checkpointing logic to become confused.
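A sketch of the compatibility property the docstring above describes: a checkpoint written through the wrapper has the inner optimizer's layout plus the loss scale, so it round-trips through `tf.train.Checkpoint` (the path and names here are illustrative):

```
import tensorflow as tf

inner = tf.keras.optimizers.legacy.SGD(momentum=0.5)
opt = tf.keras.mixed_precision.LossScaleOptimizer(inner)
var = tf.Variable(1.0)
opt.minimize(lambda: var ** 2, var_list=[var])  # creates slot variables

ckpt = tf.train.Checkpoint(optimizer=opt, var=var)
save_path = ckpt.save("/tmp/lso_ckpt_demo")
ckpt.restore(save_path)  # restores slots and the loss scale state
```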
- raise ValueError('"inner_optimizer" is already wrapped by a ' - 'LossScaleOptimizer. An optimizer can only be wrapped ' - 'by a single LossScaleOptimizer') - self._optimizer = inner_optimizer - self._optimizer._is_wrapped_by_loss_scale_optimizer = True - - # We don't call super().__init__, since we do not want to call OptimizerV2's - # constructor. - tf.__internal__.tracking.DelegatingTrackableMixin.__init__(self, - self._optimizer) - - if dynamic: - if initial_scale is None: - initial_scale = _DEFAULT_INITIAL_SCALE - if dynamic_growth_steps is None: - dynamic_growth_steps = _DEFAULT_GROWTH_STEPS - self._loss_scale = _DynamicLossScaleState( - initial_scale, dynamic_growth_steps, multiplier=2) - self._track_trackable(self._loss_scale, 'loss_scale') - else: - if initial_scale is None: - raise ValueError('"initial_scale" must be specified if "dynamic" is ' - 'False') - self._loss_scale = float(initial_scale) - if dynamic_growth_steps is not None: - raise ValueError('"dynamic_growth_steps" must be None if "dynamic" ' - 'is False, but got: %s' % (dynamic_growth_steps,)) - - # Used to track whether get_scaled_loss() and get_unscaled_gradients() have - # been called - self._loss_has_been_scaled = False - self._gradients_have_been_unscaled = False - - # To support restoring TensorFlow 2.2 checkpoints. - self._track_trackable(FakeOptimizerForRestoration(self._optimizer), - 'base_optimizer') - - @property - def dynamic(self): - return isinstance(self._loss_scale, _DynamicLossScaleState) - - @property - def loss_scale(self): - if isinstance(self._loss_scale, _DynamicLossScaleState): - return tf.convert_to_tensor( - self._loss_scale.current_loss_scale) - else: - return tf.convert_to_tensor(self._loss_scale) + def __init__(self, optimizer): + self._optimizer = optimizer - @property - def dynamic_counter(self): - if isinstance(self._loss_scale, _DynamicLossScaleState): - return self._loss_scale.counter - else: - return None + def get_slot_names(self): + return self._optimizer.get_slot_names() - @property - def initial_scale(self): - if isinstance(self._loss_scale, _DynamicLossScaleState): - return self._loss_scale.initial_loss_scale - else: - return self._loss_scale + def _create_or_restore_slot_variable( + self, slot_variable_position, slot_name, variable + ): + return self._optimizer._create_or_restore_slot_variable( + slot_variable_position, slot_name, variable + ) - @property - def dynamic_growth_steps(self): - if isinstance(self._loss_scale, _DynamicLossScaleState): - return self._loss_scale.growth_steps - else: - return None - - @property - def inner_optimizer(self): - return self._optimizer - - def get_scaled_loss(self, loss): - self._loss_has_been_scaled = True - if callable(loss): - def new_loss(): - loss_val = loss() - return loss_val * tf.cast(self.loss_scale, loss_val.dtype) - return new_loss - else: - return loss * tf.cast(self.loss_scale, loss.dtype) - def get_unscaled_gradients(self, grads): - self._gradients_have_been_unscaled = True - loss_scale_reciprocal = 1. / self.loss_scale - return [ - _multiply_gradient(g, loss_scale_reciprocal) if g is not None else None - for g in grads - ] +def _create_loss_scale_optimizer_from_v1_loss_scale(optimizer, loss_scale): + """Creates an LSO from a tf.compat.v1.mixed_precision.LossScale. 
- def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): - tape = tf.GradientTape() if tape is None else tape - with tape: - loss = self.get_scaled_loss(loss) - grads_and_vars = self._optimizer._compute_gradients( # pylint: disable=protected-access - loss, - var_list, - grad_loss, - tape=tape) - grads = [g for g, _ in grads_and_vars] - weights = [v for _, v in grads_and_vars] - unscaled_grads = self.get_unscaled_gradients(grads) - return list(zip(unscaled_grads, weights)) - - def get_gradients(self, loss, params): - loss = self.get_scaled_loss(loss) - grads = self._optimizer.get_gradients(loss, params) - return self.get_unscaled_gradients(grads) - - def _create_all_weights(self, var_list): - self._optimizer._create_all_weights(var_list) # pylint: disable=protected-access - - def apply_gradients(self, - grads_and_vars, - name=None, - experimental_aggregate_gradients=True): - if tf.distribute.in_cross_replica_context(): - raise ValueError('apply_gradients() must be called in a replica context.') - # We check for the strategy here despite already checking in the constructor - # as frequently the optimizer is created outside the strategy's scope. - _raise_if_strategy_unsupported() - _maybe_warn_about_scaling(self._loss_has_been_scaled, - self._gradients_have_been_unscaled) - - grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) - if experimental_aggregate_gradients: - # We must aggregate the gradients here instead of in - # self.optimizer.apply_gradients, so that any NaN or Inf gradients are - # propagated to each replica. If any replica has a NaN or Inf gradient, - # they must all have a NaN or Inf gradient so that they all skip the step. - # pylint: disable=protected-access - grads_and_vars = self._optimizer._transform_unaggregated_gradients( - grads_and_vars) - grads_and_vars = self._optimizer._aggregate_gradients(grads_and_vars) - # pylint: enable=protected-access - - grads_and_vars = tuple(grads_and_vars) - grads = [g for g, _ in grads_and_vars] - # We do not want DistributionStrategy to unwrap any MirroredVariables in - # grads_and_vars, because even in a replica context, the wrapped - # optimizer expects mirrored variables. So we wrap the variables with an - # _UnwrapPreventer, preventing DistributionStrategy from unwrapping the - # MirroredVariables. - wrapped_vars = _UnwrapPreventer([v for _, v in grads_and_vars]) - - def do_not_apply_fn(): - # Normally self._optimizer.iterations is incremented in - # self._optimizer.apply_gradients(). Since that is not called in this - # branch, we increment it here instead. - return self._optimizer.iterations.assign_add(1, read_value=False) - - def _if_should_apply_grads(grads): - if isinstance(self._loss_scale, _DynamicLossScaleState): - return self._loss_scale.update(grads) - else: - return (tf.no_op(), True) - - if tf.__internal__.distribute.strategy_supports_no_merge_call(): - loss_scale_update_op, should_apply_grads = _if_should_apply_grads(grads) - def apply_fn(): - return self._apply_gradients(grads, wrapped_vars, name) - - maybe_apply_op = tf.__internal__.smart_cond.smart_cond(should_apply_grads, apply_fn, - do_not_apply_fn) - return tf.group(maybe_apply_op, loss_scale_update_op) + This is only used to pass to + `tf.__internal__.mixed_precision.register_loss_scale_wrapper` below, which + is called so that + `tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite` can + wrap a Keras optimizer with a LossScaleOptimizer. - else: + Args: + optimizer: An OptimizerV2 instance. 
+ loss_scale: A `tf.compat.v1.mixed_precision.LossScale` instance - def _apply_gradients_cross_replica(distribution, grads, wrapped_vars, - name): - loss_scale_update_op, should_apply_grads = _if_should_apply_grads(grads) - - def apply_fn(): - return distribution.extended.call_for_each_replica( - self._apply_gradients, - args=(grads, wrapped_vars, name)) - - # Note: We must call this cond() in a cross-replica context. - # DistributionStrategy does not support having a cond in a replica - # context with a branch that calls `merge_call`, and - # self._optimizer.apply_gradients calls `merge_call`. - maybe_apply_op = tf.__internal__.smart_cond.smart_cond(should_apply_grads, apply_fn, - do_not_apply_fn) - return tf.group(maybe_apply_op, loss_scale_update_op) - return tf.distribute.get_replica_context().merge_call( - _apply_gradients_cross_replica, - args=(grads, wrapped_vars, name)) - - def _apply_gradients(self, grads, wrapped_vars, name): - # Pass experimental_aggregate_gradients=False since LossScaleOptimizer - # already aggregated the gradients. - # TODO(reedwm): This will raise a fairly cryptic error message if - # self._optimizer.apply_gradients does not take - # experimental_aggregate_gradients. - return self._optimizer.apply_gradients( - list(zip(grads, wrapped_vars.value)), - name=name, - experimental_aggregate_gradients=False) - - def get_config(self): - serialized_optimizer = optimizers.serialize(self._optimizer) - return { - 'inner_optimizer': serialized_optimizer, - 'dynamic': self.dynamic, - 'initial_scale': self.initial_scale, - 'dynamic_growth_steps': self.dynamic_growth_steps, - } - - @classmethod - def from_config(cls, config, custom_objects=None): - config = config.copy() # Make a copy, since we mutate config - if 'loss_scale' in config: - # If loss_scale is in config, we assume we are deserializing a - # LossScaleOptimizer from TF 2.3 or below. We convert the config so it - # can be deserialized in the current LossScaleOptimizer. - loss_scale = generic_utils.deserialize_keras_object( - config.pop('loss_scale'), - module_objects={ - 'FixedLossScale': tf.compat.v1.mixed_precision.FixedLossScale, - 'DynamicLossScale': tf.compat.v1.mixed_precision.DynamicLossScale, - }, - printable_module_name='loss scale') - - if isinstance(loss_scale, tf.compat.v1.mixed_precision.FixedLossScale): - config['dynamic'] = False - config['initial_scale'] = loss_scale._loss_scale_value # pylint: disable=protected-access - elif isinstance(loss_scale, - tf.compat.v1.mixed_precision.DynamicLossScale): - config['dynamic'] = True - config['initial_scale'] = loss_scale.initial_loss_scale - config['dynamic_growth_steps'] = loss_scale.increment_period + Returns: + A LossScaleOptimizer that wraps `optimizer` and uses the same loss scaling + algorithm as `loss_scale`. + """ + if isinstance(loss_scale, (int, float)): + return LossScaleOptimizer( + optimizer, dynamic=False, initial_scale=loss_scale + ) + elif isinstance(loss_scale, tf.compat.v1.mixed_precision.FixedLossScale): + ls_val = loss_scale._loss_scale_value + return LossScaleOptimizer( + optimizer, dynamic=False, initial_scale=ls_val + ) + elif loss_scale == "dynamic": + return LossScaleOptimizer(optimizer) + elif isinstance(loss_scale, tf.compat.v1.mixed_precision.DynamicLossScale): if loss_scale.multiplier != 2: - raise ValueError('Cannot deserialize LossScaleOptimizer with a ' - 'DynamicLossScale whose multiplier is not 2. 
Got ' - 'DynamicLossScale: %s' % (loss_scale,)) - else: - raise ValueError( - 'Serialized LossScaleOptimizers with a LossScale that is neither a ' - 'FixedLossScale nor a DynamicLossScale can no longer be ' - 'deserialized') - config['inner_optimizer'] = config.pop('optimizer') - inner_optimizer = optimizers.deserialize( - config['inner_optimizer'], custom_objects=custom_objects) - del config['inner_optimizer'] - return cls(inner_optimizer, **config) - - # Delegations: We delegate most OptimizerV2 methods to the wrapped optimizer - # below. - - @property - def iterations(self): - return self._optimizer.iterations - - @iterations.setter - def iterations(self, variable): - self._optimizer.iterations = variable - - def get_slot_names(self): - return self._optimizer.get_slot_names() - - def variables(self): - return self._optimizer.variables() - - @property - def weights(self): - return self._optimizer.weights - - def get_weights(self): - return self._optimizer.get_weights() - - def set_weights(self, weights): - return self._optimizer.set_weights(weights) - - @property - def clipnorm(self): - return self._optimizer.clipnorm - - @clipnorm.setter - def clipnorm(self, val): - self._optimizer.clipnorm = val - - @property - def global_clipnorm(self): - return self._optimizer.global_clipnorm - - @global_clipnorm.setter - def global_clipnorm(self, val): - self._optimizer.global_clipnorm = val - - @property - def clipvalue(self): - return self._optimizer.clipvalue - - @clipvalue.setter - def clipvalue(self, val): - self._optimizer.clipvalue = val - - def _aggregate_gradients(self, grads_and_vars): - return self._optimizer._aggregate_gradients(grads_and_vars) # pylint: disable=protected-access - - def _restore_slot_variable(self, slot_name, variable, slot_variable): - return self._optimizer._restore_slot_variable(slot_name, variable, # pylint: disable=protected-access - slot_variable) - - def _create_or_restore_slot_variable(self, slot_variable_position, slot_name, - variable): - return self._optimizer._create_or_restore_slot_variable( # pylint: disable=protected-access - slot_variable_position, slot_name, variable) - - def get_slot(self, var, slot_name): - return self._optimizer.get_slot(var, slot_name) - - def add_slot(self, var, slot_name, initializer='zeros'): - return self._optimizer.add_slot(var, slot_name, initializer) - - def __getattribute__(self, name): - try: - return object.__getattribute__(self, name) - except AttributeError as e: - if name == '_optimizer' or name == '_hyper': - # Avoid infinite recursion - raise e - - # Delegate hyperparameter accesses to inner optimizer. - if name == 'lr': - name = 'learning_rate' - if name in self._optimizer._hyper: - return self._optimizer._get_hyper(name) - raise e - - def __dir__(self): - result = set(super().__dir__()) - if '_optimizer' in result: - result |= self._optimizer._hyper.keys() - if 'learning_rate' in self._optimizer._hyper.keys(): - result.add('lr') - return list(result) - - def __setattr__(self, name, value): - if name == 'lr': - name = 'learning_rate' - # Delegate setting hyperparameter to inner optimizer if the attribute does - # not exist on the LossScaleOptimizer - try: - # We cannot check for the 'iterations' attribute as it cannot be set after - # it is accessed. 
- if name != 'iterations': - object.__getattribute__(self, name) - has_attribute = True - except AttributeError: - has_attribute = False - if (name != '_optimizer' and name in self._optimizer._hyper - and not has_attribute): - self._optimizer._set_hyper(name, value) - else: - super().__setattr__(name, value) - - # Explicitly delegate learning_rate. Normally hyperparameters are delegated in - # __getattribute__, but if a hyperparameter is not in self._optimizer._hyper - # (e.g. because self._optimizer itself wraps another optimizer), then it won't - # be delegated. Since learning_rate is a very commonly accessed - # hyperparameter, we delegate it here. - @property - def learning_rate(self): - return self._optimizer.learning_rate - - @learning_rate.setter - def learning_rate(self, value): - self._optimizer.learning_rate = value - - @property - def lr(self): - return self._optimizer.learning_rate - - @lr.setter - def lr(self, value): - self._optimizer.lr = value - - # We do not override some OptimizerV2 methods. For each, we describe why we do - # not delegate them to self._optimizer: - # * get_updates: get_updates() calls get_gradients(). Since we override - # get_gradients(), we cannot delegate get_updates() to self._optimizer, - # otherwise the overridden get_gradients() method would not be called. - # Luckily, get_updates() does not access any OptimizerV2 fields, so - # inheriting the OptimizerV2 version works fine. - # * minimize: We don't delegate for a similar as get_updates(): it calls - # both self._compute_gradients() and self.apply_gradients(), and both need - # to have the LossScaleOptimizer version called. - - # TODO(reedwm): Maybe throw an error if mixed precision is used without this - # optimizer being used. - - -class LossScaleOptimizerV3(tf.__internal__.tracking.DelegatingTrackableMixin, - optimizer_experimental.Optimizer, - BaseLossScaleOptimizer): - """An optimizer that applies loss scaling to prevent numeric underflow. - - This is a copy of the `mixed_precision.LossScaleOptimizer` class - defined above, except it subclasses and wraps the new experimental Optimizer - class instead of the `tf.keras.optimizers.Optimizer` class. Some of the - methods this class defines and calls are different compared to - LossScaleOptimizer due to the differences between the two Optimizer base - classes. Additionally, this class does not support the legacy graph mode, but - LossScaleOptimizer does. - - Since the new experimental Optimizer does not have a hyperparameter concept, - LossScaleOptimizerV3 does not delegate arbitrary hyperparameter accesses to - the inner optimizer, unlike LossScaleOptimizer. LossScaleOptimizerV3 does - delegate the "learning_rate" attribute, however. - """ - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self, inner_optimizer, dynamic=True, initial_scale=None, - dynamic_growth_steps=None): - if not isinstance(inner_optimizer, optimizer_experimental.Optimizer): - if isinstance(inner_optimizer, optimizer_v2.OptimizerV2): - # Give better error message if the OptimizerV2 class is passed instead - # of the new experimental optimizer. + raise ValueError( + 'When passing a DynamicLossScale to "loss_scale", ' + "DynamicLossScale.multiplier must be 2. 
Got: " + f"{loss_scale}" + ) + return LossScaleOptimizer( + optimizer, + initial_scale=loss_scale.initial_loss_scale, + dynamic_growth_steps=loss_scale.increment_period, + ) + elif isinstance(loss_scale, tf.compat.v1.mixed_precision.LossScale): raise TypeError( - f'You passed a `tf.keras.optimizer.Optimizer` instance to ' - f'LossScaleOptimizerV3, but only the new experimental optimizer ' - f'defined in keras/optimizer_expeirmental/optimizer.py can be ' - f'passed. Please use `tf.keras.mixed_precision.LossScaleOptimizer` ' - f'instead of LossScaleOptimizerV3, as the former supports ' - f'`tf.keras.optimizer.Optimizer`s. Got optimizer: ' - f'{inner_optimizer}') - raise TypeError(f'"inner_optimizer" must be an instance of ' - f'Optimizer, but got: {inner_optimizer}.') - if not isinstance(dynamic, bool): - # Catch errors if a user incorrectly passes a string or float to the - # second argument argument, as this was commonly done for the now-removed - # LossScaleOptimizerV1. - raise TypeError(f'"dynamic" argument to LossScaleOptimizer.__init__ must ' - f'be a bool, but got: {repr(dynamic)}') - if isinstance(inner_optimizer, LossScaleOptimizerV3): - raise TypeError(f'LossScaleOptimizer cannot wrap another ' - f'LossScaleOptimizer, but got: {inner_optimizer}') - _raise_if_strategy_unsupported() - if getattr(inner_optimizer, '_is_wrapped_by_loss_scale_optimizer', False): - # TODO(reedwm): Maybe support this. The difficulty is that LSO has the - # same checkpoint format as the inner optimizer, so multiple LSOs wrapping - # the same optimizer causes the checkpointing logic to become confused. - raise ValueError('"inner_optimizer" is already wrapped by a ' - 'LossScaleOptimizer. An optimizer can only be wrapped ' - 'by a single LossScaleOptimizer') - self._optimizer = inner_optimizer - self._optimizer._is_wrapped_by_loss_scale_optimizer = True - - # We don't call super().__init__, since we do not want to call Optimizer's - # constructor. 
- tf.__internal__.tracking.DelegatingTrackableMixin.__init__(self, - self._optimizer) - - if dynamic: - if initial_scale is None: - initial_scale = _DEFAULT_INITIAL_SCALE - if dynamic_growth_steps is None: - dynamic_growth_steps = _DEFAULT_GROWTH_STEPS - self._loss_scale = _DynamicLossScaleState( - initial_scale, dynamic_growth_steps, multiplier=2) - self._track_trackable(self._loss_scale, 'loss_scale') - else: - if initial_scale is None: - raise ValueError('"initial_scale" must be specified if "dynamic" is ' - 'False') - self._loss_scale = float(initial_scale) - if dynamic_growth_steps is not None: - raise ValueError(f'"dynamic_growth_steps" must be None if "dynamic" ' - f'is False, but got: {dynamic_growth_steps}') - - # Used to track whether get_scaled_loss() and get_unscaled_gradients() have - # been called - self._loss_has_been_scaled = False - self._gradients_have_been_unscaled = False - - @property - def dynamic(self): - return isinstance(self._loss_scale, _DynamicLossScaleState) - - @property - def loss_scale(self): - if isinstance(self._loss_scale, _DynamicLossScaleState): - return tf.convert_to_tensor( - self._loss_scale.current_loss_scale) - else: - return tf.convert_to_tensor(self._loss_scale) - - @property - def dynamic_counter(self): - if isinstance(self._loss_scale, _DynamicLossScaleState): - return self._loss_scale.counter - else: - return None - - @property - def initial_scale(self): - if isinstance(self._loss_scale, _DynamicLossScaleState): - return self._loss_scale.initial_loss_scale - else: - return self._loss_scale - - @property - def dynamic_growth_steps(self): - if isinstance(self._loss_scale, _DynamicLossScaleState): - return self._loss_scale.growth_steps - else: - return None - - @property - def inner_optimizer(self): - return self._optimizer - - def get_scaled_loss(self, loss): - self._loss_has_been_scaled = True - if callable(loss): - def new_loss(): - loss_val = loss() - return loss_val * tf.cast(self.loss_scale, loss_val.dtype) - return new_loss - else: - return loss * tf.cast(self.loss_scale, loss.dtype) - - def get_unscaled_gradients(self, grads): - self._gradients_have_been_unscaled = True - loss_scale_reciprocal = 1. / self.loss_scale - return [ - _multiply_gradient(g, loss_scale_reciprocal) if g is not None else None - for g in grads - ] - - def compute_gradients(self, loss, var_list, tape=None): - tape = tf.GradientTape() if tape is None else tape - with tape: - loss = self.get_scaled_loss(loss) - grads_and_vars = self._optimizer.compute_gradients( # pylint: disable=protected-access - loss, - var_list, - tape=tape) - grads = [g for g, _ in grads_and_vars] - weights = [v for _, v in grads_and_vars] - unscaled_grads = self.get_unscaled_gradients(grads) - return list(zip(unscaled_grads, weights)) - - def apply_gradients(self, - grads_and_vars, - skip_gradients_aggregation=False): - if tf.distribute.in_cross_replica_context(): - raise ValueError('apply_gradients() must be called in a replica context.') - # We check for the strategy here despite already checking in the constructor - # as frequently the optimizer is created outside the strategy's scope. - _raise_if_strategy_unsupported() - _maybe_warn_about_scaling(self._loss_has_been_scaled, - self._gradients_have_been_unscaled) - - grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) - if not skip_gradients_aggregation: - # We must aggregate the gradients here instead of in - # self.optimizer.apply_gradients, so that any NaN or Inf gradients are - # propagated to each replica. 
If any replica has a NaN or Inf gradient, - # they must all have a NaN or Inf gradient so that they all skip the step. - # pylint: disable=protected-access - grads_and_vars = self._optimizer.aggregate_gradients(grads_and_vars) - # pylint: enable=protected-access - - grads_and_vars = tuple(grads_and_vars) - grads = [g for g, _ in grads_and_vars] - # We do not want DistributionStrategy to unwrap any MirroredVariables in - # grads_and_vars, because even in a replica context, the wrapped - # optimizer expects mirrored variables. So we wrap the variables with an - # _UnwrapPreventer, preventing DistributionStrategy from unwrapping the - # MirroredVariables. - wrapped_vars = _UnwrapPreventer([v for _, v in grads_and_vars]) - - def do_not_apply_fn(): - # Normally self._optimizer.iterations is incremented in - # self._optimizer.apply_gradients(). Since that is not called in this - # branch, we increment it here instead. - self._optimizer.iterations.assign_add(1, read_value=False) - - def _if_should_apply_grads(grads): - if isinstance(self._loss_scale, _DynamicLossScaleState): - _, should_apply_grad = self._loss_scale.update(grads) - return should_apply_grad - else: - return True - - if tf.__internal__.distribute.strategy_supports_no_merge_call(): - should_apply_grads = _if_should_apply_grads(grads) - def apply_fn(): - return self._apply_gradients(grads, wrapped_vars) - tf.__internal__.smart_cond.smart_cond(should_apply_grads, apply_fn, - do_not_apply_fn) + "Passing a LossScale that is not a FixedLossScale or a " + f"DynamicLossScale is not supported. Got: {loss_scale}" + ) else: - - def _apply_gradients_cross_replica(distribution, grads, wrapped_vars): - should_apply_grads = _if_should_apply_grads(grads) - - def apply_fn(): - distribution.extended.call_for_each_replica( - self._apply_gradients, - args=(grads, wrapped_vars)) - - # Note: We must call this cond() in a cross-replica context. - # DistributionStrategy does not support having a cond in a replica - # context with a branch that calls `merge_call`, and - # self._optimizer.apply_gradients calls `merge_call`. - tf.__internal__.smart_cond.smart_cond(should_apply_grads, apply_fn, - do_not_apply_fn) - tf.distribute.get_replica_context().merge_call( - _apply_gradients_cross_replica, - args=(grads, wrapped_vars)) - - def _apply_gradients(self, grads, wrapped_vars): - # Pass skip_gradients_aggregation=True since LossScaleOptimizer - # already aggregated the gradients. 
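The scale-then-unscale contract that `apply_gradients` assumes is easiest to see in a custom training step; a minimal eager sketch using only the public API:

```python
import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(tf.keras.optimizers.SGD(1.0))
var = tf.Variable(5.0)

with tf.GradientTape() as tape:
    loss = var * 2.0
    scaled_loss = opt.get_scaled_loss(loss)       # loss * loss_scale
scaled_grads = tape.gradient(scaled_loss, [var])
grads = opt.get_unscaled_gradients(scaled_grads)  # gradient / loss_scale
opt.apply_gradients(zip(grads, [var]))            # also updates the dynamic scale
print(var.numpy())  # 3.0: the gradient of 2*var is 2, lr is 1, so 5 - 2
```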
- self._optimizer.apply_gradients( - list(zip(grads, wrapped_vars.value)), - skip_gradients_aggregation=True) - - def get_config(self): - serialized_optimizer = optimizers.serialize(self._optimizer) - return { - 'inner_optimizer': serialized_optimizer, - 'dynamic': self.dynamic, - 'initial_scale': self.initial_scale, - 'dynamic_growth_steps': self.dynamic_growth_steps, - } - - @classmethod - def from_config(cls, config, custom_objects=None): - config = config.copy() # Make a copy, since we mutate config - inner_optimizer = optimizers.deserialize( - config['inner_optimizer'], custom_objects=custom_objects) - del config['inner_optimizer'] - return cls(inner_optimizer, **config) - - @property - def iterations(self): - return self._optimizer.iterations - - @iterations.setter - def iterations(self, variable): - self._optimizer.iterations = variable - - @property - def learning_rate(self): - return self._optimizer.learning_rate - - @learning_rate.setter - def learning_rate(self, learning_rate): - self._optimizer.learning_rate = learning_rate - - -class FakeOptimizerForRestoration(tf.__internal__.tracking.Trackable): - """A fake optimizer used to support restoring TensorFlow 2.2 checkpoints. - - The checkpoint format for LossScaleOptimizers changed after TF 2.2. This class - exists to support restoring TF 2.2 checkpoints in newer version of TensorFlow. - - In TF 2.2, LossScaleOptimizer would track the wrapped optimizer by calling the - following in LossScaleOptimizer.__init__ - - ``` - self._track_trackable(self._optimizer, 'base_optimizer') - ``` - - This means a dependency from the LossScaleOptimizer to the wrapped optimizer - would be stored in the checkpoint. However now, the checkpoint format with a - LossScaleOptimizer is the same as the format without a LossScaleOptimizer, - except the loss scale is also stored. This means there is no dependency from - the LossScaleOptimizer to the wrapped optimizer. Instead, the - LossScaleOptimizer acts as if it is the wrapped optimizer, from a checkpoint's - perspective, by overriding all Trackable methods and delegating them to the - wrapped optimizer. - - To allow restoring TF 2.2. checkpoints, LossScaleOptimizer adds a dependency - on this class instead of the inner optimizer. When restored, this class will - instead restore the slot variables of the inner optimizer. Since this class - has no variables, it does not affect the checkpoint when saved. - """ - - def __init__(self, optimizer): - self._optimizer = optimizer - - def get_slot_names(self): - return self._optimizer.get_slot_names() - - def _create_or_restore_slot_variable(self, slot_variable_position, slot_name, - variable): - return self._optimizer._create_or_restore_slot_variable( # pylint: disable=protected-access - slot_variable_position, slot_name, variable) - - -def _create_loss_scale_optimizer_from_v1_loss_scale(optimizer, loss_scale): - """Creates an LSO from a tf.compat.v1.mixed_precision.LossScale. - - This is only used to pass to - `tf.__internal__.mixed_precision.register_loss_scale_wrapper` below, which is - called so that - `tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite` can - wrap a Keras optimizer with a LossScaleOptimizer. - - Args: - optimizer: An OptimizerV2 instance. - loss_scale: A `tf.compat.v1.mixed_precision.LossScale` instance - - Returns: - A LossScaleOptimizer that wraps `optimizer` and uses the same loss scaling - algorithm as `loss_scale`. 
- """ - if isinstance(loss_scale, (int, float)): - return LossScaleOptimizer(optimizer, dynamic=False, - initial_scale=loss_scale) - elif isinstance(loss_scale, tf.compat.v1.mixed_precision.FixedLossScale): - ls_val = loss_scale._loss_scale_value # pylint: disable=protected-access - return LossScaleOptimizer(optimizer, dynamic=False, - initial_scale=ls_val) - elif loss_scale == 'dynamic': - return LossScaleOptimizer(optimizer) - elif isinstance(loss_scale, tf.compat.v1.mixed_precision.DynamicLossScale): - if loss_scale.multiplier != 2: - raise ValueError(f'When passing a DynamicLossScale to "loss_scale", ' - f'DynamicLossScale.multiplier must be 2. Got: ' - f'{loss_scale}') - return LossScaleOptimizer( - optimizer, initial_scale=loss_scale.initial_loss_scale, - dynamic_growth_steps=loss_scale.increment_period) - elif isinstance(loss_scale, tf.compat.v1.mixed_precision.LossScale): - raise TypeError(f'Passing a LossScale that is not a FixedLossScale or a ' - f'DynamicLossScale is not supported. Got: {loss_scale}') - else: - raise ValueError(f'Invalid value passed to loss_scale. loss_scale ' - f'must be the string "dynamic" (recommended), an int, ' - f'a float, a FixedLossScale, or a DynamicLossScale. Got ' - f'value: {loss_scale}') + raise ValueError( + "Invalid value passed to loss_scale. loss_scale " + 'must be the string "dynamic" (recommended), an int, ' + "a float, a FixedLossScale, or a DynamicLossScale. Got " + f"value: {loss_scale}" + ) tf.__internal__.mixed_precision.register_loss_scale_wrapper( - optimizer_v2.OptimizerV2, _create_loss_scale_optimizer_from_v1_loss_scale, - LossScaleOptimizer) + optimizer_v2.OptimizerV2, + _create_loss_scale_optimizer_from_v1_loss_scale, + LossScaleOptimizer, +) def _multiply_gradient(gradient, scale): - """Multiply a (possibly sparse) gradient by the given scale factor.""" - scale = tf.cast(scale, gradient.dtype) - if isinstance(gradient, tf.IndexedSlices): - return tf.IndexedSlices( - gradient.values * scale, - gradient.indices, - dense_shape=gradient.dense_shape) - else: - return gradient * scale + """Multiply a (possibly sparse) gradient by the given scale factor.""" + scale = tf.cast(scale, gradient.dtype) + if isinstance(gradient, tf.IndexedSlices): + return tf.IndexedSlices( + gradient.values * scale, + gradient.indices, + dense_shape=gradient.dense_shape, + ) + else: + return gradient * scale def strategy_supports_loss_scaling(): - """Returns True if the current Strategy supports loss scaling.""" - if not tf.distribute.has_strategy(): - return True - strategy = tf.distribute.get_strategy() - # Strategies are supported if either there is only one replica or if variables - # are replicated per device. Otherwise, the current model.fit() implementation - # and most custom training loops incorrectly unscale the gradients. Currently, - # gradients are unscaled once per compute replica, but they should be unscaled - # once per variable replica. When there is one variable replica for each - # compute replica, this works fine, but otherwise issues will occur. - # TODO(reedwm): Support all strategies. 
- return isinstance(strategy, ( - tf.distribute.MultiWorkerMirroredStrategy, - tf.compat.v1.distribute.experimental.MultiWorkerMirroredStrategy, - tf.distribute.OneDeviceStrategy, - tf.compat.v1.distribute.OneDeviceStrategy, - tf.distribute.MirroredStrategy, - tf.compat.v1.distribute.MirroredStrategy, - )) + """Returns True if the current Strategy supports loss scaling.""" + if not tf.distribute.has_strategy(): + return True + strategy = tf.distribute.get_strategy() + # Strategies are supported if either there is only one replica or if + # variables are replicated per device. Otherwise, the current model.fit() + # implementation and most custom training loops incorrectly unscale the + # gradients. Currently, gradients are unscaled once per compute replica, but + # they should be unscaled once per variable replica. When there is one + # variable replica for each compute replica, this works fine, but otherwise + # issues will occur. + # TODO(reedwm): Support all strategies. + return ( + isinstance( + strategy, + ( + tf.distribute.MultiWorkerMirroredStrategy, + tf.compat.v1.distribute.experimental.MultiWorkerMirroredStrategy, # noqa: E501 + tf.distribute.OneDeviceStrategy, + tf.compat.v1.distribute.OneDeviceStrategy, + tf.distribute.MirroredStrategy, + tf.compat.v1.distribute.MirroredStrategy, + ), + ) + or dtensor_utils.running_with_dtensor_strategy() + ) def _raise_if_strategy_unsupported(): - """Raise an exception if the current strategy doesn't support loss scaling.""" - if not strategy_supports_loss_scaling(): - strategy = tf.distribute.get_strategy() - if isinstance(strategy, - (tf.distribute.experimental.TPUStrategy, - tf.compat.v1.distribute.experimental.TPUStrategy, - tf.distribute.TPUStrategy)): - raise ValueError( - 'Loss scaling is not supported with TPUStrategy. Loss scaling is ' - 'unnecessary with TPUs, since they support bfloat16 instead of ' - 'float16 and bfloat16 does not require loss scaling. You should ' - 'remove the use of the LossScaleOptimizer when TPUs are used.') - else: - raise ValueError(f'Loss scaling is not supported with the ' - f'tf.distribute.Strategy: ' - f'{strategy.__class__.__name__}. Try using a different ' - f'Strategy, e.g. a MirroredStrategy') + """Raise an exception if the current strategy doesn't support loss + scaling.""" + if not strategy_supports_loss_scaling(): + strategy = tf.distribute.get_strategy() + if isinstance( + strategy, + ( + tf.distribute.experimental.TPUStrategy, + tf.compat.v1.distribute.experimental.TPUStrategy, + tf.distribute.TPUStrategy, + ), + ): + raise ValueError( + "Loss scaling is not supported with TPUStrategy. Loss scaling " + "is unnecessary with TPUs, since they support bfloat16 instead " + "of float16 and bfloat16 does not require loss scaling. You " + "should remove the use of the LossScaleOptimizer when TPUs are " + "used." + ) + else: + raise ValueError( + "Loss scaling is not supported with the " + "tf.distribute.Strategy: " + f"{strategy.__class__.__name__}. Try using a different " + "Strategy, e.g. 
a MirroredStrategy" + ) diff --git a/keras/mixed_precision/loss_scale_optimizer_test.py b/keras/mixed_precision/loss_scale_optimizer_test.py index fd495d51ee3d..e7c2885bca79 100644 --- a/keras/mixed_precision/loss_scale_optimizer_test.py +++ b/keras/mixed_precision/loss_scale_optimizer_test.py @@ -17,23 +17,27 @@ import os from unittest import mock +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized from keras import optimizers from keras.mixed_precision import loss_scale_optimizer from keras.mixed_precision import test_util as mp_test_util -from keras.optimizers.optimizer_experimental import optimizer as optimizer_experimental -from keras.optimizers.optimizer_experimental import sgd as sgd_experimental -from keras.optimizers.optimizer_v2 import adam -from keras.optimizers.optimizer_v2 import gradient_descent -from keras.optimizers.optimizer_v2 import optimizer_v2 +from keras.optimizers import adam as adam_experimental +from keras.optimizers import optimizer as optimizer_experimental +from keras.optimizers import sgd as sgd_experimental +from keras.optimizers.legacy import adam +from keras.optimizers.legacy import gradient_descent +from keras.optimizers.legacy import optimizer_v2 +from keras.optimizers.schedules import learning_rate_schedule from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf - -from tensorflow.python.framework import test_util as tf_test_utils -from tensorflow.python.keras.optimizer_v2 import gradient_descent as legacy_sgd +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) from tensorflow.python.platform import tf_logging # If called outside any strategy.scope() calls, this will return the default @@ -42,1142 +46,1340 @@ def create_mirrored_strategy(): - if tf.config.list_logical_devices('GPU'): - return tf.distribute.MirroredStrategy(['cpu:0', 'gpu:0']) - else: - return tf.distribute.MirroredStrategy(['cpu:0']) + if tf.config.list_logical_devices("GPU"): + return tf.distribute.MirroredStrategy(["cpu:0", "gpu:0"]) + else: + return tf.distribute.MirroredStrategy(["cpu:0"]) STRATEGY_FNS = [default_strategy_fn, create_mirrored_strategy] def create_sgd(base_optimizer_cls, *args, **kwargs): - """Creates an SGD optimizer. - - Will return either the new experimental SGD optimizer subclassing from - `optimizer_experimental.Optimizer` or the old SGD optimizer subclassing from - `optimizer_v2.OptimizerV2`, depending on `base_optimizer_cls`. - - Args: - base_optimizer_cls: What the superclass of the returned SGD optimizer will - be. Either `optimizer_experimental.Optimizer` or - `optimizer_v2.OptimizerV2`. - *args: Arguments to pass to the SGD constructor - **kwargs: Keyword arguments to pass to the SGD constructor. - - Returns: - An SGD optimizer. - """ - if base_optimizer_cls == optimizer_v2.OptimizerV2: - return gradient_descent.SGD(*args, **kwargs) - else: - assert base_optimizer_cls == optimizer_experimental.Optimizer, ( - f'Got invalid base_optimizer_cls: {base_optimizer_cls}') - return sgd_experimental.SGD(*args, **kwargs) + """Creates an SGD optimizer. + + Will return either the new experimental SGD optimizer subclassing from + `optimizer_experimental.Optimizer` or the old SGD optimizer subclassing from + `optimizer_v2.OptimizerV2`, depending on `base_optimizer_cls`. + + Args: + base_optimizer_cls: What the superclass of the returned SGD optimizer will + be. 
Either `optimizer_experimental.Optimizer` or + `optimizer_v2.OptimizerV2`. + *args: Arguments to pass to the SGD constructor + **kwargs: Keyword arguments to pass to the SGD constructor. + + Returns: + An SGD optimizer. + """ + if base_optimizer_cls == optimizer_v2.OptimizerV2: + return gradient_descent.SGD(*args, **kwargs) + else: + assert ( + base_optimizer_cls == optimizer_experimental.Optimizer + ), f"Got invalid base_optimizer_cls: {base_optimizer_cls}" + return sgd_experimental.SGD(*args, **kwargs) # TODO(b/215568552): Remove this as the delegation is handled by metaclass. -def create_lso(inner_optimizer, - dynamic=True, - initial_scale=None, - dynamic_growth_steps=None): - """Creates a LossScaleOptimizer. - - Creates either the new LossScaleOptimizerV3 subclassing from - `optimizer_experimental.Optimizer` or the old LossScaleOptimizer subclassing - from `optimizer_v2.OptimizerV2`, depending on the type of `inner_optimizer`. - - Args: - inner_optimizer: The optimizer to wrap. Either an - `optimizer_experimental.Optimizer` or an `optimizer_v2.OptimizerV2`. - dynamic: Whether dynamic loss scaling is used. - initial_scale: The initial loss scale. - dynamic_growth_steps: How frequently to increase the dynamic loss scale. - - Returns: - Returns a LossScaleOptimizerV3 or a LossScaleOptimizer, depending on the - type of `inner_optimizer`. - """ - return loss_scale_optimizer.BaseLossScaleOptimizer( - inner_optimizer, - dynamic=dynamic, - initial_scale=initial_scale, - dynamic_growth_steps=dynamic_growth_steps) +def create_lso( + inner_optimizer, dynamic=True, initial_scale=None, dynamic_growth_steps=None +): + """Creates a LossScaleOptimizer. + + Creates either the new LossScaleOptimizerV3 subclassing from + `optimizer_experimental.Optimizer` or the old LossScaleOptimizer subclassing + from `optimizer_v2.OptimizerV2`, depending on the type of `inner_optimizer`. + + Args: + inner_optimizer: The optimizer to wrap. Either an + `optimizer_experimental.Optimizer` or an `optimizer_v2.OptimizerV2`. + dynamic: Whether dynamic loss scaling is used. + initial_scale: The initial loss scale. + dynamic_growth_steps: How frequently to increase the dynamic loss scale. + + Returns: + Returns a LossScaleOptimizerV3 or a LossScaleOptimizer, depending on the + type of `inner_optimizer`. + """ + return loss_scale_optimizer.BaseLossScaleOptimizer( + inner_optimizer, + dynamic=dynamic, + initial_scale=initial_scale, + dynamic_growth_steps=dynamic_growth_steps, + ) def opt_and_strategy_and_mode_combinations(): - """Returns combinations for running with multiple optimizers and strategies. - - Returns: - Combinations that run with both OptimizerV2 and the experimental optimizer; - and with the default strategy and mirrored strategy; and in both graph and - eager mode. - """ - # For the experimental optimizer, don't use graph mode directly since it's - # unsupported. Instead, run both without and with a tf.function, in order to - # test both graph and eager mode. - experimental_opt_combinations = test_combinations.combine( - opt_cls=optimizer_experimental.Optimizer, - strategy_fn=STRATEGY_FNS, - mode='eager', - use_tf_function=[False, True]) - orig_opt_combinations = test_combinations.combine( - opt_cls=optimizer_v2.OptimizerV2, - strategy_fn=STRATEGY_FNS, - mode=['graph', 'eager'], - use_tf_function=False) - return experimental_opt_combinations + orig_opt_combinations + """Returns combinations for running with multiple optimizers and strategies. 
+ + Returns: + Combinations that run with both OptimizerV2 and the experimental + optimizer; and with the default strategy and mirrored strategy; and in + both graph and eager mode. + """ + # For the experimental optimizer, don't use graph mode directly since it's + # unsupported. Instead, run both without and with a tf.function, in order to + # test both graph and eager mode. + experimental_opt_combinations = test_combinations.combine( + opt_cls=optimizer_experimental.Optimizer, + strategy_fn=STRATEGY_FNS, + mode="eager", + use_tf_function=[False, True], + ) + orig_opt_combinations = test_combinations.combine( + opt_cls=optimizer_v2.OptimizerV2, + strategy_fn=STRATEGY_FNS, + mode=["graph", "eager"], + use_tf_function=False, + ) + return experimental_opt_combinations + orig_opt_combinations def opt_combinations_only(): - """Returns two combinations for running with the two base optimizers.""" - experimental_opt_combinations = test_combinations.combine( - mode='eager', opt_cls=optimizer_experimental.Optimizer) - orig_opt_combination = test_combinations.combine( - opt_cls=optimizer_v2.OptimizerV2) - return experimental_opt_combinations + orig_opt_combination + """Returns two combinations for running with the two base optimizers.""" + experimental_opt_combinations = test_combinations.combine( + mode="eager", opt_cls=optimizer_experimental.Optimizer + ) + orig_opt_combination = test_combinations.combine( + opt_cls=optimizer_v2.OptimizerV2 + ) + return experimental_opt_combinations + orig_opt_combination @tf_test_utils.with_control_flow_v2 class LossScaleOptimizerTest(tf.test.TestCase, parameterized.TestCase): + def _run_if_in_graph_mode(self, val): + # Running only in graph mode is useful, because optimizers sometimes + # return a value that, in Graph mode, is runnable with self.evaluate. + # But in Eager mode, the optimizer already does the computations and the + # return value cannot be run. + if not tf.executing_eagerly(): + self.evaluate(val) + + def _eval_if_tensor(self, val): + # Calls self.evaluate on val if val is a Tensor or Variable. This is + # useful, since hyperparameters are tf.Variables on OptimizerV2 and are + # Python floats on the experimental optimizer. 
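Outside a TestCase, the normalization that `_eval_if_tensor` performs (its body continues just below) can be written directly; `eval_if_tensor` here is a hypothetical eager-mode stand-in for the helper, not part of the test suite:

```python
import tensorflow as tf

def eval_if_tensor(val):
    # OptimizerV2 stores hyperparameters as tf.Variables, while the
    # experimental optimizer mostly stores plain Python floats; normalize
    # both to a Python float for comparisons.
    if isinstance(val, (tf.Tensor, tf.Variable)):
        return float(val.numpy())
    return val

print(eval_if_tensor(tf.Variable(0.5)))  # 0.5
print(eval_if_tensor(0.5))               # 0.5
```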
+ return ( + self.evaluate(val) + if isinstance(val, (tf.Tensor, tf.Variable)) + else val + ) + + def _run_fn_with_grad_check(self, strategy, var, opt, expected_grad): + grad_check_fn = mp_test_util.create_identity_with_grad_check_fn( + expected_grad + ) + loss = lambda: grad_check_fn(var) / strategy.num_replicas_in_sync + return lambda: opt.minimize(loss, var_list=[var]) + + def testIsInstance(self): + optimizer = create_lso(sgd_experimental.SGD()) + self.assertIsInstance( + optimizer, loss_scale_optimizer.BaseLossScaleOptimizer + ) + + optimizer = create_lso(gradient_descent.SGD()) + self.assertIsInstance( + optimizer, loss_scale_optimizer.BaseLossScaleOptimizer + ) + + @test_combinations.generate(opt_and_strategy_and_mode_combinations()) + def testFixedLossScaleAppliedToLossWithMinimize( + self, opt_cls, strategy_fn, use_tf_function + ): + with strategy_fn().scope() as strategy: + var = tf.Variable([5.0]) + opt = create_sgd(opt_cls, 2.0) + loss_scale = 10.0 + opt = create_lso(opt, dynamic=False, initial_scale=loss_scale) + self.assertEqual(self.evaluate(opt.loss_scale), loss_scale) + self.assertIsInstance(opt.loss_scale, tf.Tensor) + # We need num_replicas_in_sync to divide loss_scale, otherwise + # loss_scale / strategy.num_replicas_in_sync will not be exact, + # which could lead to assertion failures due to rounding issues. + self.assertEqual(loss_scale % strategy.num_replicas_in_sync, 0) + run_fn = self._run_fn_with_grad_check( + strategy, var, opt, loss_scale / strategy.num_replicas_in_sync + ) + if use_tf_function: + run_fn = tf.function(run_fn) + run_op = strategy.experimental_run(run_fn) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + # The loss is the identity of the variable. Therefore the gradient + # is 1, and so the variable will be init_val - grad * lr == 5 - 1 * + # 2 == 3 + self.assertAllClose([3.0], self.evaluate(var)) + + def testFixedLossScaleAppliedToLossWithGetGradients(self): + with tf.Graph().as_default(): + var = tf.Variable([2.0]) + opt = gradient_descent.SGD(1.0) + loss_scale = 10.0 + opt = loss_scale_optimizer.LossScaleOptimizer( + opt, dynamic=False, initial_scale=loss_scale + ) + grad_check_fn = mp_test_util.create_identity_with_grad_check_fn( + loss_scale + ) + loss = grad_check_fn(var) + run_op = opt.get_gradients(loss, [var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # This will cause an assertion to run, as + # mp_test_util.create_identity_with_grad_check_fn added an assertion + # op. 
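Several of the tests that follow check that unscaling preserves gradient sparsity; standalone, the same check looks like this, relying on the `tf.IndexedSlices` branch of `_multiply_gradient` shown earlier in the diff:

```python
import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(), dynamic=False, initial_scale=2.0
)
sparse = tf.IndexedSlices(
    values=tf.constant([[4.0, 2.0], [8.0, 5.0]]),
    indices=tf.constant([1, 3]),
    dense_shape=tf.constant([5, 2]),
)
(unscaled,) = opt.get_unscaled_gradients([sparse])
assert isinstance(unscaled, tf.IndexedSlices)  # sparsity is preserved
print(unscaled.values.numpy())                 # [[2. 1.], [4. 2.5]]
```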
+ self.evaluate(run_op) + + @test_combinations.generate(opt_combinations_only()) + def testDynamicAttrsWithFixedLossScale(self, opt_cls): + opt = create_sgd(opt_cls) + opt = create_lso(opt, dynamic=False, initial_scale=2.0) + self.assertFalse(opt.dynamic) + self.assertIsNone(opt.dynamic_counter) + self.assertIsNone(opt.dynamic_growth_steps) + + @test_combinations.generate(opt_combinations_only()) + def testGetScaledLoss(self, opt_cls): + opt = create_sgd(opt_cls) + opt = create_lso(opt, dynamic=False, initial_scale=2.0) + loss = tf.convert_to_tensor(5.0) + self.assertEqual(10.0, self.evaluate(opt.get_scaled_loss(loss))) + self.assertEqual( + 10.0, self.evaluate(opt.get_scaled_loss(lambda: loss)()) + ) + loss = tf.convert_to_tensor(5.0, dtype="float16") + self.assertEqual(10.0, self.evaluate(opt.get_scaled_loss(loss))) + self.assertEqual( + 10.0, self.evaluate(opt.get_scaled_loss(lambda: loss)()) + ) + + @test_combinations.generate(opt_combinations_only()) + def testGetUnscaledGradients(self, opt_cls): + opt = create_sgd(opt_cls) + opt = create_lso(opt, dynamic=False, initial_scale=2) + scaled_grads = [ + tf.convert_to_tensor(3.0), + None, + tf.convert_to_tensor(-4.0, dtype="float16"), + ] + grads = opt.get_unscaled_gradients(scaled_grads) + grads = [self.evaluate(g) if g is not None else g for g in grads] + self.assertEqual([1.5, None, -2.0], grads) + + @test_combinations.generate(opt_combinations_only()) + def testGetUnscaledSparseGradients(self, opt_cls): + opt = create_sgd(opt_cls) + opt = create_lso(opt, dynamic=False, initial_scale=2) + sparse_scaled_grad = tf.IndexedSlices( + tf.convert_to_tensor([[4.0, 2.0], [8.0, 5.0]]), + tf.convert_to_tensor([1, 3], dtype="int32"), + dense_shape=tf.convert_to_tensor([5, 2], dtype="int32"), + ) + sparse_grad = opt.get_unscaled_gradients([sparse_scaled_grad])[0] + self.assertIsInstance(sparse_grad, tf.IndexedSlices) + self.assertAllEqual( + [[2.0, 1.0], [4.0, 2.5]], self.evaluate(sparse_grad.values) + ) + + @test_combinations.generate(opt_and_strategy_and_mode_combinations()) + def testDynamicLossScale(self, opt_cls, strategy_fn, use_tf_function): + strategy = strategy_fn() + learning_rate = 2.0 + expected_gradient = tf.Variable( + learning_rate / strategy.num_replicas_in_sync + ) + with strategy.scope(): + var = tf.Variable([5.0]) + opt = create_sgd(opt_cls, learning_rate) + opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=1) + self.assertEqual(opt.initial_scale, 2.0) + self.assertIsInstance(opt.initial_scale, float) + self.assertEqual(opt.dynamic_growth_steps, 1) + self.assertIsInstance(opt.dynamic_growth_steps, int) + + self.assertEqual( + opt.initial_scale % strategy.num_replicas_in_sync, 0 + ) + run_fn = self._run_fn_with_grad_check( + strategy, var, opt, expected_gradient + ) + if use_tf_function: + run_fn = tf.function(run_fn) + run_op = strategy.experimental_run(run_fn) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + # The loss is the identity of the variable. Therefore the gradient + # is 1, and so the variable will be init_val - grad * lr == 5 - 1 * + # 2 == 3 + self.assertAllClose([3.0], self.evaluate(var)) + + # Loss scale will be double, so the expected gradient is also + # doubled. + self.evaluate( + expected_gradient.assign( + 2 * learning_rate / strategy.num_replicas_in_sync + ) + ) + run_op = strategy.experimental_run(run_fn) + self._run_if_in_graph_mode(run_op) + # As before, the 2 is subtracted from the variable, making its new + # value 1. 
+ self.assertAllClose([1.0], self.evaluate(var)) + + @test_combinations.generate(opt_combinations_only()) + def testDynamicLossScaleDefaultValues(self, opt_cls): + opt = create_sgd(opt_cls) + opt = create_lso(opt) + self.assertEqual(opt.initial_scale, 2**15) + self.assertEqual(opt.dynamic_growth_steps, 2000) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(opt.loss_scale), 2**15) + + @test_combinations.generate(opt_and_strategy_and_mode_combinations()) + def testClipping(self, opt_cls, strategy_fn, use_tf_function): + strategy = strategy_fn() + learning_rate = 2.0 + for clip_type in ("clipnorm", "global_clipnorm", "clipvalue"): + with strategy.scope(), self.subTest(clip_type=clip_type): + var = tf.Variable([5.0]) + opt = create_sgd(opt_cls, learning_rate, **{clip_type: 2.0}) + opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=1) + if isinstance(opt, loss_scale_optimizer.LossScaleOptimizer): + # Only OptimizerV2 exposes the clipping attributes + self.assertEqual(getattr(opt, clip_type), 2.0) + self.assertEqual( + opt.initial_scale % strategy.num_replicas_in_sync, 0 + ) + + loss = lambda: var * 4 / strategy.num_replicas_in_sync + run_fn = lambda: opt.minimize(loss, var_list=[var]) + if use_tf_function: + run_fn = tf.function(run_fn) + + # Test running with clipped gradients + run_op = strategy.experimental_run(run_fn) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + # The gradient is 4 but is clipped to 2, so the variable will be + # init_val - clipped_grad * lr == 5 - 2 * 2 == 1 + self.assertAllClose([1.0], self.evaluate(var)) + self.assertEqual(self.evaluate(opt.loss_scale), 4) + + if isinstance(opt, loss_scale_optimizer.LossScaleOptimizerV3): + # Only OptimizerV2 exposes the clipping attributes, so we + # cannot set them on the new optimizer + return + # Test changing the clip amount and running again + setattr(opt, clip_type, 3.0) + run_op = strategy.experimental_run(run_fn) + self._run_if_in_graph_mode(run_op) + # The gradient is 4 but is clipped to 3, so the variable will be + # prev_var - clipped_grad * lr == 1 - 3 * 2 == -5 + self.assertAllClose([-5.0], self.evaluate(var)) + self.assertEqual(self.evaluate(opt.loss_scale), 8) + + # Test Inf gradients are still skipped instead of being clipped + loss = lambda: var * float("Inf") + run_fn = lambda: opt.minimize(loss, var_list=[var]) + run_op = strategy.experimental_run(run_fn) + self._run_if_in_graph_mode(run_op) + self.assertAllClose( + [-5.0], self.evaluate(var) + ) # Var does not change + self.assertEqual(self.evaluate(opt.loss_scale), 4) + + @test_combinations.generate(opt_and_strategy_and_mode_combinations()) + def testDynamicUpdate(self, opt_cls, strategy_fn, use_tf_function): + with strategy_fn().scope() as strategy: + var = tf.Variable([1.0, 2.0]) + opt = create_sgd(opt_cls, 1.0) + opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=1) + + # Test optimizer with finite gradients + loss = lambda: var * 2.0 / strategy.num_replicas_in_sync + run_fn = lambda: opt.minimize(loss, var_list=[var]) + if use_tf_function: + run_fn = tf.function(run_fn) + run_op = strategy.experimental_run(run_fn) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + # Gradient is 2, so variable will have 2 subtracted from it + self.assertAllClose([-1.0, 0.0], self.evaluate(var)) + # Loss scale has doubled from 2 to 4 + self.assertEqual(4.0, self.evaluate(opt.loss_scale)) + + # Test optimizer with NaN 
gradients + loss = lambda: var * float("NaN") + run_fn = lambda: opt.minimize(loss, var_list=[var]) + run_op = strategy.experimental_run(run_fn) + self._run_if_in_graph_mode(run_op) + # Variable should not change from before, due to NaN gradients. + self.assertAllClose(self.evaluate(var), [-1.0, 0.0]) + # Loss scale should halve due to NaN gradients. + self.assertEqual(2.0, self.evaluate(opt.loss_scale)) + + @test_combinations.generate(opt_and_strategy_and_mode_combinations()) + def testDynamicLossScaleWithFloat16Loss( + self, opt_cls, strategy_fn, use_tf_function + ): + strategy = strategy_fn() + learning_rate = 2.0 + with strategy.scope(): + var = tf.Variable([5.0]) + opt = create_sgd(opt_cls, learning_rate) + opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=1) + + def loss(): + return tf.cast(var / strategy.num_replicas_in_sync, "float16") + + run_fn = lambda: opt.minimize(loss, var_list=[var]) + if use_tf_function: + run_fn = tf.function(run_fn) + run_op = strategy.experimental_run(run_fn) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + # The loss is the identity of the variable. Therefore the gradient + # is 1, and so the variable will be init_val - grad * lr == 5 - 1 * + # 2 == 3 + self.assertAllClose([3.0], self.evaluate(var)) + + @test_combinations.generate(opt_and_strategy_and_mode_combinations()) + def testNanOnOneReplicaOnly(self, opt_cls, strategy_fn, use_tf_function): + if strategy_fn == default_strategy_fn: + self.skipTest("The test is only useful for non-default strategies") + if not tf.test.is_gpu_available(): + self.skipTest("Test requires GPU") + if ( + not tf.executing_eagerly() + and not tf.compat.v1.control_flow_v2_enabled() + ): + self.skipTest( + "b/181283011: GradientTape does not work properly with " + "V1 control flow, and opt.minimize uses GradientTape" + ) + with strategy_fn().scope() as strategy: + var = tf.Variable([1.0, 2.0]) + opt = create_sgd(opt_cls, 1.0) + opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=2) + + def loss(): + rep_id = ( + tf.distribute.get_replica_context().replica_id_in_sync_group + ) + # The last element of last replica's gradient is NaN. + return tf.cond( + tf.equal(rep_id, 0), + lambda: var * 2.0, + lambda: var * tf.constant([1.0, float("NaN")]), + ) + + run_fn = lambda: opt.minimize(loss, var_list=[var]) + if use_tf_function: + run_fn = tf.function(run_fn) + run_op = strategy.experimental_run(run_fn) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + # Variable should not change from before, due to NaN gradients. + self.assertAllClose(self.evaluate(var), [1.0, 2.0]) + # Loss scale should halve due to NaN gradients. 
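The halving asserted here, together with the doubling after `dynamic_growth_steps` good steps, can be reproduced in a few eager lines; a sketch against the public wrapper:

```python
import tensorflow as tf

var = tf.Variable(1.0)
opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(1.0), initial_scale=4.0, dynamic_growth_steps=1
)

opt.minimize(lambda: var * 1.0, [var])           # finite gradients
print(opt.loss_scale.numpy())                    # 8.0: the scale doubled

opt.minimize(lambda: var * float("nan"), [var])  # non-finite gradients
print(opt.loss_scale.numpy())                    # 4.0: step skipped, scale halved
```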
+ self.assertEqual(1.0, self.evaluate(opt.loss_scale)) + + def testCustomAggregater(self): + def gradient_aggregator(grads_and_vars): + # Simulate an all-reduce where a replica has a NaN gradient by + # setting the last gradient to NaN + grads_and_vars = list(grads_and_vars) + last_grad, last_var = grads_and_vars[-1] + grads_and_vars[-1] = (last_grad * float("NaN"), last_var) + return grads_and_vars + + var = tf.Variable([1.0, 2.0]) + opt = gradient_descent.SGD(1.0, gradient_aggregator=gradient_aggregator) + opt = loss_scale_optimizer.LossScaleOptimizer( + opt, initial_scale=2, dynamic_growth_steps=2 + ) + + loss = lambda: var * 2 + run_op = opt.minimize(loss, var_list=[var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + # Variable should not change from before, due to NaN gradients. + self.assertAllClose(self.evaluate(var), [1.0, 2.0]) + # Loss scale should halve due to NaN gradients. + self.assertEqual(1.0, self.evaluate(opt.loss_scale)) + + @test_combinations.generate(opt_and_strategy_and_mode_combinations()) + def testDynamicLossScaleWithSlots( + self, opt_cls, strategy_fn, use_tf_function + ): + strategy_obj = strategy_fn() + if ( + isinstance(strategy_obj, tf.distribute.MirroredStrategy) + and tf.compat.v1.control_flow_v2_enabled() + and not tf.executing_eagerly() + ): + self.skipTest("b/138667997") + with strategy_obj.scope() as strategy: + var = tf.Variable([1.0, 2.0]) + # An SGD optimizer with momentum has slot variables. + opt = create_sgd(opt_cls, 1.0, momentum=1.0) + initial_scale = 2.0 + opt = create_lso( + opt, initial_scale=initial_scale, dynamic_growth_steps=1 + ) + loss = lambda: var / strategy.num_replicas_in_sync + run_fn = lambda: opt.minimize(loss, var_list=[var]) + if use_tf_function: + run_fn = tf.function(run_fn) + run_op = strategy.experimental_run(run_fn) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + # The momentum accumulator starts at 0 and the gradient is 1. The + # accumulator is incremented by the gradient, so it is now 1. Then + # the variable is subtracted by the accumulator, so the variable is + # subtracted by 1. + self.assertAllClose([0.0, 1.0], self.evaluate(var)) + self.assertEqual(self.evaluate(opt.loss_scale), initial_scale * 2) + + run_op = strategy.experimental_run(run_fn) + self._run_if_in_graph_mode(run_op) + # The momentum accumulator was 1 before this step and the gradient + # is 1. The accumulator is incremented by the gradient, so it is + # now 2. Then the variable is subtracted by the accumulator, so the + # variable is subtracted by 2. + self.assertAllClose([-2.0, -1.0], self.evaluate(var)) + self.assertEqual(self.evaluate(opt.loss_scale), initial_scale * 4) + + if isinstance(opt, loss_scale_optimizer.LossScaleOptimizer): + self.assertEqual(opt.get_slot_names(), ["momentum"]) + + def testIterations(self): + opt = gradient_descent.SGD(2.0) + lso = loss_scale_optimizer.LossScaleOptimizer( + opt, dynamic=False, initial_scale=10.0 + ) + lso.iterations = 7 + self.assertEqual(lso.iterations, 7) + self.assertEqual(opt.iterations, 7) + + @test_combinations.generate(opt_and_strategy_and_mode_combinations()) + def testIterationsIncremented(self, opt_cls, strategy_fn, use_tf_function): + with strategy_fn().scope() as strategy: + # Test iterations is incremented in opt.minimize. 
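The invariant this test pins down also holds in isolation: `iterations` advances even when non-finite gradients cause the weight update to be skipped. A minimal eager sketch:

```python
import tensorflow as tf

var = tf.Variable(5.0)
opt = tf.keras.mixed_precision.LossScaleOptimizer(tf.keras.optimizers.SGD(1.0))

opt.minimize(lambda: var * 2.0, [var])
print(int(opt.iterations), var.numpy())  # 1 3.0 (grad is 2, lr is 1: 5 - 2)

opt.minimize(lambda: var * float("nan"), [var])
print(int(opt.iterations), var.numpy())  # 2 3.0 (step skipped, counter moved)
```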
+ opt = create_sgd(opt_cls, 1.0) + opt = create_lso(opt) + var = tf.Variable([5.0]) + loss = lambda: var * 2.0 / strategy.num_replicas_in_sync + run_fn = lambda: opt.minimize(loss, [var]) + if use_tf_function: + run_fn = tf.function(run_fn) + run_op = strategy.experimental_run(run_fn) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + self.assertEqual( + self.evaluate(var), 3.0 + ) # Grad is 2, so var is 5 - 2 + self.assertEqual(self.evaluate(opt.iterations), 1) + + # Test iterations is incremented in opt.minimize even if gradients + # aren't applied to variables due to NaN gradients. + loss = lambda: var * float("NaN") + run_fn = lambda: opt.minimize(loss, [var]) + if use_tf_function: + run_fn = tf.function(run_fn) + run_op = strategy.experimental_run(run_fn) + self._run_if_in_graph_mode(run_op) + self.assertEqual(self.evaluate(var), 3.0) + self.assertEqual(self.evaluate(opt.iterations), 2) + + def testWeightMethods(self): + with self.test_session(): + var = tf.Variable([1.0]) + opt = gradient_descent.SGD(1.0) + opt = loss_scale_optimizer.LossScaleOptimizer( + opt, initial_scale=2.0, dynamic_growth_steps=1 + ) + run_op = opt.minimize(lambda: var * 2, [var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self._run_if_in_graph_mode(run_op) + + self.assertLen(opt.weights, 1) # The 'iterations' weight + self.assertEqual(self.evaluate(opt.weights[0]), 1) + self.assertEqual(opt.get_weights()[0], 1) + self.assertEqual(self.evaluate(opt.variables()[0]), 1) + opt.set_weights([np.array(2.0)]) + self.assertEqual(self.evaluate(opt.variables()[0]), 2) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def testHyperParametersExposedLSOV3(self): + opt = adam_experimental.Adam(learning_rate=1.0, beta_1=0.5, beta_2=0.9) + lso = loss_scale_optimizer.BaseLossScaleOptimizer(opt) + lso.learning_rate = tf.Variable(0.005) + self.assertAllClose(self.evaluate(lso.learning_rate), 0.005) + self.assertIs(lso.learning_rate, opt.learning_rate) + + lso.use_ema = True + self.assertEqual(lso.use_ema, True) + self.assertEqual(opt.use_ema, True) + + lso.ema_momentum = 0.88 + self.assertEqual(lso.ema_momentum, 0.88) + self.assertEqual(opt.ema_momentum, 0.88) + + def testHyperParametersExposed(self): + with self.cached_session(): + opt = adam.Adam(learning_rate=1.0, beta_1=0.5, beta_2=0.9) + lso = loss_scale_optimizer.LossScaleOptimizer(opt) + # Force hyperparameters to be created + opt.lr + self.evaluate(tf.compat.v1.global_variables_initializer()) + + self.assertEqual(self.evaluate(lso.beta_1), 0.5) + self.assertIsInstance(lso.beta_1, tf.Variable) + self.assertEqual(self.evaluate(lso.lr), 1.0) + self.assertIs(lso.lr, opt.lr) + self.assertIs(lso.lr, lso.learning_rate) + + lso.beta_1 = 0.25 + self.assertEqual(self.evaluate(lso.beta_1), 0.25) + self.assertEqual(self.evaluate(opt.beta_1), 0.25) + self.assertIs(lso.beta_1, opt.beta_1) + opt.beta_1 = 0.75 + self.assertEqual(self.evaluate(lso.beta_1), 0.75) + self.assertEqual(self.evaluate(opt.beta_1), 0.75) + self.assertIs(lso.beta_1, opt.beta_1) + lso.lr = 2.0 + self.assertEqual(self.evaluate(lso.lr), 2.0) + self.assertEqual(self.evaluate(lso.learning_rate), 2.0) + self.assertEqual(self.evaluate(opt.lr), 2.0) + self.assertEqual(self.evaluate(opt.learning_rate), 2.0) + self.assertIs(lso.lr, opt.lr) + + # Test setting attribute that is both attribute on + # LossScaleOptimizer and hyperparameter on wrapped optimizer. 
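# The hyperparameter delegation checked by the tests above (lso.lr,
# lso.beta_1, etc. forwarding to the wrapped optimizer), sketched with a
# hypothetical wrapper. The real LossScaleOptimizer does this with
# hyperparameter-aware attribute logic; this is only the idea:
class ToyWrapper:
    def __init__(self, inner):
        object.__setattr__(self, "inner", inner)

    def __getattr__(self, name):
        # Called only when normal lookup fails: fall through to the
        # wrapped optimizer.
        return getattr(self.inner, name)

    def __setattr__(self, name, value):
        if hasattr(self.inner, name):
            setattr(self.inner, name, value)  # shared hyperparameter
        else:
            object.__setattr__(self, name, value)


class ToyOpt:
    def __init__(self):
        self.learning_rate = 1.0


inner = ToyOpt()
wrapper = ToyWrapper(inner)
wrapper.learning_rate = 2.0
assert inner.learning_rate == 2.0 and wrapper.learning_rate == 2.0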
+ class MyOpt(gradient_descent.SGD): + def __init__(self): + super().__init__() + self._set_hyper("loss_scale", 123.0) + + opt = MyOpt() + lso = loss_scale_optimizer.LossScaleOptimizer(opt) + with self.assertRaises(AttributeError): + lso.loss_scale = 2.0 + + @test_combinations.generate(opt_combinations_only()) + def testArbitraryAttributesNotExposed(self, opt_cls): + opt = create_sgd(opt_cls) + lso = create_lso(opt) + self.assertFalse(opt.nesterov) + with self.assertRaisesRegex( + AttributeError, + "'LossScaleOptimizer(V3)?' object has no attribute 'nesterov'", + ): + lso.nesterov + + lso.nesterov = True + self.assertTrue(lso.nesterov) + self.assertFalse(opt.nesterov) + + def testDir(self): + lso = loss_scale_optimizer.LossScaleOptimizer(gradient_descent.SGD()) + dir_result = dir(lso) + self.assertIn("learning_rate", dir_result) # Hyperparameter + self.assertIn("lr", dir_result) # Hyperparameter + self.assertIn("minimize", dir_result) # Attribute + self.assertIn("loss_scale", dir_result) # Attribute + self.assertNotIn("nesterov", dir_result) # Attribute on inner optimizer + self.assertIn("nesterov", dir(lso.inner_optimizer)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testApplyGradientsGetsUnwrappedTensors(self): + # Tests that gradients passed to apply_gradients are not wrapped in a + # DistributionStrategy wrapper, such as PerReplica, but instead are raw + # Tensors. Optimizer subclasses that override apply_gradients() expect + # raw Tensors, even though the base Optimizer can handle PerReplica + # gradients. + + outer_self = self + + class MyOptimizer(gradient_descent.SGD): + def apply_gradients( + self, + grads_and_vars, + name=None, + experimental_aggregate_gradients=True, + ): + for grad, _ in grads_and_vars: + outer_self.assertIsInstance(grad, tf.Tensor) + return super().apply_gradients( + grads_and_vars, name, experimental_aggregate_gradients + ) + + with create_mirrored_strategy().scope() as strategy: + var = tf.Variable([5.0]) + opt = MyOptimizer(learning_rate=1.0) + opt = loss_scale_optimizer.LossScaleOptimizer( + opt, dynamic=False, initial_scale=1 + ) + loss = lambda: var * 2.0 + run_fn = lambda: opt.minimize(loss, [var]) + strategy.experimental_run(run_fn) + + @test_combinations.generate( + test_combinations.combine(mode="eager", use_tf_function=[False, True]) + ) + def testApplyGradientsGetsUnwrappedTensorsWithNewOptimizer( + self, use_tf_function + ): + outer_self = self + + class MyOptimizer(sgd_experimental.SGD): + def apply_gradients( + self, + grads_and_vars, + skip_gradients_aggregation=False, + experimental_aggregate_gradients=True, + ): + for grad, _ in grads_and_vars: + outer_self.assertIsInstance(grad, tf.Tensor) + return super().apply_gradients( + grads_and_vars, + skip_gradients_aggregation=skip_gradients_aggregation, + ) + + with create_mirrored_strategy().scope() as strategy: + var = tf.Variable([5.0]) + opt = MyOptimizer(learning_rate=1.0) + opt = loss_scale_optimizer.LossScaleOptimizerV3( + opt, dynamic=False, initial_scale=1 + ) + loss = lambda: var * 2.0 + run_fn = lambda: opt.minimize(loss, [var]) + if use_tf_function: + run_fn = tf.function(run_fn) + strategy.experimental_run(run_fn) + + @test_combinations.generate(opt_combinations_only()) + def testLossScaleDelegationWithWrapper(self, opt_cls): + # Test learning_rate is exposed when LossScaleOptimizer wraps another + # wrapper. 
+ + class MyOptimizer(opt_cls): + def __init__(self): + super().__init__("MyOptimizer") + self.inner_optimizer = create_sgd(opt_cls, learning_rate=1.0) + + @property + def learning_rate(self): + return self.inner_optimizer.learning_rate + + @learning_rate.setter + def learning_rate(self, value): + self.inner_optimizer.learning_rate = value + + def get_config(self): + return {} + + with self.cached_session(): + opt = MyOptimizer() + opt = create_lso(opt) + + # Force hyperparameters to be created + opt.learning_rate + self.evaluate(tf.compat.v1.global_variables_initializer()) + + self.assertEqual(self.evaluate(opt.learning_rate), 1.0) + self.assertEqual( + self.evaluate( + opt.inner_optimizer.inner_optimizer.learning_rate + ), + 1.0, + ) + opt.learning_rate = 2.0 + self.assertEqual(self.evaluate(opt.learning_rate), 2.0) + self.assertEqual( + self.evaluate( + opt.inner_optimizer.inner_optimizer.learning_rate + ), + 2.0, + ) + + @test_combinations.generate( + test_combinations.combine( + opt_cls=optimizer_v2.OptimizerV2, + strategy_fn=STRATEGY_FNS, + mode=["graph", "eager"], + use_tf_function=False, + save_with_ls=[False, True], + restore_with_ls=[False, True], + ) + + test_combinations.combine( + opt_cls=optimizer_experimental.Optimizer, + strategy_fn=STRATEGY_FNS, + mode="eager", + use_tf_function=[False, True], + save_with_ls=[False, True], + restore_with_ls=[False, True], + ) + ) + def testCheckpoint( + self, + opt_cls, + strategy_fn, + use_tf_function, + save_with_ls, + restore_with_ls, + ): + + if not save_with_ls and not restore_with_ls: + self.skipTest( + "Skipping because save_with_ls=False and " + "restore_with_ls=False, which means loss scaling is not " + "used" + ) + + sgd_cls = type(create_sgd(opt_cls)) + + class MySGD(sgd_cls): + """A custom optimizer that tracks an extra variable.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.my_var = tf.Variable(0.0) + self._track_trackable(self.my_var, "my_var") + + strategy = strategy_fn() + replicas = strategy.num_replicas_in_sync + if ( + isinstance(strategy, tf.distribute.MirroredStrategy) + and not tf.executing_eagerly() + ): + # TODO(b/121381184): Enable running the test in this case. + return + + with self.test_session(), strategy.scope(): + # Build and run a simple model. + var = tf.Variable([2.0]) + opt = inner_opt = MySGD(1.0, momentum=1.0) + if save_with_ls: + opt = create_lso( + opt, initial_scale=1.0, dynamic_growth_steps=2.0 + ) + run_fn = lambda: opt.minimize( + lambda: var / replicas + 1.0, var_list=[var] + ) + if use_tf_function: + run_fn = tf.function(run_fn) + opt_op = strategy.experimental_run(run_fn) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(strategy.experimental_local_results(opt_op)) + + # Assert values. + self.assertEqual(self.evaluate(var), 1.0) + if save_with_ls: + self.assertEqual(self.evaluate(opt.loss_scale), 1.0) + self.assertEqual(self.evaluate(opt.dynamic_counter), 1) + if opt_cls == optimizer_v2.OptimizerV2: + slot_var = opt.get_slot(var, "momentum") + self.assertEqual(self.evaluate(slot_var).item(), -1) + self.assertEqual(self.evaluate(opt.iterations), 1) + + # Set optimizer variable to check arbitrary optimizer attributes can + # be saved/restored + self.evaluate(inner_opt.my_var.assign(1.0)) + + # Save a checkpoint. 
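# A minimal TF2 sketch of the tf.train.Checkpoint round trip this test
# builds on (assumes eager mode; the path is illustrative):
import os
import tempfile

import tensorflow as tf

v = tf.Variable([2.0])
ckpt = tf.train.Checkpoint(var=v)
prefix = os.path.join(tempfile.mkdtemp(), "ckpt")
save_path = ckpt.save(prefix)  # e.g. ".../ckpt-1"

v.assign([0.0])          # clobber the value...
ckpt.restore(save_path)  # ...then bring it back from the checkpoint
assert v.numpy()[0] == 2.0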
+ checkpoint = tf.train.Checkpoint(optimizer=opt, var=var) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + save_path = checkpoint.save(prefix) + + # Create new model + var = tf.Variable([2.0]) + opt = inner_opt = MySGD(1.0, momentum=1.0) + if restore_with_ls: + opt = create_lso( + opt, initial_scale=1.0, dynamic_growth_steps=2.0 + ) + + # Restore new model. + checkpoint = tf.train.Checkpoint(optimizer=opt, var=var) + status = checkpoint.restore(save_path) + if save_with_ls: + status.assert_existing_objects_matched() + else: + status.assert_nontrivial_match() + + # Assert restored values. We can only assert in eager mode since the + # variables are uninitialized in graph mode + if tf.executing_eagerly(): + self.assertEqual(self.evaluate(var), 1.0) + if save_with_ls and restore_with_ls: + self.assertEqual(self.evaluate(opt.loss_scale), 1.0) + self.assertEqual(self.evaluate(opt.dynamic_counter), 1) + elif restore_with_ls: + self.assertEqual(self.evaluate(opt.loss_scale), 1.0) + self.assertEqual(self.evaluate(opt.dynamic_counter), 0) + self.assertEqual(self.evaluate(opt.iterations), 1) + + # Run the model again. + run_fn = lambda: opt.minimize( + lambda: var / replicas + 1.0, var_list=[var] + ) + if use_tf_function: + run_fn = tf.function(run_fn) + opt_op = strategy.experimental_run(run_fn) + + # Assert new values. + self.evaluate(tf.compat.v1.global_variables_initializer()) + status.run_restore_ops() + self.evaluate(strategy.experimental_local_results(opt_op)) + self.assertEqual(self.evaluate(var), -1) + if opt_cls == optimizer_v2.OptimizerV2: + slot_var = opt.get_slot(var, "momentum") + self.assertEqual(self.evaluate(slot_var).item(), -2) + self.assertEqual(self.evaluate(opt.iterations), 2) + self.assertEqual(self.evaluate(inner_opt.my_var), 1) + + # Restore model again to test restoring after slots are created + status = checkpoint.restore(save_path) + if save_with_ls and restore_with_ls: + status.assert_consumed() + elif save_with_ls: + status.assert_existing_objects_matched() + elif restore_with_ls: + status.assert_nontrivial_match() + status.run_restore_ops() + self.assertEqual(self.evaluate(var), 1) + if opt_cls == optimizer_v2.OptimizerV2: + self.assertEqual(self.evaluate(slot_var).item(), -1) + + @test_combinations.generate( + test_combinations.combine(config_version=["v2", "tf2_3"]) + + test_combinations.combine(config_version="v3", mode="eager") + ) + def testGetConfigFixed(self, config_version): + # Get a config from LossScaleOptimizer, LossScaleOptimizerV3, or the + # LossScaleOptimizer from TF 2.3. 
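# The get_config()/from_config() round-trip contract these config tests
# rely on, reduced to a toy class (illustrative; Keras objects follow the
# same pattern):
class ToyLSO:
    def __init__(self, initial_scale, dynamic_growth_steps=None):
        self.initial_scale = initial_scale
        self.dynamic_growth_steps = dynamic_growth_steps

    def get_config(self):
        return {
            "initial_scale": self.initial_scale,
            "dynamic_growth_steps": self.dynamic_growth_steps,
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)


opt_toy = ToyLSO(initial_scale=2, dynamic_growth_steps=3)
restored = ToyLSO.from_config(opt_toy.get_config())
assert restored.get_config() == opt_toy.get_config()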
Then restore the config into a + # LossScaleOptimizer or LossScaleOptimizerV3 + if config_version == "v2": + opt = gradient_descent.SGD(2.0, momentum=0.5) + opt = loss_scale_optimizer.LossScaleOptimizer( + opt, dynamic=False, initial_scale=2 + ) + config = opt.get_config() + opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config) + elif config_version == "v3": + opt = sgd_experimental.SGD(2.0, momentum=0.5) + opt = loss_scale_optimizer.LossScaleOptimizerV3( + opt, dynamic=False, initial_scale=2 + ) + config = opt.get_config() + opt = loss_scale_optimizer.LossScaleOptimizerV3.from_config(config) + else: + self.assertEqual(config_version, "tf2_3") + config = { + "optimizer": { + "class_name": "SGD", + "config": { + "learning_rate": 2.0, + "momentum": 0.5, + "decay": 0.0, + "nesterov": False, + "name": "SGD", + }, + }, + "loss_scale": { + "class_name": "FixedLossScale", + "config": {"loss_scale_value": 2.0}, + }, + } + opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config) + + # Force hyperparameters to be created + opt.learning_rate + self.evaluate(tf.compat.v1.global_variables_initializer()) - def _run_if_in_graph_mode(self, val): - # Running only in graph mode is useful, because optimizers sometimes return - # a value that, in Graph mode, is runnable with self.evaluate. But in Eager - # mode, the optimizer already does the computations and the return value - # cannot be run. - if not tf.executing_eagerly(): - self.evaluate(val) - - def _eval_if_tensor(self, val): - # Calls self.evaluate on val if val is a Tensor or Variable. This is useful, - # since hyperparameters are tf.Variables on OptimizerV2 and are Python - # floats on the experimental optimizer. - return (self.evaluate(val) if isinstance(val, (tf.Tensor, tf.Variable)) - else val) - - def _run_fn_with_grad_check(self, strategy, var, opt, expected_grad): - grad_check_fn = mp_test_util.create_identity_with_grad_check_fn( - expected_grad) - loss = lambda: grad_check_fn(var) / strategy.num_replicas_in_sync - return lambda: opt.minimize(loss, var_list=[var]) - - def testIsInstance(self): - optimizer = create_lso(sgd_experimental.SGD()) - self.assertIsInstance(optimizer, - loss_scale_optimizer.BaseLossScaleOptimizer) - - optimizer = create_lso(gradient_descent.SGD()) - self.assertIsInstance(optimizer, - loss_scale_optimizer.BaseLossScaleOptimizer) - - @test_combinations.generate(opt_and_strategy_and_mode_combinations()) - def testFixedLossScaleAppliedToLossWithMinimize(self, opt_cls, strategy_fn, - use_tf_function): - with strategy_fn().scope() as strategy: - var = tf.Variable([5.0]) - opt = create_sgd(opt_cls, 2.0) - loss_scale = 10. - opt = create_lso(opt, dynamic=False, initial_scale=loss_scale) - self.assertEqual(self.evaluate(opt.loss_scale), loss_scale) - self.assertIsInstance(opt.loss_scale, tf.Tensor) - # We need num_replicas_in_sync to divide loss_scale, otherwise loss_scale - # / strategy.num_replicas_in_sync will not be exact, which could lead to - # assertion failures due to rounding issues. - self.assertEqual(loss_scale % strategy.num_replicas_in_sync, 0) - run_fn = self._run_fn_with_grad_check( - strategy, var, opt, loss_scale / strategy.num_replicas_in_sync) - if use_tf_function: - run_fn = tf.function(run_fn) - run_op = strategy.experimental_run(run_fn) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - # The loss is the identity of the variable. 
Therefore the gradient is 1, - # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3 - self.assertAllClose([3.], self.evaluate(var)) - - def testFixedLossScaleAppliedToLossWithGetGradients(self): - with tf.Graph().as_default(): - var = tf.Variable([2.0]) - opt = gradient_descent.SGD(1.0) - loss_scale = 10. - opt = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False, - initial_scale=loss_scale) - grad_check_fn = mp_test_util.create_identity_with_grad_check_fn( - loss_scale) - loss = grad_check_fn(var) - run_op = opt.get_gradients(loss, [var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # This will cause an assertion to run, as - # mp_test_util.create_identity_with_grad_check_fn added an assertion op. - self.evaluate(run_op) - - @test_combinations.generate(opt_combinations_only()) - def testDynamicAttrsWithFixedLossScale(self, opt_cls): - opt = create_sgd(opt_cls) - opt = create_lso(opt, dynamic=False, initial_scale=2.) - self.assertFalse(opt.dynamic) - self.assertIsNone(opt.dynamic_counter) - self.assertIsNone(opt.dynamic_growth_steps) - - @test_combinations.generate(opt_combinations_only()) - def testGetScaledLoss(self, opt_cls): - opt = create_sgd(opt_cls) - opt = create_lso(opt, dynamic=False, initial_scale=2.) - loss = tf.convert_to_tensor(5.) - self.assertEqual(10., self.evaluate(opt.get_scaled_loss(loss))) - self.assertEqual(10., self.evaluate(opt.get_scaled_loss(lambda: loss)())) - loss = tf.convert_to_tensor(5., dtype='float16') - self.assertEqual(10., self.evaluate(opt.get_scaled_loss(loss))) - self.assertEqual(10., self.evaluate(opt.get_scaled_loss(lambda: loss)())) - - @test_combinations.generate(opt_combinations_only()) - def testGetUnscaledGradients(self, opt_cls): - opt = create_sgd(opt_cls) - opt = create_lso(opt, dynamic=False, initial_scale=2) - scaled_grads = [ - tf.convert_to_tensor(3.), None, - tf.convert_to_tensor(-4., dtype='float16') - ] - grads = opt.get_unscaled_gradients(scaled_grads) - grads = [self.evaluate(g) if g is not None else g for g in grads] - self.assertEqual([1.5, None, -2.], grads) - - @test_combinations.generate(opt_combinations_only()) - def testGetUnscaledSparseGradients(self, opt_cls): - opt = create_sgd(opt_cls) - opt = create_lso(opt, dynamic=False, initial_scale=2) - sparse_scaled_grad = tf.IndexedSlices( - tf.convert_to_tensor([[4., 2.], [8., 5.]]), - tf.convert_to_tensor([1, 3], dtype='int32'), - dense_shape=tf.convert_to_tensor([5, 2], dtype='int32')) - sparse_grad = opt.get_unscaled_gradients([sparse_scaled_grad])[0] - self.assertIsInstance(sparse_grad, tf.IndexedSlices) - self.assertAllEqual([[2., 1.], [4., 2.5]], - self.evaluate(sparse_grad.values)) - - @test_combinations.generate(opt_and_strategy_and_mode_combinations()) - def testDynamicLossScale(self, opt_cls, strategy_fn, use_tf_function): - strategy = strategy_fn() - learning_rate = 2. - expected_gradient = tf.Variable(learning_rate / - strategy.num_replicas_in_sync) - with strategy.scope(): - var = tf.Variable([5.0]) - opt = create_sgd(opt_cls, learning_rate) - opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=1) - self.assertEqual(opt.initial_scale, 2.) 
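# The scale/unscale round trip exercised by testGetScaledLoss and
# testGetUnscaledGradients above is what the documented custom-training-loop
# pattern relies on. A sketch, assuming `opt` is a built LossScaleOptimizer,
# `var_list` its variables, and `compute_loss` a user-supplied function:
import tensorflow as tf


def train_step(opt, var_list, compute_loss):
    with tf.GradientTape() as tape:
        loss = compute_loss()
        # Multiply the loss by the current scale so small float16
        # gradients do not underflow to zero.
        scaled_loss = opt.get_scaled_loss(loss)
    scaled_grads = tape.gradient(scaled_loss, var_list)
    # Divide the gradients by the same scale before applying them.
    grads = opt.get_unscaled_gradients(scaled_grads)
    opt.apply_gradients(zip(grads, var_list))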
- self.assertIsInstance(opt.initial_scale, float) - self.assertEqual(opt.dynamic_growth_steps, 1) - self.assertIsInstance(opt.dynamic_growth_steps, int) - - self.assertEqual(opt.initial_scale % strategy.num_replicas_in_sync, 0) - run_fn = self._run_fn_with_grad_check(strategy, var, opt, - expected_gradient) - if use_tf_function: - run_fn = tf.function(run_fn) - run_op = strategy.experimental_run(run_fn) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - # The loss is the identity of the variable. Therefore the gradient is 1, - # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3 - self.assertAllClose([3.], self.evaluate(var)) - - # Loss scale will be double, so the expected gradient is also doubled. - self.evaluate(expected_gradient.assign( - 2 * learning_rate / strategy.num_replicas_in_sync)) - run_op = strategy.experimental_run(run_fn) - self._run_if_in_graph_mode(run_op) - # As before, the 2 is subtracted from the variable, making it's new value - # 1. - self.assertAllClose([1.], self.evaluate(var)) - - @test_combinations.generate(opt_combinations_only()) - def testDynamicLossScaleDefaultValues(self, opt_cls): - opt = create_sgd(opt_cls) - opt = create_lso(opt) - self.assertEqual(opt.initial_scale, 2 ** 15) - self.assertEqual(opt.dynamic_growth_steps, 2000) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(opt.loss_scale), 2 ** 15) - - # pylint: disable=cell-var-from-loop - @test_combinations.generate(opt_and_strategy_and_mode_combinations()) - def testClipping(self, opt_cls, strategy_fn, use_tf_function): - strategy = strategy_fn() - learning_rate = 2. - for clip_type in ('clipnorm', 'global_clipnorm', 'clipvalue'): - with strategy.scope(), self.subTest(clip_type=clip_type): + # Test attributes on the optimizer + self.assertEqual(self.evaluate(opt.learning_rate), 2.0) + self.assertEqual(self.evaluate(opt.inner_optimizer.learning_rate), 2.0) + self.assertEqual( + self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5 + ) + self.assertEqual(self.evaluate(opt.loss_scale), 2.0) + self.assertEqual(opt.initial_scale, 2.0) + self.assertIsNone(opt.dynamic_growth_steps) + self.assertIsNone(opt.dynamic_counter) + self.assertFalse(opt.dynamic) + + # Ensure the optimizer can be used var = tf.Variable([5.0]) - opt = create_sgd(opt_cls, learning_rate, **{clip_type: 2.0}) - opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=1) - if isinstance(opt, loss_scale_optimizer.LossScaleOptimizer): - # Only OptimizerV2 exposes the clipping attributes - self.assertEqual(getattr(opt, clip_type), 2.0) - self.assertEqual(opt.initial_scale % strategy.num_replicas_in_sync, 0) - - loss = lambda: var * 4 / strategy.num_replicas_in_sync - run_fn = lambda: opt.minimize(loss, var_list=[var]) - if use_tf_function: - run_fn = tf.function(run_fn) - - # Test running with clipped gradients - run_op = strategy.experimental_run(run_fn) + run_op = self._run_fn_with_grad_check( + tf.distribute.get_strategy(), var, opt, 2 + )() self.evaluate(tf.compat.v1.global_variables_initializer()) self._run_if_in_graph_mode(run_op) - # The gradient is 4 but is clipped to 2, so the variable will be - # init_val - clipped_grad * lr == 5 - 2 * 2 == 1 - self.assertAllClose([1.], self.evaluate(var)) - self.assertEqual(self.evaluate(opt.loss_scale), 4) - - if isinstance(opt, loss_scale_optimizer.LossScaleOptimizerV3): - # Only OptimizerV2 exposes the clipping attributes, so we cannot set - # them on the new optimizer - return - # 
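# The arithmetic behind the clipping test below: clipping applies to the
# *unscaled* gradient, and non-finite gradients still skip the step rather
# than being clipped. A pure-Python illustration of the clipvalue case:
import math


def clipped_sgd_step(var, grad, lr=2.0, clipvalue=2.0):
    if not math.isfinite(grad):
        return var  # NaN/Inf: skip the update, never clip it
    grad = max(-clipvalue, min(clipvalue, grad))
    return var - lr * grad


assert clipped_sgd_step(5.0, 4.0) == 1.0           # 4 clipped to 2: 5 - 2*2
assert clipped_sgd_step(1.0, float("inf")) == 1.0  # step skipped entirely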
Test changing the clip amount and running again - setattr(opt, clip_type, 3.0) - run_op = strategy.experimental_run(run_fn) + self.assertEqual(self.evaluate(var), [3.0]) + + @test_combinations.generate( + test_combinations.combine(config_version=["v2", "tf2_3"]) + + test_combinations.combine(config_version="v3", mode="eager") + ) + def testGetConfigDynamic(self, config_version): + # Get a config from LossScaleOptimizer, LossScaleOptimizerV3, or the + # LossScaleOptimizer from TF 2.3. Then restore the config into a + # LossScaleOptimizer or LossScaleOptimizerV3 + if config_version == "v2": + opt = gradient_descent.SGD(2.0, momentum=0.5) + opt = loss_scale_optimizer.LossScaleOptimizer( + opt, initial_scale=2, dynamic_growth_steps=3 + ) + config = opt.get_config() + opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config) + elif config_version == "v3": + opt = sgd_experimental.SGD(2.0, momentum=0.5) + opt = loss_scale_optimizer.LossScaleOptimizerV3( + opt, initial_scale=2, dynamic_growth_steps=3 + ) + config = opt.get_config() + opt = loss_scale_optimizer.LossScaleOptimizerV3.from_config(config) + else: + self.assertEqual(config_version, "tf2_3") + config = { + "optimizer": { + "class_name": "SGD", + "config": { + "learning_rate": 2.0, + "momentum": 0.5, + "decay": 0.0, + "nesterov": False, + "name": "SGD", + }, + }, + "loss_scale": { + "class_name": "DynamicLossScale", + "config": { + "initial_loss_scale": 2.0, + "increment_period": 3, + "multiplier": 2.0, + }, + }, + } + opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config) + + # Force hyperparameters to be created + opt.learning_rate + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Test attributes on the optimizer + self.assertEqual(self.evaluate(opt.learning_rate), 2.0) + self.assertEqual(self.evaluate(opt.inner_optimizer.learning_rate), 2.0) + self.assertEqual( + self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5 + ) + self.assertEqual(self.evaluate(opt.loss_scale), 2.0) + self.assertEqual(opt.initial_scale, 2.0) + self.assertEqual(opt.dynamic_growth_steps, 3.0) + self.assertTrue(opt.dynamic) + + # Ensure the optimizer can be used + var = tf.Variable([5.0]) + run_op = self._run_fn_with_grad_check( + tf.distribute.get_strategy(), var, opt, 2 + )() + self.evaluate(tf.compat.v1.global_variables_initializer()) self._run_if_in_graph_mode(run_op) - # The gradient is 4 but is clipped to 3, so the variable will be - # prev_var - clipped_grad * lr == 1 - 3 * 2 == -5 - self.assertAllClose([-5.], self.evaluate(var)) - self.assertEqual(self.evaluate(opt.loss_scale), 8) - - # Test Inf gradients are still skipped instead of being clipped - loss = lambda: var * float('Inf') - run_fn = lambda: opt.minimize(loss, var_list=[var]) - run_op = strategy.experimental_run(run_fn) + self.assertEqual(self.evaluate(var), [3.0]) + self.assertEqual(self.evaluate(opt.dynamic_counter), 1) + + def test_from_config_with_invalid_multiplier(self): + config = { + "optimizer": { + "class_name": "SGD", + "config": { + "learning_rate": 2.0, + "momentum": 0.5, + "decay": 0.0, + "nesterov": False, + "name": "SGD", + }, + }, + "loss_scale": { + "class_name": "DynamicLossScale", + "config": { + "initial_loss_scale": 2.0, + "increment_period": 3, + "multiplier": 4.0, + }, + }, + } + + expected_error = ( + "Cannot deserialize LossScaleOptimizer with a " + "DynamicLossScale whose multiplier is not 2. 
Got " + "DynamicLossScale: DynamicLossScale\\(" + ) + with self.assertRaisesRegex(ValueError, expected_error): + loss_scale_optimizer.LossScaleOptimizer.from_config(config) + + @test_combinations.generate( + test_combinations.combine(lso_type=["v1", "v2"]) + + test_combinations.combine(lso_type="v3", mode="eager") + ) + def testSerializationWithBuiltInOptimizer(self, lso_type): + if lso_type in ("v1", "v2"): + opt = gradient_descent.SGD(2.0, momentum=0.5) + opt = loss_scale_optimizer.LossScaleOptimizer( + opt, initial_scale=2.0, dynamic_growth_steps=3.0 + ) + config = optimizers.serialize(opt) + if lso_type == "v1": + # LossScaleOptimizerV1 was an older experimental version of LSO + # that is now deleted. The config had the same format as LSO but + # the class name was different. This tests that LSO V1 configs + # can still be deserialized, which are deserialized as a + # (non-V1) LSO + config["class_name"] = "LossScaleOptimizerV1" + else: + opt = sgd_experimental.SGD(2.0, momentum=0.5) + opt = loss_scale_optimizer.LossScaleOptimizerV3( + opt, initial_scale=2.0, dynamic_growth_steps=3 + ) + config = optimizers.serialize(opt) + opt = optimizers.deserialize(config) + # Force hyperparameters to be created + opt.learning_rate + self.evaluate(tf.compat.v1.global_variables_initializer()) + + self.assertEqual(self.evaluate(opt.learning_rate), 2.0) + self.assertEqual( + self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5 + ) + self.assertEqual(self.evaluate(opt.loss_scale), 2.0) + self.assertEqual(opt.dynamic_growth_steps, 3.0) + self.assertTrue(opt.dynamic) + if lso_type in ("v1", "v2"): + self.assertEqual(type(opt), loss_scale_optimizer.LossScaleOptimizer) + else: + self.assertEqual( + type(opt), loss_scale_optimizer.LossScaleOptimizerV3 + ) + + # Ensure the optimizer can be used + var = tf.Variable([5.0]) + run_op = self._run_fn_with_grad_check( + tf.distribute.get_strategy(), var, opt, 2 + )() + self.evaluate(tf.compat.v1.global_variables_initializer()) self._run_if_in_graph_mode(run_op) - self.assertAllClose([-5.], self.evaluate(var)) # Var does not change - self.assertEqual(self.evaluate(opt.loss_scale), 4) - # pylint: enable=cell-var-from-loop - - @test_combinations.generate(opt_and_strategy_and_mode_combinations()) - def testDynamicUpdate(self, opt_cls, strategy_fn, use_tf_function): - with strategy_fn().scope() as strategy: - var = tf.Variable([1.0, 2.0]) - opt = create_sgd(opt_cls, 1.0) - opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=1) - - # Test optimizer with finite gradients - loss = lambda: var * 2.0 / strategy.num_replicas_in_sync - run_fn = lambda: opt.minimize(loss, var_list=[var]) - if use_tf_function: - run_fn = tf.function(run_fn) - run_op = strategy.experimental_run(run_fn) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - # Gradient is 2, so variable will have 2 subtracted from it - self.assertAllClose([-1.0, 0.0], self.evaluate(var)) - # Loss scale has doubled from 2 to 4 - self.assertEqual(4., self.evaluate(opt.loss_scale)) - - # Test optimizer with NaN gradients - loss = lambda: var * float('NaN') - run_fn = lambda: opt.minimize(loss, var_list=[var]) - run_op = strategy.experimental_run(run_fn) - self._run_if_in_graph_mode(run_op) - # Variable should not change from before, due to NaN gradients. - self.assertAllClose(self.evaluate(var), [-1.0, 0.0]) - # Loss scale should half due to NaN gradients. 
- self.assertEqual(2., self.evaluate(opt.loss_scale)) - - @test_combinations.generate(opt_and_strategy_and_mode_combinations()) - def testDynamicLossScaleWithFloat16Loss(self, opt_cls, strategy_fn, - use_tf_function): - strategy = strategy_fn() - learning_rate = 2. - with strategy.scope(): - var = tf.Variable([5.0]) - opt = create_sgd(opt_cls, learning_rate) - opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=1) - - def loss(): - return tf.cast(var / strategy.num_replicas_in_sync, 'float16') - run_fn = lambda: opt.minimize(loss, var_list=[var]) - if use_tf_function: - run_fn = tf.function(run_fn) - run_op = strategy.experimental_run(run_fn) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - # The loss is the identity of the variable. Therefore the gradient is 1, - # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3 - self.assertAllClose([3.], self.evaluate(var)) - - @test_combinations.generate(opt_and_strategy_and_mode_combinations()) - def testNanOnOneReplicaOnly(self, opt_cls, strategy_fn, use_tf_function): - if strategy_fn == default_strategy_fn: - self.skipTest('The test is only useful for non-default strategies') - if not tf.test.is_gpu_available(): - self.skipTest('Test requires GPU') - if (not tf.executing_eagerly() and - not tf.compat.v1.control_flow_v2_enabled()): - self.skipTest('b/181283011: GradientTape does not work properly with ' - 'V1 control flow, and opt.minimize uses GradientTape') - with strategy_fn().scope() as strategy: - var = tf.Variable([1.0, 2.0]) - opt = create_sgd(opt_cls, 1.0) - opt = create_lso(opt, initial_scale=2, dynamic_growth_steps=2) - - def loss(): - rep_id = (tf.distribute.get_replica_context().replica_id_in_sync_group) - # The last element of last replica's gradient is NaN. - return tf.cond( - tf.equal(rep_id, 0), lambda: var * 2., - lambda: var * tf.constant([1., float('NaN')])) - run_fn = lambda: opt.minimize(loss, var_list=[var]) - if use_tf_function: - run_fn = tf.function(run_fn) - run_op = strategy.experimental_run(run_fn) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - # Variable should not change from before, due to NaN gradients. - self.assertAllClose(self.evaluate(var), [1.0, 2.0]) - # Loss scale should half due to NaN gradients. - self.assertEqual(1., self.evaluate(opt.loss_scale)) - - def testCustomAggregater(self): - def gradient_aggregator(grads_and_vars): - # Simulate an all-reduce where a replica has a NaN gradient by setting - # the last gradient to NaN - grads_and_vars = list(grads_and_vars) - last_grad, last_var = grads_and_vars[-1] - grads_and_vars[-1] = (last_grad * float('NaN'), last_var) - return grads_and_vars - - var = tf.Variable([1.0, 2.0]) - opt = gradient_descent.SGD(1.0, gradient_aggregator=gradient_aggregator) - opt = loss_scale_optimizer.LossScaleOptimizer(opt, initial_scale=2, - dynamic_growth_steps=2) - - loss = lambda: var * 2 - run_op = opt.minimize(loss, var_list=[var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - # Variable should not change from before, due to NaN gradients. - self.assertAllClose(self.evaluate(var), [1.0, 2.0]) - # Loss scale should half due to NaN gradients. 
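# What testNanOnOneReplicaOnly checks, in miniature: the finiteness check
# runs on the combined gradients, so a NaN on any single replica skips the
# update everywhere. A pure-Python simulation of the all-reduce:
import math

replica_grads = [[2.0, 4.0], [1.0, float("nan")]]  # replica 1 produced a NaN
reduced = [sum(gs) for gs in zip(*replica_grads)]  # simulate the all-reduce
all_finite = all(math.isfinite(g) for g in reduced)
assert not all_finite  # so the step is skipped and the scale is halved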
- self.assertEqual(1., self.evaluate(opt.loss_scale)) - - @test_combinations.generate(opt_and_strategy_and_mode_combinations()) - def testDynamicLossScaleWithSlots(self, opt_cls, strategy_fn, - use_tf_function): - strategy_obj = strategy_fn() - if (isinstance(strategy_obj, tf.distribute.MirroredStrategy) and - tf.compat.v1.control_flow_v2_enabled() and - not tf.executing_eagerly()): - self.skipTest('b/138667997') - with strategy_obj.scope() as strategy: - var = tf.Variable([1.0, 2.0]) - # An SGD optimizer with momentum has slot variables. - opt = create_sgd(opt_cls, 1.0, momentum=1.) - initial_scale = 2. - opt = create_lso(opt, initial_scale=initial_scale, dynamic_growth_steps=1) - loss = lambda: var / strategy.num_replicas_in_sync - run_fn = lambda: opt.minimize(loss, var_list=[var]) - if use_tf_function: - run_fn = tf.function(run_fn) - run_op = strategy.experimental_run(run_fn) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - # The momentum accumulator starts at 0 and the gradient is 1. The - # accumulator is incremented by the gradient, so it is now 1. Then the - # variable is subtracted by the accumulator, so the variable is subtracted - # by 1. - self.assertAllClose([0.0, 1.0], self.evaluate(var)) - self.assertEqual(self.evaluate(opt.loss_scale), initial_scale * 2) - - run_op = strategy.experimental_run(run_fn) - self._run_if_in_graph_mode(run_op) - # The momentum accumulator was 1 before this step and the gradient is 1. - # The accumulator is incremented by the gradient, so it is now 2. Then the - # variable is subtracted by the accumulator, so the variable is subtracted - # by 2. - self.assertAllClose([-2., -1.], self.evaluate(var)) - self.assertEqual(self.evaluate(opt.loss_scale), initial_scale * 4) - - if isinstance(opt, loss_scale_optimizer.LossScaleOptimizer): - self.assertEqual(opt.get_slot_names(), ['momentum']) - - def testIterations(self): - opt = gradient_descent.SGD(2.0) - lso = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False, - initial_scale=10.) - lso.iterations = 7 - self.assertEqual(lso.iterations, 7) - self.assertEqual(opt.iterations, 7) - - @test_combinations.generate(opt_and_strategy_and_mode_combinations()) - def testIterationsIncremented(self, opt_cls, strategy_fn, use_tf_function): - with strategy_fn().scope() as strategy: - # Test iterations is incremented in opt.minimize. - opt = create_sgd(opt_cls, 1.0) - opt = create_lso(opt) - var = tf.Variable([5.0]) - loss = lambda: var * 2.0 / strategy.num_replicas_in_sync - run_fn = lambda: opt.minimize(loss, [var]) - if use_tf_function: - run_fn = tf.function(run_fn) - run_op = strategy.experimental_run(run_fn) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - self.assertEqual(self.evaluate(var), 3.0) # Grad is 2, so var is 5 - 2 - self.assertEqual(self.evaluate(opt.iterations), 1) - - # Test iterations is incremented in opt.minimize even if gradients aren't - # applied to variables due to NaN gradients. 
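# testIterationsIncremented pins down one subtlety: `iterations` advances
# even when non-finite gradients cause the variable update to be skipped.
# A toy version of that behavior:
import math


class ToyCountingOpt:
    def __init__(self):
        self.iterations = 0

    def minimize_step(self, var, grad, lr=1.0):
        self.iterations += 1       # counted unconditionally
        if math.isfinite(grad):
            var = var - lr * grad  # applied only when the gradient is finite
        return var


opt_toy = ToyCountingOpt()
var = opt_toy.minimize_step(5.0, 2.0)
assert (var, opt_toy.iterations) == (3.0, 1)
var = opt_toy.minimize_step(var, float("nan"))
assert (var, opt_toy.iterations) == (3.0, 2)  # var unchanged, count advanced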
- loss = lambda: var * float('NaN') - run_fn = lambda: opt.minimize(loss, [var]) - if use_tf_function: - run_fn = tf.function(run_fn) - run_op = strategy.experimental_run(run_fn) - self._run_if_in_graph_mode(run_op) - self.assertEqual(self.evaluate(var), 3.0) - self.assertEqual(self.evaluate(opt.iterations), 2) - - def testWeightMethods(self): - with self.test_session(): - var = tf.Variable([1.0]) - opt = gradient_descent.SGD(1.0) - opt = loss_scale_optimizer.LossScaleOptimizer(opt, initial_scale=2., - dynamic_growth_steps=1) - run_op = opt.minimize(lambda: var * 2, [var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - - self.assertLen(opt.weights, 1) # The 'iterations' weight - self.assertEqual(self.evaluate(opt.weights[0]), 1) - self.assertEqual(opt.get_weights()[0], 1) - self.assertEqual(self.evaluate(opt.variables()[0]), 1) - opt.set_weights([np.array(2.)]) - self.assertEqual(self.evaluate(opt.variables()[0]), 2) - - def testHyperParametersExposed(self): - with self.cached_session(): - opt = adam.Adam(learning_rate=1.0, beta_1=0.5, beta_2=0.9) - lso = loss_scale_optimizer.LossScaleOptimizer(opt) - # Force hyperparameters to be created - opt.lr # pylint: disable=pointless-statement - self.evaluate(tf.compat.v1.global_variables_initializer()) - - self.assertEqual(self.evaluate(lso.beta_1), 0.5) - self.assertIsInstance(lso.beta_1, tf.Variable) - self.assertEqual(self.evaluate(lso.lr), 1.0) - self.assertIs(lso.lr, opt.lr) - self.assertIs(lso.lr, lso.learning_rate) - - lso.beta_1 = 0.25 - self.assertEqual(self.evaluate(lso.beta_1), 0.25) - self.assertEqual(self.evaluate(opt.beta_1), 0.25) - self.assertIs(lso.beta_1, opt.beta_1) - opt.beta_1 = 0.75 - self.assertEqual(self.evaluate(lso.beta_1), 0.75) - self.assertEqual(self.evaluate(opt.beta_1), 0.75) - self.assertIs(lso.beta_1, opt.beta_1) - lso.lr = 2.0 - self.assertEqual(self.evaluate(lso.lr), 2.0) - self.assertEqual(self.evaluate(lso.learning_rate), 2.0) - self.assertEqual(self.evaluate(opt.lr), 2.0) - self.assertEqual(self.evaluate(opt.learning_rate), 2.0) - self.assertIs(lso.lr, opt.lr) - - # Test setting attribute that is both attribute on LossScaleOptimizer and - # hyperparameter on wrapped optimizer. - class MyOpt(gradient_descent.SGD): - - def __init__(self): - super().__init__() - self._set_hyper('loss_scale', 123.) - - opt = MyOpt() - lso = loss_scale_optimizer.LossScaleOptimizer(opt) - with self.assertRaises(AttributeError): - lso.loss_scale = 2. - - @test_combinations.generate(opt_combinations_only()) - def testArbitraryAttributesNotExposed(self, opt_cls): - opt = create_sgd(opt_cls) - lso = create_lso(opt) - self.assertFalse(opt.nesterov) - with self.assertRaisesRegex( - AttributeError, - "'LossScaleOptimizer(V3)?' 
object has no attribute 'nesterov'"): - lso.nesterov # pylint: disable=pointless-statement - - lso.nesterov = True - self.assertTrue(lso.nesterov) - self.assertFalse(opt.nesterov) - - def testDir(self): - lso = loss_scale_optimizer.LossScaleOptimizer(gradient_descent.SGD()) - dir_result = dir(lso) - self.assertIn('learning_rate', dir_result) # Hyperparameter - self.assertIn('lr', dir_result) # Hyperparameter - self.assertIn('minimize', dir_result) # Attribute - self.assertIn('loss_scale', dir_result) # Attribute - self.assertNotIn('nesterov', dir_result) # Attribute on inner optimizer - self.assertIn('nesterov', dir(lso.inner_optimizer)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testApplyGradientsGetsUnwrappedTensors(self): - # Tests that gradients passed to apply_gradients are not wrapped in a - # DistributionStrategy wrapper, such as PerReplica, but instead are raw - # Tensors. Optimizer subclasses that override apply_gradients() expect raw - # Tensors, even though the base Optimizer can handle PerReplica gradients. - - outer_self = self - - class MyOptimizer(gradient_descent.SGD): - - def apply_gradients(self, - grads_and_vars, - name=None, - experimental_aggregate_gradients=True): - for grad, _ in grads_and_vars: - outer_self.assertIsInstance(grad, tf.Tensor) - return super().apply_gradients(grads_and_vars, name, - experimental_aggregate_gradients) - - with create_mirrored_strategy().scope() as strategy: - var = tf.Variable([5.0]) - opt = MyOptimizer(learning_rate=1.0) - opt = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False, - initial_scale=1) - loss = lambda: var * 2.0 - run_fn = lambda: opt.minimize(loss, [var]) - strategy.experimental_run(run_fn) - - @test_combinations.generate( - test_combinations.combine(mode='eager', use_tf_function=[False, True])) - def testApplyGradientsGetsUnwrappedTensorsWithNewOptimizer( - self, use_tf_function): - outer_self = self - - class MyOptimizer(sgd_experimental.SGD): - - def apply_gradients(self, - grads_and_vars, - skip_gradients_aggregation=False): - for grad, _ in grads_and_vars: - outer_self.assertIsInstance(grad, tf.Tensor) - return super().apply_gradients(grads_and_vars, - skip_gradients_aggregation) - - with create_mirrored_strategy().scope() as strategy: - var = tf.Variable([5.0]) - opt = MyOptimizer(learning_rate=1.0) - opt = loss_scale_optimizer.LossScaleOptimizerV3( - opt, dynamic=False, initial_scale=1) - loss = lambda: var * 2.0 - run_fn = lambda: opt.minimize(loss, [var]) - if use_tf_function: - run_fn = tf.function(run_fn) - strategy.experimental_run(run_fn) - - @test_combinations.generate(opt_combinations_only()) - def testLossScaleDelegationWithWrapper(self, opt_cls): - # Test learning_rate is exposed when LossScaleOptimizer wraps another - # wrapper. 
- - class MyOptimizer(opt_cls): - - def __init__(self): - super().__init__('MyOptimizer') - self.inner_optimizer = create_sgd(opt_cls, learning_rate=1.0) - - @property - def learning_rate(self): - return self.inner_optimizer.learning_rate - - @learning_rate.setter - def learning_rate(self, value): - self.inner_optimizer.learning_rate = value - - def get_config(self): - return {} - - with self.cached_session(): - opt = MyOptimizer() - opt = create_lso(opt) - - # Force hyperparameters to be created - opt.learning_rate # pylint: disable=pointless-statement - self.evaluate(tf.compat.v1.global_variables_initializer()) - - self.assertEqual(self.evaluate(opt.learning_rate), 1.0) - self.assertEqual( - self.evaluate(opt.inner_optimizer.inner_optimizer.learning_rate), 1.0) - opt.learning_rate = 2.0 - self.assertEqual(self.evaluate(opt.learning_rate), 2.0) - self.assertEqual(self.evaluate( - opt.inner_optimizer.inner_optimizer.learning_rate), 2.0) - - @test_combinations.generate( - test_combinations.combine( - opt_cls=optimizer_v2.OptimizerV2, - strategy_fn=STRATEGY_FNS, - mode=['graph', 'eager'], - use_tf_function=False, - save_with_ls=[False, True], - restore_with_ls=[False, True]) + test_combinations.combine( - opt_cls=optimizer_experimental.Optimizer, - strategy_fn=STRATEGY_FNS, - mode='eager', - use_tf_function=[False, True], - save_with_ls=[False, True], - restore_with_ls=[False, True])) - def testCheckpoint(self, opt_cls, strategy_fn, use_tf_function, save_with_ls, - restore_with_ls): - - if not save_with_ls and not restore_with_ls: - self.skipTest('Skipping because save_with_ls=False and ' - 'restore_with_ls=False, which means loss scaling is not ' - 'used') - - sgd_cls = type(create_sgd(opt_cls)) - - class MySGD(sgd_cls): - """A custom optimizer that tracks an extra variable.""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.my_var = tf.Variable(0.) - self._track_trackable(self.my_var, 'my_var') - - strategy = strategy_fn() - replicas = strategy.num_replicas_in_sync - if (isinstance(strategy, tf.distribute.MirroredStrategy) and - not tf.executing_eagerly()): - # TODO(b/121381184): Enable running the test in this case. - return - - with self.test_session(), strategy.scope(): - # Build and run a simple model. - var = tf.Variable([2.0]) - opt = inner_opt = MySGD(1., momentum=1.) - if save_with_ls: - opt = create_lso(opt, initial_scale=1., dynamic_growth_steps=2.) - run_fn = lambda: opt.minimize(lambda: var / replicas + 1., var_list=[var]) - if use_tf_function: - run_fn = tf.function(run_fn) - opt_op = strategy.experimental_run(run_fn) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(strategy.experimental_local_results(opt_op)) - - # Assert values. - self.assertEqual(self.evaluate(var), 1.) - if save_with_ls: - self.assertEqual(self.evaluate(opt.loss_scale), 1.) + self.assertEqual(self.evaluate(var), [3.0]) self.assertEqual(self.evaluate(opt.dynamic_counter), 1) - if opt_cls == optimizer_v2.OptimizerV2: - slot_var = opt.get_slot(var, 'momentum') - self.assertEqual(self.evaluate(slot_var).item(), -1) - self.assertEqual(self.evaluate(opt.iterations), 1) - - # Set optimizer variable to check arbitrary optimizer attributes can be - # saved/restored - self.evaluate(inner_opt.my_var.assign(1.)) - - # Save a checkpoint. 
- checkpoint = tf.train.Checkpoint(optimizer=opt, var=var) - prefix = os.path.join(self.get_temp_dir(), 'ckpt') - save_path = checkpoint.save(prefix) - - # Create new model - var = tf.Variable([2.0]) - opt = inner_opt = MySGD(1., momentum=1.) - if restore_with_ls: - opt = create_lso(opt, initial_scale=1., dynamic_growth_steps=2.) - - # Restore new model. - checkpoint = tf.train.Checkpoint(optimizer=opt, var=var) - status = checkpoint.restore(save_path) - if save_with_ls: - status.assert_existing_objects_matched() - else: - status.assert_nontrivial_match() - - # Assert restored values. We can only assert in eager mode since the - # variables are uninitialized in graph mode - if tf.executing_eagerly(): - self.assertEqual(self.evaluate(var), 1.) - if save_with_ls and restore_with_ls: - self.assertEqual(self.evaluate(opt.loss_scale), 1.) - self.assertEqual(self.evaluate(opt.dynamic_counter), 1) - elif restore_with_ls: - self.assertEqual(self.evaluate(opt.loss_scale), 1.) - self.assertEqual(self.evaluate(opt.dynamic_counter), 0) - self.assertEqual(self.evaluate(opt.iterations), 1) - - # Run the model again. - run_fn = lambda: opt.minimize(lambda: var / replicas + 1., var_list=[var]) - if use_tf_function: - run_fn = tf.function(run_fn) - opt_op = strategy.experimental_run(run_fn) - - # Assert new values. - self.evaluate(tf.compat.v1.global_variables_initializer()) - status.run_restore_ops() - self.evaluate(strategy.experimental_local_results(opt_op)) - self.assertEqual(self.evaluate(var), -1) - if opt_cls == optimizer_v2.OptimizerV2: - slot_var = opt.get_slot(var, 'momentum') - self.assertEqual(self.evaluate(slot_var).item(), -2) - self.assertEqual(self.evaluate(opt.iterations), 2) - self.assertEqual(self.evaluate(inner_opt.my_var), 1) - - # Restore model again to test restoring after slots are created - status = checkpoint.restore(save_path) - if save_with_ls and restore_with_ls: - status.assert_consumed() - elif save_with_ls: - status.assert_existing_objects_matched() - elif restore_with_ls: - status.assert_nontrivial_match() - status.run_restore_ops() - self.assertEqual(self.evaluate(var), 1) - if opt_cls == optimizer_v2.OptimizerV2: - self.assertEqual(self.evaluate(slot_var).item(), -1) - - @test_combinations.generate( - test_combinations.combine(config_version=['v2', 'tf2_3']) + - test_combinations.combine(config_version='v3', mode='eager')) - def testGetConfigFixed(self, config_version): - # Get a config from LossScaleOptimizer, LossScaleOptimizerV3, or the - # LossScaleOptimizer from TF 2.3. 
Then restore the config into a - # LossScaleOptimizer or LossScaleOptimizerV3 - if config_version == 'v2': - opt = gradient_descent.SGD(2., momentum=0.5) - opt = loss_scale_optimizer.LossScaleOptimizer( - opt, dynamic=False, initial_scale=2) - config = opt.get_config() - opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config) - elif config_version == 'v3': - opt = sgd_experimental.SGD(2., momentum=0.5) - opt = loss_scale_optimizer.LossScaleOptimizerV3( - opt, dynamic=False, initial_scale=2) - config = opt.get_config() - opt = loss_scale_optimizer.LossScaleOptimizerV3.from_config(config) - else: - self.assertEqual(config_version, 'tf2_3') - config = { - 'optimizer': { - 'class_name': 'SGD', - 'config': { - 'learning_rate': 2.0, - 'momentum': 0.5, - 'decay': 0.0, - 'nesterov': False, - 'name': 'SGD', - } - }, - 'loss_scale': { - 'class_name': 'FixedLossScale', - 'config': {'loss_scale_value': 2.0} - }, - } - opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config) - - # Force hyperparameters to be created - opt.learning_rate # pylint: disable=pointless-statement - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Test attributes on the optimizer - self.assertEqual(self.evaluate(opt.learning_rate), 2.) - self.assertEqual(self.evaluate(opt.inner_optimizer.learning_rate), 2.) - self.assertEqual(self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5) - self.assertEqual(self.evaluate(opt.loss_scale), 2.) - self.assertEqual(opt.initial_scale, 2.) - self.assertIsNone(opt.dynamic_growth_steps) - self.assertIsNone(opt.dynamic_counter) - self.assertFalse(opt.dynamic) - - # Ensure the optimizer can be used - var = tf.Variable([5.0]) - run_op = self._run_fn_with_grad_check( - tf.distribute.get_strategy(), var, opt, 2)() - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - self.assertEqual(self.evaluate(var), [3.]) - - @test_combinations.generate( - test_combinations.combine(config_version=['v2', 'tf2_3']) + - test_combinations.combine(config_version='v3', mode='eager')) - def testGetConfigDynamic(self, config_version): - # Get a config from LossScaleOptimizer, LossScaleOptimizerV3, or the - # LossScaleOptimizer from TF 2.3. 
Then restore the config into a - # LossScaleOptimizer or LossScaleOptimizerV3 - if config_version == 'v2': - opt = gradient_descent.SGD(2., momentum=0.5) - opt = loss_scale_optimizer.LossScaleOptimizer( - opt, initial_scale=2, dynamic_growth_steps=3) - config = opt.get_config() - opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config) - elif config_version == 'v3': - opt = sgd_experimental.SGD(2., momentum=0.5) - opt = loss_scale_optimizer.LossScaleOptimizerV3( - opt, initial_scale=2, dynamic_growth_steps=3) - config = opt.get_config() - opt = loss_scale_optimizer.LossScaleOptimizerV3.from_config(config) - else: - self.assertEqual(config_version, 'tf2_3') - config = { - 'optimizer': { - 'class_name': 'SGD', - 'config': { - 'learning_rate': 2.0, - 'momentum': 0.5, - 'decay': 0.0, - 'nesterov': False, - 'name': 'SGD', - } - }, - 'loss_scale': { - 'class_name': 'DynamicLossScale', - 'config': { - 'initial_loss_scale': 2.0, - 'increment_period': 3, - 'multiplier': 2.0, - } - }, - } - opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config) - - # Force hyperparameters to be created - opt.learning_rate # pylint: disable=pointless-statement - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Test attributes on the optimizer - self.assertEqual(self.evaluate(opt.learning_rate), 2.) - self.assertEqual(self.evaluate(opt.inner_optimizer.learning_rate), 2.) - self.assertEqual(self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5) - self.assertEqual(self.evaluate(opt.loss_scale), 2.) - self.assertEqual(opt.initial_scale, 2.) - self.assertEqual(opt.dynamic_growth_steps, 3.) - self.assertTrue(opt.dynamic) - - # Ensure the optimizer can be used - var = tf.Variable([5.0]) - run_op = self._run_fn_with_grad_check( - tf.distribute.get_strategy(), var, opt, 2)() - self.evaluate(tf.compat.v1.global_variables_initializer()) - self._run_if_in_graph_mode(run_op) - self.assertEqual(self.evaluate(var), [3.]) - self.assertEqual(self.evaluate(opt.dynamic_counter), 1) - - def test_from_config_with_invalid_multiplier(self): - config = { - 'optimizer': { - 'class_name': 'SGD', - 'config': { - 'learning_rate': 2.0, - 'momentum': 0.5, - 'decay': 0.0, - 'nesterov': False, - 'name': 'SGD', - } - }, - 'loss_scale': { - 'class_name': 'DynamicLossScale', - 'config': { - 'initial_loss_scale': 2.0, - 'increment_period': 3, - 'multiplier': 4.0, - } - }, - } - - expected_error = ('Cannot deserialize LossScaleOptimizer with a ' - 'DynamicLossScale whose multiplier is not 2. Got ' - 'DynamicLossScale: DynamicLossScale\\(') - with self.assertRaisesRegex(ValueError, expected_error): - loss_scale_optimizer.LossScaleOptimizer.from_config(config) - - @test_combinations.generate( - test_combinations.combine(lso_type=['v1', 'v2']) + - test_combinations.combine(lso_type='v3', mode='eager')) - def testSerializationWithBuiltInOptimizer(self, lso_type): - if lso_type in ('v1', 'v2'): - opt = gradient_descent.SGD(2., momentum=0.5) - opt = loss_scale_optimizer.LossScaleOptimizer( - opt, initial_scale=2., dynamic_growth_steps=3.) - config = optimizers.serialize(opt) - if lso_type == 'v1': - # LossScaleOptimizerV1 was an older experimental version of LSO that is - # now deleted. The config had the same format as LSO but the class - # name was different. 
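# The tf2_3-format configs in these tests carry a nested 'loss_scale' dict;
# a sketch of the mapping from_config has to perform for the dynamic case.
# Field names are taken from the config literals above; the function itself
# is illustrative, not the Keras implementation:
def convert_tf2_3_loss_scale(loss_scale_config):
    assert loss_scale_config["class_name"] == "DynamicLossScale"
    cfg = loss_scale_config["config"]
    # A multiplier other than 2 cannot be expressed and is rejected, which
    # is what test_from_config_with_invalid_multiplier verifies.
    if cfg["multiplier"] != 2.0:
        raise ValueError("multiplier must be 2")
    return {
        "dynamic": True,
        "initial_scale": cfg["initial_loss_scale"],
        "dynamic_growth_steps": cfg["increment_period"],
    }


kwargs = convert_tf2_3_loss_scale({
    "class_name": "DynamicLossScale",
    "config": {
        "initial_loss_scale": 2.0,
        "increment_period": 3,
        "multiplier": 2.0,
    },
})
assert kwargs["initial_scale"] == 2.0 and kwargs["dynamic_growth_steps"] == 3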
This tests that LSO V1 configs can still be
-        # deserialized, which are deserialized as a (non-V1) LSO
-        config['class_name'] = 'LossScaleOptimizerV1'
-    else:
-      opt = sgd_experimental.SGD(2., momentum=0.5)
-      opt = loss_scale_optimizer.LossScaleOptimizerV3(
-          opt, initial_scale=2., dynamic_growth_steps=3)
-      config = optimizers.serialize(opt)
-    opt = optimizers.deserialize(config)
-    # Force hyperparameters to be created
-    opt.learning_rate  # pylint: disable=pointless-statement
-    self.evaluate(tf.compat.v1.global_variables_initializer())
-
-    self.assertEqual(self.evaluate(opt.learning_rate), 2.)
-    self.assertEqual(self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5)
-    self.assertEqual(self.evaluate(opt.loss_scale), 2.)
-    self.assertEqual(opt.dynamic_growth_steps, 3.)
-    self.assertTrue(opt.dynamic)
-    if lso_type in ('v1', 'v2'):
-      self.assertEqual(type(opt), loss_scale_optimizer.LossScaleOptimizer)
-    else:
-      self.assertEqual(type(opt), loss_scale_optimizer.LossScaleOptimizerV3)
-
-    # Ensure the optimizer can be used
-    var = tf.Variable([5.0])
-    run_op = self._run_fn_with_grad_check(
-        tf.distribute.get_strategy(), var, opt, 2)()
-    self.evaluate(tf.compat.v1.global_variables_initializer())
-    self._run_if_in_graph_mode(run_op)
-    self.assertEqual(self.evaluate(var), [3.])
-    self.assertEqual(self.evaluate(opt.dynamic_counter), 1)
-
-  @test_combinations.generate(opt_combinations_only())
-  def testSerializationWithCustomOptimizer(self, opt_cls):
-    sgd_cls = type(create_sgd(opt_cls))
-
-    class MySGD(sgd_cls):
-
-      def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.my_attribute = 123
-
-    opt = MySGD(2., momentum=0.5)
-    opt = create_lso(opt, initial_scale=2., dynamic_growth_steps=3.)
-    config = optimizers.serialize(opt)
-    custom_objects = {'MySGD': MySGD}
-    opt = optimizers.deserialize(config, custom_objects=custom_objects)
-    # Force hyperparameters to be created
-    opt.learning_rate  # pylint: disable=pointless-statement
-    self.evaluate(tf.compat.v1.global_variables_initializer())
-
-    self.assertEqual(self.evaluate(opt.learning_rate), 2.)
-    self.assertEqual(self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5)
-    self.assertEqual(self.evaluate(opt.loss_scale), 2.)
-    self.assertEqual(opt.dynamic_growth_steps, 3.)
-    self.assertEqual(opt.inner_optimizer.my_attribute, 123)
-
-  @test_combinations.generate(opt_combinations_only())
-  def testUnsupportedStrategy(self, opt_cls):
-    strategy = tf.distribute.experimental.CentralStorageStrategy()
-    expected_error = (
-        'Loss scaling is not supported with the tf.distribute.Strategy: '
-        'CentralStorageStrategy. Try using a different Strategy, e.g. a '
-        'MirroredStrategy')
-    with strategy.scope(), self.assertRaisesRegex(ValueError, expected_error):
-      create_lso(create_sgd(opt_cls))
-    opt = create_lso(create_sgd(opt_cls))
-    with strategy.scope():
-      var = tf.Variable(1.0)
-      loss = lambda: var * 2.0
-      run_fn = lambda: opt.minimize(loss, [var])
-      with self.assertRaisesRegex(ValueError, expected_error):
-        strategy.experimental_run(run_fn)
-
-  @test_combinations.generate(opt_combinations_only())
-  def testInvalidArgsWithFixedLossScale(self, opt_cls):
-    opt = create_sgd(opt_cls)
-    with self.assertRaisesRegex(
-        ValueError, '"initial_scale" must be specified if "dynamic" is False'):
-      create_lso(opt, dynamic=False)
-    opt = create_sgd(opt_cls)
-    with self.assertRaisesRegex(
-        ValueError, '"dynamic_growth_steps" must be None if "dynamic" is '
-        'False, but got: 2'):
-      create_lso(opt, dynamic=False, initial_scale=1, dynamic_growth_steps=2)
-
-  @test_combinations.generate(opt_combinations_only())
-  def testDynamicMustBeBool(self, opt_cls):
-    opt = create_sgd(opt_cls)
-    with self.assertRaisesRegex(
-        TypeError, '"dynamic" argument to LossScaleOptimizer.__init__ must be '
-        "a bool, but got: 'dynamic'"):
-      create_lso(opt, 'dynamic')
-
-  @test_combinations.generate(opt_combinations_only())
-  def testScalingWarning(self, opt_cls):
-    var = tf.Variable(1.0)
-    lso = create_lso(create_sgd(opt_cls))
-    with mock.patch.object(tf_logging, 'warning') as mock_warn:
-      lso.apply_gradients([(tf.constant(1.0), var)])
-      self.assertIn(
-          'You forgot to call LossScaleOptimizer.get_scaled_loss() and '
-          'LossScaleOptimizer.get_unscaled_gradients() before',
-          mock_warn.call_args_list[0][0][0])
-    lso = create_lso(create_sgd(opt_cls))
-    with mock.patch.object(tf_logging, 'warning') as mock_warn:
-      lso.get_scaled_loss(tf.constant(1.0))
-      lso.apply_gradients([(tf.constant(1.0), var)])
-      self.assertIn(
-          'You forgot to call LossScaleOptimizer.get_unscaled_gradients() '
-          'before',
-          mock_warn.call_args_list[0][0][0])
-    lso = create_lso(create_sgd(opt_cls))
-    with mock.patch.object(tf_logging, 'warning') as mock_warn:
-      lso.get_unscaled_gradients([tf.constant(1.0)])
-      lso.apply_gradients([(tf.constant(1.0), var)])
-      self.assertIn(
-          'You forgot to call LossScaleOptimizer.get_scaled_loss() before',
-          mock_warn.call_args_list[0][0][0])
-    lso = create_lso(create_sgd(opt_cls))
-    with mock.patch.object(tf_logging, 'warning') as mock_warn:
-      lso.get_scaled_loss(tf.constant(1.0))
-      lso.get_unscaled_gradients([tf.constant(1.0)])
-      lso.apply_gradients([(tf.constant(1.0), var)])
-      mock_warn.assert_not_called()
-
-  @test_combinations.generate(opt_combinations_only())
-  def testErrorWhenNesting(self, opt_cls):
-    opt = create_sgd(opt_cls)
-    opt = create_lso(opt)
-    with self.assertRaisesRegex(
-        TypeError, 'LossScaleOptimizer cannot wrap another LossScaleOptimizer'):
-      create_lso(opt)
-
-  @test_combinations.generate(opt_combinations_only())
-  def testErrorWrappingSameOptimizerMultipleTimes(self, opt_cls):
-    inner_opt = create_sgd(opt_cls)
-    create_lso(inner_opt)
-    with self.assertRaisesRegex(
-        ValueError,
-        '"inner_optimizer" is already wrapped by a LossScaleOptimizer.'):
-      create_lso(inner_opt)
-
-  def testErrorWhenWrappingNonOptimizer(self):
-    with self.assertRaisesRegex(
-        TypeError,
-        '"inner_optimizer" must be an instance of '
-        '`tf.keras.optimizers.Optimizer` or '
-        '`tf.keras.optimizers.experimental.Optimizer`, but got: 1'):
-      loss_scale_optimizer.BaseLossScaleOptimizer(1)
-
-  def testErrorWhenWrappingLegacyKerasOptimizers(self):
-    sgd = legacy_sgd.SGD()
-    with self.assertRaisesRegex(
-        TypeError, 'not an instance of `tensorflow.python.keras.optimizers`'):
-      loss_scale_optimizer.BaseLossScaleOptimizer(sgd)
-
-  def testErrorWhenV3LsoWrapsV2Optimizer(self):
-    sgd = gradient_descent.SGD()
-    with self.assertRaisesRegex(
-        TypeError, 'only the new experimental optimizer '
-        'defined in keras/optimizer_expeirmental/optimizer.py can be '
-        'passed'):
-      loss_scale_optimizer.LossScaleOptimizerV3(sgd)
-
-  def testErrorWhenV2LsoWrapsV3Optimizer(self):
-    sgd = sgd_experimental.SGD()
-    with self.assertRaisesRegex(
-        TypeError, 'only the classic optimizers subclassing from '
-        '`tf.keras.optimizers.Optimizer` can be passed'):
-      loss_scale_optimizer.LossScaleOptimizer(sgd)
-
-
-if __name__ == '__main__':
-  tf.test.main()
+
+    @test_combinations.generate(opt_combinations_only())
+    def testSerializationWithCustomOptimizer(self, opt_cls):
+        sgd_cls = type(create_sgd(opt_cls))
+
+        class MySGD(sgd_cls):
+            def __init__(self, *args, **kwargs):
+                super().__init__(*args, **kwargs)
+                self.my_attribute = 123
+
+        opt = MySGD(2.0, momentum=0.5)
+        opt = create_lso(opt, initial_scale=2.0, dynamic_growth_steps=3.0)
+        config = optimizers.serialize(opt)
+        custom_objects = {"MySGD": MySGD}
+        opt = optimizers.deserialize(config, custom_objects=custom_objects)
+        # Force hyperparameters to be created
+        opt.learning_rate
+        self.evaluate(tf.compat.v1.global_variables_initializer())
+
+        self.assertEqual(self.evaluate(opt.learning_rate), 2.0)
+        self.assertEqual(
+            self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5
+        )
+        self.assertEqual(self.evaluate(opt.loss_scale), 2.0)
+        self.assertEqual(opt.dynamic_growth_steps, 3.0)
+        self.assertEqual(opt.inner_optimizer.my_attribute, 123)
+
+    @test_utils.run_v2_only
+    def testConvertToLegacyOptimizer(self):
+        opt = sgd_experimental.SGD(1.0)
+        opt = loss_scale_optimizer.BaseLossScaleOptimizer(opt)
+        converted_opt = optimizers.convert_to_legacy_optimizer(opt)
+        self.assertEqual(
+            type(converted_opt), loss_scale_optimizer.LossScaleOptimizer
+        )
+
+        reference_opt = gradient_descent.SGD(1.0)
+        reference_opt = loss_scale_optimizer.BaseLossScaleOptimizer(
+            reference_opt
+        )
+        self.assertEqual(converted_opt.get_config(), reference_opt.get_config())
+
+        # Test with a custom learning rate schedule
+        class CustomLRSchedule(learning_rate_schedule.LearningRateSchedule):
+            def __init__(self, initial_learning_rate):
+                self.initial_learning_rate = initial_learning_rate
+
+            def __call__(self, step):
+                step = tf.cast(step, tf.float32)
+                return self.initial_learning_rate / (step + 1)
+
+            def get_config(self):
+                return {"initial_learning_rate": self.initial_learning_rate}
+
+        opt = sgd_experimental.SGD(CustomLRSchedule(1.0))
+        opt = loss_scale_optimizer.BaseLossScaleOptimizer(opt)
+        converted_opt = optimizers.convert_to_legacy_optimizer(opt)
+        self.assertEqual(
+            type(converted_opt), loss_scale_optimizer.LossScaleOptimizer
+        )
+
+        reference_opt = gradient_descent.SGD(CustomLRSchedule(1.0))
+        reference_opt = loss_scale_optimizer.BaseLossScaleOptimizer(
+            reference_opt
+        )
+        self.assertEqual(converted_opt.get_config(), reference_opt.get_config())
+
+    @test_combinations.generate(opt_combinations_only())
+    def testUnsupportedStrategy(self, opt_cls):
+        strategy = tf.distribute.experimental.CentralStorageStrategy()
+        expected_error = (
+            "Loss scaling is not supported with the tf.distribute.Strategy: "
+            "CentralStorageStrategy. Try using a different Strategy, e.g. a "
+            "MirroredStrategy"
+        )
+        with strategy.scope(), self.assertRaisesRegex(
+            ValueError, expected_error
+        ):
+            create_lso(create_sgd(opt_cls))
+        opt = create_lso(create_sgd(opt_cls))
+        with strategy.scope():
+            var = tf.Variable(1.0)
+            loss = lambda: var * 2.0
+            run_fn = lambda: opt.minimize(loss, [var])
+            with self.assertRaisesRegex(ValueError, expected_error):
+                strategy.experimental_run(run_fn)
+
+    @test_combinations.generate(opt_combinations_only())
+    def testInvalidArgsWithFixedLossScale(self, opt_cls):
+        opt = create_sgd(opt_cls)
+        with self.assertRaisesRegex(
+            ValueError,
+            '"initial_scale" must be specified if "dynamic" is False',
+        ):
+            create_lso(opt, dynamic=False)
+        opt = create_sgd(opt_cls)
+        with self.assertRaisesRegex(
+            ValueError,
+            '"dynamic_growth_steps" must be None if "dynamic" is '
+            "False, but got: 2",
+        ):
+            create_lso(
+                opt, dynamic=False, initial_scale=1, dynamic_growth_steps=2
+            )
+
+    @test_combinations.generate(opt_combinations_only())
+    def testDynamicMustBeBool(self, opt_cls):
+        opt = create_sgd(opt_cls)
+        with self.assertRaisesRegex(
+            TypeError,
+            '"dynamic" argument to LossScaleOptimizer.__init__ must be '
+            "a bool, but got: 'dynamic'",
+        ):
+            create_lso(opt, "dynamic")
+
+    @test_combinations.generate(opt_combinations_only())
+    def testScalingWarning(self, opt_cls):
+        var = tf.Variable(1.0)
+        lso = create_lso(create_sgd(opt_cls))
+        with mock.patch.object(tf_logging, "warning") as mock_warn:
+            lso.apply_gradients([(tf.constant(1.0), var)])
+            self.assertIn(
+                "You forgot to call LossScaleOptimizer.get_scaled_loss() and "
+                "LossScaleOptimizer.get_unscaled_gradients() before",
+                mock_warn.call_args_list[0][0][0],
+            )
+        lso = create_lso(create_sgd(opt_cls))
+        with mock.patch.object(tf_logging, "warning") as mock_warn:
+            lso.get_scaled_loss(tf.constant(1.0))
+            lso.apply_gradients([(tf.constant(1.0), var)])
+            self.assertIn(
+                "You forgot to call "
+                "LossScaleOptimizer.get_unscaled_gradients() before",
+                mock_warn.call_args_list[0][0][0],
+            )
+        lso = create_lso(create_sgd(opt_cls))
+        with mock.patch.object(tf_logging, "warning") as mock_warn:
+            lso.get_unscaled_gradients([tf.constant(1.0)])
+            lso.apply_gradients([(tf.constant(1.0), var)])
+            self.assertIn(
+                "You forgot to call LossScaleOptimizer.get_scaled_loss() "
+                "before",
+                mock_warn.call_args_list[0][0][0],
+            )
+
+    @test_combinations.generate(opt_combinations_only())
+    def testScalingNoWarning(self, opt_cls):
+        var = tf.Variable(1.0)
+        lso = create_lso(create_sgd(opt_cls))
+        with mock.patch.object(tf_logging, "warning") as mock_warn:
+            lso.get_scaled_loss(tf.constant(1.0))
+            lso.get_unscaled_gradients([tf.constant(1.0)])
+            lso.apply_gradients([(tf.constant(1.0), var)])
+            mock_warn.assert_not_called()
+
+    @test_combinations.generate(opt_combinations_only())
+    def testErrorWhenNesting(self, opt_cls):
+        opt = create_sgd(opt_cls)
+        opt = create_lso(opt)
+        with self.assertRaisesRegex(
+            TypeError,
+            "LossScaleOptimizer cannot wrap another LossScaleOptimizer",
+        ):
+            create_lso(opt)
+
+    @test_combinations.generate(opt_combinations_only())
+    def testErrorWrappingSameOptimizerMultipleTimes(self, opt_cls):
+        inner_opt = create_sgd(opt_cls)
+        create_lso(inner_opt)
+        with self.assertRaisesRegex(
+            ValueError,
+            '"inner_optimizer" is already wrapped by a LossScaleOptimizer.',
+        ):
+            create_lso(inner_opt)
+
+    def testErrorWhenWrappingNonOptimizer(self):
+        with self.assertRaisesRegex(
+            TypeError,
+            '"inner_optimizer" must be an instance of '
+            "`tf.keras.optimizers.Optimizer` or "
"`tf.keras.optimizers.experimental.Optimizer`, but got: 1", + ): + loss_scale_optimizer.BaseLossScaleOptimizer(1) + + def testErrorWhenV3LsoWrapsV2Optimizer(self): + sgd = gradient_descent.SGD() + with self.assertRaisesRegex( + TypeError, + "only the new experimental optimizer " + "defined in keras/optimizer_expeirmental/optimizer.py can be " + "passed", + ): + loss_scale_optimizer.LossScaleOptimizerV3(sgd) + + def testErrorWhenV2LsoWrapsV3Optimizer(self): + sgd = sgd_experimental.SGD() + with self.assertRaisesRegex( + TypeError, + "only the classic optimizers subclassing from " + "`tf.keras.optimizers.Optimizer` can be passed", + ): + loss_scale_optimizer.LossScaleOptimizer(sgd) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/mixed_precision/mixed_precision_graph_rewrite_test.py b/keras/mixed_precision/mixed_precision_graph_rewrite_test.py index 8e36245621cf..6f8523393475 100644 --- a/keras/mixed_precision/mixed_precision_graph_rewrite_test.py +++ b/keras/mixed_precision/mixed_precision_graph_rewrite_test.py @@ -14,138 +14,167 @@ # ============================================================================== """Tests Keras integration with enable_mixed_precision_graph_rewrite().""" +import os + import tensorflow.compat.v2 as tf -import os +from keras.mixed_precision import ( + loss_scale_optimizer as loss_scale_optimizer_v2, +) +from keras.mixed_precision import policy +from keras.optimizers.legacy import gradient_descent as gradient_descent_v2 from keras.testing_infra import test_combinations from keras.testing_infra import test_utils -from keras.mixed_precision import loss_scale_optimizer as loss_scale_optimizer_v2 -from keras.mixed_precision import policy -from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_v2 class MixedPrecisionTest(test_combinations.TestCase): - IGNORE_PERF_VAR = 'TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_IGNORE_PERFORMANCE' - - def setUp(self): - super().setUp() - # Enable the tests to be run on pre-Volta GPUs by telling the grappler pass - # to ignore performance and always transform the graph. - self._original_ignore_perf_value = os.getenv(self.IGNORE_PERF_VAR) - os.environ[self.IGNORE_PERF_VAR] = '1' - - def tearDown(self): - # Set the IGNORE_PERF_VAR variable back to it's original value. - if self._original_ignore_perf_value is not None: - os.environ[self.IGNORE_PERF_VAR] = self._original_ignore_perf_value - else: - del os.environ[self.IGNORE_PERF_VAR] - - tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite() - super().tearDown() - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_wrap_optimizer_fixed_loss_scale(self): - opt = gradient_descent_v2.SGD(1.0) - opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - opt, 123) - self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(opt.loss_scale), 123.) - self.assertFalse(opt.dynamic) - self.assertTrue(opt.initial_scale, 123.) - - opt = gradient_descent_v2.SGD(1.0) - opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - opt, tf.compat.v1.mixed_precision.FixedLossScale(123)) - self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(opt.loss_scale), 123.) - self.assertFalse(opt.dynamic) - self.assertTrue(opt.initial_scale, 123.) 
- - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_wrap_optimizer_dynamic_loss_scale(self): - opt = gradient_descent_v2.SGD(1.0) - opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - opt, 'dynamic') - self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(opt.loss_scale), 2. ** 15) - self.assertTrue(opt.dynamic) - self.assertTrue(opt.initial_scale, 2. ** 15) - self.assertTrue(opt.dynamic_growth_steps, 2000) - - opt = gradient_descent_v2.SGD(1.0) - opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - opt, tf.compat.v1.mixed_precision.DynamicLossScale( - initial_loss_scale=4, increment_period=1000)) - self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(opt.loss_scale), 4.) - self.assertTrue(opt.dynamic) - self.assertTrue(opt.initial_scale, 4.) - self.assertTrue(opt.dynamic_growth_steps, 1000) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_wrap_optimizer_dynamic_loss_scale_errors(self): - - opt = gradient_descent_v2.SGD(1.0) - with self.assertRaisesRegex( - ValueError, 'When passing a DynamicLossScale to "loss_scale", ' - 'DynamicLossScale.multiplier must be 2. Got: ' - 'DynamicLossScale'): - tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - opt, tf.compat.v1.mixed_precision.DynamicLossScale(multiplier=4.)) - - class MyLossScale(tf.compat.v1.mixed_precision.LossScale): - - def __call__(self): - return 1. - - def update(self, grads): - return None, True - - def get_config(self): - return {} - - with self.assertRaisesRegex( - TypeError, 'Passing a LossScale that is not a FixedLossScale or a ' - 'DynamicLossScale is not supported. Got:'): - tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - opt, MyLossScale()) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_optimizer_errors(self): - opt = gradient_descent_v2.SGD(1.0) - opt = loss_scale_optimizer_v2.LossScaleOptimizer(opt) - with self.assertRaisesRegex( - ValueError, '"opt" must not already be an instance of a ' - 'LossScaleOptimizer.'): - tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(opt) - self.assertFalse(tf.config.optimizer.get_experimental_options() - .get('auto_mixed_precision', False)) - - @test_utils.enable_v2_dtype_behavior - def test_error_if_policy_is_set(self): - with policy.policy_scope('mixed_float16'): - with self.assertRaisesRegex(ValueError, - 'the global Keras dtype Policy has been set'): + IGNORE_PERF_VAR = "TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_IGNORE_PERFORMANCE" + + def setUp(self): + super().setUp() + # Enable the tests to be run on pre-Volta GPUs by telling the grappler + # pass to ignore performance and always transform the graph. + self._original_ignore_perf_value = os.getenv(self.IGNORE_PERF_VAR) + os.environ[self.IGNORE_PERF_VAR] = "1" + + def tearDown(self): + # Set the IGNORE_PERF_VAR variable back to its original value. 
+ if self._original_ignore_perf_value is not None: + os.environ[self.IGNORE_PERF_VAR] = self._original_ignore_perf_value + else: + del os.environ[self.IGNORE_PERF_VAR] + + tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite() + super().tearDown() + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_wrap_optimizer_fixed_loss_scale(self): + opt = gradient_descent_v2.SGD(1.0) + opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + opt, 123 + ) + self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(opt.loss_scale), 123.0) + self.assertFalse(opt.dynamic) + self.assertTrue(opt.initial_scale, 123.0) + + opt = gradient_descent_v2.SGD(1.0) + opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + opt, tf.compat.v1.mixed_precision.FixedLossScale(123) + ) + self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(opt.loss_scale), 123.0) + self.assertFalse(opt.dynamic) + self.assertTrue(opt.initial_scale, 123.0) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_wrap_optimizer_dynamic_loss_scale(self): + opt = gradient_descent_v2.SGD(1.0) + opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + opt, "dynamic" + ) + self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(opt.loss_scale), 2.0**15) + self.assertTrue(opt.dynamic) + self.assertTrue(opt.initial_scale, 2.0**15) + self.assertTrue(opt.dynamic_growth_steps, 2000) + + opt = gradient_descent_v2.SGD(1.0) + opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + opt, + tf.compat.v1.mixed_precision.DynamicLossScale( + initial_loss_scale=4, increment_period=1000 + ), + ) + self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(opt.loss_scale), 4.0) + self.assertTrue(opt.dynamic) + self.assertTrue(opt.initial_scale, 4.0) + self.assertTrue(opt.dynamic_growth_steps, 1000) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_wrap_optimizer_dynamic_loss_scale_errors(self): + + opt = gradient_descent_v2.SGD(1.0) + with self.assertRaisesRegex( + ValueError, + 'When passing a DynamicLossScale to "loss_scale", ' + "DynamicLossScale.multiplier must be 2. Got: " + "DynamicLossScale", + ): + tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + opt, + tf.compat.v1.mixed_precision.DynamicLossScale(multiplier=4.0), + ) + + class MyLossScale(tf.compat.v1.mixed_precision.LossScale): + def __call__(self): + return 1.0 + + def update(self, grads): + return None, True + + def get_config(self): + return {} + + with self.assertRaisesRegex( + TypeError, + "Passing a LossScale that is not a FixedLossScale or a " + "DynamicLossScale is not supported. 
Got:", + ): + tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + opt, MyLossScale() + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_optimizer_errors(self): + opt = gradient_descent_v2.SGD(1.0) + opt = loss_scale_optimizer_v2.LossScaleOptimizer(opt) + with self.assertRaisesRegex( + ValueError, + '"opt" must not already be an instance of a LossScaleOptimizer.', + ): + tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + opt + ) + self.assertFalse( + tf.config.optimizer.get_experimental_options().get( + "auto_mixed_precision", False + ) + ) + + @test_utils.enable_v2_dtype_behavior + def test_error_if_policy_is_set(self): + with policy.policy_scope("mixed_float16"): + with self.assertRaisesRegex( + ValueError, "the global Keras dtype Policy has been set" + ): + tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( # noqa: E501 + gradient_descent_v2.SGD(1.0) + ) + # Test no error is thrown when the policy is currently the default. tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - gradient_descent_v2.SGD(1.0)) - # Test no error is thrown when the policy is currently the default. - tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - gradient_descent_v2.SGD(1.0)) - # Test no error is thrown when the policy is a non-mixed policy. - with policy.policy_scope('float64'): - tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - gradient_descent_v2.SGD(1.0)) - - -if __name__ == '__main__': - tf.test.main() + gradient_descent_v2.SGD(1.0) + ) + # Test no error is thrown when the policy is a non-mixed policy. + with policy.policy_scope("float64"): + tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + gradient_descent_v2.SGD(1.0) + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/mixed_precision/model_test.py b/keras/mixed_precision/model_test.py index 86c8187ec0ca..0663d589f336 100644 --- a/keras/mixed_precision/model_test.py +++ b/keras/mixed_precision/model_test.py @@ -14,19 +14,16 @@ # ============================================================================== """Tests keras.Model works properly with mixed precision.""" -import tensorflow.compat.v2 as tf - import os +import numpy as np +import tensorflow.compat.v2 as tf from absl import flags from absl.testing import parameterized -import numpy as np + from keras import backend -from keras.testing_infra import test_combinations from keras import layers from keras import models -from keras.optimizers import optimizer_v1 -from keras.testing_infra import test_utils from keras.applications import densenet from keras.applications import efficientnet from keras.applications import inception_resnet_v2 @@ -43,10 +40,14 @@ from keras.mixed_precision import loss_scale_optimizer from keras.mixed_precision import policy from keras.mixed_precision import test_util as mp_test_util -from keras.optimizers.optimizer_v2 import gradient_descent -from keras.saving import save -from keras.utils import generic_utils - +from keras.optimizers import optimizer_v1 +from keras.optimizers import sgd +from keras.optimizers.legacy import gradient_descent +from keras.saving import object_registration +from keras.saving.legacy import save +from keras.saving.serialization_lib import SafeModeScope +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils # If called outside any strategy.scope() calls, this will return the default # strategy. 
@@ -54,779 +55,989 @@ def create_mirrored_strategy(): - """Create a MirroredStrategy, using a GPU if it is available.""" - if tf.config.list_logical_devices('GPU'): - return tf.distribute.MirroredStrategy(['cpu:0', 'gpu:0']) - else: - return tf.distribute.MirroredStrategy(['cpu:0']) + """Create a MirroredStrategy, using a GPU if it is available.""" + if tf.config.list_logical_devices("GPU"): + return tf.distribute.MirroredStrategy(["cpu:0", "gpu:0"]) + else: + return tf.distribute.MirroredStrategy(["cpu:0"]) -TESTCASES = ({ - 'testcase_name': 'base', - 'strategy_fn': default_strategy_fn -}, { - 'testcase_name': 'distribute', - 'strategy_fn': create_mirrored_strategy -}) +TESTCASES = ( + {"testcase_name": "base", "strategy_fn": default_strategy_fn}, + {"testcase_name": "distribute", "strategy_fn": create_mirrored_strategy}, +) class KerasModelTest(test_combinations.TestCase): - """Test mixed precision with Keras models.""" - - def _skip_if_strategy_unsupported(self, strategy_fn): - if (strategy_fn != default_strategy_fn and - test_utils.get_model_type() == 'subclass'): - self.skipTest('Non-default strategies are unsupported with subclassed ' - 'models') - - def _skip_if_save_format_unsupported(self, save_format): - model_type = test_utils.get_model_type() - if save_format == 'h5' and model_type == 'subclass': - self.skipTest('Saving subclassed models with the HDF5 format is ' - 'unsupported') - if (save_format == 'tf' and model_type == 'subclass' and - not tf.executing_eagerly()): - self.skipTest('b/148820505: This combination of features is currently ' - 'broken.') - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - @parameterized.named_parameters( - { - 'testcase_name': 'base', - 'strategy_fn': default_strategy_fn - }, { - 'testcase_name': 'distribute', - 'strategy_fn': create_mirrored_strategy, - }, { - 'testcase_name': 'operator', - 'strategy_fn': create_mirrored_strategy, - 'use_operator': True - }, { - 'testcase_name': 'regularizer', - 'strategy_fn': create_mirrored_strategy, - 'use_regularizer': True - }, { - 'testcase_name': 'get_config', - 'strategy_fn': create_mirrored_strategy, - 'get_config': True, - 'use_regularizer': True, - }, { - 'testcase_name': 'saved_model', - 'strategy_fn': default_strategy_fn, - 'save_format': 'tf', - 'use_regularizer': True, - }, { - 'testcase_name': 'saved_model_input_spec', - 'strategy_fn': default_strategy_fn, - 'save_format': 'tf', - 'use_regularizer': True, - 'use_input_spec': True, - }, { - 'testcase_name': 'h5', - 'strategy_fn': default_strategy_fn, - 'save_format': 'h5', - 'use_regularizer': True, - }, { - 'testcase_name': 'saved_model_distribute', - 'strategy_fn': create_mirrored_strategy, - 'save_format': 'tf', - 'use_regularizer': True, - }, { - 'testcase_name': 'saved_model_input_spec_distribute', - 'strategy_fn': create_mirrored_strategy, - 'save_format': 'tf', - 'use_regularizer': True, - 'use_input_spec': True, - }, { - 'testcase_name': 'h5_distribute', - 'strategy_fn': create_mirrored_strategy, - 'save_format': 'h5', - 'use_regularizer': True, - }) - def test_model(self, - strategy_fn, - use_operator=False, - use_regularizer=False, - policy_name='mixed_float16', - get_config=False, - save_format=None, - use_input_spec=False): - self._skip_if_strategy_unsupported(strategy_fn) - self._skip_if_save_format_unsupported(save_format) - if use_regularizer: - weight_regularizer = mp_test_util.IdentityRegularizer() - activity_regularizer = mp_test_util.ReduceSumRegularizer() - else: - weight_regularizer = 
activity_regularizer = None - with strategy_fn().scope(): - with policy.policy_scope(policy_name): - layer = mp_test_util.MultiplyLayer( - assert_type=tf.float16, - use_operator=use_operator, - regularizer=weight_regularizer, - activity_regularizer=activity_regularizer, - input_shape=(1,)) - if use_input_spec: - layer.input_spec = input_spec.InputSpec(shape=(None, 1)) - model = test_utils.get_model_from_layers([layer], input_shape=(1,), - input_dtype=tf.float16) - if get_config: - config = model.get_config() - model = model.__class__.from_config( - config, - custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer}) - (layer,) = (layer for layer in model.layers - if isinstance(layer, mp_test_util.MultiplyLayer)) - - def loss_fn(y_true, y_pred): - del y_true - return tf.reduce_mean(y_pred) - - # Learning rate is small enough that if applied to a float16 variable, - # the variable will not change. So this tests the learning rate not - # applied to a float16 value, but instead the float32 variable. - opt = gradient_descent.SGD(2**-14) - # Use a fixed loss scale, as this test will fail if gradients are - # skipped for a step due to dynamic loss scaling. - opt = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False, - initial_scale=8) - model.compile( - opt, - loss=loss_fn, - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((2, 1)) - y = np.ones((2, 1)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) - model.fit(dataset) - # Variable starts at 1, and should have gradient of 2 ** -14 subtracted - # from it. - expected = 1 - 2**-14 - if use_regularizer: - # Weight and activity regularizer each add another 2 ** -14 to the - # gradient. - expected -= 2 * 2**-14 - self.assertEqual(backend.eval(layer.v), expected) - - if save_format: - with generic_utils.CustomObjectScope( - {'MultiplyLayer': mp_test_util.MultiplyLayer, 'loss_fn': loss_fn}): - self._test_saving(model, dataset, save_format, use_regularizer) - - def _test_saving(self, model, dataset, save_format, use_regularizer): - # Save and load model, asserting variable does not change - save_path = os.path.join(self.get_temp_dir(), 'model') - model.save(save_path, save_format=save_format) - model = save.load_model(save_path) - (layer,) = (layer for layer in model.layers - if 'MultiplyLayer' in layer.__class__.__name__) - expected = 1 - 2**-14 - if use_regularizer: - expected -= 2 * 2**-14 - self.assertEqual(backend.eval(layer.v), expected) - - # Continue training, and assert variable is correct value - model.fit(dataset) - new_expected = expected - 2 ** -14 - if use_regularizer: - new_expected -= 2 * 2 ** -14 - self.assertEqual(backend.eval(layer.v), new_expected) - - # Load saved model again, and assert variable is previous value - model = save.load_model(save_path) - (layer,) = (layer for layer in model.layers - if 'MultiplyLayer' in layer.__class__.__name__) - self.assertEqual(backend.eval(layer.v), expected) - - # Ensure various dtype-related aspects of the layer are correct - self.assertEqual(layer.dtype, 'float32') - self.assertEqual(layer.dtype_policy.name, 'mixed_float16') - self.assertEqual(layer.v.dtype, 'float32') - self.assertEqual(layer(np.ones((2, 1))).dtype, 'float16') - - self.assertEqual(type(model.dtype_policy), policy.Policy) - self.assertEqual(layer.get_config()['dtype'], - {'class_name': 'Policy', 'config': { - 'name': 'mixed_float16'}}) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters( - { - 'testcase_name': 'base', - 'strategy_fn': default_strategy_fn - }, { - 
'testcase_name': 'distribute', - 'strategy_fn': create_mirrored_strategy, - }) - def test_fixed_loss_scaling(self, - strategy_fn): - # Note: We do not test mixed precision in this method, only loss scaling. - loss_scale = 8. - batch_size = 4 - with strategy_fn().scope(): - x = layers.Input(shape=(1,), batch_size=batch_size) - layer = mp_test_util.MultiplyLayer() - y = layer(x) - - # The gradient of 'y' at this point is 1. With loss scaling, the gradient - # is 'loss_scale'. We divide by the batch size since the loss is averaged - # across batch elements. - expected_gradient = loss_scale / batch_size - identity_with_grad_check_fn = ( - mp_test_util.create_identity_with_grad_check_fn([expected_gradient])) - y = core.Lambda(identity_with_grad_check_fn)(y) - model = models.Model(inputs=x, outputs=y) - - def loss_fn(y_true, y_pred): - del y_true - return tf.reduce_mean(y_pred) - - opt = gradient_descent.SGD(1.) - opt = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False, - initial_scale=loss_scale) - model.compile( - opt, - loss=loss_fn, - run_eagerly=test_utils.should_run_eagerly()) - - self.assertEqual(backend.eval(layer.v), 1) - x = np.ones((batch_size, 1)) - y = np.ones((batch_size, 1)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size) - model.fit(dataset) - # Variable starts at 1, and should have gradient of 1 subtracted from it. - expected = 0 - self.assertEqual(backend.eval(layer.v), expected) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters( - { - 'testcase_name': 'base', - 'strategy_fn': default_strategy_fn - }, { - 'testcase_name': 'distribute', - 'strategy_fn': create_mirrored_strategy, - }, { - 'testcase_name': 'loss_scaling', - 'strategy_fn': create_mirrored_strategy, - 'use_loss_scaling': True - }) - def test_advanced_model(self, strategy_fn, use_loss_scaling=False): - # The advanced model tests mixed-precision-related features that would occur - # in a resnet50 model. It tests a model that has: - # * Multiple layers, some which use auto-cast variables and some which do - # not - # * Regularization on some variables and not others. - # * A fixed loss scale (if use_loss_scaling is True) - - strategy = strategy_fn() - if use_loss_scaling: - loss_scale = 8. - learning_rate = 2**-14 - - with strategy.scope(): - with policy.policy_scope(policy.Policy('mixed_float16')): - x = layers.Input(shape=(1,), batch_size=2) - layer1 = mp_test_util.MultiplyLayer( - assert_type=tf.float16, - regularizer=mp_test_util.IdentityRegularizer(), - use_operator=True) - layer2 = mp_test_util.MultiplyLayerWithoutAutoCast( - assert_type=tf.float16, use_operator=True) - layer3 = mp_test_util.MultiplyLayer(assert_type=tf.float16, - use_operator=False) - layer4 = mp_test_util.MultiplyLayerWithoutAutoCast( - assert_type=tf.float16, - regularizer=mp_test_util.IdentityRegularizer(), - use_operator=False) - y = layer1(x) - y = layer2(y) - y = layer3(y) - y = layer4(y) - if use_loss_scaling: - # The gradient of 'y' at this point is 1. With loss scaling, the - # gradient is 'loss_scale'. We divide by the batch size of 2 since the - # loss is averaged across batch elements. 
- expected_gradient = loss_scale / 2 - identity_with_grad_check_fn = ( - mp_test_util.create_identity_with_grad_check_fn( - expected_dtype=tf.float16, - expected_gradient=[expected_gradient])) - y = core.Lambda(identity_with_grad_check_fn)(y) - model = models.Model(inputs=x, outputs=y) - - def loss_fn(y_true, y_pred): - del y_true - return tf.reduce_mean(y_pred) - - opt = gradient_descent.SGD(learning_rate) + """Test mixed precision with Keras models.""" + + def _skip_if_strategy_unsupported(self, strategy_fn): + if ( + strategy_fn != default_strategy_fn + and test_utils.get_model_type() == "subclass" + ): + self.skipTest( + "Non-default strategies are unsupported with subclassed models" + ) + + def _skip_if_save_format_unsupported(self, save_format): + model_type = test_utils.get_model_type() + if save_format == "h5" and model_type == "subclass": + self.skipTest( + "Saving subclassed models with the HDF5 format is unsupported" + ) + if ( + save_format == "tf" + and model_type == "subclass" + and not tf.executing_eagerly() + ): + self.skipTest( + "b/148820505: This combination of features is currently broken." + ) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + {"testcase_name": "base", "strategy_fn": default_strategy_fn}, + { + "testcase_name": "distribute", + "strategy_fn": create_mirrored_strategy, + }, + { + "testcase_name": "operator", + "strategy_fn": create_mirrored_strategy, + "use_operator": True, + }, + { + "testcase_name": "regularizer", + "strategy_fn": create_mirrored_strategy, + "use_regularizer": True, + }, + { + "testcase_name": "get_config", + "strategy_fn": create_mirrored_strategy, + "get_config": True, + "use_regularizer": True, + }, + { + "testcase_name": "saved_model", + "strategy_fn": default_strategy_fn, + "save_format": "tf", + "use_regularizer": True, + }, + { + "testcase_name": "saved_model_input_spec", + "strategy_fn": default_strategy_fn, + "save_format": "tf", + "use_regularizer": True, + "use_input_spec": True, + }, + { + "testcase_name": "h5", + "strategy_fn": default_strategy_fn, + "save_format": "h5", + "use_regularizer": True, + }, + { + "testcase_name": "saved_model_distribute", + "strategy_fn": create_mirrored_strategy, + "save_format": "tf", + "use_regularizer": True, + }, + { + "testcase_name": "saved_model_legacy_distribute", + "strategy_fn": create_mirrored_strategy, + "save_format": "tf", + "use_regularizer": True, + "use_legacy_optimizer": True, + }, + { + "testcase_name": "saved_model_input_spec_distribute", + "strategy_fn": create_mirrored_strategy, + "save_format": "tf", + "use_regularizer": True, + "use_input_spec": True, + }, + { + "testcase_name": "h5_distribute", + "strategy_fn": create_mirrored_strategy, + "save_format": "h5", + "use_regularizer": True, + }, + { + "testcase_name": "h5_legacy_distribute", + "strategy_fn": create_mirrored_strategy, + "save_format": "h5", + "use_regularizer": True, + "use_legacy_optimizer": True, + }, + ) + def test_model( + self, + strategy_fn, + use_operator=False, + use_regularizer=False, + policy_name="mixed_float16", + get_config=False, + save_format=None, + use_input_spec=False, + use_legacy_optimizer=False, + ): + self._skip_if_strategy_unsupported(strategy_fn) + self._skip_if_save_format_unsupported(save_format) + if not tf.__internal__.tf2.enabled(): + # The non-legacy optimizer is only supported in TF2 + use_legacy_optimizer = True + if use_regularizer: + weight_regularizer = mp_test_util.IdentityRegularizer() + 
activity_regularizer = mp_test_util.ReduceSumRegularizer() + else: + weight_regularizer = activity_regularizer = None + with strategy_fn().scope(): + with policy.policy_scope(policy_name): + layer = mp_test_util.MultiplyLayer( + assert_type=tf.float16, + use_operator=use_operator, + regularizer=weight_regularizer, + activity_regularizer=activity_regularizer, + input_shape=(1,), + ) + if use_input_spec: + layer.input_spec = input_spec.InputSpec(shape=(None, 1)) + model = test_utils.get_model_from_layers( + [layer], input_shape=(1,), input_dtype=tf.float16 + ) + if get_config: + config = model.get_config() + model = model.__class__.from_config( + config, + custom_objects={ + "MultiplyLayer": mp_test_util.MultiplyLayer + }, + ) + (layer,) = ( + layer + for layer in model.layers + if isinstance(layer, mp_test_util.MultiplyLayer) + ) + + def loss_fn(y_true, y_pred): + del y_true + return tf.reduce_mean(y_pred) + + # Learning rate is small enough that if applied to a float16 + # variable, the variable will not change. So this tests the + # learning rate not applied to a float16 value, but instead the + # float32 variable. + learning_rate = 2**-14 + if use_legacy_optimizer: + opt = gradient_descent.SGD(learning_rate) + else: + opt = sgd.SGD(learning_rate) + # Use a fixed loss scale, as this test will fail if gradients + # are skipped for a step due to dynamic loss scaling. + opt = loss_scale_optimizer.BaseLossScaleOptimizer( + opt, dynamic=False, initial_scale=8 + ) + model.compile( + opt, + loss=loss_fn, + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones((2, 1)) + y = np.ones((2, 1)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) + model.fit(dataset) + # Variable starts at 1, and should have gradient of 2 ** -14 subtracted + # from it. + expected = 1 - 2**-14 + if use_regularizer: + # Weight and activity regularizer each add another 2 ** -14 to the + # gradient. 
+ expected -= 2 * 2**-14 + self.assertEqual(backend.eval(layer.v), expected) + + if save_format: + with object_registration.CustomObjectScope( + { + "MultiplyLayer": mp_test_util.MultiplyLayer, + "loss_fn": loss_fn, + } + ): + self._test_saving(model, dataset, save_format, use_regularizer) + + def _test_saving(self, model, dataset, save_format, use_regularizer): + # Save and load model, asserting variable does not change + save_path = os.path.join(self.get_temp_dir(), "model") + model.save(save_path, save_format=save_format) + model = save.load_model(save_path) + (layer,) = ( + layer + for layer in model.layers + if "MultiplyLayer" in layer.__class__.__name__ + ) + expected = 1 - 2**-14 + if use_regularizer: + expected -= 2 * 2**-14 + self.assertEqual(backend.eval(layer.v), expected) + + # Continue training, and assert variable is correct value + model.fit(dataset) + new_expected = expected - 2**-14 + if use_regularizer: + new_expected -= 2 * 2**-14 + self.assertEqual(backend.eval(layer.v), new_expected) + + # Load saved model again, and assert variable is previous value + model = save.load_model(save_path) + (layer,) = ( + layer + for layer in model.layers + if "MultiplyLayer" in layer.__class__.__name__ + ) + self.assertEqual(backend.eval(layer.v), expected) + + # Ensure various dtype-related aspects of the layer are correct + self.assertEqual(layer.dtype, "float32") + self.assertEqual(layer.dtype_policy.name, "mixed_float16") + self.assertEqual(layer.v.dtype, "float32") + self.assertEqual(layer(np.ones((2, 1))).dtype, "float16") + + self.assertEqual(type(model.dtype_policy), policy.Policy) + if tf.__internal__.tf2.enabled(): + self.assertEqual( + layer.get_config()["dtype"], + { + "module": "keras.mixed_precision", + "class_name": "Policy", + "config": {"name": "mixed_float16"}, + "registered_name": None, + }, + ) + else: + self.assertEqual( + layer.get_config()["dtype"], + { + "class_name": "Policy", + "config": {"name": "mixed_float16"}, + }, + ) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + {"testcase_name": "base", "strategy_fn": default_strategy_fn}, + { + "testcase_name": "distribute", + "strategy_fn": create_mirrored_strategy, + }, + ) + def test_fixed_loss_scaling(self, strategy_fn): + # The non-legacy optimizer is only supported in TF2 + use_legacy_optimizer = not tf.__internal__.tf2.enabled() + # Note: We do not test mixed precision in this method, only loss + # scaling. + loss_scale = 8.0 + batch_size = 4 + with strategy_fn().scope(): + x = layers.Input(shape=(1,), batch_size=batch_size) + layer = mp_test_util.MultiplyLayer() + y = layer(x) + + # The gradient of 'y' at this point is 1. With loss scaling, the + # gradient is 'loss_scale'. We divide by the batch size since the + # loss is averaged across batch elements. 
+ expected_gradient = loss_scale / batch_size + identity_with_grad_check_fn = ( + mp_test_util.create_identity_with_grad_check_fn( + [expected_gradient] + ) + ) + y = core.Lambda(identity_with_grad_check_fn)(y) + model = models.Model(inputs=x, outputs=y) + + def loss_fn(y_true, y_pred): + del y_true + return tf.reduce_mean(y_pred) + + if use_legacy_optimizer: + opt = gradient_descent.SGD(1.0) + else: + opt = sgd.SGD(1.0) + opt = loss_scale_optimizer.BaseLossScaleOptimizer( + opt, dynamic=False, initial_scale=loss_scale + ) + model.compile( + opt, loss=loss_fn, run_eagerly=test_utils.should_run_eagerly() + ) + + self.assertEqual(backend.eval(layer.v), 1) + x = np.ones((batch_size, 1)) + y = np.ones((batch_size, 1)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size) + model.fit(dataset) + # Variable starts at 1, and should have gradient of 1 subtracted from + # it. + expected = 0 + self.assertEqual(backend.eval(layer.v), expected) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + {"testcase_name": "base", "strategy_fn": default_strategy_fn}, + { + "testcase_name": "distribute", + "strategy_fn": create_mirrored_strategy, + }, + { + "testcase_name": "loss_scaling", + "strategy_fn": create_mirrored_strategy, + "use_loss_scaling": True, + }, + ) + def test_advanced_model(self, strategy_fn, use_loss_scaling=False): + # The advanced model tests mixed-precision-related features that would + # occur in a resnet50 model. It tests a model that has: + # * Multiple layers, some which use auto-cast variables and some which + # do not + # * Regularization on some variables and not others. + # * A fixed loss scale (if use_loss_scaling is True) + + strategy = strategy_fn() if use_loss_scaling: - opt = loss_scale_optimizer.LossScaleOptimizer( - opt, dynamic=False, initial_scale=loss_scale) - model.compile( - opt, - loss=loss_fn, - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((2, 1)) - y = np.ones((2, 1)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) - model.fit(dataset) - for layer in (layer1, layer2, layer3, layer4): - if layer.losses: - # Layer has weight regularizer - self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate) - else: - # Layer does not have weight regularizer - self.assertEqual(backend.eval(layer.v), 1 - learning_rate) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - @parameterized.named_parameters( - { - 'testcase_name': 'base', - 'strategy_fn': default_strategy_fn - }, { - 'testcase_name': 'distribute', - 'strategy_fn': create_mirrored_strategy, - }, { - 'testcase_name': 'get_config', - 'strategy_fn': create_mirrored_strategy, - 'get_config': True, - }) - def test_dynamic_loss_scaling(self, - strategy_fn, - get_config=False): - strategy = strategy_fn() - initial_loss_scale = 2. - batch_size = 4 - expected_gradient = backend.variable([initial_loss_scale / batch_size], - dtype=tf.float16) - # If this variable is set to True, the model below will have NaN gradients - have_nan_gradients = backend.variable(False, dtype=tf.bool) - with strategy.scope(): - opt = gradient_descent.SGD(1.) 
- opt = loss_scale_optimizer.LossScaleOptimizer( - opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2) - with policy.policy_scope('mixed_float16'): - x = layers.Input( - shape=(1,), batch_size=batch_size, dtype=tf.float16) - layer = mp_test_util.MultiplyLayer(assert_type=tf.float16) - y = layer(x) - identity_with_nan_grads = ( - mp_test_util.create_identity_with_nan_gradients_fn( - have_nan_gradients)) - y = core.Lambda(identity_with_nan_grads)(y) - identity_with_grad_check_fn = ( - mp_test_util.create_identity_with_grad_check_fn( - expected_dtype=tf.float16, - expected_gradient=expected_gradient)) - y = core.Lambda(identity_with_grad_check_fn)(y) - model = models.Model(inputs=x, outputs=y) - if get_config: - config = model.get_config() - model = model.__class__.from_config( - config, - custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer}) - (layer,) = (layer for layer in model.layers - if isinstance(layer, mp_test_util.MultiplyLayer)) - - def loss_fn(y_true, y_pred): - del y_true - return tf.reduce_mean(y_pred) - - model.compile( - opt, - loss=loss_fn, - run_eagerly=test_utils.should_run_eagerly()) - - self.assertEqual(backend.eval(layer.v), 1) - x = np.ones((batch_size, 1)) - y = np.ones((batch_size, 1)) - dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size) - model.fit(dataset) - # The variables starts with 1 and has a gradient of 1, so will go down by 1 - # each step. - self.assertEqual(backend.eval(layer.v), 0) - - model.fit(dataset) - self.assertEqual(backend.eval(layer.v), -1) - - # There have been two steps without NaNs, so the loss scale will double - backend.set_value(expected_gradient, - backend.get_value(expected_gradient * 2)) - model.fit(dataset) - self.assertEqual(backend.eval(layer.v), -2) - - # Next test with NaN gradients. - backend.set_value(have_nan_gradients, True) - model.fit(dataset) - # Variable should not be updated - self.assertEqual(backend.eval(layer.v), -2) - - # Test with finite gradients again - backend.set_value(have_nan_gradients, False) - # The loss scale will be halved due to the NaNs, so the gradient will also - # be halved - backend.set_value(expected_gradient, - backend.get_value(expected_gradient / 2)) - model.fit(dataset) - self.assertEqual(backend.eval(layer.v), -3) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_compile_wraps_with_loss_scale_optimizer(self): - x = layers.Input(shape=(1,)) - y = mp_test_util.MultiplyLayer()(x) - - with policy.policy_scope('mixed_float16'): - # Test optimizer is automatically wrapped with LSO - model = models.Model(x, y) - model.compile(gradient_descent.SGD(1.), 'mse') - self.assertIsInstance(model.optimizer, - loss_scale_optimizer.LossScaleOptimizer) - self.assertEqual(backend.get_value(model.optimizer.learning_rate), 1.) 
- - # Test optimizer specified as string is automatically wrapped in LSO - model = models.Model(x, y) - model.compile('sgd', 'mse') - self.assertIsInstance(model.optimizer, - loss_scale_optimizer.LossScaleOptimizer) - - # Test if an LSO is passed, optimizer is not automatically wrapped with - # another LSO - model = models.Model(x, y) - optimizer = loss_scale_optimizer.LossScaleOptimizer( - gradient_descent.SGD(1.), dynamic_growth_steps=2) - model.compile(optimizer, 'mse') - self.assertIsInstance(model.optimizer, - loss_scale_optimizer.LossScaleOptimizer) - self.assertEqual(model.optimizer.dynamic_growth_steps, 2) - - with policy.policy_scope('mixed_bfloat16'): - # Test mixed_bfloat16 models are not automatically wrapped with LSO - model = models.Model(x, y) - model.compile(gradient_descent.SGD(1.), 'mse') - self.assertNotIsInstance(model.optimizer, - loss_scale_optimizer.LossScaleOptimizer) - self.assertIsInstance(model.optimizer, gradient_descent.SGD) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_pass_invalid_optimizer_with_loss_scaling(self): - with policy.policy_scope(policy.Policy('mixed_float16')): - x = layers.Input(shape=(1,)) - y = mp_test_util.MultiplyLayer()(x) - model = models.Model(x, y) - if tf.executing_eagerly(): - error_msg = 'Use a `tf.keras` Optimizer instead' - else: - error_msg = 'optimizer" must be an instance of ' - with self.assertRaisesRegex(ValueError, error_msg): - model.compile(optimizer_v1.SGD(1.), 'mse') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_functional_model_loss_dtype(self): - with policy.policy_scope('float16'): - x = layers.Input(shape=(1,)) - y = mp_test_util.MultiplyLayer()(x) - model = models.Model(x, y) - model.add_loss(tf.cast(y, 'float32')) - # The loss should not be casted to the policy's dtype. - self.assertEqual(model.losses[0].dtype, 'float32') - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters( - { - 'testcase_name': 'base', - 'strategy_fn': default_strategy_fn, - }, { - 'testcase_name': 'distribute', - 'strategy_fn': create_mirrored_strategy, - }, { - 'testcase_name': 'base_h5', - 'strategy_fn': default_strategy_fn, - 'h5': True, - }, { - 'testcase_name': 'distribute_h5', - 'strategy_fn': create_mirrored_strategy, - 'h5': True, - }) - def test_save_weights_with_autocast_vars(self, strategy_fn, h5=False): - with strategy_fn().scope(): - with policy.policy_scope('mixed_float16'): - x = layers.Input(shape=(1,), batch_size=2) - layer = mp_test_util.MultiplyLayer(assert_type=tf.float16) - y = layer(x) - model = models.Model(inputs=x, outputs=y) - - model.set_weights([np.array(100.)]) - x = np.ones((2, 1)) - self.assertAllClose(backend.get_value(model(x)), x * 100.) - suffix = '.h5' if h5 else '' - weights_file = os.path.join(self.get_temp_dir(), 'weights' + suffix) - model.save_weights(weights_file) - - model.set_weights([np.array(200.)]) - self.assertAllClose(backend.get_value(model(x)), x * 200.) - model.load_weights(weights_file) - self.assertAllClose(backend.get_value(model(x)), x * 100.) 
- self.assertEqual(model.get_weights(), [np.array(100.)]) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters( - { - 'testcase_name': 'base', - 'strategy_fn': default_strategy_fn, - }, { - 'testcase_name': 'distribute', - 'strategy_fn': create_mirrored_strategy, - }, { - 'testcase_name': 'different_var_name', - 'strategy_fn': default_strategy_fn, - 'var_name': 'w' - }, { - 'testcase_name': 'different_var_name_distribute', - 'strategy_fn': create_mirrored_strategy, - 'var_name': 'w' - }) - def test_save_slot_variables_with_autocast_vars(self, - strategy_fn, - var_name='v'): - p = policy.Policy('mixed_float16') - with strategy_fn().scope(), policy.policy_scope(p): - x = layers.Input(shape=(2,), batch_size=2) - # Having a var_name other than 'v' tests that a fixed bug (b/134713714) - # does not reoccur. The bug was that a crash would occur when saving a - # checkpoint where an AutoCastVariable with a slot variable would have a - # different name than the layer attribute's name (layer.v in this case). - layer = mp_test_util.MultiplyLayer(assert_type=tf.float16, - var_name=var_name) - y = layer(x) - model = models.Model(inputs=x, outputs=y) - opt = gradient_descent.SGD(1., 1.) - opt = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False, - initial_scale=1) - model.compile( - optimizer=opt, - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) - weights_file = os.path.join(self.get_temp_dir(), 'weights') - model.save_weights(weights_file) - saved_slot = backend.get_value(opt.get_slot(layer.v, 'momentum')) - - model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) - new_slot = backend.get_value(opt.get_slot(layer.v, 'momentum')) - self.assertNotEqual(new_slot, saved_slot) - - model.load_weights(weights_file) - restored_slot = backend.get_value(opt.get_slot(layer.v, 'momentum')) - self.assertEqual(restored_slot, saved_slot) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters(*TESTCASES) - def test_save_weights_with_dynamic_loss_scaling(self, strategy_fn): - strategy = strategy_fn() - if (isinstance(strategy, tf.distribute.MirroredStrategy) and - not tf.executing_eagerly()): - # TODO(b/121381184): Enable running the test in this case. - return - - # Create and run model. - with strategy.scope(): - x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32) - y = mp_test_util.MultiplyLayer(assert_type=tf.float32)(x) - model = models.Model(inputs=x, outputs=y) - - opt = gradient_descent.SGD(1.) - opt = loss_scale_optimizer.LossScaleOptimizer( - opt, initial_scale=1., dynamic_growth_steps=2.) - model.compile( - optimizer=opt, - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - # Run for 3 steps (6 examples with a batch size of 2) - model.fit(np.zeros((6, 2)), np.zeros((6, 2)), batch_size=2) - self.assertEqual(backend.get_value(opt.loss_scale), 2) - self.assertEqual(backend.get_value(opt.dynamic_counter), 1) - - # Save model weights. - save_prefix = os.path.join(self.get_temp_dir(), 'ckpt') - model.save_weights(save_prefix) - - # Run model again for 1 step (2 examples with a batch size of 2) - model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2) - self.assertEqual(backend.get_value(opt.loss_scale), 4) - self.assertEqual(backend.get_value(opt.dynamic_counter), 0) - - # Load model weights and ensure loss scale weights are restored. 
- model.load_weights(save_prefix) - self.assertEqual(backend.get_value(opt.loss_scale), 2) - self.assertEqual(backend.get_value(opt.dynamic_counter), 1) - - @test_combinations.run_all_keras_modes - def test_restore_old_loss_scale_checkpoint(self): - # Ensure a checkpoint from TF 2.2 can be loaded. The checkpoint format - # of LossScaleOptimizer changed, but old checkpoints can still be loaded - opt = gradient_descent.SGD(0.1, momentum=0.1) - opt = loss_scale_optimizer.LossScaleOptimizer(opt) - model = sequential.Sequential([core.Dense(2,)]) - - # The checkpoint and expected values were obtained from the program in - # testdata/BUILD. - ckpt_dir = os.path.join( - flags.FLAGS['test_srcdir'].value, - 'org_keras/keras', - 'mixed_precision/testdata/lso_ckpt_tf2.2') - # ckpt_dir = test.test_src_dir_path( - # 'python/keras/mixed_precision/testdata/lso_ckpt_tf2.2') - model.load_weights(os.path.join(ckpt_dir, 'ckpt')) - model.compile(opt, 'mse', run_eagerly=test_utils.should_run_eagerly()) - model(np.zeros((2, 2))) # Create model weights - opt._create_all_weights(model.weights) - expected_kernel = np.array([[9.229685, 10.901115], [10.370763, 9.757362]]) - expected_slot = np.array([[10.049943, 9.917691], [10.049943, 9.917691]]) - self.assertAllClose(self.evaluate(model.weights[0]), expected_kernel) - self.assertAllClose( - self.evaluate(opt.get_slot(model.weights[0], 'momentum')), - expected_slot) - self.assertEqual(self.evaluate(opt.loss_scale), 32768) - self.assertEqual(self.evaluate(opt.dynamic_counter), 1) - - # Check restoring works even after the model is compiled and the weights - # have been created. - model.fit(np.random.normal(size=(2, 2)), np.random.normal(size=(2, 2))) - self.assertNotAllClose(self.evaluate(model.weights[0]), expected_kernel) - self.assertNotAllClose( - self.evaluate(opt.get_slot(model.weights[0], 'momentum')), - expected_slot) - model.load_weights(os.path.join(ckpt_dir, 'ckpt')) - self.assertAllClose(self.evaluate(model.weights[0]), expected_kernel) - self.assertAllClose( - self.evaluate(opt.get_slot(model.weights[0], 'momentum')), - expected_slot) - self.assertEqual(self.evaluate(opt.loss_scale), 32768) - self.assertEqual(self.evaluate(opt.dynamic_counter), 1) - - def test_restore_old_saved_model(self): - saved_model_dir = os.path.join( - flags.FLAGS['test_srcdir'].value, - 'org_keras/keras', - 'mixed_precision/testdata/lso_savedmodel_tf2.2') - # saved_model_dir = test.test_src_dir_path( - # 'python/keras/mixed_precision/testdata/' - # 'lso_savedmodel_tf2.2') - model = save.load_model(saved_model_dir) - expected_kernel = np.array([[9.229685, 10.901115], [10.370763, 9.757362]]) - self.assertAllClose(backend.eval(model.weights[0]), expected_kernel) - self.assertEqual(type(model.optimizer), - loss_scale_optimizer.LossScaleOptimizer) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters( - { - 'testcase_name': 'base', - 'strategy_fn': default_strategy_fn, - }, { - 'testcase_name': 'distribute', - 'strategy_fn': create_mirrored_strategy, - }, { - 'testcase_name': 'base_h5', - 'strategy_fn': default_strategy_fn, - 'h5': True, - }, { - 'testcase_name': 'distribute_h5', - 'strategy_fn': create_mirrored_strategy, - 'h5': True, - }) - def test_save_model_with_dynamic_loss_scaling( - self, strategy_fn, h5=False): - # TODO(reedwm): Support and test saving model with a mixed_[b]float16 policy - # as well. 
- strategy = strategy_fn() - if (isinstance(strategy, tf.distribute.MirroredStrategy) and - not tf.executing_eagerly()): - # TODO(b/121381184): Enable running the test in this case. - return - - # Create and run model. - with strategy.scope(): - x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32) - y = mp_test_util.MultiplyLayer()(x) - model = models.Model(inputs=x, outputs=y) - - opt = gradient_descent.SGD(1.) - opt = loss_scale_optimizer.LossScaleOptimizer(opt, initial_scale=1., - dynamic_growth_steps=2.) - model.compile( - optimizer=opt, - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - # Run for 3 steps (6 examples with a batch size of 2) - model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2) - self.assertEqual(backend.get_value(opt.loss_scale), 2) - self.assertEqual(backend.get_value(opt.dynamic_counter), 1) - (weight,) = model.trainable_weights - orig_weight = backend.get_value(weight) - - # Save model weights. - save_path = os.path.join(self.get_temp_dir(), 'model') - model.save(save_path, save_format='h5' if h5 else 'tf') - - # Run model again for 1 step (2 examples with a batch size of 2) - model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) - new_weight = backend.get_value(weight) - self.assertNotEqual(new_weight, orig_weight) - self.assertEqual(backend.get_value(opt.loss_scale), 4) - self.assertEqual(backend.get_value(opt.dynamic_counter), 0) - - # Load model weights and ensure loss scale weights are restored. - model = save.load_model( - save_path, custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer}) - (weight,) = model.trainable_weights - loaded_weight = backend.get_value(weight) - self.assertEqual(loaded_weight, orig_weight) - # Currently the loss scale isn't always saved when the model is saved with - # Model.save(). So we assert the loss scale either has the value when it was - # saved, or the value it was initialized with. - # TODO(reedwm): Always save/restore the loss scale with Model.save(). - self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2)) - self.assertIn(backend.get_value(model.optimizer.dynamic_counter), (0, 1)) - - # Test optimizer attributes and type - self.assertEqual(model.optimizer.initial_scale, 1.) - self.assertEqual(model.optimizer.dynamic_growth_steps, 2.) - self.assertEqual(type(model.optimizer), - loss_scale_optimizer.LossScaleOptimizer) + loss_scale = 8.0 + learning_rate = 2**-14 + # The non-legacy optimizer is only supported in TF2 + use_legacy_optimizer = not tf.__internal__.tf2.enabled() + + with strategy.scope(): + with policy.policy_scope(policy.Policy("mixed_float16")): + x = layers.Input(shape=(1,), batch_size=2) + layer1 = mp_test_util.MultiplyLayer( + assert_type=tf.float16, + regularizer=mp_test_util.IdentityRegularizer(), + use_operator=True, + ) + layer2 = mp_test_util.MultiplyLayerWithoutAutoCast( + assert_type=tf.float16, use_operator=True + ) + layer3 = mp_test_util.MultiplyLayer( + assert_type=tf.float16, use_operator=False + ) + layer4 = mp_test_util.MultiplyLayerWithoutAutoCast( + assert_type=tf.float16, + regularizer=mp_test_util.IdentityRegularizer(), + use_operator=False, + ) + y = layer1(x) + y = layer2(y) + y = layer3(y) + y = layer4(y) + if use_loss_scaling: + # The gradient of 'y' at this point is 1. With loss scaling, + # the gradient is 'loss_scale'. We divide by the batch size + # of 2 since the loss is averaged across batch elements. 
+ expected_gradient = loss_scale / 2 + identity_with_grad_check_fn = ( + mp_test_util.create_identity_with_grad_check_fn( + expected_dtype=tf.float16, + expected_gradient=[expected_gradient], + ) + ) + y = core.Lambda(identity_with_grad_check_fn)(y) + model = models.Model(inputs=x, outputs=y) + + def loss_fn(y_true, y_pred): + del y_true + return tf.reduce_mean(y_pred) + + if use_legacy_optimizer: + opt = gradient_descent.SGD(learning_rate) + else: + opt = sgd.SGD(learning_rate) + if use_loss_scaling: + opt = loss_scale_optimizer.BaseLossScaleOptimizer( + opt, dynamic=False, initial_scale=loss_scale + ) + model.compile( + opt, + loss=loss_fn, + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones((2, 1)) + y = np.ones((2, 1)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) + model.fit(dataset) + for layer in (layer1, layer2, layer3, layer4): + if layer.losses: + # Layer has weight regularizer + self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate) + else: + # Layer does not have weight regularizer + self.assertEqual(backend.eval(layer.v), 1 - learning_rate) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + @parameterized.named_parameters( + {"testcase_name": "base", "strategy_fn": default_strategy_fn}, + { + "testcase_name": "distribute", + "strategy_fn": create_mirrored_strategy, + }, + { + "testcase_name": "get_config", + "strategy_fn": create_mirrored_strategy, + "get_config": True, + }, + ) + def test_dynamic_loss_scaling(self, strategy_fn, get_config=False): + strategy = strategy_fn() + initial_loss_scale = 2.0 + batch_size = 4 + expected_gradient = backend.variable( + [initial_loss_scale / batch_size], dtype=tf.float16 + ) + # If this variable is set to True, the model below will have NaN + # gradients. + have_nan_gradients = backend.variable(False, dtype=tf.bool) + with strategy.scope(): + opt = sgd.SGD(1.0) + opt = loss_scale_optimizer.BaseLossScaleOptimizer( + opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2 + ) + with policy.policy_scope("mixed_float16"): + x = layers.Input( + shape=(1,), batch_size=batch_size, dtype=tf.float16 + ) + layer = mp_test_util.MultiplyLayer(assert_type=tf.float16) + y = layer(x) + identity_with_nan_grads = ( + mp_test_util.create_identity_with_nan_gradients_fn( + have_nan_gradients + ) + ) + y = core.Lambda(identity_with_nan_grads)(y) + identity_with_grad_check_fn = ( + mp_test_util.create_identity_with_grad_check_fn( + expected_dtype=tf.float16, + expected_gradient=expected_gradient, + ) + ) + y = core.Lambda(identity_with_grad_check_fn)(y) + model = models.Model(inputs=x, outputs=y) + if get_config: + config = model.get_config() + with SafeModeScope(safe_mode=False): + model = model.__class__.from_config( + config, + custom_objects={ + "MultiplyLayer": mp_test_util.MultiplyLayer + }, + ) + (layer,) = ( + layer + for layer in model.layers + if isinstance(layer, mp_test_util.MultiplyLayer) + ) + + def loss_fn(y_true, y_pred): + del y_true + return tf.reduce_mean(y_pred) + + model.compile( + opt, + loss=loss_fn, + run_eagerly=test_utils.should_run_eagerly(), + ) + + self.assertEqual(backend.eval(layer.v), 1) + x = np.ones((batch_size, 1)) + y = np.ones((batch_size, 1)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size) + model.fit(dataset) + # The variable starts with 1 and has a gradient of 1, so it will go down + # by 1 each step.
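For reference, the loss-scale bookkeeping that the following assertions depend on can be sketched in a few lines of Python (a simplification; the actual logic lives inside LossScaleOptimizer):

def update_loss_scale(scale, counter, grads_finite, growth_steps=2):
    # Simplified dynamic update rule: halve on bad gradients, double
    # after `growth_steps` consecutive finite steps.
    if not grads_finite:
        return scale / 2, 0  # also skip applying this step's gradients
    counter += 1
    if counter >= growth_steps:
        return scale * 2, 0
    return scale, counter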
+ self.assertEqual(backend.eval(layer.v), 0) + + model.fit(dataset) + self.assertEqual(backend.eval(layer.v), -1) + + # There have been two steps without NaNs, so the loss scale will double + backend.set_value( + expected_gradient, backend.get_value(expected_gradient * 2) + ) + model.fit(dataset) + self.assertEqual(backend.eval(layer.v), -2) + + # Next test with NaN gradients. + backend.set_value(have_nan_gradients, True) + model.fit(dataset) + # Variable should not be updated + self.assertEqual(backend.eval(layer.v), -2) + + # Test with finite gradients again + backend.set_value(have_nan_gradients, False) + # The loss scale will be halved due to the NaNs, so the gradient will + # also be halved + backend.set_value( + expected_gradient, backend.get_value(expected_gradient / 2) + ) + model.fit(dataset) + self.assertEqual(backend.eval(layer.v), -3) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_compile_wraps_with_loss_scale_optimizer(self): + x = layers.Input(shape=(1,)) + y = mp_test_util.MultiplyLayer()(x) + + # The non-legacy optimizer is only supported in TF2 + use_legacy_optimizer = ( + not tf.__internal__.tf2.enabled() or not tf.executing_eagerly() + ) + + with policy.policy_scope("mixed_float16"): + # Test optimizer is automatically wrapped with LSO + model = models.Model(x, y) + if use_legacy_optimizer: + optimizer = gradient_descent.SGD(1.0) + else: + optimizer = sgd.SGD(1.0) + model.compile(optimizer, "mse") + self.assertIsInstance( + model.optimizer, loss_scale_optimizer.BaseLossScaleOptimizer + ) + self.assertEqual( + backend.get_value(model.optimizer.learning_rate), 1.0 + ) + + # Test optimizer specified as string is automatically wrapped in LSO + model = models.Model(x, y) + model.compile("sgd", "mse") + self.assertIsInstance( + model.optimizer, loss_scale_optimizer.BaseLossScaleOptimizer + ) + + # Test if an LSO is passed, optimizer is not automatically wrapped + # with another LSO + model = models.Model(x, y) + if use_legacy_optimizer: + optimizer = gradient_descent.SGD(1.0) + else: + optimizer = sgd.SGD(1.0) + optimizer = loss_scale_optimizer.BaseLossScaleOptimizer( + optimizer, dynamic_growth_steps=2 + ) + model.compile(optimizer, "mse") + self.assertIsInstance( + model.optimizer, loss_scale_optimizer.BaseLossScaleOptimizer + ) + self.assertEqual(model.optimizer.dynamic_growth_steps, 2) + + with policy.policy_scope("mixed_bfloat16"): + # Test mixed_bfloat16 models are not automatically wrapped with LSO + model = models.Model(x, y) + if use_legacy_optimizer: + optimizer = gradient_descent.SGD(1.0) + else: + optimizer = sgd.SGD(1.0) + model.compile(optimizer, "mse") + self.assertNotIsInstance( + model.optimizer, loss_scale_optimizer.BaseLossScaleOptimizer + ) + self.assertIsInstance( + model.optimizer, + gradient_descent.SGD if use_legacy_optimizer else sgd.SGD, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_pass_invalid_optimizer_with_loss_scaling(self): + with policy.policy_scope(policy.Policy("mixed_float16")): + x = layers.Input(shape=(1,)) + y = mp_test_util.MultiplyLayer()(x) + model = models.Model(x, y) + if tf.executing_eagerly(): + error_msg = "Use a `tf.keras` Optimizer instead" + else: + error_msg = 'optimizer" must be an instance of ' + with self.assertRaisesRegex(ValueError, error_msg): + model.compile(optimizer_v1.SGD(1.0), "mse") + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def 
test_functional_model_loss_dtype(self): + with policy.policy_scope("float16"): + x = layers.Input(shape=(1,)) + y = mp_test_util.MultiplyLayer()(x) + model = models.Model(x, y) + model.add_loss(tf.cast(y, "float32")) + # The loss should not be casted to the policy's dtype. + self.assertEqual(model.losses[0].dtype, "float32") + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + { + "testcase_name": "base", + "strategy_fn": default_strategy_fn, + }, + { + "testcase_name": "distribute", + "strategy_fn": create_mirrored_strategy, + }, + { + "testcase_name": "base_h5", + "strategy_fn": default_strategy_fn, + "h5": True, + }, + { + "testcase_name": "distribute_h5", + "strategy_fn": create_mirrored_strategy, + "h5": True, + }, + ) + def test_save_weights_with_autocast_vars(self, strategy_fn, h5=False): + with strategy_fn().scope(): + with policy.policy_scope("mixed_float16"): + x = layers.Input(shape=(1,), batch_size=2) + layer = mp_test_util.MultiplyLayer(assert_type=tf.float16) + y = layer(x) + model = models.Model(inputs=x, outputs=y) + + model.set_weights([np.array(100.0)]) + x = np.ones((2, 1)) + self.assertAllClose(backend.get_value(model(x)), x * 100.0) + suffix = ".h5" if h5 else "" + weights_file = os.path.join(self.get_temp_dir(), "weights" + suffix) + model.save_weights(weights_file) + + model.set_weights([np.array(200.0)]) + self.assertAllClose(backend.get_value(model(x)), x * 200.0) + model.load_weights(weights_file) + self.assertAllClose(backend.get_value(model(x)), x * 100.0) + self.assertEqual(model.get_weights(), [np.array(100.0)]) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + { + "testcase_name": "base", + "strategy_fn": default_strategy_fn, + }, + { + "testcase_name": "distribute", + "strategy_fn": create_mirrored_strategy, + }, + { + "testcase_name": "distribute_legacy", + "strategy_fn": create_mirrored_strategy, + "use_legacy_optimizer": True, + }, + { + "testcase_name": "different_var_name", + "strategy_fn": default_strategy_fn, + "var_name": "w", + }, + { + "testcase_name": "different_var_name_distribute", + "strategy_fn": create_mirrored_strategy, + "var_name": "w", + }, + ) + def test_save_slot_variables_with_autocast_vars( + self, strategy_fn, var_name="v", use_legacy_optimizer=False + ): + if not tf.__internal__.tf2.enabled(): + # The non-legacy optimizer is only supported in TF2 + use_legacy_optimizer = True + p = policy.Policy("mixed_float16") + with strategy_fn().scope(), policy.policy_scope(p): + x = layers.Input(shape=(2,), batch_size=2) + # Having a var_name other than 'v' tests that a fixed bug + # (b/134713714) does not reoccur. The bug was that a crash would + # occur when saving a checkpoint where an AutoCastVariable with a + # slot variable would have a different name than the layer + # attribute's name (layer.v in this case). 
+ layer = mp_test_util.MultiplyLayer( + assert_type=tf.float16, var_name=var_name + ) + y = layer(x) + model = models.Model(inputs=x, outputs=y) + if use_legacy_optimizer: + opt = gradient_descent.SGD(1.0, 1.0) + else: + opt = sgd.SGD(1.0, 1.0) + opt = loss_scale_optimizer.BaseLossScaleOptimizer( + opt, dynamic=False, initial_scale=1 + ) + model.compile( + optimizer=opt, + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + + def get_momentum_slot(): + if use_legacy_optimizer: + return opt.get_slot(layer.v, "momentum") + else: + return opt.inner_optimizer.momentums[0] + + model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) + weights_file = os.path.join(self.get_temp_dir(), "weights") + model.save_weights(weights_file) + saved_slot = backend.get_value(get_momentum_slot()) + + model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) + new_slot = backend.get_value(get_momentum_slot()) + self.assertNotEqual(new_slot, saved_slot) + + model.load_weights(weights_file) + restored_slot = backend.get_value(get_momentum_slot()) + self.assertEqual(restored_slot, saved_slot) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters(*TESTCASES) + def test_save_weights_with_dynamic_loss_scaling(self, strategy_fn): + strategy = strategy_fn() + if ( + isinstance(strategy, tf.distribute.MirroredStrategy) + and not tf.executing_eagerly() + ): + # TODO(b/121381184): Enable running the test in this case. + return + + # The non-legacy optimizer is only supported in TF2 + use_legacy_optimizer = not tf.__internal__.tf2.enabled() + + # Create and run model. + with strategy.scope(): + x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32) + y = mp_test_util.MultiplyLayer(assert_type=tf.float32)(x) + model = models.Model(inputs=x, outputs=y) + + if use_legacy_optimizer: + opt = gradient_descent.SGD(1.0) + else: + opt = sgd.SGD(1.0) + opt = loss_scale_optimizer.BaseLossScaleOptimizer( + opt, initial_scale=1.0, dynamic_growth_steps=2.0 + ) + model.compile( + optimizer=opt, + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + # Run for 3 steps (6 examples with a batch size of 2) + model.fit(np.zeros((6, 2)), np.zeros((6, 2)), batch_size=2) + self.assertEqual(backend.get_value(opt.loss_scale), 2) + self.assertEqual(backend.get_value(opt.dynamic_counter), 1) + + # Save model weights. + save_prefix = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_prefix) + + # Run model again for 1 step (2 examples with a batch size of 2) + model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2) + self.assertEqual(backend.get_value(opt.loss_scale), 4) + self.assertEqual(backend.get_value(opt.dynamic_counter), 0) + + # Load model weights and ensure loss scale weights are restored. + model.load_weights(save_prefix) + self.assertEqual(backend.get_value(opt.loss_scale), 2) + self.assertEqual(backend.get_value(opt.dynamic_counter), 1) + + @test_combinations.run_all_keras_modes + def test_restore_old_loss_scale_checkpoint(self): + # Ensure a checkpoint from TF 2.2 can be loaded. The checkpoint format + # of LossScaleOptimizer changed, but old checkpoints can still be loaded + # into the legacy optimizers. + opt = gradient_descent.SGD(0.1, momentum=0.1) + opt = loss_scale_optimizer.LossScaleOptimizer(opt) + model = sequential.Sequential( + [ + core.Dense( + 2, + ) + ] + ) + + # The checkpoint and expected values were obtained from the program in + # testdata/BUILD. 
+ ckpt_dir = os.path.join( + flags.FLAGS["test_srcdir"].value, + "org_keras/keras", + "mixed_precision/testdata/lso_ckpt_tf2.2", + ) + # ckpt_dir = test.test_src_dir_path( + # 'python/keras/mixed_precision/testdata/lso_ckpt_tf2.2') + model.load_weights(os.path.join(ckpt_dir, "ckpt")) + model.compile(opt, "mse", run_eagerly=test_utils.should_run_eagerly()) + model(np.zeros((2, 2))) # Create model weights + opt._create_all_weights(model.weights) + expected_kernel = np.array( + [[9.229685, 10.901115], [10.370763, 9.757362]] + ) + expected_slot = np.array([[10.049943, 9.917691], [10.049943, 9.917691]]) + self.assertAllClose(self.evaluate(model.weights[0]), expected_kernel) + self.assertAllClose( + self.evaluate(opt.get_slot(model.weights[0], "momentum")), + expected_slot, + ) + self.assertEqual(self.evaluate(opt.loss_scale), 32768) + self.assertEqual(self.evaluate(opt.dynamic_counter), 1) + + # Check restoring works even after the model is compiled and the weights + # have been created. + model.fit(np.random.normal(size=(2, 2)), np.random.normal(size=(2, 2))) + self.assertNotAllClose(self.evaluate(model.weights[0]), expected_kernel) + self.assertNotAllClose( + self.evaluate(opt.get_slot(model.weights[0], "momentum")), + expected_slot, + ) + model.load_weights(os.path.join(ckpt_dir, "ckpt")) + self.assertAllClose(self.evaluate(model.weights[0]), expected_kernel) + self.assertAllClose( + self.evaluate(opt.get_slot(model.weights[0], "momentum")), + expected_slot, + ) + self.assertEqual(self.evaluate(opt.loss_scale), 32768) + self.assertEqual(self.evaluate(opt.dynamic_counter), 1) + + def test_restore_old_saved_model(self): + saved_model_dir = os.path.join( + flags.FLAGS["test_srcdir"].value, + "org_keras/keras", + "mixed_precision/testdata/lso_savedmodel_tf2.2", + ) + # saved_model_dir = test.test_src_dir_path( + # 'python/keras/mixed_precision/testdata/' + # 'lso_savedmodel_tf2.2') + model = save.load_model(saved_model_dir) + expected_kernel = np.array( + [[9.229685, 10.901115], [10.370763, 9.757362]] + ) + self.assertAllClose(backend.eval(model.weights[0]), expected_kernel) + self.assertEqual( + type(model.optimizer), loss_scale_optimizer.LossScaleOptimizer + ) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + { + "testcase_name": "base", + "strategy_fn": default_strategy_fn, + }, + { + "testcase_name": "distribute", + "strategy_fn": create_mirrored_strategy, + }, + { + "testcase_name": "base_h5", + "strategy_fn": default_strategy_fn, + "h5": True, + }, + { + "testcase_name": "distribute_h5", + "strategy_fn": create_mirrored_strategy, + "h5": True, + }, + ) + def test_save_model_with_dynamic_loss_scaling(self, strategy_fn, h5=False): + # TODO(reedwm): Support and test saving model with a mixed_[b]float16 + # policy as well. + strategy = strategy_fn() + if ( + isinstance(strategy, tf.distribute.MirroredStrategy) + and not tf.executing_eagerly() + ): + # TODO(b/121381184): Enable running the test in this case. + return + + # Create and run model. + with strategy.scope(): + x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32) + y = mp_test_util.MultiplyLayer()(x) + model = models.Model(inputs=x, outputs=y) + + # Only test the legacy optimizer. The new optimizer does not + # support saving optimizer weights. 
+ opt = gradient_descent.SGD(1.0) + opt = loss_scale_optimizer.LossScaleOptimizer( + opt, initial_scale=1.0, dynamic_growth_steps=2.0 + ) + model.compile( + optimizer=opt, + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + # Run for 3 steps (6 examples with a batch size of 2) + model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2) + self.assertEqual(backend.get_value(opt.loss_scale), 2) + self.assertEqual(backend.get_value(opt.dynamic_counter), 1) + (weight,) = model.trainable_weights + orig_weight = backend.get_value(weight) + + # Save model weights. + save_path = os.path.join(self.get_temp_dir(), "model") + model.save(save_path, save_format="h5" if h5 else "tf") + + # Run model again for 1 step (2 examples with a batch size of 2) + model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) + new_weight = backend.get_value(weight) + self.assertNotEqual(new_weight, orig_weight) + self.assertEqual(backend.get_value(opt.loss_scale), 4) + self.assertEqual(backend.get_value(opt.dynamic_counter), 0) + + # Load model weights and ensure loss scale weights are restored. + model = save.load_model( + save_path, + custom_objects={"MultiplyLayer": mp_test_util.MultiplyLayer}, + ) + (weight,) = model.trainable_weights + loaded_weight = backend.get_value(weight) + self.assertEqual(loaded_weight, orig_weight) + # Currently the loss scale isn't always saved when the model is saved + # with Model.save(). So we assert the loss scale either has the value + # when it was saved, or the value it was initialized with. + # TODO(reedwm): Always save/restore the loss scale with Model.save(). + self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2)) + self.assertIn( + backend.get_value(model.optimizer.dynamic_counter), (0, 1) + ) + + # Test optimizer attributes and type + self.assertEqual(model.optimizer.initial_scale, 1.0) + self.assertEqual(model.optimizer.dynamic_growth_steps, 2.0) + self.assertEqual( + type(model.optimizer), loss_scale_optimizer.LossScaleOptimizer + ) class ApplicationModelTest(test_combinations.TestCase): - """Tests that application models can be built with mixed precision. - - This does not test that such models can be trained in mixed precision, as - doing so takes too much time for a unit test. - """ - - @parameterized.named_parameters( - ('densenet', densenet.DenseNet121), - ('efficientnet', efficientnet.EfficientNetB0), - ('inception_resnet_v2', inception_resnet_v2.InceptionResNetV2), - ('inception_v3', inception_v3.InceptionV3), - ('mobilenet', mobilenet.MobileNet), - ('nasnet', nasnet.NASNetMobile), - ('vgg16', vgg16.VGG16), - ('xception', xception.Xception), - ('resnet50', resnet.ResNet50), - ) - def test_application_model(self, app): - # Run on CPU since model weights may exhaust GPU memory - with policy.policy_scope('mixed_float16'), tf.device('/CPU:0'): - app(weights=None) - - -if __name__ == '__main__': - base_layer_utils.enable_v2_dtype_behavior() - tf.test.main() + """Tests that application models can be built with mixed precision. + + This does not test that such models can be trained in mixed precision, as + doing so takes too much time for a unit test. 
+ """ + + @parameterized.named_parameters( + ("densenet", densenet.DenseNet121), + ("efficientnet", efficientnet.EfficientNetB0), + ("inception_resnet_v2", inception_resnet_v2.InceptionResNetV2), + ("inception_v3", inception_v3.InceptionV3), + ("mobilenet", mobilenet.MobileNet), + ("nasnet", nasnet.NASNetMobile), + ("vgg16", vgg16.VGG16), + ("xception", xception.Xception), + ("resnet50", resnet.ResNet50), + ) + def test_application_model(self, app): + # Run on CPU since model weights may exhaust GPU memory + with policy.policy_scope("mixed_float16"), tf.device("/CPU:0"): + app(weights=None) + + +if __name__ == "__main__": + base_layer_utils.enable_v2_dtype_behavior() + tf.test.main() diff --git a/keras/mixed_precision/policy.py b/keras/mixed_precision/policy.py index 967ffe96c529..faaf9377eea9 100644 --- a/keras/mixed_precision/policy.py +++ b/keras/mixed_precision/policy.py @@ -14,479 +14,542 @@ # ============================================================================== """Contains the Policy class for mixed precision training.""" +import contextlib + import tensorflow.compat.v2 as tf -import contextlib from keras import backend from keras.engine import base_layer_utils from keras.mixed_precision import device_compatibility_check -from keras.utils import generic_utils +from keras.mixed_precision import loss_scale_optimizer +from keras.saving import serialization_lib + +# isort: off from tensorflow.python.util.tf_export import keras_export -# pylint: disable=g-classes-have-attributes -@keras_export('keras.mixed_precision.Policy', v1=[]) +@keras_export("keras.mixed_precision.Policy", v1=[]) class Policy: - """A dtype policy for a Keras layer. - - A dtype policy determines a layer's computation and variable dtypes. Each - layer has a policy. Policies can be passed to the `dtype` argument of layer - constructors, or a global policy can be set with - `tf.keras.mixed_precision.set_global_policy`. - - Args: - name: The policy name, which determines the compute and variable dtypes. Can - be any dtype name, such as `'float32'` or `'float64'`, which causes both - the compute and variable dtypes will be that dtype. Can also be the string - `'mixed_float16'` or `'mixed_bfloat16'`, which causes the compute dtype to - be float16 or bfloat16 and the variable dtype to be float32. - - Typically you only need to interact with dtype policies when using mixed - precision, which is the use of float16 or bfloat16 for computations and - float32 for variables. This is why the term `mixed_precision` appears in the - API name. Mixed precision can be enabled by passing `'mixed_float16'` or - `'mixed_bfloat16'` to `tf.keras.mixed_precision.set_global_policy`. See [the - mixed precision guide](https://www.tensorflow.org/guide/keras/mixed_precision) - for more information on how to use mixed precision. - - >>> tf.keras.mixed_precision.set_global_policy('mixed_float16') - >>> layer1 = tf.keras.layers.Dense(10) - >>> layer1.dtype_policy # `layer1` will automatically use mixed precision - - >>> # Can optionally override layer to use float32 instead of mixed precision. - >>> layer2 = tf.keras.layers.Dense(10, dtype='float32') - >>> layer2.dtype_policy - - >>> # Set policy back to initial float32 for future examples. - >>> tf.keras.mixed_precision.set_global_policy('float32') - - In the example above, passing `dtype='float32'` to the layer is equivalent to - passing `dtype=tf.keras.mixed_precision.Policy('float32')`. 
In general, - passing a dtype policy name to a layer is equivalent to passing the - corresponding policy, so it is never necessary to explicitly construct a - `Policy` object. - - Note: `Model.compile` will automatically wrap an optimizer with a - `tf.keras.mixed_precision.LossScaleOptimizer` if you use the `'mixed_float16'` - policy. If you use a custom training loop instead of calling `Model.compile`, - you should explicitly use a `tf.keras.mixed_precision.LossScaleOptimizer` to - avoid numeric underflow with float16. - - ### How a layer uses its policy's compute dtype - - A layer casts its inputs to its compute dtype. This causes the layer's - computations and output to also be in the compute dtype. For example: - - >>> x = tf.ones((4, 4, 4, 4), dtype='float64') - >>> # `layer`'s policy defaults to float32. - >>> layer = tf.keras.layers.Conv2D(filters=4, kernel_size=2) - >>> layer.compute_dtype # Equivalent to layer.dtype_policy.compute_dtype - 'float32' - >>> # `layer` casts its inputs to its compute dtype and does computations in - >>> # that dtype. - >>> y = layer(x) - >>> y.dtype - tf.float32 - - Note that the base `tf.keras.layers.Layer` class inserts the casts. If - subclassing your own layer, you do not have to insert any casts. - - Currently, only tensors in the first argument to the layer's `call` method are - casted (although this will likely be changed in a future minor release). For - example: - - >>> class MyLayer(tf.keras.layers.Layer): - ... # Bug! `b` will not be casted. - ... def call(self, a, b): - ... return a + 1., b + 1. - >>> a = tf.constant(1., dtype="float32") - >>> b = tf.constant(1., dtype="float32") - >>> layer = MyLayer(dtype="float64") - >>> x, y = layer(a, b) - >>> x.dtype - tf.float64 - >>> y.dtype - tf.float32 - - If writing your own layer with multiple inputs, you should either explicitly - cast other tensors to `self.compute_dtype` in `call` or accept all tensors in - the first argument as a list. - - The casting only occurs in TensorFlow 2. If - `tf.compat.v1.disable_v2_behavior()` has been called, you can enable the - casting behavior with `tf.compat.v1.keras.layers.enable_v2_dtype_behavior()`. - - ### How a layer uses its policy's variable dtype - - The default dtype of variables created by `tf.keras.layers.Layer.add_weight` - is the layer's policy's variable dtype. - - If a layer's compute and variable dtypes differ, `add_weight` will wrap - floating-point variables with a special wrapper called an `AutoCastVariable`. - `AutoCastVariable` is identical to the original variable except it casts - itself to the layer's compute dtype when used within `Layer.call`. This means - if you are writing a layer, you do not have to explicitly cast the variables - to the layer's compute dtype. For example: - - >>> class SimpleDense(tf.keras.layers.Layer): - ... - ... def build(self, input_shape): - ... # With mixed precision, self.kernel is a float32 AutoCastVariable - ... self.kernel = self.add_weight('kernel', (input_shape[-1], 10)) - ... - ... def call(self, inputs): - ... # With mixed precision, self.kernel will be casted to float16 - ... return tf.linalg.matmul(inputs, self.kernel) - ... 
- >>> layer = SimpleDense(dtype='mixed_float16') - >>> y = layer(tf.ones((10, 10))) - >>> y.dtype - tf.float16 - >>> layer.kernel.dtype - tf.float32 - - A layer author can prevent a variable from being wrapped with an - `AutoCastVariable` by passing `experimental_autocast=False` to `add_weight`, - which is useful if the float32 value of the variable must be accessed within - the layer. - - ### How to write a layer that supports mixed precision and float64. - - For the most part, layers will automatically support mixed precision and - float64 without any additional work, due to the fact the base layer - automatically casts inputs, creates variables of the correct type, and in the - case of mixed precision, wraps variables with `AutoCastVariables`. - - The primary case where you need extra work to support mixed precision or - float64 is when you create a new tensor, such as with `tf.ones` or - `tf.random.normal`, In such cases, you must create the tensor of the correct - dtype. For example, if you call `tf.random.normal`, you must pass the compute - dtype, which is the dtype the inputs have been casted to: - - >>> class AddRandom(tf.keras.layers.Layer): - ... - ... def call(self, inputs): - ... # We must pass `dtype=inputs.dtype`, otherwise a TypeError may - ... # occur when adding `inputs` to `rand`. - ... rand = tf.random.normal(shape=inputs.shape, dtype=inputs.dtype) - ... return inputs + rand - >>> layer = AddRandom(dtype='mixed_float16') - >>> y = layer(x) - >>> y.dtype - tf.float16 - - If you did not pass `dtype=inputs.dtype` to `tf.random.normal`, a - `TypeError` would have occurred. This is because the `tf.random.normal`'s - dtype defaults to `"float32"`, but the input dtype is float16. You cannot add - a float32 tensor with a float16 tensor. - """ - - def __init__(self, name): - if isinstance(name, tf.DType): - raise TypeError("'name' must be a string, not a DType. " - "Instead, pass DType.name. Got: %s" % (name.name,)) - elif not isinstance(name, str): - raise TypeError("'name' must be a string, but got: %s" % (name,)) - self._name = name - self._compute_dtype, self._variable_dtype = self._parse_name(name) - if name in ('mixed_float16', 'mixed_bloat16'): - device_compatibility_check.log_device_compatibility_check(name) - - def _parse_name(self, name): - """Parses a Policy name into a compute and variable dtype. + """A dtype policy for a Keras layer. - Args: - name: The name of the policy: + A dtype policy determines a layer's computation and variable dtypes. Each + layer has a policy. Policies can be passed to the `dtype` argument of layer + constructors, or a global policy can be set with + `tf.keras.mixed_precision.set_global_policy`. - Returns: - The (compute_dtype, variable_dtype) pair. + Args: + name: The policy name, which determines the compute and variable dtypes. + Can be any dtype name, such as `'float32'` or `'float64'`, which causes + both the compute and variable dtypes to be that dtype. Can also be the + string `'mixed_float16'` or `'mixed_bfloat16'`, which causes the compute + dtype to be float16 or bfloat16 and the variable dtype to be float32. + + Typically you only need to interact with dtype policies when using mixed + precision, which is the use of float16 or bfloat16 for computations and + float32 for variables. This is why the term `mixed_precision` appears in the + API name. Mixed precision can be enabled by passing `'mixed_float16'` or + `'mixed_bfloat16'` to `tf.keras.mixed_precision.set_global_policy`.
See [the + mixed precision + guide](https://www.tensorflow.org/guide/keras/mixed_precision) for more + information on how to use mixed precision. + + >>> tf.keras.mixed_precision.set_global_policy('mixed_float16') + >>> layer1 = tf.keras.layers.Dense(10) + >>> layer1.dtype_policy # `layer1` will automatically use mixed precision + <Policy "mixed_float16"> + >>> # Can optionally override layer to use float32 + >>> # instead of mixed precision. + >>> layer2 = tf.keras.layers.Dense(10, dtype='float32') + >>> layer2.dtype_policy + <Policy "float32"> + >>> # Set policy back to initial float32 for future examples. + >>> tf.keras.mixed_precision.set_global_policy('float32') + + In the example above, passing `dtype='float32'` to the layer is equivalent + to passing `dtype=tf.keras.mixed_precision.Policy('float32')`. In general, + passing a dtype policy name to a layer is equivalent to passing the + corresponding policy, so it is never necessary to explicitly construct a + `Policy` object. + + Note: `Model.compile` will automatically wrap an optimizer with a + `tf.keras.mixed_precision.LossScaleOptimizer` if you use the + `'mixed_float16'` policy. If you use a custom training loop instead of + calling `Model.compile`, you should explicitly use a + `tf.keras.mixed_precision.LossScaleOptimizer` to avoid numeric underflow + with float16. + + ### How a layer uses its policy's compute dtype + + A layer casts its inputs to its compute dtype. This causes the layer's + computations and output to also be in the compute dtype. For example: + + >>> x = tf.ones((4, 4, 4, 4), dtype='float64') + >>> # `layer`'s policy defaults to float32. + >>> layer = tf.keras.layers.Conv2D(filters=4, kernel_size=2) + >>> layer.compute_dtype # Equivalent to layer.dtype_policy.compute_dtype + 'float32' + >>> # `layer` casts its inputs to its compute dtype and does computations in + >>> # that dtype. + >>> y = layer(x) + >>> y.dtype + tf.float32 + + Note that the base `tf.keras.layers.Layer` class inserts the casts. If + subclassing your own layer, you do not have to insert any casts. + + Currently, only tensors in the first argument to the layer's `call` method + are casted (although this will likely be changed in a future minor release). + For example: + + >>> class MyLayer(tf.keras.layers.Layer): + ... # Bug! `b` will not be casted. + ... def call(self, a, b): + ... return a + 1., b + 1. + >>> a = tf.constant(1., dtype="float32") + >>> b = tf.constant(1., dtype="float32") + >>> layer = MyLayer(dtype="float64") + >>> x, y = layer(a, b) + >>> x.dtype + tf.float64 + >>> y.dtype + tf.float32 + + If writing your own layer with multiple inputs, you should either explicitly + cast other tensors to `self.compute_dtype` in `call` or accept all tensors + in the first argument as a list. + + The casting only occurs in TensorFlow 2. If + `tf.compat.v1.disable_v2_behavior()` has been called, you can enable the + casting behavior with + `tf.compat.v1.keras.layers.enable_v2_dtype_behavior()`. + + ### How a layer uses its policy's variable dtype + + The default dtype of variables created by `tf.keras.layers.Layer.add_weight` + is the layer's policy's variable dtype. + + If a layer's compute and variable dtypes differ, `add_weight` will wrap + floating-point variables with a special wrapper called an + `AutoCastVariable`. `AutoCastVariable` is identical to the original + variable except it casts itself to the layer's compute dtype when used + within `Layer.call`. This means if you are writing a layer, you do not have + to explicitly cast the variables to the layer's compute dtype.
For example: + + >>> class SimpleDense(tf.keras.layers.Layer): + ... + ... def build(self, input_shape): + ... # With mixed precision, self.kernel is a float32 AutoCastVariable + ... self.kernel = self.add_weight('kernel', (input_shape[-1], 10)) + ... + ... def call(self, inputs): + ... # With mixed precision, self.kernel will be casted to float16 + ... return tf.linalg.matmul(inputs, self.kernel) + ... + >>> layer = SimpleDense(dtype='mixed_float16') + >>> y = layer(tf.ones((10, 10))) + >>> y.dtype + tf.float16 + >>> layer.kernel.dtype + tf.float32 + + A layer author can prevent a variable from being wrapped with an + `AutoCastVariable` by passing `experimental_autocast=False` to `add_weight`, + which is useful if the float32 value of the variable must be accessed within + the layer. + + ### How to write a layer that supports mixed precision and float64. + + For the most part, layers will automatically support mixed precision and + float64 without any additional work, due to the fact the base layer + automatically casts inputs, creates variables of the correct type, and in + the case of mixed precision, wraps variables with `AutoCastVariables`. + + The primary case where you need extra work to support mixed precision or + float64 is when you create a new tensor, such as with `tf.ones` or + `tf.random.normal`. In such cases, you must create the tensor of the correct + dtype. For example, if you call `tf.random.normal`, you must pass the + compute dtype, which is the dtype the inputs have been casted to: + + >>> class AddRandom(tf.keras.layers.Layer): + ... + ... def call(self, inputs): + ... # We must pass `dtype=inputs.dtype`, otherwise a TypeError may + ... # occur when adding `inputs` to `rand`. + ... rand = tf.random.normal(shape=inputs.shape, dtype=inputs.dtype) + ... return inputs + rand + >>> layer = AddRandom(dtype='mixed_float16') + >>> y = layer(x) + >>> y.dtype + tf.float16 + + If you did not pass `dtype=inputs.dtype` to `tf.random.normal`, a + `TypeError` would have occurred. This is because the `tf.random.normal`'s + dtype defaults to `"float32"`, but the input dtype is float16. You cannot + add a float32 tensor with a float16 tensor. """
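To make the docstring's advice concrete, a common pattern from the mixed precision guide is to run most layers under the mixed policy but keep numerically sensitive outputs in float32; the model below is an illustrative sketch, not code from this patch:

import tensorflow as tf

tf.keras.mixed_precision.set_global_policy('mixed_float16')
inputs = tf.keras.Input(shape=(8,))
x = tf.keras.layers.Dense(16, activation='relu')(inputs)  # float16 compute
# Softmax can overflow/underflow in float16, so force float32 here.
outputs = tf.keras.layers.Activation('softmax', dtype='float32')(x)
model = tf.keras.Model(inputs, outputs)
tf.keras.mixed_precision.set_global_policy('float32')  # restore the default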
- return None, None - try: - dtype = tf.as_dtype(name).name - except TypeError: - error = ("Cannot convert value %s to a mixed precision Policy. " - "Valid policies include 'mixed_float16', 'mixed_bfloat16', " - "and the name of any dtype such as 'float32'." % (name,)) - raise ValueError(error) - return dtype, dtype + def __init__(self, name): + if isinstance(name, tf.DType): + raise TypeError( + "'name' must be a string, not a DType. " + f"Instead, pass DType.name. Received: name={name.name}" + ) + elif not isinstance(name, str): + raise TypeError(f"'name' must be a string, but got: {name}") + self._name = name + self._compute_dtype, self._variable_dtype = self._parse_name(name) + if name in ("mixed_float16", "mixed_bfloat16"): + device_compatibility_check.log_device_compatibility_check(name) + + def _parse_name(self, name): + """Parses a Policy name into a compute and variable dtype. + + Args: + name: The name of the policy: + + Returns: + The (compute_dtype, variable_dtype) pair. + """ + if name.endswith("_float32_vars"): + error_msg = ( + "Policies ending in '_float32_vars' have been removed " + "from TensorFlow." + ) + if name in ("infer_float32_vars", "infer_with_float32_vars"): + error_msg += ( + " Please use the 'mixed_float16' or 'mixed_bfloat16' " + "policy instead." + ) + elif name == "float16_with_float32_vars": + error_msg += " Please use the 'mixed_float16' policy instead." + elif name == "bfloat16_with_float32_vars": + error_msg += " Please use the 'mixed_bfloat16' policy instead." + error_msg += f" Got policy name: '{name}'" + raise ValueError(error_msg) + + if name == "mixed_float16": + return "float16", "float32" + elif name == "mixed_bfloat16": + return "bfloat16", "float32" + elif name == "_infer": + # The "_infer" policy exists only for compatibility with TF 1, where + # "_infer" is the default. The behavior matches the behavior of TF + # 1's behavior before policies were introduced. With "_infer", the + # computation and variable dtype are inferred from the first input + # the first time the layer is called. Once the layer is called for + # the first time, the layer's policy will change to the dtype of the + # first input, and it will no longer have the "_infer" policy. + # + # The infer policy should be considered an implementation detail and + # may be removed in the future. + return None, None + + try: + dtype = tf.as_dtype(name).name + except TypeError: + raise ValueError( + f"Cannot convert value {name} to a mixed precision Policy. " + "Valid policies include 'mixed_float16', 'mixed_bfloat16', " + "and the name of any dtype such as 'float32'." + ) + return dtype, dtype + + @property + def variable_dtype(self): + """The variable dtype of this policy. + + This is the dtype layers will create their variables in, unless a layer + explicitly chooses a different dtype. If this is different than + `Policy.compute_dtype`, Layers will cast variables to the compute dtype + to avoid type errors. + + Variable regularizers are run in the variable dtype, not the compute + dtype. + + Returns: + The variable dtype of this policy, as a string. + """ + return self._variable_dtype + + @property + def compute_dtype(self): + """The compute dtype of this policy. + + This is the dtype layers will do their computations in. Typically layers + output tensors with the compute dtype as well. 
+ + Note that even if the compute dtype is float16 or bfloat16, hardware + devices may not do individual adds, multiplies, and other fundamental + operations in float16 or bfloat16, but instead may do some of them in + float32 for numeric stability. The compute dtype is the dtype of the + inputs and outputs of the TensorFlow ops that the layer executes. + Internally, many TensorFlow ops will do certain internal calculations in + float32 or some other device-internal intermediate format with higher + precision than float16/bfloat16, to increase numeric stability. + + For example, a `tf.keras.layers.Dense` layer, when run on a GPU with a + float16 compute dtype, will pass float16 inputs to `tf.linalg.matmul`. + But, `tf.linalg.matmul` will use float32 intermediate math. The + performance benefit of float16 is still apparent, due to increased + memory bandwidth and the fact modern GPUs have specialized hardware for + computing matmuls on float16 inputs while still keeping intermediate + computations in float32. + + Returns: + The compute dtype of this policy, as a string. + """ + return self._compute_dtype + + @property + def name(self): + """Returns the name of this policy.""" + return self._name + + def __repr__(self): + return f'<Policy "{self._name}">' + + def get_config(self): + return {"name": self.name} + + @classmethod + def from_config(cls, config, custom_objects=None): + del custom_objects + if "loss_scale" in config: + config = config.copy() + # Policy.get_config in TensorFlow 2.3 and below had a loss_scale. We + # silently drop it. + del config["loss_scale"] + return cls(**config) + + +# The current global policy in effect. If None, it means the current value of +# floatx should be used as the policy if the V2 dtype behavior is enabled, +# or "_infer" otherwise. +# TODO(reedwm): Make this thread local? +_global_policy = None - @property - def variable_dtype(self): - """The variable dtype of this policy. - This is the dtype layers will create their variables in, unless a layer - explicitly chooses a different dtype. If this is different than - `Policy.compute_dtype`, Layers will cast variables to the compute dtype to - avoid type errors. +@keras_export("keras.mixed_precision.global_policy", v1=[]) +def global_policy(): + """Returns the global dtype policy. - Variable regularizers are run in the variable dtype, not the compute dtype. + The global policy is the default `tf.keras.mixed_precision.Policy` used for + layers, if no policy is passed to the layer constructor. If no policy has + been set with `keras.mixed_precision.set_global_policy`, this will return a + policy constructed from `tf.keras.backend.floatx()` (floatx defaults to + float32).
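A quick sketch of inspecting the two dtypes carried by a mixed policy, matching the properties defined above (the output comments assume a fresh session with the default float32 global policy):

import tensorflow as tf

p = tf.keras.mixed_precision.Policy('mixed_float16')
print(p.compute_dtype)    # float16: dtype of the layer's computations
print(p.variable_dtype)   # float32: dtype of the layer's weights
print(tf.keras.mixed_precision.global_policy())  # <Policy "float32">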
- - For example, a `tf.keras.layers.Dense` layer, when run on a GPU with a - float16 compute dtype, will pass float16 inputs to `tf.linalg.matmul`. But, - `tf.linalg.matmul` will do use float32 intermediate math. The performance - benefit of float16 is still apparent, due to increased memory bandwidth and - the fact modern GPUs have specialized hardware for computing matmuls on - float16 inputs while still keeping intermediate computations in float32. + >>> tf.keras.mixed_precision.global_policy() + <Policy "float32"> + >>> tf.keras.layers.Dense(10).dtype_policy # Defaults to the global policy + <Policy "float32"> + + If TensorFlow 2 behavior has been disabled with + `tf.compat.v1.disable_v2_behavior()`, this will instead return a special + "_infer" policy which infers the dtype from the dtype of the first input the + first time the layer is called. This behavior matches the behavior that + existed in TensorFlow 1. + + See `tf.keras.mixed_precision.Policy` for more information on policies. Returns: - The compute dtype of this policy, as a string. + The global Policy. """ - return self._compute_dtype - - @property - def name(self): - """Returns the name of this policy.""" - return self._name + if _global_policy is None: + if base_layer_utils.v2_dtype_behavior_enabled(): + return Policy(backend.floatx()) + else: + return Policy("_infer") + return _global_policy - def __repr__(self): - return '<Policy "%s">' % self._name - def get_config(self): - return {'name': self.name} +def _check_if_mixed_precision_graph_rewrite_is_enabled(policy): + if tf.__internal__.train.is_mixed_precision_graph_rewrite_enabled(): + raise ValueError( + 'The global dtype policy cannot be set to "{policy.name}", because ' + "the mixed precision graph rewrite has already been enabled.\n" + "At most, one of the following can be called:\n\n" + " 1. tf.compat.v1.train.enable_mixed_precision_graph_rewrite() " + "(You called this first)\n" + " 2. tf.keras.mixed_precision.set_global_policy() with a mixed " + "precision policy (You called this second)\n\n" + "You called both functions, which is an error, because both " + "functions enable you to use mixed precision. If in doubt which " + "function to use, use the second, as it supports Eager execution " + "and is more customizable.".format(policy=policy) + ) + + +@keras_export("keras.mixed_precision.set_global_policy", v1=[]) +def set_global_policy(policy): + """Sets the global dtype policy. - @classmethod - def from_config(cls, config, custom_objects=None): - del custom_objects - if 'loss_scale' in config: - config = config.copy() - # Policy.get_config in TensorFlow 2.3 and below had a loss_scale. We - # silently drop it. - del config['loss_scale'] - return cls(**config) + The global policy is the default `tf.keras.mixed_precision.Policy` used for + layers, if no policy is passed to the layer constructor. + >>> tf.keras.mixed_precision.set_global_policy('mixed_float16') + >>> tf.keras.mixed_precision.global_policy() + <Policy "mixed_float16"> + >>> tf.keras.layers.Dense(10).dtype_policy + <Policy "mixed_float16"> + >>> # Global policy is not used if a policy + >>> # is directly passed to constructor + >>> tf.keras.layers.Dense(10, dtype='float64').dtype_policy + <Policy "float64"> + >>> tf.keras.mixed_precision.set_global_policy('float32') -# The current global policy in effect. If None, it means the current value of -# floatx should be used as the policy if the V2 dtype behavior is enabled, -# or "_infer" otherwise. -# TODO(reedwm): Make this thread local?
-_global_policy = None + If no global policy is set, layers will instead default to a Policy + constructed from `tf.keras.backend.floatx()`. + To use mixed precision, the global policy should be set to `'mixed_float16'` + or `'mixed_bfloat16'`, so that every layer uses a 16-bit compute dtype and + float32 variable dtype by default. -@keras_export('keras.mixed_precision.global_policy', v1=[]) -def global_policy(): - """Returns the global dtype policy. - - The global policy is the default `tf.keras.mixed_precision.Policy` used for - layers, if no policy is passed to the layer constructor. If no policy has been - set with `keras.mixed_precision.set_global_policy`, this will return a policy - constructed from `tf.keras.backend.floatx()` (floatx defaults to float32). - - >>> tf.keras.mixed_precision.global_policy() - - >>> tf.keras.layers.Dense(10).dtype_policy # Defaults to the global policy - - - If TensorFlow 2 behavior has been disabled with - `tf.compat.v1.disable_v2_behavior()`, this will instead return a special - "_infer" policy which infers the dtype from the dtype of the first input the - first time the layer is called. This behavior matches the behavior that - existed in TensorFlow 1. - - See `tf.keras.mixed_precision.Policy` for more information on policies. - - Returns: - The global Policy. - """ - if _global_policy is None: - if base_layer_utils.v2_dtype_behavior_enabled(): - return Policy(backend.floatx()) - else: - return Policy('_infer') - return _global_policy + Only floating point policies can be set as the global policy, such as + `'float32'` and `'mixed_float16'`. Non-floating point policies such as + `'int32'` and `'complex64'` cannot be set as the global policy because most + layers do not support such policies. + See `tf.keras.mixed_precision.Policy` for more information. -def _check_if_mixed_precision_graph_rewrite_is_enabled(policy): - if tf.__internal__.train.is_mixed_precision_graph_rewrite_enabled(): - raise ValueError( - 'The global dtype policy cannot be set to "{policy.name}", because the ' - 'mixed precision graph rewrite has already been enabled.\n' - 'At most, one of the following can be called:\n\n' - ' 1. tf.compat.v1.train.enable_mixed_precision_graph_rewrite() ' - '(You called this first)\n' - ' 2. tf.keras.mixed_precision.set_global_policy() with a mixed ' - 'precision policy (You called this second)\n\n' - 'You called both functions, which is an error, because both functions ' - 'enable you to use mixed precision. If in doubt which function to use, ' - 'use the second, as it supports Eager execution and is more ' - 'customizable.'.format(policy=policy)) - - -@keras_export('keras.mixed_precision.set_global_policy', v1=[]) -def set_global_policy(policy): - """Sets the global dtype policy. - - The global policy is the default `tf.keras.mixed_precision.Policy` used for - layers, if no policy is passed to the layer constructor. - - >>> tf.keras.mixed_precision.set_global_policy('mixed_float16') - >>> tf.keras.mixed_precision.global_policy() - - >>> tf.keras.layers.Dense(10).dtype_policy - - >>> # Global policy is not used if a policy is directly passed to constructor - >>> tf.keras.layers.Dense(10, dtype='float64').dtype_policy - - >>> tf.keras.mixed_precision.set_global_policy('float32') - - If no global policy is set, layers will instead default to a Policy - constructed from `tf.keras.backend.floatx()`. 
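The floating-point-only restriction described in the docstring above can be demonstrated directly; a small sketch:

import tensorflow as tf

try:
    tf.keras.mixed_precision.set_global_policy('int32')
except ValueError as e:
    # "set_global_policy can only be used to set the global policy to
    # floating-point policies, such as "float32" and "mixed_float16" ..."
    print(e)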
- - To use mixed precision, the global policy should be set to `'mixed_float16'` - or `'mixed_bfloat16'`, so that every layer uses a 16-bit compute dtype and - float32 variable dtype by default. - - Only floating point policies can be set as the global policy, such as - `'float32'` and `'mixed_float16'`. Non-floating point policies such as - `'int32'` and `'complex64'` cannot be set as the global policy because most - layers do not support such policies. - - See `tf.keras.mixed_precision.Policy` for more information. - - Args: - policy: A Policy, or a string that will be converted to a Policy. Can also - be None, in which case the global policy will be constructed from - `tf.keras.backend.floatx()` - """ - global _global_policy - if not base_layer_utils.v2_dtype_behavior_enabled(): - raise ValueError('The global policy can only be set in TensorFlow 2 or if ' - 'V2 dtype behavior has been set. To enable V2 dtype ' - 'behavior, call ' - '"tf.compat.v1.keras.layers.enable_v2_dtype_behavior()"') - if policy is not None and not isinstance(policy, Policy): - policy = Policy(policy) - is_mixed_policy = (policy is not None and - policy.compute_dtype != policy.variable_dtype) - if is_mixed_policy: - _check_if_mixed_precision_graph_rewrite_is_enabled(policy) - if (policy is not None and policy.compute_dtype is not None and - not tf.as_dtype(policy.compute_dtype).is_floating): - raise ValueError('set_global_policy can only be used to set the global ' - 'policy to floating-point policies, such as "float32" and ' - '"mixed_float16", but got policy: %s' - % (policy.name,)) - _global_policy = policy - tf.__internal__.train.set_using_mixed_precision_policy(is_mixed_policy) + Args: + policy: A Policy, or a string that will be converted to a Policy. Can also + be None, in which case the global policy will be constructed from + `tf.keras.backend.floatx()` + """ + global _global_policy + if not base_layer_utils.v2_dtype_behavior_enabled(): + raise ValueError( + "The global policy can only be set in TensorFlow 2 or if " + "V2 dtype behavior has been set. To enable V2 dtype " + "behavior, call " + '"tf.compat.v1.keras.layers.enable_v2_dtype_behavior()"' + ) + if policy is not None and not isinstance(policy, Policy): + policy = Policy(policy) + is_mixed_policy = ( + policy is not None and policy.compute_dtype != policy.variable_dtype + ) + if is_mixed_policy: + _check_if_mixed_precision_graph_rewrite_is_enabled(policy) + if ( + policy is not None + and policy.compute_dtype is not None + and not tf.as_dtype(policy.compute_dtype).is_floating + ): + raise ValueError( + "set_global_policy can only be used to set the global " + 'policy to floating-point policies, such as "float32" and ' + f'"mixed_float16", but got policy: {policy.name}' + ) + _global_policy = policy + tf.__internal__.train.set_using_mixed_precision_policy(is_mixed_policy) # TODO(reedwm): Make this thread local @contextlib.contextmanager def policy_scope(policy): - """A context manager that sets the global Policy under it. + """A context manager that sets the global Policy under it. - Args: - policy: A Policy, or a string that will be converted to a Policy.. + Args: + policy: A Policy, or a string that will be converted to a Policy. - Yields: - Nothing. + + Yields: + Nothing.
+ """ + old_policy = _global_policy + try: + set_global_policy(policy) + yield + finally: + set_global_policy(old_policy) + + +def get_policy(identifier): + if isinstance(identifier, Policy): + dtype_policy = identifier + elif isinstance(identifier, dict): + dtype_policy = deserialize(identifier) + elif isinstance(identifier, str) and identifier in ( + "mixed_float16", + "mixed_bfloat16", + ): + # The isinstance check is required since np.dtype raises an error if + # compared to a non-dtype string. + dtype_policy = Policy(identifier) + elif identifier: + dtype_policy = Policy(tf.as_dtype(identifier).name) + else: + dtype_policy = global_policy() + if ( + dtype_policy.name == "mixed_float16" + and not loss_scale_optimizer.strategy_supports_loss_scaling() + ): + # Although only loss scaling doesn't support certain strategies, to + # avoid confusion, we disallow the 'mixed_float16' policy with + # unsupported strategies. This is because 'mixed_float16' requires + # loss scaling for numeric stability. + strategy = tf.distribute.get_strategy() + raise ValueError( + "Mixed precision is not supported with the " + f"tf.distribute.Strategy: {strategy.__class__.__name__}. " + "Either stop using mixed precision by removing the use of " + f"the {dtype_policy.name} policy or " + "use a different Strategy, e.g. a MirroredStrategy." + ) + return dtype_policy def _is_convertible_to_dtype(dtype): - try: - tf.as_dtype(dtype) - return True - except TypeError: - return False + try: + tf.as_dtype(dtype) + return True + except TypeError: + return False def _policy_equivalent_to_dtype(policy): - """Returns True if the Policy is equivalent to a single dtype. + """Returns True if the Policy is equivalent to a single dtype. - A policy is equivalent to a single dtype if the policy's compute and variable - dtypes are the same and the policy's type is Policy and not a subclass of - Policy. + A policy is equivalent to a single dtype if the policy's compute and + variable dtypes are the same and the policy's type is Policy and not a + subclass of Policy. - The "_infer" policy is considered equivalent to a single dtype. + The "_infer" policy is considered equivalent to a single dtype. - Args: - policy: A Policy. + Args: + policy: A Policy. - Returns: - True, if the policy is equivalent to a single dtype. - """ - # We use type() instead of isinstance because a subclass of Policy is never - # equivalent to a dtype. - return (type(policy) == Policy and # pylint: disable=unidiomatic-typecheck - (policy.name == '_infer' or _is_convertible_to_dtype(policy.name))) + Returns: + True, if the policy is equivalent to a single dtype. + """ + # We use type() instead of isinstance because a subclass of Policy is never + # equivalent to a dtype. + return type(policy) == Policy and ( + policy.name == "_infer" or _is_convertible_to_dtype(policy.name) + ) def serialize(policy): - if _policy_equivalent_to_dtype(policy): - # We return either None or the policy name for compatibility with older - # versions of Keras. If the policy name is returned, it is a dtype string - # such as 'float32'. - return None if policy.name == '_infer' else policy.name - return generic_utils.serialize_keras_object(policy) + if _policy_equivalent_to_dtype(policy): + # We return either None or the policy name for compatibility with older + # versions of Keras. If the policy name is returned, it is a dtype + # string such as 'float32'. 
+ return None if policy.name == "_infer" else policy.name + return serialization_lib.serialize_keras_object(policy) def deserialize(config, custom_objects=None): - if isinstance(config, str) and _is_convertible_to_dtype(config): - return Policy(config) - if config is None: - return Policy('_infer') - # PolicyV1 was an old version of Policy that was removed. Deserializing it - # turns it into a (non-V1) Policy. - module_objects = {'Policy': Policy, 'PolicyV1': Policy} - return generic_utils.deserialize_keras_object( - config, - module_objects=module_objects, - custom_objects=custom_objects, - printable_module_name='dtype policy') + if isinstance(config, str) and _is_convertible_to_dtype(config): + return Policy(config) + if config is None: + return Policy("_infer") + # PolicyV1 was an old version of Policy that was removed. Deserializing it + # turns it into a (non-V1) Policy. + module_objects = {"Policy": Policy, "PolicyV1": Policy} + return serialization_lib.deserialize_keras_object( + config, + module_objects=module_objects, + custom_objects=custom_objects, + printable_module_name="dtype policy", + ) diff --git a/keras/mixed_precision/policy_test.py b/keras/mixed_precision/policy_test.py index 7632966a4309..5131ce085b7e 100644 --- a/keras/mixed_precision/policy_test.py +++ b/keras/mixed_precision/policy_test.py @@ -15,237 +15,300 @@ """Tests Policies.""" import tensorflow.compat.v2 as tf - from absl.testing import parameterized -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils + from keras.engine import base_layer_utils from keras.mixed_precision import device_compatibility_check from keras.mixed_precision import policy as mp_policy -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import gradient_descent +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + +# isort: off from tensorflow.python.platform import tf_logging -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class PolicyTest(tf.test.TestCase, parameterized.TestCase): - """Tests Policies.""" - - @test_utils.enable_v2_dtype_behavior - def test_dtype_attributes(self): - for dtype in 'int32', 'bool', 'float16', 'float32': - policy = mp_policy.Policy(dtype) - self.assertEqual(policy.name, dtype) - self.assertEqual(policy.compute_dtype, dtype) - self.assertEqual(policy.variable_dtype, dtype) - - for dtype in 'float16', 'bfloat16': - policy = mp_policy.Policy('mixed_' + dtype) - self.assertEqual(policy.name, 'mixed_' + dtype) - self.assertEqual(policy.compute_dtype, dtype) - self.assertEqual(policy.variable_dtype, 'float32') - - policy = mp_policy.Policy('_infer') - self.assertEqual(policy.compute_dtype, None) - self.assertEqual(policy.variable_dtype, None) - - @test_utils.enable_v2_dtype_behavior - def test_repr(self): - # Test Policy repr - for policy in ('float32', 'int8', 'mixed_float16', 'mixed_bfloat16', - '_infer'): - self.assertEqual(repr(mp_policy.Policy(policy)), - '<Policy "%s">' % policy) - - @test_utils.enable_v2_dtype_behavior - def test_policy_errors(self): - # Test passing invalid strings - - with self.assertRaisesRegex( - ValueError, 'Cannot convert value abc to a mixed precision Policy.'): - mp_policy.Policy('abc') - - # Test passing a DType - with self.assertRaisesRegex( - TypeError, "'name' must be a string, not a DType. " 'Instead, pass DType.name. 
Got: float16'): - mp_policy.Policy(tf.float16) - - # Test passing a non-DType invalid type - with self.assertRaisesRegex(TypeError, - "'name' must be a string, but got: 5"): - mp_policy.Policy(5) - - # Test passing a now-removed policy ending in float32_vars - with self.assertRaisesRegex( - ValueError, 'Policies ending in \'_float32_vars\' have been removed ' - 'from TensorFlow. Please use the \'mixed_float16\' or ' - '\'mixed_bfloat16\' policy instead. Got policy name: ' - '\'infer_float32_vars\''): - mp_policy.Policy('infer_float32_vars') - with self.assertRaisesRegex( - ValueError, 'Policies ending in \'_float32_vars\' have been removed ' - 'from TensorFlow. Please use the \'mixed_float16\' policy ' - 'instead. Got policy name: \'float16_with_float32_vars\''): - mp_policy.Policy('float16_with_float32_vars') - with self.assertRaisesRegex( - ValueError, 'Policies ending in \'_float32_vars\' have been removed ' - 'from TensorFlow. Please use the \'mixed_bfloat16\' policy ' - 'instead. Got policy name: \'bfloat16_with_float32_vars\''): - mp_policy.Policy('bfloat16_with_float32_vars') - with self.assertRaisesRegex( - ValueError, 'Policies ending in \'_float32_vars\' have been removed ' - 'from TensorFlow. Got policy name: ' - '\'int8_with_float32_vars\''): - mp_policy.Policy('int8_with_float32_vars') - - @test_utils.enable_v2_dtype_behavior - def test_global_policy(self): - if base_layer_utils.v2_dtype_behavior_enabled(): - default_policy = 'float32' - else: - default_policy = '_infer' - self.assertEqual(mp_policy.global_policy().name, default_policy) - try: - mp_policy.set_global_policy('mixed_float16') - self.assertEqual(mp_policy.global_policy().name, 'mixed_float16') - with tf.Graph().as_default(): # Policies are not associated with a graph - self.assertEqual(mp_policy.global_policy().name, 'mixed_float16') - mp_policy.set_global_policy('_infer') - self.assertEqual(mp_policy.global_policy().name, '_infer') - policy = mp_policy.Policy('mixed_bfloat16') - mp_policy.set_global_policy(policy) - self.assertIs(mp_policy.global_policy(), policy) - finally: - mp_policy.set_global_policy(None) - - @test_utils.enable_v2_dtype_behavior - def test_global_policy_dtype_error(self): - with self.assertRaisesRegex( - ValueError, - 'set_global_policy can only be used to set the global policy to ' - 'floating-point policies, such as "float32" and "mixed_float16", but ' - 'got policy: int32'): - mp_policy.set_global_policy('int32') - with self.assertRaisesRegex( - ValueError, - 'set_global_policy can only be used to set the global policy to ' - 'floating-point policies, such as "float32" and "mixed_float16", but ' - 'got policy: complex64'): - mp_policy.set_global_policy(mp_policy.Policy('complex64')) - - @test_utils.enable_v2_dtype_behavior - def test_device_compatibility_warning(self): - if not tf.executing_eagerly(): - self.skipTest('Run in eager mode only.') - - device_compatibility_check._logged_compatibility_check = False - with tf.compat.v1.test.mock.patch.object(tf_logging, 'warning') as mock_warn: - mp_policy.Policy('mixed_float16') - if tf.config.list_physical_devices('GPU'): - mock_warn.assert_not_called() - else: - self.assertRegex( - mock_warn.call_args[0][0], - r'Mixed precision compatibility check \(mixed_float16\): WARNING.*') - - if tf.config.list_physical_devices('GPU'): - # Assert message is only logged once - with tf.compat.v1.test.mock.patch.object(tf_logging, 'warning') as mock_warn: - mp_policy.Policy('mixed_float16') - mock_warn.assert_not_called() - - 
@test_utils.enable_v2_dtype_behavior - def test_policy_scope(self): - if base_layer_utils.v2_dtype_behavior_enabled(): - default_policy = 'float32' - else: - default_policy = '_infer' - with mp_policy.policy_scope('mixed_float16'): - self.assertEqual(mp_policy.global_policy().name, 'mixed_float16') - with mp_policy.policy_scope('_infer'): - self.assertEqual(mp_policy.global_policy().name, '_infer') - self.assertEqual(mp_policy.global_policy().name, 'mixed_float16') - self.assertEqual(mp_policy.global_policy().name, default_policy) - - @test_utils.enable_v2_dtype_behavior - def test_config(self): - for policy in ( - mp_policy.Policy('float16'), - mp_policy.Policy('float32'), - mp_policy.Policy('int16'), - mp_policy.Policy('mixed_float16'), - mp_policy.Policy('mixed_bfloat16'), - mp_policy.Policy('_infer'), - ): - config = policy.get_config() - new_policy = mp_policy.Policy.from_config(config) - # Comparing strings is the easiest way to ensure the policies are the - # same, as policy does not override the == operator. - self.assertEqual(str(policy), str(new_policy)) - - @test_utils.enable_v2_dtype_behavior - def test_serialization(self): - # Test policies that are equivalent to a single dtype - for policy_name in 'float16', 'float32', 'int8', 'string', 'bool': - policy = mp_policy.Policy(policy_name) - config = mp_policy.serialize(policy) - self.assertEqual(config, policy_name) - new_policy = mp_policy.deserialize(config) - self.assertEqual(str(policy), str(new_policy)) - - # Test "_infer" policy - policy = mp_policy.Policy('_infer') - config = mp_policy.serialize(policy) - self.assertIsNone(config) - new_policy = mp_policy.deserialize(config) - self.assertEqual(str(policy), str(new_policy)) - - class MyPolicy(mp_policy.Policy): - pass - - # Test policies that are not equivalent to a single dtype - for policy in ( - mp_policy.Policy('mixed_float16'), - mp_policy.Policy('mixed_bfloat16'), - MyPolicy('float32') - ): - config = mp_policy.serialize(policy) - self.assertEqual(config, {'class_name': policy.__class__.__name__, - 'config': {'name': policy.name}}) - new_policy = mp_policy.deserialize(config, - custom_objects={'MyPolicy': MyPolicy}) - self.assertEqual(str(policy), str(new_policy)) - - @test_utils.enable_v2_dtype_behavior - def test_error_if_graph_rewrite_enabled(self): - try: - tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - gradient_descent.SGD(1.)) - with self.assertRaisesRegex( - ValueError, 'cannot be set to "mixed_float16", .* the mixed ' - 'precision graph rewrite has already been enabled'): - mp_policy.set_global_policy('mixed_float16') - with mp_policy.policy_scope('float64'): - pass # Non-mixed policies are allowed - finally: - tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite() - - @test_utils.disable_v2_dtype_behavior - def test_v1_dtype_behavior(self): - # Setting global policies are not allowed with V1 dtype behavior - with self.assertRaisesRegex( - ValueError, 'global policy can only be set in TensorFlow 2'): - with mp_policy.policy_scope(mp_policy.Policy('_infer')): - pass - with self.assertRaisesRegex( - ValueError, 'global policy can only be set in TensorFlow 2'): - with mp_policy.policy_scope(mp_policy.Policy('float32')): - pass - with self.assertRaisesRegex( - ValueError, 'global policy can only be set in TensorFlow 2'): - with mp_policy.policy_scope(mp_policy.Policy('mixed_float16')): - pass - - -if __name__ == '__main__': - tf.test.main() + """Tests Policies.""" + + @test_utils.enable_v2_dtype_behavior + def 
test_dtype_attributes(self): + for dtype in "int32", "bool", "float16", "float32": + policy = mp_policy.Policy(dtype) + self.assertEqual(policy.name, dtype) + self.assertEqual(policy.compute_dtype, dtype) + self.assertEqual(policy.variable_dtype, dtype) + + for dtype in "float16", "bfloat16": + policy = mp_policy.Policy("mixed_" + dtype) + self.assertEqual(policy.name, "mixed_" + dtype) + self.assertEqual(policy.compute_dtype, dtype) + self.assertEqual(policy.variable_dtype, "float32") + + policy = mp_policy.Policy("_infer") + self.assertEqual(policy.compute_dtype, None) + self.assertEqual(policy.variable_dtype, None) + + @test_utils.enable_v2_dtype_behavior + def test_repr(self): + # Test Policy repr + for policy in ( + "float32", + "int8", + "mixed_float16", + "mixed_bfloat16", + "_infer", + ): + self.assertEqual( + repr(mp_policy.Policy(policy)), f'<Policy "{policy}">' + ) + + @test_utils.enable_v2_dtype_behavior + def test_policy_errors(self): + # Test passing invalid strings + + with self.assertRaisesRegex( + ValueError, "Cannot convert value abc to a mixed precision Policy." + ): + mp_policy.Policy("abc") + + # Test passing a DType + with self.assertRaisesRegex( + TypeError, "'name' must be a string, not a DType. " + ): + mp_policy.Policy(tf.float16) + + # Test passing a non-DType invalid type + with self.assertRaisesRegex( + TypeError, "'name' must be a string, but got: 5" + ): + mp_policy.Policy(5) + + # Test passing a now-removed policy ending in float32_vars + with self.assertRaisesRegex( + ValueError, + "Policies ending in '_float32_vars' have been removed " + "from TensorFlow. Please use the 'mixed_float16' or " + "'mixed_bfloat16' policy instead. Got policy name: " + "'infer_float32_vars'", + ): + mp_policy.Policy("infer_float32_vars") + with self.assertRaisesRegex( + ValueError, + "Policies ending in '_float32_vars' have been removed " + "from TensorFlow. Please use the 'mixed_float16' policy " + "instead. Got policy name: 'float16_with_float32_vars'", + ): + mp_policy.Policy("float16_with_float32_vars") + with self.assertRaisesRegex( + ValueError, + "Policies ending in '_float32_vars' have been removed " + "from TensorFlow. Please use the 'mixed_bfloat16' policy " + "instead. Got policy name: 'bfloat16_with_float32_vars'", + ): + mp_policy.Policy("bfloat16_with_float32_vars") + with self.assertRaisesRegex( + ValueError, + "Policies ending in '_float32_vars' have been removed " + "from TensorFlow. 
Got policy name: " + "'int8_with_float32_vars'", + ): + mp_policy.Policy("int8_with_float32_vars") + + @test_utils.enable_v2_dtype_behavior + def test_global_policy(self): + if base_layer_utils.v2_dtype_behavior_enabled(): + default_policy = "float32" + else: + default_policy = "_infer" + self.assertEqual(mp_policy.global_policy().name, default_policy) + try: + mp_policy.set_global_policy("mixed_float16") + self.assertEqual(mp_policy.global_policy().name, "mixed_float16") + # Policies are not associated with a graph + with tf.Graph().as_default(): + self.assertEqual( + mp_policy.global_policy().name, "mixed_float16" + ) + mp_policy.set_global_policy("_infer") + self.assertEqual(mp_policy.global_policy().name, "_infer") + policy = mp_policy.Policy("mixed_bfloat16") + mp_policy.set_global_policy(policy) + self.assertIs(mp_policy.global_policy(), policy) + finally: + mp_policy.set_global_policy(None) + + @test_utils.enable_v2_dtype_behavior + def test_global_policy_dtype_error(self): + with self.assertRaisesRegex( + ValueError, + "set_global_policy can only be used to set the global policy to " + 'floating-point policies, such as "float32" and "mixed_float16", ' + "but got policy: int32", + ): + mp_policy.set_global_policy("int32") + with self.assertRaisesRegex( + ValueError, + "set_global_policy can only be used to set the global policy to " + 'floating-point policies, such as "float32" and "mixed_float16", ' + "but got policy: complex64", + ): + mp_policy.set_global_policy(mp_policy.Policy("complex64")) + + @test_utils.enable_v2_dtype_behavior + def test_device_compatibility_warning(self): + if not tf.executing_eagerly(): + self.skipTest("Run in eager mode only.") + + device_compatibility_check._logged_compatibility_check = False + with tf.compat.v1.test.mock.patch.object( + tf_logging, "warning" + ) as mock_warn: + mp_policy.Policy("mixed_float16") + if tf.config.list_physical_devices("GPU"): + mock_warn.assert_not_called() + else: + self.assertRegex( + mock_warn.call_args[0][0], + r"Mixed precision compatibility check \(mixed_float16\): " + r"WARNING.*", + ) + + if tf.config.list_physical_devices("GPU"): + # Assert message is only logged once + with tf.compat.v1.test.mock.patch.object( + tf_logging, "warning" + ) as mock_warn: + mp_policy.Policy("mixed_float16") + mock_warn.assert_not_called() + + @test_utils.enable_v2_dtype_behavior + def test_policy_scope(self): + if base_layer_utils.v2_dtype_behavior_enabled(): + default_policy = "float32" + else: + default_policy = "_infer" + with mp_policy.policy_scope("mixed_float16"): + self.assertEqual(mp_policy.global_policy().name, "mixed_float16") + with mp_policy.policy_scope("_infer"): + self.assertEqual(mp_policy.global_policy().name, "_infer") + self.assertEqual(mp_policy.global_policy().name, "mixed_float16") + self.assertEqual(mp_policy.global_policy().name, default_policy) + + @test_utils.enable_v2_dtype_behavior + def test_config(self): + for policy in ( + mp_policy.Policy("float16"), + mp_policy.Policy("float32"), + mp_policy.Policy("int16"), + mp_policy.Policy("mixed_float16"), + mp_policy.Policy("mixed_bfloat16"), + mp_policy.Policy("_infer"), + ): + config = policy.get_config() + new_policy = mp_policy.Policy.from_config(config) + # Comparing strings is the easiest way to ensure the policies are + # the same, as policy does not override the == operator. 
+ self.assertEqual(str(policy), str(new_policy)) + + @test_utils.enable_v2_dtype_behavior + def test_serialization(self): + # Test policies that are equivalent to a single dtype + for policy_name in "float16", "float32", "int8", "string", "bool": + policy = mp_policy.Policy(policy_name) + config = mp_policy.serialize(policy) + self.assertEqual(config, policy_name) + new_policy = mp_policy.deserialize(config) + self.assertEqual(str(policy), str(new_policy)) + + # Test "_infer" policy + policy = mp_policy.Policy("_infer") + config = mp_policy.serialize(policy) + self.assertIsNone(config) + new_policy = mp_policy.deserialize(config) + self.assertEqual(str(policy), str(new_policy)) + + class MyPolicy(mp_policy.Policy): + pass + + # Test policies that are not equivalent to a single dtype + for policy in ( + mp_policy.Policy("mixed_float16"), + mp_policy.Policy("mixed_bfloat16"), + MyPolicy("float32"), + ): + config = mp_policy.serialize(policy) + if tf.__internal__.tf2.enabled(): + if policy.name == "float32": + self.assertEqual( + config, + { + "module": None, + "class_name": policy.__class__.__name__, + "config": {"name": policy.name}, + "registered_name": "MyPolicy", + }, + ) + else: + self.assertEqual( + config, + { + "module": "keras.mixed_precision", + "class_name": policy.__class__.__name__, + "config": {"name": policy.name}, + "registered_name": None, + }, + ) + else: + self.assertEqual( + config, + { + "class_name": policy.__class__.__name__, + "config": {"name": policy.name}, + }, + ) + new_policy = mp_policy.deserialize( + config, custom_objects={"MyPolicy": MyPolicy} + ) + self.assertEqual(str(policy), str(new_policy)) + + @test_utils.enable_v2_dtype_behavior + def test_error_if_graph_rewrite_enabled(self): + try: + tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + gradient_descent.SGD(1.0) + ) + with self.assertRaisesRegex( + ValueError, + 'cannot be set to "mixed_float16", .* the mixed ' + "precision graph rewrite has already been enabled", + ): + mp_policy.set_global_policy("mixed_float16") + with mp_policy.policy_scope("float64"): + pass # Non-mixed policies are allowed + finally: + tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite() + + @test_utils.disable_v2_dtype_behavior + def test_v1_dtype_behavior(self): + # Setting global policies are not allowed with V1 dtype behavior + with self.assertRaisesRegex( + ValueError, "global policy can only be set in TensorFlow 2" + ): + with mp_policy.policy_scope(mp_policy.Policy("_infer")): + pass + with self.assertRaisesRegex( + ValueError, "global policy can only be set in TensorFlow 2" + ): + with mp_policy.policy_scope(mp_policy.Policy("float32")): + pass + with self.assertRaisesRegex( + ValueError, "global policy can only be set in TensorFlow 2" + ): + with mp_policy.policy_scope(mp_policy.Policy("mixed_float16")): + pass + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/mixed_precision/test_util.py b/keras/mixed_precision/test_util.py index f01987732518..43c422189e35 100644 --- a/keras/mixed_precision/test_util.py +++ b/keras/mixed_precision/test_util.py @@ -15,202 +15,228 @@ """Contains testing utilities related to mixed precision.""" import tensorflow.compat.v2 as tf + from keras import regularizers from keras.engine import base_layer def create_identity_with_grad_check_fn(expected_gradient, expected_dtype=None): - """Returns a function that asserts it's gradient has a certain value. - - This serves as a hook to assert intermediate gradients have a certain value. 
- This returns an identity function. The identity's gradient function is also - the identity function, except it asserts that the gradient equals - `expected_gradient` and has dtype `expected_dtype`. - - Args: - expected_gradient: The gradient function asserts that the gradient is this - value. - expected_dtype: The gradient function asserts the gradient has this dtype. - - Returns: - An identity function whose gradient function asserts the gradient has a - certain value. - """ - @tf.custom_gradient - def _identity_with_grad_check(x): - """Function that asserts it's gradient has a certain value.""" - x = tf.identity(x) - def grad(dx): - """Gradient function that asserts the gradient has a certain value.""" - if expected_dtype: - assert dx.dtype == expected_dtype, ( - 'dx.dtype should be %s but is: %s' % (expected_dtype, dx.dtype)) - expected_tensor = tf.convert_to_tensor( - expected_gradient, dtype=dx.dtype, name='expected_gradient') - # Control dependency is to ensure input is available. It's possible the - # dataset will throw a StopIteration to indicate there is no more data, in - # which case we don't want to run the assertion. - with tf.control_dependencies([x]): - assert_op = tf.compat.v1.assert_equal(dx, expected_tensor) - with tf.control_dependencies([assert_op]): - dx = tf.identity(dx) - return dx - return x, grad - # Keras sometimes has trouble serializing Lambda layers with a decorated - # function. So we define and return a non-decorated function. - def identity_with_grad_check(x): - return _identity_with_grad_check(x) - return identity_with_grad_check + """Returns a function that asserts its gradient has a certain value. + + This serves as a hook to assert intermediate gradients have a certain value. + This returns an identity function. The identity's gradient function is also + the identity function, except it asserts that the gradient equals + `expected_gradient` and has dtype `expected_dtype`. + + Args: + expected_gradient: The gradient function asserts that the gradient is this + value. + expected_dtype: The gradient function asserts the gradient has this dtype. + + Returns: + An identity function whose gradient function asserts the gradient has a + certain value. + """ + + @tf.custom_gradient + def _identity_with_grad_check(x): + """Function that asserts its gradient has a certain value.""" + x = tf.identity(x) + + def grad(dx): + """Gradient function that asserts the gradient has a certain + value.""" + if expected_dtype: + assert ( + dx.dtype == expected_dtype + ), f"dx.dtype should be {expected_dtype} but is: {dx.dtype}" + expected_tensor = tf.convert_to_tensor( + expected_gradient, dtype=dx.dtype, name="expected_gradient" + ) + # Control dependency is to ensure input is available. It's possible + # the dataset will throw a StopIteration to indicate there is no + # more data, in which case we don't want to run the assertion. + with tf.control_dependencies([x]): + assert_op = tf.compat.v1.assert_equal(dx, expected_tensor) + with tf.control_dependencies([assert_op]): + dx = tf.identity(dx) + return dx + + return x, grad + + # Keras sometimes has trouble serializing Lambda layers with a decorated + # function. So we define and return a non-decorated function. + def identity_with_grad_check(x): + return _identity_with_grad_check(x) + + return identity_with_grad_check def create_identity_with_nan_gradients_fn(have_nan_gradients): - """Returns a function that optionally has NaN gradients. - - This serves as a hook to introduce NaN gradients to a model.
This returns an - identity function. The identity's gradient function will check if the boolean - tensor `have_nan_gradients` is True. If so, the gradient will be NaN. - Otherwise, the gradient will also be the identity. - - Args: - have_nan_gradients: A scalar boolean tensor. If True, gradients will be NaN. - Otherwise, the gradient function is the identity function. - - Returns: - An identity function whose gradient function will return NaNs, if - `have_nan_gradients` is True. - """ - @tf.custom_gradient - def _identity_with_nan_gradients(x): - """Function whose gradient is NaN iff `have_nan_gradients` is True.""" - x = tf.identity(x) - def grad(dx): - return tf.cond( - have_nan_gradients, - lambda: dx * float('NaN'), - lambda: dx - ) - return x, grad - # Keras sometimes has trouble serializing Lambda layers with a decorated - # function. So we define and return a non-decorated function. - def identity_with_nan_gradients(x): - return _identity_with_nan_gradients(x) - return identity_with_nan_gradients + """Returns a function that optionally has NaN gradients. + This serves as a hook to introduce NaN gradients to a model. This returns an + identity function. The identity's gradient function will check if the + boolean tensor `have_nan_gradients` is True. If so, the gradient will be + NaN. Otherwise, the gradient will also be the identity. -class AssertTypeLayer(base_layer.Layer): - """A layer which asserts it's inputs are a certain type.""" + Args: + have_nan_gradients: A scalar boolean tensor. If True, gradients will be + NaN. Otherwise, the gradient function is the identity function. - def __init__(self, assert_type=None, **kwargs): - self._assert_type = (tf.as_dtype(assert_type).name if assert_type - else None) - super().__init__(**kwargs) + Returns: + An identity function whose gradient function will return NaNs, if + `have_nan_gradients` is True. + """ - def assert_input_types(self, inputs): - """Asserts `inputs` are of the correct type. Should be called in call().""" - if self._assert_type: - inputs_flattened = tf.nest.flatten(inputs) - for inp in inputs_flattened: - assert inp.dtype.base_dtype == self._assert_type, ( - 'Input tensor has type %s which does not match assert type %s' % - (inp.dtype.name, self._assert_type)) + @tf.custom_gradient + def _identity_with_nan_gradients(x): + """Function whose gradient is NaN iff `have_nan_gradients` is True.""" + x = tf.identity(x) + def grad(dx): + return tf.cond( + have_nan_gradients, lambda: dx * float("NaN"), lambda: dx + ) -class MultiplyLayer(AssertTypeLayer): - """A layer which multiplies its input by a scalar variable.""" + return x, grad - def __init__(self, - regularizer=None, - activity_regularizer=None, - use_operator=False, - var_name='v', - **kwargs): - """Initializes the MultiplyLayer. + # Keras sometimes has trouble serializing Lambda layers with a decorated + # function. So we define and return a non-decorated function. + def identity_with_nan_gradients(x): + return _identity_with_nan_gradients(x) - Args: - regularizer: The weight regularizer on the scalar variable. - activity_regularizer: The activity regularizer. - use_operator: If True, add using the * operator. If False, add using - tf.multiply. - var_name: The name of the variable. It can be useful to pass a name other - than 'v', to test having the attribute name (self.v) being different - from the variable name. - **kwargs: Passed to AssertTypeLayer constructor. 
- """ - self._regularizer = regularizer - if isinstance(regularizer, dict): - self._regularizer = regularizers.deserialize(regularizer, - custom_objects=globals()) - self._activity_regularizer = activity_regularizer - if isinstance(activity_regularizer, dict): - self._activity_regularizer = regularizers.deserialize( - activity_regularizer, custom_objects=globals()) - - self._use_operator = use_operator - self._var_name = var_name - super().__init__( - activity_regularizer=self._activity_regularizer, **kwargs) - - def build(self, _): - self.v = self.add_weight( - self._var_name, (), initializer='ones', regularizer=self._regularizer) - self.built = True - - def call(self, inputs): - self.assert_input_types(inputs) - return self._multiply(inputs, self.v) - - def _multiply(self, x, y): - if self._use_operator: - return x * y - else: - return tf.multiply(x, y) - - def get_config(self): - config = super().get_config() - config['regularizer'] = regularizers.serialize(self._regularizer) - config['activity_regularizer'] = regularizers.serialize( - self._activity_regularizer) - config['use_operator'] = self._use_operator - config['var_name'] = self._var_name - config['assert_type'] = self._assert_type - return config + return identity_with_nan_gradients + + +class AssertTypeLayer(base_layer.Layer): + """A layer which asserts it's inputs are a certain type.""" + + def __init__(self, assert_type=None, **kwargs): + self._assert_type = ( + tf.as_dtype(assert_type).name if assert_type else None + ) + super().__init__(**kwargs) + + def assert_input_types(self, inputs): + """Asserts `inputs` are of the correct type. Should be called in + call().""" + if self._assert_type: + inputs_flattened = tf.nest.flatten(inputs) + for inp in inputs_flattened: + assert inp.dtype.base_dtype == self._assert_type, ( + "Input tensor has type %s which does " + "not match assert type %s" + % (inp.dtype.name, self._assert_type) + ) + + +class MultiplyLayer(AssertTypeLayer): + """A layer which multiplies its input by a scalar variable.""" + + def __init__( + self, + regularizer=None, + activity_regularizer=None, + use_operator=False, + var_name="v", + **kwargs, + ): + """Initializes the MultiplyLayer. + + Args: + regularizer: The weight regularizer on the scalar variable. + activity_regularizer: The activity regularizer. + use_operator: If True, add using the * operator. If False, add using + tf.multiply. + var_name: The name of the variable. It can be useful to pass a name + other than 'v', to test having the attribute name (self.v) being + different from the variable name. + **kwargs: Passed to AssertTypeLayer constructor. 
+ """ + self._regularizer = regularizer + if isinstance(regularizer, dict): + self._regularizer = regularizers.deserialize( + regularizer, custom_objects=globals() + ) + self._activity_regularizer = activity_regularizer + if isinstance(activity_regularizer, dict): + self._activity_regularizer = regularizers.deserialize( + activity_regularizer, custom_objects=globals() + ) + + self._use_operator = use_operator + self._var_name = var_name + super().__init__( + activity_regularizer=self._activity_regularizer, **kwargs + ) + + def build(self, _): + self.v = self.add_weight( + self._var_name, + (), + initializer="ones", + regularizer=self._regularizer, + ) + self.built = True + + def call(self, inputs): + self.assert_input_types(inputs) + return self._multiply(inputs, self.v) + + def _multiply(self, x, y): + if self._use_operator: + return x * y + else: + return tf.multiply(x, y) + + def get_config(self): + config = super().get_config() + config["regularizer"] = regularizers.serialize(self._regularizer) + config["activity_regularizer"] = regularizers.serialize( + self._activity_regularizer + ) + config["use_operator"] = self._use_operator + config["var_name"] = self._var_name + config["assert_type"] = self._assert_type + return config class MultiplyLayerWithoutAutoCast(MultiplyLayer): - """Same as MultiplyLayer, but does not use AutoCastVariables.""" - - def build(self, _): - dtype = self.dtype - if dtype in ('float16', 'bfloat16'): - dtype = 'float32' - self.v = self.add_weight( - 'v', (), - initializer='ones', - dtype=dtype, - experimental_autocast=False, - regularizer=self._regularizer) - self.built = True - - def call(self, inputs): - self.assert_input_types(inputs) - assert self.v.dtype in (tf.float32, tf.float64) - return self._multiply(inputs, tf.cast(self.v, inputs.dtype)) + """Same as MultiplyLayer, but does not use AutoCastVariables.""" + + def build(self, _): + dtype = self.dtype + if dtype in ("float16", "bfloat16"): + dtype = "float32" + self.v = self.add_weight( + "v", + (), + initializer="ones", + dtype=dtype, + experimental_autocast=False, + regularizer=self._regularizer, + ) + self.built = True + + def call(self, inputs): + self.assert_input_types(inputs) + assert self.v.dtype in (tf.float32, tf.float64) + return self._multiply(inputs, tf.cast(self.v, inputs.dtype)) class IdentityRegularizer(regularizers.Regularizer): + def __call__(self, x): + assert x.dtype == tf.float32 + return tf.identity(x) - def __call__(self, x): - assert x.dtype == tf.float32 - return tf.identity(x) - - def get_config(self): - return {} + def get_config(self): + return {} class ReduceSumRegularizer(regularizers.Regularizer): + def __call__(self, x): + return tf.reduce_sum(x) - def __call__(self, x): - return tf.reduce_sum(x) - - def get_config(self): - return {} + def get_config(self): + return {} diff --git a/keras/mixed_precision/testdata/BUILD b/keras/mixed_precision/testdata/BUILD index cfb7f63eb457..cd79ce6cd465 100644 --- a/keras/mixed_precision/testdata/BUILD +++ b/keras/mixed_precision/testdata/BUILD @@ -2,10 +2,8 @@ # Contains checkpoints and SavedModels for testing purposes. 
package( - default_visibility = [ - "//keras:friends", - "//third_party/tensorflow/tools/pip_package:__pkg__", - ], + # copybara:uncomment default_applicable_licenses = ["//keras:license"], + default_visibility = ["//keras:friends"], licenses = ["notice"], ) diff --git a/keras/models/BUILD b/keras/models/BUILD index 66d533286c89..76161b078399 100644 --- a/keras/models/BUILD +++ b/keras/models/BUILD @@ -1,9 +1,11 @@ # Keras models +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "distribute_py_test") load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", ], @@ -78,7 +80,6 @@ distribute_py_test( shard_count = 8, tags = [ "multi_gpu", - "no_oss", # TODO(b/226938240): Reenable "nomultivm", "requires-net:ipv4", ], diff --git a/keras/models/__init__.py b/keras/models/__init__.py index 77e0f86f4e2d..6737076ba4c8 100644 --- a/keras/models/__init__.py +++ b/keras/models/__init__.py @@ -13,20 +13,11 @@ # limitations under the License. # ============================================================================== """Keras models API.""" -# pylint: disable=g-bad-import-order + from keras.engine.functional import Functional from keras.engine.sequential import Sequential from keras.engine.training import Model -from keras.models.cloning import clone_and_build_model -from keras.models.cloning import clone_model -from keras.models.cloning import share_weights -from keras.models.sharpness_aware_minimization import SharpnessAwareMinimization -from keras.saving.model_config import model_from_config -from keras.saving.model_config import model_from_json -from keras.saving.model_config import model_from_yaml -from keras.saving.save import load_model -from keras.saving.save import save_model # Private symbols that are used in tests. # TODO(b/221261361): Clean up private symbols usage and remove these imports. @@ -34,3 +25,12 @@ from keras.models.cloning import _clone_layer from keras.models.cloning import _clone_layers_and_model_config from keras.models.cloning import _clone_sequential_model +from keras.models.cloning import clone_and_build_model +from keras.models.cloning import clone_model +from keras.models.cloning import share_weights +from keras.models.sharpness_aware_minimization import SharpnessAwareMinimization +from keras.saving.legacy.model_config import model_from_config +from keras.saving.legacy.model_config import model_from_json +from keras.saving.legacy.model_config import model_from_yaml +from keras.saving.saving_api import load_model +from keras.saving.saving_api import save_model diff --git a/keras/models/cloning.py b/keras/models/cloning.py index abf69a61262c..85c5ffd1319b 100644 --- a/keras/models/cloning.py +++ b/keras/models/cloning.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -# pylint: disable=protected-access + """Code for model cloning, plus model-related API entries.""" import tensorflow.compat.v2 as tf + from keras import backend from keras import metrics as metrics_module -from keras.optimizers import optimizer_v1 from keras.engine import functional from keras.engine import sequential from keras.engine import training @@ -27,718 +27,875 @@ from keras.engine.base_layer import Layer from keras.engine.input_layer import Input from keras.engine.input_layer import InputLayer +from keras.optimizers import optimizer_v1 +from keras.saving.legacy import serialization +from keras.saving.legacy.saved_model.utils import keras_option_scope +from keras.saving.object_registration import CustomObjectScope from keras.utils import generic_utils from keras.utils import version_utils -from keras.utils.generic_utils import CustomObjectScope + +# isort: off from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export - # API entries importable from `keras.models`: -Model = training.Model # pylint: disable=invalid-name -Sequential = sequential.Sequential # pylint: disable=invalid-name +Model = training.Model +Sequential = sequential.Sequential # Callable used to clone a layer with weights preserved. def share_weights(layer): - return layer + return layer def _clone_layer(layer): - return layer.__class__.from_config(layer.get_config()) + return layer.__class__.from_config(layer.get_config()) def _insert_ancillary_layers(model, ancillary_layers, metrics_names, new_nodes): - """Inserts ancillary layers into the model with the proper order.""" - # Sort `AddMetric` layers so they agree with metrics_names. - metric_layers = [ - layer for layer in ancillary_layers if isinstance(layer, AddMetric) - ] - metric_layers.sort(key=lambda layer: metrics_names.index(layer.metric_name)) - ancillary_layers = [ - layer for layer in ancillary_layers if not isinstance(layer, AddMetric) - ] + metric_layers - model._insert_layers(ancillary_layers, relevant_nodes=list(new_nodes)) + """Inserts ancillary layers into the model with the proper order.""" + # Sort `AddMetric` layers so they agree with metrics_names. + metric_layers = [ + layer for layer in ancillary_layers if isinstance(layer, AddMetric) + ] + metric_layers.sort(key=lambda layer: metrics_names.index(layer.metric_name)) + ancillary_layers = [ + layer for layer in ancillary_layers if not isinstance(layer, AddMetric) + ] + metric_layers + model._insert_layers(ancillary_layers, relevant_nodes=list(new_nodes)) def _make_new_nodes(nodes_by_depth, layer_fn, layer_map, tensor_map): - """Uses the layers in `layer_map` to make new nodes based on `nodes_by_depth`. - - Args: - nodes_by_depth: Provides structure information to create new nodes. - layer_fn: Function to clone layers. - layer_map: Map from layers in `model` to new layers. - tensor_map: Map from tensors in `model` to newly compute tensors. - - Returns: - A set of new nodes. `layer_map` and `tensor_map` are updated. - """ - # Iterated over every node in the reference model, in depth order. - new_nodes = set() - depth_keys = list(nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - for depth in depth_keys: - nodes = nodes_by_depth[depth] - for node in nodes: - # Recover the corresponding layer. - layer = node.outbound_layer - - # Get or create layer. 
- if layer not in layer_map: - new_layer = layer_fn(layer) - layer_map[layer] = new_layer - layer = new_layer - else: - # Reuse previously cloned layer. - layer = layer_map[layer] - # Don't call InputLayer multiple times. - if isinstance(layer, InputLayer): - continue - - # If all previous input tensors are available in tensor_map, - # then call node.inbound_layer on them. - if all( - tensor in tensor_map for tensor in tf.nest.flatten(node.input_tensors)): - # Call layer. - args = tf.nest.map_structure(lambda t: tensor_map.get(t, t), - node.call_args) - kwargs = tf.nest.map_structure(lambda t: tensor_map.get(t, t), - node.call_kwargs) - output_tensors = layer(*args, **kwargs) - - # Thread-safe way to keep track of what node was created. - first_output_tensor = tf.nest.flatten(output_tensors)[0] - new_nodes.add( - layer._inbound_nodes[first_output_tensor._keras_history.node_index]) - - for x, y in zip( - tf.nest.flatten(node.output_tensors), tf.nest.flatten(output_tensors)): - tensor_map[x] = y - return new_nodes + """Make new nodes with the layers in `layer_map` based on `nodes_by_depth`. + + Args: + nodes_by_depth: Provides structure information to create new nodes. + layer_fn: Function to clone layers. + layer_map: Map from layers in `model` to new layers. + tensor_map: Map from tensors in `model` to newly computed tensors. + + Returns: + A set of new nodes. `layer_map` and `tensor_map` are updated. + """ + # Iterate over every node in the reference model, in depth order. + new_nodes = set() + depth_keys = list(nodes_by_depth.keys()) + depth_keys.sort(reverse=True) + for depth in depth_keys: + nodes = nodes_by_depth[depth] + for node in nodes: + # Recover the corresponding layer. + layer = node.outbound_layer + + # Get or create layer. + if layer not in layer_map: + new_layer = layer_fn(layer) + layer_map[layer] = new_layer + layer = new_layer + else: + # Reuse previously cloned layer. + layer = layer_map[layer] + # Don't call InputLayer multiple times. + if isinstance(layer, InputLayer): + continue + + # If all previous input tensors are available in tensor_map, + # then call node.inbound_layer on them. + if all( + tensor in tensor_map + for tensor in tf.nest.flatten(node.input_tensors) + ): + # Call layer. + args = tf.nest.map_structure( + lambda t: tensor_map.get(t, t), node.call_args + ) + kwargs = tf.nest.map_structure( + lambda t: tensor_map.get(t, t), node.call_kwargs + ) + output_tensors = layer(*args, **kwargs) + + # Thread-safe way to keep track of what node was created. + first_output_tensor = tf.nest.flatten(output_tensors)[0] + new_nodes.add( + layer._inbound_nodes[ + first_output_tensor._keras_history.node_index + ] + ) + + for x, y in zip( + tf.nest.flatten(node.output_tensors), + tf.nest.flatten(output_tensors), + ): + tensor_map[x] = y + return new_nodes def _clone_functional_model(model, input_tensors=None, layer_fn=_clone_layer): - """Clone a functional `Model` instance. - - Model cloning is similar to calling a model on new inputs, - except that it creates new layers (and thus new weights) instead - of sharing the weights of the existing layers. - - Input layers are always cloned. - - Args: - model: Instance of `Model`. - input_tensors: optional list of input tensors - to build the model upon. If not provided, - placeholders will be created. - layer_fn: callable to be applied on non-input layers in the model. By - default it clones the layer. Another example is to preserve the layer - to share the weights.
This is required when we create a per-replica - copy of the model with distribution strategy; we want the weights to - be shared but still feed inputs separately so we create new input - layers. - - Returns: - An instance of `Model` reproducing the behavior - of the original model, on top of new inputs tensors, - using newly instantiated weights. - - Raises: - ValueError: in case of invalid `model` argument value or `layer_fn` - argument value. - """ - if not isinstance(model, Model): - raise ValueError('Expected `model` argument ' - f'to be a `Model` instance. Received: model={model}') - if isinstance(model, Sequential): - raise ValueError('Expected `model` argument ' - 'to be a functional `Model` instance, ' - f'got a `Sequential` instance instead: {model}') - if not model._is_graph_network: - raise ValueError('Expected `model` argument ' - 'to be a functional `Model` instance, ' - f'but got a subclassed model instead: {model}') - - new_input_layers = {} # Cache for created layers. - if input_tensors is not None: - # Make sure that all input tensors come from a Keras layer. - input_tensors = tf.nest.flatten(input_tensors) - for i, input_tensor in enumerate(input_tensors): - original_input_layer = model._input_layers[i] - - # Cache input layer. Create a new layer if the tensor is originally not - # from a Keras layer. - if not backend.is_keras_tensor(input_tensor): - name = original_input_layer.name - input_tensor = Input(tensor=input_tensor, - name='input_wrapper_for_' + name) - newly_created_input_layer = input_tensor._keras_history.layer - new_input_layers[original_input_layer] = newly_created_input_layer - else: - new_input_layers[ - original_input_layer] = input_tensor._keras_history.layer - - if not callable(layer_fn): - raise ValueError('Expected `layer_fn` argument to be a callable. ' - f'Received: layer_fn={layer_fn}') - - model_configs, created_layers = _clone_layers_and_model_config( - model, new_input_layers, layer_fn) - # Reconstruct model from the config, using the cloned layers. - input_tensors, output_tensors, created_layers = ( - functional.reconstruct_from_config(model_configs, - created_layers=created_layers)) - metrics_names = model.metrics_names - model = Model(input_tensors, output_tensors, name=model.name) - # Layers not directly tied to outputs of the Model, such as loss layers - # created in `add_loss` and `add_metric`. - ancillary_layers = [ - layer for layer in created_layers.values() if layer not in model.layers - ] - # TODO(b/162887610): This may need to adjust the inbound node index if the - # created layers had already been used to define other models. - if ancillary_layers: - new_nodes = tf.nest.flatten([ - layer.inbound_nodes[1:] - if functional._should_skip_first_node(layer) - else layer.inbound_nodes for layer in created_layers.values() - ]) - _insert_ancillary_layers(model, ancillary_layers, metrics_names, new_nodes) - return model + """Clone a functional `Model` instance. + + Model cloning is similar to calling a model on new inputs, + except that it creates new layers (and thus new weights) instead + of sharing the weights of the existing layers. + + Input layers are always cloned. + + Args: + model: Instance of `Model`. + input_tensors: optional list of input tensors + to build the model upon. If not provided, + placeholders will be created. + layer_fn: callable to be applied on non-input layers in the model. By + default it clones the layer. Another example is to preserve the + layer to share the weights. 
This is required when we create a + per-replica copy of the model with distribution strategy; we want + the weights to be shared but still feed inputs separately so we + create new input layers. + + Returns: + An instance of `Model` reproducing the behavior + of the original model, on top of new inputs tensors, + using newly instantiated weights. + + Raises: + ValueError: in case of invalid `model` argument value or `layer_fn` + argument value. + """ + if layer_fn is None: + layer_fn = _clone_layer + + if not isinstance(model, Model): + raise ValueError( + "Expected `model` argument " + f"to be a `Model` instance. Received: model={model}" + ) + if isinstance(model, Sequential): + raise ValueError( + "Expected `model` argument " + "to be a functional `Model` instance, " + f"got a `Sequential` instance instead: {model}" + ) + if not model._is_graph_network: + raise ValueError( + "Expected `model` argument " + "to be a functional `Model` instance, " + f"but got a subclassed model instead: {model}" + ) + + new_input_layers = {} # Cache for created layers. + if input_tensors is not None: + # Make sure that all input tensors come from a Keras layer. + input_tensors = tf.nest.flatten(input_tensors) + for i, input_tensor in enumerate(input_tensors): + original_input_layer = model._input_layers[i] + + # Cache input layer. Create a new layer if the tensor is originally + # not from a Keras layer. + if not backend.is_keras_tensor(input_tensor): + name = original_input_layer.name + input_tensor = Input( + tensor=input_tensor, name="input_wrapper_for_" + name + ) + newly_created_input_layer = input_tensor._keras_history.layer + new_input_layers[ + original_input_layer + ] = newly_created_input_layer + else: + new_input_layers[ + original_input_layer + ] = input_tensor._keras_history.layer + + if not callable(layer_fn): + raise ValueError( + "Expected `layer_fn` argument to be a callable. " + f"Received: layer_fn={layer_fn}" + ) + + # For affected g3 users who need to default to old serialization in cloning + if getattr(model, "use_legacy_config", False): + with keras_option_scope( + save_traces=False, in_tf_saved_model_scope=True + ): + model_configs, created_layers = _clone_layers_and_model_config( + model, new_input_layers, layer_fn + ) + else: + model_configs, created_layers = _clone_layers_and_model_config( + model, new_input_layers, layer_fn + ) + # Reconstruct model from the config, using the cloned layers. + ( + input_tensors, + output_tensors, + created_layers, + ) = functional.reconstruct_from_config( + model_configs, created_layers=created_layers + ) + metrics_names = model.metrics_names + if functional.has_functional_like_constructor(model.__class__): + new_model = model.__class__( + input_tensors, output_tensors, name=model.name + ) + else: + # This may be incorrect: the new model will end up having a different + # class than the original. However various existing models rely + # on this behavior, so we keep it. + new_model = Model(input_tensors, output_tensors, name=model.name) + + # Layers not directly tied to outputs of the Model, such as loss layers + # created in `add_loss` and `add_metric`. + ancillary_layers = [ + layer + for layer in created_layers.values() + if layer not in new_model.layers + ] + # TODO(b/162887610): This may need to adjust the inbound node index if the + # created layers had already been used to define other models. 
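For context, "ancillary layers" here are those created by `add_loss` or `add_metric` calls on symbolic tensors; they live at negative depth in the node graph (see `_remove_ancillary_layers` below) and must be carried over to the clone explicitly, which is what the block that follows does. A minimal sketch of a model that produces one:

```python
import tensorflow as tf

inputs = tf.keras.Input(shape=(4,))
outputs = tf.keras.layers.Dense(1)(inputs)
model = tf.keras.Model(inputs, outputs)

# Adds a loss layer to the graph that is not on the path to `outputs`.
model.add_loss(tf.reduce_mean(outputs))
```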
+ if ancillary_layers: + new_nodes = tf.nest.flatten( + [ + layer.inbound_nodes[1:] + if functional._should_skip_first_node(layer) + else layer.inbound_nodes + for layer in created_layers.values() + ] + ) + _insert_ancillary_layers( + new_model, ancillary_layers, metrics_names, new_nodes + ) + return new_model def _clone_layers_and_model_config(model, input_layers, layer_fn): - """Clones all layers, and returns the model config without serializing layers. - - This function ensures that only the node graph is retrieved when getting the - model config. The `layer_fn` used to clone layers might not rely on - `layer.get_config()`, so some custom layers do not define `get_config`. - Trying to retrieve the config results in errors. - - Args: - model: A Functional model. - input_layers: Dictionary mapping input layers in `model` to new input layers - layer_fn: Function used to clone all non-input layers. - - Returns: - Model config object, and a dictionary of newly created layers. - """ - created_layers = {} - def _copy_layer(layer): - # Whenever the network config attempts to get the layer serialization, - # return a dummy dictionary. - if layer in input_layers: - created_layers[layer.name] = input_layers[layer] - elif layer in model._input_layers: - created_layers[layer.name] = InputLayer(**layer.get_config()) - else: - created_layers[layer.name] = layer_fn(layer) - return {} + """Clones all layers; returns the model config without serializing layers. + + This function ensures that only the node graph is retrieved when getting the + model config. The `layer_fn` used to clone layers might not rely on + `layer.get_config()`, so some custom layers do not define `get_config`. + Trying to retrieve the config results in errors. + + Args: + model: A Functional model. + input_layers: Dictionary mapping input layers in `model` to new input + layers. + layer_fn: Function used to clone all non-input layers. + + Returns: + Model config object, and a dictionary of newly created layers. + """ + created_layers = {} + + def _copy_layer(layer): + # Whenever the network config attempts to get the layer serialization, + # return a dummy dictionary. + if layer in input_layers: + created_layers[layer.name] = input_layers[layer] + elif layer in model._input_layers: + created_layers[layer.name] = InputLayer(**layer.get_config()) + else: + created_layers[layer.name] = layer_fn(layer) + return {} - config = functional.get_network_config( - model, serialize_layer_fn=_copy_layer) - return config, created_layers + config = functional.get_network_config( + model, serialize_layer_fn=_copy_layer + ) + return config, created_layers def _remove_ancillary_layers(model, layer_map, layers): - """Removes and returns any ancillary layers from `layers` based on `model`. + """Removes and returns any ancillary layers from `layers` based on `model`. - Ancillary layers are part of the model topology but not used to compute the - model outputs, e.g., layers from `add_loss` and `add_metric`. + Ancillary layers are part of the model topology but not used to compute the + model outputs, e.g., layers from `add_loss` and `add_metric`. - Args: - model: A Keras Model. - layer_map: A map to from layers in the `model` to those in `layers`. - layers: A list of all layers. + Args: + model: A Keras Model. + layer_map: A map from layers in the `model` to those in `layers`. + layers: A list of all layers. - Returns: - Two lists of layers: (1) `layers` with the ancillary layers removed, and (2) - the ancillary layers.
- """ - ancillary_layers = [] # Additional layers for computing losses and metrics. - if not model._is_graph_network: - return layers, ancillary_layers + Returns: + Two lists of layers: (1) `layers` with the ancillary layers removed, and + (2) the ancillary layers. + """ + ancillary_layers = [] # Additional layers for computing losses and metrics. + if not model._is_graph_network: + return layers, ancillary_layers - # Ancillary layers are those with depth < 0. - depths = [depth for depth in model._nodes_by_depth.keys() if depth < 0] - depths.sort(reverse=True) # Order topologically from inputs to outputs. - for depth in depths: - for node in model._nodes_by_depth[depth]: - ancillary_layers.append(layer_map[node.outbound_layer]) + # Ancillary layers are those with depth < 0. + depths = [depth for depth in model._nodes_by_depth.keys() if depth < 0] + depths.sort(reverse=True) # Order topologically from inputs to outputs. + for depth in depths: + for node in model._nodes_by_depth[depth]: + ancillary_layers.append(layer_map[node.outbound_layer]) - return [l for l in layers if l not in ancillary_layers], ancillary_layers + return [l for l in layers if l not in ancillary_layers], ancillary_layers def _clone_sequential_model(model, input_tensors=None, layer_fn=_clone_layer): - """Clone a `Sequential` model instance. - - Model cloning is similar to calling a model on new inputs, - except that it creates new layers (and thus new weights) instead - of sharing the weights of the existing layers. - - Args: - model: Instance of `Sequential`. - input_tensors: optional list of input tensors - to build the model upon. If not provided, - placeholders will be created. - layer_fn: callable to be applied on non-input layers in the model. By - default it clones the layer. Another example is to preserve the layer - to share the weights. This is required when we create a per-replica - copy of the model with distribution strategy; we want the weights to - be shared but still feed inputs separately so we create new input - layers. - - Returns: - An instance of `Sequential` reproducing the behavior - of the original model, on top of new inputs tensors, - using newly instantiated weights. - - Raises: - ValueError: in case of invalid `model` argument value or `layer_fn` - argument value. - """ - if not isinstance(model, Sequential): - raise ValueError('Expected `model` argument ' - 'to be a `Sequential` model instance. ' - f'Received: model={model}') - - if not callable(layer_fn): - raise ValueError( - 'Expected `layer_fn` argument to be a callable. ' - f'Received: layer_fn={layer_fn}') - - layers = [] # Layers needed to compute the model's outputs. - layer_map = {} - # Ensure that all layers are cloned. The model's layers - # property will exclude the initial InputLayer (if it exists) in the model, - # resulting in a different Sequential model structure. - for layer in model._flatten_layers(include_self=False, recursive=False): - if isinstance(layer, InputLayer) and input_tensors is not None: - # If input tensors are provided, the original model's InputLayer is - # overwritten with a different InputLayer. 
- continue - cloned_layer = ( - _clone_layer(layer) - if isinstance(layer, InputLayer) else layer_fn(layer)) - layers.append(cloned_layer) - layer_map[layer] = cloned_layer - layers, ancillary_layers = _remove_ancillary_layers(model, layer_map, layers) - - if input_tensors is None: - cloned_model = Sequential(layers=layers, name=model.name) - elif len(generic_utils.to_list(input_tensors)) != 1: - raise ValueError( - 'To clone a `Sequential` model, we expect at most one tensor as part ' - f'of `input_tensors`. Received: input_tensors={input_tensors}') - else: - # Overwrite the original model's input layer. - if isinstance(input_tensors, tuple): - input_tensors = list(input_tensors) - x = generic_utils.to_list(input_tensors)[0] - if backend.is_keras_tensor(x): - origin_layer = x._keras_history.layer - if isinstance(origin_layer, InputLayer): - cloned_model = Sequential( - layers=[origin_layer] + layers, name=model.name) - else: - raise ValueError('Cannot clone a `Sequential` model on top ' - 'of a tensor that comes from a Keras layer ' - 'other than an `InputLayer`. ' - 'Use the Functional API instead. ' - f'Received: input_tensors={input_tensors}') + """Clone a `Sequential` model instance. + + Model cloning is similar to calling a model on new inputs, + except that it creates new layers (and thus new weights) instead + of sharing the weights of the existing layers. + + Args: + model: Instance of `Sequential`. + input_tensors: optional list of input tensors + to build the model upon. If not provided, + placeholders will be created. + layer_fn: callable to be applied on non-input layers in the model. By + default it clones the layer. Another example is to preserve the + layer to share the weights. This is required when we create a + per-replica copy of the model with distribution strategy; we want + the weights to be shared but still feed inputs separately so we + create new input layers. + + Returns: + An instance of `Sequential` reproducing the behavior + of the original model, on top of new inputs tensors, + using newly instantiated weights. + + Raises: + ValueError: in case of invalid `model` argument value or `layer_fn` + argument value. + """ + if layer_fn is None: + layer_fn = _clone_layer + + if not isinstance(model, Sequential): + raise ValueError( + "Expected `model` argument " + "to be a `Sequential` model instance. " + f"Received: model={model}" + ) + + if not callable(layer_fn): + raise ValueError( + "Expected `layer_fn` argument to be a callable. " + f"Received: layer_fn={layer_fn}" + ) + + layers = [] # Layers needed to compute the model's outputs. + layer_map = {} + # Ensure that all layers are cloned. The model's layers + # property will exclude the initial InputLayer (if it exists) in the model, + # resulting in a different Sequential model structure. + for layer in model._flatten_layers(include_self=False, recursive=False): + if isinstance(layer, InputLayer) and input_tensors is not None: + # If input tensors are provided, the original model's InputLayer is + # overwritten with a different InputLayer. 
+ continue + cloned_layer = ( + _clone_layer(layer) + if isinstance(layer, InputLayer) + else layer_fn(layer) + ) + layers.append(cloned_layer) + layer_map[layer] = cloned_layer + layers, ancillary_layers = _remove_ancillary_layers( + model, layer_map, layers + ) + + if input_tensors is None: + cloned_model = Sequential(layers=layers, name=model.name) + elif len(generic_utils.to_list(input_tensors)) != 1: + raise ValueError( + "To clone a `Sequential` model, we expect at most one tensor as " + f"part of `input_tensors`. Received: input_tensors={input_tensors}" + ) else: - input_tensor = Input(tensor=x, name='input_wrapper_for_' + str(x.name)) - input_layer = input_tensor._keras_history.layer - cloned_model = Sequential(layers=[input_layer] + layers, name=model.name) - - if not ancillary_layers: + # Overwrite the original model's input layer. + if isinstance(input_tensors, tuple): + input_tensors = list(input_tensors) + x = generic_utils.to_list(input_tensors)[0] + if backend.is_keras_tensor(x): + origin_layer = x._keras_history.layer + if isinstance(origin_layer, InputLayer): + cloned_model = Sequential( + layers=[origin_layer] + layers, name=model.name + ) + else: + raise ValueError( + "Cannot clone a `Sequential` model on top " + "of a tensor that comes from a Keras layer " + "other than an `InputLayer`. " + "Use the Functional API instead. " + f"Received: input_tensors={input_tensors}" + ) + else: + input_tensor = Input( + tensor=x, name="input_wrapper_for_" + str(x.name) + ) + input_layer = input_tensor._keras_history.layer + cloned_model = Sequential( + layers=[input_layer] + layers, name=model.name + ) + + if not ancillary_layers: + return cloned_model + + tensor_map = {} # Maps tensors from `model` to those in `cloned_model`. + for depth, cloned_nodes in cloned_model._nodes_by_depth.items(): + nodes = model._nodes_by_depth[depth] + # This should be safe in a Sequential model. In an arbitrary network, + # you need to sort using the outbound layer of the node as a key. + for cloned_node, node in zip(cloned_nodes, nodes): + if isinstance(cloned_node.output_tensors, list): + for j, output_tensor in enumerate(cloned_node.output_tensors): + tensor_map[node.output_tensors[j]] = output_tensor + else: + tensor_map[node.output_tensors] = cloned_node.output_tensors + # Ancillary nodes have negative depth. + new_nodes = _make_new_nodes( + { + depth: nodes + for depth, nodes in model._nodes_by_depth.items() + if depth < 0 + }, + layer_fn, + layer_map, + tensor_map, + ) + _insert_ancillary_layers( + cloned_model, ancillary_layers, model.metrics_names, new_nodes + ) return cloned_model - tensor_map = {} # Maps tensors from `model` to those in `cloned_model`. - for depth, cloned_nodes in cloned_model._nodes_by_depth.items(): - nodes = model._nodes_by_depth[depth] - # This should be safe in a Sequential model. In an arbitrary network, you - # need to sort using the outbound layer of the node as a key. - for cloned_node, node in zip(cloned_nodes, nodes): - if isinstance(cloned_node.output_tensors, list): - for j, output_tensor in enumerate(cloned_node.output_tensors): - tensor_map[node.output_tensors[j]] = output_tensor - else: - tensor_map[node.output_tensors] = cloned_node.output_tensors - # Ancillary nodes have negative depth. 
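As a usage note for the function being rewritten here, the following is a minimal sketch of how `_clone_sequential_model` is reached through the public `clone_model` entry point; the shape assert is illustrative, not taken from the diff.

```python
import keras

# Clone a Sequential model with fresh weights, then clone it again on top
# of an explicit input tensor, which replaces the original InputLayer as
# described in the comments above.
model = keras.Sequential([
    keras.Input(shape=(4,)),
    keras.layers.Dense(8, activation="relu"),
    keras.layers.Dense(1),
])

clone = keras.models.clone_model(model)  # new placeholders, new weights

new_input = keras.Input(shape=(4,), name="new_input")
clone_on_input = keras.models.clone_model(model, input_tensors=new_input)
assert clone_on_input.inputs[0].shape == new_input.shape
```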
- new_nodes = _make_new_nodes( - { - depth: nodes - for depth, nodes in model._nodes_by_depth.items() - if depth < 0 - }, layer_fn, layer_map, tensor_map) - _insert_ancillary_layers(cloned_model, ancillary_layers, model.metrics_names, - new_nodes) - return cloned_model - - -@keras_export('keras.models.clone_model') -def clone_model(model, input_tensors=None, clone_function=None): - """Clone a Functional or Sequential `Model` instance. - - Model cloning is similar to calling a model on new inputs, - except that it creates new layers (and thus new weights) instead - of sharing the weights of the existing layers. - - Note that - `clone_model` will not preserve the uniqueness of shared objects within the - model (e.g. a single variable attached to two distinct layers will be - restored as two separate variables). - - Args: - model: Instance of `Model` - (could be a Functional model or a Sequential model). - input_tensors: optional list of input tensors or InputLayer objects - to build the model upon. If not provided, - new `Input` objects will be created. - clone_function: Callable to be used to clone each layer in the target - model (except `InputLayer` instances). It takes as argument the layer - instance to be cloned, and returns the corresponding layer instance to - be used in the model copy. If unspecified, this callable defaults to - the following serialization/deserialization function: - `lambda layer: layer.__class__.from_config(layer.get_config())`. - By passing a custom callable, you can customize your copy of the - model, e.g. by wrapping certain layers of interest (you might want to - replace all `LSTM` instances with equivalent - `Bidirectional(LSTM(...))` instances, for example). - - Returns: - An instance of `Model` reproducing the behavior - of the original model, on top of new inputs tensors, - using newly instantiated weights. The cloned model may behave - differently from the original model if a custom `clone_function` - modifies the layer. - - Example: - - ```python - # Create a test Sequential model. - model = keras.Sequential([ - keras.Input(shape=(728,)), - keras.layers.Dense(32, activation='relu'), - keras.layers.Dense(1, activation='sigmoid'), - ]) - # Create a copy of the test model (with freshly initialized weights). - new_model = clone_model(model) - ``` - - Note that subclassed models cannot be cloned, since their internal - layer structure is not known. To achieve equivalent functionality - as `clone_model` in the case of a subclassed model, simply make sure - that the model class implements `get_config()` - (and optionally `from_config()`), and call: - - ```python - new_model = model.__class__.from_config(model.get_config()) - ``` - """ - with generic_utils.DisableSharedObjectScope(): - if clone_function is None: - clone_function = _clone_layer - if isinstance(model, Sequential): - return _clone_sequential_model( - model, input_tensors=input_tensors, layer_fn=clone_function) - else: - return _clone_functional_model( - model, input_tensors=input_tensors, layer_fn=clone_function) +@keras_export("keras.models.clone_model") +def clone_model(model, input_tensors=None, clone_function=None): + """Clone a Functional or Sequential `Model` instance. + + Model cloning is similar to calling a model on new inputs, + except that it creates new layers (and thus new weights) instead + of sharing the weights of the existing layers. + + Note that + `clone_model` will not preserve the uniqueness of shared objects within the + model (e.g. 
a single variable attached to two distinct layers will be + restored as two separate variables). + + Args: + model: Instance of `Model` + (could be a Functional model or a Sequential model). + input_tensors: optional list of input tensors or InputLayer objects + to build the model upon. If not provided, + new `Input` objects will be created. + clone_function: Callable to be used to clone each layer in the target + model (except `InputLayer` instances). It takes as argument the + layer instance to be cloned, and returns the corresponding layer + instance to be used in the model copy. If unspecified, this callable + becomes the following serialization/deserialization function: + `lambda layer: layer.__class__.from_config(layer.get_config())`. + By passing a custom callable, you can customize your copy of the + model, e.g. by wrapping certain layers of interest (you might want + to replace all `LSTM` instances with equivalent + `Bidirectional(LSTM(...))` instances, for example). + Defaults to `None`. + + Returns: + An instance of `Model` reproducing the behavior + of the original model, on top of new inputs tensors, + using newly instantiated weights. The cloned model may behave + differently from the original model if a custom `clone_function` + modifies the layer. + + Example: + + ```python + # Create a test Sequential model. + model = keras.Sequential([ + keras.Input(shape=(728,)), + keras.layers.Dense(32, activation='relu'), + keras.layers.Dense(1, activation='sigmoid'), + ]) + # Create a copy of the test model (with freshly initialized weights). + new_model = clone_model(model) + ``` + + Note that subclassed models cannot be cloned, since their internal + layer structure is not known. To achieve equivalent functionality + as `clone_model` in the case of a subclassed model, simply make sure + that the model class implements `get_config()` + (and optionally `from_config()`), and call: + + ```python + new_model = model.__class__.from_config(model.get_config()) + ``` + """ + with serialization.DisableSharedObjectScope(): + if isinstance(model, Sequential): + return _clone_sequential_model( + model, input_tensors=input_tensors, layer_fn=clone_function + ) + if isinstance(model, functional.Functional): + # If the get_config() method is the same as a regular Functional + # model, we're safe to use _clone_functional_model (which relies + # on a Functional constructor). In the case where the get_config + # is custom, this may not necessarily work, but if clone_function + # or input_tensors are passed, we attempt it anyway + # in order to preserve backwards compatibility. + if generic_utils.is_default(model.get_config) or ( + clone_function or input_tensors + ): + return _clone_functional_model( + model, input_tensors=input_tensors, layer_fn=clone_function + ) + + # Case of a custom model class + if clone_function or input_tensors: + raise ValueError( + "Arguments clone_function and input_tensors " + "are only supported for Sequential models " + "or Functional models. Received model of " + f"type '{model.__class__.__name__}', with " + f"clone_function={clone_function} and " + f"input_tensors={input_tensors}" + ) + # Note that a custom object scope may be required in this case. + return model.__class__.from_config(model.get_config()) # "Clone" a subclassed model by resetting all of the attributes. def _in_place_subclassed_model_reset(model): - """Substitute for model cloning that works for subclassed models. - - Subclassed models cannot be cloned because their topology is not serializable. 
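The `clone_function` hook documented above supports layer substitution. Below is a hedged sketch of the LSTM-to-`Bidirectional(LSTM(...))` swap the docstring mentions; the helper name `swap_lstms` is ours, not from the diff.

```python
import keras

def swap_lstms(layer):
    # Replace every LSTM with a Bidirectional wrapper around an
    # equivalently configured LSTM; default-clone everything else.
    if isinstance(layer, keras.layers.LSTM):
        return keras.layers.Bidirectional(
            keras.layers.LSTM.from_config(layer.get_config())
        )
    return layer.__class__.from_config(layer.get_config())

inputs = keras.Input(shape=(3, 8))
model = keras.Model(inputs, keras.layers.LSTM(16)(inputs))
new_model = keras.models.clone_model(model, clone_function=swap_lstms)
```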
- To "instantiate" an identical model in a new TF graph, we reuse the original - model object, but we clear its state. - - After calling this function on a model instance, you can use the model - instance as if it were a model clone (in particular you can use it in a new - graph). - - This method clears the state of the input model. It is thus destructive. - However the original state can be restored fully by calling - `_in_place_subclassed_model_state_restoration`. - - Args: - model: Instance of a Keras model created via subclassing. - - Raises: - ValueError: In case the model uses a subclassed model as inner layer. - """ - assert not model._is_graph_network # Only makes sense for subclassed networks - # Select correct base class for new Model. - version_utils.swap_class(model.__class__, training.Model, training_v1.Model, - tf.compat.v1.executing_eagerly_outside_functions()) - # Retrieve all layers tracked by the model as well as their attribute names - attributes_cache = {} - for name in dir(model): - # Skip attrs that track other trackables. - if name == 'submodules' or name == '_self_tracked_trackables': - continue - - try: - value = getattr(model, name) - except (AttributeError, ValueError, TypeError): - continue - if isinstance(value, Layer): - attributes_cache[name] = value - assert value in model.layers - if hasattr(value, 'layers') and value.layers: - raise ValueError('We do not support the use of nested layers ' - 'in `model_to_estimator` at this time. Found nested ' - f'layer: {value}') - elif isinstance( - value, (list, tuple)) and name not in ('layers', '_layers', 'metrics', - '_compile_metric_functions', - '_output_loss_metrics'): - # Handle case: list/tuple of layers (also tracked by the Network API). - if value and all(isinstance(val, Layer) for val in value): - raise ValueError('We do not support the use of list-of-layers ' - 'attributes in subclassed models used with ' - '`model_to_estimator` at this time. Found list ' - f'model: {name}') - - # Replace layers on the model with fresh layers - layers_to_names = {value: key for key, value in attributes_cache.items()} - original_layers = list( - model._flatten_layers(include_self=False, recursive=False)) - setattr_tracking = model._setattr_tracking - model._setattr_tracking = False - model._self_tracked_trackables = [] - for layer in original_layers: # We preserve layer order. - config = layer.get_config() - # This will not work for nested subclassed models used as layers. - # This would be theoretically possible to support, but would add complexity. - # Only do it if users complain. - if isinstance(layer, training.Model) and not layer._is_graph_network: - raise ValueError('We do not support the use of nested subclassed models ' - 'in `model_to_estimator` at this time. 
Found nested ' - f'model: {layer}') - fresh_layer = layer.__class__.from_config(config) - name = layers_to_names[layer] - setattr(model, name, fresh_layer) - model._self_tracked_trackables.append(fresh_layer) - - # Cache original model build attributes (in addition to layers) - if (not hasattr(model, '_original_attributes_cache') or - model._original_attributes_cache is None): - if model.built: - attributes_to_cache = [ - 'inputs', - 'outputs', - 'total_loss', - 'optimizer', - 'train_function', - 'test_function', - 'predict_function', - '_training_endpoints', - '_collected_trainable_weights', - '_feed_inputs', - '_feed_input_names', - '_feed_input_shapes', - ] - for name in attributes_to_cache: - attributes_cache[name] = getattr(model, name) - model._original_attributes_cache = attributes_cache - _reset_build_compile_trackers(model) - model._setattr_tracking = setattr_tracking + """Substitute for model cloning that works for subclassed models. + + Subclassed models cannot be cloned because their topology is not + serializable. To "instantiate" an identical model in a new TF graph, we + reuse the original model object, but we clear its state. + + After calling this function on a model instance, you can use the model + instance as if it were a model clone (in particular you can use it in a new + graph). + + This method clears the state of the input model. It is thus destructive. + However the original state can be restored fully by calling + `_in_place_subclassed_model_state_restoration`. + + Args: + model: Instance of a Keras model created via subclassing. + + Raises: + ValueError: In case the model uses a subclassed model as inner layer. + """ + assert ( + not model._is_graph_network + ) # Only makes sense for subclassed networks + # Select correct base class for new Model. + version_utils.swap_class( + model.__class__, + training.Model, + training_v1.Model, + tf.compat.v1.executing_eagerly_outside_functions(), + ) + # Retrieve all layers tracked by the model as well as their attribute names + attributes_cache = {} + for name in dir(model): + # Skip attrs that track other trackables. + if name == "submodules" or name == "_self_tracked_trackables": + continue + + try: + value = getattr(model, name) + except (AttributeError, ValueError, TypeError): + continue + if isinstance(value, Layer): + attributes_cache[name] = value + assert value in model.layers + if hasattr(value, "layers") and value.layers: + raise ValueError( + "We do not support the use of nested layers " + "in `model_to_estimator` at this time. Found nested " + f"layer: {value}" + ) + elif isinstance(value, (list, tuple)) and name not in ( + "layers", + "_layers", + "metrics", + "_compile_metric_functions", + "_output_loss_metrics", + ): + # Handle case: list/tuple of layers (also tracked by the Network + # API). + if value and all(isinstance(val, Layer) for val in value): + raise ValueError( + "We do not support the use of list-of-layers " + "attributes in subclassed models used with " + "`model_to_estimator` at this time. Found list " + f"model: {name}" + ) + + # Replace layers on the model with fresh layers + layers_to_names = {value: key for key, value in attributes_cache.items()} + original_layers = list( + model._flatten_layers(include_self=False, recursive=False) + ) + setattr_tracking = model._setattr_tracking + model._setattr_tracking = False + model._self_tracked_trackables = [] + for layer in original_layers: # We preserve layer order. 
+ config = layer.get_config() + # This will not work for nested subclassed models used as layers. + # This would be theoretically possible to support, but would add + # complexity. Only do it if users complain. + if isinstance(layer, training.Model) and not layer._is_graph_network: + raise ValueError( + "We do not support the use of nested subclassed models " + "in `model_to_estimator` at this time. Found nested " + f"model: {layer}" + ) + fresh_layer = layer.__class__.from_config(config) + name = layers_to_names[layer] + setattr(model, name, fresh_layer) + model._self_tracked_trackables.append(fresh_layer) + + # Cache original model build attributes (in addition to layers) + if ( + not hasattr(model, "_original_attributes_cache") + or model._original_attributes_cache is None + ): + if model.built: + attributes_to_cache = [ + "inputs", + "outputs", + "total_loss", + "optimizer", + "train_function", + "test_function", + "predict_function", + "_training_endpoints", + "_collected_trainable_weights", + "_feed_inputs", + "_feed_input_names", + "_feed_input_shapes", + ] + for name in attributes_to_cache: + attributes_cache[name] = getattr(model, name) + model._original_attributes_cache = attributes_cache + _reset_build_compile_trackers(model) + model._setattr_tracking = setattr_tracking def _reset_build_compile_trackers(model): - """Reset state trackers for model. - - Note that we do not actually zero out attributes such as optimizer, - but instead rely on the expectation that all of the attrs will be - over-written on calling build/compile/etc. This is somewhat fragile, - insofar as we check elsewhere for the presence of these attributes as - evidence of having been built/compiled/etc. Pending a better way to do this, - we reset key attributes here to allow building and compiling. - - Args: - model: the model that is being reset - """ - # Reset build state - model.built = False - model.inputs = None - model.outputs = None - # Reset compile state - model._is_compiled = False # pylint:disable=protected-access - if not tf.compat.v1.executing_eagerly_outside_functions(): - model._v1_compile_was_called = False - model.optimizer = None + """Reset state trackers for model. + + Note that we do not actually zero out attributes such as optimizer, + but instead rely on the expectation that all of the attrs will be + over-written on calling build/compile/etc. This is somewhat fragile, + insofar as we check elsewhere for the presence of these attributes as + evidence of having been built/compiled/etc. Pending a better way to do this, + we reset key attributes here to allow building and compiling. + + Args: + model: the model that is being reset + """ + # Reset build state + model.built = False + model.inputs = None + model.outputs = None + # Reset compile state + model._is_compiled = False + if not tf.compat.v1.executing_eagerly_outside_functions(): + model._v1_compile_was_called = False + model.optimizer = None @keras_export( - 'keras.__internal__.models.in_place_subclassed_model_state_restoration', - v1=[]) + "keras.__internal__.models.in_place_subclassed_model_state_restoration", + v1=[], +) def in_place_subclassed_model_state_restoration(model): - """Restores the original state of a model after it was "reset". - - This undoes this action of `_in_place_subclassed_model_reset`, which is called - in `clone_and_build_model` if `in_place_reset` is set to True. - - Args: - model: Instance of a Keras model created via subclassing, on which - `_in_place_subclassed_model_reset` was previously called. 
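Because the reset path above is destructive, the docstrings in this diff repeatedly point to a safer alternative for subclassed models: implement `get_config` so the class can be rebuilt via `from_config`. A brief sketch, where the `SmallModel` class is hypothetical:

```python
import tensorflow.compat.v2 as tf
import keras

class SmallModel(keras.Model):
    def __init__(self, units=4, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.dense = keras.layers.Dense(units)

    def call(self, x):
        return self.dense(x)

    def get_config(self):
        return {"units": self.units}

model = SmallModel(units=8)
model(tf.zeros((1, 4)))  # build the original
fresh = SmallModel.from_config(model.get_config())  # same config, new weights
```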
- """ - assert not model._is_graph_network - # Restore layers and build attributes - if (hasattr(model, '_original_attributes_cache') and - model._original_attributes_cache is not None): - # Models have sticky attribute assignment, so we want to be careful to add - # back the previous attributes and track Layers by their original names - # without adding dependencies on "utility" attributes which Models exempt - # when they're constructed. - setattr_tracking = model._setattr_tracking - model._setattr_tracking = False - model._self_tracked_trackables = [] - for name, value in model._original_attributes_cache.items(): - setattr(model, name, value) - if isinstance(value, Layer): - model._self_tracked_trackables.append(value) - model._original_attributes_cache = None - model._setattr_tracking = setattr_tracking - else: - # Restore to the state of a never-called model. - _reset_build_compile_trackers(model) + """Restores the original state of a model after it was "reset". + + This undoes this action of `_in_place_subclassed_model_reset`, which is + called in `clone_and_build_model` if `in_place_reset` is set to True. + + Args: + model: Instance of a Keras model created via subclassing, on which + `_in_place_subclassed_model_reset` was previously called. + """ + assert not model._is_graph_network + # Restore layers and build attributes + if ( + hasattr(model, "_original_attributes_cache") + and model._original_attributes_cache is not None + ): + # Models have sticky attribute assignment, so we want to be careful to + # add back the previous attributes and track Layers by their original + # names without adding dependencies on "utility" attributes which Models + # exempt when they're constructed. + setattr_tracking = model._setattr_tracking + model._setattr_tracking = False + model._self_tracked_trackables = [] + for name, value in model._original_attributes_cache.items(): + setattr(model, name, value) + if isinstance(value, Layer): + model._self_tracked_trackables.append(value) + model._original_attributes_cache = None + model._setattr_tracking = setattr_tracking + else: + # Restore to the state of a never-called model. + _reset_build_compile_trackers(model) -@keras_export('keras.__internal__.models.clone_and_build_model', v1=[]) +@keras_export("keras.__internal__.models.clone_and_build_model", v1=[]) def clone_and_build_model( - model, input_tensors=None, target_tensors=None, custom_objects=None, - compile_clone=True, in_place_reset=False, optimizer_iterations=None, - optimizer_config=None): - """Clone a `Model` and build/compile it with the same settings used before. - - This function can be run in the same graph or in a separate graph from the - model. When using a separate graph, `in_place_reset` must be `False`. - - Note that, currently, the clone produced from this function may not work with - TPU DistributionStrategy. Try at your own risk. - - Args: - model: `tf.keras.Model` object. Can be Functional, Sequential, or - sub-classed. - input_tensors: Optional list or dictionary of input tensors to build the - model upon. If not provided, placeholders will be created. - target_tensors: Optional list of target tensors for compiling the model. If - not provided, placeholders will be created. - custom_objects: Optional dictionary mapping string names to custom classes - or functions. - compile_clone: Boolean, whether to compile model clone (default `True`). - in_place_reset: Boolean, whether to reset the model in place. Only used if - the model is a subclassed model. 
In the case of a subclassed model, - this argument must be set to `True` (default `False`). To restore the - original model, use the function - `in_place_subclassed_model_state_restoration(model)`. - optimizer_iterations: An iterations variable that will be incremented by the - optimizer if the clone is compiled. This argument is used when a Keras - model is cloned into an Estimator model function, because Estimators - create their own global step variable. - optimizer_config: Optimizer config dictionary or list of dictionary - returned from `get_config()`. This argument should be defined if - `clone_and_build_model` is called in a different graph or session from - the original model, and the optimizer is an instance of `OptimizerV2`. - - Returns: - Clone of the model. - - Raises: - ValueError: Cloning fails in the following cases - - cloning a subclassed model with `in_place_reset` set to False. - - compiling the clone when the original model has not been compiled. - """ - # Grab optimizer now, as we reset-in-place for subclassed models, but - # want to maintain access to the original optimizer. - orig_optimizer = model.optimizer - if compile_clone and not orig_optimizer: - raise ValueError( - 'Error when cloning model: `compile_clone` was set to True, but the ' - f'original model has not been compiled. Received: model={model}') - - if compile_clone: - compile_args = model._get_compile_args() # pylint: disable=protected-access - # Allows this method to be robust to switching graph and eager classes. - model._get_compile_args = lambda: compile_args - - with CustomObjectScope(custom_objects or {}): - if model._is_graph_network: - clone = clone_model(model, input_tensors=input_tensors) - elif isinstance(model, Sequential): - clone = clone_model(model, input_tensors=input_tensors) - if (not clone._is_graph_network and model._build_input_shape is not None): - if tf.compat.v1.executing_eagerly_outside_functions(): - clone.build(model._build_input_shape) + model, + input_tensors=None, + target_tensors=None, + custom_objects=None, + compile_clone=True, + in_place_reset=False, + optimizer_iterations=None, + optimizer_config=None, +): + """Clone a `Model` and build/compile it with the same settings used before. + + This function can be run in the same graph or in a separate graph from the + model. When using a separate graph, `in_place_reset` must be `False`. + + Note that, currently, the clone produced from this function may not work + with TPU DistributionStrategy. Try at your own risk. + + Args: + model: `tf.keras.Model` object. Can be Functional, Sequential, or + sub-classed. + input_tensors: Optional list or dictionary of input tensors to build the + model upon. If not provided, placeholders will be created. + target_tensors: Optional list of target tensors for compiling the model. + If not provided, placeholders will be created. + custom_objects: Optional dictionary mapping string names to custom classes + or functions. + compile_clone: Boolean, whether to compile model clone (default `True`). + in_place_reset: Boolean, whether to reset the model in place. Only used if + the model is a subclassed model. In the case of a subclassed model, + this argument must be set to `True` (default `False`). To restore the + original model, use the function + `in_place_subclassed_model_state_restoration(model)`. + optimizer_iterations: An iterations variable that will be incremented by + the optimizer if the clone is compiled. 
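A hedged usage sketch for the signature documented above, mirroring the import style of the tests later in this diff; the toy model and data are ours.

```python
import numpy as np
import keras
from keras import models

# Compile the original first; `compile_clone=True` requires a compiled
# model, per the ValueError raised in the body below.
inputs = keras.Input(shape=(4,))
model = keras.Model(inputs, keras.layers.Dense(1)(inputs))
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

clone = models.clone_and_build_model(model, compile_clone=True)
clone.train_on_batch(np.random.random((2, 4)), np.random.random((2, 1)))
```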
This argument is used when a + Keras model is cloned into an Estimator model function, because + Estimators create their own global step variable. + optimizer_config: Optimizer config dictionary or list of dictionary + returned from `get_config()`. This argument should be defined if + `clone_and_build_model` is called in a different graph or session from + the original model, and the optimizer is an instance of `OptimizerV2`. + + Returns: + Clone of the model. + + Raises: + ValueError: Cloning fails in the following cases + - cloning a subclassed model with `in_place_reset` set to False. + - compiling the clone when the original model has not been compiled. + """ + # Grab optimizer now, as we reset-in-place for subclassed models, but + # want to maintain access to the original optimizer. + orig_optimizer = model.optimizer + if compile_clone and not orig_optimizer: + raise ValueError( + "Error when cloning model: `compile_clone` was set to True, but " + f"the original model has not been compiled. Received: model={model}" + ) + + if compile_clone: + compile_args = model._get_compile_args() + # Allows this method to be robust to switching graph and eager classes. + model._get_compile_args = lambda: compile_args + + with CustomObjectScope(custom_objects or {}): + if model._is_graph_network: + clone = clone_model(model, input_tensors=input_tensors) + elif isinstance(model, Sequential): + clone = clone_model(model, input_tensors=input_tensors) + if ( + not clone._is_graph_network + and model._build_input_shape is not None + ): + if tf.compat.v1.executing_eagerly_outside_functions(): + clone.build(model._build_input_shape) + else: + clone._set_inputs( + backend.placeholder( + model._build_input_shape, + dtype=model.inputs[0].dtype, + ) + ) else: - clone._set_inputs( - backend.placeholder( - model._build_input_shape, dtype=model.inputs[0].dtype)) - else: - try: - # Prefer cloning the model if serial/deserial logic is implemented for - # subclassed model. - clone = model.__class__.from_config(model.get_config()) - except NotImplementedError: - logging.warning('This model is a subclassed model. Please implement ' - '`get_config` and `from_config` to better support ' - 'cloning the model.') - if not in_place_reset: - raise ValueError( - f'This model ({model}) is a subclassed model. ' - 'Such a model cannot be cloned, but there is a workaround where ' - 'the model is reset in-place. To use this, please set the ' - 'argument `in_place_reset` to `True`. This will reset the ' - 'attributes in the original model. To restore the attributes, ' - 'call `in_place_subclassed_model_state_restoration(model)`.') - clone = model - _in_place_subclassed_model_reset(clone) - if input_tensors is not None: - if isinstance(input_tensors, (list, tuple)) and len(input_tensors) == 1: - input_tensors = input_tensors[0] - clone._set_inputs(input_tensors) - - if compile_clone: - if isinstance(orig_optimizer, optimizer_v1.TFOptimizer): - optimizer = optimizer_v1.TFOptimizer( - orig_optimizer.optimizer, optimizer_iterations) - backend.track_tf_optimizer(optimizer) - else: - if not isinstance(orig_optimizer, (tuple, list)): - orig_optimizer = [orig_optimizer] - if optimizer_config is None: - optimizer = [ - opt.__class__.from_config(opt.get_config()) - for opt in orig_optimizer - ] - elif isinstance(optimizer_config, dict): - optimizer = [orig_optimizer[0].__class__.from_config(optimizer_config)] - else: - # optimizer config is list of dict, same order as orig_optimizer. 
- optimizer = [ - opt.__class__.from_config(opt_config) - for (opt, opt_config) in zip(orig_optimizer, optimizer_config) - ] - if optimizer_iterations is not None: - for opt in optimizer: - opt.iterations = optimizer_iterations - - if len(optimizer) == 1: - optimizer = optimizer[0] - - compile_args['optimizer'] = optimizer - if target_tensors is not None: - compile_args['target_tensors'] = target_tensors - # Ensure Metric objects in new model are separate from existing model. - compile_args['metrics'] = metrics_module.clone_metrics( - compile_args['metrics']) - compile_args['weighted_metrics'] = metrics_module.clone_metrics( - compile_args['weighted_metrics']) - clone.compile(**compile_args) - - return clone + try: + # Prefer cloning the model if serial/deserial logic is + # implemented for subclassed model. + clone = model.__class__.from_config(model.get_config()) + except NotImplementedError: + logging.warning( + "This model is a subclassed model. Please implement " + "`get_config` and `from_config` to better support " + "cloning the model." + ) + if not in_place_reset: + raise ValueError( + f"This model ({model}) is a subclassed model. " + "Such a model cannot be cloned, but there is a " + "workaround where the model is reset in-place. " + "To use this, please set the " + "argument `in_place_reset` to `True`. This will reset " + "the attributes in the original model. " + "To restore the attributes, call " + "`in_place_subclassed_model_state_restoration(model)`." + ) + clone = model + _in_place_subclassed_model_reset(clone) + if input_tensors is not None: + if ( + isinstance(input_tensors, (list, tuple)) + and len(input_tensors) == 1 + ): + input_tensors = input_tensors[0] + clone._set_inputs(input_tensors) + + if compile_clone: + if isinstance(orig_optimizer, optimizer_v1.TFOptimizer): + optimizer = optimizer_v1.TFOptimizer( + orig_optimizer.optimizer, optimizer_iterations + ) + backend.track_tf_optimizer(optimizer) + else: + if not isinstance(orig_optimizer, (tuple, list)): + orig_optimizer = [orig_optimizer] + if optimizer_config is None: + optimizer = [ + opt.__class__.from_config(opt.get_config()) + for opt in orig_optimizer + ] + elif isinstance(optimizer_config, dict): + optimizer = [ + orig_optimizer[0].__class__.from_config(optimizer_config) + ] + else: + # optimizer config is list of dict, same order as + # orig_optimizer. + optimizer = [ + opt.__class__.from_config(opt_config) + for (opt, opt_config) in zip( + orig_optimizer, optimizer_config + ) + ] + if optimizer_iterations is not None: + for opt in optimizer: + opt.iterations = optimizer_iterations + + if len(optimizer) == 1: + optimizer = optimizer[0] + + compile_args["optimizer"] = optimizer + if target_tensors is not None: + compile_args["target_tensors"] = target_tensors + # Ensure Metric objects in new model are separate from existing model. 
+ compile_args["metrics"] = metrics_module.clone_metrics( + compile_args["metrics"] + ) + compile_args["weighted_metrics"] = metrics_module.clone_metrics( + compile_args["weighted_metrics"] + ) + clone.compile(**compile_args) + + return clone diff --git a/keras/models/cloning_test.py b/keras/models/cloning_test.py index f95423d57be0..ed79dcaa521d 100644 --- a/keras/models/cloning_test.py +++ b/keras/models/cloning_test.py @@ -14,559 +14,652 @@ # ============================================================================== """Tests for `models.py` (model cloning, mainly).""" -import tensorflow.compat.v2 as tf - import functools import os -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras from keras import backend -from keras.testing_infra import test_combinations from keras import metrics from keras import models from keras.optimizers import optimizer_v1 +from keras.testing_infra import test_combinations from keras.testing_infra import test_utils class TestModel(keras.Model): - """A model subclass.""" + """A model subclass.""" - def __init__(self, n_outputs=4, trainable=True): - """A test class with one dense layer and number of outputs as a variable.""" - super().__init__() - self.layer1 = keras.layers.Dense(n_outputs) - self.n_outputs = tf.Variable(n_outputs, trainable=trainable) + def __init__(self, n_outputs=4, trainable=True): + """A test class with one dense layer and number of outputs as a + variable.""" + super().__init__() + self.layer1 = keras.layers.Dense(n_outputs) + self.n_outputs = tf.Variable(n_outputs, trainable=trainable) - def call(self, x): - return self.layer1(x) + def call(self, x): + return self.layer1(x) def _get_layers(input_shape=(4,), add_input_layer=False): - if add_input_layer: - model_layers = [keras.layers.InputLayer(input_shape=input_shape), - keras.layers.Dense(4)] - elif input_shape: - model_layers = [keras.layers.Dense(4, input_shape=input_shape)] - else: - model_layers = [keras.layers.Dense(4)] + if add_input_layer: + model_layers = [ + keras.layers.InputLayer(input_shape=input_shape), + keras.layers.Dense(4), + ] + elif input_shape: + model_layers = [keras.layers.Dense(4, input_shape=input_shape)] + else: + model_layers = [keras.layers.Dense(4)] - model_layers += [ - keras.layers.BatchNormalization(), - keras.layers.Dropout(0.5), - keras.layers.Dense(4)] + model_layers += [ + keras.layers.BatchNormalization(), + keras.layers.Dropout(0.5), + keras.layers.Dense(4), + ] - return model_layers + return model_layers def _get_model(input_shape=(4,)): - model_layers = _get_layers(input_shape=None, add_input_layer=False) - return test_utils.get_model_from_layers( - model_layers, input_shape=input_shape) + model_layers = _get_layers(input_shape=None, add_input_layer=False) + return test_utils.get_model_from_layers( + model_layers, input_shape=input_shape + ) class TestModelCloning(test_combinations.TestCase): + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + { + "testcase_name": "has_input_layer", + "input_shape": (4,), + "add_input_layer": True, + "share_weights": False, + }, + { + "testcase_name": "no_input_layer", + "input_shape": None, + "add_input_layer": False, + "share_weights": False, + }, + { + "testcase_name": "has_input_layer_share_weights", + "input_shape": (4,), + "add_input_layer": True, + "share_weights": True, + }, + { + "testcase_name": "no_input_layer_share_weights", + "input_shape": None, + "add_input_layer": False, + 
"share_weights": True, + }, + ] + ) + def test_clone_sequential_model( + self, input_shape, add_input_layer, share_weights + ): - @test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - {'testcase_name': 'has_input_layer', - 'input_shape': (4,), - 'add_input_layer': True, - 'share_weights': False}, - {'testcase_name': 'no_input_layer', - 'input_shape': None, - 'add_input_layer': False, - 'share_weights': False}, - {'testcase_name': 'has_input_layer_share_weights', - 'input_shape': (4,), - 'add_input_layer': True, - 'share_weights': True}, - {'testcase_name': 'no_input_layer_share_weights', - 'input_shape': None, - 'add_input_layer': False, - 'share_weights': True}, - ]) - def test_clone_sequential_model( - self, input_shape, add_input_layer, share_weights): - - if share_weights: - clone_fn = functools.partial( - keras.models._clone_sequential_model, layer_fn=models.share_weights) - else: - clone_fn = keras.models.clone_model - - val_a = np.random.random((10, 4)) - model = models.Sequential(_get_layers(input_shape, add_input_layer)) - # Sanity check - self.assertEqual( - isinstance( - list(model._flatten_layers(include_self=False, recursive=False))[0], - keras.layers.InputLayer), add_input_layer) - self.assertEqual(model._is_graph_network, add_input_layer) - - # With placeholder creation -- clone model should have an InputLayer - # if the original model has one. - new_model = clone_fn(model) - self.assertEqual( - isinstance( + if share_weights: + clone_fn = functools.partial( + keras.models._clone_sequential_model, + layer_fn=models.share_weights, + ) + else: + clone_fn = keras.models.clone_model + + val_a = np.random.random((10, 4)) + model = models.Sequential(_get_layers(input_shape, add_input_layer)) + # Sanity check + self.assertEqual( + isinstance( + list( + model._flatten_layers(include_self=False, recursive=False) + )[0], + keras.layers.InputLayer, + ), + add_input_layer, + ) + self.assertEqual(model._is_graph_network, add_input_layer) + + # With placeholder creation -- clone model should have an InputLayer + # if the original model has one. + new_model = clone_fn(model) + self.assertEqual( + isinstance( + list( + new_model._flatten_layers( + include_self=False, recursive=False + ) + )[0], + keras.layers.InputLayer, + ), + add_input_layer, + ) + self.assertEqual(new_model._is_graph_network, model._is_graph_network) + if ( + input_shape + and not tf.compat.v1.executing_eagerly_outside_functions() + ): + # update ops from batch norm needs to be included + self.assertGreaterEqual(len(new_model.updates), 2) + + # On top of new tensor -- clone model should always have an InputLayer. + input_a = keras.Input(shape=(4,), name="a") + new_model = clone_fn(model, input_tensors=input_a) + self.assertIsInstance( list( - new_model._flatten_layers(include_self=False, - recursive=False))[0], - keras.layers.InputLayer), add_input_layer) - self.assertEqual(new_model._is_graph_network, model._is_graph_network) - if input_shape and not tf.compat.v1.executing_eagerly_outside_functions(): - # update ops from batch norm needs to be included - self.assertGreaterEqual(len(new_model.updates), 2) - - # On top of new tensor -- clone model should always have an InputLayer. 
- input_a = keras.Input(shape=(4,), name='a') - new_model = clone_fn(model, input_tensors=input_a) - self.assertIsInstance( - list(new_model._flatten_layers(include_self=False, recursive=False))[0], - keras.layers.InputLayer) - # The new models inputs should have the properties of the new input tensor - if tf.__internal__.tf2.enabled(): - # In TF1, the new model will be a:0 - self.assertEqual(new_model.input_names[0], input_a.name) - self.assertEqual(new_model.inputs[0].shape, input_a.shape) - self.assertTrue(new_model._is_graph_network) - - # On top of new, non-Keras tensor -- clone model should always have an - # InputLayer. - if not tf.executing_eagerly(): - # TODO(b/121277734):Skip Eager contexts, as Input() layers raise an error - # saying they should not be used with EagerTensors - input_a = keras.backend.variable(val_a) - new_model = clone_fn(model, input_tensors=input_a) - self.assertIsInstance( - list(new_model._flatten_layers(include_self=False, - recursive=False))[0], - keras.layers.InputLayer) - self.assertTrue(new_model._is_graph_network) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - {'testcase_name': 'clone_weights', 'share_weights': False}, - {'testcase_name': 'share_weights', 'share_weights': True}, - ]) - def test_clone_functional_model(self, share_weights): - if share_weights: - clone_fn = functools.partial( - keras.models._clone_functional_model, layer_fn=models.share_weights) - else: - clone_fn = keras.models.clone_model - - val_a = np.random.random((10, 4)) - val_b = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - input_a = keras.Input(shape=(4,)) - input_b = keras.Input(shape=(4,)) - dense_1 = keras.layers.Dense(4,) - dense_2 = keras.layers.Dense(4,) - - x_a = dense_1(input_a) - x_a = keras.layers.Dropout(0.5)(x_a) - x_a = keras.layers.BatchNormalization()(x_a) - x_b = dense_1(input_b) - x_a = dense_2(x_a) - outputs = keras.layers.add([x_a, x_b]) - model = keras.models.Model([input_a, input_b], outputs) - - # With placeholder creation - new_model = clone_fn(model) - if not tf.compat.v1.executing_eagerly_outside_functions(): - self.assertGreaterEqual(len(new_model.updates), 2) - new_model.compile( - test_utils.get_v2_optimizer('rmsprop'), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - new_model.train_on_batch([val_a, val_b], val_out) - - # On top of new tensors - input_a = keras.Input(shape=(4,), name='a') - input_b = keras.Input(shape=(4,), name='b') - new_input_tensors = [input_a, input_b] - new_model = keras.models.clone_model(model, input_tensors=new_input_tensors) - if not tf.compat.v1.executing_eagerly_outside_functions(): - self.assertLen(new_model.updates, 2) - new_model.compile( - test_utils.get_v2_optimizer('rmsprop'), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - new_model.train_on_batch([val_a, val_b], val_out) - - # New model should use provided input tensors - self.assertListEqual(new_model.inputs, new_input_tensors) - - # On top of new, non-Keras tensors - if not tf.executing_eagerly(): - # TODO(b/121277734):Skip Eager contexts, as Input() layers raise an error - # saying they should not be used with EagerTensors - input_a = keras.backend.variable(val_a) - input_b = keras.backend.variable(val_b) - new_model = clone_fn(model, input_tensors=[input_a, input_b]) - self.assertGreaterEqual(len(new_model.updates), 2) - new_model.compile( - test_utils.get_v2_optimizer('rmsprop'), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - new_model.train_on_batch(None, val_out) - - 
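A compact sketch of the property the functional-model test asserts ("New model should use provided input tensors"): caller-provided `Input` tensors become the clone's inputs. The two-input toy model is ours.

```python
import keras

a = keras.Input(shape=(4,), name="a")
b = keras.Input(shape=(4,), name="b")
dense = keras.layers.Dense(4)
model = keras.Model([a, b], keras.layers.add([dense(a), dense(b)]))

new_a = keras.Input(shape=(4,), name="new_a")
new_b = keras.Input(shape=(4,), name="new_b")
clone = keras.models.clone_model(model, input_tensors=[new_a, new_b])
assert clone.inputs == [new_a, new_b]
```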
@test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - {'testcase_name': 'clone_weights', 'share_weights': False}, - {'testcase_name': 'share_weights', 'share_weights': True}, - ]) - def test_clone_functional_with_masking(self, share_weights): - if share_weights: - clone_fn = functools.partial( - keras.models._clone_functional_model, layer_fn=models.share_weights) - else: - clone_fn = keras.models.clone_model - - x = np.array([[[1.], [1.]], [[0.], [0.]]]) - inputs = keras.Input((2, 1)) - outputs = keras.layers.Masking(mask_value=0)(inputs) - outputs = keras.layers.TimeDistributed( - keras.layers.Dense(1, kernel_initializer='one'))(outputs) - model = keras.Model(inputs, outputs) - - model = clone_fn(model) - model.compile( - loss='mse', - optimizer=test_utils.get_v2_optimizer('adam'), - run_eagerly=test_utils.should_run_eagerly()) - y = np.array([[[1], [1]], [[1], [1]]]) - loss = model.train_on_batch(x, y) - self.assertEqual(float(loss), 0.) - - def test_clone_rnn(self): - # Test cloning a model with multiple cells in an RNN. This exercises a - # few "fancier" features such as the `Bidrectional` wrapper and - # `StackedRNNCells` under the hood. - inputs = keras.Input(shape=(3, 3)) - cells = [ - keras.layers.LSTMCell( - units=32, - enable_caching_device=True, - implementation=2, - activation='relu')] - rnn = keras.layers.RNN(cells, return_sequences=True) - outputs = keras.layers.Bidirectional(rnn)(inputs) - outputs = keras.layers.Dense( - 12, activation='softmax', name='scores')(outputs) - model = keras.Model(inputs=inputs, outputs=outputs) - model.compile( - loss=keras.losses.CategoricalCrossentropy(), - optimizer=keras.optimizers.optimizer_v2.rmsprop.RMSprop(lr=0.01), - metrics=['accuracy']) - keras.models.clone_model(model) - - def test_model_cloning_invalid_use_cases(self): - seq_model = keras.models.Sequential() - seq_model.add(keras.layers.Dense(4, input_shape=(4,))) - - x = keras.Input((4,)) - y = keras.layers.Dense(4)(x) - fn_model = keras.models.Model(x, y) - - with self.assertRaises(ValueError): - keras.models._clone_functional_model(seq_model) - with self.assertRaises(ValueError): - keras.models._clone_functional_model(None) - with self.assertRaises(ValueError): - keras.models._clone_sequential_model(fn_model) - - with self.assertRaises(ValueError): - keras.models._clone_sequential_model(seq_model, input_tensors=[x, x]) - with self.assertRaises(ValueError): - keras.models._clone_sequential_model(seq_model, input_tensors=y) - - def test_functional_cloning_does_not_create_unnecessary_placeholders(self): - with tf.Graph().as_default(): - x = keras.Input((4,)) - y = keras.layers.Dense(4)(x) - model = keras.models.Model(x, y) - graph = tf.Graph() - with graph.as_default(): - x = tf.ones((10, 4)) - _ = keras.models.clone_model(model, input_tensors=[x]) - has_placeholder = _has_placeholder(graph) - self.assertFalse(has_placeholder) - - def test_sequential_cloning_does_not_create_unnecessary_placeholders(self): - with tf.Graph().as_default(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(4, input_shape=(4,))) - graph = tf.Graph() - with graph.as_default(): - x = tf.ones((10, 4)) - _ = keras.models.clone_model(model, input_tensors=[x]) - has_placeholder = _has_placeholder(graph) - self.assertFalse(has_placeholder) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - {'testcase_name': 'clone_weights', 'share_weights': False}, - {'testcase_name': 'share_weights', 'share_weights': True}, - ]) - def 
test_functional_cloning_with_tensor_kwarg(self, share_weights): - """Test that cloning works with models that use Tensor kwargs.""" - - if share_weights: - clone_fn = functools.partial( - keras.models.clone_model, clone_function=models.share_weights) - else: - clone_fn = keras.models.clone_model - - class LayerWithTensorKwarg(keras.layers.Layer): - - def call(self, inputs, tensor=None): - if tensor is not None: - return inputs * tf.cast(tensor, tf.float32) + new_model._flatten_layers(include_self=False, recursive=False) + )[0], + keras.layers.InputLayer, + ) + # The new models inputs should have the properties of the new input + # tensor + if tf.__internal__.tf2.enabled(): + # In TF1, the new model will be a:0 + self.assertEqual(new_model.input_names[0], input_a.name) + self.assertEqual(new_model.inputs[0].shape, input_a.shape) + self.assertTrue(new_model._is_graph_network) + + # On top of new, non-Keras tensor -- clone model should always have an + # InputLayer. + if not tf.executing_eagerly(): + # TODO(b/121277734):Skip Eager contexts, as Input() layers raise an + # error saying they should not be used with EagerTensors + input_a = keras.backend.variable(val_a) + new_model = clone_fn(model, input_tensors=input_a) + self.assertIsInstance( + list( + new_model._flatten_layers( + include_self=False, recursive=False + ) + )[0], + keras.layers.InputLayer, + ) + self.assertTrue(new_model._is_graph_network) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + {"testcase_name": "clone_weights", "share_weights": False}, + {"testcase_name": "share_weights", "share_weights": True}, + ] + ) + def test_clone_functional_model(self, share_weights): + if share_weights: + clone_fn = functools.partial( + keras.models._clone_functional_model, + layer_fn=models.share_weights, + ) else: - return inputs + clone_fn = keras.models.clone_model + + val_a = np.random.random((10, 4)) + val_b = np.random.random((10, 4)) + val_out = np.random.random((10, 4)) + + input_a = keras.Input(shape=(4,)) + input_b = keras.Input(shape=(4,)) + dense_1 = keras.layers.Dense( + 4, + ) + dense_2 = keras.layers.Dense( + 4, + ) + + x_a = dense_1(input_a) + x_a = keras.layers.Dropout(0.5)(x_a) + x_a = keras.layers.BatchNormalization()(x_a) + x_b = dense_1(input_b) + x_a = dense_2(x_a) + outputs = keras.layers.add([x_a, x_b]) + model = keras.models.Model([input_a, input_b], outputs) + + # With placeholder creation + new_model = clone_fn(model) + if not tf.compat.v1.executing_eagerly_outside_functions(): + self.assertGreaterEqual(len(new_model.updates), 2) + new_model.compile( + test_utils.get_v2_optimizer("rmsprop"), + "mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + new_model.train_on_batch([val_a, val_b], val_out) + + # On top of new tensors + input_a = keras.Input(shape=(4,), name="a") + input_b = keras.Input(shape=(4,), name="b") + new_input_tensors = [input_a, input_b] + new_model = keras.models.clone_model( + model, input_tensors=new_input_tensors + ) + if not tf.compat.v1.executing_eagerly_outside_functions(): + self.assertLen(new_model.updates, 2) + new_model.compile( + test_utils.get_v2_optimizer("rmsprop"), + "mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + new_model.train_on_batch([val_a, val_b], val_out) + + # New model should use provided input tensors + self.assertListEqual(new_model.inputs, new_input_tensors) + + # On top of new, non-Keras tensors + if not tf.executing_eagerly(): + # TODO(b/121277734):Skip Eager contexts, as Input() layers raise an + # error saying they 
should not be used with EagerTensors + input_a = keras.backend.variable(val_a) + input_b = keras.backend.variable(val_b) + new_model = clone_fn(model, input_tensors=[input_a, input_b]) + self.assertGreaterEqual(len(new_model.updates), 2) + new_model.compile( + test_utils.get_v2_optimizer("rmsprop"), + "mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + new_model.train_on_batch(None, val_out) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + {"testcase_name": "clone_weights", "share_weights": False}, + {"testcase_name": "share_weights", "share_weights": True}, + ] + ) + def test_clone_functional_with_masking(self, share_weights): + if share_weights: + clone_fn = functools.partial( + keras.models._clone_functional_model, + layer_fn=models.share_weights, + ) + else: + clone_fn = keras.models.clone_model - inputs = keras.layers.Input(shape=(3)) - t = tf.sequence_mask(tf.shape(inputs)[1]) - model = keras.models.Model(inputs, LayerWithTensorKwarg()(inputs, t)) - model.add_loss(tf.reduce_sum(model.outputs)) + x = np.array([[[1.0], [1.0]], [[0.0], [0.0]]]) + inputs = keras.Input((2, 1)) + outputs = keras.layers.Masking(mask_value=0)(inputs) + outputs = keras.layers.TimeDistributed( + keras.layers.Dense(1, kernel_initializer="one") + )(outputs) + model = keras.Model(inputs, outputs) - input_arr = np.random.random((1, 3)).astype(np.float32) - clone = clone_fn(model) + model = clone_fn(model) + model.compile( + loss="mse", + optimizer=test_utils.get_v2_optimizer("adam"), + run_eagerly=test_utils.should_run_eagerly(), + ) + y = np.array([[[1], [1]], [[1], [1]]]) + loss = model.train_on_batch(x, y) + self.assertEqual(float(loss), 0.0) + + def test_clone_rnn(self): + # Test cloning a model with multiple cells in an RNN. This exercises a + # few "fancier" features such as the `Bidrectional` wrapper and + # `StackedRNNCells` under the hood. 
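A short sketch of the wrapper structure this RNN test exercises, using two cells instead of one to make the `StackedRNNCells` point explicit; cell sizes are our choice.

```python
import keras

cells = [keras.layers.LSTMCell(8), keras.layers.LSTMCell(8)]
rnn = keras.layers.RNN(cells, return_sequences=True)  # stacks the cells
inputs = keras.Input(shape=(5, 3))
outputs = keras.layers.Bidirectional(rnn)(inputs)  # forward + backward copy
model = keras.Model(inputs, outputs)

# `clone_model` has to reconstruct both wrappers faithfully.
clone = keras.models.clone_model(model)
```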
+ inputs = keras.Input(shape=(3, 3)) + cells = [ + keras.layers.LSTMCell( + units=32, + enable_caching_device=True, + implementation=2, + activation="relu", + ) + ] + rnn = keras.layers.RNN(cells, return_sequences=True) + outputs = keras.layers.Bidirectional(rnn)(inputs) + outputs = keras.layers.Dense(12, activation="softmax", name="scores")( + outputs + ) + model = keras.Model(inputs=inputs, outputs=outputs) + model.compile( + loss=keras.losses.CategoricalCrossentropy(), + optimizer=keras.optimizers.legacy.rmsprop.RMSprop(lr=0.01), + metrics=["accuracy"], + ) + keras.models.clone_model(model) + + def test_model_cloning_invalid_use_cases(self): + seq_model = keras.models.Sequential() + seq_model.add(keras.layers.Dense(4, input_shape=(4,))) + + x = keras.Input((4,)) + y = keras.layers.Dense(4)(x) + fn_model = keras.models.Model(x, y) + + with self.assertRaises(ValueError): + keras.models._clone_functional_model(seq_model) + with self.assertRaises(ValueError): + keras.models._clone_functional_model(None) + with self.assertRaises(ValueError): + keras.models._clone_sequential_model(fn_model) + + with self.assertRaises(ValueError): + keras.models._clone_sequential_model( + seq_model, input_tensors=[x, x] + ) + with self.assertRaises(ValueError): + keras.models._clone_sequential_model(seq_model, input_tensors=y) + + def test_functional_cloning_does_not_create_unnecessary_placeholders(self): + with tf.Graph().as_default(): + x = keras.Input((4,)) + y = keras.layers.Dense(4)(x) + model = keras.models.Model(x, y) + graph = tf.Graph() + with graph.as_default(): + x = tf.ones((10, 4)) + _ = keras.models.clone_model(model, input_tensors=[x]) + has_placeholder = _has_placeholder(graph) + self.assertFalse(has_placeholder) + + def test_sequential_cloning_does_not_create_unnecessary_placeholders(self): + with tf.Graph().as_default(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(4, input_shape=(4,))) + graph = tf.Graph() + with graph.as_default(): + x = tf.ones((10, 4)) + _ = keras.models.clone_model(model, input_tensors=[x]) + has_placeholder = _has_placeholder(graph) + self.assertFalse(has_placeholder) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + {"testcase_name": "clone_weights", "share_weights": False}, + {"testcase_name": "share_weights", "share_weights": True}, + ] + ) + def test_functional_cloning_with_tensor_kwarg(self, share_weights): + """Test that cloning works with models that use Tensor kwargs.""" - if tf.executing_eagerly(): - clone(input_arr) - loss = clone.losses[0] - else: - with self.session() as sess: - clone(input_arr) if share_weights: - self.skipTest('Weight sharing with inputs in call **kwargs does ' - 'not work correctly in v1') + clone_fn = functools.partial( + keras.models.clone_model, clone_function=models.share_weights + ) + else: + clone_fn = keras.models.clone_model + + class LayerWithTensorKwarg(keras.layers.Layer): + def call(self, inputs, tensor=None): + if tensor is not None: + return inputs * tf.cast(tensor, tf.float32) + else: + return inputs + + inputs = keras.layers.Input(shape=(3)) + t = tf.sequence_mask(tf.shape(inputs)[1]) + model = keras.models.Model(inputs, LayerWithTensorKwarg()(inputs, t)) + model.add_loss(tf.reduce_sum(model.outputs)) + + input_arr = np.random.random((1, 3)).astype(np.float32) + clone = clone_fn(model) + + if tf.executing_eagerly(): + clone(input_arr) + loss = clone.losses[0] else: - feed_dict = {clone.input: input_arr} - loss = sess.run(clone.losses[0], feed_dict=feed_dict) - 
self.assertAllClose(np.sum(input_arr), loss) + with self.session() as sess: + clone(input_arr) + if share_weights: + self.skipTest( + "Weight sharing with inputs in call **kwargs does " + "not work correctly in v1" + ) + else: + feed_dict = {clone.input: input_arr} + loss = sess.run(clone.losses[0], feed_dict=feed_dict) + self.assertAllClose(np.sum(input_arr), loss) def _has_placeholder(graph): - ops_types = [op.type for op in graph.get_operations()] - return any('Placeholder' in s for s in ops_types) + ops_types = [op.type for op in graph.get_operations()] + return any("Placeholder" in s for s in ops_types) class CheckpointingTests(test_combinations.TestCase): + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_optimizer_dependency(self): + model = _get_model() + opt = tf.compat.v1.train.AdamOptimizer(0.01) + model.compile( + optimizer=opt, + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_optimizer_dependency(self): - model = _get_model() - opt = tf.compat.v1.train.AdamOptimizer(.01) - model.compile( - optimizer=opt, - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - - model.fit( - x=np.array([[1., 2., 3., 4.]]), - y=np.array([[1., 1., 1., 1.]]), - epochs=2) - save_prefix = os.path.join(self.get_temp_dir(), 'ckpt') - beta1_power, _ = opt._get_beta_accumulators() - self.evaluate(beta1_power.assign(12.)) - model.save_weights(save_prefix) - self.evaluate(beta1_power.assign(13.)) - model.load_weights(save_prefix) - self.assertEqual(12., self.evaluate(beta1_power)) + model.fit( + x=np.array([[1.0, 2.0, 3.0, 4.0]]), + y=np.array([[1.0, 1.0, 1.0, 1.0]]), + epochs=2, + ) + save_prefix = os.path.join(self.get_temp_dir(), "ckpt") + beta1_power, _ = opt._get_beta_accumulators() + self.evaluate(beta1_power.assign(12.0)) + model.save_weights(save_prefix) + self.evaluate(beta1_power.assign(13.0)) + model.load_weights(save_prefix) + self.assertEqual(12.0, self.evaluate(beta1_power)) @test_combinations.run_all_keras_modes class TestModelBackend(test_combinations.TestCase): + def test_model_backend_float64_use_cases(self): + # Test case for GitHub issue 19318 + floatx = keras.backend.floatx() + keras.backend.set_floatx("float64") + + x = keras.Input((5,)) + y = keras.layers.Dense(1)(x) + model = keras.models.Model(x, y) + model.compile( + test_utils.get_v2_optimizer("rmsprop"), + "mse", + run_eagerly=test_utils.should_run_eagerly(), + ) - def test_model_backend_float64_use_cases(self): - # Test case for GitHub issue 19318 - floatx = keras.backend.floatx() - keras.backend.set_floatx('float64') - - x = keras.Input((5,)) - y = keras.layers.Dense(1)(x) - model = keras.models.Model(x, y) - model.compile( - test_utils.get_v2_optimizer('rmsprop'), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - keras.backend.set_floatx(floatx) + keras.backend.set_floatx(floatx) class TestCloneAndBuildModel(test_combinations.TestCase): + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_clone_and_build_non_compiled_model(self): + inp = np.random.random((10, 4)) + out = np.random.random((10, 4)) + + model = _get_model() + + with self.assertRaisesRegex(ValueError, "has not been compiled"): + models.clone_and_build_model(model, compile_clone=True) + + is_subclassed = test_utils.get_model_type() == "subclass" + # With placeholder creation + new_model = models.clone_and_build_model( + model, 
compile_clone=False, in_place_reset=is_subclassed + ) + with self.assertRaisesRegex(RuntimeError, "must compile"): + new_model.evaluate(inp, out) + with self.assertRaisesRegex(RuntimeError, "must compile"): + new_model.train_on_batch(inp, out) + new_model.compile( + test_utils.get_v2_optimizer("rmsprop"), + "mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + new_model.train_on_batch(inp, out) + + # Create new tensors for inputs. + input_a = keras.Input(shape=(4,)) + new_model = models.clone_and_build_model( + model, + input_tensors=input_a, + compile_clone=False, + in_place_reset=is_subclassed, + ) + with self.assertRaisesRegex(RuntimeError, "must compile"): + new_model.evaluate(inp, out) + with self.assertRaisesRegex(RuntimeError, "must compile"): + new_model.train_on_batch(inp, out) + new_model.compile( + test_utils.get_v2_optimizer("rmsprop"), + "mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + new_model.train_on_batch(inp, out) + + def _assert_same_compile_params(self, model): + """Assert that two models have the same compile parameters.""" + + self.assertEqual("mse", model.loss) + self.assertIsInstance( + model.optimizer, + ( + optimizer_v1.RMSprop, + keras.optimizers.legacy.rmsprop.RMSprop, + ), + ) + + def _clone_and_build_test_helper(self, model, model_type): + inp = np.random.random((10, 4)) + out = np.random.random((10, 4)) + + is_subclassed = model_type == "subclass" + + # With placeholder creation + new_model = models.clone_and_build_model( + model, compile_clone=True, in_place_reset=is_subclassed + ) + + self._assert_same_compile_params(new_model) + new_model.train_on_batch(inp, out) + new_model.evaluate(inp, out) + + # Create new tensors for inputs. + input_a = keras.Input(shape=(4,), name="a") + new_model = models.clone_and_build_model( + model, + input_tensors=input_a, + compile_clone=True, + in_place_reset=is_subclassed, + ) + self._assert_same_compile_params(new_model) + new_model.train_on_batch(inp, out) + new_model.evaluate(inp, out) + + new_model = models.clone_and_build_model( + model, + input_tensors=input_a, + target_tensors=None, + compile_clone=True, + in_place_reset=is_subclassed, + ) + self._assert_same_compile_params(new_model) + new_model.train_on_batch(inp, out) + new_model.evaluate(inp, out) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_clone_and_build_compiled(self): + model = _get_model() + model.compile( + test_utils.get_v2_optimizer("rmsprop"), + "mse", + metrics=["acc", metrics.categorical_accuracy], + run_eagerly=test_utils.should_run_eagerly(), + ) + + self._clone_and_build_test_helper(model, test_utils.get_model_type()) - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_clone_and_build_non_compiled_model(self): - inp = np.random.random((10, 4)) - out = np.random.random((10, 4)) - - model = _get_model() - - with self.assertRaisesRegex(ValueError, 'has not been compiled'): - models.clone_and_build_model(model, compile_clone=True) - - is_subclassed = (test_utils.get_model_type() == 'subclass') - # With placeholder creation - new_model = models.clone_and_build_model( - model, compile_clone=False, in_place_reset=is_subclassed) - with self.assertRaisesRegex(RuntimeError, 'must compile'): - new_model.evaluate(inp, out) - with self.assertRaisesRegex(RuntimeError, 'must compile'): - new_model.train_on_batch(inp, out) - new_model.compile( - test_utils.get_v2_optimizer('rmsprop'), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - 
new_model.train_on_batch(inp, out) - - # Create new tensors for inputs. - input_a = keras.Input(shape=(4,)) - new_model = models.clone_and_build_model( - model, - input_tensors=input_a, - compile_clone=False, - in_place_reset=is_subclassed) - with self.assertRaisesRegex(RuntimeError, 'must compile'): - new_model.evaluate(inp, out) - with self.assertRaisesRegex(RuntimeError, 'must compile'): - new_model.train_on_batch(inp, out) - new_model.compile( - test_utils.get_v2_optimizer('rmsprop'), - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - new_model.train_on_batch(inp, out) - - def _assert_same_compile_params(self, model): - """Assert that two models have the same compile parameters.""" - - self.assertEqual('mse', model.loss) - self.assertIsInstance( - model.optimizer, - (optimizer_v1.RMSprop, keras.optimizers.optimizer_v2.rmsprop.RMSprop)) - - def _clone_and_build_test_helper(self, model, model_type): - inp = np.random.random((10, 4)) - out = np.random.random((10, 4)) - - is_subclassed = (model_type == 'subclass') - - # With placeholder creation - new_model = models.clone_and_build_model( - model, compile_clone=True, in_place_reset=is_subclassed) - - self._assert_same_compile_params(new_model) - new_model.train_on_batch(inp, out) - new_model.evaluate(inp, out) - - # Create new tensors for inputs. - input_a = keras.Input(shape=(4,), name='a') - new_model = models.clone_and_build_model( - model, input_tensors=input_a, compile_clone=True, - in_place_reset=is_subclassed) - self._assert_same_compile_params(new_model) - new_model.train_on_batch(inp, out) - new_model.evaluate(inp, out) - - new_model = models.clone_and_build_model( - model, - input_tensors=input_a, - target_tensors=None, - compile_clone=True, - in_place_reset=is_subclassed) - self._assert_same_compile_params(new_model) - new_model.train_on_batch(inp, out) - new_model.evaluate(inp, out) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_clone_and_build_compiled(self): - model = _get_model() - model.compile( - test_utils.get_v2_optimizer('rmsprop'), - 'mse', - metrics=['acc', metrics.categorical_accuracy], - run_eagerly=test_utils.should_run_eagerly()) - - self._clone_and_build_test_helper(model, test_utils.get_model_type()) - - @test_combinations.run_all_keras_modes - def test_clone_and_build_sequential_without_inputs_defined(self): - model = models.Sequential(_get_layers(input_shape=None)) - model.compile( - test_utils.get_v2_optimizer('rmsprop'), - 'mse', - metrics=['acc', metrics.categorical_accuracy], - run_eagerly=test_utils.should_run_eagerly()) - self._clone_and_build_test_helper(model, 'sequential') - - inp = np.random.random((10, 4)) - out = np.random.random((10, 4)) - model.train_on_batch(inp, out) - self._clone_and_build_test_helper(model, 'sequential') - - def assert_optimizer_iterations_increases(self, optimizer): - model = _get_model() - model.compile( - optimizer, - 'mse', - metrics=['acc', metrics.categorical_accuracy], - run_eagerly=test_utils.should_run_eagerly()) - - global_step = keras.backend.variable(123, dtype=tf.int64) - clone_model = models.clone_and_build_model( - model, compile_clone=True, optimizer_iterations=global_step, - in_place_reset=(test_utils.get_model_type() == 'subclass')) - - inp = np.random.random((10, 4)) - out = np.random.random((10, 4)) - clone_model.train_on_batch(inp, out) - - self.assertEqual(backend.eval(global_step), 124) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def 
test_replace_tf_optimizer_iterations_variable(self): - if tf.executing_eagerly(): - self.skipTest('v1 optimizers not supported with eager.') - self.assert_optimizer_iterations_increases(tf.compat.v1.train.AdamOptimizer(0.01)) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_replace_keras_optimizer_iterations_variable(self): - self.assert_optimizer_iterations_increases('adam') - - def test_clone_optimizer_in_different_graph(self): - with tf.Graph().as_default(): - with self.session(): - model = test_utils.get_small_sequential_mlp(3, 4) - optimizer = keras.optimizers.optimizer_v2.adam.Adam() + @test_combinations.run_all_keras_modes + def test_clone_and_build_sequential_without_inputs_defined(self): + model = models.Sequential(_get_layers(input_shape=None)) model.compile( - optimizer, 'mse', metrics=['acc', metrics.categorical_accuracy], - ) - model.fit( - x=np.array([[1., 2., 3., 4.]]), - y=np.array([[1., 1., 1., 1.]]), - epochs=1) - optimizer_config = optimizer.get_config() - with tf.Graph().as_default(): - with self.session(): - with self.assertRaisesRegex(ValueError, 'Cannot use the given session'): - models.clone_and_build_model(model, compile_clone=True) - # The optimizer_config object allows the model to be cloned in a - # different graph. - models.clone_and_build_model(model, compile_clone=True, - optimizer_config=optimizer_config) - - -if __name__ == '__main__': - tf.test.main() + test_utils.get_v2_optimizer("rmsprop"), + "mse", + metrics=["acc", metrics.categorical_accuracy], + run_eagerly=test_utils.should_run_eagerly(), + ) + self._clone_and_build_test_helper(model, "sequential") + + inp = np.random.random((10, 4)) + out = np.random.random((10, 4)) + model.train_on_batch(inp, out) + self._clone_and_build_test_helper(model, "sequential") + + def assert_optimizer_iterations_increases(self, optimizer): + model = _get_model() + model.compile( + optimizer, + "mse", + metrics=["acc", metrics.categorical_accuracy], + run_eagerly=test_utils.should_run_eagerly(), + ) + + global_step = keras.backend.variable(123, dtype=tf.int64) + clone_model = models.clone_and_build_model( + model, + compile_clone=True, + optimizer_iterations=global_step, + in_place_reset=(test_utils.get_model_type() == "subclass"), + ) + + inp = np.random.random((10, 4)) + out = np.random.random((10, 4)) + clone_model.train_on_batch(inp, out) + + self.assertEqual(backend.eval(global_step), 124) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_replace_tf_optimizer_iterations_variable(self): + if tf.executing_eagerly(): + self.skipTest("v1 optimizers not supported with eager.") + self.assert_optimizer_iterations_increases( + tf.compat.v1.train.AdamOptimizer(0.01) + ) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_replace_keras_optimizer_iterations_variable(self): + self.assert_optimizer_iterations_increases("adam") + + def test_clone_optimizer_in_different_graph(self): + with tf.Graph().as_default(): + with self.session(): + model = test_utils.get_small_sequential_mlp(3, 4) + optimizer = keras.optimizers.legacy.adam.Adam() + model.compile( + optimizer, + "mse", + metrics=["acc", metrics.categorical_accuracy], + ) + model.fit( + x=np.array([[1.0, 2.0, 3.0, 4.0]]), + y=np.array([[1.0, 1.0, 1.0, 1.0]]), + epochs=1, + ) + optimizer_config = optimizer.get_config() + with tf.Graph().as_default(): + with self.session(): + with self.assertRaisesRegex( + ValueError, "Cannot use 
the given session" + ): + models.clone_and_build_model(model, compile_clone=True) + # The optimizer_config object allows the model to be cloned in a + # different graph. + models.clone_and_build_model( + model, compile_clone=True, optimizer_config=optimizer_config + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/models/sharpness_aware_minimization.py b/keras/models/sharpness_aware_minimization.py index 4e4e5233c384..543b767966ef 100644 --- a/keras/models/sharpness_aware_minimization.py +++ b/keras/models/sharpness_aware_minimization.py @@ -16,156 +16,176 @@ import copy +import tensorflow.compat.v2 as tf + from keras.engine import data_adapter from keras.layers import deserialize as deserialize_layer from keras.models import Model -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf +from keras.saving.object_registration import register_keras_serializable +from keras.saving.serialization_lib import serialize_keras_object +# isort: off from tensorflow.python.util.tf_export import keras_export -# pylint: disable=g-classes-have-attributes - -@generic_utils.register_keras_serializable() +@register_keras_serializable() @keras_export("keras.models.experimental.SharpnessAwareMinimization", v1=[]) class SharpnessAwareMinimization(Model): - """Sharpness aware minimization (SAM) training flow. - - Sharpness-aware minimization (SAM) is a technique that improves the model - generalization and provides robustness to label noise. Mini-batch splitting is - proven to improve the SAM's performance, so users can control how mini batches - are split via setting the `num_batch_splits` argument. - - Args: - model: `tf.keras.Model` instance. The inner model that does the - forward-backward pass. - rho: float, defaults to 0.05. The gradients scaling factor. - num_batch_splits: int, defaults to None. The number of mini batches to - split into from each data batch. If None, batches are not split into - sub-batches. - name: string, defaults to None. The name of the SAM model. - - Reference: - [Pierre Foret et al., 2020](https://arxiv.org/abs/2010.01412) - """ - - def __init__(self, model, rho=0.05, num_batch_splits=None, name=None): - super().__init__(name=name) - self.model = model - self.rho = rho - self.num_batch_splits = num_batch_splits - - def train_step(self, data): - """The logic of one SAM training step. + """Sharpness aware minimization (SAM) training flow. - Args: - data: A nested structure of `Tensor`s. It should be of structure - (x, y, sample_weight) or (x, y). - - Returns: - A dict mapping metric names to running average values. 
- """ - x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) - - if self.num_batch_splits is not None: - x_split = tf.split(x, self.num_batch_splits) - y_split = tf.split(y, self.num_batch_splits) - else: - x_split = [x] - y_split = [y] - - gradients_all_batches = [] - pred_all_batches = [] - for (x_batch, y_batch) in zip(x_split, y_split): - epsilon_w_cache = [] - with tf.GradientTape() as tape: - pred = self.model(x_batch) - loss = self.compiled_loss(y_batch, pred) - pred_all_batches.append(pred) - trainable_variables = self.model.trainable_variables - gradients = tape.gradient(loss, trainable_variables) - - gradients_order2_norm = self._gradients_order2_norm(gradients) - scale = self.rho / (gradients_order2_norm + 1e-12) - - for (gradient, variable) in zip(gradients, trainable_variables): - epsilon_w = gradient * scale - self._distributed_apply_epsilon_w(variable, epsilon_w, - tf.distribute.get_strategy()) - epsilon_w_cache.append(epsilon_w) - - with tf.GradientTape() as tape: - pred = self(x_batch) - loss = self.compiled_loss(y_batch, pred) - gradients = tape.gradient(loss, trainable_variables) - if len(gradients_all_batches) == 0: - for gradient in gradients: - gradients_all_batches.append([gradient]) - else: - for (gradient, gradient_all_batches) in zip(gradients, - gradients_all_batches): - gradient_all_batches.append(gradient) - for (variable, epsilon_w) in zip(trainable_variables, epsilon_w_cache): - # Restore the variable to its original value before `apply_gradients()`. - self._distributed_apply_epsilon_w(variable, -epsilon_w, - tf.distribute.get_strategy()) - - gradients = [] - for gradient_all_batches in gradients_all_batches: - gradients.append(tf.reduce_sum(gradient_all_batches, axis=0)) - self.optimizer.apply_gradients(zip(gradients, trainable_variables)) - - pred = tf.concat(pred_all_batches, axis=0) - self.compiled_metrics.update_state(y, pred, sample_weight) - return {m.name: m.result() for m in self.metrics} - - def call(self, inputs): - """Forward pass of SAM. - - SAM delegates the forward pass call to the wrapped model. + Sharpness-aware minimization (SAM) is a technique that improves the model + generalization and provides robustness to label noise. Mini-batch splitting + is proven to improve the SAM's performance, so users can control how mini + batches are split via setting the `num_batch_splits` argument. Args: - inputs: Tensor. The model inputs. - - Returns: - A Tensor, the outputs of the wrapped model for given `inputs`. + model: `tf.keras.Model` instance. The inner model that does the + forward-backward pass. + rho: float. The gradients scaling factor. Defaults to `0.05`. + num_batch_splits: int. The number of mini batches to + split into from each data batch. If None, batches are not split into + sub-batches. Defaults to `None`. + name: string. The name of the SAM model. Defaults to `None`. + + Reference: + [Pierre Foret et al., 2020](https://arxiv.org/abs/2010.01412) """ - return self.model(inputs) - - def get_config(self): - config = super().get_config() - config.update({ - "model": generic_utils.serialize_keras_object(self.model), - "rho": self.rho, - }) - return config - - @classmethod - def from_config(cls, config, custom_objects=None): - # Avoid mutating the input dict. 
- config = copy.deepcopy(config) - model = deserialize_layer( - config.pop("model"), custom_objects=custom_objects) - config["model"] = model - return super().from_config(config, custom_objects) - - def _distributed_apply_epsilon_w(self, var, epsilon_w, strategy): - # Helper function to apply epsilon_w on model variables. - if isinstance(tf.distribute.get_strategy(), - (tf.distribute.experimental.ParameterServerStrategy, - tf.distribute.experimental.CentralStorageStrategy)): - # Under PSS and CSS, the AggregatingVariable has to be kept in sync. - def distribute_apply(strategy, var, epsilon_w): - strategy.extended.update( - var, lambda x, y: x.assign_add(y), args=(epsilon_w,), group=False) - - tf.__internal__.distribute.interim.maybe_merge_call( - distribute_apply, tf.distribute.get_strategy(), var, epsilon_w) - else: - var.assign_add(epsilon_w) - - def _gradients_order2_norm(self, gradients): - norm = tf.norm( - tf.stack([tf.norm(grad) for grad in gradients if grad is not None])) - return norm + + def __init__(self, model, rho=0.05, num_batch_splits=None, name=None): + super().__init__(name=name) + self.model = model + self.rho = rho + self.num_batch_splits = num_batch_splits + + def train_step(self, data): + """The logic of one SAM training step. + + Args: + data: A nested structure of `Tensor`s. It should be of structure + (x, y, sample_weight) or (x, y). + + Returns: + A dict mapping metric names to running average values. + """ + x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) + + if self.num_batch_splits is not None: + x_split = tf.split(x, self.num_batch_splits) + y_split = tf.split(y, self.num_batch_splits) + else: + x_split = [x] + y_split = [y] + + gradients_all_batches = [] + pred_all_batches = [] + for x_batch, y_batch in zip(x_split, y_split): + epsilon_w_cache = [] + with tf.GradientTape() as tape: + pred = self.model(x_batch) + loss = self.compiled_loss(y_batch, pred) + pred_all_batches.append(pred) + trainable_variables = self.model.trainable_variables + gradients = tape.gradient(loss, trainable_variables) + + gradients_order2_norm = self._gradients_order2_norm(gradients) + scale = self.rho / (gradients_order2_norm + 1e-12) + + for gradient, variable in zip(gradients, trainable_variables): + epsilon_w = gradient * scale + self._distributed_apply_epsilon_w( + variable, epsilon_w, tf.distribute.get_strategy() + ) + epsilon_w_cache.append(epsilon_w) + + with tf.GradientTape() as tape: + pred = self(x_batch) + loss = self.compiled_loss(y_batch, pred) + gradients = tape.gradient(loss, trainable_variables) + if len(gradients_all_batches) == 0: + for gradient in gradients: + gradients_all_batches.append([gradient]) + else: + for gradient, gradient_all_batches in zip( + gradients, gradients_all_batches + ): + gradient_all_batches.append(gradient) + for variable, epsilon_w in zip( + trainable_variables, epsilon_w_cache + ): + # Restore the variable to its original value before + # `apply_gradients()`. + self._distributed_apply_epsilon_w( + variable, -epsilon_w, tf.distribute.get_strategy() + ) + + gradients = [] + for gradient_all_batches in gradients_all_batches: + gradients.append(tf.reduce_sum(gradient_all_batches, axis=0)) + self.optimizer.apply_gradients(zip(gradients, trainable_variables)) + + pred = tf.concat(pred_all_batches, axis=0) + self.compiled_metrics.update_state(y, pred, sample_weight) + return {m.name: m.result() for m in self.metrics} + + def call(self, inputs): + """Forward pass of SAM. + + SAM delegates the forward pass call to the wrapped model. 
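The key quantity in the `train_step` above is the per-variable perturbation epsilon_w = rho * g / (||g||_2 + 1e-12), where the norm is the joint 2-norm over all gradients. A standalone restatement of just that computation (the function name is illustrative):

import tensorflow as tf

def sam_perturbations(gradients, rho=0.05):
    # Joint 2-norm over all non-None gradients, as in _gradients_order2_norm.
    norm = tf.norm(tf.stack([tf.norm(g) for g in gradients if g is not None]))
    scale = rho / (norm + 1e-12)
    return [None if g is None else g * scale for g in gradients]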
+ + Args: + inputs: Tensor. The model inputs. + + Returns: + A Tensor, the outputs of the wrapped model for given `inputs`. + """ + return self.model(inputs) + + def get_config(self): + config = super().get_config() + config.update( + { + "model": serialize_keras_object(self.model), + "rho": self.rho, + } + ) + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + # Avoid mutating the input dict. + config = copy.deepcopy(config) + model = deserialize_layer( + config.pop("model"), custom_objects=custom_objects + ) + config["model"] = model + return super().from_config(config, custom_objects) + + def _distributed_apply_epsilon_w(self, var, epsilon_w, strategy): + # Helper function to apply epsilon_w on model variables. + if isinstance( + tf.distribute.get_strategy(), + ( + tf.distribute.experimental.ParameterServerStrategy, + tf.distribute.experimental.CentralStorageStrategy, + ), + ): + # Under PSS and CSS, the AggregatingVariable has to be kept in sync. + def distribute_apply(strategy, var, epsilon_w): + strategy.extended.update( + var, + lambda x, y: x.assign_add(y), + args=(epsilon_w,), + group=False, + ) + + tf.__internal__.distribute.interim.maybe_merge_call( + distribute_apply, tf.distribute.get_strategy(), var, epsilon_w + ) + else: + var.assign_add(epsilon_w) + + def _gradients_order2_norm(self, gradients): + norm = tf.norm( + tf.stack([tf.norm(grad) for grad in gradients if grad is not None]) + ) + return norm diff --git a/keras/models/sharpness_aware_minimization_test.py b/keras/models/sharpness_aware_minimization_test.py index 7a0fd3760889..7571f179b5b0 100644 --- a/keras/models/sharpness_aware_minimization_test.py +++ b/keras/models/sharpness_aware_minimization_test.py @@ -2,12 +2,13 @@ import os +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.models import sharpness_aware_minimization -from keras.optimizers.optimizer_experimental import adam +from keras.optimizers import adam from keras.testing_infra import test_utils -import tensorflow.compat.v2 as tf ds_combinations = tf.__internal__.distribute.combinations @@ -24,107 +25,138 @@ @test_utils.run_v2_only class SharpnessAwareMinimizationTest(tf.test.TestCase, parameterized.TestCase): - - def test_sam_model_call(self): - model = keras.Sequential([ - keras.Input([2, 2]), - keras.layers.Dense(4), - ]) - sam_model = sharpness_aware_minimization.SharpnessAwareMinimization(model) - data = tf.random.uniform([2, 2]) - self.assertAllClose(model(data), sam_model(data)) - - @ds_combinations.generate( - tf.__internal__.test.combinations.combine(strategy=STRATEGIES)) - def test_sam_model_fit(self, strategy): - with strategy.scope(): - model = keras.Sequential([ - keras.Input([2, 2]), - keras.layers.Dense(4), - keras.layers.Dense(1), - ]) - sam_model = sharpness_aware_minimization.SharpnessAwareMinimization(model) - data = tf.random.uniform([2, 2]) - label = data[:, 0] > 0.5 - - sam_model.compile( - optimizer=adam.Adam(), - loss=keras.losses.BinaryCrossentropy(from_logits=True), - ) - - sam_model.fit(data, label, steps_per_epoch=1) - - @ds_combinations.generate( - tf.__internal__.test.combinations.combine(strategy=STRATEGIES)) - def test_sam_model_fit_with_sub_batch(self, strategy): - with strategy.scope(): - model = keras.Sequential([ - keras.Input([2, 2]), - keras.layers.Dense(4), - keras.layers.Dense(1), - ]) - sam_model = sharpness_aware_minimization.SharpnessAwareMinimization( - model, num_batch_splits=4) - data = tf.random.uniform([48, 2]) - label = 
data[:, 0] > 0.5 - - sam_model.compile( - optimizer=adam.Adam(), - loss=keras.losses.BinaryCrossentropy(from_logits=True), - ) - - sam_model.fit(data, label, steps_per_epoch=1) - - def test_save_sam(self): - model = keras.Sequential([ - keras.Input([2, 2]), - keras.layers.Dense(4), - keras.layers.Dense(1), - ]) - sam_model = sharpness_aware_minimization.SharpnessAwareMinimization(model) - data = tf.random.uniform([1, 2, 2]) - label = data[:, 0] > 0.5 - - sam_model.compile( - optimizer=adam.Adam(), - loss=keras.losses.BinaryCrossentropy(from_logits=True), + def test_sam_model_call(self): + model = keras.Sequential( + [ + keras.Input([2, 2]), + keras.layers.Dense(4), + ] + ) + sam_model = sharpness_aware_minimization.SharpnessAwareMinimization( + model + ) + data = tf.random.uniform([2, 2]) + self.assertAllClose(model(data), sam_model(data)) + + @ds_combinations.generate( + tf.__internal__.test.combinations.combine(strategy=STRATEGIES) ) - - sam_model.fit(data, label) - - path = os.path.join(self.get_temp_dir(), "model") - sam_model.save(path) - loaded_sam_model = keras.models.load_model(path) - loaded_sam_model.load_weights(path) - - self.assertAllClose(sam_model(data), loaded_sam_model(data)) - - def test_checkpoint_sam(self): - model = keras.Sequential([ - keras.Input([2, 2]), - keras.layers.Dense(4), - keras.layers.Dense(1), - ]) - sam_model_1 = sharpness_aware_minimization.SharpnessAwareMinimization(model) - sam_model_2 = sharpness_aware_minimization.SharpnessAwareMinimization(model) - data = tf.random.uniform([1, 2, 2]) - label = data[:, 0] > 0.5 - - sam_model_1.compile( - optimizer=adam.Adam(), - loss=keras.losses.BinaryCrossentropy(from_logits=True), + def test_sam_model_fit(self, strategy): + with strategy.scope(): + model = keras.Sequential( + [ + keras.Input([2, 2]), + keras.layers.Dense(4), + keras.layers.Dense(1), + ] + ) + sam_model = sharpness_aware_minimization.SharpnessAwareMinimization( + model + ) + data = tf.random.uniform([2, 2]) + label = data[:, 0] > 0.5 + + sam_model.compile( + optimizer=adam.Adam(), + loss=keras.losses.BinaryCrossentropy(from_logits=True), + ) + + sam_model.fit(data, label, steps_per_epoch=1) + + @ds_combinations.generate( + tf.__internal__.test.combinations.combine(strategy=STRATEGIES) ) - - sam_model_1.fit(data, label) - - checkpoint = tf.train.Checkpoint(sam_model_1) - checkpoint2 = tf.train.Checkpoint(sam_model_2) - temp_dir = self.get_temp_dir() - save_path = checkpoint.save(temp_dir) - checkpoint2.restore(save_path) - - self.assertAllClose(sam_model_1(data), sam_model_2(data)) + def test_sam_model_fit_with_sub_batch(self, strategy): + with strategy.scope(): + model = keras.Sequential( + [ + keras.Input([2, 2]), + keras.layers.Dense(4), + keras.layers.Dense(1), + ] + ) + sam_model = sharpness_aware_minimization.SharpnessAwareMinimization( + model, num_batch_splits=4 + ) + data = tf.random.uniform([48, 2]) + label = data[:, 0] > 0.5 + + sam_model.compile( + optimizer=adam.Adam(), + loss=keras.losses.BinaryCrossentropy(from_logits=True), + ) + + sam_model.fit(data, label, steps_per_epoch=1) + + def test_save_sam(self): + model = keras.Sequential( + [ + keras.Input([2, 2]), + keras.layers.Dense(4), + keras.layers.Dense(1), + ] + ) + sam_model = sharpness_aware_minimization.SharpnessAwareMinimization( + model + ) + data = tf.random.uniform([1, 2, 2]) + label = data[:, 0] > 0.5 + + sam_model.compile( + optimizer=adam.Adam(), + loss=keras.losses.BinaryCrossentropy(from_logits=True), + ) + + sam_model.fit(data, label) + + with 
self.subTest("savedmodel"): + path = os.path.join(self.get_temp_dir(), "model") + sam_model.save(path) + loaded_sam_model = keras.models.load_model(path) + loaded_sam_model.load_weights(path) + + self.assertAllClose(sam_model(data), loaded_sam_model(data)) + + with self.subTest("keras_v3"): + path = os.path.join(self.get_temp_dir(), "model.keras") + sam_model.save(path) + loaded_sam_model = keras.models.load_model(path) + loaded_sam_model.load_weights(path) + + self.assertAllClose(sam_model(data), loaded_sam_model(data)) + + def test_checkpoint_sam(self): + model = keras.Sequential( + [ + keras.Input([2, 2]), + keras.layers.Dense(4), + keras.layers.Dense(1), + ] + ) + sam_model_1 = sharpness_aware_minimization.SharpnessAwareMinimization( + model + ) + sam_model_2 = sharpness_aware_minimization.SharpnessAwareMinimization( + model + ) + data = tf.random.uniform([1, 2, 2]) + label = data[:, 0] > 0.5 + + sam_model_1.compile( + optimizer=adam.Adam(), + loss=keras.losses.BinaryCrossentropy(from_logits=True), + ) + + sam_model_1.fit(data, label) + + checkpoint = tf.train.Checkpoint(sam_model_1) + checkpoint2 = tf.train.Checkpoint(sam_model_2) + temp_dir = self.get_temp_dir() + save_path = checkpoint.save(temp_dir) + checkpoint2.restore(save_path) + + self.assertAllClose(sam_model_1(data), sam_model_2(data)) if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/optimizers/BUILD b/keras/optimizers/BUILD index e9fea1d46c55..f496373fefd2 100644 --- a/keras/optimizers/BUILD +++ b/keras/optimizers/BUILD @@ -1,17 +1,22 @@ # Description: # Contains the Keras Optimizer API. +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "cuda_py_test") # buildifier: disable=same-origin-load load("@org_keras//keras:keras.bzl", "tf_py_test") +load("@org_keras//keras:keras.bzl", "distribute_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/tensorflow/python:__pkg__", "//third_party/tensorflow/python/distribute:__pkg__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/saved_model:__pkg__", # For unit tests. + "//third_party/tensorflow/python/tpu/tests:__pkg__", # For unit tests. 
+ "//third_party/tensorflow/python/trackable:__pkg__", ], licenses = ["notice"], ) @@ -20,19 +25,41 @@ py_library( name = "optimizers", srcs = [ "__init__.py", + "adadelta.py", + "adafactor.py", + "adagrad.py", + "adam.py", + "adamax.py", + "adamw.py", + "ftrl.py", + "lion.py", + "nadam.py", + "optimizer.py", "optimizer_v1.py", + "rmsprop.py", + "sgd.py", ], srcs_version = "PY3", deps = [ + ":utils", + "//:expect_tensorflow_installed", "//keras:backend", - "//keras/optimizers/legacy:optimizer", - "//keras/optimizers/optimizer_experimental:optimizer", - "//keras/optimizers/optimizer_v2", + "//keras/dtensor:utils", + "//keras/optimizers/legacy:optimizers", "//keras/optimizers/schedules:learning_rate_schedule", "//keras/utils:engine_utils", ], ) +py_library( + name = "utils", + srcs = ["utils.py"], + srcs_version = "PY3", + deps = [ + "//:expect_tensorflow_installed", + ], +) + py_library( name = "legacy_learning_rate_decay", srcs = ["legacy_learning_rate_decay.py"], @@ -44,9 +71,9 @@ py_library( ) tf_py_test( - name = "optimizers_test", + name = "optimizer_v1_test", size = "medium", - srcs = ["optimizers_test.py"], + srcs = ["optimizer_v1_test.py"], python_version = "PY3", shard_count = 8, tags = ["notsan"], @@ -70,3 +97,55 @@ cuda_py_test( "//keras/testing_infra:test_combinations", ], ) + +# TODO(b/228209527): Combine this test with optimizer_test after +# fixing the NCCL issue. +distribute_py_test( + name = "optimizer_pss_test", + size = "medium", + srcs = ["optimizer_pss_test.py"], + shard_count = 32, + tags = [ + "multi_gpu", + "no_oss", + "no_windows", + ], + deps = [ + ":optimizers", + "//:expect_absl_installed", + "//:expect_tensorflow_installed", + "//keras", + "//keras/testing_infra:test_combinations", + ], +) + +distribute_py_test( + name = "optimizer_test", + size = "medium", + srcs = ["optimizer_test.py"], + shard_count = 16, + tags = [ + "multi_gpu", + "no_windows", + "nomultivm", # TODO(b/203558991): Re-enable. + ], + deps = [ + ":optimizers", + "//:expect_absl_installed", + "//:expect_tensorflow_installed", + "//keras", + "//keras/testing_infra:test_combinations", + ], +) + +cuda_py_test( + name = "lion_test", + size = "medium", + srcs = ["lion_test.py"], + shard_count = 4, + deps = [ + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//keras", + ], +) diff --git a/keras/optimizers/__init__.py b/keras/optimizers/__init__.py index eb4642e65090..39a02669950b 100644 --- a/keras/optimizers/__init__.py +++ b/keras/optimizers/__init__.py @@ -12,161 +12,318 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name -# pylint: disable=g-bad-import-order """Built-in optimizer classes. For more examples see the base class `tf.keras.optimizers.Optimizer`. """ -import tensorflow.compat.v2 as tf +# Imports needed for deserialization. -# Symbols to be accessed under keras.optimizers. To be replaced with -# optimizers v2022 when they graduate out of experimental. 
-from keras.optimizers.optimizer_v2.gradient_descent import SGD -from keras.optimizers.optimizer_v2.rmsprop import RMSprop -from keras.optimizers.optimizer_v2.adam import Adam -from keras.optimizers.optimizer_v2.adadelta import Adadelta -from keras.optimizers.optimizer_v2.adagrad import Adagrad -from keras.optimizers.optimizer_v2.adamax import Adamax -from keras.optimizers.optimizer_v2.nadam import Nadam -from keras.optimizers.optimizer_v2.ftrl import Ftrl +import platform +import warnings + +import tensorflow.compat.v2 as tf +from absl import logging -# Imports needed for deserialization. from keras import backend -from keras.optimizers.optimizer_experimental import optimizer as optimizer_experimental -from keras.optimizers.optimizer_experimental import adadelta as adadelta_experimental -from keras.optimizers.optimizer_experimental import adagrad as adagrad_experimental -from keras.optimizers.optimizer_experimental import adam as adam_experimental -from keras.optimizers.optimizer_experimental import adamax as adamax_experimental -from keras.optimizers.optimizer_experimental import adamw as adamw_experimental -from keras.optimizers.optimizer_experimental import ftrl as ftrl_experimental -from keras.optimizers.optimizer_experimental import nadam as nadam_experimental -from keras.optimizers.optimizer_experimental import rmsprop as rmsprop_experimental -from keras.optimizers.optimizer_experimental import sgd as sgd_experimental -from keras.optimizers.legacy import optimizer as optimizer_legacy +from keras.optimizers import adadelta +from keras.optimizers import adafactor +from keras.optimizers import adagrad +from keras.optimizers import adam +from keras.optimizers import adamax +from keras.optimizers import adamw +from keras.optimizers import ftrl +from keras.optimizers import lion +from keras.optimizers import nadam +from keras.optimizers import optimizer as base_optimizer +from keras.optimizers import rmsprop +from keras.optimizers import sgd from keras.optimizers.legacy import adadelta as adadelta_legacy from keras.optimizers.legacy import adagrad as adagrad_legacy from keras.optimizers.legacy import adam as adam_legacy from keras.optimizers.legacy import adamax as adamax_legacy from keras.optimizers.legacy import ftrl as ftrl_legacy +from keras.optimizers.legacy import gradient_descent as gradient_descent_legacy from keras.optimizers.legacy import nadam as nadam_legacy +from keras.optimizers.legacy import optimizer_v2 as base_optimizer_legacy from keras.optimizers.legacy import rmsprop as rmsprop_legacy -from keras.optimizers.legacy import sgd as sgd_legacy +from keras.optimizers.legacy.adadelta import Adadelta +from keras.optimizers.legacy.adagrad import Adagrad +from keras.optimizers.legacy.adam import Adam +from keras.optimizers.legacy.adamax import Adamax +from keras.optimizers.legacy.ftrl import Ftrl + +# Symbols to be accessed under keras.optimizers. To be replaced with +# optimizers v2022 when they graduate out of experimental. 
+from keras.optimizers.legacy.gradient_descent import SGD +from keras.optimizers.legacy.nadam import Nadam +from keras.optimizers.legacy.rmsprop import RMSprop from keras.optimizers.optimizer_v1 import Optimizer from keras.optimizers.optimizer_v1 import TFOptimizer -from keras.optimizers.optimizer_v2 import adadelta as adadelta_v2 -from keras.optimizers.optimizer_v2 import adagrad as adagrad_v2 -from keras.optimizers.optimizer_v2 import adam as adam_v2 -from keras.optimizers.optimizer_v2 import adamax as adamax_v2 -from keras.optimizers.optimizer_v2 import ftrl -from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_v2 -from keras.optimizers.optimizer_v2 import nadam as nadam_v2 -from keras.optimizers.optimizer_v2 import optimizer_v2 as base_optimizer_v2 -from keras.optimizers.optimizer_v2 import rmsprop as rmsprop_v2 -from keras.utils.generic_utils import deserialize_keras_object -from keras.utils.generic_utils import serialize_keras_object +from keras.optimizers.schedules import learning_rate_schedule +from keras.saving.legacy import serialization as legacy_serialization +from keras.saving.serialization_lib import deserialize_keras_object +from keras.saving.serialization_lib import serialize_keras_object + +# isort: off from tensorflow.python.util.tf_export import keras_export +# pylint: disable=line-too-long + + +@keras_export("keras.optimizers.serialize") +def serialize(optimizer, use_legacy_format=False): + """Serialize the optimizer configuration to a JSON-compatible python dict. + + The configuration can be used for persistence and to reconstruct the + `Optimizer` instance again. + + >>> tf.keras.optimizers.serialize(tf.keras.optimizers.legacy.SGD()) + {'module': 'keras.optimizers.legacy', 'class_name': 'SGD', 'config': {'name': 'SGD', 'learning_rate': 0.01, 'decay': 0.0, 'momentum': 0.0, 'nesterov': False}, 'registered_name': None} + + Args: + optimizer: An `Optimizer` instance to serialize. + use_legacy_format: Boolean, whether to serialize with the legacy + serialization format. Defaults to `False`. + + Returns: + Python dict which contains the configuration of the input optimizer. + """ # noqa: E501 + if optimizer is None: + return None + if not isinstance( + optimizer, + ( + base_optimizer.Optimizer, + Optimizer, + base_optimizer_legacy.OptimizerV2, + ), + ): + warnings.warn( + "The `keras.optimizers.serialize()` API should only be used for " + "objects of type `keras.optimizers.Optimizer`. Found an instance " + f"of type {type(optimizer)}, which may lead to improper " + "serialization." + ) + if use_legacy_format: + return legacy_serialization.serialize_keras_object(optimizer) + return serialize_keras_object(optimizer) + + +def is_arm_mac(): + return platform.system() == "Darwin" and platform.processor() == "arm" + -@keras_export('keras.optimizers.serialize') -def serialize(optimizer): - """Serialize the optimizer configuration to JSON compatible python dict. - - The configuration can be used for persistence and reconstruct the `Optimizer` - instance again. - - >>> tf.keras.optimizers.serialize(tf.keras.optimizers.SGD()) - {'class_name': 'SGD', 'config': {'name': 'SGD', 'learning_rate': 0.01, - 'decay': 0.0, 'momentum': 0.0, - 'nesterov': False}} - - Args: - optimizer: An `Optimizer` instance to serialize. - - Returns: - Python dict which contains the configuration of the input optimizer. - """ - return serialize_keras_object(optimizer) - - -@keras_export('keras.optimizers.deserialize') -def deserialize(config, custom_objects=None): - """Inverse of the `serialize` function. - - Args: - config: Optimizer configuration dictionary.
- custom_objects: Optional dictionary mapping names (strings) to custom - objects (classes and functions) to be considered during deserialization. - - Returns: - A Keras Optimizer instance. - """ - # loss_scale_optimizer has a direct dependency of optimizer, import here - # rather than top to avoid the cyclic dependency. - from keras.mixed_precision import loss_scale_optimizer # pylint: disable=g-import-not-at-top - all_classes = { - 'adadelta': adadelta_v2.Adadelta, - 'adagrad': adagrad_v2.Adagrad, - 'adam': adam_v2.Adam, - 'adamax': adamax_v2.Adamax, - 'experimentaladadelta': adadelta_experimental.Adadelta, - 'experimentaladagrad': adagrad_experimental.Adagrad, - 'experimentaladam': adam_experimental.Adam, - 'experimentalsgd': sgd_experimental.SGD, - 'nadam': nadam_v2.Nadam, - 'rmsprop': rmsprop_v2.RMSprop, - 'sgd': gradient_descent_v2.SGD, - 'ftrl': ftrl.Ftrl, - 'lossscaleoptimizer': loss_scale_optimizer.LossScaleOptimizer, - 'lossscaleoptimizerv3': loss_scale_optimizer.LossScaleOptimizerV3, - # LossScaleOptimizerV1 was an old version of LSO that was removed. - # Deserializing it turns it into a LossScaleOptimizer - 'lossscaleoptimizerv1': loss_scale_optimizer.LossScaleOptimizer, - } - - # Make deserialization case-insensitive for built-in optimizers. - if config['class_name'].lower() in all_classes: - config['class_name'] = config['class_name'].lower() - return deserialize_keras_object( - config, - module_objects=all_classes, - custom_objects=custom_objects, - printable_module_name='optimizer') - - -@keras_export('keras.optimizers.get') -def get(identifier): - """Retrieves a Keras Optimizer instance. - - Args: - identifier: Optimizer identifier, one of - - String: name of an optimizer +@keras_export("keras.optimizers.deserialize") +def deserialize(config, custom_objects=None, use_legacy_format=False, **kwargs): + """Inverse of the `serialize` function. + + Args: + config: Optimizer configuration dictionary. + custom_objects: Optional dictionary mapping names (strings) to custom + objects (classes and functions) to be considered during + deserialization. + use_legacy_format: Boolean, whether the config uses the legacy + serialization format. Defaults to `False`. + + Returns: + A Keras Optimizer instance. + """ + # loss_scale_optimizer has a direct dependency on optimizer, import here + # rather than at the top to avoid the cyclic dependency. + from keras.mixed_precision import ( + loss_scale_optimizer, + ) + + use_legacy_optimizer = kwargs.pop("use_legacy_optimizer", False) + if kwargs: + raise TypeError(f"Invalid keyword arguments: {kwargs}") + if len(config["config"]) > 0: + # If the optimizer config is not empty, then we use the value of + # `is_legacy_optimizer` to override `use_legacy_optimizer`. If + # `is_legacy_optimizer` does not exist in config, it means we are + # using the legacy optimizer. + use_legacy_optimizer = config["config"].get("is_legacy_optimizer", True) + if ( + tf.__internal__.tf2.enabled() + and tf.executing_eagerly() + and not is_arm_mac() + and not use_legacy_optimizer + ): + # We observed a slowdown of optimizer on M1 Mac, so we fall back to the + # legacy optimizer for M1 users now, see b/263339144 for more context.
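A hedged round-trip sketch of the `serialize`/`deserialize` pair above; which of the two class tables below is consulted depends on this TF2/eager/ARM branch:

import tensorflow as tf

opt = tf.keras.optimizers.Adam(learning_rate=0.01)
config = tf.keras.optimizers.serialize(opt)
# On TF2 with eager execution (and not on an ARM Mac), this restores the
# new optimizer class; otherwise the legacy implementation is substituted.
restored = tf.keras.optimizers.deserialize(config)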
+ all_classes = { + "adadelta": adadelta.Adadelta, + "adagrad": adagrad.Adagrad, + "adam": adam.Adam, + "adamax": adamax.Adamax, + "experimentaladadelta": adadelta.Adadelta, + "experimentaladagrad": adagrad.Adagrad, + "experimentaladam": adam.Adam, + "experimentalsgd": sgd.SGD, + "nadam": nadam.Nadam, + "rmsprop": rmsprop.RMSprop, + "sgd": sgd.SGD, + "ftrl": ftrl.Ftrl, + "lossscaleoptimizer": loss_scale_optimizer.LossScaleOptimizerV3, + "lossscaleoptimizerv3": loss_scale_optimizer.LossScaleOptimizerV3, + # LossScaleOptimizerV1 was an old version of LSO that was removed. + # Deserializing it turns it into a LossScaleOptimizer + "lossscaleoptimizerv1": loss_scale_optimizer.LossScaleOptimizer, + } + else: + all_classes = { + "adadelta": adadelta_legacy.Adadelta, + "adagrad": adagrad_legacy.Adagrad, + "adam": adam_legacy.Adam, + "adamax": adamax_legacy.Adamax, + "experimentaladadelta": adadelta.Adadelta, + "experimentaladagrad": adagrad.Adagrad, + "experimentaladam": adam.Adam, + "experimentalsgd": sgd.SGD, + "nadam": nadam_legacy.Nadam, + "rmsprop": rmsprop_legacy.RMSprop, + "sgd": gradient_descent_legacy.SGD, + "ftrl": ftrl_legacy.Ftrl, + "lossscaleoptimizer": loss_scale_optimizer.LossScaleOptimizer, + "lossscaleoptimizerv3": loss_scale_optimizer.LossScaleOptimizerV3, + # LossScaleOptimizerV1 was an old version of LSO that was removed. + # Deserializing it turns it into a LossScaleOptimizer + "lossscaleoptimizerv1": loss_scale_optimizer.LossScaleOptimizer, + } + + # Make deserialization case-insensitive for built-in optimizers. + if config["class_name"].lower() in all_classes: + config["class_name"] = config["class_name"].lower() + + if use_legacy_format: + return legacy_serialization.deserialize_keras_object( + config, + module_objects=all_classes, + custom_objects=custom_objects, + printable_module_name="optimizer", + ) + + return deserialize_keras_object( + config, + module_objects=all_classes, + custom_objects=custom_objects, + printable_module_name="optimizer", + ) + + +@keras_export( + "keras.__internal__.optimizers.convert_to_legacy_optimizer", v1=[] +) +def convert_to_legacy_optimizer(optimizer): + """Convert experimental optimizer to legacy optimizer. + + This function takes in a `keras.optimizers.Optimizer` + instance and converts it to the corresponding + `keras.optimizers.legacy.Optimizer` instance. + For example, `keras.optimizers.Adam(...)` to + `keras.optimizers.legacy.Adam(...)`. + + Args: + optimizer: An instance of `keras.optimizers.Optimizer`. + """ + # loss_scale_optimizer has a direct dependency of optimizer, import here + # rather than top to avoid the cyclic dependency. + from keras.mixed_precision import ( + loss_scale_optimizer, + ) + + if not isinstance(optimizer, base_optimizer.Optimizer): + raise ValueError( + "`convert_to_legacy_optimizer` should only be called " + "on instances of `tf.keras.optimizers.Optimizer`, but " + f"received {optimizer} of type {type(optimizer)}." + ) + optimizer_name = optimizer.__class__.__name__.lower() + config = optimizer.get_config() + # Remove fields that only exist in experimental optimizer. 
+ keys_to_remove = [ + "weight_decay", + "use_ema", + "ema_momentum", + "ema_overwrite_frequency", + "jit_compile", + "is_legacy_optimizer", + ] + for key in keys_to_remove: + config.pop(key, None) + + if isinstance(optimizer, loss_scale_optimizer.LossScaleOptimizerV3): + # For LossScaleOptimizers, recursively convert the inner optimizer + config["inner_optimizer"] = convert_to_legacy_optimizer( + optimizer.inner_optimizer + ) + if optimizer_name == "lossscaleoptimizerv3": + optimizer_name = "lossscaleoptimizer" + + # Learning rate can be a custom LearningRateSchedule, which is stored as + # a dict in config, and cannot be deserialized. + if hasattr(optimizer, "_learning_rate") and isinstance( + optimizer._learning_rate, learning_rate_schedule.LearningRateSchedule + ): + config["learning_rate"] = optimizer._learning_rate + legacy_optimizer_config = { + "class_name": optimizer_name, + "config": config, + } + return deserialize(legacy_optimizer_config, use_legacy_optimizer=True) + + +@keras_export("keras.optimizers.get") +def get(identifier, **kwargs): + """Retrieves a Keras Optimizer instance. + + Args: + identifier: Optimizer identifier, one of - String: name of an optimizer - Dictionary: configuration dictionary. - Keras Optimizer instance (it - will be returned unchanged). - TensorFlow Optimizer instance (it - will be wrapped as a Keras Optimizer). - - Returns: - A Keras Optimizer instance. - - Raises: - ValueError: If `identifier` cannot be interpreted. - """ - if isinstance( - identifier, - (Optimizer, base_optimizer_v2.OptimizerV2, - optimizer_experimental.Optimizer)): - return identifier - # Wrap legacy TF optimizer instances - elif isinstance(identifier, tf.compat.v1.train.Optimizer): - opt = TFOptimizer(identifier) - backend.track_tf_optimizer(opt) - return opt - elif isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, str): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) - else: - raise ValueError( - 'Could not interpret optimizer identifier: {}'.format(identifier)) + will be returned unchanged). - TensorFlow Optimizer instance (it will + be wrapped as a Keras Optimizer). + + Returns: + A Keras Optimizer instance. + + Raises: + ValueError: If `identifier` cannot be interpreted. + """ + use_legacy_optimizer = kwargs.pop("use_legacy_optimizer", False) + if kwargs: + raise TypeError(f"Invalid keyword arguments: {kwargs}") + if isinstance( + identifier, + ( + Optimizer, + base_optimizer_legacy.OptimizerV2, + ), + ): + return identifier + elif isinstance(identifier, base_optimizer.Optimizer): + if tf.__internal__.tf2.enabled(): + return identifier + else: + # If TF2 is disabled, we convert to the legacy + # optimizer. 
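For reference, a sketch of the conversion helper defined above in use, assuming the `tf.keras.__internal__` export path from its decorator; `use_ema` is among the experimental-only fields stripped via `keys_to_remove`:

import tensorflow as tf

new_opt = tf.keras.optimizers.Adam(learning_rate=0.001, use_ema=True)
legacy_opt = tf.keras.__internal__.optimizers.convert_to_legacy_optimizer(
    new_opt
)
# `use_ema` has no legacy counterpart, so it was dropped from the config
# before the legacy class was rebuilt.
print(type(legacy_opt))  # <class 'keras.optimizers.legacy.adam.Adam'>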
+ return convert_to_legacy_optimizer(identifier) + + # Wrap legacy TF optimizer instances + elif isinstance(identifier, tf.compat.v1.train.Optimizer): + opt = TFOptimizer(identifier) + backend.track_tf_optimizer(opt) + return opt + elif isinstance(identifier, dict): + use_legacy_format = "module" not in identifier + return deserialize( + identifier, + use_legacy_optimizer=use_legacy_optimizer, + use_legacy_format=use_legacy_format, + ) + elif isinstance(identifier, str): + config = {"class_name": str(identifier), "config": {}} + return get( + config, + use_legacy_optimizer=use_legacy_optimizer, + ) + else: + raise ValueError( + f"Could not interpret optimizer identifier: {identifier}" + ) diff --git a/keras/optimizers/adadelta.py b/keras/optimizers/adadelta.py new file mode 100644 index 000000000000..a82eb5cdface --- /dev/null +++ b/keras/optimizers/adadelta.py @@ -0,0 +1,171 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Adadelta optimizer implementation.""" + +import tensorflow.compat.v2 as tf + +from keras.optimizers import optimizer +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export( + "keras.optimizers.experimental.Adadelta", + "keras.optimizers.Adadelta", + "keras.dtensor.experimental.optimizers.Adadelta", + v1=[], +) +class Adadelta(optimizer.Optimizer): + r"""Optimizer that implements the Adadelta algorithm. + + Adadelta optimization is a stochastic gradient descent method that is based + on adaptive learning rate per dimension to address two drawbacks: + + - The continual decay of learning rates throughout training. + - The need for a manually selected global learning rate. + + Adadelta is a more robust extension of Adagrad that adapts learning rates + based on a moving window of gradient updates, instead of accumulating all + past gradients. This way, Adadelta continues learning even when many updates + have been done. Compared to Adagrad, in the original version of Adadelta you + don't have to set an initial learning rate. In this version, the initial + learning rate can be set, as in most other Keras optimizers. + + Args: + learning_rate: Initial value for the learning rate: either a floating + point value, or a + `tf.keras.optimizers.schedules.LearningRateSchedule` instance. + Defaults to 0.001. Note that `Adadelta` tends to benefit from + higher initial learning rate values compared to other optimizers. To + match the exact form in the original paper, use 1.0. + rho: A `Tensor` or a floating point value. The decay rate. Defaults to + 0.95. + epsilon: Small floating point value used to maintain numerical + stability. Defaults to 1e-7. 
+ {{base_optimizer_keyword_args}} + + Reference: + - [Zeiler, 2012](http://arxiv.org/abs/1212.5701) + """ + + def __init__( + self, + learning_rate=0.001, + rho=0.95, + epsilon=1e-7, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + name="Adadelta", + **kwargs + ): + super().__init__( + weight_decay=weight_decay, + clipnorm=clipnorm, + clipvalue=clipvalue, + global_clipnorm=global_clipnorm, + use_ema=use_ema, + ema_momentum=ema_momentum, + ema_overwrite_frequency=ema_overwrite_frequency, + jit_compile=jit_compile, + name=name, + **kwargs + ) + self._learning_rate = self._build_learning_rate(learning_rate) + self.rho = rho + self.epsilon = epsilon + + def build(self, var_list): + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self._built = True + self._accumulated_grads = [] + self._accumulated_delta_vars = [] + for var in var_list: + self._accumulated_grads.append( + self.add_variable_from_reference(var, "accumulated_grad") + ) + self._accumulated_delta_vars.append( + self.add_variable_from_reference(var, "accumulated_delta_var") + ) + + def update_step(self, grad, variable): + """Update step given gradient and the associated model variable.""" + lr = tf.cast(self.learning_rate, variable.dtype) + + var_key = self._var_key(variable) + rho = self.rho + accumulated_grad = self._accumulated_grads[self._index_dict[var_key]] + accumulated_delta_var = self._accumulated_delta_vars[ + self._index_dict[var_key] + ] + + def rms(x): + return tf.sqrt(x + self.epsilon) + + if isinstance(grad, tf.IndexedSlices): + # Sparse gradients. + accumulated_grad.assign_add((rho - 1) * accumulated_grad) + accumulated_grad.scatter_add( + tf.IndexedSlices( + (1 - rho) * tf.square(grad.values), grad.indices + ) + ) + delta_var = ( + -rms(accumulated_delta_var) * grad / rms(accumulated_grad) + ) + accumulated_delta_var.assign( + rho * accumulated_delta_var + (1 - rho) * delta_var * delta_var + ) + else: + # Dense gradients. + accumulated_grad.assign( + rho * accumulated_grad + (1 - rho) * grad * grad + ) + delta_var = ( + -rms(accumulated_delta_var) * grad / rms(accumulated_grad) + ) + accumulated_delta_var.assign( + rho * accumulated_delta_var + (1 - rho) * delta_var * delta_var + ) + variable.assign_add(lr * delta_var) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "rho": self.rho, + "epsilon": self.epsilon, + } + ) + return config + + +Adadelta.__doc__ = Adadelta.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/adafactor.py b/keras/optimizers/adafactor.py new file mode 100644 index 000000000000..fb93bdac3710 --- /dev/null +++ b/keras/optimizers/adafactor.py @@ -0,0 +1,231 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Adafactor optimizer implementation.""" + +import tensorflow.compat.v2 as tf + +from keras.optimizers import optimizer +from keras.optimizers.schedules import learning_rate_schedule +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export( + "keras.optimizers.Adafactor", + "keras.optimizers.experimental.Adafactor", + v1=[], +) +class Adafactor(optimizer.Optimizer): + """Optimizer that implements the Adafactor algorithm. + + Adafactor is commonly used in NLP tasks, and has the advantage + of requiring less memory because it only stores partial information about + previous gradients. + + The default argument setup is based on the original paper (see reference). + When gradients are of dimension > 2, the Adafactor optimizer factors its + accumulator variables over the last 2 dimensions, reducing over each of + them separately. + + Args: + learning_rate: Initial value for the learning rate: + either a floating point value, + or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance. + Defaults to 0.001. + beta_2_decay: float, defaults to -0.8. The decay rate of `beta_2`. + epsilon_1: float, defaults to 1e-30. A small offset to keep the + denominator away from 0. + epsilon_2: float, defaults to 1e-3. A small offset to avoid the learning + rate becoming too small over time. + clip_threshold: float, defaults to 1.0. Clipping threshold. This is part + of the Adafactor algorithm, independent from `clipnorm`, + `clipvalue` and `global_clipnorm`. + relative_step: bool, defaults to True. If `learning_rate` is a + constant and `relative_step=True`, the learning rate will be adjusted + based on the current iteration. This is the default learning rate decay + in Adafactor. + {{base_optimizer_keyword_args}} + + Reference: + - [Shazeer, Noam et al., 2018](https://arxiv.org/abs/1804.04235). + + """ + + def __init__( + self, + learning_rate=0.001, + beta_2_decay=-0.8, + epsilon_1=1e-30, + epsilon_2=1e-3, + clip_threshold=1.0, + relative_step=True, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + name="Adafactor", + **kwargs, + ): + super().__init__( + name=name, + weight_decay=weight_decay, + clipnorm=clipnorm, + clipvalue=clipvalue, + global_clipnorm=global_clipnorm, + use_ema=use_ema, + ema_momentum=ema_momentum, + ema_overwrite_frequency=ema_overwrite_frequency, + jit_compile=jit_compile, + **kwargs, + ) + self._learning_rate = self._build_learning_rate(learning_rate) + self.beta_2_decay = beta_2_decay + self.epsilon_1 = epsilon_1 + self.epsilon_2 = epsilon_2 + self.clip_threshold = clip_threshold + self.relative_step = relative_step + + def build(self, var_list): + """Initialize optimizer variables. + + Adafactor optimizer has 3 types of variables per model variable: + factored row accumulators (`r`), factored column accumulators (`c`), + and second-moment estimates (`v`). + + Args: + var_list: list of model variables to build Adafactor variables on. + """ + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self._built = True + self._r = [] + self._c = [] + self._v = [] + for var in var_list: + if len(var.shape) < 2: + # Don't factor if variable is of dimension < 2, but we still + # need to create dummy variables as placeholders (the factored + # shapes are sketched in the aside below).
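An aside on the factored shapes used in `build`: for a variable of shape (d1, ..., dn), `r` reduces away the last axis and `c` the second-to-last, so the full second-moment estimate can be reconstructed from their outer product. A toy check of those shapes (values are illustrative):

import tensorflow as tf

var = tf.zeros([6, 4, 3])
grad_sq = tf.square(tf.random.normal(var.shape))

r_shape = var.shape[:-1]                   # (6, 4): last axis reduced away
c_shape = var.shape[:-2] + var.shape[-1:]  # (6, 3): second-to-last reduced away

r = tf.reduce_mean(grad_sq, axis=-1)
c = tf.reduce_mean(grad_sq, axis=-2)
assert r.shape == r_shape and c.shape == c_shape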
+                self._r.append(tf.Variable(0, name=f"r/{var._shared_name}"))
+                self._c.append(tf.Variable(0, name=f"c/{var._shared_name}"))
+            else:
+                # Always factor the last 2 dimensions.
+                r_shape = var.shape[:-1]
+                c_shape = var.shape[:-2] + var.shape[-1]
+                self._r.append(
+                    self.add_variable(
+                        shape=r_shape,
+                        dtype=var.dtype,
+                        name=f"r/{var._shared_name}",
+                    )
+                )
+                self._c.append(
+                    self.add_variable(
+                        shape=c_shape,
+                        dtype=var.dtype,
+                        name=f"c/{var._shared_name}",
+                    )
+                )
+            self._v.append(
+                self.add_variable_from_reference(
+                    model_variable=var, variable_name="v"
+                )
+            )
+
+    def _rms(self, x):
+        return tf.sqrt(tf.reduce_mean(tf.square(x)))
+
+    def update_step(self, gradient, variable):
+        """Update step given gradient and the associated model variable."""
+
+        lr = tf.cast(self.learning_rate, variable.dtype)
+        epsilon_2 = tf.cast(self.epsilon_2, variable.dtype)
+        one = tf.cast(1.0, variable.dtype)
+        local_step = tf.cast(self.iterations + 1, variable.dtype)
+        if (
+            not isinstance(
+                self._learning_rate, learning_rate_schedule.LearningRateSchedule
+            )
+            and self.relative_step
+        ):
+            # If `relative_step=True` and learning rate is a constant, we
+            # apply the relative step algorithm.
+            lr = tf.minimum(lr, tf.math.rsqrt(local_step))
+
+        var_key = self._var_key(variable)
+        r = self._r[self._index_dict[var_key]]
+        c = self._c[self._index_dict[var_key]]
+        v = self._v[self._index_dict[var_key]]
+
+        rho_t = tf.minimum(lr, tf.math.rsqrt(local_step))
+        alpha_t = tf.maximum(epsilon_2, self._rms(variable)) * rho_t
+        regulated_grad_square = tf.square(gradient) + self.epsilon_1
+        beta_2_t = 1 - tf.pow(local_step, self.beta_2_decay)
+
+        if len(variable.shape) >= 2:
+            # `r` deletes the last dimension of gradient, so it is of shape
+            # `gradient.shape[:-1]`.
+            r.assign(
+                beta_2_t * r
+                + (1 - beta_2_t)
+                * tf.reduce_mean(regulated_grad_square, axis=-1)
+            )
+            # `c` deletes the second last dimension of gradient, so it is of
+            # shape `gradient.shape[:-2] + gradient.shape[-1]`.
+            c.assign(
+                beta_2_t * c
+                + (1 - beta_2_t)
+                * tf.reduce_mean(regulated_grad_square, axis=-2)
+            )
+            v.assign(
+                tf.expand_dims(
+                    r / tf.reduce_mean(r, axis=-1, keepdims=True), axis=-1
+                )
+                * tf.expand_dims(c, -2)
+            )
+        else:
+            v.assign(beta_2_t * v + (1 - beta_2_t) * regulated_grad_square)
+
+        # `convert_to_tensor` unifies the handling of sparse and dense grads.
+        u_t = tf.convert_to_tensor(gradient) * tf.math.rsqrt(v)
+        u_t_hat = u_t / tf.maximum(one, (self._rms(u_t) / self.clip_threshold))
+        variable.assign_add(-alpha_t * u_t_hat)
+
+    def get_config(self):
+        config = super().get_config()
+
+        config.update(
+            {
+                "learning_rate": self._serialize_hyperparameter(
+                    self._learning_rate
+                ),
+                "beta_2_decay": self.beta_2_decay,
+                "epsilon_1": self.epsilon_1,
+                "epsilon_2": self.epsilon_2,
+                "clip_threshold": self.clip_threshold,
+                "relative_step": self.relative_step,
+            }
+        )
+        return config
+
+
+Adafactor.__doc__ = Adafactor.__doc__.replace(
+    "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args
+)
diff --git a/keras/optimizers/adagrad.py b/keras/optimizers/adagrad.py
new file mode 100644
index 000000000000..0840d492e21d
--- /dev/null
+++ b/keras/optimizers/adagrad.py
@@ -0,0 +1,150 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Adagrad optimizer implementation.""" + +import tensorflow.compat.v2 as tf + +from keras import initializers +from keras.optimizers import optimizer +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export( + "keras.optimizers.experimental.Adagrad", + "keras.optimizers.Adagrad", + "keras.dtensor.experimental.optimizers.Adagrad", + v1=[], +) +class Adagrad(optimizer.Optimizer): + r"""Optimizer that implements the Adagrad algorithm. + + Adagrad is an optimizer with parameter-specific learning rates, + which are adapted relative to how frequently a parameter gets + updated during training. The more updates a parameter receives, + the smaller the updates. + + Args: + learning_rate: Initial value for the learning rate: + either a floating point value, + or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance. + Defaults to 0.001. Note that `Adagrad` tends to benefit from higher + initial learning rate values compared to other optimizers. To match + the exact form in the original paper, use 1.0. + initial_accumulator_value: Floating point value. + Starting value for the accumulators (per-parameter momentum values). + Must be non-negative. + epsilon: Small floating point value used to maintain numerical + stability. + {{base_optimizer_keyword_args}} + + Reference: + - [Duchi et al., 2011]( + http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf). + """ + + def __init__( + self, + learning_rate=0.001, + initial_accumulator_value=0.1, + epsilon=1e-7, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + name="Adagrad", + **kwargs + ): + super().__init__( + weight_decay=weight_decay, + clipnorm=clipnorm, + clipvalue=clipvalue, + global_clipnorm=global_clipnorm, + use_ema=use_ema, + ema_momentum=ema_momentum, + ema_overwrite_frequency=ema_overwrite_frequency, + jit_compile=jit_compile, + name=name, + **kwargs + ) + self._learning_rate = self._build_learning_rate(learning_rate) + self.initial_accumulator_value = initial_accumulator_value + self.epsilon = epsilon + + def build(self, var_list): + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self._built = True + self._accumulators = [] + initializer = initializers.Constant(self.initial_accumulator_value) + for var in var_list: + self._accumulators.append( + self.add_variable_from_reference( + var, + "accumulator", + initial_value=initializer(shape=var.shape, dtype=var.dtype), + ) + ) + + def update_step(self, grad, variable): + """Update step given gradient and the associated model variable.""" + lr = tf.cast(self.learning_rate, variable.dtype) + + var_key = self._var_key(variable) + accumulator = self._accumulators[self._index_dict[var_key]] + + if isinstance(grad, tf.IndexedSlices): + # Sparse gradients. 
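+            # Only rows named in `grad.indices` are touched: the squared
+            # gradient values are scattered into the accumulator, and the
+            # matching accumulator slices are gathered back to scale the
+            # update, so untouched rows keep both their parameter values and
+            # their accumulated statistics.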
+ accumulator.scatter_add( + tf.IndexedSlices(grad.values * grad.values, grad.indices) + ) + sparse_accumulator = tf.gather(accumulator, indices=grad.indices) + sparse_denominator = tf.sqrt(sparse_accumulator + self.epsilon) + variable.scatter_add( + tf.IndexedSlices( + -lr * grad.values / sparse_denominator, grad.indices + ) + ) + else: + # Dense gradients. + accumulator.assign_add(grad * grad) + variable.assign_sub(lr * grad / tf.sqrt(accumulator + self.epsilon)) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "initial_accumulator_value": self.initial_accumulator_value, + "epsilon": self.epsilon, + } + ) + return config + + +Adagrad.__doc__ = Adagrad.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/adam.py b/keras/optimizers/adam.py new file mode 100644 index 000000000000..e17b10fa82bd --- /dev/null +++ b/keras/optimizers/adam.py @@ -0,0 +1,225 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Adam optimizer implementation.""" + +import tensorflow.compat.v2 as tf + +from keras.optimizers import optimizer +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export( + "keras.optimizers.Adam", + "keras.optimizers.experimental.Adam", + "keras.dtensor.experimental.optimizers.Adam", + v1=[], +) +class Adam(optimizer.Optimizer): + r"""Optimizer that implements the Adam algorithm. + + Adam optimization is a stochastic gradient descent method that is based on + adaptive estimation of first-order and second-order moments. + + According to + [Kingma et al., 2014](http://arxiv.org/abs/1412.6980), + the method is "*computationally + efficient, has little memory requirement, invariant to diagonal rescaling of + gradients, and is well suited for problems that are large in terms of + data/parameters*". + + Args: + learning_rate: A `tf.Tensor`, floating point value, a schedule that is a + `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable + that takes no arguments and returns the actual value to use. The + learning rate. Defaults to `0.001`. + beta_1: A float value or a constant float tensor, or a callable + that takes no arguments and returns the actual value to use. The + exponential decay rate for the 1st moment estimates. + Defaults to `0.9`. + beta_2: A float value or a constant float tensor, or a callable + that takes no arguments and returns the actual value to use. The + exponential decay rate for the 2nd moment estimates. + Defaults to `0.999`. + epsilon: A small constant for numerical stability. 
This epsilon is + "epsilon hat" in the Kingma and Ba paper (in the formula just before + Section 2.1), not the epsilon in Algorithm 1 of the paper. + Defaults to `1e-7`. + amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm + from the paper "On the Convergence of Adam and beyond". + Defaults to `False`. + {{base_optimizer_keyword_args}} + + Reference: + - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) + - [Reddi et al., 2018]( + https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`. + + Notes: + + The default value of 1e-7 for epsilon might not be a good default in + general. For example, when training an Inception network on ImageNet a + current good choice is 1.0 or 0.1. Note that since Adam uses the + formulation just before Section 2.1 of the Kingma and Ba paper rather than + the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon + hat" in the paper. + + The sparse implementation of this algorithm (used when the gradient is an + IndexedSlices object, typically because of `tf.gather` or an embedding + lookup in the forward pass) does apply momentum to variable slices even if + they were not used in the forward pass (meaning they have a gradient equal + to zero). Momentum decay (beta1) is also applied to the entire momentum + accumulator. This means that the sparse behavior is equivalent to the dense + behavior (in contrast to some momentum implementations which ignore momentum + unless a variable slice was actually used). + """ + + def __init__( + self, + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-7, + amsgrad=False, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + name="Adam", + **kwargs + ): + super().__init__( + name=name, + weight_decay=weight_decay, + clipnorm=clipnorm, + clipvalue=clipvalue, + global_clipnorm=global_clipnorm, + use_ema=use_ema, + ema_momentum=ema_momentum, + ema_overwrite_frequency=ema_overwrite_frequency, + jit_compile=jit_compile, + **kwargs + ) + self._learning_rate = self._build_learning_rate(learning_rate) + self.beta_1 = beta_1 + self.beta_2 = beta_2 + self.epsilon = epsilon + self.amsgrad = amsgrad + + def build(self, var_list): + """Initialize optimizer variables. + + Adam optimizer has 3 types of variables: momentums, velocities and + velocity_hat (only set when amsgrad is applied), + + Args: + var_list: list of model variables to build Adam variables on. 
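+
+        Note that calling `build` again after the first call is a no-op; the
+        `_built` flag below makes repeated invocations return early.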
+ """ + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self._built = True + self._momentums = [] + self._velocities = [] + for var in var_list: + self._momentums.append( + self.add_variable_from_reference( + model_variable=var, variable_name="m" + ) + ) + self._velocities.append( + self.add_variable_from_reference( + model_variable=var, variable_name="v" + ) + ) + if self.amsgrad: + self._velocity_hats = [] + for var in var_list: + self._velocity_hats.append( + self.add_variable_from_reference( + model_variable=var, variable_name="vhat" + ) + ) + + def update_step(self, gradient, variable): + """Update step given gradient and the associated model variable.""" + lr = tf.cast(self.learning_rate, variable.dtype) + local_step = tf.cast(self.iterations + 1, variable.dtype) + beta_1_power = tf.pow(tf.cast(self.beta_1, variable.dtype), local_step) + beta_2_power = tf.pow(tf.cast(self.beta_2, variable.dtype), local_step) + + var_key = self._var_key(variable) + m = self._momentums[self._index_dict[var_key]] + v = self._velocities[self._index_dict[var_key]] + + alpha = lr * tf.sqrt(1 - beta_2_power) / (1 - beta_1_power) + + if isinstance(gradient, tf.IndexedSlices): + # Sparse gradients. + m.assign_add(-m * (1 - self.beta_1)) + m.scatter_add( + tf.IndexedSlices( + gradient.values * (1 - self.beta_1), gradient.indices + ) + ) + v.assign_add(-v * (1 - self.beta_2)) + v.scatter_add( + tf.IndexedSlices( + tf.square(gradient.values) * (1 - self.beta_2), + gradient.indices, + ) + ) + if self.amsgrad: + v_hat = self._velocity_hats[self._index_dict[var_key]] + v_hat.assign(tf.maximum(v_hat, v)) + v = v_hat + variable.assign_sub((m * alpha) / (tf.sqrt(v) + self.epsilon)) + else: + # Dense gradients. + m.assign_add((gradient - m) * (1 - self.beta_1)) + v.assign_add((tf.square(gradient) - v) * (1 - self.beta_2)) + if self.amsgrad: + v_hat = self._velocity_hats[self._index_dict[var_key]] + v_hat.assign(tf.maximum(v_hat, v)) + v = v_hat + variable.assign_sub((m * alpha) / (tf.sqrt(v) + self.epsilon)) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "beta_1": self.beta_1, + "beta_2": self.beta_2, + "epsilon": self.epsilon, + "amsgrad": self.amsgrad, + } + ) + return config + + +Adam.__doc__ = Adam.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/adamax.py b/keras/optimizers/adamax.py new file mode 100644 index 000000000000..9b542ee57860 --- /dev/null +++ b/keras/optimizers/adamax.py @@ -0,0 +1,188 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Adamax optimizer implementation."""
+
+import tensorflow.compat.v2 as tf
+
+from keras.optimizers import optimizer
+from keras.saving.object_registration import register_keras_serializable
+
+# isort: off
+from tensorflow.python.util.tf_export import keras_export
+
+
+@register_keras_serializable()
+@keras_export(
+    "keras.optimizers.experimental.Adamax", "keras.optimizers.Adamax", v1=[]
+)
+class Adamax(optimizer.Optimizer):
+    """Optimizer that implements the Adamax algorithm.
+
+    Adamax, a variant of Adam based on the infinity norm, is a first-order
+    gradient-based optimization method. Due to its capability of adjusting the
+    learning rate based on data characteristics, it is suited to learn
+    time-variant processes, e.g., speech data with dynamically changed noise
+    conditions. Default parameters follow those provided in the paper (see
+    references below).
+
+    Initialization:
+
+    ```python
+    m = 0  # Initialize the 1st moment vector
+    u = 0  # Initialize the exponentially weighted infinity norm
+    t = 0  # Initialize timestep
+    ```
+
+    The update rule for parameter `w` with gradient `g` is described at the end
+    of section 7.1 of the paper (see the reference section):
+
+    ```python
+    t += 1
+    m = beta1 * m + (1 - beta1) * g
+    u = max(beta2 * u, abs(g))
+    current_lr = learning_rate / (1 - beta1 ** t)
+    w = w - current_lr * m / (u + epsilon)
+    ```
+
+    Args:
+        learning_rate: A `tf.Tensor`, floating point value, a schedule that is a
+            `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
+            that takes no arguments and returns the actual value to use. The
+            learning rate. Defaults to `0.001`.
+        beta_1: A float value or a constant float tensor. The exponential decay
+            rate for the 1st moment estimates.
+        beta_2: A float value or a constant float tensor. The exponential decay
+            rate for the exponentially weighted infinity norm.
+        epsilon: A small constant for numerical stability.
+        {{base_optimizer_keyword_args}}
+
+    Reference:
+        - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
+    """
+
+    def __init__(
+        self,
+        learning_rate=0.001,
+        beta_1=0.9,
+        beta_2=0.999,
+        epsilon=1e-7,
+        weight_decay=None,
+        clipnorm=None,
+        clipvalue=None,
+        global_clipnorm=None,
+        use_ema=False,
+        ema_momentum=0.99,
+        ema_overwrite_frequency=None,
+        jit_compile=True,
+        name="Adamax",
+        **kwargs
+    ):
+        super().__init__(
+            name=name,
+            weight_decay=weight_decay,
+            clipnorm=clipnorm,
+            clipvalue=clipvalue,
+            global_clipnorm=global_clipnorm,
+            use_ema=use_ema,
+            ema_momentum=ema_momentum,
+            ema_overwrite_frequency=ema_overwrite_frequency,
+            jit_compile=jit_compile,
+            **kwargs
+        )
+        self._learning_rate = self._build_learning_rate(learning_rate)
+        self.beta_1 = beta_1
+        self.beta_2 = beta_2
+        self.epsilon = epsilon
+
+    def build(self, var_list):
+        """Initialize optimizer variables.
+
+        Adamax optimizer has 2 types of variables: momentums (denoted as m),
+        exponentially weighted infinity norm (denoted as u).
+
+        Args:
+            var_list: list of model variables to build Adamax variables on.
+ """ + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self._built = True + self._m = [] + self._u = [] + for var in var_list: + self._m.append( + self.add_variable_from_reference( + model_variable=var, variable_name="m" + ) + ) + self._u.append( + self.add_variable_from_reference( + model_variable=var, variable_name="u" + ) + ) + + def update_step(self, gradient, variable): + """Update step given gradient and the associated model variable.""" + lr = tf.cast(self.learning_rate, variable.dtype) + local_step = tf.cast(self.iterations + 1, variable.dtype) + beta_1_power = tf.pow(tf.cast(self.beta_1, variable.dtype), local_step) + + var_key = self._var_key(variable) + m = self._m[self._index_dict[var_key]] + u = self._u[self._index_dict[var_key]] + + if isinstance(gradient, tf.IndexedSlices): + # Sparse gradients. + indices = gradient.indices + m.assign_add(-m * (1 - self.beta_1)) + m.scatter_add( + tf.IndexedSlices(gradient.values * (1 - self.beta_1), indices) + ) + u.assign(u * self.beta_2) + u_slice = tf.gather(u, indices) + u_slice_incremental = ( + tf.maximum(u_slice, tf.abs(gradient.values)) - u_slice + ) + u.scatter_add(tf.IndexedSlices(u_slice_incremental, indices)) + variable.assign_sub( + (lr * m) / ((1 - beta_1_power) * (u + self.epsilon)) + ) + else: + # Dense gradients. + m.assign_add((gradient - m) * (1 - self.beta_1)) + u.assign(tf.maximum(self.beta_2 * u, tf.abs(gradient))) + variable.assign_sub( + (lr * m) / ((1 - beta_1_power) * (u + self.epsilon)) + ) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "beta_1": self.beta_1, + "beta_2": self.beta_2, + "epsilon": self.epsilon, + } + ) + return config + + +Adamax.__doc__ = Adamax.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/adamw.py b/keras/optimizers/adamw.py new file mode 100644 index 000000000000..8ae5195b5872 --- /dev/null +++ b/keras/optimizers/adamw.py @@ -0,0 +1,230 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""AdamW optimizer implementation.""" + + +import tensorflow.compat.v2 as tf + +from keras.optimizers import optimizer +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export( + "keras.optimizers.AdamW", + "keras.optimizers.experimental.AdamW", + "keras.dtensor.experimental.optimizers.AdamW", + v1=[], +) +class AdamW(optimizer.Optimizer): + r"""Optimizer that implements the AdamW algorithm. 
+
+    AdamW optimization is a stochastic gradient descent method that is based
+    on adaptive estimation of first-order and second-order moments with an
+    added method to decay weights per the techniques discussed in the paper,
+    'Decoupled Weight Decay Regularization' by
+    [Loshchilov & Hutter, 2019](https://arxiv.org/abs/1711.05101).
+
+    According to
+    [Kingma et al., 2014](http://arxiv.org/abs/1412.6980),
+    the underlying Adam method is "*computationally
+    efficient, has little memory requirement, invariant to diagonal rescaling
+    of gradients, and is well suited for problems that are large in terms of
+    data/parameters*".
+
+    Args:
+        learning_rate: A `tf.Tensor`, floating point value, a schedule that is a
+            `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
+            that takes no arguments and returns the actual value to use. The
+            learning rate. Defaults to 0.001.
+        beta_1: A float value or a constant float tensor, or a callable
+            that takes no arguments and returns the actual value to use. The
+            exponential decay rate for the 1st moment estimates.
+            Defaults to 0.9.
+        beta_2: A float value or a constant float tensor, or a callable
+            that takes no arguments and returns the actual value to use. The
+            exponential decay rate for the 2nd moment estimates.
+            Defaults to 0.999.
+        epsilon: A small constant for numerical stability. This epsilon is
+            "epsilon hat" in the Kingma and Ba paper (in the formula just
+            before Section 2.1), not the epsilon in Algorithm 1 of the paper.
+            Defaults to 1e-7.
+        amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm
+            from the paper "On the Convergence of Adam and beyond".
+            Defaults to `False`.
+        {{base_optimizer_keyword_args}}
+
+    Reference:
+        - [Loshchilov et al., 2019](https://arxiv.org/abs/1711.05101)
+        - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) for `adam`
+        - [Reddi et al., 2018](
+            https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`.
+
+    Notes:
+
+    The sparse implementation of this algorithm (used when the gradient is an
+    IndexedSlices object, typically because of `tf.gather` or an embedding
+    lookup in the forward pass) does apply momentum to variable slices even if
+    they were not used in the forward pass (meaning they have a gradient equal
+    to zero). Momentum decay (beta1) is also applied to the entire momentum
+    accumulator. This means that the sparse behavior is equivalent to the
+    dense behavior (in contrast to some momentum implementations which ignore
+    momentum unless a variable slice was actually used).
+    """
+
+    def __init__(
+        self,
+        learning_rate=0.001,
+        weight_decay=0.004,
+        beta_1=0.9,
+        beta_2=0.999,
+        epsilon=1e-7,
+        amsgrad=False,
+        clipnorm=None,
+        clipvalue=None,
+        global_clipnorm=None,
+        use_ema=False,
+        ema_momentum=0.99,
+        ema_overwrite_frequency=None,
+        jit_compile=True,
+        name="AdamW",
+        **kwargs
+    ):
+        super().__init__(
+            name=name,
+            clipnorm=clipnorm,
+            clipvalue=clipvalue,
+            global_clipnorm=global_clipnorm,
+            use_ema=use_ema,
+            ema_momentum=ema_momentum,
+            ema_overwrite_frequency=ema_overwrite_frequency,
+            jit_compile=jit_compile,
+            **kwargs
+        )
+        self._learning_rate = self._build_learning_rate(learning_rate)
+        self.weight_decay = weight_decay
+        self.beta_1 = beta_1
+        self.beta_2 = beta_2
+        self.epsilon = epsilon
+        self.amsgrad = amsgrad
+
+        if self.weight_decay is None:
+            raise ValueError(
+                "Missing value of `weight_decay` which is required and"
+                " must be a float value."
+            )
+
+    def build(self, var_list):
+        """Initialize optimizer variables.
+ + AdamW optimizer has 3 types of variables: momentums, velocities and + velocity_hat (only set when amsgrad is applied), + + Args: + var_list: list of model variables to build AdamW variables on. + """ + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self._built = True + self._momentums = [] + self._velocities = [] + for var in var_list: + self._momentums.append( + self.add_variable_from_reference( + model_variable=var, variable_name="m" + ) + ) + self._velocities.append( + self.add_variable_from_reference( + model_variable=var, variable_name="v" + ) + ) + if self.amsgrad: + self._velocity_hats = [] + for var in var_list: + self._velocity_hats.append( + self.add_variable_from_reference( + model_variable=var, variable_name="vhat" + ) + ) + + def update_step(self, gradient, variable): + """Update step given gradient and the associated model variable.""" + lr = tf.cast(self.learning_rate, variable.dtype) + local_step = tf.cast(self.iterations + 1, variable.dtype) + beta_1_power = tf.pow(tf.cast(self.beta_1, variable.dtype), local_step) + beta_2_power = tf.pow(tf.cast(self.beta_2, variable.dtype), local_step) + + var_key = self._var_key(variable) + m = self._momentums[self._index_dict[var_key]] + v = self._velocities[self._index_dict[var_key]] + + alpha = lr * tf.sqrt(1 - beta_2_power) / (1 - beta_1_power) + + if isinstance(gradient, tf.IndexedSlices): + # Sparse gradients. + m.assign_add(-m * (1 - self.beta_1)) + m.scatter_add( + tf.IndexedSlices( + gradient.values * (1 - self.beta_1), gradient.indices + ) + ) + v.assign_add(-v * (1 - self.beta_2)) + v.scatter_add( + tf.IndexedSlices( + tf.square(gradient.values) * (1 - self.beta_2), + gradient.indices, + ) + ) + if self.amsgrad: + v_hat = self._velocity_hats[self._index_dict[var_key]] + v_hat.assign(tf.maximum(v_hat, v)) + v = v_hat + variable.assign_sub((m * alpha) / (tf.sqrt(v) + self.epsilon)) + else: + # Dense gradients. + m.assign_add((gradient - m) * (1 - self.beta_1)) + v.assign_add((tf.square(gradient) - v) * (1 - self.beta_2)) + if self.amsgrad: + v_hat = self._velocity_hats[self._index_dict[var_key]] + v_hat.assign(tf.maximum(v_hat, v)) + v = v_hat + variable.assign_sub((m * alpha) / (tf.sqrt(v) + self.epsilon)) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "weight_decay": self.weight_decay, + "beta_1": self.beta_1, + "beta_2": self.beta_2, + "epsilon": self.epsilon, + "amsgrad": self.amsgrad, + } + ) + return config + + +AdamW.__doc__ = AdamW.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/ftrl.py b/keras/optimizers/ftrl.py new file mode 100644 index 000000000000..30f4db99c928 --- /dev/null +++ b/keras/optimizers/ftrl.py @@ -0,0 +1,258 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
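+#
+# (Note: the moment updates in `update_step` above are identical to Adam's.
+# The "decoupled" part of AdamW is that `weight_decay` is handled by the base
+# `Optimizer` in its `apply_gradients` path, which, roughly and independently
+# of `m`/`v`, shrinks each weight as `w -= learning_rate * weight_decay * w`
+# before the moment update runs.)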
+# ============================================================================== +"""FTRL optimizer implementation.""" + +import tensorflow.compat.v2 as tf + +from keras.optimizers import optimizer +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export( + "keras.optimizers.experimental.Ftrl", "keras.optimizers.Ftrl", v1=[] +) +class Ftrl(optimizer.Optimizer): + r"""Optimizer that implements the FTRL algorithm. + + "Follow The Regularized Leader" (FTRL) is an optimization algorithm + developed at Google for click-through rate prediction in the early 2010s. It + is most suitable for shallow models with large and sparse feature spaces. + The algorithm is described by + [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf). + The Keras version has support for both online L2 regularization + (the L2 regularization described in the paper + above) and shrinkage-type L2 regularization + (which is the addition of an L2 penalty to the loss function). + + Initialization: + + ```python + n = 0 + sigma = 0 + z = 0 + ``` + + Update rule for one variable `w`: + + ```python + prev_n = n + n = n + g ** 2 + sigma = (n ** -lr_power - prev_n ** -lr_power) / lr + z = z + g - sigma * w + if abs(z) < lambda_1: + w = 0 + else: + w = (sgn(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2) + ``` + + Notation: + + - `lr` is the learning rate + - `g` is the gradient for the variable + - `lambda_1` is the L1 regularization strength + - `lambda_2` is the L2 regularization strength + - `lr_power` is the power to scale n. + + Check the documentation for the `l2_shrinkage_regularization_strength` + parameter for more details when shrinkage is enabled, in which case gradient + is replaced with a gradient with shrinkage. + + Args: + learning_rate: A `Tensor`, floating point value, a schedule that is a + `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable + that takes no arguments and returns the actual value to use. The + learning rate. Defaults to `0.001`. + learning_rate_power: A float value, must be less or equal to zero. + Controls how the learning rate decreases during training. Use zero + for a fixed learning rate. + initial_accumulator_value: The starting value for accumulators. Only + zero or positive values are allowed. + l1_regularization_strength: A float value, must be greater than or equal + to zero. Defaults to `0.0`. + l2_regularization_strength: A float value, must be greater than or equal + to zero. Defaults to `0.0`. + l2_shrinkage_regularization_strength: A float value, must be greater + than or equal to zero. This differs from L2 above in that the L2 + above is a stabilization penalty, whereas this L2 shrinkage is a + magnitude penalty. When input is sparse shrinkage will only happen + on the active weights. + beta: A float value, representing the beta value from the paper. + Defaults to 0.0. 
+ {{base_optimizer_keyword_args}} + """ + + def __init__( + self, + learning_rate=0.001, + learning_rate_power=-0.5, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.0, + l2_shrinkage_regularization_strength=0.0, + beta=0.0, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + name="Ftrl", + **kwargs, + ): + super().__init__( + name=name, + weight_decay=weight_decay, + clipnorm=clipnorm, + clipvalue=clipvalue, + global_clipnorm=global_clipnorm, + use_ema=use_ema, + ema_momentum=ema_momentum, + ema_overwrite_frequency=ema_overwrite_frequency, + jit_compile=jit_compile, + **kwargs, + ) + + if initial_accumulator_value < 0.0: + raise ValueError( + "`initial_accumulator_value` needs to be positive or zero. " + "Received: initial_accumulator_value=" + f"{initial_accumulator_value}." + ) + if learning_rate_power > 0.0: + raise ValueError( + "`learning_rate_power` needs to be negative or zero. Received: " + f"learning_rate_power={learning_rate_power}." + ) + if l1_regularization_strength < 0.0: + raise ValueError( + "`l1_regularization_strength` needs to be positive or zero. " + "Received: l1_regularization_strength=" + f"{l1_regularization_strength}." + ) + if l2_regularization_strength < 0.0: + raise ValueError( + "`l2_regularization_strength` needs to be positive or zero. " + "Received: l2_regularization_strength=" + f"{l2_regularization_strength}." + ) + if l2_shrinkage_regularization_strength < 0.0: + raise ValueError( + "`l2_shrinkage_regularization_strength` needs to be positive " + "or zero. Received: l2_shrinkage_regularization_strength" + f"={l2_shrinkage_regularization_strength}." + ) + + self._learning_rate = self._build_learning_rate(learning_rate) + self.learning_rate_power = learning_rate_power + self.initial_accumulator_value = initial_accumulator_value + self.l1_regularization_strength = l1_regularization_strength + self.l2_regularization_strength = l2_regularization_strength + self.l2_shrinkage_regularization_strength = ( + l2_shrinkage_regularization_strength + ) + self.beta = beta + + def build(self, var_list): + """Initialize optimizer variables. + + Args: + var_list: list of model variables to build Ftrl variables on. + """ + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self._accumulators = [] + self._linears = [] + for var in var_list: + self._accumulators.append( + self.add_variable_from_reference( + model_variable=var, + variable_name="accumulator", + initial_value=tf.cast( + tf.fill( + dims=var.shape, value=self.initial_accumulator_value + ), + dtype=var.dtype, + ), + ) + ) + self._linears.append( + self.add_variable_from_reference( + model_variable=var, variable_name="linear" + ) + ) + self._built = True + + def update_step(self, gradient, variable): + """Update step given gradient and the associated model variable.""" + + lr = tf.cast(self.learning_rate, variable.dtype) + var_key = self._var_key(variable) + accum = self._accumulators[self._index_dict[var_key]] + linear = self._linears[self._index_dict[var_key]] + + lr_power = self.learning_rate_power + l2_reg = self.l2_regularization_strength + l2_reg = l2_reg + self.beta / (2.0 * lr) + + # Ftrl optimizer has the same implementation for sparse and dense + # gradients update. 
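+        # In the docstring's notation, `accum` is n and `linear` is z; the
+        # assignments below compute, with g_shrunk = g + 2 * l2_shrinkage * w:
+        #   n_new = n + g ** 2
+        #   z += g_shrunk - (n_new ** -lr_power - n ** -lr_power) / lr * w
+        #   w = (clip(z, +/-lambda_1) - z)
+        #       / (n_new ** -lr_power / lr + 2 * l2_reg)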
+ grad_to_use = ( + gradient + 2 * self.l2_shrinkage_regularization_strength * variable + ) + new_accum = accum + tf.pow(gradient, 2) + linear.assign_add( + grad_to_use + - (tf.pow(new_accum, -lr_power) - tf.pow(accum, -lr_power)) + / lr + * variable + ) + quadratic = tf.pow(new_accum, (-lr_power)) / lr + 2 * l2_reg + linear_clipped = tf.clip_by_value( + linear, + -self.l1_regularization_strength, + self.l1_regularization_strength, + ) + variable.assign((linear_clipped - linear) / quadratic) + accum.assign(new_accum) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "learning_rate_power": self.learning_rate_power, + "initial_accumulator_value": self.initial_accumulator_value, + "l1_regularization_strength": self.l1_regularization_strength, + "l2_regularization_strength": self.l2_regularization_strength, + "l2_shrinkage_regularization_strength": self.l2_shrinkage_regularization_strength, # noqa: E501 + "beta": self.beta, + } + ) + return config + + +Ftrl.__doc__ = Ftrl.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/legacy/BUILD b/keras/optimizers/legacy/BUILD index bc2d850fcfa1..ee714565e0ff 100644 --- a/keras/optimizers/legacy/BUILD +++ b/keras/optimizers/legacy/BUILD @@ -1,42 +1,163 @@ -# Legacy Keras optimizers. +# Description: +# Contains the Keras OptimizerV2 API (internal TensorFlow version). + +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "cuda_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], + # TODO(scottzhu): Remove non-keras deps from TF. default_visibility = [ "//keras:friends", + "//third_party/tensorflow/cc/saved_model:__pkg__", # For unit tests. + "//third_party/tensorflow/python/trackable:__pkg__", ], licenses = ["notice"], ) py_library( - name = "optimizer", + name = "optimizers", srcs = [ "adadelta.py", "adagrad.py", "adam.py", "adamax.py", "ftrl.py", + "gradient_descent.py", "nadam.py", - "optimizer.py", + "optimizer_v2.py", "rmsprop.py", - "sgd.py", ], srcs_version = "PY3", deps = [ "//:expect_tensorflow_installed", - "//keras/optimizers/optimizer_v2", + "//keras:backend", + "//keras:backend_config", + "//keras/engine:base_layer_utils", + "//keras/initializers", + "//keras/optimizers:utils", + "//keras/optimizers/schedules:learning_rate_schedule", + "//keras/utils:layer_utils", + "//keras/utils:tf_utils", + ], +) + +cuda_py_test( + name = "adagrad_test", + size = "medium", + srcs = ["adagrad_test.py"], + shard_count = 4, + deps = [ + ":optimizers", + "//:expect_tensorflow_installed", + "//keras/testing_infra:test_combinations", + ], +) + +cuda_py_test( + name = "adam_test", + size = "medium", + srcs = ["adam_test.py"], + shard_count = 4, + tags = [ + "no_rocm", + "no_windows", # TODO(b/171384138) + ], + deps = [ + ":optimizers", + "//:expect_tensorflow_installed", + "//keras/testing_infra:test_combinations", + ], +) + +cuda_py_test( + name = "adamax_test", + size = "medium", + srcs = ["adamax_test.py"], + shard_count = 4, + # TODO(b/168527439): invalid resource variable reference on GPU for TFRT. 
+ tags = ["no_rocm"], + deps = [ + ":optimizers", + "//:expect_tensorflow_installed", + "//keras/testing_infra:test_combinations", + ], +) + +cuda_py_test( + name = "adadelta_test", + size = "medium", + srcs = ["adadelta_test.py"], + shard_count = 4, + # TODO(b/168527439): invalid resource variable reference on GPU for TFRT. + deps = [ + ":optimizers", + "//:expect_tensorflow_installed", + "//keras/testing_infra:test_combinations", ], ) cuda_py_test( - name = "optimizer_test", + name = "ftrl_test", size = "medium", - srcs = ["optimizer_test.py"], + srcs = ["ftrl_test.py"], + shard_count = 4, + deps = [ + ":optimizers", + "//:expect_tensorflow_installed", + ], +) + +cuda_py_test( + name = "gradient_descent_test", + size = "medium", + srcs = ["gradient_descent_test.py"], + shard_count = 4, + deps = [ + ":optimizers", + "//:expect_tensorflow_installed", + "//keras/testing_infra:test_combinations", + ], +) + +cuda_py_test( + name = "nadam_test", + size = "medium", + srcs = ["nadam_test.py"], + shard_count = 4, + deps = [ + ":optimizers", + "//:expect_tensorflow_installed", + ], +) + +cuda_py_test( + name = "optimizer_v2_test", + size = "medium", + srcs = ["optimizer_v2_test.py"], shard_count = 8, + tags = [ + "no_windows", + ], deps = [ - ":optimizer", + ":optimizers", "//:expect_absl_installed", "//:expect_tensorflow_installed", "//keras", + "//keras/testing_infra:test_combinations", + ], +) + +cuda_py_test( + name = "rmsprop_test", + size = "medium", + srcs = ["rmsprop_test.py"], + shard_count = 2, + # TODO(b/168527439): invalid resource variable reference on GPU for TFRT. + deps = [ + ":optimizers", + "//:expect_absl_installed", + "//:expect_tensorflow_installed", + "//keras/testing_infra:test_combinations", ], ) diff --git a/keras/optimizers/legacy/__init__.py b/keras/optimizers/legacy/__init__.py index 144c69218e11..78cb171abbaf 100644 --- a/keras/optimizers/legacy/__init__.py +++ b/keras/optimizers/legacy/__init__.py @@ -12,13 +12,3 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Legacy optimizer package.""" - -from keras.optimizers.legacy.adadelta import Adadelta -from keras.optimizers.legacy.adagrad import Adagrad -from keras.optimizers.legacy.adam import Adam -from keras.optimizers.legacy.adamax import Adamax -from keras.optimizers.legacy.ftrl import Ftrl -from keras.optimizers.legacy.nadam import Nadam -from keras.optimizers.legacy.rmsprop import RMSprop -from keras.optimizers.legacy.sgd import SGD diff --git a/keras/optimizers/legacy/adadelta.py b/keras/optimizers/legacy/adadelta.py index b803159d1fb9..9310a9bfcfd5 100644 --- a/keras/optimizers/legacy/adadelta.py +++ b/keras/optimizers/legacy/adadelta.py @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,153 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Legacy Adadelta optimizer implementation.""" +"""Adadelta optimizer implementation.""" -from keras.optimizers.optimizer_v2 import adadelta +import numpy as np +import tensorflow.compat.v2 as tf +from keras import backend_config +from keras.optimizers.legacy import optimizer_v2 + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.optimizers.legacy.Adadelta') -class Adadelta(adadelta.Adadelta): - pass +@keras_export( + "keras.optimizers.legacy.Adadelta", + v1=["keras.optimizers.Adadelta", "keras.optimizers.legacy.Adadelta"], +) +class Adadelta(optimizer_v2.OptimizerV2): + r"""Optimizer that implements the Adadelta algorithm. + + Adadelta optimization is a stochastic gradient descent method that is based + on adaptive learning rate per dimension to address two drawbacks: + + - The continual decay of learning rates throughout training. + - The need for a manually selected global learning rate. + + Adadelta is a more robust extension of Adagrad that adapts learning rates + based on a moving window of gradient updates, instead of accumulating all + past gradients. This way, Adadelta continues learning even when many updates + have been done. Compared to Adagrad, in the original version of Adadelta you + don't have to set an initial learning rate. In this version, the initial + learning rate can be set, as in most other Keras optimizers. + + Args: + learning_rate: Initial value for the learning rate: + either a floating point value, + or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance. + Note that `Adadelta` tends to benefit from higher initial learning rate + values compared to other optimizers. + To match the exact form in the original paper, use 1.0. + Defaults to `0.001`. + rho: A `Tensor` or a floating point value. The decay rate. + epsilon: Small floating point value used to maintain numerical stability. + name: Optional name prefix for the operations created when applying + gradients. Defaults to `"Adadelta"`. + **kwargs: keyword arguments. Allowed arguments are `clipvalue`, + `clipnorm`, `global_clipnorm`. + If `clipvalue` (float) is set, the gradient of each weight + is clipped to be no higher than this value. + If `clipnorm` (float) is set, the gradient of each weight + is individually clipped so that its norm is no higher than this value. + If `global_clipnorm` (float) is set the gradient of all weights is + clipped so that their global norm is no higher than this value. + + Reference: + - [Zeiler, 2012](http://arxiv.org/abs/1212.5701) + """ + + _HAS_AGGREGATE_GRAD = True + + def __init__( + self, + learning_rate=0.001, + rho=0.95, + epsilon=1e-7, + name="Adadelta", + **kwargs + ): + super().__init__(name, **kwargs) + self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) + self._set_hyper("decay", self._initial_decay) + self._set_hyper("rho", rho) + self.epsilon = epsilon or backend_config.epsilon() + + def _create_slots(self, var_list): + # Separate for-loops to respect the ordering of slot variables from v1. 
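+        # (Slot creation order determines the layout of `optimizer.weights`,
+        # which `set_weights` below relies on when restoring V1 checkpoints.)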
+ for v in var_list: + self.add_slot(v, "accum_grad") + for v in var_list: + self.add_slot(v, "accum_var") + + def _prepare_local(self, var_device, var_dtype, apply_state): + super()._prepare_local(var_device, var_dtype, apply_state) + apply_state[(var_device, var_dtype)].update( + dict( + epsilon=tf.convert_to_tensor(self.epsilon, var_dtype), + rho=tf.identity(self._get_hyper("rho", var_dtype)), + ) + ) + + def set_weights(self, weights): + params = self.weights + # Override set_weights for backward compatibility of Keras V1 optimizer + # since it does not include iteration at head of the weight list. Set + # iteration to 0. + if len(params) == len(weights) + 1: + weights = [np.array(0)] + weights + super().set_weights(weights) + + def _resource_apply_dense(self, grad, var, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + accum_grad = self.get_slot(var, "accum_grad") + accum_var = self.get_slot(var, "accum_var") + return tf.raw_ops.ResourceApplyAdadelta( + var=var.handle, + accum=accum_grad.handle, + accum_update=accum_var.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + epsilon=coefficients["epsilon"], + grad=grad, + use_locking=self._use_locking, + ) + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + accum_grad = self.get_slot(var, "accum_grad") + accum_var = self.get_slot(var, "accum_var") + return tf.raw_ops.ResourceSparseApplyAdadelta( + var=var.handle, + accum=accum_grad.handle, + accum_update=accum_var.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + epsilon=coefficients["epsilon"], + grad=grad, + indices=indices, + use_locking=self._use_locking, + ) + + def get_config(self): + config = super().get_config() + config.update( + { + "learning_rate": self._serialize_hyperparameter( + "learning_rate" + ), + "decay": self._initial_decay, + "rho": self._serialize_hyperparameter("rho"), + "epsilon": self.epsilon, + } + ) + return config diff --git a/keras/optimizers/legacy/adadelta_test.py b/keras/optimizers/legacy/adadelta_test.py new file mode 100644 index 000000000000..b9d8937b266f --- /dev/null +++ b/keras/optimizers/legacy/adadelta_test.py @@ -0,0 +1,223 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Adadelta Optimizer.""" + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.optimizers.legacy import adadelta +from keras.testing_infra import test_combinations + +_DATA_TYPES = [tf.half, tf.float32, tf.float64, tf.complex64, tf.complex128] + + +class AdadeltaOptimizerTest(tf.test.TestCase, parameterized.TestCase): + def doTestBasic(self, use_resource=False, use_callable_params=False): + num_updates = 4 # number of ADADELTA steps to perform + for dtype in _DATA_TYPES: + for grad in [0.2, 0.1, 0.01]: + for lr in [1.0, 0.5, 0.1]: + var0_init = [1.0, 2.0] + var1_init = [3.0, 4.0] + if use_resource: + var0 = tf.Variable(var0_init, dtype=dtype) + var1 = tf.Variable(var1_init, dtype=dtype) + else: + var0 = tf.Variable(var0_init, dtype=dtype) + var1 = tf.Variable(var1_init, dtype=dtype) + + grads = tf.constant([grad, grad], dtype=dtype) + + accum = 0.0 + accum_update = 0.0 + + # ADADELTA gradient optimizer + rho = 0.95 + epsilon = 1e-8 + if use_callable_params: + adadelta_opt = adadelta.Adadelta( + learning_rate=lambda: lr, + rho=lambda: rho, + epsilon=epsilon, + ) + else: + adadelta_opt = adadelta.Adadelta( + learning_rate=lr, rho=rho, epsilon=epsilon + ) + if not tf.executing_eagerly(): + adadelta_update = adadelta_opt.apply_gradients( + zip([grads, grads], [var0, var1]) + ) + self.evaluate( + tf.compat.v1.global_variables_initializer() + ) + + # Assign slots + slot = [None] * 2 + slot_update = [None] * 2 + slot[0] = adadelta_opt.get_slot(var0, "accum_grad") + self.assertEqual(slot[0].shape, var0.shape) + + slot_update[0] = adadelta_opt.get_slot( + var0, "accum_var" + ) + self.assertEqual(slot_update[0].shape, var0.shape) + + slot[1] = adadelta_opt.get_slot(var1, "accum_grad") + self.assertEqual(slot[1].shape, var1.shape) + + slot_update[1] = adadelta_opt.get_slot( + var1, "accum_var" + ) + self.assertEqual(slot_update[1].shape, var1.shape) + + # Fetch params to validate initial values + self.assertAllClose(var0_init, self.evaluate(var0)) + self.assertAllClose(var1_init, self.evaluate(var1)) + + update = [None] * num_updates + tot_update = 0 + for step in range(num_updates): + # Run adadelta update for comparison + if not tf.executing_eagerly(): + self.evaluate(adadelta_update) + else: + adadelta_opt.apply_gradients( + zip([grads, grads], [var0, var1]) + ) + + # Perform initial update without previous accum values + accum = accum * rho + (grad**2) * (1 - rho) + update[step] = ( + np.sqrt(accum_update + epsilon) + * (1.0 / np.sqrt(accum + epsilon)) + * grad + ) + accum_update = accum_update * rho + ( + update[step] ** 2 + ) * (1.0 - rho) + tot_update += update[step] * lr + + if not tf.executing_eagerly(): + # Check that the accumulators have been updated + # TODO(lxuechen): This is hard to test in eager mode + for slot_idx in range(2): + self.assertAllCloseAccordingToType( + np.array( + [accum, accum], + dtype=dtype.as_numpy_dtype(0), + ), + self.evaluate(slot[slot_idx]), + rtol=1e-5, + ) + + self.assertAllCloseAccordingToType( + np.array( + [accum_update, accum_update], + dtype=dtype.as_numpy_dtype(0), + ), + self.evaluate(slot_update[slot_idx]), + rtol=1e-5, + ) + + # Check that the parameters have been updated + self.assertAllCloseAccordingToType( + np.array( + [ + var0_init[0] - tot_update, + var0_init[1] - tot_update, + ], + dtype=dtype.as_numpy_dtype(0), + ), + self.evaluate(var0), + rtol=1e-5, + ) + + self.assertAllCloseAccordingToType( 
+ np.array( + [ + var1_init[0] - tot_update, + var1_init[1] - tot_update, + ], + dtype=dtype.as_numpy_dtype(0), + ), + self.evaluate(var1), + rtol=1e-5, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testResourceBasic(self): + self.doTestBasic(use_resource=True) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testBasicCallableParams(self): + self.doTestBasic(use_resource=True, use_callable_params=True) + + def testMinimizeSparseResourceVariable(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) + x = tf.constant([[4.0], [5.0]], dtype=dtype) + + def loss(): + pred = tf.matmul( + tf.compat.v1.nn.embedding_lookup([var0], [0]), x + ) + return pred * pred + + sgd_op = adadelta.Adadelta(1.0, 1.0, 1.0).minimize( + loss, var_list=[var0] + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllCloseAccordingToType( + [[1.0, 2.0]], self.evaluate(var0) + ) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[-111, -138]], self.evaluate(var0) + ) + + def testConstructAdadeltaWithLR(self): + opt = adadelta.Adadelta(lr=1.0, rho=0.9, epsilon=1.0) + opt_2 = adadelta.Adadelta( + learning_rate=0.1, rho=0.9, epsilon=1.0, lr=1.0 + ) + opt_3 = adadelta.Adadelta(learning_rate=0.1, rho=0.9, epsilon=1.0) + self.assertIsInstance(opt.lr, tf.Variable) + self.assertIsInstance(opt_2.lr, tf.Variable) + self.assertIsInstance(opt_3.lr, tf.Variable) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(opt.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) + + def testConstructAdadeltaWithEpsilonValues(self): + opt = adadelta.Adadelta(epsilon=None) + self.assertEqual(opt.epsilon, 1e-7) + + opt = adadelta.Adadelta(epsilon=1e-8) + self.assertEqual(opt.epsilon, 1e-8) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/adagrad.py b/keras/optimizers/legacy/adagrad.py index 895ed7d9aa7c..4b130051416d 100644 --- a/keras/optimizers/legacy/adagrad.py +++ b/keras/optimizers/legacy/adagrad.py @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,174 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Legacy Adagrad optimizer implementation.""" +"""Adagrad optimizer implementation.""" -from keras.optimizers.optimizer_v2 import adagrad +import numpy as np +import tensorflow.compat.v2 as tf +from keras import backend_config +from keras.optimizers.legacy import optimizer_v2 + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.optimizers.legacy.Adagrad') -class Adagrad(adagrad.Adagrad): - pass +@keras_export( + "keras.optimizers.legacy.Adagrad", + v1=["keras.optimizers.Adagrad", "keras.optimizers.legacy.Adagrad"], +) +class Adagrad(optimizer_v2.OptimizerV2): + r"""Optimizer that implements the Adagrad algorithm. 
+
+    Adagrad is an optimizer with parameter-specific learning rates,
+    which are adapted relative to how frequently a parameter gets
+    updated during training. The more updates a parameter receives,
+    the smaller the updates.
+
+    Args:
+        learning_rate: Initial value for the learning rate:
+            either a floating point value,
+            or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance.
+            Note that `Adagrad` tends to benefit from higher initial learning
+            rate values compared to other optimizers.
+            To match the exact form in the original paper, use 1.0.
+            Defaults to `0.001`.
+        initial_accumulator_value: Floating point value.
+            Starting value for the accumulators (per-parameter momentum
+            values). Must be non-negative.
+        epsilon: Small floating point value used to maintain numerical
+            stability.
+        name: Optional name prefix for the operations created when applying
+            gradients. Defaults to `"Adagrad"`.
+        **kwargs: keyword arguments. Allowed arguments are `clipvalue`,
+            `clipnorm`, `global_clipnorm`.
+            If `clipvalue` (float) is set, the gradient of each weight
+            is clipped to be no higher than this value.
+            If `clipnorm` (float) is set, the gradient of each weight
+            is individually clipped so that its norm is no higher than this
+            value. If `global_clipnorm` (float) is set the gradient of all
+            weights is clipped so that their global norm is no higher than
+            this value.
+
+    Reference:
+        - [Duchi et al., 2011](
+            http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf).
+    """
+
+    _HAS_AGGREGATE_GRAD = True
+
+    def __init__(
+        self,
+        learning_rate=0.001,
+        initial_accumulator_value=0.1,
+        epsilon=1e-7,
+        name="Adagrad",
+        **kwargs
+    ):
+        if initial_accumulator_value < 0.0:
+            raise ValueError(
+                "initial_accumulator_value must be non-negative: %s"
+                % initial_accumulator_value
+            )
+        if epsilon is None:
+            epsilon = backend_config.epsilon()
+        super().__init__(name, **kwargs)
+        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
+        self._set_hyper("decay", self._initial_decay)
+        self._initial_accumulator_value = initial_accumulator_value
+        self.epsilon = epsilon or backend_config.epsilon()
+
+    def _create_slots(self, var_list):
+        for var in var_list:
+            dtype = var.dtype.base_dtype
+            init = tf.compat.v1.constant_initializer(
+                self._initial_accumulator_value, dtype=dtype
+            )
+            self.add_slot(var, "accumulator", init)
+
+    def _prepare_local(self, var_device, var_dtype, apply_state):
+        super()._prepare_local(var_device, var_dtype, apply_state)
+        apply_state[(var_device, var_dtype)].update(
+            dict(
+                epsilon=tf.convert_to_tensor(self.epsilon, var_dtype),
+                neg_lr_t=-apply_state[(var_device, var_dtype)]["lr_t"],
+                zero=tf.zeros((), dtype=tf.int64),
+            )
+        )
+
+    def set_weights(self, weights):
+        params = self.weights
+        # Override set_weights for backward compatibility of Keras V1 optimizer
+        # since it does not include iteration at head of the weight list. Set
+        # iteration to 0.
+        if len(params) == len(weights) + 1:
+            weights = [np.array(0)] + weights
+        super().set_weights(weights)
+
+    @classmethod
+    def from_config(cls, config, custom_objects=None):
+        """Creates an optimizer from its config.
+
+        This method is the reverse of `get_config`,
+        capable of instantiating the same optimizer from the config
+        dictionary.
+
+        Args:
+            config: A Python dictionary, typically the output of get_config.
+            custom_objects: A Python dictionary mapping names to additional
+                Python objects used to create this optimizer, such as a
+                function used for a hyperparameter.
+
+        Returns:
+            An optimizer instance.
+ """ + if "initial_accumulator_value" not in config: + config["initial_accumulator_value"] = 0.1 + if "lr" in config: + config["learning_rate"] = config.pop("lr") + return cls(**config) + + def _resource_apply_dense(self, grad, var, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + acc = self.get_slot(var, "accumulator") + return tf.raw_ops.ResourceApplyAdagradV2( + var=var.handle, + accum=acc.handle, + lr=coefficients["lr_t"], + epsilon=coefficients["epsilon"], + grad=grad, + use_locking=self._use_locking, + ) + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + acc = self.get_slot(var, "accumulator") + return tf.raw_ops.ResourceSparseApplyAdagradV2( + var=var.handle, + accum=acc.handle, + lr=coefficients["lr_t"], + epsilon=coefficients["epsilon"], + grad=grad, + indices=indices, + use_locking=self._use_locking, + ) + + def get_config(self): + config = super().get_config() + config.update( + { + "learning_rate": self._serialize_hyperparameter( + "learning_rate" + ), + "decay": self._initial_decay, + "initial_accumulator_value": self._initial_accumulator_value, + "epsilon": self.epsilon, + } + ) + return config diff --git a/keras/optimizers/legacy/adagrad_test.py b/keras/optimizers/legacy/adagrad_test.py new file mode 100644 index 000000000000..221883aa3f49 --- /dev/null +++ b/keras/optimizers/legacy/adagrad_test.py @@ -0,0 +1,618 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for aggregate operations.""" + +import copy + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.optimizers.legacy import adagrad +from keras.optimizers.schedules import learning_rate_schedule +from keras.testing_infra import test_combinations + +_DATA_TYPES = [tf.half, tf.float32, tf.float64, tf.complex64, tf.complex128] + + +def adagrad_update_numpy(param, accum, g_t, lr=0.001, epsilon=1e-7): + accum_t = accum + g_t * g_t + param_t = param - lr * g_t / (np.sqrt(accum_t) + epsilon) + return param_t, accum_t + + +def sparse_adagrad_update_numpy( + param, accum, gindexs, gvalues, lr=0.001, epsilon=1e-7 +): + accum_t = copy.deepcopy(accum) + param_t = copy.deepcopy(param) + # first loop accumulates repeated indices if necessary. 
+ for i in range(len(gindexs)): + gindex = gindexs[i] + gvalue = gvalues[i] + accum_t[gindex] = accum_t[gindex] + gvalue * gvalue + for i in range(len(gindexs)): + gindex = gindexs[i] + gvalue = gvalues[i] + param_t[gindex] = param_t[gindex] - lr * gvalue / ( + np.sqrt(accum_t[gindex]) + epsilon + ) + return param_t, accum_t + + +class AdagradOptimizerTest(tf.test.TestCase, parameterized.TestCase): + def doTestBasic(self, use_callable_params=False): + for dtype in _DATA_TYPES: + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = lambda: 3.0 + if not use_callable_params: + learning_rate = learning_rate() + + ada_opt = adagrad.Adagrad(learning_rate) + + accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + + if not tf.executing_eagerly(): + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllClose([1.0, 2.0], v0_val) + self.assertAllClose([3.0, 4.0], v1_val) + + # Run 3 steps of adagrad + for _ in range(3): + if not tf.executing_eagerly(): + self.evaluate(ada_update) + else: + ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + var0_np, accum0_np = adagrad_update_numpy( + var0_np, accum0_np, grads0_np, 3.0 + ) + var1_np, accum1_np = adagrad_update_numpy( + var1_np, accum1_np, grads1_np, 3.0 + ) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasic(self): + self.doTestBasic() + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testBasicCallableParams(self): + self.doTestBasic(use_callable_params=True) + + def testBasicWithLearningRateDecay(self): + for dtype in _DATA_TYPES: + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = 3.0 + decay = 0.5 + + ada_opt = adagrad.Adagrad(learning_rate, decay=decay) + + accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + + if not tf.executing_eagerly(): + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllClose([1.0, 2.0], v0_val) + self.assertAllClose([3.0, 4.0], v1_val) + + # Run 3 steps of adagrad + for t in range(3): + if not tf.executing_eagerly(): + self.evaluate(ada_update) + else: + ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + lr_np = learning_rate / (1 + decay * t) + var0_np, accum0_np = 
adagrad_update_numpy( + var0_np, accum0_np, grads0_np, lr_np + ) + var1_np, accum1_np = adagrad_update_numpy( + var1_np, accum1_np, grads1_np, lr_np + ) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testBasicWithLargeEpsilon(self): + var0_np = np.array([1.0, 2.0]) + var1_np = np.array([3.0, 4.0]) + grads0_np = np.array([0.1, 0.1]) + grads1_np = np.array([0.01, 0.01]) + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = 3.0 + + ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0) + + accum0_np = np.array([0.1, 0.1]) + accum1_np = np.array([0.1, 0.1]) + + if not tf.executing_eagerly(): + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllClose([1.0, 2.0], v0_val) + self.assertAllClose([3.0, 4.0], v1_val) + + # Run 3 steps of adagrad + for _ in range(3): + if not tf.executing_eagerly(): + self.evaluate(ada_update) + else: + ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + var0_np, accum0_np = adagrad_update_numpy( + var0_np, accum0_np, grads0_np, 3.0, 1.0 + ) + var1_np, accum1_np = adagrad_update_numpy( + var1_np, accum1_np, grads1_np, 3.0, 1.0 + ) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testBasicWithLearningRateInverseTimeDecay(self): + for dtype in _DATA_TYPES: + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = 3.0 + decay = 0.5 + lr_schedule = learning_rate_schedule.InverseTimeDecay( + learning_rate, decay_steps=1.0, decay_rate=decay + ) + + ada_opt = adagrad.Adagrad(lr_schedule) + + accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + + if not tf.executing_eagerly(): + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllClose([1.0, 2.0], v0_val) + self.assertAllClose([3.0, 4.0], v1_val) + + # Run 3 steps of adagrad + for t in range(3): + if not tf.executing_eagerly(): + self.evaluate(ada_update) + else: + ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + lr_np = learning_rate / (1 + decay * t) + var0_np, accum0_np = adagrad_update_numpy( + var0_np, accum0_np, grads0_np, lr_np + ) + var1_np, accum1_np = adagrad_update_numpy( + var1_np, accum1_np, grads1_np, lr_np + ) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testMinimizeSparseResourceVariable(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
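+ # (Graph mode is pinned below so that `minimize` returns an op the
+ # test can evaluate explicitly after checking the initial values.)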
+ with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var0 = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=dtype) + x = tf.constant([[4.0], [5.0]], dtype=dtype) + + def loss(): + pred = tf.matmul( + tf.compat.v1.nn.embedding_lookup([var0], [0]), x + ) + return pred * pred + + sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllCloseAccordingToType( + [[1.0, 2.0], [3.0, 4.0]], self.evaluate(var0) + ) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[0, 1], [3, 4]], self.evaluate(var0), atol=0.01 + ) + + def testTensorLearningRate(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = tf.constant(3.0) + ada_opt = adagrad.Adagrad(learning_rate) + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + # Run 3 steps of adagrad + for _ in range(3): + self.evaluate(ada_update) + var0_np, accum0_np = adagrad_update_numpy( + var0_np, accum0_np, grads0_np, learning_rate + ) + var1_np, accum1_np = adagrad_update_numpy( + var1_np, accum1_np, grads1_np, learning_rate + ) + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testSparseBasic(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
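+ # (Each `tf.IndexedSlices` below packs (values, indices,
+ # dense_shape); only the rows named in `indices` receive an
+ # Adagrad update.)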
+ with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array( + [0.01, 0, 0.01], dtype=dtype.as_numpy_dtype + ) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0_np_indices = np.array([0, 2], dtype=np.int32) + grads0 = tf.IndexedSlices( + tf.constant(grads0_np[grads0_np_indices]), + tf.constant(grads0_np_indices), + tf.constant([3]), + ) + grads1_np_indices = np.array([0, 2], dtype=np.int32) + grads1 = tf.IndexedSlices( + tf.constant(grads1_np[grads1_np_indices]), + tf.constant(grads1_np_indices), + tf.constant([3]), + ) + learning_rate = 3.0 + ada_opt = adagrad.Adagrad(learning_rate) + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1)) + + accum0_np = np.array( + [0.1, 0.1, 0.1], dtype=dtype.as_numpy_dtype + ) + accum1_np = np.array( + [0.1, 0.1, 0.1], dtype=dtype.as_numpy_dtype + ) + + # Run 3 step of sgd + for _ in range(3): + self.evaluate(ada_update) + + var0_np, accum0_np = sparse_adagrad_update_numpy( + var0_np, + accum0_np, + grads0_np_indices, + grads0_np[grads0_np_indices], + learning_rate, + ) + var1_np, accum1_np = sparse_adagrad_update_numpy( + var1_np, + accum1_np, + grads1_np_indices, + grads1_np[grads1_np_indices], + learning_rate, + ) + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testSparseSingleVarDim(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + grads0_np_indices = np.array([0], dtype=np.int32) + grads0 = tf.IndexedSlices( + tf.constant(grads0_np[grads0_np_indices]), + tf.constant(grads0_np_indices), + tf.constant([3]), + ) + learning_rate = 3.0 + ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0) + ada_update = ada_opt.apply_gradients(zip([grads0], [var0])) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0], self.evaluate(var0)) + + accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) + + # Run 3 step of sgd + for _ in range(3): + self.evaluate(ada_update) + + var0_np, accum0_np = sparse_adagrad_update_numpy( + var0_np, + accum0_np, + grads0_np_indices, + grads0_np[grads0_np_indices], + learning_rate, + epsilon=1.0, + ) + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + + def testSparseRepeatedIndices(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
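+ # (Two 0.1 gradients at index 1 should act like a single
+ # aggregated 0.2 gradient at index 1; the assertions keep both
+ # variables in lockstep across steps.)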
+ with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype) + + repeated_index_update_var = tf.Variable(var_np, dtype=dtype) + aggregated_update_var = tf.Variable(var_np, dtype=dtype) + grad_repeated_index = tf.IndexedSlices( + tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), + tf.constant([1, 1]), + tf.constant([2, 1]), + ) + grad_aggregated = tf.IndexedSlices( + tf.constant([0.2], shape=[1, 1], dtype=dtype), + tf.constant([1]), + tf.constant([2, 1]), + ) + repeated_update = adagrad.Adagrad(3.0).apply_gradients( + [(grad_repeated_index, repeated_index_update_var)] + ) + aggregated_update = adagrad.Adagrad(3.0).apply_gradients( + [(grad_aggregated, aggregated_update_var)] + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose( + self.evaluate(aggregated_update_var), + self.evaluate(repeated_index_update_var), + ) + for _ in range(3): + self.evaluate(repeated_update) + self.evaluate(aggregated_update) + self.assertAllClose( + self.evaluate(aggregated_update_var), + self.evaluate(repeated_index_update_var), + ) + + def testSparseRepeatedIndicesByEmbeddingLookUp(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var_repeated = tf.Variable([1.0, 2.0], dtype=dtype) + loss_repeated = lambda: tf.reduce_sum( + tf.compat.v1.nn.embedding_lookup(var_repeated, [0, 0]) + ) + var_aggregated = tf.Variable([1.0, 2.0], dtype=dtype) + loss_aggregated = lambda: 2 * tf.reduce_sum( + tf.compat.v1.nn.embedding_lookup(var_aggregated, [0]) + ) + update_op_repeated = adagrad.Adagrad(2.0).minimize( + loss_repeated, var_list=[var_repeated] + ) + update_op_aggregated = adagrad.Adagrad(2.0).minimize( + loss_aggregated, var_list=[var_aggregated] + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllCloseAccordingToType( + self.evaluate(var_repeated), self.evaluate(var_aggregated) + ) + for _ in range(3): + self.evaluate(update_op_repeated) + self.evaluate(update_op_aggregated) + self.assertAllCloseAccordingToType( + self.evaluate(var_repeated), + self.evaluate(var_aggregated), + ) + + def testSparseStability(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with tf.Graph().as_default(): + for dtype in [tf.half]: + shape = [1, 6] + var0_np = np.array( + [ + [ + 0.00872496, + -0.106952, + 0.110467, + 0.226505, + -0.0147257, + -0.0105945, + ] + ], + dtype=dtype.as_numpy_dtype, + ) + var0 = tf.Variable(var0_np) + grads0_np = np.array( + [ + [ + -5.91278e-05, + 5.31673e-05, + -2.5779e-06, + 4.29153e-05, + -8.4877e-05, + -9.48906e-05, + ] + ], + dtype=dtype.as_numpy_dtype, + ) + grads0 = tf.IndexedSlices( + tf.constant(grads0_np), tf.constant([0]), tf.constant(shape) + ) + ada_opt = adagrad.Adagrad(1.0) + ada_update = ada_opt.apply_gradients(zip([grads0], [var0])) + slot0 = ada_opt.get_slot(var0, "accumulator") + init = tf.compat.v1.global_variables_initializer() + for _ in range(100): + self.evaluate(init) + self.evaluate(ada_update) + self.assertAllCloseAccordingToType( + np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), + self.evaluate(slot0), + ) + self.assertAllCloseAccordingToType( + np.array( + [ + [ + 0.00891194, + -0.10712013, + 0.11047515, + 0.22636929, + -0.0144573, + -0.01029443, + ] + ] + ), + self.evaluate(var0), + ) + + def testSharing(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
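+ # (Both apply_gradients calls below come from one optimizer
+ # instance, so the two update ops share the same "accumulator"
+ # slots.)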
+ with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = 3.0 + ada_opt = adagrad.Adagrad(learning_rate) + # Apply the optimizer twice. Both applications will use + # the same accums. + ada_update1 = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + ada_update2 = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + slot0 = ada_opt.get_slot(var0, "accumulator") + self.assertEqual(slot0.shape, var0.shape) + slot1 = ada_opt.get_slot(var1, "accumulator") + self.assertEqual(slot1.shape, var1.shape) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values. + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + # Mix the first and the second adagrad for 3 steps. + self.evaluate(ada_update1) + self.evaluate(ada_update2) + self.evaluate(ada_update1) + + accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + for _ in range(3): + var0_np, accum0_np = adagrad_update_numpy( + var0_np, accum0_np, grads0_np, learning_rate + ) + var1_np, accum1_np = adagrad_update_numpy( + var1_np, accum1_np, grads1_np, learning_rate + ) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testConstructAdagradWithLR(self): + opt = adagrad.Adagrad(lr=1.0) + opt_2 = adagrad.Adagrad(learning_rate=0.1, lr=1.0) + opt_3 = adagrad.Adagrad(learning_rate=0.1) + self.assertIsInstance(opt.lr, tf.Variable) + self.assertIsInstance(opt_2.lr, tf.Variable) + self.assertIsInstance(opt_3.lr, tf.Variable) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(opt.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/adam.py b/keras/optimizers/legacy/adam.py index 338470721b7f..fecc337c4c52 100644 --- a/keras/optimizers/legacy/adam.py +++ b/keras/optimizers/legacy/adam.py @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,515 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Legacy Adam optimizer implementation.""" +"""Adam optimizer implementation.""" -from keras.optimizers.optimizer_v2 import adam +import tensorflow.compat.v2 as tf +from keras import backend_config +from keras.optimizers.legacy import optimizer_v2 + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.optimizers.legacy.Adam') -class Adam(adam.Adam): - pass +@keras_export( + "keras.optimizers.legacy.Adam", + v1=["keras.optimizers.Adam", "keras.optimizers.legacy.Adam"], +) +class Adam(optimizer_v2.OptimizerV2): + r"""Optimizer that implements the Adam algorithm. + + Adam optimization is a stochastic gradient descent method that is based on + adaptive estimation of first-order and second-order moments. + + According to + [Kingma et al., 2014](http://arxiv.org/abs/1412.6980), + the method is "*computationally + efficient, has little memory requirement, invariant to diagonal rescaling of + gradients, and is well suited for problems that are large in terms of + data/parameters*". + + Args: + learning_rate: A `Tensor`, floating point value, or a schedule that is a + `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable + that takes no arguments and returns the actual value to use, The + learning rate. Defaults to `0.001`. + beta_1: A float value or a constant float tensor, or a callable + that takes no arguments and returns the actual value to use. The + exponential decay rate for the 1st moment estimates. Defaults to `0.9`. + beta_2: A float value or a constant float tensor, or a callable + that takes no arguments and returns the actual value to use, The + exponential decay rate for the 2nd moment estimates. Defaults to + `0.999`. + epsilon: A small constant for numerical stability. This epsilon is + "epsilon hat" in the Kingma and Ba paper (in the formula just before + Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to + `1e-7`. + amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm from + the paper "On the Convergence of Adam and beyond". Defaults to `False`. + name: Optional name for the operations created when applying gradients. + Defaults to `"Adam"`. + **kwargs: keyword arguments. Allowed arguments are `clipvalue`, + `clipnorm`, `global_clipnorm`. + If `clipvalue` (float) is set, the gradient of each weight + is clipped to be no higher than this value. + If `clipnorm` (float) is set, the gradient of each weight + is individually clipped so that its norm is no higher than this value. + If `global_clipnorm` (float) is set the gradient of all weights is + clipped so that their global norm is no higher than this value. + + Usage: + + >>> opt = tf.keras.optimizers.legacy.Adam(learning_rate=0.1) + >>> var1 = tf.Variable(10.0) + >>> loss = lambda: (var1 ** 2)/2.0 # d(loss)/d(var1) == var1 + >>> step_count = opt.minimize(loss, [var1]).numpy() + >>> # The first step is `-learning_rate*sign(grad)` + >>> var1.numpy() + 9.9 + + Reference: + - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) + - [Reddi et al., 2018]( + https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`. + + Notes: + + The default value of 1e-7 for epsilon might not be a good default in + general. For example, when training an Inception network on ImageNet a + current good choice is 1.0 or 0.1. 
Note that since Adam uses the + formulation just before Section 2.1 of the Kingma and Ba paper rather than + the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon + hat" in the paper. + + The sparse implementation of this algorithm (used when the gradient is an + IndexedSlices object, typically because of `tf.gather` or an embedding + lookup in the forward pass) does apply momentum to variable slices even if + they were not used in the forward pass (meaning they have a gradient equal + to zero). Momentum decay (beta1) is also applied to the entire momentum + accumulator. This means that the sparse behavior is equivalent to the dense + behavior (in contrast to some momentum implementations which ignore momentum + unless a variable slice was actually used). + """ + + _HAS_AGGREGATE_GRAD = True + + def __init__( + self, + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-7, + amsgrad=False, + name="Adam", + **kwargs + ): + super().__init__(name, **kwargs) + self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) + self._set_hyper("decay", self._initial_decay) + self._set_hyper("beta_1", beta_1) + self._set_hyper("beta_2", beta_2) + self.epsilon = epsilon or backend_config.epsilon() + self.amsgrad = amsgrad + + def _create_slots(self, var_list): + # Create slots for the first and second moments. + # Separate for-loops to respect the ordering of slot variables from v1. + for var in var_list: + self.add_slot(var, "m") + for var in var_list: + self.add_slot(var, "v") + if self.amsgrad: + for var in var_list: + self.add_slot(var, "vhat") + + def _prepare_local(self, var_device, var_dtype, apply_state): + super()._prepare_local(var_device, var_dtype, apply_state) + + local_step = tf.cast(self.iterations + 1, var_dtype) + beta_1_t = tf.identity(self._get_hyper("beta_1", var_dtype)) + beta_2_t = tf.identity(self._get_hyper("beta_2", var_dtype)) + beta_1_power = tf.pow(beta_1_t, local_step) + beta_2_power = tf.pow(beta_2_t, local_step) + lr = apply_state[(var_device, var_dtype)]["lr_t"] * ( + tf.sqrt(1 - beta_2_power) / (1 - beta_1_power) + ) + apply_state[(var_device, var_dtype)].update( + dict( + lr=lr, + epsilon=tf.convert_to_tensor(self.epsilon, var_dtype), + beta_1_t=beta_1_t, + beta_1_power=beta_1_power, + one_minus_beta_1_t=1 - beta_1_t, + beta_2_t=beta_2_t, + beta_2_power=beta_2_power, + one_minus_beta_2_t=1 - beta_2_t, + ) + ) + + def set_weights(self, weights): + params = self.weights + # If the weights are generated by Keras V1 optimizer, it includes vhats + # even without amsgrad, i.e, V1 optimizer has 3x + 1 variables, while V2 + # optimizer has 2x + 1 variables. Filter vhats out for compatibility. 
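+ # E.g. with N variables, V2 holds 2N + 1 weights (iterations, m, v)
+ # while V1 shipped 3N + 1 (an extra vhat per variable): for N=3,
+ # 10 incoming weights are trimmed to the first 7.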
+ num_vars = int((len(params) - 1) / 2) + if len(weights) == 3 * num_vars + 1: + weights = weights[: len(params)] + super().set_weights(weights) + + def _resource_apply_dense(self, grad, var, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + + if not self.amsgrad: + return tf.raw_ops.ResourceApplyAdam( + var=var.handle, + m=m.handle, + v=v.handle, + beta1_power=coefficients["beta_1_power"], + beta2_power=coefficients["beta_2_power"], + lr=coefficients["lr_t"], + beta1=coefficients["beta_1_t"], + beta2=coefficients["beta_2_t"], + epsilon=coefficients["epsilon"], + grad=grad, + use_locking=self._use_locking, + ) + else: + vhat = self.get_slot(var, "vhat") + return tf.raw_ops.ResourceApplyAdamWithAmsgrad( + var=var.handle, + m=m.handle, + v=v.handle, + vhat=vhat.handle, + beta1_power=coefficients["beta_1_power"], + beta2_power=coefficients["beta_2_power"], + lr=coefficients["lr_t"], + beta1=coefficients["beta_1_t"], + beta2=coefficients["beta_2_t"], + epsilon=coefficients["epsilon"], + grad=grad, + use_locking=self._use_locking, + ) + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, "m") + m_scaled_g_values = grad * coefficients["one_minus_beta_1_t"] + m_t = tf.compat.v1.assign( + m, m * coefficients["beta_1_t"], use_locking=self._use_locking + ) + with tf.control_dependencies([m_t]): + m_t = self._resource_scatter_add(m, indices, m_scaled_g_values) + + # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) + v = self.get_slot(var, "v") + v_scaled_g_values = (grad * grad) * coefficients["one_minus_beta_2_t"] + v_t = tf.compat.v1.assign( + v, v * coefficients["beta_2_t"], use_locking=self._use_locking + ) + with tf.control_dependencies([v_t]): + v_t = self._resource_scatter_add(v, indices, v_scaled_g_values) + + if not self.amsgrad: + v_sqrt = tf.sqrt(v_t) + var_update = tf.compat.v1.assign_sub( + var, + coefficients["lr"] * m_t / (v_sqrt + coefficients["epsilon"]), + use_locking=self._use_locking, + ) + return tf.group(*[var_update, m_t, v_t]) + else: + v_hat = self.get_slot(var, "vhat") + v_hat_t = tf.maximum(v_hat, v_t) + with tf.control_dependencies([v_hat_t]): + v_hat_t = tf.compat.v1.assign( + v_hat, v_hat_t, use_locking=self._use_locking + ) + v_hat_sqrt = tf.sqrt(v_hat_t) + var_update = tf.compat.v1.assign_sub( + var, + coefficients["lr"] + * m_t + / (v_hat_sqrt + coefficients["epsilon"]), + use_locking=self._use_locking, + ) + return tf.group(*[var_update, m_t, v_t, v_hat_t]) + + def get_config(self): + config = super().get_config() + config.update( + { + "learning_rate": self._serialize_hyperparameter( + "learning_rate" + ), + "decay": self._initial_decay, + "beta_1": self._serialize_hyperparameter("beta_1"), + "beta_2": self._serialize_hyperparameter("beta_2"), + "epsilon": self.epsilon, + "amsgrad": self.amsgrad, + } + ) + return config + + +class NonFusedAdam(optimizer_v2.OptimizerV2): + r"""Optimizer that implements the Adam algorithm without fused kernels. + + Adam optimization is a stochastic gradient descent method that is based on + adaptive estimation of first-order and second-order moments. 
+ According to the paper + [Adam: A Method for Stochastic Optimization. Kingma et al., + 2014](http://arxiv.org/abs/1412.6980), the method is "*computationally + efficient, has little memory requirement, invariant to diagonal rescaling of + gradients, and is well suited for problems that are large in terms of + data/parameters*". + + For AMSGrad see [On The Convergence Of Adam And Beyond. + Reddi et al., 5-8](https://openreview.net/pdf?id=ryQu7f-RZ). + + **If amsgrad = False**: + + initialize $m_0$ as 1st moment vector + initialize $v_0$ as 2nd moment vector + + The update rule for $\theta$ with gradient $g$ uses an optimization + described at the end of section 2 of the paper: + + $$lr_t = \mathrm{learning\_rate} * + \sqrt{1 - \beta_2^t} / (1 - \beta_1^t)$$ + $$m_t = \beta_1 * m_{t-1} + (1 - \beta_1) * g$$ + $$v_t = \beta_2 * v_{t-1} + (1 - \beta_2) * g^2$$ + $$\theta_t = \theta_{t-1} - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$ + + **If amsgrad = True**: + + initialize $m_0$ as 1st moment vector + initialize $v_0$ as 2nd moment vector + initialize $\hat{v}_0$ as 2nd moment vector + + The update rule for $\theta$ with gradient $g$ uses an optimization + described at the end of section 2 of the paper: + + $$lr_t = \mathrm{learning\_rate} * + \sqrt{1 - \beta_2^t} / (1 - \beta_1^t)$$ + + $$m_t = \beta_1 * m_{t-1} + (1 - \beta_1) * g$$ + $$v_t = \beta_2 * v_{t-1} + (1 - \beta_2) * g^2$$ + $$\hat{v}_t = \max(\hat{v}_{t-1}, v_t)$$ + $$\theta_t = \theta_{t-1} - lr_t * m_t / (\sqrt{\hat{v}_t} + \epsilon)$$ + + The default value of 1e-7 for epsilon might not be a good default in + general. For example, when training an Inception network on ImageNet a + current good choice is 1.0 or 0.1. Note that since Adam uses the + formulation just before Section 2.1 of the Kingma and Ba paper rather than + the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon + hat" in the paper. + + The sparse implementation of this algorithm (used when the gradient is an + IndexedSlices object, typically because of `tf.gather` or an embedding + lookup in the forward pass) does apply momentum to variable slices even if + they were not used in the forward pass (meaning they have a gradient equal + to zero). Momentum decay (beta1) is also applied to the entire momentum + accumulator. This means that the sparse behavior is equivalent to the dense + behavior (in contrast to some momentum implementations which ignore momentum + unless a variable slice was actually used). + + Usage: + + >>> opt = tf.keras.optimizers.legacy.Adam(learning_rate=0.1) + >>> var1 = tf.Variable(10.0) + >>> loss = lambda: (var1 ** 2)/2.0 # d(loss)/d(var1) == var1 + >>> step_count = opt.minimize(loss, [var1]).numpy() + >>> # The first step is `-learning_rate*sign(grad)` + >>> var1.numpy() + 9.9 + """ + + _HAS_AGGREGATE_GRAD = True + + def __init__( + self, + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-7, + amsgrad=False, + name="Adam", + **kwargs + ): + """Construct a new Adam optimizer. + + Args: + learning_rate: A `Tensor`, floating point value, or a schedule that is + a `tf.keras.optimizers.schedules.LearningRateSchedule`, or a + callable that takes no arguments and returns the actual value to + use, The learning rate. Defaults to `0.001`. + beta_1: A float value or a constant float tensor, or a callable that + takes no arguments and returns the actual value to use. The + exponential decay rate for the 1st moment estimates. Defaults to + `0.9`. 
+ beta_2: A float value or a constant float tensor, or a callable that + takes no arguments and returns the actual value to use, The + exponential decay rate for the 2nd moment estimates. Defaults to + `0.999`. + epsilon: A small constant for numerical stability. This epsilon is + "epsilon hat" in the Kingma and Ba paper (in the formula just before + Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults + to `1e-7`. + amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm + from the paper "On the Convergence of Adam and beyond". Defaults to + `False`. + name: Optional name for the operations created when applying + gradients. Defaults to "Adam". + **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, + `lr`, `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is + clip gradients by value, `decay` is included for backward + compatibility to allow time inverse decay of learning rate. `lr` is + included for backward compatibility, recommended to use + `learning_rate` instead. + """ + + super().__init__(name, **kwargs) + self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) + self._set_hyper("decay", self._initial_decay) + self._set_hyper("beta_1", beta_1) + self._set_hyper("beta_2", beta_2) + self.epsilon = epsilon or backend_config.epsilon() + self.amsgrad = amsgrad + + def _create_slots(self, var_list): + # Create slots for the first and second moments. + # Separate for-loops to respect the ordering of slot variables from v1. + for var in var_list: + self.add_slot(var, "m") + for var in var_list: + self.add_slot(var, "v") + if self.amsgrad: + for var in var_list: + self.add_slot(var, "vhat") + + def _prepare_local(self, var_device, var_dtype, apply_state): + super()._prepare_local(var_device, var_dtype, apply_state) + + local_step = tf.cast(self.iterations + 1, var_dtype) + beta_1_t = tf.identity(self._get_hyper("beta_1", var_dtype)) + beta_2_t = tf.identity(self._get_hyper("beta_2", var_dtype)) + beta_1_power = tf.pow(beta_1_t, local_step) + beta_2_power = tf.pow(beta_2_t, local_step) + lr = apply_state[(var_device, var_dtype)]["lr_t"] * ( + tf.sqrt(1 - beta_2_power) / (1 - beta_1_power) + ) + apply_state[(var_device, var_dtype)].update( + dict( + lr=lr, + epsilon=tf.convert_to_tensor(self.epsilon, var_dtype), + beta_1_t=beta_1_t, + beta_1_power=beta_1_power, + one_minus_beta_1_t=1 - beta_1_t, + beta_2_t=beta_2_t, + beta_2_power=beta_2_power, + one_minus_beta_2_t=1 - beta_2_t, + ) + ) + + def set_weights(self, weights): + params = self.weights + # If the weights are generated by Keras V1 optimizer, it includes vhats + # even without amsgrad, i.e, V1 optimizer has 3x + 1 variables, while V2 + # optimizer has 2x + 1 variables. Filter vhats out for compatibility. 
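+ # (Same V1-compatibility trimming as in `Adam.set_weights` above.)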
+ num_vars = int((len(params) - 1) / 2) + if len(weights) == 3 * num_vars + 1: + weights = weights[: len(params)] + super().set_weights(weights) + + @tf.function(jit_compile=True) + def _resource_apply_dense_impl(self, grad, var, apply_state): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + + alpha = ( + coefficients["lr_t"] + * tf.sqrt(1 - coefficients["beta_2_power"]) + / (1 - coefficients["beta_1_power"]) + ) + m.assign_add((grad - m) * (1 - coefficients["beta_1_t"])) + v.assign_add((tf.square(grad) - v) * (1 - coefficients["beta_2_t"])) + if self.amsgrad: + vhat = self.get_slot(var, "vhat") + vhat.assign(tf.maximum(vhat, v)) + v = vhat + var.assign_sub((m * alpha) / (tf.sqrt(v) + coefficients["epsilon"])) + + def _resource_apply_dense(self, grad, var, apply_state=None): + self._resource_apply_dense_impl(grad, var, apply_state) + if not tf.executing_eagerly(): + return tf.compat.v1.get_default_graph().get_operations()[-1] + + @tf.function(jit_compile=True) + def _resource_apply_sparse_impl(self, grad, var, indices, apply_state): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, "m") + m_scaled_g_values = grad * coefficients["one_minus_beta_1_t"] + m.assign(m * coefficients["beta_1_t"]) + m.scatter_add(tf.IndexedSlices(m_scaled_g_values, indices)) + + # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) + v = self.get_slot(var, "v") + v_scaled_g_values = (grad * grad) * coefficients["one_minus_beta_2_t"] + v.assign(v * coefficients["beta_2_t"]) + v.scatter_add(tf.IndexedSlices(v_scaled_g_values, indices)) + + if not self.amsgrad: + var.assign_sub( + coefficients["lr"] * m / (tf.sqrt(v) + coefficients["epsilon"]) + ) + else: + v_hat = self.get_slot(var, "vhat") + v_hat.assign(tf.maximum(v_hat, v)) + var.assign_sub( + coefficients["lr"] + * m + / (tf.sqrt(v_hat) + coefficients["epsilon"]) + ) + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + self._resource_apply_sparse_impl(grad, var, indices, apply_state) + if not tf.executing_eagerly(): + return tf.compat.v1.get_default_graph().get_operations()[-1] + + def get_config(self): + config = super().get_config() + config.update( + { + "learning_rate": self._serialize_hyperparameter( + "learning_rate" + ), + "decay": self._initial_decay, + "beta_1": self._serialize_hyperparameter("beta_1"), + "beta_2": self._serialize_hyperparameter("beta_2"), + "epsilon": self.epsilon, + "amsgrad": self.amsgrad, + } + ) + return config diff --git a/keras/optimizers/legacy/adam_test.py b/keras/optimizers/legacy/adam_test.py new file mode 100644 index 000000000000..f796b5a98e69 --- /dev/null +++ b/keras/optimizers/legacy/adam_test.py @@ -0,0 +1,1196 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Adam.""" + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.optimizers import optimizer_v1 +from keras.optimizers.legacy import adam +from keras.optimizers.schedules import learning_rate_schedule +from keras.testing_infra import test_combinations + + +def adam_update_numpy( + param, g_t, t, m, v, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-7 +): + lr_t = lr * np.sqrt(1 - beta2 ** (t + 1)) / (1 - beta1 ** (t + 1)) + + m_t = beta1 * m + (1 - beta1) * g_t + v_t = beta2 * v + (1 - beta2) * g_t * g_t + + param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon) + return param_t, m_t, v_t + + +def adam_update_numpy_amsgrad( + param, g_t, t, m, v, vhat, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-7 +): + lr_t = lr * np.sqrt(1 - beta2 ** (t + 1)) / (1 - beta1 ** (t + 1)) + + m_t = beta1 * m + (1 - beta1) * g_t + v_t = beta2 * v + (1 - beta2) * g_t * g_t + vhat_t = np.maximum(vhat, v_t) + + param_t = param - lr_t * m_t / (np.sqrt(vhat_t) + epsilon) + return param_t, m_t, v_t, vhat_t + + +def adam_sparse_update_numpy_amsgrad( + param, + indices, + g_t, + t, + m, + v, + vhat, + lr=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-7, +): + m_t, v_t, vhat_t, param_t = ( + np.copy(m), + np.copy(v), + np.copy(vhat), + np.copy(param), + ) + lr_t = lr * np.sqrt(1 - beta2 ** (t + 1)) / (1 - beta1 ** (t + 1)) + m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t + v_t_slice = beta2 * v[indices] + (1 - beta2) * g_t * g_t + m_t[indices] = m_t_slice + v_t[indices] = v_t_slice + v_hat_t = np.maximum(vhat_t, v_t) + v_hat_t_slice = v_hat_t[indices] + param_t_slice = param[indices] - ( + lr_t * (m_t_slice / (np.sqrt(v_hat_t_slice) + epsilon)) + ) + param_t[indices] = param_t_slice + return param_t, m_t, v_t, vhat_t + + +def get_beta_accumulators(opt, dtype): + local_step = tf.cast(opt.iterations + 1, dtype) + beta_1_t = tf.cast(opt._get_hyper("beta_1"), dtype) + beta_1_power = tf.pow(beta_1_t, local_step) + beta_2_t = tf.cast(opt._get_hyper("beta_2"), dtype) + beta_2_power = tf.pow(beta_2_t, local_step) + return (beta_1_power, beta_2_power) + + +class AdamOptimizerTest(tf.test.TestCase, parameterized.TestCase): + def testSparse(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. 
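+ # (The scalar m/v states broadcast against the parameter
+ # arrays inside the numpy reference updates.)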
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array( + [0.1, 0.0, 0.1], dtype=dtype.as_numpy_dtype + ) + var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array( + [0.01, 0.0, 0.01], dtype=dtype.as_numpy_dtype + ) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0_np_indices = np.array([0, 2], dtype=np.int32) + grads0 = tf.IndexedSlices( + tf.constant(grads0_np[grads0_np_indices]), + tf.constant(grads0_np_indices), + tf.constant([3]), + ) + grads1_np_indices = np.array([0, 2], dtype=np.int32) + grads1 = tf.IndexedSlices( + tf.constant(grads1_np[grads1_np_indices]), + tf.constant(grads1_np_indices), + tf.constant([3]), + ) + opt = adam.Adam() + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1)) + + beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) + # Run 3 steps of Adam + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + update.run() + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testSparseDevicePlacement(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for index_dtype in [tf.int32, tf.int64]: + with tf.Graph().as_default(), self.cached_session( + force_gpu=tf.test.is_gpu_available() + ): + # If a GPU is available, tests that all optimizer ops can be + # placed on it (i.e. they have GPU kernels). + var = tf.Variable([[1.0], [2.0]]) + indices = tf.constant([0, 1], dtype=index_dtype) + g_sum = lambda: tf.reduce_sum(tf.gather(var, indices)) + optimizer = adam.Adam(3.0) + minimize_op = optimizer.minimize(g_sum, var_list=[var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + minimize_op.run() + + def testSparseRepeatedIndices(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
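+ # (Adam's sparse path decays m and v for every slot, so duplicate
+ # indices and their pre-aggregated equivalent must trace identical
+ # trajectories.)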
+ for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + repeated_index_update_var = tf.Variable( + [[1.0], [2.0]], dtype=dtype + ) + aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype) + grad_repeated_index = tf.IndexedSlices( + tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), + tf.constant([1, 1]), + tf.constant([2, 1]), + ) + grad_aggregated = tf.IndexedSlices( + tf.constant([0.2], shape=[1, 1], dtype=dtype), + tf.constant([1]), + tf.constant([2, 1]), + ) + repeated_update = adam.Adam().apply_gradients( + [(grad_repeated_index, repeated_index_update_var)] + ) + aggregated_update = adam.Adam().apply_gradients( + [(grad_aggregated, aggregated_update_var)] + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose( + aggregated_update_var, + self.evaluate(repeated_index_update_var), + ) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose( + aggregated_update_var, + self.evaluate(repeated_index_update_var), + ) + + def doTestBasic(self, use_callable_params=False): + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = lambda: 0.001 + beta1 = lambda: 0.9 + beta2 = lambda: 0.999 + epsilon = lambda: 1e-8 + if not use_callable_params: + learning_rate = learning_rate() + beta1 = beta1() + beta2 = beta2() + epsilon = epsilon() + + opt = adam.Adam(learning_rate=learning_rate) + if not tf.executing_eagerly(): + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 3 steps of Adam + for t in range(3): + beta_1_power, beta_2_power = get_beta_accumulators( + opt, dtype + ) + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + if not tf.executing_eagerly(): + self.evaluate(update) + else: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testResourceBasic(self): + self.doTestBasic() + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testBasicCallableParams(self): + self.doTestBasic(use_callable_params=True) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasicWithAmsgrad(self): + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with self.cached_session(): + # Initialize variables for numpy implementation. 
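+ # (With amsgrad=True the reference keeps vhat = max(vhat, v)
+ # and divides by sqrt(vhat) rather than sqrt(v).)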
+ m0, v0, v0hat, m1, v1, v1hat = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + opt = adam.Adam(amsgrad=True) + if not tf.executing_eagerly(): + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 3 steps of Adam + for t in range(3): + beta_1_power, beta_2_power = get_beta_accumulators( + opt, dtype + ) + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + if not tf.executing_eagerly(): + self.evaluate(update) + else: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + var0_np, m0, v0, v0hat = adam_update_numpy_amsgrad( + var0_np, grads0_np, t, m0, v0, v0hat + ) + var1_np, m1, v1, v1hat = adam_update_numpy_amsgrad( + var1_np, grads1_np, t, m1, v1, v1hat + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testSparseWithAmsgrad(self): + # dtypes.half does not work on gpu + eager. + for dtype in [tf.float32, tf.float64]: + with self.cached_session(): + m0 = np.array([[0.0], [0.0]]) + v0 = np.array([[0.0], [0.0]]) + v0hat = np.array([[0.0], [0.0]]) + indices_np = np.array([1]) + indices = tf.constant(indices_np, dtype=tf.int32) + var0_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype) + repeated_index_update_var = tf.Variable(var0_np, dtype=dtype) + aggregated_update_var = tf.Variable(var0_np, dtype=dtype) + grads0_np = np.array([[0.2]], dtype=dtype.as_numpy_dtype) + grad_repeated_index = tf.IndexedSlices( + tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), + tf.constant([1, 1]), + tf.constant([2, 1]), + ) + grad_aggregated = tf.IndexedSlices( + grads0_np, indices, tf.constant([2, 1]) + ) + opt_repeated = adam.Adam(amsgrad=True) + opt_aggregated = adam.Adam(amsgrad=True) + if not tf.executing_eagerly(): + repeated_update = opt_repeated.apply_gradients( + [(grad_repeated_index, repeated_index_update_var)] + ) + aggregated_update = opt_aggregated.apply_gradients( + [(grad_aggregated, aggregated_update_var)] + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose( + self.evaluate(aggregated_update_var), + self.evaluate(repeated_index_update_var), + ) + for t in range(3): + if not tf.executing_eagerly(): + self.evaluate(repeated_update) + self.evaluate(aggregated_update) + else: + opt_repeated.apply_gradients( + [(grad_repeated_index, repeated_index_update_var)] + ) + opt_aggregated.apply_gradients( + [(grad_aggregated, aggregated_update_var)] + ) + + var0_np, m0, v0, v0hat = adam_sparse_update_numpy_amsgrad( + var0_np, indices_np, grads0_np, t, m0, v0, v0hat + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(aggregated_update_var) + ) + self.assertAllCloseAccordingToType( + self.evaluate(aggregated_update_var), + self.evaluate(repeated_index_update_var), + ) + + def 
testBasicWithLearningRateDecay(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = 0.001 + beta_1 = 0.9 + beta_2 = 0.999 + epsilon = 1e-7 + decay = 0.5 + + opt = adam.Adam( + learning_rate=learning_rate, + beta_1=beta_1, + beta_2=beta_2, + epsilon=epsilon, + decay=decay, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 3 steps of Adam + for t in range(3): + self.evaluate(update) + lr_np = learning_rate / (1 + decay * t) + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0, lr=lr_np + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1, lr=lr_np + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testBasicWithLearningRateInverseTimeDecay(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = 0.001 + decay = 0.5 + lr_schedule = learning_rate_schedule.InverseTimeDecay( + learning_rate, decay_steps=1.0, decay_rate=decay + ) + beta_1 = 0.9 + beta_2 = 0.999 + epsilon = 1e-7 + + opt = adam.Adam( + learning_rate=lr_schedule, + beta_1=beta_1, + beta_2=beta_2, + epsilon=epsilon, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 3 steps of Adam + for t in range(3): + self.evaluate(update) + + lr_np = learning_rate / (1 + decay * t) + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0, lr=lr_np + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1, lr=lr_np + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testTensorLearningRate(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. 
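+ # (Passing the learning rate as a tf.constant exercises the
+ # tensor-hyperparameter path instead of a Python float.)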
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + opt = adam.Adam(tf.constant(0.001)) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) + # Run 3 steps of Adam + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + update.run() + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testSharing(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + opt = adam.Adam() + update1 = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + update2 = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 3 steps of intertwined Adam1 and Adam2. + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + if t % 2 == 0: + update1.run() + else: + update2.run() + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testSlotsUniqueEager(self): + v1 = tf.Variable(1.0) + v2 = tf.Variable(1.0) + opt = adam.Adam(1.0) + opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) + # There should be iteration, and two unique slot variables for v1 and + # v2. 
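+ # (5 = the iterations counter plus one "m" and one "v" slot for
+ # each of the two variables.)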
+ self.assertLen(set(v.ref() for v in opt.variables()), 5) + self.assertEqual( + self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations) + ) + + def testSetWeightsFromV1AdamWithoutMinimize(self): + keras_v1_adam = optimizer_v1.Adam() + keras_v2_adam = adam.Adam() + keras_v2_adam.set_weights(keras_v1_adam.get_weights()) + keras_v1_iteration = keras_v1_adam.iterations + keras_v2_iteration = keras_v2_adam.iterations + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual( + self.evaluate(keras_v1_iteration), self.evaluate(keras_v2_iteration) + ) + + def testConstructAdamWithLR(self): + opt = adam.Adam(lr=1.0) + opt_2 = adam.Adam(learning_rate=0.1, lr=1.0) + opt_3 = adam.Adam(learning_rate=0.1) + self.assertIsInstance(opt.lr, tf.Variable) + self.assertIsInstance(opt_2.lr, tf.Variable) + self.assertIsInstance(opt_3.lr, tf.Variable) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(opt.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) + + +class NonFusedAdamOptimizerTest(tf.test.TestCase, parameterized.TestCase): + def testSparse(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array( + [0.1, 0.0, 0.1], dtype=dtype.as_numpy_dtype + ) + var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array( + [0.01, 0.0, 0.01], dtype=dtype.as_numpy_dtype + ) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0_np_indices = np.array([0, 2], dtype=np.int32) + grads0 = tf.IndexedSlices( + tf.constant(grads0_np[grads0_np_indices]), + tf.constant(grads0_np_indices), + tf.constant([3]), + ) + grads1_np_indices = np.array([0, 2], dtype=np.int32) + grads1 = tf.IndexedSlices( + tf.constant(grads1_np[grads1_np_indices]), + tf.constant(grads1_np_indices), + tf.constant([3]), + ) + opt = adam.NonFusedAdam() + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1)) + + beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) + # Run 3 steps of NonFusedAdam + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + update.run() + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testSparseDevicePlacement(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for index_dtype in [tf.int32, tf.int64]: + with tf.Graph().as_default(), self.cached_session( + force_gpu=tf.test.is_gpu_available() + ): + # If a GPU is available, tests that all optimizer ops can be + # placed on it (i.e. they have GPU kernels). 
+ var = tf.Variable([[1.0], [2.0]]) + indices = tf.constant([0, 1], dtype=index_dtype) + g_sum = lambda: tf.reduce_sum(tf.gather(var, indices)) + optimizer = adam.NonFusedAdam(3.0) + minimize_op = optimizer.minimize(g_sum, var_list=[var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + minimize_op.run() + + def testSparseRepeatedIndices(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + repeated_index_update_var = tf.Variable( + [[1.0], [2.0]], dtype=dtype + ) + aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype) + grad_repeated_index = tf.IndexedSlices( + tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), + tf.constant([1, 1]), + tf.constant([2, 1]), + ) + grad_aggregated = tf.IndexedSlices( + tf.constant([0.2], shape=[1, 1], dtype=dtype), + tf.constant([1]), + tf.constant([2, 1]), + ) + repeated_update = adam.NonFusedAdam().apply_gradients( + [(grad_repeated_index, repeated_index_update_var)] + ) + aggregated_update = adam.NonFusedAdam().apply_gradients( + [(grad_aggregated, aggregated_update_var)] + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose( + aggregated_update_var, + self.evaluate(repeated_index_update_var), + ) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose( + aggregated_update_var, + self.evaluate(repeated_index_update_var), + ) + + def doTestBasic(self, use_callable_params=False): + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = lambda: 0.001 + beta1 = lambda: 0.9 + beta2 = lambda: 0.999 + epsilon = lambda: 1e-8 + if not use_callable_params: + learning_rate = learning_rate() + beta1 = beta1() + beta2 = beta2() + epsilon = epsilon() + + opt = adam.NonFusedAdam(learning_rate=learning_rate) + if not tf.executing_eagerly(): + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 3 steps of NonFusedAdam + for t in range(3): + beta_1_power, beta_2_power = get_beta_accumulators( + opt, dtype + ) + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + if not tf.executing_eagerly(): + self.evaluate(update) + else: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0), rtol=1e-4, atol=1e-4 + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1), rtol=1e-4, atol=1e-4 + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testResourceBasic(self): + self.doTestBasic() + + 
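The callable-hyperparameter path (`doTestBasic(use_callable_params=True)`, driven by `testBasicCallableParams` just below) can be reproduced outside the test harness. A minimal standalone sketch, not part of this diff, assuming a TF build where the `tf.keras.optimizers.legacy` namespace introduced by this change is available:

```python
import tensorflow as tf

# Zero-argument callables are resolved each time gradients are applied,
# which is what the callable-params test below verifies.
lr = lambda: 0.001
opt = tf.keras.optimizers.legacy.Adam(learning_rate=lr)

var = tf.Variable(2.0)
opt.minimize(lambda: var * var, var_list=[var])  # one eager Adam step
print(var.numpy())  # slightly below 2.0 after the step
```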
@test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testBasicCallableParams(self): + self.doTestBasic(use_callable_params=True) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasicWithAmsgrad(self): + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, v0hat, m1, v1, v1hat = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + opt = adam.NonFusedAdam(amsgrad=True) + if not tf.executing_eagerly(): + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 3 steps of NonFusedAdam + for t in range(3): + beta_1_power, beta_2_power = get_beta_accumulators( + opt, dtype + ) + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + if not tf.executing_eagerly(): + self.evaluate(update) + else: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + var0_np, m0, v0, v0hat = adam_update_numpy_amsgrad( + var0_np, grads0_np, t, m0, v0, v0hat + ) + var1_np, m1, v1, v1hat = adam_update_numpy_amsgrad( + var1_np, grads1_np, t, m1, v1, v1hat + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0), rtol=1e-4, atol=1e-4 + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1), rtol=1e-4, atol=1e-4 + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testSparseWithAmsgrad(self): + # dtypes.half does not work on gpu + eager. 
+ for dtype in [tf.float32, tf.float64]: + with self.cached_session(): + m0 = np.array([[0.0], [0.0]]) + v0 = np.array([[0.0], [0.0]]) + v0hat = np.array([[0.0], [0.0]]) + indices_np = np.array([1]) + indices = tf.constant(indices_np, dtype=tf.int32) + var0_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype) + repeated_index_update_var = tf.Variable(var0_np, dtype=dtype) + aggregated_update_var = tf.Variable(var0_np, dtype=dtype) + grads0_np = np.array([[0.2]], dtype=dtype.as_numpy_dtype) + grad_repeated_index = tf.IndexedSlices( + tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), + tf.constant([1, 1]), + tf.constant([2, 1]), + ) + grad_aggregated = tf.IndexedSlices( + grads0_np, indices, tf.constant([2, 1]) + ) + opt_repeated = adam.NonFusedAdam(amsgrad=True) + opt_aggregated = adam.NonFusedAdam(amsgrad=True) + if not tf.executing_eagerly(): + repeated_update = opt_repeated.apply_gradients( + [(grad_repeated_index, repeated_index_update_var)] + ) + aggregated_update = opt_aggregated.apply_gradients( + [(grad_aggregated, aggregated_update_var)] + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose( + self.evaluate(aggregated_update_var), + self.evaluate(repeated_index_update_var), + ) + for t in range(3): + if not tf.executing_eagerly(): + self.evaluate(repeated_update) + self.evaluate(aggregated_update) + else: + opt_repeated.apply_gradients( + [(grad_repeated_index, repeated_index_update_var)] + ) + opt_aggregated.apply_gradients( + [(grad_aggregated, aggregated_update_var)] + ) + + var0_np, m0, v0, v0hat = adam_sparse_update_numpy_amsgrad( + var0_np, indices_np, grads0_np, t, m0, v0, v0hat + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(aggregated_update_var) + ) + self.assertAllCloseAccordingToType( + self.evaluate(aggregated_update_var), + self.evaluate(repeated_index_update_var), + ) + + def testBasicWithLearningRateDecay(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = 0.001 + beta_1 = 0.9 + beta_2 = 0.999 + epsilon = 1e-7 + decay = 0.5 + + opt = adam.NonFusedAdam( + learning_rate=learning_rate, + beta_1=beta_1, + beta_2=beta_2, + epsilon=epsilon, + decay=decay, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 3 steps of NonFusedAdam + for t in range(3): + self.evaluate(update) + lr_np = learning_rate / (1 + decay * t) + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0, lr=lr_np + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1, lr=lr_np + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testBasicWithLearningRateInverseTimeDecay(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = 0.001 + decay = 0.5 + lr_schedule = learning_rate_schedule.InverseTimeDecay( + learning_rate, decay_steps=1.0, decay_rate=decay + ) + beta_1 = 0.9 + beta_2 = 0.999 + epsilon = 1e-7 + + opt = adam.NonFusedAdam( + learning_rate=lr_schedule, + beta_1=beta_1, + beta_2=beta_2, + epsilon=epsilon, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 3 steps of NonFusedAdam + for t in range(3): + self.evaluate(update) + + lr_np = learning_rate / (1 + decay * t) + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0, lr=lr_np + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1, lr=lr_np + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testTensorLearningRate(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + opt = adam.NonFusedAdam(tf.constant(0.001)) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) + # Run 3 steps of NonFusedAdam + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + update.run() + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testSharing(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + opt = adam.NonFusedAdam() + update1 = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + update2 = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 3 steps of intertwined NonFusedAdam1 and NonFusedAdam2. + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), self.evaluate(beta_2_power) + ) + if t % 2 == 0: + update1.run() + else: + update2.run() + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/adamax.py b/keras/optimizers/legacy/adamax.py index 016a2f172578..f89690fadb7a 100644 --- a/keras/optimizers/legacy/adamax.py +++ b/keras/optimizers/legacy/adamax.py @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,190 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Legacy Adamax optimizer implementation.""" +"""Adamax optimizer implementation.""" -from keras.optimizers.optimizer_v2 import adamax +import tensorflow.compat.v2 as tf +from keras import backend_config +from keras.optimizers.legacy import optimizer_v2 + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.optimizers.legacy.Adamax') -class Adamax(adamax.Adamax): - pass +@keras_export( + "keras.optimizers.legacy.Adamax", + v1=["keras.optimizers.Adamax", "keras.optimizers.legacy.Adamax"], +) +class Adamax(optimizer_v2.OptimizerV2): + """Optimizer that implements the Adamax algorithm. + + It is a variant of Adam based on the infinity norm. + Default parameters follow those provided in the paper. + Adamax is sometimes superior to Adam, especially in models with embeddings. + + Initialization: + + ```python + m = 0 # Initialize the 1st moment vector + v = 0 # Initialize the exponentially weighted infinity norm + t = 0 # Initialize timestep + ``` + + The update rule for parameter `w` with gradient `g` is + described at the end of section 7.1 of the paper: + + ```python + t += 1 + m = beta1 * m + (1 - beta1) * g + v = max(beta2 * v, abs(g)) + current_lr = learning_rate / (1 - beta1 ** t) + w = w - current_lr * m / (v + epsilon) + ``` + + Similarly to `Adam`, the epsilon is added for numerical stability + (especially to get rid of division by zero when `v_t == 0`). + + In contrast to `Adam`, the sparse implementation of this algorithm + (used when the gradient is an IndexedSlices object, typically because of + `tf.gather` or an embedding lookup in the forward pass) only updates + variable slices and corresponding `m_t`, `v_t` terms when that part of + the variable was used in the forward pass. This means that the sparse + behavior is in contrast to the dense behavior (similar to some momentum + implementations which ignore momentum unless a variable slice was actually + used). + + Args: + learning_rate: A `Tensor`, floating point value, or a schedule that is a + `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate. + beta_1: A float value or a constant float tensor. The exponential decay + rate for the 1st moment estimates. + beta_2: A float value or a constant float tensor. The exponential decay + rate for the exponentially weighted infinity norm. + epsilon: A small constant for numerical stability. + name: Optional name for the operations created when applying gradients. + Defaults to `"Adamax"`. + **kwargs: keyword arguments. Allowed arguments are `clipvalue`, + `clipnorm`, `global_clipnorm`. + If `clipvalue` (float) is set, the gradient of each weight + is clipped to be no higher than this value. + If `clipnorm` (float) is set, the gradient of each weight + is individually clipped so that its norm is no higher than this value. + If `global_clipnorm` (float) is set, the gradient of all weights is + clipped so that their global norm is no higher than this value.
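+
+    Usage (a brief, illustrative sketch; the loop simply applies the
+    update rule shown above for three steps):
+
+    ```python
+    opt = tf.keras.optimizers.legacy.Adamax(learning_rate=0.001)
+    var1 = tf.Variable(10.0)
+    loss = lambda: (var1 ** 2) / 2.0  # d(loss) / d(var1) == var1
+    for _ in range(3):
+        opt.minimize(loss, var_list=[var1])
+    ```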
+ + Reference: + - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) + """ + + _HAS_AGGREGATE_GRAD = True + + def __init__( + self, + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-7, + name="Adamax", + **kwargs + ): + super().__init__(name, **kwargs) + self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) + self._set_hyper("decay", self._initial_decay) + self._set_hyper("beta_1", beta_1) + self._set_hyper("beta_2", beta_2) + self.epsilon = epsilon or backend_config.epsilon() + + def _create_slots(self, var_list): + # Separate for-loops to respect the ordering of slot variables from v1. + for var in var_list: + self.add_slot(var, "m") # Create slots for the first moments. + for var in var_list: + self.add_slot(var, "v") # Create slots for the second moments. + + def _prepare_local(self, var_device, var_dtype, apply_state): + super()._prepare_local(var_device, var_dtype, apply_state) + + local_step = tf.cast(self.iterations + 1, var_dtype) + beta_1_t = tf.identity(self._get_hyper("beta_1", var_dtype)) + beta_2_t = tf.identity(self._get_hyper("beta_2", var_dtype)) + beta_1_power = tf.pow(beta_1_t, local_step) + lr_t = apply_state[(var_device, var_dtype)]["lr_t"] + + apply_state[(var_device, var_dtype)].update( + dict( + neg_scaled_lr=-lr_t / (1 - beta_1_power), + epsilon=tf.convert_to_tensor(self.epsilon, var_dtype), + beta_1_t=beta_1_t, + beta_1_power=beta_1_power, + one_minus_beta_1_t=1 - beta_1_t, + beta_2_t=beta_2_t, + zero=tf.zeros((), dtype=tf.int64), + ) + ) + + def _resource_apply_dense(self, grad, var, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + return tf.raw_ops.ResourceApplyAdaMax( + var=var.handle, + m=m.handle, + v=v.handle, + beta1_power=coefficients["beta_1_power"], + lr=coefficients["lr_t"], + beta1=coefficients["beta_1_t"], + beta2=coefficients["beta_2_t"], + epsilon=coefficients["epsilon"], + grad=grad, + use_locking=self._use_locking, + ) + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, "m") + m_slice = tf.gather(m, indices, axis=coefficients["zero"]) + m_t_slice = ( + m_slice * coefficients["beta_1_t"] + + grad * coefficients["one_minus_beta_1_t"] + ) + with tf.control_dependencies([m_t_slice]): + m_t = self._resource_scatter_update(m, indices, m_t_slice) + + # u_t = max(beta2 * u, abs(g_t)) + v = self.get_slot(var, "v") + v_slice = tf.gather(v, indices, axis=coefficients["zero"]) + v_t_slice = tf.maximum(v_slice * coefficients["beta_2_t"], tf.abs(grad)) + with tf.control_dependencies([v_t_slice]): + v_t = self._resource_scatter_update(v, indices, v_t_slice) + # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t + var_slice = coefficients["neg_scaled_lr"] * ( + m_t_slice / (v_t_slice + coefficients["epsilon"]) + ) + with tf.control_dependencies([var_slice]): + var_update = self._resource_scatter_add(var, indices, var_slice) + return tf.group(*[var_update, m_t, v_t]) + + def get_config(self): + config = super().get_config() + config.update( + { + "learning_rate": self._serialize_hyperparameter( + "learning_rate" + ), + "decay": self._initial_decay, + 
"beta_1": self._serialize_hyperparameter("beta_1"), + "beta_2": self._serialize_hyperparameter("beta_2"), + "epsilon": self.epsilon, + } + ) + return config diff --git a/keras/optimizers/legacy/adamax_test.py b/keras/optimizers/legacy/adamax_test.py new file mode 100644 index 000000000000..b0a921dc03b6 --- /dev/null +++ b/keras/optimizers/legacy/adamax_test.py @@ -0,0 +1,421 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Adamax.""" + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.optimizers.legacy import adamax +from keras.testing_infra import test_combinations + + +def adamax_update_numpy( + param, g_t, t, m, v, alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8 +): + m_t = beta1 * m + (1 - beta1) * g_t + v_t = np.maximum(beta2 * v, np.abs(g_t)) + param_t = param - (alpha / (1 - beta1 ** (t + 1))) * (m_t / (v_t + epsilon)) + return param_t, m_t, v_t + + +def adamax_sparse_update_numpy( + param, + indices, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, +): + m_t, v_t, param_t = np.copy(m), np.copy(v), np.copy(param) + m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t + v_t_slice = np.maximum(beta2 * v[indices], np.abs(g_t)) + param_t_slice = param[indices] - ( + (alpha / (1 - beta1 ** (t + 1))) * (m_t_slice / (v_t_slice + epsilon)) + ) + m_t[indices] = m_t_slice + v_t[indices] = v_t_slice + param_t[indices] = param_t_slice + return param_t, m_t, v_t + + +def get_beta_accumulators(opt, dtype): + local_step = tf.cast(opt.iterations + 1, dtype) + beta_1_t = tf.cast(opt._get_hyper("beta_1"), dtype) + beta_1_power = tf.pow(beta_1_t, local_step) + return beta_1_power + + +class AdamaxOptimizerTest(tf.test.TestCase, parameterized.TestCase): + def testResourceSparse(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. 
+ zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype) + m0, v0, m1, v1 = ( + zero_slots(), + zero_slots(), + zero_slots(), + zero_slots(), + ) + var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = tf.IndexedSlices( + tf.constant(grads0_np), + tf.constant(grads0_np_indices), + tf.constant([3]), + ) + grads1_np_indices = np.array([2, 1], dtype=np.int32) + grads1 = tf.IndexedSlices( + tf.constant(grads1_np), + tf.constant(grads1_np_indices), + tf.constant([3]), + ) + opt = adamax.Adamax() + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0, 3.0], var0) + self.assertAllClose([4.0, 5.0, 6.0], var1) + + beta1_power = get_beta_accumulators(opt, dtype) + + # Run 3 steps of Adamax + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), beta1_power + ) + update.run() + + var0_np, m0, v0 = adamax_sparse_update_numpy( + var0_np, grads0_np_indices, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adamax_sparse_update_numpy( + var1_np, grads1_np_indices, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0) + self.assertAllCloseAccordingToType(var1_np, var1) + + def testSparseDevicePlacement(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for index_dtype in [tf.int32, tf.int64]: + with tf.Graph().as_default(), self.cached_session( + force_gpu=tf.test.is_gpu_available() + ): + # If a GPU is available, tests that all optimizer ops can be + # placed on it (i.e. they have GPU kernels). + var = tf.Variable([[1.0], [2.0]]) + indices = tf.constant([0, 1], dtype=index_dtype) + g_sum = lambda: tf.reduce_sum(tf.gather(var, indices)) + optimizer = adamax.Adamax(3.0) + minimize_op = optimizer.minimize(g_sum, var_list=[var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + minimize_op.run() + + def testSparseRepeatedIndices(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
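+        # Illustrative note: an IndexedSlices gradient that lists index 1
+        # twice with value 0.1 each must update the variable exactly as a
+        # single pre-aggregated gradient of 0.2 at index 1 would; the
+        # assertions below check that both variables stay equal after
+        # every step.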
+ for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + repeated_index_update_var = tf.Variable( + [[1.0], [2.0]], dtype=dtype + ) + aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype) + grad_repeated_index = tf.IndexedSlices( + tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), + tf.constant([1, 1]), + tf.constant([2, 1]), + ) + grad_aggregated = tf.IndexedSlices( + tf.constant([0.2], shape=[1, 1], dtype=dtype), + tf.constant([1]), + tf.constant([2, 1]), + ) + repeated_update = adamax.Adamax().apply_gradients( + [(grad_repeated_index, repeated_index_update_var)] + ) + aggregated_update = adamax.Adamax().apply_gradients( + [(grad_aggregated, aggregated_update_var)] + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose( + aggregated_update_var, repeated_index_update_var.eval() + ) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose( + aggregated_update_var, repeated_index_update_var.eval() + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasic(self): + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with self.session(graph=tf.Graph(), use_gpu=True): + # Initialize variables for numpy implementation. + m0 = np.array([0.0, 0.0]) + v0 = np.array([0.0, 0.0]) + m1 = np.array([0.0, 0.0]) + v1 = np.array([0.0, 0.0]) + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + opt = adamax.Adamax() + if not tf.executing_eagerly(): + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 3 steps of Adamax + for t in range(3): + beta_1_power = get_beta_accumulators(opt, dtype) + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + if not tf.executing_eagerly(): + self.evaluate(update) + else: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + var0_np, m0, v0 = adamax_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adamax_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0), rtol=1e-2 + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1), rtol=1e-2 + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasicWithLearningRateDecay(self): + for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): + with self.session(graph=tf.Graph(), use_gpu=True): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, name="var0_%d" % i) + var1 = tf.Variable(var1_np, name="var1_%d" % i) + + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + + learning_rate = 0.001 + decay = 0.002 + opt = adamax.Adamax(learning_rate=learning_rate, decay=decay) + if not tf.executing_eagerly(): + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 3 steps of Adamax + for t in range(3): + beta_1_power = get_beta_accumulators(opt, dtype) + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), self.evaluate(beta_1_power) + ) + if not tf.executing_eagerly(): + self.evaluate(update) + else: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + lr = learning_rate / (1 + decay * t) + + var0_np, m0, v0 = adamax_update_numpy( + var0_np, grads0_np, t, m0, v0, alpha=lr + ) + var1_np, m1, v1 = adamax_update_numpy( + var1_np, grads1_np, t, m1, v1, alpha=lr + ) + + # Validate updated params + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0), rtol=1e-2 + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1), rtol=1e-2 + ) + + def testTensorLearningRate(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + opt = adamax.Adamax(tf.constant(0.001)) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0) + self.assertAllClose([3.0, 4.0], var1) + + beta1_power = get_beta_accumulators(opt, dtype) + + # Run 3 steps of Adamax + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), beta1_power + ) + update.run() + + var0_np, m0, v0 = adamax_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adamax_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0) + self.assertAllCloseAccordingToType(var1_np, var1) + + def testSharing(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + opt = adamax.Adamax() + update1 = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + update2 = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + beta1_power = get_beta_accumulators(opt, dtype) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0) + self.assertAllClose([3.0, 4.0], var1) + + # Run 3 steps of intertwined Adamax1 and Adamax2. + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), beta1_power + ) + if t % 2 == 0: + update1.run() + else: + update2.run() + + var0_np, m0, v0 = adamax_update_numpy( + var0_np, grads0_np, t, m0, v0 + ) + var1_np, m1, v1 = adamax_update_numpy( + var1_np, grads1_np, t, m1, v1 + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0) + self.assertAllCloseAccordingToType(var1_np, var1) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testSlotsUniqueEager(self): + v1 = tf.Variable(1.0) + v2 = tf.Variable(1.0) + opt = adamax.Adamax(1.0) + opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) + # There should be iteration, and two unique slot variables for v1 and + # v2. + self.assertLen({id(v) for v in opt.variables()}, 5) + + def testConstructAdamaxWithLR(self): + opt = adamax.Adamax(lr=1.0) + opt_2 = adamax.Adamax(learning_rate=0.1, lr=1.0) + opt_3 = adamax.Adamax(learning_rate=0.1) + self.assertIsInstance(opt.lr, tf.Variable) + self.assertIsInstance(opt_2.lr, tf.Variable) + self.assertIsInstance(opt_3.lr, tf.Variable) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(opt.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/ftrl.py b/keras/optimizers/legacy/ftrl.py index e8469a504e3f..0e592b268743 100644 --- a/keras/optimizers/legacy/ftrl.py +++ b/keras/optimizers/legacy/ftrl.py @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,298 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Legacy Ftrl optimizer implementation.""" +"""Ftrl-proximal optimizer implementation.""" -from keras.optimizers.optimizer_v2 import ftrl +import tensorflow.compat.v2 as tf + +from keras.optimizers.legacy import optimizer_v2 + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.optimizers.legacy.Ftrl') -class Ftrl(ftrl.Ftrl): - pass +@keras_export( + "keras.optimizers.legacy.Ftrl", + v1=["keras.optimizers.Ftrl", "keras.optimizers.legacy.Ftrl"], +) +class Ftrl(optimizer_v2.OptimizerV2): + r"""Optimizer that implements the FTRL algorithm. 
+ + "Follow The Regularized Leader" (FTRL) is an optimization algorithm + developed at Google for click-through rate prediction in the early 2010s. It + is most suitable for shallow models with large and sparse feature spaces. + The algorithm is described by + [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf). + The Keras version has support for both online L2 regularization + (the L2 regularization described in the paper + above) and shrinkage-type L2 regularization + (which is the addition of an L2 penalty to the loss function). + + Initialization: + + ```python + n = 0 + sigma = 0 + z = 0 + ``` + + Update rule for one variable `w`: + + ```python + prev_n = n + n = n + g ** 2 + sigma = (sqrt(n) - sqrt(prev_n)) / lr + z = z + g - sigma * w + if abs(z) < lambda_1: + w = 0 + else: + w = (sgn(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2) + ``` + + Notation: + + - `lr` is the learning rate + - `g` is the gradient for the variable + - `lambda_1` is the L1 regularization strength + - `lambda_2` is the L2 regularization strength + + Check the documentation for the `l2_shrinkage_regularization_strength` + parameter for more details when shrinkage is enabled, in which case gradient + is replaced with a gradient with shrinkage. + + Args: + learning_rate: A `Tensor`, floating point value, or a schedule that is a + `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate. + learning_rate_power: A float value, must be less or equal to zero. + Controls how the learning rate decreases during training. Use zero for + a fixed learning rate. + initial_accumulator_value: The starting value for accumulators. + Only zero or positive values are allowed. + l1_regularization_strength: A float value, must be greater than or + equal to zero. Defaults to `0.0`. + l2_regularization_strength: A float value, must be greater than or + equal to zero. Defaults to `0.0`. + name: Optional name prefix for the operations created when applying + gradients. Defaults to `"Ftrl"`. + l2_shrinkage_regularization_strength: A float value, must be greater than + or equal to zero. This differs from L2 above in that the L2 above is a + stabilization penalty, whereas this L2 shrinkage is a magnitude penalty. + When input is sparse shrinkage will only happen on the active weights. + beta: A float value, representing the beta value from the paper. + Defaults to `0.0`. + **kwargs: keyword arguments. Allowed arguments are `clipvalue`, + `clipnorm`, `global_clipnorm`. + If `clipvalue` (float) is set, the gradient of each weight + is clipped to be no higher than this value. + If `clipnorm` (float) is set, the gradient of each weight + is individually clipped so that its norm is no higher than this value. + If `global_clipnorm` (float) is set the gradient of all weights is + clipped so that their global norm is no higher than this value. + + Reference: + - [McMahan et al., 2013]( + https://research.google.com/pubs/archive/41159.pdf) + """ + + def __init__( + self, + learning_rate=0.001, + learning_rate_power=-0.5, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.0, + name="Ftrl", + l2_shrinkage_regularization_strength=0.0, + beta=0.0, + **kwargs, + ): + super().__init__(name, **kwargs) + + if initial_accumulator_value < 0.0: + raise ValueError( + "`initial_accumulator_value` needs to be " + "positive or zero. Received: " + f"initial_accumulator_value={initial_accumulator_value}." 
+ ) + if learning_rate_power > 0.0: + raise ValueError( + "`learning_rate_power` needs to be " + "negative or zero. Received: " + f"learning_rate_power={learning_rate_power}." + ) + if l1_regularization_strength < 0.0: + raise ValueError( + "`l1_regularization_strength` needs to be positive or zero. " + "Received: l1_regularization_strength=" + f"{l1_regularization_strength}." + ) + if l2_regularization_strength < 0.0: + raise ValueError( + "`l2_regularization_strength` needs to be positive or zero. " + "Received: l2_regularization_strength=" + f"{l2_regularization_strength}." + ) + if l2_shrinkage_regularization_strength < 0.0: + raise ValueError( + "`l2_shrinkage_regularization_strength` needs to be positive " + "or zero. Received: l2_shrinkage_regularization_strength" + f"={l2_shrinkage_regularization_strength}." + ) + + self._set_hyper("learning_rate", learning_rate) + self._set_hyper("decay", self._initial_decay) + self._set_hyper("learning_rate_power", learning_rate_power) + self._set_hyper( + "l1_regularization_strength", l1_regularization_strength + ) + self._set_hyper( + "l2_regularization_strength", l2_regularization_strength + ) + self._set_hyper("beta", beta) + self._initial_accumulator_value = initial_accumulator_value + self._l2_shrinkage_regularization_strength = ( + l2_shrinkage_regularization_strength + ) + + def _create_slots(self, var_list): + # Create the "accum" and "linear" slots. + for var in var_list: + dtype = var.dtype.base_dtype + init = tf.compat.v1.constant_initializer( + self._initial_accumulator_value, dtype=dtype + ) + self.add_slot(var, "accumulator", init) + self.add_slot(var, "linear") + + def _prepare_local(self, var_device, var_dtype, apply_state): + super()._prepare_local(var_device, var_dtype, apply_state) + apply_state[(var_device, var_dtype)].update( + dict( + learning_rate_power=tf.identity( + self._get_hyper("learning_rate_power", var_dtype) + ), + l1_regularization_strength=tf.identity( + self._get_hyper("l1_regularization_strength", var_dtype) + ), + l2_regularization_strength=tf.identity( + self._get_hyper("l2_regularization_strength", var_dtype) + ), + beta=tf.identity(self._get_hyper("beta", var_dtype)), + l2_shrinkage_regularization_strength=tf.cast( + self._l2_shrinkage_regularization_strength, var_dtype + ), + ) + ) + + def _resource_apply_dense(self, grad, var, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + # Adjust L2 regularization strength to include beta to avoid the + # underlying TensorFlow ops needing to include it. 
+ adjusted_l2_regularization_strength = coefficients[ + "l2_regularization_strength" + ] + coefficients["beta"] / (2.0 * coefficients["lr_t"]) + + accum = self.get_slot(var, "accumulator") + linear = self.get_slot(var, "linear") + + if self._l2_shrinkage_regularization_strength <= 0.0: + return tf.raw_ops.ResourceApplyFtrl( + var=var.handle, + accum=accum.handle, + linear=linear.handle, + grad=grad, + lr=coefficients["lr_t"], + l1=coefficients["l1_regularization_strength"], + l2=adjusted_l2_regularization_strength, + lr_power=coefficients["learning_rate_power"], + use_locking=self._use_locking, + ) + else: + return tf.raw_ops.ResourceApplyFtrlV2( + var=var.handle, + accum=accum.handle, + linear=linear.handle, + grad=grad, + lr=coefficients["lr_t"], + l1=coefficients["l1_regularization_strength"], + l2=adjusted_l2_regularization_strength, + l2_shrinkage=coefficients[ + "l2_shrinkage_regularization_strength" + ], + lr_power=coefficients["learning_rate_power"], + use_locking=self._use_locking, + ) + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + # Adjust L2 regularization strength to include beta to avoid the + # underlying TensorFlow ops needing to include it. + adjusted_l2_regularization_strength = coefficients[ + "l2_regularization_strength" + ] + coefficients["beta"] / (2.0 * coefficients["lr_t"]) + + accum = self.get_slot(var, "accumulator") + linear = self.get_slot(var, "linear") + + if self._l2_shrinkage_regularization_strength <= 0.0: + return tf.raw_ops.ResourceSparseApplyFtrl( + var=var.handle, + accum=accum.handle, + linear=linear.handle, + grad=grad, + indices=indices, + lr=coefficients["lr_t"], + l1=coefficients["l1_regularization_strength"], + l2=adjusted_l2_regularization_strength, + lr_power=coefficients["learning_rate_power"], + use_locking=self._use_locking, + ) + else: + return tf.raw_ops.ResourceSparseApplyFtrlV2( + var=var.handle, + accum=accum.handle, + linear=linear.handle, + grad=grad, + indices=indices, + lr=coefficients["lr_t"], + l1=coefficients["l1_regularization_strength"], + l2=adjusted_l2_regularization_strength, + l2_shrinkage=coefficients[ + "l2_shrinkage_regularization_strength" + ], + lr_power=coefficients["learning_rate_power"], + use_locking=self._use_locking, + ) + + def get_config(self): + config = super().get_config() + config.update( + { + "learning_rate": self._serialize_hyperparameter( + "learning_rate" + ), + "decay": self._initial_decay, + "initial_accumulator_value": self._initial_accumulator_value, + "learning_rate_power": self._serialize_hyperparameter( + "learning_rate_power" + ), + "l1_regularization_strength": self._serialize_hyperparameter( + "l1_regularization_strength" + ), + "l2_regularization_strength": self._serialize_hyperparameter( + "l2_regularization_strength" + ), + "beta": self._serialize_hyperparameter("beta"), + "l2_shrinkage_regularization_strength": self._l2_shrinkage_regularization_strength, # noqa: E501 + } + ) + return config diff --git a/keras/optimizers/legacy/ftrl_test.py b/keras/optimizers/legacy/ftrl_test.py new file mode 100644 index 000000000000..4c1caa941243 --- /dev/null +++ b/keras/optimizers/legacy/ftrl_test.py @@ -0,0 +1,558 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for Ftrl operations.""" + +import numpy as np +import tensorflow.compat.v2 as tf + +from keras.optimizers.legacy import ftrl + + +class FtrlOptimizerTest(tf.test.TestCase): + def doTestFtrlwithoutRegularization(self, use_resource=False): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + if use_resource: + var0 = tf.Variable([0.0, 0.0], dtype=dtype) + var1 = tf.Variable([0.0, 0.0], dtype=dtype) + else: + var0 = tf.Variable([0.0, 0.0], dtype=dtype) + var1 = tf.Variable([0.0, 0.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.2], dtype=dtype) + grads1 = tf.constant([0.01, 0.02], dtype=dtype) + opt = ftrl.Ftrl( + 3.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.0, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllClose([0.0, 0.0], v0_val) + self.assertAllClose([0.0, 0.0], v1_val) + + # Run 3 steps FTRL + for _ in range(3): + update.run() + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + np.array([-2.60260963, -4.29698515]), v0_val + ) + self.assertAllCloseAccordingToType( + np.array([-0.28432083, -0.56694895]), v1_val + ) + + def testFtrlWithoutRegularization(self): + self.doTestFtrlwithoutRegularization(use_resource=False) + + def testResourceFtrlWithoutRegularization(self): + self.doTestFtrlwithoutRegularization(use_resource=True) + + def testFtrlwithoutRegularization2(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([4.0, 3.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.2], dtype=dtype) + grads1 = tf.constant([0.01, 0.02], dtype=dtype) + + opt = ftrl.Ftrl( + 3.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.0, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) + self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) + + # Run 3 steps FTRL + for _ in range(3): + update.run() + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + np.array([-2.55607247, -3.98729396]), v0_val + ) + self.assertAllCloseAccordingToType( + np.array([-0.28232238, -0.56096673]), v1_val + ) + + def testMinimizeSparseResourceVariable(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
+ for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) + x = tf.constant([[4.0], [5.0]], dtype=dtype) + + def loss(): + pred = tf.matmul( + tf.compat.v1.nn.embedding_lookup([var0], [0]), x + ) + return pred * pred + + sgd_op = ftrl.Ftrl(1.0).minimize(loss, var_list=[var0]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllCloseAccordingToType( + [[1.0, 2.0]], self.evaluate(var0) + ) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType( + [[0, 1]], self.evaluate(var0), atol=0.01 + ) + + def testFtrlWithL1(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([4.0, 3.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.2], dtype=dtype) + grads1 = tf.constant([0.01, 0.02], dtype=dtype) + + opt = ftrl.Ftrl( + 3.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.001, + l2_regularization_strength=0.0, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) + self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) + + # Run 10 steps FTRL + for _ in range(10): + update.run() + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + np.array([-7.66718769, -10.91273689]), v0_val + ) + self.assertAllCloseAccordingToType( + np.array([-0.93460727, -1.86147261]), v1_val + ) + + def testFtrlWithBeta(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([4.0, 3.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.2], dtype=dtype) + grads1 = tf.constant([0.01, 0.02], dtype=dtype) + + opt = ftrl.Ftrl(3.0, initial_accumulator_value=0.1, beta=0.1) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) + self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) + + # Run 10 steps FTRL + for _ in range(10): + update.run() + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + np.array([-6.096838, -9.162214]), v0_val + ) + self.assertAllCloseAccordingToType( + np.array([-0.717741, -1.425132]), v1_val + ) + + def testFtrlWithL2_Beta(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
+ for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([4.0, 3.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.2], dtype=dtype) + grads1 = tf.constant([0.01, 0.02], dtype=dtype) + + opt = ftrl.Ftrl( + 3.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.1, + beta=0.1, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) + self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) + + # Run 10 steps FTRL + for _ in range(10): + update.run() + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + np.array([-2.735487, -4.704625]), v0_val + ) + self.assertAllCloseAccordingToType( + np.array([-0.294335, -0.586556]), v1_val + ) + + def testFtrlWithL1_L2(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([4.0, 3.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.2], dtype=dtype) + grads1 = tf.constant([0.01, 0.02], dtype=dtype) + + opt = ftrl.Ftrl( + 3.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.001, + l2_regularization_strength=2.0, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) + self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) + + # Run 10 steps FTRL + for _ in range(10): + update.run() + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + np.array([-0.24059935, -0.46829352]), v0_val + ) + self.assertAllCloseAccordingToType( + np.array([-0.02406147, -0.04830509]), v1_val + ) + + def testFtrlWithL1_L2_L2Shrinkage(self): + """Test the new FTRL op with support for l2 shrinkage. + + The addition of this parameter, which places constant pressure on + weights towards the origin, causes the gradient descent trajectory to + differ. The weights will tend to have smaller magnitudes with this + parameter set. + """ + # TODO(tanzheny, omalleyt): Fix test in eager mode.
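+        # Illustrative note (per the ResourceApplyFtrlV2 op semantics this
+        # optimizer lowers to): the linear term is updated with a
+        # shrinkage-adjusted gradient,
+        #     grad_with_shrinkage = grad + 2 * l2_shrinkage * var,
+        # while the accumulator still adds the plain grad ** 2, so only the
+        # weight magnitudes shrink, not the adaptive learning rate.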
+ for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([4.0, 3.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.2], dtype=dtype) + grads1 = tf.constant([0.01, 0.02], dtype=dtype) + + opt = ftrl.Ftrl( + 3.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.001, + l2_regularization_strength=2.0, + l2_shrinkage_regularization_strength=0.1, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) + self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) + + # Run 10 steps FTRL + for _ in range(10): + update.run() + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + np.array([-0.22578995, -0.44345796]), v0_val + ) + self.assertAllCloseAccordingToType( + np.array([-0.14378493, -0.13229476]), v1_val + ) + + def testFtrlWithL1_L2_L2ShrinkageSparse(self): + """Tests the new FTRL op with support for l2 shrinkage on sparse + grads.""" + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + var0 = tf.Variable([[1.0], [2.0]], dtype=dtype) + var1 = tf.Variable([[4.0], [3.0]], dtype=dtype) + grads0 = tf.IndexedSlices( + tf.constant([0.1], shape=[1, 1], dtype=dtype), + tf.constant([0]), + tf.constant([2, 1]), + ) + grads1 = tf.IndexedSlices( + tf.constant([0.02], shape=[1, 1], dtype=dtype), + tf.constant([1]), + tf.constant([2, 1]), + ) + + opt = ftrl.Ftrl( + 3.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.001, + l2_regularization_strength=2.0, + l2_shrinkage_regularization_strength=0.1, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType([[1.0], [2.0]], v0_val) + self.assertAllCloseAccordingToType([[4.0], [3.0]], v1_val) + + # Run 10 steps FTRL + for _ in range(10): + update.run() + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + [[-0.22578995], [2.0]], v0_val + ) + self.assertAllCloseAccordingToType( + [[4.0], [-0.13229476]], v1_val + ) + + def testFtrlWithL2ShrinkageDoesNotChangeLrSchedule(self): + """Verifies that l2 shrinkage in FTRL does not change lr schedule.""" + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
+        for dtype in [tf.half, tf.float32]:
+            with tf.Graph().as_default(), self.cached_session() as sess:
+                var0 = tf.Variable([1.0, 2.0], dtype=dtype)
+                var1 = tf.Variable([1.0, 2.0], dtype=dtype)
+                grads0 = tf.constant([0.1, 0.2], dtype=dtype)
+                grads1 = tf.constant([0.1, 0.2], dtype=dtype)
+
+                opt0 = ftrl.Ftrl(
+                    3.0,
+                    initial_accumulator_value=0.1,
+                    l1_regularization_strength=0.001,
+                    l2_regularization_strength=2.0,
+                    l2_shrinkage_regularization_strength=0.1,
+                )
+                opt1 = ftrl.Ftrl(
+                    3.0,
+                    initial_accumulator_value=0.1,
+                    l1_regularization_strength=0.001,
+                    l2_regularization_strength=2.0,
+                )
+                update0 = opt0.apply_gradients([(grads0, var0)])
+                update1 = opt1.apply_gradients([(grads1, var1)])
+                self.evaluate(tf.compat.v1.global_variables_initializer())
+
+                v0_val, v1_val = self.evaluate([var0, var1])
+                self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
+                self.assertAllCloseAccordingToType([1.0, 2.0], v1_val)
+
+                # Run 10 steps FTRL
+                for _ in range(10):
+                    update0.run()
+                    update1.run()
+
+                v0_val, v1_val = self.evaluate([var0, var1])
+                # var0 is experiencing L2 shrinkage, so it should be smaller
+                # than var1 in magnitude.
+                self.assertTrue((v0_val**2 < v1_val**2).all())
+                accum0 = sess.run(opt0.get_slot(var0, "accumulator"))
+                accum1 = sess.run(opt1.get_slot(var1, "accumulator"))
+                # L2 shrinkage should not change how we update the grad
+                # accumulator.
+                self.assertAllCloseAccordingToType(accum0, accum1)
+
+    def applyOptimizer(self, opt, dtype, steps=5, is_sparse=False):
+        if is_sparse:
+            var0 = tf.Variable([[0.0], [0.0]], dtype=dtype)
+            var1 = tf.Variable([[0.0], [0.0]], dtype=dtype)
+            grads0 = tf.IndexedSlices(
+                tf.constant([0.1], shape=[1, 1], dtype=dtype),
+                tf.constant([0]),
+                tf.constant([2, 1]),
+            )
+            grads1 = tf.IndexedSlices(
+                tf.constant([0.02], shape=[1, 1], dtype=dtype),
+                tf.constant([1]),
+                tf.constant([2, 1]),
+            )
+        else:
+            var0 = tf.Variable([0.0, 0.0], dtype=dtype)
+            var1 = tf.Variable([0.0, 0.0], dtype=dtype)
+            grads0 = tf.constant([0.1, 0.2], dtype=dtype)
+            grads1 = tf.constant([0.01, 0.02], dtype=dtype)
+
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        self.evaluate(tf.compat.v1.global_variables_initializer())
+
+        v0_val, v1_val = self.evaluate([var0, var1])
+        if is_sparse:
+            self.assertAllCloseAccordingToType([[0.0], [0.0]], v0_val)
+            self.assertAllCloseAccordingToType([[0.0], [0.0]], v1_val)
+        else:
+            self.assertAllCloseAccordingToType([0.0, 0.0], v0_val)
+            self.assertAllCloseAccordingToType([0.0, 0.0], v1_val)
+
+        # Run Ftrl for a few steps
+        for _ in range(steps):
+            update.run()
+
+        v0_val, v1_val = self.evaluate([var0, var1])
+        return v0_val, v1_val
+
+    # When variables are initialized to zero, FTRL-Proximal has two
+    # properties:
+    # 1. Without L1 & L2 but with a fixed learning rate, FTRL-Proximal is
+    # identical to GradientDescent.
+    # 2. Without L1 & L2 but with an adaptive learning rate, FTRL-Proximal is
+    # identical to Adagrad.
+    # So, based on these two properties, we test whether our implementation of
+    # FTRL-Proximal performs the same updates as Adagrad or GradientDescent.
+    def testEquivAdagradwithoutRegularization(self):
+        # TODO(tanzheny, omalleyt): Fix test in eager mode.
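The two equivalences stated in the comment above can also be checked directly in eager mode with the public optimizers; a minimal sketch, assuming TF 2.x semantics and the default (zero) regularization strengths:

```python
import tensorflow as tf

# With zero-initialized weights and no regularization, Ftrl with a fixed
# per-step learning rate (learning_rate_power=0.0) should walk the same
# trajectory as plain gradient descent.
v_ftrl = tf.Variable([0.0, 0.0])
v_sgd = tf.Variable([0.0, 0.0])
grads = [tf.constant([0.1, 0.2])]

ftrl_opt = tf.keras.optimizers.Ftrl(
    learning_rate=3.0, learning_rate_power=0.0, initial_accumulator_value=0.1
)
sgd_opt = tf.keras.optimizers.SGD(learning_rate=3.0)
for _ in range(5):
    ftrl_opt.apply_gradients(zip(grads, [v_ftrl]))
    sgd_opt.apply_gradients(zip(grads, [v_sgd]))

tf.debugging.assert_near(v_ftrl, v_sgd)  # identical updates
```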
+ for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + val0, val1 = self.applyOptimizer( + ftrl.Ftrl( + 3.0, + # Adagrad learning rate + learning_rate_power=-0.5, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.0, + ), + dtype, + ) + + with tf.Graph().as_default(), self.cached_session(): + val2, val3 = self.applyOptimizer( + tf.compat.v1.train.AdagradOptimizer( + 3.0, initial_accumulator_value=0.1 + ), + dtype, + ) + + self.assertAllCloseAccordingToType(val0, val2) + self.assertAllCloseAccordingToType(val1, val3) + + def testEquivSparseAdagradwithoutRegularization(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + val0, val1 = self.applyOptimizer( + ftrl.Ftrl( + 3.0, + # Adagrad learning rate + learning_rate_power=-0.5, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.0, + ), + dtype, + is_sparse=True, + ) + + with tf.Graph().as_default(), self.cached_session(): + val2, val3 = self.applyOptimizer( + tf.compat.v1.train.AdagradOptimizer( + 3.0, initial_accumulator_value=0.1 + ), + dtype, + is_sparse=True, + ) + + self.assertAllCloseAccordingToType(val0, val2) + self.assertAllCloseAccordingToType(val1, val3) + + def testEquivSparseGradientDescentwithoutRegularization(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + val0, val1 = self.applyOptimizer( + ftrl.Ftrl( + 3.0, + # Fixed learning rate + learning_rate_power=-0.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.0, + ), + dtype, + is_sparse=True, + ) + + with tf.Graph().as_default(), self.cached_session(): + val2, val3 = self.applyOptimizer( + tf.compat.v1.train.GradientDescentOptimizer(3.0), + dtype, + is_sparse=True, + ) + + self.assertAllCloseAccordingToType(val0, val2) + self.assertAllCloseAccordingToType(val1, val3) + + def testEquivGradientDescentwithoutRegularization(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32]: + with tf.Graph().as_default(), self.cached_session(): + val0, val1 = self.applyOptimizer( + ftrl.Ftrl( + 3.0, + # Fixed learning rate + learning_rate_power=-0.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.0, + ), + dtype, + ) + + with tf.Graph().as_default(), self.cached_session(): + val2, val3 = self.applyOptimizer( + tf.compat.v1.train.GradientDescentOptimizer(3.0), dtype + ) + + self.assertAllCloseAccordingToType(val0, val2) + self.assertAllCloseAccordingToType(val1, val3) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/gradient_descent.py b/keras/optimizers/legacy/gradient_descent.py new file mode 100644 index 000000000000..8d305f705e6e --- /dev/null +++ b/keras/optimizers/legacy/gradient_descent.py @@ -0,0 +1,222 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""SGD optimizer implementation.""" + + +import tensorflow.compat.v2 as tf + +from keras.optimizers.legacy import optimizer_v2 + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@keras_export( + "keras.optimizers.legacy.SGD", + v1=["keras.optimizers.SGD", "keras.optimizers.legacy.SGD"], +) +class SGD(optimizer_v2.OptimizerV2): + r"""Gradient descent (with momentum) optimizer. + + Update rule for parameter `w` with gradient `g` when `momentum=0`: + + ```python + w = w - learning_rate * g + ``` + + Update rule when `momentum` is larger than 0: + + ```python + velocity = momentum * velocity - learning_rate * g + w = w + velocity + ``` + + When `nesterov=True`, this rule becomes: + + ```python + velocity = momentum * velocity - learning_rate * g + w = w + momentum * velocity - learning_rate * g + ``` + + Args: + learning_rate: A `Tensor`, floating point value, or a schedule that is a + `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable + that takes no arguments and returns the actual value to use. The + learning rate. Defaults to `0.01`. + momentum: float hyperparameter >= 0 that accelerates gradient descent in + the relevant direction and dampens oscillations. Vanilla gradient + descent means no momentum. Defaults to `0.`. + nesterov: boolean. Whether to apply Nesterov momentum. + Defaults to `False`. + name: Optional name prefix for the operations created when applying + gradients. Defaults to `"SGD"`. + **kwargs: keyword arguments. Allowed arguments are `clipvalue`, + `clipnorm`, `global_clipnorm`. + If `clipvalue` (float) is set, the gradient of each weight + is clipped to be no higher than this value. + If `clipnorm` (float) is set, the gradient of each weight + is individually clipped so that its norm is no higher than this value. + If `global_clipnorm` (float) is set the gradient of all weights is + clipped so that their global norm is no higher than this value. + + Usage: + + >>> opt = tf.keras.optimizers.legacy.SGD(learning_rate=0.1) + >>> var = tf.Variable(1.0) + >>> loss = lambda: (var ** 2)/2.0 # d(loss)/d(var1) = var1 + >>> step_count = opt.minimize(loss, [var]).numpy() + >>> # Step is `- learning_rate * grad` + >>> var.numpy() + 0.9 + + >>> opt = tf.keras.optimizers.legacy.SGD(learning_rate=0.1, momentum=0.9) + >>> var = tf.Variable(1.0) + >>> val0 = var.value() + >>> loss = lambda: (var ** 2)/2.0 # d(loss)/d(var1) = var1 + >>> # First step is `- learning_rate * grad` + >>> step_count = opt.minimize(loss, [var]).numpy() + >>> val1 = var.value() + >>> (val0 - val1).numpy() + 0.1 + >>> # On later steps, step-size increases because of momentum + >>> step_count = opt.minimize(loss, [var]).numpy() + >>> val2 = var.value() + >>> (val1 - val2).numpy() + 0.18 + + Reference: + - For `nesterov=True`, See [Sutskever et al., 2013]( + https://github.com/mlresearch/v28/blob/gh-pages/sutskever13.pdf). 
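The momentum example in the docstring above can be reproduced by hand from the stated update rules; a small numpy sketch, assuming only those rules, that recovers the quoted 0.1 and 0.18 step sizes:

```python
import numpy as np


def sgd_step(w, velocity, g, lr=0.1, momentum=0.9, nesterov=False):
    """One step following the update rules quoted in the docstring."""
    velocity = momentum * velocity - lr * g
    if nesterov:
        return w + momentum * velocity - lr * g, velocity
    return w + velocity, velocity


w, v = 1.0, 0.0                   # loss = w**2 / 2, so grad = w
w1, v = sgd_step(w, v, g=w)
assert np.isclose(w - w1, 0.1)    # first step: lr * grad
w2, v = sgd_step(w1, v, g=w1)
assert np.isclose(w1 - w2, 0.18)  # momentum enlarges the step
```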
+ """ + + _HAS_AGGREGATE_GRAD = True + + def __init__( + self, + learning_rate=0.01, + momentum=0.0, + nesterov=False, + name="SGD", + **kwargs, + ): + super().__init__(name, **kwargs) + self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) + self._set_hyper("decay", self._initial_decay) + + self._momentum = False + if ( + isinstance(momentum, tf.Tensor) + or callable(momentum) + or momentum > 0 + ): + self._momentum = True + if isinstance(momentum, (int, float)) and ( + momentum < 0 or momentum > 1 + ): + raise ValueError( + "`momentum` must be between [0, 1]. Received: " + f"momentum={momentum} (of type {type(momentum)})." + ) + self._set_hyper("momentum", momentum) + + self.nesterov = nesterov + + def _create_slots(self, var_list): + if self._momentum: + for var in var_list: + self.add_slot(var, "momentum") + + def _prepare_local(self, var_device, var_dtype, apply_state): + super()._prepare_local(var_device, var_dtype, apply_state) + apply_state[(var_device, var_dtype)]["momentum"] = tf.identity( + self._get_hyper("momentum", var_dtype) + ) + + def _resource_apply_dense(self, grad, var, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + if self._momentum: + momentum_var = self.get_slot(var, "momentum") + return tf.raw_ops.ResourceApplyKerasMomentum( + var=var.handle, + accum=momentum_var.handle, + lr=coefficients["lr_t"], + grad=grad, + momentum=coefficients["momentum"], + use_locking=self._use_locking, + use_nesterov=self.nesterov, + ) + else: + return tf.raw_ops.ResourceApplyGradientDescent( + var=var.handle, + alpha=coefficients["lr_t"], + delta=grad, + use_locking=self._use_locking, + ) + + def _resource_apply_sparse_duplicate_indices( + self, grad, var, indices, **kwargs + ): + if self._momentum: + return super()._resource_apply_sparse_duplicate_indices( + grad, var, indices, **kwargs + ) + else: + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = kwargs.get("apply_state", {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + return tf.raw_ops.ResourceScatterAdd( + resource=var.handle, + indices=indices, + updates=-grad * coefficients["lr_t"], + ) + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + # This method is only needed for momentum optimization. + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + momentum_var = self.get_slot(var, "momentum") + return tf.raw_ops.ResourceSparseApplyKerasMomentum( + var=var.handle, + accum=momentum_var.handle, + lr=coefficients["lr_t"], + grad=grad, + indices=indices, + momentum=coefficients["momentum"], + use_locking=self._use_locking, + use_nesterov=self.nesterov, + ) + + def get_config(self): + config = super().get_config() + config.update( + { + "learning_rate": self._serialize_hyperparameter( + "learning_rate" + ), + "decay": self._initial_decay, + "momentum": self._serialize_hyperparameter("momentum"), + "nesterov": self.nesterov, + } + ) + return config diff --git a/keras/optimizers/legacy/gradient_descent_test.py b/keras/optimizers/legacy/gradient_descent_test.py new file mode 100644 index 000000000000..ec5bc4e99bd7 --- /dev/null +++ b/keras/optimizers/legacy/gradient_descent_test.py @@ -0,0 +1,881 @@ +# Copyright 2015 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional test for GradientDescent.""" + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.optimizers.legacy import gradient_descent +from keras.optimizers.schedules import learning_rate_schedule +from keras.testing_infra import test_combinations + + +class GradientDescentOptimizerTest(tf.test.TestCase, parameterized.TestCase): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasic(self): + for dtype in [tf.half, tf.float32, tf.float64]: + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.1], dtype=dtype) + grads1 = tf.constant([0.01, 0.01], dtype=dtype) + sgd = gradient_descent.SGD(3.0) + sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1) + ) + + def _test_basic_sgd_with_learning_rate_decay(self, sgd, dtype): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.1], dtype=dtype) + grads1 = tf.constant([0.01, 0.01], dtype=dtype) + if not tf.executing_eagerly(): + sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 2 steps of sgd + if not tf.executing_eagerly(): + self.evaluate(sgd_op) + else: + sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) + # Validate updated params + self.assertAllCloseAccordingToType( + [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1) + ) + + if not tf.executing_eagerly(): + self.evaluate(sgd_op) + else: + sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) + # Validate updated params + self.assertAllCloseAccordingToType( + [1.0 - 3.0 * 0.1 - 2.0 * 0.1, 2.0 - 3.0 * 0.1 - 2.0 * 0.1], + self.evaluate(var0), + ) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * 0.01 - 2.0 * 0.01, 4.0 - 3.0 * 0.01 - 2.0 * 0.01], + self.evaluate(var1), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasicWithLearningRateDecay(self): + for dtype in [tf.half, tf.float32, tf.float64]: + learning_rate = 3.0 + decay = 0.5 + sgd = gradient_descent.SGD(learning_rate=learning_rate, decay=decay) + self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasicWithLearningRateInverseTimeDecay(self): + for 
dtype in [tf.half, tf.float32, tf.float64]: + learning_rate = learning_rate_schedule.InverseTimeDecay( + 3.0, decay_steps=1.0, decay_rate=0.5 + ) + sgd = gradient_descent.SGD(learning_rate=learning_rate) + self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasicWithLearningRateInverseTimeDecaySerializeAndDeserialize(self): + for dtype in [tf.half, tf.float32, tf.float64]: + learning_rate = learning_rate_schedule.InverseTimeDecay( + 3.0, decay_steps=1.0, decay_rate=0.5 + ) + sgd = gradient_descent.SGD(learning_rate=learning_rate) + sgd = gradient_descent.SGD.from_config(sgd.get_config()) + self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasicCallableParams(self): + for dtype in [tf.half, tf.float32, tf.float64]: + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.1], dtype=dtype) + grads1 = tf.constant([0.01, 0.01], dtype=dtype) + lr = lambda: 3.0 + sgd = gradient_descent.SGD(lr) + sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1) + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testMinimizeResourceVariable(self): + for dtype in [tf.half, tf.float32, tf.float64]: + var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) + var1 = tf.Variable([3.0], dtype=dtype) + x = tf.constant([[4.0], [5.0]], dtype=dtype) + loss = lambda: tf.matmul(var0, x) + var1 + sgd = gradient_descent.SGD(1.0) + sgd_op = sgd.minimize(loss, [var0, var1]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[1.0 - 4.0, 2.0 - 5.0]], self.evaluate(var0) + ) + self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1)) + + def testMinimizeSparseResourceVariable(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
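Both the `decay=0.5` test above and the `InverseTimeDecay` tests exercise the same schedule, which is why they share `_test_basic_sgd_with_learning_rate_decay` and its expected step sizes of 3.0 and then 2.0. A sketch of the formula, assuming the standard inverse-time-decay definition:

```python
def inverse_time_decay(initial_lr, step, decay_steps=1.0, decay_rate=0.5):
    # lr(t) = lr0 / (1 + decay_rate * t / decay_steps)
    return initial_lr / (1.0 + decay_rate * step / decay_steps)


assert inverse_time_decay(3.0, step=0) == 3.0  # first update:  w -= 3.0 * g
assert inverse_time_decay(3.0, step=1) == 2.0  # second update: w -= 2.0 * g
```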
+ with tf.Graph().as_default(): + for dtype in [tf.half, tf.float32, tf.float64]: + var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) + var1 = tf.Variable([3.0], dtype=dtype) + x = tf.constant([[4.0], [5.0]], dtype=dtype) + + def loss(): + pred = tf.matmul( + tf.compat.v1.nn.embedding_lookup([var0], [0]), x + ) + pred += var1 + return pred * pred + + sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 + np_grad = 2 * np_pred + self.assertAllCloseAccordingToType( + [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], + self.evaluate(var0), + ) + self.assertAllCloseAccordingToType( + [3.0 - np_grad], self.evaluate(var1) + ) + + def testTensorLearningRate(self): + for dtype in [tf.half, tf.float32, tf.float64]: + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.1], dtype=dtype) + grads1 = tf.constant([0.01, 0.01], dtype=dtype) + lrate = tf.constant(3.0) + sgd_op = gradient_descent.SGD(lrate).apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1) + ) + + def testGradWrtRef(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with tf.Graph().as_default(): + for dtype in [tf.half, tf.float32, tf.float64]: + opt = gradient_descent.SGD(3.0) + values = [1.0, 3.0] + vars_ = [tf.Variable([v], dtype=dtype) for v in values] + loss = lambda: vars_[0] + vars_[1] + grads_and_vars = opt._compute_gradients(loss, vars_) + self.evaluate(tf.compat.v1.global_variables_initializer()) + for grad, _ in grads_and_vars: + self.assertAllCloseAccordingToType( + [1.0], self.evaluate(grad) + ) + + def testSparseBasic(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with tf.Graph().as_default(): + for dtype in [tf.half, tf.float32, tf.float64]: + var0 = tf.Variable([[1.0], [2.0]], dtype=dtype) + var1 = tf.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = tf.IndexedSlices( + tf.constant([0.1], shape=[1, 1], dtype=dtype), + tf.constant([0]), + tf.constant([2, 1]), + ) + grads1 = tf.IndexedSlices( + tf.constant([0.01], shape=[1, 1], dtype=dtype), + tf.constant([1]), + tf.constant([2, 1]), + ) + sgd_op = gradient_descent.SGD(3.0).apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[1.0 - 3.0 * 0.1], [2.0]], self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + [[3.0], [4.0 - 3.0 * 0.01]], self.evaluate(var1) + ) + + def testSparseBasicWithLearningRateDecay(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
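`testSparseBasic` above feeds gradients as `tf.IndexedSlices`, so only the indexed rows are updated. A minimal sketch of the same behavior, assuming TF 2.x eager semantics:

```python
import tensorflow as tf

var = tf.Variable([[1.0], [2.0]])
grad = tf.IndexedSlices(
    values=tf.constant([[0.1]]),   # gradient for row 0 only
    indices=tf.constant([0]),
    dense_shape=tf.constant([2, 1]),
)
tf.keras.optimizers.SGD(3.0).apply_gradients([(grad, var)])
print(var.numpy())  # [[0.7], [2.0]] -- row 1 is untouched
```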
+ with tf.Graph().as_default(): + for dtype in [tf.half, tf.float32, tf.float64]: + var0 = tf.Variable([[1.0], [2.0]], dtype=dtype) + var1 = tf.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = tf.IndexedSlices( + tf.constant([0.1], shape=[1, 1], dtype=dtype), + tf.constant([0]), + tf.constant([2, 1]), + ) + grads1 = tf.IndexedSlices( + tf.constant([0.01], shape=[1, 1], dtype=dtype), + tf.constant([1]), + tf.constant([2, 1]), + ) + sgd_op = gradient_descent.SGD(3.0, decay=0.5).apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Run 2 steps of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[1.0 - 3.0 * 0.1], [2.0]], self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + [[3.0], [4.0 - 3.0 * 0.01]], self.evaluate(var1) + ) + + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[1.0 - 3.0 * 0.1 - 2.0 * 0.1], [2.0]], self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + [[3.0], [4.0 - 3.0 * 0.01 - 2.0 * 0.01]], + self.evaluate(var1), + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testCapturingInFunctionWhileExecutingEagerly(self): + optimizer = gradient_descent.SGD(1.0) + + var_holder = {} + + def step(): + if not var_holder: + var_holder["var"] = tf.Variable(1.0) + else: + var_holder["var"].assign(1.0) + + with tf.GradientTape() as tape: + loss = var_holder["var"] ** 2 + grad = tape.gradient(loss, var_holder["var"]) + optimizer.apply_gradients([(grad, var_holder["var"])]) + return var_holder["var"].read_value() + + compiled_step = tf.function(step) + + self.assertEqual(float(step()), -1.0) + self.assertEqual(float(compiled_step()), -1.0) + # This shouldn't fail; in particular, the learning rate tensor should + # be an EagerTensor once again, not a graph Tensor. 
+            self.assertEqual(float(step()), -1.0)
+
+    def testConstructSGDWithLR(self):
+        opt = gradient_descent.SGD(lr=1.0)
+        opt_2 = gradient_descent.SGD(learning_rate=0.1, lr=1.0)
+        opt_3 = gradient_descent.SGD(learning_rate=0.1)
+        self.assertIsInstance(opt.lr, tf.Variable)
+        self.assertIsInstance(opt_2.lr, tf.Variable)
+        self.assertIsInstance(opt_3.lr, tf.Variable)
+
+        self.evaluate(tf.compat.v1.global_variables_initializer())
+        self.assertAllClose(self.evaluate(opt.lr), (1.0))
+        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
+        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
+
+
+class MomentumOptimizerTest(tf.test.TestCase, parameterized.TestCase):
+    def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum):
+        accum = accum * momentum - g * lr
+        var += accum * momentum - g * lr
+        return var, accum
+
+    @test_combinations.generate(
+        test_combinations.combine(mode=["graph", "eager"])
+    )
+    def testBasic(self):
+        for _, dtype in enumerate([tf.half, tf.float32, tf.float64]):
+            var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0")
+            var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1")
+            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
+            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
+            learning_rate = 2.0
+            momentum = 0.9
+            mom_opt = gradient_descent.SGD(
+                learning_rate=learning_rate, momentum=momentum
+            )
+            # self.assertFalse(mom_opt._initial_decay)
+            mom_update = mom_opt.apply_gradients(
+                zip([grads0, grads1], [var0, var1])
+            )
+
+            # Check we have slots
+            slot0 = mom_opt.get_slot(var0, "momentum")
+            self.assertEqual(slot0.shape, var0.shape)
+            slot1 = mom_opt.get_slot(var1, "momentum")
+            self.assertEqual(slot1.shape, var1.shape)
+
+            # Step 1: the momentum accumulators were 0. So we should see a
+            # normal update: v -= grad * learning_rate
+            self.evaluate(tf.compat.v1.global_variables_initializer())
+            self.evaluate(mom_update)
+            # Check that the momentum accumulators have been updated.
+            self.assertAllCloseAccordingToType(
+                np.array([-0.2, -0.2]), self.evaluate(slot0)
+            )
+            self.assertAllCloseAccordingToType(
+                np.array([-0.02, -0.02]), self.evaluate(slot1)
+            )
+            # Check that the parameters have been updated.
+            self.assertAllCloseAccordingToType(
+                np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
+                self.evaluate(var0),
+            )
+            self.assertAllCloseAccordingToType(
+                np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
+                self.evaluate(var1),
+            )
+            # Step 2: the momentum accumulators contain the previous update.
+            self.evaluate(mom_update)
+            if tf.executing_eagerly():
+                mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+            # Check that the momentum accumulators have been updated.
+            self.assertAllCloseAccordingToType(
+                np.array(
+                    [(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]
+                ),
+                self.evaluate(slot0),
+            )
+            self.assertAllCloseAccordingToType(
+                np.array(
+                    [(0.9 * (-0.02) - 2.0 * 0.01), (0.9 * (-0.02) - 2.0 * 0.01)]
+                ),
+                self.evaluate(slot1),
+            )
+            # Check that the parameters have been updated.
+            self.assertAllCloseAccordingToType(
+                np.array(
+                    [
+                        1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+                        2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+                    ]
+                ),
+                self.evaluate(var0),
+            )
+            self.assertAllCloseAccordingToType(
+                np.array(
+                    [
+                        2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
+                        3.98 - ((0.9 * 0.01 + 0.01) * 2.0),
+                    ]
+                ),
+                self.evaluate(var1),
+            )
+
+    def testNesterovMomentum(self):
+        # TODO(tanzheny, omalleyt): Fix test in eager mode.
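The slot and parameter values asserted in `testBasic` above follow from two lines of arithmetic, since the Keras momentum slot is updated as `accum = momentum * accum - lr * g` and then added to the variable. A hedged numpy walk-through of the first two steps for `var0`:

```python
import numpy as np

lr, momentum = 2.0, 0.9
var, accum = np.array([1.0, 2.0]), np.zeros(2)
g = np.array([0.1, 0.1])

for _ in range(2):
    accum = momentum * accum - lr * g  # the "momentum" slot
    var = var + accum

# Step 1: accum == [-0.2, -0.2],   var == [0.8, 1.8]
# Step 2: accum == [-0.38, -0.38], var == [0.42, 1.42]
assert np.allclose(accum, 0.9 * (-0.2) - 2.0 * 0.1)
assert np.allclose(var, [0.42, 1.42])
```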
+ with tf.Graph().as_default(): + for dtype in [tf.float32, tf.float64]: + var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0") + var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1") + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + loss = lambda: 5 * var0 * var0 + 3 * var1 + mom_op = gradient_descent.SGD( + learning_rate=2.0, momentum=0.9, nesterov=True + ) + opt_op = mom_op.minimize(loss, [var0, var1]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + for _ in range(1, 5): + self.evaluate(opt_op) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9 + ) + var1_np, accum1_np = self._update_nesterov_momentum_numpy( + var1_np, accum1_np, 3, 2.0, 0.9 + ) + self.assertAllClose(var0_np, self.evaluate(var0)) + self.assertAllClose(var1_np, self.evaluate(var1)) + + def testSparseNesterovMomentum(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session() as sess: + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + grads = [] + for t in range(1, 5): + grads.append(var0_np * 10) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9 + ) + var1_np, accum1_np = self._update_nesterov_momentum_numpy( + var1_np, accum1_np, 3, 2.0, 0.9 + ) + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + var0 = tf.Variable(var0_np, dtype=dtype, name="var0") + var1 = tf.Variable(var1_np, dtype=dtype, name="var1") + mom_op = gradient_descent.SGD( + learning_rate=2.0, momentum=0.9, nesterov=True + ) + x_feed = tf.compat.v1.placeholder(dtype) + y_feed = tf.IndexedSlices( + x_feed, tf.constant([0, 1]), tf.constant([2]) + ) + grads_and_vars = [ + (y_feed, var0), + (tf.constant([3.0, 3.0], dtype=dtype), var1), + ] + opt_update = mom_op.apply_gradients(grads_and_vars) + self.evaluate(tf.compat.v1.global_variables_initializer()) + for t in range(1, 5): + sess.run(opt_update, feed_dict={x_feed: grads[t - 1]}) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9 + ) + var1_np, accum1_np = self._update_nesterov_momentum_numpy( + var1_np, accum1_np, 3, 2.0, 0.9 + ) + self.assertAllClose(var0_np, self.evaluate(var0)) + self.assertAllClose(var1_np, self.evaluate(var1)) + + def testMinimizeSparseResourceVariable(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
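`_update_nesterov_momentum_numpy` above is the same Nesterov rule quoted in the SGD docstring (`velocity = momentum * velocity - lr * g; w += momentum * velocity - lr * g`). One step by hand for `var0` of `testNesterovMomentum`, as a sanity check:

```python
lr, momentum = 2.0, 0.9
var, accum = 1.0, 0.0
g = 10.0 * var                         # loss = 5 * var**2 -> grad = 10 * var
accum = momentum * accum - g * lr      # -20.0
var = var + momentum * accum - g * lr  # 1.0 - 18.0 - 20.0 == -37.0
assert (var, accum) == (-37.0, -20.0)
```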
+        with tf.Graph().as_default():
+            for dtype in [tf.half, tf.float32, tf.float64]:
+                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
+
+                def loss():
+                    x = tf.constant([[4.0], [5.0]], dtype=dtype)
+                    pred = tf.matmul(
+                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x
+                    )
+                    return pred * pred
+
+                opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9)
+                sgd_op = opt.minimize(loss, [var0])
+                self.evaluate(tf.compat.v1.global_variables_initializer())
+                # Run 1 step of sgd
+                self.evaluate(sgd_op)
+                # Validate updated params
+                self.assertAllCloseAccordingToType(
+                    [[-111, -138]], self.evaluate(var0)
+                )
+
+    @test_combinations.generate(
+        test_combinations.combine(mode=["graph", "eager"])
+    )
+    def testMinimizeWith2DIndicesForEmbeddingLookup(self):
+        var0 = tf.Variable(tf.ones([2, 2]))
+
+        def loss():
+            return tf.reduce_sum(tf.compat.v1.nn.embedding_lookup(var0, [[1]]))
+
+        opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9)
+        sgd_op = opt.minimize(loss, [var0])
+        self.evaluate(tf.compat.v1.global_variables_initializer())
+        self.evaluate(sgd_op)
+        self.assertAllCloseAccordingToType(
+            [[1, 1], [0, 0]], self.evaluate(var0)
+        )
+
+    def testTensorLearningRateAndMomentum(self):
+        # TODO(tanzheny, omalleyt): Fix test in eager mode.
+        with tf.Graph().as_default():
+            for dtype in [tf.half, tf.float32, tf.float64]:
+                var0 = tf.Variable([1.0, 2.0], dtype=dtype)
+                var1 = tf.Variable([3.0, 4.0], dtype=dtype)
+                grads0 = tf.constant([0.1, 0.1], dtype=dtype)
+                grads1 = tf.constant([0.01, 0.01], dtype=dtype)
+                mom_opt = gradient_descent.SGD(
+                    learning_rate=tf.constant(2.0), momentum=tf.constant(0.9)
+                )
+                mom_update = mom_opt.apply_gradients(
+                    zip([grads0, grads1], [var0, var1])
+                )
+                self.evaluate(tf.compat.v1.global_variables_initializer())
+                # Check we have slots
+                slot0 = mom_opt.get_slot(var0, "momentum")
+                self.assertEqual(slot0.shape, var0.shape)
+                slot1 = mom_opt.get_slot(var1, "momentum")
+                self.assertEqual(slot1.shape, var1.shape)
+
+                # Fetch params to validate initial values
+                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+                # Step 1: the momentum accumulators were 0. So we should see a
+                # normal update: v -= grad * learning_rate
+                self.evaluate(mom_update)
+                # Check that the momentum accumulators have been updated.
+                self.assertAllCloseAccordingToType(
+                    np.array([-0.2, -0.2]), self.evaluate(slot0)
+                )
+                self.assertAllCloseAccordingToType(
+                    np.array([-0.02, -0.02]), self.evaluate(slot1)
+                )
+                # Check that the parameters have been updated.
+                self.assertAllCloseAccordingToType(
+                    np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
+                    self.evaluate(var0),
+                )
+                self.assertAllCloseAccordingToType(
+                    np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
+                    self.evaluate(var1),
+                )
+                # Step 2: the momentum accumulators contain the previous update.
+                self.evaluate(mom_update)
+                # Check that the momentum accumulators have been updated.
+                self.assertAllCloseAccordingToType(
+                    np.array(
+                        [(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]
+                    ),
+                    self.evaluate(slot0),
+                )
+                self.assertAllCloseAccordingToType(
+                    np.array(
+                        [
+                            (0.9 * (-0.02) - 2.0 * 0.01),
+                            (0.9 * (-0.02) - 2.0 * 0.01),
+                        ]
+                    ),
+                    self.evaluate(slot1),
+                )
+                # Check that the parameters have been updated.
+ self.assertAllCloseAccordingToType( + np.array( + [ + 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + ] + ), + self.evaluate(var0), + ) + self.assertAllCloseAccordingToType( + np.array( + [ + 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), + 3.98 - ((0.9 * 0.01 + 0.01) * 2.0), + ] + ), + self.evaluate(var1), + ) + + def testSparse(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with tf.Graph().as_default(): + for dtype in [tf.half, tf.float32, tf.float64]: + var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype)) + var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2])) + grads0 = tf.IndexedSlices( + tf.constant([[0.1, 0.1]], dtype=dtype), + tf.constant([1]), + tf.constant([4, 2]), + ) + grads1 = tf.IndexedSlices( + tf.constant([[0.01, 0.01], [0.01, 0.01]], dtype=dtype), + tf.constant([2, 3]), + tf.constant([4, 2]), + ) + mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Check we have slots + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEqual(slot0.shape, var0.shape) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEqual(slot1.shape, var1.shape) + + # Fetch params to validate initial values + self.assertAllClose([0, 0], self.evaluate(var0)[0]) + self.assertAllClose([0, 0], self.evaluate(var0)[1]) + self.assertAllClose([1, 1], self.evaluate(var1)[2]) + + # Step 1: the momentum accumulators are 0. So we should see a + # normal update: v -= grad * learning_rate + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([0, 0]), self.evaluate(slot0)[0] + ) + self.assertAllCloseAccordingToType( + np.array([-2.0 * 0.1, -2.0 * 0.1]), self.evaluate(slot0)[1] + ) + self.assertAllCloseAccordingToType( + np.array([-2.0 * 0.01, -2.0 * 0.01]), + self.evaluate(slot1)[2], + ) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([0, 0]), self.evaluate(var0)[0] + ) + self.assertAllCloseAccordingToType( + np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), + self.evaluate(var0)[1], + ) + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), + self.evaluate(var1)[2], + ) + # Step 2: the momentum accumulators contain the previous update. + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllClose(np.array([0, 0]), self.evaluate(slot0)[0]) + self.assertAllCloseAccordingToType( + np.array( + [(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)] + ), + self.evaluate(slot0)[1], + ) + self.assertAllCloseAccordingToType( + np.array( + [ + (0.9 * (-0.02) - 2.0 * 0.01), + (0.9 * (-0.02) - 2.0 * 0.01), + ] + ), + self.evaluate(slot1)[2], + ) + # Check that the parameters have been updated. + self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0]) + self.assertAllCloseAccordingToType( + np.array( + [ + -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + ] + ), + self.evaluate(var0)[1], + ) + self.assertAllCloseAccordingToType( + np.array( + [ + 0.98 - ((0.9 * 0.01 + 0.01) * 2.0), + 0.98 - ((0.9 * 0.01 + 0.01) * 2.0), + ] + ), + self.evaluate(var1)[2], + ) + + def testSharing(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
+        with tf.Graph().as_default():
+            for dtype in [tf.half, tf.float32, tf.float64]:
+                var0 = tf.Variable([1.0, 2.0], dtype=dtype)
+                var1 = tf.Variable([3.0, 4.0], dtype=dtype)
+                grads0 = tf.constant([0.1, 0.1], dtype=dtype)
+                grads1 = tf.constant([0.01, 0.01], dtype=dtype)
+                mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9)
+                mom_update1 = mom_opt.apply_gradients(
+                    zip([grads0, grads1], [var0, var1])
+                )
+                mom_update2 = mom_opt.apply_gradients(
+                    zip([grads0, grads1], [var0, var1])
+                )
+                self.evaluate(tf.compat.v1.global_variables_initializer())
+
+                slot0 = mom_opt.get_slot(var0, "momentum")
+                self.assertEqual(slot0.shape, var0.shape)
+                slot1 = mom_opt.get_slot(var1, "momentum")
+                self.assertEqual(slot1.shape, var1.shape)
+
+                # Fetch params to validate initial values
+                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+                # Step 1: the momentum accumulators were 0. So we should see a
+                # normal update: v -= grad * learning_rate
+                self.evaluate(mom_update1)
+                # Check that the momentum accumulators have been updated.
+                self.assertAllCloseAccordingToType(
+                    np.array([-0.2, -0.2]), self.evaluate(slot0)
+                )
+                self.assertAllCloseAccordingToType(
+                    np.array([-0.02, -0.02]), self.evaluate(slot1)
+                )
+                # Check that the parameters have been updated.
+                self.assertAllCloseAccordingToType(
+                    np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
+                    self.evaluate(var0),
+                )
+                self.assertAllCloseAccordingToType(
+                    np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
+                    self.evaluate(var1),
+                )
+                # Step 2: the second momentum accumulators contain the previous
+                # update.
+                self.evaluate(mom_update2)
+                # Check that the momentum accumulators have been updated.
+                self.assertAllCloseAccordingToType(
+                    np.array(
+                        [(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]
+                    ),
+                    self.evaluate(slot0),
+                )
+                self.assertAllCloseAccordingToType(
+                    np.array(
+                        [
+                            (0.9 * (-0.02) - 2.0 * 0.01),
+                            (0.9 * (-0.02) - 2.0 * 0.01),
+                        ]
+                    ),
+                    self.evaluate(slot1),
+                )
+                # Check that the parameters have been updated.
+                self.assertAllCloseAccordingToType(
+                    np.array(
+                        [
+                            1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+                            2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+                        ]
+                    ),
+                    self.evaluate(var0),
+                )
+                self.assertAllCloseAccordingToType(
+                    np.array(
+                        [
+                            2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
+                            3.98 - ((0.9 * 0.01 + 0.01) * 2.0),
+                        ]
+                    ),
+                    self.evaluate(var1),
+                )
+
+    @test_combinations.generate(
+        test_combinations.combine(mode=["graph", "eager"])
+    )
+    def testConfig(self):
+        opt = gradient_descent.SGD(
+            learning_rate=1.0, momentum=0.9, nesterov=True
+        )
+        config = opt.get_config()
+        opt2 = gradient_descent.SGD.from_config(config)
+        lr = opt.lr
+        lr2 = opt2.lr
+        self.evaluate(tf.compat.v1.global_variables_initializer())
+        self.assertAllClose(self.evaluate(lr), self.evaluate(lr2))
+        self.assertAllClose(
+            self.evaluate(opt._get_hyper("momentum")),
+            self.evaluate(opt2._get_hyper("momentum")),
+        )
+        self.assertAllClose(
+            self.evaluate(opt._get_hyper("decay")),
+            self.evaluate(opt2._get_hyper("decay")),
+        )
+        var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)
+        loss = lambda: 3 * var0
+        # learning rate variable created when calling minimize.
+ opt.minimize(loss, [var0]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + config = opt.get_config() + opt3 = gradient_descent.SGD.from_config(config) + lr3 = opt3.lr + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(lr), self.evaluate(lr3)) + self.assertAllClose( + self.evaluate(opt._get_hyper("momentum")), + self.evaluate(opt3._get_hyper("momentum")), + ) + self.assertAllClose( + self.evaluate(opt._get_hyper("decay")), + self.evaluate(opt3._get_hyper("decay")), + ) + self.assertTrue(opt3.nesterov) + + def testNesterovWithoutMomentum(self): + with self.assertRaisesRegex(ValueError, "must be between"): + gradient_descent.SGD(learning_rate=1.0, momentum=2.0) + + def testConstructMomentumWithLR(self): + opt = gradient_descent.SGD(lr=1.0, momentum=0.9) + opt_2 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9, lr=1.0) + opt_3 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9) + self.assertIsInstance(opt.lr, tf.Variable) + self.assertIsInstance(opt_2.lr, tf.Variable) + self.assertIsInstance(opt_3.lr, tf.Variable) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(opt.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testMinimizeLossTensor(self): + for dtype in [tf.half, tf.float32, tf.float64]: + var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) + var1 = tf.Variable([3.0], dtype=dtype) + x = tf.constant([[4.0], [5.0]], dtype=dtype) + + tape = tf.GradientTape() + with tape: + loss = tf.matmul(var0, x) + var1 + sgd = gradient_descent.SGD(1.0) + with self.assertRaisesRegex(ValueError, "`tape` is required"): + sgd.minimize(loss, [var0, var1]) + sgd.minimize(loss, [var0, var1], tape=tape) + + self.assertAllCloseAccordingToType( + [[1.0 - 4.0, 2.0 - 5.0]], self.evaluate(var0) + ) + self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/nadam.py b/keras/optimizers/legacy/nadam.py index 6884e964e5c5..263ccca4a649 100644 --- a/keras/optimizers/legacy/nadam.py +++ b/keras/optimizers/legacy/nadam.py @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,243 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Legacy Nadam optimizer implementation.""" +"""Nadam optimizer implementation.""" -from keras.optimizers.optimizer_v2 import nadam +import tensorflow.compat.v2 as tf +from keras import backend_config +from keras.optimizers.legacy import optimizer_v2 +from keras.optimizers.schedules import learning_rate_schedule + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.optimizers.legacy.Nadam') -class Nadam(nadam.Nadam): - pass +@keras_export( + "keras.optimizers.legacy.Nadam", + v1=["keras.optimizers.Nadam", "keras.optimizers.legacy.Nadam"], +) +class Nadam(optimizer_v2.OptimizerV2): + r"""Optimizer that implements the NAdam algorithm. + Much like Adam is essentially RMSprop with momentum, Nadam is Adam with + Nesterov momentum. 
+ + Args: + learning_rate: A Tensor or a floating point value. The learning rate. + beta_1: A float value or a constant float tensor. The exponential decay + rate for the 1st moment estimates. + beta_2: A float value or a constant float tensor. The exponential decay + rate for the exponentially weighted infinity norm. + epsilon: A small constant for numerical stability. + name: Optional name for the operations created when applying gradients. + Defaults to `"Nadam"`. + **kwargs: keyword arguments. Allowed arguments are `clipvalue`, + `clipnorm`, `global_clipnorm`. + If `clipvalue` (float) is set, the gradient of each weight + is clipped to be no higher than this value. + If `clipnorm` (float) is set, the gradient of each weight + is individually clipped so that its norm is no higher than this value. + If `global_clipnorm` (float) is set the gradient of all weights is + clipped so that their global norm is no higher than this value. + + Usage Example: + >>> opt = tf.keras.optimizers.legacy.Nadam(learning_rate=0.2) + >>> var1 = tf.Variable(10.0) + >>> loss = lambda: (var1 ** 2) / 2.0 + >>> step_count = opt.minimize(loss, [var1]).numpy() + >>> "{:.1f}".format(var1.numpy()) + 9.8 + + Reference: + - [Dozat, 2015](http://cs229.stanford.edu/proj2015/054_report.pdf). + """ + + _HAS_AGGREGATE_GRAD = True + + def __init__( + self, + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-7, + name="Nadam", + **kwargs + ): + # Backwards compatibility with keras NAdam optimizer. + kwargs["decay"] = kwargs.pop("schedule_decay", 0.004) + learning_rate = kwargs.get("lr", learning_rate) + if isinstance( + learning_rate, learning_rate_schedule.LearningRateSchedule + ): + raise ValueError( + "The Nadam optimizer does not support " + "tf.keras.optimizers.LearningRateSchedules as the " + "learning rate." + ) + + super().__init__(name, **kwargs) + self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) + self._set_hyper("decay", self._initial_decay) + self._set_hyper("beta_1", beta_1) + self._set_hyper("beta_2", beta_2) + self.epsilon = epsilon or backend_config.epsilon() + self._m_cache = None + + def _create_slots(self, var_list): + var_dtype = var_list[0].dtype.base_dtype + if self._m_cache is None: + self._m_cache = self.add_weight( + "momentum_cache", + shape=[], + dtype=var_dtype, + initializer="ones", + trainable=False, + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA, + ) + self._weights.append(self._m_cache) + # Separate for-loops to respect the ordering of slot variables from v1. + for var in var_list: + # Create slots for the first moments. + self.add_slot(var, "m") + for var in var_list: + # Create slots for the second moments. 
+ self.add_slot(var, "v") + + def _prepare_local(self, var_device, var_dtype, apply_state): + lr_t = tf.identity(self._get_hyper("learning_rate", var_dtype)) + beta_1_t = tf.identity(self._get_hyper("beta_1", var_dtype)) + beta_2_t = tf.identity(self._get_hyper("beta_2", var_dtype)) + local_step = tf.cast(self.iterations + 1, var_dtype) + next_step = tf.cast(self.iterations + 2, var_dtype) + + decay_base = tf.cast(0.96, var_dtype) + + m_t = beta_1_t * ( + 1.0 - 0.5 * (tf.pow(decay_base, self._initial_decay * local_step)) + ) + m_t_1 = beta_1_t * ( + 1.0 - 0.5 * (tf.pow(decay_base, self._initial_decay * next_step)) + ) + + m_schedule_new = tf.cast(self._m_cache_read, var_dtype) * m_t + if var_dtype is self._m_cache.dtype: + m_schedule_new = tf.identity( + tf.compat.v1.assign( + self._m_cache, m_schedule_new, use_locking=self._use_locking + ) + ) + m_schedule_next = m_schedule_new * m_t_1 + + apply_state[(var_device, var_dtype)] = dict( + lr_t=lr_t, + neg_lr_t=-lr_t, + epsilon=tf.convert_to_tensor(self.epsilon, var_dtype), + beta_1_t=beta_1_t, + beta_2_t=beta_2_t, + m_t=m_t, + m_t_1=m_t_1, + one_minus_beta_1_t=1 - beta_1_t, + one_minus_beta_2_t=1 - beta_2_t, + one_minus_m_t=1.0 - m_t, + one_minus_m_schedule_new=1.0 - m_schedule_new, + one_minus_m_schedule_next=1.0 - m_schedule_next, + v_t_prime_denominator=1.0 - tf.pow(beta_2_t, local_step), + ) + + def _prepare(self, var_list): + # Get the value of the momentum cache before starting to apply + # gradients. + self._m_cache_read = tf.identity(self._m_cache) + return super()._prepare(var_list) + + def _resource_apply_dense(self, grad, var, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + + g_prime = grad / coefficients["one_minus_m_schedule_new"] + m_t = ( + coefficients["beta_1_t"] * m + + coefficients["one_minus_beta_1_t"] * grad + ) + m_t = tf.compat.v1.assign(m, m_t, use_locking=self._use_locking) + m_t_prime = m_t / coefficients["one_minus_m_schedule_next"] + v_t = coefficients["beta_2_t"] * v + coefficients[ + "one_minus_beta_2_t" + ] * tf.square(grad) + v_t = tf.compat.v1.assign(v, v_t, use_locking=self._use_locking) + v_t_prime = v_t / coefficients["v_t_prime_denominator"] + m_t_bar = ( + coefficients["one_minus_m_t"] * g_prime + + coefficients["m_t_1"] * m_t_prime + ) + var_t = var - coefficients["lr_t"] * m_t_bar / ( + tf.sqrt(v_t_prime) + coefficients["epsilon"] + ) + return tf.compat.v1.assign(var, var_t, use_locking=self._use_locking).op + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + + g_prime = grad / coefficients["one_minus_m_schedule_new"] + + # m_t = beta1 * m + (1 - beta1) * g_t + m_scaled_g_values = grad * coefficients["one_minus_beta_1_t"] + m_t = tf.compat.v1.assign( + m, m * coefficients["beta_1_t"], use_locking=self._use_locking + ) + + with tf.control_dependencies([m_t]): + m_t = self._resource_scatter_add(m, indices, m_scaled_g_values) + m_t_slice = tf.gather(m_t, indices) + + m_t_prime = m_t_slice / coefficients["one_minus_m_schedule_next"] + m_t_bar = ( + coefficients["one_minus_m_t"] * g_prime + + coefficients["m_t_1"] * m_t_prime 
+ ) + + # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) + v_scaled_g_values = (grad * grad) * coefficients["one_minus_beta_2_t"] + v_t = tf.compat.v1.assign( + v, v * coefficients["beta_2_t"], use_locking=self._use_locking + ) + + with tf.control_dependencies([v_t]): + v_t = self._resource_scatter_add(v, indices, v_scaled_g_values) + v_t_slice = tf.gather(v_t, indices) + + v_t_prime = v_t_slice / coefficients["v_t_prime_denominator"] + v_prime_sqrt_plus_eps = tf.sqrt(v_t_prime) + coefficients["epsilon"] + + var_update = self._resource_scatter_add( + var, + indices, + coefficients["neg_lr_t"] * m_t_bar / v_prime_sqrt_plus_eps, + ) + return tf.group(*[var_update, m_t_bar, v_t]) + + def get_config(self): + config = super().get_config() + config.update( + { + "learning_rate": self._serialize_hyperparameter( + "learning_rate" + ), + "decay": self._initial_decay, + "beta_1": self._serialize_hyperparameter("beta_1"), + "beta_2": self._serialize_hyperparameter("beta_2"), + "epsilon": self.epsilon, + } + ) + return config diff --git a/keras/optimizers/legacy/nadam_test.py b/keras/optimizers/legacy/nadam_test.py new file mode 100644 index 000000000000..aee3453c42f1 --- /dev/null +++ b/keras/optimizers/legacy/nadam_test.py @@ -0,0 +1,203 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Nadam.""" + +import numpy as np +import tensorflow.compat.v2 as tf + +from keras.optimizers.legacy import nadam + + +def get_beta_accumulators(opt, dtype): + local_step = tf.cast(opt.iterations + 1, dtype) + beta_1_t = tf.cast(opt._get_hyper("beta_1"), dtype) + beta_1_power = tf.pow(beta_1_t, local_step) + beta_2_t = tf.cast(opt._get_hyper("beta_2"), dtype) + beta_2_power = tf.pow(beta_2_t, local_step) + return (beta_1_power, beta_2_power) + + +def update_m_cache(m_cache, t, beta1=0.9): + mu_t = beta1 * (1 - 0.5 * 0.96 ** (0.004 * (t + 1))) + m_cache_t = m_cache * mu_t + return m_cache_t + + +def nadam_update_numpy( + param, + g_t, + t, + m, + v, + m_cache, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, +): + + mu_t = beta1 * (1 - 0.5 * 0.96 ** (0.004 * (t + 1))) + mu_t_1 = beta1 * (1 - 0.5 * 0.96 ** (0.004 * (t + 2))) + m_cache_t_1 = m_cache * mu_t_1 + g_prime_t = g_t / (1 - m_cache) + m_t = beta1 * m + (1 - beta1) * g_t + v_t = beta2 * v + (1 - beta2) * g_t * g_t + + m_prime_t = m_t / (1 - m_cache_t_1) + v_prime_t = v_t / (1 - beta2 ** (t + 1)) + m_bar_t = (1 - mu_t) * g_prime_t + mu_t_1 * m_prime_t + + param_t = param - alpha * m_bar_t / (np.sqrt(v_prime_t) + epsilon) + return param_t, m_t, v_t + + +class NadamOptimizerTest(tf.test.TestCase): + def testSparse(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + sparse_epsilon = 1e-7 + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1, mcache = 0.0, 0.0, 0.0, 0.0, 1.0 + var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array( + [0.01, 0, 0.01], dtype=dtype.as_numpy_dtype + ) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0_np_indices = np.array([0, 2], dtype=np.int32) + grads0 = tf.IndexedSlices( + tf.constant(grads0_np[grads0_np_indices]), + tf.constant(grads0_np_indices), + tf.constant([3]), + ) + grads1_np_indices = np.array([0, 2], dtype=np.int32) + grads1 = tf.IndexedSlices( + tf.constant(grads1_np[grads1_np_indices]), + tf.constant(grads1_np_indices), + tf.constant([3]), + ) + opt = nadam.Nadam(epsilon=sparse_epsilon) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 1.0, 2.0], var0) + self.assertAllClose([3.0, 3.0, 4.0], var1) + + beta1_power, beta2_power = get_beta_accumulators(opt, dtype) + + # Run 3 steps of Nadam + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9 ** (t + 1), beta1_power + ) + self.assertAllCloseAccordingToType( + 0.999 ** (t + 1), beta2_power + ) + update.run() + + mcache = update_m_cache(mcache, t) + var0_np, m0, v0 = nadam_update_numpy( + var0_np, + grads0_np, + t, + m0, + v0, + mcache, + epsilon=sparse_epsilon, + ) + var1_np, m1, v1 = nadam_update_numpy( + var1_np, + grads1_np, + t, + m1, + v1, + mcache, + epsilon=sparse_epsilon, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0) + self.assertAllCloseAccordingToType(var1_np, var1) + + def testBasic(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for dtype in [tf.half, tf.float32, tf.float64]: + with tf.Graph().as_default(), self.cached_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1, mcache = 0.0, 0.0, 0.0, 0.0, 1.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + opt = nadam.Nadam() + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0) + self.assertAllClose([3.0, 4.0], var1) + + # Run 3 steps of Nadam + for t in range(3): + update.run() + + mcache = update_m_cache(mcache, t) + var0_np, m0, v0 = nadam_update_numpy( + var0_np, grads0_np, t, m0, v0, mcache + ) + var1_np, m1, v1 = nadam_update_numpy( + var1_np, grads1_np, t, m1, v1, mcache + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0) + self.assertAllCloseAccordingToType(var1_np, var1) + + def testConstructNAdamWithLR(self): + opt = nadam.Nadam(lr=1.0) + opt_2 = nadam.Nadam(learning_rate=0.1, lr=1.0) + opt_3 = nadam.Nadam(learning_rate=0.1) + self.assertIsInstance(opt.lr, tf.Variable) + self.assertIsInstance(opt_2.lr, tf.Variable) + self.assertIsInstance(opt_3.lr, tf.Variable) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(opt.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) + + def testConstructNAdamWithScheduleDecay(self): + opt = nadam.Nadam(schedule_decay=0.2) + self.assertIsInstance(opt.decay, tf.Variable) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(opt.decay), (0.2)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/optimizer_test.py b/keras/optimizers/legacy/optimizer_test.py deleted file mode 100644 index 9c8604509e29..000000000000 --- a/keras/optimizers/legacy/optimizer_test.py +++ /dev/null @@ -1,60 +0,0 @@ -"""Tests for optimizer.""" - -from absl.testing import parameterized -import keras -from keras.optimizers.legacy import adadelta -from keras.optimizers.legacy import adagrad -from keras.optimizers.legacy import adam -from keras.optimizers.legacy import adamax -from keras.optimizers.legacy import ftrl -from keras.optimizers.legacy import nadam -from keras.optimizers.legacy import rmsprop -from keras.optimizers.legacy import sgd -import tensorflow.compat.v2 as tf - -adadelta_fn = tf.__internal__.test.combinations.NamedObject( - "adadelta", lambda: adadelta.Adadelta(0.002)) -adagrad_fn = tf.__internal__.test.combinations.NamedObject( - "adagrad", lambda: adagrad.Adagrad(0.002)) -adam_fn = tf.__internal__.test.combinations.NamedObject( - "adam", lambda: adam.Adam(0.002)) -adamax_fn = tf.__internal__.test.combinations.NamedObject( - "adamax", lambda: adamax.Adamax(0.002)) -ftrl_fn = tf.__internal__.test.combinations.NamedObject( - "ftrl", lambda: ftrl.Ftrl(0.002)) -gradient_descent_fn = tf.__internal__.test.combinations.NamedObject( - "sgd", lambda: sgd.SGD(0.002)) -nadam_fn = tf.__internal__.test.combinations.NamedObject( - "nadam", lambda: nadam.Nadam(0.002)) -rmsprop_fn = tf.__internal__.test.combinations.NamedObject( - "rmsprop", lambda: rmsprop.RMSprop(0.002)) - -OPTIMIZER_FN = [ - adadelta_fn, - adagrad_fn, - adam_fn, - adamax_fn, - ftrl_fn, - 
gradient_descent_fn, - nadam_fn, - rmsprop_fn, -] - - -class OptimizerFuntionalityTest(tf.test.TestCase, parameterized.TestCase): - """Test the functionality of optimizer.""" - - @parameterized.product(optimizer_fn=OPTIMIZER_FN) - def testModelFit(self, optimizer_fn): - model = keras.Sequential( - [keras.layers.Input(shape=(1,)), - keras.layers.Dense(1)]) - optimizer = optimizer_fn() - x = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) - y = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) - model.compile(loss="mse", optimizer=optimizer) - model.fit(x, y, epochs=1, steps_per_epoch=5) - - -if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/optimizers/legacy/optimizer_v2.py b/keras/optimizers/legacy/optimizer_v2.py new file mode 100644 index 000000000000..984d721f0b37 --- /dev/null +++ b/keras/optimizers/legacy/optimizer_v2.py @@ -0,0 +1,1727 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Version 2 of class Optimizer.""" + + +import abc +import contextlib +import functools +import warnings +from copy import deepcopy + +import tensorflow.compat.v2 as tf + +from keras import backend +from keras import initializers +from keras.engine import base_layer_utils +from keras.optimizers import utils as optimizer_utils +from keras.optimizers.schedules import learning_rate_schedule +from keras.utils import generic_utils +from keras.utils import layer_utils +from keras.utils import tf_inspect +from keras.utils import tf_utils + +# isort: off +from tensorflow.python.util.tf_export import keras_export + +keras_optimizers_gauge = tf.__internal__.monitoring.BoolGauge( + "/tensorflow/api/keras/optimizers", "keras optimizer usage", "method" +) + +_DEFAULT_VALID_DTYPES = frozenset( + [ + tf.float16, + tf.bfloat16, + tf.float32, + tf.float64, + tf.complex64, + tf.complex128, + ] +) + + +def _deduplicate_indexed_slices(values, indices): + """Sums `values` associated with any non-unique `indices`. + + Args: + values: A `Tensor` with rank >= 1. + indices: A one-dimensional integer `Tensor`, indexing into the first + dimension of `values` (as in an IndexedSlices object). + + Returns: + A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a + de-duplicated version of `indices` and `summed_values` contains the sum of + `values` slices associated with each unique index. 
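Before the implementation that follows, a hypothetical numeric illustration of this contract: duplicate index 0 appears twice, so its two value rows are summed.

```python
# Illustration of the de-duplication contract (assumed toy shapes).
import tensorflow as tf

values = tf.constant([[1.0], [2.0], [3.0]])
indices = tf.constant([0, 0, 2])
unique_indices, positions = tf.unique(indices)  # [0, 2] and [0, 0, 1]
summed = tf.math.unsorted_segment_sum(
    values, positions, tf.shape(unique_indices)[0]
)  # [[3.0], [3.0]]
```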
+ """ + unique_indices, new_index_positions = tf.unique(indices) + summed_values = tf.math.unsorted_segment_sum( + values, new_index_positions, tf.shape(unique_indices)[0] + ) + return (summed_values, unique_indices) + + +class NullContextmanager: + def __init__(self, *args, **kwargs): + pass + + def __enter__(self): + pass + + def __exit__(self, type_arg, value_arg, traceback_arg): + return False # False values do not suppress exceptions + + +def name_scope_only_in_function_or_graph(name): + """Internal-only entry point for `name_scope*`. + + Enters a compat.v1.name_scope only when in a function or graph, + not when running fully eagerly. + + Args: + name: The name argument that is passed to the op function. + + Returns: + `name_scope*` context manager. + """ + if not tf.executing_eagerly(): + return tf.name_scope(name) + else: + return NullContextmanager() + + +@keras_export( + "keras.optimizers.legacy.Optimizer", + v1=["keras.optimizers.Optimizer", "keras.optimizers.legacy.Optimizer"], +) +class OptimizerV2(tf.__internal__.tracking.Trackable): + """Base class for legacy Keras optimizers. + + You should not use this class directly, but instead instantiate one of its + subclasses such as `tf.keras.optimizers.legacy.SGD`, + `tf.keras.optimizers.legacy.Adam`, etc. + + This is the default Keras optimizer base class until v2.10 (included). + In v2.11 and later, `tf.keras.optimizers.Optimizer` + points to a new base class implementation. The legacy class won't be + deleted in the future and will continue to be available at + `tf.keras.optimizers.legacy.Optimizer`. + + ### Usage + + ```python + # Create an optimizer with the desired parameters. + opt = tf.keras.optimizers.legacy.SGD(learning_rate=0.1) + # `loss` is a callable that takes no argument and returns the value + # to minimize. + var1 = tf.Variable(2.0) + var2 = tf.Variable(5.0) + loss = lambda: 3 * var1 * var1 + 2 * var2 * var2 + # In graph mode, returns op that minimizes the loss by updating the listed + # variables. + opt_op = opt.minimize(loss, var_list=[var1, var2]) + opt_op.run() + # In eager mode, simply call minimize to update the list of variables. + opt.minimize(loss, var_list=[var1, var2]) + ``` + + ### Usage in custom training loops + + In Keras models, sometimes variables are created when the model is first + called, instead of construction time. Examples include 1) sequential models + without input shape pre-defined, or 2) subclassed models. Pass var_list as + callable in these cases. + + Example: + + ```python + opt = tf.keras.optimizers.legacy.SGD(learning_rate=0.1) + model = tf.keras.Sequential() + model.add(tf.keras.layers.Dense(num_hidden, activation='relu')) + model.add(tf.keras.layers.Dense(num_classes, activation='sigmoid')) + loss_fn = lambda: tf.keras.losses.mse(model(input), output) + var_list_fn = lambda: model.trainable_weights + for input, output in data: + opt.minimize(loss_fn, var_list_fn) + ``` + + ### Processing gradients before applying them + + Calling `minimize()` takes care of both computing the gradients and + applying them to the variables. If you want to process the gradients + before applying them you can instead use the optimizer in three steps: + + 1. Compute the gradients with `tf.GradientTape`. + 2. Process the gradients as you wish. + 3. Apply the processed gradients with `apply_gradients()`. + + Example: + + ```python + # Create an optimizer. + opt = tf.keras.optimizers.legacy.SGD(learning_rate=0.1) + + # Compute the gradients for a list of variables. 
+    with tf.GradientTape() as tape:
+      loss = <call_loss_function>
+    vars = <list_of_variables>
+    grads = tape.gradient(loss, vars)
+
+    # Process the gradients, for example cap them, etc.
+    # capped_grads = [MyCapper(g) for g in grads]
+    processed_grads = [process_gradient(g) for g in grads]
+
+    # Ask the optimizer to apply the processed gradients.
+    opt.apply_gradients(zip(processed_grads, var_list))
+    ```
+
+    ### Use with `tf.distribute.Strategy`
+
+    This optimizer class is `tf.distribute.Strategy` aware, which means it
+    automatically sums gradients across all replicas. To average gradients,
+    you divide your loss by the global batch size, which is done
+    automatically if you use `tf.keras` built-in training or evaluation
+    loops. See the `reduction` argument of your loss, which should be set to
+    `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` for averaging or
+    `tf.keras.losses.Reduction.SUM` for not averaging.
+
+    To aggregate gradients yourself, call `apply_gradients` with
+    `experimental_aggregate_gradients` set to False. This is useful if you
+    need to process aggregated gradients.
+
+    If you are not using these and you want to average gradients, you should
+    use `tf.math.reduce_sum` to add up your per-example losses and then
+    divide by the global batch size. Note that when using
+    `tf.distribute.Strategy`, the first component of a tensor's shape is the
+    *replica-local* batch size, which is off by a factor equal to the number
+    of replicas being used to compute a single step. As a result, using
+    `tf.math.reduce_mean` will give the wrong answer, resulting in gradients
+    that can be many times too big.
+
+    ### Variable Constraints
+
+    All Keras optimizers respect variable constraints. If a constraint
+    function is passed to any variable, the constraint will be applied to
+    the variable after the gradient has been applied to the variable.
+    Important: If the gradient is a sparse tensor, variable constraints are
+    not supported.
+
+    ### Thread Compatibility
+
+    The entire optimizer is currently thread compatible, not thread-safe.
+    The user needs to perform synchronization if necessary.
+
+    ### Slots
+
+    Many optimizer subclasses, such as `Adam` and `Adagrad`, allocate and
+    manage additional variables associated with the variables to train.
+    These are called Slots. Slots have names, and you can ask the optimizer
+    for the names of the slots that it uses. Once you have a slot name you
+    can ask the optimizer for the variable it created to hold the slot
+    value.
+
+    This can be useful if you want to log or debug a training algorithm,
+    report stats about the slots, etc.
+
+    ### Hyperparameters
+
+    These are arguments passed to the optimizer subclass constructor
+    (the `__init__` method), and then passed to `self._set_hyper()`.
+    They can be either regular Python values (like 1.0), tensors, or
+    callables. If they are callable, the callable will be called during
+    `apply_gradients()` to get the value for the hyperparameter.
+
+    Hyperparameters can be overwritten through user code:
+
+    Example:
+
+    ```python
+    # Create an optimizer with the desired parameters.
+    opt = tf.keras.optimizers.legacy.SGD(learning_rate=0.1)
+    # `loss` is a callable that takes no argument and returns the value
+    # to minimize.
+    loss = lambda: 3 * var1 + 2 * var2
+    # In eager mode, simply call minimize to update the list of variables.
+    opt.minimize(loss, var_list=[var1, var2])
+    # update learning rate
+    opt.learning_rate = 0.05
+    opt.minimize(loss, var_list=[var1, var2])
+    ```
+
+    ### Callable learning rate
+
+    Optimizer accepts a callable learning rate in two ways.
+    The first way is
+    through built-in or customized
+    `tf.keras.optimizers.schedules.LearningRateSchedule`. The schedule will
+    be called on each iteration with `schedule(iteration)`, a `tf.Variable`
+    owned by the optimizer.
+
+    Example:
+
+    >>> var = tf.Variable(np.random.random(size=(1,)))
+    >>> learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
+    ... initial_learning_rate=.01, decay_steps=20, decay_rate=.1)
+    >>> opt = tf.keras.optimizers.legacy.SGD(learning_rate=learning_rate)
+    >>> loss = lambda: 3 * var
+    >>> opt.minimize(loss, var_list=[var])
+    <tf.Variable...
+
+    The second way is through a callable function that
+    does not accept any arguments.
+
+    Example:
+
+    >>> var = tf.Variable(np.random.random(size=(1,)))
+    >>> def lr_callable():
+    ...   return .1
+    >>> opt = tf.keras.optimizers.legacy.SGD(learning_rate=lr_callable)
+    >>> loss = lambda: 3 * var
+    >>> opt.minimize(loss, var_list=[var])
+    <tf.Variable...
+
+    ### Creating a custom optimizer
+
+    If you intend to create your own optimization algorithm, simply inherit
+    from this class and override the following methods:
+
+      - `_resource_apply_dense` (update variable given gradient tensor is a
+        dense `tf.Tensor`)
+      - `_resource_apply_sparse` (update variable given gradient tensor is a
+        sparse `tf.IndexedSlices`. The most common way for this to happen is
+        if you are taking the gradient through a `tf.gather`.)
+      - `_create_slots`
+        (if your optimizer algorithm requires additional variables)
+      - `get_config`
+        (serialization of the optimizer, include all hyper parameters)
+    """
+
+    # Subclasses should set this to True unless they override
+    # `apply_gradients` with a version that does not have the
+    # `experimental_aggregate_gradients` argument. Older versions of Keras
+    # did not have this argument so custom optimizers may have overridden
+    # `apply_gradients` without the `experimental_aggregate_gradients`
+    # argument. Keras only passes `experimental_aggregate_gradients` if this
+    # attribute is True.
+    # Note: This attribute will likely be removed in an upcoming release.
+    _HAS_AGGREGATE_GRAD = False
+
+    def __init__(
+        self,
+        name,
+        gradient_aggregator=None,
+        gradient_transformers=None,
+        **kwargs,
+    ):
+        """Create a new Optimizer.
+
+        This must be called by the constructors of subclasses.
+        Note that Optimizer instances should not bind to a single graph,
+        and so shouldn't keep Tensors as attributes.
+
+        Args:
+          name: String. The name to use for momentum accumulator weights
+            created by the optimizer.
+          gradient_aggregator: The function to use to aggregate gradients
+            across devices (when using `tf.distribute.Strategy`). If `None`,
+            defaults to summing the gradients across devices. The function
+            should accept and return a list of `(gradient, variable)` tuples.
+          gradient_transformers: Optional. List of functions to use to
+            transform gradients before applying updates to Variables. The
+            functions are applied after `gradient_aggregator`. The functions
+            should accept and return a list of `(gradient, variable)` tuples.
+          **kwargs: keyword arguments. Allowed arguments are `clipvalue`,
+            `clipnorm`, `global_clipnorm`. If `clipvalue` (float) is set, the
+            gradient of each weight is clipped to be no higher than this
+            value. If `clipnorm` (float) is set, the gradient of each weight
+            is individually clipped so that its norm is no higher than this
+            value. If `global_clipnorm` (float) is set the gradient of all
+            weights is clipped so that their global norm is no higher than
+            this value.
+
+        Raises:
+          ValueError: in case of any invalid argument.
+        """
+        # Instrument optimizer usages
+        keras_optimizers_gauge.get_cell(self.__class__.__name__).set(True)
+
+        allowed_kwargs = {
+            "clipnorm",
+            "clipvalue",
+            "lr",
+            "decay",
+            "global_clipnorm",
+        }
+        for k in kwargs:
+            if k not in allowed_kwargs:
+                raise TypeError(
+                    "Unexpected keyword argument passed to optimizer: "
+                    + str(k)
+                    + ". Allowed kwargs are {}.".format(allowed_kwargs)
+                )
+            # Checks that all keyword arguments are non-negative.
+            if kwargs[k] is not None and kwargs[k] < 0:
+                raise ValueError(
+                    f"Expected {k} >= 0, received: {kwargs[k]}"
+                )
+            if k == "lr":
+                warnings.warn(
+                    "The `lr` argument is deprecated, "
+                    "use `learning_rate` instead.",
+                    stacklevel=2,
+                )
+
+        self._use_locking = True
+        self._init_set_name(name)
+        self._hyper = {}
+        # dict: {variable name : {slot name : variable}}
+        self._slots = {}
+        self._slot_names = []
+        self._weights = []
+        self._iterations = None
+
+        # For implementing Trackable. Stores information about how to restore
+        # slot variables which have not yet been created
+        # (trackable._CheckpointPosition objects).
+        # {slot_name :
+        #     {_var_key(variable_to_train): [checkpoint_position, ...], ...},
+        #  ... }
+        self._deferred_slot_restorations = {}
+
+        decay = kwargs.pop("decay", 0.0)
+        if decay < 0.0:
+            raise ValueError(
+                f"decay cannot be less than 0. Received: decay={decay}."
+            )
+        self._initial_decay = decay
+
+        self._hypers_created = False
+        # Store the distribution strategy object if the optimizer is created
+        # inside strategy scope, so it could be used to create variables
+        # later.
+        if tf.distribute.has_strategy():
+            self._distribution_strategy = tf.distribute.get_strategy()
+        else:
+            self._distribution_strategy = None
+
+        # Configure gradient transformations.
+        if gradient_aggregator is None:
+            gradient_aggregator = optimizer_utils.all_reduce_sum_gradients
+        self.gradient_aggregator = gradient_aggregator
+        if gradient_transformers is None:
+            gradient_transformers = []
+        self.gradient_transformers = gradient_transformers
+        self.clipnorm = kwargs.pop("clipnorm", None)
+        self.global_clipnorm = kwargs.pop("global_clipnorm", None)
+        if self.clipnorm is not None and self.global_clipnorm is not None:
+            raise ValueError(
+                "Cannot accept both `clipnorm` and `global_clipnorm`. "
+                "Received: `clipnorm`={}, `global_clipnorm`={}.".format(
+                    self.clipnorm, self.global_clipnorm
+                )
+            )
+        self.clipvalue = kwargs.pop("clipvalue", None)
+
+    def __deepcopy__(self, memo):
+        cls = self.__class__
+        result = cls.__new__(cls)
+        memo[id(self)] = result
+        for k, v in self.__dict__.items():
+            # DistributionStrategy singleton cannot be serialized
+            if k == "_distribution_strategy":
+                continue
+            setattr(result, k, deepcopy(v, memo))
+        result._distribution_strategy = self._distribution_strategy
+        return result
+
+    @property
+    def clipnorm(self):
+        """`float` or `None`. If set, clips gradients to a maximum norm."""
+        return self._clipnorm
+
+    @property
+    def global_clipnorm(self):
+        """`float` or `None`.
+
+        If set, clips gradients to a maximum norm.
+
+        Check `tf.clip_by_global_norm` for more details.
+ """ + return self._global_clipnorm + + @clipnorm.setter + def clipnorm(self, val): + if val is not None and self.gradient_transformers: + raise ValueError( + "`clipnorm` cannot be set when `gradient_transformers` " + "is set. Instead, use the `gradient_transformers` to " + "specify clipping and other transformations. Received: " + f"val={val}, " + f"gradient_transformers={self.gradient_transformers}." + ) + self._clipnorm = val + self._clipnorm_fn = optimizer_utils.make_gradient_clipnorm_fn( + self._clipnorm + ) + + @global_clipnorm.setter + def global_clipnorm(self, val): + if val is not None and self.gradient_transformers: + raise ValueError( + "`global_clipnorm` cannot be set when " + "`gradient_transformers` " + "is set. Instead, use the `gradient_transformers` to " + "specify clipping and other transformations. Received: " + f"val={val}, " + f"gradient_transformers={self.gradient_transformers}." + ) + self._global_clipnorm = val + self._global_clipnorm_fn = ( + optimizer_utils.make_global_gradient_clipnorm_fn( + self._global_clipnorm + ) + ) + + @property + def clipvalue(self): + """`float` or `None`. If set, clips gradients to a maximum value.""" + return self._clipvalue + + @clipvalue.setter + def clipvalue(self, val): + if val is not None and self.gradient_transformers: + raise ValueError( + "`clipvalue` cannot be set when `gradient_transformers` " + "is set. Instead, use the `gradient_transformers` to " + "specify clipping and other transformations. Received: " + f"val={val}, " + f"gradient_transformers={self.gradient_transformers}." + ) + self._clipvalue = val + self._clipvalue_fn = optimizer_utils.make_gradient_clipvalue_fn( + self._clipvalue + ) + + def _transform_loss(self, loss): + """Called in `.minimize` to transform loss before computing + gradients.""" + return loss + + def _get_gradients(self, tape, loss, var_list, grad_loss=None): + """Called in `minimize` to compute gradients from loss.""" + grads = tape.gradient(loss, var_list, grad_loss) + return list(zip(grads, var_list)) + + def _transform_unaggregated_gradients(self, grads_and_vars): + """Called in `apply_gradients` before gradient aggregation.""" + return grads_and_vars + + def _aggregate_gradients(self, grads_and_vars): + """Called in `apply_gradients` to aggregate gradients across devices. + + Note that user subclasses may override this, so the interface should not + be changed. + + Args: + grads_and_vars: List of (gradient, variable) pairs. + + Returns: + A list of (aggregrated_gradient, variable) pairs. By default, this + calls `self.gradient_aggregator`. + """ + return self.gradient_aggregator(grads_and_vars) + + def _transform_gradients(self, grads_and_vars): + """Called in `apply_gradients` after aggregation.""" + if self._clipvalue is not None: + grads_and_vars = self._clipvalue_fn(grads_and_vars) + if self._clipnorm is not None: + grads_and_vars = self._clipnorm_fn(grads_and_vars) + if self._global_clipnorm is not None: + grads_and_vars = self._global_clipnorm_fn(grads_and_vars) + + for fn in self.gradient_transformers: + grads_and_vars = fn(grads_and_vars) + return grads_and_vars + + def minimize(self, loss, var_list, grad_loss=None, name=None, tape=None): + """Minimize `loss` by updating `var_list`. + + This method simply computes gradient using `tf.GradientTape` and calls + `apply_gradients()`. If you want to process the gradient before applying + then call `tf.GradientTape` and `apply_gradients()` explicitly instead + of using this function. + + Args: + loss: `Tensor` or callable. 
If a callable, `loss` should take no + arguments and return the value to minimize. If a `Tensor`, the + `tape` argument must be passed. + var_list: list or tuple of `Variable` objects to update to minimize + `loss`, or a callable returning the list or tuple of `Variable` + objects. Use callable when the variable list would otherwise be + incomplete before `minimize` since the variables are created at the + first time `loss` is called. + grad_loss: (Optional). A `Tensor` holding the gradient computed for + `loss`. + name: (Optional) str. Name for the returned operation. + tape: (Optional) `tf.GradientTape`. If `loss` is provided as a + `Tensor`, the tape that computed the `loss` must be provided. + + Returns: + An `Operation` that updates the variables in `var_list`. The + `iterations` will be automatically increased by 1. + + Raises: + ValueError: If some of the variables are not `Variable` objects. + + """ + grads_and_vars = self._compute_gradients( + loss, var_list=var_list, grad_loss=grad_loss, tape=tape + ) + return self.apply_gradients(grads_and_vars, name=name) + + def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): + """Compute gradients of `loss` for the variables in `var_list`. + + This is the first part of `minimize()`. It returns a list + of (gradient, variable) pairs where "gradient" is the gradient + for "variable". Note that "gradient" can be a `Tensor`, an + `IndexedSlices`, or `None` if there is no gradient for the + given variable. + + Args: + loss: `Tensor` or callable. If a callable, `loss` should take no + arguments and return the value to minimize. If a `Tensor`, the + `tape` argument must be passed. + var_list: list or tuple of `Variable` objects to update to minimize + `loss`, or a callable returning the list or tuple of `Variable` + objects. Use callable when the variable list would otherwise be + incomplete before `minimize` and the variables are created at the + first time when `loss` is called. + grad_loss: Optional. A `Tensor` holding the gradient computed for + `loss`. + tape: (Optional) `tf.GradientTape`. If `loss` is provided as a + `Tensor`, the tape that computed the `loss` must be provided. + + Returns: + A list of (gradient, variable) pairs. Variable is always present, but + gradient can be `None`. + + Raises: + TypeError: If `var_list` contains anything else than `Variable` + objects. + ValueError: If some arguments are invalid, or var_list is None. + """ + # TODO(joshl): Test that we handle weight decay in a reasonable way. + if not callable(loss) and tape is None: + raise ValueError( + "`tape` is required when a `Tensor` loss is passed. " + f"Received: loss={loss}, tape={tape}." + ) + tape = tape if tape is not None else tf.GradientTape() + + if callable(loss): + with tape: + if not callable(var_list): + tape.watch(var_list) + loss = loss() + if callable(var_list): + var_list = var_list() + + with tape: + loss = self._transform_loss(loss) + + var_list = tf.nest.flatten(var_list) + with tf.name_scope(self._name + "/gradients"): + grads_and_vars = self._get_gradients( + tape, loss, var_list, grad_loss + ) + + self._assert_valid_dtypes( + [ + v + for g, v in grads_and_vars + if g is not None and v.dtype != tf.resource + ] + ) + + return grads_and_vars + + def apply_gradients( + self, grads_and_vars, name=None, experimental_aggregate_gradients=True + ): + """Apply gradients to variables. + + This is the second part of `minimize()`. It returns an `Operation` that + applies gradients. 
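A hedged end-to-end sketch of the two-step split described here, using `tf.keras.optimizers.legacy.SGD` (available under this path in TF 2.11+):

```python
# Manual two-step equivalent of minimize(): tape, then apply_gradients.
import tensorflow as tf

var = tf.Variable(2.0)
opt = tf.keras.optimizers.legacy.SGD(learning_rate=0.1)
with tf.GradientTape() as tape:
    loss = var * var
grads = tape.gradient(loss, [var])      # step 1: compute gradients
opt.apply_gradients(zip(grads, [var]))  # step 2: apply them
print(var.numpy())  # 2.0 - 0.1 * 4.0 = 1.6
```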
+ + The method sums gradients from all replicas in the presence of + `tf.distribute.Strategy` by default. You can aggregate gradients + yourself by passing `experimental_aggregate_gradients=False`. + + Example: + + ```python + grads = tape.gradient(loss, vars) + grads = tf.distribute.get_replica_context().all_reduce('sum', grads) + # Processing aggregated gradients. + optimizer.apply_gradients(zip(grads, vars), + experimental_aggregate_gradients=False) + + ``` + + Args: + grads_and_vars: List of (gradient, variable) pairs. + name: Optional name for the returned operation. When `None`, uses the + name passed to the `Optimizer` constructor. Defaults to `None`. + experimental_aggregate_gradients: Whether to sum gradients from + different replicas in the presence of `tf.distribute.Strategy`. If + False, it's user responsibility to aggregate the gradients. Default + to `True`. + + Returns: + An `Operation` that applies the specified gradients. The `iterations` + will be automatically increased by 1. + + Raises: + TypeError: If `grads_and_vars` is malformed. + ValueError: If none of the variables have gradients. + RuntimeError: If called in a cross-replica context. + """ + grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) + var_list = [v for (_, v) in grads_and_vars] + + with tf.name_scope(self._name): + # Create iteration if necessary. + with tf.init_scope(): + self._create_all_weights(var_list) + + if not grads_and_vars: + # Distribution strategy does not support reducing an empty list + # of gradients + return tf.no_op() + + if tf.distribute.in_cross_replica_context(): + raise RuntimeError( + "`apply_gradients() cannot be called in cross-replica " + "context. Use `tf.distribute.Strategy.run` to enter " + "replica context. For more information, please see the " + "docstring of `tf.distribute.get_replica_context`." + ) + + strategy = tf.distribute.get_strategy() + if ( + not experimental_aggregate_gradients + and strategy + and isinstance( + strategy, + ( + tf.compat.v1.distribute.experimental.ParameterServerStrategy, # noqa: E501 + tf.distribute.experimental.ParameterServerStrategy, + tf.distribute.experimental.CentralStorageStrategy, + tf.compat.v1.distribute.experimental.CentralStorageStrategy, # noqa: E501 + ), + ) + ): + raise NotImplementedError( + "`experimental_aggregate_gradients=False is not supported " + "for ParameterServerStrategy and CentralStorageStrategy. " + f"Used: strategy={strategy}." + ) + + apply_state = self._prepare(var_list) + if experimental_aggregate_gradients: + grads_and_vars = self._transform_unaggregated_gradients( + grads_and_vars + ) + grads_and_vars = self._aggregate_gradients(grads_and_vars) + grads_and_vars = self._transform_gradients(grads_and_vars) + + return tf.__internal__.distribute.interim.maybe_merge_call( + functools.partial( + self._distributed_apply, apply_state=apply_state + ), + strategy, + grads_and_vars, + name=name, + ) + + def _distributed_apply( + self, distribution, grads_and_vars, apply_state, name + ): + """`apply_gradients` using a `DistributionStrategy`.""" + + def apply_grad_to_update_var(var, grad): + """Apply gradient to variable.""" + if isinstance(var, tf.Tensor): + raise NotImplementedError( + "Updating a `Tensor` is not implemented. " + f"Received: var={var}." + ) + + apply_kwargs = {} + if isinstance(grad, tf.IndexedSlices): + if var.constraint is not None: + raise RuntimeError( + "Cannot use a constraint function on a sparse " + f"variable. Received: grad={grad}, " + f"var.constraint={var.constraint}." 
+ ) + if "apply_state" in self._sparse_apply_args: + apply_kwargs["apply_state"] = apply_state + return self._resource_apply_sparse_duplicate_indices( + grad.values, var, grad.indices, **apply_kwargs + ) + + if "apply_state" in self._dense_apply_args: + apply_kwargs["apply_state"] = apply_state + update_op = self._resource_apply_dense(grad, var, **apply_kwargs) + if var.constraint is not None: + with tf.control_dependencies([update_op]): + return var.assign(var.constraint(var)) + else: + return update_op + + eagerly_outside_functions = ( + tf.compat.v1.executing_eagerly_outside_functions() + ) + update_ops = [] + with name_scope_only_in_function_or_graph(name or self._name): + for grad, var in grads_and_vars: + # Colocate the update with variables to avoid unnecessary + # communication delays. See b/136304694. + with distribution.extended.colocate_vars_with(var): + with name_scope_only_in_function_or_graph( + "update" + if eagerly_outside_functions + else "update_" + var.op.name + ): + update_op = distribution.extended.update( + var, + apply_grad_to_update_var, + args=(grad,), + group=False, + ) + if tf.distribute.in_cross_replica_context(): + # In cross-replica context, extended.update returns + # a list of update ops from all replicas + # (group=False). + update_ops.extend(update_op) + else: + # In replica context, extended.update return the + # single update op of current replica. + update_ops.append(update_op) + + any_symbolic = any( + isinstance(i, tf.Operation) or tf_utils.is_symbolic_tensor(i) + for i in update_ops + ) + if not tf.executing_eagerly() or any_symbolic: + # If the current context is graph mode or any of the update ops + # are symbolic then the step update should be carried out under + # a graph context. (eager updates execute immediately) + with backend._current_graph(update_ops).as_default(): + with tf.control_dependencies([tf.group(update_ops)]): + return self.iterations.assign_add(1, read_value=False) + + return self.iterations.assign_add(1) + + def get_gradients(self, loss, params): + """Returns gradients of `loss` with respect to `params`. + + Should be used only in legacy v1 graph mode. + + Args: + loss: Loss tensor. + params: List of variables. + + Returns: + List of gradient tensors. + + Raises: + ValueError: In case any gradient cannot be computed (e.g. if gradient + function not implemented). + """ + params = tf.nest.flatten(params) + with backend.get_graph().as_default(), backend.name_scope( + self._name + "/gradients" + ): + grads = tf.compat.v1.gradients(loss, params) + for grad, param in zip(grads, params): + if grad is None: + raise ValueError( + "Variable {} has `None` for gradient. " + "Please make sure that all of your ops have a " + "gradient defined (i.e. are differentiable). " + "Common ops without gradient: " + "K.argmax, K.round, K.eval.".format(param) + ) + return grads + + def get_updates(self, loss, params): + grads = self.get_gradients(loss, params) + grads_and_vars = list(zip(grads, params)) + self._assert_valid_dtypes( + [ + v + for g, v in grads_and_vars + if g is not None and v.dtype != tf.resource + ] + ) + return [self.apply_gradients(grads_and_vars)] + + def _set_hyper(self, name, value): + """set hyper `name` to value. 
value can be callable, tensor, numeric.""" + if isinstance(value, tf.__internal__.tracking.Trackable): + self._track_trackable(value, name, overwrite=True) + if name not in self._hyper: + self._hyper[name] = value + else: + prev_value = self._hyper[name] + if ( + callable(prev_value) + or isinstance( + prev_value, + ( + tf.Tensor, + int, + float, + learning_rate_schedule.LearningRateSchedule, + ), + ) + or isinstance( + value, learning_rate_schedule.LearningRateSchedule + ) + ): + self._hyper[name] = value + else: + backend.set_value(self._hyper[name], value) + + def _get_hyper(self, name, dtype=None): + if not self._hypers_created: + self._create_hypers() + value = self._hyper[name] + if isinstance(value, learning_rate_schedule.LearningRateSchedule): + return value + if callable(value): + value = value() + if dtype: + return tf.cast(value, dtype) + else: + return value + + def _create_slots(self, var_list): + pass + + def _create_slots_for_sharded_variables(self, var_list): + """Add ShardedVariables to slots to later reconstruct for checkpointing. + + ShardedVariables don't have slot variables created for them; their + shards do. This function allows users to call get_slot with a + ShardedVariable input and receive a ShardedVariable output containing + the appropriate slot vars. + + Iterate over the variables to find shards, and aggregate the sharded + containers in a set. Add these ShardedVariables to _slots so that + get_slot can retrieve the proper slot variables for their component + shards, and reconstruct those into a ShardedVariable. + + Args: + var_list: list or tuple of `Variable` objects that will be minimized + using this optimizer. + """ + sharded_vars = set() + for var in var_list: + if getattr(var, "_sharded_container", False): + sharded_vars.add(var._sharded_container()) + + for sharded_var in sharded_vars: + sharded_key = _var_key(sharded_var) + slot_dict = {} + for slot in self.get_slot_names(): + slot_dict[slot] = sharded_var + self._slots[sharded_key] = slot_dict + + def _create_all_weights(self, var_list): + """Creates all weights, including iterations, hyperparameters and slot + vars. + + This will add newly created variables to `optimizer.weights`. + + New variables are only created when this method is called the first + time, or when called with different variables in the var_list. + + Args: + var_list: list or tuple of `Variable` objects that will be minimized + using this optimizer. + """ + + _ = self.iterations + self._create_hypers() + self._create_slots(var_list) + self._create_slots_for_sharded_variables(var_list) + + def __getattribute__(self, name): + """Overridden to support hyperparameter access.""" + try: + return super().__getattribute__(name) + except AttributeError as e: + # Needed to avoid infinite recursion with __setattr__. + if name == "_hyper": + raise e + # Backwards compatibility with Keras optimizers. + if name == "lr": + name = "learning_rate" + if name in self._hyper: + return self._get_hyper(name) + raise e + + def __dir__(self): + result = set(super().__dir__()) + if "_hyper" in result: + result |= self._hyper.keys() + if "learning_rate" in self._hyper.keys(): + result.add("lr") + return list(result) + + def __setattr__(self, name, value): + """Override setattr to support dynamic hyperparameter setting.""" + # Backwards compatibility with Keras optimizers. 
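The `lr`/`learning_rate` aliasing handled in `__getattribute__` above (and continued in `__setattr__` just below) can be exercised directly; a minimal sketch, assuming eager mode:

```python
# Round-trip through the lr <-> learning_rate alias.
import tensorflow as tf

opt = tf.keras.optimizers.legacy.SGD(learning_rate=0.1)
opt.lr = 0.05  # routed to _set_hyper("learning_rate", 0.05)
print(float(opt.learning_rate))  # 0.05
print(float(opt.lr))             # 0.05 -- "lr" is mapped back on read
```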
+ if name == "lr": + name = "learning_rate" + if hasattr(self, "_hyper") and name in self._hyper: + self._set_hyper(name, value) + else: + super().__setattr__(name, value) + + def get_slot_names(self): + """A list of names for this optimizer's slots.""" + return self._slot_names + + def add_slot(self, var, slot_name, initializer="zeros", shape=None): + """Add a new slot variable for `var`. + + A slot variable is an additional variable associated with `var` to + train. It is allocated and managed by optimizers, e.g. `Adam`. + + Args: + var: a `Variable` object. + slot_name: name of the slot variable. + initializer: initializer of the slot variable + shape: (Optional) shape of the slot variable. If not set, it will + default to the shape of `var`. + + Returns: + A slot variable. + """ + if slot_name not in self._slot_names: + self._slot_names.append(slot_name) + var_key = _var_key(var) + slot_dict = self._slots.setdefault(var_key, {}) + weight = slot_dict.get(slot_name, None) + if weight is None: + if isinstance(initializer, str) or callable(initializer): + initializer = initializers.get(initializer) + if isinstance( + initializer, + tf.__internal__.tracking.CheckpointInitialValueCallable, + ) or (shape is not None): + slot_shape = shape + else: + slot_shape = var.shape + initial_value = functools.partial( + initializer, shape=slot_shape, dtype=var.dtype + ) + else: + initial_value = initializer + + with self._distribution_strategy_scope(): + strategy = tf.distribute.get_strategy() + if not strategy.extended.variable_created_in_scope(var): + raise ValueError( + "Trying to create optimizer slot variable under the " + "scope for tf.distribute.Strategy ({}), which is " + "different from the scope used for the original " + "variable ({}). Make sure the slot variables are " + "created under the same strategy scope. This may " + "happen if you're restoring from a checkpoint " + "outside the scope.".format(strategy, var) + ) + + with strategy.extended.colocate_vars_with(var): + weight = tf.Variable( + name=f"{var._shared_name}/{slot_name}", + dtype=var.dtype, + trainable=False, + initial_value=initial_value, + ) + backend.track_variable(weight) + slot_dict[slot_name] = weight + self._restore_slot_variable( + slot_name=slot_name, variable=var, slot_variable=weight + ) + self._weights.append(weight) + return weight + + def get_slot(self, var, slot_name): + var_key = _var_key(var) + slot_dict = self._slots[var_key] + slot_variable = slot_dict[slot_name] + if isinstance( + slot_variable, tf.__internal__.distribute.ShardedVariable + ): + # Construct a ShardedVariable that points to the input + # ShardedVariable's component shard's slot variables. 
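For the common non-sharded case, the `add_slot`/`get_slot` bookkeeping above can be observed with any slot-using optimizer; a hedged sketch with the legacy Adam:

```python
# Inspecting slot variables created for a trained variable.
import tensorflow as tf

var = tf.Variable([1.0, 2.0])
opt = tf.keras.optimizers.legacy.Adam()
opt.apply_gradients([(tf.constant([0.1, 0.1]), var)])  # creates the slots
print(opt.get_slot_names())      # ['m', 'v'] for Adam
m_slot = opt.get_slot(var, "m")  # first-moment accumulator paired with var
```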
+ shard_vars = [] + for shard in slot_variable.variables: + slot_shard = self.get_slot(shard, slot_name) + shard_vars.append(slot_shard) + slot_variable = tf.__internal__.distribute.ShardedVariable( + shard_vars, name=slot_variable.name + ) + return slot_variable + + def _prepare(self, var_list): + keys = set() + for var in var_list: + if isinstance(var, tf.distribute.DistributedValues): + var_devices = var._devices + else: + var_devices = [var.device] + var_dtype = var.dtype.base_dtype + for var_device in var_devices: + keys.add((var_device, var_dtype)) + + apply_state = {} + for var_device, var_dtype in keys: + apply_state[(var_device, var_dtype)] = {} + with tf.device(var_device): + self._prepare_local(var_device, var_dtype, apply_state) + + return apply_state + + def _prepare_local(self, var_device, var_dtype, apply_state): + if "learning_rate" in self._hyper: + lr_t = tf.identity(self._decayed_lr(var_dtype)) + apply_state[(var_device, var_dtype)]["lr_t"] = lr_t + + def _fallback_apply_state(self, var_device, var_dtype): + """Compatibility for subclasses that don't pass apply_state through.""" + apply_state = {(var_device, var_dtype): {}} + self._prepare_local(var_device, var_dtype, apply_state) + return apply_state[(var_device, var_dtype)] + + def _create_hypers(self): + if self._hypers_created: + return + with self._distribution_strategy_scope(): + # Iterate hyper values deterministically. + for name, value in sorted(self._hyper.items()): + if isinstance(value, (tf.Tensor, tf.Variable)) or callable( + value + ): + # The check for `callable` covers the usage when `value` is + # a `LearningRateSchedule`, in which case it does not need + # to create a variable. + continue + else: + self._hyper[name] = self.add_weight( + name, + shape=[], + trainable=False, + initializer=value, + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA, + ) + self._hypers_created = True + + @property + def iterations(self): + """Variable. The number of training steps this Optimizer has run.""" + if self._iterations is None: + with self._distribution_strategy_scope(): + self._iterations = self.add_weight( + "iter", + shape=[], + dtype=tf.int64, + trainable=False, + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA, + ) + self._weights.append(self._iterations) + return self._iterations + + @iterations.setter + def iterations(self, variable): + if self._iterations is not None: + raise RuntimeError( + "Cannot set `iterations` to a new Variable after " + "the Optimizer weights have been created. Here it is " + f"attempting to set `iterations` to {variable}." + ) + self._iterations = variable + self._weights.append(self._iterations) + + def _decayed_lr(self, var_dtype): + """Get decayed learning rate as a Tensor with dtype=var_dtype.""" + lr_t = self._get_hyper("learning_rate", var_dtype) + if isinstance(lr_t, learning_rate_schedule.LearningRateSchedule): + local_step = tf.cast(self.iterations, var_dtype) + lr_t = tf.cast(lr_t(local_step), var_dtype) + if self._initial_decay > 0.0: + local_step = tf.cast(self.iterations, var_dtype) + decay_t = tf.cast(self._initial_decay, var_dtype) + lr_t = lr_t / (1.0 + decay_t * local_step) + return lr_t + + @abc.abstractmethod + def get_config(self): + """Returns the config of the optimizer. + + An optimizer config is a Python dictionary (serializable) + containing the configuration of an optimizer. + The same optimizer can be reinstantiated later + (without any saved state) from this configuration. + + Returns: + Python dictionary. 
+ """ + config = {"name": self._name} + if self.clipnorm is not None: + config["clipnorm"] = self.clipnorm + if self.clipvalue is not None: + config["clipvalue"] = self.clipvalue + if self.global_clipnorm is not None: + config["global_clipnorm"] = self.global_clipnorm + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + """Creates an optimizer from its config. + + This method is the reverse of `get_config`, + capable of instantiating the same optimizer from the config + dictionary. + + Args: + config: A Python dictionary, typically the output of get_config. + custom_objects: A Python dictionary mapping names to additional + Python objects used to create this optimizer, such as a function + used for a hyperparameter. + + Returns: + An optimizer instance. + """ + if "lr" in config: + config["learning_rate"] = config.pop("lr") + if "learning_rate" in config: + if isinstance(config["learning_rate"], dict): + config["learning_rate"] = learning_rate_schedule.deserialize( + config["learning_rate"], custom_objects=custom_objects + ) + return cls(**config) + + def _serialize_hyperparameter(self, hyperparameter_name): + """Serialize a hyperparameter that can be a float, callable, or + Tensor.""" + value = self._hyper[hyperparameter_name] + if isinstance(value, learning_rate_schedule.LearningRateSchedule): + return learning_rate_schedule.serialize(value) + if callable(value): + return value() + if tf.is_tensor(value): + return backend.get_value(value) + return value + + def variables(self): + """Returns variables of this Optimizer based on the order created.""" + return self._weights + + @property + def weights(self): + """Returns variables of this Optimizer based on the order created.""" + return self._weights + + def get_weights(self): + """Returns the current weights of the optimizer. + + The weights of an optimizer are its state (ie, variables). + This function returns the weight values associated with this + optimizer as a list of Numpy arrays. The first value is always the + iterations count of the optimizer, followed by the optimizer's state + variables in the order they were created. The returned list can in turn + be used to load state into similarly parameterized optimizers. + + For example, the RMSprop optimizer for this simple model returns a list + of three values-- the iteration count, followed by the root-mean-square + value of the kernel and bias of the single Dense layer: + + >>> opt = tf.keras.optimizers.legacy.RMSprop() + >>> m = tf.keras.models.Sequential([tf.keras.layers.Dense(10)]) + >>> m.compile(opt, loss='mse') + >>> data = np.arange(100).reshape(5, 20) + >>> labels = np.zeros(5) + >>> results = m.fit(data, labels) # Training. + >>> len(opt.get_weights()) + 3 + + Returns: + Weights values as a list of numpy arrays. + """ + params = self.weights + return backend.batch_get_value(params) + + # TODO(tanzheny): Maybe share this logic with base_layer. + def set_weights(self, weights): + """Set the weights of the optimizer. + + The weights of an optimizer are its state (ie, variables). + This function takes the weight values associated with this + optimizer as a list of Numpy arrays. The first value is always the + iterations count of the optimizer, followed by the optimizer's state + variables in the order they are created. The passed values are used to + set the new state of the optimizer. 
+
+        For example, the RMSprop optimizer for this simple model takes a
+        list of three values-- the iteration count, followed by the
+        root-mean-square value of the kernel and bias of the single Dense
+        layer:
+
+        >>> opt = tf.keras.optimizers.legacy.RMSprop()
+        >>> m = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])
+        >>> m.compile(opt, loss='mse')
+        >>> data = np.arange(100).reshape(5, 20)
+        >>> labels = np.zeros(5)
+        >>> results = m.fit(data, labels)  # Training.
+        >>> new_weights = [np.array(10), np.ones([20, 10]), np.zeros([10])]
+        >>> opt.set_weights(new_weights)
+        >>> opt.iterations
+        <tf.Variable 'RMSprop/iter:0' shape=() dtype=int64, numpy=10>
+
+        Args:
+            weights: weight values as a list of numpy arrays.
+        """
+        params = self.weights
+        if len(params) != len(weights):
+            raise ValueError(
+                f"You called `set_weights(weights)` on optimizer "
+                f"{self._name} "
+                f"with a weight list of length {str(len(weights))}, "
+                f"but the optimizer was expecting {str(len(params))} "
+                f"weights. Provided weights: {str(weights)[:50]}..."
+            )
+        if not params:
+            return
+        weight_value_tuples = []
+        param_values = backend.batch_get_value(params)
+        for pv, p, w in zip(param_values, params, weights):
+            if pv.shape != w.shape:
+                raise ValueError(
+                    f"Optimizer weight shape {str(pv.shape)} "
+                    "not compatible with "
+                    f"provided weight shape {str(w.shape)}."
+                )
+            weight_value_tuples.append((p, w))
+        backend.batch_set_value(weight_value_tuples)
+
+    def add_weight(
+        self,
+        name,
+        shape,
+        dtype=None,
+        initializer="zeros",
+        trainable=None,
+        synchronization=tf.VariableSynchronization.AUTO,
+        aggregation=tf.VariableAggregation.NONE,
+    ):
+
+        if dtype is None:
+            dtype = tf.float32
+        if isinstance(initializer, str) or callable(initializer):
+            initializer = initializers.get(initializer)
+
+        if synchronization == tf.VariableSynchronization.ON_READ:
+            if trainable:
+                raise ValueError(
+                    "Synchronization value can be set to "
+                    "VariableSynchronization.ON_READ only for non-trainable "
+                    "variables. You have specified trainable=True and "
+                    "synchronization=VariableSynchronization.ON_READ."
+                )
+            else:
+                # Set trainable to be false when variable is to be synced on
+                # read.
+                trainable = False
+        elif trainable is None:
+            trainable = True
+
+        variable = self._add_variable_with_custom_getter(
+            name=name,
+            shape=shape,
+            getter=base_layer_utils.make_variable,
+            overwrite=True,
+            initializer=initializer,
+            dtype=dtype,
+            trainable=trainable,
+            use_resource=True,
+            synchronization=synchronization,
+            aggregation=aggregation,
+        )
+        backend.track_variable(variable)
+
+        return variable
+
+    def _init_set_name(self, name, zero_based=True):
+        if not name:
+            self._name = backend.unique_object_name(
+                generic_utils.to_snake_case(self.__class__.__name__),
+                zero_based=zero_based,
+            )
+        else:
+            self._name = name
+
+    def _assert_valid_dtypes(self, tensors):
+        """Asserts tensors are all valid types (see `_valid_dtypes`).
+
+        Args:
+            tensors: Tensors to check.
+
+        Raises:
+            ValueError: If any tensor is not a valid type.
+        """
+        valid_dtypes = self._valid_dtypes()
+        for t in tensors:
+            dtype = t.dtype.base_dtype
+            if dtype not in valid_dtypes:
+                raise ValueError(
+                    "Invalid type {} for {}, expected: {}.".format(
+                        dtype, t.name, [v for v in valid_dtypes]
+                    )
+                )
+
+    def _valid_dtypes(self):
+        """Valid types for loss, variables and gradients.
+
+        Subclasses should override to allow other float types.
+
+        Returns:
+            Valid types for loss, variables and gradients.
+ """ + return _DEFAULT_VALID_DTYPES + + def _call_if_callable(self, param): + """Call the function if param is callable.""" + return param() if callable(param) else param + + def _resource_apply_dense(self, grad, handle, apply_state): + """Add ops to apply dense gradients to the variable `handle`. + + Args: + grad: a `Tensor` representing the gradient. + handle: a `Tensor` of dtype `resource` which points to the variable to + be updated. + apply_state: A dict which is used across multiple apply calls. + + Returns: + An `Operation` which updates the value of the variable. + """ + raise NotImplementedError( + "`_resource_apply_dense` must be implemented in subclasses." + ) + + def _resource_apply_sparse_duplicate_indices( + self, grad, handle, indices, **kwargs + ): + """Add ops to apply sparse gradients to `handle`, with repeated indices. + + Optimizers which override this method must deal with repeated indices. + See the docstring of `_apply_sparse_duplicate_indices` for details. By + default the correct behavior, to sum non-unique indices and their + associated gradients, is enforced by first pre-processing `grad` and + `indices` and passing them on to `_resource_apply_sparse`. Optimizers + which deal correctly with duplicate indices may instead override this + method to avoid the overhead of summing. + + Args: + grad: a `Tensor` representing the gradient for the affected indices. + handle: a `Tensor` of dtype `resource` which points to the variable to + be updated. + indices: a `Tensor` of integral type representing the indices for + which the gradient is nonzero. Indices may be repeated. + **kwargs: May optionally contain `apply_state` + + Returns: + An `Operation` which updates the value of the variable. + """ + summed_grad, unique_indices = _deduplicate_indexed_slices( + values=grad, indices=indices + ) + return self._resource_apply_sparse( + summed_grad, handle, unique_indices, **kwargs + ) + + def _resource_apply_sparse(self, grad, handle, indices, apply_state): + """Add ops to apply sparse gradients to the variable `handle`. + + Similar to `_apply_sparse`, the `indices` argument to this method has + been de-duplicated. Optimizers which deal correctly with non-unique + indices may instead override `_resource_apply_sparse_duplicate_indices` + to avoid this overhead. + + Args: + grad: a `Tensor` representing the gradient for the affected indices. + handle: a `Tensor` of dtype `resource` which points to the variable to + be updated. + indices: a `Tensor` of integral type representing the indices for + which the gradient is nonzero. Indices are unique. + apply_state: A dict which is used across multiple apply calls. + + Returns: + An `Operation` which updates the value of the variable. + """ + raise NotImplementedError( + "`_resource_apply_sparse` Must be implemented in subclasses." 
+ ) + + def _resource_scatter_add(self, x, i, v): + with tf.control_dependencies( + [ + tf.raw_ops.ResourceScatterAdd( + resource=x.handle, indices=i, updates=v + ) + ] + ): + return x.value() + + def _resource_scatter_update(self, x, i, v): + with tf.control_dependencies( + [ + tf.raw_ops.ResourceScatterUpdate( + resource=x.handle, indices=i, updates=v + ) + ] + ): + return x.value() + + @property + @layer_utils.cached_per_instance + def _dense_apply_args(self): + return tf_inspect.getfullargspec(self._resource_apply_dense).args + + @property + @layer_utils.cached_per_instance + def _sparse_apply_args(self): + return tf_inspect.getfullargspec(self._resource_apply_sparse).args + + # --------------- + # For implementing the trackable interface + # --------------- + + def _restore_slot_variable(self, slot_name, variable, slot_variable): + """Restore a newly created slot variable's value.""" + variable_key = _var_key(variable) + deferred_restorations = self._deferred_slot_restorations.get( + slot_name, {} + ).pop(variable_key, []) + # Iterate over restores, highest restore UID first to minimize the + # number of assignments. + deferred_restorations.sort( + key=lambda position: position.restore_uid, reverse=True + ) + for checkpoint_position in deferred_restorations: + checkpoint_position.restore(slot_variable) + + def _create_or_restore_slot_variable( + self, slot_variable_position, slot_name, variable + ): + """Returns the slot variable that should have a value restored into it. + + It is up to the caller to restore the value into the slot variable if a + valid slot variable is returned. + + Called when a variable which has an associated slot variable is created + or restored. When executing eagerly, we create the slot variable with a + restoring initializer. + + No new variables are created when graph building. Instead, + _restore_slot_variable catches these after normal creation and adds + restore ops to the graph. This method is nonetheless important when + graph building for the case when a slot variable has already been + created but `variable` has just been added to a dependency graph + (causing us to realize that the slot variable needs to be restored). + + Args: + slot_variable_position: A `trackable._CheckpointPosition` object + indicating the slot variable `Trackable` object to be restored. + slot_name: The name of this `Optimizer`'s slot to restore into. + variable: The variable object this slot is being created for. + + Returns: + A slot variable that should have a value restored into it, or None if + a slot variable should not be restored at this time. + """ + variable_key = _var_key(variable) + slot_dict = self._slots.get(variable_key, {}) + slot_variable = slot_dict.get(slot_name, None) + if ( + slot_variable is None + and tf.executing_eagerly() + and slot_variable_position.is_simple_variable() + # Defer slot variable creation if there is an active variable + # creator scope. Generally we'd like to eagerly create/restore slot + # variables when possible, but this may mean that scopes intended to + # catch `variable` also catch its eagerly created slot variable + # unintentionally (specifically make_template would add a dependency + # on a slot variable if not for this case). Deferring is mostly + # harmless (aside from double initialization), and makes variable + # creator scopes behave the same way they do when graph building. 
+ # + # One notable case is with distribution strategy, which uses + # variable creator scope but always desires the `variable` and the + # slot to use the same scope, thus we can safely eagerly + # create/restore slot variables. + and ( + not tf.compat.v1.get_default_graph()._variable_creator_stack + or self._distribution_strategy + ) + ): + initializer = ( + tf.__internal__.tracking.CheckpointInitialValueCallable( + checkpoint_position=slot_variable_position + ) + ) + slot_variable = self.add_slot( + var=variable, + initializer=initializer, + slot_name=slot_name, + shape=slot_variable_position.value_shape(), + ) + # Slot variables are not owned by any one object (because we don't + # want to save the slot variable if the optimizer is saved without + # the non-slot variable, or if the non-slot variable is saved + # without the optimizer; it's a dependency hypergraph with edges of + # the form (optimizer, non-slot variable, variable)). So we don't + # _track_ slot variables anywhere, and instead special-case this + # dependency and otherwise pretend it's a normal graph. + if slot_variable is not None: + # For sharded variables, we need the logic in get_slot to combine + # slot variables for its shards + if (slot_variable is variable) and ( + isinstance(variable, tf.__internal__.distribute.ShardedVariable) + ): + return self.get_slot(variable, slot_name) + # If we've either made this slot variable, or if we've pulled out an + # existing slot variable, we should restore it. + return slot_variable + else: + # We didn't make the slot variable. Defer restoring until it gets + # created normally. We keep a list rather than the one with the + # highest restore UID in case slot variables have their own + # dependencies, in which case those could differ between restores. + self._deferred_slot_restorations.setdefault( + slot_name, {} + ).setdefault(variable_key, []).append(slot_variable_position) + return None + + @contextlib.contextmanager + def _distribution_strategy_scope(self): + """Returns the `tf.distribute.Strategy` this optimizer was created + under.""" + if self._distribution_strategy and not tf.distribute.has_strategy(): + with self._distribution_strategy.scope(): + yield self._distribution_strategy.scope() + else: + yield + + +def _var_key(var): + """Key for representing a primary variable, for looking up slots. + + In graph mode the name is derived from the var shared name. + In eager mode the name is derived from the var unique id. + If distribution strategy exists, get the primary variable first. + + Args: + var: the variable. + + Returns: + the unique name of the variable. + """ + + # Get the distributed variable if it exists. + if hasattr(var, "_distributed_container"): + var = var._distributed_container() + elif ( + tf_utils.is_extension_type(var) + and hasattr(var, "handle") + and hasattr(var.handle, "_distributed_container") + ): + # For ResourceVariables, the _distributed_container attribute + # is added to their handle tensors. + var = var.handle._distributed_container() + if getattr(var, "_in_graph_mode", False): + return var._shared_name + return var._unique_id + + +def _get_slot_key_from_var(var, slot_name): + """Get the slot key for the variable: var_name/slot_name.""" + + name = _var_key(var) + return name + "/" + slot_name + + +class RestoredOptimizer(OptimizerV2): + """A non-functional Optimizer implementation for checkpoint compatibility. + + Holds slot variables and hyperparameters when an optimizer is restored from + a SavedModel. 
These variables may be referenced in functions along with ops + created by the original optimizer, but currently we do not support using the + optimizer object itself (e.g. through `apply_gradients`). + """ + + # TODO(allenl): Make the restored optimizer functional by tracing its apply + # methods. + + def __init__(self): + super().__init__("RestoredOptimizer") + self._hypers_created = True + + def get_config(self): + # TODO(allenl): Save and restore the Optimizer's config + raise NotImplementedError( + "Restoring functional Optimizers from SavedModels is not currently " + "supported. Please file a feature request if this limitation " + "bothers you." + ) + + +tf.__internal__.saved_model.load.register_revived_type( + "optimizer", + lambda obj: isinstance(obj, OptimizerV2), + versions=[ + tf.__internal__.saved_model.load.VersionedTypeRegistration( + object_factory=lambda proto: RestoredOptimizer(), + version=2, + min_producer_version=1, + min_consumer_version=1, + setter=RestoredOptimizer._set_hyper, + ) + ], +) diff --git a/keras/optimizers/legacy/optimizer_v2_test.py b/keras/optimizers/legacy/optimizer_v2_test.py new file mode 100644 index 000000000000..47ffec24453f --- /dev/null +++ b/keras/optimizers/legacy/optimizer_v2_test.py @@ -0,0 +1,1474 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional test for OptimizerV2.""" + +import collections +from copy import deepcopy + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +import keras +from keras import backend +from keras import callbacks +from keras import losses +from keras.engine import input_layer +from keras.engine import sequential +from keras.engine import training +from keras.layers import core +from keras.layers import regularization +from keras.optimizers import optimizer_v1 +from keras.optimizers.legacy import adadelta +from keras.optimizers.legacy import adagrad +from keras.optimizers.legacy import adam +from keras.optimizers.legacy import adamax +from keras.optimizers.legacy import ftrl +from keras.optimizers.legacy import gradient_descent +from keras.optimizers.legacy import nadam +from keras.optimizers.legacy import optimizer_v2 +from keras.optimizers.legacy import rmsprop +from keras.optimizers.schedules import learning_rate_schedule +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import np_utils + +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) + +_DATA_TYPES = [tf.half, tf.float32, tf.float64] +# TODO(b/141710709): complex support in NVCC and ROCM. 
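# For orientation (an inference from the guard below, not a new
# claim): on builds that are neither NVCC- nor ROCm-based, the sweep
# becomes
#   _DATA_TYPES == [tf.half, tf.float32, tf.float64,
#                   tf.complex64, tf.complex128]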
+if not tf_test_utils.IsBuiltWithNvcc() and not tf.test.is_built_with_rocm(): + _DATA_TYPES += [tf.complex64, tf.complex128] + + +class OptimizerTest(tf.test.TestCase, parameterized.TestCase): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testBasic(self): + for dtype in _DATA_TYPES: + with test_utils.use_gpu(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + loss = lambda: 5 * var0 + 3 * var1 + sgd = gradient_descent.SGD(3.0) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + # Run 1 step of sgd through optimizer + opt_op = sgd.minimize(loss, var_list=[var0, var1]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + # Validate updated params + self.assertAllClose([-14.0, -13.0], self.evaluate(var0)) + self.assertAllClose([-6.0, -5.0], self.evaluate(var1)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testAdaptiveLearningRate(self): + for dtype in _DATA_TYPES: + with self.test_session(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + + def loss(): + return 5 * var0 + 3 * var1 + + sgd = gradient_descent.SGD(1.0) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + # Run 1 step of sgd through optimizer + opt_op = sgd.minimize(loss, [var0, var1]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + # Validate updated params + # var0 = [1., 2.] - 1.0 * [5, 5] + self.assertAllClose([-4.0, -3.0], self.evaluate(var0)) + # var1 = [3., 4.] - 1.0 * [3, 3] + self.assertAllClose([0.0, 1.0], self.evaluate(var1)) + + sgd.learning_rate = 0.5 + if tf.executing_eagerly(): + sgd.minimize(loss, [var0, var1]) + else: + self.evaluate(opt_op) + # Validate updated params + # var0 = [-4., -3.] - 0.5 * [5, 5] + self.assertAllClose([-6.5, -5.5], self.evaluate(var0)) + # var1 = [0., 1.] 
- 0.5 * [3, 3] + self.assertAllClose([-1.5, -0.5], self.evaluate(var1)) + + sgd.learning_rate = learning_rate_schedule.InverseTimeDecay( + 0.5, decay_steps=1.0, decay_rate=0.5 + ) + if tf.executing_eagerly(): + sgd.minimize(loss, [var0, var1]) + else: + self.evaluate(opt_op) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testPrecomputedGradient(self): + for dtype in _DATA_TYPES: + with test_utils.use_gpu(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + loss = lambda: 5 * var0 + 3 * var1 + grad_loss = tf.constant([42, -42], dtype=dtype) + sgd = gradient_descent.SGD(3.0) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + # Run 1 step of sgd through optimizer + opt_op = sgd.minimize( + loss, var_list=[var0, var1], grad_loss=grad_loss + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + # Validate updated params + self.assertAllClose( + [1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)], + self.evaluate(var0), + ) + self.assertAllClose( + [3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)], + self.evaluate(var1), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNoGradients(self): + for dtype in _DATA_TYPES: + with test_utils.use_gpu(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + loss = lambda: 5 * var0 + sgd_op = gradient_descent.SGD(3.0) + with self.assertRaisesRegex(ValueError, "No gradients"): + # var1 has no gradient + sgd_op.minimize(loss, var_list=[var1]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNoGradientsForAnyVariables_Minimize(self): + for dtype in _DATA_TYPES: + with test_utils.use_gpu(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + loss = lambda: tf.constant(5.0) + + sgd_op = gradient_descent.SGD(3.0) + with self.assertRaisesRegex( + ValueError, "No gradients provided for any variable" + ): + sgd_op.minimize(loss, var_list=[var0, var1]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNoGradientsForAnyVariables_ApplyGradients(self): + for dtype in _DATA_TYPES: + with test_utils.use_gpu(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + sgd_op = gradient_descent.SGD(3.0) + with self.assertRaisesRegex( + ValueError, "No gradients provided for any variable" + ): + sgd_op.apply_gradients([(None, var0), (None, var1)]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testGradientsAsVariables(self): + for i, dtype in enumerate(_DATA_TYPES): + with test_utils.use_gpu(): + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + loss = lambda: 5 * var0 + 3 * var1 + + sgd = gradient_descent.SGD(3.0) + grads_and_vars = sgd._compute_gradients(loss, [var0, var1]) + # Convert gradients to tf.Variables + converted_grads = [ + tf.Variable(tf.zeros([2], dtype), name="c_%d_%d" % (i, j)) + for j, gv in enumerate(grads_and_vars) + ] + convert_ops = [ + tf.compat.v1.assign(converted_grads[j], gv[0]) + for j, gv in enumerate(grads_and_vars) + ] + + # Run convert_ops to populate the converted gradient variables + 
self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(convert_ops) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 1 step of sgd through optimizer + converted_grads_and_vars = list( + zip(converted_grads, [var0, var1]) + ) + opt_op = sgd.apply_gradients(converted_grads_and_vars) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(convert_ops) + self.evaluate(opt_op) + + # Validate updated params + self.assertAllClose([-14.0, -13.0], self.evaluate(var0)) + self.assertAllClose([-6.0, -5.0], self.evaluate(var1)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testComputeGradientsWithTensors(self): + with test_utils.use_gpu(): + x = tf.convert_to_tensor(1.0) + + def f(): + return x * x + + sgd = gradient_descent.SGD(3.0) + grads_and_vars = sgd._compute_gradients(f, [x]) + self.assertLen(grads_and_vars, 1) + grad, x_as_var = grads_and_vars[0] + self.assertIs(x, x_as_var) + self.assertEqual(2.0, self.evaluate(grad)) + + with self.assertRaises(NotImplementedError): + sgd.apply_gradients(grads_and_vars) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testConstraint(self): + constraint_01 = lambda x: tf.clip_by_value(x, -0.1, 0.0) + constraint_0 = lambda x: tf.clip_by_value(x, 0.0, 1.0) + with test_utils.use_gpu(): + var0 = tf.Variable([1.0, 2.0], constraint=constraint_01) + var1 = tf.Variable([3.0, 4.0], constraint=constraint_0) + loss = lambda: 5 * var0 + 3 * var1 + sgd = gradient_descent.SGD(3.0) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + # Run 1 step of sgd through optimizer + opt_op = sgd.minimize(loss, var_list=[var0, var1]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + # Validate updated params + self.assertAllClose([-0.1, -0.1], self.evaluate(var0)) + self.assertAllClose([0.0, 0.0], self.evaluate(var1)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testIterationWithoutMinimize(self): + with test_utils.use_gpu(): + sgd = gradient_descent.SGD(3.0) + self.evaluate(sgd.iterations.initializer) + self.assertEqual(0, self.evaluate(sgd.iterations)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testConfig(self): + with test_utils.use_gpu(): + opt = gradient_descent.SGD(learning_rate=1.0) + config = opt.get_config() + opt2 = gradient_descent.SGD.from_config(config) + lr = opt._get_hyper("learning_rate") + lr2 = opt2._get_hyper("learning_rate") + self.evaluate(tf.compat.v1.global_variables_initializer()) + # assert both are equal float values. + self.assertEqual(self.evaluate(lr), self.evaluate(lr2)) + var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32) + loss = lambda: 3 * var0 + # learning rate variable created when calling minimize. 
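# For reference, a rough sketch of what get_config() returns for this
# optimizer (a sketch only; keys can vary across versions):
#   {"name": "SGD", "learning_rate": 1.0, "decay": 0.0,
#    "momentum": 0.0, "nesterov": False}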
+ opt.minimize(loss, [var0]) + opt3 = gradient_descent.SGD.from_config(config) + lr3 = opt3._get_hyper("learning_rate") + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual(self.evaluate(lr), self.evaluate(lr3)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testConfigWithLearningRateDecay(self): + with test_utils.use_gpu(): + var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32) + for decay_schedule in [ + learning_rate_schedule.InverseTimeDecay( + 0.5, decay_steps=1.0, decay_rate=0.1 + ), + learning_rate_schedule.PiecewiseConstantDecay([5], [1.0, 0.5]), + ]: + step = 10 + opt = gradient_descent.SGD(decay_schedule) + config = opt.get_config() + opt2 = gradient_descent.SGD.from_config(config) + # assert both are equal float values. + self.assertAllEqual( + decay_schedule(step), opt._get_hyper("learning_rate")(step) + ) + self.assertAllEqual( + decay_schedule(step), opt2._get_hyper("learning_rate")(step) + ) + loss = lambda: 3 * var0 + # learning rate variable is created when calling minimize. + opt.minimize(loss, [var0]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + config = opt.get_config() + opt3 = gradient_descent.SGD.from_config(config) + self.assertAllEqual( + self.evaluate(opt._get_hyper("learning_rate")(step)), + opt3._get_hyper("learning_rate")(step), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testGradClipValue(self): + with test_utils.use_gpu(): + var = tf.Variable([1.0, 2.0]) + loss = lambda: 3 * var + opt = gradient_descent.SGD(learning_rate=1.0, clipvalue=1.0) + opt_op = opt.minimize(loss, [var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + self.assertAllClose([0.0, 1.0], self.evaluate(var)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testGradClipNorm(self): + with test_utils.use_gpu(): + var = tf.Variable([1.0]) + loss = lambda: 3 * var + opt = gradient_descent.SGD(learning_rate=1.0, clipnorm=1.0) + opt_op = opt.minimize(loss, [var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + self.assertAllClose([0.0], self.evaluate(var)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testGradGlobalClipNorm(self): + with test_utils.use_gpu(): + # l2 norm is 5.0 + var1 = tf.Variable([1.0]) + var2 = tf.Variable([2.0]) + loss = lambda: 3 * var1 + 4 * var2 + opt = gradient_descent.SGD(learning_rate=1.0, global_clipnorm=2.0) + opt_op = opt.minimize(loss, [var1, var2]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + # grad1 = 3.0 * 2.0 / 5.0 = 1.2 + self.assertAllClose([-0.2], self.evaluate(var1)) + # grad2 = 4.0 * 2.0 / 5.0 = 1.6 + self.assertAllClose([0.4], self.evaluate(var2)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testInvalidClipNorm(self): + with self.assertRaisesRegex(ValueError, ">= 0"): + gradient_descent.SGD(learning_rate=1.0, clipnorm=-1.0) + + @test_combinations.generate( + test_combinations.combine( + mode=["graph", "eager"], + clip_type=["clipnorm", "global_clipnorm", "clipvalue"], + ) + ) + def testConfigWithClipping(self, clip_type): + opt = gradient_descent.SGD(learning_rate=1.0, **{clip_type: 2.0}) + config = opt.get_config() + opt = gradient_descent.SGD.from_config(config) + self.assertEqual(getattr(opt, clip_type), 2.0) + + @test_combinations.generate( + 
test_combinations.combine(mode=["graph", "eager"]) + ) + def testInvalidKwargs(self): + with self.assertRaisesRegex(TypeError, "Unexpected keyword argument"): + gradient_descent.SGD(learning_rate=1.0, invalidkwargs=1.0) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testWeights(self): + with test_utils.use_gpu(): + opt1 = adam.Adam(learning_rate=1.0) + var1 = tf.Variable([1.0, 2.0], dtype=tf.float32) + loss1 = lambda: 3 * var1 + opt_op_1 = opt1.minimize(loss1, [var1]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + config = opt1.get_config() + opt2 = adam.Adam.from_config(config) + var2 = tf.Variable([1.0, 2.0], dtype=tf.float32) + loss2 = lambda: 3 * var2 + opt_op_2 = opt2.minimize(loss2, [var2]) + weights = opt1.get_weights() + + # Assert set_weights and both variables get updated to same value. + self.evaluate(tf.compat.v1.global_variables_initializer()) + opt2.set_weights(weights) + self.evaluate([opt_op_1, opt_op_2]) + self.assertAllClose(self.evaluate(var1), self.evaluate(var2)) + self.assertEqual(1, self.evaluate(opt1.iterations)) + self.assertEqual(1, self.evaluate(opt2.iterations)) + + var3 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32) + var4 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32) + loss3 = lambda: 3 * var3 + 5 * var4 + opt_op_3 = opt1.minimize(loss3, [var3, var4]) + + # Assert set_weights with ValueError since weight list does not + # match. + self.evaluate(tf.compat.v1.global_variables_initializer()) + weights = opt1.get_weights() + with self.assertRaisesRegex(ValueError, "but the optimizer was"): + opt2.set_weights(weights) + + # Assert set_weights and variables get updated to same value. + var5 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32) + var6 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32) + loss4 = lambda: 3 * var5 + 5 * var6 + opt_op_4 = opt2.minimize(loss4, [var5, var6]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + opt2.set_weights(weights) + self.evaluate([opt_op_3, opt_op_4]) + self.assertAllClose( + self.evaluate([var3, var4]), self.evaluate([var5, var6]) + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testGettingHyperParameters(self): + with self.test_session(): + opt = adam.Adam(learning_rate=1.0) + var = tf.Variable([1.0, 2.0], dtype=tf.float32) + loss = lambda: 3 * var + opt_op = opt.minimize(loss, [var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + + lr = self.evaluate(opt.lr) + self.assertEqual(1.0, lr) + + opt.lr = 2.0 + lr = self.evaluate(opt.lr) + self.assertEqual(2.0, lr) + + self.evaluate(opt.lr.assign(3.0)) + lr = self.evaluate(opt.lr) + self.assertEqual(3.0, lr) + + with self.assertRaises(AttributeError): + opt.not_an_attr += 3 + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testGettingHyperParametersWithLrInConstructor(self): + with self.test_session(): + opt = gradient_descent.SGD(lr=3.0) + var = tf.Variable([1.0, 2.0], dtype=tf.float32) + loss = lambda: 3 * var + opt_op = opt.minimize(loss, [var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + + self.assertIsInstance(opt.lr, tf.Variable) + self.assertIsInstance(opt.learning_rate, tf.Variable) + + lr = self.evaluate(opt.lr) + self.assertEqual(3.0, lr) + + opt.lr = 2.0 + lr = self.evaluate(opt.lr) + self.assertEqual(2.0, lr) + + self.evaluate(opt.lr.assign(4.0)) + lr = self.evaluate(opt.lr) + self.assertEqual(4.0, lr) + + 
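(Editor's aside.) The get_weights()/set_weights() round trip exercised above can be reproduced standalone. A minimal eager-mode sketch, with illustrative names that are not part of the test suite:

import tensorflow as tf

from keras.optimizers.legacy import adam

var = tf.Variable([1.0, 2.0])
opt = adam.Adam(learning_rate=1.0)
opt.minimize(lambda: 3 * var, [var])  # creates the iteration count and m/v slots

# A clone only accepts the weights once its own slots exist with
# matching shapes, hence the throwaway minimize() call.
clone = adam.Adam.from_config(opt.get_config())
clone.minimize(lambda: 3 * var, [var])
clone.set_weights(opt.get_weights())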
@test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testDir(self): + opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.1) + dir_result = set(dir(opt)) + self.assertIn("learning_rate", dir_result) # Hyperparameter + self.assertIn("lr", dir_result) # Hyperparameter + self.assertIn("momentum", dir_result) # Hyperparameter + self.assertIn("nesterov", dir_result) # Attribute + self.assertIn("minimize", dir_result) # Attribute + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testOptimizerWithKerasModel(self): + a = input_layer.Input(shape=(3,), name="input_a") + b = input_layer.Input(shape=(3,), name="input_b") + + dense = core.Dense(4, name="dense") + c = dense(a) + d = dense(b) + e = regularization.Dropout(0.5, name="dropout")(c) + + model = training.Model([a, b], [d, e]) + + optimizer = gradient_descent.SGD(learning_rate=0.001) + loss = "mse" + model.compile(optimizer, loss, metrics=["mae"]) + + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + + output_d_np = np.random.random((10, 4)) + output_e_np = np.random.random((10, 4)) + + model.fit( + [input_a_np, input_b_np], + [output_d_np, output_e_np], + epochs=1, + batch_size=5, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testOptimizerWithCallbacks(self): + np.random.seed(1331) + input_np = np.random.random((10, 3)) + output_np = np.random.random((10, 4)) + a = input_layer.Input(shape=(3,), name="input_a") + model = sequential.Sequential() + model.add(core.Dense(4, kernel_initializer="zeros", name="dense")) + model.add(regularization.Dropout(0.5, name="dropout")) + model(a) + optimizer = gradient_descent.SGD(learning_rate=0.1) + model.compile(optimizer, loss="mse", metrics=["mae"]) + # This does not reduce the LR after the first epoch (due to low delta). + cbks = [ + callbacks.ReduceLROnPlateau( + monitor="val_loss", + factor=0.1, + min_delta=0, + patience=1, + cooldown=5, + ) + ] + model.fit( + input_np, + output_np, + batch_size=10, + validation_data=(input_np, output_np), + callbacks=cbks, + epochs=2, + verbose=0, + ) + self.assertAllClose( + float(backend.get_value(model.optimizer.lr)), 0.1, atol=1e-4 + ) + + # This should reduce the LR after the first epoch (due to high delta). 
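# With an initial lr of 0.1 and factor=0.1, one plateau-triggered
# reduction gives lr = 0.1 * 0.1 = 0.01, which the assertion below
# expects.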
+ cbks = [ + callbacks.ReduceLROnPlateau( + monitor="val_loss", + factor=0.1, + min_delta=10, + patience=1, + cooldown=5, + ) + ] + model.fit( + input_np, + output_np, + batch_size=10, + validation_data=(input_np, output_np), + callbacks=cbks, + epochs=2, + verbose=2, + ) + self.assertAllClose( + float(backend.get_value(model.optimizer.lr)), 0.01, atol=1e-4 + ) + + def testOptimizerSetIterations(self): + global_step = tf.compat.v1.train.get_or_create_global_step() + opt = adam.Adam(learning_rate=1.0) + opt.iterations = global_step + var = tf.Variable([1.0, 2.0], dtype=tf.float32) + self.evaluate(tf.compat.v1.global_variables_initializer()) + init_step_value = self.evaluate(global_step) + loss = lambda: 3 * var + opt_op = opt.minimize(loss, [var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + new_step_value = self.evaluate(global_step) + self.assertEqual(new_step_value, init_step_value + 1) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testOptimizerWithCallableVarList(self): + train_samples = 20 + input_dim = 1 + num_classes = 2 + (x, y), _ = test_utils.get_test_data( + train_samples=train_samples, + test_samples=10, + input_shape=(input_dim,), + num_classes=num_classes, + ) + y = np_utils.to_categorical(y) + + num_hidden = 1 + model = test_utils.get_small_sequential_mlp( + num_hidden=num_hidden, num_classes=num_classes + ) + opt = adam.Adam() + + loss = lambda: losses.mean_squared_error(model(x), y) + var_list = lambda: model.trainable_weights + + with self.assertRaisesRegex( + ValueError, "Weights for model .* have not yet been created" + ): + var_list() + train_op = opt.minimize(loss, var_list) + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertEqual( + [[0.0]], self.evaluate(opt.get_slot(var_list()[0], "m")) + ) + self.evaluate(train_op) + self.assertNotEqual( + [[0.0]], self.evaluate(opt.get_slot(var_list()[0], "m")) + ) + self.assertLen(var_list(), 4) + + def testVarKey(self): + with tf.compat.v1.get_default_graph().as_default(): + a = tf.Variable([1.0, 2.0], name="var") + b = tf.Variable([1.0], name="var") + self.assertTrue(a._in_graph_mode) + self.assertTrue(b._in_graph_mode) + var_key = optimizer_v2._var_key(a) + self.assertEqual("var", var_key) + var_key = optimizer_v2._var_key(b) + self.assertEqual("var_1", var_key) + + def testVarName(self): + with tf.compat.v1.get_default_graph().as_default(): + var = tf.Variable([1.0, 2.0], name="var") + loss = var + 1.0 + opt = adam.Adam() + opt.get_updates(loss, [var]) + opt_vars = opt.variables() + self.assertLen(opt_vars, 3) + self.assertEqual("Adam/iter:0", opt_vars[0].name) + self.assertEqual("Adam/var/m:0", opt_vars[1].name) + var_2 = tf.Variable([1.0, 2.0], name="var_2") + loss = var_2 + 1.0 + with backend.name_scope("outer"): + opt.get_updates(loss, [var_2]) + opt_vars = opt.variables() + self.assertLen(opt_vars, 5) + self.assertEqual("outer/Adam/var_2/m:0", opt_vars[3].name) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testEmptyVarList(self): + opt = gradient_descent.SGD(1.0) + opt.minimize(lambda: tf.constant(1.0), []) + opt.apply_gradients([]) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testAggregationTrue(self): + # Test that experimental_aggregate_gradients=True works without + # distributed strategy.
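# Expected arithmetic: var <- var - lr * grad
#   = [1.0, 2.0] - 3.0 * [0.1, 0.1] = [0.7, 1.7]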
+ var = tf.Variable([1.0, 2.0]) + opt = gradient_descent.SGD(3.0) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose([1.0, 2.0], self.evaluate(var)) + opt_op = opt.apply_gradients( + [([0.1, 0.1], var)], experimental_aggregate_gradients=True + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + self.assertAllClose([0.7, 1.7], self.evaluate(var)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testAggregationFalse(self): + # Test that experimental_aggregate_gradients=False works without + # distributed strategy. + var = tf.Variable([1.0, 2.0]) + opt = gradient_descent.SGD(3.0) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose([1.0, 2.0], self.evaluate(var)) + opt_op = opt.apply_gradients( + [([0.1, 0.1], var)], experimental_aggregate_gradients=False + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + self.assertAllClose([0.7, 1.7], self.evaluate(var)) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testRestoringIterationsWithoutAnOptimizer(self): + opt = gradient_descent.SGD(3.0) + opt.iterations.assign(5) + checkpoint = tf.train.Checkpoint(optimizer=opt) + path = checkpoint.save(self.get_temp_dir()) + + # Following verifies that the `iterations` can be restored with the + # absence of an `Optimizer` object (using a `Checkpoint` as a + # placeholder). + iterations_var = tf.Variable(0, dtype=tf.int64) + optimizer_checkpoint = tf.train.Checkpoint(iter=iterations_var) + checkpoint_to_restore = tf.train.Checkpoint( + optimizer=optimizer_checkpoint + ) + checkpoint_to_restore.restore(path) + + self.assertEqual(5, self.evaluate(iterations_var)) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testSlotWithNonstandardShapeRestoresBasedOnCheckpoint(self): + # First create an optimizer and a slot variable with a non-standard + # shape. + x = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32) + slot_shape = [2, 1] + optimizer_1 = optimizer_v2.OptimizerV2(name="test") + optimizer_1.add_slot(x, "test_slot", "ones", shape=slot_shape) + + # Then save the variable and optimizer to a checkpoint. + checkpoint_1 = tf.train.Checkpoint(var=x, optimizer=optimizer_1) + checkpoint_path = checkpoint_1.save(self.get_temp_dir()) + + # Create a new optimizer and call restore on it (and x) + optimizer_2 = optimizer_v2.OptimizerV2(name="test") + checkpoint_2 = tf.train.Checkpoint(var=x, optimizer=optimizer_2) + checkpoint_2.restore(checkpoint_path) + + self.assertEqual( + slot_shape, optimizer_2.get_slot(x, "test_slot").shape.as_list() + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_gradient_aggregator(self): + def gradient_aggregator(grads_and_vars): + # Simulate an all-reduce where the other replica has zeros for + # gradients, by dividing each gradient by 2. 
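# Concretely: d(2 * var)/d(var) = 2.0 is halved to 1.0, so a single
# SGD step with lr=1.0 moves var from 2.0 to 1.0 (asserted below).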
+ grads = [g for g, _ in grads_and_vars] + vars = [v for _, v in grads_and_vars] + all_reduced_grads = [g / 2 for g in grads] + return list(zip(all_reduced_grads, vars)) + + var = tf.Variable(2.0) + sgd = gradient_descent.SGD(1.0, gradient_aggregator=gradient_aggregator) + loss = lambda: 2 * var + opt_op = sgd.minimize(loss, var_list=[var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + self.assertEqual(self.evaluate(var), 1.0) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_override_aggregate_gradients(self): + class MyOptimizer(gradient_descent.SGD): + def _aggregate_gradients(self, grads_and_vars): + # Simulate an all-reduce where the other replica has zeros for + # gradients, by dividing each gradient by 2. + grads = [g for g, _ in grads_and_vars] + vars = [v for _, v in grads_and_vars] + all_reduced_grads = [g / 2 for g in grads] + return list(zip(all_reduced_grads, vars)) + + var = tf.Variable(2.0) + sgd = MyOptimizer(1.0) + loss = lambda: 2 * var + opt_op = sgd.minimize(loss, var_list=[var]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + self.assertEqual(self.evaluate(var), 1.0) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_create_slots_for_sharded_variables(self): + # set names so that ShardedVariable is well-named for slot variable + # keying. + var_a = tf.Variable([1.0], name="part_0") + var_b = tf.Variable([2.0], name="part_1") + sharded_var = tf.__internal__.distribute.ShardedVariable([var_a, var_b]) + + opt = adagrad.Adagrad() + opt._create_slots(sharded_var.variables) + opt._create_slots_for_sharded_variables(sharded_var.variables) + + sharded_slot = opt.get_slot(sharded_var, "accumulator") + self.assertIsInstance( + sharded_slot, tf.__internal__.distribute.ShardedVariable + ) + + slot_a = opt.get_slot(var_a, "accumulator") + self.assertAllClose(sharded_slot.variables[0], slot_a) + slot_b = opt.get_slot(var_b, "accumulator") + self.assertAllClose(sharded_slot.variables[1], slot_b) + + +@test_combinations.run_all_keras_modes +class OptimizersCompatibilityTest(test_combinations.TestCase): + def _testOptimizersCompatibility(self, opt_v1, opt_v2, test_weights=True): + if tf.executing_eagerly(): + self.skipTest("v1 optimizer does not run in eager mode") + np.random.seed(1331) + with test_utils.use_gpu(): + train_samples = 20 + input_dim = 3 + num_classes = 2 + (x, y), _ = test_utils.get_test_data( + train_samples=train_samples, + test_samples=10, + input_shape=(input_dim,), + num_classes=num_classes, + ) + y = np_utils.to_categorical(y) + + num_hidden = 5 + model_v1 = test_utils.get_small_sequential_mlp( + num_hidden=num_hidden, + num_classes=num_classes, + input_dim=input_dim, + ) + model_v1.compile( + opt_v1, + loss="categorical_crossentropy", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + model_v1.fit(x, y, batch_size=5, epochs=1) + + model_v2 = test_utils.get_small_sequential_mlp( + num_hidden=num_hidden, + num_classes=num_classes, + input_dim=input_dim, + ) + model_v2.set_weights(model_v1.get_weights()) + model_v2.compile( + opt_v2, + loss="categorical_crossentropy", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + if not tf.compat.v1.executing_eagerly_outside_functions(): + model_v2._make_train_function() + if test_weights: + opt_v2.set_weights(opt_v1.get_weights()) + + hist_1 = model_v1.fit(x, y, batch_size=5, epochs=1, shuffle=False) + hist_2 = model_v2.fit(x, y, 
batch_size=5, epochs=1, shuffle=False) + self.assertAllClose( + model_v1.get_weights(), + model_v2.get_weights(), + rtol=1e-5, + atol=1e-5, + ) + self.assertAllClose( + hist_1.history["loss"], + hist_2.history["loss"], + rtol=1e-5, + atol=1e-5, + ) + + def testAdadeltaCompatibility(self): + opt_v1 = optimizer_v1.Adadelta(lr=0.01) + opt_v2 = adadelta.Adadelta(learning_rate=0.01) + self._testOptimizersCompatibility(opt_v1, opt_v2) + + def testAdagradCompatibility(self): + opt_v1 = optimizer_v1.Adagrad(lr=0.01) + opt_v2 = adagrad.Adagrad(learning_rate=0.01) + self._testOptimizersCompatibility(opt_v1, opt_v2) + + def testAdamCompatibility(self): + opt_v1 = optimizer_v1.Adam() + opt_v2 = adam.Adam() + self._testOptimizersCompatibility(opt_v1, opt_v2) + + def testAdamaxCompatibility(self): + opt_v1 = optimizer_v1.Adamax(lr=0.01) + opt_v2 = adamax.Adamax(learning_rate=0.01) + self._testOptimizersCompatibility(opt_v1, opt_v2) + + def testNadamCompatibility(self): + opt_v1 = optimizer_v1.Nadam(lr=0.001) + opt_v2 = nadam.Nadam(learning_rate=0.001) + self._testOptimizersCompatibility(opt_v1, opt_v2) + + def testMomentumCompatibility(self): + opt_v1 = optimizer_v1.SGD(lr=0.01, momentum=0.9) + opt_v2 = gradient_descent.SGD(learning_rate=0.01, momentum=0.9) + self._testOptimizersCompatibility(opt_v1, opt_v2) + + def testRMSpropCompatibility(self): + opt_v1 = optimizer_v1.RMSprop() + opt_v2 = rmsprop.RMSprop() + self._testOptimizersCompatibility(opt_v1, opt_v2) + + def testSGDCompatibility(self): + opt_v1 = optimizer_v1.SGD(lr=0.01) + opt_v2 = gradient_descent.SGD(learning_rate=0.01) + self._testOptimizersCompatibility(opt_v1, opt_v2, False) + + def testNumericEquivalenceForNesterovMomentum(self): + if tf.executing_eagerly(): + self.skipTest("v1 optimizer does not run in eager mode") + np.random.seed(1331) + with test_utils.use_gpu(): + train_samples = 20 + input_dim = 3 + num_classes = 2 + (x, y), _ = test_utils.get_test_data( + train_samples=train_samples, + test_samples=10, + input_shape=(input_dim,), + num_classes=num_classes, + ) + y = np_utils.to_categorical(y) + + num_hidden = 5 + model_k_v1 = test_utils.get_small_sequential_mlp( + num_hidden=num_hidden, + num_classes=num_classes, + input_dim=input_dim, + ) + model_k_v2 = test_utils.get_small_sequential_mlp( + num_hidden=num_hidden, + num_classes=num_classes, + input_dim=input_dim, + ) + model_k_v2.set_weights(model_k_v1.get_weights()) + model_tf = test_utils.get_small_sequential_mlp( + num_hidden=num_hidden, + num_classes=num_classes, + input_dim=input_dim, + ) + model_tf.set_weights(model_k_v2.get_weights()) + + opt_k_v1 = optimizer_v1.SGD(momentum=0.9, nesterov=True) + opt_k_v2 = gradient_descent.SGD(momentum=0.9, nesterov=True) + opt_tf = tf.compat.v1.train.MomentumOptimizer( + learning_rate=0.01, momentum=0.9, use_nesterov=True + ) + + model_k_v1.compile( + opt_k_v1, + loss="categorical_crossentropy", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + model_k_v2.compile( + opt_k_v2, + loss="categorical_crossentropy", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + model_tf.compile( + opt_tf, + loss="categorical_crossentropy", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + + hist_k_v1 = model_k_v1.fit( + x, y, batch_size=5, epochs=10, shuffle=False + ) + hist_k_v2 = model_k_v2.fit( + x, y, batch_size=5, epochs=10, shuffle=False + ) + hist_tf = model_tf.fit(x, y, batch_size=5, epochs=10, shuffle=False) + + self.assertAllClose( + model_k_v1.get_weights(), model_tf.get_weights() + ) + 
self.assertAllClose( + model_k_v1.get_weights(), model_k_v2.get_weights() + ) + self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights()) + self.assertAllClose( + hist_k_v1.history["loss"], hist_tf.history["loss"] + ) + self.assertAllClose( + hist_k_v1.history["loss"], hist_k_v2.history["loss"] + ) + + def testNumericEquivalenceForAmsgrad(self): + if tf.executing_eagerly(): + self.skipTest("v1 optimizer does not run in eager mode") + np.random.seed(1331) + with test_utils.use_gpu(): + train_samples = 20 + input_dim = 3 + num_classes = 2 + (x, y), _ = test_utils.get_test_data( + train_samples=train_samples, + test_samples=10, + input_shape=(input_dim,), + num_classes=num_classes, + ) + y = np_utils.to_categorical(y) + + num_hidden = 5 + model_k_v1 = test_utils.get_small_sequential_mlp( + num_hidden=num_hidden, + num_classes=num_classes, + input_dim=input_dim, + ) + model_k_v2 = test_utils.get_small_sequential_mlp( + num_hidden=num_hidden, + num_classes=num_classes, + input_dim=input_dim, + ) + model_k_v2.set_weights(model_k_v1.get_weights()) + + opt_k_v1 = optimizer_v1.Adam(amsgrad=True) + opt_k_v2 = adam.Adam(amsgrad=True) + + model_k_v1.compile( + opt_k_v1, + loss="categorical_crossentropy", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + model_k_v2.compile( + opt_k_v2, + loss="categorical_crossentropy", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + + hist_k_v1 = model_k_v1.fit( + x, y, batch_size=5, epochs=10, shuffle=False + ) + hist_k_v2 = model_k_v2.fit( + x, y, batch_size=5, epochs=10, shuffle=False + ) + + self.assertAllClose( + model_k_v1.get_weights(), model_k_v2.get_weights() + ) + self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights()) + self.assertAllClose( + hist_k_v1.history["loss"], hist_k_v2.history["loss"] + ) + + +# Note: These tests are kept in a separate class to avoid bugs in some +# distributions of Python that break AutoGraph which is used by tf.function. 
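For orientation before those tests, a minimal standalone sketch of an optimizer step traced inside `tf.function` (assuming TF2's default eager setup; names are illustrative):

import tensorflow as tf

from keras.optimizers.legacy import gradient_descent

var = tf.Variable(1.0)
opt = gradient_descent.SGD(0.1)

@tf.function
def train_step():
    # d(var**2)/d(var) = 2 * var, so one step is 1.0 - 0.1 * 2.0 = 0.8
    opt.minimize(lambda: var * var, [var])
    return var

train_step()  # var is now 0.8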
+@test_combinations.generate(test_combinations.combine(mode=["eager"])) +class OptimizerWithFunctionTest(tf.test.TestCase, parameterized.TestCase): + def testBasic(self): + var = tf.Variable([1.0, 2.0], dtype=tf.float32) + loss = lambda: 3 * var + opt = adam.Adam(learning_rate=1.0) + + @tf.function + def fn(): + opt.minimize(loss, [var]) + return var + + self.assertAllClose([0.0, 1.0], fn(), atol=1e-4) + self.assertAllClose([-1, 0.0], fn(), atol=1e-4) + + def testBasicWithConstantDecay(self): + var = tf.Variable([1.0, 2.0], dtype=tf.float32) + loss = lambda: 3 * var + opt = adam.Adam(learning_rate=1.0) + + @tf.function + def fn(): + opt.minimize(loss, [var]) + return var + + self.assertAllClose([0.0, 1.0], fn(), atol=1e-4) + self.assertAllClose([-1, 0.0], fn(), atol=1e-4) + + def testVarKeyWithVarCreatedInEager(self): + a = tf.Variable([1.0, 2.0], name="var") + b = tf.Variable([1.0], name="var") + + @tf_test_utils.also_run_as_tf_function + def var_key_test(): + self.assertFalse(a._in_graph_mode) + self.assertFalse(b._in_graph_mode) + var_key_a = optimizer_v2._var_key(a) + self.assertStartsWith(var_key_a, "var_") + var_key_b = optimizer_v2._var_key(b) + self.assertStartsWith(var_key_b, "var_") + self.assertNotEqual(var_key_a, var_key_b) + + var_key_test() + + def testLearningRateDecayUsedInTwoFunctions(self): + a = tf.Variable([1.0, 2.0], name="var") + b = tf.Variable([1.0], name="var") + + learning_rate_decay = learning_rate_schedule.InverseTimeDecay( + 0.5, decay_steps=1.0, decay_rate=0.5 + ) + opt = adam.Adam(learning_rate=learning_rate_decay) + loss_a = lambda: 3 * a + loss_b = lambda: 2 * b + + @tf.function + def fn_a(): + opt.minimize(loss_a, [a]) + return a + + @tf.function + def fn_b(): + opt.minimize(loss_b, [b]) + return b + + fn_a() + fn_b() + + +_NUM_LEARNERS = 50 +APPLY_SCOPE = "debug_apply" +ALLOWLIST = [ + # optimizer_v2._deduplicate_indexed_slices contains an indexed slice: + # array_ops.shape(unique_indices)[0] + # which winds up expanding to [0:1:1] thereby creating three constants + # to represent the indices. + ("embeddings/strided_slice/stack", "Const"), +] + + +def get_inputs(op): + op_inputs = list(op.inputs) + op.control_inputs + names = [i.name for i in op_inputs] + op_inputs = [getattr(i, "op", i) for i in op_inputs] + return op_inputs, names + + +def strip_name(node): + if "Placeholder" in node.op: + return + node.name = "" + + +def topological_sort(graph): + graph_ops = graph.get_operations() + + sources = [] + result = [] + + inputs = {} + outputs = collections.defaultdict(set) + for op in graph_ops: + op_inputs = get_inputs(op)[0] + if not op_inputs: + sources.append(op) + + inputs[op] = set(op_inputs) + for i in op_inputs: + outputs[i].add(op) + + while sources: + op = sources.pop() + for op_output in outputs[op]: + inputs[op_output].remove(op) + if not inputs[op_output]: + sources.append(op_output) + + result.append(op) + + # Check correctness. + if len(result) != len(graph_ops): + raise ValueError( + f"Sort result has {len(result)} ops, " + f"source graph has {len(graph_ops)}." + ) + + sort_check_seen = set() + for op in result: + sort_check_seen.add(op) + for i in get_inputs(op)[0]: + assert i in sort_check_seen + + return result + + +def identify_redundant_ops(graph): + """Implements basic common subexpression elimination. + + This is not intended to replicate the graph semantics of TensorFlow Graphs + (for instance it does not handle stateful op ordering), nor is it intended + to replace the common subexpression elimination Grappler pass. 
Rather, it + provides a high level sanity check that clearly redundant ops are not being + created. + + Args: + graph: The graph to be analyzed. + + Returns: + A count of the duplicate ops and a description of the structure of each. + """ + sorted_ops = topological_sort(graph) + duplicates = collections.defaultdict(list) + unified_node_defs = {} + name_map = {} + + for op in sorted_ops: + input_names = [] + for op_input, name in zip(*get_inputs(op)): + input_def = op_input.node_def + + # Operations can have multiple outputs. We track which is used to + # prevent overzealous elimination. + input_def.name = name + + input_def.input[:] = [name_map.get(i, i) for i in input_def.input] + strip_name(input_def) + + # NodeDef.SerializeToString() does not provide identical serialized + # representations for identical NodeDefs, so we instead use string + # representation as a dict key. + key = repr(input_def) + + if key in unified_node_defs: + input_names.append(unified_node_defs[key]) + + else: + unified_node_defs[key] = op_input.name + input_names.append(name) + + node_def = op.node_def + node_def.input[:] = input_names + strip_name(node_def) + + key = repr(node_def) + duplicates[key].append(op) + name_map[op.name] = duplicates[key][0].name + + num_duplicates = 0 + duplicate_types = [] + for standard_def, op_defs in duplicates.items(): + # We are only interested in testing the apply method of the optimizer + op_defs = [i for i in op_defs if APPLY_SCOPE in i.name] + + # We only check for per-apply redundant ops. + if len(op_defs) < _NUM_LEARNERS: + continue + + # Certain ops are simply not worth eliminating, and are instead simply + # ignored. + name, op_type = op_defs[0].name, op_defs[0].type + if any( + allowlisted_scope in name and op_type == allowlisted_type + for allowlisted_scope, allowlisted_type in ALLOWLIST + ): + continue + + num_duplicates += len(op_defs) + traceback = [] + for level in op_defs[0].traceback: + traceback.append(f" {level[0]} {level[2]}:{level[1]}") + + duplicate_types.append( + "# Example name: {}\n# Op creation stack:\n{}\n{}".format( + op_defs[0].name, "\n".join(traceback), standard_def + ) + ) + + return num_duplicates, duplicate_types + + +def make_model(): + r"""Constructs a simple ensemble of weak learners model. + + --------- --------- --------- --------- + | Input | | Input | ... | Input | | Input | + --------- --------- --------- --------- + | | | | + V V V V + --------- --------- --------- --------- + | Embed | | Embed | ... | Embed | | Embed | + --------- --------- --------- --------- + | | | | + V V V V + --------- --------- --------- --------- + | Dense | | Dense | ... | Dense | | Dense | + --------- --------- --------- --------- + \ | | / + \ | | / + --------------------------------------------- + | + --------- + | Dense | + --------- + + This topology is chosen because it exercises both dense and sparse update + paths. + + Returns: + A model for testing optimizer coefficient reuse. 
+ """ + inputs = [] + intermediates = [] + for _ in range(_NUM_LEARNERS): + inp = keras.layers.Input(shape=(1,), dtype=tf.int32) + layer = keras.layers.Embedding(1, 4)(inp) + layer = keras.layers.Dense(1)(layer) + + inputs.append(inp) + intermediates.append(layer) + + layer = keras.layers.Concatenate(axis=-1)(intermediates) + layer = keras.layers.Dense(1)(layer) + + return keras.models.Model(inputs, layer) + + +COEFFICIENT_PARAMS = ( + ("Adadelta", adadelta.Adadelta, None), + ("Adagrad", adagrad.Adagrad, None), + ("Adam", adam.Adam, None), + ("Adam_amdgrad", adam.Adam, dict(amsgrad=True)), + ("Adamax", adamax.Adamax, None), + ("Ftrl", ftrl.Ftrl, None), + ( + "Ftrl_l2_shrinkage", + ftrl.Ftrl, + dict(l2_shrinkage_regularization_strength=0.1), + ), + ("SGD", gradient_descent.SGD, None), + ("SGD_momentum", gradient_descent.SGD, dict(momentum=0.5)), + ("Nadam", nadam.Nadam, None), + ("RMSprop", rmsprop.RMSprop, None), + ("RMSprop_centered", rmsprop.RMSprop, dict(centered=True)), + ("RMSprop_momentum", rmsprop.RMSprop, dict(momentum=0.5)), + ( + "RMSprop_momentum_centered", + rmsprop.RMSprop, + dict(momentum=0.5, centered=True), + ), +) + + +class OptimizerCoefficientTest(test_combinations.TestCase): + @parameterized.named_parameters(*COEFFICIENT_PARAMS) + def test_duplicate_ops(self, optimizer_class, init_kwargs=None): + init_kwargs = init_kwargs or {} + optimizer = optimizer_class(**init_kwargs) + + graph = tf.Graph() + with graph.as_default(): + model = make_model() + trainable_variables = model.trainable_variables + grads = optimizer.get_gradients( + model.outputs[0], trainable_variables + ) + + with backend.name_scope(APPLY_SCOPE): + optimizer.apply_gradients(zip(grads, trainable_variables)) + + num_duplicates, duplicate_types = identify_redundant_ops(graph) + if num_duplicates: + # Avoid spamming logs. + if len(duplicate_types) > 3: + duplicate_types = duplicate_types[:3] + ["..."] + + num_total = len(graph.get_operations()) + raise ValueError( + "{} of {} ({:.1f}%) ops were duplicates:\n\n{}".format( + num_duplicates, + num_total, + num_duplicates / num_total * 100, + "\n".join(duplicate_types), + ) + ) + + @parameterized.named_parameters(*COEFFICIENT_PARAMS) + def test_subclass_compat(self, optimizer_class, init_kwargs=None): + """Ensure that subclassed optimizers without apply_state still work.""" + + class SubclassedOptimizer(optimizer_class): + def _resource_apply_dense(self, grad, var): + return super()._resource_apply_dense(grad, var) + + def _resource_apply_sparse(self, grad, var, indices): + return super()._resource_apply_sparse(grad, var, indices) + + init_kwargs = init_kwargs or {} + optimizer = SubclassedOptimizer(**init_kwargs) + + graph = tf.Graph() + with graph.as_default(): + model = make_model() + trainable_variables = model.trainable_variables + grads = optimizer.get_gradients( + model.outputs[0], trainable_variables + ) + + with backend.name_scope(APPLY_SCOPE): + optimizer.apply_gradients(zip(grads, trainable_variables)) + + +class DeepcopyTests(tf.test.TestCase): + def setUp(self): + self.optimizer = adam.Adam(0.42) + super().setUp() + + def test_deepcopy(self): + clone = deepcopy(self.optimizer) + assert clone.get_config()["learning_rate"] == 0.42, "wrong lr" + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/rmsprop.py b/keras/optimizers/legacy/rmsprop.py index fe1bf7ab1a33..5537de9cc8ab 100644 --- a/keras/optimizers/legacy/rmsprop.py +++ b/keras/optimizers/legacy/rmsprop.py @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. 
All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,338 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Legacy RMSprop optimizer implementation.""" +"""RMSprop optimizer implementation.""" -from keras.optimizers.optimizer_v2 import rmsprop +import numpy as np +import tensorflow.compat.v2 as tf +from keras import backend_config +from keras.optimizers.legacy import optimizer_v2 + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.optimizers.legacy.RMSprop') -class RMSprop(rmsprop.RMSprop): - pass +@keras_export( + "keras.optimizers.legacy.RMSprop", + v1=["keras.optimizers.RMSprop", "keras.optimizers.legacy.RMSprop"], +) +class RMSprop(optimizer_v2.OptimizerV2): + r"""Optimizer that implements the RMSprop algorithm. + + The gist of RMSprop is to: + + - Maintain a moving (discounted) average of the square of gradients + - Divide the gradient by the root of this average + + This implementation of RMSprop uses plain momentum, not Nesterov momentum. + + The centered version additionally maintains a moving average of the + gradients, and uses that average to estimate the variance. + + Args: + learning_rate: A `Tensor`, floating point value, or a schedule that is a + `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable + that takes no arguments and returns the actual value to use. The + learning rate. Defaults to `0.001`. + rho: Discounting factor for the moving average of past gradients. + Defaults to `0.9`. + momentum: A scalar or a scalar `Tensor`. Defaults to `0.0`. + epsilon: A small constant for numerical stability, added to the + denominator to avoid division by zero. Defaults to `1e-7`. + centered: Boolean. If `True`, gradients are normalized by the estimated + variance of the gradient; if `False`, by the uncentered second moment. + Setting this to `True` may help with training, but is slightly more + expensive in terms of computation and memory. Defaults to `False`. + name: Optional name prefix for the operations created when applying + gradients. Defaults to `"RMSprop"`. + **kwargs: keyword arguments. Allowed arguments are `clipvalue`, + `clipnorm`, `global_clipnorm`. + If `clipvalue` (float) is set, the gradient of each weight + is clipped to be no higher than this value. + If `clipnorm` (float) is set, the gradient of each weight + is individually clipped so that its norm is no higher than this value. + If `global_clipnorm` (float) is set, the gradient of all weights is + clipped so that their global norm is no higher than this value. + + Note that in the dense implementation of this algorithm, variables and their + corresponding accumulators (momentum, gradient moving average, square + gradient moving average) will be updated even if the gradient is zero + (i.e. accumulators will decay, momentum will be applied).
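In update-rule form, the uncentered, zero-momentum dense path implemented below is (g_t is the gradient at step t):

    rms_t = rho * rms_{t-1} + (1 - rho) * g_t ** 2
    var_t = var_{t-1} - lr * g_t / (sqrt(rms_t) + epsilon)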
The sparse + implementation (used when the gradient is an `IndexedSlices` object, + typically because of `tf.gather` or an embedding lookup in the forward pass) + will not update variable slices or their accumulators unless those slices + were used in the forward pass (nor is there an "eventual" correction to + account for these omitted updates). This leads to more efficient updates for + large embedding lookup tables (where most of the slices are not accessed in + a particular graph execution), but differs from the published algorithm. + + Usage: + + >>> opt = tf.keras.optimizers.legacy.RMSprop(learning_rate=0.1) + >>> var1 = tf.Variable(10.0) + >>> loss = lambda: (var1 ** 2) / 2.0 # d(loss) / d(var1) = var1 + >>> step_count = opt.minimize(loss, [var1]).numpy() + >>> var1.numpy() + 9.683772 + + Reference: + - [Hinton, 2012]( + http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) + """ + + _HAS_AGGREGATE_GRAD = True + + def __init__( + self, + learning_rate=0.001, + rho=0.9, + momentum=0.0, + epsilon=1e-7, + centered=False, + name="RMSprop", + **kwargs, + ): + """Construct a new RMSprop optimizer. + + Args: + learning_rate: A `Tensor`, floating point value, or a schedule that is + a `tf.keras.optimizers.schedules.LearningRateSchedule`, or a + callable that takes no arguments and returns the actual value to + use. The learning rate. Defaults to `0.001`. + rho: Discounting factor for the moving average of past gradients. + Defaults to `0.9`. + momentum: A scalar or a scalar `Tensor`. Defaults to `0.0`. + epsilon: A small constant for numerical stability, added to the + denominator to avoid division by zero. Defaults to `1e-7`. + centered: Boolean. If `True`, gradients are normalized by the + estimated variance of the gradient; if `False`, by the uncentered + second moment. Setting this to `True` may help with training, but + is slightly more expensive in terms of computation and memory. + Defaults to `False`. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "RMSprop". + **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, + `lr`, `decay`}. `clipnorm` clips gradients by norm; `clipvalue` + clips gradients by value; `decay` is included for backward + compatibility to allow time-inverse decay of the learning rate; + `lr` is included for backward compatibility, but using + `learning_rate` instead is recommended. + + @compatibility(eager) + When eager execution is enabled, `learning_rate`, `decay`, `momentum`, + and `epsilon` can each be a callable that takes no arguments and returns + the actual value to use. This can be useful for changing these values + across different invocations of optimizer functions. + @end_compatibility + """ + super().__init__(name, **kwargs) + self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) + self._set_hyper("decay", self._initial_decay) + self._set_hyper("rho", rho) + + self._momentum = False + if ( + isinstance(momentum, tf.Tensor) + or callable(momentum) + or momentum > 0 + ): + self._momentum = True + if isinstance(momentum, (int, float)) and ( + momentum < 0 or momentum > 1 + ): + raise ValueError( + "`momentum` must be in the range [0, 1]. Received: " + f"momentum={momentum} (of type {type(momentum)})."
+ ) + self._set_hyper("momentum", momentum) + + self.epsilon = epsilon or backend_config.epsilon() + self.centered = centered + + def _create_slots(self, var_list): + for var in var_list: + self.add_slot(var, "rms") + if self._momentum: + for var in var_list: + self.add_slot(var, "momentum") + if self.centered: + for var in var_list: + self.add_slot(var, "mg") + + def _prepare_local(self, var_device, var_dtype, apply_state): + super()._prepare_local(var_device, var_dtype, apply_state) + + rho = tf.identity(self._get_hyper("rho", var_dtype)) + apply_state[(var_device, var_dtype)].update( + dict( + neg_lr_t=-apply_state[(var_device, var_dtype)]["lr_t"], + epsilon=tf.convert_to_tensor(self.epsilon, var_dtype), + rho=rho, + momentum=tf.identity(self._get_hyper("momentum", var_dtype)), + one_minus_rho=1.0 - rho, + ) + ) + + def _resource_apply_dense(self, grad, var, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + rms = self.get_slot(var, "rms") + if self._momentum: + mom = self.get_slot(var, "momentum") + if self.centered: + mg = self.get_slot(var, "mg") + return tf.raw_ops.ResourceApplyCenteredRMSProp( + var=var.handle, + mg=mg.handle, + ms=rms.handle, + mom=mom.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + momentum=coefficients["momentum"], + epsilon=coefficients["epsilon"], + grad=grad, + use_locking=self._use_locking, + ) + else: + return tf.raw_ops.ResourceApplyRMSProp( + var=var.handle, + ms=rms.handle, + mom=mom.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + momentum=coefficients["momentum"], + epsilon=coefficients["epsilon"], + grad=grad, + use_locking=self._use_locking, + ) + else: + rms_t = coefficients["rho"] * rms + coefficients[ + "one_minus_rho" + ] * tf.square(grad) + rms_t = tf.compat.v1.assign( + rms, rms_t, use_locking=self._use_locking + ) + denom_t = rms_t + if self.centered: + mg = self.get_slot(var, "mg") + mg_t = ( + coefficients["rho"] * mg + + coefficients["one_minus_rho"] * grad + ) + mg_t = tf.compat.v1.assign( + mg, mg_t, use_locking=self._use_locking + ) + denom_t = rms_t - tf.square(mg_t) + var_t = var - coefficients["lr_t"] * grad / ( + tf.sqrt(denom_t) + coefficients["epsilon"] + ) + return tf.compat.v1.assign( + var, var_t, use_locking=self._use_locking + ).op + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = (apply_state or {}).get( + (var_device, var_dtype) + ) or self._fallback_apply_state(var_device, var_dtype) + + rms = self.get_slot(var, "rms") + if self._momentum: + mom = self.get_slot(var, "momentum") + if self.centered: + mg = self.get_slot(var, "mg") + return tf.raw_ops.ResourceSparseApplyCenteredRMSProp( + var=var.handle, + mg=mg.handle, + ms=rms.handle, + mom=mom.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + momentum=coefficients["momentum"], + epsilon=coefficients["epsilon"], + grad=grad, + indices=indices, + use_locking=self._use_locking, + ) + else: + return tf.raw_ops.ResourceSparseApplyRMSProp( + var=var.handle, + ms=rms.handle, + mom=mom.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + momentum=coefficients["momentum"], + epsilon=coefficients["epsilon"], + grad=grad, + indices=indices, + use_locking=self._use_locking, + ) + else: + rms_scaled_g_values = (grad * grad) * coefficients["one_minus_rho"] + rms_t = 
tf.compat.v1.assign( + rms, rms * coefficients["rho"], use_locking=self._use_locking + ) + with tf.control_dependencies([rms_t]): + rms_t = self._resource_scatter_add( + rms, indices, rms_scaled_g_values + ) + rms_slice = tf.gather(rms_t, indices) + denom_slice = rms_slice + if self.centered: + mg = self.get_slot(var, "mg") + mg_scaled_g_values = grad * coefficients["one_minus_rho"] + mg_t = tf.compat.v1.assign( + mg, mg * coefficients["rho"], use_locking=self._use_locking + ) + with tf.control_dependencies([mg_t]): + mg_t = self._resource_scatter_add( + mg, indices, mg_scaled_g_values + ) + mg_slice = tf.gather(mg_t, indices) + denom_slice = rms_slice - tf.square(mg_slice) + var_update = self._resource_scatter_add( + var, + indices, + coefficients["neg_lr_t"] + * grad + / (tf.sqrt(denom_slice) + coefficients["epsilon"]), + ) + if self.centered: + return tf.group(*[var_update, rms_t, mg_t]) + return tf.group(*[var_update, rms_t]) + + def set_weights(self, weights): + params = self.weights + # Override set_weights for backward compatibility of Keras V1 optimizer + # since it does not include iteration at head of the weight list. Set + # iteration to 0. + if len(params) == len(weights) + 1: + weights = [np.array(0)] + weights + super().set_weights(weights) + + def get_config(self): + config = super().get_config() + config.update( + { + "learning_rate": self._serialize_hyperparameter( + "learning_rate" + ), + "decay": self._initial_decay, + "rho": self._serialize_hyperparameter("rho"), + "momentum": self._serialize_hyperparameter("momentum"), + "epsilon": self.epsilon, + "centered": self.centered, + } + ) + return config + + +RMSProp = RMSprop diff --git a/keras/optimizers/legacy/rmsprop_test.py b/keras/optimizers/legacy/rmsprop_test.py new file mode 100644 index 000000000000..f47d3f6b6717 --- /dev/null +++ b/keras/optimizers/legacy/rmsprop_test.py @@ -0,0 +1,814 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
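Condensed to numpy, the dense update that `_resource_apply_dense` above implements (and that the test file below re-derives as its reference) is only a few lines. This is an illustrative paraphrase, not the shipped kernels; the two epsilon placements are deliberate and mirror the fused raw ops (momentum path, epsilon inside the square root) versus the manual assign path (epsilon outside):

```python
import numpy as np

def rmsprop_step(var, g, rms, mg, mom, lr, rho, momentum, eps, centered):
    """One dense RMSprop step, paraphrasing the reference helper below."""
    rms = rho * rms + (1 - rho) * g * g      # second-moment accumulator
    if centered:
        mg = rho * mg + (1 - rho) * g        # first-moment accumulator
        denom = rms - mg * mg                # estimated variance
    else:
        denom = rms                          # uncentered second moment
    if momentum > 0:
        # Fused momentum kernels add epsilon inside the sqrt.
        mom = momentum * mom + lr * g / np.sqrt(denom + eps)
        var = var - mom
    else:
        # The non-momentum path adds epsilon outside the sqrt.
        var = var - lr * g / (np.sqrt(denom) + eps)
    return var, rms, mg, mom
```
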
+# ============================================================================== +"""Tests for rmsprop.""" + +import copy +import itertools +import math + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +from keras.optimizers.legacy import rmsprop +from keras.optimizers.schedules import learning_rate_schedule +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) + +_DATA_TYPES = [tf.half, tf.float32, tf.float64, tf.complex64, tf.complex128] + +_TEST_PARAM_VALUES = [ + # learning_rate, rho, momentum, epsilon, centered + [0.05, 0.9, 0.0, 1e-3, True], + [0.05, 0.9, 0.0, 1e-3, False], + [0.1, 0.9, 0.0, 1e-3, True], + [0.01, 0.9, 0.0, 1e-5, True], + [0.01, 0.9, 0.9, 1e-5, True], +] + +_TESTPARAMS = [ + [data_type] + values + for data_type, values in itertools.product(_DATA_TYPES, _TEST_PARAM_VALUES) +] + + +class RMSpropOptimizerTest(tf.test.TestCase, parameterized.TestCase): + def _rmsprop_update_numpy( + self, var, g, mg, rms, mom, lr, rho, momentum, epsilon, centered + ): + rms_t = rms * rho + (1 - rho) * g * g + if centered: + mg_t = mg * rho + (1 - rho) * g + denom_t = rms_t - mg_t * mg_t + else: + mg_t = mg + denom_t = rms_t + if momentum > 0.0: + mom_t = momentum * mom + lr * g / (np.sqrt(denom_t + epsilon)) + var_t = var - mom_t + else: + mom_t = mom + var_t = var - lr * g / (np.sqrt(denom_t) + epsilon) + return var_t, mg_t, rms_t, mom_t + + def _sparse_rmsprop_update_numpy( + self, + var, + gindexs, + gvalues, + mg, + rms, + mom, + lr, + rho, + momentum, + epsilon, + centered, + ): + mg_t = copy.deepcopy(mg) + rms_t = copy.deepcopy(rms) + mom_t = copy.deepcopy(mom) + var_t = copy.deepcopy(var) + for i in range(len(gindexs)): + gindex = gindexs[i] + gvalue = gvalues[i] + rms_t[gindex] = rms[gindex] * rho + (1 - rho) * gvalue * gvalue + if centered: + mg_t[gindex] = mg_t[gindex] * rho + (1 - rho) * gvalue + denom_t = rms_t[gindex] - mg_t[gindex] * mg_t[gindex] + else: + denom_t = rms_t[gindex] + if momentum > 0.0: + mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt( + denom_t + epsilon + ) + var_t[gindex] = var[gindex] - mom_t[gindex] + else: + mom_t[gindex] = mom[gindex] + var_t[gindex] = var[gindex] - lr * gvalue / ( + np.sqrt(denom_t) + epsilon + ) + return var_t, mg_t, rms_t, mom_t + + def testDense(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for ( + dtype, + learning_rate, + rho, + momentum, + epsilon, + centered, + ) in _TESTPARAMS: + with tf.compat.v1.get_default_graph().as_default(), test_utils.use_gpu(): # noqa: E501 + # Initialize variables for numpy implementation. 
+ var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np, dtype=dtype) + var1 = tf.Variable(var1_np, dtype=dtype) + grads0 = tf.constant(grads0_np, dtype=dtype) + grads1 = tf.constant(grads1_np, dtype=dtype) + opt = rmsprop.RMSprop( + learning_rate=learning_rate, + rho=rho, + momentum=momentum, + epsilon=epsilon, + centered=centered, + ) + + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + if centered: + mg0 = opt.get_slot(var0, "mg") + mg1 = opt.get_slot(var1, "mg") + else: + mg0 = None + mg1 = None + + if momentum > 0.0: + mom0 = opt.get_slot(var0, "momentum") + mom1 = opt.get_slot(var1, "momentum") + else: + mom0 = None + mom1 = None + + rms0 = opt.get_slot(var0, "rms") + self.assertIsNotNone(rms0) + rms1 = opt.get_slot(var1, "rms") + self.assertIsNotNone(rms1) + + mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 3 steps of RMSprop + for _ in range(1, 4): + self.evaluate(update) + + ( + var0_np, + mg0_np, + rms0_np, + mom0_np, + ) = self._rmsprop_update_numpy( + var0_np, + grads0_np, + mg0_np, + rms0_np, + mom0_np, + learning_rate, + rho, + momentum, + epsilon, + centered, + ) + ( + var1_np, + mg1_np, + rms1_np, + mom1_np, + ) = self._rmsprop_update_numpy( + var1_np, + grads1_np, + mg1_np, + rms1_np, + mom1_np, + learning_rate, + rho, + momentum, + epsilon, + centered, + ) + + # Validate updated params + if centered: + self.assertAllCloseAccordingToType( + mg0_np, self.evaluate(mg0) + ) + self.assertAllCloseAccordingToType( + mg1_np, self.evaluate(mg1) + ) + if momentum > 0.0: + self.assertAllCloseAccordingToType( + mom0_np, self.evaluate(mom0) + ) + self.assertAllCloseAccordingToType( + mom1_np, self.evaluate(mom1) + ) + self.assertAllCloseAccordingToType( + rms0_np, self.evaluate(rms0) + ) + self.assertAllCloseAccordingToType( + rms1_np, self.evaluate(rms1) + ) + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + def testDenseWithLearningRateDecay(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
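The graph-mode loop above cross-checks three optimizer steps against `_rmsprop_update_numpy`. For a quick eager spot check of the same arithmetic, a one-step sketch (assuming the class is exported as `tf.keras.optimizers.legacy.RMSprop`, as its docstring shows) would be:

```python
import numpy as np
import tensorflow as tf

lr, rho, eps = 0.05, 0.9, 1e-3
var = tf.Variable([1.0, 2.0])
grad = tf.constant([0.1, 0.2])

opt = tf.keras.optimizers.legacy.RMSprop(
    learning_rate=lr, rho=rho, epsilon=eps
)
opt.apply_gradients([(grad, var)])

# Same step in numpy: the accumulator starts at zero.
g = np.array([0.1, 0.2])
rms = (1 - rho) * g * g
expected = np.array([1.0, 2.0]) - lr * g / (np.sqrt(rms) + eps)
np.testing.assert_allclose(var.numpy(), expected, rtol=1e-5)
```
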
+ with tf.Graph().as_default(): + var0_np = np.array([1.0, 2.0]) + grads0_np = np.array([0.1, 0.2]) + var1_np = np.array([3.0, 4.0]) + grads1_np = np.array([0.01, 0.2]) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + learning_rate = 0.01 + rho = 0.9 + momentum = 0.0 + epsilon = 1e-7 + centered = False + decay = 0.5 + opt = rmsprop.RMSprop( + learning_rate=learning_rate, + rho=rho, + momentum=momentum, + epsilon=epsilon, + centered=centered, + decay=decay, + ) + + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + rms0 = opt.get_slot(var0, "rms") + self.assertIsNotNone(rms0) + rms1 = opt.get_slot(var1, "rms") + self.assertIsNotNone(rms1) + if momentum > 0.0: + mom0 = opt.get_slot(var0, "momentum") + mom1 = opt.get_slot(var1, "momentum") + else: + mom0 = None + mom1 = None + + mg0_np = np.array([0.0, 0.0]) + mg1_np = np.array([0.0, 0.0]) + rms0_np = np.array([0.0, 0.0]) + rms1_np = np.array([0.0, 0.0]) + mom0_np = np.array([0.0, 0.0]) + mom1_np = np.array([0.0, 0.0]) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 2 steps of RMSprop + for t in range(2): + self.evaluate(update) + + lr = learning_rate / (1 + decay * t) + var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy( + var0_np, + grads0_np, + mg0_np, + rms0_np, + mom0_np, + lr, + rho, + momentum, + epsilon, + centered, + ) + var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy( + var1_np, + grads1_np, + mg1_np, + rms1_np, + mom1_np, + lr, + rho, + momentum, + epsilon, + centered, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0)) + self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1)) + if momentum > 0.0: + self.assertAllCloseAccordingToType( + mom0_np, self.evaluate(mom0) + ) + self.assertAllCloseAccordingToType( + mom1_np, self.evaluate(mom1) + ) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testDenseWithLearningRateInverseTimeDecay(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode.
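`testDenseWithLearningRateDecay` above models the legacy `decay` argument as `lr / (1 + decay * t)`; the `testDenseWithLearningRateInverseTimeDecay` body that follows drives the identical curve through an explicit `InverseTimeDecay` schedule with `decay_steps=1.0`. A sketch of the first few rates both tests feed into the numpy reference:

```python
learning_rate, decay = 0.01, 0.5
# legacy `decay=` argument:          lr / (1 + decay * t)
# InverseTimeDecay(decay_steps=1.0): lr / (1 + decay_rate * t / 1.0)
lrs = [learning_rate / (1 + decay * t) for t in range(3)]
print(lrs)  # [0.01, 0.00666..., 0.005]
```
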
+ with tf.Graph().as_default(): + var0_np = np.array([1.0, 2.0]) + grads0_np = np.array([0.1, 0.2]) + var1_np = np.array([3.0, 4.0]) + grads1_np = np.array([0.01, 0.2]) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + learning_rate = 0.01 + rho = 0.9 + momentum = 0.0 + epsilon = 1e-7 + centered = False + decay = 0.5 + lr_schedule = learning_rate_schedule.InverseTimeDecay( + learning_rate, decay_steps=1.0, decay_rate=decay + ) + opt = rmsprop.RMSprop( + learning_rate=lr_schedule, + rho=rho, + momentum=momentum, + epsilon=epsilon, + centered=centered, + ) + + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + rms0 = opt.get_slot(var0, "rms") + self.assertIsNotNone(rms0) + rms1 = opt.get_slot(var1, "rms") + self.assertIsNotNone(rms1) + if momentum > 0.0: + mom0 = opt.get_slot(var0, "momentum") + mom1 = opt.get_slot(var1, "momentum") + else: + mom0 = None + mom1 = None + + mg0_np = np.array([0.0, 0.0]) + mg1_np = np.array([0.0, 0.0]) + rms0_np = np.array([0.0, 0.0]) + rms1_np = np.array([0.0, 0.0]) + mom0_np = np.array([0.0, 0.0]) + mom1_np = np.array([0.0, 0.0]) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 2 steps of RMSprop + for t in range(2): + self.evaluate(update) + + lr = learning_rate / (1 + decay * t) + var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy( + var0_np, + grads0_np, + mg0_np, + rms0_np, + mom0_np, + lr, + rho, + momentum, + epsilon, + centered, + ) + var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy( + var1_np, + grads1_np, + mg1_np, + rms1_np, + mom1_np, + lr, + rho, + momentum, + epsilon, + centered, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0)) + self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1)) + if momentum > 0.0: + self.assertAllCloseAccordingToType( + mom0_np, self.evaluate(mom0) + ) + self.assertAllCloseAccordingToType( + mom1_np, self.evaluate(mom1) + ) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testMinimizeSparseResourceVariable(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) + x = tf.constant([[4.0], [5.0]], dtype=dtype) + + def loss(): + pred = tf.matmul( + tf.compat.v1.nn.embedding_lookup([var0], [0]), x + ) + return pred * pred + + sgd_op = rmsprop.RMSprop( + learning_rate=1.0, + rho=0.0, + momentum=0.0, + epsilon=0.0, + centered=False, + ).minimize(loss, var_list=[var0]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllCloseAccordingToType( + [[1.0, 2.0]], self.evaluate(var0) + ) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[0.0, 1.0]], self.evaluate(var0), atol=0.01 + ) + + def testMinimizeSparseResourceVariableCentered(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode.
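The `[[0.0, 1.0]]` expectation in `testMinimizeSparseResourceVariable` above can be verified by hand: with `rho=0.0` and `epsilon=0.0` the RMSprop step collapses to a unit step in the direction of the gradient's sign. A scalar check of that arithmetic:

```python
import numpy as np

var0 = np.array([[1.0, 2.0]])
x = np.array([[4.0], [5.0]])
pred = var0 @ x               # [[14.]]
grad = 2 * pred * x.T         # d(pred**2)/d(var0) = [[112., 140.]]
# rho=0     -> rms_t = grad**2
# epsilon=0 -> step  = lr * grad / sqrt(grad**2) = sign(grad)
print(var0 - np.sign(grad))   # [[0., 1.]], the asserted value
```
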
+ with tf.Graph().as_default(): + for dtype in _DATA_TYPES: + var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) + x = tf.constant([[4.0], [5.0]], dtype=dtype) + + def loss(): + pred = tf.matmul( + tf.compat.v1.nn.embedding_lookup([var0], [0]), x + ) + return pred * pred + + # loss = lambda: pred * pred + # disable=cell-var-from-loop + sgd_op = rmsprop.RMSprop( + learning_rate=1.0, + rho=0.0, + momentum=0.0, + epsilon=1.0, + centered=True, + ).minimize(loss, var_list=[var0]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllCloseAccordingToType( + [[1.0, 2.0]], self.evaluate(var0) + ) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[-111, -138]], self.evaluate(var0), atol=0.01 + ) + + def testSparse(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. + for ( + dtype, + learning_rate, + rho, + momentum, + epsilon, + centered, + ) in _TESTPARAMS: + with tf.compat.v1.get_default_graph().as_default(), test_utils.use_gpu(): # noqa: E501 + # Initialize variables for numpy implementation. + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0_np_indices = np.array([0], dtype=np.int32) + grads0 = tf.IndexedSlices( + tf.constant(grads0_np), + tf.constant(grads0_np_indices), + tf.constant([1]), + ) + grads1_np_indices = np.array([1], dtype=np.int32) + grads1 = tf.IndexedSlices( + tf.constant(grads1_np), + tf.constant(grads1_np_indices), + tf.constant([1]), + ) + opt = rmsprop.RMSprop( + learning_rate=learning_rate, + rho=rho, + momentum=momentum, + epsilon=epsilon, + centered=centered, + ) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + + if centered: + mg0 = opt.get_slot(var0, "mg") + self.assertEqual(mg0 is not None, centered) + mg1 = opt.get_slot(var1, "mg") + self.assertEqual(mg1 is not None, centered) + else: + mg0 = None + mg1 = None + rms0 = opt.get_slot(var0, "rms") + self.assertIsNotNone(rms0) + rms1 = opt.get_slot(var1, "rms") + self.assertIsNotNone(rms1) + if momentum > 0.0: + mom0 = opt.get_slot(var0, "momentum") + mom1 = opt.get_slot(var1, "momentum") + else: + mom0 = None + mom1 = None + + mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 3 steps of RMSprop + for _ in range(1, 4): + self.evaluate(update) + + ( + var0_np, + mg0_np, + rms0_np, + mom0_np, + ) = self._sparse_rmsprop_update_numpy( + var0_np, + grads0_np_indices, + grads0_np, + mg0_np, + rms0_np, + mom0_np, + learning_rate, + rho, + momentum, + epsilon, + centered, + ) + ( + var1_np, + mg1_np, + rms1_np, + mom1_np, + ) = self._sparse_rmsprop_update_numpy( + var1_np, + grads1_np_indices, + grads1_np, + mg1_np, + rms1_np, + mom1_np, + learning_rate, + rho, + momentum, + epsilon, + 
centered, + ) + + # Validate updated params + if centered: + self.assertAllCloseAccordingToType( + mg0_np, self.evaluate(mg0) + ) + self.assertAllCloseAccordingToType( + mg1_np, self.evaluate(mg1) + ) + self.assertAllCloseAccordingToType( + rms0_np, self.evaluate(rms0) + ) + self.assertAllCloseAccordingToType( + rms1_np, self.evaluate(rms1) + ) + if momentum > 0.0: + self.assertAllCloseAccordingToType( + mom0_np, self.evaluate(mom0) + ) + self.assertAllCloseAccordingToType( + mom1_np, self.evaluate(mom1) + ) + self.assertAllCloseAccordingToType( + var0_np, self.evaluate(var0) + ) + self.assertAllCloseAccordingToType( + var1_np, self.evaluate(var1) + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testCallableParams(self): + for dtype in _DATA_TYPES: + var0 = tf.Variable([1.0, 2.0], dtype=dtype) + var1 = tf.Variable([3.0, 4.0], dtype=dtype) + grads0 = tf.constant([0.1, 0.1], dtype=dtype) + grads1 = tf.constant([0.01, 0.01], dtype=dtype) + + learning_rate = lambda: 2.0 + rho = lambda: 0.9 + momentum = lambda: 0.0 + epsilon = 1.0 + opt = rmsprop.RMSprop(learning_rate, rho, momentum, epsilon) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + # Step 1: the rms accumulators were 1. So we should see a normal + # update: v -= grad * learning_rate + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + # Check the parameters. + self.assertAllCloseAccordingToType( + np.array( + [ + 1.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)), + 2.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)), + ] + ), + self.evaluate(var0), + ) + self.assertAllCloseAccordingToType( + np.array( + [ + 3.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)), + 4.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)), + ] + ), + self.evaluate(var1), + ) + # Step 2: the root mean square accumulators contain the previous + # update. + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + # Check the parameters. + self.assertAllCloseAccordingToType( + np.array( + [ + 1.0 + - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)) + - (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)), + 2.0 + - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)) + - (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)), + ] + ), + self.evaluate(var0), + ) + self.assertAllCloseAccordingToType( + np.array( + [ + 3.0 + - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)) + - (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)), + 4.0 + - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)) + - (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)), + ] + ), + self.evaluate(var1), + ) + + def testConstructRMSpropWithLR(self): + opt = rmsprop.RMSprop(lr=1.0) + opt_2 = rmsprop.RMSprop(learning_rate=0.1, lr=1.0) + opt_3 = rmsprop.RMSprop(learning_rate=0.1) + self.assertIsInstance(opt.lr, tf.Variable) + self.assertIsInstance(opt_2.lr, tf.Variable) + self.assertIsInstance(opt_3.lr, tf.Variable) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(opt.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) + self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testSlotsUniqueEager(self): + v1 = tf.Variable(1.0) + v2 = tf.Variable(1.0) + + opt = rmsprop.RMSprop(1.0, momentum=0.0, centered=False) + opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) + # There should be iteration, and one unique slot variable for v1 and v2.
+ self.assertLen(set({id(v) for v in opt.variables()}), 3) + self.assertEqual( + self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations) + ) + + opt = rmsprop.RMSprop(learning_rate=1.0, momentum=0.2, centered=False) + opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) + # There should be iteration, and two unique slot variables for v1 and + # v2. + self.assertLen(set({id(v) for v in opt.variables()}), 5) + self.assertEqual( + self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations) + ) + + opt = rmsprop.RMSprop(learning_rate=1.0, momentum=0.2, centered=True) + opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) + # There should be iteration, and three unique slot variables for v1 and + # v2 + self.assertLen(set({id(v) for v in opt.variables()}), 7) + self.assertEqual( + self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations) + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testMomentumProperValue(self): + with self.assertRaisesRegex( + ValueError, + r"`momentum` must be between \[0, 1\]. " + r"Received: momentum=2.5 \(of type <class 'float'>\).", + ): + rmsprop.RMSprop(1.0, momentum=2.5, centered=False) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class SlotColocationTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.parameters([True, False]) + @tf_test_utils.run_gpu_only + def testRunMinimizeOnGPUForCPUVariables(self, use_resource): + with tf.device("/device:CPU:0"): + if use_resource: + var0 = tf.Variable([1.0, 2.0], dtype=tf.float32) + var1 = tf.Variable([3.0, 4.0], dtype=tf.float32) + else: + var0 = tf.Variable([1.0, 2.0], dtype=tf.float32) + var1 = tf.Variable([3.0, 4.0], dtype=tf.float32) + + def loss(): + return 5 * var0 + 3 * var1 + + opt = rmsprop.RMSprop( + learning_rate=1.0, decay=0.9, momentum=0.5, epsilon=1.0 + ) + + # Fetch params to validate initial values + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 1 step through optimizer on GPU. + # Slot variables are created the first time optimizer is used on some + # variable. This tests that slot variables will be colocated with the + # base variable. + with tf.device("/device:GPU:0"): + # Note that for eager execution, minimize expects a function instead + # of a Tensor. + opt_op = opt.minimize(loss, [var0, var1]) + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.evaluate(opt_op) + + # Validate updated params. All variables should have decreased. + self.assertTrue( + all(v < 0.0 for v in self.evaluate(var0)), + msg=f"updated variables: {self.evaluate(var0)}", + ) + self.assertTrue( + all(v < 2.0 for v in self.evaluate(var1)), + msg=f"updated variables: {self.evaluate(var1)}", + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/legacy/sgd.py b/keras/optimizers/legacy/sgd.py deleted file mode 100644 index b53744adbc8e..000000000000 --- a/keras/optimizers/legacy/sgd.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
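The 3/5/7 counts asserted by `testSlotsUniqueEager` above follow from `_create_slots`: each trainable variable always gets an `rms` slot, plus a `momentum` slot when `momentum > 0` and an `mg` slot when `centered=True`, and the optimizer always owns a single `iterations` counter. A sketch of that arithmetic (the helper name is illustrative, not part of the API):

```python
def expected_optimizer_variables(n_vars, momentum, centered):
    # rms slot always, momentum/mg slots conditionally, per variable.
    slots_per_var = 1 + (momentum > 0) + bool(centered)
    return 1 + slots_per_var * n_vars  # +1 for `iterations`

assert expected_optimizer_variables(2, momentum=0.0, centered=False) == 3
assert expected_optimizer_variables(2, momentum=0.2, centered=False) == 5
assert expected_optimizer_variables(2, momentum=0.2, centered=True) == 7
```
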
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Legacy SGD optimizer implementation.""" - -from keras.optimizers.optimizer_v2 import gradient_descent - -from tensorflow.python.util.tf_export import keras_export - - -@keras_export('keras.optimizers.legacy.SGD') -class SGD(gradient_descent.SGD): - pass diff --git a/keras/optimizers/legacy_learning_rate_decay.py b/keras/optimizers/legacy_learning_rate_decay.py index 34afbd4f4c4c..8d8c217cecdf 100644 --- a/keras/optimizers/legacy_learning_rate_decay.py +++ b/keras/optimizers/legacy_learning_rate_decay.py @@ -14,754 +14,800 @@ # ============================================================================== """Various learning rate decay functions.""" +import functools + import tensorflow.compat.v2 as tf -import functools from keras.optimizers.schedules import learning_rate_schedule + +# isort: off from tensorflow.python.util.tf_export import tf_export @tf_export(v1=["train.exponential_decay"]) -def exponential_decay(learning_rate, - global_step, - decay_steps, - decay_rate, - staircase=False, - name=None): - """Applies exponential decay to the learning rate. - - When training a model, it is often recommended to lower the learning rate as - the training progresses. This function applies an exponential decay function - to a provided initial learning rate. It requires a `global_step` value to - compute the decayed learning rate. You can just pass a TensorFlow variable - that you increment at each training step. - - The function returns the decayed learning rate. It is computed as: - - ```python - decayed_learning_rate = learning_rate * - decay_rate ^ (global_step / decay_steps) - ``` - - If the argument `staircase` is `True`, then `global_step / decay_steps` is an - integer division and the decayed learning rate follows a staircase function. - - Example: decay every 100000 steps with a base of 0.96: - - ```python - ... - global_step = tf.Variable(0, trainable=False) - starter_learning_rate = 0.1 - learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate, - global_step, - 100000, 0.96, staircase=True) - # Passing global_step to minimize() will increment it at each step. - learning_step = ( - tf.compat.v1.train.GradientDescentOptimizer(learning_rate) - .minimize(...my loss..., global_step=global_step) - ) - ``` - - Args: - learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number. - The initial learning rate. - global_step: A scalar `int32` or `int64` `Tensor` or a Python number. Global - step to use for the decay computation. Must not be negative. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Must - be positive. See the decay computation above. - decay_rate: A scalar `float32` or `float64` `Tensor` or a Python number. - The decay rate. - staircase: Boolean. If `True` decay the learning rate at discrete intervals - name: String. Optional name of the operation. Defaults to - 'ExponentialDecay'. - - Returns: - A scalar `Tensor` of the same type as `learning_rate`. The decayed - learning rate. 
- - Raises: - ValueError: if `global_step` is not supplied. - - @compatibility(eager) - When eager execution is enabled, this function returns a function which in - turn returns the decayed learning rate Tensor. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - @end_compatibility - """ - decayed_lr = learning_rate_schedule.ExponentialDecay( - learning_rate, decay_steps, decay_rate, staircase=staircase, name=name) - if not tf.executing_eagerly(): - decayed_lr = decayed_lr(global_step) - else: - decayed_lr = functools.partial(decayed_lr, global_step) - return decayed_lr +def exponential_decay( + learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False, + name=None, +): + """Applies exponential decay to the learning rate. + + When training a model, it is often recommended to lower the learning rate as + the training progresses. This function applies an exponential decay function + to a provided initial learning rate. It requires a `global_step` value to + compute the decayed learning rate. You can just pass a TensorFlow variable + that you increment at each training step. + + The function returns the decayed learning rate. It is computed as: + + ```python + decayed_learning_rate = learning_rate * + decay_rate ^ (global_step / decay_steps) + ``` + + If the argument `staircase` is `True`, then `global_step / decay_steps` is + an integer division and the decayed learning rate follows a staircase + function. + + Example: decay every 100000 steps with a base of 0.96: + + ```python + ... + global_step = tf.Variable(0, trainable=False) + starter_learning_rate = 0.1 + learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate, + global_step, + 100000, 0.96, staircase=True) + # Passing global_step to minimize() will increment it at each step. + learning_step = ( + tf.compat.v1.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) + ) + ``` + + Args: + learning_rate: A scalar `float32` or `float64` `Tensor` or a Python + number. The initial learning rate. + global_step: A scalar `int32` or `int64` `Tensor` or a Python number. + Global step to use for the decay computation. Must not be negative. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Must + be positive. See the decay computation above. + decay_rate: A scalar `float32` or `float64` `Tensor` or a Python number. + The decay rate. + staircase: Boolean. If `True` decay the learning rate at discrete + intervals + name: String. Optional name of the operation. Defaults to + 'ExponentialDecay'. + + Returns: + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + + Raises: + ValueError: if `global_step` is not supplied. + + @compatibility(eager) + When eager execution is enabled, this function returns a function which in + turn returns the decayed learning rate Tensor. This can be useful for + changing the learning rate value across different invocations of optimizer + functions. 
+ @end_compatibility + """ + decayed_lr = learning_rate_schedule.ExponentialDecay( + learning_rate, decay_steps, decay_rate, staircase=staircase, name=name + ) + if not tf.executing_eagerly(): + decayed_lr = decayed_lr(global_step) + else: + decayed_lr = functools.partial(decayed_lr, global_step) + return decayed_lr @tf_export(v1=["train.piecewise_constant_decay", "train.piecewise_constant"]) def piecewise_constant(x, boundaries, values, name=None): - """Piecewise constant from boundaries and interval values. - - Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5 - for the next 10000 steps, and 0.1 for any additional steps. - - ```python - global_step = tf.Variable(0, trainable=False) - boundaries = [100000, 110000] - values = [1.0, 0.5, 0.1] - learning_rate = tf.compat.v1.train.piecewise_constant(global_step, boundaries, - values) - - # Later, whenever we perform an optimization step, we increment global_step. - ``` - - Args: - x: A 0-D scalar `Tensor`. Must be one of the following types: `float32`, - `float64`, `uint8`, `int8`, `int16`, `int32`, `int64`. - boundaries: A list of `Tensor`s or `int`s or `float`s with strictly - increasing entries, and with all elements having the same type as `x`. - values: A list of `Tensor`s or `float`s or `int`s that specifies the values - for the intervals defined by `boundaries`. It should have one more element - than `boundaries`, and all elements should have the same type. - name: A string. Optional name of the operation. Defaults to - 'PiecewiseConstant'. - - Returns: - A 0-D Tensor. Its value is `values[0]` when `x <= boundaries[0]`, - `values[1]` when `x > boundaries[0]` and `x <= boundaries[1]`, ..., - and values[-1] when `x > boundaries[-1]`. - - Raises: - ValueError: if types of `x` and `boundaries` do not match, or types of all - `values` do not match or - the number of elements in the lists does not match. - - @compatibility(eager) - When eager execution is enabled, this function returns a function which in - turn returns the decayed learning rate Tensor. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - @end_compatibility - """ - boundaries = tf.nest.map_structure(tf.convert_to_tensor, - tf.nest.flatten(boundaries)) - values = tf.nest.map_structure(tf.convert_to_tensor, - tf.nest.flatten(values)) - x_recomp = tf.convert_to_tensor(x) - # Avoid explicit conversion to x's dtype. This could result in faulty - # comparisons, for example if floats are converted to integers. - for i, b in enumerate(boundaries): - if b.dtype.base_dtype != x_recomp.dtype.base_dtype: - # We can promote int32 boundaries to int64 without loss of precision. - # This covers the most common case where the user passes in boundaries - # as an array of Python integers. 
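Each rewritten v1 wrapper in this file follows the shape of `exponential_decay` above: construct a `LearningRateSchedule`, then either call it with `global_step` (graph mode) or defer the call via `functools.partial` (eager mode). A minimal sketch of the same dispatch against the public schedule class:

```python
import functools
import tensorflow.compat.v2 as tf

schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.1, decay_steps=100000, decay_rate=0.96, staircase=True
)
global_step = tf.Variable(0, trainable=False)

if tf.executing_eagerly():
    # A zero-argument callable; re-reads `global_step` on every call.
    decayed_lr = functools.partial(schedule, global_step)
    print(float(decayed_lr()))  # 0.1 at step 0
else:
    decayed_lr = schedule(global_step)  # a Tensor wired into the graph
```
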
- if (b.dtype.base_dtype == tf.int32 and - x_recomp.dtype.base_dtype == tf.int64): - b = tf.cast(b, x_recomp.dtype.base_dtype) - boundaries[i] = b - else: - raise ValueError( - f"`boundaries` ({b.dtype.base_dtype}) must have the same dtype as " - f"x ({x_recomp.dtype.base_dtype}).") - for v in values[1:]: - if v.dtype.base_dtype != values[0].dtype.base_dtype: - raise ValueError( - f"`values` must have elements all with the same dtype " - f"({values[0].dtype.base_dtype} vs {v.dtype.base_dtype}).") - decayed_lr = learning_rate_schedule.PiecewiseConstantDecay( - boundaries, values, name=name) - if not tf.executing_eagerly(): - decayed_lr = decayed_lr(x) - else: - decayed_lr = functools.partial(decayed_lr, x) - return decayed_lr + """Piecewise constant from boundaries and interval values. + + Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5 + for the next 10000 steps, and 0.1 for any additional steps. + + ```python + global_step = tf.Variable(0, trainable=False) + boundaries = [100000, 110000] + values = [1.0, 0.5, 0.1] + learning_rate = tf.compat.v1.train.piecewise_constant( + global_step, boundaries, values) + + # Later, whenever we perform an optimization step, we increment global_step. + ``` + + Args: + x: A 0-D scalar `Tensor`. Must be one of the following types: `float32`, + `float64`, `uint8`, `int8`, `int16`, `int32`, `int64`. + boundaries: A list of `Tensor`s or `int`s or `float`s with strictly + increasing entries, and with all elements having the same type as `x`. + values: A list of `Tensor`s or `float`s or `int`s that specifies the + values for the intervals defined by `boundaries`. It should have one + more element than `boundaries`, and all elements should have the same + type. + name: A string. Optional name of the operation. Defaults to + 'PiecewiseConstant'. + + Returns: + A 0-D Tensor. Its value is `values[0]` when `x <= boundaries[0]`, + `values[1]` when `x > boundaries[0]` and `x <= boundaries[1]`, ..., + and values[-1] when `x > boundaries[-1]`. + + Raises: + ValueError: if types of `x` and `boundaries` do not match, or types of all + `values` do not match or + the number of elements in the lists does not match. + + @compatibility(eager) + When eager execution is enabled, this function returns a function which in + turn returns the decayed learning rate Tensor. This can be useful for + changing the learning rate value across different invocations of optimizer + functions. + @end_compatibility + """ + boundaries = tf.nest.map_structure( + tf.convert_to_tensor, tf.nest.flatten(boundaries) + ) + values = tf.nest.map_structure( + tf.convert_to_tensor, tf.nest.flatten(values) + ) + x_recomp = tf.convert_to_tensor(x) + # Avoid explicit conversion to x's dtype. This could result in faulty + # comparisons, for example if floats are converted to integers. + for i, b in enumerate(boundaries): + if b.dtype.base_dtype != x_recomp.dtype.base_dtype: + # We can promote int32 boundaries to int64 without loss of + # precision. This covers the most common case where the user passes + # in boundaries as an array of Python integers. + if ( + b.dtype.base_dtype == tf.int32 + and x_recomp.dtype.base_dtype == tf.int64 + ): + b = tf.cast(b, x_recomp.dtype.base_dtype) + boundaries[i] = b + else: + raise ValueError( + f"`boundaries` ({b.dtype.base_dtype}) must have the same " + f"dtype as x ({x_recomp.dtype.base_dtype})." 
+ ) + for v in values[1:]: + if v.dtype.base_dtype != values[0].dtype.base_dtype: + raise ValueError( + "`values` must have elements all with the same dtype " + f"({values[0].dtype.base_dtype} vs {v.dtype.base_dtype})." + ) + decayed_lr = learning_rate_schedule.PiecewiseConstantDecay( + boundaries, values, name=name + ) + if not tf.executing_eagerly(): + decayed_lr = decayed_lr(x) + else: + decayed_lr = functools.partial(decayed_lr, x) + return decayed_lr @tf_export(v1=["train.polynomial_decay"]) -def polynomial_decay(learning_rate, - global_step, - decay_steps, - end_learning_rate=0.0001, - power=1.0, - cycle=False, - name=None): - """Applies a polynomial decay to the learning rate. - - It is commonly observed that a monotonically decreasing learning rate, whose - degree of change is carefully chosen, results in a better performing model. - This function applies a polynomial decay function to a provided initial - `learning_rate` to reach an `end_learning_rate` in the given `decay_steps`. - - It requires a `global_step` value to compute the decayed learning rate. You - can just pass a TensorFlow variable that you increment at each training step. - - The function returns the decayed learning rate. It is computed as: - - ```python - global_step = min(global_step, decay_steps) - decayed_learning_rate = (learning_rate - end_learning_rate) * - (1 - global_step / decay_steps) ^ (power) + - end_learning_rate - - ``` - - If `cycle` is True then a multiple of `decay_steps` is used, the first one - that is bigger than `global_steps`. - - ```python - decay_steps = decay_steps * ceil(global_step / decay_steps) - decayed_learning_rate = (learning_rate - end_learning_rate) * - (1 - global_step / decay_steps) ^ (power) + - end_learning_rate - - ``` - - Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5): - - ```python - ... - global_step = tf.Variable(0, trainable=False) - starter_learning_rate = 0.1 - end_learning_rate = 0.01 - decay_steps = 10000 - learning_rate = tf.compat.v1.train.polynomial_decay(starter_learning_rate, - global_step, - decay_steps, end_learning_rate, - power=0.5) - # Passing global_step to minimize() will increment it at each step. - learning_step = ( - tf.compat.v1.train.GradientDescentOptimizer(learning_rate) - .minimize(...my loss..., global_step=global_step) - ) - ``` - - Args: - learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number. - The initial learning rate. - global_step: A scalar `int32` or `int64` `Tensor` or a Python number. Global - step to use for the decay computation. Must not be negative. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Must - be positive. See the decay computation above. - end_learning_rate: A scalar `float32` or `float64` `Tensor` or a Python - number. The minimal end learning rate. - power: A scalar `float32` or `float64` `Tensor` or a Python number. The - power of the polynomial. Defaults to linear, 1.0. - cycle: A boolean, whether or not it should cycle beyond decay_steps. - name: String. Optional name of the operation. Defaults to - 'PolynomialDecay'. - - Returns: - A scalar `Tensor` of the same type as `learning_rate`. The decayed - learning rate. - - Raises: - ValueError: if `global_step` is not supplied. - - @compatibility(eager) - When eager execution is enabled, this function returns a function which in - turn returns the decayed learning rate Tensor. This can be useful for changing - the learning rate value across different invocations of optimizer functions. 
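The boundary semantics documented for `piecewise_constant` above (`values[0]` up to and including the first boundary, `values[-1]` past the last) reduce to a few lines of plain Python, shown here purely as a mental model of the schedule:

```python
def piecewise(x, boundaries, values):
    # values[0] for x <= boundaries[0]; values[i+1] for
    # boundaries[i] < x <= boundaries[i+1]; values[-1] past the end.
    for boundary, value in zip(boundaries, values):
        if x <= boundary:
            return value
    return values[-1]

assert piecewise(100000, [100000, 110000], [1.0, 0.5, 0.1]) == 1.0
assert piecewise(100001, [100000, 110000], [1.0, 0.5, 0.1]) == 0.5
assert piecewise(120000, [100000, 110000], [1.0, 0.5, 0.1]) == 0.1
```
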
- @end_compatibility - """ - decayed_lr = learning_rate_schedule.PolynomialDecay( - learning_rate, - decay_steps, - end_learning_rate=end_learning_rate, - power=power, - cycle=cycle, - name=name) - - if not tf.executing_eagerly(): - decayed_lr = decayed_lr(global_step) - else: - decayed_lr = functools.partial(decayed_lr, global_step) - return decayed_lr +def polynomial_decay( + learning_rate, + global_step, + decay_steps, + end_learning_rate=0.0001, + power=1.0, + cycle=False, + name=None, +): + """Applies a polynomial decay to the learning rate. + + It is commonly observed that a monotonically decreasing learning rate, whose + degree of change is carefully chosen, results in a better performing model. + This function applies a polynomial decay function to a provided initial + `learning_rate` to reach an `end_learning_rate` in the given `decay_steps`. + + It requires a `global_step` value to compute the decayed learning rate. You + can just pass a TensorFlow variable that you increment at each training + step. + + The function returns the decayed learning rate. It is computed as: + + ```python + global_step = min(global_step, decay_steps) + decayed_learning_rate = (learning_rate - end_learning_rate) * + (1 - global_step / decay_steps) ^ (power) + + end_learning_rate + + ``` + + If `cycle` is True then a multiple of `decay_steps` is used, the first one + that is bigger than `global_steps`. + + ```python + decay_steps = decay_steps * ceil(global_step / decay_steps) + decayed_learning_rate = (learning_rate - end_learning_rate) * + (1 - global_step / decay_steps) ^ (power) + + end_learning_rate + + ``` + + Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5): + + ```python + ... + global_step = tf.Variable(0, trainable=False) + starter_learning_rate = 0.1 + end_learning_rate = 0.01 + decay_steps = 10000 + learning_rate = tf.compat.v1.train.polynomial_decay(starter_learning_rate, + global_step, + decay_steps, end_learning_rate, + power=0.5) + # Passing global_step to minimize() will increment it at each step. + learning_step = ( + tf.compat.v1.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) + ) + ``` + + Args: + learning_rate: A scalar `float32` or `float64` `Tensor` or a Python + number. The initial learning rate. + global_step: A scalar `int32` or `int64` `Tensor` or a Python number. + Global step to use for the decay computation. Must not be negative. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Must + be positive. See the decay computation above. + end_learning_rate: A scalar `float32` or `float64` `Tensor` or a Python + number. The minimal end learning rate. + power: A scalar `float32` or `float64` `Tensor` or a Python number. The + power of the polynomial. Defaults to `1.0`. + cycle: A boolean, whether it should cycle beyond decay_steps. Defaults to + `False`. + name: String. Optional name of the operation. Defaults to + 'PolynomialDecay'. + + Returns: + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + + Raises: + ValueError: if `global_step` is not supplied. + + @compatibility(eager) + When eager execution is enabled, this function returns a function which in + turn returns the decayed learning rate Tensor. This can be useful for + changing the learning rate value across different invocations of optimizer + functions. 
+ @end_compatibility + """ + decayed_lr = learning_rate_schedule.PolynomialDecay( + learning_rate, + decay_steps, + end_learning_rate=end_learning_rate, + power=power, + cycle=cycle, + name=name, + ) + + if not tf.executing_eagerly(): + decayed_lr = decayed_lr(global_step) + else: + decayed_lr = functools.partial(decayed_lr, global_step) + return decayed_lr @tf_export(v1=["train.natural_exp_decay"]) -def natural_exp_decay(learning_rate, - global_step, - decay_steps, - decay_rate, - staircase=False, - name=None): - """Applies natural exponential decay to the initial learning rate. - - When training a model, it is often recommended to lower the learning rate as - the training progresses. This function applies an exponential decay function - to a provided initial learning rate. It requires an `global_step` value to - compute the decayed learning rate. You can just pass a TensorFlow variable - that you increment at each training step. - - The function returns the decayed learning rate. It is computed as: - - ```python - decayed_learning_rate = learning_rate * exp(-decay_rate * global_step / - decay_step) - ``` - - or, if `staircase` is `True`, as: - - ```python - decayed_learning_rate = learning_rate * exp(-decay_rate * floor(global_step / - decay_step)) - ``` - - Example: decay exponentially with a base of 0.96: - - ```python - ... - global_step = tf.Variable(0, trainable=False) - learning_rate = 0.1 - decay_steps = 5 - k = 0.5 - learning_rate = tf.compat.v1.train.natural_exp_decay(learning_rate, - global_step, - decay_steps, k) - - # Passing global_step to minimize() will increment it at each step. - learning_step = ( - tf.compat.v1.train.GradientDescentOptimizer(learning_rate) - .minimize(...my loss..., global_step=global_step) - ) - ``` - - Args: - learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number. - The initial learning rate. - global_step: A Python number. Global step to use for the decay computation. - Must not be negative. - decay_steps: How often to apply decay. - decay_rate: A Python number. The decay rate. - staircase: Whether to apply decay in a discrete staircase, as opposed to - continuous, fashion. - name: String. Optional name of the operation. Defaults to - 'ExponentialTimeDecay'. - - Returns: - A scalar `Tensor` of the same type as `learning_rate`. The decayed - learning rate. - - Raises: - ValueError: if `global_step` is not supplied. - - @compatibility(eager) - When eager execution is enabled, this function returns a function which in - turn returns the decayed learning rate Tensor. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - @end_compatibility - """ - natural_exp_rate = tf.exp(tf.negative(decay_rate)) - decayed_lr = learning_rate_schedule.ExponentialDecay( - learning_rate, - decay_steps, - natural_exp_rate, - staircase=staircase, - name=name) - - if not tf.executing_eagerly(): - decayed_lr = decayed_lr(global_step) - else: - decayed_lr = functools.partial(decayed_lr, global_step) - return decayed_lr +def natural_exp_decay( + learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False, + name=None, +): + """Applies natural exponential decay to the initial learning rate. + + When training a model, it is often recommended to lower the learning rate as + the training progresses. This function applies an exponential decay + function to a provided initial learning rate. It requires an `global_step` + value to compute the decayed learning rate. 
You can just pass a TensorFlow + variable that you increment at each training step. + + The function returns the decayed learning rate. It is computed as: + + ```python + decayed_learning_rate = learning_rate * exp(-decay_rate * global_step / + decay_step) + ``` + + or, if `staircase` is `True`, as: + + ```python + decayed_learning_rate = learning_rate * exp(-decay_rate * \ + floor(global_step / decay_step)) + ``` + + Example: decay exponentially with a base of 0.96: + + ```python + ... + global_step = tf.Variable(0, trainable=False) + learning_rate = 0.1 + decay_steps = 5 + k = 0.5 + learning_rate = tf.compat.v1.train.natural_exp_decay(learning_rate, + global_step, + decay_steps, k) + + # Passing global_step to minimize() will increment it at each step. + learning_step = ( + tf.compat.v1.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) + ) + ``` + + Args: + learning_rate: A scalar `float32` or `float64` `Tensor` or a Python + number. The initial learning rate. + global_step: A Python number. Global step to use for the decay + computation. Must not be negative. + decay_steps: How often to apply decay. + decay_rate: A Python number. The decay rate. + staircase: Whether to apply decay in a discrete staircase, as opposed to + continuous, fashion. + name: String. Optional name of the operation. Defaults to + 'ExponentialTimeDecay'. + + Returns: + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + + Raises: + ValueError: if `global_step` is not supplied. + + @compatibility(eager) + When eager execution is enabled, this function returns a function which in + turn returns the decayed learning rate Tensor. This can be useful for + changing the learning rate value across different invocations of optimizer + functions. + @end_compatibility + """ + natural_exp_rate = tf.exp(tf.negative(decay_rate)) + decayed_lr = learning_rate_schedule.ExponentialDecay( + learning_rate, + decay_steps, + natural_exp_rate, + staircase=staircase, + name=name, + ) + + if not tf.executing_eagerly(): + decayed_lr = decayed_lr(global_step) + else: + decayed_lr = functools.partial(decayed_lr, global_step) + return decayed_lr @tf_export(v1=["train.inverse_time_decay"]) -def inverse_time_decay(learning_rate, - global_step, - decay_steps, - decay_rate, - staircase=False, - name=None): - """Applies inverse time decay to the initial learning rate. - - When training a model, it is often recommended to lower the learning rate as - the training progresses. This function applies an inverse decay function - to a provided initial learning rate. It requires an `global_step` value to - compute the decayed learning rate. You can just pass a TensorFlow variable - that you increment at each training step. - - The function returns the decayed learning rate. It is computed as: - - ```python - decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / - decay_step) - ``` - - or, if `staircase` is `True`, as: - - ```python - decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / - decay_step)) - ``` - - Example: decay 1/t with a rate of 0.5: - - ```python - ... - global_step = tf.Variable(0, trainable=False) - learning_rate = 0.1 - decay_steps = 1.0 - decay_rate = 0.5 - learning_rate = tf.compat.v1.train.inverse_time_decay(learning_rate, - global_step, - decay_steps, decay_rate) - - # Passing global_step to minimize() will increment it at each step. 
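The one non-obvious step in `natural_exp_decay` above is `natural_exp_rate = tf.exp(tf.negative(decay_rate))`: reusing `ExponentialDecay` works because `exp(-k * t / decay_steps)` equals `rate ** (t / decay_steps)` with `rate = exp(-k)`. A quick numeric check of that identity:

```python
import math

lr0, k, decay_steps = 0.1, 0.5, 5
for t in (0, 3, 5, 10):
    natural = lr0 * math.exp(-k * t / decay_steps)
    via_exponential_decay = lr0 * math.exp(-k) ** (t / decay_steps)
    assert abs(natural - via_exponential_decay) < 1e-12
```
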
- learning_step = ( - tf.compat.v1.train.GradientDescentOptimizer(learning_rate) - .minimize(...my loss..., global_step=global_step) - ) - ``` - - Args: - learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number. - The initial learning rate. - global_step: A Python number. Global step to use for the decay computation. - Must not be negative. - decay_steps: How often to apply decay. - decay_rate: A Python number. The decay rate. - staircase: Whether to apply decay in a discrete staircase, as opposed to - continuous, fashion. - name: String. Optional name of the operation. Defaults to - 'InverseTimeDecay'. - - Returns: - A scalar `Tensor` of the same type as `learning_rate`. The decayed - learning rate. - - Raises: - ValueError: if `global_step` is not supplied. - - @compatibility(eager) - When eager execution is enabled, this function returns a function which in - turn returns the decayed learning rate Tensor. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - @end_compatibility - """ - decayed_lr = learning_rate_schedule.InverseTimeDecay( - learning_rate, decay_steps, decay_rate, staircase=staircase, name=name) - - if not tf.executing_eagerly(): - decayed_lr = decayed_lr(global_step) - else: - decayed_lr = functools.partial(decayed_lr, global_step) - return decayed_lr +def inverse_time_decay( + learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False, + name=None, +): + """Applies inverse time decay to the initial learning rate. + + When training a model, it is often recommended to lower the learning rate as + the training progresses. This function applies an inverse decay function + to a provided initial learning rate. It requires an `global_step` value to + compute the decayed learning rate. You can just pass a TensorFlow variable + that you increment at each training step. + + The function returns the decayed learning rate. It is computed as: + + ```python + decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / + decay_step) + ``` + + or, if `staircase` is `True`, as: + + ```python + decayed_learning_rate = learning_rate / (1 + decay_rate * \ + floor(global_step / decay_step)) + ``` + + Example: decay 1/t with a rate of 0.5: + + ```python + ... + global_step = tf.Variable(0, trainable=False) + learning_rate = 0.1 + decay_steps = 1.0 + decay_rate = 0.5 + learning_rate = tf.compat.v1.train.inverse_time_decay(learning_rate, + global_step, + decay_steps, decay_rate) + + # Passing global_step to minimize() will increment it at each step. + learning_step = ( + tf.compat.v1.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) + ) + ``` + + Args: + learning_rate: A scalar `float32` or `float64` `Tensor` or a Python + number. The initial learning rate. + global_step: A Python number. Global step to use for the decay + computation. Must not be negative. + decay_steps: How often to apply decay. + decay_rate: A Python number. The decay rate. + staircase: Whether to apply decay in a discrete staircase, as opposed to + continuous, fashion. + name: String. Optional name of the operation. Defaults to + 'InverseTimeDecay'. + + Returns: + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + + Raises: + ValueError: if `global_step` is not supplied. + + @compatibility(eager) + When eager execution is enabled, this function returns a function which in + turn returns the decayed learning rate Tensor. 
This can be useful for + changing the learning rate value across different invocations of optimizer + functions. + @end_compatibility + """ + decayed_lr = learning_rate_schedule.InverseTimeDecay( + learning_rate, decay_steps, decay_rate, staircase=staircase, name=name + ) + + if not tf.executing_eagerly(): + decayed_lr = decayed_lr(global_step) + else: + decayed_lr = functools.partial(decayed_lr, global_step) + return decayed_lr @tf_export(v1=["train.cosine_decay"]) def cosine_decay(learning_rate, global_step, decay_steps, alpha=0.0, name=None): - """Applies cosine decay to the learning rate. - - When training a model, it is often recommended to lower the learning rate as - the training progresses. This function applies a cosine decay function - to a provided initial learning rate. It requires a `global_step` value to - compute the decayed learning rate. You can just pass a TensorFlow variable - that you increment at each training step. - - The function returns the decayed learning rate. It is computed as: - ```python - global_step = min(global_step, decay_steps) - cosine_decay = 0.5 * (1 + cos(pi * global_step / decay_steps)) - decayed = (1 - alpha) * cosine_decay + alpha - decayed_learning_rate = learning_rate * decayed - ``` - - Example usage: - ```python - decay_steps = 1000 - lr_decayed = cosine_decay(learning_rate, global_step, decay_steps) - ``` - - Args: - learning_rate: A scalar `float32` or `float64` Tensor or a Python number. - The initial learning rate. - global_step: A scalar `int32` or `int64` `Tensor` or a Python number. Global - step to use for the decay computation. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Number - of steps to decay over. - alpha: A scalar `float32` or `float64` Tensor or a Python number. Minimum - learning rate value as a fraction of learning_rate. - name: String. Optional name of the operation. Defaults to 'CosineDecay'. - - Returns: - A scalar `Tensor` of the same type as `learning_rate`. The decayed - learning rate. - Raises: - ValueError: if `global_step` is not supplied. - - References: - Stochastic Gradient Descent with Warm Restarts: - [Loshchilov et al., 2017] - (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx) - ([pdf](https://openreview.net/pdf?id=Skq89Scxx)) - - @compatibility(eager) - When eager execution is enabled, this function returns a function which in - turn returns the decayed learning rate Tensor. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - @end_compatibility - """ - decayed_lr = learning_rate_schedule.CosineDecay( - learning_rate, decay_steps, alpha=alpha, name=name) - - if not tf.executing_eagerly(): - decayed_lr = decayed_lr(global_step) - else: - decayed_lr = functools.partial(decayed_lr, global_step) - return decayed_lr + """Applies cosine decay to the learning rate. + + When training a model, it is often recommended to lower the learning rate as + the training progresses. This function applies a cosine decay function + to a provided initial learning rate. It requires a `global_step` value to + compute the decayed learning rate. You can just pass a TensorFlow variable + that you increment at each training step. + + The function returns the decayed learning rate.
It is computed as: + ```python + global_step = min(global_step, decay_steps) + cosine_decay = 0.5 * (1 + cos(pi * global_step / decay_steps)) + decayed = (1 - alpha) * cosine_decay + alpha + decayed_learning_rate = learning_rate * decayed + ``` + + Example usage: + ```python + decay_steps = 1000 + lr_decayed = cosine_decay(learning_rate, global_step, decay_steps) + ``` + + Args: + learning_rate: A scalar `float32` or `float64` Tensor or a Python number. + The initial learning rate. + global_step: A scalar `int32` or `int64` `Tensor` or a Python number. + Global step to use for the decay computation. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. + Number of steps to decay over. + alpha: A scalar `float32` or `float64` Tensor or a Python number. Minimum + learning rate value as a fraction of learning_rate. + name: String. Optional name of the operation. Defaults to 'CosineDecay'. + + Returns: + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + Raises: + ValueError: if `global_step` is not supplied. + + References: + Stochastic Gradient Descent with Warm Restarts: + [Loshchilov et al., 2017] + (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx) + ([pdf](https://openreview.net/pdf?id=Skq89Scxx)) + + @compatibility(eager) + When eager execution is enabled, this function returns a function which in + turn returns the decayed learning rate Tensor. This can be useful for + changing the learning rate value across different invocations of optimizer + functions. + @end_compatibility + """ + decayed_lr = learning_rate_schedule.CosineDecay( + learning_rate, decay_steps, alpha=alpha, name=name + ) + + if not tf.executing_eagerly(): + decayed_lr = decayed_lr(global_step) + else: + decayed_lr = functools.partial(decayed_lr, global_step) + return decayed_lr @tf_export(v1=["train.cosine_decay_restarts"]) -def cosine_decay_restarts(learning_rate, - global_step, - first_decay_steps, - t_mul=2.0, - m_mul=1.0, - alpha=0.0, - name=None): - """Applies cosine decay with restarts to the learning rate. - - When training a model, it is often recommended to lower the learning rate as - the training progresses. This function applies a cosine decay function with - restarts to a provided initial learning rate. It requires a `global_step` - value to compute the decayed learning rate. You can just pass a TensorFlow - variable that you increment at each training step. - - The function returns the decayed learning rate while taking into account - possible warm restarts. The learning rate multiplier first decays - from 1 to `alpha` for `first_decay_steps` steps. Then, a warm - restart is performed. Each new warm restart runs for `t_mul` times more steps - and with `m_mul` times smaller initial learning rate. - - Example usage: - ```python - first_decay_steps = 1000 - lr_decayed = cosine_decay_restarts(learning_rate, global_step, - first_decay_steps) - ``` - - Args: - learning_rate: A scalar `float32` or `float64` Tensor or a Python number. - The initial learning rate. - global_step: A scalar `int32` or `int64` `Tensor` or a Python number. Global - step to use for the decay computation. - first_decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. - Number of steps to decay over. - t_mul: A scalar `float32` or `float64` `Tensor` or a Python number. Used to - derive the number of iterations in the i-th period - m_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
- Used to derive the initial learning rate of the i-th period: - alpha: A scalar `float32` or `float64` Tensor or a Python number. Minimum - learning rate value as a fraction of the learning_rate. - name: String. Optional name of the operation. Defaults to 'SGDRDecay'. - - Returns: - A scalar `Tensor` of the same type as `learning_rate`. The decayed - learning rate. - Raises: - ValueError: if `global_step` is not supplied. - - References: - Stochastic Gradient Descent with Warm Restarts: - [Loshchilov et al., 2017] - (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx) - ([pdf](https://openreview.net/pdf?id=Skq89Scxx)) - - @compatibility(eager) - When eager execution is enabled, this function returns a function which in - turn returns the decayed learning rate Tensor. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - @end_compatibility - """ - decayed_lr = learning_rate_schedule.CosineDecayRestarts( - learning_rate, - first_decay_steps, - t_mul=t_mul, - m_mul=m_mul, - alpha=alpha, - name=name) - - if not tf.executing_eagerly(): - decayed_lr = decayed_lr(global_step) - else: - decayed_lr = functools.partial(decayed_lr, global_step) - return decayed_lr +def cosine_decay_restarts( + learning_rate, + global_step, + first_decay_steps, + t_mul=2.0, + m_mul=1.0, + alpha=0.0, + name=None, +): + """Applies cosine decay with restarts to the learning rate. + + When training a model, it is often recommended to lower the learning rate as + the training progresses. This function applies a cosine decay function with + restarts to a provided initial learning rate. It requires a `global_step` + value to compute the decayed learning rate. You can just pass a TensorFlow + variable that you increment at each training step. + + The function returns the decayed learning rate while taking into account + possible warm restarts. The learning rate multiplier first decays + from 1 to `alpha` for `first_decay_steps` steps. Then, a warm restart is + performed. Each new warm restart runs for `t_mul` times more steps and with + `m_mul` times smaller initial learning rate. + + Example usage: + ```python + first_decay_steps = 1000 + lr_decayed = cosine_decay_restarts(learning_rate, global_step, + first_decay_steps) + ``` + + Args: + learning_rate: A scalar `float32` or `float64` Tensor or a Python number. + The initial learning rate. + global_step: A scalar `int32` or `int64` `Tensor` or a Python number. + Global step to use for the decay computation. + first_decay_steps: A scalar `int32` or `int64` `Tensor` or a Python + number. Number of steps to decay over. + t_mul: A scalar `float32` or `float64` `Tensor` or a Python number. Used + to derive the number of iterations in the i-th period. + m_mul: A scalar `float32` or `float64` `Tensor` or a Python number. + Used to derive the initial learning rate of the i-th period. + alpha: A scalar `float32` or `float64` Tensor or a Python number. Minimum + learning rate value as a fraction of the learning_rate. + name: String. Optional name of the operation. Defaults to 'SGDRDecay'. + + Returns: + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + Raises: + ValueError: if `global_step` is not supplied.
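The restart bookkeeping is easiest to see in plain Python; this sketch mirrors the NumPy reference implementation used by `CosineDecayRestartsTest` further down in this diff (when no restart has occurred yet it reduces to plain cosine decay):

```python
import math


def cosine_decay_restarts_value(step, first_decay_steps, t_mul=2.0,
                                m_mul=1.0, alpha=0.0):
    # Illustrative helper, not part of the patch. Consume completed periods:
    # each new period is t_mul times longer, and each restart scales the
    # starting multiplier by m_mul.
    fac, decay_steps = 1.0, first_decay_steps
    while step >= decay_steps:
        step -= decay_steps
        decay_steps *= t_mul
        fac *= m_mul
    completed_fraction = step / decay_steps
    decay = fac * 0.5 * (1.0 + math.cos(math.pi * completed_fraction))
    return (1.0 - alpha) * decay + alpha
```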
+ + References: + Stochastic Gradient Descent with Warm Restarts: + [Loshchilov et al., 2017] + (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx) + ([pdf](https://openreview.net/pdf?id=Skq89Scxx)) + + @compatibility(eager) + When eager execution is enabled, this function returns a function which in + turn returns the decayed learning rate Tensor. This can be useful for + changing the learning rate value across different invocations of optimizer + functions. + @end_compatibility + """ + decayed_lr = learning_rate_schedule.CosineDecayRestarts( + learning_rate, + first_decay_steps, + t_mul=t_mul, + m_mul=m_mul, + alpha=alpha, + name=name, + ) + + if not tf.executing_eagerly(): + decayed_lr = decayed_lr(global_step) + else: + decayed_lr = functools.partial(decayed_lr, global_step) + return decayed_lr @tf_export(v1=["train.linear_cosine_decay"]) -def linear_cosine_decay(learning_rate, - global_step, - decay_steps, - num_periods=0.5, - alpha=0.0, - beta=0.001, - name=None): - """Applies linear cosine decay to the learning rate. - - Note that linear cosine decay is more aggressive than cosine decay and - larger initial learning rates can typically be used. - - When training a model, it is often recommended to lower the learning rate as - the training progresses. This function applies a linear cosine decay function - to a provided initial learning rate. It requires a `global_step` value to - compute the decayed learning rate. You can just pass a TensorFlow variable - that you increment at each training step. - - The function returns the decayed learning rate. It is computed as: - ```python - global_step = min(global_step, decay_steps) - linear_decay = (decay_steps - global_step) / decay_steps) - cosine_decay = 0.5 * ( - 1 + cos(pi * 2 * num_periods * global_step / decay_steps)) - decayed = (alpha + linear_decay) * cosine_decay + beta - decayed_learning_rate = learning_rate * decayed - ``` - - Example usage: - ```python - decay_steps = 1000 - lr_decayed = linear_cosine_decay(learning_rate, global_step, decay_steps) - ``` - - Args: - learning_rate: A scalar `float32` or `float64` Tensor or a Python number. - The initial learning rate. - global_step: A scalar `int32` or `int64` `Tensor` or a Python number. Global - step to use for the decay computation. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Number - of steps to decay over. - num_periods: Number of periods in the cosine part of the decay. See - computation above. - alpha: See computation above. - beta: See computation above. - name: String. Optional name of the operation. Defaults to - 'LinearCosineDecay'. - - Returns: - A scalar `Tensor` of the same type as `learning_rate`. The decayed - learning rate. - Raises: - ValueError: if `global_step` is not supplied. - - References: - Neural Optimizer Search with Reinforcement Learning: - [Bello et al., 2017](http://proceedings.mlr.press/v70/bello17a.html) - ([pdf](http://proceedings.mlr.press/v70/bello17a/bello17a.pdf)) - Stochastic Gradient Descent with Warm Restarts: - [Loshchilov et al., 2017] - (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx) - ([pdf](https://openreview.net/pdf?id=Skq89Scxx)) - - @compatibility(eager) - When eager execution is enabled, this function returns a function which in - turn returns the decayed learning rate Tensor. This can be useful for changing - the learning rate value across different invocations of optimizer functions.
- @end_compatibility - """ - decayed_lr = learning_rate_schedule.LinearCosineDecay( - learning_rate, - decay_steps, - num_periods=num_periods, - alpha=alpha, - beta=beta, - name=name) - - if not tf.executing_eagerly(): - decayed_lr = decayed_lr(global_step) - else: - decayed_lr = functools.partial(decayed_lr, global_step) - return decayed_lr +def linear_cosine_decay( + learning_rate, + global_step, + decay_steps, + num_periods=0.5, + alpha=0.0, + beta=0.001, + name=None, +): + """Applies linear cosine decay to the learning rate. + + Note that linear cosine decay is more aggressive than cosine decay and + larger initial learning rates can typically be used. + + When training a model, it is often recommended to lower the learning rate as + the training progresses. This function applies a linear cosine decay + function to a provided initial learning rate. It requires a `global_step` + value to compute the decayed learning rate. You can just pass a TensorFlow + variable that you increment at each training step. + + The function returns the decayed learning rate. It is computed as: + ```python + global_step = min(global_step, decay_steps) + linear_decay = (decay_steps - global_step) / decay_steps + cosine_decay = 0.5 * ( + 1 + cos(pi * 2 * num_periods * global_step / decay_steps)) + decayed = (alpha + linear_decay) * cosine_decay + beta + decayed_learning_rate = learning_rate * decayed + ``` + + Example usage: + ```python + decay_steps = 1000 + lr_decayed = linear_cosine_decay(learning_rate, global_step, decay_steps) + ``` + + Args: + learning_rate: A scalar `float32` or `float64` Tensor or a Python number. + The initial learning rate. + global_step: A scalar `int32` or `int64` `Tensor` or a Python number. + Global step to use for the decay computation. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. + Number of steps to decay over. + num_periods: Number of periods in the cosine part of the decay. See + computation above. + alpha: See computation above. + beta: See computation above. + name: String. Optional name of the operation. Defaults to + 'LinearCosineDecay'. + + Returns: + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + Raises: + ValueError: if `global_step` is not supplied. + + References: + Neural Optimizer Search with Reinforcement Learning: + [Bello et al., 2017](http://proceedings.mlr.press/v70/bello17a.html) + ([pdf](http://proceedings.mlr.press/v70/bello17a/bello17a.pdf)) + Stochastic Gradient Descent with Warm Restarts: + [Loshchilov et al., 2017] + (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx) + ([pdf](https://openreview.net/pdf?id=Skq89Scxx)) + + @compatibility(eager) + When eager execution is enabled, this function returns a function which in + turn returns the decayed learning rate Tensor. This can be useful for + changing the learning rate value across different invocations of optimizer + functions.
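For a concrete reading of the formula, this plain-Python sketch mirrors `np_linear_cosine_decay` from the test file below (with the defaults, the value falls from roughly `1 + beta` at step 0 to `beta` at `decay_steps`):

```python
import math


def linear_cosine_decay_value(step, decay_steps, num_periods=0.5,
                              alpha=0.0, beta=0.001):
    # Illustrative helper, not part of the patch: a linear ramp down,
    # modulated by a cosine with num_periods oscillations, plus a floor beta.
    step = min(step, decay_steps)
    linear_decayed = (decay_steps - step) / decay_steps
    fraction = 2.0 * num_periods * step / decay_steps
    cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * fraction))
    return (alpha + linear_decayed) * cosine_decayed + beta
```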
+ @end_compatibility + """ + decayed_lr = learning_rate_schedule.LinearCosineDecay( + learning_rate, + decay_steps, + num_periods=num_periods, + alpha=alpha, + beta=beta, + name=name, + ) + + if not tf.executing_eagerly(): + decayed_lr = decayed_lr(global_step) + else: + decayed_lr = functools.partial(decayed_lr, global_step) + return decayed_lr @tf_export(v1=["train.noisy_linear_cosine_decay"]) -def noisy_linear_cosine_decay(learning_rate, - global_step, - decay_steps, - initial_variance=1.0, - variance_decay=0.55, - num_periods=0.5, - alpha=0.0, - beta=0.001, - name=None): - """Applies noisy linear cosine decay to the learning rate. - - Note that linear cosine decay is more aggressive than cosine decay and - larger initial learning rates can typically be used. - - When training a model, it is often recommended to lower the learning rate as - the training progresses. This function applies a noisy linear - cosine decay function to a provided initial learning rate. - It requires a `global_step` value to compute the decayed learning rate. - You can just pass a TensorFlow variable that you increment at each - training step. - - The function returns the decayed learning rate. It is computed as: - ```python - global_step = min(global_step, decay_steps) - linear_decay = (decay_steps - global_step) / decay_steps) - cosine_decay = 0.5 * ( - 1 + cos(pi * 2 * num_periods * global_step / decay_steps)) - decayed = (alpha + linear_decay + eps_t) * cosine_decay + beta - decayed_learning_rate = learning_rate * decayed - ``` - where eps_t is 0-centered gaussian noise with variance - initial_variance / (1 + global_step) ** variance_decay - - Example usage: - ```python - decay_steps = 1000 - lr_decayed = noisy_linear_cosine_decay( - learning_rate, global_step, decay_steps) - ``` - - Args: - learning_rate: A scalar `float32` or `float64` Tensor or a Python number. - The initial learning rate. - global_step: A scalar `int32` or `int64` `Tensor` or a Python number. Global - step to use for the decay computation. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Number - of steps to decay over. - initial_variance: initial variance for the noise. See computation above. - variance_decay: decay for the noise's variance. See computation above. - num_periods: Number of periods in the cosine part of the decay. See - computation above. - alpha: See computation above. - beta: See computation above. - name: String. Optional name of the operation. Defaults to - 'NoisyLinearCosineDecay'. - - Returns: - A scalar `Tensor` of the same type as `learning_rate`. The decayed - learning rate. - Raises: - ValueError: if `global_step` is not supplied. - - References: - Neural Optimizer Search with Reinforcement Learning: - [Bello et al., 2017](http://proceedings.mlr.press/v70/bello17a.html) - ([pdf](http://proceedings.mlr.press/v70/bello17a/bello17a.pdf)) - Stochastic Gradient Descent with Warm Restarts: - [Loshchilov et al., 2017] - (https://openreview.net/forum?id=Skq89Scxx¬eId=Skq89Scxx) - ([pdf](https://openreview.net/pdf?id=Skq89Scxx)) - - @compatibility(eager) - When eager execution is enabled, this function returns a function which in - turn returns the decayed learning rate Tensor. This can be useful for changing - the learning rate value across different invocations of optimizer functions. 
- @end_compatibility - """ - decayed_lr = learning_rate_schedule.NoisyLinearCosineDecay( - learning_rate, - decay_steps, - initial_variance=initial_variance, - variance_decay=variance_decay, - num_periods=num_periods, - alpha=alpha, - beta=beta, - name=name) - - if not tf.executing_eagerly(): - decayed_lr = decayed_lr(global_step) - else: - decayed_lr = functools.partial(decayed_lr, global_step) - return decayed_lr +def noisy_linear_cosine_decay( + learning_rate, + global_step, + decay_steps, + initial_variance=1.0, + variance_decay=0.55, + num_periods=0.5, + alpha=0.0, + beta=0.001, + name=None, +): + """Applies noisy linear cosine decay to the learning rate. + + Note that linear cosine decay is more aggressive than cosine decay and + larger initial learning rates can typically be used. + + When training a model, it is often recommended to lower the learning rate as + the training progresses. This function applies a noisy linear + cosine decay function to a provided initial learning rate. + It requires a `global_step` value to compute the decayed learning rate. + You can just pass a TensorFlow variable that you increment at each + training step. + + The function returns the decayed learning rate. It is computed as: + ```python + global_step = min(global_step, decay_steps) + linear_decay = (decay_steps - global_step) / decay_steps + cosine_decay = 0.5 * ( + 1 + cos(pi * 2 * num_periods * global_step / decay_steps)) + decayed = (alpha + linear_decay + eps_t) * cosine_decay + beta + decayed_learning_rate = learning_rate * decayed + ``` + where eps_t is 0-centered Gaussian noise with variance + initial_variance / (1 + global_step) ** variance_decay + + Example usage: + ```python + decay_steps = 1000 + lr_decayed = noisy_linear_cosine_decay( + learning_rate, global_step, decay_steps) + ``` + + Args: + learning_rate: A scalar `float32` or `float64` Tensor or a Python number. + The initial learning rate. + global_step: A scalar `int32` or `int64` `Tensor` or a Python number. + Global step to use for the decay computation. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. + Number of steps to decay over. + initial_variance: initial variance for the noise. See computation above. + variance_decay: decay for the noise's variance. See computation above. + num_periods: Number of periods in the cosine part of the decay. See + computation above. + alpha: See computation above. + beta: See computation above. + name: String. Optional name of the operation. Defaults to + 'NoisyLinearCosineDecay'. + + Returns: + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + Raises: + ValueError: if `global_step` is not supplied. + + References: + Neural Optimizer Search with Reinforcement Learning: + [Bello et al., 2017](http://proceedings.mlr.press/v70/bello17a.html) + ([pdf](http://proceedings.mlr.press/v70/bello17a/bello17a.pdf)) + Stochastic Gradient Descent with Warm Restarts: + [Loshchilov et al., 2017] + (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx) + ([pdf](https://openreview.net/pdf?id=Skq89Scxx)) + + @compatibility(eager) + When eager execution is enabled, this function returns a function which in + turn returns the decayed learning rate Tensor. This can be useful for + changing the learning rate value across different invocations of optimizer + functions.
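The noise term `eps_t` is the only difference from the deterministic linear cosine schedule above. A seeded NumPy sketch (illustrative only; the real implementation draws the noise at the op level):

```python
import math

import numpy as np

rng = np.random.default_rng(0)  # seeded only to keep the sketch repeatable


def noisy_linear_cosine_decay_value(step, decay_steps, initial_variance=1.0,
                                    variance_decay=0.55, num_periods=0.5,
                                    alpha=0.0, beta=0.001):
    # Illustrative helper, not part of the patch.
    step = min(step, decay_steps)
    variance = initial_variance / (1.0 + step) ** variance_decay
    eps_t = rng.normal(0.0, math.sqrt(variance))  # the docstring's eps_t
    linear_decayed = (decay_steps - step) / decay_steps
    fraction = 2.0 * num_periods * step / decay_steps
    cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * fraction))
    return (alpha + linear_decayed + eps_t) * cosine_decayed + beta
```

Because of `eps_t`, the tests for this schedule below only check that it evaluates, not its numeric value.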
+ @end_compatibility + """ + decayed_lr = learning_rate_schedule.NoisyLinearCosineDecay( + learning_rate, + decay_steps, + initial_variance=initial_variance, + variance_decay=variance_decay, + num_periods=num_periods, + alpha=alpha, + beta=beta, + name=name, + ) + + if not tf.executing_eagerly(): + decayed_lr = decayed_lr(global_step) + else: + decayed_lr = functools.partial(decayed_lr, global_step) + return decayed_lr diff --git a/keras/optimizers/legacy_learning_rate_decay_test.py b/keras/optimizers/legacy_learning_rate_decay_test.py index 7c93d1efeaea..d0322426560c 100644 --- a/keras/optimizers/legacy_learning_rate_decay_test.py +++ b/keras/optimizers/legacy_learning_rate_decay_test.py @@ -14,459 +14,479 @@ # ============================================================================== """Functional test for learning rate decay.""" +import math + import tensorflow.compat.v2 as tf -import math from keras.testing_infra import test_combinations @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class LRDecayTest(test_combinations.TestCase): - - def testContinuous(self): - self.evaluate(tf.compat.v1.global_variables_initializer()) - step = 5 - decayed_lr = tf.compat.v1.train.exponential_decay(0.05, step, 10, 0.96) - expected = .05 * 0.96**(5.0 / 10.0) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testStaircase(self): - if tf.executing_eagerly(): - step = tf.Variable(0) - self.evaluate(tf.compat.v1.global_variables_initializer()) - decayed_lr = tf.compat.v1.train.exponential_decay( - .1, step, 3, 0.96, staircase=True) - - # No change to learning rate due to staircase - expected = .1 - self.evaluate(step.assign(1)) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - expected = .1 - self.evaluate(step.assign(2)) - self.assertAllClose(self.evaluate(decayed_lr), .1, 1e-6) - - # Decayed learning rate - expected = .1 * 0.96 ** (100 // 3) - self.evaluate(step.assign(100)) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testVariables(self): - step = tf.Variable(1) - - decayed_lr = tf.compat.v1.train.exponential_decay( - .1, step, 3, 0.96, staircase=True) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # No change to learning rate - assign_1 = step.assign(1) - if not tf.executing_eagerly(): - self.evaluate(assign_1.op) - self.assertAllClose(self.evaluate(decayed_lr), .1, 1e-6) - assign_2 = step.assign(2) - if not tf.executing_eagerly(): - self.evaluate(assign_2.op) - self.assertAllClose(self.evaluate(decayed_lr), .1, 1e-6) - # Decayed learning rate - assign_100 = step.assign(100) - if not tf.executing_eagerly(): - self.evaluate(assign_100.op) - expected = .1 * 0.96**(100 // 3) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testPiecewiseConstant(self): - x = tf.Variable(-999) - decayed_lr = tf.compat.v1.train.piecewise_constant( - x, [100, 110, 120], [1.0, 0.1, 0.01, 0.001]) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - - self.assertAllClose(self.evaluate(decayed_lr), 1.0, 1e-6) - self.evaluate(x.assign(100)) - self.assertAllClose(self.evaluate(decayed_lr), 1.0, 1e-6) - self.evaluate(x.assign(105)) - self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6) - self.evaluate(x.assign(110)) - self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6) - self.evaluate(x.assign(120)) - self.assertAllClose(self.evaluate(decayed_lr), 0.01, 1e-6) - self.evaluate(x.assign(999)) - self.assertAllClose(self.evaluate(decayed_lr), 0.001, 1e-6) - - def 
testPiecewiseConstantEdgeCases(self): - x_int = tf.Variable(0, dtype=tf.int32) - boundaries, values = [-1.0, 1.0], [1, 2, 3] - with self.assertRaises(ValueError): - decayed_lr = tf.compat.v1.train.piecewise_constant( - x_int, boundaries, values) - if tf.executing_eagerly(): - decayed_lr() - - x = tf.Variable(0.0) - boundaries, values = [-1.0, 1.0], [1.0, 2, 3] - with self.assertRaises(ValueError): - decayed_lr = tf.compat.v1.train.piecewise_constant( - x, boundaries, values) - if tf.executing_eagerly(): - decayed_lr() - - # Test that ref types are valid. - if not tf.executing_eagerly(): - x = tf.compat.v1.Variable(0.0, use_resource=False) - x_ref = x.op.outputs[0] # float32_ref tensor should be accepted - boundaries, values = [1.0, 2.0], [1, 2, 3] - tf.compat.v1.train.piecewise_constant(x_ref, boundaries, values) - - # Test casting boundaries from int32 to int64. - x_int64 = tf.Variable(0, dtype=tf.int64) - boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7] - decayed_lr = tf.compat.v1.train.piecewise_constant( - x_int64, boundaries, values) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(decayed_lr), 0.4, 1e-6) - self.evaluate(x_int64.assign(1)) - self.assertAllClose(self.evaluate(decayed_lr), 0.4, 1e-6) - self.evaluate(x_int64.assign(2)) - self.assertAllClose(self.evaluate(decayed_lr), 0.5, 1e-6) - self.evaluate(x_int64.assign(3)) - self.assertAllClose(self.evaluate(decayed_lr), 0.6, 1e-6) - self.evaluate(x_int64.assign(4)) - self.assertAllClose(self.evaluate(decayed_lr), 0.7, 1e-6) + def testContinuous(self): + self.evaluate(tf.compat.v1.global_variables_initializer()) + step = 5 + decayed_lr = tf.compat.v1.train.exponential_decay(0.05, step, 10, 0.96) + expected = 0.05 * 0.96 ** (5.0 / 10.0) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testStaircase(self): + if tf.executing_eagerly(): + step = tf.Variable(0) + self.evaluate(tf.compat.v1.global_variables_initializer()) + decayed_lr = tf.compat.v1.train.exponential_decay( + 0.1, step, 3, 0.96, staircase=True + ) + + # No change to learning rate due to staircase + expected = 0.1 + self.evaluate(step.assign(1)) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + expected = 0.1 + self.evaluate(step.assign(2)) + self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6) + + # Decayed learning rate + expected = 0.1 * 0.96 ** (100 // 3) + self.evaluate(step.assign(100)) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testVariables(self): + step = tf.Variable(1) + + decayed_lr = tf.compat.v1.train.exponential_decay( + 0.1, step, 3, 0.96, staircase=True + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + # No change to learning rate + assign_1 = step.assign(1) + if not tf.executing_eagerly(): + self.evaluate(assign_1.op) + self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6) + assign_2 = step.assign(2) + if not tf.executing_eagerly(): + self.evaluate(assign_2.op) + self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6) + # Decayed learning rate + assign_100 = step.assign(100) + if not tf.executing_eagerly(): + self.evaluate(assign_100.op) + expected = 0.1 * 0.96 ** (100 // 3) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testPiecewiseConstant(self): + x = tf.Variable(-999) + decayed_lr = tf.compat.v1.train.piecewise_constant( + x, [100, 110, 120], [1.0, 0.1, 0.01, 0.001] + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + + 
self.assertAllClose(self.evaluate(decayed_lr), 1.0, 1e-6) + self.evaluate(x.assign(100)) + self.assertAllClose(self.evaluate(decayed_lr), 1.0, 1e-6) + self.evaluate(x.assign(105)) + self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6) + self.evaluate(x.assign(110)) + self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6) + self.evaluate(x.assign(120)) + self.assertAllClose(self.evaluate(decayed_lr), 0.01, 1e-6) + self.evaluate(x.assign(999)) + self.assertAllClose(self.evaluate(decayed_lr), 0.001, 1e-6) + + def testPiecewiseConstantEdgeCases(self): + x_int = tf.Variable(0, dtype=tf.int32) + boundaries, values = [-1.0, 1.0], [1, 2, 3] + with self.assertRaises(ValueError): + decayed_lr = tf.compat.v1.train.piecewise_constant( + x_int, boundaries, values + ) + if tf.executing_eagerly(): + decayed_lr() + + x = tf.Variable(0.0) + boundaries, values = [-1.0, 1.0], [1.0, 2, 3] + with self.assertRaises(ValueError): + decayed_lr = tf.compat.v1.train.piecewise_constant( + x, boundaries, values + ) + if tf.executing_eagerly(): + decayed_lr() + + # Test that ref types are valid. + if not tf.executing_eagerly(): + x = tf.compat.v1.Variable(0.0, use_resource=False) + x_ref = x.op.outputs[0] # float32_ref tensor should be accepted + boundaries, values = [1.0, 2.0], [1, 2, 3] + tf.compat.v1.train.piecewise_constant(x_ref, boundaries, values) + + # Test casting boundaries from int32 to int64. + x_int64 = tf.Variable(0, dtype=tf.int64) + boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7] + decayed_lr = tf.compat.v1.train.piecewise_constant( + x_int64, boundaries, values + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(decayed_lr), 0.4, 1e-6) + self.evaluate(x_int64.assign(1)) + self.assertAllClose(self.evaluate(decayed_lr), 0.4, 1e-6) + self.evaluate(x_int64.assign(2)) + self.assertAllClose(self.evaluate(decayed_lr), 0.5, 1e-6) + self.evaluate(x_int64.assign(3)) + self.assertAllClose(self.evaluate(decayed_lr), 0.6, 1e-6) + self.evaluate(x_int64.assign(4)) + self.assertAllClose(self.evaluate(decayed_lr), 0.7, 1e-6) @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class LinearDecayTest(test_combinations.TestCase): - - def testHalfWay(self): - step = 5 - lr = 0.05 - end_lr = 0.0 - decayed_lr = tf.compat.v1.train.polynomial_decay(lr, step, 10, end_lr) - expected = lr * 0.5 - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testEnd(self): - step = 10 - lr = 0.05 - end_lr = 0.001 - decayed_lr = tf.compat.v1.train.polynomial_decay(lr, step, 10, end_lr) - expected = end_lr - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testHalfWayWithEnd(self): - step = 5 - lr = 0.05 - end_lr = 0.001 - decayed_lr = tf.compat.v1.train.polynomial_decay(lr, step, 10, end_lr) - expected = (lr + end_lr) * 0.5 - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testBeyondEnd(self): - step = 15 - lr = 0.05 - end_lr = 0.001 - decayed_lr = tf.compat.v1.train.polynomial_decay(lr, step, 10, end_lr) - expected = end_lr - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testBeyondEndWithCycle(self): - step = 15 - lr = 0.05 - end_lr = 0.001 - decayed_lr = tf.compat.v1.train.polynomial_decay( - lr, step, 10, end_lr, cycle=True) - expected = (lr - end_lr) * 0.25 + end_lr - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + def testHalfWay(self): + step = 5 + lr = 0.05 + end_lr = 0.0 + decayed_lr = tf.compat.v1.train.polynomial_decay(lr, step, 10, 
end_lr) + expected = lr * 0.5 + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testEnd(self): + step = 10 + lr = 0.05 + end_lr = 0.001 + decayed_lr = tf.compat.v1.train.polynomial_decay(lr, step, 10, end_lr) + expected = end_lr + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testHalfWayWithEnd(self): + step = 5 + lr = 0.05 + end_lr = 0.001 + decayed_lr = tf.compat.v1.train.polynomial_decay(lr, step, 10, end_lr) + expected = (lr + end_lr) * 0.5 + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testBeyondEnd(self): + step = 15 + lr = 0.05 + end_lr = 0.001 + decayed_lr = tf.compat.v1.train.polynomial_decay(lr, step, 10, end_lr) + expected = end_lr + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testBeyondEndWithCycle(self): + step = 15 + lr = 0.05 + end_lr = 0.001 + decayed_lr = tf.compat.v1.train.polynomial_decay( + lr, step, 10, end_lr, cycle=True + ) + expected = (lr - end_lr) * 0.25 + end_lr + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class SqrtDecayTest(test_combinations.TestCase): - - def testHalfWay(self): - step = 5 - lr = 0.05 - end_lr = 0.0 - power = 0.5 - decayed_lr = tf.compat.v1.train.polynomial_decay( - lr, step, 10, end_lr, power=power) - expected = lr * 0.5**power - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testEnd(self): - step = 10 - lr = 0.05 - end_lr = 0.001 - power = 0.5 - decayed_lr = tf.compat.v1.train.polynomial_decay( - lr, step, 10, end_lr, power=power) - expected = end_lr - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testHalfWayWithEnd(self): - step = 5 - lr = 0.05 - end_lr = 0.001 - power = 0.5 - decayed_lr = tf.compat.v1.train.polynomial_decay( - lr, step, 10, end_lr, power=power) - expected = (lr - end_lr) * 0.5**power + end_lr - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testBeyondEnd(self): - step = 15 - lr = 0.05 - end_lr = 0.001 - power = 0.5 - decayed_lr = tf.compat.v1.train.polynomial_decay( - lr, step, 10, end_lr, power=power) - expected = end_lr - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testBeyondEndWithCycle(self): - step = 15 - lr = 0.05 - end_lr = 0.001 - power = 0.5 - decayed_lr = tf.compat.v1.train.polynomial_decay( - lr, step, 10, end_lr, power=power, cycle=True) - expected = (lr - end_lr) * 0.25**power + end_lr - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + def testHalfWay(self): + step = 5 + lr = 0.05 + end_lr = 0.0 + power = 0.5 + decayed_lr = tf.compat.v1.train.polynomial_decay( + lr, step, 10, end_lr, power=power + ) + expected = lr * 0.5**power + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testEnd(self): + step = 10 + lr = 0.05 + end_lr = 0.001 + power = 0.5 + decayed_lr = tf.compat.v1.train.polynomial_decay( + lr, step, 10, end_lr, power=power + ) + expected = end_lr + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testHalfWayWithEnd(self): + step = 5 + lr = 0.05 + end_lr = 0.001 + power = 0.5 + decayed_lr = tf.compat.v1.train.polynomial_decay( + lr, step, 10, end_lr, power=power + ) + expected = (lr - end_lr) * 0.5**power + end_lr + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testBeyondEnd(self): + step = 15 + lr = 0.05 + end_lr = 0.001 + power = 0.5 + decayed_lr = tf.compat.v1.train.polynomial_decay( + lr, step, 10, end_lr, power=power + 
) + expected = end_lr + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testBeyondEndWithCycle(self): + step = 15 + lr = 0.05 + end_lr = 0.001 + power = 0.5 + decayed_lr = tf.compat.v1.train.polynomial_decay( + lr, step, 10, end_lr, power=power, cycle=True + ) + expected = (lr - end_lr) * 0.25**power + end_lr + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class PolynomialDecayTest(test_combinations.TestCase): - - def testBeginWithCycle(self): - lr = 0.001 - decay_steps = 10 - step = 0 - decayed_lr = tf.compat.v1.train.polynomial_decay( - lr, step, decay_steps, cycle=True) - expected = lr - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + def testBeginWithCycle(self): + lr = 0.001 + decay_steps = 10 + step = 0 + decayed_lr = tf.compat.v1.train.polynomial_decay( + lr, step, decay_steps, cycle=True + ) + expected = lr + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class ExponentialDecayTest(test_combinations.TestCase): - - def testDecay(self): - initial_lr = 0.1 - k = 10 - decay_rate = 0.96 - step = tf.Variable(0) - decayed_lr = tf.compat.v1.train.natural_exp_decay(initial_lr, step, k, - decay_rate) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - for i in range(k + 1): - expected = initial_lr * math.exp(-i / k * decay_rate) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - self.evaluate(step.assign_add(1)) - - def testStaircase(self): - initial_lr = 0.1 - k = 10 - decay_rate = 0.96 - step = tf.Variable(0) - decayed_lr = tf.compat.v1.train.natural_exp_decay( - initial_lr, step, k, decay_rate, staircase=True) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - for i in range(k + 1): - expected = initial_lr * math.exp(-decay_rate * (i // k)) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - self.evaluate(step.assign_add(1)) + def testDecay(self): + initial_lr = 0.1 + k = 10 + decay_rate = 0.96 + step = tf.Variable(0) + decayed_lr = tf.compat.v1.train.natural_exp_decay( + initial_lr, step, k, decay_rate + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + for i in range(k + 1): + expected = initial_lr * math.exp(-i / k * decay_rate) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + self.evaluate(step.assign_add(1)) + + def testStaircase(self): + initial_lr = 0.1 + k = 10 + decay_rate = 0.96 + step = tf.Variable(0) + decayed_lr = tf.compat.v1.train.natural_exp_decay( + initial_lr, step, k, decay_rate, staircase=True + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + for i in range(k + 1): + expected = initial_lr * math.exp(-decay_rate * (i // k)) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + self.evaluate(step.assign_add(1)) @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class InverseDecayTest(test_combinations.TestCase): - - def testDecay(self): - initial_lr = 0.1 - k = 10 - decay_rate = 0.96 - step = tf.Variable(0) - decayed_lr = tf.compat.v1.train.inverse_time_decay(initial_lr, step, k, - decay_rate) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - for i in range(k + 1): - expected = initial_lr / (1 + i / k * decay_rate) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - self.evaluate(step.assign_add(1)) - - def testStaircase(self): - initial_lr = 0.1 - k = 10 - 
decay_rate = 0.96 - step = tf.Variable(0) - decayed_lr = tf.compat.v1.train.inverse_time_decay( - initial_lr, step, k, decay_rate, staircase=True) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - for i in range(k + 1): - expected = initial_lr / (1 + decay_rate * (i // k)) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - self.evaluate(step.assign_add(1)) + def testDecay(self): + initial_lr = 0.1 + k = 10 + decay_rate = 0.96 + step = tf.Variable(0) + decayed_lr = tf.compat.v1.train.inverse_time_decay( + initial_lr, step, k, decay_rate + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + for i in range(k + 1): + expected = initial_lr / (1 + i / k * decay_rate) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + self.evaluate(step.assign_add(1)) + + def testStaircase(self): + initial_lr = 0.1 + k = 10 + decay_rate = 0.96 + step = tf.Variable(0) + decayed_lr = tf.compat.v1.train.inverse_time_decay( + initial_lr, step, k, decay_rate, staircase=True + ) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + for i in range(k + 1): + expected = initial_lr / (1 + decay_rate * (i // k)) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + self.evaluate(step.assign_add(1)) @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class CosineDecayTest(test_combinations.TestCase): - - def np_cosine_decay(self, step, decay_steps, alpha=0.0): - step = min(step, decay_steps) - completed_fraction = step / decay_steps - decay = 0.5 * (1.0 + math.cos(math.pi * completed_fraction)) - return (1.0 - alpha) * decay + alpha - - def testDecay(self): - num_training_steps = 1000 - initial_lr = 1.0 - for step in range(0, 1500, 250): - decayed_lr = tf.compat.v1.train.cosine_decay(initial_lr, step, - num_training_steps) - expected = self.np_cosine_decay(step, num_training_steps) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testAlpha(self): - num_training_steps = 1000 - initial_lr = 1.0 - alpha = 0.1 - for step in range(0, 1500, 250): - decayed_lr = tf.compat.v1.train.cosine_decay(initial_lr, step, - num_training_steps, alpha) - expected = self.np_cosine_decay(step, num_training_steps, alpha) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + def np_cosine_decay(self, step, decay_steps, alpha=0.0): + step = min(step, decay_steps) + completed_fraction = step / decay_steps + decay = 0.5 * (1.0 + math.cos(math.pi * completed_fraction)) + return (1.0 - alpha) * decay + alpha + + def testDecay(self): + num_training_steps = 1000 + initial_lr = 1.0 + for step in range(0, 1500, 250): + decayed_lr = tf.compat.v1.train.cosine_decay( + initial_lr, step, num_training_steps + ) + expected = self.np_cosine_decay(step, num_training_steps) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testAlpha(self): + num_training_steps = 1000 + initial_lr = 1.0 + alpha = 0.1 + for step in range(0, 1500, 250): + decayed_lr = tf.compat.v1.train.cosine_decay( + initial_lr, step, num_training_steps, alpha + ) + expected = self.np_cosine_decay(step, num_training_steps, alpha) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class CosineDecayRestartsTest(test_combinations.TestCase): - - def np_cosine_decay_restarts(self, step, decay_steps, t_mul=2.0, m_mul=1.0, - alpha=0.0): - fac = 1.0 - while step >= decay_steps: - step -= decay_steps - decay_steps *= t_mul - fac *= m_mul - - 
completed_fraction = step / decay_steps - decay = fac * 0.5 * (1.0 + math.cos(math.pi * completed_fraction)) - return (1.0 - alpha) * decay + alpha - - def testDecay(self): - num_training_steps = 1000 - initial_lr = 1.0 - for step in range(0, 1500, 250): - decayed_lr = tf.compat.v1.train.cosine_decay_restarts( - initial_lr, step, num_training_steps) - expected = self.np_cosine_decay_restarts(step, num_training_steps) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testAlpha(self): - num_training_steps = 1000 - initial_lr = 1.0 - alpha = 0.1 - for step in range(0, 1500, 250): - decayed_lr = tf.compat.v1.train.cosine_decay_restarts( - initial_lr, step, num_training_steps, alpha=alpha) - expected = self.np_cosine_decay_restarts( - step, num_training_steps, alpha=alpha) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testMMul(self): - num_training_steps = 1000 - initial_lr = 1.0 - m_mul = 0.9 - for step in range(0, 1500, 250): - decayed_lr = tf.compat.v1.train.cosine_decay_restarts( - initial_lr, step, num_training_steps, m_mul=m_mul) - expected = self.np_cosine_decay_restarts( - step, num_training_steps, m_mul=m_mul) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testTMul(self): - num_training_steps = 1000 - initial_lr = 1.0 - t_mul = 1.0 - for step in range(0, 1500, 250): - decayed_lr = tf.compat.v1.train.cosine_decay_restarts( - initial_lr, step, num_training_steps, t_mul=t_mul) - expected = self.np_cosine_decay_restarts( - step, num_training_steps, t_mul=t_mul) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + def np_cosine_decay_restarts( + self, step, decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0 + ): + fac = 1.0 + while step >= decay_steps: + step -= decay_steps + decay_steps *= t_mul + fac *= m_mul + + completed_fraction = step / decay_steps + decay = fac * 0.5 * (1.0 + math.cos(math.pi * completed_fraction)) + return (1.0 - alpha) * decay + alpha + + def testDecay(self): + num_training_steps = 1000 + initial_lr = 1.0 + for step in range(0, 1500, 250): + decayed_lr = tf.compat.v1.train.cosine_decay_restarts( + initial_lr, step, num_training_steps + ) + expected = self.np_cosine_decay_restarts(step, num_training_steps) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testAlpha(self): + num_training_steps = 1000 + initial_lr = 1.0 + alpha = 0.1 + for step in range(0, 1500, 250): + decayed_lr = tf.compat.v1.train.cosine_decay_restarts( + initial_lr, step, num_training_steps, alpha=alpha + ) + expected = self.np_cosine_decay_restarts( + step, num_training_steps, alpha=alpha + ) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testMMul(self): + num_training_steps = 1000 + initial_lr = 1.0 + m_mul = 0.9 + for step in range(0, 1500, 250): + decayed_lr = tf.compat.v1.train.cosine_decay_restarts( + initial_lr, step, num_training_steps, m_mul=m_mul + ) + expected = self.np_cosine_decay_restarts( + step, num_training_steps, m_mul=m_mul + ) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testTMul(self): + num_training_steps = 1000 + initial_lr = 1.0 + t_mul = 1.0 + for step in range(0, 1500, 250): + decayed_lr = tf.compat.v1.train.cosine_decay_restarts( + initial_lr, step, num_training_steps, t_mul=t_mul + ) + expected = self.np_cosine_decay_restarts( + step, num_training_steps, t_mul=t_mul + ) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) @test_combinations.generate(test_combinations.combine(mode=["graph", 
"eager"])) class LinearCosineDecayTest(test_combinations.TestCase): - - def np_linear_cosine_decay(self, - step, - decay_steps, - alpha=0.0, - beta=0.001, - num_periods=0.5): - step = min(step, decay_steps) - linear_decayed = float(decay_steps - step) / decay_steps - fraction = 2.0 * num_periods * step / float(decay_steps) - cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * fraction)) - return (alpha + linear_decayed) * cosine_decayed + beta - - def testDefaultDecay(self): - num_training_steps = 1000 - initial_lr = 1.0 - for step in range(0, 1500, 250): - decayed_lr = tf.compat.v1.train.linear_cosine_decay( - initial_lr, step, num_training_steps) - expected = self.np_linear_cosine_decay(step, num_training_steps) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - - def testNonDefaultDecay(self): - num_training_steps = 1000 - initial_lr = 1.0 - for step in range(0, 1500, 250): - decayed_lr = tf.compat.v1.train.linear_cosine_decay( - initial_lr, - step, - num_training_steps, - alpha=0.1, - beta=1e-4, - num_periods=5) - expected = self.np_linear_cosine_decay( - step, num_training_steps, alpha=0.1, beta=1e-4, num_periods=5) - self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + def np_linear_cosine_decay( + self, step, decay_steps, alpha=0.0, beta=0.001, num_periods=0.5 + ): + step = min(step, decay_steps) + linear_decayed = float(decay_steps - step) / decay_steps + fraction = 2.0 * num_periods * step / float(decay_steps) + cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * fraction)) + return (alpha + linear_decayed) * cosine_decayed + beta + + def testDefaultDecay(self): + num_training_steps = 1000 + initial_lr = 1.0 + for step in range(0, 1500, 250): + decayed_lr = tf.compat.v1.train.linear_cosine_decay( + initial_lr, step, num_training_steps + ) + expected = self.np_linear_cosine_decay(step, num_training_steps) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) + + def testNonDefaultDecay(self): + num_training_steps = 1000 + initial_lr = 1.0 + for step in range(0, 1500, 250): + decayed_lr = tf.compat.v1.train.linear_cosine_decay( + initial_lr, + step, + num_training_steps, + alpha=0.1, + beta=1e-4, + num_periods=5, + ) + expected = self.np_linear_cosine_decay( + step, num_training_steps, alpha=0.1, beta=1e-4, num_periods=5 + ) + self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class NoisyLinearCosineDecayTest(test_combinations.TestCase): - - def testDefaultNoisyLinearCosine(self): - num_training_steps = 1000 - initial_lr = 1.0 - for step in range(0, 1500, 250): - # No numerical check because of noise - decayed_lr = tf.compat.v1.train.noisy_linear_cosine_decay( - initial_lr, step, num_training_steps) - # Cannot be deterministically tested - self.evaluate(decayed_lr) - - def testNonDefaultNoisyLinearCosine(self): - num_training_steps = 1000 - initial_lr = 1.0 - for step in range(0, 1500, 250): - # No numerical check because of noise - decayed_lr = tf.compat.v1.train.noisy_linear_cosine_decay( - initial_lr, - step, - num_training_steps, - initial_variance=0.5, - variance_decay=0.1, - alpha=0.1, - beta=1e-4, - num_periods=5) - # Cannot be deterministically tested - self.evaluate(decayed_lr) + def testDefaultNoisyLinearCosine(self): + num_training_steps = 1000 + initial_lr = 1.0 + for step in range(0, 1500, 250): + # No numerical check because of noise + decayed_lr = tf.compat.v1.train.noisy_linear_cosine_decay( + initial_lr, step, num_training_steps + ) + # Cannot 
be deterministically tested + self.evaluate(decayed_lr) + + def testNonDefaultNoisyLinearCosine(self): + num_training_steps = 1000 + initial_lr = 1.0 + for step in range(0, 1500, 250): + # No numerical check because of noise + decayed_lr = tf.compat.v1.train.noisy_linear_cosine_decay( + initial_lr, + step, + num_training_steps, + initial_variance=0.5, + variance_decay=0.1, + alpha=0.1, + beta=1e-4, + num_periods=5, + ) + # Cannot be deterministically tested + self.evaluate(decayed_lr) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/optimizers/lion.py b/keras/optimizers/lion.py new file mode 100644 index 000000000000..8c9084981018 --- /dev/null +++ b/keras/optimizers/lion.py @@ -0,0 +1,167 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Lion optimizer implementation.""" + +import tensorflow.compat.v2 as tf + +from keras.optimizers import optimizer +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export("keras.optimizers.Lion", v1=[]) +class Lion(optimizer.Optimizer): + """Optimizer that implements the Lion algorithm. + + The Lion optimizer is a stochastic-gradient-descent method that uses the + sign operator to control the magnitude of the update, unlike other adaptive + optimizers such as Adam that rely on second-order moments. This makes + Lion more memory-efficient as it only keeps track of the momentum. According + to the authors (see reference), its performance gain over Adam grows with + the batch size. Because the update of Lion is produced through the sign + operation, resulting in a larger norm, a suitable learning rate for Lion is + typically 3-10x smaller than that for AdamW. The weight decay for Lion + should be in turn 3-10x larger than that for AdamW to maintain a + similar strength (lr * wd). + + Args: + learning_rate: A `tf.Tensor`, floating point value, a schedule that is a + `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable + that takes no arguments and returns the actual value to use. The + learning rate. Defaults to 0.0001. + beta_1: A float value or a constant float tensor, or a callable + that takes no arguments and returns the actual value to use. The + rate to combine the current gradient and the 1st moment estimate. + beta_2: A float value or a constant float tensor, or a callable + that takes no arguments and returns the actual value to use. The + exponential decay rate for the 1st moment estimate.
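In update-rule form, a single Lion step on dense gradients looks like the sketch below, which mirrors `lion_update_numpy` from the accompanying test file:

```python
import numpy as np


def lion_step(param, grad, momentum, lr=1e-4, beta_1=0.9, beta_2=0.99):
    # Illustrative helper, not part of the patch. The update direction is
    # only a sign, so each coordinate moves by exactly lr regardless of
    # gradient scale; the single momentum buffer is the only optimizer
    # state kept per parameter.
    param = param - lr * np.sign(beta_1 * momentum + (1.0 - beta_1) * grad)
    momentum = beta_2 * momentum + (1.0 - beta_2) * grad
    return param, momentum
```

That fixed per-coordinate step size is why the docstring recommends a 3-10x smaller learning rate (and correspondingly larger weight decay) than for AdamW.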
+ {{base_optimizer_keyword_args}} + + References: + - [Chen et al., 2023](http://arxiv.org/abs/2302.06675) + - [Authors' implementation]( + http://github.com/google/automl/tree/master/lion) + + """ + + def __init__( + self, + learning_rate=0.0001, + beta_1=0.9, + beta_2=0.99, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + name="Lion", + **kwargs, + ): + super().__init__( + name=name, + weight_decay=weight_decay, + clipnorm=clipnorm, + clipvalue=clipvalue, + global_clipnorm=global_clipnorm, + use_ema=use_ema, + ema_momentum=ema_momentum, + ema_overwrite_frequency=ema_overwrite_frequency, + jit_compile=jit_compile, + **kwargs, + ) + self._learning_rate = self._build_learning_rate(learning_rate) + self.beta_1 = beta_1 + self.beta_2 = beta_2 + if beta_1 <= 0 or beta_1 > 1: + raise ValueError( + f"`beta_1`={beta_1} must be between ]0, 1]. Otherwise, " + "the optimizer degenerates to SignSGD." + ) + + def build(self, var_list): + """Initialize optimizer variables. + + Lion optimizer has one variable `momentums`. + + Args: + var_list: list of model variables to build Lion variables on. + """ + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self.momentums = [] + for var in var_list: + self.momentums.append( + self.add_variable_from_reference( + model_variable=var, variable_name="m" + ) + ) + self._built = True + + def update_step(self, gradient, variable): + """Update step given gradient and the associated model variable.""" + lr = tf.cast(self.learning_rate, variable.dtype) + beta_1 = tf.cast(self.beta_1, variable.dtype) + beta_2 = tf.cast(self.beta_2, variable.dtype) + var_key = self._var_key(variable) + m = self.momentums[self._index_dict[var_key]] + + if isinstance(gradient, tf.IndexedSlices): + # Sparse gradients (use m as a buffer) + m.assign(m * beta_1) + m.scatter_add( + tf.IndexedSlices( + gradient.values * (1.0 - beta_1), gradient.indices + ) + ) + variable.assign_sub(lr * tf.math.sign(m)) + + m.assign(m * beta_2 / beta_1) + m.scatter_add( + tf.IndexedSlices( + gradient.values * (1.0 - beta_2 / beta_1), gradient.indices + ) + ) + else: + # Dense gradients + variable.assign_sub( + lr * tf.math.sign(m * beta_1 + gradient * (1.0 - beta_1)) + ) + m.assign(m * beta_2 + gradient * (1.0 - beta_2)) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "beta_1": self.beta_1, + "beta_2": self.beta_2, + } + ) + return config + + +Lion.__doc__ = Lion.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/lion_test.py b/keras/optimizers/lion_test.py new file mode 100644 index 000000000000..6cd44066fd6e --- /dev/null +++ b/keras/optimizers/lion_test.py @@ -0,0 +1,149 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Lion.""" + +import numpy as np +import tensorflow.compat.v2 as tf +from tensorflow.python.framework import dtypes + +from keras.optimizers.lion import Lion + + +def lion_update_numpy( + params, + grads, + momentums, + learning_rate=0.0001, + beta_1=0.9, + beta_2=0.99, +): + params = params - learning_rate * np.sign( + beta_1 * momentums + (1 - beta_1) * grads + ) + momentums = beta_2 * momentums + (1 - beta_2) * grads + return params, momentums + + +class LionOptimizerTest(tf.test.TestCase): + def testDense(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + learning_rate = 0.0001 + beta_1 = 0.9 + beta_2 = 0.99 + with self.cached_session(): + m0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + m1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.9, 0.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.1, 0.0], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0 = tf.constant(grads0_np) + grads1 = tf.constant(grads1_np) + optimizer = Lion( + learning_rate=learning_rate, + beta_1=beta_1, + beta_2=beta_2, + ) + + # Run 3 steps of Lion + for _ in range(3): + optimizer.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + var0_np, m0_np = lion_update_numpy( + var0_np, + grads0_np, + m0_np, + learning_rate=learning_rate, + beta_1=beta_1, + beta_2=beta_2, + ) + var1_np, m1_np = lion_update_numpy( + var1_np, + grads1_np, + m1_np, + learning_rate=learning_rate, + beta_1=beta_1, + beta_2=beta_2, + ) + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0) + self.assertAllCloseAccordingToType(var1_np, var1) + + def testSparse(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + learning_rate = 0.0001 + beta_1 = 0.9 + beta_2 = 0.99 + with self.cached_session(): + m0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + m1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.9, 0.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.1, 0.0], dtype=dtype.as_numpy_dtype) + + var0 = tf.Variable(var0_np) + var1 = tf.Variable(var1_np) + grads0_np_indices = np.array([0], dtype=np.int32) + grads0 = tf.IndexedSlices( + tf.constant(grads0_np[grads0_np_indices]), + tf.constant(grads0_np_indices), + tf.constant([2]), + ) + grads1_np_indices = np.array([0], dtype=np.int32) + grads1 = tf.IndexedSlices( + tf.constant(grads1_np[grads1_np_indices]), + tf.constant(grads1_np_indices), + tf.constant([2]), + ) + + optimizer = Lion( + learning_rate=learning_rate, + beta_1=beta_1, + beta_2=beta_2, + ) + + # Run 3 steps of Lion + for _ in range(3): + optimizer.apply_gradients( + zip([grads0, grads1], [var0, var1]) + ) + var0_np, m0_np = lion_update_numpy( + var0_np, + grads0_np, + m0_np, + learning_rate=learning_rate, + beta_1=beta_1, + beta_2=beta_2, + ) + var1_np, m1_np = lion_update_numpy( + var1_np, + grads1_np, + m1_np, + learning_rate=learning_rate, + beta_1=beta_1, + beta_2=beta_2, + ) + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0) + 
self.assertAllCloseAccordingToType(var1_np, var1) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/nadam.py b/keras/optimizers/nadam.py new file mode 100644 index 000000000000..c24de740410c --- /dev/null +++ b/keras/optimizers/nadam.py @@ -0,0 +1,207 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Nadam optimizer implementation.""" + +import tensorflow.compat.v2 as tf + +from keras.optimizers import optimizer +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export( + "keras.optimizers.experimental.Nadam", "keras.optimizers.Nadam", v1=[] +) +class Nadam(optimizer.Optimizer): + r"""Optimizer that implements the Nadam algorithm. + + Much like Adam is essentially RMSprop with momentum, Nadam is Adam with + Nesterov momentum. + + Args: + learning_rate: A `tf.Tensor`, floating point value, a schedule that is a + `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable + that takes no arguments and returns the actual value to use. The + learning rate. Defaults to `0.001`. + beta_1: A float value or a constant float tensor, or a callable + that takes no arguments and returns the actual value to use. The + exponential decay rate for the 1st moment estimates. + Defaults to `0.9`. + beta_2: A float value or a constant float tensor, or a callable + that takes no arguments and returns the actual value to use. The + exponential decay rate for the 2nd moment estimates. Defaults to + `0.999`. + epsilon: A small constant for numerical stability. This epsilon is + "epsilon hat" in the Kingma and Ba paper (in the formula just before + Section 2.1), not the epsilon in Algorithm 1 of the paper. + Defaults to `1e-7`. + {{base_optimizer_keyword_args}} + + Reference: + - [Dozat, 2015](http://cs229.stanford.edu/proj2015/054_report.pdf). + + """ + + def __init__( + self, + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-7, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + name="Nadam", + **kwargs + ): + super().__init__( + name=name, + weight_decay=weight_decay, + clipnorm=clipnorm, + clipvalue=clipvalue, + global_clipnorm=global_clipnorm, + use_ema=use_ema, + ema_momentum=ema_momentum, + ema_overwrite_frequency=ema_overwrite_frequency, + jit_compile=jit_compile, + **kwargs + ) + self._learning_rate = self._build_learning_rate(learning_rate) + self.beta_1 = beta_1 + self.beta_2 = beta_2 + self.epsilon = epsilon + + def build(self, var_list): + """Initialize optimizer variables. + + Nadam optimizer has 2 types of variables: momentums and velocities. + + Args: + var_list: list of model variables to build Nadam variables on. 
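+ + A scalar `_u_product` variable is also created, caching the running product of the momentum-cache coefficients `u_t` used in `update_step`.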
+ """ + super().build(var_list) + if getattr(self, "_built", False): + return + self._built = True + self._momentums = [] + self._velocities = [] + self._u_product = tf.Variable(1.0, dtype=var_list[0].dtype) + # Keep a counter on how many times of _u_product has been computed to + # avoid duplicated computations. + self._u_product_counter = 1 + + for var in var_list: + self._momentums.append( + self.add_variable_from_reference( + model_variable=var, variable_name="m" + ) + ) + self._velocities.append( + self.add_variable_from_reference( + model_variable=var, variable_name="v" + ) + ) + + def update_step(self, gradient, variable): + """Update step given gradient and the associated model variable.""" + var_dtype = variable.dtype + lr = tf.cast(self.learning_rate, var_dtype) + local_step = tf.cast(self.iterations + 1, var_dtype) + next_step = tf.cast(self.iterations + 2, var_dtype) + decay = tf.cast(0.96, var_dtype) + beta_1 = tf.cast(self.beta_1, var_dtype) + beta_2 = tf.cast(self.beta_2, var_dtype) + u_t = beta_1 * (1.0 - 0.5 * (tf.pow(decay, local_step))) + u_t_1 = beta_1 * (1.0 - 0.5 * (tf.pow(decay, next_step))) + + def get_cached_u_product(): + return self._u_product + + def compute_new_u_product(): + u_product_t = self._u_product * u_t + self._u_product.assign(u_product_t) + self._u_product_counter += 1 + return u_product_t + + u_product_t = tf.cond( + self._u_product_counter == (self.iterations + 2), + true_fn=get_cached_u_product, + false_fn=compute_new_u_product, + ) + u_product_t_1 = u_product_t * u_t_1 + beta_2_power = tf.pow(beta_2, local_step) + + var_key = self._var_key(variable) + m = self._momentums[self._index_dict[var_key]] + v = self._velocities[self._index_dict[var_key]] + + if isinstance(gradient, tf.IndexedSlices): + # Sparse gradients. + m.assign_add(-m * (1 - beta_1)) + m.scatter_add( + tf.IndexedSlices( + gradient.values * (1 - beta_1), gradient.indices + ) + ) + v.assign_add(-v * (1 - beta_2)) + v.scatter_add( + tf.IndexedSlices( + tf.square(gradient.values) * (1 - beta_2), gradient.indices + ) + ) + m_hat = u_t_1 * m / (1 - u_product_t_1) + (1 - u_t) * gradient / ( + 1 - u_product_t + ) + v_hat = v / (1 - beta_2_power) + + variable.assign_sub((m_hat * lr) / (tf.sqrt(v_hat) + self.epsilon)) + else: + # Dense gradients. + m.assign_add((gradient - m) * (1 - beta_1)) + v.assign_add((tf.square(gradient) - v) * (1 - beta_2)) + m_hat = u_t_1 * m / (1 - u_product_t_1) + (1 - u_t) * gradient / ( + 1 - u_product_t + ) + v_hat = v / (1 - beta_2_power) + + variable.assign_sub((m_hat * lr) / (tf.sqrt(v_hat) + self.epsilon)) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "beta_1": self.beta_1, + "beta_2": self.beta_2, + "epsilon": self.epsilon, + } + ) + return config + + +Nadam.__doc__ = Nadam.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/optimizer.py b/keras/optimizers/optimizer.py new file mode 100644 index 000000000000..59f343182ad7 --- /dev/null +++ b/keras/optimizers/optimizer.py @@ -0,0 +1,1403 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Base class of optimizer.""" + +import abc +import platform +import re + +import tensorflow.compat.v2 as tf +from absl import logging + +from keras import backend +from keras import initializers +from keras.dtensor import utils as dtensor_utils +from keras.optimizers import utils as optimizer_utils +from keras.optimizers.schedules import learning_rate_schedule +from keras.utils import tf_utils + +# isort: off +from tensorflow.python.util.tf_export import keras_export +from tensorflow.tools.docs import doc_controls + + +class _BaseOptimizer(tf.__internal__.tracking.AutoTrackable): + """Optimizer base class, which only supports non-distribute use case.""" + + def __init__( + self, + name, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + **kwargs, + ): + self.name = name + self.weight_decay = weight_decay + self.clipnorm = clipnorm + self.global_clipnorm = global_clipnorm + self.clipvalue = clipvalue + self.use_ema = use_ema + # Optimizer only benefits from XLA when training on GPU. So if no + # GPU is found, we turn off XLA. + if ( + jit_compile + and tf_utils.can_jit_compile() + and tf.config.list_physical_devices("GPU") + ): + self.jit_compile = True + else: + self.jit_compile = False + + if platform.system() == "Darwin" and platform.processor() == "arm": + logging.warning( + "At this time, the v2.11+ optimizer " + f"`tf.keras.optimizers.{self.__class__.__name__}` runs slowly " + "on M1/M2 Macs, please use the legacy Keras optimizer " + "instead, located at " + f"`tf.keras.optimizers.legacy.{self.__class__.__name__}`." + ) + + if use_ema: + # Verify the arguments related to EMA. + if ema_momentum > 1 or ema_momentum < 0: + raise ValueError( + "`ema_momentum` must be in the range [0, 1]. " + f"Received: ema_momentum={ema_momentum}" + ) + if ema_overwrite_frequency and ( + not isinstance(ema_overwrite_frequency, int) + or ema_overwrite_frequency < 1 + ): + raise ValueError( + "`ema_overwrite_frequency` must be an integer > 1 or None. " + "Received: ema_overwrite_frequency=" + f"{ema_overwrite_frequency}" + ) + self.ema_momentum = ema_momentum + self.ema_overwrite_frequency = ema_overwrite_frequency + + if self.clipnorm is not None and self.global_clipnorm is not None: + raise ValueError( + "At most one of `clipnorm` and `global_clipnorm` can " + f"be set. Received: clipnorm={self.clipnorm}, " + f"global_clipnorm={self.global_clipnorm}." + ) + + self._variables = [] + self._create_iteration_variable() + self._process_kwargs(kwargs) + + def _create_iteration_variable(self): + """Create the iterations counter variable.""" + with tf.init_scope(): + # Lift the variable creation to init scope to avoid environment + # issue. + self._iterations = tf.Variable( + 0, name="iteration", dtype=tf.int64, trainable=False + ) + self._variables.append(self._iterations) + + def _process_kwargs(self, kwargs): + # Remove the `is_legacy_optimizer` arg, which is for serialization only. 
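+ # (`get_config()` writes this key, so configs passed back through + # `from_config()` would otherwise trip the unknown-argument check below.)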
+ kwargs.pop("is_legacy_optimizer", None) + lr = kwargs.pop("lr", None) + if lr: + logging.warning( + "`lr` is deprecated in Keras optimizer, please use " + "`learning_rate` or use the legacy optimizer, e.g.," + f"tf.keras.optimizers.legacy.{self.__class__.__name__}." + ) + legacy_kwargs = { + "decay", + "gradient_aggregator", + "gradient_transformers", + } + for k in kwargs: + if k in legacy_kwargs: + raise ValueError( + f"{k} is deprecated in the new Keras optimizer, please " + "check the docstring for valid arguments, or use the " + "legacy optimizer, e.g., " + f"tf.keras.optimizers.legacy.{self.__class__.__name__}." + ) + else: + raise TypeError( + f"{k} is not a valid argument, kwargs should be empty " + " for `optimizer_experimental.Optimizer`." + ) + + def _create_or_restore_slot_variable(self, **kwargs): + raise ValueError( + "You are trying to restore a checkpoint from a legacy Keras " + "optimizer into a v2.11+ Optimizer, which can cause " + "errors. Please update the optimizer referenced in your code " + "to be an instance of " + "`tf.keras.optimizers.legacy.Optimizer`, e.g.: " + f"`tf.keras.optimizers.legacy.{self.__class__.__name__}`." + ) + + def _var_key(self, variable): + """Get a unique identifier of the given variable.""" + # Get the distributed variable if it exists. + # TODO(b/199214315): replace _unique_id with ref() after fixing ref() + # issues on AggregatingVariable. + return variable._unique_id + + def _deduplicate_sparse_grad(self, grads): + """Deduplicate sparse gradient. + + For sparse gradients, i.e., gradient is of type `tf.IndexedSlices`, + it is possible that `gradient.indices` has duplicated indices. + This function adds up values for the duplicated indices, and returns + a `tf.IndexedSlices` with indices of unique values. + """ + processed_grads = [] + for grad in grads: + if isinstance(grad, tf.IndexedSlices): + values = grad.values + indices = grad.indices + unique_indices, new_index_positions = tf.unique(indices) + summed_values = tf.math.unsorted_segment_sum( + values, new_index_positions, tf.shape(unique_indices)[0] + ) + processed_grads.append( + tf.IndexedSlices( + summed_values, unique_indices, grad.dense_shape + ) + ) + else: + processed_grads.append(grad) + + return processed_grads + + @abc.abstractmethod + def update_step(self, gradient, variable): + """Function to update variable value based on given gradients. + + This method must be implemented in customized optimizers. + + Args: + gradient: backpropagated gradient of the given variable. + variable: variable whose value needs to be updated. + + Returns: + An `Operation` that applies the specified gradients. + + """ + raise NotImplementedError + + @tf.function(jit_compile=True) + def _update_step_xla(self, gradient, variable, key): + """A wrapper of `update_step` to enable XLA acceleration. + + Due to `tf.function` tracing mechanism, for (gradient, variable) pairs + of the same shape and dtype, the execution graph always invoke the first + pair it has seen. Thus, we need a `key` argument to make each (gradient, + variable) pair unique. In additions, XLA cannot understand string input, + so the key is an integer. + + Args: + gradient: backpropagated gradient of the given variable. + variable: variable whose value needs to be updated. + key (int): a unique key that identifies the variable. + + Returns: + An `Operation` that applies the specified gradients. 
+ """ + return self._update_step(gradient, variable) + + def _update_step(self, gradient, variable): + if getattr(variable, "_unique_id", None) is None: + # Variable has no `_unique_id` if called during `model.save()`, in + # which case we do not want to update the variable. + return + if self._var_key(variable) not in self._index_dict: + raise KeyError( + f"The optimizer cannot recognize variable {variable.name}. " + "This usually means you are trying to call the optimizer to " + "update different parts of the model separately. Please call " + "`optimizer.build(variables)` with the full list of trainable " + "variables before the training loop or use legacy optimizer " + f"`tf.keras.optimizers.legacy.{self.__class__.__name__}." + ) + self.update_step(gradient, variable) + + def compute_gradients(self, loss, var_list, tape=None): + """Compute gradients of loss on trainable variables. + + Args: + loss: `Tensor` or callable. If a callable, `loss` should take no + arguments and return the value to minimize. + var_list: list or tuple of `Variable` objects to update to minimize + `loss`, or a callable returning the list or tuple of `Variable` + objects. Use callable when the variable list would otherwise be + incomplete before `minimize` since the variables are created at the + first time `loss` is called. + tape: (Optional) `tf.GradientTape`. If `loss` is provided as a + `Tensor`, the tape that computed the `loss` must be provided. + + Returns: + A list of (gradient, variable) pairs. Variable is always present, but + gradient can be `None`. + """ + if not callable(loss) and tape is None: + raise ValueError( + "`tape` is required when a `Tensor` loss is passed. " + f"Received: loss={loss}, tape={tape}." + ) + if tape is None: + tape = tf.GradientTape() + if callable(loss): + with tape: + if not callable(var_list): + tape.watch(var_list) + loss = loss() + if callable(var_list): + var_list = var_list() + + grads = tape.gradient(loss, var_list) + return list(zip(grads, var_list)) + + def _clip_gradients(self, grads): + clipped_grads = [] + if self.clipnorm and self.clipnorm > 0: + for g in grads: + if g is None: + clipped_grads.append(g) + else: + clipped_grads.append(tf.clip_by_norm(g, self.clipnorm)) + return clipped_grads + + if self.global_clipnorm and self.global_clipnorm > 0: + return tf.clip_by_global_norm(grads, self.global_clipnorm)[0] + + if self.clipvalue and self.clipvalue > 0: + for g in grads: + if g is None: + clipped_grads.append(g) + else: + clipped_grads.append( + tf.clip_by_value( + g, + clip_value_min=-self.clipvalue, + clip_value_max=self.clipvalue, + ) + ) + return clipped_grads + + return grads + + @property + def iterations(self): + """The number of training steps this `optimizer` has run. + + By default, iterations would be incremented by one every time + `apply_gradients()` is called. + """ + return self._iterations + + @iterations.setter + def iterations(self, variable): + if getattr(self, "_built", False): + raise RuntimeError( + "Cannot set `iterations` to a new Variable after " + "the Optimizer weights have been created. Here it is " + f"attempting to set `iterations` to {variable}." + "Usually this means you are trying to set `iterations`" + " after calling `apply_gradients()`. Please set " + "`iterations` before calling `apply_gradients()`." 
+ ) + self._iterations = variable + + @property + def learning_rate(self): + if not hasattr(self, "_learning_rate") or self._learning_rate is None: + raise ValueError( + "Missing learning rate, please set self.learning_rate at" + " optimizer creation time." + ) + lr = self._learning_rate + if isinstance(lr, learning_rate_schedule.LearningRateSchedule): + # If the optimizer takes in LearningRateSchedule, then each call to + # learning_rate would return `self._current_learning_rate`, which is + # updated at each call to `apply_gradients`. + return self._current_learning_rate + return lr + + @learning_rate.setter + def learning_rate(self, learning_rate): + if isinstance( + learning_rate, learning_rate_schedule.LearningRateSchedule + ): + self._learning_rate = learning_rate + else: + if isinstance( + self._learning_rate, learning_rate_schedule.LearningRateSchedule + ): + raise TypeError( + "This optimizer was created with a `LearningRateSchedule`" + " object as its `learning_rate` constructor argument, " + "hence its learning rate is not settable. If you need the" + " learning rate to be settable, you should instantiate " + "the optimizer with a float `learning_rate` argument." + ) + self._learning_rate.assign(learning_rate) + + @property + @doc_controls.do_not_generate_docs + def lr(self): + """Alias of `learning_rate()`. + + `lr()` is heavily called in workflows using `optimizer_v2.OptimizerV2`, + so we keep it for backward compatibility. + """ + return self.learning_rate + + @lr.setter + def lr(self, learning_rate): + self.learning_rate = learning_rate + + def _build_learning_rate(self, learning_rate): + with tf.init_scope(): + if isinstance( + learning_rate, learning_rate_schedule.LearningRateSchedule + ): + # Create a variable to hold the current learning rate. + current_learning_rate = tf.convert_to_tensor( + learning_rate(self.iterations) + ) + self._current_learning_rate = tf.Variable( + current_learning_rate, + name="current_learning_rate", + dtype=current_learning_rate.dtype, + trainable=False, + ) + return learning_rate + + return tf.Variable( + learning_rate, + name="learning_rate", + dtype=backend.floatx(), + trainable=False, + ) + + @abc.abstractmethod + def build(self, var_list): + """Initialize the optimizer's variables, such as momentum variables. + + This function has to be implemented by subclass optimizers, and subclass + optimizers need to call `super().build(var_list)`. + + Args: + var_list: List of model variables to build optimizers on. For example, + SGD optimizer with momentum will store one momentum variable + corresponding to each model variable. + """ + if getattr(self, "_built", False): + return + self._build_index_dict(var_list) + if self.use_ema: + self._model_variables_moving_average = [] + for var in var_list: + # Make a copy of the model variables, we will use the copy to + # store the moving average of model variables. + self._model_variables_moving_average.append( + self.add_variable_from_reference( + var, "average", initial_value=var + ) + ) + + def _build_index_dict(self, var_list): + """Build variable to index dictionary. + + Build a dictionary that maps each variable to its index in the given + var_list. + + Args: + var_list: List of variables to build index dict on. + + Returns: + None + """ + self._index_dict = {} + for i, var in enumerate(var_list): + var_key = self._var_key(var) + self._index_dict[var_key] = i + + def add_variable(self, shape, dtype=None, initializer="zeros", name=None): + """Create an optimizer variable.
+ + Args: + shape: A list of integers, a tuple of integers, or a 1-D Tensor of + type int32. Defaults to scalar if unspecified. + dtype: The DType of the optimizer variable to be created. Defaults to + `tf.keras.backend.floatx` if unspecified. + initializer: string or callable. Initializer instance. + name: The name of the optimizer variable to be created. + + Returns: + An optimizer variable, in the format of tf.Variable. + + """ + if isinstance(initializer, str): + initializer = initializers.get(initializer) + if dtype is None: + dtype = backend.floatx() + if shape is None: + shape = [] + variable = tf.Variable( + initial_value=initializer(shape, dtype), name=name, trainable=False + ) + self._variables.append(variable) + return variable + + def add_variable_from_reference( + self, model_variable, variable_name, shape=None, initial_value=None + ): + """Create an optimizer variable from a model variable. + + Create an optimizer variable based on the information of the model + variable. For example, in SGD with momentum, for each model variable, a + corresponding momentum variable is created of the same shape and dtype. + + Args: + model_variable: tf.Variable. The corresponding model variable to the + optimizer variable to be created. + variable_name: String. The name prefix of the optimizer variable to be + created. The created variable's name will follow the pattern + `{variable_name}/{model_variable.name}`, e.g., `momentum/dense_1`. + shape: List or Tuple, defaults to None. The shape of the optimizer + variable to be created. If None, the created variable will have the + same shape as `model_variable`. + initial_value: A Tensor, or Python object convertible to a Tensor, + defaults to None. The initial value of the optimizer variable, if + None, the initial value will default to 0. + + Returns: + An optimizer variable. + """ + if initial_value is None: + if shape is None: + if model_variable.shape.rank is None: + # When the rank is None, we cannot get a concrete + # `model_variable.shape`, so we use dynamic shape. + initial_value = tf.zeros_like( + model_variable, dtype=model_variable.dtype + ) + else: + # We cannot always use `zeros_like`, because in some cases + # the shape exists while the values don't. + initial_value = tf.zeros( + model_variable.shape, dtype=model_variable.dtype + ) + else: + initial_value = tf.zeros(shape, dtype=model_variable.dtype) + variable = tf.Variable( + initial_value=initial_value, + name=f"{variable_name}/{model_variable._shared_name}", + dtype=model_variable.dtype, + trainable=False, + ) + self._variables.append(variable) + return variable + + def minimize(self, loss, var_list, tape=None): + """Minimize `loss` by updating `var_list`. + + This method simply computes the gradients using `tf.GradientTape` and + calls `apply_gradients()`. If you want to process the gradients before + applying them, then call `tf.GradientTape` and `apply_gradients()` + explicitly instead of using this function. + + Args: + loss: `Tensor` or callable. If a callable, `loss` should take no + arguments and return the value to minimize. + var_list: list or tuple of `Variable` objects to update to minimize + `loss`, or a callable returning the list or tuple of `Variable` + objects. Use callable when the variable list would otherwise be + incomplete before `minimize` since the variables are created the + first time `loss` is called. + tape: (Optional) `tf.GradientTape`.
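+ If `loss` is provided as a `Tensor`, the tape that computed it must be provided.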
+ + Returns: + None + """ + grads_and_vars = self.compute_gradients(loss, var_list, tape) + self.apply_gradients(grads_and_vars) + + def _compute_current_learning_rate(self): + if isinstance( + self._learning_rate, learning_rate_schedule.LearningRateSchedule + ): + # Compute the current learning rate at the beginning of variable + # update. + if hasattr(self, "_current_learning_rate"): + self._current_learning_rate.assign( + self._learning_rate(self.iterations) + ) + else: + current_learning_rate = tf.convert_to_tensor( + self._learning_rate(self.iterations) + ) + self._current_learning_rate = tf.Variable( + current_learning_rate, + name="current_learning_rate", + dtype=current_learning_rate.dtype, + trainable=False, + ) + + def exclude_from_weight_decay(self, var_list=None, var_names=None): + """Exclude variables from weight decay. + + This method must be called before the optimizer's `build` method is + called. You can either pass specific variables to exclude, or pass a + list of strings as anchor words: if any of them appears in a variable's + name, the variable is excluded. + + Args: + var_list: A list of `tf.Variable`s to exclude from weight decay. + var_names: A list of strings. If any string in `var_names` appears + in the model variable's name, then this model variable is + excluded from weight decay. For example, `var_names=['bias']` + excludes all bias variables from weight decay. + """ + if hasattr(self, "_built") and self._built: + raise ValueError( + "`exclude_from_weight_decay()` can only be configured before " + "the optimizer is built." + ) + + if var_list: + self._exclude_from_weight_decay = [ + self._var_key(variable) for variable in var_list + ] + else: + self._exclude_from_weight_decay = [] + self._exclude_from_weight_decay_names = var_names or [] + + def _use_weight_decay(self, variable): + exclude_from_weight_decay = getattr( + self, "_exclude_from_weight_decay", [] + ) + exclude_from_weight_decay_names = getattr( + self, "_exclude_from_weight_decay_names", [] + ) + variable_id = self._var_key(variable) + for exclude_id in exclude_from_weight_decay: + if variable_id == exclude_id: + return False + for name in exclude_from_weight_decay_names: + if re.search(name, variable.name) is not None: + return False + return True + + def apply_gradients(self, grads_and_vars, name=None): + """Apply gradients to variables. + + Args: + grads_and_vars: List of `(gradient, variable)` pairs. + name: string, defaults to None. The name of the name scope to + use when creating variables. If None, `self.name` will be used. + + Returns: + A `tf.Variable`, representing the current iteration. + + Raises: + TypeError: If `grads_and_vars` is malformed. + """ + self._compute_current_learning_rate() + grads_and_vars = list(grads_and_vars) + if len(grads_and_vars) == 0: + # It is possible that the list of grads is empty. In this case, + # `apply_gradients` is a no-op. + return self._iterations + grads, trainable_variables = zip(*grads_and_vars) + scope_name = name or self.name or "optimizer" + with tf.name_scope(scope_name): + with tf.init_scope(): + # Lift variable creation to init scope to avoid environment + # issues. + self.build(trainable_variables) + grads_and_vars = optimizer_utils.filter_empty_gradients( + grads_and_vars + ) + if len(list(grads_and_vars)) == 0: + # Check again after filtering gradients.
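+ # All remaining gradients were `None`; the update is a no-op and the + # iteration counter is returned unchanged.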
+ return self._iterations + + grads, trainable_variables = zip(*grads_and_vars) + + grads = self._clip_gradients(grads) + grads = self._deduplicate_sparse_grad(grads) + self._apply_weight_decay(trainable_variables) + grads_and_vars = list(zip(grads, trainable_variables)) + iteration = self._internal_apply_gradients(grads_and_vars) + + # Apply variable constraints after applying gradients. + for variable in trainable_variables: + if variable.constraint is not None: + variable.assign(variable.constraint(variable)) + return iteration + + def _apply_weight_decay(self, variables): + if self.weight_decay is None: + return + for variable in variables: + if self._use_weight_decay(variable): + lr = tf.cast(self.learning_rate, variable.dtype) + wd = tf.cast(self.weight_decay, variable.dtype) + variable.assign_sub(variable * wd * lr) + + def _internal_apply_gradients(self, grads_and_vars): + """Helper function of apply gradients. + + This is required for separating out distributed training logic. + + Args: + grads_and_vars: List of (gradient, variable) pairs. + """ + if self.jit_compile: + for grad, var in grads_and_vars: + self._update_step_xla(grad, var, id(self._var_key(var))) + else: + for grad, var in grads_and_vars: + self._update_step(grad, var) + return self.iterations.assign_add(1) + + def _update_model_variables_moving_average(self, var_list): + """Update the stored moving average using the latest value.""" + if self.use_ema: + for var in var_list: + average = self._model_variables_moving_average[ + self._index_dict[self._var_key(var)] + ] + average.assign( + self.ema_momentum * average + (1 - self.ema_momentum) * var + ) + + def _overwrite_model_variables_with_average_value(self, var_list): + """Overwrite model variables with its moving average.""" + for var in var_list: + average = self._model_variables_moving_average[ + self._index_dict[self._var_key(var)] + ] + var.assign(average) + + def finalize_variable_values(self, var_list): + """Set the final value of model's trainable variables. + + Sometimes there are some extra steps before ending the variable updates, + such as overriding the model variables with its average value. + + Args: + var_list: list of model variables. + """ + if self.use_ema: + # If the optimizer uses EMA, then when finalizing, we replace the + # model variable value with its moving average stored inside + # optimizer. + self._overwrite_model_variables_with_average_value(var_list) + + def _serialize_hyperparameter(self, hyperparameter): + """Serialize a hyperparameter that can be a numeric or callable.""" + if isinstance( + hyperparameter, learning_rate_schedule.LearningRateSchedule + ): + return learning_rate_schedule.serialize(hyperparameter) + if isinstance(hyperparameter, tf.Variable): + return hyperparameter.numpy() + if callable(hyperparameter): + return hyperparameter() + return hyperparameter + + def get_config(self): + """Returns the config of the optimizer. + + An optimizer config is a Python dictionary (serializable) + containing the configuration of an optimizer. + The same optimizer can be reinstantiated later + (without any saved state) from this configuration. + + Subclass optimizer should override this method to include other + hyperparameters. + + Returns: + Python dictionary. 
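+ + The returned config also stores `is_legacy_optimizer=False`, which marks this as a new-style (non-legacy) optimizer when the config is reloaded.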
+ """ + config = { + "name": self.name, + "weight_decay": self.weight_decay, + "clipnorm": self.clipnorm, + "global_clipnorm": self.global_clipnorm, + "clipvalue": self.clipvalue, + "use_ema": self.use_ema, + "ema_momentum": self.ema_momentum, + "ema_overwrite_frequency": self.ema_overwrite_frequency, + "jit_compile": self.jit_compile, + "is_legacy_optimizer": False, + } + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + """Creates an optimizer from its config. + + This method is the reverse of `get_config`, capable of instantiating the + same optimizer from the config dictionary. + + Args: + config: A Python dictionary, typically the output of get_config. + custom_objects: A Python dictionary mapping names to additional + user-defined Python objects needed to recreate this optimizer. + + Returns: + An optimizer instance. + """ + if "learning_rate" in config: + if isinstance(config["learning_rate"], dict): + config["learning_rate"] = learning_rate_schedule.deserialize( + config["learning_rate"], custom_objects=custom_objects + ) + return cls(**config) + + @property + def variables(self): + """Returns variables of this optimizer.""" + return CallableList(self._variables) + + def set_weights(self, weights): + """Set the weights of the optimizer. + + Args: + weights: a list of `tf.Variable`s or numpy arrays, the target values + of optimizer variables. It should have the same order as + `self._variables`. + """ + if not getattr(self, "_built", False): + raise ValueError( + "You are calling `set_weights()` on an optimizer that has not " + "yet been built. Please call " + "`optimizer.build(trainable_variables)` to create the " + "optimizer weights before calling `set_weights()`." + ) + + for variable, weight in zip(self._variables, weights): + if variable.shape != weight.shape: + raise ValueError( + f"Optimizer variable {self._var_key(variable)} has shape " + f"{str(variable.shape)} not compatible with provided " + f"weight shape {str(weight.shape)}." + ) + variable.assign(weight) + + def save_own_variables(self, store): + """Get the state of this optimizer object.""" + for i, variable in enumerate(self.variables): + store[str(i)] = variable.numpy() + + def load_own_variables(self, store): + """Set the state of this optimizer object.""" + if len(store.keys()) != len(self.variables): + msg = ( + f"Skipping variable loading for optimizer '{self.name}', " + f"because it has {len(self.variables)} variables whereas " + f"the saved optimizer has {len(store.keys())} variables. " + ) + if len(self.variables) == 0: + msg += ( + "This is likely because the optimizer has not been " + "called/built yet." + ) + logging.warning(msg) + return + for i, variable in enumerate(self.variables): + variable.assign(store[str(i)]) + + +base_optimizer_keyword_args = """name: String. The name to use + for momentum accumulator weights created by + the optimizer. + weight_decay: Float, defaults to None. If set, weight decay is applied. + clipnorm: Float. If set, the gradient of each weight is individually + clipped so that its norm is no higher than this value. + clipvalue: Float. If set, the gradient of each weight is clipped to be no + higher than this value. + global_clipnorm: Float. If set, the gradient of all weights is clipped so + that their global norm is no higher than this value. + use_ema: Boolean, defaults to False. If True, exponential moving average + (EMA) is applied. 
EMA consists of computing an exponential moving + average of the weights of the model (as the weight values change after + each training batch), and periodically overwriting the weights with + their moving average. + ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + This is the momentum to use when computing + the EMA of the model's weights: + `new_average = ema_momentum * old_average + (1 - ema_momentum) * + current_variable_value`. + ema_overwrite_frequency: Int or None, defaults to None. Only used if + `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + we overwrite the model variable by its moving average. + If None, the optimizer + does not overwrite model variables in the middle of training, and you + need to explicitly overwrite the variables at the end of training + by calling `optimizer.finalize_variable_values()` + (which updates the model + variables in-place). When using the built-in `fit()` training loop, + this happens automatically after the last epoch, + and you don't need to do anything. + jit_compile: Boolean, defaults to True. + If True, the optimizer will use XLA + compilation. If no GPU device is found, this flag will be ignored. + mesh: optional `tf.experimental.dtensor.Mesh` instance. When provided, + the optimizer will be run in DTensor mode, e.g. state + tracking variable will be a DVariable, and aggregation/reduction will + happen in the global DTensor context. + **kwargs: keyword arguments only used for backward compatibility.""" + + +@keras_export( + "keras.optimizers.Optimizer", + "keras.optimizers.experimental.Optimizer", + v1=[], +) +class Optimizer(_BaseOptimizer): + """Abstract optimizer base class. + + This class supports distributed training. If you want to implement your own + optimizer, please subclass this class instead of _BaseOptimizer. + + Args: + {{base_optimizer_keyword_args}} + + ### Usage + + ```python + # Create an optimizer with the desired parameters. + opt = keras.optimizers.SGD(learning_rate=0.1) + var1, var2 = tf.Variable(1.0), tf.Variable(2.0) + # `loss` is a callable that takes no argument and returns the value + # to minimize. + loss = lambda: 3 * var1 * var1 + 2 * var2 * var2 + # Call minimize to update the list of variables. + opt.minimize(loss, var_list=[var1, var2]) + ``` + + ### Processing gradients before applying them + + Calling `minimize()` takes care of both computing the gradients and + applying them to the variables. If you want to process the gradients + before applying them you can instead use the optimizer in three steps: + + 1. Compute the gradients with `tf.GradientTape`. + 2. Process the gradients as you wish. + 3. Apply the processed gradients with `apply_gradients()`. + + Example: + + ```python + # Create an optimizer. + opt = tf.keras.optimizers.experimental.SGD(learning_rate=0.1) + var1, var2 = tf.Variable(1.0), tf.Variable(2.0) + + # Compute the gradients for a list of variables. + with tf.GradientTape() as tape: + loss = 3 * var1 * var1 + 2 * var2 * var2 + grads = tape.gradient(loss, [var1, var2]) + + # Process the gradients. + grads[0] = grads[0] + 1 + + # Ask the optimizer to apply the gradients on variables. + opt.apply_gradients(zip(grads, [var1, var2])) + ``` + + ### Dynamic learning rate + + Dynamic learning rate can be achieved by setting learning rate as a built-in + or customized `tf.keras.optimizers.schedules.LearningRateSchedule`. 
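+ The schedule is evaluated at the optimizer's current `iterations` count on each `apply_gradients` call, so the learning rate automatically follows training progress.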
+ + Example: + + >>> var = tf.Variable(np.random.random(size=(1,))) + >>> learning_rate = tf.keras.optimizers.schedules.ExponentialDecay( + ... initial_learning_rate=.01, decay_steps=20, decay_rate=.1) + >>> opt = tf.keras.optimizers.experimental.SGD(learning_rate=learning_rate) + >>> loss = lambda: 3 * var + >>> opt.minimize(loss, var_list=[var]) + + ### Gradient clipping + + Users can clip the gradients before applying them to variables by setting + `clipnorm`, `clipvalue` and `global_clipnorm`. Note that at most one of + `clipnorm` and `global_clipnorm` can be set. + + Example: + + >>> opt = tf.keras.optimizers.experimental.SGD(learning_rate=1, clipvalue=1) + >>> var1, var2 = tf.Variable(2.0), tf.Variable(2.0) + >>> with tf.GradientTape() as tape: + ... loss = 2 * var1 + 2 * var2 + >>> grads = tape.gradient(loss, [var1, var2]) + >>> print([grads[0].numpy(), grads[1].numpy()]) + [2.0, 2.0] + >>> opt.apply_gradients(zip(grads, [var1, var2])) + >>> # Without clipping, we should get [0, 0], but as gradients are clipped + >>> # to have max value 1, we get [1.0, 1.0]. + >>> print([var1.numpy(), var2.numpy()]) + [1.0, 1.0] + + ### Using weight decay + + Weight decay in certain scenarios can boost the model's performance. Keras + has built-in support for weight decay in all optimizers. Users can apply + weight decay by setting the `weight_decay` argument. + + >>> opt = tf.keras.optimizers.experimental.SGD(1, weight_decay=0.004) + >>> grads, var1, var2 = tf.zeros(()), tf.Variable(2.0), tf.Variable(2.0) + >>> # You can exclude variables from weight decay, in this case we + >>> # exclude `var2`. + >>> opt.exclude_from_weight_decay(var_list=[var2]) + >>> opt.apply_gradients(zip([grads, grads], [var1, var2])) + >>> print([var1.numpy(), var2.numpy()]) + [1.992, 2.0] + + + ### Using exponential moving average + + Empirically it has been found that using the exponential moving average + (EMA) of the trained parameters of a deep network achieves better + performance than using the trained parameters directly. Keras optimizers + allow users to compute this moving average and overwrite the model + variables at the desired time. + + Example: + + ```python + # Create an SGD optimizer with EMA on. `ema_momentum` controls the decay + # rate of the moving average. `ema_momentum=1` means no decay and the stored + # moving average is always the model variable's initial value before + # training. Conversely, `ema_momentum=0` is equivalent to not using EMA. + # `ema_overwrite_frequency=3` means every 3 iterations, we overwrite the + # trainable variables with their moving average values. + opt = tf.keras.optimizers.experimental.SGD( + learning_rate=1, + use_ema=True, + ema_momentum=0.5, + ema_overwrite_frequency=3) + var1, var2 = tf.Variable(2.0), tf.Variable(2.0) + with tf.GradientTape() as tape: + loss = var1 + var2 + grads = tape.gradient(loss, [var1, var2]) + # First iteration: [var1, var2] = [1.0, 1.0] + opt.apply_gradients(zip(grads, [var1, var2])) + print([var1, var2]) + + # Second iteration: [var1, var2] = [0.0, 0.0] + opt.apply_gradients(zip(grads, [var1, var2])) + print([var1, var2]) + + # Third iteration, without EMA, we should see [var1, var2] = [-1.0, -1.0], + # but overwriting results in [var1, var2] = [-0.125, -0.125]. The full + # calculation for the moving average of var1 is: + # var1=2*0.5**3+1*(1-0.5)*0.5**2+0*(1-0.5)*0.5**1+(-1)*(1-0.5)=-0.125.
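+ # i.e., the EMA of the successive var1 values (initial 2, then 1, 0, -1) + # with momentum 0.5.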
+ opt.apply_gradients(zip(grads, [var1, var2])) + print([var1, var2]) + + ``` + When the optimizer is constructed with `use_ema=True`, in a custom training + loop users can explicitly call `finalize_variable_values()` to overwrite + trainable variables with their EMA values. `finalize_variable_values()` is + by default called at the end of `model.fit()`. + + ### Use with `tf.distribute.Strategy` + + This optimizer class is `tf.distribute.Strategy` aware, which means it + automatically sums gradients across all replicas. To aggregate gradients + yourself, call `apply_gradients` with `skip_gradients_aggregation` set to + True. This is useful if you need to process aggregated gradients. + + ```python + # This example is not runnable, it consists of dummy code for a simple + # tutorial. + strategy = tf.distribute.experimental.TPUStrategy() + + with strategy.scope(): + opt = tf.keras.optimizers.experimental.SGD() + model = magic_function_that_returns_model() + gradients = magic_function_that_returns_gradients() + # Custom logic to aggregate gradients. + gradients = strategy.reduce("SUM", gradients, axis=None) + opt.apply_gradients(zip(gradients, model.trainable_variables), + skip_gradients_aggregation=True) + ``` + + ### Creating a custom optimizer + + If you intend to create your own optimization algorithm, please inherit from + this class and override the following methods: + + - `build`: Create your optimizer-related variables, such as `momentums` in + the SGD optimizer. + - `update_step`: Implement your optimizer's updating logic. + - `get_config`: serialization of the optimizer; include all hyperparameters. + + Your optimizer would automatically be compatible with TensorFlow distributed + training if you subclass `optimizer_experimental.Optimizer`. + + """ + + def __init__( + self, + name, + weight_decay=0, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + **kwargs, + ): + """Create a new Optimizer.""" + mesh = kwargs.pop("mesh", None) + self._mesh = mesh + super().__init__( + name, + weight_decay, + clipnorm, + clipvalue, + global_clipnorm, + use_ema, + ema_momentum, + ema_overwrite_frequency, + jit_compile, + **kwargs, + ) + self._distribution_strategy = tf.distribute.get_strategy() + self._run_with_dtensor = dtensor_utils.running_with_dtensor_strategy() + + def add_variable_from_reference( + self, model_variable, variable_name, shape=None, initial_value=None + ): + if self._mesh: + if initial_value is None: + # Use tf.zeros_like which will propagate the layout information + # from the model weights if any.
+ initial_value = tf.zeros_like(model_variable) + elif isinstance(initial_value, tf.Tensor): + initial_value = tf.experimental.dtensor.copy_to_mesh( + initial_value, + tf.experimental.dtensor.Layout.replicated( + self._mesh, rank=initial_value.shape.rank + ), + ) + variable = tf.experimental.dtensor.DVariable( + initial_value=initial_value, + name=f"{variable_name}/{model_variable._shared_name}", + dtype=model_variable.dtype, + trainable=False, + ) + self._variables.append(variable) + return variable + else: + strategy = tf.distribute.get_strategy() + with strategy.extended.colocate_vars_with(model_variable): + return super().add_variable_from_reference( + model_variable, variable_name, shape, initial_value + ) + + def _create_iteration_variable(self): + if self._mesh: + init_val = tf.constant(0, dtype=tf.int64) + init_val = tf.experimental.dtensor.copy_to_mesh( + init_val, + tf.experimental.dtensor.Layout.replicated(self._mesh, rank=0), + ) + with tf.init_scope(): + # Lift the variable creation to init scope to avoid environment + # issue. + self._iterations = tf.experimental.dtensor.DVariable( + init_val, name="iteration" + ) + self._variables.append(self._iterations) + else: + super()._create_iteration_variable() + + def _var_key(self, variable): + """Get a unique identifier of the given variable.""" + + # Get the distributed variable if it exists. + # TODO(b/197554203): replace _distributed_container() with a public api. + if hasattr(variable, "_distributed_container"): + variable = variable._distributed_container() + elif ( + tf_utils.is_extension_type(variable) + and hasattr(variable, "handle") + and hasattr(variable.handle, "_distributed_container") + ): + # For ResourceVariables, the _distributed_container attribute + # is added to their handle tensors. + variable = variable.handle._distributed_container() + return super()._var_key(variable) + + def aggregate_gradients(self, grads_and_vars): + """Aggregate gradients on all devices. + + By default, we will perform reduce_sum of gradients across devices. + Users can implement their own aggregation logic by overriding this + method. + + Args: + grads_and_vars: List of (gradient, variable) pairs. + + Returns: + List of (gradient, variable) pairs. + """ + if self._mesh or self._run_with_dtensor: + logging.warning( + "Calling aggregate_gradients is unnecessary when the model " + "is used with DTensor, which includes aggregation of " + "replicated gradients as part of backward pass." + ) + return grads_and_vars + else: + return optimizer_utils.all_reduce_sum_gradients(grads_and_vars) + + def apply_gradients( + self, + grads_and_vars, + name=None, + skip_gradients_aggregation=False, + **kwargs, + ): + """Apply gradients to variables. + + Args: + grads_and_vars: List of `(gradient, variable)` pairs. + name: string, defaults to None. The name of the namescope to + use when creating variables. If None, `self.name` will be used. + skip_gradients_aggregation: If true, gradients aggregation will not be + performed inside optimizer. Usually this arg is set to True when you + write custom code aggregating gradients outside the optimizer. + **kwargs: keyword arguments only used for backward compatibility. + + Returns: + A `tf.Variable`, representing the current iteration. + + Raises: + TypeError: If `grads_and_vars` is malformed. + RuntimeError: If called in a cross-replica context. 
+ """ + if self._mesh or self._run_with_dtensor: + # Skip any usage of strategy logic for DTensor + return super().apply_gradients(grads_and_vars, name=name) + + # `experimental_aggregate_gradients` is an arg in `apply_gradients` of + # v2 optimizer -- the reverse of `skip_gradients_aggregation`. + # We read it from kwargs for backward compatibility. + experimental_aggregate_gradients = kwargs.pop( + "experimental_aggregate_gradients", True + ) + if not skip_gradients_aggregation and experimental_aggregate_gradients: + grads_and_vars = self.aggregate_gradients(grads_and_vars) + return super().apply_gradients(grads_and_vars, name=name) + + def _apply_weight_decay(self, variables): + # Apply weight decay in distributed setup. + if self.weight_decay is None: + return + + def distributed_apply_weight_decay(distribution, variables, **kwargs): + def weight_decay_fn(variable): + if self._use_weight_decay(variable): + lr = tf.cast(self.learning_rate, variable.dtype) + wd = tf.cast(self.weight_decay, variable.dtype) + variable.assign_sub(variable * wd * lr) + + for variable in variables: + distribution.extended.update( + variable, weight_decay_fn, group=False + ) + + tf.__internal__.distribute.interim.maybe_merge_call( + distributed_apply_weight_decay, + self._distribution_strategy, + variables, + ) + + def _internal_apply_gradients(self, grads_and_vars): + if self._mesh or self._run_with_dtensor: + # Skip any usage of strategy logic for DTensor + return super()._internal_apply_gradients(grads_and_vars) + + return tf.__internal__.distribute.interim.maybe_merge_call( + self._distributed_apply_gradients_fn, + self._distribution_strategy, + grads_and_vars, + ) + + def _overwrite_model_variables_with_average_value(self, var_list): + """Overwrite model variables with their moving average values. + + This function overwrites variables on each device. + Args: + var_list: list of model variables. + """ + if self._mesh or self._run_with_dtensor: + # Skip any usage of strategy logic for DTensor + super()._overwrite_model_variables_with_average_value(var_list) + + strategy = self._distribution_strategy + # Override model variable by the stored average value on all devices. + for var in var_list: + average = self._model_variables_moving_average[ + self._index_dict[self._var_key(var)] + ] + strategy.extended.update( + var, lambda a, b: a.assign(b), args=(average,) + ) + + def _build_learning_rate(self, learning_rate): + if not self._mesh: + return super()._build_learning_rate(learning_rate) + + # For DTensor + variable_creation = tf.experimental.dtensor.DVariable + init_value_convert_fn = lambda x: tf.experimental.dtensor.copy_to_mesh( + x, tf.experimental.dtensor.Layout.replicated(self._mesh, rank=0) + ) + if isinstance( + learning_rate, learning_rate_schedule.LearningRateSchedule + ): + current_learning_rate = tf.convert_to_tensor( + learning_rate(self.iterations) + ) + current_learning_rate = init_value_convert_fn(current_learning_rate) + # Create a variable to hold the current learning rate. + # Note that the init value `learning_rate(self.iterations)` should + # have the correct layout information from self.iterations. 
+ self._current_learning_rate = variable_creation( + current_learning_rate, + name="learning_rate", + dtype=tf.float32, + ) + return learning_rate + + init_val = init_value_convert_fn( + tf.constant(learning_rate, dtype=tf.float32) + ) + return variable_creation( + init_val, + name="learning_rate", + dtype=backend.floatx(), + trainable=False, + ) + + def _update_model_variables_moving_average(self, var_list): + """Update the stored moving average using the latest value.""" + if self.use_ema: + + def update_average(average, var): + average.assign( + self.ema_momentum * average + (1 - self.ema_momentum) * var + ) + + for var in var_list: + average = self._model_variables_moving_average[ + self._index_dict[self._var_key(var)] + ] + self._distribution_strategy.extended.update( + average, update_average, args=(var,), group=False + ) + + def _distributed_apply_gradients_fn( + self, distribution, grads_and_vars, **kwargs + ): + """`apply_gradients` using a `DistributionStrategy`.""" + + def apply_grad_to_update_var(var, grad): + if self.jit_compile: + return self._update_step_xla(grad, var, id(self._var_key(var))) + else: + return self._update_step(grad, var) + + for grad, var in grads_and_vars: + distribution.extended.update( + var, apply_grad_to_update_var, args=(grad,), group=False + ) + + if self.use_ema: + _, var_list = zip(*grads_and_vars) + self._update_model_variables_moving_average(var_list) + if self.ema_overwrite_frequency: + # Only when self.ema_overwrite_frequency is not None, we + # overwrite the model variables. + should_overwrite_model_vars = ( + self.iterations + 1 + ) % self.ema_overwrite_frequency == 0 + tf.cond( + tf.cast(should_overwrite_model_vars, tf.bool), + true_fn=lambda: self._overwrite_model_variables_with_average_value( # noqa: E501 + var_list + ), + false_fn=lambda: None, + ) + return self.iterations.assign_add(1) + + +class RestoredOptimizer(Optimizer): + def __init__(self): + super().__init__("RestoredOptimizer") + + def get_config(self): + raise NotImplementedError( + "Restoring functional Optimizers from SavedModels is not currently " + "supported. Please file a feature request if this limitation " + "bothers you." + ) + + +class CallableList(list): + """Temporary shim to support both `opt.variables()` and `opt.variables`.""" + + def __call__(self): + return self + + +# Register the optimizer for loading from saved_model purpose. +tf.__internal__.saved_model.load.register_revived_type( + "experimentalOptimizer", + lambda obj: isinstance(obj, Optimizer), + versions=[ + tf.__internal__.saved_model.load.VersionedTypeRegistration( + object_factory=lambda proto: RestoredOptimizer(), + version=2, + min_producer_version=1, + min_consumer_version=1, + ) + ], +) + +Optimizer.__doc__ = Optimizer.__doc__.replace( + "{{base_optimizer_keyword_args}}", base_optimizer_keyword_args +) diff --git a/keras/optimizers/optimizer_experimental/BUILD b/keras/optimizers/optimizer_experimental/BUILD deleted file mode 100644 index 834f3f5ff55f..000000000000 --- a/keras/optimizers/optimizer_experimental/BUILD +++ /dev/null @@ -1,75 +0,0 @@ -# Reworked keras optimizer. For more context, please refer to go/new-keras-optimizer. 
- -load("@org_keras//keras:keras.bzl", "distribute_py_test") - -package( - default_visibility = [ - "//keras:friends", - ], - licenses = ["notice"], -) - -py_library( - name = "optimizer", - srcs = [ - "__init__.py", - "adadelta.py", - "adagrad.py", - "adam.py", - "adamax.py", - "adamw.py", - "ftrl.py", - "nadam.py", - "optimizer.py", - "rmsprop.py", - "sgd.py", - ], - srcs_version = "PY3", - deps = [ - "//:expect_tensorflow_installed", - "//keras:backend", - "//keras/initializers", - "//keras/optimizers/optimizer_v2", - "//keras/optimizers/schedules:learning_rate_schedule", - ], -) - -distribute_py_test( - name = "optimizer_test", - size = "medium", - srcs = ["optimizer_test.py"], - shard_count = 8, - tags = [ - "multi_gpu", - "no_windows", - "nomultivm", # TODO(b/203558991): Re-enable. - ], - deps = [ - "//:expect_absl_installed", - "//:expect_tensorflow_installed", - "//keras", - "//keras/optimizers", - "//keras/testing_infra:test_combinations", - ], -) - -# TODO(b/228209527): Combine this test with optimizer_test after -# fixing the NCCL issue. -distribute_py_test( - name = "optimizer_pss_test", - size = "medium", - srcs = ["optimizer_pss_test.py"], - shard_count = 32, - tags = [ - "multi_gpu", - "no_oss", - "no_windows", - ], - deps = [ - "//:expect_absl_installed", - "//:expect_tensorflow_installed", - "//keras", - "//keras/optimizers", - "//keras/testing_infra:test_combinations", - ], -) diff --git a/keras/optimizers/optimizer_experimental/README.md b/keras/optimizers/optimizer_experimental/README.md deleted file mode 100644 index 1099d68727ff..000000000000 --- a/keras/optimizers/optimizer_experimental/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Reworked Keras Optimizer - -This directory contains code for [reworked Keras optimizer](go/new-keras-optimizer). -Code in this directory is still under development. To check out production -optimizer code, please refer to directory optimizer_v2/. - -The optimizer rework is mainly about reducing the complexity, and is transparent - to users. Optimizer's public api will remain the same as today. diff --git a/keras/optimizers/optimizer_experimental/adadelta.py b/keras/optimizers/optimizer_experimental/adadelta.py deleted file mode 100644 index deb788eb5977..000000000000 --- a/keras/optimizers/optimizer_experimental/adadelta.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Adadelta optimizer implementation.""" - -from keras.optimizers.optimizer_experimental import optimizer -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@generic_utils.register_keras_serializable() -@keras_export('keras.optimizers.experimental.Adadelta', v1=[]) -class Adadelta(optimizer.Optimizer): - r"""Optimizer that implements the Adadelta algorithm. 
- - Adadelta optimization is a stochastic gradient descent method that is based on - adaptive learning rate per dimension to address two drawbacks: - - - The continual decay of learning rates throughout training. - - The need for a manually selected global learning rate. - - Adadelta is a more robust extension of Adagrad that adapts learning rates - based on a moving window of gradient updates, instead of accumulating all - past gradients. This way, Adadelta continues learning even when many updates - have been done. Compared to Adagrad, in the original version of Adadelta you - don't have to set an initial learning rate. In this version, the initial - learning rate can be set, as in most other Keras optimizers. - - Args: - learning_rate: Initial value for the learning rate: - either a floating point value, - or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance. - Defaults to 0.001. - Note that `Adadelta` tends to benefit from higher initial learning rate - values compared to other optimizers. - To match the exact form in the original paper, use 1.0. - rho: A `Tensor` or a floating point value. The decay rate. Defaults to 0.95. - epsilon: Small floating point value used to maintain numerical stability. - Defaults to 1e-7. - {{base_optimizer_keyword_args}} - - Reference: - - [Zeiler, 2012](http://arxiv.org/abs/1212.5701) - """ - - def __init__(self, - learning_rate=0.001, - rho=0.95, - epsilon=1e-7, - clipnorm=None, - clipvalue=None, - global_clipnorm=None, - use_ema=False, - ema_momentum=0.99, - ema_overwrite_frequency=None, - jit_compile=True, - name='Adadelta', - **kwargs): - super().__init__( - clipnorm=clipnorm, - clipvalue=clipvalue, - global_clipnorm=global_clipnorm, - use_ema=use_ema, - ema_momentum=ema_momentum, - ema_overwrite_frequency=ema_overwrite_frequency, - jit_compile=jit_compile, - name=name, - **kwargs) - self._learning_rate = self._build_learning_rate(learning_rate) - self.rho = rho - self.epsilon = epsilon - - def build(self, var_list): - super().build(var_list) - if hasattr(self, '_built') and self._built: - return - self._built = True - self._accumulated_grads = [] - self._accumulated_delta_vars = [] - for var in var_list: - self._accumulated_grads.append( - self.add_variable_from_reference(var, 'accumulated_grad')) - self._accumulated_delta_vars.append( - self.add_variable_from_reference(var, 'accumulated_delta_var')) - - def update_step(self, grad, variable): - """Update step given gradient and the associated model variable.""" - lr = tf.cast(self.learning_rate, variable.dtype) - - var_key = self._var_key(variable) - rho = self.rho - accumulated_grad = self._accumulated_grads[self._index_dict[var_key]] - accumulated_delta_var = self._accumulated_delta_vars[ - self._index_dict[var_key]] - - def rms(x): - return tf.sqrt(x + self.epsilon) - - if isinstance(grad, tf.IndexedSlices): - # Sparse gradients. - accumulated_grad.assign_add((rho - 1) * accumulated_grad) - accumulated_grad.scatter_add(tf.IndexedSlices( - (1 - rho) * tf.square(grad.values), grad.indices)) - delta_var = -rms(accumulated_delta_var) * grad / rms(accumulated_grad) - accumulated_delta_var.assign(rho * accumulated_delta_var + - (1 - rho) * delta_var * delta_var) - else: - # Dense gradients. 
- accumulated_grad.assign(rho * accumulated_grad + (1 - rho) * grad * grad) - delta_var = -rms(accumulated_delta_var) * grad / rms(accumulated_grad) - accumulated_delta_var.assign(rho * accumulated_delta_var + - (1 - rho) * delta_var * delta_var) - variable.assign_add(lr * delta_var) - - def get_config(self): - config = super().get_config() - - config.update({ - 'learning_rate': self._serialize_hyperparameter(self._learning_rate), - 'rho': self.rho, - 'epsilon': self.epsilon, - }) - return config - -Adadelta.__doc__ = Adadelta.__doc__.replace( - '{{base_optimizer_keyword_args}}', optimizer.base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_experimental/adagrad.py b/keras/optimizers/optimizer_experimental/adagrad.py deleted file mode 100644 index a65bace9f185..000000000000 --- a/keras/optimizers/optimizer_experimental/adagrad.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Adagrad optimizer implementation.""" - -from keras import initializers -from keras.optimizers.optimizer_experimental import optimizer -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@generic_utils.register_keras_serializable() -@keras_export('keras.optimizers.experimental.Adagrad', v1=[]) -class Adagrad(optimizer.Optimizer): - r"""Optimizer that implements the Adagrad algorithm. - - Adagrad is an optimizer with parameter-specific learning rates, - which are adapted relative to how frequently a parameter gets - updated during training. The more updates a parameter receives, - the smaller the updates. - - Args: - learning_rate: Initial value for the learning rate: - either a floating point value, - or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance. - Defaults to 0.001. - Note that `Adagrad` tends to benefit from higher initial learning rate - values compared to other optimizers. - To match the exact form in the original paper, use 1.0. - initial_accumulator_value: Floating point value. - Starting value for the accumulators (per-parameter momentum values). - Must be non-negative. - epsilon: Small floating point value used to maintain numerical stability. - {{base_optimizer_keyword_args}} - - Reference: - - [Duchi et al., 2011]( - http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf). 
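The parameter-specific adaptation described above boils down, in the dense case, to two assignments. A standalone sketch with illustrative values (plain TensorFlow, not the optimizer class itself):

```python
import tensorflow as tf

lr, epsilon = 0.001, 1e-7
w = tf.Variable([0.5, -0.3])
accumulator = tf.Variable(tf.fill([2], 0.1))  # initial_accumulator_value
g = tf.constant([0.2, -0.1])  # gradient of the loss w.r.t. w

# Accumulate squared gradients; frequently updated parameters get
# smaller steps because their accumulator grows faster.
accumulator.assign_add(g * g)
w.assign_sub(lr * g / tf.sqrt(accumulator + epsilon))
```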
- """ - - def __init__(self, - learning_rate=0.001, - initial_accumulator_value=0.1, - epsilon=1e-7, - clipnorm=None, - clipvalue=None, - global_clipnorm=None, - use_ema=False, - ema_momentum=0.99, - ema_overwrite_frequency=None, - jit_compile=True, - name='Adagrad', - **kwargs): - super().__init__( - clipnorm=clipnorm, - clipvalue=clipvalue, - global_clipnorm=global_clipnorm, - use_ema=use_ema, - ema_momentum=ema_momentum, - ema_overwrite_frequency=ema_overwrite_frequency, - jit_compile=jit_compile, - name=name, - **kwargs) - self._learning_rate = self._build_learning_rate(learning_rate) - self.initial_accumulator_value = initial_accumulator_value - self.epsilon = epsilon - - def build(self, var_list): - super().build(var_list) - if hasattr(self, '_built') and self._built: - return - self._built = True - self._accumulators = [] - initializer = initializers.Constant(self.initial_accumulator_value) - for var in var_list: - self._accumulators.append( - self.add_variable_from_reference( - var, - 'accumulator', - initial_value=initializer(shape=var.shape, dtype=var.dtype))) - - def update_step(self, grad, variable): - """Update step given gradient and the associated model variable.""" - lr = tf.cast(self.learning_rate, variable.dtype) - - var_key = self._var_key(variable) - accumulator = self._accumulators[self._index_dict[var_key]] - - if isinstance(grad, tf.IndexedSlices): - # Sparse gradients. - accumulator.scatter_add( - tf.IndexedSlices(grad.values * grad.values, grad.indices)) - else: - # Dense gradients. - accumulator.assign_add(grad * grad) - variable.assign_sub(lr * grad / tf.sqrt(accumulator + self.epsilon)) - - def get_config(self): - config = super().get_config() - - config.update({ - 'learning_rate': self._serialize_hyperparameter(self._learning_rate), - 'initial_accumulator_value': self.initial_accumulator_value, - 'epsilon': self.epsilon, - }) - return config - - -Adagrad.__doc__ = Adagrad.__doc__.replace( - '{{base_optimizer_keyword_args}}', optimizer.base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_experimental/adam.py b/keras/optimizers/optimizer_experimental/adam.py deleted file mode 100644 index 5d7f271dc034..000000000000 --- a/keras/optimizers/optimizer_experimental/adam.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Adam optimizer implementation.""" - -from keras.optimizers.optimizer_experimental import optimizer -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@generic_utils.register_keras_serializable() -@keras_export('keras.optimizers.experimental.Adam', v1=[]) -class Adam(optimizer.Optimizer): - r"""Optimizer that implements the Adam algorithm. 
- - Adam optimization is a stochastic gradient descent method that is based on - adaptive estimation of first-order and second-order moments. - - According to - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980), - the method is "*computationally - efficient, has little memory requirement, invariant to diagonal rescaling of - gradients, and is well suited for problems that are large in terms of - data/parameters*". - - Args: - learning_rate: A `tf.Tensor`, floating point value, a schedule that is a - `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable - that takes no arguments and returns the actual value to use. The - learning rate. Defaults to 0.001. - beta_1: A float value or a constant float tensor, or a callable - that takes no arguments and returns the actual value to use. The - exponential decay rate for the 1st moment estimates. Defaults to 0.9. - beta_2: A float value or a constant float tensor, or a callable - that takes no arguments and returns the actual value to use. The - exponential decay rate for the 2nd moment estimates. Defaults to 0.999. - epsilon: A small constant for numerical stability. This epsilon is - "epsilon hat" in the Kingma and Ba paper (in the formula just before - Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to - 1e-7. - amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm from - the paper "On the Convergence of Adam and beyond". Defaults to `False`. - {{base_optimizer_keyword_args}} - - Reference: - - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) - - [Reddi et al., 2018]( - https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`. - - Notes: - - The default value of 1e-7 for epsilon might not be a good default in - general. For example, when training an Inception network on ImageNet a - current good choice is 1.0 or 0.1. Note that since Adam uses the - formulation just before Section 2.1 of the Kingma and Ba paper rather than - the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon - hat" in the paper. - - The sparse implementation of this algorithm (used when the gradient is an - IndexedSlices object, typically because of `tf.gather` or an embedding - lookup in the forward pass) does apply momentum to variable slices even if - they were not used in the forward pass (meaning they have a gradient equal - to zero). Momentum decay (beta1) is also applied to the entire momentum - accumulator. This means that the sparse behavior is equivalent to the dense - behavior (in contrast to some momentum implementations which ignore momentum - unless a variable slice was actually used). - """ - - def __init__(self, - learning_rate=0.001, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-7, - amsgrad=False, - clipnorm=None, - clipvalue=None, - global_clipnorm=None, - use_ema=False, - ema_momentum=0.99, - ema_overwrite_frequency=None, - jit_compile=True, - name='Adam', - **kwargs): - super().__init__( - name=name, - clipnorm=clipnorm, - clipvalue=clipvalue, - global_clipnorm=global_clipnorm, - use_ema=use_ema, - ema_momentum=ema_momentum, - ema_overwrite_frequency=ema_overwrite_frequency, - jit_compile=jit_compile, - **kwargs) - self._learning_rate = self._build_learning_rate(learning_rate) - self.beta_1 = beta_1 - self.beta_2 = beta_2 - self.epsilon = epsilon - self.amsgrad = amsgrad - - def build(self, var_list): - """Initialize optimizer variables. 
- - Adam optimizer has 3 types of variables: momentums, velocities and - velocity_hat (only set when amsgrad is applied), - - Args: - var_list: list of model variables to build Adam variables on. - """ - super().build(var_list) - if hasattr(self, '_built') and self._built: - return - self._built = True - self._momentums = [] - self._velocities = [] - for var in var_list: - self._momentums.append( - self.add_variable_from_reference( - model_variable=var, variable_name='m')) - self._velocities.append( - self.add_variable_from_reference( - model_variable=var, variable_name='v')) - if self.amsgrad: - self._velocity_hats = [] - for var in var_list: - self._velocity_hats.append( - self.add_variable_from_reference( - model_variable=var, variable_name='vhat')) - - def update_step(self, gradient, variable): - """Update step given gradient and the associated model variable.""" - beta_1_power = None - beta_2_power = None - lr = tf.cast(self.learning_rate, variable.dtype) - local_step = tf.cast(self.iterations + 1, variable.dtype) - beta_1_power = tf.pow(tf.cast(self.beta_1, variable.dtype), local_step) - beta_2_power = tf.pow(tf.cast(self.beta_2, variable.dtype), local_step) - - var_key = self._var_key(variable) - m = self._momentums[self._index_dict[var_key]] - v = self._velocities[self._index_dict[var_key]] - - alpha = (lr * tf.sqrt(1 - beta_2_power) / (1 - beta_1_power)) - - if isinstance(gradient, tf.IndexedSlices): - # Sparse gradients. - m.assign_add(-m * (1 - self.beta_1)) - m.scatter_add( - tf.IndexedSlices(gradient.values * (1 - self.beta_1), - gradient.indices)) - v.assign_add(-v * (1 - self.beta_2)) - v.scatter_add( - tf.IndexedSlices( - tf.square(gradient.values) * (1 - self.beta_2), gradient.indices)) - if self.amsgrad: - v_hat = self._velocity_hats[self._index_dict[var_key]] - v_hat.assign(tf.maximum(v_hat, v)) - v = v_hat - variable.assign_sub((m * alpha) / (tf.sqrt(v) + self.epsilon)) - else: - # Dense gradients. - m.assign_add((gradient - m) * (1 - self.beta_1)) - v.assign_add((tf.square(gradient) - v) * (1 - self.beta_2)) - if self.amsgrad: - v_hat = self._velocity_hats[self._index_dict[var_key]] - v_hat.assign(tf.maximum(v_hat, v)) - v = v_hat - variable.assign_sub((m * alpha) / (tf.sqrt(v) + self.epsilon)) - - def get_config(self): - config = super().get_config() - - config.update({ - 'learning_rate': self._serialize_hyperparameter(self._learning_rate), - 'beta_1': self.beta_1, - 'beta_2': self.beta_2, - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad, - }) - return config - - -Adam.__doc__ = Adam.__doc__.replace( - '{{base_optimizer_keyword_args}}', optimizer.base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_experimental/adamax.py b/keras/optimizers/optimizer_experimental/adamax.py deleted file mode 100644 index 2d4f89dc7c95..000000000000 --- a/keras/optimizers/optimizer_experimental/adamax.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
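For reference, the dense branch of Adam's `update_step` shown earlier reduces to the following self-contained arithmetic, with the bias correction folded into `alpha` exactly as in the code (values here are illustrative):

```python
import tensorflow as tf

lr, beta_1, beta_2, epsilon = 0.001, 0.9, 0.999, 1e-7
w = tf.Variable([0.5, -0.3])
m = tf.Variable(tf.zeros_like(w))  # first-moment estimate
v = tf.Variable(tf.zeros_like(w))  # second-moment estimate
g = tf.constant([0.2, -0.1])
t = 1  # local_step = iterations + 1

# Bias-corrected step size, as computed in update_step():
alpha = lr * (1 - beta_2**t) ** 0.5 / (1 - beta_1**t)
m.assign_add((g - m) * (1 - beta_1))
v.assign_add((tf.square(g) - v) * (1 - beta_2))
w.assign_sub(m * alpha / (tf.sqrt(v) + epsilon))
```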
-# ============================================================================== -"""Adamax optimizer implementation.""" - -from keras.optimizers.optimizer_experimental import optimizer -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@generic_utils.register_keras_serializable() -@keras_export('keras.optimizers.experimental.Adamax', v1=[]) -class Adamax(optimizer.Optimizer): - """Optimizer that implements the Adamax algorithm. - - Adamax, a variant of Adam based on the infinity norm, is a first-order - gradient-based optimization method. Due to its capability of adjusting the - learning rate based on data characteristics, it is suited to learning - time-variant processes, e.g., speech data with dynamically changing noise - conditions. Default parameters follow those provided in the paper (see - references below). - - Initialization: - - ```python - m = 0 # Initialize the 1st moment vector - u = 0 # Initialize the exponentially weighted infinity norm - t = 0 # Initialize timestep - ``` - - The update rule for parameter `w` with gradient `g` is - described at the end of section 7.1 of the paper (see the reference section): - - ```python - t += 1 - m = beta1 * m + (1 - beta1) * g - u = max(beta2 * u, abs(g)) - current_lr = learning_rate / (1 - beta1 ** t) - w = w - current_lr * m / (u + epsilon) - ``` - - Args: - learning_rate: A `tf.Tensor`, floating point value, a schedule that is a - `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable - that takes no arguments and returns the actual value to use. The - learning rate. Defaults to 0.001. - beta_1: A float value or a constant float tensor. The exponential decay - rate for the 1st moment estimates. - beta_2: A float value or a constant float tensor. The exponential decay - rate for the exponentially weighted infinity norm. - epsilon: A small constant for numerical stability. - {{base_optimizer_keyword_args}} - - Reference: - - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) - """ - - def __init__(self, - learning_rate=0.001, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-7, - clipnorm=None, - clipvalue=None, - global_clipnorm=None, - use_ema=False, - ema_momentum=0.99, - ema_overwrite_frequency=None, - jit_compile=True, - name='Adamax', - **kwargs): - super().__init__( - name=name, - clipnorm=clipnorm, - clipvalue=clipvalue, - global_clipnorm=global_clipnorm, - use_ema=use_ema, - ema_momentum=ema_momentum, - ema_overwrite_frequency=ema_overwrite_frequency, - jit_compile=jit_compile, - **kwargs) - self._learning_rate = self._build_learning_rate(learning_rate) - self.beta_1 = beta_1 - self.beta_2 = beta_2 - self.epsilon = epsilon - - def build(self, var_list): - """Initialize optimizer variables. - - Adamax optimizer has 2 types of variables: momentums (denoted as m) and the - exponentially weighted infinity norm (denoted as u). - - Args: - var_list: list of model variables to build Adamax variables on.
- """ - super().build(var_list) - if hasattr(self, '_built') and self._built: - return - self._built = True - self._m = [] - self._u = [] - for var in var_list: - self._m.append( - self.add_variable_from_reference( - model_variable=var, variable_name='m')) - self._u.append( - self.add_variable_from_reference( - model_variable=var, variable_name='u')) - - def update_step(self, gradient, variable): - """Update step given gradient and the associated model variable.""" - lr = tf.cast(self.learning_rate, variable.dtype) - local_step = tf.cast(self.iterations + 1, variable.dtype) - beta_1_power = tf.pow(tf.cast(self.beta_1, variable.dtype), local_step) - - var_key = self._var_key(variable) - m = self._m[self._index_dict[var_key]] - u = self._u[self._index_dict[var_key]] - - if isinstance(gradient, tf.IndexedSlices): - # Sparse gradients. - indices = gradient.indices - m.assign_add(-m * (1 - self.beta_1)) - m.scatter_add( - tf.IndexedSlices(gradient.values * (1 - self.beta_1), indices)) - u.assign(u * self.beta_2) - u_slice = tf.gather(u, indices) - u_slice_incremental = tf.maximum( - u_slice, - tf.abs(gradient.values)) - u_slice - u.scatter_add(tf.IndexedSlices(u_slice_incremental, indices)) - variable.assign_sub((lr * m) / ((1 - beta_1_power) * (u + self.epsilon))) - else: - # Dense gradients. - m.assign_add((gradient - m) * (1 - self.beta_1)) - u.assign(tf.maximum(self.beta_2 * u, tf.abs(gradient))) - variable.assign_sub((lr * m) / ((1 - beta_1_power) * (u + self.epsilon))) - - def get_config(self): - config = super().get_config() - - config.update({ - 'learning_rate': self._serialize_hyperparameter(self._learning_rate), - 'beta_1': self.beta_1, - 'beta_2': self.beta_2, - 'epsilon': self.epsilon, - }) - return config - - -Adamax.__doc__ = Adamax.__doc__.replace( - '{{base_optimizer_keyword_args}}', optimizer.base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_experimental/adamw.py b/keras/optimizers/optimizer_experimental/adamw.py deleted file mode 100644 index 296fbcf8ca19..000000000000 --- a/keras/optimizers/optimizer_experimental/adamw.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""AdamW optimizer implementation.""" - -from keras.optimizers.optimizer_experimental import optimizer -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@generic_utils.register_keras_serializable() -@keras_export('keras.optimizers.experimental.AdamW', v1=[]) -class AdamW(optimizer.Optimizer): - r"""Optimizer that implements the AdamW algorithm. 
- - AdamW optimization is a stochastic gradient descent method that is based on - adaptive estimation of first-order and second-order moments with an added - method to decay weights per the techniques discussed in the paper, - 'Decoupled Weight Decay Regularization' by - [Loshchilov, Hutter et al., 2019](https://arxiv.org/abs/1711.05101). - - According to - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980), - the underlying Adam method is "*computationally - efficient, has little memory requirement, invariant to diagonal rescaling of - gradients, and is well suited for problems that are large in terms of - data/parameters*". - - Args: - learning_rate: A `tf.Tensor`, floating point value, a schedule that is a - `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable - that takes no arguments and returns the actual value to use. The - learning rate. Defaults to 0.001. - weight_decay: A `tf.Tensor`, floating point value. The weight decay. - Defaults to 0.004. - beta_1: A float value or a constant float tensor, or a callable - that takes no arguments and returns the actual value to use. The - exponential decay rate for the 1st moment estimates. Defaults to 0.9. - beta_2: A float value or a constant float tensor, or a callable - that takes no arguments and returns the actual value to use. The - exponential decay rate for the 2nd moment estimates. Defaults to 0.999. - epsilon: A small constant for numerical stability. This epsilon is - "epsilon hat" in the Kingma and Ba paper (in the formula just before - Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to - 1e-7. - amsgrad: Boolean. Whether to apply the AMSGrad variant of this algorithm from - the paper "On the Convergence of Adam and beyond". Defaults to `False`. - {{base_optimizer_keyword_args}} - - Reference: - - [Loshchilov et al., 2019](https://arxiv.org/abs/1711.05101) - - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) for `adam` - - [Reddi et al., 2018]( - https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`. - - Notes: - - The default value of 1e-7 for epsilon might not be a good default in - general. For example, when training an Inception network on ImageNet a - current good choice is 1.0 or 0.1. Note that since Adam uses the - formulation just before Section 2.1 of the Kingma and Ba paper rather than - the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon - hat" in the paper. - - The sparse implementation of this algorithm (used when the gradient is an - IndexedSlices object, typically because of `tf.gather` or an embedding - lookup in the forward pass) does apply momentum to variable slices even if - they were not used in the forward pass (meaning they have a gradient equal - to zero). Momentum decay (beta1) is also applied to the entire momentum - accumulator. This means that the sparse behavior is equivalent to the dense - behavior (in contrast to some momentum implementations which ignore momentum - unless a variable slice was actually used).
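The word "decoupled" in the paper title is the key detail here: the decay is applied directly to the weights, outside the adaptive gradient step, rather than as an L2 term in the loss. A two-line sketch of what `update_step` does before the Adam update (illustrative values):

```python
import tensorflow as tf

weight_decay = 0.004
w = tf.Variable([0.5, -0.3])

# Decoupled weight decay: shrink the weights directly, so the decay is
# not rescaled by Adam's adaptive denominator (as an L2 loss term would be).
w.assign_sub(w * weight_decay)
# ...then apply the ordinary Adam update of `w` from its gradient.
```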
- """ - - def __init__(self, - learning_rate=0.001, - weight_decay=0.004, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-7, - amsgrad=False, - clipnorm=None, - clipvalue=None, - global_clipnorm=None, - use_ema=False, - ema_momentum=0.99, - ema_overwrite_frequency=None, - jit_compile=True, - name='AdamW', - **kwargs): - super().__init__( - name=name, - clipnorm=clipnorm, - clipvalue=clipvalue, - global_clipnorm=global_clipnorm, - use_ema=use_ema, - ema_momentum=ema_momentum, - ema_overwrite_frequency=ema_overwrite_frequency, - jit_compile=jit_compile, - **kwargs) - self._learning_rate = self._build_learning_rate(learning_rate) - self.weight_decay = weight_decay - self.beta_1 = beta_1 - self.beta_2 = beta_2 - self.epsilon = epsilon - self.amsgrad = amsgrad - - if self.weight_decay is None: - raise ValueError('Missing value of `weight_decay` which is required and' - ' must be a float value.') - - def build(self, var_list, exclude_from_weight_decay=None): - """Initialize optimizer variables. - - AdamW optimizer has 3 types of variables: momentums, velocities and - velocity_hat (only set when amsgrad is applied), - - Args: - var_list: list of model variables to build AdamW variables on. - exclude_from_weight_decay: list of model variables that will be excluded - from weight decay. - """ - super().build(var_list) - if hasattr(self, '_built') and self._built: - return - self._built = True - if not hasattr(self, '_exclude_from_weight_decay'): - self._exclude_from_weight_decay = exclude_from_weight_decay or [] - self._momentums = [] - self._velocities = [] - for var in var_list: - self._momentums.append( - self.add_variable_from_reference( - model_variable=var, variable_name='m')) - self._velocities.append( - self.add_variable_from_reference( - model_variable=var, variable_name='v')) - if self.amsgrad: - self._velocity_hats = [] - for var in var_list: - self._velocity_hats.append( - self.add_variable_from_reference( - model_variable=var, variable_name='vhat')) - - def update_step(self, gradient, variable): - """Update step given gradient and the associated model variable.""" - beta_1_power = None - beta_2_power = None - lr = tf.cast(self.learning_rate, variable.dtype) - local_step = tf.cast(self.iterations + 1, variable.dtype) - beta_1_power = tf.pow(tf.cast(self.beta_1, variable.dtype), local_step) - beta_2_power = tf.pow(tf.cast(self.beta_2, variable.dtype), local_step) - - var_key = self._var_key(variable) - m = self._momentums[self._index_dict[var_key]] - v = self._velocities[self._index_dict[var_key]] - - alpha = (lr * tf.sqrt(1 - beta_2_power) / (1 - beta_1_power)) - - # Apply step weight decay - if (self.weight_decay != 0 and - variable not in self._exclude_from_weight_decay): - wd = tf.cast(self.weight_decay, variable.dtype) - variable.assign_sub(variable * wd) - - if isinstance(gradient, tf.IndexedSlices): - # Sparse gradients. - m.assign_add(-m * (1 - self.beta_1)) - m.scatter_add( - tf.IndexedSlices(gradient.values * (1 - self.beta_1), - gradient.indices)) - v.assign_add(-v * (1 - self.beta_2)) - v.scatter_add( - tf.IndexedSlices( - tf.square(gradient.values) * (1 - self.beta_2), gradient.indices)) - if self.amsgrad: - v_hat = self._velocity_hats[self._index_dict[var_key]] - v_hat.assign(tf.maximum(v_hat, v)) - v = v_hat - variable.assign_sub((m * alpha) / (tf.sqrt(v) + self.epsilon)) - else: - # Dense gradients. 
- m.assign_add((gradient - m) * (1 - self.beta_1)) - v.assign_add((tf.square(gradient) - v) * (1 - self.beta_2)) - if self.amsgrad: - v_hat = self._velocity_hats[self._index_dict[var_key]] - v_hat.assign(tf.maximum(v_hat, v)) - v = v_hat - variable.assign_sub((m * alpha) / (tf.sqrt(v) + self.epsilon)) - - def get_config(self): - config = super().get_config() - - config.update({ - 'learning_rate': self._serialize_hyperparameter(self._learning_rate), - 'weight_decay': self.weight_decay, - 'beta_1': self.beta_1, - 'beta_2': self.beta_2, - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad, - }) - return config - - def exclude_from_weight_decay(self, var_list): - if hasattr(self, '_built') and self._built: - raise ValueError( - '`exclude_from_weight_decay()` can only be configured before ' - 'the optimizer is built.' - ) - - self._exclude_from_weight_decay = var_list or [] - - -AdamW.__doc__ = AdamW.__doc__.replace( - '{{base_optimizer_keyword_args}}', optimizer.base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_experimental/ftrl.py b/keras/optimizers/optimizer_experimental/ftrl.py deleted file mode 100644 index aa7ffe3cc319..000000000000 --- a/keras/optimizers/optimizer_experimental/ftrl.py +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""FTRL optimizer implementation.""" - -from keras.optimizers.optimizer_experimental import optimizer -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@generic_utils.register_keras_serializable() -@keras_export('keras.optimizers.experimental.Ftrl', v1=[]) -class Ftrl(optimizer.Optimizer): - r"""Optimizer that implements the FTRL algorithm. - - "Follow The Regularized Leader" (FTRL) is an optimization algorithm developed - at Google for click-through rate prediction in the early 2010s. It is most - suitable for shallow models with large and sparse feature spaces. - The algorithm is described by - [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf). - The Keras version has support for both online L2 regularization - (the L2 regularization described in the paper - above) and shrinkage-type L2 regularization - (which is the addition of an L2 penalty to the loss function).
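Returning briefly to AdamW before the FTRL details: `exclude_from_weight_decay()` above must be called before the optimizer is built, i.e. before the first `apply_gradients()`. A hypothetical usage sketch, assuming the pre-removal import path of this module and illustrative variable names:

```python
import tensorflow as tf
from keras.optimizers.optimizer_experimental import adamw

# Illustrative variables; in practice these come from a model.
kernel = tf.Variable(tf.ones((3, 3)), name="kernel")
bias = tf.Variable(tf.zeros(3), name="bias")

opt = adamw.AdamW(learning_rate=1e-3, weight_decay=0.004)
# Configure exclusions before the optimizer builds its slot variables;
# calling this after build() raises a ValueError.
opt.exclude_from_weight_decay([bias])
```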
- - Initialization: - - ```python - n = 0 - sigma = 0 - z = 0 - ``` - - Update rule for one variable `w`: - - ```python - prev_n = n - n = n + g ** 2 - sigma = (n ** -lr_power - prev_n ** -lr_power) / lr - z = z + g - sigma * w - if abs(z) < lambda_1: - w = 0 - else: - w = (sgn(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2) - ``` - - Notation: - - - `lr` is the learning rate - - `g` is the gradient for the variable - - `lambda_1` is the L1 regularization strength - - `lambda_2` is the L2 regularization strength - - `lr_power` is the power to scale n. - - Check the documentation for the `l2_shrinkage_regularization_strength` - parameter for more details when shrinkage is enabled, in which case gradient - is replaced with a gradient with shrinkage. - - Args: - learning_rate: A `Tensor`, floating point value, a schedule that is a - `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable that - takes no arguments and returns the actual value to use. The learning rate. - Defaults to 0.001. - learning_rate_power: A float value, must be less or equal to zero. Controls - how the learning rate decreases during training. Use zero for a fixed - learning rate. - initial_accumulator_value: The starting value for accumulators. Only zero or - positive values are allowed. - l1_regularization_strength: A float value, must be greater than or equal to - zero. Defaults to 0.0. - l2_regularization_strength: A float value, must be greater than or equal to - zero. Defaults to 0.0. - l2_shrinkage_regularization_strength: A float value, must be greater than or - equal to zero. This differs from L2 above in that the L2 above is a - stabilization penalty, whereas this L2 shrinkage is a magnitude penalty. - When input is sparse shrinkage will only happen on the active weights. - beta: A float value, representing the beta value from the paper. Defaults to - 0.0. - {{base_optimizer_keyword_args}} - """ - - def __init__(self, - learning_rate=0.001, - learning_rate_power=-0.5, - initial_accumulator_value=0.1, - l1_regularization_strength=0.0, - l2_regularization_strength=0.0, - l2_shrinkage_regularization_strength=0.0, - beta=0.0, - clipnorm=None, - clipvalue=None, - global_clipnorm=None, - use_ema=False, - ema_momentum=0.99, - ema_overwrite_frequency=None, - jit_compile=True, - name='Ftrl', - **kwargs): - super().__init__( - name=name, - clipnorm=clipnorm, - clipvalue=clipvalue, - global_clipnorm=global_clipnorm, - use_ema=use_ema, - ema_momentum=ema_momentum, - ema_overwrite_frequency=ema_overwrite_frequency, - jit_compile=jit_compile, - **kwargs) - - if initial_accumulator_value < 0.0: - raise ValueError( - '`initial_accumulator_value` needs to be positive or zero. Received: ' - f'initial_accumulator_value={initial_accumulator_value}.') - if learning_rate_power > 0.0: - raise ValueError( - '`learning_rate_power` needs to be negative or zero. Received: ' - f'learning_rate_power={learning_rate_power}.') - if l1_regularization_strength < 0.0: - raise ValueError( - '`l1_regularization_strength` needs to be positive or zero. ' - f'Received: l1_regularization_strength={l1_regularization_strength}.') - if l2_regularization_strength < 0.0: - raise ValueError( - '`l2_regularization_strength` needs to be positive or zero. ' - f'Received: l2_regularization_strength={l2_regularization_strength}.') - if l2_shrinkage_regularization_strength < 0.0: - raise ValueError( - '`l2_shrinkage_regularization_strength` needs to be positive or ' - 'zero. 
Received: l2_shrinkage_regularization_strength' - f'={l2_shrinkage_regularization_strength}.') - - self._learning_rate = self._build_learning_rate(learning_rate) - self.learning_rate_power = learning_rate_power - self.initial_accumulator_value = initial_accumulator_value - self.l1_regularization_strength = l1_regularization_strength - self.l2_regularization_strength = l2_regularization_strength - self.l2_shrinkage_regularization_strength = ( - l2_shrinkage_regularization_strength) - self.beta = beta - - def build(self, var_list): - """Initialize optimizer variables. - - Args: - var_list: list of model variables to build Ftrl variables on. - """ - super().build(var_list) - if hasattr(self, '_built') and self._built: - return - self._accumulators = [] - self._linears = [] - for var in var_list: - self._accumulators.append( - self.add_variable_from_reference( - model_variable=var, - variable_name='accumulator', - initial_value=tf.cast( - tf.fill(dims=var.shape, value=self.initial_accumulator_value), - dtype=var.dtype))) - self._linears.append( - self.add_variable_from_reference( - model_variable=var, variable_name='linear')) - self._built = True - - def update_step(self, gradient, variable): - """Update step given gradient and the associated model variable.""" - - lr = tf.cast(self.learning_rate, variable.dtype) - var_key = self._var_key(variable) - accum = self._accumulators[self._index_dict[var_key]] - linear = self._linears[self._index_dict[var_key]] - - lr_power = self.learning_rate_power - l2_reg = self.l2_regularization_strength - l2_reg = (l2_reg + self.beta / (2. * lr)) - - # Ftrl optimizer has the same implementation for sparse and dense - # gradients update. - grad_to_use = ( - gradient + 2 * self.l2_shrinkage_regularization_strength * variable) - new_accum = accum + tf.pow(gradient, 2) - linear.assign_add(grad_to_use - - (tf.pow(new_accum, -lr_power) - - tf.pow(accum, -lr_power)) / lr * variable) - quadratic = tf.pow(new_accum, - (-lr_power)) / lr + 2 * l2_reg - linear_clipped = tf.clip_by_value(linear, - -self.l1_regularization_strength, - self.l1_regularization_strength) - variable.assign((linear_clipped - linear) / quadratic) - accum.assign(new_accum) - - def get_config(self): - config = super().get_config() - - config.update({ - 'learning_rate': - self._serialize_hyperparameter(self._learning_rate), - 'learning_rate_power': - self.learning_rate_power, - 'initial_accumulator_value': - self.initial_accumulator_value, - 'l1_regularization_strength': - self.l1_regularization_strength, - 'l2_regularization_strength': - self.l2_regularization_strength, - 'l2_shrinkage_regularization_strength': - self.l2_shrinkage_regularization_strength, - 'beta': - self.beta, - }) - return config - - -Ftrl.__doc__ = Ftrl.__doc__.replace( - '{{base_optimizer_keyword_args}}', optimizer.base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_experimental/nadam.py b/keras/optimizers/optimizer_experimental/nadam.py deleted file mode 100644 index b9557ad70da2..000000000000 --- a/keras/optimizers/optimizer_experimental/nadam.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
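Stripped of the sparse and shrinkage bookkeeping, the FTRL step in `update_step` above is a closed-form per-variable assignment. A plain-TensorFlow sketch with illustrative values:

```python
import tensorflow as tf

lr, lr_power, l1, l2, beta = 0.001, -0.5, 0.01, 0.01, 0.0
w = tf.Variable([0.5, -0.3])
accum = tf.Variable(tf.fill([2], 0.1))  # squared-gradient accumulator
linear = tf.Variable(tf.zeros_like(w))  # FTRL linear term
g = tf.constant([0.2, -0.1])

l2_eff = l2 + beta / (2.0 * lr)  # effective L2 term, as in update_step()
new_accum = accum + tf.square(g)
linear.assign_add(
    g - (tf.pow(new_accum, -lr_power) - tf.pow(accum, -lr_power)) / lr * w)
quadratic = tf.pow(new_accum, -lr_power) / lr + 2 * l2_eff
# Soft-thresholding: weights whose |linear| stays below l1 collapse to 0.
w.assign((tf.clip_by_value(linear, -l1, l1) - linear) / quadratic)
accum.assign(new_accum)
```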
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Nadam optimizer implementation.""" - -from keras.optimizers.optimizer_experimental import optimizer -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@generic_utils.register_keras_serializable() -@keras_export('keras.optimizers.experimental.Nadam', v1=[]) -class Nadam(optimizer.Optimizer): - r"""Optimizer that implements the Nadam algorithm. - - Much like Adam is essentially RMSprop with momentum, Nadam is Adam with - Nesterov momentum. - - Args: - learning_rate: A `tf.Tensor`, floating point value, a schedule that is a - `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable - that takes no arguments and returns the actual value to use. The - learning rate. Defaults to 0.001. - beta_1: A float value or a constant float tensor, or a callable - that takes no arguments and returns the actual value to use. The - exponential decay rate for the 1st moment estimates. Defaults to 0.9. - beta_2: A float value or a constant float tensor, or a callable - that takes no arguments and returns the actual value to use. The - exponential decay rate for the 2nd moment estimates. Defaults to 0.999. - epsilon: A small constant for numerical stability. This epsilon is - "epsilon hat" in the Kingma and Ba paper (in the formula just before - Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to - 1e-7. - {{base_optimizer_keyword_args}} - - Reference: - - [Dozat, 2015](http://cs229.stanford.edu/proj2015/054_report.pdf). - - """ - - def __init__(self, - learning_rate=0.001, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-7, - clipnorm=None, - clipvalue=None, - global_clipnorm=None, - use_ema=False, - ema_momentum=0.99, - ema_overwrite_frequency=None, - jit_compile=True, - name='Nadam', - **kwargs): - super().__init__( - name=name, - clipnorm=clipnorm, - clipvalue=clipvalue, - global_clipnorm=global_clipnorm, - use_ema=use_ema, - ema_momentum=ema_momentum, - ema_overwrite_frequency=ema_overwrite_frequency, - jit_compile=jit_compile, - **kwargs) - self._learning_rate = self._build_learning_rate(learning_rate) - self.beta_1 = beta_1 - self.beta_2 = beta_2 - self.epsilon = epsilon - - def build(self, var_list): - """Initialize optimizer variables. - - Nadam optimizer has 2 types of variables: momentums and velocities. - - Args: - var_list: list of model variables to build Nadam variables on. - """ - super().build(var_list) - if getattr(self, '_built', False): - return - self._built = True - self._momentums = [] - self._velocities = [] - self._u_product = tf.Variable(1.0, dtype=var_list[0].dtype) - # Keep a counter on how many times of _u_product has been computed to - # avoid duplicated computations. 
- self._u_product_counter = 1 - - for var in var_list: - self._momentums.append( - self.add_variable_from_reference( - model_variable=var, variable_name='m')) - self._velocities.append( - self.add_variable_from_reference( - model_variable=var, variable_name='v')) - - def update_step(self, gradient, variable): - """Update step given gradient and the associated model variable.""" - var_dtype = variable.dtype - lr = tf.cast(self.learning_rate, var_dtype) - local_step = tf.cast(self.iterations + 1, var_dtype) - next_step = tf.cast(self.iterations + 2, var_dtype) - decay = tf.cast(0.96, var_dtype) - beta_1 = tf.cast(self.beta_1, var_dtype) - beta_2 = tf.cast(self.beta_2, var_dtype) - u_t = beta_1 * (1. - 0.5 * (tf.pow(decay, local_step))) - u_t_1 = beta_1 * (1. - 0.5 * (tf.pow(decay, next_step))) - def get_cached_u_product(): - return self._u_product - - def compute_new_u_product(): - u_product_t = self._u_product * u_t - self._u_product.assign(u_product_t) - self._u_product_counter += 1 - return u_product_t - - u_product_t = tf.cond( - self._u_product_counter == (self.iterations + 2), - true_fn=get_cached_u_product, - false_fn=compute_new_u_product) - u_product_t_1 = u_product_t * u_t_1 - beta_2_power = tf.pow(beta_2, local_step) - - var_key = self._var_key(variable) - m = self._momentums[self._index_dict[var_key]] - v = self._velocities[self._index_dict[var_key]] - - if isinstance(gradient, tf.IndexedSlices): - # Sparse gradients. - m.assign_add(-m * (1 - beta_1)) - m.scatter_add( - tf.IndexedSlices(gradient.values * (1 - beta_1), - gradient.indices)) - v.assign_add(-v * (1 - beta_2)) - v.scatter_add( - tf.IndexedSlices( - tf.square(gradient.values) * (1 - beta_2), gradient.indices)) - m_hat = ( - u_t_1 * m / (1 - u_product_t_1) + (1 - u_t) * gradient / - (1 - u_product_t)) - v_hat = v / (1 - beta_2_power) - - variable.assign_sub((m_hat * lr) / (tf.sqrt(v_hat) + self.epsilon)) - else: - # Dense gradients. - m.assign_add((gradient - m) * (1 - beta_1)) - v.assign_add((tf.square(gradient) - v) * (1 - beta_2)) - m_hat = ( - u_t_1 * m / (1 - u_product_t_1) + (1 - u_t) * gradient / - (1 - u_product_t)) - v_hat = v / (1 - beta_2_power) - - variable.assign_sub((m_hat * lr) / (tf.sqrt(v_hat) + self.epsilon)) - - def get_config(self): - config = super().get_config() - - config.update({ - 'learning_rate': self._serialize_hyperparameter(self._learning_rate), - 'beta_1': self.beta_1, - 'beta_2': self.beta_2, - 'epsilon': self.epsilon, - }) - return config - -Nadam.__doc__ = Nadam.__doc__.replace( - '{{base_optimizer_keyword_args}}', optimizer.base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_experimental/optimizer.py b/keras/optimizers/optimizer_experimental/optimizer.py deleted file mode 100644 index eed265b8d0f6..000000000000 --- a/keras/optimizers/optimizer_experimental/optimizer.py +++ /dev/null @@ -1,895 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
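The `_u_product` machinery above caches the running product of the momentum coefficients `u_t` so it is computed once per step. A condensed single-step sketch of the dense Nadam update (illustrative values):

```python
import tensorflow as tf

lr, beta_1, beta_2, epsilon, decay = 0.001, 0.9, 0.999, 1e-7, 0.96
w = tf.Variable([0.5, -0.3])
m = tf.Variable(tf.zeros_like(w))
v = tf.Variable(tf.zeros_like(w))
g = tf.constant([0.2, -0.1])
t = 1  # local_step

u_t = beta_1 * (1.0 - 0.5 * decay**t)          # momentum coefficient at t
u_t_1 = beta_1 * (1.0 - 0.5 * decay**(t + 1))  # and at t + 1
u_prod_t = u_t                # running product u_1 * ... * u_t (t == 1 here)
u_prod_t_1 = u_prod_t * u_t_1

m.assign_add((g - m) * (1 - beta_1))
v.assign_add((tf.square(g) - v) * (1 - beta_2))
# Nesterov look-ahead: mix the current gradient into the corrected momentum.
m_hat = u_t_1 * m / (1 - u_prod_t_1) + (1 - u_t) * g / (1 - u_prod_t)
v_hat = v / (1 - beta_2**t)
w.assign_sub(m_hat * lr / (tf.sqrt(v_hat) + epsilon))
```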
-# ============================================================================== -"""Base class of optimizer. - -This is under development, and subject to interface/implementation changes. -""" - -import abc -from absl import logging - -from keras import backend -from keras import initializers -from keras.optimizers.optimizer_v2 import utils as optimizer_utils -from keras.optimizers.schedules import learning_rate_schedule -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export -from tensorflow.tools.docs import doc_controls - - -class _BaseOptimizer(tf.Module): - """Optimizer base class, which only supports non-distribute use case.""" - - def __init__(self, - name, - clipnorm=None, - clipvalue=None, - global_clipnorm=None, - use_ema=False, - ema_momentum=0.99, - ema_overwrite_frequency=None, - jit_compile=True, - **kwargs): - self._name = name - self.clipnorm = clipnorm - self.global_clipnorm = global_clipnorm - self.clipvalue = clipvalue - self.use_ema = use_ema - self.jit_compile = jit_compile - if not tf.config.list_physical_devices("GPU"): - # Optimizer only benefits from XLA when training on GPU. So if no GPU is - # found, we turn off XLA. - self.jit_compile = False - if use_ema: - # Verify the arguments related to EMA. - if ema_momentum > 1 or ema_momentum < 0: - raise ValueError("`ema_momentum` must be in the range [0, 1]. " - f"Received: ema_momentum={ema_momentum}") - if ema_overwrite_frequency and (not isinstance( - ema_overwrite_frequency, int) or ema_overwrite_frequency < 1): - raise ValueError( - "`ema_overwrite_frequency` must be an integer > 1 or None. " - f"Received: ema_overwrite_frequency={ema_overwrite_frequency}") - self.ema_momentum = ema_momentum - self.ema_overwrite_frequency = ema_overwrite_frequency - - if self.clipnorm is not None and self.global_clipnorm is not None: - raise ValueError(f"At most one of `clipnorm` and `global_clipnorm` can " - f"be set. Received: clipnorm={self.clipnorm}, " - f"global_clipnorm={self.global_clipnorm}.") - - self._create_iteration_variable() - self._process_kwargs(kwargs) - - def _create_iteration_variable(self): - """Create the iterations counter variable.""" - with tf.init_scope(): - # Lift the variable creation to init scope to avoid environment issue. - self._iterations = tf.Variable( - 0, name="iteration", dtype=tf.int64, trainable=False) - - def _process_kwargs(self, kwargs): - legacy_kwargs = { - "lr", "decay", "gradient_transformers", "gradient_aggregator" - } - for k in kwargs: - if k in legacy_kwargs: - logging.warning( - "%s is deprecated in `optimizer_experimental.Optimizer`" - ", please check the docstring for valid arguments.", k) - else: - raise TypeError(f"{k} is not a valid argument, kwargs should be empty " - " for `optimizer_experimental.Optimizer`.") - - def _var_key(self, variable): - """Get a unique identifier of the given variable.""" - # Get the distributed variable if it exists. - # TODO(b/199214315): replace _unique_id with ref() after fixing ref() issues - # on AggregatingVariable. - return variable._unique_id # pylint: disable=protected-access - - @abc.abstractmethod - def update_step(self, gradient, variable): - """Function to update variable value based on given gradients. - - This method must be implemented in customized optimizers. - - Args: - gradient: backpropagated gradient of the given variable. - variable: variable whose value needs to be updated. - - Returns: - An `Operation` that applies the specified gradients. 
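To make the `update_step` contract concrete: a subclass supplies `build()` and `update_step()`, and the base class handles iteration counting, clipping, and variable bookkeeping. A hypothetical minimal subclass (plain gradient descent, assuming the pre-removal import path; not an optimizer that shipped with Keras):

```python
import tensorflow as tf
from keras.optimizers.optimizer_experimental import optimizer


class SimpleSGD(optimizer.Optimizer):
    """Hypothetical minimal optimizer: plain gradient descent."""

    def __init__(self, learning_rate=0.01, name="SimpleSGD", **kwargs):
        super().__init__(name=name, **kwargs)
        self._learning_rate = self._build_learning_rate(learning_rate)

    def build(self, var_list):
        # No slot variables needed; the base class builds the index dict.
        super().build(var_list)
        self._built = True

    def update_step(self, gradient, variable):
        lr = tf.cast(self.learning_rate, variable.dtype)
        variable.assign_sub(lr * gradient)
```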
- - """ - raise NotImplementedError - - @tf.function(jit_compile=True) - def _update_step_xla(self, gradient, variable, key): - """A wrapper of `update_step` to enable XLA acceleration. - - Due to `tf.function` tracing mechanism, for (gradient, variable) pairs of - the same shape and dtype, the execution graph always invoke the first - pair it has seen. Thus, we need a `key` argument to make each - (gradient, variable) pair unique. In additions, XLA cannot understand - string input, so the key is an integer. - - Args: - gradient: backpropagated gradient of the given variable. - variable: variable whose value needs to be updated. - key (int): a unique key that identifies the variable. - - Returns: - An `Operation` that applies the specified gradients. - """ - return self._update_step(gradient, variable) - - def _update_step(self, gradient, variable): - if getattr(variable, "_unique_id", None) is None: - # Variable has no `_unique_id` if called during `model.save()`, in which - # case we do not want to update the variable. - return - if self._var_key(variable) not in self._index_dict: - raise KeyError( - f"The optimizer cannot recognize variable {variable.name}. This " - f"usually means that you're reusing an optimizer previously created " - f"for a different model. Try creating a new optimizer instance.") - self.update_step(gradient, variable) - - def compute_gradients(self, loss, var_list, tape=None): - """Compute gradients of loss on trainable variables. - - Args: - loss: `Tensor` or callable. If a callable, `loss` should take no arguments - and return the value to minimize. - var_list: list or tuple of `Variable` objects to update to minimize - `loss`. - tape: (Optional) `tf.GradientTape`. If `loss` is provided as a `Tensor`, - the tape that computed the `loss` must be provided. - - Returns: - A list of (gradient, variable) pairs. Variable is always present, but - gradient can be `None`. - """ - if not callable(loss) and tape is None: - raise ValueError("`tape` is required when a `Tensor` loss is passed. " - f"Received: loss={loss}, tape={tape}.") - if tape is None: - tape = tf.GradientTape() - if callable(loss): - with tape: - tape.watch(var_list) - loss = loss() - grads = tape.gradient(loss, var_list) - return list(zip(grads, var_list)) - - def _clip_gradients(self, grads): - clipped_grads = [] - if self.clipnorm and self.clipnorm > 0: - for g in grads: - if g is None: - clipped_grads.append(g) - else: - clipped_grads.append(tf.clip_by_norm(g, self.clipnorm)) - return clipped_grads - - if self.global_clipnorm and self.global_clipnorm > 0: - return tf.clip_by_global_norm(grads, self.global_clipnorm)[0] - - if self.clipvalue and self.clipvalue > 0: - for g in grads: - if g is None: - clipped_grads.append(g) - else: - clipped_grads.append( - tf.clip_by_value( - g, - clip_value_min=-self.clipvalue, # pylint: disable=invalid-unary-operand-type - clip_value_max=self.clipvalue)) - return clipped_grads - - return grads - - @property - def iterations(self): - """The number of training steps this `optimizer` has run. - - By default, iterations would be incremented by one every time - `apply_gradients()` is called. - """ - return self._iterations - - @iterations.setter - def iterations(self, variable): - if getattr(self, "_built", False): - raise RuntimeError("Cannot set `iterations` to a new Variable after " - "the Optimizer weights have been created. Here it is " - f"attempting to set `iterations` to {variable}." 
- "Usually this means you are trying to set `iterations`" - " after calling `apply_gradients()`. Please set " - "`iterations` before calling `apply_gradients()`.") - self._iterations = variable - - @property - def learning_rate(self): - if not hasattr(self, "_learning_rate") or self._learning_rate is None: - raise ValueError("Missing learning rate, please set self.learning_rate at" - " optimizer creation time.") - lr = self._learning_rate - if isinstance(lr, learning_rate_schedule.LearningRateSchedule): - # If the optimizer takes in LearningRateSchedule, then each call to - # learning_rate would return `self._current_learning_rate`, which is - # updated at each call to `apply_gradients`. - return self._current_learning_rate - return lr - - @learning_rate.setter - def learning_rate(self, learning_rate): - if isinstance(self._learning_rate, - learning_rate_schedule.LearningRateSchedule): - raise TypeError("This optimizer was created with a `LearningRateSchedule`" - " object as its `learning_rate` constructor argument, " - "hence its learning rate is not settable. If you need the" - " learning rate to be settable, you should instantiate " - "the optimizer with a float `learning_rate` argument.") - self._learning_rate.assign(learning_rate) - - @property - @doc_controls.do_not_generate_docs - def lr(self): - """Alias of `learning_rate()`. - - `lr()` is heavily called in workflows using `optimizer_v2.OptimizerV2`, - so we keep it for backward compabitliy. - """ - return self.learning_rate - - @lr.setter - def lr(self, learning_rate): - self.learning_rate = learning_rate - - def _build_learning_rate(self, learning_rate): - if isinstance(learning_rate, learning_rate_schedule.LearningRateSchedule): - # Create a variable to hold the current learning rate. - self._current_learning_rate = tf.Variable( - learning_rate(self.iterations), - name="learning_rate", - dtype=tf.float32, - trainable=False) - return learning_rate - return tf.Variable( - learning_rate, - name="learning_rate", - dtype=backend.floatx(), - trainable=False) - - @abc.abstractmethod - def build(self, var_list): - """Initialize the optimizer's variables, such as momemtum variables. - - This function has to be implemented by subclass optimizers, and subclass - optimizers need to call `super().build(var_list)`. - - Args: - var_list: List of model variables to build optimizers on. For example, SGD - optimizer with momentum will store one momentum variable corresponding - to each model variable. - """ - if getattr(self, "_built", False): - return - self._build_index_dict(var_list) - if self.use_ema: - self._model_variables_moving_average = [] - for var in var_list: - # Make a copy of the model variables, we will use the copy to store the - # moving average of model variables. - self._model_variables_moving_average.append( - self.add_variable_from_reference(var, "average", initial_value=var)) - - def _build_index_dict(self, var_list): - """Build variable to index dictionary. - - Build a dictionary that maps variable to the index of it in the given - var_list. - - Args: - var_list: List of variables to build index dict on. - - Returns: - None - """ - self._index_dict = {} - for i, var in enumerate(var_list): - var_key = self._var_key(var) - self._index_dict[var_key] = i - - def add_variable(self, shape, dtype=None, initializer="zeros", name=None): - """Create an optimizer variable. - - Args: - shape: A list of integers, a tuple of integers, or a 1-D Tensor of type - int32. Defaults to scalar if unspecified. 
- dtype: The DType of the optimizer variable to be created. Defaults to - `tf.keras.backend.floatx` if unspecified. - initializer: string or callable. Initializer instance. - name: The name of the optimizer variable to be created. - - Returns: - An optimizer variable, in the format of tf.Variable. - - """ - if isinstance(initializer, str): - initializer = initializers.get(initializer) - if dtype is None: - dtype = backend.floatx() - if shape is None: - shape = [] - return tf.Variable( - initial_value=initializer(shape, dtype), name=name, trainable=False) - - def add_variable_from_reference(self, - model_variable, - variable_name, - shape=None, - initial_value=None): - """Create an optimizer variable from a model variable. - - Create an optimizer variable based on the information of a model variable. - For example, in the SGD optimizer with momentum, for each model variable, a - corresponding momentum variable is created of the same shape and dtype. - - Args: - model_variable: tf.Variable. The corresponding model variable to the - optimizer variable to be created. - variable_name: String. The name prefix of the optimizer variable to be - created. The created variable's name will follow the pattern - `{variable_name}/{model_variable.name}`, e.g., `momentum/dense_1`. - shape: List or Tuple, defaults to None. The shape of the optimizer - variable to be created. If None, the created variable will have the - same shape as `model_variable`. - initial_value: A Tensor, or Python object convertible to a Tensor, - defaults to None. The initial value of the optimizer variable; if None, - the initial value defaults to 0. - - Returns: - An optimizer variable. - """ - if initial_value is None: - if shape is None: - initial_value = tf.zeros( - shape=model_variable.shape, dtype=model_variable.dtype) - else: - initial_value = tf.zeros(shape, dtype=model_variable.dtype) - return tf.Variable( - initial_value=initial_value, - name=f"{variable_name}/{model_variable._shared_name}", # pylint: disable=protected-access - dtype=model_variable.dtype, - trainable=False) - - def minimize(self, loss, var_list, tape=None): - """Minimize `loss` by updating `var_list`. - - This method simply computes gradients using `tf.GradientTape` and calls - `apply_gradients()`. If you want to process the gradients before applying - them, call `tf.GradientTape` and `apply_gradients()` explicitly instead - of using this function. - - Args: - loss: `Tensor` or callable. If a callable, `loss` should take no arguments - and return the value to minimize. - var_list: list or tuple of `Variable` objects to update to minimize - `loss`. - tape: (Optional) `tf.GradientTape`. - - Returns: - None - """ - grads_and_vars = self.compute_gradients(loss, var_list, tape) - self.apply_gradients(grads_and_vars) - - def apply_gradients(self, grads_and_vars): - """Apply gradients to variables. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - - Returns: - None - - Raises: - TypeError: If `grads_and_vars` is malformed. - """ - if isinstance(self._learning_rate, - learning_rate_schedule.LearningRateSchedule): - # Compute the current learning rate at the beginning of variable update. - self._current_learning_rate.assign(self._learning_rate(self.iterations)) - grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) - grads, trainable_variables = zip(*grads_and_vars) - scope_name = self._name or "optimizer" - with tf.name_scope(scope_name): - with tf.init_scope(): - # Lift variable creation to init scope to avoid environment issues.
-        self.build(trainable_variables)
-      grads = self._clip_gradients(grads)
-      grads_and_vars = list(zip(grads, trainable_variables))
-      self._internal_apply_gradients(grads_and_vars)
-
-  def _internal_apply_gradients(self, grads_and_vars):
-    """Helper function for applying gradients.
-
-    This is required for separating out distributed training logic.
-
-    Args:
-      grads_and_vars: List of (gradient, variable) pairs.
-    """
-    if self.jit_compile:
-      for grad, var in grads_and_vars:
-        self._update_step_xla(grad, var, id(self._var_key(var)))
-    else:
-      for grad, var in grads_and_vars:
-        self._update_step(grad, var)
-
-    self.iterations.assign_add(1)
-
-  def _update_model_variables_moving_average(self, var_list):
-    """Update the stored moving average using the latest value."""
-    if self.use_ema:
-      for (var, average) in zip(var_list, self._model_variables_moving_average):
-        average.assign(self.ema_momentum * average +
-                       (1 - self.ema_momentum) * var)
-
-  def _overwrite_model_variables_with_average_value(self, var_list):
-    """Overwrite model variables with their moving averages."""
-    if len(var_list) != len(self._model_variables_moving_average):
-      raise ValueError(f"The length of model variables ({len(var_list)}) to "
-                       f"override does not match the length of model variables "
-                       f"stored in the optimizer "
-                       f"({len(self._model_variables_moving_average)}). Please "
-                       f"check if the optimizer was called on your model.")
-    self._overwrite_model_variables_with_average_value_helper(var_list)
-
-  def _overwrite_model_variables_with_average_value_helper(self, var_list):
-    """Helper function that overwrites model variables."""
-    for var, average_var in zip(var_list, self._model_variables_moving_average):
-      var.assign(average_var)
-
-  def finalize_variable_values(self, var_list):
-    """Set the final value of the model's trainable variables.
-
-    Sometimes there are some extra steps before ending the variable updates,
-    such as overwriting the model variables with their average values.
-
-    Args:
-      var_list: list of model variables.
-    """
-    if self.use_ema:
-      # If the optimizer uses EMA, then when finalizing, we replace the model
-      # variable value with its moving average stored inside the optimizer.
-      self._overwrite_model_variables_with_average_value(var_list)
-
-  def _serialize_hyperparameter(self, hyperparameter):
-    """Serialize a hyperparameter that can be a numeric or callable."""
-    if isinstance(hyperparameter, learning_rate_schedule.LearningRateSchedule):
-      return learning_rate_schedule.serialize(hyperparameter)
-    if isinstance(hyperparameter, tf.Variable):
-      return hyperparameter.numpy()
-    if callable(hyperparameter):
-      return hyperparameter()
-    return hyperparameter
-
-  def get_config(self):
-    """Returns the config of the optimizer.
-
-    An optimizer config is a Python dictionary (serializable)
-    containing the configuration of an optimizer.
-    The same optimizer can be reinstantiated later
-    (without any saved state) from this configuration.
-
-    Subclass optimizers should override this method to include other
-    hyperparameters.
-
-    Returns:
-      Python dictionary.
-    """
-    config = {
-        "clipnorm": self.clipnorm,
-        "global_clipnorm": self.global_clipnorm,
-        "clipvalue": self.clipvalue,
-        "use_ema": self.use_ema,
-        "ema_momentum": self.ema_momentum,
-        "ema_overwrite_frequency": self.ema_overwrite_frequency,
-        "jit_compile": self.jit_compile,
-    }
-    return config
-
-  @classmethod
-  def from_config(cls, config):
-    """Creates an optimizer from its config.
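As a hedged sketch of the round trip these config methods provide (hyperparameters are reinstantiated, slot state is not; the values are illustrative):

```python
import tensorflow as tf

opt = tf.keras.optimizers.experimental.Adam(learning_rate=0.002, clipnorm=0.5)
config = opt.get_config()  # plain, serializable dict
restored = tf.keras.optimizers.experimental.Adam.from_config(config)
print(restored.clipnorm)   # 0.5: hyperparameters survive, slot variables do not
```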
- - This method is the reverse of `get_config`, capable of instantiating the - same optimizer from the config dictionary. - - Args: - config: A Python dictionary, typically the output of get_config. - - Returns: - An optimizer instance. - """ - if "learning_rate" in config: - if isinstance(config["learning_rate"], dict): - config["learning_rate"] = learning_rate_schedule.deserialize( - config["learning_rate"]) - return cls(**config) - - -base_optimizer_keyword_args = """name: String. The name to use - for momentum accumulator weights created by - the optimizer. - clipnorm: Float. If set, the gradient of each weight is individually - clipped so that its norm is no higher than this value. - clipvalue: Float. If set, the gradient of each weight is clipped to be no - higher than this value. - global_clipnorm: Float. If set, the gradient of all weights is clipped so - that their global norm is no higher than this value. - use_ema: Boolean, defaults to False. If True, exponential moving average - (EMA) is applied. EMA consists of computing an exponential moving - average of the weights of the model (as the weight values change after - each training batch), and periodically overwriting the weights with - their moving average. - ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. This is - the momentum to use when computing the EMA of the model's weights: - `new_average = ema_momentum * old_average + (1 - ema_momentum) * - current_variable_value`. - ema_overwrite_frequency: Int or None, defaults to None. Only used if - `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, we - overwrite the model variable by its moving average. If None, the optimizer - does not overwrite model variables in the middle of training, and you - need to explicitly overwrite the variables at the end of training - by calling `optimizer.finalize_variable_values()` (which updates the model - variables in-place). When using the built-in `fit()` training loop, this - happens automatically after the last epoch, and you don't need to do - anything. - jit_compile: Boolean, defaults to True. If True, the optimizer will use XLA - compilation. If no GPU device is found, this flag will be ignored. - **kwargs: keyword arguments only used for backward compatibility.""" - - -# pylint: disable=g-classes-have-attributes -@keras_export("keras.optimizers.experimental.Optimizer", v1=[]) -class Optimizer(_BaseOptimizer): - """Abstract optimizer base class. - - This class supports distributed training. If you want to implement your own - optimizer, please subclass this class instead of _BaseOptimizer. - - Args: - {{base_optimizer_keyword_args}} - - ### Usage - - ```python - # Create an optimizer with the desired parameters. - opt = tf.keras.optimizers.experimental.SGD(learning_rate=0.1) - var1, var2 = tf.Variable(1.0), tf.Variable(2.0) - # `loss` is a callable that takes no argument and returns the value - # to minimize. - loss = lambda: 3 * var1 * var1 + 2 * var2 * var2 - # Call minimize to update the list of variables. - opt.minimize(loss, var_list=[var1, var2]) - ``` - - ### Processing gradients before applying them - - Calling `minimize()` takes care of both computing the gradients and - applying them to the variables. If you want to process the gradients - before applying them you can instead use the optimizer in three steps: - - 1. Compute the gradients with `tf.GradientTape`. - 2. Process the gradients as you wish. - 3. Apply the processed gradients with `apply_gradients()`. 
-
-  Example:
-
-  ```python
-  # Create an optimizer.
-  opt = tf.keras.optimizers.experimental.SGD(learning_rate=0.1)
-  var1, var2 = tf.Variable(1.0), tf.Variable(2.0)
-
-  # Compute the gradients for a list of variables.
-  with tf.GradientTape() as tape:
-    loss = 3 * var1 * var1 + 2 * var2 * var2
-  grads = tape.gradient(loss, [var1, var2])
-
-  # Process the gradients.
-  grads[0] = grads[0] + 1
-
-  # Ask the optimizer to apply the gradients on variables.
-  opt.apply_gradients(zip(grads, [var1, var2]))
-  ```
-
-  ### Dynamic learning rate
-
-  A dynamic learning rate can be achieved by setting the learning rate to a
-  built-in or customized `tf.keras.optimizers.schedules.LearningRateSchedule`.
-
-  Example:
-
-  >>> var = tf.Variable(np.random.random(size=(1,)))
-  >>> learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
-  ...   initial_learning_rate=.01, decay_steps=20, decay_rate=.1)
-  >>> opt = tf.keras.optimizers.experimental.SGD(learning_rate=learning_rate)
-  >>> loss = lambda: 3 * var
-  >>> opt.minimize(loss, var_list=[var])
-
-  ### Gradient clipping
-
-  Users can clip the gradients before applying them to variables by setting
-  `clipnorm`, `clipvalue` and `global_clipnorm`. Note that at most one of
-  `clipnorm` and `global_clipnorm` can be set.
-
-  Example:
-
-  >>> opt = tf.keras.optimizers.experimental.SGD(learning_rate=1, clipvalue=1)
-  >>> var1, var2 = tf.Variable(2.0), tf.Variable(2.0)
-  >>> with tf.GradientTape() as tape:
-  ...   loss = 2 * var1 + 2 * var2
-  >>> grads = tape.gradient(loss, [var1, var2])
-  >>> print([grads[0].numpy(), grads[1].numpy()])
-  [2.0, 2.0]
-  >>> opt.apply_gradients(zip(grads, [var1, var2]))
-  >>> # Without clipping, we should get [0, 0], but as gradients are clipped to
-  >>> # have max value 1, we get [1.0, 1.0].
-  >>> print([var1.numpy(), var2.numpy()])
-  [1.0, 1.0]
-
-  ### Using exponential moving average
-
-  Empirically it has been found that using the exponential moving average (EMA)
-  of the trained parameters of a deep network achieves better performance than
-  using the trained parameters directly. Keras optimizers allow users to
-  compute this moving average and overwrite the model variables at the desired
-  time.
-
-  Example:
-
-  ```python
-  # Create an SGD optimizer with EMA on. `ema_momentum` controls the decay rate
-  # of the moving average. `ema_momentum=1` means no decay and the stored moving
-  # average is always the model variable's initial value before training.
-  # Conversely, `ema_momentum=0` is equivalent to not using EMA.
-  # `ema_overwrite_frequency=3` means every 3 iterations, we overwrite the
-  # trainable variables with their moving average values.
-  opt = tf.keras.optimizers.experimental.SGD(
-      learning_rate=1,
-      use_ema=True,
-      ema_momentum=0.5,
-      ema_overwrite_frequency=3)
-  var1, var2 = tf.Variable(2.0), tf.Variable(2.0)
-  with tf.GradientTape() as tape:
-    loss = var1 + var2
-  grads = tape.gradient(loss, [var1, var2])
-  # First iteration: [var1, var2] = [1.0, 1.0]
-  opt.apply_gradients(zip(grads, [var1, var2]))
-  print([var1, var2])
-
-  # Second iteration: [var1, var2] = [0.0, 0.0]
-  opt.apply_gradients(zip(grads, [var1, var2]))
-  print([var1, var2])
-
-  # Third iteration, without EMA, we should see [var1, var2] = [-1.0, -1.0],
-  # but overwriting results in [var1, var2] = [-0.125, -0.125]. The full
-  # calculation for the moving average of var1 is:
-  # var1 = 2*0.5**3 + 1*(1-0.5)*0.5**2 + 0*(1-0.5)*0.5**1 + (-1)*(1-0.5) = -0.125.
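-  # Step by step: the average starts at var1's initial value 2.0; after
-  # iteration 1 it is 0.5*2.0 + 0.5*1.0 = 1.5; after iteration 2 it is
-  # 0.5*1.5 + 0.5*0.0 = 0.75; after iteration 3 it is
-  # 0.5*0.75 + 0.5*(-1.0) = -0.125, which then overwrites var1.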
-  opt.apply_gradients(zip(grads, [var1, var2]))
-  print([var1, var2])
-
-  ```
-  When the optimizer is constructed with `use_ema=True`, in a custom training
-  loop users can explicitly call `finalize_variable_values()` to overwrite
-  trainable variables with their EMA values. `finalize_variable_values()` is
-  by default called at the end of `model.fit()`.
-
-  ### Use with `tf.distribute.Strategy`
-
-  This optimizer class is `tf.distribute.Strategy` aware, which means it
-  automatically sums gradients across all replicas. To aggregate gradients
-  yourself, call `apply_gradients` with `skip_gradients_aggregation` set to
-  True. This is useful if you need to process aggregated gradients.
-
-  ```python
-  # This example is not runnable; it is dummy code for a simple tutorial.
-  strategy = tf.distribute.experimental.TPUStrategy()
-
-  with strategy.scope():
-    opt = tf.keras.optimizers.experimental.SGD()
-    model = magic_function_that_returns_model()
-    gradients = magic_function_that_returns_gradients()
-    # Custom logic to aggregate gradients.
-    gradients = strategy.reduce("SUM", gradients, axis=None)
-    opt.apply_gradients(zip(gradients, model.trainable_variables),
-                        skip_gradients_aggregation=True)
-  ```
-
-  ### Creating a custom optimizer
-
-  If you intend to create your own optimization algorithm, please inherit from
-  this class and override the following methods:
-
-    - `build`: Create your optimizer-related variables, such as `momentums` in
-      the SGD optimizer.
-    - `update_step`: Implement your optimizer's updating logic.
-    - `get_config`: serialization of the optimizer, including all
-      hyperparameters.
-
-  Your optimizer will automatically be compatible with TensorFlow distributed
-  training if you subclass `optimizer_experimental.Optimizer`.
-
-  """
-
-  def __init__(self,
-               name,
-               clipnorm=None,
-               clipvalue=None,
-               global_clipnorm=None,
-               use_ema=False,
-               ema_momentum=0.99,
-               ema_overwrite_frequency=None,
-               jit_compile=True,
-               **kwargs):
-    """Create a new Optimizer."""
-
-    super().__init__(name, clipnorm, clipvalue, global_clipnorm, use_ema,
-                     ema_momentum, ema_overwrite_frequency, jit_compile,
-                     **kwargs)
-    self._distribution_strategy = tf.distribute.get_strategy()
-
-  def add_variable_from_reference(self,
-                                  model_variable,
-                                  variable_name,
-                                  shape=None,
-                                  initial_value=None):
-    strategy = tf.distribute.get_strategy()
-    with strategy.extended.colocate_vars_with(model_variable):
-      return super().add_variable_from_reference(model_variable, variable_name,
-                                                 shape, initial_value)
-
-  def _var_key(self, variable):
-    """Get a unique identifier of the given variable."""
-    # pylint: disable=protected-access
-    # Get the distributed variable if it exists.
-    # TODO(b/197554203): replace _distributed_container() with a public api.
-    if hasattr(variable, "_distributed_container"):
-      variable = variable._distributed_container()
-    return super()._var_key(variable)
-
-  def aggregate_gradients(self, grads_and_vars):
-    """Aggregate gradients on all devices.
-
-    By default, we perform a reduce-sum of the gradients across devices. Users
-    can implement their own aggregation logic by overriding this method.
-
-    Args:
-      grads_and_vars: List of (gradient, variable) pairs.
-
-    Returns:
-      List of (gradient, variable) pairs.
-    """
-    return optimizer_utils.all_reduce_sum_gradients(grads_and_vars)
-
-  def apply_gradients(self, grads_and_vars, skip_gradients_aggregation=False):
-    """Apply gradients to variables.
-
-    Args:
-      grads_and_vars: List of (gradient, variable) pairs.
-      skip_gradients_aggregation: If True, gradient aggregation will not be
-        performed inside the optimizer. Usually this arg is set to True when
-        you write custom code that aggregates gradients outside the optimizer.
-
-    Returns:
-      None
-
-    Raises:
-      TypeError: If `grads_and_vars` is malformed.
-      RuntimeError: If called in a cross-replica context.
-    """
-    if not skip_gradients_aggregation:
-      grads_and_vars = self.aggregate_gradients(grads_and_vars)
-    super().apply_gradients(grads_and_vars)
-
-  def _internal_apply_gradients(self, grads_and_vars):
-    tf.__internal__.distribute.interim.maybe_merge_call(
-        self._distributed_apply_gradients_fn, self._distribution_strategy,
-        grads_and_vars)
-
-  def _overwrite_model_variables_with_average_value_helper(self, var_list):
-    """Helper function to _overwrite_model_variables_with_average_value.
-
-    This function overwrites variables on each device.
-
-    Args:
-      var_list: list of model variables.
-    """
-    strategy = self._distribution_strategy
-    # Overwrite model variables with the stored average values on all devices.
-    for var, average_var in zip(var_list, self._model_variables_moving_average):
-      strategy.extended.update(
-          var, lambda a, b: a.assign(b), args=(average_var,))
-
-  def _update_model_variables_moving_average(self, var_list):
-    """Update the stored moving average using the latest value."""
-    if self.use_ema:
-
-      def update_average(average, var):
-        average.assign(self.ema_momentum * average +
-                       (1 - self.ema_momentum) * var)
-
-      for (var, average) in zip(var_list, self._model_variables_moving_average):
-        self._distribution_strategy.extended.update(
-            average, update_average, args=(var,), group=False)
-
-  def _distributed_apply_gradients_fn(self, distribution, grads_and_vars,
-                                      **kwargs):
-    """`apply_gradients` using a `DistributionStrategy`."""
-
-    def apply_grad_to_update_var(var, grad):
-      if self.jit_compile:
-        return self._update_step_xla(grad, var, id(self._var_key(var)))
-      else:
-        return self._update_step(grad, var)
-
-    for grad, var in grads_and_vars:
-      distribution.extended.update(
-          var, apply_grad_to_update_var, args=(grad,), group=False)
-    self.iterations.assign_add(1)
-
-    if self.use_ema:
-      _, var_list = zip(*grads_and_vars)
-      self._update_model_variables_moving_average(var_list)
-      if self.ema_overwrite_frequency:
-        # We overwrite the model variables only when `ema_overwrite_frequency`
-        # is not None.
-        should_overwrite_model_vars = (
-            self.iterations % self.ema_overwrite_frequency == 0)
-        tf.cond(
-            tf.cast(should_overwrite_model_vars, tf.bool),
-            true_fn=lambda: self._overwrite_model_variables_with_average_value(  # pylint: disable=g-long-lambda
-                var_list),
-            false_fn=lambda: None)
-
-
-class RestoredOptimizer(Optimizer):
-
-  def __init__(self):
-    super().__init__("RestoredOptimizer")
-
-  def get_config(self):
-    raise NotImplementedError(
-        "Restoring functional Optimizers from SavedModels is not currently "
-        "supported. Please file a feature request if this limitation bothers "
-        "you.")
-
-
-# Register the optimizer for loading from saved_model purposes.
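A minimal single-replica sketch of the `skip_gradients_aggregation` flow documented above; the aggregation comment marks where custom cross-replica logic would go:

```python
import tensorflow as tf

opt = tf.keras.optimizers.experimental.SGD(learning_rate=0.1)
var = tf.Variable(1.0)
with tf.GradientTape() as tape:
    loss = var * var
grads = tape.gradient(loss, [var])
# Custom aggregation would happen here (e.g., a strategy.reduce per gradient).
opt.apply_gradients(zip(grads, [var]), skip_gradients_aggregation=True)
print(var.numpy())  # 0.8 = 1.0 - 0.1 * 2.0
```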
-tf.__internal__.saved_model.load.register_revived_type( - "experimentalOptimizer", - lambda obj: isinstance(obj, Optimizer), - versions=[ - tf.__internal__.saved_model.load.VersionedTypeRegistration( - object_factory=lambda proto: RestoredOptimizer(), - version=2, - min_producer_version=1, - min_consumer_version=1) - ]) - -Optimizer.__doc__ = Optimizer.__doc__.replace( - "{{base_optimizer_keyword_args}}", base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_experimental/optimizer_pss_test.py b/keras/optimizers/optimizer_experimental/optimizer_pss_test.py deleted file mode 100644 index 8cc1ba33f1ac..000000000000 --- a/keras/optimizers/optimizer_experimental/optimizer_pss_test.py +++ /dev/null @@ -1,148 +0,0 @@ -"""Tests for calling optimizer on ParameterServerStrategy.""" - -from absl.testing import parameterized -import keras -from keras.optimizers.optimizer_experimental import adadelta -from keras.optimizers.optimizer_experimental import adagrad -from keras.optimizers.optimizer_experimental import adam -from keras.optimizers.optimizer_experimental import adamax -from keras.optimizers.optimizer_experimental import adamw -from keras.optimizers.optimizer_experimental import ftrl -from keras.optimizers.optimizer_experimental import nadam -from keras.optimizers.optimizer_experimental import rmsprop -from keras.optimizers.optimizer_experimental import sgd -from keras.utils import dataset_creator -from keras.utils import losses_utils -import tensorflow.compat.v2 as tf - -ds_combinations = tf.__internal__.distribute.combinations - -STRATEGIES = [ - ds_combinations.parameter_server_strategy_3worker_2ps_cpu, - ds_combinations.parameter_server_strategy_3worker_2ps_1gpu, -] - -adadelta_fn = tf.__internal__.test.combinations.NamedObject( - "adadelta", - lambda: adadelta.Adadelta( # pylint: disable=g-long-lambda - 0.002, - use_ema=True, - ema_overwrite_frequency=None)) -adagrad_fn = tf.__internal__.test.combinations.NamedObject( - "adagrad", lambda: adagrad.Adagrad(0.002)) -adam_fn = tf.__internal__.test.combinations.NamedObject( - "adam", lambda: adam.Adam(0.002)) -adamax_fn = tf.__internal__.test.combinations.NamedObject( - "adamax", lambda: adamax.Adamax(0.002)) -adamw_fn = tf.__internal__.test.combinations.NamedObject( - "adamw", lambda: adamw.AdamW(0.002, weight_decay=0.004)) -ftrl_fn = tf.__internal__.test.combinations.NamedObject( - "ftrl", lambda: ftrl.Ftrl(0.002)) -nadam_fn = tf.__internal__.test.combinations.NamedObject( - "experimentnadam", lambda: nadam.Nadam(0.002)) -rmsprop_fn = tf.__internal__.test.combinations.NamedObject( - "rmsprop", lambda: rmsprop.RMSprop(0.002)) -sgd_fn = tf.__internal__.test.combinations.NamedObject( - "sgdaverage", - lambda: sgd.SGD( # pylint: disable=g-long-lambda - 0.002, - use_ema=True, - ema_overwrite_frequency=1)) - -OPTIMIZER_FN = [ - adadelta_fn, - adagrad_fn, - adam_fn, - adamax_fn, - adamw_fn, - ftrl_fn, - nadam_fn, - rmsprop_fn, - sgd_fn, -] - - -# TODO(b/228209527): Combine this test with optimizer_test after -# fixing the NCCL issue. 
-class OptimizerPssTest(tf.test.TestCase, parameterized.TestCase): - - def _get_model(self): - return keras.Sequential( - [keras.layers.Input(shape=(1,)), - keras.layers.Dense(1)]) - - def _get_dataset_fn(self): - - def dataset_fn(_): - x, y = [1, 1, 1, 0, 0, 0], [1, 1, 1, 0, 0, 0] - ds = tf.data.Dataset.from_tensor_slices((x, y)) - ds = ds.repeat().batch(6) - return ds - - return dataset_fn - - def _verify_accumulators_updated(self, optimizer): - variables = optimizer.variables - for var in variables: - if "iteration" not in var.name and "learning_rate" not in var.name: - # Find a variable not iteration or learning_rate, and verify its value - # is updated (not 0). - self.assertNotAllEqual(var, 0) - - @ds_combinations.generate( - tf.__internal__.test.combinations.combine( - strategy=STRATEGIES, optimizer_fn=OPTIMIZER_FN)) - def testGetGradientsInModelPss(self, strategy, optimizer_fn): - with strategy.scope(): - model = self._get_model() - optimizer = optimizer_fn() - ds_fn = self._get_dataset_fn() - if isinstance(strategy, tf.distribute.ParameterServerStrategy): - ds = dataset_creator.DatasetCreator(ds_fn) - else: - ds = ds_fn(None) - model.compile(loss="mse", optimizer=optimizer) - model.fit(ds, epochs=1, steps_per_epoch=5) - - self._verify_accumulators_updated(optimizer) - - @ds_combinations.generate( - tf.__internal__.test.combinations.combine( - strategy=STRATEGIES, optimizer_fn=OPTIMIZER_FN)) - def testGetGradientsInCustomTrainingLoopPss(self, strategy, optimizer_fn): - coordinator = ( - tf.distribute.experimental.coordinator.ClusterCoordinator(strategy)) - - with strategy.scope(): - model = self._get_model() - optimizer = optimizer_fn() - - def per_worker_dataset_fn(): - return strategy.distribute_datasets_from_function( - self._get_dataset_fn()) - - ds = coordinator.create_per_worker_dataset(per_worker_dataset_fn) - - @tf.function - def train_step(iterator): - - def replica_fn(data): - features, labels = data - with tf.GradientTape() as tape: - output = model(tf.expand_dims(features, axis=1)) - loss = keras.losses.MeanSquaredError( - reduction=losses_utils.ReductionV2.NONE)(labels, output) - grads = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(zip(grads, model.trainable_variables)) - - strategy.run(replica_fn, args=(next(iterator),)) - - for _ in range(3): - coordinator.schedule(train_step, args=(iter(ds),)) - coordinator.join() - self.assertEqual(self.evaluate(optimizer.iterations), 3) - self._verify_accumulators_updated(optimizer) - - -if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/optimizers/optimizer_experimental/optimizer_test.py b/keras/optimizers/optimizer_experimental/optimizer_test.py deleted file mode 100644 index d1998205bcfa..000000000000 --- a/keras/optimizers/optimizer_experimental/optimizer_test.py +++ /dev/null @@ -1,525 +0,0 @@ -"""Tests for the reworked optimizer. 
- -More context in go/new-keras-optimizer -""" - -import os -import re - -from absl import logging -from absl.testing import parameterized -import keras -from keras.optimizers.optimizer_experimental import adadelta as adadelta_new -from keras.optimizers.optimizer_experimental import adagrad as adagrad_new -from keras.optimizers.optimizer_experimental import adam as adam_new -from keras.optimizers.optimizer_experimental import adamax as adamax_new -from keras.optimizers.optimizer_experimental import adamw as adamw_new -from keras.optimizers.optimizer_experimental import ftrl as ftrl_new -from keras.optimizers.optimizer_experimental import nadam as nadam_new -from keras.optimizers.optimizer_experimental import rmsprop as rmsprop_new -from keras.optimizers.optimizer_experimental import sgd as sgd_new -from keras.optimizers.optimizer_v2 import adadelta as adadelta_old -from keras.optimizers.optimizer_v2 import adagrad as adagrad_old -from keras.optimizers.optimizer_v2 import adam as adam_old -from keras.optimizers.optimizer_v2 import ftrl as ftrl_old -from keras.optimizers.optimizer_v2 import gradient_descent as sgd_old -from keras.optimizers.optimizer_v2 import rmsprop as rmsprop_old -from keras.optimizers.schedules import learning_rate_schedule -from keras.utils import losses_utils -import numpy as np -import tensorflow.compat.v2 as tf - -ds_combinations = tf.__internal__.distribute.combinations - -STRATEGIES = [ - # TODO(b/202992598): Add PSS strategy once the XLA issues is resolved. - ds_combinations.one_device_strategy, - ds_combinations.mirrored_strategy_with_cpu_1_and_2, - ds_combinations.mirrored_strategy_with_two_gpus, - ds_combinations.tpu_strategy, - ds_combinations.cloud_tpu_strategy, - ds_combinations.multi_worker_mirrored_2x1_cpu, - ds_combinations.multi_worker_mirrored_2x2_gpu, - ds_combinations.central_storage_strategy_with_two_gpus, -] - -adadelta_new_fn = tf.__internal__.test.combinations.NamedObject( - "experimentaladadelta", - lambda: adadelta_new.Adadelta( # pylint: disable=g-long-lambda - 0.002, - use_ema=True, - ema_overwrite_frequency=None)) -adagrad_new_fn = tf.__internal__.test.combinations.NamedObject( - "experimentaladagrad", lambda: adagrad_new.Adagrad(0.002)) -adam_new_fn = tf.__internal__.test.combinations.NamedObject( - "experimentaladam", lambda: adam_new.Adam(0.002)) -adamax_new_fn = tf.__internal__.test.combinations.NamedObject( - "experimentaladamax", lambda: adamax_new.Adamax(0.002)) -adamw_new_fn = tf.__internal__.test.combinations.NamedObject( - "experimentaladamw", lambda: adamw_new.AdamW(0.002, weight_decay=0.004)) -ftrl_new_fn = tf.__internal__.test.combinations.NamedObject( - "experimentalftrl", lambda: ftrl_new.Ftrl(0.002)) -nadam_new_fn = tf.__internal__.test.combinations.NamedObject( - "experimentnadam", lambda: nadam_new.Nadam(0.002)) -rmsprop_new_fn = tf.__internal__.test.combinations.NamedObject( - "experimentalrmsprop", lambda: rmsprop_new.RMSprop(0.002)) -sgd_new_fn = tf.__internal__.test.combinations.NamedObject( - "experimentalsgdaverage", - lambda: sgd_new.SGD( # pylint: disable=g-long-lambda - 0.002, - use_ema=True, - ema_overwrite_frequency=1)) - -OPTIMIZER_FN = [ - adadelta_new_fn, - adagrad_new_fn, - adam_new_fn, - adamax_new_fn, - adamw_new_fn, - ftrl_new_fn, - nadam_new_fn, - rmsprop_new_fn, - sgd_new_fn, -] - - -class OptimizerFuntionalityTest(tf.test.TestCase, parameterized.TestCase): - """Test the functionality of optimizer.""" - - def testAddVariableFromReference(self): - optimizer = adam_new.Adam() - variable = 
optimizer.add_variable_from_reference( - tf.Variable(1.0, name="tmp"), "test") - self.assertEqual(variable._shared_name, "test/tmp") - self.assertEqual(self.evaluate(variable), 0) - - def testAddVarialeWithCustomShape(self): - optimizer = adam_new.Adam() - variable = optimizer.add_variable_from_reference( - tf.Variable([1.0, 2.0], name="tmp"), "test", shape=[]) - self.assertEqual(variable, tf.Variable(0.)) - - def testBuildIndexDict(self): - optimizer = adam_new.Adam() - var_list = [tf.Variable(0, name=f"var{i}") for i in range(10)] - optimizer._build_index_dict(var_list) - self.assertEqual(optimizer._index_dict[optimizer._var_key(var_list[7])], 7) - - def testClipNorm(self): - optimizer = adam_new.Adam(clipnorm=1) - grad = [tf.convert_to_tensor([100.0, 100.0])] - clipped_grad = optimizer._clip_gradients(grad) - self.assertAllClose(clipped_grad[0], [2**0.5 / 2, 2**0.5 / 2]) - - def testClipValue(self): - optimizer = adam_new.Adam(clipvalue=1) - grad = [tf.convert_to_tensor([100.0, 100.0])] - clipped_grad = optimizer._clip_gradients(grad) - self.assertAllEqual(clipped_grad[0], [1.0, 1.0]) - - def testWeightDecay(self): - grads, var1, var2, var3 = tf.zeros( - ()), tf.Variable(2.0), tf.Variable(2.0), tf.Variable(2.0) - optimizer_1 = adamw_new.AdamW(learning_rate=0.001, weight_decay=0.004) - optimizer_1.apply_gradients(zip([grads], [var1])) - - optimizer_2 = adamw_new.AdamW(learning_rate=0.001, weight_decay=0.004) - optimizer_2.exclude_from_weight_decay([var2]) - optimizer_2.apply_gradients(zip([grads], [var2])) - - optimizer_3 = adamw_new.AdamW(learning_rate=0.001, weight_decay=0.004) - optimizer_3.build([var3], exclude_from_weight_decay=[var3]) - optimizer_3.apply_gradients(zip([grads], [var3])) - - self.assertEqual(var1, 1.992) - self.assertEqual(var2, 2.0) - self.assertEqual(var3, 2.0) - - def testClipGlobalNorm(self): - optimizer = adam_new.Adam(global_clipnorm=1) - grad = [ - tf.cast([100.0, 100.0], dtype=tf.float32), - tf.cast([100.0, 100.0], dtype=tf.float32) - ] - clipped_grad = optimizer._clip_gradients(grad) - self.assertAllClose(clipped_grad[0], [0.5, 0.5]) - - def testPassingLegacyArgsRaiseWarning(self): - with self.assertLogs(level="WARNING") as log_output: - logging.set_verbosity(logging.WARNING) - _ = adam_new.Adam(clipnorm=1, decay=0.5) - expected_log = "decay is deprecated in" - output = log_output[0][0].message - - self.assertTrue(re.search(expected_log, output)) - - def testPassingLegacyClipnorm(self): - optimizer = adam_new.Adam(clipnorm=1) - self.assertEqual(optimizer.clipnorm, 1) - - def testReturnAllOptimizerVariables(self): - x = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32) - optimizer = adam_new.Adam() - grads = tf.convert_to_tensor([[1.0, 2.0], [3.0, 4.0]]) - optimizer.apply_gradients(zip([grads], [x])) - optimizer_variables = optimizer.variables - all_names = [var._shared_name for var in optimizer_variables] - self.assertLen(optimizer_variables, 4) - self.assertCountEqual( - all_names, - ["iteration", "learning_rate", "Adam/m/Variable", "Adam/v/Variable"]) - - def testSetLearningRate(self): - optimizer = adam_new.Adam(learning_rate=1.0) - self.assertIsInstance(optimizer._learning_rate, tf.Variable) - self.assertEqual(self.evaluate(optimizer.learning_rate), 1.0) - optimizer.learning_rate = 2.0 - self.assertEqual(self.evaluate(optimizer.learning_rate), 2.0) - # Test the legacy setter. 
- optimizer.lr = 3.0 - self.assertEqual(self.evaluate(optimizer.learning_rate), 3.0) - - lr_schedule = learning_rate_schedule.ExponentialDecay( - initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9) - optimizer = adam_new.Adam(learning_rate=lr_schedule) - self.assertIsInstance(optimizer._learning_rate, - learning_rate_schedule.ExponentialDecay) - self.assertEqual(optimizer.learning_rate, 0.01) - # Test the legacy property. - self.assertEqual(optimizer.lr, 0.01) - - x = tf.Variable([1.0, 2.0], dtype=tf.float32) - grads = tf.convert_to_tensor([1.0, 2.0]) - for _ in range(2): - optimizer.apply_gradients(zip([grads], [x])) - self.assertTrue(optimizer.learning_rate < 0.01 and - optimizer.learning_rate > 0.00999) - with self.assertRaisesRegex(TypeError, "This optimizer was created with*"): - optimizer.learning_rate = 2.0 - - def testSetIterations(self): - optimizer = adam_new.Adam(jit_compile=False) - optimizer.iterations = tf.Variable(2, dtype=tf.int32) - self.assertEqual(optimizer.iterations, 2) - var_list = [tf.Variable(2.0), tf.Variable(2.0)] - grads = tf.convert_to_tensor([1.0, 1.0]) - optimizer.apply_gradients(zip(grads, var_list)) - self.assertEqual(optimizer.iterations, 3) - with self.assertRaisesRegex(RuntimeError, "Cannot set*"): - optimizer.iterations = 2 - - def testPassingMissingWDError(self): - with self.assertRaises(ValueError): - _ = adamw_new.AdamW(0.01, weight_decay=None) - - with self.assertRaisesRegex(ValueError, "Missing value of"): - _ = adamw_new.AdamW(0.01, weight_decay=None) - - def testMovingAverageOptimizer(self): - optimizer = sgd_new.SGD( - learning_rate=1, - use_ema=True, - ema_momentum=0.5, - ema_overwrite_frequency=3) - - var1, var2 = tf.Variable(2.0), tf.Variable(2.0) - with tf.GradientTape() as tape: - loss = var1 + var2 - grads = tape.gradient(loss, [var1, var2]) - # First iteration: [var1, var2] = [1.0, 1.0] - optimizer.apply_gradients(zip(grads, [var1, var2])) - self.assertAllEqual([var1.numpy(), var2.numpy()], [1.0, 1.0]) - - # Second iteration: [var1, var2] = [0.0, 0.0] - optimizer.apply_gradients(zip(grads, [var1, var2])) - self.assertAllEqual([var1.numpy(), var2.numpy()], [0.0, 0.0]) - - # Third iteration, without EMA, we should see [var1, var2] = [-1.0, -1.0], - # but overwriting results in [var1, var2] = [-0.125, -0.125]. 
- optimizer.apply_gradients(zip(grads, [var1, var2])) - self.assertAllEqual([var1.numpy(), var2.numpy()], [-0.125, -0.125]) - - def testGetAndFromConfig(self): - optimizer = adam_new.Adam( - learning_rate=np.float64(0.05), - beta_1=0.7, - beta_2=0.77, - amsgrad=True, - epsilon=0.001, - clipnorm=0.5, - use_ema=True, - ema_momentum=0.5, - ema_overwrite_frequency=50) - config = optimizer.get_config() - expected_config = { - "learning_rate": np.float32(0.05), - "beta_1": 0.7, - "beta_2": 0.77, - "epsilon": 0.001, - "amsgrad": True, - "clipnorm": 0.5, - "global_clipnorm": None, - "clipvalue": None, - "use_ema": True, - "ema_momentum": 0.5, - "ema_overwrite_frequency": 50, - } - self.assertDictContainsSubset(expected_config, config) - restored_optimizer = adam_new.Adam.from_config(config) - self.assertDictEqual(restored_optimizer.get_config(), - optimizer.get_config()) - - def testCheckpointOptimizer(self): - x = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32) - lr_schedule = learning_rate_schedule.ExponentialDecay( - initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9) - optimizer_1 = adam_new.Adam( - learning_rate=lr_schedule, beta_1=0.8, beta_2=0.888) - grads = tf.convert_to_tensor([[1.0, 2.0], [3.0, 4.0]]) - - for _ in range(1): - optimizer_1.apply_gradients(zip([grads], [x])) - - # Then save the variable and optimizer to a checkpoint. - checkpoint_1 = tf.train.Checkpoint(var=x, optimizer=optimizer_1) - checkpoint_path = checkpoint_1.save(self.get_temp_dir()) - - # Create a new optimizer and call restore on it (and x) - x2 = tf.Variable([[0., 0.], [0., 0.]], dtype=x.dtype) - optimizer_2 = adam_new.Adam(learning_rate=0.02, beta_1=0.7, beta_2=0.777) - optimizer_2.build([x2]) - checkpoint_2 = tf.train.Checkpoint(var=x2, optimizer=optimizer_2) - checkpoint_2.restore(checkpoint_path) - - self.assertTrue( - (self.evaluate(optimizer_1._momentums._storage[0]) == self.evaluate( - optimizer_2._momentums._storage[0])).all()) - self.assertEqual( - self.evaluate(optimizer_1._iterations), - self.evaluate(optimizer_2._iterations)) - - @parameterized.product(optimizer_fn=OPTIMIZER_FN) - def testSaveAndLoadOptimizerWithModel(self, optimizer_fn): - model = keras.Sequential( - [keras.layers.Input(shape=(1,)), - keras.layers.Dense(1)]) - optimizer = optimizer_fn() - optimizer.clipnorm = 0.1 - x = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) - y = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) - model.compile(loss="mse", optimizer=optimizer) - model.fit(x, y) - - # Save in h5 format. - path = os.path.join(self.get_temp_dir(), "model.h5") - model.save(path) - loaded_model = keras.models.load_model(path) - loaded_model.load_weights(path) - loaded_optimizer = loaded_model.optimizer - self.assertEqual(type(optimizer), type(loaded_optimizer)) - self.assertEqual(loaded_optimizer.learning_rate, 0.002) - self.assertEqual(loaded_optimizer.clipnorm, 0.1) - - # Save in Keras SavedModel format. 
- model.fit(x, y) - path = os.path.join(self.get_temp_dir(), "model") - model.save(path) - loaded_model = keras.models.load_model(path) - loaded_model.load_weights(path) - loaded_optimizer = loaded_model.optimizer - self.assertEqual(type(optimizer), type(loaded_optimizer)) - self.assertEqual(loaded_optimizer.learning_rate, 0.002) - self.assertEqual(loaded_optimizer.clipnorm, 0.1) - - @parameterized.product(optimizer_fn=OPTIMIZER_FN) - def testSparseGradientsWorkAsExpected(self, optimizer_fn): - optimizer_1 = optimizer_fn() - optimizer_2 = optimizer_fn() - x1 = tf.Variable(np.ones([5]), dtype=tf.float64) - x2 = tf.Variable(np.ones([5]), dtype=tf.float64) - grads = tf.convert_to_tensor([0, 1., 1.5, 0, 0], dtype=tf.float64) - sparse_grads = tf.IndexedSlices( - tf.convert_to_tensor([1., 1.5], dtype=tf.float64), - tf.convert_to_tensor([1, 2]), - dense_shape=tf.convert_to_tensor([len(grads)])) - for _ in range(5): - optimizer_1.apply_gradients(zip([grads], [x1])) - optimizer_2.apply_gradients(zip([sparse_grads], [x2])) - self.assertAllClose(x1, x2) - - -class OptimizerRegressionTest(tf.test.TestCase, parameterized.TestCase): - """Test optimizer outputs the same numerical results as optimizer_v2.""" - - def _compare_numerical(self, old_optimizer, new_optimizer): - x1 = tf.Variable(np.ones([10]), dtype=tf.float64) - x2 = tf.Variable(np.ones([10]), dtype=tf.float64) - grads = tf.convert_to_tensor(np.arange(0.1, 1.1, 0.1)) - sparse_grads = tf.IndexedSlices( - tf.convert_to_tensor([0, 0.2, 0.4, 0.8], dtype=tf.float64), - tf.convert_to_tensor([0, 2, 4, 6]), - dense_shape=tf.convert_to_tensor([len(grads)])) - - for _ in range(5): - self.assertAllClose(x1, x2) - old_optimizer.apply_gradients(zip([grads], [x1])) - new_optimizer.apply_gradients(zip([grads], [x2])) - - for _ in range(5): - self.assertAllClose(x1, x2) - old_optimizer.apply_gradients(zip([sparse_grads], [x1])) - new_optimizer.apply_gradients(zip([sparse_grads], [x2])) - - def testAdam(self): - self._compare_numerical( - adam_old.Adam(amsgrad=True), adam_new.Adam(amsgrad=True)) - - def testAdadelta(self): - self._compare_numerical(adadelta_old.Adadelta(), adadelta_new.Adadelta()) - - def testAdagrad(self): - self._compare_numerical(adagrad_old.Adagrad(), adagrad_new.Adagrad()) - - def testFtrl(self): - self._compare_numerical(ftrl_old.Ftrl(), ftrl_new.Ftrl()) - - def testRMSprop(self): - self._compare_numerical(rmsprop_old.RMSprop(), rmsprop_new.RMSprop()) - - @parameterized.product(nesterov=[True, False]) - def testSgd(self, nesterov): - self._compare_numerical( - sgd_old.SGD(nesterov=nesterov), sgd_new.SGD(nesterov=nesterov)) - - -class DistributedTrainingTest(tf.test.TestCase, parameterized.TestCase): - - @ds_combinations.generate( - tf.__internal__.test.combinations.combine( - strategy=STRATEGIES, optimizer_fn=OPTIMIZER_FN)) - def testGetGradientsInModel(self, strategy, optimizer_fn): - with strategy.scope(): - model = keras.Sequential( - [keras.layers.Input(shape=(1,)), - keras.layers.Dense(1)]) - optimizer = optimizer_fn() - x = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) - y = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) - model.compile(loss="mse", optimizer=optimizer) - model.fit(x, y, epochs=1, steps_per_epoch=5) - if optimizer.name == "Adam": - # Assert the momentum variable is not 0. - self.assertNotEqual(self.evaluate(optimizer._momentums._storage[0]), 0) - elif optimizer.name == "Adadelta": - # Assert the accumulated variable is not 0. 
- self.assertNotEqual( - self.evaluate(optimizer._accumulated_grads._storage[0]), 0) - elif optimizer.name == "Adagrad": - # Assert the accumulated variable is not 0. - self.assertNotEqual(self.evaluate(optimizer._accumulators._storage[0]), 0) - - @ds_combinations.generate( - tf.__internal__.test.combinations.combine( - strategy=STRATEGIES, optimizer_fn=OPTIMIZER_FN)) - def testGetGradientsInCustomTrainingLoop(self, strategy, optimizer_fn): - with strategy.scope(): - model = keras.Sequential( - [keras.layers.Input(shape=(1,)), - keras.layers.Dense(1)]) - optimizer = optimizer_fn() - - def per_worker_dataset_fn(): - - def dataset_fn(_): - x, y = [1, 1, 1, 0, 0, 0], [1, 1, 1, 0, 0, 0] - ds = tf.data.Dataset.from_tensor_slices((x, y)) - ds = ds.repeat().batch(6) - return ds - - return strategy.distribute_datasets_from_function(dataset_fn) - - ds = per_worker_dataset_fn() - - @tf.function - def train_step(ds): - - def replica_fn(data): - features, labels = data - with tf.GradientTape() as tape: - output = model(tf.expand_dims(features, axis=1)) - loss = keras.losses.MeanSquaredError( - reduction=losses_utils.ReductionV2.NONE)(labels, output) - grads = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(zip(grads, model.trainable_variables)) - - strategy.run(replica_fn, args=(next(iter(ds)),)) - - for _ in range(3): - train_step(ds) - self.assertEqual(self.evaluate(optimizer.iterations), 3) - - @ds_combinations.generate( - tf.__internal__.test.combinations.combine(strategy=[ - ds_combinations.mirrored_strategy_with_two_gpus, - ds_combinations.tpu_strategy, - ds_combinations.multi_worker_mirrored_2x2_gpu, - ds_combinations.central_storage_strategy_with_two_gpus, - ])) - def testJitCompile(self, strategy): - # Test the optimizer yields same numerical results when jit_compile is - # on and off. 
- with strategy.scope(): - optimizer_1 = adam_new.Adam( - jit_compile=False, use_ema=True, ema_overwrite_frequency=1) - optimizer_2 = adam_new.Adam( - jit_compile=True, use_ema=True, ema_overwrite_frequency=1) - model_1 = keras.Sequential([ - keras.layers.Input(shape=(2,)), - keras.layers.Dense(5), - keras.layers.Dense(1) - ]) - model_2 = keras.models.clone_model(model_1) - model_2.set_weights(model_1.get_weights()) - - def per_worker_dataset_fn(): - - def dataset_fn(_): - x = np.random.rand(6, 2) - y = [1, 1, 1, 0, 0, 0] - ds = tf.data.Dataset.from_tensor_slices((x, y)) - ds = ds.repeat().batch(6) - return ds - - return strategy.distribute_datasets_from_function(dataset_fn) - - ds = per_worker_dataset_fn() - - @tf.function - def train_step(ds): - - def replica_fn(data): - features, labels = data - with tf.GradientTape() as tape: - output_1 = model_1(features) - loss_1 = keras.losses.MeanSquaredError( - reduction=losses_utils.ReductionV2.NONE)(labels, output_1) - grads_1 = tape.gradient(loss_1, model_1.trainable_variables) - optimizer_1.apply_gradients(zip(grads_1, model_1.trainable_variables)) - - with tf.GradientTape() as tape: - output_2 = model_2(features) - loss_2 = keras.losses.MeanSquaredError( - reduction=losses_utils.ReductionV2.NONE)(labels, output_2) - grads_2 = tape.gradient(loss_2, model_2.trainable_variables) - optimizer_2.apply_gradients(zip(grads_2, model_2.trainable_variables)) - - strategy.run(replica_fn, args=(next(iter(ds)),)) - - for _ in range(3): - train_step(ds) - self.assertAllClose(model_1.trainable_variables[0][0], - model_2.trainable_variables[0][0]) - - -if __name__ == "__main__": - tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/optimizers/optimizer_experimental/rmsprop.py b/keras/optimizers/optimizer_experimental/rmsprop.py deleted file mode 100644 index dbfbf1ba30b0..000000000000 --- a/keras/optimizers/optimizer_experimental/rmsprop.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""RMSprop optimizer implementation.""" - -from keras.optimizers.optimizer_experimental import optimizer -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@generic_utils.register_keras_serializable() -@keras_export('keras.optimizers.experimental.RMSprop', v1=[]) -class RMSprop(optimizer.Optimizer): - r"""Optimizer that implements the RMSprop algorithm. - - The gist of RMSprop is to: - - - Maintain a moving (discounted) average of the square of gradients - - Divide the gradient by the root of this average - - This implementation of RMSprop uses plain momentum, not Nesterov momentum. - - The centered version additionally maintains a moving average of the - gradients, and uses that average to estimate the variance. 
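The dense, momentum-free update described above can be sketched in plain NumPy; the names and sample values are illustrative, not part of this implementation:

```python
import numpy as np

def rmsprop_step(w, g, v, lr=0.001, rho=0.9, eps=1e-7):
    # v tracks a discounted average of squared gradients; the gradient is
    # then divided by the root of this average.
    v = rho * v + (1 - rho) * g ** 2
    return w - lr * g / (np.sqrt(v) + eps), v

w, v = np.array([1.0, 2.0]), np.zeros(2)
w, v = rmsprop_step(w, np.array([0.5, -0.5]), v)
print(w, v)
```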
-
-  Args:
-    learning_rate: Initial value for the learning rate:
-      either a floating point value,
-      or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance.
-      Defaults to 0.001.
-    rho: float, defaults to 0.9. Discounting factor for the old gradients.
-    momentum: float, defaults to 0.0. If not 0.0, the optimizer tracks the
-      momentum value, with a decay rate equal to `1 - momentum`.
-    epsilon: A small constant for numerical stability, added to the
-      denominator of the update to avoid division by zero. Defaults to 1e-7.
-    centered: Boolean. If `True`, gradients are normalized by the estimated
-      variance of the gradient; if False, by the uncentered second moment.
-      Setting this to `True` may help with training, but is slightly more
-      expensive in terms of computation and memory. Defaults to `False`.
-    {{base_optimizer_keyword_args}}
-
-  Usage:
-
-  >>> opt = tf.keras.optimizers.experimental.RMSprop(learning_rate=0.1)
-  >>> var1 = tf.Variable(10.0)
-  >>> loss = lambda: (var1 ** 2) / 2.0  # d(loss) / d(var1) = var1
-  >>> opt.minimize(loss, [var1])
-  >>> var1.numpy()
-  9.683772
-
-  Reference:
-    - [Hinton, 2012](
-      http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
-
-  """
-
-  def __init__(self,
-               learning_rate=0.001,
-               rho=0.9,
-               momentum=0.0,
-               epsilon=1e-7,
-               centered=False,
-               clipnorm=None,
-               clipvalue=None,
-               global_clipnorm=None,
-               use_ema=False,
-               ema_momentum=0.99,
-               ema_overwrite_frequency=100,
-               jit_compile=True,
-               name='RMSprop',
-               **kwargs):
-    super().__init__(
-        clipnorm=clipnorm,
-        clipvalue=clipvalue,
-        global_clipnorm=global_clipnorm,
-        use_ema=use_ema,
-        ema_momentum=ema_momentum,
-        ema_overwrite_frequency=ema_overwrite_frequency,
-        jit_compile=jit_compile,
-        name=name,
-        **kwargs)
-    self._learning_rate = self._build_learning_rate(learning_rate)
-    self.rho = rho
-    self.momentum = momentum
-    self.epsilon = epsilon
-    self.centered = centered
-
-  def build(self, var_list):
-    super().build(var_list)
-    if hasattr(self, '_built') and self._built:
-      return
-    self._built = True
-
-    self._velocities = []
-    for var in var_list:
-      self._velocities.append(
-          self.add_variable_from_reference(var, 'velocity'))
-
-    self._momentums = []
-    if self.momentum > 0:
-      for var in var_list:
-        self._momentums.append(
-            self.add_variable_from_reference(var, 'momentum'))
-
-    self._average_gradients = []
-    if self.centered:
-      for var in var_list:
-        self._average_gradients.append(
-            self.add_variable_from_reference(var, 'average_gradient'))
-
-  def update_step(self, gradient, variable):
-    """Update step given gradient and the associated model variable."""
-    lr = tf.cast(self.learning_rate, variable.dtype)
-
-    var_key = self._var_key(variable)
-    velocity = self._velocities[self._index_dict[var_key]]
-    momentum = None
-    if self.momentum > 0:
-      momentum = self._momentums[self._index_dict[var_key]]
-    average_grad = None
-    if self.centered:
-      average_grad = self._average_gradients[self._index_dict[var_key]]
-
-    rho = self.rho
-
-    if isinstance(gradient, tf.IndexedSlices):
-      # Sparse gradients.
- velocity.assign(rho * velocity) - velocity.scatter_add(tf.IndexedSlices( - tf.square(gradient.values) * (1 - rho), gradient.indices)) - if self.centered: - average_grad.assign(rho * average_grad) - average_grad.scatter_add( - tf.IndexedSlices( - tf.square(gradient.values) * (1 - rho), gradient.indices)) - velocity.assign_add(-tf.square(average_grad)) - velocity_value = tf.gather(velocity, gradient.indices) - transformed_grad = tf.IndexedSlices( - gradient.values / (tf.sqrt(velocity_value) + self.epsilon), - gradient.indices) - - if self.momentum > 0: - momentum.assign(self.momentum * momentum) - momentum.scatter_add(transformed_grad) - variable.assign_add(-lr * momentum) - else: - variable.scatter_add( - tf.IndexedSlices(-lr * transformed_grad.values, - transformed_grad.indices)) - else: - # Dense gradients. - velocity.assign(rho * velocity + (1 - rho) * tf.square(gradient)) - if self.centered: - average_grad.assign(rho * average_grad + - (1 - rho) * tf.square(gradient)) - velocity.assign_add(-tf.square(average_grad)) - transformed_grad = gradient / (tf.sqrt(velocity) + self.epsilon) - if self.momentum > 0: - momentum.assign(self.momentum * momentum + transformed_grad) - variable.assign_add(-lr * momentum) - else: - variable.assign_add(-lr * transformed_grad) - - def get_config(self): - config = super().get_config() - - config.update({ - 'learning_rate': self._serialize_hyperparameter(self._learning_rate), - 'rho': self.rho, - 'momentum': self.momentum, - 'epsilon': self.epsilon, - 'centered': self.centered, - }) - return config - - -RMSprop.__doc__ = RMSprop.__doc__.replace( - '{{base_optimizer_keyword_args}}', optimizer.base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_experimental/sgd.py b/keras/optimizers/optimizer_experimental/sgd.py deleted file mode 100644 index c2bb7ce15210..000000000000 --- a/keras/optimizers/optimizer_experimental/sgd.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""SGD optimizer implementation.""" - -from keras.optimizers.optimizer_experimental import optimizer -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@generic_utils.register_keras_serializable() -@keras_export('keras.optimizers.experimental.SGD', v1=[]) -class SGD(optimizer.Optimizer): - r"""Gradient descent (with momentum) optimizer. 
-
-  Update rule for parameter `w` with gradient `g` when `momentum` is 0:
-
-  ```python
-  w = w - learning_rate * g
-  ```
-
-  Update rule when `momentum` is larger than 0:
-
-  ```python
-  velocity = momentum * velocity - learning_rate * g
-  w = w + velocity
-  ```
-
-  When `nesterov=True`, this rule becomes:
-
-  ```python
-  velocity = momentum * velocity - learning_rate * g
-  w = w + momentum * velocity - learning_rate * g
-  ```
-
-  Args:
-    learning_rate: A `Tensor`, floating point value, or a schedule that is a
-      `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
-      that takes no arguments and returns the actual value to use. The
-      learning rate. Defaults to 0.01.
-    momentum: float hyperparameter >= 0 that accelerates gradient descent in
-      the relevant direction and dampens oscillations. Defaults to 0, i.e.,
-      vanilla gradient descent.
-    nesterov: boolean. Whether to apply Nesterov momentum.
-      Defaults to `False`.
-    {{base_optimizer_keyword_args}}
-
-  Usage:
-
-  >>> opt = tf.keras.optimizers.experimental.SGD(learning_rate=0.1)
-  >>> var = tf.Variable(1.0)
-  >>> loss = lambda: (var ** 2) / 2.0  # d(loss) / d(var) = var
-  >>> opt.minimize(loss, [var])
-  >>> # Step is `- learning_rate * grad`
-  >>> var.numpy()
-  0.9
-
-  >>> opt = tf.keras.optimizers.experimental.SGD(learning_rate=0.1, momentum=0.9)
-  >>> var = tf.Variable(1.0)
-  >>> val0 = var.value()
-  >>> loss = lambda: (var ** 2) / 2.0  # d(loss) / d(var) = var
-  >>> # First step is `- learning_rate * grad`
-  >>> opt.minimize(loss, [var])
-  >>> val1 = var.value()
-  >>> (val0 - val1).numpy()
-  0.1
-  >>> # On later steps, step-size increases because of momentum
-  >>> opt.minimize(loss, [var])
-  >>> val2 = var.value()
-  >>> (val1 - val2).numpy()
-  0.18
-
-  Reference:
-    - For `nesterov=True`, see [Sutskever et al., 2013](
-      http://jmlr.org/proceedings/papers/v28/sutskever13.pdf).
-  """
-
-  def __init__(self,
-               learning_rate=0.01,
-               momentum=0.0,
-               nesterov=False,
-               amsgrad=False,
-               clipnorm=None,
-               clipvalue=None,
-               global_clipnorm=None,
-               use_ema=False,
-               ema_momentum=0.99,
-               ema_overwrite_frequency=None,
-               jit_compile=True,
-               name='SGD',
-               **kwargs):
-    super().__init__(
-        name=name,
-        clipnorm=clipnorm,
-        clipvalue=clipvalue,
-        global_clipnorm=global_clipnorm,
-        use_ema=use_ema,
-        ema_momentum=ema_momentum,
-        ema_overwrite_frequency=ema_overwrite_frequency,
-        jit_compile=jit_compile,
-        **kwargs)
-    self._learning_rate = self._build_learning_rate(learning_rate)
-    self.momentum = momentum
-    self.nesterov = nesterov
-    if isinstance(momentum, (int, float)) and (momentum < 0 or momentum > 1):
-      raise ValueError('`momentum` must be in the range [0, 1].')
-
-  def build(self, var_list):
-    """Initialize optimizer variables.
-
-    The SGD optimizer has one variable, `momentums`, which is only created
-    if `self.momentum` is not 0.
-
-    Args:
-      var_list: list of model variables to build SGD variables on.
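A quick plain-Python check of the momentum numbers in the docstring above (`learning_rate=0.1`, `momentum=0.9`, loss `var ** 2 / 2`):

```python
lr, momentum = 0.1, 0.9
w, velocity = 1.0, 0.0
deltas = []
for _ in range(2):
    grad = w                                   # d/dw of w**2 / 2
    velocity = momentum * velocity - lr * grad
    prev, w = w, w + velocity
    deltas.append(round(prev - w, 2))
print(deltas)  # [0.1, 0.18], matching the docstring
```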
- """ - super().build(var_list) - if hasattr(self, '_built') and self._built: - return - self.momentums = [] - if self.momentum != 0: - for var in var_list: - self.momentums.append( - self.add_variable_from_reference( - model_variable=var, variable_name='m')) - self._built = True - - def update_step(self, gradient, variable): - """Update step given gradient and the associated model variable.""" - lr = tf.cast(self.learning_rate, variable.dtype) - m = None - var_key = self._var_key(variable) - if self.momentum != 0: - momentum = tf.cast(self.momentum, variable.dtype) - m = self.momentums[self._index_dict[var_key]] - - # TODO(b/204321487): Add nesterov acceleration. - if isinstance(gradient, tf.IndexedSlices): - # Sparse gradients. - add_value = tf.IndexedSlices(-gradient.values * lr, gradient.indices) - if m is not None: - m.assign(m * momentum) - m.scatter_add(add_value) - if self.nesterov: - variable.scatter_add(add_value) - variable.assign_add(m * momentum) - else: - variable.assign_add(m) - else: - variable.scatter_add(add_value) - else: - # Dense gradients - if m is not None: - m.assign(-gradient * lr + m * momentum) - if self.nesterov: - variable.assign_add(-gradient * lr + m * momentum) - else: - variable.assign_add(m) - else: - variable.assign_add(-gradient * lr) - - def get_config(self): - config = super().get_config() - - config.update({ - 'learning_rate': self._serialize_hyperparameter(self._learning_rate), - 'momentum': self.momentum, - 'nesterov': self.nesterov, - }) - return config - - -SGD.__doc__ = SGD.__doc__.replace( - '{{base_optimizer_keyword_args}}', optimizer.base_optimizer_keyword_args) diff --git a/keras/optimizers/optimizer_pss_test.py b/keras/optimizers/optimizer_pss_test.py new file mode 100644 index 000000000000..f4ff19c98bb5 --- /dev/null +++ b/keras/optimizers/optimizer_pss_test.py @@ -0,0 +1,165 @@ +"""Tests for calling optimizer on ParameterServerStrategy.""" + +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +import keras +from keras.optimizers import adadelta +from keras.optimizers import adagrad +from keras.optimizers import adam +from keras.optimizers import adamax +from keras.optimizers import adamw +from keras.optimizers import ftrl +from keras.optimizers import lion +from keras.optimizers import nadam +from keras.optimizers import rmsprop +from keras.optimizers import sgd +from keras.utils import dataset_creator +from keras.utils import losses_utils + +ds_combinations = tf.__internal__.distribute.combinations + +STRATEGIES = [ + ds_combinations.parameter_server_strategy_3worker_2ps_cpu, + ds_combinations.parameter_server_strategy_3worker_2ps_1gpu, +] + +adadelta_fn = tf.__internal__.test.combinations.NamedObject( + "adadelta", + lambda: adadelta.Adadelta( + 0.002, use_ema=True, ema_overwrite_frequency=None + ), +) +adagrad_fn = tf.__internal__.test.combinations.NamedObject( + "adagrad", lambda: adagrad.Adagrad(0.002) +) +adam_fn = tf.__internal__.test.combinations.NamedObject( + "adam", lambda: adam.Adam(0.002) +) +adamax_fn = tf.__internal__.test.combinations.NamedObject( + "adamax", lambda: adamax.Adamax(0.002) +) +adamw_fn = tf.__internal__.test.combinations.NamedObject( + "adamw", lambda: adamw.AdamW(0.002, weight_decay=0.004) +) +ftrl_fn = tf.__internal__.test.combinations.NamedObject( + "ftrl", lambda: ftrl.Ftrl(0.002) +) +lion_fn = tf.__internal__.test.combinations.NamedObject( + "lion", lambda: lion.Lion(0.002) +) +nadam_fn = tf.__internal__.test.combinations.NamedObject( + "experimentnadam", lambda: 
nadam.Nadam(0.002) +) +rmsprop_fn = tf.__internal__.test.combinations.NamedObject( + "rmsprop", lambda: rmsprop.RMSprop(0.002) +) +sgd_fn = tf.__internal__.test.combinations.NamedObject( + "sgdaverage", + lambda: sgd.SGD(0.002, use_ema=True, ema_overwrite_frequency=1), +) + +OPTIMIZER_FN = [ + adadelta_fn, + adagrad_fn, + adam_fn, + adamax_fn, + adamw_fn, + ftrl_fn, + lion_fn, + nadam_fn, + rmsprop_fn, + sgd_fn, +] + + +# TODO(b/228209527): Combine this test with optimizer_test after +# fixing the NCCL issue. +class OptimizerPssTest(tf.test.TestCase, parameterized.TestCase): + def _get_model(self): + return keras.Sequential( + [keras.layers.Input(shape=(1,)), keras.layers.Dense(1)] + ) + + def _get_dataset_fn(self): + def dataset_fn(_): + x, y = [1, 1, 1, 0, 0, 0], [1, 1, 1, 0, 0, 0] + ds = tf.data.Dataset.from_tensor_slices((x, y)) + ds = ds.repeat().batch(6) + return ds + + return dataset_fn + + def _verify_accumulators_updated(self, optimizer): + variables = optimizer.variables + for var in variables: + if "iteration" not in var.name and "learning_rate" not in var.name: + # Find a variable not iteration or learning_rate, and verify its + # value is updated (not 0). + self.assertNotAllEqual(var, 0) + + @ds_combinations.generate( + tf.__internal__.test.combinations.combine( + strategy=STRATEGIES, optimizer_fn=OPTIMIZER_FN + ) + ) + def testGetGradientsInModelPss(self, strategy, optimizer_fn): + with strategy.scope(): + model = self._get_model() + optimizer = optimizer_fn() + ds_fn = self._get_dataset_fn() + if isinstance(strategy, tf.distribute.ParameterServerStrategy): + ds = dataset_creator.DatasetCreator(ds_fn) + else: + ds = ds_fn(None) + model.compile(loss="mse", optimizer=optimizer) + model.fit(ds, epochs=1, steps_per_epoch=5) + + self._verify_accumulators_updated(optimizer) + + @ds_combinations.generate( + tf.__internal__.test.combinations.combine( + strategy=STRATEGIES, optimizer_fn=OPTIMIZER_FN + ) + ) + def testGetGradientsInCustomTrainingLoopPss(self, strategy, optimizer_fn): + coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator( + strategy + ) + + with strategy.scope(): + model = self._get_model() + optimizer = optimizer_fn() + + def per_worker_dataset_fn(): + return strategy.distribute_datasets_from_function( + self._get_dataset_fn() + ) + + ds = coordinator.create_per_worker_dataset(per_worker_dataset_fn) + + @tf.function + def train_step(iterator): + def replica_fn(data): + features, labels = data + with tf.GradientTape() as tape: + output = model(tf.expand_dims(features, axis=1)) + loss = keras.losses.MeanSquaredError( + reduction=losses_utils.ReductionV2.NONE + )(labels, output) + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients( + zip(grads, model.trainable_variables) + ) + + strategy.run(replica_fn, args=(next(iterator),)) + + for _ in range(3): + coordinator.schedule(train_step, args=(iter(ds),)) + coordinator.join() + self.assertEqual(self.evaluate(optimizer.iterations), 3) + self._verify_accumulators_updated(optimizer) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/optimizers/optimizer_test.py b/keras/optimizers/optimizer_test.py new file mode 100644 index 000000000000..f501038a2cd1 --- /dev/null +++ b/keras/optimizers/optimizer_test.py @@ -0,0 +1,868 @@ +"""Tests for the reworked optimizer. 
+
+More context in go/new-keras-optimizer
+"""
+
+import os
+from unittest import mock
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+from absl.testing import parameterized
+
+import keras
+from keras.optimizers import adadelta as adadelta_new
+from keras.optimizers import adafactor as adafactor_new
+from keras.optimizers import adagrad as adagrad_new
+from keras.optimizers import adam as adam_new
+from keras.optimizers import adamax as adamax_new
+from keras.optimizers import adamw as adamw_new
+from keras.optimizers import ftrl as ftrl_new
+from keras.optimizers import lion as lion_new
+from keras.optimizers import nadam as nadam_new
+from keras.optimizers import rmsprop as rmsprop_new
+from keras.optimizers import sgd as sgd_new
+from keras.optimizers.legacy import adadelta as adadelta_old
+from keras.optimizers.legacy import adagrad as adagrad_old
+from keras.optimizers.legacy import adam as adam_old
+from keras.optimizers.legacy import ftrl as ftrl_old
+from keras.optimizers.legacy import gradient_descent as sgd_old
+from keras.optimizers.legacy import rmsprop as rmsprop_old
+from keras.optimizers.schedules import learning_rate_schedule
+from keras.testing_infra import test_utils
+from keras.utils import losses_utils
+
+ds_combinations = tf.__internal__.distribute.combinations
+
+STRATEGIES = [
+    # TODO(b/202992598): Add PSS strategy once the XLA issue is resolved.
+    ds_combinations.one_device_strategy,
+    ds_combinations.mirrored_strategy_with_two_cpus,
+    ds_combinations.mirrored_strategy_with_two_gpus,
+    ds_combinations.tpu_strategy,
+    ds_combinations.cloud_tpu_strategy,
+    ds_combinations.multi_worker_mirrored_2x1_cpu,
+    ds_combinations.multi_worker_mirrored_2x2_gpu,
+    ds_combinations.central_storage_strategy_with_two_gpus,
+]
+
+adadelta_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "experimentaladadelta",
+    lambda: adadelta_new.Adadelta(
+        0.002, use_ema=True, ema_overwrite_frequency=None
+    ),
+)
+adagrad_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "experimentaladagrad", lambda: adagrad_new.Adagrad(0.002)
+)
+adafactor_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "adafactor", lambda: adafactor_new.Adafactor(0.002)
+)
+adam_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "experimentaladam", lambda: adam_new.Adam(0.002)
+)
+adamax_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "experimentaladamax", lambda: adamax_new.Adamax(0.002)
+)
+adamw_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "experimentaladamw", lambda: adamw_new.AdamW(0.002, weight_decay=0.004)
+)
+ftrl_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "experimentalftrl", lambda: ftrl_new.Ftrl(0.002)
+)
+lion_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "lion", lambda: lion_new.Lion(0.002)
+)
+nadam_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "experimentalnadam", lambda: nadam_new.Nadam(0.002)
+)
+rmsprop_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "experimentalrmsprop", lambda: rmsprop_new.RMSprop(0.002)
+)
+sgd_new_fn = tf.__internal__.test.combinations.NamedObject(
+    "experimentalsgdaverage",
+    lambda: sgd_new.SGD(
+        0.002, weight_decay=0.004, use_ema=True, ema_overwrite_frequency=1
+    ),
+)
+
+OPTIMIZER_FN = [
+    adadelta_new_fn,
+    adagrad_new_fn,
+    adafactor_new_fn,
+    adam_new_fn,
+    adamax_new_fn,
+    adamw_new_fn,
+    ftrl_new_fn,
+    lion_new_fn,
+    nadam_new_fn,
+    rmsprop_new_fn,
+    sgd_new_fn,
+]
+
+
+class OptimizerFunctionalityTest(tf.test.TestCase, parameterized.TestCase):
+    """Test the functionality of the optimizer."""
+
+    def testAddVariableFromReference(self):
+        optimizer = adam_new.Adam()
+        variable = optimizer.add_variable_from_reference(
+            tf.Variable(1.0, name="tmp"), "test"
+        )
+        self.assertEqual(variable._shared_name, "test/tmp")
+        self.assertEqual(self.evaluate(variable), 0)
+
+    def testAddVariableWithCustomShape(self):
+        optimizer = adam_new.Adam()
+        variable = optimizer.add_variable_from_reference(
+            tf.Variable([1.0, 2.0], name="tmp"), "test", shape=[]
+        )
+        self.assertEqual(variable, tf.Variable(0.0))
+
+    def testBuildIndexDict(self):
+        optimizer = adam_new.Adam()
+        var_list = [tf.Variable(0, name=f"var{i}") for i in range(10)]
+        optimizer._build_index_dict(var_list)
+        self.assertEqual(
+            optimizer._index_dict[optimizer._var_key(var_list[7])], 7
+        )
+
+    def testComputeGradients(self):
+        optimizer = adam_new.Adam()
+        x = tf.Variable([1.0, 2.0], dtype=tf.float32)
+        loss_fn = lambda: x
+        # Test Tensor-type var_list.
+        var_list = [x]
+        grads_and_vars = optimizer.compute_gradients(loss_fn, var_list)
+        grads, _ = zip(*grads_and_vars)
+        self.assertAllEqual(grads[0], tf.constant([1.0, 1.0]))
+        # Test callable-type var_list, and create the variable in loss_fn.
+        x = []
+
+        def loss_fn():
+            variable = tf.Variable([1.0, 2.0], dtype=tf.float32)
+            x.append(variable)
+            return variable
+
+        var_list = lambda: x
+
+        grads_and_vars = optimizer.compute_gradients(loss_fn, var_list)
+        grads, _ = zip(*grads_and_vars)
+        self.assertAllEqual(grads[0], tf.constant([1.0, 1.0]))
+
+    def testClipNorm(self):
+        optimizer = adam_new.Adam(clipnorm=1)
+        grad = [tf.convert_to_tensor([100.0, 100.0])]
+        clipped_grad = optimizer._clip_gradients(grad)
+        self.assertAllClose(clipped_grad[0], [2**0.5 / 2, 2**0.5 / 2])
+
+    def testClipValue(self):
+        optimizer = adam_new.Adam(clipvalue=1)
+        grad = [tf.convert_to_tensor([100.0, 100.0])]
+        clipped_grad = optimizer._clip_gradients(grad)
+        self.assertAllEqual(clipped_grad[0], [1.0, 1.0])
+
+    def testWeightDecay(self):
+        grads, var1, var2, var3 = (
+            tf.zeros(()),
+            tf.Variable(2.0),
+            tf.Variable(2.0, name="exclude"),
+            tf.Variable(2.0),
+        )
+        optimizer_1 = adamw_new.AdamW(learning_rate=1, weight_decay=0.004)
+        optimizer_1.apply_gradients(zip([grads], [var1]))
+
+        optimizer_2 = adamw_new.AdamW(learning_rate=1, weight_decay=0.004)
+        optimizer_2.exclude_from_weight_decay(var_names=["exclude"])
+        optimizer_2.apply_gradients(zip([grads, grads], [var1, var2]))
+
+        optimizer_3 = adamw_new.AdamW(learning_rate=1, weight_decay=0.004)
+        optimizer_3.exclude_from_weight_decay(var_list=[var3])
+        optimizer_3.apply_gradients(zip([grads, grads], [var1, var3]))
+
+        self.assertEqual(var1, 1.9760959)
+        self.assertEqual(var2, 2.0)
+        self.assertEqual(var3, 2.0)
+
+        grads, var1, var2, var3 = (
+            tf.zeros(()),
+            tf.Variable(2.0),
+            tf.Variable(2.0, name="exclude"),
+            tf.Variable(2.0),
+        )
+        optimizer_1 = sgd_new.SGD(learning_rate=1, weight_decay=0.004)
+        optimizer_1.apply_gradients(zip([grads], [var1]))
+
+        optimizer_2 = sgd_new.SGD(learning_rate=1, weight_decay=0.004)
+        optimizer_2.exclude_from_weight_decay(var_names=["exclude"])
+        optimizer_2.apply_gradients(zip([grads, grads], [var1, var2]))
+
+        optimizer_3 = sgd_new.SGD(learning_rate=1, weight_decay=0.004)
+        optimizer_3.exclude_from_weight_decay(var_list=[var3])
+        optimizer_3.apply_gradients(zip([grads, grads], [var1, var3]))
+
+        self.assertEqual(var1, 1.9760959)
+        self.assertEqual(var2, 2.0)
+        self.assertEqual(var3, 2.0)
+
+    def testClipGlobalNorm(self):
+        optimizer = adam_new.Adam(global_clipnorm=1)
+        grad = [
+            tf.cast([100.0, 100.0], dtype=tf.float32),
+            tf.cast([100.0, 100.0], dtype=tf.float32),
+        ]
+        clipped_grad = optimizer._clip_gradients(grad)
+        self.assertAllClose(clipped_grad[0], [0.5, 0.5])
+
+    def testPassingLegacyArgsRaiseError(self):
+        with self.assertRaisesRegex(ValueError, "decay is deprecated*"):
+            _ = adam_new.Adam(clipnorm=1, decay=0.5)
+
+    def testPassingLegacyClipnorm(self):
+        optimizer = adam_new.Adam(clipnorm=1)
+        self.assertEqual(optimizer.clipnorm, 1)
+
+    def testReturnAllOptimizerVariables(self):
+        x = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32)
+        optimizer = adam_new.Adam()
+        grads = tf.convert_to_tensor([[1.0, 2.0], [3.0, 4.0]])
+        optimizer.apply_gradients(zip([grads], [x]))
+        optimizer_variables = optimizer.variables
+        all_names = [var._shared_name for var in optimizer_variables]
+        self.assertLen(optimizer_variables, 3)
+        self.assertCountEqual(
+            all_names,
+            [
+                "iteration",
+                "Adam/m/Variable",
+                "Adam/v/Variable",
+            ],
+        )
+
+    def testSetWeights(self):
+        x = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32)
+        optimizer_1 = adam_new.Adam()
+        grads = tf.convert_to_tensor([[1.0, 2.0], [3.0, 4.0]])
+        optimizer_1.apply_gradients(zip([grads], [x]))
+        optimizer_2 = adam_new.Adam()
+        with self.assertRaisesRegex(ValueError, "You are calling*"):
+            optimizer_2.set_weights(optimizer_1.variables)
+        optimizer_2.build([x])
+        optimizer_2.set_weights(optimizer_1.variables)
+        self.assertAllClose(optimizer_1.variables, optimizer_2.variables)
+
+    def testSetLearningRate(self):
+        optimizer = adam_new.Adam(learning_rate=1.0)
+        self.assertIsInstance(optimizer._learning_rate, tf.Variable)
+        self.assertEqual(self.evaluate(optimizer.learning_rate), 1.0)
+        optimizer.learning_rate = 2.0
+        self.assertEqual(self.evaluate(optimizer.learning_rate), 2.0)
+        # Test the legacy setter.
+        optimizer.lr = 3.0
+        self.assertEqual(self.evaluate(optimizer.learning_rate), 3.0)
+
+        lr_schedule = learning_rate_schedule.ExponentialDecay(
+            initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9
+        )
+        optimizer = adam_new.Adam(learning_rate=lr_schedule)
+        self.assertIsInstance(
+            optimizer._learning_rate, learning_rate_schedule.ExponentialDecay
+        )
+        self.assertEqual(optimizer.learning_rate, 0.01)
+        # Test the legacy property.
+        self.assertEqual(optimizer.lr, 0.01)
+
+        x = tf.Variable([1.0, 2.0], dtype=tf.float32)
+        grads = tf.convert_to_tensor([1.0, 2.0])
+        for _ in range(2):
+            optimizer.apply_gradients(zip([grads], [x]))
+        self.assertTrue(
+            optimizer.learning_rate < 0.01 and optimizer.learning_rate > 0.00999
+        )
+        # Check that setting `learning_rate` to a LearningRateSchedule
+        # instance does not raise an error.
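+        # (Assigning a plain float afterwards is expected to fail: once the
+        # optimizer holds a schedule, only another schedule is accepted, as
+        # the TypeError assertion below verifies.)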
+        optimizer.learning_rate = learning_rate_schedule.ExponentialDecay(
+            initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9
+        )
+        with self.assertRaisesRegex(
+            TypeError, "This optimizer was created with*"
+        ):
+            optimizer.learning_rate = 2.0
+
+    def testSetIterations(self):
+        optimizer = adam_new.Adam(jit_compile=False)
+        optimizer.iterations = tf.Variable(2, dtype=tf.int32)
+        self.assertEqual(optimizer.iterations, 2)
+        var_list = [tf.Variable(2.0), tf.Variable(2.0)]
+        grads = tf.convert_to_tensor([1.0, 1.0])
+        iterations = optimizer.apply_gradients(zip(grads, var_list))
+        self.assertEqual(iterations, 3)
+        self.assertEqual(optimizer.iterations, 3)
+        with self.assertRaisesRegex(RuntimeError, "Cannot set*"):
+            optimizer.iterations = 2
+
+    def testVariableConstraints(self):
+        optimizer = adam_new.Adam()
+        inputs = keras.layers.Input(shape=[1])
+        outputs = keras.layers.Dense(1, kernel_constraint="NonNeg")(inputs)
+        model = keras.models.Model(inputs=inputs, outputs=outputs)
+        model.trainable_variables[0] = -999999  # Set as a negative number.
+        grads = [tf.zeros(1, 1), tf.zeros(1)]
+        optimizer.apply_gradients(zip(grads, model.trainable_variables))
+        self.assertEqual(model.trainable_variables[0], 0.0)
+
+    def testNoGradients(self):
+        optimizer = adam_new.Adam(jit_compile=False)
+        optimizer.apply_gradients(zip([], []))
+
+    def testApplyGradientsNameArg(self):
+        optimizer = adam_new.Adam(jit_compile=False)
+        var_list = [tf.Variable(2.0), tf.Variable(2.0)]
+        grads = tf.convert_to_tensor([1.0, 1.0])
+        optimizer.apply_gradients(zip(grads, var_list), name="dummy")
+        self.assertIn("dummy", optimizer._velocities[0].name)
+
+    def testPassingMissingWDError(self):
+        with self.assertRaises(ValueError):
+            _ = adamw_new.AdamW(0.01, weight_decay=None)
+
+        with self.assertRaisesRegex(ValueError, "Missing value of"):
+            _ = adamw_new.AdamW(0.01, weight_decay=None)
+
+    def testMovingAverageOptimizer(self):
+        optimizer = sgd_new.SGD(
+            learning_rate=1,
+            use_ema=True,
+            ema_momentum=0.5,
+            ema_overwrite_frequency=3,
+        )
+
+        # `var2` does not produce gradients.
+        var1, var2, var3 = tf.Variable(2.0), tf.Variable(2.0), tf.Variable(2.0)
+        with tf.GradientTape() as tape:
+            loss = var1 + var3
+        grads = tape.gradient(loss, [var1, var2, var3])
+        # First iteration: [var1, var2, var3] = [1.0, 2.0, 1.0]
+        optimizer.apply_gradients(zip(grads, [var1, var2, var3]))
+        self.assertAllEqual(
+            [var1.numpy(), var2.numpy(), var3.numpy()],
+            [1.0, 2.0, 1.0],
+        )
+
+        # Second iteration: [var1, var2, var3] = [0.0, 2.0, 0.0]
+        optimizer.apply_gradients(zip(grads, [var1, var2, var3]))
+        self.assertAllEqual(
+            [var1.numpy(), var2.numpy(), var3.numpy()],
+            [0.0, 2.0, 0.0],
+        )
+
+        # Third iteration: without EMA we would see [var1, var2, var3] =
+        # [-1.0, 2.0, -1.0], but the EMA overwrite results in
+        # [var1, var2, var3] = [-0.125, 2.0, -0.125].
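+        # The -0.125 follows from the EMA arithmetic (the asserted values
+        # imply the average is seeded from the variable's initial 2.0):
+        #   step 1: var = 1.0,  average = 0.5 * 2.0  + 0.5 * 1.0    =  1.5
+        #   step 2: var = 0.0,  average = 0.5 * 1.5  + 0.5 * 0.0    =  0.75
+        #   step 3: var = -1.0, average = 0.5 * 0.75 + 0.5 * (-1.0) = -0.125
+        # and iteration 3 hits ema_overwrite_frequency=3, so var1 and var3
+        # are overwritten with the average.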
+ optimizer.apply_gradients(zip(grads, [var1, var2, var3])) + self.assertAllEqual( + [var1.numpy(), var2.numpy(), var3.numpy()], + [-0.125, 2.0, -0.125], + ) + + def testGetAndFromConfig(self): + class CustomLRSchedule(learning_rate_schedule.LearningRateSchedule): + def __init__(self, initial_learning_rate): + self.initial_learning_rate = initial_learning_rate + + def __call__(self, step): + step = tf.cast(step, tf.float32) + return self.initial_learning_rate / (step + 1) + + def get_config(self): + return {"initial_learning_rate": self.initial_learning_rate} + + learning_rate = CustomLRSchedule(0.05) + optimizer = adam_new.Adam( + learning_rate=learning_rate, + beta_1=0.7, + beta_2=0.77, + amsgrad=True, + epsilon=0.001, + clipnorm=0.5, + use_ema=True, + ema_momentum=0.5, + ema_overwrite_frequency=50, + name="custom_adam", + ) + config = optimizer.get_config() + expected_config = { + "name": "custom_adam", + "beta_1": 0.7, + "beta_2": 0.77, + "epsilon": 0.001, + "amsgrad": True, + "clipnorm": 0.5, + "global_clipnorm": None, + "clipvalue": None, + "use_ema": True, + "ema_momentum": 0.5, + "ema_overwrite_frequency": 50, + "is_legacy_optimizer": False, + } + expected_learning_rate = { + "class_name": "CustomLRSchedule", + "config": {"initial_learning_rate": 0.05}, + "module": None, + "registered_name": "CustomLRSchedule", + } + self.assertDictContainsSubset(expected_config, config) + self.assertDictEqual(expected_learning_rate, config["learning_rate"]) + + restored_optimizer = adam_new.Adam.from_config( + config, custom_objects={"CustomLRSchedule": CustomLRSchedule} + ) + self.assertDictEqual( + restored_optimizer.get_config(), optimizer.get_config() + ) + + def testCheckpointOptimizer(self): + x = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32) + lr_schedule = learning_rate_schedule.ExponentialDecay( + initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9 + ) + optimizer_1 = adam_new.Adam( + learning_rate=lr_schedule, beta_1=0.8, beta_2=0.888 + ) + grads = tf.convert_to_tensor([[1.0, 2.0], [3.0, 4.0]]) + + for _ in range(1): + optimizer_1.apply_gradients(zip([grads], [x])) + + # Then save the variable and optimizer to a checkpoint. 
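+        # The checkpoint should capture both the optimizer slot variables
+        # and the iteration counter; after restoring, the two optimizers
+        # are stepped identically and their momentum slots and iteration
+        # counts are compared below.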
+ checkpoint_1 = tf.train.Checkpoint(var=x, optimizer=optimizer_1) + checkpoint_path = checkpoint_1.save(self.get_temp_dir()) + + # Create a new optimizer and call restore on it (and x) + x2 = tf.Variable([[0.0, 0.0], [0.0, 0.0]], dtype=x.dtype) + optimizer_2 = adam_new.Adam( + learning_rate=lr_schedule, beta_1=0.8, beta_2=0.888 + ) + checkpoint_2 = tf.train.Checkpoint(var=x2, optimizer=optimizer_2) + checkpoint_2.restore(checkpoint_path) + + for _ in range(2): + optimizer_1.apply_gradients(zip([grads], [x])) + optimizer_2.apply_gradients(zip([grads], [x])) + + self.assertTrue( + ( + self.evaluate(optimizer_1._momentums._storage[0]) + == self.evaluate(optimizer_2._momentums._storage[0]) + ).all() + ) + self.assertEqual( + self.evaluate(optimizer_1._iterations), + self.evaluate(optimizer_2._iterations), + ) + + def testCheckpointOptimizerWithModel(self): + inputs = keras.layers.Input(shape=(1,)) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + optimizer = adamax_new_fn() + x = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) + y = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) + model.compile(loss="mse", optimizer=optimizer) + path = os.path.join(self.get_temp_dir(), "ckpt") + checkpoint_callback = keras.callbacks.ModelCheckpoint(path) + model.fit(x, y, callbacks=[checkpoint_callback]) + + new_model = keras.Model(inputs=inputs, outputs=outputs) + new_optimizer = adamax_new_fn() + new_model.compile(loss="mse", optimizer=new_optimizer) + new_model.load_weights(path) + self.assertEqual( + new_model.optimizer.iterations.numpy(), + model.optimizer.iterations.numpy(), + ) + + def testRestoreOldOptimizerCheckpoint(self): + inputs = keras.layers.Input(shape=(1,)) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + optimizer = adam_old.Adam() + x = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) + y = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) + model.compile(loss="mse", optimizer=optimizer) + path = os.path.join(self.get_temp_dir(), "ckpt") + checkpoint_callback = keras.callbacks.ModelCheckpoint(path) + model.fit(x, y, callbacks=[checkpoint_callback]) + + new_model = keras.Model(inputs=inputs, outputs=outputs) + new_optimizer = adam_new.Adam() + new_model.compile(loss="mse", optimizer=new_optimizer) + with self.assertRaisesRegex( + ValueError, "You are trying to restore a checkpoint.*Adam.*" + ): + new_model.load_weights(path) + + @parameterized.product(optimizer_fn=OPTIMIZER_FN) + def testSaveAndLoadOptimizerWithModel(self, optimizer_fn): + inputs = keras.layers.Input(shape=(1,)) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs=inputs, outputs=outputs) + optimizer = optimizer_fn() + optimizer.clipnorm = 0.1 + x = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) + y = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) + model.compile(loss="mse", optimizer=optimizer) + model.fit(x, y) + + # Save in h5 format. + path = os.path.join(self.get_temp_dir(), "model.h5") + model.save(path) + loaded_model = keras.models.load_model(path) + loaded_model.load_weights(path) + loaded_optimizer = loaded_model.optimizer + self.assertEqual(type(optimizer), type(loaded_optimizer)) + self.assertEqual(loaded_optimizer.learning_rate, 0.002) + self.assertEqual(loaded_optimizer.clipnorm, 0.1) + self.assertAllClose(optimizer.variables, loaded_optimizer.variables) + + # Save in Keras SavedModel format. 
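+        # (A path with no file extension selects the directory-based TF
+        # SavedModel format; unlike the h5 branch above, the loaded
+        # optimizer has to be re-built on the trainable variables before
+        # its slot variables can be compared.)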
+ model.fit(x, y) + path = os.path.join(self.get_temp_dir(), "model") + model.save(path) + loaded_model = keras.models.load_model(path) + loaded_model.load_weights(path) + loaded_optimizer = loaded_model.optimizer + self.assertEqual(type(optimizer), type(loaded_optimizer)) + self.assertEqual(loaded_optimizer.learning_rate, 0.002) + self.assertEqual(loaded_optimizer.clipnorm, 0.1) + loaded_optimizer.build(loaded_model.trainable_variables) + self.assertAllClose(optimizer.variables, loaded_optimizer.variables) + + # Save in `.keras` format. + path = os.path.join(self.get_temp_dir(), "model.keras") + model.save(path) + loaded_model = keras.models.load_model(path) + loaded_model.load_weights(path) + loaded_optimizer = loaded_model.optimizer + self.assertEqual(type(optimizer), type(loaded_optimizer)) + self.assertEqual(loaded_optimizer.learning_rate, 0.002) + self.assertEqual(loaded_optimizer.clipnorm, 0.1) + self.assertAllClose(optimizer.variables, loaded_optimizer.variables) + + @parameterized.product(optimizer_fn=OPTIMIZER_FN) + def testSparseGradientsWorkAsExpected(self, optimizer_fn): + optimizer_1 = optimizer_fn() + optimizer_2 = optimizer_fn() + x1 = tf.Variable(np.ones([5]), dtype=tf.float64) + x2 = tf.Variable(np.ones([5]), dtype=tf.float64) + grads = tf.convert_to_tensor([0, 1.0, 1.5, 0, 0], dtype=tf.float64) + sparse_grads = tf.IndexedSlices( + tf.convert_to_tensor([1.0, 1.5], dtype=tf.float64), + tf.convert_to_tensor([1, 2]), + dense_shape=tf.convert_to_tensor([len(grads)]), + ) + for _ in range(5): + optimizer_1.apply_gradients(zip([grads], [x1])) + optimizer_2.apply_gradients(zip([sparse_grads], [x2])) + self.assertAllClose(x1, x2) + + @test_utils.run_v2_only + def test_convert_to_legacy_optimizer(self): + if not tf.executing_eagerly(): + # The conversion could only happen in eager mode. + return + optimizer_list = [ + "adadelta", + "adagrad", + "adam", + "adamax", + "nadam", + "rmsprop", + "sgd", + "ftrl", + ] + # Test conversion does not throw errors. 
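+        # Each name is resolved twice (experimental and legacy); the
+        # converted optimizer must match the reference legacy optimizer in
+        # both class and config.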
+ for name in optimizer_list: + experimental_optimizer = keras.optimizers.get( + name, use_legacy_optimizer=False + ) + reference_legacy_optimizer = keras.optimizers.get( + name, use_legacy_optimizer=True + ) + converted_legacy_optimizer = ( + keras.optimizers.convert_to_legacy_optimizer( + experimental_optimizer + ) + ) + self.assertEqual( + type(reference_legacy_optimizer), + type(converted_legacy_optimizer), + ) + self.assertDictEqual( + reference_legacy_optimizer.get_config(), + converted_legacy_optimizer.get_config(), + ) + + lr_schedule = learning_rate_schedule.ExponentialDecay( + initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9 + ) + optimizer = adam_new.Adam(learning_rate=lr_schedule) + legacy_optimizer = keras.optimizers.convert_to_legacy_optimizer( + optimizer + ) + self.assertDictEqual( + optimizer.get_config()["learning_rate"], + legacy_optimizer.get_config()["learning_rate"], + ) + + class CustomLRSchedule(learning_rate_schedule.LearningRateSchedule): + def __init__(self, initial_learning_rate): + self.initial_learning_rate = initial_learning_rate + + def __call__(self, step): + step = tf.cast(step, tf.float32) + return self.initial_learning_rate / (step + 1) + + def get_config(self): + return {"initial_learning_rate": self.initial_learning_rate} + + lr_schedule = CustomLRSchedule(0.001) + optimizer = adam_new.Adam(learning_rate=lr_schedule) + legacy_optimizer = keras.optimizers.convert_to_legacy_optimizer( + optimizer + ) + self.assertDictEqual( + optimizer.get_config()["learning_rate"], + legacy_optimizer.get_config()["learning_rate"], + ) + + @test_utils.run_v2_only + def test_arm_mac_get_legacy_optimizer(self): + with mock.patch( + "platform.system", + mock.MagicMock(return_value="Darwin"), + ): + with mock.patch( + "platform.processor", + mock.MagicMock(return_value="arm"), + ): + optimizer = keras.optimizers.get("adam") + self.assertIsInstance(optimizer, adam_old.Adam) + + +class OptimizerRegressionTest(tf.test.TestCase, parameterized.TestCase): + """Test optimizer outputs the same numerical results as optimizer_v2.""" + + def _compare_numerical(self, old_optimizer, new_optimizer): + x1 = tf.Variable(np.ones([10]), dtype=tf.float64) + x2 = tf.Variable(np.ones([10]), dtype=tf.float64) + grads = tf.convert_to_tensor(np.arange(0.1, 1.1, 0.1)) + first_grads = tf.constant([0.01] * 10, dtype=tf.float64) + sparse_grads = tf.IndexedSlices( + tf.convert_to_tensor([0, 0.2, 0.4, 0.8, 0.8], dtype=tf.float64), + tf.convert_to_tensor([0, 2, 4, 6, 6]), + dense_shape=tf.convert_to_tensor([len(grads)]), + ) + + old_optimizer.apply_gradients(zip([first_grads], [x1])) + new_optimizer.apply_gradients(zip([first_grads], [x2])) + for _ in range(5): + self.assertAllClose(x1, x2, rtol=5e-4, atol=5e-4) + old_optimizer.apply_gradients(zip([grads], [x1])) + new_optimizer.apply_gradients(zip([grads], [x2])) + + for _ in range(5): + self.assertAllClose(x1, x2, rtol=5e-4, atol=5e-4) + old_optimizer.apply_gradients(zip([sparse_grads], [x1])) + new_optimizer.apply_gradients(zip([sparse_grads], [x2])) + + def testAdam(self): + self._compare_numerical( + adam_old.Adam(amsgrad=True), adam_new.Adam(amsgrad=True) + ) + + def testAdadelta(self): + self._compare_numerical( + adadelta_old.Adadelta(), adadelta_new.Adadelta() + ) + + def testAdagrad(self): + self._compare_numerical(adagrad_old.Adagrad(), adagrad_new.Adagrad()) + + def testFtrl(self): + self._compare_numerical(ftrl_old.Ftrl(), ftrl_new.Ftrl()) + + def testRMSprop(self): + self._compare_numerical( + rmsprop_old.RMSprop(centered=True), 
+ rmsprop_new.RMSprop(centered=True), + ) + + @parameterized.product(nesterov=[True, False]) + def testSgd(self, nesterov): + self._compare_numerical( + sgd_old.SGD(nesterov=nesterov), sgd_new.SGD(nesterov=nesterov) + ) + + def testWeightDecay(self): + self._compare_numerical( + adam_new.Adam(learning_rate=1, weight_decay=0.5, epsilon=0), + adamw_new.AdamW(learning_rate=1, weight_decay=0.5, epsilon=0), + ) + + +class DistributedTrainingTest(tf.test.TestCase, parameterized.TestCase): + @ds_combinations.generate( + tf.__internal__.test.combinations.combine( + strategy=STRATEGIES, optimizer_fn=OPTIMIZER_FN + ) + ) + def testGetGradientsInModel(self, strategy, optimizer_fn): + with strategy.scope(): + model = keras.Sequential( + [keras.layers.Input(shape=(1,)), keras.layers.Dense(1)] + ) + optimizer = optimizer_fn() + x = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) + y = tf.expand_dims(tf.convert_to_tensor([1, 1, 1, 0, 0, 0]), axis=1) + model.compile(loss="mse", optimizer=optimizer) + model.fit(x, y, epochs=1, steps_per_epoch=5) + if optimizer.name == "Adam": + # Assert the momentum variable is not 0. + self.assertNotEqual( + self.evaluate(optimizer._momentums._storage[0]), 0 + ) + elif optimizer.name == "Adadelta": + # Assert the accumulated variable is not 0. + self.assertNotEqual( + self.evaluate(optimizer._accumulated_grads._storage[0]), 0 + ) + elif optimizer.name == "Adagrad": + # Assert the accumulated variable is not 0. + self.assertNotEqual( + self.evaluate(optimizer._accumulators._storage[0]), 0 + ) + + @ds_combinations.generate( + tf.__internal__.test.combinations.combine( + strategy=STRATEGIES, optimizer_fn=OPTIMIZER_FN + ) + ) + def testGetGradientsInCustomTrainingLoop(self, strategy, optimizer_fn): + with strategy.scope(): + model = keras.Sequential( + [keras.layers.Input(shape=(1,)), keras.layers.Dense(1)] + ) + optimizer = optimizer_fn() + + def per_worker_dataset_fn(): + def dataset_fn(_): + x, y = [1, 1, 1, 0, 0, 0], [1, 1, 1, 0, 0, 0] + ds = tf.data.Dataset.from_tensor_slices((x, y)) + ds = ds.repeat().batch(6) + return ds + + return strategy.distribute_datasets_from_function(dataset_fn) + + ds = per_worker_dataset_fn() + + @tf.function + def train_step(ds): + def replica_fn(data): + features, labels = data + with tf.GradientTape() as tape: + output = model(tf.expand_dims(features, axis=1)) + loss = keras.losses.MeanSquaredError( + reduction=losses_utils.ReductionV2.NONE + )(labels, output) + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients( + zip(grads, model.trainable_variables) + ) + + strategy.run(replica_fn, args=(next(iter(ds)),)) + + for _ in range(3): + train_step(ds) + self.assertEqual(self.evaluate(optimizer.iterations), 3) + + @ds_combinations.generate( + tf.__internal__.test.combinations.combine( + strategy=[ + ds_combinations.mirrored_strategy_with_two_gpus, + ds_combinations.tpu_strategy, + ds_combinations.multi_worker_mirrored_2x2_gpu, + ds_combinations.central_storage_strategy_with_two_gpus, + ] + ) + ) + def testJitCompile(self, strategy): + # Test the optimizer yields same numerical results when jit_compile is + # on and off. 
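+        # Two clones of the same model are updated in lock-step, one with
+        # an XLA-compiled optimizer and one without; after three steps
+        # their kernel weights must still agree.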
+ with strategy.scope(): + optimizer_1 = adam_new.Adam( + jit_compile=False, use_ema=True, ema_overwrite_frequency=1 + ) + optimizer_2 = adam_new.Adam( + jit_compile=True, use_ema=True, ema_overwrite_frequency=1 + ) + model_1 = keras.Sequential( + [ + keras.layers.Input(shape=(2,)), + keras.layers.Dense(5), + keras.layers.Dense(1), + ] + ) + model_2 = keras.models.clone_model(model_1) + model_2.set_weights(model_1.get_weights()) + + def per_worker_dataset_fn(): + def dataset_fn(_): + x = np.random.rand(6, 2) + y = [1, 1, 1, 0, 0, 0] + ds = tf.data.Dataset.from_tensor_slices((x, y)) + ds = ds.repeat().batch(6) + return ds + + return strategy.distribute_datasets_from_function(dataset_fn) + + ds = per_worker_dataset_fn() + + @tf.function + def train_step(ds): + def replica_fn(data): + features, labels = data + with tf.GradientTape() as tape: + output_1 = model_1(features) + loss_1 = keras.losses.MeanSquaredError( + reduction=losses_utils.ReductionV2.NONE + )(labels, output_1) + grads_1 = tape.gradient(loss_1, model_1.trainable_variables) + optimizer_1.apply_gradients( + zip(grads_1, model_1.trainable_variables), + skip_gradients_aggregation=False, + ) + + with tf.GradientTape() as tape: + output_2 = model_2(features) + loss_2 = keras.losses.MeanSquaredError( + reduction=losses_utils.ReductionV2.NONE + )(labels, output_2) + grads_2 = tape.gradient(loss_2, model_2.trainable_variables) + optimizer_2.apply_gradients( + zip(grads_2, model_2.trainable_variables), + experimental_aggregate_gradients=True, + ) + + strategy.run(replica_fn, args=(next(iter(ds)),)) + + for _ in range(3): + train_step(ds) + self.assertAllClose( + model_1.trainable_variables[0][0], + model_2.trainable_variables[0][0], + ) + + +if __name__ == "__main__": + tf.__internal__.distribute.multi_process_runner.test_main() diff --git a/keras/optimizers/optimizer_v1.py b/keras/optimizers/optimizer_v1.py index a366b2154d2e..5cb3544ecf9e 100644 --- a/keras/optimizers/optimizer_v1.py +++ b/keras/optimizers/optimizer_v1.py @@ -12,828 +12,913 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name -# pylint: disable=g-classes-have-attributes + + """Legacy v1 optimizer classes. For more examples see the base class `tf.compat.v1.keras.optimizers.Optimizer`. """ import tensorflow.compat.v2 as tf + from keras import backend class Optimizer: - """Abstract optimizer base class. - - Note: this is the parent class of all optimizers, not an actual optimizer - that can be used for training models. - - All Keras optimizers support the following keyword arguments: - - clipnorm: float >= 0. Gradients will be clipped - when their L2 norm exceeds this value. - clipvalue: float >= 0. Gradients will be clipped - when their absolute value exceeds this value. - """ - - def __init__(self, **kwargs): - allowed_kwargs = {'clipnorm', 'clipvalue'} - for k in kwargs: - if k not in allowed_kwargs: - raise TypeError('Unexpected keyword argument ' - 'passed to optimizer: ' + str(k)) - # checks that clipnorm >= 0 and clipvalue >= 0 - if kwargs[k] < 0: - raise ValueError('Expected {} >= 0, received: {}'.format(k, kwargs[k])) - self.__dict__.update(kwargs) - self.updates = [] - self.weights = [] - - # Set this to False, indicating `apply_gradients` does not take the - # `experimental_aggregate_gradients` argument. 
- _HAS_AGGREGATE_GRAD = False - - def _create_all_weights(self, params): - """Creates and sets all optimizer weights. + """Abstract optimizer base class. - Args: - params: list or tuple of `Variable` objects that will be minimized - using this optimizer. + Note: this is the parent class of all optimizers, not an actual optimizer + that can be used for training models. - Returns: - Specific weight values that are used in `get_updates` - """ - raise NotImplementedError + All Keras optimizers support the following keyword arguments: - def get_updates(self, loss, params): - raise NotImplementedError + clipnorm: float >= 0. Gradients will be clipped + when their L2 norm exceeds this value. + clipvalue: float >= 0. Gradients will be clipped + when their absolute value exceeds this value. + """ - def get_gradients(self, loss, params): - """Returns gradients of `loss` with respect to `params`. + def __init__(self, **kwargs): + allowed_kwargs = {"clipnorm", "clipvalue"} + for k in kwargs: + if k not in allowed_kwargs: + raise TypeError( + "Unexpected keyword argument passed to optimizer: " + str(k) + ) + # checks that clipnorm >= 0 and clipvalue >= 0 + if kwargs[k] < 0: + raise ValueError(f"Expected {k} >= 0, received: {kwargs[k]}") + self.__dict__.update(kwargs) + self.updates = [] + self.weights = [] + + # Set this to False, indicating `apply_gradients` does not take the + # `experimental_aggregate_gradients` argument. + _HAS_AGGREGATE_GRAD = False + + def _create_all_weights(self, params): + """Creates and sets all optimizer weights. + + Args: + params: list or tuple of `Variable` objects that will be minimized + using this optimizer. + + Returns: + Specific weight values that are used in `get_updates` + """ + raise NotImplementedError + + def get_updates(self, loss, params): + raise NotImplementedError + + def get_gradients(self, loss, params): + """Returns gradients of `loss` with respect to `params`. + + Args: + loss: Loss tensor. + params: List of variables. + + Returns: + List of gradient tensors. + + Raises: + ValueError: In case any gradient cannot be computed (e.g. if + gradient function not implemented). + """ + grads = backend.gradients(loss, params) + if any(g is None for g in grads): + raise ValueError( + "An operation has `None` for gradient. " + "Please make sure that all of your ops have a " + "gradient defined (i.e. are differentiable). " + "Common ops without gradient: " + "backend.argmax, backend.round, backend.eval." + ) + if hasattr(self, "clipnorm"): + grads = [tf.clip_by_norm(g, self.clipnorm) for g in grads] + if hasattr(self, "clipvalue"): + grads = [ + tf.clip_by_value(g, -self.clipvalue, self.clipvalue) + for g in grads + ] + return grads + + def set_weights(self, weights): + """Sets the weights of the optimizer, from Numpy arrays. + + Should only be called after computing the gradients + (otherwise the optimizer has no weights). + + Args: + weights: a list of Numpy arrays. The number of arrays and their + shape must match number of the dimensions of the weights of the + optimizer (i.e. it should match the output of `get_weights`). + + Raises: + ValueError: in case of incompatible weight shapes. 
+ """ + params = self.weights + if len(params) != len(weights): + raise ValueError( + "Length of the specified weight list (" + + str(len(weights)) + + ") does not match the number of weights of the optimizer (" + + str(len(params)) + + ")" + ) + weight_value_tuples = [] + param_values = backend.batch_get_value(params) + for pv, p, w in zip(param_values, params, weights): + if pv.shape != w.shape: + raise ValueError( + "Optimizer weight shape " + + str(pv.shape) + + " not compatible with provided weight shape " + + str(w.shape) + ) + weight_value_tuples.append((p, w)) + backend.batch_set_value(weight_value_tuples) + + def get_weights(self): + """Returns the current value of the weights of the optimizer. + + Returns: + A list of numpy arrays. + """ + return backend.batch_get_value(self.weights) + + def get_config(self): + config = {} + if hasattr(self, "clipnorm"): + config["clipnorm"] = self.clipnorm + if hasattr(self, "clipvalue"): + config["clipvalue"] = self.clipvalue + return config + + @classmethod + def from_config(cls, config): + return cls(**config) - Args: - loss: Loss tensor. - params: List of variables. - Returns: - List of gradient tensors. +class SGD(Optimizer): + """Stochastic gradient descent optimizer. - Raises: - ValueError: In case any gradient cannot be computed (e.g. if gradient - function not implemented). - """ - grads = backend.gradients(loss, params) - if any(g is None for g in grads): - raise ValueError('An operation has `None` for gradient. ' - 'Please make sure that all of your ops have a ' - 'gradient defined (i.e. are differentiable). ' - 'Common ops without gradient: ' - 'backend.argmax, backend.round, backend.eval.') - if hasattr(self, 'clipnorm'): - grads = [tf.clip_by_norm(g, self.clipnorm) for g in grads] - if hasattr(self, 'clipvalue'): - grads = [ - tf.clip_by_value(g, -self.clipvalue, self.clipvalue) - for g in grads - ] - return grads - - def set_weights(self, weights): - """Sets the weights of the optimizer, from Numpy arrays. - - Should only be called after computing the gradients - (otherwise the optimizer has no weights). + Includes support for momentum, + learning rate decay, and Nesterov momentum. Args: - weights: a list of Numpy arrays. The number of arrays and their shape - must match number of the dimensions of the weights of the optimizer - (i.e. it should match the output of `get_weights`). - - Raises: - ValueError: in case of incompatible weight shapes. + lr: float >= 0. Learning rate. + momentum: float >= 0. Parameter that accelerates SGD in the relevant + direction and dampens oscillations. + decay: float >= 0. Learning rate decay over each update. + nesterov: boolean. Whether to apply Nesterov momentum. """ - params = self.weights - if len(params) != len(weights): - raise ValueError('Length of the specified weight list (' + - str(len(weights)) + - ') does not match the number of weights ' - 'of the optimizer (' + str(len(params)) + ')') - weight_value_tuples = [] - param_values = backend.batch_get_value(params) - for pv, p, w in zip(param_values, params, weights): - if pv.shape != w.shape: - raise ValueError('Optimizer weight shape ' + str(pv.shape) + - ' not compatible with ' - 'provided weight shape ' + str(w.shape)) - weight_value_tuples.append((p, w)) - backend.batch_set_value(weight_value_tuples) - - def get_weights(self): - """Returns the current value of the weights of the optimizer. - - Returns: - A list of numpy arrays. 
- """ - return backend.batch_get_value(self.weights) - def get_config(self): - config = {} - if hasattr(self, 'clipnorm'): - config['clipnorm'] = self.clipnorm - if hasattr(self, 'clipvalue'): - config['clipvalue'] = self.clipvalue - return config + def __init__( + self, lr=0.01, momentum=0.0, decay=0.0, nesterov=False, **kwargs + ): + super().__init__(**kwargs) + with backend.name_scope(self.__class__.__name__): + self.iterations = backend.variable( + 0, dtype="int64", name="iterations" + ) + self.lr = backend.variable(lr, name="lr") + self.momentum = backend.variable(momentum, name="momentum") + self.decay = backend.variable(decay, name="decay") + self.initial_decay = decay + self.nesterov = nesterov + + def _create_all_weights(self, params): + shapes = [backend.int_shape(p) for p in params] + moments = [backend.zeros(shape) for shape in shapes] + self.weights = [self.iterations] + moments + return moments + + def get_updates(self, loss, params): + grads = self.get_gradients(loss, params) + self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] + + lr = self.lr + if self.initial_decay > 0: + lr = lr * ( + 1.0 + / ( + 1.0 + + self.decay + * tf.cast(self.iterations, backend.dtype(self.decay)) + ) + ) + # momentum + moments = self._create_all_weights(params) + for p, g, m in zip(params, grads, moments): + v = self.momentum * m - lr * g # velocity + self.updates.append(tf.compat.v1.assign(m, v)) + + if self.nesterov: + new_p = p + self.momentum * v - lr * g + else: + new_p = p + v + + # Apply constraints. + if getattr(p, "constraint", None) is not None: + new_p = p.constraint(new_p) + + self.updates.append(tf.compat.v1.assign(p, new_p)) + return self.updates - @classmethod - def from_config(cls, config): - return cls(**config) + def get_config(self): + config = { + "lr": float(backend.get_value(self.lr)), + "momentum": float(backend.get_value(self.momentum)), + "decay": float(backend.get_value(self.decay)), + "nesterov": self.nesterov, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) -class SGD(Optimizer): - """Stochastic gradient descent optimizer. +class RMSprop(Optimizer): + """RMSProp optimizer. - Includes support for momentum, - learning rate decay, and Nesterov momentum. + It is recommended to leave the parameters of this optimizer + at their default values + (except the learning rate, which can be freely tuned). - Args: + Args: lr: float >= 0. Learning rate. - momentum: float >= 0. Parameter that accelerates SGD in the relevant - direction and dampens oscillations. + rho: float >= 0. + epsilon: float >= 0. Fuzz factor. + If `None`, defaults to `backend.epsilon()`. decay: float >= 0. Learning rate decay over each update. - nesterov: boolean. Whether to apply Nesterov momentum. 
- """ - - def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, **kwargs): - super().__init__(**kwargs) - with backend.name_scope(self.__class__.__name__): - self.iterations = backend.variable(0, dtype='int64', name='iterations') - self.lr = backend.variable(lr, name='lr') - self.momentum = backend.variable(momentum, name='momentum') - self.decay = backend.variable(decay, name='decay') - self.initial_decay = decay - self.nesterov = nesterov - - def _create_all_weights(self, params): - shapes = [backend.int_shape(p) for p in params] - moments = [backend.zeros(shape) for shape in shapes] - self.weights = [self.iterations] + moments - return moments - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * ( - 1. / - (1. + - self.decay * tf.cast(self.iterations, - backend.dtype(self.decay)))) - # momentum - moments = self._create_all_weights(params) - for p, g, m in zip(params, grads, moments): - v = self.momentum * m - lr * g # velocity - self.updates.append(tf.compat.v1.assign(m, v)) - - if self.nesterov: - new_p = p + self.momentum * v - lr * g - else: - new_p = p + v - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(tf.compat.v1.assign(p, new_p)) - return self.updates - - def get_config(self): - config = { - 'lr': float(backend.get_value(self.lr)), - 'momentum': float(backend.get_value(self.momentum)), - 'decay': float(backend.get_value(self.decay)), - 'nesterov': self.nesterov - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """ + def __init__(self, lr=0.001, rho=0.9, epsilon=None, decay=0.0, **kwargs): + super().__init__(**kwargs) + with backend.name_scope(self.__class__.__name__): + self.lr = backend.variable(lr, name="lr") + self.rho = backend.variable(rho, name="rho") + self.decay = backend.variable(decay, name="decay") + self.iterations = backend.variable( + 0, dtype="int64", name="iterations" + ) + if epsilon is None: + epsilon = backend.epsilon() + self.epsilon = epsilon + self.initial_decay = decay + + def _create_all_weights(self, params): + accumulators = [ + backend.zeros(backend.int_shape(p), dtype=backend.dtype(p)) + for p in params + ] + self.weights = accumulators + return accumulators + + def get_updates(self, loss, params): + grads = self.get_gradients(loss, params) + accumulators = self._create_all_weights(params) + self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] -class RMSprop(Optimizer): - """RMSProp optimizer. - - It is recommended to leave the parameters of this optimizer - at their default values - (except the learning rate, which can be freely tuned). - - Args: - lr: float >= 0. Learning rate. - rho: float >= 0. - epsilon: float >= 0. Fuzz factor. - If `None`, defaults to `backend.epsilon()`. - decay: float >= 0. Learning rate decay over each update. 
- """ - - def __init__(self, lr=0.001, rho=0.9, epsilon=None, decay=0., **kwargs): - super().__init__(**kwargs) - with backend.name_scope(self.__class__.__name__): - self.lr = backend.variable(lr, name='lr') - self.rho = backend.variable(rho, name='rho') - self.decay = backend.variable(decay, name='decay') - self.iterations = backend.variable(0, dtype='int64', name='iterations') - if epsilon is None: - epsilon = backend.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - - def _create_all_weights(self, params): - accumulators = [ - backend.zeros(backend.int_shape(p), dtype=backend.dtype(p)) - for p in params] - self.weights = accumulators - return accumulators - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - accumulators = self._create_all_weights(params) - self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * ( - 1. / - (1. + - self.decay * tf.cast(self.iterations, - backend.dtype(self.decay)))) - - for p, g, a in zip(params, grads, accumulators): - # update accumulator - new_a = self.rho * a + (1. - self.rho) * tf.square(g) - self.updates.append(tf.compat.v1.assign(a, new_a)) - new_p = p - lr * g / (backend.sqrt(new_a) + self.epsilon) - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(tf.compat.v1.assign(p, new_p)) - return self.updates - - def get_config(self): - config = { - 'lr': float(backend.get_value(self.lr)), - 'rho': float(backend.get_value(self.rho)), - 'decay': float(backend.get_value(self.decay)), - 'epsilon': self.epsilon - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + lr = self.lr + if self.initial_decay > 0: + lr = lr * ( + 1.0 + / ( + 1.0 + + self.decay + * tf.cast(self.iterations, backend.dtype(self.decay)) + ) + ) + + for p, g, a in zip(params, grads, accumulators): + # update accumulator + new_a = self.rho * a + (1.0 - self.rho) * tf.square(g) + self.updates.append(tf.compat.v1.assign(a, new_a)) + new_p = p - lr * g / (backend.sqrt(new_a) + self.epsilon) + + # Apply constraints. + if getattr(p, "constraint", None) is not None: + new_p = p.constraint(new_p) + + self.updates.append(tf.compat.v1.assign(p, new_p)) + return self.updates + + def get_config(self): + config = { + "lr": float(backend.get_value(self.lr)), + "rho": float(backend.get_value(self.rho)), + "decay": float(backend.get_value(self.decay)), + "epsilon": self.epsilon, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) class Adagrad(Optimizer): - """Adagrad optimizer. + """Adagrad optimizer. - Adagrad is an optimizer with parameter-specific learning rates, - which are adapted relative to how frequently a parameter gets - updated during training. The more updates a parameter receives, - the smaller the updates. + Adagrad is an optimizer with parameter-specific learning rates, + which are adapted relative to how frequently a parameter gets + updated during training. The more updates a parameter receives, + the smaller the updates. - It is recommended to leave the parameters of this optimizer - at their default values. + It is recommended to leave the parameters of this optimizer + at their default values. - # Arguments - lr: float >= 0. Initial learning rate. - epsilon: float >= 0. If `None`, defaults to `backend.epsilon()`. - decay: float >= 0. Learning rate decay over each update. 
+ # Arguments + lr: float >= 0. Initial learning rate. + epsilon: float >= 0. If `None`, defaults to `backend.epsilon()`. + decay: float >= 0. Learning rate decay over each update. + + # References + - [Adaptive Subgradient Methods for Online Learning and Stochastic + Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) + """ + + def __init__(self, lr=0.01, epsilon=None, decay=0.0, **kwargs): + super().__init__(**kwargs) + with backend.name_scope(self.__class__.__name__): + self.lr = backend.variable(lr, name="lr") + self.decay = backend.variable(decay, name="decay") + self.iterations = backend.variable( + 0, dtype="int64", name="iterations" + ) + if epsilon is None: + epsilon = backend.epsilon() + self.epsilon = epsilon + self.initial_decay = decay + + def _create_all_weights(self, params): + shapes = [backend.int_shape(p) for p in params] + accumulators = [backend.zeros(shape) for shape in shapes] + self.weights = accumulators + return accumulators + + def get_updates(self, loss, params): + grads = self.get_gradients(loss, params) + accumulators = self._create_all_weights(params) + + self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] - # References - - [Adaptive Subgradient Methods for Online Learning and Stochastic - Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) - """ - - def __init__(self, lr=0.01, epsilon=None, decay=0., **kwargs): - super().__init__(**kwargs) - with backend.name_scope(self.__class__.__name__): - self.lr = backend.variable(lr, name='lr') - self.decay = backend.variable(decay, name='decay') - self.iterations = backend.variable(0, dtype='int64', name='iterations') - if epsilon is None: - epsilon = backend.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - - def _create_all_weights(self, params): - shapes = [backend.int_shape(p) for p in params] - accumulators = [backend.zeros(shape) for shape in shapes] - self.weights = accumulators - return accumulators - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - accumulators = self._create_all_weights(params) - - self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * ( - 1. / - (1. + - self.decay * tf.cast(self.iterations, - backend.dtype(self.decay)))) - - for p, g, a in zip(params, grads, accumulators): - new_a = a + tf.square(g) # update accumulator - self.updates.append(tf.compat.v1.assign(a, new_a)) - new_p = p - lr * g / (backend.sqrt(new_a) + self.epsilon) - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(tf.compat.v1.assign(p, new_p)) - return self.updates - - def get_config(self): - config = { - 'lr': float(backend.get_value(self.lr)), - 'decay': float(backend.get_value(self.decay)), - 'epsilon': self.epsilon - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + lr = self.lr + if self.initial_decay > 0: + lr = lr * ( + 1.0 + / ( + 1.0 + + self.decay + * tf.cast(self.iterations, backend.dtype(self.decay)) + ) + ) + + for p, g, a in zip(params, grads, accumulators): + new_a = a + tf.square(g) # update accumulator + self.updates.append(tf.compat.v1.assign(a, new_a)) + new_p = p - lr * g / (backend.sqrt(new_a) + self.epsilon) + + # Apply constraints. 
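+            # (A per-variable constraint such as `NonNeg` projects the
+            # updated parameter back into its feasible set before it is
+            # assigned below.)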
+ if getattr(p, "constraint", None) is not None: + new_p = p.constraint(new_p) + + self.updates.append(tf.compat.v1.assign(p, new_p)) + return self.updates + + def get_config(self): + config = { + "lr": float(backend.get_value(self.lr)), + "decay": float(backend.get_value(self.decay)), + "epsilon": self.epsilon, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) class Adadelta(Optimizer): - """Adadelta optimizer. - - Adadelta is a more robust extension of Adagrad - that adapts learning rates based on a moving window of gradient updates, - instead of accumulating all past gradients. This way, Adadelta continues - learning even when many updates have been done. Compared to Adagrad, in the - original version of Adadelta you don't have to set an initial learning - rate. In this version, initial learning rate and decay factor can - be set, as in most other Keras optimizers. - - It is recommended to leave the parameters of this optimizer - at their default values. - - Arguments: - lr: float >= 0. Initial learning rate, defaults to 1. - It is recommended to leave it at the default value. - rho: float >= 0. Adadelta decay factor, corresponding to fraction of - gradient to keep at each time step. - epsilon: float >= 0. Fuzz factor. - If `None`, defaults to `backend.epsilon()`. - decay: float >= 0. Initial learning rate decay. - - References: - - [Adadelta - an adaptive learning rate - method](http://arxiv.org/abs/1212.5701) - """ - - def __init__(self, lr=1.0, rho=0.95, epsilon=None, decay=0., **kwargs): - super().__init__(**kwargs) - with backend.name_scope(self.__class__.__name__): - self.lr = backend.variable(lr, name='lr') - self.decay = backend.variable(decay, name='decay') - self.iterations = backend.variable(0, dtype='int64', name='iterations') - if epsilon is None: - epsilon = backend.epsilon() - self.rho = rho - self.epsilon = epsilon - self.initial_decay = decay - - def _create_all_weights(self, params): - shapes = [backend.int_shape(p) for p in params] - accumulators = [backend.zeros(shape) for shape in shapes] - delta_accumulators = [backend.zeros(shape) for shape in shapes] - self.weights = accumulators + delta_accumulators - return accumulators, delta_accumulators - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] - accumulators, delta_accumulators = self._create_all_weights(params) - - lr = self.lr - if self.initial_decay > 0: - lr = lr * ( - 1. / - (1. + - self.decay * tf.cast(self.iterations, - backend.dtype(self.decay)))) - - for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators): - # update accumulator - new_a = self.rho * a + (1. - self.rho) * tf.square(g) - self.updates.append(tf.compat.v1.assign(a, new_a)) - - # use the new accumulator and the *old* delta_accumulator - update = g * backend.sqrt(d_a + self.epsilon) / backend.sqrt( - new_a + self.epsilon) - new_p = p - lr * update - - # Apply constraints. 
- if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(tf.compat.v1.assign(p, new_p)) - - # update delta_accumulator - new_d_a = self.rho * d_a + (1 - self.rho) * tf.square(update) - self.updates.append(tf.compat.v1.assign(d_a, new_d_a)) - return self.updates - - def get_config(self): - config = { - 'lr': float(backend.get_value(self.lr)), - 'rho': self.rho, - 'decay': float(backend.get_value(self.decay)), - 'epsilon': self.epsilon - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Adadelta optimizer. + + Adadelta is a more robust extension of Adagrad + that adapts learning rates based on a moving window of gradient updates, + instead of accumulating all past gradients. This way, Adadelta continues + learning even when many updates have been done. Compared to Adagrad, in the + original version of Adadelta you don't have to set an initial learning + rate. In this version, initial learning rate and decay factor can + be set, as in most other Keras optimizers. + + It is recommended to leave the parameters of this optimizer + at their default values. + + Arguments: + lr: float >= 0. Initial learning rate, defaults to 1. + It is recommended to leave it at the default value. + rho: float >= 0. Adadelta decay factor, corresponding to fraction of + gradient to keep at each time step. + epsilon: float >= 0. Fuzz factor. + If `None`, defaults to `backend.epsilon()`. + decay: float >= 0. Initial learning rate decay. + + References: + - [Adadelta - an adaptive learning rate + method](http://arxiv.org/abs/1212.5701) + """ + + def __init__(self, lr=1.0, rho=0.95, epsilon=None, decay=0.0, **kwargs): + super().__init__(**kwargs) + with backend.name_scope(self.__class__.__name__): + self.lr = backend.variable(lr, name="lr") + self.decay = backend.variable(decay, name="decay") + self.iterations = backend.variable( + 0, dtype="int64", name="iterations" + ) + if epsilon is None: + epsilon = backend.epsilon() + self.rho = rho + self.epsilon = epsilon + self.initial_decay = decay + + def _create_all_weights(self, params): + shapes = [backend.int_shape(p) for p in params] + accumulators = [backend.zeros(shape) for shape in shapes] + delta_accumulators = [backend.zeros(shape) for shape in shapes] + self.weights = accumulators + delta_accumulators + return accumulators, delta_accumulators + + def get_updates(self, loss, params): + grads = self.get_gradients(loss, params) + self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] + accumulators, delta_accumulators = self._create_all_weights(params) + + lr = self.lr + if self.initial_decay > 0: + lr = lr * ( + 1.0 + / ( + 1.0 + + self.decay + * tf.cast(self.iterations, backend.dtype(self.decay)) + ) + ) + + for p, g, a, d_a in zip( + params, grads, accumulators, delta_accumulators + ): + # update accumulator + new_a = self.rho * a + (1.0 - self.rho) * tf.square(g) + self.updates.append(tf.compat.v1.assign(a, new_a)) + + # use the new accumulator and the *old* delta_accumulator + update = ( + g + * backend.sqrt(d_a + self.epsilon) + / backend.sqrt(new_a + self.epsilon) + ) + new_p = p - lr * update + + # Apply constraints. 
+ if getattr(p, "constraint", None) is not None: + new_p = p.constraint(new_p) + + self.updates.append(tf.compat.v1.assign(p, new_p)) + + # update delta_accumulator + new_d_a = self.rho * d_a + (1 - self.rho) * tf.square(update) + self.updates.append(tf.compat.v1.assign(d_a, new_d_a)) + return self.updates + + def get_config(self): + config = { + "lr": float(backend.get_value(self.lr)), + "rho": self.rho, + "decay": float(backend.get_value(self.decay)), + "epsilon": self.epsilon, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) class Adam(Optimizer): - """Adam optimizer. - - Default parameters follow those provided in the original paper. - - Args: - lr: float >= 0. Learning rate. - beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. - If `None`, defaults to `backend.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - amsgrad: boolean. Whether to apply the AMSGrad variant of this algorithm - from the paper "On the Convergence of Adam and Beyond". - """ - - def __init__(self, - lr=0.001, - beta_1=0.9, - beta_2=0.999, - epsilon=None, - decay=0., - amsgrad=False, - **kwargs): - super().__init__(**kwargs) - with backend.name_scope(self.__class__.__name__): - self.iterations = backend.variable(0, dtype='int64', name='iterations') - self.lr = backend.variable(lr, name='lr') - self.beta_1 = backend.variable(beta_1, name='beta_1') - self.beta_2 = backend.variable(beta_2, name='beta_2') - self.decay = backend.variable(decay, name='decay') - if epsilon is None: - epsilon = backend.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - self.amsgrad = amsgrad - - def _create_all_weights(self, params): - ms = [ - backend.zeros(backend.int_shape(p), dtype=backend.dtype(p)) - for p in params] - vs = [ - backend.zeros(backend.int_shape(p), dtype=backend.dtype(p)) - for p in params] - if self.amsgrad: - vhats = [ - backend.zeros(backend.int_shape(p), dtype=backend.dtype(p)) - for p in params] - else: - vhats = [backend.zeros(1) for _ in params] - self.weights = [self.iterations] + ms + vs + vhats - return ms, vs, vhats - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * ( - 1. / - (1. + - self.decay * tf.cast(self.iterations, - backend.dtype(self.decay)))) - - with tf.control_dependencies([tf.compat.v1.assign_add(self.iterations, 1)]): - t = tf.cast(self.iterations, backend.floatx()) - lr_t = lr * ( - backend.sqrt(1. - tf.pow(self.beta_2, t)) / - (1. - tf.pow(self.beta_1, t))) - - ms, vs, vhats = self._create_all_weights(params) - for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - v_t = (self.beta_2 * v) + (1. - self.beta_2) * tf.square(g) - if self.amsgrad: - vhat_t = tf.maximum(vhat, v_t) - p_t = p - lr_t * m_t / (backend.sqrt(vhat_t) + self.epsilon) - self.updates.append(tf.compat.v1.assign(vhat, vhat_t)) - else: - p_t = p - lr_t * m_t / (backend.sqrt(v_t) + self.epsilon) - - self.updates.append(tf.compat.v1.assign(m, m_t)) - self.updates.append(tf.compat.v1.assign(v, v_t)) - new_p = p_t - - # Apply constraints. 
- if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(tf.compat.v1.assign(p, new_p)) - return self.updates - - def get_config(self): - config = { - 'lr': float(backend.get_value(self.lr)), - 'beta_1': float(backend.get_value(self.beta_1)), - 'beta_2': float(backend.get_value(self.beta_2)), - 'decay': float(backend.get_value(self.decay)), - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Adam optimizer. + + Default parameters follow those provided in the original paper. + + Args: + lr: float >= 0. Learning rate. + beta_1: float, 0 < beta < 1. Generally close to 1. + beta_2: float, 0 < beta < 1. Generally close to 1. + epsilon: float >= 0. Fuzz factor. + If `None`, defaults to `backend.epsilon()`. + decay: float >= 0. Learning rate decay over each update. + amsgrad: boolean. Whether to apply the AMSGrad variant of this algorithm + from the paper "On the Convergence of Adam and Beyond". + """ + + def __init__( + self, + lr=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=None, + decay=0.0, + amsgrad=False, + **kwargs, + ): + super().__init__(**kwargs) + with backend.name_scope(self.__class__.__name__): + self.iterations = backend.variable( + 0, dtype="int64", name="iterations" + ) + self.lr = backend.variable(lr, name="lr") + self.beta_1 = backend.variable(beta_1, name="beta_1") + self.beta_2 = backend.variable(beta_2, name="beta_2") + self.decay = backend.variable(decay, name="decay") + if epsilon is None: + epsilon = backend.epsilon() + self.epsilon = epsilon + self.initial_decay = decay + self.amsgrad = amsgrad + + def _create_all_weights(self, params): + ms = [ + backend.zeros(backend.int_shape(p), dtype=backend.dtype(p)) + for p in params + ] + vs = [ + backend.zeros(backend.int_shape(p), dtype=backend.dtype(p)) + for p in params + ] + if self.amsgrad: + vhats = [ + backend.zeros(backend.int_shape(p), dtype=backend.dtype(p)) + for p in params + ] + else: + vhats = [backend.zeros(1) for _ in params] + self.weights = [self.iterations] + ms + vs + vhats + return ms, vs, vhats + + def get_updates(self, loss, params): + grads = self.get_gradients(loss, params) + self.updates = [] + + lr = self.lr + if self.initial_decay > 0: + lr = lr * ( + 1.0 + / ( + 1.0 + + self.decay + * tf.cast(self.iterations, backend.dtype(self.decay)) + ) + ) + + with tf.control_dependencies( + [tf.compat.v1.assign_add(self.iterations, 1)] + ): + t = tf.cast(self.iterations, backend.floatx()) + lr_t = lr * ( + backend.sqrt(1.0 - tf.pow(self.beta_2, t)) + / (1.0 - tf.pow(self.beta_1, t)) + ) + + ms, vs, vhats = self._create_all_weights(params) + for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): + m_t = (self.beta_1 * m) + (1.0 - self.beta_1) * g + v_t = (self.beta_2 * v) + (1.0 - self.beta_2) * tf.square(g) + if self.amsgrad: + vhat_t = tf.maximum(vhat, v_t) + p_t = p - lr_t * m_t / (backend.sqrt(vhat_t) + self.epsilon) + self.updates.append(tf.compat.v1.assign(vhat, vhat_t)) + else: + p_t = p - lr_t * m_t / (backend.sqrt(v_t) + self.epsilon) + + self.updates.append(tf.compat.v1.assign(m, m_t)) + self.updates.append(tf.compat.v1.assign(v, v_t)) + new_p = p_t + + # Apply constraints. 
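# For reference, one bias-corrected Adam step (the amsgrad=False branch above)
# in plain NumPy; all concrete values here are illustrative only:
import numpy as np

lr, beta_1, beta_2, epsilon = 0.001, 0.9, 0.999, 1e-7
p = np.array([1.0, 2.0])
g = np.array([0.1, 0.1])
m = np.zeros_like(p)   # 1st-moment estimate
v = np.zeros_like(p)   # 2nd-moment estimate
t = 1.0                # iteration count after the assign_add above

lr_t = lr * np.sqrt(1.0 - beta_2**t) / (1.0 - beta_1**t)  # bias correction
m = beta_1 * m + (1.0 - beta_1) * g
v = beta_2 * v + (1.0 - beta_2) * g**2
p = p - lr_t * m / (np.sqrt(v) + epsilon)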
+ if getattr(p, "constraint", None) is not None: + new_p = p.constraint(new_p) + + self.updates.append(tf.compat.v1.assign(p, new_p)) + return self.updates + + def get_config(self): + config = { + "lr": float(backend.get_value(self.lr)), + "beta_1": float(backend.get_value(self.beta_1)), + "beta_2": float(backend.get_value(self.beta_2)), + "decay": float(backend.get_value(self.decay)), + "epsilon": self.epsilon, + "amsgrad": self.amsgrad, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) class Adamax(Optimizer): - """Adamax optimizer from Adam paper's Section 7. - - It is a variant of Adam based on the infinity norm. - Default parameters follow those provided in the paper. - - Args: - lr: float >= 0. Learning rate. - beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. - If `None`, defaults to `backend.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - """ - - def __init__(self, - lr=0.002, - beta_1=0.9, - beta_2=0.999, - epsilon=None, - decay=0., - **kwargs): - super().__init__(**kwargs) - with backend.name_scope(self.__class__.__name__): - self.iterations = backend.variable(0, dtype='int64', name='iterations') - self.lr = backend.variable(lr, name='lr') - self.beta_1 = backend.variable(beta_1, name='beta_1') - self.beta_2 = backend.variable(beta_2, name='beta_2') - self.decay = backend.variable(decay, name='decay') - if epsilon is None: - epsilon = backend.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - - def _create_all_weights(self, params): - - shapes = [backend.int_shape(p) for p in params] - # zero init of 1st moment - ms = [backend.zeros(shape) for shape in shapes] - # zero init of exponentially weighted infinity norm - us = [backend.zeros(shape) for shape in shapes] - self.weights = [self.iterations] + ms + us - return ms, us - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * ( - 1. / - (1. + - self.decay * tf.cast(self.iterations, - backend.dtype(self.decay)))) - - with tf.control_dependencies([tf.compat.v1.assign_add(self.iterations, 1)]): - t = tf.cast(self.iterations, backend.floatx()) - lr_t = lr / (1. - tf.pow(self.beta_1, t)) - - ms, us = self._create_all_weights(params) - - for p, g, m, u in zip(params, grads, ms, us): - - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - u_t = tf.maximum(self.beta_2 * u, tf.abs(g)) - p_t = p - lr_t * m_t / (u_t + self.epsilon) - - self.updates.append(tf.compat.v1.assign(m, m_t)) - self.updates.append(tf.compat.v1.assign(u, u_t)) - new_p = p_t - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(tf.compat.v1.assign(p, new_p)) - return self.updates - - def get_config(self): - config = { - 'lr': float(backend.get_value(self.lr)), - 'beta_1': float(backend.get_value(self.beta_1)), - 'beta_2': float(backend.get_value(self.beta_2)), - 'decay': float(backend.get_value(self.decay)), - 'epsilon': self.epsilon - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Adamax optimizer from Adam paper's Section 7. + + It is a variant of Adam based on the infinity norm. + Default parameters follow those provided in the paper. + + Args: + lr: float >= 0. Learning rate. + beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1. + epsilon: float >= 0. Fuzz factor. 
+ If `None`, defaults to `backend.epsilon()`. + decay: float >= 0. Learning rate decay over each update. + """ + + def __init__( + self, + lr=0.002, + beta_1=0.9, + beta_2=0.999, + epsilon=None, + decay=0.0, + **kwargs, + ): + super().__init__(**kwargs) + with backend.name_scope(self.__class__.__name__): + self.iterations = backend.variable( + 0, dtype="int64", name="iterations" + ) + self.lr = backend.variable(lr, name="lr") + self.beta_1 = backend.variable(beta_1, name="beta_1") + self.beta_2 = backend.variable(beta_2, name="beta_2") + self.decay = backend.variable(decay, name="decay") + if epsilon is None: + epsilon = backend.epsilon() + self.epsilon = epsilon + self.initial_decay = decay + + def _create_all_weights(self, params): + + shapes = [backend.int_shape(p) for p in params] + # zero init of 1st moment + ms = [backend.zeros(shape) for shape in shapes] + # zero init of exponentially weighted infinity norm + us = [backend.zeros(shape) for shape in shapes] + self.weights = [self.iterations] + ms + us + return ms, us + + def get_updates(self, loss, params): + grads = self.get_gradients(loss, params) + self.updates = [] + + lr = self.lr + if self.initial_decay > 0: + lr = lr * ( + 1.0 + / ( + 1.0 + + self.decay + * tf.cast(self.iterations, backend.dtype(self.decay)) + ) + ) + + with tf.control_dependencies( + [tf.compat.v1.assign_add(self.iterations, 1)] + ): + t = tf.cast(self.iterations, backend.floatx()) + lr_t = lr / (1.0 - tf.pow(self.beta_1, t)) + + ms, us = self._create_all_weights(params) + + for p, g, m, u in zip(params, grads, ms, us): + + m_t = (self.beta_1 * m) + (1.0 - self.beta_1) * g + u_t = tf.maximum(self.beta_2 * u, tf.abs(g)) + p_t = p - lr_t * m_t / (u_t + self.epsilon) + + self.updates.append(tf.compat.v1.assign(m, m_t)) + self.updates.append(tf.compat.v1.assign(u, u_t)) + new_p = p_t + + # Apply constraints. + if getattr(p, "constraint", None) is not None: + new_p = p.constraint(new_p) + + self.updates.append(tf.compat.v1.assign(p, new_p)) + return self.updates + + def get_config(self): + config = { + "lr": float(backend.get_value(self.lr)), + "beta_1": float(backend.get_value(self.beta_1)), + "beta_2": float(backend.get_value(self.beta_2)), + "decay": float(backend.get_value(self.decay)), + "epsilon": self.epsilon, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) class Nadam(Optimizer): - """Nesterov Adam optimizer. - - Much like Adam is essentially RMSprop with momentum, - Nadam is Adam RMSprop with Nesterov momentum. - - Default parameters follow those provided in the paper. - It is recommended to leave the parameters of this optimizer - at their default values. - - Args: - lr: float >= 0. Learning rate. - beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. - If `None`, defaults to `backend.epsilon()`. 
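# The Adamax loop above swaps Adam's second moment for an exponentially
# weighted infinity norm; a short NumPy sketch of one such step, with
# illustrative inputs:
import numpy as np

lr, beta_1, beta_2, epsilon = 0.002, 0.9, 0.999, 1e-7
p = np.array([1.0, 2.0])
g = np.array([0.1, -0.2])
m = np.zeros_like(p)   # 1st-moment estimate
u = np.zeros_like(p)   # weighted infinity norm
t = 1.0

lr_t = lr / (1.0 - beta_1**t)          # only the 1st moment needs correction
m = beta_1 * m + (1.0 - beta_1) * g
u = np.maximum(beta_2 * u, np.abs(g))  # infinity-norm accumulator
p = p - lr_t * m / (u + epsilon)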
- """ - - def __init__(self, - lr=0.002, - beta_1=0.9, - beta_2=0.999, - epsilon=None, - schedule_decay=0.004, - **kwargs): - super().__init__(**kwargs) - with backend.name_scope(self.__class__.__name__): - self.iterations = backend.variable(0, dtype='int64', name='iterations') - self.m_schedule = backend.variable(1., name='m_schedule') - self.lr = backend.variable(lr, name='lr') - self.beta_1 = backend.variable(beta_1, name='beta_1') - self.beta_2 = backend.variable(beta_2, name='beta_2') - if epsilon is None: - epsilon = backend.epsilon() - self.epsilon = epsilon - self.schedule_decay = schedule_decay - - def _create_all_weights(self, params): - shapes = [backend.int_shape(p) for p in params] - ms = [backend.zeros(shape) for shape in shapes] - vs = [backend.zeros(shape) for shape in shapes] - - self.weights = [self.iterations, self.m_schedule] + ms + vs - return ms, vs - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [] - - with tf.control_dependencies([tf.compat.v1.assign_add(self.iterations, 1)]): - t = tf.cast(self.iterations, backend.floatx()) - - # Due to the recommendations in [2], i.e. warming momentum schedule - momentum_cache_t = self.beta_1 * ( - 1. - 0.5 * - (tf.pow(backend.cast_to_floatx(0.96), t * self.schedule_decay))) - momentum_cache_t_1 = self.beta_1 * ( - 1. - 0.5 * - (tf.pow(backend.cast_to_floatx(0.96), - (t + 1) * self.schedule_decay))) - m_schedule_new = self.m_schedule * momentum_cache_t - m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 - self.updates.append((self.m_schedule, m_schedule_new)) - - ms, vs = self._create_all_weights(params) - - for p, g, m, v in zip(params, grads, ms, vs): - # the following equations given in [1] - g_prime = g / (1. - m_schedule_new) - m_t = self.beta_1 * m + (1. - self.beta_1) * g - m_t_prime = m_t / (1. - m_schedule_next) - v_t = self.beta_2 * v + (1. - self.beta_2) * tf.square(g) - v_t_prime = v_t / (1. - tf.pow(self.beta_2, t)) - m_t_bar = (1. - - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime - - self.updates.append(tf.compat.v1.assign(m, m_t)) - self.updates.append(tf.compat.v1.assign(v, v_t)) - - p_t = p - self.lr * m_t_bar / (backend.sqrt(v_t_prime) + self.epsilon) - new_p = p_t - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(tf.compat.v1.assign(p, new_p)) - return self.updates - - def get_config(self): - config = { - 'lr': float(backend.get_value(self.lr)), - 'beta_1': float(backend.get_value(self.beta_1)), - 'beta_2': float(backend.get_value(self.beta_2)), - 'epsilon': self.epsilon, - 'schedule_decay': self.schedule_decay - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + """Nesterov Adam optimizer. + Much like Adam is essentially RMSprop with momentum, + Nadam is Adam RMSprop with Nesterov momentum. 
-class TFOptimizer(Optimizer, tf.__internal__.tracking.Trackable): - """Wrapper class for native TensorFlow optimizers.""" - - def __init__(self, optimizer, iterations=None): # pylint: disable=super-init-not-called - self.optimizer = optimizer - self._track_trackable(optimizer, name='optimizer') - if iterations is None: - with backend.name_scope(self.__class__.__name__): - self.iterations = backend.variable(0, dtype='int64', name='iterations') - else: - self.iterations = iterations - self._track_trackable(self.iterations, name='global_step') - - def _clip_gradients(self, grads): - """Clip gradients according to the clipnorm and clipvalue attributes.""" - # TFOptimizer wrapper has no gradient clipping options. - return grads - - def minimize(self, loss, var_list, grad_loss=None, tape=None): - """Mimics the `OptimizerV2.minimize` API.""" - if not callable(loss) and tape is None: - raise ValueError('`tape` is required when a `Tensor` loss is passed.') - tape = tape if tape is not None else tf.GradientTape() - - if callable(loss): - with tape: - if not callable(var_list): - tape.watch(var_list) - loss = loss() - if callable(var_list): - var_list = var_list() - - var_list = tf.nest.flatten(var_list) - if var_list: - grads = tape.gradient(loss, var_list, grad_loss) - grads_and_vars = list(zip(grads, var_list)) - self.apply_gradients(grads_and_vars) - - def apply_gradients(self, grads_and_vars): - self.optimizer.apply_gradients(grads_and_vars, global_step=self.iterations) - - def get_grads(self, loss, params): - return self.optimizer.compute_gradients(loss, params) - - def get_updates(self, loss, params): - if tf.distribute.has_strategy(): - self.updates = [] - - if not params: - # After the model vars have been created, the second call to get_updates - # is called with params as an empty list. This ensures that we call - # compute_gradients with params=None. - grads = self.optimizer.compute_gradients(loss) - else: - grads = self.optimizer.compute_gradients(loss, params) - global_step = tf.compat.v1.train.get_global_step() - opt_update = self.optimizer.apply_gradients(grads, global_step) - else: - if not params: - self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] + Default parameters follow those provided in the paper. + It is recommended to leave the parameters of this optimizer + at their default values. + + Args: + lr: float >= 0. Learning rate. + beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1. + epsilon: float >= 0. Fuzz factor. + If `None`, defaults to `backend.epsilon()`. 
+ """ + + def __init__( + self, + lr=0.002, + beta_1=0.9, + beta_2=0.999, + epsilon=None, + schedule_decay=0.004, + **kwargs, + ): + super().__init__(**kwargs) + with backend.name_scope(self.__class__.__name__): + self.iterations = backend.variable( + 0, dtype="int64", name="iterations" + ) + self.m_schedule = backend.variable(1.0, name="m_schedule") + self.lr = backend.variable(lr, name="lr") + self.beta_1 = backend.variable(beta_1, name="beta_1") + self.beta_2 = backend.variable(beta_2, name="beta_2") + if epsilon is None: + epsilon = backend.epsilon() + self.epsilon = epsilon + self.schedule_decay = schedule_decay + + def _create_all_weights(self, params): + shapes = [backend.int_shape(p) for p in params] + ms = [backend.zeros(shape) for shape in shapes] + vs = [backend.zeros(shape) for shape in shapes] + + self.weights = [self.iterations, self.m_schedule] + ms + vs + return ms, vs + + def get_updates(self, loss, params): + grads = self.get_gradients(loss, params) + self.updates = [] + + with tf.control_dependencies( + [tf.compat.v1.assign_add(self.iterations, 1)] + ): + t = tf.cast(self.iterations, backend.floatx()) + + # Due to the recommendations in [2], i.e. warming momentum schedule + momentum_cache_t = self.beta_1 * ( + 1.0 + - 0.5 + * (tf.pow(backend.cast_to_floatx(0.96), t * self.schedule_decay)) + ) + momentum_cache_t_1 = self.beta_1 * ( + 1.0 + - 0.5 + * ( + tf.pow( + backend.cast_to_floatx(0.96), (t + 1) * self.schedule_decay + ) + ) + ) + m_schedule_new = self.m_schedule * momentum_cache_t + m_schedule_next = ( + self.m_schedule * momentum_cache_t * momentum_cache_t_1 + ) + self.updates.append((self.m_schedule, m_schedule_new)) + + ms, vs = self._create_all_weights(params) + + for p, g, m, v in zip(params, grads, ms, vs): + # the following equations given in [1] + g_prime = g / (1.0 - m_schedule_new) + m_t = self.beta_1 * m + (1.0 - self.beta_1) * g + m_t_prime = m_t / (1.0 - m_schedule_next) + v_t = self.beta_2 * v + (1.0 - self.beta_2) * tf.square(g) + v_t_prime = v_t / (1.0 - tf.pow(self.beta_2, t)) + m_t_bar = ( + 1.0 - momentum_cache_t + ) * g_prime + momentum_cache_t_1 * m_t_prime + + self.updates.append(tf.compat.v1.assign(m, m_t)) + self.updates.append(tf.compat.v1.assign(v, v_t)) + + p_t = p - self.lr * m_t_bar / ( + backend.sqrt(v_t_prime) + self.epsilon + ) + new_p = p_t + + # Apply constraints. 
+ if getattr(p, "constraint", None) is not None: + new_p = p.constraint(new_p) + + self.updates.append(tf.compat.v1.assign(p, new_p)) return self.updates - # Updates list starts out empty because the iterations variable is - # incremented in optimizer.apply_gradients() - self.updates = [] - grads = self.optimizer.compute_gradients(loss, params) - opt_update = self.optimizer.apply_gradients( - grads, global_step=self.iterations) + def get_config(self): + config = { + "lr": float(backend.get_value(self.lr)), + "beta_1": float(backend.get_value(self.beta_1)), + "beta_2": float(backend.get_value(self.beta_2)), + "epsilon": self.epsilon, + "schedule_decay": self.schedule_decay, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) - self.updates.append(opt_update) - return self.updates - @property - def weights(self): - raise NotImplementedError +class TFOptimizer(Optimizer, tf.__internal__.tracking.Trackable): + """Wrapper class for native TensorFlow optimizers.""" + + def __init__(self, optimizer, iterations=None): + self.optimizer = optimizer + self._track_trackable(optimizer, name="optimizer") + if iterations is None: + with backend.name_scope(self.__class__.__name__): + self.iterations = backend.variable( + 0, dtype="int64", name="iterations" + ) + else: + self.iterations = iterations + self._track_trackable(self.iterations, name="global_step") + + def _clip_gradients(self, grads): + """Clip gradients according to the clipnorm and clipvalue attributes.""" + # TFOptimizer wrapper has no gradient clipping options. + return grads + + def minimize(self, loss, var_list, grad_loss=None, tape=None): + """Mimics the `OptimizerV2.minimize` API.""" + if not callable(loss) and tape is None: + raise ValueError( + "`tape` is required when a `Tensor` loss is passed." + ) + tape = tape if tape is not None else tf.GradientTape() + + if callable(loss): + with tape: + if not callable(var_list): + tape.watch(var_list) + loss = loss() + if callable(var_list): + var_list = var_list() + + var_list = tf.nest.flatten(var_list) + if var_list: + grads = tape.gradient(loss, var_list, grad_loss) + grads_and_vars = list(zip(grads, var_list)) + self.apply_gradients(grads_and_vars) + + def apply_gradients(self, grads_and_vars): + self.optimizer.apply_gradients( + grads_and_vars, global_step=self.iterations + ) + + def get_grads(self, loss, params): + return self.optimizer.compute_gradients(loss, params) + + def get_updates(self, loss, params): + if tf.distribute.has_strategy(): + self.updates = [] + + if not params: + # After the model vars have been created, the second call to + # get_updates is called with params as an empty list. This + # ensures that we call compute_gradients with params=None. 
+ grads = self.optimizer.compute_gradients(loss) + else: + grads = self.optimizer.compute_gradients(loss, params) + global_step = tf.compat.v1.train.get_global_step() + opt_update = self.optimizer.apply_gradients(grads, global_step) + else: + if not params: + self.updates = [tf.compat.v1.assign_add(self.iterations, 1)] + return self.updates + + # Updates list starts out empty because the iterations variable is + # incremented in optimizer.apply_gradients() + self.updates = [] + grads = self.optimizer.compute_gradients(loss, params) + opt_update = self.optimizer.apply_gradients( + grads, global_step=self.iterations + ) + + self.updates.append(opt_update) + return self.updates + + @property + def weights(self): + raise NotImplementedError - def get_config(self): - raise NotImplementedError + def get_config(self): + raise NotImplementedError - def from_config(self, config): - raise NotImplementedError + def from_config(self, config): + raise NotImplementedError # Aliases. diff --git a/keras/optimizers/optimizer_v1_test.py b/keras/optimizers/optimizer_v1_test.py new file mode 100644 index 000000000000..977d573ee5b6 --- /dev/null +++ b/keras/optimizers/optimizer_v1_test.py @@ -0,0 +1,304 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Keras optimizers.""" + +import gc +import weakref + +import numpy as np +import tensorflow.compat.v2 as tf + +import keras +from keras.optimizers import optimizer_v1 +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import np_utils + +# isort: off +from tensorflow.python.training.adam import AdamOptimizer +from tensorflow.python.training.experimental.loss_scale_optimizer import ( # noqa: E501 + MixedPrecisionLossScaleOptimizer, +) + + +def _get_model(input_dim, num_hidden, output_dim): + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + num_hidden, activation="relu", input_shape=(input_dim,) + ) + ) + model.add(keras.layers.Dense(output_dim, activation="softmax")) + return model + + +@test_combinations.run_all_keras_modes +class KerasOptimizersTest(test_combinations.TestCase): + def _test_optimizer(self, optimizer, target=0.75): + if tf.executing_eagerly(): + self.skipTest("v1 optimizer does not run in eager mode") + np.random.seed(1337) + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=1000, + test_samples=200, + input_shape=(10,), + num_classes=2, + ) + y_train = np_utils.to_categorical(y_train) + model = _get_model(x_train.shape[1], 20, y_train.shape[1]) + model.compile( + loss="categorical_crossentropy", + optimizer=optimizer, + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + np.testing.assert_equal( + keras.backend.get_value(model.optimizer.iterations), 0 + ) + history = model.fit( + x_train, y_train, epochs=2, batch_size=16, verbose=0 + ) + np.testing.assert_equal( + keras.backend.get_value(model.optimizer.iterations), 126 + ) # 63 steps per epoch + self.assertGreaterEqual(history.history["acc"][-1], target) + config = keras.optimizers.serialize(optimizer) + optim = keras.optimizers.deserialize(config) + new_config = keras.optimizers.serialize(optim) + new_config["class_name"] = new_config["class_name"].lower() + new_config["config"].pop("name", None) + if "amsgrad" not in config["config"]: + new_config["config"].pop("amsgrad", None) + if ( + "decay" in new_config["config"] + and "schedule_decay" in config["config"] + ): + new_config["config"]["schedule_decay"] = new_config["config"].pop( + "decay" + ) + if "momentum" not in config["config"]: + new_config["config"].pop("momentum", None) + if "centered" not in config["config"]: + new_config["config"].pop("centered", None) + self.assertDictEqual(config, new_config) + + # Test constraints. 
+ model = keras.models.Sequential() + dense = keras.layers.Dense( + 10, + input_shape=(x_train.shape[1],), + kernel_constraint=lambda x: 0.0 * x + 1.0, + bias_constraint=lambda x: 0.0 * x + 2.0, + activation="relu", + ) + model.add(dense) + model.add(keras.layers.Dense(y_train.shape[1], activation="softmax")) + model.compile( + loss="categorical_crossentropy", + optimizer=optimizer, + metrics=["accuracy"], + run_eagerly=test_utils.should_run_eagerly(), + ) + np.testing.assert_equal( + keras.backend.get_value(model.optimizer.iterations), 126 + ) # Using same optimizer from before + model.train_on_batch(x_train[:10], y_train[:10]) + np.testing.assert_equal( + keras.backend.get_value(model.optimizer.iterations), 127 + ) + kernel, bias = dense.get_weights() + np.testing.assert_allclose(kernel, 1.0, atol=1e-3) + np.testing.assert_allclose(bias, 2.0, atol=1e-3) + + def test_sgd(self): + with self.cached_session(): + self._test_optimizer(optimizer_v1.SGD()) + + def test_momentum(self): + with self.cached_session(): + self._test_optimizer( + optimizer_v1.SGD(lr=0.01, momentum=0.9, nesterov=True) + ) + + def test_rmsprop(self): + with self.cached_session(): + self._test_optimizer(optimizer_v1.RMSprop()) + self._test_optimizer(optimizer_v1.RMSprop(decay=1e-3)) + + def test_adagrad(self): + with self.cached_session(): + self._test_optimizer(optimizer_v1.Adagrad()) + self._test_optimizer(optimizer_v1.Adagrad(decay=1e-3)) + + def test_adadelta(self): + with self.cached_session(): + self._test_optimizer(optimizer_v1.Adadelta(), target=0.6) + # Accuracy seems dependent on the initialization. Even adding + # tf.compat.v1.Print nodes in the graph seemed to affect the + # initialization seed, and hence the accuracy. + self._test_optimizer(optimizer_v1.Adadelta(decay=1e-3), target=0.4) + + def test_adam(self): + with self.cached_session(): + self._test_optimizer(optimizer_v1.Adam()) + # Accuracy seems dependent on the seed initialization. + # TODO(b/121051441): fix test flakiness. 
+ self._test_optimizer(optimizer_v1.Adam(decay=1e-3), target=0.73) + self._test_optimizer(optimizer_v1.Adam(amsgrad=True)) + + def test_adamax(self): + with self.cached_session(): + self._test_optimizer(optimizer_v1.Adamax()) + self._test_optimizer(optimizer_v1.Adamax(decay=1e-3)) + + def test_nadam(self): + with self.cached_session(): + self._test_optimizer(optimizer_v1.Nadam()) + + def test_clipnorm(self): + with self.cached_session(): + self._test_optimizer( + optimizer_v1.SGD(lr=0.01, momentum=0.9, clipnorm=0.5) + ) + + def test_clipvalue(self): + with self.cached_session(): + self._test_optimizer( + optimizer_v1.SGD(lr=0.01, momentum=0.9, clipvalue=0.5) + ) + + def test_tf_optimizer(self): + if tf.executing_eagerly(): + self.skipTest("v1 optimizer does not run in eager mode") + optimizer = optimizer_v1.TFOptimizer(AdamOptimizer(0.01)) + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + 2, + input_shape=(3,), + kernel_constraint=keras.constraints.MaxNorm(1), + ) + ) + # This is possible + model.compile( + loss="mean_squared_error", + optimizer=optimizer, + run_eagerly=test_utils.should_run_eagerly(), + ) + keras.backend.track_tf_optimizer(optimizer) + model.fit( + np.random.random((5, 3)), + np.random.random((5, 2)), + epochs=1, + batch_size=5, + verbose=0, + ) + # not supported + with self.assertRaises(NotImplementedError): + _ = optimizer.weights + with self.assertRaises(NotImplementedError): + optimizer.get_config() + with self.assertRaises(NotImplementedError): + optimizer.from_config(None) + + def test_optimizer_garbage_collection(self): + if tf.executing_eagerly(): + self.skipTest("v1 optimizer does not run in eager mode") + graph = tf.Graph() + with graph.as_default(): + optimizer = optimizer_v1.TFOptimizer(AdamOptimizer(0.01)) + keras.backend.track_tf_optimizer(optimizer) + optimizer_weak = weakref.ref(optimizer) + graph_weak = weakref.ref(graph) + del graph, optimizer + gc.collect() + # Check that the weak references are dead now. 
+ self.assertIs(graph_weak(), None) + self.assertIs(optimizer_weak(), None) + + def test_tf_optimizer_iterations(self): + if tf.executing_eagerly(): + self.skipTest("v1 optimizer does not run in eager mode") + with self.cached_session(): + optimizer = optimizer_v1.TFOptimizer(AdamOptimizer(0.01)) + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + 2, + input_shape=(3,), + kernel_constraint=keras.constraints.MaxNorm(1), + ) + ) + model.compile( + loss="mean_squared_error", + optimizer=optimizer, + run_eagerly=test_utils.should_run_eagerly(), + ) + keras.backend.track_tf_optimizer(optimizer) + self.assertEqual( + keras.backend.get_value(model.optimizer.iterations), 0 + ) + + model.fit( + np.random.random((55, 3)), + np.random.random((55, 2)), + epochs=1, + batch_size=5, + verbose=0, + ) + self.assertEqual( + keras.backend.get_value(model.optimizer.iterations), 11 + ) + + def test_negative_clipvalue_or_clipnorm(self): + with self.assertRaises(ValueError): + _ = optimizer_v1.SGD(lr=0.01, clipvalue=-0.5) + with self.assertRaises(ValueError): + _ = optimizer_v1.Adam(clipnorm=-2.0) + + def test_mixed_precision_loss_scale_optimizer(self): + if tf.executing_eagerly(): + self.skipTest("v1 optimizer does not run in eager mode") + optimizer = MixedPrecisionLossScaleOptimizer(AdamOptimizer(), "dynamic") + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + 2, + input_shape=(3,), + kernel_constraint=keras.constraints.MaxNorm(1), + ) + ) + model.compile( + loss="mean_squared_error", + optimizer=optimizer, + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit( + np.random.random((5, 3)), + np.random.random((5, 2)), + epochs=1, + batch_size=5, + verbose=0, + ) + + def test_deserialization_error(self): + with self.assertRaisesRegex( + ValueError, "Could not interpret optimizer" + ): + keras.optimizers.get(0) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/optimizer_v2/BUILD b/keras/optimizers/optimizer_v2/BUILD deleted file mode 100644 index 2784f3a20ae3..000000000000 --- a/keras/optimizers/optimizer_v2/BUILD +++ /dev/null @@ -1,162 +0,0 @@ -# Description: -# Contains the Keras OptimizerV2 API (internal TensorFlow version). - -load("@org_keras//keras:keras.bzl", "cuda_py_test") - -package( - # TODO(scottzhu): Remove non-keras deps from TF. 
- default_visibility = [ - "//keras:friends", - "//third_party/tensorflow/python:__pkg__", - "//third_party/tensorflow/python/distribute:__pkg__", - "//third_party/tensorflow/python/training/tracking:__pkg__", - ], - licenses = ["notice"], -) - -py_library( - name = "optimizer_v2", - srcs = [ - "adadelta.py", - "adagrad.py", - "adam.py", - "adamax.py", - "ftrl.py", - "gradient_descent.py", - "nadam.py", - "optimizer_v2.py", - "rmsprop.py", - "utils.py", - ], - srcs_version = "PY3", - deps = [ - "//:expect_tensorflow_installed", - "//keras:backend", - "//keras:backend_config", - "//keras/engine:base_layer_utils", - "//keras/initializers", - "//keras/optimizers/schedules:learning_rate_schedule", - "//keras/utils:layer_utils", - "//keras/utils:tf_utils", - ], -) - -cuda_py_test( - name = "adagrad_test", - size = "medium", - srcs = ["adagrad_test.py"], - shard_count = 4, - deps = [ - ":optimizer_v2", - "//:expect_tensorflow_installed", - "//keras/testing_infra:test_combinations", - ], -) - -cuda_py_test( - name = "adam_test", - size = "medium", - srcs = ["adam_test.py"], - shard_count = 4, - tags = [ - "no_rocm", - "no_windows", # TODO(b/171384138) - ], - deps = [ - ":optimizer_v2", - "//:expect_tensorflow_installed", - "//keras/testing_infra:test_combinations", - ], -) - -cuda_py_test( - name = "adamax_test", - size = "medium", - srcs = ["adamax_test.py"], - shard_count = 4, - # TODO(b/168527439): invalid resource variable reference on GPU for TFRT. - tags = ["no_rocm"], - deps = [ - ":optimizer_v2", - "//:expect_tensorflow_installed", - "//keras/testing_infra:test_combinations", - ], -) - -cuda_py_test( - name = "adadelta_test", - size = "medium", - srcs = ["adadelta_test.py"], - shard_count = 4, - # TODO(b/168527439): invalid resource variable reference on GPU for TFRT. - deps = [ - ":optimizer_v2", - "//:expect_tensorflow_installed", - "//keras/testing_infra:test_combinations", - ], -) - -cuda_py_test( - name = "ftrl_test", - size = "medium", - srcs = ["ftrl_test.py"], - shard_count = 4, - deps = [ - ":optimizer_v2", - "//:expect_tensorflow_installed", - ], -) - -cuda_py_test( - name = "gradient_descent_test", - size = "medium", - srcs = ["gradient_descent_test.py"], - shard_count = 4, - deps = [ - ":optimizer_v2", - "//:expect_tensorflow_installed", - "//keras/testing_infra:test_combinations", - ], -) - -cuda_py_test( - name = "nadam_test", - size = "medium", - srcs = ["nadam_test.py"], - shard_count = 4, - deps = [ - ":optimizer_v2", - "//:expect_tensorflow_installed", - ], -) - -cuda_py_test( - name = "optimizer_v2_test", - size = "medium", - srcs = ["optimizer_v2_test.py"], - shard_count = 8, - tags = [ - "no_windows", - ], - deps = [ - ":optimizer_v2", - "//:expect_absl_installed", - "//:expect_tensorflow_installed", - "//keras", - "//keras/testing_infra:test_combinations", - ], -) - -cuda_py_test( - name = "rmsprop_test", - size = "medium", - srcs = ["rmsprop_test.py"], - shard_count = 2, - # TODO(b/168527439): invalid resource variable reference on GPU for TFRT. - deps = [ - ":optimizer_v2", - "//:expect_absl_installed", - "//:expect_tensorflow_installed", - "//keras/testing_infra:test_combinations", - ], -) diff --git a/keras/optimizers/optimizer_v2/adadelta.py b/keras/optimizers/optimizer_v2/adadelta.py deleted file mode 100644 index 378e756ad050..000000000000 --- a/keras/optimizers/optimizer_v2/adadelta.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Adadelta optimizer implementation.""" - -import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes - -import numpy as np -from keras import backend_config -from keras.optimizers.optimizer_v2 import optimizer_v2 -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@keras_export('keras.optimizers.Adadelta') -class Adadelta(optimizer_v2.OptimizerV2): - r"""Optimizer that implements the Adadelta algorithm. - - Adadelta optimization is a stochastic gradient descent method that is based on - adaptive learning rate per dimension to address two drawbacks: - - - The continual decay of learning rates throughout training. - - The need for a manually selected global learning rate. - - Adadelta is a more robust extension of Adagrad that adapts learning rates - based on a moving window of gradient updates, instead of accumulating all - past gradients. This way, Adadelta continues learning even when many updates - have been done. Compared to Adagrad, in the original version of Adadelta you - don't have to set an initial learning rate. In this version, the initial - learning rate can be set, as in most other Keras optimizers. - - Args: - learning_rate: Initial value for the learning rate: - either a floating point value, - or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance. - Defaults to 0.001. - Note that `Adadelta` tends to benefit from higher initial learning rate - values compared to other optimizers. - To match the exact form in the original paper, use 1.0. - rho: A `Tensor` or a floating point value. The decay rate. - epsilon: Small floating point value used to maintain numerical stability. - name: Optional name prefix for the operations created when applying - gradients. Defaults to `"Adadelta"`. - **kwargs: keyword arguments. Allowed arguments are `clipvalue`, - `clipnorm`, `global_clipnorm`. - If `clipvalue` (float) is set, the gradient of each weight - is clipped to be no higher than this value. - If `clipnorm` (float) is set, the gradient of each weight - is individually clipped so that its norm is no higher than this value. - If `global_clipnorm` (float) is set the gradient of all weights is - clipped so that their global norm is no higher than this value. - - Reference: - - [Zeiler, 2012](http://arxiv.org/abs/1212.5701) - """ - - _HAS_AGGREGATE_GRAD = True - - def __init__(self, - learning_rate=0.001, - rho=0.95, - epsilon=1e-7, - name='Adadelta', - **kwargs): - super().__init__(name, **kwargs) - self._set_hyper('learning_rate', kwargs.get('lr', learning_rate)) - self._set_hyper('decay', self._initial_decay) - self._set_hyper('rho', rho) - self.epsilon = epsilon or backend_config.epsilon() - - def _create_slots(self, var_list): - # Separate for-loops to respect the ordering of slot variables from v1. 
- for v in var_list: - self.add_slot(v, 'accum_grad') - for v in var_list: - self.add_slot(v, 'accum_var') - - def _prepare_local(self, var_device, var_dtype, apply_state): - super()._prepare_local(var_device, var_dtype, apply_state) - apply_state[(var_device, var_dtype)].update( - dict( - epsilon=tf.convert_to_tensor( - self.epsilon, var_dtype), - rho=tf.identity(self._get_hyper('rho', var_dtype)))) - - def set_weights(self, weights): - params = self.weights - # Override set_weights for backward compatibility of Keras V1 optimizer - # since it does not include iteration at head of the weight list. Set - # iteration to 0. - if len(params) == len(weights) + 1: - weights = [np.array(0)] + weights - super().set_weights(weights) - - def _resource_apply_dense(self, grad, var, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - accum_grad = self.get_slot(var, 'accum_grad') - accum_var = self.get_slot(var, 'accum_var') - return tf.raw_ops.ResourceApplyAdadelta( - var=var.handle, - accum=accum_grad.handle, - accum_update=accum_var.handle, - lr=coefficients['lr_t'], - rho=coefficients['rho'], - epsilon=coefficients['epsilon'], - grad=grad, - use_locking=self._use_locking) - - def _resource_apply_sparse(self, grad, var, indices, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - accum_grad = self.get_slot(var, 'accum_grad') - accum_var = self.get_slot(var, 'accum_var') - return tf.raw_ops.ResourceSparseApplyAdadelta( - var=var.handle, - accum=accum_grad.handle, - accum_update=accum_var.handle, - lr=coefficients['lr_t'], - rho=coefficients['rho'], - epsilon=coefficients['epsilon'], - grad=grad, - indices=indices, - use_locking=self._use_locking) - - def get_config(self): - config = super().get_config() - config.update({ - 'learning_rate': self._serialize_hyperparameter('learning_rate'), - 'decay': self._initial_decay, - 'rho': self._serialize_hyperparameter('rho'), - 'epsilon': self.epsilon, - }) - return config diff --git a/keras/optimizers/optimizer_v2/adadelta_test.py b/keras/optimizers/optimizer_v2/adadelta_test.py deleted file mode 100644 index db768532e3a5..000000000000 --- a/keras/optimizers/optimizer_v2/adadelta_test.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for Adadelta Optimizer.""" - -import tensorflow.compat.v2 as tf - -from absl.testing import parameterized -import numpy as np -from keras.testing_infra import test_combinations -from keras.optimizers.optimizer_v2 import adadelta - -_DATA_TYPES = [ - tf.half, tf.float32, tf.float64, tf.complex64, - tf.complex128 -] - - -class AdadeltaOptimizerTest(tf.test.TestCase, parameterized.TestCase): - - def doTestBasic(self, use_resource=False, use_callable_params=False): - num_updates = 4 # number of ADADELTA steps to perform - for dtype in _DATA_TYPES: - for grad in [0.2, 0.1, 0.01]: - for lr in [1.0, 0.5, 0.1]: - var0_init = [1.0, 2.0] - var1_init = [3.0, 4.0] - if use_resource: - var0 = tf.Variable(var0_init, dtype=dtype) - var1 = tf.Variable(var1_init, dtype=dtype) - else: - var0 = tf.Variable(var0_init, dtype=dtype) - var1 = tf.Variable(var1_init, dtype=dtype) - - grads = tf.constant([grad, grad], dtype=dtype) - - accum = 0.0 - accum_update = 0.0 - - # ADADELTA gradient optimizer - rho = 0.95 - epsilon = 1e-8 - if use_callable_params: - adadelta_opt = adadelta.Adadelta( - learning_rate=lambda: lr, # pylint: disable=cell-var-from-loop - rho=lambda: rho, # pylint: disable=cell-var-from-loop - epsilon=epsilon) # pylint: disable=cell-var-from-loop - else: - adadelta_opt = adadelta.Adadelta( - learning_rate=lr, rho=rho, epsilon=epsilon) - if not tf.executing_eagerly(): - adadelta_update = adadelta_opt.apply_gradients( - zip([grads, grads], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Assign slots - slot = [None] * 2 - slot_update = [None] * 2 - slot[0] = adadelta_opt.get_slot(var0, "accum_grad") - self.assertEqual(slot[0].shape, var0.shape) - - slot_update[0] = adadelta_opt.get_slot(var0, "accum_var") - self.assertEqual(slot_update[0].shape, var0.shape) - - slot[1] = adadelta_opt.get_slot(var1, "accum_grad") - self.assertEqual(slot[1].shape, var1.shape) - - slot_update[1] = adadelta_opt.get_slot(var1, "accum_var") - self.assertEqual(slot_update[1].shape, var1.shape) - - # Fetch params to validate initial values - self.assertAllClose(var0_init, self.evaluate(var0)) - self.assertAllClose(var1_init, self.evaluate(var1)) - - update = [None] * num_updates - tot_update = 0 - for step in range(num_updates): - # Run adadelta update for comparison - if not tf.executing_eagerly(): - self.evaluate(adadelta_update) - else: - adadelta_opt.apply_gradients(zip([grads, grads], [var0, var1])) - - # Perform initial update without previous accum values - accum = accum * rho + (grad**2) * (1 - rho) - update[step] = ( - np.sqrt(accum_update + epsilon) * - (1. 
/ np.sqrt(accum + epsilon)) * grad) - accum_update = ( - accum_update * rho + (update[step]**2) * (1.0 - rho)) - tot_update += update[step] * lr - - if not tf.executing_eagerly(): - # Check that the accumulators have been updated - # TODO(lxuechen): This is hard to test in eager mode - for slot_idx in range(2): - self.assertAllCloseAccordingToType( - np.array([accum, accum], dtype=dtype.as_numpy_dtype(0)), - self.evaluate(slot[slot_idx]), - rtol=1e-5) - - self.assertAllCloseAccordingToType( - np.array( - [accum_update, accum_update], - dtype=dtype.as_numpy_dtype(0)), - self.evaluate(slot_update[slot_idx]), - rtol=1e-5) - - # Check that the parameters have been updated - self.assertAllCloseAccordingToType( - np.array( - [var0_init[0] - tot_update, var0_init[1] - tot_update], - dtype=dtype.as_numpy_dtype(0)), - self.evaluate(var0), - rtol=1e-5) - - self.assertAllCloseAccordingToType( - np.array( - [var1_init[0] - tot_update, var1_init[1] - tot_update], - dtype=dtype.as_numpy_dtype(0)), - self.evaluate(var1), - rtol=1e-5) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testResourceBasic(self): - self.doTestBasic(use_resource=True) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testBasicCallableParams(self): - self.doTestBasic(use_resource=True, use_callable_params=True) - - def testMinimizeSparseResourceVariable(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) - x = tf.constant([[4.0], [5.0]], dtype=dtype) - - def loss(): - pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop - return pred * pred - - sgd_op = adadelta.Adadelta(1.0, 1.0, 1.0).minimize( - loss, var_list=[var0]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([[1.0, 2.0]], self.evaluate(var0)) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0)) - - def testConstructAdadeltaWithLR(self): - opt = adadelta.Adadelta(lr=1.0, rho=0.9, epsilon=1.) - opt_2 = adadelta.Adadelta(learning_rate=0.1, rho=0.9, epsilon=1., lr=1.0) - opt_3 = adadelta.Adadelta(learning_rate=0.1, rho=0.9, epsilon=1.) - self.assertIsInstance(opt.lr, tf.Variable) - self.assertIsInstance(opt_2.lr, tf.Variable) - self.assertIsInstance(opt_3.lr, tf.Variable) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(opt.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) - - def testConstructAdadeltaWithEpsilonValues(self): - opt = adadelta.Adadelta(epsilon=None) - self.assertEqual(opt.epsilon, 1e-7) - - opt = adadelta.Adadelta(epsilon=1e-8) - self.assertEqual(opt.epsilon, 1e-8) - - -if __name__ == "__main__": - tf.test.main() diff --git a/keras/optimizers/optimizer_v2/adagrad.py b/keras/optimizers/optimizer_v2/adagrad.py deleted file mode 100644 index c1fe8dba563b..000000000000 --- a/keras/optimizers/optimizer_v2/adagrad.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Adagrad optimizer implementation.""" - -import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes - -import numpy as np -from keras import backend_config -from keras.optimizers.optimizer_v2 import optimizer_v2 -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@keras_export('keras.optimizers.Adagrad') -class Adagrad(optimizer_v2.OptimizerV2): - r"""Optimizer that implements the Adagrad algorithm. - - Adagrad is an optimizer with parameter-specific learning rates, - which are adapted relative to how frequently a parameter gets - updated during training. The more updates a parameter receives, - the smaller the updates. - - Args: - learning_rate: Initial value for the learning rate: - either a floating point value, - or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance. - Defaults to 0.001. - Note that `Adagrad` tends to benefit from higher initial learning rate - values compared to other optimizers. - To match the exact form in the original paper, use 1.0. - initial_accumulator_value: Floating point value. - Starting value for the accumulators (per-parameter momentum values). - Must be non-negative. - epsilon: Small floating point value used to maintain numerical stability. - name: Optional name prefix for the operations created when applying - gradients. Defaults to `"Adagrad"`. - **kwargs: keyword arguments. Allowed arguments are `clipvalue`, - `clipnorm`, `global_clipnorm`. - If `clipvalue` (float) is set, the gradient of each weight - is clipped to be no higher than this value. - If `clipnorm` (float) is set, the gradient of each weight - is individually clipped so that its norm is no higher than this value. - If `global_clipnorm` (float) is set the gradient of all weights is - clipped so that their global norm is no higher than this value.. - - Reference: - - [Duchi et al., 2011]( - http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf). 
- """ - - _HAS_AGGREGATE_GRAD = True - - def __init__(self, - learning_rate=0.001, - initial_accumulator_value=0.1, - epsilon=1e-7, - name='Adagrad', - **kwargs): - if initial_accumulator_value < 0.0: - raise ValueError('initial_accumulator_value must be non-negative: %s' % - initial_accumulator_value) - if epsilon is None: - epsilon = backend_config.epsilon() - super().__init__(name, **kwargs) - self._set_hyper('learning_rate', kwargs.get('lr', learning_rate)) - self._set_hyper('decay', self._initial_decay) - self._initial_accumulator_value = initial_accumulator_value - self.epsilon = epsilon or backend_config.epsilon() - - def _create_slots(self, var_list): - for var in var_list: - dtype = var.dtype.base_dtype - init = tf.compat.v1.constant_initializer( - self._initial_accumulator_value, dtype=dtype) - self.add_slot(var, 'accumulator', init) - - def _prepare_local(self, var_device, var_dtype, apply_state): - super()._prepare_local(var_device, var_dtype, apply_state) - apply_state[(var_device, var_dtype)].update( - dict( - epsilon=tf.convert_to_tensor( - self.epsilon, var_dtype), - neg_lr_t=-apply_state[(var_device, var_dtype)]['lr_t'], - zero=tf.zeros((), dtype=tf.int64))) - - def set_weights(self, weights): - params = self.weights - # Override set_weights for backward compatibility of Keras V1 optimizer - # since it does not include iteration at head of the weight list. Set - # iteration to 0. - if len(params) == len(weights) + 1: - weights = [np.array(0)] + weights - super().set_weights(weights) - - @classmethod - def from_config(cls, config, custom_objects=None): - """Creates an optimizer from its config. - - This method is the reverse of `get_config`, - capable of instantiating the same optimizer from the config - dictionary. - - Args: - config: A Python dictionary, typically the output of get_config. - custom_objects: A Python dictionary mapping names to additional Python - objects used to create this optimizer, such as a function used for a - hyperparameter. - - Returns: - An optimizer instance. 
- """ - if 'initial_accumulator_value' not in config: - config['initial_accumulator_value'] = 0.1 - if 'lr' in config: - config['learning_rate'] = config.pop('lr') - return cls(**config) - - def _resource_apply_dense(self, grad, var, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - acc = self.get_slot(var, 'accumulator') - return tf.raw_ops.ResourceApplyAdagradV2( - var=var.handle, - accum=acc.handle, - lr=coefficients['lr_t'], - epsilon=coefficients['epsilon'], - grad=grad, - use_locking=self._use_locking) - - def _resource_apply_sparse(self, grad, var, indices, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - acc = self.get_slot(var, 'accumulator') - return tf.raw_ops.ResourceSparseApplyAdagradV2( - var=var.handle, - accum=acc.handle, - lr=coefficients['lr_t'], - epsilon=coefficients['epsilon'], - grad=grad, - indices=indices, - use_locking=self._use_locking) - - def get_config(self): - config = super().get_config() - config.update({ - 'learning_rate': self._serialize_hyperparameter('learning_rate'), - 'decay': self._initial_decay, - 'initial_accumulator_value': self._initial_accumulator_value, - 'epsilon': self.epsilon, - }) - return config diff --git a/keras/optimizers/optimizer_v2/adagrad_test.py b/keras/optimizers/optimizer_v2/adagrad_test.py deleted file mode 100644 index 7db5a0c19a07..000000000000 --- a/keras/optimizers/optimizer_v2/adagrad_test.py +++ /dev/null @@ -1,526 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functional tests for aggregate operations.""" - -import tensorflow.compat.v2 as tf - -import copy - -from absl.testing import parameterized -import numpy as np -from keras.testing_infra import test_combinations -from keras.optimizers.optimizer_v2 import adagrad -from keras.optimizers.schedules import learning_rate_schedule - -_DATA_TYPES = [ - tf.half, tf.float32, tf.float64, tf.complex64, - tf.complex128 -] - - -def adagrad_update_numpy(param, accum, g_t, lr=0.001, epsilon=1e-7): - accum_t = accum + g_t * g_t - param_t = param - lr * g_t / (np.sqrt(accum_t) + epsilon) - return param_t, accum_t - - -def sparse_adagrad_update_numpy(param, - accum, - gindexs, - gvalues, - lr=0.001, - epsilon=1e-7): - accum_t = copy.deepcopy(accum) - param_t = copy.deepcopy(param) - # first loop accumulates repeated indices if necessary. 
- for i in range(len(gindexs)): - gindex = gindexs[i] - gvalue = gvalues[i] - accum_t[gindex] = accum_t[gindex] + gvalue * gvalue - for i in range(len(gindexs)): - gindex = gindexs[i] - gvalue = gvalues[i] - param_t[gindex] = param_t[gindex] - lr * gvalue / ( - np.sqrt(accum_t[gindex]) + epsilon) - return param_t, accum_t - - -class AdagradOptimizerTest(tf.test.TestCase, parameterized.TestCase): - - def doTestBasic(self, use_callable_params=False): - for dtype in _DATA_TYPES: - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = lambda: 3.0 - if not use_callable_params: - learning_rate = learning_rate() - - ada_opt = adagrad.Adagrad(learning_rate) - - accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - - if not tf.executing_eagerly(): - ada_update = ada_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllClose([1.0, 2.0], v0_val) - self.assertAllClose([3.0, 4.0], v1_val) - - # Run 3 steps of adagrad - for _ in range(3): - if not tf.executing_eagerly(): - self.evaluate(ada_update) - else: - ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np, grads0_np, - 3.0) - var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np, grads1_np, - 3.0) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasic(self): - self.doTestBasic() - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testBasicCallableParams(self): - self.doTestBasic(use_callable_params=True) - - def testBasicWithLearningRateDecay(self): - for dtype in _DATA_TYPES: - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = 3.0 - decay = 0.5 - - ada_opt = adagrad.Adagrad(learning_rate, decay=decay) - - accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - - if not tf.executing_eagerly(): - ada_update = ada_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllClose([1.0, 2.0], v0_val) - self.assertAllClose([3.0, 4.0], v1_val) - - # Run 3 steps of adagrad - for t in range(3): - if not tf.executing_eagerly(): - self.evaluate(ada_update) - else: - ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - lr_np = learning_rate / (1 + decay * t) - var0_np, accum0_np = adagrad_update_numpy(var0_np, 
accum0_np, grads0_np, - lr_np) - var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np, grads1_np, - lr_np) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testBasicWithLargeEpsilon(self): - var0_np = np.array([1.0, 2.0]) - var1_np = np.array([3.0, 4.0]) - grads0_np = np.array([0.1, 0.1]) - grads1_np = np.array([0.01, 0.01]) - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = 3.0 - - ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0) - - accum0_np = np.array([0.1, 0.1]) - accum1_np = np.array([0.1, 0.1]) - - if not tf.executing_eagerly(): - ada_update = ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllClose([1.0, 2.0], v0_val) - self.assertAllClose([3.0, 4.0], v1_val) - - # Run 3 steps of adagrad - for _ in range(3): - if not tf.executing_eagerly(): - self.evaluate(ada_update) - else: - ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np, grads0_np, - 3.0, 1.0) - var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np, grads1_np, - 3.0, 1.0) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testBasicWithLearningRateInverseTimeDecay(self): - for dtype in _DATA_TYPES: - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = 3.0 - decay = 0.5 - lr_schedule = learning_rate_schedule.InverseTimeDecay( - learning_rate, decay_steps=1.0, decay_rate=decay) - - ada_opt = adagrad.Adagrad(lr_schedule) - - accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - - if not tf.executing_eagerly(): - ada_update = ada_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllClose([1.0, 2.0], v0_val) - self.assertAllClose([3.0, 4.0], v1_val) - - # Run 3 steps of adagrad - for t in range(3): - if not tf.executing_eagerly(): - self.evaluate(ada_update) - else: - ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - lr_np = learning_rate / (1 + decay * t) - var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np, grads0_np, - lr_np) - var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np, grads1_np, - lr_np) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testMinimizeSparseResourceVariable(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
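For reference, the dense rule that the `adagrad_update_numpy` helper above encodes is the standard Adagrad step: accumulate squared gradients, then scale each coordinate's step by the square root of its accumulator. A minimal standalone sketch (plain NumPy; `adagrad_step` and the seed values are illustrative, chosen to match `doTestBasic`):

```python
import numpy as np

def adagrad_step(param, accum, grad, lr=0.001, epsilon=1e-7):
    # Accumulate squared gradients, then take a per-coordinate scaled step.
    accum = accum + grad * grad
    param = param - lr * grad / (np.sqrt(accum) + epsilon)
    return param, accum

param = np.array([1.0, 2.0])
accum = np.full_like(param, 0.1)  # Keras seeds the accumulator at 0.1
grad = np.array([0.1, 0.1])
for _ in range(3):
    param, accum = adagrad_step(param, accum, grad, lr=3.0)
print(param, accum)  # the trajectory doTestBasic checks var0 against
```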
- with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var0 = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=dtype) - x = tf.constant([[4.0], [5.0]], dtype=dtype) - - def loss(): - pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop - return pred * pred - - sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]], - self.evaluate(var0)) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[0, 1], [3, 4]], - self.evaluate(var0), - atol=0.01) - - def testTensorLearningRate(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = tf.constant(3.0) - ada_opt = adagrad.Adagrad(learning_rate) - ada_update = ada_opt.apply_gradients(zip([grads0, grads1], - [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - # Run 3 steps of adagrad - for _ in range(3): - self.evaluate(ada_update) - var0_np, accum0_np = adagrad_update_numpy( - var0_np, accum0_np, grads0_np, learning_rate) - var1_np, accum1_np = adagrad_update_numpy( - var1_np, accum1_np, grads1_np, learning_rate) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testSparseBasic(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
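The sparse tests that follow hand the optimizer its gradients as `tf.IndexedSlices` (values, row indices, dense shape) instead of dense tensors, so only the referenced rows of the variable and its accumulator are touched. A minimal eager-mode sketch, assuming the `tf.keras` Adagrad backed by the module being deleted here:

```python
import tensorflow as tf

var = tf.Variable([1.0, 1.0, 2.0])
grad = tf.IndexedSlices(
    values=tf.constant([0.1, 0.1]),   # gradient rows
    indices=tf.constant([0, 2]),      # rows they belong to
    dense_shape=tf.constant([3]))

opt = tf.keras.optimizers.Adagrad(learning_rate=3.0)
opt.apply_gradients([(grad, var)])
print(var.numpy())  # row 1 is left untouched
```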
- with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0_np_indices = np.array([0, 2], dtype=np.int32) - grads0 = tf.IndexedSlices( - tf.constant(grads0_np[grads0_np_indices]), - tf.constant(grads0_np_indices), tf.constant([3])) - grads1_np_indices = np.array([0, 2], dtype=np.int32) - grads1 = tf.IndexedSlices( - tf.constant(grads1_np[grads1_np_indices]), - tf.constant(grads1_np_indices), tf.constant([3])) - learning_rate = 3.0 - ada_opt = adagrad.Adagrad(learning_rate) - ada_update = ada_opt.apply_gradients(zip([grads0, grads1], - [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1)) - - accum0_np = np.array([0.1, 0.1, 0.1], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.1, 0.1, 0.1], dtype=dtype.as_numpy_dtype) - - # Run 3 step of sgd - for _ in range(3): - self.evaluate(ada_update) - - var0_np, accum0_np = sparse_adagrad_update_numpy( - var0_np, accum0_np, grads0_np_indices, - grads0_np[grads0_np_indices], learning_rate) - var1_np, accum1_np = sparse_adagrad_update_numpy( - var1_np, accum1_np, grads1_np_indices, - grads1_np[grads1_np_indices], learning_rate) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testSparseSingleVarDim(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - grads0_np_indices = np.array([0], dtype=np.int32) - grads0 = tf.IndexedSlices( - tf.constant(grads0_np[grads0_np_indices]), - tf.constant(grads0_np_indices), tf.constant([3])) - learning_rate = 3.0 - ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.) - ada_update = ada_opt.apply_gradients(zip([grads0], [var0])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0], self.evaluate(var0)) - - accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) - - # Run 3 step of sgd - for _ in range(3): - self.evaluate(ada_update) - - var0_np, accum0_np = sparse_adagrad_update_numpy( - var0_np, - accum0_np, - grads0_np_indices, - grads0_np[grads0_np_indices], - learning_rate, - epsilon=1.) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - - def testSparseRepeatedIndices(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
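`testSparseRepeatedIndices` (next) asserts that a gradient listing the same row twice matches one pre-aggregated gradient: the optimizer sums values at duplicate indices before the sparse apply. A rough NumPy illustration of that summation semantics, which is exactly the equivalence the test checks:

```python
import numpy as np

indices = np.array([1, 1])          # the same row appears twice
values = np.array([[0.1], [0.1]])

unique, inverse = np.unique(indices, return_inverse=True)
summed = np.zeros((unique.size, 1))
np.add.at(summed, inverse, values)  # unbuffered += handles duplicates
print(unique, summed)               # [1] [[0.2]], the aggregated gradient
```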
- with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype) - - repeated_index_update_var = tf.Variable( - var_np, dtype=dtype) - aggregated_update_var = tf.Variable( - var_np, dtype=dtype) - grad_repeated_index = tf.IndexedSlices( - tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), - tf.constant([1, 1]), tf.constant([2, 1])) - grad_aggregated = tf.IndexedSlices( - tf.constant([0.2], shape=[1, 1], dtype=dtype), - tf.constant([1]), tf.constant([2, 1])) - repeated_update = adagrad.Adagrad(3.0).apply_gradients([ - (grad_repeated_index, repeated_index_update_var) - ]) - aggregated_update = adagrad.Adagrad(3.0).apply_gradients([ - (grad_aggregated, aggregated_update_var) - ]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose( - self.evaluate(aggregated_update_var), - self.evaluate(repeated_index_update_var)) - for _ in range(3): - self.evaluate(repeated_update) - self.evaluate(aggregated_update) - self.assertAllClose( - self.evaluate(aggregated_update_var), - self.evaluate(repeated_index_update_var)) - - def testSparseRepeatedIndicesByEmbeddingLookUp(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var_repeated = tf.Variable([1.0, 2.0], dtype=dtype) - loss_repeated = lambda: tf.reduce_sum( # pylint: disable=g-long-lambda - tf.compat.v1.nn.embedding_lookup(var_repeated, [0, 0])) # pylint: disable=cell-var-from-loop - var_aggregated = tf.Variable([1.0, 2.0], dtype=dtype) - loss_aggregated = lambda: 2 * tf.reduce_sum( # pylint: disable=g-long-lambda - tf.compat.v1.nn.embedding_lookup(var_aggregated, [0])) # pylint: disable=cell-var-from-loop - update_op_repeated = adagrad.Adagrad(2.0).minimize( - loss_repeated, var_list=[var_repeated]) - update_op_aggregated = adagrad.Adagrad(2.0).minimize( - loss_aggregated, var_list=[var_aggregated]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllCloseAccordingToType( - self.evaluate(var_repeated), self.evaluate(var_aggregated)) - for _ in range(3): - self.evaluate(update_op_repeated) - self.evaluate(update_op_aggregated) - self.assertAllCloseAccordingToType( - self.evaluate(var_repeated), self.evaluate(var_aggregated)) - - def testSparseStability(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in [tf.half]: - shape = [1, 6] - var0_np = np.array([[0.00872496, -0.106952, 0.110467, - 0.226505, -0.0147257, -0.0105945]], - dtype=dtype.as_numpy_dtype) - var0 = tf.Variable(var0_np) - grads0_np = np.array([[ - -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05, -8.4877e-05, - -9.48906e-05 - ]], - dtype=dtype.as_numpy_dtype) - grads0 = tf.IndexedSlices( - tf.constant(grads0_np), tf.constant([0]), - tf.constant(shape)) - ada_opt = adagrad.Adagrad(1.0) - ada_update = ada_opt.apply_gradients(zip([grads0], [var0])) - slot0 = ada_opt.get_slot(var0, "accumulator") - init = tf.compat.v1.global_variables_initializer() - for _ in range(100): - self.evaluate(init) - self.evaluate(ada_update) - self.assertAllCloseAccordingToType( - np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([[ - 0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573, - -0.01029443 - ]]), self.evaluate(var0)) - - def testSharing(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
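`testSharing` (next) applies a single optimizer object twice and relies on both update ops reading and writing the same per-variable `accumulator` slot. A short eager sketch of inspecting that slot, assuming the optimizer_v2 implementation deleted here (exposed as `tf.keras.optimizers.legacy.Adagrad` in later releases):

```python
import tensorflow as tf

var = tf.Variable([1.0, 2.0])
opt = tf.keras.optimizers.Adagrad(learning_rate=3.0)
opt.apply_gradients([(tf.constant([0.1, 0.1]), var)])

acc = opt.get_slot(var, "accumulator")  # created lazily, one per variable
print(acc.shape == var.shape)           # True
print(acc.numpy())                      # 0.1 (seed) + 0.1**2 -> [0.11 0.11]
```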
- with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = 3.0 - ada_opt = adagrad.Adagrad(learning_rate) - # Apply the optimizer twice. Both applications will use - # the same accums. - ada_update1 = ada_opt.apply_gradients(zip([grads0, grads1], - [var0, var1])) - ada_update2 = ada_opt.apply_gradients(zip([grads0, grads1], - [var0, var1])) - slot0 = ada_opt.get_slot(var0, "accumulator") - self.assertEqual(slot0.shape, var0.shape) - slot1 = ada_opt.get_slot(var1, "accumulator") - self.assertEqual(slot1.shape, var1.shape) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values. - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Mix the first and the second adagrad for 3 steps. - self.evaluate(ada_update1) - self.evaluate(ada_update2) - self.evaluate(ada_update1) - - accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - for _ in range(3): - var0_np, accum0_np = adagrad_update_numpy( - var0_np, accum0_np, grads0_np, learning_rate) - var1_np, accum1_np = adagrad_update_numpy( - var1_np, accum1_np, grads1_np, learning_rate) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testConstructAdagradWithLR(self): - opt = adagrad.Adagrad(lr=1.0) - opt_2 = adagrad.Adagrad(learning_rate=0.1, lr=1.0) - opt_3 = adagrad.Adagrad(learning_rate=0.1) - self.assertIsInstance(opt.lr, tf.Variable) - self.assertIsInstance(opt_2.lr, tf.Variable) - self.assertIsInstance(opt_3.lr, tf.Variable) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(opt.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) - - -if __name__ == "__main__": - tf.test.main() diff --git a/keras/optimizers/optimizer_v2/adam.py b/keras/optimizers/optimizer_v2/adam.py deleted file mode 100644 index b96bd69c499d..000000000000 --- a/keras/optimizers/optimizer_v2/adam.py +++ /dev/null @@ -1,472 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Adam optimizer implementation.""" - -import tensorflow.compat.v2 as tf -from keras import backend_config -from keras.optimizers.optimizer_v2 import optimizer_v2 -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@keras_export('keras.optimizers.Adam') -class Adam(optimizer_v2.OptimizerV2): - r"""Optimizer that implements the Adam algorithm. - - Adam optimization is a stochastic gradient descent method that is based on - adaptive estimation of first-order and second-order moments. - - According to - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980), - the method is "*computationally - efficient, has little memory requirement, invariant to diagonal rescaling of - gradients, and is well suited for problems that are large in terms of - data/parameters*". - - Args: - learning_rate: A `Tensor`, floating point value, or a schedule that is a - `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable - that takes no arguments and returns the actual value to use, The - learning rate. Defaults to 0.001. - beta_1: A float value or a constant float tensor, or a callable - that takes no arguments and returns the actual value to use. The - exponential decay rate for the 1st moment estimates. Defaults to 0.9. - beta_2: A float value or a constant float tensor, or a callable - that takes no arguments and returns the actual value to use, The - exponential decay rate for the 2nd moment estimates. Defaults to 0.999. - epsilon: A small constant for numerical stability. This epsilon is - "epsilon hat" in the Kingma and Ba paper (in the formula just before - Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to - 1e-7. - amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm from - the paper "On the Convergence of Adam and beyond". Defaults to `False`. - name: Optional name for the operations created when applying gradients. - Defaults to `"Adam"`. - **kwargs: keyword arguments. Allowed arguments are `clipvalue`, - `clipnorm`, `global_clipnorm`. - If `clipvalue` (float) is set, the gradient of each weight - is clipped to be no higher than this value. - If `clipnorm` (float) is set, the gradient of each weight - is individually clipped so that its norm is no higher than this value. - If `global_clipnorm` (float) is set the gradient of all weights is - clipped so that their global norm is no higher than this value. - - Usage: - - >>> opt = tf.keras.optimizers.Adam(learning_rate=0.1) - >>> var1 = tf.Variable(10.0) - >>> loss = lambda: (var1 ** 2)/2.0 # d(loss)/d(var1) == var1 - >>> step_count = opt.minimize(loss, [var1]).numpy() - >>> # The first step is `-learning_rate*sign(grad)` - >>> var1.numpy() - 9.9 - - Reference: - - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) - - [Reddi et al., 2018]( - https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`. - - Notes: - - The default value of 1e-7 for epsilon might not be a good default in - general. For example, when training an Inception network on ImageNet a - current good choice is 1.0 or 0.1. Note that since Adam uses the - formulation just before Section 2.1 of the Kingma and Ba paper rather than - the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon - hat" in the paper. 
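The "epsilon hat" distinction above is concrete: this implementation folds the bias correction into a per-step learning rate and adds epsilon outside the square root, rather than bias-correcting `m` and `v` separately as in Algorithm 1. A standalone NumPy sketch of that exact update (`adam_step` is an illustrative name; it mirrors the `adam_update_numpy` helper in the test file deleted further below):

```python
import numpy as np

def adam_step(param, m, v, grad, t, lr=0.001, b1=0.9, b2=0.999, eps=1e-7):
    # t is the 1-based step count.
    m = b1 * m + (1 - b1) * grad
    v = b2 * v + (1 - b2) * grad * grad
    lr_t = lr * np.sqrt(1 - b2 ** t) / (1 - b1 ** t)  # folded bias correction
    return param - lr_t * m / (np.sqrt(v) + eps), m, v  # eps is 'epsilon hat'

param, m, v = np.array([10.0]), 0.0, 0.0
for t in range(1, 4):
    param, m, v = adam_step(param, m, v, grad=param, t=t, lr=0.1)
print(param)  # first step moved by ~ -lr * sign(grad), as the docstring says
```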
- - The sparse implementation of this algorithm (used when the gradient is an - IndexedSlices object, typically because of `tf.gather` or an embedding - lookup in the forward pass) does apply momentum to variable slices even if - they were not used in the forward pass (meaning they have a gradient equal - to zero). Momentum decay (beta1) is also applied to the entire momentum - accumulator. This means that the sparse behavior is equivalent to the dense - behavior (in contrast to some momentum implementations which ignore momentum - unless a variable slice was actually used). - """ - - _HAS_AGGREGATE_GRAD = True - - def __init__(self, - learning_rate=0.001, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-7, - amsgrad=False, - name='Adam', - **kwargs): - super().__init__(name, **kwargs) - self._set_hyper('learning_rate', kwargs.get('lr', learning_rate)) - self._set_hyper('decay', self._initial_decay) - self._set_hyper('beta_1', beta_1) - self._set_hyper('beta_2', beta_2) - self.epsilon = epsilon or backend_config.epsilon() - self.amsgrad = amsgrad - - def _create_slots(self, var_list): - # Create slots for the first and second moments. - # Separate for-loops to respect the ordering of slot variables from v1. - for var in var_list: - self.add_slot(var, 'm') - for var in var_list: - self.add_slot(var, 'v') - if self.amsgrad: - for var in var_list: - self.add_slot(var, 'vhat') - - def _prepare_local(self, var_device, var_dtype, apply_state): - super()._prepare_local(var_device, var_dtype, apply_state) - - local_step = tf.cast(self.iterations + 1, var_dtype) - beta_1_t = tf.identity(self._get_hyper('beta_1', var_dtype)) - beta_2_t = tf.identity(self._get_hyper('beta_2', var_dtype)) - beta_1_power = tf.pow(beta_1_t, local_step) - beta_2_power = tf.pow(beta_2_t, local_step) - lr = (apply_state[(var_device, var_dtype)]['lr_t'] * - (tf.sqrt(1 - beta_2_power) / (1 - beta_1_power))) - apply_state[(var_device, var_dtype)].update( - dict( - lr=lr, - epsilon=tf.convert_to_tensor( - self.epsilon, var_dtype), - beta_1_t=beta_1_t, - beta_1_power=beta_1_power, - one_minus_beta_1_t=1 - beta_1_t, - beta_2_t=beta_2_t, - beta_2_power=beta_2_power, - one_minus_beta_2_t=1 - beta_2_t)) - - def set_weights(self, weights): - params = self.weights - # If the weights are generated by Keras V1 optimizer, it includes vhats - # even without amsgrad, i.e, V1 optimizer has 3x + 1 variables, while V2 - # optimizer has 2x + 1 variables. Filter vhats out for compatibility. 
- num_vars = int((len(params) - 1) / 2) - if len(weights) == 3 * num_vars + 1: - weights = weights[:len(params)] - super().set_weights(weights) - - def _resource_apply_dense(self, grad, var, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - m = self.get_slot(var, 'm') - v = self.get_slot(var, 'v') - - if not self.amsgrad: - return tf.raw_ops.ResourceApplyAdam( - var=var.handle, - m=m.handle, - v=v.handle, - beta1_power=coefficients['beta_1_power'], - beta2_power=coefficients['beta_2_power'], - lr=coefficients['lr_t'], - beta1=coefficients['beta_1_t'], - beta2=coefficients['beta_2_t'], - epsilon=coefficients['epsilon'], - grad=grad, - use_locking=self._use_locking) - else: - vhat = self.get_slot(var, 'vhat') - return tf.raw_ops.ResourceApplyAdamWithAmsgrad( - var=var.handle, - m=m.handle, - v=v.handle, - vhat=vhat.handle, - beta1_power=coefficients['beta_1_power'], - beta2_power=coefficients['beta_2_power'], - lr=coefficients['lr_t'], - beta1=coefficients['beta_1_t'], - beta2=coefficients['beta_2_t'], - epsilon=coefficients['epsilon'], - grad=grad, - use_locking=self._use_locking) - - def _resource_apply_sparse(self, grad, var, indices, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - # m_t = beta1 * m + (1 - beta1) * g_t - m = self.get_slot(var, 'm') - m_scaled_g_values = grad * coefficients['one_minus_beta_1_t'] - m_t = tf.compat.v1.assign(m, m * coefficients['beta_1_t'], - use_locking=self._use_locking) - with tf.control_dependencies([m_t]): - m_t = self._resource_scatter_add(m, indices, m_scaled_g_values) - - # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) - v = self.get_slot(var, 'v') - v_scaled_g_values = (grad * grad) * coefficients['one_minus_beta_2_t'] - v_t = tf.compat.v1.assign(v, v * coefficients['beta_2_t'], - use_locking=self._use_locking) - with tf.control_dependencies([v_t]): - v_t = self._resource_scatter_add(v, indices, v_scaled_g_values) - - if not self.amsgrad: - v_sqrt = tf.sqrt(v_t) - var_update = tf.compat.v1.assign_sub( - var, coefficients['lr'] * m_t / (v_sqrt + coefficients['epsilon']), - use_locking=self._use_locking) - return tf.group(*[var_update, m_t, v_t]) - else: - v_hat = self.get_slot(var, 'vhat') - v_hat_t = tf.maximum(v_hat, v_t) - with tf.control_dependencies([v_hat_t]): - v_hat_t = tf.compat.v1.assign( - v_hat, v_hat_t, use_locking=self._use_locking) - v_hat_sqrt = tf.sqrt(v_hat_t) - var_update = tf.compat.v1.assign_sub( - var, - coefficients['lr'] * m_t / (v_hat_sqrt + coefficients['epsilon']), - use_locking=self._use_locking) - return tf.group(*[var_update, m_t, v_t, v_hat_t]) - - def get_config(self): - config = super().get_config() - config.update({ - 'learning_rate': self._serialize_hyperparameter('learning_rate'), - 'decay': self._initial_decay, - 'beta_1': self._serialize_hyperparameter('beta_1'), - 'beta_2': self._serialize_hyperparameter('beta_2'), - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad, - }) - return config - - -class NonFusedAdam(optimizer_v2.OptimizerV2): - r"""Optimizer that implements the Adam algorithm without fused kernels. - - Adam optimization is a stochastic gradient descent method that is based on - adaptive estimation of first-order and second-order moments. 
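The fused `Adam` above hands each dense step to a single kernel (`tf.raw_ops.ResourceApplyAdam`), while `NonFusedAdam` below spells the same update out with elementwise ops inside `@tf.function(jit_compile=True)` so XLA can fuse them. The AMSGrad variant in both differs only by a running elementwise maximum of the second moment; a tiny NumPy sketch (illustrative numbers) of why that keeps the effective step size from growing:

```python
import numpy as np

vhat = np.zeros(3)                  # AMSGrad's extra accumulator
for v_t in (np.array([0.04, 0.25, 0.09]),
            np.array([0.10, 0.20, 0.09])):
    vhat = np.maximum(vhat, v_t)    # vhat never decreases...
print(vhat)                         # [0.1  0.25 0.09]
# ...so lr / (sqrt(vhat) + eps) is non-increasing per coordinate.
```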
- According to the paper
- [Adam: A Method for Stochastic Optimization. Kingma et al.,
- 2014](http://arxiv.org/abs/1412.6980), the method is "*computationally
- efficient, has little memory requirement, invariant to diagonal rescaling of
- gradients, and is well suited for problems that are large in terms of
- data/parameters*".
-
- For AMSGrad see [On The Convergence Of Adam And Beyond.
- Reddi et al., 2018](https://openreview.net/pdf?id=ryQu7f-RZ).
-
- **If amsgrad = False**:
-
- initialize $m_0$ as 1st moment vector
- initialize $v_0$ as 2nd moment vector
-
- The update rule for $\theta$ with gradient $g$ uses an optimization
- described at the end of section 2 of the paper:
-
- $$lr_t = \mathrm{learning\_rate} *
- \sqrt{1 - \beta_2^t} / (1 - \beta_1^t)$$
- $$m_t = \beta_1 * m_{t-1} + (1 - \beta_1) * g$$
- $$v_t = \beta_2 * v_{t-1} + (1 - \beta_2) * g^2$$
- $$\theta_t = \theta_{t-1} - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
-
- **If amsgrad = True**:
-
- initialize $m_0$ as 1st moment vector
- initialize $v_0$ as 2nd moment vector
- initialize $\hat{v}_0$ as 2nd moment vector
-
- The update rule for $\theta$ with gradient $g$ uses an optimization
- described at the end of section 2 of the paper:
-
- $$lr_t = \mathrm{learning\_rate} *
- \sqrt{1 - \beta_2^t} / (1 - \beta_1^t)$$
-
- $$m_t = \beta_1 * m_{t-1} + (1 - \beta_1) * g$$
- $$v_t = \beta_2 * v_{t-1} + (1 - \beta_2) * g^2$$
- $$\hat{v}_t = \max(\hat{v}_{t-1}, v_t)$$
- $$\theta_t = \theta_{t-1} - lr_t * m_t / (\sqrt{\hat{v}_t} + \epsilon)$$
-
- The default value of 1e-7 for epsilon might not be a good default in
- general. For example, when training an Inception network on ImageNet a
- current good choice is 1.0 or 0.1. Note that since Adam uses the
- formulation just before Section 2.1 of the Kingma and Ba paper rather than
- the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon
- hat" in the paper.
-
- The sparse implementation of this algorithm (used when the gradient is an
- IndexedSlices object, typically because of `tf.gather` or an embedding
- lookup in the forward pass) does apply momentum to variable slices even if
- they were not used in the forward pass (meaning they have a gradient equal
- to zero). Momentum decay (beta1) is also applied to the entire momentum
- accumulator. This means that the sparse behavior is equivalent to the dense
- behavior (in contrast to some momentum implementations which ignore momentum
- unless a variable slice was actually used).
-
- Usage:
-
- >>> opt = tf.keras.optimizers.Adam(learning_rate=0.1)
- >>> var1 = tf.Variable(10.0)
- >>> loss = lambda: (var1 ** 2)/2.0  # d(loss)/d(var1) == var1
- >>> step_count = opt.minimize(loss, [var1]).numpy()
- >>> # The first step is `-learning_rate*sign(grad)`
- >>> var1.numpy()
- 9.9
- """
-
- _HAS_AGGREGATE_GRAD = True
-
- def __init__(self,
- learning_rate=0.001,
- beta_1=0.9,
- beta_2=0.999,
- epsilon=1e-7,
- amsgrad=False,
- name='Adam',
- **kwargs):
- """Construct a new Adam optimizer.
-
- Args:
- learning_rate: A `Tensor`, floating point value, or a schedule that is a
- `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable that
- takes no arguments and returns the actual value to use, the learning
- rate. Defaults to 0.001.
- beta_1: A float value or a constant float tensor, or a callable that takes
- no arguments and returns the actual value to use. The exponential decay
- rate for the 1st moment estimates. Defaults to 0.9.
- beta_2: A float value or a constant float tensor, or a callable that takes - no arguments and returns the actual value to use, The exponential decay - rate for the 2nd moment estimates. Defaults to 0.999. - epsilon: A small constant for numerical stability. This epsilon is - "epsilon hat" in the Kingma and Ba paper (in the formula just before - Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to - 1e-7. - amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm from - the paper "On the Convergence of Adam and beyond". Defaults to `False`. - name: Optional name for the operations created when applying gradients. - Defaults to "Adam". - **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, `lr`, - `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is clip - gradients by value, `decay` is included for backward compatibility to - allow time inverse decay of learning rate. `lr` is included for backward - compatibility, recommended to use `learning_rate` instead. - """ - - super().__init__(name, **kwargs) - self._set_hyper('learning_rate', kwargs.get('lr', learning_rate)) - self._set_hyper('decay', self._initial_decay) - self._set_hyper('beta_1', beta_1) - self._set_hyper('beta_2', beta_2) - self.epsilon = epsilon or backend_config.epsilon() - self.amsgrad = amsgrad - - def _create_slots(self, var_list): - # Create slots for the first and second moments. - # Separate for-loops to respect the ordering of slot variables from v1. - for var in var_list: - self.add_slot(var, 'm') - for var in var_list: - self.add_slot(var, 'v') - if self.amsgrad: - for var in var_list: - self.add_slot(var, 'vhat') - - def _prepare_local(self, var_device, var_dtype, apply_state): - super()._prepare_local(var_device, var_dtype, apply_state) - - local_step = tf.cast(self.iterations + 1, var_dtype) - beta_1_t = tf.identity(self._get_hyper('beta_1', var_dtype)) - beta_2_t = tf.identity(self._get_hyper('beta_2', var_dtype)) - beta_1_power = tf.pow(beta_1_t, local_step) - beta_2_power = tf.pow(beta_2_t, local_step) - lr = ( - apply_state[(var_device, var_dtype)]['lr_t'] * - (tf.sqrt(1 - beta_2_power) / (1 - beta_1_power))) - apply_state[(var_device, var_dtype)].update( - dict( - lr=lr, - epsilon=tf.convert_to_tensor( - self.epsilon, var_dtype), - beta_1_t=beta_1_t, - beta_1_power=beta_1_power, - one_minus_beta_1_t=1 - beta_1_t, - beta_2_t=beta_2_t, - beta_2_power=beta_2_power, - one_minus_beta_2_t=1 - beta_2_t)) - - def set_weights(self, weights): - params = self.weights - # If the weights are generated by Keras V1 optimizer, it includes vhats - # even without amsgrad, i.e, V1 optimizer has 3x + 1 variables, while V2 - # optimizer has 2x + 1 variables. Filter vhats out for compatibility. 
- num_vars = int((len(params) - 1) / 2)
- if len(weights) == 3 * num_vars + 1:
- weights = weights[:len(params)]
- super().set_weights(weights)
-
- @tf.function(jit_compile=True)
- def _resource_apply_dense(self, grad, var, apply_state=None):
- var_device, var_dtype = var.device, var.dtype.base_dtype
- coefficients = ((apply_state or {}).get((var_device, var_dtype)) or
- self._fallback_apply_state(var_device, var_dtype))
-
- m = self.get_slot(var, 'm')
- v = self.get_slot(var, 'v')
-
- alpha = (
- coefficients['lr_t'] * tf.sqrt(1 - coefficients['beta_2_power']) /
- (1 - coefficients['beta_1_power']))
- m.assign_add((grad - m) * (1 - coefficients['beta_1_t']))
- v.assign_add((tf.square(grad) - v) * (1 - coefficients['beta_2_t']))
- if self.amsgrad:
- vhat = self.get_slot(var, 'vhat')
- vhat.assign(tf.maximum(vhat, v))
- v = vhat
- var.assign_sub(
- (m * alpha) / (tf.sqrt(v) + coefficients['epsilon']))
-
- @tf.function(jit_compile=True)
- def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
- var_device, var_dtype = var.device, var.dtype.base_dtype
- coefficients = ((apply_state or {}).get((var_device, var_dtype)) or
- self._fallback_apply_state(var_device, var_dtype))
-
- # m_t = beta1 * m + (1 - beta1) * g_t
- m = self.get_slot(var, 'm')
- m_scaled_g_values = grad * coefficients['one_minus_beta_1_t']
- m.assign(m * coefficients['beta_1_t'])
- m.scatter_add(tf.IndexedSlices(m_scaled_g_values, indices))
-
- # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
- v = self.get_slot(var, 'v')
- v_scaled_g_values = (grad * grad) * coefficients['one_minus_beta_2_t']
- v.assign(v * coefficients['beta_2_t'])
- v.scatter_add(tf.IndexedSlices(v_scaled_g_values, indices))
-
- if not self.amsgrad:
- var.assign_sub(coefficients['lr'] * m /
- (tf.sqrt(v) + coefficients['epsilon']))
- else:
- v_hat = self.get_slot(var, 'vhat')
- v_hat.assign(tf.maximum(v_hat, v))
- var.assign_sub(coefficients['lr'] * m /
- (tf.sqrt(v_hat) + coefficients['epsilon']))
-
- def get_config(self):
- config = super().get_config()
- config.update({
- 'learning_rate': self._serialize_hyperparameter('learning_rate'),
- 'decay': self._initial_decay,
- 'beta_1': self._serialize_hyperparameter('beta_1'),
- 'beta_2': self._serialize_hyperparameter('beta_2'),
- 'epsilon': self.epsilon,
- 'amsgrad': self.amsgrad,
- })
- return config
diff --git a/keras/optimizers/optimizer_v2/adam_test.py b/keras/optimizers/optimizer_v2/adam_test.py
deleted file mode 100644
index 6384fa109596..000000000000
--- a/keras/optimizers/optimizer_v2/adam_test.py
+++ /dev/null
@@ -1,981 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================== -"""Tests for Adam.""" - -import tensorflow.compat.v2 as tf - -from absl.testing import parameterized -import numpy as np -from keras.testing_infra import test_combinations -from keras.optimizers import optimizer_v1 -from keras.optimizers.optimizer_v2 import adam -from keras.optimizers.schedules import learning_rate_schedule - - -def adam_update_numpy(param, - g_t, - t, - m, - v, - lr=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-7): - lr_t = lr * np.sqrt(1 - beta2**(t + 1)) / (1 - beta1**(t + 1)) - - m_t = beta1 * m + (1 - beta1) * g_t - v_t = beta2 * v + (1 - beta2) * g_t * g_t - - param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon) - return param_t, m_t, v_t - - -def adam_update_numpy_amsgrad(param, - g_t, - t, - m, - v, - vhat, - lr=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-7): - lr_t = lr * np.sqrt(1 - beta2**(t + 1)) / (1 - beta1**(t + 1)) - - m_t = beta1 * m + (1 - beta1) * g_t - v_t = beta2 * v + (1 - beta2) * g_t * g_t - vhat_t = np.maximum(vhat, v_t) - - param_t = param - lr_t * m_t / (np.sqrt(vhat_t) + epsilon) - return param_t, m_t, v_t, vhat_t - - -def adam_sparse_update_numpy_amsgrad(param, - indices, - g_t, - t, - m, - v, - vhat, - lr=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-7): - m_t, v_t, vhat_t, param_t = (np.copy(m), np.copy(v), np.copy(vhat), - np.copy(param)) - lr_t = lr * np.sqrt(1 - beta2**(t + 1)) / (1 - beta1**(t + 1)) - m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t - v_t_slice = beta2 * v[indices] + (1 - beta2) * g_t * g_t - m_t[indices] = m_t_slice - v_t[indices] = v_t_slice - v_hat_t = np.maximum(vhat_t, v_t) - v_hat_t_slice = v_hat_t[indices] - param_t_slice = param[indices] - ( - lr_t * (m_t_slice / (np.sqrt(v_hat_t_slice) + epsilon))) - param_t[indices] = param_t_slice - return param_t, m_t, v_t, vhat_t - - -def get_beta_accumulators(opt, dtype): - local_step = tf.cast(opt.iterations + 1, dtype) - beta_1_t = tf.cast(opt._get_hyper("beta_1"), dtype) - beta_1_power = tf.pow(beta_1_t, local_step) - beta_2_t = tf.cast(opt._get_hyper("beta_2"), dtype) - beta_2_power = tf.pow(beta_2_t, local_step) - return (beta_1_power, beta_2_power) - - -class AdamOptimizerTest(tf.test.TestCase, parameterized.TestCase): - - def testSparse(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. 
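`get_beta_accumulators` above reconstructs the bias-correction terms from the optimizer's step counter: after `t` applied updates, the next step sees `beta ** (t + 1)`. A quick eager check of those powers, assuming the optimizer_v2 `tf.keras` Adam:

```python
import tensorflow as tf

opt = tf.keras.optimizers.Adam()
var = tf.Variable([1.0, 2.0])
for t in range(3):
    opt.apply_gradients([(tf.constant([0.1, 0.1]), var)])
    step = float(opt.iterations.numpy())      # == t + 1
    print(step, 0.9 ** step, 0.999 ** step)   # the powers the tests assert
```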
- m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.0, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.0, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0_np_indices = np.array([0, 2], dtype=np.int32) - grads0 = tf.IndexedSlices( - tf.constant(grads0_np[grads0_np_indices]), - tf.constant(grads0_np_indices), tf.constant([3])) - grads1_np_indices = np.array([0, 2], dtype=np.int32) - grads1 = tf.IndexedSlices( - tf.constant(grads1_np[grads1_np_indices]), - tf.constant(grads1_np_indices), tf.constant([3])) - opt = adam.Adam() - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1)) - - beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) - # Run 3 steps of Adam - for t in range(3): - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - self.assertAllCloseAccordingToType(0.999**(t + 1), - self.evaluate(beta_2_power)) - update.run() - - var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testSparseDevicePlacement(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for index_dtype in [tf.int32, tf.int64]: - with tf.Graph().as_default(), self.cached_session( - force_gpu=tf.test.is_gpu_available()): - # If a GPU is available, tests that all optimizer ops can be placed on - # it (i.e. they have GPU kernels). - var = tf.Variable([[1.0], [2.0]]) - indices = tf.constant([0, 1], dtype=index_dtype) - g_sum = lambda: tf.reduce_sum(tf.gather(var, indices)) # pylint: disable=cell-var-from-loop - optimizer = adam.Adam(3.0) - minimize_op = optimizer.minimize(g_sum, var_list=[var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - minimize_op.run() - - def testSparseRepeatedIndices(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - repeated_index_update_var = tf.Variable( - [[1.0], [2.0]], dtype=dtype) - aggregated_update_var = tf.Variable( - [[1.0], [2.0]], dtype=dtype) - grad_repeated_index = tf.IndexedSlices( - tf.constant( - [0.1, 0.1], shape=[2, 1], dtype=dtype), - tf.constant([1, 1]), - tf.constant([2, 1])) - grad_aggregated = tf.IndexedSlices( - tf.constant( - [0.2], shape=[1, 1], dtype=dtype), - tf.constant([1]), - tf.constant([2, 1])) - repeated_update = adam.Adam().apply_gradients( - [(grad_repeated_index, repeated_index_update_var)]) - aggregated_update = adam.Adam().apply_gradients( - [(grad_aggregated, aggregated_update_var)]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(aggregated_update_var, - self.evaluate(repeated_index_update_var)) - for _ in range(3): - repeated_update.run() - aggregated_update.run() - self.assertAllClose(aggregated_update_var, - self.evaluate(repeated_index_update_var)) - - def doTestBasic(self, use_callable_params=False): - for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = lambda: 0.001 - beta1 = lambda: 0.9 - beta2 = lambda: 0.999 - epsilon = lambda: 1e-8 - if not use_callable_params: - learning_rate = learning_rate() - beta1 = beta1() - beta2 = beta2() - epsilon = epsilon() - - opt = adam.Adam(learning_rate=learning_rate) - if not tf.executing_eagerly(): - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 3 steps of Adam - for t in range(3): - beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - self.assertAllCloseAccordingToType(0.999**(t + 1), - self.evaluate(beta_2_power)) - if not tf.executing_eagerly(): - self.evaluate(update) - else: - opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testResourceBasic(self): - self.doTestBasic() - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testBasicCallableParams(self): - self.doTestBasic(use_callable_params=True) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasicWithAmsgrad(self): - for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with self.cached_session(): - # Initialize variables for numpy implementation. 
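`doTestBasic` above also runs with `use_callable_params=True`: each hyperparameter may be a zero-argument callable, re-evaluated whenever the optimizer needs the value. A minimal eager sketch, assuming the optimizer_v2 implementation being deleted here:

```python
import tensorflow as tf

# The callable is invoked on each read, so it can be driven by external state.
opt = tf.keras.optimizers.Adam(learning_rate=lambda: 0.001)
var = tf.Variable([1.0])
opt.apply_gradients([(tf.constant([0.1]), var)])
print(var.numpy())
```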
- m0, v0, v0hat, m1, v1, v1hat = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - opt = adam.Adam(amsgrad=True) - if not tf.executing_eagerly(): - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 3 steps of Adam - for t in range(3): - beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - self.assertAllCloseAccordingToType(0.999**(t + 1), - self.evaluate(beta_2_power)) - if not tf.executing_eagerly(): - self.evaluate(update) - else: - opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - var0_np, m0, v0, v0hat = adam_update_numpy_amsgrad( - var0_np, grads0_np, t, m0, v0, v0hat) - var1_np, m1, v1, v1hat = adam_update_numpy_amsgrad( - var1_np, grads1_np, t, m1, v1, v1hat) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testSparseWithAmsgrad(self): - # dtypes.half does not work on gpu + eager. - for dtype in [tf.float32, tf.float64]: - with self.cached_session(): - m0 = np.array([[0.0], [0.0]]) - v0 = np.array([[0.0], [0.0]]) - v0hat = np.array([[0.0], [0.0]]) - indices_np = np.array([1]) - indices = tf.constant(indices_np, dtype=tf.int32) - var0_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype) - repeated_index_update_var = tf.Variable(var0_np, dtype=dtype) - aggregated_update_var = tf.Variable(var0_np, dtype=dtype) - grads0_np = np.array([[0.2]], dtype=dtype.as_numpy_dtype) - grad_repeated_index = tf.IndexedSlices( - tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), - tf.constant([1, 1]), tf.constant([2, 1])) - grad_aggregated = tf.IndexedSlices(grads0_np, indices, - tf.constant([2, 1])) - opt_repeated = adam.Adam(amsgrad=True) - opt_aggregated = adam.Adam(amsgrad=True) - if not tf.executing_eagerly(): - repeated_update = opt_repeated.apply_gradients( - [(grad_repeated_index, repeated_index_update_var)]) - aggregated_update = opt_aggregated.apply_gradients( - [(grad_aggregated, aggregated_update_var)]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose( - self.evaluate(aggregated_update_var), - self.evaluate(repeated_index_update_var)) - for t in range(3): - if not tf.executing_eagerly(): - self.evaluate(repeated_update) - self.evaluate(aggregated_update) - else: - opt_repeated.apply_gradients( - [(grad_repeated_index, repeated_index_update_var)]) - opt_aggregated.apply_gradients( - [(grad_aggregated, aggregated_update_var)]) - - var0_np, m0, v0, v0hat = adam_sparse_update_numpy_amsgrad( - var0_np, indices_np, grads0_np, t, m0, v0, v0hat) - - # Validate updated params - self.assertAllCloseAccordingToType( - var0_np, self.evaluate(aggregated_update_var)) - self.assertAllCloseAccordingToType( - self.evaluate(aggregated_update_var), - self.evaluate(repeated_index_update_var)) - - def testBasicWithLearningRateDecay(self): - # TODO(tanzheny, omalleyt): Fix test in eager 
mode. - for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = 0.001 - beta_1 = 0.9 - beta_2 = 0.999 - epsilon = 1e-7 - decay = 0.5 - - opt = adam.Adam( - learning_rate=learning_rate, - beta_1=beta_1, - beta_2=beta_2, - epsilon=epsilon, - decay=decay) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 3 steps of Adam - for t in range(3): - self.evaluate(update) - lr_np = learning_rate / (1 + decay * t) - - var0_np, m0, v0 = adam_update_numpy( - var0_np, grads0_np, t, m0, v0, lr=lr_np) - var1_np, m1, v1 = adam_update_numpy( - var1_np, grads1_np, t, m1, v1, lr=lr_np) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testBasicWithLearningRateInverseTimeDecay(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = 0.001 - decay = 0.5 - lr_schedule = learning_rate_schedule.InverseTimeDecay( - learning_rate, decay_steps=1.0, decay_rate=decay) - beta_1 = 0.9 - beta_2 = 0.999 - epsilon = 1e-7 - - opt = adam.Adam( - learning_rate=lr_schedule, - beta_1=beta_1, - beta_2=beta_2, - epsilon=epsilon) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 3 steps of Adam - for t in range(3): - self.evaluate(update) - - lr_np = learning_rate / (1 + decay * t) - - var0_np, m0, v0 = adam_update_numpy( - var0_np, grads0_np, t, m0, v0, lr=lr_np) - var1_np, m1, v1 = adam_update_numpy( - var1_np, grads1_np, t, m1, v1, lr=lr_np) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testTensorLearningRate(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. 
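Both decay tests above expect the same closed form: at step `t` the effective rate is `lr / (1 + decay * t)`, whether it comes from the legacy `decay` keyword or an explicit `InverseTimeDecay` schedule with `decay_steps=1.0`. A quick numeric check:

```python
import tensorflow as tf

lr0, decay = 0.001, 0.5
schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    lr0, decay_steps=1.0, decay_rate=decay)
for t in range(3):
    # Agree up to float32 precision: 0.001, 0.000666..., 0.0005
    print(float(schedule(t)), lr0 / (1 + decay * t))
```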
- m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - opt = adam.Adam(tf.constant(0.001)) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) - # Run 3 steps of Adam - for t in range(3): - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - self.assertAllCloseAccordingToType(0.999**(t + 1), - self.evaluate(beta_2_power)) - update.run() - - var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testSharing(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - opt = adam.Adam() - update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - # Run 3 steps of intertwined Adam1 and Adam2. - for t in range(3): - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - self.assertAllCloseAccordingToType(0.999**(t + 1), - self.evaluate(beta_2_power)) - if t % 2 == 0: - update1.run() - else: - update2.run() - - var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testSlotsUniqueEager(self): - v1 = tf.Variable(1.) - v2 = tf.Variable(1.) - opt = adam.Adam(1.) - opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) - # There should be iteration, and two unique slot variables for v1 and v2. 
- self.assertLen(set(v.ref() for v in opt.variables()), 5) - self.assertEqual( - self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)) - - def testSetWeightsFromV1AdamWithoutMinimize(self): - keras_v1_adam = optimizer_v1.Adam() - keras_v2_adam = adam.Adam() - keras_v2_adam.set_weights(keras_v1_adam.get_weights()) - keras_v1_iteration = keras_v1_adam.iterations - keras_v2_iteration = keras_v2_adam.iterations - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual( - self.evaluate(keras_v1_iteration), self.evaluate(keras_v2_iteration)) - - def testConstructAdamWithLR(self): - opt = adam.Adam(lr=1.0) - opt_2 = adam.Adam(learning_rate=0.1, lr=1.0) - opt_3 = adam.Adam(learning_rate=0.1) - self.assertIsInstance(opt.lr, tf.Variable) - self.assertIsInstance(opt_2.lr, tf.Variable) - self.assertIsInstance(opt_3.lr, tf.Variable) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(opt.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) - - -class NonFusedAdamOptimizerTest(tf.test.TestCase, parameterized.TestCase): - - def testSparse(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.0, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.0, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0_np_indices = np.array([0, 2], dtype=np.int32) - grads0 = tf.IndexedSlices( - tf.constant(grads0_np[grads0_np_indices]), - tf.constant(grads0_np_indices), tf.constant([3])) - grads1_np_indices = np.array([0, 2], dtype=np.int32) - grads1 = tf.IndexedSlices( - tf.constant(grads1_np[grads1_np_indices]), - tf.constant(grads1_np_indices), tf.constant([3])) - opt = adam.NonFusedAdam() - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1)) - - beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) - # Run 3 steps of NonFusedAdam - for t in range(3): - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - self.assertAllCloseAccordingToType(0.999**(t + 1), - self.evaluate(beta_2_power)) - update.run() - - var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testSparseDevicePlacement(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for index_dtype in [tf.int32, tf.int64]: - with tf.Graph().as_default(), self.cached_session( - force_gpu=tf.test.is_gpu_available()): - # If a GPU is available, tests that all optimizer ops can be placed on - # it (i.e. they have GPU kernels). 
- var = tf.Variable([[1.0], [2.0]]) - indices = tf.constant([0, 1], dtype=index_dtype) - g_sum = lambda: tf.reduce_sum(tf.gather(var, indices)) # pylint: disable=cell-var-from-loop - optimizer = adam.NonFusedAdam(3.0) - minimize_op = optimizer.minimize(g_sum, var_list=[var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - minimize_op.run() - - def testSparseRepeatedIndices(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - repeated_index_update_var = tf.Variable( - [[1.0], [2.0]], dtype=dtype) - aggregated_update_var = tf.Variable( - [[1.0], [2.0]], dtype=dtype) - grad_repeated_index = tf.IndexedSlices( - tf.constant( - [0.1, 0.1], shape=[2, 1], dtype=dtype), - tf.constant([1, 1]), - tf.constant([2, 1])) - grad_aggregated = tf.IndexedSlices( - tf.constant( - [0.2], shape=[1, 1], dtype=dtype), - tf.constant([1]), - tf.constant([2, 1])) - repeated_update = adam.NonFusedAdam().apply_gradients( - [(grad_repeated_index, repeated_index_update_var)]) - aggregated_update = adam.NonFusedAdam().apply_gradients( - [(grad_aggregated, aggregated_update_var)]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(aggregated_update_var, - self.evaluate(repeated_index_update_var)) - for _ in range(3): - repeated_update.run() - aggregated_update.run() - self.assertAllClose(aggregated_update_var, - self.evaluate(repeated_index_update_var)) - - def doTestBasic(self, use_callable_params=False): - for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = lambda: 0.001 - beta1 = lambda: 0.9 - beta2 = lambda: 0.999 - epsilon = lambda: 1e-8 - if not use_callable_params: - learning_rate = learning_rate() - beta1 = beta1() - beta2 = beta2() - epsilon = epsilon() - - opt = adam.NonFusedAdam(learning_rate=learning_rate) - if not tf.executing_eagerly(): - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 3 steps of NonFusedAdam - for t in range(3): - beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - self.assertAllCloseAccordingToType(0.999**(t + 1), - self.evaluate(beta_2_power)) - if not tf.executing_eagerly(): - self.evaluate(update) - else: - opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType( - var0_np, self.evaluate(var0), rtol=1e-4, atol=1e-4) - self.assertAllCloseAccordingToType( - var1_np, self.evaluate(var1), rtol=1e-4, atol=1e-4) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testResourceBasic(self): - self.doTestBasic() - - 
@test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testBasicCallableParams(self): - self.doTestBasic(use_callable_params=True) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasicWithAmsgrad(self): - for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, v0hat, m1, v1, v1hat = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - opt = adam.NonFusedAdam(amsgrad=True) - if not tf.executing_eagerly(): - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 3 steps of NonFusedAdam - for t in range(3): - beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - self.assertAllCloseAccordingToType(0.999**(t + 1), - self.evaluate(beta_2_power)) - if not tf.executing_eagerly(): - self.evaluate(update) - else: - opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - var0_np, m0, v0, v0hat = adam_update_numpy_amsgrad( - var0_np, grads0_np, t, m0, v0, v0hat) - var1_np, m1, v1, v1hat = adam_update_numpy_amsgrad( - var1_np, grads1_np, t, m1, v1, v1hat) - - # Validate updated params - self.assertAllCloseAccordingToType( - var0_np, self.evaluate(var0), rtol=1e-4, atol=1e-4) - self.assertAllCloseAccordingToType( - var1_np, self.evaluate(var1), rtol=1e-4, atol=1e-4) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testSparseWithAmsgrad(self): - # dtypes.half does not work on gpu + eager. 
- for dtype in [tf.float32, tf.float64]: - with self.cached_session(): - m0 = np.array([[0.0], [0.0]]) - v0 = np.array([[0.0], [0.0]]) - v0hat = np.array([[0.0], [0.0]]) - indices_np = np.array([1]) - indices = tf.constant(indices_np, dtype=tf.int32) - var0_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype) - repeated_index_update_var = tf.Variable(var0_np, dtype=dtype) - aggregated_update_var = tf.Variable(var0_np, dtype=dtype) - grads0_np = np.array([[0.2]], dtype=dtype.as_numpy_dtype) - grad_repeated_index = tf.IndexedSlices( - tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), - tf.constant([1, 1]), tf.constant([2, 1])) - grad_aggregated = tf.IndexedSlices(grads0_np, indices, - tf.constant([2, 1])) - opt_repeated = adam.NonFusedAdam(amsgrad=True) - opt_aggregated = adam.NonFusedAdam(amsgrad=True) - if not tf.executing_eagerly(): - repeated_update = opt_repeated.apply_gradients( - [(grad_repeated_index, repeated_index_update_var)]) - aggregated_update = opt_aggregated.apply_gradients( - [(grad_aggregated, aggregated_update_var)]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose( - self.evaluate(aggregated_update_var), - self.evaluate(repeated_index_update_var)) - for t in range(3): - if not tf.executing_eagerly(): - self.evaluate(repeated_update) - self.evaluate(aggregated_update) - else: - opt_repeated.apply_gradients( - [(grad_repeated_index, repeated_index_update_var)]) - opt_aggregated.apply_gradients( - [(grad_aggregated, aggregated_update_var)]) - - var0_np, m0, v0, v0hat = adam_sparse_update_numpy_amsgrad( - var0_np, indices_np, grads0_np, t, m0, v0, v0hat) - - # Validate updated params - self.assertAllCloseAccordingToType( - var0_np, self.evaluate(aggregated_update_var)) - self.assertAllCloseAccordingToType( - self.evaluate(aggregated_update_var), - self.evaluate(repeated_index_update_var)) - - def testBasicWithLearningRateDecay(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = 0.001 - beta_1 = 0.9 - beta_2 = 0.999 - epsilon = 1e-7 - decay = 0.5 - - opt = adam.NonFusedAdam( - learning_rate=learning_rate, - beta_1=beta_1, - beta_2=beta_2, - epsilon=epsilon, - decay=decay) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 3 steps of NonFusedAdam - for t in range(3): - self.evaluate(update) - lr_np = learning_rate / (1 + decay * t) - - var0_np, m0, v0 = adam_update_numpy( - var0_np, grads0_np, t, m0, v0, lr=lr_np) - var1_np, m1, v1 = adam_update_numpy( - var1_np, grads1_np, t, m1, v1, lr=lr_np) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testBasicWithLearningRateInverseTimeDecay(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = 0.001 - decay = 0.5 - lr_schedule = learning_rate_schedule.InverseTimeDecay( - learning_rate, decay_steps=1.0, decay_rate=decay) - beta_1 = 0.9 - beta_2 = 0.999 - epsilon = 1e-7 - - opt = adam.NonFusedAdam( - learning_rate=lr_schedule, - beta_1=beta_1, - beta_2=beta_2, - epsilon=epsilon) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 3 steps of NonFusedAdam - for t in range(3): - self.evaluate(update) - - lr_np = learning_rate / (1 + decay * t) - - var0_np, m0, v0 = adam_update_numpy( - var0_np, grads0_np, t, m0, v0, lr=lr_np) - var1_np, m1, v1 = adam_update_numpy( - var1_np, grads1_np, t, m1, v1, lr=lr_np) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testTensorLearningRate(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - opt = adam.NonFusedAdam(tf.constant(0.001)) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) - # Run 3 steps of NonFusedAdam - for t in range(3): - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - self.assertAllCloseAccordingToType(0.999**(t + 1), - self.evaluate(beta_2_power)) - update.run() - - var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testSharing(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. 
-      m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
-      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
-      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
-      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
-      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
-
-      var0 = tf.Variable(var0_np)
-      var1 = tf.Variable(var1_np)
-      grads0 = tf.constant(grads0_np)
-      grads1 = tf.constant(grads1_np)
-      opt = adam.NonFusedAdam()
-      update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
-      update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
-      self.evaluate(tf.compat.v1.global_variables_initializer())
-
-      beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
-
-      # Fetch params to validate initial values
-      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
-      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
-
-      # Run 3 steps of intertwined NonFusedAdam1 and NonFusedAdam2.
-      for t in range(3):
-        self.assertAllCloseAccordingToType(0.9**(t + 1),
-                                           self.evaluate(beta_1_power))
-        self.assertAllCloseAccordingToType(0.999**(t + 1),
-                                           self.evaluate(beta_2_power))
-        if t % 2 == 0:
-          update1.run()
-        else:
-          update2.run()
-
-        var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
-        var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
-
-        # Validate updated params
-        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
-        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
-
-
-if __name__ == "__main__":
-  tf.test.main()
diff --git a/keras/optimizers/optimizer_v2/adamax.py b/keras/optimizers/optimizer_v2/adamax.py
deleted file mode 100644
index 972a08ed43bd..000000000000
--- a/keras/optimizers/optimizer_v2/adamax.py
+++ /dev/null
@@ -1,184 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Adamax optimizer implementation."""
-
-import tensorflow.compat.v2 as tf
-from keras import backend_config
-from keras.optimizers.optimizer_v2 import optimizer_v2
-from tensorflow.python.util.tf_export import keras_export
-
-
-# pylint: disable=g-classes-have-attributes
-@keras_export('keras.optimizers.Adamax')
-class Adamax(optimizer_v2.OptimizerV2):
-  """Optimizer that implements the Adamax algorithm.
-
-  It is a variant of Adam based on the infinity norm.
-  Default parameters follow those provided in the paper.
-  Adamax is sometimes superior to Adam, especially in models with embeddings.
-
-  Initialization:
-
-  ```python
-  m = 0  # Initialize initial 1st moment vector
-  v = 0  # Initialize the exponentially weighted infinity norm
-  t = 0  # Initialize timestep
-  ```
-
-  The update rule for parameter `w` with gradient `g` is
-  described at the end of section 7.1 of the paper:
-
-  ```python
-  t += 1
-  m = beta1 * m + (1 - beta1) * g
-  v = max(beta2 * v, abs(g))
-  current_lr = learning_rate / (1 - beta1 ** t)
-  w = w - current_lr * m / (v + epsilon)
-  ```
-
-  Similarly to `Adam`, the epsilon is added for numerical stability
-  (especially to get rid of division by zero when `v_t == 0`).
-
-  In contrast to `Adam`, the sparse implementation of this algorithm
-  (used when the gradient is an IndexedSlices object, typically because of
-  `tf.gather` or an embedding lookup in the forward pass) only updates
-  variable slices and corresponding `m_t`, `v_t` terms when that part of
-  the variable was used in the forward pass. This means that the sparse
-  behavior is in contrast to the dense behavior (similar to some momentum
-  implementations which ignore momentum unless a variable slice was actually
-  used).
-
-  Args:
-    learning_rate: A `Tensor`, floating point value, or a schedule that is a
-      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
-    beta_1: A float value or a constant float tensor. The exponential decay
-      rate for the 1st moment estimates.
-    beta_2: A float value or a constant float tensor. The exponential decay
-      rate for the exponentially weighted infinity norm.
-    epsilon: A small constant for numerical stability.
-    name: Optional name for the operations created when applying gradients.
-      Defaults to `"Adamax"`.
-    **kwargs: keyword arguments. Allowed arguments are `clipvalue`,
-      `clipnorm`, `global_clipnorm`.
-      If `clipvalue` (float) is set, the gradient of each weight
-      is clipped to be no higher than this value.
-      If `clipnorm` (float) is set, the gradient of each weight
-      is individually clipped so that its norm is no higher than this value.
-      If `global_clipnorm` (float) is set, the gradient of all weights is
-      clipped so that their global norm is no higher than this value.
-
-  Reference:
-    - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
-  """
-
-  _HAS_AGGREGATE_GRAD = True
-
-  def __init__(self,
-               learning_rate=0.001,
-               beta_1=0.9,
-               beta_2=0.999,
-               epsilon=1e-7,
-               name='Adamax',
-               **kwargs):
-    super().__init__(name, **kwargs)
-    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
-    self._set_hyper('decay', self._initial_decay)
-    self._set_hyper('beta_1', beta_1)
-    self._set_hyper('beta_2', beta_2)
-    self.epsilon = epsilon or backend_config.epsilon()
-
-  def _create_slots(self, var_list):
-    # Separate for-loops to respect the ordering of slot variables from v1.
-    for var in var_list:
-      self.add_slot(var, 'm')  # Create slots for the first moments.
-    for var in var_list:
-      self.add_slot(var, 'v')  # Create slots for the second moments.
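
As a reading aid for the docstring above, here is the Adamax update rule as a self-contained NumPy sketch; the function name and default values are illustrative, not part of the deleted module:

```python
import numpy as np

def adamax_step(w, g, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-7):
    """One Adamax step for parameter `w` with gradient `g`; `t` is 1-based."""
    m = beta1 * m + (1 - beta1) * g       # first-moment estimate
    v = np.maximum(beta2 * v, np.abs(g))  # exponentially weighted infinity norm
    current_lr = lr / (1 - beta1 ** t)    # bias correction for m
    return w - current_lr * m / (v + eps), m, v

# Three steps on a toy parameter, mirroring the 3-step loops in the tests.
w, m, v = np.array([1.0, 2.0]), np.zeros(2), np.zeros(2)
for t in range(1, 4):
    w, m, v = adamax_step(w, np.array([0.1, 0.1]), m, v, t)
```
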
- - def _prepare_local(self, var_device, var_dtype, apply_state): - super()._prepare_local(var_device, var_dtype, apply_state) - - local_step = tf.cast(self.iterations + 1, var_dtype) - beta_1_t = tf.identity(self._get_hyper('beta_1', var_dtype)) - beta_2_t = tf.identity(self._get_hyper('beta_2', var_dtype)) - beta_1_power = tf.pow(beta_1_t, local_step) - lr_t = apply_state[(var_device, var_dtype)]['lr_t'] - - apply_state[(var_device, var_dtype)].update( - dict( - neg_scaled_lr=-lr_t / (1 - beta_1_power), - epsilon=tf.convert_to_tensor( - self.epsilon, var_dtype), - beta_1_t=beta_1_t, - beta_1_power=beta_1_power, - one_minus_beta_1_t=1 - beta_1_t, - beta_2_t=beta_2_t, - zero=tf.zeros((), dtype=tf.int64))) - - def _resource_apply_dense(self, grad, var, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - m = self.get_slot(var, 'm') - v = self.get_slot(var, 'v') - return tf.raw_ops.ResourceApplyAdaMax( - var=var.handle, - m=m.handle, - v=v.handle, - beta1_power=coefficients['beta_1_power'], - lr=coefficients['lr_t'], - beta1=coefficients['beta_1_t'], - beta2=coefficients['beta_2_t'], - epsilon=coefficients['epsilon'], - grad=grad, - use_locking=self._use_locking) - - def _resource_apply_sparse(self, grad, var, indices, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - # m_t = beta1 * m + (1 - beta1) * g_t - m = self.get_slot(var, 'm') - m_slice = tf.gather(m, indices, axis=coefficients['zero']) - m_t_slice = (m_slice * coefficients['beta_1_t'] + - grad * coefficients['one_minus_beta_1_t']) - with tf.control_dependencies([m_t_slice]): - m_t = self._resource_scatter_update(m, indices, m_t_slice) - - # u_t = max(beta2 * u, abs(g_t)) - v = self.get_slot(var, 'v') - v_slice = tf.gather(v, indices, axis=coefficients['zero']) - v_t_slice = tf.maximum(v_slice * coefficients['beta_2_t'], - tf.abs(grad)) - with tf.control_dependencies([v_t_slice]): - v_t = self._resource_scatter_update(v, indices, v_t_slice) - # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t - var_slice = coefficients['neg_scaled_lr'] * ( - m_t_slice / (v_t_slice + coefficients['epsilon'])) - with tf.control_dependencies([var_slice]): - var_update = self._resource_scatter_add(var, indices, var_slice) - return tf.group(*[var_update, m_t, v_t]) - - def get_config(self): - config = super().get_config() - config.update({ - 'learning_rate': self._serialize_hyperparameter('learning_rate'), - 'decay': self._initial_decay, - 'beta_1': self._serialize_hyperparameter('beta_1'), - 'beta_2': self._serialize_hyperparameter('beta_2'), - 'epsilon': self.epsilon, - }) - return config diff --git a/keras/optimizers/optimizer_v2/adamax_test.py b/keras/optimizers/optimizer_v2/adamax_test.py deleted file mode 100644 index 5d5eb52bfd71..000000000000 --- a/keras/optimizers/optimizer_v2/adamax_test.py +++ /dev/null @@ -1,368 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Adamax.""" - -import tensorflow.compat.v2 as tf - -from absl.testing import parameterized -import numpy as np -from keras.testing_infra import test_combinations -from keras.optimizers.optimizer_v2 import adamax - - -def adamax_update_numpy(param, - g_t, - t, - m, - v, - alpha=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-8): - m_t = beta1 * m + (1 - beta1) * g_t - v_t = np.maximum(beta2 * v, np.abs(g_t)) - param_t = param - (alpha / (1 - beta1**(t + 1))) * (m_t / (v_t + epsilon)) - return param_t, m_t, v_t - - -def adamax_sparse_update_numpy(param, - indices, - g_t, - t, - m, - v, - alpha=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-8): - m_t, v_t, param_t = np.copy(m), np.copy(v), np.copy(param) - m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t - v_t_slice = np.maximum(beta2 * v[indices], np.abs(g_t)) - param_t_slice = param[indices] - ( - (alpha / (1 - beta1**(t + 1))) * (m_t_slice / (v_t_slice + epsilon))) - m_t[indices] = m_t_slice - v_t[indices] = v_t_slice - param_t[indices] = param_t_slice - return param_t, m_t, v_t - - -def get_beta_accumulators(opt, dtype): - local_step = tf.cast(opt.iterations + 1, dtype) - beta_1_t = tf.cast(opt._get_hyper("beta_1"), dtype) - beta_1_power = tf.pow(beta_1_t, local_step) - return beta_1_power - - -class AdamaxOptimizerTest(tf.test.TestCase, parameterized.TestCase): - - def testResourceSparse(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. 
- zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype) # pylint: disable=cell-var-from-loop - m0, v0, m1, v1 = zero_slots(), zero_slots(), zero_slots(), zero_slots() - var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - - grads0_np_indices = np.array([0, 1], dtype=np.int32) - grads0 = tf.IndexedSlices( - tf.constant(grads0_np), - tf.constant(grads0_np_indices), tf.constant([3])) - grads1_np_indices = np.array([2, 1], dtype=np.int32) - grads1 = tf.IndexedSlices( - tf.constant(grads1_np), - tf.constant(grads1_np_indices), tf.constant([3])) - opt = adamax.Adamax() - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0, 3.0], var0) - self.assertAllClose([4.0, 5.0, 6.0], var1) - - beta1_power = get_beta_accumulators(opt, dtype) - - # Run 3 steps of Adamax - for t in range(3): - self.assertAllCloseAccordingToType(0.9**(t + 1), beta1_power) - update.run() - - var0_np, m0, v0 = adamax_sparse_update_numpy( - var0_np, grads0_np_indices, grads0_np, t, m0, v0) - var1_np, m1, v1 = adamax_sparse_update_numpy( - var1_np, grads1_np_indices, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, var0) - self.assertAllCloseAccordingToType(var1_np, var1) - - def testSparseDevicePlacement(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for index_dtype in [tf.int32, tf.int64]: - with tf.Graph().as_default(), self.cached_session( - force_gpu=tf.test.is_gpu_available()): - # If a GPU is available, tests that all optimizer ops can be placed on - # it (i.e. they have GPU kernels). - var = tf.Variable([[1.0], [2.0]]) - indices = tf.constant([0, 1], dtype=index_dtype) - g_sum = lambda: tf.reduce_sum(tf.gather(var, indices)) # pylint: disable=cell-var-from-loop - optimizer = adamax.Adamax(3.0) - minimize_op = optimizer.minimize(g_sum, var_list=[var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - minimize_op.run() - - def testSparseRepeatedIndices(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - repeated_index_update_var = tf.Variable( - [[1.0], [2.0]], dtype=dtype) - aggregated_update_var = tf.Variable( - [[1.0], [2.0]], dtype=dtype) - grad_repeated_index = tf.IndexedSlices( - tf.constant( - [0.1, 0.1], shape=[2, 1], dtype=dtype), - tf.constant([1, 1]), - tf.constant([2, 1])) - grad_aggregated = tf.IndexedSlices( - tf.constant( - [0.2], shape=[1, 1], dtype=dtype), - tf.constant([1]), - tf.constant([2, 1])) - repeated_update = adamax.Adamax().apply_gradients( - [(grad_repeated_index, repeated_index_update_var)]) - aggregated_update = adamax.Adamax().apply_gradients( - [(grad_aggregated, aggregated_update_var)]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(aggregated_update_var, - repeated_index_update_var.eval()) - for _ in range(3): - repeated_update.run() - aggregated_update.run() - self.assertAllClose(aggregated_update_var, - repeated_index_update_var.eval()) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasic(self): - for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with self.session(graph=tf.Graph(), use_gpu=True): - # Initialize variables for numpy implementation. - m0 = np.array([0.0, 0.0]) - v0 = np.array([0.0, 0.0]) - m1 = np.array([0.0, 0.0]) - v1 = np.array([0.0, 0.0]) - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - opt = adamax.Adamax() - if not tf.executing_eagerly(): - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - # Run 3 steps of Adamax - for t in range(3): - beta_1_power = get_beta_accumulators(opt, dtype) - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - if not tf.executing_eagerly(): - self.evaluate(update) - else: - opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType( - var0_np, self.evaluate(var0), rtol=1e-2) - self.assertAllCloseAccordingToType( - var1_np, self.evaluate(var1), rtol=1e-2) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasicWithLearningRateDecay(self): - for i, dtype in enumerate([tf.half, tf.float32, tf.float64]): - with self.session(graph=tf.Graph(), use_gpu=True): - # Initialize variables for numpy implementation. 
- m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, name="var0_%d" % i) - var1 = tf.Variable(var1_np, name="var1_%d" % i) - - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - - learning_rate = 0.001 - decay = 0.002 - opt = adamax.Adamax(learning_rate=learning_rate, decay=decay) - if not tf.executing_eagerly(): - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - # Run 3 steps of Adamax - for t in range(3): - beta_1_power = get_beta_accumulators(opt, dtype) - self.assertAllCloseAccordingToType(0.9**(t + 1), - self.evaluate(beta_1_power)) - if not tf.executing_eagerly(): - self.evaluate(update) - else: - opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - - lr = learning_rate / (1 + decay * t) - - var0_np, m0, v0 = adamax_update_numpy( - var0_np, grads0_np, t, m0, v0, alpha=lr) - var1_np, m1, v1 = adamax_update_numpy( - var1_np, grads1_np, t, m1, v1, alpha=lr) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0), - rtol=1e-2) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1), - rtol=1e-2) - - def testTensorLearningRate(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - opt = adamax.Adamax(tf.constant(0.001)) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], var0) - self.assertAllClose([3.0, 4.0], var1) - - beta1_power = get_beta_accumulators(opt, dtype) - - # Run 3 steps of Adamax - for t in range(3): - self.assertAllCloseAccordingToType(0.9**(t + 1), beta1_power) - update.run() - - var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, var0) - self.assertAllCloseAccordingToType(var1_np, var1) - - def testSharing(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. 
- m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - opt = adamax.Adamax() - update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - beta1_power = get_beta_accumulators(opt, dtype) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], var0) - self.assertAllClose([3.0, 4.0], var1) - - # Run 3 steps of intertwined Adamax1 and Adamax2. - for t in range(3): - self.assertAllCloseAccordingToType(0.9**(t + 1), beta1_power) - if t % 2 == 0: - update1.run() - else: - update2.run() - - var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, var0) - self.assertAllCloseAccordingToType(var1_np, var1) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testSlotsUniqueEager(self): - v1 = tf.Variable(1.) - v2 = tf.Variable(1.) - opt = adamax.Adamax(1.) - opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) - # There should be iteration, and two unique slot variables for v1 and v2. - self.assertLen({id(v) for v in opt.variables()}, 5) - - def testConstructAdamaxWithLR(self): - opt = adamax.Adamax(lr=1.0) - opt_2 = adamax.Adamax(learning_rate=0.1, lr=1.0) - opt_3 = adamax.Adamax(learning_rate=0.1) - self.assertIsInstance(opt.lr, tf.Variable) - self.assertIsInstance(opt_2.lr, tf.Variable) - self.assertIsInstance(opt_3.lr, tf.Variable) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(opt.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) - - -if __name__ == "__main__": - tf.test.main() diff --git a/keras/optimizers/optimizer_v2/ftrl.py b/keras/optimizers/optimizer_v2/ftrl.py deleted file mode 100644 index 104f6c551952..000000000000 --- a/keras/optimizers/optimizer_v2/ftrl.py +++ /dev/null @@ -1,270 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ==============================================================================
-"""Ftrl-proximal optimizer implementation."""
-# pylint: disable=g-bad-import-order
-# pylint: disable=g-classes-have-attributes
-
-import tensorflow.compat.v2 as tf
-from keras.optimizers.optimizer_v2 import optimizer_v2
-from tensorflow.python.util.tf_export import keras_export
-
-
-# pylint: disable=g-classes-have-attributes
-@keras_export('keras.optimizers.Ftrl')
-class Ftrl(optimizer_v2.OptimizerV2):
-  r"""Optimizer that implements the FTRL algorithm.
-
-  "Follow The Regularized Leader" (FTRL) is an optimization algorithm developed
-  at Google for click-through rate prediction in the early 2010s. It is most
-  suitable for shallow models with large and sparse feature spaces.
-  The algorithm is described by
-  [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf).
-  The Keras version has support for both online L2 regularization
-  (the L2 regularization described in the paper
-  above) and shrinkage-type L2 regularization
-  (which is the addition of an L2 penalty to the loss function).
-
-  Initialization:
-
-  ```python
-  n = 0
-  sigma = 0
-  z = 0
-  ```
-
-  Update rule for one variable `w`:
-
-  ```python
-  prev_n = n
-  n = n + g ** 2
-  sigma = (sqrt(n) - sqrt(prev_n)) / lr
-  z = z + g - sigma * w
-  if abs(z) < lambda_1:
-    w = 0
-  else:
-    w = (sgn(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2)
-  ```
-
-  Notation:
-
-  - `lr` is the learning rate (written `alpha` in the closed-form update above)
-  - `g` is the gradient for the variable
-  - `lambda_1` is the L1 regularization strength
-  - `lambda_2` is the L2 regularization strength
-
-  Check the documentation for the `l2_shrinkage_regularization_strength`
-  parameter for more details when shrinkage is enabled, in which case the
-  gradient is replaced with a gradient with shrinkage.
-
-  Args:
-    learning_rate: A `Tensor`, floating point value, or a schedule that is a
-      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
-    learning_rate_power: A float value, must be less than or equal to zero.
-      Controls how the learning rate decreases during training. Use zero for
-      a fixed learning rate.
-    initial_accumulator_value: The starting value for accumulators.
-      Only zero or positive values are allowed.
-    l1_regularization_strength: A float value, must be greater than or
-      equal to zero. Defaults to 0.0.
-    l2_regularization_strength: A float value, must be greater than or
-      equal to zero. Defaults to 0.0.
-    name: Optional name prefix for the operations created when applying
-      gradients. Defaults to `"Ftrl"`.
-    l2_shrinkage_regularization_strength: A float value, must be greater than
-      or equal to zero. This differs from L2 above in that the L2 above is a
-      stabilization penalty, whereas this L2 shrinkage is a magnitude penalty.
-      When input is sparse, shrinkage will only happen on the active weights.
-    beta: A float value, representing the beta value from the paper.
-      Defaults to 0.0.
-    **kwargs: keyword arguments. Allowed arguments are `clipvalue`,
-      `clipnorm`, `global_clipnorm`.
-      If `clipvalue` (float) is set, the gradient of each weight
-      is clipped to be no higher than this value.
-      If `clipnorm` (float) is set, the gradient of each weight
-      is individually clipped so that its norm is no higher than this value.
-      If `global_clipnorm` (float) is set, the gradient of all weights is
-      clipped so that their global norm is no higher than this value.
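
As a reading aid for the update rule above, here is a self-contained NumPy sketch of one FTRL-Proximal step without L2 shrinkage; the names are illustrative, with `alpha` standing in for the learning rate as in the paper:

```python
import numpy as np

def ftrl_step(w, g, n, z, alpha=0.1, beta=0.0, lambda_1=0.0, lambda_2=0.0):
    """One FTRL-Proximal step for a vector parameter `w` with gradient `g`."""
    prev_n = n
    n = n + g ** 2
    sigma = (np.sqrt(n) - np.sqrt(prev_n)) / alpha
    z = z + g - sigma * w
    # Closed-form, per-coordinate solution of the proximal step: weights whose
    # |z| is below the L1 threshold are set exactly to zero (sparsity).
    w = np.where(
        np.abs(z) < lambda_1,
        0.0,
        (np.sign(z) * lambda_1 - z) / ((beta + np.sqrt(n)) / alpha + lambda_2))
    return w, n, z

w, n, z = np.zeros(2), np.zeros(2), np.zeros(2)
for _ in range(3):
    w, n, z = ftrl_step(w, np.array([0.1, 0.2]), n, z, alpha=3.0)
```
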
- - Reference: - - [McMahan et al., 2013]( - https://research.google.com/pubs/archive/41159.pdf) - """ - - def __init__(self, - learning_rate=0.001, - learning_rate_power=-0.5, - initial_accumulator_value=0.1, - l1_regularization_strength=0.0, - l2_regularization_strength=0.0, - name='Ftrl', - l2_shrinkage_regularization_strength=0.0, - beta=0.0, - **kwargs): - super().__init__(name, **kwargs) - - if initial_accumulator_value < 0.0: - raise ValueError( - '`initial_accumulator_value` needs to be positive or zero. Received: ' - f'initial_accumulator_value={initial_accumulator_value}.') - if learning_rate_power > 0.0: - raise ValueError( - '`learning_rate_power` needs to be negative or zero. Received: ' - f'learning_rate_power={learning_rate_power}.') - if l1_regularization_strength < 0.0: - raise ValueError( - '`l1_regularization_strength` needs to be positive or zero. ' - f'Received: l1_regularization_strength={l1_regularization_strength}.') - if l2_regularization_strength < 0.0: - raise ValueError( - '`l2_regularization_strength` needs to be positive or zero. ' - f'Received: l2_regularization_strength={l2_regularization_strength}.') - if l2_shrinkage_regularization_strength < 0.0: - raise ValueError( - '`l2_shrinkage_regularization_strength` needs to be positive or ' - 'zero. Received: l2_shrinkage_regularization_strength' - f'={l2_shrinkage_regularization_strength}.') - - self._set_hyper('learning_rate', learning_rate) - self._set_hyper('decay', self._initial_decay) - self._set_hyper('learning_rate_power', learning_rate_power) - self._set_hyper('l1_regularization_strength', l1_regularization_strength) - self._set_hyper('l2_regularization_strength', l2_regularization_strength) - self._set_hyper('beta', beta) - self._initial_accumulator_value = initial_accumulator_value - self._l2_shrinkage_regularization_strength = ( - l2_shrinkage_regularization_strength) - - def _create_slots(self, var_list): - # Create the "accum" and "linear" slots. - for var in var_list: - dtype = var.dtype.base_dtype - init = tf.compat.v1.constant_initializer( - self._initial_accumulator_value, dtype=dtype) - self.add_slot(var, 'accumulator', init) - self.add_slot(var, 'linear') - - def _prepare_local(self, var_device, var_dtype, apply_state): - super()._prepare_local(var_device, var_dtype, apply_state) - apply_state[(var_device, var_dtype)].update( - dict( - learning_rate_power=tf.identity( - self._get_hyper('learning_rate_power', var_dtype)), - l1_regularization_strength=tf.identity( - self._get_hyper('l1_regularization_strength', var_dtype)), - l2_regularization_strength=tf.identity( - self._get_hyper('l2_regularization_strength', var_dtype)), - beta=tf.identity(self._get_hyper('beta', var_dtype)), - l2_shrinkage_regularization_strength=tf.cast( - self._l2_shrinkage_regularization_strength, var_dtype))) - - def _resource_apply_dense(self, grad, var, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - # Adjust L2 regularization strength to include beta to avoid the underlying - # TensorFlow ops needing to include it. - adjusted_l2_regularization_strength = ( - coefficients['l2_regularization_strength'] + coefficients['beta'] / - (2. 
* coefficients['lr_t'])) - - accum = self.get_slot(var, 'accumulator') - linear = self.get_slot(var, 'linear') - - if self._l2_shrinkage_regularization_strength <= 0.0: - return tf.raw_ops.ResourceApplyFtrl( - var=var.handle, - accum=accum.handle, - linear=linear.handle, - grad=grad, - lr=coefficients['lr_t'], - l1=coefficients['l1_regularization_strength'], - l2=adjusted_l2_regularization_strength, - lr_power=coefficients['learning_rate_power'], - use_locking=self._use_locking) - else: - return tf.raw_ops.ResourceApplyFtrlV2( - var=var.handle, - accum=accum.handle, - linear=linear.handle, - grad=grad, - lr=coefficients['lr_t'], - l1=coefficients['l1_regularization_strength'], - l2=adjusted_l2_regularization_strength, - l2_shrinkage=coefficients['l2_shrinkage_regularization_strength'], - lr_power=coefficients['learning_rate_power'], - use_locking=self._use_locking) - - def _resource_apply_sparse(self, grad, var, indices, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - # Adjust L2 regularization strength to include beta to avoid the underlying - # TensorFlow ops needing to include it. - adjusted_l2_regularization_strength = ( - coefficients['l2_regularization_strength'] + coefficients['beta'] / - (2. * coefficients['lr_t'])) - - accum = self.get_slot(var, 'accumulator') - linear = self.get_slot(var, 'linear') - - if self._l2_shrinkage_regularization_strength <= 0.0: - return tf.raw_ops.ResourceSparseApplyFtrl( - var=var.handle, - accum=accum.handle, - linear=linear.handle, - grad=grad, - indices=indices, - lr=coefficients['lr_t'], - l1=coefficients['l1_regularization_strength'], - l2=adjusted_l2_regularization_strength, - lr_power=coefficients['learning_rate_power'], - use_locking=self._use_locking) - else: - return tf.raw_ops.ResourceSparseApplyFtrlV2( - var=var.handle, - accum=accum.handle, - linear=linear.handle, - grad=grad, - indices=indices, - lr=coefficients['lr_t'], - l1=coefficients['l1_regularization_strength'], - l2=adjusted_l2_regularization_strength, - l2_shrinkage=coefficients['l2_shrinkage_regularization_strength'], - lr_power=coefficients['learning_rate_power'], - use_locking=self._use_locking) - - def get_config(self): - config = super().get_config() - config.update({ - 'learning_rate': - self._serialize_hyperparameter('learning_rate'), - 'decay': - self._initial_decay, - 'initial_accumulator_value': - self._initial_accumulator_value, - 'learning_rate_power': - self._serialize_hyperparameter('learning_rate_power'), - 'l1_regularization_strength': - self._serialize_hyperparameter('l1_regularization_strength'), - 'l2_regularization_strength': - self._serialize_hyperparameter('l2_regularization_strength'), - 'beta': - self._serialize_hyperparameter('beta'), - 'l2_shrinkage_regularization_strength': - self._l2_shrinkage_regularization_strength, - }) - return config diff --git a/keras/optimizers/optimizer_v2/ftrl_test.py b/keras/optimizers/optimizer_v2/ftrl_test.py deleted file mode 100644 index 187e868c30d2..000000000000 --- a/keras/optimizers/optimizer_v2/ftrl_test.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functional tests for Ftrl operations.""" - -import tensorflow.compat.v2 as tf - -import numpy as np -from keras.optimizers.optimizer_v2 import ftrl - - -class FtrlOptimizerTest(tf.test.TestCase): - - def doTestFtrlwithoutRegularization(self, use_resource=False): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - if use_resource: - var0 = tf.Variable([0.0, 0.0], dtype=dtype) - var1 = tf.Variable([0.0, 0.0], dtype=dtype) - else: - var0 = tf.Variable([0.0, 0.0], dtype=dtype) - var1 = tf.Variable([0.0, 0.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.2], dtype=dtype) - grads1 = tf.constant([0.01, 0.02], dtype=dtype) - opt = ftrl.Ftrl( - 3.0, - initial_accumulator_value=0.1, - l1_regularization_strength=0.0, - l2_regularization_strength=0.0) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllClose([0.0, 0.0], v0_val) - self.assertAllClose([0.0, 0.0], v1_val) - - # Run 3 steps FTRL - for _ in range(3): - update.run() - - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType( - np.array([-2.60260963, -4.29698515]), v0_val) - self.assertAllCloseAccordingToType( - np.array([-0.28432083, -0.56694895]), v1_val) - - def testFtrlWithoutRegularization(self): - self.doTestFtrlwithoutRegularization(use_resource=False) - - def testResourceFtrlWithoutRegularization(self): - self.doTestFtrlwithoutRegularization(use_resource=True) - - def testFtrlwithoutRegularization2(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([4.0, 3.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.2], dtype=dtype) - grads1 = tf.constant([0.01, 0.02], dtype=dtype) - - opt = ftrl.Ftrl( - 3.0, - initial_accumulator_value=0.1, - l1_regularization_strength=0.0, - l2_regularization_strength=0.0) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) - self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) - - # Run 3 steps FTRL - for _ in range(3): - update.run() - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType( - np.array([-2.55607247, -3.98729396]), v0_val) - self.assertAllCloseAccordingToType( - np.array([-0.28232238, -0.56096673]), v1_val) - - def testMinimizeSparseResourceVariable(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) - x = tf.constant([[4.0], [5.0]], dtype=dtype) - - def loss(): - pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop - return pred * pred - - sgd_op = ftrl.Ftrl(1.0).minimize(loss, var_list=[var0]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([[1.0, 2.0]], self.evaluate(var0)) - # Run 1 step of sgd - sgd_op.run() - # Validate updated params - self.assertAllCloseAccordingToType([[0, 1]], - self.evaluate(var0), - atol=0.01) - - def testFtrlWithL1(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([4.0, 3.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.2], dtype=dtype) - grads1 = tf.constant([0.01, 0.02], dtype=dtype) - - opt = ftrl.Ftrl( - 3.0, - initial_accumulator_value=0.1, - l1_regularization_strength=0.001, - l2_regularization_strength=0.0) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) - self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) - - # Run 10 steps FTRL - for _ in range(10): - update.run() - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType( - np.array([-7.66718769, -10.91273689]), v0_val) - self.assertAllCloseAccordingToType( - np.array([-0.93460727, -1.86147261]), v1_val) - - def testFtrlWithBeta(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([4.0, 3.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.2], dtype=dtype) - grads1 = tf.constant([0.01, 0.02], dtype=dtype) - - opt = ftrl.Ftrl(3.0, initial_accumulator_value=0.1, beta=0.1) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) - self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) - - # Run 10 steps FTRL - for _ in range(10): - update.run() - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType( - np.array([-6.096838, -9.162214]), v0_val) - self.assertAllCloseAccordingToType( - np.array([-0.717741, -1.425132]), v1_val) - - def testFtrlWithL2_Beta(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
-    for dtype in [tf.half, tf.float32]:
-      with tf.Graph().as_default(), self.cached_session():
-        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
-        var1 = tf.Variable([4.0, 3.0], dtype=dtype)
-        grads0 = tf.constant([0.1, 0.2], dtype=dtype)
-        grads1 = tf.constant([0.01, 0.02], dtype=dtype)
-
-        opt = ftrl.Ftrl(
-            3.0,
-            initial_accumulator_value=0.1,
-            l1_regularization_strength=0.0,
-            l2_regularization_strength=0.1,
-            beta=0.1)
-        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
-        self.evaluate(tf.compat.v1.global_variables_initializer())
-
-        v0_val, v1_val = self.evaluate([var0, var1])
-        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
-        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
-
-        # Run 10 steps FTRL
-        for _ in range(10):
-          update.run()
-        v0_val, v1_val = self.evaluate([var0, var1])
-        self.assertAllCloseAccordingToType(
-            np.array([-2.735487, -4.704625]), v0_val)
-        self.assertAllCloseAccordingToType(
-            np.array([-0.294335, -0.586556]), v1_val)
-
-  def testFtrlWithL1_L2(self):
-    # TODO(tanzheny, omalleyt): Fix test in eager mode.
-    for dtype in [tf.half, tf.float32]:
-      with tf.Graph().as_default(), self.cached_session():
-        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
-        var1 = tf.Variable([4.0, 3.0], dtype=dtype)
-        grads0 = tf.constant([0.1, 0.2], dtype=dtype)
-        grads1 = tf.constant([0.01, 0.02], dtype=dtype)
-
-        opt = ftrl.Ftrl(
-            3.0,
-            initial_accumulator_value=0.1,
-            l1_regularization_strength=0.001,
-            l2_regularization_strength=2.0)
-        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
-        self.evaluate(tf.compat.v1.global_variables_initializer())
-
-        v0_val, v1_val = self.evaluate([var0, var1])
-        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
-        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
-
-        # Run 10 steps FTRL
-        for _ in range(10):
-          update.run()
-
-        v0_val, v1_val = self.evaluate([var0, var1])
-        self.assertAllCloseAccordingToType(
-            np.array([-0.24059935, -0.46829352]), v0_val)
-        self.assertAllCloseAccordingToType(
-            np.array([-0.02406147, -0.04830509]), v1_val)
-
-  def testFtrlWithL1_L2_L2Shrinkage(self):
-    """Test the new FTRL op with support for l2 shrinkage.
-
-    The addition of this parameter, which places a constant pressure on weights
-    towards the origin, causes the gradient descent trajectory to differ. The
-    weights will tend to have smaller magnitudes with this parameter set.
-    """
-    # TODO(tanzheny, omalleyt): Fix test in eager mode.
- for dtype in [tf.half, tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([4.0, 3.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.2], dtype=dtype) - grads1 = tf.constant([0.01, 0.02], dtype=dtype) - - opt = ftrl.Ftrl( - 3.0, - initial_accumulator_value=0.1, - l1_regularization_strength=0.001, - l2_regularization_strength=2.0, - l2_shrinkage_regularization_strength=0.1) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) - self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) - - # Run 10 steps FTRL - for _ in range(10): - update.run() - - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType( - np.array([-0.22578995, -0.44345796]), v0_val) - self.assertAllCloseAccordingToType( - np.array([-0.14378493, -0.13229476]), v1_val) - - def testFtrlWithL1_L2_L2ShrinkageSparse(self): - """Tests the new FTRL op with support for l2 shrinkage on sparse grads.""" - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - var0 = tf.Variable([[1.0], [2.0]], dtype=dtype) - var1 = tf.Variable([[4.0], [3.0]], dtype=dtype) - grads0 = tf.IndexedSlices( - tf.constant([0.1], shape=[1, 1], dtype=dtype), - tf.constant([0]), tf.constant([2, 1])) - grads1 = tf.IndexedSlices( - tf.constant([0.02], shape=[1, 1], dtype=dtype), - tf.constant([1]), tf.constant([2, 1])) - - opt = ftrl.Ftrl( - 3.0, - initial_accumulator_value=0.1, - l1_regularization_strength=0.001, - l2_regularization_strength=2.0, - l2_shrinkage_regularization_strength=0.1) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType([[1.0], [2.0]], v0_val) - self.assertAllCloseAccordingToType([[4.0], [3.0]], v1_val) - - # Run 10 steps FTRL - for _ in range(10): - update.run() - - v0_val, v1_val = self.evaluate([var0, var1]) - self.assertAllCloseAccordingToType([[-0.22578995], [2.]], v0_val) - self.assertAllCloseAccordingToType([[4.], [-0.13229476]], v1_val) - - def testFtrlWithL2ShrinkageDoesNotChangeLrSchedule(self): - """Verifies that l2 shrinkage in FTRL does not change lr schedule.""" - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
-    for dtype in [tf.half, tf.float32]:
-      with tf.Graph().as_default(), self.cached_session() as sess:
-        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
-        var1 = tf.Variable([1.0, 2.0], dtype=dtype)
-        grads0 = tf.constant([0.1, 0.2], dtype=dtype)
-        grads1 = tf.constant([0.1, 0.2], dtype=dtype)
-
-        opt0 = ftrl.Ftrl(
-            3.0,
-            initial_accumulator_value=0.1,
-            l1_regularization_strength=0.001,
-            l2_regularization_strength=2.0,
-            l2_shrinkage_regularization_strength=0.1)
-        opt1 = ftrl.Ftrl(
-            3.0,
-            initial_accumulator_value=0.1,
-            l1_regularization_strength=0.001,
-            l2_regularization_strength=2.0)
-        update0 = opt0.apply_gradients([(grads0, var0)])
-        update1 = opt1.apply_gradients([(grads1, var1)])
-        self.evaluate(tf.compat.v1.global_variables_initializer())
-
-        v0_val, v1_val = self.evaluate([var0, var1])
-        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
-        self.assertAllCloseAccordingToType([1.0, 2.0], v1_val)
-
-        # Run 10 steps FTRL
-        for _ in range(10):
-          update0.run()
-          update1.run()
-
-        v0_val, v1_val = self.evaluate([var0, var1])
-        # var0 is experiencing L2 shrinkage, so it should be smaller than var1
-        # in magnitude.
-        self.assertTrue((v0_val**2 < v1_val**2).all())
-        accum0 = sess.run(opt0.get_slot(var0, "accumulator"))
-        accum1 = sess.run(opt1.get_slot(var1, "accumulator"))
-        # L2 shrinkage should not change how we update the grad accumulator.
-        self.assertAllCloseAccordingToType(accum0, accum1)
-
-  def applyOptimizer(self, opt, dtype, steps=5, is_sparse=False):
-    if is_sparse:
-      var0 = tf.Variable([[0.0], [0.0]], dtype=dtype)
-      var1 = tf.Variable([[0.0], [0.0]], dtype=dtype)
-      grads0 = tf.IndexedSlices(
-          tf.constant([0.1], shape=[1, 1], dtype=dtype),
-          tf.constant([0]), tf.constant([2, 1]))
-      grads1 = tf.IndexedSlices(
-          tf.constant([0.02], shape=[1, 1], dtype=dtype),
-          tf.constant([1]), tf.constant([2, 1]))
-    else:
-      var0 = tf.Variable([0.0, 0.0], dtype=dtype)
-      var1 = tf.Variable([0.0, 0.0], dtype=dtype)
-      grads0 = tf.constant([0.1, 0.2], dtype=dtype)
-      grads1 = tf.constant([0.01, 0.02], dtype=dtype)
-
-    update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
-    self.evaluate(tf.compat.v1.global_variables_initializer())
-
-    v0_val, v1_val = self.evaluate([var0, var1])
-    if is_sparse:
-      self.assertAllCloseAccordingToType([[0.0], [0.0]], v0_val)
-      self.assertAllCloseAccordingToType([[0.0], [0.0]], v1_val)
-    else:
-      self.assertAllCloseAccordingToType([0.0, 0.0], v0_val)
-      self.assertAllCloseAccordingToType([0.0, 0.0], v1_val)
-
-    # Run Ftrl for a few steps
-    for _ in range(steps):
-      update.run()
-
-    v0_val, v1_val = self.evaluate([var0, var1])
-    return v0_val, v1_val
-
-  # When variables are initialized to zero, FTRL-Proximal has two properties:
-  # 1. Without L1&L2 but with a fixed learning rate, FTRL-Proximal is identical
-  # to GradientDescent.
-  # 2. Without L1&L2 but with an adaptive learning rate, FTRL-Proximal is
-  # identical to Adagrad.
-  # So, based on these two properties, we test whether our implementation of
-  # FTRL-Proximal performs the same updates as Adagrad or GradientDescent.
-  def testEquivAdagradwithoutRegularization(self):
-    # TODO(tanzheny, omalleyt): Fix test in eager mode.
- for dtype in [tf.half, tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - val0, val1 = self.applyOptimizer( - ftrl.Ftrl( - 3.0, - # Adagrad learning rate - learning_rate_power=-0.5, - initial_accumulator_value=0.1, - l1_regularization_strength=0.0, - l2_regularization_strength=0.0), - dtype) - - with tf.Graph().as_default(), self.cached_session(): - val2, val3 = self.applyOptimizer( - tf.compat.v1.train.AdagradOptimizer(3.0, initial_accumulator_value=0.1), dtype) - - self.assertAllCloseAccordingToType(val0, val2) - self.assertAllCloseAccordingToType(val1, val3) - - def testEquivSparseAdagradwithoutRegularization(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - val0, val1 = self.applyOptimizer( - ftrl.Ftrl( - 3.0, - # Adagrad learning rate - learning_rate_power=-0.5, - initial_accumulator_value=0.1, - l1_regularization_strength=0.0, - l2_regularization_strength=0.0), - dtype, - is_sparse=True) - - with tf.Graph().as_default(), self.cached_session(): - val2, val3 = self.applyOptimizer( - tf.compat.v1.train.AdagradOptimizer(3.0, initial_accumulator_value=0.1), - dtype, - is_sparse=True) - - self.assertAllCloseAccordingToType(val0, val2) - self.assertAllCloseAccordingToType(val1, val3) - - def testEquivSparseGradientDescentwithoutRegularization(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - val0, val1 = self.applyOptimizer( - ftrl.Ftrl( - 3.0, - # Fixed learning rate - learning_rate_power=-0.0, - initial_accumulator_value=0.1, - l1_regularization_strength=0.0, - l2_regularization_strength=0.0), - dtype, - is_sparse=True) - - with tf.Graph().as_default(), self.cached_session(): - val2, val3 = self.applyOptimizer( - tf.compat.v1.train.GradientDescentOptimizer(3.0), - dtype, - is_sparse=True) - - self.assertAllCloseAccordingToType(val0, val2) - self.assertAllCloseAccordingToType(val1, val3) - - def testEquivGradientDescentwithoutRegularization(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32]: - with tf.Graph().as_default(), self.cached_session(): - val0, val1 = self.applyOptimizer( - ftrl.Ftrl( - 3.0, - # Fixed learning rate - learning_rate_power=-0.0, - initial_accumulator_value=0.1, - l1_regularization_strength=0.0, - l2_regularization_strength=0.0), - dtype) - - with tf.Graph().as_default(), self.cached_session(): - val2, val3 = self.applyOptimizer( - tf.compat.v1.train.GradientDescentOptimizer(3.0), dtype) - - self.assertAllCloseAccordingToType(val0, val2) - self.assertAllCloseAccordingToType(val1, val3) - - -if __name__ == "__main__": - tf.test.main() diff --git a/keras/optimizers/optimizer_v2/gradient_descent.py b/keras/optimizers/optimizer_v2/gradient_descent.py deleted file mode 100644 index 47c91d9a5756..000000000000 --- a/keras/optimizers/optimizer_v2/gradient_descent.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""SGD optimizer implementation.""" -# pylint: disable=g-bad-import-order -# pylint: disable=g-classes-have-attributes -import tensorflow.compat.v2 as tf -from keras.optimizers.optimizer_v2 import optimizer_v2 -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@keras_export("keras.optimizers.SGD") -class SGD(optimizer_v2.OptimizerV2): - r"""Gradient descent (with momentum) optimizer. - - Update rule for parameter `w` with gradient `g` when `momentum` is 0: - - ```python - w = w - learning_rate * g - ``` - - Update rule when `momentum` is larger than 0: - - ```python - velocity = momentum * velocity - learning_rate * g - w = w + velocity - ``` - - When `nesterov=True`, this rule becomes: - - ```python - velocity = momentum * velocity - learning_rate * g - w = w + momentum * velocity - learning_rate * g - ``` - - Args: - learning_rate: A `Tensor`, floating point value, or a schedule that is a - `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable - that takes no arguments and returns the actual value to use. The - learning rate. Defaults to 0.01. - momentum: float hyperparameter >= 0 that accelerates gradient descent - in the relevant - direction and dampens oscillations. Defaults to 0, i.e., vanilla gradient - descent. - nesterov: boolean. Whether to apply Nesterov momentum. - Defaults to `False`. - name: Optional name prefix for the operations created when applying - gradients. Defaults to `"SGD"`. - **kwargs: keyword arguments. Allowed arguments are `clipvalue`, - `clipnorm`, `global_clipnorm`. - If `clipvalue` (float) is set, the gradient of each weight - is clipped to be no higher than this value. - If `clipnorm` (float) is set, the gradient of each weight - is individually clipped so that its norm is no higher than this value. - If `global_clipnorm` (float) is set the gradient of all weights is - clipped so that their global norm is no higher than this value. - - Usage: - - >>> opt = tf.keras.optimizers.SGD(learning_rate=0.1) - >>> var = tf.Variable(1.0) - >>> loss = lambda: (var ** 2)/2.0 # d(loss)/d(var1) = var1 - >>> step_count = opt.minimize(loss, [var]).numpy() - >>> # Step is `- learning_rate * grad` - >>> var.numpy() - 0.9 - - >>> opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9) - >>> var = tf.Variable(1.0) - >>> val0 = var.value() - >>> loss = lambda: (var ** 2)/2.0 # d(loss)/d(var1) = var1 - >>> # First step is `- learning_rate * grad` - >>> step_count = opt.minimize(loss, [var]).numpy() - >>> val1 = var.value() - >>> (val0 - val1).numpy() - 0.1 - >>> # On later steps, step-size increases because of momentum - >>> step_count = opt.minimize(loss, [var]).numpy() - >>> val2 = var.value() - >>> (val1 - val2).numpy() - 0.18 - - Reference: - - For `nesterov=True`, See [Sutskever et al., 2013]( - http://jmlr.org/proceedings/papers/v28/sutskever13.pdf). 
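A quick numeric sanity check of the three update rules quoted above (an editor's sketch, not part of the original file; the constants are illustrative):

```python
# One step of each rule for a scalar weight w with gradient g.
lr, momentum = 0.1, 0.9
w, velocity, g = 1.0, 0.0, 2.0

# Vanilla SGD: w = w - learning_rate * g
w_plain = w - lr * g                            # 1.0 - 0.1 * 2.0 = 0.8

# Momentum: velocity = momentum * velocity - learning_rate * g
velocity = momentum * velocity - lr * g         # 0.9 * 0.0 - 0.1 * 2.0 = -0.2
w_momentum = w + velocity                       # 0.8 (same as vanilla on step 1)

# Nesterov: w = w + momentum * velocity - learning_rate * g
w_nesterov = w + momentum * velocity - lr * g   # 1.0 - 0.18 - 0.2 = 0.62

print(w_plain, w_momentum, w_nesterov)
```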
- """ - - _HAS_AGGREGATE_GRAD = True - - def __init__(self, - learning_rate=0.01, - momentum=0.0, - nesterov=False, - name="SGD", - **kwargs): - super().__init__(name, **kwargs) - self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) - self._set_hyper("decay", self._initial_decay) - - self._momentum = False - if isinstance(momentum, tf.Tensor) or callable(momentum) or momentum > 0: - self._momentum = True - if isinstance(momentum, (int, float)) and (momentum < 0 or momentum > 1): - raise ValueError(f"`momentum` must be between [0, 1]. Received: " - f"momentum={momentum} (of type {type(momentum)}).") - self._set_hyper("momentum", momentum) - - self.nesterov = nesterov - - def _create_slots(self, var_list): - if self._momentum: - for var in var_list: - self.add_slot(var, "momentum") - - def _prepare_local(self, var_device, var_dtype, apply_state): - super()._prepare_local(var_device, var_dtype, apply_state) - apply_state[(var_device, var_dtype)]["momentum"] = tf.identity( - self._get_hyper("momentum", var_dtype)) - - def _resource_apply_dense(self, grad, var, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - if self._momentum: - momentum_var = self.get_slot(var, "momentum") - return tf.raw_ops.ResourceApplyKerasMomentum( - var=var.handle, - accum=momentum_var.handle, - lr=coefficients["lr_t"], - grad=grad, - momentum=coefficients["momentum"], - use_locking=self._use_locking, - use_nesterov=self.nesterov) - else: - return tf.raw_ops.ResourceApplyGradientDescent( - var=var.handle, - alpha=coefficients["lr_t"], - delta=grad, - use_locking=self._use_locking) - - def _resource_apply_sparse_duplicate_indices(self, grad, var, indices, - **kwargs): - if self._momentum: - return super()._resource_apply_sparse_duplicate_indices( - grad, var, indices, **kwargs) - else: - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = (kwargs.get("apply_state", {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - return tf.raw_ops.ResourceScatterAdd( - resource=var.handle, - indices=indices, - updates=-grad * coefficients["lr_t"]) - - def _resource_apply_sparse(self, grad, var, indices, apply_state=None): - # This method is only needed for momentum optimization. - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - momentum_var = self.get_slot(var, "momentum") - return tf.raw_ops.ResourceSparseApplyKerasMomentum( - var=var.handle, - accum=momentum_var.handle, - lr=coefficients["lr_t"], - grad=grad, - indices=indices, - momentum=coefficients["momentum"], - use_locking=self._use_locking, - use_nesterov=self.nesterov) - - def get_config(self): - config = super().get_config() - config.update({ - "learning_rate": self._serialize_hyperparameter("learning_rate"), - "decay": self._initial_decay, - "momentum": self._serialize_hyperparameter("momentum"), - "nesterov": self.nesterov, - }) - return config diff --git a/keras/optimizers/optimizer_v2/gradient_descent_test.py b/keras/optimizers/optimizer_v2/gradient_descent_test.py deleted file mode 100644 index d97b341fb543..000000000000 --- a/keras/optimizers/optimizer_v2/gradient_descent_test.py +++ /dev/null @@ -1,726 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functional test for GradientDescent.""" - -import tensorflow.compat.v2 as tf - -from absl.testing import parameterized -import numpy as np -from keras.testing_infra import test_combinations -from keras.optimizers.optimizer_v2 import gradient_descent -from keras.optimizers.schedules import learning_rate_schedule - - -class GradientDescentOptimizerTest(tf.test.TestCase, parameterized.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasic(self): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.1], dtype=dtype) - grads1 = tf.constant([0.01, 0.01], dtype=dtype) - sgd = gradient_descent.SGD(3.0) - sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], - self.evaluate(var1)) - - def _test_basic_sgd_with_learning_rate_decay(self, sgd, dtype): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.1], dtype=dtype) - grads1 = tf.constant([0.01, 0.01], dtype=dtype) - if not tf.executing_eagerly(): - sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 2 steps of sgd - if not tf.executing_eagerly(): - self.evaluate(sgd_op) - else: - sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) - # Validate updated params - self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], - self.evaluate(var1)) - - if not tf.executing_eagerly(): - self.evaluate(sgd_op) - else: - sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) - # Validate updated params - self.assertAllCloseAccordingToType( - [1.0 - 3.0 * 0.1 - 2.0 * 0.1, 2.0 - 3.0 * 0.1 - 2.0 * 0.1], - self.evaluate(var0)) - self.assertAllCloseAccordingToType( - [3.0 - 3.0 * 0.01 - 2.0 * 0.01, 4.0 - 3.0 * 0.01 - 2.0 * 0.01], - self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasicWithLearningRateDecay(self): - for dtype in [tf.half, tf.float32, tf.float64]: - learning_rate = 3.0 - decay = 0.5 - sgd = gradient_descent.SGD(learning_rate=learning_rate, decay=decay) - self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasicWithLearningRateInverseTimeDecay(self): - for dtype in [tf.half, tf.float32, tf.float64]: - 
learning_rate = learning_rate_schedule.InverseTimeDecay( - 3.0, decay_steps=1.0, decay_rate=0.5) - sgd = gradient_descent.SGD(learning_rate=learning_rate) - self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasicWithLearningRateInverseTimeDecaySerializeAndDeserialize(self): - for dtype in [tf.half, tf.float32, tf.float64]: - learning_rate = learning_rate_schedule.InverseTimeDecay( - 3.0, decay_steps=1.0, decay_rate=0.5) - sgd = gradient_descent.SGD(learning_rate=learning_rate) - sgd = gradient_descent.SGD.from_config(sgd.get_config()) - self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasicCallableParams(self): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.1], dtype=dtype) - grads1 = tf.constant([0.01, 0.01], dtype=dtype) - lr = lambda: 3.0 - sgd = gradient_descent.SGD(lr) - sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], - self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testMinimizeResourceVariable(self): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) - var1 = tf.Variable([3.0], dtype=dtype) - x = tf.constant([[4.0], [5.0]], dtype=dtype) - loss = lambda: tf.matmul(var0, x) + var1 # pylint: disable=cell-var-from-loop - sgd = gradient_descent.SGD(1.0) - sgd_op = sgd.minimize(loss, [var0, var1]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[1.0 - 4.0, 2.0 - 5.0]], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1)) - - def testMinimizeSparseResourceVariable(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- with tf.Graph().as_default(): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) - var1 = tf.Variable([3.0], dtype=dtype) - x = tf.constant([[4.0], [5.0]], dtype=dtype) - - def loss(): - pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop - pred += var1 # pylint: disable=cell-var-from-loop - return pred * pred - - sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 - np_grad = 2 * np_pred - self.assertAllCloseAccordingToType( - [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], self.evaluate(var0)) - self.assertAllCloseAccordingToType([3.0 - np_grad], self.evaluate(var1)) - - def testTensorLearningRate(self): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.1], dtype=dtype) - grads1 = tf.constant([0.01, 0.01], dtype=dtype) - lrate = tf.constant(3.0) - sgd_op = gradient_descent.SGD(lrate).apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], - self.evaluate(var1)) - - def testGradWrtRef(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in [tf.half, tf.float32, tf.float64]: - opt = gradient_descent.SGD(3.0) - values = [1.0, 3.0] - vars_ = [tf.Variable([v], dtype=dtype) for v in values] - loss = lambda: vars_[0] + vars_[1] # pylint: disable=cell-var-from-loop - grads_and_vars = opt._compute_gradients(loss, vars_) - self.evaluate(tf.compat.v1.global_variables_initializer()) - for grad, _ in grads_and_vars: - self.assertAllCloseAccordingToType([1.0], self.evaluate(grad)) - - def testSparseBasic(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([[1.0], [2.0]], dtype=dtype) - var1 = tf.Variable([[3.0], [4.0]], dtype=dtype) - grads0 = tf.IndexedSlices( - tf.constant([0.1], shape=[1, 1], dtype=dtype), - tf.constant([0]), tf.constant([2, 1])) - grads1 = tf.IndexedSlices( - tf.constant([0.01], shape=[1, 1], dtype=dtype), - tf.constant([1]), tf.constant([2, 1])) - sgd_op = gradient_descent.SGD(3.0).apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], - self.evaluate(var1)) - - def testSparseBasicWithLearningRateDecay(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- with tf.Graph().as_default(): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([[1.0], [2.0]], dtype=dtype) - var1 = tf.Variable([[3.0], [4.0]], dtype=dtype) - grads0 = tf.IndexedSlices( - tf.constant([0.1], shape=[1, 1], dtype=dtype), - tf.constant([0]), tf.constant([2, 1])) - grads1 = tf.IndexedSlices( - tf.constant([0.01], shape=[1, 1], dtype=dtype), - tf.constant([1]), tf.constant([2, 1])) - sgd_op = gradient_descent.SGD( - 3.0, decay=0.5).apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 2 steps of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], - self.evaluate(var1)) - - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType( - [[1.0 - 3.0 * 0.1 - 2.0 * 0.1], [2.0]], self.evaluate(var0)) - self.assertAllCloseAccordingToType( - [[3.0], [4.0 - 3.0 * 0.01 - 2.0 * 0.01]], self.evaluate(var1)) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testCapturingInFunctionWhileExecutingEagerly(self): - optimizer = gradient_descent.SGD(1.0) - - var_holder = {} - def step(): - if not var_holder: - var_holder["var"] = tf.Variable(1.0) - else: - var_holder["var"].assign(1.0) - - with tf.GradientTape() as tape: - loss = var_holder["var"]**2 - grad = tape.gradient(loss, var_holder["var"]) - optimizer.apply_gradients([(grad, var_holder["var"])]) - return var_holder["var"].read_value() - - compiled_step = tf.function(step) - - self.assertEqual(float(step()), -1.0) - self.assertEqual(float(compiled_step()), -1.0) - # This shouldn't fail; in particular, the learning rate tensor should - # be an EagerTensor once again, not a graph Tensor. 
- self.assertEqual(float(step()), -1.0) - - def testConstructSGDWithLR(self): - opt = gradient_descent.SGD(lr=1.0) - opt_2 = gradient_descent.SGD(learning_rate=0.1, lr=1.0) - opt_3 = gradient_descent.SGD(learning_rate=0.1) - self.assertIsInstance(opt.lr, tf.Variable) - self.assertIsInstance(opt_2.lr, tf.Variable) - self.assertIsInstance(opt_3.lr, tf.Variable) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(opt.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) - - -class MomentumOptimizerTest(tf.test.TestCase, parameterized.TestCase): - - def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum): - accum = accum * momentum - g * lr - var += (accum * momentum - g * lr) - return var, accum - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testBasic(self): - for _, dtype in enumerate([tf.half, tf.float32, tf.float64]): - var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0") - var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1") - grads0 = tf.constant([0.1, 0.1], dtype=dtype) - grads1 = tf.constant([0.01, 0.01], dtype=dtype) - learning_rate = 2.0 - momentum = 0.9 - mom_opt = gradient_descent.SGD( - learning_rate=learning_rate, momentum=momentum) - # self.assertFalse(mom_opt._initial_decay) - mom_update = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - - # Check we have slots - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEqual(slot0.shape, var0.shape) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEqual(slot1.shape, var1.shape) - - # Step 1: the momentum accumulators were 0. So we should see a normal - # update: v -= grad * learning_rate - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([-0.2, -0.2]), self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([-0.02, -0.02]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), - self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), - self.evaluate(var1)) - # Step 2: the momentum accumulators contain the previous update. - self.evaluate(mom_update) - if tf.executing_eagerly(): - mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([ - 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), self.evaluate(var1)) - - def testNesterovMomentum(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode.
- with tf.Graph().as_default(): - for dtype in [tf.float32, tf.float64]: - var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0") - var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1") - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - loss = lambda: 5 * var0 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop - mom_op = gradient_descent.SGD( - learning_rate=2.0, momentum=0.9, nesterov=True) - opt_op = mom_op.minimize(loss, [var0, var1]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - for _ in range(1, 5): - self.evaluate(opt_op) - var0_np, accum0_np = self._update_nesterov_momentum_numpy( - var0_np, accum0_np, var0_np * 10, 2.0, 0.9) - var1_np, accum1_np = self._update_nesterov_momentum_numpy( - var1_np, accum1_np, 3, 2.0, 0.9) - self.assertAllClose(var0_np, self.evaluate(var0)) - self.assertAllClose(var1_np, self.evaluate(var1)) - - def testSparseNesterovMomentum(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session() as sess: - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - grads = [] - for t in range(1, 5): - grads.append(var0_np * 10) - var0_np, accum0_np = self._update_nesterov_momentum_numpy( - var0_np, accum0_np, var0_np * 10, 2.0, 0.9) - var1_np, accum1_np = self._update_nesterov_momentum_numpy( - var1_np, accum1_np, 3, 2.0, 0.9) - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - var0 = tf.Variable(var0_np, dtype=dtype, name="var0") - var1 = tf.Variable(var1_np, dtype=dtype, name="var1") - mom_op = gradient_descent.SGD( - learning_rate=2.0, momentum=0.9, nesterov=True) - x_feed = tf.compat.v1.placeholder(dtype) - y_feed = tf.IndexedSlices(x_feed, tf.constant([0, 1]), - tf.constant([2])) - grads_and_vars = [(y_feed, var0), - (tf.constant([3.0, 3.0], dtype=dtype), var1)] - opt_update = mom_op.apply_gradients(grads_and_vars) - self.evaluate(tf.compat.v1.global_variables_initializer()) - for t in range(1, 5): - sess.run(opt_update, feed_dict={x_feed: grads[t - 1]}) - var0_np, accum0_np = self._update_nesterov_momentum_numpy( - var0_np, accum0_np, var0_np * 10, 2.0, 0.9) - var1_np, accum1_np = self._update_nesterov_momentum_numpy( - var1_np, accum1_np, 3, 2.0, 0.9) - self.assertAllClose(var0_np, self.evaluate(var0)) - self.assertAllClose(var1_np, self.evaluate(var1)) - - def testMinimizeSparseResourceVariable(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- with tf.Graph().as_default(): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) - - # pylint: disable=cell-var-from-loop - def loss(): - x = tf.constant([[4.0], [5.0]], dtype=dtype) - pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x) - return pred * pred - - # pylint: enable=cell-var-from-loop - - opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9) - sgd_op = opt.minimize(loss, [var0]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testMinimizeWith2DIndicesForEmbeddingLookup(self): - var0 = tf.Variable(tf.ones([2, 2])) - - def loss(): - return tf.reduce_sum(tf.compat.v1.nn.embedding_lookup(var0, [[1]])) - - opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9) - sgd_op = opt.minimize(loss, [var0]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(sgd_op) - self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0)) - - def testTensorLearningRateAndMomentum(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.1], dtype=dtype) - grads1 = tf.constant([0.01, 0.01], dtype=dtype) - mom_opt = gradient_descent.SGD( - learning_rate=tf.constant(2.0), - momentum=tf.constant(0.9)) - mom_update = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Check we have slots - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEqual(slot0.shape, var0.shape) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEqual(slot1.shape, var1.shape) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Step 1: the momentum accumulators were 0. So we should see a normal - # update: v -= grad * learning_rate - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([-0.2, -0.2]), self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([-0.02, -0.02]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), - self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), - self.evaluate(var1)) - # Step 2: the momentum accumulators contain the previous update. - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) - # Check that the parameters have been updated.
- self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([ - 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), self.evaluate(var1)) - - def testSparse(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype)) - var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2])) - grads0 = tf.IndexedSlices( - tf.constant([[.1, .1]], dtype=dtype), - tf.constant([1]), tf.constant([4, 2])) - grads1 = tf.IndexedSlices( - tf.constant([[.01, .01], [.01, .01]], dtype=dtype), - tf.constant([2, 3]), tf.constant([4, 2])) - mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) - mom_update = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Check we have slots - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEqual(slot0.shape, var0.shape) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEqual(slot1.shape, var1.shape) - - # Fetch params to validate initial values - self.assertAllClose([0, 0], self.evaluate(var0)[0]) - self.assertAllClose([0, 0], self.evaluate(var0)[1]) - self.assertAllClose([1, 1], self.evaluate(var1)[2]) - - # Step 1: the momentum accumulators are 0. So we should see a normal - # update: v -= grad * learning_rate - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([0, 0]), - self.evaluate(slot0)[0]) - self.assertAllCloseAccordingToType( - np.array([-2.0 * .1, -2.0 * .1]), - self.evaluate(slot0)[1]) - self.assertAllCloseAccordingToType( - np.array([-2.0 * .01, -2.0 * .01]), - self.evaluate(slot1)[2]) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([0, 0]), - self.evaluate(var0)[0]) - self.assertAllCloseAccordingToType( - np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), - self.evaluate(var0)[1]) - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), - self.evaluate(var1)[2]) - # Step 2: the momentum accumulators contain the previous update. - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllClose(np.array([0, 0]), self.evaluate(slot0)[0]) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)[1]) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), - self.evaluate(slot1)[2]) - # Check that the parameters have been updated. - self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0]) - self.assertAllCloseAccordingToType( - np.array([ - -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), - self.evaluate(var0)[1]) - self.assertAllCloseAccordingToType( - np.array([ - 0.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 0.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), - self.evaluate(var1)[2]) - - def testSharing(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- with tf.Graph().as_default(): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.1], dtype=dtype) - grads1 = tf.constant([0.01, 0.01], dtype=dtype) - mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) - mom_update1 = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - mom_update2 = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEqual(slot0.shape, var0.shape) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEqual(slot1.shape, var1.shape) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Step 1: the momentum accumulators were 0. So we should see a normal - # update: v -= grad * learning_rate - self.evaluate(mom_update1) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([-0.2, -0.2]), self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([-0.02, -0.02]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), - self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), - self.evaluate(var1)) - # Step 2: the second momentum accumulators contain the previous update. - self.evaluate(mom_update2) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([ - 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testConfig(self): - opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9, nesterov=True) - config = opt.get_config() - opt2 = gradient_descent.SGD.from_config(config) - lr = opt.lr - lr2 = opt2.lr - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(lr), self.evaluate(lr2)) - self.assertAllClose( - self.evaluate(opt._get_hyper("momentum")), - self.evaluate(opt2._get_hyper("momentum"))) - self.assertAllClose( - self.evaluate(opt._get_hyper("decay")), - self.evaluate(opt2._get_hyper("decay"))) - var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32) - loss = lambda: 3 * var0 - # learning rate variable created when calling minimize.
- opt.minimize(loss, [var0]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - config = opt.get_config() - opt3 = gradient_descent.SGD.from_config(config) - lr3 = opt3.lr - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(lr), self.evaluate(lr3)) - self.assertAllClose( - self.evaluate(opt._get_hyper("momentum")), - self.evaluate(opt3._get_hyper("momentum"))) - self.assertAllClose( - self.evaluate(opt._get_hyper("decay")), - self.evaluate(opt3._get_hyper("decay"))) - self.assertTrue(opt3.nesterov) - - def testNesterovWithoutMomentum(self): - with self.assertRaisesRegex(ValueError, "must be between"): - gradient_descent.SGD(learning_rate=1.0, momentum=2.0) - - def testConstructMomentumWithLR(self): - opt = gradient_descent.SGD(lr=1.0, momentum=0.9) - opt_2 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9, lr=1.0) - opt_3 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9) - self.assertIsInstance(opt.lr, tf.Variable) - self.assertIsInstance(opt_2.lr, tf.Variable) - self.assertIsInstance(opt_3.lr, tf.Variable) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(opt.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testMinimizeLossTensor(self): - for dtype in [tf.half, tf.float32, tf.float64]: - var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) - var1 = tf.Variable([3.0], dtype=dtype) - x = tf.constant([[4.0], [5.0]], dtype=dtype) - - tape = tf.GradientTape() - with tape: - loss = tf.matmul(var0, x) + var1 - sgd = gradient_descent.SGD(1.0) - with self.assertRaisesRegex(ValueError, "`tape` is required"): - sgd.minimize(loss, [var0, var1]) - sgd.minimize(loss, [var0, var1], tape=tape) - - self.assertAllCloseAccordingToType([[1.0 - 4.0, 2.0 - 5.0]], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1)) - - -if __name__ == "__main__": - tf.test.main() diff --git a/keras/optimizers/optimizer_v2/nadam.py b/keras/optimizers/optimizer_v2/nadam.py deleted file mode 100644 index 96007cce1c01..000000000000 --- a/keras/optimizers/optimizer_v2/nadam.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Nadam optimizer implementation.""" - -import tensorflow.compat.v2 as tf -from keras import backend_config -from keras.optimizers.schedules import learning_rate_schedule -from keras.optimizers.optimizer_v2 import optimizer_v2 -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@keras_export('keras.optimizers.Nadam') -class Nadam(optimizer_v2.OptimizerV2): - r"""Optimizer that implements the NAdam algorithm. 
- Much like Adam is essentially RMSprop with momentum, Nadam is Adam with - Nesterov momentum. - - Args: - learning_rate: A Tensor or a floating point value. The learning rate. - beta_1: A float value or a constant float tensor. The exponential decay - rate for the 1st moment estimates. - beta_2: A float value or a constant float tensor. The exponential decay - rate for the 2nd moment estimates. - epsilon: A small constant for numerical stability. - name: Optional name for the operations created when applying gradients. - Defaults to `"Nadam"`. - **kwargs: keyword arguments. Allowed arguments are `clipvalue`, - `clipnorm`, `global_clipnorm`. - If `clipvalue` (float) is set, the gradient of each weight - is clipped to be no higher than this value. - If `clipnorm` (float) is set, the gradient of each weight - is individually clipped so that its norm is no higher than this value. - If `global_clipnorm` (float) is set the gradient of all weights is - clipped so that their global norm is no higher than this value. - - Usage Example: - >>> opt = tf.keras.optimizers.Nadam(learning_rate=0.2) - >>> var1 = tf.Variable(10.0) - >>> loss = lambda: (var1 ** 2) / 2.0 - >>> step_count = opt.minimize(loss, [var1]).numpy() - >>> "{:.1f}".format(var1.numpy()) - 9.8 - - Reference: - - [Dozat, 2015](http://cs229.stanford.edu/proj2015/054_report.pdf). - """ - - _HAS_AGGREGATE_GRAD = True - - def __init__(self, - learning_rate=0.001, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-7, - name='Nadam', - **kwargs): - # Backwards compatibility with keras NAdam optimizer. - kwargs['decay'] = kwargs.pop('schedule_decay', 0.004) - learning_rate = kwargs.get('lr', learning_rate) - if isinstance(learning_rate, learning_rate_schedule.LearningRateSchedule): - raise ValueError('The Nadam optimizer does not support ' - 'tf.keras.optimizers.LearningRateSchedules as the ' - 'learning rate.') - - super().__init__(name, **kwargs) - self._set_hyper('learning_rate', kwargs.get('lr', learning_rate)) - self._set_hyper('decay', self._initial_decay) - self._set_hyper('beta_1', beta_1) - self._set_hyper('beta_2', beta_2) - self.epsilon = epsilon or backend_config.epsilon() - self._m_cache = None - - def _create_slots(self, var_list): - var_dtype = var_list[0].dtype.base_dtype - if self._m_cache is None: - self._m_cache = self.add_weight( - 'momentum_cache', - shape=[], - dtype=var_dtype, - initializer='ones', - trainable=False, - aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) - self._weights.append(self._m_cache) - # Separate for-loops to respect the ordering of slot variables from v1. - for var in var_list: - # Create slots for the first moments. - self.add_slot(var, 'm') - for var in var_list: - # Create slots for the second moments. - self.add_slot(var, 'v') - - def _prepare_local(self, var_device, var_dtype, apply_state): - lr_t = tf.identity(self._get_hyper('learning_rate', var_dtype)) - beta_1_t = tf.identity(self._get_hyper('beta_1', var_dtype)) - beta_2_t = tf.identity(self._get_hyper('beta_2', var_dtype)) - local_step = tf.cast(self.iterations + 1, var_dtype) - next_step = tf.cast(self.iterations + 2, var_dtype) - - decay_base = tf.cast(0.96, var_dtype) - - m_t = beta_1_t * (1. - 0.5 * ( - tf.pow(decay_base, self._initial_decay * local_step))) - m_t_1 = beta_1_t * (1.
- 0.5 * ( - tf.pow(decay_base, self._initial_decay * next_step))) - - m_schedule_new = tf.cast(self._m_cache_read, var_dtype) * m_t - if var_dtype is self._m_cache.dtype: - m_schedule_new = tf.identity(tf.compat.v1.assign( - self._m_cache, m_schedule_new, use_locking=self._use_locking)) - m_schedule_next = m_schedule_new * m_t_1 - - apply_state[(var_device, var_dtype)] = dict( - lr_t=lr_t, - neg_lr_t=-lr_t, # pylint: disable=invalid-unary-operand-type - epsilon=tf.convert_to_tensor(self.epsilon, var_dtype), - beta_1_t=beta_1_t, - beta_2_t=beta_2_t, - m_t=m_t, - m_t_1=m_t_1, - one_minus_beta_1_t=1 - beta_1_t, - one_minus_beta_2_t=1 - beta_2_t, - one_minus_m_t=1. - m_t, - one_minus_m_schedule_new=1. - m_schedule_new, - one_minus_m_schedule_next=1. - m_schedule_next, - v_t_prime_denominator=1. - tf.pow(beta_2_t, local_step), - ) - - def _prepare(self, var_list): - # Get the value of the momentum cache before starting to apply gradients. - self._m_cache_read = tf.identity(self._m_cache) - return super()._prepare(var_list) - - def _resource_apply_dense(self, grad, var, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - m = self.get_slot(var, 'm') - v = self.get_slot(var, 'v') - - g_prime = grad / coefficients['one_minus_m_schedule_new'] - m_t = (coefficients['beta_1_t'] * m + - coefficients['one_minus_beta_1_t'] * grad) - m_t = tf.compat.v1.assign(m, m_t, use_locking=self._use_locking) - m_t_prime = m_t / coefficients['one_minus_m_schedule_next'] - v_t = (coefficients['beta_2_t'] * v + - coefficients['one_minus_beta_2_t'] * tf.square(grad)) - v_t = tf.compat.v1.assign(v, v_t, use_locking=self._use_locking) - v_t_prime = v_t / coefficients['v_t_prime_denominator'] - m_t_bar = (coefficients['one_minus_m_t'] * g_prime + - coefficients['m_t_1'] * m_t_prime) - var_t = var - coefficients['lr_t'] * m_t_bar / ( - tf.sqrt(v_t_prime) + coefficients['epsilon']) - return tf.compat.v1.assign(var, var_t, use_locking=self._use_locking).op - - def _resource_apply_sparse(self, grad, var, indices, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - m = self.get_slot(var, 'm') - v = self.get_slot(var, 'v') - - g_prime = grad / coefficients['one_minus_m_schedule_new'] - - # m_t = beta1 * m + (1 - beta1) * g_t - m_scaled_g_values = grad * coefficients['one_minus_beta_1_t'] - m_t = tf.compat.v1.assign(m, m * coefficients['beta_1_t'], - use_locking=self._use_locking) - - with tf.control_dependencies([m_t]): - m_t = self._resource_scatter_add(m, indices, m_scaled_g_values) - m_t_slice = tf.gather(m_t, indices) - - m_t_prime = m_t_slice / coefficients['one_minus_m_schedule_next'] - m_t_bar = (coefficients['one_minus_m_t'] * g_prime + - coefficients['m_t_1'] * m_t_prime) - - # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) - v_scaled_g_values = (grad * grad) * coefficients['one_minus_beta_2_t'] - v_t = tf.compat.v1.assign(v, v * coefficients['beta_2_t'], - use_locking=self._use_locking) - - with tf.control_dependencies([v_t]): - v_t = self._resource_scatter_add(v, indices, v_scaled_g_values) - v_t_slice = tf.gather(v_t, indices) - - v_t_prime = v_t_slice / coefficients['v_t_prime_denominator'] - v_prime_sqrt_plus_eps = tf.sqrt(v_t_prime) + coefficients['epsilon'] - - var_update = self._resource_scatter_add( - var, 
indices, - coefficients['neg_lr_t'] * m_t_bar / v_prime_sqrt_plus_eps) - return tf.group(*[var_update, m_t_bar, v_t]) - - def get_config(self): - config = super().get_config() - config.update({ - 'learning_rate': self._serialize_hyperparameter('learning_rate'), - 'decay': self._initial_decay, - 'beta_1': self._serialize_hyperparameter('beta_1'), - 'beta_2': self._serialize_hyperparameter('beta_2'), - 'epsilon': self.epsilon, - }) - return config diff --git a/keras/optimizers/optimizer_v2/nadam_test.py b/keras/optimizers/optimizer_v2/nadam_test.py deleted file mode 100644 index 2fd09df4e3a0..000000000000 --- a/keras/optimizers/optimizer_v2/nadam_test.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Nadam.""" - -import tensorflow.compat.v2 as tf - -import numpy as np -from keras.optimizers.optimizer_v2 import nadam - - -def get_beta_accumulators(opt, dtype): - local_step = tf.cast(opt.iterations + 1, dtype) - beta_1_t = tf.cast(opt._get_hyper("beta_1"), dtype) - beta_1_power = tf.pow(beta_1_t, local_step) - beta_2_t = tf.cast(opt._get_hyper("beta_2"), dtype) - beta_2_power = tf.pow(beta_2_t, local_step) - return (beta_1_power, beta_2_power) - - -def update_m_cache(m_cache, t, beta1=0.9): - mu_t = beta1 * (1 - 0.5 * 0.96**(0.004 * (t + 1))) - m_cache_t = m_cache * mu_t - return m_cache_t - - -def nadam_update_numpy(param, - g_t, - t, - m, - v, - m_cache, - alpha=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-8): - - mu_t = beta1 * (1 - 0.5 * 0.96**(0.004 * (t + 1))) - mu_t_1 = beta1 * (1 - 0.5 * 0.96**(0.004 * (t + 2))) - m_cache_t_1 = m_cache * mu_t_1 - g_prime_t = g_t / (1 - m_cache) - m_t = beta1 * m + (1 - beta1) * g_t - v_t = beta2 * v + (1 - beta2) * g_t * g_t - - m_prime_t = m_t / (1 - m_cache_t_1) - v_prime_t = v_t / (1 - beta2**(t + 1)) - m_bar_t = (1 - mu_t) * g_prime_t + mu_t_1 * m_prime_t - - param_t = param - alpha * m_bar_t / (np.sqrt(v_prime_t) + epsilon) - return param_t, m_t, v_t - - -class NadamOptimizerTest(tf.test.TestCase): - - def testSparse(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - sparse_epsilon = 1e-7 - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. 
- m0, v0, m1, v1, mcache = 0.0, 0.0, 0.0, 0.0, 1.0 - var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0_np_indices = np.array([0, 2], dtype=np.int32) - grads0 = tf.IndexedSlices( - tf.constant(grads0_np[grads0_np_indices]), - tf.constant(grads0_np_indices), tf.constant([3])) - grads1_np_indices = np.array([0, 2], dtype=np.int32) - grads1 = tf.IndexedSlices( - tf.constant(grads1_np[grads1_np_indices]), - tf.constant(grads1_np_indices), tf.constant([3])) - opt = nadam.Nadam(epsilon=sparse_epsilon) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 1.0, 2.0], var0) - self.assertAllClose([3.0, 3.0, 4.0], var1) - - beta1_power, beta2_power = get_beta_accumulators(opt, dtype) - - # Run 3 steps of Nadam - for t in range(3): - self.assertAllCloseAccordingToType(0.9**(t + 1), beta1_power) - self.assertAllCloseAccordingToType(0.999**(t + 1), beta2_power) - update.run() - - mcache = update_m_cache(mcache, t) - var0_np, m0, v0 = nadam_update_numpy( - var0_np, grads0_np, t, m0, v0, mcache, epsilon=sparse_epsilon) - var1_np, m1, v1 = nadam_update_numpy( - var1_np, grads1_np, t, m1, v1, mcache, epsilon=sparse_epsilon) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, var0) - self.assertAllCloseAccordingToType(var1_np, var1) - - def testBasic(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for dtype in [tf.half, tf.float32, tf.float64]: - with tf.Graph().as_default(), self.cached_session(): - # Initialize variables for numpy implementation. 
- m0, v0, m1, v1, mcache = 0.0, 0.0, 0.0, 0.0, 1.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - opt = nadam.Nadam() - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], var0) - self.assertAllClose([3.0, 4.0], var1) - - # Run 3 steps of Nadam - for t in range(3): - update.run() - - mcache = update_m_cache(mcache, t) - var0_np, m0, v0 = nadam_update_numpy(var0_np, grads0_np, t, m0, v0, - mcache) - var1_np, m1, v1 = nadam_update_numpy(var1_np, grads1_np, t, m1, v1, - mcache) - - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, var0) - self.assertAllCloseAccordingToType(var1_np, var1) - - def testConstructNAdamWithLR(self): - opt = nadam.Nadam(lr=1.0) - opt_2 = nadam.Nadam(learning_rate=0.1, lr=1.0) - opt_3 = nadam.Nadam(learning_rate=0.1) - self.assertIsInstance(opt.lr, tf.Variable) - self.assertIsInstance(opt_2.lr, tf.Variable) - self.assertIsInstance(opt_3.lr, tf.Variable) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(opt.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) - - def testConstructNAdamWithScheduleDecay(self): - opt = nadam.Nadam(schedule_decay=0.2) - self.assertIsInstance(opt.decay, tf.Variable) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(opt.decay), (0.2)) - - -if __name__ == "__main__": - tf.test.main() diff --git a/keras/optimizers/optimizer_v2/optimizer_v2.py b/keras/optimizers/optimizer_v2/optimizer_v2.py deleted file mode 100644 index a9d37f21f50c..000000000000 --- a/keras/optimizers/optimizer_v2/optimizer_v2.py +++ /dev/null @@ -1,1542 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Version 2 of class Optimizer.""" -# pylint: disable=g-bad-name - - -import abc -import contextlib -import functools -import warnings -from keras import backend -from keras import initializers -from keras.engine import base_layer_utils -from keras.optimizers.optimizer_v2 import utils as optimizer_utils -from keras.optimizers.schedules import learning_rate_schedule -from keras.utils import generic_utils -from keras.utils import layer_utils -from keras.utils import tf_inspect -from keras.utils import tf_utils -import tensorflow.compat.v2 as tf -from tensorflow.python.util.tf_export import keras_export - - -keras_optimizers_gauge = tf.__internal__.monitoring.BoolGauge( - "/tensorflow/api/keras/optimizers", "keras optimizer usage", "method") - -_DEFAULT_VALID_DTYPES = frozenset([ - tf.float16, tf.bfloat16, tf.float32, tf.float64, - tf.complex64, tf.complex128 -]) - - -def _deduplicate_indexed_slices(values, indices): - """Sums `values` associated with any non-unique `indices`. - - Args: - values: A `Tensor` with rank >= 1. - indices: A one-dimensional integer `Tensor`, indexing into the first - dimension of `values` (as in an IndexedSlices object). - - Returns: - A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a - de-duplicated version of `indices` and `summed_values` contains the sum of - `values` slices associated with each unique index. - """ - unique_indices, new_index_positions = tf.unique(indices) - summed_values = tf.math.unsorted_segment_sum( - values, new_index_positions, - tf.shape(unique_indices)[0]) - return (summed_values, unique_indices) - - -class NullContextmanager: - - def __init__(self, *args, **kwargs): - pass - - def __enter__(self): - pass - - def __exit__(self, type_arg, value_arg, traceback_arg): - return False # False values do not suppress exceptions - - -def name_scope_only_in_function_or_graph(name): - """Internal-only entry point for `name_scope*`. - - Enters a compat.v1.name_scope only when in a function or graph, - not when running fully eagerly. - - Args: - name: The name argument that is passed to the op function. - - Returns: - `name_scope*` context manager. - """ - if not tf.executing_eagerly(): - return tf.name_scope(name) - else: - return NullContextmanager() - - -@keras_export( - "keras.optimizers.Optimizer", - metaclass=abc.ABCMeta) -class OptimizerV2(tf.__internal__.tracking.Trackable): - """Base class for Keras optimizers. - - You should not use this class directly, but instead instantiate one of its - subclasses such as `tf.keras.optimizers.SGD`, `tf.keras.optimizers.Adam`, etc. - - ### Usage - - ```python - # Create an optimizer with the desired parameters. - opt = tf.keras.optimizers.SGD(learning_rate=0.1) - # `loss` is a callable that takes no argument and returns the value - # to minimize. - loss = lambda: 3 * var1 * var1 + 2 * var2 * var2 - # In graph mode, returns op that minimizes the loss by updating the listed - # variables. - opt_op = opt.minimize(loss, var_list=[var1, var2]) - opt_op.run() - # In eager mode, simply call minimize to update the list of variables. - opt.minimize(loss, var_list=[var1, var2]) - ``` - - ### Usage in custom training loops - - In Keras models, sometimes variables are created when the model is first - called, instead of construction time. Examples include 1) sequential models - without input shape pre-defined, or 2) subclassed models. Pass var_list as - callable in these cases. 
- - Example: - - ```python - opt = tf.keras.optimizers.SGD(learning_rate=0.1) - model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(num_hidden, activation='relu')) - model.add(tf.keras.layers.Dense(num_classes, activation='sigmoid')) - loss_fn = lambda: tf.keras.losses.mse(model(input), output) - var_list_fn = lambda: model.trainable_weights - for input, output in data: - opt.minimize(loss_fn, var_list_fn) - ``` - - ### Processing gradients before applying them - - Calling `minimize()` takes care of both computing the gradients and - applying them to the variables. If you want to process the gradients - before applying them you can instead use the optimizer in three steps: - - 1. Compute the gradients with `tf.GradientTape`. - 2. Process the gradients as you wish. - 3. Apply the processed gradients with `apply_gradients()`. - - Example: - - ```python - # Create an optimizer. - opt = tf.keras.optimizers.SGD(learning_rate=0.1) - - # Compute the gradients for a list of variables. - with tf.GradientTape() as tape: - loss = <call_function> - vars = <list_of_variables> - grads = tape.gradient(loss, vars) - - # Process the gradients, for example cap them, etc. - # capped_grads = [MyCapper(g) for g in grads] - processed_grads = [process_gradient(g) for g in grads] - - # Ask the optimizer to apply the processed gradients. - opt.apply_gradients(zip(processed_grads, var_list)) - ``` - - ### Use with `tf.distribute.Strategy` - - This optimizer class is `tf.distribute.Strategy` aware, which means it - automatically sums gradients across all replicas. To average gradients, - you divide your loss by the global batch size, which is done - automatically if you use `tf.keras` built-in training or evaluation loops. - See the `reduction` argument of your loss, which should be set to - `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` for averaging or - `tf.keras.losses.Reduction.SUM` for summing. - - To aggregate gradients yourself, call `apply_gradients` with - `experimental_aggregate_gradients` set to False. This is useful if you need to - process aggregated gradients. - - If you are not using these and you want to average gradients, you should use - `tf.math.reduce_sum` to add up your per-example losses and then divide by the - global batch size. Note that when using `tf.distribute.Strategy`, the first - component of a tensor's shape is the *replica-local* batch size, which is off - by a factor equal to the number of replicas being used to compute a single - step. As a result, using `tf.math.reduce_mean` will give the wrong answer, - resulting in gradients that can be many times too big. - - ### Variable Constraints - - All Keras optimizers respect variable constraints. If a constraint function is - passed to any variable, the constraint will be applied to the variable after - the gradient has been applied. - Important: If the gradient is a sparse tensor, variable constraints are not supported. - - ### Thread Compatibility - - The entire optimizer is currently thread compatible, not thread-safe. The user - needs to perform synchronization if necessary. - - ### Slots - - Many optimizer subclasses, such as `Adam` and `Adagrad`, allocate and manage - additional variables associated with the variables to train. These are called - Slots. Slots have names and you can ask the optimizer for the names of - the slots that it uses. Once you have a slot name you can ask the optimizer - for the variable it created to hold the slot value. - - This can be useful if you want to debug a training algorithm, report stats - about the slots, etc.
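As a concrete illustration of the slot API described above, here is an editor's sketch (not part of the original file) using the `get_slot_names()` and `get_slot()` methods this class defines; SGD with momentum allocates one "momentum" slot per trainable variable:

```python
import tensorflow as tf

# Assumes the legacy OptimizerV2-based SGD defined in this package.
var = tf.Variable([1.0, 2.0])
opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
opt.minimize(lambda: tf.reduce_sum(var ** 2), var_list=[var])

print(opt.get_slot_names())                   # ['momentum']
print(opt.get_slot(var, "momentum").numpy())  # the accumulated velocity
```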
-
-  ### Hyperparameters
-
-  These are arguments passed to the optimizer subclass constructor
-  (the `__init__` method), and then passed to `self._set_hyper()`.
-  They can be either regular Python values (like 1.0), tensors, or
-  callables. If they are callable, the callable will be called during
-  `apply_gradients()` to get the value for the hyper parameter.
-
-  Hyperparameters can be overwritten through user code:
-
-  Example:
-
-  ```python
-  # Create an optimizer with the desired parameters.
-  opt = tf.keras.optimizers.SGD(learning_rate=0.1)
-  # `loss` is a callable that takes no argument and returns the value
-  # to minimize.
-  loss = lambda: 3 * var1 + 2 * var2
-  # In eager mode, simply call minimize to update the list of variables.
-  opt.minimize(loss, var_list=[var1, var2])
-  # update learning rate
-  opt.learning_rate = 0.05
-  opt.minimize(loss, var_list=[var1, var2])
-  ```
-
-  ### Callable learning rate
-
-  Optimizer accepts a callable learning rate in two ways. The first way is
-  through built-in or customized
-  `tf.keras.optimizers.schedules.LearningRateSchedule`. The schedule will be
-  called on each iteration with `schedule(iteration)`, a `tf.Variable`
-  owned by the optimizer.
-
-  Example:
-
-  >>> var = tf.Variable(np.random.random(size=(1,)))
-  >>> learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
-  ... initial_learning_rate=.01, decay_steps=20, decay_rate=.1)
-  >>> opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)
-  >>> loss = lambda: 3 * var
-  >>> opt.minimize(loss, var_list=[var])
-
-  The second way is through a callable function that
-  does not accept any arguments.
-
-  Example:
-
-  >>> var = tf.Variable(np.random.random(size=(1,)))
-  >>> def lr_callable():
-  ...   return .1
-  >>> opt = tf.keras.optimizers.SGD(learning_rate=lr_callable)
-  >>> loss = lambda: 3 * var
-  >>> opt.minimize(loss, var_list=[var])
-
-  ### Creating a custom optimizer
-
-  If you intend to create your own optimization algorithm, simply inherit
-  from this class and override the following methods:
-
-    - `_resource_apply_dense` (update variable given gradient tensor is
-      dense)
-    - `_resource_apply_sparse` (update variable given gradient tensor is
-      sparse)
-    - `_create_slots`
-      (if your optimizer algorithm requires additional variables)
-    - `get_config`
-      (serialization of the optimizer, include all hyper parameters)
-  """
-
-  _HAS_AGGREGATE_GRAD = False
-
-  def __init__(self,
-               name,
-               gradient_aggregator=None,
-               gradient_transformers=None,
-               **kwargs):
-    allowed_kwargs = {"clipnorm", "clipvalue", "lr", "decay",
-                      "global_clipnorm"}
-    for k in kwargs:
-      if k not in allowed_kwargs:
-        raise TypeError("Unexpected keyword argument "
-                        "passed to optimizer: " + str(k))
-      # Checks that all keyword arguments are non-negative.
-      if kwargs[k] is not None and kwargs[k] < 0:
-        raise ValueError(
-            "Expected {} >= 0, received: {}".format(k, kwargs[k]))
-      if k == "lr":
-        warnings.warn(
-            "The `lr` argument is deprecated, use `learning_rate` instead.",
-            stacklevel=2)
-
-    self._use_locking = True
-    self._init_set_name(name)
-    self._hyper = {}
-    # dict: {variable name : {slot name : variable}}
-    self._slots = {}
-    self._slot_names = []
-    self._weights = []
-    self._iterations = None
-
-    # For implementing Trackable. Stores information about how to restore
-    # slot variables which have not yet been created
-    # (trackable._CheckpointPosition objects).
-    # {slot_name :
-    #     {_var_key(variable_to_train): [checkpoint_position, ... ], ... },
-    #  ... }
-    self._deferred_slot_restorations = {}
-
-    decay = kwargs.pop("decay", 0.0)
-    if decay < 0.:
-      raise ValueError("decay cannot be less than 0. "
-                       "Received: decay={}.".format(decay))
-    self._initial_decay = decay
-
-    self._hypers_created = False
-    # Store the distribution strategy object if the optimizer is created
-    # inside strategy scope, so it could be used to create variables later.
-    if tf.distribute.has_strategy():
-      self._distribution_strategy = tf.distribute.get_strategy()
-    else:
-      self._distribution_strategy = None
-
-    # Configure gradient transformations.
-    if gradient_aggregator is None:
-      gradient_aggregator = optimizer_utils.all_reduce_sum_gradients
-    self.gradient_aggregator = gradient_aggregator
-    if gradient_transformers is None:
-      gradient_transformers = []
-    self.gradient_transformers = gradient_transformers
-    self.clipnorm = kwargs.pop("clipnorm", None)
-    self.global_clipnorm = kwargs.pop("global_clipnorm", None)
-    if self.clipnorm is not None and self.global_clipnorm is not None:
-      raise ValueError("Cannot accept both `clipnorm` and `global_clipnorm`. 
" - "Received: `clipnorm`={}, `global_clipnorm`={}.".format( - self.clipnorm, self.global_clipnorm)) - self.clipvalue = kwargs.pop("clipvalue", None) - - @property - def clipnorm(self): - """`float` or `None`. If set, clips gradients to a maximum norm.""" - return self._clipnorm - - @property - def global_clipnorm(self): - """`float` or `None`. - - If set, clips gradients to a maximum norm. - - Check `tf.clip_by_global_norm` for more details. - """ - return self._global_clipnorm - - @clipnorm.setter - def clipnorm(self, val): - if val is not None and self.gradient_transformers: - raise ValueError("`clipnorm` cannot be set when `gradient_transformers` " - "is set. Instead, use the `gradient_transformers` to " - "specify clipping and other transformations. Received: " - f"val={val}, " - f"gradient_transformers={self.gradient_transformers}.") - self._clipnorm = val - self._clipnorm_fn = optimizer_utils.make_gradient_clipnorm_fn( - self._clipnorm) - - @global_clipnorm.setter - def global_clipnorm(self, val): - if val is not None and self.gradient_transformers: - raise ValueError("`global_clipnorm` cannot be set when " - "`gradient_transformers` " - "is set. Instead, use the `gradient_transformers` to " - "specify clipping and other transformations. Received: " - f"val={val}, " - f"gradient_transformers={self.gradient_transformers}.") - self._global_clipnorm = val - self._global_clipnorm_fn = optimizer_utils.make_global_gradient_clipnorm_fn( - self._global_clipnorm) - - @property - def clipvalue(self): - """`float` or `None`. If set, clips gradients to a maximum value.""" - return self._clipvalue - - @clipvalue.setter - def clipvalue(self, val): - if val is not None and self.gradient_transformers: - raise ValueError("`clipvalue` cannot be set when `gradient_transformers` " - "is set. Instead, use the `gradient_transformers` to " - "specify clipping and other transformations. Received: " - f"val={val}, " - f"gradient_transformers={self.gradient_transformers}.") - self._clipvalue = val - self._clipvalue_fn = optimizer_utils.make_gradient_clipvalue_fn( - self._clipvalue) - - def _transform_loss(self, loss): - """Called in `.minimize` to transform loss before computing gradients.""" - return loss - - def _get_gradients(self, tape, loss, var_list, grad_loss=None): - """Called in `minimize` to compute gradients from loss.""" - grads = tape.gradient(loss, var_list, grad_loss) - return list(zip(grads, var_list)) - - def _transform_unaggregated_gradients(self, grads_and_vars): - """Called in `apply_gradients` before gradient aggregation.""" - return grads_and_vars - - def _aggregate_gradients(self, grads_and_vars): - """Called in `apply_gradients` to aggregate gradients across devices. - - Note that user subclasses may override this, so the interface should not be - changed. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - - Returns: - A list of (aggregrated_gradient, variable) pairs. By default, this calls - `self.gradient_aggregator`. 
- """ - return self.gradient_aggregator(grads_and_vars) - - def _transform_gradients(self, grads_and_vars): - """Called in `apply_gradients` after aggregation.""" - if self._clipvalue is not None: - grads_and_vars = self._clipvalue_fn(grads_and_vars) - if self._clipnorm is not None: - grads_and_vars = self._clipnorm_fn(grads_and_vars) - if self._global_clipnorm is not None: - grads_and_vars = self._global_clipnorm_fn(grads_and_vars) - - for fn in self.gradient_transformers: - grads_and_vars = fn(grads_and_vars) - return grads_and_vars - - def minimize(self, loss, var_list, grad_loss=None, name=None, tape=None): - """Minimize `loss` by updating `var_list`. - - This method simply computes gradient using `tf.GradientTape` and calls - `apply_gradients()`. If you want to process the gradient before applying - then call `tf.GradientTape` and `apply_gradients()` explicitly instead - of using this function. - - Args: - loss: `Tensor` or callable. If a callable, `loss` should take no arguments - and return the value to minimize. If a `Tensor`, the `tape` argument - must be passed. - var_list: list or tuple of `Variable` objects to update to minimize - `loss`, or a callable returning the list or tuple of `Variable` objects. - Use callable when the variable list would otherwise be incomplete before - `minimize` since the variables are created at the first time `loss` is - called. - grad_loss: (Optional). A `Tensor` holding the gradient computed for - `loss`. - name: (Optional) str. Name for the returned operation. - tape: (Optional) `tf.GradientTape`. If `loss` is provided as a `Tensor`, - the tape that computed the `loss` must be provided. - - Returns: - An `Operation` that updates the variables in `var_list`. The `iterations` - will be automatically increased by 1. - - Raises: - ValueError: If some of the variables are not `Variable` objects. - - """ - grads_and_vars = self._compute_gradients( - loss, var_list=var_list, grad_loss=grad_loss, tape=tape) - return self.apply_gradients(grads_and_vars, name=name) - - def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): - """Compute gradients of `loss` for the variables in `var_list`. - - This is the first part of `minimize()`. It returns a list - of (gradient, variable) pairs where "gradient" is the gradient - for "variable". Note that "gradient" can be a `Tensor`, an - `IndexedSlices`, or `None` if there is no gradient for the - given variable. - - Args: - loss: `Tensor` or callable. If a callable, `loss` should take no - arguments and return the value to minimize. If a `Tensor`, the `tape` - argument must be passed. - var_list: list or tuple of `Variable` objects to update to minimize - `loss`, or a callable returning the list or tuple of `Variable` objects. - Use callable when the variable list would otherwise be incomplete before - `minimize` and the variables are created at the first time when `loss` - is called. - grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`. - tape: (Optional) `tf.GradientTape`. If `loss` is provided as a `Tensor`, - the tape that computed the `loss` must be provided. - - Returns: - A list of (gradient, variable) pairs. Variable is always present, but - gradient can be `None`. - - Raises: - TypeError: If `var_list` contains anything else than `Variable` objects. - ValueError: If some arguments are invalid, or var_list is None. - """ - # TODO(joshl): Test that we handle weight decay in a reasonable way. 
- if not callable(loss) and tape is None: - raise ValueError("`tape` is required when a `Tensor` loss is passed. " - f"Received: loss={loss}, tape={tape}.") - tape = tape if tape is not None else tf.GradientTape() - - if callable(loss): - with tape: - if not callable(var_list): - tape.watch(var_list) - loss = loss() - if callable(var_list): - var_list = var_list() - - with tape: - loss = self._transform_loss(loss) - - var_list = tf.nest.flatten(var_list) - with tf.name_scope(self._name + "/gradients"): - grads_and_vars = self._get_gradients(tape, loss, var_list, grad_loss) - - self._assert_valid_dtypes([ - v for g, v in grads_and_vars - if g is not None and v.dtype != tf.resource - ]) - - return grads_and_vars - - def apply_gradients(self, - grads_and_vars, - name=None, - experimental_aggregate_gradients=True): - """Apply gradients to variables. - - This is the second part of `minimize()`. It returns an `Operation` that - applies gradients. - - The method sums gradients from all replicas in the presence of - `tf.distribute.Strategy` by default. You can aggregate gradients yourself by - passing `experimental_aggregate_gradients=False`. - - Example: - - ```python - grads = tape.gradient(loss, vars) - grads = tf.distribute.get_replica_context().all_reduce('sum', grads) - # Processing aggregated gradients. - optimizer.apply_gradients(zip(grads, vars), - experimental_aggregate_gradients=False) - - ``` - - Args: - grads_and_vars: List of (gradient, variable) pairs. - name: Optional name for the returned operation. Default to the name passed - to the `Optimizer` constructor. - experimental_aggregate_gradients: Whether to sum gradients from different - replicas in the presence of `tf.distribute.Strategy`. If False, it's - user responsibility to aggregate the gradients. Default to True. - - Returns: - An `Operation` that applies the specified gradients. The `iterations` - will be automatically increased by 1. - - Raises: - TypeError: If `grads_and_vars` is malformed. - ValueError: If none of the variables have gradients. - RuntimeError: If called in a cross-replica context. - """ - grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) - var_list = [v for (_, v) in grads_and_vars] - - with tf.name_scope(self._name): - # Create iteration if necessary. - with tf.init_scope(): - self._create_all_weights(var_list) - - if not grads_and_vars: - # Distribution strategy does not support reducing an empty list of - # gradients - return tf.no_op() - - if tf.distribute.in_cross_replica_context(): - raise RuntimeError( - "`apply_gradients() cannot be called in cross-replica context. " - "Use `tf.distribute.Strategy.run` to enter replica " - "context. For more information, please see the docstring of " - "`tf.distribute.get_replica_context`.") - - strategy = tf.distribute.get_strategy() - if (not experimental_aggregate_gradients and strategy and isinstance( - strategy, - (tf.compat.v1.distribute.experimental.ParameterServerStrategy, - tf.distribute.experimental.ParameterServerStrategy, - tf.distribute.experimental.CentralStorageStrategy, - tf.compat.v1.distribute.experimental.CentralStorageStrategy))): - raise NotImplementedError( - "`experimental_aggregate_gradients=False is not supported for " - "ParameterServerStrategy and CentralStorageStrategy. 
Used: " - f"strategy={strategy}.") - - apply_state = self._prepare(var_list) - if experimental_aggregate_gradients: - grads_and_vars = self._transform_unaggregated_gradients(grads_and_vars) - grads_and_vars = self._aggregate_gradients(grads_and_vars) - grads_and_vars = self._transform_gradients(grads_and_vars) - - return tf.__internal__.distribute.interim.maybe_merge_call( - functools.partial(self._distributed_apply, apply_state=apply_state), - strategy, - grads_and_vars, - name=name) - - def _distributed_apply(self, distribution, grads_and_vars, apply_state, name): - """`apply_gradients` using a `DistributionStrategy`.""" - - def apply_grad_to_update_var(var, grad): - """Apply gradient to variable.""" - if isinstance(var, tf.Tensor): - raise NotImplementedError( - f"Updating a `Tensor` is not implemented. Received: var={var}.") - - apply_kwargs = {} - if isinstance(grad, tf.IndexedSlices): - if var.constraint is not None: - raise RuntimeError( - "Cannot use a constraint function on a sparse variable. " - f"Received: grad={grad}, var.constraint={var.constraint}.") - if "apply_state" in self._sparse_apply_args: - apply_kwargs["apply_state"] = apply_state - return self._resource_apply_sparse_duplicate_indices( - grad.values, var, grad.indices, **apply_kwargs) - - if "apply_state" in self._dense_apply_args: - apply_kwargs["apply_state"] = apply_state - update_op = self._resource_apply_dense(grad, var, **apply_kwargs) - if var.constraint is not None: - with tf.control_dependencies([update_op]): - return var.assign(var.constraint(var)) - else: - return update_op - - eagerly_outside_functions = tf.compat.v1.executing_eagerly_outside_functions() - update_ops = [] - with name_scope_only_in_function_or_graph(name or self._name): - for grad, var in grads_and_vars: - # Colocate the update with variables to avoid unnecessary communication - # delays. See b/136304694. - with distribution.extended.colocate_vars_with(var): - with name_scope_only_in_function_or_graph( - "update" if eagerly_outside_functions else "update_" + - var.op.name): - update_op = distribution.extended.update( - var, apply_grad_to_update_var, args=(grad,), group=False) - if tf.distribute.in_cross_replica_context(): - # In cross-replica context, extended.update returns a list of - # update ops from all replicas (group=False). - update_ops.extend(update_op) - else: - # In replica context, extended.update return the single update op - # of current replica. - update_ops.append(update_op) - - any_symbolic = any(isinstance(i, tf.Operation) or - tf_utils.is_symbolic_tensor(i) for i in update_ops) - if not tf.executing_eagerly() or any_symbolic: - # If the current context is graph mode or any of the update ops are - # symbolic then the step update should be carried out under a graph - # context. (eager updates execute immediately) - with backend._current_graph(update_ops).as_default(): # pylint: disable=protected-access - with tf.control_dependencies([tf.group(update_ops)]): - return self.iterations.assign_add(1, read_value=False) - - return self.iterations.assign_add(1) - - def get_gradients(self, loss, params): - """Returns gradients of `loss` with respect to `params`. - - Should be used only in legacy v1 graph mode. - - Args: - loss: Loss tensor. - params: List of variables. - - Returns: - List of gradient tensors. - - Raises: - ValueError: In case any gradient cannot be computed (e.g. if gradient - function not implemented). 
- """ - params = tf.nest.flatten(params) - with backend.get_graph().as_default(), backend.name_scope(self._name + - "/gradients"): - grads = tf.compat.v1.gradients(loss, params) - for grad, param in zip(grads, params): - if grad is None: - raise ValueError("Variable {} has `None` for gradient. " - "Please make sure that all of your ops have a " - "gradient defined (i.e. are differentiable). " - "Common ops without gradient: " - "K.argmax, K.round, K.eval.".format(param)) - return grads - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - grads_and_vars = list(zip(grads, params)) - self._assert_valid_dtypes([ - v for g, v in grads_and_vars - if g is not None and v.dtype != tf.resource - ]) - return [self.apply_gradients(grads_and_vars)] - - def _set_hyper(self, name, value): - """set hyper `name` to value. value can be callable, tensor, numeric.""" - if isinstance(value, tf.__internal__.tracking.Trackable): - self._track_trackable(value, name, overwrite=True) - if name not in self._hyper: - self._hyper[name] = value - else: - prev_value = self._hyper[name] - if (callable(prev_value) - or isinstance(prev_value, - (tf.Tensor, int, float, - learning_rate_schedule.LearningRateSchedule)) - or isinstance(value, learning_rate_schedule.LearningRateSchedule)): - self._hyper[name] = value - else: - backend.set_value(self._hyper[name], value) - - def _get_hyper(self, name, dtype=None): - if not self._hypers_created: - self._create_hypers() - value = self._hyper[name] - if isinstance(value, learning_rate_schedule.LearningRateSchedule): - return value - if callable(value): - value = value() - if dtype: - return tf.cast(value, dtype) - else: - return value - - def _create_slots(self, var_list): - pass - - def _create_slots_for_sharded_variables(self, var_list): - """Add ShardedVariables to slots to later reconstruct for checkpointing. - - ShardedVariables don't have slot variables created for them; their shards - do. This function allows users to call get_slot with a ShardedVariable input - and receive a ShardedVariable output containing the appropriate slot vars. - - Iterate over the variables to find shards, and aggregate the sharded - containers in a set. Add these ShardedVariables to _slots so that get_slot - can retrieve the proper slot variables for their component shards, and - reconstruct those into a ShardedVariable. - - Args: - var_list: list or tuple of `Variable` objects that will be minimized - using this optimizer. - """ - sharded_vars = set() - for var in var_list: - if getattr(var, "_sharded_container", False): - sharded_vars.add(var._sharded_container()) # pylint: disable=protected-access - - for sharded_var in sharded_vars: - sharded_key = _var_key(sharded_var) - slot_dict = {} - for slot in self.get_slot_names(): - slot_dict[slot] = sharded_var - self._slots[sharded_key] = slot_dict - - def _create_all_weights(self, var_list): - """Creates all weights, including iterations, hyperparameters and slot vars. - - This will add newly created variables to `optimizer.weights`. - - New variables are only created when this method is called the first time, or - when called with different variables in the var_list. - - Args: - var_list: list or tuple of `Variable` objects that will be minimized - using this optimizer. 
- """ - - _ = self.iterations - self._create_hypers() - self._create_slots(var_list) - self._create_slots_for_sharded_variables(var_list) - - def __getattribute__(self, name): - """Overridden to support hyperparameter access.""" - try: - return super().__getattribute__(name) - except AttributeError as e: - # Needed to avoid infinite recursion with __setattr__. - if name == "_hyper": - raise e - # Backwards compatibility with Keras optimizers. - if name == "lr": - name = "learning_rate" - if name in self._hyper: - return self._get_hyper(name) - raise e - - def __dir__(self): - result = set(super().__dir__()) - if "_hyper" in result: - result |= self._hyper.keys() - if "learning_rate" in self._hyper.keys(): - result.add("lr") - return list(result) - - def __setattr__(self, name, value): - """Override setattr to support dynamic hyperparameter setting.""" - # Backwards compatibility with Keras optimizers. - if name == "lr": - name = "learning_rate" - if hasattr(self, "_hyper") and name in self._hyper: - self._set_hyper(name, value) - else: - super().__setattr__(name, value) - - def get_slot_names(self): - """A list of names for this optimizer's slots.""" - return self._slot_names - - def add_slot(self, var, slot_name, initializer="zeros", shape=None): - """Add a new slot variable for `var`. - - A slot variable is an additional variable associated with `var` to train. - It is allocated and managed by optimizers, e.g. `Adam`. - - Args: - var: a `Variable` object. - slot_name: name of the slot variable. - initializer: initializer of the slot variable - shape: (Optional) shape of the slot variable. If not set, it will default - to the shape of `var`. - - Returns: - A slot variable. - """ - if slot_name not in self._slot_names: - self._slot_names.append(slot_name) - var_key = _var_key(var) - slot_dict = self._slots.setdefault(var_key, {}) - weight = slot_dict.get(slot_name, None) - if weight is None: - if isinstance(initializer, str) or callable(initializer): - initializer = initializers.get(initializer) - if isinstance(initializer, tf.__internal__.tracking - .CheckpointInitialValueCallable) or (shape is not None): - slot_shape = shape - else: - slot_shape = var.shape - initial_value = functools.partial( - initializer, shape=slot_shape, dtype=var.dtype) - else: - initial_value = initializer - - with self._distribution_strategy_scope(): - strategy = tf.distribute.get_strategy() - if not strategy.extended.variable_created_in_scope(var): - raise ValueError( - "Trying to create optimizer slot variable under the scope for " - "tf.distribute.Strategy ({}), which is different from the scope " - "used for the original variable ({}). Make sure the slot " - "variables are created under the same strategy scope. This may " - "happen if you're restoring from a checkpoint outside the scope." 
- .format(strategy, var)) - - with strategy.extended.colocate_vars_with(var): - weight = tf.Variable( - name="%s/%s" % (var._shared_name, slot_name), # pylint: disable=protected-access - dtype=var.dtype, - trainable=False, - initial_value=initial_value) - backend.track_variable(weight) - slot_dict[slot_name] = weight - self._restore_slot_variable( - slot_name=slot_name, variable=var, - slot_variable=weight) - self._weights.append(weight) - return weight - - def get_slot(self, var, slot_name): - var_key = _var_key(var) - slot_dict = self._slots[var_key] - slot_variable = slot_dict[slot_name] - if isinstance(slot_variable, - tf.__internal__.distribute.ShardedVariable): - # Construct a ShardedVariable that points to the input ShardedVariable's - # component shard's slot variables. - shard_vars = [] - for shard in slot_variable.variables: - slot_shard = self.get_slot(shard, slot_name) - shard_vars.append(slot_shard) - slot_variable = ( - tf.__internal__.distribute.ShardedVariable( - shard_vars, name=slot_variable.name) - ) - return slot_variable - - def _prepare(self, var_list): - keys = set() - for var in var_list: - if isinstance(var, tf.distribute.DistributedValues): - var_devices = var._devices # pylint: disable=protected-access - else: - var_devices = [var.device] - var_dtype = var.dtype.base_dtype - for var_device in var_devices: - keys.add((var_device, var_dtype)) - - apply_state = {} - for var_device, var_dtype in keys: - apply_state[(var_device, var_dtype)] = {} - with tf.device(var_device): - self._prepare_local(var_device, var_dtype, apply_state) - - return apply_state - - def _prepare_local(self, var_device, var_dtype, apply_state): - if "learning_rate" in self._hyper: - lr_t = tf.identity(self._decayed_lr(var_dtype)) - apply_state[(var_device, var_dtype)]["lr_t"] = lr_t - - def _fallback_apply_state(self, var_device, var_dtype): - """Compatibility for subclasses that don't pass apply_state through.""" - apply_state = {(var_device, var_dtype): {}} - self._prepare_local(var_device, var_dtype, apply_state) - return apply_state[(var_device, var_dtype)] - - def _create_hypers(self): - if self._hypers_created: - return - with self._distribution_strategy_scope(): - # Iterate hyper values deterministically. - for name, value in sorted(self._hyper.items()): - if isinstance(value, - (tf.Tensor, tf.Variable)) or callable(value): - # The check for `callable` covers the usage when `value` is a - # `LearningRateSchedule`, in which case it does not need to create a - # variable. - continue - else: - self._hyper[name] = self.add_weight( - name, - shape=[], - trainable=False, - initializer=value, - aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) - self._hypers_created = True - - @property - def iterations(self): - """Variable. The number of training steps this Optimizer has run.""" - if self._iterations is None: - with self._distribution_strategy_scope(): - self._iterations = self.add_weight( - "iter", - shape=[], - dtype=tf.int64, - trainable=False, - aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) - self._weights.append(self._iterations) - return self._iterations - - @iterations.setter - def iterations(self, variable): - if self._iterations is not None: - raise RuntimeError("Cannot set `iterations` to a new Variable after " - "the Optimizer weights have been created. 
Here it is " - f"attempting to set `iterations` to {variable}.") - self._iterations = variable - self._weights.append(self._iterations) - - def _decayed_lr(self, var_dtype): - """Get decayed learning rate as a Tensor with dtype=var_dtype.""" - lr_t = self._get_hyper("learning_rate", var_dtype) - if isinstance(lr_t, learning_rate_schedule.LearningRateSchedule): - local_step = tf.cast(self.iterations, var_dtype) - lr_t = tf.cast(lr_t(local_step), var_dtype) - if self._initial_decay > 0.: - local_step = tf.cast(self.iterations, var_dtype) - decay_t = tf.cast(self._initial_decay, var_dtype) - lr_t = lr_t / (1. + decay_t * local_step) - return lr_t - - @abc.abstractmethod - def get_config(self): - """Returns the config of the optimizer. - - An optimizer config is a Python dictionary (serializable) - containing the configuration of an optimizer. - The same optimizer can be reinstantiated later - (without any saved state) from this configuration. - - Returns: - Python dictionary. - """ - config = {"name": self._name} - if self.clipnorm is not None: - config["clipnorm"] = self.clipnorm - if self.clipvalue is not None: - config["clipvalue"] = self.clipvalue - if self.global_clipnorm is not None: - config["global_clipnorm"] = self.global_clipnorm - return config - - @classmethod - def from_config(cls, config, custom_objects=None): - """Creates an optimizer from its config. - - This method is the reverse of `get_config`, - capable of instantiating the same optimizer from the config - dictionary. - - Args: - config: A Python dictionary, typically the output of get_config. - custom_objects: A Python dictionary mapping names to additional Python - objects used to create this optimizer, such as a function used for a - hyperparameter. - - Returns: - An optimizer instance. - """ - if "lr" in config: - config["learning_rate"] = config.pop("lr") - if "learning_rate" in config: - if isinstance(config["learning_rate"], dict): - config["learning_rate"] = learning_rate_schedule.deserialize( - config["learning_rate"], custom_objects=custom_objects) - return cls(**config) - - def _serialize_hyperparameter(self, hyperparameter_name): - """Serialize a hyperparameter that can be a float, callable, or Tensor.""" - value = self._hyper[hyperparameter_name] - if isinstance(value, learning_rate_schedule.LearningRateSchedule): - return learning_rate_schedule.serialize(value) - if callable(value): - return value() - if tf.is_tensor(value): - return backend.get_value(value) - return value - - def variables(self): - """Returns variables of this Optimizer based on the order created.""" - return self._weights - - @property - def weights(self): - """Returns variables of this Optimizer based on the order created.""" - return self._weights - - def get_weights(self): - """Returns the current weights of the optimizer. - - The weights of an optimizer are its state (ie, variables). - This function returns the weight values associated with this - optimizer as a list of Numpy arrays. The first value is always the - iterations count of the optimizer, followed by the optimizer's state - variables in the order they were created. The returned list can in turn - be used to load state into similarly parameterized optimizers. 
-
-    For example, the RMSprop optimizer for this simple model returns a list
-    of three values: the iteration count, followed by the root-mean-square
-    value of the kernel and bias of the single Dense layer:
-
-    >>> opt = tf.keras.optimizers.RMSprop()
-    >>> m = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])
-    >>> m.compile(opt, loss='mse')
-    >>> data = np.arange(100).reshape(5, 20)
-    >>> labels = np.zeros(5)
-    >>> results = m.fit(data, labels)  # Training.
-    >>> len(opt.get_weights())
-    3
-
-    Returns:
-        Weights values as a list of numpy arrays.
-    """
-    params = self.weights
-    return backend.batch_get_value(params)
-
-  # TODO(tanzheny): Maybe share this logic with base_layer.
-  def set_weights(self, weights):
-    """Set the weights of the optimizer.
-
-    The weights of an optimizer are its state (ie, variables).
-    This function takes the weight values associated with this
-    optimizer as a list of Numpy arrays. The first value is always the
-    iterations count of the optimizer, followed by the optimizer's state
-    variables in the order they are created. The passed values are used to
-    set the new state of the optimizer.
-
-    For example, the RMSprop optimizer for this simple model takes a list of
-    three values: the iteration count, followed by the root-mean-square
-    value of the kernel and bias of the single Dense layer:
-
-    >>> opt = tf.keras.optimizers.RMSprop()
-    >>> m = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])
-    >>> m.compile(opt, loss='mse')
-    >>> data = np.arange(100).reshape(5, 20)
-    >>> labels = np.zeros(5)
-    >>> results = m.fit(data, labels)  # Training.
-    >>> new_weights = [np.array(10), np.ones([20, 10]), np.zeros([10])]
-    >>> opt.set_weights(new_weights)
-    >>> opt.iterations
-    <tf.Variable 'RMSprop/iter:0' shape=() dtype=int64, numpy=10>
-
-    Args:
-        weights: weight values as a list of numpy arrays.
-    """
-    params = self.weights
-    if len(params) != len(weights):
-      raise ValueError(
-          f"You called `set_weights(weights)` on optimizer {self._name} "
-          f"with a weight list of length {str(len(weights))}, "
-          f"but the optimizer was expecting {str(len(params))} "
-          f"weights. Provided weights: {str(weights)[:50]}...")
-    if not params:
-      return
-    weight_value_tuples = []
-    param_values = backend.batch_get_value(params)
-    for pv, p, w in zip(param_values, params, weights):
-      if pv.shape != w.shape:
-        raise ValueError(f"Optimizer weight shape {str(pv.shape)} "
-                         "not compatible with "
-                         f"provided weight shape {str(w.shape)}.")
-      weight_value_tuples.append((p, w))
-    backend.batch_set_value(weight_value_tuples)
-
-  def add_weight(self,
-                 name,
-                 shape,
-                 dtype=None,
-                 initializer="zeros",
-                 trainable=None,
-                 synchronization=tf.VariableSynchronization.AUTO,
-                 aggregation=tf.VariableAggregation.NONE):
-
-    if dtype is None:
-      dtype = tf.float32
-    if isinstance(initializer, str) or callable(initializer):
-      initializer = initializers.get(initializer)
-
-    if synchronization == tf.VariableSynchronization.ON_READ:
-      if trainable:
-        raise ValueError(
-            "Synchronization value can be set to "
-            "VariableSynchronization.ON_READ only for non-trainable "
-            "variables. You have specified trainable=True and "
-            "synchronization=VariableSynchronization.ON_READ.")
-      else:
-        # Set trainable to be false when variable is to be synced on read.
- trainable = False - elif trainable is None: - trainable = True - - variable = self._add_variable_with_custom_getter( - name=name, - shape=shape, - getter=base_layer_utils.make_variable, - overwrite=True, - initializer=initializer, - dtype=dtype, - trainable=trainable, - use_resource=True, - synchronization=synchronization, - aggregation=aggregation) - backend.track_variable(variable) - - return variable - - def _init_set_name(self, name, zero_based=True): - if not name: - self._name = backend.unique_object_name( - generic_utils.to_snake_case(self.__class__.__name__), - zero_based=zero_based) - else: - self._name = name - - def _assert_valid_dtypes(self, tensors): - """Asserts tensors are all valid types (see `_valid_dtypes`). - - Args: - tensors: Tensors to check. - - Raises: - ValueError: If any tensor is not a valid type. - """ - valid_dtypes = self._valid_dtypes() - for t in tensors: - dtype = t.dtype.base_dtype - if dtype not in valid_dtypes: - raise ValueError("Invalid type {} for {}, expected: {}.".format( - dtype, t.name, [v for v in valid_dtypes])) - - def _valid_dtypes(self): - """Valid types for loss, variables and gradients. - - Subclasses should override to allow other float types. - - Returns: - Valid types for loss, variables and gradients. - """ - return _DEFAULT_VALID_DTYPES - - def _call_if_callable(self, param): - """Call the function if param is callable.""" - return param() if callable(param) else param - - def _resource_apply_dense(self, grad, handle, apply_state): - """Add ops to apply dense gradients to the variable `handle`. - - Args: - grad: a `Tensor` representing the gradient. - handle: a `Tensor` of dtype `resource` which points to the variable to be - updated. - apply_state: A dict which is used across multiple apply calls. - - Returns: - An `Operation` which updates the value of the variable. - """ - raise NotImplementedError("`_resource_apply_dense` must be implemented in " - "subclasses.") - - def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices, - **kwargs): - """Add ops to apply sparse gradients to `handle`, with repeated indices. - - Optimizers which override this method must deal with repeated indices. See - the docstring of `_apply_sparse_duplicate_indices` for details. By default - the correct behavior, to sum non-unique indices and their associated - gradients, is enforced by first pre-processing `grad` and `indices` and - passing them on to `_resource_apply_sparse`. Optimizers which deal correctly - with duplicate indices may instead override this method to avoid the - overhead of summing. - - Args: - grad: a `Tensor` representing the gradient for the affected indices. - handle: a `Tensor` of dtype `resource` which points to the variable to be - updated. - indices: a `Tensor` of integral type representing the indices for which - the gradient is nonzero. Indices may be repeated. - **kwargs: May optionally contain `apply_state` - - Returns: - An `Operation` which updates the value of the variable. - """ - summed_grad, unique_indices = _deduplicate_indexed_slices( - values=grad, indices=indices) - return self._resource_apply_sparse(summed_grad, handle, unique_indices, - **kwargs) - - def _resource_apply_sparse(self, grad, handle, indices, apply_state): - """Add ops to apply sparse gradients to the variable `handle`. - - Similar to `_apply_sparse`, the `indices` argument to this method has been - de-duplicated. 
Optimizers which deal correctly with non-unique indices may - instead override `_resource_apply_sparse_duplicate_indices` to avoid this - overhead. - - Args: - grad: a `Tensor` representing the gradient for the affected indices. - handle: a `Tensor` of dtype `resource` which points to the variable to be - updated. - indices: a `Tensor` of integral type representing the indices for which - the gradient is nonzero. Indices are unique. - apply_state: A dict which is used across multiple apply calls. - - Returns: - An `Operation` which updates the value of the variable. - """ - raise NotImplementedError("`_resource_apply_sparse` Must be implemented in " - "subclasses.") - - def _resource_scatter_add(self, x, i, v): - with tf.control_dependencies([ - tf.raw_ops.ResourceScatterAdd( - resource=x.handle, indices=i, updates=v) - ]): - return x.value() - - def _resource_scatter_update(self, x, i, v): - with tf.control_dependencies( - [tf.raw_ops.ResourceScatterUpdate( - resource=x.handle, indices=i, updates=v)]): - return x.value() - - @property - @layer_utils.cached_per_instance - def _dense_apply_args(self): - return tf_inspect.getfullargspec(self._resource_apply_dense).args - - @property - @layer_utils.cached_per_instance - def _sparse_apply_args(self): - return tf_inspect.getfullargspec(self._resource_apply_sparse).args - - # --------------- - # For implementing the trackable interface - # --------------- - - def _restore_slot_variable(self, slot_name, variable, slot_variable): - """Restore a newly created slot variable's value.""" - variable_key = _var_key(variable) - deferred_restorations = self._deferred_slot_restorations.get( - slot_name, {}).pop(variable_key, []) - # Iterate over restores, highest restore UID first to minimize the number - # of assignments. - deferred_restorations.sort(key=lambda position: position.restore_uid, - reverse=True) - for checkpoint_position in deferred_restorations: - checkpoint_position.restore(slot_variable) - - def _create_or_restore_slot_variable( - self, slot_variable_position, slot_name, variable): - """Returns the slot variable that should have a value restored into it. - - It is up to the caller to restore the value into the slot variable if a - valid slot variable is returned. - - Called when a variable which has an associated slot variable is created or - restored. When executing eagerly, we create the slot variable with a - restoring initializer. - - No new variables are created when graph building. Instead, - _restore_slot_variable catches these after normal creation and adds restore - ops to the graph. This method is nonetheless important when graph building - for the case when a slot variable has already been created but `variable` - has just been added to a dependency graph (causing us to realize that the - slot variable needs to be restored). - - Args: - slot_variable_position: A `trackable._CheckpointPosition` object - indicating the slot variable `Trackable` object to be restored. - slot_name: The name of this `Optimizer`'s slot to restore into. - variable: The variable object this slot is being created for. - - Returns: - A slot variable that should have a value restored into it, or None if a - slot variable should not be restored at this time. 
- """ - variable_key = _var_key(variable) - slot_dict = self._slots.get(variable_key, {}) - slot_variable = slot_dict.get(slot_name, None) - if (slot_variable is None and tf.executing_eagerly() and - slot_variable_position.is_simple_variable() - # Defer slot variable creation if there is an active variable creator - # scope. Generally we'd like to eagerly create/restore slot variables - # when possible, but this may mean that scopes intended to catch - # `variable` also catch its eagerly created slot variable - # unintentionally (specifically make_template would add a dependency on - # a slot variable if not for this case). Deferring is mostly harmless - # (aside from double initialization), and makes variable creator scopes - # behave the same way they do when graph building. - # - # One notable case is with distribution strategy, which uses variable - # creator scope but always desires the `variable` and the slot to use - # the same scope, thus we can safely eagerly create/restore slot - # variables. - and (not tf.compat.v1.get_default_graph()._variable_creator_stack or # pylint: disable=protected-access - self._distribution_strategy)): - initializer = tf.__internal__.tracking.CheckpointInitialValueCallable( - checkpoint_position=slot_variable_position) - slot_variable = self.add_slot( - var=variable, - initializer=initializer, - slot_name=slot_name, - shape=slot_variable_position.value_shape()) - # Slot variables are not owned by any one object (because we don't want to - # save the slot variable if the optimizer is saved without the non-slot - # variable, or if the non-slot variable is saved without the optimizer; - # it's a dependency hypergraph with edges of the form (optimizer, non-slot - # variable, variable)). So we don't _track_ slot variables anywhere, and - # instead special-case this dependency and otherwise pretend it's a normal - # graph. - if slot_variable is not None: - # For sharded variables, we need the logic in get_slot to combine slot - # variables for its shards - if (slot_variable is variable) and (isinstance( - variable, tf.__internal__.distribute.ShardedVariable)): - return self.get_slot(variable, slot_name) - # If we've either made this slot variable, or if we've pulled out an - # existing slot variable, we should restore it. - return slot_variable - else: - # We didn't make the slot variable. Defer restoring until it gets created - # normally. We keep a list rather than the one with the highest restore - # UID in case slot variables have their own dependencies, in which case - # those could differ between restores. - self._deferred_slot_restorations.setdefault( - slot_name, {}).setdefault(variable_key, []).append( - slot_variable_position) - return None - - @contextlib.contextmanager - def _distribution_strategy_scope(self): - """Returns the `tf.distribute.Strategy` this optimizer was created under.""" - if self._distribution_strategy and not tf.distribute.has_strategy(): - with self._distribution_strategy.scope(): - yield self._distribution_strategy.scope() - else: - yield - - -def _var_key(var): - """Key for representing a primary variable, for looking up slots. - - In graph mode the name is derived from the var shared name. - In eager mode the name is derived from the var unique id. - If distribution strategy exists, get the primary variable first. - - Args: - var: the variable. - - Returns: - the unique name of the variable. - """ - - # pylint: disable=protected-access - # Get the distributed variable if it exists. 
- if hasattr(var, "_distributed_container"): - var = var._distributed_container() - if getattr(var, "_in_graph_mode", False): - return var._shared_name - return var._unique_id - - -def _get_slot_key_from_var(var, slot_name): - """Get the slot key for the variable: var_name/slot_name.""" - - name = _var_key(var) - return name + "/" + slot_name - - -class RestoredOptimizer(OptimizerV2): - """A non-functional Optimizer implementation for checkpoint compatibility. - - Holds slot variables and hyperparameters when an optimizer is restored from a - SavedModel. These variables may be referenced in functions along with ops - created by the original optimizer, but currently we do not support using the - optimizer object itself (e.g. through `apply_gradients`). - """ - # TODO(allenl): Make the restored optimizer functional by tracing its apply - # methods. - - def __init__(self): - super().__init__("RestoredOptimizer") - self._hypers_created = True - - def get_config(self): - # TODO(allenl): Save and restore the Optimizer's config - raise NotImplementedError( - "Restoring functional Optimizers from SavedModels is not currently " - "supported. Please file a feature request if this limitation bothers " - "you.") - -tf.__internal__.saved_model.load.register_revived_type( - "optimizer", - lambda obj: isinstance(obj, OptimizerV2), - versions=[tf.__internal__.saved_model.load.VersionedTypeRegistration( - object_factory=lambda proto: RestoredOptimizer(), - version=2, - min_producer_version=1, - min_consumer_version=1, - setter=RestoredOptimizer._set_hyper # pylint: disable=protected-access - )]) diff --git a/keras/optimizers/optimizer_v2/optimizer_v2_test.py b/keras/optimizers/optimizer_v2/optimizer_v2_test.py deleted file mode 100644 index f22efb0050d0..000000000000 --- a/keras/optimizers/optimizer_v2/optimizer_v2_test.py +++ /dev/null @@ -1,1320 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Functional test for OptimizerV2.""" - -import collections - -from absl.testing import parameterized -import keras -from keras import backend -from keras import callbacks -from keras import losses -from keras.engine import input_layer -from keras.engine import sequential -from keras.engine import training -from keras.layers import core -from keras.layers import regularization -from keras.optimizers import optimizer_v1 -from keras.optimizers.optimizer_v2 import adadelta -from keras.optimizers.optimizer_v2 import adagrad -from keras.optimizers.optimizer_v2 import adam -from keras.optimizers.optimizer_v2 import adamax -from keras.optimizers.optimizer_v2 import ftrl -from keras.optimizers.optimizer_v2 import gradient_descent -from keras.optimizers.optimizer_v2 import nadam -from keras.optimizers.optimizer_v2 import optimizer_v2 -from keras.optimizers.optimizer_v2 import rmsprop -from keras.optimizers.schedules import learning_rate_schedule -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils -from keras.utils import np_utils -import numpy as np -import tensorflow.compat.v2 as tf - -from tensorflow.python.framework import test_util as tf_test_utils - - -_DATA_TYPES = [tf.half, tf.float32, tf.float64] -# TODO(b/141710709): complex support in NVCC and ROCM. -if (not tf_test_utils.IsBuiltWithNvcc() and not tf.test.is_built_with_rocm()): - _DATA_TYPES += [tf.complex64, tf.complex128] - - -class OptimizerTest(tf.test.TestCase, parameterized.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testBasic(self): - for dtype in _DATA_TYPES: - with test_utils.use_gpu(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - loss = lambda: 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop - sgd = gradient_descent.SGD(3.0) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Run 1 step of sgd through optimizer - opt_op = sgd.minimize(loss, var_list=[var0, var1]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - # Validate updated params - self.assertAllClose([-14., -13.], self.evaluate(var0)) - self.assertAllClose([-6., -5.], self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testAdaptiveLearningRate(self): - for dtype in _DATA_TYPES: - with self.test_session(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - - def loss(): - return 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop - - sgd = gradient_descent.SGD(1.0) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Run 1 step of sgd through optimizer - opt_op = sgd.minimize(loss, [var0, var1]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - # Validate updated params - # var0 = [1., 2.] - 1.0 * [5, 5] - self.assertAllClose([-4., -3.], self.evaluate(var0)) - # var1 = [3., 4.] 
- 1.0 * [3, 3] - self.assertAllClose([0., 1.], self.evaluate(var1)) - - sgd.learning_rate = 0.5 - if tf.executing_eagerly(): - sgd.minimize(loss, [var0, var1]) - else: - self.evaluate(opt_op) - # Validate updated params - # var0 = [-4., -3.] - 0.5 * [5, 5] - self.assertAllClose([-6.5, -5.5], self.evaluate(var0)) - # var1 = [0., 1.] - 0.5 * [3, 3] - self.assertAllClose([-1.5, -0.5], self.evaluate(var1)) - - sgd.learning_rate = learning_rate_schedule.InverseTimeDecay( - 0.5, decay_steps=1.0, decay_rate=0.5) - if tf.executing_eagerly(): - sgd.minimize(loss, [var0, var1]) - else: - self.evaluate(opt_op) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testPrecomputedGradient(self): - for dtype in _DATA_TYPES: - with test_utils.use_gpu(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - loss = lambda: 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop - grad_loss = tf.constant([42, -42], dtype=dtype) - sgd = gradient_descent.SGD(3.0) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Run 1 step of sgd through optimizer - opt_op = sgd.minimize(loss, var_list=[var0, var1], grad_loss=grad_loss) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - # Validate updated params - self.assertAllClose([1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)], - self.evaluate(var0)) - self.assertAllClose([3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)], - self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testNoGradients(self): - for dtype in _DATA_TYPES: - with test_utils.use_gpu(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - loss = lambda: 5 * var0 # pylint: disable=cell-var-from-loop - sgd_op = gradient_descent.SGD(3.0) - with self.assertRaisesRegex(ValueError, 'No gradients'): - # var1 has no gradient - sgd_op.minimize(loss, var_list=[var1]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testNoGradientsForAnyVariables_Minimize(self): - for dtype in _DATA_TYPES: - with test_utils.use_gpu(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - loss = lambda: tf.constant(5.0) - - sgd_op = gradient_descent.SGD(3.0) - with self.assertRaisesRegex(ValueError, - 'No gradients provided for any variable'): - sgd_op.minimize(loss, var_list=[var0, var1]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testNoGradientsForAnyVariables_ApplyGradients(self): - for dtype in _DATA_TYPES: - with test_utils.use_gpu(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - sgd_op = gradient_descent.SGD(3.0) - with self.assertRaisesRegex(ValueError, - 'No gradients provided for any variable'): - sgd_op.apply_gradients([(None, var0), (None, var1)]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testGradientsAsVariables(self): - for i, dtype in enumerate(_DATA_TYPES): - with test_utils.use_gpu(): - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - loss = lambda: 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop - - sgd = gradient_descent.SGD(3.0) - grads_and_vars = 
sgd._compute_gradients(loss, [var0, var1]) - # Convert gradients to tf.Variables - converted_grads = [ - tf.Variable( - tf.zeros([2], dtype), name='c_%d_%d' % (i, j)) - for j, gv in enumerate(grads_and_vars) - ] - convert_ops = [ - tf.compat.v1.assign(converted_grads[j], gv[0]) - for j, gv in enumerate(grads_and_vars) - ] - - # Run convert_ops to achieve the gradients converting - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(convert_ops) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - # Run 1 step of sgd through optimizer - converted_grads_and_vars = list(zip(converted_grads, [var0, var1])) - opt_op = sgd.apply_gradients(converted_grads_and_vars) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(convert_ops) - self.evaluate(opt_op) - - # Validate updated params - self.assertAllClose([-14., -13.], self.evaluate(var0)) - self.assertAllClose([-6., -5.], self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testComputeGradientsWithTensors(self): - with test_utils.use_gpu(): - x = tf.convert_to_tensor(1.0) - - def f(): - return x * x - - sgd = gradient_descent.SGD(3.0) - grads_and_vars = sgd._compute_gradients(f, [x]) - self.assertLen(grads_and_vars, 1) - grad, x_as_var = grads_and_vars[0] - self.assertIs(x, x_as_var) - self.assertEqual(2.0, self.evaluate(grad)) - - with self.assertRaises(NotImplementedError): - sgd.apply_gradients(grads_and_vars) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testConstraint(self): - constraint_01 = lambda x: tf.clip_by_value(x, -0.1, 0.) - constraint_0 = lambda x: tf.clip_by_value(x, 0., 1.) - with test_utils.use_gpu(): - var0 = tf.Variable([1.0, 2.0], constraint=constraint_01) - var1 = tf.Variable([3.0, 4.0], constraint=constraint_0) - loss = lambda: 5 * var0 + 3 * var1 - sgd = gradient_descent.SGD(3.0) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Run 1 step of sgd through optimizer - opt_op = sgd.minimize(loss, var_list=[var0, var1]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - # Validate updated params - self.assertAllClose([-0.1, -0.1], self.evaluate(var0)) - self.assertAllClose([0., 0.], self.evaluate(var1)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testIterationWithoutMinimize(self): - with test_utils.use_gpu(): - sgd = gradient_descent.SGD(3.0) - self.evaluate(sgd.iterations.initializer) - self.assertEqual(0, self.evaluate(sgd.iterations)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testConfig(self): - with test_utils.use_gpu(): - opt = gradient_descent.SGD(learning_rate=1.0) - config = opt.get_config() - opt2 = gradient_descent.SGD.from_config(config) - lr = opt._get_hyper('learning_rate') - lr2 = opt2._get_hyper('learning_rate') - self.evaluate(tf.compat.v1.global_variables_initializer()) - # assert both are equal float values. - self.assertEqual(self.evaluate(lr), self.evaluate(lr2)) - var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32) - loss = lambda: 3 * var0 - # learning rate variable created when calling minimize. 
- opt.minimize(loss, [var0]) - opt3 = gradient_descent.SGD.from_config(config) - lr3 = opt3._get_hyper('learning_rate') - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual(self.evaluate(lr), self.evaluate(lr3)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testConfigWithLearningRateDecay(self): - with test_utils.use_gpu(): - var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32) - for decay_schedule in [ - learning_rate_schedule.InverseTimeDecay( - 0.5, decay_steps=1.0, decay_rate=0.1), - learning_rate_schedule.PiecewiseConstantDecay( - [5], [1., .5]) - ]: - step = 10 - opt = gradient_descent.SGD(decay_schedule) - config = opt.get_config() - opt2 = gradient_descent.SGD.from_config(config) - # assert both are equal float values. - self.assertAllEqual( - decay_schedule(step), - opt._get_hyper('learning_rate')(step)) - self.assertAllEqual( - decay_schedule(step), - opt2._get_hyper('learning_rate')(step)) - loss = lambda: 3 * var0 - # learning rate variable is created when calling minimize. - opt.minimize(loss, [var0]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - config = opt.get_config() - opt3 = gradient_descent.SGD.from_config(config) - self.assertAllEqual( - self.evaluate(opt._get_hyper('learning_rate')(step)), - opt3._get_hyper('learning_rate')(step)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testGradClipValue(self): - with test_utils.use_gpu(): - var = tf.Variable([1.0, 2.0]) - loss = lambda: 3 * var - opt = gradient_descent.SGD(learning_rate=1.0, clipvalue=1.0) - opt_op = opt.minimize(loss, [var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - self.assertAllClose([0., 1.], self.evaluate(var)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testGradClipNorm(self): - with test_utils.use_gpu(): - var = tf.Variable([1.0]) - loss = lambda: 3 * var - opt = gradient_descent.SGD(learning_rate=1.0, clipnorm=1.0) - opt_op = opt.minimize(loss, [var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - self.assertAllClose([0.], self.evaluate(var)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testGradGlobalClipNorm(self): - with test_utils.use_gpu(): - # l2 norm is 5.0 - var1 = tf.Variable([1.0]) - var2 = tf.Variable([2.0]) - loss = lambda: 3 * var1 + 4 * var2 - opt = gradient_descent.SGD(learning_rate=1.0, global_clipnorm=2.0) - opt_op = opt.minimize(loss, [var1, var2]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - # grad1 = 3.0 * 2.0 / 5.0 = 1.2 - self.assertAllClose([-.2], self.evaluate(var1)) - # grad2 = 4.0 * 2.0 / 5.0 = 1.6 - self.assertAllClose([.4], self.evaluate(var2)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testInvalidClipNorm(self): - with self.assertRaisesRegex(ValueError, '>= 0'): - gradient_descent.SGD(learning_rate=1.0, clipnorm=-1.0) - - @test_combinations.generate( - test_combinations.combine( - mode=['graph', 'eager'], - clip_type=['clipnorm', 'global_clipnorm', 'clipvalue'])) - def testConfigWithCliping(self, clip_type): - opt = gradient_descent.SGD(learning_rate=1.0, **{clip_type: 2.0}) - config = opt.get_config() - opt = gradient_descent.SGD.from_config(config) - self.assertEqual(getattr(opt, clip_type), 2.0) - - @test_combinations.generate( - 
test_combinations.combine(mode=['graph', 'eager'])) - def testInvalidKwargs(self): - with self.assertRaisesRegex(TypeError, 'Unexpected keyword argument'): - gradient_descent.SGD(learning_rate=1.0, invalidkwargs=1.0) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testWeights(self): - with test_utils.use_gpu(): - opt1 = adam.Adam(learning_rate=1.0) - var1 = tf.Variable([1.0, 2.0], dtype=tf.float32) - loss1 = lambda: 3 * var1 - opt_op_1 = opt1.minimize(loss1, [var1]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - config = opt1.get_config() - opt2 = adam.Adam.from_config(config) - var2 = tf.Variable([1.0, 2.0], dtype=tf.float32) - loss2 = lambda: 3 * var2 - opt_op_2 = opt2.minimize(loss2, [var2]) - weights = opt1.get_weights() - - # Assert set_weights and both variables get updated to same value. - self.evaluate(tf.compat.v1.global_variables_initializer()) - opt2.set_weights(weights) - self.evaluate([opt_op_1, opt_op_2]) - self.assertAllClose(self.evaluate(var1), self.evaluate(var2)) - self.assertEqual(1, self.evaluate(opt1.iterations)) - self.assertEqual(1, self.evaluate(opt2.iterations)) - - var3 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32) - var4 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32) - loss3 = lambda: 3 * var3 + 5 * var4 - opt_op_3 = opt1.minimize(loss3, [var3, var4]) - - # Assert set_weights with ValueError since weight list does not match. - self.evaluate(tf.compat.v1.global_variables_initializer()) - weights = opt1.get_weights() - with self.assertRaisesRegex(ValueError, 'but the optimizer was'): - opt2.set_weights(weights) - - # Assert set_weights and variables get updated to same value. - var5 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32) - var6 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32) - loss4 = lambda: 3 * var5 + 5 * var6 - opt_op_4 = opt2.minimize(loss4, [var5, var6]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - opt2.set_weights(weights) - self.evaluate([opt_op_3, opt_op_4]) - self.assertAllClose( - self.evaluate([var3, var4]), self.evaluate([var5, var6])) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testGettingHyperParameters(self): - with self.test_session(): - opt = adam.Adam(learning_rate=1.0) - var = tf.Variable([1.0, 2.0], dtype=tf.float32) - loss = lambda: 3 * var - opt_op = opt.minimize(loss, [var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - - lr = self.evaluate(opt.lr) - self.assertEqual(1.0, lr) - - opt.lr = 2.0 - lr = self.evaluate(opt.lr) - self.assertEqual(2.0, lr) - - self.evaluate(opt.lr.assign(3.0)) - lr = self.evaluate(opt.lr) - self.assertEqual(3.0, lr) - - with self.assertRaises(AttributeError): - opt.not_an_attr += 3 - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testGettingHyperParametersWithLrInConstructor(self): - with self.test_session(): - opt = gradient_descent.SGD(lr=3.0) - var = tf.Variable([1.0, 2.0], dtype=tf.float32) - loss = lambda: 3 * var - opt_op = opt.minimize(loss, [var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - - self.assertIsInstance(opt.lr, tf.Variable) - self.assertIsInstance(opt.learning_rate, tf.Variable) - - lr = self.evaluate(opt.lr) - self.assertEqual(3.0, lr) - - opt.lr = 2.0 - lr = self.evaluate(opt.lr) - self.assertEqual(2.0, lr) - - self.evaluate(opt.lr.assign(4.0)) - lr = self.evaluate(opt.lr) - self.assertEqual(4.0, lr) - - 
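The clipping expectations above are plain arithmetic: SGD applies var -= lr * grad (which is also where the earlier [-14., -13.] values come from: 1 - 3 * 5 = -14), and `global_clipnorm` first rescales every gradient by clip / max(norm, clip). A minimal NumPy sketch reusing the constants from testGradGlobalClipNorm (just the math, not the Keras API):

import numpy as np

# Gradients of 3 * var1 + 4 * var2; their global l2 norm is sqrt(9 + 16) = 5.
lr, global_clipnorm = 1.0, 2.0
var1, var2 = np.array([1.0]), np.array([2.0])
grad1, grad2 = np.array([3.0]), np.array([4.0])

global_norm = np.sqrt((grad1 ** 2).sum() + (grad2 ** 2).sum())  # 5.0
scale = global_clipnorm / max(global_norm, global_clipnorm)     # 2 / 5 = 0.4
var1 -= lr * grad1 * scale   # 1.0 - 1.2 = -0.2
var2 -= lr * grad2 * scale   # 2.0 - 1.6 =  0.4
print(var1, var2)            # [-0.2] [0.4], the values the test asserts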
@test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testDir(self): - opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.1) - dir_result = set(dir(opt)) - self.assertIn('learning_rate', dir_result) # Hyperparameter - self.assertIn('lr', dir_result) # Hyperparameter - self.assertIn('momentum', dir_result) # Hyperparameter - self.assertIn('nesterov', dir_result) # Attribute - self.assertIn('minimize', dir_result) # Attribute - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testOptimizerWithKerasModel(self): - a = input_layer.Input(shape=(3,), name='input_a') - b = input_layer.Input(shape=(3,), name='input_b') - - dense = core.Dense(4, name='dense') - c = dense(a) - d = dense(b) - e = regularization.Dropout(0.5, name='dropout')(c) - - model = training.Model([a, b], [d, e]) - - optimizer = gradient_descent.SGD(learning_rate=0.001) - loss = 'mse' - model.compile(optimizer, loss, metrics=['mae']) - - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_d_np = np.random.random((10, 4)) - output_e_np = np.random.random((10, 4)) - - model.fit([input_a_np, input_b_np], [output_d_np, output_e_np], - epochs=1, - batch_size=5) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testOptimizerWithCallbacks(self): - np.random.seed(1331) - input_np = np.random.random((10, 3)) - output_np = np.random.random((10, 4)) - a = input_layer.Input(shape=(3,), name='input_a') - model = sequential.Sequential() - model.add(core.Dense(4, kernel_initializer='zeros', name='dense')) - model.add(regularization.Dropout(0.5, name='dropout')) - model(a) - optimizer = gradient_descent.SGD(learning_rate=0.1) - model.compile(optimizer, loss='mse', metrics=['mae']) - # This does not reduce the LR after the first epoch (due to low delta). - cbks = [ - callbacks.ReduceLROnPlateau( - monitor='val_loss', factor=0.1, min_delta=0, patience=1, cooldown=5) - ] - model.fit( - input_np, - output_np, - batch_size=10, - validation_data=(input_np, output_np), - callbacks=cbks, - epochs=2, - verbose=0) - self.assertAllClose( - float(backend.get_value(model.optimizer.lr)), 0.1, atol=1e-4) - - # This should reduce the LR after the first epoch (due to high delta). 
- cbks = [ - callbacks.ReduceLROnPlateau( - monitor='val_loss', - factor=0.1, - min_delta=10, - patience=1, - cooldown=5) - ] - model.fit( - input_np, - output_np, - batch_size=10, - validation_data=(input_np, output_np), - callbacks=cbks, - epochs=2, - verbose=2) - self.assertAllClose( - float(backend.get_value(model.optimizer.lr)), 0.01, atol=1e-4) - - def testOptimizerSetIterations(self): - global_step = tf.compat.v1.train.get_or_create_global_step() - opt = adam.Adam(learning_rate=1.0) - opt.iterations = global_step - var = tf.Variable([1.0, 2.0], dtype=tf.float32) - self.evaluate(tf.compat.v1.global_variables_initializer()) - init_step_value = self.evaluate(global_step) - loss = lambda: 3 * var - opt_op = opt.minimize(loss, [var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - new_step_value = self.evaluate(global_step) - self.assertEqual(new_step_value, init_step_value + 1) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testOptimizerWithCallableVarList(self): - train_samples = 20 - input_dim = 1 - num_classes = 2 - (x, y), _ = test_utils.get_test_data( - train_samples=train_samples, - test_samples=10, - input_shape=(input_dim,), - num_classes=num_classes) - y = np_utils.to_categorical(y) - - num_hidden = 1 - model = test_utils.get_small_sequential_mlp( - num_hidden=num_hidden, num_classes=num_classes) - opt = adam.Adam() - - loss = lambda: losses.mean_squared_error(model(x), y) - var_list = lambda: model.trainable_weights - - with self.assertRaisesRegex( - ValueError, 'Weights for model .* have not yet been created'): - var_list() - train_op = opt.minimize(loss, var_list) - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertEqual( - [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm'))) - self.evaluate(train_op) - self.assertNotEqual( - [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm'))) - self.assertLen(var_list(), 4) - - def testVarKey(self): - with tf.compat.v1.get_default_graph().as_default(): - a = tf.Variable([1., 2.], name='var') - b = tf.Variable([1.], name='var') - self.assertTrue(a._in_graph_mode) - self.assertTrue(b._in_graph_mode) - var_key = optimizer_v2._var_key(a) - self.assertEqual('var', var_key) - var_key = optimizer_v2._var_key(b) - self.assertEqual('var_1', var_key) - - def testVarName(self): - with tf.compat.v1.get_default_graph().as_default(): - var = tf.Variable([1., 2.], name='var') - loss = var + 1. - opt = adam.Adam() - opt.get_updates(loss, [var]) - opt_vars = opt.variables() - self.assertLen(opt_vars, 3) - self.assertEqual('Adam/iter:0', opt_vars[0].name) - self.assertEqual('Adam/var/m:0', opt_vars[1].name) - var_2 = tf.Variable([1., 2.], name='var_2') - loss = var_2 + 1. - with backend.name_scope('outter'): - opt.get_updates(loss, [var_2]) - opt_vars = opt.variables() - self.assertLen(opt_vars, 5) - self.assertEqual('outter/Adam/var_2/m:0', opt_vars[3].name) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testEmptyVarList(self): - opt = gradient_descent.SGD(1.) - opt.minimize(lambda: tf.constant(1.), []) - opt.apply_gradients([]) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testAggregationTrue(self): - # Test that experimental_aggregate_gradients=True works without distributed - # strategy. 
- var = tf.Variable([1., 2.]) - opt = gradient_descent.SGD(3.0) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose([1., 2.], self.evaluate(var)) - opt_op = opt.apply_gradients([([0.1, 0.1], var)], - experimental_aggregate_gradients=True) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - self.assertAllClose([0.7, 1.7], self.evaluate(var)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def testAggregationFalse(self): - # Test that experimental_aggregate_gradients=False works without distributed - # strategy. - var = tf.Variable([1., 2.]) - opt = gradient_descent.SGD(3.0) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose([1., 2.], self.evaluate(var)) - opt_op = opt.apply_gradients([([0.1, 0.1], var)], - experimental_aggregate_gradients=False) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - self.assertAllClose([0.7, 1.7], self.evaluate(var)) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testRestoringIterationsWithoutAnOptimizer(self): - opt = gradient_descent.SGD(3.0) - opt.iterations.assign(5) - checkpoint = tf.train.Checkpoint(optimizer=opt) - path = checkpoint.save(self.get_temp_dir()) - - # Following verifies that the `iterations` can be restored with the absence - # of an `Optimizer` object (using a `Checkpoint` as a placeholder). - iterations_var = tf.Variable(0, dtype=tf.int64) - optimizer_checkpoint = tf.train.Checkpoint(iter=iterations_var) - checkpoint_to_restore = tf.train.Checkpoint( - optimizer=optimizer_checkpoint) - checkpoint_to_restore.restore(path) - - self.assertEqual(5, self.evaluate(iterations_var)) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def testSlotWithNonstandardShapeRestoresBasedOnCheckpoint(self): - # First create an optimizer and a slot variable with a non-standard shape. - x = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32) - slot_shape = [2, 1] - optimizer_1 = optimizer_v2.OptimizerV2(name='test') - optimizer_1.add_slot(x, 'test_slot', 'ones', shape=slot_shape) - - # Then save the variable and optimizer to a checkpoint. - checkpoint_1 = tf.train.Checkpoint(var=x, optimizer=optimizer_1) - checkpoint_path = checkpoint_1.save(self.get_temp_dir()) - - # Create a new optimizer and call restore on it (and x) - optimizer_2 = optimizer_v2.OptimizerV2(name='test') - checkpoint_2 = tf.train.Checkpoint(var=x, optimizer=optimizer_2) - checkpoint_2.restore(checkpoint_path) - - self.assertEqual(slot_shape, - optimizer_2.get_slot(x, 'test_slot').shape.as_list()) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_gradient_aggregator(self): - def gradient_aggregator(grads_and_vars): - # Simulate an all-reduce where the other replica has zeros for gradients, - # by dividing each gradient by 2. 
- grads = [g for g, _ in grads_and_vars] - vars = [v for _, v in grads_and_vars] # pylint: disable=redefined-builtin - all_reduced_grads = [g / 2 for g in grads] - return list(zip(all_reduced_grads, vars)) - - var = tf.Variable(2.0) - sgd = gradient_descent.SGD(1.0, gradient_aggregator=gradient_aggregator) - loss = lambda: 2 * var - opt_op = sgd.minimize(loss, var_list=[var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - self.assertEqual(self.evaluate(var), 1.0) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_override_aggregate_gradients(self): - class MyOptimizer(gradient_descent.SGD): - - def _aggregate_gradients(self, grads_and_vars): - # Simulate an all-reduce where the other replica has zeros for - # gradients, by dividing each gradient by 2. - grads = [g for g, _ in grads_and_vars] - vars = [v for _, v in grads_and_vars] # pylint: disable=redefined-builtin - all_reduced_grads = [g / 2 for g in grads] - return list(zip(all_reduced_grads, vars)) - - var = tf.Variable(2.0) - sgd = MyOptimizer(1.0) - loss = lambda: 2 * var - opt_op = sgd.minimize(loss, var_list=[var]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - self.assertEqual(self.evaluate(var), 1.0) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_create_slots_for_sharded_variables(self): - # set names so that ShardedVariable is well-named for slot variable keying. - var_a = tf.Variable([1.0], name='part_0') - var_b = tf.Variable([2.0], name='part_1') - sharded_var = tf.__internal__.distribute.ShardedVariable([var_a, var_b]) - - opt = adagrad.Adagrad() - opt._create_slots(sharded_var.variables) - opt._create_slots_for_sharded_variables(sharded_var.variables) - - sharded_slot = opt.get_slot(sharded_var, 'accumulator') - self.assertIsInstance( - sharded_slot, tf.__internal__.distribute.ShardedVariable) - - slot_a = opt.get_slot(var_a, 'accumulator') - self.assertAllClose(sharded_slot.variables[0], slot_a) - slot_b = opt.get_slot(var_b, 'accumulator') - self.assertAllClose(sharded_slot.variables[1], slot_b) - - -@test_combinations.run_all_keras_modes -class OptimizersCompatibilityTest(test_combinations.TestCase): - - def _testOptimizersCompatibility(self, opt_v1, opt_v2, test_weights=True): - if tf.executing_eagerly(): - self.skipTest( - 'v1 optimizer does not run in eager mode') - np.random.seed(1331) - with test_utils.use_gpu(): - train_samples = 20 - input_dim = 3 - num_classes = 2 - (x, y), _ = test_utils.get_test_data( - train_samples=train_samples, - test_samples=10, - input_shape=(input_dim,), - num_classes=num_classes) - y = np_utils.to_categorical(y) - - num_hidden = 5 - model_v1 = test_utils.get_small_sequential_mlp( - num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim) - model_v1.compile( - opt_v1, - loss='categorical_crossentropy', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - model_v1.fit(x, y, batch_size=5, epochs=1) - - model_v2 = test_utils.get_small_sequential_mlp( - num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim) - model_v2.set_weights(model_v1.get_weights()) - model_v2.compile( - opt_v2, - loss='categorical_crossentropy', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - if not tf.compat.v1.executing_eagerly_outside_functions(): - model_v2._make_train_function() - if test_weights: - opt_v2.set_weights(opt_v1.get_weights()) - - hist_1 = model_v1.fit(x, y, batch_size=5, epochs=1, 
shuffle=False) - hist_2 = model_v2.fit(x, y, batch_size=5, epochs=1, shuffle=False) - self.assertAllClose(model_v1.get_weights(), model_v2.get_weights(), - rtol=1e-5, atol=1e-5) - self.assertAllClose(hist_1.history['loss'], hist_2.history['loss'], - rtol=1e-5, atol=1e-5) - - def testAdadeltaCompatibility(self): - opt_v1 = optimizer_v1.Adadelta(lr=0.01) - opt_v2 = adadelta.Adadelta(learning_rate=0.01) - self._testOptimizersCompatibility(opt_v1, opt_v2) - - def testAdagradCompatibility(self): - opt_v1 = optimizer_v1.Adagrad(lr=0.01) - opt_v2 = adagrad.Adagrad(learning_rate=0.01) - self._testOptimizersCompatibility(opt_v1, opt_v2) - - def testAdamCompatibility(self): - opt_v1 = optimizer_v1.Adam() - opt_v2 = adam.Adam() - self._testOptimizersCompatibility(opt_v1, opt_v2) - - def testAdamaxCompatibility(self): - opt_v1 = optimizer_v1.Adamax(lr=0.01) - opt_v2 = adamax.Adamax(learning_rate=0.01) - self._testOptimizersCompatibility(opt_v1, opt_v2) - - def testNadamCompatibility(self): - opt_v1 = optimizer_v1.Nadam(lr=0.001) - opt_v2 = nadam.Nadam(learning_rate=0.001) - self._testOptimizersCompatibility(opt_v1, opt_v2) - - def testMomentumCompatibility(self): - opt_v1 = optimizer_v1.SGD(lr=0.01, momentum=0.9) - opt_v2 = gradient_descent.SGD(learning_rate=0.01, momentum=0.9) - self._testOptimizersCompatibility(opt_v1, opt_v2) - - def testRMSpropCompatibility(self): - opt_v1 = optimizer_v1.RMSprop() - opt_v2 = rmsprop.RMSprop() - self._testOptimizersCompatibility(opt_v1, opt_v2) - - def testSGDCompatibility(self): - opt_v1 = optimizer_v1.SGD(lr=0.01) - opt_v2 = gradient_descent.SGD(learning_rate=0.01) - self._testOptimizersCompatibility(opt_v1, opt_v2, False) - - def testNumericEquivalenceForNesterovMomentum(self): - if tf.executing_eagerly(): - self.skipTest( - 'v1 optimizer does not run in eager mode') - np.random.seed(1331) - with test_utils.use_gpu(): - train_samples = 20 - input_dim = 3 - num_classes = 2 - (x, y), _ = test_utils.get_test_data( - train_samples=train_samples, - test_samples=10, - input_shape=(input_dim,), - num_classes=num_classes) - y = np_utils.to_categorical(y) - - num_hidden = 5 - model_k_v1 = test_utils.get_small_sequential_mlp( - num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim) - model_k_v2 = test_utils.get_small_sequential_mlp( - num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim) - model_k_v2.set_weights(model_k_v1.get_weights()) - model_tf = test_utils.get_small_sequential_mlp( - num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim) - model_tf.set_weights(model_k_v2.get_weights()) - - opt_k_v1 = optimizer_v1.SGD(momentum=0.9, nesterov=True) - opt_k_v2 = gradient_descent.SGD(momentum=0.9, nesterov=True) - opt_tf = tf.compat.v1.train.MomentumOptimizer( - learning_rate=0.01, momentum=0.9, use_nesterov=True) - - model_k_v1.compile( - opt_k_v1, - loss='categorical_crossentropy', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - model_k_v2.compile( - opt_k_v2, - loss='categorical_crossentropy', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - model_tf.compile( - opt_tf, - loss='categorical_crossentropy', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - - hist_k_v1 = model_k_v1.fit(x, y, batch_size=5, epochs=10, shuffle=False) - hist_k_v2 = model_k_v2.fit(x, y, batch_size=5, epochs=10, shuffle=False) - hist_tf = model_tf.fit(x, y, batch_size=5, epochs=10, shuffle=False) - - self.assertAllClose(model_k_v1.get_weights(), model_tf.get_weights()) - 
self.assertAllClose(model_k_v1.get_weights(), model_k_v2.get_weights()) - self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights()) - self.assertAllClose(hist_k_v1.history['loss'], hist_tf.history['loss']) - self.assertAllClose(hist_k_v1.history['loss'], hist_k_v2.history['loss']) - - def testNumericEquivalenceForAmsgrad(self): - if tf.executing_eagerly(): - self.skipTest( - 'v1 optimizer does not run in eager mode') - np.random.seed(1331) - with test_utils.use_gpu(): - train_samples = 20 - input_dim = 3 - num_classes = 2 - (x, y), _ = test_utils.get_test_data( - train_samples=train_samples, - test_samples=10, - input_shape=(input_dim,), - num_classes=num_classes) - y = np_utils.to_categorical(y) - - num_hidden = 5 - model_k_v1 = test_utils.get_small_sequential_mlp( - num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim) - model_k_v2 = test_utils.get_small_sequential_mlp( - num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim) - model_k_v2.set_weights(model_k_v1.get_weights()) - - opt_k_v1 = optimizer_v1.Adam(amsgrad=True) - opt_k_v2 = adam.Adam(amsgrad=True) - - model_k_v1.compile( - opt_k_v1, - loss='categorical_crossentropy', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - model_k_v2.compile( - opt_k_v2, - loss='categorical_crossentropy', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - - hist_k_v1 = model_k_v1.fit(x, y, batch_size=5, epochs=10, shuffle=False) - hist_k_v2 = model_k_v2.fit(x, y, batch_size=5, epochs=10, shuffle=False) - - self.assertAllClose(model_k_v1.get_weights(), model_k_v2.get_weights()) - self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights()) - self.assertAllClose(hist_k_v1.history['loss'], hist_k_v2.history['loss']) - - -# Note: These tests are kept in a separate class to avoid bugs in some -# distributions of Python that break AutoGraph which is used by tf.function. 
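For testNumericEquivalenceForNesterovMomentum above, all three optimizers should realize the same update rule, which the Keras SGD documentation gives for nesterov=True as velocity = momentum * velocity - lr * g followed by w = w + momentum * velocity - lr * g. A one-step NumPy sketch with illustrative constants (not taken from the test):

import numpy as np

# One Nesterov-momentum step, following the rule documented for
# tf.keras SGD(nesterov=True). All constants here are illustrative.
lr, momentum = 0.01, 0.9
w = np.array([1.0, 2.0])
v = np.zeros_like(w)             # the "momentum" slot, initially zero
g = np.array([0.1, 0.2])         # a made-up gradient

v = momentum * v - lr * g        # velocity update: [-0.001, -0.002]
w = w + momentum * v - lr * g    # look-ahead step
print(w)                         # [0.9981 1.9962]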
-@test_combinations.generate(test_combinations.combine(mode=['eager'])) -class OptimizerWithFunctionTest(tf.test.TestCase, parameterized.TestCase): - - def testBasic(self): - var = tf.Variable([1.0, 2.0], dtype=tf.float32) - loss = lambda: 3 * var - opt = adam.Adam(learning_rate=1.0) - - @tf.function - def fn(): - opt.minimize(loss, [var]) - return var - - self.assertAllClose([0., 1.], fn(), atol=1e-4) - self.assertAllClose([-1, 0.], fn(), atol=1e-4) - - def testBasicWithConstantDecay(self): - var = tf.Variable([1.0, 2.0], dtype=tf.float32) - loss = lambda: 3 * var - opt = adam.Adam(learning_rate=1.0) - - @tf.function - def fn(): - opt.minimize(loss, [var]) - return var - - self.assertAllClose([0., 1.], fn(), atol=1e-4) - self.assertAllClose([-1, 0.], fn(), atol=1e-4) - - def testVarKeyWithVarCreatedInEager(self): - a = tf.Variable([1., 2.], name='var') - b = tf.Variable([1.], name='var') - - @tf_test_utils.also_run_as_tf_function - def var_key_test(): - self.assertFalse(a._in_graph_mode) - self.assertFalse(b._in_graph_mode) - var_key_a = optimizer_v2._var_key(a) - self.assertStartsWith(var_key_a, 'var_') - var_key_b = optimizer_v2._var_key(b) - self.assertStartsWith(var_key_b, 'var_') - self.assertNotEqual(var_key_a, var_key_b) - - var_key_test() - - def testLearningRateDecayUsedInTwoFunctions(self): - a = tf.Variable([1., 2.], name='var') - b = tf.Variable([1.], name='var') - - learning_rate_decay = learning_rate_schedule.InverseTimeDecay( - 0.5, decay_steps=1.0, decay_rate=0.5) - opt = adam.Adam(learning_rate=learning_rate_decay) - loss_a = lambda: 3 * a - loss_b = lambda: 2 * b - - @tf.function - def fn_a(): - opt.minimize(loss_a, [a]) - return a - - @tf.function - def fn_b(): - opt.minimize(loss_b, [b]) - return b - - fn_a() - fn_b() - - -_NUM_LEARNERS = 50 -APPLY_SCOPE = 'debug_apply' -ALLOWLIST = [ - # optimizer_v2._deduplicate_indexed_slices contains an indexed slice: - # array_ops.shape(unique_indices)[0] - # which winds up expanding to [0:1:1] thereby creating three constants - # to represent the indices. - ('embeddings/strided_slice/stack', 'Const'), -] - - -def get_inputs(op): - op_inputs = list(op.inputs) + op.control_inputs - names = [i.name for i in op_inputs] - op_inputs = [getattr(i, 'op', i) for i in op_inputs] - return op_inputs, names - - -def strip_name(node): - if 'Placeholder' in node.op: - return - node.name = '' - - -def topological_sort(graph): - graph_ops = graph.get_operations() - - sources = [] - result = [] - - inputs = {} - outputs = collections.defaultdict(set) - for op in graph_ops: - op_inputs = get_inputs(op)[0] - if not op_inputs: - sources.append(op) - - inputs[op] = set(op_inputs) - for i in op_inputs: - outputs[i].add(op) - - while sources: - op = sources.pop() - for op_output in outputs[op]: - inputs[op_output].remove(op) - if not inputs[op_output]: - sources.append(op_output) - - result.append(op) - - # Check correctness. - if len(result) != len(graph_ops): - raise ValueError('Sort result has {} ops, source graph has {}.' - .format(len(result), len(graph_ops))) - - sort_check_seen = set() - for op in result: - sort_check_seen.add(op) - for i in get_inputs(op)[0]: - assert i in sort_check_seen - - return result - - -def identify_redundant_ops(graph): - """Implements basic common subexpression elimination. - - This is not intended to replicate the graph semantics of TensorFlow Graphs - (for instance it does not handle stateful op ordering), nor is it intended to - replace the common subexpression elimination Grappler pass. 
Rather, it - provides a high level sanity check that clearly redundant ops are not being - created. - - Args: - graph: The graph to be analyzed. - - Returns: - A count of the duplicate ops and a description of the structure of each. - """ - sorted_ops = topological_sort(graph) - duplicates = collections.defaultdict(list) - unified_node_defs = {} - name_map = {} - - for op in sorted_ops: - input_names = [] - for op_input, name in zip(*get_inputs(op)): - input_def = op_input.node_def - - # Operations can have multiple outputs. We track which is used to prevent - # overzealous elimination. - input_def.name = name - - input_def.input[:] = [name_map.get(i, i) for i in input_def.input] - strip_name(input_def) - - # NodeDef.SerializeToString() does not provide identical serialized - # representations for identical NodeDefs, so we instead use string - # representation as a dict key. - key = repr(input_def) - - if key in unified_node_defs: - input_names.append(unified_node_defs[key]) - - else: - unified_node_defs[key] = op_input.name - input_names.append(name) - - node_def = op.node_def - node_def.input[:] = input_names - strip_name(node_def) - - key = repr(node_def) - duplicates[key].append(op) - name_map[op.name] = duplicates[key][0].name - - num_duplicates = 0 - duplicate_types = [] - for standard_def, op_defs in duplicates.items(): - # We are only interested in testing the apply method of the optimizer - op_defs = [i for i in op_defs if APPLY_SCOPE in i.name] - - # We only check for per-apply redundant ops. - if len(op_defs) < _NUM_LEARNERS: - continue - - # Certain ops are simply not worth eliminating, and are instead simply - # ignored. - name, op_type = op_defs[0].name, op_defs[0].type - if any(allowlisted_scope in name and op_type == allowlisted_type - for allowlisted_scope, allowlisted_type in ALLOWLIST): - continue - - num_duplicates += len(op_defs) - traceback = [] - for level in op_defs[0].traceback: - traceback.append(' {} {}:{}'.format(level[0], level[2], level[1])) - - duplicate_types.append( - '# Example name: {}\n# Op creation stack:\n{}\n{}'.format( - op_defs[0].name, - '\n'.join(traceback), - standard_def)) - - return num_duplicates, duplicate_types - - -def make_model(): - r"""Constructs a simple ensemble of weak learners model. - - --------- --------- --------- --------- - | Input | | Input | ... | Input | | Input | - --------- --------- --------- --------- - | | | | - V V V V - --------- --------- --------- --------- - | Embed | | Embed | ... | Embed | | Embed | - --------- --------- --------- --------- - | | | | - V V V V - --------- --------- --------- --------- - | Dense | | Dense | ... | Dense | | Dense | - --------- --------- --------- --------- - \ | | / - \ | | / - --------------------------------------------- - | - --------- - | Dense | - --------- - - This topology is chosen because it exercises both dense and sparse update - paths. - - Returns: - A model for testing optimizer coefficient reuse. 
- """ - inputs = [] - intermediates = [] - for _ in range(_NUM_LEARNERS): - inp = keras.layers.Input(shape=(1,), dtype=tf.int32) - layer = keras.layers.Embedding(1, 4)(inp) - layer = keras.layers.Dense(1)(layer) - - inputs.append(inp) - intermediates.append(layer) - - layer = keras.layers.Concatenate(axis=-1)(intermediates) - layer = keras.layers.Dense(1)(layer) - - return keras.models.Model(inputs, layer) - - -COEFFICIENT_PARAMS = ( - ('Adadelta', adadelta.Adadelta, None), - ('Adagrad', adagrad.Adagrad, None), - ('Adam', adam.Adam, None), - ('Adam_amdgrad', adam.Adam, dict(amsgrad=True)), - ('Adamax', adamax.Adamax, None), - ('Ftrl', ftrl.Ftrl, None), - ('Ftrl_l2_shrinkage', ftrl.Ftrl, - dict(l2_shrinkage_regularization_strength=0.1)), - ('SGD', gradient_descent.SGD, None), - ('SGD_momentum', gradient_descent.SGD, dict(momentum=0.5)), - ('Nadam', nadam.Nadam, None), - ('RMSprop', rmsprop.RMSprop, None), - ('RMSprop_centered', rmsprop.RMSprop, dict(centered=True)), - ('RMSprop_momentum', rmsprop.RMSprop, dict(momentum=0.5)), - ('RMSprop_momentum_centered', rmsprop.RMSprop, - dict(momentum=0.5, centered=True)), -) - - -class OptimizerCoefficientTest(test_combinations.TestCase): - - @parameterized.named_parameters(*COEFFICIENT_PARAMS) - def test_duplicate_ops(self, optimizer_class, init_kwargs=None): - init_kwargs = init_kwargs or {} - optimizer = optimizer_class(**init_kwargs) - - graph = tf.Graph() - with graph.as_default(): - model = make_model() - trainable_variables = model.trainable_variables - grads = optimizer.get_gradients(model.outputs[0], trainable_variables) - - with backend.name_scope(APPLY_SCOPE): - optimizer.apply_gradients(zip(grads, trainable_variables)) - - num_duplicates, duplicate_types = identify_redundant_ops(graph) - if num_duplicates: - # Avoid spamming logs. - if len(duplicate_types) > 3: - duplicate_types = duplicate_types[:3] + ['...'] - - num_total = len(graph.get_operations()) - raise ValueError('{} of {} ({:.1f}%) ops were duplicates:\n\n{}'.format( - num_duplicates, num_total, num_duplicates / num_total * 100, - '\n'.join(duplicate_types))) - - @parameterized.named_parameters(*COEFFICIENT_PARAMS) - def test_subclass_compat(self, optimizer_class, init_kwargs=None): - """Ensure that subclassed optimizers without apply_state still work.""" - - class SubclassedOptimizer(optimizer_class): - - def _resource_apply_dense(self, grad, var): # pylint: disable=useless-super-delegation - return super()._resource_apply_dense(grad, var) - - def _resource_apply_sparse(self, grad, var, indices): # pylint: disable=useless-super-delegation - return super()._resource_apply_sparse( - grad, var, indices) - - init_kwargs = init_kwargs or {} - optimizer = SubclassedOptimizer(**init_kwargs) - - graph = tf.Graph() - with graph.as_default(): - model = make_model() - trainable_variables = model.trainable_variables - grads = optimizer.get_gradients(model.outputs[0], trainable_variables) - - with backend.name_scope(APPLY_SCOPE): - optimizer.apply_gradients(zip(grads, trainable_variables)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/optimizers/optimizer_v2/rmsprop.py b/keras/optimizers/optimizer_v2/rmsprop.py deleted file mode 100644 index c3c7fbd52bd9..000000000000 --- a/keras/optimizers/optimizer_v2/rmsprop.py +++ /dev/null @@ -1,300 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
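identify_redundant_ops above flags duplicates by canonicalizing every NodeDef (names stripped, inputs rewritten to their canonical producers) and using the string form as a dictionary key. A toy sketch of the same keying idea on plain tuples, with hypothetical op names:

import collections

# Two "ops" count as duplicates when their canonicalized, serialized
# forms coincide; the same trick identify_redundant_ops applies to
# NodeDefs via repr(node_def).
ops = [
    ("mul", ("lr", "grad_a")),   # lr * grad_a
    ("mul", ("lr", "grad_a")),   # recomputed again -> redundant
    ("mul", ("lr", "grad_b")),   # different input -> not a duplicate
]

buckets = collections.defaultdict(list)
for i, op in enumerate(ops):
    buckets[repr(op)].append(i)  # stand-in for the stripped-NodeDef key

for key, ids in buckets.items():
    if len(ids) > 1:
        print("duplicate:", key, "at positions", ids)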
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""RMSprop optimizer implementation.""" - -import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes - -import numpy as np -from keras import backend_config -from keras.optimizers.optimizer_v2 import optimizer_v2 -from tensorflow.python.util.tf_export import keras_export - - -# pylint: disable=g-classes-have-attributes -@keras_export("keras.optimizers.RMSprop") -class RMSprop(optimizer_v2.OptimizerV2): - r"""Optimizer that implements the RMSprop algorithm. - - The gist of RMSprop is to: - - - Maintain a moving (discounted) average of the square of gradients - - Divide the gradient by the root of this average - - This implementation of RMSprop uses plain momentum, not Nesterov momentum. - - The centered version additionally maintains a moving average of the - gradients, and uses that average to estimate the variance. - - Args: - learning_rate: A `Tensor`, floating point value, or a schedule that is a - `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable - that takes no arguments and returns the actual value to use. The - learning rate. Defaults to 0.001. - rho: Discounting factor for the history/coming gradient. Defaults to 0.9. - momentum: A scalar or a scalar `Tensor`. Defaults to 0.0. - epsilon: A small constant for numerical stability. This epsilon is - "epsilon hat" in the Kingma and Ba paper (in the formula just before - Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to - 1e-7. - centered: Boolean. If `True`, gradients are normalized by the estimated - variance of the gradient; if False, by the uncentered second moment. - Setting this to `True` may help with training, but is slightly more - expensive in terms of computation and memory. Defaults to `False`. - name: Optional name prefix for the operations created when applying - gradients. Defaults to `"RMSprop"`. - **kwargs: keyword arguments. Allowed arguments are `clipvalue`, - `clipnorm`, `global_clipnorm`. - If `clipvalue` (float) is set, the gradient of each weight - is clipped to be no higher than this value. - If `clipnorm` (float) is set, the gradient of each weight - is individually clipped so that its norm is no higher than this value. - If `global_clipnorm` (float) is set the gradient of all weights is - clipped so that their global norm is no higher than this value. - - Note that in the dense implementation of this algorithm, variables and their - corresponding accumulators (momentum, gradient moving average, square - gradient moving average) will be updated even if the gradient is zero - (i.e. accumulators will decay, momentum will be applied). The sparse - implementation (used when the gradient is an `IndexedSlices` object, - typically because of `tf.gather` or an embedding lookup in the forward pass) - will not update variable slices or their accumulators unless those slices - were used in the forward pass (nor is there an "eventual" correction to - account for these omitted updates). 
This leads to more efficient updates for - large embedding lookup tables (where most of the slices are not accessed in - a particular graph execution), but differs from the published algorithm. - - Usage: - - >>> opt = tf.keras.optimizers.RMSprop(learning_rate=0.1) - >>> var1 = tf.Variable(10.0) - >>> loss = lambda: (var1 ** 2) / 2.0 # d(loss) / d(var1) = var1 - >>> step_count = opt.minimize(loss, [var1]).numpy() - >>> var1.numpy() - 9.683772 - - Reference: - - [Hinton, 2012]( - http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) - """ - - _HAS_AGGREGATE_GRAD = True - - def __init__(self, - learning_rate=0.001, - rho=0.9, - momentum=0.0, - epsilon=1e-7, - centered=False, - name="RMSprop", - **kwargs): - """Construct a new RMSprop optimizer. - - Args: - learning_rate: A `Tensor`, floating point value, or a schedule that is a - `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable - that takes no arguments and returns the actual value to use. The - learning rate. Defaults to 0.001. - rho: Discounting factor for the history/coming gradient. Defaults to 0.9. - momentum: A scalar or a scalar `Tensor`. Defaults to 0.0. - epsilon: A small constant for numerical stability. This epsilon is - "epsilon hat" in the Kingma and Ba paper (in the formula just before - Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to - 1e-7. - centered: Boolean. If `True`, gradients are normalized by the estimated - variance of the gradient; if False, by the uncentered second moment. - Setting this to `True` may help with training, but is slightly more - expensive in terms of computation and memory. Defaults to `False`. - name: Optional name prefix for the operations created when applying - gradients. Defaults to "RMSprop". - **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, `lr`, - `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is clip - gradients by value, `decay` is included for backward compatibility to - allow time inverse decay of learning rate. `lr` is included for backward - compatibility, recommended to use `learning_rate` instead. - - @compatibility(eager) - When eager execution is enabled, `learning_rate`, `decay`, `momentum`, and - `epsilon` can each be a callable that takes no arguments and returns the - actual value to use. This can be useful for changing these values across - different invocations of optimizer functions. - @end_compatibility - """ - super().__init__(name, **kwargs) - self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) - self._set_hyper("decay", self._initial_decay) - self._set_hyper("rho", rho) - - self._momentum = False - if isinstance(momentum, tf.Tensor) or callable(momentum) or momentum > 0: - self._momentum = True - if isinstance(momentum, (int, float)) and (momentum < 0 or momentum > 1): - raise ValueError(f"`momentum` must be between [0, 1]. 
Received: " - f"momentum={momentum} (of type {type(momentum)}).") - self._set_hyper("momentum", momentum) - - self.epsilon = epsilon or backend_config.epsilon() - self.centered = centered - - def _create_slots(self, var_list): - for var in var_list: - self.add_slot(var, "rms") - if self._momentum: - for var in var_list: - self.add_slot(var, "momentum") - if self.centered: - for var in var_list: - self.add_slot(var, "mg") - - def _prepare_local(self, var_device, var_dtype, apply_state): - super()._prepare_local(var_device, var_dtype, apply_state) - - rho = tf.identity(self._get_hyper("rho", var_dtype)) - apply_state[(var_device, var_dtype)].update( - dict( - neg_lr_t=-apply_state[(var_device, var_dtype)]["lr_t"], - epsilon=tf.convert_to_tensor( - self.epsilon, var_dtype), - rho=rho, - momentum=tf.identity(self._get_hyper("momentum", var_dtype)), - one_minus_rho=1. - rho)) - - def _resource_apply_dense(self, grad, var, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - rms = self.get_slot(var, "rms") - if self._momentum: - mom = self.get_slot(var, "momentum") - if self.centered: - mg = self.get_slot(var, "mg") - return tf.raw_ops.ResourceApplyCenteredRMSProp( - var=var.handle, - mg=mg.handle, - ms=rms.handle, - mom=mom.handle, - lr=coefficients["lr_t"], - rho=coefficients["rho"], - momentum=coefficients["momentum"], - epsilon=coefficients["epsilon"], - grad=grad, - use_locking=self._use_locking) - else: - return tf.raw_ops.ResourceApplyRMSProp( - var=var.handle, - ms=rms.handle, - mom=mom.handle, - lr=coefficients["lr_t"], - rho=coefficients["rho"], - momentum=coefficients["momentum"], - epsilon=coefficients["epsilon"], - grad=grad, - use_locking=self._use_locking) - else: - rms_t = (coefficients["rho"] * rms + - coefficients["one_minus_rho"] * tf.square(grad)) - rms_t = tf.compat.v1.assign(rms, rms_t, use_locking=self._use_locking) - denom_t = rms_t - if self.centered: - mg = self.get_slot(var, "mg") - mg_t = coefficients["rho"] * mg + coefficients["one_minus_rho"] * grad - mg_t = tf.compat.v1.assign(mg, mg_t, use_locking=self._use_locking) - denom_t = rms_t - tf.square(mg_t) - var_t = var - coefficients["lr_t"] * grad / ( - tf.sqrt(denom_t) + coefficients["epsilon"]) - return tf.compat.v1.assign(var, var_t, use_locking=self._use_locking).op - - def _resource_apply_sparse(self, grad, var, indices, apply_state=None): - var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) - - rms = self.get_slot(var, "rms") - if self._momentum: - mom = self.get_slot(var, "momentum") - if self.centered: - mg = self.get_slot(var, "mg") - return tf.raw_ops.ResourceSparseApplyCenteredRMSProp( - var=var.handle, - mg=mg.handle, - ms=rms.handle, - mom=mom.handle, - lr=coefficients["lr_t"], - rho=coefficients["rho"], - momentum=coefficients["momentum"], - epsilon=coefficients["epsilon"], - grad=grad, - indices=indices, - use_locking=self._use_locking) - else: - return tf.raw_ops.ResourceSparseApplyRMSProp( - var=var.handle, - ms=rms.handle, - mom=mom.handle, - lr=coefficients["lr_t"], - rho=coefficients["rho"], - momentum=coefficients["momentum"], - epsilon=coefficients["epsilon"], - grad=grad, - indices=indices, - use_locking=self._use_locking) - else: - rms_scaled_g_values = (grad * grad) * coefficients["one_minus_rho"] - rms_t = 
tf.compat.v1.assign(rms, rms * coefficients["rho"], - use_locking=self._use_locking) - with tf.control_dependencies([rms_t]): - rms_t = self._resource_scatter_add(rms, indices, rms_scaled_g_values) - rms_slice = tf.gather(rms_t, indices) - denom_slice = rms_slice - if self.centered: - mg = self.get_slot(var, "mg") - mg_scaled_g_values = grad * coefficients["one_minus_rho"] - mg_t = tf.compat.v1.assign(mg, mg * coefficients["rho"], - use_locking=self._use_locking) - with tf.control_dependencies([mg_t]): - mg_t = self._resource_scatter_add(mg, indices, mg_scaled_g_values) - mg_slice = tf.gather(mg_t, indices) - denom_slice = rms_slice - tf.square(mg_slice) - var_update = self._resource_scatter_add( - var, indices, coefficients["neg_lr_t"] * grad / ( - tf.sqrt(denom_slice) + coefficients["epsilon"])) - if self.centered: - return tf.group(*[var_update, rms_t, mg_t]) - return tf.group(*[var_update, rms_t]) - - def set_weights(self, weights): - params = self.weights - # Override set_weights for backward compatibility of Keras V1 optimizer - # since it does not include iteration at head of the weight list. Set - # iteration to 0. - if len(params) == len(weights) + 1: - weights = [np.array(0)] + weights - super().set_weights(weights) - - def get_config(self): - config = super().get_config() - config.update({ - "learning_rate": self._serialize_hyperparameter("learning_rate"), - "decay": self._initial_decay, - "rho": self._serialize_hyperparameter("rho"), - "momentum": self._serialize_hyperparameter("momentum"), - "epsilon": self.epsilon, - "centered": self.centered, - }) - return config - - -RMSProp = RMSprop diff --git a/keras/optimizers/optimizer_v2/rmsprop_test.py b/keras/optimizers/optimizer_v2/rmsprop_test.py deleted file mode 100644 index 6175520576d5..000000000000 --- a/keras/optimizers/optimizer_v2/rmsprop_test.py +++ /dev/null @@ -1,589 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
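The docstring example in the file above (var1 going from 10.0 to 9.683772 in one step) follows directly from the non-momentum dense update implemented in _resource_apply_dense: rms_t = rho * rms + (1 - rho) * g**2, then var -= lr * g / (sqrt(rms_t) + epsilon). A NumPy check using the documented defaults rho=0.9, epsilon=1e-7:

import numpy as np

# One RMSprop step on the docstring example: loss = var ** 2 / 2, so grad = var.
lr, rho, eps = 0.1, 0.9, 1e-7
var, rms = 10.0, 0.0
grad = var

rms = rho * rms + (1 - rho) * grad ** 2   # 0.1 * 100 = 10.0
var -= lr * grad / (np.sqrt(rms) + eps)   # 10 - 1 / sqrt(10)
print(round(var, 6))                      # 9.683772, as documented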
-# ============================================================================== -"""Tests for rmsprop.""" - -import tensorflow.compat.v2 as tf - -import copy -import itertools -import math - -from absl.testing import parameterized -import numpy as np -from tensorflow.python.framework import test_util as tf_test_utils -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils -from keras.optimizers.schedules import learning_rate_schedule -from keras.optimizers.optimizer_v2 import rmsprop - -_DATA_TYPES = [ - tf.half, tf.float32, tf.float64, tf.complex64, - tf.complex128 -] - -_TEST_PARAM_VALUES = [ - # learning_rate, rho, momentum, epsilon, centered - [0.05, 0.9, 0.0, 1e-3, True], - [0.05, 0.9, 0.0, 1e-3, False], - [0.1, 0.9, 0.0, 1e-3, True], - [0.01, 0.9, 0.0, 1e-5, True], - [0.01, 0.9, 0.9, 1e-5, True], -] - -_TESTPARAMS = [ - [data_type] + values - for data_type, values in itertools.product(_DATA_TYPES, _TEST_PARAM_VALUES) -] - - -class RMSpropOptimizerTest(tf.test.TestCase, parameterized.TestCase): - - def _rmsprop_update_numpy(self, var, g, mg, rms, mom, lr, rho, momentum, - epsilon, centered): - rms_t = rms * rho + (1 - rho) * g * g - if centered: - mg_t = mg * rho + (1 - rho) * g - denom_t = rms_t - mg_t * mg_t - else: - mg_t = mg - denom_t = rms_t - if momentum > 0.: - mom_t = momentum * mom + lr * g / (np.sqrt(denom_t + epsilon)) - var_t = var - mom_t - else: - mom_t = mom - var_t = var - lr * g / (np.sqrt(denom_t) + epsilon) - return var_t, mg_t, rms_t, mom_t - - def _sparse_rmsprop_update_numpy(self, var, gindexs, gvalues, mg, rms, mom, - lr, rho, momentum, epsilon, centered): - mg_t = copy.deepcopy(mg) - rms_t = copy.deepcopy(rms) - mom_t = copy.deepcopy(mom) - var_t = copy.deepcopy(var) - for i in range(len(gindexs)): - gindex = gindexs[i] - gvalue = gvalues[i] - rms_t[gindex] = rms[gindex] * rho + (1 - rho) * gvalue * gvalue - if centered: - mg_t[gindex] = mg_t[gindex] * rho + (1 - rho) * gvalue - denom_t = rms_t[gindex] - mg_t[gindex] * mg_t[gindex] - else: - denom_t = rms_t[gindex] - if momentum > 0.: - mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(denom_t + - epsilon) - var_t[gindex] = var[gindex] - mom_t[gindex] - else: - mom_t[gindex] = mom[gindex] - var_t[gindex] = var[gindex] - lr * gvalue / (np.sqrt(denom_t) + epsilon) - return var_t, mg_t, rms_t, mom_t - - def testDense(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for (dtype, learning_rate, rho, momentum, epsilon, centered) in _TESTPARAMS: - with tf.compat.v1.get_default_graph().as_default(), test_utils.use_gpu(): - # Initialize variables for numpy implementation. 
- var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np, dtype=dtype) - var1 = tf.Variable(var1_np, dtype=dtype) - grads0 = tf.constant(grads0_np, dtype=dtype) - grads1 = tf.constant(grads1_np, dtype=dtype) - opt = rmsprop.RMSprop( - learning_rate=learning_rate, - rho=rho, - momentum=momentum, - epsilon=epsilon, - centered=centered) - - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - if centered: - mg0 = opt.get_slot(var0, "mg") - mg1 = opt.get_slot(var1, "mg") - else: - mg0 = None - mg1 = None - - if momentum > 0.: - mom0 = opt.get_slot(var0, "momentum") - mom1 = opt.get_slot(var1, "momentum") - else: - mom0 = None - mom1 = None - - rms0 = opt.get_slot(var0, "rms") - self.assertIsNotNone(rms0) - rms1 = opt.get_slot(var1, "rms") - self.assertIsNotNone(rms1) - - mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - # Run 3 steps of RMSprop - for _ in range(1, 4): - self.evaluate(update) - - var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy( - var0_np, grads0_np, mg0_np, rms0_np, mom0_np, learning_rate, rho, - momentum, epsilon, centered) - var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy( - var1_np, grads1_np, mg1_np, rms1_np, mom1_np, learning_rate, rho, - momentum, epsilon, centered) - - # Validate updated params - if centered: - self.assertAllCloseAccordingToType(mg0_np, self.evaluate(mg0)) - self.assertAllCloseAccordingToType(mg1_np, self.evaluate(mg1)) - if momentum > 0.: - self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0)) - self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1)) - self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0)) - self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1)) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testDenseWithLearningRateDecay(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- with tf.Graph().as_default(): - var0_np = np.array([1.0, 2.0]) - grads0_np = np.array([0.1, 0.2]) - var1_np = np.array([3.0, 4.0]) - grads1_np = np.array([0.01, 0.2]) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - learning_rate = 0.01 - rho = 0.9 - momentum = 0.0 - epsilon = 1e-7 - centered = False - decay = 0.5 - opt = rmsprop.RMSprop( - learning_rate=learning_rate, - rho=rho, - momentum=momentum, - epsilon=epsilon, - centered=centered, - decay=decay) - - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - rms0 = opt.get_slot(var0, "rms") - self.assertIsNotNone(rms0) - rms1 = opt.get_slot(var1, "rms") - self.assertIsNotNone(rms1) - if momentum > 0.: - mom0 = opt.get_slot(var0, "momentum") - mom1 = opt.get_slot(var1, "momentum") - else: - mom0 = None - mom1 = None - - mg0_np = np.array([0.0, 0.0]) - mg1_np = np.array([0.0, 0.0]) - rms0_np = np.array([0.0, 0.0]) - rms1_np = np.array([0.0, 0.0]) - mom0_np = np.array([0.0, 0.0]) - mom1_np = np.array([0.0, 0.0]) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - # Run 2 steps of RMSprop - for t in range(2): - self.evaluate(update) - - lr = learning_rate / (1 + decay * t) - var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy( - var0_np, grads0_np, mg0_np, rms0_np, mom0_np, lr, rho, momentum, - epsilon, centered) - var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy( - var1_np, grads1_np, mg1_np, rms1_np, mom1_np, lr, rho, momentum, - epsilon, centered) - - # Validate updated params - self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0)) - self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1)) - if momentum > 0.: - self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0)) - self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1)) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testDenseWithLearningRateInverseTimeDecay(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode.
- with tf.Graph().as_default(): - var0_np = np.array([1.0, 2.0]) - grads0_np = np.array([0.1, 0.2]) - var1_np = np.array([3.0, 4.0]) - grads1_np = np.array([0.01, 0.2]) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0 = tf.constant(grads0_np) - grads1 = tf.constant(grads1_np) - learning_rate = 0.01 - rho = 0.9 - momentum = 0.0 - epsilon = 1e-7 - centered = False - decay = 0.5 - lr_schedule = learning_rate_schedule.InverseTimeDecay( - learning_rate, decay_steps=1.0, decay_rate=decay) - opt = rmsprop.RMSprop( - learning_rate=lr_schedule, - rho=rho, - momentum=momentum, - epsilon=epsilon, - centered=centered) - - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - rms0 = opt.get_slot(var0, "rms") - self.assertIsNotNone(rms0) - rms1 = opt.get_slot(var1, "rms") - self.assertIsNotNone(rms1) - if momentum > 0.: - mom0 = opt.get_slot(var0, "momentum") - mom1 = opt.get_slot(var1, "momentum") - else: - mom0 = None - mom1 = None - - mg0_np = np.array([0.0, 0.0]) - mg1_np = np.array([0.0, 0.0]) - rms0_np = np.array([0.0, 0.0]) - rms1_np = np.array([0.0, 0.0]) - mom0_np = np.array([0.0, 0.0]) - mom1_np = np.array([0.0, 0.0]) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - # Run 2 steps of RMSprop - for t in range(2): - self.evaluate(update) - - lr = learning_rate / (1 + decay * t) - var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy( - var0_np, grads0_np, mg0_np, rms0_np, mom0_np, lr, rho, momentum, - epsilon, centered) - var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy( - var1_np, grads1_np, mg1_np, rms1_np, mom1_np, lr, rho, momentum, - epsilon, centered) - - # Validate updated params - self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0)) - self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1)) - if momentum > 0.: - self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0)) - self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1)) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - def testMinimizeSparseResourceVariable(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) - x = tf.constant([[4.0], [5.0]], dtype=dtype) - - def loss(): - pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop - return pred * pred - - sgd_op = rmsprop.RMSprop( - learning_rate=1.0, rho=0.0, momentum=0.0, epsilon=0.0, - centered=False).minimize( - loss, var_list=[var0]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([[1.0, 2.0]], self.evaluate(var0)) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[0., 1.]], - self.evaluate(var0), - atol=0.01) - - def testMinimizeSparseResourceVariableCentered(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode.
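Both decay tests above check the optimizer against the same closed form: with InverseTimeDecay (or the legacy `decay` kwarg) the effective rate is lr_t = learning_rate / (1 + decay_rate * t). A quick check with the tests' constants:

# Inverse-time decay schedule used by the two tests above.
learning_rate, decay_rate = 0.01, 0.5
for t in range(2):
    lr_t = learning_rate / (1 + decay_rate * t)
    print(t, lr_t)   # t=0 -> 0.01, t=1 -> 0.01 / 1.5 = 0.006667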
- with tf.Graph().as_default(): - for dtype in _DATA_TYPES: - var0 = tf.Variable([[1.0, 2.0]], dtype=dtype) - x = tf.constant([[4.0], [5.0]], dtype=dtype) - - def loss(): - pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop - return pred * pred - - # loss = lambda: pred * pred # pylint: disable=cell-var-from-loop - sgd_op = rmsprop.RMSprop( - learning_rate=1.0, rho=0.0, momentum=0.0, epsilon=1.0, - centered=True).minimize( - loss, var_list=[var0]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([[1.0, 2.0]], self.evaluate(var0)) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[-111, -138]], - self.evaluate(var0), - atol=0.01) - - def testSparse(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - for (dtype, learning_rate, rho, momentum, epsilon, centered) in _TESTPARAMS: - with tf.compat.v1.get_default_graph().as_default(), test_utils.use_gpu(): - # Initialize variables for numpy implementation. - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype) - - var0 = tf.Variable(var0_np) - var1 = tf.Variable(var1_np) - grads0_np_indices = np.array([0], dtype=np.int32) - grads0 = tf.IndexedSlices( - tf.constant(grads0_np), - tf.constant(grads0_np_indices), tf.constant([1])) - grads1_np_indices = np.array([1], dtype=np.int32) - grads1 = tf.IndexedSlices( - tf.constant(grads1_np), - tf.constant(grads1_np_indices), tf.constant([1])) - opt = rmsprop.RMSprop( - learning_rate=learning_rate, - rho=rho, - momentum=momentum, - epsilon=epsilon, - centered=centered) - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(tf.compat.v1.global_variables_initializer()) - - if centered: - mg0 = opt.get_slot(var0, "mg") - self.assertEqual(mg0 is not None, centered) - mg1 = opt.get_slot(var1, "mg") - self.assertEqual(mg1 is not None, centered) - else: - mg0 = None - mg1 = None - rms0 = opt.get_slot(var0, "rms") - self.assertIsNotNone(rms0) - rms1 = opt.get_slot(var1, "rms") - self.assertIsNotNone(rms1) - if momentum > 0.: - mom0 = opt.get_slot(var0, "momentum") - mom1 = opt.get_slot(var1, "momentum") - else: - mom0 = None - mom1 = None - - mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - # Run 3 steps of RMSprop - for _ in range(1, 4): - self.evaluate(update) - - var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy( - var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np, mom0_np, - learning_rate, rho, momentum, epsilon, centered) - var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy( - var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np, mom1_np, - learning_rate, rho, momentum, epsilon, centered) - - # Validate updated params - if centered: - self.assertAllCloseAccordingToType(mg0_np, 
self.evaluate(mg0)) - self.assertAllCloseAccordingToType(mg1_np, self.evaluate(mg1)) - self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0)) - self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1)) - if momentum > 0.: - self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0)) - self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1)) - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testCallableParams(self): - for dtype in _DATA_TYPES: - var0 = tf.Variable([1.0, 2.0], dtype=dtype) - var1 = tf.Variable([3.0, 4.0], dtype=dtype) - grads0 = tf.constant([0.1, 0.1], dtype=dtype) - grads1 = tf.constant([0.01, 0.01], dtype=dtype) - - learning_rate = lambda: 2.0 - rho = lambda: 0.9 - momentum = lambda: 0.0 - epsilon = 1.0 - opt = rmsprop.RMSprop(learning_rate, rho, momentum, epsilon) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Step 1: the rms accumulators were 1. So we should see a normal - # update: v -= grad * learning_rate - opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - # Check the parameters. - self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)), - 2.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)) - ]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([ - 3.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)), - 4.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)) - ]), self.evaluate(var1)) - # Step 2: the root mean square accumulators contain the previous update. - opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - # Check the parameters. - self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)) - - (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)), - 2.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)) - - (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)) - ]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([ - 3.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)) - - (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)), - 4.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)) - - (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)) - ]), self.evaluate(var1)) - - def testConstructRMSpropWithLR(self): - opt = rmsprop.RMSprop(lr=1.0) - opt_2 = rmsprop.RMSprop(learning_rate=0.1, lr=1.0) - opt_3 = rmsprop.RMSprop(learning_rate=0.1) - self.assertIsInstance(opt.lr, tf.Variable) - self.assertIsInstance(opt_2.lr, tf.Variable) - self.assertIsInstance(opt_3.lr, tf.Variable) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(opt.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) - self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testSlotsUniqueEager(self): - v1 = tf.Variable(1.) - v2 = tf.Variable(1.) - - opt = rmsprop.RMSprop(1., momentum=0., centered=False) - opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) - # There should be iteration, and one unique slot variable for v1 and v2.
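The hand-written constants in `testCallableParams` above fall directly out of the update rule; a quick standalone check of the step-1 and step-2 expectations for `var0` (plain Python, approximate values in the comments):

```python
import math

lr, rho, eps, g = 2.0, 0.9, 1.0, 0.1

# Step 1: the accumulator picks up (1 - rho) * g**2.
rms = rho * 0.0 + (1 - rho) * g * g        # 0.001
step1 = lr * g / math.sqrt(rms + eps)      # ~0.1999
print(1.0 - step1)                         # ~0.8001, the first expected entry

# Step 2: the accumulator discounts its previous value.
rms = rho * rms + (1 - rho) * g * g        # 0.001 * 0.9 + 0.001 = 0.0019
step2 = lr * g / math.sqrt(rms + eps)
print(1.0 - step1 - step2)                 # the second expected entry
```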
- self.assertLen(set({id(v) for v in opt.variables()}), 3) - self.assertEqual( - self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)) - - opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=False) - opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) - # There should be iteration, and two unique slot variables for v1 and v2. - self.assertLen(set({id(v) for v in opt.variables()}), 5) - self.assertEqual( - self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)) - - opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=True) - opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) - # There should be iteration, and three unique slot variables for v1 and v2 - self.assertLen(set({id(v) for v in opt.variables()}), 7) - self.assertEqual( - self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testMomentumProperValue(self): - with self.assertRaisesRegex(ValueError, - r"`momentum` must be between \[0, 1\]. " - r"Received: momentum=2.5 \(of type \)."): - rmsprop.RMSprop(1., momentum=2.5, centered=False) - - -@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) -class SlotColocationTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters([True, False]) - @tf_test_utils.run_gpu_only - def testRunMinimizeOnGPUForCPUVariables(self, use_resource): - with tf.device("/device:CPU:0"): - if use_resource: - var0 = tf.Variable([1.0, 2.0], dtype=tf.float32) - var1 = tf.Variable([3.0, 4.0], dtype=tf.float32) - else: - var0 = tf.Variable([1.0, 2.0], dtype=tf.float32) - var1 = tf.Variable([3.0, 4.0], dtype=tf.float32) - - def loss(): - return 5 * var0 + 3 * var1 - - opt = rmsprop.RMSprop( - learning_rate=1.0, decay=0.9, momentum=0.5, epsilon=1.0) - - # Fetch params to validate initial values - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - # Run 1 step through optimizer on GPU. - # Slot variables are created the first time optimizer is used on some - # variable. This tests that slot variables will be colocated with the base - # variable. - with tf.device("/device:GPU:0"): - # Note that for eager execution, minimize expects a function instead of a - # Tensor. - opt_op = opt.minimize(loss, [var0, var1]) - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.evaluate(opt_op) - - # Validate updated params, All variables should have decreased. - self.assertTrue(all(v < 0.0 for v in self.evaluate(var0)), - msg="updated variables: %s" % self.evaluate(var0)) - self.assertTrue(all(v < 2.0 for v in self.evaluate(var1)), - msg="updated variables: %s" % self.evaluate(var1)) - -if __name__ == "__main__": - tf.test.main() diff --git a/keras/optimizers/optimizer_v2/utils.py b/keras/optimizers/optimizer_v2/utils.py deleted file mode 100644 index 52cee4124227..000000000000 --- a/keras/optimizers/optimizer_v2/utils.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Optimizer utilities.""" - -import tensorflow.compat.v2 as tf -from tensorflow.python.platform import tf_logging as logging - - -def all_reduce_sum_gradients(grads_and_vars): - """Returns all-reduced gradients aggregated via summation. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - - Returns: - List of (gradient, variable) pairs where gradients have been all-reduced. - """ - grads_and_vars = list(grads_and_vars) - filtered_grads_and_vars = filter_empty_gradients(grads_and_vars) - if filtered_grads_and_vars: - if tf.__internal__.distribute.strategy_supports_no_merge_call(): - grads = [pair[0] for pair in filtered_grads_and_vars] - reduced = tf.distribute.get_replica_context().all_reduce( - tf.distribute.ReduceOp.SUM, grads) - else: - # TODO(b/183257003): Remove this branch - reduced = tf.distribute.get_replica_context().merge_call( - _all_reduce_sum_fn, args=(filtered_grads_and_vars,)) - else: - reduced = [] - # Copy 'reduced' but add None gradients back in - reduced_with_nones = [] - reduced_pos = 0 - for g, v in grads_and_vars: - if g is None: - reduced_with_nones.append((None, v)) - else: - reduced_with_nones.append((reduced[reduced_pos], v)) - reduced_pos += 1 - assert reduced_pos == len(reduced), "Failed to add all gradients" - return reduced_with_nones - - -def filter_empty_gradients(grads_and_vars): - """Filter out `(grad, var)` pairs that have a gradient equal to `None`.""" - grads_and_vars = tuple(grads_and_vars) - if not grads_and_vars: - return grads_and_vars - - filtered = [] - vars_with_empty_grads = [] - for grad, var in grads_and_vars: - if grad is None: - vars_with_empty_grads.append(var) - else: - filtered.append((grad, var)) - filtered = tuple(filtered) - - if not filtered: - variable = ([v.name for _, v in grads_and_vars],) - raise ValueError(f"No gradients provided for any variable: {variable}. " - f"Provided `grads_and_vars` is {grads_and_vars}.") - if vars_with_empty_grads: - logging.warning( - ("Gradients do not exist for variables %s when minimizing the loss. " - "If you're using `model.compile()`, did you forget to provide a `loss` " - "argument?"), - ([v.name for v in vars_with_empty_grads])) - return filtered - - -def make_gradient_clipnorm_fn(clipnorm): - """Creates a gradient transformation function for clipping by norm.""" - if clipnorm is None: - return lambda grads_and_vars: grads_and_vars - - def gradient_clipnorm_fn(grads_and_vars): - - if isinstance(tf.distribute.get_strategy(), - (tf.distribute.experimental.CentralStorageStrategy, - tf.compat.v1.distribute.experimental.CentralStorageStrategy)): - raise ValueError( - "`clipnorm` is not supported with `CentralStorageStrategy`. 
" - f"The strategy used is {tf.distribute.get_strategy()}.") - - clipped_grads_and_vars = [ - (tf.clip_by_norm(g, clipnorm), v) for g, v in grads_and_vars - ] - return clipped_grads_and_vars - - return gradient_clipnorm_fn - - -def make_global_gradient_clipnorm_fn(clipnorm): - """Creates a gradient transformation function for clipping by norm.""" - if clipnorm is None: - return lambda grads_and_vars: grads_and_vars - - def gradient_clipnorm_fn(grads_and_vars): - - if isinstance(tf.distribute.get_strategy(), - (tf.distribute.experimental.CentralStorageStrategy, - tf.compat.v1.distribute.experimental.CentralStorageStrategy)): - raise ValueError( - "`global_clipnorm` is not supported with `CenteralStorageStrategy`. " - f"The strategy used is {tf.distribute.get_strategy()}.") - - grads, variables = zip(*grads_and_vars) - clipped_grads, _ = tf.clip_by_global_norm(grads, clipnorm) - clipped_grads_and_vars = list(zip(clipped_grads, variables)) - return clipped_grads_and_vars - - return gradient_clipnorm_fn - - -def make_gradient_clipvalue_fn(clipvalue): - """Creates a gradient transformation function for clipping by value.""" - if clipvalue is None: - return lambda grads_and_vars: grads_and_vars - - def gradient_clipvalue_fn(grads_and_vars): - - if isinstance(tf.distribute.get_strategy(), - (tf.distribute.experimental.CentralStorageStrategy, - tf.compat.v1.distribute.experimental.CentralStorageStrategy)): - raise ValueError( - "`clipvalue` is not supported with `CenteralStorageStrategy`. " - f"The strategy used is {tf.distribute.get_strategy()}.") - - clipped_grads_and_vars = [(tf.clip_by_value(g, -clipvalue, - clipvalue), v) - for g, v in grads_and_vars] - return clipped_grads_and_vars - - return gradient_clipvalue_fn - - -def _all_reduce_sum_fn(distribution, grads_and_vars): - return distribution.extended.batch_reduce_to(tf.distribute.ReduceOp.SUM, - grads_and_vars) diff --git a/keras/optimizers/optimizers_test.py b/keras/optimizers/optimizers_test.py deleted file mode 100644 index ee08cb7eded3..000000000000 --- a/keras/optimizers/optimizers_test.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for Keras optimizers.""" - -import tensorflow.compat.v2 as tf - -import gc -import weakref - -import numpy as np - -import keras -from keras.optimizers import optimizer_v1 -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils -from keras.utils import np_utils -from tensorflow.python.training.adam import AdamOptimizer -from tensorflow.python.training.experimental.loss_scale_optimizer import MixedPrecisionLossScaleOptimizer - - -def _get_model(input_dim, num_hidden, output_dim): - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden, - activation='relu', - input_shape=(input_dim,))) - model.add(keras.layers.Dense(output_dim, activation='softmax')) - return model - - -@test_combinations.run_all_keras_modes -class KerasOptimizersTest(test_combinations.TestCase): - - def _test_optimizer(self, optimizer, target=0.75): - if tf.executing_eagerly(): - self.skipTest( - 'v1 optimizer does not run in eager mode') - np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=1000, test_samples=200, input_shape=(10,), num_classes=2) - y_train = np_utils.to_categorical(y_train) - model = _get_model(x_train.shape[1], 20, y_train.shape[1]) - model.compile( - loss='categorical_crossentropy', - optimizer=optimizer, - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - np.testing.assert_equal( - keras.backend.get_value(model.optimizer.iterations), 0) - history = model.fit(x_train, y_train, epochs=2, batch_size=16, verbose=0) - np.testing.assert_equal( - keras.backend.get_value(model.optimizer.iterations), - 126) # 63 steps per epoch - self.assertGreaterEqual(history.history['acc'][-1], target) - config = keras.optimizers.serialize(optimizer) - optim = keras.optimizers.deserialize(config) - new_config = keras.optimizers.serialize(optim) - new_config['class_name'] = new_config['class_name'].lower() - new_config['config'].pop('name', None) - if 'amsgrad' not in config['config']: - new_config['config'].pop('amsgrad', None) - if 'decay' in new_config['config'] and 'schedule_decay' in config['config']: - new_config['config']['schedule_decay'] = new_config['config'].pop('decay') - if 'momentum' not in config['config']: - new_config['config'].pop('momentum', None) - if 'centered' not in config['config']: - new_config['config'].pop('centered', None) - self.assertDictEqual(config, new_config) - - # Test constraints. - model = keras.models.Sequential() - dense = keras.layers.Dense( - 10, - input_shape=(x_train.shape[1],), - kernel_constraint=lambda x: 0. * x + 1., - bias_constraint=lambda x: 0. 
* x + 2., - activation='relu') - model.add(dense) - model.add(keras.layers.Dense(y_train.shape[1], activation='softmax')) - model.compile( - loss='categorical_crossentropy', - optimizer=optimizer, - metrics=['accuracy'], - run_eagerly=test_utils.should_run_eagerly()) - np.testing.assert_equal( - keras.backend.get_value(model.optimizer.iterations), - 126) # Using same optimizer from before - model.train_on_batch(x_train[:10], y_train[:10]) - np.testing.assert_equal( - keras.backend.get_value(model.optimizer.iterations), 127) - kernel, bias = dense.get_weights() - np.testing.assert_allclose(kernel, 1., atol=1e-3) - np.testing.assert_allclose(bias, 2., atol=1e-3) - - def test_sgd(self): - with self.cached_session(): - self._test_optimizer(optimizer_v1.SGD()) - - def test_momentum(self): - with self.cached_session(): - self._test_optimizer( - optimizer_v1.SGD(lr=0.01, momentum=0.9, nesterov=True)) - - def test_rmsprop(self): - with self.cached_session(): - self._test_optimizer(optimizer_v1.RMSprop()) - self._test_optimizer(optimizer_v1.RMSprop(decay=1e-3)) - - def test_adagrad(self): - with self.cached_session(): - self._test_optimizer(optimizer_v1.Adagrad()) - self._test_optimizer(optimizer_v1.Adagrad(decay=1e-3)) - - def test_adadelta(self): - with self.cached_session(): - self._test_optimizer(optimizer_v1.Adadelta(), target=0.6) - # Accuracy seems dependent on the initialization. Even adding - # tf.compat.v1.Print nodes in the graph seemed to affect the - # initialization seed, and hence the accuracy. - self._test_optimizer(optimizer_v1.Adadelta(decay=1e-3), target=0.4) - - def test_adam(self): - with self.cached_session(): - self._test_optimizer(optimizer_v1.Adam()) - # Accuracy seems dependent on the seed initialization. - # TODO(b/121051441): fix test flakiness. 
- self._test_optimizer(optimizer_v1.Adam(decay=1e-3), target=0.73) - self._test_optimizer(optimizer_v1.Adam(amsgrad=True)) - - def test_adamax(self): - with self.cached_session(): - self._test_optimizer(optimizer_v1.Adamax()) - self._test_optimizer(optimizer_v1.Adamax(decay=1e-3)) - - def test_nadam(self): - with self.cached_session(): - self._test_optimizer(optimizer_v1.Nadam()) - - def test_clipnorm(self): - with self.cached_session(): - self._test_optimizer( - optimizer_v1.SGD(lr=0.01, momentum=0.9, clipnorm=0.5)) - - def test_clipvalue(self): - with self.cached_session(): - self._test_optimizer( - optimizer_v1.SGD(lr=0.01, momentum=0.9, clipvalue=0.5)) - - def test_tf_optimizer(self): - if tf.executing_eagerly(): - self.skipTest( - 'v1 optimizer does not run in eager mode') - optimizer = optimizer_v1.TFOptimizer(AdamOptimizer(0.01)) - model = keras.models.Sequential() - model.add(keras.layers.Dense( - 2, input_shape=(3,), kernel_constraint=keras.constraints.MaxNorm(1))) - # This is possible - model.compile( - loss='mean_squared_error', - optimizer=optimizer, - run_eagerly=test_utils.should_run_eagerly()) - keras.backend.track_tf_optimizer(optimizer) - model.fit(np.random.random((5, 3)), - np.random.random((5, 2)), - epochs=1, - batch_size=5, - verbose=0) - # not supported - with self.assertRaises(NotImplementedError): - _ = optimizer.weights - with self.assertRaises(NotImplementedError): - optimizer.get_config() - with self.assertRaises(NotImplementedError): - optimizer.from_config(None) - - def test_optimizer_garbage_collection(self): - if tf.executing_eagerly(): - self.skipTest( - 'v1 optimizer does not run in eager mode') - graph = tf.Graph() - with graph.as_default(): - optimizer = optimizer_v1.TFOptimizer(AdamOptimizer(0.01)) - keras.backend.track_tf_optimizer(optimizer) - optimizer_weak = weakref.ref(optimizer) - graph_weak = weakref.ref(graph) - del graph, optimizer - gc.collect() - # Check that the weak references are dead now. 
- self.assertIs(graph_weak(), None) - self.assertIs(optimizer_weak(), None) - - def test_tf_optimizer_iterations(self): - if tf.executing_eagerly(): - self.skipTest( - 'v1 optimizer does not run in eager mode') - with self.cached_session(): - optimizer = optimizer_v1.TFOptimizer(AdamOptimizer(0.01)) - model = keras.models.Sequential() - model.add(keras.layers.Dense( - 2, input_shape=(3,), kernel_constraint=keras.constraints.MaxNorm(1))) - model.compile( - loss='mean_squared_error', - optimizer=optimizer, - run_eagerly=test_utils.should_run_eagerly()) - keras.backend.track_tf_optimizer(optimizer) - self.assertEqual(keras.backend.get_value(model.optimizer.iterations), 0) - - model.fit(np.random.random((55, 3)), - np.random.random((55, 2)), - epochs=1, - batch_size=5, - verbose=0) - self.assertEqual(keras.backend.get_value(model.optimizer.iterations), 11) - - def test_negative_clipvalue_or_clipnorm(self): - with self.assertRaises(ValueError): - _ = optimizer_v1.SGD(lr=0.01, clipvalue=-0.5) - with self.assertRaises(ValueError): - _ = optimizer_v1.Adam(clipnorm=-2.0) - - def test_mixed_precision_loss_scale_optimizer(self): - if tf.executing_eagerly(): - self.skipTest('v1 optimizer does not run in eager mode') - optimizer = MixedPrecisionLossScaleOptimizer(AdamOptimizer(), 'dynamic') - model = keras.models.Sequential() - model.add( - keras.layers.Dense( - 2, input_shape=(3,), - kernel_constraint=keras.constraints.MaxNorm(1))) - model.compile( - loss='mean_squared_error', - optimizer=optimizer, - run_eagerly=test_utils.should_run_eagerly()) - model.fit( - np.random.random((5, 3)), - np.random.random((5, 2)), - epochs=1, - batch_size=5, - verbose=0) - - def test_deserialization_error(self): - with self.assertRaisesRegex(ValueError, 'Could not interpret optimizer'): - keras.optimizers.get(0) - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/optimizers/rmsprop.py b/keras/optimizers/rmsprop.py new file mode 100644 index 000000000000..c59a822ca55a --- /dev/null +++ b/keras/optimizers/rmsprop.py @@ -0,0 +1,218 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""RMSprop optimizer implementation.""" + +import tensorflow.compat.v2 as tf + +from keras.optimizers import optimizer +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export( + "keras.optimizers.experimental.RMSprop", + "keras.optimizers.RMSprop", + "keras.dtensor.experimental.optimizers.RMSprop", + v1=[], +) +class RMSprop(optimizer.Optimizer): + r"""Optimizer that implements the RMSprop algorithm. + + The gist of RMSprop is to: + + - Maintain a moving (discounted) average of the square of gradients + - Divide the gradient by the root of this average + + This implementation of RMSprop uses plain momentum, not Nesterov momentum. 
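In update-rule terms, the dense branch of the `update_step` method added below computes the following (Python-style pseudocode paraphrasing the code in this file):

```python
# rho, momentum, epsilon, centered are the constructor arguments.
velocity = rho * velocity + (1 - rho) * grad ** 2
if centered:
    average_grad = rho * average_grad + (1 - rho) * grad
    denominator = velocity - average_grad ** 2 + epsilon
else:
    denominator = velocity + epsilon
increment = lr * grad / sqrt(denominator)
if momentum > 0:
    momentum_buffer = momentum * momentum_buffer + increment
    variable -= momentum_buffer
else:
    variable -= increment
```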
+ + The centered version additionally maintains a moving average of the + gradients, and uses that average to estimate the variance. + + Args: + learning_rate: Initial value for the learning rate: + either a floating point value, + or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance. + Defaults to 0.001. + rho: float, defaults to 0.9. Discounting factor for the old gradients. + momentum: float, defaults to 0.0. If not 0.0, the optimizer tracks the + momentum value, with a decay rate equal to `1 - momentum`. + epsilon: A small constant for numerical stability, added to the + accumulated squared-gradient term inside the square root. + Defaults to `1e-7`. + centered: Boolean. If `True`, gradients are normalized by the estimated + variance of the gradient; if `False`, by the uncentered second moment. + Setting this to `True` may help with training, but is slightly more + expensive in terms of computation and memory. Defaults to `False`. + {{base_optimizer_keyword_args}} + + Usage: + + >>> opt = tf.keras.optimizers.RMSprop(learning_rate=0.1) + >>> var1 = tf.Variable(10.0) + >>> loss = lambda: (var1 ** 2) / 2.0 # d(loss) / d(var1) = var1 + >>> opt.minimize(loss, [var1]) + >>> var1.numpy() + 9.683772 + + Reference: + - [Hinton, 2012](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) # noqa: E501 + """ + + def __init__( + self, + learning_rate=0.001, + rho=0.9, + momentum=0.0, + epsilon=1e-7, + centered=False, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=100, + jit_compile=True, + name="RMSprop", + **kwargs + ): + super().__init__( + weight_decay=weight_decay, + clipnorm=clipnorm, + clipvalue=clipvalue, + global_clipnorm=global_clipnorm, + use_ema=use_ema, + ema_momentum=ema_momentum, + ema_overwrite_frequency=ema_overwrite_frequency, + jit_compile=jit_compile, + name=name, + **kwargs + ) + self._learning_rate = self._build_learning_rate(learning_rate) + self.rho = rho + self.momentum = momentum + self.epsilon = epsilon + self.centered = centered + + def build(self, var_list): + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self._built = True + + self._velocities = [] + for var in var_list: + self._velocities.append( + self.add_variable_from_reference(var, "velocity") + ) + + self._momentums = [] + if self.momentum > 0: + for var in var_list: + self._momentums.append( + self.add_variable_from_reference(var, "momentum") + ) + + self._average_gradients = [] + if self.centered: + for var in var_list: + self._average_gradients.append( + self.add_variable_from_reference(var, "average_gradient") + ) + + def update_step(self, gradient, variable): + """Update step given gradient and the associated model variable.""" + lr = tf.cast(self.learning_rate, variable.dtype) + + var_key = self._var_key(variable) + velocity = self._velocities[self._index_dict[var_key]] + momentum = None + if self.momentum > 0: + momentum = self._momentums[self._index_dict[var_key]] + average_grad = None + if self.centered: + average_grad = self._average_gradients[self._index_dict[var_key]] + + rho = self.rho + + if isinstance(gradient, tf.IndexedSlices): + # Sparse gradients.
+ velocity.assign(rho * velocity) + velocity.scatter_add( + tf.IndexedSlices( + tf.square(gradient.values) * (1 - rho), gradient.indices + ) + ) + if self.centered: + average_grad.assign(rho * average_grad) + average_grad.scatter_add( + tf.IndexedSlices( + gradient.values * (1 - rho), gradient.indices + ) + ) + denominator = velocity - tf.square(average_grad) + self.epsilon + else: + denominator = velocity + self.epsilon + denominator_slices = tf.gather(denominator, gradient.indices) + increment = tf.IndexedSlices( + lr * gradient.values * tf.math.rsqrt(denominator_slices), + gradient.indices, + ) + + if self.momentum > 0: + momentum.assign(self.momentum * momentum) + momentum.scatter_add(increment) + variable.assign_add(-momentum) + else: + variable.scatter_add(-increment) + else: + # Dense gradients. + velocity.assign(rho * velocity + (1 - rho) * tf.square(gradient)) + if self.centered: + average_grad.assign(rho * average_grad + (1 - rho) * gradient) + denominator = velocity - tf.square(average_grad) + self.epsilon + else: + denominator = velocity + self.epsilon + increment = lr * gradient * tf.math.rsqrt(denominator) + if self.momentum > 0: + momentum.assign(self.momentum * momentum + increment) + variable.assign_add(-momentum) + else: + variable.assign_add(-increment) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "rho": self.rho, + "momentum": self.momentum, + "epsilon": self.epsilon, + "centered": self.centered, + } + ) + return config + + +RMSprop.__doc__ = RMSprop.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/schedules/BUILD b/keras/optimizers/schedules/BUILD index c0a313e338c5..a4854299cf40 100644 --- a/keras/optimizers/schedules/BUILD +++ b/keras/optimizers/schedules/BUILD @@ -1,14 +1,16 @@ # Description: # Contains the learning rate schedule API, +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "cuda_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/tensorflow/python:__pkg__", "//third_party/tensorflow/python/distribute:__pkg__", - "//third_party/tensorflow/python/training/tracking:__pkg__", + "//third_party/tensorflow/python/trackable:__pkg__", ], licenses = ["notice"], ) @@ -35,7 +37,7 @@ cuda_py_test( "//:expect_numpy_installed", "//:expect_tensorflow_installed", "//keras", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/testing_infra:test_combinations", ], ) diff --git a/keras/optimizers/schedules/__init__.py b/keras/optimizers/schedules/__init__.py index e5ffd337974e..cfa6e7a47ff2 100644 --- a/keras/optimizers/schedules/__init__.py +++ b/keras/optimizers/schedules/__init__.py @@ -14,7 +14,9 @@ # ============================================================================== """Learning rate schedule API.""" -from keras.optimizers.schedules.learning_rate_schedules import ExponentialDecay -from keras.optimizers.schedules.learning_rate_schedules import InverseTimeDecay -from keras.optimizers.schedules.learning_rate_schedules import PiecewiseConstantDecay -from keras.optimizers.schedules.learning_rate_schedules import PolynomialDecay +from keras.optimizers.schedules.learning_rate_schedule import ExponentialDecay +from keras.optimizers.schedules.learning_rate_schedule import InverseTimeDecay +from 
keras.optimizers.schedules.learning_rate_schedule import ( + PiecewiseConstantDecay, +) +from keras.optimizers.schedules.learning_rate_schedule import PolynomialDecay diff --git a/keras/optimizers/schedules/learning_rate_schedule.py b/keras/optimizers/schedules/learning_rate_schedule.py index 0aa8765dbb2c..c017a7d6d5f4 100644 --- a/keras/optimizers/schedules/learning_rate_schedule.py +++ b/keras/optimizers/schedules/learning_rate_schedule.py @@ -14,1071 +14,1246 @@ # ============================================================================== """Various learning rate schedule functions.""" -import tensorflow.compat.v2 as tf - import abc import math + +import tensorflow.compat.v2 as tf + from keras import backend -from keras.utils import generic_utils +from keras.saving import serialization_lib +from keras.saving.legacy import serialization as legacy_serialization + +# isort: off from tensorflow.python.util.tf_export import keras_export @keras_export("keras.optimizers.schedules.LearningRateSchedule") class LearningRateSchedule: - """The learning rate schedule base class. + """The learning rate schedule base class. - You can use a learning rate schedule to modulate how the learning rate - of your optimizer changes over time. + You can use a learning rate schedule to modulate how the learning rate + of your optimizer changes over time. - Several built-in learning rate schedules are available, such as - `tf.keras.optimizers.schedules.ExponentialDecay` or - `tf.keras.optimizers.schedules.PiecewiseConstantDecay`: + Several built-in learning rate schedules are available, such as + `tf.keras.optimizers.schedules.ExponentialDecay` or + `tf.keras.optimizers.schedules.PiecewiseConstantDecay`: - ```python - lr_schedule = keras.optimizers.schedules.ExponentialDecay( - initial_learning_rate=1e-2, - decay_steps=10000, - decay_rate=0.9) - optimizer = keras.optimizers.SGD(learning_rate=lr_schedule) - ``` + ```python + lr_schedule = keras.optimizers.schedules.ExponentialDecay( + initial_learning_rate=1e-2, + decay_steps=10000, + decay_rate=0.9) + optimizer = keras.optimizers.SGD(learning_rate=lr_schedule) + ``` - A `LearningRateSchedule` instance can be passed in as the `learning_rate` - argument of any optimizer. + A `LearningRateSchedule` instance can be passed in as the `learning_rate` + argument of any optimizer. - To implement your own schedule object, you should implement the `__call__` - method, which takes a `step` argument (scalar integer tensor, the - current training step count). - Like for any other Keras object, you can also optionally - make your object serializable by implementing the `get_config` - and `from_config` methods. + To implement your own schedule object, you should implement the `__call__` + method, which takes a `step` argument (scalar integer tensor, the + current training step count). + Like for any other Keras object, you can also optionally + make your object serializable by implementing the `get_config` + and `from_config` methods. 
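Building on the docstring example that follows: to make a custom schedule serializable, mirror the constructor arguments in `get_config`, since the inherited `from_config` simply calls `cls(**config)`. A minimal sketch:

```python
import tensorflow as tf

class MyLRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, initial_learning_rate):
        self.initial_learning_rate = initial_learning_rate

    def __call__(self, step):
        # `step` is a scalar integer tensor; cast it before dividing.
        return self.initial_learning_rate / (tf.cast(step, tf.float32) + 1.0)

    def get_config(self):
        # Returning the constructor kwargs is all `from_config` needs.
        return {"initial_learning_rate": self.initial_learning_rate}
```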
- Example: + Example: - ```python - class MyLRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule): + ```python + class MyLRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule): - def __init__(self, initial_learning_rate): - self.initial_learning_rate = initial_learning_rate + def __init__(self, initial_learning_rate): + self.initial_learning_rate = initial_learning_rate - def __call__(self, step): - return self.initial_learning_rate / (step + 1) + def __call__(self, step): + return self.initial_learning_rate / (step + 1) - optimizer = tf.keras.optimizers.SGD(learning_rate=MyLRSchedule(0.1)) - ``` - """ + optimizer = tf.keras.optimizers.SGD(learning_rate=MyLRSchedule(0.1)) + ``` + """ - @abc.abstractmethod - def __call__(self, step): - raise NotImplementedError("Learning rate schedule must override __call__") + @abc.abstractmethod + def __call__(self, step): + raise NotImplementedError( + f"Learning rate schedule '{self.__class__.__name__}' " + "must override `__call__(self, step)`." + ) - @abc.abstractmethod - def get_config(self): - raise NotImplementedError("Learning rate schedule must override get_config") + @abc.abstractmethod + def get_config(self): + raise NotImplementedError( + f"Learning rate schedule '{self.__class__.__name__}' " + "must override `get_config()` in order to be serializable." + ) - @classmethod - def from_config(cls, config): - """Instantiates a `LearningRateSchedule` from its config. + @classmethod + def from_config(cls, config): + """Instantiates a `LearningRateSchedule` from its config. - Args: - config: Output of `get_config()`. + Args: + config: Output of `get_config()`. - Returns: - A `LearningRateSchedule` instance. - """ - return cls(**config) + Returns: + A `LearningRateSchedule` instance. + """ + return cls(**config) @keras_export("keras.optimizers.schedules.ExponentialDecay") class ExponentialDecay(LearningRateSchedule): - """A LearningRateSchedule that uses an exponential decay schedule. - - When training a model, it is often useful to lower the learning rate as - the training progresses. This schedule applies an exponential decay function - to an optimizer step, given a provided initial learning rate. - - The schedule is a 1-arg callable that produces a decayed learning - rate when passed the current optimizer step. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - It is computed as: - - ```python - def decayed_learning_rate(step): - return initial_learning_rate * decay_rate ^ (step / decay_steps) - ``` - - If the argument `staircase` is `True`, then `step / decay_steps` is - an integer division and the decayed learning rate follows a - staircase function. - - You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` - as the learning rate. - Example: When fitting a Keras model, decay every 100000 steps with a base - of 0.96: - - ```python - initial_learning_rate = 0.1 - lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( - initial_learning_rate, - decay_steps=100000, - decay_rate=0.96, - staircase=True) - - model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule), - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - model.fit(data, labels, epochs=5) - ``` - - The learning rate schedule is also serializable and deserializable using - `tf.keras.optimizers.schedules.serialize` and - `tf.keras.optimizers.schedules.deserialize`. 
- - Returns: - A 1-arg callable learning rate schedule that takes the current optimizer - step and outputs the decayed learning rate, a scalar `Tensor` of the same - type as `initial_learning_rate`. - """ - - def __init__( - self, - initial_learning_rate, - decay_steps, - decay_rate, - staircase=False, - name=None): - """Applies exponential decay to the learning rate. + """A LearningRateSchedule that uses an exponential decay schedule. + + When training a model, it is often useful to lower the learning rate as + the training progresses. This schedule applies an exponential decay function + to an optimizer step, given a provided initial learning rate. + + The schedule is a 1-arg callable that produces a decayed learning + rate when passed the current optimizer step. This can be useful for changing + the learning rate value across different invocations of optimizer functions. + It is computed as: + + ```python + def decayed_learning_rate(step): + return initial_learning_rate * decay_rate ^ (step / decay_steps) + ``` + + If the argument `staircase` is `True`, then `step / decay_steps` is + an integer division and the decayed learning rate follows a + staircase function. + + You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` + as the learning rate. + Example: When fitting a Keras model, decay every 100000 steps with a base + of 0.96: + + ```python + initial_learning_rate = 0.1 + lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( + initial_learning_rate, + decay_steps=100000, + decay_rate=0.96, + staircase=True) - Args: - initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a - Python number. The initial learning rate. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. - Must be positive. See the decay computation above. - decay_rate: A scalar `float32` or `float64` `Tensor` or a - Python number. The decay rate. - staircase: Boolean. If `True` decay the learning rate at discrete - intervals - name: String. Optional name of the operation. Defaults to - 'ExponentialDecay'. + model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule), + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + model.fit(data, labels, epochs=5) + ``` + + The learning rate schedule is also serializable and deserializable using + `tf.keras.optimizers.schedules.serialize` and + `tf.keras.optimizers.schedules.deserialize`. + + Returns: + A 1-arg callable learning rate schedule that takes the current optimizer + step and outputs the decayed learning rate, a scalar `Tensor` of the same + type as `initial_learning_rate`. 
""" - super().__init__() - self.initial_learning_rate = initial_learning_rate - self.decay_steps = decay_steps - self.decay_rate = decay_rate - self.staircase = staircase - self.name = name - - def __call__(self, step): - with tf.name_scope(self.name or "ExponentialDecay") as name: - initial_learning_rate = tf.convert_to_tensor( - self.initial_learning_rate, name="initial_learning_rate") - dtype = initial_learning_rate.dtype - decay_steps = tf.cast(self.decay_steps, dtype) - decay_rate = tf.cast(self.decay_rate, dtype) - - global_step_recomp = tf.cast(step, dtype) - p = global_step_recomp / decay_steps - if self.staircase: - p = tf.floor(p) - return tf.multiply( - initial_learning_rate, tf.pow(decay_rate, p), name=name) - - def get_config(self): - return { - "initial_learning_rate": self.initial_learning_rate, - "decay_steps": self.decay_steps, - "decay_rate": self.decay_rate, - "staircase": self.staircase, - "name": self.name - } + + def __init__( + self, + initial_learning_rate, + decay_steps, + decay_rate, + staircase=False, + name=None, + ): + """Applies exponential decay to the learning rate. + + Args: + initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a + Python number. The initial learning rate. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. + Must be positive. See the decay computation above. + decay_rate: A scalar `float32` or `float64` `Tensor` or a + Python number. The decay rate. + staircase: Boolean. If `True` decay the learning rate at discrete + intervals + name: String. Optional name of the operation. Defaults to + 'ExponentialDecay'. + """ + super().__init__() + self.initial_learning_rate = initial_learning_rate + self.decay_steps = decay_steps + self.decay_rate = decay_rate + self.staircase = staircase + self.name = name + + def __call__(self, step): + with tf.name_scope(self.name or "ExponentialDecay") as name: + initial_learning_rate = tf.convert_to_tensor( + self.initial_learning_rate, name="initial_learning_rate" + ) + dtype = initial_learning_rate.dtype + decay_steps = tf.cast(self.decay_steps, dtype) + decay_rate = tf.cast(self.decay_rate, dtype) + + global_step_recomp = tf.cast(step, dtype) + p = global_step_recomp / decay_steps + if self.staircase: + p = tf.floor(p) + return tf.multiply( + initial_learning_rate, tf.pow(decay_rate, p), name=name + ) + + def get_config(self): + return { + "initial_learning_rate": self.initial_learning_rate, + "decay_steps": self.decay_steps, + "decay_rate": self.decay_rate, + "staircase": self.staircase, + "name": self.name, + } @keras_export("keras.optimizers.schedules.PiecewiseConstantDecay") class PiecewiseConstantDecay(LearningRateSchedule): - """A LearningRateSchedule that uses a piecewise constant decay schedule. - - The function returns a 1-arg callable to compute the piecewise constant - when passed the current optimizer step. This can be useful for changing the - learning rate value across different invocations of optimizer functions. - - Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5 - for the next 10000 steps, and 0.1 for any additional steps. - - ```python - step = tf.Variable(0, trainable=False) - boundaries = [100000, 110000] - values = [1.0, 0.5, 0.1] - learning_rate_fn = keras.optimizers.schedules.PiecewiseConstantDecay( - boundaries, values) - - # Later, whenever we perform an optimization step, we pass in the step. 
- learning_rate = learning_rate_fn(step) - ``` - - You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` - as the learning rate. The learning rate schedule is also serializable and - deserializable using `tf.keras.optimizers.schedules.serialize` and - `tf.keras.optimizers.schedules.deserialize`. - - Returns: - A 1-arg callable learning rate schedule that takes the current optimizer - step and outputs the decayed learning rate, a scalar `Tensor` of the same - type as the boundary tensors. - - The output of the 1-arg function that takes the `step` - is `values[0]` when `step <= boundaries[0]`, - `values[1]` when `step > boundaries[0]` and `step <= boundaries[1]`, ..., - and values[-1] when `step > boundaries[-1]`. - """ - - def __init__( - self, - boundaries, - values, - name=None): - """Piecewise constant from boundaries and interval values. + """A LearningRateSchedule that uses a piecewise constant decay schedule. - Args: - boundaries: A list of `Tensor`s or `int`s or `float`s with strictly - increasing entries, and with all elements having the same type as the - optimizer step. - values: A list of `Tensor`s or `float`s or `int`s that specifies the - values for the intervals defined by `boundaries`. It should have one - more element than `boundaries`, and all elements should have the same - type. - name: A string. Optional name of the operation. Defaults to - 'PiecewiseConstant'. - - Raises: - ValueError: if the number of elements in the lists do not match. + The function returns a 1-arg callable to compute the piecewise constant + when passed the current optimizer step. This can be useful for changing the + learning rate value across different invocations of optimizer functions. + + Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5 + for the next 10000 steps, and 0.1 for any additional steps. + + ```python + step = tf.Variable(0, trainable=False) + boundaries = [100000, 110000] + values = [1.0, 0.5, 0.1] + learning_rate_fn = keras.optimizers.schedules.PiecewiseConstantDecay( + boundaries, values) + + # Later, whenever we perform an optimization step, we pass in the step. + learning_rate = learning_rate_fn(step) + ``` + + You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` + as the learning rate. The learning rate schedule is also serializable and + deserializable using `tf.keras.optimizers.schedules.serialize` and + `tf.keras.optimizers.schedules.deserialize`. + + Returns: + A 1-arg callable learning rate schedule that takes the current optimizer + step and outputs the decayed learning rate, a scalar `Tensor` of the same + type as the boundary tensors. + + The output of the 1-arg function that takes the `step` + is `values[0]` when `step <= boundaries[0]`, + `values[1]` when `step > boundaries[0]` and `step <= boundaries[1]`, ..., + and values[-1] when `step > boundaries[-1]`. """ - super().__init__() - - if len(boundaries) != len(values) - 1: - raise ValueError( - "The length of boundaries should be 1 less than the length of " - f"values. 
Received: boundaries={boundaries} of length " - f"{len(boundaries)}, and values={values} of length {len(values)}.") - - self.boundaries = boundaries - self.values = values - self.name = name - - def __call__(self, step): - with tf.name_scope(self.name or "PiecewiseConstant"): - boundaries = tf.nest.map_structure(tf.convert_to_tensor, - tf.nest.flatten(self.boundaries)) - values = tf.nest.map_structure(tf.convert_to_tensor, - tf.nest.flatten(self.values)) - x_recomp = tf.convert_to_tensor(step) - for i, b in enumerate(boundaries): - if b.dtype.base_dtype != x_recomp.dtype.base_dtype: - # We cast the boundaries to have the same type as the step - b = tf.cast(b, x_recomp.dtype.base_dtype) - boundaries[i] = b - pred_fn_pairs = [] - pred_fn_pairs.append((x_recomp <= boundaries[0], lambda: values[0])) - pred_fn_pairs.append((x_recomp > boundaries[-1], lambda: values[-1])) - for low, high, v in zip(boundaries[:-1], boundaries[1:], values[1:-1]): - # Need to bind v here; can do this with lambda v=v: ... - pred = (x_recomp > low) & (x_recomp <= high) - pred_fn_pairs.append((pred, lambda v=v: v)) - - # The default isn't needed here because our conditions are mutually - # exclusive and exhaustive, but tf.case requires it. - default = lambda: values[0] - return tf.case(pred_fn_pairs, default, exclusive=True) - - def get_config(self): - return { - "boundaries": self.boundaries, - "values": self.values, - "name": self.name - } + + def __init__(self, boundaries, values, name=None): + """Piecewise constant from boundaries and interval values. + + Args: + boundaries: A list of `Tensor`s or `int`s or `float`s with strictly + increasing entries, and with all elements having the same type as + the optimizer step. + values: A list of `Tensor`s or `float`s or `int`s that specifies the + values for the intervals defined by `boundaries`. It should have one + more element than `boundaries`, and all elements should have the + same type. + name: A string. Optional name of the operation. Defaults to + 'PiecewiseConstant'. + + Raises: + ValueError: if the number of elements in the lists do not match. + """ + super().__init__() + + if len(boundaries) != len(values) - 1: + raise ValueError( + "The length of boundaries should be 1 less than the length of " + f"values. Received: boundaries={boundaries} of length " + f"{len(boundaries)}, and values={values} " + f"of length {len(values)}." + ) + + self.boundaries = boundaries + self.values = values + self.name = name + + def __call__(self, step): + with tf.name_scope(self.name or "PiecewiseConstant"): + boundaries = tf.nest.map_structure( + tf.convert_to_tensor, tf.nest.flatten(self.boundaries) + ) + values = tf.nest.map_structure( + tf.convert_to_tensor, tf.nest.flatten(self.values) + ) + x_recomp = tf.convert_to_tensor(step) + for i, b in enumerate(boundaries): + if b.dtype.base_dtype != x_recomp.dtype.base_dtype: + # We cast the boundaries to have the same type as the step + b = tf.cast(b, x_recomp.dtype.base_dtype) + boundaries[i] = b + pred_fn_pairs = [] + pred_fn_pairs.append((x_recomp <= boundaries[0], lambda: values[0])) + pred_fn_pairs.append( + (x_recomp > boundaries[-1], lambda: values[-1]) + ) + for low, high, v in zip( + boundaries[:-1], boundaries[1:], values[1:-1] + ): + # Need to bind v here; can do this with lambda v=v: ... + pred = (x_recomp > low) & (x_recomp <= high) + pred_fn_pairs.append((pred, lambda v=v: v)) + + # The default isn't needed here because our conditions are mutually + # exclusive and exhaustive, but tf.case requires it. 
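The `lambda v=v: v` default-argument binding in the loop above is load-bearing: without it, Python's late binding would make every branch return the last interval's value. A standalone illustration of the pitfall:

```python
# Late binding: every lambda closes over the same loop variable.
fns = [lambda: v for v in (1, 2, 3)]
print([f() for f in fns])  # [3, 3, 3]

# Default-argument binding captures v at definition time.
fns = [lambda v=v: v for v in (1, 2, 3)]
print([f() for f in fns])  # [1, 2, 3]
```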
+ default = lambda: values[0] + return tf.case(pred_fn_pairs, default, exclusive=True) + + def get_config(self): + return { + "boundaries": self.boundaries, + "values": self.values, + "name": self.name, + } @keras_export("keras.optimizers.schedules.PolynomialDecay") class PolynomialDecay(LearningRateSchedule): - """A LearningRateSchedule that uses a polynomial decay schedule. - - It is commonly observed that a monotonically decreasing learning rate, whose - degree of change is carefully chosen, results in a better performing model. - This schedule applies a polynomial decay function to an optimizer step, - given a provided `initial_learning_rate`, to reach an `end_learning_rate` - in the given `decay_steps`. - - It requires a `step` value to compute the decayed learning rate. You - can just pass a TensorFlow variable that you increment at each training - step. - - The schedule is a 1-arg callable that produces a decayed learning rate - when passed the current optimizer step. This can be useful for changing the - learning rate value across different invocations of optimizer functions. - It is computed as: - - ```python - def decayed_learning_rate(step): - step = min(step, decay_steps) - return ((initial_learning_rate - end_learning_rate) * - (1 - step / decay_steps) ^ (power) - ) + end_learning_rate - ``` - - If `cycle` is True then a multiple of `decay_steps` is used, the first one - that is bigger than `step`. - - ```python - def decayed_learning_rate(step): - decay_steps = decay_steps * ceil(step / decay_steps) - return ((initial_learning_rate - end_learning_rate) * - (1 - step / decay_steps) ^ (power) - ) + end_learning_rate - ``` - - You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` - as the learning rate. - Example: Fit a model while decaying from 0.1 to 0.01 in 10000 steps using - sqrt (i.e. power=0.5): - - ```python - ... - starter_learning_rate = 0.1 - end_learning_rate = 0.01 - decay_steps = 10000 - learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay( - starter_learning_rate, - decay_steps, - end_learning_rate, - power=0.5) - - model.compile(optimizer=tf.keras.optimizers.SGD( - learning_rate=learning_rate_fn), - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - model.fit(data, labels, epochs=5) - ``` - - The learning rate schedule is also serializable and deserializable using - `tf.keras.optimizers.schedules.serialize` and - `tf.keras.optimizers.schedules.deserialize`. - - Returns: - A 1-arg callable learning rate schedule that takes the current optimizer - step and outputs the decayed learning rate, a scalar `Tensor` of the same - type as `initial_learning_rate`. - """ - - def __init__( - self, - initial_learning_rate, - decay_steps, - end_learning_rate=0.0001, - power=1.0, - cycle=False, - name=None): - """Applies a polynomial decay to the learning rate. + """A LearningRateSchedule that uses a polynomial decay schedule. + + It is commonly observed that a monotonically decreasing learning rate, whose + degree of change is carefully chosen, results in a better performing model. + This schedule applies a polynomial decay function to an optimizer step, + given a provided `initial_learning_rate`, to reach an `end_learning_rate` + in the given `decay_steps`. + + It requires a `step` value to compute the decayed learning rate. You + can just pass a TensorFlow variable that you increment at each training + step. + + The schedule is a 1-arg callable that produces a decayed learning rate + when passed the current optimizer step. 
This can be useful for changing the + learning rate value across different invocations of optimizer functions. + It is computed as: + + ```python + def decayed_learning_rate(step): + step = min(step, decay_steps) + return ((initial_learning_rate - end_learning_rate) * + (1 - step / decay_steps) ^ (power) + ) + end_learning_rate + ``` + + If `cycle` is True then a multiple of `decay_steps` is used, the first one + that is bigger than `step`. + + ```python + def decayed_learning_rate(step): + decay_steps = decay_steps * ceil(step / decay_steps) + return ((initial_learning_rate - end_learning_rate) * + (1 - step / decay_steps) ^ (power) + ) + end_learning_rate + ``` + + You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` + as the learning rate. + Example: Fit a model while decaying from 0.1 to 0.01 in 10000 steps using + sqrt (i.e. power=0.5): + + ```python + ... + starter_learning_rate = 0.1 + end_learning_rate = 0.01 + decay_steps = 10000 + learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay( + starter_learning_rate, + decay_steps, + end_learning_rate, + power=0.5) + + model.compile(optimizer=tf.keras.optimizers.SGD( + learning_rate=learning_rate_fn), + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + model.fit(data, labels, epochs=5) + ``` + + The learning rate schedule is also serializable and deserializable using + `tf.keras.optimizers.schedules.serialize` and + `tf.keras.optimizers.schedules.deserialize`. - Args: - initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a - Python number. The initial learning rate. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. - Must be positive. See the decay computation above. - end_learning_rate: A scalar `float32` or `float64` `Tensor` or a - Python number. The minimal end learning rate. - power: A scalar `float32` or `float64` `Tensor` or a - Python number. The power of the polynomial. Defaults to linear, 1.0. - cycle: A boolean, whether or not it should cycle beyond decay_steps. - name: String. Optional name of the operation. Defaults to - 'PolynomialDecay'. + Returns: + A 1-arg callable learning rate schedule that takes the current optimizer + step and outputs the decayed learning rate, a scalar `Tensor` of the same + type as `initial_learning_rate`. """ - super().__init__() - - self.initial_learning_rate = initial_learning_rate - self.decay_steps = decay_steps - self.end_learning_rate = end_learning_rate - self.power = power - self.cycle = cycle - self.name = name - - def __call__(self, step): - with tf.name_scope(self.name or "PolynomialDecay") as name: - initial_learning_rate = tf.convert_to_tensor( - self.initial_learning_rate, name="initial_learning_rate") - dtype = initial_learning_rate.dtype - end_learning_rate = tf.cast(self.end_learning_rate, dtype) - power = tf.cast(self.power, dtype) - - global_step_recomp = tf.cast(step, dtype) - decay_steps_recomp = tf.cast(self.decay_steps, dtype) - if self.cycle: - # Find the first multiple of decay_steps that is bigger than - # global_step. If global_step is zero set the multiplier to 1 - multiplier = tf.where( - tf.equal(global_step_recomp, 0), 1.0, - tf.math.ceil(global_step_recomp / self.decay_steps)) - decay_steps_recomp = tf.multiply(decay_steps_recomp, multiplier) - else: - # Make sure that the global_step used is not bigger than decay_steps. 
- global_step_recomp = tf.minimum(global_step_recomp, - decay_steps_recomp) - - p = tf.divide(global_step_recomp, decay_steps_recomp) - return tf.add( - tf.multiply(initial_learning_rate - end_learning_rate, - tf.pow(1 - p, power)), - end_learning_rate, - name=name) - - def get_config(self): - return { - "initial_learning_rate": self.initial_learning_rate, - "decay_steps": self.decay_steps, - "end_learning_rate": self.end_learning_rate, - "power": self.power, - "cycle": self.cycle, - "name": self.name - } + + def __init__( + self, + initial_learning_rate, + decay_steps, + end_learning_rate=0.0001, + power=1.0, + cycle=False, + name=None, + ): + """Applies a polynomial decay to the learning rate. + + Args: + initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a + Python number. The initial learning rate. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. + Must be positive. See the decay computation above. + end_learning_rate: A scalar `float32` or `float64` `Tensor` or a + Python number. The minimal end learning rate. + power: A scalar `float32` or `float64` `Tensor` or a + Python number. The power of the polynomial. Defaults to `1.0`. + cycle: A boolean, whether it should cycle beyond decay_steps. + name: String. Optional name of the operation. Defaults to + 'PolynomialDecay'. + """ + super().__init__() + + self.initial_learning_rate = initial_learning_rate + self.decay_steps = decay_steps + self.end_learning_rate = end_learning_rate + self.power = power + self.cycle = cycle + self.name = name + + def __call__(self, step): + with tf.name_scope(self.name or "PolynomialDecay") as name: + initial_learning_rate = tf.convert_to_tensor( + self.initial_learning_rate, name="initial_learning_rate" + ) + dtype = initial_learning_rate.dtype + end_learning_rate = tf.cast(self.end_learning_rate, dtype) + power = tf.cast(self.power, dtype) + + global_step_recomp = tf.cast(step, dtype) + decay_steps_recomp = tf.cast(self.decay_steps, dtype) + if self.cycle: + # Find the first multiple of decay_steps that is bigger than + # global_step. If global_step is zero set the multiplier to 1 + multiplier = tf.where( + tf.equal(global_step_recomp, 0), + 1.0, + tf.math.ceil(global_step_recomp / self.decay_steps), + ) + decay_steps_recomp = tf.multiply(decay_steps_recomp, multiplier) + else: + # Make sure that the global_step used is not bigger than + # decay_steps. + global_step_recomp = tf.minimum( + global_step_recomp, decay_steps_recomp + ) + + p = tf.divide(global_step_recomp, decay_steps_recomp) + return tf.add( + tf.multiply( + initial_learning_rate - end_learning_rate, + tf.pow(1 - p, power), + ), + end_learning_rate, + name=name, + ) + + def get_config(self): + return { + "initial_learning_rate": self.initial_learning_rate, + "decay_steps": self.decay_steps, + "end_learning_rate": self.end_learning_rate, + "power": self.power, + "cycle": self.cycle, + "name": self.name, + } @keras_export("keras.optimizers.schedules.InverseTimeDecay") class InverseTimeDecay(LearningRateSchedule): - """A LearningRateSchedule that uses an inverse time decay schedule. - - When training a model, it is often useful to lower the learning rate as - the training progresses. This schedule applies the inverse decay function - to an optimizer step, given a provided initial learning rate. - It requires a `step` value to compute the decayed learning rate. You can - just pass a TensorFlow variable that you increment at each training step. 
- - The schedule is a 1-arg callable that produces a decayed learning - rate when passed the current optimizer step. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - It is computed as: - - ```python - def decayed_learning_rate(step): - return initial_learning_rate / (1 + decay_rate * step / decay_step) - ``` - - or, if `staircase` is `True`, as: - - ```python - def decayed_learning_rate(step): - return initial_learning_rate / (1 + decay_rate * floor(step / decay_step)) - ``` - - You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` - as the learning rate. - Example: Fit a Keras model when decaying 1/t with a rate of 0.5: - - ```python - ... - initial_learning_rate = 0.1 - decay_steps = 1.0 - decay_rate = 0.5 - learning_rate_fn = keras.optimizers.schedules.InverseTimeDecay( - initial_learning_rate, decay_steps, decay_rate) - - model.compile(optimizer=tf.keras.optimizers.SGD( - learning_rate=learning_rate_fn), - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - model.fit(data, labels, epochs=5) - ``` - - Returns: - A 1-arg callable learning rate schedule that takes the current optimizer - step and outputs the decayed learning rate, a scalar `Tensor` of the same - type as `initial_learning_rate`. - """ - - def __init__( - self, - initial_learning_rate, - decay_steps, - decay_rate, - staircase=False, - name=None): - """Applies inverse time decay to the initial learning rate. + """A LearningRateSchedule that uses an inverse time decay schedule. + + When training a model, it is often useful to lower the learning rate as + the training progresses. This schedule applies the inverse decay function + to an optimizer step, given a provided initial learning rate. + It requires a `step` value to compute the decayed learning rate. You can + just pass a TensorFlow variable that you increment at each training step. + + The schedule is a 1-arg callable that produces a decayed learning + rate when passed the current optimizer step. This can be useful for changing + the learning rate value across different invocations of optimizer functions. + It is computed as: + + ```python + def decayed_learning_rate(step): + return initial_learning_rate / (1 + decay_rate * step / decay_steps) + ``` + + or, if `staircase` is `True`, as: + + ```python + def decayed_learning_rate(step): + return initial_learning_rate / (1 + decay_rate * floor(step / decay_steps)) + ``` + + You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` + as the learning rate. + Example: Fit a Keras model when decaying 1/t with a rate of 0.5: + + ```python + ... + initial_learning_rate = 0.1 + decay_steps = 1.0 + decay_rate = 0.5 + learning_rate_fn = keras.optimizers.schedules.InverseTimeDecay( + initial_learning_rate, decay_steps, decay_rate) + + model.compile(optimizer=tf.keras.optimizers.SGD( + learning_rate=learning_rate_fn), + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + model.fit(data, labels, epochs=5) + ``` - Args: - initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a - Python number. The initial learning rate. - decay_steps: How often to apply decay. - decay_rate: A Python number. The decay rate. - staircase: Whether to apply decay in a discrete staircase, as opposed to - continuous, fashion. - name: String. Optional name of the operation. Defaults to - 'InverseTimeDecay'.
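The inverse time decay formula above is easy to verify by hand; a standalone sketch with hypothetical values matching the docstring example:

```python
# Standalone check of the inverse time decay formula
# (hypothetical values: 1/t decay with decay_rate=0.5, decay_steps=1.0).
initial_learning_rate, decay_steps, decay_rate = 0.1, 1.0, 0.5

def decayed_learning_rate(step):
    return initial_learning_rate / (1 + decay_rate * step / decay_steps)

print(decayed_learning_rate(0))  # 0.1
print(decayed_learning_rate(1))  # 0.1 / 1.5 ~= 0.0667
print(decayed_learning_rate(4))  # 0.1 / 3.0 ~= 0.0333
```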
+ Returns: + A 1-arg callable learning rate schedule that takes the current optimizer + step and outputs the decayed learning rate, a scalar `Tensor` of the same + type as `initial_learning_rate`. """ - super().__init__() - - self.initial_learning_rate = initial_learning_rate - self.decay_steps = decay_steps - self.decay_rate = decay_rate - self.staircase = staircase - self.name = name - - def __call__(self, step): - with tf.name_scope(self.name or "InverseTimeDecay") as name: - initial_learning_rate = tf.convert_to_tensor( - self.initial_learning_rate, name="initial_learning_rate") - dtype = initial_learning_rate.dtype - decay_steps = tf.cast(self.decay_steps, dtype) - decay_rate = tf.cast(self.decay_rate, dtype) - - global_step_recomp = tf.cast(step, dtype) - p = global_step_recomp / decay_steps - if self.staircase: - p = tf.floor(p) - const = tf.cast(tf.constant(1), dtype) - denom = tf.add(const, tf.multiply(decay_rate, p)) - return tf.divide(initial_learning_rate, denom, name=name) - - def get_config(self): - return { - "initial_learning_rate": self.initial_learning_rate, - "decay_steps": self.decay_steps, - "decay_rate": self.decay_rate, - "staircase": self.staircase, - "name": self.name - } - - -@keras_export("keras.optimizers.schedules.CosineDecay", - "keras.experimental.CosineDecay") + + def __init__( + self, + initial_learning_rate, + decay_steps, + decay_rate, + staircase=False, + name=None, + ): + """Applies inverse time decay to the initial learning rate. + + Args: + initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a + Python number. The initial learning rate. + decay_steps: How often to apply decay. + decay_rate: A Python number. The decay rate. + staircase: Whether to apply decay in a discrete staircase, as opposed + to continuous, fashion. + name: String. Optional name of the operation. Defaults to + 'InverseTimeDecay'. + """ + super().__init__() + + self.initial_learning_rate = initial_learning_rate + self.decay_steps = decay_steps + self.decay_rate = decay_rate + self.staircase = staircase + self.name = name + + def __call__(self, step): + with tf.name_scope(self.name or "InverseTimeDecay") as name: + initial_learning_rate = tf.convert_to_tensor( + self.initial_learning_rate, name="initial_learning_rate" + ) + dtype = initial_learning_rate.dtype + decay_steps = tf.cast(self.decay_steps, dtype) + decay_rate = tf.cast(self.decay_rate, dtype) + + global_step_recomp = tf.cast(step, dtype) + p = global_step_recomp / decay_steps + if self.staircase: + p = tf.floor(p) + const = tf.cast(tf.constant(1), dtype) + denom = tf.add(const, tf.multiply(decay_rate, p)) + return tf.divide(initial_learning_rate, denom, name=name) + + def get_config(self): + return { + "initial_learning_rate": self.initial_learning_rate, + "decay_steps": self.decay_steps, + "decay_rate": self.decay_rate, + "staircase": self.staircase, + "name": self.name, + } + + +@keras_export( + "keras.optimizers.schedules.CosineDecay", "keras.experimental.CosineDecay" +) class CosineDecay(LearningRateSchedule): - """A LearningRateSchedule that uses a cosine decay schedule. - - See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), - SGDR: Stochastic Gradient Descent with Warm Restarts. - - When training a model, it is often useful to lower the learning rate as - the training progresses. This schedule applies a cosine decay function - to an optimizer step, given a provided initial learning rate. - It requires a `step` value to compute the decayed learning rate. 
You can - just pass a TensorFlow variable that you increment at each training step. - - The schedule is a 1-arg callable that produces a decayed learning - rate when passed the current optimizer step. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - It is computed as: - - ```python - def decayed_learning_rate(step): - step = min(step, decay_steps) - cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps)) - decayed = (1 - alpha) * cosine_decay + alpha - return initial_learning_rate * decayed - ``` - - Example usage: - ```python - decay_steps = 1000 - lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay( - initial_learning_rate, decay_steps) - ``` - - You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` - as the learning rate. The learning rate schedule is also serializable and - deserializable using `tf.keras.optimizers.schedules.serialize` and - `tf.keras.optimizers.schedules.deserialize`. - - Returns: - A 1-arg callable learning rate schedule that takes the current optimizer - step and outputs the decayed learning rate, a scalar `Tensor` of the same - type as `initial_learning_rate`. - """ - - def __init__( - self, - initial_learning_rate, - decay_steps, - alpha=0.0, - name=None): - """Applies cosine decay to the learning rate. + """A LearningRateSchedule that uses a cosine decay with optional warmup. + + See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), + SGDR: Stochastic Gradient Descent with Warm Restarts. + + For the idea of a linear warmup of our learning rate, + see [Goyal et al.](https://arxiv.org/pdf/1706.02677.pdf). + + When we begin training a model, we often want an initial increase in our + learning rate followed by a decay. If `warmup_target` is not None, this + schedule applies a linear increase per optimizer step to our learning rate + from `initial_learning_rate` to `warmup_target` for a duration of + `warmup_steps`. Afterwards, it applies a cosine decay function taking our + learning rate from `warmup_target` to `alpha` for a duration of + `decay_steps`. If `warmup_target` is None we skip warmup and our decay + will take our learning rate from `initial_learning_rate` to `alpha`. + It requires a `step` value to compute the learning rate. You can + just pass a TensorFlow variable that you increment at each training step. + + The schedule is a 1-arg callable that produces a warmup followed by a + decayed learning rate when passed the current optimizer step. This can be + useful for changing the learning rate value across different invocations of + optimizer functions.
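A standalone numeric trace of the warmup-then-decay behaviour described above may help; this is a sketch with hypothetical values (warm up from 0 to 0.1 over 1000 steps, then cosine-decay back to 0 over the next 1000), not the class itself:

```python
import math

# Hypothetical parameters for tracing the warmup + cosine decay schedule.
initial_learning_rate, warmup_target = 0.0, 0.1
warmup_steps, decay_steps, alpha = 1000, 1000, 0.0

def lr(step):
    if step < warmup_steps:
        # Linear warmup from initial_learning_rate to warmup_target.
        fraction = step / warmup_steps
        return initial_learning_rate + (warmup_target - initial_learning_rate) * fraction
    # Cosine decay from warmup_target down to alpha * warmup_target.
    step = min(step - warmup_steps, decay_steps)
    cosine = 0.5 * (1 + math.cos(math.pi * step / decay_steps))
    return warmup_target * ((1 - alpha) * cosine + alpha)

print(lr(500))   # 0.05 -- halfway through warmup
print(lr(1000))  # 0.1  -- warmup done, decay begins
print(lr(1500))  # 0.05 -- halfway through the cosine decay
print(lr(2000))  # 0.0  -- fully decayed (alpha == 0)
```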
+ + Our warmup is computed as: + + ```python + def warmup_learning_rate(step): + completed_fraction = step / warmup_steps + total_delta = warmup_target - initial_learning_rate + return completed_fraction * total_delta + initial_learning_rate + ``` + + And our decay is computed as: + + ```python + if warmup_target is None: + initial_decay_lr = initial_learning_rate + else: + initial_decay_lr = warmup_target + + def decayed_learning_rate(step): + step = min(step, decay_steps) + cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps)) + decayed = (1 - alpha) * cosine_decay + alpha + return initial_decay_lr * decayed + ``` + + Example usage without warmup: + + ```python + decay_steps = 1000 + initial_learning_rate = 0.1 + lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay( + initial_learning_rate, decay_steps) + ``` + + Example usage with warmup: + + ```python + decay_steps = 1000 + initial_learning_rate = 0 + warmup_steps = 1000 + target_learning_rate = 0.1 + lr_warmup_decayed_fn = tf.keras.optimizers.schedules.CosineDecay( + initial_learning_rate, decay_steps, warmup_target=target_learning_rate, + warmup_steps=warmup_steps + ) + ``` + + You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` + as the learning rate. The learning rate schedule is also serializable and + deserializable using `tf.keras.optimizers.schedules.serialize` and + `tf.keras.optimizers.schedules.deserialize`. - Args: - initial_learning_rate: A scalar `float32` or `float64` Tensor or a - Python number. The initial learning rate. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. - Number of steps to decay over. - alpha: A scalar `float32` or `float64` Tensor or a Python number. - Minimum learning rate value as a fraction of initial_learning_rate. - name: String. Optional name of the operation. Defaults to 'CosineDecay'. + Returns: + A 1-arg callable learning rate schedule that takes the current optimizer + step and outputs the decayed learning rate, a scalar `Tensor` of the same + type as `initial_learning_rate`. """ - super().__init__() - - self.initial_learning_rate = initial_learning_rate - self.decay_steps = decay_steps - self.alpha = alpha - self.name = name - - def __call__(self, step): - with tf.name_scope(self.name or "CosineDecay"): - initial_learning_rate = tf.convert_to_tensor( - self.initial_learning_rate, name="initial_learning_rate") - dtype = initial_learning_rate.dtype - decay_steps = tf.cast(self.decay_steps, dtype) - - global_step_recomp = tf.cast(step, dtype) - global_step_recomp = tf.minimum(global_step_recomp, decay_steps) - completed_fraction = global_step_recomp / decay_steps - cosine_decayed = 0.5 * (1.0 + tf.cos( - tf.constant(math.pi, dtype=dtype) * completed_fraction)) - - decayed = (1 - self.alpha) * cosine_decayed + self.alpha - return tf.multiply(initial_learning_rate, decayed) - - def get_config(self): - return { - "initial_learning_rate": self.initial_learning_rate, - "decay_steps": self.decay_steps, - "alpha": self.alpha, - "name": self.name - } - - -@keras_export("keras.optimizers.schedules.CosineDecayRestarts", - "keras.experimental.CosineDecayRestarts") -class CosineDecayRestarts(LearningRateSchedule): - """A LearningRateSchedule that uses a cosine decay schedule with restarts. - - See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), - SGDR: Stochastic Gradient Descent with Warm Restarts. - - When training a model, it is often useful to lower the learning rate as - the training progresses.
This schedule applies a cosine decay function with - restarts to an optimizer step, given a provided initial learning rate. - It requires a `step` value to compute the decayed learning rate. You can - just pass a TensorFlow variable that you increment at each training step. - - The schedule is a 1-arg callable that produces a decayed learning - rate when passed the current optimizer step. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - - The learning rate multiplier first decays - from 1 to `alpha` for `first_decay_steps` steps. Then, a warm - restart is performed. Each new warm restart runs for `t_mul` times more - steps and with `m_mul` times initial learning rate as the new learning rate. - - Example usage: - ```python - first_decay_steps = 1000 - lr_decayed_fn = ( - tf.keras.optimizers.schedules.CosineDecayRestarts( + + def __init__( + self, initial_learning_rate, - first_decay_steps)) - ``` - - You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` - as the learning rate. The learning rate schedule is also serializable and - deserializable using `tf.keras.optimizers.schedules.serialize` and - `tf.keras.optimizers.schedules.deserialize`. - - Returns: - A 1-arg callable learning rate schedule that takes the current optimizer - step and outputs the decayed learning rate, a scalar `Tensor` of the same - type as `initial_learning_rate`. - """ - - def __init__( - self, - initial_learning_rate, - first_decay_steps, - t_mul=2.0, - m_mul=1.0, - alpha=0.0, - name=None): - """Applies cosine decay with restarts to the learning rate. + decay_steps, + alpha=0.0, + name=None, + warmup_target=None, + warmup_steps=0, + ): + """Applies cosine decay to the learning rate. + + Args: + initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a + Python int. The initial learning rate. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python int. + Number of steps to decay over. + alpha: A scalar `float32` or `float64` `Tensor` or a Python int. + Minimum learning rate value for decay as a fraction of + `initial_learning_rate`. + name: String. Optional name of the operation. Defaults to + 'CosineDecay'. + warmup_target: None or a scalar `float32` or `float64` `Tensor` or a + Python int. The target learning rate for our warmup phase. Will be cast + to the `initial_learning_rate` datatype. Setting to None will skip + warmup and begin the decay phase from `initial_learning_rate`. + Otherwise the scheduler will warm up from `initial_learning_rate` to + `warmup_target`. + warmup_steps: A scalar `int32` or `int64` `Tensor` or a Python int. + Number of steps to warmup over.
+ """ + super().__init__() + + self.initial_learning_rate = initial_learning_rate + self.decay_steps = decay_steps + self.alpha = alpha + self.name = name + self.warmup_steps = warmup_steps + self.warmup_target = warmup_target + + def _decay_function(self, step, decay_steps, decay_from_lr, dtype): + with tf.name_scope(self.name or "CosineDecay"): + completed_fraction = step / decay_steps + tf_pi = tf.constant(math.pi, dtype=dtype) + cosine_decayed = 0.5 * (1.0 + tf.cos(tf_pi * completed_fraction)) + decayed = (1 - self.alpha) * cosine_decayed + self.alpha + return tf.multiply(decay_from_lr, decayed) + + def _warmup_function( + self, step, warmup_steps, warmup_target, initial_learning_rate + ): + with tf.name_scope(self.name or "CosineDecay"): + completed_fraction = step / warmup_steps + total_step_delta = warmup_target - initial_learning_rate + return total_step_delta * completed_fraction + initial_learning_rate - Args: - initial_learning_rate: A scalar `float32` or `float64` Tensor or a Python - number. The initial learning rate. - first_decay_steps: A scalar `int32` or `int64` `Tensor` or a Python - number. Number of steps to decay over. - t_mul: A scalar `float32` or `float64` `Tensor` or a Python number. - Used to derive the number of iterations in the i-th period. - m_mul: A scalar `float32` or `float64` `Tensor` or a Python number. - Used to derive the initial learning rate of the i-th period. - alpha: A scalar `float32` or `float64` Tensor or a Python number. - Minimum learning rate value as a fraction of the initial_learning_rate. - name: String. Optional name of the operation. Defaults to 'SGDRDecay'. + def __call__(self, step): + with tf.name_scope(self.name or "CosineDecay"): + initial_learning_rate = tf.convert_to_tensor( + self.initial_learning_rate, name="initial_learning_rate" + ) + dtype = initial_learning_rate.dtype + decay_steps = tf.cast(self.decay_steps, dtype) + global_step_recomp = tf.cast(step, dtype) + + if self.warmup_target is None: + global_step_recomp = tf.minimum(global_step_recomp, decay_steps) + return self._decay_function( + global_step_recomp, + decay_steps, + initial_learning_rate, + dtype, + ) + + warmup_target = tf.cast(self.warmup_target, dtype) + warmup_steps = tf.cast(self.warmup_steps, dtype) + + global_step_recomp = tf.minimum( + global_step_recomp, decay_steps + warmup_steps + ) + + return tf.cond( + global_step_recomp < warmup_steps, + lambda: self._warmup_function( + global_step_recomp, + warmup_steps, + warmup_target, + initial_learning_rate, + ), + lambda: self._decay_function( + global_step_recomp - warmup_steps, + decay_steps, + warmup_target, + dtype, + ), + ) + + def get_config(self): + return { + "initial_learning_rate": self.initial_learning_rate, + "decay_steps": self.decay_steps, + "alpha": self.alpha, + "name": self.name, + "warmup_target": self.warmup_target, + "warmup_steps": self.warmup_steps, + } + + +@keras_export( + "keras.optimizers.schedules.CosineDecayRestarts", + "keras.experimental.CosineDecayRestarts", +) +class CosineDecayRestarts(LearningRateSchedule): + """A LearningRateSchedule that uses a cosine decay schedule with restarts. + + See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), + SGDR: Stochastic Gradient Descent with Warm Restarts. + + When training a model, it is often useful to lower the learning rate as + the training progresses. This schedule applies a cosine decay function with + restarts to an optimizer step, given a provided initial learning rate. 
+ It requires a `step` value to compute the decayed learning rate. You can + just pass a TensorFlow variable that you increment at each training step. + + The schedule is a 1-arg callable that produces a decayed learning + rate when passed the current optimizer step. This can be useful for changing + the learning rate value across different invocations of optimizer functions. + + The learning rate multiplier first decays + from 1 to `alpha` for `first_decay_steps` steps. Then, a warm + restart is performed. Each new warm restart runs for `t_mul` times more + steps and with `m_mul` times initial learning rate as the new learning rate. + + Example usage: + ```python + first_decay_steps = 1000 + lr_decayed_fn = ( + tf.keras.optimizers.schedules.CosineDecayRestarts( + initial_learning_rate, + first_decay_steps)) + ``` + + You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` + as the learning rate. The learning rate schedule is also serializable and + deserializable using `tf.keras.optimizers.schedules.serialize` and + `tf.keras.optimizers.schedules.deserialize`. + + Returns: + A 1-arg callable learning rate schedule that takes the current optimizer + step and outputs the decayed learning rate, a scalar `Tensor` of the same + type as `initial_learning_rate`. """ - super().__init__() - - self.initial_learning_rate = initial_learning_rate - self.first_decay_steps = first_decay_steps - self._t_mul = t_mul - self._m_mul = m_mul - self.alpha = alpha - self.name = name - - def __call__(self, step): - with tf.name_scope(self.name or "SGDRDecay") as name: - initial_learning_rate = tf.convert_to_tensor( - self.initial_learning_rate, name="initial_learning_rate") - dtype = initial_learning_rate.dtype - first_decay_steps = tf.cast(self.first_decay_steps, dtype) - alpha = tf.cast(self.alpha, dtype) - t_mul = tf.cast(self._t_mul, dtype) - m_mul = tf.cast(self._m_mul, dtype) - - global_step_recomp = tf.cast(step, dtype) - completed_fraction = global_step_recomp / first_decay_steps - - def compute_step(completed_fraction, geometric=False): - """Helper for `cond` operation.""" - if geometric: - i_restart = tf.floor( - tf.math.log(1.0 - completed_fraction * (1.0 - t_mul)) / - tf.math.log(t_mul)) - - sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul) - completed_fraction = (completed_fraction - sum_r) / t_mul**i_restart - - else: - i_restart = tf.floor(completed_fraction) - completed_fraction -= i_restart - - return i_restart, completed_fraction - - i_restart, completed_fraction = tf.cond( - tf.equal(t_mul, 1.0), - lambda: compute_step(completed_fraction, geometric=False), - lambda: compute_step(completed_fraction, geometric=True)) - - m_fac = m_mul**i_restart - cosine_decayed = 0.5 * m_fac * (1.0 + tf.cos( - tf.constant(math.pi, dtype=dtype) * completed_fraction)) - decayed = (1 - alpha) * cosine_decayed + alpha - - return tf.multiply(initial_learning_rate, decayed, name=name) - - def get_config(self): - return { - "initial_learning_rate": self.initial_learning_rate, - "first_decay_steps": self.first_decay_steps, - "t_mul": self._t_mul, - "m_mul": self._m_mul, - "alpha": self.alpha, - "name": self.name - } + + def __init__( + self, + initial_learning_rate, + first_decay_steps, + t_mul=2.0, + m_mul=1.0, + alpha=0.0, + name=None, + ): + """Applies cosine decay with restarts to the learning rate. + + Args: + initial_learning_rate: A scalar `float32` or `float64` Tensor or a + Python number. The initial learning rate. 
+ first_decay_steps: A scalar `int32` or `int64` `Tensor` or a Python + number. Number of steps to decay over. + t_mul: A scalar `float32` or `float64` `Tensor` or a Python number. + Used to derive the number of iterations in the i-th period. + m_mul: A scalar `float32` or `float64` `Tensor` or a Python number. + Used to derive the initial learning rate of the i-th period. + alpha: A scalar `float32` or `float64` Tensor or a Python number. + Minimum learning rate value as a fraction of the + initial_learning_rate. + name: String. Optional name of the operation. Defaults to 'SGDRDecay'. + """ + super().__init__() + + self.initial_learning_rate = initial_learning_rate + self.first_decay_steps = first_decay_steps + self._t_mul = t_mul + self._m_mul = m_mul + self.alpha = alpha + self.name = name + + def __call__(self, step): + with tf.name_scope(self.name or "SGDRDecay") as name: + initial_learning_rate = tf.convert_to_tensor( + self.initial_learning_rate, name="initial_learning_rate" + ) + dtype = initial_learning_rate.dtype + first_decay_steps = tf.cast(self.first_decay_steps, dtype) + alpha = tf.cast(self.alpha, dtype) + t_mul = tf.cast(self._t_mul, dtype) + m_mul = tf.cast(self._m_mul, dtype) + + global_step_recomp = tf.cast(step, dtype) + completed_fraction = global_step_recomp / first_decay_steps + + def compute_step(completed_fraction, geometric=False): + """Helper for `cond` operation.""" + if geometric: + i_restart = tf.floor( + tf.math.log(1.0 - completed_fraction * (1.0 - t_mul)) + / tf.math.log(t_mul) + ) + + sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul) + completed_fraction = ( + completed_fraction - sum_r + ) / t_mul**i_restart + + else: + i_restart = tf.floor(completed_fraction) + completed_fraction -= i_restart + + return i_restart, completed_fraction + + i_restart, completed_fraction = tf.cond( + tf.equal(t_mul, 1.0), + lambda: compute_step(completed_fraction, geometric=False), + lambda: compute_step(completed_fraction, geometric=True), + ) + + m_fac = m_mul**i_restart + cosine_decayed = ( + 0.5 + * m_fac + * ( + 1.0 + + tf.cos( + tf.constant(math.pi, dtype=dtype) * completed_fraction + ) + ) + ) + decayed = (1 - alpha) * cosine_decayed + alpha + + return tf.multiply(initial_learning_rate, decayed, name=name) + + def get_config(self): + return { + "initial_learning_rate": self.initial_learning_rate, + "first_decay_steps": self.first_decay_steps, + "t_mul": self._t_mul, + "m_mul": self._m_mul, + "alpha": self.alpha, + "name": self.name, + } # Note: this code is still used by V1 APIs. class LinearCosineDecay(LearningRateSchedule): - """A LearningRateSchedule that uses a linear cosine decay schedule. - - See [Bello et al., ICML2017] Neural Optimizer Search with RL. - https://arxiv.org/abs/1709.07417 - - For the idea of warm starts here controlled by `num_periods`, - see [Loshchilov & Hutter, ICLR2016] SGDR: Stochastic Gradient Descent - with Warm Restarts. https://arxiv.org/abs/1608.03983 - - Note that linear cosine decay is more aggressive than cosine decay and - larger initial learning rates can typically be used. - - When training a model, it is often recommended to lower the learning rate as - the training progresses. This schedule applies a linear cosine decay - function to an optimizer step, given a provided initial learning rate. - It requires a `step` value to compute the decayed learning rate. You can - just pass a TensorFlow variable that you increment at each training step. 
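Stepping back to the `CosineDecayRestarts` diff above: the restart geometry driven by `t_mul` and `m_mul` is easy to tabulate. A sketch under hypothetical parameters (first_decay_steps=1000, t_mul=2.0, m_mul=0.5):

```python
# Hypothetical illustration of CosineDecayRestarts restart periods: each
# restart runs t_mul times longer and restarts from m_mul times the
# previous peak learning rate.
first_decay_steps, t_mul, m_mul, lr0 = 1000, 2.0, 0.5, 0.1

steps, peak = first_decay_steps, lr0
for i in range(4):
    print(f"period {i}: {int(steps)} steps, peak lr {peak:.4f}")
    steps *= t_mul
    peak *= m_mul
# period 0: 1000 steps, peak lr 0.1000
# period 1: 2000 steps, peak lr 0.0500
# period 2: 4000 steps, peak lr 0.0250
# period 3: 8000 steps, peak lr 0.0125
```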
- - The schedule is a 1-arg callable that produces a decayed learning - rate when passed the current optimizer step. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - It is computed as: - - ```python - def decayed_learning_rate(step): - step = min(step, decay_steps) - linear_decay = (decay_steps - step) / decay_steps - cosine_decay = 0.5 * ( - 1 + cos(pi * 2 * num_periods * step / decay_steps)) - decayed = (alpha + linear_decay) * cosine_decay + beta - return initial_learning_rate * decayed - ``` - - Example usage: - ```python - decay_steps = 1000 - lr_decayed_fn = ( - tf.keras.experimental.LinearCosineDecay( - initial_learning_rate, decay_steps)) - ``` - - You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` - as the learning rate. The learning rate schedule is also serializable and - deserializable using `tf.keras.optimizers.schedules.serialize` and - `tf.keras.optimizers.schedules.deserialize`. - - Returns: - A 1-arg callable learning rate schedule that takes the current optimizer - step and outputs the decayed learning rate, a scalar `Tensor` of the same - type as `initial_learning_rate`. - """ - - def __init__( - self, - initial_learning_rate, - decay_steps, - num_periods=0.5, - alpha=0.0, - beta=0.001, - name=None): - """Applies linear cosine decay to the learning rate. + """A LearningRateSchedule that uses a linear cosine decay schedule. + + See [Bello et al., ICML2017] Neural Optimizer Search with RL. + https://arxiv.org/abs/1709.07417 + + For the idea of warm starts here controlled by `num_periods`, + see [Loshchilov & Hutter, ICLR2016] SGDR: Stochastic Gradient Descent + with Warm Restarts. https://arxiv.org/abs/1608.03983 + + Note that linear cosine decay is more aggressive than cosine decay and + larger initial learning rates can typically be used. + + When training a model, it is often recommended to lower the learning rate as + the training progresses. This schedule applies a linear cosine decay + function to an optimizer step, given a provided initial learning rate. + It requires a `step` value to compute the decayed learning rate. You can + just pass a TensorFlow variable that you increment at each training step. + + The schedule is a 1-arg callable that produces a decayed learning + rate when passed the current optimizer step. This can be useful for changing + the learning rate value across different invocations of optimizer functions. + It is computed as: + + ```python + def decayed_learning_rate(step): + step = min(step, decay_steps) + linear_decay = (decay_steps - step) / decay_steps + cosine_decay = 0.5 * ( + 1 + cos(pi * 2 * num_periods * step / decay_steps)) + decayed = (alpha + linear_decay) * cosine_decay + beta + return initial_learning_rate * decayed + ``` + + Example usage: + ```python + decay_steps = 1000 + lr_decayed_fn = ( + tf.keras.experimental.LinearCosineDecay( + initial_learning_rate, decay_steps)) + ``` + + You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` + as the learning rate. The learning rate schedule is also serializable and + deserializable using `tf.keras.optimizers.schedules.serialize` and + `tf.keras.optimizers.schedules.deserialize`. - Args: - initial_learning_rate: A scalar `float32` or `float64` Tensor or a Python - number. The initial learning rate. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. - Number of steps to decay over. - num_periods: Number of periods in the cosine part of the decay. 
- See computation above. - alpha: See computation above. - beta: See computation above. - name: String. Optional name of the operation. Defaults to - 'LinearCosineDecay'. + Returns: + A 1-arg callable learning rate schedule that takes the current optimizer + step and outputs the decayed learning rate, a scalar `Tensor` of the same + type as `initial_learning_rate`. """ - super().__init__() - - self.initial_learning_rate = initial_learning_rate - self.decay_steps = decay_steps - self.num_periods = num_periods - self.alpha = alpha - self.beta = beta - self.name = name - - def __call__(self, step): - with tf.name_scope(self.name or "LinearCosineDecay") as name: - initial_learning_rate = tf.convert_to_tensor( - self.initial_learning_rate, name="initial_learning_rate") - dtype = initial_learning_rate.dtype - decay_steps = tf.cast(self.decay_steps, dtype) - num_periods = tf.cast(self.num_periods, dtype) - alpha = tf.cast(self.alpha, dtype) - beta = tf.cast(self.beta, dtype) - - global_step_recomp = tf.cast(step, dtype) - global_step_recomp = tf.minimum(global_step_recomp, decay_steps) - linear_decayed = (decay_steps - global_step_recomp) / decay_steps - completed_fraction = global_step_recomp / decay_steps - fraction = 2.0 * num_periods * completed_fraction - cosine_decayed = 0.5 * ( - 1.0 + tf.cos(tf.constant(math.pi, dtype=dtype) * fraction)) - - linear_cosine_decayed = (alpha + linear_decayed) * cosine_decayed + beta - return tf.multiply(initial_learning_rate, linear_cosine_decayed, - name=name) - - def get_config(self): - return { - "initial_learning_rate": self.initial_learning_rate, - "decay_steps": self.decay_steps, - "num_periods": self.num_periods, - "alpha": self.alpha, - "beta": self.beta, - "name": self.name - } + + def __init__( + self, + initial_learning_rate, + decay_steps, + num_periods=0.5, + alpha=0.0, + beta=0.001, + name=None, + ): + """Applies linear cosine decay to the learning rate. + + Args: + initial_learning_rate: A scalar `float32` or `float64` Tensor or a + Python number. The initial learning rate. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. + Number of steps to decay over. + num_periods: Number of periods in the cosine part of the decay. + See computation above. + alpha: See computation above. + beta: See computation above. + name: String. Optional name of the operation. Defaults to + 'LinearCosineDecay'. 
+ """ + super().__init__() + + self.initial_learning_rate = initial_learning_rate + self.decay_steps = decay_steps + self.num_periods = num_periods + self.alpha = alpha + self.beta = beta + self.name = name + + def __call__(self, step): + with tf.name_scope(self.name or "LinearCosineDecay") as name: + initial_learning_rate = tf.convert_to_tensor( + self.initial_learning_rate, name="initial_learning_rate" + ) + dtype = initial_learning_rate.dtype + decay_steps = tf.cast(self.decay_steps, dtype) + num_periods = tf.cast(self.num_periods, dtype) + alpha = tf.cast(self.alpha, dtype) + beta = tf.cast(self.beta, dtype) + + global_step_recomp = tf.cast(step, dtype) + global_step_recomp = tf.minimum(global_step_recomp, decay_steps) + linear_decayed = (decay_steps - global_step_recomp) / decay_steps + completed_fraction = global_step_recomp / decay_steps + fraction = 2.0 * num_periods * completed_fraction + cosine_decayed = 0.5 * ( + 1.0 + tf.cos(tf.constant(math.pi, dtype=dtype) * fraction) + ) + + linear_cosine_decayed = ( + alpha + linear_decayed + ) * cosine_decayed + beta + return tf.multiply( + initial_learning_rate, linear_cosine_decayed, name=name + ) + + def get_config(self): + return { + "initial_learning_rate": self.initial_learning_rate, + "decay_steps": self.decay_steps, + "num_periods": self.num_periods, + "alpha": self.alpha, + "beta": self.beta, + "name": self.name, + } # Note: this code is still used by V1 APIs. class NoisyLinearCosineDecay(LearningRateSchedule): - """A LearningRateSchedule that uses a noisy linear cosine decay schedule. - - See [Bello et al., ICML2017] Neural Optimizer Search with RL. - https://arxiv.org/abs/1709.07417 - - For the idea of warm starts here controlled by `num_periods`, - see [Loshchilov & Hutter, ICLR2016] SGDR: Stochastic Gradient Descent - with Warm Restarts. https://arxiv.org/abs/1608.03983 - - Note that linear cosine decay is more aggressive than cosine decay and - larger initial learning rates can typically be used. - - When training a model, it is often recommended to lower the learning rate as - the training progresses. This schedule applies a noisy linear cosine decay - function to an optimizer step, given a provided initial learning rate. - It requires a `step` value to compute the decayed learning rate. You can - just pass a TensorFlow variable that you increment at each training step. - - The schedule is a 1-arg callable that produces a decayed learning - rate when passed the current optimizer step. This can be useful for changing - the learning rate value across different invocations of optimizer functions. - It is computed as: - - ```python - def decayed_learning_rate(step): - step = min(step, decay_steps) - linear_decay = (decay_steps - step) / decay_steps) - cosine_decay = 0.5 * ( - 1 + cos(pi * 2 * num_periods * step / decay_steps)) - decayed = (alpha + linear_decay + eps_t) * cosine_decay + beta - return initial_learning_rate * decayed - ``` - where eps_t is 0-centered gaussian noise with variance - initial_variance / (1 + global_step) ** variance_decay - - Example usage: - ```python - decay_steps = 1000 - lr_decayed_fn = ( - tf.keras.experimental.NoisyLinearCosineDecay( - initial_learning_rate, decay_steps)) - ``` - - You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` - as the learning rate. The learning rate schedule is also serializable and - deserializable using `tf.keras.optimizers.schedules.serialize` and - `tf.keras.optimizers.schedules.deserialize`. 
- - Returns: - A 1-arg callable learning rate schedule that takes the current optimizer - step and outputs the decayed learning rate, a scalar `Tensor` of the same - type as `initial_learning_rate`. - """ - - def __init__( - self, - initial_learning_rate, - decay_steps, - initial_variance=1.0, - variance_decay=0.55, - num_periods=0.5, - alpha=0.0, - beta=0.001, - seed=None, - name=None): - """Applies noisy linear cosine decay to the learning rate. + """A LearningRateSchedule that uses a noisy linear cosine decay schedule. + + See [Bello et al., ICML2017] Neural Optimizer Search with RL. + https://arxiv.org/abs/1709.07417 + + For the idea of warm starts here controlled by `num_periods`, + see [Loshchilov & Hutter, ICLR2016] SGDR: Stochastic Gradient Descent + with Warm Restarts. https://arxiv.org/abs/1608.03983 + + Note that linear cosine decay is more aggressive than cosine decay and + larger initial learning rates can typically be used. + + When training a model, it is often recommended to lower the learning rate as + the training progresses. This schedule applies a noisy linear cosine decay + function to an optimizer step, given a provided initial learning rate. + It requires a `step` value to compute the decayed learning rate. You can + just pass a TensorFlow variable that you increment at each training step. + + The schedule is a 1-arg callable that produces a decayed learning + rate when passed the current optimizer step. This can be useful for changing + the learning rate value across different invocations of optimizer functions. + It is computed as: + + ```python + def decayed_learning_rate(step): + step = min(step, decay_steps) + linear_decay = (decay_steps - step) / decay_steps + cosine_decay = 0.5 * ( + 1 + cos(pi * 2 * num_periods * step / decay_steps)) + decayed = (alpha + linear_decay + eps_t) * cosine_decay + beta + return initial_learning_rate * decayed + ``` + where eps_t is 0-centered Gaussian noise with variance + initial_variance / (1 + global_step) ** variance_decay + + Example usage: + ```python + decay_steps = 1000 + lr_decayed_fn = ( + tf.keras.experimental.NoisyLinearCosineDecay( + initial_learning_rate, decay_steps)) + ``` + + You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` + as the learning rate. The learning rate schedule is also serializable and + deserializable using `tf.keras.optimizers.schedules.serialize` and + `tf.keras.optimizers.schedules.deserialize`. - Args: - initial_learning_rate: A scalar `float32` or `float64` Tensor or a Python - number. The initial learning rate. - decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. - Number of steps to decay over. - initial_variance: initial variance for the noise. See computation above. - variance_decay: decay for the noise's variance. See computation above. - num_periods: Number of periods in the cosine part of the decay. - See computation above. - alpha: See computation above. - beta: See computation above. - seed: Integer, optional random seed to enable deterministic behavior. - name: String. Optional name of the operation. Defaults to - 'NoisyLinearCosineDecay'. + Returns: + A 1-arg callable learning rate schedule that takes the current optimizer + step and outputs the decayed learning rate, a scalar `Tensor` of the same + type as `initial_learning_rate`.
""" - super().__init__() - - self.initial_learning_rate = initial_learning_rate - self.decay_steps = decay_steps - self.initial_variance = initial_variance - self.variance_decay = variance_decay - self.num_periods = num_periods - self.alpha = alpha - self.beta = beta - self.seed = seed - self.name = name - self._random_generator = backend.RandomGenerator(seed) - - def __call__(self, step): - with tf.name_scope(self.name or "NoisyLinearCosineDecay") as name: - initial_learning_rate = tf.convert_to_tensor( - self.initial_learning_rate, name="initial_learning_rate") - dtype = initial_learning_rate.dtype - decay_steps = tf.cast(self.decay_steps, dtype) - initial_variance = tf.cast(self.initial_variance, dtype) - variance_decay = tf.cast(self.variance_decay, dtype) - num_periods = tf.cast(self.num_periods, dtype) - alpha = tf.cast(self.alpha, dtype) - beta = tf.cast(self.beta, dtype) - - global_step_recomp = tf.cast(step, dtype) - global_step_recomp = tf.minimum(global_step_recomp, decay_steps) - linear_decayed = (decay_steps - global_step_recomp) / decay_steps - variance = initial_variance / ( - tf.pow(1.0 + global_step_recomp, variance_decay)) - std = tf.sqrt(variance) - noisy_linear_decayed = ( - linear_decayed + self._random_generator.random_normal( - linear_decayed.shape, stddev=std)) - - completed_fraction = global_step_recomp / decay_steps - fraction = 2.0 * num_periods * completed_fraction - cosine_decayed = 0.5 * ( - 1.0 + tf.cos(tf.constant(math.pi, dtype=dtype) * fraction)) - noisy_linear_cosine_decayed = ( - (alpha + noisy_linear_decayed) * cosine_decayed + beta) - - return tf.multiply( - initial_learning_rate, noisy_linear_cosine_decayed, name=name) - - def get_config(self): - return { - "initial_learning_rate": self.initial_learning_rate, - "decay_steps": self.decay_steps, - "initial_variance": self.initial_variance, - "variance_decay": self.variance_decay, - "num_periods": self.num_periods, - "alpha": self.alpha, - "beta": self.beta, - "seed": self.seed, - "name": self.name, - } + + def __init__( + self, + initial_learning_rate, + decay_steps, + initial_variance=1.0, + variance_decay=0.55, + num_periods=0.5, + alpha=0.0, + beta=0.001, + seed=None, + name=None, + ): + """Applies noisy linear cosine decay to the learning rate. + + Args: + initial_learning_rate: A scalar `float32` or `float64` Tensor or a + Python number. The initial learning rate. + decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. + Number of steps to decay over. + initial_variance: initial variance for the noise. See computation + above. + variance_decay: decay for the noise's variance. See computation above. + num_periods: Number of periods in the cosine part of the decay. + See computation above. + alpha: See computation above. + beta: See computation above. + seed: Integer, optional random seed to enable deterministic behavior. + name: String. Optional name of the operation. Defaults to + 'NoisyLinearCosineDecay'. 
+ """ + super().__init__() + + self.initial_learning_rate = initial_learning_rate + self.decay_steps = decay_steps + self.initial_variance = initial_variance + self.variance_decay = variance_decay + self.num_periods = num_periods + self.alpha = alpha + self.beta = beta + self.seed = seed + self.name = name + self._random_generator = backend.RandomGenerator(seed) + + def __call__(self, step): + with tf.name_scope(self.name or "NoisyLinearCosineDecay") as name: + initial_learning_rate = tf.convert_to_tensor( + self.initial_learning_rate, name="initial_learning_rate" + ) + dtype = initial_learning_rate.dtype + decay_steps = tf.cast(self.decay_steps, dtype) + initial_variance = tf.cast(self.initial_variance, dtype) + variance_decay = tf.cast(self.variance_decay, dtype) + num_periods = tf.cast(self.num_periods, dtype) + alpha = tf.cast(self.alpha, dtype) + beta = tf.cast(self.beta, dtype) + + global_step_recomp = tf.cast(step, dtype) + global_step_recomp = tf.minimum(global_step_recomp, decay_steps) + linear_decayed = (decay_steps - global_step_recomp) / decay_steps + variance = initial_variance / ( + tf.pow(1.0 + global_step_recomp, variance_decay) + ) + std = tf.sqrt(variance) + noisy_linear_decayed = ( + linear_decayed + + self._random_generator.random_normal( + linear_decayed.shape, stddev=std + ) + ) + + completed_fraction = global_step_recomp / decay_steps + fraction = 2.0 * num_periods * completed_fraction + cosine_decayed = 0.5 * ( + 1.0 + tf.cos(tf.constant(math.pi, dtype=dtype) * fraction) + ) + noisy_linear_cosine_decayed = ( + alpha + noisy_linear_decayed + ) * cosine_decayed + beta + + return tf.multiply( + initial_learning_rate, noisy_linear_cosine_decayed, name=name + ) + + def get_config(self): + return { + "initial_learning_rate": self.initial_learning_rate, + "decay_steps": self.decay_steps, + "initial_variance": self.initial_variance, + "variance_decay": self.variance_decay, + "num_periods": self.num_periods, + "alpha": self.alpha, + "beta": self.beta, + "seed": self.seed, + "name": self.name, + } @keras_export("keras.optimizers.schedules.serialize") -def serialize(learning_rate_schedule): - """Serializes a `LearningRateSchedule` into a JSON-compatible representation. +def serialize(learning_rate_schedule, use_legacy_format=False): + """Serializes a `LearningRateSchedule` into a JSON-compatible dict. - Args: - learning_rate_schedule: The `LearningRateSchedule` object to serialize. + Args: + learning_rate_schedule: The `LearningRateSchedule` object to serialize. - Returns: - A JSON-serializable dict representing the object's config. + Returns: + A JSON-serializable dict representing the object's config. - Example: + Example: - >>> lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( - ... 0.1, decay_steps=100000, decay_rate=0.96, staircase=True) - >>> tf.keras.optimizers.schedules.serialize(lr_schedule) - {'class_name': 'ExponentialDecay', 'config': {...}} - """ - return generic_utils.serialize_keras_object(learning_rate_schedule) + >>> lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( + ... 
0.1, decay_steps=100000, decay_rate=0.96, staircase=True) + >>> tf.keras.optimizers.schedules.serialize(lr_schedule) + {'module': 'keras.optimizers.schedules', + 'class_name': 'ExponentialDecay', 'config': {...}, + 'registered_name': None} + """ + if use_legacy_format: + return legacy_serialization.serialize_keras_object( + learning_rate_schedule + ) + + return serialization_lib.serialize_keras_object(learning_rate_schedule) @keras_export("keras.optimizers.schedules.deserialize") -def deserialize(config, custom_objects=None): - """Instantiates a `LearningRateSchedule` object from a serialized form. - - Args: - config: The serialized form of the `LearningRateSchedule`. - Dictionary of the form {'class_name': str, 'config': dict}. - custom_objects: A dictionary mapping class names (or function names) of - custom (non-Keras) objects to class/functions. - - Returns: - A `LearningRateSchedule` object. - - Example: - - ```python - # Configuration for PolynomialDecay - config = { - 'class_name': 'PolynomialDecay', - 'config': {'cycle': False, - 'decay_steps': 10000, - 'end_learning_rate': 0.01, - 'initial_learning_rate': 0.1, - 'name': None, - 'power': 0.5}} - lr_schedule = tf.keras.optimizers.schedules.deserialize(config) - ``` - """ - return generic_utils.deserialize_keras_object( - config, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name="decay") +def deserialize(config, custom_objects=None, use_legacy_format=False): + """Instantiates a `LearningRateSchedule` object from a serialized form. + + Args: + config: The serialized form of the `LearningRateSchedule`. + Dictionary of the form {'class_name': str, 'config': dict}. + custom_objects: A dictionary mapping class names (or function names) of + custom (non-Keras) objects to class/functions. + + Returns: + A `LearningRateSchedule` object. 
+ + Example: + + ```python + # Configuration for PolynomialDecay + config = { + 'class_name': 'PolynomialDecay', + 'config': {'cycle': False, + 'decay_steps': 10000, + 'end_learning_rate': 0.01, + 'initial_learning_rate': 0.1, + 'name': None, + 'power': 0.5}} + lr_schedule = tf.keras.optimizers.schedules.deserialize(config) + ``` + """ + if use_legacy_format: + return legacy_serialization.deserialize_keras_object( + config, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="decay", + ) + + return serialization_lib.deserialize_keras_object( + config, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="decay", + ) diff --git a/keras/optimizers/schedules/learning_rate_schedule_test.py b/keras/optimizers/schedules/learning_rate_schedule_test.py index 4239da5894b4..e78709d9089a 100644 --- a/keras/optimizers/schedules/learning_rate_schedule_test.py +++ b/keras/optimizers/schedules/learning_rate_schedule_test.py @@ -16,433 +16,501 @@ import math +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import gradient_descent from keras.optimizers.schedules import learning_rate_schedule from keras.testing_infra import test_combinations -import numpy as np - -import tensorflow.compat.v2 as tf def _maybe_serialized(lr_decay, serialize_and_deserialize): - if serialize_and_deserialize: - serialized = learning_rate_schedule.serialize(lr_decay) - return learning_rate_schedule.deserialize(serialized) - else: - return lr_decay + if serialize_and_deserialize: + serialized = learning_rate_schedule.serialize(lr_decay) + return learning_rate_schedule.deserialize(serialized) + else: + return lr_decay -@test_combinations.generate(test_combinations.combine(serialize=[False, True], - mode=["graph", "eager"])) +@test_combinations.generate( + test_combinations.combine(serialize=[False, True], mode=["graph", "eager"]) +) class LRDecayTestV2(tf.test.TestCase, parameterized.TestCase): - - def testContinuous(self, serialize): - self.evaluate(tf.compat.v1.global_variables_initializer()) - step = 5 - decayed_lr = learning_rate_schedule.ExponentialDecay(0.05, 10, 0.96) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = .05 * 0.96**(5.0 / 10.0) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testStaircase(self, serialize): - if tf.executing_eagerly(): - step = tf.Variable(0) - self.evaluate(tf.compat.v1.global_variables_initializer()) - decayed_lr = learning_rate_schedule.ExponentialDecay( - .1, 3, 0.96, staircase=True) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - - # No change to learning rate due to staircase - expected = .1 - self.evaluate(step.assign(1)) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - expected = .1 - self.evaluate(step.assign(2)) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - # Decayed learning rate - expected = .1 * 0.96 ** (100 // 3) - self.evaluate(step.assign(100)) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testVariables(self, serialize): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
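Returning to the `serialize`/`deserialize` pair above: the two functions round-trip any built-in schedule. A minimal sketch using the public `tf.keras` API (values are arbitrary):

```python
import tensorflow as tf

# Minimal round-trip through serialize/deserialize (a sketch, assuming
# TF 2.x; not part of the patch).
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.1, decay_steps=100000, decay_rate=0.96, staircase=True)
config = tf.keras.optimizers.schedules.serialize(lr_schedule)
restored = tf.keras.optimizers.schedules.deserialize(config)
assert float(restored(100)) == float(lr_schedule(100))
```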
- with tf.Graph().as_default(): - step = tf.Variable(1) - assign_1 = step.assign(1) - assign_2 = step.assign(2) - assign_100 = step.assign(100) - decayed_lr = learning_rate_schedule.ExponentialDecay( - .1, 3, 0.96, staircase=True) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - # No change to learning rate - self.evaluate(assign_1.op) - self.assertAllClose(self.evaluate(decayed_lr(step)), .1, 1e-6) - self.evaluate(assign_2.op) - self.assertAllClose(self.evaluate(decayed_lr(step)), .1, 1e-6) - # Decayed learning rate - self.evaluate(assign_100.op) - expected = .1 * 0.96**(100 // 3) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testPiecewiseConstant(self, serialize): - x = tf.Variable(-999) - decayed_lr = learning_rate_schedule.PiecewiseConstantDecay( - [100, 110, 120], [1.0, 0.1, 0.01, 0.001]) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - - self.assertAllClose(self.evaluate(decayed_lr(x)), 1.0, 1e-6) - self.evaluate(x.assign(100)) - self.assertAllClose(self.evaluate(decayed_lr(x)), 1.0, 1e-6) - self.evaluate(x.assign(105)) - self.assertAllClose(self.evaluate(decayed_lr(x)), 0.1, 1e-6) - self.evaluate(x.assign(110)) - self.assertAllClose(self.evaluate(decayed_lr(x)), 0.1, 1e-6) - self.evaluate(x.assign(120)) - self.assertAllClose(self.evaluate(decayed_lr(x)), 0.01, 1e-6) - self.evaluate(x.assign(999)) - self.assertAllClose(self.evaluate(decayed_lr(x)), 0.001, 1e-6) - - def testPiecewiseFunction(self, serialize): - if not tf.executing_eagerly(): - self.skipTest("Run on eager mode only.") - - del serialize - v = tf.Variable(1.) - def loss_fn(): - return v * v - learning_rate = learning_rate_schedule.PiecewiseConstantDecay( - [1.], [1., 0.1]) - opt = gradient_descent.SGD(learning_rate=learning_rate) - - @tf.function - def minimize(): - with tf.GradientTape() as tape: - loss = loss_fn() - g = tape.gradient(loss, [v]) - opt.apply_gradients(list(zip(g, [v]))) - - minimize() - self.assertAllEqual(v.read_value(), -1.0) - - def testPiecewiseConstantEdgeCases(self, serialize): - # Test casting boundaries from int32 to int64. 
- x_int64 = tf.Variable(0, dtype=tf.int64) - boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7] - decayed_lr = learning_rate_schedule.PiecewiseConstantDecay( - boundaries, values) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.4, 1e-6) - self.evaluate(x_int64.assign(1)) - self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.4, 1e-6) - self.evaluate(x_int64.assign(2)) - self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.5, 1e-6) - self.evaluate(x_int64.assign(3)) - self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.6, 1e-6) - self.evaluate(x_int64.assign(4)) - self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.7, 1e-6) + def testContinuous(self, serialize): + self.evaluate(tf.compat.v1.global_variables_initializer()) + step = 5 + decayed_lr = learning_rate_schedule.ExponentialDecay(0.05, 10, 0.96) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = 0.05 * 0.96 ** (5.0 / 10.0) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testStaircase(self, serialize): + if tf.executing_eagerly(): + step = tf.Variable(0) + self.evaluate(tf.compat.v1.global_variables_initializer()) + decayed_lr = learning_rate_schedule.ExponentialDecay( + 0.1, 3, 0.96, staircase=True + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + + # No change to learning rate due to staircase + expected = 0.1 + self.evaluate(step.assign(1)) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + expected = 0.1 + self.evaluate(step.assign(2)) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + # Decayed learning rate + expected = 0.1 * 0.96 ** (100 // 3) + self.evaluate(step.assign(100)) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testVariables(self, serialize): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
+ with tf.Graph().as_default(): + step = tf.Variable(1) + assign_1 = step.assign(1) + assign_2 = step.assign(2) + assign_100 = step.assign(100) + decayed_lr = learning_rate_schedule.ExponentialDecay( + 0.1, 3, 0.96, staircase=True + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + # No change to learning rate + self.evaluate(assign_1.op) + self.assertAllClose(self.evaluate(decayed_lr(step)), 0.1, 1e-6) + self.evaluate(assign_2.op) + self.assertAllClose(self.evaluate(decayed_lr(step)), 0.1, 1e-6) + # Decayed learning rate + self.evaluate(assign_100.op) + expected = 0.1 * 0.96 ** (100 // 3) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testPiecewiseConstant(self, serialize): + x = tf.Variable(-999) + decayed_lr = learning_rate_schedule.PiecewiseConstantDecay( + [100, 110, 120], [1.0, 0.1, 0.01, 0.001] + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + + self.assertAllClose(self.evaluate(decayed_lr(x)), 1.0, 1e-6) + self.evaluate(x.assign(100)) + self.assertAllClose(self.evaluate(decayed_lr(x)), 1.0, 1e-6) + self.evaluate(x.assign(105)) + self.assertAllClose(self.evaluate(decayed_lr(x)), 0.1, 1e-6) + self.evaluate(x.assign(110)) + self.assertAllClose(self.evaluate(decayed_lr(x)), 0.1, 1e-6) + self.evaluate(x.assign(120)) + self.assertAllClose(self.evaluate(decayed_lr(x)), 0.01, 1e-6) + self.evaluate(x.assign(999)) + self.assertAllClose(self.evaluate(decayed_lr(x)), 0.001, 1e-6) + + def testPiecewiseFunction(self, serialize): + if not tf.executing_eagerly(): + self.skipTest("Run on eager mode only.") + + del serialize + v = tf.Variable(1.0) + + def loss_fn(): + return v * v + + learning_rate = learning_rate_schedule.PiecewiseConstantDecay( + [1.0], [1.0, 0.1] + ) + opt = gradient_descent.SGD(learning_rate=learning_rate) + + @tf.function + def minimize(): + with tf.GradientTape() as tape: + loss = loss_fn() + g = tape.gradient(loss, [v]) + opt.apply_gradients(list(zip(g, [v]))) + + minimize() + self.assertAllEqual(v.read_value(), -1.0) + + def testPiecewiseConstantEdgeCases(self, serialize): + # Test casting boundaries from int32 to int64. 
+ x_int64 = tf.Variable(0, dtype=tf.int64) + boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7] + decayed_lr = learning_rate_schedule.PiecewiseConstantDecay( + boundaries, values + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.4, 1e-6) + self.evaluate(x_int64.assign(1)) + self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.4, 1e-6) + self.evaluate(x_int64.assign(2)) + self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.5, 1e-6) + self.evaluate(x_int64.assign(3)) + self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.6, 1e-6) + self.evaluate(x_int64.assign(4)) + self.assertAllClose(self.evaluate(decayed_lr(x_int64)), 0.7, 1e-6) # @parameterized.named_parameters( # ("NotSerialized", False), # ("Serialized", True)) -@test_combinations.generate(test_combinations.combine(serialize=[False, True], - mode=["graph", "eager"])) +@test_combinations.generate( + test_combinations.combine(serialize=[False, True], mode=["graph", "eager"]) +) class LinearDecayTestV2(tf.test.TestCase, parameterized.TestCase): - - def testHalfWay(self, serialize): - step = 5 - lr = 0.05 - end_lr = 0.0 - decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = lr * 0.5 - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testEnd(self, serialize): - step = 10 - lr = 0.05 - end_lr = 0.001 - decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = end_lr - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testHalfWayWithEnd(self, serialize): - step = 5 - lr = 0.05 - end_lr = 0.001 - decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = (lr + end_lr) * 0.5 - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testBeyondEnd(self, serialize): - step = 15 - lr = 0.05 - end_lr = 0.001 - decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = end_lr - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testBeyondEndWithCycle(self, serialize): - step = 15 - lr = 0.05 - end_lr = 0.001 - decayed_lr = learning_rate_schedule.PolynomialDecay( - lr, 10, end_lr, cycle=True) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = (lr - end_lr) * 0.25 + end_lr - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + def testHalfWay(self, serialize): + step = 5 + lr = 0.05 + end_lr = 0.0 + decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = lr * 0.5 + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testEnd(self, serialize): + step = 10 + lr = 0.05 + end_lr = 0.001 + decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = end_lr + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testHalfWayWithEnd(self, serialize): + step = 5 + lr = 0.05 + end_lr = 0.001 + decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = (lr + end_lr) * 0.5 + 
self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testBeyondEnd(self, serialize): + step = 15 + lr = 0.05 + end_lr = 0.001 + decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = end_lr + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testBeyondEndWithCycle(self, serialize): + step = 15 + lr = 0.05 + end_lr = 0.001 + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, cycle=True + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = (lr - end_lr) * 0.25 + end_lr + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) # @parameterized.named_parameters( # ("NotSerialized", False), # ("Serialized", True)) -@test_combinations.generate(test_combinations.combine(serialize=[False, True], - mode=["graph", "eager"])) -class SqrtDecayTestV2(tf.test.TestCase, - parameterized.TestCase): - - def testHalfWay(self, serialize): - step = 5 - lr = 0.05 - end_lr = 0.0 - power = 0.5 - decayed_lr = learning_rate_schedule.PolynomialDecay( - lr, 10, end_lr, power=power) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = lr * 0.5**power - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testEnd(self, serialize): - step = 10 - lr = 0.05 - end_lr = 0.001 - power = 0.5 - decayed_lr = learning_rate_schedule.PolynomialDecay( - lr, 10, end_lr, power=power) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = end_lr - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testHalfWayWithEnd(self, serialize): - step = 5 - lr = 0.05 - end_lr = 0.001 - power = 0.5 - decayed_lr = learning_rate_schedule.PolynomialDecay( - lr, 10, end_lr, power=power) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = (lr - end_lr) * 0.5**power + end_lr - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testBeyondEnd(self, serialize): - step = 15 - lr = 0.05 - end_lr = 0.001 - power = 0.5 - decayed_lr = learning_rate_schedule.PolynomialDecay( - lr, 10, end_lr, power=power) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = end_lr - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testBeyondEndWithCycle(self, serialize): - step = 15 - lr = 0.05 - end_lr = 0.001 - power = 0.5 - decayed_lr = learning_rate_schedule.PolynomialDecay( - lr, 10, end_lr, power=power, cycle=True) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = (lr - end_lr) * 0.25**power + end_lr - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) +@test_combinations.generate( + test_combinations.combine(serialize=[False, True], mode=["graph", "eager"]) +) +class SqrtDecayTestV2(tf.test.TestCase, parameterized.TestCase): + def testHalfWay(self, serialize): + step = 5 + lr = 0.05 + end_lr = 0.0 + power = 0.5 + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, power=power + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = lr * 0.5**power + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testEnd(self, serialize): + step = 10 + lr = 0.05 + end_lr = 0.001 + power = 0.5 + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, power=power + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = end_lr + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def 
testHalfWayWithEnd(self, serialize): + step = 5 + lr = 0.05 + end_lr = 0.001 + power = 0.5 + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, power=power + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = (lr - end_lr) * 0.5**power + end_lr + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testBeyondEnd(self, serialize): + step = 15 + lr = 0.05 + end_lr = 0.001 + power = 0.5 + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, power=power + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = end_lr + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testBeyondEndWithCycle(self, serialize): + step = 15 + lr = 0.05 + end_lr = 0.001 + power = 0.5 + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, power=power, cycle=True + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = (lr - end_lr) * 0.25**power + end_lr + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) # @parameterized.named_parameters( # ("NotSerialized", False), # ("Serialized", True)) -@test_combinations.generate(test_combinations.combine(serialize=[False, True], - mode=["graph", "eager"])) -class PolynomialDecayTestV2(tf.test.TestCase, - parameterized.TestCase): - - def testBeginWithCycle(self, serialize): - lr = 0.001 - decay_steps = 10 - step = 0 - decayed_lr = learning_rate_schedule.PolynomialDecay( - lr, decay_steps, cycle=True) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = lr - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) +@test_combinations.generate( + test_combinations.combine(serialize=[False, True], mode=["graph", "eager"]) +) +class PolynomialDecayTestV2(tf.test.TestCase, parameterized.TestCase): + def testBeginWithCycle(self, serialize): + lr = 0.001 + decay_steps = 10 + step = 0 + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, decay_steps, cycle=True + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = lr + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) # @parameterized.named_parameters( # ("NotSerialized", False), # ("Serialized", True)) -@test_combinations.generate(test_combinations.combine(serialize=[False, True], - mode=["graph", "eager"])) +@test_combinations.generate( + test_combinations.combine(serialize=[False, True], mode=["graph", "eager"]) +) class InverseDecayTestV2(tf.test.TestCase, parameterized.TestCase): - - def testDecay(self, serialize): - initial_lr = 0.1 - k = 10 - decay_rate = 0.96 - step = tf.Variable(0) - decayed_lr = learning_rate_schedule.InverseTimeDecay(initial_lr, k, - decay_rate) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - for i in range(k + 1): - expected = initial_lr / (1 + i / k * decay_rate) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - self.evaluate(step.assign_add(1)) - - def testStaircase(self, serialize): - initial_lr = 0.1 - k = 10 - decay_rate = 0.96 - step = tf.Variable(0) - decayed_lr = learning_rate_schedule.InverseTimeDecay( - initial_lr, k, decay_rate, staircase=True) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - - self.evaluate(tf.compat.v1.global_variables_initializer()) - for i in range(k + 1): - expected = initial_lr / (1 + decay_rate * (i // k)) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - self.evaluate(step.assign_add(1)) - - 
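The polynomial-decay expectations in the tests above (including the `cycle=True` cases) all follow one closed form. A minimal sketch, assuming the stretch-to-the-next-multiple behavior for `cycle=True` that the tests imply; names are hypothetical:

```python
import math


def polynomial_decay(step, initial_lr, decay_steps, end_lr, power=1.0, cycle=False):
    if cycle:
        # Stretch decay_steps to the next multiple that contains `step`
        # (kept at 1x for step == 0, matching testBeginWithCycle).
        decay_steps = decay_steps * max(1.0, math.ceil(step / decay_steps))
    else:
        step = min(step, decay_steps)
    remaining = 1.0 - step / decay_steps
    return (initial_lr - end_lr) * remaining**power + end_lr


# Matches testHalfWayWithEnd, testBeyondEndWithCycle, and the sqrt variants.
assert math.isclose(polynomial_decay(5, 0.05, 10, 0.001), (0.05 + 0.001) * 0.5)
assert math.isclose(
    polynomial_decay(15, 0.05, 10, 0.001, cycle=True),
    (0.05 - 0.001) * 0.25 + 0.001,
)
assert math.isclose(
    polynomial_decay(5, 0.05, 10, 0.001, power=0.5),
    (0.05 - 0.001) * 0.5**0.5 + 0.001,
)
```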
-@test_combinations.generate(test_combinations.combine(serialize=[False, True], - mode=["graph", "eager"])) + def testDecay(self, serialize): + initial_lr = 0.1 + k = 10 + decay_rate = 0.96 + step = tf.Variable(0) + decayed_lr = learning_rate_schedule.InverseTimeDecay( + initial_lr, k, decay_rate + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + for i in range(k + 1): + expected = initial_lr / (1 + i / k * decay_rate) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + self.evaluate(step.assign_add(1)) + + def testStaircase(self, serialize): + initial_lr = 0.1 + k = 10 + decay_rate = 0.96 + step = tf.Variable(0) + decayed_lr = learning_rate_schedule.InverseTimeDecay( + initial_lr, k, decay_rate, staircase=True + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + + self.evaluate(tf.compat.v1.global_variables_initializer()) + for i in range(k + 1): + expected = initial_lr / (1 + decay_rate * (i // k)) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + self.evaluate(step.assign_add(1)) + + +@test_combinations.generate( + test_combinations.combine(serialize=[False, True], mode=["graph", "eager"]) +) class CosineDecayTestV2(tf.test.TestCase, parameterized.TestCase): - - def np_cosine_decay(self, step, decay_steps, alpha=0.0): - step = min(step, decay_steps) - completed_fraction = step / decay_steps - decay = 0.5 * (1.0 + math.cos(math.pi * completed_fraction)) - return (1.0 - alpha) * decay + alpha - - def testDecay(self, serialize): - num_training_steps = 1000 - initial_lr = 1.0 - for step in range(0, 1500, 250): - decayed_lr = learning_rate_schedule.CosineDecay(initial_lr, - num_training_steps) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = self.np_cosine_decay(step, num_training_steps) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testAlpha(self, serialize): - num_training_steps = 1000 - initial_lr = 1.0 - alpha = 0.1 - for step in range(0, 1500, 250): - decayed_lr = learning_rate_schedule.CosineDecay(initial_lr, - num_training_steps, - alpha) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = self.np_cosine_decay(step, num_training_steps, alpha) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testFloat64InitLearningRate(self, serialize): - num_training_steps = 1000 - initial_lr = np.float64(1.0) - for step in range(0, 1500, 250): - decayed_lr = learning_rate_schedule.CosineDecay(initial_lr, - num_training_steps) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = self.np_cosine_decay(step, num_training_steps) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - -@test_combinations.generate(test_combinations.combine(serialize=[False, True], - mode=["graph", "eager"])) -class CosineDecayRestartsTestV2(tf.test.TestCase, - parameterized.TestCase): - - def np_cosine_decay_restarts(self, step, decay_steps, t_mul=2.0, m_mul=1.0, - alpha=0.0): - fac = 1.0 - while step >= decay_steps: - step -= decay_steps - decay_steps *= t_mul - fac *= m_mul - - completed_fraction = step / decay_steps - decay = fac * 0.5 * (1.0 + math.cos(math.pi * completed_fraction)) - return (1.0 - alpha) * decay + alpha - - def testDecay(self, serialize): - num_training_steps = 1000 - initial_lr = 1.0 - for step in range(0, 1500, 250): - decayed_lr = learning_rate_schedule.CosineDecayRestarts( - initial_lr, num_training_steps) - decayed_lr = 
_maybe_serialized(decayed_lr, serialize) - expected = self.np_cosine_decay_restarts(step, num_training_steps) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testFloat64InitLearningRate(self, serialize): - num_training_steps = 1000 - initial_lr = np.float64(1.0) - for step in range(0, 1500, 250): - decayed_lr = learning_rate_schedule.CosineDecayRestarts( - initial_lr, num_training_steps) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = self.np_cosine_decay_restarts(step, num_training_steps) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testAlpha(self, serialize): - num_training_steps = 1000 - initial_lr = 1.0 - alpha = 0.1 - for step in range(0, 1500, 250): - decayed_lr = learning_rate_schedule.CosineDecayRestarts( - initial_lr, num_training_steps, alpha=alpha) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = self.np_cosine_decay_restarts( - step, num_training_steps, alpha=alpha) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testMMul(self, serialize): - num_training_steps = 1000 - initial_lr = 1.0 - m_mul = 0.9 - for step in range(0, 1500, 250): - decayed_lr = learning_rate_schedule.CosineDecayRestarts( - initial_lr, num_training_steps, m_mul=m_mul) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = self.np_cosine_decay_restarts( - step, num_training_steps, m_mul=m_mul) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) - - def testTMul(self, serialize): - num_training_steps = 1000 - initial_lr = 1.0 - t_mul = 1.0 - for step in range(0, 1500, 250): - decayed_lr = learning_rate_schedule.CosineDecayRestarts( - initial_lr, num_training_steps, t_mul=t_mul) - decayed_lr = _maybe_serialized(decayed_lr, serialize) - expected = self.np_cosine_decay_restarts( - step, num_training_steps, t_mul=t_mul) - self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + def np_cosine_decay(self, step, decay_steps, alpha=0.0): + step = min(step, decay_steps) + completed_fraction = step / decay_steps + decay = 0.5 * (1.0 + math.cos(math.pi * completed_fraction)) + return (1.0 - alpha) * decay + alpha + + def testDecay(self, serialize): + num_training_steps = 1000 + initial_lr = 1.0 + for step in range(0, 1500, 250): + decayed_lr = learning_rate_schedule.CosineDecay( + initial_lr, num_training_steps + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = self.np_cosine_decay(step, num_training_steps) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def linear_warmup(self, step, warmup_steps, initial_lr, target_lr): + completed_fraction = step / warmup_steps + total_delta = target_lr - initial_lr + return initial_lr + completed_fraction * total_delta + + def testWarmup(self, serialize): + warmup_steps = 1500 + initial_lr = 0.0 + target_lr = 10.0 + for step in range(0, 1500, 250): + lr = learning_rate_schedule.CosineDecay( + initial_lr, + 0, + warmup_target=target_lr, + warmup_steps=warmup_steps, + ) + lr = _maybe_serialized(lr, serialize) + expected = self.linear_warmup( + step, warmup_steps, initial_lr, target_lr + ) + self.assertAllClose(self.evaluate(lr(step)), expected) + + def testAlpha(self, serialize): + num_training_steps = 1000 + initial_lr = 1.0 + alpha = 0.1 + for step in range(0, 1500, 250): + decayed_lr = learning_rate_schedule.CosineDecay( + initial_lr, num_training_steps, alpha + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = self.np_cosine_decay(step, 
num_training_steps, alpha) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testFloat64InitLearningRate(self, serialize): + num_training_steps = 1000 + initial_lr = np.float64(1.0) + for step in range(0, 1500, 250): + decayed_lr = learning_rate_schedule.CosineDecay( + initial_lr, num_training_steps + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = self.np_cosine_decay(step, num_training_steps) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testWarmupDecay(self, serialize): + warmup_steps = 2000 + decay_steps = 1000 + initial_lr = 0.0 + target_lr = 10.0 + for step in range(0, 3000, 250): + lr = learning_rate_schedule.CosineDecay( + initial_lr, + decay_steps, + warmup_target=target_lr, + warmup_steps=warmup_steps, + ) + lr = _maybe_serialized(lr, serialize) + if step < warmup_steps + 1: + expected = self.linear_warmup( + step, warmup_steps, initial_lr, target_lr + ) + else: + expected = target_lr * self.np_cosine_decay( + step - warmup_steps, decay_steps + ) + self.assertAllClose(self.evaluate(lr(step)), expected) + + +@test_combinations.generate( + test_combinations.combine(serialize=[False, True], mode=["graph", "eager"]) +) +class CosineDecayRestartsTestV2(tf.test.TestCase, parameterized.TestCase): + def np_cosine_decay_restarts( + self, step, decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0 + ): + fac = 1.0 + while step >= decay_steps: + step -= decay_steps + decay_steps *= t_mul + fac *= m_mul + + completed_fraction = step / decay_steps + decay = fac * 0.5 * (1.0 + math.cos(math.pi * completed_fraction)) + return (1.0 - alpha) * decay + alpha + + def testDecay(self, serialize): + num_training_steps = 1000 + initial_lr = 1.0 + for step in range(0, 1500, 250): + decayed_lr = learning_rate_schedule.CosineDecayRestarts( + initial_lr, num_training_steps + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = self.np_cosine_decay_restarts(step, num_training_steps) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testFloat64InitLearningRate(self, serialize): + num_training_steps = 1000 + initial_lr = np.float64(1.0) + for step in range(0, 1500, 250): + decayed_lr = learning_rate_schedule.CosineDecayRestarts( + initial_lr, num_training_steps + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = self.np_cosine_decay_restarts(step, num_training_steps) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testAlpha(self, serialize): + num_training_steps = 1000 + initial_lr = 1.0 + alpha = 0.1 + for step in range(0, 1500, 250): + decayed_lr = learning_rate_schedule.CosineDecayRestarts( + initial_lr, num_training_steps, alpha=alpha + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = self.np_cosine_decay_restarts( + step, num_training_steps, alpha=alpha + ) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testMMul(self, serialize): + num_training_steps = 1000 + initial_lr = 1.0 + m_mul = 0.9 + for step in range(0, 1500, 250): + decayed_lr = learning_rate_schedule.CosineDecayRestarts( + initial_lr, num_training_steps, m_mul=m_mul + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = self.np_cosine_decay_restarts( + step, num_training_steps, m_mul=m_mul + ) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) + + def testTMul(self, serialize): + num_training_steps = 1000 + initial_lr = 1.0 + t_mul = 1.0 + for step in range(0, 1500, 250): + 
decayed_lr = learning_rate_schedule.CosineDecayRestarts( + initial_lr, num_training_steps, t_mul=t_mul + ) + decayed_lr = _maybe_serialized(decayed_lr, serialize) + expected = self.np_cosine_decay_restarts( + step, num_training_steps, t_mul=t_mul + ) + self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/optimizers/sgd.py b/keras/optimizers/sgd.py new file mode 100644 index 000000000000..c6f83e1eefa4 --- /dev/null +++ b/keras/optimizers/sgd.py @@ -0,0 +1,207 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""SGD optimizer implementation.""" + +import tensorflow.compat.v2 as tf + +from keras.optimizers import optimizer +from keras.saving.object_registration import register_keras_serializable + +# isort: off +from tensorflow.python.util.tf_export import keras_export + + +@register_keras_serializable() +@keras_export( + "keras.optimizers.experimental.SGD", + "keras.optimizers.SGD", + "keras.dtensor.experimental.optimizers.SGD", + v1=[], +) +class SGD(optimizer.Optimizer): + r"""Gradient descent (with momentum) optimizer. + + Update rule for parameter `w` with gradient `g` when `momentum` is 0: + + ```python + w = w - learning_rate * g + ``` + + Update rule when `momentum` is larger than 0: + + ```python + velocity = momentum * velocity - learning_rate * g + w = w + velocity + ``` + + When `nesterov=True`, this rule becomes: + + ```python + velocity = momentum * velocity - learning_rate * g + w = w + momentum * velocity - learning_rate * g + ``` + + Args: + learning_rate: A `Tensor`, floating point value, or a schedule that is a + `keras.optimizers.schedules.LearningRateSchedule`, or a callable + that takes no arguments and returns the actual value to use. The + learning rate. Defaults to 0.01. + momentum: float hyperparameter >= 0 that accelerates gradient descent in + the relevant direction and dampens oscillations. + Defaults to 0, i.e., vanilla gradient descent. + nesterov: boolean. Whether to apply Nesterov momentum. + Defaults to `False`. 
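The momentum rule stated in the docstring above can be hand-checked; the doctest values that follow (a first step of 0.1, then 0.18) fall out of it directly. A plain-Python sketch, not the optimizer code:

```python
# Hand-check of the momentum update rule stated above, with the same
# numbers as the docstring example: lr=0.1, momentum=0.9, loss = w**2 / 2.
lr, momentum = 0.1, 0.9
w, velocity = 1.0, 0.0

g = w  # d(loss)/dw = w
velocity = momentum * velocity - lr * g  # -0.1
w = w + velocity  # 0.9 -> first step size is 0.1

g = w
velocity = momentum * velocity - lr * g  # -0.09 - 0.09 = -0.18
w = w + velocity  # 0.72 -> second step size is 0.18, boosted by momentum
```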
+ {{base_optimizer_keyword_args}} + + Usage: + + >>> opt = tf.keras.optimizers.SGD(learning_rate=0.1) + >>> var = tf.Variable(1.0) + >>> loss = lambda: (var ** 2)/2.0 # d(loss)/d(var) = var + >>> opt.minimize(loss, [var]) + >>> # Step is `- learning_rate * grad` + >>> var.numpy() + 0.9 + + >>> opt = tf.keras.optimizers.SGD(0.1, momentum=0.9) + >>> var = tf.Variable(1.0) + >>> val0 = var.value() + >>> loss = lambda: (var ** 2)/2.0 # d(loss)/d(var) = var + >>> # First step is `- learning_rate * grad` + >>> opt.minimize(loss, [var]) + >>> val1 = var.value() + >>> (val0 - val1).numpy() + 0.1 + >>> # On later steps, step-size increases because of momentum + >>> opt.minimize(loss, [var]) + >>> val2 = var.value() + >>> (val1 - val2).numpy() + 0.18 + + Reference: + - For `nesterov=True`, see [Sutskever et al., 2013]( + http://proceedings.mlr.press/v28/sutskever13.pdf). + """ + + def __init__( + self, + learning_rate=0.01, + momentum=0.0, + nesterov=False, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + jit_compile=True, + name="SGD", + **kwargs + ): + super().__init__( + name=name, + weight_decay=weight_decay, + clipnorm=clipnorm, + clipvalue=clipvalue, + global_clipnorm=global_clipnorm, + use_ema=use_ema, + ema_momentum=ema_momentum, + ema_overwrite_frequency=ema_overwrite_frequency, + jit_compile=jit_compile, + **kwargs + ) + self._learning_rate = self._build_learning_rate(learning_rate) + self.momentum = momentum + self.nesterov = nesterov + if isinstance(momentum, (int, float)) and ( + momentum < 0 or momentum > 1 + ): + raise ValueError("`momentum` must be in the range [0, 1].") + + def build(self, var_list): + """Initialize optimizer variables. + + SGD optimizer has one variable `momentums`, only set if `self.momentum` + is not 0. + + Args: + var_list: list of model variables to build SGD variables on. + """ + super().build(var_list) + if hasattr(self, "_built") and self._built: + return + self.momentums = [] + for var in var_list: + self.momentums.append( + self.add_variable_from_reference( + model_variable=var, variable_name="m" + ) + ) + self._built = True + + def update_step(self, gradient, variable): + """Update step given gradient and the associated model variable.""" + lr = tf.cast(self.learning_rate, variable.dtype) + m = None + var_key = self._var_key(variable) + momentum = tf.cast(self.momentum, variable.dtype) + m = self.momentums[self._index_dict[var_key]] + + # TODO(b/204321487): Add nesterov acceleration. + if isinstance(gradient, tf.IndexedSlices): + # Sparse gradients. 
+ add_value = tf.IndexedSlices( + -gradient.values * lr, gradient.indices + ) + if m is not None: + m.assign(m * momentum) + m.scatter_add(add_value) + if self.nesterov: + variable.scatter_add(add_value) + variable.assign_add(m * momentum) + else: + variable.assign_add(m) + else: + variable.scatter_add(add_value) + else: + # Dense gradients + if m is not None: + m.assign(-gradient * lr + m * momentum) + if self.nesterov: + variable.assign_add(-gradient * lr + m * momentum) + else: + variable.assign_add(m) + else: + variable.assign_add(-gradient * lr) + + def get_config(self): + config = super().get_config() + + config.update( + { + "learning_rate": self._serialize_hyperparameter( + self._learning_rate + ), + "momentum": self.momentum, + "nesterov": self.nesterov, + } + ) + return config + + +SGD.__doc__ = SGD.__doc__.replace( + "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args +) diff --git a/keras/optimizers/utils.py b/keras/optimizers/utils.py new file mode 100644 index 000000000000..720ed64fd0a3 --- /dev/null +++ b/keras/optimizers/utils.py @@ -0,0 +1,177 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Optimizer utilities.""" + +import tensorflow.compat.v2 as tf + +# isort: off +from tensorflow.python.platform import tf_logging as logging + + +def all_reduce_sum_gradients(grads_and_vars): + """Returns all-reduced gradients aggregated via summation. + + Args: + grads_and_vars: List of (gradient, variable) pairs. + + Returns: + List of (gradient, variable) pairs where gradients have been all-reduced. 
+ """ + grads_and_vars = list(grads_and_vars) + filtered_grads_and_vars = filter_empty_gradients(grads_and_vars) + if filtered_grads_and_vars: + if tf.__internal__.distribute.strategy_supports_no_merge_call(): + grads = [pair[0] for pair in filtered_grads_and_vars] + reduced = tf.distribute.get_replica_context().all_reduce( + tf.distribute.ReduceOp.SUM, grads + ) + else: + # TODO(b/183257003): Remove this branch + reduced = tf.distribute.get_replica_context().merge_call( + _all_reduce_sum_fn, args=(filtered_grads_and_vars,) + ) + else: + reduced = [] + # Copy 'reduced' but add None gradients back in + reduced_with_nones = [] + reduced_pos = 0 + for g, v in grads_and_vars: + if g is None: + reduced_with_nones.append((None, v)) + else: + reduced_with_nones.append((reduced[reduced_pos], v)) + reduced_pos += 1 + assert reduced_pos == len(reduced), "Failed to add all gradients" + return reduced_with_nones + + +def filter_empty_gradients(grads_and_vars): + """Filter out `(grad, var)` pairs that have a gradient equal to `None`.""" + grads_and_vars = tuple(grads_and_vars) + if not grads_and_vars: + return grads_and_vars + + filtered = [] + vars_with_empty_grads = [] + for grad, var in grads_and_vars: + if grad is None: + vars_with_empty_grads.append(var) + else: + filtered.append((grad, var)) + filtered = tuple(filtered) + + if not filtered: + variable = ([v.name for _, v in grads_and_vars],) + raise ValueError( + f"No gradients provided for any variable: {variable}. " + f"Provided `grads_and_vars` is {grads_and_vars}." + ) + if vars_with_empty_grads: + logging.warning( + "Gradients do not exist for variables %s when minimizing the " + "loss. If you're using `model.compile()`, did you forget to " + "provide a `loss` argument?", + ([v.name for v in vars_with_empty_grads]), + ) + return filtered + + +def make_gradient_clipnorm_fn(clipnorm): + """Creates a gradient transformation function for clipping by norm.""" + if clipnorm is None: + return lambda grads_and_vars: grads_and_vars + + def gradient_clipnorm_fn(grads_and_vars): + + if isinstance( + tf.distribute.get_strategy(), + ( + tf.distribute.experimental.CentralStorageStrategy, + tf.compat.v1.distribute.experimental.CentralStorageStrategy, + ), + ): + raise ValueError( + "`clipnorm` is not supported with `CentralStorageStrategy`. " + f"The strategy used is {tf.distribute.get_strategy()}." + ) + + clipped_grads_and_vars = [ + (tf.clip_by_norm(g, clipnorm), v) for g, v in grads_and_vars + ] + return clipped_grads_and_vars + + return gradient_clipnorm_fn + + +def make_global_gradient_clipnorm_fn(clipnorm): + """Creates a gradient transformation function for clipping by global norm.""" + if clipnorm is None: + return lambda grads_and_vars: grads_and_vars + + def gradient_clipnorm_fn(grads_and_vars): + + if isinstance( + tf.distribute.get_strategy(), + ( + tf.distribute.experimental.CentralStorageStrategy, + tf.compat.v1.distribute.experimental.CentralStorageStrategy, + ), + ): + raise ValueError( + "`global_clipnorm` is not supported with " + "`CentralStorageStrategy`. " + f"The strategy used is {tf.distribute.get_strategy()}." 
+ ) + + grads, variables = zip(*grads_and_vars) + clipped_grads, _ = tf.clip_by_global_norm(grads, clipnorm) + clipped_grads_and_vars = list(zip(clipped_grads, variables)) + return clipped_grads_and_vars + + return gradient_clipnorm_fn + + +def make_gradient_clipvalue_fn(clipvalue): + """Creates a gradient transformation function for clipping by value.""" + if clipvalue is None: + return lambda grads_and_vars: grads_and_vars + + def gradient_clipvalue_fn(grads_and_vars): + + if isinstance( + tf.distribute.get_strategy(), + ( + tf.distribute.experimental.CentralStorageStrategy, + tf.compat.v1.distribute.experimental.CentralStorageStrategy, + ), + ): + raise ValueError( + "`clipvalue` is not supported with `CentralStorageStrategy`. " + f"The strategy used is {tf.distribute.get_strategy()}." + ) + + clipped_grads_and_vars = [ + (tf.clip_by_value(g, -clipvalue, clipvalue), v) + for g, v in grads_and_vars + ] + return clipped_grads_and_vars + + return gradient_clipvalue_fn + + +def _all_reduce_sum_fn(distribution, grads_and_vars): + return distribution.extended.batch_reduce_to( + tf.distribute.ReduceOp.SUM, grads_and_vars + ) diff --git a/keras/premade_models/BUILD b/keras/premade_models/BUILD index 00286775da63..3441331df273 100644 --- a/keras/premade_models/BUILD +++ b/keras/premade_models/BUILD @@ -1,8 +1,11 @@ +# Placeholder: load unaliased py_library + # Description: # Contains the Keras Premade Models (internal TensorFlow version). load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", ], diff --git a/keras/premade_models/linear.py b/keras/premade_models/linear.py index a2518bf0d08c..e24236166955 100644 --- a/keras/premade_models/linear.py +++ b/keras/premade_models/linear.py @@ -15,6 +15,7 @@ """Built-in linear model classes.""" import tensorflow.compat.v2 as tf + from keras import activations from keras import initializers from keras import regularizers @@ -22,179 +23,196 @@ from keras.engine import input_spec from keras.engine import training from keras.layers import core + +# isort: off from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import keras_export @keras_export( - 'keras.experimental.LinearModel', - v1=['keras.experimental.LinearModel', 'keras.models.LinearModel']) -@deprecation.deprecated_endpoints('keras.experimental.LinearModel') + "keras.experimental.LinearModel", + v1=["keras.experimental.LinearModel", "keras.models.LinearModel"], +) +@deprecation.deprecated_endpoints("keras.experimental.LinearModel") class LinearModel(training.Model): - r"""Linear Model for regression and classification problems. - - This model approximates the following function: - $$y = \beta + \sum_{i=1}^{N} w_{i} * x_{i}$$ - where $$\beta$$ is the bias and $$w_{i}$$ is the weight for each feature. 
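To make the weighted-sum formula above concrete, here is a hypothetical end-to-end fit; the model should approximately recover the coefficients 0.3 and 0.2. This assumes only the `tf.keras.experimental.LinearModel` export declared above:

```python
import numpy as np
import tensorflow as tf

# Hypothetical toy regression: y = 0.3 * x0 + 0.2 * x1, zero bias.
x = np.random.uniform(-5.0, 5.0, size=(256, 2)).astype("float32")
y = 0.3 * x[:, 0] + 0.2 * x[:, 1]

model = tf.keras.experimental.LinearModel()
model.compile(optimizer="sgd", loss="mse")
model.fit(x, y, epochs=10, verbose=0)
# model.dense_layers[0].kernel should approach [[0.3], [0.2]].
```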
- - Example: - - ```python - model = LinearModel() - model.compile(optimizer='sgd', loss='mse') - model.fit(x, y, epochs=epochs) - ``` - - This model accepts sparse float inputs as well: - - Example: - ```python - model = LinearModel() - opt = tf.keras.optimizers.Adam() - loss_fn = tf.keras.losses.MeanSquaredError() - with tf.GradientTape() as tape: - output = model(sparse_input) - loss = tf.reduce_mean(loss_fn(target, output)) - grads = tape.gradient(loss, model.weights) - opt.apply_gradients(zip(grads, model.weights)) - ``` - - """ - - def __init__(self, - units=1, - activation=None, - use_bias=True, - kernel_initializer='zeros', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - **kwargs): - """Create a Linear Model. - - Args: - units: Positive integer, output dimension without the batch size. - activation: Activation function to use. - If you don't specify anything, no activation is applied. - use_bias: whether to calculate the bias/intercept for this model. If set - to False, no bias/intercept will be used in calculations, e.g., the data - is already centered. - kernel_initializer: Initializer for the `kernel` weights matrices. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: regularizer for kernel vectors. - bias_regularizer: regularizer for bias vector. - **kwargs: The keyword arguments that are passed on to BaseLayer.__init__. + r"""Linear Model for regression and classification problems. + + This model approximates the following function: + $$y = \beta + \sum_{i=1}^{N} w_{i} * x_{i}$$ + where $$\beta$$ is the bias and $$w_{i}$$ is the weight for each feature. + + Example: + + ```python + model = LinearModel() + model.compile(optimizer='sgd', loss='mse') + model.fit(x, y, epochs=epochs) + ``` + + This model accepts sparse float inputs as well: + + Example: + ```python + model = LinearModel() + opt = tf.keras.optimizers.Adam() + loss_fn = tf.keras.losses.MeanSquaredError() + with tf.GradientTape() as tape: + output = model(sparse_input) + loss = tf.reduce_mean(loss_fn(target, output)) + grads = tape.gradient(loss, model.weights) + opt.apply_gradients(zip(grads, model.weights)) + ``` + """ - self.units = units - self.activation = activations.get(activation) - self.use_bias = use_bias - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - super().__init__(**kwargs) - base_layer.keras_premade_model_gauge.get_cell('Linear').set(True) - - def build(self, input_shape): - if isinstance(input_shape, dict): - names = sorted(list(input_shape.keys())) - self.input_specs = [] - self.dense_layers = [] - for name in names: - shape = input_shape[name] - layer = core.Dense( - units=self.units, - use_bias=False, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer, - name=name) - layer.build(shape) - self.input_specs.append( - input_spec.InputSpec(shape=shape, name=name)) - self.dense_layers.append(layer) - elif isinstance(input_shape, (tuple, list)) and all( - isinstance(shape, tf.TensorShape) for shape in input_shape): - self.dense_layers = [] - for shape in input_shape: - layer = core.Dense( - units=self.units, - use_bias=False, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer) - layer.build(shape) - self.dense_layers.append(layer) - else: - # input_shape can be a 
single TensorShape or a tuple of ints. - layer = core.Dense( - units=self.units, - use_bias=False, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer) - layer.build(input_shape) - self.dense_layers = [layer] - - if self.use_bias: - self.bias = self.add_weight( - 'bias', - shape=self.units, - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - dtype=self.dtype, - trainable=True) - else: - self.bias = None - self.built = True - - def call(self, inputs): - result = None - if isinstance(inputs, dict): - names = [layer.name for layer in self.dense_layers] - different_keys = set(names) - set(inputs.keys()) - if different_keys: - raise ValueError( - 'The `inputs` dictionary does not match ' - 'the structure expected by the model.' - f'\n\tExpected keys: {set(names)}' - f'\n\tReceived keys: {set(inputs.keys())}' - f'\n\tMissing keys: {different_keys}') - inputs = [inputs[name] for name in names] - for inp, layer in zip(inputs, self.dense_layers): - output = layer(inp) - if result is None: - result = output + def __init__( + self, + units=1, + activation=None, + use_bias=True, + kernel_initializer="zeros", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + **kwargs, + ): + """Create a Linear Model. + + Args: + units: Positive integer, output dimension without the batch size. + activation: Activation function to use. + If you don't specify anything, no activation is applied. + use_bias: whether to calculate the bias/intercept for this model. If + set to False, no bias/intercept will be used in calculations, e.g., + the data is already centered. + kernel_initializer: Initializer for the `kernel` weights matrices. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: regularizer for kernel vectors. + bias_regularizer: regularizer for bias vector. + **kwargs: The keyword arguments that are passed on to + BaseLayer.__init__. + """ + + self.units = units + self.activation = activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + super().__init__(**kwargs) + base_layer.keras_premade_model_gauge.get_cell("Linear").set(True) + + def build(self, input_shape): + if isinstance(input_shape, dict): + names = sorted(list(input_shape.keys())) + self.input_specs = [] + self.dense_layers = [] + for name in names: + shape = input_shape[name] + layer = core.Dense( + units=self.units, + use_bias=False, + kernel_initializer=self.kernel_initializer, + kernel_regularizer=self.kernel_regularizer, + name=name, + ) + layer.build(shape) + self.input_specs.append( + input_spec.InputSpec(shape=shape, name=name) + ) + self.dense_layers.append(layer) + elif isinstance(input_shape, (tuple, list)) and all( + isinstance(shape, tf.TensorShape) for shape in input_shape + ): + self.dense_layers = [] + for shape in input_shape: + layer = core.Dense( + units=self.units, + use_bias=False, + kernel_initializer=self.kernel_initializer, + kernel_regularizer=self.kernel_regularizer, + ) + layer.build(shape) + self.dense_layers.append(layer) + else: + # input_shape can be a single TensorShape or a tuple of ints. 
+ layer = core.Dense( + units=self.units, + use_bias=False, + kernel_initializer=self.kernel_initializer, + kernel_regularizer=self.kernel_regularizer, + ) + layer.build(input_shape) + self.dense_layers = [layer] + + if self.use_bias: + self.bias = self.add_weight( + "bias", + shape=self.units, + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + dtype=self.dtype, + trainable=True, + ) else: - result += output - elif isinstance(inputs, (tuple, list)): - for inp, layer in zip(inputs, self.dense_layers): - output = layer(inp) - if result is None: - result = output + self.bias = None + self.built = True + + def call(self, inputs): + result = None + if isinstance(inputs, dict): + names = [layer.name for layer in self.dense_layers] + different_keys = set(names) - set(inputs.keys()) + if different_keys: + raise ValueError( + "The `inputs` dictionary does not match " + "the structure expected by the model." + f"\n\tExpected keys: {set(names)}" + f"\n\tReceived keys: {set(inputs.keys())}" + f"\n\tMissing keys: {different_keys}" + ) + inputs = [inputs[name] for name in names] + for inp, layer in zip(inputs, self.dense_layers): + output = layer(inp) + if result is None: + result = output + else: + result += output + elif isinstance(inputs, (tuple, list)): + for inp, layer in zip(inputs, self.dense_layers): + output = layer(inp) + if result is None: + result = output + else: + result += output else: - result += output - else: - result = self.dense_layers[0](inputs) - - if self.use_bias: - result = tf.nn.bias_add(result, self.bias) - if self.activation is not None: - return self.activation(result) # pylint: disable=not-callable - return result - - def get_config(self): - config = { - 'units': self.units, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - } - base_config = base_layer.Layer.get_config(self) - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - del custom_objects - return cls(**config) + result = self.dense_layers[0](inputs) + + if self.use_bias: + result = tf.nn.bias_add(result, self.bias) + if self.activation is not None: + return self.activation(result) + return result + + def get_config(self): + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + } + base_config = base_layer.Layer.get_config(self) + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + del custom_objects + return cls(**config) diff --git a/keras/premade_models/linear_test.py b/keras/premade_models/linear_test.py index c31dda2e40b5..9d7d83b76b2a 100644 --- a/keras/premade_models/linear_test.py +++ b/keras/premade_models/linear_test.py @@ -14,160 +14,164 @@ # ============================================================================== """Tests for Keras Premade 
Linear models.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np from keras import backend -from keras.testing_infra import test_combinations from keras import losses from keras.engine import input_layer from keras.engine import sequential from keras.engine import training from keras.feature_column import dense_features_v2 from keras.layers import core -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import gradient_descent from keras.premade_models import linear +from keras.testing_infra import test_combinations @test_combinations.run_all_keras_modes(always_skip_v1=True) class LinearModelTest(test_combinations.TestCase): - - def test_linear_model_with_single_input(self): - model = linear.LinearModel() - inp = np.random.uniform(low=-5., high=5., size=(64, 2)) - output = .3 * inp[:, 0] + .2 * inp[:, 1] - model.compile('sgd', 'mse', []) - model.fit(inp, output, epochs=5) - self.assertTrue(model.built) - - def test_linear_model_with_list_input(self): - model = linear.LinearModel() - input_a = np.random.uniform(low=-5., high=5., size=(64, 1)) - input_b = np.random.uniform(low=-5., high=5., size=(64, 1)) - output = .3 * input_a + .2 * input_b - model.compile('sgd', 'mse', []) - model.fit([input_a, input_b], output, epochs=5) - - def test_linear_model_with_mismatched_dict_inputs(self): - model = linear.LinearModel() - input_a = np.random.uniform(low=-5., high=5., size=(64, 1)) - input_b = np.random.uniform(low=-5., high=5., size=(64, 1)) - output = .3 * input_a + .2 * input_b - model.compile('sgd', 'mse', []) - model.build({'a': tf.TensorShape([None, 1]), - 'b': tf.TensorShape([None, 1])}) - with self.assertRaisesRegex(ValueError, 'Missing keys'): - model.fit({'c': input_a, 'b': input_b}, output, epochs=5) - - def test_linear_model_with_dict_input(self): - model = linear.LinearModel() - input_a = np.random.uniform(low=-5., high=5., size=(64, 1)) - input_b = np.random.uniform(low=-5., high=5., size=(64, 1)) - output = .3 * input_a + .2 * input_b - model.compile('sgd', 'mse', []) - model.fit({'a': input_a, 'b': input_b}, output, epochs=5) - - def test_linear_model_as_layer(self): - input_a = input_layer.Input(shape=(1,), name='a') - output_a = linear.LinearModel()(input_a) - input_b = input_layer.Input(shape=(1,), name='b') - output_b = core.Dense(units=1)(input_b) - output = output_a + output_b - model = training.Model(inputs=[input_a, input_b], outputs=[output]) - input_a_np = np.random.uniform(low=-5., high=5., size=(64, 1)) - input_b_np = np.random.uniform(low=-5., high=5., size=(64, 1)) - output_np = .3 * input_a_np + .2 * input_b_np - model.compile('sgd', 'mse', []) - model.fit([input_a_np, input_b_np], output_np, epochs=5) - - def test_linear_model_with_sparse_input(self): - indices = tf.constant([[0, 0], [0, 2], [1, 0], [1, 1]], - dtype=tf.int64) - values = tf.constant([.4, .6, .8, .5]) - shape = tf.constant([2, 3], dtype=tf.int64) - model = linear.LinearModel() - inp = tf.SparseTensor(indices, values, shape) - output = model(inp) - self.evaluate(tf.compat.v1.global_variables_initializer()) - if tf.executing_eagerly(): - weights = model.get_weights() - weights[0] = np.ones((3, 1)) - model.set_weights(weights) - output = model(inp) - self.assertAllClose([[1.], [1.3]], self.evaluate(output)) - - def test_linear_model_with_sparse_input_and_custom_training(self): - batch_size = 64 - indices = [] - values = [] - target = np.zeros((batch_size, 1)) - for i in range(64): - rand_int = np.random.randint(3) - if rand_int == 0: - 
indices.append((i, 0)) - val = np.random.uniform(low=-5., high=5.) - values.append(val) - target[i] = 0.3 * val - elif rand_int == 1: - indices.append((i, 1)) - val = np.random.uniform(low=-5., high=5.) - values.append(val) - target[i] = 0.2 * val - else: - indices.append((i, 0)) - indices.append((i, 1)) - val_1 = np.random.uniform(low=-5., high=5.) - val_2 = np.random.uniform(low=-5., high=5.) - values.append(val_1) - values.append(val_2) - target[i] = 0.3 * val_1 + 0.2 * val_2 - - indices = np.asarray(indices) - values = np.asarray(values) - shape = tf.constant([batch_size, 2], dtype=tf.int64) - inp = tf.SparseTensor(indices, values, shape) - model = linear.LinearModel(use_bias=False) - opt = gradient_descent.SGD() - for _ in range(20): - with tf.GradientTape() as t: + def test_linear_model_with_single_input(self): + model = linear.LinearModel() + inp = np.random.uniform(low=-5.0, high=5.0, size=(64, 2)) + output = 0.3 * inp[:, 0] + 0.2 * inp[:, 1] + model.compile("sgd", "mse", []) + model.fit(inp, output, epochs=5) + self.assertTrue(model.built) + + def test_linear_model_with_list_input(self): + model = linear.LinearModel() + input_a = np.random.uniform(low=-5.0, high=5.0, size=(64, 1)) + input_b = np.random.uniform(low=-5.0, high=5.0, size=(64, 1)) + output = 0.3 * input_a + 0.2 * input_b + model.compile("sgd", "mse", []) + model.fit([input_a, input_b], output, epochs=5) + + def test_linear_model_with_mismatched_dict_inputs(self): + model = linear.LinearModel() + input_a = np.random.uniform(low=-5.0, high=5.0, size=(64, 1)) + input_b = np.random.uniform(low=-5.0, high=5.0, size=(64, 1)) + output = 0.3 * input_a + 0.2 * input_b + model.compile("sgd", "mse", []) + model.build( + {"a": tf.TensorShape([None, 1]), "b": tf.TensorShape([None, 1])} + ) + with self.assertRaisesRegex(ValueError, "Missing keys"): + model.fit({"c": input_a, "b": input_b}, output, epochs=5) + + def test_linear_model_with_dict_input(self): + model = linear.LinearModel() + input_a = np.random.uniform(low=-5.0, high=5.0, size=(64, 1)) + input_b = np.random.uniform(low=-5.0, high=5.0, size=(64, 1)) + output = 0.3 * input_a + 0.2 * input_b + model.compile("sgd", "mse", []) + model.fit({"a": input_a, "b": input_b}, output, epochs=5) + + def test_linear_model_as_layer(self): + input_a = input_layer.Input(shape=(1,), name="a") + output_a = linear.LinearModel()(input_a) + input_b = input_layer.Input(shape=(1,), name="b") + output_b = core.Dense(units=1)(input_b) + output = output_a + output_b + model = training.Model(inputs=[input_a, input_b], outputs=[output]) + input_a_np = np.random.uniform(low=-5.0, high=5.0, size=(64, 1)) + input_b_np = np.random.uniform(low=-5.0, high=5.0, size=(64, 1)) + output_np = 0.3 * input_a_np + 0.2 * input_b_np + model.compile("sgd", "mse", []) + model.fit([input_a_np, input_b_np], output_np, epochs=5) + + def test_linear_model_with_sparse_input(self): + indices = tf.constant([[0, 0], [0, 2], [1, 0], [1, 1]], dtype=tf.int64) + values = tf.constant([0.4, 0.6, 0.8, 0.5]) + shape = tf.constant([2, 3], dtype=tf.int64) + model = linear.LinearModel() + inp = tf.SparseTensor(indices, values, shape) output = model(inp) - loss = backend.mean(losses.mean_squared_error(target, output)) - grads = t.gradient(loss, model.trainable_variables) - grads_and_vars = zip(grads, model.trainable_variables) - opt.apply_gradients(grads_and_vars) - - # This test is an example for a regression on categorical inputs, i.e., - # the output is 0.4, 0.6, 0.9 when input is 'alpha', 'beta', 'gamma' - # separately. 
- def test_linear_model_with_feature_column(self): - vocab_list = ['alpha', 'beta', 'gamma'] - vocab_val = [0.4, 0.6, 0.9] - data = np.random.choice(vocab_list, size=256) - y = np.zeros_like(data, dtype=np.float32) - for vocab, val in zip(vocab_list, vocab_val): - indices = np.where(data == vocab) - y[indices] = val + np.random.uniform( - low=-0.01, high=0.01, size=indices[0].shape) - cat_column = tf.feature_column.categorical_column_with_vocabulary_list( - key='symbol', vocabulary_list=vocab_list) - ind_column = tf.feature_column.indicator_column(cat_column) - dense_feature_layer = dense_features_v2.DenseFeatures([ind_column]) - linear_model = linear.LinearModel( - use_bias=False, kernel_initializer='zeros') - combined = sequential.Sequential([dense_feature_layer, linear_model]) - opt = gradient_descent.SGD(learning_rate=0.1) - combined.compile(opt, 'mse', []) - combined.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10) - self.assertAllClose([[0.4], [0.6], [0.9]], - combined.layers[1].dense_layers[0].kernel.numpy(), - atol=0.01) - - def test_config(self): - linear_model = linear.LinearModel(units=3, use_bias=True) - config = linear_model.get_config() - cloned_linear_model = linear.LinearModel.from_config(config) - self.assertEqual(linear_model.units, cloned_linear_model.units) - - -if __name__ == '__main__': - tf.test.main() + self.evaluate(tf.compat.v1.global_variables_initializer()) + if tf.executing_eagerly(): + weights = model.get_weights() + weights[0] = np.ones((3, 1)) + model.set_weights(weights) + output = model(inp) + self.assertAllClose([[1.0], [1.3]], self.evaluate(output)) + + def test_linear_model_with_sparse_input_and_custom_training(self): + batch_size = 64 + indices = [] + values = [] + target = np.zeros((batch_size, 1)) + for i in range(64): + rand_int = np.random.randint(3) + if rand_int == 0: + indices.append((i, 0)) + val = np.random.uniform(low=-5.0, high=5.0) + values.append(val) + target[i] = 0.3 * val + elif rand_int == 1: + indices.append((i, 1)) + val = np.random.uniform(low=-5.0, high=5.0) + values.append(val) + target[i] = 0.2 * val + else: + indices.append((i, 0)) + indices.append((i, 1)) + val_1 = np.random.uniform(low=-5.0, high=5.0) + val_2 = np.random.uniform(low=-5.0, high=5.0) + values.append(val_1) + values.append(val_2) + target[i] = 0.3 * val_1 + 0.2 * val_2 + + indices = np.asarray(indices) + values = np.asarray(values) + shape = tf.constant([batch_size, 2], dtype=tf.int64) + inp = tf.SparseTensor(indices, values, shape) + model = linear.LinearModel(use_bias=False) + opt = gradient_descent.SGD() + for _ in range(20): + with tf.GradientTape() as t: + output = model(inp) + loss = backend.mean(losses.mean_squared_error(target, output)) + grads = t.gradient(loss, model.trainable_variables) + grads_and_vars = zip(grads, model.trainable_variables) + opt.apply_gradients(grads_and_vars) + + # This test is an example for a regression on categorical inputs, i.e., + # the output is 0.4, 0.6, 0.9 when input is 'alpha', 'beta', 'gamma' + # separately. 
+ def test_linear_model_with_feature_column(self): + vocab_list = ["alpha", "beta", "gamma"] + vocab_val = [0.4, 0.6, 0.9] + data = np.random.choice(vocab_list, size=256) + y = np.zeros_like(data, dtype=np.float32) + for vocab, val in zip(vocab_list, vocab_val): + indices = np.where(data == vocab) + y[indices] = val + np.random.uniform( + low=-0.01, high=0.01, size=indices[0].shape + ) + cat_column = tf.feature_column.categorical_column_with_vocabulary_list( + key="symbol", vocabulary_list=vocab_list + ) + ind_column = tf.feature_column.indicator_column(cat_column) + dense_feature_layer = dense_features_v2.DenseFeatures([ind_column]) + linear_model = linear.LinearModel( + use_bias=False, kernel_initializer="zeros" + ) + combined = sequential.Sequential([dense_feature_layer, linear_model]) + opt = gradient_descent.SGD(learning_rate=0.1) + combined.compile(opt, "mse", []) + combined.fit(x={"symbol": data}, y=y, batch_size=32, epochs=10) + self.assertAllClose( + [[0.4], [0.6], [0.9]], + combined.layers[1].dense_layers[0].kernel.numpy(), + atol=0.01, + ) + + def test_config(self): + linear_model = linear.LinearModel(units=3, use_bias=True) + config = linear_model.get_config() + cloned_linear_model = linear.LinearModel.from_config(config) + self.assertEqual(linear_model.units, cloned_linear_model.units) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/premade_models/wide_deep.py b/keras/premade_models/wide_deep.py index 89f9fe0c538d..b06aa60cf729 100644 --- a/keras/premade_models/wide_deep.py +++ b/keras/premade_models/wide_deep.py @@ -15,203 +15,226 @@ """Built-in WideNDeep model classes.""" import tensorflow.compat.v2 as tf + from keras import activations from keras import backend from keras import layers as layer_module from keras.engine import base_layer from keras.engine import data_adapter from keras.engine import training as keras_training -from keras.utils import generic_utils +from keras.saving import serialization_lib + +# isort: off from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import keras_export @keras_export( - 'keras.experimental.WideDeepModel', - v1=['keras.experimental.WideDeepModel', 'keras.models.WideDeepModel']) -@deprecation.deprecated_endpoints('keras.experimental.WideDeepModel') + "keras.experimental.WideDeepModel", + v1=["keras.experimental.WideDeepModel", "keras.models.WideDeepModel"], +) +@deprecation.deprecated_endpoints("keras.experimental.WideDeepModel") class WideDeepModel(keras_training.Model): - r"""Wide & Deep Model for regression and classification problems. - - This model jointly train a linear and a dnn model. - - Example: - - ```python - linear_model = LinearModel() - dnn_model = keras.Sequential([keras.layers.Dense(units=64), - keras.layers.Dense(units=1)]) - combined_model = WideDeepModel(linear_model, dnn_model) - combined_model.compile(optimizer=['sgd', 'adam'], loss='mse', metrics=['mse']) - # define dnn_inputs and linear_inputs as separate numpy arrays or - # a single numpy array if dnn_inputs is same as linear_inputs. - combined_model.fit([linear_inputs, dnn_inputs], y, epochs) - # or define a single `tf.data.Dataset` that contains a single tensor or - # separate tensors for dnn_inputs and linear_inputs. 
- dataset = tf.data.Dataset.from_tensors(([linear_inputs, dnn_inputs], y)) - combined_model.fit(dataset, epochs) - ``` - - Both linear and dnn model can be pre-compiled and trained separately - before jointly training: - - Example: - ```python - linear_model = LinearModel() - linear_model.compile('adagrad', 'mse') - linear_model.fit(linear_inputs, y, epochs) - dnn_model = keras.Sequential([keras.layers.Dense(units=1)]) - dnn_model.compile('rmsprop', 'mse') - dnn_model.fit(dnn_inputs, y, epochs) - combined_model = WideDeepModel(linear_model, dnn_model) - combined_model.compile(optimizer=['sgd', 'adam'], loss='mse', metrics=['mse']) - combined_model.fit([linear_inputs, dnn_inputs], y, epochs) - ``` - - """ - - def __init__(self, linear_model, dnn_model, activation=None, **kwargs): - """Create a Wide & Deep Model. - - Args: - linear_model: a premade LinearModel, its output must match the output of - the dnn model. - dnn_model: a `tf.keras.Model`, its output must match the output of the - linear model. - activation: Activation function. Set it to None to maintain a linear - activation. - **kwargs: The keyword arguments that are passed on to BaseLayer.__init__. - Allowed keyword arguments include `name`. + r"""Wide & Deep Model for regression and classification problems. + + This model jointly trains a linear and a dnn model. + + Example: + + ```python + linear_model = LinearModel() + dnn_model = keras.Sequential([keras.layers.Dense(units=64), + keras.layers.Dense(units=1)]) + combined_model = WideDeepModel(linear_model, dnn_model) + combined_model.compile(optimizer=['sgd', 'adam'], + loss='mse', metrics=['mse']) + # define dnn_inputs and linear_inputs as separate numpy arrays or + # a single numpy array if dnn_inputs is same as linear_inputs. + combined_model.fit([linear_inputs, dnn_inputs], y, epochs) + # or define a single `tf.data.Dataset` that contains a single tensor or + # separate tensors for dnn_inputs and linear_inputs. 
+ dataset = tf.data.Dataset.from_tensors(([linear_inputs, dnn_inputs], y)) + combined_model.fit(dataset, epochs) + ``` + + Both linear and dnn model can be pre-compiled and trained separately + before jointly training: + + Example: + ```python + linear_model = LinearModel() + linear_model.compile('adagrad', 'mse') + linear_model.fit(linear_inputs, y, epochs) + dnn_model = keras.Sequential([keras.layers.Dense(units=1)]) + dnn_model.compile('rmsprop', 'mse') + dnn_model.fit(dnn_inputs, y, epochs) + combined_model = WideDeepModel(linear_model, dnn_model) + combined_model.compile(optimizer=['sgd', 'adam'], + loss='mse', metrics=['mse']) + combined_model.fit([linear_inputs, dnn_inputs], y, epochs) + ``` + """ - super().__init__(**kwargs) - base_layer.keras_premade_model_gauge.get_cell('WideDeep').set(True) - self.linear_model = linear_model - self.dnn_model = dnn_model - self.activation = activations.get(activation) - - def call(self, inputs, training=None): - if not isinstance(inputs, (tuple, list)) or len(inputs) != 2: - linear_inputs = dnn_inputs = inputs - else: - linear_inputs, dnn_inputs = inputs - linear_output = self.linear_model(linear_inputs) - # pylint: disable=protected-access - if self.dnn_model._expects_training_arg: - if training is None: - training = backend.learning_phase() - dnn_output = self.dnn_model(dnn_inputs, training=training) - else: - dnn_output = self.dnn_model(dnn_inputs) - output = tf.nest.map_structure( - lambda x, y: (x + y), linear_output, dnn_output) - if self.activation: - return tf.nest.map_structure(self.activation, output) - return output - - # This does not support gradient scaling and LossScaleOptimizer. - def train_step(self, data): - x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) - with tf.GradientTape() as tape: - y_pred = self(x, training=True) - loss = self.compiled_loss( - y, y_pred, sample_weight, regularization_losses=self.losses) - self.compiled_metrics.update_state(y, y_pred, sample_weight) - - if isinstance(self.optimizer, (list, tuple)): - linear_vars = self.linear_model.trainable_variables - dnn_vars = self.dnn_model.trainable_variables - linear_grads, dnn_grads = tape.gradient(loss, (linear_vars, dnn_vars)) - - linear_optimizer = self.optimizer[0] - dnn_optimizer = self.optimizer[1] - linear_optimizer.apply_gradients(zip(linear_grads, linear_vars)) - dnn_optimizer.apply_gradients(zip(dnn_grads, dnn_vars)) - else: - trainable_variables = self.trainable_variables - grads = tape.gradient(loss, trainable_variables) - self.optimizer.apply_gradients(zip(grads, trainable_variables)) - - return {m.name: m.result() for m in self.metrics} - - def _make_train_function(self): - # Only needed for graph mode and model_to_estimator. - has_recompiled = self._recompile_weights_loss_and_weighted_metrics() - self._check_trainable_weights_consistency() - # If we have re-compiled the loss/weighted metric sub-graphs then create - # train function even if one exists already. This is because - # `_feed_sample_weights` list has been updated on re-compile. - if getattr(self, 'train_function', None) is None or has_recompiled: - # Restore the compiled trainable state. 
- current_trainable_state = self._get_trainable_state() - self._set_trainable_state(self._compiled_trainable_state) - - inputs = ( - self._feed_inputs + self._feed_targets + self._feed_sample_weights) - if not isinstance(backend.symbolic_learning_phase(), int): - inputs += [backend.symbolic_learning_phase()] - - if isinstance(self.optimizer, (list, tuple)): - linear_optimizer = self.optimizer[0] - dnn_optimizer = self.optimizer[1] - else: - linear_optimizer = self.optimizer - dnn_optimizer = self.optimizer - - with backend.get_graph().as_default(): - with backend.name_scope('training'): - # Training updates - updates = [] - linear_updates = linear_optimizer.get_updates( - params=self.linear_model.trainable_weights, # pylint: disable=protected-access - loss=self.total_loss) - updates += linear_updates - dnn_updates = dnn_optimizer.get_updates( - params=self.dnn_model.trainable_weights, # pylint: disable=protected-access - loss=self.total_loss) - updates += dnn_updates - # Unconditional updates - updates += self.get_updates_for(None) - # Conditional updates relevant to this model - updates += self.get_updates_for(self.inputs) - - metrics = self._get_training_eval_metrics() - metrics_tensors = [ - m._call_result for m in metrics if hasattr(m, '_call_result') # pylint: disable=protected-access - ] - - with backend.name_scope('training'): - # Gets loss and metrics. Updates weights at each call. - fn = backend.function( - inputs, [self.total_loss] + metrics_tensors, - updates=updates, - name='train_function', - **self._function_kwargs) - setattr(self, 'train_function', fn) - - # Restore the current trainable state - self._set_trainable_state(current_trainable_state) - - def get_config(self): - linear_config = generic_utils.serialize_keras_object(self.linear_model) - dnn_config = generic_utils.serialize_keras_object(self.dnn_model) - config = { - 'linear_model': linear_config, - 'dnn_model': dnn_config, - 'activation': activations.serialize(self.activation), - } - base_config = base_layer.Layer.get_config(self) - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - linear_config = config.pop('linear_model') - linear_model = layer_module.deserialize(linear_config, custom_objects) - dnn_config = config.pop('dnn_model') - dnn_model = layer_module.deserialize(dnn_config, custom_objects) - activation = activations.deserialize( - config.pop('activation', None), custom_objects=custom_objects) - return cls( - linear_model=linear_model, - dnn_model=dnn_model, - activation=activation, - **config) + + def __init__(self, linear_model, dnn_model, activation=None, **kwargs): + """Create a Wide & Deep Model. + + Args: + linear_model: a premade LinearModel, its output must match the output + of the dnn model. + dnn_model: a `tf.keras.Model`, its output must match the output of the + linear model. + activation: Activation function. Set it to None to maintain a linear + activation. + **kwargs: The keyword arguments that are passed on to + BaseLayer.__init__. Allowed keyword arguments include `name`. 
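One point the Args above leave implicit: `activation` is applied to the elementwise sum of the two sub-model outputs, so `None` keeps the combined output linear. A tiny illustration with made-up values:

```python
import tensorflow as tf

# With made-up sub-model outputs, the combined output is wide + deep,
# then the optional activation.
wide_out, deep_out = tf.constant([[0.3]]), tf.constant([[-0.5]])
summed = wide_out + deep_out                       # [[-0.2]]
print(tf.keras.activations.relu(summed).numpy())   # [[0.]] with 'relu'
# With activation=None the model returns the raw sum, [[-0.2]].
```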
+ """ + super().__init__(**kwargs) + base_layer.keras_premade_model_gauge.get_cell("WideDeep").set(True) + self.linear_model = linear_model + self.dnn_model = dnn_model + self.activation = activations.get(activation) + + def call(self, inputs, training=None): + if not isinstance(inputs, (tuple, list)) or len(inputs) != 2: + linear_inputs = dnn_inputs = inputs + else: + linear_inputs, dnn_inputs = inputs + linear_output = self.linear_model(linear_inputs) + + if self.dnn_model._expects_training_arg: + if training is None: + training = backend.learning_phase() + dnn_output = self.dnn_model(dnn_inputs, training=training) + else: + dnn_output = self.dnn_model(dnn_inputs) + output = tf.nest.map_structure( + lambda x, y: (x + y), linear_output, dnn_output + ) + if self.activation: + return tf.nest.map_structure(self.activation, output) + return output + + # This does not support gradient scaling and LossScaleOptimizer. + def train_step(self, data): + x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) + with tf.GradientTape() as tape: + y_pred = self(x, training=True) + loss = self.compiled_loss( + y, y_pred, sample_weight, regularization_losses=self.losses + ) + self.compiled_metrics.update_state(y, y_pred, sample_weight) + + if isinstance(self.optimizer, (list, tuple)): + linear_vars = self.linear_model.trainable_variables + dnn_vars = self.dnn_model.trainable_variables + linear_grads, dnn_grads = tape.gradient( + loss, (linear_vars, dnn_vars) + ) + + linear_optimizer = self.optimizer[0] + dnn_optimizer = self.optimizer[1] + linear_optimizer.apply_gradients(zip(linear_grads, linear_vars)) + dnn_optimizer.apply_gradients(zip(dnn_grads, dnn_vars)) + else: + trainable_variables = self.trainable_variables + grads = tape.gradient(loss, trainable_variables) + self.optimizer.apply_gradients(zip(grads, trainable_variables)) + + return {m.name: m.result() for m in self.metrics} + + def _make_train_function(self): + # Only needed for graph mode and model_to_estimator. + has_recompiled = self._recompile_weights_loss_and_weighted_metrics() + self._check_trainable_weights_consistency() + # If we have re-compiled the loss/weighted metric sub-graphs then create + # train function even if one exists already. This is because + # `_feed_sample_weights` list has been updated on re-compile. + if getattr(self, "train_function", None) is None or has_recompiled: + # Restore the compiled trainable state. 
+ current_trainable_state = self._get_trainable_state() + self._set_trainable_state(self._compiled_trainable_state) + + inputs = ( + self._feed_inputs + + self._feed_targets + + self._feed_sample_weights + ) + if not isinstance(backend.symbolic_learning_phase(), int): + inputs += [backend.symbolic_learning_phase()] + + if isinstance(self.optimizer, (list, tuple)): + linear_optimizer = self.optimizer[0] + dnn_optimizer = self.optimizer[1] + else: + linear_optimizer = self.optimizer + dnn_optimizer = self.optimizer + + with backend.get_graph().as_default(): + with backend.name_scope("training"): + # Training updates + updates = [] + linear_updates = linear_optimizer.get_updates( + params=self.linear_model.trainable_weights, + loss=self.total_loss, + ) + updates += linear_updates + dnn_updates = dnn_optimizer.get_updates( + params=self.dnn_model.trainable_weights, + loss=self.total_loss, + ) + updates += dnn_updates + # Unconditional updates + updates += self.get_updates_for(None) + # Conditional updates relevant to this model + updates += self.get_updates_for(self.inputs) + + metrics = self._get_training_eval_metrics() + metrics_tensors = [ + m._call_result + for m in metrics + if hasattr(m, "_call_result") + ] + + with backend.name_scope("training"): + # Gets loss and metrics. Updates weights at each call. + fn = backend.function( + inputs, + [self.total_loss] + metrics_tensors, + updates=updates, + name="train_function", + **self._function_kwargs + ) + setattr(self, "train_function", fn) + + # Restore the current trainable state + self._set_trainable_state(current_trainable_state) + + def get_config(self): + linear_config = serialization_lib.serialize_keras_object( + self.linear_model + ) + dnn_config = serialization_lib.serialize_keras_object(self.dnn_model) + config = { + "linear_model": linear_config, + "dnn_model": dnn_config, + "activation": activations.serialize(self.activation), + } + base_config = base_layer.Layer.get_config(self) + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + linear_config = config.pop("linear_model") + linear_model = layer_module.deserialize(linear_config, custom_objects) + dnn_config = config.pop("dnn_model") + dnn_model = layer_module.deserialize(dnn_config, custom_objects) + activation = activations.deserialize( + config.pop("activation", None), custom_objects=custom_objects + ) + return cls( + linear_model=linear_model, + dnn_model=dnn_model, + activation=activation, + **config + ) diff --git a/keras/premade_models/wide_deep_test.py b/keras/premade_models/wide_deep_test.py index 5b0ec003f87b..076c12efb300 100644 --- a/keras/premade_models/wide_deep_test.py +++ b/keras/premade_models/wide_deep_test.py @@ -14,257 +14,304 @@ # ============================================================================== """Tests for Keras Premade WideNDeep models.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils from keras.engine import input_layer from keras.engine import sequential from keras.engine import training from keras.feature_column import dense_features_v2 from keras.layers import core -from keras.optimizers.optimizer_v2 import gradient_descent +from keras.optimizers.legacy import gradient_descent from keras.premade_models import linear from keras.premade_models import wide_deep +from keras.testing_infra import test_combinations +from keras.testing_infra 
import test_utils @test_combinations.run_all_keras_modes(always_skip_v1=True) class WideDeepModelTest(test_combinations.TestCase): + def test_wide_deep_model(self): + linear_model = linear.LinearModel(units=1) + dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) + wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) + linear_inp = np.random.uniform(low=-5.0, high=5.0, size=(64, 2)) + dnn_inp = np.random.uniform(low=-5.0, high=5.0, size=(64, 3)) + inputs = [linear_inp, dnn_inp] + output = 0.3 * linear_inp[:, 0] + 0.2 * dnn_inp[:, 1] + wide_deep_model.compile( + optimizer=["sgd", "adam"], + loss="mse", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + wide_deep_model.fit(inputs, output, epochs=5) + self.assertTrue(wide_deep_model.built) + + def test_wide_deep_model_backprop(self): + with self.cached_session(): + linear_model = linear.LinearModel( + units=1, kernel_initializer="zeros" + ) + dnn_model = sequential.Sequential( + [core.Dense(units=1, kernel_initializer="zeros")] + ) + wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) + linear_inp = np.array([[1.0]]) + dnn_inp = np.array([[1.0]]) + inputs = [linear_inp, dnn_inp] + output = linear_inp + 2 * dnn_inp + linear_opt = gradient_descent.SGD(learning_rate=0.1) + dnn_opt = gradient_descent.SGD(learning_rate=0.3) + wide_deep_model.compile( + optimizer=[linear_opt, dnn_opt], + loss="mse", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + self.evaluate(tf.compat.v1.global_variables_initializer()) + wide_deep_model.fit(inputs, output, epochs=1) + self.assertAllClose( + [[0.6]], + self.evaluate( + wide_deep_model.linear_model.dense_layers[0].kernel + ), + ) + self.assertAllClose( + [[1.8]], + self.evaluate(wide_deep_model.dnn_model.layers[0].kernel), + ) + + def test_wide_deep_model_with_single_input(self): + linear_model = linear.LinearModel(units=1) + dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) + wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) + inputs = np.random.uniform(low=-5.0, high=5.0, size=(64, 3)) + output = 0.3 * inputs[:, 0] + wide_deep_model.compile( + optimizer=["sgd", "adam"], + loss="mse", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + wide_deep_model.fit(inputs, output, epochs=5) - def test_wide_deep_model(self): - linear_model = linear.LinearModel(units=1) - dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) - wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) - linear_inp = np.random.uniform(low=-5., high=5., size=(64, 2)) - dnn_inp = np.random.uniform(low=-5., high=5., size=(64, 3)) - inputs = [linear_inp, dnn_inp] - output = .3 * linear_inp[:, 0] + .2 * dnn_inp[:, 1] - wide_deep_model.compile( - optimizer=['sgd', 'adam'], - loss='mse', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - wide_deep_model.fit(inputs, output, epochs=5) - self.assertTrue(wide_deep_model.built) + def test_wide_deep_model_with_multi_outputs(self): + inp = input_layer.Input(shape=(1,), name="linear") + l = linear.LinearModel(units=2, use_bias=False)(inp) + l1, l2 = tf.split(l, num_or_size_splits=2, axis=1) + linear_model = training.Model(inp, [l1, l2]) + linear_model.set_weights([np.asarray([[0.5, 0.3]])]) + h = core.Dense(units=2, use_bias=False)(inp) + h1, h2 = tf.split(h, num_or_size_splits=2, axis=1) + dnn_model = training.Model(inp, [h1, h2]) + dnn_model.set_weights([np.asarray([[0.1, -0.5]])]) + wide_deep_model = wide_deep.WideDeepModel(linear_model, 
dnn_model) + inp_np = np.asarray([[1.0]]) + out1, out2 = wide_deep_model(inp_np) + # output should be (0.5 + 0.1), and (0.3 - 0.5) + self.assertAllClose([[0.6]], out1) + self.assertAllClose([[-0.2]], out2) - def test_wide_deep_model_backprop(self): - with self.cached_session(): - linear_model = linear.LinearModel(units=1, kernel_initializer='zeros') - dnn_model = sequential.Sequential( - [core.Dense(units=1, kernel_initializer='zeros')]) - wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) - linear_inp = np.array([[1.]]) - dnn_inp = np.array([[1.]]) - inputs = [linear_inp, dnn_inp] - output = linear_inp + 2 * dnn_inp - linear_opt = gradient_descent.SGD(learning_rate=.1) - dnn_opt = gradient_descent.SGD(learning_rate=.3) - wide_deep_model.compile( - optimizer=[linear_opt, dnn_opt], - loss='mse', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - self.evaluate(tf.compat.v1.global_variables_initializer()) - wide_deep_model.fit(inputs, output, epochs=1) - self.assertAllClose( - [[0.6]], - self.evaluate(wide_deep_model.linear_model.dense_layers[0].kernel)) - self.assertAllClose([[1.8]], - self.evaluate( - wide_deep_model.dnn_model.layers[0].kernel)) + wide_deep_model = wide_deep.WideDeepModel( + linear_model, dnn_model, activation="relu" + ) + out1, out2 = wide_deep_model(inp_np) + # output should be relu((0.5 + 0.1)), and relu((0.3 - 0.5)) + self.assertAllClose([[0.6]], out1) + self.assertAllClose([[0.0]], out2) - def test_wide_deep_model_with_single_input(self): - linear_model = linear.LinearModel(units=1) - dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) - wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) - inputs = np.random.uniform(low=-5., high=5., size=(64, 3)) - output = .3 * inputs[:, 0] - wide_deep_model.compile( - optimizer=['sgd', 'adam'], - loss='mse', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - wide_deep_model.fit(inputs, output, epochs=5) + def test_wide_deep_model_with_single_optimizer(self): + linear_model = linear.LinearModel(units=1) + dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) + wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) + linear_inp = np.random.uniform(low=-5.0, high=5.0, size=(64, 2)) + dnn_inp = np.random.uniform(low=-5.0, high=5.0, size=(64, 3)) + inputs = [linear_inp, dnn_inp] + output = 0.3 * linear_inp[:, 0] + 0.2 * dnn_inp[:, 1] + wide_deep_model.compile( + optimizer="sgd", + loss="mse", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + wide_deep_model.fit(inputs, output, epochs=5) + self.assertTrue(wide_deep_model.built) - def test_wide_deep_model_with_multi_outputs(self): - inp = input_layer.Input(shape=(1,), name='linear') - l = linear.LinearModel(units=2, use_bias=False)(inp) - l1, l2 = tf.split(l, num_or_size_splits=2, axis=1) - linear_model = training.Model(inp, [l1, l2]) - linear_model.set_weights([np.asarray([[0.5, 0.3]])]) - h = core.Dense(units=2, use_bias=False)(inp) - h1, h2 = tf.split(h, num_or_size_splits=2, axis=1) - dnn_model = training.Model(inp, [h1, h2]) - dnn_model.set_weights([np.asarray([[0.1, -0.5]])]) - wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) - inp_np = np.asarray([[1.]]) - out1, out2 = wide_deep_model(inp_np) - # output should be (0.5 + 0.1), and (0.3 - 0.5) - self.assertAllClose([[0.6]], out1) - self.assertAllClose([[-0.2]], out2) + def test_wide_deep_model_as_layer(self): + linear_model = linear.LinearModel(units=1) + dnn_model = 
sequential.Sequential([core.Dense(units=1)]) + linear_input = input_layer.Input(shape=(3,), name="linear") + dnn_input = input_layer.Input(shape=(5,), name="dnn") + wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) + wide_deep_output = wide_deep_model((linear_input, dnn_input)) + input_b = input_layer.Input(shape=(1,), name="b") + output_b = core.Dense(units=1)(input_b) + model = training.Model( + inputs=[linear_input, dnn_input, input_b], + outputs=[wide_deep_output + output_b], + ) + linear_input_np = np.random.uniform(low=-5.0, high=5.0, size=(64, 3)) + dnn_input_np = np.random.uniform(low=-5.0, high=5.0, size=(64, 5)) + input_b_np = np.random.uniform(low=-5.0, high=5.0, size=(64,)) + output_np = ( + linear_input_np[:, 0] + 0.2 * dnn_input_np[:, 1] + input_b_np + ) + model.compile( + optimizer="sgd", + loss="mse", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit( + [linear_input_np, dnn_input_np, input_b_np], output_np, epochs=5 + ) - wide_deep_model = wide_deep.WideDeepModel( - linear_model, dnn_model, activation='relu') - out1, out2 = wide_deep_model(inp_np) - # output should be relu((0.5 + 0.1)), and relu((0.3 - 0.5)) - self.assertAllClose([[0.6]], out1) - self.assertAllClose([[0.]], out2) + def test_wide_deep_model_with_sub_model_trained(self): + linear_model = linear.LinearModel(units=1) + dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) + wide_deep_model = wide_deep.WideDeepModel( + linear.LinearModel(units=1), + sequential.Sequential([core.Dense(units=1, input_dim=3)]), + ) + linear_inp = np.random.uniform(low=-5.0, high=5.0, size=(64, 2)) + dnn_inp = np.random.uniform(low=-5.0, high=5.0, size=(64, 3)) + inputs = [linear_inp, dnn_inp] + output = 0.3 * linear_inp[:, 0] + 0.2 * dnn_inp[:, 1] + linear_model.compile( + optimizer="sgd", + loss="mse", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + dnn_model.compile( + optimizer="adam", + loss="mse", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + linear_model.fit(linear_inp, output, epochs=50) + dnn_model.fit(dnn_inp, output, epochs=50) + wide_deep_model.compile( + optimizer=["sgd", "adam"], + loss="mse", + metrics=[], + run_eagerly=test_utils.should_run_eagerly(), + ) + wide_deep_model.fit(inputs, output, epochs=50) - def test_wide_deep_model_with_single_optimizer(self): - linear_model = linear.LinearModel(units=1) - dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) - wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) - linear_inp = np.random.uniform(low=-5., high=5., size=(64, 2)) - dnn_inp = np.random.uniform(low=-5., high=5., size=(64, 3)) - inputs = [linear_inp, dnn_inp] - output = .3 * linear_inp[:, 0] + .2 * dnn_inp[:, 1] - wide_deep_model.compile( - optimizer='sgd', - loss='mse', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - wide_deep_model.fit(inputs, output, epochs=5) - self.assertTrue(wide_deep_model.built) + # This test is an example for cases where linear and dnn model accepts + # same raw input and same transformed inputs, i.e., the raw input is + # categorical, and both linear and dnn model accept one hot encoding. 
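Before the test that follows, a standalone look at what the shared one-hot transform produces. This uses the same (now legacy) `tf.feature_column` API as the tests; the batch values are made up:

```python
import tensorflow as tf

vocab_list = ["alpha", "beta", "gamma"]
cat_column = tf.feature_column.categorical_column_with_vocabulary_list(
    key="symbol", vocabulary_list=vocab_list)
ind_column = tf.feature_column.indicator_column(cat_column)
dense_features = tf.keras.layers.DenseFeatures([ind_column])

# Two example rows; each becomes a one-hot vector over the vocabulary.
batch = {"symbol": tf.constant([["alpha"], ["gamma"]])}
print(dense_features(batch).numpy())
# [[1. 0. 0.]
#  [0. 0. 1.]]
```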
+    def test_wide_deep_model_with_single_feature_column(self):
+        vocab_list = ["alpha", "beta", "gamma"]
+        vocab_val = [0.4, 0.6, 0.9]
+        data = np.random.choice(vocab_list, size=256)
+        y = np.zeros_like(data, dtype=np.float32)
+        for vocab, val in zip(vocab_list, vocab_val):
+            indices = np.where(data == vocab)
+            y[indices] = val + np.random.uniform(
+                low=-0.01, high=0.01, size=indices[0].shape
+            )
+        cat_column = tf.feature_column.categorical_column_with_vocabulary_list(
+            key="symbol", vocabulary_list=vocab_list
+        )
+        ind_column = tf.feature_column.indicator_column(cat_column)
+        dense_feature_layer = dense_features_v2.DenseFeatures([ind_column])
+        linear_model = linear.LinearModel(
+            use_bias=False, kernel_initializer="zeros"
+        )
+        dnn_model = sequential.Sequential([core.Dense(units=1)])
+        wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
+        combined = sequential.Sequential([dense_feature_layer, wide_deep_model])
+        opt = gradient_descent.SGD(learning_rate=0.1)
+        combined.compile(
+            opt, "mse", [], run_eagerly=test_utils.should_run_eagerly()
+        )
+        combined.fit(x={"symbol": data}, y=y, batch_size=32, epochs=10)
-  def test_wide_deep_model_as_layer(self):
-    linear_model = linear.LinearModel(units=1)
-    dnn_model = sequential.Sequential([core.Dense(units=1)])
-    linear_input = input_layer.Input(shape=(3,), name='linear')
-    dnn_input = input_layer.Input(shape=(5,), name='dnn')
-    wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
-    wide_deep_output = wide_deep_model((linear_input, dnn_input))
-    input_b = input_layer.Input(shape=(1,), name='b')
-    output_b = core.Dense(units=1)(input_b)
-    model = training.Model(
-        inputs=[linear_input, dnn_input, input_b],
-        outputs=[wide_deep_output + output_b])
-    linear_input_np = np.random.uniform(low=-5., high=5., size=(64, 3))
-    dnn_input_np = np.random.uniform(low=-5., high=5., size=(64, 5))
-    input_b_np = np.random.uniform(low=-5., high=5., size=(64,))
-    output_np = linear_input_np[:, 0] + .2 * dnn_input_np[:, 1] + input_b_np
-    model.compile(
-        optimizer='sgd',
-        loss='mse',
-        metrics=[],
-        run_eagerly=test_utils.should_run_eagerly())
-    model.fit([linear_input_np, dnn_input_np, input_b_np], output_np, epochs=5)
+    # This test is an example for cases where the linear and dnn models
+    # accept the same raw input but different transformed inputs, i.e., the
+    # raw input is categorical, and the linear model accepts one-hot
+    # encoding while the dnn model accepts embedding encoding.
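A standalone look at the two encodings the comment above contrasts, before the test that follows (shapes only; the embedding weights are randomly initialized, so only the dimensions are meaningful):

```python
import tensorflow as tf

vocab_list = ["alpha", "beta", "gamma"]
cat_column = tf.feature_column.categorical_column_with_vocabulary_list(
    key="symbol", vocabulary_list=vocab_list)
# Wide path: sparse one-hot. Deep path: dense learned embedding.
wide_features = tf.keras.layers.DenseFeatures(
    [tf.feature_column.indicator_column(cat_column)])
deep_features = tf.keras.layers.DenseFeatures(
    [tf.feature_column.embedding_column(cat_column, dimension=5)])

batch = {"symbol": tf.constant([["beta"]])}
print(wide_features(batch).shape)  # (1, 3): one-hot, width == vocab size
print(deep_features(batch).shape)  # (1, 5): embedding dimension
```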
+ def test_wide_deep_model_with_two_feature_columns(self): + vocab_list = ["alpha", "beta", "gamma"] + vocab_val = [0.4, 0.6, 0.9] + data = np.random.choice(vocab_list, size=256) + y = np.zeros_like(data, dtype=np.float32) + for vocab, val in zip(vocab_list, vocab_val): + indices = np.where(data == vocab) + y[indices] = val + np.random.uniform( + low=-0.01, high=0.01, size=indices[0].shape + ) + cat_column = tf.feature_column.categorical_column_with_vocabulary_list( + key="symbol", vocabulary_list=vocab_list + ) + ind_column = tf.feature_column.indicator_column(cat_column) + emb_column = tf.feature_column.embedding_column(cat_column, dimension=5) + linear_feature_layer = dense_features_v2.DenseFeatures([ind_column]) + linear_model = linear.LinearModel( + use_bias=False, kernel_initializer="zeros" + ) + combined_linear = sequential.Sequential( + [linear_feature_layer, linear_model] + ) + dnn_model = sequential.Sequential([core.Dense(units=1)]) + dnn_feature_layer = dense_features_v2.DenseFeatures([emb_column]) + combined_dnn = sequential.Sequential([dnn_feature_layer, dnn_model]) + wide_deep_model = wide_deep.WideDeepModel(combined_linear, combined_dnn) + opt = gradient_descent.SGD(learning_rate=0.1) + wide_deep_model.compile( + opt, "mse", [], run_eagerly=test_utils.should_run_eagerly() + ) + wide_deep_model.fit(x={"symbol": data}, y=y, batch_size=32, epochs=10) - def test_wide_deep_model_with_sub_model_trained(self): - linear_model = linear.LinearModel(units=1) - dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) - wide_deep_model = wide_deep.WideDeepModel( - linear.LinearModel(units=1), - sequential.Sequential([core.Dense(units=1, input_dim=3)])) - linear_inp = np.random.uniform(low=-5., high=5., size=(64, 2)) - dnn_inp = np.random.uniform(low=-5., high=5., size=(64, 3)) - inputs = [linear_inp, dnn_inp] - output = .3 * linear_inp[:, 0] + .2 * dnn_inp[:, 1] - linear_model.compile( - optimizer='sgd', - loss='mse', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - dnn_model.compile( - optimizer='adam', - loss='mse', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - linear_model.fit(linear_inp, output, epochs=50) - dnn_model.fit(dnn_inp, output, epochs=50) - wide_deep_model.compile( - optimizer=['sgd', 'adam'], - loss='mse', - metrics=[], - run_eagerly=test_utils.should_run_eagerly()) - wide_deep_model.fit(inputs, output, epochs=50) + def test_config(self): + linear_model = linear.LinearModel(units=1) + dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) + wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) + config = wide_deep_model.get_config() + cloned_wide_deep_model = wide_deep.WideDeepModel.from_config(config) + self.assertEqual( + linear_model.units, cloned_wide_deep_model.linear_model.units + ) + self.assertEqual( + dnn_model.layers[0].units, + cloned_wide_deep_model.dnn_model.layers[0].units, + ) - # This test is an example for cases where linear and dnn model accepts - # same raw input and same transformed inputs, i.e., the raw input is - # categorical, and both linear and dnn model accept one hot encoding. 
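The kernel values asserted in `test_wide_deep_model_backprop` earlier in this file can be hand-checked; a quick derivation as executable arithmetic (all numbers follow from that test's setup):

```python
# Both kernels start at zero, x = 1.0, and the target is y = 1 + 2*1 = 3,
# so the first prediction is 0 and d(MSE)/d(pred) = 2 * (0 - 3) = -6.
# With x = 1, each kernel receives gradient -6; one SGD step then gives:
grad = 2 * (0.0 - 3.0)       # -6.0
print(0.0 - 0.1 * grad)      # 0.6 -> linear kernel (learning_rate=0.1)
print(0.0 - 0.3 * grad)      # 1.8 -> dnn kernel (learning_rate=0.3)
```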
- def test_wide_deep_model_with_single_feature_column(self): - vocab_list = ['alpha', 'beta', 'gamma'] - vocab_val = [0.4, 0.6, 0.9] - data = np.random.choice(vocab_list, size=256) - y = np.zeros_like(data, dtype=np.float32) - for vocab, val in zip(vocab_list, vocab_val): - indices = np.where(data == vocab) - y[indices] = val + np.random.uniform( - low=-0.01, high=0.01, size=indices[0].shape) - cat_column = tf.feature_column.categorical_column_with_vocabulary_list( - key='symbol', vocabulary_list=vocab_list) - ind_column = tf.feature_column.indicator_column(cat_column) - dense_feature_layer = dense_features_v2.DenseFeatures([ind_column]) - linear_model = linear.LinearModel( - use_bias=False, kernel_initializer='zeros') - dnn_model = sequential.Sequential([core.Dense(units=1)]) - wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) - combined = sequential.Sequential([dense_feature_layer, wide_deep_model]) - opt = gradient_descent.SGD(learning_rate=0.1) - combined.compile( - opt, - 'mse', [], - run_eagerly=test_utils.should_run_eagerly()) - combined.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10) + def test_config_with_custom_objects(self): + def my_activation(x): + return x - # This test is an example for cases where linear and dnn model accepts - # same raw input but different transformed inputs, i.e,. the raw input is - # categorical, and linear model accepts one hot encoding, while dnn model - # accepts embedding encoding. - def test_wide_deep_model_with_two_feature_columns(self): - vocab_list = ['alpha', 'beta', 'gamma'] - vocab_val = [0.4, 0.6, 0.9] - data = np.random.choice(vocab_list, size=256) - y = np.zeros_like(data, dtype=np.float32) - for vocab, val in zip(vocab_list, vocab_val): - indices = np.where(data == vocab) - y[indices] = val + np.random.uniform( - low=-0.01, high=0.01, size=indices[0].shape) - cat_column = tf.feature_column.categorical_column_with_vocabulary_list( - key='symbol', vocabulary_list=vocab_list) - ind_column = tf.feature_column.indicator_column(cat_column) - emb_column = tf.feature_column.embedding_column(cat_column, dimension=5) - linear_feature_layer = dense_features_v2.DenseFeatures([ind_column]) - linear_model = linear.LinearModel( - use_bias=False, kernel_initializer='zeros') - combined_linear = sequential.Sequential( - [linear_feature_layer, linear_model]) - dnn_model = sequential.Sequential([core.Dense(units=1)]) - dnn_feature_layer = dense_features_v2.DenseFeatures([emb_column]) - combined_dnn = sequential.Sequential([dnn_feature_layer, dnn_model]) - wide_deep_model = wide_deep.WideDeepModel(combined_linear, combined_dnn) - opt = gradient_descent.SGD(learning_rate=0.1) - wide_deep_model.compile( - opt, - 'mse', [], - run_eagerly=test_utils.should_run_eagerly()) - wide_deep_model.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10) + linear_model = linear.LinearModel(units=1) + dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) + wide_deep_model = wide_deep.WideDeepModel( + linear_model, dnn_model, activation=my_activation + ) + config = wide_deep_model.get_config() + cloned_wide_deep_model = wide_deep.WideDeepModel.from_config( + config, custom_objects={"my_activation": my_activation} + ) + self.assertEqual(cloned_wide_deep_model.activation, my_activation) - def test_config(self): - linear_model = linear.LinearModel(units=1) - dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) - wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) - config = wide_deep_model.get_config() - 
cloned_wide_deep_model = wide_deep.WideDeepModel.from_config(config) - self.assertEqual(linear_model.units, - cloned_wide_deep_model.linear_model.units) - self.assertEqual(dnn_model.layers[0].units, - cloned_wide_deep_model.dnn_model.layers[0].units) + def test_export(self): + input1 = input_layer.Input(shape=(1,)) + output1 = linear.LinearModel()(input1) + linear_model = training.Model(input1, output1) - def test_config_with_custom_objects(self): + input2 = input_layer.Input(shape=(1,)) + output2 = core.Dense(units=1)(input2) + dnn_model = training.Model(input2, output2) - def my_activation(x): - return x + wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model) + wide_deep_model.compile(optimizer=["adam", "adam"]) - linear_model = linear.LinearModel(units=1) - dnn_model = sequential.Sequential([core.Dense(units=1, input_dim=3)]) - wide_deep_model = wide_deep.WideDeepModel( - linear_model, dnn_model, activation=my_activation) - config = wide_deep_model.get_config() - cloned_wide_deep_model = wide_deep.WideDeepModel.from_config( - config, custom_objects={'my_activation': my_activation}) - self.assertEqual(cloned_wide_deep_model.activation, my_activation) + output = wide_deep_model([input1, input2]) + model = training.Model([input1, input2], output) + model.compile() + model.export(self.get_temp_dir()) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/preprocessing/BUILD b/keras/preprocessing/BUILD index 8cb88f6ecbbc..f4613447a258 100644 --- a/keras/preprocessing/BUILD +++ b/keras/preprocessing/BUILD @@ -1,9 +1,11 @@ # Description: # Contains the Keras preprocessing layers (internal TensorFlow version). +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", ], diff --git a/keras/preprocessing/image.py b/keras/preprocessing/image.py index 08ee76e0c949..2aec637f51b9 100644 --- a/keras/preprocessing/image.py +++ b/keras/preprocessing/image.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=invalid-name -# pylint: disable=g-import-not-at-top -# pylint: disable=g-classes-have-attributes + """Utilies for image preprocessing and augmentation. @@ -35,1518 +33,847 @@ import threading import warnings +import numpy as np + from keras import backend from keras.utils import data_utils from keras.utils import image_utils -import numpy as np +from keras.utils import io_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export try: - import scipy - from scipy import linalg # pylint: disable=unused-import - from scipy import ndimage # pylint: disable=unused-import + import scipy + from scipy import linalg # noqa: F401 + from scipy import ndimage # noqa: F401 except ImportError: - pass + pass try: - from PIL import ImageEnhance + from PIL import ImageEnhance except ImportError: - ImageEnhance = None + ImageEnhance = None -@keras_export('keras.preprocessing.image.Iterator') +@keras_export("keras.preprocessing.image.Iterator") class Iterator(data_utils.Sequence): - """Base class for image data iterators. - - Deprecated: `tf.keras.preprocessing.image.Iterator` is not recommended for - new code. 
Prefer loading images with - `tf.keras.utils.image_dataset_from_directory` and transforming the output - `tf.data.Dataset` with preprocessing layers. For more information, see the - tutorials for [loading images]( - https://www.tensorflow.org/tutorials/load_data/images) and - [augmenting images]( - https://www.tensorflow.org/tutorials/images/data_augmentation), as well as - the [preprocessing layer guide]( - https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Every `Iterator` must implement the `_get_batches_of_transformed_samples` - method. - - Args: - n: Integer, total number of samples in the dataset to loop over. - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - seed: Random seeding for data shuffling. - """ - white_list_formats = ('png', 'jpg', 'jpeg', 'bmp', 'ppm', 'tif', 'tiff') - - def __init__(self, n, batch_size, shuffle, seed): - self.n = n - self.batch_size = batch_size - self.seed = seed - self.shuffle = shuffle - self.batch_index = 0 - self.total_batches_seen = 0 - self.lock = threading.Lock() - self.index_array = None - self.index_generator = self._flow_index() - - def _set_index_array(self): - self.index_array = np.arange(self.n) - if self.shuffle: - self.index_array = np.random.permutation(self.n) - - def __getitem__(self, idx): - if idx >= len(self): - raise ValueError('Asked to retrieve element {idx}, ' - 'but the Sequence ' - 'has length {length}'.format(idx=idx, length=len(self))) - if self.seed is not None: - np.random.seed(self.seed + self.total_batches_seen) - self.total_batches_seen += 1 - if self.index_array is None: - self._set_index_array() - index_array = self.index_array[self.batch_size * idx:self.batch_size * - (idx + 1)] - return self._get_batches_of_transformed_samples(index_array) - - def __len__(self): - return (self.n + self.batch_size - 1) // self.batch_size # round up - - def on_epoch_end(self): - self._set_index_array() - - def reset(self): - self.batch_index = 0 - - def _flow_index(self): - # Ensure self.batch_index is 0. - self.reset() - while 1: - if self.seed is not None: - np.random.seed(self.seed + self.total_batches_seen) - if self.batch_index == 0: + """Base class for image data iterators. + + Deprecated: `tf.keras.preprocessing.image.Iterator` is not recommended for + new code. Prefer loading images with + `tf.keras.utils.image_dataset_from_directory` and transforming the output + `tf.data.Dataset` with preprocessing layers. For more information, see the + tutorials for [loading images]( + https://www.tensorflow.org/tutorials/load_data/images) and + [augmenting images]( + https://www.tensorflow.org/tutorials/images/data_augmentation), as well as + the [preprocessing layer guide]( + https://www.tensorflow.org/guide/keras/preprocessing_layers). + + Every `Iterator` must implement the `_get_batches_of_transformed_samples` + method. + + Args: + n: Integer, total number of samples in the dataset to loop over. + batch_size: Integer, size of a batch. + shuffle: Boolean, whether to shuffle the data between epochs. + seed: Random seeding for data shuffling. 
+ """ + + white_list_formats = ("png", "jpg", "jpeg", "bmp", "ppm", "tif", "tiff") + + def __init__(self, n, batch_size, shuffle, seed): + self.n = n + self.batch_size = batch_size + self.seed = seed + self.shuffle = shuffle + self.batch_index = 0 + self.total_batches_seen = 0 + self.lock = threading.Lock() + self.index_array = None + self.index_generator = self._flow_index() + + def _set_index_array(self): + self.index_array = np.arange(self.n) + if self.shuffle: + self.index_array = np.random.permutation(self.n) + + def __getitem__(self, idx): + if idx >= len(self): + raise ValueError( + "Asked to retrieve element {idx}, " + "but the Sequence " + "has length {length}".format(idx=idx, length=len(self)) + ) + if self.seed is not None: + np.random.seed(self.seed + self.total_batches_seen) + self.total_batches_seen += 1 + if self.index_array is None: + self._set_index_array() + index_array = self.index_array[ + self.batch_size * idx : self.batch_size * (idx + 1) + ] + return self._get_batches_of_transformed_samples(index_array) + + def __len__(self): + return (self.n + self.batch_size - 1) // self.batch_size # round up + + def on_epoch_end(self): self._set_index_array() - if self.n == 0: - # Avoiding modulo by zero error - current_index = 0 - else: - current_index = (self.batch_index * self.batch_size) % self.n - if self.n > current_index + self.batch_size: - self.batch_index += 1 - else: + def reset(self): self.batch_index = 0 - self.total_batches_seen += 1 - yield self.index_array[current_index:current_index + self.batch_size] - def __iter__(self): - # Needed if we want to do something like: - # for x, y in data_gen.flow(...): - return self + def _flow_index(self): + # Ensure self.batch_index is 0. + self.reset() + while 1: + if self.seed is not None: + np.random.seed(self.seed + self.total_batches_seen) + if self.batch_index == 0: + self._set_index_array() + + if self.n == 0: + # Avoiding modulo by zero error + current_index = 0 + else: + current_index = (self.batch_index * self.batch_size) % self.n + if self.n > current_index + self.batch_size: + self.batch_index += 1 + else: + self.batch_index = 0 + self.total_batches_seen += 1 + yield self.index_array[ + current_index : current_index + self.batch_size + ] + + def __iter__(self): + # Needed if we want to do something like: + # for x, y in data_gen.flow(...): + return self + + def __next__(self, *args, **kwargs): + return self.next(*args, **kwargs) + + def next(self): + """For python 2.x. + + Returns: + The next batch. + """ + with self.lock: + index_array = next(self.index_generator) + # The transformation of images is not under thread lock + # so it can be done in parallel + return self._get_batches_of_transformed_samples(index_array) + + def _get_batches_of_transformed_samples(self, index_array): + """Gets a batch of transformed samples. + + Args: + index_array: Array of sample indices to include in batch. + Returns: + A batch of transformed samples. + """ + raise NotImplementedError - def __next__(self, *args, **kwargs): - return self.next(*args, **kwargs) - def next(self): - """For python 2.x. +def _iter_valid_files(directory, white_list_formats, follow_links): + """Iterates on files with extension. - Returns: - The next batch. + Args: + directory: Absolute path to the directory + containing files to be counted + white_list_formats: Set of strings containing allowed extensions for + the files to be counted. + follow_links: Boolean, follow symbolic links to subdirectories. 
+ Yields: + Tuple of (root, filename) with extension in `white_list_formats`. """ - with self.lock: - index_array = next(self.index_generator) - # The transformation of images is not under thread lock - # so it can be done in parallel - return self._get_batches_of_transformed_samples(index_array) - def _get_batches_of_transformed_samples(self, index_array): - """Gets a batch of transformed samples. + def _recursive_list(subpath): + return sorted( + os.walk(subpath, followlinks=follow_links), key=lambda x: x[0] + ) + + for root, _, files in _recursive_list(directory): + for fname in sorted(files): + if fname.lower().endswith(".tiff"): + warnings.warn( + 'Using ".tiff" files with multiple bands ' + "will cause distortion. Please verify your output." + ) + if fname.lower().endswith(white_list_formats): + yield root, fname + + +def _list_valid_filenames_in_directory( + directory, white_list_formats, split, class_indices, follow_links +): + """Lists paths of files in `subdir` with extensions in `white_list_formats`. Args: - index_array: Array of sample indices to include in batch. + directory: absolute path to a directory containing the files to list. + The directory name is used as class label + and must be a key of `class_indices`. + white_list_formats: set of strings containing allowed extensions for + the files to be counted. + split: tuple of floats (e.g. `(0.2, 0.6)`) to only take into + account a certain fraction of files in each directory. + E.g.: `segment=(0.6, 1.0)` would only account for last 40 percent + of images in each directory. + class_indices: dictionary mapping a class name to its index. + follow_links: boolean, follow symbolic links to subdirectories. + Returns: - A batch of transformed samples. + classes: a list of class indices + filenames: the path of valid files in `directory`, relative from + `directory`'s parent (e.g., if `directory` is "dataset/class1", + the filenames will be + `["class1/file1.jpg", "class1/file2.jpg", ...]`). + """ + dirname = os.path.basename(directory) + if split: + all_files = list( + _iter_valid_files(directory, white_list_formats, follow_links) + ) + num_files = len(all_files) + start, stop = int(split[0] * num_files), int(split[1] * num_files) + valid_files = all_files[start:stop] + else: + valid_files = _iter_valid_files( + directory, white_list_formats, follow_links + ) + classes = [] + filenames = [] + for root, fname in valid_files: + classes.append(class_indices[dirname]) + absolute_path = os.path.join(root, fname) + relative_path = os.path.join( + dirname, os.path.relpath(absolute_path, directory) + ) + filenames.append(relative_path) + + return classes, filenames + + +class BatchFromFilesMixin: + """Adds methods related to getting batches from filenames. + + It includes the logic to transform image files to batches. """ - raise NotImplementedError - -def _iter_valid_files(directory, white_list_formats, follow_links): - """Iterates on files with extension. - - Args: - directory: Absolute path to the directory - containing files to be counted - white_list_formats: Set of strings containing allowed extensions for - the files to be counted. - follow_links: Boolean, follow symbolic links to subdirectories. - Yields: - Tuple of (root, filename) with extension in `white_list_formats`. 
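The `split` tuple consumed by `_list_valid_filenames_in_directory` above selects a contiguous fraction of the sorted file list; the same slicing in isolation, with made-up filenames:

```python
files = [f"img_{i}.png" for i in range(10)]  # made-up, already sorted
split = (0.2, 0.6)
start, stop = int(split[0] * len(files)), int(split[1] * len(files))
print(files[start:stop])
# ['img_2.png', 'img_3.png', 'img_4.png', 'img_5.png']  (40% of the files)
```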
- """ - - def _recursive_list(subpath): - return sorted( - os.walk(subpath, followlinks=follow_links), key=lambda x: x[0]) - - for root, _, files in _recursive_list(directory): - for fname in sorted(files): - if fname.lower().endswith('.tiff'): - warnings.warn('Using ".tiff" files with multiple bands ' - 'will cause distortion. Please verify your output.') - if fname.lower().endswith(white_list_formats): - yield root, fname - - -def _list_valid_filenames_in_directory(directory, white_list_formats, split, - class_indices, follow_links): - """Lists paths of files in `subdir` with extensions in `white_list_formats`. - - Args: - directory: absolute path to a directory containing the files to list. - The directory name is used as class label - and must be a key of `class_indices`. - white_list_formats: set of strings containing allowed extensions for - the files to be counted. - split: tuple of floats (e.g. `(0.2, 0.6)`) to only take into - account a certain fraction of files in each directory. - E.g.: `segment=(0.6, 1.0)` would only account for last 40 percent - of images in each directory. - class_indices: dictionary mapping a class name to its index. - follow_links: boolean, follow symbolic links to subdirectories. - - Returns: - classes: a list of class indices - filenames: the path of valid files in `directory`, relative from - `directory`'s parent (e.g., if `directory` is "dataset/class1", - the filenames will be - `["class1/file1.jpg", "class1/file2.jpg", ...]`). - """ - dirname = os.path.basename(directory) - if split: - all_files = list( - _iter_valid_files(directory, white_list_formats, follow_links)) - num_files = len(all_files) - start, stop = int(split[0] * num_files), int(split[1] * num_files) - valid_files = all_files[start:stop] - else: - valid_files = _iter_valid_files(directory, white_list_formats, follow_links) - classes = [] - filenames = [] - for root, fname in valid_files: - classes.append(class_indices[dirname]) - absolute_path = os.path.join(root, fname) - relative_path = os.path.join(dirname, - os.path.relpath(absolute_path, directory)) - filenames.append(relative_path) - - return classes, filenames - - -class BatchFromFilesMixin(): - """Adds methods related to getting batches from filenames. - - It includes the logic to transform image files to batches. - """ - - def set_processing_attrs(self, image_data_generator, target_size, color_mode, - data_format, save_to_dir, save_prefix, save_format, - subset, interpolation, keep_aspect_ratio): - """Sets attributes to use later for processing files into a batch. + def set_processing_attrs( + self, + image_data_generator, + target_size, + color_mode, + data_format, + save_to_dir, + save_prefix, + save_format, + subset, + interpolation, + keep_aspect_ratio, + ): + """Sets attributes to use later for processing files into a batch. + + Args: + image_data_generator: Instance of `ImageDataGenerator` + to use for random transformations and normalization. + target_size: tuple of integers, dimensions to resize input images + to. + color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. + Color mode to read images. + data_format: String, one of `channels_first`, `channels_last`. + save_to_dir: Optional directory where to save the pictures + being yielded, in a viewable format. This is useful + for visualizing the random transformations being + applied, for debugging purposes. + save_prefix: String prefix to use for saving sample + images (if `save_to_dir` is set). 
+ save_format: Format to use for saving sample images + (if `save_to_dir` is set). + subset: Subset of data (`"training"` or `"validation"`) if + validation_split is set in ImageDataGenerator. + interpolation: Interpolation method used to resample the image if + the target size is different from that of the loaded image. + Supported methods are "nearest", "bilinear", and "bicubic". If + PIL version 1.1.3 or newer is installed, "lanczos" is also + supported. If PIL version 3.4.0 or newer is installed, "box" and + "hamming" are also supported. By default, "nearest" is used. + keep_aspect_ratio: Boolean, whether to resize images to a target + size without aspect ratio distortion. The image is cropped in + the center with target aspect ratio before resizing. + """ + self.image_data_generator = image_data_generator + self.target_size = tuple(target_size) + self.keep_aspect_ratio = keep_aspect_ratio + if color_mode not in {"rgb", "rgba", "grayscale"}: + raise ValueError( + "Invalid color mode:", + color_mode, + '; expected "rgb", "rgba", or "grayscale".', + ) + self.color_mode = color_mode + self.data_format = data_format + if self.color_mode == "rgba": + if self.data_format == "channels_last": + self.image_shape = self.target_size + (4,) + else: + self.image_shape = (4,) + self.target_size + elif self.color_mode == "rgb": + if self.data_format == "channels_last": + self.image_shape = self.target_size + (3,) + else: + self.image_shape = (3,) + self.target_size + else: + if self.data_format == "channels_last": + self.image_shape = self.target_size + (1,) + else: + self.image_shape = (1,) + self.target_size + self.save_to_dir = save_to_dir + self.save_prefix = save_prefix + self.save_format = save_format + self.interpolation = interpolation + if subset is not None: + validation_split = self.image_data_generator._validation_split + if subset == "validation": + split = (0, validation_split) + elif subset == "training": + split = (validation_split, 1) + else: + raise ValueError( + "Invalid subset name: %s;" + 'expected "training" or "validation"' % (subset,) + ) + else: + split = None + self.split = split + self.subset = subset + + def _get_batches_of_transformed_samples(self, index_array): + """Gets a batch of transformed samples. + + Args: + index_array: Array of sample indices to include in batch. + Returns: + A batch of transformed samples. + """ + batch_x = np.zeros( + (len(index_array),) + self.image_shape, dtype=self.dtype + ) + # build batch of image data + # self.filepaths is dynamic, is better to call it once outside the loop + filepaths = self.filepaths + for i, j in enumerate(index_array): + img = image_utils.load_img( + filepaths[j], + color_mode=self.color_mode, + target_size=self.target_size, + interpolation=self.interpolation, + keep_aspect_ratio=self.keep_aspect_ratio, + ) + x = image_utils.img_to_array(img, data_format=self.data_format) + # Pillow images should be closed after `load_img`, + # but not PIL images. 
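For reference, the `image_shape` branches in `set_processing_attrs` above reduce to a small table; a sketch that prints it for an assumed `target_size`:

```python
target_size = (224, 224)  # assumed (height, width)
# channels: 4 for "rgba", 3 for "rgb", 1 for "grayscale"; the channel
# axis goes last or first depending on data_format.
for color_mode, channels in {"rgba": 4, "rgb": 3, "grayscale": 1}.items():
    print(color_mode,
          "channels_last:", target_size + (channels,),
          "channels_first:", (channels,) + target_size)
```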
+ if hasattr(img, "close"): + img.close() + if self.image_data_generator: + params = self.image_data_generator.get_random_transform(x.shape) + x = self.image_data_generator.apply_transform(x, params) + x = self.image_data_generator.standardize(x) + batch_x[i] = x + # optionally save augmented images to disk for debugging purposes + if self.save_to_dir: + for i, j in enumerate(index_array): + img = image_utils.array_to_img( + batch_x[i], self.data_format, scale=True + ) + fname = "{prefix}_{index}_{hash}.{format}".format( + prefix=self.save_prefix, + index=j, + hash=np.random.randint(1e7), + format=self.save_format, + ) + img.save(os.path.join(self.save_to_dir, fname)) + # build batch of labels + if self.class_mode == "input": + batch_y = batch_x.copy() + elif self.class_mode in {"binary", "sparse"}: + batch_y = np.empty(len(batch_x), dtype=self.dtype) + for i, n_observation in enumerate(index_array): + batch_y[i] = self.classes[n_observation] + elif self.class_mode == "categorical": + batch_y = np.zeros( + (len(batch_x), len(self.class_indices)), dtype=self.dtype + ) + for i, n_observation in enumerate(index_array): + batch_y[i, self.classes[n_observation]] = 1.0 + elif self.class_mode == "multi_output": + batch_y = [output[index_array] for output in self.labels] + elif self.class_mode == "raw": + batch_y = self.labels[index_array] + else: + return batch_x + if self.sample_weight is None: + return batch_x, batch_y + else: + return batch_x, batch_y, self.sample_weight[index_array] + + @property + def filepaths(self): + """List of absolute paths to image files.""" + raise NotImplementedError( + "`filepaths` property method has not " + "been implemented in {}.".format(type(self).__name__) + ) + + @property + def labels(self): + """Class labels of every observation.""" + raise NotImplementedError( + "`labels` property method has not been implemented in {}.".format( + type(self).__name__ + ) + ) + + @property + def sample_weight(self): + raise NotImplementedError( + "`sample_weight` property method has not " + "been implemented in {}.".format(type(self).__name__) + ) + + +@keras_export("keras.preprocessing.image.DirectoryIterator") +class DirectoryIterator(BatchFromFilesMixin, Iterator): + """Iterator capable of reading images from a directory on disk. + + Deprecated: `tf.keras.preprocessing.image.DirectoryIterator` is not + recommended for new code. Prefer loading images with + `tf.keras.utils.image_dataset_from_directory` and transforming the output + `tf.data.Dataset` with preprocessing layers. For more information, see the + tutorials for [loading images]( + https://www.tensorflow.org/tutorials/load_data/images) and + [augmenting images]( + https://www.tensorflow.org/tutorials/images/data_augmentation), as well as + the [preprocessing layer guide]( + https://www.tensorflow.org/guide/keras/preprocessing_layers). Args: - image_data_generator: Instance of `ImageDataGenerator` - to use for random transformations and normalization. - target_size: tuple of integers, dimensions to resize input images - to. - color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. - Color mode to read images. + directory: Path to the directory to read images from. Each subdirectory + in this directory will be considered to contain images from one class, + or alternatively you could specify class subdirectories via the + `classes` argument. + image_data_generator: Instance of `ImageDataGenerator` to use for random + transformations and normalization. 
+ target_size: tuple of integers, dimensions to resize input images to. + color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. Color mode to read + images. + classes: Optional list of strings, names of subdirectories containing + images from each class (e.g. `["dogs", "cats"]`). It will be computed + automatically if not set. + class_mode: Mode for yielding the targets: + - `"binary"`: binary targets (if there are only two classes), + - `"categorical"`: categorical targets, + - `"sparse"`: integer targets, + - `"input"`: targets are images identical to input images (mainly + used to work with autoencoders), + - `None`: no targets get yielded (only input images are yielded). + batch_size: Integer, size of a batch. + shuffle: Boolean, whether to shuffle the data between epochs. + seed: Random seed for data shuffling. data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures - being yielded, in a viewable format. This is useful - for visualizing the random transformations being - applied, for debugging purposes. - save_prefix: String prefix to use for saving sample - images (if `save_to_dir` is set). - save_format: Format to use for saving sample images - (if `save_to_dir` is set). + save_to_dir: Optional directory where to save the pictures being + yielded, in a viewable format. This is useful for visualizing the + random transformations being applied, for debugging purposes. + save_prefix: String prefix to use for saving sample images (if + `save_to_dir` is set). + save_format: Format to use for saving sample images (if `save_to_dir` is + set). subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. + validation_split is set in ImageDataGenerator. interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. - Supported methods are "nearest", "bilinear", and "bicubic". - If PIL version 1.1.3 or newer is installed, "lanczos" is also - supported. If PIL version 3.4.0 or newer is installed, "box" and - "hamming" are also supported. By default, "nearest" is used. + target size is different from that of the loaded image. Supported + methods are "nearest", "bilinear", and "bicubic". If PIL version 1.1.3 + or newer is installed, "lanczos" is also supported. If PIL version + 3.4.0 or newer is installed, "box" and "hamming" are also supported. + By default, "nearest" is used. keep_aspect_ratio: Boolean, whether to resize images to a target size without aspect ratio distortion. The image is cropped in the center with target aspect ratio before resizing. + dtype: Dtype to use for generated arrays. 
""" - self.image_data_generator = image_data_generator - self.target_size = tuple(target_size) - self.keep_aspect_ratio = keep_aspect_ratio - if color_mode not in {'rgb', 'rgba', 'grayscale'}: - raise ValueError('Invalid color mode:', color_mode, - '; expected "rgb", "rgba", or "grayscale".') - self.color_mode = color_mode - self.data_format = data_format - if self.color_mode == 'rgba': - if self.data_format == 'channels_last': - self.image_shape = self.target_size + (4,) - else: - self.image_shape = (4,) + self.target_size - elif self.color_mode == 'rgb': - if self.data_format == 'channels_last': - self.image_shape = self.target_size + (3,) - else: - self.image_shape = (3,) + self.target_size - else: - if self.data_format == 'channels_last': - self.image_shape = self.target_size + (1,) - else: - self.image_shape = (1,) + self.target_size - self.save_to_dir = save_to_dir - self.save_prefix = save_prefix - self.save_format = save_format - self.interpolation = interpolation - if subset is not None: - validation_split = self.image_data_generator._validation_split # pylint: disable=protected-access - if subset == 'validation': - split = (0, validation_split) - elif subset == 'training': - split = (validation_split, 1) - else: - raise ValueError('Invalid subset name: %s;' - 'expected "training" or "validation"' % (subset,)) - else: - split = None - self.split = split - self.subset = subset - def _get_batches_of_transformed_samples(self, index_array): - """Gets a batch of transformed samples. + allowed_class_modes = {"categorical", "binary", "sparse", "input", None} - Args: - index_array: Array of sample indices to include in batch. - Returns: - A batch of transformed samples. - """ - batch_x = np.zeros((len(index_array),) + self.image_shape, dtype=self.dtype) - # build batch of image data - # self.filepaths is dynamic, is better to call it once outside the loop - filepaths = self.filepaths - for i, j in enumerate(index_array): - img = image_utils.load_img( - filepaths[j], - color_mode=self.color_mode, - target_size=self.target_size, - interpolation=self.interpolation, - keep_aspect_ratio=self.keep_aspect_ratio) - x = image_utils.img_to_array(img, data_format=self.data_format) - # Pillow images should be closed after `load_img`, - # but not PIL images. - if hasattr(img, 'close'): - img.close() - if self.image_data_generator: - params = self.image_data_generator.get_random_transform(x.shape) - x = self.image_data_generator.apply_transform(x, params) - x = self.image_data_generator.standardize(x) - batch_x[i] = x - # optionally save augmented images to disk for debugging purposes - if self.save_to_dir: - for i, j in enumerate(index_array): - img = image_utils.array_to_img(batch_x[i], self.data_format, scale=True) - fname = '{prefix}_{index}_{hash}.{format}'.format( - prefix=self.save_prefix, - index=j, - hash=np.random.randint(1e7), - format=self.save_format) - img.save(os.path.join(self.save_to_dir, fname)) - # build batch of labels - if self.class_mode == 'input': - batch_y = batch_x.copy() - elif self.class_mode in {'binary', 'sparse'}: - batch_y = np.empty(len(batch_x), dtype=self.dtype) - for i, n_observation in enumerate(index_array): - batch_y[i] = self.classes[n_observation] - elif self.class_mode == 'categorical': - batch_y = np.zeros((len(batch_x), len(self.class_indices)), - dtype=self.dtype) - for i, n_observation in enumerate(index_array): - batch_y[i, self.classes[n_observation]] = 1. 
- elif self.class_mode == 'multi_output': - batch_y = [output[index_array] for output in self.labels] - elif self.class_mode == 'raw': - batch_y = self.labels[index_array] - else: - return batch_x - if self.sample_weight is None: - return batch_x, batch_y - else: - return batch_x, batch_y, self.sample_weight[index_array] - - @property - def filepaths(self): - """List of absolute paths to image files.""" - raise NotImplementedError( - '`filepaths` property method has not been implemented in {}.'.format( - type(self).__name__)) - - @property - def labels(self): - """Class labels of every observation.""" - raise NotImplementedError( - '`labels` property method has not been implemented in {}.'.format( - type(self).__name__)) - - @property - def sample_weight(self): - raise NotImplementedError( - '`sample_weight` property method has not been implemented in {}.' - .format(type(self).__name__)) - - -@keras_export('keras.preprocessing.image.DirectoryIterator') -class DirectoryIterator(BatchFromFilesMixin, Iterator): - """Iterator capable of reading images from a directory on disk. - - Deprecated: `tf.keras.preprocessing.image.DirectoryIterator` is not - recommended for new code. Prefer loading images with - `tf.keras.utils.image_dataset_from_directory` and transforming the output - `tf.data.Dataset` with preprocessing layers. For more information, see the - tutorials for [loading images]( - https://www.tensorflow.org/tutorials/load_data/images) and - [augmenting images]( - https://www.tensorflow.org/tutorials/images/data_augmentation), as well as - the [preprocessing layer guide]( - https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - directory: Path to the directory to read images from. Each subdirectory in - this directory will be considered to contain images from one class, or - alternatively you could specify class subdirectories via the `classes` - argument. - image_data_generator: Instance of `ImageDataGenerator` to use for random - transformations and normalization. - target_size: tuple of integers, dimensions to resize input images to. - color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. Color mode to read - images. - classes: Optional list of strings, names of subdirectories containing - images from each class (e.g. `["dogs", "cats"]`). It will be computed - automatically if not set. - class_mode: Mode for yielding the targets: - - `"binary"`: binary targets (if there are only two classes), - - `"categorical"`: categorical targets, - - `"sparse"`: integer targets, - - `"input"`: targets are images identical to input images (mainly used - to work with autoencoders), - - `None`: no targets get yielded (only input images are yielded). - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - seed: Random seed for data shuffling. - data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures being yielded, - in a viewable format. This is useful for visualizing the random - transformations being applied, for debugging purposes. - save_prefix: String prefix to use for saving sample images (if - `save_to_dir` is set). - save_format: Format to use for saving sample images (if `save_to_dir` is - set). - subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. 
Supported - methods are "nearest", "bilinear", and "bicubic". If PIL version 1.1.3 - or newer is installed, "lanczos" is also supported. If PIL version 3.4.0 - or newer is installed, "box" and "hamming" are also supported. By - default, "nearest" is used. - keep_aspect_ratio: Boolean, whether to resize images to a target size - without aspect ratio distortion. The image is cropped in the center - with target aspect ratio before resizing. - dtype: Dtype to use for generated arrays. - """ - allowed_class_modes = {'categorical', 'binary', 'sparse', 'input', None} - - def __init__(self, - directory, - image_data_generator, - target_size=(256, 256), - color_mode='rgb', - classes=None, - class_mode='categorical', - batch_size=32, - shuffle=True, - seed=None, - data_format=None, - save_to_dir=None, - save_prefix='', - save_format='png', - follow_links=False, - subset=None, - interpolation='nearest', - keep_aspect_ratio=False, - dtype=None): - if data_format is None: - data_format = backend.image_data_format() - if dtype is None: - dtype = backend.floatx() - super().set_processing_attrs(image_data_generator, target_size, color_mode, - data_format, save_to_dir, save_prefix, - save_format, subset, interpolation, - keep_aspect_ratio) - self.directory = directory - self.classes = classes - if class_mode not in self.allowed_class_modes: - raise ValueError('Invalid class_mode: {}; expected one of: {}' - .format(class_mode, self.allowed_class_modes)) - self.class_mode = class_mode - self.dtype = dtype - # First, count the number of samples and classes. - self.samples = 0 - - if not classes: - classes = [] - for subdir in sorted(os.listdir(directory)): - if os.path.isdir(os.path.join(directory, subdir)): - classes.append(subdir) - self.num_classes = len(classes) - self.class_indices = dict(zip(classes, range(len(classes)))) - - pool = multiprocessing.pool.ThreadPool() - - # Second, build an index of the images - # in the different class subfolders. - results = [] - self.filenames = [] - i = 0 - for dirpath in (os.path.join(directory, subdir) for subdir in classes): - results.append( - pool.apply_async(_list_valid_filenames_in_directory, - (dirpath, self.white_list_formats, self.split, - self.class_indices, follow_links))) - classes_list = [] - for res in results: - classes, filenames = res.get() - classes_list.append(classes) - self.filenames += filenames - self.samples = len(self.filenames) - self.classes = np.zeros((self.samples,), dtype='int32') - for classes in classes_list: - self.classes[i:i + len(classes)] = classes - i += len(classes) - - print('Found %d images belonging to %d classes.' 
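# Sketch of the class discovery performed above: class names are the sorted
# subdirectory names, and each name's position in that ordering becomes its
# label index ("data/train" is a hypothetical path).
import os

directory = "data/train"
classes = sorted(
    d for d in os.listdir(directory)
    if os.path.isdir(os.path.join(directory, d))
)
class_indices = dict(zip(classes, range(len(classes))))
# e.g. {"cats": 0, "dogs": 1}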
% - (self.samples, self.num_classes)) - pool.close() - pool.join() - self._filepaths = [ - os.path.join(self.directory, fname) for fname in self.filenames - ] - super().__init__(self.samples, batch_size, shuffle, seed) - - @property - def filepaths(self): - return self._filepaths - - @property - def labels(self): - return self.classes - - @property # mixin needs this property to work - def sample_weight(self): - # no sample weights will be returned - return None - - -@keras_export('keras.preprocessing.image.NumpyArrayIterator') + def __init__( + self, + directory, + image_data_generator, + target_size=(256, 256), + color_mode="rgb", + classes=None, + class_mode="categorical", + batch_size=32, + shuffle=True, + seed=None, + data_format=None, + save_to_dir=None, + save_prefix="", + save_format="png", + follow_links=False, + subset=None, + interpolation="nearest", + keep_aspect_ratio=False, + dtype=None, + ): + if data_format is None: + data_format = backend.image_data_format() + if dtype is None: + dtype = backend.floatx() + super().set_processing_attrs( + image_data_generator, + target_size, + color_mode, + data_format, + save_to_dir, + save_prefix, + save_format, + subset, + interpolation, + keep_aspect_ratio, + ) + self.directory = directory + self.classes = classes + if class_mode not in self.allowed_class_modes: + raise ValueError( + "Invalid class_mode: {}; expected one of: {}".format( + class_mode, self.allowed_class_modes + ) + ) + self.class_mode = class_mode + self.dtype = dtype + # First, count the number of samples and classes. + self.samples = 0 + + if not classes: + classes = [] + for subdir in sorted(os.listdir(directory)): + if os.path.isdir(os.path.join(directory, subdir)): + classes.append(subdir) + self.num_classes = len(classes) + self.class_indices = dict(zip(classes, range(len(classes)))) + + pool = multiprocessing.pool.ThreadPool() + + # Second, build an index of the images + # in the different class subfolders. + results = [] + self.filenames = [] + i = 0 + for dirpath in (os.path.join(directory, subdir) for subdir in classes): + results.append( + pool.apply_async( + _list_valid_filenames_in_directory, + ( + dirpath, + self.white_list_formats, + self.split, + self.class_indices, + follow_links, + ), + ) + ) + classes_list = [] + for res in results: + classes, filenames = res.get() + classes_list.append(classes) + self.filenames += filenames + self.samples = len(self.filenames) + self.classes = np.zeros((self.samples,), dtype="int32") + for classes in classes_list: + self.classes[i : i + len(classes)] = classes + i += len(classes) + + io_utils.print_msg( + f"Found {self.samples} images belonging to " + f"{self.num_classes} classes." + ) + pool.close() + pool.join() + self._filepaths = [ + os.path.join(self.directory, fname) for fname in self.filenames + ] + super().__init__(self.samples, batch_size, shuffle, seed) + + @property + def filepaths(self): + return self._filepaths + + @property + def labels(self): + return self.classes + + @property # mixin needs this property to work + def sample_weight(self): + # no sample weights will be returned + return None + + +@keras_export("keras.preprocessing.image.NumpyArrayIterator") class NumpyArrayIterator(Iterator): - """Iterator yielding data from a Numpy array. - - Deprecated: `tf.keras.preprocessing.image.NumpyArrayIterator` is not - recommended for new code. Prefer loading images with - `tf.keras.utils.image_dataset_from_directory` and transforming the output - `tf.data.Dataset` with preprocessing layers. 
For more information, see the - tutorials for [loading images]( - https://www.tensorflow.org/tutorials/load_data/images) and - [augmenting images]( - https://www.tensorflow.org/tutorials/images/data_augmentation), as well as - the [preprocessing layer guide]( - https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - x: Numpy array of input data or tuple. If tuple, the second elements is - either another numpy array or a list of numpy arrays, each of which gets - passed through as an output without any modifications. - y: Numpy array of targets data. - image_data_generator: Instance of `ImageDataGenerator` to use for random - transformations and normalization. - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - sample_weight: Numpy array of sample weights. - seed: Random seed for data shuffling. - data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures being yielded, - in a viewable format. This is useful for visualizing the random - transformations being applied, for debugging purposes. - save_prefix: String prefix to use for saving sample images (if - `save_to_dir` is set). - save_format: Format to use for saving sample images (if `save_to_dir` is - set). - subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. - ignore_class_split: Boolean (default: False), ignore difference - in number of classes in labels across train and validation - split (useful for non-classification tasks) - dtype: Dtype to use for the generated arrays. - """ - - def __init__(self, - x, - y, - image_data_generator, - batch_size=32, - shuffle=False, - sample_weight=None, - seed=None, - data_format=None, - save_to_dir=None, - save_prefix='', - save_format='png', - subset=None, - ignore_class_split=False, - dtype=None): - if data_format is None: - data_format = backend.image_data_format() - if dtype is None: - dtype = backend.floatx() - self.dtype = dtype - if isinstance(x, tuple) or isinstance(x, list): - if not isinstance(x[1], list): - x_misc = [np.asarray(x[1])] - else: - x_misc = [np.asarray(xx) for xx in x[1]] - x = x[0] - for xx in x_misc: - if len(x) != len(xx): - raise ValueError('All of the arrays in `x` ' - 'should have the same length. ' - 'Found a pair with: len(x[0]) = %s, len(x[?]) = %s' % - (len(x), len(xx))) - else: - x_misc = [] - - if y is not None and len(x) != len(y): - raise ValueError('`x` (images tensor) and `y` (labels) ' - 'should have the same length. ' - 'Found: x.shape = %s, y.shape = %s' % - (np.asarray(x).shape, np.asarray(y).shape)) - if sample_weight is not None and len(x) != len(sample_weight): - raise ValueError('`x` (images tensor) and `sample_weight` ' - 'should have the same length. ' - 'Found: x.shape = %s, sample_weight.shape = %s' % - (np.asarray(x).shape, np.asarray(sample_weight).shape)) - if subset is not None: - if subset not in {'training', 'validation'}: - raise ValueError('Invalid subset name:', subset, - '; expected "training" or "validation".') - split_idx = int(len(x) * image_data_generator._validation_split) - - if (y is not None and not ignore_class_split and not np.array_equal( - np.unique(y[:split_idx]), np.unique(y[split_idx:]))): - raise ValueError('Training and validation subsets ' - 'have different number of classes after ' - 'the split. 
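# Sketch of the subset arithmetic above: with validation_split=0.2 the first
# int(len(x) * 0.2) samples become the "validation" subset and the remainder
# the "training" subset (the numbers below are illustrative).
n, validation_split = 100, 0.2
split_idx = int(n * validation_split)   # 20
# subset == "validation" -> x[:split_idx]   (samples 0..19)
# subset == "training"   -> x[split_idx:]   (samples 20..99)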
If your numpy arrays are ' - 'sorted by the label, you might want ' - 'to shuffle them.') - - if subset == 'validation': - x = x[:split_idx] - x_misc = [np.asarray(xx[:split_idx]) for xx in x_misc] - if y is not None: - y = y[:split_idx] - else: - x = x[split_idx:] - x_misc = [np.asarray(xx[split_idx:]) for xx in x_misc] - if y is not None: - y = y[split_idx:] - - self.x = np.asarray(x, dtype=self.dtype) - self.x_misc = x_misc - if self.x.ndim != 4: - raise ValueError( - 'Input data in `NumpyArrayIterator` ' - 'should have rank 4. You passed an array ' - 'with shape', self.x.shape) - channels_axis = 3 if data_format == 'channels_last' else 1 - if self.x.shape[channels_axis] not in {1, 3, 4}: - warnings.warn('NumpyArrayIterator is set to use the ' - 'data format convention "' + data_format + '" ' - '(channels on axis ' + str(channels_axis) + - '), i.e. expected either 1, 3, or 4 ' - 'channels on axis ' + str(channels_axis) + '. ' - 'However, it was passed an array with shape ' + - str(self.x.shape) + ' (' + - str(self.x.shape[channels_axis]) + ' channels).') - if y is not None: - self.y = np.asarray(y) - else: - self.y = None - if sample_weight is not None: - self.sample_weight = np.asarray(sample_weight) - else: - self.sample_weight = None - self.image_data_generator = image_data_generator - self.data_format = data_format - self.save_to_dir = save_to_dir - self.save_prefix = save_prefix - self.save_format = save_format - super().__init__(x.shape[0], batch_size, shuffle, seed) - - def _get_batches_of_transformed_samples(self, index_array): - batch_x = np.zeros( - tuple([len(index_array)] + list(self.x.shape)[1:]), dtype=self.dtype) - for i, j in enumerate(index_array): - x = self.x[j] - params = self.image_data_generator.get_random_transform(x.shape) - x = self.image_data_generator.apply_transform( - x.astype(self.dtype), params) - x = self.image_data_generator.standardize(x) - batch_x[i] = x - - if self.save_to_dir: - for i, j in enumerate(index_array): - img = image_utils.array_to_img(batch_x[i], self.data_format, scale=True) - fname = '{prefix}_{index}_{hash}.{format}'.format( - prefix=self.save_prefix, - index=j, - hash=np.random.randint(1e4), - format=self.save_format) - img.save(os.path.join(self.save_to_dir, fname)) - batch_x_miscs = [xx[index_array] for xx in self.x_misc] - output = (batch_x if not batch_x_miscs else [batch_x] + batch_x_miscs,) - if self.y is None: - return output[0] - output += (self.y[index_array],) - if self.sample_weight is not None: - output += (self.sample_weight[index_array],) - return output - - -def validate_filename(filename, white_list_formats): - """Check if a filename refers to a valid file. - - Args: - filename: String, absolute path to a file - white_list_formats: Set, allowed file extensions - Returns: - A boolean value indicating if the filename is valid or not - """ - return (filename.lower().endswith(white_list_formats) and - os.path.isfile(filename)) - - -class DataFrameIterator(BatchFromFilesMixin, Iterator): - """Iterator capable of reading images from a directory on disk as a dataframe. - - Args: - dataframe: Pandas dataframe containing the filepaths relative to - `directory` (or absolute paths if `directory` is None) of the images in - a string column. It should include other column/s depending on the - `class_mode`: - if `class_mode` is `"categorical"` (default value) it - must include the `y_col` column with the class/es of each image. - Values in column can be string/list/tuple if a single class or - list/tuple if multiple classes. 
- if `class_mode` is `"binary"` or - `"sparse"` it must include the given `y_col` column with class values - as strings. - if `class_mode` is `"raw"` or `"multi_output"` it should - contain the columns specified in `y_col`. - if `class_mode` is - `"input"` or `None` no extra column is needed. - directory: string, path to the directory to read images from. If `None`, - data in `x_col` column should be absolute paths. - image_data_generator: Instance of `ImageDataGenerator` to use for random - transformations and normalization. If None, no transformations and - normalizations are made. - x_col: string, column in `dataframe` that contains the filenames (or - absolute paths if `directory` is `None`). - y_col: string or list, column/s in `dataframe` that has the target data. - weight_col: string, column in `dataframe` that contains the sample - weights. Default: `None`. - target_size: tuple of integers, dimensions to resize input images to. - color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. Color mode to read - images. - classes: Optional list of strings, classes to use (e.g. `["dogs", - "cats"]`). If None, all classes in `y_col` will be used. - class_mode: one of "binary", "categorical", "input", "multi_output", - "raw", "sparse" or None. Default: "categorical". - Mode for yielding the targets: - - `"binary"`: 1D numpy array of binary labels, - - `"categorical"`: 2D numpy array of one-hot encoded labels. Supports - multi-label output. - - `"input"`: images identical to input images (mainly used to work - with autoencoders), - - `"multi_output"`: list with the values of the different columns, - - `"raw"`: numpy array of values in `y_col` column(s), - - `"sparse"`: 1D numpy array of integer labels, - `None`, no targets - are returned (the generator will only yield batches of image data, - which is useful to use in `model.predict()`). - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - seed: Random seed for data shuffling. - data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures being yielded, - in a viewable format. This is useful for visualizing the random - transformations being applied, for debugging purposes. - save_prefix: String prefix to use for saving sample images (if - `save_to_dir` is set). - save_format: Format to use for saving sample images (if `save_to_dir` is - set). - subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. Supported - methods are "nearest", "bilinear", and "bicubic". If PIL version 1.1.3 - or newer is installed, "lanczos" is also supported. If PIL version 3.4.0 - or newer is installed, "box" and "hamming" are also supported. By - default, "nearest" is used. - keep_aspect_ratio: Boolean, whether to resize images to a target size - without aspect ratio distortion. The image is cropped in the center - with target aspect ratio before resizing. - dtype: Dtype to use for the generated arrays. - validate_filenames: Boolean, whether to validate image filenames in - `x_col`. If `True`, invalid images will be ignored. Disabling this - option can lead to speed-up in the instantiation of this class. Default: - `True`. 
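# A minimal sketch of feeding this iterator from a dataframe via
# `ImageDataGenerator.flow_from_dataframe`; the dataframe contents and the
# "data/images" path are hypothetical example data.
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator

df = pd.DataFrame(
    {"filename": ["cat1.jpg", "dog1.jpg"], "class": ["cat", "dog"]}
)
datagen = ImageDataGenerator(rescale=1.0 / 255)
it = datagen.flow_from_dataframe(
    df,
    directory="data/images",   # hypothetical path
    x_col="filename",
    y_col="class",
    class_mode="categorical",
    target_size=(150, 150),
)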
- """ - allowed_class_modes = { - 'binary', 'categorical', 'input', 'multi_output', 'raw', 'sparse', None - } - - def __init__(self, - dataframe, - directory=None, - image_data_generator=None, - x_col='filename', - y_col='class', - weight_col=None, - target_size=(256, 256), - color_mode='rgb', - classes=None, - class_mode='categorical', - batch_size=32, - shuffle=True, - seed=None, - data_format='channels_last', - save_to_dir=None, - save_prefix='', - save_format='png', - subset=None, - interpolation='nearest', - keep_aspect_ratio=False, - dtype='float32', - validate_filenames=True): - super().set_processing_attrs(image_data_generator, target_size, color_mode, - data_format, save_to_dir, save_prefix, - save_format, subset, interpolation, - keep_aspect_ratio) - df = dataframe.copy() - self.directory = directory or '' - self.class_mode = class_mode - self.dtype = dtype - # check that inputs match the required class_mode - self._check_params(df, x_col, y_col, weight_col, classes) - if validate_filenames: # check which image files are valid and keep them - df = self._filter_valid_filepaths(df, x_col) - if class_mode not in ['input', 'multi_output', 'raw', None]: - df, classes = self._filter_classes(df, y_col, classes) - num_classes = len(classes) - # build an index of all the unique classes - self.class_indices = dict(zip(classes, range(len(classes)))) - # retrieve only training or validation set - if self.split: - num_files = len(df) - start = int(self.split[0] * num_files) - stop = int(self.split[1] * num_files) - df = df.iloc[start:stop, :] - # get labels for each observation - if class_mode not in ['input', 'multi_output', 'raw', None]: - self.classes = self.get_classes(df, y_col) - self.filenames = df[x_col].tolist() - self._sample_weight = df[weight_col].values if weight_col else None - - if class_mode == 'multi_output': - self._targets = [np.array(df[col].tolist()) for col in y_col] - if class_mode == 'raw': - self._targets = df[y_col].values - self.samples = len(self.filenames) - validated_string = 'validated' if validate_filenames else 'non-validated' - if class_mode in ['input', 'multi_output', 'raw', None]: - print(f'Found {self.samples} {validated_string} image filenames.') - else: - print(f'Found {self.samples} {validated_string} image filenames ' - f'belonging to {num_classes} classes.') - self._filepaths = [ - os.path.join(self.directory, fname) for fname in self.filenames - ] - super().__init__(self.samples, batch_size, shuffle, seed) - - def _check_params(self, df, x_col, y_col, weight_col, classes): - # check class mode is one of the currently supported - if self.class_mode not in self.allowed_class_modes: - raise ValueError('Invalid class_mode: {}; expected one of: {}'.format( - self.class_mode, self.allowed_class_modes)) - # check that y_col has several column names if class_mode is multi_output - if (self.class_mode == 'multi_output') and not isinstance(y_col, list): - raise TypeError( - 'If class_mode="{}", y_col must be a list. 
Received {}.'.format( - self.class_mode, - type(y_col).__name__)) - # check that filenames/filepaths column values are all strings - if not all(df[x_col].apply(lambda x: isinstance(x, str))): - raise TypeError( - 'All values in column x_col={} must be strings.'.format(x_col)) - # check labels are string if class_mode is binary or sparse - if self.class_mode in {'binary', 'sparse'}: - if not all(df[y_col].apply(lambda x: isinstance(x, str))): - raise TypeError('If class_mode="{}", y_col="{}" column ' - 'values must be strings.'.format( - self.class_mode, y_col)) - # check that if binary there are only 2 different classes - if self.class_mode == 'binary': - if classes: - classes = set(classes) - if len(classes) != 2: - raise ValueError('If class_mode="binary" there must be 2 ' - 'classes. {} class/es were given.'.format( - len(classes))) - elif df[y_col].nunique() != 2: - raise ValueError('If class_mode="binary" there must be 2 classes. ' - 'Found {} classes.'.format(df[y_col].nunique())) - # check values are string, list or tuple if class_mode is categorical - if self.class_mode == 'categorical': - types = (str, list, tuple) - if not all(df[y_col].apply(lambda x: isinstance(x, types))): - raise TypeError('If class_mode="{}", y_col="{}" column ' - 'values must be type string, list or tuple.'.format( - self.class_mode, y_col)) - # raise warning if classes are given but will be unused - if classes and self.class_mode in {'input', 'multi_output', 'raw', None}: - warnings.warn( - '`classes` will be ignored given the class_mode="{}"'.format( - self.class_mode)) - # check that if weight column that the values are numerical - if weight_col and not issubclass(df[weight_col].dtype.type, np.number): - raise TypeError( - 'Column weight_col={} must be numeric.'.format(weight_col)) - - def get_classes(self, df, y_col): - labels = [] - for label in df[y_col]: - if isinstance(label, (list, tuple)): - labels.append([self.class_indices[lbl] for lbl in label]) - else: - labels.append(self.class_indices[label]) - return labels - - @staticmethod - def _filter_classes(df, y_col, classes): - df = df.copy() - - def remove_classes(labels, classes): - if isinstance(labels, (list, tuple)): - labels = [cls for cls in labels if cls in classes] - return labels or None - elif isinstance(labels, str): - return labels if labels in classes else None - else: - raise TypeError( - 'Expect string, list or tuple but found {} in {} column '.format( - type(labels), y_col)) - - if classes: - # prepare for membership lookup - classes = list(collections.OrderedDict.fromkeys(classes).keys()) - df[y_col] = df[y_col].apply(lambda x: remove_classes(x, classes)) - else: - classes = set() - for v in df[y_col]: - if isinstance(v, (list, tuple)): - classes.update(v) - else: - classes.add(v) - classes = sorted(classes) - return df.dropna(subset=[y_col]), classes - - def _filter_valid_filepaths(self, df, x_col): - """Keep only dataframe rows with valid filenames. + """Iterator yielding data from a Numpy array. + + Deprecated: `tf.keras.preprocessing.image.NumpyArrayIterator` is not + recommended for new code. Prefer loading images with + `tf.keras.utils.image_dataset_from_directory` and transforming the output + `tf.data.Dataset` with preprocessing layers. 
For more information, see the + tutorials for [loading images]( + https://www.tensorflow.org/tutorials/load_data/images) and + [augmenting images]( + https://www.tensorflow.org/tutorials/images/data_augmentation), as well as + the [preprocessing layer guide]( + https://www.tensorflow.org/guide/keras/preprocessing_layers). Args: - df: Pandas dataframe containing filenames in a column - x_col: string, column in `df` that contains the filenames or filepaths - Returns: - absolute paths to image files - """ - filepaths = df[x_col].map(lambda fname: os.path.join(self.directory, fname)) - mask = filepaths.apply(validate_filename, args=(self.white_list_formats,)) - n_invalid = (~mask).sum() - if n_invalid: - warnings.warn('Found {} invalid image filename(s) in x_col="{}". ' - 'These filename(s) will be ignored.'.format( - n_invalid, x_col)) - return df[mask] - - @property - def filepaths(self): - return self._filepaths - - @property - def labels(self): - if self.class_mode in {'multi_output', 'raw'}: - return self._targets - else: - return self.classes - - @property - def sample_weight(self): - return self._sample_weight - - -def flip_axis(x, axis): - x = np.asarray(x).swapaxes(axis, 0) - x = x[::-1, ...] - x = x.swapaxes(0, axis) - return x - - -@keras_export('keras.preprocessing.image.ImageDataGenerator') -class ImageDataGenerator(): - """Generate batches of tensor image data with real-time data augmentation. - - Deprecated: `tf.keras.preprocessing.image.ImageDataGenerator` is not - recommended for new code. Prefer loading images with - `tf.keras.utils.image_dataset_from_directory` and transforming the output - `tf.data.Dataset` with preprocessing layers. For more information, see the - tutorials for [loading images]( - https://www.tensorflow.org/tutorials/load_data/images) and - [augmenting images]( - https://www.tensorflow.org/tutorials/images/data_augmentation), as well as - the [preprocessing layer guide]( - https://www.tensorflow.org/guide/keras/preprocessing_layers). - - The data will be looped over (in batches). - - Args: - featurewise_center: Boolean. Set input mean to 0 over the dataset, - feature-wise. - samplewise_center: Boolean. Set each sample mean to 0. - featurewise_std_normalization: Boolean. Divide inputs by std of the - dataset, feature-wise. - samplewise_std_normalization: Boolean. Divide each input by its std. - zca_epsilon: epsilon for ZCA whitening. Default is 1e-6. - zca_whitening: Boolean. Apply ZCA whitening. - rotation_range: Int. Degree range for random rotations. - width_shift_range: Float, 1-D array-like or int - - float: fraction of total width, if < 1, or pixels if >= 1. - - 1-D array-like: random elements from the array. - - int: integer number of pixels from interval `(-width_shift_range, - +width_shift_range)` - With `width_shift_range=2` possible values - are integers `[-1, 0, +1]`, same as with `width_shift_range=[-1, 0, - +1]`, while with `width_shift_range=1.0` possible values are floats - in the interval [-1.0, +1.0). - height_shift_range: Float, 1-D array-like or int - - float: fraction of total height, if < 1, or pixels if >= 1. - - 1-D array-like: random elements from the array. - - int: integer number of pixels from interval `(-height_shift_range, - +height_shift_range)` - With `height_shift_range=2` possible values - are integers `[-1, 0, +1]`, same as with `height_shift_range=[-1, 0, - +1]`, while with `height_shift_range=1.0` possible values are floats - in the interval [-1.0, +1.0). - brightness_range: Tuple or list of two floats. 
Range for picking a - brightness shift value from. - shear_range: Float. Shear Intensity (Shear angle in counter-clockwise - direction in degrees) - zoom_range: Float or [lower, upper]. Range for random zoom. If a float, - `[lower, upper] = [1-zoom_range, 1+zoom_range]`. - channel_shift_range: Float. Range for random channel shifts. - fill_mode: One of {"constant", "nearest", "reflect" or "wrap"}. Default is - 'nearest'. Points outside the boundaries of the input are filled - according to the given mode: - - 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k) - - 'nearest': aaaaaaaa|abcd|dddddddd - - 'reflect': abcddcba|abcd|dcbaabcd - - 'wrap': abcdabcd|abcd|abcdabcd - cval: Float or Int. Value used for points outside the boundaries when - `fill_mode = "constant"`. - horizontal_flip: Boolean. Randomly flip inputs horizontally. - vertical_flip: Boolean. Randomly flip inputs vertically. - rescale: rescaling factor. Defaults to None. If None or 0, no rescaling is - applied, otherwise we multiply the data by the value provided (after - applying all other transformations). - preprocessing_function: function that will be applied on each input. The - function will run after the image is resized and augmented. - The function should take one argument: one image (Numpy tensor with - rank 3), and should output a Numpy tensor with the same shape. - data_format: Image data format, either "channels_first" or - "channels_last". "channels_last" mode means that the images should have - shape `(samples, height, width, channels)`, "channels_first" mode means - that the images should have shape `(samples, channels, height, width)`. - It defaults to the `image_data_format` value found in your Keras config - file at `~/.keras/keras.json`. If you never set it, then it will be - "channels_last". - validation_split: Float. Fraction of images reserved for validation - (strictly between 0 and 1). - dtype: Dtype to use for the generated arrays. - - Raises: - ValueError: If the value of the argument, `data_format` is other than - `"channels_last"` or `"channels_first"`. - ValueError: If the value of the argument, `validation_split` > 1 - or `validation_split` < 0. 
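# Sketch of the fill_mode diagrams above, reproduced with numpy padding; note
# that np.pad uses different mode names ("edge" corresponds to "nearest", and
# "symmetric" matches the "reflect" diagram shown in the docstring).
import numpy as np

a = np.array([1, 2, 3, 4])                        # "abcd"
np.pad(a, 4, mode="constant", constant_values=0)  # kkkk|abcd|kkkk (cval=k=0)
np.pad(a, 4, mode="edge")                         # aaaa|abcd|dddd
np.pad(a, 4, mode="symmetric")                    # dcba|abcd|dcba
np.pad(a, 4, mode="wrap")                         # abcd|abcd|abcd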
- - Examples: - - Example of using `.flow(x, y)`: - - ```python - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - y_train = utils.to_categorical(y_train, num_classes) - y_test = utils.to_categorical(y_test, num_classes) - datagen = ImageDataGenerator( - featurewise_center=True, - featurewise_std_normalization=True, - rotation_range=20, - width_shift_range=0.2, - height_shift_range=0.2, - horizontal_flip=True, - validation_split=0.2) - # compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied) - datagen.fit(x_train) - # fits the model on batches with real-time data augmentation: - model.fit(datagen.flow(x_train, y_train, batch_size=32, - subset='training'), - validation_data=datagen.flow(x_train, y_train, - batch_size=8, subset='validation'), - steps_per_epoch=len(x_train) / 32, epochs=epochs) - # here's a more "manual" example - for e in range(epochs): - print('Epoch', e) - batches = 0 - for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=32): - model.fit(x_batch, y_batch) - batches += 1 - if batches >= len(x_train) / 32: - # we need to break the loop by hand because - # the generator loops indefinitely - break - ``` - - Example of using `.flow_from_directory(directory)`: - - ```python - train_datagen = ImageDataGenerator( - rescale=1./255, - shear_range=0.2, - zoom_range=0.2, - horizontal_flip=True) - test_datagen = ImageDataGenerator(rescale=1./255) - train_generator = train_datagen.flow_from_directory( - 'data/train', - target_size=(150, 150), - batch_size=32, - class_mode='binary') - validation_generator = test_datagen.flow_from_directory( - 'data/validation', - target_size=(150, 150), - batch_size=32, - class_mode='binary') - model.fit( - train_generator, - steps_per_epoch=2000, - epochs=50, - validation_data=validation_generator, - validation_steps=800) - ``` - - Example of transforming images and masks together. 
- - ```python - # we create two instances with the same arguments - data_gen_args = dict(featurewise_center=True, - featurewise_std_normalization=True, - rotation_range=90, - width_shift_range=0.1, - height_shift_range=0.1, - zoom_range=0.2) - image_datagen = ImageDataGenerator(**data_gen_args) - mask_datagen = ImageDataGenerator(**data_gen_args) - # Provide the same seed and keyword arguments to the fit and flow methods - seed = 1 - image_datagen.fit(images, augment=True, seed=seed) - mask_datagen.fit(masks, augment=True, seed=seed) - image_generator = image_datagen.flow_from_directory( - 'data/images', - class_mode=None, - seed=seed) - mask_generator = mask_datagen.flow_from_directory( - 'data/masks', - class_mode=None, - seed=seed) - # combine generators into one which yields image and masks - train_generator = zip(image_generator, mask_generator) - model.fit( - train_generator, - steps_per_epoch=2000, - epochs=50) - ``` - """ - - def __init__(self, - featurewise_center=False, - samplewise_center=False, - featurewise_std_normalization=False, - samplewise_std_normalization=False, - zca_whitening=False, - zca_epsilon=1e-6, - rotation_range=0, - width_shift_range=0., - height_shift_range=0., - brightness_range=None, - shear_range=0., - zoom_range=0., - channel_shift_range=0., - fill_mode='nearest', - cval=0., - horizontal_flip=False, - vertical_flip=False, - rescale=None, - preprocessing_function=None, - data_format=None, - validation_split=0.0, - interpolation_order=1, - dtype=None): - if data_format is None: - data_format = backend.image_data_format() - if dtype is None: - dtype = backend.floatx() - - self.featurewise_center = featurewise_center - self.samplewise_center = samplewise_center - self.featurewise_std_normalization = featurewise_std_normalization - self.samplewise_std_normalization = samplewise_std_normalization - self.zca_whitening = zca_whitening - self.zca_epsilon = zca_epsilon - self.rotation_range = rotation_range - self.width_shift_range = width_shift_range - self.height_shift_range = height_shift_range - self.shear_range = shear_range - self.zoom_range = zoom_range - self.channel_shift_range = channel_shift_range - self.fill_mode = fill_mode - self.cval = cval - self.horizontal_flip = horizontal_flip - self.vertical_flip = vertical_flip - self.rescale = rescale - self.preprocessing_function = preprocessing_function - self.dtype = dtype - self.interpolation_order = interpolation_order - - if data_format not in {'channels_last', 'channels_first'}: - raise ValueError('`data_format` should be `"channels_last"` ' - '(channel after row and column) or ' - '`"channels_first"` (channel before row and column). ' - 'Received: %s' % data_format) - self.data_format = data_format - if data_format == 'channels_first': - self.channel_axis = 1 - self.row_axis = 2 - self.col_axis = 3 - if data_format == 'channels_last': - self.channel_axis = 3 - self.row_axis = 1 - self.col_axis = 2 - if validation_split and not 0 < validation_split < 1: - raise ValueError('`validation_split` must be strictly between 0 and 1. 
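# Sketch of the axis bookkeeping above: the data_format string determines
# which axes of a 4D batch hold channels, rows and columns.
data_format = "channels_last"                     # or "channels_first"
if data_format == "channels_first":
    channel_axis, row_axis, col_axis = 1, 2, 3    # (N, C, H, W)
else:
    channel_axis, row_axis, col_axis = 3, 1, 2    # (N, H, W, C)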
' Received: %s' % validation_split)
- self._validation_split = validation_split
-
- self.mean = None
- self.std = None
- self.zca_whitening_matrix = None
-
- if isinstance(zoom_range, (float, int)):
- self.zoom_range = [1 - zoom_range, 1 + zoom_range]
- elif (len(zoom_range) == 2 and
- all(isinstance(val, (float, int)) for val in zoom_range)):
- self.zoom_range = [zoom_range[0], zoom_range[1]]
- else:
- raise ValueError('`zoom_range` should be a float or '
- 'a tuple or list of two floats. '
- 'Received: %s' % (zoom_range,))
- if zca_whitening:
- if not featurewise_center:
- self.featurewise_center = True
- warnings.warn('This ImageDataGenerator specifies '
- '`zca_whitening`, which overrides '
- 'setting of `featurewise_center`.')
- if featurewise_std_normalization:
- self.featurewise_std_normalization = False
- warnings.warn('This ImageDataGenerator specifies '
- '`zca_whitening` '
- 'which overrides setting of'
- '`featurewise_std_normalization`.')
- if featurewise_std_normalization:
- if not featurewise_center:
- self.featurewise_center = True
- warnings.warn('This ImageDataGenerator specifies '
- '`featurewise_std_normalization`, '
- 'which overrides setting of '
- '`featurewise_center`.')
- if samplewise_std_normalization:
- if not samplewise_center:
- self.samplewise_center = True
- warnings.warn('This ImageDataGenerator specifies '
- '`samplewise_std_normalization`, '
- 'which overrides setting of '
- '`samplewise_center`.')
- if brightness_range is not None:
- if (not isinstance(brightness_range, (tuple, list)) or
- len(brightness_range) != 2):
- raise ValueError(
- '`brightness_range should be tuple or list of two floats. '
- 'Received: %s' % (brightness_range,))
- self.brightness_range = brightness_range
-
- def flow(self,
- x,
- y=None,
- batch_size=32,
- shuffle=True,
- sample_weight=None,
- seed=None,
- save_to_dir=None,
- save_prefix='',
- save_format='png',
- ignore_class_split=False,
- subset=None):
- """Takes data & label arrays, generates batches of augmented data.
-
- Args:
- x: Input data. Numpy array of rank 4 or a tuple. If tuple, the first
- element should contain the images and the second element another numpy
- array or a list of numpy arrays that gets passed to the output without
- any modifications. Can be used to feed the model miscellaneous data
- along with the images. In case of grayscale data, the channels axis of
- the image array should have value 1, in case of RGB data, it should
- have value 3, and in case of RGBA data, it should have value 4.
- y: Labels.
- batch_size: Int (default: 32).
- shuffle: Boolean (default: True).
- sample_weight: Sample weights.
- seed: Int (default: None).
- save_to_dir: None or str (default: None). This allows you to optionally
- specify a directory to which to save the augmented pictures being
- generated (useful for visualizing what you are doing).
- save_prefix: Str (default: `''`). Prefix to use for filenames of saved
- pictures (only relevant if `save_to_dir` is set).
- save_format: one of "png", "jpeg", "bmp", "pdf", "ppm", "gif", "tif",
- "jpg" (only relevant if `save_to_dir` is set). Default: "png".
+ x: Numpy array of input data or tuple. If tuple, the second element is
+ either another numpy array or a list of numpy arrays, each of which
+ gets passed through as an output without any modifications.
+ y: Numpy array of target data.
+ image_data_generator: Instance of `ImageDataGenerator` to use for random
+ transformations and normalization.
+ batch_size: Integer, size of a batch.
+ shuffle: Boolean, whether to shuffle the data between epochs. + sample_weight: Numpy array of sample weights. + seed: Random seed for data shuffling. + data_format: String, one of `channels_first`, `channels_last`. + save_to_dir: Optional directory where to save the pictures being + yielded, in a viewable format. This is useful for visualizing the + random transformations being applied, for debugging purposes. + save_prefix: String prefix to use for saving sample images (if + `save_to_dir` is set). + save_format: Format to use for saving sample images (if `save_to_dir` is + set). + subset: Subset of data (`"training"` or `"validation"`) if + validation_split is set in ImageDataGenerator. ignore_class_split: Boolean (default: False), ignore difference in number of classes in labels across train and validation split (useful for non-classification tasks) - subset: Subset of data (`"training"` or `"validation"`) if - `validation_split` is set in `ImageDataGenerator`. - - Returns: - An `Iterator` yielding tuples of `(x, y)` - where `x` is a numpy array of image data - (in the case of a single image input) or a list - of numpy arrays (in the case with - additional inputs) and `y` is a numpy array - of corresponding labels. If 'sample_weight' is not None, - the yielded tuples are of the form `(x, y, sample_weight)`. - If `y` is None, only the numpy array `x` is returned. - Raises: - ValueError: If the Value of the argument, `subset` is other than - "training" or "validation". - + dtype: Dtype to use for the generated arrays. """ - return NumpyArrayIterator( + + def __init__( + self, x, y, - self, - batch_size=batch_size, - shuffle=shuffle, - sample_weight=sample_weight, - seed=seed, - data_format=self.data_format, - save_to_dir=save_to_dir, - save_prefix=save_prefix, - save_format=save_format, - ignore_class_split=ignore_class_split, - subset=subset, - dtype=self.dtype) - - def flow_from_directory(self, - directory, - target_size=(256, 256), - color_mode='rgb', - classes=None, - class_mode='categorical', - batch_size=32, - shuffle=True, - seed=None, - save_to_dir=None, - save_prefix='', - save_format='png', - follow_links=False, - subset=None, - interpolation='nearest', - keep_aspect_ratio=False): - """Takes the path to a directory & generates batches of augmented data. + image_data_generator, + batch_size=32, + shuffle=False, + sample_weight=None, + seed=None, + data_format=None, + save_to_dir=None, + save_prefix="", + save_format="png", + subset=None, + ignore_class_split=False, + dtype=None, + ): + if data_format is None: + data_format = backend.image_data_format() + if dtype is None: + dtype = backend.floatx() + self.dtype = dtype + if isinstance(x, tuple) or isinstance(x, list): + if not isinstance(x[1], list): + x_misc = [np.asarray(x[1])] + else: + x_misc = [np.asarray(xx) for xx in x[1]] + x = x[0] + for xx in x_misc: + if len(x) != len(xx): + raise ValueError( + "All of the arrays in `x` " + "should have the same length. " + "Found a pair with: len(x[0]) = %s, len(x[?]) = %s" + % (len(x), len(xx)) + ) + else: + x_misc = [] + + if y is not None and len(x) != len(y): + raise ValueError( + "`x` (images tensor) and `y` (labels) " + "should have the same length. " + "Found: x.shape = %s, y.shape = %s" + % (np.asarray(x).shape, np.asarray(y).shape) + ) + if sample_weight is not None and len(x) != len(sample_weight): + raise ValueError( + "`x` (images tensor) and `sample_weight` " + "should have the same length. 
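# A minimal sketch of `ImageDataGenerator.flow` on in-memory arrays (random
# example data): `x` must be rank 4 and `len(x)` must equal `len(y)`,
# exactly the checks enforced above.
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

x = np.random.rand(8, 32, 32, 3).astype("float32")   # (samples, H, W, channels)
y = np.array([0, 1, 0, 1, 0, 1, 0, 1])
datagen = ImageDataGenerator(horizontal_flip=True)
batch_x, batch_y = next(datagen.flow(x, y, batch_size=4, shuffle=False))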
" + "Found: x.shape = %s, sample_weight.shape = %s" + % (np.asarray(x).shape, np.asarray(sample_weight).shape) + ) + if subset is not None: + if subset not in {"training", "validation"}: + raise ValueError( + "Invalid subset name:", + subset, + '; expected "training" or "validation".', + ) + split_idx = int(len(x) * image_data_generator._validation_split) + + if ( + y is not None + and not ignore_class_split + and not np.array_equal( + np.unique(y[:split_idx]), np.unique(y[split_idx:]) + ) + ): + raise ValueError( + "Training and validation subsets " + "have different number of classes after " + "the split. If your numpy arrays are " + "sorted by the label, you might want " + "to shuffle them." + ) + + if subset == "validation": + x = x[:split_idx] + x_misc = [np.asarray(xx[:split_idx]) for xx in x_misc] + if y is not None: + y = y[:split_idx] + else: + x = x[split_idx:] + x_misc = [np.asarray(xx[split_idx:]) for xx in x_misc] + if y is not None: + y = y[split_idx:] + + self.x = np.asarray(x, dtype=self.dtype) + self.x_misc = x_misc + if self.x.ndim != 4: + raise ValueError( + "Input data in `NumpyArrayIterator` " + "should have rank 4. You passed an array " + "with shape", + self.x.shape, + ) + channels_axis = 3 if data_format == "channels_last" else 1 + if self.x.shape[channels_axis] not in {1, 3, 4}: + warnings.warn( + 'NumpyArrayIterator is set to use the data format convention "' + + data_format + + '" (channels on axis ' + + str(channels_axis) + + "), i.e. expected either 1, 3, or 4 channels on axis " + + str(channels_axis) + + ". However, it was passed an array with shape " + + str(self.x.shape) + + " (" + + str(self.x.shape[channels_axis]) + + " channels)." + ) + if y is not None: + self.y = np.asarray(y) + else: + self.y = None + if sample_weight is not None: + self.sample_weight = np.asarray(sample_weight) + else: + self.sample_weight = None + self.image_data_generator = image_data_generator + self.data_format = data_format + self.save_to_dir = save_to_dir + self.save_prefix = save_prefix + self.save_format = save_format + super().__init__(x.shape[0], batch_size, shuffle, seed) + + def _get_batches_of_transformed_samples(self, index_array): + batch_x = np.zeros( + tuple([len(index_array)] + list(self.x.shape)[1:]), dtype=self.dtype + ) + for i, j in enumerate(index_array): + x = self.x[j] + params = self.image_data_generator.get_random_transform(x.shape) + x = self.image_data_generator.apply_transform( + x.astype(self.dtype), params + ) + x = self.image_data_generator.standardize(x) + batch_x[i] = x + + if self.save_to_dir: + for i, j in enumerate(index_array): + img = image_utils.array_to_img( + batch_x[i], self.data_format, scale=True + ) + fname = "{prefix}_{index}_{hash}.{format}".format( + prefix=self.save_prefix, + index=j, + hash=np.random.randint(1e4), + format=self.save_format, + ) + img.save(os.path.join(self.save_to_dir, fname)) + batch_x_miscs = [xx[index_array] for xx in self.x_misc] + output = (batch_x if not batch_x_miscs else [batch_x] + batch_x_miscs,) + if self.y is None: + return output[0] + output += (self.y[index_array],) + if self.sample_weight is not None: + output += (self.sample_weight[index_array],) + return output - Args: - directory: string, path to the target directory. It should contain one - subdirectory per class. Any PNG, JPG, BMP, PPM or TIF images inside - each of the subdirectories directory tree will be included in the - generator. See [this script]( - https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) - for more details. 
- target_size: Tuple of integers `(height, width)`, defaults to `(256, - 256)`. The dimensions to which all images found will be resized. - color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb". Whether - the images will be converted to have 1, 3, or 4 channels. - classes: Optional list of class subdirectories - (e.g. `['dogs', 'cats']`). Default: None. If not provided, the list - of classes will be automatically inferred from the subdirectory - names/structure under `directory`, where each subdirectory will be - treated as a different class (and the order of the classes, which - will map to the label indices, will be alphanumeric). The - dictionary containing the mapping from class names to class - indices can be obtained via the attribute `class_indices`. - class_mode: One of "categorical", "binary", "sparse", - "input", or None. Default: "categorical". - Determines the type of label arrays that are returned: - - "categorical" will be 2D one-hot encoded labels, - - "binary" will be 1D binary labels, - "sparse" will be 1D integer labels, - - "input" will be images identical - to input images (mainly used to work with autoencoders). - - If None, no labels are returned - (the generator will only yield batches of image data, - which is useful to use with `model.predict_generator()`). - Please note that in case of class_mode None, - the data still needs to reside in a subdirectory - of `directory` for it to work correctly. - batch_size: Size of the batches of data (default: 32). - shuffle: Whether to shuffle the data (default: True) If set to False, - sorts the data in alphanumeric order. - seed: Optional random seed for shuffling and transformations. - save_to_dir: None or str (default: None). This allows you to optionally - specify a directory to which to save the augmented pictures being - generated (useful for visualizing what you are doing). - save_prefix: Str. Prefix to use for filenames of saved pictures (only - relevant if `save_to_dir` is set). - save_format: one of "png", "jpeg", "bmp", "pdf", "ppm", "gif", "tif", - "jpg" - (only relevant if `save_to_dir` is set). Default: "png". - follow_links: Whether to follow symlinks inside - class subdirectories (default: False). - subset: Subset of data (`"training"` or `"validation"`) if - `validation_split` is set in `ImageDataGenerator`. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. Supported - methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. If PIL version - 1.1.3 or newer is installed, `"lanczos"` is also supported. If PIL - version 3.4.0 or newer is installed, `"box"` and `"hamming"` are also - supported. By default, `"nearest"` is used. - keep_aspect_ratio: Boolean, whether to resize images to a target - size without aspect ratio distortion. The image is cropped in - the center with target aspect ratio before resizing. +def validate_filename(filename, white_list_formats): + """Check if a filename refers to a valid file. + + Args: + filename: String, absolute path to a file + white_list_formats: Set, allowed file extensions Returns: - A `DirectoryIterator` yielding tuples of `(x, y)` - where `x` is a numpy array containing a batch - of images with shape `(batch_size, *target_size, channels)` - and `y` is a numpy array of corresponding labels. 
+ A boolean value indicating if the filename is valid or not """ - return DirectoryIterator( - directory, - self, - target_size=target_size, - color_mode=color_mode, - keep_aspect_ratio=keep_aspect_ratio, - classes=classes, - class_mode=class_mode, - data_format=self.data_format, - batch_size=batch_size, - shuffle=shuffle, - seed=seed, - save_to_dir=save_to_dir, - save_prefix=save_prefix, - save_format=save_format, - follow_links=follow_links, - subset=subset, - interpolation=interpolation, - dtype=self.dtype) - - def flow_from_dataframe(self, - dataframe, - directory=None, - x_col='filename', - y_col='class', - weight_col=None, - target_size=(256, 256), - color_mode='rgb', - classes=None, - class_mode='categorical', - batch_size=32, - shuffle=True, - seed=None, - save_to_dir=None, - save_prefix='', - save_format='png', - subset=None, - interpolation='nearest', - validate_filenames=True, - **kwargs): - """Takes the dataframe and the path to a directory + generates batches. - - The generated batches contain augmented/normalized data. - - **A simple tutorial can be found **[here]( - http://bit.ly/keras_flow_from_dataframe). + return filename.lower().endswith(white_list_formats) and os.path.isfile( + filename + ) + + +class DataFrameIterator(BatchFromFilesMixin, Iterator): + """Iterator capable of reading images from a directory as a dataframe. Args: dataframe: Pandas dataframe containing the filepaths relative to - `directory` (or absolute paths if `directory` is None) of the - images in a string column. It should include other column/s - depending on the `class_mode`: - - if `class_mode` is `"categorical"` (default value) it must - include the `y_col` column with the class/es of each image. - Values in column can be string/list/tuple if a single class - or list/tuple if multiple classes. - - if `class_mode` is `"binary"` or `"sparse"` it must include - the given `y_col` column with class values as strings. + `directory` (or absolute paths if `directory` is None) of the images + in a string column. It should include other column/s depending on the + `class_mode`: - if `class_mode` is `"categorical"` (default value) it + must include the `y_col` column with the class/es of each image. + Values in column can be string/list/tuple if a single class or + list/tuple if multiple classes. + - if `class_mode` is `"binary"` or `"sparse"` it must include the + given `y_col` column with class values as strings. - if `class_mode` is `"raw"` or `"multi_output"` it should contain - the columns specified in `y_col`. + the columns specified in `y_col`. - if `class_mode` is `"input"` or `None` no extra column is needed. directory: string, path to the directory to read images from. If `None`, data in `x_col` column should be absolute paths. + image_data_generator: Instance of `ImageDataGenerator` to use for random + transformations and normalization. If None, no transformations and + normalizations are made. x_col: string, column in `dataframe` that contains the filenames (or absolute paths if `directory` is `None`). y_col: string or list, column/s in `dataframe` that has the target data. weight_col: string, column in `dataframe` that contains the sample weights. Default: `None`. - target_size: tuple of integers `(height, width)`, default: `(256, 256)`. - The dimensions to which all images found will be resized. - color_mode: one of "grayscale", "rgb", "rgba". Default: "rgb". Whether - the images will be converted to have 1 or 3 color channels. - classes: optional list of classes (e.g. `['dogs', 'cats']`). 
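# Sketch of `validate_filename` (defined above) on a hypothetical path: the
# extension check is case-insensitive and the file must actually exist.
white_list_formats = ("png", "jpg", "jpeg", "bmp", "ppm", "tif", "tiff")
ok = validate_filename("/data/images/cat1.PNG", white_list_formats)
# ok is True only when the file exists and its extension is whitelisted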
Default is - None. If not provided, the list of classes will be automatically - inferred from the `y_col`, which will map to the label indices, will - be alphanumeric). The dictionary containing the mapping from class - names to class indices can be obtained via the attribute - `class_indices`. + target_size: tuple of integers, dimensions to resize input images to. + color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. Color mode to read + images. + classes: Optional list of strings, classes to use (e.g. `["dogs", + "cats"]`). If None, all classes in `y_col` will be used. class_mode: one of "binary", "categorical", "input", "multi_output", - "raw", sparse" or None. Default: "categorical". - Mode for yielding the targets: + "raw", "sparse" or None. Default: "categorical". + Mode for yielding the targets: - `"binary"`: 1D numpy array of binary labels, - `"categorical"`: 2D numpy array of one-hot encoded labels. Supports multi-label output. @@ -1557,777 +884,1740 @@ def flow_from_dataframe(self, - `"sparse"`: 1D numpy array of integer labels, - `None`, no targets are returned (the generator will only yield batches of image data, which is useful to use in `model.predict()`). - batch_size: size of the batches of data (default: 32). - shuffle: whether to shuffle the data (default: True) - seed: optional random seed for shuffling and transformations. - save_to_dir: None or str (default: None). This allows you to optionally - specify a directory to which to save the augmented pictures being - generated (useful for visualizing what you are doing). - save_prefix: str. Prefix to use for filenames of saved pictures (only - relevant if `save_to_dir` is set). - save_format: one of "png", "jpeg", "bmp", "pdf", "ppm", "gif", "tif", - "jpg" (only relevant if `save_to_dir` is set). Default: "png". + batch_size: Integer, size of a batch. + shuffle: Boolean, whether to shuffle the data between epochs. + seed: Random seed for data shuffling. + data_format: String, one of `channels_first`, `channels_last`. + save_to_dir: Optional directory where to save the pictures being + yielded, in a viewable format. This is useful for visualizing the + random transformations being applied, for debugging purposes. + save_prefix: String prefix to use for saving sample images (if + `save_to_dir` is set). + save_format: Format to use for saving sample images (if `save_to_dir` is + set). subset: Subset of data (`"training"` or `"validation"`) if - `validation_split` is set in `ImageDataGenerator`. + validation_split is set in ImageDataGenerator. interpolation: Interpolation method used to resample the image if the target size is different from that of the loaded image. Supported - methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. If PIL version - 1.1.3 or newer is installed, `"lanczos"` is also supported. If PIL - version 3.4.0 or newer is installed, `"box"` and `"hamming"` are also - supported. By default, `"nearest"` is used. + methods are "nearest", "bilinear", and "bicubic". If PIL version 1.1.3 + or newer is installed, "lanczos" is also supported. If PIL version + 3.4.0 or newer is installed, "box" and "hamming" are also supported. + By default, "nearest" is used. + keep_aspect_ratio: Boolean, whether to resize images to a target size + without aspect ratio distortion. The image is cropped in the center + with target aspect ratio before resizing. + dtype: Dtype to use for the generated arrays. validate_filenames: Boolean, whether to validate image filenames in `x_col`. If `True`, invalid images will be ignored. 
Disabling this - option can lead to speed-up in the execution of this function. - Defaults to `True`. - **kwargs: legacy arguments for raising deprecation warnings. - - Returns: - A `DataFrameIterator` yielding tuples of `(x, y)` - where `x` is a numpy array containing a batch - of images with shape `(batch_size, *target_size, channels)` - and `y` is a numpy array of corresponding labels. + option can lead to speed-up in the instantiation of this class. + Default: `True`. """ - if 'has_ext' in kwargs: - warnings.warn( - 'has_ext is deprecated, filenames in the dataframe have ' - 'to match the exact filenames in disk.', DeprecationWarning) - if 'sort' in kwargs: - warnings.warn( - 'sort is deprecated, batches will be created in the' - 'same order than the filenames provided if shuffle' - 'is set to False.', DeprecationWarning) - if class_mode == 'other': - warnings.warn( - '`class_mode` "other" is deprecated, please use ' - '`class_mode` "raw".', DeprecationWarning) - class_mode = 'raw' - if 'drop_duplicates' in kwargs: - warnings.warn( - 'drop_duplicates is deprecated, you can drop duplicates ' - 'by using the pandas.DataFrame.drop_duplicates method.', - DeprecationWarning) - - return DataFrameIterator( + + allowed_class_modes = { + "binary", + "categorical", + "input", + "multi_output", + "raw", + "sparse", + None, + } + + def __init__( + self, dataframe, + directory=None, + image_data_generator=None, + x_col="filename", + y_col="class", + weight_col=None, + target_size=(256, 256), + color_mode="rgb", + classes=None, + class_mode="categorical", + batch_size=32, + shuffle=True, + seed=None, + data_format="channels_last", + save_to_dir=None, + save_prefix="", + save_format="png", + subset=None, + interpolation="nearest", + keep_aspect_ratio=False, + dtype="float32", + validate_filenames=True, + ): + super().set_processing_attrs( + image_data_generator, + target_size, + color_mode, + data_format, + save_to_dir, + save_prefix, + save_format, + subset, + interpolation, + keep_aspect_ratio, + ) + df = dataframe.copy() + self.directory = directory or "" + self.class_mode = class_mode + self.dtype = dtype + # check that inputs match the required class_mode + self._check_params(df, x_col, y_col, weight_col, classes) + if ( + validate_filenames + ): # check which image files are valid and keep them + df = self._filter_valid_filepaths(df, x_col) + if class_mode not in ["input", "multi_output", "raw", None]: + df, classes = self._filter_classes(df, y_col, classes) + num_classes = len(classes) + # build an index of all the unique classes + self.class_indices = dict(zip(classes, range(len(classes)))) + # retrieve only training or validation set + if self.split: + num_files = len(df) + start = int(self.split[0] * num_files) + stop = int(self.split[1] * num_files) + df = df.iloc[start:stop, :] + # get labels for each observation + if class_mode not in ["input", "multi_output", "raw", None]: + self.classes = self.get_classes(df, y_col) + self.filenames = df[x_col].tolist() + self._sample_weight = df[weight_col].values if weight_col else None + + if class_mode == "multi_output": + self._targets = [np.array(df[col].tolist()) for col in y_col] + if class_mode == "raw": + self._targets = df[y_col].values + self.samples = len(self.filenames) + validated_string = ( + "validated" if validate_filenames else "non-validated" + ) + if class_mode in ["input", "multi_output", "raw", None]: + io_utils.print_msg( + f"Found {self.samples} {validated_string} image filenames." 
+ ) + else: + io_utils.print_msg( + f"Found {self.samples} {validated_string} image filenames " + f"belonging to {num_classes} classes." + ) + self._filepaths = [ + os.path.join(self.directory, fname) for fname in self.filenames + ] + super().__init__(self.samples, batch_size, shuffle, seed) + + def _check_params(self, df, x_col, y_col, weight_col, classes): + # check class mode is one of the currently supported + if self.class_mode not in self.allowed_class_modes: + raise ValueError( + "Invalid class_mode: {}; expected one of: {}".format( + self.class_mode, self.allowed_class_modes + ) + ) + # check that y_col has several column names if class_mode is + # multi_output + if (self.class_mode == "multi_output") and not isinstance(y_col, list): + raise TypeError( + 'If class_mode="{}", y_col must be a list. Received {}.'.format( + self.class_mode, type(y_col).__name__ + ) + ) + # check that filenames/filepaths column values are all strings + if not all(df[x_col].apply(lambda x: isinstance(x, str))): + raise TypeError( + f"All values in column x_col={x_col} must be strings." + ) + # check labels are string if class_mode is binary or sparse + if self.class_mode in {"binary", "sparse"}: + if not all(df[y_col].apply(lambda x: isinstance(x, str))): + raise TypeError( + 'If class_mode="{}", y_col="{}" column ' + "values must be strings.".format(self.class_mode, y_col) + ) + # check that if binary there are only 2 different classes + if self.class_mode == "binary": + if classes: + classes = set(classes) + if len(classes) != 2: + raise ValueError( + 'If class_mode="binary" there must be 2 ' + "classes. {} class/es were given.".format(len(classes)) + ) + elif df[y_col].nunique() != 2: + raise ValueError( + 'If class_mode="binary" there must be 2 classes. ' + "Found {} classes.".format(df[y_col].nunique()) + ) + # check values are string, list or tuple if class_mode is categorical + if self.class_mode == "categorical": + types = (str, list, tuple) + if not all(df[y_col].apply(lambda x: isinstance(x, types))): + raise TypeError( + 'If class_mode="{}", y_col="{}" column ' + "values must be type string, list or tuple.".format( + self.class_mode, y_col + ) + ) + # raise warning if classes are given but will be unused + if classes and self.class_mode in { + "input", + "multi_output", + "raw", + None, + }: + warnings.warn( + '`classes` will be ignored given the class_mode="{}"'.format( + self.class_mode + ) + ) + # check that if weight column that the values are numerical + if weight_col and not issubclass(df[weight_col].dtype.type, np.number): + raise TypeError(f"Column weight_col={weight_col} must be numeric.") + + def get_classes(self, df, y_col): + labels = [] + for label in df[y_col]: + if isinstance(label, (list, tuple)): + labels.append([self.class_indices[lbl] for lbl in label]) + else: + labels.append(self.class_indices[label]) + return labels + + @staticmethod + def _filter_classes(df, y_col, classes): + df = df.copy() + + def remove_classes(labels, classes): + if isinstance(labels, (list, tuple)): + labels = [cls for cls in labels if cls in classes] + return labels or None + elif isinstance(labels, str): + return labels if labels in classes else None + else: + raise TypeError( + "Expect string, list or tuple " + "but found {} in {} column ".format(type(labels), y_col) + ) + + if classes: + # prepare for membership lookup + classes = list(collections.OrderedDict.fromkeys(classes).keys()) + df[y_col] = df[y_col].apply(lambda x: remove_classes(x, classes)) + else: + classes = set() + for v in 
df[y_col]: + if isinstance(v, (list, tuple)): + classes.update(v) + else: + classes.add(v) + classes = sorted(classes) + return df.dropna(subset=[y_col]), classes + + def _filter_valid_filepaths(self, df, x_col): + """Keep only dataframe rows with valid filenames. + + Args: + df: Pandas dataframe containing filenames in a column + x_col: string, column in `df` that contains the filenames or + filepaths + Returns: + absolute paths to image files + """ + filepaths = df[x_col].map( + lambda fname: os.path.join(self.directory, fname) + ) + mask = filepaths.apply( + validate_filename, args=(self.white_list_formats,) + ) + n_invalid = (~mask).sum() + if n_invalid: + warnings.warn( + 'Found {} invalid image filename(s) in x_col="{}". ' + "These filename(s) will be ignored.".format(n_invalid, x_col) + ) + return df[mask] + + @property + def filepaths(self): + return self._filepaths + + @property + def labels(self): + if self.class_mode in {"multi_output", "raw"}: + return self._targets + else: + return self.classes + + @property + def sample_weight(self): + return self._sample_weight + + +def flip_axis(x, axis): + x = np.asarray(x).swapaxes(axis, 0) + x = x[::-1, ...] + x = x.swapaxes(0, axis) + return x + + +@keras_export("keras.preprocessing.image.ImageDataGenerator") +class ImageDataGenerator: + """Generate batches of tensor image data with real-time data augmentation. + + Deprecated: `tf.keras.preprocessing.image.ImageDataGenerator` is not + recommended for new code. Prefer loading images with + `tf.keras.utils.image_dataset_from_directory` and transforming the output + `tf.data.Dataset` with preprocessing layers. For more information, see the + tutorials for [loading images]( + https://www.tensorflow.org/tutorials/load_data/images) and + [augmenting images]( + https://www.tensorflow.org/tutorials/images/data_augmentation), as well as + the [preprocessing layer guide]( + https://www.tensorflow.org/guide/keras/preprocessing_layers). + + The data will be looped over (in batches). + + Args: + featurewise_center: Boolean. Set input mean to 0 over the dataset, + feature-wise. + samplewise_center: Boolean. Set each sample mean to 0. + featurewise_std_normalization: Boolean. Divide inputs by std of the + dataset, feature-wise. + samplewise_std_normalization: Boolean. Divide each input by its std. + zca_epsilon: epsilon for ZCA whitening. Default is 1e-6. + zca_whitening: Boolean. Apply ZCA whitening. + rotation_range: Int. Degree range for random rotations. + width_shift_range: Float, 1-D array-like or int + - float: fraction of total width, if < 1, or pixels if >= 1. + - 1-D array-like: random elements from the array. + - int: integer number of pixels from interval `(-width_shift_range, + +width_shift_range)` - With `width_shift_range=2` possible values + are integers `[-1, 0, +1]`, same as with `width_shift_range=[-1, + 0, +1]`, while with `width_shift_range=1.0` possible values are + floats in the interval [-1.0, +1.0). + height_shift_range: Float, 1-D array-like or int + - float: fraction of total height, if < 1, or pixels if >= 1. + - 1-D array-like: random elements from the array. + - int: integer number of pixels from interval `(-height_shift_range, + +height_shift_range)` - With `height_shift_range=2` possible + values are integers `[-1, 0, +1]`, same as with + `height_shift_range=[-1, 0, +1]`, while with + `height_shift_range=1.0` possible values are floats in the + interval [-1.0, +1.0). + brightness_range: Tuple or list of two floats. Range for picking a + brightness shift value from. 
+ shear_range: Float. Shear Intensity (Shear angle in counter-clockwise + direction in degrees) + zoom_range: Float or [lower, upper]. Range for random zoom. If a float, + `[lower, upper] = [1-zoom_range, 1+zoom_range]`. + channel_shift_range: Float. Range for random channel shifts. + fill_mode: One of {"constant", "nearest", "reflect" or "wrap"}. Default + is 'nearest'. Points outside the boundaries of the input are filled + according to the given mode: + - 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k) + - 'nearest': aaaaaaaa|abcd|dddddddd + - 'reflect': abcddcba|abcd|dcbaabcd + - 'wrap': abcdabcd|abcd|abcdabcd + cval: Float or Int. Value used for points outside the boundaries when + `fill_mode = "constant"`. + horizontal_flip: Boolean. Randomly flip inputs horizontally. + vertical_flip: Boolean. Randomly flip inputs vertically. + rescale: rescaling factor. If None or 0, no rescaling + is applied, otherwise we multiply the data by the value provided + (after applying all other transformations). Defaults to `None`. + preprocessing_function: function that will be applied on each input. The + function will run after the image is resized and augmented. + The function should take one argument: one image (Numpy tensor with + rank 3), and should output a Numpy tensor with the same shape. + data_format: Image data format, either "channels_first" or + "channels_last". "channels_last" mode means that the images should + have shape `(samples, height, width, channels)`, "channels_first" mode + means that the images should have shape `(samples, channels, height, + width)`. When unspecified, uses `image_data_format` value found in + your Keras config file at `~/.keras/keras.json` (if exists) else + 'channels_last'. Defaults to "channels_last". + validation_split: Float. Fraction of images reserved for validation + (strictly between 0 and 1). + dtype: Dtype to use for the generated arrays. + + Raises: + ValueError: If the value of the argument, `data_format` is other than + `"channels_last"` or `"channels_first"`. + ValueError: If the value of the argument, `validation_split` > 1 + or `validation_split` < 0. 
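The shift-range semantics above (int vs. float vs. 1-D array-like) are easy to misread, so here is a minimal sketch that just prints the horizontal shift drawn for each accepted form. It is hedged: it assumes the standalone `keras` import path, the shapes and seed are made up, and it relies on `get_random_transform` (defined later in this hunk), where the horizontal shift is stored under the `ty` key. The library's own, fuller examples follow.

```python
# Minimal sketch of the width_shift_range semantics documented above.
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

img_shape = (100, 100, 3)  # (height, width, channels), made up
for wsr in (2, [-1, 0, +1], 0.2):
    gen = ImageDataGenerator(width_shift_range=wsr)
    params = gen.get_random_transform(img_shape, seed=1)
    # 2           -> integer pixels from (-2, +2), i.e. {-1, 0, +1}
    # [-1, 0, +1] -> a random element of the array (pixels here)
    # 0.2         -> uniform fraction of total width, scaled to pixels
    print(wsr, "->", params["ty"])
```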
+ + Examples: + + Example of using `.flow(x, y)`: + + ```python + (x_train, y_train), (x_test, y_test) = cifar10.load_data() + y_train = utils.to_categorical(y_train, num_classes) + y_test = utils.to_categorical(y_test, num_classes) + datagen = ImageDataGenerator( + featurewise_center=True, + featurewise_std_normalization=True, + rotation_range=20, + width_shift_range=0.2, + height_shift_range=0.2, + horizontal_flip=True, + validation_split=0.2) + # compute quantities required for featurewise normalization + # (std, mean, and principal components if ZCA whitening is applied) + datagen.fit(x_train) + # fits the model on batches with real-time data augmentation: + model.fit(datagen.flow(x_train, y_train, batch_size=32, + subset='training'), + validation_data=datagen.flow(x_train, y_train, + batch_size=8, subset='validation'), + steps_per_epoch=len(x_train) / 32, epochs=epochs) + # here's a more "manual" example + for e in range(epochs): + print('Epoch', e) + batches = 0 + for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=32): + model.fit(x_batch, y_batch) + batches += 1 + if batches >= len(x_train) / 32: + # we need to break the loop by hand because + # the generator loops indefinitely + break + ``` + + Example of using `.flow_from_directory(directory)`: + + ```python + train_datagen = ImageDataGenerator( + rescale=1./255, + shear_range=0.2, + zoom_range=0.2, + horizontal_flip=True) + test_datagen = ImageDataGenerator(rescale=1./255) + train_generator = train_datagen.flow_from_directory( + 'data/train', + target_size=(150, 150), + batch_size=32, + class_mode='binary') + validation_generator = test_datagen.flow_from_directory( + 'data/validation', + target_size=(150, 150), + batch_size=32, + class_mode='binary') + model.fit( + train_generator, + steps_per_epoch=2000, + epochs=50, + validation_data=validation_generator, + validation_steps=800) + ``` + + Example of transforming images and masks together. 
+ + ```python + # we create two instances with the same arguments + data_gen_args = dict(featurewise_center=True, + featurewise_std_normalization=True, + rotation_range=90, + width_shift_range=0.1, + height_shift_range=0.1, + zoom_range=0.2) + image_datagen = ImageDataGenerator(**data_gen_args) + mask_datagen = ImageDataGenerator(**data_gen_args) + # Provide the same seed and keyword arguments to the fit and flow methods + seed = 1 + image_datagen.fit(images, augment=True, seed=seed) + mask_datagen.fit(masks, augment=True, seed=seed) + image_generator = image_datagen.flow_from_directory( + 'data/images', + class_mode=None, + seed=seed) + mask_generator = mask_datagen.flow_from_directory( + 'data/masks', + class_mode=None, + seed=seed) + # combine generators into one which yields image and masks + train_generator = zip(image_generator, mask_generator) + model.fit( + train_generator, + steps_per_epoch=2000, + epochs=50) + ``` + """ + + def __init__( + self, + featurewise_center=False, + samplewise_center=False, + featurewise_std_normalization=False, + samplewise_std_normalization=False, + zca_whitening=False, + zca_epsilon=1e-6, + rotation_range=0, + width_shift_range=0.0, + height_shift_range=0.0, + brightness_range=None, + shear_range=0.0, + zoom_range=0.0, + channel_shift_range=0.0, + fill_mode="nearest", + cval=0.0, + horizontal_flip=False, + vertical_flip=False, + rescale=None, + preprocessing_function=None, + data_format=None, + validation_split=0.0, + interpolation_order=1, + dtype=None, + ): + if data_format is None: + data_format = backend.image_data_format() + if dtype is None: + dtype = backend.floatx() + + self.featurewise_center = featurewise_center + self.samplewise_center = samplewise_center + self.featurewise_std_normalization = featurewise_std_normalization + self.samplewise_std_normalization = samplewise_std_normalization + self.zca_whitening = zca_whitening + self.zca_epsilon = zca_epsilon + self.rotation_range = rotation_range + self.width_shift_range = width_shift_range + self.height_shift_range = height_shift_range + self.shear_range = shear_range + self.zoom_range = zoom_range + self.channel_shift_range = channel_shift_range + self.fill_mode = fill_mode + self.cval = cval + self.horizontal_flip = horizontal_flip + self.vertical_flip = vertical_flip + self.rescale = rescale + self.preprocessing_function = preprocessing_function + self.dtype = dtype + self.interpolation_order = interpolation_order + + if data_format not in {"channels_last", "channels_first"}: + raise ValueError( + '`data_format` should be `"channels_last"` ' + "(channel after row and column) or " + '`"channels_first"` (channel before row and column). ' + "Received: %s" % data_format + ) + self.data_format = data_format + if data_format == "channels_first": + self.channel_axis = 1 + self.row_axis = 2 + self.col_axis = 3 + if data_format == "channels_last": + self.channel_axis = 3 + self.row_axis = 1 + self.col_axis = 2 + if validation_split and not 0 < validation_split < 1: + raise ValueError( + "`validation_split` must be strictly between 0 and 1. 
" + " Received: %s" % validation_split + ) + self._validation_split = validation_split + + self.mean = None + self.std = None + self.zca_whitening_matrix = None + + if isinstance(zoom_range, (float, int)): + self.zoom_range = [1 - zoom_range, 1 + zoom_range] + elif len(zoom_range) == 2 and all( + isinstance(val, (float, int)) for val in zoom_range + ): + self.zoom_range = [zoom_range[0], zoom_range[1]] + else: + raise ValueError( + "`zoom_range` should be a float or " + "a tuple or list of two floats. " + "Received: %s" % (zoom_range,) + ) + if zca_whitening: + if not featurewise_center: + self.featurewise_center = True + warnings.warn( + "This ImageDataGenerator specifies " + "`zca_whitening`, which overrides " + "setting of `featurewise_center`." + ) + if featurewise_std_normalization: + self.featurewise_std_normalization = False + warnings.warn( + "This ImageDataGenerator specifies " + "`zca_whitening` " + "which overrides setting of" + "`featurewise_std_normalization`." + ) + if featurewise_std_normalization: + if not featurewise_center: + self.featurewise_center = True + warnings.warn( + "This ImageDataGenerator specifies " + "`featurewise_std_normalization`, " + "which overrides setting of " + "`featurewise_center`." + ) + if samplewise_std_normalization: + if not samplewise_center: + self.samplewise_center = True + warnings.warn( + "This ImageDataGenerator specifies " + "`samplewise_std_normalization`, " + "which overrides setting of " + "`samplewise_center`." + ) + if brightness_range is not None: + if ( + not isinstance(brightness_range, (tuple, list)) + or len(brightness_range) != 2 + ): + raise ValueError( + "`brightness_range should be tuple or list of two floats. " + "Received: %s" % (brightness_range,) + ) + self.brightness_range = brightness_range + + def flow( + self, + x, + y=None, + batch_size=32, + shuffle=True, + sample_weight=None, + seed=None, + save_to_dir=None, + save_prefix="", + save_format="png", + ignore_class_split=False, + subset=None, + ): + """Takes data & label arrays, generates batches of augmented data. + + Args: + x: Input data. Numpy array of rank 4 or a tuple. If tuple, the first + element should contain the images and the second element another + numpy array or a list of numpy arrays that gets passed to the + output without any modifications. Can be used to feed the model + miscellaneous data along with the images. In case of grayscale + data, the channels axis of the image array should have value 1, in + case of RGB data, it should have value 3, and in case of RGBA + data, it should have value 4. + y: Labels. + batch_size: Int (default: 32). + shuffle: Boolean (default: True). + sample_weight: Sample weights. + seed: Int (default: None). + save_to_dir: None or str (default: None). This allows you to + optionally specify a directory to which to save the augmented + pictures being generated (useful for visualizing what you are + doing). + save_prefix: Str (default: `''`). Prefix to use for filenames of + saved pictures (only relevant if `save_to_dir` is set). + save_format: one of "png", "jpeg", "bmp", "pdf", "ppm", "gif", + "tif", "jpg" (only relevant if `save_to_dir` is set). Default: + "png". + ignore_class_split: Boolean (default: False), ignore difference + in number of classes in labels across train and validation + split (useful for non-classification tasks) + subset: Subset of data (`"training"` or `"validation"`) if + `validation_split` is set in `ImageDataGenerator`. 
+ + Returns: + An `Iterator` yielding tuples of `(x, y)` + where `x` is a numpy array of image data + (in the case of a single image input) or a list + of numpy arrays (in the case with + additional inputs) and `y` is a numpy array + of corresponding labels. If `sample_weight` is not None, + the yielded tuples are of the form `(x, y, sample_weight)`. + If `y` is None, only the numpy array `x` is returned. + Raises: + ValueError: If the value of `subset` is other than + `"training"` or `"validation"`. + + """ + return NumpyArrayIterator( + x, + y, + self, + batch_size=batch_size, + shuffle=shuffle, + sample_weight=sample_weight, + seed=seed, + data_format=self.data_format, + save_to_dir=save_to_dir, + save_prefix=save_prefix, + save_format=save_format, + ignore_class_split=ignore_class_split, + subset=subset, + dtype=self.dtype, + ) + + def flow_from_directory( + self, directory, + target_size=(256, 256), + color_mode="rgb", + classes=None, + class_mode="categorical", + batch_size=32, + shuffle=True, + seed=None, + save_to_dir=None, + save_prefix="", + save_format="png", + follow_links=False, + subset=None, + interpolation="nearest", + keep_aspect_ratio=False, + ): + """Takes the path to a directory & generates batches of augmented data. + + Args: + directory: string, path to the target directory. It should contain + one subdirectory per class. Any PNG, JPG, BMP, PPM or TIF images + inside each of the subdirectories directory tree will be included + in the generator. See [this script]( + https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) + for more details. + target_size: Tuple of integers `(height, width)`. The dimensions to + which all images found will be resized. Defaults to `(256, 256)`. + color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb". + Whether the images will be converted to have 1, 3, or 4 channels. + classes: Optional list of class subdirectories (e.g. `['dogs', + 'cats']`). Default: None. If not provided, the list of classes + will be automatically inferred from the subdirectory + names/structure under `directory`, where each subdirectory will be + treated as a different class (and the order of the classes, which + will map to the label indices, will be alphanumeric). The + dictionary containing the mapping from class names to class + indices can be obtained via the attribute `class_indices`. + class_mode: One of "categorical", "binary", "sparse", + "input", or None. + Determines the type of label arrays that are returned: + - "categorical" will be 2D one-hot encoded labels, + - "binary" will be 1D binary labels, + - "sparse" will be 1D integer labels, + - "input" will be images identical + to input images (mainly used to work with autoencoders). + - If None, no labels are returned + (the generator will only yield batches of image data, + which is useful to use with `model.predict_generator()`). + Please note that in case of class_mode None, + the data still needs to reside in a subdirectory + of `directory` for it to work correctly. + Defaults to "categorical". + batch_size: Size of the batches of data. Defaults to `32`. + shuffle: Whether to shuffle the data. If `False`, sorts the + data in alphanumeric order. Defaults to `True`. + seed: Optional random seed for shuffling and transformations. + save_to_dir: None or str (default: None). This allows you to + optionally specify a directory to which to save the augmented + pictures being generated (useful for visualizing what you are + doing). + save_prefix: Str.
Prefix to use for filenames of saved pictures + (only relevant if `save_to_dir` is set). + save_format: one of "png", "jpeg", "bmp", "pdf", "ppm", "gif", + "tif", "jpg" (only relevant if `save_to_dir` is set). + Defaults to "png". + follow_links: Whether to follow symlinks inside + class subdirectories. Defaults to `False`. + subset: Subset of data (`"training"` or `"validation"`) if + `validation_split` is set in `ImageDataGenerator`. + interpolation: Interpolation method used to resample the image if + the target size is different from that of the loaded image. + Supported methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. + If PIL version 1.1.3 or newer is installed, `"lanczos"` is also + supported. If PIL version 3.4.0 or newer is installed, `"box"` and + `"hamming"` are also supported. Defaults to `"nearest"`. + keep_aspect_ratio: Boolean, whether to resize images to a target + size without aspect ratio distortion. The image is cropped in + the center with target aspect ratio before resizing. + + Returns: + A `DirectoryIterator` yielding tuples of `(x, y)` + where `x` is a numpy array containing a batch + of images with shape `(batch_size, *target_size, channels)` + and `y` is a numpy array of corresponding labels. + """ + return DirectoryIterator( + directory, + self, + target_size=target_size, + color_mode=color_mode, + keep_aspect_ratio=keep_aspect_ratio, + classes=classes, + class_mode=class_mode, + data_format=self.data_format, + batch_size=batch_size, + shuffle=shuffle, + seed=seed, + save_to_dir=save_to_dir, + save_prefix=save_prefix, + save_format=save_format, + follow_links=follow_links, + subset=subset, + interpolation=interpolation, + dtype=self.dtype, + ) + + def flow_from_dataframe( self, - x_col=x_col, - y_col=y_col, - weight_col=weight_col, - target_size=target_size, - color_mode=color_mode, - classes=classes, - class_mode=class_mode, - data_format=self.data_format, - batch_size=batch_size, - shuffle=shuffle, - seed=seed, - save_to_dir=save_to_dir, - save_prefix=save_prefix, - save_format=save_format, - subset=subset, - interpolation=interpolation, - validate_filenames=validate_filenames, - dtype=self.dtype) - - def standardize(self, x): - """Applies the normalization configuration in-place to a batch of inputs. - - `x` is changed in-place since the function is mainly used internally - to standardize images and feed them to your network. If a copy of `x` - would be created instead it would have a significant performance cost. - If you want to apply this method without changing the input in-place - you can call the method creating a copy before: - - standardize(np.copy(x)) + dataframe, + directory=None, + x_col="filename", + y_col="class", + weight_col=None, + target_size=(256, 256), + color_mode="rgb", + classes=None, + class_mode="categorical", + batch_size=32, + shuffle=True, + seed=None, + save_to_dir=None, + save_prefix="", + save_format="png", + subset=None, + interpolation="nearest", + validate_filenames=True, + **kwargs, + ): + """Takes the dataframe and the path to a directory + generates batches. + + The generated batches contain augmented/normalized data. + + **A simple tutorial can be found **[here]( + http://bit.ly/keras_flow_from_dataframe). + + Args: + dataframe: Pandas dataframe containing the filepaths relative to + `directory` (or absolute paths if `directory` is None) of the + images in a string column. 
It should include other column/s + depending on the `class_mode`: + - if `class_mode` is `"categorical"` (default value) it must + include the `y_col` column with the class/es of each image. + Values in column can be string/list/tuple if a single class + or list/tuple if multiple classes. + - if `class_mode` is `"binary"` or `"sparse"` it must include + the given `y_col` column with class values as strings. + - if `class_mode` is `"raw"` or `"multi_output"` it should + contain the columns specified in `y_col`. + - if `class_mode` is `"input"` or `None` no extra column is + needed. + directory: string, path to the directory to read images from. If + `None`, data in `x_col` column should be absolute paths. + x_col: string, column in `dataframe` that contains the filenames (or + absolute paths if `directory` is `None`). + y_col: string or list, column/s in `dataframe` that has the target + data. + weight_col: string, column in `dataframe` that contains the sample + weights. Default: `None`. + target_size: tuple of integers `(height, width)`, default: `(256, + 256)`. The dimensions to which all images found will be resized. + color_mode: one of "grayscale", "rgb", "rgba". Default: "rgb". + Whether the images will be converted to have 1, 3, or 4 + channels. + classes: optional list of classes (e.g. `['dogs', 'cats']`). Default + is None. If not provided, the list of classes will be + automatically inferred from `y_col` (and the order of the classes, + which will map to the label indices, will be alphanumeric). The + dictionary containing the mapping from class names to class + indices can be obtained via the attribute `class_indices`. + class_mode: one of "binary", "categorical", "input", "multi_output", + "raw", "sparse" or None. Default: "categorical". + Mode for yielding the targets: + - `"binary"`: 1D numpy array of binary labels, + - `"categorical"`: 2D numpy array of one-hot encoded labels. + Supports multi-label output. + - `"input"`: images identical to input images (mainly used to + work with autoencoders), + - `"multi_output"`: list with the values of the different + columns, + - `"raw"`: numpy array of values in `y_col` column(s), + - `"sparse"`: 1D numpy array of integer labels, + - `None`, no targets are returned (the generator will only yield + batches of image data, which is useful to use in + `model.predict()`). + batch_size: size of the batches of data (default: 32). + shuffle: whether to shuffle the data (default: True). + seed: optional random seed for shuffling and transformations. + save_to_dir: None or str (default: None). This allows you to + optionally specify a directory to which to save the augmented + pictures being generated (useful for visualizing what you are + doing). + save_prefix: str. Prefix to use for filenames of saved pictures + (only relevant if `save_to_dir` is set). + save_format: one of "png", "jpeg", "bmp", "pdf", "ppm", "gif", + "tif", "jpg" (only relevant if `save_to_dir` is set). Default: + "png". + subset: Subset of data (`"training"` or `"validation"`) if + `validation_split` is set in `ImageDataGenerator`. + interpolation: Interpolation method used to resample the image if + the target size is different from that of the loaded image. + Supported methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. + If PIL version 1.1.3 or newer is installed, `"lanczos"` is also + supported. If PIL version 3.4.0 or newer is installed, `"box"` and + `"hamming"` are also supported. By default, `"nearest"` is used.
+ validate_filenames: Boolean, whether to validate image filenames in + `x_col`. If `True`, invalid images will be ignored. Disabling this + option can lead to speed-up in the execution of this function. + Defaults to `True`. + **kwargs: legacy arguments for raising deprecation warnings. + + Returns: + A `DataFrameIterator` yielding tuples of `(x, y)` + where `x` is a numpy array containing a batch + of images with shape `(batch_size, *target_size, channels)` + and `y` is a numpy array of corresponding labels. + """ + if "has_ext" in kwargs: + warnings.warn( + "has_ext is deprecated, filenames in the dataframe have " + "to match the exact filenames on disk.", + DeprecationWarning, + ) + if "sort" in kwargs: + warnings.warn( + "sort is deprecated, batches will be created in the " + "same order as the filenames provided if shuffle " + "is set to False.", + DeprecationWarning, + ) + if class_mode == "other": + warnings.warn( + '`class_mode` "other" is deprecated, please use ' + '`class_mode` "raw".', + DeprecationWarning, + ) + class_mode = "raw" + if "drop_duplicates" in kwargs: + warnings.warn( + "drop_duplicates is deprecated, you can drop duplicates " + "by using the pandas.DataFrame.drop_duplicates method.", + DeprecationWarning, + ) + + return DataFrameIterator( + dataframe, + directory, + self, + x_col=x_col, + y_col=y_col, + weight_col=weight_col, + target_size=target_size, + color_mode=color_mode, + classes=classes, + class_mode=class_mode, + data_format=self.data_format, + batch_size=batch_size, + shuffle=shuffle, + seed=seed, + save_to_dir=save_to_dir, + save_prefix=save_prefix, + save_format=save_format, + subset=subset, + interpolation=interpolation, + validate_filenames=validate_filenames, + dtype=self.dtype, + ) + + def standardize(self, x): + """Applies the normalization configuration in-place to a batch of + inputs. + + `x` is changed in-place since the function is mainly used internally + to standardize images and feed them to your network. If a copy of `x` + would be created instead it would have a significant performance cost. + If you want to apply this method without changing the input in-place + you can call the method creating a copy before: + + standardize(np.copy(x)) + + Args: + x: Batch of inputs to be normalized. + + Returns: + The inputs, normalized. + """ + if self.preprocessing_function: + x = self.preprocessing_function(x) + if self.rescale: + x *= self.rescale + if self.samplewise_center: + x -= np.mean(x, keepdims=True) + if self.samplewise_std_normalization: + x /= np.std(x, keepdims=True) + 1e-6 + + if self.featurewise_center: + if self.mean is not None: + x -= self.mean + else: + warnings.warn( + "This ImageDataGenerator specifies " + "`featurewise_center`, but it hasn't " + "been fit on any training data. Fit it " + "first by calling `.fit(numpy_data)`." + ) + if self.featurewise_std_normalization: + if self.std is not None: + x /= self.std + 1e-6 + else: + warnings.warn( + "This ImageDataGenerator specifies " + "`featurewise_std_normalization`, " + "but it hasn't " + "been fit on any training data. Fit it " + "first by calling `.fit(numpy_data)`." + ) + if self.zca_whitening: + if self.zca_whitening_matrix is not None: + flat_x = x.reshape(-1, np.prod(x.shape[-3:])) + white_x = flat_x @ self.zca_whitening_matrix + x = np.reshape(white_x, x.shape) + else: + warnings.warn( + "This ImageDataGenerator specifies " + "`zca_whitening`, but it hasn't " + "been fit on any training data. Fit it " + "first by calling `.fit(numpy_data)`."
+ ) + return x + + def get_random_transform(self, img_shape, seed=None): + """Generates random parameters for a transformation. + + Args: + img_shape: Tuple of integers. + Shape of the image that is transformed. + seed: Random seed. + + Returns: + A dictionary containing randomly chosen parameters describing the + transformation. + """ + img_row_axis = self.row_axis - 1 + img_col_axis = self.col_axis - 1 + + if seed is not None: + np.random.seed(seed) + + if self.rotation_range: + theta = np.random.uniform(-self.rotation_range, self.rotation_range) + else: + theta = 0 + + if self.height_shift_range: + try: # 1-D array-like or int + tx = np.random.choice(self.height_shift_range) + tx *= np.random.choice([-1, 1]) + except ValueError: # floating point + tx = np.random.uniform( + -self.height_shift_range, self.height_shift_range + ) + if np.max(self.height_shift_range) < 1: + tx *= img_shape[img_row_axis] + else: + tx = 0 + + if self.width_shift_range: + try: # 1-D array-like or int + ty = np.random.choice(self.width_shift_range) + ty *= np.random.choice([-1, 1]) + except ValueError: # floating point + ty = np.random.uniform( + -self.width_shift_range, self.width_shift_range + ) + if np.max(self.width_shift_range) < 1: + ty *= img_shape[img_col_axis] + else: + ty = 0 + + if self.shear_range: + shear = np.random.uniform(-self.shear_range, self.shear_range) + else: + shear = 0 + + if self.zoom_range[0] == 1 and self.zoom_range[1] == 1: + zx, zy = 1, 1 + else: + zx, zy = np.random.uniform( + self.zoom_range[0], self.zoom_range[1], 2 + ) + + flip_horizontal = (np.random.random() < 0.5) * self.horizontal_flip + flip_vertical = (np.random.random() < 0.5) * self.vertical_flip + + channel_shift_intensity = None + if self.channel_shift_range != 0: + channel_shift_intensity = np.random.uniform( + -self.channel_shift_range, self.channel_shift_range + ) + + brightness = None + if self.brightness_range is not None: + brightness = np.random.uniform( + self.brightness_range[0], self.brightness_range[1] + ) + + transform_parameters = { + "theta": theta, + "tx": tx, + "ty": ty, + "shear": shear, + "zx": zx, + "zy": zy, + "flip_horizontal": flip_horizontal, + "flip_vertical": flip_vertical, + "channel_shift_intensity": channel_shift_intensity, + "brightness": brightness, + } + + return transform_parameters + + def apply_transform(self, x, transform_parameters): + """Applies a transformation to an image according to given parameters. + + Args: + x: 3D tensor, single image. + transform_parameters: Dictionary with string - parameter pairs + describing the transformation. + Currently, the following parameters + from the dictionary are used: + - `'theta'`: Float. Rotation angle in degrees. + - `'tx'`: Float. Shift in the x direction. + - `'ty'`: Float. Shift in the y direction. + - `'shear'`: Float. Shear angle in degrees. + - `'zx'`: Float. Zoom in the x direction. + - `'zy'`: Float. Zoom in the y direction. + - `'flip_horizontal'`: Boolean. Horizontal flip. + - `'flip_vertical'`: Boolean. Vertical flip. + - `'channel_shift_intensity'`: Float. Channel shift intensity. + - `'brightness'`: Float. Brightness shift intensity. + + Returns: + A transformed version of the input (same shape). 
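Because `get_random_transform` returns a plain parameter dictionary and `apply_transform` consumes one, the two can be decoupled: draw the parameters once, then apply them to several tensors. A hedged sketch of the image-and-mask pattern from the class docstring above (random data, default channels_last layout assumed):

```python
# Draw transform parameters once, then apply them to both an image
# and its mask so the two stay geometrically aligned.
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

gen = ImageDataGenerator(rotation_range=30, horizontal_flip=True)
image = np.random.rand(64, 64, 3)  # single image, rank 3
mask = np.random.rand(64, 64, 1)

params = gen.get_random_transform(image.shape, seed=7)
aug_image = gen.apply_transform(image, params)
aug_mask = gen.apply_transform(mask, params)  # identical geometry
print(params["theta"], aug_image.shape, aug_mask.shape)
```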
+ """ + # x is a single image, so it doesn't have image number at index 0 + img_row_axis = self.row_axis - 1 + img_col_axis = self.col_axis - 1 + img_channel_axis = self.channel_axis - 1 + + x = apply_affine_transform( + x, + transform_parameters.get("theta", 0), + transform_parameters.get("tx", 0), + transform_parameters.get("ty", 0), + transform_parameters.get("shear", 0), + transform_parameters.get("zx", 1), + transform_parameters.get("zy", 1), + row_axis=img_row_axis, + col_axis=img_col_axis, + channel_axis=img_channel_axis, + fill_mode=self.fill_mode, + cval=self.cval, + order=self.interpolation_order, + ) + + if transform_parameters.get("channel_shift_intensity") is not None: + x = apply_channel_shift( + x, + transform_parameters["channel_shift_intensity"], + img_channel_axis, + ) + + if transform_parameters.get("flip_horizontal", False): + x = flip_axis(x, img_col_axis) + + if transform_parameters.get("flip_vertical", False): + x = flip_axis(x, img_row_axis) + + if transform_parameters.get("brightness") is not None: + x = apply_brightness_shift( + x, transform_parameters["brightness"], False + ) + + return x + + def random_transform(self, x, seed=None): + """Applies a random transformation to an image. + + Args: + x: 3D tensor, single image. + seed: Random seed. + + Returns: + A randomly transformed version of the input (same shape). + """ + params = self.get_random_transform(x.shape, seed) + return self.apply_transform(x, params) + + def fit(self, x, augment=False, rounds=1, seed=None): + """Fits the data generator to some sample data. + + This computes the internal data stats related to the + data-dependent transformations, based on an array of sample data. + + Only required if `featurewise_center` or + `featurewise_std_normalization` or `zca_whitening` are set to True. + + When `rescale` is set to a value, rescaling is applied to + sample data before computing the internal data stats. + + Args: + x: Sample data. Should have rank 4. + In case of grayscale data, + the channels axis should have value 1, in case + of RGB data, it should have value 3, and in case + of RGBA data, it should have value 4. + augment: Boolean (default: False). + Whether to fit on randomly augmented samples. + rounds: Int (default: 1). + If using data augmentation (`augment=True`), + this is how many augmentation passes over the data to use. + seed: Int (default: None). Random seed. + """ + x = np.asarray(x, dtype=self.dtype) + if x.ndim != 4: + raise ValueError( + "Input to `.fit()` should have rank 4. Got array with shape: " + + str(x.shape) + ) + if x.shape[self.channel_axis] not in {1, 3, 4}: + warnings.warn( + "Expected input to be images (as Numpy array) " + 'following the data format convention "' + + self.data_format + + '" (channels on axis ' + + str(self.channel_axis) + + "), i.e. expected either 1, 3 or 4 channels on axis " + + str(self.channel_axis) + + ". However, it was passed an array with shape " + + str(x.shape) + + " (" + + str(x.shape[self.channel_axis]) + + " channels)." 
+ ) + + if seed is not None: + np.random.seed(seed) + + x = np.copy(x) + if self.rescale: + x *= self.rescale + + if augment: + ax = np.zeros( + tuple([rounds * x.shape[0]] + list(x.shape)[1:]), + dtype=self.dtype, + ) + for r in range(rounds): + for i in range(x.shape[0]): + ax[i + r * x.shape[0]] = self.random_transform(x[i]) + x = ax + + if self.featurewise_center: + self.mean = np.mean(x, axis=(0, self.row_axis, self.col_axis)) + broadcast_shape = [1, 1, 1] + broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis] + self.mean = np.reshape(self.mean, broadcast_shape) + x -= self.mean + + if self.featurewise_std_normalization: + self.std = np.std(x, axis=(0, self.row_axis, self.col_axis)) + broadcast_shape = [1, 1, 1] + broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis] + self.std = np.reshape(self.std, broadcast_shape) + x /= self.std + 1e-6 + + if self.zca_whitening: + n = len(x) + flat_x = np.reshape(x, (n, -1)) + + u, s, _ = np.linalg.svd(flat_x.T, full_matrices=False) + s_inv = np.sqrt(n) / (s + self.zca_epsilon) + self.zca_whitening_matrix = (u * s_inv).dot(u.T) + + +@keras_export("keras.preprocessing.image.random_rotation") +def random_rotation( + x, + rg, + row_axis=1, + col_axis=2, + channel_axis=0, + fill_mode="nearest", + cval=0.0, + interpolation_order=1, +): + """Performs a random rotation of a Numpy image tensor. + + Deprecated: `tf.keras.preprocessing.image.random_rotation` does not operate + on tensors and is not recommended for new code. Prefer + `tf.keras.layers.RandomRotation` which provides equivalent functionality as + a preprocessing layer. For more information, see the tutorial for + [augmenting images]( + https://www.tensorflow.org/tutorials/images/data_augmentation), as well as + the [preprocessing layer guide]( + https://www.tensorflow.org/guide/keras/preprocessing_layers). Args: - x: Batch of inputs to be normalized. + x: Input tensor. Must be 3D. + rg: Rotation range, in degrees. + row_axis: Index of axis for rows in the input tensor. + col_axis: Index of axis for columns in the input tensor. + channel_axis: Index of axis for channels in the input tensor. + fill_mode: Points outside the boundaries of the input + are filled according to the given mode + (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). + cval: Value used for points outside the boundaries + of the input if `mode='constant'`. + interpolation_order: int, order of spline interpolation. + see `ndimage.interpolation.affine_transform` Returns: - The inputs, normalized. + Rotated Numpy image tensor. """ - if self.preprocessing_function: - x = self.preprocessing_function(x) - if self.rescale: - x *= self.rescale - if self.samplewise_center: - x -= np.mean(x, keepdims=True) - if self.samplewise_std_normalization: - x /= (np.std(x, keepdims=True) + 1e-6) - - if self.featurewise_center: - if self.mean is not None: - x -= self.mean - else: - warnings.warn('This ImageDataGenerator specifies ' - '`featurewise_center`, but it hasn\'t ' - 'been fit on any training data. Fit it ' - 'first by calling `.fit(numpy_data)`.') - if self.featurewise_std_normalization: - if self.std is not None: - x /= (self.std + 1e-6) - else: - warnings.warn('This ImageDataGenerator specifies ' - '`featurewise_std_normalization`, ' - 'but it hasn\'t ' - 'been fit on any training data. 
Fit it ' - 'first by calling `.fit(numpy_data)`.') - if self.zca_whitening: - if self.zca_whitening_matrix is not None: - flat_x = x.reshape(-1, np.prod(x.shape[-3:])) - white_x = flat_x @ self.zca_whitening_matrix - x = np.reshape(white_x, x.shape) - else: - warnings.warn('This ImageDataGenerator specifies ' - '`zca_whitening`, but it hasn\'t ' - 'been fit on any training data. Fit it ' - 'first by calling `.fit(numpy_data)`.') + theta = np.random.uniform(-rg, rg) + x = apply_affine_transform( + x, + theta=theta, + row_axis=row_axis, + col_axis=col_axis, + channel_axis=channel_axis, + fill_mode=fill_mode, + cval=cval, + order=interpolation_order, + ) return x - def get_random_transform(self, img_shape, seed=None): - """Generates random parameters for a transformation. + +@keras_export("keras.preprocessing.image.random_shift") +def random_shift( + x, + wrg, + hrg, + row_axis=1, + col_axis=2, + channel_axis=0, + fill_mode="nearest", + cval=0.0, + interpolation_order=1, +): + """Performs a random spatial shift of a Numpy image tensor. + + Deprecated: `tf.keras.preprocessing.image.random_shift` does not operate on + tensors and is not recommended for new code. Prefer + `tf.keras.layers.RandomTranslation` which provides equivalent functionality + as a preprocessing layer. For more information, see the tutorial for + [augmenting images]( + https://www.tensorflow.org/tutorials/images/data_augmentation), as well as + the [preprocessing layer guide]( + https://www.tensorflow.org/guide/keras/preprocessing_layers). Args: - img_shape: Tuple of integers. - Shape of the image that is transformed. - seed: Random seed. + x: Input tensor. Must be 3D. + wrg: Width shift range, as a float fraction of the width. + hrg: Height shift range, as a float fraction of the height. + row_axis: Index of axis for rows in the input tensor. + col_axis: Index of axis for columns in the input tensor. + channel_axis: Index of axis for channels in the input tensor. + fill_mode: Points outside the boundaries of the input + are filled according to the given mode + (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). + cval: Value used for points outside the boundaries + of the input if `mode='constant'`. + interpolation_order: int, order of spline interpolation. + see `ndimage.interpolation.affine_transform` Returns: - A dictionary containing randomly chosen parameters describing the - transformation. + Shifted Numpy image tensor. 
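A usage sketch for these standalone helpers (hedged: random input, standalone `keras` import path). Note that the axis defaults (`row_axis=1`, `col_axis=2`, `channel_axis=0`) suit a channels_first single image, so a channels_last image needs explicit overrides:

```python
# Random single image in channels_last layout, hence the axis overrides.
import numpy as np
from keras.preprocessing.image import random_rotation, random_shift

img = np.random.rand(64, 64, 3)
rotated = random_rotation(
    img, rg=40, row_axis=0, col_axis=1, channel_axis=2
)
shifted = random_shift(
    img, wrg=0.1, hrg=0.2, row_axis=0, col_axis=1, channel_axis=2
)
print(rotated.shape, shifted.shape)  # (64, 64, 3) (64, 64, 3)
```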
""" - img_row_axis = self.row_axis - 1 - img_col_axis = self.col_axis - 1 + h, w = x.shape[row_axis], x.shape[col_axis] + tx = np.random.uniform(-hrg, hrg) * h + ty = np.random.uniform(-wrg, wrg) * w + x = apply_affine_transform( + x, + tx=tx, + ty=ty, + row_axis=row_axis, + col_axis=col_axis, + channel_axis=channel_axis, + fill_mode=fill_mode, + cval=cval, + order=interpolation_order, + ) + return x - if seed is not None: - np.random.seed(seed) - if self.rotation_range: - theta = np.random.uniform(-self.rotation_range, self.rotation_range) - else: - theta = 0 - - if self.height_shift_range: - try: # 1-D array-like or int - tx = np.random.choice(self.height_shift_range) - tx *= np.random.choice([-1, 1]) - except ValueError: # floating point - tx = np.random.uniform(-self.height_shift_range, - self.height_shift_range) - if np.max(self.height_shift_range) < 1: - tx *= img_shape[img_row_axis] - else: - tx = 0 - - if self.width_shift_range: - try: # 1-D array-like or int - ty = np.random.choice(self.width_shift_range) - ty *= np.random.choice([-1, 1]) - except ValueError: # floating point - ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) - if np.max(self.width_shift_range) < 1: - ty *= img_shape[img_col_axis] - else: - ty = 0 +@keras_export("keras.preprocessing.image.random_shear") +def random_shear( + x, + intensity, + row_axis=1, + col_axis=2, + channel_axis=0, + fill_mode="nearest", + cval=0.0, + interpolation_order=1, +): + """Performs a random spatial shear of a Numpy image tensor. - if self.shear_range: - shear = np.random.uniform(-self.shear_range, self.shear_range) - else: - shear = 0 + Args: + x: Input tensor. Must be 3D. + intensity: Transformation intensity in degrees. + row_axis: Index of axis for rows in the input tensor. + col_axis: Index of axis for columns in the input tensor. + channel_axis: Index of axis for channels in the input tensor. + fill_mode: Points outside the boundaries of the input + are filled according to the given mode + (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). + cval: Value used for points outside the boundaries + of the input if `mode='constant'`. + interpolation_order: int, order of spline interpolation. + see `ndimage.interpolation.affine_transform` - if self.zoom_range[0] == 1 and self.zoom_range[1] == 1: - zx, zy = 1, 1 - else: - zx, zy = np.random.uniform(self.zoom_range[0], self.zoom_range[1], 2) - - flip_horizontal = (np.random.random() < 0.5) * self.horizontal_flip - flip_vertical = (np.random.random() < 0.5) * self.vertical_flip - - channel_shift_intensity = None - if self.channel_shift_range != 0: - channel_shift_intensity = np.random.uniform(-self.channel_shift_range, - self.channel_shift_range) - - brightness = None - if self.brightness_range is not None: - brightness = np.random.uniform(self.brightness_range[0], - self.brightness_range[1]) - - transform_parameters = { - 'theta': theta, - 'tx': tx, - 'ty': ty, - 'shear': shear, - 'zx': zx, - 'zy': zy, - 'flip_horizontal': flip_horizontal, - 'flip_vertical': flip_vertical, - 'channel_shift_intensity': channel_shift_intensity, - 'brightness': brightness - } + Returns: + Sheared Numpy image tensor. 
+ """ + shear = np.random.uniform(-intensity, intensity) + x = apply_affine_transform( + x, + shear=shear, + row_axis=row_axis, + col_axis=col_axis, + channel_axis=channel_axis, + fill_mode=fill_mode, + cval=cval, + order=interpolation_order, + ) + return x - return transform_parameters - def apply_transform(self, x, transform_parameters): - """Applies a transformation to an image according to given parameters. +@keras_export("keras.preprocessing.image.random_zoom") +def random_zoom( + x, + zoom_range, + row_axis=1, + col_axis=2, + channel_axis=0, + fill_mode="nearest", + cval=0.0, + interpolation_order=1, +): + """Performs a random spatial zoom of a Numpy image tensor. + + Deprecated: `tf.keras.preprocessing.image.random_zoom` does not operate on + tensors and is not recommended for new code. Prefer + `tf.keras.layers.RandomZoom` which provides equivalent functionality as + a preprocessing layer. For more information, see the tutorial for + [augmenting images]( + https://www.tensorflow.org/tutorials/images/data_augmentation), as well as + the [preprocessing layer guide]( + https://www.tensorflow.org/guide/keras/preprocessing_layers). Args: - x: 3D tensor, single image. - transform_parameters: Dictionary with string - parameter pairs - describing the transformation. - Currently, the following parameters - from the dictionary are used: - - `'theta'`: Float. Rotation angle in degrees. - - `'tx'`: Float. Shift in the x direction. - - `'ty'`: Float. Shift in the y direction. - - `'shear'`: Float. Shear angle in degrees. - - `'zx'`: Float. Zoom in the x direction. - - `'zy'`: Float. Zoom in the y direction. - - `'flip_horizontal'`: Boolean. Horizontal flip. - - `'flip_vertical'`: Boolean. Vertical flip. - - `'channel_shift_intensity'`: Float. Channel shift intensity. - - `'brightness'`: Float. Brightness shift intensity. + x: Input tensor. Must be 3D. + zoom_range: Tuple of floats; zoom range for width and height. + row_axis: Index of axis for rows in the input tensor. + col_axis: Index of axis for columns in the input tensor. + channel_axis: Index of axis for channels in the input tensor. + fill_mode: Points outside the boundaries of the input + are filled according to the given mode + (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). + cval: Value used for points outside the boundaries + of the input if `mode='constant'`. + interpolation_order: int, order of spline interpolation. + see `ndimage.interpolation.affine_transform` Returns: - A transformed version of the input (same shape). + Zoomed Numpy image tensor. + + Raises: + ValueError: if `zoom_range` isn't a tuple. """ - # x is a single image, so it doesn't have image number at index 0 - img_row_axis = self.row_axis - 1 - img_col_axis = self.col_axis - 1 - img_channel_axis = self.channel_axis - 1 + if len(zoom_range) != 2: + raise ValueError( + "`zoom_range` should be a tuple or list of two floats. 
Received: %s" + % (zoom_range,) + ) + if zoom_range[0] == 1 and zoom_range[1] == 1: + zx, zy = 1, 1 + else: + zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) x = apply_affine_transform( x, - transform_parameters.get('theta', 0), - transform_parameters.get('tx', 0), - transform_parameters.get('ty', 0), - transform_parameters.get('shear', 0), - transform_parameters.get('zx', 1), - transform_parameters.get('zy', 1), - row_axis=img_row_axis, - col_axis=img_col_axis, - channel_axis=img_channel_axis, - fill_mode=self.fill_mode, - cval=self.cval, - order=self.interpolation_order) - - if transform_parameters.get('channel_shift_intensity') is not None: - x = apply_channel_shift(x, - transform_parameters['channel_shift_intensity'], - img_channel_axis) - - if transform_parameters.get('flip_horizontal', False): - x = flip_axis(x, img_col_axis) - - if transform_parameters.get('flip_vertical', False): - x = flip_axis(x, img_row_axis) - - if transform_parameters.get('brightness') is not None: - x = apply_brightness_shift(x, transform_parameters['brightness'], False) - + zx=zx, + zy=zy, + row_axis=row_axis, + col_axis=col_axis, + channel_axis=channel_axis, + fill_mode=fill_mode, + cval=cval, + order=interpolation_order, + ) return x - def random_transform(self, x, seed=None): - """Applies a random transformation to an image. + +@keras_export("keras.preprocessing.image.apply_channel_shift") +def apply_channel_shift(x, intensity, channel_axis=0): + """Performs a channel shift. Args: - x: 3D tensor, single image. - seed: Random seed. + x: Input tensor. Must be 3D. + intensity: Transformation intensity. + channel_axis: Index of axis for channels in the input tensor. Returns: - A randomly transformed version of the input (same shape). + Numpy image tensor. """ - params = self.get_random_transform(x.shape, seed) - return self.apply_transform(x, params) + x = np.rollaxis(x, channel_axis, 0) + min_x, max_x = np.min(x), np.max(x) + channel_images = [ + np.clip(x_channel + intensity, min_x, max_x) for x_channel in x + ] + x = np.stack(channel_images, axis=0) + x = np.rollaxis(x, 0, channel_axis + 1) + return x + + +@keras_export("keras.preprocessing.image.random_channel_shift") +def random_channel_shift(x, intensity_range, channel_axis=0): + """Performs a random channel shift. - def fit(self, x, augment=False, rounds=1, seed=None): - """Fits the data generator to some sample data. + Args: + x: Input tensor. Must be 3D. + intensity_range: Transformation intensity. + channel_axis: Index of axis for channels in the input tensor. - This computes the internal data stats related to the - data-dependent transformations, based on an array of sample data. + Returns: + Numpy image tensor. + """ + intensity = np.random.uniform(-intensity_range, intensity_range) + return apply_channel_shift(x, intensity, channel_axis=channel_axis) - Only required if `featurewise_center` or - `featurewise_std_normalization` or `zca_whitening` are set to True. - When `rescale` is set to a value, rescaling is applied to - sample data before computing the internal data stats. +@keras_export("keras.preprocessing.image.apply_brightness_shift") +def apply_brightness_shift(x, brightness, scale=True): + """Performs a brightness shift. Args: - x: Sample data. Should have rank 4. - In case of grayscale data, - the channels axis should have value 1, in case - of RGB data, it should have value 3, and in case - of RGBA data, it should have value 4. - augment: Boolean (default: False). - Whether to fit on randomly augmented samples. 
- rounds: Int (default: 1). - If using data augmentation (`augment=True`), - this is how many augmentation passes over the data to use. - seed: Int (default: None). Random seed. - """ - x = np.asarray(x, dtype=self.dtype) - if x.ndim != 4: - raise ValueError('Input to `.fit()` should have rank 4. ' - 'Got array with shape: ' + str(x.shape)) - if x.shape[self.channel_axis] not in {1, 3, 4}: - warnings.warn('Expected input to be images (as Numpy array) ' - 'following the data format convention "' + - self.data_format + '" (channels on axis ' + - str(self.channel_axis) + '), i.e. expected ' - 'either 1, 3 or 4 channels on axis ' + - str(self.channel_axis) + '. ' - 'However, it was passed an array with shape ' + - str(x.shape) + ' (' + str(x.shape[self.channel_axis]) + - ' channels).') - - if seed is not None: - np.random.seed(seed) - - x = np.copy(x) - if self.rescale: - x *= self.rescale - - if augment: - ax = np.zeros( - tuple([rounds * x.shape[0]] + list(x.shape)[1:]), dtype=self.dtype) - for r in range(rounds): - for i in range(x.shape[0]): - ax[i + r * x.shape[0]] = self.random_transform(x[i]) - x = ax - - if self.featurewise_center: - self.mean = np.mean(x, axis=(0, self.row_axis, self.col_axis)) - broadcast_shape = [1, 1, 1] - broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis] - self.mean = np.reshape(self.mean, broadcast_shape) - x -= self.mean - - if self.featurewise_std_normalization: - self.std = np.std(x, axis=(0, self.row_axis, self.col_axis)) - broadcast_shape = [1, 1, 1] - broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis] - self.std = np.reshape(self.std, broadcast_shape) - x /= (self.std + 1e-6) - - if self.zca_whitening: - n = len(x) - flat_x = np.reshape(x, (n, -1)) - - u, s, _ = np.linalg.svd(flat_x.T, full_matrices=False) - s_inv = np.sqrt(n) / (s + self.zca_epsilon) - self.zca_whitening_matrix = (u * s_inv).dot(u.T) - - -@keras_export('keras.preprocessing.image.random_rotation') -def random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0, - fill_mode='nearest', cval=0., interpolation_order=1): - """Performs a random rotation of a Numpy image tensor. - - Deprecated: `tf.keras.preprocessing.image.random_rotation` does not operate on - tensors and is not recommended for new code. Prefer - `tf.keras.layers.RandomRotation` which provides equivalent functionality as a - preprocessing layer. For more information, see the tutorial for - [augmenting images]( - https://www.tensorflow.org/tutorials/images/data_augmentation), as well as - the [preprocessing layer guide]( - https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - x: Input tensor. Must be 3D. - rg: Rotation range, in degrees. - row_axis: Index of axis for rows in the input tensor. - col_axis: Index of axis for columns in the input tensor. - channel_axis: Index of axis for channels in the input tensor. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - interpolation_order: int, order of spline interpolation. - see `ndimage.interpolation.affine_transform` - - Returns: - Rotated Numpy image tensor. 
- """ - theta = np.random.uniform(-rg, rg) - x = apply_affine_transform(x, - theta=theta, - row_axis=row_axis, - col_axis=col_axis, - channel_axis=channel_axis, - fill_mode=fill_mode, - cval=cval, - order=interpolation_order) - return x - - -@keras_export('keras.preprocessing.image.random_shift') -def random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0, - fill_mode='nearest', cval=0., interpolation_order=1): - """Performs a random spatial shift of a Numpy image tensor. - - Deprecated: `tf.keras.preprocessing.image.random_shift` does not operate on - tensors and is not recommended for new code. Prefer - `tf.keras.layers.RandomTranslation` which provides equivalent functionality as - a preprocessing layer. For more information, see the tutorial for - [augmenting images]( - https://www.tensorflow.org/tutorials/images/data_augmentation), as well as - the [preprocessing layer guide]( - https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - x: Input tensor. Must be 3D. - wrg: Width shift range, as a float fraction of the width. - hrg: Height shift range, as a float fraction of the height. - row_axis: Index of axis for rows in the input tensor. - col_axis: Index of axis for columns in the input tensor. - channel_axis: Index of axis for channels in the input tensor. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - interpolation_order: int, order of spline interpolation. - see `ndimage.interpolation.affine_transform` - - Returns: - Shifted Numpy image tensor. - """ - h, w = x.shape[row_axis], x.shape[col_axis] - tx = np.random.uniform(-hrg, hrg) * h - ty = np.random.uniform(-wrg, wrg) * w - x = apply_affine_transform(x, - tx=tx, - ty=ty, - row_axis=row_axis, - col_axis=col_axis, - channel_axis=channel_axis, - fill_mode=fill_mode, - cval=cval, - order=interpolation_order) - return x - - -@keras_export('keras.preprocessing.image.random_shear') -def random_shear(x, intensity, row_axis=1, col_axis=2, channel_axis=0, - fill_mode='nearest', cval=0., interpolation_order=1): - """Performs a random spatial shear of a Numpy image tensor. - - Args: - x: Input tensor. Must be 3D. - intensity: Transformation intensity in degrees. - row_axis: Index of axis for rows in the input tensor. - col_axis: Index of axis for columns in the input tensor. - channel_axis: Index of axis for channels in the input tensor. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - interpolation_order: int, order of spline interpolation. - see `ndimage.interpolation.affine_transform` - - Returns: - Sheared Numpy image tensor. - """ - shear = np.random.uniform(-intensity, intensity) - x = apply_affine_transform( - x, - shear=shear, - row_axis=row_axis, - col_axis=col_axis, - channel_axis=channel_axis, - fill_mode=fill_mode, - cval=cval, - order=interpolation_order) - return x - - -@keras_export('keras.preprocessing.image.random_zoom') -def random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0, - fill_mode='nearest', cval=0., interpolation_order=1): - """Performs a random spatial zoom of a Numpy image tensor. 
- - Deprecated: `tf.keras.preprocessing.image.random_zoom` does not operate on - tensors and is not recommended for new code. Prefer - `tf.keras.layers.RandomZoom` which provides equivalent functionality as - a preprocessing layer. For more information, see the tutorial for - [augmenting images]( - https://www.tensorflow.org/tutorials/images/data_augmentation), as well as - the [preprocessing layer guide]( - https://www.tensorflow.org/guide/keras/preprocessing_layers). - - Args: - x: Input tensor. Must be 3D. - zoom_range: Tuple of floats; zoom range for width and height. - row_axis: Index of axis for rows in the input tensor. - col_axis: Index of axis for columns in the input tensor. - channel_axis: Index of axis for channels in the input tensor. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - interpolation_order: int, order of spline interpolation. - see `ndimage.interpolation.affine_transform` - - Returns: - Zoomed Numpy image tensor. - - Raises: - ValueError: if `zoom_range` isn't a tuple. - """ - if len(zoom_range) != 2: - raise ValueError('`zoom_range` should be a tuple or list of two' - ' floats. Received: %s' % (zoom_range,)) - - if zoom_range[0] == 1 and zoom_range[1] == 1: - zx, zy = 1, 1 - else: - zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) - x = apply_affine_transform( - x, - zx=zx, - zy=zy, - row_axis=row_axis, - col_axis=col_axis, - channel_axis=channel_axis, - fill_mode=fill_mode, - cval=cval, - order=interpolation_order) - return x - - -@keras_export('keras.preprocessing.image.apply_channel_shift') -def apply_channel_shift(x, intensity, channel_axis=0): - """Performs a channel shift. - - Args: - x: Input tensor. Must be 3D. - intensity: Transformation intensity. - channel_axis: Index of axis for channels in the input tensor. - - Returns: - Numpy image tensor. - """ - x = np.rollaxis(x, channel_axis, 0) - min_x, max_x = np.min(x), np.max(x) - channel_images = [ - np.clip(x_channel + intensity, min_x, max_x) for x_channel in x] - x = np.stack(channel_images, axis=0) - x = np.rollaxis(x, 0, channel_axis + 1) - return x - - -@keras_export('keras.preprocessing.image.random_channel_shift') -def random_channel_shift(x, intensity_range, channel_axis=0): - """Performs a random channel shift. + x: Input tensor. Must be 3D. + brightness: Float. The new brightness value. + scale: Whether to rescale the image such that minimum and maximum values + are 0 and 255 respectively. Default: True. - Args: - x: Input tensor. Must be 3D. - intensity_range: Transformation intensity. - channel_axis: Index of axis for channels in the input tensor. + Returns: + Numpy image tensor. - Returns: - Numpy image tensor. - """ - intensity = np.random.uniform(-intensity_range, intensity_range) - return apply_channel_shift(x, intensity, channel_axis=channel_axis) + Raises: + ImportError: if PIL is not available. + """ + if ImageEnhance is None: + raise ImportError( + "Using brightness shifts requires PIL. Install PIL or Pillow." 
+    )
+    x_min, x_max = np.min(x), np.max(x)
+    local_scale = (x_min < 0) or (x_max > 255)
+    # Round-trip through PIL, rescaling to [0, 255] when the values fall
+    # outside that range or when `scale=True`.
+    x = image_utils.array_to_img(x, scale=local_scale or scale)
+    imgenhancer_Brightness = ImageEnhance.Brightness(x)
+    x = imgenhancer_Brightness.enhance(brightness)
+    x = image_utils.img_to_array(x)
+    if not scale and local_scale:
+        # Map the enhanced image back to the original value range.
+        x = x / 255 * (x_max - x_min) + x_min
+    return x


-@keras_export('keras.preprocessing.image.apply_brightness_shift')
-def apply_brightness_shift(x, brightness, scale=True):
-  """Performs a brightness shift.
-
-  Args:
-    x: Input tensor. Must be 3D.
-    brightness: Float. The new brightness value.
-    scale: Whether to rescale the image such that minimum and maximum values
-      are 0 and 255 respectively. Default: True.
-
-  Returns:
-    Numpy image tensor.
-
-  Raises:
-    ImportError: if PIL is not available.
-  """
-  if ImageEnhance is None:
-    raise ImportError('Using brightness shifts requires PIL. '
-                      'Install PIL or Pillow.')
-  x_min, x_max = np.min(x), np.max(x)
-  local_scale = (x_min < 0) or (x_max > 255)
-  x = image_utils.array_to_img(x, scale=local_scale or scale)
-  x = imgenhancer_Brightness = ImageEnhance.Brightness(x)
-  x = imgenhancer_Brightness.enhance(brightness)
-  x = image_utils.img_to_array(x)
-  if not scale and local_scale:
-    x = x / 255 * (x_max - x_min) + x_min
-  return x
-
-
-@keras_export('keras.preprocessing.image.random_brightness')
+@keras_export("keras.preprocessing.image.random_brightness")
 def random_brightness(x, brightness_range, scale=True):
-  """Performs a random brightness shift.
-
-  Deprecated: `tf.keras.preprocessing.image.random_brightness` does not operate
-  on tensors and is not recommended for new code. Prefer
-  `tf.keras.layers.RandomBrightness` which provides equivalent functionality as
-  a preprocessing layer. For more information, see the tutorial for
-  [augmenting images](
-  https://www.tensorflow.org/tutorials/images/data_augmentation), as well as
-  the [preprocessing layer guide](
-  https://www.tensorflow.org/guide/keras/preprocessing_layers).
-
-  Args:
-    x: Input tensor. Must be 3D.
-    brightness_range: Tuple of floats; brightness range.
-    scale: Whether to rescale the image such that minimum and maximum values
-      are 0 and 255 respectively. Default: True.
-
-  Returns:
-    Numpy image tensor.
-
-  Raises:
-    ValueError if `brightness_range` isn't a tuple.
-  """
-  if len(brightness_range) != 2:
-    raise ValueError(
-        '`brightness_range should be tuple or list of two floats. '
-        'Received: %s' % (brightness_range,))
-
-  u = np.random.uniform(brightness_range[0], brightness_range[1])
-  return apply_brightness_shift(x, u, scale)
+    """Performs a random brightness shift.
+
+    Deprecated: `tf.keras.preprocessing.image.random_brightness` does not
+    operate on tensors and is not recommended for new code. Prefer
+    `tf.keras.layers.RandomBrightness` which provides equivalent functionality
+    as a preprocessing layer. For more information, see the tutorial for
+    [augmenting images](
+    https://www.tensorflow.org/tutorials/images/data_augmentation), as well as
+    the [preprocessing layer guide](
+    https://www.tensorflow.org/guide/keras/preprocessing_layers).
+
+    Args:
+        x: Input tensor. Must be 3D.
+        brightness_range: Tuple of floats; brightness range.
+        scale: Whether to rescale the image such that minimum and maximum
+            values are 0 and 255 respectively. Default: True.
+
+    Returns:
+        Numpy image tensor.
+
+    Raises:
+        ValueError: if `brightness_range` isn't a tuple.
+ """ + if len(brightness_range) != 2: + raise ValueError( + "`brightness_range should be tuple or list of two floats. " + "Received: %s" % (brightness_range,) + ) + + u = np.random.uniform(brightness_range[0], brightness_range[1]) + return apply_brightness_shift(x, u, scale) def transform_matrix_offset_center(matrix, x, y): - o_x = float(x) / 2 - 0.5 - o_y = float(y) / 2 - 0.5 - offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]]) - reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]]) - transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix) - return transform_matrix - - -@keras_export('keras.preprocessing.image.apply_affine_transform') -def apply_affine_transform(x, theta=0, tx=0, ty=0, shear=0, zx=1, zy=1, - row_axis=1, col_axis=2, channel_axis=0, - fill_mode='nearest', cval=0., order=1): - """Applies an affine transformation specified by the parameters given. - - Args: - x: 3D numpy array - a 2D image with one or more channels. - theta: Rotation angle in degrees. - tx: Width shift. - ty: Heigh shift. - shear: Shear angle in degrees. - zx: Zoom in x direction. - zy: Zoom in y direction - row_axis: Index of axis for rows (aka Y axis) in the input - image. Direction: left to right. - col_axis: Index of axis for columns (aka X axis) in the input - image. Direction: top to bottom. - channel_axis: Index of axis for channels in the input image. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - order: int, order of interpolation - - Returns: - The transformed version of the input. - - Raises: - ImportError: if SciPy is not available. - """ - if scipy is None: - raise ImportError('Image transformations require SciPy. ' - 'Install SciPy.') - - # Input sanity checks: - # 1. x must 2D image with one or more channels (i.e., a 3D tensor) - # 2. channels must be either first or last dimension - if np.unique([row_axis, col_axis, channel_axis]).size != 3: - raise ValueError("'row_axis', 'col_axis', and 'channel_axis'" - " must be distinct") - - # shall we support negative indices? 
-  valid_indices = set([0, 1, 2])
-  actual_indices = set([row_axis, col_axis, channel_axis])
-  if actual_indices != valid_indices:
-    raise ValueError(
-        f'Invalid axis\' indices: {actual_indices - valid_indices}')
-
-  if x.ndim != 3:
-    raise ValueError('Input arrays must be multi-channel 2D images.')
-  if channel_axis not in [0, 2]:
-    raise ValueError('Channels are allowed and the first and last dimensions.')
-
-  transform_matrix = None
-  if theta != 0:
-    theta = np.deg2rad(theta)
-    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
-                                [np.sin(theta), np.cos(theta), 0],
-                                [0, 0, 1]])
-    transform_matrix = rotation_matrix
-
-  if tx != 0 or ty != 0:
-    shift_matrix = np.array([[1, 0, tx],
-                             [0, 1, ty],
-                             [0, 0, 1]])
-    if transform_matrix is None:
-      transform_matrix = shift_matrix
-    else:
-      transform_matrix = np.dot(transform_matrix, shift_matrix)
-
-  if shear != 0:
-    shear = np.deg2rad(shear)
-    shear_matrix = np.array([[1, -np.sin(shear), 0],
-                             [0, np.cos(shear), 0],
-                             [0, 0, 1]])
-    if transform_matrix is None:
-      transform_matrix = shear_matrix
-    else:
-      transform_matrix = np.dot(transform_matrix, shear_matrix)
-
-  if zx != 1 or zy != 1:
-    zoom_matrix = np.array([[zx, 0, 0],
-                            [0, zy, 0],
-                            [0, 0, 1]])
-    if transform_matrix is None:
-      transform_matrix = zoom_matrix
-    else:
-      transform_matrix = np.dot(transform_matrix, zoom_matrix)
+    o_x = float(x) / 2 - 0.5
+    o_y = float(y) / 2 - 0.5
+    offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
+    reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
+    transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
+    return transform_matrix
+
+
+@keras_export("keras.preprocessing.image.apply_affine_transform")
+def apply_affine_transform(
+    x,
+    theta=0,
+    tx=0,
+    ty=0,
+    shear=0,
+    zx=1,
+    zy=1,
+    row_axis=1,
+    col_axis=2,
+    channel_axis=0,
+    fill_mode="nearest",
+    cval=0.0,
+    order=1,
+):
+    """Applies an affine transformation specified by the parameters given.
-  if transform_matrix is not None:
-    h, w = x.shape[row_axis], x.shape[col_axis]
-    transform_matrix = transform_matrix_offset_center(
-        transform_matrix, h, w)
-    x = np.rollaxis(x, channel_axis, 0)
+    Args:
+        x: 3D numpy array - a 2D image with one or more channels.
+        theta: Rotation angle in degrees.
+        tx: Width shift.
+        ty: Height shift.
+        shear: Shear angle in degrees.
+        zx: Zoom in x direction.
+        zy: Zoom in y direction.
+        row_axis: Index of axis for rows (aka Y axis) in the input
+            image. Direction: left to right.
+        col_axis: Index of axis for columns (aka X axis) in the input
+            image. Direction: top to bottom.
+        channel_axis: Index of axis for channels in the input image.
+        fill_mode: Points outside the boundaries of the input
+            are filled according to the given mode
+            (one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
+        cval: Value used for points outside the boundaries
+            of the input if `mode='constant'`.
+        order: int, order of interpolation
-
-  # Matrix construction assumes that coordinates are x, y (in that order).
-  # However, regular numpy arrays use y,x (aka i,j) indexing.
-  # Possible solution is:
-  # 1. Swap the x and y axes.
-  # 2. Apply transform.
-  # 3. Swap the x and y axes again to restore image-like data ordering.
-  # Mathematically, it is equivalent to the following transformation:
-  # M' = PMP, where P is the permutation matrix, M is the original
-  # transformation matrix.
-  if col_axis > row_axis:
-    transform_matrix[:, [0, 1]] = transform_matrix[:, [1, 0]]
-    transform_matrix[[0, 1]] = transform_matrix[[1, 0]]
-  final_affine_matrix = transform_matrix[:2, :2]
-  final_offset = transform_matrix[:2, 2]
-
-  channel_images = [ndimage.interpolation.affine_transform(  # pylint: disable=g-complex-comprehension
-      x_channel,
-      final_affine_matrix,
-      final_offset,
-      order=order,
-      mode=fill_mode,
-      cval=cval) for x_channel in x]
-  x = np.stack(channel_images, axis=0)
-  x = np.rollaxis(x, 0, channel_axis + 1)
-  return x
+    Returns:
+        The transformed version of the input.
+
+    Raises:
+        ImportError: if SciPy is not available.
+    """
+    if scipy is None:
+        raise ImportError("Image transformations require SciPy. Install SciPy.")
+
+    # Input sanity checks:
+    # 1. x must be a 2D image with one or more channels (i.e., a 3D tensor)
+    # 2. channels must be either first or last dimension
+    if np.unique([row_axis, col_axis, channel_axis]).size != 3:
+        raise ValueError(
+            "'row_axis', 'col_axis', and 'channel_axis' must be distinct"
+        )
+
+    # shall we support negative indices?
+    valid_indices = set([0, 1, 2])
+    actual_indices = set([row_axis, col_axis, channel_axis])
+    if actual_indices != valid_indices:
+        raise ValueError(
+            f"Invalid axis' indices: {actual_indices - valid_indices}"
+        )
+
+    if x.ndim != 3:
+        raise ValueError("Input arrays must be multi-channel 2D images.")
+    if channel_axis not in [0, 2]:
+        raise ValueError(
+            "Channels are allowed as the first and last dimensions."
+        )
+
+    transform_matrix = None
+    if theta != 0:
+        theta = np.deg2rad(theta)
+        rotation_matrix = np.array(
+            [
+                [np.cos(theta), -np.sin(theta), 0],
+                [np.sin(theta), np.cos(theta), 0],
+                [0, 0, 1],
+            ]
+        )
+        transform_matrix = rotation_matrix
+
+    if tx != 0 or ty != 0:
+        shift_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
+        if transform_matrix is None:
+            transform_matrix = shift_matrix
+        else:
+            transform_matrix = np.dot(transform_matrix, shift_matrix)
+
+    if shear != 0:
+        shear = np.deg2rad(shear)
+        shear_matrix = np.array(
+            [[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]]
+        )
+        if transform_matrix is None:
+            transform_matrix = shear_matrix
+        else:
+            transform_matrix = np.dot(transform_matrix, shear_matrix)
+
+    if zx != 1 or zy != 1:
+        zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]])
+        if transform_matrix is None:
+            transform_matrix = zoom_matrix
+        else:
+            transform_matrix = np.dot(transform_matrix, zoom_matrix)
+
+    if transform_matrix is not None:
+        h, w = x.shape[row_axis], x.shape[col_axis]
+        transform_matrix = transform_matrix_offset_center(
+            transform_matrix, h, w
+        )
+        x = np.rollaxis(x, channel_axis, 0)
+
+        # Matrix construction assumes that coordinates are x, y (in that order).
+        # However, regular numpy arrays use y,x (aka i,j) indexing.
+        # Possible solution is:
+        # 1. Swap the x and y axes.
+        # 2. Apply transform.
+        # 3. Swap the x and y axes again to restore image-like data ordering.
+        # Mathematically, it is equivalent to the following transformation:
+        # M' = PMP, where P is the permutation matrix, M is the original
+        # transformation matrix.
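+        # For example, with a pure shift tx=2, ty=3, swapping the first two
+        # rows and the first two columns of
+        #     [[1, 0, 2],             [[1, 0, 3],
+        #      [0, 1, 3],    yields    [0, 1, 2],
+        #      [0, 0, 1]]              [0, 0, 1]]
+        # i.e. tx and ty trade places, matching the swapped x/y coordinates.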
+ if col_axis > row_axis: + transform_matrix[:, [0, 1]] = transform_matrix[:, [1, 0]] + transform_matrix[[0, 1]] = transform_matrix[[1, 0]] + final_affine_matrix = transform_matrix[:2, :2] + final_offset = transform_matrix[:2, 2] + + channel_images = [ + ndimage.interpolation.affine_transform( + x_channel, + final_affine_matrix, + final_offset, + order=order, + mode=fill_mode, + cval=cval, + ) + for x_channel in x + ] + x = np.stack(channel_images, axis=0) + x = np.rollaxis(x, 0, channel_axis + 1) + return x diff --git a/keras/preprocessing/image_test.py b/keras/preprocessing/image_test.py index ac8515181f4b..90a379cc8d97 100644 --- a/keras/preprocessing/image_test.py +++ b/keras/preprocessing/image_test.py @@ -19,2057 +19,2345 @@ import shutil import tempfile +import numpy as np +import pandas as pd +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras import layers from keras.engine import sequential from keras.preprocessing import image from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import image_utils -import numpy as np -import pandas as pd -import tensorflow.compat.v2 as tf try: - import PIL # pylint:disable=g-import-not-at-top + import PIL except ImportError: - PIL = None - - -def _generate_test_images(include_rgba=False, - include_16bit=False, - include_32bit=False): - img_w = img_h = 20 - rgb_images = [] - rgba_images = [] - gray_images = [] - gray_images_16bit = [] - gray_images_32bit = [] - for _ in range(8): - bias = np.random.rand(img_w, img_h, 1) * 64 - variance = np.random.rand(img_w, img_h, 1) * (255 - 64) - # RGB - imarray = np.random.rand(img_w, img_h, 3) * variance + bias - im = PIL.Image.fromarray(imarray.astype('uint8')).convert('RGB') - rgb_images.append(im) - # RGBA - imarray = np.random.rand(img_w, img_h, 4) * variance + bias - im = PIL.Image.fromarray(imarray.astype('uint8')).convert('RGBA') - rgba_images.append(im) - # 8-bit grayscale - imarray = np.random.rand(img_w, img_h, 1) * variance + bias - im = PIL.Image.fromarray(imarray.astype('uint8').squeeze()).convert('L') - gray_images.append(im) - # 16-bit grayscale - imarray = np.array( - np.random.randint(-2147483648, 2147483647, (img_w, img_h))) - im = PIL.Image.fromarray(imarray.astype('uint16')) - gray_images_16bit.append(im) - # 32-bit grayscale - im = PIL.Image.fromarray(imarray.astype('uint32')) - gray_images_32bit.append(im) - - ret = [rgb_images, gray_images] - if include_rgba: - ret.append(rgba_images) - if include_16bit: - ret.append(gray_images_16bit) - if include_32bit: - ret.append(gray_images_32bit) - return ret + PIL = None + + +def _generate_test_images( + include_rgba=False, include_16bit=False, include_32bit=False +): + img_w = img_h = 20 + rgb_images = [] + rgba_images = [] + gray_images = [] + gray_images_16bit = [] + gray_images_32bit = [] + for _ in range(8): + bias = np.random.rand(img_w, img_h, 1) * 64 + variance = np.random.rand(img_w, img_h, 1) * (255 - 64) + # RGB + imarray = np.random.rand(img_w, img_h, 3) * variance + bias + im = PIL.Image.fromarray(imarray.astype("uint8")).convert("RGB") + rgb_images.append(im) + # RGBA + imarray = np.random.rand(img_w, img_h, 4) * variance + bias + im = PIL.Image.fromarray(imarray.astype("uint8")).convert("RGBA") + rgba_images.append(im) + # 8-bit grayscale + imarray = np.random.rand(img_w, img_h, 1) * variance + bias + im = PIL.Image.fromarray(imarray.astype("uint8").squeeze()).convert("L") + gray_images.append(im) + # 16-bit grayscale + imarray = 
np.array( + np.random.randint(-2147483648, 2147483647, (img_w, img_h)) + ) + im = PIL.Image.fromarray(imarray.astype("uint16")) + gray_images_16bit.append(im) + # 32-bit grayscale + im = PIL.Image.fromarray(imarray.astype("uint32")) + gray_images_32bit.append(im) + + ret = [rgb_images, gray_images] + if include_rgba: + ret.append(rgba_images) + if include_16bit: + ret.append(gray_images_16bit) + if include_32bit: + ret.append(gray_images_32bit) + return ret @test_utils.run_v2_only class TestImage(test_combinations.TestCase): - - def test_iterator_empty_directory(self): - # Testing with different batch sizes - for batch_size in [0, 32]: - data_iterator = image.Iterator(0, batch_size, False, 0) - ret = next(data_iterator.index_generator) - self.assertEqual(ret.size, 0) - - def test_image(self): - if PIL is None: - return # Skip test if PIL is not available. - - for test_images in _generate_test_images(): - img_list = [] - for im in test_images: - img_list.append(image_utils.img_to_array(im)[None, ...]) - - images = np.vstack(img_list) - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True) - # Basic test before fit - x = np.random.random((32, 10, 10, 3)) - generator.flow(x) - - # Fit - generator.fit(images, augment=True) - - for x, _ in generator.flow( - images, np.arange(images.shape[0]), shuffle=True): - self.assertEqual(x.shape[1:], images.shape[1:]) - break - - def test_image_with_split_value_error(self): - with self.assertRaises(ValueError): - image.ImageDataGenerator(validation_split=5) - - def test_image_invalid_data(self): - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - data_format='channels_last') - - # Test fit with invalid data - with self.assertRaises(ValueError): - x = np.random.random((3, 10, 10)) - generator.fit(x) - # Test flow with invalid data - with self.assertRaises(ValueError): - generator.flow(np.arange(5)) - # Invalid number of channels: will work but raise a warning - x = np.random.random((32, 10, 10, 5)) - generator.flow(x) - - with self.assertRaises(ValueError): - generator = image.ImageDataGenerator(data_format='unknown') - - generator = image.ImageDataGenerator(zoom_range=(2., 2.)) - - def test_image_fit(self): - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - data_format='channels_last') - # Test grayscale - x = np.random.random((32, 10, 10, 1)) - generator.fit(x) - # Test RBG - x = np.random.random((32, 10, 10, 3)) - generator.fit(x) - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - data_format='channels_first') - # Test grayscale - x = np.random.random((32, 1, 10, 10)) - generator.fit(x) - # Test RBG - x = np.random.random((32, 3, 10, 10)) - generator.fit(x) - - def test_directory_iterator(self): - if PIL is None: - return # Skip test if PIL is not available. 
- - num_classes = 2 - - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir) - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - os.mkdir(os.path.join(temp_dir, path)) - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in _generate_test_images(): - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join(classpaths[count % len(classpaths)], - 'image-{}.jpg'.format(count)) - filenames.append(filename) - im.save(os.path.join(temp_dir, filename)) - count += 1 - - # Test image loading util - fname = os.path.join(temp_dir, filenames[0]) - _ = image_utils.load_img(fname) - _ = image_utils.load_img(fname, grayscale=True) - _ = image_utils.load_img(fname, target_size=(10, 10)) - _ = image_utils.load_img( - fname, target_size=(10, 10), interpolation='bilinear') - - # create iterator - generator = image.ImageDataGenerator() - dir_iterator = generator.flow_from_directory(temp_dir) - - # check number of classes and images - self.assertEqual(len(dir_iterator.class_indices), num_classes) - self.assertEqual(len(dir_iterator.classes), count) - self.assertEqual(set(dir_iterator.filenames), set(filenames)) - - def preprocessing_function(x): - """This will fail if not provided by a Numpy array. - - Note: This is made to enforce backward compatibility. - - Args: - x: A numpy array. - - Returns: - An array of zeros with the same shape as the given array. - """ - self.assertEqual(x.shape, (26, 26, 3)) - self.assertIs(type(x), np.ndarray) - return np.zeros_like(x) - - # Test usage as Sequence - generator = image.ImageDataGenerator( - preprocessing_function=preprocessing_function) - dir_seq = generator.flow_from_directory( - str(temp_dir), - target_size=(26, 26), - color_mode='rgb', - batch_size=3, - class_mode='categorical') - self.assertEqual(len(dir_seq), count // 3 + 1) - x1, y1 = dir_seq[1] - self.assertEqual(x1.shape, (3, 26, 26, 3)) - self.assertEqual(y1.shape, (3, num_classes)) - x1, y1 = dir_seq[5] - self.assertTrue((x1 == 0).all()) - - def directory_iterator_with_validation_split_test_helper( - self, validation_split): - if PIL is None: - return # Skip test if PIL is not available. 
- - num_classes = 2 - tmp_folder = tempfile.mkdtemp(prefix='test_images') - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - os.mkdir(os.path.join(tmp_folder, path)) - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in _generate_test_images(): - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join(classpaths[count % len(classpaths)], - 'image-{}.jpg'.format(count)) - filenames.append(filename) - im.save(os.path.join(tmp_folder, filename)) - count += 1 - - # create iterator - generator = image.ImageDataGenerator(validation_split=validation_split) - - with self.assertRaises(ValueError): - generator.flow_from_directory(tmp_folder, subset='foo') - - num_validation = int(count * validation_split) - num_training = count - num_validation - train_iterator = generator.flow_from_directory( - tmp_folder, subset='training') - self.assertEqual(train_iterator.samples, num_training) - - valid_iterator = generator.flow_from_directory( - tmp_folder, subset='validation') - self.assertEqual(valid_iterator.samples, num_validation) - - # check number of classes and images - self.assertEqual(len(train_iterator.class_indices), num_classes) - self.assertEqual(len(train_iterator.classes), num_training) - self.assertEqual( - len(set(train_iterator.filenames) & set(filenames)), num_training) - - model = sequential.Sequential([layers.Flatten(), layers.Dense(2)]) - model.compile(optimizer='sgd', loss='mse') - model.fit(train_iterator, epochs=1) - - shutil.rmtree(tmp_folder) - - @test_combinations.run_all_keras_modes - def test_directory_iterator_with_validation_split_25_percent(self): - self.directory_iterator_with_validation_split_test_helper(0.25) - - @test_combinations.run_all_keras_modes - def test_directory_iterator_with_validation_split_40_percent(self): - self.directory_iterator_with_validation_split_test_helper(0.40) - - @test_combinations.run_all_keras_modes - def test_directory_iterator_with_validation_split_50_percent(self): - self.directory_iterator_with_validation_split_test_helper(0.50) - - def test_batch_standardize(self): - if PIL is None: - return # Skip test if PIL is not available. 
- - # ImageDataGenerator.standardize should work on batches - for test_images in _generate_test_images(): - img_list = [] - for im in test_images: - img_list.append(image_utils.img_to_array(im)[None, ...]) - - images = np.vstack(img_list) - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True) - generator.fit(images, augment=True) - - transformed = np.copy(images) - for i, im in enumerate(transformed): - transformed[i] = generator.random_transform(im) - transformed = generator.standardize(transformed) - - def test_img_transforms(self): - x = np.random.random((3, 200, 200)) - _ = image.random_rotation(x, 20) - _ = image.random_shift(x, 0.2, 0.2) - _ = image.random_shear(x, 2.) - _ = image.random_zoom(x, (0.5, 0.5)) - _ = image.apply_channel_shift(x, 2, 2) - _ = image.apply_affine_transform(x, 2) - with self.assertRaises(ValueError): - image.random_zoom(x, (0, 0, 0)) - _ = image.random_channel_shift(x, 2.) + def test_iterator_empty_directory(self): + # Testing with different batch sizes + for batch_size in [0, 32]: + data_iterator = image.Iterator(0, batch_size, False, 0) + ret = next(data_iterator.index_generator) + self.assertEqual(ret.size, 0) + + def test_image(self): + if PIL is None: + return # Skip test if PIL is not available. + + for test_images in _generate_test_images(): + img_list = [] + for im in test_images: + img_list.append(image_utils.img_to_array(im)[None, ...]) + + images = np.vstack(img_list) + generator = image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + rotation_range=90.0, + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=0.2, + channel_shift_range=0.0, + brightness_range=(1, 5), + fill_mode="nearest", + cval=0.5, + horizontal_flip=True, + vertical_flip=True, + ) + # Basic test before fit + x = np.random.random((32, 10, 10, 3)) + generator.flow(x) + + # Fit + generator.fit(images, augment=True) + + for x, _ in generator.flow( + images, np.arange(images.shape[0]), shuffle=True + ): + self.assertEqual(x.shape[1:], images.shape[1:]) + break + + def test_image_with_split_value_error(self): + with self.assertRaises(ValueError): + image.ImageDataGenerator(validation_split=5) + + def test_image_invalid_data(self): + generator = image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + data_format="channels_last", + ) + + # Test fit with invalid data + with self.assertRaises(ValueError): + x = np.random.random((3, 10, 10)) + generator.fit(x) + # Test flow with invalid data + with self.assertRaises(ValueError): + generator.flow(np.arange(5)) + # Invalid number of channels: will work but raise a warning + x = np.random.random((32, 10, 10, 5)) + generator.flow(x) + + with self.assertRaises(ValueError): + generator = image.ImageDataGenerator(data_format="unknown") + + generator = image.ImageDataGenerator(zoom_range=(2.0, 2.0)) + + def test_image_fit(self): + generator = image.ImageDataGenerator( + featurewise_center=True, + 
samplewise_center=True,
+            featurewise_std_normalization=True,
+            samplewise_std_normalization=True,
+            zca_whitening=True,
+            data_format="channels_last",
+        )
+        # Test grayscale
+        x = np.random.random((32, 10, 10, 1))
+        generator.fit(x)
+        # Test RGB
+        x = np.random.random((32, 10, 10, 3))
+        generator.fit(x)
+        generator = image.ImageDataGenerator(
+            featurewise_center=True,
+            samplewise_center=True,
+            featurewise_std_normalization=True,
+            samplewise_std_normalization=True,
+            zca_whitening=True,
+            data_format="channels_first",
+        )
+        # Test grayscale
+        x = np.random.random((32, 1, 10, 10))
+        generator.fit(x)
+        # Test RGB
+        x = np.random.random((32, 3, 10, 10))
+        generator.fit(x)
+
+    def test_directory_iterator(self):
+        if PIL is None:
+            return  # Skip test if PIL is not available.
+
+        num_classes = 2
+
+        temp_dir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree, temp_dir)
+
+        # create folders and subfolders
+        paths = []
+        for cl in range(num_classes):
+            class_directory = f"class-{cl}"
+            classpaths = [
+                class_directory,
+                os.path.join(class_directory, "subfolder-1"),
+                os.path.join(class_directory, "subfolder-2"),
+                os.path.join(class_directory, "subfolder-1", "sub-subfolder"),
+            ]
+            for path in classpaths:
+                os.mkdir(os.path.join(temp_dir, path))
+            paths.append(classpaths)
+
+        # save the images in the paths
+        count = 0
+        filenames = []
+        for test_images in _generate_test_images():
+            for im in test_images:
+                # rotate image class
+                im_class = count % num_classes
+                # rotate subfolders
+                classpaths = paths[im_class]
+                filename = os.path.join(
+                    classpaths[count % len(classpaths)],
+                    f"image-{count}.jpg",
+                )
+                filenames.append(filename)
+                im.save(os.path.join(temp_dir, filename))
+                count += 1
+
+        # Test image loading util
+        fname = os.path.join(temp_dir, filenames[0])
+        _ = image_utils.load_img(fname)
+        _ = image_utils.load_img(fname, grayscale=True)
+        _ = image_utils.load_img(fname, target_size=(10, 10))
+        _ = image_utils.load_img(
+            fname, target_size=(10, 10), interpolation="bilinear"
+        )
+
+        # create iterator
+        generator = image.ImageDataGenerator()
+        dir_iterator = generator.flow_from_directory(temp_dir)
+
+        # check number of classes and images
+        self.assertEqual(len(dir_iterator.class_indices), num_classes)
+        self.assertEqual(len(dir_iterator.classes), count)
+        self.assertEqual(set(dir_iterator.filenames), set(filenames))
+
+        def preprocessing_function(x):
+            """This will fail if not provided with a Numpy array.
+
+            Note: This is made to enforce backward compatibility.
+
+            Args:
+                x: A numpy array.
+
+            Returns:
+                An array of zeros with the same shape as the given array.
+            """
+            self.assertEqual(x.shape, (26, 26, 3))
+            self.assertIs(type(x), np.ndarray)
+            return np.zeros_like(x)
+
+        # Test usage as Sequence
+        generator = image.ImageDataGenerator(
+            preprocessing_function=preprocessing_function
+        )
+        dir_seq = generator.flow_from_directory(
+            str(temp_dir),
+            target_size=(26, 26),
+            color_mode="rgb",
+            batch_size=3,
+            class_mode="categorical",
+        )
+        self.assertEqual(len(dir_seq), count // 3 + 1)
+        x1, y1 = dir_seq[1]
+        self.assertEqual(x1.shape, (3, 26, 26, 3))
+        self.assertEqual(y1.shape, (3, num_classes))
+        x1, y1 = dir_seq[5]
+        self.assertTrue((x1 == 0).all())
+
+    def directory_iterator_with_validation_split_test_helper(
+        self, validation_split
+    ):
+        if PIL is None:
+            return  # Skip test if PIL is not available.
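+
+        # Builds a small class-labelled image tree on disk, then checks that
+        # the "training" and "validation" subsets split the samples according
+        # to `validation_split`.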
+ + num_classes = 2 + tmp_folder = tempfile.mkdtemp(prefix="test_images") + + # create folders and subfolders + paths = [] + for cl in range(num_classes): + class_directory = f"class-{cl}" + classpaths = [ + class_directory, + os.path.join(class_directory, "subfolder-1"), + os.path.join(class_directory, "subfolder-2"), + os.path.join(class_directory, "subfolder-1", "sub-subfolder"), + ] + for path in classpaths: + os.mkdir(os.path.join(tmp_folder, path)) + paths.append(classpaths) + + # save the images in the paths + count = 0 + filenames = [] + for test_images in _generate_test_images(): + for im in test_images: + # rotate image class + im_class = count % num_classes + # rotate subfolders + classpaths = paths[im_class] + filename = os.path.join( + classpaths[count % len(classpaths)], + f"image-{count}.jpg", + ) + filenames.append(filename) + im.save(os.path.join(tmp_folder, filename)) + count += 1 + + # create iterator + generator = image.ImageDataGenerator(validation_split=validation_split) + + with self.assertRaises(ValueError): + generator.flow_from_directory(tmp_folder, subset="foo") + + num_validation = int(count * validation_split) + num_training = count - num_validation + train_iterator = generator.flow_from_directory( + tmp_folder, subset="training" + ) + self.assertEqual(train_iterator.samples, num_training) + + valid_iterator = generator.flow_from_directory( + tmp_folder, subset="validation" + ) + self.assertEqual(valid_iterator.samples, num_validation) + + # check number of classes and images + self.assertEqual(len(train_iterator.class_indices), num_classes) + self.assertEqual(len(train_iterator.classes), num_training) + self.assertEqual( + len(set(train_iterator.filenames) & set(filenames)), num_training + ) + + model = sequential.Sequential([layers.Flatten(), layers.Dense(2)]) + model.compile(optimizer="sgd", loss="mse") + model.fit(train_iterator, epochs=1) + + shutil.rmtree(tmp_folder) + + @test_combinations.run_all_keras_modes + def test_directory_iterator_with_validation_split_25_percent(self): + self.directory_iterator_with_validation_split_test_helper(0.25) + + @test_combinations.run_all_keras_modes + def test_directory_iterator_with_validation_split_40_percent(self): + self.directory_iterator_with_validation_split_test_helper(0.40) + + @test_combinations.run_all_keras_modes + def test_directory_iterator_with_validation_split_50_percent(self): + self.directory_iterator_with_validation_split_test_helper(0.50) + + def test_batch_standardize(self): + if PIL is None: + return # Skip test if PIL is not available. 
+ + # ImageDataGenerator.standardize should work on batches + for test_images in _generate_test_images(): + img_list = [] + for im in test_images: + img_list.append(image_utils.img_to_array(im)[None, ...]) + + images = np.vstack(img_list) + generator = image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + rotation_range=90.0, + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=0.2, + channel_shift_range=0.0, + brightness_range=(1, 5), + fill_mode="nearest", + cval=0.5, + horizontal_flip=True, + vertical_flip=True, + ) + generator.fit(images, augment=True) + + transformed = np.copy(images) + for i, im in enumerate(transformed): + transformed[i] = generator.random_transform(im) + transformed = generator.standardize(transformed) + + def test_img_transforms(self): + x = np.random.random((3, 200, 200)) + _ = image.random_rotation(x, 20) + _ = image.random_shift(x, 0.2, 0.2) + _ = image.random_shear(x, 2.0) + _ = image.random_zoom(x, (0.5, 0.5)) + _ = image.apply_channel_shift(x, 2, 2) + _ = image.apply_affine_transform(x, 2) + with self.assertRaises(ValueError): + image.random_zoom(x, (0, 0, 0)) + _ = image.random_channel_shift(x, 2.0) @test_utils.run_v2_only class TestDirectoryIterator(test_combinations.TestCase): - - def test_directory_iterator(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images( - include_rgba=True, include_16bit=True, include_32bit=True) - num_classes = 2 - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - os.mkdir(os.path.join(tmpdir.full_path, path)) - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join(classpaths[count % len(classpaths)], - 'image-{}.png'.format(count)) - filenames.append(filename) - im.save(os.path.join(tmpdir.full_path, filename)) - count += 1 - - # create iterator - generator = image.ImageDataGenerator() - dir_iterator = generator.flow_from_directory(tmpdir.full_path) - - # check number of classes and images - self.assertLen(dir_iterator.class_indices, num_classes) - self.assertLen(dir_iterator.classes, count) - self.assertEqual(set(dir_iterator.filenames), set(filenames)) - - # Test invalid use cases - with self.assertRaises(ValueError): - generator.flow_from_directory(tmpdir.full_path, color_mode='cmyk') - with self.assertRaises(ValueError): - generator.flow_from_directory(tmpdir.full_path, class_mode='output') - - def preprocessing_function(x): - # This will fail if not provided by a Numpy array. - # Note: This is made to enforce backward compatibility. 
- self.assertEqual(x.shape, (26, 26, 3)) - self.assertIsInstance(x, np.ndarray) - - return np.zeros_like(x) - - # Test usage as Sequence - generator = image.ImageDataGenerator( - preprocessing_function=preprocessing_function) - dir_seq = generator.flow_from_directory( - tmpdir.full_path, - target_size=(26, 26), - color_mode='rgb', - batch_size=3, - class_mode='categorical') - self.assertLen(dir_seq, np.ceil(count / 3.)) - x1, y1 = dir_seq[1] - self.assertEqual(x1.shape, (3, 26, 26, 3)) - self.assertEqual(y1.shape, (3, num_classes)) - x1, y1 = dir_seq[5] - self.assertTrue((x1 == 0).all()) - - with self.assertRaises(ValueError): - x1, y1 = dir_seq[14] # there are 40 images and batch size is 3 - - def test_directory_iterator_class_mode_input(self): - tmpdir = self.create_tempdir() - os.mkdir(os.path.join(tmpdir.full_path, 'class-1')) - all_test_images = _generate_test_images( - include_rgba=True, include_16bit=True, include_32bit=True) - - # save the images in the paths - count = 0 - for test_images in all_test_images: - for im in test_images: - filename = os.path.join(tmpdir, 'class-1', 'image-{}.png'.format(count)) - im.save(filename) - count += 1 - - # create iterator - generator = image.ImageDataGenerator() - dir_iterator = generator.flow_from_directory( - tmpdir.full_path, class_mode='input') - batch = next(dir_iterator) - - # check if input and output have the same shape - self.assertEqual(batch[0].shape, batch[1].shape) - # check if the input and output images are not the same numpy array - input_img = batch[0][0] - output_img = batch[1][0] - output_img[0][0][0] += 1 - self.assertNotEqual(input_img[0][0][0], output_img[0][0][0]) - - @parameterized.parameters([ - (0.25, 30), - (0.50, 20), - (0.75, 10), - ]) - def test_directory_iterator_with_validation_split(self, validation_split, - num_training): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images( - include_rgba=True, include_16bit=True, include_32bit=True) - num_classes = 2 - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - os.mkdir(os.path.join(tmpdir.full_path, path)) - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join(classpaths[count % len(classpaths)], - 'image-{}.png'.format(count)) - filenames.append(filename) - im.save(os.path.join(tmpdir.full_path, filename)) - count += 1 - - # create iterator - generator = image.ImageDataGenerator(validation_split=validation_split) - - with self.assertRaises(ValueError): - generator.flow_from_directory(tmpdir.full_path, subset='foo') - - train_iterator = generator.flow_from_directory( - tmpdir.full_path, subset='training') - self.assertEqual(train_iterator.samples, num_training) - - valid_iterator = generator.flow_from_directory( - tmpdir.full_path, subset='validation') - self.assertEqual(valid_iterator.samples, count - num_training) - - # check number of classes and images - self.assertLen(train_iterator.class_indices, num_classes) - self.assertLen(train_iterator.classes, num_training) - self.assertLen(set(train_iterator.filenames) & 
set(filenames), num_training)
+    def test_directory_iterator(self):
+        tmpdir = self.create_tempdir()
+        all_test_images = _generate_test_images(
+            include_rgba=True, include_16bit=True, include_32bit=True
+        )
+        num_classes = 2
+
+        # create folders and subfolders
+        paths = []
+        for cl in range(num_classes):
+            class_directory = f"class-{cl}"
+            classpaths = [
+                class_directory,
+                os.path.join(class_directory, "subfolder-1"),
+                os.path.join(class_directory, "subfolder-2"),
+                os.path.join(class_directory, "subfolder-1", "sub-subfolder"),
+            ]
+            for path in classpaths:
+                os.mkdir(os.path.join(tmpdir.full_path, path))
+            paths.append(classpaths)
+
+        # save the images in the paths
+        count = 0
+        filenames = []
+        for test_images in all_test_images:
+            for im in test_images:
+                # rotate image class
+                im_class = count % num_classes
+                # rotate subfolders
+                classpaths = paths[im_class]
+                filename = os.path.join(
+                    classpaths[count % len(classpaths)],
+                    f"image-{count}.png",
+                )
+                filenames.append(filename)
+                im.save(os.path.join(tmpdir.full_path, filename))
+                count += 1
+
+        # create iterator
+        generator = image.ImageDataGenerator()
+        dir_iterator = generator.flow_from_directory(tmpdir.full_path)
+
+        # check number of classes and images
+        self.assertLen(dir_iterator.class_indices, num_classes)
+        self.assertLen(dir_iterator.classes, count)
+        self.assertEqual(set(dir_iterator.filenames), set(filenames))
+
+        # Test invalid use cases
+        with self.assertRaises(ValueError):
+            generator.flow_from_directory(tmpdir.full_path, color_mode="cmyk")
+        with self.assertRaises(ValueError):
+            generator.flow_from_directory(tmpdir.full_path, class_mode="output")
+
+        def preprocessing_function(x):
+            # This will fail if not provided with a Numpy array.
+            # Note: This is made to enforce backward compatibility.
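+            # Returning zeros (below) lets the test assert that the function
+            # was applied to every generated batch.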
+ self.assertEqual(x.shape, (26, 26, 3)) + self.assertIsInstance(x, np.ndarray) + + return np.zeros_like(x) + + # Test usage as Sequence + generator = image.ImageDataGenerator( + preprocessing_function=preprocessing_function + ) + dir_seq = generator.flow_from_directory( + tmpdir.full_path, + target_size=(26, 26), + color_mode="rgb", + batch_size=3, + class_mode="categorical", + ) + self.assertLen(dir_seq, np.ceil(count / 3.0)) + x1, y1 = dir_seq[1] + self.assertEqual(x1.shape, (3, 26, 26, 3)) + self.assertEqual(y1.shape, (3, num_classes)) + x1, y1 = dir_seq[5] + self.assertTrue((x1 == 0).all()) + + with self.assertRaises(ValueError): + x1, y1 = dir_seq[14] # there are 40 images and batch size is 3 + + def test_directory_iterator_class_mode_input(self): + tmpdir = self.create_tempdir() + os.mkdir(os.path.join(tmpdir.full_path, "class-1")) + all_test_images = _generate_test_images( + include_rgba=True, include_16bit=True, include_32bit=True + ) + + # save the images in the paths + count = 0 + for test_images in all_test_images: + for im in test_images: + filename = os.path.join(tmpdir, "class-1", f"image-{count}.png") + im.save(filename) + count += 1 + + # create iterator + generator = image.ImageDataGenerator() + dir_iterator = generator.flow_from_directory( + tmpdir.full_path, class_mode="input" + ) + batch = next(dir_iterator) + + # check if input and output have the same shape + self.assertEqual(batch[0].shape, batch[1].shape) + # check if the input and output images are not the same numpy array + input_img = batch[0][0] + output_img = batch[1][0] + output_img[0][0][0] += 1 + self.assertNotEqual(input_img[0][0][0], output_img[0][0][0]) + + @parameterized.parameters( + [ + (0.25, 30), + (0.50, 20), + (0.75, 10), + ] + ) + def test_directory_iterator_with_validation_split( + self, validation_split, num_training + ): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images( + include_rgba=True, include_16bit=True, include_32bit=True + ) + num_classes = 2 + + # create folders and subfolders + paths = [] + for cl in range(num_classes): + class_directory = f"class-{cl}" + classpaths = [ + class_directory, + os.path.join(class_directory, "subfolder-1"), + os.path.join(class_directory, "subfolder-2"), + os.path.join(class_directory, "subfolder-1", "sub-subfolder"), + ] + for path in classpaths: + os.mkdir(os.path.join(tmpdir.full_path, path)) + paths.append(classpaths) + + # save the images in the paths + count = 0 + filenames = [] + for test_images in all_test_images: + for im in test_images: + # rotate image class + im_class = count % num_classes + # rotate subfolders + classpaths = paths[im_class] + filename = os.path.join( + classpaths[count % len(classpaths)], + f"image-{count}.png", + ) + filenames.append(filename) + im.save(os.path.join(tmpdir.full_path, filename)) + count += 1 + + # create iterator + generator = image.ImageDataGenerator(validation_split=validation_split) + + with self.assertRaises(ValueError): + generator.flow_from_directory(tmpdir.full_path, subset="foo") + + train_iterator = generator.flow_from_directory( + tmpdir.full_path, subset="training" + ) + self.assertEqual(train_iterator.samples, num_training) + + valid_iterator = generator.flow_from_directory( + tmpdir.full_path, subset="validation" + ) + self.assertEqual(valid_iterator.samples, count - num_training) + + # check number of classes and images + self.assertLen(train_iterator.class_indices, num_classes) + self.assertLen(train_iterator.classes, num_training) + self.assertLen( + 
set(train_iterator.filenames) & set(filenames), num_training + ) @test_utils.run_v2_only class TestNumpyArrayIterator(test_combinations.TestCase): - - def test_numpy_array_iterator(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - - image_data_generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1) - - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(image_utils.img_to_array(im)[None, ...]) - images = np.vstack(img_list) - dsize = images.shape[0] - - iterator = image.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - image_data_generator, - shuffle=False, - save_to_dir=tmpdir.full_path, - batch_size=3) - x, y = next(iterator) - self.assertEqual(x.shape, images[:3].shape) - self.assertEqual(list(y), [0, 1, 2]) - - # Test with sample weights - iterator = image.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - image_data_generator, - shuffle=False, - sample_weight=np.arange(images.shape[0]) + 1, - save_to_dir=tmpdir.full_path, - batch_size=3) - x, y, w = iterator.next() - self.assertEqual(x.shape, images[:3].shape) - self.assertEqual(list(y), [0, 1, 2]) - self.assertEqual(list(w), [1, 2, 3]) - - # Test with `shuffle=True` - iterator = image.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - image_data_generator, - shuffle=True, - save_to_dir=tmpdir.full_path, - batch_size=3, - seed=42) - x, y = iterator.next() - self.assertEqual(x.shape, images[:3].shape) - # Check that the sequence is shuffled. 
- self.assertNotEqual(list(y), [0, 1, 2]) - - # Test without y - iterator = image.NumpyArrayIterator( - images, - None, - image_data_generator, - shuffle=True, - save_to_dir=tmpdir.full_path, - batch_size=3) - x = iterator.next() - self.assertIsInstance(x, np.ndarray) - self.assertEqual(x.shape, images[:3].shape) - - # Test with a single miscellaneous input data array - x_misc1 = np.random.random(dsize) - iterator = image.NumpyArrayIterator((images, x_misc1), - np.arange(dsize), - image_data_generator, - shuffle=False, - batch_size=2) - for i, (x, y) in enumerate(iterator): - self.assertEqual(x[0].shape, images[:2].shape) - self.assertTrue((x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all()) - if i == 2: - break - - # Test with two miscellaneous inputs - x_misc2 = np.random.random((dsize, 3, 3)) - iterator = image.NumpyArrayIterator((images, [x_misc1, x_misc2]), - np.arange(dsize), - image_data_generator, - shuffle=False, - batch_size=2) - for i, (x, y) in enumerate(iterator): - self.assertEqual(x[0].shape, images[:2].shape) - self.assertTrue((x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all()) - self.assertTrue((x[2] == x_misc2[(i * 2):((i + 1) * 2)]).all()) - if i == 2: - break - - # Test cases with `y = None` - iterator = image.NumpyArrayIterator( - images, None, image_data_generator, batch_size=3) - x = iterator.next() - self.assertIsInstance(x, np.ndarray) - self.assertEqual(x.shape, images[:3].shape) - - iterator = image.NumpyArrayIterator((images, x_misc1), - None, - image_data_generator, - batch_size=3, - shuffle=False) - x = iterator.next() - self.assertIsInstance(x, list) - self.assertEqual(x[0].shape, images[:3].shape) - self.assertTrue((x[1] == x_misc1[:3]).all()) - - iterator = image.NumpyArrayIterator((images, [x_misc1, x_misc2]), - None, - image_data_generator, - batch_size=3, - shuffle=False) - x = iterator.next() - self.assertIsInstance(x, list) - self.assertEqual(x[0].shape, images[:3].shape) - self.assertTrue((x[1] == x_misc1[:3]).all()) - self.assertTrue((x[2] == x_misc2[:3]).all()) - - # Test with validation split - generator = image.ImageDataGenerator(validation_split=0.2) - iterator = image.NumpyArrayIterator(images, None, generator, batch_size=3) - x = iterator.next() - self.assertIsInstance(x, np.ndarray) - self.assertEqual(x.shape, images[:3].shape) - - # Test some failure cases: - x_misc_err = np.random.random((dsize + 1, 3, 3)) - - with self.assertRaisesRegex(ValueError, 'All of the arrays in'): - image.NumpyArrayIterator((images, x_misc_err), - np.arange(dsize), - generator, - batch_size=3) - - with self.assertRaisesRegex(ValueError, - r'`x` \(images tensor\) and `y` \(labels\)'): - image.NumpyArrayIterator((images, x_misc1), - np.arange(dsize + 1), - generator, - batch_size=3) - - # Test `flow` behavior as Sequence - seq = image.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - generator, - shuffle=False, - save_to_dir=tmpdir.full_path, - batch_size=3) - self.assertLen(seq, images.shape[0] // 3 + 1) - x, y = seq[0] - self.assertEqual(x.shape, images[:3].shape) - self.assertEqual(list(y), [0, 1, 2]) - - # Test with `shuffle=True` - seq = image.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - generator, - shuffle=True, - save_to_dir=tmpdir.full_path, - batch_size=3, - seed=123) - x, y = seq[0] - # Check that the sequence is shuffled. - self.assertNotEqual(list(y), [0, 1, 2]) - # `on_epoch_end` should reshuffle the sequence. 
- seq.on_epoch_end() - _, y2 = seq[0] - self.assertNotEqual(list(y), list(y2)) - - # test order_interpolation - labels = np.array([[2, 2, 0, 2, 2], [1, 3, 2, 3, 1], [2, 1, 0, 1, 2], - [3, 1, 0, 2, 0], [3, 1, 3, 2, 1]]) - label_generator = image.ImageDataGenerator( - rotation_range=90., interpolation_order=0) - labels_gen = image.NumpyArrayIterator( - labels[np.newaxis, ..., np.newaxis], None, label_generator, seed=123) - self.assertTrue((np.unique(labels) == np.unique(next(labels_gen))).all()) + def test_numpy_array_iterator(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + + image_data_generator = image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + rotation_range=90.0, + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=0.2, + channel_shift_range=0.0, + brightness_range=(1, 5), + fill_mode="nearest", + cval=0.5, + horizontal_flip=True, + vertical_flip=True, + interpolation_order=1, + ) + + for test_images in all_test_images: + img_list = [] + for im in test_images: + img_list.append(image_utils.img_to_array(im)[None, ...]) + images = np.vstack(img_list) + dsize = images.shape[0] + + iterator = image.NumpyArrayIterator( + images, + np.arange(images.shape[0]), + image_data_generator, + shuffle=False, + save_to_dir=tmpdir.full_path, + batch_size=3, + ) + x, y = next(iterator) + self.assertEqual(x.shape, images[:3].shape) + self.assertEqual(list(y), [0, 1, 2]) + + # Test with sample weights + iterator = image.NumpyArrayIterator( + images, + np.arange(images.shape[0]), + image_data_generator, + shuffle=False, + sample_weight=np.arange(images.shape[0]) + 1, + save_to_dir=tmpdir.full_path, + batch_size=3, + ) + x, y, w = iterator.next() + self.assertEqual(x.shape, images[:3].shape) + self.assertEqual(list(y), [0, 1, 2]) + self.assertEqual(list(w), [1, 2, 3]) + + # Test with `shuffle=True` + iterator = image.NumpyArrayIterator( + images, + np.arange(images.shape[0]), + image_data_generator, + shuffle=True, + save_to_dir=tmpdir.full_path, + batch_size=3, + seed=42, + ) + x, y = iterator.next() + self.assertEqual(x.shape, images[:3].shape) + # Check that the sequence is shuffled. 
+ self.assertNotEqual(list(y), [0, 1, 2]) + + # Test without y + iterator = image.NumpyArrayIterator( + images, + None, + image_data_generator, + shuffle=True, + save_to_dir=tmpdir.full_path, + batch_size=3, + ) + x = iterator.next() + self.assertIsInstance(x, np.ndarray) + self.assertEqual(x.shape, images[:3].shape) + + # Test with a single miscellaneous input data array + x_misc1 = np.random.random(dsize) + iterator = image.NumpyArrayIterator( + (images, x_misc1), + np.arange(dsize), + image_data_generator, + shuffle=False, + batch_size=2, + ) + for i, (x, y) in enumerate(iterator): + self.assertEqual(x[0].shape, images[:2].shape) + self.assertTrue( + (x[1] == x_misc1[(i * 2) : ((i + 1) * 2)]).all() + ) + if i == 2: + break + + # Test with two miscellaneous inputs + x_misc2 = np.random.random((dsize, 3, 3)) + iterator = image.NumpyArrayIterator( + (images, [x_misc1, x_misc2]), + np.arange(dsize), + image_data_generator, + shuffle=False, + batch_size=2, + ) + for i, (x, y) in enumerate(iterator): + self.assertEqual(x[0].shape, images[:2].shape) + self.assertTrue( + (x[1] == x_misc1[(i * 2) : ((i + 1) * 2)]).all() + ) + self.assertTrue( + (x[2] == x_misc2[(i * 2) : ((i + 1) * 2)]).all() + ) + if i == 2: + break + + # Test cases with `y = None` + iterator = image.NumpyArrayIterator( + images, None, image_data_generator, batch_size=3 + ) + x = iterator.next() + self.assertIsInstance(x, np.ndarray) + self.assertEqual(x.shape, images[:3].shape) + + iterator = image.NumpyArrayIterator( + (images, x_misc1), + None, + image_data_generator, + batch_size=3, + shuffle=False, + ) + x = iterator.next() + self.assertIsInstance(x, list) + self.assertEqual(x[0].shape, images[:3].shape) + self.assertTrue((x[1] == x_misc1[:3]).all()) + + iterator = image.NumpyArrayIterator( + (images, [x_misc1, x_misc2]), + None, + image_data_generator, + batch_size=3, + shuffle=False, + ) + x = iterator.next() + self.assertIsInstance(x, list) + self.assertEqual(x[0].shape, images[:3].shape) + self.assertTrue((x[1] == x_misc1[:3]).all()) + self.assertTrue((x[2] == x_misc2[:3]).all()) + + # Test with validation split + generator = image.ImageDataGenerator(validation_split=0.2) + iterator = image.NumpyArrayIterator( + images, None, generator, batch_size=3 + ) + x = iterator.next() + self.assertIsInstance(x, np.ndarray) + self.assertEqual(x.shape, images[:3].shape) + + # Test some failure cases: + x_misc_err = np.random.random((dsize + 1, 3, 3)) + + with self.assertRaisesRegex(ValueError, "All of the arrays in"): + image.NumpyArrayIterator( + (images, x_misc_err), + np.arange(dsize), + generator, + batch_size=3, + ) + + with self.assertRaisesRegex( + ValueError, r"`x` \(images tensor\) and `y` \(labels\)" + ): + image.NumpyArrayIterator( + (images, x_misc1), + np.arange(dsize + 1), + generator, + batch_size=3, + ) + + # Test `flow` behavior as Sequence + seq = image.NumpyArrayIterator( + images, + np.arange(images.shape[0]), + generator, + shuffle=False, + save_to_dir=tmpdir.full_path, + batch_size=3, + ) + self.assertLen(seq, images.shape[0] // 3 + 1) + x, y = seq[0] + self.assertEqual(x.shape, images[:3].shape) + self.assertEqual(list(y), [0, 1, 2]) + + # Test with `shuffle=True` + seq = image.NumpyArrayIterator( + images, + np.arange(images.shape[0]), + generator, + shuffle=True, + save_to_dir=tmpdir.full_path, + batch_size=3, + seed=123, + ) + x, y = seq[0] + # Check that the sequence is shuffled. + self.assertNotEqual(list(y), [0, 1, 2]) + # `on_epoch_end` should reshuffle the sequence. 
+ seq.on_epoch_end() + _, y2 = seq[0] + self.assertNotEqual(list(y), list(y2)) + + # test order_interpolation + labels = np.array( + [ + [2, 2, 0, 2, 2], + [1, 3, 2, 3, 1], + [2, 1, 0, 1, 2], + [3, 1, 0, 2, 0], + [3, 1, 3, 2, 1], + ] + ) + label_generator = image.ImageDataGenerator( + rotation_range=90.0, interpolation_order=0 + ) + labels_gen = image.NumpyArrayIterator( + labels[np.newaxis, ..., np.newaxis], None, label_generator, seed=123 + ) + self.assertTrue( + (np.unique(labels) == np.unique(next(labels_gen))).all() + ) @test_utils.run_v2_only class TestDataFrameIterator(test_combinations.TestCase): - - def test_dataframe_iterator(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - num_classes = 2 - - # save the images in the tmpdir - count = 0 - filenames = [] - filepaths = [] - filenames_without = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - filename_without = 'image-{}'.format(count) - filenames.append(filename) - filepaths.append(os.path.join(tmpdir.full_path, filename)) - filenames_without.append(filename_without) - im.save(os.path.join(tmpdir.full_path, filename)) - count += 1 - - df = pd.DataFrame({ - 'filename': filenames, - 'class': [str(random.randint(0, 1)) for _ in filenames], - 'filepaths': filepaths - }) - - # create iterator - iterator = image.DataFrameIterator(df, tmpdir.full_path) - batch = next(iterator) - self.assertLen(batch, 2) - self.assertIsInstance(batch[0], np.ndarray) - self.assertIsInstance(batch[1], np.ndarray) - generator = image.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe(df, x_col='filepaths') - df_iterator_dir = generator.flow_from_dataframe(df, tmpdir.full_path) - df_sparse_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, class_mode='sparse') - self.assertFalse(np.isnan(df_sparse_iterator.classes).any()) - # check number of classes and images - self.assertLen(df_iterator.class_indices, num_classes) - self.assertLen(df_iterator.classes, count) - self.assertEqual(set(df_iterator.filenames), set(filepaths)) - self.assertLen(df_iterator_dir.class_indices, num_classes) - self.assertLen(df_iterator_dir.classes, count) - self.assertEqual(set(df_iterator_dir.filenames), set(filenames)) - # test without shuffle - _, batch_y = next( - generator.flow_from_dataframe( - df, tmpdir.full_path, shuffle=False, class_mode='sparse')) - self.assertTrue( - (batch_y == df['class'].astype('float')[:len(batch_y)]).all()) - # Test invalid use cases - with self.assertRaises(ValueError): - generator.flow_from_dataframe(df, tmpdir.full_path, color_mode='cmyk') - with self.assertRaises(ValueError): - generator.flow_from_dataframe(df, tmpdir.full_path, class_mode='output') - with self.assertWarns(DeprecationWarning): - generator.flow_from_dataframe(df, tmpdir.full_path, has_ext=True) - with self.assertWarns(DeprecationWarning): - generator.flow_from_dataframe(df, tmpdir.full_path, has_ext=False) - - def preprocessing_function(x): - # This will fail if not provided by a Numpy array. - # Note: This is made to enforce backward compatibility. 
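[Editor's note] The NumpyArrayIterator hunk above is a pure reformat (black-style quoting and wrapping); the behavior it tests is unchanged. As a reference for readers, a minimal sketch of that flow, with illustrative array names and sizes:

import numpy as np
from keras.preprocessing import image

images = np.random.random((12, 26, 26, 3))  # illustrative image batch
labels = np.arange(images.shape[0])

gen = image.ImageDataGenerator(rotation_range=90.0)

# flow() returns a NumpyArrayIterator; with sample_weight set,
# each batch is an (x, y, w) triple.
it = gen.flow(
    images,
    labels,
    sample_weight=labels + 1,
    shuffle=False,
    batch_size=3,
)
x, y, w = next(it)

# The iterator is a Sequence; with shuffle=True, on_epoch_end()
# reshuffles the index order, so seq[0] changes between epochs.
seq = gen.flow(images, labels, shuffle=True, batch_size=3, seed=42)
_, y1 = seq[0]
seq.on_epoch_end()
_, y2 = seq[0]  # generally differs from y1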
- - self.assertEqual(x.shape, (26, 26, 3)) - self.assertIsInstance(x, np.ndarray) - - return np.zeros_like(x) - - # Test usage as Sequence - generator = image.ImageDataGenerator( - preprocessing_function=preprocessing_function) - dir_seq = generator.flow_from_dataframe( - df, - tmpdir.full_path, - target_size=(26, 26), - color_mode='rgb', - batch_size=3, - class_mode='categorical') - self.assertLen(dir_seq, np.ceil(count / 3)) - x1, y1 = dir_seq[1] - self.assertEqual(x1.shape, (3, 26, 26, 3)) - self.assertEqual(y1.shape, (3, num_classes)) - x1, y1 = dir_seq[5] - self.assertTrue((x1 == 0).all()) - - with self.assertRaises(ValueError): - x1, y1 = dir_seq[9] - - def test_dataframe_iterator_validate_filenames(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(os.path.join(tmpdir.full_path, filename)) - filenames.append(filename) - count += 1 - df = pd.DataFrame({'filename': filenames + ['test.jpp', 'test.jpg']}) - generator = image.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, class_mode='input') - self.assertLen(df_iterator.filenames, len(df['filename']) - 2) - df_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, class_mode='input', validate_filenames=False) - self.assertLen(df_iterator.filenames, len(df['filename'])) - - def test_dataframe_iterator_sample_weights(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(os.path.join(tmpdir.full_path, filename)) - filenames.append(filename) - count += 1 - df = pd.DataFrame({'filename': filenames}) - df['weight'] = ([2, 5] * len(df))[:len(df)] - generator = image.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, - tmpdir.full_path, - x_col='filename', - y_col=None, - shuffle=False, - batch_size=5, - weight_col='weight', - class_mode='input') - - batch = next(df_iterator) - self.assertLen(batch, 3) # (x, y, weights) - # check if input and output have the same shape and they're the same - self.assertEqual(batch[0].all(), batch[1].all()) - # check if the input and output images are not the same numpy array - input_img = batch[0][0] - output_img = batch[1][0] - output_img[0][0][0] += 1 - self.assertNotEqual(input_img[0][0][0], output_img[0][0][0]) - self.assertAllEqual(np.array([2, 5, 2, 5, 2]), batch[2]) - - # fail - df['weight'] = (['2', '5'] * len(df))[:len(df)] - with self.assertRaises(TypeError): - image.ImageDataGenerator().flow_from_dataframe( - df, weight_col='weight', class_mode='input') - - def test_dataframe_iterator_class_mode_input(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(os.path.join(tmpdir.full_path, filename)) - filenames.append(filename) - count += 1 - df = pd.DataFrame({'filename': filenames}) - generator = image.ImageDataGenerator() - df_autoencoder_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, x_col='filename', y_col=None, class_mode='input') - - batch 
= next(df_autoencoder_iterator) - - # check if input and output have the same shape and they're the same - self.assertAllClose(batch[0], batch[1]) - # check if the input and output images are not the same numpy array - input_img = batch[0][0] - output_img = batch[1][0] - output_img[0][0][0] += 1 - self.assertNotEqual(input_img[0][0][0], output_img[0][0][0]) - - df_autoencoder_iterator = generator.flow_from_dataframe( - df, - tmpdir.full_path, - x_col='filename', - y_col='class', - class_mode='input') - - batch = next(df_autoencoder_iterator) - - # check if input and output have the same shape and they're the same - self.assertEqual(batch[0].all(), batch[1].all()) - # check if the input and output images are not the same numpy array - input_img = batch[0][0] - output_img = batch[1][0] - output_img[0][0][0] += 1 - self.assertNotEqual(input_img[0][0][0], output_img[0][0][0]) - - def test_dataframe_iterator_class_mode_categorical_multi_label(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - # save the images in the paths - filenames = [] - count = 0 - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(os.path.join(tmpdir.full_path, filename)) - filenames.append(filename) - count += 1 - label_opt = ['a', 'b', ['a'], ['b'], ['a', 'b'], ['b', 'a']] - df = pd.DataFrame({ - 'filename': filenames, - 'class': [random.choice(label_opt) for _ in filenames[:-2]] + - ['b', 'a'] - }) - generator = image.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe(df, tmpdir.full_path) - batch_x, batch_y = next(df_iterator) - self.assertIsInstance(batch_x, np.ndarray) - self.assertLen(batch_x.shape, 4) - self.assertIsInstance(batch_y, np.ndarray) - self.assertEqual(batch_y.shape, (len(batch_x), 2)) - for labels in batch_y: - self.assertTrue(all(label in {0, 1} for label in labels)) - - # on first 3 batches - df = pd.DataFrame({ - 'filename': - filenames, - 'class': [['b', 'a']] + ['b'] + [['c']] + - [random.choice(label_opt) for _ in filenames[:-3]] - }) - generator = image.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, shuffle=False) - batch_x, batch_y = next(df_iterator) - self.assertIsInstance(batch_x, np.ndarray) - self.assertLen(batch_x.shape, 4) - self.assertIsInstance(batch_y, np.ndarray) - self.assertEqual(batch_y.shape, (len(batch_x), 3)) - for labels in batch_y: - self.assertTrue(all(label in {0, 1} for label in labels)) - self.assertTrue((batch_y[0] == np.array([1, 1, 0])).all()) - self.assertTrue((batch_y[1] == np.array([0, 1, 0])).all()) - self.assertTrue((batch_y[2] == np.array([0, 0, 1])).all()) - - def test_dataframe_iterator_class_mode_multi_output(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - # save the images in the paths - filenames = [] - count = 0 - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(os.path.join(tmpdir.full_path, filename)) - filenames.append(filename) - count += 1 - # fit both outputs are a single number - df = pd.DataFrame({ - 'filename': filenames - }).assign( - output_0=np.random.uniform(size=len(filenames)), - output_1=np.random.uniform(size=len(filenames))) - df_iterator = image.ImageDataGenerator().flow_from_dataframe( - df, - y_col=['output_0', 'output_1'], - directory=tmpdir.full_path, - batch_size=3, - shuffle=False, - class_mode='multi_output') - batch_x, batch_y = 
next(df_iterator) - self.assertIsInstance(batch_x, np.ndarray) - self.assertLen(batch_x.shape, 4) - self.assertIsInstance(batch_y, list) - self.assertLen(batch_y, 2) - self.assertAllEqual(batch_y[0], np.array(df['output_0'].tolist()[:3])) - self.assertAllEqual(batch_y[1], np.array(df['output_1'].tolist()[:3])) - # if one of the outputs is a 1D array - df['output_1'] = [ - np.random.uniform(size=(2, 2, 1)).flatten() for _ in range(len(df)) - ] - df_iterator = image.ImageDataGenerator().flow_from_dataframe( - df, - y_col=['output_0', 'output_1'], - directory=tmpdir.full_path, - batch_size=3, - shuffle=False, - class_mode='multi_output') - batch_x, batch_y = next(df_iterator) - self.assertIsInstance(batch_x, np.ndarray) - self.assertLen(batch_x.shape, 4) - self.assertIsInstance(batch_y, list) - self.assertLen(batch_y, 2) - self.assertAllEqual(batch_y[0], np.array(df['output_0'].tolist()[:3])) - self.assertAllEqual(batch_y[1], np.array(df['output_1'].tolist()[:3])) - # if one of the outputs is a 2D array - df['output_1'] = [np.random.uniform(size=(2, 2, 1)) for _ in range(len(df))] - df_iterator = image.ImageDataGenerator().flow_from_dataframe( - df, - y_col=['output_0', 'output_1'], - directory=tmpdir.full_path, - batch_size=3, - shuffle=False, - class_mode='multi_output') - batch_x, batch_y = next(df_iterator) - self.assertIsInstance(batch_x, np.ndarray) - self.assertLen(batch_x.shape, 4) - self.assertIsInstance(batch_y, list) - self.assertLen(batch_y, 2) - self.assertAllEqual(batch_y[0], np.array(df['output_0'].tolist()[:3])) - self.assertAllEqual(batch_y[1], np.array(df['output_1'].tolist()[:3])) - # fail if single column - with self.assertRaises(TypeError): - image.ImageDataGenerator().flow_from_dataframe( - df, - y_col='output_0', - directory=tmpdir.full_path, - class_mode='multi_output') - - def test_dataframe_iterator_class_mode_raw(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - # save the images in the paths - filenames = [] - count = 0 - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(os.path.join(tmpdir.full_path, filename)) - filenames.append(filename) - count += 1 - # case for 1D output - df = pd.DataFrame({ - 'filename': filenames - }).assign( - output_0=np.random.uniform(size=len(filenames)), - output_1=np.random.uniform(size=len(filenames))) - df_iterator = image.ImageDataGenerator().flow_from_dataframe( - df, - y_col='output_0', - directory=tmpdir.full_path, - batch_size=3, - shuffle=False, - class_mode='raw') - batch_x, batch_y = next(df_iterator) - self.assertIsInstance(batch_x, np.ndarray) - self.assertLen(batch_x.shape, 4) - self.assertIsInstance(batch_y, np.ndarray) - self.assertEqual(batch_y.shape, (3,)) - self.assertAllEqual(batch_y, df['output_0'].values[:3]) - # case with a 2D output - df_iterator = image.ImageDataGenerator().flow_from_dataframe( - df, - y_col=['output_0', 'output_1'], - directory=tmpdir.full_path, - batch_size=3, - shuffle=False, - class_mode='raw') - batch_x, batch_y = next(df_iterator) - self.assertIsInstance(batch_x, np.ndarray) - self.assertLen(batch_x.shape, 4) - self.assertIsInstance(batch_y, np.ndarray) - self.assertEqual(batch_y.shape, (3, 2)) - self.assertAllEqual(batch_y, df[['output_0', 'output_1']].values[:3]) - - @parameterized.parameters([ - (0.25, 18), - (0.50, 12), - (0.75, 6), - ]) - def test_dataframe_iterator_with_validation_split(self, validation_split, - num_training): - tmpdir = self.create_tempdir() - 
all_test_images = _generate_test_images(include_rgba=True) - num_classes = 2 - - # save the images in the tmpdir - count = 0 - filenames = [] - filenames_without = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - filename_without = 'image-{}'.format(count) - filenames.append(filename) - filenames_without.append(filename_without) - im.save(os.path.join(tmpdir.full_path, filename)) - count += 1 - - df = pd.DataFrame({ - 'filename': filenames, - 'class': [str(random.randint(0, 1)) for _ in filenames] - }) - # create iterator - generator = image.ImageDataGenerator(validation_split=validation_split) - df_sparse_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, class_mode='sparse') - if np.isnan(next(df_sparse_iterator)[:][1]).any(): - raise ValueError('Invalid values.') - - with self.assertRaises(ValueError): - generator.flow_from_dataframe(df, tmpdir.full_path, subset='foo') - - train_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, subset='training') - self.assertEqual(train_iterator.samples, num_training) - - valid_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, subset='validation') - self.assertEqual(valid_iterator.samples, count - num_training) - - # check number of classes and images - self.assertLen(train_iterator.class_indices, num_classes) - self.assertLen(train_iterator.classes, num_training) - self.assertLen(set(train_iterator.filenames) & set(filenames), num_training) - - def test_dataframe_iterator_with_custom_indexed_dataframe(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - num_classes = 2 - - # save the images in the tmpdir - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - filenames.append(filename) - im.save(os.path.join(tmpdir.full_path, filename)) - count += 1 - - # create dataframes - classes = np.random.randint(num_classes, size=len(filenames)) - classes = [str(c) for c in classes] - df = pd.DataFrame({'filename': filenames, 'class': classes}) - df2 = pd.DataFrame({ - 'filename': filenames, - 'class': classes - }, - index=np.arange(1, - len(filenames) + 1)) - df3 = pd.DataFrame({ - 'filename': filenames, - 'class': classes - }, - index=filenames) - - # create iterators - seed = 1 - generator = image.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe(df, tmpdir.full_path, seed=seed) - df2_iterator = generator.flow_from_dataframe( - df2, tmpdir.full_path, seed=seed) - df3_iterator = generator.flow_from_dataframe( - df3, tmpdir.full_path, seed=seed) - - # Test all iterators return same pairs of arrays - for _ in range(len(filenames)): - a1, c1 = next(df_iterator) - a2, c2 = next(df2_iterator) - a3, c3 = next(df3_iterator) - self.assertAllEqual(a1, a2) - self.assertAllEqual(a1, a3) - self.assertAllEqual(c1, c2) - self.assertAllEqual(c1, c3) - - def test_dataframe_iterator_n(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - - # save the images in the tmpdir - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - filenames.append(filename) - im.save(os.path.join(tmpdir.full_path, filename)) - count += 1 - - # exclude first two items - n_files = len(filenames) - input_filenames = filenames[2:] - - # create dataframes - classes = np.random.randint(2, size=len(input_filenames)) - classes = 
[str(c) for c in classes] - df = pd.DataFrame({'filename': input_filenames}) - df2 = pd.DataFrame({'filename': input_filenames, 'class': classes}) - - # create iterators - generator = image.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, class_mode=None) - df2_iterator = generator.flow_from_dataframe( - df2, tmpdir.full_path, class_mode='binary') - - # Test the number of items in iterators - self.assertEqual(df_iterator.n, n_files - 2) - self.assertEqual(df2_iterator.n, n_files - 2) - - def test_dataframe_iterator_absolute_path(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - - # save the images in the tmpdir - count = 0 - file_paths = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{:0>5}.png'.format(count) - file_path = os.path.join(tmpdir.full_path, filename) - file_paths.append(file_path) - im.save(file_path) - count += 1 - - # prepare an image with a forbidden extension. - file_path_fbd = os.path.join(tmpdir.full_path, 'image-forbid.fbd') - shutil.copy(file_path, file_path_fbd) - - # create dataframes - classes = np.random.randint(2, size=len(file_paths)) - classes = [str(c) for c in classes] - df = pd.DataFrame({'filename': file_paths}) - df2 = pd.DataFrame({'filename': file_paths, 'class': classes}) - df3 = pd.DataFrame({'filename': ['image-not-exist.png'] + file_paths}) - df4 = pd.DataFrame({'filename': file_paths + [file_path_fbd]}) - - # create iterators - generator = image.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, None, class_mode=None, shuffle=False, batch_size=1) - df2_iterator = generator.flow_from_dataframe( - df2, None, class_mode='binary', shuffle=False, batch_size=1) - df3_iterator = generator.flow_from_dataframe( - df3, None, class_mode=None, shuffle=False, batch_size=1) - df4_iterator = generator.flow_from_dataframe( - df4, None, class_mode=None, shuffle=False, batch_size=1) - - validation_split = 0.2 - generator_split = image.ImageDataGenerator( - validation_split=validation_split) - df_train_iterator = generator_split.flow_from_dataframe( - df, - None, - class_mode=None, - shuffle=False, - subset='training', - batch_size=1) - df_val_iterator = generator_split.flow_from_dataframe( - df, - None, - class_mode=None, - shuffle=False, - subset='validation', - batch_size=1) - - # Test the number of items in iterators - self.assertLen(file_paths, df_iterator.n) - self.assertLen(file_paths, df2_iterator.n) - self.assertLen(file_paths, df3_iterator.n) - self.assertLen(file_paths, df4_iterator.n) - self.assertEqual(df_val_iterator.n, int(validation_split * len(file_paths))) - self.assertLen(file_paths, df_train_iterator.n + df_val_iterator.n) - - # Test flow_from_dataframe - for i in range(len(file_paths)): - a1 = next(df_iterator) - a2, _ = next(df2_iterator) - a3 = next(df3_iterator) - a4 = next(df4_iterator) - - if i < df_val_iterator.n: - a5 = next(df_val_iterator) - else: - a5 = next(df_train_iterator) - - self.assertAllEqual(a1, a2) - self.assertAllEqual(a1, a3) - self.assertAllEqual(a1, a4) - self.assertAllEqual(a1, a5) - - def test_dataframe_iterator_with_subdirs(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - num_classes = 2 - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - 
os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - os.mkdir(os.path.join(tmpdir, path)) - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join(classpaths[count % len(classpaths)], - 'image-{}.png'.format(count)) - filenames.append(filename) - im.save(os.path.join(tmpdir.full_path, filename)) - count += 1 - - # create dataframe - classes = np.random.randint(num_classes, size=len(filenames)) - classes = [str(c) for c in classes] - df = pd.DataFrame({'filename': filenames, 'class': classes}) - - # create iterator - generator = image.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, tmpdir.full_path, class_mode='binary') - - # Test the number of items in iterator - self.assertLen(filenames, df_iterator.n) - self.assertEqual(set(df_iterator.filenames), set(filenames)) - - def test_dataframe_iterator_classes_indices_order(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(os.path.join(tmpdir.full_path, filename)) - filenames.append(filename) - count += 1 - - # Test the class_indices without classes input - generator = image.ImageDataGenerator() - label_opt = ['a', 'b', ['a'], ['b'], ['a', 'b'], ['b', 'a']] - df_f = pd.DataFrame({ - 'filename': filenames, - 'class': ['a', 'b'] + - [random.choice(label_opt) for _ in filenames[:-2]] - }) - flow_forward_iter = generator.flow_from_dataframe(df_f, tmpdir.full_path) - label_rev = ['b', 'a', ['b'], ['a'], ['b', 'a'], ['a', 'b']] - df_r = pd.DataFrame({ - 'filename': filenames, - 'class': ['b', 'a'] + - [random.choice(label_rev) for _ in filenames[:-2]] - }) - flow_backward_iter = generator.flow_from_dataframe(df_r, tmpdir.full_path) - - # check class_indices - self.assertEqual(flow_forward_iter.class_indices, - flow_backward_iter.class_indices) - - # Test the class_indices with classes input - generator_2 = image.ImageDataGenerator() - df_f2 = pd.DataFrame([['data/A.jpg', 'A'], ['data/B.jpg', 'B']], - columns=['filename', 'class']) - flow_forward = generator_2.flow_from_dataframe(df_f2, classes=['A', 'B']) - df_b2 = pd.DataFrame([['data/A.jpg', 'A'], ['data/B.jpg', 'B']], - columns=['filename', 'class']) - flow_backward = generator_2.flow_from_dataframe(df_b2, classes=['B', 'A']) - - # check class_indices - self.assertNotEqual(flow_forward.class_indices, flow_backward.class_indices) + def test_dataframe_iterator(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + num_classes = 2 + + # save the images in the tmpdir + count = 0 + filenames = [] + filepaths = [] + filenames_without = [] + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + filename_without = f"image-{count}" + filenames.append(filename) + filepaths.append(os.path.join(tmpdir.full_path, filename)) + filenames_without.append(filename_without) + im.save(os.path.join(tmpdir.full_path, filename)) + count += 1 + + df = pd.DataFrame( + { + "filename": filenames, + "class": [str(random.randint(0, 1)) for _ in filenames], + "filepaths": 
filepaths, + } + ) + + # create iterator + iterator = image.DataFrameIterator(df, tmpdir.full_path) + batch = next(iterator) + self.assertLen(batch, 2) + self.assertIsInstance(batch[0], np.ndarray) + self.assertIsInstance(batch[1], np.ndarray) + generator = image.ImageDataGenerator() + df_iterator = generator.flow_from_dataframe(df, x_col="filepaths") + df_iterator_dir = generator.flow_from_dataframe(df, tmpdir.full_path) + df_sparse_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, class_mode="sparse" + ) + self.assertFalse(np.isnan(df_sparse_iterator.classes).any()) + # check number of classes and images + self.assertLen(df_iterator.class_indices, num_classes) + self.assertLen(df_iterator.classes, count) + self.assertEqual(set(df_iterator.filenames), set(filepaths)) + self.assertLen(df_iterator_dir.class_indices, num_classes) + self.assertLen(df_iterator_dir.classes, count) + self.assertEqual(set(df_iterator_dir.filenames), set(filenames)) + # test without shuffle + _, batch_y = next( + generator.flow_from_dataframe( + df, tmpdir.full_path, shuffle=False, class_mode="sparse" + ) + ) + self.assertTrue( + (batch_y == df["class"].astype("float")[: len(batch_y)]).all() + ) + # Test invalid use cases + with self.assertRaises(ValueError): + generator.flow_from_dataframe( + df, tmpdir.full_path, color_mode="cmyk" + ) + with self.assertRaises(ValueError): + generator.flow_from_dataframe( + df, tmpdir.full_path, class_mode="output" + ) + with self.assertWarns(DeprecationWarning): + generator.flow_from_dataframe(df, tmpdir.full_path, has_ext=True) + with self.assertWarns(DeprecationWarning): + generator.flow_from_dataframe(df, tmpdir.full_path, has_ext=False) + + def preprocessing_function(x): + # This will fail if not provided by a Numpy array. + # Note: This is made to enforce backward compatibility. 
+ + self.assertEqual(x.shape, (26, 26, 3)) + self.assertIsInstance(x, np.ndarray) + + return np.zeros_like(x) + + # Test usage as Sequence + generator = image.ImageDataGenerator( + preprocessing_function=preprocessing_function + ) + dir_seq = generator.flow_from_dataframe( + df, + tmpdir.full_path, + target_size=(26, 26), + color_mode="rgb", + batch_size=3, + class_mode="categorical", + ) + self.assertLen(dir_seq, np.ceil(count / 3)) + x1, y1 = dir_seq[1] + self.assertEqual(x1.shape, (3, 26, 26, 3)) + self.assertEqual(y1.shape, (3, num_classes)) + x1, y1 = dir_seq[5] + self.assertTrue((x1 == 0).all()) + + with self.assertRaises(ValueError): + x1, y1 = dir_seq[9] + + def test_dataframe_iterator_validate_filenames(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + # save the images in the paths + count = 0 + filenames = [] + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + im.save(os.path.join(tmpdir.full_path, filename)) + filenames.append(filename) + count += 1 + df = pd.DataFrame({"filename": filenames + ["test.jpp", "test.jpg"]}) + generator = image.ImageDataGenerator() + df_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, class_mode="input" + ) + self.assertLen(df_iterator.filenames, len(df["filename"]) - 2) + df_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, class_mode="input", validate_filenames=False + ) + self.assertLen(df_iterator.filenames, len(df["filename"])) + + def test_dataframe_iterator_sample_weights(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + # save the images in the paths + count = 0 + filenames = [] + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + im.save(os.path.join(tmpdir.full_path, filename)) + filenames.append(filename) + count += 1 + df = pd.DataFrame({"filename": filenames}) + df["weight"] = ([2, 5] * len(df))[: len(df)] + generator = image.ImageDataGenerator() + df_iterator = generator.flow_from_dataframe( + df, + tmpdir.full_path, + x_col="filename", + y_col=None, + shuffle=False, + batch_size=5, + weight_col="weight", + class_mode="input", + ) + + batch = next(df_iterator) + self.assertLen(batch, 3) # (x, y, weights) + # check if input and output have the same shape and they're the same + self.assertEqual(batch[0].all(), batch[1].all()) + # check if the input and output images are not the same numpy array + input_img = batch[0][0] + output_img = batch[1][0] + output_img[0][0][0] += 1 + self.assertNotEqual(input_img[0][0][0], output_img[0][0][0]) + self.assertAllEqual(np.array([2, 5, 2, 5, 2]), batch[2]) + + # fail + df["weight"] = (["2", "5"] * len(df))[: len(df)] + with self.assertRaises(TypeError): + image.ImageDataGenerator().flow_from_dataframe( + df, weight_col="weight", class_mode="input" + ) + + def test_dataframe_iterator_class_mode_input(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + # save the images in the paths + count = 0 + filenames = [] + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + im.save(os.path.join(tmpdir.full_path, filename)) + filenames.append(filename) + count += 1 + df = pd.DataFrame({"filename": filenames}) + generator = image.ImageDataGenerator() + df_autoencoder_iterator = generator.flow_from_dataframe( + df, + tmpdir.full_path, + x_col="filename", + y_col=None, + class_mode="input", + ) 
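[Editor's note] As a usage note for the dataframe tests: class_mode="input" makes flow_from_dataframe yield autoencoder-style (x, x) batches, which is what the assertions above and below check. A minimal sketch, with placeholder paths and filenames:

import pandas as pd
from keras.preprocessing import image

# Placeholder dataframe; the files are assumed to exist under directory.
df = pd.DataFrame({"filename": ["image-0.png", "image-1.png"]})

gen = image.ImageDataGenerator()
it = gen.flow_from_dataframe(
    df,
    directory="/tmp/data",  # placeholder
    x_col="filename",
    y_col=None,  # ignored when class_mode="input"
    class_mode="input",
)
x, y = next(it)
assert x.shape == y.shape  # the input is echoed as the target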
+ + batch = next(df_autoencoder_iterator) + + # check if input and output have the same shape and they're the same + self.assertAllClose(batch[0], batch[1]) + # check if the input and output images are not the same numpy array + input_img = batch[0][0] + output_img = batch[1][0] + output_img[0][0][0] += 1 + self.assertNotEqual(input_img[0][0][0], output_img[0][0][0]) + + df_autoencoder_iterator = generator.flow_from_dataframe( + df, + tmpdir.full_path, + x_col="filename", + y_col="class", + class_mode="input", + ) + + batch = next(df_autoencoder_iterator) + + # check if input and output have the same shape and they're the same + self.assertEqual(batch[0].all(), batch[1].all()) + # check if the input and output images are not the same numpy array + input_img = batch[0][0] + output_img = batch[1][0] + output_img[0][0][0] += 1 + self.assertNotEqual(input_img[0][0][0], output_img[0][0][0]) + + def test_dataframe_iterator_class_mode_categorical_multi_label(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + # save the images in the paths + filenames = [] + count = 0 + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + im.save(os.path.join(tmpdir.full_path, filename)) + filenames.append(filename) + count += 1 + label_opt = ["a", "b", ["a"], ["b"], ["a", "b"], ["b", "a"]] + df = pd.DataFrame( + { + "filename": filenames, + "class": [random.choice(label_opt) for _ in filenames[:-2]] + + ["b", "a"], + } + ) + generator = image.ImageDataGenerator() + df_iterator = generator.flow_from_dataframe(df, tmpdir.full_path) + batch_x, batch_y = next(df_iterator) + self.assertIsInstance(batch_x, np.ndarray) + self.assertLen(batch_x.shape, 4) + self.assertIsInstance(batch_y, np.ndarray) + self.assertEqual(batch_y.shape, (len(batch_x), 2)) + for labels in batch_y: + self.assertTrue(all(label in {0, 1} for label in labels)) + + # on first 3 batches + df = pd.DataFrame( + { + "filename": filenames, + "class": [["b", "a"]] + + ["b"] + + [["c"]] + + [random.choice(label_opt) for _ in filenames[:-3]], + } + ) + generator = image.ImageDataGenerator() + df_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, shuffle=False + ) + batch_x, batch_y = next(df_iterator) + self.assertIsInstance(batch_x, np.ndarray) + self.assertLen(batch_x.shape, 4) + self.assertIsInstance(batch_y, np.ndarray) + self.assertEqual(batch_y.shape, (len(batch_x), 3)) + for labels in batch_y: + self.assertTrue(all(label in {0, 1} for label in labels)) + self.assertTrue((batch_y[0] == np.array([1, 1, 0])).all()) + self.assertTrue((batch_y[1] == np.array([0, 1, 0])).all()) + self.assertTrue((batch_y[2] == np.array([0, 0, 1])).all()) + + def test_dataframe_iterator_class_mode_multi_output(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + # save the images in the paths + filenames = [] + count = 0 + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + im.save(os.path.join(tmpdir.full_path, filename)) + filenames.append(filename) + count += 1 + # fit both outputs are a single number + df = pd.DataFrame({"filename": filenames}).assign( + output_0=np.random.uniform(size=len(filenames)), + output_1=np.random.uniform(size=len(filenames)), + ) + df_iterator = image.ImageDataGenerator().flow_from_dataframe( + df, + y_col=["output_0", "output_1"], + directory=tmpdir.full_path, + batch_size=3, + shuffle=False, + class_mode="multi_output", + ) + batch_x, 
batch_y = next(df_iterator) + self.assertIsInstance(batch_x, np.ndarray) + self.assertLen(batch_x.shape, 4) + self.assertIsInstance(batch_y, list) + self.assertLen(batch_y, 2) + self.assertAllEqual(batch_y[0], np.array(df["output_0"].tolist()[:3])) + self.assertAllEqual(batch_y[1], np.array(df["output_1"].tolist()[:3])) + # if one of the outputs is a 1D array + df["output_1"] = [ + np.random.uniform(size=(2, 2, 1)).flatten() for _ in range(len(df)) + ] + df_iterator = image.ImageDataGenerator().flow_from_dataframe( + df, + y_col=["output_0", "output_1"], + directory=tmpdir.full_path, + batch_size=3, + shuffle=False, + class_mode="multi_output", + ) + batch_x, batch_y = next(df_iterator) + self.assertIsInstance(batch_x, np.ndarray) + self.assertLen(batch_x.shape, 4) + self.assertIsInstance(batch_y, list) + self.assertLen(batch_y, 2) + self.assertAllEqual(batch_y[0], np.array(df["output_0"].tolist()[:3])) + self.assertAllEqual(batch_y[1], np.array(df["output_1"].tolist()[:3])) + # if one of the outputs is a 2D array + df["output_1"] = [ + np.random.uniform(size=(2, 2, 1)) for _ in range(len(df)) + ] + df_iterator = image.ImageDataGenerator().flow_from_dataframe( + df, + y_col=["output_0", "output_1"], + directory=tmpdir.full_path, + batch_size=3, + shuffle=False, + class_mode="multi_output", + ) + batch_x, batch_y = next(df_iterator) + self.assertIsInstance(batch_x, np.ndarray) + self.assertLen(batch_x.shape, 4) + self.assertIsInstance(batch_y, list) + self.assertLen(batch_y, 2) + self.assertAllEqual(batch_y[0], np.array(df["output_0"].tolist()[:3])) + self.assertAllEqual(batch_y[1], np.array(df["output_1"].tolist()[:3])) + # fail if single column + with self.assertRaises(TypeError): + image.ImageDataGenerator().flow_from_dataframe( + df, + y_col="output_0", + directory=tmpdir.full_path, + class_mode="multi_output", + ) + + def test_dataframe_iterator_class_mode_raw(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + # save the images in the paths + filenames = [] + count = 0 + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + im.save(os.path.join(tmpdir.full_path, filename)) + filenames.append(filename) + count += 1 + # case for 1D output + df = pd.DataFrame({"filename": filenames}).assign( + output_0=np.random.uniform(size=len(filenames)), + output_1=np.random.uniform(size=len(filenames)), + ) + df_iterator = image.ImageDataGenerator().flow_from_dataframe( + df, + y_col="output_0", + directory=tmpdir.full_path, + batch_size=3, + shuffle=False, + class_mode="raw", + ) + batch_x, batch_y = next(df_iterator) + self.assertIsInstance(batch_x, np.ndarray) + self.assertLen(batch_x.shape, 4) + self.assertIsInstance(batch_y, np.ndarray) + self.assertEqual(batch_y.shape, (3,)) + self.assertAllEqual(batch_y, df["output_0"].values[:3]) + # case with a 2D output + df_iterator = image.ImageDataGenerator().flow_from_dataframe( + df, + y_col=["output_0", "output_1"], + directory=tmpdir.full_path, + batch_size=3, + shuffle=False, + class_mode="raw", + ) + batch_x, batch_y = next(df_iterator) + self.assertIsInstance(batch_x, np.ndarray) + self.assertLen(batch_x.shape, 4) + self.assertIsInstance(batch_y, np.ndarray) + self.assertEqual(batch_y.shape, (3, 2)) + self.assertAllEqual(batch_y, df[["output_0", "output_1"]].values[:3]) + + @parameterized.parameters( + [ + (0.25, 18), + (0.50, 12), + (0.75, 6), + ] + ) + def test_dataframe_iterator_with_validation_split( + self, validation_split, num_training + ): + 
tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + num_classes = 2 + + # save the images in the tmpdir + count = 0 + filenames = [] + filenames_without = [] + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + filename_without = f"image-{count}" + filenames.append(filename) + filenames_without.append(filename_without) + im.save(os.path.join(tmpdir.full_path, filename)) + count += 1 + + df = pd.DataFrame( + { + "filename": filenames, + "class": [str(random.randint(0, 1)) for _ in filenames], + } + ) + # create iterator + generator = image.ImageDataGenerator(validation_split=validation_split) + df_sparse_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, class_mode="sparse" + ) + if np.isnan(next(df_sparse_iterator)[:][1]).any(): + raise ValueError("Invalid values.") + + with self.assertRaises(ValueError): + generator.flow_from_dataframe(df, tmpdir.full_path, subset="foo") + + train_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, subset="training" + ) + self.assertEqual(train_iterator.samples, num_training) + + valid_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, subset="validation" + ) + self.assertEqual(valid_iterator.samples, count - num_training) + + # check number of classes and images + self.assertLen(train_iterator.class_indices, num_classes) + self.assertLen(train_iterator.classes, num_training) + self.assertLen( + set(train_iterator.filenames) & set(filenames), num_training + ) + + def test_dataframe_iterator_with_custom_indexed_dataframe(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + num_classes = 2 + + # save the images in the tmpdir + count = 0 + filenames = [] + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + filenames.append(filename) + im.save(os.path.join(tmpdir.full_path, filename)) + count += 1 + + # create dataframes + classes = np.random.randint(num_classes, size=len(filenames)) + classes = [str(c) for c in classes] + df = pd.DataFrame({"filename": filenames, "class": classes}) + df2 = pd.DataFrame( + {"filename": filenames, "class": classes}, + index=np.arange(1, len(filenames) + 1), + ) + df3 = pd.DataFrame( + {"filename": filenames, "class": classes}, index=filenames + ) + + # create iterators + seed = 1 + generator = image.ImageDataGenerator() + df_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, seed=seed + ) + df2_iterator = generator.flow_from_dataframe( + df2, tmpdir.full_path, seed=seed + ) + df3_iterator = generator.flow_from_dataframe( + df3, tmpdir.full_path, seed=seed + ) + + # Test all iterators return same pairs of arrays + for _ in range(len(filenames)): + a1, c1 = next(df_iterator) + a2, c2 = next(df2_iterator) + a3, c3 = next(df3_iterator) + self.assertAllEqual(a1, a2) + self.assertAllEqual(a1, a3) + self.assertAllEqual(c1, c2) + self.assertAllEqual(c1, c3) + + def test_dataframe_iterator_n(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + + # save the images in the tmpdir + count = 0 + filenames = [] + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + filenames.append(filename) + im.save(os.path.join(tmpdir.full_path, filename)) + count += 1 + + # exclude first two items + n_files = len(filenames) + input_filenames = filenames[2:] + + # create dataframes + classes = np.random.randint(2, 
size=len(input_filenames)) + classes = [str(c) for c in classes] + df = pd.DataFrame({"filename": input_filenames}) + df2 = pd.DataFrame({"filename": input_filenames, "class": classes}) + + # create iterators + generator = image.ImageDataGenerator() + df_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, class_mode=None + ) + df2_iterator = generator.flow_from_dataframe( + df2, tmpdir.full_path, class_mode="binary" + ) + + # Test the number of items in iterators + self.assertEqual(df_iterator.n, n_files - 2) + self.assertEqual(df2_iterator.n, n_files - 2) + + def test_dataframe_iterator_absolute_path(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + + # save the images in the tmpdir + count = 0 + file_paths = [] + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count:0>5}.png" + file_path = os.path.join(tmpdir.full_path, filename) + file_paths.append(file_path) + im.save(file_path) + count += 1 + + # prepare an image with a forbidden extension. + file_path_fbd = os.path.join(tmpdir.full_path, "image-forbid.fbd") + shutil.copy(file_path, file_path_fbd) + + # create dataframes + classes = np.random.randint(2, size=len(file_paths)) + classes = [str(c) for c in classes] + df = pd.DataFrame({"filename": file_paths}) + df2 = pd.DataFrame({"filename": file_paths, "class": classes}) + df3 = pd.DataFrame({"filename": ["image-not-exist.png"] + file_paths}) + df4 = pd.DataFrame({"filename": file_paths + [file_path_fbd]}) + + # create iterators + generator = image.ImageDataGenerator() + df_iterator = generator.flow_from_dataframe( + df, None, class_mode=None, shuffle=False, batch_size=1 + ) + df2_iterator = generator.flow_from_dataframe( + df2, None, class_mode="binary", shuffle=False, batch_size=1 + ) + df3_iterator = generator.flow_from_dataframe( + df3, None, class_mode=None, shuffle=False, batch_size=1 + ) + df4_iterator = generator.flow_from_dataframe( + df4, None, class_mode=None, shuffle=False, batch_size=1 + ) + + validation_split = 0.2 + generator_split = image.ImageDataGenerator( + validation_split=validation_split + ) + df_train_iterator = generator_split.flow_from_dataframe( + df, + None, + class_mode=None, + shuffle=False, + subset="training", + batch_size=1, + ) + df_val_iterator = generator_split.flow_from_dataframe( + df, + None, + class_mode=None, + shuffle=False, + subset="validation", + batch_size=1, + ) + + # Test the number of items in iterators + self.assertLen(file_paths, df_iterator.n) + self.assertLen(file_paths, df2_iterator.n) + self.assertLen(file_paths, df3_iterator.n) + self.assertLen(file_paths, df4_iterator.n) + self.assertEqual( + df_val_iterator.n, int(validation_split * len(file_paths)) + ) + self.assertLen(file_paths, df_train_iterator.n + df_val_iterator.n) + + # Test flow_from_dataframe + for i in range(len(file_paths)): + a1 = next(df_iterator) + a2, _ = next(df2_iterator) + a3 = next(df3_iterator) + a4 = next(df4_iterator) + + if i < df_val_iterator.n: + a5 = next(df_val_iterator) + else: + a5 = next(df_train_iterator) + + self.assertAllEqual(a1, a2) + self.assertAllEqual(a1, a3) + self.assertAllEqual(a1, a4) + self.assertAllEqual(a1, a5) + + def test_dataframe_iterator_with_subdirs(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + num_classes = 2 + + # create folders and subfolders + paths = [] + for cl in range(num_classes): + class_directory = f"class-{cl}" + classpaths = [ + class_directory, + 
os.path.join(class_directory, "subfolder-1"), + os.path.join(class_directory, "subfolder-2"), + os.path.join(class_directory, "subfolder-1", "sub-subfolder"), + ] + for path in classpaths: + os.mkdir(os.path.join(tmpdir, path)) + paths.append(classpaths) + + # save the images in the paths + count = 0 + filenames = [] + for test_images in all_test_images: + for im in test_images: + # rotate image class + im_class = count % num_classes + # rotate subfolders + classpaths = paths[im_class] + filename = os.path.join( + classpaths[count % len(classpaths)], + f"image-{count}.png", + ) + filenames.append(filename) + im.save(os.path.join(tmpdir.full_path, filename)) + count += 1 + + # create dataframe + classes = np.random.randint(num_classes, size=len(filenames)) + classes = [str(c) for c in classes] + df = pd.DataFrame({"filename": filenames, "class": classes}) + + # create iterator + generator = image.ImageDataGenerator() + df_iterator = generator.flow_from_dataframe( + df, tmpdir.full_path, class_mode="binary" + ) + + # Test the number of items in iterator + self.assertLen(filenames, df_iterator.n) + self.assertEqual(set(df_iterator.filenames), set(filenames)) + + def test_dataframe_iterator_classes_indices_order(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + # save the images in the paths + count = 0 + filenames = [] + for test_images in all_test_images: + for im in test_images: + filename = f"image-{count}.png" + im.save(os.path.join(tmpdir.full_path, filename)) + filenames.append(filename) + count += 1 + + # Test the class_indices without classes input + generator = image.ImageDataGenerator() + label_opt = ["a", "b", ["a"], ["b"], ["a", "b"], ["b", "a"]] + df_f = pd.DataFrame( + { + "filename": filenames, + "class": ["a", "b"] + + [random.choice(label_opt) for _ in filenames[:-2]], + } + ) + flow_forward_iter = generator.flow_from_dataframe( + df_f, tmpdir.full_path + ) + label_rev = ["b", "a", ["b"], ["a"], ["b", "a"], ["a", "b"]] + df_r = pd.DataFrame( + { + "filename": filenames, + "class": ["b", "a"] + + [random.choice(label_rev) for _ in filenames[:-2]], + } + ) + flow_backward_iter = generator.flow_from_dataframe( + df_r, tmpdir.full_path + ) + + # check class_indices + self.assertEqual( + flow_forward_iter.class_indices, flow_backward_iter.class_indices + ) + + # Test the class_indices with classes input + generator_2 = image.ImageDataGenerator() + df_f2 = pd.DataFrame( + [["data/A.jpg", "A"], ["data/B.jpg", "B"]], + columns=["filename", "class"], + ) + flow_forward = generator_2.flow_from_dataframe( + df_f2, classes=["A", "B"] + ) + df_b2 = pd.DataFrame( + [["data/A.jpg", "A"], ["data/B.jpg", "B"]], + columns=["filename", "class"], + ) + flow_backward = generator_2.flow_from_dataframe( + df_b2, classes=["B", "A"] + ) + + # check class_indices + self.assertNotEqual( + flow_forward.class_indices, flow_backward.class_indices + ) @test_utils.run_v2_only class TestImageDataGenerator(test_combinations.TestCase): - - def test_image_data_generator(self): - all_test_images = _generate_test_images(include_rgba=True) - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(image_utils.img_to_array(im)[None, ...]) - - image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - 
channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1) - - def test_image_data_generator_with_validation_split(self): - all_test_images = _generate_test_images(include_rgba=True) - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(image_utils.img_to_array(im)[None, ...]) - - images = np.vstack(img_list) - labels = np.concatenate( - [np.zeros((int(len(images) / 2),)), - np.ones((int(len(images) / 2),))]) - generator = image.ImageDataGenerator(validation_split=0.5) - - # training and validation sets would have different - # number of classes, because labels are sorted - with self.assertRaisesRegex( - ValueError, - 'Training and validation subsets have different number of classes'): - generator.flow( - images, labels, shuffle=False, batch_size=10, subset='validation') - - # test non categorical labels with validation split - generator.flow( - images, - labels, - shuffle=False, - batch_size=10, - ignore_class_split=True, - subset='validation') - - labels = np.concatenate([ - np.zeros((int(len(images) / 4),)), - np.ones((int(len(images) / 4),)), - np.zeros((int(len(images) / 4),)), - np.ones((int(len(images) / 4),)) - ]) - - seq = generator.flow( - images, labels, shuffle=False, batch_size=10, subset='validation') - - _, y = seq[0] - self.assertLen(np.unique(y), 2) - - seq = generator.flow( - images, labels, shuffle=False, batch_size=10, subset='training') - _, y2 = seq[0] - self.assertLen(np.unique(y2), 2) - - with self.assertRaises(ValueError): - generator.flow( - images, - np.arange(images.shape[0]), - shuffle=False, - batch_size=3, - subset='foo') - - def test_image_data_generator_with_split_value_error(self): - with self.assertRaises(ValueError): - image.ImageDataGenerator(validation_split=5) - - def test_image_data_generator_invalid_data(self): - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - data_format='channels_last') - # Test fit with invalid data - with self.assertRaises(ValueError): - x = np.random.random((3, 10, 10)) - generator.fit(x) - - # Test flow with invalid data - with self.assertRaises(ValueError): - x = np.random.random((32, 10, 10)) - generator.flow(np.arange(x.shape[0])) - - def test_image_data_generator_fit(self): - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=(0.2, 0.2), - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1, - data_format='channels_last') - x = np.random.random((32, 10, 10, 3)) - generator.fit(x, augment=True) - # Test grayscale - x = np.random.random((32, 10, 10, 1)) - generator.fit(x) - # Test RBG - x = np.random.random((32, 10, 10, 3)) - generator.fit(x) - # Test more samples than dims - x = np.random.random((32, 4, 4, 1)) - generator.fit(x) - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - 
zoom_range=(0.2, 0.2), - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1, - data_format='channels_first') - x = np.random.random((32, 10, 10, 3)) - generator.fit(x, augment=True) - # Test grayscale - x = np.random.random((32, 1, 10, 10)) - generator.fit(x) - # Test RBG - x = np.random.random((32, 3, 10, 10)) - generator.fit(x) - # Test more samples than dims - x = np.random.random((32, 1, 4, 4)) - generator.fit(x) - - def test_image_data_generator_flow(self): - tmpdir = self.create_tempdir() - all_test_images = _generate_test_images(include_rgba=True) - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(image_utils.img_to_array(im)[None, ...]) - - images = np.vstack(img_list) - dsize = images.shape[0] - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1) - - generator.flow( - images, - np.arange(images.shape[0]), - shuffle=False, - save_to_dir=tmpdir.full_path, - batch_size=3) - - generator.flow( - images, - np.arange(images.shape[0]), - shuffle=False, - sample_weight=np.arange(images.shape[0]) + 1, - save_to_dir=tmpdir.full_path, - batch_size=3) - - # Test with `shuffle=True` - generator.flow( - images, - np.arange(images.shape[0]), - shuffle=True, - save_to_dir=tmpdir.full_path, - batch_size=3, - seed=42) - - # Test without y - generator.flow( - images, - None, - shuffle=True, - save_to_dir=tmpdir.full_path, - batch_size=3) - - # Test with a single miscellaneous input data array - x_misc1 = np.random.random(dsize) - generator.flow((images, x_misc1), - np.arange(dsize), - shuffle=False, - batch_size=2) - - # Test with two miscellaneous inputs - x_misc2 = np.random.random((dsize, 3, 3)) - generator.flow((images, [x_misc1, x_misc2]), - np.arange(dsize), - shuffle=False, - batch_size=2) - - # Test cases with `y = None` - generator.flow(images, None, batch_size=3) - generator.flow((images, x_misc1), None, batch_size=3, shuffle=False) - generator.flow((images, [x_misc1, x_misc2]), - None, - batch_size=3, - shuffle=False) - generator = image.ImageDataGenerator(validation_split=0.2) - generator.flow(images, batch_size=3) - - # Test some failure cases: - x_misc_err = np.random.random((dsize + 1, 3, 3)) - with self.assertRaisesRegex(ValueError, 'All of the arrays in'): - generator.flow((images, x_misc_err), np.arange(dsize), batch_size=3) - - with self.assertRaisesRegex(ValueError, - r'`x` \(images tensor\) and `y` \(labels\)'): - generator.flow((images, x_misc1), np.arange(dsize + 1), batch_size=3) - - # Test `flow` behavior as Sequence - generator.flow( - images, - np.arange(images.shape[0]), - shuffle=False, - save_to_dir=tmpdir.full_path, - batch_size=3) - - # Test with `shuffle=True` - generator.flow( - images, - np.arange(images.shape[0]), - shuffle=True, - save_to_dir=tmpdir.full_path, - batch_size=3, - seed=123) - - # test order_interpolation - labels = np.array([[2, 2, 0, 2, 2], [1, 3, 2, 3, 1], [2, 1, 0, 1, 2], - [3, 1, 0, 2, 0], [3, 1, 3, 2, 1]]) - - label_generator = image.ImageDataGenerator( - rotation_range=90., interpolation_order=0) - 
label_generator.flow(x=labels[np.newaxis, ..., np.newaxis], seed=123) - - def test_valid_args(self): - with self.assertRaises(ValueError): - image.ImageDataGenerator(brightness_range=0.1) - - def test_batch_standardize(self): - all_test_images = _generate_test_images(include_rgba=True) - # ImageDataGenerator.standardize should work on batches - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(image_utils.img_to_array(im)[None, ...]) - - images = np.vstack(img_list) - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True) - generator.fit(images, augment=True) - - transformed = np.copy(images) - for i, im in enumerate(transformed): - transformed[i] = generator.random_transform(im) - transformed = generator.standardize(transformed) - - def test_deterministic_transform(self): - x = np.ones((32, 32, 3)) - generator = image.ImageDataGenerator( - rotation_range=90, fill_mode='constant') - x = np.random.random((32, 32, 3)) - self.assertAllClose( - generator.apply_transform(x, {'flip_vertical': True}), x[::-1, :, :]) - self.assertAllClose( - generator.apply_transform(x, {'flip_horizontal': True}), x[:, ::-1, :]) - x = np.ones((3, 3, 3)) - x_rotated = np.array([[[0., 0., 0.], [1., 1., 1.], [0., 0., 0.]], - [[1., 1., 1.], [1., 1., 1.], [1., 1., 1.]], - [[0., 0., 0.], [1., 1., 1.], [0., 0., 0.]]]) - self.assertAllClose(generator.apply_transform(x, {'theta': 45}), x_rotated) - - def test_random_transforms(self): - x = np.random.random((2, 28, 28)) - # Test get_random_transform with predefined seed - seed = 1 - generator = image.ImageDataGenerator( - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0.1, - brightness_range=(1, 5), - horizontal_flip=True, - vertical_flip=True) - transform_dict = generator.get_random_transform(x.shape, seed) - transform_dict2 = generator.get_random_transform(x.shape, seed * 2) - self.assertNotEqual(transform_dict['theta'], 0) - self.assertNotEqual(transform_dict['theta'], transform_dict2['theta']) - self.assertNotEqual(transform_dict['tx'], 0) - self.assertNotEqual(transform_dict['tx'], transform_dict2['tx']) - self.assertNotEqual(transform_dict['ty'], 0) - self.assertNotEqual(transform_dict['ty'], transform_dict2['ty']) - self.assertNotEqual(transform_dict['shear'], 0) - self.assertNotEqual(transform_dict['shear'], transform_dict2['shear']) - self.assertNotEqual(transform_dict['zx'], 0) - self.assertNotEqual(transform_dict['zx'], transform_dict2['zx']) - self.assertNotEqual(transform_dict['zy'], 0) - self.assertNotEqual(transform_dict['zy'], transform_dict2['zy']) - self.assertNotEqual(transform_dict['channel_shift_intensity'], 0) - self.assertNotEqual(transform_dict['channel_shift_intensity'], - transform_dict2['channel_shift_intensity']) - self.assertNotEqual(transform_dict['brightness'], 0) - self.assertNotEqual(transform_dict['brightness'], - transform_dict2['brightness']) - - # Test get_random_transform without any randomness - generator = image.ImageDataGenerator() - transform_dict = generator.get_random_transform(x.shape, seed) - self.assertEqual(transform_dict['theta'], 
0) - self.assertEqual(transform_dict['tx'], 0) - self.assertEqual(transform_dict['ty'], 0) - self.assertEqual(transform_dict['shear'], 0) - self.assertEqual(transform_dict['zx'], 1) - self.assertEqual(transform_dict['zy'], 1) - self.assertIsNone(transform_dict['channel_shift_intensity'], None) - self.assertIsNone(transform_dict['brightness'], None) - - def test_fit_rescale(self): - all_test_images = _generate_test_images(include_rgba=True) - rescale = 1. / 255 - - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(image_utils.img_to_array(im)[None, ...]) - images = np.vstack(img_list) - - # featurewise_center test - generator = image.ImageDataGenerator( - rescale=rescale, featurewise_center=True, dtype='float64') - generator.fit(images) - batch = generator.flow(images, batch_size=8).next() - self.assertLess(abs(np.mean(batch)), 1e-6) - - # featurewise_std_normalization test - generator = image.ImageDataGenerator( - rescale=rescale, - featurewise_center=True, - featurewise_std_normalization=True, - dtype='float64') - generator.fit(images) - batch = generator.flow(images, batch_size=8).next() - self.assertLess(abs(np.mean(batch)), 1e-6) - self.assertLess(abs(1 - np.std(batch)), 1e-5) - - # zca_whitening test - generator = image.ImageDataGenerator( - rescale=rescale, - featurewise_center=True, - zca_whitening=True, - dtype='float64') - generator.fit(images) - batch = generator.flow(images, batch_size=8).next() - batch = np.reshape( - batch, - (batch.shape[0], batch.shape[1] * batch.shape[2] * batch.shape[3])) - # Y * Y_T = n * I, where Y = W * X - identity = np.dot(batch, batch.T) / batch.shape[0] - self.assertTrue( - ((np.abs(identity) - np.identity(identity.shape[0])) < 1e-6).all()) + def test_image_data_generator(self): + all_test_images = _generate_test_images(include_rgba=True) + for test_images in all_test_images: + img_list = [] + for im in test_images: + img_list.append(image_utils.img_to_array(im)[None, ...]) + + image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + rotation_range=90.0, + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=0.2, + channel_shift_range=0.0, + brightness_range=(1, 5), + fill_mode="nearest", + cval=0.5, + horizontal_flip=True, + vertical_flip=True, + interpolation_order=1, + ) + + def test_image_data_generator_with_validation_split(self): + all_test_images = _generate_test_images(include_rgba=True) + for test_images in all_test_images: + img_list = [] + for im in test_images: + img_list.append(image_utils.img_to_array(im)[None, ...]) + + images = np.vstack(img_list) + labels = np.concatenate( + [ + np.zeros((int(len(images) / 2),)), + np.ones((int(len(images) / 2),)), + ] + ) + generator = image.ImageDataGenerator(validation_split=0.5) + + # training and validation sets would have different + # number of classes, because labels are sorted + with self.assertRaisesRegex( + ValueError, + "Training and validation subsets have " + "different number of classes", + ): + generator.flow( + images, + labels, + shuffle=False, + batch_size=10, + subset="validation", + ) + + # test non categorical labels with validation split + generator.flow( + images, + labels, + shuffle=False, + batch_size=10, + ignore_class_split=True, + subset="validation", + ) + + labels = np.concatenate( + [ + np.zeros((int(len(images) / 4),)), + np.ones((int(len(images) / 4),)), + 
np.zeros((int(len(images) / 4),)), + np.ones((int(len(images) / 4),)), + ] + ) + + seq = generator.flow( + images, + labels, + shuffle=False, + batch_size=10, + subset="validation", + ) + + _, y = seq[0] + self.assertLen(np.unique(y), 2) + + seq = generator.flow( + images, labels, shuffle=False, batch_size=10, subset="training" + ) + _, y2 = seq[0] + self.assertLen(np.unique(y2), 2) + + with self.assertRaises(ValueError): + generator.flow( + images, + np.arange(images.shape[0]), + shuffle=False, + batch_size=3, + subset="foo", + ) + + def test_image_data_generator_with_split_value_error(self): + with self.assertRaises(ValueError): + image.ImageDataGenerator(validation_split=5) + + def test_image_data_generator_invalid_data(self): + generator = image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + data_format="channels_last", + ) + # Test fit with invalid data + with self.assertRaises(ValueError): + x = np.random.random((3, 10, 10)) + generator.fit(x) + + # Test flow with invalid data + with self.assertRaises(ValueError): + x = np.random.random((32, 10, 10)) + generator.flow(np.arange(x.shape[0])) + + def test_image_data_generator_fit(self): + generator = image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + rotation_range=90.0, + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=(0.2, 0.2), + channel_shift_range=0.0, + brightness_range=(1, 5), + fill_mode="nearest", + cval=0.5, + horizontal_flip=True, + vertical_flip=True, + interpolation_order=1, + data_format="channels_last", + ) + x = np.random.random((32, 10, 10, 3)) + generator.fit(x, augment=True) + # Test grayscale + x = np.random.random((32, 10, 10, 1)) + generator.fit(x) + # Test RGB + x = np.random.random((32, 10, 10, 3)) + generator.fit(x) + # Test more samples than dims + x = np.random.random((32, 4, 4, 1)) + generator.fit(x) + generator = image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + rotation_range=90.0, + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=(0.2, 0.2), + channel_shift_range=0.0, + brightness_range=(1, 5), + fill_mode="nearest", + cval=0.5, + horizontal_flip=True, + vertical_flip=True, + interpolation_order=1, + data_format="channels_first", + ) + x = np.random.random((32, 10, 10, 3)) + generator.fit(x, augment=True) + # Test grayscale + x = np.random.random((32, 1, 10, 10)) + generator.fit(x) + # Test RGB + x = np.random.random((32, 3, 10, 10)) + generator.fit(x) + # Test more samples than dims + x = np.random.random((32, 1, 4, 4)) + generator.fit(x) + + def test_image_data_generator_flow(self): + tmpdir = self.create_tempdir() + all_test_images = _generate_test_images(include_rgba=True) + for test_images in all_test_images: + img_list = [] + for im in test_images: + img_list.append(image_utils.img_to_array(im)[None, ...]) + + images = np.vstack(img_list) + dsize = images.shape[0] + generator = image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + rotation_range=90.0, + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=0.2, +
channel_shift_range=0.0, + brightness_range=(1, 5), + fill_mode="nearest", + cval=0.5, + horizontal_flip=True, + vertical_flip=True, + interpolation_order=1, + ) + + generator.flow( + images, + np.arange(images.shape[0]), + shuffle=False, + save_to_dir=tmpdir.full_path, + batch_size=3, + ) + + generator.flow( + images, + np.arange(images.shape[0]), + shuffle=False, + sample_weight=np.arange(images.shape[0]) + 1, + save_to_dir=tmpdir.full_path, + batch_size=3, + ) + + # Test with `shuffle=True` + generator.flow( + images, + np.arange(images.shape[0]), + shuffle=True, + save_to_dir=tmpdir.full_path, + batch_size=3, + seed=42, + ) + + # Test without y + generator.flow( + images, + None, + shuffle=True, + save_to_dir=tmpdir.full_path, + batch_size=3, + ) + + # Test with a single miscellaneous input data array + x_misc1 = np.random.random(dsize) + generator.flow( + (images, x_misc1), np.arange(dsize), shuffle=False, batch_size=2 + ) + + # Test with two miscellaneous inputs + x_misc2 = np.random.random((dsize, 3, 3)) + generator.flow( + (images, [x_misc1, x_misc2]), + np.arange(dsize), + shuffle=False, + batch_size=2, + ) + + # Test cases with `y = None` + generator.flow(images, None, batch_size=3) + generator.flow((images, x_misc1), None, batch_size=3, shuffle=False) + generator.flow( + (images, [x_misc1, x_misc2]), None, batch_size=3, shuffle=False + ) + generator = image.ImageDataGenerator(validation_split=0.2) + generator.flow(images, batch_size=3) + + # Test some failure cases: + x_misc_err = np.random.random((dsize + 1, 3, 3)) + with self.assertRaisesRegex(ValueError, "All of the arrays in"): + generator.flow( + (images, x_misc_err), np.arange(dsize), batch_size=3 + ) + + with self.assertRaisesRegex( + ValueError, r"`x` \(images tensor\) and `y` \(labels\)" + ): + generator.flow( + (images, x_misc1), np.arange(dsize + 1), batch_size=3 + ) + + # Test `flow` behavior as Sequence + generator.flow( + images, + np.arange(images.shape[0]), + shuffle=False, + save_to_dir=tmpdir.full_path, + batch_size=3, + ) + + # Test with `shuffle=True` + generator.flow( + images, + np.arange(images.shape[0]), + shuffle=True, + save_to_dir=tmpdir.full_path, + batch_size=3, + seed=123, + ) + + # test order_interpolation + labels = np.array( + [ + [2, 2, 0, 2, 2], + [1, 3, 2, 3, 1], + [2, 1, 0, 1, 2], + [3, 1, 0, 2, 0], + [3, 1, 3, 2, 1], + ] + ) + + label_generator = image.ImageDataGenerator( + rotation_range=90.0, interpolation_order=0 + ) + label_generator.flow(x=labels[np.newaxis, ..., np.newaxis], seed=123) + + def test_valid_args(self): + with self.assertRaises(ValueError): + image.ImageDataGenerator(brightness_range=0.1) + + def test_batch_standardize(self): + all_test_images = _generate_test_images(include_rgba=True) + # ImageDataGenerator.standardize should work on batches + for test_images in all_test_images: + img_list = [] + for im in test_images: + img_list.append(image_utils.img_to_array(im)[None, ...]) + + images = np.vstack(img_list) + generator = image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + rotation_range=90.0, + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=0.2, + channel_shift_range=0.0, + brightness_range=(1, 5), + fill_mode="nearest", + cval=0.5, + horizontal_flip=True, + vertical_flip=True, + ) + generator.fit(images, augment=True) + + transformed = np.copy(images) + for i, im in enumerate(transformed): + transformed[i] = 
generator.random_transform(im) + transformed = generator.standardize(transformed) + + def test_deterministic_transform(self): + x = np.ones((32, 32, 3)) + generator = image.ImageDataGenerator( + rotation_range=90, fill_mode="constant" + ) + x = np.random.random((32, 32, 3)) + self.assertAllClose( + generator.apply_transform(x, {"flip_vertical": True}), x[::-1, :, :] + ) + self.assertAllClose( + generator.apply_transform(x, {"flip_horizontal": True}), + x[:, ::-1, :], + ) + x = np.ones((3, 3, 3)) + x_rotated = np.array( + [ + [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0]], + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0]], + ] + ) + self.assertAllClose( + generator.apply_transform(x, {"theta": 45}), x_rotated + ) + + def test_random_transforms(self): + x = np.random.random((2, 28, 28)) + # Test get_random_transform with predefined seed + seed = 1 + generator = image.ImageDataGenerator( + rotation_range=90.0, + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=0.2, + channel_shift_range=0.1, + brightness_range=(1, 5), + horizontal_flip=True, + vertical_flip=True, + ) + transform_dict = generator.get_random_transform(x.shape, seed) + transform_dict2 = generator.get_random_transform(x.shape, seed * 2) + self.assertNotEqual(transform_dict["theta"], 0) + self.assertNotEqual(transform_dict["theta"], transform_dict2["theta"]) + self.assertNotEqual(transform_dict["tx"], 0) + self.assertNotEqual(transform_dict["tx"], transform_dict2["tx"]) + self.assertNotEqual(transform_dict["ty"], 0) + self.assertNotEqual(transform_dict["ty"], transform_dict2["ty"]) + self.assertNotEqual(transform_dict["shear"], 0) + self.assertNotEqual(transform_dict["shear"], transform_dict2["shear"]) + self.assertNotEqual(transform_dict["zx"], 0) + self.assertNotEqual(transform_dict["zx"], transform_dict2["zx"]) + self.assertNotEqual(transform_dict["zy"], 0) + self.assertNotEqual(transform_dict["zy"], transform_dict2["zy"]) + self.assertNotEqual(transform_dict["channel_shift_intensity"], 0) + self.assertNotEqual( + transform_dict["channel_shift_intensity"], + transform_dict2["channel_shift_intensity"], + ) + self.assertNotEqual(transform_dict["brightness"], 0) + self.assertNotEqual( + transform_dict["brightness"], transform_dict2["brightness"] + ) + + # Test get_random_transform without any randomness + generator = image.ImageDataGenerator() + transform_dict = generator.get_random_transform(x.shape, seed) + self.assertEqual(transform_dict["theta"], 0) + self.assertEqual(transform_dict["tx"], 0) + self.assertEqual(transform_dict["ty"], 0) + self.assertEqual(transform_dict["shear"], 0) + self.assertEqual(transform_dict["zx"], 1) + self.assertEqual(transform_dict["zy"], 1) + self.assertIsNone(transform_dict["channel_shift_intensity"], None) + self.assertIsNone(transform_dict["brightness"], None) + + def test_fit_rescale(self): + all_test_images = _generate_test_images(include_rgba=True) + rescale = 1.0 / 255 + + for test_images in all_test_images: + img_list = [] + for im in test_images: + img_list.append(image_utils.img_to_array(im)[None, ...]) + images = np.vstack(img_list) + + # featurewise_center test + generator = image.ImageDataGenerator( + rescale=rescale, featurewise_center=True, dtype="float64" + ) + generator.fit(images) + batch = generator.flow(images, batch_size=8).next() + self.assertLess(abs(np.mean(batch)), 1e-6) + + # featurewise_std_normalization test + generator = image.ImageDataGenerator( + rescale=rescale, + 
featurewise_center=True, + featurewise_std_normalization=True, + dtype="float64", + ) + generator.fit(images) + batch = generator.flow(images, batch_size=8).next() + self.assertLess(abs(np.mean(batch)), 1e-6) + self.assertLess(abs(1 - np.std(batch)), 1e-5) + + # zca_whitening test + generator = image.ImageDataGenerator( + rescale=rescale, + featurewise_center=True, + zca_whitening=True, + dtype="float64", + ) + generator.fit(images) + batch = generator.flow(images, batch_size=8).next() + batch = np.reshape( + batch, + ( + batch.shape[0], + batch.shape[1] * batch.shape[2] * batch.shape[3], + ), + ) + # Y * Y_T = n * I, where Y = W * X + identity = np.dot(batch, batch.T) / batch.shape[0] + self.assertTrue( + ( + (np.abs(identity) - np.identity(identity.shape[0])) < 1e-6 + ).all() + ) @test_utils.run_v2_only class TestAffineTransformations(test_combinations.TestCase): - - def test_random_transforms(self): - x = np.random.random((2, 28, 28)) - self.assertEqual(image.random_rotation(x, 45).shape, (2, 28, 28)) - self.assertEqual(image.random_shift(x, 1, 1).shape, (2, 28, 28)) - self.assertEqual(image.random_shear(x, 20).shape, (2, 28, 28)) - self.assertEqual(image.random_channel_shift(x, 20).shape, (2, 28, 28)) - - def test_deterministic_transform(self): - x = np.ones((3, 3, 3)) - x_rotated = np.array([[[0., 0., 0.], [1., 1., 1.], [0., 0., 0.]], - [[1., 1., 1.], [1., 1., 1.], [1., 1., 1.]], - [[0., 0., 0.], [1., 1., 1.], [0., 0., 0.]]]) - self.assertAllClose( - image.apply_affine_transform( - x, - theta=45, - row_axis=0, - col_axis=1, - channel_axis=2, - fill_mode='constant'), x_rotated) - - def test_matrix_center(self): - x = np.expand_dims(np.array([ - [0, 1], - [0, 0], - ]), -1) - x_rotated90 = np.expand_dims(np.array([ - [1, 0], - [0, 0], - ]), -1) - - self.assertAllClose( - image.apply_affine_transform( - x, theta=90, row_axis=0, col_axis=1, channel_axis=2), x_rotated90) - - def test_translation(self): - x = np.array([ - [0, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 0, 0], - ]) - x_up = np.array([ - [0, 1, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0], - ]) - x_dn = np.array([ - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 1, 0, 0], - ]) - x_left = np.array([ - [0, 0, 0, 0], - [1, 0, 0, 0], - [0, 0, 0, 0], - ]) - x_right = np.array([ - [0, 0, 0, 0], - [0, 0, 1, 0], - [0, 0, 0, 0], - ]) - - # Channels first - x_test = np.expand_dims(x, 0) - - # Horizontal translation - self.assertAllEqual(x_left, - np.squeeze(image.apply_affine_transform(x_test, tx=1))) - self.assertAllEqual(x_right, - np.squeeze(image.apply_affine_transform(x_test, tx=-1))) - - # change axes: x<->y - self.assertAllEqual( - x_left, - np.squeeze( - image.apply_affine_transform(x_test, ty=1, row_axis=2, col_axis=1))) - self.assertAllEqual( - x_right, - np.squeeze( - image.apply_affine_transform(x_test, ty=-1, row_axis=2, - col_axis=1))) - - # Vertical translation - self.assertAllEqual(x_up, - np.squeeze(image.apply_affine_transform(x_test, ty=1))) - self.assertAllEqual(x_dn, - np.squeeze(image.apply_affine_transform(x_test, ty=-1))) - - # change axes: x<->y - self.assertAllEqual( - x_up, - np.squeeze( - image.apply_affine_transform(x_test, tx=1, row_axis=2, col_axis=1))) - self.assertAllEqual( - x_dn, - np.squeeze( - image.apply_affine_transform(x_test, tx=-1, row_axis=2, - col_axis=1))) - - # Channels last - x_test = np.expand_dims(x, -1) - - # Horizontal translation - self.assertAllEqual( - x_left, - np.squeeze( - image.apply_affine_transform( - x_test, tx=1, row_axis=0, col_axis=1, channel_axis=2))) - self.assertAllEqual( - x_right, - np.squeeze( 
- image.apply_affine_transform( - x_test, tx=-1, row_axis=0, col_axis=1, channel_axis=2))) - - # change axes: x<->y - self.assertAllEqual( - x_left, - np.squeeze( - image.apply_affine_transform( - x_test, ty=1, row_axis=1, col_axis=0, channel_axis=2))) - self.assertAllEqual( - x_right, - np.squeeze( - image.apply_affine_transform( - x_test, ty=-1, row_axis=1, col_axis=0, channel_axis=2))) - - # Vertical translation - self.assertAllEqual( - x_up, - np.squeeze( - image.apply_affine_transform( - x_test, ty=1, row_axis=0, col_axis=1, channel_axis=2))) - self.assertAllEqual( - x_dn, - np.squeeze( - image.apply_affine_transform( - x_test, ty=-1, row_axis=0, col_axis=1, channel_axis=2))) - - # change axes: x<->y - self.assertAllEqual( - x_up, - np.squeeze( + def test_random_transforms(self): + x = np.random.random((2, 28, 28)) + self.assertEqual(image.random_rotation(x, 45).shape, (2, 28, 28)) + self.assertEqual(image.random_shift(x, 1, 1).shape, (2, 28, 28)) + self.assertEqual(image.random_shear(x, 20).shape, (2, 28, 28)) + self.assertEqual(image.random_channel_shift(x, 20).shape, (2, 28, 28)) + + def test_deterministic_transform(self): + x = np.ones((3, 3, 3)) + x_rotated = np.array( + [ + [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0]], + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0]], + ] + ) + self.assertAllClose( image.apply_affine_transform( - x_test, tx=1, row_axis=1, col_axis=0, channel_axis=2))) - self.assertAllEqual( - x_dn, - np.squeeze( + x, + theta=45, + row_axis=0, + col_axis=1, + channel_axis=2, + fill_mode="constant", + ), + x_rotated, + ) + + def test_matrix_center(self): + x = np.expand_dims( + np.array( + [ + [0, 1], + [0, 0], + ] + ), + -1, + ) + x_rotated90 = np.expand_dims( + np.array( + [ + [1, 0], + [0, 0], + ] + ), + -1, + ) + + self.assertAllClose( image.apply_affine_transform( - x_test, tx=-1, row_axis=1, col_axis=0, channel_axis=2))) - - def test_random_zoom(self): - x = np.random.random((2, 28, 28)) - self.assertEqual(image.random_zoom(x, (5, 5)).shape, (2, 28, 28)) - self.assertAllClose(x, image.random_zoom(x, (1, 1))) - - def test_random_zoom_error(self): - with self.assertRaises(ValueError): - image.random_zoom(0, zoom_range=[0]) - - def test_random_brightness_error(self): - with self.assertRaises(ValueError): - image.random_brightness(0, [0]) - - def test_random_brightness_scale(self): - img = np.ones((1, 1, 3)) * 128 - zeros = np.zeros((1, 1, 3)) - must_be_128 = image.random_brightness(img, [1, 1], False) - self.assertAllEqual(img, must_be_128) - must_be_0 = image.random_brightness(img, [1, 1], True) - self.assertAllEqual(zeros, must_be_0) - - def test_random_brightness_scale_outside_range_positive(self): - img = np.ones((1, 1, 3)) * 1024 - zeros = np.zeros((1, 1, 3)) - must_be_1024 = image.random_brightness(img, [1, 1], False) - self.assertAllEqual(img, must_be_1024) - must_be_0 = image.random_brightness(img, [1, 1], True) - self.assertAllEqual(zeros, must_be_0) - - def test_random_brightness_scale_outside_range_negative(self): - img = np.ones((1, 1, 3)) * -1024 - zeros = np.zeros((1, 1, 3)) - must_be_neg_1024 = image.random_brightness(img, [1, 1], False) - self.assertAllEqual(img, must_be_neg_1024) - must_be_0 = image.random_brightness(img, [1, 1], True) - self.assertAllEqual(zeros, must_be_0) - - -if __name__ == '__main__': - tf.test.main() + x, theta=90, row_axis=0, col_axis=1, channel_axis=2 + ), + x_rotated90, + ) + + def test_translation(self): + x = np.array( + [ + [0, 0, 0, 0], + [0, 1, 0, 
0], + [0, 0, 0, 0], + ] + ) + x_up = np.array( + [ + [0, 1, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + ] + ) + x_dn = np.array( + [ + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 1, 0, 0], + ] + ) + x_left = np.array( + [ + [0, 0, 0, 0], + [1, 0, 0, 0], + [0, 0, 0, 0], + ] + ) + x_right = np.array( + [ + [0, 0, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 0], + ] + ) + + # Channels first + x_test = np.expand_dims(x, 0) + + # Horizontal translation + self.assertAllEqual( + x_left, np.squeeze(image.apply_affine_transform(x_test, tx=1)) + ) + self.assertAllEqual( + x_right, np.squeeze(image.apply_affine_transform(x_test, tx=-1)) + ) + + # change axes: x<->y + self.assertAllEqual( + x_left, + np.squeeze( + image.apply_affine_transform( + x_test, ty=1, row_axis=2, col_axis=1 + ) + ), + ) + self.assertAllEqual( + x_right, + np.squeeze( + image.apply_affine_transform( + x_test, ty=-1, row_axis=2, col_axis=1 + ) + ), + ) + + # Vertical translation + self.assertAllEqual( + x_up, np.squeeze(image.apply_affine_transform(x_test, ty=1)) + ) + self.assertAllEqual( + x_dn, np.squeeze(image.apply_affine_transform(x_test, ty=-1)) + ) + + # change axes: x<->y + self.assertAllEqual( + x_up, + np.squeeze( + image.apply_affine_transform( + x_test, tx=1, row_axis=2, col_axis=1 + ) + ), + ) + self.assertAllEqual( + x_dn, + np.squeeze( + image.apply_affine_transform( + x_test, tx=-1, row_axis=2, col_axis=1 + ) + ), + ) + + # Channels last + x_test = np.expand_dims(x, -1) + + # Horizontal translation + self.assertAllEqual( + x_left, + np.squeeze( + image.apply_affine_transform( + x_test, tx=1, row_axis=0, col_axis=1, channel_axis=2 + ) + ), + ) + self.assertAllEqual( + x_right, + np.squeeze( + image.apply_affine_transform( + x_test, tx=-1, row_axis=0, col_axis=1, channel_axis=2 + ) + ), + ) + + # change axes: x<->y + self.assertAllEqual( + x_left, + np.squeeze( + image.apply_affine_transform( + x_test, ty=1, row_axis=1, col_axis=0, channel_axis=2 + ) + ), + ) + self.assertAllEqual( + x_right, + np.squeeze( + image.apply_affine_transform( + x_test, ty=-1, row_axis=1, col_axis=0, channel_axis=2 + ) + ), + ) + + # Vertical translation + self.assertAllEqual( + x_up, + np.squeeze( + image.apply_affine_transform( + x_test, ty=1, row_axis=0, col_axis=1, channel_axis=2 + ) + ), + ) + self.assertAllEqual( + x_dn, + np.squeeze( + image.apply_affine_transform( + x_test, ty=-1, row_axis=0, col_axis=1, channel_axis=2 + ) + ), + ) + + # change axes: x<->y + self.assertAllEqual( + x_up, + np.squeeze( + image.apply_affine_transform( + x_test, tx=1, row_axis=1, col_axis=0, channel_axis=2 + ) + ), + ) + self.assertAllEqual( + x_dn, + np.squeeze( + image.apply_affine_transform( + x_test, tx=-1, row_axis=1, col_axis=0, channel_axis=2 + ) + ), + ) + + def test_random_zoom(self): + x = np.random.random((2, 28, 28)) + self.assertEqual(image.random_zoom(x, (5, 5)).shape, (2, 28, 28)) + self.assertAllClose(x, image.random_zoom(x, (1, 1))) + + def test_random_zoom_error(self): + with self.assertRaises(ValueError): + image.random_zoom(0, zoom_range=[0]) + + def test_random_brightness_error(self): + with self.assertRaises(ValueError): + image.random_brightness(0, [0]) + + def test_random_brightness_scale(self): + img = np.ones((1, 1, 3)) * 128 + zeros = np.zeros((1, 1, 3)) + must_be_128 = image.random_brightness(img, [1, 1], False) + self.assertAllEqual(img, must_be_128) + must_be_0 = image.random_brightness(img, [1, 1], True) + self.assertAllEqual(zeros, must_be_0) + + def test_random_brightness_scale_outside_range_positive(self): + img = np.ones((1, 1, 3)) * 
1024 + zeros = np.zeros((1, 1, 3)) + must_be_1024 = image.random_brightness(img, [1, 1], False) + self.assertAllEqual(img, must_be_1024) + must_be_0 = image.random_brightness(img, [1, 1], True) + self.assertAllEqual(zeros, must_be_0) + + def test_random_brightness_scale_outside_range_negative(self): + img = np.ones((1, 1, 3)) * -1024 + zeros = np.zeros((1, 1, 3)) + must_be_neg_1024 = image.random_brightness(img, [1, 1], False) + self.assertAllEqual(img, must_be_neg_1024) + must_be_0 = image.random_brightness(img, [1, 1], True) + self.assertAllEqual(zeros, must_be_0) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/preprocessing/sequence.py b/keras/preprocessing/sequence.py index f5f686614a1f..25569118718b 100644 --- a/keras/preprocessing/sequence.py +++ b/keras/preprocessing/sequence.py @@ -20,349 +20,366 @@ with sequences. See the [tf.data guide](https://www.tensorflow.org/guide/data) for more details. """ -# pylint: disable=invalid-name -# pylint: disable=g-classes-have-attributes import json import random -from keras.utils import data_utils import numpy as np +from keras.utils import data_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export def _remove_long_seq(maxlen, seq, label): - """Removes sequences that exceed the maximum length. + """Removes sequences that exceed the maximum length. - Args: - maxlen: Int, maximum length of the output sequences. - seq: List of lists, where each sublist is a sequence. - label: List where each element is an integer. + Args: + maxlen: Int, maximum length of the output sequences. + seq: List of lists, where each sublist is a sequence. + label: List where each element is an integer. - Returns: - new_seq, new_label: shortened lists for `seq` and `label`. - """ - new_seq, new_label = [], [] - for x, y in zip(seq, label): - if len(x) < maxlen: - new_seq.append(x) - new_label.append(y) - return new_seq, new_label + Returns: + new_seq, new_label: shortened lists for `seq` and `label`. + """ + new_seq, new_label = [], [] + for x, y in zip(seq, label): + if len(x) < maxlen: + new_seq.append(x) + new_label.append(y) + return new_seq, new_label -@keras_export('keras.preprocessing.sequence.TimeseriesGenerator') +@keras_export("keras.preprocessing.sequence.TimeseriesGenerator") class TimeseriesGenerator(data_utils.Sequence): - """Utility class for generating batches of temporal data. - - Deprecated: `tf.keras.preprocessing.sequence.TimeseriesGenerator` does not - operate on tensors and is not recommended for new code. Prefer using a - `tf.data.Dataset` which provides a more efficient and flexible mechanism for - batching, shuffling, and windowing input. See the - [tf.data guide](https://www.tensorflow.org/guide/data) for more details. - - This class takes in a sequence of data-points gathered at - equal intervals, along with time series parameters such as - stride, length of history, etc., to produce batches for - training/validation. - - Arguments: - data: Indexable generator (such as list or Numpy array) - containing consecutive data points (timesteps). - The data should be at 2D, and axis 0 is expected - to be the time dimension. - targets: Targets corresponding to timesteps in `data`. - It should have same length as `data`. - length: Length of the output sequences (in number of timesteps). - sampling_rate: Period between successive individual timesteps - within sequences. For rate `r`, timesteps - `data[i]`, `data[i-r]`, ... `data[i - length]` - are used for create a sample sequence. 
- stride: Period between successive output sequences. - For stride `s`, consecutive output samples would - be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc. - start_index: Data points earlier than `start_index` will not be used - in the output sequences. This is useful to reserve part of the - data for test or validation. - end_index: Data points later than `end_index` will not be used - in the output sequences. This is useful to reserve part of the - data for test or validation. - shuffle: Whether to shuffle output samples, - or instead draw them in chronological order. - reverse: Boolean: if `true`, timesteps in each output sample will be - in reverse chronological order. - batch_size: Number of timeseries samples in each batch - (except maybe the last one). - - Returns: - A [Sequence]( - https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence) - instance. - - Examples: - ```python - from keras.preprocessing.sequence import TimeseriesGenerator - import numpy as np - data = np.array([[i] for i in range(50)]) - targets = np.array([[i] for i in range(50)]) - data_gen = TimeseriesGenerator(data, targets, - length=10, sampling_rate=2, - batch_size=2) - assert len(data_gen) == 20 - batch_0 = data_gen[0] - x, y = batch_0 - assert np.array_equal(x, - np.array([[[0], [2], [4], [6], [8]], - [[1], [3], [5], [7], [9]]])) - assert np.array_equal(y, - np.array([[10], [11]])) - ``` - """ - - def __init__(self, - data, - targets, - length, - sampling_rate=1, - stride=1, - start_index=0, - end_index=None, - shuffle=False, - reverse=False, - batch_size=128): - - if len(data) != len(targets): - raise ValueError('Data and targets have to be' + ' of same length. ' - 'Data length is {}'.format(len(data)) + - ' while target length is {}'.format(len(targets))) - - self.data = data - self.targets = targets - self.length = length - self.sampling_rate = sampling_rate - self.stride = stride - self.start_index = start_index + length - if end_index is None: - end_index = len(data) - 1 - self.end_index = end_index - self.shuffle = shuffle - self.reverse = reverse - self.batch_size = batch_size - - if self.start_index > self.end_index: - raise ValueError('`start_index+length=%i > end_index=%i` ' - 'is disallowed, as no part of the sequence ' - 'would be left to be used as current step.' % - (self.start_index, self.end_index)) - - def __len__(self): - return (self.end_index - self.start_index + - self.batch_size * self.stride) // ( - self.batch_size * self.stride) - - def __getitem__(self, index): - if self.shuffle: - rows = np.random.randint( - self.start_index, self.end_index + 1, size=self.batch_size) - else: - i = self.start_index + self.batch_size * self.stride * index - rows = np.arange( - i, min(i + self.batch_size * self.stride, self.end_index + 1), - self.stride) - - samples = np.array( - [self.data[row - self.length:row:self.sampling_rate] for row in rows]) - targets = np.array([self.targets[row] for row in rows]) - - if self.reverse: - return samples[:, ::-1, ...], targets - return samples, targets - - def get_config(self): - """Returns the TimeseriesGenerator configuration as Python dictionary. + """Utility class for generating batches of temporal data. + + Deprecated: `tf.keras.preprocessing.sequence.TimeseriesGenerator` does not + operate on tensors and is not recommended for new code. Prefer using a + `tf.data.Dataset` which provides a more efficient and flexible mechanism for + batching, shuffling, and windowing input. 
See the + [tf.data guide](https://www.tensorflow.org/guide/data) for more details. + + This class takes in a sequence of data-points gathered at + equal intervals, along with time series parameters such as + stride, length of history, etc., to produce batches for + training/validation. + + Arguments: + data: Indexable generator (such as list or Numpy array) + containing consecutive data points (timesteps). + The data should be 2D, and axis 0 is expected + to be the time dimension. + targets: Targets corresponding to timesteps in `data`. + It should have the same length as `data`. + length: Length of the output sequences (in number of timesteps). + sampling_rate: Period between successive individual timesteps + within sequences. For rate `r`, timesteps + `data[i - length]`, `data[i - length + r]`, ... `data[i - r]` + are used to create a sample sequence. + stride: Period between successive output sequences. + For stride `s`, consecutive output samples would + be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc. + start_index: Data points earlier than `start_index` will not be used + in the output sequences. This is useful to reserve part of the + data for test or validation. + end_index: Data points later than `end_index` will not be used + in the output sequences. This is useful to reserve part of the + data for test or validation. + shuffle: Whether to shuffle output samples, + or instead draw them in chronological order. + reverse: Boolean: if `True`, timesteps in each output sample will be + in reverse chronological order. + batch_size: Number of timeseries samples in each batch + (except maybe the last one). Returns: - A Python dictionary with the TimeseriesGenerator configuration. + A [Sequence]( + https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence) + instance. + + Examples: + ```python + from keras.preprocessing.sequence import TimeseriesGenerator + import numpy as np + data = np.array([[i] for i in range(50)]) + targets = np.array([[i] for i in range(50)]) + data_gen = TimeseriesGenerator(data, targets, + length=10, sampling_rate=2, + batch_size=2) + assert len(data_gen) == 20 + batch_0 = data_gen[0] + x, y = batch_0 + assert np.array_equal(x, + np.array([[[0], [2], [4], [6], [8]], + [[1], [3], [5], [7], [9]]])) + assert np.array_equal(y, + np.array([[10], [11]])) + ``` """ + + def __init__( + self, + data, + targets, + length, + sampling_rate=1, + stride=1, + start_index=0, + end_index=None, + shuffle=False, + reverse=False, + batch_size=128, + ): + + if len(data) != len(targets): + raise ValueError( + "Data and targets have to be" + + f" of same length. 
Data length is {len(data)}" + + f" while target length is {len(targets)}" + ) + + self.data = data + self.targets = targets + self.length = length + self.sampling_rate = sampling_rate + self.stride = stride + self.start_index = start_index + length + if end_index is None: + end_index = len(data) - 1 + self.end_index = end_index + self.shuffle = shuffle + self.reverse = reverse + self.batch_size = batch_size + + if self.start_index > self.end_index: + raise ValueError( + "`start_index+length=%i > end_index=%i` " + "is disallowed, as no part of the sequence " + "would be left to be used as current step." + % (self.start_index, self.end_index) + ) + + def __len__(self): + return ( + self.end_index - self.start_index + self.batch_size * self.stride + ) // (self.batch_size * self.stride) + + def __getitem__(self, index): + if self.shuffle: + rows = np.random.randint( + self.start_index, self.end_index + 1, size=self.batch_size + ) + else: + i = self.start_index + self.batch_size * self.stride * index + rows = np.arange( + i, + min(i + self.batch_size * self.stride, self.end_index + 1), + self.stride, + ) + + samples = np.array( + [ + self.data[row - self.length : row : self.sampling_rate] + for row in rows + ] + ) + targets = np.array([self.targets[row] for row in rows]) + + if self.reverse: + return samples[:, ::-1, ...], targets + return samples, targets + + def get_config(self): + """Returns the TimeseriesGenerator configuration as Python dictionary. + + Returns: + A Python dictionary with the TimeseriesGenerator configuration. + """ + data = self.data + if type(self.data).__module__ == np.__name__: + data = self.data.tolist() + try: + json_data = json.dumps(data) + except TypeError as e: + raise TypeError("Data not JSON Serializable:", data) from e + + targets = self.targets + if type(self.targets).__module__ == np.__name__: + targets = self.targets.tolist() + try: + json_targets = json.dumps(targets) + except TypeError as e: + raise TypeError("Targets not JSON Serializable:", targets) from e + + return { + "data": json_data, + "targets": json_targets, + "length": self.length, + "sampling_rate": self.sampling_rate, + "stride": self.stride, + "start_index": self.start_index, + "end_index": self.end_index, + "shuffle": self.shuffle, + "reverse": self.reverse, + "batch_size": self.batch_size, + } + + def to_json(self, **kwargs): + """Returns a JSON string containing the generator's configuration. + + Args: + **kwargs: Additional keyword arguments to be passed + to `json.dumps()`. + + Returns: + A JSON string containing the timeseries generator configuration. + """ + config = self.get_config() + timeseries_generator_config = { + "class_name": self.__class__.__name__, + "config": config, + } + return json.dumps(timeseries_generator_config, **kwargs) + + +@keras_export("keras.preprocessing.sequence.make_sampling_table") +def make_sampling_table(size, sampling_factor=1e-5): + """Generates a word rank-based probabilistic sampling table. + + Used for generating the `sampling_table` argument for `skipgrams`. + `sampling_table[i]` is the probability of sampling + the i-th most common word in a dataset + (more common words should be sampled less frequently, for balance).
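As a concrete check of those probabilities: the values below are the ones `sequence_test.py`, later in this patch, asserts for `make_sampling_table(3)` with `rtol=0.1`. A minimal sketch, assuming only the import path used throughout this diff:

```python
import numpy as np

from keras.preprocessing.sequence import make_sampling_table

# Rank 0 is clamped to rank 1 inside make_sampling_table, so the first
# two entries coincide; with the default sampling_factor=1e-5 the
# Zipf-based estimate gives roughly these keep-probabilities.
table = make_sampling_table(3)
np.testing.assert_allclose(
    table, [0.00315225, 0.00315225, 0.00547597], rtol=0.1
)
```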
+ + The sampling probabilities are generated according + to the sampling distribution used in word2vec: + + ``` + p(word) = (min(1, sqrt(word_frequency / sampling_factor) / + (word_frequency / sampling_factor))) + ``` + + We assume that the word frequencies follow Zipf's law (s=1) to derive + a numerical approximation of frequency(rank): + + `frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))` + where `gamma` is the Euler-Mascheroni constant. Args: - **kwargs: Additional keyword arguments - to be passed to `json.dumps()`. + size: Int, number of possible words to sample. + sampling_factor: The sampling factor in the word2vec formula. + Returns: - A JSON string containing the tokenizer configuration. + A 1D Numpy array of length `size` where the ith entry + is the probability that a word of rank i should be sampled. """ - config = self.get_config() - timeseries_generator_config = { - 'class_name': self.__class__.__name__, - 'config': config - } - return json.dumps(timeseries_generator_config, **kwargs) + gamma = 0.577 + rank = np.arange(size) + rank[0] = 1 + inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1.0 / (12.0 * rank) + f = sampling_factor * inv_fq + + return np.minimum(1.0, f / np.sqrt(f)) + + +@keras_export("keras.preprocessing.sequence.skipgrams") +def skipgrams( + sequence, + vocabulary_size, + window_size=4, + negative_samples=1.0, + shuffle=True, + categorical=False, + sampling_table=None, + seed=None, +): + """Generates skipgram word pairs. + + This function transforms a sequence of word indexes (list of integers) + into tuples of words of the form: + + - (word, word in the same window), with label 1 (positive samples). + - (word, random word from the vocabulary), with label 0 (negative samples). + + Read more about Skipgram in this gnomic paper by Mikolov et al.: + [Efficient Estimation of Word Representations in + Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) + Args: + sequence: A word sequence (sentence), encoded as a list + of word indices (integers). If using a `sampling_table`, + word indices are expected to match the rank + of the words in a reference dataset (e.g. 10 would encode + the 10-th most frequently occurring token). + Note that index 0 is expected to be a non-word and will be skipped. + vocabulary_size: Int, maximum possible word index + 1 + window_size: Int, size of sampling windows (technically half-window). + The window of a word `w_i` will be + `[i - window_size, i + window_size+1]`. + negative_samples: Float >= 0. 0 for no negative (i.e. random) samples. + 1 for same number as positive samples. + shuffle: Whether to shuffle the word couples before returning them. + categorical: bool. if False, labels will be + integers (eg. `[0, 1, 1 .. ]`), + if `True`, labels will be categorical, e.g. + `[[1,0],[0,1],[0,1] .. ]`. + sampling_table: 1D array of size `vocabulary_size` where the entry i + encodes the probability to sample a word of rank i. + seed: Random seed. -@keras_export('keras.preprocessing.sequence.make_sampling_table') -def make_sampling_table(size, sampling_factor=1e-5): - """Generates a word rank-based probabilistic sampling table. - - Used for generating the `sampling_table` argument for `skipgrams`. - `sampling_table[i]` is the probability of sampling - the word i-th most common word in a dataset - (more common words should be sampled less frequently, for balance). 
- - The sampling probabilities are generated according - to the sampling distribution used in word2vec: - - ``` - p(word) = (min(1, sqrt(word_frequency / sampling_factor) / - (word_frequency / sampling_factor))) - ``` - - We assume that the word frequencies follow Zipf's law (s=1) to derive - a numerical approximation of frequency(rank): - - `frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))` - where `gamma` is the Euler-Mascheroni constant. - - Args: - size: Int, number of possible words to sample. - sampling_factor: The sampling factor in the word2vec formula. - - Returns: - A 1D Numpy array of length `size` where the ith entry - is the probability that a word of rank i should be sampled. - """ - gamma = 0.577 - rank = np.arange(size) - rank[0] = 1 - inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1. / (12. * rank) - f = sampling_factor * inv_fq - - return np.minimum(1., f / np.sqrt(f)) - - -@keras_export('keras.preprocessing.sequence.skipgrams') -def skipgrams(sequence, - vocabulary_size, - window_size=4, - negative_samples=1., - shuffle=True, - categorical=False, - sampling_table=None, - seed=None): - """Generates skipgram word pairs. - - This function transforms a sequence of word indexes (list of integers) - into tuples of words of the form: - - - (word, word in the same window), with label 1 (positive samples). - - (word, random word from the vocabulary), with label 0 (negative samples). - - Read more about Skipgram in this gnomic paper by Mikolov et al.: - [Efficient Estimation of Word Representations in - Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) - - Args: - sequence: A word sequence (sentence), encoded as a list - of word indices (integers). If using a `sampling_table`, - word indices are expected to match the rank - of the words in a reference dataset (e.g. 10 would encode - the 10-th most frequently occurring token). - Note that index 0 is expected to be a non-word and will be skipped. - vocabulary_size: Int, maximum possible word index + 1 - window_size: Int, size of sampling windows (technically half-window). - The window of a word `w_i` will be - `[i - window_size, i + window_size+1]`. - negative_samples: Float >= 0. 0 for no negative (i.e. random) samples. - 1 for same number as positive samples. - shuffle: Whether to shuffle the word couples before returning them. - categorical: bool. if False, labels will be - integers (eg. `[0, 1, 1 .. ]`), - if `True`, labels will be categorical, e.g. - `[[1,0],[0,1],[0,1] .. ]`. - sampling_table: 1D array of size `vocabulary_size` where the entry i - encodes the probability to sample a word of rank i. - seed: Random seed. - - Returns: - couples, labels: where `couples` are int pairs and - `labels` are either 0 or 1. - - Note: - By convention, index 0 in the vocabulary is - a non-word and will be skipped. - """ - couples = [] - labels = [] - for i, wi in enumerate(sequence): - if not wi: - continue - if sampling_table is not None: - if sampling_table[wi] < random.random(): - continue - - window_start = max(0, i - window_size) - window_end = min(len(sequence), i + window_size + 1) - for j in range(window_start, window_end): - if j != i: - wj = sequence[j] - if not wj: - continue - couples.append([wi, wj]) + Returns: + couples, labels: where `couples` are int pairs and + `labels` are either 0 or 1. + + Note: + By convention, index 0 in the vocabulary is + a non-word and will be skipped. 
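Before the implementation, a short sketch of the couple/label bookkeeping described above (illustrative only, not part of the patch):

```python
from keras.preprocessing.sequence import skipgrams

# For [1, 2, 3] the default window_size=4 covers the whole sequence,
# so every ordered pair of distinct words is a positive couple:
# 3 * 2 = 6 of them. The default negative_samples=1.0 then appends an
# equal number of random label-0 couples drawn from the vocabulary.
couples, labels = skipgrams([1, 2, 3], vocabulary_size=4, shuffle=False)
assert len(couples) == len(labels) == 12
assert labels[:6] == [1] * 6 and labels[6:] == [0] * 6
```

With `shuffle=True` (the default) the function reuses one seed for two `random.shuffle` calls, which applies the same permutation to `couples` and `labels` and so keeps each couple aligned with its label.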
+ """ + couples = [] + labels = [] + for i, wi in enumerate(sequence): + if not wi: + continue + if sampling_table is not None: + if sampling_table[wi] < random.random(): + continue + + window_start = max(0, i - window_size) + window_end = min(len(sequence), i + window_size + 1) + for j in range(window_start, window_end): + if j != i: + wj = sequence[j] + if not wj: + continue + couples.append([wi, wj]) + if categorical: + labels.append([0, 1]) + else: + labels.append(1) + + if negative_samples > 0: + num_negative_samples = int(len(labels) * negative_samples) + words = [c[0] for c in couples] + random.shuffle(words) + + couples += [ + [words[i % len(words)], random.randint(1, vocabulary_size - 1)] + for i in range(num_negative_samples) + ] if categorical: - labels.append([0, 1]) + labels += [[1, 0]] * num_negative_samples else: - labels.append(1) - - if negative_samples > 0: - num_negative_samples = int(len(labels) * negative_samples) - words = [c[0] for c in couples] - random.shuffle(words) - - couples += [[words[i % len(words)], - random.randint(1, vocabulary_size - 1)] - for i in range(num_negative_samples)] - if categorical: - labels += [[1, 0]] * num_negative_samples - else: - labels += [0] * num_negative_samples - - if shuffle: - if seed is None: - seed = random.randint(0, 10e6) - random.seed(seed) - random.shuffle(couples) - random.seed(seed) - random.shuffle(labels) - - return couples, labels + labels += [0] * num_negative_samples + + if shuffle: + if seed is None: + seed = random.randint(0, 10e6) + random.seed(seed) + random.shuffle(couples) + random.seed(seed) + random.shuffle(labels) + + return couples, labels diff --git a/keras/preprocessing/sequence_test.py b/keras/preprocessing/sequence_test.py index b34fc082801e..a5b2637efcc8 100644 --- a/keras/preprocessing/sequence_test.py +++ b/keras/preprocessing/sequence_test.py @@ -16,178 +16,222 @@ import math -from keras.preprocessing import sequence import numpy as np import tensorflow.compat.v2 as tf +from keras.preprocessing import sequence + class TestSequence(tf.test.TestCase): - - def test_make_sampling_table(self): - a = sequence.make_sampling_table(3) - self.assertAllClose( - a, np.asarray([0.00315225, 0.00315225, 0.00547597]), rtol=.1) - - def test_skipgrams(self): - # test with no window size and binary labels - couples, labels = sequence.skipgrams(np.arange(3), vocabulary_size=3) - for couple in couples: - self.assertIn(couple[0], [0, 1, 2]) - self.assertIn(couple[1], [0, 1, 2]) - - # test window size and categorical labels - couples, labels = sequence.skipgrams( - np.arange(5), vocabulary_size=5, window_size=1, categorical=True) - for couple in couples: - self.assertLessEqual(couple[0] - couple[1], 3) - for label in labels: - self.assertLen(label, 2) - - def test_remove_long_seq(self): - maxlen = 5 - seq = [ - [1, 2, 3], - [1, 2, 3, 4, 5, 6], - ] - label = ['a', 'b'] - new_seq, new_label = sequence._remove_long_seq(maxlen, seq, label) - self.assertEqual(new_seq, [[1, 2, 3]]) - self.assertEqual(new_label, ['a']) - - def test_TimeseriesGenerator(self): - data = np.array([[i] for i in range(50)]) - targets = np.array([[i] for i in range(50)]) - - data_gen = sequence.TimeseriesGenerator( - data, targets, length=10, sampling_rate=2, batch_size=2) - self.assertLen(data_gen, 20) - self.assertAllClose( - data_gen[0][0], - np.array([[[0], [2], [4], [6], [8]], [[1], [3], [5], [7], [9]]])) - self.assertAllClose(data_gen[0][1], np.array([[10], [11]])) - self.assertAllClose( - data_gen[1][0], - np.array([[[2], [4], [6], [8], [10]], 
[[3], [5], [7], [9], [11]]])) - self.assertAllClose(data_gen[1][1], np.array([[12], [13]])) - - data_gen = sequence.TimeseriesGenerator( - data, targets, length=10, sampling_rate=2, reverse=True, batch_size=2) - self.assertLen(data_gen, 20) - self.assertAllClose( - data_gen[0][0], - np.array([[[8], [6], [4], [2], [0]], [[9], [7], [5], [3], [1]]])) - self.assertAllClose(data_gen[0][1], np.array([[10], [11]])) - - data_gen = sequence.TimeseriesGenerator( - data, targets, length=10, sampling_rate=2, shuffle=True, batch_size=1) - batch = data_gen[0] - r = batch[1][0][0] - self.assertAllClose( - batch[0], np.array([[[r - 10], [r - 8], [r - 6], [r - 4], [r - 2]]])) - self.assertAllClose(batch[1], np.array([ - [r], - ])) - - data_gen = sequence.TimeseriesGenerator( - data, targets, length=10, sampling_rate=2, stride=2, batch_size=2) - self.assertLen(data_gen, 10) - self.assertAllClose( - data_gen[1][0], - np.array([[[4], [6], [8], [10], [12]], [[6], [8], [10], [12], [14]]])) - self.assertAllClose(data_gen[1][1], np.array([[14], [16]])) - - data_gen = sequence.TimeseriesGenerator( - data, - targets, - length=10, - sampling_rate=2, - start_index=10, - end_index=30, - batch_size=2) - self.assertLen(data_gen, 6) - self.assertAllClose( - data_gen[0][0], - np.array([[[10], [12], [14], [16], [18]], [[11], [13], [15], [17], - [19]]])) - self.assertAllClose(data_gen[0][1], np.array([[20], [21]])) - - data = np.array([np.random.random_sample((1, 2, 3, 4)) for i in range(50)]) - targets = np.array([np.random.random_sample((3, 2, 1)) for i in range(50)]) - data_gen = sequence.TimeseriesGenerator( - data, - targets, - length=10, - sampling_rate=2, - start_index=10, - end_index=30, - batch_size=2) - self.assertLen(data_gen, 6) - self.assertAllClose( - data_gen[0][0], - np.array([np.array(data[10:19:2]), - np.array(data[11:20:2])])) - self.assertAllClose(data_gen[0][1], np.array([targets[20], targets[21]])) - - with self.assertRaisesRegex( - ValueError, r'`start_index\+length=50 > end_index=49` is disallowed'): - sequence.TimeseriesGenerator(data, targets, length=50) - - def test_TimeSeriesGenerator_doesnt_miss_any_sample(self): - x = np.array([[i] for i in range(10)]) - - for length in range(3, 10): - g = sequence.TimeseriesGenerator(x, x, length=length, batch_size=1) - expected = max(0, len(x) - length) - actual = len(g) - - self.assertEqual(expected, actual) - - if len(g) > 0: # pylint: disable=g-explicit-length-test - # All elements in range(length, 10) should be used as current step - expected = np.arange(length, 10).reshape(-1, 1) - - y = np.concatenate([g[ix][1] for ix in range(len(g))], axis=0) - self.assertAllClose(y, expected) - - x = np.array([[i] for i in range(23)]) - - strides = (1, 1, 5, 7, 3, 5, 3) - lengths = (3, 3, 4, 3, 1, 3, 7) - batch_sizes = (6, 6, 6, 5, 6, 6, 6) - shuffles = (False, True, True, False, False, False, False) - - for stride, length, batch_size, shuffle in zip(strides, lengths, - batch_sizes, shuffles): - g = sequence.TimeseriesGenerator( - x, - x, - length=length, - sampling_rate=1, - stride=stride, - start_index=0, - end_index=None, - shuffle=shuffle, - reverse=False, - batch_size=batch_size) - if shuffle: - # all batches have the same size when shuffle is True. - expected_sequences = math.ceil( - (23 - length) / float(batch_size * stride)) * batch_size - else: - # last batch will be different if `(samples - length) / stride` - # is not a multiple of `batch_size`. 
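Working one of these parameter tuples through by hand (stride=3, length=1, batch_size=6, shuffle=False, 23 samples) shows where the short final batch comes from; the arithmetic below mirrors the test's own formulas:

```python
import math

samples, length, stride, batch_size = 23, 1, 3, 6

# ceil((23 - 1) / 3) = 8 output sequences in total...
expected_sequences = math.ceil((samples - length) / float(stride))
# ...packed into ceil(8 / 6) = 2 batches; the last batch holds only 2.
expected_batches = math.ceil(expected_sequences / float(batch_size))

assert (expected_sequences, expected_batches) == (8, 2)
```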
- expected_sequences = math.ceil((23 - length) / float(stride)) - - expected_batches = math.ceil(expected_sequences / float(batch_size)) - - y = [g[ix][1] for ix in range(len(g))] - - actual_sequences = sum(len(y_) for y_ in y) - actual_batches = len(y) - - self.assertEqual(expected_sequences, actual_sequences) - self.assertEqual(expected_batches, actual_batches) - - -if __name__ == '__main__': - tf.test.main() + def test_make_sampling_table(self): + a = sequence.make_sampling_table(3) + self.assertAllClose( + a, np.asarray([0.00315225, 0.00315225, 0.00547597]), rtol=0.1 + ) + + def test_skipgrams(self): + # test with no window size and binary labels + couples, labels = sequence.skipgrams(np.arange(3), vocabulary_size=3) + for couple in couples: + self.assertIn(couple[0], [0, 1, 2]) + self.assertIn(couple[1], [0, 1, 2]) + + # test window size and categorical labels + couples, labels = sequence.skipgrams( + np.arange(5), vocabulary_size=5, window_size=1, categorical=True + ) + for couple in couples: + self.assertLessEqual(couple[0] - couple[1], 3) + for label in labels: + self.assertLen(label, 2) + + def test_remove_long_seq(self): + maxlen = 5 + seq = [ + [1, 2, 3], + [1, 2, 3, 4, 5, 6], + ] + label = ["a", "b"] + new_seq, new_label = sequence._remove_long_seq(maxlen, seq, label) + self.assertEqual(new_seq, [[1, 2, 3]]) + self.assertEqual(new_label, ["a"]) + + def test_TimeseriesGenerator(self): + data = np.array([[i] for i in range(50)]) + targets = np.array([[i] for i in range(50)]) + + data_gen = sequence.TimeseriesGenerator( + data, targets, length=10, sampling_rate=2, batch_size=2 + ) + self.assertLen(data_gen, 20) + self.assertAllClose( + data_gen[0][0], + np.array([[[0], [2], [4], [6], [8]], [[1], [3], [5], [7], [9]]]), + ) + self.assertAllClose(data_gen[0][1], np.array([[10], [11]])) + self.assertAllClose( + data_gen[1][0], + np.array([[[2], [4], [6], [8], [10]], [[3], [5], [7], [9], [11]]]), + ) + self.assertAllClose(data_gen[1][1], np.array([[12], [13]])) + + data_gen = sequence.TimeseriesGenerator( + data, + targets, + length=10, + sampling_rate=2, + reverse=True, + batch_size=2, + ) + self.assertLen(data_gen, 20) + self.assertAllClose( + data_gen[0][0], + np.array([[[8], [6], [4], [2], [0]], [[9], [7], [5], [3], [1]]]), + ) + self.assertAllClose(data_gen[0][1], np.array([[10], [11]])) + + data_gen = sequence.TimeseriesGenerator( + data, + targets, + length=10, + sampling_rate=2, + shuffle=True, + batch_size=1, + ) + batch = data_gen[0] + r = batch[1][0][0] + self.assertAllClose( + batch[0], np.array([[[r - 10], [r - 8], [r - 6], [r - 4], [r - 2]]]) + ) + self.assertAllClose( + batch[1], + np.array( + [ + [r], + ] + ), + ) + + data_gen = sequence.TimeseriesGenerator( + data, targets, length=10, sampling_rate=2, stride=2, batch_size=2 + ) + self.assertLen(data_gen, 10) + self.assertAllClose( + data_gen[1][0], + np.array( + [[[4], [6], [8], [10], [12]], [[6], [8], [10], [12], [14]]] + ), + ) + self.assertAllClose(data_gen[1][1], np.array([[14], [16]])) + + data_gen = sequence.TimeseriesGenerator( + data, + targets, + length=10, + sampling_rate=2, + start_index=10, + end_index=30, + batch_size=2, + ) + self.assertLen(data_gen, 6) + self.assertAllClose( + data_gen[0][0], + np.array( + [[[10], [12], [14], [16], [18]], [[11], [13], [15], [17], [19]]] + ), + ) + self.assertAllClose(data_gen[0][1], np.array([[20], [21]])) + + data = np.array( + [np.random.random_sample((1, 2, 3, 4)) for i in range(50)] + ) + targets = np.array( + [np.random.random_sample((3, 2, 1)) for i in range(50)] + 
) + data_gen = sequence.TimeseriesGenerator( + data, + targets, + length=10, + sampling_rate=2, + start_index=10, + end_index=30, + batch_size=2, + ) + self.assertLen(data_gen, 6) + self.assertAllClose( + data_gen[0][0], + np.array([np.array(data[10:19:2]), np.array(data[11:20:2])]), + ) + self.assertAllClose( + data_gen[0][1], np.array([targets[20], targets[21]]) + ) + + with self.assertRaisesRegex( + ValueError, r"`start_index\+length=50 > end_index=49` is disallowed" + ): + sequence.TimeseriesGenerator(data, targets, length=50) + + def test_TimeSeriesGenerator_doesnt_miss_any_sample(self): + x = np.array([[i] for i in range(10)]) + + for length in range(3, 10): + g = sequence.TimeseriesGenerator(x, x, length=length, batch_size=1) + expected = max(0, len(x) - length) + actual = len(g) + + self.assertEqual(expected, actual) + + if len(g) > 0: + # All elements in range(length, 10) should be used as current + # step + expected = np.arange(length, 10).reshape(-1, 1) + + y = np.concatenate([g[ix][1] for ix in range(len(g))], axis=0) + self.assertAllClose(y, expected) + + x = np.array([[i] for i in range(23)]) + + strides = (1, 1, 5, 7, 3, 5, 3) + lengths = (3, 3, 4, 3, 1, 3, 7) + batch_sizes = (6, 6, 6, 5, 6, 6, 6) + shuffles = (False, True, True, False, False, False, False) + + for stride, length, batch_size, shuffle in zip( + strides, lengths, batch_sizes, shuffles + ): + g = sequence.TimeseriesGenerator( + x, + x, + length=length, + sampling_rate=1, + stride=stride, + start_index=0, + end_index=None, + shuffle=shuffle, + reverse=False, + batch_size=batch_size, + ) + if shuffle: + # all batches have the same size when shuffle is True. + expected_sequences = ( + math.ceil((23 - length) / float(batch_size * stride)) + * batch_size + ) + else: + # last batch will be different if `(samples - length) / stride` + # is not a multiple of `batch_size`. + expected_sequences = math.ceil((23 - length) / float(stride)) + + expected_batches = math.ceil(expected_sequences / float(batch_size)) + + y = [g[ix][1] for ix in range(len(g))] + + actual_sequences = sum(len(y_) for y_ in y) + actual_batches = len(y) + + self.assertEqual(expected_sequences, actual_sequences) + self.assertEqual(expected_batches, actual_batches) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/preprocessing/text.py b/keras/preprocessing/text.py index ba7f626f09b3..a429fb4b56a8 100644 --- a/keras/preprocessing/text.py +++ b/keras/preprocessing/text.py @@ -23,8 +23,6 @@ and [preprocessing layer guide] (https://www.tensorflow.org/guide/keras/preprocessing_layers). """ -# pylint: disable=invalid-name -# pylint: disable=g-classes-have-attributes import collections @@ -33,549 +31,583 @@ import warnings import numpy as np -from tensorflow.python.util.tf_export import keras_export - - -@keras_export('keras.preprocessing.text.text_to_word_sequence') -def text_to_word_sequence(input_text, - filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower=True, - split=' '): - r"""Converts a text to a sequence of words (or tokens). - - Deprecated: `tf.keras.preprocessing.text.text_to_word_sequence` does not - operate on tensors and is not recommended for new code. Prefer - `tf.strings.regex_replace` and `tf.strings.split` which provide equivalent - functionality and accept `tf.Tensor` input. For an overview of text handling - in Tensorflow, see the [text loading tutorial] - (https://www.tensorflow.org/tutorials/load_data/text). 
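Aside, not part of the patch: a minimal sketch of the tensor-based replacement this deprecation note recommends, assuming TensorFlow 2.x and the default filter set:

    import tensorflow as tf

    sample = tf.constant("This is a sample sentence.")
    # Replace filtered characters with spaces, lowercase, then split.
    stripped = tf.strings.regex_replace(
        sample, r"[!\"#$%&()*+,\-./:;<=>?@\[\\\]^_`{|}~\t\n]", " "
    )
    tokens = tf.strings.split(tf.strings.lower(stripped))
    # tokens.numpy() -> [b'this', b'is', b'a', b'sample', b'sentence']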
- This function transforms a string of text into a list of words - while ignoring `filters` which include punctuations by default. - - >>> sample_text = 'This is a sample sentence.' - >>> tf.keras.preprocessing.text.text_to_word_sequence(sample_text) - ['this', 'is', 'a', 'sample', 'sentence'] - - Args: - input_text: Input text (string). - filters: list (or concatenation) of characters to filter out, such as - punctuation. Default: ``'!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\t\\n'``, - includes basic punctuation, tabs, and newlines. - lower: boolean. Whether to convert the input to lowercase. - split: str. Separator for word splitting. - - Returns: - A list of words (or tokens). - """ - if lower: - input_text = input_text.lower() - - translate_dict = {c: split for c in filters} - translate_map = str.maketrans(translate_dict) - input_text = input_text.translate(translate_map) - - seq = input_text.split(split) - return [i for i in seq if i] - - -@keras_export('keras.preprocessing.text.one_hot') -def one_hot(input_text, - n, - filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower=True, - split=' ', - analyzer=None): - r"""One-hot encodes a text into a list of word indexes of size `n`. - - Deprecated: `tf.keras.text.preprocessing.one_hot` does not operate on tensors - and is not recommended for new code. Prefer `tf.keras.layers.Hashing` with - `output_mode='one_hot'` which provides equivalent functionality through a - layer which accepts `tf.Tensor` input. See the [preprocessing layer guide] - (https://www.tensorflow.org/guide/keras/preprocessing_layers) - for an overview of preprocessing layers. - - This function receives as input a string of text and returns a - list of encoded integers each corresponding to a word (or token) - in the given input string. - - Args: - input_text: Input text (string). - n: int. Size of vocabulary. - filters: list (or concatenation) of characters to filter out, such as - punctuation. Default: - ``` - '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~\t\n - ```, - includes basic punctuation, tabs, and newlines. - lower: boolean. Whether to set the text to lowercase. - split: str. Separator for word splitting. - analyzer: function. Custom analyzer to split the text - - Returns: - List of integers in `[1, n]`. Each integer encodes a word - (unicity non-guaranteed). - """ - return hashing_trick( - input_text, - n, - hash_function=hash, - filters=filters, - lower=lower, - split=split, - analyzer=analyzer) - - -@keras_export('keras.preprocessing.text.hashing_trick') -def hashing_trick(text, - n, - hash_function=None, - filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower=True, - split=' ', - analyzer=None): - r"""Converts a text to a sequence of indexes in a fixed-size hashing space. - - Deprecated: `tf.keras.text.preprocessing.hashing_trick` does not operate on - tensors and is not recommended for new code. Prefer `tf.keras.layers.Hashing` - which provides equivalent functionality through a layer which accepts - `tf.Tensor` input. See the [preprocessing layer guide] - (https://www.tensorflow.org/guide/keras/preprocessing_layers) - for an overview of preprocessing layers. - - Args: - text: Input text (string). - n: Dimension of the hashing space. - hash_function: defaults to python `hash` function, can be 'md5' or - any function that takes in input a string and returns a int. - Note that 'hash' is not a stable hashing function, so - it is not consistent across different runs, while 'md5' - is a stable hashing function. 
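Aside, not part of the patch: the stability point above in practice — Python's built-in hash() is salted per process for strings, while an md5 digest is deterministic. A sketch mirroring the md5 branch used by `hashing_trick`:

    import hashlib

    def stable_index(word, n):
        # Deterministic index in [1, n-1]; index 0 stays reserved.
        return int(hashlib.md5(word.encode()).hexdigest(), 16) % (n - 1) + 1

    assert stable_index("cat", 5) == stable_index("cat", 5)  # same every run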
- filters: list (or concatenation) of characters to filter out, such as - punctuation. Default: ``!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\t\\n``, - includes basic punctuation, tabs, and newlines. - lower: boolean. Whether to set the text to lowercase. - split: str. Separator for word splitting. - analyzer: function. Custom analyzer to split the text - - Returns: - A list of integer word indices (unicity non-guaranteed). - `0` is a reserved index that won't be assigned to any word. - Two or more words may be assigned to the same index, due to possible - collisions by the hashing function. - The [probability]( - https://en.wikipedia.org/wiki/Birthday_problem#Probability_table) - of a collision is in relation to the dimension of the hashing space and - the number of distinct objects. - """ - if hash_function is None: - hash_function = hash - elif hash_function == 'md5': - hash_function = lambda w: int(hashlib.md5(w.encode()).hexdigest(), 16) - - if analyzer is None: - seq = text_to_word_sequence(text, filters=filters, lower=lower, split=split) - else: - seq = analyzer(text) - - return [(hash_function(w) % (n - 1) + 1) for w in seq] - - -@keras_export('keras.preprocessing.text.Tokenizer') -class Tokenizer(object): - """Text tokenization utility class. - - Deprecated: `tf.keras.preprocessing.text.Tokenizer` does not operate on - tensors and is not recommended for new code. Prefer - `tf.keras.layers.TextVectorization` which provides equivalent functionality - through a layer which accepts `tf.Tensor` input. See the - [text loading tutorial](https://www.tensorflow.org/tutorials/load_data/text) - for an overview of the layer and text handling in tensorflow. - - This class allows to vectorize a text corpus, by turning each - text into either a sequence of integers (each integer being the index - of a token in a dictionary) or into a vector where the coefficient - for each token could be binary, based on word count, based on tf-idf... - - By default, all punctuation is removed, turning the texts into - space-separated sequences of words - (words maybe include the `'` character). These sequences are then - split into lists of tokens. They will then be indexed or vectorized. - - `0` is a reserved index that won't be assigned to any word. - - Args: - num_words: the maximum number of words to keep, based - on word frequency. Only the most common `num_words-1` words will - be kept. - filters: a string where each element is a character that will be - filtered from the texts. The default is all punctuation, plus - tabs and line breaks, minus the `'` character. - lower: boolean. Whether to convert the texts to lowercase. - split: str. Separator for word splitting. - char_level: if True, every character will be treated as a token. - oov_token: if given, it will be added to word_index and used to - replace out-of-vocabulary words during text_to_sequence calls - analyzer: function. Custom analyzer to split the text. 
- The default analyzer is text_to_word_sequence - """ - - def __init__(self, - num_words=None, - filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower=True, - split=' ', - char_level=False, - oov_token=None, - analyzer=None, - **kwargs): - # Legacy support - if 'nb_words' in kwargs: - warnings.warn('The `nb_words` argument in `Tokenizer` ' - 'has been renamed `num_words`.') - num_words = kwargs.pop('nb_words') - document_count = kwargs.pop('document_count', 0) - if kwargs: - raise TypeError('Unrecognized keyword arguments: ' + str(kwargs)) - - self.word_counts = collections.OrderedDict() - self.word_docs = collections.defaultdict(int) - self.filters = filters - self.split = split - self.lower = lower - self.num_words = num_words - self.document_count = document_count - self.char_level = char_level - self.oov_token = oov_token - self.index_docs = collections.defaultdict(int) - self.word_index = {} - self.index_word = {} - self.analyzer = analyzer - - def fit_on_texts(self, texts): - """Updates internal vocabulary based on a list of texts. - - In the case where texts contains lists, - we assume each entry of the lists to be a token. - - Required before using `texts_to_sequences` or `texts_to_matrix`. - - Args: - texts: can be a list of strings, - a generator of strings (for memory-efficiency), - or a list of list of strings. - """ - for text in texts: - self.document_count += 1 - if self.char_level or isinstance(text, list): - if self.lower: - if isinstance(text, list): - text = [text_elem.lower() for text_elem in text] - else: - text = text.lower() - seq = text - else: - if self.analyzer is None: - seq = text_to_word_sequence( - text, filters=self.filters, lower=self.lower, split=self.split) - else: - seq = self.analyzer(text) - for w in seq: - if w in self.word_counts: - self.word_counts[w] += 1 - else: - self.word_counts[w] = 1 - for w in set(seq): - # In how many documents each word occurs - self.word_docs[w] += 1 - - wcounts = list(self.word_counts.items()) - wcounts.sort(key=lambda x: x[1], reverse=True) - # forcing the oov_token to index 1 if it exists - if self.oov_token is None: - sorted_voc = [] - else: - sorted_voc = [self.oov_token] - sorted_voc.extend(wc[0] for wc in wcounts) - - # note that index 0 is reserved, never assigned to an existing word - self.word_index = dict(zip(sorted_voc, list(range(1, len(sorted_voc) + 1)))) - - self.index_word = {c: w for w, c in self.word_index.items()} - - for w, c in list(self.word_docs.items()): - self.index_docs[self.word_index[w]] = c +# isort: off +from tensorflow.python.util.tf_export import keras_export - def fit_on_sequences(self, sequences): - """Updates internal vocabulary based on a list of sequences. - Required before using `sequences_to_matrix` - (if `fit_on_texts` was never called). +@keras_export("keras.preprocessing.text.text_to_word_sequence") +def text_to_word_sequence( + input_text, + filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', + lower=True, + split=" ", +): + r"""Converts a text to a sequence of words (or tokens). - Args: - sequences: A list of sequence. - A "sequence" is a list of integer word indices. - """ - self.document_count += len(sequences) - for seq in sequences: - seq = set(seq) - for i in seq: - self.index_docs[i] += 1 + Deprecated: `tf.keras.preprocessing.text.text_to_word_sequence` does not + operate on tensors and is not recommended for new code. Prefer + `tf.strings.regex_replace` and `tf.strings.split` which provide equivalent + functionality and accept `tf.Tensor` input. 
For an overview of text handling
+    in TensorFlow, see the [text loading tutorial]
+    (https://www.tensorflow.org/tutorials/load_data/text).

-  def texts_to_sequences(self, texts):
-    """Transforms each text in texts to a sequence of integers.
+    This function transforms a string of text into a list of words
+    while ignoring `filters`, which includes punctuation by default.

-    Only top `num_words-1` most frequent words will be taken into account.
-    Only words known by the tokenizer will be taken into account.
+    >>> sample_text = 'This is a sample sentence.'
+    >>> tf.keras.preprocessing.text.text_to_word_sequence(sample_text)
+    ['this', 'is', 'a', 'sample', 'sentence']

     Args:
-      texts: A list of texts (strings).
+        input_text: Input text (string).
+        filters: list (or concatenation) of characters to filter out, such as
+            punctuation. Default: ``'!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\t\\n'``,
+            includes basic punctuation, tabs, and newlines.
+        lower: boolean. Whether to convert the input to lowercase.
+        split: str. Separator for word splitting.

     Returns:
-      A list of sequences.
+        A list of words (or tokens).
     """
-    return list(self.texts_to_sequences_generator(texts))
-
-  def texts_to_sequences_generator(self, texts):
-    """Transforms each text in `texts` to a sequence of integers.
-
-    Each item in texts can also be a list,
-    in which case we assume each item of that list to be a token.
-
-    Only top `num_words-1` most frequent words will be taken into account.
-    Only words known by the tokenizer will be taken into account.
+    if lower:
+        input_text = input_text.lower()
+
+    translate_dict = {c: split for c in filters}
+    translate_map = str.maketrans(translate_dict)
+    input_text = input_text.translate(translate_map)
+
+    seq = input_text.split(split)
+    return [i for i in seq if i]
+
+
+@keras_export("keras.preprocessing.text.one_hot")
+def one_hot(
+    input_text,
+    n,
+    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
+    lower=True,
+    split=" ",
+    analyzer=None,
+):
+    r"""One-hot encodes a text into a list of word indexes of size `n`.
+
+    Deprecated: `tf.keras.preprocessing.text.one_hot` does not operate on
+    tensors and is not recommended for new code. Prefer
+    `tf.keras.layers.Hashing` with `output_mode='one_hot'` which provides
+    equivalent functionality through a layer which accepts `tf.Tensor` input.
+    See the [preprocessing layer guide]
+    (https://www.tensorflow.org/guide/keras/preprocessing_layers) for an
+    overview of preprocessing layers.
+
+    This function receives as input a string of text and returns a
+    list of encoded integers, each corresponding to a word (or token)
+    in the given input string.

     Args:
-      texts: A list of texts (strings).
+        input_text: Input text (string).
+        n: int. Size of vocabulary.
+        filters: list (or concatenation) of characters to filter out, such as
+            punctuation. Default: ``'!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\t\\n'``,
+            includes basic punctuation, tabs, and newlines.
+        lower: boolean. Whether to set the text to lowercase.
+        split: str. Separator for word splitting.
+        analyzer: function. Custom analyzer to split the text.

-    Yields:
-      Yields individual sequences.
+    Returns:
+        List of integers in `[1, n]`. Each integer encodes a word
+        (unicity non-guaranteed).
""" - num_words = self.num_words - oov_token_index = self.word_index.get(self.oov_token) - for text in texts: - if self.char_level or isinstance(text, list): - if self.lower: - if isinstance(text, list): - text = [text_elem.lower() for text_elem in text] - else: - text = text.lower() - seq = text - else: - if self.analyzer is None: - seq = text_to_word_sequence( - text, filters=self.filters, lower=self.lower, split=self.split) - else: - seq = self.analyzer(text) - vect = [] - for w in seq: - i = self.word_index.get(w) - if i is not None: - if num_words and i >= num_words: - if oov_token_index is not None: - vect.append(oov_token_index) - else: - vect.append(i) - elif self.oov_token is not None: - vect.append(oov_token_index) - yield vect - - def sequences_to_texts(self, sequences): - """Transforms each sequence into a list of text. - - Only top `num_words-1` most frequent words will be taken into account. - Only words known by the tokenizer will be taken into account. + return hashing_trick( + input_text, + n, + hash_function=hash, + filters=filters, + lower=lower, + split=split, + analyzer=analyzer, + ) + + +@keras_export("keras.preprocessing.text.hashing_trick") +def hashing_trick( + text, + n, + hash_function=None, + filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', + lower=True, + split=" ", + analyzer=None, +): + r"""Converts a text to a sequence of indexes in a fixed-size hashing space. + + Deprecated: `tf.keras.text.preprocessing.hashing_trick` does not operate on + tensors and is not recommended for new code. Prefer + `tf.keras.layers.Hashing` which provides equivalent functionality through a + layer which accepts `tf.Tensor` input. See the [preprocessing layer guide]( + https://www.tensorflow.org/guide/keras/preprocessing_layers) for an + overview of preprocessing layers. Args: - sequences: A list of sequences (list of integers). + text: Input text (string). + n: Dimension of the hashing space. + hash_function: When `None` uses a python `hash` function. Can be 'md5' + or any function that takes in input a string and returns a int. + Note that 'hash' is not a stable hashing function, so + it is not consistent across different runs, while 'md5' + is a stable hashing function. Defaults to `None`. + filters: list (or concatenation) of characters to filter out, such as + punctuation. Default: ``!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\t\\n``, + includes basic punctuation, tabs, and newlines. + lower: boolean. Whether to set the text to lowercase. + split: str. Separator for word splitting. + analyzer: function. Custom analyzer to split the text Returns: - A list of texts (strings) + A list of integer word indices (unicity non-guaranteed). + `0` is a reserved index that won't be assigned to any word. + Two or more words may be assigned to the same index, due to possible + collisions by the hashing function. + The [probability]( + https://en.wikipedia.org/wiki/Birthday_problem#Probability_table) + of a collision is in relation to the dimension of the hashing space and + the number of distinct objects. """ - return list(self.sequences_to_texts_generator(sequences)) + if hash_function is None: + hash_function = hash + elif hash_function == "md5": + hash_function = lambda w: int(hashlib.md5(w.encode()).hexdigest(), 16) + + if analyzer is None: + seq = text_to_word_sequence( + text, filters=filters, lower=lower, split=split + ) + else: + seq = analyzer(text) - def sequences_to_texts_generator(self, sequences): - """Transforms each sequence in `sequences` to a list of texts(strings). 
+ return [(hash_function(w) % (n - 1) + 1) for w in seq] - Each sequence has to a list of integers. - In other words, sequences should be a list of sequences - Only top `num_words-1` most frequent words will be taken into account. - Only words known by the tokenizer will be taken into account. +@keras_export("keras.preprocessing.text.Tokenizer") +class Tokenizer(object): + """Text tokenization utility class. - Args: - sequences: A list of sequences. + Deprecated: `tf.keras.preprocessing.text.Tokenizer` does not operate on + tensors and is not recommended for new code. Prefer + `tf.keras.layers.TextVectorization` which provides equivalent functionality + through a layer which accepts `tf.Tensor` input. See the + [text loading tutorial](https://www.tensorflow.org/tutorials/load_data/text) + for an overview of the layer and text handling in tensorflow. - Yields: - Yields individual texts. - """ - num_words = self.num_words - oov_token_index = self.word_index.get(self.oov_token) - for seq in sequences: - vect = [] - for num in seq: - word = self.index_word.get(num) - if word is not None: - if num_words and num >= num_words: - if oov_token_index is not None: - vect.append(self.index_word[oov_token_index]) - else: - vect.append(word) - elif self.oov_token is not None: - vect.append(self.index_word[oov_token_index]) - vect = ' '.join(vect) - yield vect - - def texts_to_matrix(self, texts, mode='binary'): - """Convert a list of texts to a Numpy matrix. + This class allows to vectorize a text corpus, by turning each + text into either a sequence of integers (each integer being the index + of a token in a dictionary) or into a vector where the coefficient + for each token could be binary, based on word count, based on tf-idf... - Args: - texts: list of strings. - mode: one of "binary", "count", "tfidf", "freq". - - Returns: - A Numpy matrix. - """ - sequences = self.texts_to_sequences(texts) - return self.sequences_to_matrix(sequences, mode=mode) + By default, all punctuation is removed, turning the texts into + space-separated sequences of words + (words may include the `'` character). These sequences are then + split into lists of tokens. They will then be indexed or vectorized. - def sequences_to_matrix(self, sequences, mode='binary'): - """Converts a list of sequences into a Numpy matrix. + `0` is a reserved index that won't be assigned to any word. Args: - sequences: list of sequences - (a sequence is a list of integer word indices). - mode: one of "binary", "count", "tfidf", "freq" - - Returns: - A Numpy matrix. - - Raises: - ValueError: In case of invalid `mode` argument, - or if the Tokenizer requires to be fit to sample data. + num_words: the maximum number of words to keep, based + on word frequency. Only the most common `num_words-1` words will + be kept. + filters: a string where each element is a character that will be + filtered from the texts. The default is all punctuation, plus + tabs and line breaks, minus the `'` character. + lower: boolean. Whether to convert the texts to lowercase. + split: str. Separator for word splitting. + char_level: if True, every character will be treated as a token. + oov_token: if given, it will be added to word_index and used to + replace out-of-vocabulary words during text_to_sequence calls + analyzer: function. Custom analyzer to split the text. 
+ The default analyzer is text_to_word_sequence """ - if not self.num_words: - if self.word_index: - num_words = len(self.word_index) + 1 - else: - raise ValueError('Specify a dimension (`num_words` argument), ' - 'or fit on some text data first.') - else: - num_words = self.num_words - - if mode == 'tfidf' and not self.document_count: - raise ValueError('Fit the Tokenizer on some data ' - 'before using tfidf mode.') - - x = np.zeros((len(sequences), num_words)) - for i, seq in enumerate(sequences): - if not seq: - continue - counts = collections.defaultdict(int) - for j in seq: - if j >= num_words: - continue - counts[j] += 1 - for j, c in list(counts.items()): - if mode == 'count': - x[i][j] = c - elif mode == 'freq': - x[i][j] = c / len(seq) - elif mode == 'binary': - x[i][j] = 1 - elif mode == 'tfidf': - # Use weighting scheme 2 in - # https://en.wikipedia.org/wiki/Tf%E2%80%93idf - tf = 1 + np.log(c) - idf = np.log(1 + self.document_count / - (1 + self.index_docs.get(j, 0))) - x[i][j] = tf * idf - else: - raise ValueError('Unknown vectorization mode:', mode) - return x - def get_config(self): - """Returns the tokenizer configuration as Python dictionary. - - The word count dictionaries used by the tokenizer get serialized - into plain JSON, so that the configuration can be read by other - projects. + def __init__( + self, + num_words=None, + filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', + lower=True, + split=" ", + char_level=False, + oov_token=None, + analyzer=None, + **kwargs + ): + # Legacy support + if "nb_words" in kwargs: + warnings.warn( + "The `nb_words` argument in `Tokenizer` " + "has been renamed `num_words`." + ) + num_words = kwargs.pop("nb_words") + document_count = kwargs.pop("document_count", 0) + if kwargs: + raise TypeError("Unrecognized keyword arguments: " + str(kwargs)) + + self.word_counts = collections.OrderedDict() + self.word_docs = collections.defaultdict(int) + self.filters = filters + self.split = split + self.lower = lower + self.num_words = num_words + self.document_count = document_count + self.char_level = char_level + self.oov_token = oov_token + self.index_docs = collections.defaultdict(int) + self.word_index = {} + self.index_word = {} + self.analyzer = analyzer + + def fit_on_texts(self, texts): + """Updates internal vocabulary based on a list of texts. + + In the case where texts contains lists, + we assume each entry of the lists to be a token. + + Required before using `texts_to_sequences` or `texts_to_matrix`. + + Args: + texts: can be a list of strings, + a generator of strings (for memory-efficiency), + or a list of list of strings. 
+ """ + for text in texts: + self.document_count += 1 + if self.char_level or isinstance(text, list): + if self.lower: + if isinstance(text, list): + text = [text_elem.lower() for text_elem in text] + else: + text = text.lower() + seq = text + else: + if self.analyzer is None: + seq = text_to_word_sequence( + text, + filters=self.filters, + lower=self.lower, + split=self.split, + ) + else: + seq = self.analyzer(text) + for w in seq: + if w in self.word_counts: + self.word_counts[w] += 1 + else: + self.word_counts[w] = 1 + for w in set(seq): + # In how many documents each word occurs + self.word_docs[w] += 1 + + wcounts = list(self.word_counts.items()) + wcounts.sort(key=lambda x: x[1], reverse=True) + # forcing the oov_token to index 1 if it exists + if self.oov_token is None: + sorted_voc = [] + else: + sorted_voc = [self.oov_token] + sorted_voc.extend(wc[0] for wc in wcounts) + + # note that index 0 is reserved, never assigned to an existing word + self.word_index = dict( + zip(sorted_voc, list(range(1, len(sorted_voc) + 1))) + ) + + self.index_word = {c: w for w, c in self.word_index.items()} + + for w, c in list(self.word_docs.items()): + self.index_docs[self.word_index[w]] = c + + def fit_on_sequences(self, sequences): + """Updates internal vocabulary based on a list of sequences. + + Required before using `sequences_to_matrix` + (if `fit_on_texts` was never called). + + Args: + sequences: A list of sequence. + A "sequence" is a list of integer word indices. + """ + self.document_count += len(sequences) + for seq in sequences: + seq = set(seq) + for i in seq: + self.index_docs[i] += 1 + + def texts_to_sequences(self, texts): + """Transforms each text in texts to a sequence of integers. + + Only top `num_words-1` most frequent words will be taken into account. + Only words known by the tokenizer will be taken into account. + + Args: + texts: A list of texts (strings). + + Returns: + A list of sequences. + """ + return list(self.texts_to_sequences_generator(texts)) + + def texts_to_sequences_generator(self, texts): + """Transforms each text in `texts` to a sequence of integers. + + Each item in texts can also be a list, + in which case we assume each item of that list to be a token. + + Only top `num_words-1` most frequent words will be taken into account. + Only words known by the tokenizer will be taken into account. + + Args: + texts: A list of texts (strings). + + Yields: + Yields individual sequences. + """ + num_words = self.num_words + oov_token_index = self.word_index.get(self.oov_token) + for text in texts: + if self.char_level or isinstance(text, list): + if self.lower: + if isinstance(text, list): + text = [text_elem.lower() for text_elem in text] + else: + text = text.lower() + seq = text + else: + if self.analyzer is None: + seq = text_to_word_sequence( + text, + filters=self.filters, + lower=self.lower, + split=self.split, + ) + else: + seq = self.analyzer(text) + vect = [] + for w in seq: + i = self.word_index.get(w) + if i is not None: + if num_words and i >= num_words: + if oov_token_index is not None: + vect.append(oov_token_index) + else: + vect.append(i) + elif self.oov_token is not None: + vect.append(oov_token_index) + yield vect + + def sequences_to_texts(self, sequences): + """Transforms each sequence into a list of text. + + Only top `num_words-1` most frequent words will be taken into account. + Only words known by the tokenizer will be taken into account. + + Args: + sequences: A list of sequences (list of integers). 
+
+        Returns:
+            A list of texts (strings).
+        """
+        return list(self.sequences_to_texts_generator(sequences))
+
+    def sequences_to_texts_generator(self, sequences):
+        """Transforms each sequence in `sequences` to a list of texts
+        (strings).
+
+        Each sequence has to be a list of integers.
+        In other words, `sequences` should be a list of sequences.
+
+        Only top `num_words-1` most frequent words will be taken into account.
+        Only words known by the tokenizer will be taken into account.
+
+        Args:
+            sequences: A list of sequences.
+
+        Yields:
+            Yields individual texts.
+        """
+        num_words = self.num_words
+        oov_token_index = self.word_index.get(self.oov_token)
+        for seq in sequences:
+            vect = []
+            for num in seq:
+                word = self.index_word.get(num)
+                if word is not None:
+                    if num_words and num >= num_words:
+                        if oov_token_index is not None:
+                            vect.append(self.index_word[oov_token_index])
+                    else:
+                        vect.append(word)
+                elif self.oov_token is not None:
+                    vect.append(self.index_word[oov_token_index])
+            vect = " ".join(vect)
+            yield vect
+
+    def texts_to_matrix(self, texts, mode="binary"):
+        """Converts a list of texts to a Numpy matrix.
+
+        Args:
+            texts: list of strings.
+            mode: one of "binary", "count", "tfidf", "freq".
+
+        Returns:
+            A Numpy matrix.
+        """
+        sequences = self.texts_to_sequences(texts)
+        return self.sequences_to_matrix(sequences, mode=mode)
+
+    def sequences_to_matrix(self, sequences, mode="binary"):
+        """Converts a list of sequences into a Numpy matrix.
+
+        Args:
+            sequences: list of sequences
+                (a sequence is a list of integer word indices).
+            mode: one of "binary", "count", "tfidf", "freq".
+
+        Returns:
+            A Numpy matrix.
+
+        Raises:
+            ValueError: In case of invalid `mode` argument,
+                or if the Tokenizer has not been fit on any sample data.
+        """
+        if not self.num_words:
+            if self.word_index:
+                num_words = len(self.word_index) + 1
+            else:
+                raise ValueError(
+                    "Specify a dimension (`num_words` argument), "
+                    "or fit on some text data first."
+                )
+        else:
+            num_words = self.num_words
+
+        if mode == "tfidf" and not self.document_count:
+            raise ValueError(
+                "Fit the Tokenizer on some data before using tfidf mode."
+            )
+
+        x = np.zeros((len(sequences), num_words))
+        for i, seq in enumerate(sequences):
+            if not seq:
+                continue
+            counts = collections.defaultdict(int)
+            for j in seq:
+                if j >= num_words:
+                    continue
+                counts[j] += 1
+            for j, c in list(counts.items()):
+                if mode == "count":
+                    x[i][j] = c
+                elif mode == "freq":
+                    x[i][j] = c / len(seq)
+                elif mode == "binary":
+                    x[i][j] = 1
+                elif mode == "tfidf":
+                    # Use weighting scheme 2 in
+                    # https://en.wikipedia.org/wiki/Tf%E2%80%93idf
+                    tf = 1 + np.log(c)
+                    idf = np.log(
+                        1
+                        + self.document_count / (1 + self.index_docs.get(j, 0))
+                    )
+                    x[i][j] = tf * idf
+                else:
+                    raise ValueError("Unknown vectorization mode:", mode)
+        return x
+
+    def get_config(self):
+        """Returns the tokenizer configuration as Python dictionary.
+
+        The word count dictionaries used by the tokenizer get serialized
+        into plain JSON, so that the configuration can be read by other
+        projects.
+
+        Returns:
+            A Python dictionary with the tokenizer configuration.
+ """ + json_word_counts = json.dumps(self.word_counts) + json_word_docs = json.dumps(self.word_docs) + json_index_docs = json.dumps(self.index_docs) + json_word_index = json.dumps(self.word_index) + json_index_word = json.dumps(self.index_word) + + return { + "num_words": self.num_words, + "filters": self.filters, + "lower": self.lower, + "split": self.split, + "char_level": self.char_level, + "oov_token": self.oov_token, + "document_count": self.document_count, + "word_counts": json_word_counts, + "word_docs": json_word_docs, + "index_docs": json_index_docs, + "index_word": json_index_word, + "word_index": json_word_index, + } + + def to_json(self, **kwargs): + """Returns a JSON string containing the tokenizer configuration. + + To load a tokenizer from a JSON string, use + `keras.preprocessing.text.tokenizer_from_json(json_string)`. + + Args: + **kwargs: Additional keyword arguments + to be passed to `json.dumps()`. + + Returns: + A JSON string containing the tokenizer configuration. + """ + config = self.get_config() + tokenizer_config = { + "class_name": self.__class__.__name__, + "config": config, + } + return json.dumps(tokenizer_config, **kwargs) + + +@keras_export("keras.preprocessing.text.tokenizer_from_json") +def tokenizer_from_json(json_string): + """Parses a JSON tokenizer configuration and returns a tokenizer instance. - Returns: - A Python dictionary with the tokenizer configuration. - """ - json_word_counts = json.dumps(self.word_counts) - json_word_docs = json.dumps(self.word_docs) - json_index_docs = json.dumps(self.index_docs) - json_word_index = json.dumps(self.word_index) - json_index_word = json.dumps(self.index_word) - - return { - 'num_words': self.num_words, - 'filters': self.filters, - 'lower': self.lower, - 'split': self.split, - 'char_level': self.char_level, - 'oov_token': self.oov_token, - 'document_count': self.document_count, - 'word_counts': json_word_counts, - 'word_docs': json_word_docs, - 'index_docs': json_index_docs, - 'index_word': json_index_word, - 'word_index': json_word_index - } - - def to_json(self, **kwargs): - """Returns a JSON string containing the tokenizer configuration. - - To load a tokenizer from a JSON string, use - `keras.preprocessing.text.tokenizer_from_json(json_string)`. + Deprecated: `tf.keras.preprocessing.text.Tokenizer` does not operate on + tensors and is not recommended for new code. Prefer + `tf.keras.layers.TextVectorization` which provides equivalent functionality + through a layer which accepts `tf.Tensor` input. See the + [text loading tutorial](https://www.tensorflow.org/tutorials/load_data/text) + for an overview of the layer and text handling in tensorflow. Args: - **kwargs: Additional keyword arguments - to be passed to `json.dumps()`. + json_string: JSON string encoding a tokenizer configuration. Returns: - A JSON string containing the tokenizer configuration. + A Keras Tokenizer instance """ - config = self.get_config() - tokenizer_config = {'class_name': self.__class__.__name__, 'config': config} - return json.dumps(tokenizer_config, **kwargs) - - -@keras_export('keras.preprocessing.text.tokenizer_from_json') -def tokenizer_from_json(json_string): - """Parses a JSON tokenizer configuration and returns a tokenizer instance. - - Deprecated: `tf.keras.preprocessing.text.Tokenizer` does not operate on - tensors and is not recommended for new code. Prefer - `tf.keras.layers.TextVectorization` which provides equivalent functionality - through a layer which accepts `tf.Tensor` input. 
See the - [text loading tutorial](https://www.tensorflow.org/tutorials/load_data/text) - for an overview of the layer and text handling in tensorflow. - - Args: - json_string: JSON string encoding a tokenizer configuration. - - Returns: - A Keras Tokenizer instance - """ - tokenizer_config = json.loads(json_string) - config = tokenizer_config.get('config') - - word_counts = json.loads(config.pop('word_counts')) - word_docs = json.loads(config.pop('word_docs')) - index_docs = json.loads(config.pop('index_docs')) - # Integer indexing gets converted to strings with json.dumps() - index_docs = {int(k): v for k, v in index_docs.items()} - index_word = json.loads(config.pop('index_word')) - index_word = {int(k): v for k, v in index_word.items()} - word_index = json.loads(config.pop('word_index')) - - tokenizer = Tokenizer(**config) - tokenizer.word_counts = word_counts - tokenizer.word_docs = word_docs - tokenizer.index_docs = index_docs - tokenizer.word_index = word_index - tokenizer.index_word = index_word - return tokenizer + tokenizer_config = json.loads(json_string) + config = tokenizer_config.get("config") + + word_counts = json.loads(config.pop("word_counts")) + word_docs = json.loads(config.pop("word_docs")) + index_docs = json.loads(config.pop("index_docs")) + # Integer indexing gets converted to strings with json.dumps() + index_docs = {int(k): v for k, v in index_docs.items()} + index_word = json.loads(config.pop("index_word")) + index_word = {int(k): v for k, v in index_word.items()} + word_index = json.loads(config.pop("word_index")) + + tokenizer = Tokenizer(**config) + tokenizer.word_counts = word_counts + tokenizer.word_docs = word_docs + tokenizer.index_docs = index_docs + tokenizer.word_index = word_index + tokenizer.index_word = index_word + return tokenizer diff --git a/keras/preprocessing/text_test.py b/keras/preprocessing/text_test.py index 7edbe05f4415..a73e81ccc620 100644 --- a/keras/preprocessing/text_test.py +++ b/keras/preprocessing/text_test.py @@ -17,281 +17,332 @@ import collections -from keras.preprocessing import text import numpy as np import tensorflow.compat.v2 as tf +from keras.preprocessing import text + class TestText(tf.test.TestCase): - - def test_one_hot(self): - sample_text = 'The cat sat on the mat.' - encoded = text.one_hot(sample_text, 5) - self.assertLen(encoded, 6) - self.assertLessEqual(np.max(encoded), 4) - self.assertGreaterEqual(np.min(encoded), 0) - - sample_text = 'The-cat-sat-on-the-mat' - encoded2 = text.one_hot( - sample_text, 5, analyzer=lambda t: t.lower().split('-')) - self.assertEqual(encoded, encoded2) - self.assertLen(encoded, 6) - self.assertLessEqual(np.max(encoded), 4) - self.assertGreaterEqual(np.min(encoded), 0) - - def test_hashing_trick_hash(self): - sample_text = 'The cat sat on the mat.' - encoded = text.hashing_trick(sample_text, 5) - self.assertLen(encoded, 6) - self.assertLessEqual(np.max(encoded), 4) - self.assertGreaterEqual(np.min(encoded), 1) - - def test_hashing_trick_md5(self): - sample_text = 'The cat sat on the mat.' - encoded = text.hashing_trick(sample_text, 5, hash_function='md5') - self.assertLen(encoded, 6) - self.assertLessEqual(np.max(encoded), 4) - self.assertGreaterEqual(np.min(encoded), 1) - - def test_tokenizer(self): - sample_texts = [ - 'The cat sat on the mat.', 'The dog sat on the log.', - 'Dogs and cats living together.' 
- ] - tokenizer = text.Tokenizer(num_words=10) - tokenizer.fit_on_texts(sample_texts) - - sequences = [] - for seq in tokenizer.texts_to_sequences_generator(sample_texts): - sequences.append(seq) - self.assertLess(np.max(np.max(sequences)), 10) - self.assertEqual(np.min(np.min(sequences)), 1) - - tokenizer.fit_on_sequences(sequences) - - for mode in ['binary', 'count', 'tfidf', 'freq']: - tokenizer.texts_to_matrix(sample_texts, mode) - - def test_tokenizer_serde_no_fitting(self): - tokenizer = text.Tokenizer(num_words=100) - - tokenizer_json = tokenizer.to_json() - recovered = text.tokenizer_from_json(tokenizer_json) - - self.assertEqual(tokenizer.get_config(), recovered.get_config()) - - self.assertEqual(tokenizer.word_docs, recovered.word_docs) - self.assertEqual(tokenizer.word_counts, recovered.word_counts) - self.assertEqual(tokenizer.word_index, recovered.word_index) - self.assertEqual(tokenizer.index_word, recovered.index_word) - self.assertEqual(tokenizer.index_docs, recovered.index_docs) - - def test_tokenizer_serde_fitting(self): - sample_texts = [ - 'There was a time that the pieces fit, but I watched them fall away', - 'Mildewed and smoldering, strangled by our coveting', - 'I\'ve done the math enough to know the dangers of our second guessing' - ] - tokenizer = text.Tokenizer(num_words=100) - tokenizer.fit_on_texts(sample_texts) - - seq_generator = tokenizer.texts_to_sequences_generator(sample_texts) - sequences = [seq for seq in seq_generator] - tokenizer.fit_on_sequences(sequences) - - tokenizer_json = tokenizer.to_json() - recovered = text.tokenizer_from_json(tokenizer_json) - - self.assertEqual(tokenizer.char_level, recovered.char_level) - self.assertEqual(tokenizer.document_count, recovered.document_count) - self.assertEqual(tokenizer.filters, recovered.filters) - self.assertEqual(tokenizer.lower, recovered.lower) - self.assertEqual(tokenizer.num_words, recovered.num_words) - self.assertEqual(tokenizer.oov_token, recovered.oov_token) - - self.assertEqual(tokenizer.word_docs, recovered.word_docs) - self.assertEqual(tokenizer.word_counts, recovered.word_counts) - self.assertEqual(tokenizer.word_index, recovered.word_index) - self.assertEqual(tokenizer.index_word, recovered.index_word) - self.assertEqual(tokenizer.index_docs, recovered.index_docs) - - def test_sequential_fit(self): - texts = [ - 'The cat sat on the mat.', 'The dog sat on the log.', - 'Dogs and cats living together.' - ] - word_sequences = [['The', 'cat', 'is', 'sitting'], - ['The', 'dog', 'is', 'standing']] - - tokenizer = text.Tokenizer() - tokenizer.fit_on_texts(texts) - tokenizer.fit_on_texts(word_sequences) - - self.assertEqual(tokenizer.document_count, 5) - - tokenizer.texts_to_matrix(texts) - tokenizer.texts_to_matrix(word_sequences) - - def test_text_to_word_sequence(self): - sample_text = 'hello! ? world!' - self.assertEqual( - text.text_to_word_sequence(sample_text), ['hello', 'world']) - - def test_text_to_word_sequence_multichar_split(self): - sample_text = 'hello!stop?world!' - self.assertEqual( - text.text_to_word_sequence(sample_text, split='stop'), - ['hello', 'world']) - - def test_text_to_word_sequence_unicode(self): - sample_text = u'ali! veli? 
kırk dokuz elli' - self.assertEqual( - text.text_to_word_sequence(sample_text), - [u'ali', u'veli', u'kırk', u'dokuz', u'elli']) - - def test_text_to_word_sequence_unicode_multichar_split(self): - sample_text = u'ali!stopveli?stopkırkstopdokuzstopelli' - self.assertEqual( - text.text_to_word_sequence(sample_text, split='stop'), - [u'ali', u'veli', u'kırk', u'dokuz', u'elli']) - - def test_tokenizer_unicode(self): - sample_texts = [ - u'ali veli kırk dokuz elli', u'ali veli kırk dokuz elli veli kırk dokuz' - ] - tokenizer = text.Tokenizer(num_words=5) - tokenizer.fit_on_texts(sample_texts) - - self.assertLen(tokenizer.word_counts, 5) - - def test_tokenizer_oov_flag(self): - """Test of Out of Vocabulary (OOV) flag in text.Tokenizer.""" - x_train = ['This text has only known words'] - x_test = ['This text has some unknown words'] # 2 OOVs: some, unknown - - # Default, without OOV flag - tokenizer = text.Tokenizer() - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - self.assertLen(x_test_seq[0], 4) # discards 2 OOVs - - # With OOV feature - tokenizer = text.Tokenizer(oov_token='') - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - self.assertLen(x_test_seq[0], 6) # OOVs marked in place - - def test_tokenizer_oov_flag_and_num_words(self): - x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = text.Tokenizer(num_words=3, oov_token='') - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = ' '.join(tokenizer.index_word[t] for t in x_test_seq[0]) - self.assertLen(x_test_seq[0], 6) - self.assertEqual(trans_text, 'this ') - - def test_sequences_to_texts_with_num_words_and_oov_token(self): - x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = text.Tokenizer(num_words=3, oov_token='') - - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = tokenizer.sequences_to_texts(x_test_seq) - self.assertEqual(trans_text, ['this ']) - - def test_sequences_to_texts_no_num_words(self): - x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = text.Tokenizer(oov_token='') - - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = tokenizer.sequences_to_texts(x_test_seq) - self.assertEqual(trans_text, ['this text has words']) - - def test_sequences_to_texts_no_oov_token(self): - x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = text.Tokenizer(num_words=3) - - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = tokenizer.sequences_to_texts(x_test_seq) - self.assertEqual(trans_text, ['this text']) - - def test_sequences_to_texts_no_num_words_no_oov_token(self): - x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = text.Tokenizer() - - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = tokenizer.sequences_to_texts(x_test_seq) - self.assertEqual(trans_text, ['this text has words']) - - def test_sequences_to_texts(self): - texts = [ - 'The cat sat on the mat.', 'The dog sat on the log.', - 'Dogs and cats living together.' 
- ] - tokenizer = text.Tokenizer(num_words=10, oov_token='') - tokenizer.fit_on_texts(texts) - tokenized_text = tokenizer.texts_to_sequences(texts) - trans_text = tokenizer.sequences_to_texts(tokenized_text) - self.assertEqual(trans_text, [ - 'the cat sat on the mat', 'the dog sat on the log', - 'dogs ' - ]) - - def test_tokenizer_lower_flag(self): - """Tests for `lower` flag in text.Tokenizer.""" - # word level tokenizer with sentences as texts - word_tokenizer = text.Tokenizer(lower=True) - texts = [ - 'The cat sat on the mat.', 'The dog sat on the log.', - 'Dog and Cat living Together.' - ] - word_tokenizer.fit_on_texts(texts) - expected_word_counts = collections.OrderedDict([('the', 4), ('cat', 2), - ('sat', 2), ('on', 2), - ('mat', 1), ('dog', 2), - ('log', 1), ('and', 1), - ('living', 1), - ('together', 1)]) - self.assertEqual(word_tokenizer.word_counts, expected_word_counts) - - # word level tokenizer with word_sequences as texts - word_tokenizer = text.Tokenizer(lower=True) - word_sequences = [['The', 'cat', 'is', 'sitting'], - ['The', 'dog', 'is', 'standing']] - word_tokenizer.fit_on_texts(word_sequences) - expected_word_counts = collections.OrderedDict([('the', 2), ('cat', 1), - ('is', 2), ('sitting', 1), - ('dog', 1), - ('standing', 1)]) - self.assertEqual(word_tokenizer.word_counts, expected_word_counts) - - # char level tokenizer with sentences as texts - char_tokenizer = text.Tokenizer(lower=True, char_level=True) - texts = [ - 'The cat sat on the mat.', 'The dog sat on the log.', - 'Dog and Cat living Together.' - ] - char_tokenizer.fit_on_texts(texts) - expected_word_counts = collections.OrderedDict([ - ('t', 11), ('h', 5), ('e', 6), (' ', 14), ('c', 2), ('a', 6), ('s', 2), - ('o', 6), ('n', 4), ('m', 1), ('.', 3), ('d', 3), ('g', 5), ('l', 2), - ('i', 2), ('v', 1), ('r', 1) - ]) - self.assertEqual(char_tokenizer.word_counts, expected_word_counts) - - -if __name__ == '__main__': - tf.test.main() + def test_one_hot(self): + sample_text = "The cat sat on the mat." + encoded = text.one_hot(sample_text, 5) + self.assertLen(encoded, 6) + self.assertLessEqual(np.max(encoded), 4) + self.assertGreaterEqual(np.min(encoded), 0) + + sample_text = "The-cat-sat-on-the-mat" + encoded2 = text.one_hot( + sample_text, 5, analyzer=lambda t: t.lower().split("-") + ) + self.assertEqual(encoded, encoded2) + self.assertLen(encoded, 6) + self.assertLessEqual(np.max(encoded), 4) + self.assertGreaterEqual(np.min(encoded), 0) + + def test_hashing_trick_hash(self): + sample_text = "The cat sat on the mat." + encoded = text.hashing_trick(sample_text, 5) + self.assertLen(encoded, 6) + self.assertLessEqual(np.max(encoded), 4) + self.assertGreaterEqual(np.min(encoded), 1) + + def test_hashing_trick_md5(self): + sample_text = "The cat sat on the mat." 
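+        # 'md5' selects the stable hashlib-based hash, so the indices checked
+        # below are reproducible across runs, unlike Python's per-process
+        # salted built-in hash() used by the default hash_function.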
+ encoded = text.hashing_trick(sample_text, 5, hash_function="md5") + self.assertLen(encoded, 6) + self.assertLessEqual(np.max(encoded), 4) + self.assertGreaterEqual(np.min(encoded), 1) + + def test_tokenizer(self): + sample_texts = [ + "The cat sat on the mat.", + "The dog sat on the log.", + "Dogs and cats living together.", + ] + tokenizer = text.Tokenizer(num_words=10) + tokenizer.fit_on_texts(sample_texts) + + sequences = [] + for seq in tokenizer.texts_to_sequences_generator(sample_texts): + sequences.append(seq) + self.assertLess(np.max(np.max(np.asarray(sequences, dtype=object))), 10) + self.assertEqual(np.min(np.min(np.asarray(sequences, dtype=object))), 1) + + tokenizer.fit_on_sequences(sequences) + + for mode in ["binary", "count", "tfidf", "freq"]: + tokenizer.texts_to_matrix(sample_texts, mode) + + def test_tokenizer_serde_no_fitting(self): + tokenizer = text.Tokenizer(num_words=100) + + tokenizer_json = tokenizer.to_json() + recovered = text.tokenizer_from_json(tokenizer_json) + + self.assertEqual(tokenizer.get_config(), recovered.get_config()) + + self.assertEqual(tokenizer.word_docs, recovered.word_docs) + self.assertEqual(tokenizer.word_counts, recovered.word_counts) + self.assertEqual(tokenizer.word_index, recovered.word_index) + self.assertEqual(tokenizer.index_word, recovered.index_word) + self.assertEqual(tokenizer.index_docs, recovered.index_docs) + + def test_tokenizer_serde_fitting(self): + sample_texts = [ + "There was a time that the pieces fit, but I watched " + "them fall away", + "Mildewed and smoldering, strangled by our coveting", + "I've done the math enough to know the dangers of our second " + "guessing", + ] + tokenizer = text.Tokenizer(num_words=100) + tokenizer.fit_on_texts(sample_texts) + + seq_generator = tokenizer.texts_to_sequences_generator(sample_texts) + sequences = [seq for seq in seq_generator] + tokenizer.fit_on_sequences(sequences) + + tokenizer_json = tokenizer.to_json() + recovered = text.tokenizer_from_json(tokenizer_json) + + self.assertEqual(tokenizer.char_level, recovered.char_level) + self.assertEqual(tokenizer.document_count, recovered.document_count) + self.assertEqual(tokenizer.filters, recovered.filters) + self.assertEqual(tokenizer.lower, recovered.lower) + self.assertEqual(tokenizer.num_words, recovered.num_words) + self.assertEqual(tokenizer.oov_token, recovered.oov_token) + + self.assertEqual(tokenizer.word_docs, recovered.word_docs) + self.assertEqual(tokenizer.word_counts, recovered.word_counts) + self.assertEqual(tokenizer.word_index, recovered.word_index) + self.assertEqual(tokenizer.index_word, recovered.index_word) + self.assertEqual(tokenizer.index_docs, recovered.index_docs) + + def test_sequential_fit(self): + texts = [ + "The cat sat on the mat.", + "The dog sat on the log.", + "Dogs and cats living together.", + ] + word_sequences = [ + ["The", "cat", "is", "sitting"], + ["The", "dog", "is", "standing"], + ] + + tokenizer = text.Tokenizer() + tokenizer.fit_on_texts(texts) + tokenizer.fit_on_texts(word_sequences) + + self.assertEqual(tokenizer.document_count, 5) + + tokenizer.texts_to_matrix(texts) + tokenizer.texts_to_matrix(word_sequences) + + def test_text_to_word_sequence(self): + sample_text = "hello! ? world!" + self.assertEqual( + text.text_to_word_sequence(sample_text), ["hello", "world"] + ) + + def test_text_to_word_sequence_multichar_split(self): + sample_text = "hello!stop?world!" 
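+        # Every character in `filters` (here '!' and '?') is first replaced
+        # by the `split` token, then the string is split on it and empty
+        # pieces are dropped, leaving ['hello', 'world'].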
+ self.assertEqual( + text.text_to_word_sequence(sample_text, split="stop"), + ["hello", "world"], + ) + + def test_text_to_word_sequence_unicode(self): + sample_text = "ali! veli? kırk dokuz elli" + self.assertEqual( + text.text_to_word_sequence(sample_text), + ["ali", "veli", "kırk", "dokuz", "elli"], + ) + + def test_text_to_word_sequence_unicode_multichar_split(self): + sample_text = "ali!stopveli?stopkırkstopdokuzstopelli" + self.assertEqual( + text.text_to_word_sequence(sample_text, split="stop"), + ["ali", "veli", "kırk", "dokuz", "elli"], + ) + + def test_tokenizer_unicode(self): + sample_texts = [ + "ali veli kırk dokuz elli", + "ali veli kırk dokuz elli veli kırk dokuz", + ] + tokenizer = text.Tokenizer(num_words=5) + tokenizer.fit_on_texts(sample_texts) + + self.assertLen(tokenizer.word_counts, 5) + + def test_tokenizer_oov_flag(self): + """Test of Out of Vocabulary (OOV) flag in text.Tokenizer.""" + x_train = ["This text has only known words"] + x_test = ["This text has some unknown words"] # 2 OOVs: some, unknown + + # Default, without OOV flag + tokenizer = text.Tokenizer() + tokenizer.fit_on_texts(x_train) + x_test_seq = tokenizer.texts_to_sequences(x_test) + self.assertLen(x_test_seq[0], 4) # discards 2 OOVs + + # With OOV feature + tokenizer = text.Tokenizer(oov_token="") + tokenizer.fit_on_texts(x_train) + x_test_seq = tokenizer.texts_to_sequences(x_test) + self.assertLen(x_test_seq[0], 6) # OOVs marked in place + + def test_tokenizer_oov_flag_and_num_words(self): + x_train = ["This text has only known words this text"] + x_test = ["This text has some unknown words"] + + tokenizer = text.Tokenizer(num_words=3, oov_token="") + tokenizer.fit_on_texts(x_train) + x_test_seq = tokenizer.texts_to_sequences(x_test) + trans_text = " ".join(tokenizer.index_word[t] for t in x_test_seq[0]) + self.assertLen(x_test_seq[0], 6) + self.assertEqual(trans_text, "this ") + + def test_sequences_to_texts_with_num_words_and_oov_token(self): + x_train = ["This text has only known words this text"] + x_test = ["This text has some unknown words"] + + tokenizer = text.Tokenizer(num_words=3, oov_token="") + + tokenizer.fit_on_texts(x_train) + x_test_seq = tokenizer.texts_to_sequences(x_test) + trans_text = tokenizer.sequences_to_texts(x_test_seq) + self.assertEqual(trans_text, ["this "]) + + def test_sequences_to_texts_no_num_words(self): + x_train = ["This text has only known words this text"] + x_test = ["This text has some unknown words"] + + tokenizer = text.Tokenizer(oov_token="") + + tokenizer.fit_on_texts(x_train) + x_test_seq = tokenizer.texts_to_sequences(x_test) + trans_text = tokenizer.sequences_to_texts(x_test_seq) + self.assertEqual(trans_text, ["this text has words"]) + + def test_sequences_to_texts_no_oov_token(self): + x_train = ["This text has only known words this text"] + x_test = ["This text has some unknown words"] + + tokenizer = text.Tokenizer(num_words=3) + + tokenizer.fit_on_texts(x_train) + x_test_seq = tokenizer.texts_to_sequences(x_test) + trans_text = tokenizer.sequences_to_texts(x_test_seq) + self.assertEqual(trans_text, ["this text"]) + + def test_sequences_to_texts_no_num_words_no_oov_token(self): + x_train = ["This text has only known words this text"] + x_test = ["This text has some unknown words"] + + tokenizer = text.Tokenizer() + + tokenizer.fit_on_texts(x_train) + x_test_seq = tokenizer.texts_to_sequences(x_test) + trans_text = tokenizer.sequences_to_texts(x_test_seq) + self.assertEqual(trans_text, ["this text has words"]) + + def test_sequences_to_texts(self): 
+    def test_sequences_to_texts(self):
+        texts = [
+            "The cat sat on the mat.",
+            "The dog sat on the log.",
+            "Dogs and cats living together.",
+        ]
+        tokenizer = text.Tokenizer(num_words=10, oov_token="<unk>")
+        tokenizer.fit_on_texts(texts)
+        tokenized_text = tokenizer.texts_to_sequences(texts)
+        trans_text = tokenizer.sequences_to_texts(tokenized_text)
+        self.assertEqual(
+            trans_text,
+            [
+                "the cat sat on the mat",
+                "the dog sat on the log",
+                "dogs <unk> <unk> <unk> <unk>",
+            ],
+        )
+
+    def test_tokenizer_lower_flag(self):
+        """Tests for `lower` flag in text.Tokenizer."""
+        # word level tokenizer with sentences as texts
+        word_tokenizer = text.Tokenizer(lower=True)
+        texts = [
+            "The cat sat on the mat.",
+            "The dog sat on the log.",
+            "Dog and Cat living Together.",
+        ]
+        word_tokenizer.fit_on_texts(texts)
+        expected_word_counts = collections.OrderedDict(
+            [
+                ("the", 4),
+                ("cat", 2),
+                ("sat", 2),
+                ("on", 2),
+                ("mat", 1),
+                ("dog", 2),
+                ("log", 1),
+                ("and", 1),
+                ("living", 1),
+                ("together", 1),
+            ]
+        )
+        self.assertEqual(word_tokenizer.word_counts, expected_word_counts)
+
+        # word level tokenizer with word_sequences as texts
+        word_tokenizer = text.Tokenizer(lower=True)
+        word_sequences = [
+            ["The", "cat", "is", "sitting"],
+            ["The", "dog", "is", "standing"],
+        ]
+        word_tokenizer.fit_on_texts(word_sequences)
+        expected_word_counts = collections.OrderedDict(
+            [
+                ("the", 2),
+                ("cat", 1),
+                ("is", 2),
+                ("sitting", 1),
+                ("dog", 1),
+                ("standing", 1),
+            ]
+        )
+        self.assertEqual(word_tokenizer.word_counts, expected_word_counts)
+
+        # char level tokenizer with sentences as texts
+        char_tokenizer = text.Tokenizer(lower=True, char_level=True)
+        texts = [
+            "The cat sat on the mat.",
+            "The dog sat on the log.",
+            "Dog and Cat living Together.",
+        ]
+        char_tokenizer.fit_on_texts(texts)
+        expected_word_counts = collections.OrderedDict(
+            [
+                ("t", 11),
+                ("h", 5),
+                ("e", 6),
+                (" ", 14),
+                ("c", 2),
+                ("a", 6),
+                ("s", 2),
+                ("o", 6),
+                ("n", 4),
+                ("m", 1),
+                (".", 3),
+                ("d", 3),
+                ("g", 5),
+                ("l", 2),
+                ("i", 2),
+                ("v", 1),
+                ("r", 1),
+            ]
+        )
+        self.assertEqual(char_tokenizer.word_counts, expected_word_counts)
+
+
+if __name__ == "__main__":
+    tf.test.main()
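Worth noting while reading the multichar-split tests above: `text_to_word_sequence` first maps every character in `filters` to the `split` string and only then splits, discarding empty fragments, which is why punctuation glued to the separator never leaks into tokens. A quick sketch with a hypothetical input:

```python
from keras.preprocessing import text

# '!' and '?' sit in the default `filters`, so each is rewritten to the
# split string "stop" before splitting; empty fragments are dropped.
print(text.text_to_word_sequence("hello!stopworld?", split="stop"))
# ['hello', 'world']
```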
diff --git a/keras/protobuf/BUILD b/keras/protobuf/BUILD
index 413dcb74d90b..e2f9c1f3ba70 100644
--- a/keras/protobuf/BUILD
+++ b/keras/protobuf/BUILD
@@ -4,6 +4,7 @@ load("@com_google_protobuf//:protobuf.bzl", "py_proto_library")
 package(
+    # copybara:uncomment default_applicable_licenses = ["//keras:license"],
     default_visibility = [
         "//keras:friends",
     ],
diff --git a/keras/regularizers.py b/keras/regularizers.py
index 627f481c3eb8..763b99097000 100644
--- a/keras/regularizers.py
+++ b/keras/regularizers.py
@@ -13,386 +13,403 @@
 # limitations under the License.
 # ==============================================================================
 """Built-in regularizers."""
-# pylint: disable=g-classes-have-attributes
-# pylint: disable=invalid-name
-import tensorflow.compat.v2 as tf
 import math
+import warnings
+
+import tensorflow.compat.v2 as tf

 from keras import backend
-from keras.utils.generic_utils import deserialize_keras_object
-from keras.utils.generic_utils import serialize_keras_object
+from keras.saving.legacy import serialization as legacy_serialization
+from keras.saving.serialization_lib import deserialize_keras_object
+from keras.saving.serialization_lib import serialize_keras_object
+
+# isort: off
 from tensorflow.python.util.tf_export import keras_export


 def _check_penalty_number(x):
-  """check penalty number availability, raise ValueError if failed."""
-  if not isinstance(x, (float, int)):
-    raise ValueError(
-        f'Value {x} is not a valid regularization penalty number, '
-        'expected an int or float value.')
+    """Check penalty number availability, raise ValueError if failed."""
+    if not isinstance(x, (float, int)):
+        raise ValueError(
+            f"Value {x} is not a valid regularization penalty number, "
+            "expected an int or float value."
+        )

-  if math.isinf(x) or math.isnan(x):
-    raise ValueError(
-        f'Value {x} is not a valid regularization penalty number, '
-        'an infinite number or NaN are not valid values.')
+    if math.isinf(x) or math.isnan(x):
+        raise ValueError(
+            f"Value {x} is not a valid regularization penalty number, "
+            "an infinite number or NaN are not valid values."
+        )


 def _none_to_default(inputs, default):
-  return default if inputs is None else default
+    return default if inputs is None else inputs


-@keras_export('keras.regularizers.Regularizer')
+@keras_export("keras.regularizers.Regularizer")
 class Regularizer:
-  """Regularizer base class.
-
-  Regularizers allow you to apply penalties on layer parameters or layer
-  activity during optimization. These penalties are summed into the loss
-  function that the network optimizes.
-
-  Regularization penalties are applied on a per-layer basis. The exact API will
-  depend on the layer, but many layers (e.g. `Dense`, `Conv1D`, `Conv2D` and
-  `Conv3D`) have a unified API.
-
-  These layers expose 3 keyword arguments:
-
-  - `kernel_regularizer`: Regularizer to apply a penalty on the layer's kernel
-  - `bias_regularizer`: Regularizer to apply a penalty on the layer's bias
-  - `activity_regularizer`: Regularizer to apply a penalty on the layer's output
-
-  All layers (including custom layers) expose `activity_regularizer` as a
-  settable property, whether or not it is in the constructor arguments.
-
-  The value returned by the `activity_regularizer` is divided by the input
-  batch size so that the relative weighting between the weight regularizers and
-  the activity regularizers does not change with the batch size.
-
-  You can access a layer's regularization penalties by calling `layer.losses`
-  after calling the layer on inputs.
-
-  ## Example
-
-  >>> layer = tf.keras.layers.Dense(
-  ...     5, input_dim=5,
-  ...     kernel_initializer='ones',
-  ...     kernel_regularizer=tf.keras.regularizers.L1(0.01),
-  ...
activity_regularizer=tf.keras.regularizers.L2(0.01)) - >>> tensor = tf.ones(shape=(5, 5)) * 2.0 - >>> out = layer(tensor) - - >>> # The kernel regularization term is 0.25 - >>> # The activity regularization term (after dividing by the batch size) is 5 - >>> tf.math.reduce_sum(layer.losses) - - - ## Available penalties - - ```python - tf.keras.regularizers.L1(0.3) # L1 Regularization Penalty - tf.keras.regularizers.L2(0.1) # L2 Regularization Penalty - tf.keras.regularizers.L1L2(l1=0.01, l2=0.01) # L1 + L2 penalties - ``` - - ## Directly calling a regularizer - - Compute a regularization loss on a tensor by directly calling a regularizer - as if it is a one-argument function. - - E.g. - >>> regularizer = tf.keras.regularizers.L2(2.) - >>> tensor = tf.ones(shape=(5, 5)) - >>> regularizer(tensor) - - - - ## Developing new regularizers - - Any function that takes in a weight matrix and returns a scalar - tensor can be used as a regularizer, e.g.: - - >>> @tf.keras.utils.register_keras_serializable(package='Custom', name='l1') - ... def l1_reg(weight_matrix): - ... return 0.01 * tf.math.reduce_sum(tf.math.abs(weight_matrix)) - ... - >>> layer = tf.keras.layers.Dense(5, input_dim=5, - ... kernel_initializer='ones', kernel_regularizer=l1_reg) - >>> tensor = tf.ones(shape=(5, 5)) - >>> out = layer(tensor) - >>> layer.losses - [] - - Alternatively, you can write your custom regularizers in an - object-oriented way by extending this regularizer base class, e.g.: - - >>> @tf.keras.utils.register_keras_serializable(package='Custom', name='l2') - ... class L2Regularizer(tf.keras.regularizers.Regularizer): - ... def __init__(self, l2=0.): # pylint: disable=redefined-outer-name - ... self.l2 = l2 - ... - ... def __call__(self, x): - ... return self.l2 * tf.math.reduce_sum(tf.math.square(x)) - ... - ... def get_config(self): - ... return {'l2': float(self.l2)} - ... - >>> layer = tf.keras.layers.Dense( - ... 5, input_dim=5, kernel_initializer='ones', - ... kernel_regularizer=L2Regularizer(l2=0.5)) - - >>> tensor = tf.ones(shape=(5, 5)) - >>> out = layer(tensor) - >>> layer.losses - [] - - ### A note on serialization and deserialization: - - Registering the regularizers as serializable is optional if you are just - training and executing models, exporting to and from SavedModels, or saving - and loading weight checkpoints. - - Registration is required for saving and - loading models to HDF5 format, Keras model cloning, some visualization - utilities, and exporting models to and from JSON. If using this functionality, - you must make sure any python process running your model has also defined - and registered your custom regularizer. - """ - - def __call__(self, x): - """Compute a regularization penalty from an input tensor.""" - return 0. - - @classmethod - def from_config(cls, config): - """Creates a regularizer from its config. - - This method is the reverse of `get_config`, - capable of instantiating the same regularizer from the config - dictionary. - - This method is used by Keras `model_to_estimator`, saving and - loading models to HDF5 formats, Keras model cloning, some visualization - utilities, and exporting models to and from JSON. + """Regularizer base class. + + Regularizers allow you to apply penalties on layer parameters or layer + activity during optimization. These penalties are summed into the loss + function that the network optimizes. + + Regularization penalties are applied on a per-layer basis. The exact API + will depend on the layer, but many layers (e.g. 
`Dense`, `Conv1D`, `Conv2D` + and `Conv3D`) have a unified API. + + These layers expose 3 keyword arguments: + + - `kernel_regularizer`: Regularizer to apply a penalty on the layer's kernel + - `bias_regularizer`: Regularizer to apply a penalty on the layer's bias + - `activity_regularizer`: Regularizer to apply a penalty on the layer's + output + + All layers (including custom layers) expose `activity_regularizer` as a + settable property, whether or not it is in the constructor arguments. + + The value returned by the `activity_regularizer` is divided by the input + batch size so that the relative weighting between the weight regularizers + and the activity regularizers does not change with the batch size. + + You can access a layer's regularization penalties by calling `layer.losses` + after calling the layer on inputs. + + ## Example + + >>> layer = tf.keras.layers.Dense( + ... 5, input_dim=5, + ... kernel_initializer='ones', + ... kernel_regularizer=tf.keras.regularizers.L1(0.01), + ... activity_regularizer=tf.keras.regularizers.L2(0.01)) + >>> tensor = tf.ones(shape=(5, 5)) * 2.0 + >>> out = layer(tensor) + + >>> # The kernel regularization term is 0.25 + >>> # The activity regularization term (after dividing by the batch size) + >>> # is 5 + >>> tf.math.reduce_sum(layer.losses) + + + ## Available penalties + + ```python + tf.keras.regularizers.L1(0.3) # L1 Regularization Penalty + tf.keras.regularizers.L2(0.1) # L2 Regularization Penalty + tf.keras.regularizers.L1L2(l1=0.01, l2=0.01) # L1 + L2 penalties + ``` + + ## Directly calling a regularizer + + Compute a regularization loss on a tensor by directly calling a regularizer + as if it is a one-argument function. + + E.g. + >>> regularizer = tf.keras.regularizers.L2(2.) + >>> tensor = tf.ones(shape=(5, 5)) + >>> regularizer(tensor) + + + + ## Developing new regularizers + + Any function that takes in a weight matrix and returns a scalar + tensor can be used as a regularizer, e.g.: + + >>> @tf.keras.utils.register_keras_serializable(package='Custom', name='l1') + ... def l1_reg(weight_matrix): + ... return 0.01 * tf.math.reduce_sum(tf.math.abs(weight_matrix)) + ... + >>> layer = tf.keras.layers.Dense(5, input_dim=5, + ... kernel_initializer='ones', kernel_regularizer=l1_reg) + >>> tensor = tf.ones(shape=(5, 5)) + >>> out = layer(tensor) + >>> layer.losses + [] + + Alternatively, you can write your custom regularizers in an + object-oriented way by extending this regularizer base class, e.g.: + + >>> @tf.keras.utils.register_keras_serializable(package='Custom', name='l2') + ... class L2Regularizer(tf.keras.regularizers.Regularizer): + ... def __init__(self, l2=0.): + ... self.l2 = l2 + ... + ... def __call__(self, x): + ... return self.l2 * tf.math.reduce_sum(tf.math.square(x)) + ... + ... def get_config(self): + ... return {'l2': float(self.l2)} + ... + >>> layer = tf.keras.layers.Dense( + ... 5, input_dim=5, kernel_initializer='ones', + ... kernel_regularizer=L2Regularizer(l2=0.5)) + + >>> tensor = tf.ones(shape=(5, 5)) + >>> out = layer(tensor) + >>> layer.losses + [] + + ### A note on serialization and deserialization: + + Registering the regularizers as serializable is optional if you are just + training and executing models, exporting to and from SavedModels, or saving + and loading weight checkpoints. + + Registration is required for saving and + loading models to HDF5 format, Keras model cloning, some visualization + utilities, and exporting models to and from JSON. 
If using this
+    functionality, you must make sure any python process running your model has
+    also defined and registered your custom regularizer.
+    """

-  def __call__(self, x):
-    """Compute a regularization penalty from an input tensor."""
-    return 0.
+    def __call__(self, x):
+        """Compute a regularization penalty from an input tensor."""
+        return 0.0

-  @classmethod
-  def from_config(cls, config):
-    """Creates a regularizer from its config.
-
-    This method is the reverse of `get_config`,
-    capable of instantiating the same regularizer from the config
-    dictionary.
-
-    This method is used by Keras `model_to_estimator`, saving and
-    loading models to HDF5 formats, Keras model cloning, some visualization
-    utilities, and exporting models to and from JSON.
-
-    Args:
-      config: A Python dictionary, typically the output of get_config.
-
-    Returns:
-      A regularizer instance.
-    """
-    return cls(**config)
+    @classmethod
+    def from_config(cls, config):
+        """Creates a regularizer from its config.
+
+        This method is the reverse of `get_config`,
+        capable of instantiating the same regularizer from the config
+        dictionary.
+
+        This method is used by Keras `model_to_estimator`, saving and
+        loading models to HDF5 formats, Keras model cloning, some visualization
+        utilities, and exporting models to and from JSON.
+
+        Args:
+            config: A Python dictionary, typically the output of get_config.
+
+        Returns:
+            A regularizer instance.
+        """
+        return cls(**config)

-  def get_config(self):
-    """Returns the config of the regularizer.
-
-    An regularizer config is a Python dictionary (serializable)
-    containing all configuration parameters of the regularizer.
-    The same regularizer can be reinstantiated later
-    (without any saved state) from this configuration.
-
-    This method is optional if you are just training and executing models,
-    exporting to and from SavedModels, or using weight checkpoints.
-
-    This method is required for Keras `model_to_estimator`, saving and
-    loading models to HDF5 formats, Keras model cloning, some visualization
-    utilities, and exporting models to and from JSON.
-
-    Returns:
-      Python dictionary.
-    """
-    raise NotImplementedError(f'{self} does not implement get_config()')
+    def get_config(self):
+        """Returns the config of the regularizer.
+
+        A regularizer config is a Python dictionary (serializable)
+        containing all configuration parameters of the regularizer.
+        The same regularizer can be reinstantiated later
+        (without any saved state) from this configuration.
+
+        This method is optional if you are just training and executing models,
+        exporting to and from SavedModels, or using weight checkpoints.
+
+        This method is required for Keras `model_to_estimator`, saving and
+        loading models to HDF5 formats, Keras model cloning, some visualization
+        utilities, and exporting models to and from JSON.
+
+        Returns:
+            Python dictionary.
+        """
+        raise NotImplementedError(f"{self} does not implement get_config()")


-@keras_export('keras.regularizers.L1L2')
+@keras_export("keras.regularizers.L1L2")
 class L1L2(Regularizer):
-  """A regularizer that applies both L1 and L2 regularization penalties.
-
-  The L1 regularization penalty is computed as:
-  `loss = l1 * reduce_sum(abs(x))`
-
-  The L2 regularization penalty is computed as
-  `loss = l2 * reduce_sum(square(x))`
-
-  L1L2 may be passed to a layer as a string identifier:
-
-  >>> dense = tf.keras.layers.Dense(3, kernel_regularizer='l1_l2')
-
-  In this case, the default values used are `l1=0.01` and `l2=0.01`.
-
-  Arguments:
-    l1: Float; L1 regularization factor.
-    l2: Float; L2 regularization factor.
-  """
+    """A regularizer that applies both L1 and L2 regularization penalties.
+
+    The L1 regularization penalty is computed as:
+    `loss = l1 * reduce_sum(abs(x))`
+
+    The L2 regularization penalty is computed as
+    `loss = l2 * reduce_sum(square(x))`
+
+    L1L2 may be passed to a layer as a string identifier:
+
+    >>> dense = tf.keras.layers.Dense(3, kernel_regularizer='l1_l2')
+
+    In this case, the default values used are `l1=0.01` and `l2=0.01`.
+
+    Arguments:
+        l1: Float; L1 regularization factor.
+        l2: Float; L2 regularization factor.
+    """

-  def __init__(self, l1=0., l2=0.):  # pylint: disable=redefined-outer-name
-    # The default value for l1 and l2 are different from the value in l1_l2
-    # for backward compatibility reason. Eg, L1L2(l2=0.1) will only have l2
-    # and no l1 penalty.
-    l1 = 0. if l1 is None else l1
-    l2 = 0. if l2 is None else l2
-    _check_penalty_number(l1)
-    _check_penalty_number(l2)
+    def __init__(self, l1=0.0, l2=0.0):
+        # The default values for l1 and l2 differ from those in l1_l2, for
+        # backward compatibility reasons. E.g., L1L2(l2=0.1) will only have
+        # an l2 penalty and no l1 penalty.
+        l1 = 0.0 if l1 is None else l1
+        l2 = 0.0 if l2 is None else l2
+        _check_penalty_number(l1)
+        _check_penalty_number(l2)

-    self.l1 = backend.cast_to_floatx(l1)
-    self.l2 = backend.cast_to_floatx(l2)
+        self.l1 = backend.cast_to_floatx(l1)
+        self.l2 = backend.cast_to_floatx(l2)

-  def __call__(self, x):
-    regularization = backend.constant(0., dtype=x.dtype)
-    if self.l1:
-      regularization += self.l1 * tf.reduce_sum(tf.abs(x))
-    if self.l2:
-      regularization += self.l2 * tf.reduce_sum(tf.square(x))
-    return regularization
+    def __call__(self, x):
+        regularization = backend.constant(0.0, dtype=x.dtype)
+        if self.l1:
+            regularization += self.l1 * tf.reduce_sum(tf.abs(x))
+        if self.l2:
+            # equivalent to "self.l2 * tf.reduce_sum(tf.square(x))"
+            regularization += 2.0 * self.l2 * tf.nn.l2_loss(x)
+        return regularization

-  def get_config(self):
-    return {'l1': float(self.l1), 'l2': float(self.l2)}
+    def get_config(self):
+        return {"l1": float(self.l1), "l2": float(self.l2)}


-@keras_export('keras.regularizers.L1', 'keras.regularizers.l1')
+@keras_export("keras.regularizers.L1", "keras.regularizers.l1")
 class L1(Regularizer):
-  """A regularizer that applies a L1 regularization penalty.
-
-  The L1 regularization penalty is computed as:
-  `loss = l1 * reduce_sum(abs(x))`
-
-  L1 may be passed to a layer as a string identifier:
-
-  >>> dense = tf.keras.layers.Dense(3, kernel_regularizer='l1')
-
-  In this case, the default value used is `l1=0.01`.
-
-  Arguments:
-    l1: Float; L1 regularization factor.
-  """
+    """A regularizer that applies an L1 regularization penalty.
+
+    The L1 regularization penalty is computed as:
+    `loss = l1 * reduce_sum(abs(x))`
+
+    L1 may be passed to a layer as a string identifier:
+
+    >>> dense = tf.keras.layers.Dense(3, kernel_regularizer='l1')
+
+    In this case, the default value used is `l1=0.01`.
+
+    Arguments:
+        l1: Float; L1 regularization factor.
+    """

-  def __init__(self, l1=0.01, **kwargs):  # pylint: disable=redefined-outer-name
-    l1 = kwargs.pop('l', l1)  # Backwards compatibility
-    if kwargs:
-      raise TypeError(f'Argument(s) not recognized: {kwargs}')
+    def __init__(self, l1=0.01, **kwargs):
+        l1 = kwargs.pop("l", l1)  # Backwards compatibility
+        if kwargs:
+            raise TypeError(f"Argument(s) not recognized: {kwargs}")

-    l1 = 0.01 if l1 is None else l1
-    _check_penalty_number(l1)
+        l1 = 0.01 if l1 is None else l1
+        _check_penalty_number(l1)

-    self.l1 = backend.cast_to_floatx(l1)
+        self.l1 = backend.cast_to_floatx(l1)

-  def __call__(self, x):
-    return self.l1 * tf.reduce_sum(tf.abs(x))
+    def __call__(self, x):
+        return self.l1 * tf.reduce_sum(tf.abs(x))

-  def get_config(self):
-    return {'l1': float(self.l1)}
+    def get_config(self):
+        return {"l1": float(self.l1)}

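The one behavioral nuance in this otherwise mechanical reformat is the switch in `L1L2.__call__` above (and in `L2.__call__` below) from `self.l2 * tf.reduce_sum(tf.square(x))` to `2.0 * self.l2 * tf.nn.l2_loss(x)`. Since `tf.nn.l2_loss(t)` computes `sum(t ** 2) / 2`, the two forms are mathematically identical; the fused op may simply be cheaper on some backends. A quick numeric check (a sketch, not part of the diff):

```python
import tensorflow as tf

x = tf.constant([[1.0, -2.0], [3.0, 4.0]])
l2 = 0.01

a = l2 * tf.reduce_sum(tf.square(x))  # original formulation
b = 2.0 * l2 * tf.nn.l2_loss(x)       # tf.nn.l2_loss(x) == sum(x**2) / 2

tf.debugging.assert_near(a, b)  # both are 0.01 * (1 + 4 + 9 + 16) = 0.3
```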
+ """A regularizer that applies a L2 regularization penalty. - The L2 regularization penalty is computed as: - `loss = l2 * reduce_sum(square(x))` + The L2 regularization penalty is computed as: + `loss = l2 * reduce_sum(square(x))` - L2 may be passed to a layer as a string identifier: + L2 may be passed to a layer as a string identifier: - >>> dense = tf.keras.layers.Dense(3, kernel_regularizer='l2') + >>> dense = tf.keras.layers.Dense(3, kernel_regularizer='l2') - In this case, the default value used is `l2=0.01`. + In this case, the default value used is `l2=0.01`. - Arguments: - l2: Float; L2 regularization factor. - """ + Arguments: + l2: Float; L2 regularization factor. + """ - def __init__(self, l2=0.01, **kwargs): # pylint: disable=redefined-outer-name - l2 = kwargs.pop('l', l2) # Backwards compatibility - if kwargs: - raise TypeError(f'Argument(s) not recognized: {kwargs}') + def __init__(self, l2=0.01, **kwargs): + l2 = kwargs.pop("l", l2) # Backwards compatibility + if kwargs: + raise TypeError(f"Argument(s) not recognized: {kwargs}") - l2 = 0.01 if l2 is None else l2 - _check_penalty_number(l2) + l2 = 0.01 if l2 is None else l2 + _check_penalty_number(l2) - self.l2 = backend.cast_to_floatx(l2) + self.l2 = backend.cast_to_floatx(l2) - def __call__(self, x): - return self.l2 * tf.reduce_sum(tf.square(x)) + def __call__(self, x): + # equivalent to "self.l2 * tf.reduce_sum(tf.square(x))" + return 2.0 * self.l2 * tf.nn.l2_loss(x) - def get_config(self): - return {'l2': float(self.l2)} + def get_config(self): + return {"l2": float(self.l2)} @keras_export( - 'keras.regularizers.OrthogonalRegularizer', - 'keras.regularizers.orthogonal_regularizer', - v1=[]) + "keras.regularizers.OrthogonalRegularizer", + "keras.regularizers.orthogonal_regularizer", + v1=[], +) class OrthogonalRegularizer(Regularizer): - """A regularizer that encourages input vectors to be orthogonal to each other. - - It can be applied to either the rows of a matrix (`mode="rows"`) or its - columns (`mode="columns"`). When applied to a `Dense` kernel of shape - `(input_dim, units)`, rows mode will seek to make the feature vectors - (i.e. the basis of the output space) orthogonal to each other. - - Arguments: - factor: Float. The regularization factor. The regularization penalty will - be proportional to `factor` times the mean of the dot products between - the L2-normalized rows (if `mode="rows"`, or columns if `mode="columns"`) - of the inputs, excluding the product of each row/column with itself. - Defaults to 0.01. - mode: String, one of `{"rows", "columns"}`. Defaults to `"rows"`. In rows - mode, the regularization effect seeks to make the rows of the input - orthogonal to each other. In columns mode, it seeks to make the columns - of the input orthogonal to each other. - - Example: - - >>> regularizer = tf.keras.regularizers.OrthogonalRegularizer(factor=0.01) - >>> layer = tf.keras.layers.Dense(units=4, kernel_regularizer=regularizer) - """ - - def __init__(self, factor=0.01, mode='rows'): - _check_penalty_number(factor) - self.factor = backend.cast_to_floatx(factor) - if mode not in {'rows', 'columns'}: - raise ValueError('Invalid value for argument `mode`. Expected one of ' - f'{{"rows", "columns"}}. Received: mode={mode}') - self.mode = mode - - def __call__(self, inputs): - if inputs.shape.rank != 2: - raise ValueError( - 'Inputs to OrthogonalRegularizer must have rank 2. 
Received: ' - f'inputs.shape == {inputs.shape}') - if self.mode == 'rows': - inputs = tf.math.l2_normalize(inputs, axis=1) - product = tf.matmul(inputs, tf.transpose(inputs)) - size = inputs.shape[0] - else: - inputs = tf.math.l2_normalize(inputs, axis=0) - product = tf.matmul(tf.transpose(inputs), inputs) - size = inputs.shape[1] - product_no_diagonal = product * (1. - tf.eye(size, dtype=inputs.dtype)) - num_pairs = size * (size - 1.) / 2. - return self.factor * 0.5 * tf.reduce_sum( - tf.abs(product_no_diagonal)) / num_pairs - - def get_config(self): - return {'factor': float(self.factor), 'mode': self.mode} - - -@keras_export('keras.regularizers.l1_l2') -def l1_l2(l1=0.01, l2=0.01): # pylint: disable=redefined-outer-name - r"""Create a regularizer that applies both L1 and L2 penalties. - - The L1 regularization penalty is computed as: - `loss = l1 * reduce_sum(abs(x))` + """Regularizer that encourages input vectors to be orthogonal to each other. + + It can be applied to either the rows of a matrix (`mode="rows"`) or its + columns (`mode="columns"`). When applied to a `Dense` kernel of shape + `(input_dim, units)`, rows mode will seek to make the feature vectors + (i.e. the basis of the output space) orthogonal to each other. + + Arguments: + factor: Float. The regularization factor. The regularization penalty will + be proportional to `factor` times the mean of the dot products between + the L2-normalized rows (if `mode="rows"`, or columns if + `mode="columns"`) of the inputs, excluding the product of each + row/column with itself. Defaults to 0.01. + mode: String, one of `{"rows", "columns"}`. Defaults to `"rows"`. In rows + mode, the regularization effect seeks to make the rows of the input + orthogonal to each other. In columns mode, it seeks to make the columns + of the input orthogonal to each other. + + Example: + + >>> regularizer = tf.keras.regularizers.OrthogonalRegularizer(factor=0.01) + >>> layer = tf.keras.layers.Dense(units=4, kernel_regularizer=regularizer) + """ - The L2 regularization penalty is computed as: - `loss = l2 * reduce_sum(square(x))` + def __init__(self, factor=0.01, mode="rows"): + _check_penalty_number(factor) + self.factor = backend.cast_to_floatx(factor) + if mode not in {"rows", "columns"}: + raise ValueError( + "Invalid value for argument `mode`. Expected one of " + f'{{"rows", "columns"}}. Received: mode={mode}' + ) + self.mode = mode + + def __call__(self, inputs): + if inputs.shape.rank != 2: + raise ValueError( + "Inputs to OrthogonalRegularizer must have rank 2. Received: " + f"inputs.shape == {inputs.shape}" + ) + if self.mode == "rows": + inputs = tf.math.l2_normalize(inputs, axis=1) + product = tf.matmul(inputs, tf.transpose(inputs)) + size = inputs.shape[0] + else: + inputs = tf.math.l2_normalize(inputs, axis=0) + product = tf.matmul(tf.transpose(inputs), inputs) + size = inputs.shape[1] + product_no_diagonal = product * (1.0 - tf.eye(size, dtype=inputs.dtype)) + num_pairs = size * (size - 1.0) / 2.0 + return ( + self.factor + * 0.5 + * tf.reduce_sum(tf.abs(product_no_diagonal)) + / num_pairs + ) + + def get_config(self): + return {"factor": float(self.factor), "mode": self.mode} + + +@keras_export("keras.regularizers.l1_l2") +def l1_l2(l1=0.01, l2=0.01): + r"""Create a regularizer that applies both L1 and L2 penalties. + + The L1 regularization penalty is computed as: + `loss = l1 * reduce_sum(abs(x))` + + The L2 regularization penalty is computed as: + `loss = l2 * reduce_sum(square(x))` - Args: - l1: Float; L1 regularization factor. 
- l2: Float; L2 regularization factor. + Args: + l1: Float; L1 regularization factor. + l2: Float; L2 regularization factor. - Returns: - An L1L2 Regularizer with the given regularization factors. - """ - return L1L2(l1=l1, l2=l2) + Returns: + An L1L2 Regularizer with the given regularization factors. + """ + return L1L2(l1=l1, l2=l2) # Deserialization aliases. @@ -401,35 +418,56 @@ def l1_l2(l1=0.01, l2=0.01): # pylint: disable=redefined-outer-name orthogonal_regularizer = OrthogonalRegularizer -@keras_export('keras.regularizers.serialize') -def serialize(regularizer): - return serialize_keras_object(regularizer) - - -@keras_export('keras.regularizers.deserialize') -def deserialize(config, custom_objects=None): - if config == 'l1_l2': - # Special case necessary since the defaults used for "l1_l2" (string) - # differ from those of the L1L2 class. - return L1L2(l1=0.01, l2=0.01) - return deserialize_keras_object( - config, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='regularizer') - - -@keras_export('keras.regularizers.get') +@keras_export("keras.regularizers.serialize") +def serialize(regularizer, use_legacy_format=False): + if regularizer is None: + return None + if not isinstance(regularizer, Regularizer): + warnings.warn( + "The `keras.regularizers.serialize()` API should only be used for " + "objects of type `keras.regularizers.Regularizer`. Found an " + f"instance of type {type(regularizer)}, which may lead to improper " + "serialization." + ) + if use_legacy_format: + return legacy_serialization.serialize_keras_object(regularizer) + return serialize_keras_object(regularizer) + + +@keras_export("keras.regularizers.deserialize") +def deserialize(config, custom_objects=None, use_legacy_format=False): + if config == "l1_l2": + # Special case necessary since the defaults used for "l1_l2" (string) + # differ from those of the L1L2 class. 
+ return L1L2(l1=0.01, l2=0.01) + if use_legacy_format: + return legacy_serialization.deserialize_keras_object( + config, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="regularizer", + ) + return deserialize_keras_object( + config, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="regularizer", + ) + + +@keras_export("keras.regularizers.get") def get(identifier): - """Retrieve a regularizer instance from a config or identifier.""" - if identifier is None: - return None - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, str): - return deserialize(str(identifier)) - elif callable(identifier): - return identifier - else: - raise ValueError( - f'Could not interpret regularizer identifier: {identifier}') + """Retrieve a regularizer instance from a config or identifier.""" + if identifier is None: + return None + if isinstance(identifier, dict): + use_legacy_format = "module" not in identifier + return deserialize(identifier, use_legacy_format=use_legacy_format) + elif isinstance(identifier, str): + return deserialize(str(identifier)) + elif callable(identifier): + return identifier + else: + raise ValueError( + f"Could not interpret regularizer identifier: {identifier}" + ) diff --git a/keras/regularizers_test.py b/keras/regularizers_test.py index 01e23092f56a..e8bc3606e12c 100644 --- a/keras/regularizers_test.py +++ b/keras/regularizers_test.py @@ -14,321 +14,369 @@ # ============================================================================== """Tests for Keras regularizers.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras -from keras.testing_infra import test_combinations from keras import regularizers +from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import np_utils - DATA_DIM = 5 NUM_CLASSES = 2 -class KerasRegularizersTest(test_combinations.TestCase, - parameterized.TestCase): - - def create_model(self, - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None): - model = keras.models.Sequential() - model.add(keras.layers.Dense(NUM_CLASSES, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - input_shape=(DATA_DIM,))) - return model - - def regularizer_fn_tensor(x): - return tf.constant(0.) - - def regularizer_fn_scalar(x): - return 0. - - class RegularizerTensor(regularizers.Regularizer): - def __call__(self, x): - return tf.constant(0.) - - class RegularizerScalar(regularizers.Regularizer): - def __call__(self, x): - return 0. 
- - def get_data(self): - (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( - train_samples=10, - test_samples=10, - input_shape=(DATA_DIM,), - num_classes=NUM_CLASSES) - y_train = np_utils.to_categorical(y_train, NUM_CLASSES) - y_test = np_utils.to_categorical(y_test, NUM_CLASSES) - return (x_train, y_train), (x_test, y_test) - - def create_multi_input_model_from(self, layer1, layer2): - input_1 = keras.layers.Input(shape=(DATA_DIM,)) - input_2 = keras.layers.Input(shape=(DATA_DIM,)) - out1 = layer1(input_1) - out2 = layer2(input_2) - out = keras.layers.Average()([out1, out2]) - model = keras.models.Model([input_1, input_2], out) - model.add_loss(keras.backend.mean(out2)) - model.add_loss(tf.reduce_sum(input_1)) - return model - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - ('l1', regularizers.l1()), - ('l2', regularizers.l2()), - ('l1_l2', regularizers.l1_l2()), - ('l2_zero', keras.regularizers.l2(0.)), - ('function_tensor', regularizer_fn_tensor), - ('function_scalar', regularizer_fn_scalar), - ('lambda_tensor', lambda x: tf.constant(0.)), - ('lambda_scalar', lambda x: 0.), - ('regularizer_base_class', regularizers.Regularizer()), - ('regularizer_custom_class_tensor', RegularizerTensor()), - ('regularizer_custom_class_scalar', RegularizerScalar()), - ]) - def test_kernel_regularization(self, regularizer): - (x_train, y_train), _ = self.get_data() - model = self.create_model(kernel_regularizer=regularizer) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.assertEqual(len(model.losses), 1) - model.fit(x_train, y_train, batch_size=10, epochs=1, verbose=0) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - ('l1', regularizers.l1()), - ('l2', regularizers.l2()), - ('l1_l2', regularizers.l1_l2()), - ('l2_zero', keras.regularizers.l2(0.)), - ('function_tensor', regularizer_fn_tensor), - ('function_scalar', regularizer_fn_scalar), - ('lambda_tensor', lambda x: tf.constant(0.)), - ('lambda_scalar', lambda x: 0.), - ('regularizer_base_class', regularizers.Regularizer()), - ('regularizer_custom_class_tensor', RegularizerTensor()), - ('regularizer_custom_class_scalar', RegularizerScalar()), - ]) - def test_bias_regularization(self, regularizer): - (x_train, y_train), _ = self.get_data() - model = self.create_model(bias_regularizer=regularizer) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.assertEqual(len(model.losses), 1) - model.fit(x_train, y_train, batch_size=10, epochs=1, verbose=0) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - ('l1', regularizers.l1()), - ('l2', regularizers.l2()), - ('l1_l2', regularizers.l1_l2()), - ('l2_zero', keras.regularizers.l2(0.)), - ('function_tensor', regularizer_fn_tensor), - ('function_scalar', regularizer_fn_scalar), - ('lambda_tensor', lambda x: tf.constant(0.)), - ('lambda_scalar', lambda x: 0.), - ('regularizer_base_class', regularizers.Regularizer()), - ('regularizer_custom_class_tensor', RegularizerTensor()), - ('regularizer_custom_class_scalar', RegularizerScalar()), - ]) - def test_activity_regularization(self, regularizer): - (x_train, y_train), _ = self.get_data() - model = self.create_model(activity_regularizer=regularizer) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.assertEqual(len(model.losses), 1 if 
tf.executing_eagerly() else 1) - model.fit(x_train, y_train, batch_size=10, epochs=1, verbose=0) - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types - def test_zero_regularization(self): - # Verifies that training with zero regularization works. - x, y = np.ones((10, 10)), np.ones((10, 3)) - model = test_utils.get_model_from_layers( - [keras.layers.Dense(3, kernel_regularizer=keras.regularizers.l2(0))], - input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x, y, batch_size=5, epochs=1) - - def test_custom_regularizer_saving(self): - - def my_regularizer(weights): - return tf.reduce_sum(tf.abs(weights)) - - inputs = keras.Input((10,)) - outputs = keras.layers.Dense(1, kernel_regularizer=my_regularizer)(inputs) - model = keras.Model(inputs, outputs) - model2 = model.from_config( - model.get_config(), custom_objects={'my_regularizer': my_regularizer}) - self.assertEqual(model2.layers[1].kernel_regularizer, my_regularizer) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - ('l1', regularizers.l1()), - ('l2', regularizers.l2()), - ('l1_l2', regularizers.l1_l2()), - ]) - def test_regularization_shared_layer(self, regularizer): - dense_layer = keras.layers.Dense( - NUM_CLASSES, - kernel_regularizer=regularizer, - activity_regularizer=regularizer) - model = self.create_multi_input_model_from(dense_layer, dense_layer) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.assertLen(model.losses, 5) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - ('l1', regularizers.l1()), - ('l2', regularizers.l2()), - ('l1_l2', regularizers.l1_l2()), - ]) - def test_regularization_shared_model(self, regularizer): - dense_layer = keras.layers.Dense( - NUM_CLASSES, - kernel_regularizer=regularizer, - activity_regularizer=regularizer) - - input_tensor = keras.layers.Input(shape=(DATA_DIM,)) - dummy_model = keras.models.Model(input_tensor, dense_layer(input_tensor)) - - model = self.create_multi_input_model_from(dummy_model, dummy_model) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - self.assertLen(model.losses, 6) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters([ - ('l1', regularizers.l1()), - ('l2', regularizers.l2()), - ('l1_l2', regularizers.l1_l2()), - ]) - def test_regularization_shared_layer_in_different_models(self, regularizer): - shared_dense = keras.layers.Dense( - NUM_CLASSES, - kernel_regularizer=regularizer, - activity_regularizer=regularizer) - models = [] - for _ in range(2): - input_tensor = keras.layers.Input(shape=(DATA_DIM,)) - unshared_dense = keras.layers.Dense( - NUM_CLASSES, kernel_regularizer=regularizer) - out = unshared_dense(shared_dense(input_tensor)) - models.append(keras.models.Model(input_tensor, out)) - - model = self.create_multi_input_model_from( - layer1=models[0], layer2=models[1]) - model.compile( - loss='categorical_crossentropy', - optimizer='sgd', - run_eagerly=test_utils.should_run_eagerly()) - - # We expect to see 9 losses on the model: - # - 2 from the 2 add_loss calls on the outer model. - # - 3 from the weight regularizers on the shared_dense layer, unshared_dense - # in inner model 1, unshared_dense in inner model 2. - # - 4 from activity regularizers on the shared_dense layer. 
- self.assertLen(model.losses, 9) - - def test_deserialization_error(self): - with self.assertRaisesRegex(ValueError, 'Could not interpret regularizer'): - keras.regularizers.get(0) - - @parameterized.named_parameters([ - ('l1', regularizers.l1(l1=None), 0.01), - ('l2', regularizers.l2(l2=None), 0.01), - ('l1_l2', regularizers.l1_l2(l1=None, l2=None), 0.), - ]) - def test_default_value_when_init_with_none(self, regularizer, expected_value): - expected_value = np.asarray(expected_value) - if hasattr(regularizer, 'l1'): - self.assertAllClose(regularizer.l1, expected_value) - if hasattr(regularizer, 'l2'): - self.assertAllClose(regularizer.l2, expected_value) - - @test_utils.run_v2_only - def test_orthogonal_regularizer(self): - # Test correctness. - factor = 0.1 - reg_rows = regularizers.OrthogonalRegularizer(factor=factor, mode='rows') - reg_cols = regularizers.OrthogonalRegularizer(factor=factor, mode='columns') - - # Test with square matrix - inputs = tf.constant([[1, 1, 1, 1], - [2, 0, 0, 0], - [0, 0, 3, 1]], dtype='float32') - normalized_rows = tf.math.l2_normalize(inputs, axis=1) - normalized_cols = tf.math.l2_normalize(inputs, axis=0) - rows_pairs = [ - tf.reduce_sum(normalized_rows[0] * normalized_rows[1]), - tf.reduce_sum(normalized_rows[0] * normalized_rows[2]), - tf.reduce_sum(normalized_rows[1] * normalized_rows[2]), - ] - col_pairs = [ - tf.reduce_sum(normalized_cols[:, 0] * normalized_cols[:, 1]), - tf.reduce_sum(normalized_cols[:, 0] * normalized_cols[:, 2]), - tf.reduce_sum(normalized_cols[:, 0] * normalized_cols[:, 3]), - tf.reduce_sum(normalized_cols[:, 1] * normalized_cols[:, 2]), - tf.reduce_sum(normalized_cols[:, 1] * normalized_cols[:, 3]), - tf.reduce_sum(normalized_cols[:, 2] * normalized_cols[:, 3]), - ] - num_row_pairs = 3 - num_col_pairs = 6 - # Expected: factor * sum(pairwise_dot_products_of_rows) / num_row_pairs - self.assertAllClose(reg_rows(inputs), - factor * sum(rows_pairs) / num_row_pairs) - # Expected: factor * sum(pairwise_dot_products_of_columns) / num_col_pairs - self.assertAllClose(reg_cols(inputs), - factor * sum(col_pairs) / num_col_pairs) - - # Test incorrect usage. - with self.assertRaisesRegex(ValueError, 'must have rank 2'): - reg_rows(tf.constant([1, 1], dtype='float32')) - - # Test serialization - self.assertDictEqual(reg_cols.get_config(), - {'factor': factor, 'mode': 'columns'}) - - # Test usage in model. - model_inputs = keras.Input((3,)) - model_outputs = keras.layers.Dense( - 4, kernel_regularizer=reg_rows)(model_inputs) - model = keras.Model(model_inputs, model_outputs) - model.compile(optimizer='rmsprop', loss='mse') - model.fit(np.random.random((16, 3)), np.random.random((16, 4)), epochs=1) - - # Test serialization and deserialiation as part of model. 
- inputs = tf.constant([[1, 1, 1], - [2, 0, 0], - [0, 0, 3]], dtype='float32') - outputs = model(inputs) - config = model.get_config() - weights = model.get_weights() - model = keras.Model.from_config(config) - model.set_weights(weights) - self.assertAllClose(model(inputs), outputs, atol=1e-5) - - -if __name__ == '__main__': - tf.test.main() +class KerasRegularizersTest(test_combinations.TestCase, parameterized.TestCase): + def create_model( + self, + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + ): + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + NUM_CLASSES, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + input_shape=(DATA_DIM,), + ) + ) + return model + + def regularizer_fn_tensor(x): + return tf.constant(0.0) + + def regularizer_fn_scalar(x): + return 0.0 + + class RegularizerTensor(regularizers.Regularizer): + def __call__(self, x): + return tf.constant(0.0) + + class RegularizerScalar(regularizers.Regularizer): + def __call__(self, x): + return 0.0 + + def get_data(self): + (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( + train_samples=10, + test_samples=10, + input_shape=(DATA_DIM,), + num_classes=NUM_CLASSES, + ) + y_train = np_utils.to_categorical(y_train, NUM_CLASSES) + y_test = np_utils.to_categorical(y_test, NUM_CLASSES) + return (x_train, y_train), (x_test, y_test) + + def create_multi_input_model_from(self, layer1, layer2): + input_1 = keras.layers.Input(shape=(DATA_DIM,)) + input_2 = keras.layers.Input(shape=(DATA_DIM,)) + out1 = layer1(input_1) + out2 = layer2(input_2) + out = keras.layers.Average()([out1, out2]) + model = keras.models.Model([input_1, input_2], out) + model.add_loss(keras.backend.mean(out2)) + model.add_loss(tf.reduce_sum(input_1)) + return model + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + ("l1", regularizers.l1()), + ("l2", regularizers.l2()), + ("l1_l2", regularizers.l1_l2()), + ("l2_zero", keras.regularizers.l2(0.0)), + ("function_tensor", regularizer_fn_tensor), + ("function_scalar", regularizer_fn_scalar), + ("lambda_tensor", lambda x: tf.constant(0.0)), + ("lambda_scalar", lambda x: 0.0), + ("regularizer_base_class", regularizers.Regularizer()), + ("regularizer_custom_class_tensor", RegularizerTensor()), + ("regularizer_custom_class_scalar", RegularizerScalar()), + ] + ) + def test_kernel_regularization(self, regularizer): + (x_train, y_train), _ = self.get_data() + model = self.create_model(kernel_regularizer=regularizer) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.assertEqual(len(model.losses), 1) + model.fit(x_train, y_train, batch_size=10, epochs=1, verbose=0) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + ("l1", regularizers.l1()), + ("l2", regularizers.l2()), + ("l1_l2", regularizers.l1_l2()), + ("l2_zero", keras.regularizers.l2(0.0)), + ("function_tensor", regularizer_fn_tensor), + ("function_scalar", regularizer_fn_scalar), + ("lambda_tensor", lambda x: tf.constant(0.0)), + ("lambda_scalar", lambda x: 0.0), + ("regularizer_base_class", regularizers.Regularizer()), + ("regularizer_custom_class_tensor", RegularizerTensor()), + ("regularizer_custom_class_scalar", RegularizerScalar()), + ] + ) + def test_bias_regularization(self, regularizer): + (x_train, y_train), _ = self.get_data() + model = 
self.create_model(bias_regularizer=regularizer) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.assertEqual(len(model.losses), 1) + model.fit(x_train, y_train, batch_size=10, epochs=1, verbose=0) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + ("l1", regularizers.l1()), + ("l2", regularizers.l2()), + ("l1_l2", regularizers.l1_l2()), + ("l2_zero", keras.regularizers.l2(0.0)), + ("function_tensor", regularizer_fn_tensor), + ("function_scalar", regularizer_fn_scalar), + ("lambda_tensor", lambda x: tf.constant(0.0)), + ("lambda_scalar", lambda x: 0.0), + ("regularizer_base_class", regularizers.Regularizer()), + ("regularizer_custom_class_tensor", RegularizerTensor()), + ("regularizer_custom_class_scalar", RegularizerScalar()), + ] + ) + def test_activity_regularization(self, regularizer): + (x_train, y_train), _ = self.get_data() + model = self.create_model(activity_regularizer=regularizer) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.assertEqual(len(model.losses), 1 if tf.executing_eagerly() else 1) + model.fit(x_train, y_train, batch_size=10, epochs=1, verbose=0) + + @test_combinations.run_all_keras_modes + @test_combinations.run_with_all_model_types + def test_zero_regularization(self): + # Verifies that training with zero regularization works. + x, y = np.ones((10, 10)), np.ones((10, 3)) + model = test_utils.get_model_from_layers( + [ + keras.layers.Dense( + 3, kernel_regularizer=keras.regularizers.l2(0) + ) + ], + input_shape=(10,), + ) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + model.fit(x, y, batch_size=5, epochs=1) + + def test_custom_regularizer_saving(self): + def my_regularizer(weights): + return tf.reduce_sum(tf.abs(weights)) + + inputs = keras.Input((10,)) + outputs = keras.layers.Dense(1, kernel_regularizer=my_regularizer)( + inputs + ) + model = keras.Model(inputs, outputs) + model2 = model.from_config( + model.get_config(), + custom_objects={"my_regularizer": my_regularizer}, + ) + self.assertEqual(model2.layers[1].kernel_regularizer, my_regularizer) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + ("l1", regularizers.l1()), + ("l2", regularizers.l2()), + ("l1_l2", regularizers.l1_l2()), + ] + ) + def test_regularization_shared_layer(self, regularizer): + dense_layer = keras.layers.Dense( + NUM_CLASSES, + kernel_regularizer=regularizer, + activity_regularizer=regularizer, + ) + model = self.create_multi_input_model_from(dense_layer, dense_layer) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + self.assertLen(model.losses, 5) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + ("l1", regularizers.l1()), + ("l2", regularizers.l2()), + ("l1_l2", regularizers.l1_l2()), + ] + ) + def test_regularization_shared_model(self, regularizer): + dense_layer = keras.layers.Dense( + NUM_CLASSES, + kernel_regularizer=regularizer, + activity_regularizer=regularizer, + ) + + input_tensor = keras.layers.Input(shape=(DATA_DIM,)) + dummy_model = keras.models.Model( + input_tensor, dense_layer(input_tensor) + ) + + model = self.create_multi_input_model_from(dummy_model, dummy_model) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + 
self.assertLen(model.losses, 6) + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + [ + ("l1", regularizers.l1()), + ("l2", regularizers.l2()), + ("l1_l2", regularizers.l1_l2()), + ] + ) + def test_regularization_shared_layer_in_different_models(self, regularizer): + shared_dense = keras.layers.Dense( + NUM_CLASSES, + kernel_regularizer=regularizer, + activity_regularizer=regularizer, + ) + models = [] + for _ in range(2): + input_tensor = keras.layers.Input(shape=(DATA_DIM,)) + unshared_dense = keras.layers.Dense( + NUM_CLASSES, kernel_regularizer=regularizer + ) + out = unshared_dense(shared_dense(input_tensor)) + models.append(keras.models.Model(input_tensor, out)) + + model = self.create_multi_input_model_from( + layer1=models[0], layer2=models[1] + ) + model.compile( + loss="categorical_crossentropy", + optimizer="sgd", + run_eagerly=test_utils.should_run_eagerly(), + ) + + # We expect to see 9 losses on the model: + # - 2 from the 2 add_loss calls on the outer model. + # - 3 from the weight regularizers on the shared_dense layer, + # unshared_dense in inner model 1, unshared_dense in inner model 2. + # - 4 from activity regularizers on the shared_dense layer. + self.assertLen(model.losses, 9) + + def test_deserialization_error(self): + with self.assertRaisesRegex( + ValueError, "Could not interpret regularizer" + ): + keras.regularizers.get(0) + + @parameterized.named_parameters( + [ + ("l1", regularizers.l1(l1=None), 0.01), + ("l2", regularizers.l2(l2=None), 0.01), + ("l1_l2", regularizers.l1_l2(l1=None, l2=None), 0.0), + ] + ) + def test_default_value_when_init_with_none( + self, regularizer, expected_value + ): + expected_value = np.asarray(expected_value) + if hasattr(regularizer, "l1"): + self.assertAllClose(regularizer.l1, expected_value) + if hasattr(regularizer, "l2"): + self.assertAllClose(regularizer.l2, expected_value) + + @test_utils.run_v2_only + def test_orthogonal_regularizer(self): + # Test correctness. + factor = 0.1 + reg_rows = regularizers.OrthogonalRegularizer( + factor=factor, mode="rows" + ) + reg_cols = regularizers.OrthogonalRegularizer( + factor=factor, mode="columns" + ) + + # Test with square matrix + inputs = tf.constant( + [[1, 1, 1, 1], [2, 0, 0, 0], [0, 0, 3, 1]], dtype="float32" + ) + normalized_rows = tf.math.l2_normalize(inputs, axis=1) + normalized_cols = tf.math.l2_normalize(inputs, axis=0) + rows_pairs = [ + tf.reduce_sum(normalized_rows[0] * normalized_rows[1]), + tf.reduce_sum(normalized_rows[0] * normalized_rows[2]), + tf.reduce_sum(normalized_rows[1] * normalized_rows[2]), + ] + col_pairs = [ + tf.reduce_sum(normalized_cols[:, 0] * normalized_cols[:, 1]), + tf.reduce_sum(normalized_cols[:, 0] * normalized_cols[:, 2]), + tf.reduce_sum(normalized_cols[:, 0] * normalized_cols[:, 3]), + tf.reduce_sum(normalized_cols[:, 1] * normalized_cols[:, 2]), + tf.reduce_sum(normalized_cols[:, 1] * normalized_cols[:, 3]), + tf.reduce_sum(normalized_cols[:, 2] * normalized_cols[:, 3]), + ] + num_row_pairs = 3 + num_col_pairs = 6 + # Expected: factor * sum(pairwise_dot_products_of_rows) / num_row_pairs + self.assertAllClose( + reg_rows(inputs), factor * sum(rows_pairs) / num_row_pairs + ) + # Expected: factor * sum(pairwise_dot_products_of_columns) / + # num_col_pairs + self.assertAllClose( + reg_cols(inputs), factor * sum(col_pairs) / num_col_pairs + ) + + # Test incorrect usage. 
+        with self.assertRaisesRegex(ValueError, "must have rank 2"):
+            reg_rows(tf.constant([1, 1], dtype="float32"))
+
+        # Test serialization
+        self.assertDictEqual(
+            reg_cols.get_config(), {"factor": factor, "mode": "columns"}
+        )
+
+        # Test usage in model.
+        model_inputs = keras.Input((3,))
+        model_outputs = keras.layers.Dense(4, kernel_regularizer=reg_rows)(
+            model_inputs
+        )
+        model = keras.Model(model_inputs, model_outputs)
+        model.compile(optimizer="rmsprop", loss="mse")
+        model.fit(
+            np.random.random((16, 3)), np.random.random((16, 4)), epochs=1
+        )
+
+        # Test serialization and deserialization as part of model.
+        inputs = tf.constant([[1, 1, 1], [2, 0, 0], [0, 0, 3]], dtype="float32")
+        outputs = model(inputs)
+        config = model.get_config()
+        weights = model.get_weights()
+        model = keras.Model.from_config(config)
+        model.set_weights(weights)
+        self.assertAllClose(model(inputs), outputs, atol=1e-5)
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/keras/saving/BUILD b/keras/saving/BUILD
index 826069278d0b..ab4a8830fd69 100644
--- a/keras/saving/BUILD
+++ b/keras/saving/BUILD
@@ -1,9 +1,11 @@
 # Description:
 #   Contains the Keras save model API (internal TensorFlow version).

+# Placeholder: load unaliased py_library
 load("@org_keras//keras:keras.bzl", "tf_py_test")

 package(
+    # copybara:uncomment default_applicable_licenses = ["//keras:license"],
     # TODO(scottzhu): Remove non-keras deps from TF.
     default_visibility = [
         "//keras:friends",
@@ -16,15 +18,18 @@ py_library(
     name = "saving",
     srcs = [
         "__init__.py",
-        "hdf5_format.py",
-        "model_config.py",
+        "legacy/hdf5_format.py",
+        "legacy/model_config.py",
+        "legacy/save.py",
+        "legacy/saving_utils.py",
         "pickle_utils.py",
-        "save.py",
-        "saved_model_experimental.py",
-        "saving_utils.py",
+        "saving_api.py",
     ],
     srcs_version = "PY3",
     deps = [
+        ":object_registration",
+        ":serialization",
+        ":serialization_lib",
         "//:expect_h5py_installed",
         "//:expect_tensorflow_installed",
         "//:expect_yaml_installed",
@@ -35,18 +40,94 @@ py_library(
         "//keras/mixed_precision:autocast_variable",
         "//keras/optimizers",
         "//keras/protobuf:saved_metadata_proto_py_pb2",
-        "//keras/saving/saved_model",
-        "//keras/saving/utils_v1",
+        "//keras/saving/legacy/saved_model",
         "//keras/utils:engine_utils",
         "//keras/utils:metrics_utils",
         "//keras/utils:mode_keys",
     ],
 )

+py_library(
+    name = "saving_lib",
+    srcs = [
+        "saving_lib.py",
+    ],
+    srcs_version = "PY3",
+    deps = [
+        ":serialization_lib",
+        "//:expect_tensorflow_installed",
+        "//keras/utils:generic_utils",
+        "//keras/utils:io_utils",
+    ],
+)
+
+tf_py_test(
+    name = "saving_lib_test",
+    size = "medium",
+    srcs = ["saving_lib_test.py"],
+    python_version = "PY3",
+    deps = [
+        "//:expect_absl_installed",
+        "//:expect_tensorflow_installed",
+        "//keras",
+        "//keras/testing_infra:test_combinations",
+        "//keras/utils:generic_utils",
+    ],
+)
+
+py_library(
+    name = "object_registration",
+    srcs = [
+        "object_registration.py",
+    ],
+    srcs_version = "PY3",
+)
+
+py_library(
+    name = "serialization_lib",
+    srcs = [
+        "serialization_lib.py",
+    ],
+    srcs_version = "PY3",
+    deps = [
+        ":object_registration",
+        ":serialization",
+        "//:expect_numpy_installed",
+        "//:expect_tensorflow_installed",
+        "//keras/saving/legacy/saved_model:utils",
+    ],
+)
+
+py_library(
+    name = "serialization",
+    srcs = [
+        "legacy/serialization.py",
+    ],
+    srcs_version = "PY3",
+    deps = [
+        ":object_registration",
+        "//:expect_numpy_installed",
+        "//:expect_tensorflow_installed",
+        "//keras/utils:tf_contextlib",
+        "//keras/utils:tf_inspect",
+    ],
+)
+
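The new `serialization`, `serialization_lib`, and `object_registration` targets above back the dual-path dispatch added to `keras.regularizers.serialize()`/`deserialize()` earlier in this diff: configs produced by the new `serialization_lib` carry a `"module"` key, and `regularizers.get()` treats its absence as the signal to fall back to legacy deserialization. A sketch of that heuristic (the config dicts are illustrative, and this assumes a build that includes these changes):

```python
from keras import regularizers

# Legacy-format config: no "module" key, so `get()` routes it through
# legacy_serialization.deserialize_keras_object.
legacy_config = {"class_name": "L2", "config": {"l2": 0.01}}

# New-format config: "module" is present, so the serialization_lib path
# is used instead.
new_config = {
    "module": "keras.regularizers",
    "class_name": "L2",
    "config": {"l2": 0.01},
    "registered_name": None,
}

# Both should resolve to equivalent L2 instances.
print(regularizers.get(legacy_config).get_config())  # {'l2': 0.01}
print(regularizers.get(new_config).get_config())     # {'l2': 0.01}
```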
+tf_py_test( + name = "object_registration_test", + size = "small", + srcs = ["object_registration_test.py"], + python_version = "PY3", + deps = [ + "//:expect_tensorflow_installed", + "//keras", + ], +) + tf_py_test( name = "metrics_serialization_test", size = "medium", - srcs = ["metrics_serialization_test.py"], + srcs = ["legacy/metrics_serialization_test.py"], python_version = "PY3", shard_count = 8, tags = [ @@ -64,7 +145,7 @@ tf_py_test( tf_py_test( name = "losses_serialization_test", size = "medium", - srcs = ["losses_serialization_test.py"], + srcs = ["legacy/losses_serialization_test.py"], python_version = "PY3", shard_count = 4, deps = [ @@ -94,7 +175,7 @@ tf_py_test( tf_py_test( name = "save_weights_test", size = "medium", - srcs = ["save_weights_test.py"], + srcs = ["legacy/save_weights_test.py"], python_version = "PY3", shard_count = 4, tags = [ @@ -114,7 +195,7 @@ tf_py_test( tf_py_test( name = "save_test", size = "medium", - srcs = ["save_test.py"], + srcs = ["legacy/save_test.py"], python_version = "PY3", shard_count = 4, tags = [ @@ -130,34 +211,30 @@ tf_py_test( ) tf_py_test( - name = "saved_model_experimental_test", + name = "saving_utils_test", size = "medium", - srcs = ["saved_model_experimental_test.py"], + srcs = ["legacy/saving_utils_test.py"], python_version = "PY3", - shard_count = 4, - tags = [ - "no_oss", # TODO(b/119349471): Re-enable - "no_windows", - ], + tags = ["notsan"], deps = [ "//:expect_absl_installed", "//:expect_numpy_installed", "//:expect_tensorflow_installed", "//keras", + "//keras/testing_infra:test_combinations", ], ) tf_py_test( - name = "saving_utils_test", - size = "medium", - srcs = ["saving_utils_test.py"], + name = "serialization_lib_test", + size = "small", + srcs = ["serialization_lib_test.py"], python_version = "PY3", - tags = ["notsan"], deps = [ "//:expect_absl_installed", - "//:expect_numpy_installed", "//:expect_tensorflow_installed", "//keras", + "//keras/saving:serialization", "//keras/testing_infra:test_combinations", ], ) diff --git a/keras/saving/experimental/saving_lib.py b/keras/saving/experimental/saving_lib.py deleted file mode 100644 index 7ccc0c8c9799..000000000000 --- a/keras/saving/experimental/saving_lib.py +++ /dev/null @@ -1,276 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Keras python-based idempotent saving functions (experimental).""" -import importlib -import json -import os -import types -from keras.saving.saved_model import json_utils -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf -from tensorflow.python.util import tf_export - -_CONFIG_FILE = 'config.keras' - -# A temporary flag to enable the new idempotent saving framework. 
-_ENABLED = False - - -def load(dirpath): - """Load a saved python model.""" - file_path = os.path.join(dirpath, _CONFIG_FILE) - with tf.io.gfile.GFile(file_path, 'r') as f: - config_json = f.read() - config_dict = json_utils.decode(config_json) - return deserialize_keras_object(config_dict) - - -def save(model, dirpath): - """Save a saved python model.""" - if not tf.io.gfile.exists(dirpath): - tf.io.gfile.mkdir(dirpath) - file_path = os.path.join(dirpath, _CONFIG_FILE) - - # TODO(rchao): Save the model's metadata (e.g. Keras version) in a separate - # file in the archive. - # TODO(rchao): Save the model's state (e.g. layer weights/vocab) in a separate - # set of files in the archive. - # TODO(rchao): Write the config into a file in an archive. In this prototype - # we're temporarily settled on a standalone json file. - serialized_model_dict = serialize_keras_object(model) - config_json = json.dumps(serialized_model_dict, cls=json_utils.Encoder) - with tf.io.gfile.GFile(file_path, 'w') as f: - f.write(config_json) - - -# TODO(rchao): Replace the current Keras' `deserialize_keras_object` with this -# (as well as the reciprocal function). -def deserialize_keras_object(config_dict): - """Retrieve the object by deserializing the config dict. - - The config dict is a python dictionary that consists of a set of key-value - pairs, and represents a Keras object, such as an `Optimizer`, `Layer`, - `Metrics`, etc. The saving and loading library uses the following keys to - record information of a Keras object: - - - `class_name`: String. For classes that have an exported Keras namespace, - this is the full path that starts with "keras", such as - "keras.optimizers.Adam". For classes that do not have an exported Keras - namespace, this is the name of the class, as exactly defined in the source - code, such as "LossesContainer". - - `config`: Dict. Library-defined or user-defined key-value pairs that store - the configuration of the object, as obtained by `object.get_config()`. - - `module`: String. The path of the python module, such as - "keras.engine.compile_utils". Built-in Keras classes - expect to have prefix `keras`. For classes that have an exported Keras - namespace, this is `None` since the class can be fully identified by the - full Keras path. - - `registered_name`: String. The key the class is registered under via - `keras.utils.register_keras_serializable(package, name)` API. The key has - the format of '{package}>{name}', where `package` and `name` are the - arguments passed to `register_keras_serializable()`. If `name` is not - provided, it defaults to the class name. If `registered_name` successfully - resolves to a class (that was registered), `class_name` and `config` values - in the dict will not be used. `registered_name` is only used for - non-built-in classes. - - For example, the following dictionary represents the built-in Adam optimizer - with the relevant config. Note that for built-in (exported symbols that have - an exported Keras namespace) classes, the library tracks the class by the - the import location of the built-in object in the Keras namespace, e.g. 
- `"keras.optimizers.Adam"`, and this information is stored in `class_name`: - - ``` - dict_structure = { - "class_name": "keras.optimizers.Adam", - "config": { - "amsgrad": false, - "beta_1": 0.8999999761581421, - "beta_2": 0.9990000128746033, - "decay": 0.0, - "epsilon": 1e-07, - "learning_rate": 0.0010000000474974513, - "name": "Adam" - }, - "module": null, - "registered_name": "Adam" - } - # Returns an `Adam` instance identical to the original one. - deserialize_keras_object(dict_structure) - ``` - - If the class does not have an exported Keras namespace, the library tracks it - by its `module` and `class_name`. For example: - - ``` - dict_structure = { - "class_name": "LossesContainer", - "config": { - "losses": [...], - "total_loss_mean": {...}, - }, - "module": "keras.engine.compile_utils", - "registered_name": "LossesContainer" - } - - # Returns a `LossesContainer` instance identical to the original one. - deserialize_keras_object(dict_structure) - ``` - - And the following dictionary represents a user-customized `MeanSquaredError` - loss: - - ``` - @keras.utils.generic_utils.register_keras_serializable(package='my_package') - class ModifiedMeanSquaredError(keras.losses.MeanSquaredError): - ... - - dict_structure = { - "class_name": "ModifiedMeanSquaredError", - "config": { - "fn": "mean_squared_error", - "name": "mean_squared_error", - "reduction": "auto" - }, - "registered_name": "my_package>ModifiedMeanSquaredError" - } - # Gives `ModifiedMeanSquaredError` object - deserialize_keras_object(dict_structure) - ``` - - Args: - config_dict: the python dict structure to deserialize the Keras object from. - - Returns: - The Keras object that is deserialized from `config_dict`. - - """ - # TODO(rchao): Design a 'version' key for `config_dict` for defining versions - # for classes. - class_name = config_dict['class_name'] - config = config_dict['config'] - module = config_dict['module'] - registered_name = config_dict['registered_name'] - - # Strings and functions will have `builtins` as its module. - if module == 'builtins': - if class_name == 'str': - if not isinstance(config, str): - raise TypeError('Config of string is supposed to be a string. ' - f'Received: {config}.') - return config - - elif class_name == 'function': - custom_function = generic_utils.get_custom_objects_by_name( - registered_name) - if custom_function is not None: - # If there is a custom function registered (via - # `register_keras_serializable` API), that takes precedence. - return custom_function - - # Otherwise, attempt to import the tracked module, and find the function. - function_module = config.get('module', None) - try: - function_module = importlib.import_module(function_module) - except ImportError as e: - raise ImportError( - f'The function module {function_module} is not available. The ' - f'config dictionary provided is {config_dict}.') from e - return vars(function_module).get(config['function_name']) - - raise TypeError(f'Unrecognized type: {class_name}') - - custom_class = generic_utils.get_custom_objects_by_name(registered_name) - if custom_class is not None: - # For others (classes), see if there is a custom class registered (via - # `register_keras_serializable` API). If so, that takes precedence. - return custom_class.from_config(config) - else: - # Otherwise, attempt to retrieve the class object given the `module`, and - # `class_name`. 
- if module is None: - # In the case where `module` is not recorded, the `class_name` represents - # the full exported Keras namespace (used by `keras_export`) such as - # "keras.optimizers.Adam". - cls = tf_export.get_symbol_from_name(class_name) - else: - # In the case where `module` is available, the class does not have an - # Keras namespace (which is the case when the symbol is not exported via - # `keras_export`). Import the tracked module (that is used for the - # internal path), find the class, and use its config. - mod = importlib.import_module(module) - cls = vars(mod).get(class_name, None) - if not hasattr(cls, 'from_config'): - raise TypeError(f'Unable to reconstruct an instance of {cls}.') - return cls.from_config(config) - - -def serialize_keras_object(obj): - """Retrieve the config dict by serializing the Keras object. - - `serialize_keras_object()` serializes a Keras object to a python dictionary - that represents the object, and is a reciprocal function of - `deserialize_keras_object()`. See `deserialize_keras_object()` for more - information about the config format. - - Args: - obj: the Keras object to serialize. - - Returns: - A python dict that represents the object. The python dict can be - deserialized via `deserialize_keras_object()`. - """ - - # Note that in the case of the `obj` being a function, the module used will be - # "builtins", and the `class_name` used will be "function"; in the case of the - # `obj` being a string, the module used will be "builtins", and the - # `class_name` used will be "str" - module = None - - # This gets the `keras.*` exported name, such as "keras.optimizers.Adam". - class_name = tf_export.get_canonical_name_for_symbol( - obj.__class__, api_name='keras') - if class_name is None: - module = obj.__class__.__module__ - class_name = obj.__class__.__name__ - return { - 'module': module, - 'class_name': class_name, - 'config': _get_object_config(obj), - 'registered_name': _get_object_registered_name(obj) - } - - -def _get_object_registered_name(obj): - if isinstance(obj, types.FunctionType): - return generic_utils.get_registered_name(obj) - else: - return generic_utils.get_registered_name(obj.__class__) - - -def _get_object_config(obj): - """Return the object's config depending on string, function, or others.""" - if isinstance(obj, str): - # Use the content of the string as the config for string. - return obj - elif isinstance(obj, types.FunctionType): - # Keep track of the function's module and name in a dict as the config. - return { - 'module': obj.__module__, - 'function_name': obj.__name__, - } - if not hasattr(obj, 'get_config'): - raise TypeError(f'Unable to recognize the config of {obj}.') - return obj.get_config() diff --git a/keras/saving/experimental/saving_lib_test.py b/keras/saving/experimental/saving_lib_test.py deleted file mode 100644 index 4f289d8d9e8a..000000000000 --- a/keras/saving/experimental/saving_lib_test.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Keras python-based idempotent saving functions (experimental).""" -import os -import sys - -import keras -from keras import backend -from keras.saving.experimental import saving_lib -from keras.saving.saved_model import json_utils -from keras.utils import generic_utils -from keras.utils import io_utils -import numpy as np -import tensorflow.compat.v2 as tf - -train_step_message = 'This is my training step' - - -@keras.utils.generic_utils.register_keras_serializable( - package='my_custom_package') -class MyDense(keras.layers.Dense): - - def two(self): - return 2 - - -@keras.utils.generic_utils.register_keras_serializable( - package='my_custom_package') -class CustomModelX(keras.Model): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.dense1 = MyDense(1) - - def call(self, inputs): - return self.dense1(inputs) - - def train_step(self, data): - tf.print(train_step_message) - x, y = data - with tf.GradientTape() as tape: - y_pred = self(x) - loss = self.compiled_loss(y, y_pred) - - gradients = tape.gradient(loss, self.trainable_variables) - self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) - return {} - - def one(self): - return 1 - - -@keras.utils.generic_utils.register_keras_serializable( - package='my_custom_package') -def my_mean_squared_error(y_true, y_pred): - """Identical to built-in `mean_squared_error`, added here as a custom func.""" - return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1) - - -module_my_mean_squared_error = my_mean_squared_error - - -class NewSavingTest(tf.test.TestCase): - - def setUp(self): - super().setUp() - saving_lib._ENABLED = True - - def tearDown(self): - super().tearDown() - saving_lib._ENABLED = False - - def _get_subclassed_model(self): - subclassed_model = CustomModelX() - subclassed_model.compile( - optimizer='adam', - loss=[ - 'mse', keras.losses.mean_squared_error, - keras.losses.MeanSquaredError(), my_mean_squared_error - ]) - return subclassed_model - - def test_saving_after_compile_but_before_fit(self): - temp_dir = os.path.join(self.get_temp_dir(), 'my_model') - subclassed_model = self._get_subclassed_model() - subclassed_model._save_new(temp_dir) - - # This is so that we can register another function with the same custom - # object key, and make sure the newly registered function is used while - # loading. - del generic_utils._GLOBAL_CUSTOM_OBJECTS[ - 'my_custom_package>my_mean_squared_error'] - - @keras.utils.generic_utils.register_keras_serializable( - package='my_custom_package') - def my_mean_squared_error(y_true, y_pred): # pylint: disable=redefined-outer-name - """Function-local `mean_squared_error`.""" - return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1) - - loaded_model = saving_lib.load(temp_dir) - - # Everything should be the same class or function for the original model - # and the loaded model. 
- for model in [subclassed_model, loaded_model]: - self.assertIs(model.optimizer.__class__, - keras.optimizers.optimizer_v2.adam.Adam) - self.assertIs(model.compiled_loss.__class__, - keras.engine.compile_utils.LossesContainer) - self.assertEqual(model.compiled_loss._losses[0], 'mse') - self.assertIs(model.compiled_loss._losses[1], - keras.losses.mean_squared_error) - self.assertIs(model.compiled_loss._losses[2].__class__, - keras.losses.MeanSquaredError) - self.assertIs(model.compiled_loss._total_loss_mean.__class__, - keras.metrics.base_metric.Mean) - - # Except for a custom function used because the loaded model is supposed to - # be using the newly registered custom function. - self.assertIs(subclassed_model.compiled_loss._losses[3], - module_my_mean_squared_error) - self.assertIs(loaded_model.compiled_loss._losses[3], my_mean_squared_error) - self.assertIsNot(module_my_mean_squared_error, my_mean_squared_error) - - def test_saving_after_fit(self): - temp_dir = os.path.join(self.get_temp_dir(), 'my_model') - subclassed_model = self._get_subclassed_model() - - x = np.random.random((100, 32)) - y = np.random.random((100, 1)) - subclassed_model.fit(x, y, epochs=1) - subclassed_model._save_new(temp_dir) - loaded_model = saving_lib.load(temp_dir) - - io_utils.enable_interactive_logging() - # `tf.print` writes to stderr. This is to make sure the custom training step - # is used. - with self.captureWritesToStream(sys.stderr) as printed: - loaded_model.fit(x, y, epochs=1) - self.assertRegex(printed.contents(), train_step_message) - - # Check that the custom classes do get used. - self.assertIsInstance(loaded_model, CustomModelX) - self.assertIsInstance(loaded_model.dense1, MyDense) - # Check that the custom method is available. - self.assertEqual(loaded_model.one(), 1) - self.assertEqual(loaded_model.dense1.two(), 2) - - # Everything should be the same class or function for the original model - # and the loaded model. 
- for model in [subclassed_model, loaded_model]: - self.assertIs(model.optimizer.__class__, - keras.optimizers.optimizer_v2.adam.Adam) - self.assertIs(model.compiled_loss.__class__, - keras.engine.compile_utils.LossesContainer) - self.assertIs(model.compiled_loss._losses[0].__class__, - keras.losses.LossFunctionWrapper) - self.assertIs(model.compiled_loss._losses[1].__class__, - keras.losses.LossFunctionWrapper) - self.assertIs(model.compiled_loss._losses[2].__class__, - keras.losses.MeanSquaredError) - self.assertIs(model.compiled_loss._losses[3].__class__, - keras.losses.LossFunctionWrapper) - self.assertIs(model.compiled_loss._total_loss_mean.__class__, - keras.metrics.base_metric.Mean) - - def test_saving_preserve_unbuilt_state(self): - temp_dir = os.path.join(self.get_temp_dir(), 'my_model') - subclassed_model = CustomModelX() - subclassed_model._save_new(temp_dir) - loaded_model = saving_lib.load(temp_dir) - self.assertFalse(subclassed_model.built) - self.assertFalse(loaded_model.built) - - def test_saving_preserve_built_state(self): - temp_dir = os.path.join(self.get_temp_dir(), 'my_model') - subclassed_model = self._get_subclassed_model() - x = np.random.random((100, 32)) - y = np.random.random((100, 1)) - subclassed_model.fit(x, y, epochs=1) - subclassed_model._save_new(temp_dir) - loaded_model = saving_lib.load(temp_dir) - self.assertTrue(subclassed_model.built) - self.assertTrue(loaded_model.built) - self.assertEqual(subclassed_model._build_input_shape, - loaded_model._build_input_shape) - self.assertEqual( - tf.TensorShape([None, 32]), loaded_model._build_input_shape) - - def test_saved_module_paths_and_class_names(self): - temp_dir = os.path.join(self.get_temp_dir(), 'my_model') - subclassed_model = self._get_subclassed_model() - x = np.random.random((100, 32)) - y = np.random.random((100, 1)) - subclassed_model.fit(x, y, epochs=1) - subclassed_model._save_new(temp_dir) - - file_path = os.path.join(temp_dir, saving_lib._CONFIG_FILE) - with tf.io.gfile.GFile(file_path, 'r') as f: - config_json = f.read() - config_dict = json_utils.decode(config_json) - self.assertEqual(config_dict['registered_name'], - 'my_custom_package>CustomModelX') - self.assertIsNone(config_dict['config']['optimizer']['module']) - self.assertEqual(config_dict['config']['optimizer']['class_name'], - 'keras.optimizers.Adam') - self.assertEqual(config_dict['config']['loss']['module'], - 'keras.engine.compile_utils') - self.assertEqual(config_dict['config']['loss']['class_name'], - 'LossesContainer') - - - def test_functional_model_with_tf_op_lambda_layer(self): - - class ToString: - - def __init__(self): - self.contents = '' - - def __call__(self, msg): - self.contents += msg + '\n' - - temp_dir = os.path.join(self.get_temp_dir(), 'my_model') - - inputs = keras.layers.Input(shape=(32,)) - outputs = keras.layers.Dense(1)(inputs) - outputs = outputs + inputs - functional_model = keras.Model(inputs, outputs) - functional_to_string = ToString() - functional_model.summary(print_fn=functional_to_string) - functional_model.compile(optimizer='adam', loss='mse', metrics=['mae']) - - x = np.random.random((1000, 32)) - y = np.random.random((1000, 1)) - functional_model.fit(x, y, epochs=3) - functional_model._save_new(temp_dir) - loaded_model = saving_lib.load(temp_dir) - loaded_to_string = ToString() - loaded_model.summary(print_fn=loaded_to_string) - - self.assertEqual(functional_to_string.contents, loaded_to_string.contents) - - -if __name__ == '__main__': - if tf.__internal__.tf2.enabled(): - tf.test.main() diff 
--git a/keras/saving/hdf5_format.py b/keras/saving/hdf5_format.py deleted file mode 100644 index cb7ef4b36069..000000000000 --- a/keras/saving/hdf5_format.py +++ /dev/null @@ -1,992 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# pylint: disable=protected-access -"""Functions for saving and loading a Keras Model from HDF5 format.""" - -import tensorflow.compat.v2 as tf - -import json -import os - -import numpy as np - -from keras import backend -from keras.optimizers import optimizer_v1 -from keras.optimizers.optimizer_experimental import optimizer as optimizer_experimental -from keras.saving import model_config as model_config_lib -from keras.saving import saving_utils -from keras.saving.saved_model import json_utils -from keras.utils.generic_utils import LazyLoader -from keras.utils.io_utils import ask_to_proceed_with_overwrite -from tensorflow.python.platform import tf_logging as logging - - -# pylint: disable=g-import-not-at-top -try: - import h5py - HDF5_OBJECT_HEADER_LIMIT = 64512 -except ImportError: - h5py = None -# pylint: enable=g-import-not-at-top - -# TODO(b/134426265): Switch back to single-quotes to match the rest of the file -# once the issue with copybara is fixed. -# pylint:disable=g-inconsistent-quotes -sequential_lib = LazyLoader( - "sequential_lib", globals(), - "keras.engine.sequential") -# pylint:enable=g-inconsistent-quotes - - -def save_model_to_hdf5(model, filepath, overwrite=True, include_optimizer=True): - """Saves a model to a HDF5 file. - - The saved model contains: - - the model's configuration (topology) - - the model's weights - - the model's optimizer's state (if any) - - Thus the saved model can be reinstantiated in - the exact same state, without any of the code - used for model definition or training. - - Args: - model: Keras model instance to be saved. - filepath: One of the following: - - String, path where to save the model - - `h5py.File` object where to save the model - overwrite: Whether we should overwrite any existing - model at the target location, or instead - ask the user with a manual prompt. - include_optimizer: If True, save optimizer's state together. - - Raises: - ImportError: if h5py is not available. - """ - - if h5py is None: - raise ImportError('`save_model()` using h5 format requires h5py. Could not ' - 'import h5py.') - - # TODO(psv) Add warning when we save models that contain non-serializable - # entities like metrics added using `add_metric` and losses added using - # `add_loss.` - if len(model.weights) != len(model._undeduplicated_weights): - logging.warning('Found duplicated `Variable`s in Model\'s `weights`. ' - 'This is usually caused by `Variable`s being shared by ' - 'Layers in the Model. These `Variable`s will be treated ' - 'as separate `Variable`s when the Model is restored. 
To ' - 'avoid this, please save with `save_format="tf"`.') - - if not isinstance(filepath, h5py.File): - # If file exists and should not be overwritten. - if not overwrite and os.path.isfile(filepath): - proceed = ask_to_proceed_with_overwrite(filepath) - if not proceed: - return - - # Try creating dir if not exist - dirpath = os.path.dirname(filepath) - if not os.path.exists(dirpath): - tf.io.gfile.makedirs(dirpath) - - f = h5py.File(filepath, mode='w') - opened_new_file = True - else: - f = filepath - opened_new_file = False - - try: - model_metadata = saving_utils.model_metadata(model, include_optimizer) - for k, v in model_metadata.items(): - if isinstance(v, (dict, list, tuple)): - f.attrs[k] = json.dumps( - v, default=json_utils.get_json_type).encode('utf8') - else: - f.attrs[k] = v - - model_weights_group = f.create_group('model_weights') - save_weights_to_hdf5_group(model_weights_group, model) - - # TODO(b/128683857): Add integration tests between tf.keras and external - # Keras, to avoid breaking TF.js users. - if isinstance(model.optimizer, optimizer_experimental.Optimizer): - logging.warning('HDF5 format does not save weights of' - ' `optimizer_experimental.Optimizer`, your optimizer will' - ' be recompiled at loading time.') - elif (include_optimizer and model.optimizer and - not isinstance(model.optimizer, optimizer_v1.TFOptimizer)): - save_optimizer_weights_to_hdf5_group(f, model.optimizer) - - f.flush() - finally: - if opened_new_file: - f.close() - - -def load_model_from_hdf5(filepath, custom_objects=None, compile=True): # pylint: disable=redefined-builtin - """Loads a model saved via `save_model_to_hdf5`. - - Args: - filepath: One of the following: - - String, path to the saved model - - `h5py.File` object from which to load the model - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - compile: Boolean, whether to compile the model - after loading. - - Returns: - A Keras model instance. If an optimizer was found - as part of the saved model, the model is already - compiled. Otherwise, the model is uncompiled and - a warning will be displayed. When `compile` is set - to False, the compilation is omitted without any - warning. - - Raises: - ImportError: if h5py is not available. - ValueError: In case of an invalid savefile. - """ - if h5py is None: - raise ImportError('`load_model()` using h5 format requires h5py. Could not ' - 'import h5py.') - - if not custom_objects: - custom_objects = {} - - opened_new_file = not isinstance(filepath, h5py.File) - if opened_new_file: - f = h5py.File(filepath, mode='r') - else: - f = filepath - - model = None - try: - # instantiate model - model_config = f.attrs.get('model_config') - if model_config is None: - raise ValueError(f'No model config found in the file at {filepath}.') - if hasattr(model_config, 'decode'): - model_config = model_config.decode('utf-8') - model_config = json_utils.decode(model_config) - model = model_config_lib.model_from_config(model_config, - custom_objects=custom_objects) - - # set weights - load_weights_from_hdf5_group(f['model_weights'], model) - - if compile: - # instantiate optimizer - training_config = f.attrs.get('training_config') - if hasattr(training_config, 'decode'): - training_config = training_config.decode('utf-8') - if training_config is None: - logging.warning('No training configuration found in the save file, so ' - 'the model was *not* compiled. 
Compile it manually.') - return model - training_config = json_utils.decode(training_config) - - # Compile model. - model.compile(**saving_utils.compile_args_from_training_config( - training_config, custom_objects), from_serialized=True) - saving_utils.try_build_compiled_arguments(model) - - # Set optimizer weights. - if isinstance(model.optimizer, optimizer_experimental.Optimizer): - logging.warning('Loading model from HDF5 will not restore the ' - 'optimizer\'s weights, since the optimizer is an ' - 'instance of `optimizer_experimental.Optimizer`') - elif 'optimizer_weights' in f: - try: - model.optimizer._create_all_weights(model.trainable_variables) - except (NotImplementedError, AttributeError): - logging.warning( - 'Error when creating the weights of optimizer {}, making it ' - 'impossible to restore the saved optimizer state. As a result, ' - 'your model is starting with a freshly initialized optimizer.') - - optimizer_weight_values = load_optimizer_weights_from_hdf5_group(f) - try: - model.optimizer.set_weights(optimizer_weight_values) - except ValueError: - logging.warning('Error in loading the saved optimizer ' - 'state. As a result, your model is ' - 'starting with a freshly initialized ' - 'optimizer.') - finally: - if opened_new_file: - f.close() - return model - - -def preprocess_weights_for_loading(layer, - weights, - original_keras_version=None, - original_backend=None): - """Preprocess layer weights between different Keras formats. - - Converts layers weights from Keras 1 format to Keras 2 and also weights of - cuDNN layers in Keras 2. - - Args: - layer: Layer instance. - weights: List of weights values (Numpy arrays). - original_keras_version: Keras version for the weights, as a string. - original_backend: Keras backend the weights were trained with, - as a string. - - Returns: - A list of weights values (Numpy arrays). - """ - def convert_nested_bidirectional(weights): - """Converts layers nested in `Bidirectional` wrapper. - - This function uses `preprocess_weights_for_loading()` for converting - layers. - - Args: - weights: List of weights values (Numpy arrays). - - Returns: - A list of weights values (Numpy arrays). - """ - num_weights_per_layer = len(weights) // 2 - forward_weights = preprocess_weights_for_loading( - layer.forward_layer, weights[:num_weights_per_layer], - original_keras_version, original_backend) - backward_weights = preprocess_weights_for_loading( - layer.backward_layer, weights[num_weights_per_layer:], - original_keras_version, original_backend) - return forward_weights + backward_weights - - def convert_nested_time_distributed(weights): - """Converts layers nested in `TimeDistributed` wrapper. - - This function uses `preprocess_weights_for_loading()` for converting nested - layers. - - Args: - weights: List of weights values (Numpy arrays). - - Returns: - A list of weights values (Numpy arrays). - """ - return preprocess_weights_for_loading( - layer.layer, weights, original_keras_version, original_backend) - - def convert_nested_model(weights): - """Converts layers nested in `Model` or `Sequential`. - - This function uses `preprocess_weights_for_loading()` for converting nested - layers. - - Args: - weights: List of weights values (Numpy arrays). - - Returns: - A list of weights values (Numpy arrays). 
- """ - trainable_weights = weights[:len(layer.trainable_weights)] - non_trainable_weights = weights[len(layer.trainable_weights):] - - new_trainable_weights = [] - new_non_trainable_weights = [] - - for sublayer in layer.layers: - num_trainable_weights = len(sublayer.trainable_weights) - num_non_trainable_weights = len(sublayer.non_trainable_weights) - if sublayer.weights: - preprocessed = preprocess_weights_for_loading( - layer=sublayer, - weights=(trainable_weights[:num_trainable_weights] + - non_trainable_weights[:num_non_trainable_weights]), - original_keras_version=original_keras_version, - original_backend=original_backend) - new_trainable_weights.extend(preprocessed[:num_trainable_weights]) - new_non_trainable_weights.extend(preprocessed[num_trainable_weights:]) - - trainable_weights = trainable_weights[num_trainable_weights:] - non_trainable_weights = non_trainable_weights[ - num_non_trainable_weights:] - new_trainable_weights += layer._trainable_weights - new_non_trainable_weights += layer._non_trainable_weights - return new_trainable_weights + new_non_trainable_weights - - # Convert layers nested in Bidirectional/Model/Sequential. - # Both transformation should be ran for both Keras 1->2 conversion - # and for conversion of cuDNN layers. - if layer.__class__.__name__ == 'Bidirectional': - weights = convert_nested_bidirectional(weights) - if layer.__class__.__name__ == 'TimeDistributed': - weights = convert_nested_time_distributed(weights) - elif layer.__class__.__name__ in ['Model', 'Sequential', 'Functional']: - weights = convert_nested_model(weights) - - if original_keras_version == '1': - if layer.__class__.__name__ == 'TimeDistributed': - weights = preprocess_weights_for_loading( - layer.layer, weights, original_keras_version, original_backend) - - if layer.__class__.__name__ == 'Conv1D': - shape = weights[0].shape - # Handle Keras 1.1 format - if shape[:2] != (layer.kernel_size[0], 1) or shape[3] != layer.filters: - # Legacy shape: - # (filters, input_dim, filter_length, 1) - assert shape[0] == layer.filters and shape[2:] == (layer.kernel_size[0], - 1) - weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) - weights[0] = weights[0][:, 0, :, :] - - if layer.__class__.__name__ == 'Conv2D': - if layer.data_format == 'channels_first': - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, stack_size, filters) - weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) - - if layer.__class__.__name__ == 'Conv2DTranspose': - if layer.data_format == 'channels_last': - # old: (kernel_rows, kernel_cols, stack_size, filters) - # new: (kernel_rows, kernel_cols, filters, stack_size) - weights[0] = np.transpose(weights[0], (0, 1, 3, 2)) - if layer.data_format == 'channels_first': - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, filters, stack_size) - weights[0] = np.transpose(weights[0], (2, 3, 0, 1)) - - if layer.__class__.__name__ == 'Conv3D': - if layer.data_format == 'channels_first': - # old: (filters, stack_size, ...) 
- # new: (..., stack_size, filters) - weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0)) - - if layer.__class__.__name__ == 'GRU': - if len(weights) == 9: - kernel = np.concatenate([weights[0], weights[3], weights[6]], axis=-1) - recurrent_kernel = np.concatenate( - [weights[1], weights[4], weights[7]], axis=-1) - bias = np.concatenate([weights[2], weights[5], weights[8]], axis=-1) - weights = [kernel, recurrent_kernel, bias] - - if layer.__class__.__name__ == 'LSTM': - if len(weights) == 12: - # old: i, c, f, o - # new: i, f, c, o - kernel = np.concatenate( - [weights[0], weights[6], weights[3], weights[9]], axis=-1) - recurrent_kernel = np.concatenate( - [weights[1], weights[7], weights[4], weights[10]], axis=-1) - bias = np.concatenate( - [weights[2], weights[8], weights[5], weights[11]], axis=-1) - weights = [kernel, recurrent_kernel, bias] - - if layer.__class__.__name__ == 'ConvLSTM2D': - if len(weights) == 12: - kernel = np.concatenate( - [weights[0], weights[6], weights[3], weights[9]], axis=-1) - recurrent_kernel = np.concatenate( - [weights[1], weights[7], weights[4], weights[10]], axis=-1) - bias = np.concatenate( - [weights[2], weights[8], weights[5], weights[11]], axis=-1) - if layer.data_format == 'channels_first': - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, stack_size, filters) - kernel = np.transpose(kernel, (2, 3, 1, 0)) - recurrent_kernel = np.transpose(recurrent_kernel, (2, 3, 1, 0)) - weights = [kernel, recurrent_kernel, bias] - - conv_layers = ['Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'ConvLSTM2D'] - if layer.__class__.__name__ in conv_layers: - if backend.int_shape(layer.weights[0]) != weights[0].shape: - weights[0] = np.transpose(weights[0], (3, 2, 0, 1)) - if layer.__class__.__name__ == 'ConvLSTM2D': - weights[1] = np.transpose(weights[1], (3, 2, 0, 1)) - - # convert cuDNN layers - return _convert_rnn_weights(layer, weights) - - -def _convert_rnn_weights(layer, weights): - """Converts weights for RNN layers between native and cuDNN format. - - Input kernels for each gate are transposed and converted between Fortran - and C layout, recurrent kernels are transposed. For LSTM biases are summed/ - split in half, for GRU biases are reshaped. - - Weights can be converted in both directions between `LSTM` and`CuDNNSLTM` - and between `CuDNNGRU` and `GRU(reset_after=True)`. Default `GRU` is not - compatible with `CuDNNGRU`. - - For missing biases in `LSTM`/`GRU` (`use_bias=False`) no conversion is made. - - Args: - layer: Target layer instance. - weights: List of source weights values (input kernels, recurrent kernels, - [biases]) (Numpy arrays). - - Returns: - A list of converted weights values (Numpy arrays). - - Raises: - ValueError: for incompatible GRU layer/weights or incompatible biases - """ - - def transform_kernels(kernels, func, n_gates): - """Transforms kernel for each gate separately using given function. - - Args: - kernels: Stacked array of kernels for individual gates. - func: Function applied to kernel of each gate. - n_gates: Number of gates (4 for LSTM, 3 for GRU). - - Returns: - Stacked array of transformed kernels. - """ - return np.hstack([func(k) for k in np.hsplit(kernels, n_gates)]) - - def transpose_input(from_cudnn): - """Makes a function that transforms input kernels from/to cuDNN format. - - It keeps the shape, but changes between the layout (Fortran/C). 
Eg.: - - ``` - Keras cuDNN - [[0, 1, 2], <---> [[0, 2, 4], - [3, 4, 5]] [1, 3, 5]] - ``` - - It can be passed to `transform_kernels()`. - - Args: - from_cudnn: `True` if source weights are in cuDNN format, `False` if - they're in plain Keras format. - - Returns: - Function that converts input kernel to the other format. - """ - order = 'F' if from_cudnn else 'C' - - def transform(kernel): - return kernel.T.reshape(kernel.shape, order=order) - - return transform - - target_class = layer.__class__.__name__ - - # convert the weights between CuDNNLSTM and LSTM - if target_class in ['LSTM', 'CuDNNLSTM'] and len(weights) == 3: - # determine if we're loading a CuDNNLSTM layer - # from the number of bias weights: - # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4) - # if there's no bias weight in the file, skip this conversion - units = weights[1].shape[0] - bias_shape = weights[2].shape - n_gates = 4 - - if bias_shape == (2 * units * n_gates,): - source = 'CuDNNLSTM' - elif bias_shape == (units * n_gates,): - source = 'LSTM' - else: - raise ValueError('Invalid bias shape: ' + str(bias_shape)) - - def convert_lstm_weights(weights, from_cudnn=True): - """Converts the weights between CuDNNLSTM and LSTM. - - Args: - weights: Original weights. - from_cudnn: Indicates whether original weights are from cuDNN layer. - - Returns: - Updated weights compatible with LSTM. - """ - - # Transpose (and reshape) input and recurrent kernels - kernels = transform_kernels(weights[0], transpose_input(from_cudnn), - n_gates) - recurrent_kernels = transform_kernels(weights[1], lambda k: k.T, n_gates) - if from_cudnn: - # merge input and recurrent biases into a single set - biases = np.sum(np.split(weights[2], 2, axis=0), axis=0) - else: - # Split single set of biases evenly to two sets. The way of - # splitting doesn't matter as long as the two sets sum is kept. - biases = np.tile(0.5 * weights[2], 2) - return [kernels, recurrent_kernels, biases] - - if source != target_class: - weights = convert_lstm_weights(weights, from_cudnn=source == 'CuDNNLSTM') - - # convert the weights between CuDNNGRU and GRU(reset_after=True) - if target_class in ['GRU', 'CuDNNGRU'] and len(weights) == 3: - # We can determine the source of the weights from the shape of the bias. - # If there is no bias we skip the conversion since - # CuDNNGRU always has biases. - - units = weights[1].shape[0] - bias_shape = weights[2].shape - n_gates = 3 - - def convert_gru_weights(weights, from_cudnn=True): - """Converts the weights between CuDNNGRU and GRU. - - Args: - weights: Original weights. - from_cudnn: Indicates whether original weights are from cuDNN layer. - - Returns: - Updated weights compatible with GRU. 
- """ - - kernels = transform_kernels(weights[0], transpose_input(from_cudnn), - n_gates) - recurrent_kernels = transform_kernels(weights[1], lambda k: k.T, n_gates) - biases = np.array(weights[2]).reshape((2, -1) if from_cudnn else -1) - return [kernels, recurrent_kernels, biases] - - if bias_shape == (2 * units * n_gates,): - source = 'CuDNNGRU' - elif bias_shape == (2, units * n_gates): - source = 'GRU(reset_after=True)' - elif bias_shape == (units * n_gates,): - source = 'GRU(reset_after=False)' - else: - raise ValueError('Invalid bias shape: ' + str(bias_shape)) - - if target_class == 'CuDNNGRU': - target = 'CuDNNGRU' - elif layer.reset_after: - target = 'GRU(reset_after=True)' - else: - target = 'GRU(reset_after=False)' - - # only convert between different types - if source != target: - types = (source, target) - if 'GRU(reset_after=False)' in types: - raise ValueError('%s is not compatible with %s' % types) - if source == 'CuDNNGRU': - weights = convert_gru_weights(weights, from_cudnn=True) - elif source == 'GRU(reset_after=True)': - weights = convert_gru_weights(weights, from_cudnn=False) - - return weights - - -def save_optimizer_weights_to_hdf5_group(hdf5_group, optimizer): - """Saves optimizer weights of a optimizer to a HDF5 group. - - Args: - hdf5_group: HDF5 group. - optimizer: optimizer instance. - """ - - symbolic_weights = getattr(optimizer, 'weights') - if symbolic_weights: - weights_group = hdf5_group.create_group('optimizer_weights') - weight_names = [str(w.name).encode('utf8') for w in symbolic_weights] - save_attributes_to_hdf5_group(weights_group, 'weight_names', weight_names) - weight_values = backend.batch_get_value(symbolic_weights) - for name, val in zip(weight_names, weight_values): - param_dset = weights_group.create_dataset( - name, val.shape, dtype=val.dtype) - if not val.shape: - # scalar - param_dset[()] = val - else: - param_dset[:] = val - - -def load_optimizer_weights_from_hdf5_group(hdf5_group): - """Load optimizer weights from a HDF5 group. - - Args: - hdf5_group: A pointer to a HDF5 group. - - Returns: - data: List of optimizer weight names. - """ - weights_group = hdf5_group['optimizer_weights'] - optimizer_weight_names = load_attributes_from_hdf5_group( - weights_group, 'weight_names') - return [weights_group[weight_name] for weight_name in optimizer_weight_names] - - -def save_subset_weights_to_hdf5_group(f, weights): - """Save top-level weights of a model to a HDF5 group. - - Args: - f: HDF5 group. - weights: List of weight variables. - """ - weight_values = backend.batch_get_value(weights) - weight_names = [w.name.encode('utf8') for w in weights] - save_attributes_to_hdf5_group(f, 'weight_names', weight_names) - for name, val in zip(weight_names, weight_values): - param_dset = f.create_dataset(name, val.shape, dtype=val.dtype) - if not val.shape: - # scalar - param_dset[()] = val - else: - param_dset[:] = val - - -def save_weights_to_hdf5_group(f, model): - """Saves the weights of a list of layers to a HDF5 group. - - Args: - f: HDF5 group. - model: Model instance. - """ - from keras import __version__ as keras_version # pylint: disable=g-import-not-at-top - save_attributes_to_hdf5_group( - f, 'layer_names', [layer.name.encode('utf8') for layer in model.layers]) - f.attrs['backend'] = backend.backend().encode('utf8') - f.attrs['keras_version'] = str(keras_version).encode('utf8') - - # Sort model layers by layer name to ensure that group names are strictly - # growing to avoid prefix issues. 
- for layer in sorted(model.layers, key=lambda x: x.name): - g = f.create_group(layer.name) - weights = _legacy_weights(layer) - save_subset_weights_to_hdf5_group(g, weights) - weights = model._trainable_weights + model._non_trainable_weights - g = f.create_group('top_level_model_weights') - save_subset_weights_to_hdf5_group(g, weights) - - -def load_subset_weights_from_hdf5_group(f): - """Load layer weights of a model from hdf5. - - Args: - f: A pointer to a HDF5 group. - - Returns: - List of NumPy arrays of the weight values. - - Raises: - ValueError: in case of mismatch between provided model - and weights file. - """ - weight_names = load_attributes_from_hdf5_group(f, 'weight_names') - return [np.asarray(f[weight_name]) for weight_name in weight_names] - - -def load_weights_from_hdf5_group(f, model): - """Implements topological (order-based) weight loading. - - Args: - f: A pointer to a HDF5 group. - model: Model instance. - - Raises: - ValueError: in case of mismatch between provided layers - and weights file. - """ - if 'keras_version' in f.attrs: - original_keras_version = f.attrs['keras_version'] - if hasattr(original_keras_version, 'decode'): - original_keras_version = original_keras_version.decode('utf8') - else: - original_keras_version = '1' - if 'backend' in f.attrs: - original_backend = f.attrs['backend'] - if hasattr(original_backend, 'decode'): - original_backend = original_backend.decode('utf8') - else: - original_backend = None - - filtered_layers = [] - for layer in model.layers: - weights = _legacy_weights(layer) - if weights: - filtered_layers.append(layer) - - layer_names = load_attributes_from_hdf5_group(f, 'layer_names') - filtered_layer_names = [] - for name in layer_names: - g = f[name] - weight_names = load_attributes_from_hdf5_group(g, 'weight_names') - if weight_names: - filtered_layer_names.append(name) - layer_names = filtered_layer_names - if len(layer_names) != len(filtered_layers): - raise ValueError( - f'Layer count mismatch when loading weights from file. ' - f'Model expected {len(filtered_layers)} layers, found ' - f'{len(layer_names)} saved layers.') - - # We batch weight value assignments in a single backend call - # which provides a speedup in TensorFlow. - weight_value_tuples = [] - for k, name in enumerate(layer_names): - g = f[name] - layer = filtered_layers[k] - symbolic_weights = _legacy_weights(layer) - weight_values = load_subset_weights_from_hdf5_group(g) - weight_values = preprocess_weights_for_loading(layer, weight_values, - original_keras_version, - original_backend) - if len(weight_values) != len(symbolic_weights): - raise ValueError( - f'Weight count mismatch for layer #{k} (named {layer.name} in the ' - f'current model, {name} in the save file). ' - f'Layer expects {len(symbolic_weights)} weight(s). Received ' - f'{len(weight_values)} saved weight(s)') - weight_value_tuples += zip(symbolic_weights, weight_values) - - if 'top_level_model_weights' in f: - symbolic_weights = model._trainable_weights + model._non_trainable_weights - weight_values = load_subset_weights_from_hdf5_group( - f['top_level_model_weights']) - if len(weight_values) != len(symbolic_weights): - raise ValueError( - f'Weight count mismatch for top-level weights when loading weights ' - f'from file. ' - f'Model expects {len(symbolic_weights)} top-level weight(s). 
' - f'Received {len(weight_values)} saved top-level weight(s)') - weight_value_tuples += zip(symbolic_weights, weight_values) - backend.batch_set_value(weight_value_tuples) - - # Perform any layer defined finalization of the layer state. - for layer in model._flatten_layers(): - layer.finalize_state() - - -def load_weights_from_hdf5_group_by_name(f, model, skip_mismatch=False): - """Implements name-based weight loading (instead of topological loading). - - Layers that have no matching name are skipped. - - Args: - f: A pointer to a HDF5 group. - model: Model instance. - skip_mismatch: Boolean, whether to skip loading of layers - where there is a mismatch in the number of weights, - or a mismatch in the shape of the weights. - - Raises: - ValueError: in case of mismatch between provided layers - and weights file and skip_match=False. - """ - if 'keras_version' in f.attrs: - original_keras_version = f.attrs['keras_version'] - if hasattr(original_keras_version, 'decode'): - original_keras_version = original_keras_version.decode('utf8') - else: - original_keras_version = '1' - if 'backend' in f.attrs: - original_backend = f.attrs['backend'] - if hasattr(original_backend, 'decode'): - original_backend = original_backend.decode('utf8') - else: - original_backend = None - - # New file format. - layer_names = load_attributes_from_hdf5_group(f, 'layer_names') - - # Reverse index of layer name to list of layers with name. - index = {} - for layer in model.layers: - if layer.name: - index.setdefault(layer.name, []).append(layer) - - # We batch weight value assignments in a single backend call - # which provides a speedup in TensorFlow. - weight_value_tuples = [] - for k, name in enumerate(layer_names): - g = f[name] - weight_values = load_subset_weights_from_hdf5_group(g) - for layer in index.get(name, []): - symbolic_weights = _legacy_weights(layer) - weight_values = preprocess_weights_for_loading( - layer, weight_values, original_keras_version, original_backend) - if len(weight_values) != len(symbolic_weights): - if skip_mismatch: - logging.warning( - f'Skipping loading of weights for layer #{k} (named ' - f'{layer.name}) due to mismatch in number of weights. ' - f'Layer expects {len(symbolic_weights)} weight(s). Received ' - f'{len(weight_values)} saved weight(s)') - continue - raise ValueError( - f'Weight count mismatch for layer #{k} (named {layer.name}). ' - f'Layer expects {len(symbolic_weights)} weight(s). Received ' - f'{len(weight_values)} saved weight(s)') - # Set values. - for i in range(len(weight_values)): - expected_shape = backend.int_shape(symbolic_weights[i]) - received_shape = weight_values[i].shape - if expected_shape != received_shape: - if skip_mismatch: - logging.warning( - f'Skipping loading weights for layer #{k} (named ' - f'{layer.name}) due to mismatch in shape for weight ' - f'{symbolic_weights[i].name}. ' - f'Weight expects shape {expected_shape}. Received saved weight ' - f'with shape {received_shape}') - continue - raise ValueError( - f'Shape mismatch in layer #{k} (named {layer.name}) for weight ' - f'{symbolic_weights[i].name}. ' - f'Weight expects shape {expected_shape}. 
Received saved weight ' - f'with shape {received_shape}') - else: - weight_value_tuples.append((symbolic_weights[i], weight_values[i])) - - if 'top_level_model_weights' in f: - symbolic_weights = model._trainable_weights + model._non_trainable_weights - weight_values = load_subset_weights_from_hdf5_group( - f['top_level_model_weights']) - - if len(weight_values) != len(symbolic_weights): - if skip_mismatch: - logging.warning( - f'Skipping loading top-level weights for model due to mismatch ' - f'in number of weights. ' - f'Model expects {len(symbolic_weights)} top-level weight(s). ' - f'Received {len(weight_values)} saved top-level weight(s)') - else: - raise ValueError( - f'Weight count mismatch for top-level weights of model. ' - f'Model expects {len(symbolic_weights)} top-level weight(s). ' - f'Received {len(weight_values)} saved top-level weight(s)') - else: - for i in range(len(weight_values)): - expected_shape = backend.int_shape(symbolic_weights[i]) - received_shape = weight_values[i].shape - if expected_shape != received_shape: - if skip_mismatch: - logging.warning( - f'Skipping loading top-level weight for model due to ' - f'mismatch in shape for weight {symbolic_weights[i].name}. ' - f'Weight expects shape {expected_shape}. Received saved weight ' - f'with shape {received_shape}') - else: - raise ValueError( - f'Shape mismatch in model for top-level weight ' - f'{symbolic_weights[i].name}. ' - f'Weight expects shape {expected_shape}. Received saved weight ' - f'with shape {received_shape}') - else: - weight_value_tuples.append((symbolic_weights[i], weight_values[i])) - - backend.batch_set_value(weight_value_tuples) - - # Perform any layer defined finalization of the layer state. - for layer in model._flatten_layers(): - layer.finalize_state() - - -def save_attributes_to_hdf5_group(group, name, data): - """Saves attributes (data) of the specified name into the HDF5 group. - - This method deals with an inherent problem of HDF5 file which is not - able to store data larger than HDF5_OBJECT_HEADER_LIMIT bytes. - - Args: - group: A pointer to a HDF5 group. - name: A name of the attributes to save. - data: Attributes data to store. - - Raises: - RuntimeError: If any single attribute is too large to be saved. - """ - # Check that no item in `data` is larger than `HDF5_OBJECT_HEADER_LIMIT` - # because in that case even chunking the array would not make the saving - # possible. - bad_attributes = [x for x in data if len(x) > HDF5_OBJECT_HEADER_LIMIT] - - # Expecting this to never be true. - if bad_attributes: - raise RuntimeError( - 'The following attributes cannot be saved to HDF5 file because they ' - f'are larger than {HDF5_OBJECT_HEADER_LIMIT} bytes: {bad_attributes}') - - data_npy = np.asarray(data) - - num_chunks = 1 - chunked_data = np.array_split(data_npy, num_chunks) - - # This will never loop forever thanks to the test above. - while any(x.nbytes > HDF5_OBJECT_HEADER_LIMIT for x in chunked_data): - num_chunks += 1 - chunked_data = np.array_split(data_npy, num_chunks) - - if num_chunks > 1: - for chunk_id, chunk_data in enumerate(chunked_data): - group.attrs['%s%d' % (name, chunk_id)] = chunk_data - else: - group.attrs[name] = data - - -def load_attributes_from_hdf5_group(group, name): - """Loads attributes of the specified name from the HDF5 group. - - This method deals with an inherent problem - of HDF5 file which is not able to store - data larger than HDF5_OBJECT_HEADER_LIMIT bytes. - - Args: - group: A pointer to a HDF5 group. - name: A name of the attributes to load. 
- - Returns: - data: Attributes data. - """ - if name in group.attrs: - data = [ - n.decode('utf8') if hasattr(n, 'decode') else n - for n in group.attrs[name] - ] - else: - data = [] - chunk_id = 0 - while '%s%d' % (name, chunk_id) in group.attrs: - data.extend([ - n.decode('utf8') if hasattr(n, 'decode') else n - for n in group.attrs['%s%d' % (name, chunk_id)] - ]) - chunk_id += 1 - return data - - -def _legacy_weights(layer): - """DO NOT USE. - - For legacy reason, the layer.weights was in the order of - [self.trainable_weights + self.non_trainable_weights], and this order was - used for preserving the weights in h5 format. The new order of layer.weights - are the same as layer.get_weights() which is more intuitive for user. To - keep supporting the existing saved h5 file, this method should be used to - save/load weights. In future version, we will delete this method and - introduce a breaking change for h5 and stay with the new order for weights. - - Args: - layer: a `tf.keras.Model` or `tf.keras.layers.Layer` instance. - - Returns: - A list of variables with the order of trainable_weights, followed by - non_trainable_weights. - """ - weights = layer.trainable_weights + layer.non_trainable_weights - if any(not isinstance(w, tf.Variable) for w in weights): - raise NotImplementedError( - f'Save or restore weights that is not an instance of `tf.Variable` is ' - f'not supported in h5, use `save_format=\'tf\'` instead. Received a ' - f'model or layer {layer.__class__.__name__} with weights {weights}') - return weights diff --git a/keras/saving/legacy/__init__.py b/keras/saving/legacy/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/keras/saving/legacy/hdf5_format.py b/keras/saving/legacy/hdf5_format.py new file mode 100644 index 000000000000..8d4a95eeaaa8 --- /dev/null +++ b/keras/saving/legacy/hdf5_format.py @@ -0,0 +1,1119 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Functions for saving and loading a Keras Model from HDF5 format.""" + +import json +import os + +import numpy as np +import tensorflow.compat.v2 as tf + +from keras import backend +from keras.optimizers import optimizer as optimizer_base +from keras.optimizers import optimizer_v1 +from keras.saving import object_registration +from keras.saving.legacy import model_config as model_config_lib +from keras.saving.legacy import saving_utils +from keras.saving.legacy.saved_model import json_utils +from keras.utils.generic_utils import LazyLoader +from keras.utils.io_utils import ask_to_proceed_with_overwrite + +# isort: off +from tensorflow.python.platform import tf_logging as logging + +try: + import h5py + + HDF5_OBJECT_HEADER_LIMIT = 64512 +except ImportError: + h5py = None + +# TODO(b/134426265): Switch back to single-quotes to match the rest of the file +# once the issue with copybara is fixed. 
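The rewritten `legacy/hdf5_format.py` keeps the `LazyLoader` indirection (the `sequential_lib` assignment just below) so that importing the saving module does not eagerly import `keras.engine.sequential` and create an import cycle. A rough sketch of the idea behind such a shim, using only the standard library; the `_LazyModule` name is hypothetical and this is not Keras's actual implementation:

```python
import importlib
import types


class _LazyModule(types.ModuleType):
    """Stand-in that imports the real module on first attribute access."""

    def __init__(self, local_name, parent_globals, module_name):
        super().__init__(module_name)
        self._local_name = local_name
        self._parent_globals = parent_globals
        self._module_name = module_name

    def __getattr__(self, item):
        module = importlib.import_module(self._module_name)
        # Swap the real module into the caller's globals so subsequent
        # lookups bypass this shim entirely.
        self._parent_globals[self._local_name] = module
        return getattr(module, item)


json_mod = _LazyModule("json_mod", globals(), "json")
print(json_mod.dumps({"lazy": True}))  # The real import happens only here.
```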
+ +sequential_lib = LazyLoader( + "sequential_lib", globals(), "keras.engine.sequential" +) + + +def save_model_to_hdf5(model, filepath, overwrite=True, include_optimizer=True): + """Saves a model to a HDF5 file. + + The saved model contains: + - the model's configuration (topology) + - the model's weights + - the model's optimizer's state (if any) + + Thus the saved model can be reinstantiated in + the exact same state, without any of the code + used for model definition or training. + + Args: + model: Keras model instance to be saved. + filepath: One of the following: + - String, path where to save the model + - `h5py.File` object where to save the model + overwrite: Whether we should overwrite any existing + model at the target location, or instead + ask the user with a manual prompt. + include_optimizer: If True, save optimizer's state together. + + Raises: + ImportError: if h5py is not available. + """ + + if h5py is None: + raise ImportError( + "`save_model()` using h5 format requires h5py. Could not " + "import h5py." + ) + + # Ensures that all models saved in HDF5 format follow the old serialization + model.use_legacy_config = True + + # TODO(psv) Add warning when we save models that contain non-serializable + # entities like metrics added using `add_metric` and losses added using + # `add_loss.` + if len(model.weights) != len(model._undeduplicated_weights): + logging.warning( + "Found duplicated `Variable`s in Model's `weights`. " + "This is usually caused by `Variable`s being shared by " + "Layers in the Model. These `Variable`s will be treated " + "as separate `Variable`s when the Model is restored. To " + 'avoid this, please save with `save_format="tf"`.' + ) + + if not isinstance(filepath, h5py.File): + # If file exists and should not be overwritten. + if not overwrite and os.path.isfile(filepath): + proceed = ask_to_proceed_with_overwrite(filepath) + if not proceed: + return + + # Try creating dir if not exist + dirpath = os.path.dirname(filepath) + if not os.path.exists(dirpath): + tf.io.gfile.makedirs(dirpath) + + f = h5py.File(filepath, mode="w") + opened_new_file = True + else: + f = filepath + opened_new_file = False + + try: + model_metadata = saving_utils.model_metadata(model, include_optimizer) + for k, v in model_metadata.items(): + if isinstance(v, (dict, list, tuple)): + f.attrs[k] = json.dumps( + v, default=json_utils.get_json_type + ).encode("utf8") + else: + f.attrs[k] = v + + model_weights_group = f.create_group("model_weights") + save_weights_to_hdf5_group(model_weights_group, model) + + # TODO(b/128683857): Add integration tests between tf.keras and external + # Keras, to avoid breaking TF.js users. + if ( + include_optimizer + and model.optimizer + and not isinstance(model.optimizer, optimizer_v1.TFOptimizer) + ): + save_optimizer_weights_to_hdf5_group(f, model.optimizer) + + f.flush() + finally: + if opened_new_file: + f.close() + + # Remove legacy serialization attribute after H5 saving complete + delattr(model, "use_legacy_config") + + +def load_model_from_hdf5(filepath, custom_objects=None, compile=True): + """Loads a model saved via `save_model_to_hdf5`. + + Args: + filepath: One of the following: + - String, path to the saved model + - `h5py.File` object from which to load the model + custom_objects: Optional dictionary mapping names + (strings) to custom classes or functions to be + considered during deserialization. + compile: Boolean, whether to compile the model + after loading. + + Returns: + A Keras model instance. 
If an optimizer was found
+        as part of the saved model, the model is already
+        compiled. Otherwise, the model is uncompiled and
+        a warning will be displayed. When `compile` is set
+        to False, the compilation is omitted without any
+        warning.
+
+    Raises:
+        ImportError: if h5py is not available.
+        ValueError: In case of an invalid savefile.
+    """
+    if h5py is None:
+        raise ImportError(
+            "`load_model()` using h5 format requires h5py. Could not "
+            "import h5py."
+        )
+
+    if not custom_objects:
+        custom_objects = {}
+
+    tlco = object_registration._THREAD_LOCAL_CUSTOM_OBJECTS.__dict__
+    gco = object_registration._GLOBAL_CUSTOM_OBJECTS
+    custom_objects = {**custom_objects, **tlco, **gco}
+
+    opened_new_file = not isinstance(filepath, h5py.File)
+    if opened_new_file:
+        f = h5py.File(filepath, mode="r")
+    else:
+        f = filepath
+
+    model = None
+    try:
+        # instantiate model
+        model_config = f.attrs.get("model_config")
+        if model_config is None:
+            raise ValueError(
+                f"No model config found in the file at {filepath}."
+            )
+        if hasattr(model_config, "decode"):
+            model_config = model_config.decode("utf-8")
+        model_config = json_utils.decode(model_config)
+        model = model_config_lib.model_from_config(
+            model_config, custom_objects=custom_objects
+        )
+
+        # set weights
+        load_weights_from_hdf5_group(f["model_weights"], model)
+
+        if compile:
+            # instantiate optimizer
+            training_config = f.attrs.get("training_config")
+            if hasattr(training_config, "decode"):
+                training_config = training_config.decode("utf-8")
+            if training_config is None:
+                logging.warning(
+                    "No training configuration found in the save file, so "
+                    "the model was *not* compiled. Compile it manually."
+                )
+                return model
+            training_config = json_utils.decode(training_config)
+
+            # Compile model.
+            model.compile(
+                **saving_utils.compile_args_from_training_config(
+                    training_config, custom_objects
+                ),
+                from_serialized=True,
+            )
+            saving_utils.try_build_compiled_arguments(model)
+
+            # Set optimizer weights.
+            if "optimizer_weights" in f:
+                try:
+                    if isinstance(model.optimizer, optimizer_base.Optimizer):
+                        model.optimizer.build(model.trainable_variables)
+                    else:
+                        model.optimizer._create_all_weights(
+                            model.trainable_variables
+                        )
+                except (NotImplementedError, AttributeError):
+                    logging.warning(
+                        "Error when creating the weights of the optimizer, "
+                        "making it impossible to restore the saved optimizer "
+                        "state. As a result, your model is starting with "
+                        "a freshly initialized optimizer."
+                    )
+
+                optimizer_weight_values = (
+                    load_optimizer_weights_from_hdf5_group(f)
+                )
+                try:
+                    model.optimizer.set_weights(optimizer_weight_values)
+                except ValueError:
+                    logging.warning(
+                        "Error in loading the saved optimizer "
+                        "state. As a result, your model is "
+                        "starting with a freshly initialized "
+                        "optimizer."
+                    )
+    finally:
+        if opened_new_file:
+            f.close()
+    return model
+
+
+def preprocess_weights_for_loading(
+    layer, weights, original_keras_version=None, original_backend=None
+):
+    """Preprocess layer weights between different Keras formats.
+
+    Converts layer weights from the Keras 1 format to Keras 2, and also
+    converts the weights of cuDNN layers in Keras 2.
+
+    Args:
+        layer: Layer instance.
+        weights: List of weights values (Numpy arrays).
+        original_keras_version: Keras version for the weights, as a string.
+        original_backend: Keras backend the weights were trained with,
+            as a string.
+
+    Returns:
+        A list of weights values (Numpy arrays).
+    """
+
+    def convert_nested_bidirectional(weights):
+        """Converts layers nested in `Bidirectional` wrapper.
+
+        This function uses `preprocess_weights_for_loading()` for converting
+        layers.
+
+        Args:
+            weights: List of weights values (Numpy arrays).
+
+        Returns:
+            A list of weights values (Numpy arrays).
+        """
+        num_weights_per_layer = len(weights) // 2
+        forward_weights = preprocess_weights_for_loading(
+            layer.forward_layer,
+            weights[:num_weights_per_layer],
+            original_keras_version,
+            original_backend,
+        )
+        backward_weights = preprocess_weights_for_loading(
+            layer.backward_layer,
+            weights[num_weights_per_layer:],
+            original_keras_version,
+            original_backend,
+        )
+        return forward_weights + backward_weights
+
+    def convert_nested_time_distributed(weights):
+        """Converts layers nested in `TimeDistributed` wrapper.
+
+        This function uses `preprocess_weights_for_loading()` for converting
+        nested layers.
+
+        Args:
+            weights: List of weights values (Numpy arrays).
+
+        Returns:
+            A list of weights values (Numpy arrays).
+        """
+        return preprocess_weights_for_loading(
+            layer.layer, weights, original_keras_version, original_backend
+        )
+
+    def convert_nested_model(weights):
+        """Converts layers nested in `Model` or `Sequential`.
+
+        This function uses `preprocess_weights_for_loading()` for converting
+        nested layers.
+
+        Args:
+            weights: List of weights values (Numpy arrays).
+
+        Returns:
+            A list of weights values (Numpy arrays).
+        """
+        trainable_weights = weights[: len(layer.trainable_weights)]
+        non_trainable_weights = weights[len(layer.trainable_weights) :]
+
+        new_trainable_weights = []
+        new_non_trainable_weights = []
+
+        for sublayer in layer.layers:
+            num_trainable_weights = len(sublayer.trainable_weights)
+            num_non_trainable_weights = len(sublayer.non_trainable_weights)
+            if sublayer.weights:
+                preprocessed = preprocess_weights_for_loading(
+                    layer=sublayer,
+                    weights=(
+                        trainable_weights[:num_trainable_weights]
+                        + non_trainable_weights[:num_non_trainable_weights]
+                    ),
+                    original_keras_version=original_keras_version,
+                    original_backend=original_backend,
+                )
+                new_trainable_weights.extend(
+                    preprocessed[:num_trainable_weights]
+                )
+                new_non_trainable_weights.extend(
+                    preprocessed[num_trainable_weights:]
+                )
+
+                trainable_weights = trainable_weights[num_trainable_weights:]
+                non_trainable_weights = non_trainable_weights[
+                    num_non_trainable_weights:
+                ]
+        new_trainable_weights += layer._trainable_weights
+        new_non_trainable_weights += layer._non_trainable_weights
+        return new_trainable_weights + new_non_trainable_weights
+
+    # Convert layers nested in Bidirectional/Model/Sequential.
+    # Both transformations should be run for the Keras 1->2 conversion
+    # and for the conversion of cuDNN layers.
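+    # For example, a `Bidirectional` wrapper stores the forward layer's
+    # weights followed by the backward layer's, so the flat weight list is
+    # split in half and each half is converted recursively, while
+    # `TimeDistributed` simply delegates to its inner layer.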
+ if layer.__class__.__name__ == "Bidirectional": + weights = convert_nested_bidirectional(weights) + if layer.__class__.__name__ == "TimeDistributed": + weights = convert_nested_time_distributed(weights) + elif layer.__class__.__name__ in ["Model", "Sequential", "Functional"]: + weights = convert_nested_model(weights) + + if original_keras_version == "1": + if layer.__class__.__name__ == "TimeDistributed": + weights = preprocess_weights_for_loading( + layer.layer, weights, original_keras_version, original_backend + ) + + if layer.__class__.__name__ == "Conv1D": + shape = weights[0].shape + # Handle Keras 1.1 format + if ( + shape[:2] != (layer.kernel_size[0], 1) + or shape[3] != layer.filters + ): + # Legacy shape: + # (filters, input_dim, filter_length, 1) + assert shape[0] == layer.filters and shape[2:] == ( + layer.kernel_size[0], + 1, + ) + weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) + weights[0] = weights[0][:, 0, :, :] + + if layer.__class__.__name__ == "Conv2D": + if layer.data_format == "channels_first": + # old: (filters, stack_size, kernel_rows, kernel_cols) + # new: (kernel_rows, kernel_cols, stack_size, filters) + weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) + + if layer.__class__.__name__ == "Conv2DTranspose": + if layer.data_format == "channels_last": + # old: (kernel_rows, kernel_cols, stack_size, filters) + # new: (kernel_rows, kernel_cols, filters, stack_size) + weights[0] = np.transpose(weights[0], (0, 1, 3, 2)) + if layer.data_format == "channels_first": + # old: (filters, stack_size, kernel_rows, kernel_cols) + # new: (kernel_rows, kernel_cols, filters, stack_size) + weights[0] = np.transpose(weights[0], (2, 3, 0, 1)) + + if layer.__class__.__name__ == "Conv3D": + if layer.data_format == "channels_first": + # old: (filters, stack_size, ...) 
+                # new: (..., stack_size, filters)
+                weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0))
+
+        if layer.__class__.__name__ == "GRU":
+            if len(weights) == 9:
+                kernel = np.concatenate(
+                    [weights[0], weights[3], weights[6]], axis=-1
+                )
+                recurrent_kernel = np.concatenate(
+                    [weights[1], weights[4], weights[7]], axis=-1
+                )
+                bias = np.concatenate(
+                    [weights[2], weights[5], weights[8]], axis=-1
+                )
+                weights = [kernel, recurrent_kernel, bias]
+
+        if layer.__class__.__name__ == "LSTM":
+            if len(weights) == 12:
+                # old: i, c, f, o
+                # new: i, f, c, o
+                kernel = np.concatenate(
+                    [weights[0], weights[6], weights[3], weights[9]], axis=-1
+                )
+                recurrent_kernel = np.concatenate(
+                    [weights[1], weights[7], weights[4], weights[10]], axis=-1
+                )
+                bias = np.concatenate(
+                    [weights[2], weights[8], weights[5], weights[11]], axis=-1
+                )
+                weights = [kernel, recurrent_kernel, bias]
+
+        if layer.__class__.__name__ == "ConvLSTM2D":
+            if len(weights) == 12:
+                kernel = np.concatenate(
+                    [weights[0], weights[6], weights[3], weights[9]], axis=-1
+                )
+                recurrent_kernel = np.concatenate(
+                    [weights[1], weights[7], weights[4], weights[10]], axis=-1
+                )
+                bias = np.concatenate(
+                    [weights[2], weights[8], weights[5], weights[11]], axis=-1
+                )
+                if layer.data_format == "channels_first":
+                    # old: (filters, stack_size, kernel_rows, kernel_cols)
+                    # new: (kernel_rows, kernel_cols, stack_size, filters)
+                    kernel = np.transpose(kernel, (2, 3, 1, 0))
+                    recurrent_kernel = np.transpose(
+                        recurrent_kernel, (2, 3, 1, 0)
+                    )
+                weights = [kernel, recurrent_kernel, bias]
+
+    conv_layers = [
+        "Conv1D",
+        "Conv2D",
+        "Conv3D",
+        "Conv2DTranspose",
+        "ConvLSTM2D",
+    ]
+    if layer.__class__.__name__ in conv_layers:
+        if backend.int_shape(layer.weights[0]) != weights[0].shape:
+            weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
+            if layer.__class__.__name__ == "ConvLSTM2D":
+                weights[1] = np.transpose(weights[1], (3, 2, 0, 1))
+
+    # convert cuDNN layers
+    return _convert_rnn_weights(layer, weights)
+
+
+def _convert_rnn_weights(layer, weights):
+    """Converts weights for RNN layers between native and cuDNN format.
+
+    Input kernels for each gate are transposed and converted between Fortran
+    and C layout, recurrent kernels are transposed. For LSTM biases are
+    summed/split in half, for GRU biases are reshaped.
+
+    Weights can be converted in both directions between `LSTM` and
+    `CuDNNLSTM` and between `CuDNNGRU` and `GRU(reset_after=True)`. Default
+    `GRU` is not compatible with `CuDNNGRU`.
+
+    For missing biases in `LSTM`/`GRU` (`use_bias=False`), no conversion is
+    made.
+
+    Args:
+        layer: Target layer instance.
+        weights: List of source weights values (input kernels, recurrent
+            kernels, [biases]) (Numpy arrays).
+
+    Returns:
+        A list of converted weights values (Numpy arrays).
+
+    Raises:
+        ValueError: for incompatible GRU layer/weights or incompatible
+            biases.
+    """
+
+    def transform_kernels(kernels, func, n_gates):
+        """Transforms kernel for each gate separately using given function.
+
+        Args:
+            kernels: Stacked array of kernels for individual gates.
+            func: Function applied to kernel of each gate.
+            n_gates: Number of gates (4 for LSTM, 3 for GRU).
+
+        Returns:
+            Stacked array of transformed kernels.
+        """
+        return np.hstack([func(k) for k in np.hsplit(kernels, n_gates)])
+
+    def transpose_input(from_cudnn):
+        """Makes a function that transforms input kernels from/to cuDNN format.
+
+        It keeps the shape, but changes between the layout (Fortran/C).
Eg.: + + ``` + Keras cuDNN + [[0, 1, 2], <---> [[0, 2, 4], + [3, 4, 5]] [1, 3, 5]] + ``` + + It can be passed to `transform_kernels()`. + + Args: + from_cudnn: `True` if source weights are in cuDNN format, `False` if + they're in plain Keras format. + + Returns: + Function that converts input kernel to the other format. + """ + order = "F" if from_cudnn else "C" + + def transform(kernel): + return kernel.T.reshape(kernel.shape, order=order) + + return transform + + target_class = layer.__class__.__name__ + + # convert the weights between CuDNNLSTM and LSTM + if target_class in ["LSTM", "CuDNNLSTM"] and len(weights) == 3: + # determine if we're loading a CuDNNLSTM layer + # from the number of bias weights: + # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4) + # if there's no bias weight in the file, skip this conversion + units = weights[1].shape[0] + bias_shape = weights[2].shape + n_gates = 4 + + if bias_shape == (2 * units * n_gates,): + source = "CuDNNLSTM" + elif bias_shape == (units * n_gates,): + source = "LSTM" + else: + raise ValueError("Invalid bias shape: " + str(bias_shape)) + + def convert_lstm_weights(weights, from_cudnn=True): + """Converts the weights between CuDNNLSTM and LSTM. + + Args: + weights: Original weights. + from_cudnn: Indicates whether original weights are from cuDNN + layer. + + Returns: + Updated weights compatible with LSTM. + """ + + # Transpose (and reshape) input and recurrent kernels + kernels = transform_kernels( + weights[0], transpose_input(from_cudnn), n_gates + ) + recurrent_kernels = transform_kernels( + weights[1], lambda k: k.T, n_gates + ) + if from_cudnn: + # merge input and recurrent biases into a single set + biases = np.sum(np.split(weights[2], 2, axis=0), axis=0) + else: + # Split single set of biases evenly to two sets. The way of + # splitting doesn't matter as long as the two sets sum is kept. + biases = np.tile(0.5 * weights[2], 2) + return [kernels, recurrent_kernels, biases] + + if source != target_class: + weights = convert_lstm_weights( + weights, from_cudnn=source == "CuDNNLSTM" + ) + + # convert the weights between CuDNNGRU and GRU(reset_after=True) + if target_class in ["GRU", "CuDNNGRU"] and len(weights) == 3: + # We can determine the source of the weights from the shape of the bias. + # If there is no bias we skip the conversion since + # CuDNNGRU always has biases. + + units = weights[1].shape[0] + bias_shape = weights[2].shape + n_gates = 3 + + def convert_gru_weights(weights, from_cudnn=True): + """Converts the weights between CuDNNGRU and GRU. + + Args: + weights: Original weights. + from_cudnn: Indicates whether original weights are from cuDNN + layer. + + Returns: + Updated weights compatible with GRU. 
+            """
+
+            kernels = transform_kernels(
+                weights[0], transpose_input(from_cudnn), n_gates
+            )
+            recurrent_kernels = transform_kernels(
+                weights[1], lambda k: k.T, n_gates
+            )
+            biases = np.array(weights[2]).reshape((2, -1) if from_cudnn else -1)
+            return [kernels, recurrent_kernels, biases]
+
+        if bias_shape == (2 * units * n_gates,):
+            source = "CuDNNGRU"
+        elif bias_shape == (2, units * n_gates):
+            source = "GRU(reset_after=True)"
+        elif bias_shape == (units * n_gates,):
+            source = "GRU(reset_after=False)"
+        else:
+            raise ValueError("Invalid bias shape: " + str(bias_shape))
+
+        if target_class == "CuDNNGRU":
+            target = "CuDNNGRU"
+        elif layer.reset_after:
+            target = "GRU(reset_after=True)"
+        else:
+            target = "GRU(reset_after=False)"
+
+        # only convert between different types
+        if source != target:
+            types = (source, target)
+            if "GRU(reset_after=False)" in types:
+                raise ValueError("%s is not compatible with %s" % types)
+            if source == "CuDNNGRU":
+                weights = convert_gru_weights(weights, from_cudnn=True)
+            elif source == "GRU(reset_after=True)":
+                weights = convert_gru_weights(weights, from_cudnn=False)
+
+    return weights
+
+
+def save_optimizer_weights_to_hdf5_group(hdf5_group, optimizer):
+    """Saves the weights of an optimizer to a HDF5 group.
+
+    Args:
+        hdf5_group: HDF5 group.
+        optimizer: optimizer instance.
+    """
+    if isinstance(optimizer, optimizer_base.Optimizer):
+        symbolic_weights = optimizer.variables
+    else:
+        symbolic_weights = getattr(optimizer, "weights")
+    if symbolic_weights:
+        weights_group = hdf5_group.create_group("optimizer_weights")
+        weight_names = [str(w.name).encode("utf8") for w in symbolic_weights]
+        save_attributes_to_hdf5_group(
+            weights_group, "weight_names", weight_names
+        )
+        weight_values = backend.batch_get_value(symbolic_weights)
+        for name, val in zip(weight_names, weight_values):
+            param_dset = weights_group.create_dataset(
+                name, val.shape, dtype=val.dtype
+            )
+            if not val.shape:
+                # scalar
+                param_dset[()] = val
+            else:
+                param_dset[:] = val
+
+
+def load_optimizer_weights_from_hdf5_group(hdf5_group):
+    """Load optimizer weights from a HDF5 group.
+
+    Args:
+        hdf5_group: A pointer to a HDF5 group.
+
+    Returns:
+        data: List of optimizer weight values (as HDF5 datasets), in the
+            order given by the saved `weight_names` attribute.
+    """
+    weights_group = hdf5_group["optimizer_weights"]
+    optimizer_weight_names = load_attributes_from_hdf5_group(
+        weights_group, "weight_names"
+    )
+    return [
+        weights_group[weight_name] for weight_name in optimizer_weight_names
+    ]
+
+
+def save_subset_weights_to_hdf5_group(f, weights):
+    """Save a subset of a model's weights to a HDF5 group.
+
+    Args:
+        f: HDF5 group.
+        weights: List of weight variables.
+    """
+    weight_values = backend.batch_get_value(weights)
+    weight_names = [w.name.encode("utf8") for w in weights]
+    save_attributes_to_hdf5_group(f, "weight_names", weight_names)
+    for name, val in zip(weight_names, weight_values):
+        param_dset = f.create_dataset(name, val.shape, dtype=val.dtype)
+        if not val.shape:
+            # scalar
+            param_dset[()] = val
+        else:
+            param_dset[:] = val
+
+
+def save_weights_to_hdf5_group(f, model):
+    """Saves the weights of a model's layers to a HDF5 group.
+
+    Args:
+        f: HDF5 group.
+        model: Model instance.
+ """ + from keras import __version__ as keras_version + + save_attributes_to_hdf5_group( + f, "layer_names", [layer.name.encode("utf8") for layer in model.layers] + ) + f.attrs["backend"] = backend.backend().encode("utf8") + f.attrs["keras_version"] = str(keras_version).encode("utf8") + + # Sort model layers by layer name to ensure that group names are strictly + # growing to avoid prefix issues. + for layer in sorted(model.layers, key=lambda x: x.name): + g = f.create_group(layer.name) + weights = _legacy_weights(layer) + save_subset_weights_to_hdf5_group(g, weights) + weights = model._trainable_weights + model._non_trainable_weights + g = f.create_group("top_level_model_weights") + save_subset_weights_to_hdf5_group(g, weights) + + +def load_subset_weights_from_hdf5_group(f): + """Load layer weights of a model from hdf5. + + Args: + f: A pointer to a HDF5 group. + + Returns: + List of NumPy arrays of the weight values. + + Raises: + ValueError: in case of mismatch between provided model + and weights file. + """ + weight_names = load_attributes_from_hdf5_group(f, "weight_names") + return [np.asarray(f[weight_name]) for weight_name in weight_names] + + +def load_weights_from_hdf5_group(f, model): + """Implements topological (order-based) weight loading. + + Args: + f: A pointer to a HDF5 group. + model: Model instance. + + Raises: + ValueError: in case of mismatch between provided layers + and weights file. + """ + if "keras_version" in f.attrs: + original_keras_version = f.attrs["keras_version"] + if hasattr(original_keras_version, "decode"): + original_keras_version = original_keras_version.decode("utf8") + else: + original_keras_version = "1" + if "backend" in f.attrs: + original_backend = f.attrs["backend"] + if hasattr(original_backend, "decode"): + original_backend = original_backend.decode("utf8") + else: + original_backend = None + + filtered_layers = [] + for layer in model.layers: + weights = _legacy_weights(layer) + if weights: + filtered_layers.append(layer) + + layer_names = load_attributes_from_hdf5_group(f, "layer_names") + filtered_layer_names = [] + for name in layer_names: + g = f[name] + weight_names = load_attributes_from_hdf5_group(g, "weight_names") + if weight_names: + filtered_layer_names.append(name) + layer_names = filtered_layer_names + if len(layer_names) != len(filtered_layers): + raise ValueError( + "Layer count mismatch when loading weights from file. " + f"Model expected {len(filtered_layers)} layers, found " + f"{len(layer_names)} saved layers." + ) + + # We batch weight value assignments in a single backend call + # which provides a speedup in TensorFlow. + weight_value_tuples = [] + for k, name in enumerate(layer_names): + g = f[name] + layer = filtered_layers[k] + symbolic_weights = _legacy_weights(layer) + weight_values = load_subset_weights_from_hdf5_group(g) + weight_values = preprocess_weights_for_loading( + layer, weight_values, original_keras_version, original_backend + ) + if len(weight_values) != len(symbolic_weights): + raise ValueError( + f"Weight count mismatch for layer #{k} (named {layer.name} in " + f"the current model, {name} in the save file). " + f"Layer expects {len(symbolic_weights)} weight(s). 
Received "
+            f"{len(weight_values)} saved weight(s)"
+        )
+        weight_value_tuples += zip(symbolic_weights, weight_values)
+
+    if "top_level_model_weights" in f:
+        symbolic_weights = (
+            model._trainable_weights + model._non_trainable_weights
+        )
+        weight_values = load_subset_weights_from_hdf5_group(
+            f["top_level_model_weights"]
+        )
+        if len(weight_values) != len(symbolic_weights):
+            raise ValueError(
+                "Weight count mismatch for top-level weights when loading "
+                "weights from file. "
+                f"Model expects {len(symbolic_weights)} top-level weight(s). "
+                f"Received {len(weight_values)} saved top-level weight(s)"
+            )
+        weight_value_tuples += zip(symbolic_weights, weight_values)
+    backend.batch_set_value(weight_value_tuples)
+
+    # Perform any layer defined finalization of the layer state.
+    for layer in model._flatten_layers():
+        layer.finalize_state()
+
+
+def load_weights_from_hdf5_group_by_name(f, model, skip_mismatch=False):
+    """Implements name-based weight loading (instead of topological loading).
+
+    Layers that have no matching name are skipped.
+
+    Args:
+        f: A pointer to a HDF5 group.
+        model: Model instance.
+        skip_mismatch: Boolean, whether to skip loading of layers
+            where there is a mismatch in the number of weights,
+            or a mismatch in the shape of the weights.
+
+    Raises:
+        ValueError: in case of mismatch between provided layers
+            and weights file and `skip_mismatch=False`.
+    """
+    if "keras_version" in f.attrs:
+        original_keras_version = f.attrs["keras_version"]
+        if hasattr(original_keras_version, "decode"):
+            original_keras_version = original_keras_version.decode("utf8")
+    else:
+        original_keras_version = "1"
+    if "backend" in f.attrs:
+        original_backend = f.attrs["backend"]
+        if hasattr(original_backend, "decode"):
+            original_backend = original_backend.decode("utf8")
+    else:
+        original_backend = None
+
+    # New file format.
+    layer_names = load_attributes_from_hdf5_group(f, "layer_names")
+
+    # Reverse index of layer name to list of layers with name.
+    index = {}
+    for layer in model.layers:
+        if layer.name:
+            index.setdefault(layer.name, []).append(layer)
+
+    # We batch weight value assignments in a single backend call
+    # which provides a speedup in TensorFlow.
+    weight_value_tuples = []
+    for k, name in enumerate(layer_names):
+        g = f[name]
+        weight_values = load_subset_weights_from_hdf5_group(g)
+        for layer in index.get(name, []):
+            symbolic_weights = _legacy_weights(layer)
+            weight_values = preprocess_weights_for_loading(
+                layer, weight_values, original_keras_version, original_backend
+            )
+            if len(weight_values) != len(symbolic_weights):
+                if skip_mismatch:
+                    logging.warning(
+                        f"Skipping loading of weights for layer #{k} (named "
+                        f"{layer.name}) due to mismatch in number of weights. "
+                        f"Layer expects {len(symbolic_weights)} weight(s). "
+                        f"Received {len(weight_values)} saved weight(s)"
+                    )
+                    continue
+                raise ValueError(
+                    f"Weight count mismatch for layer #{k} "
+                    f"(named {layer.name}). "
+                    f"Layer expects {len(symbolic_weights)} weight(s). "
+                    f"Received {len(weight_values)} saved weight(s)"
+                )
+            # Set values.
+            for i in range(len(weight_values)):
+                expected_shape = backend.int_shape(symbolic_weights[i])
+                received_shape = weight_values[i].shape
+                if expected_shape != received_shape:
+                    if skip_mismatch:
+                        logging.warning(
+                            f"Skipping loading weights for layer #{k} (named "
+                            f"{layer.name}) due to mismatch in shape for "
+                            f"weight {symbolic_weights[i].name}. "
+                            f"Weight expects shape {expected_shape}.
" + "Received saved weight " + f"with shape {received_shape}" + ) + continue + raise ValueError( + f"Shape mismatch in layer #{k} (named {layer.name}) " + f"for weight {symbolic_weights[i].name}. " + f"Weight expects shape {expected_shape}. " + "Received saved weight " + f"with shape {received_shape}" + ) + else: + weight_value_tuples.append( + (symbolic_weights[i], weight_values[i]) + ) + + if "top_level_model_weights" in f: + symbolic_weights = ( + model._trainable_weights + model._non_trainable_weights + ) + weight_values = load_subset_weights_from_hdf5_group( + f["top_level_model_weights"] + ) + + if len(weight_values) != len(symbolic_weights): + if skip_mismatch: + logging.warning( + "Skipping loading top-level weights for model due to " + "mismatch in number of weights. " + f"Model expects {len(symbolic_weights)} " + "top-level weight(s). " + f"Received {len(weight_values)} saved top-level weight(s)" + ) + else: + raise ValueError( + "Weight count mismatch for top-level weights of model. " + f"Model expects {len(symbolic_weights)} " + "top-level weight(s). " + f"Received {len(weight_values)} saved top-level weight(s)" + ) + else: + for i in range(len(weight_values)): + expected_shape = backend.int_shape(symbolic_weights[i]) + received_shape = weight_values[i].shape + if expected_shape != received_shape: + if skip_mismatch: + logging.warning( + "Skipping loading top-level weight for model due " + "to mismatch in shape for " + f"weight {symbolic_weights[i].name}. " + f"Weight expects shape {expected_shape}. " + "Received saved weight " + f"with shape {received_shape}" + ) + else: + raise ValueError( + "Shape mismatch in model for top-level weight " + f"{symbolic_weights[i].name}. " + f"Weight expects shape {expected_shape}. " + "Received saved weight " + f"with shape {received_shape}" + ) + else: + weight_value_tuples.append( + (symbolic_weights[i], weight_values[i]) + ) + + backend.batch_set_value(weight_value_tuples) + + # Perform any layer defined finalization of the layer state. + for layer in model._flatten_layers(): + layer.finalize_state() + + +def save_attributes_to_hdf5_group(group, name, data): + """Saves attributes (data) of the specified name into the HDF5 group. + + This method deals with an inherent problem of HDF5 file which is not + able to store data larger than HDF5_OBJECT_HEADER_LIMIT bytes. + + Args: + group: A pointer to a HDF5 group. + name: A name of the attributes to save. + data: Attributes data to store. + + Raises: + RuntimeError: If any single attribute is too large to be saved. + """ + # Check that no item in `data` is larger than `HDF5_OBJECT_HEADER_LIMIT` + # because in that case even chunking the array would not make the saving + # possible. + bad_attributes = [x for x in data if len(x) > HDF5_OBJECT_HEADER_LIMIT] + + # Expecting this to never be true. + if bad_attributes: + raise RuntimeError( + "The following attributes cannot be saved to HDF5 file because " + f"they are larger than {HDF5_OBJECT_HEADER_LIMIT} " + f"bytes: {bad_attributes}" + ) + + data_npy = np.asarray(data) + + num_chunks = 1 + chunked_data = np.array_split(data_npy, num_chunks) + + # This will never loop forever thanks to the test above. 
+    while any(x.nbytes > HDF5_OBJECT_HEADER_LIMIT for x in chunked_data):
+        num_chunks += 1
+        chunked_data = np.array_split(data_npy, num_chunks)
+
+    if num_chunks > 1:
+        for chunk_id, chunk_data in enumerate(chunked_data):
+            group.attrs["%s%d" % (name, chunk_id)] = chunk_data
+    else:
+        group.attrs[name] = data
+
+
+def load_attributes_from_hdf5_group(group, name):
+    """Loads attributes of the specified name from the HDF5 group.
+
+    This method deals with an inherent problem
+    of HDF5 file which is not able to store
+    data larger than HDF5_OBJECT_HEADER_LIMIT bytes.
+
+    Args:
+        group: A pointer to a HDF5 group.
+        name: A name of the attributes to load.
+
+    Returns:
+        data: Attributes data.
+    """
+    if name in group.attrs:
+        data = [
+            n.decode("utf8") if hasattr(n, "decode") else n
+            for n in group.attrs[name]
+        ]
+    else:
+        data = []
+        chunk_id = 0
+        while "%s%d" % (name, chunk_id) in group.attrs:
+            data.extend(
+                [
+                    n.decode("utf8") if hasattr(n, "decode") else n
+                    for n in group.attrs["%s%d" % (name, chunk_id)]
+                ]
+            )
+            chunk_id += 1
+    return data
+
+
+def _legacy_weights(layer):
+    """DO NOT USE.
+
+    For legacy reasons, `layer.weights` was in the order of
+    `[self.trainable_weights + self.non_trainable_weights]`, and this order
+    was used for preserving the weights in h5 format. The new order of
+    `layer.weights` is the same as `layer.get_weights()`, which is more
+    intuitive for users. To keep supporting existing saved h5 files, this
+    method should be used to save/load weights. In a future version, we will
+    delete this method and introduce a breaking change for h5, staying with
+    the new order for weights.
+
+    Args:
+        layer: a `tf.keras.Model` or `tf.keras.layers.Layer` instance.
+
+    Returns:
+        A list of variables with the order of trainable_weights, followed by
+        non_trainable_weights.
+    """
+    weights = layer.trainable_weights + layer.non_trainable_weights
+    if any(not isinstance(w, tf.Variable) for w in weights):
+        raise NotImplementedError(
+            "Saving or restoring weights that are not instances of "
+            "`tf.Variable` is not supported in h5; use `save_format='tf'` "
+            f"instead. Received a model or layer {layer.__class__.__name__} "
+            f"with weights {weights}"
+        )
+    return weights
diff --git a/keras/saving/legacy/losses_serialization_test.py b/keras/saving/legacy/losses_serialization_test.py
new file mode 100644
index 000000000000..3a4df6ad84b5
--- /dev/null
+++ b/keras/saving/legacy/losses_serialization_test.py
@@ -0,0 +1,213 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== +"""Tests for Keras losses serialization.""" + +import os +import shutil + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +import keras +from keras import layers +from keras import losses +from keras.optimizers import legacy as optimizer_legacy +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import losses_utils + +try: + import h5py +except ImportError: + h5py = None + + +# Custom loss class +class MyMeanAbsoluteError(losses.LossFunctionWrapper): + def __init__( + self, + reduction=losses_utils.ReductionV2.AUTO, + name="mean_absolute_error", + ): + super().__init__(my_mae, name=name, reduction=reduction) + + +# Custom loss function +def my_mae(y_true, y_pred): + return keras.backend.mean(tf.abs(y_pred - y_true), axis=-1) + + +def _get_multi_io_model(): + inp_1 = layers.Input(shape=(1,), name="input_1") + inp_2 = layers.Input(shape=(1,), name="input_2") + d = test_utils.Bias(name="output") + out_1 = d(inp_1) + out_2 = d(inp_2) + return keras.Model([inp_1, inp_2], [out_1, out_2]) + + +@test_combinations.run_all_keras_modes +@parameterized.named_parameters( + [ + dict(testcase_name="string", value="mae"), + dict(testcase_name="built_in_fn", value=losses.mae), + dict(testcase_name="built_in_class", value=losses.MeanAbsoluteError()), + dict(testcase_name="custom_fn", value=my_mae), + dict(testcase_name="custom_class", value=MyMeanAbsoluteError()), + dict(testcase_name="list_of_strings", value=["mae", "mae"]), + dict( + testcase_name="list_of_built_in_fns", value=[losses.mae, losses.mae] + ), + dict( + testcase_name="list_of_built_in_classes", + value=[losses.MeanAbsoluteError(), losses.MeanAbsoluteError()], + ), + dict(testcase_name="list_of_custom_fns", value=[my_mae, my_mae]), + dict( + testcase_name="list_of_custom_classes", + value=[MyMeanAbsoluteError(), MyMeanAbsoluteError()], + ), + dict( + testcase_name="dict_of_string", + value={ + "output": "mae", + "output_1": "mae", + }, + ), + dict( + testcase_name="dict_of_built_in_fn", + value={ + "output": losses.mae, + "output_1": losses.mae, + }, + ), + dict( + testcase_name="dict_of_built_in_class", + value={ + "output": losses.MeanAbsoluteError(), + "output_1": losses.MeanAbsoluteError(), + }, + ), + dict( + testcase_name="dict_of_custom_fn", + value={"output": my_mae, "output_1": my_mae}, + ), + dict( + testcase_name="dict_of_custom_class", + value={ + "output": MyMeanAbsoluteError(), + "output_1": MyMeanAbsoluteError(), + }, + ), + ] +) +class LossesSerialization(test_combinations.TestCase): + def setUp(self): + super(LossesSerialization, self).setUp() + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir) + self.model_filename = os.path.join(tmpdir, "tmp_model_loss.h5") + self.x = np.array([[0.0], [1.0], [2.0]], dtype="float32") + self.y = np.array([[0.5], [2.0], [3.5]], dtype="float32") + self.w = np.array([1.25, 0.5, 1.25], dtype="float32") + + def test_serializing_model_with_loss_with_custom_object_scope(self, value): + with keras.utils.custom_object_scope( + { + "MyMeanAbsoluteError": MyMeanAbsoluteError, + "my_mae": my_mae, + "Bias": test_utils.Bias, + } + ): + model = _get_multi_io_model() + model.compile( + optimizer_legacy.gradient_descent.SGD(0.1), + loss=value, + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit( + [self.x, self.x], + [self.y, self.y], + batch_size=3, + epochs=3, + 
sample_weight=[self.w, self.w], + ) + + # Assert training. + self.assertAllClose(history.history["loss"], [2.0, 1.6, 1.2], 1e-3) + eval_results = model.evaluate( + [self.x, self.x], + [self.y, self.y], + sample_weight=[self.w, self.w], + ) + + if h5py is None: + return + model.save(self.model_filename) + loaded_model = keras.models.load_model(self.model_filename) + loaded_model.predict([self.x, self.x]) + loaded_eval_results = loaded_model.evaluate( + [self.x, self.x], + [self.y, self.y], + sample_weight=[self.w, self.w], + ) + + # Assert all evaluation results are the same. + self.assertAllClose(eval_results, loaded_eval_results, 1e-9) + + def test_serializing_model_with_loss_with_custom_objects(self, value): + model = _get_multi_io_model() + model.compile( + optimizer_legacy.gradient_descent.SGD(0.1), + loss=value, + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit( + [self.x, self.x], + [self.y, self.y], + batch_size=3, + epochs=3, + sample_weight=[self.w, self.w], + ) + + # Assert training. + self.assertAllClose(history.history["loss"], [2.0, 1.6, 1.2], 1e-3) + eval_results = model.evaluate( + [self.x, self.x], [self.y, self.y], sample_weight=[self.w, self.w] + ) + + if h5py is None: + return + model.save(self.model_filename) + loaded_model = keras.models.load_model( + self.model_filename, + custom_objects={ + "MyMeanAbsoluteError": MyMeanAbsoluteError, + "my_mae": my_mae, + "Bias": test_utils.Bias, + }, + ) + loaded_model.predict([self.x, self.x]) + loaded_eval_results = loaded_model.evaluate( + [self.x, self.x], [self.y, self.y], sample_weight=[self.w, self.w] + ) + + # Assert all evaluation results are the same. + self.assertAllClose(eval_results, loaded_eval_results, 1e-9) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/saving/legacy/metrics_serialization_test.py b/keras/saving/legacy/metrics_serialization_test.py new file mode 100644 index 000000000000..9956657d0440 --- /dev/null +++ b/keras/saving/legacy/metrics_serialization_test.py @@ -0,0 +1,278 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Keras metrics serialization.""" + +import os +import shutil + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +import keras +from keras import layers +from keras import metrics +from keras.optimizers import legacy as optimizer_legacy +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import custom_object_scope + +try: + import h5py +except ImportError: + h5py = None + + +# Custom metric +class MyMeanAbsoluteError(metrics.MeanMetricWrapper): + def __init__(self, name="my_mae", dtype=None): + super().__init__(_my_mae, name, dtype=dtype) + + +# Custom metric function +def _my_mae(y_true, y_pred): + return keras.backend.mean(tf.abs(y_pred - y_true), axis=-1) + + +def _get_multi_io_model(): + inp_1 = layers.Input(shape=(1,), name="input_1") + inp_2 = layers.Input(shape=(1,), name="input_2") + d = test_utils.Bias(name="output") + out_1 = d(inp_1) + out_2 = d(inp_2) + return keras.Model([inp_1, inp_2], [out_1, out_2]) + + +@test_combinations.run_all_keras_modes +@parameterized.named_parameters( + dict(testcase_name="string", value=["mae"]), + dict(testcase_name="built_in_fn", value=[metrics.mae]), + dict(testcase_name="built_in_class", value=[metrics.MeanAbsoluteError]), + dict(testcase_name="custom_fn", value=[_my_mae]), + dict(testcase_name="custom_class", value=[MyMeanAbsoluteError]), + dict( + testcase_name="list_of_built_in_fn_and_list", + value=[metrics.mae, [metrics.mae]], + ), + dict( + testcase_name="list_of_built_in_class_and_list", + value=[metrics.MeanAbsoluteError, [metrics.MeanAbsoluteError]], + ), + dict( + testcase_name="list_of_custom_fn_and_list", value=[_my_mae, [_my_mae]] + ), + dict( + testcase_name="list_of_custom_class_and_list", + value=[MyMeanAbsoluteError, [MyMeanAbsoluteError]], + ), + dict( + testcase_name="list_of_lists_of_custom_fns", + value=[[_my_mae], [_my_mae, "mae"]], + ), + dict( + testcase_name="list_of_lists_of_custom_classes", + value=[[MyMeanAbsoluteError], [MyMeanAbsoluteError, "mae"]], + ), + dict( + testcase_name="dict_of_list_of_string", + value={ + "output": ["mae"], + "output_1": ["mae"], + }, + ), + dict( + testcase_name="dict_of_list_of_built_in_fn", + value={ + "output": [metrics.mae], + "output_1": [metrics.mae], + }, + ), + dict( + testcase_name="dict_of_list_of_built_in_class", + value={ + "output": [metrics.MeanAbsoluteError], + "output_1": [metrics.MeanAbsoluteError], + }, + ), + dict( + testcase_name="dict_of_list_of_custom_fn", + value={ + "output": [_my_mae], + "output_1": [_my_mae], + }, + ), + dict( + testcase_name="dict_of_list_of_custom_class", + value={ + "output": [MyMeanAbsoluteError], + "output_1": [MyMeanAbsoluteError], + }, + ), + dict( + testcase_name="dict_of_string", + value={ + "output": "mae", + "output_1": "mae", + }, + ), + dict( + testcase_name="dict_of_built_in_fn", + value={ + "output": metrics.mae, + "output_1": metrics.mae, + }, + ), + dict( + testcase_name="dict_of_built_in_class", + value={ + "output": metrics.MeanAbsoluteError, + "output_1": metrics.MeanAbsoluteError, + }, + ), + dict( + testcase_name="dict_of_custom_fn", + value={"output": _my_mae, "output_1": _my_mae}, + ), + dict( + testcase_name="dict_of_custom_class", + value={ + "output": MyMeanAbsoluteError, + "output_1": MyMeanAbsoluteError, + }, + ), +) +class MetricsSerialization(test_combinations.TestCase): + def setUp(self): + 
super(MetricsSerialization, self).setUp() + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir) + self.model_filename = os.path.join(tmpdir, "tmp_model_metric.h5") + self.x = np.array([[0.0], [1.0], [2.0]], dtype="float32") + self.y = np.array([[0.5], [2.0], [3.5]], dtype="float32") + self.w = np.array([1.25, 0.5, 1.25], dtype="float32") + + def test_serializing_model_with_metric_with_custom_object_scope( + self, value + ): + def get_instance(x): + if isinstance(x, str): + return x + if isinstance(x, type) and issubclass(x, metrics.Metric): + return x() + return x + + metric_input = tf.nest.map_structure(get_instance, value) + weighted_metric_input = tf.nest.map_structure(get_instance, value) + + with custom_object_scope( + { + "MyMeanAbsoluteError": MyMeanAbsoluteError, + "_my_mae": _my_mae, + "Bias": test_utils.Bias, + } + ): + model = _get_multi_io_model() + model.compile( + optimizer_legacy.gradient_descent.SGD(0.1), + "mae", + metrics=metric_input, + weighted_metrics=weighted_metric_input, + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit( + [self.x, self.x], + [self.y, self.y], + batch_size=3, + epochs=3, + sample_weight=[self.w, self.w], + ) + + # Assert training. + self.assertAllClose(history.history["loss"], [2.0, 1.6, 1.2], 1e-3) + eval_results = model.evaluate( + [self.x, self.x], + [self.y, self.y], + sample_weight=[self.w, self.w], + ) + + if h5py is None: + return + model.save(self.model_filename) + loaded_model = keras.models.load_model(self.model_filename) + loaded_model.predict([self.x, self.x]) + loaded_eval_results = loaded_model.evaluate( + [self.x, self.x], + [self.y, self.y], + sample_weight=[self.w, self.w], + ) + + # Assert all evaluation results are the same. + self.assertAllClose(eval_results, loaded_eval_results, 1e-9) + + def test_serializing_model_with_metric_with_custom_objects(self, value): + def get_instance(x): + if isinstance(x, str): + return x + if isinstance(x, type) and issubclass(x, metrics.Metric): + return x() + return x + + metric_input = tf.nest.map_structure(get_instance, value) + weighted_metric_input = tf.nest.map_structure(get_instance, value) + + model = _get_multi_io_model() + model.compile( + optimizer_legacy.gradient_descent.SGD(0.1), + "mae", + metrics=metric_input, + weighted_metrics=weighted_metric_input, + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit( + [self.x, self.x], + [self.y, self.y], + batch_size=3, + epochs=3, + sample_weight=[self.w, self.w], + ) + + # Assert training. + self.assertAllClose(history.history["loss"], [2.0, 1.6, 1.2], 1e-3) + eval_results = model.evaluate( + [self.x, self.x], [self.y, self.y], sample_weight=[self.w, self.w] + ) + + if h5py is None: + return + model.save(self.model_filename) + loaded_model = keras.models.load_model( + self.model_filename, + custom_objects={ + "MyMeanAbsoluteError": MyMeanAbsoluteError, + "_my_mae": _my_mae, + "Bias": test_utils.Bias, + }, + ) + loaded_model.predict([self.x, self.x]) + loaded_eval_results = loaded_model.evaluate( + [self.x, self.x], [self.y, self.y], sample_weight=[self.w, self.w] + ) + + # Assert all evaluation results are the same. + self.assertAllClose(eval_results, loaded_eval_results, 1e-9) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/saving/legacy/model_config.py b/keras/saving/legacy/model_config.py new file mode 100644 index 000000000000..a916289b3ab6 --- /dev/null +++ b/keras/saving/legacy/model_config.py @@ -0,0 +1,125 @@ +# Copyright 2018 The TensorFlow Authors. 
All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Functions that save the model's config into different formats."""
+
+# isort: off
+
+import threading
+from tensorflow.python.util.tf_export import keras_export
+from keras.saving.legacy import serialization
+
+MODULE_OBJECTS = threading.local()
+
+
+@keras_export("keras.models.model_from_config")
+def model_from_config(config, custom_objects=None):
+    """Instantiates a Keras model from its config.
+
+    Usage:
+    ```
+    # for a Functional API model
+    tf.keras.Model().from_config(model.get_config())
+
+    # for a Sequential model
+    tf.keras.Sequential().from_config(model.get_config())
+    ```
+
+    Args:
+        config: Configuration dictionary.
+        custom_objects: Optional dictionary mapping names
+            (strings) to custom classes or functions to be
+            considered during deserialization.
+
+    Returns:
+        A Keras model instance (uncompiled).
+
+    Raises:
+        TypeError: if `config` is not a dictionary.
+    """
+    if isinstance(config, list):
+        raise TypeError(
+            "`model_from_config` expects a dictionary, not a list. "
+            f"Received: config={config}. Did you mean to use "
+            "`Sequential.from_config(config)`?"
+        )
+    from keras import layers
+
+    global MODULE_OBJECTS
+
+    if not hasattr(MODULE_OBJECTS, "ALL_OBJECTS"):
+        layers.serialization.populate_deserializable_objects()
+        MODULE_OBJECTS.ALL_OBJECTS = layers.serialization.LOCAL.ALL_OBJECTS
+
+    return serialization.deserialize_keras_object(
+        config,
+        module_objects=MODULE_OBJECTS.ALL_OBJECTS,
+        custom_objects=custom_objects,
+        printable_module_name="layer",
+    )
+
+
+@keras_export("keras.models.model_from_yaml")
+def model_from_yaml(yaml_string, custom_objects=None):
+    """Parses a yaml model configuration file and returns a model instance.
+
+    Note: Since TF 2.6, this method is no longer supported and will raise a
+    RuntimeError.
+
+    Args:
+        yaml_string: YAML string or open file encoding a model configuration.
+        custom_objects: Optional dictionary mapping names
+            (strings) to custom classes or functions to be
+            considered during deserialization.
+
+    Returns:
+        A Keras model instance (uncompiled).
+
+    Raises:
+        RuntimeError: always, since this method has been removed because it
+            poses a security risk.
+    """
+    raise RuntimeError(
+        "Method `model_from_yaml()` has been removed due to security risk of "
+        "arbitrary code execution. Please use `Model.to_json()` and "
+        "`model_from_json()` instead."
+    )
+
+
+@keras_export("keras.models.model_from_json")
+def model_from_json(json_string, custom_objects=None):
+    """Parses a JSON model configuration string and returns a model instance.
+
+    Usage:
+
+    >>> model = tf.keras.Sequential([
+    ...     tf.keras.layers.Dense(5, input_shape=(3,)),
+    ...     tf.keras.layers.Softmax()])
+    >>> config = model.to_json()
+    >>> loaded_model = tf.keras.models.model_from_json(config)
+
+    Args:
+        json_string: JSON string encoding a model configuration.
+ custom_objects: Optional dictionary mapping names + (strings) to custom classes or functions to be + considered during deserialization. + + Returns: + A Keras model instance (uncompiled). + """ + from keras.layers import ( + deserialize_from_json, + ) + + return deserialize_from_json(json_string, custom_objects=custom_objects) diff --git a/keras/saving/legacy/save.py b/keras/saving/legacy/save.py new file mode 100644 index 000000000000..4c6a3825308f --- /dev/null +++ b/keras/saving/legacy/save.py @@ -0,0 +1,547 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Keras model saving code.""" + +import os + +import tensorflow.compat.v2 as tf + +from keras import backend +from keras.saving import object_registration +from keras.saving.legacy import hdf5_format +from keras.saving.legacy import saving_utils +from keras.saving.legacy import serialization +from keras.saving.legacy.saved_model import load as saved_model_load +from keras.saving.legacy.saved_model import load_context +from keras.saving.legacy.saved_model import save as saved_model_save +from keras.saving.legacy.saved_model.utils import keras_option_scope +from keras.utils import io_utils +from keras.utils import traceback_utils + +try: + import h5py +except ImportError: + h5py = None + + +@traceback_utils.filter_traceback +def save_model( + model, + filepath, + overwrite=True, + include_optimizer=True, + save_format=None, + signatures=None, + options=None, + save_traces=True, +): + """Saves a model as a TensorFlow SavedModel or HDF5 file. + + See the [Serialization and Saving + guide](https://keras.io/guides/serialization_and_saving/) for details. + + Usage: + + >>> model = tf.keras.Sequential([ + ... tf.keras.layers.Dense(5, input_shape=(3,)), + ... tf.keras.layers.Softmax()]) + >>> model.save('/tmp/model') + >>> loaded_model = tf.keras.models.load_model('/tmp/model') + >>> x = tf.random.uniform((10, 3)) + >>> assert np.allclose(model.predict(x), loaded_model.predict(x)) + + Note that `model.save()` is an alias for `tf.keras.models.save_model()`. + + The SavedModel and HDF5 file contains: + + - the model's configuration (topology) + - the model's weights + - the model's optimizer's state (if any) + + Thus models can be reinstantiated in the exact same state, without any of + the code used for model definition or training. + + Note that the model weights may have different scoped names after being + loaded. Scoped names include the model/layer names, such as + `"dense_1/kernel:0"`. It is recommended that you use the layer properties to + access specific variables, e.g. `model.get_layer("dense_1").kernel`. + + __SavedModel serialization format__ + + Keras SavedModel uses `tf.saved_model.save` to save the model and all + trackable objects attached to the model (e.g. layers and variables). The + model config, weights, and optimizer are saved in the SavedModel. 
+ Additionally, for every Keras layer attached to the model, the SavedModel + stores: + + * the config and metadata -- e.g. name, dtype, trainable status + * traced call and loss functions, which are stored as TensorFlow + subgraphs. + + The traced functions allow the SavedModel format to save and load custom + layers without the original class definition. + + You can choose to not save the traced functions by disabling the + `save_traces` option. This will decrease the time it takes to save the model + and the amount of disk space occupied by the output SavedModel. If you + enable this option, then you _must_ provide all custom class definitions + when loading the model. See the `custom_objects` argument in + `tf.keras.models.load_model`. + + Args: + model: Keras model instance to be saved. + filepath: One of the following: + - String or `pathlib.Path` object, path where to save the model + - `h5py.File` object where to save the model + overwrite: Whether we should overwrite any existing model at the target + location, or instead ask the user with a manual prompt. + include_optimizer: If True, save optimizer's state together. + save_format: Either 'tf' or 'h5', indicating whether to save the model + to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, and 'h5' + in TF 1.X. + signatures: Signatures to save with the SavedModel. Applicable to the + 'tf' format only. Please see the `signatures` argument in + `tf.saved_model.save` for details. + options: (only applies to SavedModel format) + `tf.saved_model.SaveOptions` object that specifies options for saving + to SavedModel. + save_traces: (only applies to SavedModel format) When enabled, the + SavedModel will store the function traces for each layer. This + can be disabled, so that only the configs of each layer are stored. + Defaults to `True`. Disabling this will decrease serialization time + and reduce file size, but it requires that all custom layers/models + implement a `get_config()` method. + + Raises: + ImportError: If save format is hdf5, and h5py is not available. + """ + + from keras.engine import sequential + + default_format = "tf" if tf.__internal__.tf2.enabled() else "h5" + save_format = save_format or default_format + + filepath = io_utils.path_to_string(filepath) + + # If the user has not already called fit or built the underlying metrics, we + # should do that before saving to ensure the metric names have all + # appropriate name transformations applied. + saving_utils.try_build_compiled_arguments(model) + + if ( + save_format == "h5" + or (h5py is not None and isinstance(filepath, h5py.File)) + or saving_utils.is_hdf5_filepath(filepath) + ): + # TODO(b/130258301): add utility method for detecting model type. + if not model._is_graph_network and not isinstance( + model, sequential.Sequential + ): + raise NotImplementedError( + "Saving the model to HDF5 format requires the model to be a " + "Functional model or a Sequential model. It does not work for " + "subclassed models, because such models are defined via the " + "body of a Python method, which isn't safely serializable. " + "Consider saving to the Tensorflow SavedModel format (by " + 'setting save_format="tf") or using `save_weights`.' 
+            )
+        hdf5_format.save_model_to_hdf5(
+            model, filepath, overwrite, include_optimizer
+        )
+    else:
+        with serialization.SharedObjectSavingScope():
+            with keras_option_scope(
+                save_traces=save_traces, in_tf_saved_model_scope=True
+            ):
+                saved_model_save.save(
+                    model,
+                    filepath,
+                    overwrite,
+                    include_optimizer,
+                    signatures,
+                    options,
+                    save_traces,
+                )
+
+
+@traceback_utils.filter_traceback
+def load_model(filepath, custom_objects=None, compile=True, options=None):
+    """Loads a model saved via `model.save()`.
+
+    Usage:
+
+    >>> model = tf.keras.Sequential([
+    ...     tf.keras.layers.Dense(5, input_shape=(3,)),
+    ...     tf.keras.layers.Softmax()])
+    >>> model.save('/tmp/model')
+    >>> loaded_model = tf.keras.models.load_model('/tmp/model')
+    >>> x = tf.random.uniform((10, 3))
+    >>> assert np.allclose(model.predict(x), loaded_model.predict(x))
+
+    Note that the model weights may have different scoped names after being
+    loaded. Scoped names include the model/layer names, such as
+    `"dense_1/kernel:0"`. It is recommended that you use the layer properties to
+    access specific variables, e.g. `model.get_layer("dense_1").kernel`.
+
+    Args:
+        filepath: One of the following:
+            - String or `pathlib.Path` object, path to the saved model
+            - `h5py.File` object from which to load the model
+        custom_objects: Optional dictionary mapping names
+            (strings) to custom classes or functions to be
+            considered during deserialization.
+        compile: Boolean, whether to compile the model
+            after loading.
+        options: Optional `tf.saved_model.LoadOptions` object that specifies
+            options for loading from SavedModel.
+
+    Returns:
+        A Keras model instance. If the original model was compiled, and saved
+        with the optimizer, then the returned model will be compiled.
+        Otherwise, the model will be left uncompiled. In the case that an
+        uncompiled model is returned, a warning is displayed if the `compile`
+        argument is set to `True`.
+
+    Raises:
+        ImportError: if loading from an hdf5 file and h5py is not available.
+        IOError: In case of an invalid savefile.
+    """
+    with serialization.SharedObjectLoadingScope():
+        custom_objects = custom_objects or {}
+        tlco = object_registration._THREAD_LOCAL_CUSTOM_OBJECTS.__dict__
+        gco = object_registration._GLOBAL_CUSTOM_OBJECTS
+        custom_objects = {**custom_objects, **tlco, **gco}
+        with object_registration.CustomObjectScope(custom_objects):
+            with keras_option_scope(
+                save_traces=False, in_tf_saved_model_scope=True
+            ):
+                with load_context.load_context(options):
+                    filepath_str = io_utils.path_to_string(filepath)
+                    if isinstance(filepath_str, str):
+                        if not tf.io.gfile.exists(filepath_str):
+                            raise IOError(
                                f"No file or directory found at {filepath_str}"
+                            )
+
+                        if tf.io.gfile.isdir(filepath_str):
+                            return saved_model_load.load(
+                                filepath_str, compile, options
+                            )
+                        else:
+                            if h5py is None:
+                                raise ImportError(
+                                    "Filepath looks like a hdf5 file but "
+                                    "h5py is not available."
+                                    f" filepath={filepath_str}"
+                                )
+                            return hdf5_format.load_model_from_hdf5(
+                                tf.io.gfile.GFile(filepath_str, mode="rb"),
+                                custom_objects,
+                                compile,
+                            )
+                    elif h5py is not None and isinstance(filepath, h5py.File):
+                        return hdf5_format.load_model_from_hdf5(
+                            filepath, custom_objects, compile
+                        )
+
+    raise IOError(
+        "Unable to load model. Filepath is not an hdf5 file (or h5py is not "
+        f"available) or SavedModel. Received: filepath={filepath}"
+    )
+
+
+def save_weights(
+    model, filepath, overwrite=True, save_format=None, options=None
+):
+    """Saves all layer weights.
+ + Either saves in HDF5 or in TensorFlow format based on the `save_format` + argument. + + When saving in HDF5 format, the weight file has: + - `layer_names` (attribute), a list of strings + (ordered names of model layers). + - For every layer, a `group` named `layer.name` + - For every such layer group, a group attribute `weight_names`, + a list of strings + (ordered names of weights tensor of the layer). + - For every weight in the layer, a dataset + storing the weight value, named after the weight tensor. + + When saving in TensorFlow format, all objects referenced by the network + are saved in the same format as `tf.train.Checkpoint`, including any + `Layer` instances or `Optimizer` instances assigned to object + attributes. For networks constructed from inputs and outputs using + `tf.keras.Model(inputs, outputs)`, `Layer` instances used by the network + are tracked/saved automatically. For user-defined classes which inherit + from `tf.keras.Model`, `Layer` instances must be assigned to object + attributes, typically in the constructor. See the documentation of + `tf.train.Checkpoint` and `tf.keras.Model` for details. + + While the formats are the same, do not mix `save_weights` and + `tf.train.Checkpoint`. Checkpoints saved by `Model.save_weights` should + be loaded using `Model.load_weights`. Checkpoints saved using + `tf.train.Checkpoint.save` should be restored using the corresponding + `tf.train.Checkpoint.restore`. Prefer `tf.train.Checkpoint` over + `save_weights` for training checkpoints. + + The TensorFlow format matches objects and variables by starting at a + root object, `self` for `save_weights`, and greedily matching attribute + names. For `Model.save` this is the `Model`, and for `Checkpoint.save` + this is the `Checkpoint` even if the `Checkpoint` has a model attached. + This means saving a `tf.keras.Model` using `save_weights` and loading + into a `tf.train.Checkpoint` with a `Model` attached (or vice versa) + will not match the `Model`'s variables. See the + [guide to training checkpoints]( + https://www.tensorflow.org/guide/checkpoint) for details on + the TensorFlow format. + + Args: + filepath: String or PathLike, path to the file to save the weights + to. When saving in TensorFlow format, this is the prefix used + for checkpoint files (multiple files are generated). Note that + the '.h5' suffix causes weights to be saved in HDF5 format. + overwrite: Whether to silently overwrite any existing file at the + target location, or provide the user with a manual prompt. + save_format: Either 'tf' or 'h5'. A `filepath` ending in '.h5' or + '.keras' will default to HDF5 if `save_format` is `None`. + Otherwise `None` defaults to 'tf'. + options: Optional `tf.train.CheckpointOptions` object that specifies + options for saving weights. + + Raises: + ImportError: If `h5py` is not available when attempting to save in + HDF5 format. + """ + model._assert_weights_created() + filepath = io_utils.path_to_string(filepath) + filepath_is_h5 = saving_utils.is_hdf5_filepath(filepath) + if save_format is None: + if filepath_is_h5: + save_format = "h5" + else: + save_format = "tf" + else: + user_format = save_format.lower().strip() + if user_format in ("tensorflow", "tf"): + save_format = "tf" + elif user_format in ("hdf5", "h5", "keras"): + save_format = "h5" + else: + raise ValueError( + f"Unknown format. Received: `save_format`={save_format}. " + 'Was expecting one of {"tf", "h5"}.' 
+ ) + if save_format == "tf" and filepath_is_h5: + raise ValueError( + 'save_weights got save_format="tf"/"tensorflow", but the ' + f"filepath ({filepath}) looks like an HDF5 file. " + 'Omit the ".h5"/".keras" when saving in TensorFlow format.' + ) + + if save_format == "h5" and h5py is None: + raise ImportError( + "`save_weights` requires h5py when saving in hdf5, but h5py is " + "not available. Try installing h5py package." + ) + if save_format == "tf": + check_filepath = filepath + ".index" + else: + check_filepath = filepath + # If file exists and should not be overwritten: + if not overwrite and os.path.isfile(check_filepath): + proceed = io_utils.ask_to_proceed_with_overwrite(check_filepath) + if not proceed: + return + if save_format == "h5": + with h5py.File(filepath, "w") as f: + hdf5_format.save_weights_to_hdf5_group(f, model) + else: + if not tf.executing_eagerly(): + # Call `get_session` to initialize any uninitialized variables. + backend.get_session() + model._checkpoint.write(filepath, options=options) + + # Record this checkpoint so it's visible from + # tf.train.latest_checkpoint. + tf.__internal__.train.update_checkpoint_state( + save_dir=os.path.dirname(filepath), + model_checkpoint_path=filepath, + save_relative_paths=True, + all_model_checkpoint_paths=[filepath], + ) + + +def load_weights( + model, filepath, by_name=False, skip_mismatch=False, options=None +): + """Loads all layer weights, either from a SavedModel or H5 weights file. + + If `by_name` is False weights are loaded based on the network's + topology. This means the architecture should be the same as when the + weights were saved. Note that layers that don't have weights are not + taken into account in the topological ordering, so adding or removing + layers is fine as long as they don't have weights. + + If `by_name` is True, weights are loaded into layers only if they share + the same name. This is useful for fine-tuning or transfer-learning + models where some of the layers have changed. + + Only topological loading (`by_name=False`) is supported when loading + weights from the TensorFlow format. Note that topological loading + differs slightly between TensorFlow and HDF5 formats for user-defined + classes inheriting from `tf.keras.Model`: HDF5 loads based on a + flattened list of weights, while the TensorFlow format loads based on + the object-local names of attributes to which layers are assigned in the + `Model`'s constructor. + + Args: + filepath: String, path to the weights file to load. For weight files + in TensorFlow format, this is the file prefix (the same as was + passed to `save_weights`). This can also be a path to a + SavedModel saved from `model.save`. + by_name: Boolean, whether to load weights by name or by topological + order. Only topological loading is supported for weight files in + TensorFlow format. + skip_mismatch: Boolean, whether to skip loading of layers where + there is a mismatch in the number of weights, or a mismatch in + the shape of the weight (only valid when `by_name=True`). + options: Optional `tf.train.CheckpointOptions` object that specifies + options for loading weights. + + Returns: + When loading a weight file in TensorFlow format, returns the same + status object as `tf.train.Checkpoint.restore`. When graph building, + restore ops are run automatically as soon as the network is built + (on first call for user-defined classes inheriting from `Model`, + immediately if it is already built). + + When loading weights in HDF5 format, returns `None`. 
+ + Raises: + ImportError: If `h5py` is not available and the weight file is in + HDF5 format. + ValueError: If `skip_mismatch` is set to `True` when `by_name` is + `False`. + """ + if backend.is_tpu_strategy(model._distribution_strategy): + if model._distribution_strategy.extended.steps_per_run > 1 and ( + not saving_utils.is_hdf5_filepath(filepath) + ): + spr = model._distribution_strategy.extended.steps_per_run + raise ValueError( + "Load weights is not implemented with TPUStrategy " + "with `steps_per_run` greater than 1. The " + f"`steps_per_run` is {spr}" + ) + if skip_mismatch and not by_name: + raise ValueError( + "When calling model.load_weights, skip_mismatch can only be " + "set to True when by_name is True." + ) + + filepath, save_format = _detect_save_format(filepath) + if save_format == "tf": + status = model._checkpoint.read(filepath, options) + if by_name: + raise NotImplementedError( + "Weights may only be loaded based on topology into Models " + "when loading TensorFlow-formatted weights " + "(got by_name=True to load_weights)." + ) + if not tf.executing_eagerly(): + session = backend.get_session() + # Restore existing variables (if any) immediately, and set up a + # streaming restore for any variables created in the future. + tf.__internal__.tracking.streaming_restore( + status=status, session=session + ) + status.assert_nontrivial_match() + else: + status = None + if h5py is None: + raise ImportError( + "`load_weights` requires h5py package when loading weights " + "from HDF5. Try installing h5py." + ) + if not model._is_graph_network and not model.built: + raise ValueError( + "Unable to load weights saved in HDF5 format into a " + "subclassed Model which has not created its variables yet. " + "Call the Model first, then load the weights." + ) + model._assert_weights_created() + with h5py.File(filepath, "r") as f: + if "layer_names" not in f.attrs and "model_weights" in f: + f = f["model_weights"] + if by_name: + hdf5_format.load_weights_from_hdf5_group_by_name( + f, model, skip_mismatch + ) + else: + hdf5_format.load_weights_from_hdf5_group(f, model) + + # Perform any layer defined finalization of the layer state. + for layer in model.layers: + layer.finalize_state() + return status + + +def _detect_save_format(filepath): + """Returns path to weights file and save format.""" + + filepath = io_utils.path_to_string(filepath) + if saving_utils.is_hdf5_filepath(filepath): + return filepath, "h5" + + # Filepath could be a TensorFlow checkpoint file prefix or SavedModel + # directory. It's possible for filepath to be both a prefix and directory. + # Prioritize checkpoint over SavedModel. + if _is_readable_tf_checkpoint(filepath): + save_format = "tf" + elif tf.saved_model.contains_saved_model(filepath): + ckpt_path = os.path.join( + filepath, + tf.saved_model.VARIABLES_DIRECTORY, + tf.saved_model.VARIABLES_FILENAME, + ) + if _is_readable_tf_checkpoint(ckpt_path): + filepath = ckpt_path + save_format = "tf" + else: + raise ValueError( + "Unable to load weights. filepath {} appears to be a " + "SavedModel directory, but checkpoint either doesn't " + "exist, or is incorrectly formatted.".format(filepath) + ) + else: + # Not a TensorFlow checkpoint. This filepath is likely an H5 file that + # doesn't have the hdf5/keras extensions. 
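+        # To illustrate the three outcomes on hypothetical inputs:
+        #   "weights.h5"   -> ("weights.h5", "h5") via the extension check
+        #   "ckpt/weights" -> ("ckpt/weights", "tf") via the checkpoint reader
+        #   "saved_model/" -> ("saved_model/variables/variables", "tf")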
+ save_format = "h5" + return filepath, save_format + + +def _is_readable_tf_checkpoint(filepath): + try: + tf.compat.v1.train.NewCheckpointReader(filepath) + return True + except tf.errors.DataLossError: + # The checkpoint is not readable in TensorFlow format. + return False + + +# Inject the load_model function to keras_deps to remove the dependency +# from TFLite to Keras. +tf.__internal__.register_load_model_function(load_model) diff --git a/keras/saving/legacy/save_test.py b/keras/saving/legacy/save_test.py new file mode 100644 index 000000000000..b9ec7d5d749f --- /dev/null +++ b/keras/saving/legacy/save_test.py @@ -0,0 +1,1555 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Keras model saving code.""" + +import collections +import os +import pathlib +import shutil +import tempfile + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +import keras +from keras import losses +from keras import optimizers +from keras.engine import functional +from keras.engine import sequential +from keras.feature_column import dense_features +from keras.feature_column import sequence_feature_column as ksfc +from keras.layers import core +from keras.optimizers import optimizer_v1 +from keras.premade_models.linear import LinearModel +from keras.saving import object_registration +from keras.saving.legacy import model_config +from keras.saving.legacy import save +from keras.saving.legacy import serialization +from keras.saving.legacy.saved_model import utils as saved_model_utils +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + +try: + import h5py +except ImportError: + h5py = None + + +class TestSaveModel(tf.test.TestCase, parameterized.TestCase): + def setUp(self): + super().setUp() + self.model = test_utils.get_small_sequential_mlp(1, 2, 3) + self.subclassed_model = test_utils.get_small_subclass_mlp(1, 2) + + def assert_h5_format(self, path): + if h5py is not None: + self.assertTrue( + h5py.is_hdf5(path), + f"Model saved at path {path} is not a valid hdf5 file.", + ) + + def assert_saved_model(self, path): + tf.__internal__.saved_model.parse_saved_model(path) + + @test_utils.run_v2_only + def test_load_file_not_found(self): + path = pathlib.Path(self.get_temp_dir()) / "does_not_exist" + with self.assertRaisesRegex(IOError, "No file or directory found at"): + save.load_model(path) + + @test_utils.run_v2_only + def test_save_format_defaults(self): + path = os.path.join(self.get_temp_dir(), "model_path") + save.save_model(self.model, path) + self.assert_saved_model(path) + + @test_utils.run_v2_only + def test_save_format_defaults_pathlib(self): + path = pathlib.Path(self.get_temp_dir()) / "model_path" + save.save_model(self.model, path) + self.assert_saved_model(path) + + @test_utils.run_v2_only + def test_save_hdf5(self): + path = 
os.path.join(self.get_temp_dir(), "model") + save.save_model(self.model, path, save_format="h5") + self.assert_h5_format(path) + with self.assertRaisesRegex( + NotImplementedError, + "requires the model to be a Functional model " + "or a Sequential model.", + ): + save.save_model(self.subclassed_model, path, save_format="h5") + + @test_utils.run_v2_only + def test_save_load_hdf5_pathlib(self): + path = pathlib.Path(self.get_temp_dir()) / "model" + save.save_model(self.model, path, save_format="h5") + save.load_model(path) + + @test_utils.run_v2_only + def test_save_tf(self): + path = os.path.join(self.get_temp_dir(), "model") + save.save_model(self.model, path, save_format="tf") + self.assert_saved_model(path) + with self.assertRaisesRegex( + ValueError, + r"Model.*cannot be saved.*as opposed to `model.call\(\).*", + ): + save.save_model(self.subclassed_model, path, save_format="tf") + self.subclassed_model.predict(np.random.random((3, 5))) + save.save_model(self.subclassed_model, path, save_format="tf") + self.assert_saved_model(path) + + @test_utils.run_v2_only + def test_save_load_tf_string(self): + path = os.path.join(self.get_temp_dir(), "model") + save.save_model(self.model, path, save_format="tf") + save.load_model(path) + + @test_utils.run_v2_only + def test_save_load_tf_pathlib(self): + path = pathlib.Path(self.get_temp_dir()) / "model" + save.save_model(self.model, path, save_format="tf") + save.load_model(path) + + @test_utils.run_v2_only + def test_save_load_weights_tf_pathlib(self): + path = pathlib.Path(self.get_temp_dir()) / "model" + self.model.save_weights(path, save_format="tf") + self.model.load_weights(path) + + @test_utils.run_v2_only + def test_save_load_weights_hdf5_pathlib(self): + path = pathlib.Path(self.get_temp_dir()) / "model" + self.model.save_weights(path, save_format="h5") + self.model.load_weights(path) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_saving_h5_for_rnn_layers(self): + # See https://github.com/tensorflow/tensorflow/issues/35731 for details. + inputs = keras.Input([10, 91], name="train_input") + rnn_layers = [ + keras.layers.LSTMCell( + size, recurrent_dropout=0, name="rnn_cell%d" % i + ) + for i, size in enumerate([512, 512]) + ] + rnn_output = keras.layers.RNN( + rnn_layers, return_sequences=True, name="rnn_layer" + )(inputs) + pred_feat = keras.layers.Dense(91, name="prediction_features")( + rnn_output + ) + pred = keras.layers.Softmax()(pred_feat) + model = keras.Model(inputs=[inputs], outputs=[pred, pred_feat]) + path = os.path.join(self.get_temp_dir(), "model_path.h5") + model.save(path) + + # Make sure the variable name is unique. 
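+        # (A sketch of what unique names look like here: the stacked cells
+        # should yield e.g. ".../rnn_cell0/kernel:0" vs
+        # ".../rnn_cell1/kernel:0"; duplicated names previously broke HDF5
+        # saving, per the issue referenced above.)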
+        self.assertNotEqual(
+            rnn_layers[0].kernel.name, rnn_layers[1].kernel.name
+        )
+        self.assertIn("rnn_cell1", rnn_layers[1].kernel.name)
+
+    @test_combinations.generate(
+        test_combinations.combine(mode=["graph", "eager"])
+    )
+    def test_saving_optimizer_weights(self):
+        class MyModel(keras.Model):
+            def __init__(self):
+                super().__init__()
+                self.layer = keras.layers.Dense(1)
+
+            def call(self, x):
+                return self.layer(x)
+
+        path = os.path.join(self.get_temp_dir(), "weights_path")
+        x, y = np.ones((10, 10)), np.ones((10, 1))
+
+        model = MyModel()
+        model.compile("rmsprop", loss="bce")
+        model.train_on_batch(x, y)
+        model.reset_metrics()
+        model.save_weights(path, save_format="tf")
+
+        batch_loss = model.train_on_batch(x, y)
+
+        new_model = MyModel()
+        new_model.compile("rmsprop", loss="bce")
+        new_model.train_on_batch(x, y)
+        new_model.reset_metrics()
+
+        new_model.load_weights(path)
+        new_batch_loss = new_model.train_on_batch(x, y)
+
+        self.assertAllClose(batch_loss, new_batch_loss)
+
+    @test_combinations.generate(
+        test_combinations.combine(mode=["eager", "graph"])
+    )
+    def test_save_include_optimizer_false(self):
+        def get_variables(file_name):
+            reader = tf.train.load_checkpoint(
+                os.path.join(file_name, "variables/variables")
+            )
+            shape_from_key = reader.get_variable_to_shape_map()
+            return sorted(shape_from_key.keys())
+
+        path = os.path.join(self.get_temp_dir(), "no_optimizer")
+        x, y = np.ones((10, 10)), np.ones((10, 1))
+
+        model = keras.models.Sequential()
+        model.add(keras.layers.Dense(1))
+        model.compile("adam", loss="mse")
+        model.train_on_batch(x, y)
+        model.save(path, save_format="tf", include_optimizer=False)
+        variables = get_variables(path)
+
+        for v in variables:
+            self.assertNotIn("optimizer", v)
+
+    @test_combinations.generate(
+        test_combinations.combine(mode=["graph", "eager"])
+    )
+    def test_saving_model_with_custom_object(self):
+        with object_registration.custom_object_scope(), self.cached_session():
+
+            @object_registration.register_keras_serializable()
+            class CustomLoss(losses.MeanSquaredError):
+                pass
+
+            model = sequential.Sequential(
+                [core.Dense(units=1, input_shape=(1,))]
+            )
+            model.compile(optimizer="sgd", loss=CustomLoss())
+            model.fit(np.zeros([10, 1]), np.zeros([10, 1]))
+
+            temp_dir = self.get_temp_dir()
+            filepath = os.path.join(temp_dir, "saving")
+            model.save(filepath)
+
+            # Make sure the model can be correctly loaded back.
+            _ = save.load_model(filepath, compile=True)
+
+    def test_saving_model_with_name_conflict(self):
+        class Sequential(keras.Model):
+            def __init__(self):
+                super().__init__()
+                self.layer = keras.layers.Dense(1)
+
+            def call(self, x):
+                return self.layer(x)
+
+        model = Sequential()
+        model(tf.ones((10, 10)))
+        temp_dir = self.get_temp_dir()
+        filepath = os.path.join(temp_dir, "Sequential")
+
+        with self.assertLogs() as logs:
+            model.save(filepath, save_format="tf")
+
+        expected_substring = (
+            "has the same name 'Sequential' as a built-in Keras"
+        )
+        matched = [log for log in logs.output if expected_substring in log]
+        self.assertNotEmpty(matched)
+
+    def test_saving_built_in_model(self):
+        model = LinearModel()
+        model(tf.constant([[5.0]]))
+        temp_dir = self.get_temp_dir()
+        filepath = os.path.join(temp_dir, "LinearModel")
+        with self.assertLogs() as logs:
+            model.save(filepath, save_format="tf")
+
+        expected_substring = (
+            "has the same name 'LinearModel' as a built-in Keras"
+        )
+        matched = [log for log in logs.output if expected_substring in log]
+        # Check that a warning is *not* logged for a premade model.
+ self.assertEmpty(matched) + + +@object_registration.register_keras_serializable(package="Foo") +class RegisteredSubLayer(keras.layers.Layer): + pass + + +class TestJson(test_combinations.TestCase): + """Tests to_json()/from_json().""" + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_saving_with_dense_features(self): + cols = [ + tf.feature_column.numeric_column("a"), + tf.feature_column.indicator_column( + tf.feature_column.categorical_column_with_vocabulary_list( + "b", ["one", "two"] + ) + ), + ] + input_layers = { + "a": keras.layers.Input(shape=(1,), name="a"), + "b": keras.layers.Input(shape=(1,), name="b", dtype="string"), + } + + fc_layer = dense_features.DenseFeatures(cols)(input_layers) + output = keras.layers.Dense(10)(fc_layer) + + model = keras.models.Model(input_layers, output) + + model.compile( + loss=keras.losses.MSE, + optimizer="rmsprop", + metrics=[keras.metrics.categorical_accuracy], + ) + + config = model.to_json() + loaded_model = model_config.model_from_json(config) + + inputs_a = np.arange(10).reshape(10, 1) + inputs_b = np.arange(10).reshape(10, 1).astype("str") + + with self.cached_session(): + # Initialize tables for V1 lookup. + if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertLen( + loaded_model.predict({"a": inputs_a, "b": inputs_b}), 10 + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_saving_with_sequence_features(self): + cols = [ + tf.feature_column.sequence_numeric_column("a"), + tf.feature_column.indicator_column( + tf.feature_column.sequence_categorical_column_with_vocabulary_list( # noqa: E501 + "b", ["one", "two"] + ) + ), + ] + input_layers = { + "a": keras.layers.Input(shape=(None, 1), sparse=True, name="a"), + "b": keras.layers.Input( + shape=(None, 1), sparse=True, name="b", dtype="string" + ), + } + + fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers) + # TODO(tibell): Figure out the right dtype and apply masking. + # sequence_length_mask = array_ops.sequence_mask(sequence_length) + # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask) + x = keras.layers.GRU(32)(fc_layer) + output = keras.layers.Dense(10)(x) + + model = keras.models.Model(input_layers, output) + + model.compile( + loss=keras.losses.MSE, + optimizer="rmsprop", + metrics=[keras.metrics.categorical_accuracy], + ) + + config = model.to_json() + loaded_model = model_config.model_from_json(config) + + batch_size = 10 + timesteps = 1 + + values_a = np.arange(10, dtype=np.float32) + indices_a = np.zeros((10, 3), dtype=np.int64) + indices_a[:, 0] = np.arange(10) + inputs_a = tf.SparseTensor( + indices_a, values_a, (batch_size, timesteps, 1) + ) + + values_b = np.zeros(10, dtype=str) + indices_b = np.zeros((10, 3), dtype=np.int64) + indices_b[:, 0] = np.arange(10) + inputs_b = tf.SparseTensor( + indices_b, values_b, (batch_size, timesteps, 1) + ) + + with self.cached_session(): + # Initialize tables for V1 lookup. 
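+            # (In graph mode, the vocabulary lookup tables created by the
+            # feature columns are not initialized automatically, hence the
+            # explicit `tables_initializer` below.)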
+ if not tf.executing_eagerly(): + self.evaluate(tf.compat.v1.tables_initializer()) + + self.assertLen( + loaded_model.predict({"a": inputs_a, "b": inputs_b}, steps=1), + batch_size, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_nested_layers(self): + class MyLayer(keras.layers.Layer): + def __init__(self, sublayers, **kwargs): + super().__init__(**kwargs) + self.sublayers = sublayers + + def get_config(self): + config = super().get_config() + config["sublayers"] = self.sublayers + return config + + layer = MyLayer( + [ + keras.layers.Dense(2, name="MyDense"), + RegisteredSubLayer(name="MySubLayer"), + ] + ) + model = keras.Sequential([keras.Input([None]), layer]) + model_json = model.to_json() + + self.assertIn("Foo>RegisteredSubLayer", model_json) + + loaded_model = model_config.model_from_json( + model_json, custom_objects={"MyLayer": MyLayer} + ) + loaded_layer = loaded_model.layers[0] + self.assertIsInstance(loaded_layer.sublayers[0], keras.layers.Dense) + self.assertEqual(loaded_layer.sublayers[0].name, "MyDense") + self.assertIsInstance(loaded_layer.sublayers[1], RegisteredSubLayer) + self.assertEqual(loaded_layer.sublayers[1].name, "MySubLayer") + + +class MaskedTensor(tf.experimental.ExtensionType): + __name__ = "MaskedTensor_save_test" + values: tf.Tensor + mask: tf.Tensor + + class Spec(tf.TypeSpec): + @property + def shape(self): + return self.values.shape + + @property + def dtype(self): + return self.values.dtype + + def with_shape(self, shape): + values_spec = tf.TensorSpec( + shape, dtype=self.values.dtype, name=self.values.name + ) + mask_spec = tf.TensorSpec( + shape, dtype=self.mask.dtype, name=self.mask.name + ) + return MaskedTensor.Spec(values_spec, mask_spec) + + +@test_combinations.run_with_all_saved_model_formats +class TestWholeModelSaving(test_combinations.TestCase): + def _save_model_dir(self, dirname="saved_model"): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + return os.path.join(temp_dir, dirname) + + def _assert_same_weights_and_metrics(self, model, loaded_model): + """Checks that loaded weights & metrics are the same as the original. + + Args: + model: original model + loaded_model: loaded model + """ + self.assertAllClose(model.weights, loaded_model.weights) + + if loaded_model.optimizer: + if test_utils.get_save_format() == "tf": + # TODO(b/153110928): Keras TF format doesn't restore optimizer + # weights currently. + return + if isinstance( + loaded_model.optimizer, + keras.optimizers.optimizer.Optimizer, + ): + loaded_model.optimizer.build(loaded_model.trainable_variables) + self.assertAllClose( + model.optimizer.variables, + loaded_model.optimizer.variables, + ) + else: + self.assertAllClose( + model.optimizer.weights, loaded_model.optimizer.weights + ) + + # In V1/Graph mode, the model isn't built, so the metrics are not loaded + # immediately (requires model to be called on some data before building + # metrics). 
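+        # E.g. in V1 graph mode `loaded_model.metrics` stays empty until the
+        # model is called on data, so the name comparison below would be
+        # vacuous there.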
+ check_metrics = tf.__internal__.tf2.enabled() and tf.executing_eagerly() + + if check_metrics: + self.assertAllEqual( + [m.name for m in model.metrics], + [m.name for m in loaded_model.metrics], + ) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_save_and_load(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + save_kwargs = test_utils.get_save_kwargs() + + if ( + save_format == "h5" or not save_kwargs.get("save_traces", True) + ) and test_utils.get_model_type() == "subclass": + # HDF5 format currently does not allow saving subclassed models. + # When saving with `save_traces=False`, the subclassed model must + # have a get_config/from_config, which the autogenerated model does + # not have. + return + + with self.cached_session(): + model = test_utils.get_model_from_layers( + [ + keras.layers.Dense(2), + keras.layers.RepeatVector(3), + keras.layers.TimeDistributed(keras.layers.Dense(3)), + ], + input_shape=(3,), + ) + model.compile( + loss=keras.losses.MSE, + optimizer=keras.optimizers.legacy.rmsprop.RMSprop(lr=0.0001), + metrics=[ + keras.metrics.categorical_accuracy, + keras.metrics.CategoricalCrossentropy( + name="cce", label_smoothing=tf.constant(0.2) + ), + ], + weighted_metrics=[ + keras.metrics.categorical_crossentropy, + keras.metrics.CategoricalCrossentropy( + name="cce", label_smoothing=tf.constant(0.2) + ), + ], + sample_weight_mode="temporal", + ) + + x = np.random.random((1, 3)) + y = np.random.random((1, 3, 3)) + model.train_on_batch(x, y) + + out = model.predict(x) + keras.models.save_model( + model, saved_model_dir, save_format=save_format, **save_kwargs + ) + + loaded_model = keras.models.load_model(saved_model_dir) + self._assert_same_weights_and_metrics(model, loaded_model) + + out2 = loaded_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + eval_out = model.evaluate(x, y) + eval_out2 = loaded_model.evaluate(x, y) + self.assertArrayNear(eval_out, eval_out2, 0.001) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sequential_model_saving_without_input_shape(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + with self.cached_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2)) + model.add(keras.layers.RepeatVector(3)) + model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) + model.compile( + loss=keras.losses.MSE, + optimizer="rmsprop", + metrics=[ + keras.metrics.categorical_accuracy, + keras.metrics.CategoricalAccuracy(name="cat_acc"), + ], + weighted_metrics=[ + keras.metrics.categorical_accuracy, + keras.metrics.CategoricalAccuracy(name="cat_acc2"), + ], + sample_weight_mode="temporal", + ) + x = np.random.random((1, 3)) + y = np.random.random((1, 3, 3)) + model.train_on_batch(x, y) + + out = model.predict(x) + model.save(saved_model_dir, save_format=save_format) + + new_model = keras.models.load_model(saved_model_dir) + + self._assert_same_weights_and_metrics(model, new_model) + + out2 = new_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sequential_model_saving_without_compile(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + with self.cached_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + 
model.add(keras.layers.RepeatVector(3)) + model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) + + x = np.random.random((1, 3)) + out = model.predict(x) + + # Save the model without any compilation or training. + keras.models.save_model( + model, saved_model_dir, save_format=save_format + ) + + new_model = keras.models.load_model(saved_model_dir) + self._assert_same_weights_and_metrics(model, new_model) + + out2 = new_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + def test_sequential_model_saving_2(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + + with tf.Graph().as_default(), self.cached_session(): + # test with custom optimizer, loss + + class CustomOp(optimizer_v1.RMSprop): + pass + + def custom_loss(y_true, y_pred): + return keras.losses.mse(y_true, y_pred) + + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile( + loss=custom_loss, optimizer=CustomOp(), metrics=["acc"] + ) + + x = np.random.random((1, 3)) + y = np.random.random((1, 3)) + model.train_on_batch(x, y) + + out = model.predict(x) + keras.models.save_model( + model, saved_model_dir, save_format=save_format + ) + + new_model = keras.models.load_model( + saved_model_dir, + custom_objects={ + "CustomOp": CustomOp, + "custom_loss": custom_loss, + }, + ) + self._assert_same_weights_and_metrics(model, new_model) + + out2 = new_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + def test_saving_without_compilation(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile(loss="mse", optimizer="sgd", metrics=["acc"]) + + keras.models.save_model(model, saved_model_dir, save_format=save_format) + model = keras.models.load_model(saved_model_dir) + + def test_saving_with_tf_optimizer(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile( + loss="mse", + optimizer=tf.compat.v1.train.AdadeltaOptimizer(0.1), + metrics=["acc"], + ) + + keras.models.save_model(model, saved_model_dir, save_format=save_format) + model = keras.models.load_model(saved_model_dir) + + def test_saving_right_after_compilation(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + with self.cached_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile(loss="mse", optimizer="sgd", metrics=["acc"]) + if not tf.compat.v1.executing_eagerly_outside_functions(): + model._make_train_function() + keras.models.save_model( + model, saved_model_dir, save_format=save_format + ) + model = keras.models.load_model(saved_model_dir) + + def test_saving_lambda_numpy_array_arguments(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + + if h5py is None: + self.skipTest("h5py required to run this test") + + mean = np.random.random((4, 2, 3)) + std = np.abs(np.random.random((4, 2, 3))) + 1e-5 + inputs = keras.layers.Input(shape=(4, 2, 3)) + output = keras.layers.Lambda( + lambda image, mu, std: (image - mu) / std, + arguments={"mu": mean, "std": std}, + )(inputs) + model = 
keras.models.Model(inputs, output)
+        model.compile(loss="mse", optimizer="sgd", metrics=["acc"])
+
+        keras.models.save_model(model, saved_model_dir, save_format=save_format)
+
+        model = keras.models.load_model(saved_model_dir)
+
+        self.assertAllClose(mean, model.layers[1].arguments["mu"])
+        self.assertAllClose(std, model.layers[1].arguments["std"])
+
+    def test_saving_model_with_long_layer_names(self):
+        saved_model_dir = self._save_model_dir()
+        save_format = test_utils.get_save_format()
+        with self.cached_session():
+            # This layer name will make the `layer_names` HDF5 attribute blow
+            # out of proportion. Note that it fits into the internal HDF5
+            # attribute memory limit on its own but because h5py converts
+            # the list of layer names into numpy array, which uses the same
+            # amount of memory for every item, it increases the memory
+            # requirements substantially.
+            x = keras.Input(shape=(2,), name="input_" + ("x" * (2**15)))
+            f = x
+            for i in range(4):
+                f = keras.layers.Dense(2, name="dense_%d" % (i,))(f)
+            model = keras.Model(inputs=[x], outputs=[f])
+            model.compile(
+                "adam", loss=keras.losses.MeanSquaredError(), metrics=["acc"]
+            )
+
+            x = np.random.random((1, 2))
+            y = np.random.random((1, 2))
+            model.train_on_batch(x, y)
+            out = model.predict(x)
+
+            keras.models.save_model(
+                model, saved_model_dir, save_format=save_format
+            )
+            model = keras.models.load_model(saved_model_dir)
+
+            if save_format in ["tf", "tensorflow"]:
+                return
+            # Check that the HDF5 file contains a chunked array
+            # of layer names.
+            with h5py.File(saved_model_dir, "r") as h5file:
+                num_names_arrays = len(
+                    [
+                        attr
+                        for attr in h5file["model_weights"].attrs
+                        if attr.startswith("layer_names")
+                    ]
+                )
+            # The chunking of the layer names array should have happened.
+            self.assertGreater(num_names_arrays, 0)
+            out2 = model.predict(x)
+            self.assertAllClose(out, out2, atol=1e-05)
+
+    def test_saving_model_with_long_weights_names(self):
+        saved_model_dir = self._save_model_dir()
+        save_format = test_utils.get_save_format()
+
+        with self.cached_session():
+            x = keras.Input(shape=(2,), name="nested_model_input")
+            f = x
+            for i in range(4):
+                f = keras.layers.Dense(2, name="nested_model_dense_%d" % (i,))(
+                    f
+                )
+            # This layer name will make the `weight_names`
+            # HDF5 attribute blow out of proportion.
+            f = keras.layers.Dense(
+                2, name="nested_model_output" + ("x" * (2**14))
+            )(f)
+            nested_model = keras.Model(
+                inputs=[x], outputs=[f], name="nested_model"
+            )
+
+            x = keras.Input(shape=(2,), name="outer_model_input")
+            f = nested_model(x)
+            f = keras.layers.Dense(2, name="outer_model_output")(f)
+
+            model = keras.Model(inputs=[x], outputs=[f])
+            model.compile(loss="mse", optimizer="adam", metrics=["acc"])
+
+            x = np.random.random((1, 2))
+            y = np.random.random((1, 2))
+            model.train_on_batch(x, y)
+            out = model.predict(x)
+
+            keras.models.save_model(
+                model, saved_model_dir, save_format=save_format
+            )
+            model = keras.models.load_model(saved_model_dir)
+
+            if save_format in ["h5", "hdf5", "keras"]:
+                # Check that the HDF5 file contains a chunked array
+                # of weight names.
+                with h5py.File(saved_model_dir, "r") as h5file:
+                    num_weight_arrays = len(
+                        [
+                            attr
+                            for attr in h5file["model_weights"][
+                                "nested_model"
+                            ].attrs
+                            if attr.startswith("weight_names")
+                        ]
+                    )
+                # The chunking of the weight names array should have happened.
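+                # (If chunking occurred, the group carries attributes named
+                # e.g. "weight_names0", "weight_names1", ..., which is why
+                # the comprehension above matches with `startswith`.)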
+ self.assertGreater(num_weight_arrays, 0) + out2 = model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + def test_model_saving_to_pre_created_h5py_file(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + with tf.Graph().as_default(), self.cached_session(): + inputs = keras.Input(shape=(3,)) + x = keras.layers.Dense(2)(inputs) + outputs = keras.layers.Dense(3)(x) + + model = keras.Model(inputs, outputs) + model.compile( + loss=keras.losses.MSE, + optimizer=optimizer_v1.Adam(), + metrics=[ + keras.metrics.categorical_accuracy, + keras.metrics.CategoricalAccuracy(), + ], + ) + x = np.random.random((1, 3)) + y = np.random.random((1, 3)) + model.train_on_batch(x, y) + + out = model.predict(x) + + keras.models.save_model( + model, saved_model_dir, save_format=save_format + ) + loaded_model = keras.models.load_model(saved_model_dir) + out1 = loaded_model.predict(x) + self.assertAllClose(out, out1, atol=1e-05) + if save_format in ["tf", "tensorflow"]: + return + + # Test h5 format specifically + fd, fname = tempfile.mkstemp(".h5") + with h5py.File(fname, mode="r+") as h5file: + keras.models.save_model(model, h5file) + loaded_model = keras.models.load_model(h5file) + out2 = loaded_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + # Test non-default options in h5 + with h5py.File( + "_", driver="core", mode="w", backing_store=False + ) as h5file: + keras.models.save_model(model, h5file) + loaded_model = keras.models.load_model(h5file) + out2 = loaded_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + # Cleanup + os.close(fd) + os.remove(fname) + + def test_model_saving_to_new_dir_path(self): + saved_model_dir = os.path.join( + self._save_model_dir(), "newdir", "saved_model" + ) + save_format = test_utils.get_save_format() + + with self.cached_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.RepeatVector(3)) + model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) + + x = np.random.random((1, 3)) + out = model.predict(x) + + keras.models.save_model( + model, saved_model_dir, save_format=save_format + ) + + new_model = keras.models.load_model(saved_model_dir) + self._assert_same_weights_and_metrics(model, new_model) + + out2 = new_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + def test_model_raise_exception_with_failed_saving(self): + if h5py is None: + self.skipTest("h5py required to run this test") + + saved_model_dir = self._save_model_dir() + saved_model_path = os.path.join(saved_model_dir, "saved_model.h5") + + with self.cached_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.RepeatVector(3)) + model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) + + with self.assertRaisesRegex(OSError, "Unable to create file"): + with h5py.File(saved_model_path, "w"): + keras.models.save_model(model, saved_model_path) + + def test_saving_constant_initializer_with_numpy(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + 2, + input_shape=(3,), + kernel_initializer=keras.initializers.Constant(np.ones((3, 2))), + ) + ) + model.add(keras.layers.Dense(3)) + model.compile(loss="mse", optimizer="sgd", metrics=["acc"]) + keras.models.save_model(model, saved_model_dir, save_format=save_format) + model = 
keras.models.load_model(saved_model_dir)
+
+    def test_saving_group_naming_h5py(self):
+        # Test saving a model with a layer whose name is a prefix of another
+        # layer's name.
+
+        temp_dir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree, temp_dir)
+        h5_path = os.path.join(temp_dir, "test.h5")
+
+        input_layer = keras.layers.Input((None, None, 3), name="test_input")
+        x = keras.layers.Conv2D(1, 1, name="conv1/conv")(input_layer)
+        x = keras.layers.Activation("relu", name="conv1")(x)
+        model = keras.models.Model(inputs=input_layer, outputs=x)
+
+        model.save_weights(h5_path)
+        model.load_weights(h5_path)
+
+    def test_primitive_attrs_contain_no_extraneous_strings(self):
+        if h5py is None:
+            self.skipTest("h5py required to run this test")
+
+        saved_model_dir = self._save_model_dir()
+        save_format = test_utils.get_save_format()
+        model = keras.models.Sequential()
+        model.add(keras.layers.Dense(1, input_shape=[2]))
+        model.save(saved_model_dir, save_format=save_format)
+        if save_format in ["tf", "tensorflow"]:
+            return
+
+        h5file = h5py.File(saved_model_dir, "r")
+        self.assertRegex(
+            h5file.attrs["keras_version"], r"^[\d]+\.[\d]+\.[\S]+$"
+        )
+
+    @test_combinations.generate(
+        test_combinations.combine(mode=["graph", "eager"])
+    )
+    def test_functional_model_with_custom_loss_and_metric(self):
+        def _make_model():
+            inputs = keras.Input(shape=(4,))
+            x = keras.layers.Dense(8, activation="relu")(inputs)
+            outputs = keras.layers.Dense(3, activation="softmax")(x)
+            model = keras.Model(inputs=inputs, outputs=outputs)
+            custom_loss = keras.layers.Lambda(
+                lambda x: keras.backend.sum(x * x)
+            )(x)
+            model.add_loss(custom_loss)
+            model.add_metric(
+                custom_loss, aggregation="mean", name="custom_loss"
+            )
+            return model
+
+        saved_model_dir = self._save_model_dir()
+        save_format = test_utils.get_save_format()
+
+        with self.cached_session():
+            model = _make_model()
+            model.compile(
+                loss=keras.losses.SparseCategoricalCrossentropy(),
+                optimizer=optimizers.gradient_descent_legacy.SGD(),
+                metrics=[keras.metrics.SparseCategoricalCrossentropy()],
+            )
+            x = np.random.normal(size=(32, 4))
+            y = np.random.randint(0, 3, size=32)
+            model.train_on_batch(x, y)
+            evaluation_results = model.evaluate(x, y)
+            # Save and reload model.
+            model.save(saved_model_dir, save_format=save_format)
+            del model  # Prevent misuse.
+            loaded_model = keras.models.load_model(saved_model_dir)
+            loaded_model_eval_results = loaded_model.evaluate(x, y)
+            # Assert all evaluation results are the same.
+            self.assertAllClose(
+                evaluation_results, loaded_model_eval_results, 1e-9
+            )
+            # Check correctness of the loss calculation.
+            self.assertAllGreater(evaluation_results, 0.0)
+            evaluation_results = dict(
+                zip(loaded_model.metrics_names, evaluation_results)
+            )
+            self.assertNear(
+                evaluation_results["sparse_categorical_crossentropy"]
+                + evaluation_results["custom_loss"],
+                evaluation_results["loss"],
+                1e-6,
+            )
+
+    @test_combinations.generate(
+        test_combinations.combine(mode=["graph", "eager"])
+    )
+    def test_save_uncompiled_model_with_optimizer(self):
+        with self.cached_session() as session:
+            saved_model_dir = self._save_model_dir()
+            save_format = test_utils.get_save_format()
+            model = keras.models.Sequential(
+                [keras.layers.Dense(1, input_shape=(3,))]
+            )
+            # Set the model's optimizer but don't compile. This can happen if
+            # the model is trained with a custom training loop.
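+            # A rough sketch of such a loop (`loss_fn`, `x`, and `y` are
+            # placeholders here):
+            #   with tf.GradientTape() as tape:
+            #       loss = loss_fn(y, model(x))
+            #   grads = tape.gradient(loss, model.trainable_variables)
+            #   model.optimizer.apply_gradients(
+            #       zip(grads, model.trainable_variables)
+            #   )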
+ model.optimizer = keras.optimizers.legacy.rmsprop.RMSprop(lr=0.0001) + if not tf.executing_eagerly(): + session.run([v.initializer for v in model.variables]) + model.save(saved_model_dir, save_format=save_format) + + if save_format in ["tf", "tensorflow"]: + loaded = keras.models.load_model(saved_model_dir) + self.assertIsInstance( + loaded.optimizer, + keras.optimizers.legacy.optimizer_v2.OptimizerV2, + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_functional_model_with_getitem_op_layer(self): + inp = keras.Input(shape=(8)) + + out = inp[:] + model = keras.Model(inputs=[inp], outputs=out) + batch_size = 7 + x = tf.stack([tf.range(8) for _ in range(batch_size)]) + args = [x] + expected = x[:] + + self.assertAllEqual(model(args), expected) + self.assertAllEqual( + model.predict(args, batch_size=batch_size), expected + ) + + # Make sure it can be successfully saved and loaded. + save_format = test_utils.get_save_format() + saved_model_dir = self._save_model_dir() + keras.models.save_model(model, saved_model_dir, save_format=save_format) + + loaded_model = keras.models.load_model(saved_model_dir) + + self.assertAllEqual(loaded_model(args), expected) + self.assertAllEqual( + loaded_model.predict(args, batch_size=batch_size), expected + ) + + @test_combinations.generate( + test_combinations.combine(mode=["eager", "graph"]) + ) + def test_custom_functional_registered(self): + def _get_cls_definition(): + class CustomModel(keras.Model): + def c(self): + return "c" + + return CustomModel + + cls = _get_cls_definition() + self.assertEqual(cls.__bases__[0], keras.Model) + + with self.cached_session() as sess: + input_ = keras.layers.Input(shape=(1,)) + output = keras.layers.Dense(1)(input_) + model = cls(input_, output) + # `cls` now inherits from `Functional` class. + self.assertEqual(cls.__bases__[0], functional.Functional) + + if not tf.executing_eagerly(): + sess.run([v.initializer for v in model.variables]) + + save_format = test_utils.get_save_format() + saved_model_dir = self._save_model_dir() + keras.models.save_model( + model, saved_model_dir, save_format=save_format + ) + + loaded_model = keras.models.load_model( + saved_model_dir, custom_objects={"CustomModel": cls} + ) + self.assertIsInstance(loaded_model, cls) + + # Check with "new" `CustomModel` class definition. + new_cls = _get_cls_definition() + # The new `CustomModel` class is *not* derived from `Functional`. 
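+            # (Instantiating `cls(input_, output)` above routed through the
+            # functional API, which swapped the base class in place;
+            # `new_cls` has not been instantiated that way, so it still
+            # derives from `keras.Model`.)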
+ self.assertEqual(new_cls.__bases__[0], keras.Model) + reloaded_model = keras.models.load_model( + saved_model_dir, custom_objects={"CustomModel": new_cls} + ) + self.assertIsInstance(reloaded_model, new_cls) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_custom_sequential_registered_no_scope(self): + @object_registration.register_keras_serializable(package="my_package") + class MyDense(keras.layers.Dense): + def __init__(self, units, **kwargs): + super().__init__(units, **kwargs) + + input_shape = [1] + inputs = keras.Input(shape=input_shape) + custom_layer = MyDense(1) + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + + model = keras.Sequential(layers=[inputs, custom_layer]) + model.save(saved_model_dir, save_format=save_format) + loaded_model = keras.models.load_model(saved_model_dir) + + x = tf.constant([5]) + self.assertAllEqual(model(x), loaded_model(x)) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_custom_functional_registered_no_scope(self): + @object_registration.register_keras_serializable(package="my_package") + class MyDense(keras.layers.Dense): + def __init__(self, units, **kwargs): + super().__init__(units, **kwargs) + + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + input_shape = [1] + inputs = keras.Input(shape=input_shape) + outputs = MyDense(1)(inputs) + model = keras.Model(inputs, outputs) + + model.save(saved_model_dir, save_format=save_format) + loaded_model = keras.models.load_model(saved_model_dir) + + x = tf.constant([5]) + self.assertAllEqual(model(x), loaded_model(x)) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_shared_objects(self): + class OuterLayer(keras.layers.Layer): + def __init__(self, inner_layer): + super().__init__() + self.inner_layer = inner_layer + + def call(self, inputs): + return self.inner_layer(inputs) + + def get_config(self): + return { + "inner_layer": serialization.serialize_keras_object( + self.inner_layer + ) + } + + @classmethod + def from_config(cls, config): + return cls( + serialization.deserialize_keras_object( + config["inner_layer"] + ) + ) + + class InnerLayer(keras.layers.Layer): + def __init__(self): + super().__init__() + self.v = self.add_weight(name="v", shape=[], dtype=tf.float32) + + def call(self, inputs): + return self.v + inputs + + @classmethod + def from_config(cls, config): + return cls() + + # Create a model with 2 output layers that share the same inner layer. + inner_layer = InnerLayer() + outer_layer_1 = OuterLayer(inner_layer) + outer_layer_2 = OuterLayer(inner_layer) + input_ = keras.Input(shape=(1,)) + model = keras.Model( + inputs=input_, + outputs=[outer_layer_1(input_), outer_layer_2(input_)], + ) + + # Changes to the shared layer should affect both outputs. + model.layers[1].inner_layer.v.assign(5) + self.assertAllEqual(model(1), [6.0, 6.0]) + model.layers[1].inner_layer.v.assign(3) + self.assertAllEqual(model(1), [4.0, 4.0]) + + # After loading, changes to the shared layer should still affect both + # outputs. 
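+        # Reuse is recorded in the serialized config with a shared-object
+        # ID; the InnerLayer entry looks roughly like (a sketch):
+        #   {"class_name": "InnerLayer", "config": {...},
+        #    "shared_object_id": 1}
+        # so both OuterLayers can be wired back to a single instance.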
+ def _do_assertions(loaded): + loaded.layers[1].inner_layer.v.assign(5) + self.assertAllEqual(loaded(1), [6.0, 6.0]) + loaded.layers[1].inner_layer.v.assign(3) + self.assertAllEqual(loaded(1), [4.0, 4.0]) + loaded.layers[2].inner_layer.v.assign(5) + self.assertAllEqual(loaded(1), [6.0, 6.0]) + loaded.layers[2].inner_layer.v.assign(3) + self.assertAllEqual(loaded(1), [4.0, 4.0]) + + # We'd like to make sure we only attach shared object IDs when strictly + # necessary, so we'll recursively traverse the generated config to count + # whether we have the exact number we expect. + def _get_all_keys_recursive(dict_or_iterable): + if isinstance(dict_or_iterable, dict): + for key in dict_or_iterable.keys(): + yield key + for key in _get_all_keys_recursive(dict_or_iterable.values()): + yield key + elif isinstance(dict_or_iterable, str): + return + else: + try: + for item in dict_or_iterable: + for key in _get_all_keys_recursive(item): + yield key + # Not an iterable or dictionary + except TypeError: + return + + with object_registration.CustomObjectScope( + {"OuterLayer": OuterLayer, "InnerLayer": InnerLayer} + ): + # Test saving and loading to disk + save_format = test_utils.get_save_format() + saved_model_dir = self._save_model_dir() + keras.models.save_model( + model, saved_model_dir, save_format=save_format + ) + loaded = keras.models.load_model(saved_model_dir) + _do_assertions(loaded) + + # Test recreating directly from config + config = model.get_config() + key_count = collections.Counter(_get_all_keys_recursive(config)) + self.assertEqual(key_count[serialization.SHARED_OBJECT_KEY], 2) + loaded = keras.Model.from_config(config) + _do_assertions(loaded) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def test_shared_objects_wrapper(self): + """Tests that shared layers wrapped with `Wrapper` restore correctly.""" + input_ = keras.Input(shape=(1,)) + unwrapped = keras.layers.Layer(name="unwrapped") + wrapped = keras.layers.Wrapper(unwrapped, name="wrapped") + model = keras.Model( + inputs=input_, outputs=[unwrapped(input_), wrapped(input_)] + ) + + # Test recreating directly from config + config = model.get_config() + loaded = keras.Model.from_config(config) + self.assertIs(loaded.layers[1], loaded.layers[2].layer) + + # Test saving and loading to disk + save_format = test_utils.get_save_format() + saved_model_dir = self._save_model_dir() + keras.models.save_model(model, saved_model_dir, save_format=save_format) + loaded = keras.models.load_model(saved_model_dir) + self.assertIs(loaded.layers[1], loaded.layers[2].layer) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"], fit=[True, False]) + ) + def test_multi_output_metrics_name_stay_same(self, fit): + """Tests that metric names don't change with each save/load cycle. + + e.g. "head_0_accuracy" should not become "head_0_head_0_accuracy" after + saving and loading a model. + + Arguments: + fit: Whether the model should be fit before saving. + """ + # This doesn't work at all, so we can't check whether metric names are + # correct. 
+        if not tf.executing_eagerly() and not fit:
+            self.skipTest("b/181767784")
+
+        input_ = keras.Input((4,))
+        model = keras.Model(
+            input_,
+            [
+                keras.layers.Softmax(name="head_0")(
+                    keras.layers.Dense(3)(input_)
+                ),
+                keras.layers.Softmax(name="head_1")(
+                    keras.layers.Dense(5)(input_)
+                ),
+            ],
+        )
+        metric = keras.metrics.BinaryAccuracy()
+        model.compile(
+            optimizer="rmsprop",
+            loss="mse",
+            metrics={"head_0": [metric, "accuracy"]},
+        )
+
+        x = np.random.rand(2, 4)
+        y = {
+            "head_0": np.random.randint(2, size=(2, 3)),
+            "head_1": np.random.randint(2, size=(2, 5)),
+        }
+
+        # Make sure metric prefixing works the same regardless of whether the
+        # user has fit the model before saving.
+        if fit:
+            model.fit(x, y, verbose=0)
+
+        # Save and reload.
+        save_format = test_utils.get_save_format()
+        saved_model_dir = self._save_model_dir()
+        keras.models.save_model(model, saved_model_dir, save_format=save_format)
+        loaded = keras.models.load_model(saved_model_dir)
+
+        # Make sure the metrics names from the model before saving match the
+        # loaded model.
+        self.assertSequenceEqual(model.metrics_names, loaded.metrics_names)
+
+    # Test only in eager mode because ragged tensor inputs
+    # cannot be used in graph mode.
+    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
+    @test_utils.run_v2_only
+    def test_save_functional_with_ragged_constant_input(self):
+        input1 = keras.Input(shape=[])
+        input2 = tf.ragged.constant([[1.0, 2.0], [3.0]])
+        outputs = keras.layers.Add()([input1, input2])
+        model = keras.Model(input1, outputs)
+        saved_model_dir = self._save_model_dir()
+        model.save(saved_model_dir)
+        keras.models.load_model(saved_model_dir)
+
+    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
+    @test_utils.run_v2_only
+    def test_save_functional_with_constant_input(self):
+        input1 = keras.Input(shape=[2])
+        input2 = tf.constant([[1.0, 2.0]])
+        outputs = keras.layers.Add()([input1, input2])
+        model = keras.Model(input1, outputs)
+        saved_model_dir = self._save_model_dir()
+        model.save(saved_model_dir)
+        keras.models.load_model(saved_model_dir)
+
+    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
+    @test_utils.run_v2_only
+    def test_save_functional_with_constant_string_input(self):
+        input1 = keras.Input(shape=[2], dtype=tf.string)
+        input2 = tf.constant([["単", "に"]])
+        outputs = keras.layers.Concatenate()([input1, input2])
+        model = keras.Model(input1, outputs)
+        saved_model_dir = self._save_model_dir()
+        model.save(saved_model_dir)
+        loaded_model = keras.models.load_model(saved_model_dir)
+        x = tf.constant([["a", "b"]])
+        self.assertAllEqual(model(x), loaded_model(x))
+
+    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
+    @test_utils.run_v2_only
+    def test_save_functional_with_ragged_constant_string_input(self):
+        input1 = keras.Input(shape=[1], dtype=tf.string)
+        input2 = tf.ragged.constant([["単", "に"], ["単"]])
+        outputs = keras.layers.Concatenate(axis=0)([input1, input2])
+        model = keras.Model(input1, outputs)
+        saved_model_dir = self._save_model_dir()
+        model.save(saved_model_dir)
+        loaded_model = keras.models.load_model(saved_model_dir)
+        x = tf.constant([["a"]])
+        self.assertAllEqual(model(x), loaded_model(x))
+
+    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
+    @test_utils.run_v2_only
+    def test_save_inputs_spec_with_composite_tensor_names(self):
+        class KerasModel(keras.Model):
+            def call(self, inputs):
+                return inputs
+
+        spec = MaskedTensor.Spec(
+            tf.TensorSpec([None],
name="x__values"), + tf.TensorSpec([None], dtype=tf.bool, name="x__mask"), + ) + km1 = KerasModel() + inputs = keras.Input(type_spec=spec) + km1(inputs) + self.assertEqual(km1.save_spec()[0][0].mask.name, "x__mask") + + +# Factory functions to create models that will be serialized inside a Network. +def _make_graph_network(input_size, output_size): + inputs = keras.Input(input_size) + x = keras.layers.Dense(8, activation="relu")(inputs) + y = keras.layers.Dense(output_size)(x) + return keras.Model(inputs=inputs, outputs=y) + + +def _make_sequential(input_size, output_size): + del input_size + return keras.Sequential( + [ + keras.layers.Dense(8, activation="relu"), + keras.layers.Dense(output_size), + ] + ) + + +def _make_sequential_built(input_size, output_size): + model = _make_sequential(input_size, output_size) + model.build((None, input_size)) + return model + + +def _make_sequential_graph_network(input_size, output_size): + return keras.Sequential( + [ + keras.layers.InputLayer(input_size), + keras.layers.Dense(8, activation="relu"), + keras.layers.Dense(output_size), + ] + ) + + +def _make_sequential_input_shape(input_size, output_size): + return keras.Sequential( + [ + keras.layers.Dense(8, activation="relu", input_shape=(input_size,)), + keras.layers.Dense(output_size), + ] + ) + + +class _make_subclassed(keras.Model): + def __init__(self, input_size, output_size): + super().__init__() + self._config = {"input_size": input_size, "output_size": output_size} + self._hidden_layer = keras.layers.Dense( + 8, activation="relu", name="hidden" + ) + self._logits_layer = keras.layers.Dense(output_size, name="logits") + + def call(self, inputs): + x = self._hidden_layer(inputs) + return self._logits_layer(x) + + def get_config(self): + return self._config + + @classmethod + def from_config(cls, config): + return cls(**config) + + +class _make_subclassed_built(_make_subclassed): + def __init__(self, input_size, output_size): + super().__init__(input_size, output_size) + self.build((None, input_size)) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class TestWholeModelSavingWithNesting(tf.test.TestCase, parameterized.TestCase): + """Tests saving a whole model that contains other models.""" + + @parameterized.named_parameters( + [ + ("graph_network", _make_graph_network), + ("sequential", _make_sequential), + ("sequential_built", _make_sequential_built), + ("sequential_graph_network", _make_sequential_graph_network), + ("sequential_input_shape", _make_sequential_input_shape), + ("subclassed", _make_subclassed), + ("subclassed_built", _make_subclassed_built), + ] + ) + def test_functional(self, model_fn): + """Tests serializing a model that uses a nested model to share + weights.""" + if h5py is None: + self.skipTest("h5py required to run this test") + + def _make_model(): + inputs = ( + keras.Input(shape=(4,), name="examples"), + keras.Input(shape=(4,), name="neighbors"), + ) + base_model = model_fn(inputs[0].shape.as_list()[-1], 2) + outputs = keras.layers.add( + [base_model(inputs[0]), base_model(inputs[1])] + ) + return keras.Model(inputs=inputs, outputs=outputs) + + with self.cached_session(): + x = ( + np.random.normal(size=(16, 4)).astype(np.float32), + np.random.normal(size=(16, 4)).astype(np.float32), + ) + model = _make_model() + predictions = model(x) + # Save and reload. 
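+            # (The subclassed variants survive the HDF5 round trip only
+            # because they implement get_config/from_config and are supplied
+            # via `custom_objects` in the load_model call below.)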
+ model_path = os.path.join(self.get_temp_dir(), "model.h5") + model.save(model_path) + del model + loaded_model = keras.models.load_model( + model_path, + custom_objects={ + "_make_subclassed": _make_subclassed, + "_make_subclassed_built": _make_subclassed_built, + }, + compile=False, + ) + self.assertAllClose(loaded_model(x), predictions, 1e-9) + + +if __name__ == "__main__": + with saved_model_utils.keras_option_scope( + save_traces=False, in_tf_saved_model_scope=True + ): + tf.test.main() diff --git a/keras/saving/legacy/save_weights_test.py b/keras/saving/legacy/save_weights_test.py new file mode 100644 index 000000000000..fbfcea017116 --- /dev/null +++ b/keras/saving/legacy/save_weights_test.py @@ -0,0 +1,764 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ,============================================================================ +"""Tests for model saving in the HDF5 format.""" + +import os +import shutil +import uuid + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +import keras +from keras.engine import training +from keras.optimizers import optimizer_v1 +from keras.saving.legacy import hdf5_format +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + +try: + import h5py +except ImportError: + h5py = None + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class TestWeightSavingAndLoading(tf.test.TestCase, parameterized.TestCase): + def _save_model_dir(self, dirname="saved_model"): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + return os.path.join(temp_dir, dirname) + + @test_combinations.run_with_all_weight_formats + def test_weight_loading(self): + saved_model_dir = self._save_model_dir() + save_format = test_utils.get_save_format() + with self.cached_session(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3)(a) + b = keras.layers.Dense(1)(x) + model = keras.models.Model(a, b) + + x = np.random.random((3, 2)) + ref_y = model.predict(x) + weights = model.get_weights() + model.set_weights(weights) + y = model.predict(x) + self.assertAllClose(ref_y, y) + + with self.assertRaises(ValueError): + model.set_weights(weights[1:]) + with self.assertRaises(ValueError): + model.set_weights(weights[::-1]) + + model.save_weights(saved_model_dir, save_format=save_format) + model.load_weights(saved_model_dir) + y = model.predict(x) + self.assertAllClose(ref_y, y) + + def test_weight_preprocessing(self): + input_dim = 3 + output_dim = 3 + size = 2 + cases = [ + [ + (keras.layers.Bidirectional(keras.layers.SimpleRNN(2))), + [np.random.random((2, 1)), np.random.random((2, 1))], + (None, 3, 2), + ], + [ + (keras.layers.TimeDistributed(keras.layers.Dense(1))), + [np.random.random((2, 1)), np.random.random((1,))], + (None, 3, 2), + ], + [ + (keras.layers.Conv1D(output_dim, size, use_bias=False)), + [np.random.random((output_dim, input_dim, 
size, 1))], + (None, 4, input_dim), + ], + [ + ( + keras.layers.Conv2D( + output_dim, + size, + use_bias=False, + data_format="channels_first", + ) + ), + [np.random.random((output_dim, input_dim, size, size))], + (None, input_dim, 4, 4), + ], + [ + ( + keras.layers.Conv2DTranspose( + output_dim, + size, + use_bias=False, + data_format="channels_first", + ) + ), + [np.random.random((output_dim, input_dim, size, size))], + (None, input_dim, 4, 4), + ], + [ + ( + keras.layers.Conv2DTranspose( + output_dim, + size, + use_bias=False, + data_format="channels_last", + ) + ), + [np.random.random((size, size, input_dim, output_dim))], + (None, 4, 4, input_dim), + ], + [ + ( + keras.layers.Conv3D( + output_dim, + size, + use_bias=False, + data_format="channels_first", + ) + ), + [np.random.random((output_dim, input_dim, size, size, size))], + (None, input_dim, 4, 4, 4), + ], + [ + (keras.layers.GRUV1(output_dim)), + [ + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + ], + (None, 4, input_dim), + ], + [ + (keras.layers.LSTMV1(output_dim)), + [ + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + ], + (None, 4, input_dim), + ], + ] + for layer, weights, input_shape in cases: + layer.build(input_shape) + _ = hdf5_format.preprocess_weights_for_loading( + layer, weights, original_keras_version="1" + ) + + model = keras.models.Sequential([keras.layers.Dense(2, input_dim=2)]) + _ = hdf5_format.preprocess_weights_for_loading( + model, model.weights, original_keras_version="1" + ) + + x = keras.Input((2,)) + y = keras.layers.Dense(2)(x) + model = keras.models.Model(x, y) + _ = hdf5_format.preprocess_weights_for_loading( + model, model.weights, original_keras_version="1" + ) + + @parameterized.named_parameters( + ("gru", keras.layers.GRU, {"units": 2, "input_shape": (3, 5)}), + ( + "gru_with_reset_after", + keras.layers.GRU, + {"units": 2, "input_shape": (3, 5), "reset_after": True}, + ), + ("lstm", keras.layers.LSTM, {"units": 2, "input_shape": (3, 5)}), + ( + "cudnngru", + keras.layers.CuDNNGRU, + {"units": 2, "input_shape": (3, 5)}, + ), + ( + "cudnnlstm", + keras.layers.CuDNNLSTM, + {"units": 2, "input_shape": (3, 5)}, + ), + ) + def test_preprocess_weights_for_loading_rnn_should_be_idempotent( + self, layer_class, layer_args + ): + with self.cached_session(): + layer = layer_class(**layer_args) + layer.build(input_shape=layer_args.get("input_shape")) + weights1 = layer.get_weights() + weights2 = hdf5_format.preprocess_weights_for_loading( + layer, weights1 + ) + _ = [ + self.assertAllClose(x, y, rtol=1e-05) + for (x, y) in zip(weights1, weights2) + ] + + def test_sequential_weight_loading(self): + if h5py is None: + return + + h5_path = self._save_model_dir("test.h5") + + num_hidden = 5 + input_dim = 3 + batch_size = 5 + num_classes = 
2 + + with self.cached_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) + model.add(keras.layers.Dense(num_classes)) + + x = np.random.random((batch_size, input_dim)) + ref_y = model.predict(x) + + model.save_weights(h5_path) + + model = keras.models.Sequential() + model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) + model.add(keras.layers.Dense(num_classes)) + model.load_weights(h5_path) + y = model.predict(x) + + self.assertAllClose(y, ref_y) + + @test_combinations.run_with_all_saved_model_formats( + exclude_formats=["tf_no_traces"] + ) + def test_nested_model_weight_loading(self): + save_format = test_utils.get_save_format() + saved_model_dir = self._save_model_dir() + + batch_size = 5 + shape = (None, None, 3) + + with self.cached_session(): + + def gen_model(): + def seq_model(): + model = keras.models.Sequential( + [ + keras.layers.Conv2D(3, 1, input_shape=shape), + keras.layers.BatchNormalization(), + ] + ) + return model + + x = inner_inputs = keras.layers.Input((None, None, 3)) + x = seq_model()(x) + x = seq_model()(x) + inner_model = keras.models.Model(inner_inputs, x) + + inputs = keras.layers.Input(shape) + return keras.models.Model(inputs, inner_model(inputs)) + + model = gen_model() + x = np.random.random((batch_size, 1, 1, 3)) + ref_y = model.predict(x) + + model.save_weights(saved_model_dir, save_format=save_format) + + model = gen_model() + model.load_weights(saved_model_dir) + y = model.predict(x) + + self.assertAllClose(y, ref_y) + + def test_sequential_weight_loading_group_name_with_incorrect_length(self): + if h5py is None: + return + + h5_path = self._save_model_dir("test.h5") + + num_hidden = 5 + input_dim = 3 + num_classes = 2 + with self.cached_session(): + ref_model = keras.models.Sequential() + ref_model.add( + keras.layers.Dense(num_hidden, input_dim=input_dim, name="d1") + ) + ref_model.add(keras.layers.Dense(num_classes, name="d2")) + ref_model.compile( + loss=keras.losses.MSE, + optimizer="rmsprop", + metrics=[keras.metrics.categorical_accuracy], + ) + + f_ref_model = h5py.File(h5_path, "w") + hdf5_format.save_weights_to_hdf5_group(f_ref_model, ref_model) + + f_model = h5py.File(h5_path, "r") + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + num_hidden, use_bias=False, input_dim=input_dim, name="d1" + ) + ) + model.add(keras.layers.Dense(num_classes, name="d2")) + model.compile( + loss=keras.losses.MSE, + optimizer="rmsprop", + metrics=[keras.metrics.categorical_accuracy], + ) + with self.assertRaises( + ValueError, + msg=( + "Weight count mismatch for layer #0 (named d1). " + "Layer expects 1 weight(s). 
Received 2 saved weight(s)" + ), + ): + hdf5_format.load_weights_from_hdf5_group_by_name(f_model, model) + + hdf5_format.load_weights_from_hdf5_group_by_name( + f_model, model, skip_mismatch=True + ) + self.assertAllClose( + keras.backend.get_value(ref_model.layers[1].kernel), + keras.backend.get_value(model.layers[1].kernel), + ) + + def test_sequential_weight_loading_group_name_with_incorrect_shape(self): + if h5py is None: + return + + h5_path = self._save_model_dir("test.h5") + + num_hidden = 5 + input_dim = 3 + num_classes = 2 + with tf.Graph().as_default(), self.cached_session(): + ref_model = keras.models.Sequential() + ref_model.add( + keras.layers.Dense(num_hidden, input_dim=input_dim, name="d1") + ) + ref_model.add(keras.layers.Dense(num_classes, name="d2")) + ref_model.compile( + loss=keras.losses.MSE, + optimizer=optimizer_v1.RMSprop(lr=0.0001), + metrics=[keras.metrics.categorical_accuracy], + ) + + f_ref_model = h5py.File(h5_path, "w") + keras.backend.set_value( + ref_model.layers[1].bias, [3.5] * num_classes + ) + hdf5_format.save_weights_to_hdf5_group(f_ref_model, ref_model) + + f_model = h5py.File(h5_path, "r") + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + num_hidden + 5, input_dim=input_dim, name="d1" + ) + ) + model.add(keras.layers.Dense(num_classes, name="d2")) + model.compile( + loss=keras.losses.MSE, + optimizer=optimizer_v1.RMSprop(lr=0.0001), + metrics=[keras.metrics.categorical_accuracy], + ) + with self.assertRaises( + ValueError, + msg=( + "Shape mismatch in layer #0 (named d1) for weight " + "d1_1/kernel:0. Weight expects shape (3, 10). " + "Received saved weight with shape (3, 5)" + ), + ): + hdf5_format.load_weights_from_hdf5_group_by_name(f_model, model) + + hdf5_format.load_weights_from_hdf5_group_by_name( + f_model, model, skip_mismatch=True + ) + self.assertAllClose( + [3.5] * num_classes, + keras.backend.get_value(model.layers[1].bias), + ) + + @test_combinations.run_with_all_saved_model_formats( + exclude_formats=["tf_no_traces"] + ) + @test_combinations.run_with_all_model_types + def test_load_weights_from_saved_model(self): + save_path = self._save_model_dir() + save_format = test_utils.get_save_format() + + if save_format == "h5" and test_utils.get_model_type() == "subclass": + # TODO(b/173646281): HDF5 format currently does not allow saving + # subclassed models. + return + + with self.cached_session(): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + data = np.random.random((1, 3)) + labels = np.random.random((1, 4)) + model.compile(loss="mse", optimizer="rmsprop") + model.fit(data, labels) + model.save(save_path, save_format=save_format) + new_model = test_utils.get_small_mlp(1, 4, input_dim=3) + if test_utils.get_model_type() == "subclass": + # Call on test data to build the model. 
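(Editorial aside: the `skip_mismatch` behavior exercised above through `hdf5_format` is also reachable from the public `Model.load_weights` API. A sketch, assuming a writable `/tmp` and hypothetical layer names:)

```
import tensorflow as tf

src = tf.keras.Sequential([
    tf.keras.layers.Dense(5, input_shape=(3,), name="d1"),
    tf.keras.layers.Dense(2, name="d2"),
])
src.save_weights("/tmp/demo_weights.h5")

dst = tf.keras.Sequential([
    tf.keras.layers.Dense(10, input_shape=(3,), name="d1"),  # shape mismatch
    tf.keras.layers.Dense(2, name="d2"),
])
# skip_mismatch=True logs a warning and skips "d1" instead of raising;
# "d2" is still restored by name.
dst.load_weights("/tmp/demo_weights.h5", by_name=True, skip_mismatch=True)
```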
+ new_model.predict(data) + new_model.load_weights(save_path) + self.assertAllClose(model.weights, new_model.weights) + + +class SubclassedModel(training.Model): + def __init__(self): + super().__init__() + self.x_layer = keras.layers.Dense(3) + self.b_layer = keras.layers.Dense(1) + + def call(self, a): + return self.b_layer(self.x_layer(a)) + + +class TestWeightSavingAndLoadingTFFormat( + tf.test.TestCase, parameterized.TestCase +): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_tensorflow_format_overwrite(self): + with self.cached_session() as session: + model = SubclassedModel() + temp_dir = self.get_temp_dir() + prefix = os.path.join(temp_dir, "ckpt") + + x = tf.constant(np.random.random((3, 2)), dtype=tf.float32) + executing_eagerly = tf.executing_eagerly() + model(x) + if not executing_eagerly: + session.run([v.initializer for v in model.variables]) + model.save_weights(prefix, save_format="tensorflow") + model.save_weights(prefix, save_format="tensorflow", overwrite=True) + with self.assertRaises(EOFError): + # Indirectly tests that the user is prompted + model.save_weights( + prefix, save_format="tensorflow", overwrite=False + ) + + def test_no_default_session(self): + with tf.Graph().as_default(): + self.assertFalse(tf.compat.v1.get_default_session()) + data = np.random.random((1000, 32)).astype(np.float32) + labels = np.random.random((1000, 10)).astype(np.float32) + + model = keras.models.Sequential( + [ + keras.layers.Dense(10, activation="softmax"), + keras.layers.Dense(10, activation="softmax"), + ] + ) + + model.compile( + optimizer=tf.compat.v1.train.RMSPropOptimizer(0.001), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + model.fit(data, labels) + fname = os.path.join(self.get_temp_dir(), "weights", "ckpt") + model.save_weights(fname) + model.load_weights(fname) + + def test_no_graph_pollution(self): + with tf.compat.v1.get_default_graph().as_default(): + graph = tf.Graph() + with graph.as_default(), self.session(graph) as session: + model = SubclassedModel() + temp_dir = self.get_temp_dir() + prefix = os.path.join(temp_dir, "ckpt") + + x = tf.constant(np.random.random((3, 2)), dtype=tf.float32) + model(x) + session.run([v.initializer for v in model.variables]) + model.save_weights(prefix, save_format="tensorflow") + op_count = len(graph.get_operations()) + model.save_weights(prefix, save_format="tensorflow") + self.assertLen(graph.get_operations(), op_count) + + model.load_weights(prefix) + op_count = len(graph.get_operations()) + model.load_weights(prefix) + self.assertLen(graph.get_operations(), op_count) + + def _weight_loading_test_template(self, make_model_fn): + with self.cached_session(): + model = make_model_fn() + model.compile( + loss="mse", + optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1), + metrics=["acc", keras.metrics.CategoricalAccuracy()], + ) + temp_dir = self.get_temp_dir() + prefix = os.path.join(temp_dir, "ckpt") + train_x = np.random.random((3, 2)) + train_y = np.random.random((3,)) + x = tf.constant(train_x, dtype=tf.float32) + + model.train_on_batch(train_x, train_y) + model.save_weights(prefix, save_format="tf") + ref_y_before_train = model.predict(train_x) + model.train_on_batch(train_x, train_y) + ref_y_after_train = model.predict(train_x) + for v in model.variables: + self.evaluate(v.assign(tf.random.normal(shape=tf.shape(v)))) + + self.addCleanup(shutil.rmtree, temp_dir) + + model.load_weights(prefix) + self.assertAllClose(ref_y_before_train, self.evaluate(model(x))) + + 
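(Editorial sketch: stripped of the test harness, the TF-format round trip that `_weight_loading_test_template` exercises here reduces to a few public API calls. `TinyModel` is a hypothetical stand-in for `SubclassedModel`; eager TF 2.x assumed:)

```
import os
import tempfile

import numpy as np
import tensorflow as tf


class TinyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(3)

    def call(self, x):
        return self.dense(x)


model = TinyModel()
x = tf.constant(np.random.random((3, 2)), dtype=tf.float32)
ref_y = model(x)  # the first call builds the variables

prefix = os.path.join(tempfile.mkdtemp(), "ckpt")
model.save_weights(prefix, save_format="tf")

# Restore-on-create: weights are matched lazily as the fresh model builds.
restored = TinyModel()
status = restored.load_weights(prefix)
np.testing.assert_allclose(ref_y.numpy(), restored(x).numpy())
status.assert_existing_objects_matched()
```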
# Test restore-on-create if this is a subclassed Model (graph + # Networks will have already created their variables). + load_model = make_model_fn() + load_model.load_weights(prefix) + self.assertAllClose( + ref_y_before_train, self.evaluate(load_model(x)) + ) + load_model = make_model_fn() + load_model.load_weights(prefix) + # We need to run some of the restore ops for predict(), but not all + # variables have been created yet (optimizer slot variables). Tests + # incremental restore. + load_model.predict(train_x) + load_model.compile( + loss="mse", + optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1), + metrics=["acc", keras.metrics.CategoricalAccuracy()], + ) + load_model.train_on_batch(train_x, train_y) + self.assertAllClose(ref_y_after_train, self.evaluate(load_model(x))) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_weight_loading_graph_model(self): + def _make_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3)(a) + b = keras.layers.Dense(1)(x) + return keras.models.Model(a, b) + + self._weight_loading_test_template(_make_graph_model) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_weight_loading_subclassed_model(self): + self._weight_loading_test_template(SubclassedModel) + + def _new_layer_weight_loading_test_template( + self, first_model_fn, second_model_fn + ): + with self.cached_session() as session: + model = first_model_fn() + temp_dir = self.get_temp_dir() + prefix = os.path.join(temp_dir, "ckpt") + + x = tf.constant(np.random.random((3, 2)), dtype=tf.float32) + executing_eagerly = tf.executing_eagerly() + ref_y_tensor = model(x) + if not executing_eagerly: + session.run([v.initializer for v in model.variables]) + ref_y = self.evaluate(ref_y_tensor) + model.save_weights(prefix) + self.assertEqual(prefix, tf.train.latest_checkpoint(temp_dir)) + for v in model.variables: + self.evaluate(v.assign(tf.random.normal(shape=tf.shape(v)))) + + self.addCleanup(shutil.rmtree, temp_dir) + + second_model = second_model_fn() + status = second_model.load_weights(prefix) + second_model(x) + status.run_restore_ops() + second_model.save_weights(prefix) + # Check that the second model's checkpoint loads into the original + # model + status = model.load_weights(prefix) + status.run_restore_ops(session) + y = self.evaluate(model(x)) + self.assertAllClose(ref_y, y) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_weight_loading_graph_model_added_layer(self): + def _save_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3, name="first")(a) + b = keras.layers.Dense(1, name="second")(x) + return keras.models.Model(a, b) + + def _restore_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3, name="first")(a) + y = keras.layers.Dense(1, name="second")(x) + b = keras.layers.Dense(3, name="secondjr")(y) + return keras.models.Model(a, b) + + self._new_layer_weight_loading_test_template( + _save_graph_model, _restore_graph_model + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_weight_loading_graph_model_added_no_weight_layer(self): + def _save_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3, name="first")(a) + b = keras.layers.Dense(1, name="second")(x) + return keras.models.Model(a, b) + + def _restore_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3, 
name="first")(a) + b = keras.layers.Dense(1, name="second")(x) + y = keras.layers.Dropout(rate=0.1)(b) + return keras.models.Model(a, y) + + self._new_layer_weight_loading_test_template( + _save_graph_model, _restore_graph_model + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_weight_loading_subclassed_model_added_layer(self): + class SubclassedModelRestore(training.Model): + def __init__(self): + super().__init__() + self.x_layer = keras.layers.Dense(3) + self.y_layer = keras.layers.Dense(3) + self.b_layer = keras.layers.Dense(1) + + def call(self, a): + return self.b_layer(self.y_layer(self.x_layer(a))) + + self._new_layer_weight_loading_test_template( + SubclassedModel, SubclassedModelRestore + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_incompatible_checkpoint(self): + save_path = tf.train.Checkpoint().save( + os.path.join(self.get_temp_dir(), "ckpt") + ) + m = DummySubclassModel() + with self.assertRaisesRegex(AssertionError, "Nothing to load"): + m.load_weights(save_path) + m.dense = keras.layers.Dense(2) + m.dense(tf.constant([[1.0]])) + with self.assertRaisesRegex( + AssertionError, "Nothing except the root object matched" + ): + m.load_weights(save_path) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_directory_passed(self): + with self.cached_session(): + m = DummySubclassModel() + v = m.add_weight(name="v", shape=[]) + self.evaluate(v.assign(42.0)) + prefix = os.path.join( + self.get_temp_dir(), str(uuid.uuid4()), "ckpt/" + ) + m.save_weights(prefix) + self.evaluate(v.assign(2.0)) + m.load_weights(prefix) + self.assertEqual(42.0, self.evaluate(v)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_relative_path(self): + with self.cached_session(): + m = DummySubclassModel() + v = m.add_weight(name="v", shape=[]) + os.chdir(self.get_temp_dir()) + + prefix = "ackpt" + self.evaluate(v.assign(42.0)) + m.save_weights(prefix) + self.assertTrue(tf.io.gfile.exists("ackpt.index")) + self.evaluate(v.assign(1.0)) + m.load_weights(prefix) + self.assertEqual(42.0, self.evaluate(v)) + + prefix = "subdir/ackpt" + self.evaluate(v.assign(43.0)) + m.save_weights(prefix) + self.assertTrue(tf.io.gfile.exists("subdir/ackpt.index")) + self.evaluate(v.assign(2.0)) + m.load_weights(prefix) + self.assertEqual(43.0, self.evaluate(v)) + + prefix = "ackpt/" + self.evaluate(v.assign(44.0)) + m.save_weights(prefix) + self.assertTrue(tf.io.gfile.exists("ackpt/.index")) + self.evaluate(v.assign(3.0)) + m.load_weights(prefix) + self.assertEqual(44.0, self.evaluate(v)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_nonexistent_prefix_directory(self): + with self.cached_session(): + m = DummySubclassModel() + v = m.add_weight(name="v", shape=[]) + self.evaluate(v.assign(42.0)) + prefix = os.path.join( + self.get_temp_dir(), str(uuid.uuid4()), "bckpt" + ) + m.save_weights(prefix) + self.evaluate(v.assign(2.0)) + m.load_weights(prefix) + self.assertEqual(42.0, self.evaluate(v)) + + +class DummySubclassModel(training.Model): + pass + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/saving/saved_model/BUILD b/keras/saving/legacy/saved_model/BUILD similarity index 89% rename from keras/saving/saved_model/BUILD rename to keras/saving/legacy/saved_model/BUILD index 58672e0776d1..ac954f803596 100644 --- a/keras/saving/saved_model/BUILD 
+++ b/keras/saving/legacy/saved_model/BUILD @@ -18,9 +18,12 @@ # buildifier: disable=same-origin-load +# Placeholder: load unaliased py_library +# Placeholder: load unaliased py_binary load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras/layers/rnn:__pkg__", "//keras/saving:__subpackages__", @@ -39,6 +42,16 @@ py_library( visibility = ["//visibility:private"], ) +py_library( + name = "utils", + srcs = ["utils.py"], + deps = [ + "//:expect_tensorflow_installed", + "//keras/engine:base_layer_utils", + "//keras/utils:layer_utils", + ], +) + py_library( name = "saved_model", srcs = [ @@ -54,11 +67,11 @@ py_library( "save.py", "save_impl.py", "serialized_attributes.py", - "utils.py", ], srcs_version = "PY3", deps = [ ":order_preserving_set", + ":utils", "//:expect_tensorflow_installed", "//keras/utils:generic_utils", ], @@ -104,6 +117,7 @@ tf_py_test( python_version = "PY3", shard_count = 4, tags = [ + "no_oss", # TODO(b/296236267) "no_pip", # TODO(b/202022379) "no_rocm", "no_windows", diff --git a/keras/saving/saved_model/README.md b/keras/saving/legacy/saved_model/README.md similarity index 100% rename from keras/saving/saved_model/README.md rename to keras/saving/legacy/saved_model/README.md diff --git a/keras/saving/legacy/saved_model/__init__.py b/keras/saving/legacy/saved_model/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/keras/saving/legacy/saved_model/base_serialization.py b/keras/saving/legacy/saved_model/base_serialization.py new file mode 100644 index 000000000000..51057c084dd7 --- /dev/null +++ b/keras/saving/legacy/saved_model/base_serialization.py @@ -0,0 +1,141 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Helper classes that list&validate all attributes to serialize to +SavedModel.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc + +from keras.saving.legacy.saved_model import json_utils +from keras.saving.legacy.saved_model import utils + + +class SavedModelSaver(object, metaclass=abc.ABCMeta): + """Saver defining the methods and properties used to serialize Keras + objects.""" + + def __init__(self, obj): + self.obj = obj + + @abc.abstractproperty + def object_identifier(self): + """String stored in object identifier field in the SavedModel proto. + + Returns: + A string with the object identifier, which is used at load time. + """ + raise NotImplementedError + + @property + def tracking_metadata(self): + """String stored in metadata field in the SavedModel proto. + + Returns: + A serialized JSON storing information necessary for recreating this + layer. 
+ """ + # TODO(kathywu): check that serialized JSON can be loaded (e.g., if an + # object is in the python property) + return json_utils.Encoder().encode(self.python_properties) + + def trackable_children(self, serialization_cache): + """Lists all Trackable children connected to this object.""" + if not utils.should_save_traces(): + return {} + + children = self.objects_to_serialize(serialization_cache) + children.update(self.functions_to_serialize(serialization_cache)) + return children + + @abc.abstractproperty + def python_properties(self): + """Returns dictionary of python properties to save in the metadata. + + This dictionary must be serializable and deserializable to/from JSON. + + When loading, the items in this dict are used to initialize the object + and define attributes in the revived object. + """ + raise NotImplementedError + + @abc.abstractmethod + def objects_to_serialize(self, serialization_cache): + """Returns dictionary of extra checkpointable objects to serialize. + + See `functions_to_serialize` for an explanation of this function's + effects. + + Args: + serialization_cache: Dictionary passed to all objects in the same + object graph during serialization. + + Returns: + A dictionary mapping attribute names to checkpointable objects. + """ + raise NotImplementedError + + @abc.abstractmethod + def functions_to_serialize(self, serialization_cache): + """Returns extra functions to include when serializing a Keras object. + + Normally, when calling exporting an object to SavedModel, only the + functions and objects defined by the user are saved. For example: + + ``` + obj = tf.Module() + obj.v = tf.Variable(1.) + + @tf.function + def foo(...): ... + + obj.foo = foo + + w = tf.Variable(1.) + + tf.saved_model.save(obj, 'path/to/saved/model') + loaded = tf.saved_model.load('path/to/saved/model') + + loaded.v # Variable with the same value as obj.v + loaded.foo # Equivalent to obj.foo + loaded.w # AttributeError + ``` + + Assigning trackable objects to attributes creates a graph, which is used + for both checkpointing and SavedModel serialization. + + When the graph generated from attribute tracking is insufficient, extra + objects and functions may be added at serialization time. For example, + most models do not have their call function wrapped with a @tf.function + decorator. This results in `model.call` not being saved. Since Keras + objects should be revivable from the SavedModel format, the call + function is added as an extra function to serialize. + + This function and `objects_to_serialize` is called multiple times when + exporting to SavedModel. Please use the cache to avoid generating new + functions and objects. A fresh cache is created for each SavedModel + export. + + Args: + serialization_cache: Dictionary passed to all objects in the same + object graph during serialization. + + Returns: + A dictionary mapping attribute names to `Function` or + `ConcreteFunction`. + """ + raise NotImplementedError diff --git a/keras/saving/saved_model/constants.py b/keras/saving/legacy/saved_model/constants.py similarity index 76% rename from keras/saving/saved_model/constants.py rename to keras/saving/legacy/saved_model/constants.py index fae2c1bd07bc..c505586310c1 100644 --- a/keras/saving/saved_model/constants.py +++ b/keras/saving/legacy/saved_model/constants.py @@ -17,24 +17,24 @@ # Namespace used to store all attributes added during serialization. # e.g. the list of layers can be accessed using `loaded.keras_api.layers`, in an # object loaded from `tf.saved_model.load()`. 
-KERAS_ATTR = 'keras_api' +KERAS_ATTR = "keras_api" # Keys for the serialization cache. # Maps to the keras serialization dict {Layer --> SerializedAttributes object} -KERAS_CACHE_KEY = 'keras_serialized_attributes' +KERAS_CACHE_KEY = "keras_serialized_attributes" # Name of Keras metadata file stored in the SavedModel. -SAVED_METADATA_PATH = 'keras_metadata.pb' +SAVED_METADATA_PATH = "keras_metadata.pb" # Names of SavedObject Keras identifiers. -INPUT_LAYER_IDENTIFIER = '_tf_keras_input_layer' -LAYER_IDENTIFIER = '_tf_keras_layer' -METRIC_IDENTIFIER = '_tf_keras_metric' -MODEL_IDENTIFIER = '_tf_keras_model' -NETWORK_IDENTIFIER = '_tf_keras_network' -RNN_LAYER_IDENTIFIER = '_tf_keras_rnn_layer' -SEQUENTIAL_IDENTIFIER = '_tf_keras_sequential' +INPUT_LAYER_IDENTIFIER = "_tf_keras_input_layer" +LAYER_IDENTIFIER = "_tf_keras_layer" +METRIC_IDENTIFIER = "_tf_keras_metric" +MODEL_IDENTIFIER = "_tf_keras_model" +NETWORK_IDENTIFIER = "_tf_keras_network" +RNN_LAYER_IDENTIFIER = "_tf_keras_rnn_layer" +SEQUENTIAL_IDENTIFIER = "_tf_keras_sequential" KERAS_OBJECT_IDENTIFIERS = ( INPUT_LAYER_IDENTIFIER, diff --git a/keras/saving/legacy/saved_model/create_test_saved_model.py b/keras/saving/legacy/saved_model/create_test_saved_model.py new file mode 100644 index 000000000000..5a281df9c41d --- /dev/null +++ b/keras/saving/legacy/saved_model/create_test_saved_model.py @@ -0,0 +1,36 @@ +"""A binary that creates a serialized SavedModel from a keras model. + +This is used in tests to ensure that model serialization is deterministic across +different processes. +""" + +import tensorflow.compat.v2 as tf +from absl import app +from absl import flags + +from keras import regularizers +from keras.testing_infra import test_utils + +flags.DEFINE_string("output_path", "", "The path to write the SavedModel at.") + +FLAGS = flags.FLAGS + + +def main(_) -> None: + with test_utils.model_type_scope("functional"): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + model.layers[-1].activity_regularizer = regularizers.get("l2") + model.activity_regularizer = regularizers.get("l2") + model.compile(loss="mse", optimizer="rmsprop") + + def callable_loss(): + return tf.reduce_sum(model.weights[0]) + + model.add_loss(callable_loss) + + print(f"_____Writing saved model to: {FLAGS.output_path}") + model.save(FLAGS.output_path) + + +if __name__ == "__main__": + app.run(main) diff --git a/keras/saving/legacy/saved_model/determinism_test.py b/keras/saving/legacy/saved_model/determinism_test.py new file mode 100755 index 000000000000..dc9d8835d857 --- /dev/null +++ b/keras/saving/legacy/saved_model/determinism_test.py @@ -0,0 +1,33 @@ +"""Saves the same model twice and ensures that they are serialized the same.""" + +import subprocess + +import tensorflow.compat.v2 as tf +from absl import flags +from tensorflow.core.protobuf import saved_model_pb2 + +FLAGS = flags.FLAGS + + +class DeterminismTest(tf.test.TestCase): + def test_saving_is_deterministic(self): + create_saved_model = f"{FLAGS.test_srcdir}/create_test_saved_model.par" + saved_model_a_path = f"{FLAGS.test_tmpdir}/a" + saved_model_b_path = f"{FLAGS.test_tmpdir}/b" + + save_a = subprocess.Popen( + [create_saved_model, "--output_path", saved_model_a_path] + ) + save_b = subprocess.Popen( + [create_saved_model, "--output_path", saved_model_b_path] + ) + save_a.wait() + save_b.wait() + saved_model_a = saved_model_pb2.SavedModel() + with tf.io.gfile.GFile(f"{saved_model_a_path}/saved_model.pb", "rb") as f: + saved_model_a.MergeFromString(f.read()) + saved_model_b = saved_model_pb2.SavedModel() + with tf.io.gfile.GFile(f"{saved_model_b_path}/saved_model.pb", "rb") as f: + saved_model_b.MergeFromString(f.read()) + + self.assertProtoEquals(saved_model_a, saved_model_b)
diff --git a/keras/saving/legacy/saved_model/json_utils.py b/keras/saving/legacy/saved_model/json_utils.py new file mode 100644 index 000000000000..05b0e285be75 --- /dev/null +++ b/keras/saving/legacy/saved_model/json_utils.py @@ -0,0 +1,237 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utils for creating and loading the Layer metadata for SavedModel. + +These are required to retain the original format of the build input shape, since +layers and models may have different build behaviors depending on whether the +shape is a list, tuple, or TensorShape. For example, Network.build() will create +separate inputs if the given input_shape is a list, and will create a single +input if the given shape is a tuple. +""" + +import collections +import enum +import functools +import json + +import numpy as np +import tensorflow.compat.v2 as tf +import wrapt + +from keras.saving import serialization_lib +from keras.saving.legacy import serialization +from keras.saving.legacy.saved_model.utils import in_tf_saved_model_scope + +# isort: off +from tensorflow.python.framework import type_spec_registry + +_EXTENSION_TYPE_SPEC = "_EXTENSION_TYPE_SPEC" + + +class Encoder(json.JSONEncoder): + """JSON encoder and decoder that handles TensorShapes and tuples.""" + + def default(self, obj): + """Encodes objects for types that aren't handled by the default + encoder.""" + if isinstance(obj, tf.TensorShape): + items = obj.as_list() if obj.rank is not None else None + return {"class_name": "TensorShape", "items": items} + return get_json_type(obj) + + def encode(self, obj): + return super().encode(_encode_tuple(obj)) + + +def _encode_tuple(x): + if isinstance(x, tuple): + return { + "class_name": "__tuple__", + "items": tuple(_encode_tuple(i) for i in x), + } + elif isinstance(x, list): + return [_encode_tuple(i) for i in x] + elif isinstance(x, dict): + return {key: _encode_tuple(value) for key, value in x.items()} + else: + return x + + +def decode(json_string): + return json.loads(json_string, object_hook=_decode_helper) + + +def decode_and_deserialize( + json_string, module_objects=None, custom_objects=None +): + """Decodes the JSON and deserializes any Keras objects found in the dict.""" + return json.loads( + json_string, + object_hook=functools.partial( + _decode_helper, + deserialize=True, + module_objects=module_objects, + custom_objects=custom_objects, + ), + ) + + +def _decode_helper( + obj, deserialize=False, module_objects=None, custom_objects=None +): + """A decoding helper that is TF-object aware. + + Args: + obj: A decoded dictionary that may represent an object. + deserialize: Boolean. When True, deserializes any Keras + objects found in `obj`.
Defaults to `False`. + module_objects: A dictionary of built-in objects to look the name up in. + Generally, `module_objects` is provided by midlevel library + implementers. + custom_objects: A dictionary of custom objects to look the name up in. + Generally, `custom_objects` is provided by the end user. + + Returns: + The decoded object. + """ + if isinstance(obj, dict) and "class_name" in obj: + if obj["class_name"] == "TensorShape": + return tf.TensorShape(obj["items"]) + elif obj["class_name"] == "TypeSpec": + return type_spec_registry.lookup(obj["type_spec"])._deserialize( + _decode_helper(obj["serialized"]) + ) + elif obj["class_name"] == "CompositeTensor": + spec = obj["spec"] + tensors = [] + for dtype, tensor in obj["tensors"]: + tensors.append( + tf.constant(tensor, dtype=tf.dtypes.as_dtype(dtype)) + ) + return tf.nest.pack_sequence_as( + _decode_helper(spec), tensors, expand_composites=True + ) + elif obj["class_name"] == "__tuple__": + return tuple(_decode_helper(i) for i in obj["items"]) + elif obj["class_name"] == "__ellipsis__": + return Ellipsis + elif deserialize and "__passive_serialization__" in obj: + # __passive_serialization__ is added by the JSON encoder when + # encoding an object that has a `get_config()` method. + try: + if in_tf_saved_model_scope() or "module" not in obj: + return serialization.deserialize_keras_object( + obj, + module_objects=module_objects, + custom_objects=custom_objects, + ) + else: + return serialization_lib.deserialize_keras_object( + obj, + module_objects=module_objects, + custom_objects=custom_objects, + ) + except ValueError: + pass + elif obj["class_name"] == "__bytes__": + return obj["value"].encode("utf-8") + return obj + + +def get_json_type(obj): + """Serializes any object to a JSON-serializable structure. + + Args: + obj: the object to serialize + + Returns: + JSON-serializable structure representing `obj`. + + Raises: + TypeError: if `obj` cannot be serialized. + """ + # if obj is a serializable Keras class instance + # e.g. optimizer, layer + if hasattr(obj, "get_config"): + serialized = serialization.serialize_keras_object(obj) + serialized["__passive_serialization__"] = True + return serialized + + # if obj is any numpy type + if type(obj).__module__ == np.__name__: + if isinstance(obj, np.ndarray): + return obj.tolist() + else: + return obj.item() + + # misc functions (e.g. loss function) + if callable(obj): + return obj.__name__ + + # if obj is a python 'type' + if type(obj).__name__ == type.__name__: + return obj.__name__ + + if isinstance(obj, tf.compat.v1.Dimension): + return obj.value + + if isinstance(obj, tf.TensorShape): + return obj.as_list() + + if isinstance(obj, tf.DType): + return obj.name + + if isinstance(obj, collections.abc.Mapping): + return dict(obj) + + if obj is Ellipsis: + return {"class_name": "__ellipsis__"} + + if isinstance(obj, wrapt.ObjectProxy): + return obj.__wrapped__ + + if isinstance(obj, tf.TypeSpec): + try: + type_spec_name = type_spec_registry.get_name(type(obj)) + return { + "class_name": "TypeSpec", + "type_spec": type_spec_name, + "serialized": obj._serialize(), + } + except ValueError: + raise ValueError( + f"Unable to serialize {obj} to JSON, because the TypeSpec " + f"class {type(obj)} has not been registered." 
+ ) + if isinstance(obj, tf.__internal__.CompositeTensor): + spec = tf.type_spec_from_value(obj) + tensors = [] + for tensor in tf.nest.flatten(obj, expand_composites=True): + tensors.append((tensor.dtype.name, tensor.numpy().tolist())) + return { + "class_name": "CompositeTensor", + "spec": get_json_type(spec), + "tensors": tensors, + } + + if isinstance(obj, enum.Enum): + return obj.value + + if isinstance(obj, bytes): + return {"class_name": "__bytes__", "value": obj.decode("utf-8")} + + raise TypeError( + f"Unable to serialize {obj} to JSON. Unrecognized type {type(obj)}." + ) diff --git a/keras/saving/legacy/saved_model/json_utils_test.py b/keras/saving/legacy/saved_model/json_utils_test.py new file mode 100644 index 000000000000..3a86aad31520 --- /dev/null +++ b/keras/saving/legacy/saved_model/json_utils_test.py @@ -0,0 +1,107 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests the JSON encoder and decoder.""" + +import enum + +import tensorflow.compat.v2 as tf + +from keras.saving.legacy.saved_model import json_utils +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + + +class JsonUtilsTest(test_combinations.TestCase): + def test_encode_decode_tensor_shape(self): + metadata = { + "key1": tf.TensorShape(None), + "key2": [tf.TensorShape([None]), tf.TensorShape([3, None, 5])], + } + string = json_utils.Encoder().encode(metadata) + loaded = json_utils.decode(string) + + self.assertEqual(set(loaded.keys()), {"key1", "key2"}) + self.assertAllEqual(loaded["key1"].rank, None) + self.assertAllEqual(loaded["key2"][0].as_list(), [None]) + self.assertAllEqual(loaded["key2"][1].as_list(), [3, None, 5]) + + def test_encode_decode_tuple(self): + metadata = {"key1": (3, 5), "key2": [(1, (3, 4)), (1,)]} + string = json_utils.Encoder().encode(metadata) + loaded = json_utils.decode(string) + + self.assertEqual(set(loaded.keys()), {"key1", "key2"}) + self.assertAllEqual(loaded["key1"], (3, 5)) + self.assertAllEqual(loaded["key2"], [(1, (3, 4)), (1,)]) + + def test_encode_decode_type_spec(self): + spec = tf.TensorSpec((1, 5), tf.float32) + string = json_utils.Encoder().encode(spec) + loaded = json_utils.decode(string) + self.assertEqual(spec, loaded) + + invalid_type_spec = { + "class_name": "TypeSpec", + "type_spec": "Invalid Type", + "serialized": None, + } + string = json_utils.Encoder().encode(invalid_type_spec) + with self.assertRaisesRegexp( + ValueError, "No TypeSpec has been registered" + ): + loaded = json_utils.decode(string) + + def test_encode_decode_enum(self): + class Enum(enum.Enum): + CLASS_A = "a" + CLASS_B = "b" + + config = {"key": Enum.CLASS_A, "key2": Enum.CLASS_B} + string = json_utils.Encoder().encode(config) + loaded = json_utils.decode(string) + self.assertAllEqual({"key": "a", "key2": "b"}, loaded) + + @test_utils.run_v2_only + def test_encode_decode_ragged_tensor(self): + x = 
tf.ragged.constant([[1.0, 2.0], [3.0]]) + string = json_utils.Encoder().encode(x) + loaded = json_utils.decode(string) + self.assertAllEqual(loaded, x) + + @test_utils.run_v2_only + def test_encode_decode_extension_type_tensor(self): + class MaskedTensor(tf.experimental.ExtensionType): + __name__ = "MaskedTensor" + values: tf.Tensor + mask: tf.Tensor + + x = MaskedTensor( + values=[[1, 2, 3], [4, 5, 6]], + mask=[[True, True, False], [True, False, True]], + ) + string = json_utils.Encoder().encode(x) + loaded = json_utils.decode(string) + self.assertAllEqual(loaded, x) + + def test_encode_decode_bytes(self): + b_string = b"abc" + json_string = json_utils.Encoder().encode(b_string) + loaded = json_utils.decode(json_string) + self.assertAllEqual(b_string, loaded) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/saving/legacy/saved_model/layer_serialization.py b/keras/saving/legacy/saved_model/layer_serialization.py new file mode 100644 index 000000000000..ae7e320a0198 --- /dev/null +++ b/keras/saving/legacy/saved_model/layer_serialization.py @@ -0,0 +1,211 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Classes and functions implementing Layer SavedModel serialization.""" + +import tensorflow.compat.v2 as tf + +from keras.mixed_precision import policy +from keras.saving.legacy import serialization +from keras.saving.legacy.saved_model import base_serialization +from keras.saving.legacy.saved_model import constants +from keras.saving.legacy.saved_model import save_impl +from keras.saving.legacy.saved_model import serialized_attributes + + +class LayerSavedModelSaver(base_serialization.SavedModelSaver): + """Implements Layer SavedModel serialization.""" + + @property + def object_identifier(self): + return constants.LAYER_IDENTIFIER + + @property + def python_properties(self): + # TODO(kathywu): Add python property validator + return self._python_properties_internal() + + def _python_properties_internal(self): + """Returns dictionary of all python properties.""" + # TODO(kathywu): Add support for metrics serialization. + # TODO(kathywu): Synchronize with the keras spec (go/keras-json-spec) + # once the python config serialization has caught up. + metadata = dict( + name=self.obj.name, + trainable=self.obj.trainable, + expects_training_arg=self.obj._expects_training_arg, + dtype=policy.serialize(self.obj._dtype_policy), + batch_input_shape=getattr(self.obj, "_batch_input_shape", None), + stateful=self.obj.stateful, + must_restore_from_config=self.obj._must_restore_from_config, + preserve_input_structure_in_config=self.obj._preserve_input_structure_in_config, # noqa: E501 + autocast=self.obj._autocast, + ) + + metadata.update(get_serialized(self.obj)) + if self.obj.input_spec is not None: + # Layer's input_spec has already been type-checked in the property + # setter. 
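(Stepping out of the diff for a moment: the `json_utils` encoder and decoder shown earlier are what serialize metadata dictionaries like the one being assembled here. A quick round trip, assuming this patch's module layout is importable:)

```
import tensorflow as tf

from keras.saving.legacy.saved_model import json_utils

metadata = {
    "build_input_shape": tf.TensorShape([None, 3]),
    "kernel_size": (3, 3),  # plain json.dumps would flatten this to a list
}
blob = json_utils.Encoder().encode(metadata)
loaded = json_utils.decode(blob)
assert loaded["kernel_size"] == (3, 3)                     # still a tuple
assert loaded["build_input_shape"].as_list() == [None, 3]  # a TensorShape
```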
+ metadata["input_spec"] = tf.nest.map_structure( + lambda x: serialization.serialize_keras_object(x) + if x + else None, + self.obj.input_spec, + ) + if self.obj.activity_regularizer is not None and hasattr( + self.obj.activity_regularizer, "get_config" + ): + metadata[ + "activity_regularizer" + ] = serialization.serialize_keras_object( + self.obj.activity_regularizer + ) + if self.obj._build_input_shape is not None: + metadata["build_input_shape"] = self.obj._build_input_shape + return metadata + + def objects_to_serialize(self, serialization_cache): + return self._get_serialized_attributes( + serialization_cache + ).objects_to_serialize + + def functions_to_serialize(self, serialization_cache): + return self._get_serialized_attributes( + serialization_cache + ).functions_to_serialize + + def _get_serialized_attributes(self, serialization_cache): + """Generates or retrieves serialized attributes from cache.""" + keras_cache = serialization_cache.setdefault( + constants.KERAS_CACHE_KEY, {} + ) + if self.obj in keras_cache: + return keras_cache[self.obj] + + serialized_attr = keras_cache[ + self.obj + ] = serialized_attributes.SerializedAttributes.new(self.obj) + + if ( + save_impl.should_skip_serialization(self.obj) + or self.obj._must_restore_from_config + ): + return serialized_attr + + object_dict, function_dict = self._get_serialized_attributes_internal( + serialization_cache + ) + + serialized_attr.set_and_validate_objects(object_dict) + serialized_attr.set_and_validate_functions(function_dict) + return serialized_attr + + def _get_serialized_attributes_internal(self, serialization_cache): + """Returns dictionary of serialized attributes.""" + objects = save_impl.wrap_layer_objects(self.obj, serialization_cache) + functions = save_impl.wrap_layer_functions( + self.obj, serialization_cache + ) + # Attribute validator requires that the default save signature is added + # to function dict, even if the value is None. + functions["_default_save_signature"] = None + return objects, functions + + +# TODO(kathywu): Move serialization utils (and related utils from +# generic_utils.py) to a separate file. +def get_serialized(obj): + with serialization.skip_failed_serialization(): + # Store the config dictionary, which may be used when reviving the + # object. When loading, the program will attempt to revive the object + # from config, and if that fails, the object will be revived from the + # SavedModel. 
+ return serialization.serialize_keras_object(obj) + + +class InputLayerSavedModelSaver(base_serialization.SavedModelSaver): + """InputLayer serialization.""" + + @property + def object_identifier(self): + return constants.INPUT_LAYER_IDENTIFIER + + @property + def python_properties(self): + + return dict( + class_name=type(self.obj).__name__, + name=self.obj.name, + dtype=self.obj.dtype, + sparse=self.obj.sparse, + ragged=self.obj.ragged, + batch_input_shape=self.obj._batch_input_shape, + config=self.obj.get_config(), + ) + + def objects_to_serialize(self, serialization_cache): + return {} + + def functions_to_serialize(self, serialization_cache): + return {} + + +class RNNSavedModelSaver(LayerSavedModelSaver): + """RNN layer serialization.""" + + @property + def object_identifier(self): + return constants.RNN_LAYER_IDENTIFIER + + def _get_serialized_attributes_internal(self, serialization_cache): + objects, functions = super()._get_serialized_attributes_internal( + serialization_cache + ) + states = tf.__internal__.tracking.wrap(self.obj.states) + # SavedModel requires all the objects to be Trackable when saving. If + # `states` is still a tuple after wrap_or_unwrap, it means it doesn't + # contain any trackable item within it, e.g. an empty tuple or + # (None, None) for stateless ConvLSTM2D. We convert them to a list so + # that wrap_or_unwrap can make it a Trackable again for saving. When + # loaded, ConvLSTM2D is able to handle the tuple/list conversion. + if isinstance(states, tuple): + states = tf.__internal__.tracking.wrap(list(states)) + objects["states"] = states + return objects, functions + + +class VocabularySavedModelSaver(LayerSavedModelSaver): + """Handles vocabulary layer serialization. + + This class is needed for StringLookup, IntegerLookup, and TextVectorization, + which all have a vocabulary as part of the config. Currently, we keep this + vocab as part of the config until saving, when we need to clear it to avoid + initializing a StaticHashTable twice (once when restoring the config and + once when restoring module resources). After clearing the vocab, + we persist a property to the layer indicating it was constructed with a + vocab. + """ + + @property + def python_properties(self): + # TODO(kathywu): Add python property validator + metadata = self._python_properties_internal() + # Clear the vocabulary from the config during saving. + metadata["config"]["vocabulary"] = None + # Persist a property to track that a vocabulary was passed on + # construction. + metadata["config"][ + "has_input_vocabulary" + ] = self.obj._has_input_vocabulary + return metadata diff --git a/keras/saving/legacy/saved_model/load.py b/keras/saving/legacy/saved_model/load.py new file mode 100644 index 000000000000..ffc4bad14d5d --- /dev/null +++ b/keras/saving/legacy/saved_model/load.py @@ -0,0 +1,1384 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
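(Before the loader: the double-initialization problem that `VocabularySavedModelSaver` above works around is visible from the public API, since a lookup layer built with a literal vocabulary keeps it in its config. Illustrative sketch:)

```
import tensorflow as tf

layer = tf.keras.layers.StringLookup(vocabulary=["a", "b", "c"])
config = layer.get_config()
print(config["vocabulary"])  # ["a", "b", "c"]
# Restoring from this config AND from the saved table resource would
# initialize the StaticHashTable twice; the saver therefore nulls out
# metadata["config"]["vocabulary"] and records has_input_vocabulary=True.
```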
+# ============================================================================== +"""Keras SavedModel deserialization.""" + +import re +import types +import warnings + +import tensorflow.compat.v1.logging as logging +import tensorflow.compat.v2 as tf +from google.protobuf import message + +from keras import backend +from keras import regularizers +from keras.engine import input_spec +from keras.optimizers.legacy import optimizer_v2 +from keras.protobuf import saved_metadata_pb2 +from keras.protobuf import versions_pb2 +from keras.saving import object_registration +from keras.saving.legacy import model_config +from keras.saving.legacy import saving_utils +from keras.saving.legacy import serialization +from keras.saving.legacy.saved_model import constants +from keras.saving.legacy.saved_model import json_utils +from keras.saving.legacy.saved_model import utils +from keras.saving.legacy.saved_model.serialized_attributes import ( + CommonEndpoints, +) +from keras.utils import layer_utils +from keras.utils import metrics_utils +from keras.utils import tf_inspect +from keras.utils.generic_utils import LazyLoader + +# To avoid circular dependencies between keras/engine and keras/saving, +# code in keras/saving must delay imports. + +# TODO(b/134426265): Switch back to single-quotes to match the rest of the file +# once the issue with copybara is fixed. + +models_lib = LazyLoader("models_lib", globals(), "keras.models") +base_layer = LazyLoader("base_layer", globals(), "keras.engine.base_layer") +layers_module = LazyLoader("layers_module", globals(), "keras.layers") +input_layer = LazyLoader("input_layer", globals(), "keras.engine.input_layer") +functional_lib = LazyLoader( + "functional_lib", globals(), "keras.engine.functional" +) +training_lib = LazyLoader("training_lib", globals(), "keras.engine.training") +training_lib_v1 = LazyLoader( + "training_lib_v1", globals(), "keras.engine.training_v1" +) +metrics = LazyLoader("metrics", globals(), "keras.metrics") +base_rnn = LazyLoader("base_rnn", globals(), "keras.layers.rnn.base_rnn") + + +PUBLIC_ATTRIBUTES = CommonEndpoints.all_functions.union( + CommonEndpoints.all_checkpointable_objects +) +PUBLIC_ATTRIBUTES.add(constants.KERAS_ATTR) + + +def load(path, compile=True, options=None): + """Loads Keras objects from a SavedModel. + + Any Keras layer or model saved to the SavedModel will be loaded back + as Keras objects. Other objects are loaded as regular trackable objects + (same as `tf.saved_model.load`). + + Currently, Keras saving/loading only retains the Keras object's weights, + losses, and call function. + + The loaded model can be re-compiled, but the original optimizer, compiled + loss functions, and metrics are not retained. This is temporary, and + `model.save` will soon be able to serialize compiled models. + + Args: + path: Path to SavedModel. + compile: If true, compile the model after loading it. + options: Optional `tf.saved_model.LoadOptions` object that specifies + options for loading from SavedModel. + + Returns: + Object loaded from SavedModel. + """ + # TODO(kathywu): Add saving/loading of optimizer, compiled losses and + # metrics. 
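(Usage-wise, this legacy `load()` is what `keras.models.load_model` reaches for a TF SavedModel directory, as the tests earlier in this patch do. The compile behavior described in the docstring, as a sketch assuming a writable `/tmp`:)

```
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="rmsprop", loss="mse")
x, y = np.random.rand(8, 4), np.random.rand(8, 1)
model.fit(x, y, verbose=0)
model.save("/tmp/demo_savedmodel")  # writes keras_metadata.pb with the graph

restored = tf.keras.models.load_model("/tmp/demo_savedmodel")
# Loss and metrics come back via the stored training_config; optimizer
# state (slots) does not, per the docstring above.
restored.fit(x, y, verbose=0)
```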
+ # TODO(kathywu): Add code to load from objects that contain all endpoints + + # Look for metadata file or parse the SavedModel + metadata = saved_metadata_pb2.SavedMetadata() + meta_graph_def = tf.__internal__.saved_model.parse_saved_model( + path + ).meta_graphs[0] + object_graph_def = meta_graph_def.object_graph_def + path_to_metadata_pb = tf.io.gfile.join(path, constants.SAVED_METADATA_PATH) + if tf.compat.v1.gfile.Exists(path_to_metadata_pb): + try: + with tf.io.gfile.GFile(path_to_metadata_pb, "rb") as f: + file_content = f.read() + metadata.ParseFromString(file_content) + except message.DecodeError as e: + raise IOError( + f"Cannot parse keras metadata at path {path_to_metadata_pb}: " + f"Received error: {e}" + ) + else: + logging.warning( + "SavedModel saved prior to TF 2.5 detected when loading " + "Keras model. Please ensure that you are saving the model " + "with model.save() or tf.keras.models.save_model(), *NOT* " + "tf.saved_model.save(). To confirm, there should be a file " + 'named "keras_metadata.pb" in the SavedModel directory.' + ) + _read_legacy_metadata(object_graph_def, metadata, path) + + if not metadata.nodes: + # When there are no Keras objects, return the results from the core + # loader + return tf.saved_model.load(path, options=options) + + metadata = _update_to_current_version(metadata) + # Recreate layers and metrics using the info stored in the metadata. + keras_loader = KerasObjectLoader(metadata, object_graph_def) + keras_loader.load_layers(compile=compile) + + # Generate a dictionary of all loaded nodes. + nodes_to_load = {"root": None} + for node_id, loaded_node in keras_loader.loaded_nodes.items(): + nodes_to_load[keras_loader.get_path(node_id)] = loaded_node + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", message="Trying to load ShardedVariables" + ) + loaded = tf.__internal__.saved_model.load_partial( + path, nodes_to_load, options=options + ) + + # Finalize the loaded layers and remove the extra tracked dependencies. + keras_loader.finalize_objects() + keras_loader.del_tracking() + + model = loaded["root"] + + if isinstance(model, training_lib.Model) and compile: + # TODO(kathywu): Use compiled objects from SavedModel, instead of + # creating new objects from the training config. + training_config = model._serialized_attributes["metadata"].get( + "training_config", None + ) + if training_config is not None: + model.compile( + **saving_utils.compile_args_from_training_config( + training_config + ), + from_serialized=True, + ) + saving_utils.try_build_compiled_arguments(model) + if isinstance(model.optimizer, optimizer_v2.OptimizerV2): + if model.optimizer.get_slot_names(): + logging.warning( + "Your optimizer uses slots. " + "Slots cannot be restored from saved_model, " + "as a result, your model is starting with " + "a new initialized optimizer." + ) + else: + logging.warning( + "No training configuration found in save file, so the " + "model was *not* compiled. Compile it manually." + ) + + # Force variables and resources to initialize. + if not tf.executing_eagerly(): + sess = backend.get_session() # Variables are initialized by this call. 
+        sess.run(
+            tf.compat.v1.get_collection(
+                tf.compat.v1.GraphKeys.TABLE_INITIALIZERS
+            )
+        )
+
+    return model
+
+
+def _update_to_current_version(metadata):
+    """Applies version updates to the metadata proto for backwards compat."""
+    for node in metadata.nodes:
+        if node.version.producer == 1 and node.identifier in [
+            constants.MODEL_IDENTIFIER,
+            constants.SEQUENTIAL_IDENTIFIER,
+            constants.NETWORK_IDENTIFIER,
+        ]:
+            node_metadata = json_utils.decode(node.metadata)
+            save_spec = node_metadata.get("save_spec")
+
+            if save_spec is not None:
+                node_metadata["full_save_spec"] = ([save_spec], {})
+                node.metadata = json_utils.Encoder().encode(node_metadata)
+    return metadata
+
+
+def _read_legacy_metadata(object_graph_def, metadata, path):
+    """Builds a KerasMetadata proto from the SavedModel ObjectGraphDef."""
+    # Older SavedModels store the metadata directly in the proto instead of the
+    # separate pb file.
+    node_paths = _generate_object_paths(object_graph_def)
+    for node_id, proto in enumerate(object_graph_def.nodes):
+        if (
+            proto.WhichOneof("kind") == "user_object"
+            and proto.user_object.identifier
+            in constants.KERAS_OBJECT_IDENTIFIERS
+        ):
+            if not proto.user_object.metadata:
+                raise ValueError(
+                    "Unable to create a Keras model from SavedModel at "
+                    f"{path}. This SavedModel was exported with "
+                    "`tf.saved_model.save`, and lacks the Keras metadata file. "
+                    "Please save your Keras model by calling `model.save` "
+                    "or `tf.keras.models.save_model`. Note that "
+                    "you can still load this SavedModel with "
+                    "`tf.saved_model.load`."
+                )
+            metadata.nodes.add(
+                node_id=node_id,
+                node_path=node_paths[node_id],
+                version=versions_pb2.VersionDef(
+                    producer=1, min_consumer=1, bad_consumers=[]
+                ),
+                identifier=proto.user_object.identifier,
+                metadata=proto.user_object.metadata,
+            )
+
+
+def _generate_object_paths(object_graph_def):
+    """Traverses through an ObjectGraphDef and builds a map of all node
+    paths."""
+    paths = {0: "root"}
+    nodes_to_visit = [0]
+
+    while nodes_to_visit:
+        current_node = nodes_to_visit.pop()
+        current_path = paths[current_node]
+        for reference in object_graph_def.nodes[current_node].children:
+            if reference.node_id in paths:
+                continue
+            paths[reference.node_id] = f"{current_path}.{reference.local_name}"
+            nodes_to_visit.append(reference.node_id)
+
+    return paths
+
+
+def _is_graph_network(layer):
+    """Determines whether the layer is a graph network."""
+
+    if isinstance(layer, RevivedNetwork):
+        return False
+    elif isinstance(layer, functional_lib.Functional):
+        return layer._is_graph_network or isinstance(
+            layer, models_lib.Sequential
+        )
+    return False
+
+
+class KerasObjectLoader:
+    """Loader that recreates Keras objects (e.g. layers, models).
+
+    Layers and models are revived from either the config or SavedModel
+    following these rules:
+    1. If the object is a graph network (i.e. Sequential or Functional), it is
+       initialized using the structure from the config only after its child
+       layers have been created. Graph networks must be initialized with
+       inputs and outputs, so all child layers must be created beforehand.
+    2. If the object's config exists and the class can be found, then revive
+       from the config.
+    3. The object may have already been created if its parent was revived
+       from the config. In this case, do nothing.
+    4. If none of the above applies, compose the various artifacts from the
+       SavedModel to create a subclassed layer or model. At this time, custom
+       metrics are not supported.
+
+    """
+
+    def __init__(self, metadata, object_graph_def):
+        self._metadata = {x.node_id: x for x in metadata.nodes}
+        self._proto = object_graph_def
+
+        self._node_paths = {
+            node_data.node_id: node_data.node_path
+            for node_data in metadata.nodes
+        }
+        self.loaded_nodes = {}  # Maps node id -> (loaded node, setter)
+
+        # Store all node ids that have already been traversed when tracking
+        # nodes that were recreated from the config.
+        self._traversed_nodes_from_config = set()
+
+        # Maps model id -> (blank model obj, list of child layers or their
+        # node ids). This tracks all layers in functional and sequential
+        # models. These models are only reconstructed after all of their
+        # child layers have been created.
+        self.model_layer_dependencies = {}
+        self._models_to_reconstruct = []
+
+    def del_tracking(self):
+        """Removes tracked references that are only used when loading the
+        model."""
+        # Now that the node object has been fully loaded, and the checkpoint has
+        # been restored, the object no longer needs to track objects added from
+        # SerializedAttributes. (Note that saving a training checkpoint still
+        # functions correctly, because layers and variables are tracked
+        # separately by the Layer object.)
+        # TODO(kathywu): Instead of outright deleting these nodes (which would
+        # make restoring from a different checkpoint tricky), mark them as extra
+        # dependencies that are OK to overwrite.
+        for node in self.loaded_nodes.values():
+            node = node[0]
+            if not isinstance(node, base_layer.Layer):
+                # Loaded nodes can contain other trackable objects created when
+                # loading layers from the config, such as variables.
+                continue
+            for name in PUBLIC_ATTRIBUTES:
+                node._delete_tracking(name)
+
+            if isinstance(node, functional_lib.Functional):
+                # Delete the temporary layer dependencies, which were used to
+                # restore the checkpointed values. When the model is live, the
+                # user can delete or add layers to the model at any time, so
+                # these layer dependencies may be obsolete.
+                dependencies = list(node._self_unconditional_dependency_names)
+                for name in dependencies:
+                    if (
+                        re.match(r"^layer(_with_weights)?-[\d+]", name)
+                        is not None
+                    ):
+                        node._delete_tracking(name)
+
+    def _add_children_recreated_from_config(self, obj, proto, node_id):
+        """Recursively records objects recreated from config."""
+
+        if node_id in self._traversed_nodes_from_config:
+            return
+
+        parent_path = self._node_paths[node_id]
+        self._traversed_nodes_from_config.add(node_id)
+        obj._maybe_initialize_trackable()
+        if isinstance(obj, base_layer.Layer) and not obj.built:
+            metadata = json_utils.decode(self._metadata[node_id].metadata)
+            self._try_build_layer(
+                obj, node_id, metadata.get("build_input_shape")
+            )
+
+        # Create list of all possible children
+        children = []
+        # Look for direct children
+        for reference in proto.children:
+            obj_child = obj._lookup_dependency(reference.local_name)
+            children.append(
+                (obj_child, reference.node_id, reference.local_name)
+            )
+
+        # Add metrics that may have been added to the layer._metrics list.
+        # This is stored in the SavedModel as layer.keras_api.layer_metrics in
+        # SavedModels created after TF 2.2.
+ metric_list_node_id = self._search_for_child_node( + node_id, [constants.KERAS_ATTR, "layer_metrics"] + ) + if metric_list_node_id is not None and hasattr(obj, "_metrics"): + obj_metrics = {m.name: m for m in obj._metrics} + for reference in self._proto.nodes[metric_list_node_id].children: + metric = obj_metrics.get(reference.local_name) + if metric is not None: + metric_path = "{}.layer_metrics.{}".format( + constants.KERAS_ATTR, reference.local_name + ) + children.append((metric, reference.node_id, metric_path)) + + for obj_child, child_id, child_name in children: + child_proto = self._proto.nodes[child_id] + + if not isinstance(obj_child, tf.__internal__.tracking.Trackable): + continue + if ( + child_proto.user_object.identifier + in tf.__internal__.saved_model.load.registered_identifiers() + ): + setter = tf.__internal__.saved_model.load.get_setter( + child_proto.user_object + ) + elif ( + obj_child._object_identifier + in constants.KERAS_OBJECT_IDENTIFIERS + ): + setter = _revive_setter + else: + setter = setattr + + if child_id in self.loaded_nodes: + if self.loaded_nodes[child_id][0] is not obj_child: + # This means that the same trackable object is referenced by + # two different objects that were recreated from the config. + logging.warning( + "Looks like there is an object (perhaps variable or " + "layer) that is shared between different " + "layers/models. This may cause issues when restoring " + "the variable values. Object: {}".format(obj_child) + ) + continue + + # Overwrite variable names with the ones saved in the SavedModel. + if ( + child_proto.WhichOneof("kind") == "variable" + and child_proto.variable.name + ): + obj_child._handle_name = child_proto.variable.name + ":0" + + if isinstance( + obj_child, tf.__internal__.tracking.TrackableDataStructure + ): + setter = lambda *args: None + + child_path = f"{parent_path}.{child_name}" + self._node_paths[child_id] = child_path + self._add_children_recreated_from_config( + obj_child, child_proto, child_id + ) + self.loaded_nodes[child_id] = obj_child, setter + + def load_layers(self, compile=True): + """Load all layer nodes from the metadata.""" + # Load metrics after models and layers, since it's likely that models + # and layers will create the metric when initialized (this avoids + # wasting time by creating objects multiple times). + metric_list = [] + for node_metadata in self._metadata.values(): + if node_metadata.identifier == constants.METRIC_IDENTIFIER: + metric_list.append(node_metadata) + continue + + self.loaded_nodes[node_metadata.node_id] = self._load_layer( + node_metadata.node_id, + node_metadata.identifier, + node_metadata.metadata, + ) + + for node_metadata in metric_list: + try: + self.loaded_nodes[node_metadata.node_id] = self._load_layer( + node_metadata.node_id, + node_metadata.identifier, + node_metadata.metadata, + ) + except ValueError as e: + # Metrics are only needed when the model is compiled later. We + # ignore errors when trying to load custom metrics when + # `compile=False` until custom metrics are serialized properly + # (b/135550038). + if compile: + raise e + logging.warning( + "Unable to restore custom metric. Please ensure that " + "the layer implements `get_config` and `from_config` " + "when saving. In addition, please use the " + "`custom_objects` arg when calling `load_model()`." 
+ ) + + def _load_layer(self, node_id, identifier, metadata): + """Load a single layer from a SavedUserObject proto.""" + metadata = json_utils.decode(metadata) + + # If node was already created + if node_id in self.loaded_nodes: + node, setter = self.loaded_nodes[node_id] + + # Revive setter requires the object to have a + # `_serialized_attributes` property. Add it here. + _maybe_add_serialized_attributes(node, metadata) + + config = metadata.get("config") + if _is_graph_network(node) and serialization.validate_config( + config + ): + child_nodes = self._get_child_layer_node_ids(node_id) + self.model_layer_dependencies[node_id] = (node, child_nodes) + if not child_nodes: + self._models_to_reconstruct.append(node_id) + return node, setter + + # Detect whether this object can be revived from the config. If not, + # then revive from the SavedModel instead. + obj, setter = self._revive_from_config(identifier, metadata, node_id) + if obj is None: + obj, setter = revive_custom_object(identifier, metadata) + + # Add an attribute that stores the extra functions/objects saved in the + # SavedModel. Most of these functions/objects are ignored, but some are + # used later in the loading process (e.g. the list of regularization + # losses, or the training config of compiled models). + _maybe_add_serialized_attributes(obj, metadata) + return obj, setter + + def _revive_from_config(self, identifier, metadata, node_id): + """Revives a layer/model from config, or returns None.""" + if identifier == constants.METRIC_IDENTIFIER: + obj = self._revive_metric_from_config(metadata) + else: + obj = self._revive_graph_network( + identifier, metadata, node_id + ) or self._revive_layer_or_model_from_config(metadata, node_id) + + if obj is None: + return None, None + + setter = self._config_node_setter(_revive_setter) + self._add_children_recreated_from_config( + obj, self._proto.nodes[node_id], node_id + ) + return obj, setter + + def _revive_graph_network(self, identifier, metadata, node_id): + """Revives a graph network from config.""" + # Determine whether the metadata contains information for reviving a + # functional or Sequential model. + config = metadata.get("config") + if not serialization.validate_config(config): + return None + + class_name = tf.compat.as_str(metadata["class_name"]) + if object_registration.get_registered_object(class_name) is not None: + return None + model_is_functional_or_sequential = ( + metadata.get("is_graph_network", False) + or class_name == "Sequential" + or class_name == "Functional" + ) + if not model_is_functional_or_sequential: + return None + + # Revive functional and sequential models as blank model objects for now + # ( must be initialized to enable setattr tracking and attribute + # caching). Reconstruction of the network is deferred until all of the + # model's layers have been revived. + if class_name == "Sequential": + model = models_lib.Sequential(name=config["name"]) + # The model is a custom Sequential model. + elif identifier == constants.SEQUENTIAL_IDENTIFIER: + # Uses the custom class name, since the config does not have one. + model = models_lib.Sequential(name=class_name) + else: + model = models_lib.Functional( + inputs=[], outputs=[], name=config["name"] + ) + + # Record this model and its layers. This will later be used to + # reconstruct the model. 
+        layers = self._get_child_layer_node_ids(node_id)
+        self.model_layer_dependencies[node_id] = (model, layers)
+        if not layers:
+            self._models_to_reconstruct.append(node_id)
+        return model
+
+    def _revive_layer_or_model_from_config(self, metadata, node_id):
+        """Revives a layer/custom model from config; returns None if
+        infeasible."""
+        # Check that the following requirements are met for reviving from
+        # config:
+        #    1. Object can be deserialized from config.
+        #    2. If the object needs to be built, then the build input shape can
+        #       be found.
+        class_name = metadata.get("class_name")
+        config = metadata.get("config")
+        shared_object_id = metadata.get("shared_object_id")
+        must_restore_from_config = metadata.get("must_restore_from_config")
+        if not serialization.validate_config(config):
+            return None
+
+        try:
+            try:
+                obj = model_config.model_from_config(
+                    serialization.serialize_keras_class_and_config(
+                        class_name, config, shared_object_id=shared_object_id
+                    )
+                )
+            except (TypeError, KeyError) as e:
+                # A name conflict has occurred. The `class_name` is in the
+                # Keras native framework; however, the value in the framework
+                # is different from the user's class definition, which
+                # confuses the KerasObjectLoader.
+                builtin_layer = layers_module.get_builtin_layer(class_name)
+                if builtin_layer:
+                    raise RuntimeError(
+                        f"Unable to restore object of class '{class_name}'. "
+                        "One of several possible causes could be "
+                        "a missing custom object. "
+                        "Decorate your custom object with "
+                        "`@keras.utils.register_keras_serializable()` and "
+                        "include that file in your program, "
+                        "or pass your class in a "
+                        "`keras.utils.CustomObjectScope` "
+                        "that wraps this load call. "
+                        f"\n\nException: {e}"
+                    ) from e
+                else:
+                    raise
+        except Exception as e:
+            if must_restore_from_config:
+                raise e
+            else:
+                return None
+
+        # Use the dtype, name, and trainable status. Often these are not
+        # specified in custom configs, so retrieve their values from the
+        # metadata.
+
+        obj._name = metadata["name"]
+        if metadata.get("trainable") is not None:
+            obj.trainable = metadata["trainable"]
+        if metadata.get("dtype") is not None:
+            obj._set_dtype_policy(metadata["dtype"])
+        if metadata.get("stateful") is not None:
+            obj.stateful = metadata["stateful"]
+        if metadata.get("autocast") is not None:
+            obj._autocast = metadata["autocast"]
+        # Restore model save spec for subclassed models. (Layers do not store
+        # a SaveSpec.)
+        if isinstance(obj, training_lib.Model):
+            full_save_spec = metadata.get("full_save_spec")
+            if full_save_spec is not None:
+                args_spec, kwargs_spec = full_save_spec
+                inputs_spec = args_spec.pop(0)
+                obj._set_save_spec(inputs_spec, args_spec, kwargs_spec)
+
+        build_input_shape = metadata.get("build_input_shape")
+        built = self._try_build_layer(obj, node_id, build_input_shape)
+
+        if not built:
+            # If the layer cannot be built, revive a custom layer instead.
+ return None + return obj + + def _revive_metric_from_config(self, metadata): + """Revives a metric object using the config saved in the metadata.""" + class_name = tf.compat.as_str(metadata["class_name"]) + config = metadata.get("config") + + if not serialization.validate_config(config): + return None + + try: + obj = metrics.deserialize( + serialization.serialize_keras_class_and_config( + class_name, config + ) + ) + except ValueError: + return None + + build_input_shape = metadata.get("build_input_shape") + if build_input_shape is not None and hasattr(obj, "_build"): + obj._build(build_input_shape) + + return obj + + def _try_build_layer(self, obj, node_id, build_input_shape): + """Attempts to build the layer.""" + if obj.built or hasattr(obj.build, "_is_default"): + obj.built = True + return True + + if build_input_shape is None: + build_input_shape = self._infer_inputs( + node_id, convert_to_shapes=True + ) + + if build_input_shape is not None: + obj.build(build_input_shape) + base_layer.Layer.build(obj, build_input_shape) + return True + + return False + + def get_path(self, node_id): + return self._node_paths[node_id] + + def finalize_objects(self): + """Finish setting up Keras objects. + + This function is executed after all objects and functions have been + created. Call functions and losses are attached to each layer, and once + all layers have been fully set up, graph networks are initialized. + + Subclassed models that are revived from the SavedModel are treated like + layers, and have their call/loss functions attached here. + """ + # Finish setting up layers and subclassed models. This step attaches + # call functions and losses to each object, and sets model + # inputs/outputs. + layers_revived_from_config = [] + layers_revived_from_saved_model = [] + for node_id, (node, _) in self.loaded_nodes.items(): + if ( + not isinstance(node, base_layer.Layer) + # Don't finalize models until all layers have finished loading. + or node_id in self.model_layer_dependencies + ): + continue + + self._unblock_model_reconstruction(node_id, node) + + if isinstance(node, input_layer.InputLayer): + continue + elif isinstance(node, metrics.Metric): + continue + + if isinstance(node, (RevivedLayer, RevivedInputLayer)): + layers_revived_from_saved_model.append(node) + else: + layers_revived_from_config.append(node) + + _finalize_saved_model_layers(layers_revived_from_saved_model) + _finalize_config_layers(layers_revived_from_config) + + # Initialize graph networks, now that layer dependencies have been + # resolved. + self._reconstruct_all_models() + + def _unblock_model_reconstruction(self, layer_id, layer): + """Removes layer from blocking model reconstruction.""" + for model_id, v in self.model_layer_dependencies.items(): + _, layers = v + if layer_id not in layers: + continue + layers[layers.index(layer_id)] = layer + if all(isinstance(x, base_layer.Layer) for x in layers): + self._models_to_reconstruct.append(model_id) + + def _reconstruct_all_models(self): + """Reconstructs the network structure of all models.""" + all_initialized_models = set() + while self._models_to_reconstruct: + model_id = self._models_to_reconstruct.pop(0) + all_initialized_models.add(model_id) + model, layers = self.model_layer_dependencies[model_id] + self._reconstruct_model(model_id, model, layers) + _finalize_config_layers([model]) + + if all_initialized_models != set(self.model_layer_dependencies.keys()): + # This should not happen. 
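+            # Gather the names of the models that were never reconstructed so
+            # they can be reported in the error below.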
+ uninitialized_model_ids = ( + set(self.model_layer_dependencies.keys()) + - all_initialized_models + ) + uninitialized_model_names = [ + self.model_layer_dependencies[model_id][0].name + for model_id in uninitialized_model_ids + ] + raise ValueError( + "Error loading model(s) in the SavedModel format. " + "The following model(s) could not be initialized: " + f"{uninitialized_model_names}" + ) + + def _reconstruct_model(self, model_id, model, layers): + """Reconstructs the network structure.""" + config = json_utils.decode(self._metadata[model_id].metadata)["config"] + + # Set up model inputs + if model.inputs: + # Inputs may already be created if the model is instantiated in + # another object's __init__. + pass + elif isinstance(model, models_lib.Sequential): + if not layers or not isinstance(layers[0], input_layer.InputLayer): + if config["layers"][0]["class_name"] == "InputLayer": + layers.insert( + 0, + input_layer.InputLayer.from_config( + config["layers"][0]["config"] + ), + ) + elif "batch_input_shape" in config["layers"][0]["config"]: + batch_input_shape = config["layers"][0]["config"][ + "batch_input_shape" + ] + layers.insert( + 0, + input_layer.InputLayer( + input_shape=batch_input_shape[1:], + batch_size=batch_input_shape[0], + dtype=layers[0].dtype, + name=layers[0].name + "_input", + ), + ) + model.__init__(layers, name=config["name"]) + if not model.inputs: + first_layer = self._get_child_layer_node_ids(model_id)[0] + input_specs = self._infer_inputs(first_layer) + input_shapes = self._infer_inputs( + first_layer, convert_to_shapes=True + ) + model._set_inputs(input_specs) + if not model.built and not isinstance(input_specs, dict): + model.build(input_shapes) + else: # Reconstruct functional model + ( + inputs, + outputs, + created_layers, + ) = functional_lib.reconstruct_from_config( + config, created_layers={layer.name: layer for layer in layers} + ) + model.__init__(inputs, outputs, name=config["name"]) + functional_lib.connect_ancillary_layers(model, created_layers) + + # Set model dtype. + _set_network_attributes_from_metadata(model) + + # Unblock models that are dependent on this model. + self._unblock_model_reconstruction(model_id, model) + + def _get_child_layer_node_ids(self, node_id): + """Returns the node ids of each layer in a Sequential/Functional + model.""" + # Sequential and Functional track layers with names following the format + # "layer-N". Use this to generate the list of layers. + num_layers = 0 + child_layers = {} + pattern = re.compile("layer-(\\d+)") + + for child in self._proto.nodes[node_id].children: + m = pattern.match(child.local_name) + if m is None: + continue + layer_n = int(m.group(1)) + num_layers = max(layer_n + 1, num_layers) + child_layers[layer_n] = child.node_id + + ordered = [] + for n in range(num_layers): + child = child_layers.get(n) + if child is None: + break + ordered.append(child) + return ordered + + def _search_for_child_node(self, parent_id, path_to_child): + """Returns node id of child node. + + A helper method for traversing the object graph proto. + + As an example, say that the object graph proto in the SavedModel + contains an object with the following child and grandchild attributes: + + `parent.child_a.child_b` + + This method can be used to retrieve the node id of `child_b` using the + parent's node id by calling: + + `_search_for_child_node(parent_id, ['child_a', 'child_b'])`. + + Args: + parent_id: node id of parent node + path_to_child: list of children names. 
+ + Returns: + node_id of child, or None if child isn't found. + """ + if not path_to_child: + return parent_id + + for child in self._proto.nodes[parent_id].children: + if child.local_name == path_to_child[0]: + return self._search_for_child_node( + child.node_id, path_to_child[1:] + ) + return None + + def _infer_inputs(self, layer_node_id, convert_to_shapes=False): + """Infers input shape of layer from SavedModel functions.""" + call_fn_id = self._search_for_child_node( + layer_node_id, ["call_and_return_all_conditional_losses"] + ) + if call_fn_id is None: + return None + + concrete_functions = self._proto.nodes[ + call_fn_id + ].function.concrete_functions + if not concrete_functions: + return None + call_fn_name = concrete_functions[0] + call_fn_proto = self._proto.concrete_functions[call_fn_name] + structured_input_signature = tf.__internal__.saved_model.decode_proto( + call_fn_proto.canonicalized_input_signature + ) + inputs = structured_input_signature[0][0] + if convert_to_shapes: + return tf.nest.map_structure(lambda spec: spec.shape, inputs) + else: + return inputs + + def _config_node_setter(self, setter): + """Creates edges for nodes that are recreated from config.""" + + def setattr_wrapper(obj, name, value): + # Avoid overwriting attributes of objects recreated from the config. + if obj._lookup_dependency(name) is None: + setter(obj, name, value) + + return setattr_wrapper + + +def _finalize_saved_model_layers(layers): + """Runs the final steps of loading Keras Layers from SavedModel.""" + + # 1. Set up call functions for all layers initialized from the SavedModel ( + # and not the config) + for layer in layers: + layer.built = True + layer_call = getattr( + _get_keras_attr(layer), "call_and_return_conditional_losses", None + ) + if layer_call and layer_call.concrete_functions: + call_spec = layer_utils.CallFunctionSpec( + tf_inspect.getfullargspec(layer_call) + ) + layer.call = utils.use_wrapped_call( + layer, layer_call, call_spec, return_method=True + ) + expects_training_arg = layer._serialized_attributes["metadata"][ + "expects_training_arg" + ] + if "training" in layer_call.function_spec.arg_names: + # This could change the value of `expects_training_arg` if this + # layer doesn't expect a training arg, but has a child layer + # that does. + expects_training_arg = True + layer._init_call_fn_args(expects_training_arg) + else: + layer.call = types.MethodType( + _unable_to_call_layer_due_to_serialization_issue, layer + ) + + for layer in layers: + # 2. Set model inputs and outputs. + if isinstance(layer, RevivedNetwork): + _set_network_attributes_from_metadata(layer) + + if hasattr( + _get_keras_attr(layer), "call_and_return_conditional_losses" + ): + call_fn = _get_keras_attr( + layer + ).call_and_return_conditional_losses + if not call_fn.concrete_functions: + continue + if call_fn.input_signature is None: + args, kwargs = infer_inputs_from_restored_call_function( + call_fn + ) + args = list(args) + inputs = args.pop(0) + else: + args = call_fn.input_signature + args = list(args) + inputs = args.pop(0) + kwargs = None + layer._set_save_spec(inputs, args, kwargs) + + # V1 models require calling _set_inputs to set the `.inputs` + # attr. Skip this step when there are multiple tensor inputs + # (this behavior is not well supported in V1 models). + if not any( + isinstance(x, tf.TensorSpec) + for x in tf.nest.flatten([args, kwargs]) + ): + layer._set_inputs(inputs) + + # 3. Add losses that aren't generated by the layer.call function. 
+        _restore_layer_unconditional_losses(layer)
+        _restore_layer_activation_loss(layer)
+
+        # 4. Restore metrics list
+        _restore_layer_metrics(layer)
+
+
+def _unable_to_call_layer_due_to_serialization_issue(
+    layer, *unused_args, **unused_kwargs
+):
+    """Replaces the `layer.call` if the layer was not fully serialized.
+
+    Keras Model/Layer serialization is relatively relaxed because SavedModels
+    are not always loaded back as Keras models. Thus, when there is an issue
+    tracing a non-signature function, a warning is logged instead of raising an
+    error. This results in a SavedModel where the model's call function is
+    saved, but the internal layer call functions are not.
+
+    When deserialized with `tf.keras.models.load_model`, the internal layers
+    which do not have serialized call functions should raise an error when
+    called.
+
+    Args:
+      layer: Layer without the serialized call function.
+
+    Raises:
+      ValueError
+    """
+
+    raise ValueError(
+        f"Cannot call custom layer {layer.name} of type {type(layer)}, because "
+        "the call function was not serialized to the SavedModel. "
+        "Please try one of the following methods to fix this issue:"
+        "\n\n(1) Implement `get_config` and `from_config` in the layer/model "
+        "class, and pass the object to the `custom_objects` argument when "
+        "loading the model. For more details, see: "
+        "https://www.tensorflow.org/guide/keras/save_and_serialize"
+        "\n\n(2) Ensure that the subclassed model or layer overwrites `call` "
+        "and not `__call__`. The input shape and dtype will be automatically "
+        "recorded when the object is called, and used when saving. To manually "
+        "specify the input shape/dtype, decorate the call function with "
+        "`@tf.function(input_signature=...)`."
+    )
+
+
+def _finalize_config_layers(layers):
+    """Runs the final steps of loading Keras Layers from config."""
+    for layer in layers:
+        # It is assumed that layers define their unconditional losses after
+        # being recreated from the config and built. The exceptions to this are
+        # Functional and Sequential models, which only store conditional losses
+        # (losses dependent on the inputs) in the config. Unconditional losses
+        # like weight regularization must be revived from the SavedModel.
+        if _is_graph_network(layer):
+            _restore_layer_unconditional_losses(layer)
+
+        # Some layers, like Dense, record their activation loss function in the
+        # config. However, not all layers do this, so the activation loss may be
+        # missing when restored from the config/hdf5.
+        # TODO(kathywu): Investigate ways to improve the config to ensure
+        # consistent loading behavior between HDF5 and SavedModel.
+        _restore_layer_activation_loss(layer)
+
+        # Restore metrics list.
+        _restore_layer_metrics(layer)
+
+        # Restore RNN layer states.
+        if (
+            isinstance(layer, base_rnn.RNN)
+            and layer.stateful
+            and hasattr(_get_keras_attr(layer), "states")
+        ):
+            layer.states = getattr(_get_keras_attr(layer), "states", None)
+            for variable in tf.nest.flatten(layer.states):
+                backend.track_variable(variable)
+
+        # Perform any layer-defined finalization of the layer state.
+        layer.finalize_state()
+
+
+def _finalize_metric(metric):
+    metric.update_state = types.MethodType(
+        metrics_utils.update_state_wrapper(metric.keras_api.update_state),
+        metric,
+    )
+    metric.result = metric.keras_api.result
+
+
+def _restore_layer_unconditional_losses(layer):
+    """Restore unconditional losses from SavedModel."""
+    if hasattr(_get_keras_attr(layer), "layer_regularization_losses"):
+        losses = getattr(
+            _get_keras_attr(layer), "layer_regularization_losses", []
+        )
+    else:
+        # Some earlier SavedModels may not have layer_regularization_losses
+        # serialized separately. Fall back to the regularization_losses list
+        # when the separate attribute is missing.
+        losses = layer._serialized_attributes.get("regularization_losses", [])
+    for loss in losses:
+        layer.add_loss(loss)
+
+
+def _restore_layer_activation_loss(layer):
+    """Restore activation loss from SavedModel."""
+    # Use wrapped activity regularizer function if the layer's activity
+    # regularizer wasn't created during initialization.
+    activity_regularizer = getattr(
+        _get_keras_attr(layer), "activity_regularizer_fn", None
+    )
+    if activity_regularizer and not layer.activity_regularizer:
+        try:
+            layer.activity_regularizer = activity_regularizer
+        except AttributeError:
+            # This may happen if a layer wrapper is saved with an activity
+            # regularizer. The wrapper object's activity regularizer is
+            # unsettable.
+            pass
+
+
+def revive_custom_object(identifier, metadata):
+    """Revives object from SavedModel."""
+    if tf.compat.v1.executing_eagerly_outside_functions():
+        model_class = training_lib.Model
+    else:
+        model_class = training_lib_v1.Model
+
+    revived_classes = {
+        constants.INPUT_LAYER_IDENTIFIER: (
+            RevivedInputLayer,
+            input_layer.InputLayer,
+        ),
+        constants.LAYER_IDENTIFIER: (RevivedLayer, base_layer.Layer),
+        constants.MODEL_IDENTIFIER: (RevivedNetwork, model_class),
+        constants.NETWORK_IDENTIFIER: (
+            RevivedNetwork,
+            functional_lib.Functional,
+        ),
+        constants.SEQUENTIAL_IDENTIFIER: (
+            RevivedNetwork,
+            models_lib.Sequential,
+        ),
+    }
+    parent_classes = revived_classes.get(identifier, None)
+
+    class_name = tf.compat.as_str(metadata["class_name"])
+    if parent_classes is not None:
+        parent_classes = revived_classes[identifier]
+        revived_cls = type(class_name, parent_classes, {})
+        return revived_cls._init_from_metadata(metadata)
+    else:
+        raise ValueError(
+            f'Unable to restore custom object of class "{class_name}" '
+            f"(type {identifier}). Please make sure that this class is "
+            "included in the `custom_objects` arg when calling `load_model()`. "
+            "Also, check that the class implements `get_config` and "
+            f"`from_config`.\n\nComplete metadata: {metadata}"
+        )
+
+
+def _restore_layer_metrics(layer):
+    metrics_list = getattr(_get_keras_attr(layer), "layer_metrics", {})
+    layer_metrics = {m.name: m for m in layer._metrics}
+    for name, metric in metrics_list.items():
+        if name not in layer_metrics:
+            # Metrics may be added during initialization/building of custom
+            # layers.
+            layer._metrics.append(metric)
+
+
+# TODO(kathywu): Centrally define keys and functions for both serialization and
+# deserialization.
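+# The Revived* classes below are combined with the matching Keras base class
+# by `revive_custom_object`, which creates a new class via `type()` for each
+# saved object; the Revived* mixin restores the saved config, call spec, and
+# serialized attributes when the original Python class is unavailable.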
+class RevivedLayer:
+    """Keras layer loaded from a SavedModel."""
+
+    @classmethod
+    def _init_from_metadata(cls, metadata):
+        """Create revived layer from metadata stored in the SavedModel proto."""
+        init_args = dict(name=metadata["name"], trainable=metadata["trainable"])
+        if metadata.get("dtype") is not None:
+            init_args["dtype"] = metadata["dtype"]
+        if metadata.get("batch_input_shape") is not None:
+            init_args["batch_input_shape"] = metadata["batch_input_shape"]
+
+        revived_obj = cls(**init_args)
+
+        with utils.no_automatic_dependency_tracking_scope(revived_obj):
+
+            revived_obj._call_spec.expects_training_arg = metadata[
+                "expects_training_arg"
+            ]
+            config = metadata.get("config")
+            if serialization.validate_config(config):
+                revived_obj._config = config
+            if metadata.get("input_spec") is not None:
+                revived_obj.input_spec = recursively_deserialize_keras_object(
+                    metadata["input_spec"],
+                    module_objects={"InputSpec": input_spec.InputSpec},
+                )
+            if metadata.get("activity_regularizer") is not None:
+                revived_obj.activity_regularizer = regularizers.deserialize(
+                    metadata["activity_regularizer"]
+                )
+            if metadata.get("_is_feature_layer") is not None:
+                revived_obj._is_feature_layer = metadata["_is_feature_layer"]
+            if metadata.get("stateful") is not None:
+                revived_obj.stateful = metadata["stateful"]
+            if metadata.get("autocast") is not None:
+                revived_obj._autocast = metadata["autocast"]
+            if metadata.get("preserve_input_structure_in_config") is not None:
+                revived_obj._preserve_input_structure_in_config = metadata[
+                    "preserve_input_structure_in_config"
+                ]
+
+        return revived_obj, _revive_setter
+
+    @property
+    def keras_api(self):
+        return self._serialized_attributes.get(constants.KERAS_ATTR, None)
+
+    def get_config(self):
+        if hasattr(self, "_config"):
+            return self._config
+        else:
+            raise NotImplementedError
+
+
+def _revive_setter(layer, name, value):
+    """Setter function that saves some attributes to a separate dictionary."""
+    # Many attributes in the SavedModel conflict with properties defined in
+    # Layer and Model. Save these attributes to a separate dictionary.
+    if name in PUBLIC_ATTRIBUTES:
+
+        if isinstance(value, tf.__internal__.tracking.Trackable):
+            layer._track_trackable(value, name=name)
+        layer._serialized_attributes[name] = value
+
+    elif (
+        isinstance(layer, functional_lib.Functional)
+        and re.match(r"^layer(_with_weights)?-[\d+]", name) is not None
+    ):
+        # Edges named "layer-n" or "layer_with_weights-n", which are tracked in
+        # network._track_layers, should not be added as an attribute. They
+        # should be temporarily added as a dependency so that checkpointed
+        # values can be restored. These dependencies are manually deleted in
+        # KerasObjectLoader.del_tracking.
+
+        # Set `overwrite=True` in the case that `layer` already tracks a
+        # different layer-n. This may cause variable values to not be loaded
+        # properly in the original layer-n, but we already warn the users about
+        # this (ctrl-f "shared between different layers/models").
+        layer._track_trackable(value, name, overwrite=True)
+    elif getattr(layer, name, None) is not None:
+        # Don't overwrite already defined attributes.
+        pass
+    else:
+        setattr(layer, name, value)
+
+
+class RevivedInputLayer:
+    """InputLayer loaded from a SavedModel."""
+
+    @classmethod
+    def _init_from_metadata(cls, metadata):
+        """Revives the saved InputLayer from the Metadata."""
+        init_args = dict(
+            name=metadata["name"],
+            dtype=metadata["dtype"],
+            sparse=metadata["sparse"],
+            ragged=metadata["ragged"],
+            batch_input_shape=metadata["batch_input_shape"],
+        )
+        revived_obj = cls(**init_args)
+        with utils.no_automatic_dependency_tracking_scope(revived_obj):
+            revived_obj._config = metadata["config"]
+
+        return revived_obj, setattr
+
+    def get_config(self):
+        return self._config
+
+
+def recursively_deserialize_keras_object(config, module_objects=None):
+    """Deserialize Keras object from a nested structure."""
+    if isinstance(config, dict):
+        if "class_name" in config:
+            return serialization.deserialize_keras_object(
+                config, module_objects=module_objects
+            )
+        else:
+            return {
+                key: recursively_deserialize_keras_object(
+                    config[key], module_objects
+                )
+                for key in config
+            }
+    elif isinstance(config, (tuple, list)):
+        return [
+            recursively_deserialize_keras_object(x, module_objects)
+            for x in config
+        ]
+    else:
+        raise ValueError(
+            "Unable to decode Keras layer config. Config should be a "
+            f"dictionary, tuple or list. Received: config={config}"
+        )
+
+
+def infer_inputs_from_restored_call_function(fn):
+    """Returns TypeSpec of inputs from a restored call function.
+
+    Args:
+      fn: Restored layer call function. It is assumed that `fn` has at least one
+        concrete function and that the inputs are in the first argument.
+
+    Returns:
+      TypeSpec of call function inputs in the form of (args, kwargs)
+    """
+
+    def common_spec(x, y):
+        if not isinstance(x, tf.TypeSpec):
+            # Doesn't particularly matter what is returned in this case because
+            # the result will be filtered out in _set_input_shape.
+            return x
+
+        result = x._without_tensor_names().most_specific_common_supertype(
+            [y._without_tensor_names()]
+        )
+        if result is None:
+            # Please file a bug if you are being hindered by this error.
+            raise TypeError(f"No common supertype of {x} and {y}.")
+        return result
+
+    spec = fn.concrete_functions[0].structured_input_signature
+    for concrete in fn.concrete_functions[1:]:
+        spec2 = concrete.structured_input_signature
+        spec = tf.nest.map_structure(common_spec, spec, spec2)
+    return spec
+
+
+class RevivedNetwork(RevivedLayer):
+    """Keras network of layers loaded from a SavedModel."""
+
+    @classmethod
+    def _init_from_metadata(cls, metadata):
+        """Create revived network from metadata stored in the SavedModel
+        proto."""
+        revived_obj = cls(name=metadata["name"])
+
+        # Store attributes revived from SerializedAttributes in an untracked
+        # dictionary. The attributes are the ones listed in CommonEndpoints or
+        # "keras_api" for keras-specific attributes.
+        with utils.no_automatic_dependency_tracking_scope(revived_obj):
+
+            revived_obj._call_spec.expects_training_arg = metadata[
+                "expects_training_arg"
+            ]
+            config = metadata.get("config")
+            if serialization.validate_config(config):
+                revived_obj._config = config
+
+            if metadata.get("activity_regularizer") is not None:
+                revived_obj.activity_regularizer = regularizers.deserialize(
+                    metadata["activity_regularizer"]
+                )
+            if metadata.get("autocast") is not None:
+                revived_obj._autocast = metadata["autocast"]
+
+        return revived_obj, _revive_setter
+
+
+def _set_network_attributes_from_metadata(revived_obj):
+    """Sets attributes recorded in the metadata."""
+    with utils.no_automatic_dependency_tracking_scope(revived_obj):
+
+        metadata = revived_obj._serialized_attributes["metadata"]
+        if metadata.get("dtype") is not None:
+            revived_obj._set_dtype_policy(metadata["dtype"])
+        revived_obj._trainable = metadata["trainable"]
+
+
+def _maybe_add_serialized_attributes(layer, metadata):
+    # Store attributes revived from SerializedAttributes in an untracked
+    # dictionary. The attributes are the ones listed in CommonEndpoints or
+    # "keras_api" for keras-specific attributes.
+    if not hasattr(layer, "_serialized_attributes"):
+        with utils.no_automatic_dependency_tracking_scope(layer):
+            layer._serialized_attributes = {"metadata": metadata}
+
+
+def _get_keras_attr(layer):
+    return getattr(layer, "_serialized_attributes", {}).get(
+        constants.KERAS_ATTR, None
+    )
diff --git a/keras/saving/saved_model/load_context.py b/keras/saving/legacy/saved_model/load_context.py
similarity index 50%
rename from keras/saving/saved_model/load_context.py
rename to keras/saving/legacy/saved_model/load_context.py
index dd9d06c443d5..7e4d1d1b74e8 100644
--- a/keras/saving/saved_model/load_context.py
+++ b/keras/saving/legacy/saved_model/load_context.py
@@ -17,28 +17,30 @@
 import contextlib
 import threading
 
+import tensorflow.compat.v2 as tf
+
 
 class LoadContext(threading.local):
-  """A context for loading a model."""
+    """A context for loading a model."""
 
-  def __init__(self):
-    super().__init__()
-    self._entered_load_context = []
-    self._load_options = None
+    def __init__(self):
+        super().__init__()
+        self._entered_load_context = []
+        self._load_options = None
 
-  def set_load_options(self, load_options):
-    self._load_options = load_options
-    self._entered_load_context.append(True)
+    def set_load_options(self, load_options):
+        self._load_options = load_options
+        self._entered_load_context.append(True)
 
-  def clear_load_options(self):
-    self._load_options = None
-    self._entered_load_context.pop()
+    def clear_load_options(self):
+        self._load_options = None
+        self._entered_load_context.pop()
 
-  def load_options(self):
-    return self._load_options
+    def load_options(self):
+        return self._load_options
 
-  def in_load_context(self):
-    return self._entered_load_context
+    def in_load_context(self):
+        return self._entered_load_context
 
 
 _load_context = LoadContext()
@@ -46,18 +48,21 @@ def in_load_context(self):
 
 
 @contextlib.contextmanager
 def load_context(load_options):
-  _load_context.set_load_options(load_options)
-  try:
-    yield
-  finally:
-    _load_context.clear_load_options()
+    _load_context.set_load_options(load_options)
+    try:
+        yield
+    finally:
+        _load_context.clear_load_options()
 
 
 def get_load_options():
-  """Returns the load options under a load context."""
-  return _load_context.load_options()
+    """Returns the load options under a load context."""
+    return _load_context.load_options()
 
 
 def in_load_context():
-  """Returns whether under 
a load context.""" - return _load_context.in_load_context() + """Returns whether under a load context.""" + return _load_context.in_load_context() + + +tf.__internal__.register_load_context_function(in_load_context) diff --git a/keras/saving/legacy/saved_model/metric_serialization.py b/keras/saving/legacy/saved_model/metric_serialization.py new file mode 100644 index 000000000000..4d032ca28cab --- /dev/null +++ b/keras/saving/legacy/saved_model/metric_serialization.py @@ -0,0 +1,47 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Classes and functions implementing Metrics SavedModel serialization.""" + +import tensorflow.compat.v2 as tf + +from keras.saving import object_registration +from keras.saving.legacy.saved_model import constants +from keras.saving.legacy.saved_model import layer_serialization + + +class MetricSavedModelSaver(layer_serialization.LayerSavedModelSaver): + """Metric serialization.""" + + @property + def object_identifier(self): + return constants.METRIC_IDENTIFIER + + def _python_properties_internal(self): + metadata = dict( + class_name=object_registration.get_registered_name(type(self.obj)), + name=self.obj.name, + dtype=self.obj.dtype, + ) + metadata.update(layer_serialization.get_serialized(self.obj)) + if self.obj._build_input_shape is not None: + metadata["build_input_shape"] = self.obj._build_input_shape + return metadata + + def _get_serialized_attributes_internal(self, unused_serialization_cache): + return ( + dict(variables=tf.__internal__.tracking.wrap(self.obj.variables)), + # TODO(b/135550038): save functions to enable saving custom metrics. + {}, + ) diff --git a/keras/saving/legacy/saved_model/model_serialization.py b/keras/saving/legacy/saved_model/model_serialization.py new file mode 100644 index 000000000000..991b92d92350 --- /dev/null +++ b/keras/saving/legacy/saved_model/model_serialization.py @@ -0,0 +1,67 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Classes and functions implementing Model SavedModel serialization."""
+
+from keras.saving.legacy import saving_utils
+from keras.saving.legacy.saved_model import constants
+from keras.saving.legacy.saved_model import layer_serialization
+from keras.saving.legacy.saved_model import save_impl
+
+
+class ModelSavedModelSaver(layer_serialization.LayerSavedModelSaver):
+    """Model SavedModel serialization."""
+
+    @property
+    def object_identifier(self):
+        return constants.MODEL_IDENTIFIER
+
+    def _python_properties_internal(self):
+        metadata = super()._python_properties_internal()
+        # Network stateful property is dependent on the child layers.
+        metadata.pop("stateful")
+        metadata["is_graph_network"] = self.obj._is_graph_network
+        spec = self.obj.save_spec(dynamic_batch=False)
+        metadata["full_save_spec"] = spec
+        # save_spec is saved for forward compatibility on older TF versions.
+        metadata["save_spec"] = None if spec is None else spec[0][0]
+
+        metadata.update(
+            saving_utils.model_metadata(
+                self.obj, include_optimizer=True, require_config=False
+            )
+        )
+        return metadata
+
+    def _get_serialized_attributes_internal(self, serialization_cache):
+        default_signature = None
+
+        # Create a default signature function if this is the only object in the
+        # cache (i.e. this is the root level object).
+        if len(serialization_cache[constants.KERAS_CACHE_KEY]) == 1:
+            default_signature = save_impl.default_save_signature(self.obj)
+
+        # Other than the default signature function, all other attributes match
+        # with the ones serialized by Layer.
+        objects, functions = super()._get_serialized_attributes_internal(
+            serialization_cache
+        )
+        functions["_default_save_signature"] = default_signature
+        return objects, functions
+
+
+class SequentialSavedModelSaver(ModelSavedModelSaver):
+    @property
+    def object_identifier(self):
+        return constants.SEQUENTIAL_IDENTIFIER
diff --git a/keras/saving/saved_model/network_serialization.py b/keras/saving/legacy/saved_model/network_serialization.py
similarity index 79%
rename from keras/saving/saved_model/network_serialization.py
rename to keras/saving/legacy/saved_model/network_serialization.py
index 6e8e12e8168a..dfc2ba33531f 100644
--- a/keras/saving/saved_model/network_serialization.py
+++ b/keras/saving/legacy/saved_model/network_serialization.py
@@ -14,14 +14,14 @@
 # ==============================================================================
 """Classes and functions implementing Network SavedModel serialization."""
 
-from keras.saving.saved_model import constants
-from keras.saving.saved_model import model_serialization
+from keras.saving.legacy.saved_model import constants
+from keras.saving.legacy.saved_model import model_serialization
 
 
 # FunctionalModel serialization is pretty much the same as Model serialization.
 class NetworkSavedModelSaver(model_serialization.ModelSavedModelSaver):
-  """Network serialization."""
+    """Network serialization."""
 
-  @property
-  def object_identifier(self):
-    return constants.NETWORK_IDENTIFIER
+    @property
+    def object_identifier(self):
+        return constants.NETWORK_IDENTIFIER
diff --git a/keras/saving/legacy/saved_model/order_preserving_set.py b/keras/saving/legacy/saved_model/order_preserving_set.py
new file mode 100644
index 000000000000..f2479381534a
--- /dev/null
+++ b/keras/saving/legacy/saved_model/order_preserving_set.py
@@ -0,0 +1,93 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A set based on dict so that it preserves key insertion order. + +Python Dicts are order-preserving since 3.6 +(https://mail.python.org/pipermail/python-dev/2017-December/151283.html), +but sets are not. This class implements a set on top of a dict so that we get +deterministic iteration order across runs. +""" + +import collections.abc + + +class OrderPreservingSet(collections.abc.MutableSet): + """A set based on dict so that it preserves key insertion order.""" + + def __init__(self, iterable=None): + self._dict = {item: None for item in (iterable or [])} + + # abstract from collections.MutableSet + def __len__(self): + return len(self._dict) + + # abstract from collections.MutableSet + def __contains__(self, value): + return value in self._dict + + # override from collections.MutableSet + def __iter__(self): + return iter(self._dict) + + # abstract from collections.MutableSet + def add(self, item): + self._dict[item] = None + + # abstract from collections.MutableSet + def discard(self, value): + del self._dict[value] + + # override from collections.MutableSet + def clear(self): + self._dict = {} + + # override from collections.Set + def __eq__(self, other): + if not isinstance(other, OrderPreservingSet): + return NotImplemented + return self._dict.keys() == other._dict.keys() + + # override from collections.Set + def __le__(self, other): + if not isinstance(other, OrderPreservingSet): + return NotImplemented + return self._dict.keys() <= other._dict.keys() + + # override from collections.Set + def __ge__(self, other): + if not isinstance(other, OrderPreservingSet): + return NotImplemented + return self._dict.keys() >= other._dict.keys() + + # override from collections.Set + def __and__(self, other): + # collections.Set defaults to the ordering in other, we want to use self + return self._from_iterable(value for value in self if value in other) + + # override from collections.Set + def __or__(self, other): + # ensure that other is ordered before performing __or__ + if not isinstance(other, OrderPreservingSet): + raise TypeError( + "cannot union an 'OrderPreservingSet' with an " + "unordered iterable." + ) + result = self._from_iterable(value for value in self) + for value in other: + result._dict[value] = None + return result + + def union(self, other): + return self | other diff --git a/keras/saving/legacy/saved_model/revive_test.py b/keras/saving/legacy/saved_model/revive_test.py new file mode 100644 index 000000000000..4a134fc82fdc --- /dev/null +++ b/keras/saving/legacy/saved_model/revive_test.py @@ -0,0 +1,458 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests reviving models from config and SavedModel.
+
+These tests ensure that a model revived from a combination of config and
+SavedModel has the expected structure.
+"""
+
+# TODO(kathywu): Move relevant tests from saved_model_test to
+import shutil
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+from absl.testing import parameterized
+
+import keras
+from keras import backend
+from keras.saving.legacy.saved_model import load as keras_load
+from keras.testing_infra import test_combinations
+from keras.testing_infra import test_utils
+from keras.utils import CustomObjectScope
+
+
+class SubclassedModelNoConfig(keras.Model):
+    def __init__(self, a, b):
+        super().__init__()
+
+        self.a = a
+        self.b = b
+        self.shared = CustomLayerNoConfig(a, b)
+        self.all_layers = []
+
+    def build(self, input_shape):
+        self.all_layers.extend(
+            [
+                self.shared,
+                CustomLayerWithConfig(self.a + 1, self.b + 2),
+                CustomLayerNoConfig(self.a + 3, self.b + 4),
+                keras.Sequential(
+                    [
+                        # TODO(b/145029112): Bug with losses when there are
+                        # shared layers. self.shared, <-- Enable when bug is
+                        # fixed.
+                        CustomLayerNoConfig(self.a + 5, self.b + 6)
+                    ]
+                ),
+            ]
+        )
+        super().build(input_shape)
+
+    def call(self, inputs):
+        x = inputs
+        for layer in self.all_layers:
+            x = layer(x)
+        return x
+
+
+class SparseDense(keras.layers.Dense):
+    def call(self, inputs):
+        input_shape = tf.stack(
+            (tf.reduce_prod(tf.shape(inputs)[:-1]), self.kernel.shape[0])
+        )
+        output_shape = tf.concat(
+            (tf.shape(inputs)[:-1], [self.kernel.shape[1]]), -1
+        )
+        x = tf.sparse.reshape(inputs, input_shape)
+        return tf.reshape(
+            self.activation(
+                tf.sparse.sparse_dense_matmul(x, self.kernel) + self.bias
+            ),
+            output_shape,
+        )
+
+
+class SubclassedSparseModelNoConfig(keras.Model):
+    def __init__(self, a, b):
+        super().__init__()
+        self.a = a
+        self.shared = CustomLayerNoConfig(a, b)
+        self.all_layers = [SparseDense(4)]
+
+    def call(self, inputs):
+        x = inputs
+        for layer in self.all_layers:
+            x = layer(x)
+        return self.shared(x + self.a)
+
+
+class SubclassedModelWithConfig(SubclassedModelNoConfig):
+    def get_config(self):
+        return {"a": self.a, "b": self.b}
+
+    @classmethod
+    def from_config(cls, config):
+        return cls(**config)
+
+
+class CustomLayerNoConfig(keras.layers.Layer):
+    def __init__(self, a, b, name=None):
+        super().__init__(name=name)
+        self.a = tf.Variable(a, name="a")
+        self.b = b
+
+        def a_regularizer():
+            return self.a * 2
+
+        self.add_loss(a_regularizer)
+        self.sum_metric = keras.metrics.Sum(name="inputs_sum")
+        self.unused_metric = keras.metrics.Sum(name="not_added_to_metrics")
+
+    def build(self, input_shape):
+        self.c = tf.Variable(
+            tf.constant(1.0, shape=input_shape[1:]), name=self.name + "_c"
+        )
+
+    def call(self, inputs):
+        self.add_loss(tf.reduce_sum(inputs))
+        self.add_metric(self.sum_metric(inputs))
+        self.add_metric(inputs, aggregation="mean", name="mean")
+
+        return inputs + self.c
+
+
+class CustomLayerWithConfig(CustomLayerNoConfig):
+    def get_config(self):
+        return {"a": backend.get_value(self.a), "b": 
self.b, "name": self.name} + + +class CustomNetworkDefaultConfig(keras.Model): + def __init__(self, num_classes, name=None): + inputs = keras.Input((2, 3), name="inputs") + x = keras.layers.Flatten(name="flatten")(inputs) + y = keras.layers.Dense(num_classes, name="outputs")(x) + super().__init__(inputs, y, name=name) + + +class CustomNetworkWithConfig(CustomNetworkDefaultConfig): + def __init__(self, num_classes, name=None): + super().__init__(num_classes, name=name) + self._config_dict = dict(num_classes=num_classes) + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(config["num_classes"], name=config.get("name")) + + +class CustomNetworkWithConfigName(CustomNetworkWithConfig): + def __init__(self, num_classes, name=None): + super().__init__(num_classes, name=name) + self._config_dict["name"] = self.name + + +class UnregisteredCustomSequentialModel(keras.Sequential): + # This class is *not* registered in the CustomObjectScope. + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.add(keras.layers.InputLayer(input_shape=(2, 3))) + + +class FunctionalSubclassModel(keras.Model): + def __init__(self, units): + self.units = units + my_input = keras.Input(shape=(2, 3), name="inputs") + dense = keras.layers.Dense(self.units, activation="relu", name="dense") + output = dense(my_input) + outputs = {"output": output} + super().__init__(inputs=[my_input], outputs=outputs) + + def get_config(self): + return {"units": self.units} + + +class FunctionalSubclassModelWrongConfig(FunctionalSubclassModel): + def get_config(self): + return {} + + +# The WideDeepModel, whose name conflicts with a Keras built-in model, is +# registered in these tests. +class WideDeepModel(SubclassedModelWithConfig): + pass + + +class ReviveTestBase(test_combinations.TestCase): + def setUp(self): + super().setUp() + self.path = self.get_temp_dir() + self.addCleanup(shutil.rmtree, self.path, ignore_errors=True) + + def _assert_revived_correctness(self, model, revived): + self.assertAllEqual(model.input_names, revived.input_names) + self.assertAllEqual(model.output_names, revived.output_names) + if model.inputs is not None: + self.assertTrue( + all( + [ + i.shape.as_list() == r.shape.as_list() + and i.dtype == r.dtype + for (i, r) in zip(model.inputs, revived.inputs) + ] + ) + ) + self.assertTrue( + all( + [ + i.shape.as_list() == r.shape.as_list() + and i.dtype == r.dtype + for (i, r) in zip(model.outputs, revived.outputs) + ] + ) + ) + + self.assertAllClose( + self.evaluate(model.weights), self.evaluate(revived.weights) + ) + input_arr = tf.constant(np.random.random((2, 2, 3)).astype(np.float32)) + if isinstance(revived.save_spec()[0][0], tf.SparseTensorSpec): + input_arr = tf.sparse.from_dense(input_arr) + + self.assertAllClose(model(input_arr), revived(input_arr)) + self.assertAllClose(sum(model.losses), sum(revived.losses)) + self.assertAllClose(len(model.losses), len(revived.losses)) + self.assertEqual(len(model.metrics), len(revived.metrics)) + # TODO(b/150403085): Investigate why the metric order changes when + # running this test in tf-nightly. 
+        self.assertAllClose(
+            sorted([m.result() for m in model.metrics]),
+            sorted([m.result() for m in revived.metrics]),
+        )
+        model_layers = {layer.name: layer for layer in model.layers}
+        revived_layers = {layer.name: layer for layer in revived.layers}
+        self.assertAllEqual(model_layers.keys(), revived_layers.keys())
+
+        for name in model_layers:
+            model_layer = model_layers[name]
+            revived_layer = revived_layers[name]
+            self.assertEqual(model_layer.name, revived_layer.name)
+            self.assertEqual(model_layer.dtype, revived_layer.dtype)
+            self.assertEqual(model_layer.trainable, revived_layer.trainable)
+            if "WithConfig" in type(model_layer).__name__:
+                self.assertEqual(type(model_layer), type(revived_layer))
+            else:
+                # When loading layers from SavedModel, a new class is
+                # dynamically created with the same name.
+                self.assertEqual(
+                    type(model_layer).__name__, type(revived_layer).__name__
+                )
+
+
+# These tests take a while to run, so each should run in a separate shard
+# (hence they are split into separate TestCase classes).
+class TestBigModelRevive(ReviveTestBase):
+    @test_combinations.run_with_all_model_types
+    def test_revive(self):
+        input_shape = None
+        if test_utils.get_model_type() == "functional":
+            input_shape = (2, 3)
+
+        layer_with_config = CustomLayerWithConfig(1.0, 2)
+        layer_without_config = CustomLayerNoConfig(3.0, 4)
+        subclassed_with_config = SubclassedModelWithConfig(4.0, 6.0)
+        subclassed_without_config = SubclassedModelNoConfig(7.0, 8.0)
+
+        inputs = keras.Input((2, 3))
+        x = CustomLayerWithConfig(1.0, 2)(inputs)
+        x = CustomLayerNoConfig(3.0, 4)(x)
+        x = SubclassedModelWithConfig(4.0, 6.0)(x)
+        x = SubclassedModelNoConfig(7.0, 8.0)(x)
+        inner_model_functional = keras.Model(inputs, x)
+
+        inner_model_sequential = keras.Sequential(
+            [
+                CustomLayerWithConfig(1.0, 2),
+                CustomLayerNoConfig(3.0, 4),
+                SubclassedModelWithConfig(4.0, 6.0),
+                SubclassedModelNoConfig(7.0, 8.0),
+            ]
+        )
+
+        class SubclassedModel(keras.Model):
+            def __init__(self):
+                super().__init__()
+                self.all_layers = [
+                    CustomLayerWithConfig(1.0, 2),
+                    CustomLayerNoConfig(3.0, 4),
+                    SubclassedModelWithConfig(4.0, 6.0),
+                    SubclassedModelNoConfig(7.0, 8.0),
+                ]
+
+            def call(self, inputs):
+                x = inputs
+                for layer in self.all_layers:
+                    x = layer(x)
+                return x
+
+        inner_model_subclassed = SubclassedModel()
+
+        layers = [
+            layer_with_config,
+            layer_without_config,
+            subclassed_with_config,
+            subclassed_without_config,
+            inner_model_functional,
+            inner_model_sequential,
+            inner_model_subclassed,
+        ]
+        model = test_utils.get_model_from_layers(
+            layers, input_shape=input_shape
+        )
+        # Run data through the Model to create save spec and weights.
+        model.predict(np.ones((10, 2, 3)), batch_size=10)
+
+        # Test that the correct checkpointed values are loaded, whether the
+        # layer is created from the config or SavedModel.
+        layer_with_config.c.assign(2 * layer_with_config.c)
+        layer_without_config.c.assign(3 * layer_without_config.c)
+
+        model.save(self.path, save_format="tf")
+        revived = keras_load.load(self.path)
+        self._assert_revived_correctness(model, revived)
+
+
+class TestModelRevive(ReviveTestBase):
+    def test_revive_subclassed_with_nested_model(self):
+        model = SubclassedModelNoConfig(1.0, 2.0)
+        # Run data through the Model to create save spec and weights.
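+        # A subclassed model has no static input signature, so without this
+        # forward pass the TF-format save below would raise a ValueError
+        # (the model's inputs have not been defined).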
+ model.predict(np.ones((10, 2, 3)), batch_size=10) + model.save(self.path, save_format="tf") + revived = keras_load.load(self.path) + self._assert_revived_correctness(model, revived) + + def test_revive_subclassed_with_sparse_model(self): + model = SubclassedSparseModelNoConfig(1.0, 2.0) + # Run data through the Model to create save spec and weights. + x = tf.sparse.from_dense(np.ones((10, 2, 3), dtype=np.float32)) + model.predict(x, batch_size=10) + model.save(self.path, save_format="tf") + revived = keras_load.load(self.path) + self._assert_revived_correctness(model, revived) + + def test_revive_unregistered_sequential(self): + model = UnregisteredCustomSequentialModel() + x = np.random.random((2, 2, 3)).astype(np.float32) + model(x) + model.save(self.path, save_format="tf") + revived = keras_load.load(self.path) + self._assert_revived_correctness(model, revived) + + def test_revive_sequential_inputs(self): + model = keras.models.Sequential( + [ + keras.Input((None,), dtype=tf.string), + keras.layers.Lambda(tf.strings.lower), + ] + ) + model.save(self.path, save_format="tf") + revived = keras_load.load(self.path) + revived_layers = list( + revived._flatten_layers(include_self=False, recursive=False) + ) + self.assertEqual(tf.string, revived_layers[0].dtype) + + @parameterized.named_parameters( + ("default_config", CustomNetworkDefaultConfig), + ("with_config", CustomNetworkWithConfig), + ("with_config_name", CustomNetworkWithConfigName), + ) + def test_revive_network(self, model_cls): + model = model_cls(8) + model.save(self.path, include_optimizer=False, save_format="tf") + revived = keras_load.load(self.path, compile=False) + self._assert_revived_correctness(model, revived) + + def test_functional_subclass(self): + model = FunctionalSubclassModel(32) + model.save(self.path, save_format="tf") + revived = keras_load.load(self.path, compile=False) + self._assert_revived_correctness(model, revived) + + def test_functional_subclass_wrong_config(self): + model = FunctionalSubclassModelWrongConfig(32) + model.save(self.path, save_format="tf") + with self.assertRaisesRegex(TypeError, "required positional arguments"): + keras_load.load(self.path, compile=False) + + def test_load_compiled_metrics(self): + model = test_utils.get_small_sequential_mlp(1, 3) + + # Compile with dense categorical accuracy + model.compile("rmsprop", "mse", "acc") + x = np.random.random((5, 10)).astype(np.float32) + y_true = np.random.random((5, 3)).astype(np.float32) + model.train_on_batch(x, y_true) + + model.save(self.path, include_optimizer=True, save_format="tf") + revived = keras_load.load(self.path, compile=True) + self.assertAllClose( + model.test_on_batch(x, y_true), revived.test_on_batch(x, y_true) + ) + + # Compile with sparse categorical accuracy + model.compile("rmsprop", "mse", "acc") + y_true = np.random.randint(0, 3, (5, 1)).astype(np.float32) + model.train_on_batch(x, y_true) + model.save(self.path, include_optimizer=True, save_format="tf") + revived = keras_load.load(self.path, compile=True) + self.assertAllClose( + model.test_on_batch(x, y_true), revived.test_on_batch(x, y_true) + ) + + def test_revived_model_has_save_spec(self): + model = SubclassedModelWithConfig(2, 3) + model.predict(np.random.random((5, 10)).astype(np.float32)) + model.save(self.path, save_format="tf") + revived = keras_load.load(self.path, compile=True) + self.assertAllEqual( + model._get_save_spec(dynamic_batch=False), + revived._get_save_spec(dynamic_batch=False), + ) + + def 
test_load_model_with_name_conflict_registered_works(self): + model = WideDeepModel(2, 3) + model(np.random.random((5, 10)).astype(np.float32)) + model.save(self.path, save_format="tf") + keras_load.load(self.path, compile=True) + + +if __name__ == "__main__": + tf.compat.v1.enable_eager_execution() + with CustomObjectScope( + { + "CustomLayerWithConfig": CustomLayerWithConfig, + "CustomNetworkWithConfig": CustomNetworkWithConfig, + "CustomNetworkWithConfigName": CustomNetworkWithConfigName, + "SubclassedModelWithConfig": SubclassedModelWithConfig, + "FunctionalSubclassModel": FunctionalSubclassModel, + "FunctionalSubclassModelWrongConfig": FunctionalSubclassModelWrongConfig, # noqa: E501 + "WideDeepModel": WideDeepModel, + } + ): + tf.test.main() diff --git a/keras/saving/legacy/saved_model/save.py b/keras/saving/legacy/saved_model/save.py new file mode 100644 index 000000000000..9126275cf3b3 --- /dev/null +++ b/keras/saving/legacy/saved_model/save.py @@ -0,0 +1,157 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Keras legacy SavedModel saving.""" + +import os + +import tensorflow.compat.v2 as tf +from absl import logging + +from keras import backend +from keras.protobuf import saved_metadata_pb2 +from keras.protobuf import versions_pb2 +from keras.saving.legacy import saving_utils +from keras.saving.legacy import serialization +from keras.saving.legacy.saved_model import constants +from keras.saving.legacy.saved_model import save_impl +from keras.saving.legacy.saved_model import utils +from keras.utils.generic_utils import LazyLoader +from keras.utils.io_utils import ask_to_proceed_with_overwrite + +# isort: off +from tensorflow.python.saved_model import save as save_lib + +# To avoid circular dependencies between keras/engine and keras/saving, +# code in keras/saving must delay imports. + +base_layer = LazyLoader("base_layer", globals(), "keras.engine.base_layer") +training_lib = LazyLoader("training_lib", globals(), "keras.engine.training") + + +def save( + model, + filepath, + overwrite, + include_optimizer, + signatures=None, + options=None, + save_traces=True, +): + """Saves a model as a SavedModel to the filepath. + + Args: + model: Keras model instance to be saved. + filepath: String path to save the model. + overwrite: whether to overwrite the existing filepath. + include_optimizer: If True, save the model's optimizer state. + signatures: Signatures to save with the SavedModel. Applicable to the 'tf' + format only. Please see the `signatures` argument in + `tf.saved_model.save` for details. + options: (only applies to SavedModel format) `tf.saved_model.SaveOptions` + object that specifies options for saving to SavedModel. + save_traces: (only applies to SavedModel format) When enabled, the + SavedModel will store the function traces for each layer. This + can be disabled, so that only the configs of each layer are stored. 
+ Disabling this will decrease serialization time and file size, but + it requires that all custom layers/models implement a + `get_config()` method. Defaults to `True`. + + Raises: + ValueError: if the model's inputs have not been defined. + """ + # If file exists and should not be overwritten. + if not overwrite and os.path.exists(filepath): + proceed = ask_to_proceed_with_overwrite(filepath) + if not proceed: + return + + if save_traces: + if save_impl.should_skip_serialization(model): + saving_utils.raise_model_input_error(model) + + if not include_optimizer: + orig_optimizer = model.optimizer + model.optimizer = None + # TODO(b/180760306) Change to del model.optimizer if Layer's __delattr__ + # calls AutoTrackable's __delattr__. + model._delete_tracking("optimizer") + + # Trace all functions and signatures with `training=0` instead of using an + # already-set learning phase placeholder. + # This is needed for compatibility reasons until learning phase setting + # is removed from the public apis. + with serialization.SharedObjectSavingScope(): + with backend.deprecated_internal_learning_phase_scope(0): + with utils.keras_option_scope(save_traces): + saved_nodes, node_paths = save_lib.save_and_return_nodes( + model, filepath, signatures, options + ) + + # Save all metadata to a separate file in the SavedModel directory. + metadata = generate_keras_metadata(saved_nodes, node_paths) + + with tf.io.gfile.GFile( + tf.io.gfile.join(filepath, constants.SAVED_METADATA_PATH), "wb" + ) as w: + w.write(metadata.SerializeToString(deterministic=True)) + + if not include_optimizer: + model.optimizer = orig_optimizer + + +def generate_keras_metadata(saved_nodes, node_paths): + """Constructs a KerasMetadata proto with the metadata of each object.""" + metadata = saved_metadata_pb2.SavedMetadata() + for node_id, node in enumerate(saved_nodes): + if isinstance(node, base_layer.Layer): + path = node_paths[node] + if not path: + node_path = "root" + else: + node_path = f"root.{'.'.join([ref.name for ref in path])}" + + metadata.nodes.add( + node_id=node_id, + node_path=node_path, + version=versions_pb2.VersionDef( + producer=2, min_consumer=1, bad_consumers=[] + ), + identifier=node._object_identifier, + metadata=node._tracking_metadata, + ) + + # Log warning if the node's class name conflicts with a Keras + # built-in object. + class_name = node.__class__.__name__ + from keras.layers import serialization as layers_serialization + + builtin_layer = layers_serialization.get_builtin_layer(class_name) + if builtin_layer: + if not isinstance(node, builtin_layer): + logging.warning( + "%s has the same name '%s' as a built-in Keras " + "object. Consider renaming %s to avoid naming " + "conflicts when loading with " + "`tf.keras.models.load_model`. " + "If renaming is not possible, pass " + "the object in the `custom_objects` " + "parameter of the load " + "function.", + node, + class_name, + node.__class__, + ) + + return metadata diff --git a/keras/saving/legacy/saved_model/save_impl.py b/keras/saving/legacy/saved_model/save_impl.py new file mode 100644 index 000000000000..a3e769c47618 --- /dev/null +++ b/keras/saving/legacy/saved_model/save_impl.py @@ -0,0 +1,781 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Keras SavedModel serialization. + +TODO (kathywu): Move to layer_serialization.py. Some model-specific logic should +go to model_serialization.py. +""" + +import functools +import threading +import weakref + +import tensorflow.compat.v1.logging as logging +import tensorflow.compat.v2 as tf + +from keras import backend +from keras.engine import base_layer_utils +from keras.engine import input_spec +from keras.mixed_precision import autocast_variable +from keras.saving.legacy import saving_utils +from keras.saving.legacy.saved_model import constants +from keras.saving.legacy.saved_model import load as keras_load +from keras.saving.legacy.saved_model import serialized_attributes +from keras.saving.legacy.saved_model import utils +from keras.utils import layer_utils +from keras.utils import tf_contextlib +from keras.utils import tf_utils +from keras.utils import version_utils +from keras.utils.generic_utils import LazyLoader + +# To avoid circular dependencies between keras/engine and keras/saving, +# code in keras/saving must delay imports. + +# TODO(b/134426265): Switch back to single-quotes to match the rest of the file +# once the issue with copybara is fixed. + +base_layer = LazyLoader("base_layer", globals(), "keras.engine.base_layer") +metrics = LazyLoader("metrics", globals(), "keras.metrics") +input_layer = LazyLoader("input_layer", globals(), "keras.engine.input_layer") +training_lib = LazyLoader("training_lib", globals(), "keras.engine.training") +sequential_lib = LazyLoader( + "sequential_lib", globals(), "keras.engine.sequential" +) + + +def should_skip_serialization(layer): + """Skip serializing extra objects and functions if layer inputs aren't + set.""" + saved_model_input_spec_set = ( + isinstance(layer, training_lib.Model) + and layer._saved_model_inputs_spec is not None + ) + if not layer.built and not saved_model_input_spec_set: + logging.warning( + "Skipping full serialization of Keras layer {}, because " + "it is not built.".format(layer) + ) + return True + return False + + +def _filter_shards(variables): + return [var for var in variables if not hasattr(var, "_sharded_container")] + + +def wrap_layer_objects(layer, serialization_cache): + """Returns extra trackable objects to attach to the serialized layer. + + Args: + layer: Keras Layer object. + serialization_cache: Dictionary shared between all objects during + serialization. + + Returns: + A dictionary containing all checkpointable objects from a + SerializedAttributes object. See LayerAttributes and ModelAttributes for + entire list of objects + """ + # Wrap all regularization losses as tf.functions. + # First, generate list of all regularization losses in this layer and + # sublayers. + all_losses = layer._callable_losses[:] + for child_layer in utils.list_all_layers(layer): + all_losses.extend(child_layer._callable_losses) + # Next, wrap all loss functions as tf.functions. Use the serialization cache + # to store already-wrapped functions. 
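+    # The cache is keyed by the original loss callable, so a function shared
+    # between layers is wrapped exactly once and both layers end up pointing
+    # at the same tf.function in the SavedModel.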
+ keras_loss_cache = serialization_cache.setdefault("keras_losses", {}) + wrapped_loss_functions = [] + for loss_fn in all_losses: + if loss_fn in keras_loss_cache: + wrapped_loss_functions.append(keras_loss_cache[loss_fn]) + else: + wrapped_loss = _wrap_unconditional_loss( + loss_fn, len(keras_loss_cache) + ) + keras_loss_cache[loss_fn] = wrapped_loss + wrapped_loss_functions.append(wrapped_loss) + wrapped_layer_losses = [ + keras_loss_cache[fn] for fn in layer._callable_losses[:] + ] + + layer_metrics = tf.__internal__.tracking.wrap( + {m.name: m for m in layer._metrics} + ) + + # Avoid duplicate creation of shard Variables on loading. + # `layer.variables` will return the shard Variables rather than the + # ShardedVariables (b/224541446), but Keras loading will create new + # ShardedVariables (and thus shard Variables) from Keras metadata if needed. + # There's no need to also save the shard Variables here, so filter them out. + variables = _filter_shards(layer.variables) + trainable_variables = _filter_shards(layer.trainable_variables) + non_trainable_variables = _filter_shards(layer.non_trainable_variables) + return dict( + variables=tf.__internal__.tracking.wrap(variables), + trainable_variables=tf.__internal__.tracking.wrap(trainable_variables), + non_trainable_variables=tf.__internal__.tracking.wrap( + non_trainable_variables + ), + layers=tf.__internal__.tracking.wrap(utils.list_all_layers(layer)), + metrics=tf.__internal__.tracking.wrap(layer.metrics), + regularization_losses=tf.__internal__.tracking.wrap( + wrapped_loss_functions + ), + layer_regularization_losses=tf.__internal__.tracking.wrap( + wrapped_layer_losses + ), + layer_metrics=layer_metrics, + ) + + +def wrap_layer_functions(layer, serialization_cache): + """Returns dict of wrapped layer call function and losses in tf.functions. + + Args: + layer: Keras Layer object. + serialization_cache: Dictionary shared between all objects during + serialization. + + Returns: + A dictionary containing all keras tf.functions to serialize. See + LayerAttributes and ModelAttributes for the list of all attributes. + """ + # Since Sequential models may be modified in place using model.add() or + # model.pop(), don't use saved functions. + if isinstance(layer, keras_load.RevivedLayer) and not isinstance( + layer, sequential_lib.Sequential + ): + return { + fn_name: getattr(layer.keras_api, fn_name, None) + for fn_name in serialized_attributes.LayerAttributes.all_functions + } + + # Reset the losses of the layer and its children. The call function in each + # child layer is replaced with tf.functions. + original_fns = _replace_child_layer_functions(layer, serialization_cache) + original_losses = _reset_layer_losses(layer) + + # Wrap all the layer call and activity regularizer functions. + + # Use LayerCallCollection to ensure that all layer call functions (__call__, + # call with losses) are traced with the same inputs. + call_collection = LayerCallCollection(layer) + call_fn_with_losses = call_collection.add_function( + _wrap_call_and_conditional_losses(layer), + f"{layer.name}_layer_call_and_return_conditional_losses", + # If any of this layer's child layers use the training arg, the traced + # call functions of this layer will have a training keyword argument. If + # the original layer does not expect the training arg, then it will have + # to be removed (by setting `match_layer_training_arg`). 
+ match_layer_training_arg=True, + ) + call_fn = call_collection.add_function( + _extract_outputs_from_fn(layer, call_fn_with_losses), + f"{layer.name}_layer_call_fn", + # Since `call_fn` wraps call_fn_with_losses and not the original call + # function, `match_layer_training_arg` should be set to False. + match_layer_training_arg=False, + ) + + fns = { + "call_and_return_conditional_losses": call_fn_with_losses, + "__call__": call_fn, + } + + if layer._activity_regularizer is not None: + fns["activity_regularizer_fn"] = _wrap_activity_regularizer(layer) + fns[ + "call_and_return_all_conditional_losses" + ] = call_collection.add_function( + _append_activity_regularizer_loss( + layer, call_fn_with_losses, fns["activity_regularizer_fn"] + ), + f"{layer.name}_layer_call_and_return_all_conditional_losses", + match_layer_training_arg=False, + ) + else: + fns["activity_regularizer_fn"] = None + fns["call_and_return_all_conditional_losses"] = call_fn_with_losses + + # Manually trigger traces before restoring the overwritten functions. The + # functions are traced within the layer call context to ensure that layer + # functions (e.g. add_loss) behave as though running in graph mode. + with tracing_scope(): + call_collection.trace_with_input_signature() + with base_layer_utils.call_context().enter( + layer, inputs=None, build_graph=True, training=None, saving=True + ): + for fn in fns.values(): + if fn is not None and not isinstance(fn, LayerCall): + fn.get_concrete_function() + + # Restore overwritten functions and losses + _restore_child_layer_functions(original_fns) + _restore_layer_losses(original_losses) + + return fns + + +def default_save_signature(layer): + original_losses = _reset_layer_losses(layer) + fn = saving_utils.trace_model_call(layer) + _restore_layer_losses(original_losses) + return fn + + +def _replace_child_layer_functions(layer, serialization_cache): + """Replaces functions in the children layers with wrapped tf.functions. + + This step allows functions from parent layers to reference the wrapped + functions from their children layers instead of retracing the ops. + + This function also resets all losses stored in the layer. These are stored + in the returned dictionary. Use `_restore_child_layer_functions` to restore + the original attributes. + + Args: + layer: Keras Layer object. + serialization_cache: Dictionary shared between all objects during + serialization. + + Returns: + Dictionary mapping layer objects -> original functions and losses: + { Child layer 1: { + 'losses': Original losses, + 'call': Original call function + '_activity_regularizer': Original activity regularizer}, + Child layer 2: ... + } + """ + + original_fns = {} + + def replace_layer_functions(child_layer, serialized_fns): + """Replaces layer call and activity regularizer with wrapped + functions.""" + original_fns[child_layer] = { + "call": child_layer.call, + "_activity_regularizer": child_layer._activity_regularizer, + } + with utils.no_automatic_dependency_tracking_scope(child_layer): + try: + child_layer._activity_regularizer = serialized_fns.get( + "activity_regularizer_fn" + ) + except AttributeError: + # Some layers have an unsettable activity regularizer. 
+ pass + child_layer.call = utils.use_wrapped_call( + child_layer, + serialized_fns["call_and_return_conditional_losses"], + child_layer._call_spec, + default_training_value=False, + ) + + def replace_metric_functions(child_layer, serialized_fns): + """Replaces metric functions with wrapped functions.""" + original_fns[child_layer] = { + "__call__": child_layer.__call__, + "result": child_layer.result, + "update_state": child_layer.update_state, + } + with utils.no_automatic_dependency_tracking_scope(child_layer): + child_layer.__call__ = serialized_fns["__call__"] + child_layer.result = serialized_fns["result"] + child_layer.update_state = serialized_fns["update_state"] + + for child_layer in utils.list_all_layers(layer): + if isinstance(child_layer, input_layer.InputLayer): + continue + + if child_layer not in serialization_cache[constants.KERAS_CACHE_KEY]: + serialized_functions = child_layer._trackable_saved_model_saver._get_serialized_attributes( # noqa: E501 + serialization_cache + ).functions + else: + serialized_functions = serialization_cache[ + constants.KERAS_CACHE_KEY + ][child_layer].functions + if not serialized_functions: + # This indicates either: + # - circular dependency, which means the current layer's functions + # should be wrapped first. + # - Child layer's inputs are not defined, so its functions have + # not been wrapped. In this case, no replacement is necessary so + # move on to the next child. + continue + + if isinstance(child_layer, metrics.Metric): + replace_metric_functions(child_layer, serialized_functions) + else: + replace_layer_functions(child_layer, serialized_functions) + + return original_fns + + +def _restore_child_layer_functions(original_fns): + """Restores attributes replaced with `_replace_child_layer_functions`.""" + for child_layer, fns in original_fns.items(): + with utils.no_automatic_dependency_tracking_scope(child_layer): + for fn_name, fn in fns.items(): + try: + setattr(child_layer, fn_name, fn) + except AttributeError: + # In the case of _activity_regularizer, setting the + # attribute may be disallowed. + pass + + +def _reset_layer_losses(parent_layer): + """Resets losses of layer and its sublayers, and returns original losses.""" + losses_dict = {} + for layer in utils.list_all_layers_and_sublayers(parent_layer): + losses_dict[layer] = { + "losses": layer._losses[:], + "eager_losses": layer._eager_losses[:], + } + with utils.no_automatic_dependency_tracking_scope(layer): + layer._losses = [] + layer._eager_losses = [] + return losses_dict + + +def _restore_layer_losses(losses_dict): + for layer in losses_dict: + with utils.no_automatic_dependency_tracking_scope(layer): + layer._losses = losses_dict[layer]["losses"] + layer._eager_losses = losses_dict[layer]["eager_losses"] + + +class LayerTracingContext(threading.local): + def __init__(self): + super().__init__() + self.enable_call_tracing = False + self.trace_queue = [] + + +_thread_local_data = LayerTracingContext() + + +@tf_contextlib.contextmanager +def tracing_scope(): + """Enables tracing scope.""" + # This enables the LayerCallCollection's tracing mechanism to trace all call + # functions in the collection. + previous_value = _thread_local_data.enable_call_tracing + previous_queue = _thread_local_data.trace_queue + try: + _thread_local_data.enable_call_tracing = True + _thread_local_data.trace_queue = [] + yield + finally: + # Run traces from the queue. 
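+        # The queue is drained FIFO; entries recorded with an explicit
+        # training value are traced under the matching learning-phase scope.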
+ while _thread_local_data.trace_queue: + fn, args, kwargs, training = _thread_local_data.trace_queue.pop(0) + if training is not None: + with backend.deprecated_internal_learning_phase_scope(training): + fn.get_concrete_function(*args, **kwargs) + else: + fn.get_concrete_function(*args, **kwargs) + _thread_local_data.trace_queue = previous_queue + _thread_local_data.enable_call_tracing = previous_value + + +def add_trace_to_queue(fn, args, kwargs, training=None): + if tracing_enabled(): + _thread_local_data.trace_queue.append( + (fn, args[:], kwargs.copy(), training) + ) + + +def tracing_enabled(): + """Whether to add extra traces to the queue.""" + return _thread_local_data.enable_call_tracing + + +class LayerCallCollection: + """Groups wrapped layer call functions. + + This is used to ensure that all layer call functions are traced with the + same inputs- + - call + - call_and_return_conditional_losses + - call_and_return_all_conditional_losses + """ + + def __init__(self, layer): + self.layer = layer + + self.layer_call_method = _get_layer_call_method(layer) + self._expects_training_arg = utils.layer_uses_training_bool(layer) + self._call_spec = layer._call_spec + + # Create new call spec if the layer itself does not accept a training + # arg, but one of its child layers does. When this layer's call + # functions are traced, they will be traced with an added `training` + # keyword argument. + if not self.layer._expects_training_arg and self._expects_training_arg: + arg_spec = utils.set_training_arg_spec( + self._call_spec.full_argspec, False + ) + self._call_spec = layer_utils.CallFunctionSpec(arg_spec) + + self._layer_inputs = self._get_layer_inputs(layer) + self._functions = weakref.WeakValueDictionary() + + # Get the input argument name from the args. + if self._call_spec.arg_names: + self._input_arg_name = self._call_spec.arg_names[0] + else: + # Layer could be defined with only varargs, in which case use a + # default name. + self._input_arg_name = "inputs" + + def _get_layer_inputs(self, layer): + """Inspects layer object and returns the inferred input signature. + + Args: + layer: Layer object. + + Returns: + List of possibly nested TensorSpecs of the layer call function inputs + in the form of `(args, kwargs)` + """ + if ( + isinstance(layer.call, tf.__internal__.function.Function) + and layer.call.input_signature is not None + ): + return layer.call.input_signature, {} + elif isinstance(layer, training_lib.Model): + return saving_utils.model_call_inputs(layer) + elif ( + layer.input_spec is not None + and layer._use_input_spec_as_call_signature + ): + + def to_tensor_spec_or_none(x): + spec = input_spec.to_tensor_spec(x, layer._compute_dtype) + # If the shape is too general (e.g. multiple dimensions are + # allowed), return None so that separate functions can be + # generated for each inferred input signature. + # TODO(b/134962016): currently partial signatures are not + # supported. + if spec.shape == tf.TensorShape(None): + return None, None + return spec + + input_signature = [ + tf.nest.map_structure(to_tensor_spec_or_none, layer.input_spec) + ] + + return input_signature, {} + else: + return None, None + + def add_trace(self, *args, **kwargs): + """Traces all functions with the same args and kwargs. + + Args: + *args: Positional args passed to the original function. + **kwargs: Keyword args passed to the original function. 
+ """ + args = list(args) + kwargs = kwargs.copy() + + for fn in self._functions.values(): + # TODO(kathywu): Replace arguments with broader shapes defined in + # the input signature. + if self._expects_training_arg: + + def trace_with_training(value, fn=fn): + nonlocal args, kwargs + (args, kwargs,) = self._call_spec.set_arg_value( + "training", value, args, kwargs, inputs_in_args=True + ) + add_trace_to_queue(fn, args, kwargs, value) + + trace_with_training(True) + trace_with_training(False) + else: + add_trace_to_queue(fn, args, kwargs) + + def training_arg_was_passed(self, args, kwargs): + return self._call_spec.arg_was_passed( + "training", args, kwargs, inputs_in_args=True + ) + + def get_training_arg_value(self, args, kwargs): + try: + return self._call_spec.get_arg_value( + "training", args, kwargs, inputs_in_args=True + ) + except KeyError: # Training is not in args or kwargs. + return None + + def get_input_arg_value(self, args, kwargs): + return self._call_spec.get_arg_value( + self._input_arg_name, args, kwargs, inputs_in_args=True + ) + + def _maybe_wrap_with_training_arg(self, call_fn, match_layer_training_arg): + """Wraps call function with added training argument if necessary.""" + if not self.layer._expects_training_arg and self._expects_training_arg: + # Add training arg to wrapper function. + def wrap_with_training_arg(*args, **kwargs): + if match_layer_training_arg: + # Remove the training value, since the original call_fn does + # not expect a training arg. Instead, the training value + # will be propagated using the call context created in + # LayerCall. + args = list(args) + kwargs = kwargs.copy() + (args, kwargs,) = self._call_spec.set_arg_value( + "training", + None, + args, + kwargs, + inputs_in_args=True, + pop_kwarg_if_none=True, + ) + return call_fn(*args, **kwargs) + + return tf.__internal__.decorator.make_decorator( + target=call_fn, + decorator_func=wrap_with_training_arg, + decorator_argspec=self._call_spec.full_argspec, + ) + + return call_fn + + def add_function(self, call_fn, name, match_layer_training_arg): + """Adds a layer call function to the collection. + + Args: + call_fn: a python function + name: Name of call function + match_layer_training_arg: If True, removes the `training` from the + function arguments when calling `call_fn`. + + Returns: + LayerCall (tf.function) + """ + fn = LayerCall( + self, + self._maybe_wrap_with_training_arg( + call_fn, match_layer_training_arg + ), + name, + ) + self._functions[name] = fn.wrapped_call + return fn + + def trace_with_input_signature(self): + """Trace with the layer/models inferred input signature if possible.""" + if self._layer_inputs[0] is None: + return + + args, kwargs = self._layer_inputs + if self._expects_training_arg: + args, kwargs = self._call_spec.set_arg_value( + "training", False, args, kwargs, inputs_in_args=True + ) + if None not in tf.nest.flatten([args, kwargs]): + # Manually add traces for layers that have keyword arguments and + # have a fully defined input signature. + self.add_trace(*args, **kwargs) + + +def _filtered_inputs(inputs): + return list(filter(tf_utils.is_tensor_or_variable, tf.nest.flatten(inputs))) + + +def layer_call_wrapper(call_collection, method, name): + """Ensures layer losses are kept the same, and runs method in call + context.""" + + # Create wrapper that deals with losses and call context. 
+    def wrapper(*args, **kwargs):
+        """Calls method within call context."""
+        layer = call_collection.layer
+        training = None
+        inputs = _filtered_inputs([args, kwargs])
+
+        if (args or kwargs) and call_collection.training_arg_was_passed(
+            args, kwargs
+        ):
+            training = call_collection.get_training_arg_value(args, kwargs)
+
+        original_losses = _reset_layer_losses(layer)
+        with base_layer_utils.call_context().enter(
+            layer,
+            inputs=inputs,
+            build_graph=False,
+            training=training,
+            saving=True,
+        ):
+            with autocast_variable.enable_auto_cast_variables(
+                layer._compute_dtype_object
+            ):
+                ret = method(*args, **kwargs)
+        _restore_layer_losses(original_losses)
+        return ret
+
+    # Rename to `name`, since tf.function doesn't have a name argument. Without
+    # this, all functions returned by this method will be named "call", which
+    # would be a nightmare to debug.
+    fn = tf.__internal__.decorator.make_decorator(
+        target=method, decorator_func=wrapper
+    )
+    fn.__name__ = name
+    return fn
+
+
+class LayerCall:
+    """Function that triggers traces of other functions in the same
+    collection."""
+
+    def __init__(self, call_collection, call_fn, name):
+        """Initializes a LayerCall object.
+
+        Args:
+            call_collection: a LayerCallCollection, which contains the other
+                layer call functions (e.g. call_with_conditional_losses,
+                call). These functions should be traced with the same
+                arguments.
+            call_fn: A call function.
+            name: Name of the call function.
+        """
+        self.call_collection = call_collection
+        self.wrapped_call = tf.function(
+            layer_call_wrapper(call_collection, call_fn, name)
+        )
+
+    def _maybe_trace(self, args, kwargs):
+        # Trigger traces of other call functions + extra training-arg traces.
+        if tracing_enabled():
+            self.call_collection.add_trace(*args, **kwargs)
+
+    def __call__(self, *args, **kwargs):
+        self._maybe_trace(args, kwargs)
+        return self.wrapped_call(*args, **kwargs)
+
+    def get_concrete_function(self, *args, **kwargs):
+        self._maybe_trace(args, kwargs)
+        return self.wrapped_call.get_concrete_function(*args, **kwargs)
+
+
+def _wrap_call_and_conditional_losses(layer):
+    """Wraps call function that returns a tuple of (outputs, losses).
+
+    The losses returned are conditional on the inputs passed to the call
+    function. Unconditional losses (e.g. weight regularization) are wrapped
+    separately.
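+
+    For example (illustrative): a loss added via
+    `layer.add_loss(tf.reduce_sum(inputs))` inside `call` depends on the
+    inputs and is returned by the wrapped function, while a zero-argument
+    callable such as `layer.add_loss(lambda: tf.reduce_sum(w))` is
+    unconditional and is handled by `_wrap_unconditional_loss`.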
+ + Args: + layer: a Keras layer object + + Returns: + python call function that returns outputs and conditional losses -- + excludes activity regularizer + """ + # Create function that generates both outputs and losses + layer_call = _get_layer_call_method(layer) + + def call_and_return_conditional_losses(*args, **kwargs): + """Returns layer (call_output, conditional losses) tuple.""" + call_output = layer_call(*args, **kwargs) + if version_utils.is_v1_layer_or_model(layer): + conditional_losses = layer.get_losses_for( + _filtered_inputs([args, kwargs]) + ) + else: + conditional_losses = [ + l for l in layer.losses if not hasattr(l, "_unconditional_loss") + ] + return call_output, conditional_losses + + return _create_call_fn_decorator(layer, call_and_return_conditional_losses) + + +def _extract_outputs_from_fn(layer, call_and_return_conditional_losses): + """Returns a function that returns only call function outputs.""" + if isinstance(layer, keras_load.RevivedLayer): + return layer.keras_api.__call__ + + def call(inputs, *args, **kwargs): + return call_and_return_conditional_losses(inputs, *args, **kwargs)[0] + + return _create_call_fn_decorator(layer, call) + + +def _append_activity_regularizer_loss( + layer, call_fn_with_losses, activity_regularizer_fn +): + """Appends activity regularizer loss to losses returned by the wrapped + fn.""" + + def fn(inputs, *args, **kwargs): + outputs, losses = call_fn_with_losses(inputs, *args, **kwargs) + losses.append(activity_regularizer_fn(outputs)) + return outputs, losses + + return _create_call_fn_decorator(layer, fn) + + +def _create_call_fn_decorator(layer, wrapped_call): + call_fn = _get_layer_call_method(layer) + fn, arg_spec = utils.maybe_add_training_arg( + layer._call_spec, + wrapped_call, + layer._expects_training_arg, + default_training_value=False, + ) + return tf.__internal__.decorator.make_decorator( + target=call_fn, decorator_func=fn, decorator_argspec=arg_spec + ) + + +def _wrap_unconditional_loss(loss_fn, index): + """Wraps callable/unconditional loss, returning a serializable function.""" + # Extract original loss function from partial function + fn = loss_fn.args[0] if isinstance(loss_fn, functools.partial) else loss_fn + if isinstance(fn, tf.__internal__.function.Function): + return fn + else: + return tf.__internal__.function.Function( + fn, f"loss_fn_{index}", input_signature=[] + ) + + +def _wrap_activity_regularizer(layer): + """Wraps the activity regularizer.""" + + if isinstance( + layer._activity_regularizer, tf.__internal__.function.Function + ): + return layer._activity_regularizer + return tf.__internal__.function.Function( + layer._activity_regularizer, + f"{layer.name}_activity_regularizer", + input_signature=[ + tf.TensorSpec(None, layer._compute_dtype or backend.floatx()) + ], + ) + + +def _get_layer_call_method(layer): + if isinstance(layer.call, (tf.__internal__.function.Function)): + return layer.call.python_function + return layer.call diff --git a/keras/saving/legacy/saved_model/saved_model_test.py b/keras/saving/legacy/saved_model/saved_model_test.py new file mode 100644 index 000000000000..7ae94743645d --- /dev/null +++ b/keras/saving/legacy/saved_model/saved_model_test.py @@ -0,0 +1,1630 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for saving and loading Keras models and layers from SavedModel. + +These should ensure that all layer properties are correctly assigned after +loading from the SavedModel. + +Tests that focus on the model structure should go in revive_test.py +""" + +import os +import shutil +import sys + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 + +import keras +from keras import regularizers +from keras.feature_column.dense_features import DenseFeatures +from keras.protobuf import saved_metadata_pb2 +from keras.protobuf import versions_pb2 +from keras.saving import object_registration +from keras.saving.legacy.saved_model import json_utils +from keras.saving.legacy.saved_model import load as keras_load +from keras.saving.legacy.saved_model import save_impl as keras_save +from keras.saving.legacy.saved_model import utils as saved_model_utils +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import control_flow_util +from keras.utils import tf_contextlib +from keras.utils import tf_inspect + + +class LayerWithLearningPhase(keras.engine.base_layer.Layer): + def build(self, input_shape): + self.input_spec = keras.layers.InputSpec( + shape=[None] * len(input_shape) + ) + self.built = True + + def call(self, x, training=None): + if training is None: + training = keras.backend.learning_phase() + output = control_flow_util.smart_cond( + training, lambda: x * 0, lambda: tf.identity(x) + ) + if not tf.executing_eagerly(): + output._uses_learning_phase = True + return output + + def compute_output_shape(self, input_shape): + return input_shape + + @property + def _use_input_spec_as_call_signature(self): + return True + + +class LayerWithLoss(keras.layers.Layer): + def call(self, inputs): + self.add_loss(tf.reduce_sum(inputs)) + return inputs * 2 + + +class LayerWithUpdate(keras.layers.Layer): + def build(self, _): + self.v = self.add_weight( + "v", + shape=[], + initializer=keras.initializers.zeros, + trainable=False, + dtype=tf.float32, + ) + + def call(self, inputs, training=True): + if training: + self.add_update(self.v.assign_add(1.0)) + return inputs * 2.0 + + +@object_registration.register_keras_serializable("Testing") +class GlobalLayerThatShouldFailIfNotAdded(keras.layers.Layer): + _must_restore_from_config = True + + +@test_combinations.run_all_keras_modes +class TestSavedModelFormatAllModes(test_combinations.TestCase): + def _save_model_dir(self, dirname="saved_model"): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + return os.path.join(temp_dir, dirname) + + def _get_model(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + model.layers[-1].activity_regularizer = regularizers.get("l2") + model.activity_regularizer = regularizers.get("l2") + model.compile(loss="mse", optimizer="rmsprop") + + def callable_loss(): + return 
tf.reduce_sum(model.weights[0])
+
+        model.add_loss(callable_loss)
+        return model
+
+    def _train_model(self, model, use_dataset=False):
+        x = np.random.random((1, 3))
+        y = np.random.random((1, 4))
+
+        if not tf.__internal__.tf2.enabled():
+            # The layer autocast behavior only runs when autocast is enabled,
+            # so in V1, the numpy inputs still need to be cast to float32.
+            x = x.astype(np.float32)
+            y = y.astype(np.float32)
+
+        if use_dataset:
+            dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(1)
+            model.fit(dataset)
+        else:
+            model.train_on_batch(x, y)
+
+    def _save_and_load(self, model):
+        saved_model_dir = self._save_model_dir()
+        model.save(saved_model_dir, save_format="tf")
+        loaded = keras_load.load(saved_model_dir)
+        return loaded
+
+    def _test_evaluation(self, model, loaded):
+        # Assert that original and loaded models have the same results when
+        # called.
+        self.evaluate(tf.compat.v1.variables_initializer(loaded.variables))
+        self.assertAllClose(
+            self.evaluate(model.weights), self.evaluate(loaded.weights)
+        )
+
+        input_arr = tf.constant(np.random.random((1, 3)).astype(np.float32))
+        self.assertAllClose(
+            self.evaluate(model(input_arr)), self.evaluate(loaded(input_arr))
+        )
+        # Validate losses. The order of conditional losses may change between
+        # the model and loaded model, so sort the losses first.
+        if tf.executing_eagerly():
+            self.assertAllClose(
+                sorted(self.evaluate(model.losses)),
+                sorted(self.evaluate(loaded.losses)),
+            )
+
+    @test_combinations.run_with_all_model_types
+    def test_model_save_and_load(self):
+        model = self._get_model()
+        self._train_model(model, use_dataset=False)
+        loaded = self._save_and_load(model)
+        self._test_evaluation(model, loaded)
+
+    @test_combinations.run_with_all_model_types
+    def test_model_save_and_load_dataset(self):
+        model = self._get_model()
+        self._train_model(model, use_dataset=True)
+        loaded = self._save_and_load(model)
+        self._test_evaluation(model, loaded)
+
+    def test_trainable_weights(self):
+        """Tests that trainable status of individual weights is preserved."""
+        layer = keras.layers.Dense(4, name="custom_layer")
+        layer.build([None, 3])
+        layer.add_weight(
+            "extra_weight",
+            shape=[],
+            initializer=tf.compat.v1.constant_initializer(11),
+            trainable=True,
+        )
+        layer.add_weight(
+            "extra_weight_2",
+            shape=[],
+            initializer=tf.compat.v1.constant_initializer(12),
+            trainable=False,
+        )
+        model = keras.Sequential(
+            [
+                keras.Input(
+                    [
+                        3,
+                    ]
+                ),
+                layer,
+            ]
+        )
+
+        saved_model_dir = self._save_model_dir()
+        self.evaluate(tf.compat.v1.variables_initializer(layer.variables))
+        model.save(saved_model_dir, save_format="tf")
+        loaded_model = keras_load.load(saved_model_dir)
+        self.evaluate(
+            tf.compat.v1.variables_initializer(loaded_model.variables)
+        )
+
+        loaded = loaded_model.layers[-1]
+
+        equal_attrs = ["name", "_expects_training_arg", "trainable"]
+        for attr in equal_attrs:
+            self.assertEqual(getattr(layer, attr), getattr(loaded, attr))
+
+        all_close = ["weights", "trainable_weights", "non_trainable_weights"]
+        for attr in all_close:
+            self.assertAllClose(
+                self.evaluate(getattr(layer, attr)),
+                self.evaluate(getattr(loaded, attr)),
+            )
+
+    @test_combinations.run_with_all_model_types
+    def test_trainable_layers(self):
+        """Tests that trainable status of individual layers is preserved."""
+        model = self._get_model()
+        # Set the last layer to *not* be trainable.
+ model.layers[-1].trainable = False + self._train_model(model, use_dataset=True) + loaded = self._save_and_load(model) + + self._test_evaluation(model, loaded) + self.assertFalse(model.layers[-1].trainable) + self.assertFalse(loaded.layers[-1].trainable) + + def test_trainable_custom_model_false(self): + """Tests that overall False trainable status of Model is preserved.""" + # Set all layers to *not* be trainable. + model = test_utils.SmallSubclassMLP(1, 4, trainable=False) + model.compile(loss="mse", optimizer="rmsprop") + self._train_model(model, use_dataset=False) + loaded = self._save_and_load(model) + + self._test_evaluation(model, loaded) + self.assertEmpty(model.trainable_variables) + self.assertEmpty(loaded.trainable_variables) + + def test_maintains_losses(self): + """Tests that the layer losses do not change before and after export.""" + model = keras.models.Sequential([LayerWithLoss()]) + model.compile(loss="mse", optimizer="rmsprop") + input_arr = np.random.random((1, 3)) + target_arr = np.random.random((1, 3)) + + # Test that symbolic losses are maintained (train_on_batch saves + # symbolic losses.) + model.train_on_batch(input_arr, target_arr) + previous_losses = model.losses[:] + + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + with previous_losses[0].graph.as_default(): + # If we try to compare symbolic Tensors in eager mode assertAllEqual + # will return False even if they are the same Tensor. + self.assertEqual(previous_losses, model.losses) + + if tf.executing_eagerly(): + # Test that eager losses are maintained. + model(input_arr) # Calls model eagerly, creating eager losses. + previous_losses = model.losses[:] + model.save(saved_model_dir, save_format="tf") + self.assertAllEqual(previous_losses, model.losses) + + def test_layer_with_learning_phase(self): + layer = LayerWithLearningPhase() + layer.build([None, None]) + saved_model_dir = self._save_model_dir() + model = test_utils.get_model_from_layers( + [layer], input_shape=[None], model_type="functional" + ) + model.save(saved_model_dir, save_format="tf") + loaded_model = keras_load.load(saved_model_dir) + loaded = loaded_model.layers[-1] + input_arr = tf.ones((4, 3)) + + # Run the layer, and use the keras backend learning phase + keras.backend.set_learning_phase(0) + self.assertAllEqual(input_arr, loaded(input_arr)) + keras.backend.set_learning_phase(1) + self.assertAllEqual(tf.zeros((4, 3)), loaded(input_arr)) + + # Run the layer while explicitly setting the training argument + self.assertAllEqual( + input_arr, loaded(input_arr, training=tf.constant(False)) + ) + self.assertAllEqual( + tf.zeros((4, 3)), loaded(input_arr, training=tf.constant(True)) + ) + + @test_combinations.run_with_all_model_types + def test_standard_loader(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + model.activity_regularizer = regularizers.get("l2") + + def eager_loss(): + return tf.reduce_sum(model.weights[0]) + + model.add_loss(eager_loss) + + # Call predict to ensure that all layers are built and inputs are set. 
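+        # For the subclassed model type this also records the save spec;
+        # without it, the TF-format save below would raise a ValueError
+        # because the model's inputs have not been defined.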
+ model.predict(np.random.random((1, 3)).astype(np.float32)) + saved_model_dir = self._save_model_dir() + + model.save(saved_model_dir, save_format="tf") + + loaded = tf.saved_model.load(saved_model_dir) + self.evaluate(tf.compat.v1.variables_initializer(loaded.variables)) + all_close = [ + "variables", + "trainable_variables", + "non_trainable_variables", + ] + for attr in all_close: + self.assertAllClose( + self.evaluate(getattr(model, attr)), + self.evaluate(getattr(loaded.keras_api, attr)), + ) + self.assertLen(loaded.regularization_losses, 1) + expected_layers = len(model.layers) + self.assertEqual(expected_layers, len(loaded.keras_api.layers)) + input_arr = tf.ones((4, 3)) + self.assertAllClose( + self.evaluate(model(input_arr)), + self.evaluate(loaded(input_arr, training=False)), + ) + + @test_combinations.run_with_all_model_types + def test_compiled_model(self): + # TODO(b/134519980): Issue with model.fit if the model call function + # uses a tf.function (Graph mode only). + if not tf.executing_eagerly(): + return + + input_arr = np.random.random((1, 3)) + target_arr = np.random.random((1, 4)) + + model = test_utils.get_small_mlp(1, 4, input_dim=3) + expected_predict = model.predict(input_arr) + + # Compile and save model. + model.compile("rmsprop", "mse") + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + loaded = keras_load.load(saved_model_dir) + actual_predict = loaded.predict(input_arr) + self.assertAllClose(expected_predict, actual_predict) + + loss_before = loaded.evaluate(input_arr, target_arr) + loaded.fit(input_arr, target_arr) + loss_after = loaded.evaluate(input_arr, target_arr) + self.assertLess(loss_after, loss_before) + predict = loaded.predict(input_arr) + + ckpt_path = os.path.join(self.get_temp_dir(), "weights") + loaded.save_weights(ckpt_path) + + # Ensure that the checkpoint is compatible with the original model. 
+ model.load_weights(ckpt_path) + self.assertAllClose(predict, model.predict(input_arr)) + + def test_metadata_input_spec(self): + class LayerWithNestedSpec(keras.layers.Layer): + def __init__(self): + super().__init__() + self.input_spec = { + "a": keras.layers.InputSpec(max_ndim=3, axes={-1: 2}), + "b": keras.layers.InputSpec( + shape=(None, 2, 3), dtype="int32" + ), + } + + @property + def _use_input_spec_as_call_signature(self): + return True + + layer = LayerWithNestedSpec() + saved_model_dir = self._save_model_dir() + model = test_utils.get_model_from_layers([layer], model_type="subclass") + model( + { + "a": tf.constant([[2, 4]]), + "b": tf.ones([1, 2, 3], dtype=tf.int32), + } + ) + model.save(saved_model_dir, save_format="tf") + loaded_model = keras_load.load(saved_model_dir) + loaded = loaded_model.layers[-1] + self.assertEqual(3, loaded.input_spec["a"].max_ndim) + self.assertEqual({-1: 2}, loaded.input_spec["a"].axes) + self.assertAllEqual([None, 2, 3], loaded.input_spec["b"].shape) + self.assertEqual("int32", loaded.input_spec["b"].dtype) + + def test_must_restore_from_config_fails_if_layer_is_not_in_scope(self): + class LayerThatShouldFailIfNotAdded(keras.layers.Layer): + _must_restore_from_config = True + + layer = LayerThatShouldFailIfNotAdded() + saved_model_dir = self._save_model_dir() + model = test_utils.get_model_from_layers( + [layer], input_shape=[3], model_type="functional" + ) + model.save(saved_model_dir, save_format="tf") + with self.assertRaisesRegex( + ValueError, "Unknown layer: 'LayerThatShouldFailIfNotAdded'" + ): + _ = keras_load.load(saved_model_dir) + + def test_must_restore_from_config_custom_object_scope(self): + class LayerThatShouldFailIfNotAdded(keras.layers.Layer): + _must_restore_from_config = True + + layer = LayerThatShouldFailIfNotAdded() + model = test_utils.get_model_from_layers( + [layer], input_shape=[3], model_type="functional" + ) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + with object_registration.CustomObjectScope( + {"LayerThatShouldFailIfNotAdded": LayerThatShouldFailIfNotAdded} + ): + _ = keras_load.load(saved_model_dir) + + def test_must_restore_from_config_registration(self): + layer = GlobalLayerThatShouldFailIfNotAdded() + saved_model_dir = self._save_model_dir() + model = test_utils.get_model_from_layers( + [layer], input_shape=[3], model_type="functional" + ) + model.save(saved_model_dir, save_format="tf") + _ = keras_load.load(saved_model_dir) + + def test_multi_input_model(self): + input_1 = keras.layers.Input(shape=(3,)) + input_2 = keras.layers.Input(shape=(5,)) + model = keras.Model([input_1, input_2], [input_1, input_2]) + saved_model_dir = self._save_model_dir() + + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + input_arr_1 = np.random.random((1, 3)).astype("float32") + input_arr_2 = np.random.random((1, 5)).astype("float32") + + outputs = loaded([input_arr_1, input_arr_2]) + self.assertAllEqual(input_arr_1, outputs[0]) + self.assertAllEqual(input_arr_2, outputs[1]) + + def test_revived_sequential(self): + model = keras.models.Sequential() + model.add( + keras.layers.Dense( + 5, input_shape=(3,), kernel_regularizer=regularizers.get("l2") + ) + ) + model.add( + keras.layers.Dense(2, kernel_regularizer=regularizers.get("l2")) + ) + + self.evaluate(tf.compat.v1.variables_initializer(model.variables)) + + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) 
+ + self.assertLen(loaded.layers, 2) + self.assertLen(loaded.losses, 2) + + loaded.pop() + + self.assertLen(loaded.layers, 1) + self.assertLen(loaded.losses, 1) + + loaded.add( + keras.layers.Dense(2, kernel_regularizer=regularizers.get("l2")) + ) + + self.assertLen(loaded.layers, 2) + self.assertLen(loaded.losses, 2) + + def testBatchNormUpdates(self): + model = keras.models.Sequential( + keras.layers.BatchNormalization(input_shape=(1,)) + ) + self.evaluate(tf.compat.v1.variables_initializer(model.variables)) + saved_model_dir = self._save_model_dir() + + with self.captureWritesToStream(sys.stderr) as captured_logs: + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + + # Assert that saving does not log deprecation warnings + # (even if it needs to set learning phase for compat reasons) + if tf.executing_eagerly(): + self.assertNotIn("deprecated", captured_logs.contents()) + + input_arr = tf.constant([[11], [12], [13]], dtype=tf.float32) + input_arr2 = tf.constant([[14], [15], [16]], dtype=tf.float32) + self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0]) + + self.evaluate(loaded(input_arr, training=True)) + if not tf.executing_eagerly(): + self.evaluate(loaded.get_updates_for(input_arr)) + self.assertAllClose( + self.evaluate(loaded.layers[-1].moving_mean), [0.12] + ) + + self.evaluate(loaded(input_arr2, training=False)) + if not tf.executing_eagerly(): + self.evaluate(loaded.get_updates_for(input_arr2)) + self.assertAllClose( + self.evaluate(loaded.layers[-1].moving_mean), [0.12] + ) + + def testDisablingBatchNormTrainableBeforeSaving(self): + # We disable trainable on the batchnorm layers before saving + model = keras.models.Sequential( + keras.layers.BatchNormalization(input_shape=(1,)) + ) + model.trainable = False + self.evaluate(tf.compat.v1.variables_initializer(model.variables)) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + self.evaluate(tf.compat.v1.variables_initializer(loaded.variables)) + input_arr = tf.constant([[11], [12], [13]], dtype=tf.float32) + input_arr2 = tf.constant([[14], [15], [16]], dtype=tf.float32) + self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0]) + + # Trainable should still be disabled after loading + self.evaluate(loaded(input_arr, training=True)) + if not tf.executing_eagerly(): + self.evaluate(loaded.get_updates_for(input_arr)) + self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0.0]) + + # Re-enabling trainable on the loaded model should cause the batchnorm + # layer to start training again. + # Note: this only works in v2. 
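+        # (Likely because in v1 graph mode the loaded update ops were traced
+        # with the frozen `trainable` value baked in, so flipping the
+        # attribute afterwards has no effect.)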
+        if tf.executing_eagerly():
+            loaded.trainable = True
+            self.evaluate(loaded(input_arr, training=True))
+            self.assertAllClose(
+                self.evaluate(loaded.layers[-1].moving_mean), [0.12]
+            )
+
+            self.evaluate(loaded(input_arr2, training=False))
+            self.assertAllClose(
+                self.evaluate(loaded.layers[-1].moving_mean), [0.12]
+            )
+
+    def testSaveWithSignatures(self):
+        model = keras.models.Sequential()
+        model.add(
+            keras.layers.Dense(
+                5, input_shape=(3,), kernel_regularizer=regularizers.get("l2")
+            )
+        )
+        model.add(keras.layers.Dropout(0.5))
+        model.add(
+            keras.layers.Dense(4, kernel_regularizer=regularizers.get("l2"))
+        )
+
+        input_arr = np.random.random((2, 3))
+        target_arr = np.random.random((2, 4))
+
+        model.compile(loss="mse", optimizer="rmsprop")
+        model.train_on_batch(input_arr, target_arr)
+
+        @tf.function(input_signature=[tf.TensorSpec((None, 3))])
+        def predict(inputs):
+            return {"predictions": model(inputs)}
+
+        feature_configs = {
+            "inputs": tf.io.FixedLenFeature(shape=[2, 3], dtype=tf.float32)
+        }
+
+        @tf.function(input_signature=[tf.TensorSpec([None], tf.string)])
+        def parse_and_predict(examples):
+            features = tf.compat.v1.parse_single_example(
+                examples[0], feature_configs
+            )
+            return {
+                "predictions": model(features["inputs"]),
+                "layer_1_outputs": model.layers[0](features["inputs"]),
+            }
+
+        saved_model_dir = self._save_model_dir()
+        model.save(
+            saved_model_dir,
+            save_format="tf",
+            signatures={
+                "predict": predict,
+                "parse_and_predict": parse_and_predict,
+            },
+        )
+
+        loaded = keras_load.load(saved_model_dir)
+
+        self.assertAllClose(
+            model.predict(input_arr),
+            loaded.signatures["predict"](
+                tf.convert_to_tensor(input_arr.astype("float32"))
+            )["predictions"],
+        )
+
+        feature = {
+            "inputs": feature_pb2.Feature(
+                float_list=feature_pb2.FloatList(
+                    value=input_arr.astype("float32").flatten()
+                )
+            )
+        }
+        example = example_pb2.Example(
+            features=feature_pb2.Features(feature=feature)
+        )
+        outputs = loaded.signatures["parse_and_predict"](
+            tf.convert_to_tensor([example.SerializeToString()])
+        )
+        self.assertAllClose(model.predict(input_arr), outputs["predictions"])
+        self.assertAllClose(
+            model.layers[0](input_arr), outputs["layer_1_outputs"]
+        )
+
+    def testTrainingDefaults(self):
+        def assert_training_default(fn, default_value):
+            arg_spec = tf_inspect.getfullargspec(fn)
+            fn_defaults = arg_spec.defaults or []
+            defaults = dict()
+            # The call arg defaults are an n-tuple of the last n elements of
+            # the args list. (n = # of elements that have a default argument)
+            for i in range(-1 * len(fn_defaults), 0):
+                defaults[arg_spec.args[i]] = fn_defaults[i]
+            # The default training arg will be any (non-None) default
+            # specified in the method signature, or None if no value is
+            # specified.
+ defaults.update(arg_spec.kwonlydefaults or {}) + self.assertEqual(defaults["training"], default_value) + + class LayerWithTrainingRequiredArg(keras.engine.base_layer.Layer): + def call(self, inputs, training): + return control_flow_util.smart_cond( + training, lambda: inputs * 0, lambda: tf.identity(inputs) + ) + + class LayerWithTrainingDefaultTrue(keras.engine.base_layer.Layer): + def call(self, inputs, training=True): + return control_flow_util.smart_cond( + training, lambda: inputs * 0, lambda: tf.identity(inputs) + ) + + class Model(keras.models.Model): + def __init__(self): + super().__init__() + self.layer_with_training_default_none = LayerWithLearningPhase() + self.layer_with_training_default_true = ( + LayerWithTrainingDefaultTrue() + ) + self.layer_with_required_training_arg = ( + LayerWithTrainingRequiredArg() + ) + + def call(self, inputs): + x = self.layer_with_training_default_none(inputs) + x += self.layer_with_training_default_true(inputs) + x += self.layer_with_required_training_arg(inputs, False) + return x + + model = Model() + # Build and set model inputs + model.predict(np.ones([1, 3]).astype("float32")) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + load = tf.saved_model.load(saved_model_dir) + + # Ensure that the Keras loader is able to load and build the model. + _ = keras_load.load(saved_model_dir) + + assert_training_default(load.__call__, False) + assert_training_default( + load.layer_with_training_default_none.__call__, False + ) + assert_training_default( + load.layer_with_training_default_true.__call__, True + ) + + # Assert that there are no defaults for layer with required training arg + arg_spec = tf_inspect.getfullargspec( + load.layer_with_required_training_arg.__call__ + ) + self.assertFalse(arg_spec.defaults) # defaults is None or empty + + def testTraceModelWithKwarg(self): + class Model(keras.models.Model): + def call(self, inputs, keyword=None): + return tf.identity(inputs) + + model = Model() + prediction = model.predict(np.ones([1, 3]).astype("float32")) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + with object_registration.custom_object_scope({"Model": Model}): + loaded = keras_load.load(saved_model_dir) + self.assertAllClose( + prediction, loaded.predict(np.ones([1, 3]).astype("float32")) + ) + + loaded_without_scope = keras_load.load(saved_model_dir) + if tf.__internal__.tf2.enabled(): + with self.assertRaises(NotImplementedError): + loaded_without_scope.predict(np.ones([1, 3]).astype("float32")) + + def testFeatureColumns(self): + # TODO(b/120099662): Error with table initialization with Keras models + # in graph mode. 
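The default-pairing logic in the `assert_training_default` helper of `testTrainingDefaults` above is easy to get wrong, so here is a small standalone sketch of how `getfullargspec` defaults line up with argument names (standard-library `inspect` stands in for `tf_inspect`; the `call` signature is illustrative):

```python
import inspect

def call(self, inputs, training=False, mask=None):
    return inputs

spec = inspect.getfullargspec(call)
# `defaults` is an n-tuple aligned with the *last* n positional args.
n = len(spec.defaults or ())
defaults = dict(zip(spec.args[-n:], spec.defaults or ())) if n else {}
# Keyword-only defaults are reported separately.
defaults.update(spec.kwonlydefaults or {})
print(defaults)  # {'training': False, 'mask': None}
```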
+ if tf.executing_eagerly(): + numeric = tf.feature_column.numeric_column("a") + bucketized = tf.feature_column.bucketized_column( + numeric, boundaries=[5, 10, 15] + ) + cat_vocab = ( + tf.feature_column.categorical_column_with_vocabulary_list( + "b", ["1", "2", "3"] + ) + ) + one_hot = tf.feature_column.indicator_column(cat_vocab) + embedding = tf.feature_column.embedding_column( + cat_vocab, dimension=8 + ) + feature_layer = DenseFeatures([bucketized, one_hot, embedding]) + model = keras.models.Sequential(feature_layer) + + features = {"a": np.array([13, 15]), "b": np.array(["1", "2"])} + predictions = model.predict(features) + + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + loaded_predictions = loaded.predict(features) + self.assertAllClose(predictions, loaded_predictions) + + def testSaveTensorKwarg(self): + class LayerWithTensorKwarg(keras.layers.Layer): + def call(self, inputs, tensor=None): + if tensor is not None: + return inputs * tf.cast(tensor, tf.float32) + else: + return inputs + + t = self.evaluate(tf.sequence_mask(1)) + inputs = keras.layers.Input(shape=(3)) + model = keras.models.Model(inputs, LayerWithTensorKwarg()(inputs, t)) + + input_arr = np.random.random((1, 3)) + predictions = model.predict(input_arr) + + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + loaded_predictions = loaded.predict(input_arr) + self.assertAllClose(predictions, loaded_predictions) + + def testModelWithTfFunctionCall(self): + class Subclass(keras.models.Model): + @tf.function + def call(self, inputs, training=False): + return inputs * tf.cast(training, tf.float32) + + model = Subclass() + model.predict(tf.ones((1, 2)), steps=1) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + self.assertAllEqual( + [[1, 5]], + self.evaluate(loaded(tf.constant([[1, 5.0]]), training=True)), + ) + self.assertAllEqual( + [[0, 0]], + self.evaluate(loaded(tf.constant([[1, 5.0]]), training=False)), + ) + + def testReviveFunctionalModel(self): + class CustomAdd(keras.layers.Add): + def build(self, input_shape): + self.w = self.add_weight("w", shape=[]) + super().build(input_shape) + + def call(self, inputs): + outputs = super().call(inputs) + return outputs * self.w + + input1 = keras.layers.Input(shape=(None, 3), name="input_1") + input2 = keras.layers.Input(shape=(None, 3), name="input_2") + + d = keras.layers.Dense(4, name="dense_with_two_inbound_nodes") + output1 = d(input1) + output2 = d(input2) + + # Use a custom layer in this model to ensure that layers aren't being + # recreated directly from the config. 
+ outputs = CustomAdd(name="custom")([output1, output2]) + model = keras.models.Model([input1, input2], outputs, name="save_model") + + self.evaluate(tf.compat.v1.variables_initializer(model.variables)) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + loaded = keras_load.load(saved_model_dir) + self.assertEqual("save_model", loaded.name) + self.assertLen( + loaded.get_layer("dense_with_two_inbound_nodes")._inbound_nodes, 2 + ) + self.assertEqual("CustomAdd", type(loaded.get_layer("custom")).__name__) + self.assertLen(loaded.get_layer("custom").weights, 1) + + def _testAddUpdate(self, scope): + with scope: + layer_with_update = LayerWithUpdate() + model = test_utils.get_model_from_layers( + [layer_with_update], input_shape=(3,) + ) + + x = np.ones((10, 3)) + if test_utils.get_model_type() == "subclass": + model.predict(x, batch_size=10) + self.evaluate(tf.compat.v1.variables_initializer(model.variables)) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + loaded = keras_load.load(saved_model_dir) + loaded_layer = loaded.layers[-1] + self.evaluate(tf.compat.v1.variables_initializer(loaded.variables)) + self.assertEqual(self.evaluate(loaded_layer.v), 0.0) + + loaded.compile("sgd", "mse") + loaded.fit(x, x, batch_size=10) + self.assertEqual(self.evaluate(loaded_layer.v), 1.0) + + @test_combinations.run_with_all_model_types + def testSaveLayerWithUpdates(self): + @tf_contextlib.contextmanager + def nullcontextmanager(): + yield + + self._testAddUpdate(nullcontextmanager()) + + @test_combinations.run_with_all_model_types + def testSaveInStrategyScope(self): + self._testAddUpdate(tf.distribute.MirroredStrategy().scope()) + + def testSaveTimeDistributedLayer(self): + model = keras.Sequential( + [ + keras.layers.TimeDistributed( + keras.layers.Dense( + 1, kernel_regularizer=regularizers.get("l2") + ), + input_shape=(None, 1), + ) + ] + ) + predictions = model.predict_on_batch(tf.ones((3, 2, 1))) + + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + loaded = keras_load.load(saved_model_dir) + self.assertAllClose( + loaded.predict_on_batch(tf.ones((3, 2, 1))), predictions + ) + + @parameterized.named_parameters( + [("with_unrolling", True), ("no_unrolling", False)] + ) + def testSaveStatefulRNN(self, unroll): + batch = 12 + timesteps = 10 + input_dim = 8 + input_arr = np.ones((batch, timesteps, input_dim)).astype("float32") + + cells = [keras.layers.LSTMCell(32), keras.layers.LSTMCell(64)] + if unroll: + x = keras.Input(batch_shape=(batch, timesteps, input_dim)) + else: + x = keras.Input(batch_shape=(batch, None, input_dim)) + layer = keras.layers.RNN(cells, stateful=True, unroll=unroll) + y = layer(x) + + model = keras.Model(x, y) + model.compile( + "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly() + ) + model.train_on_batch( + np.zeros((batch, timesteps, input_dim)).astype("float32"), + np.zeros((batch, 64)).astype("float32"), + ) + + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + loaded = keras_load.load(saved_model_dir) + loaded_layer = loaded.layers[1] + + if not tf.executing_eagerly(): + keras.backend.get_session() # force variable initialization + + self.assertAllClose(layer.states, loaded_layer.states) + self.assertAllClose(model(input_arr), loaded(input_arr)) + + def testSaveBidirectionalLSTM(self): + # Make sure that the input spec of an unrolled RNN is not used when + # wrapped in a Bidirectional layer. 
+ # https://github.com/keras-team/keras/issues/15454 + input_layer = keras.Input( + batch_input_shape=(1, 15, 128), name="input", dtype=tf.float32 + ) + lstm_layer = keras.layers.Bidirectional( + keras.layers.LSTM( + units=64, + name="lstm", + dropout=0.2, + trainable=False, + unroll=True, + ) + ) + output_layer = lstm_layer(input_layer) + model = keras.Model(input_layer, output_layer) + saved_model_dir = self._save_model_dir() + self.evaluate(tf.compat.v1.variables_initializer(model.variables)) + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + input_arr = np.random.random((1, 15, 128)).astype("float32") + self.assertAllClose(model(input_arr), loaded(input_arr)) + + @parameterized.named_parameters([("stateful", True), ("stateless", False)]) + def testSaveConvLSTM2D(self, stateful): + data_format = "channels_first" + batch, timesteps, channels, rows, cols = 12, 10, 8, 4, 4 + input_arr = np.ones((batch, timesteps, channels, rows, cols)).astype( + "float32" + ) + layer = keras.layers.ConvLSTM2D( + filters=16, + kernel_size=(1, 1), + data_format=data_format, + stateful=stateful, + ) + x = keras.Input(batch_shape=(batch, timesteps, channels, rows, cols)) + y = layer(x) + model = keras.Model(x, y) + + predict_1 = model(input_arr) + self.evaluate([v.initializer for v in model.variables]) + saved_model_dir = self._save_model_dir() + + model.save(saved_model_dir, save_format="tf") + del model + + loaded = keras_load.load(saved_model_dir) + self.evaluate([v.initializer for v in loaded.variables]) + if stateful: + loaded.reset_states() + predict_2 = loaded(input_arr) + self.assertAllClose(predict_1, predict_2) + + def testSaveWithRaggedInputs(self): + class EmbeddingMerger(keras.layers.Layer): + def __init__(self, list_features, **kwargs): + super().__init__(**kwargs) + self._supports_ragged_inputs = True + self.embeddings = { + feature: keras.layers.Embedding(10, 3) + for feature in list_features + } + self.mean = keras.layers.Lambda( + tf.reduce_mean, arguments=dict(axis=1) + ) + + def call(self, inputs): + tensors = [self.embeddings[col](inputs[col]) for col in inputs] + tensors = [self.mean(inp) for inp in tensors] + return keras.layers.Add()(tensors) + + list_features = ["feature_1", "feature_2"] + feature_1 = tf.ragged.constant([[0.0], [1, 3]]) + feature_2 = tf.ragged.constant([[1.0, 2], [4]]) + f = {"feature_1": feature_1, "feature_2": feature_2} + f_inputs = { + "feature_1": keras.Input( + shape=(None,), name="feature_1", ragged=True + ), + "feature_2": keras.Input( + shape=(None,), name="feature_2", ragged=True + ), + } + + out = EmbeddingMerger(list_features)(f_inputs) + model = keras.Model(f_inputs, out) + self.evaluate(tf.compat.v1.variables_initializer(model.variables)) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + loaded = keras_load.load(saved_model_dir) + self.evaluate(tf.compat.v1.variables_initializer(loaded.variables)) + self.assertAllClose(model.predict(f), loaded.predict(f)) + + def testSaveMultipleInputs(self): + class CustomLayer(keras.layers.Layer): + def call(self, *input_list): + self.add_loss(input_list[-2] * 2) + return sum( + input_list[:-1] + ) # The test's last input is a non-tensor arg + + class CustomModel(keras.Model): + def build(self, _): + self.layer = CustomLayer() + + def call(self, *inputs): + inputs = list(inputs) + inputs.append( + object() + ) # Test that the layer handles non-tensor inputs + return self.layer(*inputs) + + model = CustomModel() + inp = [ + 
tf.constant(i, shape=[1, 1], dtype=tf.float32) for i in range(1, 5) + ] + expected = model(*inp) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + actual = loaded(*inp) + self.assertAllEqual(self.evaluate(expected), self.evaluate(actual)) + + def testSaveMultipleInputsWithTraining(self): + class CustomModel(keras.Model): + def call(self, input_1, training, input_2): + if training: + return input_1 + else: + return input_2 + + inp1 = tf.constant(1.0, shape=[1]) + inp2 = tf.constant(2.0, shape=[1]) + + model = CustomModel() + self.assertEqual(self.evaluate(model(inp1, True, inp2)), 1.0) + self.assertEqual(self.evaluate(model(inp1, False, inp2)), 2.0) + + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + self.assertEqual(self.evaluate(loaded(inp1, True, inp2)), 1.0) + self.assertEqual(self.evaluate(loaded(inp1, False, inp2)), 2.0) + + def test_wrapped_layer_training(self): + class Custom(keras.models.Model): + def __init__(self): + super().__init__() + self.layer = LayerWithLearningPhase() + + def call(self, inputs): + return self.layer(inputs) + + model = Custom() + x = tf.constant(1.0, shape=[1, 1]) + expected_default = model(x) + expected_training_true = model(x, training=True) + expected_training_false = model(x, training=False) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + def assert_loaded_model(loaded): + actual_default = loaded(x) + actual_training_true = loaded(x, training=True) + actual_training_false = loaded(x, training=False) + self.assertAllClose( + [ + expected_default, + expected_training_true, + expected_training_false, + ], + [actual_default, actual_training_true, actual_training_false], + ) + + assert_loaded_model(keras_load.load(saved_model_dir)) + assert_loaded_model(tf.saved_model.load(saved_model_dir)) + + @parameterized.named_parameters([("true", True), ("false", False)]) + def test_save_layer_autocast(self, autocast): + class CustomLayer(keras.layers.Layer): + def __init__(self): + super().__init__(autocast=autocast) + + class CustomModel(keras.Model): + def __init__(self): + super().__init__(autocast=autocast) + + def call(self, inputs): + return inputs + + x = tf.constant([3], dtype=tf.float64) + + x_in = keras.Input((1,)) + output = CustomLayer()(x_in) + output = CustomModel()(output) + model = keras.Model(inputs=x_in, outputs=output) + + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + loaded = keras_load.load(saved_model_dir) + self.assertEqual(autocast, loaded.layers[-1]._autocast) + self.assertEqual(autocast, loaded.layers[-2]._autocast) + self.assertEqual(self.evaluate(model(x)), self.evaluate(loaded(x))) + + +class TestSavedModelFormat(tf.test.TestCase): + def _save_model_dir(self, dirname="saved_model"): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + return os.path.join(temp_dir, dirname) + + def test_load_with_custom_model_and_layer(self): + class CustomLayer(keras.layers.Layer): + def __call__(self, inputs): + return inputs + + class Model(keras.models.Model): + def __init__(self): + super().__init__() + self.layer = CustomLayer() # noqa: F821 + + @tf.function(input_signature=[tf.TensorSpec([None, 1])]) + def call(self, inputs): + return self.layer(inputs) + + model = Model() + inp = tf.constant([[1.0]]) + model(inp) + saved_model_dir = 
self._save_model_dir()
+        model.save(saved_model_dir, save_format="tf")
+
+        # Even if the `CustomLayer` is not provided in `custom_object_scope`,
+        # `Model` still has that reference.
+        with object_registration.custom_object_scope({"Model": Model}):
+            loaded = keras_load.load(saved_model_dir)
+            self.assertAllEqual([[1.0]], self.evaluate(loaded(inp)))
+            self.assertAllEqual([[1.0]], self.evaluate(loaded.layer(inp)))
+            self.assertIsInstance(loaded.layer, CustomLayer)
+
+        # If `CustomLayer` is provided in `custom_object_scope`, it should of
+        # course use that custom class.
+        with object_registration.custom_object_scope(
+            {"Model": Model, "CustomLayer": CustomLayer}
+        ):
+            loaded = keras_load.load(saved_model_dir)
+            self.assertAllEqual([[1.0]], self.evaluate(loaded(inp)))
+            self.assertAllEqual([[1.0]], self.evaluate(loaded.layer(inp)))
+            self.assertIsInstance(loaded.layer, CustomLayer)
+
+    def test_save_without_tracing(self):
+        class DoNotTrace(keras.layers.Layer):
+            def __init__(self):
+                super().__init__()
+                self.input_spec = keras.layers.InputSpec(shape=[None])
+                self.built = True
+
+            def call(self, inputs):
+                raise ValueError("I said do not trace")
+
+            def get_config(self):
+                return {}
+
+            @property
+            def _use_input_spec_as_call_signature(self):
+                return True
+
+        root = keras.models.Sequential()
+        root.add(keras.layers.Input(shape=(3,)))
+        root.attached_layer = DoNotTrace()
+
+        saved_model_dir = self._save_model_dir()
+
+        # With the default settings, the call function is traced.
+        with self.assertRaisesRegex(ValueError, "do not trace"):
+            root.save(saved_model_dir, save_format="tf")
+
+        # When saving the config only, the layer call function should not be
+        # traced.
+        root.save(saved_model_dir, save_format="tf", save_traces=False)
+        loaded = tf.saved_model.load(saved_model_dir)
+        self.assertTrue(hasattr(loaded, "attached_layer"))
+
+        # This should raise an error when loaded without the custom object
+        loaded = keras_load.load(saved_model_dir)
+        with self.assertRaisesRegex(ValueError, "Cannot call custom layer"):
+            loaded.attached_layer(tf.constant([1.0]))
+
+        # Try loading with the custom objects
+        with object_registration.CustomObjectScope({"DoNotTrace": DoNotTrace}):
+            loaded = keras_load.load(saved_model_dir)
+            with self.assertRaisesRegex(ValueError, "I said do not trace"):
+                loaded.attached_layer(tf.constant([1.0]))
+
+    def test_load_non_keras_saved_model(self):
+        model = test_utils.get_small_functional_mlp(1, 4, input_dim=3)
+        saved_model_dir = self._save_model_dir()
+        tf.saved_model.save(model, saved_model_dir)
+        with self.assertRaisesRegex(
+            ValueError, "Unable to create a Keras model"
+        ):
+            keras_load.load(saved_model_dir)
+
+    def test_random_generator_custom_layer(self):
+        class CustomDropout(keras.layers.Layer):
+            def __init__(self, dropout_rate=0.1, **kwargs):
+                super().__init__(**kwargs)
+                self.dropout_rate = dropout_rate
+                self.dropout = keras.layers.Dropout(
+                    dropout_rate, rng_type="stateful"
+                )
+
+            def call(self, inputs, training=False):
+                return self.dropout(inputs, training=training)
+
+        root = keras.models.Sequential(
+            [keras.layers.Input(shape=(3,)), CustomDropout()]
+        )
+        saved_model_dir = self._save_model_dir()
+        root.save(saved_model_dir, save_format="tf")
+
+        loaded = keras_load.load(saved_model_dir)
+
+        output = loaded(tf.random.uniform([1, 3]), training=True)
+        self.assertAllEqual([1, 3], output.shape)
+
+    def test_random_generator_with_tracing(self):
+        # This test is to ensure we trace the training = True function first,
+        # otherwise tf.function will
raise error about creating variables in the + # non-first call. + class LayerWithDropout(keras.layers.Layer): + def __init__(self, dropout_rate): + super().__init__() + self.dropout_rate = dropout_rate + self.dropout_layer = keras.layers.Dropout(self.dropout_rate) + + def call(self, inputs, training=None): + if not training: + return inputs + else: + return self.dropout_layer(inputs, training=training) + + root = keras.models.Sequential( + [keras.layers.Input(shape=(3,)), LayerWithDropout(0.1)] + ) + saved_model_dir = self._save_model_dir() + root.save(saved_model_dir, save_format="tf") + + loaded = keras_load.load(saved_model_dir) + + output = loaded(tf.random.uniform([1, 3]), training=True) + self.assertAllEqual([1, 3], output.shape) + + +class TestLayerCallTracing(tf.test.TestCase, parameterized.TestCase): + def test_functions_have_same_trace(self): + class Layer(keras.engine.base_layer.Layer): + def call(self, inputs): + return inputs + + def call2(self, inputs): + return inputs * 2 + + layer = Layer() + + call_collection = keras_save.LayerCallCollection(layer) + fn = call_collection.add_function(layer.call, "call", True) + fn2 = call_collection.add_function(layer.call2, "call2", True) + + with keras_save.tracing_scope(): + fn(np.ones((2, 3))) + fn(np.ones((4, 5))) + + self.assertLen( + fn.wrapped_call._list_all_concrete_functions_for_serialization(), 2 + ) + self.assertLen( + fn2.wrapped_call._list_all_concrete_functions_for_serialization(), 2 + ) + + # Check that the shapes are correct + self.assertEqual( + {(2, 3), (4, 5)}, + set( + tuple(c.structured_input_signature[0][0].shape.as_list()) + for c in fn2.wrapped_call._list_all_concrete_functions_for_serialization() # noqa: E501 + ), + ) + + def test_training_arg_replacement(self): + def assert_num_traces(layer_cls, training_keyword): + layer = layer_cls() + call_collection = keras_save.LayerCallCollection(layer) + fn = call_collection.add_function(layer.call, "call", True) + + with keras_save.tracing_scope(): + fn(np.ones((2, 3)), training=True) + self.assertLen( + fn.wrapped_call._list_all_concrete_functions_for_serialization(), # noqa: E501 + 2, + ) + with keras_save.tracing_scope(): + fn(np.ones((2, 4)), training=False) + self.assertLen( + fn.wrapped_call._list_all_concrete_functions_for_serialization(), # noqa: E501 + 4, + ) + + if training_keyword: + with keras_save.tracing_scope(): + fn(np.ones((2, 5)), True) + self.assertLen( + fn.wrapped_call._list_all_concrete_functions_for_serialization(), # noqa: E501 + 6, + ) + with keras_save.tracing_scope(): + fn(np.ones((2, 6))) + self.assertLen( + fn.wrapped_call._list_all_concrete_functions_for_serialization(), # noqa: E501 + 8, + ) + + class LayerWithTrainingKeyword(keras.engine.base_layer.Layer): + def call(self, inputs, training=False): + return inputs * training + + assert_num_traces(LayerWithTrainingKeyword, training_keyword=True) + + class LayerWithKwargs(keras.engine.base_layer.Layer): + def call(self, inputs, **kwargs): + return inputs * kwargs["training"] + + assert_num_traces(LayerWithKwargs, training_keyword=False) + + class LayerWithChildLayer(keras.engine.base_layer.Layer): + def __init__(self): + self.child = LayerWithKwargs() + super().__init__() + + def call(self, inputs): + return self.child(inputs) + + assert_num_traces(LayerWithChildLayer, training_keyword=False) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_maintains_losses(self): + layer = LayerWithLoss() + layer(np.ones((2, 3))) + previous_losses = 
layer.losses[:] + + call_collection = keras_save.LayerCallCollection(layer) + fn = call_collection.add_function(layer.call, "call", True) + fn(np.ones((2, 3))) + + self.assertAllEqual( + self.evaluate(previous_losses), self.evaluate(layer.losses) + ) + + +@object_registration.register_keras_serializable("Testing") +class CustomMeanMetric(keras.metrics.Mean): + def update_state(self, *args): + # Sometimes built-in metrics return an op in update_state. Custom + # metrics don't support returning ops, so wrap the update_state method + # while returning nothing. + super().update_state(*args) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) +class MetricTest(tf.test.TestCase, parameterized.TestCase): + def _save_model_dir(self, dirname="saved_model"): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + return os.path.join(temp_dir, dirname) + + def generate_inputs(self, num_tensor_args, shape=(1, 5)): + return [ + np.random.uniform(0, 1, shape).astype("float32") + for _ in range(num_tensor_args) + ] + + def _test_metric_save_and_load( + self, + metric, + save_dir, + num_tensor_args, + shape=(1, 5), + test_sample_weight=True, + ): + with self.cached_session(): + model = test_utils.get_model_from_layers( + [keras.layers.Layer()], input_shape=[3], model_type="functional" + ) + model.saved_metric = metric + model.save(save_dir, save_format="tf") + loaded_model = keras_load.load(save_dir) + loaded = loaded_model.saved_metric + self.evaluate([v.initializer for v in loaded.variables]) + self.assertEqual(metric.name, loaded.name) + self.assertEqual(metric.dtype, loaded.dtype) + + inputs = self.generate_inputs(num_tensor_args, shape) + actual = self.evaluate(metric(*inputs)) + self.assertAllClose(actual, loaded(*inputs)) + self.assertAllClose(metric.variables, loaded.variables) + + # Test with separate calls to update state and result. + inputs = self.generate_inputs(num_tensor_args, shape) + self.evaluate(metric.update_state(*inputs)) + self.evaluate(loaded.update_state(*inputs)) + actual = self.evaluate(metric.result()) + self.assertAllClose(actual, loaded.result()) + + if test_sample_weight: + # Test with sample weights input. 
+ inputs = self.generate_inputs(num_tensor_args, shape) + sample_weight = self.generate_inputs(1, [])[0] + inputs.append(sample_weight) + + actual = self.evaluate(metric(*inputs)) + self.assertAllClose(actual, loaded(*inputs)) + return loaded + + @parameterized.named_parameters( + [ + ("mean", keras.metrics.Mean, 1, (1, 5)), + ("false_positives", keras.metrics.FalsePositives, 2, (1, 5)), + ( + "precision_at_top_k", + keras.metrics.Precision, + 2, + (2, 3, 4), + {"top_k": 2, "class_id": 1}, + ), + ( + "precision_at_recall", + keras.metrics.PrecisionAtRecall, + 2, + (1, 5), + {"recall": 0.8}, + ), + ("auc", keras.metrics.AUC, 2, (1, 5), {"multi_label": True}), + ("cosine_similarity", keras.metrics.CosineSimilarity, 2, (2, 3, 1)), + ] + ) + def test_metric(self, metric_cls, num_tensor_args, shape, init_kwargs=None): + init_kwargs = init_kwargs or {} + metric = metric_cls(**init_kwargs) + metric(*self.generate_inputs(num_tensor_args, shape)) + self.evaluate([v.initializer for v in metric.variables]) + loaded = self._test_metric_save_and_load( + metric, self._save_model_dir(), num_tensor_args, shape + ) + self.assertEqual(type(loaded), type(metric)) + + @parameterized.named_parameters( + [ + ("mean", keras.metrics.Mean, 1, False), + ("auc", keras.metrics.AUC, 2, False), + ("mean_tensor", keras.metrics.MeanTensor, 1, True), + ] + ) + def test_custom_metric(self, base_cls, num_tensor_args, requires_build): + class CustomMetric(base_cls): + def update_state(self, *args): + # Sometimes built-in metrics return an op in update_state. + # Custom metrics don't support returning ops, so wrap the + # update_state method while returning nothing. + super().update_state(*args) + + with self.cached_session(): + metric = CustomMetric() + save_dir = self._save_model_dir("first_save") + + if requires_build: + metric(*self.generate_inputs(num_tensor_args)) + + self.evaluate([v.initializer for v in metric.variables]) + + with self.assertRaisesRegex( + ValueError, "Unable to restore custom object" + ): + self._test_metric_save_and_load( + metric, save_dir, num_tensor_args + ) + with object_registration.CustomObjectScope( + {"CustomMetric": CustomMetric} + ): + loaded = self._test_metric_save_and_load( + metric, save_dir, num_tensor_args, test_sample_weight=False + ) + + self._test_metric_save_and_load( + loaded, + self._save_model_dir("second_save"), + num_tensor_args, + test_sample_weight=False, + ) + + def test_registered_custom_metric(self): + + with self.cached_session(): + metric = CustomMeanMetric() + save_dir = self._save_model_dir("first_save") + self.evaluate([v.initializer for v in metric.variables]) + loaded = self._test_metric_save_and_load( + metric, save_dir, num_tensor_args=1, test_sample_weight=False + ) + + self._test_metric_save_and_load( + loaded, + self._save_model_dir("second_save"), + num_tensor_args=1, + test_sample_weight=False, + ) + + def test_custom_metric_wrapped_call(self): + class NegativeMean(keras.metrics.Mean): + @tf.function(input_signature=[tf.TensorSpec(None, tf.float32)]) + def update_state(self, value): + super().update_state(-value) + + metric = NegativeMean() + self.evaluate([v.initializer for v in metric.variables]) + with object_registration.CustomObjectScope( + {"NegativeMean": NegativeMean} + ): + self._test_metric_save_and_load( + metric, self._save_model_dir(), 1, test_sample_weight=False + ) + + @test_combinations.run_with_all_model_types + def test_custom_metric_model(self): + # TODO(b/134519980): Issue with `model.fit` if the model call function + # uses a 
`tf.function` in graph mode. + if not tf.executing_eagerly(): + return + + x = np.random.random((1, 3)) + y = np.random.random((1, 4)) + + class CustomMetric(keras.metrics.MeanSquaredError): + pass + + def zero_metric(y_true, y_pred): + del y_true, y_pred + return 0 + + model = test_utils.get_small_mlp(1, 4, input_dim=3) + model.compile( + loss="mse", optimizer="SGD", metrics=[CustomMetric(), zero_metric] + ) + model.fit(x, y) + saved_model_dir = self._save_model_dir() + model.save(saved_model_dir, save_format="tf") + + with self.assertRaisesRegex(ValueError, "custom_objects"): + keras_load.load(saved_model_dir) + + with object_registration.CustomObjectScope( + {"CustomMetric": CustomMetric, "zero_metric": zero_metric} + ): + loaded = keras_load.load(saved_model_dir) + + self.evaluate([v.initializer for v in loaded.variables]) + loaded.fit(x, y) + + +class TestUpdateMetadata(tf.test.TestCase): + def testAddFullSaveSpec(self): + save_spec = tf.TensorSpec([3, 5], dtype=tf.int32) + node_metadata = json_utils.Encoder().encode({"save_spec": save_spec}) + + metadata = saved_metadata_pb2.SavedMetadata() + metadata.nodes.add( + version=versions_pb2.VersionDef( + producer=1, min_consumer=1, bad_consumers=[] + ), + identifier="_tf_keras_model", + metadata=node_metadata, + ) + + new_metadata = keras_load._update_to_current_version(metadata) + node_metadata = json_utils.decode(new_metadata.nodes[0].metadata) + expected_full_spec = ([tf.TensorSpec(shape=(3, 5), dtype=tf.int32)], {}) + self.assertAllEqual( + expected_full_spec, node_metadata.get("full_save_spec") + ) + + +if __name__ == "__main__": + with saved_model_utils.keras_option_scope( + save_traces=False, in_tf_saved_model_scope=True + ): + tf.test.main() diff --git a/keras/saving/legacy/saved_model/serialized_attributes.py b/keras/saving/legacy/saved_model/serialized_attributes.py new file mode 100644 index 000000000000..6780ad669b94 --- /dev/null +++ b/keras/saving/legacy/saved_model/serialized_attributes.py @@ -0,0 +1,376 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Helper classes that list&validate all attributes to serialize to SavedModel. +""" + +import tensorflow.compat.v2 as tf + +from keras.saving.legacy.saved_model import constants +from keras.saving.legacy.saved_model import order_preserving_set as ops +from keras.saving.legacy.saved_model import save_impl +from keras.utils.generic_utils import LazyLoader + +# TODO(b/134426265): Switch back to single-quotes to match the rest of the file +# once the issue with copybara is fixed. 
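The `LazyLoader` imports just below exist to defer the import of `keras.engine` modules and so break the import cycle with the saving code. A minimal stand-in showing the idea (the `LazyModule` class here is hypothetical, not the real `keras.utils.generic_utils.LazyLoader`):

```python
import importlib

class LazyModule:
    """Defers the import until the first attribute access."""

    def __init__(self, name):
        self._name = name
        self._module = None

    def __getattr__(self, attr):
        if self._module is None:
            self._module = importlib.import_module(self._name)
        return getattr(self._module, attr)

json = LazyModule("json")    # nothing is imported yet, so no cycle
print(json.dumps({"a": 1}))  # the real import happens here, on first use
```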
+
+base_layer = LazyLoader("base_layer", globals(), "keras.engine.base_layer")
+training_lib = LazyLoader("training_lib", globals(), "keras.engine.training")
+metrics = LazyLoader("metrics", globals(), "keras.metrics")
+base_rnn = LazyLoader("base_rnn", globals(), "keras.layers.rnn.base_rnn")
+
+
+class SerializedAttributes:
+    """Class that tracks and validates all serialization attributes.
+
+    Keras models contain many Python-defined components. For example, the
+    trainable_variable property lists the model's trainable variables by
+    recursively retrieving the trainable variables from each of the child
+    layers. Another example is model.call, a python function that calls child
+    layers and adds ops to the backend graph.
+
+    Only TensorFlow checkpointable objects and functions can be serialized to
+    SavedModel. Serializing a Keras model as-is results in a checkpointable
+    object that does not resemble a Keras model at all. Thus, extra
+    checkpointable objects and functions must be created during serialization.
+
+    **Defining new serialized attributes**
+    Child classes should be defined using:
+    SerializedAttributes.with_attributes(
+        'name', checkpointable_objects=[...],
+        functions=[...], copy_from=[...])
+    This class is used to cache generated checkpointable objects and
+    functions, ensuring that new objects and functions are generated a single
+    time.
+
+    **Usage during serialization**
+    Each Layer/Model object should have a corresponding instance of
+    SerializedAttributes. Create a new instance by calling
+    `SerializedAttributes.new(obj)`. Objects and functions may be saved using
+    `.set_and_validate_objects`/`.set_and_validate_functions`.
+    The properties `.checkpointable_objects` and `.functions` return the
+    cached values.
+
+    **Adding/changing attributes to save to SavedModel**
+    1. Change the call to `SerializedAttributes.with_attributes` in the
+       correct class:
+       - CommonEndpoints: Base attributes to be added during serialization. If
+         these attributes are present in a Trackable object, it can be
+         deserialized to a Keras Model.
+       - LayerAttributes: Attributes to serialize for Layer objects.
+       - ModelAttributes: Attributes to serialize for Model objects.
+    2. Update the class docstring.
+    3. Update arguments to any calls to `set_and_validate_*`. For example, if
+       `call_raw_tensors` is added to the ModelAttributes function list, then
+       a `call_raw_tensors` function should be passed to
+       `set_and_validate_functions`.
+
+    **Common endpoints vs other attributes**
+    Only common endpoints are attached directly to the root object.
+    Keras-specific attributes are saved to a separate trackable object with
+    the name "keras_api". The number of objects attached to the root is
+    limited because any naming conflicts will cause user code to break.
+
+    Another reason is that this will only affect users who call
+    `tf.saved_model.load` instead of `tf.keras.models.load_model`. These are
+    advanced users who are likely to have defined their own tf.functions and
+    trackable objects. The added Keras-specific attributes are kept out of the
+    way in the "keras_api" namespace.
+
+    Properties defined in this class may be used to filter out keras-specific
+    attributes:
+    - `functions_to_serialize`: Returns dict of functions to attach to the
+      root object.
+ - `checkpointable_objects_to_serialize`: Returns dict of objects to attach + to the root object (including separate trackable object containing + keras-specific attributes) + + All changes to the serialized attributes must be backwards-compatible, so + attributes should not be removed or modified without sufficient + justification. + """ + + @staticmethod + def with_attributes( + name, checkpointable_objects=None, functions=None, copy_from=None + ): + """Creates a subclass with all attributes as specified in the arguments. + + Args: + name: Name of subclass + checkpointable_objects: List of checkpointable objects to be + serialized in the SavedModel. + functions: List of functions to be serialized in the SavedModel. + copy_from: List of other SerializedAttributes subclasses. The returned + class will copy checkpoint objects/functions from each subclass. + + Returns: + Child class with attributes as defined in the `checkpointable_objects` + and `functions` lists. + """ + checkpointable_objects = checkpointable_objects or [] + functions = functions or [] + + if copy_from is not None: + for cls in copy_from: + checkpointable_objects.extend(cls.all_checkpointable_objects) + functions.extend(cls.all_functions) + + # OrderPreservingSets are used here to guarantee serialization + # determinism of Keras objects. + classdict = { + "all_checkpointable_objects": ops.OrderPreservingSet( + checkpointable_objects + ), + "all_functions": ops.OrderPreservingSet(functions), + } + return type(name, (SerializedAttributes,), classdict) + + @staticmethod + def new(obj): + """Returns a new SerializedAttribute object.""" + if isinstance(obj, training_lib.Model): + return ModelAttributes() + elif isinstance(obj, metrics.Metric): + return MetricAttributes() + elif isinstance(obj, base_rnn.RNN): + return RNNAttributes() + elif isinstance(obj, base_layer.Layer): + return LayerAttributes() + else: + raise TypeError( + "Internal error during serialization. Expected Keras " + f"Layer object. 
Received: {obj} "
+                f"(of type {type(obj)})"
+            )
+
+    def __init__(self):
+        self._object_dict = {}
+        self._function_dict = {}
+        self._keras_trackable = tf.__internal__.tracking.AutoTrackable()
+
+    @property
+    def functions(self):
+        """Returns dictionary of all functions."""
+        return {
+            key: value
+            for key, value in self._function_dict.items()
+            if value is not None
+        }
+
+    @property
+    def checkpointable_objects(self):
+        """Returns dictionary of all checkpointable objects."""
+        return {
+            key: value
+            for key, value in self._object_dict.items()
+            if value is not None
+        }
+
+    @property
+    def functions_to_serialize(self):
+        """Returns functions to attach to the root object during
+        serialization."""
+        functions = {}
+        for key, v in self.functions.items():
+            if key in CommonEndpoints.all_functions:
+                functions[key] = (
+                    v.wrapped_call if isinstance(v, save_impl.LayerCall) else v
+                )
+        return functions
+
+    @property
+    def objects_to_serialize(self):
+        """Returns objects to attach to the root object during
+        serialization."""
+        objects = {
+            key: value
+            for key, value in self.checkpointable_objects.items()
+            if key in CommonEndpoints.all_checkpointable_objects
+        }
+        objects[constants.KERAS_ATTR] = self._keras_trackable
+        return objects
+
+    def set_and_validate_functions(self, function_dict):
+        """Saves function dictionary, and validates dictionary values."""
+        for key in self.all_functions:
+            if key in function_dict:
+                # Not all functions are required.
+                if function_dict[key] is not None and not isinstance(
+                    function_dict[key],
+                    (
+                        tf.__internal__.function.Function,
+                        tf.types.experimental.ConcreteFunction,
+                        save_impl.LayerCall,
+                    ),
+                ):
+                    raise ValueError(
+                        "The tf.function dictionary contained a non-function "
+                        f"object: {function_dict[key]} (for key {key}). Only "
+                        "tf.function instances or ConcreteFunction instances "
+                        "should be passed."
+                    )
+                fn = function_dict[key]
+                self._function_dict[key] = fn
+
+                # Extract TensorFlow `Function` from LayerCall.
+                tf_fn = (
+                    fn.wrapped_call
+                    if isinstance(fn, save_impl.LayerCall)
+                    else fn
+                )
+                setattr(self._keras_trackable, key, tf_fn)
+            else:
+                raise ValueError(
+                    f"Function {key} missing from serialized "
+                    "tf.function dictionary."
+                )
+        return self.functions
+
+    def set_and_validate_objects(self, object_dict):
+        """Saves objects to a dictionary, and validates the values."""
+        for key in self.all_checkpointable_objects:
+            if key in object_dict:
+                if not isinstance(
+                    object_dict[key], tf.__internal__.tracking.Trackable
+                ):
+                    raise ValueError(
+                        "The object dictionary contained a non-trackable "
+                        f"object: {object_dict[key]} (for key {key}). "
+                        "Only trackable objects are "
+                        "allowed, such as Keras layers/models or "
+                        "tf.Module instances."
+                    )
+                self._object_dict[key] = object_dict[key]
+                setattr(self._keras_trackable, key, object_dict[key])
+            else:
+                raise ValueError(
+                    f"Object {key} missing from serialized object dictionary."
+                )
+        return self.checkpointable_objects
+
+
+class CommonEndpoints(
+    SerializedAttributes.with_attributes(
+        "CommonEndpoints",
+        checkpointable_objects=[
+            "variables",
+            "trainable_variables",
+            "regularization_losses",
+        ],
+        functions=[
+            "__call__",
+            "call_and_return_all_conditional_losses",
+            "_default_save_signature",
+        ],
+    )
+):
+    """Common endpoints shared by all models loadable by Keras.
+
+    List of all attributes:
+      variables: List of all variables in the model and its sublayers.
+      trainable_variables: List of all trainable variables in the model and
+        its sublayers.
+ regularization_losses: List of all unconditional losses (losses not + dependent on the inputs) in the model and its sublayers. + __call__: Function that takes inputs and returns the outputs of the model + call function. + call_and_return_all_conditional_losses: Function that returns a tuple of + (call function outputs, list of all losses that depend on the inputs). + _default_save_signature: Traced model call function. This is only included + if the top level exported object is a Keras model. + """ + + +class LayerAttributes( + SerializedAttributes.with_attributes( + "LayerAttributes", + checkpointable_objects=[ + "non_trainable_variables", + "layers", + "metrics", + "layer_regularization_losses", + "layer_metrics", + ], + functions=[ + "call_and_return_conditional_losses", + "activity_regularizer_fn", + ], + copy_from=[CommonEndpoints], + ) +): + """Layer checkpointable objects + functions saved to the SavedModel. + + List of all attributes: + All attributes from CommonEndpoints + non_trainable_variables: List of non-trainable variables in the layer and + its sublayers. + layers: List of all sublayers. + metrics: List of all metrics in the layer and its sublayers. + call_and_return_conditional_losses: Function that takes inputs and returns + a tuple of (outputs of the call function, list of input-dependent + losses). The list of losses excludes the activity regularizer function, + which is separate to allow the deserialized Layer object to define a + different activity regularizer. + activity_regularizer_fn: Callable that returns the activity regularizer + loss + layer_regularization_losses: List of losses owned only by this layer. + layer_metrics: List of metrics owned by this layer. + """ + + +class ModelAttributes( + SerializedAttributes.with_attributes( + "ModelAttributes", copy_from=[LayerAttributes] + ) +): + """Model checkpointable objects + functions saved to the SavedModel. + + List of all attributes: + All attributes from LayerAttributes (including CommonEndpoints) + """ + + # TODO(kathywu): Add attributes `compile_losses` and `compile_metrics`, + # which list all losses and metrics defined by `model.compile`. + + +class MetricAttributes( + SerializedAttributes.with_attributes( + "MetricAttributes", + checkpointable_objects=["variables"], + functions=[], + ) +): + """Attributes that are added to Metric objects when saved to SavedModel. + + List of all attributes: + variables: list of all variables + """ + + pass + + +class RNNAttributes( + SerializedAttributes.with_attributes( + "RNNAttributes", + checkpointable_objects=["states"], + copy_from=[LayerAttributes], + ) +): + """RNN checkpointable objects + functions that are saved to the SavedModel. + + List of all attributes: + All attributes from LayerAttributes (including CommonEndpoints) + states: List of state variables + """ diff --git a/keras/saving/legacy/saved_model/utils.py b/keras/saving/legacy/saved_model/utils.py new file mode 100644 index 000000000000..62c49f7785b1 --- /dev/null +++ b/keras/saving/legacy/saved_model/utils.py @@ -0,0 +1,289 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utility functions shared between SavedModel saving/loading +implementations.""" + +import copy +import itertools +import threading +import types + +import tensorflow.compat.v2 as tf + +from keras import backend +from keras.engine import base_layer_utils +from keras.utils import control_flow_util +from keras.utils import tf_contextlib +from keras.utils.generic_utils import LazyLoader +from keras.utils.layer_utils import CallFunctionSpec + +training_lib = LazyLoader("training_lib", globals(), "keras.engine.training") + + +def use_wrapped_call( + layer, call_fn, call_spec, default_training_value=None, return_method=False +): + """Creates fn that adds losses returned by call_fn & returns the outputs. + + Args: + layer: A Keras layer object + call_fn: tf.function that takes layer inputs (and possibly a training + arg), and returns a tuple of (outputs, list of losses). + call_spec: The `CallFunctionSpec` for the layer's call function. + default_training_value: Default value of the training kwarg. If `None`, + the default is `tf.keras.backend.learning_phase()`. + return_method: Whether to return a method bound to the layer. + + Returns: + function that calls call_fn and returns the outputs. Losses returned by + call_fn are added to the layer losses. + """ + expects_training_arg = layer_uses_training_bool(layer) + + fn, arg_spec = maybe_add_training_arg( + call_spec, call_fn, expects_training_arg, default_training_value + ) + + def return_outputs_and_add_losses(*args, **kwargs): + """Returns the outputs from the layer call function, and adds the + losses.""" + if return_method: + args = args[1:] + + outputs, losses = fn(*args, **kwargs) + layer.add_loss(losses) + + # TODO(kathywu): This is a temporary hack. When a network of layers is + # revived from SavedModel, only the top-level layer will have losses. + # This causes issues in eager mode because the child layers may have + # graph losses (thus model.losses returns a mix of Eager and graph + # tensors). To fix this, whenever eager losses are added to one layer, + # add eager losses to all child layers. This causes `.losses` to only + # return eager losses. 
+
+        if tf.executing_eagerly():
+            for i in layer._flatten_layers():
+                if i is not layer:
+                    i._eager_losses = [
+                        base_layer_utils.REVIVED_LOSS_PLACEHOLDER
+                    ]
+
+        return outputs
+
+    decorated = tf.__internal__.decorator.make_decorator(
+        target=call_fn,
+        decorator_func=return_outputs_and_add_losses,
+        decorator_argspec=arg_spec,
+    )
+
+    if return_method:
+        return types.MethodType(decorated, layer)
+    else:
+        return decorated
+
+
+def layer_uses_training_bool(layer):
+    """Returns whether this layer or any of its children uses the training
+    arg."""
+    if layer._expects_training_arg:
+        return True
+    visited = {layer}
+    to_visit = list_all_layers(layer)
+    while to_visit:
+        layer = to_visit.pop()
+        if layer in visited:
+            continue
+        if getattr(layer, "_expects_training_arg", True):
+            return True
+        visited.add(layer)
+        to_visit.extend(list_all_layers(layer))
+    return False
+
+
+def list_all_layers(obj):
+    if isinstance(obj, training_lib.Model):
+        # Handle special case of Sequential, which doesn't return
+        # the `Input` layer.
+        return obj.layers
+    else:
+        return list(obj._flatten_layers(include_self=False, recursive=False))
+
+
+def list_all_layers_and_sublayers(obj):
+    s = set([obj])
+    s.update(
+        itertools.chain.from_iterable(
+            list_all_layers_and_sublayers(layer)
+            for layer in list_all_layers(obj)
+        )
+    )
+    return s
+
+
+def maybe_add_training_arg(
+    call_spec, wrapped_call, expects_training_arg, default_training_value
+):
+    """Decorates `call` and optionally adds a training argument.
+
+    If a layer expects a training argument, this function ensures that
+    'training' is present in the layer args or kwonly args, with the default
+    training value.
+
+    Args:
+        call_spec: CallFunctionSpec of the layer.
+        wrapped_call: Wrapped call function.
+        expects_training_arg: Whether to include 'training' argument.
+        default_training_value: Default value of the training kwarg to include
+            in the arg spec. If `None`, the default is
+            `tf.keras.backend.learning_phase()`.
+
+    Returns:
+        Tuple of (
+            function that calls `wrapped_call` and sets the training arg,
+            Argspec of returned function or `None` if the argspec is
+            unchanged)
+    """
+    if not expects_training_arg:
+        return wrapped_call, None
+
+    arg_spec = set_training_arg_spec(
+        call_spec.full_argspec, default_training_value
+    )
+    call_spec = CallFunctionSpec(arg_spec)
+
+    def wrap_with_training_arg(*args, **kwargs):
+        """Wraps the `wrapped_call` function, and sets the training
+        argument."""
+        try:
+            training = call_spec.get_arg_value(
+                "training", args, kwargs, inputs_in_args=True
+            )
+        except KeyError:
+            training = None
+
+        if training is None:
+            training = (
+                default_training_value
+                or base_layer_utils.call_context().training
+                or backend.learning_phase()
+            )
+
+        args = list(args)
+        kwargs = kwargs.copy()
+
+        def replace_training_and_call(training):
+            new_args, new_kwargs = call_spec.set_arg_value(
+                "training", training, args, kwargs, inputs_in_args=True
+            )
+            return wrapped_call(*new_args, **new_kwargs)
+
+        return control_flow_util.smart_cond(
+            training,
+            lambda: replace_training_and_call(True),
+            lambda: replace_training_and_call(False),
+        )
+
+    return wrap_with_training_arg, arg_spec
+
+
+def set_training_arg_spec(arg_spec, default_training_value):
+    """Sets the `training=DEFAULT` argument in an ArgSpec."""
+    if "training" in arg_spec.args:
+        # If `training` is already in the args list, try to set the default
+        # value.
+        index = arg_spec.args.index("training")
+        training_default_index = len(arg_spec.args) - index
+        defaults = (
+            list(arg_spec.defaults) if arg_spec.defaults is not None else []
+        )
+        if (
+            arg_spec.defaults
+            and len(arg_spec.defaults) >= training_default_index
+            and defaults[-training_default_index] is None
+        ):
+            defaults[-training_default_index] = default_training_value
+            return arg_spec._replace(defaults=defaults)
+    elif "training" not in arg_spec.kwonlyargs:
+        kwonlyargs = arg_spec.kwonlyargs + ["training"]
+        kwonlydefaults = copy.copy(arg_spec.kwonlydefaults) or {}
+        kwonlydefaults["training"] = default_training_value
+        return arg_spec._replace(
+            kwonlyargs=kwonlyargs, kwonlydefaults=kwonlydefaults
+        )
+
+    return arg_spec
+
+
+class SaveOptionsContext(threading.local):
+    def __init__(self):
+        super().__init__()
+        self.save_traces = True
+        self.in_tf_saved_model_scope = False
+
+
+_save_options_context = SaveOptionsContext()
+
+
+@tf_contextlib.contextmanager
+def keras_option_scope(save_traces, in_tf_saved_model_scope=True):
+    save_traces_previous_value = _save_options_context.save_traces
+    in_scope_previous_value = _save_options_context.in_tf_saved_model_scope
+    try:
+        _save_options_context.save_traces = save_traces
+        _save_options_context.in_tf_saved_model_scope = in_tf_saved_model_scope
+        yield
+    finally:
+        _save_options_context.save_traces = save_traces_previous_value
+        _save_options_context.in_tf_saved_model_scope = in_scope_previous_value
+
+
+def should_save_traces():
+    """Whether to trace layer functions. Can be disabled with the
+    `save_traces` arg."""
+    return _save_options_context.save_traces
+
+
+def in_tf_saved_model_scope():
+    return _save_options_context.in_tf_saved_model_scope
+
+
+@tf_contextlib.contextmanager
+def no_automatic_dependency_tracking_scope(obj):
+    """Context that disables automatic dependency tracking when assigning
+    attrs.
+
+    Objects that inherit from Autotrackable automatically create dependencies
+    on trackable objects through attribute assignments, and wrap data
+    structures (lists or dicts) with trackable classes. This scope may be used
+    to temporarily disable this behavior. This works similarly to the
+    decorator `no_automatic_dependency_tracking`.
+
+    Example usage:
+    ```
+    model = tf.keras.Model()
+    model.arr1 = []  # Creates a ListWrapper object
+    with no_automatic_dependency_tracking_scope(model):
+        model.arr2 = []  # Creates a regular, untracked python list
+    ```
+
+    Args:
+        obj: A trackable object.
+
+    Yields:
+        a scope in which the object doesn't track dependencies.
+    """
+    previous_value = getattr(obj, "_setattr_tracking", True)
+    obj._setattr_tracking = False
+    try:
+        yield
+    finally:
+        obj._setattr_tracking = previous_value
diff --git a/keras/saving/legacy/saving_utils.py b/keras/saving/legacy/saving_utils.py
new file mode 100644
index 000000000000..3522f2214bef
--- /dev/null
+++ b/keras/saving/legacy/saving_utils.py
@@ -0,0 +1,371 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# ============================================================================== +"""Utils related to keras model saving.""" + +import copy +import os + +import tensorflow.compat.v2 as tf + +import keras +from keras import backend +from keras import losses +from keras import optimizers +from keras.engine import base_layer_utils +from keras.optimizers import optimizer_v1 +from keras.saving.legacy import serialization +from keras.utils import version_utils +from keras.utils.io_utils import ask_to_proceed_with_overwrite + +# isort: off +from tensorflow.python.platform import tf_logging as logging + + +def extract_model_metrics(model): + """Convert metrics from a Keras model `compile` API to dictionary. + + This is used for converting Keras models to Estimators and SavedModels. + + Args: + model: A `tf.keras.Model` object. + + Returns: + Dictionary mapping metric names to metric instances. May return `None` if + the model does not contain any metrics. + """ + if getattr(model, "_compile_metrics", None): + # TODO(psv/kathywu): use this implementation in model to estimator flow. + # We are not using model.metrics here because we want to exclude the + # metrics added using `add_metric` API. + return {m.name: m for m in model._compile_metric_functions} + return None + + +def model_call_inputs(model, keep_original_batch_size=False): + """Inspect model to get its input signature. + + The model's input signature is a list with a single (possibly-nested) + object. This is due to the Keras-enforced restriction that tensor inputs + must be passed in as the first argument. + + For example, a model with input {'feature1': , 'feature2': } + will have input signature: + [{'feature1': TensorSpec, 'feature2': TensorSpec}] + + Args: + model: Keras Model object. + keep_original_batch_size: A boolean indicating whether we want to keep + using the original batch size or set it to None. Default is `False`, + which means that the batch dim of the returned input signature will + always be set to `None`. + + Returns: + A tuple containing `(args, kwargs)` TensorSpecs of the model call function + inputs. + `kwargs` does not contain the `training` argument. + """ + input_specs = model.save_spec(dynamic_batch=not keep_original_batch_size) + if input_specs is None: + return None, None + input_specs = _enforce_names_consistency(input_specs) + return input_specs + + +def raise_model_input_error(model): + if isinstance(model, keras.models.Sequential): + raise ValueError( + f"Model {model} cannot be saved because the input shape is not " + "available. Please specify an input shape either by calling " + "`build(input_shape)` directly, or by calling the model on actual " + "data using `Model()`, `Model.fit()`, or `Model.predict()`." + ) + + # If the model is not a `Sequential`, it is intended to be a subclassed + # model. + raise ValueError( + f"Model {model} cannot be saved either because the input shape is not " + "available or because the forward pass of the model is not defined." + "To define a forward pass, please override `Model.call()`. To specify " + "an input shape, either call `build(input_shape)` directly, or call " + "the model on actual data using `Model()`, `Model.fit()`, or " + "`Model.predict()`. If you have a custom training step, please make " + "sure to invoke the forward pass in train step through " + "`Model.__call__`, i.e. `model(inputs)`, as opposed to `model.call()`." 
+ ) + + +def trace_model_call(model, input_signature=None): + """Trace the model call to create a tf.function for exporting a Keras model. + + Args: + model: A Keras model. + input_signature: optional, a list of tf.TensorSpec objects specifying the + inputs to the model. + + Returns: + A tf.function wrapping the model's call function with input signatures + set. + + Raises: + ValueError: if input signature cannot be inferred from the model. + """ + if input_signature is None: + if isinstance(model.call, tf.__internal__.function.Function): + input_signature = model.call.input_signature + + if input_signature: + model_args = input_signature + model_kwargs = {} + else: + model_args, model_kwargs = model_call_inputs(model) + + if model_args is None: + raise_model_input_error(model) + + @tf.function + def _wrapped_model(*args, **kwargs): + """A concrete tf.function that wraps the model's call function.""" + (args, kwargs,) = model._call_spec.set_arg_value( + "training", False, args, kwargs, inputs_in_args=True + ) + + with base_layer_utils.call_context().enter( + model, inputs=None, build_graph=False, training=False, saving=True + ): + outputs = model(*args, **kwargs) + + # Outputs always have to be a flat dict. + output_names = model.output_names # Functional Model. + if output_names is None: # Subclassed Model. + from keras.engine import compile_utils + + output_names = compile_utils.create_pseudo_output_names(outputs) + outputs = tf.nest.flatten(outputs) + return {name: output for name, output in zip(output_names, outputs)} + + return _wrapped_model.get_concrete_function(*model_args, **model_kwargs) + + +def model_metadata(model, include_optimizer=True, require_config=True): + """Returns a dictionary containing the model metadata.""" + from keras import __version__ as keras_version + from keras.optimizers.legacy import optimizer_v2 + + model_config = {"class_name": model.__class__.__name__} + try: + model_config["config"] = model.get_config() + except NotImplementedError as e: + if require_config: + raise e + + metadata = dict( + keras_version=str(keras_version), + backend=backend.backend(), + model_config=model_config, + ) + if model.optimizer and include_optimizer: + if isinstance(model.optimizer, optimizer_v1.TFOptimizer): + logging.warning( + "TensorFlow optimizers do not " + "make it possible to access " + "optimizer attributes or optimizer state " + "after instantiation. " + "As a result, we cannot save the optimizer " + "as part of the model save file. " + "You will have to compile your model again after loading it. " + "Prefer using a Keras optimizer instead " + "(see keras.io/optimizers)." + ) + elif model._compile_was_called: + training_config = model._get_compile_args(user_metrics=False) + training_config.pop("optimizer", None) # Handled separately. + metadata["training_config"] = _serialize_nested_config( + training_config + ) + if isinstance(model.optimizer, optimizer_v2.RestoredOptimizer): + raise NotImplementedError( + "Optimizers loaded from a SavedModel cannot be saved. " + "If you are calling `model.save` or " + "`tf.keras.models.save_model`, " + "please set the `include_optimizer` option to `False`. For " + "`tf.saved_model.save`, " + "delete the optimizer from the model."
+ ) + else: + optimizer_config = { + "class_name": keras.utils.get_registered_name( + model.optimizer.__class__ + ), + "config": model.optimizer.get_config(), + } + metadata["training_config"]["optimizer_config"] = optimizer_config + return metadata + + +def should_overwrite(filepath, overwrite): + """Returns whether the filepath should be overwritten.""" + # If file exists and should not be overwritten. + if not overwrite and os.path.isfile(filepath): + return ask_to_proceed_with_overwrite(filepath) + return True + + +def compile_args_from_training_config(training_config, custom_objects=None): + """Return model.compile arguments from training config.""" + if custom_objects is None: + custom_objects = {} + + with keras.utils.CustomObjectScope(custom_objects): + optimizer_config = training_config["optimizer_config"] + optimizer = optimizers.deserialize(optimizer_config) + + # Recover losses. + loss = None + loss_config = training_config.get("loss", None) + if loss_config is not None: + loss = _deserialize_nested_config(losses.deserialize, loss_config) + + # Recover metrics. + metrics = None + metrics_config = training_config.get("metrics", None) + if metrics_config is not None: + metrics = _deserialize_nested_config( + _deserialize_metric, metrics_config + ) + + # Recover weighted metrics. + weighted_metrics = None + weighted_metrics_config = training_config.get("weighted_metrics", None) + if weighted_metrics_config is not None: + weighted_metrics = _deserialize_nested_config( + _deserialize_metric, weighted_metrics_config + ) + + sample_weight_mode = ( + training_config["sample_weight_mode"] + if "sample_weight_mode" in training_config + else None + ) + loss_weights = training_config["loss_weights"] + + return dict( + optimizer=optimizer, + loss=loss, + metrics=metrics, + weighted_metrics=weighted_metrics, + loss_weights=loss_weights, + sample_weight_mode=sample_weight_mode, + ) + + +def _deserialize_nested_config(deserialize_fn, config): + """Deserializes arbitrary Keras `config` using `deserialize_fn`.""" + + def _is_single_object(obj): + if isinstance(obj, dict) and "class_name" in obj: + return True # Serialized Keras object. + if isinstance(obj, str): + return True # Serialized function or string. + return False + + if config is None: + return None + if _is_single_object(config): + return deserialize_fn(config) + elif isinstance(config, dict): + return { + k: _deserialize_nested_config(deserialize_fn, v) + for k, v in config.items() + } + elif isinstance(config, (tuple, list)): + return [ + _deserialize_nested_config(deserialize_fn, obj) for obj in config + ] + + raise ValueError( + "Saved configuration not understood. Configuration should be a " + f"dictionary, string, tuple or list. Received: config={config}." + ) + + +def _serialize_nested_config(config): + """Serializes a nested structure of Keras objects.""" + + def _serialize_fn(obj): + if callable(obj): + return serialization.serialize_keras_object(obj) + return obj + + return tf.nest.map_structure(_serialize_fn, config) + + +def _deserialize_metric(metric_config): + """Deserialize metrics, leaving special strings untouched.""" + from keras import metrics as metrics_module + + if metric_config in ["accuracy", "acc", "crossentropy", "ce"]: + # Do not deserialize accuracy and cross-entropy strings as we have + # special case handling for these in compile, based on model output + # shape.
+ return metric_config + return metrics_module.deserialize(metric_config) + + +def _enforce_names_consistency(specs): + """Enforces that either all specs have names or none do.""" + + def _has_name(spec): + return spec is None or (hasattr(spec, "name") and spec.name is not None) + + def _clear_name(spec): + spec = copy.deepcopy(spec) + if hasattr(spec, "name"): + spec._name = None + return spec + + flat_specs = tf.nest.flatten(specs) + name_inconsistency = any(_has_name(s) for s in flat_specs) and not all( + _has_name(s) for s in flat_specs + ) + + if name_inconsistency: + specs = tf.nest.map_structure(_clear_name, specs) + return specs + + +def try_build_compiled_arguments(model): + if ( + not version_utils.is_v1_layer_or_model(model) + and model.outputs is not None + ): + try: + if not model.compiled_loss.built: + model.compiled_loss.build(model.outputs) + if not model.compiled_metrics.built: + model.compiled_metrics.build(model.outputs, model.outputs) + except: # noqa: E722 + logging.warning( + "Compiled the loaded model, but the compiled metrics have " + "yet to be built. `model.compile_metrics` will be empty " + "until you train or evaluate the model." + ) + + +def is_hdf5_filepath(filepath): + return ( + filepath.endswith(".h5") + or filepath.endswith(".keras") + or filepath.endswith(".hdf5") + ) diff --git a/keras/saving/legacy/saving_utils_test.py b/keras/saving/legacy/saving_utils_test.py new file mode 100644 index 000000000000..3a34783f45e5 --- /dev/null +++ b/keras/saving/legacy/saving_utils_test.py @@ -0,0 +1,553 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
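A brief sketch of how the `saving_utils` helpers above compose; the model, shapes, and printed names are illustrative:

```python
import tensorflow as tf
from keras.saving.legacy import saving_utils

# A small functional model; functional models always carry a save spec.
inp = tf.keras.Input(shape=(5,))
out = tf.keras.layers.Dense(3, name="dense")(inp)
model = tf.keras.Model(inp, out)
model.compile(optimizer="sgd", loss="mse")

# trace_model_call returns a ConcreteFunction whose outputs form a flat
# dict keyed by output name, with `training` pinned to False.
fn = saving_utils.trace_model_call(model)
print(fn(tf.ones((2, 5))))  # e.g. {'dense': <tf.Tensor shape=(2, 3) ...>}

# model_metadata records keras_version, backend, and model_config, plus a
# training_config because `compile` was called above.
meta = saving_utils.model_metadata(model)
print(sorted(meta.keys()))
```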
+# ============================================================================== +"""Tests for saving utility functions.""" + +import os + +import numpy as np +import tensorflow.compat.v2 as tf + +import keras +from keras import backend +from keras.engine import sequential +from keras.feature_column import dense_features +from keras.optimizers.legacy import gradient_descent +from keras.saving.legacy import saving_utils +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + + +class TraceModelCallTest(test_combinations.TestCase): + def _assert_all_close(self, expected, actual): + if not tf.executing_eagerly(): + with self.cached_session() as sess: + backend._initialize_variables(sess) + self.assertAllClose(expected, actual) + else: + self.assertAllClose(expected, actual) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_trace_model_outputs(self): + input_dim = 5 if test_utils.get_model_type() == "functional" else None + model = test_utils.get_small_mlp(10, 3, input_dim) + inputs = tf.ones((8, 5)) + + if input_dim is None: + with self.assertRaisesRegex( + ValueError, ".*input shape is not availabl*" + ): + saving_utils.trace_model_call(model) + model._set_inputs(inputs) + + fn = saving_utils.trace_model_call(model) + signature_outputs = fn(inputs) + if model.output_names: + expected_outputs = {model.output_names[0]: model(inputs)} + else: + expected_outputs = {"output_1": model(inputs)} + + self._assert_all_close(expected_outputs, signature_outputs) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_trace_model_outputs_after_fitting(self): + input_dim = 5 if test_utils.get_model_type() == "functional" else None + model = test_utils.get_small_mlp(10, 3, input_dim) + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit( + x=np.random.random((8, 5)).astype(np.float32), + y=np.random.random((8, 3)).astype(np.float32), + epochs=2, + ) + + inputs = tf.ones((8, 5)) + + fn = saving_utils.trace_model_call(model) + signature_outputs = fn(inputs) + if model.output_names: + expected_outputs = {model.output_names[0]: model(inputs)} + else: + expected_outputs = {"output_1": model(inputs)} + + self._assert_all_close(expected_outputs, signature_outputs) + + @test_combinations.run_with_all_model_types(exclude_models="sequential") + @test_combinations.run_all_keras_modes + def test_trace_multi_io_model_outputs(self): + input_dim = 5 + num_classes = 3 + num_classes_b = 4 + input_a = keras.layers.Input(shape=(input_dim,), name="input_a") + input_b = keras.layers.Input(shape=(input_dim,), name="input_b") + + dense = keras.layers.Dense(num_classes, name="dense") + dense2 = keras.layers.Dense(num_classes_b, name="dense2") + dropout = keras.layers.Dropout(0.5, name="dropout") + branch_a = [input_a, dense] + branch_b = [input_b, dense, dense2, dropout] + + model = test_utils.get_multi_io_model(branch_a, branch_b) + + input_a_ts = tf.constant( + np.random.random((10, input_dim)).astype(np.float32) + ) + input_b_ts = tf.constant( + np.random.random((10, input_dim)).astype(np.float32) + ) + + if test_utils.get_model_type() == "subclass": + with self.assertRaisesRegex( + ValueError, ".*input shape is not availabl*" + ): + saving_utils.trace_model_call(model) + + model.compile( + optimizer="sgd", + loss="mse", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit( + x=[ + np.random.random((8, 
input_dim)).astype(np.float32), + np.random.random((8, input_dim)).astype(np.float32), + ], + y=[ + np.random.random((8, num_classes)).astype(np.float32), + np.random.random((8, num_classes_b)).astype(np.float32), + ], + epochs=2, + ) + + fn = saving_utils.trace_model_call(model) + # tf.function requires that the input structures match when calling a + # ConcreteFunction. For some reason V1 models define the inputs as a + # list, while V2 models set the inputs as a tuple. + if ( + not tf.executing_eagerly() + and test_utils.get_model_type() != "functional" + ): + signature_outputs = fn([input_a_ts, input_b_ts]) + else: + signature_outputs = fn((input_a_ts, input_b_ts)) + outputs = model([input_a_ts, input_b_ts]) + if model.output_names: + expected_outputs = { + model.output_names[0]: outputs[0], + model.output_names[1]: outputs[1], + } + else: + expected_outputs = {"output_1": outputs[0], "output_2": outputs[1]} + self._assert_all_close(expected_outputs, signature_outputs) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_trace_features_layer(self): + columns = [tf.feature_column.numeric_column("x")] + model = sequential.Sequential([dense_features.DenseFeatures(columns)]) + model_input = {"x": tf.constant([[1.0]])} + model.predict(model_input, steps=1) + fn = saving_utils.trace_model_call(model) + self.assertAllClose({"output_1": [[1.0]]}, fn(model_input)) + + columns = [ + tf.feature_column.numeric_column("x"), + tf.feature_column.numeric_column("y"), + ] + model = sequential.Sequential([dense_features.DenseFeatures(columns)]) + model_input = {"x": tf.constant([[1.0]]), "y": tf.constant([[2.0]])} + model.predict(model_input, steps=1) + fn = saving_utils.trace_model_call(model) + self.assertAllClose({"output_1": [[1.0, 2.0]]}, fn(model_input)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_specify_input_signature(self): + model = test_utils.get_small_sequential_mlp(10, 3, None) + inputs = tf.ones((8, 5)) + + with self.assertRaisesRegex( + ValueError, ".*input shape is not availabl*" + ): + saving_utils.trace_model_call(model) + + fn = saving_utils.trace_model_call( + model, [tf.TensorSpec(shape=[None, 5], dtype=tf.float32)] + ) + signature_outputs = fn(inputs) + if model.output_names: + expected_outputs = {model.output_names[0]: model(inputs)} + else: + expected_outputs = {"output_1": model(inputs)} + self._assert_all_close(expected_outputs, signature_outputs) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_subclassed_model_with_input_signature(self): + class Model(keras.Model): + def __init__(self): + super().__init__() + self.dense = keras.layers.Dense(3, name="dense") + + @tf.function( + input_signature=[ + [ + tf.TensorSpec([None, 5], tf.float32), + tf.TensorSpec([None], tf.float32), + ] + ], + ) + def call(self, inputs, *args): + x, y = inputs + return self.dense(x) + y + + model = Model() + fn = saving_utils.trace_model_call(model) + x = tf.ones((8, 5), dtype=tf.float32) + y = tf.ones((3,), dtype=tf.float32) + expected_outputs = {"output_1": model([x, y])} + signature_outputs = fn([x, y]) + self._assert_all_close(expected_outputs, signature_outputs) + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def test_model_with_fixed_input_dim(self): + """Ensure that the batch_dim is removed when saving. + + When serving or retraining, it is important to reset the batch dim.
+ This can be an issue inside a tf.function. See b/132783590 for context. + """ + model = test_utils.get_small_mlp(10, 3, 5) + + loss_object = keras.losses.MeanSquaredError() + optimizer = gradient_descent.SGD() + + @tf.function + def train_step(data, labels): + with tf.GradientTape() as tape: + predictions = model(data) + loss = loss_object(labels, predictions) + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + + x = np.random.random((8, 5)) + y = np.random.random((8, 3)) + + train_step(x, y) + + fn = saving_utils.trace_model_call(model) + self.assertEqual( + fn.structured_input_signature[0][0].shape.as_list(), + tf.TensorShape([None, 5]).as_list(), + ) + + +def _import_and_infer(save_dir, inputs): + """Import a SavedModel into a TF 1.x-style graph and run its serving + signature.""" + graph = tf.Graph() + with graph.as_default(), tf.compat.v1.Session() as session: + model = tf.compat.v1.saved_model.load( + session, [tf.saved_model.SERVING], save_dir + ) + signature = model.signature_def[ + tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY + ] + assert set(inputs.keys()) == set( + signature.inputs.keys() + ), f"expected {signature.inputs.keys()}, found {inputs.keys()}" + feed_dict = {} + for arg_name in inputs.keys(): + feed_dict[ + graph.get_tensor_by_name(signature.inputs[arg_name].name) + ] = inputs[arg_name] + output_dict = {} + for output_name, output_tensor_info in signature.outputs.items(): + output_dict[output_name] = graph.get_tensor_by_name( + output_tensor_info.name + ) + return session.run(output_dict, feed_dict=feed_dict) + + +class AutographedMetric(keras.metrics.Metric): + def build(self, input_shape): + pass + + def update_state(self, values): + if tf.constant(False): + x = 1 + else: + x = 2 + return x + + def reset_states(self): + pass + + def result(self): + return tf.constant(0) + + def GetMean(self): + return tf.constant(0) + + def GetCount(self): + return tf.constant(0) + + +class BasicAutographedMetricLayer(keras.layers.Layer): + def build(self, input_shape): + self._metric = AutographedMetric() + + def call(self, inp): + self._metric.update_state(inp) + # TODO(b/172853147): Test control flow here. + return inp + + +class BasicAutographedMetricModel(keras.models.Model): + def __init__(self): + super().__init__(name="test_model") + self._layer = BasicAutographedMetricLayer() + + def call(self, inputs, **kwargs): + return self._layer(inputs) + + +@test_combinations.run_with_all_model_types +@test_combinations.run_all_keras_modes(always_skip_v1=True) +class ModelSaveTest(test_combinations.TestCase): + def test_model_save_preserves_autograph(self): + model = BasicAutographedMetricModel() + inputs = tf.ones((8, 5)) + model._set_inputs(inputs) + + save_dir = os.path.join(self.get_temp_dir(), "saved_model") + tf.saved_model.save(model, save_dir) + + if model.output_names: + output_name = model.output_names[0] + input_name = model.input_names[0] + else: + output_name = "output_1" + input_name = "input_1" + + self.assertAllClose( + {output_name: model.predict_on_batch(inputs)}, + _import_and_infer(save_dir, {input_name: np.ones((8, 5))}), + ) + + # Test v2 loading. + # TODO(mdan): tests using _import_and_infer should uniformly do this.
+ self.assertAllClose( + model.predict_on_batch(inputs), + tf.saved_model.load(save_dir)(inputs), + ) + + def test_model_save(self): + input_dim = 5 + model = test_utils.get_small_mlp(10, 3, input_dim) + inputs = tf.ones((8, 5)) + + if test_utils.get_model_type() == "subclass": + model._set_inputs(inputs) + + save_dir = os.path.join(self.get_temp_dir(), "saved_model") + tf.saved_model.save(model, save_dir) + + if model.output_names: + output_name = model.output_names[0] + input_name = model.input_names[0] + else: + output_name = "output_1" + input_name = "input_1" + + self.assertAllClose( + {output_name: model.predict_on_batch(inputs)}, + _import_and_infer(save_dir, {input_name: np.ones((8, 5))}), + ) + + +class ExtractModelMetricsTest(test_combinations.TestCase): + def test_extract_model_metrics(self): + # saving_utils.extract_model_metrics is used in V1 only API + # keras.experimental.export_saved_model. + with tf.Graph().as_default(): + a = keras.layers.Input(shape=(3,), name="input_a") + b = keras.layers.Input(shape=(3,), name="input_b") + + dense = keras.layers.Dense(4, name="dense") + c = dense(a) + d = dense(b) + e = keras.layers.Dropout(0.5, name="dropout")(c) + + model = keras.models.Model([a, b], [d, e]) + extract_metrics = saving_utils.extract_model_metrics(model) + self.assertEqual(None, extract_metrics) + + extract_metric_names = [ + "dense_binary_accuracy", + "dropout_binary_accuracy", + "dense_mean_squared_error", + "dropout_mean_squared_error", + ] + if tf.__internal__.tf2.enabled(): + extract_metric_names.extend(["dense_mae", "dropout_mae"]) + else: + extract_metric_names.extend( + ["dense_mean_absolute_error", "dropout_mean_absolute_error"] + ) + + model_metric_names = [ + "loss", + "dense_loss", + "dropout_loss", + ] + extract_metric_names + model.compile( + loss="mae", + metrics=[ + keras.metrics.BinaryAccuracy(), + "mae", + keras.metrics.mean_squared_error, + ], + optimizer=tf.compat.v1.train.RMSPropOptimizer( + learning_rate=0.01 + ), + ) + extract_metrics = saving_utils.extract_model_metrics(model) + self.assertEqual(set(model_metric_names), set(model.metrics_names)) + self.assertEqual( + set(extract_metric_names), set(extract_metrics.keys()) + ) + + +class UnbuiltModelSavingErrorMessageTest(test_combinations.TestCase): + def setUp(self): + super().setUp() + if not tf.__internal__.tf2.enabled(): + self.skipTest("The test does not intend to cover TF1.") + + def test_sequential(self): + model = sequential.Sequential([keras.layers.Dense(10)]) + optimizer = gradient_descent.SGD() + model.compile(optimizer, loss="mse", steps_per_execution=10) + + # Forward pass not called yet. Input shape not available and thus error. + with self.assertRaisesRegex( + ValueError, + "Model.*cannot be saved." + "*specify an input shape either by calling.*", + ): + model.save(os.path.join(self.get_temp_dir(), "my_saved_model")) + + def test_functional(self): + inputs = keras.Input(shape=(32,)) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer="adam", loss="mse", metrics=["mae"]) + + x = np.random.random((1000, 32)) + y = np.random.random((1000, 1)) + model.fit(x, y, epochs=3) + + # Functional model always has an input shape, so should save just fine. 
+ model.save(os.path.join(self.get_temp_dir(), "my_saved_model")) + + def test_subclass_forward_pass_by_layer_underscore_call(self): + class CustomModel(keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dense1 = keras.layers.Dense(1) + + def train_step(self, data): + x, y = data + with tf.GradientTape() as tape: + y_pred = self.dense1(x, training=True) + loss = self.compiled_loss(y, y_pred) + + gradients = tape.gradient(loss, self.trainable_variables) + self.optimizer.apply_gradients( + zip(gradients, self.trainable_variables) + ) + return {} + + subclassed_model = CustomModel() + subclassed_model.compile(optimizer="adam", loss="mse") + + x = np.random.random((1000, 32)) + y = np.random.random((1000, 1)) + subclassed_model.fit(x, y, epochs=1) + + # Saving of this subclassed model is supposed to raise an error, even if + # `fit` has been called. This is because the model does not have + # `call()` overridden. Forward pass using `layer.__call__` works for + # training, but saving requires that `call()` be used. + with self.assertRaisesRegex( + ValueError, + r"Model.*cannot be saved.*as opposed to `model.call\(\).*", + ): + subclassed_model.save( + os.path.join(self.get_temp_dir(), "my_saved_model") + ) + + def test_subclass_forward_pass_by_model_call(self): + class CustomModel(keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dense1 = keras.layers.Dense(1) + + def call(self, inputs): + return self.dense1(inputs) + + def train_step(self, data): + x, y = data + with tf.GradientTape() as tape: + y_pred = self.call(x) + loss = self.compiled_loss(y, y_pred) + + gradients = tape.gradient(loss, self.trainable_variables) + self.optimizer.apply_gradients( + zip(gradients, self.trainable_variables) + ) + return {} + + subclassed_model = CustomModel() + subclassed_model.compile(optimizer="adam", loss="mse") + + x = np.random.random((1000, 32)) + y = np.random.random((1000, 1)) + subclassed_model.fit(x, y, epochs=1) + + # Saving of this subclassed model is supposed to raise an error, even if + # `fit` has been called. This is because the model has `call()` + # overridden, but the forward pass uses `Model.call` as opposed to + # `Model.__call__`, and as a result the `Model` is not really built. The + # error message hints the user to use `Model.__call__`, i.e., + # `Model(inputs)` instead. + with self.assertRaisesRegex( + ValueError, + r"Model.*cannot be saved.*as opposed to `model.call\(\).*", + ): + subclassed_model.save( + os.path.join(self.get_temp_dir(), "my_saved_model") + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/saving/legacy/serialization.py b/keras/saving/legacy/serialization.py new file mode 100644 index 000000000000..7d55d92f58ca --- /dev/null +++ b/keras/saving/legacy/serialization.py @@ -0,0 +1,570 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
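The error-path tests above boil down to the following behavior; a sketch with illustrative paths:

```python
import os
import tempfile

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(10)])

# No forward pass has run, so the input shape is unknown and saving fails.
try:
    model.save(os.path.join(tempfile.mkdtemp(), "unbuilt_model"))
except ValueError as e:
    print(e)  # "... cannot be saved ... specify an input shape ..."

# A single call on real data (or build/fit/predict) makes it saveable.
model(tf.ones((1, 4)))
model.save(os.path.join(tempfile.mkdtemp(), "built_model"))
```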
+# ============================================================================== +"""Legacy serialization logic for Keras models.""" + +import threading +import weakref + +import tensorflow.compat.v2 as tf + +from keras.utils import tf_contextlib +from keras.utils import tf_inspect + +# isort: off +from tensorflow.python.util.tf_export import keras_export + +# Flag that determines whether to skip the NotImplementedError when calling +# get_config in custom models and layers. This is only enabled when saving to +# SavedModel, when the config isn't required. +_SKIP_FAILED_SERIALIZATION = False +# If a layer does not have a defined config, then the returned config will be a +# dictionary with the below key. +_LAYER_UNDEFINED_CONFIG_KEY = "layer was saved without config" + +# Store a unique, per-object ID for shared objects. +# +# We store a unique ID for each object so that we may, at loading time, +# re-create the network properly. Without this ID, we would have no way of +# determining whether a config is a description of a new object that +# should be created or is merely a reference to an already-created object. +SHARED_OBJECT_KEY = "shared_object_id" + +SHARED_OBJECT_DISABLED = threading.local() +SHARED_OBJECT_LOADING = threading.local() +SHARED_OBJECT_SAVING = threading.local() + + +# Attributes on the threadlocal variable must be set per-thread, thus we +# cannot initialize these globally. Instead, we have accessor functions with +# default values. +def _shared_object_disabled(): + """Get whether shared object handling is disabled in a threadsafe manner.""" + return getattr(SHARED_OBJECT_DISABLED, "disabled", False) + + +def _shared_object_loading_scope(): + """Get the current shared object loading scope in a threadsafe manner.""" + return getattr(SHARED_OBJECT_LOADING, "scope", NoopLoadingScope()) + + +def _shared_object_saving_scope(): + """Get the current shared object saving scope in a threadsafe manner.""" + return getattr(SHARED_OBJECT_SAVING, "scope", None) + + +class DisableSharedObjectScope: + """A context manager for disabling handling of shared objects. + + Disables shared object handling for both saving and loading. + + Created primarily for use with `clone_model`, which does extra surgery that + is incompatible with shared objects. + """ + + def __enter__(self): + SHARED_OBJECT_DISABLED.disabled = True + self._orig_loading_scope = _shared_object_loading_scope() + self._orig_saving_scope = _shared_object_saving_scope() + + def __exit__(self, *args, **kwargs): + SHARED_OBJECT_DISABLED.disabled = False + SHARED_OBJECT_LOADING.scope = self._orig_loading_scope + SHARED_OBJECT_SAVING.scope = self._orig_saving_scope + + +class NoopLoadingScope: + """The default shared object loading scope. It does nothing. + + Created to simplify serialization code that doesn't care about shared + objects (e.g. when serializing a single object). + """ + + def get(self, unused_object_id): + return None + + def set(self, object_id, obj): + pass + + +class SharedObjectLoadingScope: + """A context manager for keeping track of loaded objects. + + During the deserialization process, we may come across objects that are + shared across multiple layers. In order to accurately restore the network + structure to its original state, `SharedObjectLoadingScope` allows us to + re-use shared objects rather than cloning them.
+ """ + + def __enter__(self): + if _shared_object_disabled(): + return NoopLoadingScope() + + global SHARED_OBJECT_LOADING + SHARED_OBJECT_LOADING.scope = self + self._obj_ids_to_obj = {} + return self + + def get(self, object_id): + """Given a shared object ID, returns a previously instantiated object. + + Args: + object_id: shared object ID to use when attempting to find + already-loaded object. + + Returns: + The object, if we've seen this ID before. Else, `None`. + """ + # Explicitly check for `None` internally to make external calling code a + # bit cleaner. + if object_id is None: + return + return self._obj_ids_to_obj.get(object_id) + + def set(self, object_id, obj): + """Stores an instantiated object for future lookup and sharing.""" + if object_id is None: + return + self._obj_ids_to_obj[object_id] = obj + + def __exit__(self, *args, **kwargs): + global SHARED_OBJECT_LOADING + SHARED_OBJECT_LOADING.scope = NoopLoadingScope() + + +class SharedObjectConfig(dict): + """A configuration container that keeps track of references. + + `SharedObjectConfig` will automatically attach a shared object ID to any + configs which are referenced more than once, allowing for proper shared + object reconstruction at load time. + + In most cases, it would be more proper to subclass something like + `collections.UserDict` or `collections.Mapping` rather than `dict` directly. + Unfortunately, python's json encoder does not support `Mapping`s. This is + important functionality to retain, since we are dealing with serialization. + + We should be safe to subclass `dict` here, since we aren't actually + overriding any core methods, only augmenting with a new one for reference + counting. + """ + + def __init__(self, base_config, object_id, **kwargs): + self.ref_count = 1 + self.object_id = object_id + super().__init__(base_config, **kwargs) + + def increment_ref_count(self): + # As soon as we've seen the object more than once, we want to attach the + # shared object ID. This allows us to only attach the shared object ID + # when it's strictly necessary, making backwards compatibility breakage + # less likely. + if self.ref_count == 1: + self[SHARED_OBJECT_KEY] = self.object_id + self.ref_count += 1 + + +class SharedObjectSavingScope: + """Keeps track of shared object configs when serializing.""" + + def __enter__(self): + if _shared_object_disabled(): + return None + + global SHARED_OBJECT_SAVING + + # Serialization can happen at a number of layers for a number of + # reasons. We may end up with a case where we're opening a saving scope + # within another saving scope. In that case, we'd like to use the + # outermost scope available and ignore inner scopes, since there is not + # (yet) a reasonable use case for having these nested and distinct. + if _shared_object_saving_scope() is not None: + self._passthrough = True + return _shared_object_saving_scope() + else: + self._passthrough = False + + SHARED_OBJECT_SAVING.scope = self + self._shared_objects_config = weakref.WeakKeyDictionary() + self._next_id = 0 + return self + + def get_config(self, obj): + """Gets a `SharedObjectConfig` if one has already been seen for `obj`. + + Args: + obj: The object for which to retrieve the `SharedObjectConfig`. + + Returns: + The SharedObjectConfig for a given object, if already seen. Else, + `None`. + """ + try: + shared_object_config = self._shared_objects_config[obj] + except (TypeError, KeyError): + # If the object is unhashable (e.g. 
a subclass of + # `AbstractBaseClass` that has not overridden `__hash__`), a + # `TypeError` will be thrown. We'll just continue on without shared + # object support. + return None + shared_object_config.increment_ref_count() + return shared_object_config + + def create_config(self, base_config, obj): + """Create a new SharedObjectConfig for a given object.""" + shared_object_config = SharedObjectConfig(base_config, self._next_id) + self._next_id += 1 + try: + self._shared_objects_config[obj] = shared_object_config + except TypeError: + # If the object is unhashable (e.g. a subclass of + # `AbstractBaseClass` that has not overridden `__hash__`), a + # `TypeError` will be thrown. We'll just continue on without shared + # object support. + pass + return shared_object_config + + def __exit__(self, *args, **kwargs): + if not getattr(self, "_passthrough", False): + global SHARED_OBJECT_SAVING + SHARED_OBJECT_SAVING.scope = None + + +def serialize_keras_class_and_config( + cls_name, cls_config, obj=None, shared_object_id=None +): + """Returns the serialization of the class with the given config.""" + base_config = {"class_name": cls_name, "config": cls_config} + + # We call `serialize_keras_class_and_config` for some branches of the load + # path. In that case, we may already have a shared object ID we'd like to + # retain. + if shared_object_id is not None: + base_config[SHARED_OBJECT_KEY] = shared_object_id + + # If we have an active `SharedObjectSavingScope`, check whether we've + # already serialized this config. If so, just use that config. This will + # store an extra ID field in the config, allowing us to re-create the shared + # object relationship at load time. + if _shared_object_saving_scope() is not None and obj is not None: + shared_object_config = _shared_object_saving_scope().get_config(obj) + if shared_object_config is None: + return _shared_object_saving_scope().create_config(base_config, obj) + return shared_object_config + + return base_config + + +@tf_contextlib.contextmanager +def skip_failed_serialization(): + global _SKIP_FAILED_SERIALIZATION + prev = _SKIP_FAILED_SERIALIZATION + try: + _SKIP_FAILED_SERIALIZATION = True + yield + finally: + _SKIP_FAILED_SERIALIZATION = prev + + +@keras_export("keras.utils.legacy.serialize_keras_object") +def serialize_keras_object(instance): + """Serialize a Keras object into a JSON-compatible representation. + + Calls to `serialize_keras_object` while underneath the + `SharedObjectSavingScope` context manager will cause any objects re-used + across multiple layers to be saved with a special shared object ID. This + allows the network to be re-created properly during deserialization. + + Args: + instance: The object to serialize. + + Returns: + A dict-like, JSON-compatible representation of the object's config. + """ + from keras.saving import object_registration + + _, instance = tf.__internal__.decorator.unwrap(instance) + if instance is None: + return None + + if hasattr(instance, "get_config"): + name = object_registration.get_registered_name(instance.__class__) + try: + config = instance.get_config() + except NotImplementedError as e: + if _SKIP_FAILED_SERIALIZATION: + return serialize_keras_class_and_config( + name, {_LAYER_UNDEFINED_CONFIG_KEY: True} + ) + raise e + serialization_config = {} + for key, item in config.items(): + if isinstance(item, str): + serialization_config[key] = item + continue + + # Any object of a different type needs to be converted to string or + # dict for serialization (e.g. 
custom functions, custom classes) + try: + serialized_item = serialize_keras_object(item) + if isinstance(serialized_item, dict) and not isinstance( + item, dict + ): + serialized_item["__passive_serialization__"] = True + serialization_config[key] = serialized_item + except ValueError: + serialization_config[key] = item + + name = object_registration.get_registered_name(instance.__class__) + return serialize_keras_class_and_config( + name, serialization_config, instance + ) + if hasattr(instance, "__name__"): + return object_registration.get_registered_name(instance) + raise ValueError( + f"Cannot serialize {instance} because it doesn't implement " + "`get_config()`." + ) + + +def class_and_config_for_serialized_keras_object( + config, + module_objects=None, + custom_objects=None, + printable_module_name="object", +): + """Returns the class name and config for a serialized Keras object.""" + from keras.saving import object_registration + + if ( + not isinstance(config, dict) + or "class_name" not in config + or "config" not in config + ): + raise ValueError( + f"Improper config format for {config}. " + "Expecting a Python dict that contains `class_name` and `config` " + "as keys." + ) + + class_name = config["class_name"] + cls = object_registration.get_registered_object( + class_name, custom_objects, module_objects + ) + if cls is None: + raise ValueError( + f"Unknown {printable_module_name}: '{class_name}'. " + "Please ensure you are using a `keras.utils.custom_object_scope` " + "and that this object is included in the scope. See " + "https://www.tensorflow.org/guide/keras/save_and_serialize" + "#registering_the_custom_object for details." + ) + + cls_config = config["config"] + # Check if `cls_config` is a list. If it is a list, return the class and the + # associated class configs for recursive deserialization. This case will + # happen with old versions of the Sequential model (e.g. `keras_version` == + # "2.0.6"), which is serialized in a different structure, for example + # "{'class_name': 'Sequential', + # 'config': [{'class_name': 'Embedding', 'config': ...}, {}, ...]}". + if isinstance(cls_config, list): + return (cls, cls_config) + + deserialized_objects = {} + for key, item in cls_config.items(): + if key == "name": + # Assume that the value of 'name' is a string that should not be + # deserialized as a function. This avoids the corner case where + # cls_config['name'] has an identical name to a custom function and + # gets converted into that function. + deserialized_objects[key] = item + elif isinstance(item, dict) and "__passive_serialization__" in item: + deserialized_objects[key] = deserialize_keras_object( + item, + module_objects=module_objects, + custom_objects=custom_objects, + printable_module_name="config_item", + ) + # TODO(momernick): Should this also have 'module_objects'? + elif isinstance(item, str) and tf_inspect.isfunction( + object_registration.get_registered_object(item, custom_objects) + ): + # Handle custom functions here. When saving functions, we only save + # the function's name as a string. If we find a matching string in + # the custom objects during deserialization, we convert the string + # back to the original function. + # Note that a potential issue is that a string field could have a + # naming conflict with a custom function name, but this should be a + # rare case. This issue does not occur if a string field has a + # naming conflict with a custom object, since the config of an + # object will always be a dict.
+ deserialized_objects[ + key + ] = object_registration.get_registered_object(item, custom_objects) + for key, item in deserialized_objects.items(): + cls_config[key] = deserialized_objects[key] + + return (cls, cls_config) + + +@keras_export("keras.utils.legacy.deserialize_keras_object") +def deserialize_keras_object( + identifier, + module_objects=None, + custom_objects=None, + printable_module_name="object", +): + """Turns the serialized form of a Keras object back into an actual object. + + This function is for mid-level library implementers rather than end users. + + Importantly, this utility requires you to provide the dict of + `module_objects` to use for looking up the object config; this is not + populated by default. If you need a deserialization utility that has + preexisting knowledge of built-in Keras objects, use e.g. + `keras.layers.deserialize(config)`, `keras.metrics.deserialize(config)`, + etc. + + Calling `deserialize_keras_object` while underneath the + `SharedObjectLoadingScope` context manager will cause any already-seen + shared objects to be returned as-is rather than creating a new object. + + Args: + identifier: the serialized form of the object. + module_objects: A dictionary of built-in objects to look the name up in. + Generally, `module_objects` is provided by mid-level library + implementers. + custom_objects: A dictionary of custom objects to look the name up in. + Generally, `custom_objects` is provided by the end user. + printable_module_name: A human-readable string representing the type of + the object. Printed in case of exception. + + Returns: + The deserialized object. + + Example: + + A mid-level library implementer might want to implement a utility for + retrieving an object from its config, as such: + + ```python + def deserialize(config, custom_objects=None): + return deserialize_keras_object( + config, + module_objects=globals(), + custom_objects=custom_objects, + printable_module_name="MyObjectType", + ) + ``` + + This is how e.g. `keras.layers.deserialize()` is implemented. + """ + from keras.saving import object_registration + + if identifier is None: + return None + + if isinstance(identifier, dict): + # In this case we are dealing with a Keras config dictionary. + config = identifier + (cls, cls_config) = class_and_config_for_serialized_keras_object( + config, module_objects, custom_objects, printable_module_name + ) + + # If this object has already been loaded (i.e. it's shared between + # multiple objects), return the already-loaded object. + shared_object_id = config.get(SHARED_OBJECT_KEY) + shared_object = _shared_object_loading_scope().get(shared_object_id) + if shared_object is not None: + return shared_object + + if hasattr(cls, "from_config"): + arg_spec = tf_inspect.getfullargspec(cls.from_config) + custom_objects = custom_objects or {} + + if "custom_objects" in arg_spec.args: + tlco = object_registration._THREAD_LOCAL_CUSTOM_OBJECTS.__dict__ + deserialized_obj = cls.from_config( + cls_config, + custom_objects={ + **object_registration._GLOBAL_CUSTOM_OBJECTS, + **tlco, + **custom_objects, + }, + ) + else: + with object_registration.CustomObjectScope(custom_objects): + deserialized_obj = cls.from_config(cls_config) + else: + # Then `cls` may be a function returning a class. + # In this case, by convention, `config` holds + # the kwargs of the function.
+ custom_objects = custom_objects or {} + with object_registration.CustomObjectScope(custom_objects): + deserialized_obj = cls(**cls_config) + + # Add object to shared objects, in case we find it referenced again. + _shared_object_loading_scope().set(shared_object_id, deserialized_obj) + + return deserialized_obj + + elif isinstance(identifier, str): + object_name = identifier + if custom_objects and object_name in custom_objects: + obj = custom_objects.get(object_name) + elif ( + object_name + in object_registration._THREAD_LOCAL_CUSTOM_OBJECTS.__dict__ + ): + obj = object_registration._THREAD_LOCAL_CUSTOM_OBJECTS.__dict__[ + object_name + ] + elif object_name in object_registration._GLOBAL_CUSTOM_OBJECTS: + obj = object_registration._GLOBAL_CUSTOM_OBJECTS[object_name] + else: + obj = module_objects.get(object_name) + if obj is None: + raise ValueError( + f"Unknown {printable_module_name}: '{object_name}'. " + "Please ensure you are using a " + "`keras.utils.custom_object_scope` " + "and that this object is included in the scope. See " + "https://www.tensorflow.org/guide/keras/save_and_serialize" + "#registering_the_custom_object for details." + ) + + # Classes passed by name are instantiated with no args, functions are + # returned as-is. + if tf_inspect.isclass(obj): + return obj() + return obj + elif tf_inspect.isfunction(identifier): + # If a function has already been deserialized, return as is. + return identifier + else: + raise ValueError( + "Could not interpret serialized " + f"{printable_module_name}: {identifier}" + ) + + +def validate_config(config): + """Determines whether config appears to be a valid layer config.""" + return ( + isinstance(config, dict) and _LAYER_UNDEFINED_CONFIG_KEY not in config + ) + + +def is_default(method): + """Check if a method is decorated with the `default` wrapper.""" + return getattr(method, "_is_default", False) diff --git a/keras/saving/losses_serialization_test.py b/keras/saving/losses_serialization_test.py deleted file mode 100644 index 354e67bf735d..000000000000 --- a/keras/saving/losses_serialization_test.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
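For the shared-object machinery above, the saving and loading scopes round-trip roughly as follows; a sketch (the regularizer is shared deliberately, and internal names like `SHARED_OBJECT_KEY` are taken from the module above):

```python
import tensorflow as tf
from keras.saving.legacy import serialization

reg = tf.keras.regularizers.L2(0.01)  # one instance, referenced twice

with serialization.SharedObjectSavingScope():
    cfg_a = serialization.serialize_keras_object(reg)
    cfg_b = serialization.serialize_keras_object(reg)

# The second serialization returns the same SharedObjectConfig and attaches
# the shared object id, since the object was seen more than once.
assert cfg_a is cfg_b
assert serialization.SHARED_OBJECT_KEY in cfg_b

with serialization.SharedObjectLoadingScope():
    reg_a = serialization.deserialize_keras_object(
        cfg_a, module_objects={"L2": tf.keras.regularizers.L2}
    )
    reg_b = serialization.deserialize_keras_object(
        cfg_b, module_objects={"L2": tf.keras.regularizers.L2}
    )
assert reg_a is reg_b  # restored once, then re-used
```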
-# ============================================================================== -"""Tests for Keras losses serialization.""" - -import tensorflow.compat.v2 as tf - -import os -import shutil - -from absl.testing import parameterized -import numpy as np - -import keras -from keras.testing_infra import test_combinations -from keras import layers -from keras import losses -from keras.optimizers import optimizer_v2 -from keras.testing_infra import test_utils -from keras.utils import generic_utils -from keras.utils import losses_utils - -try: - import h5py # pylint:disable=g-import-not-at-top -except ImportError: - h5py = None - - -# Custom loss class -class MyMeanAbsoluteError(losses.LossFunctionWrapper): - - def __init__(self, - reduction=losses_utils.ReductionV2.AUTO, - name='mean_absolute_error'): - super().__init__( - my_mae, name=name, reduction=reduction) - - -# Custom loss function -def my_mae(y_true, y_pred): - return keras.backend.mean(tf.abs(y_pred - y_true), axis=-1) - - -def _get_multi_io_model(): - inp_1 = layers.Input(shape=(1,), name='input_1') - inp_2 = layers.Input(shape=(1,), name='input_2') - d = test_utils.Bias(name='output') - out_1 = d(inp_1) - out_2 = d(inp_2) - return keras.Model([inp_1, inp_2], [out_1, out_2]) - - -@test_combinations.run_all_keras_modes -@parameterized.named_parameters([ - dict(testcase_name='string', value='mae'), - dict(testcase_name='built_in_fn', value=losses.mae), - dict(testcase_name='built_in_class', value=losses.MeanAbsoluteError()), - dict(testcase_name='custom_fn', value=my_mae), - dict(testcase_name='custom_class', value=MyMeanAbsoluteError()), - dict(testcase_name='list_of_strings', value=['mae', 'mae']), - dict(testcase_name='list_of_built_in_fns', value=[losses.mae, losses.mae]), - dict( - testcase_name='list_of_built_in_classes', - value=[losses.MeanAbsoluteError(), - losses.MeanAbsoluteError()]), - dict(testcase_name='list_of_custom_fns', value=[my_mae, my_mae]), - dict( - testcase_name='list_of_custom_classes', - value=[MyMeanAbsoluteError(), - MyMeanAbsoluteError()]), - dict( - testcase_name='dict_of_string', - value={ - 'output': 'mae', - 'output_1': 'mae', - }), - dict( - testcase_name='dict_of_built_in_fn', - value={ - 'output': losses.mae, - 'output_1': losses.mae, - }), - dict( - testcase_name='dict_of_built_in_class', - value={ - 'output': losses.MeanAbsoluteError(), - 'output_1': losses.MeanAbsoluteError(), - }), - dict( - testcase_name='dict_of_custom_fn', - value={ - 'output': my_mae, - 'output_1': my_mae - }), - dict( - testcase_name='dict_of_custom_class', - value={ - 'output': MyMeanAbsoluteError(), - 'output_1': MyMeanAbsoluteError(), - }), -]) -class LossesSerialization(test_combinations.TestCase): - - def setUp(self): - super(LossesSerialization, self).setUp() - tmpdir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, tmpdir) - self.model_filename = os.path.join(tmpdir, 'tmp_model_loss.h5') - self.x = np.array([[0.], [1.], [2.]], dtype='float32') - self.y = np.array([[0.5], [2.], [3.5]], dtype='float32') - self.w = np.array([1.25, 0.5, 1.25], dtype='float32') - - def test_serializing_model_with_loss_with_custom_object_scope(self, value): - with generic_utils.custom_object_scope({ - 'MyMeanAbsoluteError': MyMeanAbsoluteError, - 'my_mae': my_mae, - 'Bias': test_utils.Bias, - }): - model = _get_multi_io_model() - model.compile( - optimizer_v2.gradient_descent.SGD(0.1), - loss=value, - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit([self.x, self.x], [self.y, self.y], - batch_size=3, - epochs=3, - 
sample_weight=[self.w, self.w]) - - # Assert training. - self.assertAllClose(history.history['loss'], [2., 1.6, 1.2], 1e-3) - eval_results = model.evaluate([self.x, self.x], [self.y, self.y], - sample_weight=[self.w, self.w]) - - if h5py is None: - return - model.save(self.model_filename) - loaded_model = keras.models.load_model(self.model_filename) - loaded_model.predict([self.x, self.x]) - loaded_eval_results = loaded_model.evaluate( - [self.x, self.x], [self.y, self.y], sample_weight=[self.w, self.w]) - - # Assert all evaluation results are the same. - self.assertAllClose(eval_results, loaded_eval_results, 1e-9) - - def test_serializing_model_with_loss_with_custom_objects(self, value): - model = _get_multi_io_model() - model.compile( - optimizer_v2.gradient_descent.SGD(0.1), - loss=value, - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit([self.x, self.x], [self.y, self.y], - batch_size=3, - epochs=3, - sample_weight=[self.w, self.w]) - - # Assert training. - self.assertAllClose(history.history['loss'], [2., 1.6, 1.2], 1e-3) - eval_results = model.evaluate([self.x, self.x], [self.y, self.y], - sample_weight=[self.w, self.w]) - - if h5py is None: - return - model.save(self.model_filename) - loaded_model = keras.models.load_model( - self.model_filename, - custom_objects={ - 'MyMeanAbsoluteError': MyMeanAbsoluteError, - 'my_mae': my_mae, - 'Bias': test_utils.Bias, - }) - loaded_model.predict([self.x, self.x]) - loaded_eval_results = loaded_model.evaluate([self.x, self.x], - [self.y, self.y], - sample_weight=[self.w, self.w]) - - # Assert all evaluation results are the same. - self.assertAllClose(eval_results, loaded_eval_results, 1e-9) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/saving/metrics_serialization_test.py b/keras/saving/metrics_serialization_test.py deleted file mode 100644 index abbe99d122f9..000000000000 --- a/keras/saving/metrics_serialization_test.py +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
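The deleted losses test above and the metrics test that follows both exercise the same save/load round trip for custom objects; for reference, the pattern is (names and path illustrative):

```python
import numpy as np
import tensorflow as tf
import keras


def my_mae(y_true, y_pred):  # custom loss, stored by name in the config
    return keras.backend.mean(tf.abs(y_pred - y_true), axis=-1)


inp = keras.Input(shape=(1,))
model = keras.Model(inp, keras.layers.Dense(1)(inp))
model.compile("sgd", loss=my_mae)
model.fit(np.zeros((4, 1)), np.zeros((4, 1)), verbose=0)
model.save("/tmp/custom_loss_model.h5")

# Without the mapping below, load_model cannot resolve the "my_mae" string.
with keras.utils.custom_object_scope({"my_mae": my_mae}):
    restored = keras.models.load_model("/tmp/custom_loss_model.h5")
```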
-# ============================================================================== -"""Tests for Keras metrics serialization.""" - -import tensorflow.compat.v2 as tf - -import os -import shutil - -from absl.testing import parameterized -import numpy as np - -import keras -from keras.testing_infra import test_combinations -from keras import layers -from keras import metrics -from keras.optimizers import optimizer_v2 -from keras.testing_infra import test_utils -from keras.utils import generic_utils - -try: - import h5py # pylint:disable=g-import-not-at-top -except ImportError: - h5py = None - - -# Custom metric -class MyMeanAbsoluteError(metrics.MeanMetricWrapper): - - def __init__(self, name='my_mae', dtype=None): - super().__init__(_my_mae, name, dtype=dtype) - - -# Custom metric function -def _my_mae(y_true, y_pred): - return keras.backend.mean(tf.abs(y_pred - y_true), axis=-1) - - -def _get_multi_io_model(): - inp_1 = layers.Input(shape=(1,), name='input_1') - inp_2 = layers.Input(shape=(1,), name='input_2') - d = test_utils.Bias(name='output') - out_1 = d(inp_1) - out_2 = d(inp_2) - return keras.Model([inp_1, inp_2], [out_1, out_2]) - - -@test_combinations.run_all_keras_modes -@parameterized.named_parameters( - dict(testcase_name='string', value=['mae']), - dict(testcase_name='built_in_fn', value=[metrics.mae]), - dict(testcase_name='built_in_class', value=[metrics.MeanAbsoluteError]), - dict(testcase_name='custom_fn', value=[_my_mae]), - dict(testcase_name='custom_class', value=[MyMeanAbsoluteError]), - dict( - testcase_name='list_of_built_in_fn_and_list', - value=[metrics.mae, [metrics.mae]]), - dict( - testcase_name='list_of_built_in_class_and_list', - value=[metrics.MeanAbsoluteError, [metrics.MeanAbsoluteError]]), - dict( - testcase_name='list_of_custom_fn_and_list', value=[_my_mae, [_my_mae]]), - dict( - testcase_name='list_of_custom_class_and_list', - value=[MyMeanAbsoluteError, [MyMeanAbsoluteError]]), - dict( - testcase_name='list_of_lists_of_custom_fns', - value=[[_my_mae], [_my_mae, 'mae']]), - dict( - testcase_name='list_of_lists_of_custom_classes', - value=[[MyMeanAbsoluteError], [MyMeanAbsoluteError, 'mae']]), - dict( - testcase_name='dict_of_list_of_string', - value={ - 'output': ['mae'], - 'output_1': ['mae'], - }), - dict( - testcase_name='dict_of_list_of_built_in_fn', - value={ - 'output': [metrics.mae], - 'output_1': [metrics.mae], - }), - dict( - testcase_name='dict_of_list_of_built_in_class', - value={ - 'output': [metrics.MeanAbsoluteError], - 'output_1': [metrics.MeanAbsoluteError], - }), - dict( - testcase_name='dict_of_list_of_custom_fn', - value={ - 'output': [_my_mae], - 'output_1': [_my_mae], - }), - dict( - testcase_name='dict_of_list_of_custom_class', - value={ - 'output': [MyMeanAbsoluteError], - 'output_1': [MyMeanAbsoluteError], - }), - dict( - testcase_name='dict_of_string', - value={ - 'output': 'mae', - 'output_1': 'mae', - }), - dict( - testcase_name='dict_of_built_in_fn', - value={ - 'output': metrics.mae, - 'output_1': metrics.mae, - }), - dict( - testcase_name='dict_of_built_in_class', - value={ - 'output': metrics.MeanAbsoluteError, - 'output_1': metrics.MeanAbsoluteError, - }), - dict( - testcase_name='dict_of_custom_fn', - value={ - 'output': _my_mae, - 'output_1': _my_mae - }), - dict( - testcase_name='dict_of_custom_class', - value={ - 'output': MyMeanAbsoluteError, - 'output_1': MyMeanAbsoluteError, - }), -) -class MetricsSerialization(test_combinations.TestCase): - - def setUp(self): - super(MetricsSerialization, self).setUp() - tmpdir = 
self.get_temp_dir() - self.addCleanup(shutil.rmtree, tmpdir) - self.model_filename = os.path.join(tmpdir, 'tmp_model_metric.h5') - self.x = np.array([[0.], [1.], [2.]], dtype='float32') - self.y = np.array([[0.5], [2.], [3.5]], dtype='float32') - self.w = np.array([1.25, 0.5, 1.25], dtype='float32') - - def test_serializing_model_with_metric_with_custom_object_scope(self, value): - - def get_instance(x): - if isinstance(x, str): - return x - if isinstance(x, type) and issubclass(x, metrics.Metric): - return x() - return x - - metric_input = tf.nest.map_structure(get_instance, value) - weighted_metric_input = tf.nest.map_structure(get_instance, value) - - with generic_utils.custom_object_scope({ - 'MyMeanAbsoluteError': MyMeanAbsoluteError, - '_my_mae': _my_mae, - 'Bias': test_utils.Bias, - }): - model = _get_multi_io_model() - model.compile( - optimizer_v2.gradient_descent.SGD(0.1), - 'mae', - metrics=metric_input, - weighted_metrics=weighted_metric_input, - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit([self.x, self.x], [self.y, self.y], - batch_size=3, - epochs=3, - sample_weight=[self.w, self.w]) - - # Assert training. - self.assertAllClose(history.history['loss'], [2., 1.6, 1.2], 1e-3) - eval_results = model.evaluate([self.x, self.x], [self.y, self.y], - sample_weight=[self.w, self.w]) - - if h5py is None: - return - model.save(self.model_filename) - loaded_model = keras.models.load_model(self.model_filename) - loaded_model.predict([self.x, self.x]) - loaded_eval_results = loaded_model.evaluate( - [self.x, self.x], [self.y, self.y], sample_weight=[self.w, self.w]) - - # Assert all evaluation results are the same. - self.assertAllClose(eval_results, loaded_eval_results, 1e-9) - - def test_serializing_model_with_metric_with_custom_objects(self, value): - - def get_instance(x): - if isinstance(x, str): - return x - if isinstance(x, type) and issubclass(x, metrics.Metric): - return x() - return x - - metric_input = tf.nest.map_structure(get_instance, value) - weighted_metric_input = tf.nest.map_structure(get_instance, value) - - model = _get_multi_io_model() - model.compile( - optimizer_v2.gradient_descent.SGD(0.1), - 'mae', - metrics=metric_input, - weighted_metrics=weighted_metric_input, - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit([self.x, self.x], [self.y, self.y], - batch_size=3, - epochs=3, - sample_weight=[self.w, self.w]) - - # Assert training. - self.assertAllClose(history.history['loss'], [2., 1.6, 1.2], 1e-3) - eval_results = model.evaluate([self.x, self.x], [self.y, self.y], - sample_weight=[self.w, self.w]) - - if h5py is None: - return - model.save(self.model_filename) - loaded_model = keras.models.load_model( - self.model_filename, - custom_objects={ - 'MyMeanAbsoluteError': MyMeanAbsoluteError, - '_my_mae': _my_mae, - 'Bias': test_utils.Bias, - }) - loaded_model.predict([self.x, self.x]) - loaded_eval_results = loaded_model.evaluate([self.x, self.x], - [self.y, self.y], - sample_weight=[self.w, self.w]) - - # Assert all evaluation results are the same. - self.assertAllClose(eval_results, loaded_eval_results, 1e-9) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/saving/model_config.py b/keras/saving/model_config.py deleted file mode 100644 index c0590cce79b0..000000000000 --- a/keras/saving/model_config.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# pylint: disable=protected-access -"""Functions that save the model's config into different formats.""" - -from tensorflow.python.util.tf_export import keras_export - - -@keras_export('keras.models.model_from_config') -def model_from_config(config, custom_objects=None): - """Instantiates a Keras model from its config. - - Usage: - ``` - # for a Functional API model - tf.keras.Model().from_config(model.get_config()) - - # for a Sequential model - tf.keras.Sequential().from_config(model.get_config()) - ``` - - Args: - config: Configuration dictionary. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - - Returns: - A Keras model instance (uncompiled). - - Raises: - TypeError: if `config` is not a dictionary. - """ - if isinstance(config, list): - raise TypeError('`model_from_config` expects a dictionary, not a list. ' - f'Received: config={config}. Did you meant to use ' - '`Sequential.from_config(config)`?') - from keras.layers import deserialize # pylint: disable=g-import-not-at-top - return deserialize(config, custom_objects=custom_objects) - - -@keras_export('keras.models.model_from_yaml') -def model_from_yaml(yaml_string, custom_objects=None): - """Parses a yaml model configuration file and returns a model instance. - - Note: Since TF 2.6, this method is no longer supported and will raise a - RuntimeError. - - Args: - yaml_string: YAML string or open file encoding a model configuration. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - - Returns: - A Keras model instance (uncompiled). - - Raises: - RuntimeError: announces that the method poses a security risk - """ - raise RuntimeError( - 'Method `model_from_yaml()` has been removed due to security risk of ' - 'arbitrary code execution. Please use `Model.to_json()` and ' - '`model_from_json()` instead.' - ) - - -@keras_export('keras.models.model_from_json') -def model_from_json(json_string, custom_objects=None): - """Parses a JSON model configuration string and returns a model instance. - - Usage: - - >>> model = tf.keras.Sequential([ - ... tf.keras.layers.Dense(5, input_shape=(3,)), - ... tf.keras.layers.Softmax()]) - >>> config = model.to_json() - >>> loaded_model = tf.keras.models.model_from_json(config) - - Args: - json_string: JSON string encoding a model configuration. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - - Returns: - A Keras model instance (uncompiled). 
- """ - from keras.layers import deserialize_from_json # pylint: disable=g-import-not-at-top - return deserialize_from_json(json_string, custom_objects=custom_objects) diff --git a/keras/saving/object_registration.py b/keras/saving/object_registration.py new file mode 100644 index 000000000000..a64b21f3313f --- /dev/null +++ b/keras/saving/object_registration.py @@ -0,0 +1,226 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Python utilities required by Keras.""" + +import inspect +import threading + +# isort: off +from tensorflow.python.util.tf_export import keras_export + +_GLOBAL_CUSTOM_OBJECTS = {} +_GLOBAL_CUSTOM_NAMES = {} +# Thread-local custom objects set by custom_object_scope. +_THREAD_LOCAL_CUSTOM_OBJECTS = threading.local() + + +@keras_export( + "keras.saving.custom_object_scope", + "keras.utils.custom_object_scope", + "keras.utils.CustomObjectScope", +) +class CustomObjectScope: + """Exposes custom classes/functions to Keras deserialization internals. + + Under a scope `with custom_object_scope(objects_dict)`, Keras methods such + as `tf.keras.models.load_model` or `tf.keras.models.model_from_config` + will be able to deserialize any custom object referenced by a + saved config (e.g. a custom layer or metric). + + Example: + + Consider a custom regularizer `my_regularizer`: + + ```python + layer = Dense(3, kernel_regularizer=my_regularizer) + # Config contains a reference to `my_regularizer` + config = layer.get_config() + ... + # Later: + with custom_object_scope({'my_regularizer': my_regularizer}): + layer = Dense.from_config(config) + ``` + + Args: + *args: Dictionary or dictionaries of `{name: object}` pairs. + """ + + def __init__(self, *args): + self.custom_objects = args + self.backup = None + + def __enter__(self): + self.backup = _THREAD_LOCAL_CUSTOM_OBJECTS.__dict__.copy() + for objects in self.custom_objects: + _THREAD_LOCAL_CUSTOM_OBJECTS.__dict__.update(objects) + return self + + def __exit__(self, *args, **kwargs): + _THREAD_LOCAL_CUSTOM_OBJECTS.__dict__.clear() + _THREAD_LOCAL_CUSTOM_OBJECTS.__dict__.update(self.backup) + + +@keras_export( + "keras.saving.get_custom_objects", "keras.utils.get_custom_objects" +) +def get_custom_objects(): + """Retrieves a live reference to the global dictionary of custom objects. + + Custom objects set using using `custom_object_scope` are not added to the + global dictionary of custom objects, and will not appear in the returned + dictionary. + + Example: + + ```python + get_custom_objects().clear() + get_custom_objects()['MyObject'] = MyObject + ``` + + Returns: + Global dictionary mapping registered class names to classes. 
+ """ + return _GLOBAL_CUSTOM_OBJECTS + + +@keras_export( + "keras.saving.register_keras_serializable", + "keras.utils.register_keras_serializable", +) +def register_keras_serializable(package="Custom", name=None): + """Registers an object with the Keras serialization framework. + + This decorator injects the decorated class or function into the Keras custom + object dictionary, so that it can be serialized and deserialized without + needing an entry in the user-provided custom object dict. It also injects a + function that Keras will call to get the object's serializable string key. + + Note that to be serialized and deserialized, classes must implement the + `get_config()` method. Functions do not have this requirement. + + The object will be registered under the key 'package>name' where `name`, + defaults to the object name if not passed. + + Example: + + ```python + # Note that `'my_package'` is used as the `package` argument here, and since + # the `name` argument is not provided, `'MyDense'` is used as the `name`. + @keras.saving.register_keras_serializable('my_package') + class MyDense(keras.layers.Dense): + pass + + assert keras.saving.get_registered_object('my_package>MyDense') == MyDense + assert keras.saving.get_registered_name(MyDense) == 'my_package>MyDense' + ``` + + Args: + package: The package that this class belongs to. This is used for the + `key` (which is `"package>name"`) to idenfify the class. Note that this + is the first argument passed into the decorator. + name: The name to serialize this class under in this package. If not + provided or `None`, the class' name will be used (note that this is the + case when the decorator is used with only one argument, which becomes + the `package`). + + Returns: + A decorator that registers the decorated class with the passed names. + """ + + def decorator(arg): + """Registers a class with the Keras serialization framework.""" + class_name = name if name is not None else arg.__name__ + registered_name = package + ">" + class_name + + if inspect.isclass(arg) and not hasattr(arg, "get_config"): + raise ValueError( + "Cannot register a class that does not have a " + "get_config() method." + ) + + _GLOBAL_CUSTOM_OBJECTS[registered_name] = arg + _GLOBAL_CUSTOM_NAMES[arg] = registered_name + + return arg + + return decorator + + +@keras_export( + "keras.saving.get_registered_name", "keras.utils.get_registered_name" +) +def get_registered_name(obj): + """Returns the name registered to an object within the Keras framework. + + This function is part of the Keras serialization and deserialization + framework. It maps objects to the string names associated with those objects + for serialization/deserialization. + + Args: + obj: The object to look up. + + Returns: + The name associated with the object, or the default Python name if the + object is not registered. + """ + if obj in _GLOBAL_CUSTOM_NAMES: + return _GLOBAL_CUSTOM_NAMES[obj] + else: + return obj.__name__ + + +@keras_export( + "keras.saving.get_registered_object", "keras.utils.get_registered_object" +) +def get_registered_object(name, custom_objects=None, module_objects=None): + """Returns the class associated with `name` if it is registered with Keras. + + This function is part of the Keras serialization and deserialization + framework. It maps strings to the objects associated with them for + serialization/deserialization. 
+ + Example: + + ```python + def from_config(cls, config, custom_objects=None): + if 'my_custom_object_name' in config: + config['hidden_cls'] = tf.keras.saving.get_registered_object( + config['my_custom_object_name'], custom_objects=custom_objects) + ``` + + Args: + name: The name to look up. + custom_objects: A dictionary of custom objects to look the name up in. + Generally, custom_objects is provided by the user. + module_objects: A dictionary of custom objects to look the name up in. + Generally, module_objects is provided by midlevel library implementers. + + Returns: + An instantiable class associated with `name`, or `None` if no such class + exists. + """ + if name in _THREAD_LOCAL_CUSTOM_OBJECTS.__dict__: + return _THREAD_LOCAL_CUSTOM_OBJECTS.__dict__[name] + elif name in _GLOBAL_CUSTOM_OBJECTS: + return _GLOBAL_CUSTOM_OBJECTS[name] + elif custom_objects and name in custom_objects: + return custom_objects[name] + elif module_objects and name in module_objects: + return module_objects[name] + return None + + +# Aliases +custom_object_scope = CustomObjectScope diff --git a/keras/saving/object_registration_test.py b/keras/saving/object_registration_test.py new file mode 100644 index 000000000000..3b1a95ca57a7 --- /dev/null +++ b/keras/saving/object_registration_test.py @@ -0,0 +1,144 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
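
The body of `get_registered_object` above establishes a precedence order: an active `custom_object_scope` wins over the global registry, which in turn wins over the caller-supplied `custom_objects` and `module_objects` dicts. A small sketch against this module, with hypothetical names:

```python
from keras.saving import object_registration


def from_caller():  # hypothetical object passed via custom_objects
    pass


def from_scope():  # hypothetical object installed by a scope
    pass


# No scope active: the caller-supplied dict is consulted and wins here.
found = object_registration.get_registered_object(
    "demo>fn", custom_objects={"demo>fn": from_caller}
)
assert found is from_caller

# With a scope active, the thread-local entry shadows custom_objects.
with object_registration.custom_object_scope({"demo>fn": from_scope}):
    found = object_registration.get_registered_object(
        "demo>fn", custom_objects={"demo>fn": from_caller}
    )
    assert found is from_scope
```
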
+# ============================================================================== +"""Tests for Keras serializable object registration functionality.""" + +import tensorflow.compat.v2 as tf + +import keras +from keras.saving import object_registration +from keras.saving import serialization_lib + + +class TestObjectRegistration(tf.test.TestCase): + def test_custom_object_scope(self): + def custom_fn(): + pass + + class CustomClass: + pass + + def check_get_in_thread(): + with object_registration.custom_object_scope( + {"CustomClass": CustomClass, "custom_fn": custom_fn} + ): + actual_custom_fn = keras.activations.get("custom_fn") + self.assertEqual(actual_custom_fn, custom_fn) + actual_custom_class = keras.regularizers.get("CustomClass") + self.assertEqual(actual_custom_class.__class__, CustomClass) + + with object_registration.custom_object_scope( + {"CustomClass": CustomClass, "custom_fn": custom_fn} + ): + actual_custom_fn = keras.activations.get("custom_fn") + self.assertEqual(actual_custom_fn, custom_fn) + actual_custom_class = keras.regularizers.get("CustomClass") + self.assertEqual(actual_custom_class.__class__, CustomClass) + checked_thread = self.checkedThread(check_get_in_thread) + checked_thread.start() + checked_thread.join() + + def test_serialize_custom_class_with_default_name(self): + @object_registration.register_keras_serializable() + class TestClass: + def __init__(self, value): + self._value = value + + def get_config(self): + return {"value": self._value} + + serialized_name = "Custom>TestClass" + inst = TestClass(value=10) + class_name = object_registration._GLOBAL_CUSTOM_NAMES[TestClass] + self.assertEqual(serialized_name, class_name) + config = serialization_lib.serialize_keras_object(inst) + self.assertEqual(class_name, config["class_name"]) + new_inst = serialization_lib.deserialize_keras_object(config) + self.assertIsNot(inst, new_inst) + self.assertIsInstance(new_inst, TestClass) + self.assertEqual(10, new_inst._value) + + # Make sure registering a new class with same name will fail. 
+ with self.assertRaisesRegex( + ValueError, ".*has already been registered.*" + ): + + @object_registration.register_keras_serializable() + class TestClass: + def __init__(self, value): + self._value = value + + def get_config(self): + return {"value": self._value} + + def test_serialize_custom_class_with_custom_name(self): + @object_registration.register_keras_serializable( + "TestPackage", "CustomName" + ) + class OtherTestClass: + def __init__(self, val): + self._val = val + + def get_config(self): + return {"val": self._val} + + serialized_name = "TestPackage>CustomName" + inst = OtherTestClass(val=5) + class_name = object_registration._GLOBAL_CUSTOM_NAMES[OtherTestClass] + self.assertEqual(serialized_name, class_name) + fn_class_name = object_registration.get_registered_name(OtherTestClass) + self.assertEqual(fn_class_name, class_name) + + cls = object_registration.get_registered_object(fn_class_name) + self.assertEqual(OtherTestClass, cls) + + config = keras.utils.serialization.serialize_keras_object(inst) + self.assertEqual(class_name, config["class_name"]) + new_inst = keras.utils.serialization.deserialize_keras_object(config) + self.assertIsNot(inst, new_inst) + self.assertIsInstance(new_inst, OtherTestClass) + self.assertEqual(5, new_inst._val) + + def test_serialize_custom_function(self): + @object_registration.register_keras_serializable() + def my_fn(): + return 42 + + serialized_name = "Custom>my_fn" + class_name = object_registration._GLOBAL_CUSTOM_NAMES[my_fn] + self.assertEqual(serialized_name, class_name) + fn_class_name = object_registration.get_registered_name(my_fn) + self.assertEqual(fn_class_name, class_name) + + config = keras.utils.serialization.serialize_keras_object(my_fn) + self.assertEqual(class_name, config) + fn = keras.utils.serialization.deserialize_keras_object(config) + self.assertEqual(42, fn()) + + fn_2 = object_registration.get_registered_object(fn_class_name) + self.assertEqual(42, fn_2()) + + def test_serialize_custom_class_without_get_config_fails(self): + + with self.assertRaisesRegex( + ValueError, + "Cannot register a class that does not have a get_config.*", + ): + + @object_registration.register_keras_serializable( + "TestPackage", "TestClass" + ) + class TestClass: + def __init__(self, value): + self._value = value diff --git a/keras/saving/pickle_utils.py b/keras/saving/pickle_utils.py index 93931a92e481..fe84b548f154 100644 --- a/keras/saving/pickle_utils.py +++ b/keras/saving/pickle_utils.py @@ -13,69 +13,65 @@ # limitations under the License. # ============================================================================== """Saving utilities to support Python's Pickle protocol.""" -# pylint: disable=g-bad-import-order -import tensorflow.compat.v2 as tf - import os -import tarfile -import io -import uuid -import numpy +import tempfile + +import tensorflow.compat.v2 as tf -from keras.saving import save as save_module +from keras.saving import saving_lib def deserialize_model_from_bytecode(serialized_model): - """Reconstruct a Model from the output of `serialize_model_as_bytecode`. + """Reconstruct a Model from the output of `serialize_model_as_bytecode`. - Args: - serialized_model: (np.array) return value from - `serialize_model_as_bytecode`. + Args: + serialized_model: (bytes) return value from + `serialize_model_as_bytecode`. - Returns: - keras.Model: Keras Model instance. 
- """ - temp_dir = f"ram://{uuid.uuid4()}" - b = io.BytesIO(serialized_model) - with tarfile.open(fileobj=b, mode="r") as archive: - for name in archive.getnames(): - dest_path = tf.io.gfile.join(temp_dir, name) - member = archive.getmember(name) - tf.io.gfile.makedirs(os.path.dirname(dest_path)) - if member.isfile(): - with tf.io.gfile.GFile(dest_path, "wb") as f: - f.write(archive.extractfile(name).read()) - model = save_module.load_model(temp_dir) - tf.io.gfile.rmtree(temp_dir) - return model + Returns: + Keras Model instance. + """ + # Note: we don't use a RAM path for this because zipfile cannot write + # to such paths. + temp_dir = tempfile.mkdtemp() + try: + filepath = os.path.join(temp_dir, "model.keras") + with open(filepath, "wb") as f: + f.write(serialized_model) + # When loading, direct import will work for most custom objects + # though it will require get_config() to be implemented. + # Some custom objects (e.g. an activation in a Dense layer, + # serialized as a string by Dense.get_config()) will require + # a custom_object_scope. + model = saving_lib.load_model(filepath, safe_mode=False) + except Exception as e: + raise e + else: + return model + finally: + tf.io.gfile.rmtree(temp_dir) def serialize_model_as_bytecode(model): - """Convert a Keras Model into a bytecode representation for pickling. + """Convert a Keras Model into a bytecode representation for pickling. - Args: - model: (tf.keras.Model) Keras Model instance. + Args: + model: Keras Model instance. - Returns: - tuple: tuple of arguments that can be sent to - `deserialize_from_bytecode`. - """ - temp_dir = f"ram://{uuid.uuid4()}" - model.save(temp_dir) - b = io.BytesIO() - with tarfile.open(fileobj=b, mode="w") as archive: - for root, dirs, filenames in tf.io.gfile.walk(temp_dir): - for dirname in dirs: - dest_path = tf.io.gfile.join(root, dirname) - t = tarfile.TarInfo(dest_path) - t.type = tarfile.DIRTYPE - archive.addfile(t) - for filename in filenames: - dest_path = tf.io.gfile.join(root, filename) - with tf.io.gfile.GFile(dest_path, "rb") as f: - info = tarfile.TarInfo(name=os.path.relpath(dest_path, temp_dir)) - info.size = f.size() - archive.addfile(tarinfo=info, fileobj=f) - tf.io.gfile.rmtree(temp_dir) - b.seek(0) - return (numpy.asarray(memoryview(b.read())),) + Returns: + Tuple that can be read by `deserialize_from_bytecode`. + """ + # Note: we don't use a RAM path for this because zipfile cannot write + # to such paths. + temp_dir = tempfile.mkdtemp() + try: + filepath = os.path.join(temp_dir, "model.keras") + saving_lib.save_model(model, filepath) + with open(filepath, "rb") as f: + data = f.read() + except Exception as e: + raise e + else: + return data + finally: + tf.io.gfile.rmtree(temp_dir) diff --git a/keras/saving/pickle_utils_test.py b/keras/saving/pickle_utils_test.py index c4f06d39b37b..0d487ea8422f 100644 --- a/keras/saving/pickle_utils_test.py +++ b/keras/saving/pickle_utils_test.py @@ -13,68 +13,86 @@ # limitations under the License. 
# ============================================================================== """Tests for pickling / deepcopying of Keras Models.""" -# pylint: disable=g-bad-import-order -import tensorflow.compat.v2 as tf - import copy import pickle + import numpy as np +import tensorflow.compat.v2 as tf from keras.testing_infra import test_combinations from keras.testing_infra import test_utils +@test_utils.run_v2_only class TestPickleProtocol(test_combinations.TestCase): - """Tests pickle protoocol support.""" + """Tests pickle protocol support.""" - @test_combinations.run_with_all_model_types - @test_combinations.parameterized.named_parameters( - ('copy', copy.copy), ('deepcopy', copy.deepcopy), - *((f'pickle_protocol_level_{protocol}', - lambda model: pickle.loads(pickle.dumps(model, protocol=protocol))) # pylint: disable=cell-var-from-loop - for protocol in range(pickle.HIGHEST_PROTOCOL + 1))) - def test_built_models(self, serializer): - """Built models should be copyable and picklable for all model types.""" - if not tf.__internal__.tf2.enabled(): - self.skipTest('pickle model only available in v2 when tf format is used.') - model = test_utils.get_small_mlp( - num_hidden=1, num_classes=2, input_dim=3) - model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy') + @test_combinations.run_with_all_model_types + @test_combinations.parameterized.named_parameters( + ("copy", copy.copy), + ("deepcopy", copy.deepcopy), + *( + ( + f"pickle_protocol_level_{protocol}", + lambda model: pickle.loads( + pickle.dumps(model, protocol=protocol) + ), + ) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1) + ), + ) + def test_built_models(self, serializer): + """Built models should be copyable and pickleable for all model + types.""" + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "pickle model only available in v2 when tf format is used." 
+ ) + model = test_utils.get_small_mlp( + num_hidden=1, num_classes=2, input_dim=3 + ) + model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy") - # train - x = np.random.random(size=(1000, 3)) - y = np.random.randint(low=0, high=2, size=(1000,)) - model.fit(x, y) # builds model - y1 = model.predict(x) - # roundtrip with training - model = serializer(model) - y2 = model.predict(x) - # check that the predictions are the same - self.assertAllClose(y1, y2) - # and that we can continue training - model.fit(x, y) - y3 = model.predict(x) - # check that the predictions are the same - self.assertNotAllClose(y2, y3) + # train + x = np.random.random(size=(10, 3)) + y = np.random.randint(low=0, high=2, size=(10,)) + model.fit(x, y) # builds model + y1 = model.predict(x) + # roundtrip with training + model = serializer(model) + y2 = model.predict(x) + # check that the predictions are the same + self.assertAllClose(y1, y2) + # and that we can continue training + model.fit(x, y) + y3 = model.predict(x) + # check that the predictions are the same + self.assertNotAllClose(y2, y3) - @test_combinations.run_with_all_model_types - @test_combinations.parameterized.named_parameters( - ('copy', copy.copy), - ('deepcopy', copy.deepcopy), - ) - def test_unbuilt_models(self, serializer): - """Unbuilt models should be copyable & deepcopyable for all model types.""" - if not tf.__internal__.tf2.enabled(): - self.skipTest('pickle model only available in v2 when tf format is used.') - original_model = test_utils.get_small_mlp( - num_hidden=1, num_classes=2, input_dim=3) - # roundtrip without compiling or training - model = serializer(original_model) - # compile - model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy') - # roundtrip compiled but not trained - model = serializer(model) + @test_combinations.run_with_all_model_types + @test_combinations.parameterized.named_parameters( + ("copy", copy.copy), + ("deepcopy", copy.deepcopy), + ) + def test_unbuilt_models(self, serializer): + """Unbuilt models should be copyable & deepcopyable for all model + types.""" + if not tf.__internal__.tf2.enabled(): + self.skipTest( + "pickle model only available in v2 when tf format is used." + ) + original_model = test_utils.get_small_mlp( + num_hidden=1, num_classes=2, input_dim=3 + ) + # roundtrip without compiling or training + model = serializer(original_model) + # compile + model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy") + if hasattr(model.optimizer, "_distribution_strategy"): + model.optimizer._distribution_strategy = None + # roundtrip compiled but not trained + model = serializer(model) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/saving/save.py b/keras/saving/save.py deleted file mode 100644 index 270a6cdca8b4..000000000000 --- a/keras/saving/save.py +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
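
The tests above exercise `copy`, `deepcopy`, and every available pickle protocol; the essence of what they assert can be sketched standalone (shapes and sizes here are arbitrary):

```python
import copy

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(3,))])
model.compile(optimizer="sgd", loss="mse")

x = np.random.random((8, 3)).astype("float32")
y_before = model.predict(x)

# deepcopy routes through the same pickle machinery under test.
clone = copy.deepcopy(model)
y_after = clone.predict(x)

# A faithful copy must reproduce the original predictions.
np.testing.assert_allclose(y_before, y_after, atol=1e-5)
```
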
-# ============================================================================== -"""Keras model saving code.""" - -import tensorflow.compat.v2 as tf -from keras.saving import hdf5_format -from keras.saving import saving_utils -from keras.saving.saved_model import load as saved_model_load -from keras.saving.saved_model import load_context -from keras.saving.saved_model import save as saved_model_save -from keras.utils import generic_utils -from keras.utils import traceback_utils -from keras.utils.io_utils import path_to_string -from tensorflow.python.util.tf_export import keras_export - -# pylint: disable=g-import-not-at-top -try: - import h5py -except ImportError: - h5py = None -# pylint: enable=g-import-not-at-top - - -@keras_export('keras.models.save_model') -@traceback_utils.filter_traceback -def save_model(model, - filepath, - overwrite=True, - include_optimizer=True, - save_format=None, - signatures=None, - options=None, - save_traces=True): - # pylint: disable=line-too-long - """Saves a model as a TensorFlow SavedModel or HDF5 file. - - See the [Serialization and Saving guide](https://keras.io/guides/serialization_and_saving/) - for details. - - Usage: - - >>> model = tf.keras.Sequential([ - ... tf.keras.layers.Dense(5, input_shape=(3,)), - ... tf.keras.layers.Softmax()]) - >>> model.save('/tmp/model') - >>> loaded_model = tf.keras.models.load_model('/tmp/model') - >>> x = tf.random.uniform((10, 3)) - >>> assert np.allclose(model.predict(x), loaded_model.predict(x)) - - Note that `model.save()` is an alias for `tf.keras.models.save_model()`. - - The SavedModel and HDF5 file contains: - - - the model's configuration (topology) - - the model's weights - - the model's optimizer's state (if any) - - Thus models can be reinstantiated in the exact same state, without any of the - code used for model definition or training. - - Note that the model weights may have different scoped names after being - loaded. Scoped names include the model/layer names, such as - `"dense_1/kernel:0"`. It is recommended that you use the layer properties to - access specific variables, e.g. `model.get_layer("dense_1").kernel`. - - __SavedModel serialization format__ - - Keras SavedModel uses `tf.saved_model.save` to save the model and all - trackable objects attached to the model (e.g. layers and variables). The model - config, weights, and optimizer are saved in the SavedModel. Additionally, for - every Keras layer attached to the model, the SavedModel stores: - - * the config and metadata -- e.g. name, dtype, trainable status - * traced call and loss functions, which are stored as TensorFlow subgraphs. - - The traced functions allow the SavedModel format to save and load custom - layers without the original class definition. - - You can choose to not save the traced functions by disabling the `save_traces` - option. This will decrease the time it takes to save the model and the - amount of disk space occupied by the output SavedModel. If you enable this - option, then you _must_ provide all custom class definitions when loading - the model. See the `custom_objects` argument in `tf.keras.models.load_model`. - - Args: - model: Keras model instance to be saved. - filepath: One of the following: - - String or `pathlib.Path` object, path where to save the model - - `h5py.File` object where to save the model - overwrite: Whether we should overwrite any existing model at the target - location, or instead ask the user with a manual prompt. - include_optimizer: If True, save optimizer's state together. 
- save_format: Either 'tf' or 'h5', indicating whether to save the model - to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, and 'h5' - in TF 1.X. - signatures: Signatures to save with the SavedModel. Applicable to the 'tf' - format only. Please see the `signatures` argument in - `tf.saved_model.save` for details. - options: (only applies to SavedModel format) `tf.saved_model.SaveOptions` - object that specifies options for saving to SavedModel. - save_traces: (only applies to SavedModel format) When enabled, the - SavedModel will store the function traces for each layer. This - can be disabled, so that only the configs of each layer are stored. - Defaults to `True`. Disabling this will decrease serialization time and - reduce file size, but it requires that all custom layers/models - implement a `get_config()` method. - - Raises: - ImportError: If save format is hdf5, and h5py is not available. - """ - # pylint: enable=line-too-long - from keras.engine import sequential # pylint: disable=g-import-not-at-top - - default_format = 'tf' if tf.__internal__.tf2.enabled() else 'h5' - save_format = save_format or default_format - - filepath = path_to_string(filepath) - - # If the user has not already called fit or built the underlying metrics, we - # should do that before saving to ensure the metric names have all - # appropriate name transformations applied. - saving_utils.try_build_compiled_arguments(model) - - if (save_format == 'h5' or - (h5py is not None and isinstance(filepath, h5py.File)) or - saving_utils.is_hdf5_filepath(filepath)): - # TODO(b/130258301): add utility method for detecting model type. - if (not model._is_graph_network and # pylint:disable=protected-access - not isinstance(model, sequential.Sequential)): - raise NotImplementedError( - 'Saving the model to HDF5 format requires the model to be a ' - 'Functional model or a Sequential model. It does not work for ' - 'subclassed models, because such models are defined via the body of ' - 'a Python method, which isn\'t safely serializable. Consider saving ' - 'to the Tensorflow SavedModel format (by setting save_format="tf") ' - 'or using `save_weights`.') - hdf5_format.save_model_to_hdf5( - model, filepath, overwrite, include_optimizer) - else: - with generic_utils.SharedObjectSavingScope(): - saved_model_save.save(model, filepath, overwrite, include_optimizer, - signatures, options, save_traces) - - -@keras_export('keras.models.load_model') -@traceback_utils.filter_traceback -def load_model(filepath, custom_objects=None, compile=True, options=None): # pylint: disable=redefined-builtin - """Loads a model saved via `model.save()`. - - Usage: - - >>> model = tf.keras.Sequential([ - ... tf.keras.layers.Dense(5, input_shape=(3,)), - ... tf.keras.layers.Softmax()]) - >>> model.save('/tmp/model') - >>> loaded_model = tf.keras.models.load_model('/tmp/model') - >>> x = tf.random.uniform((10, 3)) - >>> assert np.allclose(model.predict(x), loaded_model.predict(x)) - - Note that the model weights may have different scoped names after being - loaded. Scoped names include the model/layer names, such as - `"dense_1/kernel:0"`. It is recommended that you use the layer properties to - access specific variables, e.g. `model.get_layer("dense_1").kernel`. 
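
For reference, the behavior documented by the `save.py` being removed here: the format is chosen by `save_format` or inferred from the filepath, with a directory path producing a SavedModel and an `.h5` suffix producing a single HDF5 file. A sketch with invented paths:

```python
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential(
    [tf.keras.layers.Dense(5, input_shape=(3,)), tf.keras.layers.Softmax()]
)

model.save("/tmp/model_tf")  # directory path -> TensorFlow SavedModel
model.save("/tmp/model.h5")  # .h5 suffix -> single HDF5 file

x = np.random.random((10, 3)).astype("float32")
for path in ("/tmp/model_tf", "/tmp/model.h5"):
    loaded = tf.keras.models.load_model(path)
    np.testing.assert_allclose(model.predict(x), loaded.predict(x), atol=1e-6)
```
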
- - Args: - filepath: One of the following: - - String or `pathlib.Path` object, path to the saved model - - `h5py.File` object from which to load the model - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - compile: Boolean, whether to compile the model - after loading. - options: Optional `tf.saved_model.LoadOptions` object that specifies - options for loading from SavedModel. - - Returns: - A Keras model instance. If the original model was compiled, and saved with - the optimizer, then the returned model will be compiled. Otherwise, the - model will be left uncompiled. In the case that an uncompiled model is - returned, a warning is displayed if the `compile` argument is set to - `True`. - - Raises: - ImportError: if loading from an hdf5 file and h5py is not available. - IOError: In case of an invalid savefile. - """ - with generic_utils.SharedObjectLoadingScope(): - with generic_utils.CustomObjectScope(custom_objects or {}): - with load_context.load_context(options): - filepath_str = path_to_string(filepath) - if isinstance(filepath_str, str): - if not tf.io.gfile.exists(filepath_str): - raise IOError(f'No file or directory found at {filepath_str}') - - if tf.io.gfile.isdir(filepath_str): - return saved_model_load.load(filepath_str, compile, options) - else: - if h5py is None: - raise ImportError( - 'Filepath looks like a hdf5 file but h5py is not available.' - f' filepath={filepath_str}') - return hdf5_format.load_model_from_hdf5( - tf.io.gfile.GFile(filepath_str, mode='rb'), custom_objects, - compile) - elif h5py is not None and isinstance(filepath, h5py.File): - return hdf5_format.load_model_from_hdf5(filepath, custom_objects, - compile) - - raise IOError( - 'Unable to load model. Filepath is not an hdf5 file (or h5py is not ' - f'available) or SavedModel. Received: filepath={filepath}') - -# Inject the load_model function to keras_deps to remove the dependency -# from TFLite to Keras. -tf.__internal__.register_load_model_function(load_model) diff --git a/keras/saving/save_test.py b/keras/saving/save_test.py deleted file mode 100644 index 27fde3a312f5..000000000000 --- a/keras/saving/save_test.py +++ /dev/null @@ -1,1385 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for Keras model saving code.""" - -import tensorflow.compat.v2 as tf - -import collections -import os -import pathlib -import shutil -import tempfile -import warnings - -from absl.testing import parameterized -import numpy as np - -import keras -from keras import losses -from keras.optimizers import optimizer_v1 -from keras import optimizers -from keras.engine import functional -from keras.engine import sequential -from keras.feature_column import dense_features -from keras.feature_column import sequence_feature_column as ksfc -from keras.layers import core -from keras.premade_models.linear import LinearModel -from keras.saving import model_config -from keras.saving import save -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils -from keras.utils import generic_utils - - -try: - import h5py # pylint:disable=g-import-not-at-top -except ImportError: - h5py = None - - -class TestSaveModel(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super().setUp() - self.model = test_utils.get_small_sequential_mlp(1, 2, 3) - self.subclassed_model = test_utils.get_small_subclass_mlp(1, 2) - - def assert_h5_format(self, path): - if h5py is not None: - self.assertTrue(h5py.is_hdf5(path), - 'Model saved at path {} is not a valid hdf5 file.' - .format(path)) - - def assert_saved_model(self, path): - tf.__internal__.saved_model.parse_saved_model(path) - - @test_utils.run_v2_only - def test_load_file_not_found(self): - path = pathlib.Path(self.get_temp_dir()) / 'does_not_exist' - with self.assertRaisesRegex(IOError, 'No file or directory found at'): - save.load_model(path) - - @test_utils.run_v2_only - def test_save_format_defaults(self): - path = os.path.join(self.get_temp_dir(), 'model_path') - save.save_model(self.model, path) - self.assert_saved_model(path) - - @test_utils.run_v2_only - def test_save_format_defaults_pathlib(self): - path = pathlib.Path(self.get_temp_dir()) / 'model_path' - save.save_model(self.model, path) - self.assert_saved_model(path) - - @test_utils.run_v2_only - def test_save_hdf5(self): - path = os.path.join(self.get_temp_dir(), 'model') - save.save_model(self.model, path, save_format='h5') - self.assert_h5_format(path) - with self.assertRaisesRegex( - NotImplementedError, - 'requires the model to be a Functional model or a Sequential model.'): - save.save_model(self.subclassed_model, path, save_format='h5') - - @test_utils.run_v2_only - def test_save_load_hdf5_pathlib(self): - path = pathlib.Path(self.get_temp_dir()) / 'model' - save.save_model(self.model, path, save_format='h5') - save.load_model(path) - - @test_utils.run_v2_only - def test_save_tf(self): - path = os.path.join(self.get_temp_dir(), 'model') - save.save_model(self.model, path, save_format='tf') - self.assert_saved_model(path) - with self.assertRaisesRegex( - ValueError, r'Model.*cannot be saved.*as opposed to `model.call\(\).*'): - save.save_model(self.subclassed_model, path, save_format='tf') - self.subclassed_model.predict(np.random.random((3, 5))) - save.save_model(self.subclassed_model, path, save_format='tf') - self.assert_saved_model(path) - - @test_utils.run_v2_only - def test_save_load_tf_string(self): - path = os.path.join(self.get_temp_dir(), 'model') - save.save_model(self.model, path, save_format='tf') - save.load_model(path) - - @test_utils.run_v2_only - def test_save_load_tf_pathlib(self): - path = pathlib.Path(self.get_temp_dir()) / 'model' - 
save.save_model(self.model, path, save_format='tf') - save.load_model(path) - - @test_utils.run_v2_only - def test_save_load_weights_tf_pathlib(self): - path = pathlib.Path(self.get_temp_dir()) / 'model' - self.model.save_weights(path, save_format='tf') - self.model.load_weights(path) - - @test_utils.run_v2_only - def test_save_load_weights_hdf5_pathlib(self): - path = pathlib.Path(self.get_temp_dir()) / 'model' - self.model.save_weights(path, save_format='h5') - self.model.load_weights(path) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_saving_h5_for_rnn_layers(self): - # See https://github.com/tensorflow/tensorflow/issues/35731 for details. - inputs = keras.Input([10, 91], name='train_input') - rnn_layers = [ - keras.layers.LSTMCell(size, recurrent_dropout=0, name='rnn_cell%d' % i) - for i, size in enumerate([512, 512]) - ] - rnn_output = keras.layers.RNN( - rnn_layers, return_sequences=True, name='rnn_layer')(inputs) - pred_feat = keras.layers.Dense(91, name='prediction_features')(rnn_output) - pred = keras.layers.Softmax()(pred_feat) - model = keras.Model(inputs=[inputs], outputs=[pred, pred_feat]) - path = os.path.join(self.get_temp_dir(), 'model_path.h5') - model.save(path) - - # Make sure the variable name is unique. - self.assertNotEqual(rnn_layers[0].kernel.name, - rnn_layers[1].kernel.name) - self.assertIn('rnn_cell1', rnn_layers[1].kernel.name) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_saving_optimizer_weights(self): - - class MyModel(keras.Model): - - def __init__(self): - super().__init__() - self.layer = keras.layers.Dense(1) - - def call(self, x): - return self.layer(x) - - path = os.path.join(self.get_temp_dir(), 'weights_path') - x, y = np.ones((10, 10)), np.ones((10, 1)) - - model = MyModel() - model.compile('rmsprop', loss='bce') - model.train_on_batch(x, y) - model.reset_metrics() - model.save_weights(path, save_format='tf') - - batch_loss = model.train_on_batch(x, y) - - new_model = MyModel() - new_model.compile('rmsprop', loss='bce') - new_model.train_on_batch(x, y) - new_model.reset_metrics() - - new_model.load_weights(path) - new_batch_loss = new_model.train_on_batch(x, y) - - self.assertAllClose(batch_loss, new_batch_loss) - - @test_combinations.generate( - test_combinations.combine(mode=['eager', 'graph'])) - def test_save_include_optimizer_false(self): - - def get_variables(file_name): - reader = tf.train.load_checkpoint( - os.path.join(file_name, 'variables/variables')) - shape_from_key = reader.get_variable_to_shape_map() - return sorted(shape_from_key.keys()) - - path = os.path.join(self.get_temp_dir(), 'no_optimizer') - x, y = np.ones((10, 10)), np.ones((10, 1)) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(1)) - model.compile('adam', loss='mse') - model.train_on_batch(x, y) - model.save(path, save_format='tf', include_optimizer=False) - variables = get_variables(path) - - for v in variables: - self.assertNotIn('optimizer', v) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_saving_model_with_custom_object(self): - with generic_utils.custom_object_scope(), self.cached_session(): - - @generic_utils.register_keras_serializable() - class CustomLoss(losses.MeanSquaredError): - pass - - model = sequential.Sequential( - [core.Dense(units=1, input_shape=(1,))]) - model.compile(optimizer='sgd', loss=CustomLoss()) - model.fit(np.zeros([10, 1]), np.zeros([10, 1])) - - temp_dir = 
self.get_temp_dir() - filepath = os.path.join(temp_dir, 'saving') - model.save(filepath) - - # Make sure the model can be correctly load back. - _ = save.load_model(filepath, compile=True) - - def test_saving_model_with_name_conflict(self): - - class Sequential(keras.Model): - - def __init__(self): - super().__init__() - self.layer = keras.layers.Dense(1) - - def call(self, x): - return self.layer(x) - - model = Sequential() - model(tf.ones((10, 10))) - temp_dir = self.get_temp_dir() - filepath = os.path.join(temp_dir, 'Sequential') - - with self.assertLogs() as logs: - model.save(filepath, save_format='tf') - - expected_substring = 'has the same name \'Sequential\' as a built-in Keras' - matched = [log for log in logs.output if expected_substring in log] - self.assertNotEmpty(matched) - - def test_saving_built_in_model(self): - model = LinearModel() - model(tf.constant([[5.]])) - temp_dir = self.get_temp_dir() - filepath = os.path.join(temp_dir, 'LinearModel') - with self.assertLogs() as logs: - model.save(filepath, save_format='tf') - - expected_substring = 'has the same name \'LinearModel\' as a built-in Keras' - matched = [log for log in logs.output if expected_substring in log] - # Check that a warning is *not* logged for a premade model. - self.assertEmpty(matched) - - -@generic_utils.register_keras_serializable(package='Foo') -class RegisteredSubLayer(keras.layers.Layer): - pass - - -class TestJson(test_combinations.TestCase): - """Tests to_json()/from_json().""" - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_saving_with_dense_features(self): - cols = [ - tf.feature_column.numeric_column('a'), - tf.feature_column.indicator_column( - tf.feature_column.categorical_column_with_vocabulary_list( - 'b', ['one', 'two'])) - ] - input_layers = { - 'a': keras.layers.Input(shape=(1,), name='a'), - 'b': keras.layers.Input(shape=(1,), name='b', dtype='string') - } - - fc_layer = dense_features.DenseFeatures(cols)(input_layers) - output = keras.layers.Dense(10)(fc_layer) - - model = keras.models.Model(input_layers, output) - - model.compile( - loss=keras.losses.MSE, - optimizer='rmsprop', - metrics=[keras.metrics.categorical_accuracy]) - - config = model.to_json() - loaded_model = model_config.model_from_json(config) - - inputs_a = np.arange(10).reshape(10, 1) - inputs_b = np.arange(10).reshape(10, 1).astype('str') - - with self.cached_session(): - # Initialize tables for V1 lookup. - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertLen(loaded_model.predict({'a': inputs_a, 'b': inputs_b}), 10) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_saving_with_sequence_features(self): - cols = [ - tf.feature_column.sequence_numeric_column('a'), - tf.feature_column.indicator_column( - tf.feature_column.sequence_categorical_column_with_vocabulary_list( - 'b', ['one', 'two'])) - ] - input_layers = { - 'a': - keras.layers.Input(shape=(None, 1), sparse=True, name='a'), - 'b': - keras.layers.Input( - shape=(None, 1), sparse=True, name='b', dtype='string') - } - - fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers) - # TODO(tibell): Figure out the right dtype and apply masking. 
- # sequence_length_mask = array_ops.sequence_mask(sequence_length) - # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask) - x = keras.layers.GRU(32)(fc_layer) - output = keras.layers.Dense(10)(x) - - model = keras.models.Model(input_layers, output) - - model.compile( - loss=keras.losses.MSE, - optimizer='rmsprop', - metrics=[keras.metrics.categorical_accuracy]) - - config = model.to_json() - loaded_model = model_config.model_from_json(config) - - batch_size = 10 - timesteps = 1 - - values_a = np.arange(10, dtype=np.float32) - indices_a = np.zeros((10, 3), dtype=np.int64) - indices_a[:, 0] = np.arange(10) - inputs_a = tf.SparseTensor(indices_a, values_a, - (batch_size, timesteps, 1)) - - values_b = np.zeros(10, dtype=np.str) - indices_b = np.zeros((10, 3), dtype=np.int64) - indices_b[:, 0] = np.arange(10) - inputs_b = tf.SparseTensor(indices_b, values_b, - (batch_size, timesteps, 1)) - - with self.cached_session(): - # Initialize tables for V1 lookup. - if not tf.executing_eagerly(): - self.evaluate(tf.compat.v1.tables_initializer()) - - self.assertLen( - loaded_model.predict({ - 'a': inputs_a, - 'b': inputs_b - }, steps=1), batch_size) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_nested_layers(self): - - class MyLayer(keras.layers.Layer): - - def __init__(self, sublayers, **kwargs): - super().__init__(**kwargs) - self.sublayers = sublayers - - def get_config(self): - config = super().get_config() - config['sublayers'] = self.sublayers - return config - - layer = MyLayer([keras.layers.Dense(2, name='MyDense'), - RegisteredSubLayer(name='MySubLayer')]) - model = keras.Sequential([keras.Input([None]), layer]) - model_json = model.to_json() - - self.assertIn('Foo>RegisteredSubLayer', model_json) - - loaded_model = model_config.model_from_json( - model_json, custom_objects={'MyLayer': MyLayer}) - loaded_layer = loaded_model.layers[0] - self.assertIsInstance(loaded_layer.sublayers[0], keras.layers.Dense) - self.assertEqual(loaded_layer.sublayers[0].name, 'MyDense') - self.assertIsInstance(loaded_layer.sublayers[1], RegisteredSubLayer) - self.assertEqual(loaded_layer.sublayers[1].name, 'MySubLayer') - - -class MaskedTensor(tf.experimental.ExtensionType): - __name__ = 'MaskedTensor_save_test' - values: tf.Tensor - mask: tf.Tensor - class Spec(tf.TypeSpec): - - @property - def shape(self): - return self.values.shape - - @property - def dtype(self): - return self.values.dtype - - def with_shape(self, shape): - values_spec = tf.TensorSpec( - shape, dtype=self.values.dtype, name=self.values.name) - mask_spec = tf.TensorSpec( - shape, dtype=self.mask.dtype, name=self.mask.name) - return MaskedTensor.Spec(values_spec, mask_spec) - - -@test_combinations.run_with_all_saved_model_formats -class TestWholeModelSaving(test_combinations.TestCase): - - def _save_model_dir(self, dirname='saved_model'): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - return os.path.join(temp_dir, dirname) - - def _assert_same_weights_and_metrics(self, model, loaded_model): - """Checks that the loaded weights and metrics are the same as the original. - - Args: - model: original model - loaded_model: loaded model - """ - self.assertAllClose(model.weights, loaded_model.weights) - - if loaded_model.optimizer: - if test_utils.get_save_format() == 'tf': - # TODO(b/153110928): Keras TF format doesn't restore optimizer weights - # currently. 
- return - self.assertAllClose(model.optimizer.weights, - loaded_model.optimizer.weights) - - # In V1/Graph mode, the model isn't built, so the metrics are not loaded - # immediately (requires model to be called on some data before building - # metrics). - check_metrics = tf.__internal__.tf2.enabled() and tf.executing_eagerly() - - if check_metrics: - self.assertAllEqual([m.name for m in model.metrics], - [m.name for m in loaded_model.metrics]) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_save_and_load(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - save_kwargs = test_utils.get_save_kwargs() - - if ((save_format == 'h5' or not save_kwargs.get('save_traces', True)) and - test_utils.get_model_type() == 'subclass'): - # HDF5 format currently does not allow saving subclassed models. - # When saving with `save_traces=False`, the subclassed model must have a - # get_config/from_config, which the autogenerated model does not have. - return - - with self.cached_session(): - model = test_utils.get_model_from_layers( - [keras.layers.Dense(2), - keras.layers.RepeatVector(3), - keras.layers.TimeDistributed(keras.layers.Dense(3))], - input_shape=(3,)) - model.compile( - loss=keras.losses.MSE, - optimizer=keras.optimizers.optimizer_v2.rmsprop.RMSprop(lr=0.0001), - metrics=[ - keras.metrics.categorical_accuracy, - keras.metrics.CategoricalCrossentropy( - name='cce', label_smoothing=tf.constant(0.2)), - ], - weighted_metrics=[ - keras.metrics.categorical_crossentropy, - keras.metrics.CategoricalCrossentropy( - name='cce', label_smoothing=tf.constant(0.2)), - ], - sample_weight_mode='temporal') - - x = np.random.random((1, 3)) - y = np.random.random((1, 3, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - keras.models.save_model( - model, saved_model_dir, save_format=save_format, - **save_kwargs) - - loaded_model = keras.models.load_model(saved_model_dir) - self._assert_same_weights_and_metrics(model, loaded_model) - - out2 = loaded_model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - eval_out = model.evaluate(x, y) - eval_out2 = loaded_model.evaluate(x, y) - self.assertArrayNear(eval_out, eval_out2, 0.001) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_sequential_model_saving_without_input_shape(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2)) - model.add(keras.layers.RepeatVector(3)) - model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) - model.compile( - loss=keras.losses.MSE, - optimizer='rmsprop', - metrics=[ - keras.metrics.categorical_accuracy, - keras.metrics.CategoricalAccuracy(name='cat_acc') - ], - weighted_metrics=[ - keras.metrics.categorical_accuracy, - keras.metrics.CategoricalAccuracy(name='cat_acc2') - ], - sample_weight_mode='temporal') - x = np.random.random((1, 3)) - y = np.random.random((1, 3, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - model.save(saved_model_dir, save_format=save_format) - - new_model = keras.models.load_model(saved_model_dir) - - self._assert_same_weights_and_metrics(model, new_model) - - out2 = new_model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_sequential_model_saving_without_compile(self): - 
saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.RepeatVector(3)) - model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) - - x = np.random.random((1, 3)) - out = model.predict(x) - - # Save the model without any compilation or training. - keras.models.save_model(model, saved_model_dir, save_format=save_format) - - new_model = keras.models.load_model(saved_model_dir) - self._assert_same_weights_and_metrics(model, new_model) - - out2 = new_model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - def test_sequential_model_saving_2(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - - with tf.Graph().as_default(), self.cached_session(): - # test with custom optimizer, loss - - class CustomOp(optimizer_v1.RMSprop): - pass - - def custom_loss(y_true, y_pred): - return keras.losses.mse(y_true, y_pred) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss=custom_loss, optimizer=CustomOp(), metrics=['acc']) - - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - keras.models.save_model(model, saved_model_dir, save_format=save_format) - - new_model = keras.models.load_model( - saved_model_dir, - custom_objects={'CustomOp': CustomOp, - 'custom_loss': custom_loss}) - self._assert_same_weights_and_metrics(model, new_model) - - out2 = new_model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - def test_saving_without_compilation(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - - keras.models.save_model(model, saved_model_dir, save_format=save_format) - model = keras.models.load_model(saved_model_dir) - - def test_saving_with_tf_optimizer(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss='mse', - optimizer=tf.compat.v1.train.AdadeltaOptimizer(0.1), - metrics=['acc']) - - keras.models.save_model(model, saved_model_dir, save_format=save_format) - model = keras.models.load_model(saved_model_dir) - - def test_saving_right_after_compilation(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - if not tf.compat.v1.executing_eagerly_outside_functions(): - model._make_train_function() - keras.models.save_model(model, saved_model_dir, save_format=save_format) - model = keras.models.load_model(saved_model_dir) - - def test_saving_lambda_numpy_array_arguments(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - - if h5py is None: - self.skipTest('h5py required to run this test') - - mean = np.random.random((4, 2, 3)) - std = np.abs(np.random.random((4, 2, 3))) + 1e-5 - inputs = 
keras.layers.Input(shape=(4, 2, 3)) - output = keras.layers.Lambda(lambda image, mu, std: (image - mu) / std, - arguments={'mu': mean, 'std': std})(inputs) - model = keras.models.Model(inputs, output) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - - keras.models.save_model(model, saved_model_dir, save_format=save_format) - - model = keras.models.load_model(saved_model_dir) - - self.assertAllClose(mean, model.layers[1].arguments['mu']) - self.assertAllClose(std, model.layers[1].arguments['std']) - - def test_saving_model_with_long_layer_names(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - with self.cached_session(): - # This layer name will make the `layers_name` HDF5 attribute blow - # out of proportion. Note that it fits into the internal HDF5 - # attribute memory limit on its own but because h5py converts - # the list of layer names into numpy array, which uses the same - # amount of memory for every item, it increases the memory - # requirements substantially. - x = keras.Input(shape=(2,), name='input_' + ('x' * (2**15))) - f = x - for i in range(4): - f = keras.layers.Dense(2, name='dense_%d' % (i,))(f) - model = keras.Model(inputs=[x], outputs=[f]) - model.compile( - 'adam', loss=keras.losses.MeanSquaredError(), metrics=['acc']) - - x = np.random.random((1, 2)) - y = np.random.random((1, 2)) - model.train_on_batch(x, y) - out = model.predict(x) - - keras.models.save_model(model, saved_model_dir, save_format=save_format) - model = keras.models.load_model(saved_model_dir) - - if save_format in ['tf', 'tensorflow']: - return - # Check that the HDF5 files contains chunked array - # of layer names. - with h5py.File(saved_model_dir, 'r') as h5file: - num_names_arrays = len([attr for attr in h5file['model_weights'].attrs - if attr.startswith('layer_names')]) - # The chunking of layer names array should have happened. - self.assertGreater(num_names_arrays, 0) - out2 = model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - def test_saving_model_with_long_weights_names(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - - with self.cached_session(): - x = keras.Input(shape=(2,), name='nested_model_input') - f = x - for i in range(4): - f = keras.layers.Dense(2, name='nested_model_dense_%d' % (i,))(f) - # This layer name will make the `weights_name` - # HDF5 attribute blow out of proportion. - f = keras.layers.Dense(2, name='nested_model_output' + ('x' * (2**14)))(f) - nested_model = keras.Model(inputs=[x], outputs=[f], name='nested_model') - - x = keras.Input(shape=(2,), name='outer_model_input') - f = nested_model(x) - f = keras.layers.Dense(2, name='outer_model_output')(f) - - model = keras.Model(inputs=[x], outputs=[f]) - model.compile(loss='mse', optimizer='adam', metrics=['acc']) - - x = np.random.random((1, 2)) - y = np.random.random((1, 2)) - model.train_on_batch(x, y) - out = model.predict(x) - - keras.models.save_model(model, saved_model_dir, save_format=save_format) - model = keras.models.load_model(saved_model_dir) - - if save_format in ['h5', 'hdf5', 'keras']: - # Check that the HDF5 files contains chunked array - # of weight names. - with h5py.File(saved_model_dir, 'r') as h5file: - num_weight_arrays = len( - [attr for attr in h5file['model_weights']['nested_model'].attrs - if attr.startswith('weight_names')]) - # The chunking of layer names array should have happened. 
- self.assertGreater(num_weight_arrays, 0) - out2 = model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - def test_model_saving_to_pre_created_h5py_file(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - with tf.Graph().as_default(), self.cached_session(): - inputs = keras.Input(shape=(3,)) - x = keras.layers.Dense(2)(inputs) - outputs = keras.layers.Dense(3)(x) - - model = keras.Model(inputs, outputs) - model.compile( - loss=keras.losses.MSE, - optimizer=optimizer_v1.Adam(), - metrics=[ - keras.metrics.categorical_accuracy, - keras.metrics.CategoricalAccuracy() - ]) - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - - keras.models.save_model(model, saved_model_dir, save_format=save_format) - loaded_model = keras.models.load_model(saved_model_dir) - out1 = loaded_model.predict(x) - self.assertAllClose(out, out1, atol=1e-05) - if save_format in ['tf', 'tensorflow']: - return - - # Test h5 format specifically - fd, fname = tempfile.mkstemp('.h5') - with h5py.File(fname, mode='r+') as h5file: - keras.models.save_model(model, h5file) - loaded_model = keras.models.load_model(h5file) - out2 = loaded_model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - # Test non-default options in h5 - with h5py.File( - '_', driver='core', mode='w', backing_store=False) as h5file: - keras.models.save_model(model, h5file) - loaded_model = keras.models.load_model(h5file) - out2 = loaded_model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - # Cleanup - os.close(fd) - os.remove(fname) - - def test_model_saving_to_new_dir_path(self): - saved_model_dir = os.path.join(self._save_model_dir(), 'newdir', - 'saved_model') - save_format = test_utils.get_save_format() - - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.RepeatVector(3)) - model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) - - x = np.random.random((1, 3)) - out = model.predict(x) - - keras.models.save_model(model, saved_model_dir, save_format=save_format) - - new_model = keras.models.load_model(saved_model_dir) - self._assert_same_weights_and_metrics(model, new_model) - - out2 = new_model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - def test_model_raise_exception_with_failed_saving(self): - if h5py is None: - self.skipTest('h5py required to run this test') - - saved_model_dir = self._save_model_dir() - saved_model_path = os.path.join(saved_model_dir, 'saved_model.h5') - - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.RepeatVector(3)) - model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) - - with self.assertRaisesRegex(OSError, 'Unable to create file'): - with h5py.File(saved_model_path, 'w'): - keras.models.save_model(model, saved_model_path) - - def test_saving_constant_initializer_with_numpy(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - - model = keras.models.Sequential() - model.add( - keras.layers.Dense( - 2, - input_shape=(3,), - kernel_initializer=keras.initializers.Constant(np.ones((3, 2))))) - model.add(keras.layers.Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - keras.models.save_model(model, saved_model_dir, save_format=save_format) - model = keras.models.load_model(saved_model_dir) - - def 
test_saving_group_naming_h5py(self): - # Test saving a model with a layer whose name is a prefix of a - # previous layer's name. - - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir) - h5_path = os.path.join(temp_dir, 'test.h5') - - input_layer = keras.layers.Input((None, None, 3), name='test_input') - x = keras.layers.Conv2D(1, 1, name='conv1/conv')(input_layer) - x = keras.layers.Activation('relu', name='conv1')(x) - model = keras.models.Model(inputs=input_layer, outputs=x) - - model.save_weights(h5_path) - model.load_weights(h5_path) - - def test_primitive_attrs_contain_no_extraneous_strings(self): - if h5py is None: - self.skipTest('h5py required to run this test') - - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - model = keras.models.Sequential() - model.add(keras.layers.Dense(1, input_shape=[2])) - model.save(saved_model_dir, save_format=save_format) - if save_format in ['tf', 'tensorflow']: - return - - h5file = h5py.File(saved_model_dir, 'r') - self.assertRegex(h5file.attrs['keras_version'], r'^[\d]+\.[\d]+\.[\S]+$') - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_functional_model_with_custom_loss_and_metric(self): - def _make_model(): - inputs = keras.Input(shape=(4,)) - x = keras.layers.Dense(8, activation='relu')(inputs) - outputs = keras.layers.Dense(3, activation='softmax')(x) - model = keras.Model(inputs=inputs, outputs=outputs) - custom_loss = keras.layers.Lambda(lambda x: keras.backend.sum(x * x))(x) - model.add_loss(custom_loss) - model.add_metric(custom_loss, aggregation='mean', name='custom_loss') - return model - - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - - with self.cached_session(): - model = _make_model() - model.compile( - loss=keras.losses.SparseCategoricalCrossentropy(), - optimizer=optimizers.gradient_descent_v2.SGD(), - metrics=[keras.metrics.SparseCategoricalCrossentropy()]) - x = np.random.normal(size=(32, 4)) - y = np.random.randint(0, 3, size=32) - model.train_on_batch(x, y) - evaluation_results = model.evaluate(x, y) - # Save and reload model. - model.save(saved_model_dir, save_format=save_format) - del model # Prevent misuse. - loaded_model = keras.models.load_model(saved_model_dir) - loaded_model_eval_results = loaded_model.evaluate(x, y) - # Assert all evaluation results are the same. - self.assertAllClose(evaluation_results, loaded_model_eval_results, 1e-9) - # Check correctness of the loss calculation. - self.assertAllGreater(evaluation_results, 0.) - evaluation_results = dict( - zip(loaded_model.metrics_names, evaluation_results)) - self.assertNear( - evaluation_results['sparse_categorical_crossentropy'] + - evaluation_results['custom_loss'], evaluation_results['loss'], 1e-6) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_save_uncompiled_model_with_optimizer(self): - with self.cached_session() as session: - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - model = keras.models.Sequential([keras.layers.Dense(1, input_shape=(3,))]) - # Set the model's optimizer but don't compile. This can happen if the - # model is trained with a custom training loop.
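For context, a minimal sketch of the custom-training-loop scenario this comment describes (shapes, data, and the optimizer choice are all illustrative): the optimizer is attached by plain assignment and `compile()` is never called.

```
# Illustrative only: train manually, so the model never gets compiled.
import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(3,))])
model.optimizer = keras.optimizers.RMSprop(learning_rate=1e-4)  # no compile()

x = tf.random.normal((8, 3))
y = tf.random.normal((8, 1))
with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x, training=True) - y))
grads = tape.gradient(loss, model.trainable_variables)
model.optimizer.apply_gradients(zip(grads, model.trainable_variables))
# Saving still works; the test below checks that the TF format also
# restores the optimizer on load.
```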
- model.optimizer = keras.optimizers.optimizer_v2.rmsprop.RMSprop(lr=0.0001) - if not tf.executing_eagerly(): - session.run([v.initializer for v in model.variables]) - model.save(saved_model_dir, save_format=save_format) - - if save_format in ['tf', 'tensorflow']: - loaded = keras.models.load_model(saved_model_dir) - self.assertIsInstance( - loaded.optimizer, - keras.optimizers.optimizer_v2.optimizer_v2.OptimizerV2) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_functional_model_with_getitem_op_layer(self): - inp = keras.Input(shape=(8)) - - out = inp[:] - model = keras.Model( - inputs=[inp], - outputs=out) - batch_size = 7 - x = tf.stack([ - tf.range(8) for _ in range(batch_size)]) - args = [x] - expected = x[:] - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - # Make sure it can be successfully saved and loaded. - save_format = test_utils.get_save_format() - saved_model_dir = self._save_model_dir() - keras.models.save_model(model, saved_model_dir, save_format=save_format) - - loaded_model = keras.models.load_model(saved_model_dir) - - self.assertAllEqual(loaded_model(args), expected) - self.assertAllEqual(loaded_model.predict(args, batch_size=batch_size), - expected) - - @test_combinations.generate(test_combinations.combine( - mode=['eager', 'graph'])) - def test_custom_functional_registered(self): - - def _get_cls_definition(): - class CustomModel(keras.Model): - - def c(self): - return 'c' - - return CustomModel - - cls = _get_cls_definition() - self.assertEqual(cls.__bases__[0], keras.Model) - - with self.cached_session() as sess: - input_ = keras.layers.Input(shape=(1,)) - output = keras.layers.Dense(1)(input_) - model = cls(input_, output) - # `cls` now inherits from `Functional` class. - self.assertEqual(cls.__bases__[0], functional.Functional) - - if not tf.executing_eagerly(): - sess.run([v.initializer for v in model.variables]) - - save_format = test_utils.get_save_format() - saved_model_dir = self._save_model_dir() - keras.models.save_model(model, saved_model_dir, save_format=save_format) - - loaded_model = keras.models.load_model( - saved_model_dir, custom_objects={'CustomModel': cls}) - self.assertIsInstance(loaded_model, cls) - - # Check with "new" `CustomModel` class definition. - new_cls = _get_cls_definition() - # The new `CustomModel` class is *not* derived from `Functional`. - self.assertEqual(new_cls.__bases__[0], keras.Model) - reloaded_model = keras.models.load_model( - saved_model_dir, custom_objects={'CustomModel': new_cls}) - self.assertIsInstance(reloaded_model, new_cls) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_shared_objects(self): - class OuterLayer(keras.layers.Layer): - - def __init__(self, inner_layer): - super().__init__() - self.inner_layer = inner_layer - - def call(self, inputs): - return self.inner_layer(inputs) - - def get_config(self): - return { - 'inner_layer': generic_utils.serialize_keras_object( - self.inner_layer) - } - - @classmethod - def from_config(cls, config): - return cls(generic_utils.deserialize_keras_object( - config['inner_layer'])) - - class InnerLayer(keras.layers.Layer): - - def __init__(self): - super().__init__() - self.v = self.add_weight(name='v', shape=[], dtype=tf.float32) - - def call(self, inputs): - return self.v + inputs - - @classmethod - def from_config(cls, config): - return cls() - - # Create a model with 2 output layers that share the same inner layer. 
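Before the shared-layer model is assembled below, note what the `get_config`/`from_config` pair above buys: the nested layer is serialized by value through `generic_utils`. A hedged sketch of that round-trip in isolation (it assumes the custom classes are exposed through a custom-object scope):

```
# Illustrative round-trip of the nested-layer serialization used above.
from keras.utils import generic_utils

inner = InnerLayer()
config = generic_utils.serialize_keras_object(inner)
# config is roughly {'class_name': 'InnerLayer', 'config': {...}}
with generic_utils.CustomObjectScope({'InnerLayer': InnerLayer}):
    revived = generic_utils.deserialize_keras_object(config)
```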
- inner_layer = InnerLayer() - outer_layer_1 = OuterLayer(inner_layer) - outer_layer_2 = OuterLayer(inner_layer) - input_ = keras.Input(shape=(1,)) - model = keras.Model( - inputs=input_, outputs=[outer_layer_1(input_), outer_layer_2(input_)]) - - # Changes to the shared layer should affect both outputs. - model.layers[1].inner_layer.v.assign(5) - self.assertAllEqual(model(1), [6.0, 6.0]) - model.layers[1].inner_layer.v.assign(3) - self.assertAllEqual(model(1), [4.0, 4.0]) - - # After loading, changes to the shared layer should still affect both - # outputs. - def _do_assertions(loaded): - loaded.layers[1].inner_layer.v.assign(5) - self.assertAllEqual(loaded(1), [6.0, 6.0]) - loaded.layers[1].inner_layer.v.assign(3) - self.assertAllEqual(loaded(1), [4.0, 4.0]) - loaded.layers[2].inner_layer.v.assign(5) - self.assertAllEqual(loaded(1), [6.0, 6.0]) - loaded.layers[2].inner_layer.v.assign(3) - self.assertAllEqual(loaded(1), [4.0, 4.0]) - - # We'd like to make sure we only attach shared object IDs when strictly - # necessary, so we'll recursively traverse the generated config to count - # whether we have the exact number we expect. - def _get_all_keys_recursive(dict_or_iterable): - if isinstance(dict_or_iterable, dict): - for key in dict_or_iterable.keys(): - yield key - for key in _get_all_keys_recursive(dict_or_iterable.values()): - yield key - elif isinstance(dict_or_iterable, str): - return - else: - try: - for item in dict_or_iterable: - for key in _get_all_keys_recursive(item): - yield key - # Not an iterable or dictionary - except TypeError: - return - - with generic_utils.CustomObjectScope({ - 'OuterLayer': OuterLayer, 'InnerLayer': InnerLayer}): - - # Test saving and loading to disk - save_format = test_utils.get_save_format() - saved_model_dir = self._save_model_dir() - keras.models.save_model(model, saved_model_dir, save_format=save_format) - loaded = keras.models.load_model(saved_model_dir) - _do_assertions(loaded) - - # Test recreating directly from config - config = model.get_config() - key_count = collections.Counter(_get_all_keys_recursive(config)) - self.assertEqual(key_count[generic_utils.SHARED_OBJECT_KEY], 2) - loaded = keras.Model.from_config(config) - _do_assertions(loaded) - - @test_combinations.generate(test_combinations.combine(mode=['eager'])) - def test_shared_objects_wrapper(self): - """Tests that shared layers wrapped with `Wrapper` restore correctly.""" - input_ = keras.Input(shape=(1,)) - unwrapped = keras.layers.Layer(name='unwrapped') - wrapped = keras.layers.Wrapper(unwrapped, name='wrapped') - model = keras.Model(inputs=input_, - outputs=[unwrapped(input_), wrapped(input_)]) - - # Test recreating directly from config - config = model.get_config() - loaded = keras.Model.from_config(config) - self.assertIs(loaded.layers[1], loaded.layers[2].layer) - - # Test saving and loading to disk - save_format = test_utils.get_save_format() - saved_model_dir = self._save_model_dir() - keras.models.save_model(model, saved_model_dir, save_format=save_format) - loaded = keras.models.load_model(saved_model_dir) - self.assertIs(loaded.layers[1], loaded.layers[2].layer) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'], fit=[True, False])) - def test_multi_output_metrics_name_stay_same(self, fit): - """Tests that metric names don't change with each save/load cycle. - - e.g. "head_0_accuracy" should not become "head_0_head_0_accuracy" after - saving and loading a model. - - Arguments: - fit: Whether the model should be fit before saving. 
- """ - # This combination (graph mode without fitting) doesn't work at all, so we - # can't check whether the metric names are correct. - if not tf.executing_eagerly() and not fit: - self.skipTest('b/181767784') - - input_ = keras.Input((4,)) - model = keras.Model( - input_, - [keras.layers.Softmax(name='head_0')(keras.layers.Dense(3)(input_)), - keras.layers.Softmax(name='head_1')(keras.layers.Dense(5)(input_))]) - metric = keras.metrics.BinaryAccuracy() - model.compile(optimizer='rmsprop', - loss='mse', - metrics={'head_0': [metric, 'accuracy']}) - - x = np.random.rand(2, 4) - y = {'head_0': np.random.randint(2, size=(2, 3)), - 'head_1': np.random.randint(2, size=(2, 5))} - - # Make sure metric prefixing works the same regardless of whether the user - # has fit the model before saving. - if fit: - model.fit(x, y, verbose=0) - - # Save and reload. - save_format = test_utils.get_save_format() - saved_model_dir = self._save_model_dir() - keras.models.save_model(model, saved_model_dir, save_format=save_format) - loaded = keras.models.load_model(saved_model_dir) - - # Make sure the metric names from the model before saving match those of - # the loaded model. - self.assertSequenceEqual(model.metrics_names, loaded.metrics_names) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_warning_when_saving_invalid_custom_mask_layer(self): - - class MyMasking(keras.layers.Layer): - - def call(self, inputs): - return inputs - - def compute_mask(self, inputs, mask=None): - mask = tf.not_equal(inputs, 0) - return mask - - class MyLayer(keras.layers.Layer): - - def call(self, inputs, mask=None): - return tf.identity(inputs) - - samples = np.random.random((2, 2)) - model = keras.Sequential([MyMasking(), MyLayer()]) - model.predict(samples) - with warnings.catch_warnings(record=True) as w: - model.save(self._save_model_dir(), test_utils.get_save_format()) - self.assertIn(generic_utils.CustomMaskWarning, - {warning.category for warning in w}) - - # Test that setting up a custom mask correctly does not issue a warning. - class MyCorrectMasking(keras.layers.Layer): - - def call(self, inputs): - return inputs - - def compute_mask(self, inputs, mask=None): - mask = tf.not_equal(inputs, 0) - return mask - - # This get_config doesn't actually do anything because our mask is - # static and doesn't need any external information to work. We do need a - # dummy get_config method to prevent the warning from appearing, however. - def get_config(self, *args, **kwargs): - return {} - - model = keras.Sequential([MyCorrectMasking(), MyLayer()]) - model.predict(samples) - with warnings.catch_warnings(record=True) as w: - model.save(self._save_model_dir(), test_utils.get_save_format()) - self.assertNotIn(generic_utils.CustomMaskWarning, - {warning.category for warning in w}) - - # Test only in eager mode because ragged tensor inputs - # cannot be used in graph mode.
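As a quick illustration of the eager-only restriction mentioned above, the ragged input used in the next test has rows of unequal length, and inspecting its values relies on eager execution:

```
import tensorflow as tf

rt = tf.ragged.constant([[1., 2.], [3.]])
print(rt.shape)                   # (2, None): the second dimension is ragged
print(rt.row_lengths().numpy())   # [2 1]; .numpy() itself requires eager mode
```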
- @test_combinations.generate( - test_combinations.combine(mode=['eager'])) - @test_utils.run_v2_only - def test_save_functional_with_ragged_constant_input(self): - input1 = keras.Input(shape=[]) - input2 = tf.ragged.constant([[1., 2.], [3.]]) - outputs = keras.layers.Add()([input1, input2]) - model = keras.Model(input1, outputs) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir) - keras.models.load_model(saved_model_dir) - - @test_combinations.generate( - test_combinations.combine(mode=['eager'])) - @test_utils.run_v2_only - def test_save_functional_with_constant_input(self): - input1 = keras.Input(shape=[2]) - input2 = tf.constant([[1., 2.]]) - outputs = keras.layers.Add()([input1, input2]) - model = keras.Model(input1, outputs) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir) - keras.models.load_model(saved_model_dir) - - @test_combinations.generate( - test_combinations.combine(mode=['eager'])) - @test_utils.run_v2_only - def test_save_inputs_spec_with_composite_tensor_names(self): - - class KerasModel(keras.Model): - - def call(self, inputs): - return inputs - - spec = MaskedTensor.Spec( - tf.TensorSpec([None], name='x__values'), - tf.TensorSpec([None], dtype=tf.bool, name='x__mask') - ) - km1 = KerasModel() - inputs = keras.Input(type_spec=spec) - km1(inputs) - self.assertEqual(km1.save_spec()[0][0].mask.name, 'x__mask') - - -# Factory functions to create models that will be serialized inside a Network. -def _make_graph_network(input_size, output_size): - inputs = keras.Input(input_size) - x = keras.layers.Dense(8, activation='relu')(inputs) - y = keras.layers.Dense(output_size)(x) - return keras.Model(inputs=inputs, outputs=y) - - -def _make_sequential(input_size, output_size): - del input_size - return keras.Sequential([ - keras.layers.Dense(8, activation='relu'), - keras.layers.Dense(output_size), - ]) - - -def _make_sequential_built(input_size, output_size): - model = _make_sequential(input_size, output_size) - model.build((None, input_size)) - return model - - -def _make_sequential_graph_network(input_size, output_size): - return keras.Sequential([ - keras.layers.InputLayer(input_size), - keras.layers.Dense(8, activation='relu'), - keras.layers.Dense(output_size), - ]) - - -def _make_sequential_input_shape(input_size, output_size): - return keras.Sequential([ - keras.layers.Dense(8, activation='relu', input_shape=(input_size,)), - keras.layers.Dense(output_size), - ]) - - -class _make_subclassed(keras.Model): # pylint: disable=invalid-name - - def __init__(self, input_size, output_size): - super().__init__() - self._config = {'input_size': input_size, 'output_size': output_size} - self._hidden_layer = keras.layers.Dense(8, activation='relu', name='hidden') - self._logits_layer = keras.layers.Dense(output_size, name='logits') - - def call(self, inputs): - x = self._hidden_layer(inputs) - return self._logits_layer(x) - - def get_config(self): - return self._config - - @classmethod - def from_config(cls, config): - return cls(**config) - - -class _make_subclassed_built(_make_subclassed): # pylint: disable=invalid-name - - def __init__(self, input_size, output_size): - super().__init__(input_size, output_size) - self.build((None, input_size)) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class TestWholeModelSavingWithNesting(tf.test.TestCase, parameterized.TestCase): - """Tests saving a whole model that contains other models.""" - - @parameterized.named_parameters([ - ('graph_network', 
_make_graph_network), - ('sequential', _make_sequential), - ('sequential_built', _make_sequential_built), - ('sequential_graph_network', _make_sequential_graph_network), - ('sequential_input_shape', _make_sequential_input_shape), - ('subclassed', _make_subclassed), - ('subclassed_built', _make_subclassed_built), - ]) - def test_functional(self, model_fn): - """Tests serializing a model that uses a nested model to share weights.""" - if h5py is None: - self.skipTest('h5py required to run this test') - - def _make_model(): - inputs = (keras.Input(shape=(4,), name='examples'), - keras.Input(shape=(4,), name='neighbors')) - base_model = model_fn(inputs[0].shape.as_list()[-1], 2) - outputs = keras.layers.add([base_model(inputs[0]), base_model(inputs[1])]) - return keras.Model(inputs=inputs, outputs=outputs) - - with self.cached_session(): - x = (np.random.normal(size=(16, 4)).astype(np.float32), - np.random.normal(size=(16, 4)).astype(np.float32)) - model = _make_model() - predictions = model(x) - # Save and reload. - model_path = os.path.join(self.get_temp_dir(), 'model.h5') - model.save(model_path) - del model - loaded_model = keras.models.load_model( - model_path, - custom_objects={ - '_make_subclassed': _make_subclassed, - '_make_subclassed_built': _make_subclassed_built, - }, - compile=False) - self.assertAllClose(loaded_model(x), predictions, 1e-9) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/saving/save_weights_test.py b/keras/saving/save_weights_test.py deleted file mode 100644 index ba7a2703d95d..000000000000 --- a/keras/saving/save_weights_test.py +++ /dev/null @@ -1,677 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#,============================================================================ -"""Tests for model saving in the HDF5 format.""" - -import tensorflow.compat.v2 as tf - -import os -import shutil -import uuid - -from absl.testing import parameterized -import numpy as np - -import keras -from keras.testing_infra import test_combinations -from keras.optimizers import optimizer_v1 -from keras.testing_infra import test_utils -from keras.engine import training -from keras.saving import hdf5_format - -try: - import h5py # pylint:disable=g-import-not-at-top -except ImportError: - h5py = None - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class TestWeightSavingAndLoading(tf.test.TestCase, parameterized.TestCase): - - def _save_model_dir(self, dirname='saved_model'): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - return os.path.join(temp_dir, dirname) - - @test_combinations.run_with_all_weight_formats - def test_weight_loading(self): - saved_model_dir = self._save_model_dir() - save_format = test_utils.get_save_format() - with self.cached_session(): - a = keras.layers.Input(shape=(2,)) - x = keras.layers.Dense(3)(a) - b = keras.layers.Dense(1)(x) - model = keras.models.Model(a, b) - - x = np.random.random((3, 2)) - ref_y = model.predict(x) - weights = model.get_weights() - model.set_weights(weights) - y = model.predict(x) - self.assertAllClose(ref_y, y) - - with self.assertRaises(ValueError): - model.set_weights(weights[1:]) - with self.assertRaises(ValueError): - model.set_weights(weights[::-1]) - - model.save_weights(saved_model_dir, save_format=save_format) - model.load_weights(saved_model_dir) - y = model.predict(x) - self.assertAllClose(ref_y, y) - - def test_weight_preprocessing(self): - input_dim = 3 - output_dim = 3 - size = 2 - cases = [ - [ - (keras.layers.Bidirectional(keras.layers.SimpleRNN(2))), - [np.random.random((2, 1)), np.random.random((2, 1))], - (None, 3, 2), - ], - [ - (keras.layers.TimeDistributed(keras.layers.Dense(1))), - [np.random.random((2, 1)), np.random.random((1,))], - (None, 3, 2), - ], - [ - (keras.layers.Conv1D(output_dim, size, use_bias=False)), - [np.random.random((output_dim, input_dim, size, 1))], - (None, 4, input_dim), - ], - [ - (keras.layers.Conv2D(output_dim, size, - use_bias=False, data_format='channels_first')), - [np.random.random((output_dim, input_dim, size, size))], - (None, input_dim, 4, 4), - ], - [ - (keras.layers.Conv2DTranspose(output_dim, size, - use_bias=False, - data_format='channels_first')), - [np.random.random((output_dim, input_dim, size, size))], - (None, input_dim, 4, 4), - ], - [ - (keras.layers.Conv2DTranspose(output_dim, size, - use_bias=False, - data_format='channels_last')), - [np.random.random((size, size, input_dim, output_dim))], - (None, 4, 4, input_dim), - ], - [ - (keras.layers.Conv3D(output_dim, size, - use_bias=False, data_format='channels_first')), - [np.random.random((output_dim, input_dim, size, size, size))], - (None, input_dim, 4, 4, 4), - ], - [ - (keras.layers.GRUV1(output_dim)), - [np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,))], - (None, 4, input_dim), - ], - [ - (keras.layers.LSTMV1(output_dim)), - 
[np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,))], - (None, 4, input_dim), - ], - ] - for layer, weights, input_shape in cases: - layer.build(input_shape) - _ = hdf5_format.preprocess_weights_for_loading( - layer, weights, original_keras_version='1') - - model = keras.models.Sequential([keras.layers.Dense(2, input_dim=2)]) - _ = hdf5_format.preprocess_weights_for_loading( - model, model.weights, original_keras_version='1') - - x = keras.Input((2,)) - y = keras.layers.Dense(2)(x) - model = keras.models.Model(x, y) - _ = hdf5_format.preprocess_weights_for_loading( - model, model.weights, original_keras_version='1') - - @parameterized.named_parameters( - ('gru', keras.layers.GRU, { - 'units': 2, - 'input_shape': (3, 5) - }), - ('gru_with_reset_after', keras.layers.GRU, { - 'units': 2, - 'input_shape': (3, 5), - 'reset_after': True - }), - ('lstm', keras.layers.LSTM, { - 'units': 2, - 'input_shape': (3, 5) - }), - ('cudnngru', keras.layers.CuDNNGRU, { - 'units': 2, - 'input_shape': (3, 5) - }), - ('cudnnlstm', keras.layers.CuDNNLSTM, { - 'units': 2, - 'input_shape': (3, 5) - })) - def test_preprocess_weights_for_loading_rnn_should_be_idempotent( - self, layer_class, layer_args): - with self.cached_session(): - layer = layer_class(**layer_args) - layer.build(input_shape=layer_args.get('input_shape')) - weights1 = layer.get_weights() - weights2 = hdf5_format.preprocess_weights_for_loading( - layer, weights1) - _ = [ - self.assertAllClose(x, y, rtol=1e-05) - for (x, y) in zip(weights1, weights2) - ] - - def test_sequential_weight_loading(self): - if h5py is None: - return - - h5_path = self._save_model_dir('test.h5') - - num_hidden = 5 - input_dim = 3 - batch_size = 5 - num_classes = 2 - - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) - model.add(keras.layers.Dense(num_classes)) - - x = np.random.random((batch_size, input_dim)) - ref_y = model.predict(x) - - model.save_weights(h5_path) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) - model.add(keras.layers.Dense(num_classes)) - model.load_weights(h5_path) - y = model.predict(x) - - self.assertAllClose(y, ref_y) - - @test_combinations.run_with_all_saved_model_formats( - exclude_formats=['tf_no_traces']) - def test_nested_model_weight_loading(self): - save_format = test_utils.get_save_format() - saved_model_dir = self._save_model_dir() - - batch_size = 5 - shape = (None, None, 3) - - with self.cached_session(): - def gen_model(): - - def seq_model(): - model = keras.models.Sequential([ - keras.layers.Conv2D(3, 1, input_shape=shape), - keras.layers.BatchNormalization()]) - return model - - x = inner_inputs = keras.layers.Input((None, None, 3)) - x = seq_model()(x) - x = seq_model()(x) - inner_model = keras.models.Model(inner_inputs, x) - - inputs = keras.layers.Input(shape) - return keras.models.Model(inputs, inner_model(inputs)) - - model = gen_model() - x = np.random.random((batch_size, 1, 1, 3)) - ref_y = model.predict(x) - - model.save_weights(saved_model_dir, 
save_format=save_format) - - model = gen_model() - model.load_weights(saved_model_dir) - y = model.predict(x) - - self.assertAllClose(y, ref_y) - - def test_sequential_weight_loading_group_name_with_incorrect_length(self): - if h5py is None: - return - - h5_path = self._save_model_dir('test.h5') - - num_hidden = 5 - input_dim = 3 - num_classes = 2 - with self.cached_session(): - ref_model = keras.models.Sequential() - ref_model.add(keras.layers.Dense(num_hidden, input_dim=input_dim, - name='d1')) - ref_model.add(keras.layers.Dense(num_classes, name='d2')) - ref_model.compile(loss=keras.losses.MSE, - optimizer='rmsprop', - metrics=[keras.metrics.categorical_accuracy]) - - f_ref_model = h5py.File(h5_path, 'w') - hdf5_format.save_weights_to_hdf5_group(f_ref_model, ref_model) - - f_model = h5py.File(h5_path, 'r') - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden, use_bias=False, - input_dim=input_dim, name='d1')) - model.add(keras.layers.Dense(num_classes, name='d2')) - model.compile(loss=keras.losses.MSE, - optimizer='rmsprop', - metrics=[keras.metrics.categorical_accuracy]) - with self.assertRaises( - ValueError, - msg='Weight count mismatch for layer #0 (named d1). ' - 'Layer expects 1 weight(s). Received 2 saved weight(s)'): - hdf5_format.load_weights_from_hdf5_group_by_name(f_model, model) - - hdf5_format.load_weights_from_hdf5_group_by_name( - f_model, model, skip_mismatch=True) - self.assertAllClose(keras.backend.get_value(ref_model.layers[1].kernel), - keras.backend.get_value(model.layers[1].kernel)) - - def test_sequential_weight_loading_group_name_with_incorrect_shape(self): - if h5py is None: - return - - h5_path = self._save_model_dir('test.h5') - - num_hidden = 5 - input_dim = 3 - num_classes = 2 - with tf.Graph().as_default(), self.cached_session(): - ref_model = keras.models.Sequential() - ref_model.add(keras.layers.Dense(num_hidden, input_dim=input_dim, - name='d1')) - ref_model.add(keras.layers.Dense(num_classes, name='d2')) - ref_model.compile(loss=keras.losses.MSE, - optimizer=optimizer_v1.RMSprop(lr=0.0001), - metrics=[keras.metrics.categorical_accuracy]) - - f_ref_model = h5py.File(h5_path, 'w') - keras.backend.set_value(ref_model.layers[1].bias, [3.5] * num_classes) - hdf5_format.save_weights_to_hdf5_group(f_ref_model, ref_model) - - f_model = h5py.File(h5_path, 'r') - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden + 5, input_dim=input_dim, - name='d1')) - model.add(keras.layers.Dense(num_classes, name='d2')) - model.compile(loss=keras.losses.MSE, - optimizer=optimizer_v1.RMSprop(lr=0.0001), - metrics=[keras.metrics.categorical_accuracy]) - with self.assertRaises( - ValueError, - msg='Shape mismatch in layer #0 (named d1) for weight d1_1/kernel:0. ' - 'Weight expects shape (3, 10). ' - 'Received saved weight with shape (3, 5)'): - hdf5_format.load_weights_from_hdf5_group_by_name(f_model, model) - - hdf5_format.load_weights_from_hdf5_group_by_name( - f_model, model, skip_mismatch=True) - self.assertAllClose([3.5] * num_classes, - keras.backend.get_value(model.layers[1].bias)) - - @test_combinations.run_with_all_saved_model_formats( - exclude_formats=['tf_no_traces']) - @test_combinations.run_with_all_model_types - def test_load_weights_from_saved_model(self): - save_path = self._save_model_dir() - save_format = test_utils.get_save_format() - - if save_format == 'h5' and test_utils.get_model_type() == 'subclass': - # TODO(b/173646281): HDF5 format currently does not allow saving - # subclassed models. 
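A hedged sketch of the limitation that TODO refers to (the model class and paths here are hypothetical stand-ins): a subclassed model carries no serializable layer-graph config, so the HDF5 format rejects it while the TF SavedModel format works.

```
import numpy as np
from tensorflow import keras

class MySubclassedModel(keras.Model):  # hypothetical stand-in model
    def __init__(self):
        super().__init__()
        self.dense = keras.layers.Dense(2)

    def call(self, inputs):
        return self.dense(inputs)

model = MySubclassedModel()
model.predict(np.zeros((1, 3)))                  # build the variables first
model.save('/tmp/model_dir', save_format='tf')   # OK: TF SavedModel format
# model.save('/tmp/model.h5', save_format='h5')  # raises: subclassed models
#                                                # have no get_config()
```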
- return - - with self.cached_session(): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - data = np.random.random((1, 3)) - labels = np.random.random((1, 4)) - model.compile(loss='mse', optimizer='rmsprop') - model.fit(data, labels) - model.save(save_path, save_format=save_format) - new_model = test_utils.get_small_mlp(1, 4, input_dim=3) - if test_utils.get_model_type() == 'subclass': - # Call on test data to build the model. - new_model.predict(data) - new_model.load_weights(save_path) - self.assertAllClose(model.weights, new_model.weights) - - -class SubclassedModel(training.Model): - - def __init__(self): - super().__init__() - self.x_layer = keras.layers.Dense(3) - self.b_layer = keras.layers.Dense(1) - - def call(self, a): - return self.b_layer(self.x_layer(a)) - - -class TestWeightSavingAndLoadingTFFormat(tf.test.TestCase, parameterized.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_tensorflow_format_overwrite(self): - with self.cached_session() as session: - model = SubclassedModel() - temp_dir = self.get_temp_dir() - prefix = os.path.join(temp_dir, 'ckpt') - - x = tf.constant(np.random.random((3, 2)), dtype=tf.float32) - executing_eagerly = tf.executing_eagerly() - model(x) # pylint: disable=not-callable - if not executing_eagerly: - session.run([v.initializer for v in model.variables]) - model.save_weights(prefix, save_format='tensorflow') - model.save_weights(prefix, save_format='tensorflow', overwrite=True) - with self.assertRaises(EOFError): - # Indirectly tests that the user is prompted - model.save_weights(prefix, save_format='tensorflow', overwrite=False) - - def test_no_default_session(self): - with tf.Graph().as_default(): - self.assertFalse(tf.compat.v1.get_default_session()) - data = np.random.random((1000, 32)).astype(np.float32) - labels = np.random.random((1000, 10)).astype(np.float32) - - model = keras.models.Sequential([ - keras.layers.Dense(10, activation='softmax'), - keras.layers.Dense(10, activation='softmax')]) - - model.compile(optimizer=tf.compat.v1.train.RMSPropOptimizer(0.001), - loss='categorical_crossentropy', - metrics=['accuracy']) - - model.fit(data, labels) - fname = os.path.join(self.get_temp_dir(), 'weights', 'ckpt') - model.save_weights(fname) - model.load_weights(fname) - - def test_no_graph_pollution(self): - with tf.compat.v1.get_default_graph().as_default(): - graph = tf.Graph() - with graph.as_default(), self.session(graph) as session: - model = SubclassedModel() - temp_dir = self.get_temp_dir() - prefix = os.path.join(temp_dir, 'ckpt') - - x = tf.constant(np.random.random((3, 2)), dtype=tf.float32) - model(x) # pylint: disable=not-callable - session.run([v.initializer for v in model.variables]) - model.save_weights(prefix, save_format='tensorflow') - op_count = len(graph.get_operations()) - model.save_weights(prefix, save_format='tensorflow') - self.assertLen(graph.get_operations(), op_count) - - model.load_weights(prefix) - op_count = len(graph.get_operations()) - model.load_weights(prefix) - self.assertLen(graph.get_operations(), op_count) - - def _weight_loading_test_template(self, make_model_fn): - with self.cached_session(): - model = make_model_fn() - model.compile( - loss='mse', - optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1), - metrics=['acc', keras.metrics.CategoricalAccuracy()]) - temp_dir = self.get_temp_dir() - prefix = os.path.join(temp_dir, 'ckpt') - train_x = np.random.random((3, 2)) - train_y = np.random.random((3,)) - x = tf.constant(train_x, 
dtype=tf.float32) - - model.train_on_batch(train_x, train_y) - model.save_weights(prefix, save_format='tf') - ref_y_before_train = model.predict(train_x) - model.train_on_batch(train_x, train_y) - ref_y_after_train = model.predict(train_x) - for v in model.variables: - self.evaluate( - v.assign(tf.random.normal(shape=tf.shape(v)))) - - self.addCleanup(shutil.rmtree, temp_dir) - - model.load_weights(prefix) - self.assertAllClose(ref_y_before_train, self.evaluate(model(x))) - - # Test restore-on-create if this is a subclassed Model (graph Networks - # will have already created their variables). - load_model = make_model_fn() - load_model.load_weights(prefix) - self.assertAllClose( - ref_y_before_train, - self.evaluate(load_model(x))) - load_model = make_model_fn() - load_model.load_weights(prefix) - # We need to run some of the restore ops for predict(), but not all - # variables have been created yet (optimizer slot variables). Tests - # incremental restore. - load_model.predict(train_x) - load_model.compile( - loss='mse', - optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1), - metrics=['acc', keras.metrics.CategoricalAccuracy()]) - load_model.train_on_batch(train_x, train_y) - self.assertAllClose(ref_y_after_train, self.evaluate(load_model(x))) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_weight_loading_graph_model(self): - def _make_graph_model(): - a = keras.layers.Input(shape=(2,)) - x = keras.layers.Dense(3)(a) - b = keras.layers.Dense(1)(x) - return keras.models.Model(a, b) - - self._weight_loading_test_template(_make_graph_model) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_weight_loading_subclassed_model(self): - self._weight_loading_test_template(SubclassedModel) - - def _new_layer_weight_loading_test_template( - self, first_model_fn, second_model_fn): - with self.cached_session() as session: - model = first_model_fn() - temp_dir = self.get_temp_dir() - prefix = os.path.join(temp_dir, 'ckpt') - - x = tf.constant(np.random.random((3, 2)), dtype=tf.float32) - executing_eagerly = tf.executing_eagerly() - ref_y_tensor = model(x) - if not executing_eagerly: - session.run([v.initializer for v in model.variables]) - ref_y = self.evaluate(ref_y_tensor) - model.save_weights(prefix) - self.assertEqual( - prefix, - tf.train.latest_checkpoint(temp_dir)) - for v in model.variables: - self.evaluate( - v.assign(tf.random.normal(shape=tf.shape(v)))) - - self.addCleanup(shutil.rmtree, temp_dir) - - second_model = second_model_fn() - status = second_model.load_weights(prefix) - second_model(x) - status.run_restore_ops() - second_model.save_weights(prefix) - # Check that the second model's checkpoint loads into the original model - status = model.load_weights(prefix) - status.run_restore_ops(session) - y = self.evaluate(model(x)) - self.assertAllClose(ref_y, y) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_weight_loading_graph_model_added_layer(self): - def _save_graph_model(): - a = keras.layers.Input(shape=(2,)) - x = keras.layers.Dense(3, name='first')(a) - b = keras.layers.Dense(1, name='second')(x) - return keras.models.Model(a, b) - def _restore_graph_model(): - a = keras.layers.Input(shape=(2,)) - x = keras.layers.Dense(3, name='first')(a) - y = keras.layers.Dense(1, name='second')(x) - b = keras.layers.Dense(3, name='secondjr')(y) - return keras.models.Model(a, b) - - self._new_layer_weight_loading_test_template( - _save_graph_model, 
_restore_graph_model) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_weight_loading_graph_model_added_no_weight_layer(self): - def _save_graph_model(): - a = keras.layers.Input(shape=(2,)) - x = keras.layers.Dense(3, name='first')(a) - b = keras.layers.Dense(1, name='second')(x) - return keras.models.Model(a, b) - def _restore_graph_model(): - a = keras.layers.Input(shape=(2,)) - x = keras.layers.Dense(3, name='first')(a) - b = keras.layers.Dense(1, name='second')(x) - y = keras.layers.Dropout(rate=0.1)(b) - return keras.models.Model(a, y) - - self._new_layer_weight_loading_test_template( - _save_graph_model, _restore_graph_model) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_weight_loading_subclassed_model_added_layer(self): - - class SubclassedModelRestore(training.Model): - - def __init__(self): - super().__init__() - self.x_layer = keras.layers.Dense(3) - self.y_layer = keras.layers.Dense(3) - self.b_layer = keras.layers.Dense(1) - - def call(self, a): - return self.b_layer(self.y_layer(self.x_layer(a))) - - self._new_layer_weight_loading_test_template( - SubclassedModel, SubclassedModelRestore) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_incompatible_checkpoint(self): - save_path = tf.train.Checkpoint().save( - os.path.join(self.get_temp_dir(), 'ckpt')) - m = DummySubclassModel() - with self.assertRaisesRegex(AssertionError, 'Nothing to load'): - m.load_weights(save_path) - m.dense = keras.layers.Dense(2) - m.dense(tf.constant([[1.]])) - with self.assertRaisesRegex(AssertionError, - 'Nothing except the root object matched'): - m.load_weights(save_path) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_directory_passed(self): - with self.cached_session(): - m = DummySubclassModel() - v = m.add_weight(name='v', shape=[]) - self.evaluate(v.assign(42.)) - prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'ckpt/') - m.save_weights(prefix) - self.evaluate(v.assign(2.)) - m.load_weights(prefix) - self.assertEqual(42., self.evaluate(v)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_relative_path(self): - with self.cached_session(): - m = DummySubclassModel() - v = m.add_weight(name='v', shape=[]) - os.chdir(self.get_temp_dir()) - - prefix = 'ackpt' - self.evaluate(v.assign(42.)) - m.save_weights(prefix) - self.assertTrue(tf.io.gfile.exists('ackpt.index')) - self.evaluate(v.assign(1.)) - m.load_weights(prefix) - self.assertEqual(42., self.evaluate(v)) - - prefix = 'subdir/ackpt' - self.evaluate(v.assign(43.)) - m.save_weights(prefix) - self.assertTrue(tf.io.gfile.exists('subdir/ackpt.index')) - self.evaluate(v.assign(2.)) - m.load_weights(prefix) - self.assertEqual(43., self.evaluate(v)) - - prefix = 'ackpt/' - self.evaluate(v.assign(44.)) - m.save_weights(prefix) - self.assertTrue(tf.io.gfile.exists('ackpt/.index')) - self.evaluate(v.assign(3.)) - m.load_weights(prefix) - self.assertEqual(44., self.evaluate(v)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_nonexistent_prefix_directory(self): - with self.cached_session(): - m = DummySubclassModel() - v = m.add_weight(name='v', shape=[]) - self.evaluate(v.assign(42.)) - prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'bckpt') - m.save_weights(prefix) - self.evaluate(v.assign(2.)) - m.load_weights(prefix) - 
self.assertEqual(42., self.evaluate(v)) - - -class DummySubclassModel(training.Model): - pass - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/saving/saved_model/base_serialization.py b/keras/saving/saved_model/base_serialization.py deleted file mode 100644 index 97b7c67ae8c1..000000000000 --- a/keras/saving/saved_model/base_serialization.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Helper classes that list and validate all attributes to serialize to SavedModel.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc - -from keras.saving.saved_model import json_utils -from keras.saving.saved_model import utils - - -class SavedModelSaver(object, metaclass=abc.ABCMeta): - """Saver defining the methods and properties used to serialize Keras objects. - """ - - def __init__(self, obj): - self.obj = obj - - @abc.abstractproperty - def object_identifier(self): - """String stored in object identifier field in the SavedModel proto. - - Returns: - A string with the object identifier, which is used at load time. - """ - raise NotImplementedError - - @property - def tracking_metadata(self): - """String stored in metadata field in the SavedModel proto. - - Returns: - A serialized JSON storing information necessary for recreating this layer. - """ - # TODO(kathywu): check that serialized JSON can be loaded (e.g., if an - # object is in the python property) - return json_utils.Encoder().encode(self.python_properties) - - def trackable_children(self, serialization_cache): - """Lists all Trackable children connected to this object.""" - if not utils.should_save_traces(): - return {} - - children = self.objects_to_serialize(serialization_cache) - children.update(self.functions_to_serialize(serialization_cache)) - return children - - @abc.abstractproperty - def python_properties(self): - """Returns dictionary of python properties to save in the metadata. - - This dictionary must be serializable and deserializable to/from JSON. - - When loading, the items in this dict are used to initialize the object and - define attributes in the revived object. - """ - raise NotImplementedError - - @abc.abstractmethod - def objects_to_serialize(self, serialization_cache): - """Returns dictionary of extra checkpointable objects to serialize. - - See `functions_to_serialize` for an explanation of this function's - effects. - - Args: - serialization_cache: Dictionary passed to all objects in the same object - graph during serialization. - - Returns: - A dictionary mapping attribute names to checkpointable objects. - """ - raise NotImplementedError - - @abc.abstractmethod - def functions_to_serialize(self, serialization_cache): - """Returns extra functions to include when serializing a Keras object.
- - Normally, when exporting an object to SavedModel, only the - functions and objects defined by the user are saved. For example: - - ``` - obj = tf.Module() - obj.v = tf.Variable(1.) - - @tf.function - def foo(...): ... - - obj.foo = foo - - w = tf.Variable(1.) - - tf.saved_model.save(obj, 'path/to/saved/model') - loaded = tf.saved_model.load('path/to/saved/model') - - loaded.v # Variable with the same value as obj.v - loaded.foo # Equivalent to obj.foo - loaded.w # AttributeError - ``` - - Assigning trackable objects to attributes creates a graph, which is used for - both checkpointing and SavedModel serialization. - - When the graph generated from attribute tracking is insufficient, extra - objects and functions may be added at serialization time. For example, - most models do not have their call function wrapped with a @tf.function - decorator. This results in `model.call` not being saved. Since Keras objects - should be revivable from the SavedModel format, the call function is added - as an extra function to serialize. - - This function and `objects_to_serialize` are called multiple times when - exporting to SavedModel. Please use the cache to avoid generating new - functions and objects. A fresh cache is created for each SavedModel export. - - Args: - serialization_cache: Dictionary passed to all objects in the same object - graph during serialization. - - Returns: - A dictionary mapping attribute names to `Function` or - `ConcreteFunction`. - """ - raise NotImplementedError diff --git a/keras/saving/saved_model/create_test_saved_model.py b/keras/saving/saved_model/create_test_saved_model.py deleted file mode 100644 index 832da70ac1b1..000000000000 --- a/keras/saving/saved_model/create_test_saved_model.py +++ /dev/null @@ -1,36 +0,0 @@ -"""A binary that creates a serialized SavedModel from a Keras model. - -This is used in tests to ensure that model serialization is deterministic across -different processes.
-""" - -from absl import app -from absl import flags -from keras import regularizers -from keras.testing_infra import test_utils - -import tensorflow.compat.v2 as tf - -flags.DEFINE_string('output_path', '', 'The path to write the SavedModel at.') - -FLAGS = flags.FLAGS - - -def main(_) -> None: - with test_utils.model_type_scope('functional'): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - model.layers[-1].activity_regularizer = regularizers.get('l2') - model.activity_regularizer = regularizers.get('l2') - model.compile( - loss='mse', - optimizer='rmsprop') - def callable_loss(): - return tf.reduce_sum(model.weights[0]) - model.add_loss(callable_loss) - - print(f'_____Writing saved model to: {FLAGS.output_path}') - model.save(FLAGS.output_path) - - -if __name__ == '__main__': - app.run(main) diff --git a/keras/saving/saved_model/determinism_test.py b/keras/saving/saved_model/determinism_test.py deleted file mode 100755 index 9f9ee2e499a7..000000000000 --- a/keras/saving/saved_model/determinism_test.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Saves the same model twice and ensures that the two copies are serialized identically.""" - -import subprocess - -from absl import flags -import tensorflow.compat.v2 as tf - - -from tensorflow.core.protobuf import saved_model_pb2 - - -FLAGS = flags.FLAGS - - -class DeterminismTest(tf.test.TestCase): - - def test_saving_is_deterministic(self): - create_saved_model = f'{FLAGS.test_srcdir}/create_test_saved_model.par' - saved_model_a_path = f'{FLAGS.test_tmpdir}/a' - saved_model_b_path = f'{FLAGS.test_tmpdir}/b' - - save_a = subprocess.Popen( - [create_saved_model, '--output_path', saved_model_a_path]) - save_b = subprocess.Popen( - [create_saved_model, '--output_path', saved_model_b_path]) - save_a.wait() - save_b.wait() - saved_model_a = saved_model_pb2.SavedModel() - with tf.io.gfile.GFile(f'{saved_model_a_path}/saved_model.pb') as f: - saved_model_a.MergeFromString(f.read()) - saved_model_b = saved_model_pb2.SavedModel() - with tf.io.gfile.GFile(f'{saved_model_b_path}/saved_model.pb') as f: - saved_model_b.MergeFromString(f.read()) - - self.assertProtoEquals(saved_model_a, saved_model_b) diff --git a/keras/saving/saved_model/json_utils.py b/keras/saving/saved_model/json_utils.py deleted file mode 100644 index 7b81c2da26ce..000000000000 --- a/keras/saving/saved_model/json_utils.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utils for creating and loading the Layer metadata for SavedModel. - -These are required to retain the original format of the build input shape, since -layers and models may have different build behaviors depending on whether the shape -is a list, tuple, or TensorShape. For example, Network.build() will create -separate inputs if the given input_shape is a list, and will create a single -input if the given shape is a tuple.
-""" - -import tensorflow.compat.v2 as tf - -import collections -import functools -import enum -import json -import numpy as np -import wrapt - -from keras.utils import generic_utils - - -from tensorflow.python.framework import type_spec - - -_EXTENSION_TYPE_SPEC = '_EXTENSION_TYPE_SPEC' - - -class Encoder(json.JSONEncoder): - """JSON encoder and decoder that handles TensorShapes and tuples.""" - - def default(self, obj): # pylint: disable=method-hidden - """Encodes objects for types that aren't handled by the default encoder.""" - if isinstance(obj, tf.TensorShape): - items = obj.as_list() if obj.rank is not None else None - return {'class_name': 'TensorShape', 'items': items} - return get_json_type(obj) - - def encode(self, obj): - return super().encode(_encode_tuple(obj)) - - -def _encode_tuple(x): - if isinstance(x, tuple): - return {'class_name': '__tuple__', - 'items': tuple(_encode_tuple(i) for i in x)} - elif isinstance(x, list): - return [_encode_tuple(i) for i in x] - elif isinstance(x, dict): - return {key: _encode_tuple(value) for key, value in x.items()} - else: - return x - - -def decode(json_string): - return json.loads(json_string, object_hook=_decode_helper) - - -def decode_and_deserialize(json_string, module_objects=None, - custom_objects=None): - """Decodes the JSON and deserializes any Keras objects found in the dict.""" - return json.loads(json_string, - object_hook=functools.partial( - _decode_helper, - deserialize=True, - module_objects=module_objects, - custom_objects=custom_objects)) - - -def _decode_helper(obj, deserialize=False, module_objects=None, - custom_objects=None): - """A decoding helper that is TF-object aware. - - Args: - obj: A decoded dictionary that may represent an object. - deserialize: Boolean, defaults to False. When True, deserializes any Keras - objects found in `obj`. - module_objects: A dictionary of built-in objects to look the name up in. - Generally, `module_objects` is provided by midlevel library implementers. - custom_objects: A dictionary of custom objects to look the name up in. - Generally, `custom_objects` is provided by the end user. - - Returns: - The decoded object. - """ - if isinstance(obj, dict) and 'class_name' in obj: - if obj['class_name'] == 'TensorShape': - return tf.TensorShape(obj['items']) - elif obj['class_name'] == 'TypeSpec': - return type_spec.lookup(obj['type_spec'])._deserialize( # pylint: disable=protected-access - _decode_helper(obj['serialized'])) - elif obj['class_name'] == 'CompositeTensor': - spec = obj['spec'] - tensors = [] - for dtype, tensor in obj['tensors']: - tensors.append(tf.constant(tensor, dtype=tf.dtypes.as_dtype(dtype))) - return tf.nest.pack_sequence_as( - _decode_helper(spec), - tensors, - expand_composites=True) - elif obj['class_name'] == '__tuple__': - return tuple(_decode_helper(i) for i in obj['items']) - elif obj['class_name'] == '__ellipsis__': - return Ellipsis - elif deserialize and '__passive_serialization__' in obj: - # __passive_serialization__ is added by the JSON encoder when encoding - # an object that has a `get_config()` method. - try: - return generic_utils.deserialize_keras_object( - obj, - module_objects=module_objects, - custom_objects=custom_objects) - except ValueError: - pass - return obj - - -def get_json_type(obj): - """Serializes any object to a JSON-serializable structure. - - Args: - obj: the object to serialize - - Returns: - JSON-serializable structure representing `obj`. - - Raises: - TypeError: if `obj` cannot be serialized. 
- """ - # if obj is a serializable Keras class instance - # e.g. optimizer, layer - if hasattr(obj, 'get_config'): - serialized = generic_utils.serialize_keras_object(obj) - serialized['__passive_serialization__'] = True - return serialized - - # if obj is any numpy type - if type(obj).__module__ == np.__name__: - if isinstance(obj, np.ndarray): - return obj.tolist() - else: - return obj.item() - - # misc functions (e.g. loss function) - if callable(obj): - return obj.__name__ - - # if obj is a python 'type' - if type(obj).__name__ == type.__name__: - return obj.__name__ - - if isinstance(obj, tf.compat.v1.Dimension): - return obj.value - - if isinstance(obj, tf.TensorShape): - return obj.as_list() - - if isinstance(obj, tf.DType): - return obj.name - - if isinstance(obj, collections.abc.Mapping): - return dict(obj) - - if obj is Ellipsis: - return {'class_name': '__ellipsis__'} - - if isinstance(obj, wrapt.ObjectProxy): - return obj.__wrapped__ - - if isinstance(obj, tf.TypeSpec): - try: - type_spec_name = type_spec.get_name(type(obj)) - return {'class_name': 'TypeSpec', 'type_spec': type_spec_name, - 'serialized': obj._serialize()} # pylint: disable=protected-access - except ValueError: - raise ValueError( - f'Unable to serialize {obj} to JSON, because the TypeSpec ' - f'class {type(obj)} has not been registered.') - if isinstance(obj, tf.__internal__.CompositeTensor): - spec = tf.type_spec_from_value(obj) - tensors = [] - for tensor in tf.nest.flatten(obj, expand_composites=True): - tensors.append((tensor.dtype.name, tensor.numpy().tolist())) - return {'class_name': 'CompositeTensor', - 'spec': get_json_type(spec), - 'tensors': tensors} - - if isinstance(obj, enum.Enum): - return obj.value - - raise TypeError( - f'Unable to serialize {obj} to JSON. Unrecognized type {type(obj)}.') diff --git a/keras/saving/saved_model/json_utils_test.py b/keras/saving/saved_model/json_utils_test.py deleted file mode 100644 index 4f1e01447b9a..000000000000 --- a/keras/saving/saved_model/json_utils_test.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -# pylint: disable=protected-access -"""Tests the JSON encoder and decoder.""" - -import tensorflow.compat.v2 as tf - -import enum -from keras.saving.saved_model import json_utils -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils - - -class JsonUtilsTest(test_combinations.TestCase): - - def test_encode_decode_tensor_shape(self): - metadata = { - 'key1': tf.TensorShape(None), - 'key2': [tf.TensorShape([None]), - tf.TensorShape([3, None, 5])]} - string = json_utils.Encoder().encode(metadata) - loaded = json_utils.decode(string) - - self.assertEqual(set(loaded.keys()), {'key1', 'key2'}) - self.assertAllEqual(loaded['key1'].rank, None) - self.assertAllEqual(loaded['key2'][0].as_list(), [None]) - self.assertAllEqual(loaded['key2'][1].as_list(), [3, None, 5]) - - def test_encode_decode_tuple(self): - metadata = { - 'key1': (3, 5), - 'key2': [(1, (3, 4)), (1,)]} - string = json_utils.Encoder().encode(metadata) - loaded = json_utils.decode(string) - - self.assertEqual(set(loaded.keys()), {'key1', 'key2'}) - self.assertAllEqual(loaded['key1'], (3, 5)) - self.assertAllEqual(loaded['key2'], [(1, (3, 4)), (1,)]) - - def test_encode_decode_type_spec(self): - spec = tf.TensorSpec((1, 5), tf.float32) - string = json_utils.Encoder().encode(spec) - loaded = json_utils.decode(string) - self.assertEqual(spec, loaded) - - invalid_type_spec = {'class_name': 'TypeSpec', 'type_spec': 'Invalid Type', - 'serialized': None} - string = json_utils.Encoder().encode(invalid_type_spec) - with self.assertRaisesRegexp(ValueError, 'No TypeSpec has been registered'): - loaded = json_utils.decode(string) - - def test_encode_decode_enum(self): - class Enum(enum.Enum): - CLASS_A = 'a' - CLASS_B = 'b' - config = {'key': Enum.CLASS_A, 'key2': Enum.CLASS_B} - string = json_utils.Encoder().encode(config) - loaded = json_utils.decode(string) - self.assertAllEqual({'key': 'a', 'key2': 'b'}, loaded) - - @test_utils.run_v2_only - def test_encode_decode_ragged_tensor(self): - x = tf.ragged.constant([[1., 2.], [3.]]) - string = json_utils.Encoder().encode(x) - loaded = json_utils.decode(string) - self.assertAllEqual(loaded, x) - - @test_utils.run_v2_only - def test_encode_decode_extension_type_tensor(self): - class MaskedTensor(tf.experimental.ExtensionType): - __name__ = 'MaskedTensor' - values: tf.Tensor - mask: tf.Tensor - x = MaskedTensor(values=[[1, 2, 3], [4, 5, 6]], - mask=[[True, True, False], [True, False, True]]) - string = json_utils.Encoder().encode(x) - loaded = json_utils.decode(string) - self.assertAllEqual(loaded, x) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/saving/saved_model/layer_serialization.py b/keras/saving/saved_model/layer_serialization.py deleted file mode 100644 index a4945c0b012e..000000000000 --- a/keras/saving/saved_model/layer_serialization.py +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Classes and functions implementing Layer SavedModel serialization.""" - -from keras.mixed_precision import policy -from keras.saving.saved_model import base_serialization -from keras.saving.saved_model import constants -from keras.saving.saved_model import save_impl -from keras.saving.saved_model import serialized_attributes -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - - -class LayerSavedModelSaver(base_serialization.SavedModelSaver): - """Implements Layer SavedModel serialization.""" - - @property - def object_identifier(self): - return constants.LAYER_IDENTIFIER - - @property - def python_properties(self): - # TODO(kathywu): Add python property validator - return self._python_properties_internal() - - def _python_properties_internal(self): - """Returns dictionary of all python properties.""" - # TODO(kathywu): Add support for metrics serialization. - # TODO(kathywu): Synchronize with the keras spec (go/keras-json-spec) once - # the python config serialization has caught up. - metadata = dict( - name=self.obj.name, - trainable=self.obj.trainable, - expects_training_arg=self.obj._expects_training_arg, # pylint: disable=protected-access - dtype=policy.serialize(self.obj._dtype_policy), # pylint: disable=protected-access - batch_input_shape=getattr(self.obj, '_batch_input_shape', None), - stateful=self.obj.stateful, - must_restore_from_config=self.obj._must_restore_from_config, # pylint: disable=protected-access - ) - - metadata.update(get_serialized(self.obj)) - if self.obj.input_spec is not None: - # Layer's input_spec has already been type-checked in the property setter. 
- metadata['input_spec'] = tf.nest.map_structure( - lambda x: generic_utils.serialize_keras_object(x) if x else None, - self.obj.input_spec) - if (self.obj.activity_regularizer is not None and - hasattr(self.obj.activity_regularizer, 'get_config')): - metadata['activity_regularizer'] = generic_utils.serialize_keras_object( - self.obj.activity_regularizer) - if self.obj._build_input_shape is not None: # pylint: disable=protected-access - metadata['build_input_shape'] = self.obj._build_input_shape # pylint: disable=protected-access - return metadata - - def objects_to_serialize(self, serialization_cache): - return (self._get_serialized_attributes( - serialization_cache).objects_to_serialize) - - def functions_to_serialize(self, serialization_cache): - return (self._get_serialized_attributes( - serialization_cache).functions_to_serialize) - - def _get_serialized_attributes(self, serialization_cache): - """Generates or retrieves serialized attributes from cache.""" - keras_cache = serialization_cache.setdefault(constants.KERAS_CACHE_KEY, {}) - if self.obj in keras_cache: - return keras_cache[self.obj] - - serialized_attr = keras_cache[self.obj] = ( - serialized_attributes.SerializedAttributes.new(self.obj)) - - if (save_impl.should_skip_serialization(self.obj) or - self.obj._must_restore_from_config): # pylint: disable=protected-access - return serialized_attr - - object_dict, function_dict = self._get_serialized_attributes_internal( - serialization_cache) - - serialized_attr.set_and_validate_objects(object_dict) - serialized_attr.set_and_validate_functions(function_dict) - return serialized_attr - - def _get_serialized_attributes_internal(self, serialization_cache): - """Returns dictionary of serialized attributes.""" - objects = save_impl.wrap_layer_objects(self.obj, serialization_cache) - functions = save_impl.wrap_layer_functions(self.obj, serialization_cache) - # Attribute validator requires that the default save signature is added to - # function dict, even if the value is None. - functions['_default_save_signature'] = None - return objects, functions - - -# TODO(kathywu): Move serialization utils (and related utils from -# generic_utils.py) to a separate file. -def get_serialized(obj): - with generic_utils.skip_failed_serialization(): - # Store the config dictionary, which may be used when reviving the object. - # When loading, the program will attempt to revive the object from config, - # and if that fails, the object will be revived from the SavedModel. 
-    return generic_utils.serialize_keras_object(obj)
-
-
-class InputLayerSavedModelSaver(base_serialization.SavedModelSaver):
-  """InputLayer serialization."""
-
-  @property
-  def object_identifier(self):
-    return constants.INPUT_LAYER_IDENTIFIER
-
-  @property
-  def python_properties(self):
-
-    return dict(
-        class_name=type(self.obj).__name__,
-        name=self.obj.name,
-        dtype=self.obj.dtype,
-        sparse=self.obj.sparse,
-        ragged=self.obj.ragged,
-        batch_input_shape=self.obj._batch_input_shape,  # pylint: disable=protected-access
-        config=self.obj.get_config())
-
-  def objects_to_serialize(self, serialization_cache):
-    return {}
-
-  def functions_to_serialize(self, serialization_cache):
-    return {}
-
-
-class RNNSavedModelSaver(LayerSavedModelSaver):
-  """RNN layer serialization."""
-
-  @property
-  def object_identifier(self):
-    return constants.RNN_LAYER_IDENTIFIER
-
-  def _get_serialized_attributes_internal(self, serialization_cache):
-    objects, functions = (
-        super()._get_serialized_attributes_internal(
-            serialization_cache))
-    states = tf.__internal__.tracking.wrap(self.obj.states)
-    # SavedModel requires all the objects to be Trackable when saving.
-    # If the states is still a tuple after wrap_or_unwrap, it means it doesn't
-    # contain any trackable item within it, e.g. an empty tuple or (None, None)
-    # for stateless ConvLSTM2D. We convert them to a list so that
-    # wrap_or_unwrap can make it a Trackable again for saving. When loaded,
-    # ConvLSTM2D is able to handle the tuple/list conversion.
-    if isinstance(states, tuple):
-      states = tf.__internal__.tracking.wrap(list(states))
-    objects['states'] = states
-    return objects, functions
-
-
-class VocabularySavedModelSaver(LayerSavedModelSaver):
-  """Handles vocabulary layer serialization.
-
-  This class is needed for StringLookup, IntegerLookup, and TextVectorization,
-  which all have a vocabulary as part of the config. Currently, we keep this
-  vocab as part of the config until saving, when we need to clear it to avoid
-  initializing a StaticHashTable twice (once when restoring the config and once
-  when restoring module resources). After clearing the vocab, we
-  persist a property to the layer indicating it was constructed with a vocab.
-  """
-
-  @property
-  def python_properties(self):
-    # TODO(kathywu): Add python property validator
-    metadata = self._python_properties_internal()
-    # Clear the vocabulary from the config during saving.
-    metadata['config']['vocabulary'] = None
-    # Persist a property to track that a vocabulary was passed on construction.
-    metadata['config']['has_input_vocabulary'] = self.obj._has_input_vocabulary  # pylint: disable=protected-access
-    return metadata
diff --git a/keras/saving/saved_model/load.py b/keras/saving/saved_model/load.py
deleted file mode 100644
index a36b5c3305fb..000000000000
--- a/keras/saving/saved_model/load.py
+++ /dev/null
@@ -1,1210 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================== -"""Keras SavedModel deserialization.""" - -import re -import types -import warnings - -from keras import backend -from keras import regularizers -from keras.engine import input_spec -from keras.optimizers.optimizer_v2 import optimizer_v2 -from keras.protobuf import saved_metadata_pb2 -from keras.protobuf import versions_pb2 -from keras.saving import saving_utils -from keras.saving.saved_model import constants -from keras.saving.saved_model import json_utils -from keras.saving.saved_model import utils -from keras.saving.saved_model.serialized_attributes import CommonEndpoints -from keras.utils import layer_utils -from keras.utils import generic_utils -from keras.utils import metrics_utils -from keras.utils import tf_inspect -from keras.utils.generic_utils import LazyLoader -import tensorflow.compat.v1.logging as logging -import tensorflow.compat.v2 as tf - -from google.protobuf import message - -# To avoid circular dependencies between keras/engine and keras/saving, -# code in keras/saving must delay imports. - -# TODO(b/134426265): Switch back to single-quotes to match the rest of the file -# once the issue with copybara is fixed. -# pylint:disable=g-inconsistent-quotes -models_lib = LazyLoader('models_lib', globals(), 'keras.models') -base_layer = LazyLoader('base_layer', globals(), 'keras.engine.base_layer') -layers_module = LazyLoader('layers_module', globals(), 'keras.layers') -input_layer = LazyLoader('input_layer', globals(), 'keras.engine.input_layer') -functional_lib = LazyLoader('functional_lib', globals(), - 'keras.engine.functional') -training_lib = LazyLoader('training_lib', globals(), 'keras.engine.training') -training_lib_v1 = LazyLoader('training_lib_v1', globals(), - 'keras.engine.training_v1') -metrics = LazyLoader('metrics', globals(), 'keras.metrics') -base_rnn = LazyLoader('base_rnn', globals(), 'keras.layers.rnn.base_rnn') -# pylint:enable=g-inconsistent-quotes - -PUBLIC_ATTRIBUTES = CommonEndpoints.all_functions.union( - CommonEndpoints.all_checkpointable_objects) -PUBLIC_ATTRIBUTES.add(constants.KERAS_ATTR) - - -def load(path, compile=True, options=None): # pylint: disable=redefined-builtin - """Loads Keras objects from a SavedModel. - - Any Keras layer or model saved to the SavedModel will be loaded back - as Keras objects. Other objects are loaded as regular trackable objects (same - as `tf.saved_model.load`). - - Currently, Keras saving/loading only retains the Keras object's weights, - losses, and call function. - - The loaded model can be re-compiled, but the original optimizer, compiled loss - functions, and metrics are not retained. This is temporary, and `model.save` - will soon be able to serialize compiled models. - - Args: - path: Path to SavedModel. - compile: If true, compile the model after loading it. - options: Optional `tf.saved_model.LoadOptions` object that specifies options - for loading from SavedModel. - - Returns: - Object loaded from SavedModel. - """ - # TODO(kathywu): Add saving/loading of optimizer, compiled losses and metrics. 
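The `LazyLoader` aliases above exist purely to break import cycles: the real module is imported only on first attribute access. A rough sketch of the idea (this `LazyModule` is illustrative, not the actual `keras.utils.generic_utils.LazyLoader` implementation):

import importlib

class LazyModule:
    """Defers the real import until an attribute is first accessed."""

    def __init__(self, module_name):
        self._module_name = module_name
        self._module = None

    def __getattr__(self, attr):
        # Only called when normal lookup fails, i.e. for the target's names.
        if self._module is None:
            self._module = importlib.import_module(self._module_name)
        return getattr(self._module, attr)

json_mod = LazyModule('json')    # nothing imported yet
print(json_mod.dumps({'a': 1}))  # first access triggers the import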
- # TODO(kathywu): Add code to load from objects that contain all endpoints - - # Look for metadata file or parse the SavedModel - metadata = saved_metadata_pb2.SavedMetadata() - meta_graph_def = tf.__internal__.saved_model.parse_saved_model( - path).meta_graphs[0] - object_graph_def = meta_graph_def.object_graph_def - path_to_metadata_pb = tf.io.gfile.join(path, constants.SAVED_METADATA_PATH) - if tf.compat.v1.gfile.Exists(path_to_metadata_pb): - try: - with tf.io.gfile.GFile(path_to_metadata_pb, 'rb') as f: - file_content = f.read() - metadata.ParseFromString(file_content) - except message.DecodeError as e: - raise IOError( - f'Cannot parse keras metadata at path {path_to_metadata_pb}: ' - f'Received error: {e}') - else: - logging.warning('SavedModel saved prior to TF 2.5 detected when loading ' - 'Keras model. Please ensure that you are saving the model ' - 'with model.save() or tf.keras.models.save_model(), *NOT* ' - 'tf.saved_model.save(). To confirm, there should be a file ' - 'named "keras_metadata.pb" in the SavedModel directory.') - _read_legacy_metadata(object_graph_def, metadata, path) - - if not metadata.nodes: - # When there are no Keras objects, return the results from the core loader - return tf.saved_model.load(path, options=options) - - metadata = _update_to_current_version(metadata) - # Recreate layers and metrics using the info stored in the metadata. - keras_loader = KerasObjectLoader(metadata, object_graph_def) - keras_loader.load_layers(compile=compile) - - # Generate a dictionary of all loaded nodes. - nodes_to_load = {'root': None} - for node_id, loaded_node in keras_loader.loaded_nodes.items(): - nodes_to_load[keras_loader.get_path(node_id)] = loaded_node - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', message='Trying to load ShardedVariables') - loaded = tf.__internal__.saved_model.load_partial( - path, nodes_to_load, options=options) - - # Finalize the loaded layers and remove the extra tracked dependencies. - keras_loader.finalize_objects() - keras_loader.del_tracking() - - model = loaded['root'] - - # pylint: disable=protected-access - if isinstance(model, training_lib.Model) and compile: - # TODO(kathywu): Use compiled objects from SavedModel, instead of - # creating new objects from the training config. - training_config = model._serialized_attributes['metadata'].get( - 'training_config', None) - if training_config is not None: - model.compile( - **saving_utils.compile_args_from_training_config(training_config), - from_serialized=True) - saving_utils.try_build_compiled_arguments(model) - if isinstance(model.optimizer, optimizer_v2.OptimizerV2): - if model.optimizer.get_slot_names(): - logging.warning('Your optimizer uses slots. ' - 'Slots cannot be restored from saved_model, ' - 'as a result, your model is starting with ' - 'a new initialized optimizer.') - else: - logging.warning('No training configuration found in save file, so the ' - 'model was *not* compiled. Compile it manually.') - # pylint: enable=protected-access - - # Force variables and resources to initialize. - if not tf.executing_eagerly(): - sess = backend.get_session() # Variables are initialized by this call. 
-    sess.run(
-        tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS))
-
-  return model
-
-
-def _update_to_current_version(metadata):
-  """Applies version updates to the metadata proto for backwards compat."""
-  for node in metadata.nodes:
-    if node.version.producer == 1 and node.identifier in [
-        constants.MODEL_IDENTIFIER, constants.SEQUENTIAL_IDENTIFIER,
-        constants.NETWORK_IDENTIFIER
-    ]:
-      node_metadata = json_utils.decode(node.metadata)
-      save_spec = node_metadata.get('save_spec')
-
-      if save_spec is not None:
-        node_metadata['full_save_spec'] = ([save_spec], {})
-        node.metadata = json_utils.Encoder().encode(node_metadata)
-  return metadata
-
-
-def _read_legacy_metadata(object_graph_def, metadata, path):
-  """Builds a KerasMetadata proto from the SavedModel ObjectGraphDef."""
-  # Older SavedModels store the metadata directly in the proto instead of the
-  # separate pb file.
-  node_paths = _generate_object_paths(object_graph_def)
-  for node_id, proto in enumerate(object_graph_def.nodes):
-    if (proto.WhichOneof('kind') == 'user_object' and
-        proto.user_object.identifier in constants.KERAS_OBJECT_IDENTIFIERS):
-      if not proto.user_object.metadata:
-        raise ValueError(
-            f'Unable to create a Keras model from SavedModel at {path}. '
-            'This SavedModel was exported with `tf.saved_model.save`, and '
-            'lacks the Keras metadata file. Please save your Keras model by '
-            'calling `model.save` or `tf.keras.models.save_model`. Note that '
-            'you can still load this SavedModel with `tf.saved_model.load`.')
-      metadata.nodes.add(
-          node_id=node_id,
-          node_path=node_paths[node_id],
-          version=versions_pb2.VersionDef(
-              producer=1, min_consumer=1, bad_consumers=[]),
-          identifier=proto.user_object.identifier,
-          metadata=proto.user_object.metadata)
-
-
-def _generate_object_paths(object_graph_def):
-  """Traverses an ObjectGraphDef and builds a map of all node paths."""
-  paths = {0: 'root'}
-  nodes_to_visit = [0]
-
-  while nodes_to_visit:
-    current_node = nodes_to_visit.pop()
-    current_path = paths[current_node]
-    for reference in object_graph_def.nodes[current_node].children:
-      if reference.node_id in paths:
-        continue
-      paths[reference.node_id] = '{}.{}'.format(current_path,
-                                                reference.local_name)
-      nodes_to_visit.append(reference.node_id)
-
-  return paths
-
-
-def _is_graph_network(layer):
-  """Determines whether the layer is a graph network."""
-  # pylint: disable=protected-access
-  if isinstance(layer, RevivedNetwork):
-    return False
-  elif isinstance(layer, functional_lib.Functional):
-    return layer._is_graph_network or isinstance(layer, models_lib.Sequential)
-  return False
-
-
-class KerasObjectLoader:
-  """Loader that recreates Keras objects (e.g. layers, models).
-
-  Layers and models are revived from either the config or SavedModel following
-  these rules:
-  1. If object is a graph network (i.e. Sequential or Functional) then it will
-     be initialized using the structure from the config only after the children
-     layers have been created. Graph networks must be initialized with inputs
-     and outputs, so all child layers must be created beforehand.
-  2. If object's config exists and the class can be found, then revive from
-     config.
-  3. Object may have already been created if its parent was revived from config.
-     In this case, do nothing.
-  4. If none of the above applies, compose the various artifacts from the
-     SavedModel to create a subclassed layer or model. At this time, custom
-     metrics are not supported.
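`_generate_object_paths` above performs a simple worklist traversal over the ObjectGraphDef, assigning each reachable node a dotted path from the root. A toy version with a plain dict standing in for the proto (node ids and edge names are made up):

# children: node id -> [(edge name, child node id)]
children = {0: [('model', 1)], 1: [('layer-0', 2), ('layer-1', 3)], 2: [], 3: []}

paths = {0: 'root'}
nodes_to_visit = [0]
while nodes_to_visit:
    current = nodes_to_visit.pop()
    for name, child in children[current]:
        if child in paths:
            continue  # already reached via another edge
        paths[child] = '{}.{}'.format(paths[current], name)
        nodes_to_visit.append(child)

print(paths)
# {0: 'root', 1: 'root.model', 2: 'root.model.layer-0', 3: 'root.model.layer-1'}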
- - """ - - def __init__(self, metadata, object_graph_def): - self._metadata = {x.node_id: x for x in metadata.nodes} - self._proto = object_graph_def - - self._node_paths = { - node_data.node_id: node_data.node_path for node_data in metadata.nodes - } - self.loaded_nodes = {} # Maps node path -> loaded node - - # Store all node ids that have already been traversed when tracking nodes - # that were recreated from the config. - self._traversed_nodes_from_config = set() - - # Maps model id -> (blank model obj, list of child layer or their node ids) - # This tracks all layers in functional and sequential models. These models - # are only reconstructed after all of their child layers have been created. - self.model_layer_dependencies = {} - self._models_to_reconstruct = [] - - def del_tracking(self): - """Removes tracked references that are only used when loading the model.""" - # Now that the node object has been fully loaded, and the checkpoint has - # been restored, the object no longer needs to track objects added from - # SerializedAttributes. (Note that saving a training checkpoint still - # functions correctly, because layers and variables are tracked separately - # by the Layer object.) - # TODO(kathywu): Instead of outright deleting these nodes (which would - # make restoring from a different checkpoint tricky), mark them as extra - # dependencies that are OK to overwrite. - for node in self.loaded_nodes.values(): - node = node[0] - if not isinstance(node, base_layer.Layer): - # Loaded nodes can contain other trackable objects created when - # loading layers from the config, such as variables. - continue - for name in PUBLIC_ATTRIBUTES: - node._delete_tracking(name) # pylint: disable=protected-access - - if isinstance(node, functional_lib.Functional): - # Delete the temporary layer dependencies, which were used to restore - # the checkpointed values. When the model is live, the user can delete - # or add layers to the model at any time, so these layer dependencies - # may be obsolete. - dependencies = list(node._self_unconditional_dependency_names) # pylint: disable=protected-access - for name in dependencies: - if re.match(r'^layer(_with_weights)?-[\d+]', name) is not None: - node._delete_tracking(name) # pylint: disable=protected-access - - def _add_children_recreated_from_config(self, obj, proto, node_id): - """Recursively records objects recreated from config.""" - # pylint: disable=protected-access - if node_id in self._traversed_nodes_from_config: - return - - parent_path = self._node_paths[node_id] - self._traversed_nodes_from_config.add(node_id) - obj._maybe_initialize_trackable() - if isinstance(obj, base_layer.Layer) and not obj.built: - metadata = json_utils.decode(self._metadata[node_id].metadata) - self._try_build_layer(obj, node_id, metadata.get('build_input_shape')) - - # Create list of all possible children - children = [] - # Look for direct children - for reference in proto.children: - obj_child = obj._lookup_dependency(reference.local_name) - children.append((obj_child, reference.node_id, reference.local_name)) - - # Add metrics that may have been added to the layer._metrics list. - # This is stored in the SavedModel as layer.keras_api.layer_metrics in - # SavedModels created after Tf 2.2. 
- metric_list_node_id = self._search_for_child_node( - node_id, [constants.KERAS_ATTR, 'layer_metrics']) - if metric_list_node_id is not None and hasattr(obj, '_metrics'): - obj_metrics = {m.name: m for m in obj._metrics} - for reference in self._proto.nodes[metric_list_node_id].children: - metric = obj_metrics.get(reference.local_name) - if metric is not None: - metric_path = '{}.layer_metrics.{}'.format(constants.KERAS_ATTR, - reference.local_name) - children.append((metric, reference.node_id, metric_path)) - - for (obj_child, child_id, child_name) in children: - child_proto = self._proto.nodes[child_id] - - if not isinstance(obj_child, tf.__internal__.tracking.Trackable): - continue - if (child_proto.user_object.identifier - in tf.__internal__.saved_model.load.registered_identifiers()): - setter = tf.__internal__.saved_model.load.get_setter( - child_proto.user_object) - elif obj_child._object_identifier in constants.KERAS_OBJECT_IDENTIFIERS: - setter = _revive_setter - else: - setter = setattr - # pylint: enable=protected-access - - if child_id in self.loaded_nodes: - if self.loaded_nodes[child_id][0] is not obj_child: - # This means that the same trackable object is referenced by two - # different objects that were recreated from the config. - logging.warning( - 'Looks like there is an object (perhaps variable or ' - 'layer) that is shared between different layers/models. ' - 'This may cause issues when restoring the variable ' - 'values. Object: {}'.format(obj_child)) - continue - - # Overwrite variable names with the ones saved in the SavedModel. - if (child_proto.WhichOneof('kind') == 'variable' and - child_proto.variable.name): - obj_child._handle_name = child_proto.variable.name + ':0' # pylint: disable=protected-access - - if isinstance(obj_child, tf.__internal__.tracking.TrackableDataStructure): - setter = lambda *args: None - - child_path = '{}.{}'.format(parent_path, child_name) - self._node_paths[child_id] = child_path - self._add_children_recreated_from_config(obj_child, child_proto, child_id) - self.loaded_nodes[child_id] = obj_child, setter - - def load_layers(self, compile=True): # pylint: disable=redefined-builtin - """Load all layer nodes from the metadata.""" - # Load metrics after models and layers, since it's likely that models - # and layers will create the metric when initialized (this avoids wasting - # time by creating objects multiple times). - metric_list = [] - for node_metadata in self._metadata.values(): - if node_metadata.identifier == constants.METRIC_IDENTIFIER: - metric_list.append(node_metadata) - continue - - self.loaded_nodes[node_metadata.node_id] = self._load_layer( - node_metadata.node_id, node_metadata.identifier, - node_metadata.metadata) - - for node_metadata in metric_list: - try: - self.loaded_nodes[node_metadata.node_id] = self._load_layer( - node_metadata.node_id, node_metadata.identifier, - node_metadata.metadata) - except ValueError as e: - # Metrics are only needed when the model is compiled later. We ignore - # errors when trying to load custom metrics when `compile=False` until - # custom metrics are serialized properly (b/135550038). - if compile: - raise e - logging.warning('Unable to restore custom metric. Please ensure that ' - 'the layer implements `get_config` and `from_config` ' - 'when saving. 
In addition, please use the ' - '`custom_objects` arg when calling `load_model()`.') - - def _load_layer(self, node_id, identifier, metadata): - """Load a single layer from a SavedUserObject proto.""" - metadata = json_utils.decode(metadata) - - # If node was already created - if node_id in self.loaded_nodes: - node, setter = self.loaded_nodes[node_id] - - # Revive setter requires the object to have a `_serialized_attributes` - # property. Add it here. - _maybe_add_serialized_attributes(node, metadata) - - config = metadata.get('config') - if _is_graph_network(node) and generic_utils.validate_config(config): - child_nodes = self._get_child_layer_node_ids(node_id) - self.model_layer_dependencies[node_id] = (node, child_nodes) - if not child_nodes: - self._models_to_reconstruct.append(node_id) - return node, setter - - # Detect whether this object can be revived from the config. If not, then - # revive from the SavedModel instead. - obj, setter = self._revive_from_config(identifier, metadata, node_id) - if obj is None: - obj, setter = revive_custom_object(identifier, metadata) - - # Add an attribute that stores the extra functions/objects saved in the - # SavedModel. Most of these functions/objects are ignored, but some are - # used later in the loading process (e.g. the list of regularization - # losses, or the training config of compiled models). - _maybe_add_serialized_attributes(obj, metadata) - return obj, setter - - def _revive_from_config(self, identifier, metadata, node_id): - """Revives a layer/model from config, or returns None.""" - if identifier == constants.METRIC_IDENTIFIER: - obj = self._revive_metric_from_config(metadata) - else: - obj = ( - self._revive_graph_network(identifier, metadata, node_id) or - self._revive_layer_or_model_from_config(metadata, node_id)) - - if obj is None: - return None, None - - setter = self._config_node_setter(_revive_setter) - self._add_children_recreated_from_config(obj, self._proto.nodes[node_id], - node_id) - return obj, setter - - def _revive_graph_network(self, identifier, metadata, node_id): - """Revives a graph network from config.""" - # Determine whether the metadata contains information for reviving a - # functional or Sequential model. - config = metadata.get('config') - if not generic_utils.validate_config(config): - return None - - class_name = tf.compat.as_str(metadata['class_name']) - if generic_utils.get_registered_object(class_name) is not None: - return None - model_is_functional_or_sequential = ( - metadata.get('is_graph_network', False) or class_name == 'Sequential' or - class_name == 'Functional') - if not model_is_functional_or_sequential: - return None - - # Revive functional and sequential models as blank model objects for now ( - # must be initialized to enable setattr tracking and attribute caching). - # Reconstruction of the network is deferred until all of the model's layers - # have been revived. - if class_name == 'Sequential': - model = models_lib.Sequential(name=config['name']) - # The model is a custom Sequential model. - elif identifier == constants.SEQUENTIAL_IDENTIFIER: - # Uses the custom class name, since the config does not have one. - model = models_lib.Sequential(name=class_name) - else: - model = models_lib.Functional(inputs=[], outputs=[], name=config['name']) - - # Record this model and its layers. This will later be used to reconstruct - # the model. 
- layers = self._get_child_layer_node_ids(node_id) - self.model_layer_dependencies[node_id] = (model, layers) - if not layers: - self._models_to_reconstruct.append(node_id) - return model - - def _revive_layer_or_model_from_config(self, metadata, node_id): - """Revives a layer/custom model from config; returns None if infeasible.""" - # Check that the following requirements are met for reviving from config: - # 1. Object can be deserialized from config. - # 2. If the object needs to be built, then the build input shape can be - # found. - class_name = metadata.get('class_name') - config = metadata.get('config') - shared_object_id = metadata.get('shared_object_id') - must_restore_from_config = metadata.get('must_restore_from_config') - if not generic_utils.validate_config(config): - return None - - try: - obj = layers_module.deserialize( - generic_utils.serialize_keras_class_and_config( - class_name, config, shared_object_id=shared_object_id)) - except (TypeError, KeyError) as e: - # A name conflict has occurred. The `class_name` is in the Keras native - # framework; however, the value in the framework is different from the - # user's class definition which confuses the KerasObjectLoader. - builtin_layer = layers_module.get_builtin_layer(class_name) - if builtin_layer: - raise RuntimeError( - f'Unable to restore object of class \'{class_name}\' likely due to ' - f'name conflict with built-in Keras class \'{builtin_layer}\'. To ' - 'override the built-in Keras definition of the object, decorate ' - 'your class with `@keras.utils.register_keras_serializable` and ' - 'include that file in your program, or pass your class in a ' - '`keras.utils.CustomObjectScope` that wraps this load call.') from e - else: - raise - except ValueError as e: - if must_restore_from_config: - raise e - else: - return None - - # Use the dtype, name, and trainable status. Often times these are not - # specified in custom configs, so retrieve their values from the metadata. - # pylint: disable=protected-access - obj._name = metadata['name'] - if metadata.get('trainable') is not None: - obj.trainable = metadata['trainable'] - if metadata.get('dtype') is not None: - obj._set_dtype_policy(metadata['dtype']) - if metadata.get('stateful') is not None: - obj.stateful = metadata['stateful'] - # Restore model save spec for subclassed models. (layers do not store a - # SaveSpec) - if isinstance(obj, training_lib.Model): - full_save_spec = metadata.get('full_save_spec') - if full_save_spec is not None: - args_spec, kwargs_spec = full_save_spec - inputs_spec = args_spec.pop(0) - obj._set_save_spec(inputs_spec, args_spec, kwargs_spec) - # pylint: enable=protected-access - - build_input_shape = metadata.get('build_input_shape') - built = self._try_build_layer(obj, node_id, build_input_shape) - - if not built: - # If the layer cannot be built, revive a custom layer instead. 
- return None - return obj - - def _revive_metric_from_config(self, metadata): - """Revives a metric object using the config saved in the metadata.""" - class_name = tf.compat.as_str(metadata['class_name']) - config = metadata.get('config') - - if not generic_utils.validate_config(config): - return None - - try: - obj = metrics.deserialize( - generic_utils.serialize_keras_class_and_config(class_name, config)) - except ValueError: - return None - - build_input_shape = metadata.get('build_input_shape') - if build_input_shape is not None and hasattr(obj, '_build'): - obj._build(build_input_shape) # pylint: disable=protected-access - - return obj - - def _try_build_layer(self, obj, node_id, build_input_shape): - """Attempts to build the layer.""" - if obj.built or hasattr(obj.build, '_is_default'): - obj.built = True - return True - - if build_input_shape is None: - build_input_shape = self._infer_inputs(node_id, convert_to_shapes=True) - - if build_input_shape is not None: - obj.build(build_input_shape) - base_layer.Layer.build(obj, build_input_shape) - return True - - return False - - def get_path(self, node_id): - return self._node_paths[node_id] - - def finalize_objects(self): - """Finish setting up Keras objects. - - This function is executed after all objects and functions have been created. - Call functions and losses are attached to each layer, and once all layers - have been fully set up, graph networks are initialized. - - Subclassed models that are revived from the SavedModel are treated like - layers, and have their call/loss functions attached here. - """ - # Finish setting up layers and subclassed models. This step attaches call - # functions and losses to each object, and sets model inputs/outputs. - layers_revived_from_config = [] - layers_revived_from_saved_model = [] - for node_id, (node, _) in self.loaded_nodes.items(): - if (not isinstance(node, base_layer.Layer) or - # Don't finalize models until all layers have finished loading. - node_id in self.model_layer_dependencies): - continue - - self._unblock_model_reconstruction(node_id, node) - - if isinstance(node, input_layer.InputLayer): - continue - elif isinstance(node, metrics.Metric): - continue - - if isinstance(node, (RevivedLayer, RevivedInputLayer)): - layers_revived_from_saved_model.append(node) - else: - layers_revived_from_config.append(node) - - _finalize_saved_model_layers(layers_revived_from_saved_model) - _finalize_config_layers(layers_revived_from_config) - - # Initialize graph networks, now that layer dependencies have been resolved. - self._reconstruct_all_models() - - def _unblock_model_reconstruction(self, layer_id, layer): - """Removes layer from blocking model reconstruction.""" - for model_id, v in self.model_layer_dependencies.items(): - _, layers = v - if layer_id not in layers: - continue - layers[layers.index(layer_id)] = layer - if all(isinstance(x, base_layer.Layer) for x in layers): - self._models_to_reconstruct.append(model_id) - - def _reconstruct_all_models(self): - """Reconstructs the network structure of all models.""" - all_initialized_models = set() - while self._models_to_reconstruct: - model_id = self._models_to_reconstruct.pop(0) - all_initialized_models.add(model_id) - model, layers = self.model_layer_dependencies[model_id] - self._reconstruct_model(model_id, model, layers) - _finalize_config_layers([model]) - - if all_initialized_models != set(self.model_layer_dependencies.keys()): - # This should not happen. 
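`_unblock_model_reconstruction` above swaps node ids for real layer objects in each model's dependency list and queues the model for reconstruction once no ids remain. A toy version of that bookkeeping (the class and all names are illustrative only):

class Layer:  # stand-in for a revived Keras layer
    def __init__(self, name):
        self.name = name

model_deps = {'model': ('<blank model>', [10, 11])}  # model id -> (obj, children)
to_reconstruct = []

def unblock(layer_id, layer):
    for model_id, (_, slots) in model_deps.items():
        if layer_id in slots:
            # Replace the pending node id with the finished layer object.
            slots[slots.index(layer_id)] = layer
            if all(isinstance(s, Layer) for s in slots):
                to_reconstruct.append(model_id)

unblock(10, Layer('dense'))
unblock(11, Layer('softmax'))
print(to_reconstruct)  # ['model']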
- uninitialized_model_ids = ( - set(self.model_layer_dependencies.keys()) - all_initialized_models) - uninitialized_model_names = [ - self.model_layer_dependencies[model_id][0].name - for model_id in uninitialized_model_ids - ] - raise ValueError(f'Error loading model(s) in the SavedModel format. ' - f'The following model(s) could not be initialized: ' - f'{uninitialized_model_names}') - - def _reconstruct_model(self, model_id, model, layers): - """Reconstructs the network structure.""" - config = json_utils.decode(self._metadata[model_id].metadata)['config'] - - # Set up model inputs - if model.inputs: - # Inputs may already be created if the model is instantiated in another - # object's __init__. - pass - elif isinstance(model, models_lib.Sequential): - if not layers or not isinstance(layers[0], input_layer.InputLayer): - if config['layers'][0]['class_name'] == 'InputLayer': - layers.insert( - 0, - input_layer.InputLayer.from_config(config['layers'][0]['config'])) - elif 'batch_input_shape' in config['layers'][0]['config']: - batch_input_shape = config['layers'][0]['config']['batch_input_shape'] - layers.insert( - 0, - input_layer.InputLayer( - input_shape=batch_input_shape[1:], - batch_size=batch_input_shape[0], - dtype=layers[0].dtype, - name=layers[0].name + '_input')) - model.__init__(layers, name=config['name']) - if not model.inputs: - first_layer = self._get_child_layer_node_ids(model_id)[0] - input_specs = self._infer_inputs(first_layer) - input_shapes = self._infer_inputs(first_layer, convert_to_shapes=True) - model._set_inputs(input_specs) # pylint: disable=protected-access - if not model.built and not isinstance(input_specs, dict): - model.build(input_shapes) - else: # Reconstruct functional model - (inputs, outputs, - created_layers) = functional_lib.reconstruct_from_config( - config, created_layers={layer.name: layer for layer in layers}) - model.__init__(inputs, outputs, name=config['name']) - functional_lib.connect_ancillary_layers(model, created_layers) - - # Set model dtype. - _set_network_attributes_from_metadata(model) - - # Unblock models that are dependent on this model. - self._unblock_model_reconstruction(model_id, model) - - def _get_child_layer_node_ids(self, node_id): - """Returns the node ids of each layer in a Sequential/Functional model.""" - # Sequential and Functional track layers with names following the format - # "layer-N". Use this to generate the list of layers. - num_layers = 0 - child_layers = {} - pattern = re.compile('layer-(\\d+)') - - for child in self._proto.nodes[node_id].children: - m = pattern.match(child.local_name) - if m is None: - continue - layer_n = int(m.group(1)) - num_layers = max(layer_n + 1, num_layers) - child_layers[layer_n] = child.node_id - - ordered = [] - for n in range(num_layers): - child = child_layers.get(n) - if child is None: - break - ordered.append(child) - return ordered - - def _search_for_child_node(self, parent_id, path_to_child): - """Returns node id of child node. - - A helper method for traversing the object graph proto. - - As an example, say that the object graph proto in the SavedModel contains an - object with the following child and grandchild attributes: - - `parent.child_a.child_b` - - This method can be used to retrieve the node id of `child_b` using the - parent's node id by calling: - - `_search_for_child_node(parent_id, ['child_a', 'child_b'])`. - - Args: - parent_id: node id of parent node - path_to_child: list of children names. - - Returns: - node_id of child, or None if child isn't found. 
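`_get_child_layer_node_ids` above recovers layer order from child edges named `layer-<n>`, stopping at the first gap in the numbering. A standalone sketch of that logic (the child list is made up):

import re

children = [('layer-1', 14), ('keras_api', 9), ('layer-0', 11)]

pattern = re.compile(r'layer-(\d+)')
by_index = {}
for local_name, node_id in children:
    m = pattern.match(local_name)
    if m:
        by_index[int(m.group(1))] = node_id

ordered = []
for n in range(max(by_index, default=-1) + 1):
    if n not in by_index:
        break  # stop at the first gap, as the loader does
    ordered.append(by_index[n])

print(ordered)  # [11, 14]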
- """ - if not path_to_child: - return parent_id - - for child in self._proto.nodes[parent_id].children: - if child.local_name == path_to_child[0]: - return self._search_for_child_node(child.node_id, path_to_child[1:]) - return None - - def _infer_inputs(self, layer_node_id, convert_to_shapes=False): - """Infers input shape of layer from SavedModel functions.""" - call_fn_id = self._search_for_child_node( - layer_node_id, ['call_and_return_all_conditional_losses']) - if call_fn_id is None: - return None - - concrete_functions = ( - self._proto.nodes[call_fn_id].function.concrete_functions) - if not concrete_functions: - return None - call_fn_name = concrete_functions[0] - call_fn_proto = self._proto.concrete_functions[call_fn_name] - structured_input_signature = tf.__internal__.saved_model.decode_proto( - call_fn_proto.canonicalized_input_signature) - inputs = structured_input_signature[0][0] - if convert_to_shapes: - return tf.nest.map_structure(lambda spec: spec.shape, inputs) - else: - return inputs - - def _config_node_setter(self, setter): - """Creates edges for nodes that are recreated from config.""" - - def setattr_wrapper(obj, name, value): - # Avoid overwriting attributes of objects recreated from the config. - if obj._lookup_dependency(name) is None: # pylint: disable=protected-access - setter(obj, name, value) - - return setattr_wrapper - - -def _finalize_saved_model_layers(layers): - """Runs the final steps of loading Keras Layers from SavedModel.""" - # pylint: disable=protected-access - # 1. Set up call functions for all layers initialized from the SavedModel ( - # and not the config) - for layer in layers: - layer.built = True - layer_call = getattr( - _get_keras_attr(layer), 'call_and_return_conditional_losses', None) - if layer_call and layer_call.concrete_functions: - call_spec = layer_utils.CallFunctionSpec( - tf_inspect.getfullargspec(layer_call)) - layer.call = utils.use_wrapped_call(layer, layer_call, call_spec, - return_method=True) - expects_training_arg = layer._serialized_attributes['metadata'][ - 'expects_training_arg'] - if 'training' in layer_call.function_spec.arg_names: - # This could change the value of `expects_training_arg` if this layer - # doesn't expect a training arg, but has a child layer that does. - expects_training_arg = True - layer._init_call_fn_args(expects_training_arg) - else: - layer.call = types.MethodType( - _unable_to_call_layer_due_to_serialization_issue, layer) - - for layer in layers: - # 2. Set model inputs and outputs. - if isinstance(layer, RevivedNetwork): - _set_network_attributes_from_metadata(layer) - - if hasattr(_get_keras_attr(layer), 'call_and_return_conditional_losses'): - call_fn = _get_keras_attr(layer).call_and_return_conditional_losses - if not call_fn.concrete_functions: - continue - if call_fn.input_signature is None: - args, kwargs = infer_inputs_from_restored_call_function(call_fn) - args = list(args) - inputs = args.pop(0) - else: - args = call_fn.input_signature - args = list(args) - inputs = args.pop(0) - kwargs = None - layer._set_save_spec(inputs, args, kwargs) # pylint: disable=protected-access - - # V1 models require calling _set_inputs to set the `.inputs` attr. - # Skip this step when there are multiple tensor inputs (this behavior - # is not well supported in V1 models). - if not any( - isinstance(x, tf.TensorSpec) - for x in tf.nest.flatten([args, kwargs])): - layer._set_inputs(inputs) - - # 3. Add losses that aren't generated by the layer.call function. 
-    _restore_layer_unconditional_losses(layer)
-    _restore_layer_activation_loss(layer)
-
-    # 4. Restore metrics list
-    _restore_layer_metrics(layer)
-
-  # pylint: enable=protected-access
-
-
-def _unable_to_call_layer_due_to_serialization_issue(layer, *unused_args,
-                                                     **unused_kwargs):
-  """Replaces `layer.call` if the layer was not fully serialized.
-
-  Keras Model/Layer serialization is relatively relaxed because SavedModels
-  are not always loaded back as Keras models. Thus, when there is an issue
-  tracing a non-signature function, a warning is logged instead of raising an
-  error. This results in a SavedModel where the model's call function is saved,
-  but the internal layer call functions are not.
-
-  When deserialized with `tf.keras.models.load_model`, the internal layers
-  which do not have serialized call functions should raise an error when called.
-
-  Args:
-    layer: Layer without the serialized call function.
-
-  Raises:
-    ValueError: Always raised; explains how to make the layer callable again.
-  """
-
-  raise ValueError(
-      f'Cannot call custom layer {layer.name} of type {type(layer)}, because '
-      'the call function was not serialized to the SavedModel. '
-      'Please try one of the following methods to fix this issue:'
-      '\n\n(1) Implement `get_config` and `from_config` in the layer/model '
-      'class, and pass the object to the `custom_objects` argument when '
-      'loading the model. For more details, see: '
-      'https://www.tensorflow.org/guide/keras/save_and_serialize'
-      '\n\n(2) Ensure that the subclassed model or layer overrides `call` '
-      'and not `__call__`. The input shape and dtype will be automatically '
-      'recorded when the object is called, and used when saving. To manually '
-      'specify the input shape/dtype, decorate the call function with '
-      '`@tf.function(input_signature=...)`.')
-
-
-def _finalize_config_layers(layers):
-  """Runs the final steps of loading Keras Layers from config."""
-  for layer in layers:
-    # It is assumed that layers define their unconditional losses after being
-    # recreated from the config and built. The exceptions to this
-    # are Functional and Sequential models, which only store conditional losses
-    # (losses dependent on the inputs) in the config. Unconditional losses like
-    # weight regularization must be revived from the SavedModel.
-    if _is_graph_network(layer):
-      _restore_layer_unconditional_losses(layer)
-
-    # Some layers, like Dense, record their activation loss function in the
-    # config. However, not all layers do this, so the activation loss may be
-    # missing when restored from the config/HDF5.
-    # TODO(kathywu): Investigate ways to improve the config to ensure consistent
-    # loading behavior between HDF5 and SavedModel.
-    _restore_layer_activation_loss(layer)
-
-    # Restore metrics list.
-    _restore_layer_metrics(layer)
-
-    # Restore RNN layer states.
-    if (isinstance(layer, base_rnn.RNN) and layer.stateful and
-        hasattr(_get_keras_attr(layer), 'states')):
-      layer.states = getattr(_get_keras_attr(layer), 'states', None)
-      for variable in tf.nest.flatten(layer.states):
-        backend.track_variable(variable)
-
-    # Perform any layer-defined finalization of the layer state.
-    layer.finalize_state()
-
-
-def _finalize_metric(metric):
-  metric.update_state = types.MethodType(
-      metrics_utils.update_state_wrapper(metric.keras_api.update_state), metric)
-  metric.result = metric.keras_api.result
-
-
-def _restore_layer_unconditional_losses(layer):
-  """Restore unconditional losses from SavedModel."""
-  if hasattr(_get_keras_attr(layer), 'layer_regularization_losses'):
-    losses = getattr(_get_keras_attr(layer), 'layer_regularization_losses', [])
-  else:
-    # Some earlier SavedModels may not have layer_regularization_losses
-    # serialized separately. Fall back to using the regularization_losses
-    # list if it does not exist.
-    losses = layer._serialized_attributes.get('regularization_losses', [])  # pylint: disable=protected-access
-  for loss in losses:
-    layer.add_loss(loss)
-
-
-def _restore_layer_activation_loss(layer):
-  """Restore activation loss from SavedModel."""
-  # Use wrapped activity regularizer function if the layer's activity
-  # regularizer wasn't created during initialization.
-  activity_regularizer = getattr(
-      _get_keras_attr(layer), 'activity_regularizer_fn', None)
-  if activity_regularizer and not layer.activity_regularizer:
-    try:
-      layer.activity_regularizer = activity_regularizer
-    except AttributeError:
-      # This may happen if a layer wrapper is saved with an activity
-      # regularizer. The wrapper object's activity regularizer is unsettable.
-      pass
-
-
-def revive_custom_object(identifier, metadata):
-  """Revives object from SavedModel."""
-  if tf.compat.v1.executing_eagerly_outside_functions():
-    model_class = training_lib.Model
-  else:
-    model_class = training_lib_v1.Model
-
-  revived_classes = {
-      constants.INPUT_LAYER_IDENTIFIER:
-          (RevivedInputLayer, input_layer.InputLayer),
-      constants.LAYER_IDENTIFIER: (RevivedLayer, base_layer.Layer),
-      constants.MODEL_IDENTIFIER: (RevivedNetwork, model_class),
-      constants.NETWORK_IDENTIFIER: (RevivedNetwork, functional_lib.Functional),
-      constants.SEQUENTIAL_IDENTIFIER: (RevivedNetwork, models_lib.Sequential),
-  }
-  parent_classes = revived_classes.get(identifier, None)
-
-  if parent_classes is not None:
-    parent_classes = revived_classes[identifier]
-    revived_cls = type(
-        tf.compat.as_str(metadata['class_name']), parent_classes, {})
-    return revived_cls._init_from_metadata(metadata)  # pylint: disable=protected-access
-  else:
-    raise ValueError(
-        f'Unable to restore custom object of type {identifier}. '
-        f'Please make sure that any custom layers are included in the '
-        f'`custom_objects` arg when calling `load_model()` and make sure that '
-        f'all layers implement `get_config` and `from_config`.')
-
-
-def _restore_layer_metrics(layer):
-  metrics_list = getattr(_get_keras_attr(layer), 'layer_metrics', {})
-  layer_metrics = {m.name: m for m in layer._metrics}  # pylint: disable=protected-access
-  for name, metric in metrics_list.items():
-    if name not in layer_metrics:
-      # Metrics may be added during initialization/building of custom layers.
-      layer._metrics.append(metric)  # pylint: disable=protected-access
-
-
-# TODO(kathywu): Centrally define keys and functions for both serialization and
-# deserialization.
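`revive_custom_object` above leans on Python's three-argument `type()` to mint a class at runtime, so a revived object reports the user's original class name while inheriting the revived behavior. A minimal demonstration (the `Layerish` base is a stand-in, not a Keras class):

class Layerish:  # plays the role of (RevivedLayer, base_layer.Layer)
    def __init__(self, name):
        self.name = name

# type(name, bases, namespace) builds a new class dynamically.
RevivedMyLayer = type('MyLayer', (Layerish,), {})

obj = RevivedMyLayer(name='my_layer')
print(type(obj).__name__)         # MyLayer
print(isinstance(obj, Layerish))  # True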
-class RevivedLayer: - """Keras layer loaded from a SavedModel.""" - - @classmethod - def _init_from_metadata(cls, metadata): - """Create revived layer from metadata stored in the SavedModel proto.""" - init_args = dict(name=metadata['name'], trainable=metadata['trainable']) - if metadata.get('dtype') is not None: - init_args['dtype'] = metadata['dtype'] - if metadata.get('batch_input_shape') is not None: - init_args['batch_input_shape'] = metadata['batch_input_shape'] - - revived_obj = cls(**init_args) - - with utils.no_automatic_dependency_tracking_scope(revived_obj): - # pylint:disable=protected-access - revived_obj._call_spec.expects_training_arg = metadata[ - 'expects_training_arg'] - config = metadata.get('config') - if generic_utils.validate_config(config): - revived_obj._config = config - if metadata.get('input_spec') is not None: - revived_obj.input_spec = recursively_deserialize_keras_object( - metadata['input_spec'], - module_objects={'InputSpec': input_spec.InputSpec}) - if metadata.get('activity_regularizer') is not None: - revived_obj.activity_regularizer = regularizers.deserialize( - metadata['activity_regularizer']) - if metadata.get('_is_feature_layer') is not None: - revived_obj._is_feature_layer = metadata['_is_feature_layer'] - if metadata.get('stateful') is not None: - revived_obj.stateful = metadata['stateful'] - # pylint:enable=protected-access - - return revived_obj, _revive_setter - - @property - def keras_api(self): - return self._serialized_attributes.get(constants.KERAS_ATTR, None) - - def get_config(self): - if hasattr(self, '_config'): - return self._config - else: - raise NotImplementedError - - -def _revive_setter(layer, name, value): - """Setter function that saves some attributes to separate dictionary.""" - # Many attributes in the SavedModel conflict with properties defined in - # Layer and Model. Save these attributes to a separate dictionary. - if name in PUBLIC_ATTRIBUTES: - # pylint: disable=protected-access - if isinstance(value, tf.__internal__.tracking.Trackable): - layer._track_trackable(value, name=name) - layer._serialized_attributes[name] = value - # pylint: enable=protected-access - elif (isinstance(layer, functional_lib.Functional) and - re.match(r'^layer(_with_weights)?-[\d+]', name) is not None): - # Edges named "layer-n" or "layer_with_weights-n", which are tracked in - # network._track_layers, should not be added as an attribute. They should - # be temporarily added as a dependency so that checkpointed values can be - # restored. These dependencies are manually deleted in - # KerasObjectLoader.del_tracking. - - # Set `overwrite=True` in the case that `layer` already tracks a different - # layer-n. This may cause variable values to not be loaded properly in the - # original layer-n, but we already warn the users about this - # (ctrl-f "shared between different layers/models"). - layer._track_trackable(value, name, overwrite=True) # pylint: disable=protected-access - elif getattr(layer, name, None) is not None: - # Don't overwrite already defined attributes. 
-    pass
-  else:
-    setattr(layer, name, value)
-
-
-class RevivedInputLayer:
-  """InputLayer loaded from a SavedModel."""
-
-  @classmethod
-  def _init_from_metadata(cls, metadata):
-    """Revives the saved InputLayer from the metadata."""
-    init_args = dict(
-        name=metadata['name'],
-        dtype=metadata['dtype'],
-        sparse=metadata['sparse'],
-        ragged=metadata['ragged'],
-        batch_input_shape=metadata['batch_input_shape'])
-    revived_obj = cls(**init_args)
-    with utils.no_automatic_dependency_tracking_scope(revived_obj):
-      revived_obj._config = metadata['config']  # pylint:disable=protected-access
-
-    return revived_obj, setattr
-
-  def get_config(self):
-    return self._config
-
-
-def recursively_deserialize_keras_object(config, module_objects=None):
-  """Deserialize Keras object from a nested structure."""
-  if isinstance(config, dict):
-    if 'class_name' in config:
-      return generic_utils.deserialize_keras_object(
-          config, module_objects=module_objects)
-    else:
-      return {
-          key: recursively_deserialize_keras_object(config[key], module_objects)
-          for key in config
-      }
-  elif isinstance(config, (tuple, list)):
-    return [
-        recursively_deserialize_keras_object(x, module_objects) for x in config
-    ]
-  else:
-    raise ValueError(
-        f'Unable to decode Keras layer config. Config should be a dictionary, '
-        f'tuple or list. Received: config={config}')
-
-
-def infer_inputs_from_restored_call_function(fn):
-  """Returns TypeSpec of inputs from a restored call function.
-
-  Args:
-    fn: Restored layer call function. It is assumed that `fn` has at least one
-      concrete function and that the inputs are in the first argument.
-
-  Returns:
-    TypeSpec of call function inputs in the form of (args, kwargs).
-  """
-
-  def common_spec(x, y):
-    if not isinstance(x, tf.TypeSpec):
-      # Doesn't particularly matter what is returned in this case because the
-      # result will be filtered out in _set_input_shape.
-      return x
-    # pylint:disable=protected-access
-    result = x._without_tensor_names().most_specific_common_supertype(
-        [y._without_tensor_names()])
-    if result is None:
-      # Please file a bug if you are being hindered by this error.
-      raise TypeError(f'No common supertype of {x} and {y}.')
-    return result
-
-  spec = fn.concrete_functions[0].structured_input_signature
-  for concrete in fn.concrete_functions[1:]:
-    spec2 = concrete.structured_input_signature
-    spec = tf.nest.map_structure(common_spec, spec, spec2)
-  return spec
-
-
-class RevivedNetwork(RevivedLayer):
-  """Keras network of layers loaded from a SavedModel."""
-
-  @classmethod
-  def _init_from_metadata(cls, metadata):
-    """Create revived network from metadata stored in the SavedModel proto."""
-    revived_obj = cls(name=metadata['name'])
-
-    # Store attributes revived from SerializedAttributes in an untracked
-    # dictionary. The attributes are the ones listed in CommonEndpoints or
-    # "keras_api" for Keras-specific attributes.
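The `common_spec` helper above merges the input signatures of all concrete functions via `TypeSpec.most_specific_common_supertype`, which relaxes any dimensions the specs disagree on. A small illustration, assuming a recent TF 2.x release where this method is public:

import tensorflow as tf

a = tf.TensorSpec([1, 5], tf.float32)
b = tf.TensorSpec([8, 5], tf.float32)
# The differing batch dimension (1 vs. 8) is relaxed to None; the rest stays.
print(a.most_specific_common_supertype([b]))
# TensorSpec(shape=(None, 5), dtype=tf.float32, name=None)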
-    with utils.no_automatic_dependency_tracking_scope(revived_obj):
-      # pylint:disable=protected-access
-      revived_obj._call_spec.expects_training_arg = metadata[
-          'expects_training_arg']
-      config = metadata.get('config')
-      if generic_utils.validate_config(config):
-        revived_obj._config = config
-
-      if metadata.get('activity_regularizer') is not None:
-        revived_obj.activity_regularizer = regularizers.deserialize(
-            metadata['activity_regularizer'])
-      # pylint:enable=protected-access
-
-    return revived_obj, _revive_setter  # pylint:disable=protected-access
-
-
-def _set_network_attributes_from_metadata(revived_obj):
-  """Sets attributes recorded in the metadata."""
-  with utils.no_automatic_dependency_tracking_scope(revived_obj):
-    # pylint:disable=protected-access
-    metadata = revived_obj._serialized_attributes['metadata']
-    if metadata.get('dtype') is not None:
-      revived_obj._set_dtype_policy(metadata['dtype'])
-    revived_obj._trainable = metadata['trainable']
-    # pylint:enable=protected-access
-
-
-def _maybe_add_serialized_attributes(layer, metadata):
-  # Store attributes revived from SerializedAttributes in an untracked
-  # dictionary. The attributes are the ones listed in CommonEndpoints or
-  # "keras_api" for Keras-specific attributes.
-  if not hasattr(layer, '_serialized_attributes'):
-    with utils.no_automatic_dependency_tracking_scope(layer):
-      layer._serialized_attributes = {'metadata': metadata}  # pylint: disable=protected-access
-
-
-def _get_keras_attr(layer):
-  return getattr(layer, '_serialized_attributes',
-                 {}).get(constants.KERAS_ATTR, None)
diff --git a/keras/saving/saved_model/metric_serialization.py b/keras/saving/saved_model/metric_serialization.py
deleted file mode 100644
index 88f060b3a46d..000000000000
--- a/keras/saving/saved_model/metric_serialization.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================== -"""Classes and functions implementing Metrics SavedModel serialization.""" - -from keras.saving.saved_model import constants -from keras.saving.saved_model import layer_serialization -from keras.utils import generic_utils -import tensorflow.compat.v2 as tf - - -class MetricSavedModelSaver(layer_serialization.LayerSavedModelSaver): - """Metric serialization.""" - - @property - def object_identifier(self): - return constants.METRIC_IDENTIFIER - - def _python_properties_internal(self): - metadata = dict( - class_name=generic_utils.get_registered_name(type(self.obj)), - name=self.obj.name, - dtype=self.obj.dtype) - metadata.update(layer_serialization.get_serialized(self.obj)) - if self.obj._build_input_shape is not None: # pylint: disable=protected-access - metadata['build_input_shape'] = self.obj._build_input_shape # pylint: disable=protected-access - return metadata - - def _get_serialized_attributes_internal(self, unused_serialization_cache): - return ( - dict(variables=tf.__internal__.tracking.wrap(self.obj.variables)), - # TODO(b/135550038): save functions to enable saving custom metrics. - {}, - ) diff --git a/keras/saving/saved_model/model_serialization.py b/keras/saving/saved_model/model_serialization.py deleted file mode 100644 index d43d6fae6268..000000000000 --- a/keras/saving/saved_model/model_serialization.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Classes and functions implementing Model SavedModel serialization.""" - -from keras.saving import saving_utils -from keras.saving.saved_model import constants -from keras.saving.saved_model import layer_serialization -from keras.saving.saved_model import save_impl - - -class ModelSavedModelSaver(layer_serialization.LayerSavedModelSaver): - """Model SavedModel serialization.""" - - @property - def object_identifier(self): - return constants.MODEL_IDENTIFIER - - def _python_properties_internal(self): - metadata = super()._python_properties_internal() - # Network stateful property is dependent on the child layers. - metadata.pop('stateful') - metadata['is_graph_network'] = self.obj._is_graph_network # pylint: disable=protected-access - spec = self.obj.save_spec(dynamic_batch=False) - metadata['full_save_spec'] = spec - # save_spec is saved for forward compatibility on older TF versions. - metadata['save_spec'] = None if spec is None else spec[0][0] - - metadata.update( - saving_utils.model_metadata( - self.obj, include_optimizer=True, require_config=False)) - return metadata - - def _get_serialized_attributes_internal(self, serialization_cache): - default_signature = None - - # Create a default signature function if this is the only object in the - # cache (i.e. this is the root level object).
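The `full_save_spec` metadata above comes from `Model.save_spec`, which is only populated once the model has been called on concrete data. A sketch of what it captures (shapes are illustrative):

import numpy as np
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(2)])
model.predict(np.ones((1, 4)))  # calling the model records its input spec
args_spec, kwargs_spec = model.save_spec(dynamic_batch=False)
print(args_spec[0])  # TensorSpec of the traced input, batch dimension kept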
- if len(serialization_cache[constants.KERAS_CACHE_KEY]) == 1: - default_signature = save_impl.default_save_signature(self.obj) - - # Other than the default signature function, all other attributes match with - # the ones serialized by Layer. - objects, functions = ( - super()._get_serialized_attributes_internal( - serialization_cache)) - functions['_default_save_signature'] = default_signature - return objects, functions - - -class SequentialSavedModelSaver(ModelSavedModelSaver): - - @property - def object_identifier(self): - return constants.SEQUENTIAL_IDENTIFIER diff --git a/keras/saving/saved_model/order_preserving_set.py b/keras/saving/saved_model/order_preserving_set.py deleted file mode 100644 index 9f02b6152ebc..000000000000 --- a/keras/saving/saved_model/order_preserving_set.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A set based on dict so that it preserves key insertion order. - -Python Dicts are order-preserving since 3.6 -(https://mail.python.org/pipermail/python-dev/2017-December/151283.html), -but sets are not. This class implements a set on top of a dict so that we get -deterministic iteration order across runs. 
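The guarantee this docstring relies on is easy to demonstrate, as is the resulting behavior of the class defined just below (a quick sketch):

d = dict.fromkeys(["b", "a", "c"])
print(list(d))  # ['b', 'a', 'c'], the insertion order, on every run
print({"b", "a", "c"})  # a built-in set's iteration order is unspecified

s1 = OrderPreservingSet(["c", "a"])
s2 = OrderPreservingSet(["b", "a"])
print(list(s1 | s2))  # ['c', 'a', 'b']: the left operand's order comes first
print(list(s1 & s2))  # ['a']: intersection iterates in s1's order, not s2's
# s1 | ["b"] raises TypeError, since a plain list has no defined order.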
-""" - -import collections.abc - - -class OrderPreservingSet(collections.abc.MutableSet): - """A set based on dict so that it preserves key insertion order.""" - - def __init__(self, iterable=None): - self._dict = {item: None for item in (iterable or [])} - - # abstract from collections.MutableSet - def __len__(self): - return len(self._dict) - - # abstract from collections.MutableSet - def __contains__(self, value): - return value in self._dict - - # override from collections.MutableSet - def __iter__(self): - return iter(self._dict) - - # abstract from collections.MutableSet - def add(self, item): - self._dict[item] = None - - # abstract from collections.MutableSet - def discard(self, value): - del self._dict[value] - - # override from collections.MutableSet - def clear(self): - self._dict = {} - - # override from collections.Set - def __eq__(self, other): - if not isinstance(other, OrderPreservingSet): - return NotImplemented - return self._dict.keys() == other._dict.keys() - - # override from collections.Set - def __le__(self, other): - if not isinstance(other, OrderPreservingSet): - return NotImplemented - return self._dict.keys() <= other._dict.keys() - - # override from collections.Set - def __ge__(self, other): - if not isinstance(other, OrderPreservingSet): - return NotImplemented - return self._dict.keys() >= other._dict.keys() - - # override from collections.Set - def __and__(self, other): - # collections.Set defaults to the ordering in other, we want to use self - return self._from_iterable(value for value in self if value in other) - - # override from collections.Set - def __or__(self, other): - # ensure that other is ordered before performing __or__ - if not isinstance(other, OrderPreservingSet): - raise TypeError( - "cannot union an 'OrderPreservingSet' with an unordered iterable.") - result = self._from_iterable(value for value in self) - for value in other: - result._dict[value] = None - return result - - def union(self, other): - return self | other diff --git a/keras/saving/saved_model/revive_test.py b/keras/saving/saved_model/revive_test.py deleted file mode 100644 index 21659a9d746f..000000000000 --- a/keras/saving/saved_model/revive_test.py +++ /dev/null @@ -1,448 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# pylint: disable=protected-access -"""Tests reviving models from config and SavedModel. - -These tests ensure that a model revived from a combination of config and -SavedModel have the expected structure. 
-""" - -import tensorflow.compat.v2 as tf -# TODO(kathywu): Move relevant tests from saved_model_test to -import shutil - -from absl.testing import parameterized -import numpy as np - -import keras -from keras import backend -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils -from keras.saving.saved_model import load as keras_load -from keras.utils import generic_utils - - -class SubclassedModelNoConfig(keras.Model): - - def __init__(self, a, b): - super().__init__() - - self.a = a - self.b = b - self.shared = CustomLayerNoConfig(a, b) - self.all_layers = [] - - def build(self, input_shape): - self.all_layers.extend([ - self.shared, - CustomLayerWithConfig(self.a + 1, self.b + 2), - CustomLayerNoConfig(self.a + 3, self.b + 4), - keras.Sequential([ - # TODO(b/145029112): Bug with losses when there are shared layers. - # self.shared, <-- Enable when bug is fixed. - CustomLayerNoConfig(self.a + 5, self.b + 6)])]) - super().build(input_shape) - - def call(self, inputs): - x = inputs - for layer in self.all_layers: - x = layer(x) - return x - - -class SparseDense(keras.layers.Dense): - - def call(self, inputs): - input_shape = tf.stack( - (tf.reduce_prod(tf.shape(inputs)[:-1]), - self.kernel.shape[0])) - output_shape = tf.concat( - (tf.shape(inputs)[:-1], [self.kernel.shape[1]]), -1) - x = tf.sparse.reshape(inputs, input_shape) - return tf.reshape( - self.activation( - tf.sparse.sparse_dense_matmul(x, self.kernel) + self.bias), - output_shape) - - -class SubclassedSparseModelNoConfig(keras.Model): - - def __init__(self, a, b): - super().__init__() - self.a = a - self.shared = CustomLayerNoConfig(a, b) - self.all_layers = [SparseDense(4)] - - def call(self, inputs): - x = inputs - for layer in self.all_layers: - x = layer(x) - return self.shared(x + self.a) - - -class SubclassedModelWithConfig(SubclassedModelNoConfig): - - def get_config(self): - return {'a': self.a, - 'b': self.b} - - @classmethod - def from_config(cls, config): - return cls(**config) - - -class CustomLayerNoConfig(keras.layers.Layer): - - def __init__(self, a, b, name=None): - super().__init__(name=name) - self.a = tf.Variable(a, name='a') - self.b = b - def a_regularizer(): - return self.a * 2 - self.add_loss(a_regularizer) - self.sum_metric = keras.metrics.Sum(name='inputs_sum') - self.unused_metric = keras.metrics.Sum(name='not_added_to_metrics') - - def build(self, input_shape): - self.c = tf.Variable( - tf.constant(1.0, shape=input_shape[1:]), name=self.name+'_c') - - def call(self, inputs): - self.add_loss(tf.reduce_sum(inputs)) - self.add_metric(self.sum_metric(inputs)) - self.add_metric(inputs, aggregation='mean', name='mean') - - return inputs + self.c - - -class CustomLayerWithConfig(CustomLayerNoConfig): - - def get_config(self): - return {'a': backend.get_value(self.a), - 'b': self.b, - 'name': self.name} - - -class CustomNetworkDefaultConfig(keras.Model): - - def __init__(self, num_classes, name=None): - inputs = keras.Input((2, 3), name='inputs') - x = keras.layers.Flatten(name='flatten')(inputs) - y = keras.layers.Dense(num_classes, name='outputs')(x) - super().__init__(inputs, y, name=name) - - -class CustomNetworkWithConfig(CustomNetworkDefaultConfig): - - def __init__(self, num_classes, name=None): - super().__init__(num_classes, name=name) - self._config_dict = dict(num_classes=num_classes) - - def get_config(self): - return self._config_dict - - @classmethod - def from_config(cls, config): - return cls(config['num_classes'], name=config.get('name')) - - -class 
CustomNetworkWithConfigName(CustomNetworkWithConfig): - - def __init__(self, num_classes, name=None): - super().__init__(num_classes, name=name) - self._config_dict['name'] = self.name - - -class UnregisteredCustomSequentialModel(keras.Sequential): - # This class is *not* registered in the CustomObjectScope. - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.add(keras.layers.InputLayer(input_shape=(2, 3))) - - -class FunctionalSubclassModel(keras.Model): - - def __init__(self, units): - self.units = units - my_input = keras.Input(shape=(2, 3), name='inputs') - dense = keras.layers.Dense(self.units, activation='relu', name='dense') - output = dense(my_input) - outputs = {'output': output} - super().__init__(inputs=[my_input], outputs=outputs) - - def get_config(self): - return {'units': self.units} - - -class FunctionalSubclassModelWrongConfig(FunctionalSubclassModel): - - def get_config(self): - return {} - - -# The WideDeepModel, whose name conflicts with a Keras built-in model, is -# registered in these tests. -class WideDeepModel(SubclassedModelWithConfig): - pass - - -class ReviveTestBase(test_combinations.TestCase): - - def setUp(self): - super().setUp() - self.path = self.get_temp_dir() - self.addCleanup(shutil.rmtree, self.path, ignore_errors=True) - - def _assert_revived_correctness(self, model, revived): - self.assertAllEqual(model.input_names, revived.input_names) - self.assertAllEqual(model.output_names, revived.output_names) - if model.inputs is not None: - self.assertTrue( - all([ - i.shape.as_list() == r.shape.as_list() and i.dtype == r.dtype - for (i, r) in zip(model.inputs, revived.inputs) - ])) - self.assertTrue( - all([ - i.shape.as_list() == r.shape.as_list() and i.dtype == r.dtype - for (i, r) in zip(model.outputs, revived.outputs) - ])) - - self.assertAllClose(self.evaluate(model.weights), - self.evaluate(revived.weights)) - input_arr = tf.constant( - np.random.random((2, 2, 3)).astype(np.float32)) - if isinstance(revived.save_spec()[0][0], - tf.SparseTensorSpec): - input_arr = tf.sparse.from_dense(input_arr) - - self.assertAllClose(model(input_arr), revived(input_arr)) - self.assertAllClose(sum(model.losses), sum(revived.losses)) - self.assertAllClose(len(model.losses), len(revived.losses)) - self.assertEqual(len(model.metrics), len(revived.metrics)) - # TODO(b/150403085): Investigate why the metric order changes when running - # this test in tf-nightly. - self.assertAllClose(sorted([m.result() for m in model.metrics]), - sorted([m.result() for m in revived.metrics])) - model_layers = {layer.name: layer for layer in model.layers} - revived_layers = {layer.name: layer for layer in revived.layers} - self.assertAllEqual(model_layers.keys(), revived_layers.keys()) - - for name in model_layers: - model_layer = model_layers[name] - revived_layer = revived_layers[name] - self.assertEqual(model_layer.name, revived_layer.name) - self.assertEqual(model_layer.dtype, revived_layer.dtype) - self.assertEqual(model_layer.trainable, revived_layer.trainable) - if 'WithConfig' in type(model_layer).__name__: - self.assertEqual(type(model_layer), type(revived_layer)) - else: - # When loading layers from SavedModel, a new class is dynamically - # created with the same name. - self.assertEqual(type(model_layer).__name__, - type(revived_layer).__name__) - - -# These tests take a while to run, so each should run in a separate shard -# (putting them in the same TestCase resolves this). 
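The pattern these fixtures exercise, reduced to a standalone sketch (`TinyFunctionalSubclass` is an illustrative name): a `keras.Model` subclass that builds a functional graph in `__init__` and round-trips its constructor arguments through `get_config`.

from tensorflow import keras

class TinyFunctionalSubclass(keras.Model):

    def __init__(self, units, name=None):
        inputs = keras.Input((4,), name='inputs')
        outputs = keras.layers.Dense(units, name='outputs')(inputs)
        super().__init__(inputs, outputs, name=name)
        self.units = units

    def get_config(self):
        return {'units': self.units, 'name': self.name}

    @classmethod
    def from_config(cls, config):
        return cls(**config)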
-class TestBigModelRevive(ReviveTestBase): - - @test_combinations.run_with_all_model_types - def test_revive(self): - input_shape = None - if test_utils.get_model_type() == 'functional': - input_shape = (2, 3) - - layer_with_config = CustomLayerWithConfig(1., 2) - layer_without_config = CustomLayerNoConfig(3., 4) - subclassed_with_config = SubclassedModelWithConfig(4., 6.) - subclassed_without_config = SubclassedModelNoConfig(7., 8.) - - inputs = keras.Input((2, 3)) - x = CustomLayerWithConfig(1., 2)(inputs) - x = CustomLayerNoConfig(3., 4)(x) - x = SubclassedModelWithConfig(4., 6.)(x) - x = SubclassedModelNoConfig(7., 8.)(x) - inner_model_functional = keras.Model(inputs, x) - - inner_model_sequential = keras.Sequential( - [CustomLayerWithConfig(1., 2), - CustomLayerNoConfig(3., 4), - SubclassedModelWithConfig(4., 6.), - SubclassedModelNoConfig(7., 8.)]) - - class SubclassedModel(keras.Model): - - def __init__(self): - super().__init__() - self.all_layers = [CustomLayerWithConfig(1., 2), - CustomLayerNoConfig(3., 4), - SubclassedModelWithConfig(4., 6.), - SubclassedModelNoConfig(7., 8.)] - - def call(self, inputs): - x = inputs - for layer in self.all_layers: - x = layer(x) - return x - - inner_model_subclassed = SubclassedModel() - - layers = [layer_with_config, - layer_without_config, - subclassed_with_config, - subclassed_without_config, - inner_model_functional, - inner_model_sequential, - inner_model_subclassed] - model = test_utils.get_model_from_layers( - layers, input_shape=input_shape) - # Run data through the Model to create save spec and weights. - model.predict(np.ones((10, 2, 3)), batch_size=10) - - # Test that the correct checkpointed values are loaded, whether the layer is - # created from the config or SavedModel. - layer_with_config.c.assign(2 * layer_with_config.c) - layer_without_config.c.assign(3 * layer_without_config.c) - - model.save(self.path, save_format='tf') - revived = keras_load.load(self.path) - self._assert_revived_correctness(model, revived) - - -class TestModelRevive(ReviveTestBase): - - def test_revive_subclassed_with_nested_model(self): - model = SubclassedModelNoConfig(1., 2.) - # Run data through the Model to create save spec and weights. - model.predict(np.ones((10, 2, 3)), batch_size=10) - model.save(self.path, save_format='tf') - revived = keras_load.load(self.path) - self._assert_revived_correctness(model, revived) - - def test_revive_subclassed_with_sparse_model(self): - model = SubclassedSparseModelNoConfig(1., 2.) - # Run data through the Model to create save spec and weights. 
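A compressed version of the round trip that every test in these classes performs (a sketch; `path` is any writable directory, as provided by `ReviveTestBase.setUp`):

import numpy as np

model = SubclassedModelNoConfig(1., 2.)
model.predict(np.ones((10, 2, 3)), batch_size=10)  # creates weights and the save spec
model.save(path, save_format='tf')
revived = keras_load.load(path)
x = np.ones((2, 2, 3), dtype=np.float32)
np.testing.assert_allclose(model(x), revived(x))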
- x = tf.sparse.from_dense(np.ones((10, 2, 3), dtype=np.float32)) - model.predict(x, batch_size=10) - model.save(self.path, save_format='tf') - revived = keras_load.load(self.path) - self._assert_revived_correctness(model, revived) - - def test_revive_unregistered_sequential(self): - model = UnregisteredCustomSequentialModel() - x = np.random.random((2, 2, 3)).astype(np.float32) - model(x) - model.save(self.path, save_format='tf') - revived = keras_load.load(self.path) - self._assert_revived_correctness(model, revived) - - def test_revive_sequential_inputs(self): - model = keras.models.Sequential([ - keras.Input((None,), dtype=tf.string), - keras.layers.Lambda(tf.strings.lower) - ]) - model.save(self.path, save_format='tf') - revived = keras_load.load(self.path) - revived_layers = list( - revived._flatten_layers(include_self=False, recursive=False)) - self.assertEqual(tf.string, revived_layers[0].dtype) - - @parameterized.named_parameters( - ('default_config', CustomNetworkDefaultConfig), - ('with_config', CustomNetworkWithConfig), - ('with_config_name', CustomNetworkWithConfigName)) - def test_revive_network(self, model_cls): - model = model_cls(8) - model.save(self.path, include_optimizer=False, save_format='tf') - revived = keras_load.load(self.path, compile=False) - self._assert_revived_correctness(model, revived) - - def test_functional_subclass(self): - model = FunctionalSubclassModel(32) - model.save(self.path, save_format='tf') - revived = keras_load.load(self.path, compile=False) - self._assert_revived_correctness(model, revived) - - def test_functional_subclass_wrong_config(self): - model = FunctionalSubclassModelWrongConfig(32) - model.save(self.path, save_format='tf') - with self.assertRaisesRegex(TypeError, 'Unable to revive model'): - keras_load.load(self.path, compile=False) - - def test_load_compiled_metrics(self): - model = test_utils.get_small_sequential_mlp(1, 3) - - # Compile with dense categorical accuracy - model.compile('rmsprop', 'mse', 'acc') - x = np.random.random((5, 10)).astype(np.float32) - y_true = np.random.random((5, 3)).astype(np.float32) - model.train_on_batch(x, y_true) - - model.save(self.path, include_optimizer=True, save_format='tf') - revived = keras_load.load(self.path, compile=True) - self.assertAllClose(model.test_on_batch(x, y_true), - revived.test_on_batch(x, y_true)) - - # Compile with sparse categorical accuracy - model.compile('rmsprop', 'mse', 'acc') - y_true = np.random.randint(0, 3, (5, 1)).astype(np.float32) - model.train_on_batch(x, y_true) - model.save(self.path, include_optimizer=True, save_format='tf') - revived = keras_load.load(self.path, compile=True) - self.assertAllClose(model.test_on_batch(x, y_true), - revived.test_on_batch(x, y_true)) - - def test_revived_model_has_save_spec(self): - model = SubclassedModelWithConfig(2, 3) - model.predict(np.random.random((5, 10)).astype(np.float32)) - model.save(self.path, save_format='tf') - revived = keras_load.load(self.path, compile=True) - self.assertAllEqual( - model._get_save_spec(dynamic_batch=False), - revived._get_save_spec(dynamic_batch=False)) - - def test_load_model_with_name_conflict_raises_error(self): - - class LinearModel(SubclassedModelWithConfig): - pass - - model = LinearModel(2, 3) - model(np.random.random((5, 10)).astype(np.float32)) - model.save(self.path, save_format='tf') - with self.assertRaisesRegex( - RuntimeError, 'Unable to restore object of class \'LinearModel\''): - keras_load.load(self.path, compile=True) - - def 
test_load_model_with_name_conflict_registered_works(self): - model = WideDeepModel(2, 3) - model(np.random.random((5, 10)).astype(np.float32)) - model.save(self.path, save_format='tf') - keras_load.load(self.path, compile=True) - - -if __name__ == '__main__': - tf.compat.v1.enable_eager_execution() - with generic_utils.CustomObjectScope({ - 'CustomLayerWithConfig': CustomLayerWithConfig, - 'CustomNetworkWithConfig': CustomNetworkWithConfig, - 'CustomNetworkWithConfigName': CustomNetworkWithConfigName, - 'SubclassedModelWithConfig': SubclassedModelWithConfig, - 'FunctionalSubclassModel': FunctionalSubclassModel, - 'FunctionalSubclassModelWrongConfig': FunctionalSubclassModelWrongConfig, - 'WideDeepModel': WideDeepModel - }): - tf.test.main() diff --git a/keras/saving/saved_model/save.py b/keras/saving/saved_model/save.py deleted file mode 100644 index 5c916c31da62..000000000000 --- a/keras/saving/saved_model/save.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Keras SavedModel serialization.""" - -import os -from absl import logging - -from keras import backend -from keras.layers import serialization -from keras.protobuf import saved_metadata_pb2 -from keras.protobuf import versions_pb2 -from keras.saving import saving_utils -from keras.saving.saved_model import constants -from keras.saving.saved_model import save_impl -from keras.saving.saved_model import utils -from keras.utils.generic_utils import LazyLoader -from keras.utils.io_utils import ask_to_proceed_with_overwrite -import tensorflow.compat.v2 as tf - -from tensorflow.python.saved_model import save as save_lib - - -# To avoid circular dependencies between keras/engine and keras/saving, -# code in keras/saving must delay imports. - -base_layer = LazyLoader( - "base_layer", globals(), - "keras.engine.base_layer") -training_lib = LazyLoader( - "training_lib", globals(), - "keras.engine.training") - - -def save(model, filepath, overwrite, include_optimizer, signatures=None, - options=None, save_traces=True): - """Saves a model as a SavedModel to the filepath. - - Args: - model: Keras model instance to be saved. - filepath: String path to save the model. - overwrite: whether to overwrite the existing filepath. - include_optimizer: If True, save the model's optimizer state. - signatures: Signatures to save with the SavedModel. Applicable to the 'tf' - format only. Please see the `signatures` argument in `tf.saved_model.save` - for details. - options: (only applies to SavedModel format) `tf.saved_model.SaveOptions` - object that specifies options for saving to SavedModel. - save_traces: (only applies to SavedModel format) When enabled, the - SavedModel will store the function traces for each layer. This - can be disabled, so that only the configs of each layer are stored. - Defaults to `True`. 
Disabling this will decrease serialization time - and reduce file size, but it requires that all custom layers/models - implement a `get_config()` method. - - Raises: - ValueError: if the model's inputs have not been defined. - """ - # If file exists and should not be overwritten. - if not overwrite and os.path.exists(filepath): - proceed = ask_to_proceed_with_overwrite(filepath) - if not proceed: - return - - if save_traces: - if save_impl.should_skip_serialization(model): - saving_utils.raise_model_input_error(model) - - if not include_optimizer: - orig_optimizer = model.optimizer - model.optimizer = None - # TODO(b/180760306) Change to del model.optimizer if Layer's __delattr__ - # calls AutoTrackable's __delattr__. - model._delete_tracking("optimizer") # pylint: disable=protected-access - - # Trace all functions and signatures with `training=0` instead of using an - # already-set learning phase placeholder. - # This is needed for compatibility reasons until learning phase setting - # is removed from the public apis. - with backend.deprecated_internal_learning_phase_scope(0): - with utils.keras_option_scope(save_traces): - saved_nodes, node_paths = save_lib.save_and_return_nodes( - model, filepath, signatures, options) - - # Save all metadata to a separate file in the SavedModel directory. - metadata = generate_keras_metadata(saved_nodes, node_paths) - - with tf.io.gfile.GFile( - tf.io.gfile.join(filepath, constants.SAVED_METADATA_PATH), "wb") as w: - w.write(metadata.SerializeToString(deterministic=True)) - - if not include_optimizer: - model.optimizer = orig_optimizer - - -def generate_keras_metadata(saved_nodes, node_paths): - """Constructs a KerasMetadata proto with the metadata of each keras object.""" - metadata = saved_metadata_pb2.SavedMetadata() - for node_id, node in enumerate(saved_nodes): - if isinstance(node, base_layer.Layer): - path = node_paths[node] - if not path: - node_path = "root" - else: - node_path = "root.{}".format( - ".".join([ref.name for ref in path])) - - metadata.nodes.add( - node_id=node_id, - node_path=node_path, - version=versions_pb2.VersionDef( - producer=2, min_consumer=1, bad_consumers=[]), - identifier=node._object_identifier, # pylint: disable=protected-access - metadata=node._tracking_metadata) # pylint: disable=protected-access - - # Log warning if the node's class name conflicts with a Keras built-in - # object. - class_name = node.__class__.__name__ - builtin_layer = serialization.get_builtin_layer(class_name) - if builtin_layer: - if not isinstance(node, builtin_layer): - logging.warning( - "%s has the same name '%s' as a built-in Keras " - "object. Consider renaming %s to avoid naming " - "conflicts when loading with " - "`tf.keras.models.load_model`. If renaming is not possible, pass " - "the object in the `custom_objects` parameter of the load " - "function.", node, class_name, node.__class__) - - return metadata diff --git a/keras/saving/saved_model/save_impl.py b/keras/saving/saved_model/save_impl.py deleted file mode 100644 index ac980ef4253a..000000000000 --- a/keras/saving/saved_model/save_impl.py +++ /dev/null @@ -1,731 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Keras SavedModel serialization. - -TODO (kathywu): Move to layer_serialization.py. Some model-specific logic should -go to model_serialization.py. -""" - -import functools -import threading -import weakref - -from keras import backend -from keras.engine import base_layer_utils -from keras.engine import input_spec -from keras.mixed_precision import autocast_variable -from keras.saving import saving_utils -from keras.saving.saved_model import constants -from keras.saving.saved_model import load as keras_load -from keras.saving.saved_model import serialized_attributes -from keras.saving.saved_model import utils -from keras.utils import layer_utils -from keras.utils import tf_contextlib -from keras.utils import tf_utils -from keras.utils import version_utils -from keras.utils.generic_utils import LazyLoader -import tensorflow.compat.v1.logging as logging -import tensorflow.compat.v2 as tf - -# To avoid circular dependencies between keras/engine and keras/saving, -# code in keras/saving must delay imports. - -# TODO(b/134426265): Switch back to single-quotes to match the rest of the file -# once the issue with copybara is fixed. -# pylint:disable=g-inconsistent-quotes -base_layer = LazyLoader('base_layer', globals(), 'keras.engine.base_layer') -metrics = LazyLoader('metrics', globals(), 'keras.metrics') -input_layer = LazyLoader('input_layer', globals(), 'keras.engine.input_layer') -training_lib = LazyLoader('training_lib', globals(), 'keras.engine.training') -sequential_lib = LazyLoader('sequential_lib', globals(), - 'keras.engine.sequential') -# pylint:enable=g-inconsistent-quotes - - -def should_skip_serialization(layer): - """Skip serializing extra objects and functions if layer inputs aren't set.""" - saved_model_input_spec_set = (isinstance(layer, training_lib.Model) and - layer._saved_model_inputs_spec is not None) # pylint: disable=protected-access - if not layer.built and not saved_model_input_spec_set: - logging.warning('Skipping full serialization of Keras layer {}, because ' - 'it is not built.'.format(layer)) - return True - return False - - -def _filter_shards(variables): - return [var for var in variables if not hasattr(var, '_sharded_container')] - - -def wrap_layer_objects(layer, serialization_cache): - """Returns extra trackable objects to attach to the serialized layer. - - Args: - layer: Keras Layer object. - serialization_cache: Dictionary shared between all objects during - serialization. - - Returns: - A dictionary containing all checkpointable objects from a - SerializedAttributes object. See LayerAttributes and ModelAttributes for - entire list of objects - """ - # Wrap all regularization losses as tf.functions. - # First, generate list of all regularization losses in this layer and - # sublayers. - all_losses = layer._callable_losses[:] # pylint: disable=protected-access - for child_layer in utils.list_all_layers(layer): - all_losses.extend(child_layer._callable_losses) # pylint: disable=protected-access - # Next, wrap all loss functions as tf.functions. 
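The wrapping step described above, sketched with the public `tf.function` API (the internal helper also assigns a stable per-index name; names below are illustrative):

import tensorflow as tf

example_loss_fn = lambda: 0.01 * tf.reduce_sum(tf.ones((3, 3)))
wrapped_loss = tf.function(example_loss_fn, input_signature=[])
wrapped_loss.get_concrete_function()  # nullary trace, serializable with the layer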
Use the serialization cache - # to store already-wrapped functions. - keras_loss_cache = serialization_cache.setdefault('keras_losses', {}) - wrapped_loss_functions = [] - for loss_fn in all_losses: - if loss_fn in keras_loss_cache: - wrapped_loss_functions.append(keras_loss_cache[loss_fn]) - else: - wrapped_loss = _wrap_unconditional_loss(loss_fn, len(keras_loss_cache)) - keras_loss_cache[loss_fn] = wrapped_loss - wrapped_loss_functions.append(wrapped_loss) - wrapped_layer_losses = [ - keras_loss_cache[fn] for fn in layer._callable_losses[:] # pylint: disable=protected-access - ] - - layer_metrics = tf.__internal__.tracking.wrap( - {m.name: m for m in layer._metrics}) # pylint: disable=protected-access - - # Avoid duplicate creation of shard Variables on loading. - # `layer.variables` will return the shard Variables rather than the - # ShardedVariables (b/224541446), but Keras loading will create new - # ShardedVariables (and thus shard Variables) from Keras metadata if needed. - # There's no need to also save the shard Variables here, so filter them out. - variables = _filter_shards(layer.variables) - trainable_variables = _filter_shards(layer.trainable_variables) - non_trainable_variables = _filter_shards(layer.non_trainable_variables) - return dict( - variables=tf.__internal__.tracking.wrap(variables), - trainable_variables=tf.__internal__.tracking.wrap(trainable_variables), - non_trainable_variables=tf.__internal__.tracking.wrap( - non_trainable_variables), - layers=tf.__internal__.tracking.wrap(utils.list_all_layers(layer)), - metrics=tf.__internal__.tracking.wrap(layer.metrics), - regularization_losses=tf.__internal__.tracking.wrap( - wrapped_loss_functions), - layer_regularization_losses=tf.__internal__.tracking.wrap( - wrapped_layer_losses), - layer_metrics=layer_metrics) - # pylint: disable=protected-access - - -def wrap_layer_functions(layer, serialization_cache): - """Returns dict of wrapped layer call function and losses in tf.functions. - - Args: - layer: Keras Layer object. - serialization_cache: Dictionary shared between all objects during - serialization. - - Returns: - A dictionary containing all keras tf.functions to serialize. See - LayerAttributes and ModelAttributes for the list of all attributes. - """ - # Since Sequential models may be modified in place using model.add() or - # model.pop(), don't use saved functions. - if (isinstance(layer, keras_load.RevivedLayer) and - not isinstance(layer, sequential_lib.Sequential)): - return { - fn_name: getattr(layer.keras_api, fn_name, None) - for fn_name in serialized_attributes.LayerAttributes.all_functions - } - - # Reset the losses of the layer and its children. The call function in each - # child layer is replaced with tf.functions. - original_fns = _replace_child_layer_functions(layer, serialization_cache) - original_losses = _reset_layer_losses(layer) - - # Wrap all the layer call and activity regularizer functions. - - # Use LayerCallCollection to ensure that all layer call functions (__call__, - # call with losses) are traced with the same inputs. - call_collection = LayerCallCollection(layer) - call_fn_with_losses = call_collection.add_function( - _wrap_call_and_conditional_losses(layer), - '{}_layer_call_and_return_conditional_losses'.format(layer.name), - # If any of this layer's child layers use the training arg, the traced - # call functions of this layer will have a training keyword argument. 
If - # the original layer does not expect the training arg, then it will have - # to be removed (by setting `match_layer_training_arg`). - match_layer_training_arg=True) - call_fn = call_collection.add_function( - _extract_outputs_from_fn(layer, call_fn_with_losses), - '{}_layer_call_fn'.format(layer.name), - # Since `call_fn` wraps call_fn_with_losses and not the original call - # function, `match_layer_training_arg` should be set to False. - match_layer_training_arg=False) - - fns = { - 'call_and_return_conditional_losses': call_fn_with_losses, - '__call__': call_fn - } - - if layer._activity_regularizer is not None: # pylint: disable=protected-access - fns['activity_regularizer_fn'] = _wrap_activity_regularizer(layer) - fns['call_and_return_all_conditional_losses'] = ( - call_collection.add_function( - _append_activity_regularizer_loss(layer, call_fn_with_losses, - fns['activity_regularizer_fn']), - '{}_layer_call_and_return_all_conditional_losses'.format( - layer.name), - match_layer_training_arg=False)) - else: - fns['activity_regularizer_fn'] = None - fns['call_and_return_all_conditional_losses'] = call_fn_with_losses - - # Manually trigger traces before restoring the overwritten functions. The - # functions are traced within the layer call context to ensure that layer - # functions (e.g. add_loss) behave as though running in graph mode. - with tracing_scope(): - call_collection.trace_with_input_signature() - with base_layer_utils.call_context().enter( - layer, inputs=None, build_graph=True, training=None, saving=True): - for fn in fns.values(): - if fn is not None and not isinstance(fn, LayerCall): - fn.get_concrete_function() - - # Restore overwritten functions and losses - _restore_child_layer_functions(original_fns) - _restore_layer_losses(original_losses) - - return fns - - -def default_save_signature(layer): - original_losses = _reset_layer_losses(layer) - fn = saving_utils.trace_model_call(layer) - _restore_layer_losses(original_losses) - return fn - - -def _replace_child_layer_functions(layer, serialization_cache): - """Replaces functions in the children layers with wrapped tf.functions. - - This step allows functions from parent layers to reference the wrapped - functions from their children layers instead of retracing the ops. - - This function also resets all losses stored in the layer. These are stored in - the returned dictionary. Use `_restore_child_layer_functions` to restore - the original attributes. - - Args: - layer: Keras Layer object. - serialization_cache: Dictionary shared between all objects during - serialization. - - Returns: - Dictionary mapping layer objects -> original functions and losses: - { Child layer 1: { - 'losses': Original losses, - 'call': Original call function - '_activity_regularizer': Original activity regularizer}, - Child layer 2: ... - } - """ - # pylint: disable=protected-access - original_fns = {} - - def replace_layer_functions(child_layer, serialized_fns): - """Replaces layer call and activity regularizer with wrapped functions.""" - original_fns[child_layer] = { - 'call': child_layer.call, - '_activity_regularizer': child_layer._activity_regularizer - } - with utils.no_automatic_dependency_tracking_scope(child_layer): - try: - child_layer._activity_regularizer = serialized_fns.get( - 'activity_regularizer_fn') - except AttributeError: - # Some layers have an unsettable activity regularizer. 
- pass - child_layer.call = utils.use_wrapped_call( - child_layer, - serialized_fns['call_and_return_conditional_losses'], - child_layer._call_spec, - default_training_value=False) - - def replace_metric_functions(child_layer, serialized_fns): - """Replaces metric functions with wrapped functions.""" - original_fns[child_layer] = { - '__call__': child_layer.__call__, - 'result': child_layer.result, - 'update_state': child_layer.update_state - } - with utils.no_automatic_dependency_tracking_scope(child_layer): - child_layer.__call__ = serialized_fns['__call__'] - child_layer.result = serialized_fns['result'] - child_layer.update_state = serialized_fns['update_state'] - - for child_layer in utils.list_all_layers(layer): - if isinstance(child_layer, input_layer.InputLayer): - continue - - if child_layer not in serialization_cache[constants.KERAS_CACHE_KEY]: - serialized_functions = ( - child_layer._trackable_saved_model_saver._get_serialized_attributes( - serialization_cache).functions) - else: - serialized_functions = ( - serialization_cache[constants.KERAS_CACHE_KEY][child_layer].functions) - if not serialized_functions: - # This indicates either: - # - circular dependency, which means the current layer's functions - # should be wrapped first. - # - Child layer's inputs are not defined, so its functions have not been - # wrapped. In this case, no replacement is necessary so move on to the - # next child. - continue - - if isinstance(child_layer, metrics.Metric): - replace_metric_functions(child_layer, serialized_functions) - else: - replace_layer_functions(child_layer, serialized_functions) - - return original_fns - # pylint: enable=protected-access - - -def _restore_child_layer_functions(original_fns): - """Restores attributes replaced with `_replace_child_layer_functions`.""" - for child_layer, fns in original_fns.items(): - with utils.no_automatic_dependency_tracking_scope(child_layer): - for fn_name, fn in fns.items(): - try: - setattr(child_layer, fn_name, fn) # pylint: disable=protected-access - except AttributeError: - pass # In the case of _activity_regularizer, setting the attribute - # may be disallowed. - - -# pylint: disable=protected-access -def _reset_layer_losses(parent_layer): - """Resets losses of layer and its sublayers, and returns original losses.""" - losses_dict = {} - for layer in utils.list_all_layers_and_sublayers(parent_layer): - losses_dict[layer] = { - 'losses': layer._losses[:], - 'eager_losses': layer._eager_losses[:] - } - with utils.no_automatic_dependency_tracking_scope(layer): - layer._losses = [] - layer._eager_losses = [] - return losses_dict - - -def _restore_layer_losses(losses_dict): - for layer in losses_dict: - with utils.no_automatic_dependency_tracking_scope(layer): - layer._losses = losses_dict[layer]['losses'] - layer._eager_losses = losses_dict[layer]['eager_losses'] - - -# pylint: enable=protected-access - - -class LayerTracingContext(threading.local): - - def __init__(self): - super().__init__() - self.enable_call_tracing = False - self.trace_queue = [] - - -_thread_local_data = LayerTracingContext() - - -@tf_contextlib.contextmanager -def tracing_scope(): - """Enables tracing scope.""" - # This enables the LayerCallCollection's tracing mechanism to trace all call - # functions in the collection. - previous_value = _thread_local_data.enable_call_tracing - previous_queue = _thread_local_data.trace_queue - try: - _thread_local_data.enable_call_tracing = True - _thread_local_data.trace_queue = [] - yield - finally: - # Run traces from the queue. 
- while _thread_local_data.trace_queue: - fn, args, kwargs, training = _thread_local_data.trace_queue.pop() - if training is not None: - with backend.deprecated_internal_learning_phase_scope(training): - fn.get_concrete_function(*args, **kwargs) - else: - fn.get_concrete_function(*args, **kwargs) - _thread_local_data.trace_queue = previous_queue - _thread_local_data.enable_call_tracing = previous_value - - -def add_trace_to_queue(fn, args, kwargs, training=None): - if tracing_enabled(): - _thread_local_data.trace_queue.append( - (fn, args[:], kwargs.copy(), training)) - - -def tracing_enabled(): - """Whether to add extra traces to the queue.""" - return _thread_local_data.enable_call_tracing - - -class LayerCallCollection: - """Groups wrapped layer call functions. - - This is used to ensure that all layer call functions are traced with the same - inputs- - - call - - call_and_return_conditional_losses - - call_and_return_all_conditional_losses - """ - - def __init__(self, layer): - self.layer = layer - - self.layer_call_method = _get_layer_call_method(layer) - self._expects_training_arg = utils.layer_uses_training_bool(layer) - self._call_spec = layer._call_spec # pylint: disable=protected-access - - # Create new call spec if the layer itself does not accept a training arg, - # but one of its child layers does. When this layer's call functions are - # traced, they will be traced with an added `training` keyword argument. - if not self.layer._expects_training_arg and self._expects_training_arg: # pylint: disable=protected-access - arg_spec = utils.set_training_arg_spec(self._call_spec.full_argspec, - False) - self._call_spec = layer_utils.CallFunctionSpec(arg_spec) - - self._layer_inputs = self._get_layer_inputs(layer) - self._functions = weakref.WeakValueDictionary() - - # Get the input argument name from the args. - if self._call_spec.arg_names: - self._input_arg_name = self._call_spec.arg_names[0] - else: - # Layer could be defined with only varargs, in which case use a default - # name. - self._input_arg_name = 'inputs' - - def _get_layer_inputs(self, layer): - """Inspects layer object and returns the inferred input signature. - - Args: - layer: Layer object. - - Returns: - List of possibly nested TensorSpecs of the layer call function inputs in - the form of `(args, kwargs)` - """ - if (isinstance(layer.call, tf.__internal__.function.Function) and - layer.call.input_signature is not None): - return layer.call.input_signature, {} - elif isinstance(layer, training_lib.Model): - return saving_utils.model_call_inputs(layer) - elif (layer.input_spec is not None and - layer._use_input_spec_as_call_signature): # pylint: disable=protected-access - - def to_tensor_spec_or_none(x): - spec = input_spec.to_tensor_spec(x, layer._compute_dtype) # pylint: disable=protected-access - # If the shape is too general (e.g. multiple dimensions are allowed), - # return None so that separate functions can be generated for each - # inferred input signature. - # TODO(b/134962016): currently partial signatures are not supported. - if spec.shape == tf.TensorShape(None): - return None, None - return spec - - input_signature = [ - tf.nest.map_structure(to_tensor_spec_or_none, layer.input_spec) - ] - - return input_signature, {} - else: - return None, None - - def add_trace(self, *args, **kwargs): - """Traces all functions with the same args and kwargs. - - Args: - *args: Positional args passed to the original function. - **kwargs: Keyword args passed to the original function. 
- """ - args = list(args) - kwargs = kwargs.copy() - - for fn in self._functions.values(): - # TODO(kathywu): Replace arguments with broader shapes defined in the - # input signature. - if self._expects_training_arg: - - def trace_with_training(value, fn=fn): - nonlocal args, kwargs - args, kwargs = self._call_spec.set_arg_value( # pylint: disable=protected-access - 'training', value, args, kwargs, inputs_in_args=True) - add_trace_to_queue(fn, args, kwargs, value) - - trace_with_training(True) - trace_with_training(False) - else: - add_trace_to_queue(fn, args, kwargs) - - def training_arg_was_passed(self, args, kwargs): - return self._call_spec.arg_was_passed( # pylint: disable=protected-access - 'training', - args, - kwargs, - inputs_in_args=True) - - def get_training_arg_value(self, args, kwargs): - try: - return self._call_spec.get_arg_value( # pylint: disable=protected-access - 'training', - args, - kwargs, - inputs_in_args=True) - except KeyError: # Training is not in args or kwargs. - return None - - def get_input_arg_value(self, args, kwargs): - return self._call_spec.get_arg_value( # pylint: disable=protected-access - self._input_arg_name, - args, - kwargs, - inputs_in_args=True) - - def _maybe_wrap_with_training_arg(self, call_fn, match_layer_training_arg): - """Wraps call function with added training argument if necessary.""" - if not self.layer._expects_training_arg and self._expects_training_arg: # pylint: disable=protected-access - # Add training arg to wrapper function. # pylint: disable=protected-access - def wrap_with_training_arg(*args, **kwargs): - if match_layer_training_arg: - # Remove the training value, since the original call_fn does not - # expect a training arg. Instead, the training value will be - # propagated using the call context created in LayerCall. - args = list(args) - kwargs = kwargs.copy() - args, kwargs = self._call_spec.set_arg_value( # pylint: disable=protected-access - 'training', None, args, kwargs, inputs_in_args=True, - pop_kwarg_if_none=True) - return call_fn(*args, **kwargs) - - return tf.__internal__.decorator.make_decorator( - target=call_fn, - decorator_func=wrap_with_training_arg, - decorator_argspec=self._call_spec.full_argspec) - - return call_fn - - def add_function(self, call_fn, name, match_layer_training_arg): - """Adds a layer call function to the collection. - - Args: - call_fn: a python function - name: Name of call function - match_layer_training_arg: If True, removes the `training` from the - function arguments when calling `call_fn`. - - Returns: - LayerCall (tf.function) - """ - fn = LayerCall( - self, - self._maybe_wrap_with_training_arg(call_fn, match_layer_training_arg), - name) - self._functions[name] = fn.wrapped_call - return fn - - def trace_with_input_signature(self): - """Trace with the layer/models inferred input signature if possible.""" - if self._layer_inputs[0] is None: - return - - args, kwargs = self._layer_inputs - if self._expects_training_arg: - args, kwargs = self._call_spec.set_arg_value('training', False, args, - kwargs, inputs_in_args=True) - if None not in tf.nest.flatten([args, kwargs]): - # Manually add traces for layers that have keyword arguments and have - # a fully defined input signature. 
- self.add_trace(*args, **kwargs) - - -def _filtered_inputs(inputs): - return list(filter(tf_utils.is_tensor_or_variable, tf.nest.flatten(inputs))) - - -def layer_call_wrapper(call_collection, method, name): - """Ensures layer losses are kept the same, and runs method in call context.""" - - # Create wrapper that deals with losses and call context. - def wrapper(*args, **kwargs): - """Calls method within call context.""" - layer = call_collection.layer - training = None - inputs = _filtered_inputs([args, kwargs]) - # pylint: disable=protected-access - if (args or kwargs) and call_collection.training_arg_was_passed( - args, kwargs): - training = call_collection.get_training_arg_value(args, kwargs) - # pylint: enable=protected-access - original_losses = _reset_layer_losses(layer) - with base_layer_utils.call_context().enter( - layer, inputs=inputs, build_graph=False, training=training, - saving=True): - with autocast_variable.enable_auto_cast_variables( - layer._compute_dtype_object): # pylint: disable=protected-access - ret = method(*args, **kwargs) - _restore_layer_losses(original_losses) - return ret - - # Rename to `name`, since tf.function doesn't have a name argument. Without - # this, all functions returned by this method will be named "call", which - # would be a nightmare to debug. - fn = tf.__internal__.decorator.make_decorator( - target=method, decorator_func=wrapper) - fn.__name__ = name - return fn - - -class LayerCall: - """Function that triggers traces of other functions in the same collection.""" - - def __init__(self, call_collection, call_fn, name): - """Initializes a LayerCall object. - - Args: - call_collection: a LayerCallCollection, which contains the other layer - call functions (e.g. call_with_conditional_losses, call). These - functions should be traced with the same arguments. - call_fn: A call function. - name: Name of the call function. - """ - self.call_collection = call_collection - self.wrapped_call = tf.function( - layer_call_wrapper(call_collection, call_fn, name)) - - def _maybe_trace(self, args, kwargs): - # Trigger traces of other call functions + extra training-arg traces. - if tracing_enabled(): - self.call_collection.add_trace(*args, **kwargs) - - def __call__(self, *args, **kwargs): - self._maybe_trace(args, kwargs) - return self.wrapped_call(*args, **kwargs) - - def get_concrete_function(self, *args, **kwargs): - self._maybe_trace(args, kwargs) - return self.wrapped_call.get_concrete_function(*args, **kwargs) - - -def _wrap_call_and_conditional_losses(layer): - """Wraps call function that returns a tuple of (outputs, losses). - - The losses returned are conditional on the inputs passed to the call function. - Unconditional losses (e.g. weight regularization) are wrapped separately.
- - Args: - layer: a Keras layer object - - Returns: - python call function that returns outputs and conditional losses -- excludes - activity regularizer - """ - # Create function that generates both outputs and losses - layer_call = _get_layer_call_method(layer) - - def call_and_return_conditional_losses(*args, **kwargs): - """Returns layer (call_output, conditional losses) tuple.""" - call_output = layer_call(*args, **kwargs) - if version_utils.is_v1_layer_or_model(layer): - conditional_losses = layer.get_losses_for( - _filtered_inputs([args, kwargs])) - else: - conditional_losses = [ - l for l in layer.losses if not hasattr(l, '_unconditional_loss') - ] - return call_output, conditional_losses - - return _create_call_fn_decorator(layer, call_and_return_conditional_losses) - - -def _extract_outputs_from_fn(layer, call_and_return_conditional_losses): - """Returns a function that returns only call function outputs.""" - if isinstance(layer, keras_load.RevivedLayer): - return layer.keras_api.__call__ # pylint: disable=protected-access - - def call(inputs, *args, **kwargs): - return call_and_return_conditional_losses(inputs, *args, **kwargs)[0] - - return _create_call_fn_decorator(layer, call) - - -def _append_activity_regularizer_loss(layer, call_fn_with_losses, - activity_regularizer_fn): - """Appends activity regularizer loss to losses returned by the wrapped fn.""" - - def fn(inputs, *args, **kwargs): - outputs, losses = call_fn_with_losses(inputs, *args, **kwargs) - losses.append(activity_regularizer_fn(outputs)) - return outputs, losses - - return _create_call_fn_decorator(layer, fn) - - -def _create_call_fn_decorator(layer, wrapped_call): - call_fn = _get_layer_call_method(layer) - fn, arg_spec = utils.maybe_add_training_arg( - layer._call_spec, # pylint: disable=protected-access - wrapped_call, - layer._expects_training_arg, # pylint: disable=protected-access - default_training_value=False) - return tf.__internal__.decorator.make_decorator( - target=call_fn, decorator_func=fn, decorator_argspec=arg_spec) - - -def _wrap_unconditional_loss(loss_fn, index): - """Wraps callable/unconditional loss, returning a serializable function.""" - # Extract original loss function from partial function - fn = loss_fn.args[0] if isinstance(loss_fn, functools.partial) else loss_fn - if isinstance(fn, tf.__internal__.function.Function): - return fn - else: - return tf.__internal__.function.Function( - fn, 'loss_fn_{}'.format(index), input_signature=[]) - - -def _wrap_activity_regularizer(layer): - """Wraps the activity regularizer.""" - # pylint: disable=protected-access - if isinstance(layer._activity_regularizer, tf.__internal__.function.Function): - return layer._activity_regularizer - return tf.__internal__.function.Function( - layer._activity_regularizer, - '{}_activity_regularizer'.format(layer.name), - input_signature=[ - tf.TensorSpec(None, layer._compute_dtype or backend.floatx()) - ]) - # pylint: enable=protected-access - - -def _get_layer_call_method(layer): - if isinstance(layer.call, (tf.__internal__.function.Function)): - return layer.call.python_function - return layer.call diff --git a/keras/saving/saved_model/saved_model_test.py b/keras/saving/saved_model/saved_model_test.py deleted file mode 100644 index 972126691d85..000000000000 --- a/keras/saving/saved_model/saved_model_test.py +++ /dev/null @@ -1,1418 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# pylint: disable=protected-access -"""Tests for saving and loading Keras models and layers from SavedModel. - -These should ensure that all layer properties are correctly assigned after -loading from the SavedModel. - -Tests that focus on the model structure should go in revive_test.py -""" - -import os -import shutil -import sys - -from absl.testing import parameterized -import keras -from keras import regularizers -from keras.feature_column.dense_features import DenseFeatures -from keras.protobuf import saved_metadata_pb2 -from keras.protobuf import versions_pb2 -from keras.saving.saved_model import json_utils -from keras.saving.saved_model import load as keras_load -from keras.saving.saved_model import save_impl as keras_save -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils -from keras.utils import control_flow_util -from keras.utils import generic_utils -from keras.utils import tf_contextlib -from keras.utils import tf_inspect -import numpy as np -import tensorflow.compat.v2 as tf - -from tensorflow.core.example import example_pb2 -from tensorflow.core.example import feature_pb2 - - -class LayerWithLearningPhase(keras.engine.base_layer.Layer): - - def build(self, input_shape): - self.input_spec = keras.layers.InputSpec(shape=[None] * len(input_shape)) - self.built = True - - def call(self, x, training=None): - if training is None: - training = keras.backend.learning_phase() - output = control_flow_util.smart_cond(training, lambda: x * 0, - lambda: tf.identity(x)) - if not tf.executing_eagerly(): - output._uses_learning_phase = True # pylint: disable=protected-access - return output - - def compute_output_shape(self, input_shape): - return input_shape - - @property - def _use_input_spec_as_call_signature(self): - return True - - -class LayerWithLoss(keras.layers.Layer): - - def call(self, inputs): - self.add_loss(tf.reduce_sum(inputs)) - return inputs * 2 - - -class LayerWithUpdate(keras.layers.Layer): - - def build(self, _): - self.v = self.add_weight( - 'v', - shape=[], - initializer=keras.initializers.zeros, - trainable=False, - dtype=tf.float32) - - def call(self, inputs, training=True): - if training: - self.add_update(self.v.assign_add(1.)) - return inputs * 2. 
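The registration decorator used just below has a public counterpart; a minimal sketch of the same mechanism (`Doubler` is an illustrative name):

from tensorflow import keras

@keras.utils.register_keras_serializable(package='Testing')
class Doubler(keras.layers.Layer):

    def call(self, inputs):
        # Once registered, instances reload without passing custom_objects.
        return inputs * 2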
- - -@generic_utils.register_keras_serializable('Testing') -class GlobalLayerThatShouldFailIfNotAdded(keras.layers.Layer): - _must_restore_from_config = True - - -@test_combinations.run_all_keras_modes -class TestSavedModelFormatAllModes(test_combinations.TestCase): - - def _save_model_dir(self, dirname='saved_model'): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - return os.path.join(temp_dir, dirname) - - def _get_model(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - model.layers[-1].activity_regularizer = regularizers.get('l2') - model.activity_regularizer = regularizers.get('l2') - model.compile( - loss='mse', - optimizer='rmsprop') - def callable_loss(): - return tf.reduce_sum(model.weights[0]) - model.add_loss(callable_loss) - return model - - def _train_model(self, model, use_dataset=False): - x = np.random.random((1, 3)) - y = np.random.random((1, 4)) - - if not tf.__internal__.tf2.enabled(): - # The layer autocast behavior only runs when autocast is enabled, so - # in V1, the numpy inputs still need to be cast to float32. - x = x.astype(np.float32) - y = y.astype(np.float32) - - if use_dataset: - dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(1) - model.fit(dataset) - else: - model.train_on_batch(x, y) - - def _save_and_load(self, model): - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - return loaded - - def _test_evaluation(self, model, loaded): - # Assert that original and loaded models have the same results when called. - self.evaluate(tf.compat.v1.variables_initializer(loaded.variables)) - self.assertAllClose(self.evaluate(model.weights), - self.evaluate(loaded.weights)) - - input_arr = tf.constant( - np.random.random((1, 3)).astype(np.float32)) - self.assertAllClose(self.evaluate(model(input_arr)), - self.evaluate(loaded(input_arr))) - # Validate losses. The order of conditional losses may change between the - # model and loaded model, so sort the losses first. 
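An aside on the terminology used throughout this file: a loss added as a zero-argument callable is input-independent ("unconditional"), while a loss tensor computed inside `call` depends on the inputs ("conditional"); the save_impl helpers earlier in this diff separate the two via the `_unconditional_loss` marker. A minimal sketch with a hypothetical layer:

    import tensorflow as tf

    class TwoLossLayer(tf.keras.layers.Layer):
        def build(self, input_shape):
            self.w = self.add_weight('w', shape=[])
            # Zero-argument callable -> unconditional (input-independent) loss.
            self.add_loss(lambda: 0.01 * tf.reduce_sum(self.w))

        def call(self, inputs):
            # Tensor computed from the inputs -> conditional loss.
            self.add_loss(tf.reduce_sum(inputs))
            return inputs

    layer = TwoLossLayer()
    layer(tf.ones((2, 3)))
    print(len(layer.losses))  # 2: one conditional, one unconditional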
- if tf.executing_eagerly(): - self.assertAllClose(sorted(self.evaluate(model.losses)), - sorted(self.evaluate(loaded.losses))) - - @test_combinations.run_with_all_model_types - def test_model_save_and_load(self): - model = self._get_model() - self._train_model(model, use_dataset=False) - loaded = self._save_and_load(model) - self._test_evaluation(model, loaded) - - @test_combinations.run_with_all_model_types - def test_model_save_and_load_dataset(self): - model = self._get_model() - self._train_model(model, use_dataset=True) - loaded = self._save_and_load(model) - self._test_evaluation(model, loaded) - - def test_trainable_weights(self): - """Tests that trainable status of individual weights is preserved.""" - layer = keras.layers.Dense(4, name='custom_layer') - layer.build([None, 3]) - layer.add_weight( - 'extra_weight', shape=[], - initializer=tf.compat.v1.constant_initializer(11), - trainable=True) - layer.add_weight( - 'extra_weight_2', shape=[], - initializer=tf.compat.v1.constant_initializer(12), - trainable=False) - model = keras.Sequential([keras.Input([3,]), layer]) - - saved_model_dir = self._save_model_dir() - self.evaluate(tf.compat.v1.variables_initializer(layer.variables)) - model.save(saved_model_dir, save_format='tf') - loaded_model = keras_load.load(saved_model_dir) - self.evaluate(tf.compat.v1.variables_initializer(loaded_model.variables)) - - loaded = loaded_model.layers[-1] - - equal_attrs = ['name', '_expects_training_arg', 'trainable'] - for attr in equal_attrs: - self.assertEqual(getattr(layer, attr), getattr(loaded, attr)) - - all_close = ['weights', 'trainable_weights', 'non_trainable_weights'] - for attr in all_close: - self.assertAllClose(self.evaluate(getattr(layer, attr)), - self.evaluate(getattr(loaded, attr))) - - @test_combinations.run_with_all_model_types - def test_trainable_layers(self): - """Tests that trainable status of individual layers is preserved.""" - model = self._get_model() - # Set the last layer to *not* be trainable. - model.layers[-1].trainable = False - self._train_model(model, use_dataset=True) - loaded = self._save_and_load(model) - - self._test_evaluation(model, loaded) - self.assertFalse(model.layers[-1].trainable) - self.assertFalse(loaded.layers[-1].trainable) - - def test_trainable_custom_model_false(self): - """Tests that overall False trainable status of Model is preserved.""" - # Set all layers to *not* be trainable. - model = test_utils.SmallSubclassMLP(1, 4, trainable=False) - model.compile(loss='mse', optimizer='rmsprop') - self._train_model(model, use_dataset=False) - loaded = self._save_and_load(model) - - self._test_evaluation(model, loaded) - self.assertEmpty(model.trainable_variables) - self.assertEmpty(loaded.trainable_variables) - - def test_maintains_losses(self): - """Tests that the layer losses do not change before and after export.""" - model = keras.models.Sequential([LayerWithLoss()]) - model.compile( - loss='mse', - optimizer='rmsprop') - input_arr = np.random.random((1, 3)) - target_arr = np.random.random((1, 3)) - - # Test that symbolic losses are maintained (train_on_batch saves symbolic - # losses.) - model.train_on_batch(input_arr, target_arr) - previous_losses = model.losses[:] - - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - with previous_losses[0].graph.as_default(): - # If we try to compare symbolic Tensors in eager mode, assertAllEqual will - # return False even if they are the same Tensor.
- self.assertEqual(previous_losses, model.losses) - - if tf.executing_eagerly(): - # Test that eager losses are maintained. - model(input_arr) # Calls model eagerly, creating eager losses. - previous_losses = model.losses[:] - model.save(saved_model_dir, save_format='tf') - self.assertAllEqual(previous_losses, model.losses) - - def test_layer_with_learning_phase(self): - layer = LayerWithLearningPhase() - layer.build([None, None]) - saved_model_dir = self._save_model_dir() - model = test_utils.get_model_from_layers( - [layer], input_shape=[None], model_type='functional') - model.save(saved_model_dir, save_format='tf') - loaded_model = keras_load.load(saved_model_dir) - loaded = loaded_model.layers[-1] - input_arr = tf.ones((4, 3)) - - # Run the layer, and use the keras backend learning phase - keras.backend.set_learning_phase(0) - self.assertAllEqual(input_arr, loaded(input_arr)) - keras.backend.set_learning_phase(1) - self.assertAllEqual(tf.zeros((4, 3)), loaded(input_arr)) - - # Run the layer while explicitly setting the training argument - self.assertAllEqual( - input_arr, loaded(input_arr, training=tf.constant(False))) - self.assertAllEqual( - tf.zeros((4, 3)), - loaded(input_arr, training=tf.constant(True))) - - @test_combinations.run_with_all_model_types - def test_standard_loader(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - model.activity_regularizer = regularizers.get('l2') - def eager_loss(): - return tf.reduce_sum(model.weights[0]) - model.add_loss(eager_loss) - - # Call predict to ensure that all layers are built and inputs are set. - model.predict(np.random.random((1, 3)).astype(np.float32)) - saved_model_dir = self._save_model_dir() - - model.save(saved_model_dir, save_format='tf') - - loaded = tf.saved_model.load(saved_model_dir) - self.evaluate(tf.compat.v1.variables_initializer(loaded.variables)) - all_close = ['variables', 'trainable_variables', - 'non_trainable_variables'] - for attr in all_close: - self.assertAllClose(self.evaluate(getattr(model, attr)), - self.evaluate(getattr(loaded.keras_api, attr))) - self.assertLen(loaded.regularization_losses, 1) - expected_layers = len(model.layers) - self.assertEqual(expected_layers, len(loaded.keras_api.layers)) - input_arr = tf.ones((4, 3)) - self.assertAllClose(self.evaluate(model(input_arr)), - self.evaluate(loaded(input_arr, training=False))) - - @test_combinations.run_with_all_model_types - def test_compiled_model(self): - # TODO(b/134519980): Issue with model.fit if the model call function uses - # a tf.function (Graph mode only). - if not tf.executing_eagerly(): - return - - input_arr = np.random.random((1, 3)) - target_arr = np.random.random((1, 4)) - - model = test_utils.get_small_mlp(1, 4, input_dim=3) - expected_predict = model.predict(input_arr) - - # Compile and save model. - model.compile('rmsprop', 'mse') - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - loaded = keras_load.load(saved_model_dir) - actual_predict = loaded.predict(input_arr) - self.assertAllClose(expected_predict, actual_predict) - - loss_before = loaded.evaluate(input_arr, target_arr) - loaded.fit(input_arr, target_arr) - loss_after = loaded.evaluate(input_arr, target_arr) - self.assertLess(loss_after, loss_before) - predict = loaded.predict(input_arr) - - ckpt_path = os.path.join(self.get_temp_dir(), 'weights') - loaded.save_weights(ckpt_path) - - # Ensure that the checkpoint is compatible with the original model. 
- model.load_weights(ckpt_path) - self.assertAllClose(predict, model.predict(input_arr)) - - def test_metadata_input_spec(self): - class LayerWithNestedSpec(keras.layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = { - 'a': keras.layers.InputSpec(max_ndim=3, axes={-1: 2}), - 'b': keras.layers.InputSpec(shape=(None, 2, 3), dtype='int32')} - - @property - def _use_input_spec_as_call_signature(self): - return True - - layer = LayerWithNestedSpec() - saved_model_dir = self._save_model_dir() - model = test_utils.get_model_from_layers( - [layer], model_type='subclass') - model({'a': tf.constant([[2, 4]]), - 'b': tf.ones([1, 2, 3], dtype=tf.int32)}) - model.save(saved_model_dir, save_format='tf') - loaded_model = keras_load.load(saved_model_dir) - loaded = loaded_model.layers[-1] - self.assertEqual(3, loaded.input_spec['a'].max_ndim) - self.assertEqual({-1: 2}, loaded.input_spec['a'].axes) - self.assertAllEqual([None, 2, 3], loaded.input_spec['b'].shape) - self.assertEqual('int32', loaded.input_spec['b'].dtype) - - def test_must_restore_from_config_fails_if_layer_is_not_in_scope(self): - - class LayerThatShouldFailIfNotAdded(keras.layers.Layer): - _must_restore_from_config = True - - layer = LayerThatShouldFailIfNotAdded() - saved_model_dir = self._save_model_dir() - model = test_utils.get_model_from_layers( - [layer], input_shape=[3], model_type='functional') - model.save(saved_model_dir, save_format='tf') - with self.assertRaisesRegex(ValueError, - 'Unknown layer: LayerThatShouldFailIfNotAdded'): - _ = keras_load.load(saved_model_dir) - - def test_must_restore_from_config_custom_object_scope(self): - - class LayerThatShouldFailIfNotAdded(keras.layers.Layer): - _must_restore_from_config = True - - layer = LayerThatShouldFailIfNotAdded() - model = test_utils.get_model_from_layers( - [layer], input_shape=[3], model_type='functional') - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - with generic_utils.CustomObjectScope( - {'LayerThatShouldFailIfNotAdded': LayerThatShouldFailIfNotAdded}): - _ = keras_load.load(saved_model_dir) - - def test_must_restore_from_config_registration(self): - layer = GlobalLayerThatShouldFailIfNotAdded() - saved_model_dir = self._save_model_dir() - model = test_utils.get_model_from_layers( - [layer], input_shape=[3], model_type='functional') - model.save(saved_model_dir, save_format='tf') - _ = keras_load.load(saved_model_dir) - - def test_multi_input_model(self): - input_1 = keras.layers.Input(shape=(3,)) - input_2 = keras.layers.Input(shape=(5,)) - model = keras.Model([input_1, input_2], [input_1, input_2]) - saved_model_dir = self._save_model_dir() - - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - input_arr_1 = np.random.random((1, 3)).astype('float32') - input_arr_2 = np.random.random((1, 5)).astype('float32') - - outputs = loaded([input_arr_1, input_arr_2]) - self.assertAllEqual(input_arr_1, outputs[0]) - self.assertAllEqual(input_arr_2, outputs[1]) - - def test_revived_sequential(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(5, input_shape=(3,), - kernel_regularizer=regularizers.get('l2'))) - model.add(keras.layers.Dense(2, kernel_regularizer=regularizers.get('l2'))) - - self.evaluate(tf.compat.v1.variables_initializer(model.variables)) - - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - - self.assertLen(loaded.layers, 2) - 
self.assertLen(loaded.losses, 2) - - loaded.pop() - - self.assertLen(loaded.layers, 1) - self.assertLen(loaded.losses, 1) - - loaded.add(keras.layers.Dense(2, kernel_regularizer=regularizers.get('l2'))) - - self.assertLen(loaded.layers, 2) - self.assertLen(loaded.losses, 2) - - def testBatchNormUpdates(self): - model = keras.models.Sequential( - keras.layers.BatchNormalization(input_shape=(1,))) - self.evaluate(tf.compat.v1.variables_initializer(model.variables)) - saved_model_dir = self._save_model_dir() - - with self.captureWritesToStream(sys.stderr) as captured_logs: - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - - # Assert that saving does not log deprecation warnings - # (even if it needs to set learning phase for compat reasons) - if tf.executing_eagerly(): - self.assertNotIn('deprecated', captured_logs.contents()) - - input_arr = tf.constant([[11], [12], [13]], dtype=tf.float32) - input_arr2 = tf.constant([[14], [15], [16]], dtype=tf.float32) - self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0]) - - self.evaluate(loaded(input_arr, training=True)) - if not tf.executing_eagerly(): - self.evaluate(loaded.get_updates_for(input_arr)) - self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0.12]) - - self.evaluate(loaded(input_arr2, training=False)) - if not tf.executing_eagerly(): - self.evaluate(loaded.get_updates_for(input_arr2)) - self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0.12]) - - def testDisablingBatchNormTrainableBeforeSaving(self): - # We disable trainable on the batchnorm layers before saving - model = keras.models.Sequential( - keras.layers.BatchNormalization(input_shape=(1,))) - model.trainable = False - self.evaluate(tf.compat.v1.variables_initializer(model.variables)) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - self.evaluate(tf.compat.v1.variables_initializer(loaded.variables)) - input_arr = tf.constant([[11], [12], [13]], dtype=tf.float32) - input_arr2 = tf.constant([[14], [15], [16]], dtype=tf.float32) - self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0]) - - # Trainable should still be disabled after loading - self.evaluate(loaded(input_arr, training=True)) - if not tf.executing_eagerly(): - self.evaluate(loaded.get_updates_for(input_arr)) - self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0.0]) - - # Re-enabling trainable on the loaded model should cause the batchnorm - # layer to start training again. - # Note: this only works in v2. 
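To make the note above concrete, a standalone sketch of the v2 trainable/BatchNormalization interaction the surrounding assertions rely on (the 0.12 follows from the default momentum of 0.99 and a batch mean of 12):

    import tensorflow as tf

    bn = tf.keras.layers.BatchNormalization(input_shape=(1,))
    model = tf.keras.Sequential([bn])
    model.trainable = False

    x = tf.constant([[11.], [12.], [13.]])
    model(x, training=True)        # trainable=False: runs in inference mode
    print(bn.moving_mean.numpy())  # [0.] -- statistics were not updated

    model.trainable = True
    model(x, training=True)        # updates resume: 0.99 * 0 + 0.01 * 12
    print(bn.moving_mean.numpy())  # ~[0.12]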
- if tf.executing_eagerly(): - loaded.trainable = True - self.evaluate(loaded(input_arr, training=True)) - self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0.12]) - - self.evaluate(loaded(input_arr2, training=False)) - self.assertAllClose(self.evaluate(loaded.layers[-1].moving_mean), [0.12]) - - def testSaveWithSignatures(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(5, input_shape=(3,), - kernel_regularizer=regularizers.get('l2'))) - model.add(keras.layers.Dropout(0.5)) - model.add(keras.layers.Dense(4, kernel_regularizer=regularizers.get('l2'))) - - input_arr = np.random.random((2, 3)) - target_arr = np.random.random((2, 4)) - - model.compile( - loss='mse', - optimizer='rmsprop') - model.train_on_batch(input_arr, target_arr) - - @tf.function(input_signature=[tf.TensorSpec((None, 3))]) - def predict(inputs): - return {'predictions': model(inputs)} - - feature_configs = { - 'inputs': tf.io.FixedLenFeature( - shape=[2, 3], dtype=tf.float32)} - - @tf.function( - input_signature=[tf.TensorSpec([None], tf.string)]) - def parse_and_predict(examples): - features = tf.compat.v1.parse_single_example(examples[0], feature_configs) - return {'predictions': model(features['inputs']), - 'layer_1_outputs': model.layers[0](features['inputs'])} - - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf', signatures={ - 'predict': predict, - 'parse_and_predict': parse_and_predict}) - - loaded = keras_load.load(saved_model_dir) - - self.assertAllClose( - model.predict(input_arr), - loaded.signatures['predict'](tf.convert_to_tensor( - input_arr.astype('float32')))['predictions']) - - feature = { - 'inputs': feature_pb2.Feature( - float_list=feature_pb2.FloatList( - value=input_arr.astype('float32').flatten()))} - example = example_pb2.Example( - features=feature_pb2.Features(feature=feature)) - outputs = loaded.signatures['parse_and_predict']( - tf.convert_to_tensor([example.SerializeToString()])) - self.assertAllClose(model.predict(input_arr), outputs['predictions']) - self.assertAllClose(model.layers[0](input_arr), outputs['layer_1_outputs']) - - def testTrainingDefaults(self): - def assert_training_default(fn, default_value): - arg_spec = tf_inspect.getfullargspec(fn) - fn_defaults = arg_spec.defaults or [] - defaults = dict() - # The call arg defaults are an n-tuple of the last n elements of the args - # list. (n = # of elements that have a default argument) - for i in range(-1 * len(fn_defaults), 0): - defaults[arg_spec.args[i]] = fn_defaults[i] - # The default training arg will be any (non-None) default specified in the - # method signature, or None if no value is specified.
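To make the argspec bookkeeping in `assert_training_default` concrete, a small self-contained illustration with a hypothetical call signature (positional defaults only; the keyword-only defaults are merged in the line that follows):

    import inspect

    def call(self, inputs, training=False, mask=None):
        pass

    spec = inspect.getfullargspec(call)
    # spec.args = ['self', 'inputs', 'training', 'mask']
    # spec.defaults = (False, None): the n defaults pair with the last n args.
    defaults = dict(zip(spec.args[-len(spec.defaults):], spec.defaults))
    print(defaults)  # {'training': False, 'mask': None}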
- defaults.update(arg_spec.kwonlydefaults or {}) - self.assertEqual(defaults['training'], default_value) - - class LayerWithTrainingRequiredArg(keras.engine.base_layer.Layer): - - def call(self, inputs, training): - return control_flow_util.smart_cond(training, lambda: inputs * 0, - lambda: tf.identity(inputs)) - - class LayerWithTrainingDefaultTrue(keras.engine.base_layer.Layer): - - def call(self, inputs, training=True): - return control_flow_util.smart_cond(training, lambda: inputs * 0, - lambda: tf.identity(inputs)) - - class Model(keras.models.Model): - - def __init__(self): - super().__init__() - self.layer_with_training_default_none = LayerWithLearningPhase() - self.layer_with_training_default_true = LayerWithTrainingDefaultTrue() - self.layer_with_required_training_arg = LayerWithTrainingRequiredArg() - - def call(self, inputs): - x = self.layer_with_training_default_none(inputs) - x += self.layer_with_training_default_true(inputs) - x += self.layer_with_required_training_arg(inputs, False) - return x - - model = Model() - # Build and set model inputs - model.predict(np.ones([1, 3]).astype('float32')) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - load = tf.saved_model.load(saved_model_dir) - - # Ensure that the Keras loader is able to load and build the model. - _ = keras_load.load(saved_model_dir) - - assert_training_default(load.__call__, False) - assert_training_default( - load.layer_with_training_default_none.__call__, False) - assert_training_default( - load.layer_with_training_default_true.__call__, True) - - # Assert that there are no defaults for layer with required training arg - arg_spec = tf_inspect.getfullargspec( - load.layer_with_required_training_arg.__call__) - self.assertFalse(arg_spec.defaults) # defaults is None or empty - - def testTraceModelWithKwarg(self): - class Model(keras.models.Model): - - def call(self, inputs, keyword=None): - return tf.identity(inputs) - - model = Model() - prediction = model.predict(np.ones([1, 3]).astype('float32')) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - with keras.utils.generic_utils.custom_object_scope({'Model': Model}): - loaded = keras_load.load(saved_model_dir) - self.assertAllClose(prediction, - loaded.predict(np.ones([1, 3]).astype('float32'))) - - loaded_without_scope = keras_load.load(saved_model_dir) - if tf.__internal__.tf2.enabled(): - with self.assertRaises(NotImplementedError): - loaded_without_scope.predict(np.ones([1, 3]).astype('float32')) - - def testFeatureColumns(self): - # TODO(b/120099662): Error with table initialization with Keras models in - # graph mode. 
- if tf.executing_eagerly(): - numeric = tf.feature_column.numeric_column('a') - bucketized = tf.feature_column.bucketized_column( - numeric, boundaries=[5, 10, 15]) - cat_vocab = tf.feature_column.categorical_column_with_vocabulary_list( - 'b', ['1', '2', '3']) - one_hot = tf.feature_column.indicator_column(cat_vocab) - embedding = tf.feature_column.embedding_column(cat_vocab, dimension=8) - feature_layer = DenseFeatures([bucketized, one_hot, embedding]) - model = keras.models.Sequential(feature_layer) - - features = {'a': np.array([13, 15]), 'b': np.array(['1', '2'])} - predictions = model.predict(features) - - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - loaded_predictions = loaded.predict(features) - self.assertAllClose(predictions, loaded_predictions) - - def testSaveTensorKwarg(self): - - class LayerWithTensorKwarg(keras.layers.Layer): - - def call(self, inputs, tensor=None): - if tensor is not None: - return inputs * tf.cast(tensor, tf.float32) - else: - return inputs - - t = self.evaluate(tf.sequence_mask(1)) - inputs = keras.layers.Input(shape=(3)) - model = keras.models.Model(inputs, LayerWithTensorKwarg()(inputs, t)) - - input_arr = np.random.random((1, 3)) - predictions = model.predict(input_arr) - - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - loaded_predictions = loaded.predict(input_arr) - self.assertAllClose(predictions, loaded_predictions) - - def testModelWithTfFunctionCall(self): - class Subclass(keras.models.Model): - - @tf.function - def call(self, inputs, training=False): - return inputs * tf.cast(training, tf.float32) - - model = Subclass() - model.predict(tf.ones((1, 2)), steps=1) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - self.assertAllEqual( - [[1, 5]], - self.evaluate(loaded(tf.constant([[1, 5.]]), training=True))) - self.assertAllEqual( - [[0, 0]], - self.evaluate(loaded(tf.constant([[1, 5.]]), training=False))) - - def testReviveFunctionalModel(self): - - class CustomAdd(keras.layers.Add): - - def build(self, input_shape): - self.w = self.add_weight('w', shape=[]) - super().build(input_shape) - - def call(self, inputs): - outputs = super().call(inputs) - return outputs * self.w - - input1 = keras.layers.Input(shape=(None, 3), name='input_1') - input2 = keras.layers.Input(shape=(None, 3), name='input_2') - - d = keras.layers.Dense(4, name='dense_with_two_inbound_nodes') - output1 = d(input1) - output2 = d(input2) - - # Use a custom layer in this model to ensure that layers aren't being - # recreated directly from the config. 
- outputs = CustomAdd(name='custom')([output1, output2]) - model = keras.models.Model([input1, input2], outputs, name='save_model') - - self.evaluate(tf.compat.v1.variables_initializer(model.variables)) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - loaded = keras_load.load(saved_model_dir) - self.assertEqual('save_model', loaded.name) - self.assertLen( - loaded.get_layer('dense_with_two_inbound_nodes')._inbound_nodes, 2) - self.assertEqual('CustomAdd', type(loaded.get_layer('custom')).__name__) - self.assertLen(loaded.get_layer('custom').weights, 1) - - def _testAddUpdate(self, scope): - with scope: - layer_with_update = LayerWithUpdate() - model = test_utils.get_model_from_layers([layer_with_update], - input_shape=(3,)) - - x = np.ones((10, 3)) - if test_utils.get_model_type() == 'subclass': - model.predict(x, batch_size=10) - self.evaluate(tf.compat.v1.variables_initializer(model.variables)) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - loaded = keras_load.load(saved_model_dir) - loaded_layer = loaded.layers[-1] - self.evaluate(tf.compat.v1.variables_initializer(loaded.variables)) - self.assertEqual(self.evaluate(loaded_layer.v), 0.) - - loaded.compile('sgd', 'mse') - loaded.fit(x, x, batch_size=10) - self.assertEqual(self.evaluate(loaded_layer.v), 1.) - - @test_combinations.run_with_all_model_types - def testSaveLayerWithUpdates(self): - @tf_contextlib.contextmanager - def nullcontextmanager(): - yield - self._testAddUpdate(nullcontextmanager()) - - @test_combinations.run_with_all_model_types - def testSaveInStrategyScope(self): - self._testAddUpdate(tf.distribute.MirroredStrategy().scope()) - - def testSaveTimeDistributedLayer(self): - model = keras.Sequential([ - keras.layers.TimeDistributed( - keras.layers.Dense(1, kernel_regularizer=regularizers.get('l2')), - input_shape=(None, 1))]) - predictions = model.predict_on_batch(tf.ones((3, 2, 1))) - - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - loaded = keras_load.load(saved_model_dir) - self.assertAllClose(loaded.predict_on_batch(tf.ones((3, 2, 1))), - predictions) - - @parameterized.named_parameters([ - ('with_unrolling', True), - ('no_unrolling', False) - ]) - def testSaveStatefulRNN(self, unroll): - batch = 12 - timesteps = 10 - input_dim = 8 - input_arr = np.ones((batch, timesteps, input_dim)).astype('float32') - - cells = [keras.layers.LSTMCell(32), keras.layers.LSTMCell(64)] - if unroll: - x = keras.Input(batch_shape=(batch, timesteps, input_dim)) - else: - x = keras.Input(batch_shape=(batch, None, input_dim)) - layer = keras.layers.RNN(cells, stateful=True, unroll=unroll) - y = layer(x) - - model = keras.Model(x, y) - model.compile('rmsprop', 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch( - np.zeros((batch, timesteps, input_dim)).astype('float32'), - np.zeros((batch, 64)).astype('float32')) - - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - loaded = keras_load.load(saved_model_dir) - loaded_layer = loaded.layers[1] - - if not tf.executing_eagerly(): - keras.backend.get_session() # force variable initialization - - self.assertAllClose(layer.states, loaded_layer.states) - self.assertAllClose(model(input_arr), loaded(input_arr)) - - def testSaveBidirectionalLSTM(self): - # Make sure that the input spec of an unrolled RNN is not used when wrapped - # in a Bidirectional layer. 
https://github.com/keras-team/keras/issues/15454 - input_layer = keras.Input( - batch_input_shape=(1, 15, 128), name='input', dtype=tf.float32) - lstm_layer = keras.layers.Bidirectional( - keras.layers.LSTM( - units=64, - name='lstm', - dropout=0.2, - trainable=False, - unroll=True, - ) - ) - output_layer = lstm_layer(input_layer) - model = keras.Model(input_layer, output_layer) - saved_model_dir = self._save_model_dir() - self.evaluate(tf.compat.v1.variables_initializer(model.variables)) - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - input_arr = np.random.random((1, 15, 128)).astype('float32') - self.assertAllClose(model(input_arr), loaded(input_arr)) - - @parameterized.named_parameters([('stateful', True), ('stateless', False)]) - def testSaveConvLSTM2D(self, stateful): - data_format = 'channels_first' - batch, timesteps, channels, rows, cols = 12, 10, 8, 4, 4 - input_arr = np.ones( - (batch, timesteps, channels, rows, cols)).astype('float32') - layer = keras.layers.ConvLSTM2D( - filters=16, kernel_size=(1, 1), data_format=data_format, - stateful=stateful) - x = keras.Input(batch_shape=(batch, timesteps, channels, rows, cols)) - y = layer(x) - model = keras.Model(x, y) - - predict_1 = model(input_arr) - self.evaluate([v.initializer for v in model.variables]) - saved_model_dir = self._save_model_dir() - - model.save(saved_model_dir, save_format='tf') - del model - - loaded = keras_load.load(saved_model_dir) - self.evaluate([v.initializer for v in loaded.variables]) - if stateful: - loaded.reset_states() - predict_2 = loaded(input_arr) - self.assertAllClose(predict_1, predict_2) - - def testSaveWithRaggedInputs(self): - - class EmbeddingMerger(keras.layers.Layer): - - def __init__(self, list_features, **kwargs): - super().__init__(**kwargs) - self._supports_ragged_inputs = True - self.embeddings = { - feature: keras.layers.Embedding(10, 3) for feature in list_features} - self.mean = keras.layers.Lambda( - tf.reduce_mean, arguments=dict(axis=1)) - - def call(self, inputs): - tensors = [self.embeddings[col](inputs[col]) for col in inputs] - tensors = [self.mean(inp) for inp in tensors] - return keras.layers.Add()(tensors) - - list_features = ['feature_1', 'feature_2'] - feature_1 = tf.ragged.constant([[0.], [1, 3]]) - feature_2 = tf.ragged.constant([[1., 2], [4]]) - f = {'feature_1': feature_1, - 'feature_2': feature_2} - f_inputs = { - 'feature_1': keras.Input(shape=(None,), name='feature_1', ragged=True), - 'feature_2': keras.Input(shape=(None,), name='feature_2', ragged=True)} - - out = EmbeddingMerger(list_features)(f_inputs) - model = keras.Model(f_inputs, out) - self.evaluate(tf.compat.v1.variables_initializer(model.variables)) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - loaded = keras_load.load(saved_model_dir) - self.evaluate(tf.compat.v1.variables_initializer(loaded.variables)) - self.assertAllClose(model.predict(f), loaded.predict(f)) - - def testSaveMultipleInputs(self): - class CustomLayer(keras.layers.Layer): - - def call(self, *input_list): - self.add_loss(input_list[-2] * 2) - return sum(input_list[:-1]) # The test's last input is a non-tensor arg - - class CustomModel(keras.Model): - - def build(self, _): - self.layer = CustomLayer() - - def call(self, *inputs): - inputs = list(inputs) - inputs.append(object()) # Test that the layer handles non-tensor inputs - return self.layer(*inputs) - - model = CustomModel() - inp = [tf.constant(i, shape=[1, 1], dtype=tf.float32) - for i in 
range(1, 5)] - expected = model(*inp) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - actual = loaded(*inp) - self.assertAllEqual(self.evaluate(expected), - self.evaluate(actual)) - - def testSaveMultipleInputsWithTraining(self): - - class CustomModel(keras.Model): - def call(self, input_1, training, input_2): - if training: - return input_1 - else: - return input_2 - - inp1 = tf.constant(1., shape=[1]) - inp2 = tf.constant(2., shape=[1]) - - model = CustomModel() - self.assertEqual(self.evaluate(model(inp1, True, inp2)), 1.) - self.assertEqual(self.evaluate(model(inp1, False, inp2)), 2.) - - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - loaded = keras_load.load(saved_model_dir) - self.assertEqual(self.evaluate(loaded(inp1, True, inp2)), 1.) - self.assertEqual(self.evaluate(loaded(inp1, False, inp2)), 2.) - - def test_wrapped_layer_training(self): - class Custom(keras.models.Model): - - def __init__(self): - super().__init__() - self.layer = LayerWithLearningPhase() - - def call(self, inputs): - return self.layer(inputs) - model = Custom() - x = tf.constant(1., shape=[1, 1]) - expected_default = model(x) - expected_training_true = model(x, training=True) - expected_training_false = model(x, training=False) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - def assert_loaded_model(loaded): - actual_default = loaded(x) - actual_training_true = loaded(x, training=True) - actual_training_false = loaded(x, training=False) - self.assertAllClose( - [expected_default, expected_training_true, expected_training_false], - [actual_default, actual_training_true, actual_training_false]) - - assert_loaded_model(keras_load.load(saved_model_dir)) - assert_loaded_model(tf.saved_model.load(saved_model_dir)) - - -class TestSavedModelFormat(tf.test.TestCase): - - def _save_model_dir(self, dirname='saved_model'): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - return os.path.join(temp_dir, dirname) - - def test_load_with_custom_model_and_layer(self): - - class CustomLayer(keras.layers.Layer): - - def __call__(self, inputs): - return inputs - - class Model(keras.models.Model): - - def __init__(self): - super().__init__() - self.layer = CustomLayer() - - @tf.function( - input_signature=[tf.TensorSpec([None, 1])]) - def call(self, inputs): - return self.layer(inputs) - - model = Model() - inp = tf.constant([[1.0]]) - model(inp) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - # Even if the `CustomLayer` is not provided in `custom_object_scope`, - # `Model` still has that reference. - with keras.utils.generic_utils.custom_object_scope({'Model': Model}): - loaded = keras_load.load(saved_model_dir) - self.assertAllEqual([[1.0]], self.evaluate(loaded(inp))) - self.assertAllEqual([[1.0]], self.evaluate(loaded.layer(inp))) - self.assertIsInstance(loaded.layer, CustomLayer) - - # If `CustomLayer` is provided in `custom_object_scope`, it should of - # course use that custom class. 
- with keras.utils.generic_utils.custom_object_scope({ - 'Model': Model, - 'CustomLayer': CustomLayer - }): - loaded = keras_load.load(saved_model_dir) - self.assertAllEqual([[1.0]], self.evaluate(loaded(inp))) - self.assertAllEqual([[1.0]], self.evaluate(loaded.layer(inp))) - self.assertIsInstance(loaded.layer, CustomLayer) - - # If the symbol is no longer available, loading should raise an error. - del CustomLayer - with keras.utils.generic_utils.custom_object_scope({'Model': Model}): - with self.assertRaisesRegex( - NameError, 'free variable \'CustomLayer\' referenced ' - 'before assignment in enclosing scope'): - loaded = keras_load.load(saved_model_dir) - - def test_save_without_tracing(self): - - class DoNotTrace(keras.layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = keras.layers.InputSpec(shape=[None]) - self.built = True - - def call(self, inputs): - raise ValueError('I said do not trace') - - def get_config(self): - return {} - - @property - def _use_input_spec_as_call_signature(self): - return True - - root = keras.models.Sequential() - root.add(keras.layers.Input(shape=(3,))) - root.attached_layer = DoNotTrace() - - saved_model_dir = self._save_model_dir() - - # With the default settings, the call function is traced. - with self.assertRaisesRegex(ValueError, 'do not trace'): - root.save(saved_model_dir, save_format='tf') - - # When saving the config only, the layer call function should not be - traced. - root.save(saved_model_dir, save_format='tf', save_traces=False) - loaded = tf.saved_model.load(saved_model_dir) - self.assertTrue(hasattr(loaded, 'attached_layer')) - - # This should raise an error when loaded without the custom object - loaded = keras_load.load(saved_model_dir) - with self.assertRaisesRegex(ValueError, 'Cannot call custom layer'): - loaded.attached_layer(tf.constant([1.])) - - # Try loading with the custom objects - with generic_utils.CustomObjectScope({'DoNotTrace': DoNotTrace}): - loaded = keras_load.load(saved_model_dir) - with self.assertRaisesRegex(ValueError, 'I said do not trace'): - loaded.attached_layer(tf.constant([1.])) - - def test_load_non_keras_saved_model(self): - model = test_utils.get_small_functional_mlp(1, 4, input_dim=3) - saved_model_dir = self._save_model_dir() - tf.saved_model.save(model, saved_model_dir) - with self.assertRaisesRegex(ValueError, 'Unable to create a Keras model'): - keras_load.load(saved_model_dir) - - -class TestLayerCallTracing(tf.test.TestCase, parameterized.TestCase): - - def test_functions_have_same_trace(self): - - class Layer(keras.engine.base_layer.Layer): - - def call(self, inputs): - return inputs - - def call2(self, inputs): - return inputs * 2 - - layer = Layer() - - call_collection = keras_save.LayerCallCollection(layer) - fn = call_collection.add_function(layer.call, 'call', True) - fn2 = call_collection.add_function(layer.call2, 'call2', True) - - with keras_save.tracing_scope(): - fn(np.ones((2, 3))) - fn(np.ones((4, 5))) - - self.assertLen( - fn.wrapped_call._list_all_concrete_functions_for_serialization(), 2) - self.assertLen( - fn2.wrapped_call._list_all_concrete_functions_for_serialization(), 2) - - # Check that the shapes are correct - self.assertEqual( - {(2, 3), (4, 5)}, - set(tuple(c.structured_input_signature[0][0].shape.as_list()) for c in - fn2.wrapped_call._list_all_concrete_functions_for_serialization())) - - def test_training_arg_replacement(self): - - def assert_num_traces(layer_cls, training_keyword): - layer = layer_cls() - call_collection =
keras_save.LayerCallCollection(layer) - fn = call_collection.add_function(layer.call, 'call', True) - - with keras_save.tracing_scope(): - fn(np.ones((2, 3)), training=True) - self.assertLen( - fn.wrapped_call._list_all_concrete_functions_for_serialization(), 2) - with keras_save.tracing_scope(): - fn(np.ones((2, 4)), training=False) - self.assertLen( - fn.wrapped_call._list_all_concrete_functions_for_serialization(), 4) - - if training_keyword: - with keras_save.tracing_scope(): - fn(np.ones((2, 5)), True) - self.assertLen( - fn.wrapped_call._list_all_concrete_functions_for_serialization(), 6) - with keras_save.tracing_scope(): - fn(np.ones((2, 6))) - self.assertLen( - fn.wrapped_call._list_all_concrete_functions_for_serialization(), 8) - - class LayerWithTrainingKeyword(keras.engine.base_layer.Layer): - - def call(self, inputs, training=False): - return inputs * training - - assert_num_traces(LayerWithTrainingKeyword, training_keyword=True) - - class LayerWithKwargs(keras.engine.base_layer.Layer): - - def call(self, inputs, **kwargs): - return inputs * kwargs['training'] - - assert_num_traces(LayerWithKwargs, training_keyword=False) - - class LayerWithChildLayer(keras.engine.base_layer.Layer): - - def __init__(self): - self.child = LayerWithKwargs() - super().__init__() - - def call(self, inputs): - return self.child(inputs) - - assert_num_traces(LayerWithChildLayer, training_keyword=False) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_maintains_losses(self): - layer = LayerWithLoss() - layer(np.ones((2, 3))) - previous_losses = layer.losses[:] - - call_collection = keras_save.LayerCallCollection(layer) - fn = call_collection.add_function(layer.call, 'call', True) - fn(np.ones((2, 3))) - - self.assertAllEqual(self.evaluate(previous_losses), - self.evaluate(layer.losses)) - - -@generic_utils.register_keras_serializable('Testing') -class CustomMeanMetric(keras.metrics.Mean): - - def update_state(self, *args): # pylint: disable=useless-super-delegation - # Sometimes built-in metrics return an op in update_state. Custom - # metrics don't support returning ops, so wrap the update_state method - # while returning nothing. 
- super().update_state(*args) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) -class MetricTest(tf.test.TestCase, parameterized.TestCase): - - def _save_model_dir(self, dirname='saved_model'): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - return os.path.join(temp_dir, dirname) - - def generate_inputs(self, num_tensor_args, shape=(1, 5)): - return [ - np.random.uniform(0, 1, shape).astype('float32') - for _ in range(num_tensor_args) - ] - - def _test_metric_save_and_load(self, - metric, - save_dir, - num_tensor_args, - shape=(1, 5), - test_sample_weight=True): - with self.cached_session(): - model = test_utils.get_model_from_layers( - [keras.layers.Layer()], input_shape=[3], model_type='functional') - model.saved_metric = metric - model.save(save_dir, save_format='tf') - loaded_model = keras_load.load(save_dir) - loaded = loaded_model.saved_metric - self.evaluate([v.initializer for v in loaded.variables]) - self.assertEqual(metric.name, loaded.name) - self.assertEqual(metric.dtype, loaded.dtype) - - inputs = self.generate_inputs(num_tensor_args, shape) - actual = self.evaluate(metric(*inputs)) - self.assertAllClose(actual, loaded(*inputs)) - self.assertAllClose(metric.variables, loaded.variables) - - # Test with separate calls to update state and result. - inputs = self.generate_inputs(num_tensor_args, shape) - self.evaluate(metric.update_state(*inputs)) - self.evaluate(loaded.update_state(*inputs)) - actual = self.evaluate(metric.result()) - self.assertAllClose(actual, loaded.result()) - - if test_sample_weight: - # Test with sample weights input. - inputs = self.generate_inputs(num_tensor_args, shape) - sample_weight = self.generate_inputs(1, [])[0] - inputs.append(sample_weight) - - actual = self.evaluate(metric(*inputs)) - self.assertAllClose(actual, loaded(*inputs)) - return loaded - - @parameterized.named_parameters([ - ('mean', keras.metrics.Mean, 1, (1, 5)), - ('false_positives', keras.metrics.FalsePositives, 2, (1, 5)), - ('precision_at_top_k', keras.metrics.Precision, 2, (2, 3, 4), { - 'top_k': 2, - 'class_id': 1 - }), - ('precision_at_recall', keras.metrics.PrecisionAtRecall, 2, (1, 5), { - 'recall': .8 - }), ('auc', keras.metrics.AUC, 2, (1, 5), { - 'multi_label': True - }), ('cosine_similarity', keras.metrics.CosineSimilarity, 2, (2, 3, 1)) - ]) - def test_metric(self, metric_cls, num_tensor_args, shape, init_kwargs=None): - init_kwargs = init_kwargs or {} - metric = metric_cls(**init_kwargs) - metric(*self.generate_inputs(num_tensor_args, shape)) - self.evaluate([v.initializer for v in metric.variables]) - loaded = self._test_metric_save_and_load(metric, self._save_model_dir(), - num_tensor_args, shape) - self.assertEqual(type(loaded), type(metric)) - - @parameterized.named_parameters([ - ('mean', keras.metrics.Mean, 1, False), - ('auc', keras.metrics.AUC, 2, False), - ('mean_tensor', keras.metrics.MeanTensor, 1, True)]) - def test_custom_metric(self, base_cls, num_tensor_args, requires_build): - - class CustomMetric(base_cls): - - def update_state(self, *args): # pylint: disable=useless-super-delegation - # Sometimes built-in metrics return an op in update_state. Custom - # metrics don't support returning ops, so wrap the update_state method - # while returning nothing. 
- super().update_state(*args) - - with self.cached_session(): - metric = CustomMetric() - save_dir = self._save_model_dir('first_save') - - if requires_build: - metric(*self.generate_inputs(num_tensor_args)) # pylint: disable=not-callable - - self.evaluate([v.initializer for v in metric.variables]) - - with self.assertRaisesRegex(ValueError, - 'Unable to restore custom object'): - self._test_metric_save_and_load(metric, save_dir, num_tensor_args) - with generic_utils.CustomObjectScope({'CustomMetric': CustomMetric}): - loaded = self._test_metric_save_and_load( - metric, - save_dir, - num_tensor_args, - test_sample_weight=False) - - self._test_metric_save_and_load( - loaded, - self._save_model_dir('second_save'), - num_tensor_args, - test_sample_weight=False) - - def test_registered_custom_metric(self): - - with self.cached_session(): - metric = CustomMeanMetric() - save_dir = self._save_model_dir('first_save') - self.evaluate([v.initializer for v in metric.variables]) - loaded = self._test_metric_save_and_load( - metric, - save_dir, - num_tensor_args=1, - test_sample_weight=False) - - self._test_metric_save_and_load( - loaded, - self._save_model_dir('second_save'), - num_tensor_args=1, - test_sample_weight=False) - - def test_custom_metric_wrapped_call(self): - - class NegativeMean(keras.metrics.Mean): - - @tf.function( - input_signature=[tf.TensorSpec(None, tf.float32)]) - def update_state(self, value): - super().update_state(-value) - - metric = NegativeMean() - self.evaluate([v.initializer for v in metric.variables]) - with generic_utils.CustomObjectScope({'NegativeMean': NegativeMean}): - self._test_metric_save_and_load( - metric, self._save_model_dir(), 1, test_sample_weight=False) - - @test_combinations.run_with_all_model_types - def test_custom_metric_model(self): - # TODO(b/134519980): Issue with `model.fit` if the model call function uses - # a `tf.function` in graph mode. 
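(The guard that follows implements the TODO above by skipping graph mode.) For eager mode, the compile-with-custom-metric round trip being tested reduces to this sketch; the class name and path are illustrative only:

    import numpy as np
    import tensorflow as tf

    class CustomMSE(tf.keras.metrics.MeanSquaredError):
        pass

    model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,))])
    model.compile('sgd', 'mse', metrics=[CustomMSE()])
    model.fit(np.random.random((2, 3)), np.random.random((2, 4)), verbose=0)
    model.save('/tmp/metric_model', save_format='tf')

    # Loading without the custom class raises a ValueError mentioning
    # `custom_objects`; supplying the class revives the metric.
    loaded = tf.keras.models.load_model(
        '/tmp/metric_model', custom_objects={'CustomMSE': CustomMSE})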
- if not tf.executing_eagerly(): - return - - x = np.random.random((1, 3)) - y = np.random.random((1, 4)) - - class CustomMetric(keras.metrics.MeanSquaredError): - pass - - def zero_metric(y_true, y_pred): - del y_true, y_pred - return 0 - - model = test_utils.get_small_mlp(1, 4, input_dim=3) - model.compile(loss='mse', optimizer='SGD', - metrics=[CustomMetric(), zero_metric]) - model.fit(x, y) - saved_model_dir = self._save_model_dir() - model.save(saved_model_dir, save_format='tf') - - with self.assertRaisesRegex(ValueError, 'custom_objects'): - keras_load.load(saved_model_dir) - - with generic_utils.CustomObjectScope( - {'CustomMetric': CustomMetric, 'zero_metric': zero_metric}): - loaded = keras_load.load(saved_model_dir) - - self.evaluate([v.initializer for v in loaded.variables]) - loaded.fit(x, y) - - -class TestUpdateMetadata(tf.test.TestCase): - - def testAddFullSaveSpec(self): - save_spec = tf.TensorSpec([3, 5], dtype=tf.int32) - node_metadata = json_utils.Encoder().encode({'save_spec': save_spec}) - - metadata = saved_metadata_pb2.SavedMetadata() - metadata.nodes.add( - version=versions_pb2.VersionDef( - producer=1, min_consumer=1, bad_consumers=[]), - identifier='_tf_keras_model', - metadata=node_metadata) # pylint: disable=protected-access - - new_metadata = keras_load._update_to_current_version(metadata) - node_metadata = json_utils.decode(new_metadata.nodes[0].metadata) - expected_full_spec = ([tf.TensorSpec(shape=(3, 5), dtype=tf.int32)], {}) - self.assertAllEqual(expected_full_spec, node_metadata.get('full_save_spec')) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/saving/saved_model/serialized_attributes.py b/keras/saving/saved_model/serialized_attributes.py deleted file mode 100644 index 1431a33b4283..000000000000 --- a/keras/saving/saved_model/serialized_attributes.py +++ /dev/null @@ -1,319 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Helper classes that list&validate all attributes to serialize to SavedModel. -""" - -from keras.saving.saved_model import constants -from keras.saving.saved_model import order_preserving_set as ops -from keras.saving.saved_model import save_impl -from keras.utils.generic_utils import LazyLoader -import tensorflow.compat.v2 as tf - -# TODO(b/134426265): Switch back to single-quotes to match the rest of the file -# once the issue with copybara is fixed. -# pylint:disable=g-inconsistent-quotes -base_layer = LazyLoader( - "base_layer", globals(), - "keras.engine.base_layer") -training_lib = LazyLoader( - "training_lib", globals(), - "keras.engine.training") -metrics = LazyLoader("metrics", globals(), - "keras.metrics") -base_rnn = LazyLoader( - "base_rnn", globals(), - "keras.layers.rnn.base_rnn") -# pylint:enable=g-inconsistent-quotes - - -class SerializedAttributes: - """Class that tracks and validates all serialization attributes. 
- - Keras models contain many Python-defined components. For example, the - trainable_variables property lists the model's trainable variables by - recursively retrieving the trainable variables from each of the child layers. - Another example is model.call, a Python function that calls child layers and - adds ops to the backend graph. - - Only TensorFlow checkpointable objects and functions can be serialized to - SavedModel. Serializing a Keras model as-is results in a checkpointable object - that does not resemble a Keras model at all. Thus, extra checkpointable - objects and functions must be created during serialization. - - **Defining new serialized attributes** - Child classes should be defined using: - SerializedAttributes.with_attributes( - 'name', checkpointable_objects=[...], functions=[...], copy_from=[...]) - This class is used to cache generated checkpointable objects and functions, - ensuring that new objects and functions are generated a single time. - - **Usage during serialization** - Each Layer/Model object should have a corresponding instance of - SerializedAttributes. Create a new instance by calling - `SerializedAttributes.new(obj)`. Objects and functions may be saved using - `.set_and_validate_objects`/`.set_and_validate_functions`. - The properties `.checkpointable_objects` and `.functions` return the cached - values. - - **Adding/changing attributes to save to SavedModel** - 1. Change the call to `SerializedAttributes.with_attributes` in the correct - class: - - CommonEndpoints: Base attributes to be added during serialization. If - these attributes are present in a Trackable object, it can be - deserialized to a Keras Model. - - LayerAttributes: Attributes to serialize for Layer objects. - - ModelAttributes: Attributes to serialize for Model objects. - 2. Update the class docstring. - 3. Update arguments to any calls to `set_and_validate_*`. For example, if - `call_raw_tensors` is added to the ModelAttributes function list, then - a `call_raw_tensors` function should be passed to - `set_and_validate_functions`. - - **Common endpoints vs other attributes** - Only common endpoints are attached directly to the root object. Keras-specific - attributes are saved to a separate trackable object with the name "keras_api". - The number of objects attached to the root is limited because any naming - conflicts will cause user code to break. - - Another reason is that this will only affect users who call - `tf.saved_model.load` instead of `tf.keras.models.load_model`. These are - advanced users who are likely to have defined their own tf.functions and - trackable objects. The added Keras-specific attributes are kept out of the way - in the "keras_api" namespace. - - Properties defined in this class may be used to filter out keras-specific - attributes: - - `functions_to_serialize`: Returns dict of functions to attach to the root - object. - - `objects_to_serialize`: Returns dict of objects to attach to - the root object (including the separate trackable object containing - keras-specific attributes). - - All changes to the serialized attributes must be backwards-compatible, so - attributes should not be removed or modified without sufficient justification. - """ - - @staticmethod - def with_attributes( - name, checkpointable_objects=None, functions=None, copy_from=None): - """Creates a subclass with all attributes as specified in the arguments.
- - Args: - name: Name of subclass - checkpointable_objects: List of checkpointable objects to be serialized - in the SavedModel. - functions: List of functions to be serialized in the SavedModel. - copy_from: List of other SerializedAttributes subclasses. The returned - class will copy checkpoint objects/functions from each subclass. - - Returns: - Child class with attributes as defined in the `checkpointable_objects` - and `functions` lists. - """ - checkpointable_objects = checkpointable_objects or [] - functions = functions or [] - - if copy_from is not None: - for cls in copy_from: - checkpointable_objects.extend(cls.all_checkpointable_objects) - functions.extend(cls.all_functions) - - # OrderPreservingSets are used here to guarantee serialization determinism - # of Keras objects. - classdict = { - 'all_checkpointable_objects': - ops.OrderPreservingSet(checkpointable_objects), - 'all_functions': - ops.OrderPreservingSet(functions), - } - return type(name, (SerializedAttributes,), classdict) - - @staticmethod - def new(obj): - """Returns a new SerializedAttribute object.""" - if isinstance(obj, training_lib.Model): - return ModelAttributes() - elif isinstance(obj, metrics.Metric): - return MetricAttributes() - elif isinstance(obj, base_rnn.RNN): - return RNNAttributes() - elif isinstance(obj, base_layer.Layer): - return LayerAttributes() - else: - raise TypeError('Internal error during serialization. Expected Keras ' - f'Layer object. Received: {obj} ' - f'(of type {type(obj)})') - - def __init__(self): - self._object_dict = {} - self._function_dict = {} - self._keras_trackable = tf.__internal__.tracking.AutoTrackable() - - @property - def functions(self): - """Returns dictionary of all functions.""" - return {key: value for key, value in self._function_dict.items() - if value is not None} - - @property - def checkpointable_objects(self): - """Returns dictionary of all checkpointable objects.""" - return {key: value for key, value in self._object_dict.items() - if value is not None} - - @property - def functions_to_serialize(self): - """Returns functions to attach to the root object during serialization.""" - functions = {} - for key, v in self.functions.items(): - if key in CommonEndpoints.all_functions: - functions[key] = (v.wrapped_call if isinstance(v, save_impl.LayerCall) - else v) - return functions - - @property - def objects_to_serialize(self): - """Returns objects to attach to the root object during serialization.""" - objects = {key: value for key, value in self.checkpointable_objects.items() - if key in CommonEndpoints.all_checkpointable_objects} - objects[constants.KERAS_ATTR] = self._keras_trackable - return objects - - def set_and_validate_functions(self, function_dict): - """Saves function dictionary, and validates dictionary values.""" - for key in self.all_functions: - if key in function_dict: - if (function_dict[key] is not None and # Not all functions are required - not isinstance(function_dict[key], - (tf.__internal__.function.Function, - tf.types.experimental.ConcreteFunction, - save_impl.LayerCall))): - raise ValueError( - 'The tf.function dictionary contained a non-function object: ' - f'{function_dict[key]} (for key {key}). Only tf.function ' - 'instances or ConcreteFunction instances should be passed.') - fn = function_dict[key] - self._function_dict[key] = fn - - # Extract TensorFlow `Function` from LayerCall. 
- tf_fn = fn.wrapped_call if isinstance(fn, save_impl.LayerCall) else fn - setattr(self._keras_trackable, key, tf_fn) - else: - raise ValueError( - f'Function {key} missing from serialized tf.function dictionary.') - return self.functions - - def set_and_validate_objects(self, object_dict): - """Saves objects to a dictionary, and validates the values.""" - for key in self.all_checkpointable_objects: - if key in object_dict: - if not isinstance(object_dict[key], tf.__internal__.tracking.Trackable): - raise ValueError( - 'The object dictionary contained a non-trackable object: ' - f'{object_dict[key]} (for key {key}). Only trackable objects are ' - f'allowed, such as Keras layers/models or tf.Module instances.') - self._object_dict[key] = object_dict[key] - setattr(self._keras_trackable, key, object_dict[key]) - else: - raise ValueError( - f'Object {key} missing from serialized object dictionary.') - return self.checkpointable_objects - - -class CommonEndpoints(SerializedAttributes.with_attributes( - 'CommonEndpoints', - checkpointable_objects=['variables', 'trainable_variables', - 'regularization_losses'], - functions=['__call__', 'call_and_return_all_conditional_losses', - '_default_save_signature'])): - """Common endpoints shared by all models loadable by Keras. - - List of all attributes: - variables: List of all variables in the model and its sublayers. - trainable_variables: List of all trainable variables in the model and its - sublayers. - regularization_losses: List of all unconditional losses (losses not - dependent on the inputs) in the model and its sublayers. - __call__: Function that takes inputs and returns the outputs of the model - call function. - call_and_return_all_conditional_losses: Function that returns a tuple of - (call function outputs, list of all losses that depend on the inputs). - _default_save_signature: Traced model call function. This is only included - if the top level exported object is a Keras model. - """ - - -class LayerAttributes(SerializedAttributes.with_attributes( - 'LayerAttributes', - checkpointable_objects=['non_trainable_variables', 'layers', 'metrics', - 'layer_regularization_losses', 'layer_metrics'], - functions=['call_and_return_conditional_losses', 'activity_regularizer_fn'], - copy_from=[CommonEndpoints] - )): - """Layer checkpointable objects + functions that are saved to the SavedModel. - - List of all attributes: - All attributes from CommonEndpoints - non_trainable_variables: List of non-trainable variables in the layer and - its sublayers. - layers: List of all sublayers. - metrics: List of all metrics in the layer and its sublayers. - call_and_return_conditional_losses: Function that takes inputs and returns a - tuple of (outputs of the call function, list of input-dependent losses). - The list of losses excludes the activity regularizer function, which is - separate to allow the deserialized Layer object to define a different - activity regularizer. - activity_regularizer_fn: Callable that returns the activity regularizer loss - layer_regularization_losses: List of losses owned only by this layer. - layer_metrics: List of metrics owned by this layer. - """ - - -class ModelAttributes(SerializedAttributes.with_attributes( - 'ModelAttributes', - copy_from=[LayerAttributes])): - """Model checkpointable objects + functions that are saved to the SavedModel. 
- - List of all attributes: - All attributes from LayerAttributes (including CommonEndpoints) - """ - # TODO(kathywu): Add attributes `compile_losses` and `compile_metrics`, which - # list all losses and metrics defined by `model.compile`. - - -class MetricAttributes( - SerializedAttributes.with_attributes( - 'MetricAttributes', - checkpointable_objects=['variables'], - functions=[], - )): - """Attributes that are added to Metric objects when saved to SavedModel. - - List of all attributes: - variables: list of all variables - """ - pass - - -class RNNAttributes(SerializedAttributes.with_attributes( - 'RNNAttributes', - checkpointable_objects=['states'], - copy_from=[LayerAttributes])): - """RNN checkpointable objects + functions that are saved to the SavedModel. - - List of all attributes: - All attributes from LayerAttributes (including CommonEndpoints) - states: List of state variables - """ diff --git a/keras/saving/saved_model/utils.py b/keras/saving/saved_model/utils.py deleted file mode 100644 index 1ea0ac916284..000000000000 --- a/keras/saving/saved_model/utils.py +++ /dev/null @@ -1,261 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility functions shared between SavedModel saving/loading implementations.""" - -import copy -import inspect as _inspect -import itertools -import threading -import types - -from keras import backend -from keras.engine import base_layer_utils -from keras.utils import control_flow_util -from keras.utils import layer_utils -from keras.utils import tf_contextlib -from keras.utils.generic_utils import LazyLoader - -import tensorflow.compat.v2 as tf - - -# pylint:disable=g-inconsistent-quotes -training_lib = LazyLoader( - "training_lib", globals(), - "keras.engine.training") -# pylint:enable=g-inconsistent-quotes - - -def use_wrapped_call(layer, call_fn, call_spec, - default_training_value=None, - return_method=False): - """Creates fn that adds the losses returned by call_fn & returns the outputs. - - Args: - layer: A Keras layer object - call_fn: tf.function that takes layer inputs (and possibly a training arg), - and returns a tuple of (outputs, list of losses). - call_spec: The `CallFunctionSpec` for the layer's call function. - default_training_value: Default value of the training kwarg. If `None`, the - default is `tf.keras.backend.learning_phase()`. - return_method: Whether to return a method bound to the layer. - - Returns: - function that calls call_fn and returns the outputs. Losses returned by - call_fn are added to the layer losses. 
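In plain terms, the wrapper splits the `(outputs, losses)` pair returned by the traced function: losses are re-registered on the live layer, and only the outputs are handed back to the caller. A minimal sketch of that shape; `FakeLayer` is a hypothetical stand-in for a layer's loss-tracking surface:

```python
class FakeLayer:
    """Stand-in for a Keras layer's loss-tracking surface (hypothetical)."""

    def __init__(self):
        self.losses = []

    def add_loss(self, loss):
        self.losses.append(loss)


def wrap_call(layer, fn):
    # `fn` returns (outputs, losses); register the losses, return outputs.
    def call_and_collect(*args, **kwargs):
        outputs, losses = fn(*args, **kwargs)
        for loss in losses:
            layer.add_loss(loss)
        return outputs

    return call_and_collect


layer = FakeLayer()
wrapped = wrap_call(layer, lambda x: (x * 2, [0.1]))
assert wrapped(3) == 6
assert layer.losses == [0.1]
```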
- """ - expects_training_arg = layer_uses_training_bool(layer) - - fn, arg_spec = maybe_add_training_arg( - call_spec, - call_fn, expects_training_arg, default_training_value) - - def return_outputs_and_add_losses(*args, **kwargs): - """Returns the outputs from the layer call function, and adds the losses.""" - if return_method: - args = args[1:] - - outputs, losses = fn(*args, **kwargs) - layer.add_loss(losses) - - # TODO(kathywu): This is a temporary hack. When a network of layers is - # revived from SavedModel, only the top-level layer will have losses. This - # causes issues in eager mode because the child layers may have graph losses - # (thus model.losses returns a mix of Eager and graph tensors). To fix this, - # whenever eager losses are added to one layer, add eager losses to all - # child layers. This causes `.losses` to only return eager losses. - # pylint: disable=protected-access - if tf.executing_eagerly(): - for i in layer._flatten_layers(): - if i is not layer: - i._eager_losses = [base_layer_utils.REVIVED_LOSS_PLACEHOLDER] - # pylint: enable=protected-access - return outputs - - decorated = tf.__internal__.decorator.make_decorator( - target=call_fn, - decorator_func=return_outputs_and_add_losses, - decorator_argspec=arg_spec) - - if return_method: - return types.MethodType(decorated, layer) - else: - return decorated - - -def layer_uses_training_bool(layer): - """Returns whether this layer or any of its children uses the training arg.""" - if layer._expects_training_arg: # pylint: disable=protected-access - return True - visited = {layer} - to_visit = list_all_layers(layer) - while to_visit: - layer = to_visit.pop() - if layer in visited: - continue - if getattr(layer, '_expects_training_arg', True): - return True - visited.add(layer) - to_visit.extend(list_all_layers(layer)) - return False - - -def list_all_layers(obj): - if isinstance(obj, training_lib.Model): - # Handle special case of Sequential, which doesn't return - # the `Input` layer. - return obj.layers - else: - return list(obj._flatten_layers(include_self=False, recursive=False)) # pylint: disable=protected-access - - -def list_all_layers_and_sublayers(obj): - s = set([obj]) - s.update(itertools.chain.from_iterable( - list_all_layers_and_sublayers(layer) for layer in list_all_layers(obj))) - return s - - -def maybe_add_training_arg( - call_spec, wrapped_call, expects_training_arg, - default_training_value): - """Decorate call and optionally adds training argument. - - If a layer expects a training argument, this function ensures that 'training' - is present in the layer args or kwonly args, with the default training value. - - Args: - call_spec: CallFunctionSpec of the layer. - wrapped_call: Wrapped call function. - expects_training_arg: Whether to include 'training' argument. - default_training_value: Default value of the training kwarg to include in - the arg spec. If `None`, the default is - `tf.keras.backend.learning_phase()`. 
- - Returns: - Tuple of ( - function that calls `wrapped_call` and sets the training arg, - Argspec of returned function or `None` if the argspec is unchanged) - """ - if not expects_training_arg: - return wrapped_call, None - - arg_spec = set_training_arg_spec(call_spec.full_argspec, - default_training_value) - call_spec = layer_utils.CallFunctionSpec(arg_spec) - - def wrap_with_training_arg(*args, **kwargs): - """Wrap the `wrapped_call` function, and set training argument.""" - try: - training = call_spec.get_arg_value('training', args, kwargs, - inputs_in_args=True) - except KeyError: - training = None - - if training is None: - training = (default_training_value or - base_layer_utils.call_context().training or - backend.learning_phase()) - - args = list(args) - kwargs = kwargs.copy() - - def replace_training_and_call(training): - new_args, new_kwargs = call_spec.set_arg_value('training', training, args, kwargs, inputs_in_args=True) - return wrapped_call(*new_args, **new_kwargs) - - return control_flow_util.smart_cond( - training, lambda: replace_training_and_call(True), - lambda: replace_training_and_call(False)) - - return wrap_with_training_arg, arg_spec - - -def set_training_arg_spec(arg_spec, default_training_value): - """Set `training=DEFAULT` argument in an ArgSpec.""" - if 'training' in arg_spec.args: - # If `training` is already in the args list, try to set the default value. - index = arg_spec.args.index('training') - training_default_index = len(arg_spec.args) - index - defaults = list(arg_spec.defaults) if arg_spec.defaults is not None else [] - if (arg_spec.defaults and - len(arg_spec.defaults) >= training_default_index and - defaults[-training_default_index] is None): - defaults[-training_default_index] = default_training_value - return arg_spec._replace(defaults=defaults) - elif 'training' not in arg_spec.kwonlyargs: - kwonlyargs = arg_spec.kwonlyargs + ['training'] - kwonlydefaults = copy.copy(arg_spec.kwonlydefaults) or {} - kwonlydefaults['training'] = default_training_value - return arg_spec._replace(kwonlyargs=kwonlyargs, - kwonlydefaults=kwonlydefaults) - - return arg_spec - - -class SaveOptionsContext(threading.local): - - def __init__(self): - super().__init__() - self.save_traces = True - - -_save_options_context = SaveOptionsContext() - - -@tf_contextlib.contextmanager -def keras_option_scope(save_traces): - previous_value = _save_options_context.save_traces - try: - _save_options_context.save_traces = save_traces - yield - finally: - _save_options_context.save_traces = previous_value - - -def should_save_traces(): - """Whether to trace layer functions-can be disabled in the save_traces arg.""" - return _save_options_context.save_traces - - -@tf_contextlib.contextmanager -def no_automatic_dependency_tracking_scope(obj): - """A context that disables automatic dependency tracking when assigning attrs. - - Objects that inherit from Autotrackable automatically creates dependencies - to trackable objects through attribute assignments, and wraps data structures - (lists or dicts) with trackable classes. This scope may be used to temporarily - disable this behavior. This works similar to the decorator - `no_automatic_dependency_tracking`. - - Example usage: - ``` - model = tf.keras.Model() - model.arr1 = [] # Creates a ListWrapper object - with no_automatic_dependency_tracking_scope(model): - model.arr2 = [] # Creates a regular, untracked python list - ``` - - Args: - obj: A trackable object. - - Yields: - a scope in which the object doesn't track dependencies. 
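`keras_option_scope` above follows a common pattern: a module-level `threading.local` holds the flag, and a context manager sets and restores it. A self-contained sketch with illustrative names:

```python
import threading
from contextlib import contextmanager


class _SaveOptions(threading.local):
    def __init__(self):
        super().__init__()
        self.save_traces = True


_options = _SaveOptions()


@contextmanager
def option_scope(save_traces):
    # Flip the per-thread flag, and restore it even if the body raises.
    previous = _options.save_traces
    _options.save_traces = save_traces
    try:
        yield
    finally:
        _options.save_traces = previous


with option_scope(False):
    assert _options.save_traces is False
assert _options.save_traces is True
```

Keeping the flag in `threading.local` storage means concurrent saves on different threads cannot clobber each other's setting.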
- """ - previous_value = getattr(obj, '_setattr_tracking', True) - obj._setattr_tracking = False # pylint: disable=protected-access - try: - yield - finally: - obj._setattr_tracking = previous_value # pylint: disable=protected-access diff --git a/keras/saving/saved_model_experimental.py b/keras/saving/saved_model_experimental.py deleted file mode 100644 index df3d86813baa..000000000000 --- a/keras/saving/saved_model_experimental.py +++ /dev/null @@ -1,465 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Deprecated experimental Keras SavedModel implementation.""" - -import warnings - -from keras import backend -from keras.optimizers import optimizer_v1 -from keras.optimizers.optimizer_v2 import optimizer_v2 -from keras.saving import model_config -from keras.saving import saving_utils -from keras.saving import utils_v1 as model_utils -from keras.utils import mode_keys -from keras.utils.generic_utils import LazyLoader - -import tensorflow.compat.v2 as tf - -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.util.tf_export import keras_export - -# To avoid circular dependencies between keras/engine and keras/saving, -# code in keras/saving must delay imports. - -# TODO(b/134426265): Switch back to single-quotes to match the rest of the file -# once the issue with copybara is fixed. -# pylint:disable=g-inconsistent-quotes -metrics_lib = LazyLoader("metrics_lib", globals(), - "keras.metrics") -models_lib = LazyLoader("models_lib", globals(), - "keras.models") -sequential = LazyLoader( - "sequential", globals(), - "keras.engine.sequential") -# pylint:enable=g-inconsistent-quotes - - -# File name for json format of SavedModel. -SAVED_MODEL_FILENAME_JSON = 'saved_model.json' - - -@keras_export(v1=['keras.experimental.export_saved_model']) -def export_saved_model(model, - saved_model_path, - custom_objects=None, - as_text=False, - input_signature=None, - serving_only=False): - """Exports a `tf.keras.Model` as a Tensorflow SavedModel. - - Note that at this time, subclassed models can only be saved using - `serving_only=True`. - - The exported `SavedModel` is a standalone serialization of Tensorflow objects, - and is supported by TF language APIs and the Tensorflow Serving system. - To load the model, use the function - `tf.keras.experimental.load_from_saved_model`. - - The `SavedModel` contains: - - 1. a checkpoint containing the model weights. - 2. a `SavedModel` proto containing the Tensorflow backend graph. Separate - graphs are saved for prediction (serving), train, and evaluation. If - the model has not been compiled, then only the graph computing predictions - will be exported. - 3. the model's json config. If the model is subclassed, this will only be - included if the model's `get_config()` method is overwritten. - - Example: - - ```python - import tensorflow as tf - - # Create a tf.keras model. 
- model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(1, input_shape=[10])) - model.summary() - - # Save the tf.keras model in the SavedModel format. - path = '/tmp/simple_keras_model' - tf.keras.experimental.export_saved_model(model, path) - - # Load the saved keras model back. - new_model = tf.keras.experimental.load_from_saved_model(path) - new_model.summary() - ``` - - Args: - model: A `tf.keras.Model` to be saved. If the model is subclassed, the flag - `serving_only` must be set to True. - saved_model_path: a string specifying the path to the SavedModel directory. - custom_objects: Optional dictionary mapping string names to custom classes - or functions (e.g. custom loss functions). - as_text: bool, `False` by default. Whether to write the `SavedModel` proto - in text format. Currently unavailable in serving-only mode. - input_signature: A possibly nested sequence of `tf.TensorSpec` objects, used - to specify the expected model inputs. See `tf.function` for more details. - serving_only: bool, `False` by default. When this is true, only the - prediction graph is saved. - - Raises: - NotImplementedError: If the model is a subclassed model, and serving_only is - False. - ValueError: If the input signature cannot be inferred from the model. - AssertionError: If the SavedModel directory already exists and isn't empty. - """ - warnings.warn( - '`tf.keras.experimental.export_saved_model` is deprecated' - 'and will be removed in a future version. ' - 'Please use `model.save(..., save_format="tf")` or ' - '`tf.keras.models.save_model(..., save_format="tf")`.', - stacklevel=2) - if serving_only: - tf.saved_model.save( - model, - saved_model_path, - signatures=saving_utils.trace_model_call(model, input_signature)) - else: - _save_v1_format(model, saved_model_path, custom_objects, as_text, - input_signature) - - try: - _export_model_json(model, saved_model_path) - except NotImplementedError: - logging.warning('Skipped saving model JSON, subclassed model does not have ' - 'get_config() defined.') - - -def _export_model_json(model, saved_model_path): - """Saves model configuration as a json string under assets folder.""" - model_json = model.to_json() - model_json_filepath = tf.io.gfile.join( - _get_or_create_assets_dir(saved_model_path), - tf.compat.as_text(SAVED_MODEL_FILENAME_JSON)) - with tf.io.gfile.GFile(model_json_filepath, 'w') as f: - f.write(model_json) - - -def _export_model_variables(model, saved_model_path): - """Saves model weights in checkpoint format under variables folder.""" - _get_or_create_variables_dir(saved_model_path) - checkpoint_prefix = _get_variables_path(saved_model_path) - model.save_weights(checkpoint_prefix, save_format='tf', overwrite=True) - return checkpoint_prefix - - -def _save_v1_format(model, path, custom_objects, as_text, input_signature): - """Exports model to v1 SavedModel format.""" - if not model._is_graph_network: # pylint: disable=protected-access - if isinstance(model, sequential.Sequential): - # If input shape is not directly set in the model, the exported model - # will infer the expected shapes of the input from the model. - if not model.built: - raise ValueError('Weights for sequential model have not yet been ' - 'created. Weights are created when the Model is first ' - 'called on inputs or `build()` is called with an ' - '`input_shape`, or the first layer in the model has ' - '`input_shape` during construction.') - # TODO(kathywu): Build the model with input_signature to create the - # weights before _export_model_variables(). 
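`_export_model_json` above reduces to writing the architecture string into the SavedModel's assets folder. A rough standard-library equivalent, assuming a local filesystem rather than `tf.io.gfile`; names and paths are illustrative:

```python
import os


def export_model_json(model_json, export_dir):
    # Write the architecture JSON under <export_dir>/assets, next to the
    # graph and variables the builder emits.
    assets_dir = os.path.join(export_dir, "assets")
    os.makedirs(assets_dir, exist_ok=True)
    path = os.path.join(assets_dir, "saved_model.json")
    with open(path, "w") as f:
        f.write(model_json)
    return path
```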
- else: - raise NotImplementedError( - 'Subclassed models can only be exported for serving. Please set ' - 'argument serving_only=True.') - - builder = tf.__internal__.saved_model.SavedModelBuilder(path) # pylint: disable=protected-access - - # Manually save variables to export them in an object-based checkpoint. This - # skips the `builder.add_meta_graph_and_variables()` step, which saves a - # named-based checkpoint. - # TODO(b/113134168): Add fn to Builder to save with object-based saver. - # TODO(b/113178242): This should only export the model json structure. Only - # one save is needed once the weights can be copied from the model to clone. - checkpoint_path = _export_model_variables(model, path) - - # Export each mode. Use ModeKeys enums defined for `Estimator` to ensure that - # Keras models and `Estimator`s are exported with the same format. - # Every time a mode is exported, the code checks to see if new variables have - # been created (e.g. optimizer slot variables). If that is the case, the - # checkpoint is re-saved to include the new variables. - export_args = {'builder': builder, - 'model': model, - 'custom_objects': custom_objects, - 'checkpoint_path': checkpoint_path, - 'input_signature': input_signature} - - has_saved_vars = False - if model.optimizer: - if isinstance(model.optimizer, (optimizer_v1.TFOptimizer, - optimizer_v2.OptimizerV2)): - _export_mode(mode_keys.ModeKeys.TRAIN, has_saved_vars, **export_args) - has_saved_vars = True - _export_mode(mode_keys.ModeKeys.TEST, has_saved_vars, **export_args) - else: - logging.warning( - 'Model was compiled with an optimizer, but the optimizer is not from ' - '`tf.train` (e.g. `tf.train.AdagradOptimizer`). Only the serving ' - 'graph was exported. The train and evaluate graphs were not added to ' - 'the SavedModel.') - _export_mode(mode_keys.ModeKeys.PREDICT, has_saved_vars, **export_args) - - builder.save(as_text) - - -def _get_var_list(model): - """Returns list of all checkpointed saveable objects in the model.""" - var_list, _, _ = tf.__internal__.tracking.ObjectGraphView(model).serialize_object_graph() - return var_list - - -def create_placeholder(spec): - return backend.placeholder(shape=spec.shape, dtype=spec.dtype, name=spec.name) - - -def _export_mode( - mode, has_saved_vars, builder, model, custom_objects, checkpoint_path, - input_signature): - """Exports a model, and optionally saves new vars from the clone model. - - Args: - mode: A `tf.estimator.ModeKeys` string. - has_saved_vars: A `boolean` indicating whether the SavedModel has already - exported variables. - builder: A `SavedModelBuilder` object. - model: A `tf.keras.Model` object. - custom_objects: A dictionary mapping string names to custom classes - or functions. - checkpoint_path: String path to checkpoint. - input_signature: Nested TensorSpec containing the expected inputs. Can be - `None`, in which case the signature will be inferred from the model. - - Raises: - ValueError: If the train/eval mode is being exported, but the model does - not have an optimizer. - """ - compile_clone = (mode != mode_keys.ModeKeys.PREDICT) - if compile_clone and not model.optimizer: - raise ValueError( - f'Model {model.name} does not have an optimizer. 
' - f'Cannot export mode {mode}.') - - model_graph = tf.compat.v1.get_default_graph() - with tf.Graph().as_default() as g, backend.learning_phase_scope( - mode == mode_keys.ModeKeys.TRAIN): - - if input_signature is None: - input_tensors = None - else: - input_tensors = tf.nest.map_structure(create_placeholder, input_signature) - - # Clone the model into blank graph. This will create placeholders for inputs - # and targets. - clone = models_lib.clone_and_build_model( - model, input_tensors=input_tensors, custom_objects=custom_objects, - compile_clone=compile_clone) - - # Make sure that iterations variable is added to the global step collection, - # to ensure that, when the SavedModel graph is loaded, the iterations - # variable is returned by `tf.compat.v1.train.get_global_step()`. This is - # required for compatibility with the SavedModelEstimator. - if compile_clone: - g.add_to_collection(tf.compat.v1.GraphKeys.GLOBAL_STEP, clone.optimizer.iterations) - - # Extract update and train ops from train/test/predict functions. - train_op = None - if mode == mode_keys.ModeKeys.TRAIN: - clone._make_train_function() # pylint: disable=protected-access - train_op = clone.train_function.updates_op - elif mode == mode_keys.ModeKeys.TEST: - clone._make_test_function() # pylint: disable=protected-access - else: - clone._make_predict_function() # pylint: disable=protected-access - g.get_collection_ref(tf.compat.v1.GraphKeys.UPDATE_OPS).extend(clone.state_updates) - - with tf.compat.v1.Session().as_default(): - clone_var_list = _get_var_list(clone) - if has_saved_vars: - # Confirm all variables in the clone have an entry in the checkpoint. - status = clone.load_weights(checkpoint_path) - status.assert_existing_objects_matched() - else: - # Confirm that variables between the clone and model match up exactly, - # not counting optimizer objects. Optimizer objects are ignored because - # if the model has not trained, the slot variables will not have been - # created yet. - # TODO(b/113179535): Replace with trackable equivalence. - _assert_same_non_optimizer_objects(model, model_graph, clone, g) - - # TODO(b/113178242): Use value transfer for trackable objects. - clone.load_weights(checkpoint_path) - - # Add graph and variables to SavedModel. - # TODO(b/113134168): Switch to add_meta_graph_and_variables. - clone.save_weights(checkpoint_path, save_format='tf', overwrite=True) - builder._has_saved_variables = True # pylint: disable=protected-access - - # Add graph to the SavedModel builder. - builder.add_meta_graph( - model_utils.EXPORT_TAG_MAP[mode], - signature_def_map=_create_signature_def_map(clone, mode), - saver=tf.compat.v1.train.Saver( - clone_var_list, - # Allow saving Models with no variables. This is somewhat odd, but - # it's not necessarily a bug. - allow_empty=True), - init_op=tf.compat.v1.local_variables_initializer(), - train_op=train_op) - return None - - -def _create_signature_def_map(model, mode): - """Creates a SignatureDef map from a Keras model.""" - inputs_dict = {name: x for name, x in zip(model.input_names, model.inputs)} - if model.optimizer: - targets_dict = {x.name.split(':')[0]: x - for x in model._targets if x is not None} # pylint: disable=protected-access - inputs_dict.update(targets_dict) - outputs_dict = {name: x - for name, x in zip(model.output_names, model.outputs)} - metrics = saving_utils.extract_model_metrics(model) - - # Add metric variables to the `LOCAL_VARIABLES` collection. Metric variables - # are by default not added to any collections. 
We are doing this here, so - # that metric variables get initialized. - local_vars = set(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.LOCAL_VARIABLES)) - vars_to_add = set() - if metrics is not None: - for key, value in metrics.items(): - if isinstance(value, metrics_lib.Metric): - vars_to_add.update(value.variables) - # Convert Metric instances to (value_tensor, update_op) tuple. - metrics[key] = (value.result(), value.updates[0]) - # Remove variables that are in the local variables collection already. - vars_to_add = vars_to_add.difference(local_vars) - for v in vars_to_add: - tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.LOCAL_VARIABLES, v) - - export_outputs = model_utils.export_outputs_for_mode( - mode, - predictions=outputs_dict, - loss=model.total_loss if model.optimizer else None, - metrics=metrics) - return model_utils.build_all_signature_defs( - inputs_dict, - export_outputs=export_outputs, - serving_only=(mode == mode_keys.ModeKeys.PREDICT)) - - -def _assert_same_non_optimizer_objects(model, model_graph, clone, clone_graph): # pylint: disable=unused-argument - """Asserts model and clone contain the same trackable objects.""" - - # TODO(fchollet, kathywu): make sure this works in eager mode. - return True - - -@keras_export(v1=['keras.experimental.load_from_saved_model']) -def load_from_saved_model(saved_model_path, custom_objects=None): - """Loads a keras Model from a SavedModel created by `export_saved_model()`. - - This function reinstantiates model state by: - 1) loading model topology from json (this will eventually come - from metagraph). - 2) loading model weights from checkpoint. - - Example: - - ```python - import tensorflow as tf - - # Create a tf.keras model. - model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(1, input_shape=[10])) - model.summary() - - # Save the tf.keras model in the SavedModel format. - path = '/tmp/simple_keras_model' - tf.keras.experimental.export_saved_model(model, path) - - # Load the saved keras model back. - new_model = tf.keras.experimental.load_from_saved_model(path) - new_model.summary() - ``` - - Args: - saved_model_path: a string specifying the path to an existing SavedModel. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - - Returns: - a keras.Model instance. - """ - warnings.warn( - '`tf.keras.experimental.load_from_saved_model` is deprecated' - 'and will be removed in a future version. 
' - 'Please switch to `tf.keras.models.load_model`.', - stacklevel=2) - # restore model topology from json string - model_json_filepath = tf.io.gfile.join( - tf.compat.as_bytes(saved_model_path), - tf.compat.as_bytes(tf.saved_model.ASSETS_DIRECTORY), - tf.compat.as_bytes(SAVED_MODEL_FILENAME_JSON)) - with tf.io.gfile.GFile(model_json_filepath, 'r') as f: - model_json = f.read() - model = model_config.model_from_json( - model_json, custom_objects=custom_objects) - - # restore model weights - checkpoint_prefix = tf.io.gfile.join( - tf.compat.as_text(saved_model_path), - tf.compat.as_text(tf.saved_model.VARIABLES_DIRECTORY), - tf.compat.as_text(tf.saved_model.VARIABLES_FILENAME)) - model.load_weights(checkpoint_prefix) - return model - - -#### Directory / path helpers - - -def _get_or_create_variables_dir(export_dir): - """Return variables sub-directory, or create one if it doesn't exist.""" - variables_dir = _get_variables_dir(export_dir) - tf.io.gfile.makedirs(variables_dir) - return variables_dir - - -def _get_variables_dir(export_dir): - """Return variables sub-directory in the SavedModel.""" - return tf.io.gfile.join( - tf.compat.as_text(export_dir), - tf.compat.as_text(tf.saved_model.VARIABLES_DIRECTORY)) - - -def _get_variables_path(export_dir): - """Return the variables path, used as the prefix for checkpoint files.""" - return tf.io.gfile.join( - tf.compat.as_text(_get_variables_dir(export_dir)), - tf.compat.as_text(tf.saved_model.VARIABLES_FILENAME)) - - -def _get_or_create_assets_dir(export_dir): - """Return assets sub-directory, or create one if it doesn't exist.""" - assets_destination_dir = _get_assets_dir(export_dir) - - tf.io.gfile.makedirs(assets_destination_dir) - - return assets_destination_dir - - -def _get_assets_dir(export_dir): - """Return path to asset directory in the SavedModel.""" - return tf.io.gfile.join( - tf.compat.as_text(export_dir), - tf.compat.as_text(tf.saved_model.ASSETS_DIRECTORY)) diff --git a/keras/saving/saved_model_experimental_test.py b/keras/saving/saved_model_experimental_test.py deleted file mode 100644 index 4b42076ee085..000000000000 --- a/keras/saving/saved_model_experimental_test.py +++ /dev/null @@ -1,540 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
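The deleted test module that begins here leaned on a single pattern throughout: predictions taken before a save/load cycle must match predictions taken afterwards within tolerance. Its skeleton, with the exporter and loader passed in as callables rather than the deleted helpers, is roughly:

```python
import numpy as np


def assert_round_trip(model, export_fn, load_fn, path, x, atol=1e-5):
    # Predictions must survive a save/load cycle within tolerance.
    ref = model.predict(x)
    export_fn(model, path)
    restored = load_fn(path)
    np.testing.assert_allclose(ref, restored.predict(x), atol=atol)
```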
-# ============================================================================== -# pylint: disable=protected-access -"""Tests for saving/loading function for keras Model.""" - -import tensorflow.compat.v2 as tf - -import os -import shutil - -from absl.testing import parameterized -import numpy as np - -import keras -from keras.optimizers import optimizer_v1 -from keras.engine import training as model_lib -from keras.optimizers.optimizer_v2 import adadelta -from keras.optimizers.optimizer_v2 import rmsprop -from keras.saving import saved_model_experimental as keras_saved_model -from keras.saving import utils_v1 as model_utils -from keras.utils import control_flow_util -from keras.utils import mode_keys - - -class TestModelSavingandLoading(parameterized.TestCase, tf.test.TestCase): - - def _save_model_dir(self, dirname='saved_model'): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - return os.path.join(temp_dir, dirname) - - def test_saving_sequential_model(self): - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.RepeatVector(3)) - model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) - model.compile( - loss=keras.losses.MSE, - optimizer=rmsprop.RMSprop(lr=0.0001), - metrics=[keras.metrics.categorical_accuracy], - sample_weight_mode='temporal') - x = np.random.random((1, 3)) - y = np.random.random((1, 3, 3)) - model.train_on_batch(x, y) - - ref_y = model.predict(x) - - saved_model_dir = self._save_model_dir() - keras_saved_model.export_saved_model(model, saved_model_dir) - - loaded_model = keras_saved_model.load_from_saved_model(saved_model_dir) - y = loaded_model.predict(x) - self.assertAllClose(ref_y, y, atol=1e-05) - - def test_saving_sequential_model_without_compile(self): - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.RepeatVector(3)) - model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) - - x = np.random.random((1, 3)) - ref_y = model.predict(x) - - saved_model_dir = self._save_model_dir() - keras_saved_model.export_saved_model(model, saved_model_dir) - loaded_model = keras_saved_model.load_from_saved_model(saved_model_dir) - - y = loaded_model.predict(x) - self.assertAllClose(ref_y, y, atol=1e-05) - - def test_saving_functional_model(self): - with self.cached_session(): - inputs = keras.layers.Input(shape=(3,)) - x = keras.layers.Dense(2)(inputs) - output = keras.layers.Dense(3)(x) - - model = keras.models.Model(inputs, output) - model.compile( - loss=keras.losses.MSE, - optimizer=rmsprop.RMSprop(lr=0.0001), - metrics=[keras.metrics.categorical_accuracy]) - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - ref_y = model.predict(x) - - saved_model_dir = self._save_model_dir() - keras_saved_model.export_saved_model(model, saved_model_dir) - loaded_model = keras_saved_model.load_from_saved_model(saved_model_dir) - - y = loaded_model.predict(x) - self.assertAllClose(ref_y, y, atol=1e-05) - - def test_saving_functional_model_without_compile(self): - with self.cached_session(): - inputs = keras.layers.Input(shape=(3,)) - x = keras.layers.Dense(2)(inputs) - output = keras.layers.Dense(3)(x) - - model = keras.models.Model(inputs, output) - - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - - ref_y = model.predict(x) - - saved_model_dir = self._save_model_dir() - 
keras_saved_model.export_saved_model(model, saved_model_dir) - loaded_model = keras_saved_model.load_from_saved_model(saved_model_dir) - - y = loaded_model.predict(x) - self.assertAllClose(ref_y, y, atol=1e-05) - - def test_saving_with_tf_optimizer(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile( - loss='mse', - optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1), - metrics=['acc']) - - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - ref_y = model.predict(x) - - saved_model_dir = self._save_model_dir() - keras_saved_model.export_saved_model(model, saved_model_dir) - loaded_model = keras_saved_model.load_from_saved_model(saved_model_dir) - loaded_model.compile( - loss='mse', - optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1), - metrics=['acc']) - y = loaded_model.predict(x) - self.assertAllClose(ref_y, y, atol=1e-05) - - # test that new updates are the same with both models - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - - ref_loss = model.train_on_batch(x, y) - loss = loaded_model.train_on_batch(x, y) - self.assertAllClose(ref_loss, loss, atol=1e-05) - - ref_y = model.predict(x) - y = loaded_model.predict(x) - self.assertAllClose(ref_y, y, atol=1e-05) - - # test saving/loading again - saved_model_dir2 = self._save_model_dir('saved_model_2') - keras_saved_model.export_saved_model(loaded_model, saved_model_dir2) - loaded_model = keras_saved_model.load_from_saved_model(saved_model_dir2) - y = loaded_model.predict(x) - self.assertAllClose(ref_y, y, atol=1e-05) - - def test_saving_subclassed_model_raise_error(self): - # For now, saving subclassed model should raise an error. It should be - # avoided later with loading from SavedModel.pb. 
- - class SubclassedModel(model_lib.Model): - - def __init__(self): - super().__init__() - self.layer1 = keras.layers.Dense(3) - self.layer2 = keras.layers.Dense(1) - - def call(self, inp): - return self.layer2(self.layer1(inp)) - - model = SubclassedModel() - - saved_model_dir = self._save_model_dir() - with self.assertRaises(NotImplementedError): - keras_saved_model.export_saved_model(model, saved_model_dir) - - -class LayerWithLearningPhase(keras.engine.base_layer.Layer): - - def build(self, input_shape): - self.input_spec = keras.layers.InputSpec(shape=[None] * len(input_shape)) - self.built = True - - def call(self, x, training=None): - if training is None: - training = keras.backend.learning_phase() - output = control_flow_util.smart_cond(training, lambda: x * 0, - lambda: tf.identity(x)) - if not tf.executing_eagerly(): - output._uses_learning_phase = True # pylint: disable=protected-access - return output - - def compute_output_shape(self, input_shape): - return input_shape - - -def functional_model(uses_learning_phase=True): - inputs = keras.layers.Input(shape=(3,)) - x = keras.layers.Dense(2)(inputs) - x = keras.layers.Dense(3)(x) - if uses_learning_phase: - x = LayerWithLearningPhase()(x) - return keras.models.Model(inputs, x) - - -def sequential_model(uses_learning_phase=True): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - if uses_learning_phase: - model.add(LayerWithLearningPhase()) - return model - - -def sequential_model_without_input_shape(uses_learning_phase=True): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2)) - model.add(keras.layers.Dense(3)) - if uses_learning_phase: - model.add(LayerWithLearningPhase()) - return model - - -class Subclassed(keras.models.Model): - - def __init__(self): - super().__init__() - self.dense1 = keras.layers.Dense(2) - self.dense2 = keras.layers.Dense(3) - - def call(self, inputs): - x = self.dense1(inputs) - x = self.dense2(x) - return x - - -def subclassed_model(): - return Subclassed() - - -def load_model(sess, path, mode): - tags = model_utils.EXPORT_TAG_MAP[mode] - sig_def_key = model_utils.SIGNATURE_KEY_MAP[mode] - - meta_graph_def = tf.compat.v1.saved_model.load(sess, tags, path) - inputs = { - k: sess.graph.get_tensor_by_name(v.name) - for k, v in meta_graph_def.signature_def[sig_def_key].inputs.items()} - outputs = { - k: sess.graph.get_tensor_by_name(v.name) - for k, v in meta_graph_def.signature_def[sig_def_key].outputs.items()} - return inputs, outputs, meta_graph_def - - -def get_train_op(meta_graph_def): - graph = tf.compat.v1.get_default_graph() - signature_def = meta_graph_def.signature_def['__saved_model_train_op'] - op_name = signature_def.outputs['__saved_model_train_op'].name - return graph.as_graph_element(op_name) - - -class TestModelSavedModelExport(tf.test.TestCase, parameterized.TestCase): - - def _save_model_dir(self, dirname='saved_model'): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - return os.path.join(temp_dir, dirname) - - @parameterized.parameters( - { - 'model_builder': functional_model, - 'uses_learning_phase': True, - 'optimizer_cls': adadelta.Adadelta, - 'train_before_export': True}, - { - 'model_builder': functional_model, - 'uses_learning_phase': True, - 'optimizer_cls': tf.compat.v1.train.AdadeltaOptimizer, - 'train_before_export': False}, - { - 'model_builder': functional_model, - 'uses_learning_phase': False, - 'optimizer_cls': None, - 
'train_before_export': False}, - { - 'model_builder': sequential_model, - 'uses_learning_phase': True, - 'optimizer_cls': tf.compat.v1.train.AdadeltaOptimizer, - 'train_before_export': True}, - { - 'model_builder': sequential_model, - 'uses_learning_phase': True, - 'optimizer_cls': adadelta.Adadelta, - 'train_before_export': False}, - { - 'model_builder': sequential_model, - 'uses_learning_phase': False, - 'optimizer_cls': None, - 'train_before_export': False}, - { - 'model_builder': sequential_model_without_input_shape, - 'uses_learning_phase': True, - 'optimizer_cls': tf.compat.v1.train.AdadeltaOptimizer, - 'train_before_export': False}) - def testSaveAndLoadSavedModelExport( - self, model_builder, uses_learning_phase, optimizer_cls, - train_before_export): - optimizer = None if optimizer_cls is None else optimizer_cls() - - saved_model_dir = self._save_model_dir() - - np.random.seed(130) - input_arr = np.random.random((1, 3)) - target_arr = np.random.random((1, 3)) - - model = model_builder(uses_learning_phase) - if optimizer is not None: - model.compile( - loss='mse', - optimizer=optimizer, - metrics=['mae']) - if train_before_export: - model.train_on_batch(input_arr, target_arr) - - ref_loss, ref_mae = model.evaluate(input_arr, target_arr) - - ref_predict = model.predict(input_arr) - - # Export SavedModel - keras_saved_model.export_saved_model(model, saved_model_dir) - - input_name = model.input_names[0] - output_name = model.output_names[0] - target_name = output_name + '_target' - - # Load predict graph, and test predictions - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - inputs, outputs, _ = load_model(sess, saved_model_dir, - mode_keys.ModeKeys.PREDICT) - - predictions = sess.run(outputs[output_name], - {inputs[input_name]: input_arr}) - self.assertAllClose(ref_predict, predictions, atol=1e-05) - - if optimizer: - # Load eval graph, and test predictions, loss and metric values - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - inputs, outputs, _ = load_model(sess, saved_model_dir, - mode_keys.ModeKeys.TEST) - - # First obtain the loss and predictions, and run the metric update op by - # feeding in the inputs and targets. - metrics_name = 'mae' if tf.__internal__.tf2.enabled() else 'mean_absolute_error' - metrics_update_op_key = 'metrics/' + metrics_name + '/update_op' - metrics_value_op_key = 'metrics/' + metrics_name + '/value' - - loss, predictions, _ = sess.run( - (outputs['loss'], outputs['predictions/' + output_name], - outputs[metrics_update_op_key]), { - inputs[input_name]: input_arr, - inputs[target_name]: target_arr - }) - - # The metric value should be run after the update op, to ensure that it - # reflects the correct value. 
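The ordering that comment insists on reflects TF1 metric semantics: the update op mutates accumulator state, while the value op only reads it, so reading first returns a stale number. A toy model of that contract in plain Python, with no TF involved:

```python
class ToyMetric:
    """Toy (value, update) pair mimicking TF1 metric semantics."""

    def __init__(self):
        self.total = 0.0
        self.count = 0

    def update(self, x):  # analogous to running the update_op
        self.total += x
        self.count += 1

    def value(self):  # analogous to reading the value tensor
        return self.total / max(self.count, 1)


m = ToyMetric()
m.update(4.0)
assert m.value() == 4.0  # read only after the update has run
```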
- metric_value = sess.run(outputs[metrics_value_op_key]) - - self.assertEqual(int(train_before_export), - sess.run(tf.compat.v1.train.get_global_step())) - self.assertAllClose(ref_loss, loss, atol=1e-05) - self.assertAllClose(ref_mae, metric_value, atol=1e-05) - self.assertAllClose(ref_predict, predictions, atol=1e-05) - - # Load train graph, and check for the train op, and prediction values - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - inputs, outputs, meta_graph_def = load_model( - sess, saved_model_dir, mode_keys.ModeKeys.TRAIN) - self.assertEqual(int(train_before_export), - sess.run(tf.compat.v1.train.get_global_step())) - self.assertIn('loss', outputs) - self.assertIn(metrics_update_op_key, outputs) - self.assertIn(metrics_value_op_key, outputs) - self.assertIn('predictions/' + output_name, outputs) - - # Train for a step - train_op = get_train_op(meta_graph_def) - train_outputs, _ = sess.run( - [outputs, train_op], {inputs[input_name]: input_arr, - inputs[target_name]: target_arr}) - self.assertEqual(int(train_before_export) + 1, - sess.run(tf.compat.v1.train.get_global_step())) - - if uses_learning_phase: - self.assertAllClose( - [[0, 0, 0]], train_outputs['predictions/' + output_name], - atol=1e-05) - else: - self.assertNotAllClose( - [[0, 0, 0]], train_outputs['predictions/' + output_name], - atol=1e-05) - - def testSaveAndLoadSavedModelWithCustomObject(self): - saved_model_dir = self._save_model_dir() - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - def relu6(x): - return keras.backend.relu(x, max_value=6) - inputs = keras.layers.Input(shape=(1,)) - outputs = keras.layers.Activation(relu6)(inputs) - model = keras.models.Model(inputs, outputs) - keras_saved_model.export_saved_model( - model, saved_model_dir, custom_objects={'relu6': relu6}) - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - inputs, outputs, _ = load_model(sess, saved_model_dir, - mode_keys.ModeKeys.PREDICT) - input_name = model.input_names[0] - output_name = model.output_names[0] - predictions = sess.run( - outputs[output_name], {inputs[input_name]: [[7], [-3], [4]]}) - self.assertAllEqual([[6], [0], [4]], predictions) - - def testAssertModelCloneSameObjectsIgnoreOptimizer(self): - input_arr = np.random.random((1, 3)) - target_arr = np.random.random((1, 3)) - - model_graph = tf.Graph() - clone_graph = tf.Graph() - - # Create two models with the same layers but different optimizers. - with tf.compat.v1.Session(graph=model_graph): - inputs = keras.layers.Input(shape=(3,)) - x = keras.layers.Dense(2)(inputs) - x = keras.layers.Dense(3)(x) - model = keras.models.Model(inputs, x) - - model.compile(loss='mse', optimizer=tf.compat.v1.train.AdadeltaOptimizer()) - model.train_on_batch(input_arr, target_arr) - - with tf.compat.v1.Session(graph=clone_graph): - inputs = keras.layers.Input(shape=(3,)) - x = keras.layers.Dense(2)(inputs) - x = keras.layers.Dense(3)(x) - clone = keras.models.Model(inputs, x) - clone.compile(loss='mse', optimizer=optimizer_v1.RMSprop(lr=0.0001)) - clone.train_on_batch(input_arr, target_arr) - - keras_saved_model._assert_same_non_optimizer_objects( - model, model_graph, clone, clone_graph) - - def testAssertModelCloneSameObjectsThrowError(self): - input_arr = np.random.random((1, 3)) - target_arr = np.random.random((1, 3)) - - model_graph = tf.Graph() - clone_graph = tf.Graph() - - # Create two models with the same layers but different optimizers. 
- with tf.compat.v1.Session(graph=model_graph): - inputs = keras.layers.Input(shape=(3,)) - x = keras.layers.Dense(2)(inputs) - x = keras.layers.Dense(3)(x) - model = keras.models.Model(inputs, x) - - model.compile(loss='mse', optimizer=tf.compat.v1.train.AdadeltaOptimizer()) - model.train_on_batch(input_arr, target_arr) - - with tf.compat.v1.Session(graph=clone_graph): - inputs = keras.layers.Input(shape=(3,)) - x = keras.layers.Dense(2)(inputs) - x = keras.layers.Dense(4)(x) - x = keras.layers.Dense(3)(x) - clone = keras.models.Model(inputs, x) - clone.compile(loss='mse', optimizer=optimizer_v1.RMSprop(lr=0.0001)) - clone.train_on_batch(input_arr, target_arr) - - def testSaveSequentialModelWithoutInputShapes(self): - model = sequential_model_without_input_shape(True) - # A Sequential model that hasn't been built should raise an error. - with self.assertRaisesRegex( - ValueError, 'Weights for sequential model have not yet been created'): - keras_saved_model.export_saved_model(model, '') - - # Even with input_signature, the model's weights has not been created. - with self.assertRaisesRegex( - ValueError, 'Weights for sequential model have not yet been created'): - saved_model_dir = self._save_model_dir() - keras_saved_model.export_saved_model( - model, - saved_model_dir, - input_signature=tf.TensorSpec( - shape=(10, 11, 12, 13, 14), dtype=tf.float32, - name='spec_input')) - - @parameterized.parameters( - { - 'model_builder': sequential_model_without_input_shape, - 'input_signature': [tf.TensorSpec(shape=[None, 3], - dtype=tf.float32)]}, - { - 'model_builder': subclassed_model, - 'input_signature': [tf.TensorSpec(shape=[None, 3], - dtype=tf.float32)]}) - def testServingOnly(self, model_builder, input_signature): - if tf.executing_eagerly(): - saved_model_dir = self._save_model_dir() - input_arr = np.random.random((5, 3)).astype(np.float32) - model = model_builder() - ref_predict = model.predict(input_arr) - - keras_saved_model.export_saved_model( - model, - saved_model_dir, - serving_only=True, - input_signature=input_signature) - - # Load predict graph, and test predictions - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - inputs, outputs, _ = load_model(sess, saved_model_dir, - mode_keys.ModeKeys.PREDICT) - predictions = sess.run(outputs[next(iter(outputs.keys()))], - {inputs[next(iter(inputs.keys()))]: input_arr}) - self.assertAllClose(ref_predict, predictions, atol=1e-05) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/saving/saving_api.py b/keras/saving/saving_api.py new file mode 100644 index 000000000000..32c01da30558 --- /dev/null +++ b/keras/saving/saving_api.py @@ -0,0 +1,349 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Public API surface for saving APIs.""" + +import os +import warnings +import zipfile + +import tensorflow.compat.v2 as tf +from tensorflow.python.util.tf_export import keras_export + +from keras.saving import saving_lib +from keras.saving.legacy import save as legacy_sm_saving_lib +from keras.utils import io_utils + +try: + import h5py +except ImportError: + h5py = None + +is_oss = True + + +def _support_gcs_uri(filepath, save_format, is_oss): + """Supports GCS URIs through bigstore via a temporary file.""" + gs_filepath = None + if str(filepath).startswith("gs://") and save_format != "tf": + gs_filepath = filepath + if not is_oss: + gs_filepath = filepath.replace("gs://", "/bigstore/") + filepath = os.path.join( + saving_lib.get_temp_dir(), os.path.basename(gs_filepath) + ) + return gs_filepath, filepath + + +@keras_export("keras.saving.save_model", "keras.models.save_model") +def save_model(model, filepath, overwrite=True, save_format=None, **kwargs): + """Saves a model as a TensorFlow SavedModel or HDF5 file. + + See the [Serialization and Saving guide]( + https://keras.io/guides/serialization_and_saving/) for details. + + Args: + model: Keras model instance to be saved. + filepath: `str` or `pathlib.Path` object. Path where to save the model. + overwrite: Whether we should overwrite any existing model at the target + location, or instead ask the user via an interactive prompt. + save_format: Either `"keras"`, `"tf"`, `"h5"`, + indicating whether to save the model + in the native Keras format (`.keras`), + in the TensorFlow SavedModel format (referred to as "SavedModel" + below), or in the legacy HDF5 format (`.h5`). + Defaults to `"tf"` in TF 2.X, and `"h5"` in TF 1.X. + + SavedModel format arguments: + include_optimizer: Only applied to SavedModel and legacy HDF5 formats. + If False, do not save the optimizer state. Defaults to True. + signatures: Only applies to SavedModel format. Signatures to save + with the SavedModel. See the `signatures` argument in + `tf.saved_model.save` for details. + options: Only applies to SavedModel format. + `tf.saved_model.SaveOptions` object that specifies SavedModel + saving options. + save_traces: Only applies to SavedModel format. When enabled, the + SavedModel will store the function traces for each layer. This + can be disabled, so that only the configs of each layer are stored. + Defaults to `True`. Disabling this will decrease serialization time + and reduce file size, but it requires that all custom layers/models + implement a `get_config()` method. + + Example: + + ```python + model = tf.keras.Sequential([ + tf.keras.layers.Dense(5, input_shape=(3,)), + tf.keras.layers.Softmax()]) + model.save("model.keras") + loaded_model = tf.keras.saving.load_model("model.keras") + x = tf.random.uniform((10, 3)) + assert np.allclose(model.predict(x), loaded_model.predict(x)) + ``` + + Note that `model.save()` is an alias for `tf.keras.saving.save_model()`. + + The SavedModel or HDF5 file contains: + + - The model's configuration (architecture) + - The model's weights + - The model's optimizer's state (if any) + + Thus models can be reinstantiated in the exact same state, without any of + the code used for model definition or training. + + Note that the model weights may have different scoped names after being + loaded. Scoped names include the model/layer names, such as + `"dense_1/kernel:0"`. 
It is recommended that you use the layer properties to
+    access specific variables, e.g. `model.get_layer("dense_1").kernel`.
+
+    __SavedModel serialization format__
+
+    With `save_format="tf"`, the model and all trackable objects attached
+    to it (e.g. layers and variables) are saved as a TensorFlow SavedModel.
+    The model config, weights, and optimizer are included in the SavedModel.
+    Additionally, for every Keras layer attached to the model, the SavedModel
+    stores:
+
+    * The config and metadata -- e.g. name, dtype, trainable status
+    * Traced call and loss functions, which are stored as TensorFlow
+      subgraphs.
+
+    The traced functions allow the SavedModel format to save and load custom
+    layers without the original class definition.
+
+    You can choose to not save the traced functions by disabling the
+    `save_traces` option. This will decrease the time it takes to save the model
+    and the amount of disk space occupied by the output SavedModel. If you
+    enable this option, then you _must_ provide all custom class definitions
+    when loading the model. See the `custom_objects` argument in
+    `tf.keras.saving.load_model`.
+    """
+    save_format = get_save_format(filepath, save_format)
+
+    # Supports GCS URIs through bigstore via a temporary file
+    gs_filepath, filepath = _support_gcs_uri(filepath, save_format, is_oss)
+
+    # Deprecation warnings
+    if save_format == "h5":
+        warnings.warn(
+            "You are saving your model as an HDF5 file via `model.save()`. "
+            "This file format is considered legacy. "
+            "We recommend using instead the native Keras format, "
+            "e.g. `model.save('my_model.keras')`.",
+            stacklevel=2,
+        )
+
+    if save_format == "keras":
+        # If file exists and should not be overwritten.
+        try:
+            exists = os.path.exists(filepath)
+        except TypeError:
+            exists = False
+        if exists and not overwrite:
+            proceed = io_utils.ask_to_proceed_with_overwrite(filepath)
+            if not proceed:
+                return
+        if kwargs:
+            raise ValueError(
+                "The following argument(s) are not supported "
+                f"with the native Keras format: {list(kwargs.keys())}"
+            )
+        saving_lib.save_model(model, filepath)
+    else:
+        # Legacy case
+        return legacy_sm_saving_lib.save_model(
+            model,
+            filepath,
+            overwrite=overwrite,
+            save_format=save_format,
+            **kwargs,
+        )
+
+
+@keras_export("keras.saving.load_model", "keras.models.load_model")
+def load_model(
+    filepath, custom_objects=None, compile=True, safe_mode=True, **kwargs
+):
+    """Loads a model saved via `model.save()`.
+
+    Args:
+        filepath: `str` or `pathlib.Path` object, path to the saved model file.
+        custom_objects: Optional dictionary mapping names
+            (strings) to custom classes or functions to be
+            considered during deserialization.
+        compile: Boolean, whether to compile the model after loading.
+        safe_mode: Boolean, whether to disallow unsafe `lambda` deserialization.
+            When `safe_mode=False`, loading an object has the potential to
+            trigger arbitrary code execution. This argument is only
+            applicable to the Keras v3 model format. Defaults to True.
+
+    SavedModel format arguments:
+        options: Only applies to SavedModel format.
+            Optional `tf.saved_model.LoadOptions` object that specifies
+            SavedModel loading options.
+
+    Returns:
+        A Keras model instance. If the original model was compiled,
+        and the argument `compile=True` is set, then the returned model
+        will be compiled. Otherwise, the model will be left uncompiled.
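The dispatch between the native v3 loader and the legacy path, visible in the function body that follows, reduces to one predicate: a native Keras model file is a zip archive with a `.keras` suffix. In standalone form (a sketch, not the module's own helper):

```python
import zipfile


def is_keras_v3_archive(filepath):
    # A native Keras v3 model file is simply a zip archive named *.keras.
    filepath = str(filepath)
    return filepath.endswith(".keras") and zipfile.is_zipfile(filepath)
```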
+ + Example: + + ```python + model = tf.keras.Sequential([ + tf.keras.layers.Dense(5, input_shape=(3,)), + tf.keras.layers.Softmax()]) + model.save("model.keras") + loaded_model = tf.keras.saving.load_model("model.keras") + x = tf.random.uniform((10, 3)) + assert np.allclose(model.predict(x), loaded_model.predict(x)) + ``` + + Note that the model variables may have different name values + (`var.name` property, e.g. `"dense_1/kernel:0"`) after being reloaded. + It is recommended that you use layer attributes to + access specific variables, e.g. `model.get_layer("dense_1").kernel`. + """ + # Supports GCS URIs by copying data to temporary file + save_format = get_save_format(filepath, save_format=None) + gs_filepath, filepath = _support_gcs_uri(filepath, save_format, is_oss) + if gs_filepath is not None: + tf.io.gfile.copy(gs_filepath, filepath, overwrite=True) + + is_keras_zip = str(filepath).endswith(".keras") and zipfile.is_zipfile( + filepath + ) + + # Support for remote zip files + if ( + saving_lib.is_remote_path(filepath) + and not tf.io.gfile.isdir(filepath) + and not is_keras_zip + ): + local_path = os.path.join( + saving_lib.get_temp_dir(), os.path.basename(filepath) + ) + + # Copy from remote to temporary local directory + tf.io.gfile.copy(filepath, local_path, overwrite=True) + + # Switch filepath to local zipfile for loading model + if zipfile.is_zipfile(local_path): + filepath = local_path + is_keras_zip = True + + if is_keras_zip: + if kwargs: + raise ValueError( + "The following argument(s) are not supported " + f"with the native Keras format: {list(kwargs.keys())}" + ) + return saving_lib.load_model( + filepath, + custom_objects=custom_objects, + compile=compile, + safe_mode=safe_mode, + ) + + # Legacy case. + return legacy_sm_saving_lib.load_model( + filepath, custom_objects=custom_objects, compile=compile, **kwargs + ) + + +def save_weights(model, filepath, overwrite=True, **kwargs): + # Supports GCS URIs through bigstore via a temporary file + save_format = get_save_format(filepath, save_format=None) + gs_filepath, filepath = _support_gcs_uri(filepath, save_format, is_oss) + + if str(filepath).endswith(".weights.h5"): + # If file exists and should not be overwritten. 
+        try:
+            exists = os.path.exists(filepath)
+        except TypeError:
+            exists = False
+        if exists and not overwrite:
+            proceed = io_utils.ask_to_proceed_with_overwrite(filepath)
+            if not proceed:
+                return
+        saving_lib.save_weights_only(model, filepath)
+    else:
+        legacy_sm_saving_lib.save_weights(
+            model, filepath, overwrite=overwrite, **kwargs
+        )
+
+
+def load_weights(model, filepath, skip_mismatch=False, **kwargs):
+    # Supports GCS URIs by copying data to temporary file
+    save_format = get_save_format(filepath, save_format=None)
+    gs_filepath, filepath = _support_gcs_uri(filepath, save_format, is_oss)
+    if gs_filepath is not None:
+        tf.io.gfile.copy(gs_filepath, filepath, overwrite=True)
+
+    if str(filepath).endswith(".keras") and zipfile.is_zipfile(filepath):
+        saving_lib.load_weights_only(
+            model, filepath, skip_mismatch=skip_mismatch
+        )
+    elif str(filepath).endswith(".weights.h5"):
+        saving_lib.load_weights_only(
+            model, filepath, skip_mismatch=skip_mismatch
+        )
+    else:
+        return legacy_sm_saving_lib.load_weights(
+            model, filepath, skip_mismatch=skip_mismatch, **kwargs
+        )
+
+
+def get_save_format(filepath, save_format):
+    if save_format:
+        if save_format == "keras_v3":
+            return "keras"
+        if save_format == "keras":
+            if saving_lib.saving_v3_enabled():
+                return "keras"
+            else:
+                return "h5"
+        if save_format in ("h5", "hdf5"):
+            return "h5"
+        if save_format in ("tf", "tensorflow"):
+            return "tf"
+
+        raise ValueError(
+            "Unknown `save_format` argument. Expected one of "
+            "'keras', 'tf', or 'h5'. "
+            f"Received: save_format={save_format}"
+        )
+
+    # No save format specified: infer from filepath.
+
+    if str(filepath).endswith(".keras"):
+        if saving_lib.saving_v3_enabled():
+            return "keras"
+        else:
+            return "h5"
+
+    if str(filepath).endswith((".h5", ".hdf5")):
+        return "h5"
+
+    if h5py is not None and isinstance(filepath, h5py.File):
+        return "h5"
+
+    # No recognizable file format: default to TF in TF2 and h5 in TF1.
+
+    if tf.__internal__.tf2.enabled():
+        return "tf"
+    else:
+        return "h5"
diff --git a/keras/saving/saving_lib.py b/keras/saving/saving_lib.py
new file mode 100644
index 000000000000..a50dd1998ee1
--- /dev/null
+++ b/keras/saving/saving_lib.py
@@ -0,0 +1,743 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
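The resolution order in `get_save_format` above is: an explicit `save_format` argument wins, then the file extension, then a TF1/TF2-dependent default. A pure-Python restatement for illustration; the boolean flags stand in for `saving_lib.saving_v3_enabled()` and `tf.__internal__.tf2.enabled()`:

```python
def infer_format(filepath, save_format=None, v3_enabled=True, tf2=True):
    # Explicit argument wins over extension, extension over the default.
    if save_format:
        mapping = {
            "keras_v3": "keras",
            "keras": "keras" if v3_enabled else "h5",
            "h5": "h5",
            "hdf5": "h5",
            "tf": "tf",
            "tensorflow": "tf",
        }
        return mapping[save_format]
    fp = str(filepath)
    if fp.endswith(".keras"):
        return "keras" if v3_enabled else "h5"
    if fp.endswith((".h5", ".hdf5")):
        return "h5"
    return "tf" if tf2 else "h5"


assert infer_format("model.keras") == "keras"
assert infer_format("model.h5") == "h5"
assert infer_format("some_dir") == "tf"
assert infer_format("model.keras", save_format="keras", v3_enabled=False) == "h5"
```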
+# ==============================================================================
+"""Python-based idempotent model-saving functionality."""
+
+import datetime
+import io
+import json
+import os
+import re
+import tempfile
+import threading
+import warnings
+import zipfile
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+
+import keras
+from keras import losses
+from keras.engine import base_layer
+from keras.optimizers import optimizer
+from keras.saving.serialization_lib import ObjectSharingScope
+from keras.saving.serialization_lib import deserialize_keras_object
+from keras.saving.serialization_lib import serialize_keras_object
+from keras.utils import generic_utils
+from keras.utils import io_utils
+
+try:
+    import h5py
+except ImportError:
+    h5py = None
+
+keras_saving_gauge = tf.__internal__.monitoring.BoolGauge(
+    "/tensorflow/api/keras/saving", "keras saving usage", "method"
+)
+
+# isort: off
+
+_CONFIG_FILENAME = "config.json"
+_METADATA_FILENAME = "metadata.json"
+_VARS_FNAME = "model.weights"  # Will become e.g. "model.weights.h5"
+_ASSETS_DIRNAME = "assets"
+
+# A temporary flag to enable the new idempotent saving framework.
+_SAVING_V3_ENABLED = threading.local()
+_SAVING_V3_ENABLED.value = True
+
+ATTR_SKIPLIST = frozenset(
+    {
+        "_callable_losses",
+        "_captured_weight_regularizer",
+        "_checkpoint_dependencies",
+        "_deferred_dependencies",
+        "_eager_losses",
+        "_inbound_nodes",
+        "_inbound_nodes_value",
+        "_output_layers",
+        "_input_layers",
+        "_keras_api_names",
+        "_keras_api_names_v1",
+        "_name_based_restores",
+        "_non_trainable_weights",
+        "_outbound_nodes",
+        "_outbound_nodes_value",
+        "_saved_model_arg_spec",
+        "_self_name_based_restores",
+        "_self_saveable_object_factories",
+        "_self_tracked_trackables",
+        "_saved_model_inputs_spec",
+        "_self_unconditional_checkpoint_dependencies",
+        "_self_unconditional_deferred_dependencies",
+        "_self_unconditional_dependency_names",
+        "_tf_api_names",
+        "_tf_api_names_v1",
+        "_trainable_weights",
+        "_unconditional_checkpoint_dependencies",
+        "_unconditional_dependency_names",
+        "_updates",
+        "_layer_call_argspecs",
+        "inbound_nodes",
+        "outbound_nodes",
+        "input_shape",
+        "output_shape",
+        "submodules",
+        "weights",
+        "non_trainable_weights",
+        "trainable_weights",
+        "variables",
+        "non_trainable_variables",
+        "trainable_variables",
+        "updates",  # Would raise a warning if visited.
+        "state_updates",  # Would raise a warning if visited.
+    }
+)
+
+
+def save_model(model, filepath, weights_format="h5"):
+    """Save a zip-archive representing a Keras model to the given filepath.
+
+    The zip-based archive contains the following structure:
+
+    - JSON-based configuration file (config.json): Records of model, layer,
+    and other trackables' configuration.
+    - H5-based or NPZ-based trackable state file (e.g. model.weights.h5),
+    depending on `weights_format`.
+    - Metadata file (metadata.json).
+
+    The states of Keras trackables (layers, optimizers, losses, and metrics)
+    are automatically saved as long as they can be discovered through the
+    attributes returned by `dir(Model)`. Typically, the state includes the
+    variables associated with the trackable, but some special-purpose layers
+    hold more, such as vocabularies stored in hash maps. Trackables define
+    how their state is saved by exposing the `save_own_variables()` /
+    `load_own_variables()` and `save_assets()` / `load_assets()` APIs.
+ + For the case of layer states, the variables will be visited as long as + they are either 1) referenced via layer attributes, or 2) referenced via a + container (list, tuple, or dict), and the container is referenced via a + layer attribute. + """ + + # API usage tracking for Keras V3 saving + keras_saving_gauge.get_cell("save_model_v3").set(True) + + filepath = str(filepath) + if not filepath.endswith(".keras"): + raise ValueError( + "Invalid `filepath` argument: expected a `.keras` extension. " + f"Received: filepath={filepath}" + ) + if weights_format == "h5" and h5py is None: + raise ImportError("h5py must be installed in order to save a model.") + + if not model.built: + warnings.warn( + "You are saving a model that has not yet been built. " + "It might not contain any weights yet. " + "Consider building the model first by calling it " + "on some data.", + stacklevel=2, + ) + saving_v3_enabled_value = getattr(_SAVING_V3_ENABLED, "value", False) + _SAVING_V3_ENABLED.value = True + + with ObjectSharingScope(): + serialized_model_dict = serialize_keras_object(model) + config_json = json.dumps(serialized_model_dict) + metadata_json = json.dumps( + { + "keras_version": keras.__version__, + "date_saved": datetime.datetime.now().strftime("%Y-%m-%d@%H:%M:%S"), + } + ) + # TODO(rameshsampath): Need a better logic for local vs remote path + if is_remote_path(filepath): + # Remote path. Zip to local drive and copy to remote + zip_filepath = os.path.join(get_temp_dir(), "tmp_model.keras") + else: + zip_filepath = filepath + try: + with zipfile.ZipFile(zip_filepath, "w") as zf: + with zf.open(_METADATA_FILENAME, "w") as f: + f.write(metadata_json.encode()) + with zf.open(_CONFIG_FILENAME, "w") as f: + f.write(config_json.encode()) + + if weights_format == "h5": + weights_store = H5IOStore( + _VARS_FNAME + ".h5", archive=zf, mode="w" + ) + elif weights_format == "npz": + weights_store = NpzIOStore( + _VARS_FNAME + ".npz", archive=zf, mode="w" + ) + else: + raise ValueError( + "Unknown `weights_format` argument. " + "Expected 'h5' or 'npz'. " + f"Received: weights_format={weights_format}" + ) + + asset_store = DiskIOStore(_ASSETS_DIRNAME, archive=zf, mode="w") + + _save_state( + model, + weights_store=weights_store, + assets_store=asset_store, + inner_path="", + visited_trackables=set(), + ) + weights_store.close() + asset_store.close() + + if is_remote_path(filepath): + # Using tf.io.gfile context manager doesn't close zip file when + # writing to GCS. Hence writing to local and copying to filepath. + tf.io.gfile.copy(zip_filepath, filepath, overwrite=True) + os.remove(zip_filepath) + except Exception as e: + raise e + finally: + _SAVING_V3_ENABLED.value = saving_v3_enabled_value + + +def load_model(filepath, custom_objects=None, compile=True, safe_mode=True): + """Load a zip archive representing a Keras model.""" + + filepath = str(filepath) + if not filepath.endswith(".keras"): + raise ValueError( + "Invalid filename: expected a `.keras` extension. " + f"Received: filepath={filepath}" + ) + + saving_v3_enabled_value = getattr(_SAVING_V3_ENABLED, "value", False) + _SAVING_V3_ENABLED.value = True + + try: + with tf.io.gfile.GFile( + filepath, mode="r+b" + ) as gfile_handle, zipfile.ZipFile(gfile_handle, "r") as zf: + with zf.open(_CONFIG_FILENAME, "r") as f: + config_json = f.read() + + # Note: we should NOT use a custom JSON decoder. Anything that + # needs custom decoding must be handled in deserialize_keras_object. 
+ config_dict = json.loads(config_json) + if not compile: + # Disable compilation + config_dict["compile_config"] = None + # Construct the model from the configuration file in the archive. + with ObjectSharingScope(): + model = deserialize_keras_object( + config_dict, custom_objects, safe_mode=safe_mode + ) + + all_filenames = zf.namelist() + if _VARS_FNAME + ".h5" in all_filenames: + weights_store = H5IOStore( + _VARS_FNAME + ".h5", archive=zf, mode="r" + ) + elif _VARS_FNAME + ".npz" in all_filenames: + weights_store = NpzIOStore( + _VARS_FNAME + ".npz", archive=zf, mode="r" + ) + else: + raise ValueError( + f"Expected a {_VARS_FNAME}.h5 or {_VARS_FNAME}.npz file." + ) + + if len(all_filenames) > 3: + asset_store = DiskIOStore(_ASSETS_DIRNAME, archive=zf, mode="r") + else: + asset_store = None + + _load_state( + model, + weights_store=weights_store, + assets_store=asset_store, + inner_path="", + visited_trackables=set(), + ) + weights_store.close() + if asset_store: + asset_store.close() + + except Exception as e: + raise e + else: + return model + finally: + _SAVING_V3_ENABLED.value = saving_v3_enabled_value + + +def save_weights_only(model, filepath): + """Save only the weights of a model to a target filepath (.weights.h5). + + Note: only supports h5 for now. + """ + # TODO: if h5 filepath is remote, create the file in a temporary directory + # then upload it + + # API usage tracking for Keras V3 saving + keras_saving_gauge.get_cell("save_weights_v3").set(True) + + filepath = str(filepath) + if not filepath.endswith(".weights.h5"): + raise ValueError( + "Invalid `filepath` argument: expected a `.weights.h5` extension. " + f"Received: filepath={filepath}" + ) + weights_store = H5IOStore(filepath, mode="w") + _save_state( + model, + weights_store=weights_store, + assets_store=None, + inner_path="", + visited_trackables=set(), + ) + weights_store.close() + + +def load_weights_only(model, filepath, skip_mismatch=False): + """Load the weights of a model from a filepath (.keras or .weights.h5). + + Note: only supports h5 for now. 
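A usage sketch for the two weights-only entry points above: the `.weights.h5` suffix is mandatory, and loading requires an already-built model of matching structure. Model shapes and file names here are illustrative.

```python
import numpy as np

import keras
from keras.saving import saving_lib

model = keras.Sequential([keras.layers.Dense(4, input_shape=(3,))])
saving_lib.save_weights_only(model, "model.weights.h5")

# Rebuild an identical architecture and restore its weights.
clone = keras.Sequential([keras.layers.Dense(4, input_shape=(3,))])
saving_lib.load_weights_only(clone, "model.weights.h5")

x = np.random.random((2, 3)).astype("float32")
np.testing.assert_allclose(model.predict(x), clone.predict(x))
```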
+ """ + temp_dir = None + archive = None + filepath = str(filepath) + if filepath.endswith(".weights.h5"): + # TODO: download file if h5 filepath is remote + weights_store = H5IOStore(filepath, mode="r") + elif filepath.endswith(".keras"): + archive = zipfile.ZipFile(filepath, "r") + weights_store = H5IOStore( + _VARS_FNAME + ".h5", archive=archive, mode="r" + ) + + _load_state( + model, + weights_store=weights_store, + assets_store=None, + inner_path="", + skip_mismatch=skip_mismatch, + visited_trackables=set(), + ) + weights_store.close() + if temp_dir and tf.io.gfile.exists(temp_dir): + tf.io.gfile.rmtree(temp_dir) + if archive: + archive.close() + + +def is_remote_path(filepath): + if re.match(r"^(/cns|/cfs|/gcs|.*://).*$", str(filepath)): + return True + return False + + +def _write_to_zip_recursively(zipfile_to_save, system_path, zip_path): + if not tf.io.gfile.isdir(system_path): + zipfile_to_save.write(system_path, zip_path) + else: + for file_name in tf.io.gfile.listdir(system_path): + system_file_path = tf.io.gfile.join(system_path, file_name) + zip_file_path = tf.io.gfile.join(zip_path, file_name) + _write_to_zip_recursively( + zipfile_to_save, system_file_path, zip_file_path + ) + + +def _walk_trackable(trackable): + for child_attr in dir(trackable): + if child_attr.startswith("__") or child_attr in ATTR_SKIPLIST: + continue + try: + child_obj = getattr(trackable, child_attr) + except Exception: + # Avoid raising the exception when visiting the attributes. + continue + yield child_attr, child_obj + + +def _save_state( + trackable, weights_store, assets_store, inner_path, visited_trackables +): + # If the trackable has already been saved, skip it. + if id(trackable) in visited_trackables: + return + + if hasattr(trackable, "save_own_variables") and weights_store: + trackable.save_own_variables(weights_store.make(inner_path)) + if hasattr(trackable, "save_assets") and assets_store: + trackable.save_assets(assets_store.make(inner_path)) + + visited_trackables.add(id(trackable)) + + # Recursively save state of children trackables (layers, optimizers, etc.) + for child_attr, child_obj in _walk_trackable(trackable): + if _is_keras_trackable(child_obj): + _save_state( + child_obj, + weights_store, + assets_store, + inner_path=tf.io.gfile.join(inner_path, child_attr), + visited_trackables=visited_trackables, + ) + elif isinstance(child_obj, (list, dict, tuple, set)): + _save_container_state( + child_obj, + weights_store, + assets_store, + inner_path=tf.io.gfile.join(inner_path, child_attr), + visited_trackables=visited_trackables, + ) + + +def _load_state( + trackable, + weights_store, + assets_store, + inner_path, + skip_mismatch=False, + visited_trackables=None, +): + if visited_trackables and id(trackable) in visited_trackables: + return + + if hasattr(trackable, "load_own_variables") and weights_store: + if skip_mismatch: + try: + trackable.load_own_variables(weights_store.get(inner_path)) + except Exception as e: + warnings.warn( + f"Could not load weights in object {trackable}. " + "Skipping object. " + f"Exception encountered: {e}", + stacklevel=2, + ) + else: + trackable.load_own_variables(weights_store.get(inner_path)) + + if hasattr(trackable, "load_assets") and assets_store: + if skip_mismatch: + try: + trackable.load_assets(assets_store.get(inner_path)) + except Exception as e: + warnings.warn( + f"Could not load assets in object {trackable}. " + "Skipping object. 
" + f"Exception encountered: {e}", + stacklevel=2, + ) + else: + trackable.load_assets(assets_store.get(inner_path)) + + if visited_trackables is not None: + visited_trackables.add(id(trackable)) + + # Recursively load states for Keras trackables such as layers/optimizers. + for child_attr, child_obj in _walk_trackable(trackable): + if _is_keras_trackable(child_obj): + _load_state( + child_obj, + weights_store, + assets_store, + inner_path=tf.io.gfile.join(inner_path, child_attr), + skip_mismatch=skip_mismatch, + visited_trackables=visited_trackables, + ) + elif isinstance(child_obj, (list, dict, tuple, set)): + _load_container_state( + child_obj, + weights_store, + assets_store, + inner_path=tf.io.gfile.join(inner_path, child_attr), + skip_mismatch=skip_mismatch, + visited_trackables=visited_trackables, + ) + + +def _save_container_state( + container, weights_store, assets_store, inner_path, visited_trackables +): + used_names = {} + if isinstance(container, dict): + container = list(container.values()) + + for trackable in container: + if _is_keras_trackable(trackable): + # Keeps layer name indexing in proper order + # when duplicate layers are in container. + if id(trackable) in visited_trackables: + continue + # Do NOT address the trackable via `trackable.name`, since + # names are usually autogenerated and thus not reproducible + # (i.e. they may vary across two instances of the same model). + name = generic_utils.to_snake_case(trackable.__class__.__name__) + if name in used_names: + used_names[name] += 1 + name = f"{name}_{used_names[name]}" + else: + used_names[name] = 0 + _save_state( + trackable, + weights_store, + assets_store, + inner_path=tf.io.gfile.join(inner_path, name), + visited_trackables=visited_trackables, + ) + + +def _load_container_state( + container, + weights_store, + assets_store, + inner_path, + skip_mismatch, + visited_trackables, +): + used_names = {} + if isinstance(container, dict): + container = list(container.values()) + + for trackable in container: + if _is_keras_trackable(trackable): + # Keeps layer name indexing in proper order + # when duplicate layers are in container. + if visited_trackables and id(trackable) in visited_trackables: + continue + # Do NOT address the trackable via `trackable.name`, since + # names are usually autogenerated and thus not reproducible + # (i.e. they may vary across two instances of the same model). + name = generic_utils.to_snake_case(trackable.__class__.__name__) + if name in used_names: + used_names[name] += 1 + name = f"{name}_{used_names[name]}" + else: + used_names[name] = 0 + _load_state( + trackable, + weights_store, + assets_store, + inner_path=tf.io.gfile.join(inner_path, name), + skip_mismatch=skip_mismatch, + visited_trackables=visited_trackables, + ) + + +class DiskIOStore: + """Asset store backed by disk storage. + + If `archive` is specified, then `root_path` refers to the filename + inside the archive. + + If `archive` is not specified, then `root_path` refers to the full path of + the target directory. 
+ """ + + def __init__(self, root_path, archive=None, mode=None): + self.mode = mode + self.root_path = root_path + self.archive = archive + self.tmp_dir = None + if self.archive: + self.tmp_dir = get_temp_dir() + if self.mode == "r": + self.archive.extractall(path=self.tmp_dir) + self.working_dir = tf.io.gfile.join(self.tmp_dir, self.root_path) + if self.mode == "w": + tf.io.gfile.makedirs(self.working_dir) + else: + if mode == "r": + self.working_dir = root_path + else: + self.tmp_dir = get_temp_dir() + self.working_dir = tf.io.gfile.join( + self.tmp_dir, self.root_path + ) + tf.io.gfile.makedirs(self.working_dir) + + def make(self, path): + if not path: + return self.working_dir + path = tf.io.gfile.join(self.working_dir, path) + if not tf.io.gfile.exists(path): + tf.io.gfile.makedirs(path) + return path + + def get(self, path): + if not path: + return self.working_dir + path = tf.io.gfile.join(self.working_dir, path) + if tf.io.gfile.exists(path): + return path + return None + + def close(self): + if self.mode == "w" and self.archive: + _write_to_zip_recursively( + self.archive, self.working_dir, self.root_path + ) + if self.tmp_dir and tf.io.gfile.exists(self.tmp_dir): + tf.io.gfile.rmtree(self.tmp_dir) + + +class H5IOStore: + def __init__(self, root_path, archive=None, mode="r"): + """Numerical variable store backed by HDF5. + + If `archive` is specified, then `root_path` refers to the filename + inside the archive. + + If `archive` is not specified, then `root_path` refers to the path of + the h5 file on disk. + """ + self.root_path = root_path + self.mode = mode + self.archive = archive + self.io_file = None + + if self.archive: + if self.mode == "w": + self.io_file = io.BytesIO() + else: + self.io_file = self.archive.open(self.root_path, "r") + self.h5_file = h5py.File(self.io_file, mode=self.mode) + else: + self.h5_file = h5py.File(root_path, mode=self.mode) + + def make(self, path): + if not path: + return self.h5_file.create_group("vars") + return self.h5_file.create_group(path).create_group("vars") + + def get(self, path): + if not path: + return self.h5_file["vars"] + if path in self.h5_file and "vars" in self.h5_file[path]: + return self.h5_file[path]["vars"] + return {} + + def close(self): + self.h5_file.close() + if self.mode == "w" and self.archive: + self.archive.writestr(self.root_path, self.io_file.getvalue()) + if self.io_file: + self.io_file.close() + + +class NpzIOStore: + def __init__(self, root_path, archive=None, mode="r"): + """Numerical variable store backed by NumPy.savez/load. + + If `archive` is specified, then `root_path` refers to the filename + inside the archive. + + If `archive` is not specified, then `root_path` refers to the path of + the npz file on disk. 
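The write path of `H5IOStore` above buffers the HDF5 file in an in-memory `BytesIO` and only flushes the bytes into the zip archive on `close()`. The same pattern in isolation (assumes `h5py` is installed; paths and group names are illustrative):

```python
import io
import zipfile

import h5py
import numpy as np

# Build the HDF5 file entirely in memory.
buf = io.BytesIO()
with h5py.File(buf, mode="w") as f:
    grp = f.create_group("dense/vars")
    grp["0"] = np.ones((3,))

# Flush the buffered bytes into the archive, as H5IOStore.close() does.
with zipfile.ZipFile("model.keras", "w") as zf:
    zf.writestr("model.weights.h5", buf.getvalue())
```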
+ """ + self.root_path = root_path + self.mode = mode + self.archive = archive + if mode == "w": + self.contents = {} + else: + if self.archive: + self.f = archive.open(root_path, mode="r") + else: + self.f = open(root_path, mode="rb") + self.contents = np.load(self.f, allow_pickle=True) + + def make(self, path): + if not path: + self.contents["__root__"] = {} + return self.contents["__root__"] + self.contents[path] = {} + return self.contents[path] + + def get(self, path): + if not path: + if "__root__" in self.contents: + return dict(self.contents["__root__"]) + return {} + if path in self.contents: + return self.contents[path].tolist() + return {} + + def close(self): + if self.mode == "w": + if self.archive: + self.f = self.archive.open( + self.root_path, mode="w", force_zip64=True + ) + else: + self.f = open(self.root_path, mode="wb") + np.savez(self.f, **self.contents) + self.f.close() + + +def get_temp_dir(): + temp_dir = tempfile.mkdtemp() + testfile = tempfile.TemporaryFile(dir=temp_dir) + testfile.close() + return temp_dir + + +def _is_keras_trackable(obj): + from keras.metrics import base_metric # To avoid circular import + + return isinstance( + obj, + ( + base_layer.Layer, + optimizer.Optimizer, + base_metric.Metric, + losses.Loss, + ), + ) + + +def saving_v3_enabled(): + return getattr(_SAVING_V3_ENABLED, "value", True) + + +# Some debugging utilities. + + +def _print_h5_file(h5_file, prefix="", action=None): + if not prefix: + print(f"Keras weights file ({h5_file}) {action}:") + if not hasattr(h5_file, "keys"): + return + for key in h5_file.keys(): + print(f"...{prefix}{key}") + _print_h5_file(h5_file[key], prefix=prefix + "...") + + +def _print_zip_file(zipfile, action): + io_utils.print_msg(f"Keras model archive {action}:") + # Same as `ZipFile.printdir()` except for using Keras' printing utility. + io_utils.print_msg( + "%-46s %19s %12s" % ("File Name", "Modified ", "Size") + ) + for zinfo in zipfile.filelist: + date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] + io_utils.print_msg( + "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size) + ) diff --git a/keras/saving/saving_lib_test.py b/keras/saving/saving_lib_test.py new file mode 100644 index 000000000000..d13c3457a59f --- /dev/null +++ b/keras/saving/saving_lib_test.py @@ -0,0 +1,886 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Keras python-based idempotent saving functions.""" +import os +import sys +import zipfile +from pathlib import Path +from unittest import mock + +import h5py +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized +from tensorflow.python.platform import tf_logging as logging + +import keras +from keras import backend +from keras.optimizers import adam +from keras.saving import object_registration +from keras.saving import saving_lib +from keras.saving.legacy.saved_model import json_utils +from keras.testing_infra import test_utils +from keras.utils import io_utils + +train_step_message = "This is my training step" +assets_data = "These are my assets" +variables_data = np.random.random((10,)) + + +@keras.utils.register_keras_serializable(package="my_custom_package") +class MyDense(keras.layers.Dense): + def build(self, input_shape): + self.additional_weights = [ + self.add_weight( + "my_additional_weight", + initializer="ones", + trainable=True, + ), + self.add_weight( + "my_additional_weight_2", + initializer="ones", + trainable=True, + ), + ] + self.weights_in_dict = { + "my_weight": self.add_weight( + "my_dict_weight", + initializer="ones", + trainable=True, + ), + } + self.nested_layer = keras.layers.Dense(1) + return super().build(input_shape) + + def call(self, inputs): + call_result = super().call(inputs) + return self.nested_layer(call_result) + + def two(self): + return 2 + + +@keras.utils.register_keras_serializable(package="my_custom_package") +class LayerWithCustomSaving(MyDense): + def build(self, input_shape): + self.assets = assets_data + self.stored_variables = variables_data + return super().build(input_shape) + + def save_assets(self, inner_path): + with open(os.path.join(inner_path, "assets.txt"), "w") as f: + f.write(self.assets) + + def save_own_variables(self, store): + store["variables"] = self.stored_variables + + def load_assets(self, inner_path): + with open(os.path.join(inner_path, "assets.txt"), "r") as f: + text = f.read() + self.assets = text + + def load_own_variables(self, store): + self.stored_variables = np.array(store["variables"]) + + +@keras.utils.register_keras_serializable(package="my_custom_package") +class CustomModelX(keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dense1 = MyDense(1) + self.dense2 = MyDense(1) + + def call(self, inputs): + out = self.dense1(inputs) + return self.dense2(out) + + def train_step(self, data): + tf.print(train_step_message) + x, y = data + with tf.GradientTape() as tape: + y_pred = self(x) + loss = self.compiled_loss(y, y_pred) + + gradients = tape.gradient(loss, self.trainable_variables) + self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) + return {} + + def one(self): + return 1 + + +@keras.utils.register_keras_serializable(package="my_custom_package") +class ModelWithCustomSaving(keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.custom_dense = LayerWithCustomSaving(1) + + def call(self, inputs): + return self.custom_dense(inputs) + + +@keras.utils.register_keras_serializable(package="my_custom_package") +class CompileOverridingModel(keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dense1 = MyDense(1) + + def compile(self, *args, **kwargs): + super().compile(*args, **kwargs) + + def call(self, inputs): + return 
self.dense1(inputs) + + +@keras.utils.register_keras_serializable(package="my_custom_package") +class CompileOverridingSequential(keras.Sequential): + def compile(self, *args, **kwargs): + super().compile(*args, **kwargs) + + +@keras.utils.register_keras_serializable(package="my_custom_package") +def my_mean_squared_error(y_true, y_pred): + """Identical to built-in `mean_squared_error`, added here as a custom + + func. + """ + return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1) + + +module_my_mean_squared_error = my_mean_squared_error + + +@test_utils.run_v2_only +class SavingV3Test(tf.test.TestCase, parameterized.TestCase): + def _get_subclassed_model(self): + subclassed_model = CustomModelX() + subclassed_model.compile( + optimizer=adam.Adam(), + loss=[ + "mse", + keras.losses.mean_squared_error, + keras.losses.MeanSquaredError(), + my_mean_squared_error, + ], + ) + return subclassed_model + + def _get_sequential_model(self): + sequential_model = keras.Sequential([MyDense(1), MyDense(1)]) + sequential_model.compile( + optimizer="adam", loss=["mse", keras.losses.mean_squared_error] + ) + return sequential_model + + def _get_functional_model(self): + inputs = keras.Input(shape=(32,)) + x = MyDense(1, name="first_dense")(inputs) + outputs = MyDense(1, name="second_dense")(x) + functional_model = keras.Model(inputs, outputs) + functional_model.compile( + optimizer="adam", loss=["mse", keras.losses.mean_squared_error] + ) + return functional_model + + def test_saving_after_compile_but_before_fit(self): + temp_filepath = os.path.join(self.get_temp_dir(), "my_model.keras") + subclassed_model = self._get_subclassed_model() + subclassed_model._save_experimental(temp_filepath) + + # This is so that we can register another function with the same custom + # object key, and make sure the newly registered function is used while + # loading. + del object_registration._GLOBAL_CUSTOM_OBJECTS[ + "my_custom_package>my_mean_squared_error" + ] + + @keras.utils.register_keras_serializable(package="my_custom_package") + def my_mean_squared_error(y_true, y_pred): + """Function-local `mean_squared_error`.""" + return backend.mean( + tf.math.squared_difference(y_pred, y_true), axis=-1 + ) + + loaded_model = saving_lib.load_model(temp_filepath) + self.assertEqual( + subclassed_model._is_compiled, loaded_model._is_compiled + ) + + # Everything should be the same class or function for the original model + # and the loaded model. + for model in [subclassed_model, loaded_model]: + self.assertIs( + model.optimizer.__class__, + adam.Adam, + ) + self.assertIs( + model.compiled_loss.__class__, + keras.engine.compile_utils.LossesContainer, + ) + self.assertEqual(model.compiled_loss._losses[0], "mse") + self.assertIs( + model.compiled_loss._losses[1], keras.losses.mean_squared_error + ) + self.assertIs( + model.compiled_loss._losses[2].__class__, + keras.losses.MeanSquaredError, + ) + self.assertIs( + model.compiled_loss._total_loss_mean.__class__, + keras.metrics.base_metric.Mean, + ) + + # Except for a custom function used because the loaded model is supposed + # to be using the newly registered custom function. 
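The deletion/re-registration trick in the test above keys off the registry entry `"my_custom_package>my_mean_squared_error"`; registration keys take the form `package>name`. A sketch, assuming `object_registration.get_registered_object` behaves as in this codebase (the function and package names below are illustrative):

```python
import keras
from keras.saving import object_registration


@keras.utils.register_keras_serializable(package="my_pkg")
def scaled_mse(y_true, y_pred):
    return keras.losses.mean_squared_error(y_true, y_pred) * 0.5


# The test deletes and re-registers a key like this to prove that custom
# object lookup happens at load time rather than at save time.
assert (
    object_registration.get_registered_object("my_pkg>scaled_mse")
    is scaled_mse
)
```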
+ self.assertIs( + subclassed_model.compiled_loss._losses[3], + module_my_mean_squared_error, + ) + self.assertIs( + loaded_model.compiled_loss._losses[3], my_mean_squared_error + ) + self.assertIsNot(module_my_mean_squared_error, my_mean_squared_error) + + def test_saving_after_fit(self): + temp_filepath = os.path.join(self.get_temp_dir(), "my_model.keras") + subclassed_model = self._get_subclassed_model() + + x = np.random.random((100, 32)) + y = np.random.random((100, 1)) + subclassed_model.fit(x, y, epochs=1) + subclassed_model._save_experimental(temp_filepath) + loaded_model = saving_lib.load_model(temp_filepath) + self.assertEqual( + subclassed_model._is_compiled, loaded_model._is_compiled + ) + + io_utils.enable_interactive_logging() + # `tf.print` writes to stderr. This is to make sure the custom training + # step is used. + with self.captureWritesToStream(sys.stderr) as printed: + loaded_model.fit(x, y, epochs=1) + self.assertRegex(printed.contents(), train_step_message) + + # Check that the custom classes do get used. + self.assertIsInstance(loaded_model, CustomModelX) + self.assertIsInstance(loaded_model.dense1, MyDense) + # Check that the custom method is available. + self.assertEqual(loaded_model.one(), 1) + self.assertEqual(loaded_model.dense1.two(), 2) + + # Everything should be the same class or function for the original model + # and the loaded model. + for model in [subclassed_model, loaded_model]: + self.assertIs( + model.optimizer.__class__, + adam.Adam, + ) + self.assertIs( + model.compiled_loss.__class__, + keras.engine.compile_utils.LossesContainer, + ) + self.assertIs( + model.compiled_loss._losses[0].__class__, + keras.losses.LossFunctionWrapper, + ) + self.assertIs( + model.compiled_loss._losses[1].__class__, + keras.losses.LossFunctionWrapper, + ) + self.assertIs( + model.compiled_loss._losses[2].__class__, + keras.losses.MeanSquaredError, + ) + self.assertIs( + model.compiled_loss._losses[3].__class__, + keras.losses.LossFunctionWrapper, + ) + self.assertIs( + model.compiled_loss._total_loss_mean.__class__, + keras.metrics.base_metric.Mean, + ) + + def test_saving_preserve_unbuilt_state(self): + temp_filepath = os.path.join(self.get_temp_dir(), "my_model.keras") + subclassed_model = CustomModelX() + subclassed_model._save_experimental(temp_filepath) + loaded_model = saving_lib.load_model(temp_filepath) + self.assertEqual( + subclassed_model._is_compiled, loaded_model._is_compiled + ) + self.assertFalse(subclassed_model.built) + self.assertFalse(loaded_model.built) + + def test_saving_preserve_built_state(self): + temp_filepath = os.path.join(self.get_temp_dir(), "my_model.keras") + model = self._get_subclassed_model() + x = np.random.random((100, 32)) + y = np.random.random((100, 1)) + model.fit(x, y, epochs=1) + model._save_experimental(temp_filepath) + loaded_model = saving_lib.load_model(temp_filepath) + self.assertEqual(model._is_compiled, loaded_model._is_compiled) + self.assertTrue(model.built) + self.assertTrue(loaded_model.built) + self.assertEqual( + model._build_input_shape, loaded_model._build_input_shape + ) + self.assertEqual( + tf.TensorShape([None, 32]), loaded_model._build_input_shape + ) + + def test_saved_module_paths_and_class_names(self): + temp_filepath = os.path.join(self.get_temp_dir(), "my_model.keras") + subclassed_model = self._get_subclassed_model() + x = np.random.random((100, 32)) + y = np.random.random((100, 1)) + subclassed_model.fit(x, y, epochs=1) + subclassed_model._save_experimental(temp_filepath) + + with 
zipfile.ZipFile(temp_filepath, "r") as z: + with z.open(saving_lib._CONFIG_FILENAME, "r") as c: + config_json = c.read() + config_dict = json_utils.decode(config_json) + self.assertEqual( + config_dict["registered_name"], "my_custom_package>CustomModelX" + ) + self.assertEqual( + config_dict["compile_config"]["optimizer"]["config"][ + "is_legacy_optimizer" + ], + False, + ) + self.assertEqual( + config_dict["compile_config"]["optimizer"]["class_name"], + "Adam", + ) + self.assertLen(config_dict["compile_config"]["loss"], 4) + self.assertEqual( + config_dict["compile_config"]["loss"][0], + "mse", + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + layer=["tf_op_lambda", "lambda"], + ) + ) + def test_functional_model_with_tf_op_lambda_layer(self, layer): + class ToString: + def __init__(self): + self.contents = "" + + def __call__(self, msg): + self.contents += msg + "\n" + + temp_filepath = os.path.join(self.get_temp_dir(), "my_model.keras") + + if layer == "lambda": + func = tf.function(lambda x: tf.math.cos(x) + tf.math.sin(x)) + inputs = keras.layers.Input(shape=(32,)) + outputs = keras.layers.Dense(1)(inputs) + outputs = keras.layers.Lambda(func._python_function)(outputs) + + elif layer == "tf_op_lambda": + inputs = keras.layers.Input(shape=(32,)) + outputs = keras.layers.Dense(1)(inputs) + outputs = outputs + inputs + + functional_model = keras.Model(inputs, outputs) + functional_to_string = ToString() + functional_model.summary(print_fn=functional_to_string) + functional_model.compile(optimizer="adam", loss="mse", metrics=["mae"]) + + x = np.random.random((1000, 32)) + y = np.random.random((1000, 1)) + functional_model.fit(x, y, epochs=3) + functional_model._save_experimental(temp_filepath) + loaded_model = saving_lib.load_model(temp_filepath, safe_mode=False) + self.assertEqual( + functional_model._is_compiled, loaded_model._is_compiled + ) + + loaded_model.fit(x, y, epochs=3) + loaded_to_string = ToString() + loaded_model.summary(print_fn=loaded_to_string) + + # Confirming the original and saved/loaded model have same structure. + self.assertEqual( + functional_to_string.contents, loaded_to_string.contents + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + model_type=["sequential", "functional", "subclassed"], + ) + ) + def test_saving_model_state(self, model_type): + temp_filepath = os.path.join(self.get_temp_dir(), "my_model.keras") + model = getattr(self, f"_get_{model_type}_model")() + x = np.random.random((100, 32)) + y = np.random.random((100, 1)) + model.fit(x, y, epochs=1) + + # Assert that the archive has not been saved. + self.assertFalse(os.path.exists(temp_filepath)) + + # Mutate the `Dense` layer custom weights to ensure that list and + # dict-contained weights get restored. + model.layers[1].additional_weights[0].assign(2) + model.layers[1].weights_in_dict["my_weight"].assign(2) + model.layers[1].nested_layer.kernel.assign([[1]]) + + model._save_experimental(temp_filepath) + + # Assert that the archive has been saved. + self.assertTrue(os.path.exists(temp_filepath)) + loaded_model = saving_lib.load_model(temp_filepath) + self.assertEqual(model._is_compiled, loaded_model._is_compiled) + + # The weights are supposed to be the same (between original and loaded + # models). 
+ for original_weights, loaded_weights in zip( + model.get_weights(), loaded_model.get_weights() + ): + np.testing.assert_allclose(original_weights, loaded_weights) + + # The optimizer variables are supposed to be the same (between original + # and loaded models). + for original_weights, loaded_weights in zip( + model.optimizer.variables, loaded_model.optimizer.variables + ): + np.testing.assert_allclose(original_weights, loaded_weights) + + def test_saving_custom_assets_and_variables(self): + temp_filepath = os.path.join(self.get_temp_dir(), "my_model.keras") + model = ModelWithCustomSaving() + model.compile( + optimizer=adam.Adam(), + loss=[ + "mse", + keras.losses.mean_squared_error, + keras.losses.MeanSquaredError(), + my_mean_squared_error, + ], + ) + x = np.random.random((100, 32)) + y = np.random.random((100, 1)) + model.fit(x, y, epochs=1) + + # Assert that the archive has not been saved. + self.assertFalse(os.path.exists(temp_filepath)) + + model._save_experimental(temp_filepath) + + loaded_model = saving_lib.load_model(temp_filepath) + self.assertEqual(loaded_model.custom_dense.assets, assets_data) + self.assertEqual( + loaded_model.custom_dense.stored_variables.tolist(), + variables_data.tolist(), + ) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + model_type=["subclassed", "sequential"], + ) + ) + def test_compile_overridden_model_raises_if_no_from_config_overridden( + self, model_type + ): + temp_filepath = os.path.join(self.get_temp_dir(), "my_model.keras") + model = ( + CompileOverridingModel() + if model_type == "subclassed" + else CompileOverridingSequential( + [keras.layers.Embedding(4, 1), MyDense(1), MyDense(1)] + ) + ) + model.compile("rmsprop", "mse") + model._save_experimental(temp_filepath) + + with mock.patch.object(logging, "warning") as mock_warn: + saving_lib.load_model(temp_filepath) + if not mock_warn.call_args_list: + raise AssertionError("Did not warn.") + self.assertIn( + "`compile()` was not called as part of model loading " + "because the model's `compile()` method is custom. 
", + mock_warn.call_args_list[0][0][0], + ) + + def test_metadata(self): + temp_filepath = Path( + os.path.join(self.get_temp_dir(), "my_model.keras") + ) + model = CompileOverridingModel() + model._save_experimental(temp_filepath) + with zipfile.ZipFile(temp_filepath, "r") as z: + with z.open(saving_lib._METADATA_FILENAME, "r") as c: + metadata_json = c.read() + metadata = json_utils.decode(metadata_json) + self.assertIn("keras_version", metadata) + self.assertIn("date_saved", metadata) + + def test_gfile_copy_local_called(self): + temp_filepath = Path( + os.path.join(self.get_temp_dir(), "my_model.keras") + ) + model = CompileOverridingModel() + with mock.patch("re.match", autospec=True) as mock_re_match, mock.patch( + "tensorflow.compat.v2.io.gfile.copy", autospec=True + ) as mock_copy: + # Mock Remote Path check to true to test gfile copy logic + mock_re_match.return_value = True + model._save_experimental(temp_filepath) + mock_re_match.assert_called() + mock_copy.assert_called() + self.assertIn(str(temp_filepath), mock_re_match.call_args.args) + self.assertIn(str(temp_filepath), mock_copy.call_args.args) + + def test_load_model_api_endpoint(self): + temp_filepath = Path(os.path.join(self.get_temp_dir(), "mymodel.keras")) + model = self._get_functional_model() + ref_input = np.random.random((10, 32)) + ref_output = model.predict(ref_input) + model.save(temp_filepath, save_format="keras_v3") + model = keras.models.load_model(temp_filepath) + self.assertAllClose(model.predict(ref_input), ref_output, atol=1e-6) + + def test_save_load_weights_only(self): + temp_filepath = Path( + os.path.join(self.get_temp_dir(), "mymodel.weights.h5") + ) + model = self._get_functional_model() + ref_input = np.random.random((10, 32)) + ref_output = model.predict(ref_input) + saving_lib.save_weights_only(model, temp_filepath) + model = self._get_functional_model() + saving_lib.load_weights_only(model, temp_filepath) + self.assertAllClose(model.predict(ref_input), ref_output, atol=1e-6) + # Test with Model method + model = self._get_functional_model() + model.load_weights(temp_filepath) + self.assertAllClose(model.predict(ref_input), ref_output, atol=1e-6) + + def test_load_weights_only_with_keras_file(self): + # Test loading weights from whole saved model + temp_filepath = Path(os.path.join(self.get_temp_dir(), "mymodel.keras")) + model = self._get_functional_model() + ref_input = np.random.random((10, 32)) + ref_output = model.predict(ref_input) + saving_lib.save_model(model, temp_filepath) + model = self._get_functional_model() + saving_lib.load_weights_only(model, temp_filepath) + self.assertAllClose(model.predict(ref_input), ref_output, atol=1e-6) + # Test with Model method + model = self._get_functional_model() + model.load_weights(temp_filepath) + self.assertAllClose(model.predict(ref_input), ref_output, atol=1e-6) + + def test_compile_arg(self): + temp_filepath = os.path.join(self.get_temp_dir(), "mymodel.keras") + model = self._get_functional_model() + model.compile("rmsprop", "mse") + model.fit(np.random.random((10, 32)), np.random.random((10, 1))) + saving_lib.save_model(model, temp_filepath) + + model = saving_lib.load_model(temp_filepath) + self.assertEqual(model._is_compiled, True) + model = saving_lib.load_model(temp_filepath, compile=False) + self.assertEqual(model._is_compiled, False) + + def test_overwrite(self): + temp_filepath = os.path.join(self.get_temp_dir(), "mymodel.keras") + model = self._get_functional_model() + model.save(temp_filepath, save_format="keras_v3") + 
model.save(temp_filepath, save_format="keras_v3", overwrite=True) + with self.assertRaises(EOFError): + model.save(temp_filepath, save_format="keras_v3", overwrite=False) + + temp_filepath = os.path.join(self.get_temp_dir(), "mymodel.weights.h5") + model = self._get_functional_model() + model.save_weights(temp_filepath) + model.save_weights(temp_filepath, overwrite=True) + with self.assertRaises(EOFError): + model.save_weights(temp_filepath, overwrite=False) + + def test_partial_load(self): + temp_filepath = os.path.join(self.get_temp_dir(), "mymodel.keras") + original_model = keras.Sequential( + [ + keras.Input(shape=(3,)), + keras.layers.Dense(4), + keras.layers.Dense(5), + ] + ) + original_model.save(temp_filepath, save_format="keras_v3") + + # Test with a model that has a differently shaped layer + new_model = keras.Sequential( + [ + keras.Input(shape=(3,)), + keras.layers.Dense(4), + keras.layers.Dense(6), + ] + ) + new_layer_kernel_value = new_model.layers[1].kernel.numpy() + with self.assertRaisesRegex(ValueError, "Shape mismatch"): + # Doesn't work by default + new_model.load_weights(temp_filepath) + # Now it works + new_model.load_weights(temp_filepath, skip_mismatch=True) + self.assertAllClose( + original_model.layers[0].get_weights(), + new_model.layers[0].get_weights(), + ) + self.assertAllClose( + new_model.layers[1].kernel.numpy(), new_layer_kernel_value + ) + + # Test with a model that has a new layer + new_model = keras.Sequential( + [ + keras.Input(shape=(3,)), + keras.layers.Dense(4), + keras.layers.Dense(5), + keras.layers.Dense(5), + ] + ) + new_layer_kernel_value = new_model.layers[2].kernel.numpy() + with self.assertRaisesRegex(ValueError, "received 0 variables"): + # Doesn't work by default + new_model.load_weights(temp_filepath) + # Now it works + new_model.load_weights(temp_filepath, skip_mismatch=True) + self.assertAllClose( + original_model.layers[0].get_weights(), + new_model.layers[0].get_weights(), + ) + self.assertAllClose( + original_model.layers[1].get_weights(), + new_model.layers[1].get_weights(), + ) + self.assertAllClose( + new_model.layers[2].kernel.numpy(), new_layer_kernel_value + ) + + def test_api_errors(self): + temp_filepath = os.path.join(self.get_temp_dir(), "mymodel.notkeras") + model = self._get_functional_model() + with self.assertRaisesRegex(ValueError, "Unknown `save_format`"): + model.save(temp_filepath, save_format="invalid") + with self.assertRaisesRegex(ValueError, "Invalid `filepath` argument"): + model.save(temp_filepath, save_format="keras_v3") + + temp_filepath = os.path.join(self.get_temp_dir(), "mymodel.keras") + with self.assertRaisesRegex(ValueError, "not supported"): + model.save( + temp_filepath, include_optimizer=False, save_format="keras_v3" + ) + + def test_safe_mode(self): + temp_filepath = os.path.join(self.get_temp_dir(), "unsafe_model.keras") + model = keras.Sequential( + [ + keras.Input(shape=(3,)), + keras.layers.Lambda(lambda x: x * 2), + ] + ) + model.save(temp_filepath, save_format="keras_v3") + with self.assertRaisesRegex(ValueError, "arbitrary code execution"): + model = saving_lib.load_model(temp_filepath) + model = saving_lib.load_model(temp_filepath, safe_mode=False) + + def test_normalization_kpl(self): + # With adapt + temp_filepath = os.path.join(self.get_temp_dir(), "norm_model.keras") + model = keras.Sequential( + [ + keras.Input(shape=(3,)), + keras.layers.Normalization(), + ] + ) + data = np.random.random((3, 3)) + model.layers[0].adapt(data) + ref_out = model(data) + model.save(temp_filepath, 
save_format="keras_v3") + model = saving_lib.load_model(temp_filepath) + out = model(data) + self.assertAllClose(ref_out, out, atol=1e-6) + + # Without adapt + model = keras.Sequential( + [ + keras.Input(shape=(3,)), + keras.layers.Normalization( + mean=np.random.random((3,)), variance=np.random.random((3,)) + ), + ] + ) + ref_out = model(data) + model.save(temp_filepath, save_format="keras_v3") + model = saving_lib.load_model(temp_filepath) + out = model(data) + self.assertAllClose(ref_out, out, atol=1e-6) + + def test_layer_index_naming(self): + weights_filepath = os.path.join(self.get_temp_dir(), "model.weights.h5") + model = keras.Sequential( + [ + keras.layers.Dense(10), + keras.layers.Dense(10), + keras.layers.Dense(10), + keras.layers.Dense(10), + ] + ) + model.build([1, 20]) + model.save_weights(weights_filepath) + with h5py.File(weights_filepath, "r") as f: + self.assertAllEqual( + list(f["_layer_checkpoint_dependencies"].keys()), + ["dense", "dense_1", "dense_2", "dense_3"], + ) + + +# This custom class lacks custom object registration. +class CustomRNN(keras.layers.Layer): + def __init__(self, units): + super(CustomRNN, self).__init__() + self.units = units + self.projection_1 = keras.layers.Dense(units=units, activation="tanh") + self.projection_2 = keras.layers.Dense(units=units, activation="tanh") + self.classifier = keras.layers.Dense(1) + + def call(self, inputs): + outputs = [] + state = tf.zeros(shape=(inputs.shape[0], self.units)) + for t in range(inputs.shape[1]): + x = inputs[:, t, :] + h = self.projection_1(x) + y = h + self.projection_2(state) + state = y + outputs.append(y) + features = tf.stack(outputs, axis=1) + return self.classifier(features) + + +# This class is properly registered with a `get_config()` method. +# However, since it does not subclass keras.layers.Layer, it lacks +# `from_config()` for deserialization. +@keras.utils.register_keras_serializable() +class GrowthFactor: + def __init__(self, factor): + self.factor = factor + + def __call__(self, inputs): + return inputs * self.factor + + def get_config(self): + return {"factor": self.factor} + + +@keras.utils.register_keras_serializable(package="Complex") +class FactorLayer(keras.layers.Layer): + def __init__(self, factor): + super().__init__() + self.factor = factor + + def call(self, x): + return x * self.factor + + def get_config(self): + return {"factor": self.factor} + + +# This custom model does not explicitly deserialize the layers it includes +# in its `get_config`. Explicit deserialization in a `from_config` override +# or `__init__` is needed here, or an error will be thrown at loading time. 
+@keras.utils.register_keras_serializable(package="Complex") +class ComplexModel(keras.layers.Layer): + def __init__(self, first_layer, second_layer=None, **kwargs): + super().__init__(**kwargs) + self.first_layer = first_layer + if second_layer is not None: + self.second_layer = second_layer + else: + self.second_layer = keras.layers.Dense(8) + + def get_config(self): + config = super().get_config() + config.update( + { + "first_layer": self.first_layer, + "second_layer": self.second_layer, + } + ) + return config + + def call(self, inputs): + return self.first_layer(self.second_layer(inputs)) + + +@test_utils.run_v2_only +class SavingV3BattleTest(tf.test.TestCase, parameterized.TestCase): + def test_custom_model_without_registration_error(self): + temp_filepath = os.path.join( + self.get_temp_dir(), "my_custom_model.keras" + ) + timesteps = 10 + input_dim = 5 + batch_size = 16 + + inputs = keras.Input(batch_shape=(batch_size, timesteps, input_dim)) + x = keras.layers.Conv1D(32, 3)(inputs) + outputs = CustomRNN(32)(x) + + model = keras.Model(inputs, outputs) + + with self.assertRaisesRegex( + TypeError, "is a custom class, please register it" + ): + model.save(temp_filepath) + _ = keras.models.load_model(temp_filepath) + + def test_custom_object_without_from_config(self): + temp_filepath = os.path.join( + self.get_temp_dir(), "custom_fn_model.keras" + ) + + inputs = keras.Input(shape=(4, 4)) + outputs = keras.layers.Dense(1, activation=GrowthFactor(0.5))(inputs) + model = keras.Model(inputs, outputs) + + model.save(temp_filepath) + + with self.assertRaisesRegex( + TypeError, "Unable to reconstruct an instance" + ): + _ = keras.models.load_model(temp_filepath) + + def test_complex_model_without_explicit_deserialization(self): + temp_filepath = os.path.join(self.get_temp_dir(), "complex_model.keras") + + inputs = keras.Input((32,)) + outputs = ComplexModel(first_layer=FactorLayer(0.5))(inputs) + model = keras.Model(inputs, outputs) + + model.save(temp_filepath) + + with self.assertRaisesRegex(TypeError, "are explicitly deserialized"): + _ = keras.models.load_model(temp_filepath) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/saving/saving_utils.py b/keras/saving/saving_utils.py deleted file mode 100644 index 9dd5e4290698..000000000000 --- a/keras/saving/saving_utils.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
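`ComplexModel` above is intentionally broken: the battle test asserts that nested layers passed through `get_config` must be explicitly deserialized. A hypothetical corrected variant (not part of this change) showing the `__init__`-based fix that the error message asks for:

```python
import keras


@keras.utils.register_keras_serializable(package="Complex")
class ComplexModelFixed(keras.layers.Layer):
    """Hypothetical corrected variant of ComplexModel above."""

    def __init__(self, first_layer, second_layer=None, **kwargs):
        super().__init__(**kwargs)
        # Nested layers may arrive as serialized config dicts when the
        # model is reloaded; deserialize them explicitly.
        if isinstance(first_layer, dict):
            first_layer = keras.layers.deserialize(first_layer)
        if isinstance(second_layer, dict):
            second_layer = keras.layers.deserialize(second_layer)
        self.first_layer = first_layer
        self.second_layer = (
            second_layer
            if second_layer is not None
            else keras.layers.Dense(8)
        )

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "first_layer": self.first_layer,
                "second_layer": self.second_layer,
            }
        )
        return config

    def call(self, inputs):
        return self.first_layer(self.second_layer(inputs))
```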
-# ============================================================================== -"""Utils related to keras model saving.""" - -# pylint: disable=g-bad-import-order, g-direct-tensorflow-import -import tensorflow.compat.v2 as tf -import keras - -import copy -import os -from keras import backend -from keras import losses -from keras.optimizers import optimizer_v1 -from keras import optimizers -from keras.engine import base_layer_utils -from keras.utils import generic_utils -from keras.utils import version_utils -from keras.utils.io_utils import ask_to_proceed_with_overwrite -from tensorflow.python.platform import tf_logging as logging -# pylint: enable=g-bad-import-order, g-direct-tensorflow-import - - -def extract_model_metrics(model): - """Convert metrics from a Keras model `compile` API to dictionary. - - This is used for converting Keras models to Estimators and SavedModels. - - Args: - model: A `tf.keras.Model` object. - - Returns: - Dictionary mapping metric names to metric instances. May return `None` if - the model does not contain any metrics. - """ - if getattr(model, '_compile_metrics', None): - # TODO(psv/kathywu): use this implementation in model to estimator flow. - # We are not using model.metrics here because we want to exclude the metrics - # added using `add_metric` API. - return {m.name: m for m in model._compile_metric_functions} # pylint: disable=protected-access - return None - - -def model_call_inputs(model, keep_original_batch_size=False): - """Inspect model to get its input signature. - - The model's input signature is a list with a single (possibly-nested) object. - This is due to the Keras-enforced restriction that tensor inputs must be - passed in as the first argument. - - For example, a model with input {'feature1': , 'feature2': } - will have input signature: [{'feature1': TensorSpec, 'feature2': TensorSpec}] - - Args: - model: Keras Model object. - keep_original_batch_size: A boolean indicating whether we want to keep using - the original batch size or set it to None. Default is `False`, which means - that the batch dim of the returned input signature will always be set to - `None`. - - Returns: - A tuple containing `(args, kwargs)` TensorSpecs of the model call function - inputs. - `kwargs` does not contain the `training` argument. - """ - input_specs = model.save_spec(dynamic_batch=not keep_original_batch_size) - if input_specs is None: - return None, None - input_specs = _enforce_names_consistency(input_specs) - return input_specs - - -def raise_model_input_error(model): - if isinstance(model, keras.models.Sequential): - raise ValueError( - f'Model {model} cannot be saved because the input shape is not ' - 'available. Please specify an input shape either by calling ' - '`build(input_shape)` directly, or by calling the model on actual ' - 'data using `Model()`, `Model.fit()`, or `Model.predict()`.') - - # If the model is not a `Sequential`, it is intended to be a subclassed model. - raise ValueError( - f'Model {model} cannot be saved either because the input shape is not ' - 'available or because the forward pass of the model is not defined.' - 'To define a forward pass, please override `Model.call()`. To specify ' - 'an input shape, either call `build(input_shape)` directly, or call ' - 'the model on actual data using `Model()`, `Model.fit()`, or ' - '`Model.predict()`. If you have a custom training step, please make ' - 'sure to invoke the forward pass in train step through ' - '`Model.__call__`, i.e. 
`model(inputs)`, as opposed to `model.call()`.') - - -def trace_model_call(model, input_signature=None): - """Trace the model call to create a tf.function for exporting a Keras model. - - Args: - model: A Keras model. - input_signature: optional, a list of tf.TensorSpec objects specifying the - inputs to the model. - - Returns: - A tf.function wrapping the model's call function with input signatures set. - - Raises: - ValueError: if input signature cannot be inferred from the model. - """ - if input_signature is None: - if isinstance(model.call, tf.__internal__.function.Function): - input_signature = model.call.input_signature - - if input_signature: - model_args = input_signature - model_kwargs = {} - else: - model_args, model_kwargs = model_call_inputs(model) - - if model_args is None: - raise_model_input_error(model) - - @tf.function - def _wrapped_model(*args, **kwargs): - """A concrete tf.function that wraps the model's call function.""" - args, kwargs = model._call_spec.set_arg_value( # pylint: disable=protected-access - 'training', False, args, kwargs, inputs_in_args=True) - - with base_layer_utils.call_context().enter( - model, inputs=None, build_graph=False, training=False, saving=True): - outputs = model(*args, **kwargs) - - # Outputs always has to be a flat dict. - output_names = model.output_names # Functional Model. - if output_names is None: # Subclassed Model. - from keras.engine import compile_utils # pylint: disable=g-import-not-at-top - output_names = compile_utils.create_pseudo_output_names(outputs) - outputs = tf.nest.flatten(outputs) - return {name: output for name, output in zip(output_names, outputs)} - - return _wrapped_model.get_concrete_function(*model_args, **model_kwargs) - - -def model_metadata(model, include_optimizer=True, require_config=True): - """Returns a dictionary containing the model metadata.""" - from keras import __version__ as keras_version # pylint: disable=g-import-not-at-top - from keras.optimizers.optimizer_v2 import optimizer_v2 # pylint: disable=g-import-not-at-top - - model_config = {'class_name': model.__class__.__name__} - try: - model_config['config'] = model.get_config() - except NotImplementedError as e: - if require_config: - raise e - - metadata = dict( - keras_version=str(keras_version), - backend=backend.backend(), - model_config=model_config) - if model.optimizer and include_optimizer: - if isinstance(model.optimizer, optimizer_v1.TFOptimizer): - logging.warning( - 'TensorFlow optimizers do not ' - 'make it possible to access ' - 'optimizer attributes or optimizer state ' - 'after instantiation. ' - 'As a result, we cannot save the optimizer ' - 'as part of the model save file. ' - 'You will have to compile your model again after loading it. ' - 'Prefer using a Keras optimizer instead ' - '(see keras.io/optimizers).') - elif model._compile_was_called: # pylint: disable=protected-access - training_config = model._get_compile_args(user_metrics=False) # pylint: disable=protected-access - training_config.pop('optimizer', None) # Handled separately. - metadata['training_config'] = _serialize_nested_config(training_config) - if isinstance(model.optimizer, optimizer_v2.RestoredOptimizer): - raise NotImplementedError( - 'Optimizers loaded from a SavedModel cannot be saved. ' - 'If you are calling `model.save` or `tf.keras.models.save_model`, ' - 'please set the `include_optimizer` option to `False`. 
For ' - '`tf.saved_model.save`, delete the optimizer from the model.') - else: - optimizer_config = { - 'class_name': - generic_utils.get_registered_name(model.optimizer.__class__), - 'config': - model.optimizer.get_config() - } - metadata['training_config']['optimizer_config'] = optimizer_config - return metadata - - -def should_overwrite(filepath, overwrite): - """Returns whether the filepath should be overwritten.""" - # If file exists and should not be overwritten. - if not overwrite and os.path.isfile(filepath): - return ask_to_proceed_with_overwrite(filepath) - return True - - -def compile_args_from_training_config(training_config, custom_objects=None): - """Return model.compile arguments from training config.""" - if custom_objects is None: - custom_objects = {} - - with generic_utils.CustomObjectScope(custom_objects): - optimizer_config = training_config['optimizer_config'] - optimizer = optimizers.deserialize(optimizer_config) - - # Recover losses. - loss = None - loss_config = training_config.get('loss', None) - if loss_config is not None: - loss = _deserialize_nested_config(losses.deserialize, loss_config) - - # Recover metrics. - metrics = None - metrics_config = training_config.get('metrics', None) - if metrics_config is not None: - metrics = _deserialize_nested_config(_deserialize_metric, metrics_config) - - # Recover weighted metrics. - weighted_metrics = None - weighted_metrics_config = training_config.get('weighted_metrics', None) - if weighted_metrics_config is not None: - weighted_metrics = _deserialize_nested_config(_deserialize_metric, - weighted_metrics_config) - - sample_weight_mode = training_config['sample_weight_mode'] if hasattr( - training_config, 'sample_weight_mode') else None - loss_weights = training_config['loss_weights'] - - return dict( - optimizer=optimizer, - loss=loss, - metrics=metrics, - weighted_metrics=weighted_metrics, - loss_weights=loss_weights, - sample_weight_mode=sample_weight_mode) - - -def _deserialize_nested_config(deserialize_fn, config): - """Deserializes arbitrary Keras `config` using `deserialize_fn`.""" - - def _is_single_object(obj): - if isinstance(obj, dict) and 'class_name' in obj: - return True # Serialized Keras object. - if isinstance(obj, str): - return True # Serialized function or string. - return False - - if config is None: - return None - if _is_single_object(config): - return deserialize_fn(config) - elif isinstance(config, dict): - return { - k: _deserialize_nested_config(deserialize_fn, v) - for k, v in config.items() - } - elif isinstance(config, (tuple, list)): - return [_deserialize_nested_config(deserialize_fn, obj) for obj in config] - - raise ValueError( - 'Saved configuration not understood. Configuration should be a ' - f'dictionary, string, tuple or list. Received: config={config}.') - - -def _serialize_nested_config(config): - """Serialized a nested structure of Keras objects.""" - - def _serialize_fn(obj): - if callable(obj): - return generic_utils.serialize_keras_object(obj) - return obj - - return tf.nest.map_structure(_serialize_fn, config) - - -def _deserialize_metric(metric_config): - """Deserialize metrics, leaving special strings untouched.""" - from keras import metrics as metrics_module # pylint:disable=g-import-not-at-top - if metric_config in ['accuracy', 'acc', 'crossentropy', 'ce']: - # Do not deserialize accuracy and cross-entropy strings as we have special - # case handling for these in compile, based on model output shape. 
- return metric_config - return metrics_module.deserialize(metric_config) - - -def _enforce_names_consistency(specs): - """Enforces that either all specs have names or none do.""" - - def _has_name(spec): - return spec is None or (hasattr(spec, 'name') and spec.name is not None) - - def _clear_name(spec): - spec = copy.deepcopy(spec) - if hasattr(spec, 'name'): - spec._name = None # pylint:disable=protected-access - return spec - - flat_specs = tf.nest.flatten(specs) - name_inconsistency = ( - any(_has_name(s) for s in flat_specs) and - not all(_has_name(s) for s in flat_specs)) - - if name_inconsistency: - specs = tf.nest.map_structure(_clear_name, specs) - return specs - - -def try_build_compiled_arguments(model): - if (not version_utils.is_v1_layer_or_model(model) and - model.outputs is not None): - try: - if not model.compiled_loss.built: - model.compiled_loss.build(model.outputs) - if not model.compiled_metrics.built: - model.compiled_metrics.build(model.outputs, model.outputs) - except: # pylint: disable=bare-except - logging.warning( - 'Compiled the loaded model, but the compiled metrics have yet to ' - 'be built. `model.compile_metrics` will be empty until you train ' - 'or evaluate the model.') - - -def is_hdf5_filepath(filepath): - return (filepath.endswith('.h5') or filepath.endswith('.keras') or - filepath.endswith('.hdf5')) diff --git a/keras/saving/saving_utils_test.py b/keras/saving/saving_utils_test.py deleted file mode 100644 index f9bb9939db35..000000000000 --- a/keras/saving/saving_utils_test.py +++ /dev/null @@ -1,502 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for saving utility functions.""" - -import tensorflow.compat.v2 as tf - -import os - -import numpy as np - -import keras -from keras import backend -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils -from keras.engine import sequential -from keras.feature_column import dense_features -from keras.optimizers.optimizer_v2 import gradient_descent -from keras.saving import saving_utils - - -class TraceModelCallTest(test_combinations.TestCase): - - def _assert_all_close(self, expected, actual): - if not tf.executing_eagerly(): - with self.cached_session() as sess: - backend._initialize_variables(sess) - self.assertAllClose(expected, actual) - else: - self.assertAllClose(expected, actual) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_trace_model_outputs(self): - input_dim = 5 if test_utils.get_model_type() == 'functional' else None - model = test_utils.get_small_mlp(10, 3, input_dim) - inputs = tf.ones((8, 5)) - - if input_dim is None: - with self.assertRaisesRegex(ValueError, '.*input shape is not availabl*'): - saving_utils.trace_model_call(model) - model._set_inputs(inputs) - - fn = saving_utils.trace_model_call(model) - signature_outputs = fn(inputs) - if model.output_names: - expected_outputs = {model.output_names[0]: model(inputs)} - else: - expected_outputs = {'output_1': model(inputs)} - - self._assert_all_close(expected_outputs, signature_outputs) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_trace_model_outputs_after_fitting(self): - input_dim = 5 if test_utils.get_model_type() == 'functional' else None - model = test_utils.get_small_mlp(10, 3, input_dim) - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit( - x=np.random.random((8, 5)).astype(np.float32), - y=np.random.random((8, 3)).astype(np.float32), - epochs=2) - - inputs = tf.ones((8, 5)) - - fn = saving_utils.trace_model_call(model) - signature_outputs = fn(inputs) - if model.output_names: - expected_outputs = {model.output_names[0]: model(inputs)} - else: - expected_outputs = {'output_1': model(inputs)} - - self._assert_all_close(expected_outputs, signature_outputs) - - @test_combinations.run_with_all_model_types(exclude_models='sequential') - @test_combinations.run_all_keras_modes - def test_trace_multi_io_model_outputs(self): - input_dim = 5 - num_classes = 3 - num_classes_b = 4 - input_a = keras.layers.Input(shape=(input_dim,), name='input_a') - input_b = keras.layers.Input(shape=(input_dim,), name='input_b') - - dense = keras.layers.Dense(num_classes, name='dense') - dense2 = keras.layers.Dense(num_classes_b, name='dense2') - dropout = keras.layers.Dropout(0.5, name='dropout') - branch_a = [input_a, dense] - branch_b = [input_b, dense, dense2, dropout] - - model = test_utils.get_multi_io_model(branch_a, branch_b) - - input_a_ts = tf.constant( - np.random.random((10, input_dim)).astype(np.float32)) - input_b_ts = tf.constant( - np.random.random((10, input_dim)).astype(np.float32)) - - if test_utils.get_model_type() == 'subclass': - with self.assertRaisesRegex(ValueError, '.*input shape is not availabl*'): - saving_utils.trace_model_call(model) - - model.compile( - optimizer='sgd', - loss='mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x=[np.random.random((8, input_dim)).astype(np.float32), - np.random.random((8, 
input_dim)).astype(np.float32)], - y=[np.random.random((8, num_classes)).astype(np.float32), - np.random.random((8, num_classes_b)).astype(np.float32)], - epochs=2) - - fn = saving_utils.trace_model_call(model) - # tf.function requires that the input structures match when calling a - # ConcreteFunction. For some reason V1 models define the inputs as a list, - # while V2 models set the inputs as a tuple. - if (not tf.executing_eagerly() and - test_utils.get_model_type() != 'functional'): - signature_outputs = fn([input_a_ts, input_b_ts]) - else: - signature_outputs = fn((input_a_ts, input_b_ts)) - outputs = model([input_a_ts, input_b_ts]) - if model.output_names: - expected_outputs = { - model.output_names[0]: outputs[0], - model.output_names[1]: outputs[1] - } - else: - expected_outputs = {'output_1': outputs[0], 'output_2': outputs[1]} - self._assert_all_close(expected_outputs, signature_outputs) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_trace_features_layer(self): - columns = [tf.feature_column.numeric_column('x')] - model = sequential.Sequential([dense_features.DenseFeatures(columns)]) - model_input = {'x': tf.constant([[1.]])} - model.predict(model_input, steps=1) - fn = saving_utils.trace_model_call(model) - self.assertAllClose({'output_1': [[1.]]}, fn(model_input)) - - columns = [ - tf.feature_column.numeric_column('x'), - tf.feature_column.numeric_column('y') - ] - model = sequential.Sequential([dense_features.DenseFeatures(columns)]) - model_input = {'x': tf.constant([[1.]]), - 'y': tf.constant([[2.]])} - model.predict(model_input, steps=1) - fn = saving_utils.trace_model_call(model) - self.assertAllClose({'output_1': [[1., 2.]]}, fn(model_input)) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_specify_input_signature(self): - model = test_utils.get_small_sequential_mlp(10, 3, None) - inputs = tf.ones((8, 5)) - - with self.assertRaisesRegex(ValueError, '.*input shape is not availabl*'): - saving_utils.trace_model_call(model) - - fn = saving_utils.trace_model_call( - model, [tf.TensorSpec(shape=[None, 5], dtype=tf.float32)]) - signature_outputs = fn(inputs) - if model.output_names: - expected_outputs = {model.output_names[0]: model(inputs)} - else: - expected_outputs = {'output_1': model(inputs)} - self._assert_all_close(expected_outputs, signature_outputs) - - @test_combinations.generate( - test_combinations.combine(mode=['graph', 'eager'])) - def test_subclassed_model_with_input_signature(self): - - class Model(keras.Model): - - def __init__(self): - super().__init__() - self.dense = keras.layers.Dense(3, name='dense') - - @tf.function( - input_signature=[[tf.TensorSpec([None, 5], tf.float32), - tf.TensorSpec([None], tf.float32)]],) - def call(self, inputs, *args): - x, y = inputs - return self.dense(x) + y - - model = Model() - fn = saving_utils.trace_model_call(model) - x = tf.ones((8, 5), dtype=tf.float32) - y = tf.ones((3,), dtype=tf.float32) - expected_outputs = {'output_1': model([x, y])} - signature_outputs = fn([x, y]) - self._assert_all_close(expected_outputs, signature_outputs) - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def test_model_with_fixed_input_dim(self): - """Ensure that the batch_dim is removed when saving. - - When serving or retraining, it is important to reset the batch dim. - This can be an issue inside of tf.function. See b/132783590 for context.
- """ - model = test_utils.get_small_mlp(10, 3, 5) - - loss_object = keras.losses.MeanSquaredError() - optimizer = gradient_descent.SGD() - - @tf.function - def train_step(data, labels): - with tf.GradientTape() as tape: - predictions = model(data) - loss = loss_object(labels, predictions) - gradients = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(zip(gradients, model.trainable_variables)) - - x = np.random.random((8, 5)) - y = np.random.random((8, 3)) - - train_step(x, y) - - fn = saving_utils.trace_model_call(model) - self.assertEqual(fn.structured_input_signature[0][0].shape.as_list(), - tf.TensorShape([None, 5]).as_list()) - - -def _import_and_infer(save_dir, inputs): - """Import a SavedModel into a TF 1.x-style graph and run `signature_key`.""" - graph = tf.Graph() - with graph.as_default(), tf.compat.v1.Session() as session: - model = tf.compat.v1.saved_model.load(session, [tf.saved_model.SERVING], save_dir) - signature = model.signature_def[ - tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - assert set(inputs.keys()) == set( - signature.inputs.keys()), ('expected {}, found {}'.format( - signature.inputs.keys(), inputs.keys())) - feed_dict = {} - for arg_name in inputs.keys(): - feed_dict[graph.get_tensor_by_name(signature.inputs[arg_name].name)] = ( - inputs[arg_name]) - output_dict = {} - for output_name, output_tensor_info in signature.outputs.items(): - output_dict[output_name] = graph.get_tensor_by_name( - output_tensor_info.name) - return session.run(output_dict, feed_dict=feed_dict) - - -class AutographedMetric(keras.metrics.Metric): - - def build(self, input_shape): - pass - - def update_state(self, values): - if tf.constant(False): - x = 1 - else: - x = 2 - return x - - def reset_states(self): - pass - - def result(self): - return tf.constant(0) - - def GetMean(self): - return tf.constant(0) - - def GetCount(self): - return tf.constant(0) - - -class BasicAutographedMetricLayer(keras.layers.Layer): - - def build(self, input_shape): - self._metric = AutographedMetric() - - def call(self, inp): - self._metric.update_state(inp) - # TODO(b/172853147): Test control flow here. - return inp - - -class BasicAutographedMetricModel(keras.models.Model): - - def __init__(self): - super().__init__(name='test_model') - self._layer = BasicAutographedMetricLayer() - - def call(self, inputs, **kwargs): - return self._layer(inputs) - - -@test_combinations.run_with_all_model_types -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class ModelSaveTest(test_combinations.TestCase): - - def test_model_save_preserves_autograph(self): - model = BasicAutographedMetricModel() - inputs = tf.ones((8, 5)) - model._set_inputs(inputs) - - save_dir = os.path.join(self.get_temp_dir(), 'saved_model') - tf.saved_model.save(model, save_dir) - - if model.output_names: - output_name = model.output_names[0] - input_name = model.input_names[0] - else: - output_name = 'output_1' - input_name = 'input_1' - - self.assertAllClose({output_name: model.predict_on_batch(inputs)}, - _import_and_infer(save_dir, - {input_name: np.ones((8, 5))})) - - # Test v2 loading. - # TODO(mdan): tests using _import_and_infer should uniformly do this. 
- self.assertAllClose(model.predict_on_batch(inputs), - tf.saved_model.load(save_dir)(inputs)) - - def test_model_save(self): - input_dim = 5 - model = test_utils.get_small_mlp(10, 3, input_dim) - inputs = tf.ones((8, 5)) - - if test_utils.get_model_type() == 'subclass': - model._set_inputs(inputs) - - save_dir = os.path.join(self.get_temp_dir(), 'saved_model') - tf.saved_model.save(model, save_dir) - - if model.output_names: - output_name = model.output_names[0] - input_name = model.input_names[0] - else: - output_name = 'output_1' - input_name = 'input_1' - - self.assertAllClose({output_name: model.predict_on_batch(inputs)}, - _import_and_infer(save_dir, - {input_name: np.ones((8, 5))})) - - -class ExtractModelMetricsTest(test_combinations.TestCase): - - def test_extract_model_metrics(self): - # saving_utils.extract_model_metrics is used in V1 only API - # keras.experimental.export_saved_model. - with tf.Graph().as_default(): - a = keras.layers.Input(shape=(3,), name='input_a') - b = keras.layers.Input(shape=(3,), name='input_b') - - dense = keras.layers.Dense(4, name='dense') - c = dense(a) - d = dense(b) - e = keras.layers.Dropout(0.5, name='dropout')(c) - - model = keras.models.Model([a, b], [d, e]) - extract_metrics = saving_utils.extract_model_metrics(model) - self.assertEqual(None, extract_metrics) - - extract_metric_names = [ - 'dense_binary_accuracy', 'dropout_binary_accuracy', - 'dense_mean_squared_error', 'dropout_mean_squared_error' - ] - if tf.__internal__.tf2.enabled(): - extract_metric_names.extend(['dense_mae', 'dropout_mae']) - else: - extract_metric_names.extend( - ['dense_mean_absolute_error', 'dropout_mean_absolute_error']) - - model_metric_names = ['loss', 'dense_loss', 'dropout_loss' - ] + extract_metric_names - model.compile( - loss='mae', - metrics=[ - keras.metrics.BinaryAccuracy(), 'mae', - keras.metrics.mean_squared_error - ], - optimizer=tf.compat.v1.train.RMSPropOptimizer(learning_rate=0.01)) - extract_metrics = saving_utils.extract_model_metrics(model) - self.assertEqual(set(model_metric_names), set(model.metrics_names)) - self.assertEqual(set(extract_metric_names), set(extract_metrics.keys())) - - -class UnbuiltModelSavingErrorMessageTest(test_combinations.TestCase): - - def setUp(self): - super().setUp() - if not tf.__internal__.tf2.enabled(): - self.skipTest('The test does not intend to cover TF1.') - - def test_sequential(self): - model = sequential.Sequential([keras.layers.Dense(10)]) - optimizer = gradient_descent.SGD() - model.compile(optimizer, loss='mse', steps_per_execution=10) - - # Forward pass not called yet. Input shape not available and thus error. - with self.assertRaisesRegex( - ValueError, - 'Model.*cannot be saved.*specify an input shape either by calling.*'): - model.save(os.path.join(self.get_temp_dir(), 'my_saved_model')) - - def test_functional(self): - inputs = keras.Input(shape=(32,)) - outputs = keras.layers.Dense(1)(inputs) - model = keras.Model(inputs, outputs) - model.compile(optimizer='adam', loss='mse', metrics=['mae']) - - x = np.random.random((1000, 32)) - y = np.random.random((1000, 1)) - model.fit(x, y, epochs=3) - - # Functional model always has an input shape, so should save just fine. 
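-    # (Contrast with test_sequential above, where the unbuilt model raises
-    # until a forward pass gives it an input shape.)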
- model.save(os.path.join(self.get_temp_dir(), 'my_saved_model')) - - def test_subclass_forward_pass_by_layer_underscore_call(self): - - class CustomModel(keras.Model): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.dense1 = keras.layers.Dense(1) - - def train_step(self, data): - x, y = data - with tf.GradientTape() as tape: - y_pred = self.dense1(x, training=True) - loss = self.compiled_loss(y, y_pred) - - gradients = tape.gradient(loss, self.trainable_variables) - self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) - return {} - - subclassed_model = CustomModel() - subclassed_model.compile(optimizer='adam', loss='mse') - - x = np.random.random((1000, 32)) - y = np.random.random((1000, 1)) - subclassed_model.fit(x, y, epochs=1) - - # Saving of this subclassed model is supposed to raise an error, even if - # `fit` has been called. This is because the model does not have `call()` - # overridden. Forward pass using `layer.__call__` works for training, but - # saving requires that `call()` be used. - with self.assertRaisesRegex( - ValueError, r'Model.*cannot be saved.*as opposed to `model.call\(\).*'): - subclassed_model.save(os.path.join(self.get_temp_dir(), 'my_saved_model')) - - def test_subclass_forward_pass_by_model_call(self): - - class CustomModel(keras.Model): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.dense1 = keras.layers.Dense(1) - - def call(self, inputs): - return self.dense1(inputs) - - def train_step(self, data): - x, y = data - with tf.GradientTape() as tape: - y_pred = self.call(x) - loss = self.compiled_loss(y, y_pred) - - gradients = tape.gradient(loss, self.trainable_variables) - self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) - return {} - - subclassed_model = CustomModel() - subclassed_model.compile(optimizer='adam', loss='mse') - - x = np.random.random((1000, 32)) - y = np.random.random((1000, 1)) - subclassed_model.fit(x, y, epochs=1) - - # Saving of this subclassed model is supposed to raise an error, even if - # `fit` has been called. This is because the model has `call()` overridden, - # but the forward pass uses `Model.call` as opposed to `Model.__call__`, and - # as a result the `Model` is not really built. The error message hints the - # user to use `Model.__call__`, i.e., `Model(inputs)` instead. - with self.assertRaisesRegex( - ValueError, r'Model.*cannot be saved.*as opposed to `model.call\(\).*'): - subclassed_model.save(os.path.join(self.get_temp_dir(), 'my_saved_model')) - - -if __name__ == '__main__': - tf.test.main() diff --git a/keras/saving/serialization_lib.py b/keras/saving/serialization_lib.py new file mode 100644 index 000000000000..6f72af9f64b7 --- /dev/null +++ b/keras/saving/serialization_lib.py @@ -0,0 +1,832 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Object config serialization and deserialization logic.""" + +import importlib +import inspect +import threading +import types +import warnings + +import numpy as np +import tensorflow.compat.v2 as tf + +from keras.saving import object_registration +from keras.saving.legacy import serialization as legacy_serialization +from keras.saving.legacy.saved_model.utils import in_tf_saved_model_scope +from keras.utils import generic_utils + +# isort: off +from tensorflow.python.util import tf_export +from tensorflow.python.util.tf_export import keras_export + +PLAIN_TYPES = (str, int, float, bool) +SHARED_OBJECTS = threading.local() +SAFE_MODE = threading.local() +# TODO(nkovela): Debug serialization of decorated functions inside lambdas +# to allow for serialization of custom_gradient. +NON_SERIALIZABLE_CLASS_MODULES = ("tensorflow.python.ops.custom_gradient",) + +# List of Keras modules with built-in string representations for Keras defaults +BUILTIN_MODULES = ( + "activations", + "constraints", + "initializers", + "losses", + "metrics", + "optimizers", + "regularizers", +) + + +class Config: + def __init__(self, **config): + self.config = config + + def serialize(self): + return serialize_keras_object(self.config) + + +class SafeModeScope: + """Scope to propagate safe mode flag to nested deserialization calls.""" + + def __init__(self, safe_mode=True): + self.safe_mode = safe_mode + + def __enter__(self): + self.original_value = in_safe_mode() + SAFE_MODE.safe_mode = self.safe_mode + + def __exit__(self, *args, **kwargs): + SAFE_MODE.safe_mode = self.original_value + + +@keras_export("keras.__internal__.enable_unsafe_deserialization") +def enable_unsafe_deserialization(): + """Disables safe mode globally, allowing deserialization of lambdas.""" + SAFE_MODE.safe_mode = False + + +def in_safe_mode(): + return getattr(SAFE_MODE, "safe_mode", None) + + +class ObjectSharingScope: + """Scope to enable detection and reuse of previously seen objects.""" + + def __enter__(self): + SHARED_OBJECTS.enabled = True + SHARED_OBJECTS.id_to_obj_map = {} + SHARED_OBJECTS.id_to_config_map = {} + + def __exit__(self, *args, **kwargs): + SHARED_OBJECTS.enabled = False + SHARED_OBJECTS.id_to_obj_map = {} + SHARED_OBJECTS.id_to_config_map = {} + + +def get_shared_object(obj_id): + """Retrieve an object previously seen during deserialization.""" + if getattr(SHARED_OBJECTS, "enabled", False): + return SHARED_OBJECTS.id_to_obj_map.get(obj_id, None) + + +def record_object_after_serialization(obj, config): + """Call after serializing an object, to keep track of its config.""" + if config["module"] == "__main__": + config["module"] = None # Ensures module is None when no module found + if not getattr(SHARED_OBJECTS, "enabled", False): + return # Not in a sharing scope + obj_id = int(id(obj)) + if obj_id not in SHARED_OBJECTS.id_to_config_map: + SHARED_OBJECTS.id_to_config_map[obj_id] = config + else: + config["shared_object_id"] = obj_id + prev_config = SHARED_OBJECTS.id_to_config_map[obj_id] + prev_config["shared_object_id"] = obj_id + + +def record_object_after_deserialization(obj, obj_id): + """Call after deserializing an object, to keep track of it in the future.""" + if not getattr(SHARED_OBJECTS, "enabled", False): + return # Not in a sharing scope + SHARED_OBJECTS.id_to_obj_map[obj_id] = obj + + +@keras_export( + "keras.saving.serialize_keras_object", "keras.utils.serialize_keras_object" +) +def serialize_keras_object(obj): + 
"""Retrieve the config dict by serializing the Keras object. + + `serialize_keras_object()` serializes a Keras object to a python dictionary + that represents the object, and is a reciprocal function of + `deserialize_keras_object()`. See `deserialize_keras_object()` for more + information about the config format. + + Args: + obj: the Keras object to serialize. + + Returns: + A python dict that represents the object. The python dict can be + deserialized via `deserialize_keras_object()`. + """ + # Fall back to legacy serialization for all TF1 users or if + # wrapped by in_tf_saved_model_scope() to explicitly use legacy + # saved_model logic. + if not tf.__internal__.tf2.enabled() or in_tf_saved_model_scope(): + return legacy_serialization.serialize_keras_object(obj) + + if obj is None: + return obj + + if isinstance(obj, PLAIN_TYPES): + return obj + + if isinstance(obj, (list, tuple)): + config_arr = [serialize_keras_object(x) for x in obj] + return tuple(config_arr) if isinstance(obj, tuple) else config_arr + if isinstance(obj, dict): + return serialize_dict(obj) + + # Special cases: + if isinstance(obj, bytes): + return { + "class_name": "__bytes__", + "config": {"value": obj.decode("utf-8")}, + } + if isinstance(obj, tf.TensorShape): + return obj.as_list() if obj._dims is not None else None + if isinstance(obj, tf.Tensor): + return { + "class_name": "__tensor__", + "config": { + "value": obj.numpy().tolist(), + "dtype": obj.dtype.name, + }, + } + if type(obj).__module__ == np.__name__: + if isinstance(obj, np.ndarray) and obj.ndim > 0: + return { + "class_name": "__numpy__", + "config": { + "value": obj.tolist(), + "dtype": obj.dtype.name, + }, + } + else: + # Treat numpy floats / etc as plain types. + return obj.item() + if isinstance(obj, tf.DType): + return obj.name + if isinstance(obj, tf.compat.v1.Dimension): + return obj.value + if isinstance(obj, types.FunctionType) and obj.__name__ == "": + warnings.warn( + "The object being serialized includes a `lambda`. This is unsafe. " + "In order to reload the object, you will have to pass " + "`safe_mode=False` to the loading function. " + "Please avoid using `lambda` in the " + "future, and use named Python functions instead. " + f"This is the `lambda` being serialized: {inspect.getsource(obj)}", + stacklevel=2, + ) + return { + "class_name": "__lambda__", + "config": { + "value": generic_utils.func_dump(obj), + }, + } + if isinstance(obj, tf.TypeSpec): + ts_config = obj._serialize() + # TensorShape and tf.DType conversion + ts_config = list( + map( + lambda x: x.as_list() + if isinstance(x, tf.TensorShape) + else (x.name if isinstance(x, tf.DType) else x), + ts_config, + ) + ) + spec_name = obj.__class__.__name__ + registered_name = None + if hasattr(obj, "_tf_extension_type_fields"): + # Special casing for ExtensionType + ts_config = tf.experimental.extension_type.as_dict(obj) + ts_config = serialize_dict(ts_config) + registered_name = object_registration.get_registered_name( + obj.__class__ + ) + return { + "class_name": "__typespec__", + "spec_name": spec_name, + "module": obj.__class__.__module__, + "config": ts_config, + "registered_name": registered_name, + } + + inner_config = _get_class_or_fn_config(obj) + config_with_public_class = serialize_with_public_class( + obj.__class__, inner_config + ) + + # TODO(nkovela): Add TF ops dispatch handler serialization for + # ops.EagerTensor that contains nested numpy array. 
+ # Target: NetworkConstructionTest.test_constant_initializer_with_numpy + if isinstance(inner_config, str) and inner_config == "op_dispatch_handler": + return obj + + if config_with_public_class is not None: + + # Special case for non-serializable class modules + if any( + mod in config_with_public_class["module"] + for mod in NON_SERIALIZABLE_CLASS_MODULES + ): + return obj + + get_build_and_compile_config(obj, config_with_public_class) + record_object_after_serialization(obj, config_with_public_class) + return config_with_public_class + + # Any custom object or otherwise non-exported object + if isinstance(obj, types.FunctionType): + module = obj.__module__ + else: + module = obj.__class__.__module__ + class_name = obj.__class__.__name__ + + if module == "builtins": + registered_name = None + else: + if isinstance(obj, types.FunctionType): + registered_name = object_registration.get_registered_name(obj) + else: + registered_name = object_registration.get_registered_name( + obj.__class__ + ) + + config = { + "module": module, + "class_name": class_name, + "config": inner_config, + "registered_name": registered_name, + } + get_build_and_compile_config(obj, config) + record_object_after_serialization(obj, config) + return config + + +def get_build_and_compile_config(obj, config): + if hasattr(obj, "get_build_config"): + build_config = obj.get_build_config() + if build_config is not None: + config["build_config"] = serialize_dict(build_config) + if hasattr(obj, "get_compile_config"): + compile_config = obj.get_compile_config() + if compile_config is not None: + config["compile_config"] = serialize_dict(compile_config) + return + + +def serialize_with_public_class(cls, inner_config=None): + """Serializes classes from public Keras API or object registration. + + Called to check and retrieve the config of any class that has a public + Keras API or has been registered as serializable via + `keras.saving.register_keras_serializable()`. + """ + # This gets the `keras.*` exported name, such as "keras.optimizers.Adam". + keras_api_name = tf_export.get_canonical_name_for_symbol( + cls, api_name="keras" + ) + + # Case of custom or unknown class object + if keras_api_name is None: + registered_name = object_registration.get_registered_name(cls) + if registered_name is None: + return None + + # Return custom object config with corresponding registration name + return { + "module": cls.__module__, + "class_name": cls.__name__, + "config": inner_config, + "registered_name": registered_name, + } + + # Split the canonical Keras API name into a Keras module and class name. + parts = keras_api_name.split(".") + return { + "module": ".".join(parts[:-1]), + "class_name": parts[-1], + "config": inner_config, + "registered_name": None, + } + + +def serialize_with_public_fn(fn, config, fn_module_name=None): + """Serializes functions from public Keras API or object registration. + + Called to check and retrieve the config of any function that has a public + Keras API or has been registered as serializable via + `keras.saving.register_keras_serializable()`. If function's module name is + already known, returns corresponding config. 
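+
+    For example, a public built-in function such as `keras.activations.relu`
+    round-trips through a config of roughly this form (an illustrative
+    sketch; the exact module path depends on the installed Keras version):
+
+    ```python
+    {
+        "module": "keras.activations",
+        "class_name": "function",
+        "config": "relu",
+        "registered_name": "relu",
+    }
+    ```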
+ """ + if fn_module_name: + return { + "module": fn_module_name, + "class_name": "function", + "config": config, + "registered_name": config, + } + keras_api_name = tf_export.get_canonical_name_for_symbol( + fn, api_name="keras" + ) + if keras_api_name: + parts = keras_api_name.split(".") + return { + "module": ".".join(parts[:-1]), + "class_name": "function", + "config": config, + "registered_name": config, + } + else: + registered_name = object_registration.get_registered_name(fn) + if not registered_name and not fn.__module__ == "builtins": + return None + return { + "module": fn.__module__, + "class_name": "function", + "config": config, + "registered_name": registered_name, + } + + +def _get_class_or_fn_config(obj): + """Return the object's config depending on its type.""" + # Functions / lambdas: + if isinstance(obj, types.FunctionType): + return obj.__name__ + # All classes: + if hasattr(obj, "get_config"): + config = obj.get_config() + if not isinstance(config, dict): + raise TypeError( + f"The `get_config()` method of {obj} should return " + f"a dict. It returned: {config}" + ) + return serialize_dict(config) + elif hasattr(obj, "__name__"): + return object_registration.get_registered_name(obj) + else: + raise TypeError( + f"Cannot serialize object {obj} of type {type(obj)}. " + "To be serializable, " + "a class must implement the `get_config()` method." + ) + + +def serialize_dict(obj): + return {key: serialize_keras_object(value) for key, value in obj.items()} + + +@keras_export( + "keras.saving.deserialize_keras_object", + "keras.utils.deserialize_keras_object", +) +def deserialize_keras_object( + config, custom_objects=None, safe_mode=True, **kwargs +): + """Retrieve the object by deserializing the config dict. + + The config dict is a Python dictionary that consists of a set of key-value + pairs, and represents a Keras object, such as an `Optimizer`, `Layer`, + `Metrics`, etc. The saving and loading library uses the following keys to + record information of a Keras object: + + - `class_name`: String. This is the name of the class, + as exactly defined in the source + code, such as "LossesContainer". + - `config`: Dict. Library-defined or user-defined key-value pairs that store + the configuration of the object, as obtained by `object.get_config()`. + - `module`: String. The path of the python module, such as + "keras.engine.compile_utils". Built-in Keras classes + expect to have prefix `keras`. + - `registered_name`: String. The key the class is registered under via + `keras.saving.register_keras_serializable(package, name)` API. The key has + the format of '{package}>{name}', where `package` and `name` are the + arguments passed to `register_keras_serializable()`. If `name` is not + provided, it uses the class name. If `registered_name` successfully + resolves to a class (that was registered), the `class_name` and `config` + values in the dict will not be used. `registered_name` is only used for + non-built-in classes. + + For example, the following dictionary represents the built-in Adam optimizer + with the relevant config: + + ```python + dict_structure = { + "class_name": "Adam", + "config": { + "amsgrad": false, + "beta_1": 0.8999999761581421, + "beta_2": 0.9990000128746033, + "decay": 0.0, + "epsilon": 1e-07, + "learning_rate": 0.0010000000474974513, + "name": "Adam" + }, + "module": "keras.optimizers", + "registered_name": None + } + # Returns an `Adam` instance identical to the original one. 
+ deserialize_keras_object(dict_structure) + ``` + + If the class does not have an exported Keras namespace, the library tracks + it by its `module` and `class_name`. For example: + + ```python + dict_structure = { + "class_name": "LossesContainer", + "config": { + "losses": [...], + "total_loss_mean": {...}, + }, + "module": "keras.engine.compile_utils", + "registered_name": "LossesContainer" + } + + # Returns a `LossesContainer` instance identical to the original one. + deserialize_keras_object(dict_structure) + ``` + + And the following dictionary represents a user-customized `MeanSquaredError` + loss: + + ```python + @keras.saving.register_keras_serializable(package='my_package') + class ModifiedMeanSquaredError(keras.losses.MeanSquaredError): + ... + + dict_structure = { + "class_name": "ModifiedMeanSquaredError", + "config": { + "fn": "mean_squared_error", + "name": "mean_squared_error", + "reduction": "auto" + }, + "registered_name": "my_package>ModifiedMeanSquaredError" + } + # Returns the `ModifiedMeanSquaredError` object + deserialize_keras_object(dict_structure) + ``` + + Args: + config: Python dict describing the object. + custom_objects: Python dict containing a mapping between custom + object names and the corresponding classes or functions. + safe_mode: Boolean, whether to disallow unsafe `lambda` deserialization. + When `safe_mode=False`, loading an object has the potential to + trigger arbitrary code execution. This argument is only + applicable to the Keras v3 model format. Defaults to `True`. + + Returns: + The object described by the `config` dictionary. + + """ + safe_scope_arg = in_safe_mode() # Enforces SafeModeScope + safe_mode = safe_scope_arg if safe_scope_arg is not None else safe_mode + + module_objects = kwargs.pop("module_objects", None) + custom_objects = custom_objects or {} + tlco = object_registration._THREAD_LOCAL_CUSTOM_OBJECTS.__dict__ + gco = object_registration._GLOBAL_CUSTOM_OBJECTS + custom_objects = {**custom_objects, **tlco, **gco} + + # Optional deprecated argument for legacy deserialization call + printable_module_name = kwargs.pop("printable_module_name", "object") + if kwargs: + raise ValueError( + "The following argument(s) are not supported: " + f"{list(kwargs.keys())}" + ) + + # Fall back to legacy deserialization for all TF1 users or if + # wrapped by in_tf_saved_model_scope() to explicitly use legacy + # saved_model logic. + if not tf.__internal__.tf2.enabled() or in_tf_saved_model_scope(): + return legacy_serialization.deserialize_keras_object( + config, module_objects, custom_objects, printable_module_name + ) + + if config is None: + return None + + if ( + isinstance(config, str) + and custom_objects + and custom_objects.get(config) is not None + ): + # This is to deserialize plain functions which are serialized as + # string names by legacy saving formats.
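+        # For example, a bare config string "custom_fn" paired with
+        # custom_objects={"custom_fn": custom_fn} resolves directly here.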
+ return custom_objects[config] + + if isinstance(config, (list, tuple)): + return [ + deserialize_keras_object( + x, custom_objects=custom_objects, safe_mode=safe_mode + ) + for x in config + ] + + if module_objects is not None: + inner_config, fn_module_name, has_custom_object = None, None, False + if isinstance(config, dict): + if "config" in config: + inner_config = config["config"] + if "class_name" not in config: + raise ValueError( + f"Unknown `config` as a `dict`, config={config}" + ) + + # Check case where config is function or class and in custom objects + if custom_objects and ( + config["class_name"] in custom_objects + or config.get("registered_name") in custom_objects + or ( + isinstance(inner_config, str) + and inner_config in custom_objects + ) + ): + has_custom_object = True + + # Case where config is function but not in custom objects + elif config["class_name"] == "function": + fn_module_name = config["module"] + if fn_module_name == "builtins": + config = config["config"] + else: + config = config["registered_name"] + + # Case where config is class but not in custom objects + else: + if config.get("module", "_") is None: + raise TypeError( + "Cannot deserialize object of type " + f"`{config['class_name']}`. If " + f"`{config['class_name']}` is a custom class, please " + "register it using the " + "`@keras.saving.register_keras_serializable()` " + "decorator." + ) + config = config["class_name"] + if not has_custom_object: + # Return if not found in either module objects or custom objects + if config not in module_objects: + # Object has already been deserialized + return config + if isinstance(module_objects[config], types.FunctionType): + return deserialize_keras_object( + serialize_with_public_fn( + module_objects[config], config, fn_module_name + ), + custom_objects=custom_objects, + ) + return deserialize_keras_object( + serialize_with_public_class( + module_objects[config], inner_config=inner_config + ), + custom_objects=custom_objects, + ) + + if isinstance(config, PLAIN_TYPES): + return config + if not isinstance(config, dict): + raise TypeError(f"Could not parse config: {config}") + + if "class_name" not in config or "config" not in config: + return { + key: deserialize_keras_object( + value, custom_objects=custom_objects, safe_mode=safe_mode + ) + for key, value in config.items() + } + + class_name = config["class_name"] + inner_config = config["config"] or {} + custom_objects = custom_objects or {} + + # Special cases: + if class_name == "__tensor__": + return tf.constant(inner_config["value"], dtype=inner_config["dtype"]) + if class_name == "__numpy__": + return np.array(inner_config["value"], dtype=inner_config["dtype"]) + if config["class_name"] == "__bytes__": + return inner_config["value"].encode("utf-8") + if config["class_name"] == "__lambda__": + if safe_mode: + raise ValueError( + "Requested the deserialization of a `lambda` object. " + "This carries a potential risk of arbitrary code execution " + "and thus it is disallowed by default. If you trust the " + "source of the saved model, you can pass `safe_mode=False` to " + "the loading function in order to allow `lambda` loading." 
+ ) + return generic_utils.func_load(inner_config["value"]) + + if config["class_name"] == "__typespec__": + cls = _retrieve_class_or_fn( + config["spec_name"], + config["registered_name"], + config["module"], + obj_type="class", + full_config=config, + custom_objects=custom_objects, + ) + + # Special casing for ExtensionType.Spec + if hasattr(cls, "_tf_extension_type_fields"): + inner_config = { + key: deserialize_keras_object( + value, custom_objects=custom_objects, safe_mode=safe_mode + ) + for key, value in inner_config.items() + } # Deserialization of dict created by ExtensionType.as_dict() + return cls(**inner_config) # Instantiate ExtensionType.Spec + + if config["registered_name"] is not None: + return cls.from_config(inner_config) + + # Conversion to TensorShape and tf.DType + inner_config = map( + lambda x: tf.TensorShape(x) + if isinstance(x, list) + else (getattr(tf, x) if hasattr(tf.dtypes, str(x)) else x), + inner_config, + ) + return cls._deserialize(tuple(inner_config)) + + # Below: classes and functions. + module = config.get("module", None) + registered_name = config.get("registered_name", class_name) + + if class_name == "function": + fn_name = inner_config + return _retrieve_class_or_fn( + fn_name, + registered_name, + module, + obj_type="function", + full_config=config, + custom_objects=custom_objects, + ) + + # Below, handling of all classes. + # First, is it a shared object? + if "shared_object_id" in config: + obj = get_shared_object(config["shared_object_id"]) + if obj is not None: + return obj + + cls = _retrieve_class_or_fn( + class_name, + registered_name, + module, + obj_type="class", + full_config=config, + custom_objects=custom_objects, + ) + + if isinstance(cls, types.FunctionType): + return cls + if not hasattr(cls, "from_config"): + raise TypeError( + f"Unable to reconstruct an instance of '{class_name}' because " + f"the class is missing a `from_config()` method. " + f"Full object config: {config}" + ) + + # Instantiate the class from its config inside a custom object scope + # so that we can catch any custom objects that the config refers to. + custom_obj_scope = object_registration.custom_object_scope(custom_objects) + safe_mode_scope = SafeModeScope(safe_mode) + with custom_obj_scope, safe_mode_scope: + instance = cls.from_config(inner_config) + build_config = config.get("build_config", None) + if build_config: + instance.build_from_config(build_config) + compile_config = config.get("compile_config", None) + if compile_config: + instance.compile_from_config(compile_config) + + if "shared_object_id" in config: + record_object_after_deserialization( + instance, config["shared_object_id"] + ) + return instance + + +def _retrieve_class_or_fn( + name, registered_name, module, obj_type, full_config, custom_objects=None +): + # If there is a custom object registered via + # `register_keras_serializable()`, that takes precedence. + if obj_type == "function": + custom_obj = object_registration.get_registered_object( + name, custom_objects=custom_objects + ) + else: + custom_obj = object_registration.get_registered_object( + registered_name, custom_objects=custom_objects + ) + if custom_obj is not None: + return custom_obj + + if module: + # If it's a Keras built-in object, + # we cannot always use direct import, because the exported + # module name might not match the package structure + # (e.g. experimental symbols). + if module == "keras" or module.startswith("keras."): + api_name = module + "." 
+ name + + # Legacy internal APIs are stored in TF API naming dict + # with `compat.v1` prefix + if "__internal__.legacy" in api_name: + api_name = "compat.v1." + api_name + + obj = tf_export.get_symbol_from_name(api_name) + if obj is not None: + return obj + + # Configs of Keras built-in functions do not contain identifying + # information other than their name (e.g. 'acc' or 'tanh'). This special + # case searches the Keras modules that contain built-ins to retrieve + # the corresponding function from the identifying string. + if obj_type == "function" and module == "builtins": + for mod in BUILTIN_MODULES: + obj = tf_export.get_symbol_from_name( + "keras." + mod + "." + name + ) + if obj is not None: + return obj + + # Retrieval of registered custom function in a package + filtered_dict = { + k: v + for k, v in custom_objects.items() + if k.endswith(full_config["config"]) + } + if filtered_dict: + return next(iter(filtered_dict.values())) + + # Otherwise, attempt to retrieve the class object given the `module` + # and `class_name`. Import the module, find the class. + try: + mod = importlib.import_module(module) + except ModuleNotFoundError: + raise TypeError( + f"Could not deserialize {obj_type} '{name}' because " + f"its parent module {module} cannot be imported. " + f"Full object config: {full_config}" + ) + obj = vars(mod).get(name, None) + + if obj is None: + # Special case for keras.metrics.metrics + if registered_name is not None: + obj = vars(mod).get(registered_name, None) + + # Support for `__qualname__` + if name.count(".") == 1: + outer_name, inner_name = name.split(".") + outer_obj = vars(mod).get(outer_name, None) + obj = ( + getattr(outer_obj, inner_name, None) + if outer_obj is not None + else None + ) + + if obj is not None: + return obj + + raise TypeError( + f"Could not locate {obj_type} '{name}'. " + "Make sure custom classes are decorated with " + "`@keras.saving.register_keras_serializable()`. " + f"Full object config: {full_config}" + ) diff --git a/keras/saving/serialization_lib_test.py b/keras/saving/serialization_lib_test.py new file mode 100644 index 000000000000..6645ee9b777f --- /dev/null +++ b/keras/saving/serialization_lib_test.py @@ -0,0 +1,488 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for serialization_lib.""" + +import json + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +import keras +from keras.saving import serialization_lib +from keras.saving.legacy import serialization as legacy_serialization +from keras.testing_infra import test_utils + + +def custom_fn(x): + return x**2 + + +class CustomLayer(keras.layers.Layer): + def __init__(self, factor): + super().__init__() + self.factor = factor + + def call(self, x): + return x * self.factor + + def get_config(self): + return {"factor": self.factor} + + +class NestedCustomLayer(keras.layers.Layer): + def __init__(self, factor, dense=None, activation=None): + super().__init__() + self.factor = factor + + if dense is None: + self.dense = keras.layers.Dense(1, activation=custom_fn) + else: + self.dense = serialization_lib.deserialize_keras_object(dense) + if activation is None: + self.activation = keras.layers.Activation("relu") + else: + self.activation = serialization_lib.deserialize_keras_object( + activation + ) + + def call(self, x): + return self.dense(x * self.factor) + + def get_config(self): + return { + "factor": self.factor, + "dense": self.dense, + "activation": self.activation, + } + + +class WrapperLayer(keras.layers.Layer): + def __init__(self, layer, **kwargs): + super().__init__(**kwargs) + self.layer = layer + + def call(self, x): + return self.layer(x) + + def get_config(self): + config = super().get_config() + return {"layer": self.layer, **config} + + +@test_utils.run_v2_only +class SerializationLibTest(tf.test.TestCase, parameterized.TestCase): + def roundtrip(self, obj, custom_objects=None, safe_mode=True): + serialized = serialization_lib.serialize_keras_object(obj) + json_data = json.dumps(serialized) + json_data = json.loads(json_data) + deserialized = serialization_lib.deserialize_keras_object( + json_data, custom_objects=custom_objects, safe_mode=safe_mode + ) + reserialized = serialization_lib.serialize_keras_object(deserialized) + return serialized, deserialized, reserialized + + @parameterized.named_parameters( + ("str", "hello"), + ("bytes", b"hello"), + ("nparray_int", np.array([0, 1])), + ("nparray_float", np.array([0.0, 1.0])), + ("nparray_item", np.float32(1.0)), + ("plain_types_list", ["hello", 0, "world", 1.0, True]), + ("plain_types_dict", {"1": "hello", "2": 0, "3": True}), + ("plain_types_nested_dict", {"1": "hello", "2": [True, False]}), + ) + def test_simple_objects(self, obj): + serialized, _, reserialized = self.roundtrip(obj) + self.assertEqual(serialized, reserialized) + + def test_builtin_layers(self): + serialized, _, reserialized = self.roundtrip(keras.layers.Dense(3)) + self.assertEqual(serialized, reserialized) + + def test_tensors_and_tensorshape(self): + x = tf.random.normal((2, 2), dtype="float64") + obj = {"x": x} + _, new_obj, _ = self.roundtrip(obj) + self.assertAllClose(x, new_obj["x"], atol=1e-5) + + obj = {"x.shape": x.shape} + _, new_obj, _ = self.roundtrip(obj) + self.assertListEqual(x.shape.as_list(), new_obj["x.shape"]) + + def test_custom_fn(self): + obj = {"activation": custom_fn} + serialized, _, reserialized = self.roundtrip( + obj, custom_objects={"custom_fn": custom_fn} + ) + self.assertEqual(serialized, reserialized) + + # Test inside layer + dense = keras.layers.Dense(1, activation=custom_fn) + dense.build((None, 2)) + _, new_dense, _ = self.roundtrip( + dense, custom_objects={"custom_fn": custom_fn} + ) + x = 
tf.random.normal((2, 2)) + y1 = dense(x) + _ = new_dense(x) + new_dense.set_weights(dense.get_weights()) + y2 = new_dense(x) + self.assertAllClose(y1, y2, atol=1e-5) + + def test_custom_layer(self): + layer = CustomLayer(factor=2) + x = tf.random.normal((2, 2)) + y1 = layer(x) + _, new_layer, _ = self.roundtrip( + layer, custom_objects={"CustomLayer": CustomLayer} + ) + y2 = new_layer(x) + self.assertAllClose(y1, y2, atol=1e-5) + + layer = NestedCustomLayer(factor=2) + x = tf.random.normal((2, 2)) + y1 = layer(x) + _, new_layer, _ = self.roundtrip( + layer, + custom_objects={ + "NestedCustomLayer": NestedCustomLayer, + "custom_fn": custom_fn, + }, + ) + _ = new_layer(x) + new_layer.set_weights(layer.get_weights()) + y2 = new_layer(x) + self.assertAllClose(y1, y2, atol=1e-5) + + def test_lambda_fn(self): + obj = {"activation": lambda x: x**2} + with self.assertRaisesRegex(ValueError, "arbitrary code execution"): + self.roundtrip(obj, safe_mode=True) + + _, new_obj, _ = self.roundtrip(obj, safe_mode=False) + self.assertEqual(obj["activation"](3), new_obj["activation"](3)) + + def test_lambda_layer(self): + lmbda = keras.layers.Lambda(lambda x: x**2) + with self.assertRaisesRegex(ValueError, "arbitrary code execution"): + self.roundtrip(lmbda, safe_mode=True) + + _, new_lmbda, _ = self.roundtrip(lmbda, safe_mode=False) + x = tf.random.normal((2, 2)) + y1 = lmbda(x) + y2 = new_lmbda(x) + self.assertAllClose(y1, y2, atol=1e-5) + + def test_safe_mode_scope(self): + lmbda = keras.layers.Lambda(lambda x: x**2) + with serialization_lib.SafeModeScope(safe_mode=True): + with self.assertRaisesRegex(ValueError, "arbitrary code execution"): + self.roundtrip(lmbda) + with serialization_lib.SafeModeScope(safe_mode=False): + _, new_lmbda, _ = self.roundtrip(lmbda) + x = tf.random.normal((2, 2)) + y1 = lmbda(x) + y2 = new_lmbda(x) + self.assertAllClose(y1, y2, atol=1e-5) + + def test_tensorspec(self): + inputs = keras.Input(type_spec=tf.TensorSpec((2, 2), tf.float32)) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs, outputs) + _, new_model, _ = self.roundtrip(model) + x = tf.random.normal((2, 2)) + y1 = model(x) + new_model.set_weights(model.get_weights()) + y2 = new_model(x) + self.assertAllClose(y1, y2, atol=1e-5) + + def test_shared_inner_layer(self): + input_1 = keras.Input((2,)) + input_2 = keras.Input((2,)) + shared_layer = keras.layers.Dense(1) + output_1 = shared_layer(input_1) + wrapper_layer = WrapperLayer(shared_layer) + output_2 = wrapper_layer(input_2) + model = keras.Model([input_1, input_2], [output_1, output_2]) + _, new_model, _ = self.roundtrip( + model, custom_objects={"WrapperLayer": WrapperLayer} + ) + + self.assertIs(model.layers[2], model.layers[3].layer) + self.assertIs(new_model.layers[2], new_model.layers[3].layer) + + def test_functional_subclass(self): + class PlainFunctionalSubclass(keras.Model): + pass + + inputs = keras.Input((2,)) + outputs = keras.layers.Dense(1)(inputs) + model = PlainFunctionalSubclass(inputs, outputs) + x = tf.random.normal((2, 2)) + y1 = model(x) + _, new_model, _ = self.roundtrip( + model, + custom_objects={"PlainFunctionalSubclass": PlainFunctionalSubclass}, + ) + new_model.set_weights(model.get_weights()) + y2 = new_model(x) + self.assertAllClose(y1, y2, atol=1e-5) + self.assertIsInstance(new_model, PlainFunctionalSubclass) + + class FunctionalSubclassWCustomInit(keras.Model): + def __init__(self, num_units=1, **kwargs): + inputs = keras.Input((2,)) + outputs = keras.layers.Dense(num_units)(inputs) + super().__init__(inputs, outputs)
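+
+        # The functional graph is built inside __init__, so the revived
+        # instance should match the original numerically and keep the
+        # subclass type, as asserted below.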
+ + model = FunctionalSubclassWCustomInit(num_units=2) + x = tf.random.normal((2, 2)) + y1 = model(x) + _, new_model, _ = self.roundtrip( + model, + custom_objects={ + "FunctionalSubclassWCustomInit": FunctionalSubclassWCustomInit + }, + ) + new_model.set_weights(model.get_weights()) + y2 = new_model(x) + self.assertAllClose(y1, y2, atol=1e-5) + self.assertIsInstance(new_model, FunctionalSubclassWCustomInit) + + def test_shared_object(self): + class MyLayer(keras.layers.Layer): + def __init__(self, activation, **kwargs): + super().__init__(**kwargs) + if isinstance(activation, dict): + self.activation = ( + serialization_lib.deserialize_keras_object(activation) + ) + else: + self.activation = activation + + def call(self, x): + return self.activation(x) + + def get_config(self): + config = super().get_config() + config["activation"] = self.activation + return config + + class SharedActivation: + def __call__(self, x): + return x**2 + + def get_config(self): + return {} + + @classmethod + def from_config(cls, config): + return cls() + + shared_act = SharedActivation() + layer_1 = MyLayer(activation=shared_act) + layer_2 = MyLayer(activation=shared_act) + layers = [layer_1, layer_2] + + with serialization_lib.ObjectSharingScope(): + serialized, new_layers, reserialized = self.roundtrip( + layers, + custom_objects={ + "MyLayer": MyLayer, + "SharedActivation": SharedActivation, + }, + ) + self.assertIn("shared_object_id", serialized[0]["config"]["activation"]) + obj_id = serialized[0]["config"]["activation"] + self.assertIn("shared_object_id", serialized[1]["config"]["activation"]) + self.assertEqual(obj_id, serialized[1]["config"]["activation"]) + self.assertIs(layers[0].activation, layers[1].activation) + self.assertIs(new_layers[0].activation, new_layers[1].activation) + + def test_legacy_internal_object(self): + from keras.layers.rnn.legacy_cells import ( + LSTMCell, # pylint: disable=C6204 + ) + + # tf.nn.rnn_cell.LSTMCell belongs to keras.__internal__.legacy namespace + cell = LSTMCell(32) + x = keras.Input((None, 5)) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer="rmsprop", loss="mse") + + x_in = np.random.random((3, 5, 5)) + y_out_1 = model.predict(x_in) + weights = model.get_weights() + + # serialize and deserialize + config = serialization_lib.serialize_keras_object(layer) + layer = serialization_lib.deserialize_keras_object( + config, + custom_objects={"LSTMCell": LSTMCell}, + ) + + # Restore RNN cell into model with weights + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_out_2 = model.predict(x_in) + + self.assertAllClose(y_out_1, y_out_2, atol=1e-5) + + +@keras.utils.register_keras_serializable() +class MyDense(keras.layers.Layer): + def __init__( + self, + units, + *, + kernel_regularizer=None, + kernel_initializer=None, + **kwargs + ): + super().__init__(**kwargs) + self._units = units + self._kernel_regularizer = kernel_regularizer + self._kernel_initializer = kernel_initializer + + def get_config(self): + return dict( + units=self._units, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + **super().get_config() + ) + + def build(self, input_shape): + unused_batch_size, input_units = input_shape + self._kernel = self.add_weight( + "kernel", + [input_units, self._units], + dtype=tf.float32, + regularizer=self._kernel_regularizer, + initializer=self._kernel_initializer, + ) + + def call(self, inputs): + return tf.matmul(inputs, 
self._kernel) + + +@keras.utils.register_keras_serializable() +class MyWrapper(keras.layers.Layer): + def __init__(self, wrapped, **kwargs): + super().__init__(**kwargs) + self._wrapped = wrapped + + def get_config(self): + return dict(wrapped=self._wrapped, **super().get_config()) + + @classmethod + def from_config(cls, config): + config["wrapped"] = keras.utils.deserialize_keras_object( + config["wrapped"] + ) + return cls(**config) + + def call(self, inputs): + return self._wrapped(inputs) + + +@test_utils.run_v2_only +class JsonSerializationTest(tf.test.TestCase, parameterized.TestCase): + def test_serialize_deserialize_custom_layer_json(self): + reg = keras.regularizers.L2(0.101) + ini = keras.initializers.Constant(1.0) + dense = MyDense(4, kernel_regularizer=reg, kernel_initializer=ini) + inputs = keras.layers.Input(shape=[3]) + outputs = dense(inputs) + model = keras.Model(inputs, outputs) + + model_json = model.to_json() + model2 = keras.models.model_from_json(model_json) + + self.assertEqual(model_json, model2.to_json()) + + def test_serialize_deserialize_custom_layer_with_wrapper_json(self): + reg = keras.regularizers.L2(0.101) + ini = keras.initializers.Constant(1.0) + dense = MyDense(4, kernel_regularizer=reg, kernel_initializer=ini) + wrapper = MyWrapper(dense) + inputs = keras.layers.Input(shape=[3]) + outputs = wrapper(inputs) + model = keras.Model(inputs, outputs) + + model_json = model.to_json() + model2 = keras.models.model_from_json(model_json) + + self.assertEqual(model_json, model2.to_json()) + + +@test_utils.run_v2_only +class BackwardsCompatibilityTest(tf.test.TestCase, parameterized.TestCase): + def assert_old_format_can_be_deserialized(self, obj, custom_objects=None): + old_config = legacy_serialization.serialize_keras_object(obj) + revived = serialization_lib.deserialize_keras_object( + old_config, custom_objects=custom_objects + ) + new_config_1 = serialization_lib.serialize_keras_object(obj) + new_config_2 = serialization_lib.serialize_keras_object(revived) + self.assertEqual(new_config_1, new_config_2) + + def test_backwards_compatibility_with_old_serialized_format(self): + optimizer = keras.optimizers.Adam(learning_rate=0.1) + self.assert_old_format_can_be_deserialized( + optimizer, custom_objects=vars(keras.optimizers) + ) + activation = keras.activations.relu + self.assert_old_format_can_be_deserialized( + activation, custom_objects=vars(keras.activations) + ) + initializer = keras.initializers.VarianceScaling(scale=2.0) + self.assert_old_format_can_be_deserialized( + initializer, custom_objects=vars(keras.initializers) + ) + regularizer = keras.regularizers.L2(0.3) + self.assert_old_format_can_be_deserialized( + regularizer, custom_objects=vars(keras.regularizers) + ) + constraint = keras.constraints.UnitNorm() + self.assert_old_format_can_be_deserialized( + constraint, custom_objects=vars(keras.constraints) + ) + layer = keras.layers.Dense(2) + self.assert_old_format_can_be_deserialized( + layer, custom_objects=vars(keras.layers) + ) + layer = keras.layers.MultiHeadAttention(2, 4) + self.assert_old_format_can_be_deserialized( + layer, custom_objects=vars(keras.layers) + ) + + # Custom objects + layer = CustomLayer(2) + self.assert_old_format_can_be_deserialized( + layer, custom_objects={"CustomLayer": CustomLayer} + ) + layer = keras.layers.Dense(1, activation=custom_fn) + self.assert_old_format_can_be_deserialized( + layer, custom_objects={**vars(keras.layers), "custom_fn": custom_fn} + ) + + +if __name__ == "__main__": + tf.test.main() diff --git 
a/keras/saving/utils_v1/BUILD b/keras/saving/utils_v1/BUILD deleted file mode 100644 index 3af65e18274d..000000000000 --- a/keras/saving/utils_v1/BUILD +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Description: -# Keras saving and loading libraries. - -# buildifier: disable=same-origin-load - -package( - default_visibility = [ - "//keras:friends", - ], - licenses = ["notice"], -) - -py_library( - name = "utils_v1", - srcs = [ - "__init__.py", - "export_output.py", - "export_utils.py", - "mode_keys.py", - "signature_def_utils.py", - "unexported_constants.py", - ], - srcs_version = "PY3", - deps = [ - "//:expect_tensorflow_installed", - ], -) diff --git a/keras/saving/utils_v1/__init__.py b/keras/saving/utils_v1/__init__.py deleted file mode 100644 index 12a1cafa1c0d..000000000000 --- a/keras/saving/utils_v1/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# LINT.IfChange -"""Utils for saving a Keras Model or Estimator to the SavedModel format.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=wildcard-import -from keras.saving.utils_v1.export_output import * -from keras.saving.utils_v1.export_utils import build_all_signature_defs -from keras.saving.utils_v1.export_utils import export_outputs_for_mode -from keras.saving.utils_v1.export_utils import EXPORT_TAG_MAP -from keras.saving.utils_v1.export_utils import get_export_outputs -from keras.saving.utils_v1.export_utils import get_temp_export_dir -from keras.saving.utils_v1.export_utils import get_timestamped_export_dir -from keras.saving.utils_v1.export_utils import SIGNATURE_KEY_MAP -# pylint: enable=wildcard-import -# LINT.ThenChange(//tensorflow/python/saved_model/model_utils/__init__.py) diff --git a/keras/saving/utils_v1/export_output.py b/keras/saving/utils_v1/export_output.py deleted file mode 100644 index efcf20ef11e6..000000000000 --- a/keras/saving/utils_v1/export_output.py +++ /dev/null @@ -1,421 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
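The `MyWrapper` fixture above illustrates the pattern this test suite keeps exercising: a layer that owns another Keras object must revive it explicitly in `from_config`, because the nested object arrives as a serialized dict. A minimal self-contained sketch of that pattern (the `ScaledLayer` name and the scaling behavior are illustrative assumptions, not code from this diff):

import keras

@keras.utils.register_keras_serializable(package="example")
class ScaledLayer(keras.layers.Layer):
    """Hypothetical wrapper that rescales the output of an inner layer."""

    def __init__(self, inner, factor=2.0, **kwargs):
        super().__init__(**kwargs)
        self.inner = inner
        self.factor = factor

    def get_config(self):
        # Nested Keras objects placed in the config are serialized to dicts.
        return dict(
            inner=self.inner, factor=self.factor, **super().get_config()
        )

    @classmethod
    def from_config(cls, config):
        # The nested dict must be revived before calling __init__.
        config["inner"] = keras.utils.deserialize_keras_object(config["inner"])
        return cls(**config)

    def call(self, inputs):
        return self.factor * self.inner(inputs)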
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# LINT.IfChange -"""Classes for different types of export output.""" - -import tensorflow.compat.v2 as tf - -import abc -from keras.saving.utils_v1 import signature_def_utils as unexported_signature_utils - - -class ExportOutput: - """Represents an output of a model that can be served. - - These typically correspond to model heads. - """ - - __metaclass__ = abc.ABCMeta - - _SEPARATOR_CHAR = '/' - - @abc.abstractmethod - def as_signature_def(self, receiver_tensors): - """Generate a SignatureDef proto for inclusion in a MetaGraphDef. - - The SignatureDef will specify outputs as described in this ExportOutput, - and will use the provided receiver_tensors as inputs. - - Args: - receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying - input nodes that will be fed. - """ - pass - - def _check_output_key(self, key, error_label): - # For multi-head models, the key can be a tuple. - if isinstance(key, tuple): - key = self._SEPARATOR_CHAR.join(key) - - if not isinstance(key, str): - raise ValueError( - '{} output key must be a string; got {}.'.format(error_label, key)) - return key - - def _wrap_and_check_outputs( - self, outputs, single_output_default_name, error_label=None): - """Wraps raw tensors as dicts and checks type. - - Note that we create a new dict here so that we can overwrite the keys - if necessary. - - Args: - outputs: A `Tensor` or a dict of string to `Tensor`. - single_output_default_name: A string key for use in the output dict - if the provided `outputs` is a raw tensor. - error_label: descriptive string for use in error messages. If none, - single_output_default_name will be used. - - Returns: - A dict of tensors - - Raises: - ValueError: if the outputs dict keys are not strings or tuples of strings - or the values are not Tensors. - """ - if not isinstance(outputs, dict): - outputs = {single_output_default_name: outputs} - - output_dict = {} - for key, value in outputs.items(): - error_name = error_label or single_output_default_name - key = self._check_output_key(key, error_name) - if not isinstance(value, tf.Tensor): - raise ValueError( - '{} output value must be a Tensor; got {}.'.format( - error_name, value)) - - output_dict[key] = value - return output_dict - - -class ClassificationOutput(ExportOutput): - """Represents the output of a classification head. - - Either classes or scores or both must be set. - - The classes `Tensor` must provide string labels, not integer class IDs. - - If only classes is set, it is interpreted as providing top-k results in - descending order. - - If only scores is set, it is interpreted as providing a score for every class - in order of class ID. - - If both classes and scores are set, they are interpreted as zipped, so each - score corresponds to the class at the same index. Clients should not depend - on the order of the entries. 
- """ - - def __init__(self, scores=None, classes=None): - """Constructor for `ClassificationOutput`. - - Args: - scores: A float `Tensor` giving scores (sometimes but not always - interpretable as probabilities) for each class. May be `None`, but - only if `classes` is set. Interpretation varies-- see class doc. - classes: A string `Tensor` giving predicted class labels. May be `None`, - but only if `scores` is set. Interpretation varies-- see class doc. - - Raises: - ValueError: if neither classes nor scores is set, or one of them is not a - `Tensor` with the correct dtype. - """ - if (scores is not None - and not (isinstance(scores, tf.Tensor) - and scores.dtype.is_floating)): - raise ValueError('Classification scores must be a float32 Tensor; ' - 'got {}'.format(scores)) - if (classes is not None - and not (isinstance(classes, tf.Tensor) - and tf.as_dtype(classes.dtype) == tf.string)): - raise ValueError('Classification classes must be a string Tensor; ' - 'got {}'.format(classes)) - if scores is None and classes is None: - raise ValueError('Cannot create a ClassificationOutput with empty ' - 'arguments. At least one of `scores` and `classes` ' - 'must be defined.') - self._scores = scores - self._classes = classes - - @property - def scores(self): - return self._scores - - @property - def classes(self): - return self._classes - - def as_signature_def(self, receiver_tensors): - if len(receiver_tensors) != 1: - raise ValueError( - 'Classification signatures can only accept a single tensor input of ' - 'type tf.string. Please check to make sure that you have structured ' - 'the serving_input_receiver_fn so that it creates a single string ' - 'placeholder. If your model function expects multiple inputs, then ' - 'use `tf.io.parse_example()` to parse the string into multiple ' - f'tensors.\n Received: {receiver_tensors}') - (_, examples), = receiver_tensors.items() - if tf.as_dtype(examples.dtype) != tf.string: - raise ValueError( - 'Classification signatures can only accept a single tensor input of ' - 'type tf.string. Please check to make sure that you have structured ' - 'the serving_input_receiver_fn so that it creates a single string ' - 'placeholder. If your model function expects multiple inputs, then ' - 'use `tf.io.parse_example()` to parse the string into multiple ' - f'tensors.\n Received: {receiver_tensors}') - return tf.compat.v1.saved_model.classification_signature_def( - examples, self.classes, self.scores) - - -class RegressionOutput(ExportOutput): - """Represents the output of a regression head.""" - - def __init__(self, value): - """Constructor for `RegressionOutput`. - - Args: - value: a float `Tensor` giving the predicted values. Required. - - Raises: - ValueError: if the value is not a `Tensor` with dtype tf.float32. - """ - if not (isinstance(value, tf.Tensor) and value.dtype.is_floating): - raise ValueError('Regression output value must be a float32 Tensor; ' - 'got {}'.format(value)) - self._value = value - - @property - def value(self): - return self._value - - def as_signature_def(self, receiver_tensors): - if len(receiver_tensors) != 1: - raise ValueError( - 'Regression signatures can only accept a single tensor input of ' - 'type tf.string. Please check to make sure that you have structured ' - 'the serving_input_receiver_fn so that it creates a single string ' - 'placeholder. 
If your model function expects multiple inputs, then ' - 'use `tf.io.parse_example()` to parse the string into multiple ' - f'tensors.\n Received: {receiver_tensors}') - (_, examples), = receiver_tensors.items() - if tf.as_dtype(examples.dtype) != tf.string: - raise ValueError( - 'Regression signatures can only accept a single tensor input of ' - 'type tf.string. Please check to make sure that you have structured ' - 'the serving_input_receiver_fn so that it creates a single string ' - 'placeholder. If your model function expects multiple inputs, then ' - 'use `tf.io.parse_example()` to parse the string into multiple ' - f'tensors.\n Received: {receiver_tensors}') - return tf.compat.v1.saved_model.regression_signature_def(examples, self.value) - - -class PredictOutput(ExportOutput): - """Represents the output of a generic prediction head. - - A generic prediction need not be either a classification or a regression. - - Named outputs must be provided as a dict from string to `Tensor`, - """ - _SINGLE_OUTPUT_DEFAULT_NAME = 'output' - - def __init__(self, outputs): - """Constructor for PredictOutput. - - Args: - outputs: A `Tensor` or a dict of string to `Tensor` representing the - predictions. - - Raises: - ValueError: if the outputs is not dict, or any of its keys are not - strings, or any of its values are not `Tensor`s. - """ - - self._outputs = self._wrap_and_check_outputs( - outputs, self._SINGLE_OUTPUT_DEFAULT_NAME, error_label='Prediction') - - @property - def outputs(self): - return self._outputs - - def as_signature_def(self, receiver_tensors): - return tf.compat.v1.saved_model.predict_signature_def(receiver_tensors, - self.outputs) - - -class _SupervisedOutput(ExportOutput): - """Represents the output of a supervised training or eval process.""" - __metaclass__ = abc.ABCMeta - - LOSS_NAME = 'loss' - PREDICTIONS_NAME = 'predictions' - METRICS_NAME = 'metrics' - - METRIC_VALUE_SUFFIX = 'value' - METRIC_UPDATE_SUFFIX = 'update_op' - - _loss = None - _predictions = None - _metrics = None - - def __init__(self, loss=None, predictions=None, metrics=None): - """Constructor for SupervisedOutput (ie, Train or Eval output). - - Args: - loss: dict of Tensors or single Tensor representing calculated loss. - predictions: dict of Tensors or single Tensor representing model - predictions. - metrics: Dict of metric results keyed by name. - The values of the dict can be one of the following: - (1) instance of `Metric` class. - (2) (metric_value, update_op) tuples, or a single tuple. - metric_value must be a Tensor, and update_op must be a Tensor or Op. - - Raises: - ValueError: if any of the outputs' dict keys are not strings or tuples of - strings or the values are not Tensors (or Operations in the case of - update_op). - """ - - if loss is not None: - loss_dict = self._wrap_and_check_outputs(loss, self.LOSS_NAME) - self._loss = self._prefix_output_keys(loss_dict, self.LOSS_NAME) - if predictions is not None: - pred_dict = self._wrap_and_check_outputs( - predictions, self.PREDICTIONS_NAME) - self._predictions = self._prefix_output_keys( - pred_dict, self.PREDICTIONS_NAME) - if metrics is not None: - self._metrics = self._wrap_and_check_metrics(metrics) - - def _prefix_output_keys(self, output_dict, output_name): - """Prepend output_name to the output_dict keys if it doesn't exist. - - This produces predictable prefixes for the pre-determined outputs - of SupervisedOutput. - - Args: - output_dict: dict of string to Tensor, assumed valid. - output_name: prefix string to prepend to existing keys. 
- - Returns: - dict with updated keys and existing values. - """ - - new_outputs = {} - for key, val in output_dict.items(): - key = self._prefix_key(key, output_name) - new_outputs[key] = val - return new_outputs - - def _prefix_key(self, key, output_name): - if key.find(output_name) != 0: - key = output_name + self._SEPARATOR_CHAR + key - return key - - def _wrap_and_check_metrics(self, metrics): - """Handle the saving of metrics. - - Metrics is either a tuple of (value, update_op), or a dict of such tuples. - Here, we separate out the tuples and create a dict with names to tensors. - - Args: - metrics: Dict of metric results keyed by name. - The values of the dict can be one of the following: - (1) instance of `Metric` class. - (2) (metric_value, update_op) tuples, or a single tuple. - metric_value must be a Tensor, and update_op must be a Tensor or Op. - - Returns: - dict of output_names to tensors - - Raises: - ValueError: if the dict key is not a string, or the metric values or ops - are not tensors. - """ - if not isinstance(metrics, dict): - metrics = {self.METRICS_NAME: metrics} - - outputs = {} - for key, value in metrics.items(): - if isinstance(value, tuple): - metric_val, metric_op = value - else: # value is a keras.Metrics object - metric_val = value.result() - assert len(value.updates) == 1 # We expect only one update op. - metric_op = value.updates[0] - key = self._check_output_key(key, self.METRICS_NAME) - key = self._prefix_key(key, self.METRICS_NAME) - - val_name = key + self._SEPARATOR_CHAR + self.METRIC_VALUE_SUFFIX - op_name = key + self._SEPARATOR_CHAR + self.METRIC_UPDATE_SUFFIX - if not isinstance(metric_val, tf.Tensor): - raise ValueError( - '{} output value must be a Tensor; got {}.'.format( - key, metric_val)) - if not (tf.is_tensor(metric_op) or - isinstance(metric_op, tf.Operation)): - raise ValueError( - '{} update_op must be a Tensor or Operation; got {}.'.format( - key, metric_op)) - - # We must wrap any ops (or variables) in a Tensor before export, as the - # SignatureDef proto expects tensors only. See b/109740581 - metric_op_tensor = metric_op - if not isinstance(metric_op, tf.Tensor): - with tf.control_dependencies([metric_op]): - metric_op_tensor = tf.constant([], name='metric_op_wrapper') - - outputs[val_name] = metric_val - outputs[op_name] = metric_op_tensor - - return outputs - - @property - def loss(self): - return self._loss - - @property - def predictions(self): - return self._predictions - - @property - def metrics(self): - return self._metrics - - @abc.abstractmethod - def _get_signature_def_fn(self): - """Returns a function that produces a SignatureDef given desired outputs.""" - pass - - def as_signature_def(self, receiver_tensors): - signature_def_fn = self._get_signature_def_fn() - return signature_def_fn( - receiver_tensors, self.loss, self.predictions, self.metrics) - - -class TrainOutput(_SupervisedOutput): - """Represents the output of a supervised training process. - - This class generates the appropriate signature def for exporting - training output by type-checking and wrapping loss, predictions, and metrics - values. - """ - - def _get_signature_def_fn(self): - return unexported_signature_utils.supervised_train_signature_def - - -class EvalOutput(_SupervisedOutput): - """Represents the output of a supervised eval process. - - This class generates the appropriate signature def for exporting - eval output by type-checking and wrapping loss, predictions, and metrics - values. 
- """ - - def _get_signature_def_fn(self): - return unexported_signature_utils.supervised_eval_signature_def -# LINT.ThenChange(//tensorflow/python/saved_model/model_utils/export_output.py) diff --git a/keras/saving/utils_v1/export_utils.py b/keras/saving/utils_v1/export_utils.py deleted file mode 100644 index ceb1cf91df93..000000000000 --- a/keras/saving/utils_v1/export_utils.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# LINT.IfChange -"""Utilities for creating SavedModels.""" - -import collections -import os -import time - -from keras.saving.utils_v1 import export_output as export_output_lib -from keras.saving.utils_v1 import mode_keys -from keras.saving.utils_v1 import unexported_constants -from keras.saving.utils_v1.mode_keys import KerasModeKeys as ModeKeys -import tensorflow.compat.v2 as tf - -from tensorflow.python.platform import tf_logging as logging - - -# Mapping of the modes to appropriate MetaGraph tags in the SavedModel. -EXPORT_TAG_MAP = mode_keys.ModeKeyMap(**{ - ModeKeys.PREDICT: [tf.saved_model.SERVING], - ModeKeys.TRAIN: [tf.saved_model.TRAINING], - ModeKeys.TEST: [unexported_constants.EVAL]}) - -# For every exported mode, a SignatureDef map should be created using the -# functions `export_outputs_for_mode` and `build_all_signature_defs`. By -# default, this map will contain a single Signature that defines the input -# tensors and output predictions, losses, and/or metrics (depending on the mode) -# The default keys used in the SignatureDef map are defined below. -SIGNATURE_KEY_MAP = mode_keys.ModeKeyMap(**{ - ModeKeys.PREDICT: tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY, - ModeKeys.TRAIN: unexported_constants.DEFAULT_TRAIN_SIGNATURE_DEF_KEY, - ModeKeys.TEST: unexported_constants.DEFAULT_EVAL_SIGNATURE_DEF_KEY}) - -# Default names used in the SignatureDef input map, which maps strings to -# TensorInfo protos. -SINGLE_FEATURE_DEFAULT_NAME = 'feature' -SINGLE_RECEIVER_DEFAULT_NAME = 'input' -SINGLE_LABEL_DEFAULT_NAME = 'label' - -### Below utilities are specific to SavedModel exports. - - -def build_all_signature_defs(receiver_tensors, - export_outputs, - receiver_tensors_alternatives=None, - serving_only=True): - """Build `SignatureDef`s for all export outputs. - - Args: - receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying - input nodes where this receiver expects to be fed by default. Typically, - this is a single placeholder expecting serialized `tf.Example` protos. - export_outputs: a dict of ExportOutput instances, each of which has - an as_signature_def instance method that will be called to retrieve - the signature_def for all export output tensors. - receiver_tensors_alternatives: a dict of string to additional - groups of receiver tensors, each of which may be a `Tensor` or a dict of - string to `Tensor`. 
These named receiver tensor alternatives generate - additional serving signatures, which may be used to feed inputs at - different points within the input receiver subgraph. A typical usage is - to allow feeding raw feature `Tensor`s *downstream* of the - tf.io.parse_example() op. Defaults to None. - serving_only: boolean; if true, resulting signature defs will only include - valid serving signatures. If false, all requested signatures will be - returned. - - Returns: - signature_def representing all passed args. - - Raises: - ValueError: if export_outputs is not a dict - """ - if not isinstance(receiver_tensors, dict): - receiver_tensors = {SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors} - if export_outputs is None or not isinstance(export_outputs, dict): - raise ValueError('`export_outputs` must be a dict. Received ' - f'{export_outputs} with type ' - f'{type(export_outputs).__name__}.') - - signature_def_map = {} - excluded_signatures = {} - for output_key, export_output in export_outputs.items(): - signature_name = '{}'.format(output_key or 'None') - try: - signature = export_output.as_signature_def(receiver_tensors) - signature_def_map[signature_name] = signature - except ValueError as e: - excluded_signatures[signature_name] = str(e) - - if receiver_tensors_alternatives: - for receiver_name, receiver_tensors_alt in ( - receiver_tensors_alternatives.items()): - if not isinstance(receiver_tensors_alt, dict): - receiver_tensors_alt = { - SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors_alt - } - for output_key, export_output in export_outputs.items(): - signature_name = '{}:{}'.format(receiver_name or 'None', output_key or - 'None') - try: - signature = export_output.as_signature_def(receiver_tensors_alt) - signature_def_map[signature_name] = signature - except ValueError as e: - excluded_signatures[signature_name] = str(e) - - _log_signature_report(signature_def_map, excluded_signatures) - - # The above calls to export_output_lib.as_signature_def should return only - # valid signatures; if there is a validity problem, they raise a ValueError, - # in which case we exclude that signature from signature_def_map above. - # The is_valid_signature check ensures that the signatures produced are - # valid for serving, and acts as an additional sanity check for export - # signatures produced for serving. We skip this check for training and eval - # signatures, which are not intended for serving. - if serving_only: - signature_def_map = { - k: v - for k, v in signature_def_map.items() - if tf.compat.v1.saved_model.is_valid_signature(v) - } - return signature_def_map - - -_FRIENDLY_METHOD_NAMES = { - tf.saved_model.CLASSIFY_METHOD_NAME: 'Classify', - tf.saved_model.REGRESS_METHOD_NAME: 'Regress', - tf.saved_model.PREDICT_METHOD_NAME: 'Predict', - unexported_constants.SUPERVISED_TRAIN_METHOD_NAME: 'Train', - unexported_constants.SUPERVISED_EVAL_METHOD_NAME: 'Eval', -} - - -def _log_signature_report(signature_def_map, excluded_signatures): - """Log a report of which signatures were produced.""" - sig_names_by_method_name = collections.defaultdict(list) - - # We'll collect whatever method_names are present, but also we want to make - # sure to output a line for each of the three standard methods even if they - # have no signatures. 
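For orientation, this is roughly how the `build_all_signature_defs` helper above was driven by the v1 SavedModel export path. It is a sketch using the removed module's own names; `scores` and `serialized_tf_example` are assumed pre-built graph-mode tensors, not code from this diff:

# Build one SignatureDef per export output (sketch; graph mode assumed).
receiver_tensors = {"examples": serialized_tf_example}
export_outputs = {
    "classification": ClassificationOutput(scores=scores),
    "serving_default": PredictOutput({"scores": scores}),
}
signature_def_map = build_all_signature_defs(receiver_tensors, export_outputs)
# With serving_only=True (the default), signatures that fail
# tf.compat.v1.saved_model.is_valid_signature are filtered out of the map.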
- for method_name in _FRIENDLY_METHOD_NAMES: - sig_names_by_method_name[method_name] = [] - - for signature_name, sig in signature_def_map.items(): - sig_names_by_method_name[sig.method_name].append(signature_name) - - # TODO(b/67733540): consider printing the full signatures, not just names - for method_name, sig_names in sig_names_by_method_name.items(): - if method_name in _FRIENDLY_METHOD_NAMES: - method_name = _FRIENDLY_METHOD_NAMES[method_name] - logging.info('Signatures INCLUDED in export for {}: {}'.format( - method_name, sig_names if sig_names else 'None')) - - if excluded_signatures: - logging.info('Signatures EXCLUDED from export because they cannot be ' - 'be served via TensorFlow Serving APIs:') - for signature_name, message in excluded_signatures.items(): - logging.info('\'{}\' : {}'.format(signature_name, message)) - - if not signature_def_map: - logging.warning('Export includes no signatures!') - elif (tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY not in - signature_def_map): - logging.warning('Export includes no default signature!') - - -# When we create a timestamped directory, there is a small chance that the -# directory already exists because another process is also creating these -# directories. In this case we just wait one second to get a new timestamp and -# try again. If this fails several times in a row, then something is seriously -# wrong. -MAX_DIRECTORY_CREATION_ATTEMPTS = 10 - - -def get_timestamped_export_dir(export_dir_base): - """Builds a path to a new subdirectory within the base directory. - - Each export is written into a new subdirectory named using the - current time. This guarantees monotonically increasing version - numbers even across multiple runs of the pipeline. - The timestamp used is the number of seconds since epoch UTC. - - Args: - export_dir_base: A string containing a directory to write the exported - graph and checkpoints. - Returns: - The full path of the new subdirectory (which is not actually created yet). - - Raises: - RuntimeError: if repeated attempts fail to obtain a unique timestamped - directory name. - """ - attempts = 0 - while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS: - timestamp = int(time.time()) - - result_dir = tf.io.gfile.join( - tf.compat.as_bytes(export_dir_base), tf.compat.as_bytes(str(timestamp))) - if not tf.compat.v1.gfile.Exists(result_dir): - # Collisions are still possible (though extremely unlikely): this - # directory is not actually created yet, but it will be almost - # instantly on return from this function. - return result_dir - time.sleep(1) - attempts += 1 - logging.warning( - 'Directory {} already exists; retrying (attempt {}/{})'.format( - tf.compat.as_str(result_dir), attempts, - MAX_DIRECTORY_CREATION_ATTEMPTS)) - raise RuntimeError('Failed to obtain a unique export directory name after ' - f'{MAX_DIRECTORY_CREATION_ATTEMPTS} attempts.') - - -def get_temp_export_dir(timestamped_export_dir): - """Builds a directory name based on the argument but starting with 'temp-'. - - This relies on the fact that TensorFlow Serving ignores subdirectories of - the base directory that can't be parsed as integers. - - Args: - timestamped_export_dir: the name of the eventual export directory, e.g. - /foo/bar/ - - Returns: - A sister directory prefixed with 'temp-', e.g. /foo/bar/temp-. 
- """ - (dirname, basename) = os.path.split(timestamped_export_dir) - if isinstance(basename, bytes): - str_name = basename.decode('utf-8') - else: - str_name = str(basename) - temp_export_dir = tf.io.gfile.join( - tf.compat.as_bytes(dirname), - tf.compat.as_bytes('temp-{}'.format(str_name))) - return temp_export_dir - - -def export_outputs_for_mode( - mode, serving_export_outputs=None, predictions=None, loss=None, - metrics=None): - """Util function for constructing a `ExportOutput` dict given a mode. - - The returned dict can be directly passed to `build_all_signature_defs` helper - function as the `export_outputs` argument, used for generating a SignatureDef - map. - - Args: - mode: A `ModeKeys` specifying the mode. - serving_export_outputs: Describes the output signatures to be exported to - `SavedModel` and used during serving. Should be a dict or None. - predictions: A dict of Tensors or single Tensor representing model - predictions. This argument is only used if serving_export_outputs is not - set. - loss: A dict of Tensors or single Tensor representing calculated loss. - metrics: A dict of (metric_value, update_op) tuples, or a single tuple. - metric_value must be a Tensor, and update_op must be a Tensor or Op - - Returns: - Dictionary mapping the a key to an `tf.estimator.export.ExportOutput` object - The key is the expected SignatureDef key for the mode. - - Raises: - ValueError: if an appropriate ExportOutput cannot be found for the mode. - """ - if mode not in SIGNATURE_KEY_MAP: - raise ValueError( - f'Export output type not found for `mode`: {mode}. Expected one of: ' - f'{list(SIGNATURE_KEY_MAP.keys())}.\n' - 'One likely error is that V1 Estimator Modekeys were somehow passed to ' - 'this function. Please ensure that you are using the new ModeKeys.') - signature_key = SIGNATURE_KEY_MAP[mode] - if mode_keys.is_predict(mode): - return get_export_outputs(serving_export_outputs, predictions) - elif mode_keys.is_train(mode): - return {signature_key: export_output_lib.TrainOutput( - loss=loss, predictions=predictions, metrics=metrics)} - else: - return {signature_key: export_output_lib.EvalOutput( - loss=loss, predictions=predictions, metrics=metrics)} - - -def get_export_outputs(export_outputs, predictions): - """Validate export_outputs or create default export_outputs. - - Args: - export_outputs: Describes the output signatures to be exported to - `SavedModel` and used during serving. Should be a dict or None. - predictions: Predictions `Tensor` or dict of `Tensor`. - - Returns: - Valid export_outputs dict - - Raises: - TypeError: if export_outputs is not a dict or its values are not - ExportOutput instances. - """ - if export_outputs is None: - default_output = export_output_lib.PredictOutput(predictions) - export_outputs = { - tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: default_output} - - if not isinstance(export_outputs, dict): - raise TypeError( - f'`export_outputs` must be dict, received: {export_outputs}.') - for v in export_outputs.values(): - if not isinstance(v, export_output_lib.ExportOutput): - raise TypeError( - 'Values in `export_outputs` must be ExportOutput objects, ' - f'received: {export_outputs}.') - - _maybe_add_default_serving_output(export_outputs) - - return export_outputs - - -def _maybe_add_default_serving_output(export_outputs): - """Add a default serving output to the export_outputs if not present. - - Args: - export_outputs: Describes the output signatures to be exported to - `SavedModel` and used during serving. Should be a dict. 
- - Returns: - export_outputs dict with default serving signature added if necessary - - Raises: - ValueError: if multiple export_outputs were provided without a default - serving key. - """ - if len(export_outputs) == 1: - (key, value), = export_outputs.items() - if key != tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - export_outputs[ - tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = value - if len(export_outputs) > 1: - if (tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY - not in export_outputs): - raise ValueError( - 'Multiple `export_outputs` were provided, but none of them are ' - 'specified as the default. Use' - '`tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY` to ' - 'specify a default.') - - return export_outputs -# LINT.ThenChange(//tensorflow/python/saved_model/model_utils/export_utils.py) diff --git a/keras/saving/utils_v1/mode_keys.py b/keras/saving/utils_v1/mode_keys.py deleted file mode 100644 index d777cc562962..000000000000 --- a/keras/saving/utils_v1/mode_keys.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# LINT.IfChange -"""Utils for managing different mode strings used by Keras and Estimator models. -""" - -import collections - - -class KerasModeKeys: - """Standard names for model modes. - - The following standard keys are defined: - - * `TRAIN`: training/fitting mode. - * `TEST`: testing/evaluation mode. - * `PREDICT`: prediction/inference mode. - """ - - TRAIN = 'train' - TEST = 'test' - PREDICT = 'predict' - - -# TODO(kathywu): Remove copy in Estimator after nightlies -class EstimatorModeKeys: - """Standard names for Estimator model modes. - - The following standard keys are defined: - - * `TRAIN`: training/fitting mode. - * `EVAL`: testing/evaluation mode. - * `PREDICT`: predication/inference mode. - """ - - TRAIN = 'train' - EVAL = 'eval' - PREDICT = 'infer' - - -def is_predict(mode): - return mode in [KerasModeKeys.PREDICT, EstimatorModeKeys.PREDICT] - - -def is_eval(mode): - return mode in [KerasModeKeys.TEST, EstimatorModeKeys.EVAL] - - -def is_train(mode): - return mode in [KerasModeKeys.TRAIN, EstimatorModeKeys.TRAIN] - - -class ModeKeyMap(collections.abc.Mapping): - """Map using ModeKeys as keys. - - This class creates an immutable mapping from modes to values. For example, - SavedModel export of Keras and Estimator models use this to map modes to their - corresponding MetaGraph tags/SignatureDef keys. - - Since this class uses modes, rather than strings, as keys, both "predict" - (Keras's PREDICT ModeKey) and "infer" (Estimator's PREDICT ModeKey) map to the - same value. - """ - - def __init__(self, **kwargs): - self._internal_dict = {} - self._keys = [] - for key in kwargs: - self._keys.append(key) - dict_key = self._get_internal_key(key) - if dict_key in self._internal_dict: - raise ValueError( - 'Error creating ModeKeyMap. Multiple keys/values found for {} mode.' 
- .format(dict_key)) - self._internal_dict[dict_key] = kwargs[key] - - def _get_internal_key(self, key): - """Return keys used for the internal dictionary.""" - if is_train(key): - return KerasModeKeys.TRAIN - if is_eval(key): - return KerasModeKeys.TEST - if is_predict(key): - return KerasModeKeys.PREDICT - raise ValueError('Invalid mode key: {}.'.format(key)) - - def __getitem__(self, key): - return self._internal_dict[self._get_internal_key(key)] - - def __iter__(self): - return iter(self._keys) - - def __len__(self): - return len(self._keys) -# LINT.ThenChange(//tensorflow/python/saved_model/model_utils/mode_keys.py) diff --git a/keras/saving/utils_v1/signature_def_utils.py b/keras/saving/utils_v1/signature_def_utils.py deleted file mode 100644 index b91d2097b76b..000000000000 --- a/keras/saving/utils_v1/signature_def_utils.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""SignatureDef utility functions implementation.""" - -import tensorflow.compat.v2 as tf - -from keras.saving.utils_v1 import unexported_constants - - -# LINT.IfChange -def supervised_train_signature_def( - inputs, loss, predictions=None, metrics=None): - return _supervised_signature_def( - unexported_constants.SUPERVISED_TRAIN_METHOD_NAME, inputs, loss=loss, - predictions=predictions, metrics=metrics) - - -def supervised_eval_signature_def( - inputs, loss, predictions=None, metrics=None): - return _supervised_signature_def( - unexported_constants.SUPERVISED_EVAL_METHOD_NAME, inputs, loss=loss, - predictions=predictions, metrics=metrics) - - -def _supervised_signature_def( - method_name, inputs, loss=None, predictions=None, - metrics=None): - """Creates a signature for training and eval data. - - This function produces signatures that describe the inputs and outputs - of a supervised process, such as training or evaluation, that - results in loss, metrics, and the like. Note that this function only requires - inputs to be not None. - - Args: - method_name: Method name of the SignatureDef as a string. - inputs: dict of string to `Tensor`. - loss: dict of string to `Tensor` representing computed loss. - predictions: dict of string to `Tensor` representing the output predictions. - metrics: dict of string to `Tensor` representing metric ops. - - Returns: - A train- or eval-flavored signature_def. - - Raises: - ValueError: If inputs or outputs is `None`. 
- """ - if inputs is None or not inputs: - raise ValueError(f'{method_name} `inputs` cannot be None or empty.') - - signature_inputs = {key: tf.compat.v1.saved_model.build_tensor_info(tensor) - for key, tensor in inputs.items()} - - signature_outputs = {} - for output_set in (loss, predictions, metrics): - if output_set is not None: - sig_out = {key: tf.compat.v1.saved_model.build_tensor_info(tensor) - for key, tensor in output_set.items()} - signature_outputs.update(sig_out) - - signature_def = tf.compat.v1.saved_model.build_signature_def( - signature_inputs, signature_outputs, method_name) - - return signature_def -# LINT.ThenChange(//keras/saving/utils_v1/signature_def_utils.py) diff --git a/keras/saving/utils_v1/unexported_constants.py b/keras/saving/utils_v1/unexported_constants.py deleted file mode 100644 index 9936f095df88..000000000000 --- a/keras/saving/utils_v1/unexported_constants.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Signature constants for SavedModel save and restore operations. - -These are the private constants that have not been exported. -""" - -# LINT.IfChange -DEFAULT_TRAIN_SIGNATURE_DEF_KEY = "train" - -DEFAULT_EVAL_SIGNATURE_DEF_KEY = "eval" - -SUPERVISED_TRAIN_METHOD_NAME = "tensorflow/supervised/training" - -SUPERVISED_EVAL_METHOD_NAME = "tensorflow/supervised/eval" -# LINT.ThenChange(//tensorflow/python/saved_model/signature_constants.py) - -# LINT.IfChange -EVAL = "eval" -# LINT.ThenChange(//tensorflow/python/saved_model/tag_constants.py) diff --git a/keras/testing_infra/BUILD b/keras/testing_infra/BUILD index 8f5f1f29eab2..caee29ae0216 100644 --- a/keras/testing_infra/BUILD +++ b/keras/testing_infra/BUILD @@ -1,15 +1,13 @@ # Description: # Contains the Keras testing infrastructure. +# Placeholder: load unaliased py_library +# Placeholder: load unaliased py_test load("@org_keras//keras:keras.bzl", "tf_py_test") package( - default_visibility = [ - "//keras:friends", - "//third_party/py/language/common/layers:__subpackages__", - "//third_party/py/tensorflow_probability:__subpackages__", - "//third_party/tensorflow_text:__subpackages__", - ], + # copybara:uncomment default_applicable_licenses = ["//keras:license"], + default_visibility = ["//keras:friends"], licenses = ["notice"], ) @@ -40,7 +38,7 @@ py_library( "//keras/engine:base_layer_utils", "//keras/layers", "//keras/models", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/utils:tf_contextlib", "//keras/utils:tf_inspect", ], diff --git a/keras/testing_infra/keras_doctest_lib.py b/keras/testing_infra/keras_doctest_lib.py index 0aaa67d039f8..101eb2394854 100644 --- a/keras/testing_infra/keras_doctest_lib.py +++ b/keras/testing_infra/keras_doctest_lib.py @@ -22,21 +22,21 @@ class _FloatExtractor(object): - """Class for extracting floats from a string. 
+ """Class for extracting floats from a string. - For example: + For example: - >>> text_parts, floats = _FloatExtractor()("Text 1.0 Text") - >>> text_parts - ['Text ', ' Text'] - >>> floats - array([1.]) - """ + >>> text_parts, floats = _FloatExtractor()("Text 1.0 Text") + >>> text_parts + ['Text ', ' Text'] + >>> floats + array([1.]) + """ - # Note: non-capturing groups "(?" are not returned in matched groups, or by - # re.split. - _FLOAT_RE = re.compile( - r""" + # Note: non-capturing groups "(?" are not returned in matched groups, or by + # re.split. + _FLOAT_RE = re.compile( + r""" ( # Captures the float value. (?: [-+]| # Start with a sign is okay anywhere. @@ -58,154 +58,166 @@ class _FloatExtractor(object): [^\w.] # * Next char is not a word char or "." ) """.format( - # Digits, a "." and optional more digits: "1.1". - digits_dot_maybe_digits=r'(?:[0-9]+\.(?:[0-9]*))', - # A "." with trailing digits ".23" - dot_digits=r'(?:\.[0-9]+)', - # digits: "12" - digits=r'(?:[0-9]+)', - # The exponent: An "e" or "E", optional sign, and at least one digit. - # "e-123", "E+12", "e12" - exponent=r'(?:[eE][-+]?[0-9]+)'), - re.VERBOSE) - - def __call__(self, string): - """Extracts floats from a string. - - >>> text_parts, floats = _FloatExtractor()("Text 1.0 Text") - >>> text_parts - ['Text ', ' Text'] - >>> floats - array([1.]) - - Args: - string: the string to extract floats from. - - Returns: - A (string, array) pair, where `string` has each float replaced by "..." - and `array` is a `float32` `numpy.array` containing the extracted floats. - """ - texts = [] - floats = [] - for i, part in enumerate(self._FLOAT_RE.split(string)): - if i % 2 == 0: - texts.append(part) - else: - floats.append(float(part)) - - return texts, np.array(floats) + # Digits, a "." and optional more digits: "1.1". + digits_dot_maybe_digits=r"(?:[0-9]+\.(?:[0-9]*))", + # A "." with trailing digits ".23" + dot_digits=r"(?:\.[0-9]+)", + # digits: "12" + digits=r"(?:[0-9]+)", + # The exponent: An "e" or "E", optional sign, and at least one + # digit. "e-123", "E+12", "e12" + exponent=r"(?:[eE][-+]?[0-9]+)", + ), + re.VERBOSE, + ) + + def __call__(self, string): + """Extracts floats from a string. + + >>> text_parts, floats = _FloatExtractor()("Text 1.0 Text") + >>> text_parts + ['Text ', ' Text'] + >>> floats + array([1.]) + + Args: + string: the string to extract floats from. + + Returns: + A (string, array) pair, where `string` has each float replaced by + "..." and `array` is a `float32` `numpy.array` containing the + extracted floats. + """ + texts = [] + floats = [] + for i, part in enumerate(self._FLOAT_RE.split(string)): + if i % 2 == 0: + texts.append(part) + else: + floats.append(float(part)) + + return texts, np.array(floats) class KerasDoctestOutputChecker(doctest.OutputChecker, object): - """Customizes how `want` and `got` are compared, see `check_output`.""" + """Customizes how `want` and `got` are compared, see `check_output`.""" - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.extract_floats = _FloatExtractor() - self.text_good = None - self.float_size_good = None + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.extract_floats = _FloatExtractor() + self.text_good = None + self.float_size_good = None - _ADDRESS_RE = re.compile(r'\bat 0x[0-9a-f]*?>') - # TODO(yashkatariya): Add other tensor's string substitutions too. - # tf.RaggedTensor doesn't need one. 
-  _NUMPY_OUTPUT_RE = re.compile(r'<tf.Tensor.*?numpy=(.*?)>', re.DOTALL)
+    _ADDRESS_RE = re.compile(r"\bat 0x[0-9a-f]*?>")
+    # TODO(yashkatariya): Add other tensor's string substitutions too.
+    # tf.RaggedTensor doesn't need one.
+    _NUMPY_OUTPUT_RE = re.compile(r"<tf.Tensor.*?numpy=(.*?)>", re.DOTALL)
-  def _allclose(self, want, got, rtol=1e-3, atol=1e-3):
-    return np.allclose(want, got, rtol=rtol, atol=atol)
+    def _allclose(self, want, got, rtol=1e-3, atol=1e-3):
+        return np.allclose(want, got, rtol=rtol, atol=atol)
-  def _tf_tensor_numpy_output(self, string):
-    modified_string = self._NUMPY_OUTPUT_RE.sub(r'\1', string)
-    return modified_string, modified_string != string
+    def _tf_tensor_numpy_output(self, string):
+        modified_string = self._NUMPY_OUTPUT_RE.sub(r"\1", string)
+        return modified_string, modified_string != string
-  MESSAGE = textwrap.dedent("""\n
+    MESSAGE = textwrap.dedent(
+        """\n
         #############################################################
         Check the documentation (go/testable-docstrings) on how to
         write testable docstrings.
-        #############################################################""")
+        #############################################################"""
+    )
-  def check_output(self, want, got, optionflags):
-    """Compares the docstring output to the output gotten by running the code.
+    def check_output(self, want, got, optionflags):
+        """Compares the docstring output to the output gotten by running the
+        code.
-    Python addresses in the output are replaced with wildcards.
+        Python addresses in the output are replaced with wildcards.
-    Float values in the output compared as using `np.allclose`:
+        Float values in the output are compared using `np.allclose`:
-    * Float values are extracted from the text and replaced with wildcards.
-    * The wildcard text is compared to the actual output.
-    * The float values are compared using `np.allclose`.
+        * Float values are extracted from the text and replaced with
+          wildcards.
+        * The wildcard text is compared to the actual output.
+        * The float values are compared using `np.allclose`.
-    The method returns `True` if both the text comparison and the numeric
-    comparison are successful.
+        The method returns `True` if both the text comparison and the numeric
+        comparison are successful.
-    The numeric comparison will fail if either:
+        The numeric comparison will fail if either:
-    * The wrong number of floats are found.
-    * The float values are not within tolerence.
+        * The wrong number of floats are found.
+        * The float values are not within tolerance.
-    Args:
-      want: The output in the docstring.
-      got: The output generated after running the snippet.
-      optionflags: Flags passed to the doctest.
+        Args:
+            want: The output in the docstring.
+            got: The output generated after running the snippet.
+            optionflags: Flags passed to the doctest.
-    Returns:
-      A bool, indicating if the check was successful or not.
-    """
+        Returns:
+            A bool, indicating if the check was successful or not.
+        """
+
+        # If the docstring's output is empty and there is some output generated
+        # after running the snippet, return True. This is because if the user
+        # doesn't want to display output, respect that over what the doctest
+        # wants.
+        if got and not want:
+            return True
-    # If the docstring's output is empty and there is some output generated
-    # after running the snippet, return True. This is because if the user
-    # doesn't want to display output, respect that over what the doctest wants.
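In effect, `check_output` runs a two-stage comparison: an ELLIPSIS text match with the floats wildcarded out, then an `np.allclose` check on the extracted floats. A runnable sketch of the two stages, using hypothetical values:

import doctest
import numpy as np

checker = doctest.OutputChecker()
# Stage 1: match the float-free text with the standard ELLIPSIS wildcard.
text_ok = checker.check_output(
    want="loss: ...", got="loss: 0.12001", optionflags=doctest.ELLIPSIS
)
# Stage 2: compare the extracted floats numerically.
floats_ok = np.allclose([0.12], [0.12001], rtol=1e-3, atol=1e-3)
print(text_ok and floats_ok)  # True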
-    if got and not want:
-      return True
-
-    if want is None:
-      want = ''
-
-    # Replace python's addresses with ellipsis (`...`) since it can change on
-    # each execution.
-    want = self._ADDRESS_RE.sub('at ...>', want)
-
-    # Replace tf.Tensor strings with only their numpy field values.
-    want, want_changed = self._tf_tensor_numpy_output(want)
-    if want_changed:
-      got, _ = self._tf_tensor_numpy_output(got)
-
-    # Separate out the floats, and replace `want` with the wild-card version
-    # "result=7.0" => "result=..."
-    want_text_parts, self.want_floats = self.extract_floats(want)
-    want_text_wild = '...'.join(want_text_parts)
-
-    # Find the floats in the string returned by the test
-    _, self.got_floats = self.extract_floats(got)
-
-    self.text_good = super().check_output(
-        want=want_text_wild, got=got, optionflags=optionflags)
-    if not self.text_good:
-      return False
-
-    if self.want_floats.size == 0:
-      # If there are no floats in the "want" string, ignore all the floats in
-      # the result. "np.array([ ... ])" matches "np.array([ 1.0, 2.0 ])"
-      return True
-
-    self.float_size_good = (self.want_floats.size == self.got_floats.size)
-
-    if self.float_size_good:
-      return self._allclose(self.want_floats, self.got_floats)
-    else:
-      return False
-
-  def output_difference(self, example, got, optionflags):
-    got = [got]
-
-    # If the some of the float output is hidden with `...`, `float_size_good`
-    # will be False. This is because the floats extracted from the string is
-    # converted into a 1-D numpy array. Hence hidding floats is not allowed
-    # anymore.
-    if self.text_good:
-      if not self.float_size_good:
-        got.append("\n\nCAUTION: tf_doctest doesn't work if *some* of the "
-                   "*float output* is hidden with a \"...\".")
-
-    got.append(self.MESSAGE)
-    got = '\n'.join(got)
-    return super().output_difference(example, got, optionflags)
+        if want is None:
+            want = ""
+
+        # Replace python's addresses with ellipsis (`...`) since it can change
+        # on each execution.
+        want = self._ADDRESS_RE.sub("at ...>", want)
+
+        # Replace tf.Tensor strings with only their numpy field values.
+        want, want_changed = self._tf_tensor_numpy_output(want)
+        if want_changed:
+            got, _ = self._tf_tensor_numpy_output(got)
+
+        # Separate out the floats, and replace `want` with the wild-card version
+        # "result=7.0" => "result=..."
+        want_text_parts, self.want_floats = self.extract_floats(want)
+        want_text_wild = "...".join(want_text_parts)
+
+        # Find the floats in the string returned by the test
+        _, self.got_floats = self.extract_floats(got)
+
+        self.text_good = super().check_output(
+            want=want_text_wild, got=got, optionflags=optionflags
+        )
+        if not self.text_good:
+            return False
+
+        if self.want_floats.size == 0:
+            # If there are no floats in the "want" string, ignore all the floats
+            # in the result. "np.array([ ... ])" matches "np.array([ 1.0, 2.0
+            # ])"
+            return True
+
+        self.float_size_good = self.want_floats.size == self.got_floats.size
+
+        if self.float_size_good:
+            return self._allclose(self.want_floats, self.got_floats)
+        else:
+            return False
+
+    def output_difference(self, example, got, optionflags):
+        got = [got]
+
+        # If some of the float output is hidden with `...`,
+        # `float_size_good` will be False. This is because the floats extracted
+        # from the string are converted into a 1-D numpy array. Hence hiding
+        # floats is not allowed anymore.
+        if self.text_good:
+            if not self.float_size_good:
+                got.append(
+                    "\n\nCAUTION: tf_doctest doesn't work if *some* of the "
+                    '*float output* is hidden with a "...".'
+                )
+
+        got.append(self.MESSAGE)
+        got = "\n".join(got)
+        return super().output_difference(example, got, optionflags)
diff --git a/keras/testing_infra/keras_doctest_lib_test.py b/keras/testing_infra/keras_doctest_lib_test.py
index ede34e3deebc..c31f8f05fe15 100644
--- a/keras/testing_infra/keras_doctest_lib_test.py
+++ b/keras/testing_infra/keras_doctest_lib_test.py
@@ -16,188 +16,210 @@
 import doctest
+import tensorflow.compat.v2 as tf
 from absl.testing import parameterized
+
 from keras.testing_infra import keras_doctest_lib
-import tensorflow.compat.v2 as tf
 
 class KerasDoctestOutputCheckerTest(parameterized.TestCase):
+    @parameterized.parameters(
+        # Don't match ints.
+        ["result = 1", []],
+        # Match floats.
+        ["0.0", [0.0]],
+        ["text 1.0 text", [1.0]],
+        ["text 1. text", [1.0]],
+        ["text .1 text", [0.1]],
+        ["text 1e3 text", [1000.0]],
+        ["text 1.e3 text", [1000.0]],
+        ["text +1. text", [1.0]],
+        ["text -1. text", [-1.0]],
+        ["text 1e+3 text", [1000.0]],
+        ["text 1e-3 text", [0.001]],
+        ["text +1E3 text", [1000.0]],
+        ["text -1E3 text", [-1000.0]],
+        ["text +1e-3 text", [0.001]],
+        ["text -1e+3 text", [-1000.0]],
+        # Match at the start and end of a string.
+        [".1", [0.1]],
+        [".1 text", [0.1]],
+        ["text .1", [0.1]],
+        ["0.1 text", [0.1]],
+        ["text 0.1", [0.1]],
+        ["0. text", [0.0]],
+        ["text 0.", [0.0]],
+        ["1e-1 text", [0.1]],
+        ["text 1e-1", [0.1]],
+        # Don't match floats mixed into text
+        ["text1.0 text", []],
+        ["text 1.0text", []],
+        ["text1.0text", []],
+        ["0x12e4", []],  # not 12000
+        ["TensorBoard: http://128.0.0.1:8888", []],
+        # With a newline
+        ["1.0 text\n 2.0 3.0 text", [1.0, 2.0, 3.0]],
+        # With ints and a float.
+        ["shape (1,2,3) value -1e9", [-1e9]],
+        # "." after a float.
+        ["No floats at end of sentence: 1.0.", []],
+        ["No floats with ellipsis: 1.0...", []],
+        # A numpy array
+        [
+            """array([[1., 2., 3.],
+                      [4., 5., 6.]], dtype=float32)""",
+            [1, 2, 3, 4, 5, 6],
+        ],
+        # Match both parts of a complex number
+        # python style
+        ["(0.0002+30000j)", [0.0002, 30000]],
+        ["(2.3e-10-3.34e+9j)", [2.3e-10, -3.34e9]],
+        # numpy style
+        ["array([1.27+5.j])", [1.27, 5]],
+        ["(2.3e-10+3.34e+9j)", [2.3e-10, 3.34e9]],
+        [
+            """array([1.27e-09+5.e+00j,
+                      2.30e+01-1.e-03j])""",
+            [1.27e-09, 5.0e00, 2.30e01, -1.0e-03],
+        ],
+        # Check examples in tolerance.
+        ["1e-6", [0]],
+        ["0.0", [1e-6]],
+        ["1.000001e9", [1e9]],
+        ["1e9", [1.000001e9]],
+    )
+    def test_extract_floats(self, text, expected_floats):
+        extract_floats = keras_doctest_lib._FloatExtractor()
+        output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
+
+        (text_parts, extracted_floats) = extract_floats(text)
+        text_with_wildcards = "...".join(text_parts)
+
+        # Check that the lengths match before doing anything else.
+        try:
+            self.assertLen(extracted_floats, len(expected_floats))
+        except AssertionError as e:
+            msg = "\n\n expected: {}\n found: {}".format(
+                expected_floats, extracted_floats
+            )
+            e.args = (e.args[0] + msg,)
+            raise e
+
+        # The floats should match according to allclose
+        try:
+            self.assertTrue(
+                output_checker._allclose(expected_floats, extracted_floats)
+            )
+        except AssertionError as e:
+            msg = "\n\nexpected: {}\nfound: {}".format(
+                expected_floats, extracted_floats
+            )
+            e.args = (e.args[0] + msg,)
+            raise e
+
+        # The wildcard text should match the input text, according to the
+        # OutputChecker base class.
+        try:
+            self.assertTrue(
+                doctest.OutputChecker().check_output(
+                    want=text_with_wildcards,
+                    got=text,
+                    optionflags=doctest.ELLIPSIS,
+                )
+            )
+        except AssertionError as e:
+            msg = f"\n\n expected: {text_with_wildcards}\n found: {text}"
+            e.args = (e.args[0] + msg,)
+            raise e
+
+    @parameterized.parameters(
+        # Check examples out of tolerance.
+        ["1.001e-2", [0]],
+        ["0.0", [1.001e-3]],
+    )
+    def test_fail_tolerences(self, text, expected_floats):
+        extract_floats = keras_doctest_lib._FloatExtractor()
+        output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
+
+        (_, extracted_floats) = extract_floats(text)
+
+        # These floats should not match according to allclose
+        try:
+            self.assertFalse(
+                output_checker._allclose(expected_floats, extracted_floats)
+            )
+        except AssertionError as e:
+            msg = (
+                "\n\nThese matched! They should not have.\n"
+                "\n\n Expected: {}\n found: {}".format(
+                    expected_floats, extracted_floats
+                )
+            )
+            e.args = (e.args[0] + msg,)
+            raise e
+
+    def test_no_floats(self):
+        want = "text ... text"
+        got = "text 1.0 1.2 1.9 text"
+        output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
+        self.assertTrue(
+            output_checker.check_output(
+                want=want, got=got, optionflags=doctest.ELLIPSIS
+            )
+        )
+
+    @parameterized.parameters(
+        ["1.0, ..., 1.0", "1.0, 1.0, 1.0"],
+        ["1.0, 1.0..., 1.0", "1.0, 1.002, 1.0"],
+    )
+    def test_warning_messages(self, want, got):
+        output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
-  @parameterized.parameters(
-      # Don't match ints.
-      ['result = 1', []],
-      # Match floats.
-      ['0.0', [0.]],
-      ['text 1.0 text', [1.]],
-      ['text 1. text', [1.]],
-      ['text .1 text', [.1]],
-      ['text 1e3 text', [1000.]],
-      ['text 1.e3 text', [1000.]],
-      ['text +1. text', [1.]],
-      ['text -1. text', [-1.]],
-      ['text 1e+3 text', [1000.]],
-      ['text 1e-3 text', [0.001]],
-      ['text +1E3 text', [1000.]],
-      ['text -1E3 text', [-1000.]],
-      ['text +1e-3 text', [0.001]],
-      ['text -1e+3 text', [-1000.]],
-      # Match at the start and end of a string.
-      ['.1', [.1]],
-      ['.1 text', [.1]],
-      ['text .1', [.1]],
-      ['0.1 text', [.1]],
-      ['text 0.1', [.1]],
-      ['0. text', [0.]],
-      ['text 0.', [0.]],
-      ['1e-1 text', [.1]],
-      ['text 1e-1', [.1]],
-      # Don't match floats mixed into text
-      ['text1.0 text', []],
-      ['text 1.0text', []],
-      ['text1.0text', []],
-      ['0x12e4', []],  # not 12000
-      ['TensorBoard: http://128.0.0.1:8888', []],
-      # With a newline
-      ['1.0 text\n 2.0 3.0 text', [1., 2., 3.]],
-      # With ints and a float.
-      ['shape (1,2,3) value -1e9', [-1e9]],
-      # "." after a float.
-      ['No floats at end of sentence: 1.0.', []],
-      ['No floats with ellipsis: 1.0...', []],
-      # A numpy array
-      [
-          """array([[1., 2., 3.],
-                    [4., 5., 6.]], dtype=float32)""", [1, 2, 3, 4, 5, 6]
-      ],
-      # Match both parts of a complex number
-      # python style
-      ['(0.0002+30000j)', [0.0002, 30000]],
-      ['(2.3e-10-3.34e+9j)', [2.3e-10, -3.34e+9]],
-      # numpy style
-      ['array([1.27+5.j])', [1.27, 5]],
-      ['(2.3e-10+3.34e+9j)', [2.3e-10, 3.34e+9]],
-      [
-          """array([1.27e-09+5.e+00j,
-                    2.30e+01-1.e-03j])""", [1.27e-09, 5.e+00, 2.30e+01, -1.e-03]
-      ],
-      # Check examples in tolerence.
-      ['1e-6', [0]],
-      ['0.0', [1e-6]],
-      ['1.000001e9', [1e9]],
-      ['1e9', [1.000001e9]],
-  )
-  def test_extract_floats(self, text, expected_floats):
-    extract_floats = keras_doctest_lib._FloatExtractor()
-    output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
-
-    (text_parts, extracted_floats) = extract_floats(text)
-    text_with_wildcards = '...'.join(text_parts)
-
-    # Check that the lengths match before doing anything else.
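The complex-number cases in the parameter lists above assert that both the real and imaginary parts are pulled out as separate floats. A quick runnable check of that behavior, using one of the suite's own inputs (illustrative only):

from keras.testing_infra import keras_doctest_lib

texts, floats = keras_doctest_lib._FloatExtractor()("(0.0002+30000j)")
print(list(floats))  # [0.0002, 30000.0] -- both components extracted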
-    try:
-      self.assertLen(extracted_floats, len(expected_floats))
-    except AssertionError as e:
-      msg = '\n\n expected: {}\n found: {}'.format(
-          expected_floats, extracted_floats)
-      e.args = (e.args[0] + msg,)
-      raise e
-
-    # The floats should match according to allclose
-    try:
-      self.assertTrue(
-          output_checker._allclose(expected_floats, extracted_floats))
-    except AssertionError as e:
-      msg = '\n\nexpected: {}\nfound: {}'.format(expected_floats,
-                                                 extracted_floats)
-      e.args = (e.args[0] + msg,)
-      raise e
-
-    # The wildcard text should match the input text, according to the
-    # OutputChecker base class.
-    try:
-      self.assertTrue(doctest.OutputChecker().check_output(
-          want=text_with_wildcards, got=text, optionflags=doctest.ELLIPSIS))
-    except AssertionError as e:
-      msg = '\n\n expected: {}\n found: {}'.format(
-          text_with_wildcards, text)
-      e.args = (e.args[0] + msg,)
-      raise e
-
-  @parameterized.parameters(
-      # CHeck examples out of tolerence.
-      ['1.001e-2', [0]],
-      ['0.0', [1.001e-3]],
-  )
-  def test_fail_tolerences(self, text, expected_floats):
-    extract_floats = keras_doctest_lib._FloatExtractor()
-    output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
-
-    (_, extracted_floats) = extract_floats(text)
-
-    # These floats should not match according to allclose
-    try:
-      self.assertFalse(
-          output_checker._allclose(expected_floats, extracted_floats))
-    except AssertionError as e:
-      msg = ('\n\nThese matched! They should not have.\n'
-             '\n\n Expected: {}\n found: {}'.format(
-                 expected_floats, extracted_floats))
-      e.args = (e.args[0] + msg,)
-      raise e
-
-  def test_no_floats(self):
-    want = 'text ... text'
-    got = 'text 1.0 1.2 1.9 text'
-    output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
-    self.assertTrue(
-        output_checker.check_output(
-            want=want, got=got, optionflags=doctest.ELLIPSIS))
-
-  @parameterized.parameters(['1.0, ..., 1.0', '1.0, 1.0, 1.0'],
-                            ['1.0, 1.0..., 1.0', '1.0, 1.002, 1.0'])
-  def test_warning_messages(self, want, got):
-    output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
-
-    output_checker.check_output(
-        want=want, got=got, optionflags=doctest.ELLIPSIS)
-
-    example = doctest.Example('None', want=want)
-    result = output_checker.output_difference(
-        example=example, got=got, optionflags=doctest.ELLIPSIS)
-    self.assertIn("doesn't work if *some* of the", result)
-
-  @parameterized.parameters(
-      ['<...>', ('<...>', False)],
-      ['TensorFlow', ('TensorFlow', False)],
-      [
-          'tf.Variable([[1, 2], [3, 4]])',
-          ('tf.Variable([[1, 2], [3, 4]])', False)
-      ],
-      ['<tf.Tensor: shape=(), dtype=float32, numpy=inf>', ('inf', True)],
-      [
-          '<tf.RaggedTensor [[1, 2, 3], [4, 5]]>',
-          ('<tf.RaggedTensor [[1, 2, 3], [4, 5]]>', False)
-      ],
-      [
-          """<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
- array([[2, 2],
- [3, 5]], dtype=int32)>""",
-          ('\n array([[2, 2],\n [3, 5]], ' +
-           'dtype=int32)', True)
-      ],
-      [
-          '[<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>, ' +
-          '<tf.Tensor: shape=(2,), dtype=int32, numpy=array([3, 4], dtype=int32)>]',
-          ('[array([1, 2], dtype=int32), array([3, 4], dtype=int32)]', True)
-      ],
-  )
-  def test_tf_tensor_numpy_output(self, string, expected_output):
-    output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
-    output = output_checker._tf_tensor_numpy_output(string)
-    self.assertEqual(expected_output, output)
-
-
-if __name__ == '__main__':
-  tf.test.main()
+    @parameterized.parameters(
+        ["<...>", ("<...>", False)],
+        ["TensorFlow", ("TensorFlow", False)],
+        [
+            "tf.Variable([[1, 2], [3, 4]])",
+            ("tf.Variable([[1, 2], [3, 4]])", False),
+        ],
+        ["<tf.Tensor: shape=(), dtype=float32, numpy=inf>", ("inf", True)],
+        [
+            "<tf.RaggedTensor [[1, 2, 3], [4, 5]]>",
+            ("<tf.RaggedTensor [[1, 2, 3], [4, 5]]>", False),
+        ],
+        [
+            """<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
+ array([[2, 2],
+ [3, 5]], dtype=int32)>""",
+            (
+                "\n array([[2, 2],\n [3, 5]], "
+                + "dtype=int32)",
+                True,
+            ),
+        ],
+        [
+            "[<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>, "
+            + "<tf.Tensor: shape=(2,), dtype=int32, numpy=array([3, 4], dtype=int32)>]",
+            ("[array([1, 2], dtype=int32), array([3, 4], dtype=int32)]", True),
+        ],
+    )
+    def test_tf_tensor_numpy_output(self, string, expected_output):
+        output_checker = keras_doctest_lib.KerasDoctestOutputChecker()
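+        # `_tf_tensor_numpy_output` rewrites `<tf.Tensor ... numpy=X>` reprs
+        # down to just `X`; the boolean in each expected output above records
+        # whether such a rewrite happened.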
+        output = output_checker._tf_tensor_numpy_output(string)
+        self.assertEqual(expected_output, output)
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/keras/testing_infra/test_combinations.py b/keras/testing_infra/test_combinations.py
index 0e9fc2a0689f..2f29e1e3d5fa 100644
--- a/keras/testing_infra/test_combinations.py
+++ b/keras/testing_infra/test_combinations.py
@@ -13,548 +13,578 @@
 # limitations under the License.
 # ==============================================================================
 """Utilities for unit-testing Keras."""
-# pylint: disable=g-bad-import-order
-import tensorflow.compat.v2 as tf
 import collections
 import functools
 import itertools
 import unittest
 
+import tensorflow.compat.v2 as tf
 from absl.testing import parameterized
 
 import keras
 from keras.testing_infra import test_utils
 
 try:
-  import h5py  # pylint:disable=g-import-not-at-top
+    import h5py
 except ImportError:
-  h5py = None
+    h5py = None
 
-KERAS_MODEL_TYPES = ['functional', 'subclass', 'sequential']
+KERAS_MODEL_TYPES = ["functional", "subclass", "sequential"]
 
 
 class TestCase(tf.test.TestCase, parameterized.TestCase):
+    def tearDown(self):
+        keras.backend.clear_session()
+        super().tearDown()
 
-  def tearDown(self):
-    keras.backend.clear_session()
-    super().tearDown()
 
+def run_with_all_saved_model_formats(test_or_class=None, exclude_formats=None):
+    """Execute the decorated test with all Keras saved model formats.
 
-def run_with_all_saved_model_formats(
-    test_or_class=None,
-    exclude_formats=None):
-  """Execute the decorated test with all Keras saved model formats).
+    This decorator is intended to be applied either to individual test methods
+    in a `test_combinations.TestCase` class, or directly to a test class that
+    extends it. Doing so will cause the contents of the individual test method
+    (or all test methods in the class) to be executed multiple times - once for
+    each Keras saved model format.
+
+    The Keras saved model formats include:
+    1. HDF5: 'h5'
+    2. SavedModel: 'tf'
+
+    Note: if stacking this decorator with absl.testing's parameterized
+    decorators, those should be at the bottom of the stack.
 
-  This decorator is intended to be applied either to individual test methods in
-  a `test_combinations.TestCase` class, or directly to a test class that
-  extends it. Doing so will cause the contents of the individual test
-  method (or all test methods in the class) to be executed multiple times - once
-  for each Keras saved model format.
+    Various methods in `testing_utils` to get file path for saved models will
+    auto-generate a string of the two saved model formats. This allows unittests
+    to confirm the equivalence between the two Keras saved model formats.
 
-  The Keras saved model formats include:
-  1. HDF5: 'h5'
-  2. SavedModel: 'tf'
+    For example, consider the following unittest:
 
-  Note: if stacking this decorator with absl.testing's parameterized decorators,
-  those should be at the bottom of the stack.
+    ```python
+    class MyTests(test_utils.KerasTestCase):
 
-  Various methods in `testing_utils` to get file path for saved models will
-  auto-generate a string of the two saved model formats. This allows unittests
-  to confirm the equivalence between the two Keras saved model formats.
+ @test_utils.run_with_all_saved_model_formats + def test_foo(self): + save_format = test_utils.get_save_format() + saved_model_dir = '/tmp/saved_model/' + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - For example, consider the following unittest: + keras.models.save_model(model, saved_model_dir, save_format=save_format) + model = keras.models.load_model(saved_model_dir) - ```python - class MyTests(test_utils.KerasTestCase): + if __name__ == "__main__": + tf.test.main() + ``` + This test tries to save the model into the formats of 'hdf5', 'h5', 'keras', + 'tensorflow', and 'tf'. + + We can also annotate the whole class if we want this to apply to all tests + in the class: + ```python @test_utils.run_with_all_saved_model_formats - def test_foo(self): - save_format = test_utils.get_save_format() - saved_model_dir = '/tmp/saved_model/' - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - - keras.models.save_model(model, saved_model_dir, save_format=save_format) - model = keras.models.load_model(saved_model_dir) - - if __name__ == "__main__": - tf.test.main() - ``` - - This test tries to save the model into the formats of 'hdf5', 'h5', 'keras', - 'tensorflow', and 'tf'. - - We can also annotate the whole class if we want this to apply to all tests in - the class: - ```python - @test_utils.run_with_all_saved_model_formats - class MyTests(test_utils.KerasTestCase): - - def test_foo(self): - save_format = test_utils.get_save_format() - saved_model_dir = '/tmp/saved_model/' - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - - keras.models.save_model(model, saved_model_dir, save_format=save_format) - model = tf.keras.models.load_model(saved_model_dir) - - if __name__ == "__main__": - tf.test.main() - ``` - - Args: - test_or_class: test method or class to be annotated. If None, - this method returns a decorator that can be applied to a test method or - test class. If it is not None this returns the decorator applied to the - test or class. - exclude_formats: A collection of Keras saved model formats to not run. - (May also be a single format not wrapped in a collection). - Defaults to None. - - Returns: - Returns a decorator that will run the decorated test method multiple times: - once for each desired Keras saved model format. - - Raises: - ImportError: If abseil parameterized is not installed or not included as - a target dependency. - """ - # Exclude h5 save format if H5py isn't available. 
- if h5py is None: - exclude_formats.append(['h5']) - saved_model_formats = ['h5', 'tf', 'tf_no_traces'] - params = [('_%s' % saved_format, saved_format) - for saved_format in saved_model_formats - if saved_format not in tf.nest.flatten(exclude_formats)] - - def single_method_decorator(f): - """Decorator that constructs the test cases.""" - # Use named_parameters so it can be individually run from the command line - @parameterized.named_parameters(*params) - @functools.wraps(f) - def decorated(self, saved_format, *args, **kwargs): - """A run of a single test case w/ the specified model type.""" - if saved_format == 'h5': - _test_h5_saved_model_format(f, self, *args, **kwargs) - elif saved_format == 'tf': - _test_tf_saved_model_format(f, self, *args, **kwargs) - elif saved_format == 'tf_no_traces': - _test_tf_saved_model_format_no_traces(f, self, *args, **kwargs) - else: - raise ValueError('Unknown model type: %s' % (saved_format,)) - return decorated - - return _test_or_class_decorator(test_or_class, single_method_decorator) + class MyTests(test_utils.KerasTestCase): + + def test_foo(self): + save_format = test_utils.get_save_format() + saved_model_dir = '/tmp/saved_model/' + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile(loss='mse', optimizer='sgd', metrics=['acc']) + + keras.models.save_model(model, saved_model_dir, save_format=save_format) + model = tf.keras.models.load_model(saved_model_dir) + + if __name__ == "__main__": + tf.test.main() + ``` + + Args: + test_or_class: test method or class to be annotated. If None, + this method returns a decorator that can be applied to a test method or + test class. If it is not None this returns the decorator applied to the + test or class. + exclude_formats: A collection of Keras saved model formats to not run. + (May also be a single format not wrapped in a collection). + Defaults to `None`. + + Returns: + Returns a decorator that will run the decorated test method multiple + times: once for each desired Keras saved model format. + + Raises: + ImportError: If abseil parameterized is not installed or not included as + a target dependency. + """ + # Exclude h5 save format if H5py isn't available. 
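+    # `exclude_formats` may be None, a bare format string, or a collection,
+    # so flatten it to a list before "h5" is appended below.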
+    if h5py is None:
+        exclude_formats = tf.nest.flatten(exclude_formats or [])
+        exclude_formats.append("h5")
+    saved_model_formats = ["h5", "tf", "tf_no_traces"]
+    params = [
+        (f"_{saved_format}", saved_format)
+        for saved_format in saved_model_formats
+        if saved_format not in tf.nest.flatten(exclude_formats)
+    ]
+
+    def single_method_decorator(f):
+        """Decorator that constructs the test cases."""
+
+        # Use named_parameters so it can be individually run from the command
+        # line.
+        @parameterized.named_parameters(*params)
+        @functools.wraps(f)
+        def decorated(self, saved_format, *args, **kwargs):
+            """A run of a single test case w/ the specified save format."""
+            if saved_format == "h5":
+                _test_h5_saved_model_format(f, self, *args, **kwargs)
+            elif saved_format == "tf":
+                _test_tf_saved_model_format(f, self, *args, **kwargs)
+            elif saved_format == "tf_no_traces":
+                _test_tf_saved_model_format_no_traces(f, self, *args, **kwargs)
+            else:
+                raise ValueError(f"Unknown save format: {saved_format}")
+
+        return decorated
+
+    return _test_or_class_decorator(test_or_class, single_method_decorator)
 
 
 def _test_h5_saved_model_format(f, test_or_class, *args, **kwargs):
-  with test_utils.saved_model_format_scope('h5'):
-    f(test_or_class, *args, **kwargs)
+    with test_utils.saved_model_format_scope("h5"):
+        f(test_or_class, *args, **kwargs)
 
 
 def _test_tf_saved_model_format(f, test_or_class, *args, **kwargs):
-  with test_utils.saved_model_format_scope('tf'):
-    f(test_or_class, *args, **kwargs)
+    with test_utils.saved_model_format_scope("tf"):
+        f(test_or_class, *args, **kwargs)
 
 
 def _test_tf_saved_model_format_no_traces(f, test_or_class, *args, **kwargs):
-  with test_utils.saved_model_format_scope('tf', save_traces=False):
-    f(test_or_class, *args, **kwargs)
+    with test_utils.saved_model_format_scope("tf", save_traces=False):
+        f(test_or_class, *args, **kwargs)
 
 
 def run_with_all_weight_formats(test_or_class=None, exclude_formats=None):
-  """Runs all tests with the supported formats for saving weights."""
-  exclude_formats = exclude_formats or []
-  exclude_formats.append('tf_no_traces')  # Only applies to saving models
-  return run_with_all_saved_model_formats(test_or_class, exclude_formats)
+    """Runs all tests with the supported formats for saving weights."""
+    exclude_formats = exclude_formats or []
+    exclude_formats.append("tf_no_traces")  # Only applies to saving models
+    return run_with_all_saved_model_formats(test_or_class, exclude_formats)
 
 
 # TODO(kaftan): Possibly enable 'subclass_custom_build' when tests begin to pass
 # it. Or perhaps make 'subclass' always use a custom build method.
-def run_with_all_model_types(
-    test_or_class=None,
-    exclude_models=None):
-  """Execute the decorated test with all Keras model types.
-
-  This decorator is intended to be applied either to individual test methods in
-  a `test_combinations.TestCase` class, or directly to a test class that
-  extends it. Doing so will cause the contents of the individual test
-  method (or all test methods in the class) to be executed multiple times - once
-  for each Keras model type.
-
-  The Keras model types are: ['functional', 'subclass', 'sequential']
-
-  Note: if stacking this decorator with absl.testing's parameterized decorators,
-  those should be at the bottom of the stack.
-
-  Various methods in `testing_utils` to get models will auto-generate a model
-  of the currently active Keras model type. This allows unittests to confirm
-  the equivalence between different Keras models.
- - For example, consider the following unittest: - - ```python - class MyTests(test_utils.KerasTestCase): - - @test_utils.run_with_all_model_types( - exclude_models = ['sequential']) - def test_foo(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - optimizer = RMSPropOptimizer(learning_rate=0.001) - loss = 'mse' - metrics = ['mae'] - model.compile(optimizer, loss, metrics=metrics) - - inputs = np.zeros((10, 3)) - targets = np.zeros((10, 4)) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) - - if __name__ == "__main__": - tf.test.main() - ``` - - This test tries building a small mlp as both a functional model and as a - subclass model. - - We can also annotate the whole class if we want this to apply to all tests in - the class: - ```python - @test_utils.run_with_all_model_types(exclude_models = ['sequential']) - class MyTests(test_utils.KerasTestCase): - - def test_foo(self): - model = test_utils.get_small_mlp(1, 4, input_dim=3) - optimizer = RMSPropOptimizer(learning_rate=0.001) - loss = 'mse' - metrics = ['mae'] - model.compile(optimizer, loss, metrics=metrics) - - inputs = np.zeros((10, 3)) - targets = np.zeros((10, 4)) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) - - if __name__ == "__main__": - tf.test.main() - ``` - - - Args: - test_or_class: test method or class to be annotated. If None, - this method returns a decorator that can be applied to a test method or - test class. If it is not None this returns the decorator applied to the - test or class. - exclude_models: A collection of Keras model types to not run. - (May also be a single model type not wrapped in a collection). - Defaults to None. - - Returns: - Returns a decorator that will run the decorated test method multiple times: - once for each desired Keras model type. - - Raises: - ImportError: If abseil parameterized is not installed or not included as - a target dependency. - """ - model_types = ['functional', 'subclass', 'sequential'] - params = [('_%s' % model, model) for model in model_types - if model not in tf.nest.flatten(exclude_models)] - - def single_method_decorator(f): - """Decorator that constructs the test cases.""" - # Use named_parameters so it can be individually run from the command line - @parameterized.named_parameters(*params) - @functools.wraps(f) - def decorated(self, model_type, *args, **kwargs): - """A run of a single test case w/ the specified model type.""" - if model_type == 'functional': - _test_functional_model_type(f, self, *args, **kwargs) - elif model_type == 'subclass': - _test_subclass_model_type(f, self, *args, **kwargs) - elif model_type == 'sequential': - _test_sequential_model_type(f, self, *args, **kwargs) - else: - raise ValueError('Unknown model type: %s' % (model_type,)) - return decorated - - return _test_or_class_decorator(test_or_class, single_method_decorator) +def run_with_all_model_types(test_or_class=None, exclude_models=None): + """Execute the decorated test with all Keras model types. + + This decorator is intended to be applied either to individual test methods + in a `test_combinations.TestCase` class, or directly to a test class that + extends it. 
Doing so will cause the contents of the individual test method + (or all test methods in the class) to be executed multiple times - once for + each Keras model type. + + The Keras model types are: ['functional', 'subclass', 'sequential'] + + Note: if stacking this decorator with absl.testing's parameterized + decorators, those should be at the bottom of the stack. + + Various methods in `testing_utils` to get models will auto-generate a model + of the currently active Keras model type. This allows unittests to confirm + the equivalence between different Keras models. + + For example, consider the following unittest: + + ```python + class MyTests(test_utils.KerasTestCase): + + @test_utils.run_with_all_model_types( + exclude_models = ['sequential']) + def test_foo(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + + if __name__ == "__main__": + tf.test.main() + ``` + + This test tries building a small mlp as both a functional model and as a + subclass model. + + We can also annotate the whole class if we want this to apply to all tests + in the class: + ```python + @test_utils.run_with_all_model_types(exclude_models = ['sequential']) + class MyTests(test_utils.KerasTestCase): + + def test_foo(self): + model = test_utils.get_small_mlp(1, 4, input_dim=3) + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + + if __name__ == "__main__": + tf.test.main() + ``` + + + Args: + test_or_class: test method or class to be annotated. If None, + this method returns a decorator that can be applied to a test method or + test class. If it is not None this returns the decorator applied to the + test or class. + exclude_models: A collection of Keras model types to not run. + (May also be a single model type not wrapped in a collection). + Defaults to `None`. + + Returns: + Returns a decorator that will run the decorated test method multiple + times: once for each desired Keras model type. + + Raises: + ImportError: If abseil parameterized is not installed or not included as + a target dependency. 
+ """ + model_types = ["functional", "subclass", "sequential"] + params = [ + (f"_{model}", model) + for model in model_types + if model not in tf.nest.flatten(exclude_models) + ] + + def single_method_decorator(f): + """Decorator that constructs the test cases.""" + # Use named_parameters so it can be individually run from the command + # line + @parameterized.named_parameters(*params) + @functools.wraps(f) + def decorated(self, model_type, *args, **kwargs): + """A run of a single test case w/ the specified model type.""" + if model_type == "functional": + _test_functional_model_type(f, self, *args, **kwargs) + elif model_type == "subclass": + _test_subclass_model_type(f, self, *args, **kwargs) + elif model_type == "sequential": + _test_sequential_model_type(f, self, *args, **kwargs) + else: + raise ValueError(f"Unknown model type: {model_type}") + + return decorated + + return _test_or_class_decorator(test_or_class, single_method_decorator) def _test_functional_model_type(f, test_or_class, *args, **kwargs): - with test_utils.model_type_scope('functional'): - f(test_or_class, *args, **kwargs) + with test_utils.model_type_scope("functional"): + f(test_or_class, *args, **kwargs) def _test_subclass_model_type(f, test_or_class, *args, **kwargs): - with test_utils.model_type_scope('subclass'): - f(test_or_class, *args, **kwargs) + with test_utils.model_type_scope("subclass"): + f(test_or_class, *args, **kwargs) def _test_sequential_model_type(f, test_or_class, *args, **kwargs): - with test_utils.model_type_scope('sequential'): - f(test_or_class, *args, **kwargs) - - -def run_all_keras_modes(test_or_class=None, - config=None, - always_skip_v1=False, - always_skip_eager=False, - **kwargs): - """Execute the decorated test with all keras execution modes. - - This decorator is intended to be applied either to individual test methods in - a `test_combinations.TestCase` class, or directly to a test class that - extends it. Doing so will cause the contents of the individual test - method (or all test methods in the class) to be executed multiple times - - once executing in legacy graph mode, once running eagerly and with - `should_run_eagerly` returning True, and once running eagerly with - `should_run_eagerly` returning False. - - If Tensorflow v2 behavior is enabled, legacy graph mode will be skipped, and - the test will only run twice. - - Note: if stacking this decorator with absl.testing's parameterized decorators, - those should be at the bottom of the stack. - - For example, consider the following unittest: - - ```python - class MyTests(test_utils.KerasTestCase): - - @test_utils.run_all_keras_modes - def test_foo(self): - model = test_utils.get_small_functional_mlp(1, 4, input_dim=3) - optimizer = RMSPropOptimizer(learning_rate=0.001) - loss = 'mse' - metrics = ['mae'] - model.compile( - optimizer, loss, metrics=metrics, - run_eagerly=test_utils.should_run_eagerly()) - - inputs = np.zeros((10, 3)) - targets = np.zeros((10, 4)) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) - - if __name__ == "__main__": - tf.test.main() - ``` - - This test will try compiling & fitting the small functional mlp using all - three Keras execution modes. - - Args: - test_or_class: test method or class to be annotated. If None, - this method returns a decorator that can be applied to a test method or - test class. 
If it is not None this returns the decorator applied to the - test or class. - config: An optional config_pb2.ConfigProto to use to configure the - session when executing graphs. - always_skip_v1: If True, does not try running the legacy graph mode even - when Tensorflow v2 behavior is not enabled. - always_skip_eager: If True, does not execute the decorated test - with eager execution modes. - **kwargs: Additional kwargs for configuring tests for - in-progress Keras behaviors/ refactorings that we haven't fully - rolled out yet - - Returns: - Returns a decorator that will run the decorated test method multiple times. - - Raises: - ImportError: If abseil parameterized is not installed or not included as - a target dependency. - """ - if kwargs: - raise ValueError('Unrecognized keyword args: {}'.format(kwargs)) - - params = [('_v2_function', 'v2_function')] - if not always_skip_eager: - params.append(('_v2_eager', 'v2_eager')) - if not (always_skip_v1 or tf.__internal__.tf2.enabled()): - params.append(('_v1_session', 'v1_session')) - - def single_method_decorator(f): - """Decorator that constructs the test cases.""" - - # Use named_parameters so it can be individually run from the command line - @parameterized.named_parameters(*params) - @functools.wraps(f) - def decorated(self, run_mode, *args, **kwargs): - """A run of a single test case w/ specified run mode.""" - if run_mode == 'v1_session': - _v1_session_test(f, self, config, *args, **kwargs) - elif run_mode == 'v2_eager': - _v2_eager_test(f, self, *args, **kwargs) - elif run_mode == 'v2_function': - _v2_function_test(f, self, *args, **kwargs) - else: - return ValueError('Unknown run mode %s' % run_mode) - - return decorated - - return _test_or_class_decorator(test_or_class, single_method_decorator) + with test_utils.model_type_scope("sequential"): + f(test_or_class, *args, **kwargs) + + +def run_all_keras_modes( + test_or_class=None, + config=None, + always_skip_v1=False, + always_skip_eager=False, + **kwargs, +): + """Execute the decorated test with all keras execution modes. + + This decorator is intended to be applied either to individual test methods + in a `test_combinations.TestCase` class, or directly to a test class that + extends it. Doing so will cause the contents of the individual test method + (or all test methods in the class) to be executed multiple times - once + executing in legacy graph mode, once running eagerly and with + `should_run_eagerly` returning True, and once running eagerly with + `should_run_eagerly` returning False. + + If Tensorflow v2 behavior is enabled, legacy graph mode will be skipped, and + the test will only run twice. + + Note: if stacking this decorator with absl.testing's parameterized + decorators, those should be at the bottom of the stack. 
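+
+    For instance, a correctly ordered stack looks like this (a minimal
+    sketch; the test body and the extra parameter are placeholders):
+
+    ```python
+    @test_combinations.run_all_keras_modes
+    @parameterized.named_parameters(dict(testcase_name="_arg", arg=True))
+    def test_foo(self, arg):
+        ...
+    ```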
+
+    For example, consider the following unittest:
+
+    ```python
+    class MyTests(test_utils.KerasTestCase):
+
+      @test_utils.run_all_keras_modes
+      def test_foo(self):
+        model = test_utils.get_small_functional_mlp(1, 4, input_dim=3)
+        optimizer = RMSPropOptimizer(learning_rate=0.001)
+        loss = 'mse'
+        metrics = ['mae']
+        model.compile(
+            optimizer, loss, metrics=metrics,
+            run_eagerly=test_utils.should_run_eagerly())
+
+        inputs = np.zeros((10, 3))
+        targets = np.zeros((10, 4))
+        dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+        dataset = dataset.repeat(100)
+        dataset = dataset.batch(10)
+
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
+
+    if __name__ == "__main__":
+      tf.test.main()
+    ```
+
+    This test will try compiling & fitting the small functional mlp using all
+    three Keras execution modes.
+
+    Args:
+      test_or_class: test method or class to be annotated. If None,
+        this method returns a decorator that can be applied to a test method or
+        test class. If it is not None this returns the decorator applied to the
+        test or class.
+      config: An optional config_pb2.ConfigProto to use to configure the
+        session when executing graphs.
+      always_skip_v1: If True, does not try running the legacy graph mode even
+        when Tensorflow v2 behavior is not enabled.
+      always_skip_eager: If True, does not execute the decorated test
+        with eager execution modes.
+      **kwargs: Additional kwargs for configuring tests for in-progress Keras
+        behaviors/refactorings that we haven't fully rolled out yet.
+
+    Returns:
+      Returns a decorator that will run the decorated test method multiple
+      times.
+
+    Raises:
+      ImportError: If abseil parameterized is not installed or not included as
+      a target dependency.
+    """
+    if kwargs:
+        raise ValueError(f"Unrecognized keyword args: {kwargs}")
+
+    params = [("_v2_function", "v2_function")]
+    if not always_skip_eager:
+        params.append(("_v2_eager", "v2_eager"))
+    if not (always_skip_v1 or tf.__internal__.tf2.enabled()):
+        params.append(("_v1_session", "v1_session"))
+
+    def single_method_decorator(f):
+        """Decorator that constructs the test cases."""
+
+        # Use named_parameters so it can be individually run from the command
+        # line.
+        @parameterized.named_parameters(*params)
+        @functools.wraps(f)
+        def decorated(self, run_mode, *args, **kwargs):
+            """A run of a single test case w/ specified run mode."""
+            if run_mode == "v1_session":
+                _v1_session_test(f, self, config, *args, **kwargs)
+            elif run_mode == "v2_eager":
+                _v2_eager_test(f, self, *args, **kwargs)
+            elif run_mode == "v2_function":
+                _v2_function_test(f, self, *args, **kwargs)
+            else:
+                raise ValueError(f"Unknown run mode {run_mode}")
+
+        return decorated
+
+    return _test_or_class_decorator(test_or_class, single_method_decorator)
 
 
 def _v1_session_test(f, test_or_class, config, *args, **kwargs):
-  with tf.compat.v1.get_default_graph().as_default():
-    with test_utils.run_eagerly_scope(False):
-      with test_or_class.test_session(config=config):
-        f(test_or_class, *args, **kwargs)
+    with tf.compat.v1.get_default_graph().as_default():
+        with test_utils.run_eagerly_scope(False):
+            with test_or_class.test_session(config=config):
+                f(test_or_class, *args, **kwargs)
 
 
 def _v2_eager_test(f, test_or_class, *args, **kwargs):
-  with tf.__internal__.eager_context.eager_mode():
-    with test_utils.run_eagerly_scope(True):
-      f(test_or_class, *args, **kwargs)
+    with tf.__internal__.eager_context.eager_mode():
+        with test_utils.run_eagerly_scope(True):
+            f(test_or_class, *args, **kwargs)
 
 
 def _v2_function_test(f, test_or_class, *args, **kwargs):
-  with tf.__internal__.eager_context.eager_mode():
-    with test_utils.run_eagerly_scope(False):
-      f(test_or_class, *args, **kwargs)
+    with tf.__internal__.eager_context.eager_mode():
+        with test_utils.run_eagerly_scope(False):
+            f(test_or_class, *args, **kwargs)
 
 
 def _test_or_class_decorator(test_or_class, single_method_decorator):
-  """Decorate a test or class with a decorator intended for one method.
-
-  If the test_or_class is a class:
-    This will apply the decorator to all test methods in the class.
-
-  If the test_or_class is an iterable of already-parameterized test cases:
-    This will apply the decorator to all the cases, and then flatten the
-    resulting cross-product of test cases. This allows stacking the Keras
-    parameterized decorators w/ each other, and to apply them to test methods
-    that have already been marked with an absl parameterized decorator.
-
-  Otherwise, treat the obj as a single method and apply the decorator directly.
-
-  Args:
-    test_or_class: A test method (that may have already been decorated with a
-      parameterized decorator, or a test class that extends
-      test_combinations.TestCase
-    single_method_decorator:
-      A parameterized decorator intended for a single test method.
-  Returns:
-    The decorated result.
-  """
-  def _decorate_test_or_class(obj):
-    if isinstance(obj, collections.abc.Iterable):
-      return itertools.chain.from_iterable(
-          single_method_decorator(method) for method in obj)
-    if isinstance(obj, type):
-      cls = obj
-      for name, value in cls.__dict__.copy().items():
-        if callable(value) and name.startswith(
-            unittest.TestLoader.testMethodPrefix):
-          setattr(cls, name, single_method_decorator(value))
-
-      cls = type(cls).__new__(type(cls), cls.__name__, cls.__bases__,
-                              cls.__dict__.copy())
-      return cls
-
-    return single_method_decorator(obj)
-
-  if test_or_class is not None:
-    return _decorate_test_or_class(test_or_class)
-
-  return _decorate_test_or_class
+    """Decorate a test or class with a decorator intended for one method.
+
+    If the test_or_class is a class:
+      This will apply the decorator to all test methods in the class.
+
+    If the test_or_class is an iterable of already-parameterized test cases:
+      This will apply the decorator to all the cases, and then flatten the
+      resulting cross-product of test cases. This allows stacking the Keras
+      parameterized decorators w/ each other, and to apply them to test methods
+      that have already been marked with an absl parameterized decorator.
+
+    Otherwise, treat the obj as a single method and apply the decorator
+    directly.
+
+    Args:
+      test_or_class: A test method (that may have already been decorated
+        with a parameterized decorator), or a test class that extends
+        test_combinations.TestCase.
+      single_method_decorator:
+        A parameterized decorator intended for a single test method.
+    Returns:
+      The decorated result.
+    """
+
+    def _decorate_test_or_class(obj):
+        if isinstance(obj, collections.abc.Iterable):
+            return itertools.chain.from_iterable(
+                single_method_decorator(method) for method in obj
+            )
+        if isinstance(obj, type):
+            cls = obj
+            for name, value in cls.__dict__.copy().items():
+                if callable(value) and name.startswith(
+                    unittest.TestLoader.testMethodPrefix
+                ):
+                    setattr(cls, name, single_method_decorator(value))
+
+            cls = type(cls).__new__(
+                type(cls), cls.__name__, cls.__bases__, cls.__dict__.copy()
+            )
+            return cls
+
+        return single_method_decorator(obj)
+
+    if test_or_class is not None:
+        return _decorate_test_or_class(test_or_class)
+
+    return _decorate_test_or_class
 
 
 def keras_mode_combinations(mode=None, run_eagerly=None):
-  """Returns the default test combinations for tf.keras tests.
-
-  Note that if tf2 is enabled, then v1 session test will be skipped.
-
-  Args:
-    mode: List of modes to run the tests. The valid options are 'graph' and
-      'eager'. Default to ['graph', 'eager'] if not specified. If a empty list
-      is provide, then the test will run under the context based on tf's
-      version, eg graph for v1 and eager for v2.
-    run_eagerly: List of `run_eagerly` value to be run with the tests.
-      Default to [True, False] if not specified. Note that for `graph` mode,
-      run_eagerly value will only be False.
-
-  Returns:
-    A list contains all the combinations to be used to generate test cases.
-  """
-  if mode is None:
-    mode = ['eager'] if tf.__internal__.tf2.enabled() else ['graph', 'eager']
-  if run_eagerly is None:
-    run_eagerly = [True, False]
-  result = []
-  if 'eager' in mode:
-    result += tf.__internal__.test.combinations.combine(mode=['eager'], run_eagerly=run_eagerly)
-  if 'graph' in mode:
-    result += tf.__internal__.test.combinations.combine(mode=['graph'], run_eagerly=[False])
-  return result
+    """Returns the default test combinations for tf.keras tests.
+
+    Note that if tf2 is enabled, then v1 session test will be skipped.
+
+    Args:
+      mode: List of modes to run the tests. The valid options are 'graph' and
+        'eager'. If None, uses ['graph', 'eager']. If an empty
+        list is provided, then the test will run under the context based on
+        tensorflow's version, e.g., graph for v1 and eager for v2. Defaults to
+        `None`.
+      run_eagerly: List of `run_eagerly` value to be run with the tests.
+        When None, uses [True, False]. Note that for `graph` mode,
+        run_eagerly value will only be False. Defaults to `None`.
+
+    Returns:
+      A list containing all the combinations to be used to generate test
+      cases.
+    """
+    if mode is None:
+        mode = (
+            ["eager"] if tf.__internal__.tf2.enabled() else ["graph", "eager"]
+        )
+    if run_eagerly is None:
+        run_eagerly = [True, False]
+    result = []
+    if "eager" in mode:
+        result += tf.__internal__.test.combinations.combine(
+            mode=["eager"], run_eagerly=run_eagerly
+        )
+    if "graph" in mode:
+        result += tf.__internal__.test.combinations.combine(
+            mode=["graph"], run_eagerly=[False]
+        )
+    return result
 
 
 def keras_model_type_combinations():
-  return tf.__internal__.test.combinations.combine(model_type=KERAS_MODEL_TYPES)
+    return tf.__internal__.test.combinations.combine(
+        model_type=KERAS_MODEL_TYPES
+    )
 
 
 class KerasModeCombination(tf.__internal__.test.combinations.TestCombination):
-  """Combination for Keras test mode.
+    """Combination for Keras test mode.
 
-  It by default includes v1_session, v2_eager and v2_tf_function.
-  """
+
+    It by default includes v1_session, v2_eager and v2_tf_function.
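+
+    For example, a minimal sketch using the `generate` and `combine` aliases
+    defined at the bottom of this module:
+
+    ```python
+    @generate(combine(mode=["eager"], run_eagerly=[True, False]))
+    def test_foo(self):
+        ...
+    ```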
+ """ - def context_managers(self, kwargs): - run_eagerly = kwargs.pop('run_eagerly', None) + def context_managers(self, kwargs): + run_eagerly = kwargs.pop("run_eagerly", None) - if run_eagerly is not None: - return [test_utils.run_eagerly_scope(run_eagerly)] - else: - return [] + if run_eagerly is not None: + return [test_utils.run_eagerly_scope(run_eagerly)] + else: + return [] - def parameter_modifiers(self): - return [tf.__internal__.test.combinations.OptionalParameter('run_eagerly')] + def parameter_modifiers(self): + return [ + tf.__internal__.test.combinations.OptionalParameter("run_eagerly") + ] -class KerasModelTypeCombination(tf.__internal__.test.combinations.TestCombination): - """Combination for Keras model types when doing model test. +class KerasModelTypeCombination( + tf.__internal__.test.combinations.TestCombination +): + """Combination for Keras model types when doing model test. - It by default includes 'functional', 'subclass', 'sequential'. + It by default includes 'functional', 'subclass', 'sequential'. - Various methods in `testing_utils` to get models will auto-generate a model - of the currently active Keras model type. This allows unittests to confirm - the equivalence between different Keras models. - """ + Various methods in `testing_utils` to get models will auto-generate a model + of the currently active Keras model type. This allows unittests to confirm + the equivalence between different Keras models. + """ - def context_managers(self, kwargs): - model_type = kwargs.pop('model_type', None) - if model_type in KERAS_MODEL_TYPES: - return [test_utils.model_type_scope(model_type)] - else: - return [] + def context_managers(self, kwargs): + model_type = kwargs.pop("model_type", None) + if model_type in KERAS_MODEL_TYPES: + return [test_utils.model_type_scope(model_type)] + else: + return [] - def parameter_modifiers(self): - return [tf.__internal__.test.combinations.OptionalParameter('model_type')] + def parameter_modifiers(self): + return [ + tf.__internal__.test.combinations.OptionalParameter("model_type") + ] -_defaults = tf.__internal__.test.combinations.generate.keywords['test_combinations'] +_defaults = tf.__internal__.test.combinations.generate.keywords[ + "test_combinations" +] generate = functools.partial( tf.__internal__.test.combinations.generate, - test_combinations=_defaults + - (KerasModeCombination(), KerasModelTypeCombination())) + test_combinations=_defaults + + (KerasModeCombination(), KerasModelTypeCombination()), +) combine = tf.__internal__.test.combinations.combine times = tf.__internal__.test.combinations.times NamedObject = tf.__internal__.test.combinations.NamedObject diff --git a/keras/testing_infra/test_combinations_test.py b/keras/testing_infra/test_combinations_test.py index e835152873e2..30493842b873 100644 --- a/keras/testing_infra/test_combinations_test.py +++ b/keras/testing_infra/test_combinations_test.py @@ -14,680 +14,714 @@ # ============================================================================== """Tests for Keras test_utils.""" -import tensorflow.compat.v2 as tf - import unittest + +import tensorflow.compat.v2 as tf from absl.testing import parameterized import keras from keras import models as keras_models -from keras.testing_infra import test_utils from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils class CombinationsTest(tf.test.TestCase): - - def test_run_all_keras_modes(self): - test_params = [] - - class ExampleTest(parameterized.TestCase): - - def runTest(self): - pass 
- - @test_combinations.generate(test_combinations.keras_mode_combinations()) - def testBody(self): - mode = "eager" if tf.executing_eagerly() else "graph" - should_run_eagerly = test_utils.should_run_eagerly() - test_params.append((mode, should_run_eagerly)) - - e = ExampleTest() - if not tf.__internal__.tf2.enabled(): - e.testBody_test_mode_graph_runeagerly_False() - e.testBody_test_mode_eager_runeagerly_True() - e.testBody_test_mode_eager_runeagerly_False() - - if not tf.__internal__.tf2.enabled(): - self.assertLen(test_params, 3) - self.assertAllEqual(test_params, [ - ("graph", False), - ("eager", True), - ("eager", False), - ]) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - self.assertLen(test_params, 6) - else: - self.assertLen(test_params, 2) - self.assertAllEqual(test_params, [ - ("eager", True), - ("eager", False), - ]) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - self.assertLen(test_params, 4) - - def test_generate_keras_mode_eager_only(self): - result = test_combinations.keras_mode_combinations(mode=["eager"]) - self.assertLen(result, 2) - self.assertEqual(result[0], {"mode": "eager", "run_eagerly": True}) - self.assertEqual(result[1], {"mode": "eager", "run_eagerly": False}) - - def test_generate_keras_mode_skip_run_eagerly(self): - result = test_combinations.keras_mode_combinations(run_eagerly=[False]) - if tf.__internal__.tf2.enabled(): - self.assertLen(result, 1) - self.assertEqual(result[0], {"mode": "eager", "run_eagerly": False}) - else: - self.assertLen(result, 2) - self.assertEqual(result[0], {"mode": "eager", "run_eagerly": False}) - self.assertEqual(result[1], {"mode": "graph", "run_eagerly": False}) - - def test_run_all_keras_model_types(self): - model_types = [] - models = [] - - class ExampleTest(parameterized.TestCase): - - def runTest(self): - pass - - @test_combinations.generate( - test_combinations.keras_model_type_combinations()) - def testBody(self): - model_types.append(test_utils.get_model_type()) - models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) - - e = ExampleTest() - e.testBody_test_modeltype_functional() - e.testBody_test_modeltype_subclass() - e.testBody_test_modeltype_sequential() - - self.assertLen(model_types, 3) - self.assertAllEqual(model_types, [ - "functional", - "subclass", - "sequential" - ]) - - # Validate that the models are what they should be - self.assertTrue(models[0]._is_graph_network) - self.assertFalse(models[1]._is_graph_network) - self.assertNotIsInstance(models[0], keras_models.Sequential) - self.assertNotIsInstance(models[1], keras_models.Sequential) - self.assertIsInstance(models[2], keras_models.Sequential) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - self.assertLen(model_types, 6) - - def test_combine_combinations(self): - test_cases = [] - - @test_combinations.generate(test_combinations.times( - test_combinations.keras_mode_combinations(), - test_combinations.keras_model_type_combinations())) - class ExampleTest(parameterized.TestCase): - - def runTest(self): - pass - - @parameterized.named_parameters(dict(testcase_name="_arg", - arg=True)) - def testBody(self, arg): - del arg - mode = "eager" if tf.executing_eagerly() else "graph" - should_run_eagerly = test_utils.should_run_eagerly() - test_cases.append((mode, should_run_eagerly, - test_utils.get_model_type())) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - expected_combinations = [ - ("eager", 
False, "functional"), - ("eager", False, "sequential"), - ("eager", False, "subclass"), - ("eager", True, "functional"), - ("eager", True, "sequential"), - ("eager", True, "subclass"), - ] - - if not tf.__internal__.tf2.enabled(): - expected_combinations.extend([ - ("graph", False, "functional"), - ("graph", False, "sequential"), - ("graph", False, "subclass"), - ]) - - self.assertAllEqual(sorted(test_cases), expected_combinations) + def test_run_all_keras_modes(self): + test_params = [] + + class ExampleTest(parameterized.TestCase): + def runTest(self): + pass + + @test_combinations.generate( + test_combinations.keras_mode_combinations() + ) + def testBody(self): + mode = "eager" if tf.executing_eagerly() else "graph" + should_run_eagerly = test_utils.should_run_eagerly() + test_params.append((mode, should_run_eagerly)) + + e = ExampleTest() + if not tf.__internal__.tf2.enabled(): + e.testBody_test_mode_graph_runeagerly_False() + e.testBody_test_mode_eager_runeagerly_True() + e.testBody_test_mode_eager_runeagerly_False() + + if not tf.__internal__.tf2.enabled(): + self.assertLen(test_params, 3) + self.assertAllEqual( + test_params, + [ + ("graph", False), + ("eager", True), + ("eager", False), + ], + ) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + self.assertLen(test_params, 6) + else: + self.assertLen(test_params, 2) + self.assertAllEqual( + test_params, + [ + ("eager", True), + ("eager", False), + ], + ) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + self.assertLen(test_params, 4) + + def test_generate_keras_mode_eager_only(self): + result = test_combinations.keras_mode_combinations(mode=["eager"]) + self.assertLen(result, 2) + self.assertEqual(result[0], {"mode": "eager", "run_eagerly": True}) + self.assertEqual(result[1], {"mode": "eager", "run_eagerly": False}) + + def test_generate_keras_mode_skip_run_eagerly(self): + result = test_combinations.keras_mode_combinations(run_eagerly=[False]) + if tf.__internal__.tf2.enabled(): + self.assertLen(result, 1) + self.assertEqual(result[0], {"mode": "eager", "run_eagerly": False}) + else: + self.assertLen(result, 2) + self.assertEqual(result[0], {"mode": "eager", "run_eagerly": False}) + self.assertEqual(result[1], {"mode": "graph", "run_eagerly": False}) + + def test_run_all_keras_model_types(self): + model_types = [] + models = [] + + class ExampleTest(parameterized.TestCase): + def runTest(self): + pass + + @test_combinations.generate( + test_combinations.keras_model_type_combinations() + ) + def testBody(self): + model_types.append(test_utils.get_model_type()) + models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) + + e = ExampleTest() + e.testBody_test_modeltype_functional() + e.testBody_test_modeltype_subclass() + e.testBody_test_modeltype_sequential() + + self.assertLen(model_types, 3) + self.assertAllEqual( + model_types, ["functional", "subclass", "sequential"] + ) + + # Validate that the models are what they should be + self.assertTrue(models[0]._is_graph_network) + self.assertFalse(models[1]._is_graph_network) + self.assertNotIsInstance(models[0], keras_models.Sequential) + self.assertNotIsInstance(models[1], keras_models.Sequential) + self.assertIsInstance(models[2], keras_models.Sequential) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(model_types, 6) + + def test_combine_combinations(self): + test_cases = [] + + @test_combinations.generate( + test_combinations.times( + 
test_combinations.keras_mode_combinations(), + test_combinations.keras_model_type_combinations(), + ) + ) + class ExampleTest(parameterized.TestCase): + def runTest(self): + pass + + @parameterized.named_parameters( + dict(testcase_name="_arg", arg=True) + ) + def testBody(self, arg): + del arg + mode = "eager" if tf.executing_eagerly() else "graph" + should_run_eagerly = test_utils.should_run_eagerly() + test_cases.append( + (mode, should_run_eagerly, test_utils.get_model_type()) + ) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + expected_combinations = [ + ("eager", False, "functional"), + ("eager", False, "sequential"), + ("eager", False, "subclass"), + ("eager", True, "functional"), + ("eager", True, "sequential"), + ("eager", True, "subclass"), + ] + + if not tf.__internal__.tf2.enabled(): + expected_combinations.extend( + [ + ("graph", False, "functional"), + ("graph", False, "sequential"), + ("graph", False, "subclass"), + ] + ) + + self.assertAllEqual(sorted(test_cases), expected_combinations) class KerasParameterizedTest(test_combinations.TestCase): + def test_run_with_all_model_types(self): + model_types = [] + models = [] + + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_with_all_model_types + def testBody(self): + model_types.append(test_utils.get_model_type()) + models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) + + e = ExampleTest() + e.testBody_functional() + e.testBody_subclass() + e.testBody_sequential() + + self.assertLen(model_types, 3) + self.assertAllEqual( + model_types, ["functional", "subclass", "sequential"] + ) + + # Validate that the models are what they should be + self.assertTrue(models[0]._is_graph_network) + self.assertFalse(models[1]._is_graph_network) + self.assertNotIsInstance(models[0], keras.models.Sequential) + self.assertNotIsInstance(models[1], keras.models.Sequential) + self.assertIsInstance(models[2], keras.models.Sequential) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(model_types, 6) + + def test_run_with_all_model_types_and_extra_params(self): + model_types = [] + models = [] + + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_with_all_model_types + @parameterized.named_parameters( + [ + dict(testcase_name="_0", with_brackets=True), + dict(testcase_name="_1", with_brackets=False), + ] + ) + def testBody(self, with_brackets): + with_brackets = ( + "with_brackets" if with_brackets else "without_brackets" + ) + model_types.append((with_brackets, test_utils.get_model_type())) + models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) + + e = ExampleTest() + e.testBody_0_functional() + e.testBody_0_subclass() + e.testBody_0_sequential() + e.testBody_1_functional() + e.testBody_1_subclass() + e.testBody_1_sequential() + + self.assertLen(model_types, 6) + self.assertAllEqual( + model_types, + [ + ("with_brackets", "functional"), + ("with_brackets", "subclass"), + ("with_brackets", "sequential"), + ("without_brackets", "functional"), + ("without_brackets", "subclass"), + ("without_brackets", "sequential"), + ], + ) + + # Validate that the models are what they should be + self.assertTrue(models[0]._is_graph_network) + self.assertFalse(models[1]._is_graph_network) + self.assertNotIsInstance(models[0], keras.models.Sequential) + self.assertNotIsInstance(models[1], keras.models.Sequential) + self.assertIsInstance(models[2], 
keras.models.Sequential) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(model_types, 12) + + def test_run_with_all_model_types_exclude_one(self): + model_types = [] + models = [] + + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_with_all_model_types( + exclude_models="sequential" + ) + def testBody(self): + model_types.append(test_utils.get_model_type()) + models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) + + e = ExampleTest() + if hasattr(e, "testBody_functional"): + e.testBody_functional() + if hasattr(e, "testBody_subclass"): + e.testBody_subclass() + if hasattr(e, "testBody_sequential"): + e.testBody_sequential() + + self.assertLen(model_types, 2) + self.assertAllEqual(model_types, ["functional", "subclass"]) + + # Validate that the models are what they should be + self.assertTrue(models[0]._is_graph_network) + self.assertFalse(models[1]._is_graph_network) + self.assertNotIsInstance(models[0], keras.models.Sequential) + self.assertNotIsInstance(models[1], keras.models.Sequential) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(model_types, 4) + + def test_run_with_all_model_types_exclude_multiple(self): + model_types = [] + models = [] + + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_with_all_model_types( + exclude_models=["sequential", "functional"] + ) + def testBody(self): + model_types.append(test_utils.get_model_type()) + models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) + + e = ExampleTest() + if hasattr(e, "testBody_functional"): + e.testBody_functional() + if hasattr(e, "testBody_subclass"): + e.testBody_subclass() + if hasattr(e, "testBody_sequential"): + e.testBody_sequential() + + self.assertLen(model_types, 1) + self.assertAllEqual(model_types, ["subclass"]) + + # Validate that the models are what they should be + self.assertFalse(models[0]._is_graph_network) + self.assertNotIsInstance(models[0], keras.models.Sequential) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(model_types, 2) + + def test_run_all_keras_modes(self): + l = [] + + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_all_keras_modes() + def testBody(self): + mode = "eager" if tf.executing_eagerly() else "graph" + should_run_eagerly = test_utils.should_run_eagerly() + l.append((mode, should_run_eagerly)) + + e = ExampleTest() + if not tf.__internal__.tf2.enabled(): + e.testBody_v1_session() + e.testBody_v2_eager() + e.testBody_v2_function() + + if not tf.__internal__.tf2.enabled(): + self.assertLen(l, 3) + self.assertAllEqual( + l, + [ + ("graph", False), + ("eager", True), + ("eager", False), + ], + ) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + self.assertLen(l, 6) + else: + self.assertLen(l, 2) + self.assertAllEqual( + l, + [ + ("eager", True), + ("eager", False), + ], + ) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + self.assertLen(l, 4) + + def test_run_all_keras_modes_extra_params(self): + l = [] + + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_all_keras_modes() + @parameterized.named_parameters( + [ + dict(testcase_name="_0", with_brackets=True), + dict(testcase_name="_1", with_brackets=False), + ] + ) + def 
testBody(self, with_brackets): + mode = "eager" if tf.executing_eagerly() else "graph" + with_brackets = ( + "with_brackets" if with_brackets else "without_brackets" + ) + should_run_eagerly = test_utils.should_run_eagerly() + l.append((with_brackets, mode, should_run_eagerly)) + + e = ExampleTest() + if not tf.__internal__.tf2.enabled(): + e.testBody_0_v1_session() + e.testBody_1_v1_session() + + e.testBody_0_v2_eager() + e.testBody_0_v2_function() + e.testBody_1_v2_eager() + e.testBody_1_v2_function() + + expected_combinations = { + ("with_brackets", "eager", True), + ("with_brackets", "eager", False), + ("without_brackets", "eager", True), + ("without_brackets", "eager", False), + } + + if not tf.__internal__.tf2.enabled(): + expected_combinations = expected_combinations.union( + { + ("with_brackets", "graph", False), + ("without_brackets", "graph", False), + } + ) + + self.assertLen(l, len(expected_combinations)) + self.assertEqual(set(l), expected_combinations) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(l, len(expected_combinations) * 2) + + def test_run_all_keras_modes_always_skip_v1(self): + l = [] + + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def testBody(self): + mode = "eager" if tf.executing_eagerly() else "graph" + should_run_eagerly = test_utils.should_run_eagerly() + l.append((mode, should_run_eagerly)) + + e = ExampleTest() + if hasattr(e, "testBody_v1_session"): + e.testBody_v1_session() + if hasattr(e, "testBody_v2_eager"): + e.testBody_v2_eager() + if hasattr(e, "testBody_v2_function"): + e.testBody_v2_function() + + self.assertLen(l, 2) + self.assertEqual( + set(l), + { + ("eager", True), + ("eager", False), + }, + ) + + def test_run_all_keras_modes_with_all_model_types(self): + l = [] + + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + def testBody(self): + mode = "eager" if tf.executing_eagerly() else "graph" + should_run_eagerly = test_utils.should_run_eagerly() + l.append( + (mode, should_run_eagerly, test_utils.get_model_type()) + ) + + e = ExampleTest() + e.testBody_v2_eager_functional() + e.testBody_v2_function_functional() + e.testBody_v2_eager_sequential() + e.testBody_v2_function_sequential() + e.testBody_v2_eager_subclass() + e.testBody_v2_function_subclass() + + if not tf.__internal__.tf2.enabled(): + e.testBody_v1_session_functional() + e.testBody_v1_session_sequential() + e.testBody_v1_session_subclass() + + expected_combinations = { + ("eager", True, "functional"), + ("eager", False, "functional"), + ("eager", True, "sequential"), + ("eager", False, "sequential"), + ("eager", True, "subclass"), + ("eager", False, "subclass"), + } + + if not tf.__internal__.tf2.enabled(): + expected_combinations = expected_combinations.union( + { + ("graph", False, "functional"), + ("graph", False, "sequential"), + ("graph", False, "subclass"), + } + ) + + self.assertLen(l, len(expected_combinations)) + self.assertEqual(set(l), expected_combinations) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(l, len(expected_combinations) * 2) + + def test_run_all_model_types_with_all_keras_modes(self): + l = [] + + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_all_keras_modes + 
@test_combinations.run_with_all_model_types + def testBody(self): + mode = "eager" if tf.executing_eagerly() else "graph" + should_run_eagerly = test_utils.should_run_eagerly() + l.append( + (mode, should_run_eagerly, test_utils.get_model_type()) + ) + + e = ExampleTest() + e.testBody_functional_v2_eager() + e.testBody_functional_v2_function() + e.testBody_sequential_v2_eager() + e.testBody_sequential_v2_function() + e.testBody_subclass_v2_eager() + e.testBody_subclass_v2_function() + + if not tf.__internal__.tf2.enabled(): + e.testBody_functional_v1_session() + e.testBody_sequential_v1_session() + e.testBody_subclass_v1_session() + + expected_combinations = { + ("eager", True, "functional"), + ("eager", False, "functional"), + ("eager", True, "sequential"), + ("eager", False, "sequential"), + ("eager", True, "subclass"), + ("eager", False, "subclass"), + } + + if not tf.__internal__.tf2.enabled(): + expected_combinations = expected_combinations.union( + { + ("graph", False, "functional"), + ("graph", False, "sequential"), + ("graph", False, "subclass"), + } + ) + + self.assertLen(l, len(expected_combinations)) + self.assertEqual(set(l), expected_combinations) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(l, len(expected_combinations) * 2) + + def test_run_all_keras_modes_with_all_model_types_annotate_class(self): + l = [] + + @test_combinations.run_with_all_model_types + @test_combinations.run_all_keras_modes + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @parameterized.named_parameters( + dict(testcase_name="_arg", arg=True) + ) + def testBody(self, arg): + mode = "eager" if tf.executing_eagerly() else "graph" + should_run_eagerly = test_utils.should_run_eagerly() + l.append( + (mode, should_run_eagerly, test_utils.get_model_type()) + ) + + e = ExampleTest() + e.testBody_arg_v2_eager_functional() + e.testBody_arg_v2_function_functional() + e.testBody_arg_v2_eager_sequential() + e.testBody_arg_v2_function_sequential() + e.testBody_arg_v2_eager_subclass() + e.testBody_arg_v2_function_subclass() + + if not tf.__internal__.tf2.enabled(): + e.testBody_arg_v1_session_functional() + e.testBody_arg_v1_session_sequential() + e.testBody_arg_v1_session_subclass() + + expected_combinations = { + ("eager", True, "functional"), + ("eager", False, "functional"), + ("eager", True, "sequential"), + ("eager", False, "sequential"), + ("eager", True, "subclass"), + ("eager", False, "subclass"), + } + + if not tf.__internal__.tf2.enabled(): + expected_combinations = expected_combinations.union( + { + ("graph", False, "functional"), + ("graph", False, "sequential"), + ("graph", False, "subclass"), + } + ) + + self.assertLen(l, len(expected_combinations)) + self.assertEqual(set(l), expected_combinations) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(l, len(expected_combinations) * 2) + + def test_run_all_keras_modes_with_all_model_types_annotate_class_2(self): + l = [] + + @test_combinations.run_with_all_model_types + class ExampleTest(test_combinations.TestCase): + def runTest(self): + pass + + @test_combinations.run_all_keras_modes + @parameterized.named_parameters( + dict(testcase_name="_arg", arg=True) + ) + def testBody(self, arg): + mode = "eager" if tf.executing_eagerly() else "graph" + should_run_eagerly = test_utils.should_run_eagerly() + l.append( + (mode, should_run_eagerly, test_utils.get_model_type()) + ) + + e = ExampleTest() + 
e.testBody_arg_v2_eager_functional() + e.testBody_arg_v2_function_functional() + e.testBody_arg_v2_eager_sequential() + e.testBody_arg_v2_function_sequential() + e.testBody_arg_v2_eager_subclass() + e.testBody_arg_v2_function_subclass() + + if not tf.__internal__.tf2.enabled(): + e.testBody_arg_v1_session_functional() + e.testBody_arg_v1_session_sequential() + e.testBody_arg_v1_session_subclass() + + expected_combinations = { + ("eager", True, "functional"), + ("eager", False, "functional"), + ("eager", True, "sequential"), + ("eager", False, "sequential"), + ("eager", True, "subclass"), + ("eager", False, "subclass"), + } + + if not tf.__internal__.tf2.enabled(): + expected_combinations = expected_combinations.union( + { + ("graph", False, "functional"), + ("graph", False, "sequential"), + ("graph", False, "subclass"), + } + ) + + self.assertLen(l, len(expected_combinations)) + self.assertEqual(set(l), expected_combinations) + + ts = unittest.makeSuite(ExampleTest) + res = unittest.TestResult() + ts.run(res) + + self.assertLen(l, len(expected_combinations) * 2) - def test_run_with_all_model_types(self): - model_types = [] - models = [] - - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_with_all_model_types - def testBody(self): - model_types.append(test_utils.get_model_type()) - models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) - - e = ExampleTest() - e.testBody_functional() - e.testBody_subclass() - e.testBody_sequential() - - self.assertLen(model_types, 3) - self.assertAllEqual(model_types, [ - "functional", - "subclass", - "sequential" - ]) - - # Validate that the models are what they should be - self.assertTrue(models[0]._is_graph_network) - self.assertFalse(models[1]._is_graph_network) - self.assertNotIsInstance(models[0], keras.models.Sequential) - self.assertNotIsInstance(models[1], keras.models.Sequential) - self.assertIsInstance(models[2], keras.models.Sequential) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - self.assertLen(model_types, 6) - - def test_run_with_all_model_types_and_extra_params(self): - model_types = [] - models = [] - - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_with_all_model_types - @parameterized.named_parameters( - [dict(testcase_name="_0", with_brackets=True), - dict(testcase_name="_1", with_brackets=False)]) - def testBody(self, with_brackets): - with_brackets = "with_brackets" if with_brackets else "without_brackets" - model_types.append((with_brackets, test_utils.get_model_type())) - models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) - - e = ExampleTest() - e.testBody_0_functional() - e.testBody_0_subclass() - e.testBody_0_sequential() - e.testBody_1_functional() - e.testBody_1_subclass() - e.testBody_1_sequential() - - self.assertLen(model_types, 6) - self.assertAllEqual(model_types, [ - ("with_brackets", "functional"), - ("with_brackets", "subclass"), - ("with_brackets", "sequential"), - ("without_brackets", "functional"), - ("without_brackets", "subclass"), - ("without_brackets", "sequential"), - ]) - - # Validate that the models are what they should be - self.assertTrue(models[0]._is_graph_network) - self.assertFalse(models[1]._is_graph_network) - self.assertNotIsInstance(models[0], keras.models.Sequential) - self.assertNotIsInstance(models[1], keras.models.Sequential) - self.assertIsInstance(models[2], keras.models.Sequential) - - ts = unittest.makeSuite(ExampleTest) - res = 
unittest.TestResult() - ts.run(res) - - self.assertLen(model_types, 12) - - def test_run_with_all_model_types_exclude_one(self): - model_types = [] - models = [] - - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_with_all_model_types(exclude_models="sequential") - def testBody(self): - model_types.append(test_utils.get_model_type()) - models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) - - e = ExampleTest() - if hasattr(e, "testBody_functional"): - e.testBody_functional() - if hasattr(e, "testBody_subclass"): - e.testBody_subclass() - if hasattr(e, "testBody_sequential"): - e.testBody_sequential() - - self.assertLen(model_types, 2) - self.assertAllEqual(model_types, [ - "functional", - "subclass" - ]) - - # Validate that the models are what they should be - self.assertTrue(models[0]._is_graph_network) - self.assertFalse(models[1]._is_graph_network) - self.assertNotIsInstance(models[0], keras.models.Sequential) - self.assertNotIsInstance(models[1], keras.models.Sequential) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - self.assertLen(model_types, 4) - - def test_run_with_all_model_types_exclude_multiple(self): - model_types = [] - models = [] - - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_with_all_model_types( - exclude_models=["sequential", "functional"]) - def testBody(self): - model_types.append(test_utils.get_model_type()) - models.append(test_utils.get_small_mlp(1, 4, input_dim=3)) - - e = ExampleTest() - if hasattr(e, "testBody_functional"): - e.testBody_functional() - if hasattr(e, "testBody_subclass"): - e.testBody_subclass() - if hasattr(e, "testBody_sequential"): - e.testBody_sequential() - - self.assertLen(model_types, 1) - self.assertAllEqual(model_types, [ - "subclass" - ]) - - # Validate that the models are what they should be - self.assertFalse(models[0]._is_graph_network) - self.assertNotIsInstance(models[0], keras.models.Sequential) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - self.assertLen(model_types, 2) - - def test_run_all_keras_modes(self): - l = [] - - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_all_keras_modes() - def testBody(self): - mode = "eager" if tf.executing_eagerly() else "graph" - should_run_eagerly = test_utils.should_run_eagerly() - l.append((mode, should_run_eagerly)) - - e = ExampleTest() - if not tf.__internal__.tf2.enabled(): - e.testBody_v1_session() - e.testBody_v2_eager() - e.testBody_v2_function() - - if not tf.__internal__.tf2.enabled(): - self.assertLen(l, 3) - self.assertAllEqual(l, [ - ("graph", False), - ("eager", True), - ("eager", False), - ]) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - self.assertLen(l, 6) - else: - self.assertLen(l, 2) - self.assertAllEqual(l, [ - ("eager", True), - ("eager", False), - ]) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - self.assertLen(l, 4) - - def test_run_all_keras_modes_extra_params(self): - l = [] - - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_all_keras_modes() - @parameterized.named_parameters( - [dict(testcase_name="_0", with_brackets=True), - dict(testcase_name="_1", with_brackets=False)]) - def testBody(self, with_brackets): - mode = "eager" if tf.executing_eagerly() else "graph" - 
with_brackets = "with_brackets" if with_brackets else "without_brackets" - should_run_eagerly = test_utils.should_run_eagerly() - l.append((with_brackets, mode, should_run_eagerly)) - - e = ExampleTest() - if not tf.__internal__.tf2.enabled(): - e.testBody_0_v1_session() - e.testBody_1_v1_session() - - e.testBody_0_v2_eager() - e.testBody_0_v2_function() - e.testBody_1_v2_eager() - e.testBody_1_v2_function() - - expected_combinations = { - ("with_brackets", "eager", True), - ("with_brackets", "eager", False), - ("without_brackets", "eager", True), - ("without_brackets", "eager", False), - } - - if not tf.__internal__.tf2.enabled(): - expected_combinations = expected_combinations.union({ - ("with_brackets", "graph", False), - ("without_brackets", "graph", False), - }) - - self.assertLen(l, len(expected_combinations)) - self.assertEqual(set(l), expected_combinations) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - self.assertLen(l, len(expected_combinations) * 2) - - def test_run_all_keras_modes_always_skip_v1(self): - l = [] - - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def testBody(self): - mode = "eager" if tf.executing_eagerly() else "graph" - should_run_eagerly = test_utils.should_run_eagerly() - l.append((mode, should_run_eagerly)) - - e = ExampleTest() - if hasattr(e, "testBody_v1_session"): - e.testBody_v1_session() - if hasattr(e, "testBody_v2_eager"): - e.testBody_v2_eager() - if hasattr(e, "testBody_v2_function"): - e.testBody_v2_function() - - self.assertLen(l, 2) - self.assertEqual( - set(l), { - ("eager", True), - ("eager", False), - }) - - def test_run_all_keras_modes_with_all_model_types(self): - l = [] - - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_with_all_model_types - @test_combinations.run_all_keras_modes - def testBody(self): - mode = "eager" if tf.executing_eagerly() else "graph" - should_run_eagerly = test_utils.should_run_eagerly() - l.append((mode, should_run_eagerly, test_utils.get_model_type())) - - e = ExampleTest() - e.testBody_v2_eager_functional() - e.testBody_v2_function_functional() - e.testBody_v2_eager_sequential() - e.testBody_v2_function_sequential() - e.testBody_v2_eager_subclass() - e.testBody_v2_function_subclass() - - if not tf.__internal__.tf2.enabled(): - e.testBody_v1_session_functional() - e.testBody_v1_session_sequential() - e.testBody_v1_session_subclass() - - expected_combinations = { - ("eager", True, "functional"), - ("eager", False, "functional"), - ("eager", True, "sequential"), - ("eager", False, "sequential"), - ("eager", True, "subclass"), - ("eager", False, "subclass"), - } - - if not tf.__internal__.tf2.enabled(): - expected_combinations = expected_combinations.union({ - ("graph", False, "functional"), - ("graph", False, "sequential"), - ("graph", False, "subclass"), - }) - - self.assertLen(l, len(expected_combinations)) - self.assertEqual(set(l), expected_combinations) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - self.assertLen(l, len(expected_combinations) * 2) - - def test_run_all_model_types_with_all_keras_modes(self): - l = [] - - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types - def testBody(self): - mode = "eager" if tf.executing_eagerly() else "graph" - 
should_run_eagerly = test_utils.should_run_eagerly() - l.append((mode, should_run_eagerly, test_utils.get_model_type())) - - e = ExampleTest() - e.testBody_functional_v2_eager() - e.testBody_functional_v2_function() - e.testBody_sequential_v2_eager() - e.testBody_sequential_v2_function() - e.testBody_subclass_v2_eager() - e.testBody_subclass_v2_function() - - if not tf.__internal__.tf2.enabled(): - e.testBody_functional_v1_session() - e.testBody_sequential_v1_session() - e.testBody_subclass_v1_session() - - expected_combinations = { - ("eager", True, "functional"), - ("eager", False, "functional"), - ("eager", True, "sequential"), - ("eager", False, "sequential"), - ("eager", True, "subclass"), - ("eager", False, "subclass"), - } - - if not tf.__internal__.tf2.enabled(): - expected_combinations = expected_combinations.union({ - ("graph", False, "functional"), - ("graph", False, "sequential"), - ("graph", False, "subclass"), - }) - - self.assertLen(l, len(expected_combinations)) - self.assertEqual(set(l), expected_combinations) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - self.assertLen(l, len(expected_combinations) * 2) - - def test_run_all_keras_modes_with_all_model_types_annotate_class(self): - l = [] - - @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @parameterized.named_parameters(dict(testcase_name="_arg", - arg=True)) - def testBody(self, arg): - mode = "eager" if tf.executing_eagerly() else "graph" - should_run_eagerly = test_utils.should_run_eagerly() - l.append((mode, should_run_eagerly, test_utils.get_model_type())) - - e = ExampleTest() - e.testBody_arg_v2_eager_functional() - e.testBody_arg_v2_function_functional() - e.testBody_arg_v2_eager_sequential() - e.testBody_arg_v2_function_sequential() - e.testBody_arg_v2_eager_subclass() - e.testBody_arg_v2_function_subclass() - - if not tf.__internal__.tf2.enabled(): - e.testBody_arg_v1_session_functional() - e.testBody_arg_v1_session_sequential() - e.testBody_arg_v1_session_subclass() - - expected_combinations = { - ("eager", True, "functional"), - ("eager", False, "functional"), - ("eager", True, "sequential"), - ("eager", False, "sequential"), - ("eager", True, "subclass"), - ("eager", False, "subclass"), - } - - if not tf.__internal__.tf2.enabled(): - expected_combinations = expected_combinations.union({ - ("graph", False, "functional"), - ("graph", False, "sequential"), - ("graph", False, "subclass"), - }) - - self.assertLen(l, len(expected_combinations)) - self.assertEqual(set(l), expected_combinations) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - self.assertLen(l, len(expected_combinations) * 2) - - def test_run_all_keras_modes_with_all_model_types_annotate_class_2(self): - l = [] + @parameterized.named_parameters(dict(testcase_name="argument", arg=True)) + def test_run_all_keras_modes_extra_params_2(self, arg): + self.assertEqual(arg, True) @test_combinations.run_with_all_model_types - class ExampleTest(test_combinations.TestCase): - - def runTest(self): - pass - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters(dict(testcase_name="_arg", - arg=True)) - def testBody(self, arg): - mode = "eager" if tf.executing_eagerly() else "graph" - should_run_eagerly = test_utils.should_run_eagerly() - l.append((mode, should_run_eagerly, test_utils.get_model_type())) - - e = ExampleTest() - 
e.testBody_arg_v2_eager_functional() - e.testBody_arg_v2_function_functional() - e.testBody_arg_v2_eager_sequential() - e.testBody_arg_v2_function_sequential() - e.testBody_arg_v2_eager_subclass() - e.testBody_arg_v2_function_subclass() - - if not tf.__internal__.tf2.enabled(): - e.testBody_arg_v1_session_functional() - e.testBody_arg_v1_session_sequential() - e.testBody_arg_v1_session_subclass() - - expected_combinations = { - ("eager", True, "functional"), - ("eager", False, "functional"), - ("eager", True, "sequential"), - ("eager", False, "sequential"), - ("eager", True, "subclass"), - ("eager", False, "subclass"), - } - - if not tf.__internal__.tf2.enabled(): - expected_combinations = expected_combinations.union({ - ("graph", False, "functional"), - ("graph", False, "sequential"), - ("graph", False, "subclass"), - }) - - self.assertLen(l, len(expected_combinations)) - self.assertEqual(set(l), expected_combinations) - - ts = unittest.makeSuite(ExampleTest) - res = unittest.TestResult() - ts.run(res) - - self.assertLen(l, len(expected_combinations) * 2) - - @test_combinations.run_all_keras_modes - @parameterized.named_parameters(dict(testcase_name="argument", - arg=True)) - def test_run_all_keras_modes_extra_params_2(self, arg): - self.assertEqual(arg, True) - - @test_combinations.run_with_all_model_types - @parameterized.named_parameters(dict(testcase_name="argument", - arg=True)) - def test_run_with_all_model_types_extra_params_2(self, arg): - self.assertEqual(arg, True) + @parameterized.named_parameters(dict(testcase_name="argument", arg=True)) + def test_run_with_all_model_types_extra_params_2(self, arg): + self.assertEqual(arg, True) + if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/testing_infra/test_utils.py b/keras/testing_infra/test_utils.py index bb4441855460..0c138c1aea80 100644 --- a/keras/testing_infra/test_utils.py +++ b/keras/testing_infra/test_utils.py @@ -21,283 +21,327 @@ import itertools import threading import unittest + +import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras import layers from keras import models from keras.engine import base_layer_utils -from keras.optimizers.optimizer_v2 import adadelta as adadelta_v2 -from keras.optimizers.optimizer_v2 import adagrad as adagrad_v2 -from keras.optimizers.optimizer_v2 import adam as adam_v2 -from keras.optimizers.optimizer_v2 import adamax as adamax_v2 -from keras.optimizers.optimizer_v2 import gradient_descent as gradient_descent_v2 -from keras.optimizers.optimizer_v2 import nadam as nadam_v2 -from keras.optimizers.optimizer_v2 import rmsprop as rmsprop_v2 +from keras.optimizers.legacy import adadelta as adadelta_v2 +from keras.optimizers.legacy import adagrad as adagrad_v2 +from keras.optimizers.legacy import adam as adam_v2 +from keras.optimizers.legacy import adamax as adamax_v2 +from keras.optimizers.legacy import gradient_descent as gradient_descent_v2 +from keras.optimizers.legacy import nadam as nadam_v2 +from keras.optimizers.legacy import rmsprop as rmsprop_v2 from keras.utils import tf_contextlib from keras.utils import tf_inspect -import numpy as np -import tensorflow.compat.v2 as tf -from tensorflow.python.framework import test_util as tf_test_utils + +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) from tensorflow.python.util.tf_export import keras_export def string_test(actual, expected): - np.testing.assert_array_equal(actual, expected) + np.testing.assert_array_equal(actual, expected) def 
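
For reference, the decorators exercised in the test file above compose as follows in an ordinary test; a minimal sketch, assuming the usual keras.testing_infra import paths (the test class, method name, and MLP sizes here are illustrative only, not part of this patch):

    from keras.testing_infra import test_combinations, test_utils

    @test_combinations.run_with_all_model_types
    @test_combinations.run_all_keras_modes
    class MyLayerTest(test_combinations.TestCase):
        def test_small_mlp_compiles(self):
            # One generated copy of this test runs per
            # (model type, keras mode) combination.
            model = test_utils.get_small_mlp(1, 4, input_dim=3)
            model.compile(
                "rmsprop",
                "mse",
                run_eagerly=test_utils.should_run_eagerly(),
            )
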
numeric_test(actual, expected): - np.testing.assert_allclose(actual, expected, rtol=1e-3, atol=1e-6) - - -def get_test_data(train_samples, - test_samples, - input_shape, - num_classes, - random_seed=None): - """Generates test data to train a model on. - - Args: - train_samples: Integer, how many training samples to generate. - test_samples: Integer, how many test samples to generate. - input_shape: Tuple of integers, shape of the inputs. - num_classes: Integer, number of classes for the data and targets. - random_seed: Integer, random seed used by numpy to generate data. - - Returns: - A tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - """ - if random_seed is not None: - np.random.seed(random_seed) - num_sample = train_samples + test_samples - templates = 2 * num_classes * np.random.random((num_classes,) + input_shape) - y = np.random.randint(0, num_classes, size=(num_sample,)) - x = np.zeros((num_sample,) + input_shape, dtype=np.float32) - for i in range(num_sample): - x[i] = templates[y[i]] + np.random.normal(loc=0, scale=1., size=input_shape) - return ((x[:train_samples], y[:train_samples]), - (x[train_samples:], y[train_samples:])) - - -@keras_export('keras.__internal__.utils.layer_test', v1=[]) + np.testing.assert_allclose(actual, expected, rtol=1e-3, atol=1e-6) + + +def get_test_data( + train_samples, test_samples, input_shape, num_classes, random_seed=None +): + """Generates test data to train a model on. + + Args: + train_samples: Integer, how many training samples to generate. + test_samples: Integer, how many test samples to generate. + input_shape: Tuple of integers, shape of the inputs. + num_classes: Integer, number of classes for the data and targets. + random_seed: Integer, random seed used by numpy to generate data. + + Returns: + A tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. + """ + if random_seed is not None: + np.random.seed(random_seed) + num_sample = train_samples + test_samples + templates = 2 * num_classes * np.random.random((num_classes,) + input_shape) + y = np.random.randint(0, num_classes, size=(num_sample,)) + x = np.zeros((num_sample,) + input_shape, dtype=np.float32) + for i in range(num_sample): + x[i] = templates[y[i]] + np.random.normal( + loc=0, scale=1.0, size=input_shape + ) + return ( + (x[:train_samples], y[:train_samples]), + (x[train_samples:], y[train_samples:]), + ) + + +@keras_export("keras.__internal__.utils.layer_test", v1=[]) @tf_test_utils.disable_cudnn_autotune -def layer_test(layer_cls, - kwargs=None, - input_shape=None, - input_dtype=None, - input_data=None, - expected_output=None, - expected_output_dtype=None, - expected_output_shape=None, - validate_training=True, - adapt_data=None, - custom_objects=None, - test_harness=None, - supports_masking=None): - """Test routine for a layer with a single input and single output. - - Args: - layer_cls: Layer class object. - kwargs: Optional dictionary of keyword arguments for instantiating the - layer. - input_shape: Input shape tuple. - input_dtype: Data type of the input data. - input_data: Numpy array of input data. - expected_output: Numpy array of the expected output. - expected_output_dtype: Data type expected for the output. - expected_output_shape: Shape tuple for the expected shape of the output. - validate_training: Whether to attempt to validate training on this layer. - This might be set to False for non-differentiable layers that output - string or integer values. - adapt_data: Optional data for an 'adapt' call. 
If None, adapt() will not - be tested for this layer. This is only relevant for PreprocessingLayers. - custom_objects: Optional dictionary mapping name strings to custom objects - in the layer class. This is helpful for testing custom layers. - test_harness: The Tensorflow test, if any, that this function is being - called in. - supports_masking: Optional boolean to check the `supports_masking` property - of the layer. If None, the check will not be performed. - - Returns: - The output data (Numpy array) returned by the layer, for additional - checks to be done by the calling code. - - Raises: - ValueError: if `input_shape is None`. - """ - if input_data is None: - if input_shape is None: - raise ValueError('input_shape is None') - if not input_dtype: - input_dtype = 'float32' - input_data_shape = list(input_shape) - for i, e in enumerate(input_data_shape): - if e is None: - input_data_shape[i] = np.random.randint(1, 4) - input_data = 10 * np.random.random(input_data_shape) - if input_dtype[:5] == 'float': - input_data -= 0.5 - input_data = input_data.astype(input_dtype) - elif input_shape is None: - input_shape = input_data.shape - if input_dtype is None: - input_dtype = input_data.dtype - if expected_output_dtype is None: - expected_output_dtype = input_dtype - - if tf.as_dtype(expected_output_dtype) == tf.string: - if test_harness: - assert_equal = test_harness.assertAllEqual - else: - assert_equal = string_test - else: - if test_harness: - assert_equal = test_harness.assertAllClose +def layer_test( + layer_cls, + kwargs=None, + input_shape=None, + input_dtype=None, + input_data=None, + expected_output=None, + expected_output_dtype=None, + expected_output_shape=None, + validate_training=True, + adapt_data=None, + custom_objects=None, + test_harness=None, + supports_masking=None, +): + """Test routine for a layer with a single input and single output. + + Args: + layer_cls: Layer class object. + kwargs: Optional dictionary of keyword arguments for instantiating the + layer. + input_shape: Input shape tuple. + input_dtype: Data type of the input data. + input_data: Numpy array of input data. + expected_output: Numpy array of the expected output. + expected_output_dtype: Data type expected for the output. + expected_output_shape: Shape tuple for the expected shape of the output. + validate_training: Whether to attempt to validate training on this layer. + This might be set to False for non-differentiable layers that output + string or integer values. + adapt_data: Optional data for an 'adapt' call. If None, adapt() will not + be tested for this layer. This is only relevant for PreprocessingLayers. + custom_objects: Optional dictionary mapping name strings to custom objects + in the layer class. This is helpful for testing custom layers. + test_harness: The Tensorflow test, if any, that this function is being + called in. + supports_masking: Optional boolean to check the `supports_masking` + property of the layer. If None, the check will not be performed. + + Returns: + The output data (Numpy array) returned by the layer, for additional + checks to be done by the calling code. + + Raises: + ValueError: if `input_shape is None`. 
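
A hedged sketch of a typical call into this routine, matching the docstring above (the layer choice and shapes are illustrative, not taken from this patch):

    from keras import layers
    from keras.testing_infra import test_utils

    # Exercises shape/dtype inference, serialization round-trips, and one
    # train_on_batch step for a Dense layer; input_shape includes the
    # batch dimension.
    test_utils.layer_test(
        layers.Dense,
        kwargs={"units": 3},
        input_shape=(2, 4),
        expected_output_shape=(None, 3),
    )
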
+ """ + if input_data is None: + if input_shape is None: + raise ValueError("input_shape is None") + if not input_dtype: + input_dtype = "float32" + input_data_shape = list(input_shape) + for i, e in enumerate(input_data_shape): + if e is None: + input_data_shape[i] = np.random.randint(1, 4) + input_data = 10 * np.random.random(input_data_shape) + if input_dtype[:5] == "float": + input_data -= 0.5 + input_data = input_data.astype(input_dtype) + elif input_shape is None: + input_shape = input_data.shape + if input_dtype is None: + input_dtype = input_data.dtype + if expected_output_dtype is None: + expected_output_dtype = input_dtype + + if tf.as_dtype(expected_output_dtype) == tf.string: + if test_harness: + assert_equal = test_harness.assertAllEqual + else: + assert_equal = string_test else: - assert_equal = numeric_test - - # instantiation - kwargs = kwargs or {} - layer = layer_cls(**kwargs) - - if (supports_masking is not None - and layer.supports_masking != supports_masking): - raise AssertionError( - 'When testing layer %s, the `supports_masking` property is %r' - 'but expected to be %r.\nFull kwargs: %s' % - (layer_cls.__name__, layer.supports_masking, supports_masking, kwargs)) - - # Test adapt, if data was passed. - if adapt_data is not None: - layer.adapt(adapt_data) - - # test get_weights , set_weights at layer level - weights = layer.get_weights() - layer.set_weights(weights) - - # test and instantiation from weights - if 'weights' in tf_inspect.getargspec(layer_cls.__init__): - kwargs['weights'] = weights + if test_harness: + assert_equal = test_harness.assertAllClose + else: + assert_equal = numeric_test + + # instantiation + kwargs = kwargs or {} layer = layer_cls(**kwargs) - # test in functional API - x = layers.Input(shape=input_shape[1:], dtype=input_dtype) - y = layer(x) - if backend.dtype(y) != expected_output_dtype: - raise AssertionError('When testing layer %s, for input %s, found output ' - 'dtype=%s but expected to find %s.\nFull kwargs: %s' % - (layer_cls.__name__, x, backend.dtype(y), - expected_output_dtype, kwargs)) - - def assert_shapes_equal(expected, actual): - """Asserts that the output shape from the layer matches the actual shape.""" - if len(expected) != len(actual): - raise AssertionError( - 'When testing layer %s, for input %s, found output_shape=' - '%s but expected to find %s.\nFull kwargs: %s' % - (layer_cls.__name__, x, actual, expected, kwargs)) - - for expected_dim, actual_dim in zip(expected, actual): - if isinstance(expected_dim, tf.compat.v1.Dimension): - expected_dim = expected_dim.value - if isinstance(actual_dim, tf.compat.v1.Dimension): - actual_dim = actual_dim.value - if expected_dim is not None and expected_dim != actual_dim: + if ( + supports_masking is not None + and layer.supports_masking != supports_masking + ): raise AssertionError( - 'When testing layer %s, for input %s, found output_shape=' - '%s but expected to find %s.\nFull kwargs: %s' % - (layer_cls.__name__, x, actual, expected, kwargs)) - - if expected_output_shape is not None: - assert_shapes_equal(tf.TensorShape(expected_output_shape), - y.shape) - - # check shape inference - model = models.Model(x, y) - computed_output_shape = tuple( - layer.compute_output_shape( - tf.TensorShape(input_shape)).as_list()) - computed_output_signature = layer.compute_output_signature( - tf.TensorSpec(shape=input_shape, dtype=input_dtype)) - actual_output = model.predict(input_data) - actual_output_shape = actual_output.shape - assert_shapes_equal(computed_output_shape, actual_output_shape) - 
assert_shapes_equal(computed_output_signature.shape, actual_output_shape) - if computed_output_signature.dtype != actual_output.dtype: - raise AssertionError( - 'When testing layer %s, for input %s, found output_dtype=' - '%s but expected to find %s.\nFull kwargs: %s' % - (layer_cls.__name__, x, actual_output.dtype, - computed_output_signature.dtype, kwargs)) - if expected_output is not None: - assert_equal(actual_output, expected_output) - - # test serialization, weight setting at model level - model_config = model.get_config() - recovered_model = models.Model.from_config(model_config, custom_objects) - if model.weights: - weights = model.get_weights() - recovered_model.set_weights(weights) - output = recovered_model.predict(input_data) - assert_equal(output, actual_output) - - # test training mode (e.g. useful for dropout tests) - # Rebuild the model to avoid the graph being reused between predict() and - # See b/120160788 for more details. This should be mitigated after 2.0. - layer_weights = layer.get_weights() # Get the layer weights BEFORE training. - if validate_training: - model = models.Model(x, layer(x)) - if _thread_local_data.run_eagerly is not None: - model.compile( - 'rmsprop', - 'mse', - weighted_metrics=['acc'], - run_eagerly=should_run_eagerly()) - else: - model.compile('rmsprop', 'mse', weighted_metrics=['acc']) - model.train_on_batch(input_data, actual_output) - - # test as first layer in Sequential API - layer_config = layer.get_config() - layer_config['batch_input_shape'] = input_shape - layer = layer.__class__.from_config(layer_config) - - # Test adapt, if data was passed. - if adapt_data is not None: - layer.adapt(adapt_data) - - model = models.Sequential() - model.add(layers.Input(shape=input_shape[1:], dtype=input_dtype)) - model.add(layer) - - layer.set_weights(layer_weights) - actual_output = model.predict(input_data) - actual_output_shape = actual_output.shape - for expected_dim, actual_dim in zip(computed_output_shape, - actual_output_shape): - if expected_dim is not None: - if expected_dim != actual_dim: + "When testing layer %s, the `supports_masking` property is %r" + "but expected to be %r.\nFull kwargs: %s" + % ( + layer_cls.__name__, + layer.supports_masking, + supports_masking, + kwargs, + ) + ) + + # Test adapt, if data was passed. 
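
The adapt branch below is only relevant for preprocessing layers; a hedged illustration of what passing adapt_data exercises (the layer and values are illustrative):

    import numpy as np
    from keras import layers

    # adapt() computes layer state from data before the layer is tested,
    # e.g. the mean and variance that Normalization later applies.
    norm = layers.Normalization()
    norm.adapt(np.array([[0.0], [2.0], [4.0]]))
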
+ if adapt_data is not None: + layer.adapt(adapt_data) + + # test get_weights, set_weights at layer level + weights = layer.get_weights() + layer.set_weights(weights) + + # test instantiation from weights + if "weights" in tf_inspect.getargspec(layer_cls.__init__): + kwargs["weights"] = weights + layer = layer_cls(**kwargs) + + # test in functional API + x = layers.Input(shape=input_shape[1:], dtype=input_dtype) + y = layer(x) + if backend.dtype(y) != expected_output_dtype: + raise AssertionError( + "When testing layer %s, for input %s, found output " + "dtype=%s but expected to find %s.\nFull kwargs: %s" + % ( + layer_cls.__name__, + x, + backend.dtype(y), + expected_output_dtype, + kwargs, + ) + ) + + def assert_shapes_equal(expected, actual): + """Asserts that the output shape from the layer matches the actual + shape.""" + if len(expected) != len(actual): + raise AssertionError( + "When testing layer %s, for input %s, found output_shape=" + "%s but expected to find %s.\nFull kwargs: %s" + % (layer_cls.__name__, x, actual, expected, kwargs) + ) + + for expected_dim, actual_dim in zip(expected, actual): + if isinstance(expected_dim, tf.compat.v1.Dimension): + expected_dim = expected_dim.value + if isinstance(actual_dim, tf.compat.v1.Dimension): + actual_dim = actual_dim.value + if expected_dim is not None and expected_dim != actual_dim: + raise AssertionError( + "When testing layer %s, for input %s, found output_shape=" + "%s but expected to find %s.\nFull kwargs: %s" + % (layer_cls.__name__, x, actual, expected, kwargs) + ) + + if expected_output_shape is not None: + assert_shapes_equal(tf.TensorShape(expected_output_shape), y.shape) + + # check shape inference + model = models.Model(x, y) + computed_output_shape = tuple( + layer.compute_output_shape(tf.TensorShape(input_shape)).as_list() + ) + computed_output_signature = layer.compute_output_signature( + tf.TensorSpec(shape=input_shape, dtype=input_dtype) + ) + actual_output = model.predict(input_data) + actual_output_shape = actual_output.shape + assert_shapes_equal(computed_output_shape, actual_output_shape) + assert_shapes_equal(computed_output_signature.shape, actual_output_shape) + if computed_output_signature.dtype != actual_output.dtype: + raise AssertionError( + "When testing layer %s, for input %s, found output_dtype=" + "%s but expected to find %s.\nFull kwargs: %s" + % ( + layer_cls.__name__, + x, + actual_output.dtype, + computed_output_signature.dtype, + kwargs, + ) + ) + if expected_output is not None: + assert_equal(actual_output, expected_output) + + # test serialization, weight setting at model level + model_config = model.get_config() + recovered_model = models.Model.from_config(model_config, custom_objects) + if model.weights: + weights = model.get_weights() + 
recovered_model.set_weights(weights) + output = recovered_model.predict(input_data) + assert_equal(output, actual_output) + + # test training mode (e.g. useful for dropout tests) + # Rebuild the model to avoid the graph being reused between predict() and + # train_on_batch(). See b/120160788 for more details. This should be mitigated after 2.0. + layer_weights = ( + layer.get_weights() + ) # Get the layer weights BEFORE training. + if validate_training: + model = models.Model(x, layer(x)) + if _thread_local_data.run_eagerly is not None: + model.compile( + "rmsprop", + "mse", + weighted_metrics=["acc"], + run_eagerly=should_run_eagerly(), + ) + else: + model.compile("rmsprop", "mse", weighted_metrics=["acc"]) + model.train_on_batch(input_data, actual_output) + + # test as first layer in Sequential API + layer_config = layer.get_config() + layer_config["batch_input_shape"] = input_shape + layer = layer.__class__.from_config(layer_config) + + # Test adapt, if data was passed. + if adapt_data is not None: + layer.adapt(adapt_data) + + model = models.Sequential() + model.add(layers.Input(shape=input_shape[1:], dtype=input_dtype)) + model.add(layer) + + layer.set_weights(layer_weights) + actual_output = model.predict(input_data) + actual_output_shape = actual_output.shape + for expected_dim, actual_dim in zip( + computed_output_shape, actual_output_shape + ): + if expected_dim is not None: + if expected_dim != actual_dim: + raise AssertionError( + "When testing layer %s **after deserialization**, " + "for input %s, found output_shape=" + "%s but expected to find inferred shape %s.\n" + "Full kwargs: %s" + % ( + layer_cls.__name__, + x, + actual_output_shape, + computed_output_shape, + kwargs, + ) + ) + if expected_output is not None: + assert_equal(actual_output, expected_output) + + # test serialization, weight setting at model level + model_config = model.get_config() + recovered_model = models.Sequential.from_config( + model_config, custom_objects + ) + if model.weights: + weights = model.get_weights() + recovered_model.set_weights(weights) + output = recovered_model.predict(input_data) + assert_equal(output, actual_output) + + # for further checks in the caller function + return actual_output _thread_local_data = threading.local() @@ -309,781 +353,836 @@ def assert_shapes_equal(expected, actual): @tf_contextlib.contextmanager def model_type_scope(value): - """Provides a scope within which the model type to test is equal to `value`. + """Provides a scope within which the model type to test is equal to `value`. - The model type gets restored to its original value upon exiting the scope. + The model type gets restored to its original value upon exiting the scope. - Args: - value: model type value + Args: + value: model type value - Yields: - The provided value. - """ - previous_value = _thread_local_data.model_type - try: - _thread_local_data.model_type = value - yield value - finally: - # Restore model type to initial value. - _thread_local_data.model_type = previous_value + Yields: + The provided value. + """ + previous_value = _thread_local_data.model_type + try: + _thread_local_data.model_type = value + yield value + finally: + # Restore model type to initial value. + _thread_local_data.model_type = previous_value @tf_contextlib.contextmanager def run_eagerly_scope(value): - """Provides a scope within which we compile models to run eagerly or not. + """Provides a scope within which we compile models to run eagerly or not. - The boolean gets restored to its original value upon exiting the scope. 
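
These thread-local scopes are the primitives the test decorators build on; a minimal sketch of driving them directly (the values are illustrative):

    from keras.testing_infra import test_utils

    # Inside the scope, get_model_type() reports the pushed value; the
    # previous value is restored on exit, even if the body raises.
    with test_utils.model_type_scope("functional"):
        assert test_utils.get_model_type() == "functional"

    # Likewise for the run-eagerly flag consumed by should_run_eagerly().
    with test_utils.run_eagerly_scope(True):
        flag = test_utils.should_run_eagerly()
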
+ The boolean gets restored to its original value upon exiting the scope. - Args: - value: Bool specifying if we should run models eagerly in the active test. - Should be True or False. + Args: + value: Bool specifying if we should run models eagerly in the active + test. Should be True or False. - Yields: - The provided value. - """ - previous_value = _thread_local_data.run_eagerly - try: - _thread_local_data.run_eagerly = value - yield value - finally: - # Restore model type to initial value. - _thread_local_data.run_eagerly = previous_value + Yields: + The provided value. + """ + previous_value = _thread_local_data.run_eagerly + try: + _thread_local_data.run_eagerly = value + yield value + finally: + # Restore run_eagerly to initial value. + _thread_local_data.run_eagerly = previous_value def should_run_eagerly(): - """Returns whether the models we are testing should be run eagerly.""" - if _thread_local_data.run_eagerly is None: - raise ValueError('Cannot call `should_run_eagerly()` outside of a ' - '`run_eagerly_scope()` or `run_all_keras_modes` ' - 'decorator.') + """Returns whether the models we are testing should be run eagerly.""" + if _thread_local_data.run_eagerly is None: + raise ValueError( + "Cannot call `should_run_eagerly()` outside of a " + "`run_eagerly_scope()` or `run_all_keras_modes` " + "decorator." + ) - return _thread_local_data.run_eagerly and tf.executing_eagerly() + return _thread_local_data.run_eagerly and tf.executing_eagerly() @tf_contextlib.contextmanager def saved_model_format_scope(value, **kwargs): - """Provides a scope within which the savde model format to test is `value`. - - The saved model format gets restored to its original value upon exiting the - scope. - - Args: - value: saved model format value - **kwargs: optional kwargs to pass to the save function. - - Yields: - The provided value. - """ - previous_format = _thread_local_data.saved_model_format - previous_kwargs = _thread_local_data.save_kwargs - try: - _thread_local_data.saved_model_format = value - _thread_local_data.save_kwargs = kwargs - yield - finally: - # Restore saved model format to initial value. - _thread_local_data.saved_model_format = previous_format - _thread_local_data.save_kwargs = previous_kwargs + """Provides a scope within which the saved model format to test is `value`. + + The saved model format gets restored to its original value upon exiting the + scope. + + Args: + value: saved model format value + **kwargs: optional kwargs to pass to the save function. + + Yields: + The provided value. + """ + previous_format = _thread_local_data.saved_model_format + previous_kwargs = _thread_local_data.save_kwargs + try: + _thread_local_data.saved_model_format = value + _thread_local_data.save_kwargs = kwargs + yield + finally: + # Restore saved model format to initial value. + _thread_local_data.saved_model_format = previous_format + _thread_local_data.save_kwargs = previous_kwargs def get_save_format(): - if _thread_local_data.saved_model_format is None: - raise ValueError( - 'Cannot call `get_save_format()` outside of a ' - '`saved_model_format_scope()` or `run_with_all_saved_model_formats` ' - 'decorator.') - return _thread_local_data.saved_model_format + if _thread_local_data.saved_model_format is None: + raise ValueError( + "Cannot call `get_save_format()` outside of a " + "`saved_model_format_scope()` or " + "`run_with_all_saved_model_formats` decorator."
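
As with the other scopes, these getters are only valid inside the matching context manager; a hedged sketch (the "h5" format and the extra kwarg are illustrative assumptions, not taken from this patch):

    from keras.testing_infra import test_utils

    # Outside the scope both getters raise the ValueError built above.
    with test_utils.saved_model_format_scope("h5", save_format="h5"):
        fmt = test_utils.get_save_format()     # "h5"
        kwargs = test_utils.get_save_kwargs()  # {"save_format": "h5"}
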
+ ) + return _thread_local_data.saved_model_format def get_save_kwargs(): - if _thread_local_data.save_kwargs is None: - raise ValueError( - 'Cannot call `get_save_kwargs()` outside of a ' - '`saved_model_format_scope()` or `run_with_all_saved_model_formats` ' - 'decorator.') - return _thread_local_data.save_kwargs or {} + if _thread_local_data.save_kwargs is None: + raise ValueError( + "Cannot call `get_save_kwargs()` outside of a " + "`saved_model_format_scope()` or " + "`run_with_all_saved_model_formats` decorator." + ) + return _thread_local_data.save_kwargs or {} def get_model_type(): - """Gets the model type that should be tested.""" - if _thread_local_data.model_type is None: - raise ValueError('Cannot call `get_model_type()` outside of a ' - '`model_type_scope()` or `run_with_all_model_types` ' - 'decorator.') + """Gets the model type that should be tested.""" + if _thread_local_data.model_type is None: + raise ValueError( + "Cannot call `get_model_type()` outside of a " + "`model_type_scope()` or `run_with_all_model_types` " + "decorator." + ) - return _thread_local_data.model_type + return _thread_local_data.model_type def get_small_sequential_mlp(num_hidden, num_classes, input_dim=None): - model = models.Sequential() - if input_dim: - model.add(layers.Dense(num_hidden, activation='relu', input_dim=input_dim)) - else: - model.add(layers.Dense(num_hidden, activation='relu')) - activation = 'sigmoid' if num_classes == 1 else 'softmax' - model.add(layers.Dense(num_classes, activation=activation)) - return model + model = models.Sequential() + if input_dim: + model.add( + layers.Dense(num_hidden, activation="relu", input_dim=input_dim) + ) + else: + model.add(layers.Dense(num_hidden, activation="relu")) + activation = "sigmoid" if num_classes == 1 else "softmax" + model.add(layers.Dense(num_classes, activation=activation)) + return model def get_small_functional_mlp(num_hidden, num_classes, input_dim): - inputs = layers.Input(shape=(input_dim,)) - outputs = layers.Dense(num_hidden, activation='relu')(inputs) - activation = 'sigmoid' if num_classes == 1 else 'softmax' - outputs = layers.Dense(num_classes, activation=activation)(outputs) - return models.Model(inputs, outputs) + inputs = layers.Input(shape=(input_dim,)) + outputs = layers.Dense(num_hidden, activation="relu")(inputs) + activation = "sigmoid" if num_classes == 1 else "softmax" + outputs = layers.Dense(num_classes, activation=activation)(outputs) + return models.Model(inputs, outputs) class SmallSubclassMLP(models.Model): - """A subclass model based small MLP.""" - - def __init__(self, - num_hidden, - num_classes, - use_bn=False, - use_dp=False, - **kwargs): - super().__init__(name='test_model', **kwargs) - self.use_bn = use_bn - self.use_dp = use_dp - - self.layer_a = layers.Dense(num_hidden, activation='relu') - activation = 'sigmoid' if num_classes == 1 else 'softmax' - self.layer_b = layers.Dense(num_classes, activation=activation) - if self.use_dp: - self.dp = layers.Dropout(0.5) - if self.use_bn: - self.bn = layers.BatchNormalization(axis=-1) - - def call(self, inputs, **kwargs): - x = self.layer_a(inputs) - if self.use_dp: - x = self.dp(x) - if self.use_bn: - x = self.bn(x) - return self.layer_b(x) + """A subclass model based small MLP.""" + + def __init__( + self, num_hidden, num_classes, use_bn=False, use_dp=False, **kwargs + ): + super().__init__(name="test_model", **kwargs) + self.num_hidden = num_hidden + self.num_classes = num_classes + self.use_bn = use_bn + self.use_dp = use_dp + + self.layer_a = 
layers.Dense(num_hidden, activation="relu") + activation = "sigmoid" if num_classes == 1 else "softmax" + self.layer_b = layers.Dense(num_classes, activation=activation) + if self.use_dp: + self.dp = layers.Dropout(0.5) + if self.use_bn: + self.bn = layers.BatchNormalization(axis=-1) + + def call(self, inputs, **kwargs): + x = self.layer_a(inputs) + if self.use_dp: + x = self.dp(x) + if self.use_bn: + x = self.bn(x) + return self.layer_b(x) + + def get_config(self): + config = super().get_config() + config.update( + { + "num_hidden": self.num_hidden, + "num_classes": self.num_classes, + "use_bn": self.use_bn, + "use_dp": self.use_dp, + } + ) + return config class _SmallSubclassMLPCustomBuild(models.Model): - """A subclass model small MLP that uses a custom build method.""" + """A subclass model small MLP that uses a custom build method.""" - def __init__(self, num_hidden, num_classes): - super().__init__() - self.layer_a = None - self.layer_b = None - self.num_hidden = num_hidden - self.num_classes = num_classes + def __init__(self, num_hidden, num_classes): + super().__init__() + self.layer_a = None + self.layer_b = None + self.num_hidden = num_hidden + self.num_classes = num_classes - def build(self, input_shape): - self.layer_a = layers.Dense(self.num_hidden, activation='relu') - activation = 'sigmoid' if self.num_classes == 1 else 'softmax' - self.layer_b = layers.Dense(self.num_classes, activation=activation) + def build(self, input_shape): + self.layer_a = layers.Dense(self.num_hidden, activation="relu") + activation = "sigmoid" if self.num_classes == 1 else "softmax" + self.layer_b = layers.Dense(self.num_classes, activation=activation) - def call(self, inputs, **kwargs): - x = self.layer_a(inputs) - return self.layer_b(x) + def call(self, inputs, **kwargs): + x = self.layer_a(inputs) + return self.layer_b(x) def get_small_subclass_mlp(num_hidden, num_classes): - return SmallSubclassMLP(num_hidden, num_classes) + return SmallSubclassMLP(num_hidden, num_classes) def get_small_subclass_mlp_with_custom_build(num_hidden, num_classes): - return _SmallSubclassMLPCustomBuild(num_hidden, num_classes) + return _SmallSubclassMLPCustomBuild(num_hidden, num_classes) def get_small_mlp(num_hidden, num_classes, input_dim): - """Get a small mlp of the model type specified by `get_model_type`.""" - model_type = get_model_type() - if model_type == 'subclass': - return get_small_subclass_mlp(num_hidden, num_classes) - if model_type == 'subclass_custom_build': - return get_small_subclass_mlp_with_custom_build(num_hidden, num_classes) - if model_type == 'sequential': - return get_small_sequential_mlp(num_hidden, num_classes, input_dim) - if model_type == 'functional': - return get_small_functional_mlp(num_hidden, num_classes, input_dim) - raise ValueError('Unknown model type {}'.format(model_type)) + """Get a small mlp of the model type specified by `get_model_type`.""" + model_type = get_model_type() + if model_type == "subclass": + return get_small_subclass_mlp(num_hidden, num_classes) + if model_type == "subclass_custom_build": + return get_small_subclass_mlp_with_custom_build(num_hidden, num_classes) + if model_type == "sequential": + return get_small_sequential_mlp(num_hidden, num_classes, input_dim) + if model_type == "functional": + return get_small_functional_mlp(num_hidden, num_classes, input_dim) + raise ValueError(f"Unknown model type {model_type}") class _SubclassModel(models.Model): - """A Keras subclass model.""" + """A Keras subclass model.""" - def __init__(self, model_layers, *args, 
**kwargs): - """Instantiate a model. + def __init__(self, model_layers, *args, **kwargs): + """Instantiate a model. - Args: - model_layers: a list of layers to be added to the model. - *args: Model's args - **kwargs: Model's keyword args, at most one of input_tensor -> the input - tensor required for ragged/sparse input. - """ + Args: + model_layers: a list of layers to be added to the model. + *args: Model's args + **kwargs: Model's keyword args, at most one of input_tensor -> the + input tensor required for ragged/sparse input. + """ - inputs = kwargs.pop('input_tensor', None) - super().__init__(*args, **kwargs) - # Note that clone and build doesn't support lists of layers in subclassed - # models. Adding each layer directly here. - for i, layer in enumerate(model_layers): - setattr(self, self._layer_name_for_i(i), layer) + inputs = kwargs.pop("input_tensor", None) + super().__init__(*args, **kwargs) + # Note that clone and build doesn't support lists of layers in + # subclassed models. Adding each layer directly here. + for i, layer in enumerate(model_layers): + setattr(self, self._layer_name_for_i(i), layer) - self.num_layers = len(model_layers) + self.num_layers = len(model_layers) - if inputs is not None: - self._set_inputs(inputs) + if inputs is not None: + self._set_inputs(inputs) - def _layer_name_for_i(self, i): - return 'layer{}'.format(i) + def _layer_name_for_i(self, i): + return f"layer{i}" - def call(self, inputs, **kwargs): - x = inputs - for i in range(self.num_layers): - layer = getattr(self, self._layer_name_for_i(i)) - x = layer(x) - return x + def call(self, inputs, **kwargs): + x = inputs + for i in range(self.num_layers): + layer = getattr(self, self._layer_name_for_i(i)) + x = layer(x) + return x - def get_config(self): - # This test model relies on the default Keras serialization of a model, - # rather than providing the details of `model_layers`. - raise NotImplementedError + def get_config(self): + # This test model relies on the default Keras serialization of a model, + # rather than providing the details of `model_layers`. + raise NotImplementedError class _SubclassModelCustomBuild(models.Model): - """A Keras subclass model that uses a custom build method.""" - - def __init__(self, layer_generating_func, *args, **kwargs): - super().__init__(*args, **kwargs) - self.all_layers = None - self._layer_generating_func = layer_generating_func - - def build(self, input_shape): - model_layers = [] - for layer in self._layer_generating_func(): - model_layers.append(layer) - self.all_layers = model_layers - - def call(self, inputs, **kwargs): - x = inputs - for layer in self.all_layers: - x = layer(x) - return x - - -def get_model_from_layers(model_layers, - input_shape=None, - input_dtype=None, - name=None, - input_ragged=None, - input_sparse=None, - model_type=None): - """Builds a model from a sequence of layers. - - Args: - model_layers: The layers used to build the network. - input_shape: Shape tuple of the input or 'TensorShape' instance. - input_dtype: Datatype of the input. - name: Name for the model. - input_ragged: Boolean, whether the input data is a ragged tensor. - input_sparse: Boolean, whether the input data is a sparse tensor. - model_type: One of "subclass", "subclass_custom_build", "sequential", or - "functional". When None, defaults to `get_model_type`. - - Returns: - A Keras model. 
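
A hedged sketch of building one layer stack as a specific model type through this helper (the layer sizes are illustrative):

    from keras import layers
    from keras.testing_infra import test_utils

    # model_type is given explicitly here instead of being read from the
    # ambient model_type_scope(); any of the four supported types works.
    model = test_utils.get_model_from_layers(
        [layers.Dense(4, activation="relu"), layers.Dense(1)],
        input_shape=(3,),
        model_type="sequential",
    )
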
- """ - if model_type is None: - model_type = get_model_type() - if model_type == 'subclass': - inputs = None - if input_ragged or input_sparse: - inputs = layers.Input( - shape=input_shape, - dtype=input_dtype, - ragged=input_ragged, - sparse=input_sparse) - return _SubclassModel(model_layers, name=name, input_tensor=inputs) - - if model_type == 'subclass_custom_build': - layer_generating_func = lambda: model_layers - return _SubclassModelCustomBuild(layer_generating_func, name=name) - - if model_type == 'sequential': - model = models.Sequential(name=name) - if input_shape: - model.add( - layers.InputLayer( - input_shape=input_shape, - dtype=input_dtype, - ragged=input_ragged, - sparse=input_sparse)) - for layer in model_layers: - model.add(layer) - return model - - if model_type == 'functional': - if not input_shape: - raise ValueError('Cannot create a functional model from layers with no ' - 'input shape.') - inputs = layers.Input( - shape=input_shape, - dtype=input_dtype, - ragged=input_ragged, - sparse=input_sparse) - outputs = inputs - for layer in model_layers: - outputs = layer(outputs) - return models.Model(inputs, outputs, name=name) + """A Keras subclass model that uses a custom build method.""" + + def __init__(self, layer_generating_func, *args, **kwargs): + super().__init__(*args, **kwargs) + self.all_layers = None + self._layer_generating_func = layer_generating_func + + def build(self, input_shape): + model_layers = [] + for layer in self._layer_generating_func(): + model_layers.append(layer) + self.all_layers = model_layers + + def call(self, inputs, **kwargs): + x = inputs + for layer in self.all_layers: + x = layer(x) + return x + + +def get_model_from_layers( + model_layers, + input_shape=None, + input_dtype=None, + name=None, + input_ragged=None, + input_sparse=None, + model_type=None, +): + """Builds a model from a sequence of layers. - raise ValueError('Unknown model type {}'.format(model_type)) + Args: + model_layers: The layers used to build the network. + input_shape: Shape tuple of the input or 'TensorShape' instance. + input_dtype: Datatype of the input. + name: Name for the model. + input_ragged: Boolean, whether the input data is a ragged tensor. + input_sparse: Boolean, whether the input data is a sparse tensor. + model_type: One of "subclass", "subclass_custom_build", "sequential", or + "functional". When None, defaults to `get_model_type`. + + Returns: + A Keras model. + """ + if model_type is None: + model_type = get_model_type() + if model_type == "subclass": + inputs = None + if input_ragged or input_sparse: + inputs = layers.Input( + shape=input_shape, + dtype=input_dtype, + ragged=input_ragged, + sparse=input_sparse, + ) + return _SubclassModel(model_layers, name=name, input_tensor=inputs) + + if model_type == "subclass_custom_build": + layer_generating_func = lambda: model_layers + return _SubclassModelCustomBuild(layer_generating_func, name=name) + + if model_type == "sequential": + model = models.Sequential(name=name) + if input_shape: + model.add( + layers.InputLayer( + input_shape=input_shape, + dtype=input_dtype, + ragged=input_ragged, + sparse=input_sparse, + ) + ) + for layer in model_layers: + model.add(layer) + return model + + if model_type == "functional": + if not input_shape: + raise ValueError( + "Cannot create a functional model from layers with no " + "input shape." 
+ ) + inputs = layers.Input( + shape=input_shape, + dtype=input_dtype, + ragged=input_ragged, + sparse=input_sparse, + ) + outputs = inputs + for layer in model_layers: + outputs = layer(outputs) + return models.Model(inputs, outputs, name=name) + + raise ValueError(f"Unknown model type {model_type}") class Bias(layers.Layer): + def build(self, input_shape): + self.bias = self.add_weight("bias", (1,), initializer="zeros") - def build(self, input_shape): - self.bias = self.add_weight('bias', (1,), initializer='zeros') - - def call(self, inputs): - return inputs + self.bias + def call(self, inputs): + return inputs + self.bias class _MultiIOSubclassModel(models.Model): - """Multi IO Keras subclass model.""" - - def __init__(self, branch_a, branch_b, shared_input_branch=None, - shared_output_branch=None, name=None): - super().__init__(name=name) - self._shared_input_branch = shared_input_branch - self._branch_a = branch_a - self._branch_b = branch_b - self._shared_output_branch = shared_output_branch - - def call(self, inputs, **kwargs): - if self._shared_input_branch: - for layer in self._shared_input_branch: - inputs = layer(inputs) - a = inputs - b = inputs - elif isinstance(inputs, dict): - a = inputs['input_1'] - b = inputs['input_2'] - else: - a, b = inputs - - for layer in self._branch_a: - a = layer(a) - for layer in self._branch_b: - b = layer(b) - outs = [a, b] - - if self._shared_output_branch: - for layer in self._shared_output_branch: - outs = layer(outs) - - return outs + """Multi IO Keras subclass model.""" + + def __init__( + self, + branch_a, + branch_b, + shared_input_branch=None, + shared_output_branch=None, + name=None, + ): + super().__init__(name=name) + self._shared_input_branch = shared_input_branch + self._branch_a = branch_a + self._branch_b = branch_b + self._shared_output_branch = shared_output_branch + + def call(self, inputs, **kwargs): + if self._shared_input_branch: + for layer in self._shared_input_branch: + inputs = layer(inputs) + a = inputs + b = inputs + elif isinstance(inputs, dict): + a = inputs["input_1"] + b = inputs["input_2"] + else: + a, b = inputs + + for layer in self._branch_a: + a = layer(a) + for layer in self._branch_b: + b = layer(b) + outs = [a, b] + + if self._shared_output_branch: + for layer in self._shared_output_branch: + outs = layer(outs) + + return outs class _MultiIOSubclassModelCustomBuild(models.Model): - """Multi IO Keras subclass model that uses a custom build method.""" - - def __init__(self, branch_a_func, branch_b_func, - shared_input_branch_func=None, - shared_output_branch_func=None): - super().__init__() - self._shared_input_branch_func = shared_input_branch_func - self._branch_a_func = branch_a_func - self._branch_b_func = branch_b_func - self._shared_output_branch_func = shared_output_branch_func - - self._shared_input_branch = None - self._branch_a = None - self._branch_b = None - self._shared_output_branch = None - - def build(self, input_shape): - if self._shared_input_branch_func(): - self._shared_input_branch = self._shared_input_branch_func() - self._branch_a = self._branch_a_func() - self._branch_b = self._branch_b_func() - - if self._shared_output_branch_func(): - self._shared_output_branch = self._shared_output_branch_func() - - def call(self, inputs, **kwargs): - if self._shared_input_branch: - for layer in self._shared_input_branch: - inputs = layer(inputs) - a = inputs - b = inputs - else: - a, b = inputs - - for layer in self._branch_a: - a = layer(a) - for layer in self._branch_b: - b = layer(b) - outs = a, 
b - - if self._shared_output_branch: - for layer in self._shared_output_branch: - outs = layer(outs) - - return outs + """Multi IO Keras subclass model that uses a custom build method.""" + + def __init__( + self, + branch_a_func, + branch_b_func, + shared_input_branch_func=None, + shared_output_branch_func=None, + ): + super().__init__() + self._shared_input_branch_func = shared_input_branch_func + self._branch_a_func = branch_a_func + self._branch_b_func = branch_b_func + self._shared_output_branch_func = shared_output_branch_func + + self._shared_input_branch = None + self._branch_a = None + self._branch_b = None + self._shared_output_branch = None + + def build(self, input_shape): + if self._shared_input_branch_func(): + self._shared_input_branch = self._shared_input_branch_func() + self._branch_a = self._branch_a_func() + self._branch_b = self._branch_b_func() + + if self._shared_output_branch_func(): + self._shared_output_branch = self._shared_output_branch_func() + + def call(self, inputs, **kwargs): + if self._shared_input_branch: + for layer in self._shared_input_branch: + inputs = layer(inputs) + a = inputs + b = inputs + else: + a, b = inputs + + for layer in self._branch_a: + a = layer(a) + for layer in self._branch_b: + b = layer(b) + outs = a, b + + if self._shared_output_branch: + for layer in self._shared_output_branch: + outs = layer(outs) + + return outs def get_multi_io_model( - branch_a, - branch_b, - shared_input_branch=None, - shared_output_branch=None): - """Builds a multi-io model that contains two branches. - - The produced model will be of the type specified by `get_model_type`. - - To build a two-input, two-output model: - Specify a list of layers for branch a and branch b, but do not specify any - shared input branch or shared output branch. The resulting model will apply - each branch to a different input, to produce two outputs. - - The first value in branch_a must be the Keras 'Input' layer for branch a, - and the first value in branch_b must be the Keras 'Input' layer for - branch b. - - example usage: - ``` - branch_a = [Input(shape=(2,), name='a'), Dense(), Dense()] - branch_b = [Input(shape=(3,), name='b'), Dense(), Dense()] - - model = get_multi_io_model(branch_a, branch_b) - ``` - - To build a two-input, one-output model: - Specify a list of layers for branch a and branch b, and specify a - shared output branch. The resulting model will apply - each branch to a different input. It will then apply the shared output - branch to a tuple containing the intermediate outputs of each branch, - to produce a single output. The first layer in the shared_output_branch - must be able to merge a tuple of two tensors. - - The first value in branch_a must be the Keras 'Input' layer for branch a, - and the first value in branch_b must be the Keras 'Input' layer for - branch b. - - example usage: - ``` - input_branch_a = [Input(shape=(2,), name='a'), Dense(), Dense()] - input_branch_b = [Input(shape=(3,), name='b'), Dense(), Dense()] - shared_output_branch = [Concatenate(), Dense(), Dense()] - - model = get_multi_io_model(input_branch_a, input_branch_b, - shared_output_branch=shared_output_branch) - ``` - To build a one-input, two-output model: - Specify a list of layers for branch a and branch b, and specify a - shared input branch. The resulting model will take one input, and apply - the shared input branch to it. It will then respectively apply each branch - to that intermediate result in parallel, to produce two outputs. 
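# A minimal usage sketch for `get_model_from_layers` defined earlier: the same
# two-layer stack is built as a subclass, sequential, or functional model
# depending on `model_type`. Hypothetical snippet; it assumes the Keras
# testing_infra package is importable in the test environment.
import numpy as np

from keras import layers
from keras.testing_infra import test_utils

model = test_utils.get_model_from_layers(
    [layers.Dense(4, activation="relu"), layers.Dense(1)],
    input_shape=(8,),
    # One of "subclass", "subclass_custom_build", "sequential", "functional".
    model_type="functional",
)
model.compile("sgd", "mse")
model.fit(np.ones((2, 8)), np.zeros((2, 1)), epochs=1, verbose=0)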
- - The first value in the shared_input_branch must be the Keras 'Input' layer - for the whole model. Branch a and branch b should not contain any Input - layers. - - example usage: - ``` - shared_input_branch = [Input(shape=(2,), name='in'), Dense(), Dense()] - output_branch_a = [Dense(), Dense()] - output_branch_b = [Dense(), Dense()] - - - model = get_multi_io_model(output__branch_a, output_branch_b, - shared_input_branch=shared_input_branch) - ``` - - Args: - branch_a: A sequence of layers for branch a of the model. - branch_b: A sequence of layers for branch b of the model. - shared_input_branch: An optional sequence of layers to apply to a single - input, before applying both branches to that intermediate result. If set, - the model will take only one input instead of two. Defaults to None. - shared_output_branch: An optional sequence of layers to merge the - intermediate results produced by branch a and branch b. If set, - the model will produce only one output instead of two. Defaults to None. - - Returns: - A multi-io model of the type specified by `get_model_type`, specified - by the different branches. - """ - # Extract the functional inputs from the layer lists - if shared_input_branch: - inputs = shared_input_branch[0] - shared_input_branch = shared_input_branch[1:] - else: - inputs = branch_a[0], branch_b[0] - branch_a = branch_a[1:] - branch_b = branch_b[1:] - - model_type = get_model_type() - if model_type == 'subclass': - return _MultiIOSubclassModel(branch_a, branch_b, shared_input_branch, - shared_output_branch) - - if model_type == 'subclass_custom_build': - return _MultiIOSubclassModelCustomBuild((lambda: branch_a), - (lambda: branch_b), - (lambda: shared_input_branch), - (lambda: shared_output_branch)) - - if model_type == 'sequential': - raise ValueError('Cannot use `get_multi_io_model` to construct ' - 'sequential models') - - if model_type == 'functional': + branch_a, branch_b, shared_input_branch=None, shared_output_branch=None +): + """Builds a multi-io model that contains two branches. + + The produced model will be of the type specified by `get_model_type`. + + To build a two-input, two-output model: + Specify a list of layers for branch a and branch b, but do not specify any + shared input branch or shared output branch. The resulting model will + apply each branch to a different input, to produce two outputs. + + The first value in branch_a must be the Keras 'Input' layer for branch a, + and the first value in branch_b must be the Keras 'Input' layer for + branch b. + + example usage: + ``` + branch_a = [Input(shape=(2,), name='a'), Dense(), Dense()] + branch_b = [Input(shape=(3,), name='b'), Dense(), Dense()] + + model = get_multi_io_model(branch_a, branch_b) + ``` + + To build a two-input, one-output model: + Specify a list of layers for branch a and branch b, and specify a + shared output branch. The resulting model will apply + each branch to a different input. It will then apply the shared output + branch to a tuple containing the intermediate outputs of each branch, + to produce a single output. The first layer in the shared_output_branch + must be able to merge a tuple of two tensors. + + The first value in branch_a must be the Keras 'Input' layer for branch a, + and the first value in branch_b must be the Keras 'Input' layer for + branch b. 
+
+    example usage:
+    ```
+    input_branch_a = [Input(shape=(2,), name='a'), Dense(), Dense()]
+    input_branch_b = [Input(shape=(3,), name='b'), Dense(), Dense()]
+    shared_output_branch = [Concatenate(), Dense(), Dense()]
+
+    model = get_multi_io_model(input_branch_a, input_branch_b,
+                               shared_output_branch=shared_output_branch)
+    ```
+    To build a one-input, two-output model:
+    Specify a list of layers for branch a and branch b, and specify a
+    shared input branch. The resulting model will take one input, and apply
+    the shared input branch to it. It will then respectively apply each branch
+    to that intermediate result in parallel, to produce two outputs.
+
+    The first value in the shared_input_branch must be the Keras 'Input' layer
+    for the whole model. Branch a and branch b should not contain any Input
+    layers.
+
+    example usage:
+    ```
+    shared_input_branch = [Input(shape=(2,), name='in'), Dense(), Dense()]
+    output_branch_a = [Dense(), Dense()]
+    output_branch_b = [Dense(), Dense()]
+
+
+    model = get_multi_io_model(output_branch_a, output_branch_b,
+                               shared_input_branch=shared_input_branch)
+    ```
+
+    Args:
+        branch_a: A sequence of layers for branch a of the model.
+        branch_b: A sequence of layers for branch b of the model.
+        shared_input_branch: An optional sequence of layers to apply to a single
+            input, before applying both branches to that intermediate result. If
+            set, the model will take only one input instead of two. Defaults to
+            `None`.
+        shared_output_branch: An optional sequence of layers to merge the
+            intermediate results produced by branch a and branch b. If set,
+            the model will produce only one output instead of two.
+            Defaults to `None`.
+
+    Returns:
+        A multi-io model of the type specified by `get_model_type`, specified
+        by the different branches.
+ """ + # Extract the functional inputs from the layer lists if shared_input_branch: - a_and_b = inputs - for layer in shared_input_branch: - a_and_b = layer(a_and_b) - a = a_and_b - b = a_and_b + inputs = shared_input_branch[0] + shared_input_branch = shared_input_branch[1:] else: - a, b = inputs + inputs = branch_a[0], branch_b[0] + branch_a = branch_a[1:] + branch_b = branch_b[1:] - for layer in branch_a: - a = layer(a) - for layer in branch_b: - b = layer(b) - outputs = a, b - - if shared_output_branch: - for layer in shared_output_branch: - outputs = layer(outputs) - - return models.Model(inputs, outputs) - - raise ValueError('Unknown model type {}'.format(model_type)) + model_type = get_model_type() + if model_type == "subclass": + return _MultiIOSubclassModel( + branch_a, branch_b, shared_input_branch, shared_output_branch + ) + + if model_type == "subclass_custom_build": + return _MultiIOSubclassModelCustomBuild( + (lambda: branch_a), + (lambda: branch_b), + (lambda: shared_input_branch), + (lambda: shared_output_branch), + ) + + if model_type == "sequential": + raise ValueError( + "Cannot use `get_multi_io_model` to construct sequential models" + ) + + if model_type == "functional": + if shared_input_branch: + a_and_b = inputs + for layer in shared_input_branch: + a_and_b = layer(a_and_b) + a = a_and_b + b = a_and_b + else: + a, b = inputs + + for layer in branch_a: + a = layer(a) + for layer in branch_b: + b = layer(b) + outputs = a, b + + if shared_output_branch: + for layer in shared_output_branch: + outputs = layer(outputs) + + return models.Model(inputs, outputs) + + raise ValueError(f"Unknown model type {model_type}") _V2_OPTIMIZER_MAP = { - 'adadelta': adadelta_v2.Adadelta, - 'adagrad': adagrad_v2.Adagrad, - 'adam': adam_v2.Adam, - 'adamax': adamax_v2.Adamax, - 'nadam': nadam_v2.Nadam, - 'rmsprop': rmsprop_v2.RMSprop, - 'sgd': gradient_descent_v2.SGD + "adadelta": adadelta_v2.Adadelta, + "adagrad": adagrad_v2.Adagrad, + "adam": adam_v2.Adam, + "adamax": adamax_v2.Adamax, + "nadam": nadam_v2.Nadam, + "rmsprop": rmsprop_v2.RMSprop, + "sgd": gradient_descent_v2.SGD, } def get_v2_optimizer(name, **kwargs): - """Get the v2 optimizer requested. + """Get the v2 optimizer requested. - This is only necessary until v2 are the default, as we are testing in Eager, - and Eager + v1 optimizers fail tests. When we are in v2, the strings alone - should be sufficient, and this mapping can theoretically be removed. + This is only necessary until v2 are the default, as we are testing in Eager, + and Eager + v1 optimizers fail tests. When we are in v2, the strings alone + should be sufficient, and this mapping can theoretically be removed. - Args: - name: string name of Keras v2 optimizer. - **kwargs: any kwargs to pass to the optimizer constructor. + Args: + name: string name of Keras v2 optimizer. + **kwargs: any kwargs to pass to the optimizer constructor. - Returns: - Initialized Keras v2 optimizer. + Returns: + Initialized Keras v2 optimizer. - Raises: - ValueError: if an unknown name was passed. - """ - try: - return _V2_OPTIMIZER_MAP[name](**kwargs) - except KeyError: - raise ValueError( - 'Could not find requested v2 optimizer: {}\nValid choices: {}'.format( - name, list(_V2_OPTIMIZER_MAP.keys()))) + Raises: + ValueError: if an unknown name was passed. 
+ """ + try: + return _V2_OPTIMIZER_MAP[name](**kwargs) + except KeyError: + raise ValueError( + "Could not find requested v2 optimizer: " + "{}\nValid choices: {}".format(name, list(_V2_OPTIMIZER_MAP.keys())) + ) -def get_expected_metric_variable_names(var_names, name_suffix=''): - """Returns expected metric variable names given names and prefix/suffix.""" - if tf.__internal__.tf2.enabled() or tf.executing_eagerly(): - # In V1 eager mode and V2 variable names are not made unique. - return [n + ':0' for n in var_names] - # In V1 graph mode variable names are made unique using a suffix. - return [n + name_suffix + ':0' for n in var_names] +def get_expected_metric_variable_names(var_names, name_suffix=""): + """Returns expected metric variable names given names and prefix/suffix.""" + if tf.__internal__.tf2.enabled() or tf.executing_eagerly(): + # In V1 eager mode and V2 variable names are not made unique. + return [n + ":0" for n in var_names] + # In V1 graph mode variable names are made unique using a suffix. + return [n + name_suffix + ":0" for n in var_names] def enable_v2_dtype_behavior(fn): - """Decorator for enabling the layer V2 dtype behavior on a test.""" - return _set_v2_dtype_behavior(fn, True) + """Decorator for enabling the layer V2 dtype behavior on a test.""" + return _set_v2_dtype_behavior(fn, True) def disable_v2_dtype_behavior(fn): - """Decorator for disabling the layer V2 dtype behavior on a test.""" - return _set_v2_dtype_behavior(fn, False) + """Decorator for disabling the layer V2 dtype behavior on a test.""" + return _set_v2_dtype_behavior(fn, False) def _set_v2_dtype_behavior(fn, enabled): - """Returns version of 'fn' that runs with v2 dtype behavior on or off.""" - @functools.wraps(fn) - def wrapper(*args, **kwargs): - v2_dtype_behavior = base_layer_utils.V2_DTYPE_BEHAVIOR - base_layer_utils.V2_DTYPE_BEHAVIOR = enabled - try: - return fn(*args, **kwargs) - finally: - base_layer_utils.V2_DTYPE_BEHAVIOR = v2_dtype_behavior + """Returns version of 'fn' that runs with v2 dtype behavior on or off.""" - return tf.__internal__.decorator.make_decorator(fn, wrapper) + @functools.wraps(fn) + def wrapper(*args, **kwargs): + v2_dtype_behavior = base_layer_utils.V2_DTYPE_BEHAVIOR + base_layer_utils.V2_DTYPE_BEHAVIOR = enabled + try: + return fn(*args, **kwargs) + finally: + base_layer_utils.V2_DTYPE_BEHAVIOR = v2_dtype_behavior + + return tf.__internal__.decorator.make_decorator(fn, wrapper) @contextlib.contextmanager def device(should_use_gpu): - """Uses gpu when requested and available.""" - if should_use_gpu and tf.test.is_gpu_available(): - dev = '/device:GPU:0' - else: - dev = '/device:CPU:0' - with tf.device(dev): - yield + """Uses gpu when requested and available.""" + if should_use_gpu and tf.test.is_gpu_available(): + dev = "/device:GPU:0" + else: + dev = "/device:CPU:0" + with tf.device(dev): + yield @contextlib.contextmanager def use_gpu(): - """Uses gpu when requested and available.""" - with device(should_use_gpu=True): - yield + """Uses gpu when requested and available.""" + with device(should_use_gpu=True): + yield def for_all_test_methods(decorator, *args, **kwargs): - """Generate class-level decorator from given method-level decorator. + """Generate class-level decorator from given method-level decorator. - It is expected for the given decorator to take some arguments and return - a method that is then called on the test method to produce a decorated - method. 
+    It is expected for the given decorator to take some arguments and return
+    a method that is then called on the test method to produce a decorated
+    method.
 
-  Args:
-    decorator: The decorator to apply.
-    *args: Positional arguments
-    **kwargs: Keyword arguments
-  Returns: Function that will decorate a given classes test methods with the
-    decorator.
-  """
+    Args:
+        decorator: The decorator to apply.
+        *args: Positional arguments
+        **kwargs: Keyword arguments
+    Returns: Function that will decorate a given class's test methods with the
+        decorator.
+    """
 
-  def all_test_methods_impl(cls):
-    """Apply decorator to all test methods in class."""
-    for name in dir(cls):
-      value = getattr(cls, name)
-      if callable(value) and name.startswith('test') and (name !=
-                                                          'test_session'):
-        setattr(cls, name, decorator(*args, **kwargs)(value))
-    return cls
+    def all_test_methods_impl(cls):
+        """Apply decorator to all test methods in class."""
+        for name in dir(cls):
+            value = getattr(cls, name)
+            if (
+                callable(value)
+                and name.startswith("test")
+                and (name != "test_session")
+            ):
+                setattr(cls, name, decorator(*args, **kwargs)(value))
+        return cls
 
-  return all_test_methods_impl
+    return all_test_methods_impl
 
 
 # The description is just for documentation purposes.
-def run_without_tensor_float_32(description):  # pylint: disable=unused-argument
-  """Execute test with TensorFloat-32 disabled.
-
-  While almost every real-world deep learning model runs fine with
-  TensorFloat-32, many tests use assertAllClose or similar methods.
-  TensorFloat-32 matmuls typically will cause such methods to fail with the
-  default tolerances.
+def run_without_tensor_float_32(description):
+    """Execute test with TensorFloat-32 disabled.
 
-  Args:
-    description: A description used for documentation purposes, describing why
-      the test requires TensorFloat-32 to be disabled.
+    While almost every real-world deep learning model runs fine with
+    TensorFloat-32, many tests use assertAllClose or similar methods.
+    TensorFloat-32 matmuls typically will cause such methods to fail with the
+    default tolerances.
 
-  Returns:
-    Decorator which runs a test with TensorFloat-32 disabled.
-  """
+    Args:
+        description: A description used for documentation purposes, describing why
+            the test requires TensorFloat-32 to be disabled.
 
-  def decorator(f):
+    Returns:
+        Decorator which runs a test with TensorFloat-32 disabled.
+    """
 
-    @functools.wraps(f)
-    def decorated(self, *args, **kwargs):
-      allowed = tf.config.experimental.tensor_float_32_execution_enabled()
-      try:
-        tf.config.experimental.enable_tensor_float_32_execution(False)
-        f(self, *args, **kwargs)
-      finally:
-        tf.config.experimental.enable_tensor_float_32_execution(allowed)
+    def decorator(f):
+        @functools.wraps(f)
+        def decorated(self, *args, **kwargs):
+            allowed = tf.config.experimental.tensor_float_32_execution_enabled()
+            try:
+                tf.config.experimental.enable_tensor_float_32_execution(False)
+                f(self, *args, **kwargs)
+            finally:
+                tf.config.experimental.enable_tensor_float_32_execution(allowed)
 
-    return decorated
+        return decorated
 
-  return decorator
+    return decorator
 
 
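# A minimal sketch of how the TensorFloat-32 decorator above is applied; a
# hypothetical test case, assuming hardware where TF32 would otherwise round
# float32 matmuls (e.g. Ampere GPUs).
import tensorflow.compat.v2 as tf

from keras.testing_infra import test_utils


class MatmulPrecisionTest(tf.test.TestCase):
    @test_utils.run_without_tensor_float_32(
        "assertAllClose needs full float32 matmul precision"
    )
    def test_matmul_is_precise(self):
        x = tf.fill((4, 4), 1.0001)
        # TF32 execution is disabled for the duration of this test and
        # restored afterwards, so a tight tolerance is safe here.
        self.assertAllClose(tf.matmul(x, x), 4 * 1.0001**2 * tf.ones((4, 4)))


# The description is just for documentation purposes.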
-def run_all_without_tensor_float_32(description):  # pylint: disable=unused-argument
-  """Execute all tests in a class with TensorFloat-32 disabled."""
-  return for_all_test_methods(run_without_tensor_float_32, description)
+def run_all_without_tensor_float_32(
+    description,
+):
+    """Execute all tests in a class with TensorFloat-32 disabled."""
+    return for_all_test_methods(run_without_tensor_float_32, description)
 
 
 def run_v2_only(obj=None):
-  """Execute the decorated test only if running in v2 mode.
+    """Execute the decorated test only if running in v2 mode.
 
-  This function is intended to be applied to tests that exercise v2 only
-  functionality. If the test is run in v1 mode it will simply be skipped.
+    This function is intended to be applied to tests that exercise v2 only
+    functionality. If the test is run in v1 mode it will simply be skipped.
 
-  See go/tf-test-decorator-cheatsheet for the decorators to use in different
-  v1/v2/eager/graph combinations.
+    See go/tf-test-decorator-cheatsheet for the decorators to use in different
+    v1/v2/eager/graph combinations.
 
-  Args:
-    obj: function to be annotated. If None, return a
-      decorator the can be applied to a function or class. If `obj` is not None,
-      return the decorator applied to `obj`.
+    Args:
+        obj: function to be annotated. If None, return a
+            decorator that can be applied to a function or class. If `obj` is not
+            None, return the decorator applied to `obj`.
+
+    Returns:
+        Returns a decorator that will conditionally skip the decorated test
+        method.
+    """
+    condition = not tf.__internal__.tf2.enabled()
+    reason = "Test is only compatible with TF v2."
 
-  Returns:
-    Returns a decorator that will conditionally skip the decorated test method.
-  """
-  condition = not tf.__internal__.tf2.enabled()
-  reason = 'Test is only compatible with TF v2.'
+    def decorator(f):
+        if tf_inspect.isclass(f):
+            return unittest.skipIf(condition=condition, reason=reason)(obj)
 
-  def decorator(f):
-    if tf_inspect.isclass(f):
-      return unittest.skipIf(condition=condition, reason=reason)(obj)
+        def decorated(self, *args, **kwargs):
+            if condition:
+                self.skipTest(reason)
+            return f(self, *args, **kwargs)
 
-    def decorated(self, *args, **kwargs):
-      if condition:
-        self.skipTest(reason)
-      return f(self, *args, **kwargs)
-    return decorated
+        return decorated
 
-  if obj is not None:
-    return decorator(obj)
+    if obj is not None:
+        return decorator(obj)
 
-  return decorator
+    return decorator
 
 
 def generate_combinations_with_testcase_name(**kwargs):
-  """Generate combinations based on its keyword arguments using combine().
-
-  This function calls combine() and appends a testcase name to the list of
-  dictionaries returned. The 'testcase_name' key is a required for named
-  parameterized tests.
-
-  Args:
-    **kwargs: keyword arguments of form `option=[possibilities, ...]` or
-      `option=the_only_possibility`.
-
-  Returns:
-    a list of dictionaries for each combination. Keys in the dictionaries are
-    the keyword argument names. Each key has one value - one of the
-    corresponding keyword argument values.
- """ - sort_by_key = lambda k: k[0] - combinations = [] - for key, values in sorted(kwargs.items(), key=sort_by_key): - if not isinstance(values, list): - values = [values] - combinations.append([(key, value) for value in values]) - - combinations = [collections.OrderedDict(result) - for result in itertools.product(*combinations)] - named_combinations = [] - for combination in combinations: - assert isinstance(combination, collections.OrderedDict) - name = ''.join([ - '_{}_{}'.format(''.join(filter(str.isalnum, key)), - ''.join(filter(str.isalnum, str(value)))) - for key, value in combination.items() - ]) - named_combinations.append( - collections.OrderedDict( - list(combination.items()) + - [('testcase_name', '_test{}'.format(name))])) - - return named_combinations + """Generate combinations based on its keyword arguments using combine(). + + This function calls combine() and appends a testcase name to the list of + dictionaries returned. The 'testcase_name' key is a required for named + parameterized tests. + + Args: + **kwargs: keyword arguments of form `option=[possibilities, ...]` or + `option=the_only_possibility`. + + Returns: + a list of dictionaries for each combination. Keys in the dictionaries are + the keyword argument names. Each key has one value - one of the + corresponding keyword argument values. + """ + sort_by_key = lambda k: k[0] + combinations = [] + for key, values in sorted(kwargs.items(), key=sort_by_key): + if not isinstance(values, list): + values = [values] + combinations.append([(key, value) for value in values]) + + combinations = [ + collections.OrderedDict(result) + for result in itertools.product(*combinations) + ] + named_combinations = [] + for combination in combinations: + assert isinstance(combination, collections.OrderedDict) + name = "".join( + [ + "_{}_{}".format( + "".join(filter(str.isalnum, key)), + "".join(filter(str.isalnum, str(value))), + ) + for key, value in combination.items() + ] + ) + named_combinations.append( + collections.OrderedDict( + list(combination.items()) + [("testcase_name", f"_test{name}")] + ) + ) + + return named_combinations diff --git a/keras/tests/BUILD b/keras/tests/BUILD index 6e782cad7492..62681c407b38 100644 --- a/keras/tests/BUILD +++ b/keras/tests/BUILD @@ -1,6 +1,9 @@ # Description: # Contains Keras test utils and integration tests. 
+# Placeholder: load unaliased py_library +# Placeholder: load unaliased py_test + # buildifier: disable=same-origin-load load("@org_keras//keras:keras.bzl", "cuda_py_test") @@ -9,6 +12,7 @@ load("@org_keras//keras:keras.bzl", "tf_py_test") load("@org_keras//keras:keras.bzl", "tpu_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], default_visibility = [ "//keras:friends", "//third_party/tensorflow/tools/pip_package:__pkg__", @@ -256,7 +260,7 @@ tf_py_test( "//keras/api:keras_api", "//keras/layers/core", "//keras/metrics", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", ], ) @@ -334,7 +338,7 @@ tf_py_test( "//keras/api:keras_api", "//keras/engine", "//keras/layers/core", - "//keras/optimizers/optimizer_v2", + "//keras/optimizers/legacy:optimizers", "//keras/testing_infra:test_combinations", ], ) diff --git a/keras/tests/__init__.py b/keras/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/keras/tests/add_loss_correctness_test.py b/keras/tests/add_loss_correctness_test.py index 62aa6d50e763..5bf87c9ce670 100644 --- a/keras/tests/add_loss_correctness_test.py +++ b/keras/tests/add_loss_correctness_test.py @@ -14,39 +14,43 @@ # ============================================================================== """Tests add_loss API correctness.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np from keras import Input -from keras.testing_infra import test_combinations -from keras import layers -from keras import losses from keras import Model -from keras.optimizers import optimizer_v2 from keras import Sequential +from keras import layers +from keras import losses +from keras.optimizers import legacy as optimizer_legacy +from keras.testing_infra import test_combinations from keras.testing_infra import test_utils + +# isort: off from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training.rmsprop import RMSPropOptimizer +from tensorflow.python.training.rmsprop import ( + RMSPropOptimizer, +) MAE = losses.MeanAbsoluteError mae = losses.mean_absolute_error def get_ctl_train_step(model): - optimizer = optimizer_v2.gradient_descent.SGD(0.05) + optimizer = optimizer_legacy.gradient_descent.SGD(0.05) - def train_step(x, y, w=None): - with tf.GradientTape() as tape: - if w is not None: - model([x, y, w]) - else: - model([x, y]) - loss = tf.reduce_sum(model.losses) - gradients = tape.gradient(loss, model.trainable_weights) - optimizer.apply_gradients(zip(gradients, model.trainable_weights)) - return loss + def train_step(x, y, w=None): + with tf.GradientTape() as tape: + if w is not None: + model([x, y, w]) + else: + model([x, y]) + loss = tf.reduce_sum(model.losses) + gradients = tape.gradient(loss, model.trainable_weights) + optimizer.apply_gradients(zip(gradients, model.trainable_weights)) + return loss - return train_step + return train_step # TODO(psv): Add tests cases where a model is used in loss function but is @@ -54,402 +58,411 @@ def train_step(x, y, w=None): class TestAddLossCorrectness(test_combinations.TestCase): - - def setUp(self): - super().setUp() - self.x = np.array([[0.], [1.], [2.]], dtype='float32') - self.y = np.array([[0.5], [2.], [3.5]], dtype='float32') - self.w = np.array([[1.25], [0.5], [1.25]], dtype='float32') - - @test_combinations.run_all_keras_modes - def test_loss_on_model_fit(self): - inputs = Input(shape=(1,)) - targets = Input(shape=(1,)) - outputs = test_utils.Bias()(inputs) - model = Model([inputs, 
targets], outputs) - model.add_loss(MAE()(targets, outputs)) - model.add_loss(tf.reduce_mean(mae(targets, outputs))) - model.compile( - optimizer_v2.gradient_descent.SGD(0.05), - run_eagerly=test_utils.should_run_eagerly()) - - history = model.fit([self.x, self.y], batch_size=3, epochs=5) - self.assertAllClose(history.history['loss'], [2., 1.8, 1.6, 1.4, 1.2], 1e-3) - - @test_combinations.run_with_all_model_types(exclude_models=['sequential']) - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_loss_callable_on_model_fit(self): - model = test_utils.get_model_from_layers([test_utils.Bias()], - input_shape=(1,)) - - def callable_loss(): - return tf.reduce_sum(model.weights) - - model.add_loss(callable_loss) - model.compile( - optimizer_v2.gradient_descent.SGD(0.1), - run_eagerly=test_utils.should_run_eagerly()) - - history = model.fit(self.x, batch_size=3, epochs=5) - self.assertAllClose(history.history['loss'], [0., -.1, -.2, -.3, -.4], 1e-3) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_loss_on_model_ctl(self): - def get_model_and_train_step(): - inputs = Input(shape=(1,)) - targets = Input(shape=(1,)) - outputs = test_utils.Bias()(inputs) - model = Model([inputs, targets], outputs) - model.add_loss(MAE()(targets, outputs)) - model.add_loss(tf.reduce_mean(mae(targets, outputs))) - return get_ctl_train_step(model) - - train_step = get_model_and_train_step() - loss = [train_step(self.x, self.y) for _ in range(5)] - self.assertAllClose(loss, [2., 1.8, 1.6, 1.4, 1.2], 1e-3) - - train_step = tf.function(get_model_and_train_step()) - loss = [train_step(self.x, self.y) for _ in range(5)] - self.assertAllClose(loss, [2., 1.8, 1.6, 1.4, 1.2], 1e-3) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_loss_callable_on_model_ctl(self): - def get_model_and_train_step(): - inputs = Input(shape=(1,)) - targets = Input(shape=(1,)) - outputs = test_utils.Bias()(inputs) - model = Model([inputs, targets], outputs) - - def callable_loss(): - return tf.reduce_sum(model.weights) - - model.add_loss(callable_loss) - return get_ctl_train_step(model) - - train_step = get_model_and_train_step() - loss = [train_step(self.x, self.y) for _ in range(5)] - self.assertAllClose(loss, [0., -0.05, -0.1, -0.15, -0.2], 1e-3) - - train_step = tf.function(get_model_and_train_step()) - loss = [train_step(self.x, self.y) for _ in range(5)] - self.assertAllClose(loss, [0., -0.05, -0.1, -0.15, -0.2], 1e-3) - - @test_combinations.run_all_keras_modes - def test_loss_with_sample_weight_on_model_fit(self): - inputs = Input(shape=(1,)) - targets = Input(shape=(1,)) - sw = Input(shape=(1,)) - outputs = test_utils.Bias()(inputs) - model = Model([inputs, targets, sw], outputs) - model.add_loss(MAE()(targets, outputs, sw)) - model.add_loss(3 * tf.reduce_mean(sw * mae(targets, outputs))) - model.compile( - optimizer_v2.gradient_descent.SGD(0.025), - run_eagerly=test_utils.should_run_eagerly()) - - history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5) - self.assertAllClose(history.history['loss'], [4., 3.6, 3.2, 2.8, 2.4], 1e-3) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_loss_with_sample_weight_on_model_ctl(self): - def get_model_and_train_step(): - inputs = Input(shape=(1,)) - targets = Input(shape=(1,)) - sw = Input(shape=(1,)) - outputs = test_utils.Bias()(inputs) - model = Model([inputs, targets, sw], outputs) - model.add_loss(MAE()(targets, outputs, sw)) - model.add_loss(tf.reduce_mean(sw * mae(targets, outputs))) - 
return get_ctl_train_step(model) - - train_step = get_model_and_train_step() - loss = [train_step(self.x, self.y, self.w) for _ in range(5)] - self.assertAllClose(loss, [2., 1.8, 1.6, 1.4, 1.2], 1e-3) - - train_step = tf.function(get_model_and_train_step()) - loss = [train_step(self.x, self.y, self.w) for _ in range(5)] - self.assertAllClose(loss, [2., 1.8, 1.6, 1.4, 1.2], 1e-3) - - @test_combinations.run_all_keras_modes - def test_loss_with_sample_weight_in_model_call(self): - - class MyModel(Model): - - def __init__(self): - super().__init__() - self.bias = test_utils.Bias() - - def call(self, inputs): - outputs = self.bias(inputs[0]) - self.add_loss(MAE()(inputs[1], outputs, inputs[2])) - self.add_loss(tf.reduce_mean(inputs[2] * mae(inputs[1], outputs))) - return outputs - - model = MyModel() - model.predict([self.x, self.y, self.w]) - model.compile( - optimizer_v2.gradient_descent.SGD(0.05), - run_eagerly=test_utils.should_run_eagerly()) - - history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5) - self.assertEqual(len(model.losses), 2) - self.assertAllClose(history.history['loss'], [2., 1.8, 1.6, 1.4, 1.2], 1e-3) - - eval_out = model.evaluate([self.x, self.y, self.w]) - self.assertAlmostEqual(eval_out, 1.0, 3) - - @test_combinations.run_all_keras_modes - def test_loss_with_sample_weight_in_layer_call(self): - - class MyLayer(layers.Layer): - - def __init__(self): - super().__init__() - self.bias = test_utils.Bias() - - def call(self, inputs): - out = self.bias(inputs[0]) - self.add_loss(MAE()(inputs[1], out, inputs[2])) - self.add_loss(tf.reduce_mean(inputs[2] * mae(inputs[1], out))) - return out - - inputs = Input(shape=(1,)) - targets = Input(shape=(1,)) - sw = Input(shape=(1,)) - - outputs = MyLayer()([inputs, targets, sw]) - model = Model([inputs, targets, sw], outputs) - model.predict([self.x, self.y, self.w]) - model.compile( - optimizer_v2.gradient_descent.SGD(0.05), - run_eagerly=test_utils.should_run_eagerly()) - - history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5) - self.assertAllClose(history.history['loss'], [2., 1.8, 1.6, 1.4, 1.2], 1e-3) - - output = model.evaluate([self.x, self.y, self.w]) - self.assertAlmostEqual(output, 1.0, 3) - - output = model.test_on_batch([self.x, self.y, self.w]) - self.assertAlmostEqual(output, 1.0, 3) - - @test_combinations.run_all_keras_modes - def test_loss_on_layer(self): - - class MyLayer(layers.Layer): - - def call(self, inputs): - self.add_loss(tf.reduce_sum(inputs)) - return inputs - - inputs = Input((3,)) - layer = MyLayer() - outputs = layer(inputs) - model = Model(inputs, outputs) - self.assertEqual(len(model.losses), 1) - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(loss, 2 * 3) - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types - def test_activity_regularizer(self): - loss = {} - for reg in [None, 'l2']: - model_layers = [ - layers.Dense( - 10, - activation='relu', - activity_regularizer=reg, - kernel_initializer='ones', - use_bias=False), - layers.Dense( - 1, - activation='sigmoid', - kernel_initializer='ones', - use_bias=False), - ] - - model = test_utils.get_model_from_layers( - model_layers, input_shape=(10,)) - - x = np.ones((10, 10), 'float32') - y = np.zeros((10, 1), 'float32') - - optimizer = RMSPropOptimizer(learning_rate=0.001) - model.compile( - optimizer, - 'binary_crossentropy', - run_eagerly=test_utils.should_run_eagerly()) - 
model.fit(x, y, batch_size=2, epochs=5) - loss[reg] = model.evaluate(x, y) - self.assertLess(loss[None], loss['l2']) - - @test_combinations.run_all_keras_modes - @test_combinations.run_with_all_model_types - def test_activity_regularizer_loss_value(self): - layer = layers.Dense( - 1, - kernel_initializer='zeros', - bias_initializer='ones', - activity_regularizer='l2') - - model = test_utils.get_model_from_layers([layer], input_shape=(10,)) - - x = np.ones((10, 10), 'float32') - optimizer = RMSPropOptimizer(learning_rate=0.001) - model.compile( - optimizer, - run_eagerly=test_utils.should_run_eagerly()) - loss = model.test_on_batch(x) - self.assertAlmostEqual(0.01, loss, places=4) - - @test_combinations.run_all_keras_modes - def test_activity_regularizer_batch_independent(self): - inputs = layers.Input(shape=(10,)) - x = layers.Dense(10, activation='relu', activity_regularizer='l2')(inputs) - outputs = layers.Dense(1, activation='sigmoid')(x) - model = Model(inputs, outputs) - - optimizer = RMSPropOptimizer(learning_rate=0.001) - model.compile( - optimizer, - run_eagerly=test_utils.should_run_eagerly()) - - loss_small_batch = model.test_on_batch(np.ones((10, 10), 'float32')) - loss_big_batch = model.test_on_batch(np.ones((20, 10), 'float32')) - self.assertAlmostEqual(loss_small_batch, loss_big_batch, places=4) - - @test_combinations.run_all_keras_modes - def test_with_shared_layer(self): - - class LayerWithLoss(layers.Layer): - - def call(self, inputs): - self.add_loss(tf.reduce_sum(inputs)) - return inputs * 2 - - shared_layer = LayerWithLoss() - - m = Sequential([shared_layer]) - m2 = Sequential([shared_layer, m]) - m2(tf.constant([1, 2, 3])) - self.assertEqual(len(m2.losses), 2) - self.assertAllClose(m2.losses, [6, 12]) - - @test_combinations.run_all_keras_modes - def test_with_shared_nested_layer(self): - - class LayerWithLoss(layers.Layer): - - def call(self, inputs): - self.add_loss(tf.reduce_sum(inputs)) - return inputs * 2 - - class LayerWithNestedLayerWithLoss(layers.Layer): - - def __init__(self): - super().__init__() - self.loss_layer = LayerWithLoss() - - def call(self, inputs): - return self.loss_layer(inputs) - - shared_layer = LayerWithNestedLayerWithLoss() - - m = Sequential([shared_layer]) - m2 = Sequential([shared_layer, m]) - m2(tf.constant([1, 2, 3])) - self.assertEqual(len(m2.losses), 2) - self.assertAllClose(m2.losses, [6, 12]) - - @test_combinations.run_all_keras_modes - def test_clear_losses(self): - - class LayerWithSharedNestedLossLayer(layers.Layer): - - def __init__(self): - super().__init__() - self.loss_layer = layers.ActivityRegularization(l2=0.001) - self.add_weight(shape=(1,), regularizer='l2') - - def call(self, x): - x = self.loss_layer(x) - return self.loss_layer(x) - - inputs = Input(shape=(1,)) - l = LayerWithSharedNestedLossLayer() # Weight loss + 2 activity losses. 
- - x1 = tf.ones((1, 1)) - _ = l(x1) - if not tf.executing_eagerly(): - self.assertEqual(len(l.get_losses_for(x1)), 2) - self.assertEqual(len(l.get_losses_for(None)), 1) - - x2 = tf.ones((1, 1)) - _ = l(x2) - if not tf.executing_eagerly(): - self.assertEqual(len(l.get_losses_for(x1)), 2) - self.assertEqual(len(l.get_losses_for(x2)), 2) - self.assertEqual(len(l.get_losses_for(None)), 1) - - outputs = l(inputs) - model = Model(inputs, outputs) - if not tf.executing_eagerly(): - self.assertEqual(len(model.losses), 7) - self.assertEqual(len(l.get_losses_for(x1)), 2) - self.assertEqual(len(l.get_losses_for(x2)), 2) - self.assertEqual(len(l.get_losses_for(None)), 1) - - x3 = tf.ones((1, 1)) - model(x3) - x4 = tf.ones((1, 1)) - model(x4) - if tf.executing_eagerly(): - # Eager losses are cleared every `__call__`. - self.assertEqual(len(model.losses), 3) - else: - self.assertEqual(len(model.losses), 11) - self.assertEqual(len(model.get_losses_for(x3)), 2) - self.assertEqual(len(model.get_losses_for(x4)), 2) - self.assertEqual(len(model.get_losses_for(None)), 1) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_invalid_constant_input(self): - inputs = Input(shape=(1,)) - outputs = test_utils.Bias()(inputs) - model = Model(inputs, outputs) - with self.assertRaisesRegex( - ValueError, - 'Expected a symbolic Tensors or a callable for the loss value'): - model.add_loss(1.) - - @test_combinations.run_all_keras_modes(always_skip_v1=True) - def test_invalid_variable_input(self): - inputs = Input(shape=(1,)) - outputs = test_utils.Bias()(inputs) - model = Model(inputs, outputs) - with self.assertRaisesRegex( - ValueError, - 'Expected a symbolic Tensors or a callable for the loss value'): - model.add_loss(model.weights[0]) - - @test_combinations.run_all_keras_modes - def test_add_entropy_loss_on_functional_model(self): - inputs = Input(shape=(1,)) - targets = Input(shape=(1,)) - outputs = test_utils.Bias()(inputs) - model = Model([inputs, targets], outputs) - model.add_loss(losses.binary_crossentropy(targets, outputs)) - model.compile('sgd', run_eagerly=test_utils.should_run_eagerly()) - with tf.compat.v1.test.mock.patch.object(logging, 'warning') as mock_log: - model.fit([self.x, self.y], batch_size=3, epochs=5) - self.assertNotIn('Gradients do not exist for variables', - str(mock_log.call_args)) - - -if __name__ == '__main__': - tf.test.main() + def setUp(self): + super().setUp() + self.x = np.array([[0.0], [1.0], [2.0]], dtype="float32") + self.y = np.array([[0.5], [2.0], [3.5]], dtype="float32") + self.w = np.array([[1.25], [0.5], [1.25]], dtype="float32") + + @test_combinations.run_all_keras_modes + def test_loss_on_model_fit(self): + inputs = Input(shape=(1,)) + targets = Input(shape=(1,)) + outputs = test_utils.Bias()(inputs) + model = Model([inputs, targets], outputs) + model.add_loss(MAE()(targets, outputs)) + model.add_loss(tf.reduce_mean(mae(targets, outputs))) + model.compile( + optimizer_legacy.gradient_descent.SGD(0.05), + run_eagerly=test_utils.should_run_eagerly(), + ) + + history = model.fit([self.x, self.y], batch_size=3, epochs=5) + self.assertAllClose( + history.history["loss"], [2.0, 1.8, 1.6, 1.4, 1.2], 1e-3 + ) + + @test_combinations.run_with_all_model_types(exclude_models=["sequential"]) + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_loss_callable_on_model_fit(self): + model = test_utils.get_model_from_layers( + [test_utils.Bias()], input_shape=(1,) + ) + + def callable_loss(): + return tf.reduce_sum(model.weights) + + 
model.add_loss(callable_loss) + model.compile( + optimizer_legacy.gradient_descent.SGD(0.1), + run_eagerly=test_utils.should_run_eagerly(), + ) + + history = model.fit(self.x, batch_size=3, epochs=5) + self.assertAllClose( + history.history["loss"], [0.0, -0.1, -0.2, -0.3, -0.4], 1e-3 + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_loss_on_model_ctl(self): + def get_model_and_train_step(): + inputs = Input(shape=(1,)) + targets = Input(shape=(1,)) + outputs = test_utils.Bias()(inputs) + model = Model([inputs, targets], outputs) + model.add_loss(MAE()(targets, outputs)) + model.add_loss(tf.reduce_mean(mae(targets, outputs))) + return get_ctl_train_step(model) + + train_step = get_model_and_train_step() + loss = [train_step(self.x, self.y) for _ in range(5)] + self.assertAllClose(loss, [2.0, 1.8, 1.6, 1.4, 1.2], 1e-3) + + train_step = tf.function(get_model_and_train_step()) + loss = [train_step(self.x, self.y) for _ in range(5)] + self.assertAllClose(loss, [2.0, 1.8, 1.6, 1.4, 1.2], 1e-3) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_loss_callable_on_model_ctl(self): + def get_model_and_train_step(): + inputs = Input(shape=(1,)) + targets = Input(shape=(1,)) + outputs = test_utils.Bias()(inputs) + model = Model([inputs, targets], outputs) + + def callable_loss(): + return tf.reduce_sum(model.weights) + + model.add_loss(callable_loss) + return get_ctl_train_step(model) + + train_step = get_model_and_train_step() + loss = [train_step(self.x, self.y) for _ in range(5)] + self.assertAllClose(loss, [0.0, -0.05, -0.1, -0.15, -0.2], 1e-3) + + train_step = tf.function(get_model_and_train_step()) + loss = [train_step(self.x, self.y) for _ in range(5)] + self.assertAllClose(loss, [0.0, -0.05, -0.1, -0.15, -0.2], 1e-3) + + @test_combinations.run_all_keras_modes + def test_loss_with_sample_weight_on_model_fit(self): + inputs = Input(shape=(1,)) + targets = Input(shape=(1,)) + sw = Input(shape=(1,)) + outputs = test_utils.Bias()(inputs) + model = Model([inputs, targets, sw], outputs) + model.add_loss(MAE()(targets, outputs, sw)) + model.add_loss(3 * tf.reduce_mean(sw * mae(targets, outputs))) + model.compile( + optimizer_legacy.gradient_descent.SGD(0.025), + run_eagerly=test_utils.should_run_eagerly(), + ) + + history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5) + self.assertAllClose( + history.history["loss"], [4.0, 3.6, 3.2, 2.8, 2.4], 1e-3 + ) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_loss_with_sample_weight_on_model_ctl(self): + def get_model_and_train_step(): + inputs = Input(shape=(1,)) + targets = Input(shape=(1,)) + sw = Input(shape=(1,)) + outputs = test_utils.Bias()(inputs) + model = Model([inputs, targets, sw], outputs) + model.add_loss(MAE()(targets, outputs, sw)) + model.add_loss(tf.reduce_mean(sw * mae(targets, outputs))) + return get_ctl_train_step(model) + + train_step = get_model_and_train_step() + loss = [train_step(self.x, self.y, self.w) for _ in range(5)] + self.assertAllClose(loss, [2.0, 1.8, 1.6, 1.4, 1.2], 1e-3) + + train_step = tf.function(get_model_and_train_step()) + loss = [train_step(self.x, self.y, self.w) for _ in range(5)] + self.assertAllClose(loss, [2.0, 1.8, 1.6, 1.4, 1.2], 1e-3) + + @test_combinations.run_all_keras_modes + def test_loss_with_sample_weight_in_model_call(self): + class MyModel(Model): + def __init__(self): + super().__init__() + self.bias = test_utils.Bias() + + def call(self, inputs): + outputs = self.bias(inputs[0]) + 
self.add_loss(MAE()(inputs[1], outputs, inputs[2])) + self.add_loss( + tf.reduce_mean(inputs[2] * mae(inputs[1], outputs)) + ) + return outputs + + model = MyModel() + model.predict([self.x, self.y, self.w]) + model.compile( + optimizer_legacy.gradient_descent.SGD(0.05), + run_eagerly=test_utils.should_run_eagerly(), + ) + + history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5) + self.assertEqual(len(model.losses), 2) + self.assertAllClose( + history.history["loss"], [2.0, 1.8, 1.6, 1.4, 1.2], 1e-3 + ) + + eval_out = model.evaluate([self.x, self.y, self.w]) + self.assertAlmostEqual(eval_out, 1.0, 3) + + @test_combinations.run_all_keras_modes + def test_loss_with_sample_weight_in_layer_call(self): + class MyLayer(layers.Layer): + def __init__(self): + super().__init__() + self.bias = test_utils.Bias() + + def call(self, inputs): + out = self.bias(inputs[0]) + self.add_loss(MAE()(inputs[1], out, inputs[2])) + self.add_loss(tf.reduce_mean(inputs[2] * mae(inputs[1], out))) + return out + + inputs = Input(shape=(1,)) + targets = Input(shape=(1,)) + sw = Input(shape=(1,)) + + outputs = MyLayer()([inputs, targets, sw]) + model = Model([inputs, targets, sw], outputs) + model.predict([self.x, self.y, self.w]) + model.compile( + optimizer_legacy.gradient_descent.SGD(0.05), + run_eagerly=test_utils.should_run_eagerly(), + ) + + history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5) + self.assertAllClose( + history.history["loss"], [2.0, 1.8, 1.6, 1.4, 1.2], 1e-3 + ) + + output = model.evaluate([self.x, self.y, self.w]) + self.assertAlmostEqual(output, 1.0, 3) + + output = model.test_on_batch([self.x, self.y, self.w]) + self.assertAlmostEqual(output, 1.0, 3) + + @test_combinations.run_all_keras_modes + def test_loss_on_layer(self): + class MyLayer(layers.Layer): + def call(self, inputs): + self.add_loss(tf.reduce_sum(inputs)) + return inputs + + inputs = Input((3,)) + layer = MyLayer() + outputs = layer(inputs) + model = Model(inputs, outputs) + self.assertLen(model.losses, 1) + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) + self.assertEqual(loss, 2 * 3) + + @test_combinations.run_all_keras_modes + @test_combinations.run_with_all_model_types + def test_activity_regularizer(self): + loss = {} + for reg in [None, "l2"]: + model_layers = [ + layers.Dense( + 10, + activation="relu", + activity_regularizer=reg, + kernel_initializer="ones", + use_bias=False, + ), + layers.Dense( + 1, + activation="sigmoid", + kernel_initializer="ones", + use_bias=False, + ), + ] + + model = test_utils.get_model_from_layers( + model_layers, input_shape=(10,) + ) + + x = np.ones((10, 10), "float32") + y = np.zeros((10, 1), "float32") + + optimizer = RMSPropOptimizer(learning_rate=0.001) + model.compile( + optimizer, + "binary_crossentropy", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit(x, y, batch_size=2, epochs=5) + loss[reg] = model.evaluate(x, y) + self.assertLess(loss[None], loss["l2"]) + + @test_combinations.run_all_keras_modes + @test_combinations.run_with_all_model_types + def test_activity_regularizer_loss_value(self): + layer = layers.Dense( + 1, + kernel_initializer="zeros", + bias_initializer="ones", + activity_regularizer="l2", + ) + + model = test_utils.get_model_from_layers([layer], input_shape=(10,)) + + x = np.ones((10, 10), "float32") + optimizer = RMSPropOptimizer(learning_rate=0.001) + model.compile(optimizer, run_eagerly=test_utils.should_run_eagerly()) + loss = 
model.test_on_batch(x) + self.assertAlmostEqual(0.01, loss, places=4) + + @test_combinations.run_all_keras_modes + def test_activity_regularizer_batch_independent(self): + inputs = layers.Input(shape=(10,)) + x = layers.Dense(10, activation="relu", activity_regularizer="l2")( + inputs + ) + outputs = layers.Dense(1, activation="sigmoid")(x) + model = Model(inputs, outputs) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + model.compile(optimizer, run_eagerly=test_utils.should_run_eagerly()) + + loss_small_batch = model.test_on_batch(np.ones((10, 10), "float32")) + loss_big_batch = model.test_on_batch(np.ones((20, 10), "float32")) + self.assertAlmostEqual(loss_small_batch, loss_big_batch, places=4) + + @test_combinations.run_all_keras_modes + def test_with_shared_layer(self): + class LayerWithLoss(layers.Layer): + def call(self, inputs): + self.add_loss(tf.reduce_sum(inputs)) + return inputs * 2 + + shared_layer = LayerWithLoss() + + m = Sequential([shared_layer]) + m2 = Sequential([shared_layer, m]) + m2(tf.constant([1, 2, 3])) + self.assertEqual(len(m2.losses), 2) + self.assertAllClose(m2.losses, [6, 12]) + + @test_combinations.run_all_keras_modes + def test_with_shared_nested_layer(self): + class LayerWithLoss(layers.Layer): + def call(self, inputs): + self.add_loss(tf.reduce_sum(inputs)) + return inputs * 2 + + class LayerWithNestedLayerWithLoss(layers.Layer): + def __init__(self): + super().__init__() + self.loss_layer = LayerWithLoss() + + def call(self, inputs): + return self.loss_layer(inputs) + + shared_layer = LayerWithNestedLayerWithLoss() + + m = Sequential([shared_layer]) + m2 = Sequential([shared_layer, m]) + m2(tf.constant([1, 2, 3])) + self.assertLen(m2.losses, 2) + self.assertAllClose(m2.losses, [6, 12]) + + @test_combinations.run_all_keras_modes + def test_clear_losses(self): + class LayerWithSharedNestedLossLayer(layers.Layer): + def __init__(self): + super().__init__() + self.loss_layer = layers.ActivityRegularization(l2=0.001) + self.add_weight(shape=(1,), regularizer="l2") + + def call(self, x): + x = self.loss_layer(x) + return self.loss_layer(x) + + inputs = Input(shape=(1,)) + l = LayerWithSharedNestedLossLayer() # Weight loss + 2 activity losses. + + x1 = tf.ones((1, 1)) + _ = l(x1) + if not tf.executing_eagerly(): + self.assertLen(l.get_losses_for(x1), 2) + self.assertLen(l.get_losses_for(None), 1) + + x2 = tf.ones((1, 1)) + _ = l(x2) + if not tf.executing_eagerly(): + self.assertLen(l.get_losses_for(x1), 2) + self.assertLen(l.get_losses_for(x2), 2) + self.assertLen(l.get_losses_for(None), 1) + + outputs = l(inputs) + model = Model(inputs, outputs) + if not tf.executing_eagerly(): + self.assertLen(model.losses, 7) + self.assertLen(l.get_losses_for(x1), 2) + self.assertLen(l.get_losses_for(x2), 2) + self.assertLen(l.get_losses_for(None), 1) + + x3 = tf.ones((1, 1)) + model(x3) + x4 = tf.ones((1, 1)) + model(x4) + if tf.executing_eagerly(): + # Eager losses are cleared every `__call__`. 
+ self.assertLen(model.losses, 3) + else: + self.assertLen(model.losses, 11) + self.assertLen(l.get_losses_for(x3), 2) + self.assertLen(l.get_losses_for(x4), 2) + self.assertLen(l.get_losses_for(None), 1) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_invalid_constant_input(self): + inputs = Input(shape=(1,)) + outputs = test_utils.Bias()(inputs) + model = Model(inputs, outputs) + with self.assertRaisesRegex( + ValueError, + "Expected a symbolic Tensors or a callable for the loss value", + ): + model.add_loss(1.0) + + @test_combinations.run_all_keras_modes(always_skip_v1=True) + def test_invalid_variable_input(self): + inputs = Input(shape=(1,)) + outputs = test_utils.Bias()(inputs) + model = Model(inputs, outputs) + with self.assertRaisesRegex( + ValueError, + "Expected a symbolic Tensors or a callable for the loss value", + ): + model.add_loss(model.weights[0]) + + @test_combinations.run_all_keras_modes + def test_add_entropy_loss_on_functional_model(self): + inputs = Input(shape=(1,)) + targets = Input(shape=(1,)) + outputs = test_utils.Bias()(inputs) + model = Model([inputs, targets], outputs) + model.add_loss(losses.binary_crossentropy(targets, outputs)) + model.compile("sgd", run_eagerly=test_utils.should_run_eagerly()) + with tf.compat.v1.test.mock.patch.object( + logging, "warning" + ) as mock_log: + model.fit([self.x, self.y], batch_size=3, epochs=5) + self.assertNotIn( + "Gradients do not exist for variables", str(mock_log.call_args) + ) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/tests/automatic_outside_compilation_test.py b/keras/tests/automatic_outside_compilation_test.py index be09248516fd..254679be8913 100644 --- a/keras/tests/automatic_outside_compilation_test.py +++ b/keras/tests/automatic_outside_compilation_test.py @@ -17,7 +17,10 @@ import collections import os +import numpy as np +import tensorflow.compat.v2 as tf from absl import flags + from keras import callbacks from keras.distribute import distribute_strategy_test from keras.engine import base_layer @@ -29,262 +32,305 @@ from keras.layers import regularization as regularization_layer_lib from keras.layers import reshaping as reshaping_layer_lib from keras.testing_infra import test_utils -import numpy as np -import tensorflow.compat.v2 as tf -from tensorboard.plugins.histogram import summary_v2 as histogram_summary_v2 -from tensorboard.plugins.image import summary_v2 as image_summary_v2 -from tensorboard.plugins.scalar import summary_v2 as scalar_summary_v2 -from tensorflow.python.eager.context import set_soft_device_placement -from tensorflow.python.framework import test_util as tf_test_utils +# isort: off +from tensorboard.plugins.histogram import ( + summary_v2 as histogram_summary_v2, +) +from tensorboard.plugins.image import ( + summary_v2 as image_summary_v2, +) +from tensorboard.plugins.scalar import ( + summary_v2 as scalar_summary_v2, +) +from tensorflow.python.eager.context import ( + set_soft_device_placement, +) +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) NUM_CLASSES = 4 FLAGS = flags.FLAGS -flags.DEFINE_string('tpu', '', 'Name of TPU to connect to.') -flags.DEFINE_string('project', None, 'Name of GCP project with TPU.') -flags.DEFINE_string('zone', None, 'Name of GCP zone with TPU.') +flags.DEFINE_string("tpu", "", "Name of TPU to connect to.") +flags.DEFINE_string("project", None, "Name of GCP project with TPU.") +flags.DEFINE_string("zone", None, "Name of GCP zone with TPU.") def get_tpu_cluster_resolver(): - resolver 
= tf.distribute.cluster_resolver.TPUClusterResolver( - tpu=FLAGS.tpu, - zone=FLAGS.zone, - project=FLAGS.project, - ) - return resolver + resolver = tf.distribute.cluster_resolver.TPUClusterResolver( + tpu=FLAGS.tpu, + zone=FLAGS.zone, + project=FLAGS.project, + ) + return resolver def get_tpu_strategy(): - resolver = get_tpu_cluster_resolver() - tf.config.experimental_connect_to_cluster(resolver) - tf.tpu.experimental.initialize_tpu_system(resolver) - return tf.distribute.experimental.TPUStrategy(resolver) + resolver = get_tpu_cluster_resolver() + tf.config.experimental_connect_to_cluster(resolver) + tf.tpu.experimental.initialize_tpu_system(resolver) + return tf.distribute.experimental.TPUStrategy(resolver) class LayerForScalarSummary(base_layer.Layer): - """A pass-through layer that only records scalar values to summary.""" + """A pass-through layer that only records scalar values to summary.""" - def call(self, x): - # Add summary scalar using compat v2 implementation. - scalar_summary_v2.scalar('custom_scalar_summary_v2', tf.reduce_sum(x)) - return x + def call(self, x): + # Add summary scalar using compat v2 implementation. + scalar_summary_v2.scalar("custom_scalar_summary_v2", tf.reduce_sum(x)) + return x class LayerForImageSummary(base_layer.Layer): - """A pass-through layer that only records image values to summary.""" + """A pass-through layer that only records image values to summary.""" - def call(self, x): - # Add summary image using compat v2 implementation. - image_summary_v2.image('custom_image_summary_v2', x) + def call(self, x): + # Add summary image using compat v2 implementation. + image_summary_v2.image("custom_image_summary_v2", x) - return x + return x class LayerForHistogramSummary(base_layer.Layer): - """A pass-through layer that records histogram values to summary.""" + """A pass-through layer that records histogram values to summary.""" - def call(self, x): - # Add summary histogram using compat v2 implementation. - histogram_summary_v2.histogram('custom_histogram_summary_v2', x) + def call(self, x): + # Add summary histogram using compat v2 implementation. 
+ histogram_summary_v2.histogram("custom_histogram_summary_v2", x) - return x + return x class CustomModel(training.Model): - """Custom model with summary ops in model call definition.""" - - def __init__(self, name=None, enable_histograms=True): - super().__init__() - self._my_layers = [ - layer_lib.Dense( - 4096, - name='dense1', - kernel_initializer=tf.compat.v1.glorot_normal_initializer(seed=0), - use_bias=False), - layer_lib.Dense( - 4, - name='dense2', - kernel_initializer=tf.compat.v1.glorot_normal_initializer(seed=0), - use_bias=False), - ] - if enable_histograms: - self.histogram_summary_layer = LayerForHistogramSummary() - else: - self.histogram_summary_layer = base_layer.Layer() # no-op pass through - self.scalar_summary_layer = LayerForScalarSummary() - - def call(self, x): - for layer in self._my_layers: - x = layer(x) - x = self.scalar_summary_layer(x) - return self.histogram_summary_layer(x) + """Custom model with summary ops in model call definition.""" + + def __init__(self, name=None, enable_histograms=True): + super().__init__() + self._my_layers = [ + layer_lib.Dense( + 4096, + name="dense1", + kernel_initializer=tf.compat.v1.glorot_normal_initializer( + seed=0 + ), + use_bias=False, + ), + layer_lib.Dense( + 4, + name="dense2", + kernel_initializer=tf.compat.v1.glorot_normal_initializer( + seed=0 + ), + use_bias=False, + ), + ] + if enable_histograms: + self.histogram_summary_layer = LayerForHistogramSummary() + else: + self.histogram_summary_layer = ( + base_layer.Layer() + ) # no-op pass through + self.scalar_summary_layer = LayerForScalarSummary() + + def call(self, x): + for layer in self._my_layers: + x = layer(x) + x = self.scalar_summary_layer(x) + return self.histogram_summary_layer(x) def get_image_dataset(): - inputs = np.zeros((10, 28, 28, 3), dtype=np.float32) - targets = np.zeros((10, NUM_CLASSES), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10, drop_remainder=True) - return dataset + inputs = np.zeros((10, 28, 28, 3), dtype=np.float32) + targets = np.zeros((10, NUM_CLASSES), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10, drop_remainder=True) + return dataset def mnist_model(input_shape, enable_histograms=True): - """Creates a MNIST model.""" - model = sequential_model_lib.Sequential() - - # Adding custom pass-through layer to visualize input images. - model.add(LayerForImageSummary()) - - model.add( - conv_layer_lib.Conv2D( - 32, kernel_size=(3, 3), activation='relu', input_shape=input_shape)) - model.add(conv_layer_lib.Conv2D(64, (3, 3), activation='relu')) - model.add(pool_layer_lib.MaxPooling2D(pool_size=(2, 2))) - model.add(regularization_layer_lib.Dropout(0.25)) - model.add(reshaping_layer_lib.Flatten()) - model.add(layer_lib.Dense(128, activation='relu')) - model.add(regularization_layer_lib.Dropout(0.5)) - model.add(layer_lib.Dense(NUM_CLASSES, activation='softmax')) - - # Adding custom pass-through layer for summary recording. - if enable_histograms: - model.add(LayerForHistogramSummary()) - return model + """Creates a MNIST model.""" + model = sequential_model_lib.Sequential() + + # Adding custom pass-through layer to visualize input images. 
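The summary layers in this hunk are plain pass-throughs: they return their input unchanged and only emit a summary as a side effect, which is the pattern the TPU tests use to exercise automatic outside compilation of summary ops. A minimal sketch of the same pattern using the public tf.summary API rather than the TensorBoard plugin modules; the layer name and log directory are hypothetical:

import tensorflow.compat.v2 as tf

class PassThroughScalarSummary(tf.keras.layers.Layer):
    """Returns its input unchanged; records the batch sum as a scalar."""

    def call(self, x):
        # An explicit step avoids depending on a default summary step.
        tf.summary.scalar("batch_sum", tf.reduce_sum(x), step=0)
        return x

writer = tf.summary.create_file_writer("/tmp/summary_demo")
with writer.as_default():
    out = PassThroughScalarSummary()(tf.ones((2, 3)))
writer.close()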
+ model.add(LayerForImageSummary()) + + model.add( + conv_layer_lib.Conv2D( + 32, kernel_size=(3, 3), activation="relu", input_shape=input_shape + ) + ) + model.add(conv_layer_lib.Conv2D(64, (3, 3), activation="relu")) + model.add(pool_layer_lib.MaxPooling2D(pool_size=(2, 2))) + model.add(regularization_layer_lib.Dropout(0.25)) + model.add(reshaping_layer_lib.Flatten()) + model.add(layer_lib.Dense(128, activation="relu")) + model.add(regularization_layer_lib.Dropout(0.5)) + model.add(layer_lib.Dense(NUM_CLASSES, activation="softmax")) + + # Adding custom pass-through layer for summary recording. + if enable_histograms: + model.add(LayerForHistogramSummary()) + return model @test_utils.run_v2_only class AutoOutsideCompilationWithKerasTest(tf.test.TestCase): - - def setUp(self): - super().setUp() - set_soft_device_placement(True) - self.summary_dir = self.get_temp_dir() - - def validate_recorded_sumary_file(self, event_files, expected_event_counts): - event_counts = collections.defaultdict(int) - for event_file in event_files: - for e in tf.compat.v1.train.summary_iterator(event_file): - for v in e.summary.value: - event_counts[v.tag] += 1 - - event_counts = dict(event_counts) # Avoid defaultdict type in repr below. - # Populate a count of 0 for tags that were expected but not found. - actual_event_counts = { - tag: event_counts.get(tag, 0) for tag in expected_event_counts - } - self.assertEqual( - expected_event_counts, - actual_event_counts, - msg='expected counts not found; all event counts: %r' % event_counts) - - def testV2SummaryWithKerasSequentialModel(self): - # Histogram summaries require the MLIR bridge; see b/178826597#comment107. - # TODO(https://github.com/tensorflow/tensorboard/issues/2885): remove this - # if histogram summaries are supported fully on non-MLIR bridge or - # non-MLIR bridge is no longer run. - enable_histograms = tf_test_utils.is_mlir_bridge_enabled() - strategy = get_tpu_strategy() - - with strategy.scope(): - model = mnist_model((28, 28, 3), enable_histograms=enable_histograms) - model.compile('sgd', 'mse') - - dataset = get_image_dataset() - tensorboard_callback = callbacks.TensorBoard( - self.summary_dir, update_freq=2) - model.fit( - dataset, - steps_per_epoch=10, - epochs=1, - callbacks=[tensorboard_callback]) - - event_files = tf.io.gfile.glob( - os.path.join(self.summary_dir, 'train', 'event*')) - # Since total of 10 steps are ran and summary ops should be invoked - # every 2 batches, we should see total of 5 event logs for each summary. - expected_event_counts = { - 'sequential/layer_for_histogram_summary/custom_histogram_summary_v2': - 5 if enable_histograms else 0, - 'sequential/layer_for_image_summary/custom_image_summary_v2': - 5, - } - self.validate_recorded_sumary_file(event_files, expected_event_counts) - - def testV2SummaryWithKerasSubclassedModel(self): - # Histogram summaries require the MLIR bridge; see b/178826597#comment107. - # TODO(https://github.com/tensorflow/tensorboard/issues/2885): remove this - # if histogram summaries are supported fully on non-MLIR bridge or - # non-MLIR bridge is no longer run. 
- enable_histograms = tf_test_utils.is_mlir_bridge_enabled() - strategy = get_tpu_strategy() - with strategy.scope(): - model = CustomModel(enable_histograms=enable_histograms) - model.compile('sgd', 'mse') - - dataset = distribute_strategy_test.get_dataset(strategy) - tensorboard_callback = callbacks.TensorBoard( - self.summary_dir, update_freq=2) - model.fit( - dataset, - steps_per_epoch=10, - epochs=1, - callbacks=[tensorboard_callback]) - - event_files = tf.io.gfile.glob( - os.path.join(self.summary_dir, 'train', 'event*')) - # Since total of 10 steps are ran and summary ops should be invoked - # every 2 batches, we should see total of 5 event logs for each summary. - expected_event_counts = { - ('custom_model/layer_for_scalar_summary/' - 'custom_scalar_summary_v2'): - 5, - ('custom_model/layer_for_histogram_summary/' - 'custom_histogram_summary_v2'): - 5 if enable_histograms else 0, - } - self.validate_recorded_sumary_file(event_files, expected_event_counts) - - def testSummaryWithCustomTrainingLoop(self): - strategy = get_tpu_strategy() - - writer = tf.summary.create_file_writer(self.summary_dir) - with strategy.scope(): - model = distribute_strategy_test.get_model() - model.compile('sgd', 'mse') - - @tf.function - def custom_function(dataset): - - def _custom_step(features, labels): - del labels - logits = model(features) - with tf.summary.record_if(True), writer.as_default(): - scalar_summary_v2.scalar( - 'logits', - tf.reduce_sum(logits), - step=model.optimizer.iterations) - return logits - - iterator = iter(dataset) - output = strategy.unwrap( - strategy.run(_custom_step, args=(next(iterator)))) - return output - - dataset = strategy.experimental_distribute_dataset( - distribute_strategy_test.get_dataset(strategy)) - - custom_function(dataset) - writer.close() - - event_files = tf.io.gfile.glob( - os.path.join(self.summary_dir, 'event*')) - expected_event_counts = { - 'logits': 1, - } - self.validate_recorded_sumary_file(event_files, expected_event_counts) - - -if __name__ == '__main__': - tf.test.main() + def setUp(self): + super().setUp() + set_soft_device_placement(True) + self.summary_dir = self.get_temp_dir() + + def validate_recorded_sumary_file(self, event_files, expected_event_counts): + event_counts = collections.defaultdict(int) + for event_file in event_files: + for e in tf.compat.v1.train.summary_iterator(event_file): + for v in e.summary.value: + event_counts[v.tag] += 1 + + event_counts = dict( + event_counts + ) # Avoid defaultdict type in repr below. + # Populate a count of 0 for tags that were expected but not found. + actual_event_counts = { + tag: event_counts.get(tag, 0) for tag in expected_event_counts + } + self.assertEqual( + expected_event_counts, + actual_event_counts, + msg="expected counts not found; all event counts: %r" + % event_counts, + ) + + def testV2SummaryWithKerasSequentialModel(self): + # Histogram summaries require the MLIR bridge; see + # b/178826597#comment107. + # TODO(https://github.com/tensorflow/tensorboard/issues/2885): remove + # this if histogram summaries are supported fully on non-MLIR bridge or + # non-MLIR bridge is no longer run. 
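The validate_recorded_sumary_file helper added above tallies how many times each tag occurs across the event files written by the TensorBoard callback. A standalone sketch of that event-counting idea, assuming a hypothetical event file path:

import collections
import tensorflow.compat.v2 as tf

def count_summary_tags(event_file):
    # Walk every Event proto in the file and tally each summary tag.
    counts = collections.defaultdict(int)
    for event in tf.compat.v1.train.summary_iterator(event_file):
        for value in event.summary.value:
            counts[value.tag] += 1
    return dict(counts)

# With update_freq=2 and 10 training steps, each summary op fires every
# other batch, so each tag should appear 5 times:
# count_summary_tags("/tmp/logs/train/events.out.tfevents.123.host")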
+ enable_histograms = tf_test_utils.is_mlir_bridge_enabled() + strategy = get_tpu_strategy() + + with strategy.scope(): + model = mnist_model( + (28, 28, 3), enable_histograms=enable_histograms + ) + model.compile("sgd", "mse") + + dataset = get_image_dataset() + tensorboard_callback = callbacks.TensorBoard( + self.summary_dir, update_freq=2 + ) + model.fit( + dataset, + steps_per_epoch=10, + epochs=1, + callbacks=[tensorboard_callback], + ) + + event_files = tf.io.gfile.glob( + os.path.join(self.summary_dir, "train", "event*") + ) + # Since total of 10 steps are ran and summary ops should be invoked + # every 2 batches, we should see total of 5 event logs for each + # summary. + expected_event_counts = { + "sequential/layer_for_histogram_summary/custom_histogram_summary_v2": 5 # noqa: E501 + if enable_histograms + else 0, + "sequential/layer_for_image_summary/custom_image_summary_v2": 5, + } + self.validate_recorded_sumary_file( + event_files, expected_event_counts + ) + + def testV2SummaryWithKerasSubclassedModel(self): + # Histogram summaries require the MLIR bridge; see + # b/178826597#comment107. + # TODO(https://github.com/tensorflow/tensorboard/issues/2885): remove + # this if histogram summaries are supported fully on non-MLIR bridge or + # non-MLIR bridge is no longer run. + enable_histograms = tf_test_utils.is_mlir_bridge_enabled() + strategy = get_tpu_strategy() + with strategy.scope(): + model = CustomModel(enable_histograms=enable_histograms) + model.compile("sgd", "mse") + + dataset = distribute_strategy_test.get_dataset(strategy) + tensorboard_callback = callbacks.TensorBoard( + self.summary_dir, update_freq=2 + ) + model.fit( + dataset, + steps_per_epoch=10, + epochs=1, + callbacks=[tensorboard_callback], + ) + + event_files = tf.io.gfile.glob( + os.path.join(self.summary_dir, "train", "event*") + ) + # Since total of 10 steps are ran and summary ops should be invoked + # every 2 batches, we should see total of 5 event logs for each + # summary. 
+ expected_event_counts = { + ( + "custom_model/layer_for_scalar_summary/" + "custom_scalar_summary_v2" + ): 5, + ( + "custom_model/layer_for_histogram_summary/" + "custom_histogram_summary_v2" + ): 5 + if enable_histograms + else 0, + } + self.validate_recorded_sumary_file( + event_files, expected_event_counts + ) + + def testSummaryWithCustomTrainingLoop(self): + strategy = get_tpu_strategy() + + writer = tf.summary.create_file_writer(self.summary_dir) + with strategy.scope(): + model = distribute_strategy_test.get_model() + model.compile("sgd", "mse") + + @tf.function + def custom_function(dataset): + def _custom_step(features, labels): + del labels + logits = model(features) + with tf.summary.record_if(True), writer.as_default(): + scalar_summary_v2.scalar( + "logits", + tf.reduce_sum(logits), + step=model.optimizer.iterations, + ) + return logits + + iterator = iter(dataset) + output = strategy.unwrap( + strategy.run(_custom_step, args=(next(iterator))) + ) + return output + + dataset = strategy.experimental_distribute_dataset( + distribute_strategy_test.get_dataset(strategy) + ) + + custom_function(dataset) + writer.close() + + event_files = tf.io.gfile.glob(os.path.join(self.summary_dir, "event*")) + expected_event_counts = { + "logits": 1, + } + self.validate_recorded_sumary_file(event_files, expected_event_counts) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/tests/convert_to_constants_test.py b/keras/tests/convert_to_constants_test.py index 8e5a6425f0bc..bb743c84103b 100644 --- a/keras/tests/convert_to_constants_test.py +++ b/keras/tests/convert_to_constants_test.py @@ -14,157 +14,167 @@ # ============================================================================== """Tests for convert_to_constants.py.""" -import tensorflow.compat.v2 as tf - import os import numpy as np +import tensorflow.compat.v2 as tf import keras -from tensorflow.python.framework import convert_to_constants from keras.testing_infra import test_utils + +# isort: off +from tensorflow.python.framework import convert_to_constants from tensorflow.python.saved_model.load import load from tensorflow.python.saved_model.save import save class VariablesToConstantsTest(tf.test.TestCase): - - def _freezeModel(self, model): - """Freezes the model. - - Args: - model: Function. - - Returns: - root: AutoTrackable object with original ConcreteFunction. - output_func: frozen ConcreteFunction. - """ - root = tf.Module() - root.f = model - input_func = root.f.get_concrete_function() - - output_func = convert_to_constants.convert_variables_to_constants_v2( - input_func, lower_control_flow=False) - return root, output_func - - def _hasStatefulPartitionedCallOp(self, graph_def): - """Determines if a StatefulPartitionedCall op exists in the graph.""" - for node in graph_def.node: - if node.op == "StatefulPartitionedCall": - return True - return False - - def _getNumVariables(self, graph_def): - """Returns the number of ReadVariableOp in the graph.""" - return sum(node.op == "ReadVariableOp" for node in graph_def.node) - - def _testConvertedFunction(self, obj, func, converted_concrete_func, - input_data): - # Ensure the converted graph has no variables and no function calls. - constant_graph_def = converted_concrete_func.graph.as_graph_def() - self.assertEqual(0, self._getNumVariables(constant_graph_def)) - self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def)) - - # Check that the converted ConcreteFunction produces the same result as the - # original Function. 
- expected_value = tf.nest.flatten(func(**input_data)) - actual_value = tf.nest.flatten(converted_concrete_func(**input_data)) - - for expected, actual in zip(expected_value, actual_value): - np.testing.assert_almost_equal(expected.numpy(), actual.numpy()) - - # Ensure the shape is retained. - for tensor in converted_concrete_func.inputs: - actual_shape = input_data[tensor.name.split(":")[0]].shape - self.assertEqual(tensor.shape, actual_shape) - - # Save the converted ConcreteFunction as a signature. - save_dir = os.path.join(self.get_temp_dir(), "frozen_saved_model") - root = tf.Module() - root.f = converted_concrete_func - save(root, save_dir, {"mykey": converted_concrete_func}) - - # Load it back and make sure it works. - loaded_obj = load(save_dir) - actual_value = tf.nest.flatten(loaded_obj.signatures["mykey"](**input_data)) - for expected, actual in zip(expected_value, actual_value): - np.testing.assert_almost_equal(expected.numpy(), actual.numpy()) - - @test_utils.run_v2_only - def testKerasModel(self): - """Test a basic Keras model with Variables.""" - input_data = {"x": tf.constant(1., shape=[1, 1])} - - # Create a simple Keras model. - x = [-1, 0, 1, 2, 3, 4] - y = [-3, -1, 1, 3, 5, 7] - - model = keras.models.Sequential( - [keras.layers.Dense(units=1, input_shape=[1])]) - model.compile(optimizer="sgd", loss="mean_squared_error") - model.fit(x, y, epochs=1) - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[1, 1], dtype=tf.float32) - ]) - def to_save(x): - return model(x) - - root, output_func = self._freezeModel(to_save) - self._testConvertedFunction(root, root.f, output_func, input_data) - - @test_utils.run_v2_only - def testKerasLSTM(self): - """Test a Keras LSTM containing dynamic_rnn ops.""" - input_data = { - "x": - tf.constant( + def _freezeModel(self, model): + """Freezes the model. + + Args: + model: Function. + + Returns: + root: AutoTrackable object with original ConcreteFunction. + output_func: frozen ConcreteFunction. + """ + root = tf.Module() + root.f = model + input_func = root.f.get_concrete_function() + + output_func = convert_to_constants.convert_variables_to_constants_v2( + input_func, lower_control_flow=False + ) + return root, output_func + + def _hasStatefulPartitionedCallOp(self, graph_def): + """Determines if a StatefulPartitionedCall op exists in the graph.""" + for node in graph_def.node: + if node.op == "StatefulPartitionedCall": + return True + return False + + def _getNumVariables(self, graph_def): + """Returns the number of ReadVariableOp in the graph.""" + return sum(node.op == "ReadVariableOp" for node in graph_def.node) + + def _testConvertedFunction( + self, obj, func, converted_concrete_func, input_data + ): + # Ensure the converted graph has no variables and no function calls. + constant_graph_def = converted_concrete_func.graph.as_graph_def() + self.assertEqual(0, self._getNumVariables(constant_graph_def)) + self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def)) + + # Check that the converted ConcreteFunction produces the same result as + # the original Function. + expected_value = tf.nest.flatten(func(**input_data)) + actual_value = tf.nest.flatten(converted_concrete_func(**input_data)) + + for expected, actual in zip(expected_value, actual_value): + np.testing.assert_almost_equal(expected.numpy(), actual.numpy()) + + # Ensure the shape is retained. 
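The _freezeModel helper wraps convert_variables_to_constants_v2, which rewrites a traced ConcreteFunction so that every variable read becomes an embedded constant. A self-contained sketch of that conversion on a one-variable function; the module and values are illustrative:

import tensorflow.compat.v2 as tf
from tensorflow.python.framework import convert_to_constants

root = tf.Module()
root.v = tf.Variable(3.0)
root.f = tf.function(
    lambda x: root.v * x,
    input_signature=[tf.TensorSpec(shape=[None], dtype=tf.float32)],
)
concrete_func = root.f.get_concrete_function()

# After conversion the graph contains no ReadVariableOp nodes; the
# variable's current value is baked in as a Const, which is what the
# _getNumVariables and _hasStatefulPartitionedCallOp assertions check.
frozen_func = convert_to_constants.convert_variables_to_constants_v2(
    concrete_func, lower_control_flow=False
)
print(frozen_func(tf.constant([1.0, 2.0])))  # -> [3.0, 6.0]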
+ for tensor in converted_concrete_func.inputs: + actual_shape = input_data[tensor.name.split(":")[0]].shape + self.assertEqual(tensor.shape, actual_shape) + + # Save the converted ConcreteFunction as a signature. + save_dir = os.path.join(self.get_temp_dir(), "frozen_saved_model") + root = tf.Module() + root.f = converted_concrete_func + save(root, save_dir, {"mykey": converted_concrete_func}) + + # Load it back and make sure it works. + loaded_obj = load(save_dir) + actual_value = tf.nest.flatten( + loaded_obj.signatures["mykey"](**input_data) + ) + for expected, actual in zip(expected_value, actual_value): + np.testing.assert_almost_equal(expected.numpy(), actual.numpy()) + + @test_utils.run_v2_only + def testKerasModel(self): + """Test a basic Keras model with Variables.""" + input_data = {"x": tf.constant(1.0, shape=[1, 1])} + + # Create a simple Keras model. + x = [-1, 0, 1, 2, 3, 4] + y = [-3, -1, 1, 3, 5, 7] + + model = keras.models.Sequential( + [keras.layers.Dense(units=1, input_shape=[1])] + ) + model.compile(optimizer="sgd", loss="mean_squared_error") + model.fit(x, y, epochs=1) + + @tf.function( + input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.float32)] + ) + def to_save(x): + return model(x) + + root, output_func = self._freezeModel(to_save) + self._testConvertedFunction(root, root.f, output_func, input_data) + + @test_utils.run_v2_only + def testKerasLSTM(self): + """Test a Keras LSTM containing dynamic_rnn ops.""" + input_data = { + "x": tf.constant( np.array( - np.random.random_sample((10, 10, 10)), dtype=np.float32)) - } - - model = keras.models.Sequential( - [keras.layers.LSTM(units=10, input_shape=(10, 10))]) - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[10, 10, 10], dtype=tf.float32) - ]) - def to_save(x): - return model(x) - - root, output_func = self._freezeModel(to_save) - self._testConvertedFunction(root, root.f, output_func, input_data) - - @test_utils.run_v2_only - def testEmbeddings(self): - """Test model with embeddings.""" - input_data = { - "x": - tf.constant( - np.array(np.random.random_sample((20)), dtype=np.int32)) - } - - class EmbeddingModel(keras.Model): - - def __init__(self): - super().__init__() - self.shared_weights = self.add_weight( - "weights", - shape=(2000, 300), - dtype=tf.float32, - initializer=tf.compat.v1.random_normal_initializer( - mean=0.0, stddev=300**(-0.5))) - - @tf.function(input_signature=[ - tf.TensorSpec(shape=(20), dtype=tf.int32) - ]) - def func(self, x): - return tf.gather(self.shared_weights, x) - - model = EmbeddingModel() - root, output_func = self._freezeModel(model.func) - self._testConvertedFunction(root, root.f, output_func, input_data) + np.random.random_sample((10, 10, 10)), dtype=np.float32 + ) + ) + } + + model = keras.models.Sequential( + [keras.layers.LSTM(units=10, input_shape=(10, 10))] + ) + + @tf.function( + input_signature=[ + tf.TensorSpec(shape=[10, 10, 10], dtype=tf.float32) + ] + ) + def to_save(x): + return model(x) + + root, output_func = self._freezeModel(to_save) + self._testConvertedFunction(root, root.f, output_func, input_data) + + @test_utils.run_v2_only + def testEmbeddings(self): + """Test model with embeddings.""" + input_data = { + "x": tf.constant( + np.array(np.random.random_sample((20)), dtype=np.int32) + ) + } + + class EmbeddingModel(keras.Model): + def __init__(self): + super().__init__() + self.shared_weights = self.add_weight( + "weights", + shape=(2000, 300), + dtype=tf.float32, + initializer=tf.compat.v1.random_normal_initializer( + mean=0.0, stddev=300 ** (-0.5) + 
), + ) + + @tf.function( + input_signature=[tf.TensorSpec(shape=(20), dtype=tf.int32)] + ) + def func(self, x): + return tf.gather(self.shared_weights, x) + + model = EmbeddingModel() + root, output_func = self._freezeModel(model.func) + self._testConvertedFunction(root, root.f, output_func, input_data) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/tests/custom_training_loop_test.py b/keras/tests/custom_training_loop_test.py index 891633cd4dd7..c9be92dbf2ea 100644 --- a/keras/tests/custom_training_loop_test.py +++ b/keras/tests/custom_training_loop_test.py @@ -14,10 +14,9 @@ # ============================================================================== """Tests for custom training loops.""" +import numpy as np import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy as np import keras from keras.testing_infra import test_combinations @@ -25,211 +24,220 @@ class LayerWithLosses(keras.layers.Layer): + def build(self, input_shape): + self.v = self.add_weight( + name="hey", + shape=(), + initializer="ones", + regularizer=keras.regularizers.l1(100), + ) - def build(self, input_shape): - self.v = self.add_weight( - name='hey', - shape=(), - initializer='ones', - regularizer=keras.regularizers.l1(100)) - - def call(self, inputs): - self.add_loss(tf.reduce_sum(inputs)) - return self.v * inputs + def call(self, inputs): + self.add_loss(tf.reduce_sum(inputs)) + return self.v * inputs class LayerWithMetrics(keras.layers.Layer): + def build(self, input_shape): + self.mean = keras.metrics.Mean(name="mean_object") - def build(self, input_shape): - self.mean = keras.metrics.Mean(name='mean_object') - - def call(self, inputs): - self.add_metric( - tf.reduce_mean(inputs), name='mean_tensor', aggregation='mean') - self.add_metric(self.mean(inputs)) - return inputs + def call(self, inputs): + self.add_metric( + tf.reduce_mean(inputs), name="mean_tensor", aggregation="mean" + ) + self.add_metric(self.mean(inputs)) + return inputs class LayerWithTrainingArg(keras.layers.Layer): - - def call(self, inputs, training=None): - self.training = training - if training: - return inputs - else: - return 0. 
* inputs + def call(self, inputs, training=None): + self.training = training + if training: + return inputs + else: + return 0.0 * inputs def add_loss_step(defun): - optimizer = keras.optimizers.optimizer_v2.adam.Adam() - model = test_utils.get_model_from_layers([LayerWithLosses()], - input_shape=(10,)) - - def train_step(x): - with tf.GradientTape() as tape: - model(x) - assert len(model.losses) == 2 - loss = tf.reduce_sum(model.losses) - gradients = tape.gradient(loss, model.trainable_weights) - optimizer.apply_gradients(zip(gradients, model.trainable_weights)) - return loss - - if defun: - train_step = tf.function(train_step) - - x = tf.ones((10, 10)) - return train_step(x) - - -def batch_norm_step(defun): - optimizer = keras.optimizers.optimizer_v2.adadelta.Adadelta() - model = test_utils.get_model_from_layers([ - keras.layers.BatchNormalization(momentum=0.9), - keras.layers.Dense(1, kernel_initializer='zeros', activation='softmax') - ], input_shape=(10,)) - - def train_step(x, y): - with tf.GradientTape() as tape: - y_pred = model(x, training=True) - loss = keras.losses.binary_crossentropy(y, y_pred) - gradients = tape.gradient(loss, model.trainable_weights) - optimizer.apply_gradients(zip(gradients, model.trainable_weights)) - return loss, model(x, training=False) - - if defun: - train_step = tf.function(train_step) - - x, y = tf.ones((10, 10)), tf.ones((10, 1)) - return train_step(x, y) - - -def add_metric_step(defun): - optimizer = keras.optimizers.optimizer_v2.rmsprop.RMSprop() - model = test_utils.get_model_from_layers([ - LayerWithMetrics(), - keras.layers.Dense(1, kernel_initializer='zeros', activation='softmax') - ], input_shape=(10,)) - - def train_step(x, y): - with tf.GradientTape() as tape: - y_pred_1 = model(x) - y_pred_2 = model(2 * x) - y_pred = y_pred_1 + y_pred_2 - loss = keras.losses.mean_squared_error(y, y_pred) - gradients = tape.gradient(loss, model.trainable_weights) - optimizer.apply_gradients(zip(gradients, model.trainable_weights)) - assert len(model.metrics) == 2 - return [m.result() for m in model.metrics] - - if defun: - train_step = tf.function(train_step) - - x, y = tf.ones((10, 10)), tf.zeros((10, 1)) - metrics = train_step(x, y) - assert np.allclose(metrics[0], 1.5) - assert np.allclose(metrics[1], 1.5) - return metrics - - -@test_combinations.run_with_all_model_types -class CustomTrainingLoopTest(test_combinations.TestCase): - - @parameterized.named_parameters(('add_loss_step', add_loss_step), - ('add_metric_step', add_metric_step), - ('batch_norm_step', batch_norm_step)) - def test_eager_and_tf_function(self, train_step): - eager_result = train_step(defun=False) - fn_result = train_step(defun=True) - self.assertAllClose(eager_result, fn_result) - - @parameterized.named_parameters(('eager', False), ('defun', True)) - def test_training_arg_propagation(self, defun): - - model = test_utils.get_model_from_layers([LayerWithTrainingArg()], - input_shape=(1,)) + optimizer = keras.optimizers.legacy.adam.Adam() + model = test_utils.get_model_from_layers( + [LayerWithLosses()], input_shape=(10,) + ) def train_step(x): - return model(x), model(x, training=False), model(x, training=True) + with tf.GradientTape() as tape: + model(x) + assert len(model.losses) == 2 + loss = tf.reduce_sum(model.losses) + gradients = tape.gradient(loss, model.trainable_weights) + optimizer.apply_gradients(zip(gradients, model.trainable_weights)) + return loss if defun: - train_step = tf.function(train_step) + train_step = tf.function(train_step) - x = tf.ones((1, 1)) - results = 
train_step(x) - self.assertAllClose(results[0], tf.zeros((1, 1))) - self.assertAllClose(results[1], tf.zeros((1, 1))) - self.assertAllClose(results[2], tf.ones((1, 1))) + x = tf.ones((10, 10)) + return train_step(x) - @parameterized.named_parameters(('eager', False), ('defun', True)) - def test_learning_phase_propagation(self, defun): - class MyModel(keras.layers.Layer): - - def __init__(self): - super().__init__() - self.layer = LayerWithTrainingArg() - - def call(self, inputs): - return self.layer(inputs) - - model = MyModel() - - def train_step(x): - no_learning_phase_out = model(x) - self.assertFalse(model.layer.training) - with keras.backend.learning_phase_scope(0): - inf_learning_phase_out = model(x) - self.assertEqual(model.layer.training, 0) - with keras.backend.learning_phase_scope(1): - train_learning_phase_out = model(x) - self.assertEqual(model.layer.training, 1) - return [ - no_learning_phase_out, inf_learning_phase_out, - train_learning_phase_out - ] +def batch_norm_step(defun): + optimizer = keras.optimizers.legacy.adadelta.Adadelta() + model = test_utils.get_model_from_layers( + [ + keras.layers.BatchNormalization(momentum=0.9), + keras.layers.Dense( + 1, kernel_initializer="zeros", activation="softmax" + ), + ], + input_shape=(10,), + ) + + def train_step(x, y): + with tf.GradientTape() as tape: + y_pred = model(x, training=True) + loss = keras.losses.binary_crossentropy(y, y_pred) + gradients = tape.gradient(loss, model.trainable_weights) + optimizer.apply_gradients(zip(gradients, model.trainable_weights)) + return loss, model(x, training=False) if defun: - train_step = tf.function(train_step) - - x = tf.ones((1, 1)) - results = train_step(x) - self.assertAllClose(results[0], tf.zeros((1, 1))) - self.assertAllClose(results[1], tf.zeros((1, 1))) - self.assertAllClose(results[2], tf.ones((1, 1))) - - @parameterized.named_parameters(('eager', False), ('defun', True)) - def test_training_arg_priorities(self, defun): - - class MyModel(keras.layers.Layer): - - def __init__(self): - super().__init__() - self.layer = LayerWithTrainingArg() + train_step = tf.function(train_step) - def call(self, inputs, training=False): - return self.layer(inputs) + x, y = tf.ones((10, 10)), tf.ones((10, 1)) + return train_step(x, y) - model = MyModel() - def train_step(x): - explicit_out = model(x, training=True) - default_out = model(x) - with keras.backend.learning_phase_scope(1): - parent_out = model(x, training=False) - lr_out = model(x) - return [explicit_out, default_out, parent_out, lr_out] +def add_metric_step(defun): + optimizer = keras.optimizers.legacy.rmsprop.RMSprop() + model = test_utils.get_model_from_layers( + [ + LayerWithMetrics(), + keras.layers.Dense( + 1, kernel_initializer="zeros", activation="softmax" + ), + ], + input_shape=(10,), + ) + + def train_step(x, y): + with tf.GradientTape() as tape: + y_pred_1 = model(x) + y_pred_2 = model(2 * x) + y_pred = y_pred_1 + y_pred_2 + loss = keras.losses.mean_squared_error(y, y_pred) + gradients = tape.gradient(loss, model.trainable_weights) + optimizer.apply_gradients(zip(gradients, model.trainable_weights)) + assert len(model.metrics) == 2 + return [m.result() for m in model.metrics] if defun: - train_step = tf.function(train_step) + train_step = tf.function(train_step) - x = tf.ones((1, 1)) - results = train_step(x) - self.assertAllClose(results[0], tf.ones((1, 1))) - self.assertAllClose(results[1], tf.zeros((1, 1))) - self.assertAllClose(results[2], tf.zeros((1, 1))) - self.assertAllClose(results[3], tf.ones((1, 1))) + x, y = 
tf.ones((10, 10)), tf.zeros((10, 1)) + metrics = train_step(x, y) + assert np.allclose(metrics[0], 1.5) + assert np.allclose(metrics[1], 1.5) + return metrics -if __name__ == '__main__': - tf.compat.v1.enable_eager_execution() - tf.test.main() +@test_combinations.run_with_all_model_types +class CustomTrainingLoopTest(test_combinations.TestCase): + @parameterized.named_parameters( + ("add_loss_step", add_loss_step), + ("add_metric_step", add_metric_step), + ("batch_norm_step", batch_norm_step), + ) + def test_eager_and_tf_function(self, train_step): + eager_result = train_step(defun=False) + fn_result = train_step(defun=True) + self.assertAllClose(eager_result, fn_result) + + @parameterized.named_parameters(("eager", False), ("defun", True)) + def test_training_arg_propagation(self, defun): + + model = test_utils.get_model_from_layers( + [LayerWithTrainingArg()], input_shape=(1,) + ) + + def train_step(x): + return model(x), model(x, training=False), model(x, training=True) + + if defun: + train_step = tf.function(train_step) + + x = tf.ones((1, 1)) + results = train_step(x) + self.assertAllClose(results[0], tf.zeros((1, 1))) + self.assertAllClose(results[1], tf.zeros((1, 1))) + self.assertAllClose(results[2], tf.ones((1, 1))) + + @parameterized.named_parameters(("eager", False), ("defun", True)) + def test_learning_phase_propagation(self, defun): + class MyModel(keras.layers.Layer): + def __init__(self): + super().__init__() + self.layer = LayerWithTrainingArg() + + def call(self, inputs): + return self.layer(inputs) + + model = MyModel() + + def train_step(x): + no_learning_phase_out = model(x) + self.assertFalse(model.layer.training) + with keras.backend.learning_phase_scope(0): + inf_learning_phase_out = model(x) + self.assertEqual(model.layer.training, 0) + with keras.backend.learning_phase_scope(1): + train_learning_phase_out = model(x) + self.assertEqual(model.layer.training, 1) + return [ + no_learning_phase_out, + inf_learning_phase_out, + train_learning_phase_out, + ] + + if defun: + train_step = tf.function(train_step) + + x = tf.ones((1, 1)) + results = train_step(x) + self.assertAllClose(results[0], tf.zeros((1, 1))) + self.assertAllClose(results[1], tf.zeros((1, 1))) + self.assertAllClose(results[2], tf.ones((1, 1))) + + @parameterized.named_parameters(("eager", False), ("defun", True)) + def test_training_arg_priorities(self, defun): + class MyModel(keras.layers.Layer): + def __init__(self): + super().__init__() + self.layer = LayerWithTrainingArg() + + def call(self, inputs, training=False): + return self.layer(inputs) + + model = MyModel() + + def train_step(x): + explicit_out = model(x, training=True) + default_out = model(x) + with keras.backend.learning_phase_scope(1): + parent_out = model(x, training=False) + lr_out = model(x) + return [explicit_out, default_out, parent_out, lr_out] + + if defun: + train_step = tf.function(train_step) + + x = tf.ones((1, 1)) + results = train_step(x) + self.assertAllClose(results[0], tf.ones((1, 1))) + self.assertAllClose(results[1], tf.zeros((1, 1))) + self.assertAllClose(results[2], tf.zeros((1, 1))) + self.assertAllClose(results[3], tf.ones((1, 1))) + + +if __name__ == "__main__": + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/tests/get_config_samples.py b/keras/tests/get_config_samples.py index 3ef1b630264c..12f9f7df84ed 100644 --- a/keras/tests/get_config_samples.py +++ b/keras/tests/get_config_samples.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # 
limitations under the License. # ============================================================================== -# pylint: disable=protected-access + """Sample `get_config` results for testing backwards compatibility.""" # inputs = tf.keras.Input(10) @@ -20,75 +20,69 @@ # outputs = tf.keras.layers.Dense(1)(x) # model = tf.keras.Model(inputs, outputs) FUNCTIONAL_DNN = { - 'input_layers': [['input_1', 0, 0]], - 'layers': [{ - 'class_name': 'InputLayer', - 'config': { - 'batch_input_shape': (None, 10), - 'dtype': 'float32', - 'name': 'input_1', - 'ragged': False, - 'sparse': False - }, - 'inbound_nodes': [], - 'name': 'input_1' - }, { - 'class_name': 'Dense', - 'config': { - 'activation': 'relu', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} + "input_layers": [["input_1", 0, 0]], + "layers": [ + { + "class_name": "InputLayer", + "config": { + "batch_input_shape": (None, 10), + "dtype": "float32", + "name": "input_1", + "ragged": False, + "sparse": False, }, - 'bias_regularizer': None, - 'dtype': 'float32', - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } - }, - 'kernel_regularizer': None, - 'name': 'dense', - 'trainable': True, - 'units': 10, - 'use_bias': True + "inbound_nodes": [], + "name": "input_1", }, - 'inbound_nodes': [[['input_1', 0, 0, {}]]], - 'name': 'dense' - }, { - 'class_name': 'Dense', - 'config': { - 'activation': 'linear', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} + { + "class_name": "Dense", + "config": { + "activation": "relu", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dtype": "float32", + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "dense", + "trainable": True, + "units": 10, + "use_bias": True, }, - 'bias_regularizer': None, - 'dtype': 'float32', - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } + "inbound_nodes": [[["input_1", 0, 0, {}]]], + "name": "dense", + }, + { + "class_name": "Dense", + "config": { + "activation": "linear", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dtype": "float32", + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "dense_1", + "trainable": True, + "units": 1, + "use_bias": True, }, - 'kernel_regularizer': None, - 'name': 'dense_1', - 'trainable': True, - 'units': 1, - 'use_bias': True + "inbound_nodes": [[["dense", 0, 0, {}]]], + "name": "dense_1", }, - 'inbound_nodes': [[['dense', 0, 0, {}]]], - 'name': 'dense_1' - }], - 'name': 'model', - 'output_layers': [['dense_1', 0, 0]] + ], + "name": "model", + "output_layers": [["dense_1", 0, 0]], } # inputs = tf.keras.Input((256, 256, 3)) @@ -97,90 +91,85 @@ # outputs = tf.keras.layers.Dense(1)(x) # model = tf.keras.Model(inputs, outputs) FUNCTIONAL_CNN = { - 'input_layers': [['input_2', 0, 0]], - 'layers': [{ - 'class_name': 'InputLayer', - 'config': { - 'batch_input_shape': (None, 256, 256, 3), - 'dtype': 'float32', - 'name': 'input_2', - 'ragged': False, - 'sparse': 
False - }, - 'inbound_nodes': [], - 'name': 'input_2' - }, { - 'class_name': 'Conv2D', - 'config': { - 'activation': 'linear', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} - }, - 'bias_regularizer': None, - 'data_format': 'channels_last', - 'dilation_rate': (1, 1), - 'dtype': 'float32', - 'filters': 3, - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } + "input_layers": [["input_2", 0, 0]], + "layers": [ + { + "class_name": "InputLayer", + "config": { + "batch_input_shape": (None, 256, 256, 3), + "dtype": "float32", + "name": "input_2", + "ragged": False, + "sparse": False, }, - 'kernel_regularizer': None, - 'kernel_size': (3, 3), - 'name': 'conv2d', - 'padding': 'valid', - 'strides': (1, 1), - 'trainable': True, - 'use_bias': True + "inbound_nodes": [], + "name": "input_2", }, - 'inbound_nodes': [[['input_2', 0, 0, {}]]], - 'name': 'conv2d' - }, { - 'class_name': 'Flatten', - 'config': { - 'data_format': 'channels_last', - 'dtype': 'float32', - 'name': 'flatten', - 'trainable': True + { + "class_name": "Conv2D", + "config": { + "activation": "linear", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "data_format": "channels_last", + "dilation_rate": (1, 1), + "dtype": "float32", + "filters": 3, + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "kernel_size": (3, 3), + "name": "conv2d", + "padding": "valid", + "strides": (1, 1), + "trainable": True, + "use_bias": True, + }, + "inbound_nodes": [[["input_2", 0, 0, {}]]], + "name": "conv2d", }, - 'inbound_nodes': [[['conv2d', 0, 0, {}]]], - 'name': 'flatten' - }, { - 'class_name': 'Dense', - 'config': { - 'activation': 'linear', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} + { + "class_name": "Flatten", + "config": { + "data_format": "channels_last", + "dtype": "float32", + "name": "flatten", + "trainable": True, }, - 'bias_regularizer': None, - 'dtype': 'float32', - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } + "inbound_nodes": [[["conv2d", 0, 0, {}]]], + "name": "flatten", + }, + { + "class_name": "Dense", + "config": { + "activation": "linear", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dtype": "float32", + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "dense_2", + "trainable": True, + "units": 1, + "use_bias": True, }, - 'kernel_regularizer': None, - 'name': 'dense_2', - 'trainable': True, - 'units': 1, - 'use_bias': True + "inbound_nodes": [[["flatten", 0, 0, {}]]], + "name": "dense_2", }, - 'inbound_nodes': [[['flatten', 0, 0, {}]]], - 'name': 'dense_2' - }], - 'name': 'model_1', - 'output_layers': [['dense_2', 0, 0]] + ], + "name": "model_1", + "output_layers": [["dense_2", 0, 0]], } # inputs = tf.keras.Input((10, 3)) @@ -188,153 +177,137 @@ # outputs = tf.keras.layers.Dense(1)(x) # model = tf.keras.Model(inputs, outputs) FUNCTIONAL_LSTM = { - 'input_layers': [['input_5', 0, 0]], - 'layers': [{ - 'class_name': 'InputLayer', 
- 'config': { - 'batch_input_shape': (None, 10, 3), - 'dtype': 'float32', - 'name': 'input_5', - 'ragged': False, - 'sparse': False - }, - 'inbound_nodes': [], - 'name': 'input_5' - }, { - 'class_name': 'LSTM', - 'config': { - 'activation': 'tanh', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} - }, - 'bias_regularizer': None, - 'dropout': 0.0, - 'dtype': 'float32', - 'go_backwards': False, - 'implementation': 2, - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } + "input_layers": [["input_5", 0, 0]], + "layers": [ + { + "class_name": "InputLayer", + "config": { + "batch_input_shape": (None, 10, 3), + "dtype": "float32", + "name": "input_5", + "ragged": False, + "sparse": False, }, - 'kernel_regularizer': None, - 'name': 'lstm_2', - 'recurrent_activation': 'sigmoid', - 'recurrent_constraint': None, - 'recurrent_dropout': 0.0, - 'recurrent_initializer': { - 'class_name': 'Orthogonal', - 'config': { - 'gain': 1.0, - 'seed': None - } - }, - 'recurrent_regularizer': None, - 'return_sequences': False, - 'return_state': False, - 'stateful': False, - 'time_major': False, - 'trainable': True, - 'unit_forget_bias': True, - 'units': 10, - 'unroll': False, - 'use_bias': True + "inbound_nodes": [], + "name": "input_5", }, - 'inbound_nodes': [[['input_5', 0, 0, {}]]], - 'name': 'lstm_2' - }, { - 'class_name': 'Dense', - 'config': { - 'activation': 'linear', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} + { + "class_name": "LSTM", + "config": { + "activation": "tanh", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dropout": 0.0, + "dtype": "float32", + "go_backwards": False, + "implementation": 2, + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "lstm_2", + "recurrent_activation": "sigmoid", + "recurrent_constraint": None, + "recurrent_dropout": 0.0, + "recurrent_initializer": { + "class_name": "Orthogonal", + "config": {"gain": 1.0, "seed": None}, + }, + "recurrent_regularizer": None, + "return_sequences": False, + "return_state": False, + "stateful": False, + "time_major": False, + "trainable": True, + "unit_forget_bias": True, + "units": 10, + "unroll": False, + "use_bias": True, }, - 'bias_regularizer': None, - 'dtype': 'float32', - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } + "inbound_nodes": [[["input_5", 0, 0, {}]]], + "name": "lstm_2", + }, + { + "class_name": "Dense", + "config": { + "activation": "linear", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dtype": "float32", + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "dense_4", + "trainable": True, + "units": 1, + "use_bias": True, }, - 'kernel_regularizer': None, - 'name': 'dense_4', - 'trainable': True, - 'units': 1, - 'use_bias': True + "inbound_nodes": [[["lstm_2", 0, 0, {}]]], + "name": "dense_4", }, - 'inbound_nodes': [[['lstm_2', 0, 0, {}]]], - 'name': 'dense_4' - }], - 'name': 'model_3', - 'output_layers': 
[['dense_4', 0, 0]] + ], + "name": "model_3", + "output_layers": [["dense_4", 0, 0]], } # model = tf.keras.Sequential() # model.add(tf.keras.layers.Dense(10)) # model.add(tf.keras.layers.Dense(1)) SEQUENTIAL_DNN = { - 'layers': [{ - 'class_name': 'Dense', - 'config': { - 'activation': 'linear', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} - }, - 'bias_regularizer': None, - 'dtype': 'float32', - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } + "layers": [ + { + "class_name": "Dense", + "config": { + "activation": "linear", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dtype": "float32", + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "dense_2", + "trainable": True, + "units": 10, + "use_bias": True, }, - 'kernel_regularizer': None, - 'name': 'dense_2', - 'trainable': True, - 'units': 10, - 'use_bias': True - } - }, { - 'class_name': 'Dense', - 'config': { - 'activation': 'linear', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} - }, - 'bias_regularizer': None, - 'dtype': 'float32', - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } + }, + { + "class_name": "Dense", + "config": { + "activation": "linear", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dtype": "float32", + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "dense_3", + "trainable": True, + "units": 1, + "use_bias": True, }, - 'kernel_regularizer': None, - 'name': 'dense_3', - 'trainable': True, - 'units': 1, - 'use_bias': True - } - }], - 'name': 'sequential_1' + }, + ], + "name": "sequential_1", } # model = tf.keras.Sequential() @@ -342,147 +315,131 @@ # model.add(tf.keras.layers.Flatten()) # model.add(tf.keras.layers.Dense(1)) SEQUENTIAL_CNN = { - 'layers': [{ - 'class_name': 'Conv2D', - 'config': { - 'activation': 'linear', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} + "layers": [ + { + "class_name": "Conv2D", + "config": { + "activation": "linear", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "data_format": "channels_last", + "dilation_rate": (1, 1), + "dtype": "float32", + "filters": 32, + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "kernel_size": (3, 3), + "name": "conv2d_1", + "padding": "valid", + "strides": (1, 1), + "trainable": True, + "use_bias": True, }, - 'bias_regularizer': None, - 'data_format': 'channels_last', - 'dilation_rate': (1, 1), - 'dtype': 'float32', - 'filters': 32, - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } - }, - 'kernel_regularizer': None, - 'kernel_size': (3, 3), - 'name': 'conv2d_1', - 'padding': 'valid', - 'strides': (1, 
1), - 'trainable': True, - 'use_bias': True - } - }, { - 'class_name': 'Flatten', - 'config': { - 'data_format': 'channels_last', - 'dtype': 'float32', - 'name': 'flatten_1', - 'trainable': True - } - }, { - 'class_name': 'Dense', - 'config': { - 'activation': 'linear', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} + }, + { + "class_name": "Flatten", + "config": { + "data_format": "channels_last", + "dtype": "float32", + "name": "flatten_1", + "trainable": True, }, - 'bias_regularizer': None, - 'dtype': 'float32', - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } + }, + { + "class_name": "Dense", + "config": { + "activation": "linear", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dtype": "float32", + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "dense_6", + "trainable": True, + "units": 1, + "use_bias": True, }, - 'kernel_regularizer': None, - 'name': 'dense_6', - 'trainable': True, - 'units': 1, - 'use_bias': True - } - }], - 'name': 'sequential_4' + }, + ], + "name": "sequential_4", } # model = tf.keras.Sequential() # model.add(tf.keras.layers.LSTM(10)) # model.add(tf.keras.layers.Dense(1)) SEQUENTIAL_LSTM = { - 'layers': [{ - 'class_name': 'LSTM', - 'config': { - 'activation': 'tanh', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} - }, - 'bias_regularizer': None, - 'dropout': 0.0, - 'dtype': 'float32', - 'go_backwards': False, - 'implementation': 2, - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } - }, - 'kernel_regularizer': None, - 'name': 'lstm', - 'recurrent_activation': 'sigmoid', - 'recurrent_constraint': None, - 'recurrent_dropout': 0.0, - 'recurrent_initializer': { - 'class_name': 'Orthogonal', - 'config': { - 'gain': 1.0, - 'seed': None - } + "layers": [ + { + "class_name": "LSTM", + "config": { + "activation": "tanh", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dropout": 0.0, + "dtype": "float32", + "go_backwards": False, + "implementation": 2, + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "lstm", + "recurrent_activation": "sigmoid", + "recurrent_constraint": None, + "recurrent_dropout": 0.0, + "recurrent_initializer": { + "class_name": "Orthogonal", + "config": {"gain": 1.0, "seed": None}, + }, + "recurrent_regularizer": None, + "return_sequences": False, + "return_state": False, + "stateful": False, + "time_major": False, + "trainable": True, + "unit_forget_bias": True, + "units": 10, + "unroll": False, + "use_bias": True, }, - 'recurrent_regularizer': None, - 'return_sequences': False, - 'return_state': False, - 'stateful': False, - 'time_major': False, - 'trainable': True, - 'unit_forget_bias': True, - 'units': 10, - 'unroll': False, - 'use_bias': True - } - }, { - 'class_name': 'Dense', - 'config': { - 'activation': 'linear', - 'activity_regularizer': None, - 'bias_constraint': None, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': {} - 
}, - 'bias_regularizer': None, - 'dtype': 'float32', - 'kernel_constraint': None, - 'kernel_initializer': { - 'class_name': 'GlorotUniform', - 'config': { - 'seed': None - } + }, + { + "class_name": "Dense", + "config": { + "activation": "linear", + "activity_regularizer": None, + "bias_constraint": None, + "bias_initializer": {"class_name": "Zeros", "config": {}}, + "bias_regularizer": None, + "dtype": "float32", + "kernel_constraint": None, + "kernel_initializer": { + "class_name": "GlorotUniform", + "config": {"seed": None}, + }, + "kernel_regularizer": None, + "name": "dense_4", + "trainable": True, + "units": 1, + "use_bias": True, }, - 'kernel_regularizer': None, - 'name': 'dense_4', - 'trainable': True, - 'units': 1, - 'use_bias': True - } - }], - 'name': 'sequential_2' + }, + ], + "name": "sequential_2", } diff --git a/keras/tests/get_config_test.py b/keras/tests/get_config_test.py index b5d42a589913..73c24a920e4b 100644 --- a/keras/tests/get_config_test.py +++ b/keras/tests/get_config_test.py @@ -11,44 +11,49 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#,============================================================================ +# ,============================================================================ """Tests for `get_config` backwards compatibility.""" +import tensorflow.compat.v2 as tf + from keras.engine import sequential from keras.engine import training from keras.testing_infra import test_combinations from keras.tests import get_config_samples -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class TestGetConfigBackwardsCompatible(test_combinations.TestCase): - - def test_functional_dnn(self): - model = training.Model.from_config(get_config_samples.FUNCTIONAL_DNN) - self.assertLen(model.layers, 3) - - def test_functional_cnn(self): - model = training.Model.from_config(get_config_samples.FUNCTIONAL_CNN) - self.assertLen(model.layers, 4) - - def test_functional_lstm(self): - model = training.Model.from_config(get_config_samples.FUNCTIONAL_LSTM) - self.assertLen(model.layers, 3) - - def test_sequential_dnn(self): - model = sequential.Sequential.from_config(get_config_samples.SEQUENTIAL_DNN) - self.assertLen(model.layers, 2) - - def test_sequential_cnn(self): - model = sequential.Sequential.from_config(get_config_samples.SEQUENTIAL_CNN) - self.assertLen(model.layers, 3) - - def test_sequential_lstm(self): - model = sequential.Sequential.from_config( - get_config_samples.SEQUENTIAL_LSTM) - self.assertLen(model.layers, 2) - - -if __name__ == '__main__': - tf.test.main() + def test_functional_dnn(self): + model = training.Model.from_config(get_config_samples.FUNCTIONAL_DNN) + self.assertLen(model.layers, 3) + + def test_functional_cnn(self): + model = training.Model.from_config(get_config_samples.FUNCTIONAL_CNN) + self.assertLen(model.layers, 4) + + def test_functional_lstm(self): + model = training.Model.from_config(get_config_samples.FUNCTIONAL_LSTM) + self.assertLen(model.layers, 3) + + def test_sequential_dnn(self): + model = sequential.Sequential.from_config( + get_config_samples.SEQUENTIAL_DNN + ) + self.assertLen(model.layers, 2) + + def test_sequential_cnn(self): + model = sequential.Sequential.from_config( + get_config_samples.SEQUENTIAL_CNN + ) + self.assertLen(model.layers, 3) + + def test_sequential_lstm(self): + model = sequential.Sequential.from_config( + get_config_samples.SEQUENTIAL_LSTM + ) + 
self.assertLen(model.layers, 2) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/tests/graph_util_test.py b/keras/tests/graph_util_test.py index 6ebbcc72a08d..40884cf9d880 100644 --- a/keras/tests/graph_util_test.py +++ b/keras/tests/graph_util_test.py @@ -14,133 +14,164 @@ # ============================================================================== """Tests for tensorflow.python.client.graph_util.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np -from tensorflow.core.protobuf import meta_graph_pb2 import keras + +# isort: off +from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.python.grappler import tf_optimizer -from tensorflow.python.training.saver import export_meta_graph +from tensorflow.python.training.saver import ( + export_meta_graph, +) class ConvertVariablesToConstantsTest(tf.test.TestCase): - - def _get_tensors(self, sess, tensor_list): - """Returns a list of Tensor objects from the Session.""" - return [ - sess.graph.get_tensor_by_name(tensor.name) for tensor in tensor_list - ] - - def _get_tensor_names(self, tensors): - """Returns a list of string names for the tensors specified.""" - return [tensor.name.split(":")[0] for tensor in tensors] - - def _evaluate_graph_def(self, graph_def, inputs, outputs, input_data): - """Evaluates the GraphDef using Sessions.""" - with tf.Graph().as_default() as graph: - tf.import_graph_def(graph_def, name="") - sess = tf.compat.v1.Session(graph=graph) - - input_tensors = self._get_tensors(sess, inputs) - output_tensors = self._get_tensors(sess, outputs) - return sess.run( - output_tensors, feed_dict=dict(zip(input_tensors, input_data))) - - def _ensure_no_variables_in_graph(self, graph_def): - """Ensures there are no variables in the graph.""" - for node in graph_def.node: - self.assertNotIn( - node.op, ["Variable", "VariableV2", "VarHandleOp", "ReadVariableOp"]) - - def _test_converted_keras_model(self, model, constant_graph_def, input_data): - """Compares the converted Keras model.""" - expected_value = model.predict(input_data) - actual_value = self._evaluate_graph_def(constant_graph_def, model.inputs, - model.outputs, [input_data]) - np.testing.assert_almost_equal(np.array([expected_value]), actual_value, 5) - - def _inline_functions(self, graph_def, arrays): - meta_graph = export_meta_graph(graph_def=graph_def) - fetch_collection = meta_graph_pb2.CollectionDef() - for name in arrays: - fetch_collection.node_list.value.append(name) - meta_graph.collection_def["train_op"].CopyFrom(fetch_collection) - - # Initialize RewriterConfig with everything disabled except function - # inlining. - config = tf.compat.v1.ConfigProto() - rewrite_options = config.graph_options.rewrite_options - rewrite_options.optimizers.append("function") - return tf_optimizer.OptimizeGraph(config, meta_graph) - - def testWithEmbeddings(self): - """Freezes a graph with embeddings.""" - state_input = keras.layers.Input( - shape=(1,), name="state_input", dtype="int32") - output = keras.layers.Embedding( - output_dim=16, input_dim=100, input_length=1, name="state")( - state_input) - model = keras.models.Model(inputs=[state_input], outputs=[output]) - model.compile( - loss={"state": "sparse_categorical_crossentropy"}, optimizer="adam") - - # Freeze the graph. 
- sess = keras.backend.get_session() - variable_graph_def = sess.graph_def - output_tensor = self._get_tensor_names(model.outputs) - constant_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants( - sess, variable_graph_def, output_tensor) - - # Validate converted graph. - input_data = np.array(np.random.random_sample([1, 1]), dtype=np.int32) - self._ensure_no_variables_in_graph(constant_graph_def) - self._test_converted_keras_model(model, constant_graph_def, input_data) - - def testKerasBatchNorm(self): - """Freezes a graph with Keras batch norm.""" - inputs = keras.layers.Input(shape=(128, 128, 1)) - batch_norm = keras.layers.BatchNormalization()(inputs) - model = keras.models.Model(inputs, batch_norm, name="test") - model.compile( - optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]) - tensor_names = [tensor.name for tensor in model.inputs + model.outputs] - - # Freeze the graph. - sess = keras.backend.get_session() - variable_graph_def = sess.graph_def - variable_graph_def = self._inline_functions(variable_graph_def, - tensor_names) - output_tensor = self._get_tensor_names(model.outputs) - constant_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants( - sess, variable_graph_def, output_tensor) - - # Validate converted graph. - input_data = np.array( - np.random.random_sample([1, 128, 128, 1]), dtype=np.int32) - self._ensure_no_variables_in_graph(constant_graph_def) - self._test_converted_keras_model(model, constant_graph_def, input_data) - - def testLSTM(self): - """Freezes a Keras LSTM.""" - model = keras.models.Sequential( - [keras.layers.LSTM(units=10, input_shape=(10, 10))]) - tensor_names = [tensor.name for tensor in model.inputs + model.outputs] - - # Freeze the model. - sess = keras.backend.get_session() - variable_graph_def = sess.graph_def - variable_graph_def = self._inline_functions(variable_graph_def, - tensor_names) - output_tensor = self._get_tensor_names(model.outputs) - constant_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants( - sess, variable_graph_def, output_tensor) - - # Validate converted graph. 
- input_data = np.array(np.random.random_sample([10, 10, 10]), dtype=np.int32) - self._ensure_no_variables_in_graph(constant_graph_def) - self._test_converted_keras_model(model, constant_graph_def, input_data) + def _get_tensors(self, sess, tensor_list): + """Returns a list of Tensor objects from the Session.""" + return [ + sess.graph.get_tensor_by_name(tensor.name) for tensor in tensor_list + ] + + def _get_tensor_names(self, tensors): + """Returns a list of string names for the tensors specified.""" + return [tensor.name.split(":")[0] for tensor in tensors] + + def _evaluate_graph_def(self, graph_def, inputs, outputs, input_data): + """Evaluates the GraphDef using Sessions.""" + with tf.Graph().as_default() as graph: + tf.import_graph_def(graph_def, name="") + sess = tf.compat.v1.Session(graph=graph) + + input_tensors = self._get_tensors(sess, inputs) + output_tensors = self._get_tensors(sess, outputs) + return sess.run( + output_tensors, feed_dict=dict(zip(input_tensors, input_data)) + ) + + def _ensure_no_variables_in_graph(self, graph_def): + """Ensures there are no variables in the graph.""" + for node in graph_def.node: + self.assertNotIn( + node.op, + ["Variable", "VariableV2", "VarHandleOp", "ReadVariableOp"], + ) + + def _test_converted_keras_model( + self, model, constant_graph_def, input_data + ): + """Compares the converted Keras model.""" + expected_value = model.predict(input_data) + actual_value = self._evaluate_graph_def( + constant_graph_def, model.inputs, model.outputs, [input_data] + ) + np.testing.assert_almost_equal( + np.array([expected_value]), actual_value, 5 + ) + + def _inline_functions(self, graph_def, arrays): + meta_graph = export_meta_graph(graph_def=graph_def) + fetch_collection = meta_graph_pb2.CollectionDef() + for name in arrays: + fetch_collection.node_list.value.append(name) + meta_graph.collection_def["train_op"].CopyFrom(fetch_collection) + + # Initialize RewriterConfig with everything disabled except function + # inlining. + config = tf.compat.v1.ConfigProto() + rewrite_options = config.graph_options.rewrite_options + rewrite_options.optimizers.append("function") + return tf_optimizer.OptimizeGraph(config, meta_graph) + + def testWithEmbeddings(self): + """Freezes a graph with embeddings.""" + state_input = keras.layers.Input( + shape=(1,), name="state_input", dtype="int32" + ) + output = keras.layers.Embedding( + output_dim=16, input_dim=100, input_length=1, name="state" + )(state_input) + model = keras.models.Model(inputs=[state_input], outputs=[output]) + model.compile( + loss={"state": "sparse_categorical_crossentropy"}, optimizer="adam" + ) + + # Freeze the graph. + sess = keras.backend.get_session() + variable_graph_def = sess.graph_def + output_tensor = self._get_tensor_names(model.outputs) + constant_graph_def = ( + tf.compat.v1.graph_util.convert_variables_to_constants( + sess, variable_graph_def, output_tensor + ) + ) + + # Validate converted graph. 
+ input_data = np.array(np.random.random_sample([1, 1]), dtype=np.int32) + self._ensure_no_variables_in_graph(constant_graph_def) + self._test_converted_keras_model(model, constant_graph_def, input_data) + + def testKerasBatchNorm(self): + """Freezes a graph with Keras batch norm.""" + inputs = keras.layers.Input(shape=(128, 128, 1)) + batch_norm = keras.layers.BatchNormalization()(inputs) + model = keras.models.Model(inputs, batch_norm, name="test") + model.compile( + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + tensor_names = [tensor.name for tensor in model.inputs + model.outputs] + + # Freeze the graph. + sess = keras.backend.get_session() + variable_graph_def = sess.graph_def + variable_graph_def = self._inline_functions( + variable_graph_def, tensor_names + ) + output_tensor = self._get_tensor_names(model.outputs) + constant_graph_def = ( + tf.compat.v1.graph_util.convert_variables_to_constants( + sess, variable_graph_def, output_tensor + ) + ) + + # Validate converted graph. + input_data = np.array( + np.random.random_sample([1, 128, 128, 1]), dtype=np.int32 + ) + self._ensure_no_variables_in_graph(constant_graph_def) + self._test_converted_keras_model(model, constant_graph_def, input_data) + + def testLSTM(self): + """Freezes a Keras LSTM.""" + model = keras.models.Sequential( + [keras.layers.LSTM(units=10, input_shape=(10, 10))] + ) + tensor_names = [tensor.name for tensor in model.inputs + model.outputs] + + # Freeze the model. + sess = keras.backend.get_session() + variable_graph_def = sess.graph_def + variable_graph_def = self._inline_functions( + variable_graph_def, tensor_names + ) + output_tensor = self._get_tensor_names(model.outputs) + constant_graph_def = ( + tf.compat.v1.graph_util.convert_variables_to_constants( + sess, variable_graph_def, output_tensor + ) + ) + + # Validate converted graph. 
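
# A minimal, self-contained sketch of the freeze-then-check recipe the tests
# in this class share, assuming TF1 graph mode (the module calls
# tf.compat.v1.disable_eager_execution() before tf.test.main()). The toy
# model and shapes are placeholders.
import tensorflow.compat.v2 as tf
import keras

tf.compat.v1.disable_eager_execution()
model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
sess = keras.backend.get_session()
output_names = [t.name.split(":")[0] for t in model.outputs]
frozen = tf.compat.v1.graph_util.convert_variables_to_constants(
    sess, sess.graph_def, output_names
)
# Every variable read is now baked into the GraphDef as a Const node.
assert not any(
    n.op in ("Variable", "VariableV2", "VarHandleOp", "ReadVariableOp")
    for n in frozen.node
)
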
+ input_data = np.array( + np.random.random_sample([10, 10, 10]), dtype=np.int32 + ) + self._ensure_no_variables_in_graph(constant_graph_def) + self._test_converted_keras_model(model, constant_graph_def, input_data) if __name__ == "__main__": - tf.compat.v1.disable_eager_execution() - tf.test.main() + tf.compat.v1.disable_eager_execution() + tf.test.main() diff --git a/keras/tests/integration_test.py b/keras/tests/integration_test.py index cc9c577c7ac6..1ccfa02ae2b1 100644 --- a/keras/tests/integration_test.py +++ b/keras/tests/integration_test.py @@ -14,361 +14,435 @@ # ============================================================================== """Integration tests for Keras.""" -import tensorflow.compat.v2 as tf - import os import random import numpy as np +import tensorflow.compat.v2 as tf import keras -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils +from keras import utils from keras.layers.rnn import legacy_cells from keras.legacy_tf_layers import base as base_layer -from keras import utils +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils class KerasIntegrationTest(test_combinations.TestCase): - - def _save_and_reload_model(self, model): - self.temp_dir = self.get_temp_dir() - fpath = os.path.join(self.temp_dir, - 'test_model_%s' % (random.randint(0, 1e7),)) - if tf.executing_eagerly(): - save_format = 'tf' - else: - if (not isinstance(model, keras.Sequential) and - not model._is_graph_network): - return model # Not supported - save_format = 'h5' - model.save(fpath, save_format=save_format) - model = keras.models.load_model(fpath) - return model + def _save_and_reload_model(self, model): + self.temp_dir = self.get_temp_dir() + fpath = os.path.join( + self.temp_dir, f"test_model_{random.randint(0, 10000000.0)}" + ) + if tf.executing_eagerly(): + save_format = "tf" + else: + if ( + not isinstance(model, keras.Sequential) + and not model._is_graph_network + ): + return model # Not supported + save_format = "h5" + model.save(fpath, save_format=save_format) + model = keras.models.load_model(fpath) + return model @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes class VectorClassificationIntegrationTest(test_combinations.TestCase): - - def test_vector_classification(self): - np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=100, - test_samples=0, - input_shape=(10,), - num_classes=2) - y_train = utils.to_categorical(y_train) - - model = test_utils.get_model_from_layers( - [keras.layers.Dense(16, activation='relu'), - keras.layers.Dropout(0.1), - keras.layers.Dense(y_train.shape[-1], activation='softmax')], - input_shape=x_train.shape[1:]) - model.compile( - loss='categorical_crossentropy', - optimizer=keras.optimizers.optimizer_v2.adam.Adam(0.005), - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit(x_train, y_train, epochs=10, batch_size=10, - validation_data=(x_train, y_train), - verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.7) - _, val_acc = model.evaluate(x_train, y_train) - self.assertAlmostEqual(history.history['val_acc'][-1], val_acc) - predictions = model.predict(x_train) - self.assertEqual(predictions.shape, (x_train.shape[0], 2)) - - def test_vector_classification_shared_model(self): - # Test that Sequential models that feature internal updates - # and internal losses can be shared. 
- np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=100, - test_samples=0, - input_shape=(10,), - num_classes=2) - y_train = utils.to_categorical(y_train) - - base_model = test_utils.get_model_from_layers( - [keras.layers.Dense(16, - activation='relu', - kernel_regularizer=keras.regularizers.l2(1e-5), - bias_regularizer=keras.regularizers.l2(1e-5)), - keras.layers.BatchNormalization()], - input_shape=x_train.shape[1:]) - x = keras.layers.Input(x_train.shape[1:]) - y = base_model(x) - y = keras.layers.Dense(y_train.shape[-1], activation='softmax')(y) - model = keras.models.Model(x, y) - model.compile( - loss='categorical_crossentropy', - optimizer=keras.optimizers.optimizer_v2.adam.Adam(0.005), - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - self.assertLen(model.losses, 2) - if not tf.executing_eagerly(): - self.assertLen(model.get_updates_for(x), 2) - history = model.fit(x_train, y_train, epochs=10, batch_size=10, - validation_data=(x_train, y_train), - verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.7) - _, val_acc = model.evaluate(x_train, y_train) - self.assertAlmostEqual(history.history['val_acc'][-1], val_acc) - predictions = model.predict(x_train) - self.assertEqual(predictions.shape, (x_train.shape[0], 2)) + def test_vector_classification(self): + np.random.seed(1337) + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=100, test_samples=0, input_shape=(10,), num_classes=2 + ) + y_train = utils.to_categorical(y_train) + + model = test_utils.get_model_from_layers( + [ + keras.layers.Dense(16, activation="relu"), + keras.layers.Dropout(0.1), + keras.layers.Dense(y_train.shape[-1], activation="softmax"), + ], + input_shape=x_train.shape[1:], + ) + model.compile( + loss="categorical_crossentropy", + optimizer=keras.optimizers.legacy.adam.Adam(0.005), + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit( + x_train, + y_train, + epochs=10, + batch_size=10, + validation_data=(x_train, y_train), + verbose=2, + ) + self.assertGreater(history.history["val_acc"][-1], 0.7) + _, val_acc = model.evaluate(x_train, y_train) + self.assertAlmostEqual(history.history["val_acc"][-1], val_acc) + predictions = model.predict(x_train) + self.assertEqual(predictions.shape, (x_train.shape[0], 2)) + + def test_vector_classification_shared_model(self): + # Test that Sequential models that feature internal updates + # and internal losses can be shared. 
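
# A hedged illustration of the two collections asserted on below: a Dense
# layer with both kernel and bias L2 regularizers adds two entries to
# `model.losses`, and BatchNormalization contributes two moving-statistics
# updates in graph mode. Layer sizes here are arbitrary.
import keras

block = keras.Sequential([
    keras.layers.Dense(
        4,
        kernel_regularizer=keras.regularizers.l2(1e-5),
        bias_regularizer=keras.regularizers.l2(1e-5),
        input_shape=(3,),
    ),
    keras.layers.BatchNormalization(),
])
assert len(block.losses) == 2  # one L2 penalty per regularized weight
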
+ np.random.seed(1337) + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=100, test_samples=0, input_shape=(10,), num_classes=2 + ) + y_train = utils.to_categorical(y_train) + + base_model = test_utils.get_model_from_layers( + [ + keras.layers.Dense( + 16, + activation="relu", + kernel_regularizer=keras.regularizers.l2(1e-5), + bias_regularizer=keras.regularizers.l2(1e-5), + ), + keras.layers.BatchNormalization(), + ], + input_shape=x_train.shape[1:], + ) + x = keras.layers.Input(x_train.shape[1:]) + y = base_model(x) + y = keras.layers.Dense(y_train.shape[-1], activation="softmax")(y) + model = keras.models.Model(x, y) + model.compile( + loss="categorical_crossentropy", + optimizer=keras.optimizers.legacy.adam.Adam(0.005), + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + self.assertLen(model.losses, 2) + if not tf.executing_eagerly(): + self.assertLen(model.get_updates_for(x), 2) + history = model.fit( + x_train, + y_train, + epochs=10, + batch_size=10, + validation_data=(x_train, y_train), + verbose=2, + ) + self.assertGreater(history.history["val_acc"][-1], 0.7) + _, val_acc = model.evaluate(x_train, y_train) + self.assertAlmostEqual(history.history["val_acc"][-1], val_acc) + predictions = model.predict(x_train) + self.assertEqual(predictions.shape, (x_train.shape[0], 2)) @test_combinations.run_all_keras_modes class SequentialIntegrationTest(KerasIntegrationTest): - - def test_sequential_save_and_pop(self): - # Test the following sequence of actions: - # - construct a Sequential model and train it - # - save it - # - load it - # - pop its last layer and add a new layer instead - # - continue training - np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=100, - test_samples=0, - input_shape=(10,), - num_classes=2) - y_train = utils.to_categorical(y_train) - model = keras.Sequential([ - keras.layers.Dense(16, activation='relu'), - keras.layers.Dropout(0.1), - keras.layers.Dense(y_train.shape[-1], activation='softmax') - ]) - model.compile( - loss='categorical_crossentropy', - optimizer=keras.optimizers.optimizer_v2.adam.Adam(0.005), - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x_train, y_train, epochs=1, batch_size=10, - validation_data=(x_train, y_train), - verbose=2) - model = self._save_and_reload_model(model) - - model.pop() - model.add(keras.layers.Dense(y_train.shape[-1], activation='softmax')) - - model.compile( - loss='categorical_crossentropy', - optimizer=keras.optimizers.optimizer_v2.adam.Adam(0.005), - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit(x_train, y_train, epochs=10, batch_size=10, - validation_data=(x_train, y_train), - verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.7) - model = self._save_and_reload_model(model) - _, val_acc = model.evaluate(x_train, y_train) - self.assertAlmostEqual(history.history['val_acc'][-1], val_acc) - predictions = model.predict(x_train) - self.assertEqual(predictions.shape, (x_train.shape[0], 2)) + def test_sequential_save_and_pop(self): + # Test the following sequence of actions: + # - construct a Sequential model and train it + # - save it + # - load it + # - pop its last layer and add a new layer instead + # - continue training + np.random.seed(1337) + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=100, test_samples=0, input_shape=(10,), num_classes=2 + ) + y_train = utils.to_categorical(y_train) + model = keras.Sequential( + [ + keras.layers.Dense(16, 
activation="relu"), + keras.layers.Dropout(0.1), + keras.layers.Dense(y_train.shape[-1], activation="softmax"), + ] + ) + model.compile( + loss="categorical_crossentropy", + optimizer=keras.optimizers.legacy.adam.Adam(0.005), + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit( + x_train, + y_train, + epochs=1, + batch_size=10, + validation_data=(x_train, y_train), + verbose=2, + ) + model = self._save_and_reload_model(model) + + model.pop() + model.add(keras.layers.Dense(y_train.shape[-1], activation="softmax")) + + model.compile( + loss="categorical_crossentropy", + optimizer=keras.optimizers.legacy.adam.Adam(0.005), + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit( + x_train, + y_train, + epochs=10, + batch_size=10, + validation_data=(x_train, y_train), + verbose=2, + ) + self.assertGreater(history.history["val_acc"][-1], 0.7) + model = self._save_and_reload_model(model) + _, val_acc = model.evaluate(x_train, y_train) + self.assertAlmostEqual(history.history["val_acc"][-1], val_acc) + predictions = model.predict(x_train) + self.assertEqual(predictions.shape, (x_train.shape[0], 2)) # See b/122473407 @test_combinations.run_all_keras_modes(always_skip_v1=True) class TimeseriesClassificationIntegrationTest(test_combinations.TestCase): - - @test_combinations.run_with_all_model_types - def test_timeseries_classification(self): - np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=100, - test_samples=0, - input_shape=(4, 10), - num_classes=2) - y_train = utils.to_categorical(y_train) - - layers = [ - keras.layers.LSTM(5, return_sequences=True), - keras.layers.GRU(y_train.shape[-1], activation='softmax') - ] - model = test_utils.get_model_from_layers( - layers, input_shape=x_train.shape[1:]) - model.compile( - loss='categorical_crossentropy', - optimizer=keras.optimizers.optimizer_v2.adam.Adam(0.005), - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit(x_train, y_train, epochs=15, batch_size=10, - validation_data=(x_train, y_train), - verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.7) - _, val_acc = model.evaluate(x_train, y_train) - self.assertAlmostEqual(history.history['val_acc'][-1], val_acc) - predictions = model.predict(x_train) - self.assertEqual(predictions.shape, (x_train.shape[0], 2)) - - def test_timeseries_classification_sequential_tf_rnn(self): - np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=100, - test_samples=0, - input_shape=(4, 10), - num_classes=2) - y_train = utils.to_categorical(y_train) - - with base_layer.keras_style_scope(): - model = keras.models.Sequential() - model.add(keras.layers.RNN(legacy_cells.LSTMCell(5), - return_sequences=True, - input_shape=x_train.shape[1:])) - model.add(keras.layers.RNN(legacy_cells.GRUCell(y_train.shape[-1], - activation='softmax', - dtype=tf.float32))) - model.compile( - loss='categorical_crossentropy', - optimizer=keras.optimizers.optimizer_v2.adam.Adam(0.005), - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - - history = model.fit(x_train, y_train, epochs=15, batch_size=10, - validation_data=(x_train, y_train), - verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.7) - _, val_acc = model.evaluate(x_train, y_train) - self.assertAlmostEqual(history.history['val_acc'][-1], val_acc) - predictions = model.predict(x_train) - self.assertEqual(predictions.shape, (x_train.shape[0], 2)) + 
@test_combinations.run_with_all_model_types + def test_timeseries_classification(self): + np.random.seed(1337) + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=100, + test_samples=0, + input_shape=(4, 10), + num_classes=2, + ) + y_train = utils.to_categorical(y_train) + + layers = [ + keras.layers.LSTM(5, return_sequences=True), + keras.layers.GRU(y_train.shape[-1], activation="softmax"), + ] + model = test_utils.get_model_from_layers( + layers, input_shape=x_train.shape[1:] + ) + model.compile( + loss="categorical_crossentropy", + optimizer=keras.optimizers.legacy.adam.Adam(0.005), + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit( + x_train, + y_train, + epochs=15, + batch_size=10, + validation_data=(x_train, y_train), + verbose=2, + ) + self.assertGreater(history.history["val_acc"][-1], 0.7) + _, val_acc = model.evaluate(x_train, y_train) + self.assertAlmostEqual(history.history["val_acc"][-1], val_acc) + predictions = model.predict(x_train) + self.assertEqual(predictions.shape, (x_train.shape[0], 2)) + + def test_timeseries_classification_sequential_tf_rnn(self): + np.random.seed(1337) + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=100, + test_samples=0, + input_shape=(4, 10), + num_classes=2, + ) + y_train = utils.to_categorical(y_train) + + with base_layer.keras_style_scope(): + model = keras.models.Sequential() + model.add( + keras.layers.RNN( + legacy_cells.LSTMCell(5), + return_sequences=True, + input_shape=x_train.shape[1:], + ) + ) + model.add( + keras.layers.RNN( + legacy_cells.GRUCell( + y_train.shape[-1], + activation="softmax", + dtype=tf.float32, + ) + ) + ) + model.compile( + loss="categorical_crossentropy", + optimizer=keras.optimizers.legacy.adam.Adam(0.005), + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + history = model.fit( + x_train, + y_train, + epochs=15, + batch_size=10, + validation_data=(x_train, y_train), + verbose=2, + ) + self.assertGreater(history.history["val_acc"][-1], 0.7) + _, val_acc = model.evaluate(x_train, y_train) + self.assertAlmostEqual(history.history["val_acc"][-1], val_acc) + predictions = model.predict(x_train) + self.assertEqual(predictions.shape, (x_train.shape[0], 2)) @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes class ImageClassificationIntegrationTest(test_combinations.TestCase): - - def test_image_classification(self): - np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=100, - test_samples=0, - input_shape=(10, 10, 3), - num_classes=2) - y_train = utils.to_categorical(y_train) - - layers = [ - keras.layers.Conv2D(4, 3, padding='same', activation='relu'), - keras.layers.Conv2D(8, 3, padding='same'), - keras.layers.BatchNormalization(), - keras.layers.Conv2D(8, 3, padding='same'), - keras.layers.Flatten(), - keras.layers.Dense(y_train.shape[-1], activation='softmax') - ] - model = test_utils.get_model_from_layers( - layers, input_shape=x_train.shape[1:]) - model.compile( - loss='categorical_crossentropy', - optimizer=keras.optimizers.optimizer_v2.adam.Adam(0.005), - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - history = model.fit(x_train, y_train, epochs=10, batch_size=10, - validation_data=(x_train, y_train), - verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.7) - _, val_acc = model.evaluate(x_train, y_train) - self.assertAlmostEqual(history.history['val_acc'][-1], val_acc) - predictions = model.predict(x_train) - 
self.assertEqual(predictions.shape, (x_train.shape[0], 2)) + def test_image_classification(self): + np.random.seed(1337) + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=100, + test_samples=0, + input_shape=(10, 10, 3), + num_classes=2, + ) + y_train = utils.to_categorical(y_train) + + layers = [ + keras.layers.Conv2D(4, 3, padding="same", activation="relu"), + keras.layers.Conv2D(8, 3, padding="same"), + keras.layers.BatchNormalization(), + keras.layers.Conv2D(8, 3, padding="same"), + keras.layers.Flatten(), + keras.layers.Dense(y_train.shape[-1], activation="softmax"), + ] + model = test_utils.get_model_from_layers( + layers, input_shape=x_train.shape[1:] + ) + model.compile( + loss="categorical_crossentropy", + optimizer=keras.optimizers.legacy.adam.Adam(0.005), + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + history = model.fit( + x_train, + y_train, + epochs=10, + batch_size=10, + validation_data=(x_train, y_train), + verbose=2, + ) + self.assertGreater(history.history["val_acc"][-1], 0.7) + _, val_acc = model.evaluate(x_train, y_train) + self.assertAlmostEqual(history.history["val_acc"][-1], val_acc) + predictions = model.predict(x_train) + self.assertEqual(predictions.shape, (x_train.shape[0], 2)) @test_combinations.run_all_keras_modes class ActivationV2IntegrationTest(test_combinations.TestCase): - """Tests activation function V2 in model exporting and loading. - - This test is to verify in TF 2.x, when 'tf.nn.softmax' is used as an - activation function, its model exporting and loading work as expected. - Check b/123041942 for details. - """ - - def test_serialization_v2_model(self): - np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data( - train_samples=100, - test_samples=0, - input_shape=(10,), - num_classes=2) - y_train = utils.to_categorical(y_train) - - model = keras.Sequential([ - keras.layers.Flatten(input_shape=x_train.shape[1:]), - keras.layers.Dense(10, activation=tf.nn.relu), - # To mimic 'tf.nn.softmax' used in TF 2.x. - keras.layers.Dense(y_train.shape[-1], activation=tf.math.softmax), - ]) - - # Check if 'softmax' is in model.get_config(). - last_layer_activation = model.get_layer(index=2).get_config()['activation'] - self.assertEqual(last_layer_activation, 'softmax') - - model.compile( - loss='categorical_crossentropy', - optimizer=keras.optimizers.optimizer_v2.adam.Adam(0.005), - metrics=['accuracy'], - run_eagerly=test_utils.should_run_eagerly()) - model.fit(x_train, y_train, epochs=2, batch_size=10, - validation_data=(x_train, y_train), - verbose=2) - - output_path = os.path.join(self.get_temp_dir(), 'tf_keras_saved_model') - model.save(output_path, save_format='tf') - loaded_model = keras.models.load_model(output_path) - self.assertEqual(model.summary(), loaded_model.summary()) + """Tests activation function V2 in model exporting and loading. + + This test is to verify in TF 2.x, when 'tf.nn.softmax' is used as an + activation function, its model exporting and loading work as expected. + Check b/123041942 for details. + """ + + def test_serialization_v2_model(self): + np.random.seed(1337) + (x_train, y_train), _ = test_utils.get_test_data( + train_samples=100, test_samples=0, input_shape=(10,), num_classes=2 + ) + y_train = utils.to_categorical(y_train) + + model = keras.Sequential( + [ + keras.layers.Flatten(input_shape=x_train.shape[1:]), + keras.layers.Dense(10, activation=tf.nn.relu), + # To mimic 'tf.nn.softmax' used in TF 2.x. 
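
# An aside expanding on the comment above, grounded in the assertion this
# test makes: passing the raw callable tf.math.softmax as an activation
# serializes to the plain string "softmax" in the layer config. Unit counts
# are arbitrary.
import tensorflow.compat.v2 as tf
import keras

layer = keras.layers.Dense(2, activation=tf.math.softmax)
assert layer.get_config()["activation"] == "softmax"
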
+ keras.layers.Dense( + y_train.shape[-1], activation=tf.math.softmax + ), + ] + ) + + # Check if 'softmax' is in model.get_config(). + last_layer_activation = model.get_layer(index=2).get_config()[ + "activation" + ] + self.assertEqual(last_layer_activation, "softmax") + + model.compile( + loss="categorical_crossentropy", + optimizer=keras.optimizers.legacy.adam.Adam(0.005), + metrics=["accuracy"], + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit( + x_train, + y_train, + epochs=2, + batch_size=10, + validation_data=(x_train, y_train), + verbose=2, + ) + + output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") + model.save(output_path, save_format="tf") + loaded_model = keras.models.load_model(output_path) + self.assertEqual(model.summary(), loaded_model.summary()) @test_combinations.run_with_all_model_types @test_utils.run_v2_only class TokenClassificationIntegrationTest(test_combinations.TestCase): - """Tests a very simple token classification model. - - The main purpose of this test is to verify that everything works as expected - when input sequences have variable length, and batches are padded only to the - maximum length of each batch. This is very common in NLP, and results in the - sequence dimension varying with each batch step for both the features - and the labels. - """ - - def test_token_classification(self): - - def densify(x, y): - return x.to_tensor(), y.to_tensor() - - utils.set_random_seed(1337) - data = tf.ragged.stack([ - np.random.randint(low=0, high=16, size=random.randint(4, 16)) - for _ in range(100) - ]) - labels = tf.ragged.stack( - [np.random.randint(low=0, high=3, size=len(arr)) for arr in data]) - features_dataset = tf.data.Dataset.from_tensor_slices(data) - labels_dataset = tf.data.Dataset.from_tensor_slices(labels) - dataset = tf.data.Dataset.zip((features_dataset, labels_dataset)) - dataset = dataset.batch(batch_size=10) - dataset = dataset.map(densify) # Pads with 0 values by default - - layers = [ - keras.layers.Embedding(16, 4), - keras.layers.Conv1D(4, 5, padding='same', activation='relu'), - keras.layers.Conv1D(8, 5, padding='same'), - keras.layers.BatchNormalization(), - keras.layers.Conv1D(3, 5, padding='same', activation='softmax'), - ] - model = test_utils.get_model_from_layers(layers, input_shape=(None,)) - model.compile( - loss='sparse_categorical_crossentropy', - optimizer='adam', - metrics=['acc']) - history = model.fit(dataset, epochs=10, validation_data=dataset, verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.5) - _, val_acc = model.evaluate(dataset) - self.assertAlmostEqual(history.history['val_acc'][-1], val_acc) - predictions = model.predict(dataset) - self.assertIsInstance(predictions, tf.RaggedTensor) - self.assertEqual(predictions.shape[0], len(dataset) * 10) - self.assertEqual(predictions.shape[-1], 3) - -if __name__ == '__main__': - tf.test.main() + """Tests a very simple token classification model. + + The main purpose of this test is to verify that everything works as expected + when input sequences have variable length, and batches are padded only to + the maximum length of each batch. This is very common in NLP, and results in + the sequence dimension varying with each batch step for both the features + and the labels. 
+ """ + + def test_token_classification(self): + def densify(x, y): + return x.to_tensor(), y.to_tensor() + + utils.set_random_seed(1337) + data = tf.ragged.stack( + [ + np.random.randint(low=0, high=16, size=random.randint(4, 16)) + for _ in range(100) + ] + ) + labels = tf.ragged.stack( + [np.random.randint(low=0, high=3, size=len(arr)) for arr in data] + ) + features_dataset = tf.data.Dataset.from_tensor_slices(data) + labels_dataset = tf.data.Dataset.from_tensor_slices(labels) + dataset = tf.data.Dataset.zip((features_dataset, labels_dataset)) + dataset = dataset.batch(batch_size=10) + dataset = dataset.map(densify) # Pads with 0 values by default + + layers = [ + keras.layers.Embedding(16, 4), + keras.layers.Conv1D(4, 5, padding="same", activation="relu"), + keras.layers.Conv1D(8, 5, padding="same"), + keras.layers.BatchNormalization(), + keras.layers.Conv1D(3, 5, padding="same", activation="softmax"), + ] + model = test_utils.get_model_from_layers(layers, input_shape=(None,)) + model.compile( + loss="sparse_categorical_crossentropy", + optimizer="adam", + metrics=["acc"], + ) + history = model.fit( + dataset, epochs=10, validation_data=dataset, verbose=2 + ) + self.assertGreater(history.history["val_acc"][-1], 0.5) + _, val_acc = model.evaluate(dataset) + self.assertAlmostEqual(history.history["val_acc"][-1], val_acc) + predictions = model.predict(dataset) + self.assertIsInstance(predictions, tf.RaggedTensor) + self.assertEqual(predictions.shape[0], len(dataset) * 10) + self.assertEqual(predictions.shape[-1], 3) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/tests/keras_doctest.py b/keras/tests/keras_doctest.py index 139432849685..90f2c66b6d4e 100644 --- a/keras/tests/keras_doctest.py +++ b/keras/tests/keras_doctest.py @@ -21,138 +21,138 @@ import os import sys +import numpy as np +import tensorflow.compat.v2 as tf from absl import flags from absl.testing import absltest + from keras.testing_infra import keras_doctest_lib -import numpy as np -import tensorflow as tf -import tensorflow.compat.v2 as tf tf.compat.v1.enable_v2_behavior() # We put doctest after absltest so that it picks up the unittest monkeypatch. # Otherwise doctest tests aren't runnable at all. -import doctest # pylint: disable=g-import-not-at-top,g-bad-import-order +import doctest # noqa: E402 FLAGS = flags.FLAGS -flags.DEFINE_string('module', None, 'A specific module to run doctest on.') -flags.DEFINE_boolean('list', None, - 'List all the modules in the core package imported.') -flags.DEFINE_string('file', None, 'A specific file to run doctest on.') +flags.DEFINE_string("module", None, "A specific module to run doctest on.") +flags.DEFINE_boolean( + "list", None, "List all the modules in the core package imported." +) +flags.DEFINE_string("file", None, "A specific file to run doctest on.") -flags.mark_flags_as_mutual_exclusive(['module', 'file']) -flags.mark_flags_as_mutual_exclusive(['list', 'file']) +flags.mark_flags_as_mutual_exclusive(["module", "file"]) +flags.mark_flags_as_mutual_exclusive(["list", "file"]) -PACKAGE = 'keras.' +PACKAGE = "keras." def find_modules(): - """Finds all the modules in the core package imported. + """Finds all the modules in the core package imported. - Returns: - A list containing all the modules in tensorflow.python. - """ + Returns: + A list containing all the modules in tensorflow.python. 
+ """ - tf_modules = [] - for name, module in sys.modules.items(): - if name.startswith(PACKAGE): - tf_modules.append(module) + tf_modules = [] + for name, module in sys.modules.items(): + if name.startswith(PACKAGE): + tf_modules.append(module) - return tf_modules + return tf_modules def filter_on_submodules(all_modules, submodule): - """Filters all the modules based on the module flag. + """Filters all the modules based on the module flag. - The module flag has to be relative to the core package imported. - For example, if `submodule=keras.layers` then, this function will return - all the modules in the submodule. + The module flag has to be relative to the core package imported. + For example, if `submodule=keras.layers` then, this function will return + all the modules in the submodule. - Args: - all_modules: All the modules in the core package. - submodule: Submodule to filter from all the modules. + Args: + all_modules: All the modules in the core package. + submodule: Submodule to filter from all the modules. - Returns: - All the modules in the submodule. - """ + Returns: + All the modules in the submodule. + """ - filtered_modules = [ - mod for mod in all_modules if PACKAGE + submodule in mod.__name__ - ] - return filtered_modules + filtered_modules = [ + mod for mod in all_modules if PACKAGE + submodule in mod.__name__ + ] + return filtered_modules def get_module_and_inject_docstring(file_path): - """Replaces the docstring of the module with the changed file's content. + """Replaces the docstring of the module with the changed file's content. - Args: - file_path: Path to the file + Args: + file_path: Path to the file - Returns: - A list containing the module changed by the file. - """ + Returns: + A list containing the module changed by the file. 
+ """ - file_path = os.path.abspath(file_path) - mod_index = file_path.find(PACKAGE.replace('.', os.sep)) - file_mod_name, _ = os.path.splitext(file_path[mod_index:]) - file_module = sys.modules[file_mod_name.replace(os.sep, '.')] + file_path = os.path.abspath(file_path) + mod_index = file_path.find(PACKAGE.replace(".", os.sep)) + file_mod_name, _ = os.path.splitext(file_path[mod_index:]) + file_module = sys.modules[file_mod_name.replace(os.sep, ".")] - with open(file_path, 'r') as f: - content = f.read() + with open(file_path, "r") as f: + content = f.read() - file_module.__doc__ = content + file_module.__doc__ = content - return [file_module] + return [file_module] class TfTestCase(tf.test.TestCase): + def set_up(self, _): + self.setUp() - def set_up(self, _): - self.setUp() - - def tear_down(self, _): - self.tearDown() + def tear_down(self, _): + self.tearDown() def load_tests(unused_loader, tests, unused_ignore): - """Loads all the tests in the docstrings and runs them.""" - - tf_modules = find_modules() - - if FLAGS.module: - tf_modules = filter_on_submodules(tf_modules, FLAGS.module) - - if FLAGS.list: - print('**************************************************') - for mod in tf_modules: - print(mod.__name__) - print('**************************************************') + """Loads all the tests in the docstrings and runs them.""" + + tf_modules = find_modules() + + if FLAGS.module: + tf_modules = filter_on_submodules(tf_modules, FLAGS.module) + + if FLAGS.list: + print("**************************************************") + for mod in tf_modules: + print(mod.__name__) + print("**************************************************") + return tests + + if FLAGS.file: + tf_modules = get_module_and_inject_docstring(FLAGS.file) + + for module in tf_modules: + testcase = TfTestCase() + tests.addTests( + doctest.DocTestSuite( + module, + test_finder=doctest.DocTestFinder(exclude_empty=False), + extraglobs={"tf": tf, "np": np, "os": os}, + setUp=testcase.set_up, + tearDown=testcase.tear_down, + checker=keras_doctest_lib.KerasDoctestOutputChecker(), + optionflags=( + doctest.ELLIPSIS + | doctest.NORMALIZE_WHITESPACE + | doctest.IGNORE_EXCEPTION_DETAIL + | doctest.DONT_ACCEPT_BLANKLINE + ), + ) + ) return tests - if FLAGS.file: - tf_modules = get_module_and_inject_docstring(FLAGS.file) - - for module in tf_modules: - testcase = TfTestCase() - tests.addTests( - doctest.DocTestSuite( - module, - test_finder=doctest.DocTestFinder(exclude_empty=False), - extraglobs={ - 'tf': tf, - 'np': np, - 'os': os - }, - setUp=testcase.set_up, - tearDown=testcase.tear_down, - checker=keras_doctest_lib.KerasDoctestOutputChecker(), - optionflags=(doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE - | doctest.IGNORE_EXCEPTION_DETAIL - | doctest.DONT_ACCEPT_BLANKLINE), - )) - return tests - - -if __name__ == '__main__': - absltest.main() + +if __name__ == "__main__": + absltest.main() diff --git a/keras/tests/memory_checker_test.py b/keras/tests/memory_checker_test.py index 429aee5f2d8a..23373a20a7d3 100644 --- a/keras/tests/memory_checker_test.py +++ b/keras/tests/memory_checker_test.py @@ -13,64 +13,70 @@ # limitations under the License. 
# ============================================================================= +import tensorflow.compat.v2 as tf + import keras -import tensorflow.compat.v2 as tf -from tensorflow.python.framework.memory_checker import MemoryChecker +# isort: off +from tensorflow.python.framework.memory_checker import ( + MemoryChecker, +) class MemoryCheckerTest(tf.test.TestCase): - - def testKerasBasic(self): - # TODO(kkb): Fix the slowness on Forge. - self.skipTest('This test is too slow on Forge so disabled for now.') - - x = tf.zeros([1, 1]) - y = tf.constant([[3]]) - model = keras.models.Sequential() - model.add(keras.layers.Dense(1, input_dim=1)) - model.compile(loss='mean_squared_error') - - with MemoryChecker() as memory_checker: - for _ in range(10): - model.fit(x, y) - model.evaluate(x, y) - memory_checker.record_snapshot() - - memory_checker.report() - memory_checker.assert_no_leak_if_all_possibly_except_one() - - def testKerasAdvanced(self): - # TODO(kkb): Fix the slowness on Forge. - self.skipTest('This test is too slow on Forge so disabled for now.') - - # A real world example taken from the following. - # https://github.com/tensorflow/tensorflow/issues/32500 - # b/142150794 - - with MemoryChecker() as memory_checker: - rows = 6 - columns = 7 - model = keras.Sequential([ - keras.layers.Flatten(input_shape=[rows * columns, 3]), - keras.layers.Dense(7, input_shape=[rows * columns * 3]), - ]) - - model.compile( - optimizer=keras.optimizers.optimizer_v2.gradient_descent.SGD(lr=0.01), - loss='mean_squared_error', - metrics=['accuracy']) - states = [[1] * rows * columns for _ in range(20)] - f = tf.one_hot(states, dtype='float32', depth=3) - - for _ in range(20): - model.predict(f, steps=10) - memory_checker.record_snapshot() - - memory_checker.report() - memory_checker.assert_no_leak_if_all_possibly_except_one() - - -if __name__ == '__main__': - tf.compat.v1.enable_eager_execution() - tf.test.main() + def testKerasBasic(self): + # TODO(kkb): Fix the slowness on Forge. + self.skipTest("This test is too slow on Forge so disabled for now.") + + x = tf.zeros([1, 1]) + y = tf.constant([[3]]) + model = keras.models.Sequential() + model.add(keras.layers.Dense(1, input_dim=1)) + model.compile(loss="mean_squared_error") + + with MemoryChecker() as memory_checker: + for _ in range(10): + model.fit(x, y) + model.evaluate(x, y) + memory_checker.record_snapshot() + + memory_checker.report() + memory_checker.assert_no_leak_if_all_possibly_except_one() + + def testKerasAdvanced(self): + # TODO(kkb): Fix the slowness on Forge. + self.skipTest("This test is too slow on Forge so disabled for now.") + + # A real world example taken from the following. 
+ # https://github.com/tensorflow/tensorflow/issues/32500 + # b/142150794 + + with MemoryChecker() as memory_checker: + rows = 6 + columns = 7 + model = keras.Sequential( + [ + keras.layers.Flatten(input_shape=[rows * columns, 3]), + keras.layers.Dense(7, input_shape=[rows * columns * 3]), + ] + ) + + model.compile( + optimizer=keras.optimizers.legacy.gradient_descent.SGD(lr=0.01), + loss="mean_squared_error", + metrics=["accuracy"], + ) + states = [[1] * rows * columns for _ in range(20)] + f = tf.one_hot(states, dtype="float32", depth=3) + + for _ in range(20): + model.predict(f, steps=10) + memory_checker.record_snapshot() + + memory_checker.report() + memory_checker.assert_no_leak_if_all_possibly_except_one() + + +if __name__ == "__main__": + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/tests/memory_test.py b/keras/tests/memory_test.py index ffba441cafe3..4f3cb4f9cea3 100644 --- a/keras/tests/memory_test.py +++ b/keras/tests/memory_test.py @@ -23,52 +23,55 @@ import tensorflow.compat.v2 as tf import keras -from tensorflow.python.eager.memory_tests import memory_test_util + +# isort: off +from tensorflow.python.eager.memory_tests import ( + memory_test_util, +) class SingleLayerNet(keras.Model): - """Simple keras model used to ensure that there are no leaks.""" + """Simple keras model used to ensure that there are no leaks.""" - def __init__(self): - super().__init__() - self.fc1 = keras.layers.Dense(5) + def __init__(self): + super().__init__() + self.fc1 = keras.layers.Dense(5) - def call(self, x): - return self.fc1(x) + def call(self, x): + return self.fc1(x) class MemoryTest(tf.test.TestCase): + def testMemoryLeakInSimpleModelForwardOnly(self): + if not memory_test_util.memory_profiler_is_available(): + self.skipTest("memory_profiler required to run this test") - def testMemoryLeakInSimpleModelForwardOnly(self): - if not memory_test_util.memory_profiler_is_available(): - self.skipTest("memory_profiler required to run this test") - - inputs = tf.zeros([32, 100], tf.float32) - net = SingleLayerNet() + inputs = tf.zeros([1000, 1000], tf.float32) + net = SingleLayerNet() - def f(): - with tf.GradientTape(): - net(inputs) + def f(): + with tf.GradientTape(): + net(inputs) - memory_test_util.assert_no_leak(f) + memory_test_util.assert_no_leak(f, num_iters=1000) - def testMemoryLeakInSimpleModelForwardAndBackward(self): - if not memory_test_util.memory_profiler_is_available(): - self.skipTest("memory_profiler required to run this test") + def testMemoryLeakInSimpleModelForwardAndBackward(self): + if not memory_test_util.memory_profiler_is_available(): + self.skipTest("memory_profiler required to run this test") - inputs = tf.zeros([32, 100], tf.float32) - net = SingleLayerNet() + inputs = tf.zeros([1000, 1000], tf.float32) + net = SingleLayerNet() - def f(): - with tf.GradientTape() as tape: - result = net(inputs) + def f(): + with tf.GradientTape() as tape: + result = net(inputs) - tape.gradient(result, net.variables) + tape.gradient(result, net.variables) - del tape + del tape - memory_test_util.assert_no_leak(f) + memory_test_util.assert_no_leak(f, num_iters=1000) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/tests/model_architectures.py b/keras/tests/model_architectures.py index e6237dfe4ec8..b3bd88641990 100644 --- a/keras/tests/model_architectures.py +++ b/keras/tests/model_architectures.py @@ -19,275 +19,297 @@ import keras # Declaring namedtuple() -ModelFn = collections.namedtuple('ModelFn', - ['model', 'input_shape', 
'target_shape']) +ModelFn = collections.namedtuple( + "ModelFn", ["model", "input_shape", "target_shape"] +) def basic_sequential(): - """Basic sequential model.""" - model = keras.Sequential([ - keras.layers.Dense(3, activation='relu', input_shape=(3,)), - keras.layers.Dense(2, activation='softmax'), - ]) - return ModelFn(model, (None, 3), (None, 2)) + """Basic sequential model.""" + model = keras.Sequential( + [ + keras.layers.Dense(3, activation="relu", input_shape=(3,)), + keras.layers.Dense(2, activation="softmax"), + ] + ) + return ModelFn(model, (None, 3), (None, 2)) def basic_sequential_deferred(): - """Sequential model with deferred input shape.""" - model = keras.Sequential([ - keras.layers.Dense(3, activation='relu'), - keras.layers.Dense(2, activation='softmax'), - ]) - return ModelFn(model, (None, 3), (None, 2)) + """Sequential model with deferred input shape.""" + model = keras.Sequential( + [ + keras.layers.Dense(3, activation="relu"), + keras.layers.Dense(2, activation="softmax"), + ] + ) + return ModelFn(model, (None, 3), (None, 2)) def stacked_rnn(): - """Stacked RNN model.""" - inputs = keras.Input((None, 3)) - layer = keras.layers.RNN([keras.layers.LSTMCell(2) for _ in range(3)]) - x = layer(inputs) - outputs = keras.layers.Dense(2)(x) - model = keras.Model(inputs, outputs) - return ModelFn(model, (None, 4, 3), (None, 2)) + """Stacked RNN model.""" + inputs = keras.Input((None, 3)) + layer = keras.layers.RNN([keras.layers.LSTMCell(2) for _ in range(3)]) + x = layer(inputs) + outputs = keras.layers.Dense(2)(x) + model = keras.Model(inputs, outputs) + return ModelFn(model, (None, 4, 3), (None, 2)) def lstm(): - """LSTM model.""" - inputs = keras.Input((None, 3)) - x = keras.layers.LSTM(4, return_sequences=True)(inputs) - x = keras.layers.LSTM(3, return_sequences=True)(x) - x = keras.layers.LSTM(2, return_sequences=False)(x) - outputs = keras.layers.Dense(2)(x) - model = keras.Model(inputs, outputs) - return ModelFn(model, (None, 4, 3), (None, 2)) + """LSTM model.""" + inputs = keras.Input((None, 3)) + x = keras.layers.LSTM(4, return_sequences=True)(inputs) + x = keras.layers.LSTM(3, return_sequences=True)(x) + x = keras.layers.LSTM(2, return_sequences=False)(x) + outputs = keras.layers.Dense(2)(x) + model = keras.Model(inputs, outputs) + return ModelFn(model, (None, 4, 3), (None, 2)) def multi_input_multi_output(): - """Multi-input Multi-output model.""" - body_input = keras.Input(shape=(None,), name='body') - tags_input = keras.Input(shape=(2,), name='tags') + """Multi-input Multi-output model.""" + body_input = keras.Input(shape=(None,), name="body") + tags_input = keras.Input(shape=(2,), name="tags") - x = keras.layers.Embedding(10, 4)(body_input) - body_features = keras.layers.LSTM(5)(x) - x = keras.layers.concatenate([body_features, tags_input]) + x = keras.layers.Embedding(10, 4)(body_input) + body_features = keras.layers.LSTM(5)(x) + x = keras.layers.concatenate([body_features, tags_input]) - pred_1 = keras.layers.Dense(2, activation='sigmoid', name='priority')(x) - pred_2 = keras.layers.Dense(3, activation='softmax', name='department')(x) + pred_1 = keras.layers.Dense(2, activation="sigmoid", name="priority")(x) + pred_2 = keras.layers.Dense(3, activation="softmax", name="department")(x) - model = keras.Model( - inputs=[body_input, tags_input], outputs=[pred_1, pred_2]) - return ModelFn(model, [(None, 1), (None, 2)], [(None, 2), (None, 3)]) + model = keras.Model( + inputs=[body_input, tags_input], outputs=[pred_1, pred_2] + ) + return ModelFn(model, [(None, 1), 
(None, 2)], [(None, 2), (None, 3)]) def nested_sequential_in_functional(): - """A sequential model nested in a functional model.""" - inner_model = keras.Sequential([ - keras.layers.Dense(3, activation='relu', input_shape=(3,)), - keras.layers.Dense(2, activation='relu'), - ]) + """A sequential model nested in a functional model.""" + inner_model = keras.Sequential( + [ + keras.layers.Dense(3, activation="relu", input_shape=(3,)), + keras.layers.Dense(2, activation="relu"), + ] + ) - inputs = keras.Input(shape=(3,)) - x = inner_model(inputs) - outputs = keras.layers.Dense(2, activation='softmax')(x) - model = keras.Model(inputs, outputs) - return ModelFn(model, (None, 3), (None, 2)) + inputs = keras.Input(shape=(3,)) + x = inner_model(inputs) + outputs = keras.layers.Dense(2, activation="softmax")(x) + model = keras.Model(inputs, outputs) + return ModelFn(model, (None, 3), (None, 2)) def seq_to_seq(): - """Sequence to sequence model.""" - num_encoder_tokens = 3 - num_decoder_tokens = 3 - latent_dim = 2 - encoder_inputs = keras.Input(shape=(None, num_encoder_tokens)) - encoder = keras.layers.LSTM(latent_dim, return_state=True) - _, state_h, state_c = encoder(encoder_inputs) - encoder_states = [state_h, state_c] - decoder_inputs = keras.Input(shape=(None, num_decoder_tokens)) - decoder_lstm = keras.layers.LSTM( - latent_dim, return_sequences=True, return_state=True) - decoder_outputs, _, _ = decoder_lstm( - decoder_inputs, initial_state=encoder_states) - decoder_dense = keras.layers.Dense(num_decoder_tokens, activation='softmax') - decoder_outputs = decoder_dense(decoder_outputs) - model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs) - return ModelFn( - model, [(None, 2, num_encoder_tokens), (None, 2, num_decoder_tokens)], - (None, 2, num_decoder_tokens)) + """Sequence to sequence model.""" + num_encoder_tokens = 3 + num_decoder_tokens = 3 + latent_dim = 2 + encoder_inputs = keras.Input(shape=(None, num_encoder_tokens)) + encoder = keras.layers.LSTM(latent_dim, return_state=True) + _, state_h, state_c = encoder(encoder_inputs) + encoder_states = [state_h, state_c] + decoder_inputs = keras.Input(shape=(None, num_decoder_tokens)) + decoder_lstm = keras.layers.LSTM( + latent_dim, return_sequences=True, return_state=True + ) + decoder_outputs, _, _ = decoder_lstm( + decoder_inputs, initial_state=encoder_states + ) + decoder_dense = keras.layers.Dense(num_decoder_tokens, activation="softmax") + decoder_outputs = decoder_dense(decoder_outputs) + model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs) + return ModelFn( + model, + [(None, 2, num_encoder_tokens), (None, 2, num_decoder_tokens)], + (None, 2, num_decoder_tokens), + ) def shared_layer_functional(): - """Shared layer in a functional model.""" - main_input = keras.Input(shape=(10,), dtype='int32', name='main_input') - x = keras.layers.Embedding( - output_dim=5, input_dim=4, input_length=10)(main_input) - lstm_out = keras.layers.LSTM(3)(x) - auxiliary_output = keras.layers.Dense( - 1, activation='sigmoid', name='aux_output')(lstm_out) - auxiliary_input = keras.Input(shape=(5,), name='aux_input') - x = keras.layers.concatenate([lstm_out, auxiliary_input]) - x = keras.layers.Dense(2, activation='relu')(x) - main_output = keras.layers.Dense( - 1, activation='sigmoid', name='main_output')(x) - model = keras.Model( - inputs=[main_input, auxiliary_input], - outputs=[main_output, auxiliary_output]) - return ModelFn(model, [(None, 10), (None, 5)], [(None, 1), (None, 1)]) + """Shared layer in a functional model.""" + 
main_input = keras.Input(shape=(10,), dtype="int32", name="main_input") + x = keras.layers.Embedding(output_dim=5, input_dim=4, input_length=10)( + main_input + ) + lstm_out = keras.layers.LSTM(3)(x) + auxiliary_output = keras.layers.Dense( + 1, activation="sigmoid", name="aux_output" + )(lstm_out) + auxiliary_input = keras.Input(shape=(5,), name="aux_input") + x = keras.layers.concatenate([lstm_out, auxiliary_input]) + x = keras.layers.Dense(2, activation="relu")(x) + main_output = keras.layers.Dense( + 1, activation="sigmoid", name="main_output" + )(x) + model = keras.Model( + inputs=[main_input, auxiliary_input], + outputs=[main_output, auxiliary_output], + ) + return ModelFn(model, [(None, 10), (None, 5)], [(None, 1), (None, 1)]) def shared_sequential(): - """Shared sequential model in a functional model.""" - inner_model = keras.Sequential([ - keras.layers.Conv2D(2, 3, activation='relu'), - keras.layers.Conv2D(2, 3, activation='relu'), - ]) - inputs_1 = keras.Input((5, 5, 3)) - inputs_2 = keras.Input((5, 5, 3)) - x1 = inner_model(inputs_1) - x2 = inner_model(inputs_2) - x = keras.layers.concatenate([x1, x2]) - outputs = keras.layers.GlobalAveragePooling2D()(x) - model = keras.Model([inputs_1, inputs_2], outputs) - return ModelFn(model, [(None, 5, 5, 3), (None, 5, 5, 3)], (None, 4)) + """Shared sequential model in a functional model.""" + inner_model = keras.Sequential( + [ + keras.layers.Conv2D(2, 3, activation="relu"), + keras.layers.Conv2D(2, 3, activation="relu"), + ] + ) + inputs_1 = keras.Input((5, 5, 3)) + inputs_2 = keras.Input((5, 5, 3)) + x1 = inner_model(inputs_1) + x2 = inner_model(inputs_2) + x = keras.layers.concatenate([x1, x2]) + outputs = keras.layers.GlobalAveragePooling2D()(x) + model = keras.Model([inputs_1, inputs_2], outputs) + return ModelFn(model, [(None, 5, 5, 3), (None, 5, 5, 3)], (None, 4)) class MySubclassModel(keras.Model): - """A subclass model.""" + """A subclass model.""" - def __init__(self, input_dim=3): - super().__init__(name='my_subclass_model') - self._config = {'input_dim': input_dim} - self.dense1 = keras.layers.Dense(8, activation='relu') - self.dense2 = keras.layers.Dense(2, activation='softmax') - self.bn = keras.layers.BatchNormalization() - self.dp = keras.layers.Dropout(0.5) + def __init__(self, input_dim=3): + super().__init__(name="my_subclass_model") + self._config = {"input_dim": input_dim} + self.dense1 = keras.layers.Dense(8, activation="relu") + self.dense2 = keras.layers.Dense(2, activation="softmax") + self.bn = keras.layers.BatchNormalization() + self.dp = keras.layers.Dropout(0.5) - def call(self, inputs, **kwargs): - x = self.dense1(inputs) - x = self.dp(x) - x = self.bn(x) - return self.dense2(x) + def call(self, inputs, **kwargs): + x = self.dense1(inputs) + x = self.dp(x) + x = self.bn(x) + return self.dense2(x) - def get_config(self): - return self._config + def get_config(self): + return self._config - @classmethod - def from_config(cls, config): - return cls(**config) + @classmethod + def from_config(cls, config): + return cls(**config) def nested_subclassed_model(): - """A subclass model nested in another subclass model.""" + """A subclass model nested in another subclass model.""" - class NestedSubclassModel(keras.Model): - """A nested subclass model.""" + class NestedSubclassModel(keras.Model): + """A nested subclass model.""" - def __init__(self): - super().__init__() - self.dense1 = keras.layers.Dense(4, activation='relu') - self.dense2 = keras.layers.Dense(2, activation='relu') - self.bn = 
keras.layers.BatchNormalization() - self.inner_subclass_model = MySubclassModel() + def __init__(self): + super().__init__() + self.dense1 = keras.layers.Dense(4, activation="relu") + self.dense2 = keras.layers.Dense(2, activation="relu") + self.bn = keras.layers.BatchNormalization() + self.inner_subclass_model = MySubclassModel() - def call(self, inputs): - x = self.dense1(inputs) - x = self.bn(x) - x = self.inner_subclass_model(x) - return self.dense2(x) + def call(self, inputs): + x = self.dense1(inputs) + x = self.bn(x) + x = self.inner_subclass_model(x) + return self.dense2(x) - return ModelFn(NestedSubclassModel(), (None, 3), (None, 2)) + return ModelFn(NestedSubclassModel(), (None, 3), (None, 2)) def nested_subclassed_in_functional_model(): - """A subclass model nested in a functional model.""" - inner_subclass_model = MySubclassModel() - inputs = keras.Input(shape=(3,)) - x = inner_subclass_model(inputs) - x = keras.layers.BatchNormalization()(x) - outputs = keras.layers.Dense(2, activation='softmax')(x) - model = keras.Model(inputs, outputs) - return ModelFn(model, (None, 3), (None, 2)) + """A subclass model nested in a functional model.""" + inner_subclass_model = MySubclassModel() + inputs = keras.Input(shape=(3,)) + x = inner_subclass_model(inputs) + x = keras.layers.BatchNormalization()(x) + outputs = keras.layers.Dense(2, activation="softmax")(x) + model = keras.Model(inputs, outputs) + return ModelFn(model, (None, 3), (None, 2)) def nested_functional_in_subclassed_model(): - """A functional model nested in a subclass model.""" - def get_functional_model(): - inputs = keras.Input(shape=(4,)) - x = keras.layers.Dense(4, activation='relu')(inputs) - x = keras.layers.BatchNormalization()(x) - outputs = keras.layers.Dense(2)(x) - return keras.Model(inputs, outputs) + """A functional model nested in a subclass model.""" - class NestedFunctionalInSubclassModel(keras.Model): - """A functional nested in subclass model.""" + def get_functional_model(): + inputs = keras.Input(shape=(4,)) + x = keras.layers.Dense(4, activation="relu")(inputs) + x = keras.layers.BatchNormalization()(x) + outputs = keras.layers.Dense(2)(x) + return keras.Model(inputs, outputs) - def __init__(self): - super().__init__( - name='nested_functional_in_subclassed_model') - self.dense1 = keras.layers.Dense(4, activation='relu') - self.dense2 = keras.layers.Dense(2, activation='relu') - self.inner_functional_model = get_functional_model() + class NestedFunctionalInSubclassModel(keras.Model): + """A functional model nested in a subclass model.""" - def call(self, inputs): - x = self.dense1(inputs) - x = self.inner_functional_model(x) - return self.dense2(x) - return ModelFn(NestedFunctionalInSubclassModel(), (None, 3), (None, 2)) + def __init__(self): + super().__init__(name="nested_functional_in_subclassed_model") + self.dense1 = keras.layers.Dense(4, activation="relu") + self.dense2 = keras.layers.Dense(2, activation="relu") + self.inner_functional_model = get_functional_model() + + def call(self, inputs): + x = self.dense1(inputs) + x = self.inner_functional_model(x) + return self.dense2(x) + + return ModelFn(NestedFunctionalInSubclassModel(), (None, 3), (None, 2)) def shared_layer_subclassed_model(): - """Shared layer in a subclass model.""" + """Shared layer in a subclass model.""" + + class SharedLayerSubclassModel(keras.Model): + """A subclass model with shared layers.""" - class SharedLayerSubclassModel(keras.Model): - """A subclass model with shared layers.""" + def __init__(self): +
super().__init__(name="shared_layer_subclass_model") + self.dense = keras.layers.Dense(3, activation="relu") + self.dp = keras.layers.Dropout(0.5) + self.bn = keras.layers.BatchNormalization() - def __init__(self): - super().__init__( - name='shared_layer_subclass_model') - self.dense = keras.layers.Dense(3, activation='relu') - self.dp = keras.layers.Dropout(0.5) - self.bn = keras.layers.BatchNormalization() + def call(self, inputs): + x = self.dense(inputs) + x = self.dp(x) + x = self.bn(x) + return self.dense(x) - def call(self, inputs): - x = self.dense(inputs) - x = self.dp(x) - x = self.bn(x) - return self.dense(x) - return ModelFn(SharedLayerSubclassModel(), (None, 3), (None, 3)) + return ModelFn(SharedLayerSubclassModel(), (None, 3), (None, 3)) def functional_with_keyword_args(): - """A functional model with keyword args.""" - inputs = keras.Input(shape=(3,)) - x = keras.layers.Dense(4)(inputs) - x = keras.layers.BatchNormalization()(x) - outputs = keras.layers.Dense(2)(x) + """A functional model with keyword args.""" + inputs = keras.Input(shape=(3,)) + x = keras.layers.Dense(4)(inputs) + x = keras.layers.BatchNormalization()(x) + outputs = keras.layers.Dense(2)(x) - model = keras.Model(inputs, outputs, name='m', trainable=False) - return ModelFn(model, (None, 3), (None, 2)) + model = keras.Model(inputs, outputs, name="m", trainable=False) + return ModelFn(model, (None, 3), (None, 2)) ALL_MODELS = [ - ('basic_sequential', basic_sequential), - ('basic_sequential_deferred', basic_sequential_deferred), - ('stacked_rnn', stacked_rnn), - ('lstm', lstm), - ('multi_input_multi_output', multi_input_multi_output), - ('nested_sequential_in_functional', nested_sequential_in_functional), - ('seq_to_seq', seq_to_seq), - ('shared_layer_functional', shared_layer_functional), - ('shared_sequential', shared_sequential), - ('nested_subclassed_model', nested_subclassed_model), - ('nested_subclassed_in_functional_model', - nested_subclassed_in_functional_model), - ('nested_functional_in_subclassed_model', - nested_functional_in_subclassed_model), - ('shared_layer_subclassed_model', shared_layer_subclassed_model), - ('functional_with_keyword_args', functional_with_keyword_args) + ("basic_sequential", basic_sequential), + ("basic_sequential_deferred", basic_sequential_deferred), + ("stacked_rnn", stacked_rnn), + ("lstm", lstm), + ("multi_input_multi_output", multi_input_multi_output), + ("nested_sequential_in_functional", nested_sequential_in_functional), + ("seq_to_seq", seq_to_seq), + ("shared_layer_functional", shared_layer_functional), + ("shared_sequential", shared_sequential), + ("nested_subclassed_model", nested_subclassed_model), + ( + "nested_subclassed_in_functional_model", + nested_subclassed_in_functional_model, + ), + ( + "nested_functional_in_subclassed_model", + nested_functional_in_subclassed_model, + ), + ("shared_layer_subclassed_model", shared_layer_subclassed_model), + ("functional_with_keyword_args", functional_with_keyword_args), ] def get_models(exclude_models=None): - """Get all models excluding the specified ones.""" - models = [model for model in ALL_MODELS - if model[0] not in exclude_models] - return models + """Get all models excluding the specified ones.""" + models = [model for model in ALL_MODELS if model[0] not in exclude_models] + return models diff --git a/keras/tests/model_architectures_test.py b/keras/tests/model_architectures_test.py index b8f4637d7430..73193c3b1117 100644 --- a/keras/tests/model_architectures_test.py +++ b/keras/tests/model_architectures_test.py 
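The hunks that follow belong to keras/tests/model_architectures_test.py, which drives every entry in the ALL_MODELS registry defined above. As a rough sketch of how those registry entries can be consumed on their own (assuming ModelFn is a namedtuple with fields model, input_shape, and target_shape, defined earlier in model_architectures.py outside this excerpt; an explicit empty sequence is passed for exclude_models because the None default would fail the membership test in get_models):

    import numpy as np

    from keras.tests import model_architectures

    for name, model_fn in model_architectures.get_models(exclude_models=()):
        data = model_fn()
        # input_shape is a list for multi-input models, a tuple otherwise.
        if isinstance(data.input_shape, list):
            x = [np.random.random((2,) + s[1:]) for s in data.input_shape]
        else:
            x = np.random.random((2,) + data.input_shape[1:])
        out = data.model.predict(x)  # shape(s) should line up with data.target_shape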
@@ -12,97 +12,96 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access -"""Tests for saving/loading function for keras Model.""" -import tensorflow.compat.v2 as tf +"""Tests for saving/loading function for keras Model.""" import os import shutil -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras -from keras.testing_infra import test_combinations from keras.optimizers import optimizer_v1 +from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.tests import model_architectures @test_combinations.run_with_all_saved_model_formats class TestModelArchitectures(test_combinations.TestCase): - - def _save_model_dir(self, dirname='saved_model'): - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - return os.path.join(temp_dir, dirname) - - def get_test_data(self, input_shape, target_shape): - """Generate test dataset for testing.""" - if isinstance(input_shape, list): - x = [ - np.random.random((2,) + input_shape[i][1:]) - for i in range(len(input_shape)) - ] - else: - x = np.random.random((2,) + input_shape[1:]) - - if isinstance(target_shape, list): - y = [ - np.random.random((2,) + target_shape[i][1:]) - for i in range(len(target_shape)) - ] - else: - y = np.random.random((2,) + target_shape[1:]) - - return x, y - - def get_custom_objects(self): - """Define custom_objects.""" - - class CustomOpt(optimizer_v1.SGD): - pass - - def custom_loss(y_true, y_pred): - return keras.losses.mse(y_true, y_pred) - - return {'CustomOpt': CustomOpt, - 'custom_loss': custom_loss} - - @parameterized.named_parameters(*model_architectures.ALL_MODELS) - def test_basic_saving_and_loading(self, model_fn): - save_format = test_utils.get_save_format() - custom_objects = self.get_custom_objects() - if 'subclassed_in_functional' in model_fn.__name__: - subclass_custom_objects = { - 'MySubclassModel': - model_architectures.MySubclassModel, - } - custom_objects.update(subclass_custom_objects) - elif ('subclassed' in model_fn.__name__ and save_format == 'h5'): - self.skipTest('Saving the model to HDF5 format requires the model to be ' - 'a Functional model or a Sequential model.') - - saved_model_dir = self._save_model_dir() - model_data = model_fn() - model = model_data.model - x_test, y_test = self.get_test_data( - model_data.input_shape, model_data.target_shape) - model.compile('rmsprop', 'mse') - model.train_on_batch(x_test, y_test) - - # Save model. - out1 = model.predict(x_test) - keras.models.save_model(model, saved_model_dir, save_format=save_format) - # Load model. 
- loaded_model = keras.models.load_model( - saved_model_dir, - custom_objects=custom_objects) - out2 = loaded_model.predict(x_test) - - self.assertAllClose(out1, out2, atol=1e-05) - - -if __name__ == '__main__': - tf.test.main() + def _save_model_dir(self, dirname="saved_model"): + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + return os.path.join(temp_dir, dirname) + + def get_test_data(self, input_shape, target_shape): + """Generate test dataset for testing.""" + if isinstance(input_shape, list): + x = [ + np.random.random((2,) + input_shape[i][1:]) + for i in range(len(input_shape)) + ] + else: + x = np.random.random((2,) + input_shape[1:]) + + if isinstance(target_shape, list): + y = [ + np.random.random((2,) + target_shape[i][1:]) + for i in range(len(target_shape)) + ] + else: + y = np.random.random((2,) + target_shape[1:]) + + return x, y + + def get_custom_objects(self): + """Define custom_objects.""" + + class CustomOpt(optimizer_v1.SGD): + pass + + def custom_loss(y_true, y_pred): + return keras.losses.mse(y_true, y_pred) + + return {"CustomOpt": CustomOpt, "custom_loss": custom_loss} + + @parameterized.named_parameters(*model_architectures.ALL_MODELS) + def test_basic_saving_and_loading(self, model_fn): + save_format = test_utils.get_save_format() + custom_objects = self.get_custom_objects() + if "subclassed_in_functional" in model_fn.__name__: + subclass_custom_objects = { + "MySubclassModel": model_architectures.MySubclassModel, + } + custom_objects.update(subclass_custom_objects) + elif "subclassed" in model_fn.__name__ and save_format == "h5": + self.skipTest( + "Saving the model to HDF5 format requires the model to be " + "a Functional model or a Sequential model." + ) + + saved_model_dir = self._save_model_dir() + model_data = model_fn() + model = model_data.model + x_test, y_test = self.get_test_data( + model_data.input_shape, model_data.target_shape + ) + model.compile("rmsprop", "mse") + model.train_on_batch(x_test, y_test) + + # Save model. + out1 = model.predict(x_test) + keras.models.save_model(model, saved_model_dir, save_format=save_format) + # Load model. 
+ loaded_model = keras.models.load_model( + saved_model_dir, custom_objects=custom_objects + ) + out2 = loaded_model.predict(x_test) + + self.assertAllClose(out1, out2, atol=1e-05) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/tests/model_subclassing_compiled_test.py b/keras/tests/model_subclassing_compiled_test.py index fd60b326d9fb..1a93734f4f20 100644 --- a/keras/tests/model_subclassing_compiled_test.py +++ b/keras/tests/model_subclassing_compiled_test.py @@ -14,11 +14,10 @@ # ============================================================================== """Tests for compiled Model subclassing.""" -import tensorflow.compat.v2 as tf - import os import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.testing_infra import test_combinations @@ -26,413 +25,455 @@ from keras.tests import model_subclassing_test_util as model_util try: - import h5py # pylint:disable=g-import-not-at-top + import h5py except ImportError: - h5py = None + h5py = None @test_combinations.run_all_keras_modes class ModelSubclassCompiledTest(test_combinations.TestCase): - - def test_single_io_workflow_with_np_arrays(self): - num_classes = 2 - num_samples = 100 - input_dim = 50 - - model = test_utils.SmallSubclassMLP( - num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=['acc', keras.metrics.CategoricalAccuracy()], - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) - - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) - - def test_multi_io_workflow_with_np_arrays(self): - num_classes = (2, 3) - num_samples = 1000 - input_dim = 50 - - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_dp=True, use_bn=True) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - - x1 = np.ones((num_samples, input_dim)) - x2 = np.ones((num_samples, input_dim)) - y1 = np.zeros((num_samples, num_classes[0])) - y2 = np.zeros((num_samples, num_classes[1])) - - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - _ = model.evaluate([x1, x2], [y1, y2], verbose=0) - - def test_single_io_workflow_with_datasets(self): - num_classes = 2 - num_samples = 10 - input_dim = 50 - - with self.cached_session(): - model = test_utils.SmallSubclassMLP( - num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((num_samples, input_dim), dtype=np.float32) - y = np.zeros((num_samples, num_classes), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((x, y)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - model.fit(dataset, epochs=2, steps_per_epoch=10, verbose=0) - _ = model.evaluate(dataset, steps=10, verbose=0) - - def test_attributes(self): - # layers, weights, trainable_weights, non_trainable_weights, inputs, outputs - - num_classes = (2, 3) - num_samples = 100 - input_dim = 50 - - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_bn=True) - - x1 = np.ones((num_samples, input_dim)) - x2 = np.ones((num_samples, input_dim)) - y1 = np.zeros((num_samples, num_classes[0])) - y2 = np.zeros((num_samples, num_classes[1])) - - self.assertEqual(model.name, 'test_model') - self.assertEqual(model.built, False) - 
self.assertEqual(len(model.weights), 0) - - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch([x1, x2], [y1, y2]) - - self.assertEqual(model.built, True) - self.assertEqual(len(model.layers), 4) - self.assertEqual(len(model.weights), 10) - self.assertEqual(len(model.trainable_weights), 8) - self.assertEqual(len(model.non_trainable_weights), 2) - - def test_updates(self): - # test that updates get run during training - num_samples = 100 - input_dim = 50 - - class BNNet(keras.Model): - - def __init__(self): - super().__init__() - self.bn = keras.layers.BatchNormalization(beta_initializer='ones', - gamma_initializer='ones') - - def call(self, inputs): - return self.bn(inputs) - - x = np.ones((num_samples, input_dim)) - y = np.ones((num_samples, input_dim)) - - model = BNNet() - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - y_ref = model.predict(x) - - model.train_on_batch(x, y) - y_new = model.predict(x) - self.assertGreater(np.sum(np.abs(y_ref - y_new)), 0.1) - - def test_training_and_inference_behavior(self): - # test that dropout is applied in training and not inference - - num_samples = 100 - input_dim = 50 - - class DPNet(keras.Model): - - def __init__(self): - super().__init__() - self.dp = keras.layers.Dropout(0.5) - self.dense = keras.layers.Dense(1, - use_bias=False, - kernel_initializer='ones') - - def call(self, inputs): - x = self.dp(inputs) - return self.dense(x) - - model = DPNet() - x = np.ones((num_samples, input_dim)) - y = model.predict(x) - self.assertEqual(np.sum(y), np.sum(x)) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - loss = model.train_on_batch(x, y) - self.assertGreater(loss, 0.1) - - def test_training_methods(self): - # test fit, train_on_batch - # on different input types: list, dict - - num_classes = (2, 3) - num_samples = 100 - input_dim = 50 - - x1 = np.ones((num_samples, input_dim)) - x2 = np.ones((num_samples, input_dim)) - y1 = np.zeros((num_samples, num_classes[0])) - y2 = np.zeros((num_samples, num_classes[1])) - - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_bn=True) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - model.fit({'input_1': x1, 'input_2': x2}, - {'output_1': y1, 'output_2': y2}, - epochs=2, batch_size=32) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0, - validation_data=([x1, x2], [y1, y2])) - - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_bn=True) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - model.train_on_batch([x1, x2], [y1, y2]) - model.train_on_batch({'input_1': x1, 'input_2': x2}, - {'output_1': y1, 'output_2': y2}) - - def test_inference_methods(self): - # test predict, evaluate, test_on_batch, predict_on_batch - # on different input types: list, dict - num_classes = (2, 3) - num_samples = 100 - input_dim = 50 - - x1 = np.ones((num_samples, input_dim)) - x2 = np.ones((num_samples, input_dim)) - y1 = np.zeros((num_samples, num_classes[0])) - y2 = np.zeros((num_samples, num_classes[1])) - - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_bn=True) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - 
model.evaluate([x1, x2], [y1, y2]) - model.test_on_batch([x1, x2], [y1, y2]) - - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_bn=True) - model.predict([x1, x2]) - - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_bn=True) - model.predict_on_batch([x1, x2]) - - def test_saving(self): - num_classes = (2, 3) - num_samples = 100 - input_dim = 50 - - x1 = np.ones((num_samples, input_dim)) - x2 = np.ones((num_samples, input_dim)) - y1 = np.zeros((num_samples, num_classes[0])) - y2 = np.zeros((num_samples, num_classes[1])) - - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_bn=True) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - y_ref_1, y_ref_2 = model.predict([x1, x2]) - - tf_format_name = os.path.join(self.get_temp_dir(), 'ckpt') - model.save_weights(tf_format_name) - if h5py is not None: - hdf5_format_name = os.path.join(self.get_temp_dir(), 'weights.h5') - model.save_weights(hdf5_format_name) - - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_bn=True) - - if h5py is not None: - with self.assertRaises(ValueError): - model.load_weights(hdf5_format_name) - - model.load_weights(tf_format_name) - - y1, y2 = model.predict([x1, x2]) - self.assertAllClose(y_ref_1, y1, atol=1e-5) - self.assertAllClose(y_ref_2, y2, atol=1e-5) - - if h5py is not None: - model.load_weights(hdf5_format_name) - - y1, y2 = model.predict([x1, x2]) - self.assertAllClose(y_ref_1, y1, atol=1e-5) - self.assertAllClose(y_ref_2, y2, atol=1e-5) - - def test_subclass_nested_in_subclass(self): - num_classes = 2 - num_samples = 100 - input_dim = 50 - - model = model_util.NestedTestModel1(num_classes=num_classes) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) - - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) - - self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) - self.assertEqual(len(model.non_trainable_weights), - 2 + len(model.test_net.non_trainable_weights)) - self.assertEqual(len(model.trainable_weights), - 6 + len(model.test_net.trainable_weights)) - - def test_graph_nested_in_subclass(self): - num_classes = 2 - num_samples = 100 - input_dim = 50 - - model = model_util.NestedTestModel2(num_classes=num_classes) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) - - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) - - self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) - self.assertEqual(len(model.non_trainable_weights), - 2 + len(model.test_net.non_trainable_weights)) - self.assertEqual(len(model.trainable_weights), - 6 + len(model.test_net.trainable_weights)) - - def test_subclass_nested_in_graph(self): - num_classes = 2 - num_samples = 100 - input_dim = 50 - - model = model_util.get_nested_model_3( - input_dim=input_dim, num_classes=num_classes) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) - - 
model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) - - self.assertEqual(len(model.weights), 16) - self.assertEqual(len(model.non_trainable_weights), 4) - self.assertEqual(len(model.trainable_weights), 12) - - def test_subclass_nested_in_sequential(self): - num_classes = 2 - num_samples = 100 - input_dim = 50 - - class Inner(keras.Model): - - def __init__(self): - super().__init__() - self.dense1 = keras.layers.Dense(32, activation='relu') - self.dense2 = keras.layers.Dense(num_classes, activation='relu') - self.bn = keras.layers.BatchNormalization() - - def call(self, inputs): - x = self.dense1(inputs) - x = self.dense2(x) - return self.bn(x) - - model = keras.Sequential([Inner()]) - model.compile( - loss='mse', - optimizer='rmsprop', - metrics=['acc'], - run_eagerly=test_utils.should_run_eagerly()) - - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) - - self.assertEqual(len(model.weights), 8) - self.assertEqual(len(model.non_trainable_weights), 2) - self.assertEqual(len(model.trainable_weights), 6) - - def test_support_for_manual_training_arg(self): - # In most cases, the `training` argument is left unspecified, in which - # case it defaults to value corresponding to the Model method being used - # (fit -> True, predict -> False, etc). - # If the user writes their model `call` method to take - # an explicit `training` argument, we must check that the correct value - # is being passed to the model for each method call. - - class DPNet(keras.Model): - - def __init__(self): - super().__init__() - self.dp = keras.layers.Dropout(0.5) - self.dense = keras.layers.Dense(1, - use_bias=False, - kernel_initializer='ones') - - def call(self, inputs, training=False): - x = self.dp(inputs, training=training) - return self.dense(x) - - model = DPNet() - x = np.ones((10, 10)) - y = model.predict(x) - self.assertEqual(np.sum(y), np.sum(x)) - model.compile( - loss='mse', - optimizer='rmsprop', - run_eagerly=test_utils.should_run_eagerly()) - loss = model.train_on_batch(x, y) - self.assertGreater(loss, 0.1) - - -if __name__ == '__main__': - tf.test.main() + def test_single_io_workflow_with_np_arrays(self): + num_classes = 2 + num_samples = 100 + input_dim = 50 + + model = test_utils.SmallSubclassMLP( + num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True + ) + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=["acc", keras.metrics.CategoricalAccuracy()], + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) + + def test_multi_io_workflow_with_np_arrays(self): + num_classes = (2, 3) + num_samples = 1000 + input_dim = 50 + + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_dp=True, use_bn=True + ) + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + _ = model.evaluate([x1, x2], [y1, y2], verbose=0) + + def test_single_io_workflow_with_datasets(self): + num_classes = 2 + num_samples = 10 + input_dim = 50 + + 
with self.cached_session(): + model = test_utils.SmallSubclassMLP( + num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True + ) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones((num_samples, input_dim), dtype=np.float32) + y = np.zeros((num_samples, num_classes), dtype=np.float32) + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + model.fit(dataset, epochs=2, steps_per_epoch=10, verbose=0) + _ = model.evaluate(dataset, steps=10, verbose=0) + + def test_attributes(self): + # layers, weights, trainable_weights, non_trainable_weights, inputs, + # outputs + + num_classes = (2, 3) + num_samples = 100 + input_dim = 50 + + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_bn=True + ) + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + self.assertEqual(model.name, "test_model") + self.assertEqual(model.built, False) + self.assertEqual(len(model.weights), 0) + + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch([x1, x2], [y1, y2]) + + self.assertEqual(model.built, True) + self.assertEqual(len(model.layers), 4) + self.assertEqual(len(model.weights), 10) + self.assertEqual(len(model.trainable_weights), 8) + self.assertEqual(len(model.non_trainable_weights), 2) + + def test_updates(self): + # test that updates get run during training + num_samples = 100 + input_dim = 50 + + class BNNet(keras.Model): + def __init__(self): + super().__init__() + self.bn = keras.layers.BatchNormalization( + beta_initializer="ones", gamma_initializer="ones" + ) + + def call(self, inputs): + return self.bn(inputs) + + x = np.ones((num_samples, input_dim)) + y = np.ones((num_samples, input_dim)) + + model = BNNet() + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + y_ref = model.predict(x) + + model.train_on_batch(x, y) + y_new = model.predict(x) + self.assertGreater(np.sum(np.abs(y_ref - y_new)), 0.1) + + def test_training_and_inference_behavior(self): + # test that dropout is applied in training and not inference + + num_samples = 100 + input_dim = 50 + + class DPNet(keras.Model): + def __init__(self): + super().__init__() + self.dp = keras.layers.Dropout(0.5) + self.dense = keras.layers.Dense( + 1, use_bias=False, kernel_initializer="ones" + ) + + def call(self, inputs): + x = self.dp(inputs) + return self.dense(x) + + model = DPNet() + x = np.ones((num_samples, input_dim)) + y = model.predict(x) + self.assertEqual(np.sum(y), np.sum(x)) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + loss = model.train_on_batch(x, y) + self.assertGreater(loss, 0.1) + + def test_training_methods(self): + # test fit, train_on_batch + # on different input types: list, dict + + num_classes = (2, 3) + num_samples = 100 + input_dim = 50 + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_bn=True + ) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit([x1, x2], [y1, y2], epochs=2, 
batch_size=32, verbose=0) + model.fit( + {"input_1": x1, "input_2": x2}, + {"output_1": y1, "output_2": y2}, + epochs=2, + batch_size=32, + ) + model.fit( + [x1, x2], + [y1, y2], + epochs=2, + batch_size=32, + verbose=0, + validation_data=([x1, x2], [y1, y2]), + ) + + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_bn=True + ) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.train_on_batch([x1, x2], [y1, y2]) + model.train_on_batch( + {"input_1": x1, "input_2": x2}, {"output_1": y1, "output_2": y2} + ) + + def test_inference_methods(self): + # test predict, evaluate, test_on_batch, predict_on_batch + # on different input types: list, dict + num_classes = (2, 3) + num_samples = 100 + input_dim = 50 + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_bn=True + ) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.evaluate([x1, x2], [y1, y2]) + model.test_on_batch([x1, x2], [y1, y2]) + + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_bn=True + ) + model.predict([x1, x2]) + + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_bn=True + ) + model.predict_on_batch([x1, x2]) + + def test_saving(self): + num_classes = (2, 3) + num_samples = 100 + input_dim = 50 + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_bn=True + ) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + y_ref_1, y_ref_2 = model.predict([x1, x2]) + + tf_format_name = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(tf_format_name) + if h5py is not None: + hdf5_format_name = os.path.join(self.get_temp_dir(), "weights.h5") + model.save_weights(hdf5_format_name) + + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_bn=True + ) + + if h5py is not None: + with self.assertRaises(ValueError): + model.load_weights(hdf5_format_name) + + model.load_weights(tf_format_name) + + y1, y2 = model.predict([x1, x2]) + self.assertAllClose(y_ref_1, y1, atol=1e-5) + self.assertAllClose(y_ref_2, y2, atol=1e-5) + + if h5py is not None: + model.load_weights(hdf5_format_name) + + y1, y2 = model.predict([x1, x2]) + self.assertAllClose(y_ref_1, y1, atol=1e-5) + self.assertAllClose(y_ref_2, y2, atol=1e-5) + + def test_subclass_nested_in_subclass(self): + num_classes = 2 + num_samples = 100 + input_dim = 50 + + model = model_util.NestedTestModel1(num_classes=num_classes) + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) + + self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) + self.assertEqual( + len(model.non_trainable_weights), + 2 + len(model.test_net.non_trainable_weights), + ) + self.assertEqual( + 
len(model.trainable_weights), + 6 + len(model.test_net.trainable_weights), + ) + + def test_graph_nested_in_subclass(self): + num_classes = 2 + num_samples = 100 + input_dim = 50 + + model = model_util.NestedTestModel2(num_classes=num_classes) + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) + + self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) + self.assertEqual( + len(model.non_trainable_weights), + 2 + len(model.test_net.non_trainable_weights), + ) + self.assertEqual( + len(model.trainable_weights), + 6 + len(model.test_net.trainable_weights), + ) + + def test_subclass_nested_in_graph(self): + num_classes = 2 + num_samples = 100 + input_dim = 50 + + model = model_util.get_nested_model_3( + input_dim=input_dim, num_classes=num_classes + ) + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) + + self.assertEqual(len(model.weights), 16) + self.assertEqual(len(model.non_trainable_weights), 4) + self.assertEqual(len(model.trainable_weights), 12) + + def test_subclass_nested_in_sequential(self): + num_classes = 2 + num_samples = 100 + input_dim = 50 + + class Inner(keras.Model): + def __init__(self): + super().__init__() + self.dense1 = keras.layers.Dense(32, activation="relu") + self.dense2 = keras.layers.Dense(num_classes, activation="relu") + self.bn = keras.layers.BatchNormalization() + + def call(self, inputs): + x = self.dense1(inputs) + x = self.dense2(x) + return self.bn(x) + + model = keras.Sequential([Inner()]) + model.compile( + loss="mse", + optimizer="rmsprop", + metrics=["acc"], + run_eagerly=test_utils.should_run_eagerly(), + ) + + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) + + self.assertEqual(len(model.weights), 8) + self.assertEqual(len(model.non_trainable_weights), 2) + self.assertEqual(len(model.trainable_weights), 6) + + def test_support_for_manual_training_arg(self): + # In most cases, the `training` argument is left unspecified, in which + # case it defaults to the value corresponding to the Model method being + # used (fit -> True, predict -> False, etc). + # If the user writes their model `call` method to take + # an explicit `training` argument, we must check that the correct value + # is being passed to the model for each method call.
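Before the DPNet test below, the defaulting described in the comment above can be made concrete with a hypothetical probe layer (not part of this suite) that records the `training` value Keras hands it — a minimal sketch, assuming standard TF2 eager execution:

    import numpy as np

    import keras

    class TrainingProbe(keras.layers.Layer):
        def call(self, inputs, training=None):
            # Stores the `training` value received on the most recent call.
            self.last_training = training
            return inputs

    probe = TrainingProbe()
    model = keras.Sequential([probe, keras.layers.Dense(1)])
    model.compile("sgd", "mse")

    x = np.ones((4, 3))
    y = np.ones((4, 1))
    model.fit(x, y, epochs=1, verbose=0)  # probe.last_training -> True
    model.predict(x)                      # probe.last_training -> False

The DPNet test that follows checks the same propagation through an explicit `training` parameter on `call`.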
+ + class DPNet(keras.Model): + def __init__(self): + super().__init__() + self.dp = keras.layers.Dropout(0.5) + self.dense = keras.layers.Dense( + 1, use_bias=False, kernel_initializer="ones" + ) + + def call(self, inputs, training=False): + x = self.dp(inputs, training=training) + return self.dense(x) + + model = DPNet() + x = np.ones((10, 10)) + y = model.predict(x) + self.assertEqual(np.sum(y), np.sum(x)) + model.compile( + loss="mse", + optimizer="rmsprop", + run_eagerly=test_utils.should_run_eagerly(), + ) + loss = model.train_on_batch(x, y) + self.assertGreater(loss, 0.1) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/tests/model_subclassing_test.py b/keras/tests/model_subclassing_test.py index 8f86af2e11b9..dc56912e187b 100644 --- a/keras/tests/model_subclassing_test.py +++ b/keras/tests/model_subclassing_test.py @@ -14,739 +14,800 @@ # ============================================================================== """Tests for Model subclassing.""" -import tensorflow.compat.v2 as tf - import copy import os -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras -from tensorflow.python.framework import test_util as tf_test_utils from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.tests import model_subclassing_test_util as model_util -from tensorflow.python.training.tracking import data_structures + +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) +from tensorflow.python.trackable import data_structures try: - import h5py # pylint:disable=g-import-not-at-top + import h5py except ImportError: - h5py = None + h5py = None @test_combinations.run_all_keras_modes class ModelSubclassingTest(test_combinations.TestCase): - - def test_custom_build(self): - class DummyModel(keras.Model): - - def __init__(self): - super().__init__() - self.dense1 = keras.layers.Dense(32, activation='relu') - self.uses_custom_build = False - - def call(self, inputs): - return self.dense1(inputs) - - def build(self, input_shape): - self.uses_custom_build = True - - test_model = DummyModel() - dummy_data = tf.ones((32, 50)) - test_model(dummy_data) - self.assertTrue(test_model.uses_custom_build, 'Model should use user ' - 'defined build when called.') - - def test_attribute_conflict_error(self): - - class ModelWithProperty(keras.Model): - - @property - def read_only(self): - return 1. - - m = ModelWithProperty() - with self.assertRaisesRegex(AttributeError, 'read_only'): - m.read_only = 2. 
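One functional change hides in the import block of keras/tests/model_subclassing_test.py above: data_structures now comes from tensorflow.python.trackable rather than tensorflow.python.training.tracking, and the private-API imports sit behind an `# isort: off` marker so the formatter leaves their order alone. Code that must run against both module layouts can guard the import — a sketch, with the exact release where the module moved left as an assumption:

    try:
        # Newer TF releases expose the trackable helpers here.
        from tensorflow.python.trackable import data_structures
    except ImportError:
        # Older releases keep them under training.tracking.
        from tensorflow.python.training.tracking import data_structures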
- - def test_custom_build_with_fit(self): - - class DummyModel(keras.Model): - - def __init__(self): - super().__init__() - self.layer1 = keras.layers.Dense(10, activation='relu') - - def build(self, input_shape): - self.layer2 = keras.layers.Dense(1, activation='relu') - - def call(self, inputs): - return self.layer2(self.layer1(inputs)) - - model = DummyModel() - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - model.fit(np.ones((10, 10)), np.ones((10, 1)), batch_size=2, epochs=2) - self.assertLen(model.layers, 2) - self.assertLen(model.trainable_variables, 4) - - def test_dataset_dict_with_fit(self): - - class MyModel(keras.Model): - - def __init__(self): - super().__init__() - self.dense1 = keras.layers.Dense(1) - self.dense2 = keras.layers.Dense(1) - self.add = keras.layers.Add() - - def call(self, x): - return self.add([self.dense1(x['a']), self.dense2(x['b'])]) - - model = MyModel() - model.compile( - 'sgd', - 'mse', - run_eagerly=test_utils.should_run_eagerly()) - - data = tf.data.Dataset.from_tensor_slices(({ - 'a': np.ones((32, 10)), - 'b': np.ones((32, 20)) - }, np.ones((32, 1)))).batch(2) - model.fit(data, epochs=2) - - def test_invalid_input_shape_build(self): - num_classes = 2 - input_dim = 50 - - model = test_utils.SmallSubclassMLP( - num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True) - - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - with self.assertRaisesRegex(ValueError, - 'input shape is not one of the valid types'): - model.build(input_shape=tf.compat.v1.Dimension(input_dim)) - - def test_embed_dtype_with_subclass_build(self): - class Embedding(keras.layers.Layer): - """An Embedding layer.""" - - def __init__(self, vocab_size, embedding_dim, **kwargs): - super().__init__(**kwargs) - self.vocab_size = vocab_size - self.embedding_dim = embedding_dim - - def build(self, _): - self.embedding = self.add_weight( - 'embedding_kernel', - shape=[self.vocab_size, self.embedding_dim], - dtype=np.float32, - initializer=tf.compat.v1.random_uniform_initializer(-0.1, 0.1), - trainable=True) - - def call(self, x): - return tf.compat.v1.nn.embedding_lookup(self.embedding, x) - - class EmbedModel(keras.Model): - - def __init__(self, vocab_size, embed_size): - super().__init__() - self.embed1 = Embedding(vocab_size, embed_size) - - def call(self, inputs): - return self.embed1(inputs) - - model = EmbedModel(100, 20) - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - with self.assertRaisesRegex( - ValueError, 'if your layers do not support float type inputs'): - model.build(input_shape=(35, 20)) - - def test_single_time_step_rnn_build(self): - dim = 4 - timesteps = 1 - batch_input_shape = (None, timesteps, dim) - units = 3 - - class SimpleRNNModel(keras.Model): - - def __init__(self): - super().__init__() - self.lstm = keras.layers.LSTM(units) - - def call(self, inputs): - return self.lstm(inputs) - - model = SimpleRNNModel() - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - model.build(batch_input_shape) - self.assertTrue(model.weights, ('Model should have weights now that it ' - 'has been properly built.')) - self.assertTrue(model.built, 'Model should be built after calling 
`build`.') - model(tf.ones((32, timesteps, dim))) - - def test_single_io_subclass_build(self): - num_classes = 2 - input_dim = 50 - batch_size = None - - model = test_utils.SmallSubclassMLP( - num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True) - - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - model.build(input_shape=(batch_size, input_dim)) - self.assertTrue(model.weights, ('Model should have weights now that it ' - 'has been properly built.')) - self.assertTrue(model.built, 'Model should be built after calling `build`.') - model(tf.ones((32, input_dim))) - - def test_single_io_dimension_subclass_build(self): - num_classes = 2 - input_dim = tf.compat.v1.Dimension(50) - batch_size = tf.compat.v1.Dimension(None) - - model = test_utils.SmallSubclassMLP( - num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True) - - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - model.build(input_shape=(batch_size, input_dim)) - self.assertTrue(model.weights, ('Model should have weights now that it ' - 'has been properly built.')) - self.assertTrue(model.built, 'Model should be built after calling `build`.') - model(tf.ones((32, input_dim))) - - def test_multidim_io_subclass_build(self): - num_classes = 10 - # Input size, e.g. image - batch_size = 32 - input_shape = (32, 32, 3) - - model = model_util.SimpleConvTestModel(num_classes) - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - batch_input_shape = (batch_size,) + input_shape - model.build(input_shape=batch_input_shape) - self.assertTrue(model.weights, ('Model should have weights now that it ' - 'has been properly built.')) - self.assertTrue(model.built, 'Model should be built after calling `build`.') - - model(tf.ones(batch_input_shape)) - - def test_tensorshape_io_subclass_build(self): - num_classes = 10 - # Input size, e.g. image - batch_size = None - input_shape = (32, 32, 3) - - model = model_util.SimpleConvTestModel(num_classes) - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - model.build( - input_shape=tf.TensorShape((batch_size,) + input_shape)) - self.assertTrue(model.weights, ('Model should have weights now that it ' - 'has been properly built.')) - self.assertTrue(model.built, 'Model should be built after calling `build`.') - - model(tf.ones((32,) + input_shape)) - - def test_subclass_save_model(self): - num_classes = 10 - # Input size, e.g. 
image - batch_size = None - input_shape = (32, 32, 3) - - model = model_util.SimpleConvTestModel(num_classes) - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - model.build( - input_shape=tf.TensorShape((batch_size,) + input_shape)) - self.assertTrue(model.weights, ('Model should have weights now that it ' - 'has been properly built.')) - self.assertTrue(model.built, 'Model should be built after calling `build`.') - weights = model.get_weights() - - tf_format_name = os.path.join(self.get_temp_dir(), 'ckpt') - model.save_weights(tf_format_name) - if h5py is not None: - hdf5_format_name = os.path.join(self.get_temp_dir(), 'weights.h5') - model.save_weights(hdf5_format_name) - - model = model_util.SimpleConvTestModel(num_classes) - model.build( - input_shape=tf.TensorShape((batch_size,) + input_shape)) - if h5py is not None: - model.load_weights(hdf5_format_name) - self.assertAllClose(weights, model.get_weights()) - model.load_weights(tf_format_name) - self.assertAllClose(weights, model.get_weights()) - - def test_multi_io_subclass_build(self): - batch_size = None - num_samples = 1000 - input_dim = 50 - model = model_util.get_multi_io_subclass_model() - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - batch_input_shape = tf.TensorShape((batch_size, input_dim)) - model.build( - input_shape=[batch_input_shape, batch_input_shape]) - self.assertTrue(model.weights, ('Model should have weights now that it ' - 'has been properly built.')) - self.assertTrue(model.built, 'Model should be built after calling `build`.') - x1 = tf.ones((num_samples, input_dim)) - x2 = tf.ones((num_samples, input_dim)) - model([x1, x2]) - - def test_summary(self): - - class ToString: - - def __init__(self): - self.contents = '' - - def __call__(self, msg): - self.contents += msg + '\n' - - # Single-io - model = test_utils.SmallSubclassMLP( - num_hidden=32, num_classes=4, use_bn=True, use_dp=True) - model(np.ones((3, 4))) # need to build model first - print_fn = ToString() - model.summary(print_fn=print_fn) - self.assertIn('Trainable params: 356', print_fn.contents) - - # Multi-io - model = model_util.get_multi_io_subclass_model( - num_classes=(5, 6), use_bn=True, use_dp=True) - model([np.ones((3, 4)), np.ones((3, 4))]) # need to build model first - print_fn = ToString() - model.summary(print_fn=print_fn) - self.assertIn('Trainable params: 587', print_fn.contents) - - # Single-io with unused layer - model = test_utils.SmallSubclassMLP( - num_hidden=32, num_classes=4, use_bn=True, use_dp=True) - model.unused_layer = keras.layers.Dense(10) - model(np.ones((3, 4))) # need to build model first - print_fn = ToString() - model.summary(print_fn=print_fn) - self.assertIn('Trainable params: 356', print_fn.contents) - self.assertIn('0 (unused)', print_fn.contents) - - def test_no_dependency(self): - class Foo(keras.Model): - - def __init__(self): - super().__init__() - self.isdep = keras.layers.Dense(1) - self.notdep = data_structures.NoDependency(keras.layers.Dense(2)) - self.notdep_var = data_structures.NoDependency( - tf.Variable(1., name='notdep_var')) - - m = Foo() - self.assertEqual([m.isdep, m.notdep], m.layers) - self.assertEqual(1, len(m._trackable_children())) - self.assertIs(m.isdep, m._trackable_children()['isdep']) - self.assertEqual('notdep_var:0', m.notdep_var.name) - - def 
test_extra_variable(self): - - class ExtraVar(keras.Model): - - def __init__(self): - super().__init__() - self.dense = keras.layers.Dense(1) - self.var = tf.Variable(1.) - self.not_trainable_var = tf.Variable(2., trainable=False) - - def call(self, inputs): - return self.dense(inputs + self.var) - - m = ExtraVar() - self.assertTrue(m.trainable) - self.assertEqual([m.dense], m.layers) - self.assertEqual([m.var, m.not_trainable_var], m.variables) - self.assertEqual([m.var], m.trainable_variables) - self.assertEqual([m.not_trainable_var], m.non_trainable_variables) - self.assertLen(m.get_weights(), 2) - m.trainable = False - self.assertEqual([m.var, m.not_trainable_var], m.variables) - self.assertEqual([], m.trainable_variables) - self.assertEqual([m.var, m.not_trainable_var], m.non_trainable_variables) - self.assertLen(m.get_weights(), 2) - m.trainable = True - - m(tf.ones([1, 1])) - - self.assertEqual([m.dense.kernel, m.dense.bias], m.dense.variables) - self.assertEqual([m.dense.kernel, m.dense.bias], m.dense.weights) - - self.assertLen(m.get_weights(), 4) - self.assertEqual([m.dense.kernel, m.dense.bias, m.var, m.not_trainable_var], - m.variables) - self.assertEqual([m.dense.kernel, m.dense.bias, m.var], - m.trainable_variables) - self.assertEqual([m.not_trainable_var], m.non_trainable_variables) - - m.dense.trainable = False - self.assertEqual( - [m.dense.kernel, m.dense.bias, m.var, m.not_trainable_var], - m.variables) - self.assertEqual([m.var], m.trainable_variables) - self.assertEqual([m.dense.kernel, m.dense.bias, m.not_trainable_var], - m.non_trainable_variables) - self.assertLen(m.get_weights(), 4) - - def test_add_weight_in_model(self): - - class MyModel(keras.Model): - - def __init__(self): - super().__init__() - self.b = self.add_weight('bias', (10,)) - self.c = self.add_weight('bias2', (10,), trainable=False) - - def call(self, inputs): - return inputs + self.b + self.c - - x = tf.convert_to_tensor(np.ones((10, 10), 'float32')) - model = MyModel() - model(x) - self.assertEqual(1, len(model.trainable_weights)) - self.assertEqual(1, len(model.non_trainable_weights)) - self.assertEqual(2, len(model.weights)) - - class MyModelCustomBuild(keras.Model): - - def build(self, input_shape): - self.b = self.add_weight('bias', (10,)) - self.c = self.add_weight('bias2', (10,), trainable=False) - - def call(self, inputs): - return inputs + self.b + self.c - - x = tf.convert_to_tensor(np.ones((10, 10), 'float32')) - model = MyModelCustomBuild() - model(x) - self.assertEqual(1, len(model.trainable_weights)) - self.assertEqual(1, len(model.non_trainable_weights)) - self.assertEqual(2, len(model.weights)) - - def test_add_update_in_model(self): - - class MyModel(keras.Model): - - def __init__(self): - super().__init__() - self.b = self.add_weight('bias', (10,)) - self.c = self.add_weight('bias2', (10,)) - - def call(self, inputs): - # Unconditional - self.add_update(self.b.assign(self.b * 2)) - # Conditional - self.add_update(self.c.assign(inputs[1, :])) - return inputs + self.b + self.c - - x = tf.convert_to_tensor(np.ones((10, 10), 'float32')) - model = MyModel() - model(x) - - if tf.executing_eagerly(): - self.assertEqual(0, len(model.updates)) - else: - self.assertEqual(2, len(model.updates)) - - -class GraphSpecificModelSubclassingTests(tf.test.TestCase): - - def test_single_io_workflow_with_tensors(self): - num_classes = 2 - num_samples = 10 - input_dim = 50 - - with tf.Graph().as_default(), self.cached_session(): - model = test_utils.SmallSubclassMLP( - num_hidden=32, 
num_classes=num_classes, use_dp=True, use_bn=True) - model.compile(loss='mse', optimizer='rmsprop') - - x = tf.ones((num_samples, input_dim)) - y = tf.zeros((num_samples, num_classes)) - - model.fit(x, y, epochs=2, steps_per_epoch=10, verbose=0) - _ = model.evaluate(steps=10, verbose=0) - - def test_multi_io_workflow_with_tensors(self): - num_classes = (2, 3) - num_samples = 10 - input_dim = 50 - - with tf.Graph().as_default(), self.cached_session(): - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_dp=True, use_bn=True) - model.compile(loss='mse', optimizer='rmsprop') - - x1 = tf.ones((num_samples, input_dim)) - x2 = tf.ones((num_samples, input_dim)) - y1 = tf.zeros((num_samples, num_classes[0])) - y2 = tf.zeros((num_samples, num_classes[1])) - - model.fit([x1, x2], [y1, y2], epochs=2, steps_per_epoch=10, verbose=0) - _ = model.evaluate(steps=10, verbose=0) - - def test_updates_and_losses_for_nested_models_in_subclassed_model(self): - - # Case 1: deferred-build sequential nested in subclass. - class TestModel1(keras.Model): - - def __init__(self): - super().__init__() - self.fc = keras.layers.Dense(10, input_shape=(784,), - activity_regularizer='l1') - self.bn = keras.Sequential([keras.layers.BatchNormalization(axis=1)]) - - def call(self, x): - return self.bn(self.fc(x)) - - with tf.compat.v1.get_default_graph().as_default(), self.cached_session(): - model = TestModel1() - - x = tf.ones(shape=[100, 784], dtype='float32') - model(x) - self.assertLen(model.updates, 2) - self.assertLen(model.losses, 1) - - # Case 2: placeholder-sequential nested in subclass. - class TestModel2(keras.Model): - - def __init__(self): - super().__init__() - self.fc = keras.layers.Dense(10, input_shape=(784,), - activity_regularizer='l1') - self.bn = keras.Sequential( - [keras.layers.BatchNormalization(axis=1, input_shape=(10,))]) - - def call(self, x): - return self.bn(self.fc(x)) - - with tf.compat.v1.get_default_graph().as_default(), self.cached_session(): - model = TestModel2() - - x = tf.ones(shape=[100, 784], dtype='float32') - model(x) - self.assertEqual(len(model.get_updates_for(x)), 2) - self.assertEqual(len(model.get_losses_for(x)), 1) - - # Case 3: functional-API model nested in subclass. 
- with tf.compat.v1.get_default_graph().as_default(): - inputs = keras.Input((10,)) - outputs = keras.layers.BatchNormalization(axis=1)(inputs) - bn = keras.Model(inputs, outputs) - - class TestModel3(keras.Model): - - def __init__(self): - super().__init__() - self.fc = keras.layers.Dense(10, input_shape=(784,), - activity_regularizer='l1') - self.bn = bn - - def call(self, x): - return self.bn(self.fc(x)) - - with self.cached_session(): - model = TestModel3() - - x = tf.ones(shape=[100, 784], dtype='float32') + def test_custom_build(self): + class DummyModel(keras.Model): + def __init__(self): + super().__init__() + self.dense1 = keras.layers.Dense(32, activation="relu") + self.uses_custom_build = False + + def call(self, inputs): + return self.dense1(inputs) + + def build(self, input_shape): + self.uses_custom_build = True + + test_model = DummyModel() + dummy_data = tf.ones((32, 50)) + test_model(dummy_data) + self.assertTrue( + test_model.uses_custom_build, + "Model should use user defined build when called.", + ) + + def test_attribute_conflict_error(self): + class ModelWithProperty(keras.Model): + @property + def read_only(self): + return 1.0 + + m = ModelWithProperty() + with self.assertRaisesRegex(AttributeError, "read_only"): + m.read_only = 2.0 + + def test_custom_build_with_fit(self): + class DummyModel(keras.Model): + def __init__(self): + super().__init__() + self.layer1 = keras.layers.Dense(10, activation="relu") + + def build(self, input_shape): + self.layer2 = keras.layers.Dense(1, activation="relu") + + def call(self, inputs): + return self.layer2(self.layer1(inputs)) + + model = DummyModel() + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + model.fit(np.ones((10, 10)), np.ones((10, 1)), batch_size=2, epochs=2) + self.assertLen(model.layers, 2) + self.assertLen(model.trainable_variables, 4) + + def test_dataset_dict_with_fit(self): + class MyModel(keras.Model): + def __init__(self): + super().__init__() + self.dense1 = keras.layers.Dense(1) + self.dense2 = keras.layers.Dense(1) + self.add = keras.layers.Add() + + def call(self, x): + return self.add([self.dense1(x["a"]), self.dense2(x["b"])]) + + model = MyModel() + model.compile("sgd", "mse", run_eagerly=test_utils.should_run_eagerly()) + + data = tf.data.Dataset.from_tensor_slices( + ({"a": np.ones((32, 10)), "b": np.ones((32, 20))}, np.ones((32, 1))) + ).batch(2) + model.fit(data, epochs=2) + + def test_invalid_input_shape_build(self): + num_classes = 2 + input_dim = 50 + + model = test_utils.SmallSubclassMLP( + num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True + ) + + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + with self.assertRaisesRegex( + ValueError, "input shape is not one of the valid types" + ): + model.build(input_shape=tf.compat.v1.Dimension(input_dim)) + + def test_embed_dtype_with_subclass_build(self): + class Embedding(keras.layers.Layer): + """An Embedding layer.""" + + def __init__(self, vocab_size, embedding_dim, **kwargs): + super().__init__(**kwargs) + self.vocab_size = vocab_size + self.embedding_dim = embedding_dim + + def build(self, _): + self.embedding = self.add_weight( + "embedding_kernel", + shape=[self.vocab_size, self.embedding_dim], + dtype=np.float32, + initializer=tf.compat.v1.random_uniform_initializer( + -0.1, 0.1 + ), + trainable=True, + ) + + def call(self, x): + return tf.compat.v1.nn.embedding_lookup(self.embedding, 
x) + + class EmbedModel(keras.Model): + def __init__(self, vocab_size, embed_size): + super().__init__() + self.embed1 = Embedding(vocab_size, embed_size) + + def call(self, inputs): + return self.embed1(inputs) + + model = EmbedModel(100, 20) + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + with self.assertRaisesRegex( + ValueError, "if your layers do not support float type inputs" + ): + model.build(input_shape=(35, 20)) + + def test_single_time_step_rnn_build(self): + dim = 4 + timesteps = 1 + batch_input_shape = (None, timesteps, dim) + units = 3 + + class SimpleRNNModel(keras.Model): + def __init__(self): + super().__init__() + self.lstm = keras.layers.LSTM(units) + + def call(self, inputs): + return self.lstm(inputs) + + model = SimpleRNNModel() + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + model.build(batch_input_shape) + self.assertTrue( + model.weights, + "Model should have weights now that it has been properly built.", + ) + self.assertTrue( + model.built, "Model should be built after calling `build`." + ) + model(tf.ones((32, timesteps, dim))) + + def test_single_io_subclass_build(self): + num_classes = 2 + input_dim = 50 + batch_size = None + + model = test_utils.SmallSubclassMLP( + num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True + ) + + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + model.build(input_shape=(batch_size, input_dim)) + self.assertTrue( + model.weights, + "Model should have weights now that it has been properly built.", + ) + self.assertTrue( + model.built, "Model should be built after calling `build`." + ) + model(tf.ones((32, input_dim))) + + def test_single_io_dimension_subclass_build(self): + num_classes = 2 + input_dim = tf.compat.v1.Dimension(50) + batch_size = tf.compat.v1.Dimension(None) + + model = test_utils.SmallSubclassMLP( + num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True + ) + + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + model.build(input_shape=(batch_size, input_dim)) + self.assertTrue( + model.weights, + "Model should have weights now that it has been properly built.", + ) + self.assertTrue( + model.built, "Model should be built after calling `build`." + ) + model(tf.ones((32, input_dim))) + + def test_multidim_io_subclass_build(self): + num_classes = 10 + # Input size, e.g. image + batch_size = 32 + input_shape = (32, 32, 3) + + model = model_util.SimpleConvTestModel(num_classes) + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + batch_input_shape = (batch_size,) + input_shape + model.build(input_shape=batch_input_shape) + self.assertTrue( + model.weights, + "Model should have weights now that it has been properly built.", + ) + self.assertTrue( + model.built, "Model should be built after calling `build`." + ) + + model(tf.ones(batch_input_shape)) + + def test_tensorshape_io_subclass_build(self): + num_classes = 10 + # Input size, e.g. 
image + batch_size = None + input_shape = (32, 32, 3) + + model = model_util.SimpleConvTestModel(num_classes) + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + model.build(input_shape=tf.TensorShape((batch_size,) + input_shape)) + self.assertTrue( + model.weights, + "Model should have weights now that it has been properly built.", + ) + self.assertTrue( + model.built, "Model should be built after calling `build`." + ) + + model(tf.ones((32,) + input_shape)) + + def test_subclass_save_model(self): + num_classes = 10 + # Input size, e.g. image + batch_size = None + input_shape = (32, 32, 3) + + model = model_util.SimpleConvTestModel(num_classes) + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + model.build(input_shape=tf.TensorShape((batch_size,) + input_shape)) + self.assertTrue( + model.weights, + "Model should have weights now that it has been properly built.", + ) + self.assertTrue( + model.built, "Model should be built after calling `build`." + ) + weights = model.get_weights() + + tf_format_name = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(tf_format_name) + if h5py is not None: + hdf5_format_name = os.path.join(self.get_temp_dir(), "weights.h5") + model.save_weights(hdf5_format_name) + + model = model_util.SimpleConvTestModel(num_classes) + model.build(input_shape=tf.TensorShape((batch_size,) + input_shape)) + if h5py is not None: + model.load_weights(hdf5_format_name) + self.assertAllClose(weights, model.get_weights()) + model.load_weights(tf_format_name) + self.assertAllClose(weights, model.get_weights()) + + def test_multi_io_subclass_build(self): + batch_size = None + num_samples = 1000 + input_dim = 50 + model = model_util.get_multi_io_subclass_model() + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + batch_input_shape = tf.TensorShape((batch_size, input_dim)) + model.build(input_shape=[batch_input_shape, batch_input_shape]) + self.assertTrue( + model.weights, + "Model should have weights now that it has been properly built.", + ) + self.assertTrue( + model.built, "Model should be built after calling `build`." 
+ ) + x1 = tf.ones((num_samples, input_dim)) + x2 = tf.ones((num_samples, input_dim)) + model([x1, x2]) + + def test_summary(self): + class ToString: + def __init__(self): + self.contents = "" + + def __call__(self, msg): + self.contents += msg + "\n" + + # Single-io + model = test_utils.SmallSubclassMLP( + num_hidden=32, num_classes=4, use_bn=True, use_dp=True + ) + model(np.ones((3, 4))) # need to build model first + print_fn = ToString() + model.summary(print_fn=print_fn) + self.assertIn("Trainable params: 356", print_fn.contents) + + # Multi-io + model = model_util.get_multi_io_subclass_model( + num_classes=(5, 6), use_bn=True, use_dp=True + ) + model([np.ones((3, 4)), np.ones((3, 4))]) # need to build model first + print_fn = ToString() + model.summary(print_fn=print_fn) + self.assertIn("Trainable params: 587", print_fn.contents) + + # Single-io with unused layer + model = test_utils.SmallSubclassMLP( + num_hidden=32, num_classes=4, use_bn=True, use_dp=True + ) + model.unused_layer = keras.layers.Dense(10) + model(np.ones((3, 4))) # need to build model first + print_fn = ToString() + model.summary(print_fn=print_fn) + self.assertIn("Trainable params: 356", print_fn.contents) + self.assertIn("0 (unused)", print_fn.contents) + + def test_no_dependency(self): + class Foo(keras.Model): + def __init__(self): + super().__init__() + self.isdep = keras.layers.Dense(1) + self.notdep = data_structures.NoDependency( + keras.layers.Dense(2) + ) + self.notdep_var = data_structures.NoDependency( + tf.Variable(1.0, name="notdep_var") + ) + + m = Foo() + self.assertEqual([m.isdep, m.notdep], m.layers) + self.assertEqual(1, len(m._trackable_children())) + self.assertIs(m.isdep, m._trackable_children()["isdep"]) + self.assertEqual("notdep_var:0", m.notdep_var.name) + + def test_extra_variable(self): + class ExtraVar(keras.Model): + def __init__(self): + super().__init__() + self.dense = keras.layers.Dense(1) + self.var = tf.Variable(1.0) + self.not_trainable_var = tf.Variable(2.0, trainable=False) + + def call(self, inputs): + return self.dense(inputs + self.var) + + m = ExtraVar() + self.assertTrue(m.trainable) + self.assertEqual([m.dense], m.layers) + self.assertEqual([m.var, m.not_trainable_var], m.variables) + self.assertEqual([m.var], m.trainable_variables) + self.assertEqual([m.not_trainable_var], m.non_trainable_variables) + self.assertLen(m.get_weights(), 2) + m.trainable = False + self.assertEqual([m.var, m.not_trainable_var], m.variables) + self.assertEqual([], m.trainable_variables) + self.assertEqual( + [m.var, m.not_trainable_var], m.non_trainable_variables + ) + self.assertLen(m.get_weights(), 2) + m.trainable = True + + m(tf.ones([1, 1])) + + self.assertEqual([m.dense.kernel, m.dense.bias], m.dense.variables) + self.assertEqual([m.dense.kernel, m.dense.bias], m.dense.weights) + + self.assertLen(m.get_weights(), 4) + self.assertEqual( + [m.dense.kernel, m.dense.bias, m.var, m.not_trainable_var], + m.variables, + ) + self.assertEqual( + [m.dense.kernel, m.dense.bias, m.var], m.trainable_variables + ) + self.assertEqual([m.not_trainable_var], m.non_trainable_variables) + + m.dense.trainable = False + self.assertEqual( + [m.dense.kernel, m.dense.bias, m.var, m.not_trainable_var], + m.variables, + ) + self.assertEqual([m.var], m.trainable_variables) + self.assertEqual( + [m.dense.kernel, m.dense.bias, m.not_trainable_var], + m.non_trainable_variables, + ) + self.assertLen(m.get_weights(), 4) + + def test_add_weight_in_model(self): + class MyModel(keras.Model): + def __init__(self): + 
super().__init__() + self.b = self.add_weight("bias", (10,)) + self.c = self.add_weight("bias2", (10,), trainable=False) + + def call(self, inputs): + return inputs + self.b + self.c + + x = tf.convert_to_tensor(np.ones((10, 10), "float32")) + model = MyModel() model(x) - self.assertEqual(len(model.get_updates_for(x)), 2) - self.assertEqual(len(model.get_losses_for(x)), 1) - - def test_multi_io_workflow_with_numpy_arrays_and_custom_placeholders(self): - num_classes = (2, 3) - num_samples = 1000 - input_dim = 50 + self.assertEqual(1, len(model.trainable_weights)) + self.assertEqual(1, len(model.non_trainable_weights)) + self.assertEqual(2, len(model.weights)) - with tf.Graph().as_default(), self.cached_session(): - model = model_util.get_multi_io_subclass_model( - num_classes=num_classes, use_dp=True, use_bn=True) - model.compile(loss='mse', optimizer='rmsprop') + class MyModelCustomBuild(keras.Model): + def build(self, input_shape): + self.b = self.add_weight("bias", (10,)) + self.c = self.add_weight("bias2", (10,), trainable=False) - x1 = np.ones((num_samples, input_dim)) - x2 = np.ones((num_samples, input_dim)) - y1 = np.zeros((num_samples, num_classes[0])) - y2 = np.zeros((num_samples, num_classes[1])) + def call(self, inputs): + return inputs + self.b + self.c - x2_placeholder = tf.compat.v1.placeholder( - dtype='float32', shape=(None, input_dim)) - model._set_inputs([x1, x2_placeholder]) + x = tf.convert_to_tensor(np.ones((10, 10), "float32")) + model = MyModelCustomBuild() + model(x) + self.assertEqual(1, len(model.trainable_weights)) + self.assertEqual(1, len(model.non_trainable_weights)) + self.assertEqual(2, len(model.weights)) + + def test_add_update_in_model(self): + class MyModel(keras.Model): + def __init__(self): + super().__init__() + self.b = self.add_weight("bias", (10,)) + self.c = self.add_weight("bias2", (10,)) + + def call(self, inputs): + # Unconditional + self.add_update(self.b.assign(self.b * 2)) + # Conditional + self.add_update(self.c.assign(inputs[1, :])) + return inputs + self.b + self.c + + x = tf.convert_to_tensor(np.ones((10, 10), "float32")) + model = MyModel() + model(x) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - _ = model.evaluate([x1, x2], [y1, y2], verbose=0) + if tf.executing_eagerly(): + self.assertLen(model.updates, 0) + else: + self.assertLen(model.updates, 2) -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +class GraphSpecificModelSubclassingTests(tf.test.TestCase): + def test_single_io_workflow_with_tensors(self): + num_classes = 2 + num_samples = 10 + input_dim = 50 + + with tf.Graph().as_default(), self.cached_session(): + model = test_utils.SmallSubclassMLP( + num_hidden=32, num_classes=num_classes, use_dp=True, use_bn=True + ) + model.compile(loss="mse", optimizer="rmsprop") + + x = tf.ones((num_samples, input_dim)) + y = tf.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, steps_per_epoch=10, verbose=0) + _ = model.evaluate(steps=10, verbose=0) + + def test_multi_io_workflow_with_tensors(self): + num_classes = (2, 3) + num_samples = 10 + input_dim = 50 + + with tf.Graph().as_default(), self.cached_session(): + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_dp=True, use_bn=True + ) + model.compile(loss="mse", optimizer="rmsprop") + + x1 = tf.ones((num_samples, input_dim)) + x2 = tf.ones((num_samples, input_dim)) + y1 = tf.zeros((num_samples, num_classes[0])) + y2 = tf.zeros((num_samples, num_classes[1])) + + model.fit( + [x1, x2], [y1, y2], 
epochs=2, steps_per_epoch=10, verbose=0 + ) + _ = model.evaluate(steps=10, verbose=0) + + def test_updates_and_losses_for_nested_models_in_subclassed_model(self): + # Case 1: deferred-build sequential nested in subclass. + class TestModel1(keras.Model): + def __init__(self): + super().__init__() + self.fc = keras.layers.Dense( + 10, input_shape=(784,), activity_regularizer="l1" + ) + self.bn = keras.Sequential( + [keras.layers.BatchNormalization(axis=1)] + ) + + def call(self, x): + return self.bn(self.fc(x)) + + with tf.compat.v1.get_default_graph().as_default(), self.cached_session(): # noqa: E501 + model = TestModel1() + + x = tf.ones(shape=[100, 784], dtype="float32") + model(x) + self.assertLen(model.updates, 2) + self.assertLen(model.losses, 1) + + # Case 2: placeholder-sequential nested in subclass. + class TestModel2(keras.Model): + def __init__(self): + super().__init__() + self.fc = keras.layers.Dense( + 10, input_shape=(784,), activity_regularizer="l1" + ) + self.bn = keras.Sequential( + [keras.layers.BatchNormalization(axis=1, input_shape=(10,))] + ) + + def call(self, x): + return self.bn(self.fc(x)) + + with tf.compat.v1.get_default_graph().as_default(), self.cached_session(): # noqa: E501 + model = TestModel2() + + x = tf.ones(shape=[100, 784], dtype="float32") + model(x) + self.assertEqual(len(model.get_updates_for(x)), 2) + self.assertEqual(len(model.get_losses_for(x)), 1) + + # Case 3: functional-API model nested in subclass. + with tf.compat.v1.get_default_graph().as_default(): + inputs = keras.Input((10,)) + outputs = keras.layers.BatchNormalization(axis=1)(inputs) + bn = keras.Model(inputs, outputs) + + class TestModel3(keras.Model): + def __init__(self): + super().__init__() + self.fc = keras.layers.Dense( + 10, input_shape=(784,), activity_regularizer="l1" + ) + self.bn = bn + + def call(self, x): + return self.bn(self.fc(x)) + + with self.cached_session(): + model = TestModel3() + + x = tf.ones(shape=[100, 784], dtype="float32") + model(x) + self.assertEqual(len(model.get_updates_for(x)), 2) + self.assertEqual(len(model.get_losses_for(x)), 1) + + def test_multi_io_workflow_with_numpy_arrays_and_custom_placeholders(self): + num_classes = (2, 3) + num_samples = 1000 + input_dim = 50 + + with tf.Graph().as_default(), self.cached_session(): + model = model_util.get_multi_io_subclass_model( + num_classes=num_classes, use_dp=True, use_bn=True + ) + model.compile(loss="mse", optimizer="rmsprop") + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + x2_placeholder = tf.compat.v1.placeholder( + dtype="float32", shape=(None, input_dim) + ) + model._set_inputs([x1, x2_placeholder]) + + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + _ = model.evaluate([x1, x2], [y1, y2], verbose=0) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class CustomCallSignatureTests(tf.test.TestCase, parameterized.TestCase): - - def test_no_inputs_in_signature(self): - model = model_util.CustomCallModel() - first = tf.ones([2, 3]) - second = tf.ones([2, 5]) - output = model(first, second) - self.evaluate([v.initializer for v in model.variables]) - expected_output = self.evaluate(model.dense1(first) + model.dense2(second)) - self.assertAllClose(expected_output, self.evaluate(output)) - output = model(first, second, fiddle_with_output='yes') - self.assertAllClose(10. 
* expected_output, self.evaluate(output)) - output = model(first, second=second, training=False) - self.assertAllClose(expected_output, self.evaluate(output)) - - def test_training_args_call_build(self): - input_dim = 2 - - model = model_util.TrainingNoDefaultModel() - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - model.build((None, input_dim)) - self.assertTrue(model.weights, ('Model should have weights now that it ' - 'has been properly built.')) - self.assertTrue(model.built, 'Model should be built after calling `build`.') - - def test_training_and_mask_args_call_build(self): - input_dim = 2 - - model = model_util.TrainingMaskingModel() - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - model.build((None, input_dim)) - self.assertTrue(model.weights, ('Model should have weights now that it ' - 'has been properly built.')) - self.assertTrue(model.built, 'Model should be built after calling `build`.') - - def test_custom_call_kwargs_and_build(self): - first_input_shape = (2, 3) - second_input_shape = (2, 5) - - model = model_util.CustomCallModel() - self.assertFalse(model.built, 'Model should not have been built') - self.assertFalse(model.weights, ('Model should have no weights since it ' - 'has not been built.')) - with self.assertRaisesRegex(ValueError, - 'cannot build your model if it has positional'): - model.build(input_shape=[first_input_shape, second_input_shape]) - - def test_kwargs_in_signature(self): - - class HasKwargs(keras.Model): - - def call(self, x, y=3, **kwargs): - return x - - model = HasKwargs() - arg = tf.ones([1]) - model(arg, a=3) - if not tf.executing_eagerly(): - self.assertLen(model.inputs, 1) - - @tf_test_utils.assert_no_new_tensors - @tf_test_utils.assert_no_garbage_created - def test_training_no_default(self): - if not tf.executing_eagerly(): - return - model = model_util.TrainingNoDefaultModel() - arg = tf.ones([1, 1]) - model(arg, True) - - def test_positional_arg_in_call(self): - - class ModelWithPositionalArgs(keras.Model): - - def call(self, x, x2, x3=None): - return x + x2 - - x = np.ones((10, 1)) - y = np.ones((10, 1)) - m = ModelWithPositionalArgs() - m.compile('sgd', 'mse') - with self.assertRaisesRegex(ValueError, r'Models passed to `fit`'): - m.fit(x, y, batch_size=2) - with self.assertRaisesRegex(ValueError, r'Models passed to `evaluate`'): - m.evaluate(x, y, batch_size=2) - with self.assertRaisesRegex(ValueError, r'Models passed to `predict`'): - m.predict(x, batch_size=2) - with self.assertRaisesRegex(ValueError, - r'Models passed to `train_on_batch`'): - m.train_on_batch(x, y) - with self.assertRaisesRegex(ValueError, - r'Models passed to `test_on_batch`'): - m.test_on_batch(x, y) - with self.assertRaisesRegex(ValueError, - r'Models passed to `predict_on_batch`'): - m.predict_on_batch(x) - - def test_deepcopy(self): - if not tf.executing_eagerly(): - self.skipTest('Run in eager mode only.') - - class MyModel(keras.Model): - - def __init__(self): - super().__init__() - self.my_variable = tf.Variable(0.0, trainable=False) - self.layer = keras.layers.Dense(4) - - def call(self, obs): - return self.layer(obs) - - model = MyModel() - model.my_variable.assign_add(1.0) - - new_model = copy.deepcopy(model) - self.assertEqual(model.my_variable.numpy(), 1.0) - self.assertEqual(new_model.my_variable.numpy(), 1.0) - - 
model.my_variable.assign_add(1.0) - self.assertEqual(model.my_variable.numpy(), 2.0) - self.assertEqual(new_model.my_variable.numpy(), 1.0) - - # Check that Trackable logic still works. - self.assertLen(new_model.variables, 1) - self.assertLen(new_model.layers, 1) - - def test_batch_counters_not_in_variables(self): - - class MyModel(keras.Model): - - def __init__(self): - super().__init__() - self.layer = keras.layers.Dense(4) - - def call(self, obs): - return self.layer(obs) - - model = MyModel() - model(np.ones((10, 10))) - self.assertLen(model.variables, 2) - - -if __name__ == '__main__': - tf.test.main() + def test_no_inputs_in_signature(self): + model = model_util.CustomCallModel() + first = tf.ones([2, 3]) + second = tf.ones([2, 5]) + output = model(first, second) + self.evaluate([v.initializer for v in model.variables]) + expected_output = self.evaluate( + model.dense1(first) + model.dense2(second) + ) + self.assertAllClose(expected_output, self.evaluate(output)) + output = model(first, second, fiddle_with_output="yes") + self.assertAllClose(10.0 * expected_output, self.evaluate(output)) + output = model(first, second=second, training=False) + self.assertAllClose(expected_output, self.evaluate(output)) + + def test_training_args_call_build(self): + input_dim = 2 + + model = model_util.TrainingNoDefaultModel() + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + model.build((None, input_dim)) + self.assertTrue( + model.weights, + "Model should have weights now that it has been properly built.", + ) + self.assertTrue( + model.built, "Model should be built after calling `build`." + ) + + def test_training_and_mask_args_call_build(self): + input_dim = 2 + + model = model_util.TrainingMaskingModel() + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + model.build((None, input_dim)) + self.assertTrue( + model.weights, + "Model should have weights now that it has been properly built.", + ) + self.assertTrue( + model.built, "Model should be built after calling `build`." 
+ ) + + def test_custom_call_kwargs_and_build(self): + first_input_shape = (2, 3) + second_input_shape = (2, 5) + + model = model_util.CustomCallModel() + self.assertFalse(model.built, "Model should not have been built") + self.assertFalse( + model.weights, + "Model should have no weights since it has not been built.", + ) + with self.assertRaisesRegex( + ValueError, "cannot build your model if it has positional" + ): + model.build(input_shape=[first_input_shape, second_input_shape]) + + def test_kwargs_in_signature(self): + class HasKwargs(keras.Model): + def call(self, x, y=3, **kwargs): + return x + + model = HasKwargs() + arg = tf.ones([1]) + model(arg, a=3) + if not tf.executing_eagerly(): + self.assertLen(model.inputs, 1) + + @tf_test_utils.assert_no_new_tensors + @tf_test_utils.assert_no_garbage_created + def test_training_no_default(self): + if not tf.executing_eagerly(): + return + model = model_util.TrainingNoDefaultModel() + arg = tf.ones([1, 1]) + model(arg, True) + + def test_positional_arg_in_call(self): + class ModelWithPositionalArgs(keras.Model): + def call(self, x, x2, x3=None): + return x + x2 + + x = np.ones((10, 1)) + y = np.ones((10, 1)) + m = ModelWithPositionalArgs() + m.compile("sgd", "mse") + with self.assertRaisesRegex(ValueError, r"Models passed to `fit`"): + m.fit(x, y, batch_size=2) + with self.assertRaisesRegex(ValueError, r"Models passed to `evaluate`"): + m.evaluate(x, y, batch_size=2) + with self.assertRaisesRegex(ValueError, r"Models passed to `predict`"): + m.predict(x, batch_size=2) + with self.assertRaisesRegex( + ValueError, r"Models passed to `train_on_batch`" + ): + m.train_on_batch(x, y) + with self.assertRaisesRegex( + ValueError, r"Models passed to `test_on_batch`" + ): + m.test_on_batch(x, y) + with self.assertRaisesRegex( + ValueError, r"Models passed to `predict_on_batch`" + ): + m.predict_on_batch(x) + + def test_deepcopy(self): + if not tf.executing_eagerly(): + self.skipTest("Run in eager mode only.") + + class MyModel(keras.Model): + def __init__(self): + super().__init__() + self.my_variable = tf.Variable(0.0, trainable=False) + self.layer = keras.layers.Dense(4) + + def call(self, obs): + return self.layer(obs) + + model = MyModel() + model.my_variable.assign_add(1.0) + + new_model = copy.deepcopy(model) + self.assertEqual(model.my_variable.numpy(), 1.0) + self.assertEqual(new_model.my_variable.numpy(), 1.0) + + model.my_variable.assign_add(1.0) + self.assertEqual(model.my_variable.numpy(), 2.0) + self.assertEqual(new_model.my_variable.numpy(), 1.0) + + # Check that Trackable logic still works. 
+ self.assertLen(new_model.variables, 1) + self.assertLen(new_model.layers, 1) + + def test_batch_counters_not_in_variables(self): + class MyModel(keras.Model): + def __init__(self): + super().__init__() + self.layer = keras.layers.Dense(4) + + def call(self, obs): + return self.layer(obs) + + model = MyModel() + model(np.ones((10, 10))) + self.assertLen(model.variables, 2) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/tests/model_subclassing_test_util.py b/keras/tests/model_subclassing_test_util.py index 2fd2dcf073e1..5d06f6c4540a 100644 --- a/keras/tests/model_subclassing_test_util.py +++ b/keras/tests/model_subclassing_test_util.py @@ -18,147 +18,141 @@ from keras.testing_infra import test_utils -# pylint: disable=missing-docstring,not-callable class SimpleConvTestModel(keras.Model): + def __init__(self, num_classes=10): + super().__init__(name="test_model") + self.num_classes = num_classes - def __init__(self, num_classes=10): - super().__init__(name='test_model') - self.num_classes = num_classes + self.conv1 = keras.layers.Conv2D(32, (3, 3), activation="relu") + self.flatten = keras.layers.Flatten() + self.dense1 = keras.layers.Dense(num_classes, activation="softmax") - self.conv1 = keras.layers.Conv2D(32, (3, 3), activation='relu') - self.flatten = keras.layers.Flatten() - self.dense1 = keras.layers.Dense(num_classes, activation='softmax') - - def call(self, x): - x = self.conv1(x) - x = self.flatten(x) - return self.dense1(x) + def call(self, x): + x = self.conv1(x) + x = self.flatten(x) + return self.dense1(x) def get_multi_io_subclass_model(use_bn=False, use_dp=False, num_classes=(2, 3)): - """Creates MultiIOModel for the tests of subclass model.""" - shared_layer = keras.layers.Dense(32, activation='relu') - branch_a = [shared_layer] - if use_dp: - branch_a.append(keras.layers.Dropout(0.5)) - branch_a.append(keras.layers.Dense(num_classes[0], activation='softmax')) + """Creates MultiIOModel for the tests of subclass model.""" + shared_layer = keras.layers.Dense(32, activation="relu") + branch_a = [shared_layer] + if use_dp: + branch_a.append(keras.layers.Dropout(0.5)) + branch_a.append(keras.layers.Dense(num_classes[0], activation="softmax")) - branch_b = [shared_layer] - if use_bn: - branch_b.append(keras.layers.BatchNormalization()) - branch_b.append(keras.layers.Dense(num_classes[1], activation='softmax')) + branch_b = [shared_layer] + if use_bn: + branch_b.append(keras.layers.BatchNormalization()) + branch_b.append(keras.layers.Dense(num_classes[1], activation="softmax")) - model = ( - test_utils._MultiIOSubclassModel( # pylint: disable=protected-access - branch_a, branch_b, name='test_model')) - return model + model = test_utils._MultiIOSubclassModel( + branch_a, branch_b, name="test_model" + ) + return model class NestedTestModel1(keras.Model): - """A model subclass nested inside a model subclass. 
- """ - - def __init__(self, num_classes=2): - super().__init__(name='nested_model_1') - self.num_classes = num_classes - self.dense1 = keras.layers.Dense(32, activation='relu') - self.dense2 = keras.layers.Dense(num_classes, activation='relu') - self.bn = keras.layers.BatchNormalization() - self.test_net = test_utils.SmallSubclassMLP( - num_hidden=32, num_classes=4, use_bn=True, use_dp=True) + """A model subclass nested inside a model subclass.""" + + def __init__(self, num_classes=2): + super().__init__(name="nested_model_1") + self.num_classes = num_classes + self.dense1 = keras.layers.Dense(32, activation="relu") + self.dense2 = keras.layers.Dense(num_classes, activation="relu") + self.bn = keras.layers.BatchNormalization() + self.test_net = test_utils.SmallSubclassMLP( + num_hidden=32, num_classes=4, use_bn=True, use_dp=True + ) - def call(self, inputs): - x = self.dense1(inputs) - x = self.bn(x) - x = self.test_net(x) - return self.dense2(x) + def call(self, inputs): + x = self.dense1(inputs) + x = self.bn(x) + x = self.test_net(x) + return self.dense2(x) class NestedTestModel2(keras.Model): - """A model subclass with a functional-API graph network inside. - """ - - def __init__(self, num_classes=2): - super().__init__(name='nested_model_2') - self.num_classes = num_classes - self.dense1 = keras.layers.Dense(32, activation='relu') - self.dense2 = keras.layers.Dense(num_classes, activation='relu') - self.bn = self.bn = keras.layers.BatchNormalization() - self.test_net = self.get_functional_graph_model(32, 4) - - @staticmethod - def get_functional_graph_model(input_dim, num_classes): - # A simple functional-API model (a.k.a. graph network) - inputs = keras.Input(shape=(input_dim,)) - x = keras.layers.Dense(32, activation='relu')(inputs) - x = keras.layers.BatchNormalization()(x) - outputs = keras.layers.Dense(num_classes)(x) - return keras.Model(inputs, outputs) + """A model subclass with a functional-API graph network inside.""" + + def __init__(self, num_classes=2): + super().__init__(name="nested_model_2") + self.num_classes = num_classes + self.dense1 = keras.layers.Dense(32, activation="relu") + self.dense2 = keras.layers.Dense(num_classes, activation="relu") + self.bn = self.bn = keras.layers.BatchNormalization() + self.test_net = self.get_functional_graph_model(32, 4) + + @staticmethod + def get_functional_graph_model(input_dim, num_classes): + # A simple functional-API model (a.k.a. graph network) + inputs = keras.Input(shape=(input_dim,)) + x = keras.layers.Dense(32, activation="relu")(inputs) + x = keras.layers.BatchNormalization()(x) + outputs = keras.layers.Dense(num_classes)(x) + return keras.Model(inputs, outputs) - def call(self, inputs): - x = self.dense1(inputs) - x = self.bn(x) - x = self.test_net(x) - return self.dense2(x) + def call(self, inputs): + x = self.dense1(inputs) + x = self.bn(x) + x = self.test_net(x) + return self.dense2(x) def get_nested_model_3(input_dim, num_classes): - # A functional-API model with a subclassed model inside. - # NOTE: this requires the inner subclass to implement `compute_output_shape`. + # A functional-API model with a subclassed model inside. + # NOTE: this requires the inner subclass to implement + # `compute_output_shape`. 
- inputs = keras.Input(shape=(input_dim,)) - x = keras.layers.Dense(32, activation='relu')(inputs) - x = keras.layers.BatchNormalization()(x) + inputs = keras.Input(shape=(input_dim,)) + x = keras.layers.Dense(32, activation="relu")(inputs) + x = keras.layers.BatchNormalization()(x) - class Inner(keras.Model): + class Inner(keras.Model): + def __init__(self): + super().__init__() + self.dense1 = keras.layers.Dense(32, activation="relu") + self.dense2 = keras.layers.Dense(5, activation="relu") + self.bn = keras.layers.BatchNormalization() - def __init__(self): - super().__init__() - self.dense1 = keras.layers.Dense(32, activation='relu') - self.dense2 = keras.layers.Dense(5, activation='relu') - self.bn = keras.layers.BatchNormalization() + def call(self, inputs): + x = self.dense1(inputs) + x = self.dense2(x) + return self.bn(x) - def call(self, inputs): - x = self.dense1(inputs) - x = self.dense2(x) - return self.bn(x) - - test_model = Inner() - x = test_model(x) - outputs = keras.layers.Dense(num_classes)(x) - return keras.Model(inputs, outputs, name='nested_model_3') + test_model = Inner() + x = test_model(x) + outputs = keras.layers.Dense(num_classes)(x) + return keras.Model(inputs, outputs, name="nested_model_3") class CustomCallModel(keras.Model): + def __init__(self): + super().__init__() + self.dense1 = keras.layers.Dense(1, activation="relu") + self.dense2 = keras.layers.Dense(1, activation="softmax") - def __init__(self): - super().__init__() - self.dense1 = keras.layers.Dense(1, activation='relu') - self.dense2 = keras.layers.Dense(1, activation='softmax') - - def call(self, first, second, fiddle_with_output='no', training=True): - combined = self.dense1(first) + self.dense2(second) - if fiddle_with_output == 'yes': - return 10. * combined - else: - return combined + def call(self, first, second, fiddle_with_output="no", training=True): + combined = self.dense1(first) + self.dense2(second) + if fiddle_with_output == "yes": + return 10.0 * combined + else: + return combined class TrainingNoDefaultModel(keras.Model): + def __init__(self): + super().__init__() + self.dense1 = keras.layers.Dense(1) - def __init__(self): - super().__init__() - self.dense1 = keras.layers.Dense(1) - - def call(self, x, training): - return self.dense1(x) + def call(self, x, training): + return self.dense1(x) class TrainingMaskingModel(keras.Model): + def __init__(self): + super().__init__() + self.dense1 = keras.layers.Dense(1) - def __init__(self): - super().__init__() - self.dense1 = keras.layers.Dense(1) - - def call(self, x, training=False, mask=None): - return self.dense1(x) + def call(self, x, training=False, mask=None): + return self.dense1(x) diff --git a/keras/tests/saved_model_test.py b/keras/tests/saved_model_test.py index f20a34c8b46a..dd80c7d007c0 100644 --- a/keras/tests/saved_model_test.py +++ b/keras/tests/saved_model_test.py @@ -14,47 +14,52 @@ # ============================================================================== """Tests for trackable object SavedModel save.""" +import os + import tensorflow.compat.v2 as tf -import os -from tensorflow.python.framework import test_util as tf_test_utils from keras.layers import core -from keras.optimizers.optimizer_v2 import adam +from keras.optimizers.legacy import adam +# isort: off +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) -class _ModelWithOptimizerUsingDefun(tf.train.Checkpoint): - def __init__(self): - self.dense = core.Dense(1) - self.optimizer = adam.Adam(0.01) +class 
_ModelWithOptimizerUsingDefun(tf.train.Checkpoint): + def __init__(self): + self.dense = core.Dense(1) + self.optimizer = adam.Adam(0.01) - @tf.function( - input_signature=(tf.TensorSpec([None, 2], tf.float32), - tf.TensorSpec([None], tf.float32)), - ) - def call(self, x, y): - with tf.GradientTape() as tape: - loss = tf.reduce_mean((self.dense(x) - y) ** 2.) - trainable_variables = self.dense.trainable_variables - gradients = tape.gradient(loss, trainable_variables) - self.optimizer.apply_gradients(zip(gradients, trainable_variables)) - return {"loss": loss} + @tf.function( + input_signature=( + tf.TensorSpec([None, 2], tf.float32), + tf.TensorSpec([None], tf.float32), + ), + ) + def call(self, x, y): + with tf.GradientTape() as tape: + loss = tf.reduce_mean((self.dense(x) - y) ** 2.0) + trainable_variables = self.dense.trainable_variables + gradients = tape.gradient(loss, trainable_variables) + self.optimizer.apply_gradients(zip(gradients, trainable_variables)) + return {"loss": loss} class MemoryTests(tf.test.TestCase): + def setUp(self): + super().setUp() + self._model = _ModelWithOptimizerUsingDefun() - def setUp(self): - super().setUp() - self._model = _ModelWithOptimizerUsingDefun() - - @tf_test_utils.assert_no_garbage_created - def DISABLED_test_no_reference_cycles(self): - x = tf.constant([[3., 4.]]) - y = tf.constant([2.]) - self._model.call(x, y) - save_dir = os.path.join(self.get_temp_dir(), "saved_model") - tf.saved_model.save(self._model, save_dir, self._model.call) + @tf_test_utils.assert_no_garbage_created + def DISABLED_test_no_reference_cycles(self): + x = tf.constant([[3.0, 4.0]]) + y = tf.constant([2.0]) + self._model.call(x, y) + save_dir = os.path.join(self.get_temp_dir(), "saved_model") + tf.saved_model.save(self._model, save_dir, self._model.call) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/tests/saver_test.py b/keras/tests/saver_test.py index 922662553c05..bed83b35bdcb 100644 --- a/keras/tests/saver_test.py +++ b/keras/tests/saver_test.py @@ -14,132 +14,151 @@ # ============================================================================= """Tests for tensorflow.python.training.saver.py.""" -import tensorflow.compat.v2 as tf - import functools import os + +import tensorflow.compat.v2 as tf + from keras.engine import training from keras.layers import core -from tensorflow.python.training.tracking import util as trackable_utils +# isort: off +from tensorflow.python.checkpoint import ( + checkpoint as trackable_utils, +) -class NonLayerTrackable(tf.Module): - def __init__(self): - super().__init__() - self.a_variable = trackable_utils.add_variable( - self, name="a_variable", shape=[]) +class NonLayerTrackable(tf.Module): + def __init__(self): + super().__init__() + self.a_variable = trackable_utils.add_variable( + self, name="a_variable", shape=[] + ) class MyModel(training.Model): - """A concrete Model for testing.""" + """A concrete Model for testing.""" - def __init__(self): - super().__init__() - self._named_dense = core.Dense(1, use_bias=True) - self._second = core.Dense(1, use_bias=False) - # We can still track Trackables which aren't Layers. - self._non_layer = NonLayerTrackable() + def __init__(self): + super().__init__() + self._named_dense = core.Dense(1, use_bias=True) + self._second = core.Dense(1, use_bias=False) + # We can still track Trackables which aren't Layers. 
+ self._non_layer = NonLayerTrackable() - def call(self, values): - ret = self._second(self._named_dense(values)) - return ret + def call(self, values): + ret = self._second(self._named_dense(values)) + return ret class TrackableCompatibilityTests(tf.test.TestCase): - - def _initialized_model(self): - input_value = tf.constant([[3.]]) - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - optimizer_step = tf.compat.v1.train.get_or_create_global_step() - root_trackable = tf.train.Checkpoint( - optimizer=optimizer, model=model, optimizer_step=optimizer_step) - train_op = optimizer.minimize( - functools.partial(model, input_value), - global_step=optimizer_step) - self.evaluate(trackable_utils.gather_initializers( - root_trackable)) - self.evaluate(train_op) - # A regular variable, a slot variable, and a non-slot Optimizer variable - # with known values to check when loading. - self.evaluate(model._named_dense.bias.assign([1.])) - self.evaluate(optimizer.get_slot( - var=model._named_dense.bias, name="m").assign([2.])) - beta1_power, _ = optimizer._get_beta_accumulators() - self.evaluate(beta1_power.assign(3.)) - return root_trackable - - def _set_sentinels(self, root_trackable): - self.evaluate(root_trackable.model._named_dense.bias.assign([101.])) - self.evaluate( - root_trackable.optimizer.get_slot( - var=root_trackable.model._named_dense.bias, name="m") - .assign([102.])) - beta1_power, _ = root_trackable.optimizer._get_beta_accumulators() - self.evaluate(beta1_power.assign(103.)) - - def _check_sentinels(self, root_trackable): - self.assertAllEqual( - [1.], self.evaluate(root_trackable.model._named_dense.bias)) - self.assertAllEqual([2.], self.evaluate( - root_trackable.optimizer.get_slot( - var=root_trackable.model._named_dense.bias, name="m"))) - beta1_power, _ = root_trackable.optimizer._get_beta_accumulators() - self.assertAllEqual(3., self.evaluate(beta1_power)) - - def testLoadFromObjectBasedGraph(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - - save_graph = tf.Graph() - with save_graph.as_default(), self.session(graph=save_graph) as sess: - root = self._initialized_model() - object_saver = tf.train.Checkpoint(root=root) - save_path = object_saver.save(file_prefix=checkpoint_prefix) - - # An incompatible object-based checkpoint to check error messages - var = tf.Variable(1., name="a") - self.evaluate(var.initializer) - second_saver = tf.train.Checkpoint(v=var) - second_path = second_saver.save(file_prefix=os.path.join( - checkpoint_directory, "second")) - - restore_graph = tf.Graph() - with restore_graph.as_default(), self.session( - graph=restore_graph) as sess: - root = self._initialized_model() - self._set_sentinels(root) - saver = tf.compat.v1.train.Saver() - saver.restore(sess=sess, save_path=save_path) - self._check_sentinels(root) - before_second_restore_ops = restore_graph.get_operations() - # Test that multiple restores do not pollute the graph - saver.restore(sess=sess, save_path=save_path) - self.assertEqual(before_second_restore_ops, - restore_graph.get_operations()) - with self.assertRaisesRegex(tf.errors.NotFoundError, - "Could not find some variables"): - saver.restore(sess=sess, save_path=second_path) - - def testLoadFromObjectBasedEager(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - - save_graph = tf.Graph() - with save_graph.as_default(), self.session(graph=save_graph): - root = self._initialized_model() 
- object_saver = tf.train.Checkpoint(root=root) - save_path = object_saver.save(file_prefix=checkpoint_prefix) - - with tf.__internal__.eager_context.eager_mode(): - root = self._initialized_model() - self._set_sentinels(root) - saver = tf.compat.v1.train.Saver( - root.model.variables + root.optimizer.variables()) - saver.restore(sess=None, save_path=save_path) - self._check_sentinels(root) + def _initialized_model(self): + input_value = tf.constant([[3.0]]) + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + optimizer_step = tf.compat.v1.train.get_or_create_global_step() + root_trackable = tf.train.Checkpoint( + optimizer=optimizer, model=model, optimizer_step=optimizer_step + ) + train_op = optimizer.minimize( + functools.partial(model, input_value), global_step=optimizer_step + ) + self.evaluate(trackable_utils.gather_initializers(root_trackable)) + self.evaluate(train_op) + # A regular variable, a slot variable, and a non-slot Optimizer variable + # with known values to check when loading. + self.evaluate(model._named_dense.bias.assign([1.0])) + self.evaluate( + optimizer.get_slot(var=model._named_dense.bias, name="m").assign( + [2.0] + ) + ) + beta1_power, _ = optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(3.0)) + return root_trackable + + def _set_sentinels(self, root_trackable): + self.evaluate(root_trackable.model._named_dense.bias.assign([101.0])) + self.evaluate( + root_trackable.optimizer.get_slot( + var=root_trackable.model._named_dense.bias, name="m" + ).assign([102.0]) + ) + beta1_power, _ = root_trackable.optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(103.0)) + + def _check_sentinels(self, root_trackable): + self.assertAllEqual( + [1.0], self.evaluate(root_trackable.model._named_dense.bias) + ) + self.assertAllEqual( + [2.0], + self.evaluate( + root_trackable.optimizer.get_slot( + var=root_trackable.model._named_dense.bias, name="m" + ) + ), + ) + beta1_power, _ = root_trackable.optimizer._get_beta_accumulators() + self.assertAllEqual(3.0, self.evaluate(beta1_power)) + + def testLoadFromObjectBasedGraph(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + save_graph = tf.Graph() + with save_graph.as_default(), self.session(graph=save_graph) as sess: + root = self._initialized_model() + object_saver = tf.train.Checkpoint(root=root) + save_path = object_saver.save(file_prefix=checkpoint_prefix) + + # An incompatible object-based checkpoint to check error messages + var = tf.Variable(1.0, name="a") + self.evaluate(var.initializer) + second_saver = tf.train.Checkpoint(v=var) + second_path = second_saver.save( + file_prefix=os.path.join(checkpoint_directory, "second") + ) + + restore_graph = tf.Graph() + with restore_graph.as_default(), self.session( + graph=restore_graph + ) as sess: + root = self._initialized_model() + self._set_sentinels(root) + saver = tf.compat.v1.train.Saver() + saver.restore(sess=sess, save_path=save_path) + self._check_sentinels(root) + before_second_restore_ops = restore_graph.get_operations() + # Test that multiple restores do not pollute the graph + saver.restore(sess=sess, save_path=save_path) + self.assertEqual( + before_second_restore_ops, restore_graph.get_operations() + ) + with self.assertRaisesRegex( + tf.errors.NotFoundError, "Could not find some variables" + ): + saver.restore(sess=sess, save_path=second_path) + + def testLoadFromObjectBasedEager(self): + checkpoint_directory = self.get_temp_dir() + 
checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + save_graph = tf.Graph() + with save_graph.as_default(), self.session(graph=save_graph): + root = self._initialized_model() + object_saver = tf.train.Checkpoint(root=root) + save_path = object_saver.save(file_prefix=checkpoint_prefix) + + with tf.__internal__.eager_context.eager_mode(): + root = self._initialized_model() + self._set_sentinels(root) + saver = tf.compat.v1.train.Saver( + root.model.variables + root.optimizer.variables() + ) + saver.restore(sess=None, save_path=save_path) + self._check_sentinels(root) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/tests/serialization_util_test.py b/keras/tests/serialization_util_test.py index a50373f34c4f..71652e63e5db 100644 --- a/keras/tests/serialization_util_test.py +++ b/keras/tests/serialization_util_test.py @@ -14,48 +14,55 @@ # ============================================================================== """Tests for serialization functions.""" +import json + import tensorflow.compat.v2 as tf -import json -from keras.testing_infra import test_combinations from keras.engine import input_layer from keras.engine import sequential from keras.engine import training from keras.layers import core -from keras.saving.saved_model import json_utils +from keras.saving.legacy.saved_model import json_utils +from keras.testing_infra import test_combinations @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class SerializationTests(test_combinations.TestCase): + def test_serialize_dense(self): + dense = core.Dense(3) + dense(tf.constant([[4.0]])) + round_trip = json.loads( + json.dumps(dense, default=json_utils.get_json_type) + ) + self.assertEqual(3, round_trip["config"]["units"]) + + def test_serialize_sequential(self): + model = sequential.Sequential() + model.add(core.Dense(4)) + model.add(core.Dense(5)) + model(tf.constant([[1.0]])) + sequential_round_trip = json.loads( + json.dumps(model, default=json_utils.get_json_type) + ) + self.assertEqual( + # Note that `config['layers'][0]` will be an InputLayer in V2 + # (but not in V1) + 5, + sequential_round_trip["config"]["layers"][-1]["config"]["units"], + ) + + def test_serialize_model(self): + x = input_layer.Input(shape=[3]) + y = core.Dense(10)(x) + model = training.Model(x, y) + model(tf.constant([[1.0, 1.0, 1.0]])) + model_round_trip = json.loads( + json.dumps(model, default=json_utils.get_json_type) + ) + self.assertEqual( + 10, model_round_trip["config"]["layers"][1]["config"]["units"] + ) - def test_serialize_dense(self): - dense = core.Dense(3) - dense(tf.constant([[4.]])) - round_trip = json.loads(json.dumps( - dense, default=json_utils.get_json_type)) - self.assertEqual(3, round_trip["config"]["units"]) - - def test_serialize_sequential(self): - model = sequential.Sequential() - model.add(core.Dense(4)) - model.add(core.Dense(5)) - model(tf.constant([[1.]])) - sequential_round_trip = json.loads( - json.dumps(model, default=json_utils.get_json_type)) - self.assertEqual( - # Note that `config['layers'][0]` will be an InputLayer in V2 - # (but not in V1) - 5, sequential_round_trip["config"]["layers"][-1]["config"]["units"]) - - def test_serialize_model(self): - x = input_layer.Input(shape=[3]) - y = core.Dense(10)(x) - model = training.Model(x, y) - model(tf.constant([[1., 1., 1.]])) - model_round_trip = json.loads( - json.dumps(model, default=json_utils.get_json_type)) - self.assertEqual( - 10, model_round_trip["config"]["layers"][1]["config"]["units"]) if 
__name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/tests/temporal_sample_weights_correctness_test.py b/keras/tests/temporal_sample_weights_correctness_test.py index c5d758766b4d..f6efd8117c2d 100644 --- a/keras/tests/temporal_sample_weights_correctness_test.py +++ b/keras/tests/temporal_sample_weights_correctness_test.py @@ -14,502 +14,582 @@ # ============================================================================== """Tests temporal sample weights correctness using Keras model.""" -import tensorflow.compat.v2 as tf - import numpy as np +import tensorflow.compat.v2 as tf -from keras.testing_infra import test_combinations from keras import layers from keras import metrics -from keras.optimizers import optimizer_v2 +from keras.optimizers import legacy as optimizer_legacy +from keras.testing_infra import test_combinations from keras.testing_infra import test_utils class Bias(layers.Layer): - """Layer that add a bias to its inputs.""" + """Layer that add a bias to its inputs.""" - def build(self, input_shape): - self.bias = self.add_weight('bias', (1,), initializer='zeros') + def build(self, input_shape): + self.bias = self.add_weight("bias", (1,), initializer="zeros") - def call(self, inputs): - return inputs + self.bias + def call(self, inputs): + return inputs + self.bias - def compute_output_shape(self, input_shape): - return input_shape + def compute_output_shape(self, input_shape): + return input_shape def get_multi_io_temporal_model(): - timesteps = 2 - inp_1 = layers.Input(shape=(1,), name='input_1') - inp_2 = layers.Input(shape=(1,), name='input_2') - x = layers.RepeatVector(timesteps) - out_1 = layers.TimeDistributed(Bias(), name='output_1') - out_2 = layers.TimeDistributed(Bias(), name='output_2') + timesteps = 2 + inp_1 = layers.Input(shape=(1,), name="input_1") + inp_2 = layers.Input(shape=(1,), name="input_2") + x = layers.RepeatVector(timesteps) + out_1 = layers.TimeDistributed(Bias(), name="output_1") + out_2 = layers.TimeDistributed(Bias(), name="output_2") - branch_a = [inp_1, x, out_1] - branch_b = [inp_2, x, out_2] - return test_utils.get_multi_io_model(branch_a, branch_b) + branch_a = [inp_1, x, out_1] + branch_b = [inp_2, x, out_2] + return test_utils.get_multi_io_model(branch_a, branch_b) def get_compiled_multi_io_model_temporal(sample_weight_mode): - model = get_multi_io_temporal_model() - model.compile( - optimizer=optimizer_v2.gradient_descent.SGD(0.1), - loss='mae', - metrics=[metrics.MeanAbsoluteError(name='mae')], - weighted_metrics=[metrics.MeanAbsoluteError(name='mae_2')], - sample_weight_mode=sample_weight_mode, - run_eagerly=test_utils.should_run_eagerly()) - return model + model = get_multi_io_temporal_model() + model.compile( + optimizer=optimizer_legacy.gradient_descent.SGD(0.1), + loss="mae", + metrics=[metrics.MeanAbsoluteError(name="mae")], + weighted_metrics=[metrics.MeanAbsoluteError(name="mae_2")], + sample_weight_mode=sample_weight_mode, + run_eagerly=test_utils.should_run_eagerly(), + ) + return model def run_with_different_sample_weight_mode_inputs(fn, partial_sw=True): - """Executes the given function with different sample weight mode inputs. - - Args: - fn: Training or eval function to execute. - partial_sw: Boolean flag to indicate whether temporal sample weight mode - should be set partially just for one output. 
- """ - model = get_compiled_multi_io_model_temporal(sample_weight_mode='temporal') - fn(model) - - model = get_compiled_multi_io_model_temporal( - sample_weight_mode=['temporal', 'temporal']) - fn(model) - - model = get_compiled_multi_io_model_temporal(sample_weight_mode={ - 'output_1': 'temporal', - 'output_2': 'temporal' - }) - fn(model) - - if partial_sw: - model = get_compiled_multi_io_model_temporal( - sample_weight_mode=[None, 'temporal']) + """Executes the given function with different sample weight mode inputs. + + Args: + fn: Training or eval function to execute. + partial_sw: Boolean flag to indicate whether temporal sample weight mode + should be set partially just for one output. + """ + model = get_compiled_multi_io_model_temporal(sample_weight_mode="temporal") fn(model) - # TODO(b/129700800): Enable after bug is fixed. - # model = get_compiled_multi_io_model_temporal(sample_weight_mode={ - # 'output_2': 'temporal' - # }) - # fn(model) + model = get_compiled_multi_io_model_temporal( + sample_weight_mode=["temporal", "temporal"] + ) + fn(model) + model = get_compiled_multi_io_model_temporal( + sample_weight_mode={"output_1": "temporal", "output_2": "temporal"} + ) + fn(model) -@test_combinations.run_with_all_model_types(exclude_models=['sequential']) -@test_combinations.run_all_keras_modes(always_skip_v1=True) -class TestMetricsCorrectnessMultiIOTemporal(test_combinations.TestCase): + if partial_sw: + model = get_compiled_multi_io_model_temporal( + sample_weight_mode=[None, "temporal"] + ) + fn(model) - def custom_generator_multi_io_temporal(self, sample_weights=None): - """Generator for getting data for temporal multi io model. + # TODO(b/129700800): Enable after bug is fixed. + # model = get_compiled_multi_io_model_temporal(sample_weight_mode={ + # 'output_2': 'temporal' + # }) + # fn(model) - Args: - sample_weights: List of sample_weights. - Yields: - Tuple of inputs, label, sample weights data. 
- """ - batch_size = 3 - num_samples = 3 - iteration = 0 - while True: - batch_index = iteration * batch_size % num_samples - iteration += 1 - start = batch_index - end = start + batch_size - x = [self.x[start:end], self.x[start:end]] - y = [self.y1[start:end], self.y2[start:end]] - if sample_weights: - sw = tf.nest.map_structure(lambda w: w[start:end], sample_weights) - else: - sw = None - yield x, y, sw - - def setUp(self): - super(TestMetricsCorrectnessMultiIOTemporal, self).setUp() - - self.x = np.asarray([[0.], [1.], [2.]]) - self.y1 = np.asarray([[[.5], [1.]], [[2.], [2.5]], [[3.5], [2.5]]]) - self.y2 = np.asarray([[[.5], [1.5]], [[2.], [1.5]], [[3.5], [3.]]]) - - # Without weights: - # Epoch 1 - bias = 0 - # y_pred_1 = [[[0.], [0.]], [[1.], [1.]], [[2.], [2.]]] - # y_pred_2 = [[[0.], [0.]], [[1.], [1.]], [[2.], [2.]]] - # mae (y1 - y_pred_1) = [[[.5], [1.]], [[1.], [1.5]], [[1.5], [.5]]] - # mae = [[3/3, 3/3]] = [[1, 1]] = 2/2 = 1 - # mae_2 (y2 - y_pred_2) = [[[.5], [1.5]], [[1.], [.5]], [[1.5], [1.]]] - # mae_2 = [[3/3, 3/3]] = [[1, 1]] = 2/2 = 1 - - # Epoch 2 - bias = 0.1 (2/2 * 0.1) - # y_pred_1 = [[[.1], [.1]], [[1.1], [1.1]], [[2.1], [2.1]]] - # y_pred_2 = [[[.1], [.1]], [[1.1], [1.1]], [[2.1], [2.1]]] - # mae (y1 - y_pred_1) = [[[.4], [.9]], [[.9], [1.4]], [[1.4], [.4]]] - # mae = [[2.7/3, 2.7/3]] = [[0.9, 0.9]] = 1.8/2 = 0.9 - # mae_2 (y2 - y_pred_2) = [[[.4], [1.4]], [[.9], [.4]], [[1.4], [.9]]] - # mae_2 = [[2.7/3, 2.7/3]] = [[0.9, 0.9]] = 1.8/2 = 0.9 - - self.expected_fit_result = { - 'output_1_mae': [1, 0.9], - 'output_2_mae': [1, 0.9], - 'output_1_mae_2': [1, 0.9], - 'output_2_mae_2': [1, 0.9], - 'loss': [2., 1.8], - 'output_1_loss': [1, 0.9], - 'output_2_loss': [1, 0.9], - } - - self.sample_weight_1 = np.asarray([[.5, 2.], [.5, 2.], [.5, 2.]]) - self.sample_weight_2 = np.asarray([[2., .5], [2., .5], [2., .5]]) - - # With weights: - # Epoch 1 - # y_pred_1 = [[[0.], [0.]], [[1.], [1.]], [[2.], [2.]]] - # y_pred_2 = [[[0.], [0.]], [[1.], [1.]], [[2.], [2.]]] - # mae (y1 - y_pred_1) = [[[.5], [1.]], [[1.], [1.5]], [[1.5], [.5]]] - # with weights = [[[.5 * .5], [1 * 2]], - # [[1 * .5], [1.5 * 2]], - # [[1.5 * .5], [.5 * 2]]] - # mae (w/o weights) = [[3/3, 3/3]] = [[1, 1]] = 2/2 = 1 - # mae (weighted mean) = [[1.5/1.5, 6/6]] = [[1, 1]] = 2/2 = 1 - # mae (sum over bs) = [[1.5/3, 6/3]] = [[.5, 2]] = 2.5/2 = 1.25 - - # mae_2 (y2 - y_pred_2) = [[[.5], [1.5]], [[1.], [.5]], [[1.5], [1.]]] - # with weights = [[[.5 * 2], [1.5 * .5]], - # [[1. * 2], [.5 * .5]], - # [[1.5 * 2], [1. 
* .5]]] - # mae_2 (w/o weights) = [[3/3, 3/3]] = [[1, 1]] = 2/2 = 1 - # mae_2 (weighted mean) = [[6/6, 1.5/1.5]] = [[1, 1]] = 2/2 = 1 - # mae_2 (sum over bs) = [[6/3, 1.5/3]] = [[2, .5]] = 2.5/2 = 1.25 - - # Epoch 2 - bias = 0.125 (2.5/2 * 0.1) - # y_pred_1 = [[[0.125], [0.125]], [[1.125], [1.125]], [[2.125], [2.125]]] - # y_pred_2 = [[[0.125], [0.125]], [[1.125], [1.125]], [[2.125], [2.125]]] - - # mae (y1 - y_pred_1) = [[[.375], [.875]], - # [[.875], [1.375]], - # [[1.375], [.375]]] - # with weights = [[[.375 * .5], [.875 * 2.]], - # [[.875 * .5], [1.375 * 2.]], - # [[1.375 * .5], [.375 * 2.]]] - # mae (w/o weights) = [[2.625/3, 2.625/3]] = (.875+.875)/2 = .875 - # mae (weighted mean) = [[1.3125/1.5, 5.25/6]] = (.875+.875)/2 = .875 - # mae (sum over bs) = [[1.3125/3, 5.25/3]] = (0.4375+1.75)/2 = 1.09375 - - # mae_2 (y2 - y_pred_2) = [[[.375], [1.375]], - # [[.875], [.375]], - # [[1.375], [.875]]] - # with weights = [[[.375 * 2.], [1.375 * .5]], - # [[.875 * 2.], [.375 * .5]], - # [[1.375 * 2.], [.875 * .5]]] - # mae_2 (w/o weights) = [[2.625/3, 2.625/3]] = (.875+.875)/2 = .875 - # mae_2 (weighted mean) = [[5.25/6, 1.3125/1.5]] = (.875+.875)/2 = .875 - # mae_2 (sum over bs) = [[5.25/3, 1.3125/3]] = (1.75+0.4375)/2 = 1.09375 - - self.expected_fit_result_with_weights = { - 'output_1_mae': [1, 0.875], - 'output_2_mae': [1, 0.875], - 'output_1_mae_2': [1, 0.875], - 'output_2_mae_2': [1, 0.875], - 'loss': [2.5, 2.1875], - 'output_1_loss': [1.25, 1.09375], - 'output_2_loss': [1.25, 1.09375], - } - - self.expected_fit_result_with_weights_output_2 = { - 'output_1_mae': [1., 0.9], - 'output_2_mae': [1, 0.875], - 'output_1_mae_2': [1., 0.9], - 'output_2_mae_2': [1., 0.875], - 'loss': [2.25, 1.99375], - 'output_1_loss': [1., 0.9], - 'output_2_loss': [1.25, 1.09375], - } - - # In the order: 'loss', 'output_1_loss', 'output_2_loss', - # 'output_1_mae', 'output_1_mae_2', - # 'output_2_mae', 'output_2_mae_2' - self.expected_batch_result_with_weights = [ - 2.1875, 1.09375, 1.09375, 0.875, 0.875, 0.875, 0.875 - ] - self.expected_batch_result_with_weights_output_2 = [ - 1.99375, 0.9, 1.09375, 0.9, 0.9, 0.875, 0.875 - ] - self.expected_batch_result = [1.8, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9] - - def test_fit(self): - - def _train_and_assert(model): - history = model.fit([self.x, self.x], [self.y1, self.y2], - batch_size=3, - epochs=2, - shuffle=False) - for key, value in self.expected_fit_result.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - run_with_different_sample_weight_mode_inputs(_train_and_assert) - - def test_fit_with_sample_weight(self): - - def _train_and_assert(model): - history = model.fit([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }, - batch_size=3, - epochs=2, - shuffle=False) - for key, value in self.expected_fit_result_with_weights.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - run_with_different_sample_weight_mode_inputs( - _train_and_assert, partial_sw=False) - - def test_fit_with_partial_sample_weight(self): - - def _train_and_assert(model): - history = model.fit([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_2': self.sample_weight_2, - }, - batch_size=3, - epochs=2, - shuffle=False) - for key, value in self.expected_fit_result_with_weights_output_2.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - run_with_different_sample_weight_mode_inputs(_train_and_assert) - - def test_eval(self): - - def _eval_and_assert(model): - 
model.train_on_batch([self.x, self.x], [self.y1, self.y2]) - eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], - batch_size=3) - self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) - - run_with_different_sample_weight_mode_inputs(_eval_and_assert) - - def test_eval_with_sample_weight(self): - - def _eval_and_assert(model): - model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], - batch_size=3, - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(eval_result, self.expected_batch_result_with_weights, - 1e-3) - - run_with_different_sample_weight_mode_inputs( - _eval_and_assert, partial_sw=False) - - def test_eval_with_partial_sample_weight(self): - - def _eval_and_assert(model): - model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_2': self.sample_weight_2, - }) - eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], - batch_size=3, - sample_weight={ - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(eval_result, - self.expected_batch_result_with_weights_output_2, - 1e-3) - - run_with_different_sample_weight_mode_inputs(_eval_and_assert) - - def test_train_on_batch(self): - - def _train_and_assert(model): - for _ in range(2): - result = model.train_on_batch([self.x, self.x], [self.y1, self.y2]) - self.assertAllClose(result, self.expected_batch_result, 1e-3) - - run_with_different_sample_weight_mode_inputs(_train_and_assert) - - def test_train_on_batch_with_sample_weight(self): - - def _train_and_assert(model): - for _ in range(2): - result = model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) - - run_with_different_sample_weight_mode_inputs( - _train_and_assert, partial_sw=False) - - def test_train_on_batch_with_partial_sample_weight(self): - - def _train_and_assert(model): - for _ in range(2): - result = model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(result, - self.expected_batch_result_with_weights_output_2, - 1e-3) - - run_with_different_sample_weight_mode_inputs(_train_and_assert) - - def test_test_on_batch(self): - - def _test_and_assert(model): - model.train_on_batch([self.x, self.x], [self.y1, self.y2]) - result = model.test_on_batch([self.x, self.x], [self.y1, self.y2]) - self.assertAllClose(result, self.expected_batch_result, 1e-3) - - run_with_different_sample_weight_mode_inputs(_test_and_assert) - - def test_test_on_batch_with_sample_weight(self): - - def _test_and_assert(model): - model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - result = model.test_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) - - run_with_different_sample_weight_mode_inputs( - _test_and_assert, partial_sw=False) - - def test_test_on_batch_with_partial_sample_weight(self): - - def _test_and_assert(model): - model.train_on_batch([self.x, self.x], 
[self.y1, self.y2], - sample_weight={ - 'output_2': self.sample_weight_2, - }) - result = model.test_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_2': self.sample_weight_2, - }) - self.assertAllClose(result, - self.expected_batch_result_with_weights_output_2, - 1e-3) - - run_with_different_sample_weight_mode_inputs(_test_and_assert) - - def test_fit_generator(self): - - def _train_and_assert(model): - history = model.fit_generator( - self.custom_generator_multi_io_temporal(), - steps_per_epoch=1, - epochs=2) - for key, value in self.expected_fit_result.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - run_with_different_sample_weight_mode_inputs(_train_and_assert) - - def test_fit_generator_with_sample_weight(self): - - def _train_and_assert(model): - history = model.fit_generator( - self.custom_generator_multi_io_temporal( - sample_weights=[self.sample_weight_1, self.sample_weight_2]), - steps_per_epoch=1, - epochs=2) - for key, value in self.expected_fit_result_with_weights.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - run_with_different_sample_weight_mode_inputs( - _train_and_assert, partial_sw=False) - - def test_fit_generator_with_partial_sample_weight(self): - - def _train_and_assert(model): - history = model.fit_generator( - self.custom_generator_multi_io_temporal( - sample_weights={'output_2': self.sample_weight_2}), - steps_per_epoch=1, - epochs=2) - for key, value in self.expected_fit_result_with_weights_output_2.items(): - self.assertAllClose(history.history[key], value, 1e-3) - - run_with_different_sample_weight_mode_inputs(_train_and_assert) - - def test_eval_generator(self): - - def _test_and_assert(model): - model.train_on_batch([self.x, self.x], [self.y1, self.y2]) - eval_result = model.evaluate_generator( - self.custom_generator_multi_io_temporal(), steps=1) - self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) - - run_with_different_sample_weight_mode_inputs(_test_and_assert) - - def test_eval_generator_with_sample_weight(self): - - def _test_and_assert(model): - model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_1': self.sample_weight_1, - 'output_2': self.sample_weight_2, - }) - eval_result = model.evaluate_generator( - self.custom_generator_multi_io_temporal( - sample_weights=[self.sample_weight_1, self.sample_weight_2]), - steps=2) - self.assertAllClose(eval_result, self.expected_batch_result_with_weights, - 1e-3) - - run_with_different_sample_weight_mode_inputs( - _test_and_assert, partial_sw=False) - - def test_eval_generator_with_partial_sample_weight(self): - - def _test_and_assert(model): - model.train_on_batch([self.x, self.x], [self.y1, self.y2], - sample_weight={ - 'output_2': self.sample_weight_2, - }) - eval_result = model.evaluate_generator( - self.custom_generator_multi_io_temporal( - sample_weights={'output_2': self.sample_weight_2}), - steps=2) - self.assertAllClose(eval_result, - self.expected_batch_result_with_weights_output_2, - 1e-3) - - run_with_different_sample_weight_mode_inputs(_test_and_assert) - - def test_error_on_fit_with_class_weight(self): - - def _train_and_assert(model): - with self.assertRaises(ValueError): - model.fit([self.x, self.x], [self.y1, self.y2], - class_weight={'output_1': { - .5: .5, - 2.: .5, - 3.5: .5 - }}, - batch_size=3, - epochs=2, - shuffle=False) - - run_with_different_sample_weight_mode_inputs(_train_and_assert) - - -if __name__ == '__main__': - tf.test.main() 
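The expected values hard-coded into the setUp comments can be reproduced by hand. A minimal NumPy sketch of the epoch-1 arithmetic for output_1 (assuming the predictions shown in the comments, i.e. a model that is effectively a zero-initialized bias); note also that with batch_size == num_samples == 3, batch_index is always 0, so the generator yields the full dataset on every step:

import numpy as np

# Epoch 1, output_1: |y1 - y_pred| per sample and time step (last dim squeezed).
abs_err = np.array([[0.5, 1.0], [1.0, 1.5], [1.5, 0.5]])
w = np.array([[0.5, 2.0], [0.5, 2.0], [0.5, 2.0]])  # self.sample_weight_1

unweighted_mae = abs_err.mean(axis=0).mean()  # 1.0 -> 'output_1_mae'[0]
# Metric reduction: weighted mean, i.e. divide by the sum of the weights.
weighted_mae = ((abs_err * w).sum(axis=0) / w.sum(axis=0)).mean()  # 1.0
# Loss reduction: sum over batch size, i.e. divide by the batch size.
weighted_loss = ((abs_err * w).sum(axis=0) / len(abs_err)).mean()  # 1.25

The 1.0 / 1.25 split is exactly the "weighted mean" vs. "sum over bs" distinction the comments track, and it is why 'output_1_mae' starts at 1 while 'output_1_loss' starts at 1.25 in expected_fit_result_with_weights.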
+@test_combinations.run_with_all_model_types(exclude_models=["sequential"]) +@test_combinations.run_all_keras_modes(always_skip_v1=True) +class TestMetricsCorrectnessMultiIOTemporal(test_combinations.TestCase): + def custom_generator_multi_io_temporal(self, sample_weights=None): + """Generator for getting data for temporal multi io model. + + Args: + sample_weights: List of sample_weights. + + Yields: + Tuple of inputs, label, sample weights data. + """ + batch_size = 3 + num_samples = 3 + iteration = 0 + while True: + batch_index = iteration * batch_size % num_samples + iteration += 1 + start = batch_index + end = start + batch_size + x = [self.x[start:end], self.x[start:end]] + y = [self.y1[start:end], self.y2[start:end]] + if sample_weights: + sw = tf.nest.map_structure( + lambda w: w[start:end], sample_weights + ) + else: + sw = None + yield x, y, sw + + def setUp(self): + super(TestMetricsCorrectnessMultiIOTemporal, self).setUp() + + self.x = np.asarray([[0.0], [1.0], [2.0]]) + self.y1 = np.asarray([[[0.5], [1.0]], [[2.0], [2.5]], [[3.5], [2.5]]]) + self.y2 = np.asarray([[[0.5], [1.5]], [[2.0], [1.5]], [[3.5], [3.0]]]) + + # Without weights: + # Epoch 1 - bias = 0 + # y_pred_1 = [[[0.], [0.]], [[1.], [1.]], [[2.], [2.]]] + # y_pred_2 = [[[0.], [0.]], [[1.], [1.]], [[2.], [2.]]] + # mae (y1 - y_pred_1) = [[[.5], [1.]], [[1.], [1.5]], [[1.5], [.5]]] + # mae = [[3/3, 3/3]] = [[1, 1]] = 2/2 = 1 + # mae_2 (y2 - y_pred_2) = [[[.5], [1.5]], [[1.], [.5]], [[1.5], [1.]]] + # mae_2 = [[3/3, 3/3]] = [[1, 1]] = 2/2 = 1 + + # Epoch 2 - bias = 0.1 (2/2 * 0.1) + # y_pred_1 = [[[.1], [.1]], [[1.1], [1.1]], [[2.1], [2.1]]] + # y_pred_2 = [[[.1], [.1]], [[1.1], [1.1]], [[2.1], [2.1]]] + # mae (y1 - y_pred_1) = [[[.4], [.9]], [[.9], [1.4]], [[1.4], [.4]]] + # mae = [[2.7/3, 2.7/3]] = [[0.9, 0.9]] = 1.8/2 = 0.9 + # mae_2 (y2 - y_pred_2) = [[[.4], [1.4]], [[.9], [.4]], [[1.4], [.9]]] + # mae_2 = [[2.7/3, 2.7/3]] = [[0.9, 0.9]] = 1.8/2 = + # 0.9 + + self.expected_fit_result = { + "output_1_mae": [1, 0.9], + "output_2_mae": [1, 0.9], + "output_1_mae_2": [1, 0.9], + "output_2_mae_2": [1, 0.9], + "loss": [2.0, 1.8], + "output_1_loss": [1, 0.9], + "output_2_loss": [1, 0.9], + } + + self.sample_weight_1 = np.asarray([[0.5, 2.0], [0.5, 2.0], [0.5, 2.0]]) + self.sample_weight_2 = np.asarray([[2.0, 0.5], [2.0, 0.5], [2.0, 0.5]]) + + # With weights: + # Epoch 1 + # y_pred_1 = [[[0.], [0.]], [[1.], [1.]], [[2.], [2.]]] + # y_pred_2 = [[[0.], [0.]], [[1.], [1.]], [[2.], [2.]]] + # mae (y1 - y_pred_1) = [[[.5], [1.]], [[1.], [1.5]], [[1.5], [.5]]] + # with weights = [[[.5 * .5], [1 * 2]], + # [[1 * .5], [1.5 * 2]], + # [[1.5 * .5], [.5 * 2]]] + # mae (w/o weights) = [[3/3, 3/3]] = [[1, 1]] = 2/2 = 1 + # mae (weighted mean) = [[1.5/1.5, 6/6]] = [[1, 1]] = 2/2 = 1 + # mae (sum over bs) = [[1.5/3, 6/3]] = [[.5, 2]] = 2.5/2 = 1.25 + + # mae_2 (y2 - y_pred_2) = [[[.5], [1.5]], [[1.], [.5]], [[1.5], [1.]]] + # with weights = [[[.5 * 2], [1.5 * .5]], + # [[1. * 2], [.5 * .5]], + # [[1.5 * 2], [1. 
* .5]]] + # mae_2 (w/o weights) = [[3/3, 3/3]] = [[1, 1]] = 2/2 = 1 + # mae_2 (weighted mean) = [[6/6, 1.5/1.5]] = [[1, 1]] = 2/2 = 1 + # mae_2 (sum over bs) = [[6/3, 1.5/3]] = [[2, .5]] = 2.5/2 = 1.25 + + # Epoch 2 - bias = 0.125 (2.5/2 * 0.1) + # y_pred_1 = [[[0.125], [0.125]], [[1.125], [1.125]], [[2.125], + # [2.125]]] + # y_pred_2 = [[[0.125], [0.125]], [[1.125], [1.125]], [[2.125], + # [2.125]]] + + # mae (y1 - y_pred_1) = [[[.375], [.875]], + # [[.875], [1.375]], + # [[1.375], [.375]]] + # with weights = [[[.375 * .5], [.875 * 2.]], + # [[.875 * .5], [1.375 * 2.]], + # [[1.375 * .5], [.375 * 2.]]] + # mae (w/o weights) = [[2.625/3, 2.625/3]] = (.875+.875)/2 = .875 + # mae (weighted mean) = [[1.3125/1.5, 5.25/6]] = (.875+.875)/2 = .875 + # mae (sum over bs) = [[1.3125/3, 5.25/3]] = (0.4375+1.75)/2 = + # 1.09375 + + # mae_2 (y2 - y_pred_2) = [[[.375], [1.375]], + # [[.875], [.375]], + # [[1.375], [.875]]] + # with weights = [[[.375 * 2.], [1.375 * .5]], + # [[.875 * 2.], [.375 * .5]], + # [[1.375 * 2.], [.875 * .5]]] + # mae_2 (w/o weights) = [[2.625/3, 2.625/3]] = (.875+.875)/2 = .875 + # mae_2 (weighted mean) = [[5.25/6, 1.3125/1.5]] = (.875+.875)/2 = + # .875 + # mae_2 (sum over bs) = [[5.25/3, 1.3125/3]] = (1.75+0.4375)/2 = + # 1.09375 + + self.expected_fit_result_with_weights = { + "output_1_mae": [1, 0.875], + "output_2_mae": [1, 0.875], + "output_1_mae_2": [1, 0.875], + "output_2_mae_2": [1, 0.875], + "loss": [2.5, 2.1875], + "output_1_loss": [1.25, 1.09375], + "output_2_loss": [1.25, 1.09375], + } + + self.expected_fit_result_with_weights_output_2 = { + "output_1_mae": [1.0, 0.9], + "output_2_mae": [1, 0.875], + "output_1_mae_2": [1.0, 0.9], + "output_2_mae_2": [1.0, 0.875], + "loss": [2.25, 1.99375], + "output_1_loss": [1.0, 0.9], + "output_2_loss": [1.25, 1.09375], + } + + # In the order: 'loss', 'output_1_loss', 'output_2_loss', + # 'output_1_mae', 'output_1_mae_2', + # 'output_2_mae', 'output_2_mae_2' + self.expected_batch_result_with_weights = [ + 2.1875, + 1.09375, + 1.09375, + 0.875, + 0.875, + 0.875, + 0.875, + ] + self.expected_batch_result_with_weights_output_2 = [ + 1.99375, + 0.9, + 1.09375, + 0.9, + 0.9, + 0.875, + 0.875, + ] + self.expected_batch_result = [1.8, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9] + + def test_fit(self): + def _train_and_assert(model): + history = model.fit( + [self.x, self.x], + [self.y1, self.y2], + batch_size=3, + epochs=2, + shuffle=False, + ) + for key, value in self.expected_fit_result.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + run_with_different_sample_weight_mode_inputs(_train_and_assert) + + def test_fit_with_sample_weight(self): + def _train_and_assert(model): + history = model.fit( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + batch_size=3, + epochs=2, + shuffle=False, + ) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + run_with_different_sample_weight_mode_inputs( + _train_and_assert, partial_sw=False + ) + + def test_fit_with_partial_sample_weight(self): + def _train_and_assert(model): + history = model.fit( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_2": self.sample_weight_2, + }, + batch_size=3, + epochs=2, + shuffle=False, + ) + for ( + key, + value, + ) in self.expected_fit_result_with_weights_output_2.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + 
run_with_different_sample_weight_mode_inputs(_train_and_assert) + + def test_eval(self): + def _eval_and_assert(model): + model.train_on_batch([self.x, self.x], [self.y1, self.y2]) + eval_result = model.evaluate( + [self.x, self.x], [self.y1, self.y2], batch_size=3 + ) + self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + + run_with_different_sample_weight_mode_inputs(_eval_and_assert) + + def test_eval_with_sample_weight(self): + def _eval_and_assert(model): + model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + eval_result = model.evaluate( + [self.x, self.x], + [self.y1, self.y2], + batch_size=3, + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + eval_result, self.expected_batch_result_with_weights, 1e-3 + ) + + run_with_different_sample_weight_mode_inputs( + _eval_and_assert, partial_sw=False + ) + + def test_eval_with_partial_sample_weight(self): + def _eval_and_assert(model): + model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_2": self.sample_weight_2, + }, + ) + eval_result = model.evaluate( + [self.x, self.x], + [self.y1, self.y2], + batch_size=3, + sample_weight={ + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + eval_result, + self.expected_batch_result_with_weights_output_2, + 1e-3, + ) + + run_with_different_sample_weight_mode_inputs(_eval_and_assert) + + def test_train_on_batch(self): + def _train_and_assert(model): + for _ in range(2): + result = model.train_on_batch( + [self.x, self.x], [self.y1, self.y2] + ) + self.assertAllClose(result, self.expected_batch_result, 1e-3) + + run_with_different_sample_weight_mode_inputs(_train_and_assert) + + def test_train_on_batch_with_sample_weight(self): + def _train_and_assert(model): + for _ in range(2): + result = model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights, 1e-3 + ) + + run_with_different_sample_weight_mode_inputs( + _train_and_assert, partial_sw=False + ) + + def test_train_on_batch_with_partial_sample_weight(self): + def _train_and_assert(model): + for _ in range(2): + result = model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights_output_2, 1e-3 + ) + + run_with_different_sample_weight_mode_inputs(_train_and_assert) + + def test_test_on_batch(self): + def _test_and_assert(model): + model.train_on_batch([self.x, self.x], [self.y1, self.y2]) + result = model.test_on_batch([self.x, self.x], [self.y1, self.y2]) + self.assertAllClose(result, self.expected_batch_result, 1e-3) + + run_with_different_sample_weight_mode_inputs(_test_and_assert) + + def test_test_on_batch_with_sample_weight(self): + def _test_and_assert(model): + model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + result = model.test_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights, 1e-3 + ) + + 
run_with_different_sample_weight_mode_inputs( + _test_and_assert, partial_sw=False + ) + + def test_test_on_batch_with_partial_sample_weight(self): + def _test_and_assert(model): + model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_2": self.sample_weight_2, + }, + ) + result = model.test_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_2": self.sample_weight_2, + }, + ) + self.assertAllClose( + result, self.expected_batch_result_with_weights_output_2, 1e-3 + ) + + run_with_different_sample_weight_mode_inputs(_test_and_assert) + + def test_fit_generator(self): + def _train_and_assert(model): + history = model.fit_generator( + self.custom_generator_multi_io_temporal(), + steps_per_epoch=1, + epochs=2, + ) + for key, value in self.expected_fit_result.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + run_with_different_sample_weight_mode_inputs(_train_and_assert) + + def test_fit_generator_with_sample_weight(self): + def _train_and_assert(model): + history = model.fit_generator( + self.custom_generator_multi_io_temporal( + sample_weights=[self.sample_weight_1, self.sample_weight_2] + ), + steps_per_epoch=1, + epochs=2, + ) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + run_with_different_sample_weight_mode_inputs( + _train_and_assert, partial_sw=False + ) + + def test_fit_generator_with_partial_sample_weight(self): + def _train_and_assert(model): + history = model.fit_generator( + self.custom_generator_multi_io_temporal( + sample_weights={"output_2": self.sample_weight_2} + ), + steps_per_epoch=1, + epochs=2, + ) + for ( + key, + value, + ) in self.expected_fit_result_with_weights_output_2.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + run_with_different_sample_weight_mode_inputs(_train_and_assert) + + def test_eval_generator(self): + def _test_and_assert(model): + model.train_on_batch([self.x, self.x], [self.y1, self.y2]) + eval_result = model.evaluate_generator( + self.custom_generator_multi_io_temporal(), steps=1 + ) + self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + + run_with_different_sample_weight_mode_inputs(_test_and_assert) + + def test_eval_generator_with_sample_weight(self): + def _test_and_assert(model): + model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_1": self.sample_weight_1, + "output_2": self.sample_weight_2, + }, + ) + eval_result = model.evaluate_generator( + self.custom_generator_multi_io_temporal( + sample_weights=[self.sample_weight_1, self.sample_weight_2] + ), + steps=2, + ) + self.assertAllClose( + eval_result, self.expected_batch_result_with_weights, 1e-3 + ) + + run_with_different_sample_weight_mode_inputs( + _test_and_assert, partial_sw=False + ) + + def test_eval_generator_with_partial_sample_weight(self): + def _test_and_assert(model): + model.train_on_batch( + [self.x, self.x], + [self.y1, self.y2], + sample_weight={ + "output_2": self.sample_weight_2, + }, + ) + eval_result = model.evaluate_generator( + self.custom_generator_multi_io_temporal( + sample_weights={"output_2": self.sample_weight_2} + ), + steps=2, + ) + self.assertAllClose( + eval_result, + self.expected_batch_result_with_weights_output_2, + 1e-3, + ) + + run_with_different_sample_weight_mode_inputs(_test_and_assert) + + def test_error_on_fit_with_class_weight(self): + def _train_and_assert(model): + with self.assertRaises(ValueError): + 
model.fit( + [self.x, self.x], + [self.y1, self.y2], + class_weight={"output_1": {0.5: 0.5, 2.0: 0.5, 3.5: 0.5}}, + batch_size=3, + epochs=2, + shuffle=False, + ) + + run_with_different_sample_weight_mode_inputs(_train_and_assert) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/tests/tracking_test.py b/keras/tests/tracking_test.py index de6b8ba56512..c8c639dcd360 100644 --- a/keras/tests/tracking_test.py +++ b/keras/tests/tracking_test.py @@ -15,586 +15,625 @@ import os +import numpy import tensorflow.compat.v2 as tf - from absl.testing import parameterized -import numpy -from keras.testing_infra import test_combinations + from keras.engine import sequential from keras.engine import training from keras.layers import core from keras.layers.normalization import batch_normalization_v1 -from tensorflow.python.training.tracking import data_structures -from tensorflow.python.training.tracking import util - - -class HasList(training.Model): - - def __init__(self): - super().__init__() - self.layer_list = tf.__internal__.tracking.wrap([core.Dense(3)]) - self.layer_list.append(core.Dense(4)) - self.layer_list.extend( - [core.Dense(5), - core.Dense(6, kernel_regularizer=tf.reduce_sum)]) - self.layer_list += [ - core.Dense(7, bias_regularizer=tf.reduce_sum), - core.Dense(8) - ] - self.layer_list += ( - tf.__internal__.tracking.wrap([core.Dense(9)]) + - tf.__internal__.tracking.wrap([core.Dense(10)])) - self.layer_list.extend( - tf.__internal__.tracking.wrap( - list([core.Dense(11)]) + [core.Dense(12)])) - self.layers_with_updates = tf.__internal__.tracking.wrap( - [batch_normalization_v1.BatchNormalization()]) - - def call(self, x): - aggregation = 0. - for l in self.layer_list: - x = l(x) - aggregation += tf.reduce_sum(x) - bn, = self.layers_with_updates - return bn(x) / aggregation - - -class ListTests(test_combinations.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testTracking(self): - with self.test_session(): - model = HasList() - output = model(tf.ones([32, 2])) - self.assertAllEqual([32, 12], output.shape) - self.assertEqual(11, len(model.layers)) - self.assertEqual(10, len(model.layer_list.layers)) - self.assertEqual( - len(model.layers), - len(model.layer_list.layers + model.layers_with_updates)) - for index in range(10): - self.assertEqual(3 + index, model.layer_list.layers[index].units) - children = model._trackable_children() - self.assertLen(children, 2) - self.assertIs(model.layer_list, children["layer_list"]) - self.assertIs(model.layers_with_updates, - children["layers_with_updates"]) - self.assertLen( - children["layer_list"]._trackable_children(), 10) - self.evaluate([v.initializer for v in model.variables]) - self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]])) - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - self.evaluate(model.variables[0].assign(tf.zeros([2, 3]))) - model.load_weights(save_path) - self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]], - self.evaluate(model.variables[0])) - v = tf.Variable(1.) - model.var_list = [v] - self.assertTrue(any(v is t for t in model.variables)) - self.assertTrue(any(v is t for t in model.trainable_variables)) - self.assertFalse(any(v is t for t in model.non_trainable_variables)) - self.assertTrue(any(model.layer_list[0].trainable_weights[0] - is t for t in model.trainable_weights)) - - def testSubModelTracking(self): - model = training.Model() - model.v = tf.Variable(1.) 
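The HasList fixture in this file (removed above in the old style, re-added in black style) exercises Keras's automatic attribute tracking: assigning a list of layers to a Model attribute wraps the list, so layers added later through append/extend are tracked as well. A self-contained sketch of that behavior using the same modules this test imports (Small is a made-up name; runnable under TF2 eager):

import tensorflow.compat.v2 as tf
from keras.engine import training
from keras.layers import core

class Small(training.Model):
    def __init__(self):
        super().__init__()
        self.blocks = [core.Dense(3)]      # the list is auto-wrapped for tracking
        self.blocks.append(core.Dense(4))  # appended layers are tracked too

    def call(self, x):
        for block in self.blocks:
            x = block(x)
        return x

m = Small()
m(tf.ones([1, 2]))
assert len(m.layers) == 2
assert len(m.trainable_weights) == 4  # one kernel and one bias per Dense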
- self.assertIn(model.v, model.trainable_weights) - model2 = training.Model() - model2.m = [model] - self.assertIn(model.v, model2.trainable_weights) - - def testSubSequentialTracking(self): - - class _Subclassed(training.Model): - - def __init__(self, wrapped): - super().__init__() - self._wrapped = wrapped - - def call(self, x): - return self._wrapped(x) - - model = sequential.Sequential() - layer = core.Dense(1) - model.add(layer) - model2 = _Subclassed(model) - model2(tf.ones([1, 2])) - model2.m = [model] - self.assertIn(layer.kernel, model2.trainable_weights) - - def testLayerTrackedThroughSequential(self): - class AttrDict(dict): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.__dict__ = self - - def ffnet(layer_sizes, name): - ff = sequential.Sequential(name=name) - for i, width in enumerate(layer_sizes): - ff.add(core.Dense( - width, - activation=("relu" if i < len(layer_sizes)-1 else None))) - return ff - - class MyModel2(training.Model): - - def __init__(self, config, name="my_model_2"): - super().__init__(name=name) - self._num_tokens = config.num_tokens - - # list of sub-models - self._ffnet = [ffnet(config.module_layers + (self._num_tokens,), "ff")] - - def null_input(self): - return tf.zeros([1, self._num_tokens], dtype=tf.float32) - - def call(self, input_, module_index=None): - return self._ffnet[0](input_) - - m2 = MyModel2(AttrDict( - num_tokens=5, - module_layers=(50, 30))) - - # Construct - m2(m2.null_input()) - self.assertLen(m2.trainable_variables, 6) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testUpdatesForwarded(self): - model = HasList() - model_input = tf.ones([32, 2]) - model(model_input) - if tf.executing_eagerly(): - self.assertEqual(0, len(model.updates)) - else: - self.assertGreater(len(model.layers_with_updates[0].updates), 0) - self.assertEqual(set(model.layers_with_updates[0].updates), - set(model.updates)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testLossesForwarded(self): - model = HasList() - model_input = tf.ones([32, 2]) - model(model_input) - self.assertEqual(2, len(model.losses)) - - def testModelContainersCompareEqual(self): - class HasEqualContainers(training.Model): - - def __init__(self): - super().__init__() - self.l1 = [] - self.l2 = [] - - model = HasEqualContainers() - first_layer = HasEqualContainers() - model.l1.append(first_layer) - second_layer = HasEqualContainers() - model.l2.append(second_layer) - self.assertEqual([first_layer, second_layer], model.layers) +from keras.testing_infra import test_combinations - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testTensorConversion(self): +# isort: off +from tensorflow.python.trackable import data_structures +from tensorflow.python.checkpoint import checkpoint as util - class ListToTensor(training.Model): - def __init__(self): +class HasList(training.Model): + def __init__(self): super().__init__() - self.l = [1., 2., 3.] 
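The new "# isort: off" block above exists because these are TF-internal modules whose paths changed in this migration: data_structures moved from tensorflow.python.training.tracking to tensorflow.python.trackable, and the checkpoint utilities moved to tensorflow.python.checkpoint. For code that must run against both layouts, a hypothetical compatibility shim (not part of this diff) could look like:

# Internal TF paths are not API-stable; prefer public tf.train/tf.Module APIs.
try:
    from tensorflow.python.trackable import data_structures
    from tensorflow.python.checkpoint import checkpoint as util
except ImportError:  # pre-reorganization TensorFlow
    from tensorflow.python.training.tracking import data_structures
    from tensorflow.python.training.tracking import util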
+ self.layer_list = tf.__internal__.tracking.wrap([core.Dense(3)]) + self.layer_list.append(core.Dense(4)) + self.layer_list.extend( + [core.Dense(5), core.Dense(6, kernel_regularizer=tf.reduce_sum)] + ) + self.layer_list += [ + core.Dense(7, bias_regularizer=tf.reduce_sum), + core.Dense(8), + ] + self.layer_list += tf.__internal__.tracking.wrap( + [core.Dense(9)] + ) + tf.__internal__.tracking.wrap([core.Dense(10)]) + self.layer_list.extend( + tf.__internal__.tracking.wrap( + list([core.Dense(11)]) + [core.Dense(12)] + ) + ) + self.layers_with_updates = tf.__internal__.tracking.wrap( + [batch_normalization_v1.BatchNormalization()] + ) + + def call(self, x): + aggregation = 0.0 + for l in self.layer_list: + x = l(x) + aggregation += tf.reduce_sum(x) + (bn,) = self.layers_with_updates + return bn(x) / aggregation - self.assertAllEqual( - [1., 2., 3.], - self.evaluate(tf.constant(ListToTensor().l))) - self.assertAllEqual( - [1., 2., 3.], - self.evaluate(tf.raw_ops.Pack(values=ListToTensor().l))) +class ListTests(test_combinations.TestCase): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testTracking(self): + with self.test_session(): + model = HasList() + output = model(tf.ones([32, 2])) + self.assertAllEqual([32, 12], output.shape) + self.assertEqual(11, len(model.layers)) + self.assertEqual(10, len(model.layer_list.layers)) + self.assertEqual( + len(model.layers), + len(model.layer_list.layers + model.layers_with_updates), + ) + for index in range(10): + self.assertEqual( + 3 + index, model.layer_list.layers[index].units + ) + children = model._trackable_children() + self.assertLen(children, 2) + self.assertIs(model.layer_list, children["layer_list"]) + self.assertIs( + model.layers_with_updates, children["layers_with_updates"] + ) + self.assertLen(children["layer_list"]._trackable_children(), 10) + self.evaluate([v.initializer for v in model.variables]) + self.evaluate( + model.variables[0].assign([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + ) + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + self.evaluate(model.variables[0].assign(tf.zeros([2, 3]))) + model.load_weights(save_path) + self.assertAllEqual( + [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], + self.evaluate(model.variables[0]), + ) + v = tf.Variable(1.0) + model.var_list = [v] + self.assertTrue(any(v is t for t in model.variables)) + self.assertTrue(any(v is t for t in model.trainable_variables)) + self.assertFalse(any(v is t for t in model.non_trainable_variables)) + self.assertTrue( + any( + model.layer_list[0].trainable_weights[0] is t + for t in model.trainable_weights + ) + ) + + def testSubModelTracking(self): + model = training.Model() + model.v = tf.Variable(1.0) + self.assertIn(model.v, model.trainable_weights) + model2 = training.Model() + model2.m = [model] + self.assertIn(model.v, model2.trainable_weights) + + def testSubSequentialTracking(self): + class _Subclassed(training.Model): + def __init__(self, wrapped): + super().__init__() + self._wrapped = wrapped + + def call(self, x): + return self._wrapped(x) + + model = sequential.Sequential() + layer = core.Dense(1) + model.add(layer) + model2 = _Subclassed(model) + model2(tf.ones([1, 2])) + model2.m = [model] + self.assertIn(layer.kernel, model2.trainable_weights) + + def testLayerTrackedThroughSequential(self): + class AttrDict(dict): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.__dict__ = self + + def ffnet(layer_sizes, name): + ff = 
sequential.Sequential(name=name) + for i, width in enumerate(layer_sizes): + ff.add( + core.Dense( + width, + activation=( + "relu" if i < len(layer_sizes) - 1 else None + ), + ) + ) + return ff + + class MyModel2(training.Model): + def __init__(self, config, name="my_model_2"): + super().__init__(name=name) + self._num_tokens = config.num_tokens + + # list of sub-models + self._ffnet = [ + ffnet(config.module_layers + (self._num_tokens,), "ff") + ] + + def null_input(self): + return tf.zeros([1, self._num_tokens], dtype=tf.float32) + + def call(self, input_, module_index=None): + return self._ffnet[0](input_) + + m2 = MyModel2(AttrDict(num_tokens=5, module_layers=(50, 30))) + + # Construct + m2(m2.null_input()) + self.assertLen(m2.trainable_variables, 6) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testUpdatesForwarded(self): + model = HasList() + model_input = tf.ones([32, 2]) + model(model_input) + if tf.executing_eagerly(): + self.assertEqual(0, len(model.updates)) + else: + self.assertGreater(len(model.layers_with_updates[0].updates), 0) + self.assertEqual( + set(model.layers_with_updates[0].updates), set(model.updates) + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testLossesForwarded(self): + model = HasList() + model_input = tf.ones([32, 2]) + model(model_input) + self.assertEqual(2, len(model.losses)) + + def testModelContainersCompareEqual(self): + class HasEqualContainers(training.Model): + def __init__(self): + super().__init__() + self.l1 = [] + self.l2 = [] + + model = HasEqualContainers() + first_layer = HasEqualContainers() + model.l1.append(first_layer) + second_layer = HasEqualContainers() + model.l2.append(second_layer) + self.assertEqual([first_layer, second_layer], model.layers) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testTensorConversion(self): + class ListToTensor(training.Model): + def __init__(self): + super().__init__() + self.l = [1.0, 2.0, 3.0] + + self.assertAllEqual( + [1.0, 2.0, 3.0], self.evaluate(tf.constant(ListToTensor().l)) + ) + + self.assertAllEqual( + [1.0, 2.0, 3.0], + self.evaluate(tf.raw_ops.Pack(values=ListToTensor().l)), + ) class ListWrapperTest(tf.test.TestCase): - - def testLayerCollectionWithExternalMutation(self): - l = [] - l_wrapper = tf.__internal__.tracking.wrap(l) - layer = core.Dense(1) - l.append(layer) - self.assertEqual([layer], l_wrapper.layers) + def testLayerCollectionWithExternalMutation(self): + l = [] + l_wrapper = tf.__internal__.tracking.wrap(l) + layer = core.Dense(1) + l.append(layer) + self.assertEqual([layer], l_wrapper.layers) class HasMapping(training.Model): - - def __init__(self): - super().__init__() - self.layer_dict = tf.__internal__.tracking.wrap(dict(output=core.Dense(7))) - self.layer_dict["norm"] = tf.__internal__.tracking.wrap([]) - self.layer_dict["dense"] = tf.__internal__.tracking.wrap([]) - self.layer_dict["dense"].extend( - [core.Dense(5), - core.Dense(6, kernel_regularizer=tf.reduce_sum)]) - self.layer_dict["norm"].append( - batch_normalization_v1.BatchNormalization()) - self.layer_dict["norm"].append( - batch_normalization_v1.BatchNormalization()) - - def call(self, x): - aggregation = 0. 
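The assertLen(m2.trainable_variables, 6) check above follows directly from the configuration: config.module_layers + (num_tokens,) is (50, 30, 5), so ffnet builds three Dense layers, each contributing a kernel and a bias. The same count reproduced with the public Sequential API (sizes copied from the test; this sketch is not part of the diff):

import tensorflow as tf

ff = tf.keras.Sequential([
    tf.keras.layers.Dense(50, activation="relu"),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(5),  # output width == num_tokens
])
ff.build([1, 5])  # input width == num_tokens, per null_input()
assert len(ff.trainable_variables) == 6  # (kernel, bias) x 3 layers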
- for norm, dense in zip(self.layer_dict["norm"], self.layer_dict["dense"]): - x = norm(dense(x)) - aggregation += tf.reduce_sum(x) - return self.layer_dict["output"](x) / aggregation + def __init__(self): + super().__init__() + self.layer_dict = tf.__internal__.tracking.wrap( + dict(output=core.Dense(7)) + ) + self.layer_dict["norm"] = tf.__internal__.tracking.wrap([]) + self.layer_dict["dense"] = tf.__internal__.tracking.wrap([]) + self.layer_dict["dense"].extend( + [core.Dense(5), core.Dense(6, kernel_regularizer=tf.reduce_sum)] + ) + self.layer_dict["norm"].append( + batch_normalization_v1.BatchNormalization() + ) + self.layer_dict["norm"].append( + batch_normalization_v1.BatchNormalization() + ) + + def call(self, x): + aggregation = 0.0 + for norm, dense in zip( + self.layer_dict["norm"], self.layer_dict["dense"] + ): + x = norm(dense(x)) + aggregation += tf.reduce_sum(x) + return self.layer_dict["output"](x) / aggregation class MappingTests(test_combinations.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testTracking(self): - with self.test_session(): - model = HasMapping() - output = model(tf.ones([32, 2])) - self.assertAllEqual([32, 7], output.shape.as_list()) - self.assertEqual(5, len(model.layers)) - self.assertEqual(len(model.layers), len(model.layer_dict.layers)) - self.assertLen(model._trackable_children(), 1) - self.assertIs(model.layer_dict, model._trackable_children()["layer_dict"]) - self.evaluate([v.initializer for v in model.variables]) - test_var = model.layer_dict["output"].kernel - self.evaluate(test_var.assign(tf.ones([6, 7]))) - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - self.evaluate(test_var.assign(tf.zeros([6, 7]))) - model.load_weights(save_path) - self.assertAllEqual(numpy.ones([6, 7]), - self.evaluate(test_var)) - - def testLayerCollectionWithExternalMutation(self): - d = {} - root = tf.Module() - root.wrapper = d - self.assertEqual([], root.wrapper.layers) - self.assertEqual([], root.wrapper.trainable_weights) - layer1 = core.Dense(1) - layer2 = core.Dense(1) - d["a"] = layer1 - d["b"] = layer2 - self.assertEqual([layer1, layer2], root.wrapper.layers) - # The layers have still not created variables - self.assertEqual([], root.wrapper.trainable_weights) - - def testDictWrapperBadKeys(self): - a = tf.Module() - a.d = {} - a.d[1] = tf.__internal__.tracking.wrap([]) - model = training.Model() - model.sub = a - save_path = os.path.join(self.get_temp_dir(), "ckpt") - with self.assertRaisesRegex(ValueError, "non-string key"): - model.save_weights(save_path) - - def testDictWrapperNoDependency(self): - a = tf.Module() - a.d = data_structures.NoDependency({}) - a.d[1] = [3] - self.assertEqual([a], util.list_objects(a)) - model = training.Model() - model.sub = a - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - model.load_weights(save_path) - - def testNonStringKeyNotTrackableValue(self): - a = tf.Module() - a.d = {} - a.d["a"] = [3] - a.d[1] = data_structures.NoDependency([3]) - self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a)) - model = training.Model() - model.sub = a - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - model.load_weights(save_path) - - def testNonAppendNotTrackable(self): - # Non-append mutations (deleting or overwriting values) are OK when the - # values aren't tracked. 
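data_structures.NoDependency, used in several tests here, stores an attribute without creating a checkpoint dependency on it; that is what lets otherwise-forbidden patterns (non-string dict keys, overwritten values) save cleanly. A minimal sketch mirroring testNoDependency further down, with made-up attribute names and the post-migration import path:

import tensorflow as tf
from tensorflow.python.trackable import data_structures

root = tf.Module()
root.tracked = tf.Module()
root.untracked = data_structures.NoDependency(tf.Module())

# Only 'tracked' becomes a checkpoint dependency of root; 'untracked' is a
# plain Python attribute.
assert list(root._trackable_children()) == ["tracked"]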
- a = tf.Module() - a.d = {} - a.d["a"] = [3] - a.d[1] = 3 - a.d[1] = 2 - self.assertEqual(2, a.d[1]) - del a.d[1] - a.d[2] = data_structures.NoDependency(tf.Module()) - second = tf.Module() - a.d[2] = data_structures.NoDependency(second) - self.assertIs(second, a.d[2]) - self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a)) - model = training.Model() - model.sub = a - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - model.load_weights(save_path) - - def testPopNoSave(self): - model = training.Model() - model.d = {} - model.d["a"] = [] - model.d.pop("a") - save_path = os.path.join(self.get_temp_dir(), "ckpt") - with self.assertRaisesRegex(ValueError, "Unable to save"): - model.save_weights(save_path) - - def testExternalModificationNoSave(self): - model = training.Model() - external_reference = {} - model.d = external_reference - external_reference["a"] = [] - save_path = os.path.join(self.get_temp_dir(), "ckpt") - with self.assertRaisesRegex(ValueError, "modified outside the wrapper"): - model.save_weights(save_path) - - def testOverwriteCanStillSave(self): - model = training.Model() - model.d = {} - model.d["a"] = {} - model.d["a"] = {} - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - - def testIter(self): - model = training.Model() - model.d = {1: 3} - model.d[1] = 3 - self.assertEqual([1], list(model.d)) - new_dict = {} - # This update() is super tricky. If the dict wrapper subclasses dict, - # CPython will access its storage directly instead of calling any - # methods/properties on the object. So the options are either not to - # subclass dict (in which case update will call normal iter methods, but the - # object won't pass isinstance checks) or to subclass dict and keep that - # storage updated (no shadowing all its methods like ListWrapper). 
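The CPython caveat spelled out in the comment above can be demonstrated without Keras. A dict subclass that keeps its "real" contents on the side, instead of mirroring them into the built-in dict storage, is silently bypassed by dict.update (a standalone sketch of the failure mode the wrapper has to avoid):

class ShadowDict(dict):
    # Keeps the data elsewhere; the built-in dict storage stays empty.
    def __init__(self, data):
        super().__init__()
        self._data = dict(data)

    def items(self):
        return self._data.items()

    def __getitem__(self, key):
        return self._data[key]

plain = {}
plain.update(ShadowDict({1: 3}))
# On CPython, update() read the (empty) built-in storage directly, ignoring
# the overridden items()/__getitem__:
assert plain == {}

This is why the tracked dict wrapper subclasses dict and mirrors every mutation into the real dict storage instead of shadowing the accessor methods.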
- new_dict.update(model.d) - self.assertEqual({1: 3}, new_dict) + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testTracking(self): + with self.test_session(): + model = HasMapping() + output = model(tf.ones([32, 2])) + self.assertAllEqual([32, 7], output.shape.as_list()) + self.assertEqual(5, len(model.layers)) + self.assertEqual(len(model.layers), len(model.layer_dict.layers)) + self.assertLen(model._trackable_children(), 1) + self.assertIs( + model.layer_dict, model._trackable_children()["layer_dict"] + ) + self.evaluate([v.initializer for v in model.variables]) + test_var = model.layer_dict["output"].kernel + self.evaluate(test_var.assign(tf.ones([6, 7]))) + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + self.evaluate(test_var.assign(tf.zeros([6, 7]))) + model.load_weights(save_path) + self.assertAllEqual(numpy.ones([6, 7]), self.evaluate(test_var)) + + def testLayerCollectionWithExternalMutation(self): + d = {} + root = tf.Module() + root.wrapper = d + self.assertEqual([], root.wrapper.layers) + self.assertEqual([], root.wrapper.trainable_weights) + layer1 = core.Dense(1) + layer2 = core.Dense(1) + d["a"] = layer1 + d["b"] = layer2 + self.assertEqual([layer1, layer2], root.wrapper.layers) + # The layers have still not created variables + self.assertEqual([], root.wrapper.trainable_weights) + + def testDictWrapperBadKeys(self): + a = tf.Module() + a.d = {} + a.d[1] = tf.__internal__.tracking.wrap([]) + model = training.Model() + model.sub = a + save_path = os.path.join(self.get_temp_dir(), "ckpt") + with self.assertRaisesRegex(ValueError, "non-string key"): + model.save_weights(save_path) + + def testDictWrapperNoDependency(self): + a = tf.Module() + a.d = data_structures.NoDependency({}) + a.d[1] = [3] + self.assertEqual([a], util.list_objects(a)) + model = training.Model() + model.sub = a + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + model.load_weights(save_path) + + def testNonStringKeyNotTrackableValue(self): + a = tf.Module() + a.d = {} + a.d["a"] = [3] + a.d[1] = data_structures.NoDependency([3]) + self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a)) + model = training.Model() + model.sub = a + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + model.load_weights(save_path) + + def testNonAppendNotTrackable(self): + # Non-append mutations (deleting or overwriting values) are OK when the + # values aren't tracked. 
+ a = tf.Module() + a.d = {} + a.d["a"] = [3] + a.d[1] = 3 + a.d[1] = 2 + self.assertEqual(2, a.d[1]) + del a.d[1] + a.d[2] = data_structures.NoDependency(tf.Module()) + second = tf.Module() + a.d[2] = data_structures.NoDependency(second) + self.assertIs(second, a.d[2]) + self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a)) + model = training.Model() + model.sub = a + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + model.load_weights(save_path) + + def testPopNoSave(self): + model = training.Model() + model.d = {} + model.d["a"] = [] + model.d.pop("a") + save_path = os.path.join(self.get_temp_dir(), "ckpt") + with self.assertRaisesRegex(ValueError, "Unable to save"): + model.save_weights(save_path) + + def testExternalModificationNoSave(self): + model = training.Model() + external_reference = {} + model.d = external_reference + external_reference["a"] = [] + save_path = os.path.join(self.get_temp_dir(), "ckpt") + with self.assertRaisesRegex(ValueError, "modified outside the wrapper"): + model.save_weights(save_path) + + def testOverwriteCanStillSave(self): + model = training.Model() + model.d = {} + model.d["a"] = {} + model.d["a"] = {} + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + + def testIter(self): + model = training.Model() + model.d = {1: 3} + model.d[1] = 3 + self.assertEqual([1], list(model.d)) + new_dict = {} + # This update() is super tricky. If the dict wrapper subclasses dict, + # CPython will access its storage directly instead of calling any + # methods/properties on the object. So the options are either not to + # subclass dict (in which case update will call normal iter methods, but + # the object won't pass isinstance checks) or to subclass dict and keep + # that storage updated (no shadowing all its methods like ListWrapper). + new_dict.update(model.d) + self.assertEqual({1: 3}, new_dict) class HasTuple(training.Model): - - def __init__(self): - super().__init__() - self.layer_list = ( - core.Dense(3), core.Dense(4), - core.Dense(5, kernel_regularizer=tf.reduce_sum)) - self.layers_with_updates = (batch_normalization_v1.BatchNormalization(),) - - def call(self, x): - aggregation = 0. 
- for l in self.layer_list: - x = l(x) - aggregation += tf.reduce_sum(x) - bn, = self.layers_with_updates - return bn(x) / aggregation - - -class TupleTests(test_combinations.TestCase): - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testTracking(self): - with self.test_session(): - model = HasTuple() - output = model(tf.ones([32, 2])) - self.assertAllEqual([32, 5], output.shape.as_list()) - self.assertLen(model.layers, 4) - self.assertLen(model.layer_list.layers, 3) - self.assertEqual( - len(model.layers), - len(tuple(model.layer_list.layers) + model.layers_with_updates)) - self.assertEqual(3, model.layer_list.layers[0].units) - self.assertEqual(4, model.layer_list.layers[1].units) - self.assertEqual(5, model.layer_list.layers[2].units) - self.assertLen(model._trackable_children(), 2) - self.assertIs(model.layer_list, model._trackable_children()["layer_list"]) - self.assertIs(model.layers_with_updates, - model._trackable_children()["layers_with_updates"]) - self.assertLen(model.layer_list._trackable_children(), 3) - self.evaluate([v.initializer for v in model.variables]) - self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]])) - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - self.evaluate(model.variables[0].assign(tf.zeros([2, 3]))) - model.load_weights(save_path) - self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]], - self.evaluate(model.variables[0])) - v = tf.Variable(1.) - model.var_list = (v,) - self.assertIn(id(v), [id(obj) for obj in model.variables]) - self.assertIn(id(v), [id(obj) for obj in model.trainable_variables]) - self.assertNotIn(id(v), - [id(obj) for obj in model.non_trainable_variables]) - self.assertIn(id(model.layer_list[0].trainable_weights[0]), - [id(obj) for obj in model.trainable_weights]) - - @parameterized.named_parameters( - ("Module", tf.Module), - ("Model", training.Model), - ) - def testSubModelTracking(self, module_subclass): - model = module_subclass() - model.v = tf.Variable(1.) 
- self.assertIn(model.v, model.trainable_variables) - model2 = module_subclass() - model2.m = (model,) - self.assertIn(model.v, model2.trainable_variables) - - def testSubSequentialTracking(self): - - class _Subclassed(training.Model): - - def __init__(self, wrapped): - super().__init__() - self._wrapped = wrapped - - def call(self, x): - return self._wrapped(x) - - model = sequential.Sequential() - layer = core.Dense(1) - model.add(layer) - model2 = _Subclassed(model) - model2(tf.ones([1, 2])) - model2.m = (model,) - self.assertIn(layer.kernel, model2.trainable_weights) - - def testUpdatesForwarded(self): - with tf.Graph().as_default(): - model = HasTuple() - model_input = tf.ones([32, 2]) - model(model_input) - self.assertNotEmpty(model.layers_with_updates[0].updates) - self.assertEqual(set(model.layers_with_updates[0].updates), - set(model.updates)) - - model = HasTuple() - model_input = tf.ones([32, 2]) - model(model_input) - self.assertEmpty(model.updates) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testLossesForwarded(self): - model = HasTuple() - model_input = tf.ones([32, 2]) - model(model_input) - self.assertLen(model.losses, 1) - - def testModelContainersCompareEqual(self): - class HasEqualContainers(training.Model): - - def __init__(self): + def __init__(self): super().__init__() - self.l1 = () - self.l2 = () - - model = HasEqualContainers() - first_layer = HasEqualContainers() - model.l1 = (first_layer,) - second_layer = HasEqualContainers() - model.l2 = (second_layer,) - self.assertEqual((first_layer,), model.l1) - d = {model.l1: 1, model.l2: 2} - self.assertEqual(1, d[model.l1]) - self.assertEqual(1, d[(first_layer,)]) - self.assertEqual(2, d[model.l2]) - self.assertEqual(2, d[(second_layer,)]) - self.assertEqual([first_layer, second_layer], model.layers) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testTensorConversion(self): - - class TupleToTensor(training.Model): - - def __init__(self): - super().__init__() - self.l = (1., 2., 3.) 
+ self.layer_list = ( + core.Dense(3), + core.Dense(4), + core.Dense(5, kernel_regularizer=tf.reduce_sum), + ) + self.layers_with_updates = ( + batch_normalization_v1.BatchNormalization(), + ) + + def call(self, x): + aggregation = 0.0 + for l in self.layer_list: + x = l(x) + aggregation += tf.reduce_sum(x) + (bn,) = self.layers_with_updates + return bn(x) / aggregation - self.assertAllEqual( - (1., 2., 3.), - self.evaluate(tf.constant(TupleToTensor().l))) - self.assertAllEqual( - (1., 2., 3.), - self.evaluate(tf.raw_ops.Pack(values=TupleToTensor().l))) +class TupleTests(test_combinations.TestCase): + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testTracking(self): + with self.test_session(): + model = HasTuple() + output = model(tf.ones([32, 2])) + self.assertAllEqual([32, 5], output.shape.as_list()) + self.assertLen(model.layers, 4) + self.assertLen(model.layer_list.layers, 3) + self.assertEqual( + len(model.layers), + len(tuple(model.layer_list.layers) + model.layers_with_updates), + ) + self.assertEqual(3, model.layer_list.layers[0].units) + self.assertEqual(4, model.layer_list.layers[1].units) + self.assertEqual(5, model.layer_list.layers[2].units) + self.assertLen(model._trackable_children(), 2) + self.assertIs( + model.layer_list, model._trackable_children()["layer_list"] + ) + self.assertIs( + model.layers_with_updates, + model._trackable_children()["layers_with_updates"], + ) + self.assertLen(model.layer_list._trackable_children(), 3) + self.evaluate([v.initializer for v in model.variables]) + self.evaluate( + model.variables[0].assign([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + ) + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + self.evaluate(model.variables[0].assign(tf.zeros([2, 3]))) + model.load_weights(save_path) + self.assertAllEqual( + [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], + self.evaluate(model.variables[0]), + ) + v = tf.Variable(1.0) + model.var_list = (v,) + self.assertIn(id(v), [id(obj) for obj in model.variables]) + self.assertIn(id(v), [id(obj) for obj in model.trainable_variables]) + self.assertNotIn( + id(v), [id(obj) for obj in model.non_trainable_variables] + ) + self.assertIn( + id(model.layer_list[0].trainable_weights[0]), + [id(obj) for obj in model.trainable_weights], + ) + + @parameterized.named_parameters( + ("Module", tf.Module), + ("Model", training.Model), + ) + def testSubModelTracking(self, module_subclass): + model = module_subclass() + model.v = tf.Variable(1.0) + self.assertIn(model.v, model.trainable_variables) + model2 = module_subclass() + model2.m = (model,) + self.assertIn(model.v, model2.trainable_variables) + + def testSubSequentialTracking(self): + class _Subclassed(training.Model): + def __init__(self, wrapped): + super().__init__() + self._wrapped = wrapped + + def call(self, x): + return self._wrapped(x) + + model = sequential.Sequential() + layer = core.Dense(1) + model.add(layer) + model2 = _Subclassed(model) + model2(tf.ones([1, 2])) + model2.m = (model,) + self.assertIn(layer.kernel, model2.trainable_weights) + + def testUpdatesForwarded(self): + with tf.Graph().as_default(): + model = HasTuple() + model_input = tf.ones([32, 2]) + model(model_input) + self.assertNotEmpty(model.layers_with_updates[0].updates) + self.assertEqual( + set(model.layers_with_updates[0].updates), set(model.updates) + ) + + model = HasTuple() + model_input = tf.ones([32, 2]) + model(model_input) + self.assertEmpty(model.updates) + + @test_combinations.generate( + 
test_combinations.combine(mode=["graph", "eager"]) + ) + def testLossesForwarded(self): + model = HasTuple() + model_input = tf.ones([32, 2]) + model(model_input) + self.assertLen(model.losses, 1) + + def testModelContainersCompareEqual(self): + class HasEqualContainers(training.Model): + def __init__(self): + super().__init__() + self.l1 = () + self.l2 = () + + model = HasEqualContainers() + first_layer = HasEqualContainers() + model.l1 = (first_layer,) + second_layer = HasEqualContainers() + model.l2 = (second_layer,) + self.assertEqual((first_layer,), model.l1) + d = {model.l1: 1, model.l2: 2} + self.assertEqual(1, d[model.l1]) + self.assertEqual(1, d[(first_layer,)]) + self.assertEqual(2, d[model.l2]) + self.assertEqual(2, d[(second_layer,)]) + self.assertEqual([first_layer, second_layer], model.layers) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testTensorConversion(self): + class TupleToTensor(training.Model): + def __init__(self): + super().__init__() + self.l = (1.0, 2.0, 3.0) + + self.assertAllEqual( + (1.0, 2.0, 3.0), self.evaluate(tf.constant(TupleToTensor().l)) + ) + + self.assertAllEqual( + (1.0, 2.0, 3.0), + self.evaluate(tf.raw_ops.Pack(values=TupleToTensor().l)), + ) class InterfaceTests(test_combinations.TestCase): - - def testNoDependency(self): - root = tf.Module() - hasdep = tf.Module() - root.hasdep = hasdep - nodep = tf.Module() - root.nodep = data_structures.NoDependency(nodep) - self.assertLen(root._trackable_children(), 1) - self.assertIs(root._trackable_children()["hasdep"], root.hasdep) - self.assertIs(root.hasdep, hasdep) - self.assertIs(root.nodep, nodep) - - class NoDependencyModel(training.Model): - - @tf.__internal__.tracking.no_automatic_dependency_tracking - def __init__(self): - super().__init__() - self.a = [] - self.b = tf.Module() - - nodeps = NoDependencyModel() - self.assertEqual([nodeps], util.list_objects(nodeps)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testDictionariesBasic(self): - a = training.Model() - b = training.Model() - a.attribute = {"b": b} - c = training.Model() - a.attribute["c"] = [] - a.attribute["c"].append(c) - a_deps = util.list_objects(a) - self.assertIn(b, a_deps) - self.assertIn(c, a_deps) - self.assertIs(b, a.attribute["b"]) - self.assertEqual({"b", "c"}, a.attribute._trackable_children().keys()) - self.assertEqual([b, c], a.layers) - self.assertEqual([b, c], a.attribute.layers) - self.assertEqual([c], a.attribute["c"].layers) - checkpoint = tf.train.Checkpoint(a=a) - save_path = checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt")) - with self.cached_session(): - checkpoint.restore(save_path).assert_consumed().initialize_or_restore() - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testNoDepList(self): - a = training.Model() - a.l1 = data_structures.NoDependency([]) - a.l1.insert(1, 0) - self.assertIsInstance(a.l1, list) - checkpoint = tf.train.Checkpoint(a=a) - checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt")) - a.l2 = [] - a.l2.insert(1, tf.Module()) - with self.assertRaisesRegex(ValueError, "A list element was replaced"): - checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt")) + def testNoDependency(self): + root = tf.Module() + hasdep = tf.Module() + root.hasdep = hasdep + nodep = tf.Module() + root.nodep = data_structures.NoDependency(nodep) + self.assertLen(root._trackable_children(), 1) + self.assertIs(root._trackable_children()["hasdep"], root.hasdep) + 
self.assertIs(root.hasdep, hasdep) + self.assertIs(root.nodep, nodep) + + class NoDependencyModel(training.Model): + @tf.__internal__.tracking.no_automatic_dependency_tracking + def __init__(self): + super().__init__() + self.a = [] + self.b = tf.Module() + + nodeps = NoDependencyModel() + self.assertEqual([nodeps], util.list_objects(nodeps)) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testDictionariesBasic(self): + a = training.Model() + b = training.Model() + a.attribute = {"b": b} + c = training.Model() + a.attribute["c"] = [] + a.attribute["c"].append(c) + a_deps = util.list_objects(a) + self.assertIn(b, a_deps) + self.assertIn(c, a_deps) + self.assertIs(b, a.attribute["b"]) + self.assertEqual({"b", "c"}, a.attribute._trackable_children().keys()) + self.assertEqual([b, c], a.layers) + self.assertEqual([b, c], a.attribute.layers) + self.assertEqual([c], a.attribute["c"].layers) + checkpoint = tf.train.Checkpoint(a=a) + save_path = checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt")) + with self.cached_session(): + checkpoint.restore( + save_path + ).assert_consumed().initialize_or_restore() + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testNoDepList(self): + a = training.Model() + a.l1 = data_structures.NoDependency([]) + a.l1.insert(1, 0) + self.assertIsInstance(a.l1, list) + checkpoint = tf.train.Checkpoint(a=a) + checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt")) + a.l2 = [] + a.l2.insert(1, tf.Module()) + with self.assertRaisesRegex(ValueError, "A list element was replaced"): + checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt")) if __name__ == "__main__": - tf.compat.v1.enable_eager_execution() - tf.test.main() + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/tests/tracking_util_test.py b/keras/tests/tracking_util_test.py index a48d5e736b05..4ee3cbdf9733 100644 --- a/keras/tests/tracking_util_test.py +++ b/keras/tests/tracking_util_test.py @@ -14,886 +14,1029 @@ # ============================================================================== import functools - -import tensorflow.compat.v2 as tf import os import weakref -from tensorflow.python.eager import context -from tensorflow.python.framework import test_util as tf_test_utils -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils + +import tensorflow.compat.v2 as tf + from keras.engine import input_layer from keras.engine import sequential from keras.engine import training from keras.layers import core from keras.layers import reshaping -from keras.optimizers.optimizer_v2 import adam +from keras.optimizers.legacy import adam +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils + +# isort: off +from tensorflow.python.checkpoint import ( + checkpoint as trackable_utils, +) +from tensorflow.python.eager import context +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training.tracking import util as trackable_utils -# pylint: disable=not-callable class MyModel(training.Model): - """A concrete Model for testing.""" + """A concrete Model for testing.""" - def __init__(self): - super().__init__() - self._named_dense = core.Dense(1, use_bias=True) - self._second = core.Dense(1, use_bias=False) - # We can still track Trackables which aren't Layers. 
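The comment closing the chunk above ("We can still track Trackables which aren't Layers") is the reason NonLayerTrackable exists: checkpoint dependencies follow the object graph, not the Layer hierarchy, so any tf.Module hung off a model is saved with it. A public-API sketch of the same idea (Holder, the attribute name non_layer, and the save path are made up):

import tensorflow as tf

class Holder(tf.Module):  # a Trackable, but not a Keras Layer
    def __init__(self):
        super().__init__()
        self.a_variable = tf.Variable(0.0)

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
model.non_layer = Holder()

ckpt = tf.train.Checkpoint(model=model)
path = ckpt.save("/tmp/non_layer_demo")
# The non-Layer variable shows up under the model's object path:
names = [name for name, _ in tf.train.list_variables(path)]
assert any("non_layer" in name for name in names)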
- self._non_layer = NonLayerTrackable() + def __init__(self): + super().__init__() + self._named_dense = core.Dense(1, use_bias=True) + self._second = core.Dense(1, use_bias=False) + # We can still track Trackables which aren't Layers. + self._non_layer = NonLayerTrackable() - def call(self, values): - ret = self._second(self._named_dense(values)) - return ret + def call(self, values): + ret = self._second(self._named_dense(values)) + return ret class NonLayerTrackable(tf.Module): - - def __init__(self): - super().__init__() - self.a_variable = trackable_utils.add_variable( - self, name="a_variable", shape=[]) + def __init__(self): + super().__init__() + self.a_variable = trackable_utils.add_variable( + self, name="a_variable", shape=[] + ) class InterfaceTests(tf.test.TestCase): - - def testLayerDeduplication(self): - model = training.Model() - layer_one = core.Dense(1) - layer_two = core.Dense(1) - model.other_path = [layer_one, layer_two] - model.l2 = layer_two - model.l1 = layer_one - self.assertEqual([layer_one, layer_two], model.layers) - - def testSaveWithOnlyKerasSession(self): - - with tf.Graph().as_default(), self.cached_session(): - inp = input_layer.Input([1]) - dense = core.Dense(1)(inp) - model = training.Model(inp, dense) - model.compile(optimizer="sgd", loss="mse") - model.fit([1.], [2.]) - checkpoint = tf.train.Checkpoint(model=model) - checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt")) + def testLayerDeduplication(self): + model = training.Model() + layer_one = core.Dense(1) + layer_two = core.Dense(1) + model.other_path = [layer_one, layer_two] + model.l2 = layer_two + model.l1 = layer_one + self.assertEqual([layer_one, layer_two], model.layers) + + def testSaveWithOnlyKerasSession(self): + + with tf.Graph().as_default(), self.cached_session(): + inp = input_layer.Input([1]) + dense = core.Dense(1)(inp) + model = training.Model(inp, dense) + model.compile(optimizer="sgd", loss="mse") + model.fit([1.0], [2.0]) + checkpoint = tf.train.Checkpoint(model=model) + checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt")) class CheckpointingTests(test_combinations.TestCase): + @tf_test_utils.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testNamingWithOptimizer(self): + input_value = tf.constant([[3.0]]) + model = MyModel() + # A nuisance Model using the same optimizer. Its slot variables should + # not go in the checkpoint, since it is never depended on. + other_model = MyModel() + optimizer = adam.Adam(0.001) + step = tf.compat.v1.train.get_or_create_global_step() + root_trackable = tf.train.Checkpoint( + optimizer=optimizer, model=model, step=step + ) - @tf_test_utils.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testNamingWithOptimizer(self): - input_value = tf.constant([[3.]]) - model = MyModel() - # A nuisance Model using the same optimizer. Its slot variables should not - # go in the checkpoint, since it is never depended on. 
- other_model = MyModel() - optimizer = adam.Adam(0.001) - step = tf.compat.v1.train.get_or_create_global_step() - root_trackable = tf.train.Checkpoint( - optimizer=optimizer, model=model, step=step) - - with tf.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - train_op = tf.group( - optimizer.apply_gradients(zip(gradients, variables)), - step.assign_add(1)) - - with tf.GradientTape() as tape: - loss = other_model(input_value) - variables = other_model.trainable_variables - gradients = tape.gradient(loss, variables) - optimizer.apply_gradients(zip(gradients, variables)) - - self.evaluate(trackable_utils.gather_initializers( - root_trackable)) - self.evaluate(train_op) - named_variables, serialized_graph, _ = tf.__internal__.tracking.ObjectGraphView( - root_trackable).serialize_object_graph() - expected_slot_keys = ( - "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", - "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", - "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", - "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", - "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", - "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", - ) - expected_checkpoint_names = ( - # Created in the root node, so no prefix. - "step", - "model/_second/kernel", - "model/_named_dense/kernel", - "model/_named_dense/bias", - # non-Layer dependency of the model - "model/_non_layer/a_variable", - "optimizer/learning_rate", - "optimizer/beta_1", - "optimizer/beta_2", - "optimizer/iter", - "optimizer/decay", - ) + expected_slot_keys - suffix = "/.ATTRIBUTES/VARIABLE_VALUE" - expected_checkpoint_names = [ - name + suffix for name in expected_checkpoint_names] - named_variables = {v.name: v for v in named_variables} - self.assertEqual(len(expected_checkpoint_names), - len(named_variables.keys())) - # Check that we've mapped to the right variable objects (not exhaustive) - self.assertEqual( - "global_step", - named_variables["step" + suffix].full_name) - self.assertEqual( - "my_model/dense_1/kernel", - named_variables["model/_second/kernel" + suffix].full_name) - self.assertEqual( - "my_model/dense/kernel", - named_variables["model/_named_dense/kernel" + suffix].full_name) - self.assertEqual("Adam/beta_1", - named_variables["optimizer/beta_1" + suffix].full_name) - self.assertEqual("Adam/beta_2", - named_variables["optimizer/beta_2" + suffix].full_name) - # Spot check the generated protocol buffers. 
- self.assertEqual("optimizer", - serialized_graph.nodes[0].children[1].local_name) - optimizer_node = serialized_graph.nodes[ - serialized_graph.nodes[0].children[1].node_id] - children = [node.local_name for node in optimizer_node.children] - self.assertEqual( - # hyper variable dependencies - len(["beta_1", "beta_2", "iter", "decay", "learning_rate"]), - len(children)) - serialized_slot_keys = [] - for slot in optimizer_node.slot_variables: - for attribute in ( - serialized_graph.nodes[slot.slot_variable_node_id].attributes): - serialized_slot_keys.append(attribute.checkpoint_key) - self.assertEqual( - len([key + suffix for key in expected_slot_keys]), - len(serialized_slot_keys)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testSaveRestore(self): - with self.test_session(): - model = MyModel() - optimizer = adam.Adam(0.001) - root_trackable = tf.train.Checkpoint( - optimizer=optimizer, model=model) - input_value = tf.constant([[3.]]) - with tf.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - train_op = optimizer.apply_gradients(zip(gradients, variables)) - self.assertFalse(root_trackable.save_counter.trainable) - self.evaluate(trackable_utils.gather_initializers( - root_trackable)) - self.evaluate(train_op) - prefix = os.path.join(self.get_temp_dir(), "ckpt") - self.evaluate(tf.compat.v1.assign(model._named_dense.variables[1], [42.])) - m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") - self.evaluate(tf.compat.v1.assign(m_bias_slot, [1.5])) - save_path = root_trackable.save(file_prefix=prefix) - self.evaluate(tf.compat.v1.assign(model._named_dense.variables[1], [43.])) - self.evaluate(tf.compat.v1.assign(root_trackable.save_counter, 3)) - optimizer_variables = self.evaluate( - sorted(optimizer.variables(), key=lambda v: v.name)) - self.evaluate(tf.compat.v1.assign(m_bias_slot, [-2.])) - # Immediate restoration - status = root_trackable.restore(save_path=save_path).assert_consumed() - status.run_restore_ops() - self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) - self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) - self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) - if not tf.executing_eagerly(): - return # Restore-on-create is only supported when executing eagerly - on_create_model = MyModel() - on_create_optimizer = adam.Adam(0.001) - on_create_root = tf.train.Checkpoint( - optimizer=on_create_optimizer, model=on_create_model) - # Deferred restoration - status = on_create_root.restore(save_path=save_path) - status.assert_nontrivial_match() - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): - status.assert_consumed() - on_create_model(tf.constant([[3.]])) # create variables - self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) - self.assertAllEqual([42.], - self.evaluate( - on_create_model._named_dense.variables[1])) - on_create_m_bias_slot = on_create_optimizer.get_slot( - on_create_model._named_dense.variables[1], "m") - status.assert_existing_objects_matched() - if not tf.executing_eagerly(): - with self.assertRaises(AssertionError): - status.assert_consumed() - # Optimizer slot variables are created when the original variable is - # restored. 
- self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) - dummy_var = tf.Variable([1.]) - on_create_optimizer.minimize(loss=dummy_var.read_value, - var_list=[dummy_var]) - status.assert_existing_objects_matched() - status.assert_consumed() - self.assertAllEqual( - optimizer_variables, - # Creation order is different, so .variables() needs to be re-sorted. - self.evaluate(sorted(optimizer.variables(), key=lambda v: v.name))) - - # TODO(allenl): Debug garbage created by this test in python3. - def testDeferredRestorationUsageEager(self): - """An idiomatic eager execution example.""" - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - for training_continuation in range(3): - model = MyModel() - optimizer = adam.Adam(0.001) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model) - root.restore(tf.train.latest_checkpoint( - checkpoint_directory)) - for _ in range(num_training_steps): - # TODO(allenl): Use a Dataset and serialize/checkpoint it. - input_value = tf.constant([[3.]]) with tf.GradientTape() as tape: - loss = model(input_value) + loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) - optimizer.apply_gradients(zip(gradients, variables)) - root.save(file_prefix=checkpoint_prefix) - self.assertEqual((training_continuation + 1) * num_training_steps, - root.optimizer.iterations.numpy()) - - def testUsageGraph(self): - """Expected usage when graph building.""" - with context.graph_mode(): - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - for training_continuation in range(3): - with tf.Graph().as_default(): - model = MyModel() - optimizer = adam.Adam(0.001) - root = tf.compat.v1.train.Checkpoint( - optimizer=optimizer, model=model) - input_value = tf.constant([[3.]]) - with tf.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - train_op = optimizer.apply_gradients(zip(gradients, variables)) - - checkpoint_path = tf.train.latest_checkpoint( - checkpoint_directory) - with self.session(graph=tf.compat.v1.get_default_graph()) as session: - status = root.restore(save_path=checkpoint_path) - status.initialize_or_restore(session=session) - if checkpoint_path is None: - self.assertEqual(0, training_continuation) - with self.assertRaises(AssertionError): - status.assert_consumed() - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() - else: - status.assert_consumed() - status.assert_existing_objects_matched() - for _ in range(num_training_steps): - session.run(train_op) - root.save(file_prefix=checkpoint_prefix, session=session) - self.assertEqual((training_continuation + 1) * num_training_steps, - session.run(root.optimizer.iterations)) - self.assertEqual(training_continuation + 1, - session.run(root.save_counter)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testAgnosticUsage(self): - """Graph/eager agnostic usage.""" - # Does create garbage when executing eagerly due to ops.Graph() creation. 
- with self.test_session(): - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - optimizer = adam.Adam(0.001) - def _train_fn(model, input_value): + train_op = tf.group( + optimizer.apply_gradients(zip(gradients, variables)), + step.assign_add(1), + ) + with tf.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables + loss = other_model(input_value) + variables = other_model.trainable_variables gradients = tape.gradient(loss, variables) - return optimizer.apply_gradients(zip(gradients, variables)) - for training_continuation in range(3): - with test_utils.device(should_use_gpu=True): - model = MyModel() - root = tf.train.Checkpoint( - optimizer=optimizer, model=model) - manager = tf.train.CheckpointManager( - root, checkpoint_directory, max_to_keep=1) - status = root.restore(save_path=manager.latest_checkpoint) - input_value = tf.constant([[3.]]) - train_fn = functools.partial(_train_fn, model, input_value) - if not tf.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - for _ in range(num_training_steps): - train_fn() - manager.save() - self.assertEqual((training_continuation + 1) * num_training_steps, - self.evaluate(root.optimizer.iterations)) - self.assertEqual(training_continuation + 1, - self.evaluate(root.save_counter)) - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testPartialRestoreWarningObject(self): - optimizer = adam.Adam(0.0) - original_root = tf.train.Checkpoint(v1=tf.Variable(2.), - v2=tf.Variable(3.), - optimizer=optimizer) - # Create a slot variable to save - optimizer.minimize(original_root.v1.read_value, [original_root.v1]) - prefix = os.path.join(self.get_temp_dir(), "ckpt") - save_path = original_root.save(prefix) - partial_root = tf.train.Checkpoint(v1=tf.Variable(0.)) - weak_partial_root = weakref.ref(partial_root) - weak_v1 = weakref.ref(partial_root.v1) - partial_root.restore(save_path) - self.assertEqual(2., partial_root.v1.numpy()) - with tf.compat.v1.test.mock.patch.object(logging, "warning") as mock_log: - del partial_root - self.assertIsNone(weak_partial_root()) - self.assertIsNone(weak_v1()) - messages = str(mock_log.call_args_list) - self.assertIn("(root).v2'", messages) - self.assertIn("(root).optimizer's state 'm' for (root).v1", messages) - self.assertNotIn("(root).v1'", messages) - self.assertIn("expect_partial()", messages) - - # pylint: disable=cell-var-from-loop - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testWithDefun(self): - with self.test_session(): - num_training_steps = 2 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - for training_continuation in range(3): - with test_utils.device(should_use_gpu=True): - model = MyModel() - # Don't actually train so we can test variable values - optimizer = adam.Adam(0.) 
- root = tf.train.Checkpoint( - optimizer=optimizer, model=model) - checkpoint_path = tf.train.latest_checkpoint( - checkpoint_directory) - status = root.restore(save_path=checkpoint_path) - def train_fn(): - @tf.function - def _call_model(x): - return model(x) + optimizer.apply_gradients(zip(gradients, variables)) + + self.evaluate(trackable_utils.gather_initializers(root_trackable)) + self.evaluate(train_op) + ( + named_variables, + serialized_graph, + _, + ) = tf.__internal__.tracking.ObjectGraphView( + root_trackable + ).serialize_object_graph() + expected_slot_keys = ( + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", + ) + expected_checkpoint_names = ( + # Created in the root node, so no prefix. + "step", + "model/_second/kernel", + "model/_named_dense/kernel", + "model/_named_dense/bias", + # non-Layer dependency of the model + "model/_non_layer/a_variable", + "optimizer/learning_rate", + "optimizer/beta_1", + "optimizer/beta_2", + "optimizer/iter", + "optimizer/decay", + ) + expected_slot_keys + suffix = "/.ATTRIBUTES/VARIABLE_VALUE" + expected_checkpoint_names = [ + name + suffix for name in expected_checkpoint_names + ] + named_variables = {v.name: v for v in named_variables} + self.assertEqual( + len(expected_checkpoint_names), len(named_variables.keys()) + ) + # Check that we've created the right full_names of objects (not + # exhaustive) + expected_names = { + "step" + suffix: "global_step", + "model/_second/kernel" + suffix: "my_model/dense_1/kernel", + "model/_named_dense/kernel" + suffix: "my_model/dense/kernel", + "optimizer/beta_1" + suffix: "Adam/beta_1", + "optimizer/beta_2" + suffix: "Adam/beta_2", + } + for nodes in serialized_graph.nodes: + for attribute in nodes.attributes: + expected_name = expected_names.pop( + attribute.checkpoint_key, None + ) + if expected_name is not None: + self.assertEqual(expected_name, attribute.full_name) + self.assertEmpty(expected_names) + # Spot check the generated protocol buffers. 
+ self.assertEqual( + "optimizer", serialized_graph.nodes[0].children[1].local_name + ) + optimizer_node = serialized_graph.nodes[ + serialized_graph.nodes[0].children[1].node_id + ] + children = [node.local_name for node in optimizer_node.children] + self.assertEqual( + # hyper variable dependencies + len(["beta_1", "beta_2", "iter", "decay", "learning_rate"]), + len(children), + ) + serialized_slot_keys = [] + for slot in optimizer_node.slot_variables: + for attribute in serialized_graph.nodes[ + slot.slot_variable_node_id + ].attributes: + serialized_slot_keys.append(attribute.checkpoint_key) + self.assertEqual( + len([key + suffix for key in expected_slot_keys]), + len(serialized_slot_keys), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testSaveRestore(self): + with self.test_session(): + model = MyModel() + optimizer = adam.Adam(0.001) + root_trackable = tf.train.Checkpoint( + optimizer=optimizer, model=model + ) + input_value = tf.constant([[3.0]]) with tf.GradientTape() as tape: - loss = _call_model(tf.constant([[3.]])) - gradients = tape.gradient(loss, model.variables) - return optimizer.apply_gradients(zip(gradients, model.variables)) - if not tf.executing_eagerly(): - train_fn = functools.partial( - self.evaluate, train_fn()) - status.initialize_or_restore() - for _ in range(num_training_steps): - train_fn() - if training_continuation > 0: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + train_op = optimizer.apply_gradients(zip(gradients, variables)) + self.assertFalse(root_trackable.save_counter.trainable) + self.evaluate(trackable_utils.gather_initializers(root_trackable)) + self.evaluate(train_op) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + self.evaluate( + tf.compat.v1.assign(model._named_dense.variables[1], [42.0]) + ) + m_bias_slot = optimizer.get_slot( + model._named_dense.variables[1], "m" + ) + self.evaluate(tf.compat.v1.assign(m_bias_slot, [1.5])) + save_path = root_trackable.save(file_prefix=prefix) + self.evaluate( + tf.compat.v1.assign(model._named_dense.variables[1], [43.0]) + ) + self.evaluate(tf.compat.v1.assign(root_trackable.save_counter, 3)) + optimizer_variables = self.evaluate( + sorted(optimizer.variables(), key=lambda v: v.name) + ) + self.evaluate(tf.compat.v1.assign(m_bias_slot, [-2.0])) + # Immediate restoration + status = root_trackable.restore( + save_path=save_path + ).assert_consumed() + status.run_restore_ops() + self.assertAllEqual( + [42.0], self.evaluate(model._named_dense.variables[1]) + ) + self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) + self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) + if not tf.executing_eagerly(): + # Restore-on-create is only supported when executing eagerly + return + on_create_model = MyModel() + on_create_optimizer = adam.Adam(0.001) + on_create_root = tf.train.Checkpoint( + optimizer=on_create_optimizer, model=on_create_model + ) + # Deferred restoration + status = on_create_root.restore(save_path=save_path) + status.assert_nontrivial_match() + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() + on_create_model(tf.constant([[3.0]])) # create variables + self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) + self.assertAllEqual( + [42.0], self.evaluate(on_create_model._named_dense.variables[1]) + ) + on_create_m_bias_slot = on_create_optimizer.get_slot( + 
on_create_model._named_dense.variables[1], "m" + ) + status.assert_existing_objects_matched() + if not tf.executing_eagerly(): + with self.assertRaises(AssertionError): + status.assert_consumed() + # Optimizer slot variables are created when the original variable is + # restored. + self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) + dummy_var = tf.Variable([1.0]) + on_create_optimizer.minimize( + loss=dummy_var.read_value, var_list=[dummy_var] + ) + status.assert_existing_objects_matched() status.assert_consumed() - self.assertAllClose([[42.]], self.evaluate(model.variables[0])) - else: - self.evaluate(model.variables[0].assign([[42.]])) - root.save(file_prefix=checkpoint_prefix) - self.assertEqual((training_continuation + 1) * num_training_steps, - self.evaluate(optimizer.iterations)) - self.assertEqual(training_continuation + 1, - self.evaluate(root.save_counter)) - # pylint: enable=cell-var-from-loop - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testAnonymousVarsInInit(self): - - class Model(training.Model): - - def __init__(self): - super().__init__() - self.w = tf.Variable(0.0) - self.b = tf.Variable(0.0) - self.vars = [self.w, self.b] - - def call(self, x): - return x * self.w + self.b - - model = Model() - optimizer = adam.Adam(learning_rate=0.05) - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - checkpoint = tf.train.Checkpoint( - model=model, optimizer=optimizer) - for _ in range(2): - checkpoint.save(checkpoint_prefix) - with tf.GradientTape() as tape: - loss = (tf.constant(1.) - - model(tf.constant(1.))) ** 2 - grad = tape.gradient(loss, model.vars) - optimizer.apply_gradients( - [(g, v) for g, v in zip(grad, model.vars)]) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testDeferredSlotRestoration(self): - with self.test_session(): - checkpoint_directory = self.get_temp_dir() - - root = tf.train.Checkpoint() - root.var = trackable_utils.add_variable( - root, name="var", initializer=0.) - optimizer = adam.Adam(0.1) - variables = [root.var] - gradients = [1.] - train_op = optimizer.apply_gradients(zip(gradients, variables)) - # Note that `optimizer` has not been added as a dependency of - # `root`. Create a one-off grouping so that slot variables for `root.var` - # get initialized too. - self.evaluate(trackable_utils.gather_initializers( - tf.train.Checkpoint(root=root, optimizer=optimizer))) - self.evaluate(train_op) - self.evaluate(tf.compat.v1.assign(root.var, 12.)) - no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots")) - root.optimizer = optimizer - self.evaluate(tf.compat.v1.assign(root.var, 13.)) - self.evaluate(tf.compat.v1.assign( - optimizer.get_slot(slot_name="m", var=root.var), - 14.)) - slots_path = root.save(os.path.join(checkpoint_directory, "with_slots")) - new_root = tf.train.Checkpoint() - # Load the slot-containing checkpoint (deferred), then immediately - # overwrite the non-slot variable (also deferred). 
- slot_status = new_root.restore(slots_path) - no_slot_status = new_root.restore(no_slots_path) - with self.assertRaises(AssertionError): - no_slot_status.assert_consumed() - new_root.var = trackable_utils.add_variable( - new_root, name="var", shape=[]) - no_slot_status.assert_consumed() - no_slot_status.run_restore_ops() - self.assertEqual(12., self.evaluate(new_root.var)) - new_root.optimizer = adam.Adam(0.1) - slot_status.assert_existing_objects_matched() - if not tf.executing_eagerly(): - with self.assertRaisesRegex(AssertionError, "Unresolved object"): - slot_status.assert_consumed() - self.assertEqual(12., self.evaluate(new_root.var)) - if tf.executing_eagerly(): - # Slot variables are only created with restoring initializers when - # executing eagerly. - self.assertEqual(14., self.evaluate( - new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) - else: - # Slot variables are not created eagerly when graph building. - with self.assertRaises(KeyError): - new_root.optimizer.get_slot(slot_name="m", var=new_root.var) - variables = [new_root.var] - gradients = [1.] - train_op = new_root.optimizer.apply_gradients(zip(gradients, variables)) - # The slot variable now exists; restore() didn't create it, but we should - # now have a restore op for it. - slot_status.run_restore_ops() - if not tf.executing_eagerly(): - # The train op hasn't run when graph building, so the slot variable has - # its restored value. It has run in eager, so the value will - # be different. - self.assertEqual(14., self.evaluate( - new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) - self.evaluate(train_op) - slot_status.assert_consumed() - - def testManySavesGraph(self): - """Saves after the first should not modify the graph.""" - with context.graph_mode(): - graph = tf.Graph() - with graph.as_default(), self.session(graph): + self.assertAllEqual( + optimizer_variables, + # Creation order is different, so .variables() needs to be + # re-sorted. + self.evaluate( + sorted(optimizer.variables(), key=lambda v: v.name) + ), + ) + + # TODO(allenl): Debug garbage created by this test in python3. + def testDeferredRestorationUsageEager(self): + """An idiomatic eager execution example.""" + num_training_steps = 10 checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - obj = tf.train.Checkpoint() - obj.var = tf.Variable(0., name="v") - obj.opt = adam.Adam(0.1) - variables = [obj.var] - gradients = [1.] - obj.opt.apply_gradients(zip(gradients, variables)) - self.evaluate(trackable_utils.gather_initializers(obj)) - obj.save(checkpoint_prefix) - graph.finalize() - obj.save(checkpoint_prefix) - - def testManyRestoresGraph(self): - """Restores after the first should not modify the graph.""" - with context.graph_mode(): - graph = tf.Graph() - with graph.as_default(), self.session(graph): + for training_continuation in range(3): + model = MyModel() + optimizer = adam.Adam(0.001) + root = tf.train.Checkpoint(optimizer=optimizer, model=model) + root.restore(tf.train.latest_checkpoint(checkpoint_directory)) + for _ in range(num_training_steps): + # TODO(allenl): Use a Dataset and serialize/checkpoint it. 
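+                # Until then, a fixed constant stands in for real input data.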
+ input_value = tf.constant([[3.0]]) + with tf.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + optimizer.apply_gradients(zip(gradients, variables)) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual( + (training_continuation + 1) * num_training_steps, + root.optimizer.iterations.numpy(), + ) + + def testUsageGraph(self): + """Expected usage when graph building.""" + with context.graph_mode(): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with tf.Graph().as_default(): + model = MyModel() + optimizer = adam.Adam(0.001) + root = tf.compat.v1.train.Checkpoint( + optimizer=optimizer, model=model + ) + input_value = tf.constant([[3.0]]) + with tf.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + train_op = optimizer.apply_gradients( + zip(gradients, variables) + ) + + checkpoint_path = tf.train.latest_checkpoint( + checkpoint_directory + ) + with self.session( + graph=tf.compat.v1.get_default_graph() + ) as session: + status = root.restore(save_path=checkpoint_path) + status.initialize_or_restore(session=session) + if checkpoint_path is None: + self.assertEqual(0, training_continuation) + with self.assertRaises(AssertionError): + status.assert_consumed() + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() + else: + status.assert_consumed() + status.assert_existing_objects_matched() + for _ in range(num_training_steps): + session.run(train_op) + root.save( + file_prefix=checkpoint_prefix, session=session + ) + self.assertEqual( + (training_continuation + 1) * num_training_steps, + session.run(root.optimizer.iterations), + ) + self.assertEqual( + training_continuation + 1, + session.run(root.save_counter), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testAgnosticUsage(self): + """Graph/eager agnostic usage.""" + # Does create garbage when executing eagerly due to ops.Graph() + # creation. 
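+        # (Hence no assert_no_eager_garbage check on this test.)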
+ with self.test_session(): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + optimizer = adam.Adam(0.001) + + def _train_fn(model, input_value): + with tf.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + return optimizer.apply_gradients(zip(gradients, variables)) + + for training_continuation in range(3): + with test_utils.device(should_use_gpu=True): + model = MyModel() + root = tf.train.Checkpoint(optimizer=optimizer, model=model) + manager = tf.train.CheckpointManager( + root, checkpoint_directory, max_to_keep=1 + ) + status = root.restore(save_path=manager.latest_checkpoint) + input_value = tf.constant([[3.0]]) + train_fn = functools.partial(_train_fn, model, input_value) + if not tf.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + manager.save() + self.assertEqual( + (training_continuation + 1) * num_training_steps, + self.evaluate(root.optimizer.iterations), + ) + self.assertEqual( + training_continuation + 1, + self.evaluate(root.save_counter), + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testPartialRestoreWarningObject(self): + optimizer = adam.Adam(0.0) + original_root = tf.train.Checkpoint( + v1=tf.Variable(2.0), v2=tf.Variable(3.0), optimizer=optimizer + ) + # Create a slot variable to save + optimizer.minimize(original_root.v1.read_value, [original_root.v1]) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + save_path = original_root.save(prefix) + partial_root = tf.train.Checkpoint(v1=tf.Variable(0.0)) + weak_partial_root = weakref.ref(partial_root) + weak_v1 = weakref.ref(partial_root.v1) + partial_root.restore(save_path) + self.assertEqual(2.0, partial_root.v1.numpy()) + with tf.compat.v1.test.mock.patch.object( + logging, "warning" + ) as mock_log: + del partial_root + self.assertIsNone(weak_partial_root()) + self.assertIsNone(weak_v1()) + messages = str(mock_log.call_args_list) + self.assertIn("(root).v2'", messages) + self.assertIn("(root).optimizer's state 'm' for (root).v1", messages) + self.assertNotIn("(root).v1'", messages) + self.assertIn("expect_partial()", messages) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testWithDefun(self): + with self.test_session(): + num_training_steps = 2 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with test_utils.device(should_use_gpu=True): + model = MyModel() + # Don't actually train so we can test variable values + optimizer = adam.Adam(0.0) + root = tf.train.Checkpoint(optimizer=optimizer, model=model) + checkpoint_path = tf.train.latest_checkpoint( + checkpoint_directory + ) + status = root.restore(save_path=checkpoint_path) + + def train_fn(): + @tf.function + def _call_model(x): + return model(x) + + with tf.GradientTape() as tape: + loss = _call_model(tf.constant([[3.0]])) + gradients = tape.gradient(loss, model.variables) + return optimizer.apply_gradients( + zip(gradients, model.variables) + ) + + if not tf.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + if training_continuation > 0: + status.assert_consumed() + self.assertAllClose( + [[42.0]], self.evaluate(model.variables[0]) + ) + 
else: + self.evaluate(model.variables[0].assign([[42.0]])) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual( + (training_continuation + 1) * num_training_steps, + self.evaluate(optimizer.iterations), + ) + self.assertEqual( + training_continuation + 1, + self.evaluate(root.save_counter), + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testAnonymousVarsInInit(self): + class Model(training.Model): + def __init__(self): + super().__init__() + self.w = tf.Variable(0.0) + self.b = tf.Variable(0.0) + self.vars = [self.w, self.b] + + def call(self, x): + return x * self.w + self.b + + model = Model() + optimizer = adam.Adam(learning_rate=0.05) checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - obj = tf.train.Checkpoint() - obj.var = tf.Variable(0., name="v") - obj.opt = adam.Adam(0.1) - variables = [obj.var] - gradients = [1.] - obj.opt.apply_gradients(zip(gradients, variables)) - self.evaluate(trackable_utils.gather_initializers(obj)) - save_path = obj.save(checkpoint_prefix) - obj.restore(save_path) - graph.finalize() - obj.restore(save_path) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def test_sequential(self): - with self.test_session(): - model = sequential.Sequential() - checkpoint = tf.train.Checkpoint(model=model) - model.add(core.Dense(4)) - second_dense = core.Dense(5) - model.add(second_dense) - model(tf.constant([[1.]])) - checkpoint.restore(None).initialize_or_restore() - self.evaluate(second_dense.bias.assign( - tf.constant([1., 2., 3., 4., 5.]))) - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - save_path = checkpoint.save(checkpoint_prefix) - self.evaluate(second_dense.bias.assign( - tf.constant([5., 6., 7., 8., 9.]))) - checkpoint.restore(save_path).assert_consumed().run_restore_ops() - self.assertAllEqual([1., 2., 3., 4., 5.], - self.evaluate(second_dense.bias)) - - deferred_sequential = sequential.Sequential() - deferred_sequential_checkpoint = tf.train.Checkpoint( - model=deferred_sequential) - status = deferred_sequential_checkpoint.restore(save_path) - deferred_sequential.add(core.Dense(4)) - deferred_second_dense = core.Dense(5) - deferred_sequential.add(deferred_second_dense) - deferred_sequential(tf.constant([[1.]])) - status.run_restore_ops() - self.assertAllEqual([1., 2., 3., 4., 5.], - self.evaluate(deferred_second_dense.bias)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def test_initialize_if_not_restoring(self): - with self.test_session(): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - optimizer_only_prefix = os.path.join(checkpoint_directory, "opt") - with test_utils.device(should_use_gpu=True): - model = MyModel() - optimizer = adam.Adam(0.001) - root = tf.train.Checkpoint( - model=model) # Do not save the optimizer with the checkpoint. 
- optimizer_checkpoint = tf.train.Checkpoint( - optimizer=optimizer) - - checkpoint_path = tf.train.latest_checkpoint( - checkpoint_directory) - status = root.restore(save_path=checkpoint_path) - input_value = tf.constant([[3.]]) - def train_fn(): - with tf.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - return optimizer.apply_gradients(zip(gradients, variables)) - if not tf.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - # TODO(tanzheny): Add hyper variables to .variables(), and set them with - # set_weights etc. - variables_not_in_the_variables_property = [ - obj for obj in optimizer._hyper.values() - if isinstance(obj, tf.Variable)] - self.evaluate([v.initializer for v - in optimizer.variables() - + variables_not_in_the_variables_property]) - train_fn() - model_save_path = root.save(file_prefix=checkpoint_prefix) - self.evaluate(optimizer.beta_1.assign(42.)) - optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix) - del train_fn - - # Restore into a graph with the optimizer - with test_utils.device(should_use_gpu=True): - model = MyModel() - optimizer = adam.Adam(0.001) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model) - status = root.restore(save_path=model_save_path) - input_value = tf.constant([[3.]]) - def train_fn1(): - with tf.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - return optimizer.apply_gradients(zip(gradients, variables)) - if not tf.executing_eagerly(): - train_fn1 = functools.partial(self.evaluate, train_fn1()) - status.initialize_or_restore() - train_fn1() - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): - status.assert_consumed() - del train_fn1 - - # Make sure initialization doesn't clobber later restores - with test_utils.device(should_use_gpu=True): - model = MyModel() - optimizer = adam.Adam(0.001, beta_1=1.0) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model) - opt_root = tf.train.Checkpoint( - optimizer=optimizer) - status = root.restore(save_path=model_save_path) - init_only_optimizer_status = opt_root.restore(save_path=None) - optimizer_status = opt_root.restore(save_path=optimizer_save_path) - input_value = tf.constant([[3.]]) - def train_fn2(): - with tf.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - return optimizer.apply_gradients(zip(gradients, variables)) - if not tf.executing_eagerly(): - train_fn2 = functools.partial(self.evaluate, train_fn2()) - optimizer_status.run_restore_ops() - status.initialize_or_restore() - init_only_optimizer_status.initialize_or_restore() - train_fn2() - self.assertEqual(42., self.evaluate(optimizer.beta_1)) + checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer) + for _ in range(2): + checkpoint.save(checkpoint_prefix) + with tf.GradientTape() as tape: + loss = (tf.constant(1.0) - model(tf.constant(1.0))) ** 2 + grad = tape.gradient(loss, model.vars) + optimizer.apply_gradients( + [(g, v) for g, v in zip(grad, model.vars)] + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testDeferredSlotRestoration(self): + with self.test_session(): + checkpoint_directory = self.get_temp_dir() + + root = 
tf.train.Checkpoint() + root.var = trackable_utils.add_variable( + root, name="var", initializer=0.0 + ) + optimizer = adam.Adam(0.1) + variables = [root.var] + gradients = [1.0] + train_op = optimizer.apply_gradients(zip(gradients, variables)) + # Note that `optimizer` has not been added as a dependency of + # `root`. Create a one-off grouping so that slot variables for + # `root.var` get initialized too. + self.evaluate( + trackable_utils.gather_initializers( + tf.train.Checkpoint(root=root, optimizer=optimizer) + ) + ) + self.evaluate(train_op) + self.evaluate(tf.compat.v1.assign(root.var, 12.0)) + no_slots_path = root.save( + os.path.join(checkpoint_directory, "no_slots") + ) + root.optimizer = optimizer + self.evaluate(tf.compat.v1.assign(root.var, 13.0)) + self.evaluate( + tf.compat.v1.assign( + optimizer.get_slot(slot_name="m", var=root.var), 14.0 + ) + ) + slots_path = root.save( + os.path.join(checkpoint_directory, "with_slots") + ) + new_root = tf.train.Checkpoint() + # Load the slot-containing checkpoint (deferred), then immediately + # overwrite the non-slot variable (also deferred). + slot_status = new_root.restore(slots_path) + no_slot_status = new_root.restore(no_slots_path) + with self.assertRaises(AssertionError): + no_slot_status.assert_consumed() + new_root.var = trackable_utils.add_variable( + new_root, name="var", shape=[] + ) + no_slot_status.assert_consumed() + no_slot_status.run_restore_ops() + self.assertEqual(12.0, self.evaluate(new_root.var)) + new_root.optimizer = adam.Adam(0.1) + slot_status.assert_existing_objects_matched() + if not tf.executing_eagerly(): + with self.assertRaisesRegex( + AssertionError, "Unresolved object" + ): + slot_status.assert_consumed() + self.assertEqual(12.0, self.evaluate(new_root.var)) + if tf.executing_eagerly(): + # Slot variables are only created with restoring initializers + # when executing eagerly. + self.assertEqual( + 14.0, + self.evaluate( + new_root.optimizer.get_slot( + slot_name="m", var=new_root.var + ) + ), + ) + else: + # Slot variables are not created eagerly when graph building. + with self.assertRaises(KeyError): + new_root.optimizer.get_slot(slot_name="m", var=new_root.var) + variables = [new_root.var] + gradients = [1.0] + train_op = new_root.optimizer.apply_gradients( + zip(gradients, variables) + ) + # The slot variable now exists; restore() didn't create it, but we + # should now have a restore op for it. + slot_status.run_restore_ops() + if not tf.executing_eagerly(): + # The train op hasn't run when graph building, so the slot + # variable has its restored value. It has run in eager, so the + # value will be different. 
+ self.assertEqual( + 14.0, + self.evaluate( + new_root.optimizer.get_slot( + slot_name="m", var=new_root.var + ) + ), + ) + self.evaluate(train_op) + slot_status.assert_consumed() + + def testManySavesGraph(self): + """Saves after the first should not modify the graph.""" + with context.graph_mode(): + graph = tf.Graph() + with graph.as_default(), self.session(graph): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = tf.train.Checkpoint() + obj.var = tf.Variable(0.0, name="v") + obj.opt = adam.Adam(0.1) + variables = [obj.var] + gradients = [1.0] + obj.opt.apply_gradients(zip(gradients, variables)) + self.evaluate(trackable_utils.gather_initializers(obj)) + obj.save(checkpoint_prefix) + graph.finalize() + obj.save(checkpoint_prefix) + + def testManyRestoresGraph(self): + """Restores after the first should not modify the graph.""" + with context.graph_mode(): + graph = tf.Graph() + with graph.as_default(), self.session(graph): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = tf.train.Checkpoint() + obj.var = tf.Variable(0.0, name="v") + obj.opt = adam.Adam(0.1) + variables = [obj.var] + gradients = [1.0] + obj.opt.apply_gradients(zip(gradients, variables)) + self.evaluate(trackable_utils.gather_initializers(obj)) + save_path = obj.save(checkpoint_prefix) + obj.restore(save_path) + graph.finalize() + obj.restore(save_path) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_sequential(self): + with self.test_session(): + model = sequential.Sequential() + checkpoint = tf.train.Checkpoint(model=model) + model.add(core.Dense(4)) + second_dense = core.Dense(5) + model.add(second_dense) + model(tf.constant([[1.0]])) + checkpoint.restore(None).initialize_or_restore() + self.evaluate( + second_dense.bias.assign(tf.constant([1.0, 2.0, 3.0, 4.0, 5.0])) + ) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = checkpoint.save(checkpoint_prefix) + self.evaluate( + second_dense.bias.assign(tf.constant([5.0, 6.0, 7.0, 8.0, 9.0])) + ) + checkpoint.restore(save_path).assert_consumed().run_restore_ops() + self.assertAllEqual( + [1.0, 2.0, 3.0, 4.0, 5.0], self.evaluate(second_dense.bias) + ) + + deferred_sequential = sequential.Sequential() + deferred_sequential_checkpoint = tf.train.Checkpoint( + model=deferred_sequential + ) + status = deferred_sequential_checkpoint.restore(save_path) + deferred_sequential.add(core.Dense(4)) + deferred_second_dense = core.Dense(5) + deferred_sequential.add(deferred_second_dense) + deferred_sequential(tf.constant([[1.0]])) + status.run_restore_ops() + self.assertAllEqual( + [1.0, 2.0, 3.0, 4.0, 5.0], + self.evaluate(deferred_second_dense.bias), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_initialize_if_not_restoring(self): + with self.test_session(): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + optimizer_only_prefix = os.path.join(checkpoint_directory, "opt") + with test_utils.device(should_use_gpu=True): + model = MyModel() + optimizer = adam.Adam(0.001) + root = tf.train.Checkpoint( + model=model + ) # Do not save the optimizer with the checkpoint. 
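+                # A second Checkpoint tracks only the optimizer, so model
+                # and optimizer state can be saved to (and restored from)
+                # separate files below.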
+ optimizer_checkpoint = tf.train.Checkpoint(optimizer=optimizer) + + checkpoint_path = tf.train.latest_checkpoint( + checkpoint_directory + ) + status = root.restore(save_path=checkpoint_path) + input_value = tf.constant([[3.0]]) + + def train_fn(): + with tf.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + return optimizer.apply_gradients(zip(gradients, variables)) + + if not tf.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + # TODO(tanzheny): Add hyper variables to .variables(), and set + # them with set_weights etc. + variables_not_in_the_variables_property = [ + obj + for obj in optimizer._hyper.values() + if isinstance(obj, tf.Variable) + ] + self.evaluate( + [ + v.initializer + for v in optimizer.variables() + + variables_not_in_the_variables_property + ] + ) + train_fn() + model_save_path = root.save(file_prefix=checkpoint_prefix) + self.evaluate(optimizer.beta_1.assign(42.0)) + optimizer_save_path = optimizer_checkpoint.save( + optimizer_only_prefix + ) + del train_fn + + # Restore into a graph with the optimizer + with test_utils.device(should_use_gpu=True): + model = MyModel() + optimizer = adam.Adam(0.001) + root = tf.train.Checkpoint(optimizer=optimizer, model=model) + status = root.restore(save_path=model_save_path) + input_value = tf.constant([[3.0]]) + + def train_fn1(): + with tf.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + return optimizer.apply_gradients(zip(gradients, variables)) + + if not tf.executing_eagerly(): + train_fn1 = functools.partial(self.evaluate, train_fn1()) + status.initialize_or_restore() + train_fn1() + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() + del train_fn1 + + # Make sure initialization doesn't clobber later restores + with test_utils.device(should_use_gpu=True): + model = MyModel() + optimizer = adam.Adam(0.001, beta_1=1.0) + root = tf.train.Checkpoint(optimizer=optimizer, model=model) + opt_root = tf.train.Checkpoint(optimizer=optimizer) + status = root.restore(save_path=model_save_path) + init_only_optimizer_status = opt_root.restore(save_path=None) + optimizer_status = opt_root.restore( + save_path=optimizer_save_path + ) + input_value = tf.constant([[3.0]]) + + def train_fn2(): + with tf.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + return optimizer.apply_gradients(zip(gradients, variables)) + + if not tf.executing_eagerly(): + train_fn2 = functools.partial(self.evaluate, train_fn2()) + optimizer_status.run_restore_ops() + status.initialize_or_restore() + init_only_optimizer_status.initialize_or_restore() + train_fn2() + self.assertEqual(42.0, self.evaluate(optimizer.beta_1)) class _ManualScope(tf.Module): + def __call__(self): + with tf.compat.v1.variable_scope("ManualScope") as vs: + self.variable_scope = vs + with trackable_utils.capture_dependencies(template=self): + return self._build() - def __call__(self): - with tf.compat.v1.variable_scope("ManualScope") as vs: - self.variable_scope = vs - with trackable_utils.capture_dependencies(template=self): - return self._build() - - def _build(self): - return tf.compat.v1.get_variable(name="in_manual_scope", shape=[]) + def _build(self): + return 
tf.compat.v1.get_variable(name="in_manual_scope", shape=[]) @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class TemplateTests(test_combinations.TestCase): - - def test_trackable_save_restore(self): - with self.test_session(): - def _templated(): - v = tf.compat.v1.get_variable( - "v", shape=[1], initializer=tf.compat.v1.zeros_initializer(), - use_resource=True) - v2 = tf.compat.v1.get_variable( - "v2", shape=[1], initializer=tf.compat.v1.zeros_initializer(), - use_resource=True) - manual = _ManualScope() - return v, v + 1., v2, manual, manual() - - save_template = tf.compat.v1.make_template("s1", _templated) - v1_save, _, v2_save, manual_scope, manual_scope_v = save_template() - self.assertEqual( - set([id(v1_save), id(v2_save), id(manual_scope), - id(manual_scope_v), id(save_template)]), - set(map(id, trackable_utils.list_objects(save_template)))) - self.assertDictEqual({"in_manual_scope": manual_scope_v}, - manual_scope._trackable_children()) - optimizer = adam.Adam(0.0) - save_root = tf.train.Checkpoint( - my_template=save_template, optimizer=optimizer) - optimizer.minimize(v1_save.read_value, - var_list=[v1_save]) - self.evaluate([v.initializer for v in save_template.variables]) - optimizer_variables = optimizer.variables() + list( - optimizer._hyper.values()) - self.evaluate([v.initializer for v in optimizer_variables]) - self.evaluate(v1_save.assign([12.])) - self.evaluate(v2_save.assign([14.])) - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - save_path = save_root.save(checkpoint_prefix) - - load_template = tf.compat.v1.make_template("s2", _templated) - load_optimizer = adam.Adam(0.0) - load_root = tf.train.Checkpoint( - my_template=load_template, optimizer=load_optimizer) - status = load_root.restore(save_path) - var, var_plus_one, var2, _, _ = load_template() - load_optimizer.minimize(var.read_value, var_list=[var]) - - children = load_template._trackable_children() - self.assertEqual({"v", "v2", "ManualScope"}, children.keys()) - status.assert_consumed().run_restore_ops() - self.assertAllEqual([12.], self.evaluate(var)) - self.assertAllEqual([13.], self.evaluate(var_plus_one)) - self.assertAllEqual([14.], self.evaluate(var2)) + def test_trackable_save_restore(self): + with self.test_session(): + + def _templated(): + v = tf.compat.v1.get_variable( + "v", + shape=[1], + initializer=tf.compat.v1.zeros_initializer(), + use_resource=True, + ) + v2 = tf.compat.v1.get_variable( + "v2", + shape=[1], + initializer=tf.compat.v1.zeros_initializer(), + use_resource=True, + ) + manual = _ManualScope() + return v, v + 1.0, v2, manual, manual() + + save_template = tf.compat.v1.make_template("s1", _templated) + v1_save, _, v2_save, manual_scope, manual_scope_v = save_template() + self.assertEqual( + set( + [ + id(v1_save), + id(v2_save), + id(manual_scope), + id(manual_scope_v), + id(save_template), + ] + ), + set(map(id, trackable_utils.list_objects(save_template))), + ) + self.assertDictEqual( + {"in_manual_scope": manual_scope_v}, + manual_scope._trackable_children(), + ) + optimizer = adam.Adam(0.0) + save_root = tf.train.Checkpoint( + my_template=save_template, optimizer=optimizer + ) + optimizer.minimize(v1_save.read_value, var_list=[v1_save]) + self.evaluate([v.initializer for v in save_template.variables]) + optimizer_variables = optimizer.variables() + list( + optimizer._hyper.values() + ) + self.evaluate([v.initializer for v in optimizer_variables]) + self.evaluate(v1_save.assign([12.0])) 
+ self.evaluate(v2_save.assign([14.0])) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = save_root.save(checkpoint_prefix) + + load_template = tf.compat.v1.make_template("s2", _templated) + load_optimizer = adam.Adam(0.0) + load_root = tf.train.Checkpoint( + my_template=load_template, optimizer=load_optimizer + ) + status = load_root.restore(save_path) + var, var_plus_one, var2, _, _ = load_template() + load_optimizer.minimize(var.read_value, var_list=[var]) + + children = load_template._trackable_children() + self.assertEqual({"v", "v2", "ManualScope"}, children.keys()) + status.assert_consumed().run_restore_ops() + self.assertAllEqual([12.0], self.evaluate(var)) + self.assertAllEqual([13.0], self.evaluate(var_plus_one)) + self.assertAllEqual([14.0], self.evaluate(var2)) class CheckpointCompatibilityTests(test_combinations.TestCase): - - def _initialized_model(self): - input_value = tf.constant([[3.]]) - model = MyModel() - optimizer = adam.Adam(0.001) - root_trackable = tf.train.Checkpoint( - optimizer=optimizer, model=model) - with tf.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - train_op = optimizer.apply_gradients(zip(gradients, variables)) - self.evaluate(trackable_utils.gather_initializers( - root_trackable)) - self.evaluate(train_op) - # A regular variable, a slot variable, and a non-slot Optimizer variable - # with known values to check when loading. - self.evaluate(model._named_dense.bias.assign([1.])) - self.evaluate(optimizer.get_slot( - var=model._named_dense.bias, slot_name="m").assign([2.])) - self.evaluate(optimizer.beta_1.assign(3.)) - return root_trackable - - def _set_sentinels(self, root_trackable): - self.evaluate(root_trackable.model._named_dense.bias.assign([101.])) - self.evaluate( - root_trackable.optimizer.get_slot( - var=root_trackable.model._named_dense.bias, slot_name="m") - .assign([102.])) - self.evaluate(root_trackable.optimizer.beta_1.assign(103.)) - - def _check_sentinels(self, root_trackable): - self.assertAllEqual( - [1.], self.evaluate(root_trackable.model._named_dense.bias)) - self.assertAllEqual([2.], self.evaluate( - root_trackable.optimizer.get_slot( - var=root_trackable.model._named_dense.bias, slot_name="m"))) - self.assertAllEqual(3., - self.evaluate(root_trackable.optimizer.beta_1)) - - def _write_name_based_checkpoint(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - with context.graph_mode(): - save_graph = tf.Graph() - with save_graph.as_default(), self.session( - graph=save_graph) as session: - root = self._initialized_model() - name_saver = tf.compat.v1.train.Saver() - return name_saver.save( - sess=session, - save_path=checkpoint_prefix, - global_step=root.optimizer.iterations) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testLoadFromNameBasedSaver(self): - """Save a name-based checkpoint, load it using the object-based API.""" - with test_utils.device(should_use_gpu=True): - with self.test_session(): - save_path = self._write_name_based_checkpoint() - root = self._initialized_model() - self._set_sentinels(root) - with self.assertRaises(AssertionError): - self._check_sentinels(root) - object_saver = tf.train.Checkpoint(root=root) - self._set_sentinels(root) - status = object_saver.read(save_path) - if tf.executing_eagerly(): - self._check_sentinels(root) - if 
tf.executing_eagerly(): - status.assert_consumed() - status.assert_existing_objects_matched() - status.assert_nontrivial_match() - else: - # When graph building, we haven't read any keys, so we don't know - # whether the restore will be complete. - with self.assertRaisesRegex(AssertionError, "not restored"): - status.assert_consumed() - with self.assertRaisesRegex(AssertionError, "not restored"): + def _initialized_model(self): + input_value = tf.constant([[3.0]]) + model = MyModel() + optimizer = adam.Adam(0.001) + root_trackable = tf.train.Checkpoint(optimizer=optimizer, model=model) + with tf.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + train_op = optimizer.apply_gradients(zip(gradients, variables)) + self.evaluate(trackable_utils.gather_initializers(root_trackable)) + self.evaluate(train_op) + # A regular variable, a slot variable, and a non-slot Optimizer variable + # with known values to check when loading. + self.evaluate(model._named_dense.bias.assign([1.0])) + self.evaluate( + optimizer.get_slot( + var=model._named_dense.bias, slot_name="m" + ).assign([2.0]) + ) + self.evaluate(optimizer.beta_1.assign(3.0)) + return root_trackable + + def _set_sentinels(self, root_trackable): + self.evaluate(root_trackable.model._named_dense.bias.assign([101.0])) + self.evaluate( + root_trackable.optimizer.get_slot( + var=root_trackable.model._named_dense.bias, slot_name="m" + ).assign([102.0]) + ) + self.evaluate(root_trackable.optimizer.beta_1.assign(103.0)) + + def _check_sentinels(self, root_trackable): + self.assertAllEqual( + [1.0], self.evaluate(root_trackable.model._named_dense.bias) + ) + self.assertAllEqual( + [2.0], + self.evaluate( + root_trackable.optimizer.get_slot( + var=root_trackable.model._named_dense.bias, slot_name="m" + ) + ), + ) + self.assertAllEqual(3.0, self.evaluate(root_trackable.optimizer.beta_1)) + + def _write_name_based_checkpoint(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.graph_mode(): + save_graph = tf.Graph() + with save_graph.as_default(), self.session( + graph=save_graph + ) as session: + root = self._initialized_model() + name_saver = tf.compat.v1.train.Saver() + return name_saver.save( + sess=session, + save_path=checkpoint_prefix, + global_step=root.optimizer.iterations, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testLoadFromNameBasedSaver(self): + """Save a name-based checkpoint, load it using the object-based API.""" + with test_utils.device(should_use_gpu=True): + with self.test_session(): + save_path = self._write_name_based_checkpoint() + root = self._initialized_model() + self._set_sentinels(root) + with self.assertRaises(AssertionError): + self._check_sentinels(root) + object_saver = tf.train.Checkpoint(root=root) + self._set_sentinels(root) + status = object_saver.read(save_path) + if tf.executing_eagerly(): + self._check_sentinels(root) + if tf.executing_eagerly(): + status.assert_consumed() + status.assert_existing_objects_matched() + status.assert_nontrivial_match() + else: + # When graph building, we haven't read any keys, so we don't + # know whether the restore will be complete. 
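+                    # Every status assertion below raises until the restore
+                    # ops have actually been run.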
+ with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_consumed() + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_existing_objects_matched() + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_nontrivial_match() + status.run_restore_ops() + self._check_sentinels(root) + self._set_sentinels(root) + status = object_saver.read(save_path) + status.initialize_or_restore() + status.assert_nontrivial_match() + self._check_sentinels(root) + # Check that there is no error when keys are missing from the + # name-based checkpoint. + root.not_in_name_checkpoint = tf.Variable([1.0]) + status = object_saver.read(save_path) + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() + + def testSaveGraphLoadEager(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.graph_mode(): + save_graph = tf.Graph() + with save_graph.as_default(), self.session(graph=save_graph): + root = self._initialized_model() + save_path = root.save(file_prefix=checkpoint_prefix) + with tf.__internal__.eager_context.eager_mode(): + root = self._initialized_model() + self._set_sentinels(root) + root.restore(save_path).assert_consumed() + self._check_sentinels(root) + + def testSaveEagerLoadGraph(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with tf.__internal__.eager_context.eager_mode(): + root = self._initialized_model() + save_path = root.save(file_prefix=checkpoint_prefix) + with context.graph_mode(): + save_graph = tf.Graph() + with save_graph.as_default(), self.session(graph=save_graph): + root = self._initialized_model() + self._set_sentinels(root) + root.restore(save_path).assert_consumed().run_restore_ops() + self._check_sentinels(root) + + def testIgnoreSaveCounter(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with self.cached_session() as session: + # Create and save a model using Saver() before using a Checkpoint. + # This generates a snapshot without the Checkpoint's `save_counter`. + model = sequential.Sequential() + model.add(reshaping.Flatten(input_shape=(1,))) + model.add(core.Dense(1)) + name_saver = tf.compat.v1.train.Saver(model.trainable_variables) + save_path = name_saver.save( + sess=session, save_path=checkpoint_prefix, global_step=1 + ) + # Checkpoint.restore must successfully load that checkpoint. + ckpt = tf.train.Checkpoint(model=model) + status = ckpt.restore(save_path) status.assert_existing_objects_matched() - with self.assertRaisesRegex(AssertionError, "not restored"): - status.assert_nontrivial_match() - status.run_restore_ops() - self._check_sentinels(root) - self._set_sentinels(root) - status = object_saver.read(save_path) - status.initialize_or_restore() - status.assert_nontrivial_match() - self._check_sentinels(root) - # Check that there is no error when keys are missing from the name-based - # checkpoint. 
- root.not_in_name_checkpoint = tf.Variable([1.]) - status = object_saver.read(save_path) - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() - - def testSaveGraphLoadEager(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - with context.graph_mode(): - save_graph = tf.Graph() - with save_graph.as_default(), self.session( - graph=save_graph): - root = self._initialized_model() - save_path = root.save(file_prefix=checkpoint_prefix) - with tf.__internal__.eager_context.eager_mode(): - root = self._initialized_model() - self._set_sentinels(root) - root.restore(save_path).assert_consumed() - self._check_sentinels(root) - - def testSaveEagerLoadGraph(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - with tf.__internal__.eager_context.eager_mode(): - root = self._initialized_model() - save_path = root.save(file_prefix=checkpoint_prefix) - with context.graph_mode(): - save_graph = tf.Graph() - with save_graph.as_default(), self.session( - graph=save_graph): - root = self._initialized_model() - self._set_sentinels(root) - root.restore(save_path).assert_consumed().run_restore_ops() - self._check_sentinels(root) - - def testIgnoreSaveCounter(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - with self.cached_session() as session: - # Create and save a model using Saver() before using a Checkpoint. This - # generates a snapshot without the Checkpoint's `save_counter`. - model = sequential.Sequential() - model.add(reshaping.Flatten(input_shape=(1,))) - model.add(core.Dense(1)) - name_saver = tf.compat.v1.train.Saver(model.trainable_variables) - save_path = name_saver.save( - sess=session, save_path=checkpoint_prefix, global_step=1) - # Checkpoint.restore must successfully load that checkpoint. - ckpt = tf.train.Checkpoint(model=model) - status = ckpt.restore(save_path) - status.assert_existing_objects_matched() - # It should, however, refuse to load a checkpoint where an unrelated - # `save_counter` variable is missing. - model.layers[1].var = tf.Variable(0., name="save_counter") - status = ckpt.restore(save_path) - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() + # It should, however, refuse to load a checkpoint where an unrelated + # `save_counter` variable is missing. 
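The matching rule described in the comment above applies to any tracked object with no counterpart in the file, not just `save_counter`. A minimal sketch of the failure mode (names and paths hypothetical):

```python
import tensorflow.compat.v2 as tf

ckpt = tf.train.Checkpoint(v=tf.Variable(1.0))
path = ckpt.save("/tmp/match_demo")   # hypothetical prefix
ckpt.extra = tf.Variable(2.0)         # tracked now, but absent from the file
status = ckpt.restore(path)
try:
    status.assert_existing_objects_matched()
except AssertionError:
    # `extra` exists as a Python object but matched nothing in the checkpoint.
    pass
```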
+ model.layers[1].var = tf.Variable(0.0, name="save_counter") + status = ckpt.restore(save_path) + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() if __name__ == "__main__": - tf.compat.v1.enable_eager_execution() - tf.test.main() + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/tests/tracking_util_with_v1_optimizers_test.py b/keras/tests/tracking_util_with_v1_optimizers_test.py index c750ce177fd9..bf1d85ed7bba 100644 --- a/keras/tests/tracking_util_with_v1_optimizers_test.py +++ b/keras/tests/tracking_util_with_v1_optimizers_test.py @@ -14,673 +14,799 @@ # ============================================================================== """Tests for object-based saving which use tf.train.* optimizers.""" -import tensorflow.compat.v2 as tf - import functools import os -from tensorflow.python.eager import context -from tensorflow.python.framework import test_util as tf_test_utils -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils + +import tensorflow.compat.v2 as tf + from keras.engine import training from keras.layers import core -from tensorflow.python.training.tracking import util as trackable_utils +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +# isort: off +from tensorflow.python.checkpoint import ( + checkpoint as trackable_utils, +) +from tensorflow.python.eager import context +from tensorflow.python.framework import ( + test_util as tf_test_utils, +) -class NonLayerTrackable(tf.Module): - def __init__(self): - super().__init__() - self.a_variable = trackable_utils.add_variable( - self, name="a_variable", shape=[]) +class NonLayerTrackable(tf.Module): + def __init__(self): + super().__init__() + self.a_variable = trackable_utils.add_variable( + self, name="a_variable", shape=[] + ) -# pylint: disable=not-callable class MyModel(training.Model): - """A concrete Model for testing.""" + """A concrete Model for testing.""" - def __init__(self): - super().__init__() - self._named_dense = core.Dense(1, use_bias=True) - self._second = core.Dense(1, use_bias=False) - # We can still track Trackables which aren't Layers. - self._non_layer = NonLayerTrackable() + def __init__(self): + super().__init__() + self._named_dense = core.Dense(1, use_bias=True) + self._second = core.Dense(1, use_bias=False) + # We can still track Trackables which aren't Layers. + self._non_layer = NonLayerTrackable() - def call(self, values): - ret = self._second(self._named_dense(values)) - return ret + def call(self, values): + ret = self._second(self._named_dense(values)) + return ret class CheckpointingTests(test_combinations.TestCase): - - @tf_test_utils.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testNamingWithOptimizer(self): - input_value = tf.constant([[3.]]) - model = MyModel() - # A nuisance Model using the same optimizer. Its slot variables should not - # go in the checkpoint, since it is never depended on. 
- other_model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - optimizer_step = tf.compat.v1.train.get_or_create_global_step() - root_trackable = tf.train.Checkpoint( - optimizer=optimizer, model=model, optimizer_step=optimizer_step) - if tf.executing_eagerly(): - optimizer.minimize( - lambda: model(input_value), - global_step=optimizer_step) - optimizer.minimize( - lambda: other_model(input_value), - global_step=optimizer_step) - else: - train_op = optimizer.minimize( - model(input_value), global_step=optimizer_step) - optimizer.minimize( - other_model(input_value), - global_step=optimizer_step) - self.evaluate(trackable_utils.gather_initializers( - root_trackable)) - self.evaluate(train_op) - named_variables, serialized_graph, _ = tf.__internal__.tracking.ObjectGraphView( - root_trackable).serialize_object_graph() - expected_checkpoint_names = ( - # Created in the root node, so no prefix. - "optimizer_step", - "model/_second/kernel", - "model/_named_dense/kernel", - "model/_named_dense/bias", - # non-Layer dependency of the model - "model/_non_layer/a_variable", - # The optimizer creates two non-slot variables - "optimizer/beta1_power", - "optimizer/beta2_power", - # Slot variables - "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", - "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", - "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", - "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", - "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", - "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", - ) - suffix = "/.ATTRIBUTES/VARIABLE_VALUE" - expected_checkpoint_names = [ - name + suffix for name in expected_checkpoint_names] - named_variables = {v.name: v for v in named_variables} - self.assertEqual(len(expected_checkpoint_names), - len(named_variables.keys())) - # Check that we've mapped to the right variable objects (not exhaustive) - self.assertEqual( - "global_step", - named_variables["optimizer_step" + suffix].full_name) - self.assertEqual( - "my_model/dense_1/kernel", - named_variables["model/_second/kernel" + suffix].full_name) - self.assertEqual( - "my_model/dense/kernel", - named_variables["model/_named_dense/kernel" + suffix].full_name) - self.assertEqual( - "beta1_power", - named_variables["optimizer/beta1_power" + suffix].full_name) - self.assertEqual( - "beta2_power", - named_variables["optimizer/beta2_power" + suffix].full_name) - # Spot check the generated protocol buffers. - self.assertEqual("optimizer", - serialized_graph.nodes[0].children[1].local_name) - optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[ - 1].node_id] - self.assertEqual("beta1_power", - optimizer_node.children[0].local_name) - self.assertEqual("beta1_power", - serialized_graph.nodes[optimizer_node.children[0].node_id] - .attributes[0].full_name) - self.assertEqual( - "my_model/dense/kernel", - serialized_graph.nodes[optimizer_node.slot_variables[0] - .original_variable_node_id] - .attributes[0].full_name) - # We strip off the :0 suffix, as variable.name-based saving does. 
- self.assertEqual( - "my_model/dense/kernel/Adam", - serialized_graph.nodes[optimizer_node.slot_variables[0] - .slot_variable_node_id] - .attributes[0].full_name) - self.assertEqual( - "my_model/dense/kernel/Adam:0", - optimizer.get_slot( - var=model._named_dense.kernel, - name="m").name) - self.assertEqual( - "model/_named_dense/kernel" + suffix, - serialized_graph.nodes[ - optimizer_node.slot_variables[0] - .original_variable_node_id].attributes[0].checkpoint_key) - self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) - self.assertEqual( - "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, - serialized_graph.nodes[ - optimizer_node.slot_variables[0] - .slot_variable_node_id].attributes[0].checkpoint_key) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testSaveRestore(self): - with self.test_session(): - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - root_trackable = tf.train.Checkpoint( - optimizer=optimizer, model=model) - input_value = tf.constant([[3.]]) - if tf.executing_eagerly(): - optimizer.minimize( - lambda: model(input_value)) - else: - train_op = optimizer.minimize(model(input_value)) - # TODO(allenl): Make initialization more pleasant when graph building. - root_trackable.save_counter # pylint: disable=pointless-statement - self.evaluate(trackable_utils.gather_initializers( - root_trackable)) - self.evaluate(train_op) - prefix = os.path.join(self.get_temp_dir(), "ckpt") - self.evaluate(tf.compat.v1.assign(model._named_dense.variables[1], [42.])) - m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") - self.evaluate(tf.compat.v1.assign(m_bias_slot, [1.5])) - save_path = root_trackable.save(file_prefix=prefix) - self.evaluate(tf.compat.v1.assign(model._named_dense.variables[1], [43.])) - self.evaluate(tf.compat.v1.assign(root_trackable.save_counter, 3)) - optimizer_variables = self.evaluate(optimizer.variables()) - self.evaluate(tf.compat.v1.assign(m_bias_slot, [-2.])) - # Immediate restoration - status = root_trackable.restore(save_path=save_path).assert_consumed() - status.run_restore_ops() - self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) - self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) - self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) - if not tf.executing_eagerly(): - return # Restore-on-create is only supported when executing eagerly - on_create_model = MyModel() - on_create_optimizer = tf.compat.v1.train.AdamOptimizer( - 0.001, - # Preserve beta1_power and beta2_power when applying gradients - # so we can test that they've been restored correctly. - beta1=1.0, - beta2=1.0) - on_create_root = tf.train.Checkpoint( - optimizer=on_create_optimizer, model=on_create_model) - # Deferred restoration - status = on_create_root.restore(save_path=save_path) - status.assert_nontrivial_match() - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): - status.assert_consumed() - on_create_model(tf.constant([[3.]])) # create variables - self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) - self.assertAllEqual([42.], - self.evaluate( - on_create_model._named_dense.variables[1])) - on_create_m_bias_slot = on_create_optimizer.get_slot( - on_create_model._named_dense.variables[1], "m") - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): - status.assert_consumed() - # Optimizer slot variables are created when the original variable is - # restored. 
- self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) - self.assertAllEqual(optimizer_variables[2:], - self.evaluate(on_create_optimizer.variables())) - dummy_var = tf.Variable([1.]) - on_create_optimizer.minimize(loss=dummy_var.read_value) - status.assert_existing_objects_matched() - status.assert_consumed() - beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators() - self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) - self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power)) - - # TODO(allenl): Debug garbage created by this test in python3. - def testDeferredRestorationUsageEager(self): - """An idiomatic eager execution example.""" - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - for training_continuation in range(3): - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model, - optimizer_step=tf.compat.v1.train.get_or_create_global_step()) - root.restore(tf.train.latest_checkpoint( - checkpoint_directory)) - for _ in range(num_training_steps): - # TODO(allenl): Use a Dataset and serialize/checkpoint it. - input_value = tf.constant([[3.]]) - optimizer.minimize( - lambda: model(input_value), # pylint: disable=cell-var-from-loop - global_step=root.optimizer_step) - root.save(file_prefix=checkpoint_prefix) - self.assertEqual((training_continuation + 1) * num_training_steps, - root.optimizer_step.numpy()) - - def testEagerDistributionStrategy(self): - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - - def _train_fn(optimizer, model, root): - input_value = tf.constant([[3.]]) - optimizer.minimize( - functools.partial(model, input_value), - global_step=root.optimizer_step) - - strategy = tf.distribute.MirroredStrategy() - with strategy.scope(): - for training_continuation in range(3): + @tf_test_utils.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testNamingWithOptimizer(self): + input_value = tf.constant([[3.0]]) model = MyModel() + # A nuisance Model using the same optimizer. Its slot variables should + # not go in the checkpoint, since it is never depended on. 
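The "nuisance model" comment above (the hunk continues below with its setup) rests on saving being dependency-driven: only objects reachable from the `Checkpoint` root are written, so a model that merely shares the optimizer never reaches the file. A sketch, assuming a hypothetical path:

```python
import tensorflow.compat.v2 as tf

tracked = tf.Variable(1.0)
orphan = tf.Variable(2.0)           # never attached to the checkpoint root
ckpt = tf.train.Checkpoint(v=tracked)
path = ckpt.save("/tmp/dep_demo")   # hypothetical prefix
# Only keys reachable from the root are written (plus bookkeeping entries):
print([name for name, _ in tf.train.list_variables(path)])
```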
+ other_model = MyModel() optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - root = tf.train.Checkpoint( - optimizer=optimizer, - model=model, - optimizer_step=tf.compat.v1.train.get_or_create_global_step()) - root.restore( - tf.train.latest_checkpoint(checkpoint_directory)) - - for _ in range(num_training_steps): - strategy.extended.call_for_each_replica( - functools.partial(_train_fn, optimizer, model, root)) - root.save(file_prefix=checkpoint_prefix) - self.assertEqual((training_continuation + 1) * num_training_steps, - root.optimizer_step.numpy()) - - def testGraphDistributionStrategy(self): - self.skipTest("b/121381184") - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - - def _train_fn(optimizer, model, root): - input_value = tf.constant([[3.]]) - return optimizer.minimize( - functools.partial(model, input_value), - global_step=root.optimizer_step) - - for training_continuation in range(3): - with tf.Graph().as_default(): - strategy = tf.distribute.MirroredStrategy() - with strategy.scope(): - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model, - optimizer_step=tf.compat.v1.train.get_or_create_global_step()) - status = root.restore(tf.train.latest_checkpoint( - checkpoint_directory)) - train_op = strategy.extended.call_for_each_replica( - functools.partial(_train_fn, optimizer, model, root)) - with self.session() as session: - if training_continuation > 0: - status.assert_consumed() - status.initialize_or_restore() - for _ in range(num_training_steps): - session.run(train_op) - root.save(file_prefix=checkpoint_prefix) - self.assertEqual((training_continuation + 1) * num_training_steps, - root.optimizer_step.numpy()) - - def testUsageGraph(self): - """Expected usage when graph building.""" - with context.graph_mode(): - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - for training_continuation in range(3): - with tf.Graph().as_default(): - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - root = tf.compat.v1.train.Checkpoint( - optimizer=optimizer, model=model, - global_step=tf.compat.v1.train.get_or_create_global_step()) - input_value = tf.constant([[3.]]) - train_op = optimizer.minimize( - model(input_value), - global_step=root.global_step) - checkpoint_path = tf.train.latest_checkpoint( - checkpoint_directory) - with self.session(graph=tf.compat.v1.get_default_graph()) as session: - status = root.restore(save_path=checkpoint_path) - status.initialize_or_restore(session=session) - if checkpoint_path is None: - self.assertEqual(0, training_continuation) - with self.assertRaises(AssertionError): - status.assert_consumed() - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() + optimizer_step = tf.compat.v1.train.get_or_create_global_step() + root_trackable = tf.train.Checkpoint( + optimizer=optimizer, model=model, optimizer_step=optimizer_step + ) + if tf.executing_eagerly(): + optimizer.minimize( + lambda: model(input_value), global_step=optimizer_step + ) + optimizer.minimize( + lambda: other_model(input_value), global_step=optimizer_step + ) + else: + train_op = optimizer.minimize( + model(input_value), global_step=optimizer_step + ) + optimizer.minimize( + other_model(input_value), global_step=optimizer_step + ) + 
self.evaluate(trackable_utils.gather_initializers(root_trackable)) + self.evaluate(train_op) + ( + named_variables, + serialized_graph, + _, + ) = tf.__internal__.tracking.ObjectGraphView( + root_trackable + ).serialize_object_graph() + expected_checkpoint_names = ( + # Created in the root node, so no prefix. + "optimizer_step", + "model/_second/kernel", + "model/_named_dense/kernel", + "model/_named_dense/bias", + # non-Layer dependency of the model + "model/_non_layer/a_variable", + # The optimizer creates two non-slot variables + "optimizer/beta1_power", + "optimizer/beta2_power", + # Slot variables + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", + ) + suffix = "/.ATTRIBUTES/VARIABLE_VALUE" + expected_checkpoint_names = [ + name + suffix for name in expected_checkpoint_names + ] + named_variables = {v.name: v for v in named_variables} + self.assertEqual( + len(expected_checkpoint_names), len(named_variables.keys()) + ) + # Check that we've created the right full_names of objects (not + # exhaustive) + expected_names = { + "optimizer_step" + suffix: "global_step", + "model/_second/kernel" + suffix: "my_model/dense_1/kernel", + "model/_named_dense/kernel" + suffix: "my_model/dense/kernel", + "optimizer/beta1_power" + suffix: "beta1_power", + "optimizer/beta2_power" + suffix: "beta2_power", + } + for nodes in serialized_graph.nodes: + for attribute in nodes.attributes: + expected_name = expected_names.pop( + attribute.checkpoint_key, None + ) + if expected_name is not None: + self.assertEqual(expected_name, attribute.full_name) + self.assertEmpty(expected_names) + + # Spot check the generated protocol buffers. + self.assertEqual( + "optimizer", serialized_graph.nodes[0].children[1].local_name + ) + optimizer_node = serialized_graph.nodes[ + serialized_graph.nodes[0].children[1].node_id + ] + self.assertEqual("beta1_power", optimizer_node.children[0].local_name) + self.assertEqual( + "beta1_power", + serialized_graph.nodes[optimizer_node.children[0].node_id] + .attributes[0] + .full_name, + ) + self.assertEqual( + "my_model/dense/kernel", + serialized_graph.nodes[ + optimizer_node.slot_variables[0].original_variable_node_id + ] + .attributes[0] + .full_name, + ) + + # We strip off the :0 suffix, as variable.name-based saving does. 
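The `:0` being stripped is the output-index suffix TensorFlow appends to `Variable.name`; the checkpoint's `full_name` records the op name without it. For instance:

```python
import tensorflow.compat.v2 as tf

v = tf.Variable(1.0, name="my_model/dense/kernel")
print(v.name)  # "my_model/dense/kernel:0" -- ":0" marks the op's first output
```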
+ self.assertEqual( + "my_model/dense/kernel/Adam", + serialized_graph.nodes[ + optimizer_node.slot_variables[0].slot_variable_node_id + ] + .attributes[0] + .full_name, + ) + self.assertEqual( + "my_model/dense/kernel/Adam:0", + optimizer.get_slot(var=model._named_dense.kernel, name="m").name, + ) + self.assertEqual( + "model/_named_dense/kernel" + suffix, + serialized_graph.nodes[ + optimizer_node.slot_variables[0].original_variable_node_id + ] + .attributes[0] + .checkpoint_key, + ) + self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) + self.assertEqual( + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, + serialized_graph.nodes[ + optimizer_node.slot_variables[0].slot_variable_node_id + ] + .attributes[0] + .checkpoint_key, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testSaveRestore(self): + with self.test_session(): + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + root_trackable = tf.train.Checkpoint( + optimizer=optimizer, model=model + ) + input_value = tf.constant([[3.0]]) + if tf.executing_eagerly(): + optimizer.minimize(lambda: model(input_value)) else: - status.assert_consumed() - status.assert_existing_objects_matched() + train_op = optimizer.minimize(model(input_value)) + # TODO(allenl): Make initialization more pleasant when graph + # building. + root_trackable.save_counter + self.evaluate( + trackable_utils.gather_initializers(root_trackable) + ) + self.evaluate(train_op) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + self.evaluate( + tf.compat.v1.assign(model._named_dense.variables[1], [42.0]) + ) + m_bias_slot = optimizer.get_slot( + model._named_dense.variables[1], "m" + ) + self.evaluate(tf.compat.v1.assign(m_bias_slot, [1.5])) + save_path = root_trackable.save(file_prefix=prefix) + self.evaluate( + tf.compat.v1.assign(model._named_dense.variables[1], [43.0]) + ) + self.evaluate(tf.compat.v1.assign(root_trackable.save_counter, 3)) + optimizer_variables = self.evaluate(optimizer.variables()) + self.evaluate(tf.compat.v1.assign(m_bias_slot, [-2.0])) + # Immediate restoration + status = root_trackable.restore( + save_path=save_path + ).assert_consumed() + status.run_restore_ops() + self.assertAllEqual( + [42.0], self.evaluate(model._named_dense.variables[1]) + ) + self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) + self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) + if not tf.executing_eagerly(): + # Restore-on-create is only supported when executing eagerly + return + on_create_model = MyModel() + on_create_optimizer = tf.compat.v1.train.AdamOptimizer( + 0.001, + # Preserve beta1_power and beta2_power when applying gradients + # so we can test that they've been restored correctly. 
+ beta1=1.0, + beta2=1.0, + ) + on_create_root = tf.train.Checkpoint( + optimizer=on_create_optimizer, model=on_create_model + ) + # Deferred restoration + status = on_create_root.restore(save_path=save_path) + status.assert_nontrivial_match() + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() + on_create_model(tf.constant([[3.0]])) # create variables + self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) + self.assertAllEqual( + [42.0], self.evaluate(on_create_model._named_dense.variables[1]) + ) + on_create_m_bias_slot = on_create_optimizer.get_slot( + on_create_model._named_dense.variables[1], "m" + ) + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() + # Optimizer slot variables are created when the original variable is + # restored. + self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) + self.assertAllEqual( + optimizer_variables[2:], + self.evaluate(on_create_optimizer.variables()), + ) + dummy_var = tf.Variable([1.0]) + on_create_optimizer.minimize(loss=dummy_var.read_value) + status.assert_existing_objects_matched() + status.assert_consumed() + ( + beta1_power, + beta2_power, + ) = on_create_optimizer._get_beta_accumulators() + self.assertAllEqual( + optimizer_variables[0], self.evaluate(beta1_power) + ) + self.assertAllEqual( + optimizer_variables[1], self.evaluate(beta2_power) + ) + + # TODO(allenl): Debug garbage created by this test in python3. + def testDeferredRestorationUsageEager(self): + """An idiomatic eager execution example.""" + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + root = tf.train.Checkpoint( + optimizer=optimizer, + model=model, + optimizer_step=tf.compat.v1.train.get_or_create_global_step(), + ) + root.restore(tf.train.latest_checkpoint(checkpoint_directory)) for _ in range(num_training_steps): - session.run(train_op) - root.save(file_prefix=checkpoint_prefix, session=session) - self.assertEqual((training_continuation + 1) * num_training_steps, - session.run(root.global_step)) - self.assertEqual(training_continuation + 1, - session.run(root.save_counter)) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testAgnosticUsage(self): - """Graph/eager agnostic usage.""" - # Does create garbage when executing eagerly due to ops.Graph() creation. 
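The TODO above is feasible with the same API: `tf.train.Checkpoint` can track a `tf.data` iterator, so resuming a run also resumes the input pipeline. A sketch under the assumption of a simple range dataset and a hypothetical path:

```python
import tensorflow.compat.v2 as tf

iterator = iter(tf.data.Dataset.range(5))
ckpt = tf.train.Checkpoint(iterator=iterator)
next(iterator)                      # consume element 0
path = ckpt.save("/tmp/iter_demo")  # the iterator's position is checkpointed
next(iterator)                      # the iterator has now produced 0 and 1
ckpt.restore(path)                  # rewinds to just after element 0
assert next(iterator).numpy() == 1
```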
- with self.test_session(): - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - for training_continuation in range(3): - with test_utils.device(should_use_gpu=True): - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model, - global_step=tf.compat.v1.train.get_or_create_global_step()) - manager = tf.train.CheckpointManager( - root, checkpoint_directory, max_to_keep=1) - status = root.restore(save_path=manager.latest_checkpoint) - input_value = tf.constant([[3.]]) - train_fn = functools.partial( - optimizer.minimize, - functools.partial(model, input_value), - global_step=root.global_step) - if not tf.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - for _ in range(num_training_steps): - train_fn() - manager.save() - self.assertEqual((training_continuation + 1) * num_training_steps, - self.evaluate(root.global_step)) - self.assertEqual(training_continuation + 1, - self.evaluate(root.save_counter)) - - # pylint: disable=cell-var-from-loop - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testWithDefun(self): - with self.test_session(): - num_training_steps = 2 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - for training_continuation in range(3): - with test_utils.device(should_use_gpu=True): - model = MyModel() - # Don't actually train so we can test variable values - optimizer = tf.compat.v1.train.AdamOptimizer(0.) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model, - global_step=tf.compat.v1.train.get_or_create_global_step()) - checkpoint_path = tf.train.latest_checkpoint( - checkpoint_directory) - status = root.restore(save_path=checkpoint_path) - def train_fn(): - @tf.function - def _call_model(x): - return model(x) + # TODO(allenl): Use a Dataset and serialize/checkpoint it. 
+ input_value = tf.constant([[3.0]]) + optimizer.minimize( + lambda: model(input_value), + global_step=root.optimizer_step, + ) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual( + (training_continuation + 1) * num_training_steps, + root.optimizer_step.numpy(), + ) + + def testEagerDistributionStrategy(self): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + def _train_fn(optimizer, model, root): + input_value = tf.constant([[3.0]]) + optimizer.minimize( + functools.partial(model, input_value), + global_step=root.optimizer_step, + ) + + strategy = tf.distribute.MirroredStrategy() + with strategy.scope(): + for training_continuation in range(3): + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + root = tf.train.Checkpoint( + optimizer=optimizer, + model=model, + optimizer_step=tf.compat.v1.train.get_or_create_global_step(), # noqa: E501 + ) + root.restore(tf.train.latest_checkpoint(checkpoint_directory)) + + for _ in range(num_training_steps): + strategy.extended.call_for_each_replica( + functools.partial(_train_fn, optimizer, model, root) + ) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual( + (training_continuation + 1) * num_training_steps, + root.optimizer_step.numpy(), + ) + + def testGraphDistributionStrategy(self): + self.skipTest("b/121381184") + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + def _train_fn(optimizer, model, root): + input_value = tf.constant([[3.0]]) + return optimizer.minimize( + functools.partial(model, input_value), + global_step=root.optimizer_step, + ) + + for training_continuation in range(3): + with tf.Graph().as_default(): + strategy = tf.distribute.MirroredStrategy() + with strategy.scope(): + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + root = tf.train.Checkpoint( + optimizer=optimizer, + model=model, + optimizer_step=tf.compat.v1.train.get_or_create_global_step(), # noqa: E501 + ) + status = root.restore( + tf.train.latest_checkpoint(checkpoint_directory) + ) + train_op = strategy.extended.call_for_each_replica( + functools.partial(_train_fn, optimizer, model, root) + ) + with self.session() as session: + if training_continuation > 0: + status.assert_consumed() + status.initialize_or_restore() + for _ in range(num_training_steps): + session.run(train_op) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual( + (training_continuation + 1) * num_training_steps, + root.optimizer_step.numpy(), + ) + + def testUsageGraph(self): + """Expected usage when graph building.""" + with context.graph_mode(): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with tf.Graph().as_default(): + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + root = tf.compat.v1.train.Checkpoint( + optimizer=optimizer, + model=model, + global_step=tf.compat.v1.train.get_or_create_global_step(), # noqa: E501 + ) + input_value = tf.constant([[3.0]]) + train_op = optimizer.minimize( + model(input_value), global_step=root.global_step + ) + checkpoint_path = tf.train.latest_checkpoint( + checkpoint_directory + ) + with self.session( + graph=tf.compat.v1.get_default_graph() + ) as session: + status = root.restore(save_path=checkpoint_path) + 
status.initialize_or_restore(session=session) + if checkpoint_path is None: + self.assertEqual(0, training_continuation) + with self.assertRaises(AssertionError): + status.assert_consumed() + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() + else: + status.assert_consumed() + status.assert_existing_objects_matched() + for _ in range(num_training_steps): + session.run(train_op) + root.save( + file_prefix=checkpoint_prefix, session=session + ) + self.assertEqual( + (training_continuation + 1) * num_training_steps, + session.run(root.global_step), + ) + self.assertEqual( + training_continuation + 1, + session.run(root.save_counter), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testAgnosticUsage(self): + """Graph/eager agnostic usage.""" + # Does create garbage when executing eagerly due to ops.Graph() + # creation. + with self.test_session(): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + for training_continuation in range(3): + with test_utils.device(should_use_gpu=True): + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + root = tf.train.Checkpoint( + optimizer=optimizer, + model=model, + global_step=tf.compat.v1.train.get_or_create_global_step(), # noqa: E501 + ) + manager = tf.train.CheckpointManager( + root, checkpoint_directory, max_to_keep=1 + ) + status = root.restore(save_path=manager.latest_checkpoint) + input_value = tf.constant([[3.0]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(model, input_value), + global_step=root.global_step, + ) + if not tf.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + manager.save() + self.assertEqual( + (training_continuation + 1) * num_training_steps, + self.evaluate(root.global_step), + ) + self.assertEqual( + training_continuation + 1, + self.evaluate(root.save_counter), + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testWithDefun(self): + with self.test_session(): + num_training_steps = 2 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with test_utils.device(should_use_gpu=True): + model = MyModel() + # Don't actually train so we can test variable values + optimizer = tf.compat.v1.train.AdamOptimizer(0.0) + root = tf.train.Checkpoint( + optimizer=optimizer, + model=model, + global_step=tf.compat.v1.train.get_or_create_global_step(), # noqa: E501 + ) + checkpoint_path = tf.train.latest_checkpoint( + checkpoint_directory + ) + status = root.restore(save_path=checkpoint_path) + + def train_fn(): + @tf.function + def _call_model(x): + return model(x) + + with tf.GradientTape() as tape: + loss = _call_model(tf.constant([[3.0]])) + gradients = tape.gradient(loss, model.variables) + return optimizer.apply_gradients( + zip(gradients, model.variables), + global_step=root.global_step, + ) + + if not tf.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + if training_continuation > 0: + status.assert_consumed() + self.assertAllClose( + [[42.0]], self.evaluate(model.variables[0]) + ) + else: + self.evaluate(model.variables[0].assign([[42.0]])) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual( + 
(training_continuation + 1) * num_training_steps, + self.evaluate(root.global_step), + ) + self.assertEqual( + training_continuation + 1, + self.evaluate(root.save_counter), + ) + + @test_combinations.generate(test_combinations.combine(mode=["eager"])) + def testAnonymousVarsInInit(self): + class Model(training.Model): + def __init__(self): + super().__init__() + self.w = tf.Variable(0.0) + self.b = tf.Variable(0.0) + self.vars = [self.w, self.b] + + def call(self, x): + return x * self.w + self.b + + model = Model() + optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.05) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer) + for _ in range(2): + checkpoint.save(checkpoint_prefix) with tf.GradientTape() as tape: - loss = _call_model(tf.constant([[3.]])) - gradients = tape.gradient(loss, model.variables) - return optimizer.apply_gradients(zip(gradients, model.variables), - global_step=root.global_step) - if not tf.executing_eagerly(): - train_fn = functools.partial( - self.evaluate, train_fn()) - status.initialize_or_restore() - for _ in range(num_training_steps): - train_fn() - if training_continuation > 0: - status.assert_consumed() - self.assertAllClose([[42.]], self.evaluate(model.variables[0])) - else: - self.evaluate(model.variables[0].assign([[42.]])) - root.save(file_prefix=checkpoint_prefix) - self.assertEqual((training_continuation + 1) * num_training_steps, - self.evaluate(root.global_step)) - self.assertEqual(training_continuation + 1, - self.evaluate(root.save_counter)) - # pylint: enable=cell-var-from-loop - - @test_combinations.generate(test_combinations.combine(mode=["eager"])) - def testAnonymousVarsInInit(self): - - class Model(training.Model): - - def __init__(self): - super().__init__() - self.w = tf.Variable(0.0) - self.b = tf.Variable(0.0) - self.vars = [self.w, self.b] - - def call(self, x): - return x * self.w + self.b - - model = Model() - optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.05) - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - checkpoint = tf.train.Checkpoint( - model=model, optimizer=optimizer) - for _ in range(2): - checkpoint.save(checkpoint_prefix) - with tf.GradientTape() as tape: - loss = (tf.constant(1.) - - model(tf.constant(1.))) ** 2 - grad = tape.gradient(loss, model.vars) - optimizer.apply_gradients( - [(g, v) for g, v in zip(grad, model.vars)]) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def test_initialize_if_not_restoring(self): - with self.test_session(): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - optimizer_only_prefix = os.path.join(checkpoint_directory, "opt") - with test_utils.device(should_use_gpu=True): - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - root = tf.train.Checkpoint( - model=model, # Do not save the optimizer with the checkpoint. 
- global_step=tf.compat.v1.train.get_or_create_global_step()) - optimizer_checkpoint = tf.train.Checkpoint( - optimizer=optimizer) - - checkpoint_path = tf.train.latest_checkpoint( - checkpoint_directory) - status = root.restore(save_path=checkpoint_path) - input_value = tf.constant([[3.]]) - train_fn = functools.partial( - optimizer.minimize, - functools.partial(model, input_value), - global_step=root.global_step) - if not tf.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - self.evaluate([v.initializer for v in optimizer.variables()]) - train_fn() - model_save_path = root.save(file_prefix=checkpoint_prefix) - self.evaluate(optimizer.variables()[0].assign(42.)) - optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix) - - # Restore into a graph with the optimizer - with test_utils.device(should_use_gpu=True): - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model, - global_step=tf.compat.v1.train.get_or_create_global_step()) - status = root.restore(save_path=model_save_path) - input_value = tf.constant([[3.]]) - train_fn = functools.partial( - optimizer.minimize, - functools.partial(model, input_value), - global_step=root.global_step) - if not tf.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - train_fn() - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): - status.assert_consumed() - - # Make sure initialization doesn't clobber later restores - with test_utils.device(should_use_gpu=True): - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001, beta1=1.0) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model, - global_step=tf.compat.v1.train.get_or_create_global_step()) - opt_root = tf.train.Checkpoint( - optimizer=optimizer) - status = root.restore(save_path=model_save_path) - init_only_optimizer_status = opt_root.restore(save_path=None) - optimizer_status = opt_root.restore(save_path=optimizer_save_path) - input_value = tf.constant([[3.]]) - train_fn = functools.partial( - optimizer.minimize, - functools.partial(model, input_value), - global_step=root.global_step) - if not tf.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - optimizer_status.run_restore_ops() - status.initialize_or_restore() - init_only_optimizer_status.initialize_or_restore() - train_fn() - self.assertEqual(42., self.evaluate(optimizer.variables()[0])) + loss = (tf.constant(1.0) - model(tf.constant(1.0))) ** 2 + grad = tape.gradient(loss, model.vars) + optimizer.apply_gradients( + [(g, v) for g, v in zip(grad, model.vars)] + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def test_initialize_if_not_restoring(self): + with self.test_session(): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + optimizer_only_prefix = os.path.join(checkpoint_directory, "opt") + with test_utils.device(should_use_gpu=True): + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + root = tf.train.Checkpoint( + # Do not save the optimizer with the checkpoint. 
+ model=model, + global_step=tf.compat.v1.train.get_or_create_global_step(), + ) + optimizer_checkpoint = tf.train.Checkpoint(optimizer=optimizer) + + checkpoint_path = tf.train.latest_checkpoint( + checkpoint_directory + ) + status = root.restore(save_path=checkpoint_path) + input_value = tf.constant([[3.0]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(model, input_value), + global_step=root.global_step, + ) + if not tf.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + self.evaluate([v.initializer for v in optimizer.variables()]) + train_fn() + model_save_path = root.save(file_prefix=checkpoint_prefix) + self.evaluate(optimizer.variables()[0].assign(42.0)) + optimizer_save_path = optimizer_checkpoint.save( + optimizer_only_prefix + ) + + # Restore into a graph with the optimizer + with test_utils.device(should_use_gpu=True): + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + root = tf.train.Checkpoint( + optimizer=optimizer, + model=model, + global_step=tf.compat.v1.train.get_or_create_global_step(), + ) + status = root.restore(save_path=model_save_path) + input_value = tf.constant([[3.0]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(model, input_value), + global_step=root.global_step, + ) + if not tf.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + train_fn() + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() + + # Make sure initialization doesn't clobber later restores + with test_utils.device(should_use_gpu=True): + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001, beta1=1.0) + root = tf.train.Checkpoint( + optimizer=optimizer, + model=model, + global_step=tf.compat.v1.train.get_or_create_global_step(), + ) + opt_root = tf.train.Checkpoint(optimizer=optimizer) + status = root.restore(save_path=model_save_path) + init_only_optimizer_status = opt_root.restore(save_path=None) + optimizer_status = opt_root.restore( + save_path=optimizer_save_path + ) + input_value = tf.constant([[3.0]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(model, input_value), + global_step=root.global_step, + ) + if not tf.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + optimizer_status.run_restore_ops() + status.initialize_or_restore() + init_only_optimizer_status.initialize_or_restore() + train_fn() + self.assertEqual(42.0, self.evaluate(optimizer.variables()[0])) class CheckpointCompatibilityTests(test_combinations.TestCase): - - def _initialized_model(self): - input_value = tf.constant([[3.]]) - model = MyModel() - optimizer = tf.compat.v1.train.AdamOptimizer(0.001) - optimizer_step = tf.compat.v1.train.get_or_create_global_step() - root_trackable = tf.train.Checkpoint( - optimizer=optimizer, model=model, optimizer_step=optimizer_step) - train_op = optimizer.minimize( - functools.partial(model, input_value), - global_step=optimizer_step) - self.evaluate(trackable_utils.gather_initializers( - root_trackable)) - self.evaluate(train_op) - # A regular variable, a slot variable, and a non-slot Optimizer variable - # with known values to check when loading. 
- self.evaluate(model._named_dense.bias.assign([1.])) - self.evaluate(optimizer.get_slot( - var=model._named_dense.bias, name="m").assign([2.])) - beta1_power, _ = optimizer._get_beta_accumulators() - self.evaluate(beta1_power.assign(3.)) - return root_trackable - - def _set_sentinels(self, root_trackable): - self.evaluate(root_trackable.model._named_dense.bias.assign([101.])) - self.evaluate( - root_trackable.optimizer.get_slot( - var=root_trackable.model._named_dense.bias, name="m") - .assign([102.])) - beta1_power, _ = root_trackable.optimizer._get_beta_accumulators() - self.evaluate(beta1_power.assign(103.)) - - def _check_sentinels(self, root_trackable): - self.assertAllEqual( - [1.], self.evaluate(root_trackable.model._named_dense.bias)) - self.assertAllEqual([2.], self.evaluate( - root_trackable.optimizer.get_slot( - var=root_trackable.model._named_dense.bias, name="m"))) - beta1_power, _ = root_trackable.optimizer._get_beta_accumulators() - self.assertAllEqual(3., self.evaluate(beta1_power)) - - def _write_name_based_checkpoint(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - with context.graph_mode(): - save_graph = tf.Graph() - with save_graph.as_default(), self.session( - graph=save_graph) as session: - root = self._initialized_model() - name_saver = tf.compat.v1.train.Saver() - return name_saver.save( - sess=session, save_path=checkpoint_prefix, - global_step=root.optimizer_step) - - @test_combinations.generate( - test_combinations.combine(mode=["graph", "eager"])) - def testLoadFromNameBasedSaver(self): - """Save a name-based checkpoint, load it using the object-based API.""" - with test_utils.device(should_use_gpu=True): - with self.test_session(): - save_path = self._write_name_based_checkpoint() - root = self._initialized_model() - self._set_sentinels(root) - with self.assertRaises(AssertionError): - self._check_sentinels(root) - object_saver = tf.train.Checkpoint(root=root) - self._set_sentinels(root) - status = object_saver.read(save_path) - if tf.executing_eagerly(): - self._check_sentinels(root) - if tf.executing_eagerly(): - status.assert_consumed() - status.assert_existing_objects_matched() - status.assert_nontrivial_match() - else: - # When graph building, we haven't read any keys, so we don't know - # whether the restore will be complete. - with self.assertRaisesRegex(AssertionError, "not restored"): - status.assert_consumed() - with self.assertRaisesRegex(AssertionError, "not restored"): - status.assert_existing_objects_matched() - with self.assertRaisesRegex(AssertionError, "not restored"): - status.assert_nontrivial_match() - status.run_restore_ops() - self._check_sentinels(root) - self._set_sentinels(root) - status = object_saver.read(save_path) - status.initialize_or_restore() - self._check_sentinels(root) - # Check that there is no error when keys are missing from the name-based - # checkpoint. 
- root.not_in_name_checkpoint = tf.Variable([1.]) - status = object_saver.read(save_path) - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() - - def testSaveGraphLoadEager(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - with context.graph_mode(): - save_graph = tf.Graph() - with save_graph.as_default(), self.session( - graph=save_graph): - root = self._initialized_model() - save_path = root.save(file_prefix=checkpoint_prefix) - with tf.__internal__.eager_context.eager_mode(): - root = self._initialized_model() - self._set_sentinels(root) - root.restore(save_path).assert_consumed() - self._check_sentinels(root) - - def testSaveEagerLoadGraph(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - with tf.__internal__.eager_context.eager_mode(): - root = self._initialized_model() - save_path = root.save(file_prefix=checkpoint_prefix) - with context.graph_mode(): - save_graph = tf.Graph() - with save_graph.as_default(), self.session( - graph=save_graph): - root = self._initialized_model() - self._set_sentinels(root) - root.restore(save_path).assert_consumed().run_restore_ops() - self._check_sentinels(root) + def _initialized_model(self): + input_value = tf.constant([[3.0]]) + model = MyModel() + optimizer = tf.compat.v1.train.AdamOptimizer(0.001) + optimizer_step = tf.compat.v1.train.get_or_create_global_step() + root_trackable = tf.train.Checkpoint( + optimizer=optimizer, model=model, optimizer_step=optimizer_step + ) + train_op = optimizer.minimize( + functools.partial(model, input_value), global_step=optimizer_step + ) + self.evaluate(trackable_utils.gather_initializers(root_trackable)) + self.evaluate(train_op) + # A regular variable, a slot variable, and a non-slot Optimizer variable + # with known values to check when loading. 
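Three distinct kinds of optimizer state are being pinned by these sentinels. With the legacy `AdamOptimizer` they are reachable as follows; this is a sketch only, and `_get_beta_accumulators` is the same private helper the tests rely on:

```python
import tensorflow.compat.v2 as tf

v = tf.Variable([1.0])                                    # a regular variable
opt = tf.compat.v1.train.AdamOptimizer(0.001)
opt.minimize(lambda: tf.reduce_sum(v * v), var_list=[v])  # creates the slots
m_slot = opt.get_slot(var=v, name="m")          # per-variable slot state
beta1_power, _ = opt._get_beta_accumulators()   # non-slot optimizer state
print(v.numpy(), m_slot.numpy(), beta1_power.numpy())
```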
+ self.evaluate(model._named_dense.bias.assign([1.0])) + self.evaluate( + optimizer.get_slot(var=model._named_dense.bias, name="m").assign( + [2.0] + ) + ) + beta1_power, _ = optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(3.0)) + return root_trackable + + def _set_sentinels(self, root_trackable): + self.evaluate(root_trackable.model._named_dense.bias.assign([101.0])) + self.evaluate( + root_trackable.optimizer.get_slot( + var=root_trackable.model._named_dense.bias, name="m" + ).assign([102.0]) + ) + beta1_power, _ = root_trackable.optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(103.0)) + + def _check_sentinels(self, root_trackable): + self.assertAllEqual( + [1.0], self.evaluate(root_trackable.model._named_dense.bias) + ) + self.assertAllEqual( + [2.0], + self.evaluate( + root_trackable.optimizer.get_slot( + var=root_trackable.model._named_dense.bias, name="m" + ) + ), + ) + beta1_power, _ = root_trackable.optimizer._get_beta_accumulators() + self.assertAllEqual(3.0, self.evaluate(beta1_power)) + + def _write_name_based_checkpoint(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.graph_mode(): + save_graph = tf.Graph() + with save_graph.as_default(), self.session( + graph=save_graph + ) as session: + root = self._initialized_model() + name_saver = tf.compat.v1.train.Saver() + return name_saver.save( + sess=session, + save_path=checkpoint_prefix, + global_step=root.optimizer_step, + ) + + @test_combinations.generate( + test_combinations.combine(mode=["graph", "eager"]) + ) + def testLoadFromNameBasedSaver(self): + """Save a name-based checkpoint, load it using the object-based API.""" + with test_utils.device(should_use_gpu=True): + with self.test_session(): + save_path = self._write_name_based_checkpoint() + root = self._initialized_model() + self._set_sentinels(root) + with self.assertRaises(AssertionError): + self._check_sentinels(root) + object_saver = tf.train.Checkpoint(root=root) + self._set_sentinels(root) + status = object_saver.read(save_path) + if tf.executing_eagerly(): + self._check_sentinels(root) + if tf.executing_eagerly(): + status.assert_consumed() + status.assert_existing_objects_matched() + status.assert_nontrivial_match() + else: + # When graph building, we haven't read any keys, so we don't + # know whether the restore will be complete. + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_consumed() + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_existing_objects_matched() + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_nontrivial_match() + status.run_restore_ops() + self._check_sentinels(root) + self._set_sentinels(root) + status = object_saver.read(save_path) + status.initialize_or_restore() + self._check_sentinels(root) + # Check that there is no error when keys are missing from the + # name-based checkpoint. 
+ root.not_in_name_checkpoint = tf.Variable([1.0]) + status = object_saver.read(save_path) + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() + + def testSaveGraphLoadEager(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.graph_mode(): + save_graph = tf.Graph() + with save_graph.as_default(), self.session(graph=save_graph): + root = self._initialized_model() + save_path = root.save(file_prefix=checkpoint_prefix) + with tf.__internal__.eager_context.eager_mode(): + root = self._initialized_model() + self._set_sentinels(root) + root.restore(save_path).assert_consumed() + self._check_sentinels(root) + + def testSaveEagerLoadGraph(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with tf.__internal__.eager_context.eager_mode(): + root = self._initialized_model() + save_path = root.save(file_prefix=checkpoint_prefix) + with context.graph_mode(): + save_graph = tf.Graph() + with save_graph.as_default(), self.session(graph=save_graph): + root = self._initialized_model() + self._set_sentinels(root) + root.restore(save_path).assert_consumed().run_restore_ops() + self._check_sentinels(root) if __name__ == "__main__": - tf.compat.v1.enable_eager_execution() - tf.test.main() + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/tests/tracking_util_xla_test.py b/keras/tests/tracking_util_xla_test.py index 7fb0ddbf607c..4867ab5f20d0 100644 --- a/keras/tests/tracking_util_xla_test.py +++ b/keras/tests/tracking_util_xla_test.py @@ -13,65 +13,70 @@ # limitations under the License. # ============================================================================== -from tensorflow.compiler.tests import xla_test - import tensorflow.compat.v2 as tf + from keras.engine import training from keras.layers import core -from keras.optimizers.optimizer_v2 import adam -from tensorflow.python.training.tracking import util as trackable_utils +from keras.optimizers.legacy import adam +# isort: off +from tensorflow.compiler.tests import xla_test +from tensorflow.python.checkpoint import ( + checkpoint as trackable_utils, +) -class NonLayerTrackable(tf.Module): - def __init__(self): - super().__init__() - self.a_variable = trackable_utils.add_variable( - self, name="a_variable", shape=[]) +class NonLayerTrackable(tf.Module): + def __init__(self): + super().__init__() + self.a_variable = trackable_utils.add_variable( + self, name="a_variable", shape=[] + ) class Subclassed(training.Model): - """A concrete Model for testing.""" + """A concrete Model for testing.""" - def __init__(self): - super().__init__() - self._named_dense = core.Dense(1, use_bias=True) - self._second = core.Dense(1, use_bias=False) - # We can still track Trackables which aren't Layers. - self._non_layer = NonLayerTrackable() + def __init__(self): + super().__init__() + self._named_dense = core.Dense(1, use_bias=True) + self._second = core.Dense(1, use_bias=False) + # We can still track Trackables which aren't Layers. 
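As the comment above notes, tracking is not limited to Keras Layers: any `Trackable` (here a `tf.Module`) hanging off a checkpointed object gets its variables saved too. A minimal sketch with a hypothetical module and path:

```python
import tensorflow.compat.v2 as tf

class Counter(tf.Module):             # hypothetical non-Layer trackable
    def __init__(self):
        super().__init__()
        self.count = tf.Variable(0.0)

root = tf.train.Checkpoint(counter=Counter())
path = root.save("/tmp/module_demo")  # writes counter/count alongside the rest
```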
+ self._non_layer = NonLayerTrackable() - def call(self, values): - ret = self._second(self._named_dense(values)) - return ret + def call(self, values): + ret = self._second(self._named_dense(values)) + return ret class CheckpointingTests(xla_test.XLATestCase): - - def testDeferredRestorationUsageEager(self): - """An idiomatic eager execution example.""" - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - for training_continuation in range(3): - with self.test_scope(): - model = Subclassed() - optimizer = adam.Adam(0.001) - root = tf.train.Checkpoint( - optimizer=optimizer, model=model) - manager = tf.train.CheckpointManager( - root, checkpoint_directory, max_to_keep=2) - root.restore(manager.latest_checkpoint) - for _ in range(num_training_steps): - input_value = tf.constant([[3.]]) - with tf.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - optimizer.apply_gradients(zip(gradients, variables)) - manager.save() - self.assertEqual((training_continuation + 1) * num_training_steps, - root.optimizer.iterations.numpy()) + def testDeferredRestorationUsageEager(self): + """An idiomatic eager execution example.""" + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + for training_continuation in range(3): + with self.test_scope(): + model = Subclassed() + optimizer = adam.Adam(0.001) + root = tf.train.Checkpoint(optimizer=optimizer, model=model) + manager = tf.train.CheckpointManager( + root, checkpoint_directory, max_to_keep=2 + ) + root.restore(manager.latest_checkpoint) + for _ in range(num_training_steps): + input_value = tf.constant([[3.0]]) + with tf.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + optimizer.apply_gradients(zip(gradients, variables)) + manager.save() + self.assertEqual( + (training_continuation + 1) * num_training_steps, + root.optimizer.iterations.numpy(), + ) if __name__ == "__main__": - tf.compat.v1.enable_eager_execution() - tf.test.main() + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/keras/tools/bazel_build.sh b/keras/tools/bazel_build.sh new file mode 100644 index 000000000000..f58233646514 --- /dev/null +++ b/keras/tools/bazel_build.sh @@ -0,0 +1,21 @@ +#!/bin/bash +BAZEL_VERSION=5.4.0 +rm -rf ~/bazel +mkdir ~/bazel + +pushd ~/bazel +wget https://github.com/bazelbuild/bazel/releases/download/"${BAZEL_VERSION}"/bazel-"${BAZEL_VERSION}"-installer-linux-x86_64.sh +chmod +x bazel-*.sh +./bazel-"${BAZEL_VERSION}"-installer-linux-x86_64.sh --user +rm bazel-"${BAZEL_VERSION}"-installer-linux-x86_64.sh +popd + +PATH="/home/kbuilder/bin:$PATH" +which bazel +bazel version + +TAG_FILTERS="-no_oss,-oss_excluded,-oss_serial,-gpu,-benchmark-test,-no_oss_py3,-no_pip,-nopip" +bazel build \ + --define=use_fast_cpp_protos=false \ + --build_tag_filters="${TAG_FILTERS}" \ + -- //keras/... 
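For readers reproducing this build outside CI, a minimal, hypothetical Python equivalent of the script's final invocation (assuming `bazel` is already installed and on `PATH`, and the working directory is the repository root):

    import subprocess

    # Tag filters copied verbatim from keras/tools/bazel_build.sh above.
    TAG_FILTERS = (
        "-no_oss,-oss_excluded,-oss_serial,-gpu,-benchmark-test,"
        "-no_oss_py3,-no_pip,-nopip"
    )

    # Run the same `bazel build` command the shell script runs.
    subprocess.run(
        [
            "bazel",
            "build",
            "--define=use_fast_cpp_protos=false",
            f"--build_tag_filters={TAG_FILTERS}",
            "--",
            "//keras/...",
        ],
        check=True,  # raise CalledProcessError if the build fails
    )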
diff --git a/keras/tools/pip_package/BUILD b/keras/tools/pip_package/BUILD index 33d7bc2415a2..5b086a4f01cc 100644 --- a/keras/tools/pip_package/BUILD +++ b/keras/tools/pip_package/BUILD @@ -27,6 +27,7 @@ COMMON_PIP_DEPS = [ "//keras/dtensor:test_util", "//keras/distribute:distribute_test_lib_pip", "//keras/integration_test:preprocessing_test_utils", + "//keras/integration_test/models:models", "//keras/layers/preprocessing:preprocessing_test_utils", "//keras/layers/preprocessing/benchmarks:feature_column_benchmark", "//keras/mixed_precision:test_util", diff --git a/keras/tools/pip_package/create_pip_helper.py b/keras/tools/pip_package/create_pip_helper.py index dd576e663852..02f380e78799 100644 --- a/keras/tools/pip_package/create_pip_helper.py +++ b/keras/tools/pip_package/create_pip_helper.py @@ -22,107 +22,122 @@ import fnmatch import os -PIP_EXCLUDED_FILES = frozenset([ - 'keras/api/create_python_api_wrapper.py', - 'keras/applications/efficientnet_weight_update_util.py', - 'keras/distribute/tpu_strategy_test_utils.py', - 'keras/saving/saved_model/create_test_saved_model.py', - 'keras/tools/pip_package/setup.py', - 'keras/tools/pip_package/create_pip_helper.py', -]) - -PIP_EXCLUDED_DIRS = frozenset([ - 'keras/benchmarks', - 'keras/integration_tests', - 'keras/tests', -]) +PIP_EXCLUDED_FILES = frozenset( + [ + "keras/api/create_python_api_wrapper.py", + "keras/applications/efficientnet_weight_update_util.py", + "keras/distribute/tpu_strategy_test_utils.py", + "keras/saving/legacy/saved_model/create_test_saved_model.py", + "keras/tools/pip_package/setup.py", + "keras/tools/pip_package/create_pip_helper.py", + ] +) + +PIP_EXCLUDED_DIRS = frozenset( + [ + "keras/benchmarks", + "keras/tests", + ] +) # Directories that should not have __init__.py files generated within them. -EXCLUDED_INIT_FILE_DIRECTORIES = frozenset([ - 'keras/benchmarks', - 'keras/tools', -]) +EXCLUDED_INIT_FILE_DIRECTORIES = frozenset( + [ + "keras/benchmarks", + "keras/tools", + ] +) class PipPackagingError(Exception): - pass + pass def create_init_files(pip_root): - """Create __init__.py in pip directory tree. - - These files are auto-generated by Bazel when doing typical build/test, but - do not get auto-generated by the pip build process. Currently, the entire - directory tree is just python files, so its fine to just create all of the - init files. - - Args: - pip_root: Root directory of code being packaged into pip. - """ - for path, subdirs, _ in os.walk(pip_root): - for subdir in subdirs: - init_file_path = os.path.join(path, subdir, '__init__.py') - if any(excluded_path in init_file_path - for excluded_path in EXCLUDED_INIT_FILE_DIRECTORIES): - continue - if not os.path.exists(init_file_path): - # Create empty file - open(init_file_path, 'w').close() + """Create __init__.py in pip directory tree. + + These files are auto-generated by Bazel when doing typical build/test, but + do not get auto-generated by the pip build process. Currently, the entire + directory tree is just Python files, so it's fine to just create all of the + init files. + + Args: + pip_root: Root directory of code being packaged into pip. 
+ """ + for path, subdirs, _ in os.walk(pip_root): + for subdir in subdirs: + init_file_path = os.path.join(path, subdir, "__init__.py") + if any( + excluded_path in init_file_path + for excluded_path in EXCLUDED_INIT_FILE_DIRECTORIES + ): + continue + if not os.path.exists(init_file_path): + # Create empty file + open(init_file_path, "w").close() def verify_python_files_in_pip(pip_root, bazel_root): - """Verifies all expected files are packaged into Pip. - - Args: - pip_root: Root directory of code being packaged into pip. - bazel_root: Root directory of Keras Bazel workspace. - - Raises: - PipPackagingError: Missing file in pip. - """ - for path, _, files in os.walk(bazel_root): - if any(d for d in PIP_EXCLUDED_DIRS if d in path): - # Skip any directories that are exclude from PIP, eg tests. - continue - - python_files = set(fnmatch.filter(files, '*.py')) - python_test_files = set(fnmatch.filter(files, '*test.py')) - python_benchmark_files = set(fnmatch.filter(files, '*benchmark.py')) - # We only care about python files in the pip package, see create_init_files. - files = python_files - python_test_files - python_benchmark_files - for f in files: - pip_path = os.path.join(pip_root, os.path.relpath(path, bazel_root), f) - file_name = os.path.join(path, f) - path_exists = os.path.exists(pip_path) - file_excluded = file_name.lstrip('./') in PIP_EXCLUDED_FILES - if not path_exists and not file_excluded: - raise PipPackagingError( - ('Pip package missing the file %s. If this is expected, add it ' - 'to PIP_EXCLUDED_FILES in create_pip_helper.py. Otherwise, ' - 'make sure it is a build dependency of the pip package') % - file_name) - if path_exists and file_excluded: - raise PipPackagingError( - ('File in PIP_EXCLUDED_FILES included in pip. %s' % file_name)) + """Verifies all expected files are packaged into Pip. + + Args: + pip_root: Root directory of code being packaged into pip. + bazel_root: Root directory of Keras Bazel workspace. + + Raises: + PipPackagingError: Missing file in pip. + """ + for path, _, files in os.walk(bazel_root): + if any(d for d in PIP_EXCLUDED_DIRS if d in path): + # Skip any directories that are exclude from PIP, eg tests. + continue + + python_files = set(fnmatch.filter(files, "*.py")) + python_test_files = set(fnmatch.filter(files, "*test.py")) + python_benchmark_files = set(fnmatch.filter(files, "*benchmark.py")) + # We only care about python files in the pip package, see + # create_init_files. + files = python_files - python_test_files - python_benchmark_files + for f in files: + pip_path = os.path.join( + pip_root, os.path.relpath(path, bazel_root), f + ) + file_name = os.path.join(path, f) + path_exists = os.path.exists(pip_path) + file_excluded = file_name.lstrip("./") in PIP_EXCLUDED_FILES + if not path_exists and not file_excluded: + raise PipPackagingError( + "Pip package missing the file %s. If this is expected, " + "add it to PIP_EXCLUDED_FILES in " + "create_pip_helper.py. Otherwise, " + "make sure it is a build dependency of the pip package" + % file_name + ) + if path_exists and file_excluded: + raise PipPackagingError( + f"File in PIP_EXCLUDED_FILES included in pip. 
{file_name}" + ) def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - '--bazel-root', - type=str, - required=True, - help='Root directory of Keras Bazel workspace.') - parser.add_argument( - '--pip-root', - type=str, - required=True, - help='Root directory of code being packaged into pip.') - - args = parser.parse_args() - create_init_files(args.pip_root) - verify_python_files_in_pip(args.pip_root, args.bazel_root) - - -if __name__ == '__main__': - main() + parser = argparse.ArgumentParser() + parser.add_argument( + "--bazel-root", + type=str, + required=True, + help="Root directory of Keras Bazel workspace.", + ) + parser.add_argument( + "--pip-root", + type=str, + required=True, + help="Root directory of code being packaged into pip.", + ) + + args = parser.parse_args() + create_init_files(args.pip_root) + verify_python_files_in_pip(args.pip_root, args.bazel_root) + + +if __name__ == "__main__": + main() diff --git a/keras/tools/pip_package/setup.py b/keras/tools/pip_package/setup.py index 3c4eb033712c..b47a0b91acbc 100644 --- a/keras/tools/pip_package/setup.py +++ b/keras/tools/pip_package/setup.py @@ -23,58 +23,61 @@ from __future__ import print_function import sys + import setuptools -DOCLINES = __doc__.split('\n') +DOCLINES = __doc__.split("\n") # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '2.10.0' +_VERSION = "2.15.0" REQUIRED_PACKAGES = [ # We depend on TensorFlow's declared pip dependencies. # Add a new dep there if one is needed. ] -project_name = 'keras' -if '--project_name' in sys.argv: - project_name_idx = sys.argv.index('--project_name') - project_name = sys.argv[project_name_idx + 1] - sys.argv.remove('--project_name') - sys.argv.pop(project_name_idx) +project_name = "keras" +if "--project_name" in sys.argv: + project_name_idx = sys.argv.index("--project_name") + project_name = sys.argv[project_name_idx + 1] + sys.argv.remove("--project_name") + sys.argv.pop(project_name_idx) setuptools.setup( name=project_name, - version=_VERSION.replace('-', ''), - description='Deep learning for humans.', - long_description='\n'.join(DOCLINES[2:]), - url='https://keras.io/', - download_url='https://github.com/keras-team/keras/tags', - author='Keras team', - author_email='keras-users@googlegroups.com', + version=_VERSION.replace("-", ""), + description="Deep learning for humans.", + long_description="\n".join(DOCLINES[2:]), + url="https://keras.io/", + download_url="https://github.com/keras-team/keras/tags", + author="Keras team", + author_email="keras-users@googlegroups.com", packages=setuptools.find_packages(), install_requires=REQUIRED_PACKAGES, + # Supported Python versions + python_requires=">=3.9", # PyPI package information. 
classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3 :: Only', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules', + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", ], - license='Apache 2.0', - keywords=['keras', 'tensorflow', 'machine learning', 'deep learning'], + license="Apache 2.0", + keywords=["keras", "tensorflow", "machine learning", "deep learning"], ) diff --git a/keras/utils/BUILD b/keras/utils/BUILD index 50aaf6452996..034f587f1e5f 100644 --- a/keras/utils/BUILD +++ b/keras/utils/BUILD @@ -1,15 +1,13 @@ # Description: # Contains the Keras Utilities (internal TensorFlow version). +# Placeholder: load unaliased py_library load("@org_keras//keras:keras.bzl", "tf_py_test") package( + # copybara:uncomment default_applicable_licenses = ["//keras:license"], # TODO(scottzhu): Remove non-keras deps from TF. 
- default_visibility = [ - "//keras:friends", - "//third_party/tensorflow/python/feature_column:__pkg__", - "//third_party/tensorflow/tools/pip_package:__pkg__", - ], + default_visibility = ["//keras:friends"], licenses = ["notice"], ) @@ -17,17 +15,21 @@ py_library( name = "utils", srcs = [ "__init__.py", + "legacy/__init__.py", ], srcs_version = "PY3", deps = [ ":audio_dataset", ":data_utils", + ":feature_space", ":generic_utils", ":image_dataset", ":image_utils", ":layer_utils", ":np_utils", + ":sidecar_evaluator", ":text_dataset", + ":timed_threads", ":timeseries_dataset", ":vis_utils", ], @@ -299,6 +301,68 @@ py_library( ], ) +py_library( + name = "sidecar_evaluator", + srcs = ["sidecar_evaluator.py"], + srcs_version = "PY3", + deps = [ + "//:expect_tensorboard_installed", + "//:expect_tensorflow_installed", + ], +) + +py_library( + name = "feature_space", + srcs = ["feature_space.py"], + srcs_version = "PY3", + deps = [ + "//:expect_tensorflow_installed", + "//keras:backend", + "//keras/layers", + ], +) + +py_library( + name = "timed_threads", + srcs = ["timed_threads.py"], + srcs_version = "PY3", +) + +py_library( + name = "steps_per_execution_tuning", + srcs = ["steps_per_execution_tuning.py"], + srcs_version = "PY3", + deps = [ + "//:expect_numpy_installed", + ], +) + +tf_py_test( + name = "steps_per_execution_tuning_test", + srcs = ["steps_per_execution_tuning_test.py"], + python_version = "PY3", + deps = [ + ":steps_per_execution_tuning", + "//:expect_tensorflow_installed", + "//keras/testing_infra:test_combinations", + ], +) + +tf_py_test( + name = "sidecar_evaluator_test", + size = "medium", + srcs = ["sidecar_evaluator_test.py"], + python_version = "PY3", + deps = [ + ":sidecar_evaluator", + "//:expect_absl_installed", + "//:expect_tensorflow_installed", + "//keras", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", + ], +) + tf_py_test( name = "dataset_creator_test", srcs = ["dataset_creator_test.py"], @@ -431,9 +495,14 @@ tf_py_test( python_version = "PY3", deps = [ ":layer_utils", + ":tf_utils", "//:expect_numpy_installed", "//:expect_tensorflow_installed", "//keras", + "//keras:backend", + "//keras/dtensor", + "//keras/dtensor:layout_map", + "//keras/dtensor:test_util", ], ) @@ -447,6 +516,7 @@ tf_py_test( "//:expect_numpy_installed", "//:expect_tensorflow_installed", "//keras", + "//keras/testing_infra:test_combinations", ], ) @@ -598,3 +668,45 @@ tf_py_test( "//keras/testing_infra:test_utils", ], ) + +tf_py_test( + name = "audio_dataset_with_tfio_test", + size = "small", + srcs = ["audio_dataset_with_tfio_test.py"], + python_version = "PY3", + deps = [ + ":audio_dataset", + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//:expect_tensorflow_io_installed", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", + ], +) + +tf_py_test( + name = "feature_space_test", + size = "medium", + srcs = ["feature_space_test.py"], + python_version = "PY3", + deps = [ + ":feature_space", + "//:expect_numpy_installed", + "//:expect_tensorflow_installed", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", + ], +) + +tf_py_test( + name = "timed_threads_test", + size = "small", + srcs = ["timed_threads_test.py"], + deps = [ + ":timed_threads", + "//:expect_tensorflow_installed", + "//keras", + "//keras/testing_infra:test_combinations", + "//keras/testing_infra:test_utils", + ], +) diff --git a/keras/utils/__init__.py b/keras/utils/__init__.py index 23509cfd2b16..db2063432e6d 
100644 --- a/keras/utils/__init__.py +++ b/keras/utils/__init__.py @@ -13,19 +13,30 @@ # limitations under the License. # ============================================================================== """Public Keras utilities.""" -# pylint: disable=g-bad-import-order -from keras.utils.data_utils import get_file -from keras.utils.dataset_utils import split_dataset -from keras.utils.generic_utils import Progbar -from keras.utils.image_dataset import image_dataset_from_directory +# isort: off + +# Serialization related +from keras.saving.serialization_lib import deserialize_keras_object +from keras.saving.serialization_lib import serialize_keras_object +from keras.saving.object_registration import CustomObjectScope +from keras.saving.object_registration import custom_object_scope +from keras.saving.object_registration import get_custom_objects +from keras.saving.object_registration import get_registered_name +from keras.saving.object_registration import register_keras_serializable + +# Dataset related +from keras.utils.audio_dataset import audio_dataset_from_directory from keras.utils.text_dataset import text_dataset_from_directory -from keras.utils.tf_utils import set_random_seed from keras.utils.timeseries_dataset import timeseries_dataset_from_array -from keras.utils.vis_utils import model_to_dot -from keras.utils.vis_utils import plot_model -from keras.utils.np_utils import normalize -from keras.utils.np_utils import to_categorical +from keras.utils.image_dataset import image_dataset_from_directory +from keras.utils.dataset_utils import split_dataset + +# Sequence related +from keras.utils.data_utils import GeneratorEnqueuer +from keras.utils.data_utils import OrderedEnqueuer +from keras.utils.data_utils import Sequence +from keras.utils.data_utils import SequenceEnqueuer # Image related from keras.utils.image_utils import array_to_img @@ -33,22 +44,31 @@ from keras.utils.image_utils import load_img from keras.utils.image_utils import save_img -# Sequence related -from keras.utils.data_utils import Sequence -from keras.utils.data_utils import GeneratorEnqueuer -from keras.utils.data_utils import OrderedEnqueuer -from keras.utils.data_utils import SequenceEnqueuer -from keras.utils.data_utils import pad_sequences +# Python utils +from keras.utils.tf_utils import set_random_seed +from keras.utils.generic_utils import Progbar +from keras.utils.data_utils import get_file -# Serialization related -from keras.utils.generic_utils import custom_object_scope -from keras.utils.generic_utils import CustomObjectScope -from keras.utils.generic_utils import deserialize_keras_object -from keras.utils.generic_utils import get_custom_objects -from keras.utils.generic_utils import serialize_keras_object - -# Audio related -from keras.utils.audio_dataset import audio_dataset_from_directory +# Preprocessing utils +from keras.utils.feature_space import FeatureSpace # Internal from keras.utils.layer_utils import get_source_inputs +from keras.utils.layer_utils import warmstart_embedding_matrix + +# Deprecated +from keras.utils.np_utils import normalize +from keras.utils.np_utils import to_categorical +from keras.utils.np_utils import to_ordinal +from keras.utils.data_utils import pad_sequences + +# Evaluation related +from keras.utils.sidecar_evaluator import SidecarEvaluator +from keras.utils.sidecar_evaluator import SidecarEvaluatorModelExport + +# Timed Thread +from keras.utils.timed_threads import TimedThread + +# Visualization related +from keras.utils.vis_utils import model_to_dot +from 
keras.utils.vis_utils import plot_model diff --git a/keras/utils/audio_dataset.py b/keras/utils/audio_dataset.py index a9d821afcf31..60d2ec422769 100644 --- a/keras/utils/audio_dataset.py +++ b/keras/utils/audio_dataset.py @@ -14,19 +14,16 @@ # ============================================================================== """Keras audio dataset loading utilities.""" -import tensorflow.compat.v2 as tf - -# pylint: disable=g-classes-have-attributes - import numpy as np +import tensorflow.compat.v2 as tf from keras.utils import dataset_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export -try: - import tensorflow_io as tfio -except ImportError: - tfio = None + +tfio = None # Import as-needed. ALLOWED_FORMATS = (".wav",) @@ -47,7 +44,7 @@ def audio_dataset_from_directory( subset=None, follow_links=False, ): - """Generates a `tf.data.Dataset` from audio files in a directory. + """Generates a `tf.data.Dataset` from audio files in a directory. If your directory structure is: @@ -70,202 +67,237 @@ def audio_dataset_from_directory( Only `.wav` files are supported at this time. Args: - directory: Directory where the data is located. If `labels` is "inferred", - it should contain subdirectories, each containing audio files for a - class. Otherwise, the directory structure is ignored. - labels: Either "inferred" (labels are generated from the directory - structure), None (no labels), or a list/tuple of integer labels of the - same size as the number of audio files found in the directory. Labels - should be sorted according to the alphanumeric order of the audio file - paths (obtained via `os.walk(directory)` in Python). - label_mode: String describing the encoding of `labels`. Options are: - - 'int': means that the labels are encoded as integers (e.g. for - `sparse_categorical_crossentropy` loss). - 'categorical' means that - the labels are encoded as a categorical vector (e.g. for - `categorical_crossentropy` loss). - 'binary' means that the labels - (there can be only 2) are encoded as `float32` scalars with values 0 - or 1 (e.g. for `binary_crossentropy`). - None (no labels). - class_names: Only valid if "labels" is "inferred". This is the explicit - list of class names (must match names of subdirectories). Used to - control the order of the classes (otherwise alphanumerical order is - used). - batch_size: Size of the batches of data. Default: 32. If `None`, the data - will not be batched (the dataset will yield individual samples). - sampling_rate: Audio sampling rate (in samples per second). - output_sequence_length: Maximum length of an audio sequence. Audio files - longer than this will be truncated to `output_sequence_length`. If set - to `None`, then all sequences in the same batch will be padded to the - length of the longest sequence in the batch. - ragged: Whether to return a Ragged dataset (where each sequence has its - own length). Default: False. - shuffle: Whether to shuffle the data. Default: True. If set to False, - sorts the data in alphanumeric order. - seed: Optional random seed for shuffling and transformations. - validation_split: Optional float between 0 and 1, fraction of data to - reserve for validation. - subset: Subset of the data to return. One of "training", "validation" or - "both". Only used if `validation_split` is set. - follow_links: Whether to visits subdirectories pointed to by symlinks. - Defaults to False. + directory: Directory where the data is located. 
+ If `labels` is `"inferred"`, it should contain subdirectories, + each containing audio files for a class. Otherwise, the directory + structure is ignored. + labels: Either "inferred" (labels are generated from the directory + structure), `None` (no labels), or a list/tuple of integer labels + of the same size as the number of audio files found in + the directory. Labels should be sorted according to the + alphanumeric order of the audio file paths + (obtained via `os.walk(directory)` in Python). + label_mode: String describing the encoding of `labels`. Options are: + - `"int"`: means that the labels are encoded as integers (e.g. for + `sparse_categorical_crossentropy` loss). + - `"categorical"` means that the labels are encoded as a categorical + vector (e.g. for `categorical_crossentropy` loss). + - `"binary"` means that the labels (there can be only 2) + are encoded as `float32` scalars with values 0 + or 1 (e.g. for `binary_crossentropy`). + - `None` (no labels). + class_names: Only valid if "labels" is `"inferred"`. + This is the explicit list of class names + (must match names of subdirectories). Used to control the order + of the classes (otherwise alphanumerical order is used). + batch_size: Size of the batches of data. Default: 32. If `None`, + the data will not be batched + (the dataset will yield individual samples). + sampling_rate: Audio sampling rate (in samples per second). + output_sequence_length: Maximum length of an audio sequence. Audio files + longer than this will be truncated to `output_sequence_length`. + If set to `None`, then all sequences in the same batch will + be padded to the + length of the longest sequence in the batch. + ragged: Whether to return a Ragged dataset (where each sequence has its + own length). Defaults to `False`. + shuffle: Whether to shuffle the data. Defaults to `True`. + If set to `False`, sorts the data in alphanumeric order. + seed: Optional random seed for shuffling and transformations. + validation_split: Optional float between 0 and 1, fraction of data to + reserve for validation. + subset: Subset of the data to return. One of `"training"`, + `"validation"` or `"both"`. Only used if `validation_split` is set. + follow_links: Whether to visit subdirectories pointed to by symlinks. + Defaults to `False`. Returns: - A `tf.data.Dataset` object. - - If `label_mode` is None, it yields `string` tensors of shape - `(batch_size,)`, containing the contents of a batch of audio files. - - Otherwise, it yields a tuple `(audio, labels)`, where `audio` - has shape `(batch_size, sequence_length, num_channels)` and `labels` - follows the format described - below. + + A `tf.data.Dataset` object. + + - If `label_mode` is `None`, it yields `string` tensors of shape + `(batch_size,)`, containing the contents of a batch of audio files. + - Otherwise, it yields a tuple `(audio, labels)`, where `audio` + has shape `(batch_size, sequence_length, num_channels)` and `labels` + follows the format described + below. Rules regarding labels format: - - if `label_mode` is `int`, the labels are an `int32` tensor of shape - `(batch_size,)`. 
+ - if `label_mode` is `binary`, the labels are a `float32` tensor of + 1s and 0s of shape `(batch_size, 1)`. + - if `label_mode` is `categorical`, the labels are a `float32` tensor + of shape `(batch_size, num_classes)`, representing a one-hot + encoding of the class index. """ - if labels not in ("inferred", None): - if not isinstance(labels, (list, tuple)): - raise ValueError( - "The `labels` argument should be a list/tuple of integer labels, of " - "the same size as the number of audio files in the target " - "directory. If you wish to infer the labels from the subdirectory " - 'names in the target directory, pass `labels="inferred"`. ' - "If you wish to get a dataset that only contains audio samples " - f"(no labels), pass `labels=None`. Received: labels={labels}") - if class_names: - raise ValueError("You can only pass `class_names` if " - f'`labels="inferred"`. Received: labels={labels}, and ' - f"class_names={class_names}") - if label_mode not in {"int", "categorical", "binary", None}: - raise ValueError( - '`label_mode` argument must be one of "int", "categorical", "binary", ' - f'or None. Received: label_mode={label_mode}' + if labels not in ("inferred", None): + if not isinstance(labels, (list, tuple)): + raise ValueError( + "The `labels` argument should be a list/tuple of integer " + "labels, of the same size as the number of audio files in " + "the target directory. If you wish to infer the labels from " + "the subdirectory names in the target directory," + ' pass `labels="inferred"`. ' + "If you wish to get a dataset that only contains audio samples " + f"(no labels), pass `labels=None`. Received: labels={labels}" + ) + if class_names: + raise ValueError( + "You can only pass `class_names` if " + f'`labels="inferred"`. Received: labels={labels}, and ' + f"class_names={class_names}" + ) + if label_mode not in {"int", "categorical", "binary", None}: + raise ValueError( + '`label_mode` argument must be one of "int", "categorical", ' + '"binary", ' + f"or None. Received: label_mode={label_mode}" + ) + + if ragged and output_sequence_length is not None: + raise ValueError( + "Cannot set both `ragged` and `output_sequence_length`" + ) + + if sampling_rate is not None: + if not isinstance(sampling_rate, int): + raise ValueError( + "`sampling_rate` should have an integer value. " + f"Received: sampling_rate={sampling_rate}" + ) + + if sampling_rate <= 0: + raise ValueError( + "`sampling_rate` should be higher than 0. " + f"Received: sampling_rate={sampling_rate}" + ) + + global tfio + if tfio is None: + try: + import tensorflow_io as tfio + except ImportError: + raise ImportError( + "To use the argument `sampling_rate`, you should install " + "tensorflow_io. You can install it via `pip install " + "tensorflow-io`." + ) + + if labels is None or label_mode is None: + labels = None + label_mode = None + + dataset_utils.check_validation_split_arg( + validation_split, subset, shuffle, seed ) - if ragged and output_sequence_length is not None: - raise ValueError("Cannot set both `ragged` and `output_sequence_length`") - - if sampling_rate is not None: - if not isinstance(sampling_rate, int): - raise ValueError('`sampling_rate` should have an integer value. ' - f'Received: sampling_rate={sampling_rate}') - - if sampling_rate <= 0: - raise ValueError(f'`sampling_rate` should be higher than 0. ' - f'Received: sampling_rate={sampling_rate}') - - if tfio is None: - raise ImportError( - 'To use the argument `sampling_rate`, you should install ' - 'tensorflow_io. 
You can install it via `pip install tensorflow-io`.' - ) - - if labels is None or label_mode is None: - labels = None - label_mode = None - - dataset_utils.check_validation_split_arg(validation_split, subset, shuffle, - seed) - - if seed is None: - seed = np.random.randint(1e6) - - file_paths, labels, class_names = dataset_utils.index_directory( - directory, - labels, - formats=ALLOWED_FORMATS, - class_names=class_names, - shuffle=shuffle, - seed=seed, - follow_links=follow_links, - ) - - if label_mode == "binary" and len(class_names) != 2: - raise ValueError( - f'When passing `label_mode="binary"`, there must be exactly 2 ' - f"class_names. Received: class_names={class_names}") - - if subset == "both": - train_dataset, val_dataset = get_training_and_validation_dataset( - file_paths=file_paths, - labels=labels, - validation_split=validation_split, - directory=directory, - label_mode=label_mode, - class_names=class_names, - sampling_rate=sampling_rate, - output_sequence_length=output_sequence_length, - ragged=ragged, - ) + if seed is None: + seed = np.random.randint(1e6) - train_dataset = prepare_dataset( - dataset=train_dataset, - batch_size=batch_size, - shuffle=shuffle, - seed=seed, + file_paths, labels, class_names = dataset_utils.index_directory( + directory, + labels, + formats=ALLOWED_FORMATS, class_names=class_names, - output_sequence_length=output_sequence_length, - ragged=ragged, - ) - val_dataset = prepare_dataset( - dataset=val_dataset, - batch_size=batch_size, - shuffle=False, - seed=seed, - class_names=class_names, - output_sequence_length=output_sequence_length, - ragged=ragged, - ) - return train_dataset, val_dataset - - else: - dataset = get_dataset( - file_paths=file_paths, - labels=labels, - directory=directory, - validation_split=validation_split, - subset=subset, - label_mode=label_mode, - class_names=class_names, - sampling_rate=sampling_rate, - output_sequence_length=output_sequence_length, - ragged=ragged, - ) - - dataset = prepare_dataset( - dataset=dataset, - batch_size=batch_size, shuffle=shuffle, seed=seed, - class_names=class_names, - output_sequence_length=output_sequence_length, - ragged=ragged, + follow_links=follow_links, ) - return dataset - -def prepare_dataset(dataset, batch_size, shuffle, seed, class_names, - output_sequence_length, ragged): - dataset = dataset.prefetch(tf.data.AUTOTUNE) - if batch_size is not None: - if shuffle: - dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed) + if label_mode == "binary" and len(class_names) != 2: + raise ValueError( + 'When passing `label_mode="binary"`, there must be exactly 2 ' + f"class_names. 
Received: class_names={class_names}" + ) + + if subset == "both": + train_dataset, val_dataset = get_training_and_validation_dataset( + file_paths=file_paths, + labels=labels, + validation_split=validation_split, + directory=directory, + label_mode=label_mode, + class_names=class_names, + sampling_rate=sampling_rate, + output_sequence_length=output_sequence_length, + ragged=ragged, + ) + + train_dataset = prepare_dataset( + dataset=train_dataset, + batch_size=batch_size, + shuffle=shuffle, + seed=seed, + class_names=class_names, + output_sequence_length=output_sequence_length, + ragged=ragged, + ) + val_dataset = prepare_dataset( + dataset=val_dataset, + batch_size=batch_size, + shuffle=False, + seed=seed, + class_names=class_names, + output_sequence_length=output_sequence_length, + ragged=ragged, + ) + return train_dataset, val_dataset - if output_sequence_length is None and not ragged: - dataset = dataset.padded_batch( - batch_size, padded_shapes=([None, None], [])) else: - dataset = dataset.batch(batch_size) - else: - if shuffle: - dataset = dataset.shuffle(buffer_size=1024, seed=seed) + dataset = get_dataset( + file_paths=file_paths, + labels=labels, + directory=directory, + validation_split=validation_split, + subset=subset, + label_mode=label_mode, + class_names=class_names, + sampling_rate=sampling_rate, + output_sequence_length=output_sequence_length, + ragged=ragged, + ) + + dataset = prepare_dataset( + dataset=dataset, + batch_size=batch_size, + shuffle=shuffle, + seed=seed, + class_names=class_names, + output_sequence_length=output_sequence_length, + ragged=ragged, + ) + return dataset + + +def prepare_dataset( + dataset, + batch_size, + shuffle, + seed, + class_names, + output_sequence_length, + ragged, +): + dataset = dataset.prefetch(tf.data.AUTOTUNE) + if batch_size is not None: + if shuffle: + dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed) + + if output_sequence_length is None and not ragged: + dataset = dataset.padded_batch( + batch_size, padded_shapes=([None, None], []) + ) + else: + dataset = dataset.batch(batch_size) + else: + if shuffle: + dataset = dataset.shuffle(buffer_size=1024, seed=seed) - # Users may need to reference `class_names`. - dataset.class_names = class_names - return dataset + # Users may need to reference `class_names`. + dataset.class_names = class_names + return dataset def get_training_and_validation_dataset( @@ -279,40 +311,48 @@ def get_training_and_validation_dataset( output_sequence_length, ragged, ): - file_paths_train, labels_train = dataset_utils.get_training_or_validation_split( - file_paths, labels, validation_split, "training") - if not file_paths_train: - raise ValueError(f"No training audio files found in directory {directory}. " - f"Allowed format(s): {ALLOWED_FORMATS}") - - file_paths_val, labels_val = dataset_utils.get_training_or_validation_split( - file_paths, labels, validation_split, "validation") - if not file_paths_val: - raise ValueError( - f"No validation audio files found in directory {directory}. 
" - f"Allowed format(s): {ALLOWED_FORMATS}") - - train_dataset = paths_and_labels_to_dataset( - file_paths=file_paths_train, - labels=labels_train, - label_mode=label_mode, - num_classes=len(class_names), - sampling_rate=sampling_rate, - output_sequence_length=output_sequence_length, - ragged=ragged, - ) - - val_dataset = paths_and_labels_to_dataset( - file_paths=file_paths_val, - labels=labels_val, - label_mode=label_mode, - num_classes=len(class_names), - sampling_rate=sampling_rate, - output_sequence_length=output_sequence_length, - ragged=ragged, - ) - - return train_dataset, val_dataset + ( + file_paths_train, + labels_train, + ) = dataset_utils.get_training_or_validation_split( + file_paths, labels, validation_split, "training" + ) + if not file_paths_train: + raise ValueError( + f"No training audio files found in directory {directory}. " + f"Allowed format(s): {ALLOWED_FORMATS}" + ) + + file_paths_val, labels_val = dataset_utils.get_training_or_validation_split( + file_paths, labels, validation_split, "validation" + ) + if not file_paths_val: + raise ValueError( + f"No validation audio files found in directory {directory}. " + f"Allowed format(s): {ALLOWED_FORMATS}" + ) + + train_dataset = paths_and_labels_to_dataset( + file_paths=file_paths_train, + labels=labels_train, + label_mode=label_mode, + num_classes=len(class_names), + sampling_rate=sampling_rate, + output_sequence_length=output_sequence_length, + ragged=ragged, + ) + + val_dataset = paths_and_labels_to_dataset( + file_paths=file_paths_val, + labels=labels_val, + label_mode=label_mode, + num_classes=len(class_names), + sampling_rate=sampling_rate, + output_sequence_length=output_sequence_length, + ragged=ragged, + ) + + return train_dataset, val_dataset def get_dataset( @@ -327,42 +367,47 @@ def get_dataset( output_sequence_length, ragged, ): - file_paths, labels = dataset_utils.get_training_or_validation_split( - file_paths, labels, validation_split, subset) - if not file_paths: - raise ValueError(f"No audio files found in directory {directory}. " - f"Allowed format(s): {ALLOWED_FORMATS}") - - dataset = paths_and_labels_to_dataset( - file_paths=file_paths, - labels=labels, - label_mode=label_mode, - num_classes=len(class_names), - sampling_rate=sampling_rate, - output_sequence_length=output_sequence_length, - ragged=ragged, - ) - - return dataset - - -def read_and_decode_audio(path, - sampling_rate=None, - output_sequence_length=None): - """Reads and decodes audio file.""" - audio = tf.io.read_file(path) - - if output_sequence_length is None: - output_sequence_length = -1 - - audio, default_audio_rate = tf.audio.decode_wav( - contents=audio, desired_samples=output_sequence_length) - if sampling_rate is not None: - # default_audio_rate should have dtype=int64 - default_audio_rate = tf.cast(default_audio_rate, tf.int64) - audio = tfio.audio.resample( - input=audio, rate_in=default_audio_rate, rate_out=sampling_rate) - return audio + file_paths, labels = dataset_utils.get_training_or_validation_split( + file_paths, labels, validation_split, subset + ) + if not file_paths: + raise ValueError( + f"No audio files found in directory {directory}. 
" + f"Allowed format(s): {ALLOWED_FORMATS}" + ) + + dataset = paths_and_labels_to_dataset( + file_paths=file_paths, + labels=labels, + label_mode=label_mode, + num_classes=len(class_names), + sampling_rate=sampling_rate, + output_sequence_length=output_sequence_length, + ragged=ragged, + ) + + return dataset + + +def read_and_decode_audio( + path, sampling_rate=None, output_sequence_length=None +): + """Reads and decodes audio file.""" + audio = tf.io.read_file(path) + + if output_sequence_length is None: + output_sequence_length = -1 + + audio, default_audio_rate = tf.audio.decode_wav( + contents=audio, desired_samples=output_sequence_length + ) + if sampling_rate is not None: + # default_audio_rate should have dtype=int64 + default_audio_rate = tf.cast(default_audio_rate, tf.int64) + audio = tfio.audio.resample( + input=audio, rate_in=default_audio_rate, rate_out=sampling_rate + ) + return audio def paths_and_labels_to_dataset( @@ -374,20 +419,24 @@ def paths_and_labels_to_dataset( output_sequence_length, ragged, ): - """Constructs a fixed-size dataset of audio and labels.""" - path_ds = tf.data.Dataset.from_tensor_slices(file_paths) - audio_ds = path_ds.map( - lambda x: read_and_decode_audio(x, sampling_rate, output_sequence_length), - num_parallel_calls=tf.data.AUTOTUNE, - ) - - if ragged: - audio_ds = audio_ds.map( - lambda x: tf.RaggedTensor.from_tensor(x), + """Constructs a fixed-size dataset of audio and labels.""" + path_ds = tf.data.Dataset.from_tensor_slices(file_paths) + audio_ds = path_ds.map( + lambda x: read_and_decode_audio( + x, sampling_rate, output_sequence_length + ), num_parallel_calls=tf.data.AUTOTUNE, ) - if label_mode: - label_ds = dataset_utils.labels_to_dataset(labels, label_mode, num_classes) - audio_ds = tf.data.Dataset.zip((audio_ds, label_ds)) - return audio_ds + if ragged: + audio_ds = audio_ds.map( + lambda x: tf.RaggedTensor.from_tensor(x), + num_parallel_calls=tf.data.AUTOTUNE, + ) + + if label_mode: + label_ds = dataset_utils.labels_to_dataset( + labels, label_mode, num_classes + ) + audio_ds = tf.data.Dataset.zip((audio_ds, label_ds)) + return audio_ds diff --git a/keras/utils/audio_dataset_test.py b/keras/utils/audio_dataset_test.py index ed314a2202c3..c32dda318a2e 100644 --- a/keras/utils/audio_dataset_test.py +++ b/keras/utils/audio_dataset_test.py @@ -14,12 +14,12 @@ # ============================================================================== """Tests for audio_dataset.""" -import tensorflow.compat.v2 as tf - import os import shutil import numpy as np +import tensorflow.compat.v2 as tf + from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import audio_dataset @@ -27,356 +27,434 @@ @test_utils.run_v2_only class AudioDatasetFromDirectoryTest(test_combinations.TestCase): - - def _get_audio_samples(self, count=16, different_sequence_lengths=False): - sequence_length = 30 - num_channels = 1 - audio_samples = [] - for _ in range(count): - if different_sequence_lengths: - random_sequence_length = np.random.randint(10, sequence_length + 1) - audio = np.random.random((random_sequence_length, num_channels)) - else: - audio = np.random.random((sequence_length, num_channels)) - audio_samples.append(tf.audio.encode_wav(audio, 1000)) - return audio_samples - - def _prepare_directory( - self, - num_classes=2, - nested_dirs=False, - count=16, - different_sequence_lengths=False, - ): - # Get a unique temp directory - temp_dir = os.path.join(self.get_temp_dir(), str(np.random.randint(1e6))) - 
os.mkdir(temp_dir) - self.addCleanup(shutil.rmtree, temp_dir) - - # Generate paths to class subdirectories - paths = [] - for class_index in range(num_classes): - class_directory = "class_%s" % (class_index,) - if nested_dirs: - class_paths = [ - class_directory, - os.path.join(class_directory, "subfolder_1"), - os.path.join(class_directory, "subfolder_2"), - os.path.join(class_directory, "subfolder_1", "sub-subfolder"), + def _get_audio_samples(self, count=16, different_sequence_lengths=False): + sequence_length = 30 + num_channels = 1 + audio_samples = [] + for _ in range(count): + if different_sequence_lengths: + random_sequence_length = np.random.randint( + 10, sequence_length + 1 + ) + audio = np.random.random((random_sequence_length, num_channels)) + else: + audio = np.random.random((sequence_length, num_channels)) + audio_samples.append(tf.audio.encode_wav(audio, 1000)) + return audio_samples + + def _prepare_directory( + self, + num_classes=2, + nested_dirs=False, + count=16, + different_sequence_lengths=False, + ): + # Get a unique temp directory + temp_dir = os.path.join( + self.get_temp_dir(), str(np.random.randint(1e6)) + ) + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) + + # Generate paths to class subdirectories + paths = [] + for class_index in range(num_classes): + class_directory = f"class_{class_index}" + if nested_dirs: + class_paths = [ + class_directory, + os.path.join(class_directory, "subfolder_1"), + os.path.join(class_directory, "subfolder_2"), + os.path.join( + class_directory, "subfolder_1", "sub-subfolder" + ), + ] + else: + class_paths = [class_directory] + for path in class_paths: + os.mkdir(os.path.join(temp_dir, path)) + paths += class_paths + + # Save audio samples to the paths + i = 0 + for audio in self._get_audio_samples( + count=count, different_sequence_lengths=different_sequence_lengths + ): + path = paths[i % len(paths)] + ext = "wav" + filename = os.path.join(path, f"audio_{i}.{ext}") + with open(os.path.join(temp_dir, filename), "wb") as f: + f.write(audio.numpy()) + i += 1 + return temp_dir + + def test_audio_dataset_from_directory_standalone(self): + # Test retrieving audio samples without labels from a directory and its + # subdirs. + # Save a few extra audio files in the parent directory. 
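+ # (With labels=None, files located directly under `directory` are
+ # indexed as well, so the 3 extra samples here join the 7 under the
+ # class folders: 10 samples in total, as asserted below.)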
+ directory = self._prepare_directory(count=7, num_classes=2) + for i, audio in enumerate(self._get_audio_samples(3)): + filename = f"audio_{i}.wav" + with open(os.path.join(directory, filename), "wb") as f: + f.write(audio.numpy()) + + dataset = audio_dataset.audio_dataset_from_directory( + directory, batch_size=5, output_sequence_length=30, labels=None + ) + batch = next(iter(dataset)) + # We return plain audio + self.assertEqual(batch.shape, (5, 30, 1)) + self.assertEqual(batch.dtype.name, "float32") + # Count samples + batch_count = 0 + sample_count = 0 + for batch in dataset: + batch_count += 1 + sample_count += batch.shape[0] + self.assertEqual(batch_count, 2) + self.assertEqual(sample_count, 10) + + def test_audio_dataset_from_directory_binary(self): + directory = self._prepare_directory(num_classes=2) + dataset = audio_dataset.audio_dataset_from_directory( + directory, batch_size=8, output_sequence_length=30, label_mode="int" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 30, 1)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8,)) + self.assertEqual(batch[1].dtype.name, "int32") + + dataset = audio_dataset.audio_dataset_from_directory( + directory, + batch_size=8, + output_sequence_length=30, + label_mode="binary", + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 30, 1)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8, 1)) + self.assertEqual(batch[1].dtype.name, "float32") + + dataset = audio_dataset.audio_dataset_from_directory( + directory, + batch_size=8, + output_sequence_length=30, + label_mode="categorical", + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 30, 1)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8, 2)) + self.assertEqual(batch[1].dtype.name, "float32") + + def test_static_shape_in_graph(self): + directory = self._prepare_directory(num_classes=2) + dataset = audio_dataset.audio_dataset_from_directory( + directory, batch_size=8, output_sequence_length=30, label_mode="int" + ) + test_case = self + + @tf.function + def symbolic_fn(ds): + for x, _ in ds.take(1): + test_case.assertListEqual(x.shape.as_list(), [None, 30, None]) + + symbolic_fn(dataset) + + def test_sample_count(self): + directory = self._prepare_directory(num_classes=4, count=15) + dataset = audio_dataset.audio_dataset_from_directory( + directory, batch_size=8, output_sequence_length=30, label_mode=None + ) + sample_count = 0 + for batch in dataset: + sample_count += batch.shape[0] + self.assertEqual(sample_count, 15) + + def test_audio_dataset_from_directory_multiclass(self): + directory = self._prepare_directory(num_classes=4, count=15) + + dataset = audio_dataset.audio_dataset_from_directory( + directory, batch_size=8, output_sequence_length=30, label_mode=None + ) + batch = next(iter(dataset)) + self.assertEqual(batch.shape, (8, 30, 1)) + + dataset = audio_dataset.audio_dataset_from_directory( + directory, batch_size=8, output_sequence_length=30, label_mode=None + ) + sample_count = 0 + iterator = iter(dataset) + for batch in dataset: + sample_count += next(iterator).shape[0] + self.assertEqual(sample_count, 15) + + dataset = audio_dataset.audio_dataset_from_directory( + directory, batch_size=8, output_sequence_length=30, label_mode="int" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + 
self.assertEqual(batch[0].shape, (8, 30, 1)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8,)) + self.assertEqual(batch[1].dtype.name, "int32") + + dataset = audio_dataset.audio_dataset_from_directory( + directory, + batch_size=8, + output_sequence_length=30, + label_mode="categorical", + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 30, 1)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8, 4)) + self.assertEqual(batch[1].dtype.name, "float32") + + def test_audio_dataset_from_directory_validation_split(self): + directory = self._prepare_directory(num_classes=2, count=10) + dataset = audio_dataset.audio_dataset_from_directory( + directory, + batch_size=10, + output_sequence_length=30, + validation_split=0.2, + subset="training", + seed=1337, + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 30, 1)) + dataset = audio_dataset.audio_dataset_from_directory( + directory, + batch_size=10, + output_sequence_length=30, + validation_split=0.2, + subset="validation", + seed=1337, + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (2, 30, 1)) + + def test_audio_dataset_from_directory_manual_labels(self): + directory = self._prepare_directory(num_classes=2, count=2) + dataset = audio_dataset.audio_dataset_from_directory( + directory, + batch_size=8, + output_sequence_length=30, + labels=[0, 1], + shuffle=False, + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertAllClose(batch[1], [0, 1]) + + def test_audio_dataset_from_directory_follow_links(self): + directory = self._prepare_directory( + num_classes=2, count=25, nested_dirs=True + ) + dataset = audio_dataset.audio_dataset_from_directory( + directory, + batch_size=8, + output_sequence_length=30, + label_mode=None, + follow_links=True, + ) + sample_count = 0 + for batch in dataset: + sample_count += batch.shape[0] + self.assertEqual(sample_count, 25) + + def test_audio_dataset_from_directory_no_audio(self): + directory = self._prepare_directory(num_classes=2, count=0) + with self.assertRaisesRegex( + ValueError, "No audio files found in directory" + ): + _ = audio_dataset.audio_dataset_from_directory(directory) + + def test_audio_dataset_from_directory_ragged(self): + directory = self._prepare_directory( + num_classes=2, count=16, different_sequence_lengths=True + ) + dataset = audio_dataset.audio_dataset_from_directory( + directory, ragged=True, batch_size=8 + ) + batch = next(iter(dataset)) + + self.assertEqual(batch[0].shape.as_list(), [8, None, None]) + + def test_audio_dataset_from_directory_no_output_sequence_length_no_ragged( + self, + ): + # This test case tests `audio_dataset_from_directory` when `ragged` and + # `output_sequence_length` are not passed while the input sequence + # lengths are different. 
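+ # (With batch_size=2 and per-file lengths drawn from [10, 30],
+ # padded_batch pads each batch to its longest element, so every
+ # observed batch sequence length must fall within [10, 30].)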
+ directory = self._prepare_directory( + num_classes=2, count=16, different_sequence_lengths=True + ) + # The tensor shapes are different and output_sequence_length is None; + # batching should work fine and pad each sequence to the length of the + # longest sequence in its batch + min_sequence_length, max_sequence_length = 10, 30 + possible_sequence_lengths = [ + i for i in range(min_sequence_length, max_sequence_length + 1) ] - else: - class_paths = [class_directory] - for path in class_paths: - os.mkdir(os.path.join(temp_dir, path)) - paths += class_paths - - # Save audio samples to the paths - i = 0 - for audio in self._get_audio_samples( - count=count, different_sequence_lengths=different_sequence_lengths): - path = paths[i % len(paths)] - ext = "wav" - filename = os.path.join(path, "audio_%s.%s" % (i, ext)) - with open(os.path.join(temp_dir, filename), "wb") as f: - f.write(audio.numpy()) - i += 1 - return temp_dir - - def test_audio_dataset_from_directory_standalone(self): - # Test retrieving audio samples withouts labels from a directory and its subdirs. - - # Save a few extra audio in the parent directory. - directory = self._prepare_directory(count=7, num_classes=2) - for i, audio in enumerate(self._get_audio_samples(3)): - filename = "audio_%s.wav" % (i,) - with open(os.path.join(directory, filename), "wb") as f: - f.write(audio.numpy()) - - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=5, output_sequence_length=30, labels=None) - batch = next(iter(dataset)) - # We return plain audio - self.assertEqual(batch.shape, (5, 30, 1)) - self.assertEqual(batch.dtype.name, "float32") - # Count samples - batch_count = 0 - sample_count = 0 - for batch in dataset: - batch_count += 1 - sample_count += batch.shape[0] - self.assertEqual(batch_count, 2) - self.assertEqual(sample_count, 10) - - def test_audio_dataset_from_directory_binary(self): - directory = self._prepare_directory(num_classes=2) - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=8, output_sequence_length=30, label_mode="int") - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 30, 1)) - self.assertEqual(batch[0].dtype.name, "float32") - self.assertEqual(batch[1].shape, (8,)) - self.assertEqual(batch[1].dtype.name, "int32") - - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=8, output_sequence_length=30, label_mode="binary") - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 30, 1)) - self.assertEqual(batch[0].dtype.name, "float32") - self.assertEqual(batch[1].shape, (8, 1)) - self.assertEqual(batch[1].dtype.name, "float32") - - dataset = audio_dataset.audio_dataset_from_directory( - directory, - batch_size=8, - output_sequence_length=30, - label_mode="categorical") - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 30, 1)) - self.assertEqual(batch[0].dtype.name, "float32") - self.assertEqual(batch[1].shape, (8, 2)) - self.assertEqual(batch[1].dtype.name, "float32") - - def test_static_shape_in_graph(self): - directory = self._prepare_directory(num_classes=2) - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=8, output_sequence_length=30, label_mode="int") - test_case = self - - @tf.function - def symbolic_fn(ds): - for x, _ in ds.take(1): - test_case.assertListEqual(x.shape.as_list(), [None, 30, None]) - - symbolic_fn(dataset) - - def test_sample_count(self): - directory = 
self._prepare_directory(num_classes=4, count=15) - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=8, output_sequence_length=30, label_mode=None) - sample_count = 0 - for batch in dataset: - sample_count += batch.shape[0] - self.assertEqual(sample_count, 15) - - def test_audio_dataset_from_directory_multiclass(self): - directory = self._prepare_directory(num_classes=4, count=15) - - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=8, output_sequence_length=30, label_mode=None) - batch = next(iter(dataset)) - self.assertEqual(batch.shape, (8, 30, 1)) - - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=8, output_sequence_length=30, label_mode=None) - sample_count = 0 - iterator = iter(dataset) - for batch in dataset: - sample_count += next(iterator).shape[0] - self.assertEqual(sample_count, 15) - - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=8, output_sequence_length=30, label_mode="int") - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 30, 1)) - self.assertEqual(batch[0].dtype.name, "float32") - self.assertEqual(batch[1].shape, (8,)) - self.assertEqual(batch[1].dtype.name, "int32") - - dataset = audio_dataset.audio_dataset_from_directory( - directory, - batch_size=8, - output_sequence_length=30, - label_mode="categorical") - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 30, 1)) - self.assertEqual(batch[0].dtype.name, "float32") - self.assertEqual(batch[1].shape, (8, 4)) - self.assertEqual(batch[1].dtype.name, "float32") - - def test_audio_dataset_from_directory_validation_split(self): - directory = self._prepare_directory(num_classes=2, count=10) - dataset = audio_dataset.audio_dataset_from_directory( - directory, - batch_size=10, - output_sequence_length=30, - validation_split=0.2, - subset="training", - seed=1337, - ) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 30, 1)) - dataset = audio_dataset.audio_dataset_from_directory( - directory, - batch_size=10, - output_sequence_length=30, - validation_split=0.2, - subset="validation", - seed=1337, - ) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (2, 30, 1)) - - def test_audio_dataset_from_directory_manual_labels(self): - directory = self._prepare_directory(num_classes=2, count=2) - dataset = audio_dataset.audio_dataset_from_directory( - directory, - batch_size=8, - output_sequence_length=30, - labels=[0, 1], - shuffle=False, - ) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertAllClose(batch[1], [0, 1]) - - def test_audio_dataset_from_directory_follow_links(self): - directory = self._prepare_directory( - num_classes=2, count=25, nested_dirs=True) - dataset = audio_dataset.audio_dataset_from_directory( - directory, - batch_size=8, - output_sequence_length=30, - label_mode=None, - follow_links=True, - ) - sample_count = 0 - for batch in dataset: - sample_count += batch.shape[0] - self.assertEqual(sample_count, 25) - - def test_audio_dataset_from_directory_no_audio(self): - directory = self._prepare_directory(num_classes=2, count=0) - with self.assertRaisesRegex(ValueError, - "No audio files found in directory"): - _ = audio_dataset.audio_dataset_from_directory(directory) - - def test_audio_dataset_from_directory_ragged(self): - directory = self._prepare_directory( - num_classes=2, count=16, 
different_sequence_lengths=True) - dataset = audio_dataset.audio_dataset_from_directory( - directory, ragged=True, batch_size=8) - batch = next(iter(dataset)) - - self.assertEqual(batch[0].shape.as_list(), [8, None, None]) - - def test_audio_dataset_from_directory_no_output_sequence_length_no_ragged( - self): - # This test case tests `audio_dataset_from_directory` when `ragged` and `output_sequence_length` - # are not passed while the input sequence lengths are different. - directory = self._prepare_directory( - num_classes=2, count=16, different_sequence_lengths=True) - # The tensor shapes are different and output_sequence_length is None - # should work fine and pad each sequence to the length of the longest sequence - # in it's batch - min_sequence_length, max_sequence_length = 10, 30 - possible_sequence_lengths = [ - i for i in range(min_sequence_length, max_sequence_length + 1) - ] - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=2) - sequence_lengths = list(set([b.shape[1] for b, _ in dataset])) - for seq_len in sequence_lengths: - self.assertIn(seq_len, possible_sequence_lengths) - - def test_audio_dataset_from_directory_no_output_sequence_length_same_lengths( - self): - # This test case tests `audio_dataset_from_directory` when `ragged` and `output_sequence_length` - # are not passed while the input sequence lengths are the same - directory = self._prepare_directory( - num_classes=2, count=16, different_sequence_lengths=False) - # The tensor shapes are different and output_sequence_length is None - # should work fine and pad each sequence to the length of the longest sequence - # in it's batch - dataset = audio_dataset.audio_dataset_from_directory( - directory, batch_size=2) - sequence_lengths = list(set([batch[0].shape[1] for batch in dataset])) - self.assertEqual(len(sequence_lengths), 1) - - def test_audio_dataset_from_directory_errors(self): - directory = self._prepare_directory(num_classes=3, count=5) - - with self.assertRaisesRegex( - ValueError, "`sampling_rate` should be higher than 0. Received:"): - _ = audio_dataset.audio_dataset_from_directory( - directory, ragged=False, output_sequence_length=10, sampling_rate=-1) - - with self.assertRaisesRegex( - ValueError, "`sampling_rate` should have an integer value. 
Received:"):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, ragged=False, output_sequence_length=10, sampling_rate=1.2)
-
-    with self.assertRaisesRegex(
-        ValueError, "Cannot set both `ragged` and `output_sequence_length`"):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, ragged=True, output_sequence_length=30)
-
-    with self.assertRaisesRegex(ValueError, "`labels` argument should be"):
-      _ = audio_dataset.audio_dataset_from_directory(directory, labels="other")
-
-    with self.assertRaisesRegex(ValueError, "`label_mode` argument must be"):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, label_mode="other")
-
-    with self.assertRaisesRegex(
-        ValueError, 'only pass `class_names` if `labels="inferred"`'):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory,
-          labels=[0, 0, 1, 1, 1],
-          class_names=["class_0", "class_1", "class_2"],
-      )
-
-    with self.assertRaisesRegex(
-        ValueError,
-        "Expected the lengths of `labels` to match the number of files"):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, labels=[0, 0, 1, 1])
-
-    with self.assertRaisesRegex(ValueError,
-                                "`class_names` passed did not match"):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, class_names=["class_0", "class_2"])
-
-    with self.assertRaisesRegex(ValueError, "there must be exactly 2"):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, label_mode="binary")
-
-    with self.assertRaisesRegex(ValueError,
-                                "`validation_split` must be between 0 and 1"):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, validation_split=2)
-
-    with self.assertRaisesRegex(ValueError,
-                                '`subset` must be either "training",'):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, validation_split=0.2, subset="other")
-
-    with self.assertRaisesRegex(ValueError, "`validation_split` must be set"):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, validation_split=0, subset="training")
-
-    with self.assertRaisesRegex(ValueError, "must provide a `seed`"):
-      _ = audio_dataset.audio_dataset_from_directory(
-          directory, validation_split=0.2, subset="training")
-
-  def test_audio_dataset_from_directory_not_batched(self):
-    directory = self._prepare_directory(num_classes=2, count=2)
-    dataset = audio_dataset.audio_dataset_from_directory(
-        directory,
-        batch_size=None,
-        output_sequence_length=30,
-        label_mode=None,
-        shuffle=False,
-    )
-    sample = next(iter(dataset))
-    self.assertEqual(len(sample.shape), 2)
+        dataset = audio_dataset.audio_dataset_from_directory(
+            directory, batch_size=2
+        )
+        sequence_lengths = list(set([b.shape[1] for b, _ in dataset]))
+        for seq_len in sequence_lengths:
+            self.assertIn(seq_len, possible_sequence_lengths)
+
+    def test_audio_dataset_from_directory_no_output_sequence_length_same_lengths(  # noqa: E501
+        self,
+    ):
+        # This test case tests `audio_dataset_from_directory` when `ragged` and
+        # `output_sequence_length` are not passed while the input sequence
+        # lengths are the same
+        directory = self._prepare_directory(
+            num_classes=2, count=16, different_sequence_lengths=False
+        )
+        # All sequences have the same length here, so leaving
+        # output_sequence_length as None should work fine and batch the
+        # sequences without any padding.
+        dataset = audio_dataset.audio_dataset_from_directory(
+            directory, batch_size=2
+        )
+        sequence_lengths = list(set([batch[0].shape[1] for batch in dataset]))
+        self.assertEqual(len(sequence_lengths), 1)
+
+    def test_audio_dataset_from_directory_errors(self):
+        
directory = self._prepare_directory(num_classes=3, count=5) + + with self.assertRaisesRegex( + ValueError, "`sampling_rate` should be higher than 0. Received:" + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, + ragged=False, + output_sequence_length=10, + sampling_rate=-1, + ) + + with self.assertRaisesRegex( + ValueError, + "`sampling_rate` should have an integer value. Received:", + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, + ragged=False, + output_sequence_length=10, + sampling_rate=1.2, + ) + + # Only run this test case when we don't have tensorflow_io. + try: + import tensorflow_io # noqa: F401 + except ImportError: + with self.assertRaisesRegex( + ImportError, + "To use the argument `sampling_rate`.*tensorflow_io.*", + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, + ragged=False, + output_sequence_length=10, + sampling_rate=44100, + ) + + with self.assertRaisesRegex( + ValueError, "Cannot set both `ragged` and `output_sequence_length`" + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, ragged=True, output_sequence_length=30 + ) + + with self.assertRaisesRegex(ValueError, "`labels` argument should be"): + _ = audio_dataset.audio_dataset_from_directory( + directory, labels="other" + ) + + with self.assertRaisesRegex( + ValueError, "`label_mode` argument must be" + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, label_mode="other" + ) + + with self.assertRaisesRegex( + ValueError, 'only pass `class_names` if `labels="inferred"`' + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, + labels=[0, 0, 1, 1, 1], + class_names=["class_0", "class_1", "class_2"], + ) + + with self.assertRaisesRegex( + ValueError, + "Expected the lengths of `labels` to match the number of files", + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, labels=[0, 0, 1, 1] + ) + + with self.assertRaisesRegex( + ValueError, "`class_names` passed did not match" + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, class_names=["class_0", "class_2"] + ) + + with self.assertRaisesRegex(ValueError, "there must be exactly 2"): + _ = audio_dataset.audio_dataset_from_directory( + directory, label_mode="binary" + ) + + with self.assertRaisesRegex( + ValueError, "`validation_split` must be between 0 and 1" + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, validation_split=2 + ) + + with self.assertRaisesRegex( + ValueError, '`subset` must be either "training",' + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, validation_split=0.2, subset="other" + ) + + with self.assertRaisesRegex( + ValueError, "`validation_split` must be set" + ): + _ = audio_dataset.audio_dataset_from_directory( + directory, validation_split=0, subset="training" + ) + + with self.assertRaisesRegex(ValueError, "must provide a `seed`"): + _ = audio_dataset.audio_dataset_from_directory( + directory, validation_split=0.2, subset="training" + ) + + def test_audio_dataset_from_directory_not_batched(self): + directory = self._prepare_directory(num_classes=2, count=2) + dataset = audio_dataset.audio_dataset_from_directory( + directory, + batch_size=None, + output_sequence_length=30, + label_mode=None, + shuffle=False, + ) + sample = next(iter(dataset)) + self.assertEqual(len(sample.shape), 2) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/utils/audio_dataset_with_tfio_test.py b/keras/utils/audio_dataset_with_tfio_test.py new file mode 100644 index 
000000000000..75689d29c7ac
--- /dev/null
+++ b/keras/utils/audio_dataset_with_tfio_test.py
@@ -0,0 +1,129 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for audio_dataset when tfio is available."""
+
+import os
+import shutil
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+
+from keras.testing_infra import test_combinations
+from keras.testing_infra import test_utils
+from keras.utils import audio_dataset
+
+
+@test_utils.run_v2_only
+class AudioDatasetFromDirectoryWithTfioTest(test_combinations.TestCase):
+    def _get_audio_samples(self, count=16, different_sequence_lengths=False):
+        sequence_length = 30
+        num_channels = 1
+        audio_samples = []
+        for _ in range(count):
+            if different_sequence_lengths:
+                random_sequence_length = np.random.randint(
+                    10, sequence_length + 1
+                )
+                audio = np.random.random((random_sequence_length, num_channels))
+            else:
+                audio = np.random.random((sequence_length, num_channels))
+            audio_samples.append(tf.audio.encode_wav(audio, 1000))
+        return audio_samples
+
+    def _prepare_directory(
+        self,
+        num_classes=2,
+        nested_dirs=False,
+        count=16,
+        different_sequence_lengths=False,
+    ):
+        # Get a unique temp directory
+        temp_dir = os.path.join(
+            self.get_temp_dir(), str(np.random.randint(1e6))
+        )
+        os.mkdir(temp_dir)
+        self.addCleanup(shutil.rmtree, temp_dir)
+
+        # Generate paths to class subdirectories
+        paths = []
+        for class_index in range(num_classes):
+            class_directory = f"class_{class_index}"
+            if nested_dirs:
+                class_paths = [
+                    class_directory,
+                    os.path.join(class_directory, "subfolder_1"),
+                    os.path.join(class_directory, "subfolder_2"),
+                    os.path.join(
+                        class_directory, "subfolder_1", "sub-subfolder"
+                    ),
+                ]
+            else:
+                class_paths = [class_directory]
+            for path in class_paths:
+                os.mkdir(os.path.join(temp_dir, path))
+            paths += class_paths
+
+        # Save audio samples to the paths
+        i = 0
+        for audio in self._get_audio_samples(
+            count=count, different_sequence_lengths=different_sequence_lengths
+        ):
+            path = paths[i % len(paths)]
+            ext = "wav"
+            filename = os.path.join(path, f"audio_{i}.{ext}")
+            with open(os.path.join(temp_dir, filename), "wb") as f:
+                f.write(audio.numpy())
+            i += 1
+        return temp_dir
+
+    def test_audio_dataset_from_directory_standalone_with_resampling(self):
+        # Test retrieving audio samples without labels from a directory and
+        # its subdirs where we double the sampling rate.
+        # Save a few extra audio files in the parent directory.
+        directory = self._prepare_directory(count=7, num_classes=2)
+        for i, audio in enumerate(self._get_audio_samples(3)):
+            filename = f"audio_{i}.wav"
+            with open(os.path.join(directory, filename), "wb") as f:
+                f.write(audio.numpy())
+
+        dataset = audio_dataset.audio_dataset_from_directory(
+            directory,
+            batch_size=5,
+            output_sequence_length=30,
+            labels=None,
+            sampling_rate=2000,  # Twice the original sample rate.
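+            # With tensorflow_io installed, resampling rescales each clip by
+            # sampling_rate / file rate: the 30-sample clips encoded at
+            # 1000 Hz above come back as 60 samples at 2000 Hz.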
+ ) + batch = next(iter(dataset)) + # We return plain audio. Expect twice as many samples now. + self.assertEqual(batch.shape, (5, 60, 1)) + self.assertEqual(batch.dtype.name, "float32") + # Count samples + batch_count = 0 + sample_count = 0 + for batch in dataset: + batch_count += 1 + sample_count += batch.shape[0] + self.assertEqual(batch_count, 2) + self.assertEqual(sample_count, 10) + + +if __name__ == "__main__": + try: + import tensorflow_io # noqa: F401 + + # Only run these tests if tensorflow_io is installed. + tf.test.main() + except ImportError: + pass diff --git a/keras/utils/composite_tensor_support_test.py b/keras/utils/composite_tensor_support_test.py index ae2e8f6f1f3e..25ce0cfd5451 100644 --- a/keras/utils/composite_tensor_support_test.py +++ b/keras/utils/composite_tensor_support_test.py @@ -14,296 +14,303 @@ # ============================================================================== """Tests for Keras composite tensor support.""" -import tensorflow.compat.v2 as tf - -from absl.testing import parameterized - import numpy as np import scipy.sparse +import tensorflow.compat.v2 as tf +from absl.testing import parameterized import keras -from keras.testing_infra import test_combinations -from keras.testing_infra import test_utils from keras.engine import input_layer -from keras.layers import core from keras.layers import Dense from keras.layers import Embedding from keras.layers import Layer +from keras.layers import core +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils # Define test-only Layer classes to validate passing Sparse and Ragged tensors # between layers. class ToDense(Layer): - """Create a dense (standard) tensor from the given input tensor.""" - - def __init__(self, default_value, **kwargs): - super().__init__(**kwargs) - self._default_value = default_value - - def call(self, inputs): - if isinstance(inputs, dict): # Dicts are no longer flattened. - # Always a single element in these tests. - inputs = tf.nest.flatten(inputs)[0] - - if isinstance(inputs, tf.RaggedTensor): - output = inputs.to_tensor(default_value=self._default_value) - elif isinstance(inputs, tf.SparseTensor): - output = tf.sparse.to_dense( - inputs, default_value=self._default_value) - elif isinstance(inputs, tf.Tensor): - output = inputs - else: - raise TypeError("Unexpected tensor type %s" % type(inputs).__name__) + """Create a dense (standard) tensor from the given input tensor.""" + + def __init__(self, default_value, **kwargs): + super().__init__(**kwargs) + self._default_value = default_value - # Return a float so that we can compile models with this as the final layer. - return tf.cast(output, tf.float32) + def call(self, inputs): + if isinstance(inputs, dict): # Dicts are no longer flattened. + # Always a single element in these tests. + inputs = tf.nest.flatten(inputs)[0] + + if isinstance(inputs, tf.RaggedTensor): + output = inputs.to_tensor(default_value=self._default_value) + elif isinstance(inputs, tf.SparseTensor): + output = tf.sparse.to_dense( + inputs, default_value=self._default_value + ) + elif isinstance(inputs, tf.Tensor): + output = inputs + else: + raise TypeError(f"Unexpected tensor type {type(inputs).__name__}") + + # Return a float so that we can compile models with this as the final + # layer. 
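+        # For example, a ragged [[1], [2, 3]] with default_value=-1 becomes
+        # the dense tensor [[1., -1.], [2., 3.]].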
+ return tf.cast(output, tf.float32) class ToRagged(Layer): - """Create a ragged tensor based on a given dense tensor.""" + """Create a ragged tensor based on a given dense tensor.""" - def __init__(self, padding, ragged_rank=1, **kwargs): - super().__init__(**kwargs) - self._padding = padding - self._ragged_rank = ragged_rank + def __init__(self, padding, ragged_rank=1, **kwargs): + super().__init__(**kwargs) + self._padding = padding + self._ragged_rank = ragged_rank - def call(self, inputs): - return tf.RaggedTensor.from_tensor( - inputs, padding=self._padding, ragged_rank=self._ragged_rank) + def call(self, inputs): + return tf.RaggedTensor.from_tensor( + inputs, padding=self._padding, ragged_rank=self._ragged_rank + ) class ToSparse(Layer): - """Create a sparse tensor based on a given dense tensor.""" + """Create a sparse tensor based on a given dense tensor.""" - def call(self, inputs): - indices = tf.where(tf.not_equal(inputs, 0)) - values = tf.gather_nd(inputs, indices) - shape = tf.shape(inputs, out_type=tf.int64) - return tf.SparseTensor(indices, values, dense_shape=shape) + def call(self, inputs): + indices = tf.where(tf.not_equal(inputs, 0)) + values = tf.gather_nd(inputs, indices) + shape = tf.shape(inputs, out_type=tf.int64) + return tf.SparseTensor(indices, values, dense_shape=shape) class _SubclassModel(keras.Model): - """A Keras subclass model.""" - - def __init__(self, layers, i_layer=None): - super().__init__() - # Note that clone and build doesn't support lists of layers in subclassed - # models. Adding each layer directly here. - for i, layer in enumerate(layers): - setattr(self, self._layer_name_for_i(i), layer) - self.num_layers = len(layers) - if i_layer is not None: - self._set_inputs(i_layer) - - def _layer_name_for_i(self, i): - return "layer{}".format(i) - - def call(self, inputs, **kwargs): - x = inputs - for i in range(self.num_layers): - layer = getattr(self, self._layer_name_for_i(i)) - x = layer(x) - return x - - -def get_model_from_layers_with_input(layers, - input_shape=None, - input_dtype=None, - model_input=None): - """Builds a model from a sequence of layers.""" - if model_input is not None and input_shape is not None: - raise ValueError("Cannot specify a model_input and an input shape.") - - model_type = test_utils.get_model_type() - if model_type == "subclass": - return _SubclassModel(layers, model_input) - - if model_type == "sequential": - model = keras.models.Sequential() - if model_input is not None: - model.add(model_input) - elif input_shape is not None: - model.add(keras.Input(shape=input_shape, dtype=input_dtype)) - for layer in layers: - model.add(layer) - return model - - if model_type == "functional": - if model_input is not None: - inputs = model_input - else: - if not input_shape: - raise ValueError("Cannot create a functional model from layers with no " - "input shape.") - inputs = keras.Input(shape=input_shape, dtype=input_dtype) - outputs = inputs - for layer in layers: - outputs = layer(outputs) - return keras.Model(inputs, outputs) - - raise ValueError("Unknown model type {}".format(model_type)) + """A Keras subclass model.""" + + def __init__(self, layers, i_layer=None): + super().__init__() + # Note that clone and build doesn't support lists of layers in + # subclassed models. Adding each layer directly here. 
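+        # e.g. layers[0] is stored as self.layer0 and layers[1] as
+        # self.layer1 (see _layer_name_for_i below).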
+ for i, layer in enumerate(layers): + setattr(self, self._layer_name_for_i(i), layer) + self.num_layers = len(layers) + if i_layer is not None: + self._set_inputs(i_layer) + + def _layer_name_for_i(self, i): + return f"layer{i}" + + def call(self, inputs, **kwargs): + x = inputs + for i in range(self.num_layers): + layer = getattr(self, self._layer_name_for_i(i)) + x = layer(x) + return x + + +def get_model_from_layers_with_input( + layers, input_shape=None, input_dtype=None, model_input=None +): + """Builds a model from a sequence of layers.""" + if model_input is not None and input_shape is not None: + raise ValueError("Cannot specify a model_input and an input shape.") + + model_type = test_utils.get_model_type() + if model_type == "subclass": + return _SubclassModel(layers, model_input) + + if model_type == "sequential": + model = keras.models.Sequential() + if model_input is not None: + model.add(model_input) + elif input_shape is not None: + model.add(keras.Input(shape=input_shape, dtype=input_dtype)) + for layer in layers: + model.add(layer) + return model + + if model_type == "functional": + if model_input is not None: + inputs = model_input + else: + if not input_shape: + raise ValueError( + "Cannot create a functional model from layers with no " + "input shape." + ) + inputs = keras.Input(shape=input_shape, dtype=input_dtype) + outputs = inputs + for layer in layers: + outputs = layer(outputs) + return keras.Model(inputs, outputs) + + raise ValueError(f"Unknown model type {model_type}") def get_test_mode_kwargs(): - run_eagerly = test_utils.should_run_eagerly() - return { - "run_eagerly": run_eagerly, - } + run_eagerly = test_utils.should_run_eagerly() + return { + "run_eagerly": run_eagerly, + } @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes class CompositeTensorInternalTest(test_combinations.TestCase): - - def test_internal_ragged_tensors(self): - # Create a model that accepts an input, converts it to Ragged, and - # converts the ragged tensor back to a dense tensor. - layers = [ToRagged(padding=0), ToDense(default_value=-1)] - model = test_utils.get_model_from_layers(layers, input_shape=(None,)) - - # Define some input data with additional padding. - input_data = np.array([[1, 0, 0], [2, 3, 0]]) - expected_output = np.array([[1, -1], [2, 3]]) - output = model.predict(input_data) - self.assertAllEqual(expected_output, output) - - def test_internal_sparse_tensors(self): - # Create a model that accepts an input, converts it to Sparse, and - # converts the sparse tensor back to a dense tensor. - layers = [ToSparse(), ToDense(default_value=-1)] - model = test_utils.get_model_from_layers(layers, input_shape=(None,)) - - # Define some input data with additional padding. - input_data = np.array([[1, 0, 0], [2, 3, 0]]) - expected_output = np.array([[1, -1, -1], [2, 3, -1]]) - output = model.predict(input_data) - self.assertAllEqual(expected_output, output) - - def test_training_internal_ragged_tensors(self): - # Create a model that implements y=Mx. This is easy to learn and will - # demonstrate appropriate gradient passing. (We have to use RaggedTensors - # for this test, as ToSparse() doesn't support gradient propagation through - # the layer.) TODO(b/124796939): Investigate this. 
- layers = [core.Dense(2), ToRagged(padding=0), ToDense(default_value=-1)] - model = test_utils.get_model_from_layers(layers, input_shape=(1,)) - - input_data = np.random.rand(1024, 1) - expected_data = np.concatenate((input_data * 3, input_data * .5), axis=-1) - - model.compile(loss="mse", optimizer="adam", **get_test_mode_kwargs()) - history = model.fit(input_data, expected_data, epochs=10, verbose=0) - - # If the model trained, the loss stored at history[0] should be different - # than the one stored at history[-1]. - self.assertNotEqual(history.history["loss"][-1], history.history["loss"][0]) + def test_internal_ragged_tensors(self): + # Create a model that accepts an input, converts it to Ragged, and + # converts the ragged tensor back to a dense tensor. + layers = [ToRagged(padding=0), ToDense(default_value=-1)] + model = test_utils.get_model_from_layers(layers, input_shape=(None,)) + + # Define some input data with additional padding. + input_data = np.array([[1, 0, 0], [2, 3, 0]]) + expected_output = np.array([[1, -1], [2, 3]]) + output = model.predict(input_data) + self.assertAllEqual(expected_output, output) + + def test_internal_sparse_tensors(self): + # Create a model that accepts an input, converts it to Sparse, and + # converts the sparse tensor back to a dense tensor. + layers = [ToSparse(), ToDense(default_value=-1)] + model = test_utils.get_model_from_layers(layers, input_shape=(None,)) + + # Define some input data with additional padding. + input_data = np.array([[1, 0, 0], [2, 3, 0]]) + expected_output = np.array([[1, -1, -1], [2, 3, -1]]) + output = model.predict(input_data) + self.assertAllEqual(expected_output, output) + + def test_training_internal_ragged_tensors(self): + # Create a model that implements y=Mx. This is easy to learn and will + # demonstrate appropriate gradient passing. (We have to use + # RaggedTensors for this test, as ToSparse() doesn't support gradient + # propagation through the layer.) TODO(b/124796939): Investigate this. + layers = [core.Dense(2), ToRagged(padding=0), ToDense(default_value=-1)] + model = test_utils.get_model_from_layers(layers, input_shape=(1,)) + + input_data = np.random.rand(1024, 1) + expected_data = np.concatenate( + (input_data * 3, input_data * 0.5), axis=-1 + ) + + model.compile(loss="mse", optimizer="adam", **get_test_mode_kwargs()) + history = model.fit(input_data, expected_data, epochs=10, verbose=0) + + # If the model trained, the loss stored at history[0] should be + # different than the one stored at history[-1]. + self.assertNotEqual( + history.history["loss"][-1], history.history["loss"][0] + ) @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes class CompositeTensorOutputTest(test_combinations.TestCase): - - def test_ragged_tensor_outputs(self): - # Create a model that accepts an input, converts it to Ragged, and - # converts the ragged tensor back to a dense tensor. - layers = [ToRagged(padding=0)] - model = test_utils.get_model_from_layers(layers, input_shape=(None,)) - model._run_eagerly = test_utils.should_run_eagerly() - - # Define some input data with additional padding. - input_data = np.array([[1, 0, 0], [2, 3, 0]]) - output = model.predict(input_data) - - expected_values = [[1], [2, 3]] - self.assertAllEqual(expected_values, output) - - def test_ragged_tensor_rebatched_outputs(self): - # Create a model that accepts an input, converts it to Ragged, and - # converts the ragged tensor back to a dense tensor. 
-    layers = [ToRagged(padding=0)]
-    model = test_utils.get_model_from_layers(layers, input_shape=(None,))
-    model._run_eagerly = test_utils.should_run_eagerly()
-
-    # Define some input data with additional padding.
-    input_data = np.array([[1, 0, 0], [2, 3, 0], [4, 0, 0], [5, 6, 0]])
-    output = model.predict(input_data, batch_size=2)
-
-    expected_values = [[1], [2, 3], [4], [5, 6]]
-    self.assertAllEqual(expected_values, output)
-
-  def test_sparse_tensor_outputs(self):
-    # Create a model that accepts an input, converts it to Ragged, and
-    # converts the ragged tensor back to a dense tensor.
-    layers = [ToSparse()]
-    model = test_utils.get_model_from_layers(layers, input_shape=(None,))
-    model._run_eagerly = test_utils.should_run_eagerly()
-
-    # Define some input data with additional padding.
-    input_data = np.array([[1, 0, 0], [2, 3, 0]])
-    output = model.predict(input_data)
-
-    expected_indices = np.array([[0, 0], [1, 0], [1, 1]])
-    expected_values = np.array([1, 2, 3])
-    expected_dense_shape = np.array([2, 3])
-
-    self.assertAllEqual(output.indices, expected_indices)
-    self.assertAllEqual(output.values, expected_values)
-    self.assertAllEqual(output.dense_shape, expected_dense_shape)
-
-  def test_sparse_tensor_rebatched_outputs(self):
-    # Create a model that accepts an input, converts it to Ragged, and
-    # converts the ragged tensor back to a dense tensor.
-    layers = [ToSparse()]
-    model = test_utils.get_model_from_layers(layers, input_shape=(None,))
-    model._run_eagerly = test_utils.should_run_eagerly()
-
-    # Define some input data with additional padding.
-    input_data = np.array([[1, 0, 0], [2, 3, 0], [4, 0, 0], [5, 6, 0]])
-    output = model.predict(input_data, batch_size=2)
-
-    expected_indices = np.array([[0, 0], [1, 0], [1, 1], [2, 0], [3, 0], [3,
-                                                                          1]])
-    expected_values = np.array([1, 2, 3, 4, 5, 6])
-    expected_dense_shape = np.array([4, 3])
-
-    self.assertAllEqual(output.indices, expected_indices)
-    self.assertAllEqual(output.values, expected_values)
-    self.assertAllEqual(output.dense_shape, expected_dense_shape)
+    def test_ragged_tensor_outputs(self):
+        # Create a model that accepts an input and converts it to a ragged
+        # tensor; the model output is the ragged tensor itself.
+        layers = [ToRagged(padding=0)]
+        model = test_utils.get_model_from_layers(layers, input_shape=(None,))
+        model._run_eagerly = test_utils.should_run_eagerly()
+
+        # Define some input data with additional padding.
+        input_data = np.array([[1, 0, 0], [2, 3, 0]])
+        output = model.predict(input_data)
+
+        expected_values = [[1], [2, 3]]
+        self.assertAllEqual(expected_values, output)
+
+    def test_ragged_tensor_rebatched_outputs(self):
+        # Create a model that accepts an input and converts it to a ragged
+        # tensor; the model output is the ragged tensor itself.
+        layers = [ToRagged(padding=0)]
+        model = test_utils.get_model_from_layers(layers, input_shape=(None,))
+        model._run_eagerly = test_utils.should_run_eagerly()
+
+        # Define some input data with additional padding.
+        input_data = np.array([[1, 0, 0], [2, 3, 0], [4, 0, 0], [5, 6, 0]])
+        output = model.predict(input_data, batch_size=2)
+
+        expected_values = [[1], [2, 3], [4], [5, 6]]
+        self.assertAllEqual(expected_values, output)
+
+    def test_sparse_tensor_outputs(self):
+        # Create a model that accepts an input and converts it to a sparse
+        # tensor; the model output is the sparse tensor itself.
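+        # e.g. [[1, 0, 0], [2, 3, 0]] comes back as indices
+        # [[0, 0], [1, 0], [1, 1]], values [1, 2, 3], dense_shape [2, 3].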
+        layers = [ToSparse()]
+        model = test_utils.get_model_from_layers(layers, input_shape=(None,))
+        model._run_eagerly = test_utils.should_run_eagerly()
+
+        # Define some input data with additional padding.
+        input_data = np.array([[1, 0, 0], [2, 3, 0]])
+        output = model.predict(input_data)
+
+        expected_indices = np.array([[0, 0], [1, 0], [1, 1]])
+        expected_values = np.array([1, 2, 3])
+        expected_dense_shape = np.array([2, 3])
+
+        self.assertAllEqual(output.indices, expected_indices)
+        self.assertAllEqual(output.values, expected_values)
+        self.assertAllEqual(output.dense_shape, expected_dense_shape)
+
+    def test_sparse_tensor_rebatched_outputs(self):
+        # Create a model that accepts an input and converts it to a sparse
+        # tensor; the model output is the sparse tensor itself.
+        layers = [ToSparse()]
+        model = test_utils.get_model_from_layers(layers, input_shape=(None,))
+        model._run_eagerly = test_utils.should_run_eagerly()
+
+        # Define some input data with additional padding.
+        input_data = np.array([[1, 0, 0], [2, 3, 0], [4, 0, 0], [5, 6, 0]])
+        output = model.predict(input_data, batch_size=2)
+
+        expected_indices = np.array(
+            [[0, 0], [1, 0], [1, 1], [2, 0], [3, 0], [3, 1]]
+        )
+        expected_values = np.array([1, 2, 3, 4, 5, 6])
+        expected_dense_shape = np.array([4, 3])
+
+        self.assertAllEqual(output.indices, expected_indices)
+        self.assertAllEqual(output.values, expected_values)
+        self.assertAllEqual(output.dense_shape, expected_dense_shape)


 def get_input_name(use_dict):
-  # Define the input name.
-  if not use_dict:
-    return None  # This is the same as not setting 'name'.
-  elif test_utils.get_model_type() == "subclass":
-    return "input_1"  # Subclass models don"t support input names.
-  else:
-    return "test_input_name"
+    # Define the input name.
+    if not use_dict:
+        return None  # This is the same as not setting 'name'.
+    elif test_utils.get_model_type() == "subclass":
+        return "input_1"  # Subclass models don't support input names.
+    else:
+        return "test_input_name"


 def get_kwargs(use_dataset, action="predict"):
-  if use_dataset or not tf.executing_eagerly():
-    if action == "fit":
-      return {"steps_per_epoch": 1}
-    return {"steps": 1}
-  else:
-    return {"batch_size": 2}
+    if use_dataset or not tf.executing_eagerly():
+        if action == "fit":
+            return {"steps_per_epoch": 1}
+        return {"steps": 1}
+    else:
+        return {"batch_size": 2}


 def prepare_inputs(data, use_dict, use_dataset, action, input_name):
-  input_data, expected_output = data
-  batch_size = input_data.shape[0]
-  # Prepare the input data.
-  if use_dict:
-    input_data = {input_name: input_data}
-  if use_dataset:
-    if action == "predict":
-      input_data = tf.data.Dataset.from_tensor_slices(input_data).batch(
-          batch_size)
-    else:
-      input_data = tf.data.Dataset.from_tensor_slices(
-          (input_data, expected_output)).batch(batch_size)
-      expected_output = None
-  return (input_data, expected_output)
+    input_data, expected_output = data
+    batch_size = input_data.shape[0]
+    # Prepare the input data.
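+    # When use_dict is set, the inputs are keyed by the input layer's name;
+    # when use_dataset is set, they are wrapped in a batched tf.data.Dataset.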
+ if use_dict: + input_data = {input_name: input_data} + if use_dataset: + if action == "predict": + input_data = tf.data.Dataset.from_tensor_slices(input_data).batch( + batch_size + ) + else: + input_data = tf.data.Dataset.from_tensor_slices( + (input_data, expected_output) + ).batch(batch_size) + expected_output = None + return (input_data, expected_output) @test_combinations.run_with_all_model_types @@ -312,163 +319,191 @@ def prepare_inputs(data, use_dict, use_dataset, action, input_name): *test_utils.generate_combinations_with_testcase_name( use_dict=[True, False], use_dataset=[True, False], - action=["predict", "evaluate", "fit"])) + action=["predict", "evaluate", "fit"], + ) +) class SparseTensorInputTest(test_combinations.TestCase): - - def test_sparse_tensors(self, use_dict, use_dataset, action): - data = [(tf.SparseTensor([[0, 0, 0], [1, 0, 0], [1, 0, 1]], - [1, 2, 3], [2, 1, 3]), - np.array([[[1, -1, -1]], [[2, 3, -1]]])), - (tf.SparseTensor( - [[0, 0, 0], [1, 0, 0], [1, 0, 1], [2, 0, 1]], [5, 6, 7, 8], - [3, 1, 4]), - np.array([[[5, -1, -1, -1]], [[6, 7, -1, -1]], [[-1, 8, -1, - -1]]]))] - # Prepare the model to test. - input_name = get_input_name(use_dict) - model_input = input_layer.Input( - shape=(1, None), sparse=True, name=input_name, dtype=tf.int32) - layers = [ToDense(default_value=-1)] - model = get_model_from_layers_with_input(layers, model_input=model_input) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - **get_test_mode_kwargs()) - kwargs = get_kwargs(use_dataset, action) - - # Prepare the input data - for data_element in data: - input_data, expected_output = prepare_inputs(data_element, use_dict, - use_dataset, action, - input_name) - # Perform the action. - if action == "predict": - result = model.predict(input_data, **kwargs) - self.assertAllEqual(expected_output, result) - if action == "evaluate": - result = model.evaluate(input_data, expected_output, **kwargs) - self.assertAllEqual(1.0, result[-1]) - if action == "fit": - # TODO(momernick): What's the best way of validating that fit happened? - _ = model.fit(input_data, expected_output, shuffle=False, **kwargs) + def test_sparse_tensors(self, use_dict, use_dataset, action): + data = [ + ( + tf.SparseTensor( + [[0, 0, 0], [1, 0, 0], [1, 0, 1]], [1, 2, 3], [2, 1, 3] + ), + np.array([[[1, -1, -1]], [[2, 3, -1]]]), + ), + ( + tf.SparseTensor( + [[0, 0, 0], [1, 0, 0], [1, 0, 1], [2, 0, 1]], + [5, 6, 7, 8], + [3, 1, 4], + ), + np.array( + [[[5, -1, -1, -1]], [[6, 7, -1, -1]], [[-1, 8, -1, -1]]] + ), + ), + ] + # Prepare the model to test. + input_name = get_input_name(use_dict) + model_input = input_layer.Input( + shape=(1, None), sparse=True, name=input_name, dtype=tf.int32 + ) + layers = [ToDense(default_value=-1)] + model = get_model_from_layers_with_input( + layers, model_input=model_input + ) + model.compile( + optimizer="sgd", + loss="mse", + metrics=["accuracy"], + **get_test_mode_kwargs(), + ) + kwargs = get_kwargs(use_dataset, action) + + # Prepare the input data + for data_element in data: + input_data, expected_output = prepare_inputs( + data_element, use_dict, use_dataset, action, input_name + ) + # Perform the action. + if action == "predict": + result = model.predict(input_data, **kwargs) + self.assertAllEqual(expected_output, result) + if action == "evaluate": + result = model.evaluate(input_data, expected_output, **kwargs) + self.assertAllEqual(1.0, result[-1]) + if action == "fit": + # TODO(momernick): What's the best way of validating that fit + # happened? 
+ _ = model.fit( + input_data, expected_output, shuffle=False, **kwargs + ) @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes -class ScipySparseTensorInputTest(test_combinations.TestCase, - tf.test.TestCase): - - def test_sparse_scipy_predict_inputs_via_input_layer_args(self): - # Create a model that accepts a sparse input and converts the sparse tensor - # back to a dense tensor. Scipy sparse matrices are limited to 2D, so use - # a one-dimensional shape; note also that scipy's default dtype is int64. - model_input = input_layer.Input(shape=(3,), sparse=True, dtype=tf.int64) - layers = [ToDense(default_value=-1)] - model = get_model_from_layers_with_input(layers, model_input=model_input) - - input_data = scipy.sparse.coo_matrix(([1, 2, 3], ([0, 1, 1], [0, 0, 1])), - shape=[2, 3]) - expected_output = np.array([[1, -1, -1], [2, 3, -1]]) - output = model.predict(input_data, steps=1) - self.assertAllEqual(expected_output, output) - - input_data_2 = scipy.sparse.coo_matrix( - ([5, 6, 7, 8], ([0, 1, 1, 2], [0, 0, 1, 1])), shape=[3, 3]) - expected_output_2 = np.array([[5, -1, -1], [6, 7, -1], [-1, 8, -1]]) - output_2 = model.predict(input_data_2, steps=1) - self.assertAllEqual(expected_output_2, output_2) - - def test_sparse_scipy_eval_inputs(self): - # Create a model that accepts a sparse input and converts the sparse tensor - # back to a dense tensor. Scipy sparse matrices are limited to 2D, so use - # a one-dimensional shape; note also that scipy's default dtype is int64. - model_input = input_layer.Input(shape=(3,), sparse=True, dtype=tf.int64) - layers = [ToDense(default_value=-1)] - model = get_model_from_layers_with_input(layers, model_input=model_input) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"]) - - input_data = scipy.sparse.coo_matrix(([1, 2, 3], ([0, 1, 1], [0, 0, 1])), - shape=[2, 3]) - expected_output = np.array([[1, -1, -1], [2, 3, -1]]) - - output = model.evaluate(input_data, expected_output, steps=1) - self.assertAllEqual(1.0, output[-1]) - - input_data_2 = scipy.sparse.coo_matrix( - ([5, 6, 7, 8], ([0, 1, 1, 2], [0, 0, 1, 1])), shape=[3, 3]) - expected_output_2 = np.array([[5, -1, -1], [6, 7, -1], [-1, 8, -1]]) - output_2 = model.evaluate(input_data_2, expected_output_2, steps=1) - self.assertAllEqual(1.0, output_2[-1]) - - def test_sparse_scipy_predict_input_dicts_via_input_layer_args(self): - # Create a model that accepts a sparse input and converts the sparse tensor - # back to a dense tensor. Scipy sparse matrices are limited to 2D, so use - # a one-dimensional shape; note also that scipy's default dtype is int64. - if test_utils.get_model_type() == "subclass": - input_name = "input_1" # Subclass models don"t support input names. 
- else: - input_name = "test_input_name" - model_input = input_layer.Input( - shape=(3,), sparse=True, name=input_name, dtype=tf.int64) - layers = [ToDense(default_value=-1)] - model = get_model_from_layers_with_input(layers, model_input=model_input) - - input_data = { - input_name: - scipy.sparse.coo_matrix(([1, 2, 3], ([0, 1, 1], [0, 0, 1])), - shape=[2, 3]) - } - expected_output = np.array([[1, -1, -1], [2, 3, -1]]) - output = model.predict(input_data, steps=1) - self.assertAllEqual(expected_output, output) - - input_data_2 = { - input_name: - scipy.sparse.coo_matrix( - ([5, 6, 7, 8], ([0, 1, 1, 2], [0, 0, 1, 1])), shape=[3, 3]) - } - expected_output_2 = np.array([[5, -1, -1], [6, 7, -1], [-1, 8, -1]]) - output_2 = model.predict(input_data_2, steps=1) - self.assertAllEqual(expected_output_2, output_2) - - def test_sparse_scipy_eval_input_dicts(self): - # Create a model that accepts a sparse input and converts the sparse tensor - # back to a dense tensor. Scipy sparse matrices are limited to 2D, so use - # a one-dimensional shape; note also that scipy's default dtype is int64. - if test_utils.get_model_type() == "subclass": - input_name = "input_1" # Subclass models don"t support input names. - else: - input_name = "test_input_name" - model_input = input_layer.Input( - shape=(3,), sparse=True, name=input_name, dtype=tf.int64) - layers = [ToDense(default_value=-1)] - model = get_model_from_layers_with_input(layers, model_input=model_input) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"]) - - input_data = { - input_name: - scipy.sparse.coo_matrix(([1, 2, 3], ([0, 1, 1], [0, 0, 1])), - shape=[2, 3]) - } - expected_output = np.array([[1, -1, -1], [2, 3, -1]]) - output = model.evaluate(input_data, expected_output, steps=1) - self.assertAllEqual(1.0, output[-1]) - - input_data_2 = { - input_name: - scipy.sparse.coo_matrix( - ([5, 6, 7, 8], ([0, 1, 1, 2], [0, 0, 1, 1])), shape=[3, 3]) - } - expected_output_2 = np.array([[5, -1, -1], [6, 7, -1], [-1, 8, -1]]) - output_2 = model.evaluate(input_data_2, expected_output_2, steps=1) - self.assertAllEqual(1.0, output_2[-1]) +class ScipySparseTensorInputTest(test_combinations.TestCase, tf.test.TestCase): + def test_sparse_scipy_predict_inputs_via_input_layer_args(self): + # Create a model that accepts a sparse input and converts the sparse + # tensor back to a dense tensor. Scipy sparse matrices are limited to + # 2D, so use a one-dimensional shape; note also that scipy's default + # dtype is int64. + model_input = input_layer.Input(shape=(3,), sparse=True, dtype=tf.int64) + layers = [ToDense(default_value=-1)] + model = get_model_from_layers_with_input( + layers, model_input=model_input + ) + + input_data = scipy.sparse.coo_matrix( + ([1, 2, 3], ([0, 1, 1], [0, 0, 1])), shape=[2, 3] + ) + expected_output = np.array([[1, -1, -1], [2, 3, -1]]) + output = model.predict(input_data, steps=1) + self.assertAllEqual(expected_output, output) + + input_data_2 = scipy.sparse.coo_matrix( + ([5, 6, 7, 8], ([0, 1, 1, 2], [0, 0, 1, 1])), shape=[3, 3] + ) + expected_output_2 = np.array([[5, -1, -1], [6, 7, -1], [-1, 8, -1]]) + output_2 = model.predict(input_data_2, steps=1) + self.assertAllEqual(expected_output_2, output_2) + + def test_sparse_scipy_eval_inputs(self): + # Create a model that accepts a sparse input and converts the sparse + # tensor back to a dense tensor. Scipy sparse matrices are limited to + # 2D, so use a one-dimensional shape; note also that scipy's default + # dtype is int64. 
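+        # (The scipy COO matrices below are accepted directly; Keras converts
+        # them to sparse tensor batches during input preparation.)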
+        model_input = input_layer.Input(shape=(3,), sparse=True, dtype=tf.int64)
+        layers = [ToDense(default_value=-1)]
+        model = get_model_from_layers_with_input(
+            layers, model_input=model_input
+        )
+        model.compile(optimizer="sgd", loss="mse", metrics=["accuracy"])
+
+        input_data = scipy.sparse.coo_matrix(
+            ([1, 2, 3], ([0, 1, 1], [0, 0, 1])), shape=[2, 3]
+        )
+        expected_output = np.array([[1, -1, -1], [2, 3, -1]])
+
+        output = model.evaluate(input_data, expected_output, steps=1)
+        self.assertAllEqual(1.0, output[-1])
+
+        input_data_2 = scipy.sparse.coo_matrix(
+            ([5, 6, 7, 8], ([0, 1, 1, 2], [0, 0, 1, 1])), shape=[3, 3]
+        )
+        expected_output_2 = np.array([[5, -1, -1], [6, 7, -1], [-1, 8, -1]])
+        output_2 = model.evaluate(input_data_2, expected_output_2, steps=1)
+        self.assertAllEqual(1.0, output_2[-1])
+
+    def test_sparse_scipy_predict_input_dicts_via_input_layer_args(self):
+        # Create a model that accepts a sparse input and converts the sparse
+        # tensor back to a dense tensor. Scipy sparse matrices are limited to
+        # 2D, so use a one-dimensional shape; note also that scipy's default
+        # dtype is int64.
+        if test_utils.get_model_type() == "subclass":
+            input_name = "input_1"  # Subclass models don't support input names.
+        else:
+            input_name = "test_input_name"
+        model_input = input_layer.Input(
+            shape=(3,), sparse=True, name=input_name, dtype=tf.int64
+        )
+        layers = [ToDense(default_value=-1)]
+        model = get_model_from_layers_with_input(
+            layers, model_input=model_input
+        )
+
+        input_data = {
+            input_name: scipy.sparse.coo_matrix(
+                ([1, 2, 3], ([0, 1, 1], [0, 0, 1])), shape=[2, 3]
+            )
+        }
+        expected_output = np.array([[1, -1, -1], [2, 3, -1]])
+        output = model.predict(input_data, steps=1)
+        self.assertAllEqual(expected_output, output)
+
+        input_data_2 = {
+            input_name: scipy.sparse.coo_matrix(
+                ([5, 6, 7, 8], ([0, 1, 1, 2], [0, 0, 1, 1])), shape=[3, 3]
+            )
+        }
+        expected_output_2 = np.array([[5, -1, -1], [6, 7, -1], [-1, 8, -1]])
+        output_2 = model.predict(input_data_2, steps=1)
+        self.assertAllEqual(expected_output_2, output_2)
+
+    def test_sparse_scipy_eval_input_dicts(self):
+        # Create a model that accepts a sparse input and converts the sparse
+        # tensor back to a dense tensor. Scipy sparse matrices are limited to
+        # 2D, so use a one-dimensional shape; note also that scipy's default
+        # dtype is int64.
+        if test_utils.get_model_type() == "subclass":
+            input_name = "input_1"  # Subclass models don't support input names.
+ else: + input_name = "test_input_name" + model_input = input_layer.Input( + shape=(3,), sparse=True, name=input_name, dtype=tf.int64 + ) + layers = [ToDense(default_value=-1)] + model = get_model_from_layers_with_input( + layers, model_input=model_input + ) + model.compile(optimizer="sgd", loss="mse", metrics=["accuracy"]) + + input_data = { + input_name: scipy.sparse.coo_matrix( + ([1, 2, 3], ([0, 1, 1], [0, 0, 1])), shape=[2, 3] + ) + } + expected_output = np.array([[1, -1, -1], [2, 3, -1]]) + output = model.evaluate(input_data, expected_output, steps=1) + self.assertAllEqual(1.0, output[-1]) + + input_data_2 = { + input_name: scipy.sparse.coo_matrix( + ([5, 6, 7, 8], ([0, 1, 1, 2], [0, 0, 1, 1])), shape=[3, 3] + ) + } + expected_output_2 = np.array([[5, -1, -1], [6, 7, -1], [-1, 8, -1]]) + output_2 = model.evaluate(input_data_2, expected_output_2, steps=1) + self.assertAllEqual(1.0, output_2[-1]) @test_combinations.run_with_all_model_types @@ -477,165 +512,207 @@ def test_sparse_scipy_eval_input_dicts(self): *test_utils.generate_combinations_with_testcase_name( use_dict=[True, False], use_dataset=[True, False], - action=["predict", "evaluate", "fit"])) -class RaggedTensorInputTest(test_combinations.TestCase, - tf.test.TestCase): - - def test_ragged_input(self, use_dict, use_dataset, action): - data = [(tf.ragged.constant([[[1]], [[2, 3]]]), - np.array([[[1, -1]], [[2, 3]]]))] - - # Prepare the model to test. - input_name = get_input_name(use_dict) - model_input = input_layer.Input( - shape=(None, None), ragged=True, name=input_name, dtype=tf.int32, - batch_size=2) - self.assertIsInstance(model_input._type_spec, - tf.RaggedTensorSpec) - self.assertEqual(model_input.shape.as_list(), [2, None, None]) - layers = [ToDense(default_value=-1)] - model = get_model_from_layers_with_input(layers, model_input=model_input) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - **get_test_mode_kwargs()) - - # Prepare the input data - for data_element in data: - input_data, expected_output = prepare_inputs(data_element, use_dict, - use_dataset, action, - input_name) - # Perform the action. - if action == "predict": - result = model.predict(input_data) - self.assertAllEqual(expected_output, result) - if action == "evaluate": - result = model.evaluate(input_data, expected_output) - self.assertAllEqual(1.0, result[-1]) - if action == "fit": - # TODO(momernick): What's the best way of validating that fit happened? - _ = model.fit(input_data, expected_output, shuffle=False) + action=["predict", "evaluate", "fit"], + ) +) +class RaggedTensorInputTest(test_combinations.TestCase, tf.test.TestCase): + def test_ragged_input(self, use_dict, use_dataset, action): + data = [ + ( + tf.ragged.constant([[[1]], [[2, 3]]]), + np.array([[[1, -1]], [[2, 3]]]), + ) + ] + + # Prepare the model to test. 
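+        # batch_size=2 pins the outermost dimension of the ragged input
+        # signature, which is why the shape below checks as [2, None, None].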
+ input_name = get_input_name(use_dict) + model_input = input_layer.Input( + shape=(None, None), + ragged=True, + name=input_name, + dtype=tf.int32, + batch_size=2, + ) + self.assertIsInstance(model_input._type_spec, tf.RaggedTensorSpec) + self.assertEqual(model_input.shape.as_list(), [2, None, None]) + layers = [ToDense(default_value=-1)] + model = get_model_from_layers_with_input( + layers, model_input=model_input + ) + model.compile( + optimizer="sgd", + loss="mse", + metrics=["accuracy"], + **get_test_mode_kwargs(), + ) + + # Prepare the input data + for data_element in data: + input_data, expected_output = prepare_inputs( + data_element, use_dict, use_dataset, action, input_name + ) + # Perform the action. + if action == "predict": + result = model.predict(input_data) + self.assertAllEqual(expected_output, result) + if action == "evaluate": + result = model.evaluate(input_data, expected_output) + self.assertAllEqual(1.0, result[-1]) + if action == "fit": + # TODO(momernick): What's the best way of validating that fit + # happened? + _ = model.fit(input_data, expected_output, shuffle=False) @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes @parameterized.named_parameters( *test_utils.generate_combinations_with_testcase_name( - use_dict=[True, False], use_dataset=[True, False])) -class RaggedTensorInputValidationTest(test_combinations.TestCase, - tf.test.TestCase): - - def test_ragged_tensor_input_with_one_none_dimension(self, use_dict, - use_dataset): - # Define some input data. - data = [(tf.ragged.constant([[[1, 0]], [[2, 3]]], ragged_rank=1), - np.array([[[1, 0]], [[2, 3]]]))] - - # Prepare the model to test. - input_shape = (None, 2) # RaggedTensorInputTest uses (None, None). - input_name = get_input_name(use_dict) - model_input = input_layer.Input( - shape=input_shape, ragged=True, name=input_name, dtype=tf.int32) - layers = [ToDense(default_value=-1)] - model = get_model_from_layers_with_input(layers, model_input=model_input) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - **get_test_mode_kwargs()) - - for data_element in data: - input_data, expected_output = prepare_inputs( - data_element, - use_dict, - use_dataset, - action="predict", - input_name=input_name) - result = model.predict(input_data) - self.assertAllEqual(expected_output, result) - - def test_ragged_tensor_input_with_no_none_dimension(self, use_dict, - use_dataset): - # Define some input data. - data = [(tf.ragged.constant([[[1, 0]], [[2, 3]]], ragged_rank=0), - np.array([[[1, 0]], [[2, 3]]]))] - - # Prepare the model to test. - input_shape = (1, 2) # RaggedTensorInputTest uses (None, None). 
- input_name = get_input_name(use_dict) - model_input = input_layer.Input( - shape=input_shape, ragged=True, name=input_name, dtype=tf.int32) - layers = [ToDense(default_value=-1)] - model = get_model_from_layers_with_input(layers, model_input=model_input) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - **get_test_mode_kwargs()) - kwargs = get_kwargs(use_dataset) - - for data_element in data: - input_data, expected_output = prepare_inputs( - data_element, - use_dict, - use_dataset, - action="predict", - input_name=input_name) - result = model.predict(input_data, **kwargs) - self.assertAllEqual(expected_output, result) + use_dict=[True, False], use_dataset=[True, False] + ) +) +class RaggedTensorInputValidationTest( + test_combinations.TestCase, tf.test.TestCase +): + def test_ragged_tensor_input_with_one_none_dimension( + self, use_dict, use_dataset + ): + # Define some input data. + data = [ + ( + tf.ragged.constant([[[1, 0]], [[2, 3]]], ragged_rank=1), + np.array([[[1, 0]], [[2, 3]]]), + ) + ] + + # Prepare the model to test. + input_shape = (None, 2) # RaggedTensorInputTest uses (None, None). + input_name = get_input_name(use_dict) + model_input = input_layer.Input( + shape=input_shape, ragged=True, name=input_name, dtype=tf.int32 + ) + layers = [ToDense(default_value=-1)] + model = get_model_from_layers_with_input( + layers, model_input=model_input + ) + model.compile( + optimizer="sgd", + loss="mse", + metrics=["accuracy"], + **get_test_mode_kwargs(), + ) + + for data_element in data: + input_data, expected_output = prepare_inputs( + data_element, + use_dict, + use_dataset, + action="predict", + input_name=input_name, + ) + result = model.predict(input_data) + self.assertAllEqual(expected_output, result) + + def test_ragged_tensor_input_with_no_none_dimension( + self, use_dict, use_dataset + ): + # Define some input data. + data = [ + ( + tf.ragged.constant([[[1, 0]], [[2, 3]]], ragged_rank=0), + np.array([[[1, 0]], [[2, 3]]]), + ) + ] + + # Prepare the model to test. + input_shape = (1, 2) # RaggedTensorInputTest uses (None, None). + input_name = get_input_name(use_dict) + model_input = input_layer.Input( + shape=input_shape, ragged=True, name=input_name, dtype=tf.int32 + ) + layers = [ToDense(default_value=-1)] + model = get_model_from_layers_with_input( + layers, model_input=model_input + ) + model.compile( + optimizer="sgd", + loss="mse", + metrics=["accuracy"], + **get_test_mode_kwargs(), + ) + kwargs = get_kwargs(use_dataset) + + for data_element in data: + input_data, expected_output = prepare_inputs( + data_element, + use_dict, + use_dataset, + action="predict", + input_name=input_name, + ) + result = model.predict(input_data, **kwargs) + self.assertAllEqual(expected_output, result) @test_combinations.run_with_all_model_types() @test_combinations.run_all_keras_modes(always_skip_v1=True) class CompositeTensorModelPredictTest(test_combinations.TestCase): - - def _normalize_shape(self, shape): - if not isinstance(shape, tuple): - shape = tuple(shape.as_list()) - return shape - - def test_sparse_tensor_model_predict(self): - # Create a model that accepts a sparse input and runs a "Dense" layer on it. 
-    model_input = input_layer.Input(
-        shape=(3,), sparse=True, dtype=tf.float32)
-
-    self.assertEqual([None, 3], model_input.shape.as_list())
-
-    layers = [Dense(2)]
-    model = get_model_from_layers_with_input(layers, model_input=model_input)
-
-    sparse_input = tf.SparseTensor(
-        # A two-row matrix
-        indices=[(0, 0), (0, 1), (0, 2), (5, 0), (5, 1), (5, 2)],
-        values=[1., 1., 1., 1., 1., 1.],
-        dense_shape=(6, 3))
-
-    shape = model(sparse_input).shape
-    self.assertEqual((6, 2), self._normalize_shape(shape))
-
-    shape = model.predict(sparse_input, steps=1).shape
-    self.assertEqual((6, 2), self._normalize_shape(shape))
-
-  def test_ragged_tensor_model_predict(self):
-    # Create a model that accepts a sparse input and runs a "Dense" layer on it.
-    model_input = input_layer.Input(shape=(None,), ragged=True)
-    self.assertEqual([None, None], model_input.shape.as_list())
-
-    layers = [Embedding(input_dim=7, output_dim=5)]
-    model = get_model_from_layers_with_input(layers, model_input=model_input)
-
-    ragged_input = tf.ragged.constant([
-        [1, 2, 3, 4, 5],
-        [2, 4],
-    ])
-
-    shape = model(ragged_input).shape
-    self.assertEqual((2, None, 5), self._normalize_shape(shape))
-
-    shape = model.predict(ragged_input, steps=1).shape
-    self.assertEqual((2, None, 5), self._normalize_shape(shape))
+    def _normalize_shape(self, shape):
+        if not isinstance(shape, tuple):
+            shape = tuple(shape.as_list())
+        return shape
+
+    def test_sparse_tensor_model_predict(self):
+        # Create a model that accepts a sparse input and runs a "Dense" layer on
+        # it.
+        model_input = input_layer.Input(
+            shape=(3,), sparse=True, dtype=tf.float32
+        )
+
+        self.assertEqual([None, 3], model_input.shape.as_list())
+
+        layers = [Dense(2)]
+        model = get_model_from_layers_with_input(
+            layers, model_input=model_input
+        )
+
+        sparse_input = tf.SparseTensor(
+            # A two-row matrix
+            indices=[(0, 0), (0, 1), (0, 2), (5, 0), (5, 1), (5, 2)],
+            values=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+            dense_shape=(6, 3),
+        )
+
+        shape = model(sparse_input).shape
+        self.assertEqual((6, 2), self._normalize_shape(shape))
+
+        shape = model.predict(sparse_input, steps=1).shape
+        self.assertEqual((6, 2), self._normalize_shape(shape))
+
+    def test_ragged_tensor_model_predict(self):
+        # Create a model that accepts a ragged input and runs an "Embedding"
+        # layer on it.
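+        # Rows keep their ragged lengths, so the predicted shape below is
+        # (2, None, 5): two rows, a ragged time dimension, embedding size 5.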
+ model_input = input_layer.Input(shape=(None,), ragged=True) + self.assertEqual([None, None], model_input.shape.as_list()) + + layers = [Embedding(input_dim=7, output_dim=5)] + model = get_model_from_layers_with_input( + layers, model_input=model_input + ) + + ragged_input = tf.ragged.constant( + [ + [1, 2, 3, 4, 5], + [2, 4], + ] + ) + + shape = model(ragged_input).shape + self.assertEqual((2, None, 5), self._normalize_shape(shape)) + + shape = model.predict(ragged_input, steps=1).shape + self.assertEqual((2, None, 5), self._normalize_shape(shape)) if __name__ == "__main__": - tf.test.main() + tf.test.main() diff --git a/keras/utils/control_flow_util.py b/keras/utils/control_flow_util.py index 1d43c1221cbe..d895e93da68e 100644 --- a/keras/utils/control_flow_util.py +++ b/keras/utils/control_flow_util.py @@ -21,112 +21,118 @@ def InXlaContext(graph): - ctxt = graph._get_control_flow_context() # pylint: disable=protected-access - return GetContainingXLAContext(ctxt) is not None + ctxt = graph._get_control_flow_context() + return GetContainingXLAContext(ctxt) is not None def GraphOrParentsInXlaContext(graph): - while True: - if InXlaContext(graph): return True - try: - graph = graph.outer_graph - except AttributeError: - return False + while True: + if InXlaContext(graph): + return True + try: + graph = graph.outer_graph + except AttributeError: + return False def IsInWhileLoop(op): - ctxt = op._get_control_flow_context() # pylint: disable=protected-access - return GetContainingWhileContext(ctxt) is not None + ctxt = op._get_control_flow_context() + return GetContainingWhileContext(ctxt) is not None def GetContainingWhileContext(ctxt, stop_ctxt=None): - """Returns the first ancestor WhileContext of `ctxt`. - - Returns `ctxt` if `ctxt` is a WhileContext, or None if `ctxt` is not in a - while loop. - - Args: - ctxt: ControlFlowContext - stop_ctxt: ControlFlowContext, optional. If provided, the search will end - if it sees stop_ctxt. - - Returns: - `ctxt` if `ctxt` is a WhileContext, the most nested WhileContext containing - `ctxt`, or None if `ctxt` is not in a while loop. If `stop_ctxt` is not - `None`, this returns `ctxt` if it matches `stop_ctxt` in its traversal. - """ - while ctxt: - if ctxt.IsWhileContext() or ctxt == stop_ctxt: return ctxt - ctxt = ctxt.outer_context - return None + """Returns the first ancestor WhileContext of `ctxt`. + + Returns `ctxt` if `ctxt` is a WhileContext, or None if `ctxt` is not in a + while loop. + + Args: + ctxt: ControlFlowContext + stop_ctxt: ControlFlowContext, optional. If provided, the search will end + if it sees stop_ctxt. + + Returns: + `ctxt` if `ctxt` is a WhileContext, the most nested WhileContext + containing `ctxt`, or None if `ctxt` is not in a while loop. If + `stop_ctxt` is not `None`, this returns `ctxt` if it matches `stop_ctxt` + in its traversal. + """ + while ctxt: + if ctxt.IsWhileContext() or ctxt == stop_ctxt: + return ctxt + ctxt = ctxt.outer_context + return None def GetContainingXLAContext(ctxt): - """Returns the first ancestor XLAContext of `ctxt`. - - Returns `ctxt` if `ctxt` is a XLAContext, or None if `ctxt` is not in a - while loop. - - Args: - ctxt: ControlFlowContext - - Returns: - `ctxt` if `ctxt` is a XLAContext, the most nested XLAContext containing - `ctxt`, or None if `ctxt` is not in a while loop. 
- """ - while ctxt: - if ctxt.IsXLAContext(): return ctxt - ctxt = ctxt.outer_context - return None - - -def smart_cond(pred, true_fn=None, false_fn=None, name=None): # pylint: disable=invalid-name - """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. - - If `pred` is a bool or has a constant value, we return either `true_fn()` - or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. - - Args: - pred: A scalar determining whether to return the result of `true_fn` or - `false_fn`. - true_fn: The callable to be performed if pred is true. - false_fn: The callable to be performed if pred is false. - name: Optional name prefix when using `tf.cond`. - - Returns: - Tensors returned by the call to either `true_fn` or `false_fn`. - - Raises: - TypeError: If `true_fn` or `false_fn` is not callable. - """ - if isinstance(pred, tf.Variable): - return tf.cond( - pred, true_fn=true_fn, false_fn=false_fn, name=name) - return tf.__internal__.smart_cond.smart_cond( - pred, true_fn=true_fn, false_fn=false_fn, name=name) - - -def constant_value(pred): # pylint: disable=invalid-name - """Return the bool value for `pred`, or None if `pred` had a dynamic value. - - Args: - pred: A scalar, either a Python bool or a TensorFlow boolean variable - or tensor, or the Python integer 1 or 0. - - Returns: - True or False if `pred` has a constant boolean value, None otherwise. - - Raises: - TypeError: If `pred` is not a Variable, Tensor or bool, or Python - integer 1 or 0. - """ - if isinstance(pred, tf.Tensor): - return tf.get_static_value(pred) - if pred in {0, 1}: # Accept 1/0 as valid boolean values - return bool(pred) - if isinstance(pred, bool): - return pred - if isinstance(pred, tf.Variable): + """Returns the first ancestor XLAContext of `ctxt`. + + Returns `ctxt` if `ctxt` is a XLAContext, or None if `ctxt` is not in a + while loop. + + Args: + ctxt: ControlFlowContext + + Returns: + `ctxt` if `ctxt` is a XLAContext, the most nested XLAContext containing + `ctxt`, or None if `ctxt` is not in a while loop. + """ + while ctxt: + if ctxt.IsXLAContext(): + return ctxt + ctxt = ctxt.outer_context return None - raise TypeError("`pred` must be a Tensor, or a Python bool, or 1 or 0. " - f"Received: {type(pred)}") + + +def smart_cond(pred, true_fn=None, false_fn=None, name=None): + """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. + + If `pred` is a bool or has a constant value, we return either `true_fn()` + or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. + + Args: + pred: A scalar determining whether to return the result of `true_fn` or + `false_fn`. + true_fn: The callable to be performed if pred is true. + false_fn: The callable to be performed if pred is false. + name: Optional name prefix when using `tf.cond`. + + Returns: + Tensors returned by the call to either `true_fn` or `false_fn`. + + Raises: + TypeError: If `true_fn` or `false_fn` is not callable. + """ + if isinstance(pred, tf.Variable): + return tf.cond(pred, true_fn=true_fn, false_fn=false_fn, name=name) + return tf.__internal__.smart_cond.smart_cond( + pred, true_fn=true_fn, false_fn=false_fn, name=name + ) + + +def constant_value(pred): + """Return the bool value for `pred`, or None if `pred` had a dynamic value. + + Args: + pred: A scalar, either a Python bool or a TensorFlow boolean variable + or tensor, or the Python integer 1 or 0. + + Returns: + True or False if `pred` has a constant boolean value, None otherwise. 
+
+    Raises:
+      TypeError: If `pred` is not a Variable, Tensor or bool, or Python
+        integer 1 or 0.
+    """
+    if isinstance(pred, tf.Tensor):
+        return tf.get_static_value(pred)
+    if pred in {0, 1}:  # Accept 1/0 as valid boolean values
+        return bool(pred)
+    if isinstance(pred, bool):
+        return pred
+    if isinstance(pred, tf.Variable):
+        return None
+    raise TypeError(
+        "`pred` must be a Tensor, or a Python bool, or 1 or 0. "
+        f"Received: {type(pred)}"
+    )
diff --git a/keras/utils/conv_utils.py b/keras/utils/conv_utils.py
index 5940653999e0..930bbaf9fef9 100644
--- a/keras/utils/conv_utils.py
+++ b/keras/utils/conv_utils.py
@@ -14,517 +14,568 @@
 # ==============================================================================
 """Utilities used by convolution layers."""

-import tensorflow.compat.v2 as tf
-
 import itertools

 import numpy as np
+import tensorflow.compat.v2 as tf
+
 from keras import backend


 def convert_data_format(data_format, ndim):
-  if data_format == 'channels_last':
-    if ndim == 3:
-      return 'NWC'
-    elif ndim == 4:
-      return 'NHWC'
-    elif ndim == 5:
-      return 'NDHWC'
-    else:
-      raise ValueError(
-          f'Input rank not supported: {ndim}. Expected values are [3, 4, 5]')
-  elif data_format == 'channels_first':
-    if ndim == 3:
-      return 'NCW'
-    elif ndim == 4:
-      return 'NCHW'
-    elif ndim == 5:
-      return 'NCDHW'
+    if data_format == "channels_last":
+        if ndim == 3:
+            return "NWC"
+        elif ndim == 4:
+            return "NHWC"
+        elif ndim == 5:
+            return "NDHWC"
+        else:
+            raise ValueError(
+                f"Input rank not supported: {ndim}. "
+                "Expected values are [3, 4, 5]"
+            )
+    elif data_format == "channels_first":
+        if ndim == 3:
+            return "NCW"
+        elif ndim == 4:
+            return "NCHW"
+        elif ndim == 5:
+            return "NCDHW"
+        else:
+            raise ValueError(
+                f"Input rank not supported: {ndim}. "
+                "Expected values are [3, 4, 5]"
+            )
     else:
-      raise ValueError(
-          f'Input rank not supported: {ndim}. Expected values are [3, 4, 5]')
-  else:
-    raise ValueError(
-        f'Invalid data_format: {data_format}. '
-        'Expected values are ["channels_first", "channels_last"]')
+        raise ValueError(
+            f"Invalid data_format: {data_format}. "
+            'Expected values are ["channels_first", "channels_last"]'
+        )


 def normalize_tuple(value, n, name, allow_zero=False):
-  """Transforms non-negative/positive integer/integers into an integer tuple.
-
-  Args:
-    value: The value to validate and convert. Could an int, or any iterable of
-      ints.
-    n: The size of the tuple to be returned.
-    name: The name of the argument being validated, e.g. "strides" or
-      "kernel_size". This is only used to format error messages.
-    allow_zero: Default to False. A ValueError will raised if zero is received
-      and this param is False.
-
-  Returns:
-    A tuple of n integers.
-
-  Raises:
-    ValueError: If something else than an int/long or iterable thereof or a
-    negative value is
-      passed.
-  """
-  error_msg = (f'The `{name}` argument must be a tuple of {n} '
-               f'integers. Received: {value}')
-
-  if isinstance(value, int):
-    value_tuple = (value,) * n
-  else:
-    try:
-      value_tuple = tuple(value)
-    except TypeError:
-      raise ValueError(error_msg)
-    if len(value_tuple) != n:
-      raise ValueError(error_msg)
-    for single_value in value_tuple:
-      try:
-        int(single_value)
-      except (ValueError, TypeError):
-        error_msg += (f'including element {single_value} of '
-                      f'type {type(single_value)}')
+    """Transforms non-negative/positive integer/integers into an integer tuple.
+
+    Args:
+      value: The value to validate and convert. Could be an int, or any
+        iterable of ints.
+      n: The size of the tuple to be returned.
+      name: The name of the argument being validated, e.g. "strides" or
+        "kernel_size". This is only used to format error messages.
+      allow_zero: A ValueError will be raised if zero is received
+        and this param is False. Defaults to `False`.
+
+    Returns:
+      A tuple of n integers.
+
+    Raises:
+      ValueError: If something other than an int/long or an iterable
+        thereof, or a negative value, is passed.
+    """
+    error_msg = (
+        f"The `{name}` argument must be a tuple of {n} "
+        f"integers. Received: {value}"
+    )
+
+    if isinstance(value, int):
+        value_tuple = (value,) * n
+    else:
+        try:
+            value_tuple = tuple(value)
+        except TypeError:
+            raise ValueError(error_msg)
+        if len(value_tuple) != n:
+            raise ValueError(error_msg)
+        for single_value in value_tuple:
+            try:
+                int(single_value)
+            except (ValueError, TypeError):
+                error_msg += (
+                    f"including element {single_value} of "
+                    f"type {type(single_value)}"
+                )
+                raise ValueError(error_msg)
+
+    if allow_zero:
+        unqualified_values = {v for v in value_tuple if v < 0}
+        req_msg = ">= 0"
+    else:
+        unqualified_values = {v for v in value_tuple if v <= 0}
+        req_msg = "> 0"
+
+    if unqualified_values:
+        error_msg += (
+            f" including {unqualified_values}"
+            f" that does not satisfy the requirement `{req_msg}`."
+        )
+        raise ValueError(error_msg)
-  if allow_zero:
-    unqualified_values = {v for v in value_tuple if v < 0}
-    req_msg = '>= 0'
-  else:
-    unqualified_values = {v for v in value_tuple if v <= 0}
-    req_msg = '> 0'
-
-  if unqualified_values:
-    error_msg += (f' including {unqualified_values}'
-                  f' that does not satisfy the requirement `{req_msg}`.')
-    raise ValueError(error_msg)
-
-  return value_tuple
+    return value_tuple


 def conv_output_length(input_length, filter_size, padding, stride, dilation=1):
-  """Determines output length of a convolution given input length.
-
-  Args:
-    input_length: integer.
-    filter_size: integer.
-    padding: one of "same", "valid", "full", "causal"
-    stride: integer.
-    dilation: dilation rate, integer.
-
-  Returns:
-    The output length (integer).
-  """
-  if input_length is None:
-    return None
-  assert padding in {'same', 'valid', 'full', 'causal'}
-  dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
-  if padding in ['same', 'causal']:
-    output_length = input_length
-  elif padding == 'valid':
-    output_length = input_length - dilated_filter_size + 1
-  elif padding == 'full':
-    output_length = input_length + dilated_filter_size - 1
-  return (output_length + stride - 1) // stride
+    """Determines output length of a convolution given input length.
+
+    Args:
+      input_length: integer.
+      filter_size: integer.
+      padding: one of "same", "valid", "full", "causal"
+      stride: integer.
+      dilation: dilation rate, integer.
+
+    Returns:
+      The output length (integer).
+    """
+    if input_length is None:
+        return None
+    assert padding in {"same", "valid", "full", "causal"}
+    dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
+    if padding in ["same", "causal"]:
+        output_length = input_length
+    elif padding == "valid":
+        output_length = input_length - dilated_filter_size + 1
+    elif padding == "full":
+        output_length = input_length + dilated_filter_size - 1
+    return (output_length + stride - 1) // stride


 def conv_input_length(output_length, filter_size, padding, stride):
-  """Determines input length of a convolution given output length.
-
-  Args:
-    output_length: integer.
-    filter_size: integer.
-    padding: one of "same", "valid", "full".
-    stride: integer.
-
-  Returns:
-    The input length (integer).
- """ - if output_length is None: - return None - assert padding in {'same', 'valid', 'full'} - if padding == 'same': - pad = filter_size // 2 - elif padding == 'valid': - pad = 0 - elif padding == 'full': - pad = filter_size - 1 - return (output_length - 1) * stride - 2 * pad + filter_size - - -def deconv_output_length(input_length, - filter_size, - padding, - output_padding=None, - stride=0, - dilation=1): - """Determines output length of a transposed convolution given input length. - - Args: - input_length: Integer. - filter_size: Integer. - padding: one of `"same"`, `"valid"`, `"full"`. - output_padding: Integer, amount of padding along the output dimension. Can - be set to `None` in which case the output length is inferred. - stride: Integer. - dilation: Integer. - - Returns: - The output length (integer). - """ - assert padding in {'same', 'valid', 'full'} - if input_length is None: - return None - - # Get the dilated kernel size - filter_size = filter_size + (filter_size - 1) * (dilation - 1) - - # Infer length if output padding is None, else compute the exact length - if output_padding is None: - if padding == 'valid': - length = input_length * stride + max(filter_size - stride, 0) - elif padding == 'full': - length = input_length * stride - (stride + filter_size - 2) - elif padding == 'same': - length = input_length * stride - - else: - if padding == 'same': - pad = filter_size // 2 - elif padding == 'valid': - pad = 0 - elif padding == 'full': - pad = filter_size - 1 - - length = ((input_length - 1) * stride + filter_size - 2 * pad + - output_padding) - return length + """Determines input length of a convolution given output length. + + Args: + output_length: integer. + filter_size: integer. + padding: one of "same", "valid", "full". + stride: integer. + + Returns: + The input length (integer). + """ + if output_length is None: + return None + assert padding in {"same", "valid", "full"} + if padding == "same": + pad = filter_size // 2 + elif padding == "valid": + pad = 0 + elif padding == "full": + pad = filter_size - 1 + return (output_length - 1) * stride - 2 * pad + filter_size + + +def deconv_output_length( + input_length, + filter_size, + padding, + output_padding=None, + stride=0, + dilation=1, +): + """Determines output length of a transposed convolution given input length. + + Args: + input_length: Integer. + filter_size: Integer. + padding: one of `"same"`, `"valid"`, `"full"`. + output_padding: Integer, amount of padding along the output dimension. + Can be set to `None` in which case the output length is inferred. + stride: Integer. + dilation: Integer. + + Returns: + The output length (integer). 
+ """ + assert padding in {"same", "valid", "full"} + if input_length is None: + return None + + # Get the dilated kernel size + filter_size = filter_size + (filter_size - 1) * (dilation - 1) + + # Infer length if output padding is None, else compute the exact length + if output_padding is None: + if padding == "valid": + length = input_length * stride + max(filter_size - stride, 0) + elif padding == "full": + length = input_length * stride - (stride + filter_size - 2) + elif padding == "same": + length = input_length * stride + + else: + if padding == "same": + pad = filter_size // 2 + elif padding == "valid": + pad = 0 + elif padding == "full": + pad = filter_size - 1 + + length = ( + (input_length - 1) * stride + filter_size - 2 * pad + output_padding + ) + return length def normalize_data_format(value): - if value is None: - value = backend.image_data_format() - data_format = value.lower() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('The `data_format` argument must be one of ' - f'"channels_first", "channels_last". Received: {value}') - return data_format + if value is None: + value = backend.image_data_format() + data_format = value.lower() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError( + "The `data_format` argument must be one of " + f'"channels_first", "channels_last". Received: {value}' + ) + return data_format def normalize_padding(value): - if isinstance(value, (list, tuple)): - return value - padding = value.lower() - if padding not in {'valid', 'same', 'causal'}: - raise ValueError('The `padding` argument must be a list/tuple or one of ' - '"valid", "same" (or "causal", only for `Conv1D). ' - f'Received: {padding}') - return padding + if isinstance(value, (list, tuple)): + return value + padding = value.lower() + if padding not in {"valid", "same", "causal"}: + raise ValueError( + "The `padding` argument must be a list/tuple or one of " + '"valid", "same" (or "causal", only for `Conv1D). ' + f"Received: {padding}" + ) + return padding def conv_kernel_mask(input_shape, kernel_shape, strides, padding): - """Compute a mask representing the connectivity of a convolution operation. - - Assume a convolution with given parameters is applied to an input having N - spatial dimensions with `input_shape = (d_in1, ..., d_inN)` to produce an - output with shape `(d_out1, ..., d_outN)`. This method returns a boolean array - of shape `(d_in1, ..., d_inN, d_out1, ..., d_outN)` with `True` entries - indicating pairs of input and output locations that are connected by a weight. - - Example: - - >>> input_shape = (4,) - >>> kernel_shape = (2,) - >>> strides = (1,) - >>> padding = "valid" - >>> conv_kernel_mask(input_shape, kernel_shape, strides, padding) - array([[ True, False, False], - [ True, True, False], - [False, True, True], - [False, False, True]]) - - where rows and columns correspond to inputs and outputs respectively. - - - Args: - input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the - input. - kernel_shape: tuple of size N, spatial shape of the convolutional kernel / - receptive field. - strides: tuple of size N, strides along each spatial dimension. - padding: type of padding, string `"same"` or `"valid"`. - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. 
- - Returns: - A boolean 2N-D `np.ndarray` of shape - `(d_in1, ..., d_inN, d_out1, ..., d_outN)`, where `(d_out1, ..., d_outN)` - is the spatial shape of the output. `True` entries in the mask represent - pairs of input-output locations that are connected by a weight. - - Raises: - ValueError: if `input_shape`, `kernel_shape` and `strides` don't have the - same number of dimensions. - NotImplementedError: if `padding` is not in {`"same"`, `"valid"`}. - """ - if padding not in {'same', 'valid'}: - raise NotImplementedError(f'Padding type {padding} not supported. ' - 'Only "valid" and "same" are implemented.') - - in_dims = len(input_shape) - if isinstance(kernel_shape, int): - kernel_shape = (kernel_shape,) * in_dims - if isinstance(strides, int): - strides = (strides,) * in_dims - - kernel_dims = len(kernel_shape) - stride_dims = len(strides) - if kernel_dims != in_dims or stride_dims != in_dims: - raise ValueError('Number of strides, input and kernel dimensions must all ' - f'match. Received: stride_dims={stride_dims}, ' - f'in_dims={in_dims}, kernel_dims={kernel_dims}') - - output_shape = conv_output_shape(input_shape, kernel_shape, strides, padding) - - mask_shape = input_shape + output_shape - mask = np.zeros(mask_shape, np.bool) - - output_axes_ticks = [range(dim) for dim in output_shape] - for output_position in itertools.product(*output_axes_ticks): - input_axes_ticks = conv_connected_inputs(input_shape, kernel_shape, - output_position, strides, padding) - for input_position in itertools.product(*input_axes_ticks): - mask[input_position + output_position] = True - - return mask - - -def conv_kernel_idxs(input_shape, kernel_shape, strides, padding, filters_in, - filters_out, data_format): - """Yields output-input tuples of indices in a CNN layer. - - The generator iterates over all `(output_idx, input_idx)` tuples, where + """Compute a mask representing the connectivity of a convolution operation. + + Assume a convolution with given parameters is applied to an input having N + spatial dimensions with `input_shape = (d_in1, ..., d_inN)` to produce an + output with shape `(d_out1, ..., d_outN)`. This method returns a boolean + array of shape `(d_in1, ..., d_inN, d_out1, ..., d_outN)` with `True` + entries indicating pairs of input and output locations that are connected by + a weight. + + Example: + + >>> input_shape = (4,) + >>> kernel_shape = (2,) + >>> strides = (1,) + >>> padding = "valid" + >>> conv_kernel_mask(input_shape, kernel_shape, strides, padding) + array([[ True, False, False], + [ True, True, False], + [False, True, True], + [False, False, True]]) + + where rows and columns correspond to inputs and outputs respectively. + + + Args: + input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the + input. + kernel_shape: tuple of size N, spatial shape of the convolutional kernel / + receptive field. + strides: tuple of size N, strides along each spatial dimension. + padding: type of padding, string `"same"` or `"valid"`. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + + Returns: + A boolean 2N-D `np.ndarray` of shape + `(d_in1, ..., d_inN, d_out1, ..., d_outN)`, where `(d_out1, ..., d_outN)` + is the spatial shape of the output. `True` entries in the mask represent + pairs of input-output locations that are connected by a weight. 
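+
+    (Editor's note, illustrative: with `padding="same"`, `kernel_shape=(2,)`
+    and `strides=(1,)`, each output position connects to at most two input
+    positions, so every column of the mask holds at most two `True` entries.)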
+ + Raises: + ValueError: if `input_shape`, `kernel_shape` and `strides` don't have the + same number of dimensions. + NotImplementedError: if `padding` is not in {`"same"`, `"valid"`}. + """ + if padding not in {"same", "valid"}: + raise NotImplementedError( + f"Padding type {padding} not supported. " + 'Only "valid" and "same" are implemented.' + ) + + in_dims = len(input_shape) + if isinstance(kernel_shape, int): + kernel_shape = (kernel_shape,) * in_dims + if isinstance(strides, int): + strides = (strides,) * in_dims + + kernel_dims = len(kernel_shape) + stride_dims = len(strides) + if kernel_dims != in_dims or stride_dims != in_dims: + raise ValueError( + "Number of strides, input and kernel dimensions must all " + f"match. Received: stride_dims={stride_dims}, " + f"in_dims={in_dims}, kernel_dims={kernel_dims}" + ) + + output_shape = conv_output_shape( + input_shape, kernel_shape, strides, padding + ) + + mask_shape = input_shape + output_shape + mask = np.zeros(mask_shape, bool) + + output_axes_ticks = [range(dim) for dim in output_shape] + for output_position in itertools.product(*output_axes_ticks): + input_axes_ticks = conv_connected_inputs( + input_shape, kernel_shape, output_position, strides, padding + ) + for input_position in itertools.product(*input_axes_ticks): + mask[input_position + output_position] = True + + return mask + + +def conv_kernel_idxs( + input_shape, + kernel_shape, + strides, + padding, + filters_in, + filters_out, + data_format, +): + """Yields output-input tuples of indices in a CNN layer. + + The generator iterates over all `(output_idx, input_idx)` tuples, where `output_idx` is an integer index in a flattened tensor representing a single output image of a convolutional layer that is connected (via the layer weights) to the respective single input image at `input_idx` - Example: - - >>> input_shape = (2, 2) - >>> kernel_shape = (2, 1) - >>> strides = (1, 1) - >>> padding = "valid" - >>> filters_in = 1 - >>> filters_out = 1 - >>> data_format = "channels_last" - >>> list(conv_kernel_idxs(input_shape, kernel_shape, strides, padding, - ... filters_in, filters_out, data_format)) - [(0, 0), (0, 2), (1, 1), (1, 3)] - - Args: - input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the - input. - kernel_shape: tuple of size N, spatial shape of the convolutional kernel / - receptive field. - strides: tuple of size N, strides along each spatial dimension. - padding: type of padding, string `"same"` or `"valid"`. - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - filters_in: `int`, number if filters in the input to the layer. - filters_out: `int', number if filters in the output of the layer. - data_format: string, "channels_first" or "channels_last". - - Yields: - The next tuple `(output_idx, input_idx)`, where - `output_idx` is an integer index in a flattened tensor representing a single - output image of a convolutional layer that is connected (via the layer - weights) to the respective single input image at `input_idx`. - - Raises: - ValueError: if `data_format` is neither - `"channels_last"` nor `"channels_first"`, or if number of strides, input, - and kernel number of dimensions do not match. - - NotImplementedError: if `padding` is neither `"same"` nor `"valid"`. - """ - if padding not in ('same', 'valid'): - raise NotImplementedError(f'Padding type {padding} not supported. 
' - 'Only "valid" and "same" are implemented.') - - in_dims = len(input_shape) - if isinstance(kernel_shape, int): - kernel_shape = (kernel_shape,) * in_dims - if isinstance(strides, int): - strides = (strides,) * in_dims - - kernel_dims = len(kernel_shape) - stride_dims = len(strides) - if kernel_dims != in_dims or stride_dims != in_dims: - raise ValueError('Number of strides, input and kernel dimensions must all ' - f'match. Received: stride_dims={stride_dims}, ' - f'in_dims={in_dims}, kernel_dims={kernel_dims}') - - output_shape = conv_output_shape(input_shape, kernel_shape, strides, padding) - output_axes_ticks = [range(dim) for dim in output_shape] - - if data_format == 'channels_first': - concat_idxs = lambda spatial_idx, filter_idx: (filter_idx,) + spatial_idx - elif data_format == 'channels_last': - concat_idxs = lambda spatial_idx, filter_idx: spatial_idx + (filter_idx,) - else: - raise ValueError( - f'Data format `{data_format}` not recognized.' - '`data_format` must be "channels_first" or "channels_last".') - - for output_position in itertools.product(*output_axes_ticks): - input_axes_ticks = conv_connected_inputs(input_shape, kernel_shape, - output_position, strides, padding) - for input_position in itertools.product(*input_axes_ticks): - for f_in in range(filters_in): - for f_out in range(filters_out): - out_idx = np.ravel_multi_index( - multi_index=concat_idxs(output_position, f_out), - dims=concat_idxs(output_shape, filters_out)) - in_idx = np.ravel_multi_index( - multi_index=concat_idxs(input_position, f_in), - dims=concat_idxs(input_shape, filters_in)) - yield (out_idx, in_idx) - - -def conv_connected_inputs(input_shape, kernel_shape, output_position, strides, - padding): - """Return locations of the input connected to an output position. - - Assume a convolution with given parameters is applied to an input having N - spatial dimensions with `input_shape = (d_in1, ..., d_inN)`. This method - returns N ranges specifying the input region that was convolved with the - kernel to produce the output at position - `output_position = (p_out1, ..., p_outN)`. - - Example: - - >>> input_shape = (4, 4) - >>> kernel_shape = (2, 1) - >>> output_position = (1, 1) - >>> strides = (1, 1) - >>> padding = "valid" - >>> conv_connected_inputs(input_shape, kernel_shape, output_position, - ... strides, padding) - [range(1, 3), range(1, 2)] - - Args: - input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the - input. - kernel_shape: tuple of size N, spatial shape of the convolutional kernel / - receptive field. - output_position: tuple of size N: `(p_out1, ..., p_outN)`, a single position - in the output of the convolution. - strides: tuple of size N, strides along each spatial dimension. - padding: type of padding, string `"same"` or `"valid"`. - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - - Returns: - N ranges `[[p_in_left1, ..., p_in_right1], ..., - [p_in_leftN, ..., p_in_rightN]]` specifying the region in the - input connected to output_position. 
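+
+    Flat indices are computed with `np.ravel_multi_index`; the filter
+    dimension is placed before or after the spatial dimensions according to
+    `data_format` (an editor's summary of the implementation below).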
- """ - ranges = [] - - ndims = len(input_shape) - for d in range(ndims): - left_shift = int(kernel_shape[d] / 2) - right_shift = kernel_shape[d] - left_shift - - center = output_position[d] * strides[d] - - if padding == 'valid': - center += left_shift - - start = max(0, center - left_shift) - end = min(input_shape[d], center + right_shift) - - ranges.append(range(start, end)) - - return ranges + Example: + + >>> input_shape = (2, 2) + >>> kernel_shape = (2, 1) + >>> strides = (1, 1) + >>> padding = "valid" + >>> filters_in = 1 + >>> filters_out = 1 + >>> data_format = "channels_last" + >>> list(conv_kernel_idxs(input_shape, kernel_shape, strides, padding, + ... filters_in, filters_out, data_format)) + [(0, 0), (0, 2), (1, 1), (1, 3)] + + Args: + input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the + input. + kernel_shape: tuple of size N, spatial shape of the convolutional kernel / + receptive field. + strides: tuple of size N, strides along each spatial dimension. + padding: type of padding, string `"same"` or `"valid"`. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + filters_in: `int`, number if filters in the input to the layer. + filters_out: `int', number if filters in the output of the layer. + data_format: string, "channels_first" or "channels_last". + + Yields: + The next tuple `(output_idx, input_idx)`, where `output_idx` is an integer + index in a flattened tensor representing a single output image of a + convolutional layer that is connected (via the layer weights) to the + respective single input image at `input_idx`. + + Raises: + ValueError: if `data_format` is neither `"channels_last"` nor + `"channels_first"`, or if number of strides, input, and kernel number + of dimensions do not match. + + NotImplementedError: if `padding` is neither `"same"` nor `"valid"`. + """ + if padding not in ("same", "valid"): + raise NotImplementedError( + f"Padding type {padding} not supported. " + 'Only "valid" and "same" are implemented.' + ) + + in_dims = len(input_shape) + if isinstance(kernel_shape, int): + kernel_shape = (kernel_shape,) * in_dims + if isinstance(strides, int): + strides = (strides,) * in_dims + + kernel_dims = len(kernel_shape) + stride_dims = len(strides) + if kernel_dims != in_dims or stride_dims != in_dims: + raise ValueError( + "Number of strides, input and kernel dimensions must all " + f"match. Received: stride_dims={stride_dims}, " + f"in_dims={in_dims}, kernel_dims={kernel_dims}" + ) + + output_shape = conv_output_shape( + input_shape, kernel_shape, strides, padding + ) + output_axes_ticks = [range(dim) for dim in output_shape] + + if data_format == "channels_first": + concat_idxs = ( + lambda spatial_idx, filter_idx: (filter_idx,) + spatial_idx + ) + elif data_format == "channels_last": + concat_idxs = lambda spatial_idx, filter_idx: spatial_idx + ( + filter_idx, + ) + else: + raise ValueError( + f"Data format `{data_format}` not recognized." + '`data_format` must be "channels_first" or "channels_last".' 
+ ) + + for output_position in itertools.product(*output_axes_ticks): + input_axes_ticks = conv_connected_inputs( + input_shape, kernel_shape, output_position, strides, padding + ) + for input_position in itertools.product(*input_axes_ticks): + for f_in in range(filters_in): + for f_out in range(filters_out): + out_idx = np.ravel_multi_index( + multi_index=concat_idxs(output_position, f_out), + dims=concat_idxs(output_shape, filters_out), + ) + in_idx = np.ravel_multi_index( + multi_index=concat_idxs(input_position, f_in), + dims=concat_idxs(input_shape, filters_in), + ) + yield (out_idx, in_idx) + + +def conv_connected_inputs( + input_shape, kernel_shape, output_position, strides, padding +): + """Return locations of the input connected to an output position. + + Assume a convolution with given parameters is applied to an input having N + spatial dimensions with `input_shape = (d_in1, ..., d_inN)`. This method + returns N ranges specifying the input region that was convolved with the + kernel to produce the output at position + `output_position = (p_out1, ..., p_outN)`. + + Example: + + >>> input_shape = (4, 4) + >>> kernel_shape = (2, 1) + >>> output_position = (1, 1) + >>> strides = (1, 1) + >>> padding = "valid" + >>> conv_connected_inputs(input_shape, kernel_shape, output_position, + ... strides, padding) + [range(1, 3), range(1, 2)] + + Args: + input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the + input. + kernel_shape: tuple of size N, spatial shape of the convolutional kernel / + receptive field. + output_position: tuple of size N: `(p_out1, ..., p_outN)`, a single + position in the output of the convolution. + strides: tuple of size N, strides along each spatial dimension. + padding: type of padding, string `"same"` or `"valid"`. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + + Returns: + N ranges `[[p_in_left1, ..., p_in_right1], ..., + [p_in_leftN, ..., p_in_rightN]]` specifying the region in the + input connected to output_position. + """ + ranges = [] + + ndims = len(input_shape) + for d in range(ndims): + left_shift = int(kernel_shape[d] / 2) + right_shift = kernel_shape[d] - left_shift + + center = output_position[d] * strides[d] + + if padding == "valid": + center += left_shift + + start = max(0, center - left_shift) + end = min(input_shape[d], center + right_shift) + + ranges.append(range(start, end)) + + return ranges def conv_output_shape(input_shape, kernel_shape, strides, padding): - """Return the output shape of an N-D convolution. - - Forces dimensions where input is empty (size 0) to remain empty. - - Args: - input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the - input. - kernel_shape: tuple of size N, spatial shape of the convolutional kernel / - receptive field. - strides: tuple of size N, strides along each spatial dimension. - padding: type of padding, string `"same"` or `"valid"`. - `"valid"` means no padding. `"same"` results in padding evenly to - the left/right or up/down of the input such that output has the same - height/width dimension as the input. - - Returns: - tuple of size N: `(d_out1, ..., d_outN)`, spatial shape of the output. 
- """ - dims = range(len(kernel_shape)) - output_shape = [ - conv_output_length(input_shape[d], kernel_shape[d], padding, strides[d]) - for d in dims - ] - output_shape = tuple( - [0 if input_shape[d] == 0 else output_shape[d] for d in dims]) - return output_shape + """Return the output shape of an N-D convolution. + + Forces dimensions where input is empty (size 0) to remain empty. + + Args: + input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the + input. + kernel_shape: tuple of size N, spatial shape of the convolutional kernel / + receptive field. + strides: tuple of size N, strides along each spatial dimension. + padding: type of padding, string `"same"` or `"valid"`. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + + Returns: + tuple of size N: `(d_out1, ..., d_outN)`, spatial shape of the output. + """ + dims = range(len(kernel_shape)) + output_shape = [ + conv_output_length(input_shape[d], kernel_shape[d], padding, strides[d]) + for d in dims + ] + output_shape = tuple( + [0 if input_shape[d] == 0 else output_shape[d] for d in dims] + ) + return output_shape def squeeze_batch_dims(inp, op, inner_rank): - """Returns `unsqueeze_batch(op(squeeze_batch(inp)))`. - - Where `squeeze_batch` reshapes `inp` to shape - `[prod(inp.shape[:-inner_rank])] + inp.shape[-inner_rank:]` - and `unsqueeze_batch` does the reverse reshape but on the output. - - Args: - inp: A tensor with dims `batch_shape + inner_shape` where `inner_shape` - is length `inner_rank`. - op: A callable that takes a single input tensor and returns a single. - output tensor. - inner_rank: A python integer. - - Returns: - `unsqueeze_batch_op(squeeze_batch(inp))`. - """ - with tf.name_scope('squeeze_batch_dims'): - shape = inp.shape - - inner_shape = shape[-inner_rank:] - if not inner_shape.is_fully_defined(): - inner_shape = tf.shape(inp)[-inner_rank:] - - batch_shape = shape[:-inner_rank] - if not batch_shape.is_fully_defined(): - batch_shape = tf.shape(inp)[:-inner_rank] - - if isinstance(inner_shape, tf.TensorShape): - inp_reshaped = tf.reshape(inp, [-1] + inner_shape.as_list()) - else: - inp_reshaped = tf.reshape( - inp, tf.concat(([-1], inner_shape), axis=-1)) - - out_reshaped = op(inp_reshaped) - - out_inner_shape = out_reshaped.shape[-inner_rank:] - if not out_inner_shape.is_fully_defined(): - out_inner_shape = tf.shape(out_reshaped)[-inner_rank:] - - out = tf.reshape( - out_reshaped, tf.concat((batch_shape, out_inner_shape), axis=-1)) - - out.set_shape(inp.shape[:-inner_rank] + out.shape[-inner_rank:]) - return out + """Returns `unsqueeze_batch(op(squeeze_batch(inp)))`. + + Where `squeeze_batch` reshapes `inp` to shape + `[prod(inp.shape[:-inner_rank])] + inp.shape[-inner_rank:]` + and `unsqueeze_batch` does the reverse reshape but on the output. + + Args: + inp: A tensor with dims `batch_shape + inner_shape` where `inner_shape` + is length `inner_rank`. + op: A callable that takes a single input tensor and returns a single. + output tensor. + inner_rank: A python integer. + + Returns: + `unsqueeze_batch_op(squeeze_batch(inp))`. 
+ """ + with tf.name_scope("squeeze_batch_dims"): + shape = inp.shape + + inner_shape = shape[-inner_rank:] + if not inner_shape.is_fully_defined(): + inner_shape = tf.shape(inp)[-inner_rank:] + + batch_shape = shape[:-inner_rank] + if not batch_shape.is_fully_defined(): + batch_shape = tf.shape(inp)[:-inner_rank] + + if isinstance(inner_shape, tf.TensorShape): + inp_reshaped = tf.reshape(inp, [-1] + inner_shape.as_list()) + else: + inp_reshaped = tf.reshape( + inp, tf.concat(([-1], inner_shape), axis=-1) + ) + + out_reshaped = op(inp_reshaped) + + out_inner_shape = out_reshaped.shape[-inner_rank:] + if not out_inner_shape.is_fully_defined(): + out_inner_shape = tf.shape(out_reshaped)[-inner_rank:] + + out = tf.reshape( + out_reshaped, tf.concat((batch_shape, out_inner_shape), axis=-1) + ) + + out.set_shape(inp.shape[:-inner_rank] + out.shape[-inner_rank:]) + return out diff --git a/keras/utils/conv_utils_test.py b/keras/utils/conv_utils_test.py index cc4b66eed11b..f7a11ad0842f 100644 --- a/keras/utils/conv_utils_test.py +++ b/keras/utils/conv_utils_test.py @@ -14,18 +14,17 @@ # ============================================================================== """Tests for conv_utils.""" -import tensorflow.compat.v2 as tf - import itertools -from absl.testing import parameterized import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized from keras.utils import conv_utils def _get_const_output_shape(input_shape, dim): - return tuple([min(d, dim) for d in input_shape]) + return tuple([min(d, dim) for d in input_shape]) input_shapes = [ @@ -50,316 +49,354 @@ def _get_const_output_shape(input_shape, dim): class TestBasicConvUtilsTest(tf.test.TestCase): + def test_convert_data_format(self): + self.assertEqual( + "NCDHW", conv_utils.convert_data_format("channels_first", 5) + ) + self.assertEqual( + "NCHW", conv_utils.convert_data_format("channels_first", 4) + ) + self.assertEqual( + "NCW", conv_utils.convert_data_format("channels_first", 3) + ) + self.assertEqual( + "NHWC", conv_utils.convert_data_format("channels_last", 4) + ) + self.assertEqual( + "NWC", conv_utils.convert_data_format("channels_last", 3) + ) + self.assertEqual( + "NDHWC", conv_utils.convert_data_format("channels_last", 5) + ) + + with self.assertRaises(ValueError): + conv_utils.convert_data_format("invalid", 2) + + def test_normalize_tuple(self): + self.assertEqual( + (2, 2, 2), + conv_utils.normalize_tuple(2, n=3, name="strides", allow_zero=True), + ) + self.assertEqual( + (2, 1, 2), + conv_utils.normalize_tuple( + (2, 1, 2), n=3, name="strides", allow_zero=True + ), + ) + self.assertEqual( + ( + 1, + 2, + 3, + ), + conv_utils.normalize_tuple((1, 2, 3), n=3, name="pool_size"), + ) + self.assertEqual( + (3, 3, 3), conv_utils.normalize_tuple(3, n=3, name="pool_size") + ) - def test_convert_data_format(self): - self.assertEqual('NCDHW', conv_utils.convert_data_format( - 'channels_first', 5)) - self.assertEqual('NCHW', conv_utils.convert_data_format( - 'channels_first', 4)) - self.assertEqual('NCW', conv_utils.convert_data_format('channels_first', 3)) - self.assertEqual('NHWC', conv_utils.convert_data_format('channels_last', 4)) - self.assertEqual('NWC', conv_utils.convert_data_format('channels_last', 3)) - self.assertEqual('NDHWC', conv_utils.convert_data_format( - 'channels_last', 5)) - - with self.assertRaises(ValueError): - conv_utils.convert_data_format('invalid', 2) - - def test_normalize_tuple(self): - self.assertEqual( - (2, 2, 2), - conv_utils.normalize_tuple(2, n=3, name='strides', 
allow_zero=True)) - self.assertEqual((2, 1, 2), - conv_utils.normalize_tuple((2, 1, 2), - n=3, - name='strides', - allow_zero=True)) - self.assertEqual(( - 1, - 2, - 3, - ), conv_utils.normalize_tuple((1, 2, 3), n=3, name='pool_size')) - self.assertEqual((3, 3, 3), - conv_utils.normalize_tuple(3, n=3, name='pool_size')) - - with self.assertRaisesRegex( - ValueError, - r'including \{-1\} that does not satisfy the requirement `> 0`'): - conv_utils.normalize_tuple((3, -1, 3), n=3, name='negative_size') - - with self.assertRaisesRegex( - ValueError, - r'The `strides` argument .* a tuple of 3 integers.* \(2, 1\)$'): - conv_utils.normalize_tuple((2, 1), n=3, name='strides', allow_zero=True) - - with self.assertRaisesRegex( - ValueError, - r'The `kernel_size` argument .* tuple of 3 integers.* None$'): - conv_utils.normalize_tuple(None, n=3, name='kernel_size') - - with self.assertRaisesRegex(ValueError, - r'including \{-4\} that does not .* `>= 0`'): - conv_utils.normalize_tuple(-4, n=3, name='strides', allow_zero=True) - - with self.assertRaisesRegex(ValueError, - r'including \{0\} that does not .* `> 0`'): - conv_utils.normalize_tuple((0, 1, 2), n=3, name='pool_size') - - def test_normalize_data_format(self): - self.assertEqual('channels_last', - conv_utils.normalize_data_format('Channels_Last')) - self.assertEqual('channels_first', - conv_utils.normalize_data_format('CHANNELS_FIRST')) - - with self.assertRaises(ValueError): - conv_utils.normalize_data_format('invalid') - - def test_normalize_padding(self): - self.assertEqual('same', conv_utils.normalize_padding('SAME')) - self.assertEqual('valid', conv_utils.normalize_padding('VALID')) - - with self.assertRaises(ValueError): - conv_utils.normalize_padding('invalid') - - def test_conv_output_length(self): - self.assertEqual(4, conv_utils.conv_output_length(4, 2, 'same', 1, 1)) - self.assertEqual(2, conv_utils.conv_output_length(4, 2, 'same', 2, 1)) - self.assertEqual(3, conv_utils.conv_output_length(4, 2, 'valid', 1, 1)) - self.assertEqual(2, conv_utils.conv_output_length(4, 2, 'valid', 2, 1)) - self.assertEqual(5, conv_utils.conv_output_length(4, 2, 'full', 1, 1)) - self.assertEqual(3, conv_utils.conv_output_length(4, 2, 'full', 2, 1)) - self.assertEqual(2, conv_utils.conv_output_length(5, 2, 'valid', 2, 2)) - - def test_conv_input_length(self): - self.assertEqual(3, conv_utils.conv_input_length(4, 2, 'same', 1)) - self.assertEqual(2, conv_utils.conv_input_length(2, 2, 'same', 2)) - self.assertEqual(4, conv_utils.conv_input_length(3, 2, 'valid', 1)) - self.assertEqual(4, conv_utils.conv_input_length(2, 2, 'valid', 2)) - self.assertEqual(3, conv_utils.conv_input_length(4, 2, 'full', 1)) - self.assertEqual(4, conv_utils.conv_input_length(3, 2, 'full', 2)) - - def test_deconv_output_length(self): - self.assertEqual(4, conv_utils.deconv_output_length(4, 2, 'same', stride=1)) - self.assertEqual(8, conv_utils.deconv_output_length(4, 2, 'same', stride=2)) - self.assertEqual(5, conv_utils.deconv_output_length( - 4, 2, 'valid', stride=1)) - self.assertEqual(8, conv_utils.deconv_output_length( - 4, 2, 'valid', stride=2)) - self.assertEqual(3, conv_utils.deconv_output_length(4, 2, 'full', stride=1)) - self.assertEqual(6, conv_utils.deconv_output_length(4, 2, 'full', stride=2)) - self.assertEqual( - 5, - conv_utils.deconv_output_length( - 4, 2, 'same', output_padding=2, stride=1)) - self.assertEqual( - 7, - conv_utils.deconv_output_length( - 4, 2, 'same', output_padding=1, stride=2)) - self.assertEqual( - 7, - conv_utils.deconv_output_length( - 4, 2, 
'valid', output_padding=2, stride=1)) - self.assertEqual( - 9, - conv_utils.deconv_output_length( - 4, 2, 'valid', output_padding=1, stride=2)) - self.assertEqual( - 5, - conv_utils.deconv_output_length( - 4, 2, 'full', output_padding=2, stride=1)) - self.assertEqual( - 7, - conv_utils.deconv_output_length( - 4, 2, 'full', output_padding=1, stride=2)) - self.assertEqual( - 5, - conv_utils.deconv_output_length( - 4, 2, 'same', output_padding=1, stride=1, dilation=2)) - self.assertEqual( - 12, - conv_utils.deconv_output_length( - 4, 2, 'valid', output_padding=2, stride=2, dilation=3)) - self.assertEqual( - 6, - conv_utils.deconv_output_length( - 4, 2, 'full', output_padding=2, stride=2, dilation=3)) + with self.assertRaisesRegex( + ValueError, + r"including \{-1\} that does not satisfy the requirement `> 0`", + ): + conv_utils.normalize_tuple((3, -1, 3), n=3, name="negative_size") + + with self.assertRaisesRegex( + ValueError, + r"The `strides` argument .* a tuple of 3 integers.* \(2, 1\)$", + ): + conv_utils.normalize_tuple( + (2, 1), n=3, name="strides", allow_zero=True + ) + + with self.assertRaisesRegex( + ValueError, + r"The `kernel_size` argument .* tuple of 3 integers.* None$", + ): + conv_utils.normalize_tuple(None, n=3, name="kernel_size") + + with self.assertRaisesRegex( + ValueError, r"including \{-4\} that does not .* `>= 0`" + ): + conv_utils.normalize_tuple(-4, n=3, name="strides", allow_zero=True) + + with self.assertRaisesRegex( + ValueError, r"including \{0\} that does not .* `> 0`" + ): + conv_utils.normalize_tuple((0, 1, 2), n=3, name="pool_size") + + def test_normalize_data_format(self): + self.assertEqual( + "channels_last", conv_utils.normalize_data_format("Channels_Last") + ) + self.assertEqual( + "channels_first", conv_utils.normalize_data_format("CHANNELS_FIRST") + ) + + with self.assertRaises(ValueError): + conv_utils.normalize_data_format("invalid") + + def test_normalize_padding(self): + self.assertEqual("same", conv_utils.normalize_padding("SAME")) + self.assertEqual("valid", conv_utils.normalize_padding("VALID")) + + with self.assertRaises(ValueError): + conv_utils.normalize_padding("invalid") + + def test_conv_output_length(self): + self.assertEqual(4, conv_utils.conv_output_length(4, 2, "same", 1, 1)) + self.assertEqual(2, conv_utils.conv_output_length(4, 2, "same", 2, 1)) + self.assertEqual(3, conv_utils.conv_output_length(4, 2, "valid", 1, 1)) + self.assertEqual(2, conv_utils.conv_output_length(4, 2, "valid", 2, 1)) + self.assertEqual(5, conv_utils.conv_output_length(4, 2, "full", 1, 1)) + self.assertEqual(3, conv_utils.conv_output_length(4, 2, "full", 2, 1)) + self.assertEqual(2, conv_utils.conv_output_length(5, 2, "valid", 2, 2)) + + def test_conv_input_length(self): + self.assertEqual(3, conv_utils.conv_input_length(4, 2, "same", 1)) + self.assertEqual(2, conv_utils.conv_input_length(2, 2, "same", 2)) + self.assertEqual(4, conv_utils.conv_input_length(3, 2, "valid", 1)) + self.assertEqual(4, conv_utils.conv_input_length(2, 2, "valid", 2)) + self.assertEqual(3, conv_utils.conv_input_length(4, 2, "full", 1)) + self.assertEqual(4, conv_utils.conv_input_length(3, 2, "full", 2)) + + def test_deconv_output_length(self): + self.assertEqual( + 4, conv_utils.deconv_output_length(4, 2, "same", stride=1) + ) + self.assertEqual( + 8, conv_utils.deconv_output_length(4, 2, "same", stride=2) + ) + self.assertEqual( + 5, conv_utils.deconv_output_length(4, 2, "valid", stride=1) + ) + self.assertEqual( + 8, conv_utils.deconv_output_length(4, 2, "valid", stride=2) + ) + 
self.assertEqual( + 3, conv_utils.deconv_output_length(4, 2, "full", stride=1) + ) + self.assertEqual( + 6, conv_utils.deconv_output_length(4, 2, "full", stride=2) + ) + self.assertEqual( + 5, + conv_utils.deconv_output_length( + 4, 2, "same", output_padding=2, stride=1 + ), + ) + self.assertEqual( + 7, + conv_utils.deconv_output_length( + 4, 2, "same", output_padding=1, stride=2 + ), + ) + self.assertEqual( + 7, + conv_utils.deconv_output_length( + 4, 2, "valid", output_padding=2, stride=1 + ), + ) + self.assertEqual( + 9, + conv_utils.deconv_output_length( + 4, 2, "valid", output_padding=1, stride=2 + ), + ) + self.assertEqual( + 5, + conv_utils.deconv_output_length( + 4, 2, "full", output_padding=2, stride=1 + ), + ) + self.assertEqual( + 7, + conv_utils.deconv_output_length( + 4, 2, "full", output_padding=1, stride=2 + ), + ) + self.assertEqual( + 5, + conv_utils.deconv_output_length( + 4, 2, "same", output_padding=1, stride=1, dilation=2 + ), + ) + self.assertEqual( + 12, + conv_utils.deconv_output_length( + 4, 2, "valid", output_padding=2, stride=2, dilation=3 + ), + ) + self.assertEqual( + 6, + conv_utils.deconv_output_length( + 4, 2, "full", output_padding=2, stride=2, dilation=3 + ), + ) @parameterized.parameters(input_shapes) class TestConvUtils(tf.test.TestCase, parameterized.TestCase): + def test_conv_kernel_mask_fc(self, *input_shape): + padding = "valid" + kernel_shape = input_shape + ndims = len(input_shape) + strides = (1,) * ndims + output_shape = _get_const_output_shape(input_shape, dim=1) + mask = np.ones(input_shape + output_shape, bool) + self.assertAllEqual( + mask, + conv_utils.conv_kernel_mask( + input_shape, kernel_shape, strides, padding + ), + ) + + def test_conv_kernel_mask_diag(self, *input_shape): + ndims = len(input_shape) + kernel_shape = (1,) * ndims + strides = (1,) * ndims + + for padding in ["valid", "same"]: + mask = np.identity(int(np.prod(input_shape)), bool) + mask = np.reshape(mask, input_shape * 2) + self.assertAllEqual( + mask, + conv_utils.conv_kernel_mask( + input_shape, kernel_shape, strides, padding + ), + ) + + def test_conv_kernel_mask_full_stride(self, *input_shape): + padding = "valid" + ndims = len(input_shape) + kernel_shape = (1,) * ndims + strides = tuple([max(d, 1) for d in input_shape]) + output_shape = _get_const_output_shape(input_shape, dim=1) + + mask = np.zeros(input_shape + output_shape, bool) + if all(d > 0 for d in mask.shape): + mask[(0,) * len(output_shape)] = True + + self.assertAllEqual( + mask, + conv_utils.conv_kernel_mask( + input_shape, kernel_shape, strides, padding + ), + ) + + def test_conv_kernel_mask_almost_full_stride(self, *input_shape): + padding = "valid" + ndims = len(input_shape) + kernel_shape = (1,) * ndims + strides = tuple([max(d - 1, 1) for d in input_shape]) + output_shape = _get_const_output_shape(input_shape, dim=2) + + mask = np.zeros(input_shape + output_shape, bool) + if all(d > 0 for d in mask.shape): + for in_position in itertools.product( + *[[0, d - 1] for d in input_shape] + ): + out_position = tuple([min(p, 1) for p in in_position]) + mask[in_position + out_position] = True + + self.assertAllEqual( + mask, + conv_utils.conv_kernel_mask( + input_shape, kernel_shape, strides, padding + ), + ) + + def test_conv_kernel_mask_rect_kernel(self, *input_shape): + padding = "valid" + ndims = len(input_shape) + strides = (1,) * ndims + + for d in range(ndims): + kernel_shape = [1] * ndims + kernel_shape[d] = input_shape[d] + + output_shape = list(input_shape) + output_shape[d] = min(1, input_shape[d]) 
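+
+            # Editor's note: a kernel spanning the full extent of axis d
+            # connects every input position along d to the single output
+            # position along d, which the slice assignment below encodes.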
+ + mask = np.identity(int(np.prod(input_shape)), bool) + mask = np.reshape(mask, input_shape * 2) + + for p in itertools.product( + *[range(input_shape[dim]) for dim in range(ndims)] + ): + p = list(p) + p[d] = slice(None) + mask[tuple(p * 2)] = True + + mask = np.take(mask, range(0, min(1, input_shape[d])), ndims + d) + + self.assertAllEqual( + mask, + conv_utils.conv_kernel_mask( + input_shape, kernel_shape, strides, padding + ), + ) + + def test_conv_kernel_mask_wrong_padding(self, *input_shape): + ndims = len(input_shape) + kernel_shape = (1,) * ndims + strides = (1,) * ndims + + conv_utils.conv_kernel_mask(input_shape, kernel_shape, strides, "valid") + + conv_utils.conv_kernel_mask(input_shape, kernel_shape, strides, "same") - def test_conv_kernel_mask_fc(self, *input_shape): - padding = 'valid' - kernel_shape = input_shape - ndims = len(input_shape) - strides = (1,) * ndims - output_shape = _get_const_output_shape(input_shape, dim=1) - mask = np.ones(input_shape + output_shape, np.bool) - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( + self.assertRaises( + NotImplementedError, + conv_utils.conv_kernel_mask, input_shape, kernel_shape, strides, - padding - ) - ) - - def test_conv_kernel_mask_diag(self, *input_shape): - ndims = len(input_shape) - kernel_shape = (1,) * ndims - strides = (1,) * ndims - - for padding in ['valid', 'same']: - mask = np.identity(int(np.prod(input_shape)), np.bool) - mask = np.reshape(mask, input_shape * 2) - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - padding - ) - ) - - def test_conv_kernel_mask_full_stride(self, *input_shape): - padding = 'valid' - ndims = len(input_shape) - kernel_shape = (1,) * ndims - strides = tuple([max(d, 1) for d in input_shape]) - output_shape = _get_const_output_shape(input_shape, dim=1) - - mask = np.zeros(input_shape + output_shape, np.bool) - if all(d > 0 for d in mask.shape): # pylint: disable=not-an-iterable - mask[(0,) * len(output_shape)] = True - - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( + "full", + ) + + def test_conv_kernel_mask_wrong_dims(self, *input_shape): + kernel_shape = 1 + strides = 1 + + conv_utils.conv_kernel_mask(input_shape, kernel_shape, strides, "valid") + + ndims = len(input_shape) + + kernel_shape = (2,) * (ndims + 1) + self.assertRaises( + ValueError, + conv_utils.conv_kernel_mask, + input_shape, + kernel_shape, + strides, + "same", + ) + + strides = (1,) * ndims + self.assertRaises( + ValueError, + conv_utils.conv_kernel_mask, input_shape, kernel_shape, strides, - padding - ) - ) - - def test_conv_kernel_mask_almost_full_stride(self, *input_shape): - padding = 'valid' - ndims = len(input_shape) - kernel_shape = (1,) * ndims - strides = tuple([max(d - 1, 1) for d in input_shape]) - output_shape = _get_const_output_shape(input_shape, dim=2) - - mask = np.zeros(input_shape + output_shape, np.bool) - if all(d > 0 for d in mask.shape): # pylint: disable=not-an-iterable - for in_position in itertools.product(*[[0, d - 1] for d in input_shape]): - out_position = tuple([min(p, 1) for p in in_position]) - mask[in_position + out_position] = True - - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( + "valid", + ) + + kernel_shape = (1,) * ndims + strides = (2,) * (ndims - 1) + self.assertRaises( + ValueError, + conv_utils.conv_kernel_mask, input_shape, kernel_shape, strides, - padding - ) - ) - - def test_conv_kernel_mask_rect_kernel(self, *input_shape): - padding = 'valid' - ndims = len(input_shape) - strides 
= (1,) * ndims - - for d in range(ndims): - kernel_shape = [1] * ndims - kernel_shape[d] = input_shape[d] - - output_shape = list(input_shape) - output_shape[d] = min(1, input_shape[d]) - - mask = np.identity(int(np.prod(input_shape)), np.bool) - mask = np.reshape(mask, input_shape * 2) - - for p in itertools.product(*[range(input_shape[dim]) - for dim in range(ndims)]): - p = list(p) - p[d] = slice(None) - mask[p * 2] = True - - mask = np.take(mask, range(0, min(1, input_shape[d])), ndims + d) - - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - padding - ) - ) - - def test_conv_kernel_mask_wrong_padding(self, *input_shape): - ndims = len(input_shape) - kernel_shape = (1,) * ndims - strides = (1,) * ndims - - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - 'valid' - ) - - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - 'same' - ) - - self.assertRaises(NotImplementedError, - conv_utils.conv_kernel_mask, - input_shape, kernel_shape, strides, 'full') - - def test_conv_kernel_mask_wrong_dims(self, *input_shape): - kernel_shape = 1 - strides = 1 - - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - 'valid' - ) - - ndims = len(input_shape) - - kernel_shape = (2,) * (ndims + 1) - self.assertRaises(ValueError, - conv_utils.conv_kernel_mask, - input_shape, kernel_shape, strides, 'same') - - strides = (1,) * ndims - self.assertRaises(ValueError, - conv_utils.conv_kernel_mask, - input_shape, kernel_shape, strides, 'valid') - - kernel_shape = (1,) * ndims - strides = (2,) * (ndims - 1) - self.assertRaises(ValueError, - conv_utils.conv_kernel_mask, - input_shape, kernel_shape, strides, 'valid') - - strides = (2,) * ndims - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - 'valid' - ) - - -if __name__ == '__main__': - tf.test.main() + "valid", + ) + + strides = (2,) * ndims + conv_utils.conv_kernel_mask(input_shape, kernel_shape, strides, "valid") + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/data_utils.py b/keras/utils/data_utils.py index a281c740766b..21f48cb8c237 100644 --- a/keras/utils/data_utils.py +++ b/keras/utils/data_utils.py @@ -12,13 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=g-import-not-at-top -"""Utilities for file download and caching.""" -import tensorflow.compat.v2 as tf +"""Utilities for file download and caching.""" -from abc import abstractmethod -from contextlib import closing import functools import hashlib import multiprocessing.dummy @@ -32,486 +28,558 @@ import time import typing import urllib +import warnings import weakref import zipfile -from six.moves.urllib.parse import urlsplit +from abc import abstractmethod +from contextlib import closing import numpy as np -from six.moves.urllib.request import urlopen +import tensorflow.compat.v2 as tf +from six.moves.urllib.parse import urlsplit + +from keras.utils import io_utils from keras.utils import tf_inspect from keras.utils.generic_utils import Progbar -from keras.utils import io_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export +from six.moves.urllib.request import urlopen # Required to support google internal urlretrieve -if True: # This gets transformed to `if sys.version_info[0] == 2:` in OSS. 
# pylint: disable=using-constant-test +if True: # This gets transformed to `if sys.version_info[0] == 2:` in OSS. + + def urlretrieve(url, filename, reporthook=None, data=None): + """Replacement for `urlretrieve` for Python 2. + + Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy + `urllib` module, known to have issues with proxy management. + + Args: + url: url to retrieve. + filename: where to store the retrieved data locally. + reporthook: a hook function that will be called once on + establishment of the network connection and once after each block + read thereafter. The hook will be passed three arguments; a count + of blocks transferred so far, a block size in bytes, and the total + size of the file. + data: `data` argument passed to `urlopen`. + """ + + def chunk_read(response, chunk_size=8192, reporthook=None): + content_type = response.info().get("Content-Length") + total_size = -1 + if content_type is not None: + total_size = int(content_type.strip()) + count = 0 + while True: + chunk = response.read(chunk_size) + count += 1 + if reporthook is not None: + reporthook(count, chunk_size, total_size) + if chunk: + yield chunk + else: + break + + response = urlopen(url, data) + with open(filename, "wb") as fd: + for chunk in chunk_read(response, reporthook=reporthook): + fd.write(chunk) - def urlretrieve(url, filename, reporthook=None, data=None): - """Replacement for `urlretrieve` for Python 2. +else: + from urllib.request import urlretrieve - Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy - `urllib` module, known to have issues with proxy management. + +def is_generator_or_sequence(x): + """Check if `x` is a Keras generator type.""" + builtin_iterators = (str, list, tuple, dict, set, frozenset) + if isinstance(x, (tf.Tensor, np.ndarray) + builtin_iterators): + return False + return ( + tf_inspect.isgenerator(x) + or isinstance(x, Sequence) + or isinstance(x, typing.Iterator) + ) + + +def _resolve_path(path): + return os.path.realpath(os.path.abspath(path)) + + +def _is_path_in_dir(path, base_dir): + return _resolve_path(os.path.join(base_dir, path)).startswith(base_dir) + + +def _is_link_in_dir(info, base): + tip = _resolve_path(os.path.join(base, os.path.dirname(info.name))) + return _is_path_in_dir(info.linkname, base_dir=tip) + + +def _filter_safe_paths(members): + base_dir = _resolve_path(".") + for finfo in members: + valid_path = False + if _is_path_in_dir(finfo.name, base_dir): + valid_path = True + yield finfo + elif finfo.issym() or finfo.islnk(): + if _is_link_in_dir(finfo, base_dir): + valid_path = True + yield finfo + if not valid_path: + warnings.warn( + "Skipping invalid path during archive extraction: " + f"'{finfo.name}'." + ) + + +def _extract_archive(file_path, path=".", archive_format="auto"): + """Extracts an archive if it matches tar, tar.gz, tar.bz, or zip formats. Args: - url: url to retrieve. - filename: where to store the retrieved data locally. - reporthook: a hook function that will be called once on establishment of - the network connection and once after each block read thereafter. The - hook will be passed three arguments; a count of blocks transferred so - far, a block size in bytes, and the total size of the file. - data: `data` argument passed to `urlopen`. + file_path: Path to the archive file. + path: Where to extract the archive file. + archive_format: Archive format to try for extracting the file. + Options are `'auto'`, `'tar'`, `'zip'`, and `None`. + `'tar'` includes tar, tar.gz, and tar.bz files. 
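
The new `_filter_safe_paths` helper above guards `tarfile` extraction against path traversal. A minimal standalone sketch of the underlying check, with a hypothetical `is_path_in_dir` name and standard-library calls only:

```python
import os

def is_path_in_dir(path, base_dir):
    # Resolve symlinks and ".." segments, then require the result to
    # remain under base_dir -- the same realpath-prefix test the new
    # `_is_path_in_dir` helper applies to every archive member.
    base_dir = os.path.realpath(os.path.abspath(base_dir))
    target = os.path.realpath(os.path.abspath(os.path.join(base_dir, path)))
    return target.startswith(base_dir)

print(is_path_in_dir("data/train.csv", "/tmp/extract"))    # True
print(is_path_in_dir("../../etc/passwd", "/tmp/extract"))  # False
```

Members failing the check are skipped with a warning rather than aborting the whole extraction.
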
+            The default 'auto' is `['tar', 'zip']`.
+            `None` or an empty list will return no matches found.
+
+    Returns:
+        True if a match was found and an archive extraction was completed,
+        False otherwise.
     """
+    if archive_format is None:
+        return False
+    if archive_format == "auto":
+        archive_format = ["tar", "zip"]
+    if isinstance(archive_format, str):
+        archive_format = [archive_format]
+
+    file_path = io_utils.path_to_string(file_path)
+    path = io_utils.path_to_string(path)
+
+    for archive_type in archive_format:
+        if archive_type == "tar":
+            open_fn = tarfile.open
+            is_match_fn = tarfile.is_tarfile
+        if archive_type == "zip":
+            open_fn = zipfile.ZipFile
+            is_match_fn = zipfile.is_zipfile
+
+        if is_match_fn(file_path):
+            with open_fn(file_path) as archive:
+                try:
+                    if zipfile.is_zipfile(file_path):
+                        # Zip archive.
+                        archive.extractall(path)
+                    else:
+                        # Tar archive, perhaps unsafe. Filter paths.
+                        archive.extractall(
+                            path, members=_filter_safe_paths(archive)
+                        )
+                except (tarfile.TarError, RuntimeError, KeyboardInterrupt):
+                    if os.path.exists(path):
+                        if os.path.isfile(path):
+                            os.remove(path)
+                        else:
+                            shutil.rmtree(path)
+                    raise
+            return True
+    return False
-    def chunk_read(response, chunk_size=8192, reporthook=None):
-      content_type = response.info().get('Content-Length')
-      total_size = -1
-      if content_type is not None:
-        total_size = int(content_type.strip())
-      count = 0
-      while True:
-        chunk = response.read(chunk_size)
-        count += 1
-        if reporthook is not None:
-          reporthook(count, chunk_size, total_size)
-        if chunk:
-          yield chunk
-        else:
-          break
-    response = urlopen(url, data)
-    with open(filename, 'wb') as fd:
-      for chunk in chunk_read(response, reporthook=reporthook):
-        fd.write(chunk)
-else:
-  from urllib.request import urlretrieve  # pylint: disable=g-importing-member
+@keras_export("keras.utils.get_file")
+def get_file(
+    fname=None,
+    origin=None,
+    untar=False,
+    md5_hash=None,
+    file_hash=None,
+    cache_subdir="datasets",
+    hash_algorithm="auto",
+    extract=False,
+    archive_format="auto",
+    cache_dir=None,
+):
+    """Downloads a file from a URL if it is not already in the cache.
+
+    By default the file at the url `origin` is downloaded to the
+    cache_dir `~/.keras`, placed in the cache_subdir `datasets`,
+    and given the filename `fname`. The final location of a file
+    `example.txt` would therefore be `~/.keras/datasets/example.txt`.
+
+    Files in tar, tar.gz, tar.bz, and zip formats can also be extracted.
+    Passing a hash will verify the file after download. The command line
+    programs `shasum` and `sha256sum` can compute the hash.
+
+    Example:
+
+    ```python
+    path_to_downloaded_file = tf.keras.utils.get_file(
+        origin="https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz",
+        extract=True,
+    )
+    ```
+
+    Args:
+        fname: Name of the file. If an absolute path `/path/to/file.txt` is
+            specified the file will be saved at that location. If `None`, the
+            name of the file at `origin` will be used.
+        origin: Original URL of the file.
+        untar: Deprecated in favor of `extract` argument.
+            boolean, whether the file should be decompressed
+        md5_hash: Deprecated in favor of `file_hash` argument.
+            md5 hash of the file for verification
+        file_hash: The expected hash string of the file after download.
+            The sha256 and md5 hash algorithms are both supported.
+        cache_subdir: Subdirectory under the Keras cache dir where the file is
+            saved. If an absolute path `/path/to/folder` is
+            specified the file will be saved at that location.
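
Beyond the docstring example above, `file_hash` is the argument that makes the cache trustworthy: an existing cached file is re-verified, and a mismatching download raises. A sketch with a placeholder URL and digest (both hypothetical):

```python
import tensorflow as tf

# Both the URL and the digest below are placeholders -- substitute the
# real location and the hash obtained via `shasum -a 256`.
path = tf.keras.utils.get_file(
    fname="corpus.txt",
    origin="https://example.com/data/corpus.txt",
    file_hash="9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08",
)
```
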
+ hash_algorithm: Select the hash algorithm to verify the file. + options are `'md5'`, `'sha256'`, and `'auto'`. + The default 'auto' detects the hash algorithm in use. + extract: True tries extracting the file as an Archive, like tar or zip. + archive_format: Archive format to try for extracting the file. + Options are `'auto'`, `'tar'`, `'zip'`, and `None`. + `'tar'` includes tar, tar.gz, and tar.bz files. + The default `'auto'` corresponds to `['tar', 'zip']`. + None or an empty list will return no matches found. + cache_dir: Location to store cached files, when None it + defaults to `~/.keras/`. -def is_generator_or_sequence(x): - """Check if `x` is a Keras generator type.""" - builtin_iterators = (str, list, tuple, dict, set, frozenset) - if isinstance(x, (tf.Tensor, np.ndarray) + builtin_iterators): - return False - return (tf_inspect.isgenerator(x) or - isinstance(x, Sequence) or - isinstance(x, typing.Iterator)) - - -def _extract_archive(file_path, path='.', archive_format='auto'): - """Extracts an archive if it matches tar, tar.gz, tar.bz, or zip formats. - - Args: - file_path: path to the archive file - path: path to extract the archive file - archive_format: Archive format to try for extracting the file. - Options are 'auto', 'tar', 'zip', and None. - 'tar' includes tar, tar.gz, and tar.bz files. - The default 'auto' is ['tar', 'zip']. - None or an empty list will return no matches found. - - Returns: - True if a match was found and an archive extraction was completed, - False otherwise. - """ - if archive_format is None: - return False - if archive_format == 'auto': - archive_format = ['tar', 'zip'] - if isinstance(archive_format, str): - archive_format = [archive_format] - - file_path = io_utils.path_to_string(file_path) - path = io_utils.path_to_string(path) - - for archive_type in archive_format: - if archive_type == 'tar': - open_fn = tarfile.open - is_match_fn = tarfile.is_tarfile - if archive_type == 'zip': - open_fn = zipfile.ZipFile - is_match_fn = zipfile.is_zipfile - - if is_match_fn(file_path): - with open_fn(file_path) as archive: - try: - archive.extractall(path) - except (tarfile.TarError, RuntimeError, KeyboardInterrupt): - if os.path.exists(path): - if os.path.isfile(path): - os.remove(path) - else: - shutil.rmtree(path) - raise - return True - return False - - -@keras_export('keras.utils.get_file') -def get_file(fname=None, - origin=None, - untar=False, - md5_hash=None, - file_hash=None, - cache_subdir='datasets', - hash_algorithm='auto', - extract=False, - archive_format='auto', - cache_dir=None): - """Downloads a file from a URL if it not already in the cache. - - By default the file at the url `origin` is downloaded to the - cache_dir `~/.keras`, placed in the cache_subdir `datasets`, - and given the filename `fname`. The final location of a file - `example.txt` would therefore be `~/.keras/datasets/example.txt`. - - Files in tar, tar.gz, tar.bz, and zip formats can also be extracted. - Passing a hash will verify the file after download. The command line - programs `shasum` and `sha256sum` can compute the hash. - - Example: - - ```python - path_to_downloaded_file = tf.keras.utils.get_file( - "flower_photos", - "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz", - untar=True) - ``` - - Args: - fname: Name of the file. If an absolute path `/path/to/file.txt` is - specified the file will be saved at that location. If `None`, the - name of the file at `origin` will be used. - origin: Original URL of the file. 
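
Per the warning above, the safest workflow is to compute the expected digest over a trusted channel and pass it in. A chunked sha256 helper in the spirit of this module's `_hash_file` (the function name here is illustrative):

```python
import hashlib

def sha256_of(fpath, chunk_size=65535):
    # Stream the file in chunks so arbitrarily large downloads can be
    # hashed without loading them into memory.
    hasher = hashlib.sha256()
    with open(fpath, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
```

The result is what `file_hash=` expects; a 64-character digest is auto-detected as sha256.
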
- untar: Deprecated in favor of `extract` argument. - boolean, whether the file should be decompressed - md5_hash: Deprecated in favor of `file_hash` argument. - md5 hash of the file for verification - file_hash: The expected hash string of the file after download. - The sha256 and md5 hash algorithms are both supported. - cache_subdir: Subdirectory under the Keras cache dir where the file is - saved. If an absolute path `/path/to/folder` is - specified the file will be saved at that location. - hash_algorithm: Select the hash algorithm to verify the file. - options are `'md5'`, `'sha256'`, and `'auto'`. - The default 'auto' detects the hash algorithm in use. - extract: True tries extracting the file as an Archive, like tar or zip. - archive_format: Archive format to try for extracting the file. - Options are `'auto'`, `'tar'`, `'zip'`, and `None`. - `'tar'` includes tar, tar.gz, and tar.bz files. - The default `'auto'` corresponds to `['tar', 'zip']`. - None or an empty list will return no matches found. - cache_dir: Location to store cached files, when None it - defaults to the default directory `~/.keras/`. - - Returns: - Path to the downloaded file - """ - if origin is None: - raise ValueError('Please specify the "origin" argument (URL of the file ' - 'to download).') - - if cache_dir is None: - cache_dir = os.path.join(os.path.expanduser('~'), '.keras') - if md5_hash is not None and file_hash is None: - file_hash = md5_hash - hash_algorithm = 'md5' - datadir_base = os.path.expanduser(cache_dir) - if not os.access(datadir_base, os.W_OK): - datadir_base = os.path.join('/tmp', '.keras') - datadir = os.path.join(datadir_base, cache_subdir) - _makedirs_exist_ok(datadir) - - fname = io_utils.path_to_string(fname) - if not fname: - fname = os.path.basename(urlsplit(origin).path) - if not fname: - raise ValueError( - f"Can't parse the file name from the origin provided: '{origin}'." - "Please specify the `fname` as the input param.") - - if untar: - if fname.endswith('.tar.gz'): - fname = pathlib.Path(fname) - # The 2 `.with_suffix()` are because of `.tar.gz` as pathlib - # considers it as 2 suffixes. - fname = fname.with_suffix('').with_suffix('') - fname = str(fname) - untar_fpath = os.path.join(datadir, fname) - fpath = untar_fpath + '.tar.gz' - else: - fpath = os.path.join(datadir, fname) - - download = False - if os.path.exists(fpath): - # File found; verify integrity if a hash was provided. 
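
The `untar` branch above strips a `.tar.gz` name with two chained `.with_suffix("")` calls because `pathlib` treats the extension as two separate suffixes:

```python
import pathlib

fname = pathlib.Path("dataset.tar.gz")
print(fname.suffixes)                         # ['.tar', '.gz']
print(fname.with_suffix("").with_suffix(""))  # dataset
```
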
- if file_hash is not None: - if not validate_file(fpath, file_hash, algorithm=hash_algorithm): - io_utils.print_msg( - 'A local file was found, but it seems to be ' - f'incomplete or outdated because the {hash_algorithm} ' - f'file hash does not match the original value of {file_hash} ' - 'so we will re-download the data.') - download = True - else: - download = True - - if download: - io_utils.print_msg(f'Downloading data from {origin}') - - class DLProgbar: - """Manage progress bar state for use in urlretrieve.""" - - def __init__(self): - self.progbar = None - self.finished = False - - def __call__(self, block_num, block_size, total_size): - if not self.progbar: - if total_size == -1: - total_size = None - self.progbar = Progbar(total_size) - current = block_num * block_size - if current < total_size: - self.progbar.update(current) - elif not self.finished: - self.progbar.update(self.progbar.target) - self.finished = True - - error_msg = 'URL fetch failure on {}: {} -- {}' - try: - try: - urlretrieve(origin, fpath, DLProgbar()) - except urllib.error.HTTPError as e: - raise Exception(error_msg.format(origin, e.code, e.msg)) - except urllib.error.URLError as e: - raise Exception(error_msg.format(origin, e.errno, e.reason)) - except (Exception, KeyboardInterrupt) as e: - if os.path.exists(fpath): - os.remove(fpath) - raise - - # Validate download if succeeded and user provided an expected hash - # Security conscious users would get the hash of the file from a separate - # channel and pass it to this API to prevent MITM / corruption: - if os.path.exists(fpath) and file_hash is not None: - if not validate_file(fpath, file_hash, algorithm=hash_algorithm): - raise ValueError( - f'Incomplete or corrupted file detected. The {hash_algorithm} ' - f'file hash does not match the provided value of {file_hash}.') + Returns: + Path to the downloaded file. - if untar: - if not os.path.exists(untar_fpath): - _extract_archive(fpath, datadir, archive_format='tar') - return untar_fpath + ⚠️ **Warning on malicious downloads** ⚠️ - if extract: - _extract_archive(fpath, datadir, archive_format) + Downloading something from the Internet carries a risk. + NEVER download a file/archive if you do not trust the source. + We recommend that you specify the `file_hash` argument + (if the hash of the source file is known) to make sure that the file you + are getting is the one you expect. + """ + if origin is None: + raise ValueError( + 'Please specify the "origin" argument (URL of the file ' + "to download)." + ) + + if cache_dir is None: + cache_dir = os.path.join(os.path.expanduser("~"), ".keras") + if md5_hash is not None and file_hash is None: + file_hash = md5_hash + hash_algorithm = "md5" + datadir_base = os.path.expanduser(cache_dir) + if not os.access(datadir_base, os.W_OK): + datadir_base = os.path.join("/tmp", ".keras") + datadir = os.path.join(datadir_base, cache_subdir) + _makedirs_exist_ok(datadir) + + fname = io_utils.path_to_string(fname) + if not fname: + fname = os.path.basename(urlsplit(origin).path) + if not fname: + raise ValueError( + "Can't parse the file name from the origin provided: " + f"'{origin}'." + "Please specify the `fname` as the input param." + ) + + if untar: + if fname.endswith(".tar.gz"): + fname = pathlib.Path(fname) + # The 2 `.with_suffix()` are because of `.tar.gz` as pathlib + # considers it as 2 suffixes. 
+ fname = fname.with_suffix("").with_suffix("") + fname = str(fname) + untar_fpath = os.path.join(datadir, fname) + fpath = untar_fpath + ".tar.gz" + else: + fpath = os.path.join(datadir, fname) + + download = False + if os.path.exists(fpath): + # File found; verify integrity if a hash was provided. + if file_hash is not None: + if not validate_file(fpath, file_hash, algorithm=hash_algorithm): + io_utils.print_msg( + "A local file was found, but it seems to be " + f"incomplete or outdated because the {hash_algorithm} " + "file hash does not match the original value of " + f"{file_hash} " + "so we will re-download the data." + ) + download = True + else: + download = True - return fpath + if download: + io_utils.print_msg(f"Downloading data from {origin}") + + class DLProgbar: + """Manage progress bar state for use in urlretrieve.""" + + def __init__(self): + self.progbar = None + self.finished = False + + def __call__(self, block_num, block_size, total_size): + if not self.progbar: + if total_size == -1: + total_size = None + self.progbar = Progbar(total_size) + current = block_num * block_size + + if total_size is None: + self.progbar.update(current) + else: + if current < total_size: + self.progbar.update(current) + elif not self.finished: + self.progbar.update(self.progbar.target) + self.finished = True + + error_msg = "URL fetch failure on {}: {} -- {}" + try: + try: + urlretrieve(origin, fpath, DLProgbar()) + except urllib.error.HTTPError as e: + raise Exception(error_msg.format(origin, e.code, e.msg)) + except urllib.error.URLError as e: + raise Exception(error_msg.format(origin, e.errno, e.reason)) + except (Exception, KeyboardInterrupt): + if os.path.exists(fpath): + os.remove(fpath) + raise + + # Validate download if succeeded and user provided an expected hash + # Security conscious users would get the hash of the file from a + # separate channel and pass it to this API to prevent MITM / corruption: + if os.path.exists(fpath) and file_hash is not None: + if not validate_file(fpath, file_hash, algorithm=hash_algorithm): + raise ValueError( + "Incomplete or corrupted file detected. " + f"The {hash_algorithm} " + "file hash does not match the provided value " + f"of {file_hash}." + ) + + if untar: + if not os.path.exists(untar_fpath): + _extract_archive(fpath, datadir, archive_format="tar") + return untar_fpath + + if extract: + _extract_archive(fpath, datadir, archive_format) + + return fpath def _makedirs_exist_ok(datadir): - os.makedirs(datadir, exist_ok=True) # pylint: disable=unexpected-keyword-arg + os.makedirs(datadir, exist_ok=True) def _resolve_hasher(algorithm, file_hash=None): - """Returns hash algorithm as hashlib function.""" - if algorithm == 'sha256': - return hashlib.sha256() + """Returns hash algorithm as hashlib function.""" + if algorithm == "sha256": + return hashlib.sha256() - if algorithm == 'auto' and file_hash is not None and len(file_hash) == 64: - return hashlib.sha256() + if algorithm == "auto" and file_hash is not None and len(file_hash) == 64: + return hashlib.sha256() - # This is used only for legacy purposes. - return hashlib.md5() + # This is used only for legacy purposes. + return hashlib.md5() -def _hash_file(fpath, algorithm='sha256', chunk_size=65535): - """Calculates a file sha256 or md5 hash. +def _hash_file(fpath, algorithm="sha256", chunk_size=65535): + """Calculates a file sha256 or md5 hash. 
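
`_resolve_hasher` above encodes the `'auto'` rule: a 64-character digest can only be sha256, while shorter legacy digests fall back to md5. A self-contained restatement (the standalone function name is illustrative):

```python
import hashlib

def resolve_hasher(algorithm, file_hash=None):
    # "sha256" explicitly, or "auto" with a 64-character digest,
    # selects sha256; everything else is treated as legacy md5.
    if algorithm == "sha256":
        return hashlib.sha256()
    if algorithm == "auto" and file_hash is not None and len(file_hash) == 64:
        return hashlib.sha256()
    return hashlib.md5()

print(resolve_hasher("auto", "a" * 64).name)  # sha256
print(resolve_hasher("auto", "a" * 32).name)  # md5
```
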
- Example: + Example: - ```python - _hash_file('/path/to/file.zip') - 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' - ``` + ```python + _hash_file('/path/to/file.zip') + 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' + ``` - Args: - fpath: path to the file being validated - algorithm: hash algorithm, one of `'auto'`, `'sha256'`, or `'md5'`. - The default `'auto'` detects the hash algorithm in use. - chunk_size: Bytes to read at a time, important for large files. + Args: + fpath: Path to the file being validated. + algorithm: Hash algorithm, one of `'auto'`, `'sha256'`, or `'md5'`. + The default `'auto'` detects the hash algorithm in use. + chunk_size: Bytes to read at a time, important for large files. - Returns: - The file hash - """ - if isinstance(algorithm, str): - hasher = _resolve_hasher(algorithm) - else: - hasher = algorithm + Returns: + The file hash. + """ + if isinstance(algorithm, str): + hasher = _resolve_hasher(algorithm) + else: + hasher = algorithm - with open(fpath, 'rb') as fpath_file: - for chunk in iter(lambda: fpath_file.read(chunk_size), b''): - hasher.update(chunk) + with open(fpath, "rb") as fpath_file: + for chunk in iter(lambda: fpath_file.read(chunk_size), b""): + hasher.update(chunk) - return hasher.hexdigest() + return hasher.hexdigest() -def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535): - """Validates a file against a sha256 or md5 hash. +def validate_file(fpath, file_hash, algorithm="auto", chunk_size=65535): + """Validates a file against a sha256 or md5 hash. - Args: - fpath: path to the file being validated - file_hash: The expected hash string of the file. - The sha256 and md5 hash algorithms are both supported. - algorithm: Hash algorithm, one of 'auto', 'sha256', or 'md5'. - The default 'auto' detects the hash algorithm in use. - chunk_size: Bytes to read at a time, important for large files. + Args: + fpath: path to the file being validated + file_hash: The expected hash string of the file. + The sha256 and md5 hash algorithms are both supported. + algorithm: Hash algorithm, one of 'auto', 'sha256', or 'md5'. + The default 'auto' detects the hash algorithm in use. + chunk_size: Bytes to read at a time, important for large files. - Returns: - Whether the file is valid - """ - hasher = _resolve_hasher(algorithm, file_hash) + Returns: + Whether the file is valid + """ + hasher = _resolve_hasher(algorithm, file_hash) - if str(_hash_file(fpath, hasher, chunk_size)) == str(file_hash): - return True - else: - return False + if str(_hash_file(fpath, hasher, chunk_size)) == str(file_hash): + return True + else: + return False class ThreadsafeIter: - """Wrap an iterator with a lock and propagate exceptions to all threads.""" + """Wrap an iterator with a lock and propagate exceptions to all threads.""" - def __init__(self, it): - self.it = it - self.lock = threading.Lock() + def __init__(self, it): + self.it = it + self.lock = threading.Lock() - # After a generator throws an exception all subsequent next() calls raise a - # StopIteration Exception. This, however, presents an issue when mixing - # generators and threading because it means the order of retrieval need not - # match the order in which the generator was called. This can make it appear - # that a generator exited normally when in fact the terminating exception is - # just in a different thread. In order to provide thread safety, once - # self.it has thrown an exception we continue to throw the same exception. 
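
`ThreadsafeIter` serializes `next()` calls behind a lock and, as the comment block explains, keeps re-raising the first exception so no thread mistakes a crashed generator for a clean stop. A small demonstration, assuming the `keras.utils.data_utils` import path used by the tests in this patch:

```python
import threading
from keras.utils import data_utils

def counter():
    for i in range(100):
        yield i

gen = data_utils.ThreadsafeIter(counter())
results = []

def consume():
    # Four threads pull from the same wrapped generator; the lock
    # guarantees each value is handed out exactly once.
    for _ in range(10):
        results.append(next(gen))

threads = [threading.Thread(target=consume) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(sorted(results) == list(range(40)))  # True: no value lost or duplicated
```
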
- self._exception = None + # After a generator throws an exception all subsequent next() calls + # raise a StopIteration Exception. This, however, presents an issue when + # mixing generators and threading because it means the order of + # retrieval need not match the order in which the generator was called. + # This can make it appear that a generator exited normally when in fact + # the terminating exception is just in a different thread. In order to + # provide thread safety, once self.it has thrown an exception we + # continue to throw the same exception. + self._exception = None - def __iter__(self): - return self + def __iter__(self): + return self - def next(self): - return self.__next__() + def next(self): + return self.__next__() - def __next__(self): - with self.lock: - if self._exception: - raise self._exception # pylint: disable=raising-bad-type + def __next__(self): + with self.lock: + if self._exception: + raise self._exception - try: - return next(self.it) - except Exception as e: - self._exception = e - raise + try: + return next(self.it) + except Exception as e: + self._exception = e + raise def threadsafe_generator(f): + @functools.wraps(f) + def g(*a, **kw): + return ThreadsafeIter(f(*a, **kw)) - @functools.wraps(f) - def g(*a, **kw): - return ThreadsafeIter(f(*a, **kw)) + return g - return g - -@keras_export('keras.utils.Sequence') +@keras_export("keras.utils.Sequence") class Sequence: - """Base object for fitting to a sequence of data, such as a dataset. + """Base object for fitting to a sequence of data, such as a dataset. - Every `Sequence` must implement the `__getitem__` and the `__len__` methods. - If you want to modify your dataset between epochs you may implement - `on_epoch_end`. - The method `__getitem__` should return a complete batch. + Every `Sequence` must implement the `__getitem__` and the `__len__` methods. + If you want to modify your dataset between epochs, you may implement + `on_epoch_end`. The method `__getitem__` should return a complete batch. - Notes: + Notes: - `Sequence` are a safer way to do multiprocessing. This structure guarantees - that the network will only train once - on each sample per epoch which is not the case with generators. + `Sequence` is a safer way to do multiprocessing. This structure guarantees + that the network will only train once on each sample per epoch, which is not + the case with generators. - Examples: + Examples: - ```python - from skimage.io import imread - from skimage.transform import resize - import numpy as np - import math + ```python + from skimage.io import imread + from skimage.transform import resize + import numpy as np + import math - # Here, `x_set` is list of path to the images - # and `y_set` are the associated classes. + # Here, `x_set` is list of path to the images + # and `y_set` are the associated classes. 
-  class CIFAR10Sequence(tf.keras.utils.Sequence):
+    class CIFAR10Sequence(tf.keras.utils.Sequence):

-      def __init__(self, x_set, y_set, batch_size):
-          self.x, self.y = x_set, y_set
-          self.batch_size = batch_size
+        def __init__(self, x_set, y_set, batch_size):
+            self.x, self.y = x_set, y_set
+            self.batch_size = batch_size

-      def __len__(self):
-          return math.ceil(len(self.x) / self.batch_size)
+        def __len__(self):
+            return math.ceil(len(self.x) / self.batch_size)

-      def __getitem__(self, idx):
-          batch_x = self.x[idx * self.batch_size:(idx + 1) *
-          self.batch_size]
-          batch_y = self.y[idx * self.batch_size:(idx + 1) *
-          self.batch_size]
+        def __getitem__(self, idx):
+            low = idx * self.batch_size
+            # Cap upper bound at array length; the last batch may be smaller
+            # if the total number of items is not a multiple of batch size.
+            high = min(low + self.batch_size, len(self.x))
+            batch_x = self.x[low:high]
+            batch_y = self.y[low:high]

-          return np.array([
-              resize(imread(file_name), (200, 200))
-                 for file_name in batch_x]), np.array(batch_y)
-  ```
-  """
+            return np.array([
+                resize(imread(file_name), (200, 200))
+                   for file_name in batch_x]), np.array(batch_y)
+    ```
+    """

-  @abstractmethod
-  def __getitem__(self, index):
-    """Gets batch at position `index`.
+    @abstractmethod
+    def __getitem__(self, index):
+        """Gets batch at position `index`.

-    Args:
-      index: position of the batch in the Sequence.
+        Args:
+            index: position of the batch in the Sequence.

-    Returns:
-      A batch
-    """
-    raise NotImplementedError
+        Returns:
+            A batch
+        """
+        raise NotImplementedError

-  @abstractmethod
-  def __len__(self):
-    """Number of batch in the Sequence.
+    @abstractmethod
+    def __len__(self):
+        """Number of batches in the Sequence.

-    Returns:
-      The number of batches in the Sequence.
-    """
-    raise NotImplementedError
+        Returns:
+            The number of batches in the Sequence.
+        """
+        raise NotImplementedError

-  def on_epoch_end(self):
-    """Method called at the end of every epoch.
-    """
-    pass
+    def on_epoch_end(self):
+        """Method called at the end of every epoch."""
+        pass

-  def __iter__(self):
-    """Create a generator that iterate over the Sequence."""
-    for item in (self[i] for i in range(len(self))):
-      yield item
+    def __iter__(self):
+        """Create a generator that iterates over the Sequence."""
+        for item in (self[i] for i in range(len(self))):
+            yield item


 def iter_sequence_infinite(seq):
-  """Iterates indefinitely over a Sequence.
+    """Iterates indefinitely over a Sequence.

-  Args:
-    seq: `Sequence` instance.
+    Args:
+        seq: `Sequence` instance.

-  Yields:
-    Batches of data from the `Sequence`.
+    Yields:
+        Batches of data from the `Sequence`.
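
The docstring's CIFAR10Sequence relies on scikit-image; a dependency-free variant over in-memory arrays shows the same contract, including the shorter final batch the new `min()` bound allows:

```python
import math
import numpy as np
import tensorflow as tf

class ArraySequence(tf.keras.utils.Sequence):
    def __init__(self, x, y, batch_size):
        self.x, self.y, self.batch_size = x, y, batch_size

    def __len__(self):
        # ceil() so the final, possibly smaller batch is still served.
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        low = idx * self.batch_size
        high = min(low + self.batch_size, len(self.x))
        return self.x[low:high], self.y[low:high]

seq = ArraySequence(np.random.rand(10, 4), np.random.randint(2, size=10), 3)
print(len(seq))         # 4 batches: 3 + 3 + 3 + 1
print(seq[3][0].shape)  # (1, 4) -- the short final batch
```
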
+ """ + while True: + for item in seq: + yield item # Global variables to be shared across processes @@ -531,522 +599,548 @@ def iter_sequence_infinite(seq): def dont_use_multiprocessing_pool(f): - @functools.wraps(f) - def wrapped(*args, **kwargs): - with _FORCE_THREADPOOL_LOCK: - global _FORCE_THREADPOOL - old_force_threadpool, _FORCE_THREADPOOL = _FORCE_THREADPOOL, True - out = f(*args, **kwargs) - _FORCE_THREADPOOL = old_force_threadpool - return out - return wrapped + @functools.wraps(f) + def wrapped(*args, **kwargs): + with _FORCE_THREADPOOL_LOCK: + global _FORCE_THREADPOOL + old_force_threadpool, _FORCE_THREADPOOL = _FORCE_THREADPOOL, True + out = f(*args, **kwargs) + _FORCE_THREADPOOL = old_force_threadpool + return out + + return wrapped def get_pool_class(use_multiprocessing): - global _FORCE_THREADPOOL - if not use_multiprocessing or _FORCE_THREADPOOL: - return multiprocessing.dummy.Pool # ThreadPool - return multiprocessing.Pool + global _FORCE_THREADPOOL + if not use_multiprocessing or _FORCE_THREADPOOL: + return multiprocessing.dummy.Pool # ThreadPool + return multiprocessing.Pool def get_worker_id_queue(): - """Lazily create the queue to track worker ids.""" - global _WORKER_ID_QUEUE - if _WORKER_ID_QUEUE is None: - _WORKER_ID_QUEUE = multiprocessing.Queue() - return _WORKER_ID_QUEUE + """Lazily create the queue to track worker ids.""" + global _WORKER_ID_QUEUE + if _WORKER_ID_QUEUE is None: + _WORKER_ID_QUEUE = multiprocessing.Queue() + return _WORKER_ID_QUEUE def init_pool(seqs): - global _SHARED_SEQUENCES - _SHARED_SEQUENCES = seqs + global _SHARED_SEQUENCES + _SHARED_SEQUENCES = seqs def get_index(uid, i): - """Get the value from the Sequence `uid` at index `i`. + """Get the value from the Sequence `uid` at index `i`. - To allow multiple Sequences to be used at the same time, we use `uid` to - get a specific one. A single Sequence would cause the validation to - overwrite the training Sequence. + To allow multiple Sequences to be used at the same time, we use `uid` to + get a specific one. A single Sequence would cause the validation to + overwrite the training Sequence. - Args: - uid: int, Sequence identifier - i: index + Args: + uid: int, Sequence identifier + i: index - Returns: - The value at index `i`. - """ - return _SHARED_SEQUENCES[uid][i] + Returns: + The value at index `i`. + """ + return _SHARED_SEQUENCES[uid][i] -@keras_export('keras.utils.SequenceEnqueuer') +@keras_export("keras.utils.SequenceEnqueuer") class SequenceEnqueuer: - """Base class to enqueue inputs. - - The task of an Enqueuer is to use parallelism to speed up preprocessing. - This is done with processes or threads. - - Example: - - ```python - enqueuer = SequenceEnqueuer(...) - enqueuer.start() - datas = enqueuer.get() - for data in datas: - # Use the inputs; training, evaluating, predicting. - # ... stop sometime. - enqueuer.stop() - ``` - - The `enqueuer.get()` should be an infinite stream of data. - """ - - def __init__(self, sequence, - use_multiprocessing=False): - self.sequence = sequence - self.use_multiprocessing = use_multiprocessing - - global _SEQUENCE_COUNTER - if _SEQUENCE_COUNTER is None: - try: - _SEQUENCE_COUNTER = multiprocessing.Value('i', 0) - except OSError: - # In this case the OS does not allow us to use - # multiprocessing. We resort to an int - # for enqueuer indexing. - _SEQUENCE_COUNTER = 0 - - if isinstance(_SEQUENCE_COUNTER, int): - self.uid = _SEQUENCE_COUNTER - _SEQUENCE_COUNTER += 1 - else: - # Doing Multiprocessing.Value += x is not process-safe. 
- with _SEQUENCE_COUNTER.get_lock(): - self.uid = _SEQUENCE_COUNTER.value - _SEQUENCE_COUNTER.value += 1 + """Base class to enqueue inputs. - self.workers = 0 - self.executor_fn = None - self.queue = None - self.run_thread = None - self.stop_signal = None + The task of an Enqueuer is to use parallelism to speed up preprocessing. + This is done with processes or threads. - def is_running(self): - return self.stop_signal is not None and not self.stop_signal.is_set() + Example: - def start(self, workers=1, max_queue_size=10): - """Starts the handler's workers. + ```python + enqueuer = SequenceEnqueuer(...) + enqueuer.start() + datas = enqueuer.get() + for data in datas: + # Use the inputs; training, evaluating, predicting. + # ... stop sometime. + enqueuer.stop() + ``` - Args: - workers: Number of workers. - max_queue_size: queue size - (when full, workers could block on `put()`) + The `enqueuer.get()` should be an infinite stream of data. """ - if self.use_multiprocessing: - self.executor_fn = self._get_executor_init(workers) - else: - # We do not need the init since it's threads. - self.executor_fn = lambda _: get_pool_class(False)(workers) - self.workers = workers - self.queue = queue.Queue(max_queue_size) - self.stop_signal = threading.Event() - self.run_thread = threading.Thread(target=self._run) - self.run_thread.daemon = True - self.run_thread.start() - def _send_sequence(self): - """Sends current Iterable to all workers.""" - # For new processes that may spawn - _SHARED_SEQUENCES[self.uid] = self.sequence - - def stop(self, timeout=None): - """Stops running threads and wait for them to exit, if necessary. + def __init__(self, sequence, use_multiprocessing=False): + self.sequence = sequence + self.use_multiprocessing = use_multiprocessing + + global _SEQUENCE_COUNTER + if _SEQUENCE_COUNTER is None: + try: + _SEQUENCE_COUNTER = multiprocessing.Value("i", 0) + except OSError: + # In this case the OS does not allow us to use + # multiprocessing. We resort to an int + # for enqueuer indexing. + _SEQUENCE_COUNTER = 0 + + if isinstance(_SEQUENCE_COUNTER, int): + self.uid = _SEQUENCE_COUNTER + _SEQUENCE_COUNTER += 1 + else: + # Doing Multiprocessing.Value += x is not process-safe. + with _SEQUENCE_COUNTER.get_lock(): + self.uid = _SEQUENCE_COUNTER.value + _SEQUENCE_COUNTER.value += 1 + + self.workers = 0 + self.executor_fn = None + self.queue = None + self.run_thread = None + self.stop_signal = None + + def is_running(self): + return self.stop_signal is not None and not self.stop_signal.is_set() + + def start(self, workers=1, max_queue_size=10): + """Starts the handler's workers. + + Args: + workers: Number of workers. + max_queue_size: queue size + (when full, workers could block on `put()`) + """ + if self.use_multiprocessing: + self.executor_fn = self._get_executor_init(workers) + else: + # We do not need the init since it's threads. + self.executor_fn = lambda _: get_pool_class(False)(workers) + self.workers = workers + self.queue = queue.Queue(max_queue_size) + self.stop_signal = threading.Event() + self.run_thread = threading.Thread(target=self._run) + self.run_thread.daemon = True + self.run_thread.start() + + def _send_sequence(self): + """Sends current Iterable to all workers.""" + # For new processes that may spawn + _SHARED_SEQUENCES[self.uid] = self.sequence + + def stop(self, timeout=None): + """Stops running threads and wait for them to exit, if necessary. + + Should be called by the same thread which called `start()`. 
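
The uid allocation above is the textbook use of `multiprocessing.Value`: plain `+=` is a read-modify-write race across processes, so the increment happens under the Value's own lock. In isolation:

```python
import multiprocessing

counter = multiprocessing.Value("i", 0)

def next_uid(counter):
    # `counter.value += 1` alone is not process-safe; holding the
    # Value's lock makes the read-increment-write atomic.
    with counter.get_lock():
        uid = counter.value
        counter.value += 1
    return uid

print(next_uid(counter), next_uid(counter))  # 0 1
```
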
+ + Args: + timeout: maximum time to wait on `thread.join()` + """ + self.stop_signal.set() + with self.queue.mutex: + self.queue.queue.clear() + self.queue.unfinished_tasks = 0 + self.queue.not_full.notify() + self.run_thread.join(timeout) + _SHARED_SEQUENCES[self.uid] = None + + def __del__(self): + if self.is_running(): + self.stop() - Should be called by the same thread which called `start()`. + @abstractmethod + def _run(self): + """Submits request to the executor and queue the `Future` objects.""" + raise NotImplementedError - Args: - timeout: maximum time to wait on `thread.join()` - """ - self.stop_signal.set() - with self.queue.mutex: - self.queue.queue.clear() - self.queue.unfinished_tasks = 0 - self.queue.not_full.notify() - self.run_thread.join(timeout) - _SHARED_SEQUENCES[self.uid] = None - - def __del__(self): - if self.is_running(): - self.stop() - - @abstractmethod - def _run(self): - """Submits request to the executor and queue the `Future` objects.""" - raise NotImplementedError - - @abstractmethod - def _get_executor_init(self, workers): - """Gets the Pool initializer for multiprocessing. + @abstractmethod + def _get_executor_init(self, workers): + """Gets the Pool initializer for multiprocessing. - Args: - workers: Number of workers. + Args: + workers: Number of workers. - Returns: - Function, a Function to initialize the pool - """ - raise NotImplementedError + Returns: + Function, a Function to initialize the pool + """ + raise NotImplementedError - @abstractmethod - def get(self): - """Creates a generator to extract data from the queue. + @abstractmethod + def get(self): + """Creates a generator to extract data from the queue. - Skip the data if it is `None`. - # Returns - Generator yielding tuples `(inputs, targets)` - or `(inputs, targets, sample_weights)`. - """ - raise NotImplementedError + Skip the data if it is `None`. + # Returns + Generator yielding tuples `(inputs, targets)` + or `(inputs, targets, sample_weights)`. + """ + raise NotImplementedError -@keras_export('keras.utils.OrderedEnqueuer') +@keras_export("keras.utils.OrderedEnqueuer") class OrderedEnqueuer(SequenceEnqueuer): - """Builds a Enqueuer from a Sequence. - - Args: - sequence: A `tf.keras.utils.data_utils.Sequence` object. - use_multiprocessing: use multiprocessing if True, otherwise threading - shuffle: whether to shuffle the data at the beginning of each epoch - """ - - def __init__(self, sequence, use_multiprocessing=False, shuffle=False): - super().__init__(sequence, use_multiprocessing) - self.shuffle = shuffle - - def _get_executor_init(self, workers): - """Gets the Pool initializer for multiprocessing. + """Builds a Enqueuer from a Sequence. Args: - workers: Number of workers. - - Returns: - Function, a Function to initialize the pool + sequence: A `tf.keras.utils.data_utils.Sequence` object. 
+ use_multiprocessing: use multiprocessing if True, otherwise threading + shuffle: whether to shuffle the data at the beginning of each epoch """ - def pool_fn(seqs): - pool = get_pool_class(True)( - workers, initializer=init_pool_generator, - initargs=(seqs, None, get_worker_id_queue())) - _DATA_POOLS.add(pool) - return pool - - return pool_fn - - def _wait_queue(self): - """Wait for the queue to be empty.""" - while True: - time.sleep(0.1) - if self.queue.unfinished_tasks == 0 or self.stop_signal.is_set(): - return - - def _run(self): - """Submits request to the executor and queue the `Future` objects.""" - sequence = list(range(len(self.sequence))) - self._send_sequence() # Share the initial sequence - while True: - if self.shuffle: - random.shuffle(sequence) - - with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: - for i in sequence: - if self.stop_signal.is_set(): - return - - self.queue.put( - executor.apply_async(get_index, (self.uid, i)), block=True) - - # Done with the current epoch, waiting for the final batches - self._wait_queue() - - if self.stop_signal.is_set(): - # We're done - return - - # Call the internal on epoch end. - self.sequence.on_epoch_end() - self._send_sequence() # Update the pool - - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - Yields: - The next element in the queue, i.e. a tuple - `(inputs, targets)` or - `(inputs, targets, sample_weights)`. - """ - while self.is_running(): - try: - inputs = self.queue.get(block=True, timeout=5).get() - if self.is_running(): - self.queue.task_done() - if inputs is not None: - yield inputs - except queue.Empty: - pass - except Exception as e: # pylint: disable=broad-except - self.stop() - raise e + def __init__(self, sequence, use_multiprocessing=False, shuffle=False): + super().__init__(sequence, use_multiprocessing) + self.shuffle = shuffle + + def _get_executor_init(self, workers): + """Gets the Pool initializer for multiprocessing. + + Args: + workers: Number of workers. + + Returns: + Function, a Function to initialize the pool + """ + + def pool_fn(seqs): + pool = get_pool_class(True)( + workers, + initializer=init_pool_generator, + initargs=(seqs, None, get_worker_id_queue()), + ) + _DATA_POOLS.add(pool) + return pool + + return pool_fn + + def _wait_queue(self): + """Wait for the queue to be empty.""" + while True: + time.sleep(0.1) + if self.queue.unfinished_tasks == 0 or self.stop_signal.is_set(): + return + + def _run(self): + """Submits request to the executor and queue the `Future` objects.""" + sequence = list(range(len(self.sequence))) + self._send_sequence() # Share the initial sequence + while True: + if self.shuffle: + random.shuffle(sequence) + + with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: + for i in sequence: + if self.stop_signal.is_set(): + return + + self.queue.put( + executor.apply_async(get_index, (self.uid, i)), + block=True, + ) + + # Done with the current epoch, waiting for the final batches + self._wait_queue() + + if self.stop_signal.is_set(): + # We're done + return + + # Call the internal on epoch end. + self.sequence.on_epoch_end() + self._send_sequence() # Update the pool + + def get(self): + """Creates a generator to extract data from the queue. + + Skip the data if it is `None`. + + Yields: + The next element in the queue, i.e. a tuple + `(inputs, targets)` or + `(inputs, targets, sample_weights)`. 
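
End to end, `OrderedEnqueuer` preserves batch order even with several workers, because futures are enqueued and consumed in submission order. A thread-based sketch with a toy `Sequence` (process workers would behave the same way):

```python
import numpy as np
import tensorflow as tf

class ToySequence(tf.keras.utils.Sequence):
    def __init__(self, n):
        self.n = n

    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        return np.full((2, 2), idx)

# Threads rather than processes keep the sketch portable.
enqueuer = tf.keras.utils.OrderedEnqueuer(
    ToySequence(8), use_multiprocessing=False, shuffle=False
)
enqueuer.start(workers=2, max_queue_size=4)
stream = enqueuer.get()  # infinite, ordered stream of batches
print([int(next(stream)[0, 0]) for _ in range(8)])  # [0, 1, ..., 7]
enqueuer.stop()
```
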
+ """ + while self.is_running(): + try: + inputs = self.queue.get(block=True, timeout=5).get() + if self.is_running(): + self.queue.task_done() + if inputs is not None: + yield inputs + except queue.Empty: + pass + except Exception as e: + self.stop() + raise e def init_pool_generator(gens, random_seed=None, id_queue=None): - """Initializer function for pool workers. + """Initializer function for pool workers. - Args: - gens: State which should be made available to worker processes. - random_seed: An optional value with which to seed child processes. - id_queue: A multiprocessing Queue of worker ids. This is used to indicate - that a worker process was created by Keras and can be terminated using - the cleanup_all_keras_forkpools utility. - """ - global _SHARED_SEQUENCES - _SHARED_SEQUENCES = gens + Args: + gens: State which should be made available to worker processes. + random_seed: An optional value with which to seed child processes. + id_queue: A multiprocessing Queue of worker ids. This is used to indicate + that a worker process was created by Keras and can be terminated using + the cleanup_all_keras_forkpools utility. + """ + global _SHARED_SEQUENCES + _SHARED_SEQUENCES = gens - worker_proc = multiprocessing.current_process() + worker_proc = multiprocessing.current_process() - # name isn't used for anything, but setting a more descriptive name is helpful - # when diagnosing orphaned processes. - worker_proc.name = 'Keras_worker_{}'.format(worker_proc.name) + # name isn't used for anything, but setting a more descriptive name is + # helpful when diagnosing orphaned processes. + worker_proc.name = f"Keras_worker_{worker_proc.name}" - if random_seed is not None: - np.random.seed(random_seed + worker_proc.ident) + if random_seed is not None: + np.random.seed(random_seed + worker_proc.ident) - if id_queue is not None: - # If a worker dies during init, the pool will just create a replacement. - id_queue.put(worker_proc.ident, block=True, timeout=0.1) + if id_queue is not None: + # If a worker dies during init, the pool will just create a replacement. + id_queue.put(worker_proc.ident, block=True, timeout=0.1) def next_sample(uid): - """Gets the next value from the generator `uid`. + """Gets the next value from the generator `uid`. - To allow multiple generators to be used at the same time, we use `uid` to - get a specific one. A single generator would cause the validation to - overwrite the training generator. + To allow multiple generators to be used at the same time, we use `uid` to + get a specific one. A single generator would cause the validation to + overwrite the training generator. - Args: - uid: int, generator identifier + Args: + uid: int, generator identifier - Returns: - The next value of generator `uid`. - """ - return next(_SHARED_SEQUENCES[uid]) + Returns: + The next value of generator `uid`. + """ + return next(_SHARED_SEQUENCES[uid]) -@keras_export('keras.utils.GeneratorEnqueuer') +@keras_export("keras.utils.GeneratorEnqueuer") class GeneratorEnqueuer(SequenceEnqueuer): - """Builds a queue out of a data generator. - - The provided generator can be finite in which case the class will throw - a `StopIteration` exception. + """Builds a queue out of a data generator. - Args: - generator: a generator function which yields data - use_multiprocessing: use multiprocessing if True, otherwise threading - random_seed: Initial seed for workers, - will be incremented by one for each worker. 
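
`GeneratorEnqueuer` is the generator-shaped counterpart; with a single thread worker no extra thread-safety wrapper is needed, and `random_seed` only matters for process workers. A minimal sketch with an infinite toy generator (which sidesteps the finite-generator `StopIteration` drain path shown below):

```python
import tensorflow as tf

def sample_gen():
    # Infinite generator: the enqueuer keeps requesting the next value
    # until stop() is called.
    i = 0
    while True:
        yield i
        i += 1

enq = tf.keras.utils.GeneratorEnqueuer(sample_gen(), use_multiprocessing=False)
enq.start(workers=1, max_queue_size=4)
stream = enq.get()
print([next(stream) for _ in range(5)])  # [0, 1, 2, 3, 4]
enq.stop()
```
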
- """ + The provided generator can be finite in which case the class will throw + a `StopIteration` exception. - def __init__(self, generator, - use_multiprocessing=False, - random_seed=None): - super().__init__(generator, use_multiprocessing) - self.random_seed = random_seed + Args: + generator: a generator function which yields data + use_multiprocessing: use multiprocessing if True, otherwise threading + random_seed: Initial seed for workers, + will be incremented by one for each worker. + """ - def _get_executor_init(self, workers): - """Gets the Pool initializer for multiprocessing. + def __init__(self, generator, use_multiprocessing=False, random_seed=None): + super().__init__(generator, use_multiprocessing) + self.random_seed = random_seed + + def _get_executor_init(self, workers): + """Gets the Pool initializer for multiprocessing. + + Args: + workers: Number of works. + + Returns: + A Function to initialize the pool + """ + + def pool_fn(seqs): + pool = get_pool_class(True)( + workers, + initializer=init_pool_generator, + initargs=(seqs, self.random_seed, get_worker_id_queue()), + ) + _DATA_POOLS.add(pool) + return pool + + return pool_fn + + def _run(self): + """Submits request to the executor and queue the `Future` objects.""" + self._send_sequence() # Share the initial generator + with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: + while True: + if self.stop_signal.is_set(): + return + + self.queue.put( + executor.apply_async(next_sample, (self.uid,)), block=True + ) + + def get(self): + """Creates a generator to extract data from the queue. + + Skip the data if it is `None`. + + Yields: + The next element in the queue, i.e. a tuple + `(inputs, targets)` or + `(inputs, targets, sample_weights)`. + """ + try: + while self.is_running(): + inputs = self.queue.get(block=True).get() + self.queue.task_done() + if inputs is not None: + yield inputs + except StopIteration: + # Special case for finite generators + last_ones = [] + while self.queue.qsize() > 0: + last_ones.append(self.queue.get(block=True)) + # Wait for them to complete + for f in last_ones: + f.wait() + # Keep the good ones + last_ones = [ + future.get() for future in last_ones if future.successful() + ] + for inputs in last_ones: + if inputs is not None: + yield inputs + except Exception as e: + self.stop() + if "generator already executing" in str(e): + raise RuntimeError( + "Your generator is NOT thread-safe. " + "Keras requires a thread-safe generator when " + "`use_multiprocessing=False, workers > 1`. " + ) + raise e + + +@keras_export( + "keras.utils.pad_sequences", "keras.preprocessing.sequence.pad_sequences" +) +def pad_sequences( + sequences, + maxlen=None, + dtype="int32", + padding="pre", + truncating="pre", + value=0.0, +): + """Pads sequences to the same length. + + This function transforms a list (of length `num_samples`) + of sequences (lists of integers) + into a 2D Numpy array of shape `(num_samples, num_timesteps)`. + `num_timesteps` is either the `maxlen` argument if provided, + or the length of the longest sequence in the list. + + Sequences that are shorter than `num_timesteps` + are padded with `value` until they are `num_timesteps` long. + + Sequences longer than `num_timesteps` are truncated + so that they fit the desired length. + + The position where padding or truncation happens is determined by + the arguments `padding` and `truncating`, respectively. + Pre-padding or removing values from the beginning of the sequence is the + default. 
+ + >>> sequence = [[1], [2, 3], [4, 5, 6]] + >>> tf.keras.utils.pad_sequences(sequence) + array([[0, 0, 1], + [0, 2, 3], + [4, 5, 6]], dtype=int32) + + >>> tf.keras.utils.pad_sequences(sequence, value=-1) + array([[-1, -1, 1], + [-1, 2, 3], + [ 4, 5, 6]], dtype=int32) + + >>> tf.keras.utils.pad_sequences(sequence, padding='post') + array([[1, 0, 0], + [2, 3, 0], + [4, 5, 6]], dtype=int32) + + >>> tf.keras.utils.pad_sequences(sequence, maxlen=2) + array([[0, 1], + [2, 3], + [5, 6]], dtype=int32) Args: - workers: Number of works. + sequences: List of sequences (each sequence is a list of integers). + maxlen: Optional Int, maximum length of all sequences. If not provided, + sequences will be padded to the length of the longest individual + sequence. + dtype: (Optional). Type of the output sequences. + To pad sequences with variable length strings, you can use `object`. + Defaults to `"int32"`. + padding: String, "pre" or "post" (optional): + pad either before or after each sequence. Defaults to `"pre"`. + truncating: String, "pre" or "post" (optional): + remove values from sequences larger than + `maxlen`, either at the beginning or at the end of the sequences. + Defaults to `"pre"`. + value: Float or String, padding value. (Optional). Defaults to `0.`. Returns: - A Function to initialize the pool - """ - def pool_fn(seqs): - pool = get_pool_class(True)( - workers, initializer=init_pool_generator, - initargs=(seqs, self.random_seed, get_worker_id_queue())) - _DATA_POOLS.add(pool) - return pool - return pool_fn - - def _run(self): - """Submits request to the executor and queue the `Future` objects.""" - self._send_sequence() # Share the initial generator - with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: - while True: - if self.stop_signal.is_set(): - return - - self.queue.put( - executor.apply_async(next_sample, (self.uid,)), block=True) - - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. + Numpy array with shape `(len(sequences), maxlen)` - Yields: - The next element in the queue, i.e. a tuple - `(inputs, targets)` or - `(inputs, targets, sample_weights)`. + Raises: + ValueError: In case of invalid values for `truncating` or `padding`, + or in case of invalid shape for a `sequences` entry. """ - try: - while self.is_running(): - inputs = self.queue.get(block=True).get() - self.queue.task_done() - if inputs is not None: - yield inputs - except StopIteration: - # Special case for finite generators - last_ones = [] - while self.queue.qsize() > 0: - last_ones.append(self.queue.get(block=True)) - # Wait for them to complete - for f in last_ones: - f.wait() - # Keep the good ones - last_ones = [future.get() for future in last_ones if future.successful()] - for inputs in last_ones: - if inputs is not None: - yield inputs - except Exception as e: # pylint: disable=broad-except - self.stop() - if 'generator already executing' in str(e): - raise RuntimeError( - 'Your generator is NOT thread-safe. ' - 'Keras requires a thread-safe generator when ' - '`use_multiprocessing=False, workers > 1`. ') - raise e - - -@keras_export('keras.utils.pad_sequences', - 'keras.preprocessing.sequence.pad_sequences') -def pad_sequences(sequences, maxlen=None, dtype='int32', - padding='pre', truncating='pre', value=0.): - """Pads sequences to the same length. - - This function transforms a list (of length `num_samples`) - of sequences (lists of integers) - into a 2D Numpy array of shape `(num_samples, num_timesteps)`. 
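
The doctests above all use the defaults; `maxlen`, `truncating` and `padding` compose, and strings need `dtype=object` as the Args note says. Two further examples, assuming a TF build that already re-exports `keras.utils.pad_sequences`:

```python
import tensorflow as tf

seqs = [[1, 2, 3, 4, 5], [6, 7]]
# Keep the END of long sequences and pad short ones at the end.
print(tf.keras.utils.pad_sequences(
    seqs, maxlen=3, truncating="pre", padding="post"))
# [[3 4 5]
#  [6 7 0]]

# Variable-length strings need dtype=object, per the Args note above.
print(tf.keras.utils.pad_sequences(
    [["a"], ["b", "c"]], value="", dtype=object))
# [['' 'a']
#  ['b' 'c']]
```
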
- `num_timesteps` is either the `maxlen` argument if provided, - or the length of the longest sequence in the list. - - Sequences that are shorter than `num_timesteps` - are padded with `value` until they are `num_timesteps` long. - - Sequences longer than `num_timesteps` are truncated - so that they fit the desired length. - - The position where padding or truncation happens is determined by - the arguments `padding` and `truncating`, respectively. - Pre-padding or removing values from the beginning of the sequence is the - default. - - >>> sequence = [[1], [2, 3], [4, 5, 6]] - >>> tf.keras.preprocessing.sequence.pad_sequences(sequence) - array([[0, 0, 1], - [0, 2, 3], - [4, 5, 6]], dtype=int32) - - >>> tf.keras.preprocessing.sequence.pad_sequences(sequence, value=-1) - array([[-1, -1, 1], - [-1, 2, 3], - [ 4, 5, 6]], dtype=int32) - - >>> tf.keras.preprocessing.sequence.pad_sequences(sequence, padding='post') - array([[1, 0, 0], - [2, 3, 0], - [4, 5, 6]], dtype=int32) - - >>> tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen=2) - array([[0, 1], - [2, 3], - [5, 6]], dtype=int32) - - Args: - sequences: List of sequences (each sequence is a list of integers). - maxlen: Optional Int, maximum length of all sequences. If not provided, - sequences will be padded to the length of the longest individual - sequence. - dtype: (Optional, defaults to `"int32"`). Type of the output sequences. - To pad sequences with variable length strings, you can use `object`. - padding: String, "pre" or "post" (optional, defaults to `"pre"`): - pad either before or after each sequence. - truncating: String, "pre" or "post" (optional, defaults to `"pre"`): - remove values from sequences larger than - `maxlen`, either at the beginning or at the end of the sequences. - value: Float or String, padding value. (Optional, defaults to 0.) - - Returns: - Numpy array with shape `(len(sequences), maxlen)` - - Raises: - ValueError: In case of invalid values for `truncating` or `padding`, - or in case of invalid shape for a `sequences` entry. - """ - if not hasattr(sequences, '__len__'): - raise ValueError('`sequences` must be iterable.') - num_samples = len(sequences) - - lengths = [] - sample_shape = () - flag = True - - # take the sample shape from the first non empty sequence - # checking for consistency in the main loop below. - - for x in sequences: - try: - lengths.append(len(x)) - if flag and len(x): - sample_shape = np.asarray(x).shape[1:] - flag = False - except TypeError as e: - raise ValueError('`sequences` must be a list of iterables. 
' - f'Found non-iterable: {str(x)}') from e - - if maxlen is None: - maxlen = np.max(lengths) - - is_dtype_str = np.issubdtype(dtype, np.str_) or np.issubdtype( - dtype, np.unicode_) - if isinstance(value, str) and dtype != object and not is_dtype_str: - raise ValueError( - f'`dtype` {dtype} is not compatible with `value`\'s type: ' - f'{type(value)}\nYou should set `dtype=object` for variable length ' - 'strings.') - - x = np.full((num_samples, maxlen) + sample_shape, value, dtype=dtype) - for idx, s in enumerate(sequences): - if not len(s): # pylint: disable=g-explicit-length-test - continue # empty list/array was found - if truncating == 'pre': - trunc = s[-maxlen:] # pylint: disable=invalid-unary-operand-type - elif truncating == 'post': - trunc = s[:maxlen] - else: - raise ValueError(f'Truncating type "{truncating}" not understood') - - # check `trunc` has expected shape - trunc = np.asarray(trunc, dtype=dtype) - if trunc.shape[1:] != sample_shape: - raise ValueError(f'Shape of sample {trunc.shape[1:]} of sequence at ' - f'position {idx} is different from expected shape ' - f'{sample_shape}') - - if padding == 'post': - x[idx, :len(trunc)] = trunc - elif padding == 'pre': - x[idx, -len(trunc):] = trunc - else: - raise ValueError(f'Padding type "{padding}" not understood') - return x + if not hasattr(sequences, "__len__"): + raise ValueError("`sequences` must be iterable.") + num_samples = len(sequences) + + lengths = [] + sample_shape = () + flag = True + + # take the sample shape from the first non empty sequence + # checking for consistency in the main loop below. + + for x in sequences: + try: + lengths.append(len(x)) + if flag and len(x): + sample_shape = np.asarray(x).shape[1:] + flag = False + except TypeError as e: + raise ValueError( + "`sequences` must be a list of iterables. " + f"Found non-iterable: {str(x)}" + ) from e + + if maxlen is None: + maxlen = np.max(lengths) + + is_dtype_str = np.issubdtype(dtype, np.str_) or np.issubdtype( + dtype, np.unicode_ + ) + if isinstance(value, str) and dtype != object and not is_dtype_str: + raise ValueError( + f"`dtype` {dtype} is not compatible with `value`'s type: " + f"{type(value)}\nYou should set `dtype=object` for variable length " + "strings." 
+ ) + + x = np.full((num_samples, maxlen) + sample_shape, value, dtype=dtype) + for idx, s in enumerate(sequences): + if not len(s): + continue # empty list/array was found + if truncating == "pre": + trunc = s[-maxlen:] + elif truncating == "post": + trunc = s[:maxlen] + else: + raise ValueError(f'Truncating type "{truncating}" not understood') + + # check `trunc` has expected shape + trunc = np.asarray(trunc, dtype=dtype) + if trunc.shape[1:] != sample_shape: + raise ValueError( + f"Shape of sample {trunc.shape[1:]} of sequence at " + f"position {idx} is different from expected shape " + f"{sample_shape}" + ) + + if padding == "post": + x[idx, : len(trunc)] = trunc + elif padding == "pre": + x[idx, -len(trunc) :] = trunc + else: + raise ValueError(f'Padding type "{padding}" not understood') + return x diff --git a/keras/utils/data_utils_test.py b/keras/utils/data_utils_test.py index 7374311a7437..093281cda85c 100644 --- a/keras/utils/data_utils_test.py +++ b/keras/utils/data_utils_test.py @@ -14,417 +14,503 @@ # ============================================================================== """Tests for data_utils.""" -import tensorflow.compat.v2 as tf - -from itertools import cycle import os import tarfile import urllib import zipfile +from itertools import cycle import numpy as np +import tensorflow.compat.v2 as tf import keras from keras.utils import data_utils class TestGetFile(tf.test.TestCase): - - def test_get_file_and_validate_it(self): - """Tests get_file from a url, plus extraction and validation. - """ - dest_dir = self.get_temp_dir() - orig_dir = self.get_temp_dir() - - text_file_path = os.path.join(orig_dir, 'test.txt') - zip_file_path = os.path.join(orig_dir, 'test.zip') - tar_file_path = os.path.join(orig_dir, 'test.tar.gz') - - with open(text_file_path, 'w') as text_file: - text_file.write('Float like a butterfly, sting like a bee.') - - with tarfile.open(tar_file_path, 'w:gz') as tar_file: - tar_file.add(text_file_path) - - with zipfile.ZipFile(zip_file_path, 'w') as zip_file: - zip_file.write(text_file_path) - - origin = urllib.parse.urljoin( - 'file://', urllib.request.pathname2url(os.path.abspath(tar_file_path))) - - path = keras.utils.data_utils.get_file('test.txt', origin, - untar=True, cache_subdir=dest_dir) - filepath = path + '.tar.gz' - hashval_sha256 = keras.utils.data_utils._hash_file(filepath) - hashval_md5 = keras.utils.data_utils._hash_file(filepath, algorithm='md5') - path = keras.utils.data_utils.get_file( - 'test.txt', origin, md5_hash=hashval_md5, - untar=True, cache_subdir=dest_dir) - path = keras.utils.data_utils.get_file( - filepath, origin, file_hash=hashval_sha256, - extract=True, cache_subdir=dest_dir) - self.assertTrue(os.path.exists(filepath)) - self.assertTrue(keras.utils.data_utils.validate_file(filepath, - hashval_sha256)) - self.assertTrue(keras.utils.data_utils.validate_file(filepath, hashval_md5)) - os.remove(filepath) - - origin = urllib.parse.urljoin( - 'file://', urllib.request.pathname2url(os.path.abspath(zip_file_path))) - - hashval_sha256 = keras.utils.data_utils._hash_file(zip_file_path) - hashval_md5 = keras.utils.data_utils._hash_file(zip_file_path, - algorithm='md5') - path = keras.utils.data_utils.get_file( - 'test', origin, md5_hash=hashval_md5, - extract=True, cache_subdir=dest_dir) - path = keras.utils.data_utils.get_file( - 'test', origin, file_hash=hashval_sha256, - extract=True, cache_subdir=dest_dir) - self.assertTrue(os.path.exists(path)) - self.assertTrue(keras.utils.data_utils.validate_file(path, hashval_sha256)) - 
self.assertTrue(keras.utils.data_utils.validate_file(path, hashval_md5)) - os.remove(path) - - for file_path, extract in [(text_file_path, False), (tar_file_path, True), - (zip_file_path, True)]: - origin = urllib.parse.urljoin( - 'file://', urllib.request.pathname2url(os.path.abspath(file_path))) - hashval_sha256 = keras.utils.data_utils._hash_file(file_path) - path = keras.utils.data_utils.get_file( - origin=origin, - file_hash=hashval_sha256, - extract=extract, - cache_subdir=dest_dir) - self.assertTrue(os.path.exists(path)) - self.assertTrue( - keras.utils.data_utils.validate_file(path, hashval_sha256)) - os.remove(path) - - with self.assertRaisesRegexp(ValueError, 'Please specify the "origin".*'): - _ = keras.utils.data_utils.get_file() - - def test_get_file_with_tgz_extension(self): - """Tests get_file from a url, plus extraction and validation.""" - dest_dir = self.get_temp_dir() - orig_dir = self.get_temp_dir() - - text_file_path = os.path.join(orig_dir, 'test.txt') - tar_file_path = os.path.join(orig_dir, 'test.tar.gz') - - with open(text_file_path, 'w') as text_file: - text_file.write('Float like a butterfly, sting like a bee.') - - with tarfile.open(tar_file_path, 'w:gz') as tar_file: - tar_file.add(text_file_path) - - origin = urllib.parse.urljoin( - 'file://', urllib.request.pathname2url(os.path.abspath(tar_file_path))) - - path = keras.utils.data_utils.get_file( - 'test.txt.tar.gz', origin, untar=True, cache_subdir=dest_dir) - self.assertEndsWith(path, '.txt') - self.assertTrue(os.path.exists(path)) - - def test_get_file_with_integrity_check(self): - """Tests get_file with validation before download.""" - orig_dir = self.get_temp_dir() - file_path = os.path.join(orig_dir, 'test.txt') - - with open(file_path, 'w') as text_file: - text_file.write('Float like a butterfly, sting like a bee.') - - hashval = keras.utils.data_utils._hash_file(file_path) - - origin = urllib.parse.urljoin( - 'file://', urllib.request.pathname2url(os.path.abspath(file_path))) - - path = keras.utils.data_utils.get_file( - 'test.txt', origin, file_hash=hashval) - self.assertTrue(os.path.exists(path)) - - def test_get_file_with_failed_integrity_check(self): - """Tests get_file with validation before download.""" - orig_dir = self.get_temp_dir() - file_path = os.path.join(orig_dir, 'test.txt') - - with open(file_path, 'w') as text_file: - text_file.write('Float like a butterfly, sting like a bee.') - - hashval = '0' * 64 - - origin = urllib.parse.urljoin( - 'file://', urllib.request.pathname2url(os.path.abspath(file_path))) - - with self.assertRaisesRegex(ValueError, 'Incomplete or corrupted file.*'): - _ = keras.utils.data_utils.get_file('test.txt', origin, file_hash=hashval) + def test_get_file_and_validate_it(self): + """Tests get_file from a url, plus extraction and validation.""" + dest_dir = self.get_temp_dir() + orig_dir = self.get_temp_dir() + + text_file_path = os.path.join(orig_dir, "test.txt") + zip_file_path = os.path.join(orig_dir, "test.zip") + tar_file_path = os.path.join(orig_dir, "test.tar.gz") + + with open(text_file_path, "w") as text_file: + text_file.write("Float like a butterfly, sting like a bee.") + + with tarfile.open(tar_file_path, "w:gz") as tar_file: + tar_file.add(text_file_path) + + with zipfile.ZipFile(zip_file_path, "w") as zip_file: + zip_file.write(text_file_path) + + origin = urllib.parse.urljoin( + "file://", + urllib.request.pathname2url(os.path.abspath(tar_file_path)), + ) + + path = keras.utils.data_utils.get_file( + "test.txt", origin, untar=True, 
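The download-validate-extract flow these assertions exercise can be sketched as follows (the origin URL and hash below are placeholders, not values from the test):

import keras

# Placeholder origin and hash: `get_file` downloads into the Keras cache,
# checks the file against `file_hash`, and extracts the archive. A hash
# mismatch on a cached file triggers a fresh download.
path = keras.utils.data_utils.get_file(
    "sample.tar.gz",
    origin="https://example.com/sample.tar.gz",  # placeholder URL
    file_hash="<expected-sha256>",  # placeholder hash
    extract=True,
)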
cache_subdir=dest_dir + ) + filepath = path + ".tar.gz" + hashval_sha256 = keras.utils.data_utils._hash_file(filepath) + hashval_md5 = keras.utils.data_utils._hash_file( + filepath, algorithm="md5" + ) + path = keras.utils.data_utils.get_file( + "test.txt", + origin, + md5_hash=hashval_md5, + untar=True, + cache_subdir=dest_dir, + ) + path = keras.utils.data_utils.get_file( + filepath, + origin, + file_hash=hashval_sha256, + extract=True, + cache_subdir=dest_dir, + ) + self.assertTrue(os.path.exists(filepath)) + self.assertTrue( + keras.utils.data_utils.validate_file(filepath, hashval_sha256) + ) + self.assertTrue( + keras.utils.data_utils.validate_file(filepath, hashval_md5) + ) + os.remove(filepath) + + origin = urllib.parse.urljoin( + "file://", + urllib.request.pathname2url(os.path.abspath(zip_file_path)), + ) + + hashval_sha256 = keras.utils.data_utils._hash_file(zip_file_path) + hashval_md5 = keras.utils.data_utils._hash_file( + zip_file_path, algorithm="md5" + ) + path = keras.utils.data_utils.get_file( + "test", + origin, + md5_hash=hashval_md5, + extract=True, + cache_subdir=dest_dir, + ) + path = keras.utils.data_utils.get_file( + "test", + origin, + file_hash=hashval_sha256, + extract=True, + cache_subdir=dest_dir, + ) + self.assertTrue(os.path.exists(path)) + self.assertTrue( + keras.utils.data_utils.validate_file(path, hashval_sha256) + ) + self.assertTrue(keras.utils.data_utils.validate_file(path, hashval_md5)) + os.remove(path) + + for file_path, extract in [ + (text_file_path, False), + (tar_file_path, True), + (zip_file_path, True), + ]: + origin = urllib.parse.urljoin( + "file://", + urllib.request.pathname2url(os.path.abspath(file_path)), + ) + hashval_sha256 = keras.utils.data_utils._hash_file(file_path) + path = keras.utils.data_utils.get_file( + origin=origin, + file_hash=hashval_sha256, + extract=extract, + cache_subdir=dest_dir, + ) + self.assertTrue(os.path.exists(path)) + self.assertTrue( + keras.utils.data_utils.validate_file(path, hashval_sha256) + ) + os.remove(path) + + with self.assertRaisesRegexp( + ValueError, 'Please specify the "origin".*' + ): + _ = keras.utils.data_utils.get_file() + + def test_get_file_with_tgz_extension(self): + """Tests get_file from a url, plus extraction and validation.""" + dest_dir = self.get_temp_dir() + orig_dir = self.get_temp_dir() + + text_file_path = os.path.join(orig_dir, "test.txt") + tar_file_path = os.path.join(orig_dir, "test.tar.gz") + + with open(text_file_path, "w") as text_file: + text_file.write("Float like a butterfly, sting like a bee.") + + with tarfile.open(tar_file_path, "w:gz") as tar_file: + tar_file.add(text_file_path) + + origin = urllib.parse.urljoin( + "file://", + urllib.request.pathname2url(os.path.abspath(tar_file_path)), + ) + + path = keras.utils.data_utils.get_file( + "test.txt.tar.gz", origin, untar=True, cache_subdir=dest_dir + ) + self.assertEndsWith(path, ".txt") + self.assertTrue(os.path.exists(path)) + + def test_get_file_with_integrity_check(self): + """Tests get_file with validation before download.""" + orig_dir = self.get_temp_dir() + file_path = os.path.join(orig_dir, "test.txt") + + with open(file_path, "w") as text_file: + text_file.write("Float like a butterfly, sting like a bee.") + + hashval = keras.utils.data_utils._hash_file(file_path) + + origin = urllib.parse.urljoin( + "file://", urllib.request.pathname2url(os.path.abspath(file_path)) + ) + + path = keras.utils.data_utils.get_file( + "test.txt", origin, file_hash=hashval + ) + self.assertTrue(os.path.exists(path)) + + def 
test_get_file_with_failed_integrity_check(self): + """Tests get_file with validation before download.""" + orig_dir = self.get_temp_dir() + file_path = os.path.join(orig_dir, "test.txt") + + with open(file_path, "w") as text_file: + text_file.write("Float like a butterfly, sting like a bee.") + + hashval = "0" * 64 + + origin = urllib.parse.urljoin( + "file://", urllib.request.pathname2url(os.path.abspath(file_path)) + ) + + with self.assertRaisesRegex( + ValueError, "Incomplete or corrupted file.*" + ): + _ = keras.utils.data_utils.get_file( + "test.txt", origin, file_hash=hashval + ) class TestSequence(keras.utils.data_utils.Sequence): + def __init__(self, shape, value=1.0): + self.shape = shape + self.inner = value - def __init__(self, shape, value=1.): - self.shape = shape - self.inner = value - - def __getitem__(self, item): - return np.ones(self.shape, dtype=np.uint32) * item * self.inner + def __getitem__(self, item): + return np.ones(self.shape, dtype=np.uint32) * item * self.inner - def __len__(self): - return 100 + def __len__(self): + return 100 - def on_epoch_end(self): - self.inner *= 5.0 + def on_epoch_end(self): + self.inner *= 5.0 class FaultSequence(keras.utils.data_utils.Sequence): + def __getitem__(self, item): + raise IndexError(item, "item is not present") - def __getitem__(self, item): - raise IndexError(item, 'item is not present') - - def __len__(self): - return 100 + def __len__(self): + return 100 @data_utils.threadsafe_generator def create_generator_from_sequence_threads(ds): - for i in cycle(range(len(ds))): - yield ds[i] + for i in cycle(range(len(ds))): + yield ds[i] def create_generator_from_sequence_pcs(ds): - for i in cycle(range(len(ds))): - yield ds[i] + for i in cycle(range(len(ds))): + yield ds[i] class TestEnqueuers(tf.test.TestCase): - - def test_generator_enqueuer_threads(self): - enqueuer = keras.utils.data_utils.GeneratorEnqueuer( - create_generator_from_sequence_threads(TestSequence([3, 200, 200, 3])), - use_multiprocessing=False) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - acc = [] - for _ in range(100): - acc.append(int(next(gen_output)[0, 0, 0, 0])) - - self.assertEqual(len(set(acc) - set(range(100))), 0) - enqueuer.stop() - - @data_utils.dont_use_multiprocessing_pool - def test_generator_enqueuer_processes(self): - enqueuer = keras.utils.data_utils.GeneratorEnqueuer( - create_generator_from_sequence_threads(TestSequence([3, 200, 200, 3])), - use_multiprocessing=True) - enqueuer.start(4, 10) - gen_output = enqueuer.get() - acc = [] - for _ in range(300): - acc.append(int(next(gen_output)[0, 0, 0, 0])) - self.assertNotEqual(acc, list(range(100))) - enqueuer.stop() - - def test_generator_enqueuer_fail_threads(self): - enqueuer = keras.utils.data_utils.GeneratorEnqueuer( - create_generator_from_sequence_threads(FaultSequence()), - use_multiprocessing=False) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - with self.assertRaises(IndexError): - next(gen_output) - - @data_utils.dont_use_multiprocessing_pool - def test_generator_enqueuer_fail_processes(self): - enqueuer = keras.utils.data_utils.GeneratorEnqueuer( - create_generator_from_sequence_threads(FaultSequence()), - use_multiprocessing=True) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - with self.assertRaises(IndexError): - next(gen_output) - - def test_ordered_enqueuer_threads(self): - enqueuer = keras.utils.data_utils.OrderedEnqueuer( - TestSequence([3, 200, 200, 3]), use_multiprocessing=False) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - acc = [] - for 
_ in range(100): - acc.append(next(gen_output)[0, 0, 0, 0]) - self.assertEqual(acc, list(range(100))) - enqueuer.stop() - - @data_utils.dont_use_multiprocessing_pool - def test_ordered_enqueuer_processes(self): - enqueuer = keras.utils.data_utils.OrderedEnqueuer( - TestSequence([3, 200, 200, 3]), use_multiprocessing=True) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - acc = [] - for _ in range(100): - acc.append(next(gen_output)[0, 0, 0, 0]) - self.assertEqual(acc, list(range(100))) - enqueuer.stop() - - def test_ordered_enqueuer_fail_threads(self): - enqueuer = keras.utils.data_utils.OrderedEnqueuer( - FaultSequence(), use_multiprocessing=False) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - with self.assertRaises(IndexError): - next(gen_output) - - @data_utils.dont_use_multiprocessing_pool - def test_ordered_enqueuer_fail_processes(self): - enqueuer = keras.utils.data_utils.OrderedEnqueuer( - FaultSequence(), use_multiprocessing=True) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - with self.assertRaises(IndexError): - next(gen_output) - - @data_utils.dont_use_multiprocessing_pool - def test_on_epoch_end_processes(self): - enqueuer = keras.utils.data_utils.OrderedEnqueuer( - TestSequence([3, 200, 200, 3]), use_multiprocessing=True) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - acc = [] - for _ in range(200): - acc.append(next(gen_output)[0, 0, 0, 0]) - # Check that order was keep in GeneratorEnqueuer with processes - self.assertEqual(acc[100:], list([k * 5 for k in range(100)])) - enqueuer.stop() - - @data_utils.dont_use_multiprocessing_pool - def test_context_switch(self): - enqueuer = keras.utils.data_utils.OrderedEnqueuer( - TestSequence([3, 200, 200, 3]), use_multiprocessing=True) - enqueuer2 = keras.utils.data_utils.OrderedEnqueuer( - TestSequence([3, 200, 200, 3], value=15), use_multiprocessing=True) - enqueuer.start(3, 10) - enqueuer2.start(3, 10) - gen_output = enqueuer.get() - gen_output2 = enqueuer2.get() - acc = [] - for _ in range(100): - acc.append(next(gen_output)[0, 0, 0, 0]) - self.assertEqual(acc[-1], 99) - # One epoch is completed so enqueuer will switch the Sequence - - acc = [] - self.skipTest('b/145555807 flakily timing out.') - for _ in range(100): - acc.append(next(gen_output2)[0, 0, 0, 0]) - self.assertEqual(acc[-1], 99 * 15) - # One epoch has been completed so enqueuer2 will switch - - # Be sure that both Sequence were updated - self.assertEqual(next(gen_output)[0, 0, 0, 0], 0) - self.assertEqual(next(gen_output)[0, 0, 0, 0], 5) - self.assertEqual(next(gen_output2)[0, 0, 0, 0], 0) - self.assertEqual(next(gen_output2)[0, 0, 0, 0], 15 * 5) - - # Tear down everything - enqueuer.stop() - enqueuer2.stop() - - def test_on_epoch_end_threads(self): - enqueuer = keras.utils.data_utils.OrderedEnqueuer( - TestSequence([3, 200, 200, 3]), use_multiprocessing=False) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - acc = [] - for _ in range(100): - acc.append(next(gen_output)[0, 0, 0, 0]) - acc = [] - for _ in range(100): - acc.append(next(gen_output)[0, 0, 0, 0]) - # Check that order was keep in GeneratorEnqueuer with processes - self.assertEqual(acc, list([k * 5 for k in range(100)])) - enqueuer.stop() + def test_generator_enqueuer_threads(self): + enqueuer = keras.utils.data_utils.GeneratorEnqueuer( + create_generator_from_sequence_threads( + TestSequence([3, 200, 200, 3]) + ), + use_multiprocessing=False, + ) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + acc = [] + for _ in range(100): + acc.append(int(next(gen_output)[0, 
0, 0, 0])) + + self.assertEqual(len(set(acc) - set(range(100))), 0) + enqueuer.stop() + + @data_utils.dont_use_multiprocessing_pool + def test_generator_enqueuer_processes(self): + enqueuer = keras.utils.data_utils.GeneratorEnqueuer( + create_generator_from_sequence_threads( + TestSequence([3, 200, 200, 3]) + ), + use_multiprocessing=True, + ) + enqueuer.start(4, 10) + gen_output = enqueuer.get() + acc = [] + for _ in range(300): + acc.append(int(next(gen_output)[0, 0, 0, 0])) + self.assertNotEqual(acc, list(range(100))) + enqueuer.stop() + + def test_generator_enqueuer_fail_threads(self): + enqueuer = keras.utils.data_utils.GeneratorEnqueuer( + create_generator_from_sequence_threads(FaultSequence()), + use_multiprocessing=False, + ) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + with self.assertRaises(IndexError): + next(gen_output) + + @data_utils.dont_use_multiprocessing_pool + def test_generator_enqueuer_fail_processes(self): + enqueuer = keras.utils.data_utils.GeneratorEnqueuer( + create_generator_from_sequence_threads(FaultSequence()), + use_multiprocessing=True, + ) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + with self.assertRaises(IndexError): + next(gen_output) + + def test_ordered_enqueuer_threads(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3]), use_multiprocessing=False + ) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + acc = [] + for _ in range(100): + acc.append(next(gen_output)[0, 0, 0, 0]) + self.assertEqual(acc, list(range(100))) + enqueuer.stop() + + @data_utils.dont_use_multiprocessing_pool + def test_ordered_enqueuer_processes(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3]), use_multiprocessing=True + ) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + acc = [] + for _ in range(100): + acc.append(next(gen_output)[0, 0, 0, 0]) + self.assertEqual(acc, list(range(100))) + enqueuer.stop() + + def test_ordered_enqueuer_fail_threads(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + FaultSequence(), use_multiprocessing=False + ) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + with self.assertRaises(IndexError): + next(gen_output) + + @data_utils.dont_use_multiprocessing_pool + def test_ordered_enqueuer_fail_processes(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + FaultSequence(), use_multiprocessing=True + ) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + with self.assertRaises(IndexError): + next(gen_output) + + @data_utils.dont_use_multiprocessing_pool + def test_on_epoch_end_processes(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3]), use_multiprocessing=True + ) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + acc = [] + for _ in range(200): + acc.append(next(gen_output)[0, 0, 0, 0]) + # Check that order was keep in GeneratorEnqueuer with processes + self.assertEqual(acc[100:], list([k * 5 for k in range(100)])) + enqueuer.stop() + + @data_utils.dont_use_multiprocessing_pool + def test_context_switch(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3]), use_multiprocessing=True + ) + enqueuer2 = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3], value=15), use_multiprocessing=True + ) + enqueuer.start(3, 10) + enqueuer2.start(3, 10) + gen_output = enqueuer.get() + gen_output2 = enqueuer2.get() + acc = [] + for _ in range(100): + acc.append(next(gen_output)[0, 0, 0, 0]) + self.assertEqual(acc[-1], 
99) + # One epoch is completed so enqueuer will switch the Sequence + + acc = [] + self.skipTest("b/145555807 flakily timing out.") + for _ in range(100): + acc.append(next(gen_output2)[0, 0, 0, 0]) + self.assertEqual(acc[-1], 99 * 15) + # One epoch has been completed so enqueuer2 will switch + + # Be sure that both Sequence were updated + self.assertEqual(next(gen_output)[0, 0, 0, 0], 0) + self.assertEqual(next(gen_output)[0, 0, 0, 0], 5) + self.assertEqual(next(gen_output2)[0, 0, 0, 0], 0) + self.assertEqual(next(gen_output2)[0, 0, 0, 0], 15 * 5) + + # Tear down everything + enqueuer.stop() + enqueuer2.stop() + + def test_on_epoch_end_threads(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3]), use_multiprocessing=False + ) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + acc = [] + for _ in range(100): + acc.append(next(gen_output)[0, 0, 0, 0]) + acc = [] + for _ in range(100): + acc.append(next(gen_output)[0, 0, 0, 0]) + # Check that order was keep in GeneratorEnqueuer with processes + self.assertEqual(acc, list([k * 5 for k in range(100)])) + enqueuer.stop() class PadSequencesTest(tf.test.TestCase): - - def test_pad_sequences(self): - a = [[1], [1, 2], [1, 2, 3]] - - # test padding - b = data_utils.pad_sequences(a, maxlen=3, padding='pre') - self.assertAllClose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) - b = data_utils.pad_sequences(a, maxlen=3, padding='post') - self.assertAllClose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]]) - - # test truncating - b = data_utils.pad_sequences(a, maxlen=2, truncating='pre') - self.assertAllClose(b, [[0, 1], [1, 2], [2, 3]]) - b = data_utils.pad_sequences(a, maxlen=2, truncating='post') - self.assertAllClose(b, [[0, 1], [1, 2], [1, 2]]) - - # test value - b = data_utils.pad_sequences(a, maxlen=3, value=1) - self.assertAllClose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]]) - - def test_pad_sequences_str(self): - a = [['1'], ['1', '2'], ['1', '2', '3']] - - # test padding - b = data_utils.pad_sequences( - a, maxlen=3, padding='pre', value='pad', dtype=object) - self.assertAllEqual( - b, [['pad', 'pad', '1'], ['pad', '1', '2'], ['1', '2', '3']]) - b = data_utils.pad_sequences( - a, maxlen=3, padding='post', value='pad', dtype='>> data = np.random.random(size=(1000, 4)) + >>> left_ds, right_ds = tf.keras.utils.split_dataset(data, left_size=0.8) + >>> int(left_ds.cardinality()) + 800 + >>> int(right_ds.cardinality()) + 200 + + """ + dataset_type_spec = _get_type_spec(dataset) + + if dataset_type_spec not in [tf.data.Dataset, list, tuple, np.ndarray]: + raise TypeError( + "The `dataset` argument must be either a `tf.data.Dataset` " + "object or a list/tuple of arrays. " + f"Received: dataset={dataset} of type {type(dataset)}" + ) + + if right_size is None and left_size is None: + raise ValueError( + "At least one of the `left_size` or `right_size` " + "must be specified. 
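Beyond the doctest above, the tuple-of-arrays path accepts `(features, labels)` pairs directly; a minimal sketch (shapes and seed are illustrative):

import numpy as np
import tensorflow as tf

# Both arrays must share their first dimension; left_size=0.8 keeps 80%
# of the samples in the first returned dataset.
features = np.random.random((100, 4))
labels = np.arange(100)
train_ds, val_ds = tf.keras.utils.split_dataset(
    (features, labels), left_size=0.8, shuffle=True, seed=42
)
print(int(train_ds.cardinality()), int(val_ds.cardinality()))  # 80 20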
Received: left_size=None and " + "right_size=None" + ) + + dataset_as_list = _convert_dataset_to_list(dataset, dataset_type_spec) + + if shuffle: + if seed is None: + seed = random.randint(0, int(1e6)) + random.seed(seed) + random.shuffle(dataset_as_list) + + total_length = len(dataset_as_list) + + left_size, right_size = _rescale_dataset_split_sizes( + left_size, right_size, total_length + ) + left_split = list(dataset_as_list[:left_size]) + right_split = list(dataset_as_list[-right_size:]) + + left_split = _restore_dataset_from_list( + left_split, dataset_type_spec, dataset + ) + right_split = _restore_dataset_from_list( + right_split, dataset_type_spec, dataset + ) + + left_split = tf.data.Dataset.from_tensor_slices(left_split) + right_split = tf.data.Dataset.from_tensor_slices(right_split) + + # apply batching to the splits if the dataset is batched + if dataset_type_spec is tf.data.Dataset and is_batched(dataset): + batch_size = get_batch_size(dataset) + if batch_size is not None: + left_split = left_split.batch(batch_size) + right_split = right_split.batch(batch_size) + + left_split = left_split.prefetch(tf.data.AUTOTUNE) + right_split = right_split.prefetch(tf.data.AUTOTUNE) + + return left_split, right_split + + +def _convert_dataset_to_list( + dataset, + dataset_type_spec, + data_size_warning_flag=True, + ensure_shape_similarity=True, +): + """Convert `tf.data.Dataset` object or list/tuple of NumPy arrays to a list. + + Args: + dataset : A `tf.data.Dataset` object or a list/tuple of arrays. + dataset_type_spec : the type of the dataset + data_size_warning_flag (bool, optional): If set to True, a warning will + be issued if the dataset takes longer than 10 seconds to iterate. + Defaults to `True`. + ensure_shape_similarity (bool, optional): If set to True, the shape of + the first sample will be used to validate the shape of rest of the + samples. Defaults to `True`. + + Returns: + List: A list of tuples/NumPy arrays. + """ + dataset_iterator = _get_data_iterator_from_dataset( + dataset, dataset_type_spec + ) + dataset_as_list = [] + + start_time = time.time() + for sample in _get_next_sample( + dataset_iterator, + ensure_shape_similarity, + data_size_warning_flag, + start_time, + ): + if dataset_type_spec in [tuple, list]: + # The try-except here is for NumPy 1.24 compatibility, see: + # https://numpy.org/neps/nep-0034-infer-dtype-is-object.html + try: + arr = np.array(sample) + except ValueError: + arr = np.array(sample, dtype=object) + dataset_as_list.append(arr) + else: + dataset_as_list.append(sample) + + return dataset_as_list def _get_data_iterator_from_dataset(dataset, dataset_type_spec): - """Get the iterator from a dataset. - - Args: - dataset : A `tf.data.Dataset` object or a list/tuple of arrays. - dataset_type_spec : the type of the dataset - - Raises: - ValueError: - - If the dataset is empty. - - If the dataset is not a `tf.data.Dataset` object - or a list/tuple of arrays. - - If the dataset is a list/tuple of arrays and the - length of the list/tuple is not equal to the number - - Returns: - iterator: An `iterator` object. - """ - if dataset_type_spec == list: - if len(dataset) == 0: - raise ValueError('Received an empty list dataset. ' - 'Please provide a non-empty list of arrays.') - - if _get_type_spec(dataset[0]) is np.ndarray: - expected_shape = dataset[0].shape - for i, element in enumerate(dataset): - if np.array(element).shape[0] != expected_shape[0]: - raise ValueError('Received a list of NumPy arrays with different ' - f'lengths. 
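The list/tuple branches here boil down to pairing the input arrays sample-wise; a few lines make that concrete (the arrays are illustrative):

import numpy as np

# `iter(zip(*dataset))` yields one (x_i, y_i) pair per sample, which is
# why every array must have the same length along axis 0.
x = np.zeros((4, 2))
y = np.ones((4,))
samples = list(zip(x, y))
assert len(samples) == 4 and samples[0][0].shape == (2,)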
Mismatch found at index {i}, ' - f'Expected shape={expected_shape} ' - f'Received shape={np.array(element).shape}.' - f'Please provide a list of NumPy arrays with ' - f'the same length.') - else: - raise ValueError('Expected a list of `numpy.ndarray` objects,' - f'Received: {type(dataset[0])}') - - return iter(zip(*dataset)) - elif dataset_type_spec == tuple: - if len(dataset) == 0: - raise ValueError('Received an empty list dataset.' - 'Please provide a non-empty tuple of arrays.') - - if _get_type_spec(dataset[0]) is np.ndarray: - expected_shape = dataset[0].shape - for i, element in enumerate(dataset): - if np.array(element).shape[0] != expected_shape[0]: - raise ValueError('Received a tuple of NumPy arrays with different ' - f'lengths. Mismatch found at index {i}, ' - f'Expected shape={expected_shape} ' - f'Received shape={np.array(element).shape}.' - f'Please provide a tuple of NumPy arrays with ' - 'the same length.') - else: - raise ValueError('Expected a tuple of `numpy.ndarray` objects, ' - f'Received: {type(dataset[0])}') - - return iter(zip(*dataset)) - elif dataset_type_spec == tf.data.Dataset: - if is_batched(dataset): - dataset = dataset.unbatch() - return iter(dataset) - elif dataset_type_spec == np.ndarray: - return iter(dataset) - - -def _get_next_sample(dataset_iterator, ensure_shape_similarity, - data_size_warning_flag, start_time): - """"Yield data samples from the `dataset_iterator`. - - Args: - dataset_iterator : An `iterator` object. - ensure_shape_similarity (bool, optional): If set to True, the shape of - the first sample will be used to validate the shape of rest of the - samples. Defaults to True. - data_size_warning_flag (bool, optional): If set to True, a warning will be - issued if the dataset takes longer than 10 seconds to iterate. Defaults - to True. - start_time (float): the start time of the dataset iteration. this is used - only if `data_size_warning_flag` is set to true. - - Raises: - ValueError: - If the dataset is empty. - - If `ensure_shape_similarity` is set to True and the - shape of the first sample is not equal to the shape of - atleast one of the rest of the samples. - - Yields: - data_sample: A tuple/list of numpy arrays. - """ - try: - dataset_iterator = iter(dataset_iterator) - first_sample = next(dataset_iterator) - if isinstance(first_sample, (tf.Tensor, np.ndarray)): - first_sample_shape = np.array(first_sample).shape - else: - first_sample_shape = None - ensure_shape_similarity = False - yield first_sample - except StopIteration: - raise ValueError('Received an empty Dataset. `dataset` must ' - 'be a non-empty list/tuple of `numpy.ndarray` objects ' - 'or `tf.data.Dataset` objects.') - - for i, sample in enumerate(dataset_iterator): - if ensure_shape_similarity: - if first_sample_shape != np.array(sample).shape: - raise ValueError('All `dataset` samples must have same shape, ' - f'Expected shape: {np.array(first_sample).shape} ' - f'Received shape: {np.array(sample).shape} at index ' - f'{i}.') - if data_size_warning_flag: - if i % 10 == 0: - cur_time = time.time() - # warns user if the dataset is too large to iterate within 10s - if int(cur_time - start_time) > 10 and data_size_warning_flag: - warnings.warn( - 'The dataset is taking longer than 10 seconds to ' - 'iterate over. This may be due to the size of the dataset. ' - 'Keep in mind that the `split_dataset` utility is only for ' - 'small in-memory dataset (e.g. 
< 10,000 samples).', - category=ResourceWarning, - source='split_dataset') - data_size_warning_flag = False - yield sample - - -def _restore_dataset_from_list(dataset_as_list, dataset_type_spec, - original_dataset): - """Restore the dataset from the list of arrays.""" - if dataset_type_spec in [tuple, list]: - return tuple(np.array(sample) for sample in zip(*dataset_as_list)) - elif dataset_type_spec == tf.data.Dataset: - if isinstance(original_dataset.element_spec, dict): - restored_dataset = {} - for d in dataset_as_list: - for k, v in d.items(): - if k not in restored_dataset: - restored_dataset[k] = [v] - else: - restored_dataset[k].append(v) - return restored_dataset - else: - return tuple(np.array(sample) for sample in zip(*dataset_as_list)) - return dataset_as_list + """Get the iterator from a dataset. + + Args: + dataset : A `tf.data.Dataset` object or a list/tuple of arrays. + dataset_type_spec : the type of the dataset + + Raises: + ValueError: + - If the dataset is empty. + - If the dataset is not a `tf.data.Dataset` object + or a list/tuple of arrays. + - If the dataset is a list/tuple of arrays and the + length of the list/tuple is not equal to the number + + Returns: + iterator: An `iterator` object. + """ + if dataset_type_spec == list: + if len(dataset) == 0: + raise ValueError( + "Received an empty list dataset. " + "Please provide a non-empty list of arrays." + ) + + if _get_type_spec(dataset[0]) is np.ndarray: + expected_shape = dataset[0].shape + for i, element in enumerate(dataset): + if np.array(element).shape[0] != expected_shape[0]: + raise ValueError( + "Received a list of NumPy arrays with different " + f"lengths. Mismatch found at index {i}, " + f"Expected shape={expected_shape} " + f"Received shape={np.array(element).shape}." + "Please provide a list of NumPy arrays with " + "the same length." + ) + else: + raise ValueError( + "Expected a list of `numpy.ndarray` objects," + f"Received: {type(dataset[0])}" + ) + + return iter(zip(*dataset)) + elif dataset_type_spec == tuple: + if len(dataset) == 0: + raise ValueError( + "Received an empty list dataset." + "Please provide a non-empty tuple of arrays." + ) + + if _get_type_spec(dataset[0]) is np.ndarray: + expected_shape = dataset[0].shape + for i, element in enumerate(dataset): + if np.array(element).shape[0] != expected_shape[0]: + raise ValueError( + "Received a tuple of NumPy arrays with different " + f"lengths. Mismatch found at index {i}, " + f"Expected shape={expected_shape} " + f"Received shape={np.array(element).shape}." + "Please provide a tuple of NumPy arrays with " + "the same length." + ) + else: + raise ValueError( + "Expected a tuple of `numpy.ndarray` objects, " + f"Received: {type(dataset[0])}" + ) + + return iter(zip(*dataset)) + elif dataset_type_spec == tf.data.Dataset: + if is_batched(dataset): + dataset = dataset.unbatch() + return iter(dataset) + elif dataset_type_spec == np.ndarray: + return iter(dataset) + + +def _get_next_sample( + dataset_iterator, + ensure_shape_similarity, + data_size_warning_flag, + start_time, +): + """ "Yield data samples from the `dataset_iterator`. + + Args: + dataset_iterator : An `iterator` object. + ensure_shape_similarity (bool, optional): If set to True, the shape of + the first sample will be used to validate the shape of rest of the + samples. Defaults to `True`. + data_size_warning_flag (bool, optional): If set to True, a warning will + be issued if the dataset takes longer than 10 seconds to iterate. + Defaults to `True`. 
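For `tf.data.Dataset` inputs, this shape check is what rejects ragged samples; a sketch under the assumption that the dataset yields tensors of varying length:

import numpy as np
import tensorflow as tf

# Samples of shape (2,) and (3,) trip the "must have same shape" error.
ragged = tf.data.Dataset.from_generator(
    lambda: iter([np.ones((2,)), np.ones((3,))]),
    output_signature=tf.TensorSpec(shape=(None,), dtype=tf.float64),
)
try:
    tf.keras.utils.split_dataset(ragged, left_size=0.5)
except ValueError as err:
    print(err)  # All `dataset` samples must have same shape, ...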
+ start_time (float): the start time of the dataset iteration. this is + used only if `data_size_warning_flag` is set to true. + + Raises: + ValueError: - If the dataset is empty. + - If `ensure_shape_similarity` is set to True and the + shape of the first sample is not equal to the shape of + atleast one of the rest of the samples. + + Yields: + data_sample: A tuple/list of numpy arrays. + """ + try: + dataset_iterator = iter(dataset_iterator) + first_sample = next(dataset_iterator) + if isinstance(first_sample, (tf.Tensor, np.ndarray)): + first_sample_shape = np.array(first_sample).shape + else: + first_sample_shape = None + ensure_shape_similarity = False + yield first_sample + except StopIteration: + raise ValueError( + "Received an empty Dataset. `dataset` must " + "be a non-empty list/tuple of `numpy.ndarray` objects " + "or `tf.data.Dataset` objects." + ) + + for i, sample in enumerate(dataset_iterator): + if ensure_shape_similarity: + if first_sample_shape != np.array(sample).shape: + raise ValueError( + "All `dataset` samples must have same shape, " + f"Expected shape: {np.array(first_sample).shape} " + f"Received shape: {np.array(sample).shape} at index " + f"{i}." + ) + if data_size_warning_flag: + if i % 10 == 0: + cur_time = time.time() + # warns user if the dataset is too large to iterate within 10s + if int(cur_time - start_time) > 10 and data_size_warning_flag: + warnings.warn( + "The dataset is taking longer than 10 seconds to " + "iterate over. This may be due to the size of the " + "dataset. Keep in mind that the `split_dataset` " + "utility is only for small in-memory dataset " + "(e.g. < 10,000 samples).", + category=ResourceWarning, + source="split_dataset", + ) + data_size_warning_flag = False + yield sample + + +def _restore_dataset_from_list( + dataset_as_list, dataset_type_spec, original_dataset +): + """Restore the dataset from the list of arrays.""" + if dataset_type_spec in [tuple, list]: + return tuple(np.array(sample) for sample in zip(*dataset_as_list)) + elif dataset_type_spec == tf.data.Dataset: + if isinstance(original_dataset.element_spec, dict): + restored_dataset = {} + for d in dataset_as_list: + for k, v in d.items(): + if k not in restored_dataset: + restored_dataset[k] = [v] + else: + restored_dataset[k].append(v) + return restored_dataset + else: + return tuple(np.array(sample) for sample in zip(*dataset_as_list)) + return dataset_as_list def _rescale_dataset_split_sizes(left_size, right_size, total_length): - """Rescale the dataset split sizes. - - We want to ensure that the sum of - the split sizes is equal to the total length of the dataset. - - Args: - left_size : The size of the left dataset split. - right_size : The size of the right dataset split. - total_length : The total length of the dataset. - - Raises: - TypeError: - If `left_size` or `right_size` is not an integer or float. - ValueError: - If `left_size` or `right_size` is negative or greater - than 1 or greater than `total_length`. - - Returns: - tuple: A tuple of rescaled left_size and right_size - """ - left_size_type = type(left_size) - right_size_type = type(right_size) - - # check both left_size and right_size are integers or floats - if ((left_size is not None and left_size_type not in [int, float]) and - (right_size is not None and right_size_type not in [int, float])): - raise TypeError('Invalid `left_size` and `right_size` Types. 
Expected: ' - 'integer or float or None, Received: type(left_size)=' - f'{left_size_type} and type(right_size)={right_size_type}') - - # check left_size is a integer or float - if left_size is not None and left_size_type not in [int, float]: - raise TypeError('Invalid `left_size` Type. Expected: int or float or None, ' - f'Received: type(left_size)={left_size_type}. ') - - # check right_size is a integer or float - if right_size is not None and right_size_type not in [int, float]: - raise TypeError(f'Invalid `right_size` Type. ' - 'Expected: int or float or None,' - f'Received: type(right_size)={right_size_type}.') - - # check left_size and right_size are non-zero - if left_size == 0 and right_size == 0: - raise ValueError('Both `left_size` and `right_size` are zero. ' - 'At least one of the split sizes must be non-zero.') - - # check left_size is non-negative and less than 1 and less than total_length - if (left_size_type == int and (left_size <= 0 or left_size >= total_length) or - left_size_type == float and (left_size <= 0 or left_size >= 1)): - raise ValueError('`left_size` should be either a positive integer ' - f'smaller than {total_length}, or a float ' - 'within the range `[0, 1]`. Received: left_size=' - f'{left_size}') - - # check right_size is non-negative and less than 1 and less than total_length - if (right_size_type == int and - (right_size <= 0 or right_size >= total_length) or - right_size_type == float and (right_size <= 0 or right_size >= 1)): - raise ValueError('`right_size` should be either a positive integer ' - f'and smaller than {total_length} or a float ' - 'within the range `[0, 1]`. Received: right_size=' - f'{right_size}') - - # check sum of left_size and right_size is less than or equal to total_length - if right_size_type == left_size_type == float and right_size + left_size > 1: - raise ValueError('The sum of `left_size` and `right_size` is greater ' - 'than 1. It must be less than or equal to 1.') - - if left_size_type == float: - left_size = round(left_size * total_length) - elif left_size_type == int: - left_size = float(left_size) - - if right_size_type == float: - right_size = round(right_size * total_length) - elif right_size_type == int: - right_size = float(right_size) - - if left_size is None: - left_size = total_length - right_size - elif right_size is None: - right_size = total_length - left_size - - if left_size + right_size > total_length: - raise ValueError( - 'The sum of `left_size` and `right_size` should ' - 'be smaller than the {total_length}. ' - f'Received: left_size + right_size = {left_size+right_size}' - f'and total_length = {total_length}') - - for split, side in [(left_size, 'left'), (right_size, 'right')]: - if split == 0: - raise ValueError(f'With `dataset` of length={total_length}, `left_size`=' - f'{left_size} and `right_size`={right_size}.' - f'Resulting {side} side dataset split will be empty. ' - 'Adjust any of the aforementioned parameters') - - left_size, right_size = int(left_size), int(right_size) - return left_size, right_size + """Rescale the dataset split sizes. + + We want to ensure that the sum of + the split sizes is equal to the total length of the dataset. + + Args: + left_size : The size of the left dataset split. + right_size : The size of the right dataset split. + total_length : The total length of the dataset. + + Raises: + TypeError: - If `left_size` or `right_size` is not an integer or float. + ValueError: - If `left_size` or `right_size` is negative or greater + than 1 or greater than `total_length`. 
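A worked example of this rescaling contract, calling the private helper directly (module path as in this patch; values are illustrative):

from keras.utils import dataset_utils

# With total_length=10: a float size is rounded, an int size is kept,
# and a missing side receives the complement.
assert dataset_utils._rescale_dataset_split_sizes(0.8, None, 10) == (8, 2)
assert dataset_utils._rescale_dataset_split_sizes(None, 3, 10) == (7, 3)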
+ + Returns: + tuple: A tuple of rescaled left_size and right_size + """ + left_size_type = type(left_size) + right_size_type = type(right_size) + + # check both left_size and right_size are integers or floats + if (left_size is not None and left_size_type not in [int, float]) and ( + right_size is not None and right_size_type not in [int, float] + ): + raise TypeError( + "Invalid `left_size` and `right_size` Types. Expected: " + "integer or float or None, Received: type(left_size)=" + f"{left_size_type} and type(right_size)={right_size_type}" + ) + + # check left_size is a integer or float + if left_size is not None and left_size_type not in [int, float]: + raise TypeError( + "Invalid `left_size` Type. Expected: int or float or None, " + f"Received: type(left_size)={left_size_type}. " + ) + + # check right_size is a integer or float + if right_size is not None and right_size_type not in [int, float]: + raise TypeError( + "Invalid `right_size` Type. " + "Expected: int or float or None," + f"Received: type(right_size)={right_size_type}." + ) + + # check left_size and right_size are non-zero + if left_size == 0 and right_size == 0: + raise ValueError( + "Both `left_size` and `right_size` are zero. " + "At least one of the split sizes must be non-zero." + ) + + # check left_size is non-negative and less than 1 and less than total_length + if ( + left_size_type == int + and (left_size <= 0 or left_size >= total_length) + or left_size_type == float + and (left_size <= 0 or left_size >= 1) + ): + raise ValueError( + "`left_size` should be either a positive integer " + f"smaller than {total_length}, or a float " + "within the range `[0, 1]`. Received: left_size=" + f"{left_size}" + ) + + # check right_size is non-negative and less than 1 and less than + # total_length + if ( + right_size_type == int + and (right_size <= 0 or right_size >= total_length) + or right_size_type == float + and (right_size <= 0 or right_size >= 1) + ): + raise ValueError( + "`right_size` should be either a positive integer " + f"and smaller than {total_length} or a float " + "within the range `[0, 1]`. Received: right_size=" + f"{right_size}" + ) + + # check sum of left_size and right_size is less than or equal to + # total_length + if ( + right_size_type == left_size_type == float + and right_size + left_size > 1 + ): + raise ValueError( + "The sum of `left_size` and `right_size` is greater " + "than 1. It must be less than or equal to 1." + ) + + if left_size_type == float: + left_size = round(left_size * total_length) + elif left_size_type == int: + left_size = float(left_size) + + if right_size_type == float: + right_size = round(right_size * total_length) + elif right_size_type == int: + right_size = float(right_size) + + if left_size is None: + left_size = total_length - right_size + elif right_size is None: + right_size = total_length - left_size + + if left_size + right_size > total_length: + raise ValueError( + "The sum of `left_size` and `right_size` should " + "be smaller than the {total_length}. " + f"Received: left_size + right_size = {left_size+right_size}" + f"and total_length = {total_length}" + ) + + for split, side in [(left_size, "left"), (right_size, "right")]: + if split == 0: + raise ValueError( + f"With `dataset` of length={total_length}, `left_size`=" + f"{left_size} and `right_size`={right_size}." + f"Resulting {side} side dataset split will be empty. 
" + "Adjust any of the aforementioned parameters" + ) + + left_size, right_size = int(left_size), int(right_size) + return left_size, right_size def _get_type_spec(dataset): - """Get the type spec of the dataset.""" - if isinstance(dataset, tuple): - return tuple - elif isinstance(dataset, list): - return list - elif isinstance(dataset, np.ndarray): - return np.ndarray - elif isinstance(dataset, dict): - return dict - elif isinstance(dataset, tf.data.Dataset): - return tf.data.Dataset - else: - return None + """Get the type spec of the dataset.""" + if isinstance(dataset, tuple): + return tuple + elif isinstance(dataset, list): + return list + elif isinstance(dataset, np.ndarray): + return np.ndarray + elif isinstance(dataset, dict): + return dict + elif isinstance(dataset, tf.data.Dataset): + return tf.data.Dataset + else: + return None def is_batched(tf_dataset): - """"Check if the `tf.data.Dataset` is batched.""" - try: - return tf_dataset.__class__.__name__ == 'BatchDataset' - except AttributeError: - return False + """ "Check if the `tf.data.Dataset` is batched.""" + return hasattr(tf_dataset, "_batch_size") def get_batch_size(tf_dataset): - """Get the batch size of the dataset.""" - if is_batched(tf_dataset): - return tf_dataset._batch_size # pylint: disable=protected-access - else: - return None - - -def index_directory(directory, - labels, - formats, - class_names=None, - shuffle=True, - seed=None, - follow_links=False): - """Make list of all files in the subdirs of `directory`, with their labels. - - Args: - directory: The target directory (string). - labels: Either "inferred" - (labels are generated from the directory structure), - None (no labels), - or a list/tuple of integer labels of the same size as the number of - valid files found in the directory. Labels should be sorted according - to the alphanumeric order of the image file paths - (obtained via `os.walk(directory)` in Python). - formats: Allowlist of file extensions to index (e.g. ".jpg", ".txt"). - class_names: Only valid if "labels" is "inferred". This is the explicit - list of class names (must match names of subdirectories). Used - to control the order of the classes - (otherwise alphanumerical order is used). - shuffle: Whether to shuffle the data. Default: True. - If set to False, sorts the data in alphanumeric order. - seed: Optional random seed for shuffling. - follow_links: Whether to visits subdirectories pointed to by symlinks. - - Returns: - tuple (file_paths, labels, class_names). - file_paths: list of file paths (strings). - labels: list of matching integer labels (same length as file_paths) - class_names: names of the classes corresponding to these labels, in order. - """ - if labels is None: - # in the no-label case, index from the parent directory down. - subdirs = [''] - class_names = subdirs - else: - subdirs = [] - for subdir in sorted(tf.io.gfile.listdir(directory)): - if tf.io.gfile.isdir(tf.io.gfile.join(directory, subdir)): - if subdir.endswith('/'): - subdir = subdir[:-1] - subdirs.append(subdir) - if not class_names: - class_names = subdirs + """Get the batch size of the dataset.""" + if is_batched(tf_dataset): + return tf_dataset._batch_size else: - if set(class_names) != set(subdirs): - raise ValueError( - 'The `class_names` passed did not match the ' - 'names of the subdirectories of the target directory. 
' - f'Expected: {subdirs}, but received: {class_names}') - class_indices = dict(zip(class_names, range(len(class_names)))) - - # Build an index of the files - # in the different class subfolders. - pool = multiprocessing.pool.ThreadPool() - results = [] - filenames = [] - - for dirpath in (tf.io.gfile.join(directory, subdir) for subdir in subdirs): - results.append( - pool.apply_async(index_subdirectory, - (dirpath, class_indices, follow_links, formats))) - labels_list = [] - for res in results: - partial_filenames, partial_labels = res.get() - labels_list.append(partial_labels) - filenames += partial_filenames - if labels not in ('inferred', None): - if len(labels) != len(filenames): - raise ValueError('Expected the lengths of `labels` to match the number ' - 'of files in the target directory. len(labels) is ' - f'{len(labels)} while we found {len(filenames)} files ' - f'in directory {directory}.') - else: - i = 0 - labels = np.zeros((len(filenames),), dtype='int32') - for partial_labels in labels_list: - labels[i:i + len(partial_labels)] = partial_labels - i += len(partial_labels) - - if labels is None: - print(f'Found {len(filenames)} files.') - else: - print(f'Found {len(filenames)} files belonging ' - f'to {len(class_names)} classes.') - pool.close() - pool.join() - file_paths = [tf.io.gfile.join(directory, fname) for fname in filenames] - - if shuffle: - # Shuffle globally to erase macro-structure - if seed is None: - seed = np.random.randint(1e6) - rng = np.random.RandomState(seed) - rng.shuffle(file_paths) - rng = np.random.RandomState(seed) - rng.shuffle(labels) - return file_paths, labels, class_names + return None + + +def index_directory( + directory, + labels, + formats, + class_names=None, + shuffle=True, + seed=None, + follow_links=False, +): + """Make list of all files in `directory`, with their labels. + + Args: + directory: Directory where the data is located. + If `labels` is "inferred", it should contain + subdirectories, each containing files for a class. + Otherwise, the directory structure is ignored. + labels: Either "inferred" + (labels are generated from the directory structure), + None (no labels), + or a list/tuple of integer labels of the same size as the number of + valid files found in the directory. Labels should be sorted according + to the alphanumeric order of the image file paths + (obtained via `os.walk(directory)` in Python). + formats: Allowlist of file extensions to index (e.g. ".jpg", ".txt"). + class_names: Only valid if "labels" is "inferred". This is the explicit + list of class names (must match names of subdirectories). Used + to control the order of the classes + (otherwise alphanumerical order is used). + shuffle: Whether to shuffle the data. Default: True. + If set to False, sorts the data in alphanumeric order. + seed: Optional random seed for shuffling. + follow_links: Whether to visits subdirectories pointed to by symlinks. + + Returns: + tuple (file_paths, labels, class_names). + file_paths: list of file paths (strings). + labels: list of matching integer labels (same length as file_paths) + class_names: names of the classes corresponding to these labels, in + order. + """ + if labels != "inferred": + # in the explicit/no-label cases, index from the parent directory down. 
+ subdirs = [""] + class_names = subdirs + else: + subdirs = [] + for subdir in sorted(tf.io.gfile.listdir(directory)): + if tf.io.gfile.isdir(tf.io.gfile.join(directory, subdir)): + if not subdir.startswith("."): + if subdir.endswith("/"): + subdir = subdir[:-1] + subdirs.append(subdir) + if not class_names: + class_names = subdirs + else: + if set(class_names) != set(subdirs): + raise ValueError( + "The `class_names` passed did not match the " + "names of the subdirectories of the target directory. " + f"Expected: {subdirs}, but received: {class_names}" + ) + class_indices = dict(zip(class_names, range(len(class_names)))) + + # Build an index of the files + # in the different class subfolders. + pool = multiprocessing.pool.ThreadPool() + results = [] + filenames = [] + + for dirpath in (tf.io.gfile.join(directory, subdir) for subdir in subdirs): + results.append( + pool.apply_async( + index_subdirectory, + (dirpath, class_indices, follow_links, formats), + ) + ) + labels_list = [] + for res in results: + partial_filenames, partial_labels = res.get() + labels_list.append(partial_labels) + filenames += partial_filenames + if labels not in ("inferred", None): + if len(labels) != len(filenames): + raise ValueError( + "Expected the lengths of `labels` to match the number " + "of files in the target directory. len(labels) is " + f"{len(labels)} while we found {len(filenames)} files " + f"in directory {directory}." + ) + class_names = sorted(set(labels)) + else: + i = 0 + labels = np.zeros((len(filenames),), dtype="int32") + for partial_labels in labels_list: + labels[i : i + len(partial_labels)] = partial_labels + i += len(partial_labels) + + if labels is None: + io_utils.print_msg(f"Found {len(filenames)} files.") + else: + io_utils.print_msg( + f"Found {len(filenames)} files belonging " + f"to {len(class_names)} classes." + ) + pool.close() + pool.join() + file_paths = [tf.io.gfile.join(directory, fname) for fname in filenames] + + if shuffle: + # Shuffle globally to erase macro-structure + if seed is None: + seed = np.random.randint(1e6) + rng = np.random.RandomState(seed) + rng.shuffle(file_paths) + rng = np.random.RandomState(seed) + rng.shuffle(labels) + return file_paths, labels, class_names def iter_valid_files(directory, follow_links, formats): - if not follow_links: - walk = tf.io.gfile.walk(directory) - else: - walk = os.walk(directory, followlinks=follow_links) - for root, _, files in sorted(walk, key=lambda x: x[0]): - for fname in sorted(files): - if fname.lower().endswith(formats): - yield root, fname + if not follow_links: + walk = tf.io.gfile.walk(directory) + else: + walk = os.walk(directory, followlinks=follow_links) + for root, _, files in sorted(walk, key=lambda x: x[0]): + for fname in sorted(files): + if fname.lower().endswith(formats): + yield root, fname def index_subdirectory(directory, class_indices, follow_links, formats): - """Recursively walks directory and list image paths and their class index. - - Args: - directory: string, target directory. - class_indices: dict mapping class names to their index. - follow_links: boolean, whether to recursively follow subdirectories - (if False, we only list top-level images in `directory`). - formats: Allowlist of file extensions to index (e.g. ".jpg", ".txt"). - - Returns: - tuple `(filenames, labels)`. `filenames` is a list of relative file - paths, and `labels` is a list of integer labels corresponding to these - files. 
- """ - dirname = os.path.basename(directory) - valid_files = iter_valid_files(directory, follow_links, formats) - labels = [] - filenames = [] - for root, fname in valid_files: - labels.append(class_indices[dirname]) - absolute_path = tf.io.gfile.join(root, fname) - relative_path = tf.io.gfile.join( - dirname, os.path.relpath(absolute_path, directory)) - filenames.append(relative_path) - return filenames, labels + """Recursively walks directory and list image paths and their class index. + + Args: + directory: string, target directory. + class_indices: dict mapping class names to their index. + follow_links: boolean, whether to recursively follow subdirectories + (if False, we only list top-level images in `directory`). + formats: Allowlist of file extensions to index (e.g. ".jpg", ".txt"). + + Returns: + tuple `(filenames, labels)`. `filenames` is a list of relative file + paths, and `labels` is a list of integer labels corresponding to these + files. + """ + dirname = os.path.basename(directory) + valid_files = iter_valid_files(directory, follow_links, formats) + labels = [] + filenames = [] + for root, fname in valid_files: + labels.append(class_indices[dirname]) + absolute_path = tf.io.gfile.join(root, fname) + relative_path = tf.io.gfile.join( + dirname, os.path.relpath(absolute_path, directory) + ) + filenames.append(relative_path) + return filenames, labels def get_training_or_validation_split(samples, labels, validation_split, subset): - """Potentially restict samples & labels to a training or validation split. - - Args: - samples: List of elements. - labels: List of corresponding labels. - validation_split: Float, fraction of data to reserve for validation. - subset: Subset of the data to return. - Either "training", "validation", or None. If None, we return all of the - data. - - Returns: - tuple (samples, labels), potentially restricted to the specified subset. - """ - if not validation_split: + """Potentially restict samples & labels to a training or validation split. + + Args: + samples: List of elements. + labels: List of corresponding labels. + validation_split: Float, fraction of data to reserve for validation. + subset: Subset of the data to return. + Either "training", "validation", or None. If None, we return all of the + data. + + Returns: + tuple (samples, labels), potentially restricted to the specified subset. + """ + if not validation_split: + return samples, labels + + num_val_samples = int(validation_split * len(samples)) + if subset == "training": + io_utils.print_msg( + f"Using {len(samples) - num_val_samples} " f"files for training." 
+ ) + samples = samples[:-num_val_samples] + labels = labels[:-num_val_samples] + elif subset == "validation": + io_utils.print_msg(f"Using {num_val_samples} files for validation.") + samples = samples[-num_val_samples:] + labels = labels[-num_val_samples:] + else: + raise ValueError( + '`subset` must be either "training" ' + f'or "validation", received: {subset}' + ) return samples, labels - num_val_samples = int(validation_split * len(samples)) - if subset == 'training': - print(f'Using {len(samples) - num_val_samples} files for training.') - samples = samples[:-num_val_samples] - labels = labels[:-num_val_samples] - elif subset == 'validation': - print(f'Using {num_val_samples} files for validation.') - samples = samples[-num_val_samples:] - labels = labels[-num_val_samples:] - else: - raise ValueError('`subset` must be either "training" ' - f'or "validation", received: {subset}') - return samples, labels - def labels_to_dataset(labels, label_mode, num_classes): - """Create a tf.data.Dataset from the list/tuple of labels. - - Args: - labels: list/tuple of labels to be converted into a tf.data.Dataset. - label_mode: String describing the encoding of `labels`. Options are: - - 'binary' indicates that the labels (there can be only 2) are encoded as - `float32` scalars with values 0 or 1 (e.g. for `binary_crossentropy`). - - 'categorical' means that the labels are mapped into a categorical vector. - (e.g. for `categorical_crossentropy` loss). - num_classes: number of classes of labels. - - Returns: - A `Dataset` instance. - """ - label_ds = tf.data.Dataset.from_tensor_slices(labels) - if label_mode == 'binary': - label_ds = label_ds.map( - lambda x: tf.expand_dims(tf.cast(x, 'float32'), axis=-1), - num_parallel_calls=tf.data.AUTOTUNE) - elif label_mode == 'categorical': - label_ds = label_ds.map(lambda x: tf.one_hot(x, num_classes), - num_parallel_calls=tf.data.AUTOTUNE) - return label_ds + """Create a tf.data.Dataset from the list/tuple of labels. + + Args: + labels: list/tuple of labels to be converted into a tf.data.Dataset. + label_mode: String describing the encoding of `labels`. Options are: + - 'binary' indicates that the labels (there can be only 2) are encoded as + `float32` scalars with values 0 or 1 (e.g. for `binary_crossentropy`). + - 'categorical' means that the labels are mapped into a categorical + vector. (e.g. for `categorical_crossentropy` loss). + num_classes: number of classes of labels. + + Returns: + A `Dataset` instance. + """ + label_ds = tf.data.Dataset.from_tensor_slices(labels) + if label_mode == "binary": + label_ds = label_ds.map( + lambda x: tf.expand_dims(tf.cast(x, "float32"), axis=-1), + num_parallel_calls=tf.data.AUTOTUNE, + ) + elif label_mode == "categorical": + label_ds = label_ds.map( + lambda x: tf.one_hot(x, num_classes), + num_parallel_calls=tf.data.AUTOTUNE, + ) + return label_ds def check_validation_split_arg(validation_split, subset, shuffle, seed): - """Raise errors in case of invalid argument values. - - Args: - validation_split: float between 0 and 1, fraction of data to reserve for - validation. - subset: One of "training", "validation" or "both". Only used if - `validation_split` is set. - shuffle: Whether to shuffle the data. Either True or False. - seed: random seed for shuffling and transformations. 
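For context, `labels_to_dataset` above maps integer labels to model-ready tensors: `'binary'` yields float32 values of shape `(1,)` and `'categorical'` yields one-hot vectors. A small sketch of the equivalent transformations (illustrative only):

```python
import tensorflow as tf

labels = [0, 2, 1]
num_classes = 3

# label_mode="categorical": integer labels become one-hot vectors.
categorical = tf.data.Dataset.from_tensor_slices(labels).map(
    lambda x: tf.one_hot(x, num_classes)
)

# label_mode="binary": 0/1 labels become float32 tensors of shape (1,).
binary = tf.data.Dataset.from_tensor_slices([0, 1, 1]).map(
    lambda x: tf.expand_dims(tf.cast(x, "float32"), axis=-1)
)

print([y.numpy().tolist() for y in categorical])  # [[1,0,0], [0,0,1], [0,1,0]]
print([y.numpy().tolist() for y in binary])  # [[0.0], [1.0], [1.0]]
```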
- """ - if validation_split and not 0 < validation_split < 1: - raise ValueError( - '`validation_split` must be between 0 and 1, ' - f'received: {validation_split}') - if (validation_split or subset) and not (validation_split and subset): - raise ValueError( - 'If `subset` is set, `validation_split` must be set, and inversely.') - if subset not in ('training', 'validation', 'both', None): - raise ValueError('`subset` must be either "training", ' - f'"validation" or "both", received: {subset}') - if validation_split and shuffle and seed is None: - raise ValueError( - 'If using `validation_split` and shuffling the data, you must provide ' - 'a `seed` argument, to make sure that there is no overlap between the ' - 'training and validation subset.') + """Raise errors in case of invalid argument values. + + Args: + validation_split: float between 0 and 1, fraction of data to reserve for + validation. + subset: One of "training", "validation" or "both". Only used if + `validation_split` is set. + shuffle: Whether to shuffle the data. Either True or False. + seed: random seed for shuffling and transformations. + """ + if validation_split and not 0 < validation_split < 1: + raise ValueError( + "`validation_split` must be between 0 and 1, " + f"received: {validation_split}" + ) + if (validation_split or subset) and not (validation_split and subset): + raise ValueError( + "If `subset` is set, `validation_split` must be set, and inversely." + ) + if subset not in ("training", "validation", "both", None): + raise ValueError( + '`subset` must be either "training", ' + f'"validation" or "both", received: {subset}' + ) + if validation_split and shuffle and seed is None: + raise ValueError( + "If using `validation_split` and shuffling the data, you must " + "provide a `seed` argument, to make sure that there is no " + "overlap between the training and validation subset." 
+ ) diff --git a/keras/utils/dataset_utils_test.py b/keras/utils/dataset_utils_test.py index 43bfc3fad263..1de07df756bc 100644 --- a/keras/utils/dataset_utils_test.py +++ b/keras/utils/dataset_utils_test.py @@ -1,457 +1,593 @@ """Tests for Dataset Utils""" -import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes +import os +import shutil import numpy as np -from keras.datasets import mnist +import tensorflow.compat.v2 as tf + from keras.testing_infra import test_utils from keras.utils import dataset_utils @test_utils.run_v2_only class SplitDatasetTest(tf.test.TestCase): + def test_numpy_array(self): + dataset = np.ones(shape=(200, 32)) + res = dataset_utils.split_dataset( + dataset, left_size=0.8, right_size=0.2 + ) + + self.assertLen(res, 2) + left_split, right_split = res + + self.assertIsInstance(left_split, tf.data.Dataset) + self.assertIsInstance(right_split, tf.data.Dataset) + + self.assertLen(left_split, 160) + self.assertLen(right_split, 40) + + self.assertAllEqual(dataset[:160], list(left_split)) + self.assertAllEqual(dataset[-40:], list(right_split)) + + def test_list_of_numpy_arrays(self): + # test with list of np arrays with same shapes + dataset = [np.ones(shape=(200, 32)), np.zeros(shape=(200, 32))] + res = dataset_utils.split_dataset(dataset, left_size=4) + + self.assertLen(res, 2) + left_split, right_split = res + + self.assertIsInstance(left_split, tf.data.Dataset) + self.assertIsInstance(right_split, tf.data.Dataset) + + self.assertEqual(np.array(list(left_split)).shape, (4, 2, 32)) + self.assertEqual(np.array(list(right_split)).shape, (196, 2, 32)) + + # test with different shapes + dataset = [np.ones(shape=(5, 3)), np.ones(shape=(5,))] + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=0.3 + ) + + self.assertEqual(np.array(list(left_split), dtype=object).shape, (2, 2)) + self.assertEqual( + np.array(list(right_split), dtype=object).shape, (3, 2) + ) + + self.assertEqual( + np.array(list(left_split)[0], dtype=object).shape, (2,) + ) + self.assertEqual(np.array(list(left_split)[0][0]).shape, (3,)) + self.assertEqual(np.array(list(left_split)[0][1]).shape, ()) + + self.assertEqual( + np.array(list(right_split)[0], dtype=object).shape, (2,) + ) + self.assertEqual(np.array(list(right_split)[0][0]).shape, (3,)) + self.assertEqual(np.array(list(right_split)[0][1]).shape, ()) + + def test_dataset_with_invalid_shape(self): + with self.assertRaisesRegex( + ValueError, + "Received a list of NumPy arrays with different lengths", + ): + dataset = [np.ones(shape=(200, 32)), np.zeros(shape=(100, 32))] + dataset_utils.split_dataset(dataset, left_size=4) + + with self.assertRaisesRegex( + ValueError, + "Received a tuple of NumPy arrays with different lengths", + ): + dataset = (np.ones(shape=(200, 32)), np.zeros(shape=(201, 32))) + dataset_utils.split_dataset(dataset, left_size=4) + + def test_tuple_of_numpy_arrays(self): + dataset = (np.random.rand(4, 3), np.random.rand(4, 3)) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=2 + ) + + self.assertIsInstance(left_split, tf.data.Dataset) + self.assertIsInstance(right_split, tf.data.Dataset) + + self.assertEqual(len(left_split), 2) + self.assertEqual(len(right_split), 2) + + self.assertEqual(np.array(list(left_split)[0]).shape, (2, 3)) + self.assertEqual(np.array(list(left_split)[1]).shape, (2, 3)) + + # test with fractional size + dataset = (np.random.rand(5, 32, 32), np.random.rand(5, 32, 32)) + left_split, right_split = dataset_utils.split_dataset( + dataset, 
right_size=0.4 + ) + self.assertIsInstance(left_split, tf.data.Dataset) + self.assertIsInstance(right_split, tf.data.Dataset) + + self.assertEqual(np.array(list(left_split)).shape, (3, 2, 32, 32)) + self.assertEqual(np.array(list(right_split)).shape, (2, 2, 32, 32)) + + self.assertEqual(np.array(list(left_split))[0].shape, (2, 32, 32)) + self.assertEqual(np.array(list(left_split))[1].shape, (2, 32, 32)) + + self.assertEqual(np.array(list(right_split))[0].shape, (2, 32, 32)) + self.assertEqual(np.array(list(right_split))[1].shape, (2, 32, 32)) + + # test with tuple of np arrays with different shapes + dataset = ( + np.random.rand(5, 32, 32), + np.random.rand( + 5, + ), + ) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=2, right_size=3 + ) + self.assertIsInstance(left_split, tf.data.Dataset) + self.assertIsInstance(right_split, tf.data.Dataset) + + self.assertEqual(np.array(list(left_split), dtype=object).shape, (2, 2)) + self.assertEqual( + np.array(list(right_split), dtype=object).shape, (3, 2) + ) + + self.assertEqual( + np.array(list(left_split)[0], dtype=object).shape, (2,) + ) + self.assertEqual(np.array(list(left_split)[0][0]).shape, (32, 32)) + self.assertEqual(np.array(list(left_split)[0][1]).shape, ()) + + self.assertEqual( + np.array(list(right_split)[0], dtype=object).shape, (2,) + ) + self.assertEqual(np.array(list(right_split)[0][0]).shape, (32, 32)) + self.assertEqual(np.array(list(right_split)[0][1]).shape, ()) + + def test_batched_tf_dataset_of_vectors(self): + vectors = np.ones(shape=(100, 32, 32, 1)) + dataset = tf.data.Dataset.from_tensor_slices(vectors) + dataset = dataset.batch(10) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=2 + ) + + # Ensure that the splits are batched + self.assertEqual(len(list(right_split)), 10) + + left_split, right_split = left_split.unbatch(), right_split.unbatch() + self.assertAllEqual(np.array(list(left_split)).shape, (2, 32, 32, 1)) + self.assertAllEqual(np.array(list(right_split)).shape, (98, 32, 32, 1)) + dataset = dataset.unbatch() + self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) + + def test_batched_tf_dataset_of_tuple_of_vectors(self): + tuple_of_vectors = ( + np.random.rand(10, 32, 32), + np.random.rand(10, 32, 32), + ) + dataset = tf.data.Dataset.from_tensor_slices(tuple_of_vectors) + dataset = dataset.batch(2) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=4 + ) + + # Ensure that the splits are batched + self.assertEqual(np.array(list(right_split)).shape, (3, 2, 2, 32, 32)) + self.assertEqual(np.array(list(left_split)).shape, (2, 2, 2, 32, 32)) + + left_split, right_split = left_split.unbatch(), right_split.unbatch() + self.assertAllEqual(np.array(list(left_split)).shape, (4, 2, 32, 32)) + self.assertAllEqual(np.array(list(right_split)).shape, (6, 2, 32, 32)) + + dataset = dataset.unbatch() + self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) + + def test_batched_tf_dataset_of_dict_of_vectors(self): + dict_samples = {"X": np.random.rand(10, 3), "Y": np.random.rand(10, 3)} + dataset = tf.data.Dataset.from_tensor_slices(dict_samples) + dataset = dataset.batch(2) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=2 + ) + + self.assertAllEqual(np.array(list(left_split)).shape, (1,)) + self.assertAllEqual(np.array(list(right_split)).shape, (4,)) + + left_split, right_split = left_split.unbatch(), right_split.unbatch() + self.assertEqual(len(list(left_split)), 2) + 
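The batched-dataset tests above rely on `split_dataset` counting samples rather than batches: a 100-sample dataset batched by 10 and split with `left_size=2` yields 2 samples on the left and 98 on the right, and both splits come back batched. A hedged sketch of that behavior:

```python
import numpy as np
import tensorflow as tf
from keras.utils import dataset_utils

samples = np.ones((100, 32, 32, 1))
dataset = tf.data.Dataset.from_tensor_slices(samples).batch(10)

left, right = dataset_utils.split_dataset(dataset, left_size=2)

# Sizes are counted in samples, not batches, and the splits stay batched.
assert len(list(left.unbatch())) == 2
assert len(list(right.unbatch())) == 98
```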
self.assertEqual(len(list(right_split)), 8) + for i in range(10): + if i < 2: + self.assertEqual( + list(left_split)[i], list(dataset.unbatch())[i] + ) + else: + self.assertEqual( + list(right_split)[i - 2], list(dataset.unbatch())[i] + ) + + # test with dict of np arrays with different shapes + dict_samples = { + "images": np.random.rand(10, 16, 16, 3), + "labels": np.random.rand( + 10, + ), + } + dataset = tf.data.Dataset.from_tensor_slices(dict_samples) + dataset = dataset.batch(1) + left_split, right_split = dataset_utils.split_dataset( + dataset, right_size=0.3 + ) + + self.assertAllEqual(np.array(list(left_split)).shape, (7,)) + self.assertAllEqual(np.array(list(right_split)).shape, (3,)) + + dataset = dataset.unbatch() + left_split, right_split = left_split.unbatch(), right_split.unbatch() + self.assertEqual(len(list(left_split)), 7) + self.assertEqual(len(list(right_split)), 3) + for i in range(10): + if i < 7: + self.assertEqual(list(left_split)[i], list(dataset)[i]) + else: + self.assertEqual(list(right_split)[i - 7], list(dataset)[i]) + + def test_unbatched_tf_dataset_of_vectors(self): + vectors = np.ones(shape=(100, 16, 16, 3)) + dataset = tf.data.Dataset.from_tensor_slices(vectors) + + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=0.25 + ) + + self.assertAllEqual(np.array(list(left_split)).shape, (25, 16, 16, 3)) + self.assertAllEqual(np.array(list(right_split)).shape, (75, 16, 16, 3)) + + self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) + + dataset = [np.random.rand(10, 3, 3) for _ in range(5)] + dataset = tf.data.Dataset.from_tensor_slices(dataset) + + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=2 + ) + self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) + + def test_unbatched_tf_dataset_of_tuple_of_vectors(self): + # test with tuple of np arrays with same shape + X, Y = (np.random.rand(10, 32, 32, 1), np.random.rand(10, 32, 32, 1)) + dataset = tf.data.Dataset.from_tensor_slices((X, Y)) + + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=5 + ) + + self.assertEqual(len(list(left_split)), 5) + self.assertEqual(len(list(right_split)), 5) + self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) + + # test with tuple of np arrays with different shapes + X, Y = ( + np.random.rand(5, 3, 3), + np.random.rand( + 5, + ), + ) + dataset = tf.data.Dataset.from_tensor_slices((X, Y)) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=0.5 + ) + + self.assertEqual(len(list(left_split)), 2) + self.assertEqual(len(list(right_split)), 3) + self.assertEqual(np.array(list(left_split)[0][0]).shape, (3, 3)) + self.assertEqual(np.array(list(left_split)[0][1]).shape, ()) + + def test_unbatched_tf_dataset_of_dict_of_vectors(self): + # test with dict of np arrays of same shape + dict_samples = {"X": np.random.rand(10, 2), "Y": np.random.rand(10, 2)} + dataset = tf.data.Dataset.from_tensor_slices(dict_samples) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=2 + ) + self.assertEqual(len(list(left_split)), 2) + self.assertEqual(len(list(right_split)), 8) + for i in range(10): + if i < 2: + self.assertEqual(list(left_split)[i], list(dataset)[i]) + else: + self.assertEqual(list(right_split)[i - 2], list(dataset)[i]) + + # test with dict of np arrays with different shapes + dict_samples = { + "images": np.random.rand(10, 16, 16, 3), + "labels": np.random.rand( + 10, + ), + } + dataset = 
tf.data.Dataset.from_tensor_slices(dict_samples) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=0.3 + ) + self.assertEqual(len(list(left_split)), 3) + self.assertEqual(len(list(right_split)), 7) + for i in range(10): + if i < 3: + self.assertEqual(list(left_split)[i], list(dataset)[i]) + else: + self.assertEqual(list(right_split)[i - 3], list(dataset)[i]) + + # test with dict of text arrays + txt_feature = ["abb", "bb", "cc", "d", "e", "f", "g", "h", "i", "j"] + dict_samples = { + "txt_feature": txt_feature, + "label": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + } + dataset = tf.data.Dataset.from_tensor_slices(dict_samples) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=0.45, right_size=0.55 + ) + self.assertEqual(len(list(left_split)), 4) + self.assertEqual(len(list(right_split)), 6) + for i in range(10): + if i < 4: + self.assertEqual(list(left_split)[i], list(dataset)[i]) + else: + self.assertEqual(list(right_split)[i - 4], list(dataset)[i]) + + def test_list_dataset(self): + dataset = [np.ones(shape=(10, 10, 10)) for _ in range(10)] + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=5, right_size=5 + ) + self.assertEqual(len(left_split), len(right_split)) + self.assertIsInstance(left_split, tf.data.Dataset) + self.assertIsInstance(left_split, tf.data.Dataset) + + dataset = [np.ones(shape=(10, 10, 10)) for _ in range(10)] + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=0.6, right_size=0.4 + ) + self.assertEqual(len(left_split), 6) + self.assertEqual(len(right_split), 4) + + def test_invalid_dataset(self): + with self.assertRaisesRegex( + TypeError, + "The `dataset` argument must be either a `tf.data.Dataset` " + "object or a list/tuple of arrays.", + ): + dataset_utils.split_dataset(dataset=None, left_size=5) + with self.assertRaisesRegex( + TypeError, + "The `dataset` argument must be either a `tf.data.Dataset` " + "object or a list/tuple of arrays.", + ): + dataset_utils.split_dataset(dataset=1, left_size=5) + with self.assertRaisesRegex( + TypeError, + "The `dataset` argument must be either a `tf.data.Dataset` " + "object or a list/tuple of arrays.", + ): + dataset_utils.split_dataset(dataset=float(1.2), left_size=5) + with self.assertRaisesRegex( + TypeError, + "The `dataset` argument must be either a `tf.data.Dataset` " + "object or a list/tuple of arrays.", + ): + dataset_utils.split_dataset(dataset=dict({}), left_size=5) + with self.assertRaisesRegex( + TypeError, + "The `dataset` argument must be either a `tf.data.Dataset` " + "object or a list/tuple of arrays.", + ): + dataset_utils.split_dataset(dataset=float("INF"), left_size=5) + + def test_valid_left_and_right_sizes(self): + dataset = np.array([1, 2, 3]) + splitted_dataset = dataset_utils.split_dataset(dataset, 1, 2) + self.assertLen(splitted_dataset, 2) + left_split, right_split = splitted_dataset + self.assertEqual(len(left_split), 1) + self.assertEqual(len(right_split), 2) + self.assertEqual(list(left_split), [1]) + self.assertEqual(list(right_split), [2, 3]) + + dataset = np.ones(shape=(200, 32)) + res = dataset_utils.split_dataset(dataset, left_size=150, right_size=50) + self.assertLen(res, 2) + self.assertIsInstance(res[0], tf.data.Dataset) + self.assertIsInstance(res[1], tf.data.Dataset) + + self.assertLen(res[0], 150) + self.assertLen(res[1], 50) + + dataset = np.ones(shape=(200, 32)) + res = dataset_utils.split_dataset(dataset, left_size=120) + self.assertLen(res, 2) + self.assertIsInstance(res[0], 
tf.data.Dataset) + self.assertIsInstance(res[1], tf.data.Dataset) + + self.assertLen(res[0], 120) + self.assertLen(res[1], 80) + + dataset = np.ones(shape=(10000, 16)) + res = dataset_utils.split_dataset(dataset, right_size=20) + self.assertLen(res, 2) + self.assertIsInstance(res[0], tf.data.Dataset) + self.assertIsInstance(res[1], tf.data.Dataset) + + self.assertLen(res[0], 9980) + self.assertLen(res[1], 20) + + dataset = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + splitted_dataset = dataset_utils.split_dataset( + dataset, left_size=0.1, right_size=0.9 + ) + self.assertLen(splitted_dataset, 2) + left_split, right_split = splitted_dataset + self.assertEqual(len(left_split), 1) + self.assertEqual(len(right_split), 9) + self.assertEqual(list(left_split), [1]) + self.assertEqual(list(right_split), [2, 3, 4, 5, 6, 7, 8, 9, 10]) + + dataset = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + splitted_dataset = dataset_utils.split_dataset( + dataset, left_size=2, right_size=5 + ) + self.assertLen(splitted_dataset, 2) + left_split, right_split = splitted_dataset + self.assertEqual(len(left_split), 2) + self.assertEqual(len(right_split), 5) + self.assertEqual(list(left_split), [1, 2]) + self.assertEqual(list(right_split), [6, 7, 8, 9, 10]) + + def test_float_left_and_right_sizes(self): + X = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]) + dataset = tf.data.Dataset.from_tensor_slices(X) + left_split, right_split = dataset_utils.split_dataset( + dataset, left_size=0.8, right_size=0.2 + ) + self.assertEqual(len(left_split), 2) + self.assertEqual(len(right_split), 1) + + def test_invalid_float_left_and_right_sizes(self): + expected_regex = ( + r"^(.*?(\bleft_size\b).*?(\bshould be\b)" + r".*?(\bwithin the range\b).*?(\b0\b).*?(\b1\b))" + ) + with self.assertRaisesRegexp(ValueError, expected_regex): + dataset = [ + np.ones(shape=(200, 32, 32)), + np.zeros(shape=(200, 32, 32)), + ] + dataset_utils.split_dataset(dataset, left_size=1.5, right_size=0.2) + + expected_regex = ( + r"^(.*?(\bright_size\b).*?(\bshould be\b)" + r".*?(\bwithin the range\b).*?(\b0\b).*?(\b1\b))" + ) + with self.assertRaisesRegex(ValueError, expected_regex): + dataset = [np.ones(shape=(200, 32)), np.zeros(shape=(200, 32))] + dataset_utils.split_dataset(dataset, left_size=0.8, right_size=-0.8) + + def test_None_and_zero_left_and_right_size(self): + expected_regex = ( + r"^.*?(\bleft_size\b).*?(\bright_size\b).*?(\bmust " + r"be specified\b).*?(\bReceived: left_size=None and" + r" right_size=None\b)" + ) + + with self.assertRaisesRegex(ValueError, expected_regex): + dataset_utils.split_dataset( + dataset=np.array([1, 2, 3]), left_size=None + ) + with self.assertRaisesRegex(ValueError, expected_regex): + dataset_utils.split_dataset( + np.array([1, 2, 3]), left_size=None, right_size=None + ) + + expected_regex = ( + r"^.*?(\bleft_size\b).*?(\bshould be\b)" + r".*?(\bpositive\b).*?(\bsmaller than 3\b)" + ) + with self.assertRaisesRegex(ValueError, expected_regex): + dataset_utils.split_dataset(np.array([1, 2, 3]), left_size=3) + + expected_regex = ( + "Both `left_size` and `right_size` are zero. " + "At least one of the split sizes must be non-zero." 
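In summary, the size-validation tests pin down the contract of `left_size`/`right_size`: at least one must be given, floats must lie strictly in (0, 1), integers must be positive and smaller than the dataset length, and string values are rejected. An illustrative sketch:

```python
import numpy as np
from keras.utils import dataset_utils

data = np.arange(10)

# Valid: fractional sizes inside (0, 1) covering the whole dataset.
left, right = dataset_utils.split_dataset(data, left_size=0.6, right_size=0.4)
assert len(left) == 6 and len(right) == 4

# Invalid (each raises): both sizes None, left_size=1.5,
# left_size=10 on a 10-sample dataset, or left_size="1".
```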
+ ) + with self.assertRaisesRegex(ValueError, expected_regex): + dataset_utils.split_dataset( + np.array([1, 2, 3]), left_size=0, right_size=0 + ) + + def test_invalid_left_and_right_size_types(self): + expected_regex = ( + r"^.*?(\bInvalid `left_size` and `right_size` Types" + r"\b).*?(\bExpected: integer or float or None\b)" + ) + with self.assertRaisesRegex(TypeError, expected_regex): + dataset_utils.split_dataset( + np.array([1, 2, 3]), left_size="1", right_size="1" + ) + + expected_regex = r"^.*?(\bInvalid `right_size` Type\b)" + with self.assertRaisesRegex(TypeError, expected_regex): + dataset_utils.split_dataset( + np.array([1, 2, 3]), left_size=0, right_size="1" + ) + + expected_regex = r"^.*?(\bInvalid `left_size` Type\b)" + with self.assertRaisesRegex(TypeError, expected_regex): + dataset_utils.split_dataset( + np.array([1, 2, 3]), left_size="100", right_size=None + ) + + expected_regex = r"^.*?(\bInvalid `right_size` Type\b)" + with self.assertRaisesRegex(TypeError, expected_regex): + dataset_utils.split_dataset(np.array([1, 2, 3]), right_size="1") + + expected_regex = r"^.*?(\bInvalid `right_size` Type\b)" + with self.assertRaisesRegex(TypeError, expected_regex): + dataset_utils.split_dataset( + np.array([1, 2, 3]), left_size=0.5, right_size="1" + ) + + def test_end_to_end(self): + x_train = np.random.random((10000, 28, 28)) + y_train = np.random.randint(0, 10, size=(10000,)) + + left_split, right_split = dataset_utils.split_dataset( + (x_train, y_train), left_size=0.8 + ) + + self.assertIsInstance(left_split, tf.data.Dataset) + self.assertIsInstance(right_split, tf.data.Dataset) + + self.assertEqual(len(left_split), 8000) + self.assertEqual(len(right_split), 2000) - def test_numpy_array(self): - dataset = np.ones(shape=(200, 32)) - res = dataset_utils.split_dataset(dataset, left_size=0.8, right_size=0.2) - - self.assertLen(res, 2) - left_split, right_split = res - self.assertIsInstance(left_split, tf.data.Dataset) - self.assertIsInstance(right_split, tf.data.Dataset) - - self.assertLen(left_split, 160) - self.assertLen(right_split, 40) - - self.assertAllEqual(dataset[:160], list(left_split)) - self.assertAllEqual(dataset[-40:], list(right_split)) +@test_utils.run_v2_only +class IndexDirectoryStructureTest(tf.test.TestCase): + def test_explicit_labels_and_unnested_files(self): - def test_list_of_numpy_arrays(self): - # test with list of np arrays with same shapes - dataset = [np.ones(shape=(200, 32)), np.zeros(shape=(200, 32))] - res = dataset_utils.split_dataset(dataset, left_size=4) + # Get a unique temp directory + temp_dir = os.path.join( + self.get_temp_dir(), str(np.random.randint(1e6)) + ) + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) - self.assertLen(res, 2) - left_split, right_split = res + # Number of temp files, each of which + # will have its own explicit label + num_files = 10 - self.assertIsInstance(left_split, tf.data.Dataset) - self.assertIsInstance(right_split, tf.data.Dataset) + explicit_labels = np.random.randint(0, 10, size=num_files).tolist() - self.assertEqual(np.array(list(left_split)).shape, (4, 2, 32)) - self.assertEqual(np.array(list(right_split)).shape, (196, 2, 32)) + # Save empty text files to root of temp directory + # (content is not important, only location) + for i in range(len(explicit_labels)): + with open(os.path.join(temp_dir, f"file{i}.txt"), "w"): + pass - # test with different shapes - dataset = [np.ones(shape=(5, 3)), np.ones(shape=(5,))] - left_split, right_split = dataset_utils.split_dataset( - dataset, left_size=0.3) 
+ file_paths, labels, class_names = dataset_utils.index_directory( + temp_dir, labels=explicit_labels, formats=".txt" + ) - self.assertEqual(np.array(list(left_split)).shape, (2, 2)) - self.assertEqual(np.array(list(right_split)).shape, (3, 2)) + # Files are found at the root of the temp directory, when + # `labels` are passed explicitly to `index_directory` and + # the number of returned and passed labels match + self.assertLen(file_paths, num_files) + self.assertLen(labels, num_files) - self.assertEqual(np.array(list(left_split)[0]).shape, (2,)) - self.assertEqual(np.array(list(left_split)[0][0]).shape, (3,)) - self.assertEqual(np.array(list(left_split)[0][1]).shape, ()) + # Class names are returned as a sorted list + expected_class_names = sorted(set(explicit_labels)) + self.assertEqual(expected_class_names, class_names) - self.assertEqual(np.array(list(right_split)[0]).shape, (2,)) - self.assertEqual(np.array(list(right_split)[0][0]).shape, (3,)) - self.assertEqual(np.array(list(right_split)[0][1]).shape, ()) - def test_dataset_with_invalid_shape(self): - with self.assertRaisesRegex( - ValueError, 'Received a list of NumPy arrays ' - 'with different lengths'): - dataset = [np.ones(shape=(200, 32)), np.zeros(shape=(100, 32))] - dataset_utils.split_dataset(dataset, left_size=4) - - with self.assertRaisesRegex( - ValueError, 'Received a tuple of NumPy arrays ' - 'with different lengths'): - dataset = (np.ones(shape=(200, 32)), np.zeros(shape=(201, 32))) - dataset_utils.split_dataset(dataset, left_size=4) - - def test_tuple_of_numpy_arrays(self): - dataset = (np.random.rand(4, 3), np.random.rand(4, 3)) - left_split, right_split = dataset_utils.split_dataset(dataset, left_size=2) - - self.assertIsInstance(left_split, tf.data.Dataset) - self.assertIsInstance(right_split, tf.data.Dataset) - - self.assertEqual(len(left_split), 2) - self.assertEqual(len(right_split), 2) - - self.assertEqual(np.array(list(left_split)[0]).shape, (2, 3)) - self.assertEqual(np.array(list(left_split)[1]).shape, (2, 3)) - - # test with fractional size - dataset = (np.random.rand(5, 32, 32), np.random.rand(5, 32, 32)) - left_split, right_split = dataset_utils.split_dataset( - dataset, right_size=0.4) - self.assertIsInstance(left_split, tf.data.Dataset) - self.assertIsInstance(right_split, tf.data.Dataset) - - self.assertEqual(np.array(list(left_split)).shape, (3, 2, 32, 32)) - self.assertEqual(np.array(list(right_split)).shape, (2, 2, 32, 32)) - - self.assertEqual(np.array(list(left_split))[0].shape, (2, 32, 32)) - self.assertEqual(np.array(list(left_split))[1].shape, (2, 32, 32)) - - self.assertEqual(np.array(list(right_split))[0].shape, (2, 32, 32)) - self.assertEqual(np.array(list(right_split))[1].shape, (2, 32, 32)) - - # test with tuple of np arrays with different shapes - dataset = (np.random.rand(5, 32, 32), np.random.rand(5,)) - left_split, right_split = dataset_utils.split_dataset( - dataset, left_size=2, right_size=3) - self.assertIsInstance(left_split, tf.data.Dataset) - self.assertIsInstance(right_split, tf.data.Dataset) - - self.assertEqual(np.array(list(left_split)).shape, (2, 2)) - self.assertEqual(np.array(list(right_split)).shape, (3, 2)) - - self.assertEqual(np.array(list(left_split)[0]).shape, (2,)) - self.assertEqual(np.array(list(left_split)[0][0]).shape, (32, 32)) - self.assertEqual(np.array(list(left_split)[0][1]).shape, ()) - - self.assertEqual(np.array(list(right_split)[0]).shape, (2,)) - self.assertEqual(np.array(list(right_split)[0][0]).shape, (32, 32)) - 
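The new `IndexDirectoryStructureTest` exercises `index_directory` with explicit labels on a flat directory: files at the root are indexed, the label list is matched one-to-one against the files found, and `class_names` comes back as the sorted set of labels. A condensed sketch of the same usage (temp location hypothetical):

```python
import os
import tempfile
from keras.utils import dataset_utils

tmp = tempfile.mkdtemp()
labels = [3, 1, 2, 1, 0]
for i in range(len(labels)):
    open(os.path.join(tmp, f"file{i}.txt"), "w").close()

file_paths, out_labels, class_names = dataset_utils.index_directory(
    tmp, labels=labels, formats=".txt"
)
assert len(file_paths) == len(labels)
assert class_names == sorted(set(labels))  # [0, 1, 2, 3]
```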
self.assertEqual(np.array(list(right_split)[0][1]).shape, ()) - - def test_batched_tf_dataset_of_vectors(self): - vectors = np.ones(shape=(100, 32, 32, 1)) - dataset = tf.data.Dataset.from_tensor_slices(vectors) - dataset = dataset.batch(10) - left_split, right_split = dataset_utils.split_dataset(dataset, left_size=2) - - # Ensure that the splits are batched - self.assertEqual(len(list(right_split)), 10) - - left_split, right_split = left_split.unbatch(), right_split.unbatch() - self.assertAllEqual(np.array(list(left_split)).shape, (2, 32, 32, 1)) - self.assertAllEqual(np.array(list(right_split)).shape, (98, 32, 32, 1)) - dataset = dataset.unbatch() - self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) - - def test_batched_tf_dataset_of_tuple_of_vectors(self): - tuple_of_vectors = (np.random.rand(10, 32, 32), np.random.rand(10, 32, 32)) - dataset = tf.data.Dataset.from_tensor_slices(tuple_of_vectors) - dataset = dataset.batch(2) - left_split, right_split = dataset_utils.split_dataset(dataset, left_size=4) - - # Ensure that the splits are batched - self.assertEqual(np.array(list(right_split)).shape, (3, 2, 2, 32, 32)) - self.assertEqual(np.array(list(left_split)).shape, (2, 2, 2, 32, 32)) - - left_split, right_split = left_split.unbatch(), right_split.unbatch() - self.assertAllEqual(np.array(list(left_split)).shape, (4, 2, 32, 32)) - self.assertAllEqual(np.array(list(right_split)).shape, (6, 2, 32, 32)) - - dataset = dataset.unbatch() - self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) - - def test_batched_tf_dataset_of_dict_of_vectors(self): - dict_samples = {'X': np.random.rand(10, 3), 'Y': np.random.rand(10, 3)} - dataset = tf.data.Dataset.from_tensor_slices(dict_samples) - dataset = dataset.batch(2) - left_split, right_split = dataset_utils.split_dataset(dataset, left_size=2) - - self.assertAllEqual(np.array(list(left_split)).shape, (1,)) - self.assertAllEqual(np.array(list(right_split)).shape, (4,)) - - left_split, right_split = left_split.unbatch(), right_split.unbatch() - self.assertEqual(len(list(left_split)), 2) - self.assertEqual(len(list(right_split)), 8) - for i in range(10): - if i < 2: - self.assertEqual(list(left_split)[i], list(dataset.unbatch())[i]) - else: - self.assertEqual(list(right_split)[i - 2], list(dataset.unbatch())[i]) - - # test with dict of np arrays with different shapes - dict_samples = { - 'images': np.random.rand(10, 16, 16, 3), - 'labels': np.random.rand(10,) - } - dataset = tf.data.Dataset.from_tensor_slices(dict_samples) - dataset = dataset.batch(1) - left_split, right_split = dataset_utils.split_dataset( - dataset, right_size=0.3) - - self.assertAllEqual(np.array(list(left_split)).shape, (7,)) - self.assertAllEqual(np.array(list(right_split)).shape, (3,)) - - dataset = dataset.unbatch() - left_split, right_split = left_split.unbatch(), right_split.unbatch() - self.assertEqual(len(list(left_split)), 7) - self.assertEqual(len(list(right_split)), 3) - for i in range(10): - if i < 7: - self.assertEqual(list(left_split)[i], list(dataset)[i]) - else: - self.assertEqual(list(right_split)[i - 7], list(dataset)[i]) - - def test_unbatched_tf_dataset_of_vectors(self): - vectors = np.ones(shape=(100, 16, 16, 3)) - dataset = tf.data.Dataset.from_tensor_slices(vectors) - - left_split, right_split = dataset_utils.split_dataset( - dataset, left_size=0.25) - - self.assertAllEqual(np.array(list(left_split)).shape, (25, 16, 16, 3)) - self.assertAllEqual(np.array(list(right_split)).shape, (75, 16, 16, 3)) - - 
self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) - - dataset = [np.random.rand(10, 3, 3) for _ in range(5)] - dataset = tf.data.Dataset.from_tensor_slices(dataset) - - left_split, right_split = dataset_utils.split_dataset(dataset, left_size=2) - self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) - - def test_unbatched_tf_dataset_of_tuple_of_vectors(self): - # test with tuple of np arrays with same shape - X, Y = (np.random.rand(10, 32, 32, 1), np.random.rand(10, 32, 32, 1)) - dataset = tf.data.Dataset.from_tensor_slices((X, Y)) - - left_split, right_split = dataset_utils.split_dataset(dataset, left_size=5) - - self.assertEqual(len(list(left_split)), 5) - self.assertEqual(len(list(right_split)), 5) - self.assertAllEqual(list(dataset), list(left_split) + list(right_split)) - - # test with tuple of np arrays with different shapes - X, Y = (np.random.rand(5, 3, 3), np.random.rand(5,)) - dataset = tf.data.Dataset.from_tensor_slices((X, Y)) - left_split, right_split = dataset_utils.split_dataset( - dataset, left_size=0.5) - - self.assertEqual(len(list(left_split)), 2) - self.assertEqual(len(list(right_split)), 3) - self.assertEqual(np.array(list(left_split)[0][0]).shape, (3, 3)) - self.assertEqual(np.array(list(left_split)[0][1]).shape, ()) - - def test_unbatched_tf_dataset_of_dict_of_vectors(self): - # test with dict of np arrays of same shape - dict_samples = {'X': np.random.rand(10, 2), 'Y': np.random.rand(10, 2)} - dataset = tf.data.Dataset.from_tensor_slices(dict_samples) - left_split, right_split = dataset_utils.split_dataset(dataset, left_size=2) - self.assertEqual(len(list(left_split)), 2) - self.assertEqual(len(list(right_split)), 8) - for i in range(10): - if i < 2: - self.assertEqual(list(left_split)[i], list(dataset)[i]) - else: - self.assertEqual(list(right_split)[i - 2], list(dataset)[i]) - - # test with dict of np arrays with different shapes - dict_samples = { - 'images': np.random.rand(10, 16, 16, 3), - 'labels': np.random.rand(10,) - } - dataset = tf.data.Dataset.from_tensor_slices(dict_samples) - left_split, right_split = dataset_utils.split_dataset( - dataset, left_size=0.3) - self.assertEqual(len(list(left_split)), 3) - self.assertEqual(len(list(right_split)), 7) - for i in range(10): - if i < 3: - self.assertEqual(list(left_split)[i], list(dataset)[i]) - else: - self.assertEqual(list(right_split)[i - 3], list(dataset)[i]) - - # test with dict of text arrays - txt_feature = ['abb', 'bb', 'cc', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] - dict_samples = { - 'txt_feature': txt_feature, - 'label': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - } - dataset = tf.data.Dataset.from_tensor_slices(dict_samples) - left_split, right_split = dataset_utils.split_dataset( - dataset, left_size=0.45, right_size=0.55) - self.assertEqual(len(list(left_split)), 4) - self.assertEqual(len(list(right_split)), 6) - for i in range(10): - if i < 4: - self.assertEqual(list(left_split)[i], list(dataset)[i]) - else: - self.assertEqual(list(right_split)[i - 4], list(dataset)[i]) - - def test_list_dataset(self): - dataset = [np.ones(shape=(10, 10, 10)) for _ in range(10)] - left_split, right_split = dataset_utils.split_dataset( - dataset, left_size=5, right_size=5) - self.assertEqual(len(left_split), len(right_split)) - self.assertIsInstance(left_split, tf.data.Dataset) - self.assertIsInstance(left_split, tf.data.Dataset) - - dataset = [np.ones(shape=(10, 10, 10)) for _ in range(10)] - left_split, right_split = dataset_utils.split_dataset( - dataset, left_size=0.6, 
right_size=0.4) - self.assertEqual(len(left_split), 6) - self.assertEqual(len(right_split), 4) - - def test_invalid_dataset(self): - with self.assertRaisesRegex( - TypeError, 'The `dataset` argument must be either a `tf.data.Dataset` ' - 'object or a list/tuple of arrays.'): - dataset_utils.split_dataset(dataset=None, left_size=5) - with self.assertRaisesRegex( - TypeError, 'The `dataset` argument must be either a `tf.data.Dataset` ' - 'object or a list/tuple of arrays.'): - dataset_utils.split_dataset(dataset=1, left_size=5) - with self.assertRaisesRegex( - TypeError, 'The `dataset` argument must be either a `tf.data.Dataset` ' - 'object or a list/tuple of arrays.'): - dataset_utils.split_dataset(dataset=float(1.2), left_size=5) - with self.assertRaisesRegex( - TypeError, 'The `dataset` argument must be either a `tf.data.Dataset` ' - 'object or a list/tuple of arrays.'): - dataset_utils.split_dataset(dataset=dict({}), left_size=5) - with self.assertRaisesRegex( - TypeError, 'The `dataset` argument must be either a `tf.data.Dataset` ' - 'object or a list/tuple of arrays.'): - dataset_utils.split_dataset(dataset=float('INF'), left_size=5) - - def test_valid_left_and_right_sizes(self): - dataset = np.array([1, 2, 3]) - splitted_dataset = dataset_utils.split_dataset(dataset, 1, 2) - self.assertLen(splitted_dataset, 2) - left_split, right_split = splitted_dataset - self.assertEqual(len(left_split), 1) - self.assertEqual(len(right_split), 2) - self.assertEqual(list(left_split), [1]) - self.assertEqual(list(right_split), [2, 3]) - - dataset = np.ones(shape=(200, 32)) - res = dataset_utils.split_dataset(dataset, left_size=150, right_size=50) - self.assertLen(res, 2) - self.assertIsInstance(res[0], tf.data.Dataset) - self.assertIsInstance(res[1], tf.data.Dataset) - - self.assertLen(res[0], 150) - self.assertLen(res[1], 50) - - dataset = np.ones(shape=(200, 32)) - res = dataset_utils.split_dataset(dataset, left_size=120) - self.assertLen(res, 2) - self.assertIsInstance(res[0], tf.data.Dataset) - self.assertIsInstance(res[1], tf.data.Dataset) - - self.assertLen(res[0], 120) - self.assertLen(res[1], 80) - - dataset = np.ones(shape=(10000, 16)) - res = dataset_utils.split_dataset(dataset, right_size=20) - self.assertLen(res, 2) - self.assertIsInstance(res[0], tf.data.Dataset) - self.assertIsInstance(res[1], tf.data.Dataset) - - self.assertLen(res[0], 9980) - self.assertLen(res[1], 20) - - dataset = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - splitted_dataset = dataset_utils.split_dataset( - dataset, left_size=0.1, right_size=0.9) - self.assertLen(splitted_dataset, 2) - left_split, right_split = splitted_dataset - self.assertEqual(len(left_split), 1) - self.assertEqual(len(right_split), 9) - self.assertEqual(list(left_split), [1]) - self.assertEqual(list(right_split), [2, 3, 4, 5, 6, 7, 8, 9, 10]) - - dataset = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - splitted_dataset = dataset_utils.split_dataset( - dataset, left_size=2, right_size=5) - self.assertLen(splitted_dataset, 2) - left_split, right_split = splitted_dataset - self.assertEqual(len(left_split), 2) - self.assertEqual(len(right_split), 5) - self.assertEqual(list(left_split), [1, 2]) - self.assertEqual(list(right_split), [6, 7, 8, 9, 10]) - - def test_float_left_and_right_sizes(self): - X = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]) - dataset = tf.data.Dataset.from_tensor_slices(X) - left_split, right_split = dataset_utils.split_dataset( - dataset, left_size=0.8, right_size=0.2) - self.assertEqual(len(left_split), 2) - 
self.assertEqual(len(right_split), 1) - - def test_invalid_float_left_and_right_sizes(self): - expected_regex = (r'^(.*?(\bleft_size\b).*?(\bshould be\b)' - r'.*?(\bwithin the range\b).*?(\b0\b).*?(\b1\b))') - with self.assertRaisesRegexp(ValueError, expected_regex): - dataset = [np.ones(shape=(200, 32, 32)), np.zeros(shape=(200, 32, 32))] - dataset_utils.split_dataset(dataset, left_size=1.5, right_size=0.2) - - expected_regex = (r'^(.*?(\bright_size\b).*?(\bshould be\b)' - r'.*?(\bwithin the range\b).*?(\b0\b).*?(\b1\b))') - with self.assertRaisesRegex(ValueError, expected_regex): - dataset = [np.ones(shape=(200, 32)), np.zeros(shape=(200, 32))] - dataset_utils.split_dataset(dataset, left_size=0.8, right_size=-0.8) - - def test_None_and_zero_left_and_right_size(self): - expected_regex = (r'^.*?(\bleft_size\b).*?(\bright_size\b).*?(\bmust ' - r'be specified\b).*?(\bReceived: left_size=None and' - r' right_size=None\b)') - - with self.assertRaisesRegex(ValueError, expected_regex): - dataset_utils.split_dataset(dataset=np.array([1, 2, 3]), left_size=None) - with self.assertRaisesRegex(ValueError, expected_regex): - dataset_utils.split_dataset( - np.array([1, 2, 3]), left_size=None, right_size=None) - - expected_regex = (r'^.*?(\bleft_size\b).*?(\bshould be\b)' - r'.*?(\bpositive\b).*?(\bsmaller than 3\b)') - with self.assertRaisesRegex(ValueError, expected_regex): - dataset_utils.split_dataset(np.array([1, 2, 3]), left_size=3) - - expected_regex = ('Both `left_size` and `right_size` are zero. ' - 'At least one of the split sizes must be non-zero.') - with self.assertRaisesRegex(ValueError, expected_regex): - dataset_utils.split_dataset( - np.array([1, 2, 3]), left_size=0, right_size=0) - - def test_invalid_left_and_right_size_types(self): - expected_regex = (r'^.*?(\bInvalid `left_size` and `right_size` Types' - r'\b).*?(\bExpected: integer or float or None\b)') - with self.assertRaisesRegex(TypeError, expected_regex): - dataset_utils.split_dataset( - np.array([1, 2, 3]), left_size='1', right_size='1') - - expected_regex = (r'^.*?(\bInvalid `right_size` Type\b)') - with self.assertRaisesRegex(TypeError, expected_regex): - dataset_utils.split_dataset( - np.array([1, 2, 3]), left_size=0, right_size='1') - - expected_regex = (r'^.*?(\bInvalid `left_size` Type\b)') - with self.assertRaisesRegex(TypeError, expected_regex): - dataset_utils.split_dataset( - np.array([1, 2, 3]), left_size='100', right_size=None) - - expected_regex = (r'^.*?(\bInvalid `right_size` Type\b)') - with self.assertRaisesRegex(TypeError, expected_regex): - dataset_utils.split_dataset(np.array([1, 2, 3]), right_size='1') - - expected_regex = (r'^.*?(\bInvalid `right_size` Type\b)') - with self.assertRaisesRegex(TypeError, expected_regex): - dataset_utils.split_dataset( - np.array([1, 2, 3]), left_size=0.5, right_size='1') - - def test_end_to_end(self): - x_train = np.random.random((10000, 28, 28)) - y_train = np.random.randint(0, 10, size=(10000,)) - - left_split, right_split = dataset_utils.split_dataset( - (x_train, y_train), left_size=0.8) - - self.assertIsInstance(left_split, tf.data.Dataset) - self.assertIsInstance(right_split, tf.data.Dataset) - - self.assertEqual(len(left_split), 8000) - self.assertEqual(len(right_split), 2000) - - -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/feature_space.py b/keras/utils/feature_space.py new file mode 100644 index 000000000000..e52e158dab05 --- /dev/null +++ b/keras/utils/feature_space.py @@ -0,0 +1,772 @@ +# 
Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""FeatureSpace structured data preprocessing & encoding utility.""" + +import tensorflow.compat.v2 as tf + +from keras import backend +from keras.engine import base_layer +from keras.saving import saving_lib +from keras.saving import serialization_lib +from keras.utils.generic_utils import LazyLoader + +# isort: off +from tensorflow.python.util.tf_export import keras_export + +layers = LazyLoader("layers", globals(), "keras.layers") + + +class Cross: + def __init__(self, feature_names, crossing_dim, output_mode="one_hot"): + if output_mode not in {"int", "one_hot"}: + raise ValueError( + "Invalid value for argument `output_mode`. " + "Expected one of {'int', 'one_hot'}. " + f"Received: output_mode={output_mode}" + ) + self.feature_names = tuple(feature_names) + self.crossing_dim = crossing_dim + self.output_mode = output_mode + + @property + def name(self): + return "_X_".join(self.feature_names) + + def get_config(self): + return { + "feature_names": self.feature_names, + "crossing_dim": self.crossing_dim, + "output_mode": self.output_mode, + } + + @classmethod + def from_config(cls, config): + return cls(**config) + + +class Feature: + def __init__(self, dtype, preprocessor, output_mode): + if output_mode not in {"int", "one_hot", "float"}: + raise ValueError( + "Invalid value for argument `output_mode`. " + "Expected one of {'int', 'one_hot', 'float'}. " + f"Received: output_mode={output_mode}" + ) + self.dtype = dtype + if isinstance(preprocessor, dict): + preprocessor = serialization_lib.deserialize_keras_object( + preprocessor + ) + self.preprocessor = preprocessor + self.output_mode = output_mode + + def get_config(self): + return { + "dtype": self.dtype, + "preprocessor": serialization_lib.serialize_keras_object( + self.preprocessor + ), + "output_mode": self.output_mode, + } + + @classmethod + def from_config(cls, config): + return cls(**config) + + +@keras_export("keras.utils.FeatureSpace", v1=[]) +class FeatureSpace(base_layer.Layer): + """One-stop utility for preprocessing and encoding structured data. + + Arguments: + feature_names: Dict mapping the names of your features to their + type specification, e.g. `{"my_feature": "integer_categorical"}` + or `{"my_feature": FeatureSpace.integer_categorical()}`. + For a complete list of all supported types, see + "Available feature types" paragraph below. + output_mode: One of `"concat"` or `"dict"`. In concat mode, all + features get concatenated together into a single vector. + In dict mode, the FeatureSpace returns a dict of individually + encoded features (with the same keys as the input dict keys). + crosses: List of features to be crossed together, e.g. + `crosses=[("feature_1", "feature_2")]`. The features will be + "crossed" by hashing their combined value into + a fixed-length vector. 
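The `Cross` and `Feature` helpers above are lightweight, config-serializable value objects rather than layers; a `Cross` derives its name by joining the crossed feature names with `_X_`. A quick sketch of the round-trip contract they implement (assuming the classes as defined in this file):

```python
cross = Cross(("country", "device"), crossing_dim=64)
assert cross.name == "country_X_device"

# get_config()/from_config() round-trip restores an equivalent object.
config = cross.get_config()
restored = Cross.from_config(config)
assert restored.feature_names == ("country", "device")
assert restored.crossing_dim == 64
assert restored.output_mode == "one_hot"
```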
+ crossing_dim: Default vector size for hashing crossed features. + Defaults to `32`. + hashing_dim: Default vector size for hashing features of type + `"integer_hashed"` and `"string_hashed"`. Defaults to `32`. + num_discretization_bins: Default number of bins to be used for + discretizing features of type `"float_discretized"`. + Defaults to `32`. + + **Available feature types:** + + Note that all features can be referred to by their string name, + e.g. `"integer_categorical"`. When using the string name, the default + argument values are used. + + ```python + # Plain float values. + FeatureSpace.float(name=None) + + # Float values to be preprocessed via featurewise standardization + # (i.e. via a `keras.layers.Normalization` layer). + FeatureSpace.float_normalized(name=None) + + # Float values to be preprocessed via linear rescaling + # (i.e. via a `keras.layers.Rescaling` layer). + FeatureSpace.float_rescaled(scale=1., offset=0., name=None) + + # Float values to be discretized. By default, the discrete + # representation will then be one-hot encoded. + FeatureSpace.float_discretized( + num_bins, bin_boundaries=None, output_mode="one_hot", name=None) + + # Integer values to be indexed. By default, the discrete + # representation will then be one-hot encoded. + FeatureSpace.integer_categorical( + max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None) + + # String values to be indexed. By default, the discrete + # representation will then be one-hot encoded. + FeatureSpace.string_categorical( + max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None) + + # Integer values to be hashed into a fixed number of bins. + # By default, the discrete representation will then be one-hot encoded. + FeatureSpace.integer_hashed(num_bins, output_mode="one_hot", name=None) + + # String values to be hashed into a fixed number of bins. + # By default, the discrete representation will then be one-hot encoded. + FeatureSpace.string_hashed(num_bins, output_mode="one_hot", name=None) + ``` + + Examples: + + **Basic usage with a dict of input data:** + + ```python + raw_data = { + "float_values": [0.0, 0.1, 0.2, 0.3], + "string_values": ["zero", "one", "two", "three"], + "int_values": [0, 1, 2, 3], + } + dataset = tf.data.Dataset.from_tensor_slices(raw_data) + + feature_space = FeatureSpace( + features={ + "float_values": "float_normalized", + "string_values": "string_categorical", + "int_values": "integer_categorical", + }, + crosses=[("string_values", "int_values")], + output_mode="concat", + ) + # Before you start using the FeatureSpace, + # you must `adapt()` it on some data. + feature_space.adapt(dataset) + + # You can call the FeatureSpace on a dict of data (batched or unbatched). 
+ output_vector = feature_space(raw_data) + ``` + + **Basic usage with `tf.data`:** + + ```python + # Unlabeled data + preprocessed_ds = unlabeled_dataset.map(feature_space) + + # Labeled data + preprocessed_ds = labeled_dataset.map(lambda x, y: (feature_space(x), y)) + ``` + + **Basic usage with the Keras Functional API:** + + ```python + # Retrieve a dict of Keras Input objects + inputs = feature_space.get_inputs() + # Retrieve the corresponding encoded Keras tensors + encoded_features = feature_space.get_encoded_features() + # Build a Functional model + outputs = keras.layers.Dense(1, activation="sigmoid")(encoded_features) + model = keras.Model(inputs, outputs) + ``` + + **Customizing each feature or feature cross:** + + ```python + feature_space = FeatureSpace( + features={ + "float_values": FeatureSpace.float_normalized(), + "string_values": FeatureSpace.string_categorical(max_tokens=10), + "int_values": FeatureSpace.integer_categorical(max_tokens=10), + }, + crosses=[ + FeatureSpace.cross(("string_values", "int_values"), crossing_dim=32) + ], + output_mode="concat", + ) + ``` + + **Returning a dict of integer-encoded features:** + + ```python + feature_space = FeatureSpace( + features={ + "string_values": FeatureSpace.string_categorical(output_mode="int"), + "int_values": FeatureSpace.integer_categorical(output_mode="int"), + }, + crosses=[ + FeatureSpace.cross( + feature_names=("string_values", "int_values"), + crossing_dim=32, + output_mode="int", + ) + ], + output_mode="dict", + ) + ``` + + **Specifying your own Keras preprocessing layer:** + + ```python + # Let's say that one of the features is a short text paragraph that + # we want to encode as a vector (one vector per paragraph) via TF-IDF. + data = { + "text": ["1st string", "2nd string", "3rd string"], + } + + # There's a Keras layer for this: TextVectorization. + custom_layer = layers.TextVectorization(output_mode="tf_idf") + + # We can use FeatureSpace.feature to create a custom feature + # that will use our preprocessing layer. + feature_space = FeatureSpace( + features={ + "text": FeatureSpace.feature( + preprocessor=custom_layer, dtype="string", output_mode="float" + ), + }, + output_mode="concat", + ) + feature_space.adapt(tf.data.Dataset.from_tensor_slices(data)) + output_vector = feature_space(data) + ``` + + **Retrieving the underlying Keras preprocessing layers:** + + ```python + # The preprocessing layer of each feature is available in `.preprocessors`. + preprocessing_layer = feature_space.preprocessors["feature1"] + + # The crossing layer of each feature cross is available in `.crossers`. + # It's an instance of keras.layers.HashedCrossing.
+ crossing_layer = feature_space.crossers["feature1_X_feature2"] + ``` + + **Saving and reloading a FeatureSpace:** + + ```python + feature_space.save("myfeaturespace.keras") + reloaded_feature_space = keras.models.load_model("myfeaturespace.keras") + ``` + """ + + @classmethod + def cross(cls, feature_names, crossing_dim, output_mode="one_hot"): + return Cross(feature_names, crossing_dim, output_mode=output_mode) + + @classmethod + def feature(cls, dtype, preprocessor, output_mode): + return Feature(dtype, preprocessor, output_mode) + + @classmethod + def float(cls, name=None): + from keras.layers.core import identity + + name = name or backend.unique_object_name("float") + preprocessor = identity.Identity( + dtype="float32", name=f"{name}_preprocessor" + ) + return Feature( + dtype="float32", preprocessor=preprocessor, output_mode="float" + ) + + @classmethod + def float_rescaled(cls, scale=1.0, offset=0.0, name=None): + name = name or backend.unique_object_name("float_rescaled") + preprocessor = layers.Rescaling( + scale=scale, offset=offset, name=f"{name}_preprocessor" + ) + return Feature( + dtype="float32", preprocessor=preprocessor, output_mode="float" + ) + + @classmethod + def float_normalized(cls, name=None): + name = name or backend.unique_object_name("float_normalized") + preprocessor = layers.Normalization( + axis=-1, name=f"{name}_preprocessor" + ) + return Feature( + dtype="float32", preprocessor=preprocessor, output_mode="float" + ) + + @classmethod + def float_discretized( + cls, num_bins, bin_boundaries=None, output_mode="one_hot", name=None + ): + name = name or backend.unique_object_name("float_discretized") + preprocessor = layers.Discretization( + num_bins=num_bins, + bin_boundaries=bin_boundaries, + name=f"{name}_preprocessor", + ) + return Feature( + dtype="float32", preprocessor=preprocessor, output_mode=output_mode + ) + + @classmethod + def integer_categorical( + cls, + max_tokens=None, + num_oov_indices=1, + output_mode="one_hot", + name=None, + ): + name = name or backend.unique_object_name("integer_categorical") + preprocessor = layers.IntegerLookup( + name=f"{name}_preprocessor", + max_tokens=max_tokens, + num_oov_indices=num_oov_indices, + ) + return Feature( + dtype="int64", preprocessor=preprocessor, output_mode=output_mode + ) + + @classmethod + def string_categorical( + cls, + max_tokens=None, + num_oov_indices=1, + output_mode="one_hot", + name=None, + ): + name = name or backend.unique_object_name("string_categorical") + preprocessor = layers.StringLookup( + name=f"{name}_preprocessor", + max_tokens=max_tokens, + num_oov_indices=num_oov_indices, + ) + return Feature( + dtype="string", preprocessor=preprocessor, output_mode=output_mode + ) + + @classmethod + def string_hashed(cls, num_bins, output_mode="one_hot", name=None): + name = name or backend.unique_object_name("string_hashed") + preprocessor = layers.Hashing( + name=f"{name}_preprocessor", num_bins=num_bins + ) + return Feature( + dtype="string", preprocessor=preprocessor, output_mode=output_mode + ) + + @classmethod + def integer_hashed(cls, num_bins, output_mode="one_hot", name=None): + name = name or backend.unique_object_name("integer_hashed") + preprocessor = layers.Hashing( + name=f"{name}_preprocessor", num_bins=num_bins + ) + return Feature( + dtype="int64", preprocessor=preprocessor, output_mode=output_mode + ) + + def __init__( + self, + features, + output_mode="concat", + crosses=None, + crossing_dim=32, + hashing_dim=32, + num_discretization_bins=32, + ): + if not features: + raise 
ValueError("The `features` argument cannot be None or empty.") + self.crossing_dim = crossing_dim + self.hashing_dim = hashing_dim + self.num_discretization_bins = num_discretization_bins + self.features = { + name: self._standardize_feature(name, value) + for name, value in features.items() + } + self.crosses = [] + if crosses: + feature_set = set(features.keys()) + for cross in crosses: + if isinstance(cross, dict): + cross = serialization_lib.deserialize_keras_object(cross) + if isinstance(cross, Cross): + self.crosses.append(cross) + else: + if not crossing_dim: + raise ValueError( + "When specifying `crosses`, the argument " + "`crossing_dim` " + "(dimensionality of the crossing space) " + "should be specified as well." + ) + for key in cross: + if key not in feature_set: + raise ValueError( + "All features referenced " + "in the `crosses` argument " + "should be present in the `features` dict. " + f"Received unknown features: {cross}" + ) + self.crosses.append(Cross(cross, crossing_dim=crossing_dim)) + self.crosses_by_name = {cross.name: cross for cross in self.crosses} + + if output_mode not in {"dict", "concat"}: + raise ValueError( + "Invalid value for argument `output_mode`. " + "Expected one of {'dict', 'concat'}. " + f"Received: output_mode={output_mode}" + ) + self.output_mode = output_mode + + self.inputs = { + name: self._feature_to_input(name, value) + for name, value in self.features.items() + } + self.preprocessors = { + name: value.preprocessor for name, value in self.features.items() + } + self.encoded_features = None + self.crossers = { + cross.name: self._cross_to_crosser(cross) for cross in self.crosses + } + self.one_hot_encoders = {} + self.built = False + self._is_adapted = False + self.concat = None + self._preprocessed_features_names = None + self._crossed_features_names = None + + def _feature_to_input(self, name, feature): + return layers.Input(shape=(1,), dtype=feature.dtype, name=name) + + def _standardize_feature(self, name, feature): + if isinstance(feature, Feature): + return feature + + if isinstance(feature, dict): + return serialization_lib.deserialize_keras_object(feature) + + if feature == "float": + return self.float(name=name) + elif feature == "float_normalized": + return self.float_normalized(name=name) + elif feature == "float_rescaled": + return self.float_rescaled(name=name) + elif feature == "float_discretized": + return self.float_discretized( + name=name, num_bins=self.num_discretization_bins + ) + elif feature == "integer_categorical": + return self.integer_categorical(name=name) + elif feature == "string_categorical": + return self.string_categorical(name=name) + elif feature == "integer_hashed": + return self.integer_hashed(self.hashing_dim, name=name) + elif feature == "string_hashed": + return self.string_hashed(self.hashing_dim, name=name) + else: + raise ValueError(f"Invalid feature type: {feature}") + + def _cross_to_crosser(self, cross): + return layers.HashedCrossing(cross.crossing_dim, name=cross.name) + + def _list_adaptable_preprocessors(self): + adaptable_preprocessors = [] + for name in self.features.keys(): + preprocessor = self.preprocessors[name] + # Special case: a Normalization layer with preset mean/variance. + # Not adaptable. 
+ if isinstance(preprocessor, layers.Normalization): + if preprocessor.input_mean is not None: + continue + if hasattr(preprocessor, "adapt"): + adaptable_preprocessors.append(name) + return adaptable_preprocessors + + def adapt(self, dataset): + if not isinstance(dataset, tf.data.Dataset): + raise ValueError( + "`adapt()` can only be called on a tf.data.Dataset. " + f"Received instead: {dataset} (of type {type(dataset)})" + ) + + for name in self._list_adaptable_preprocessors(): + # Call adapt() on each individual adaptable layer. + + # TODO: consider rewriting this to instead iterate on the + # dataset once, split each batch into individual features, + # and call the layer's `_adapt_function` on each batch + # to simulate the behavior of adapt() in a more performant fashion. + + feature_dataset = dataset.map(lambda x: x[name]) + preprocessor = self.preprocessors[name] + # TODO: consider adding an adapt progress bar. + # Sample 1 element to check the rank + for x in feature_dataset.take(1): + pass + if x.shape.rank == 0: + # The dataset yields unbatched scalars; batch it. + feature_dataset = feature_dataset.batch(32) + if x.shape.rank in {0, 1}: + # If the rank is 1, add a dimension + # so we can reduce on axis=-1. + # Note: if rank was previously 0, it is now 1. + feature_dataset = feature_dataset.map( + lambda x: tf.expand_dims(x, -1) + ) + preprocessor.adapt(feature_dataset) + self._is_adapted = True + self.get_encoded_features() # Finish building the layer + self.built = True + + def get_inputs(self): + self._check_if_built() + return self.inputs + + def get_encoded_features(self): + self._check_if_adapted() + + if self.encoded_features is None: + preprocessed_features = self._preprocess_features(self.inputs) + crossed_features = self._cross_features(preprocessed_features) + merged_features = self._merge_features( + preprocessed_features, crossed_features + ) + self.encoded_features = merged_features + return self.encoded_features + + def _preprocess_features(self, features): + return { + name: self.preprocessors[name](features[name]) + for name in features.keys() + } + + def _cross_features(self, features): + all_outputs = {} + for cross in self.crosses: + inputs = [features[name] for name in cross.feature_names] + outputs = self.crossers[cross.name](inputs) + all_outputs[cross.name] = outputs + return all_outputs + + def _merge_features(self, preprocessed_features, crossed_features): + if not self._preprocessed_features_names: + self._preprocessed_features_names = sorted( + preprocessed_features.keys() + ) + self._crossed_features_names = sorted(crossed_features.keys()) + + all_names = ( + self._preprocessed_features_names + self._crossed_features_names + ) + all_features = [ + preprocessed_features[name] + for name in self._preprocessed_features_names + ] + [crossed_features[name] for name in self._crossed_features_names] + + if self.output_mode == "dict": + output_dict = {} + else: + features_to_concat = [] + + if self.built: + # Fast mode. 
+ for name, feature in zip(all_names, all_features): + encoder = self.one_hot_encoders.get(name, None) + if encoder: + feature = encoder(feature) + if self.output_mode == "dict": + output_dict[name] = feature + else: + features_to_concat.append(feature) + if self.output_mode == "dict": + return output_dict + else: + return self.concat(features_to_concat) + + # If the object isn't built, + # we create the encoder and concat layers below + all_specs = [ + self.features[name] for name in self._preprocessed_features_names + ] + [ + self.crosses_by_name[name] for name in self._crossed_features_names + ] + for name, feature, spec in zip(all_names, all_features, all_specs): + dtype = feature.dtype.name + + if spec.output_mode == "one_hot": + preprocessor = self.preprocessors.get( + name + ) or self.crossers.get(name) + cardinality = None + if not feature.dtype.name.startswith("int"): + raise ValueError( + f"Feature '{name}' has `output_mode='one_hot'`. " + "Thus its preprocessor should return an int64 dtype. " + f"Instead it returns a {dtype} dtype." + ) + + if isinstance( + preprocessor, (layers.IntegerLookup, layers.StringLookup) + ): + cardinality = preprocessor.vocabulary_size() + elif isinstance(preprocessor, layers.CategoryEncoding): + cardinality = preprocessor.num_tokens + elif isinstance(preprocessor, layers.Discretization): + cardinality = preprocessor.num_bins + elif isinstance( + preprocessor, (layers.HashedCrossing, layers.Hashing) + ): + cardinality = preprocessor.num_bins + else: + raise ValueError( + f"Feature '{name}' has `output_mode='one_hot'`. " + "However it isn't a standard feature and the " + "dimensionality of its output space is not known, " + "thus it cannot be one-hot encoded. " + "Try using `output_mode='int'`." + ) + if cardinality is not None: + encoder = layers.CategoryEncoding( + num_tokens=cardinality, output_mode="multi_hot" + ) + self.one_hot_encoders[name] = encoder + feature = encoder(feature) + + if self.output_mode == "concat": + dtype = feature.dtype.name + if dtype.startswith("int") or dtype == "string": + raise ValueError( + f"Cannot concatenate features because feature '{name}' " + f"has not been encoded (it has dtype {dtype}). " + "Consider using `output_mode='dict'`." + ) + features_to_concat.append(feature) + else: + output_dict[name] = feature + + if self.output_mode == "concat": + self.concat = layers.Concatenate(axis=-1) + return self.concat(features_to_concat) + else: + return output_dict + + def _check_if_adapted(self): + if not self._is_adapted: + if not self._list_adaptable_preprocessors(): + self._is_adapted = True + else: + raise ValueError( + "You need to call `.adapt(dataset)` on the FeatureSpace " + "before you can start using it." + ) + + def _check_if_built(self): + if not self.built: + self._check_if_adapted() + # Finishes building + self.get_encoded_features() + self.built = True + + def __call__(self, data): + self._check_if_built() + if not isinstance(data, dict): + raise ValueError( + "A FeatureSpace can only be called with a dict. 
" + f"Received: data={data} (of type {type(data)}" + ) + + data = {key: tf.convert_to_tensor(value) for key, value in data.items()} + rebatched = False + for name, x in data.items(): + if x.shape.rank == 0: + data[name] = tf.reshape(x, [1, 1]) + rebatched = True + elif x.shape.rank == 1: + data[name] = tf.expand_dims(x, -1) + + preprocessed_data = self._preprocess_features(data) + crossed_data = self._cross_features(preprocessed_data) + merged_data = self._merge_features(preprocessed_data, crossed_data) + if rebatched: + if self.output_mode == "concat": + assert merged_data.shape[0] == 1 + return tf.squeeze(merged_data, axis=0) + else: + for name, x in merged_data.items(): + if x.shape.rank == 2 and x.shape[0] == 1: + merged_data[name] = tf.squeeze(x, axis=0) + return merged_data + + def get_config(self): + return { + "features": serialization_lib.serialize_keras_object(self.features), + "output_mode": self.output_mode, + "crosses": serialization_lib.serialize_keras_object(self.crosses), + "crossing_dim": self.crossing_dim, + "hashing_dim": self.hashing_dim, + "num_discretization_bins": self.num_discretization_bins, + } + + @classmethod + def from_config(cls, config): + return cls(**config) + + def get_build_config(self): + return { + name: feature.preprocessor.get_build_config() + for name, feature in self.features.items() + } + + def build_from_config(self, config): + for name in config.keys(): + self.features[name].preprocessor.build_from_config(config[name]) + self._is_adapted = True + + def save(self, filepath): + """Save the `FeatureSpace` instance to a `.keras` file. + + You can reload it via `keras.models.load_model()`: + + ```python + feature_space.save("myfeaturespace.keras") + reloaded_feature_space = keras.models.load_model("myfeaturespace.keras") + ``` + """ + saving_lib.save_model(self, filepath) + + def save_own_variables(self, store): + return + + def load_own_variables(self, store): + return diff --git a/keras/utils/feature_space_test.py b/keras/utils/feature_space_test.py new file mode 100644 index 000000000000..ee3a8770290c --- /dev/null +++ b/keras/utils/feature_space_test.py @@ -0,0 +1,400 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for FeatureSpace utility.""" + +import os + +import tensorflow.compat.v2 as tf + +import keras +from keras import layers +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import feature_space + + +@test_utils.run_v2_only +class FeatureSpaceTest(test_combinations.TestCase): + def _get_train_data_dict( + self, as_dataset=False, as_tf_tensors=False, as_labeled_dataset=False + ): + data = { + "float_1": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], + "float_2": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], + "float_3": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], + "string_1": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"], + "string_2": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"], + "int_1": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + "int_2": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + "int_3": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + } + if as_dataset: + return tf.data.Dataset.from_tensor_slices(data) + elif as_tf_tensors: + return tf.nest.map_structure(tf.convert_to_tensor, data) + elif as_labeled_dataset: + labels = [0, 1, 0, 1, 0, 0, 1, 0, 1, 1] + return tf.data.Dataset.from_tensor_slices((data, labels)) + return data + + def test_basic_usage(self): + fs = feature_space.FeatureSpace( + features={ + "float_1": "float", + "float_2": "float_normalized", + "float_3": "float_discretized", + "string_1": "string_categorical", + "string_2": "string_hashed", + "int_1": "integer_categorical", + "int_2": "integer_hashed", + "int_3": "integer_categorical", + }, + crosses=[("float_3", "string_1"), ("string_2", "int_2")], + output_mode="concat", + ) + # Test unbatched adapt + fs.adapt(self._get_train_data_dict(as_dataset=True)) + # Test batched adapt + fs.adapt(self._get_train_data_dict(as_dataset=True).batch(4)) + + # Test unbatched call on raw data + data = { + key: value[0] for key, value in self._get_train_data_dict().items() + } + out = fs(data) + self.assertEqual(out.shape.as_list(), [195]) + + # Test unbatched call on TF tensors + data = self._get_train_data_dict(as_tf_tensors=True) + data = {key: value[0] for key, value in data.items()} + out = fs(data) + self.assertEqual(out.shape.as_list(), [195]) + + # Test batched call on raw data + out = fs(self._get_train_data_dict()) + self.assertEqual(out.shape.as_list(), [10, 195]) + + # Test batched call on TF tensors + out = fs(self._get_train_data_dict(as_tf_tensors=True)) + self.assertEqual(out.shape.as_list(), [10, 195]) + + def test_output_mode_dict(self): + fs = feature_space.FeatureSpace( + features={ + "float_1": "float", + "float_2": "float_normalized", + "float_3": "float_discretized", + "string_1": "string_categorical", + "string_2": "string_hashed", + "int_1": "integer_categorical", + "int_2": "integer_hashed", + "int_3": "integer_categorical", + }, + crosses=[("float_3", "string_1"), ("string_2", "int_2")], + output_mode="dict", + ) + fs.adapt(self._get_train_data_dict(as_dataset=True)) + + # Test unbatched call on raw data + data = { + key: value[0] for key, value in self._get_train_data_dict().items() + } + out = fs(data) + self.assertIsInstance(out, dict) + self.assertLen(out, 10) + self.assertEqual(out["string_1"].shape.as_list(), [11]) + self.assertEqual(out["int_2"].shape.as_list(), [32]) + self.assertEqual(out["string_2_X_int_2"].shape.as_list(), [32]) + + # Test batched call on raw data + out = fs(self._get_train_data_dict()) + self.assertIsInstance(out, dict) + 
self.assertLen(out, 10) + self.assertEqual(out["string_1"].shape.as_list(), [10, 11]) + self.assertEqual(out["int_2"].shape.as_list(), [10, 32]) + self.assertEqual(out["string_2_X_int_2"].shape.as_list(), [10, 32]) + + # Test batched call on TF tensors + out = fs(self._get_train_data_dict(as_tf_tensors=True)) + self.assertIsInstance(out, dict) + self.assertLen(out, 10) + self.assertEqual(out["string_1"].shape.as_list(), [10, 11]) + self.assertEqual(out["int_2"].shape.as_list(), [10, 32]) + self.assertEqual(out["string_2_X_int_2"].shape.as_list(), [10, 32]) + + def test_output_mode_dict_of_ints(self): + cls = feature_space.FeatureSpace + fs = feature_space.FeatureSpace( + features={ + "float_1": "float", + "float_2": "float_normalized", + "float_3": "float_discretized", + "string_1": cls.string_categorical(output_mode="int"), + "string_2": cls.string_hashed(num_bins=32, output_mode="int"), + "int_1": cls.integer_categorical(output_mode="int"), + "int_2": cls.integer_hashed(num_bins=32, output_mode="int"), + "int_3": cls.integer_categorical(output_mode="int"), + }, + crosses=[ + cls.cross( + ("float_3", "string_1"), output_mode="int", crossing_dim=32 + ), + cls.cross( + ("string_2", "int_2"), output_mode="int", crossing_dim=32 + ), + ], + output_mode="dict", + ) + fs.adapt(self._get_train_data_dict(as_dataset=True)) + data = { + key: value[0] for key, value in self._get_train_data_dict().items() + } + out = fs(data) + self.assertIsInstance(out, dict) + self.assertLen(out, 10) + self.assertEqual(out["string_1"].shape.as_list(), [1]) + self.assertEqual(out["string_1"].dtype.name, "int64") + self.assertEqual(out["int_2"].shape.as_list(), [1]) + self.assertEqual(out["int_2"].dtype.name, "int64") + self.assertEqual(out["string_2_X_int_2"].shape.as_list(), [1]) + self.assertEqual(out["string_2_X_int_2"].dtype.name, "int64") + + def test_functional_api_sync_processing(self): + fs = feature_space.FeatureSpace( + features={ + "float_1": "float", + "float_2": "float_normalized", + "float_3": "float_discretized", + "string_1": "string_categorical", + "string_2": "string_hashed", + "int_1": "integer_categorical", + "int_2": "integer_hashed", + "int_3": "integer_categorical", + }, + crosses=[("float_3", "string_1"), ("string_2", "int_2")], + output_mode="concat", + ) + fs.adapt(self._get_train_data_dict(as_dataset=True)) + inputs = fs.get_inputs() + features = fs.get_encoded_features() + outputs = layers.Dense(1)(features) + model = keras.Model(inputs=inputs, outputs=outputs) + model.compile("adam", "mse") + ds = self._get_train_data_dict(as_labeled_dataset=True) + model.fit(ds.batch(4)) + model.evaluate(ds.batch(4)) + ds = self._get_train_data_dict(as_dataset=True) + model.predict(ds.batch(4)) + + def test_tf_data_async_processing(self): + fs = feature_space.FeatureSpace( + features={ + "float_1": "float", + "float_2": "float_normalized", + "float_3": "float_discretized", + "string_1": "string_categorical", + "string_2": "string_hashed", + "int_1": "integer_categorical", + "int_2": "integer_hashed", + "int_3": "integer_categorical", + }, + crosses=[("float_3", "string_1"), ("string_2", "int_2")], + output_mode="concat", + ) + fs.adapt(self._get_train_data_dict(as_dataset=True)) + features = fs.get_encoded_features() + outputs = layers.Dense(1)(features) + model = keras.Model(inputs=features, outputs=outputs) + model.compile("adam", "mse") + ds = self._get_train_data_dict(as_labeled_dataset=True) + # Try map before batch + ds = ds.map(lambda x, y: (fs(x), y)) + model.fit(ds.batch(4)) + # Try map after 
batch + ds = self._get_train_data_dict(as_labeled_dataset=True) + ds = ds.batch(4) + ds = ds.map(lambda x, y: (fs(x), y)) + model.evaluate(ds) + ds = self._get_train_data_dict(as_dataset=True) + ds = ds.map(fs) + model.predict(ds.batch(4)) + + def test_advanced_usage(self): + cls = feature_space.FeatureSpace + fs = feature_space.FeatureSpace( + features={ + "float_1": cls.float(), + "float_2": cls.float_normalized(), + "float_3": cls.float_discretized(num_bins=3), + "string_1": cls.string_categorical(max_tokens=5), + "string_2": cls.string_hashed(num_bins=32), + "int_1": cls.integer_categorical( + max_tokens=5, num_oov_indices=2 + ), + "int_2": cls.integer_hashed(num_bins=32), + "int_3": cls.integer_categorical(max_tokens=5), + }, + crosses=[ + cls.cross(("float_3", "string_1"), crossing_dim=32), + cls.cross(("string_2", "int_2"), crossing_dim=32), + ], + output_mode="concat", + ) + fs.adapt(self._get_train_data_dict(as_dataset=True)) + data = { + key: value[0] for key, value in self._get_train_data_dict().items() + } + out = fs(data) + self.assertEqual(out.shape.as_list(), [148]) + + def test_manual_kpl(self): + data = { + "text": ["1st string", "2nd string", "3rd string"], + } + cls = feature_space.FeatureSpace + + # Test with a tf-idf TextVectorization layer + tv = layers.TextVectorization(output_mode="tf_idf") + fs = feature_space.FeatureSpace( + features={ + "text": cls.feature( + preprocessor=tv, dtype="string", output_mode="float" + ), + }, + output_mode="concat", + ) + fs.adapt(tf.data.Dataset.from_tensor_slices(data)) + out = fs(data) + self.assertEqual(out.shape.as_list(), [3, 5]) + + def test_no_adapt(self): + data = { + "int_1": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + } + fs = feature_space.FeatureSpace( + { + "int_1": "integer_hashed", + }, + output_mode="concat", + ) + out = fs(data) + self.assertEqual(out.shape.as_list(), [10, 32]) + + def test_saving(self): + cls = feature_space.FeatureSpace + fs = feature_space.FeatureSpace( + features={ + "float_1": cls.float(), + "float_2": cls.float_normalized(), + "float_3": cls.float_discretized(num_bins=3), + "string_1": cls.string_categorical(max_tokens=5), + "string_2": cls.string_hashed(num_bins=32), + "int_1": cls.integer_categorical( + max_tokens=5, num_oov_indices=2 + ), + "int_2": cls.integer_hashed(num_bins=32), + "int_3": cls.integer_categorical(max_tokens=5), + }, + crosses=[ + cls.cross(("float_3", "string_1"), crossing_dim=32), + cls.cross(("string_2", "int_2"), crossing_dim=32), + ], + output_mode="concat", + ) + fs.adapt(self._get_train_data_dict(as_dataset=True)) + data = { + key: value[0] for key, value in self._get_train_data_dict().items() + } + ref_out = fs(data) + + temp_filepath = os.path.join(self.get_temp_dir(), "fs.keras") + fs.save(temp_filepath) + fs = keras.models.load_model(temp_filepath) + + # Save again immediately after loading to test idempotency + temp_filepath = os.path.join(self.get_temp_dir(), "fs2.keras") + fs.save(temp_filepath) + + # Test correctness of the first saved FS + out = fs(data) + self.assertAllClose(out, ref_out) + + inputs = fs.get_inputs() + outputs = fs.get_encoded_features() + model = keras.Model(inputs=inputs, outputs=outputs) + ds = self._get_train_data_dict(as_dataset=True) + out = model.predict(ds.batch(4)) + self.assertAllClose(out[0], ref_out) + + # Test correctness of the re-saved FS + fs = keras.models.load_model(temp_filepath) + out = fs(data) + self.assertAllClose(out, ref_out) + + def test_errors(self): + # Test no features + with self.assertRaisesRegex(ValueError, "cannot be 
None or empty"): + feature_space.FeatureSpace(features={}) + # Test no crossing dim + with self.assertRaisesRegex(ValueError, "`crossing_dim`"): + feature_space.FeatureSpace( + features={ + "f1": "integer_categorical", + "f2": "integer_categorical", + }, + crosses=[("f1", "f2")], + crossing_dim=None, + ) + # Test wrong cross feature name + with self.assertRaisesRegex(ValueError, "should be present in "): + feature_space.FeatureSpace( + features={ + "f1": "integer_categorical", + "f2": "integer_categorical", + }, + crosses=[("f1", "unknown")], + crossing_dim=32, + ) + # Test wrong output mode + with self.assertRaisesRegex(ValueError, "for argument `output_mode`"): + feature_space.FeatureSpace( + features={ + "f1": "integer_categorical", + "f2": "integer_categorical", + }, + output_mode="unknown", + ) + # Test call before adapt + with self.assertRaisesRegex(ValueError, "You need to call `.adapt"): + fs = feature_space.FeatureSpace( + features={ + "f1": "integer_categorical", + "f2": "integer_categorical", + } + ) + fs({"f1": [0], "f2": [0]}) + # Test get_encoded_features before adapt + with self.assertRaisesRegex(ValueError, "You need to call `.adapt"): + fs = feature_space.FeatureSpace( + features={ + "f1": "integer_categorical", + "f2": "integer_categorical", + } + ) + fs.get_encoded_features() + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/generic_utils.py b/keras/utils/generic_utils.py index 44d2a95e9e08..ba58673eec43 100644 --- a/keras/utils/generic_utils.py +++ b/keras/utils/generic_utils.py @@ -14,8 +14,6 @@ # ============================================================================== """Python utilities required by Keras.""" -import tensorflow.compat.v2 as tf - import binascii import codecs import importlib @@ -23,1222 +21,537 @@ import os import re import sys -import threading import time import types as python_types -import warnings -import weakref import numpy as np +import tensorflow.compat.v2 as tf from keras.utils import io_utils -from keras.utils import tf_contextlib from keras.utils import tf_inspect -from tensorflow.python.util.tf_export import keras_export - -_GLOBAL_CUSTOM_OBJECTS = {} -_GLOBAL_CUSTOM_NAMES = {} - -# Flag that determines whether to skip the NotImplementedError when calling -# get_config in custom models and layers. This is only enabled when saving to -# SavedModel, when the config isn't required. -_SKIP_FAILED_SERIALIZATION = False -# If a layer does not have a defined config, then the returned config will be a -# dictionary with the below key. -_LAYER_UNDEFINED_CONFIG_KEY = 'layer was saved without config' - - -@keras_export('keras.utils.custom_object_scope', # pylint: disable=g-classes-have-attributes - 'keras.utils.CustomObjectScope') -class CustomObjectScope: - """Exposes custom classes/functions to Keras deserialization internals. - - Under a scope `with custom_object_scope(objects_dict)`, Keras methods such - as `tf.keras.models.load_model` or `tf.keras.models.model_from_config` - will be able to deserialize any custom object referenced by a - saved config (e.g. a custom layer or metric). - - Example: - - Consider a custom regularizer `my_regularizer`: - - ```python - layer = Dense(3, kernel_regularizer=my_regularizer) - config = layer.get_config() # Config contains a reference to `my_regularizer` - ... - # Later: - with custom_object_scope({'my_regularizer': my_regularizer}): - layer = Dense.from_config(config) - ``` - - Args: - *args: Dictionary or dictionaries of `{name: object}` pairs. 
- """ - - def __init__(self, *args): - self.custom_objects = args - self.backup = None - - def __enter__(self): - self.backup = _GLOBAL_CUSTOM_OBJECTS.copy() - for objects in self.custom_objects: - _GLOBAL_CUSTOM_OBJECTS.update(objects) - return self - - def __exit__(self, *args, **kwargs): - _GLOBAL_CUSTOM_OBJECTS.clear() - _GLOBAL_CUSTOM_OBJECTS.update(self.backup) - - -@keras_export('keras.utils.get_custom_objects') -def get_custom_objects(): - """Retrieves a live reference to the global dictionary of custom objects. - - Updating and clearing custom objects using `custom_object_scope` - is preferred, but `get_custom_objects` can - be used to directly access the current collection of custom objects. - - Example: - - ```python - get_custom_objects().clear() - get_custom_objects()['MyObject'] = MyObject - ``` - - Returns: - Global dictionary of names to classes (`_GLOBAL_CUSTOM_OBJECTS`). - """ - return _GLOBAL_CUSTOM_OBJECTS - - -# Store a unique, per-object ID for shared objects. -# -# We store a unique ID for each object so that we may, at loading time, -# re-create the network properly. Without this ID, we would have no way of -# determining whether a config is a description of a new object that -# should be created or is merely a reference to an already-created object. -SHARED_OBJECT_KEY = 'shared_object_id' - - -SHARED_OBJECT_DISABLED = threading.local() -SHARED_OBJECT_LOADING = threading.local() -SHARED_OBJECT_SAVING = threading.local() +# isort: off +from tensorflow.python.util.tf_export import keras_export -# Attributes on the threadlocal variable must be set per-thread, thus we -# cannot initialize these globally. Instead, we have accessor functions with -# default values. -def _shared_object_disabled(): - """Get whether shared object handling is disabled in a threadsafe manner.""" - return getattr(SHARED_OBJECT_DISABLED, 'disabled', False) - - -def _shared_object_loading_scope(): - """Get the current shared object saving scope in a threadsafe manner.""" - return getattr(SHARED_OBJECT_LOADING, 'scope', NoopLoadingScope()) - - -def _shared_object_saving_scope(): - """Get the current shared object saving scope in a threadsafe manner.""" - return getattr(SHARED_OBJECT_SAVING, 'scope', None) - - -class DisableSharedObjectScope: - """A context manager for disabling handling of shared objects. - - Disables shared object handling for both saving and loading. - - Created primarily for use with `clone_model`, which does extra surgery that - is incompatible with shared objects. - """ - - def __enter__(self): - SHARED_OBJECT_DISABLED.disabled = True - self._orig_loading_scope = _shared_object_loading_scope() - self._orig_saving_scope = _shared_object_saving_scope() - - def __exit__(self, *args, **kwargs): - SHARED_OBJECT_DISABLED.disabled = False - SHARED_OBJECT_LOADING.scope = self._orig_loading_scope - SHARED_OBJECT_SAVING.scope = self._orig_saving_scope - - -class NoopLoadingScope: - """The default shared object loading scope. It does nothing. - - Created to simplify serialization code that doesn't care about shared objects - (e.g. when serializing a single object). - """ - - def get(self, unused_object_id): - return None - - def set(self, object_id, obj): - pass - - -class SharedObjectLoadingScope: - """A context manager for keeping track of loaded objects. - - During the deserialization process, we may come across objects that are - shared across multiple layers. 
In order to accurately restore the network - structure to its original state, `SharedObjectLoadingScope` allows us to - re-use shared objects rather than cloning them. - """ - - def __enter__(self): - if _shared_object_disabled(): - return NoopLoadingScope() - - global SHARED_OBJECT_LOADING - SHARED_OBJECT_LOADING.scope = self - self._obj_ids_to_obj = {} - return self - def get(self, object_id): - """Given a shared object ID, returns a previously instantiated object. +def func_dump(func): + """Serializes a user defined function. Args: - object_id: shared object ID to use when attempting to find already-loaded - object. + func: the function to serialize. Returns: - The object, if we've seen this ID before. Else, `None`. + A tuple `(code, defaults, closure)`. """ - # Explicitly check for `None` internally to make external calling code a - # bit cleaner. - if object_id is None: - return - return self._obj_ids_to_obj.get(object_id) - - def set(self, object_id, obj): - """Stores an instantiated object for future lookup and sharing.""" - if object_id is None: - return - self._obj_ids_to_obj[object_id] = obj - - def __exit__(self, *args, **kwargs): - global SHARED_OBJECT_LOADING - SHARED_OBJECT_LOADING.scope = NoopLoadingScope() - - -class SharedObjectConfig(dict): - """A configuration container that keeps track of references. - - `SharedObjectConfig` will automatically attach a shared object ID to any - configs which are referenced more than once, allowing for proper shared - object reconstruction at load time. - - In most cases, it would be more proper to subclass something like - `collections.UserDict` or `collections.Mapping` rather than `dict` directly. - Unfortunately, python's json encoder does not support `Mapping`s. This is - important functionality to retain, since we are dealing with serialization. - - We should be safe to subclass `dict` here, since we aren't actually - overriding any core methods, only augmenting with a new one for reference - counting. - """ - - def __init__(self, base_config, object_id, **kwargs): - self.ref_count = 1 - self.object_id = object_id - super().__init__(base_config, **kwargs) - - def increment_ref_count(self): - # As soon as we've seen the object more than once, we want to attach the - # shared object ID. This allows us to only attach the shared object ID when - # it's strictly necessary, making backwards compatibility breakage less - # likely. - if self.ref_count == 1: - self[SHARED_OBJECT_KEY] = self.object_id - self.ref_count += 1 - - -class SharedObjectSavingScope: - """Keeps track of shared object configs when serializing.""" - - def __enter__(self): - if _shared_object_disabled(): - return None - - global SHARED_OBJECT_SAVING - - # Serialization can happen at a number of layers for a number of reasons. - # We may end up with a case where we're opening a saving scope within - # another saving scope. In that case, we'd like to use the outermost scope - # available and ignore inner scopes, since there is not (yet) a reasonable - # use case for having these nested and distinct. 
- if _shared_object_saving_scope() is not None: - self._passthrough = True - return _shared_object_saving_scope() + if os.name == "nt": + raw_code = marshal.dumps(func.__code__).replace(b"\\", b"/") + code = codecs.encode(raw_code, "base64").decode("ascii") + else: + raw_code = marshal.dumps(func.__code__) + code = codecs.encode(raw_code, "base64").decode("ascii") + defaults = func.__defaults__ + if func.__closure__: + closure = tuple(c.cell_contents for c in func.__closure__) else: - self._passthrough = False + closure = None + return code, defaults, closure - SHARED_OBJECT_SAVING.scope = self - self._shared_objects_config = weakref.WeakKeyDictionary() - self._next_id = 0 - return self - def get_config(self, obj): - """Gets a `SharedObjectConfig` if one has already been seen for `obj`. +def func_load(code, defaults=None, closure=None, globs=None): + """Deserializes a user defined function. Args: - obj: The object for which to retrieve the `SharedObjectConfig`. + code: bytecode of the function. + defaults: defaults of the function. + closure: closure of the function. + globs: dictionary of global objects. Returns: - The SharedObjectConfig for a given object, if already seen. Else, - `None`. + A function object. """ - try: - shared_object_config = self._shared_objects_config[obj] - except (TypeError, KeyError): - # If the object is unhashable (e.g. a subclass of `AbstractBaseClass` - # that has not overridden `__hash__`), a `TypeError` will be thrown. - # We'll just continue on without shared object support. - return None - shared_object_config.increment_ref_count() - return shared_object_config - - def create_config(self, base_config, obj): - """Create a new SharedObjectConfig for a given object.""" - shared_object_config = SharedObjectConfig(base_config, self._next_id) - self._next_id += 1 - try: - self._shared_objects_config[obj] = shared_object_config - except TypeError: - # If the object is unhashable (e.g. a subclass of `AbstractBaseClass` - # that has not overridden `__hash__`), a `TypeError` will be thrown. - # We'll just continue on without shared object support. - pass - return shared_object_config - - def __exit__(self, *args, **kwargs): - if not getattr(self, '_passthrough', False): - global SHARED_OBJECT_SAVING - SHARED_OBJECT_SAVING.scope = None - - -def serialize_keras_class_and_config( - cls_name, cls_config, obj=None, shared_object_id=None): - """Returns the serialization of the class with the given config.""" - base_config = {'class_name': cls_name, 'config': cls_config} - - # We call `serialize_keras_class_and_config` for some branches of the load - # path. In that case, we may already have a shared object ID we'd like to - # retain. - if shared_object_id is not None: - base_config[SHARED_OBJECT_KEY] = shared_object_id - - # If we have an active `SharedObjectSavingScope`, check whether we've already - # serialized this config. If so, just use that config. This will store an - # extra ID field in the config, allowing us to re-create the shared object - # relationship at load time. - if _shared_object_saving_scope() is not None and obj is not None: - shared_object_config = _shared_object_saving_scope().get_config(obj) - if shared_object_config is None: - return _shared_object_saving_scope().create_config(base_config, obj) - return shared_object_config - - return base_config - - -@keras_export('keras.utils.register_keras_serializable') -def register_keras_serializable(package='Custom', name=None): - """Registers an object with the Keras serialization framework. 
- - This decorator injects the decorated class or function into the Keras custom - object dictionary, so that it can be serialized and deserialized without - needing an entry in the user-provided custom object dict. It also injects a - function that Keras will call to get the object's serializable string key. - - Note that to be serialized and deserialized, classes must implement the - `get_config()` method. Functions do not have this requirement. - - The object will be registered under the key 'package>name' where `name`, - defaults to the object name if not passed. - - Example: - - ```python - # Note that `'my_package'` is used as the `package` argument here, and since - # the `name` argument is not provided, `'MyDense'` is used as the `name`. - @keras.utils.register_keras_serializable('my_package') - class MyDense(keras.layers.Dense): - pass - - assert keras.utils.get_registered_object('my_package>MyDense') == MyDense - assert keras.utils.get_registered_name(MyDense) == 'my_package>MyDense' - ``` - - Args: - package: The package that this class belongs to. This is used for the `key` - (which is 'package>name') to idenfify the class. Note that this is the - first argument passed into the decorator. - name: The name to serialize this class under in this package. If not - provided or `None`, the class' name will be used (note that this is the - case when the decorator is used with only one argument, which becomes the - `package`). - - Returns: - A decorator that registers the decorated class with the passed names. - """ - - def decorator(arg): - """Registers a class with the Keras serialization framework.""" - class_name = name if name is not None else arg.__name__ - registered_name = package + '>' + class_name - - if tf_inspect.isclass(arg) and not hasattr(arg, 'get_config'): - raise ValueError( - 'Cannot register a class that does not have a get_config() method.') - - if registered_name in _GLOBAL_CUSTOM_OBJECTS: - raise ValueError( - f'{registered_name} has already been registered to ' - f'{_GLOBAL_CUSTOM_OBJECTS[registered_name]}') - - if arg in _GLOBAL_CUSTOM_NAMES: - raise ValueError( - f'{arg} has already been registered to {_GLOBAL_CUSTOM_NAMES[arg]}') - _GLOBAL_CUSTOM_OBJECTS[registered_name] = arg - _GLOBAL_CUSTOM_NAMES[arg] = registered_name - - return arg - - return decorator - - -@keras_export('keras.utils.get_registered_name') -def get_registered_name(obj): - """Returns the name registered to an object within the Keras framework. - - This function is part of the Keras serialization and deserialization - framework. It maps objects to the string names associated with those objects - for serialization/deserialization. - - Args: - obj: The object to look up. - - Returns: - The name associated with the object, or the default Python name if the - object is not registered. - """ - if obj in _GLOBAL_CUSTOM_NAMES: - return _GLOBAL_CUSTOM_NAMES[obj] - else: - return obj.__name__ - - -@tf_contextlib.contextmanager -def skip_failed_serialization(): - global _SKIP_FAILED_SERIALIZATION - prev = _SKIP_FAILED_SERIALIZATION - try: - _SKIP_FAILED_SERIALIZATION = True - yield - finally: - _SKIP_FAILED_SERIALIZATION = prev - - -@keras_export('keras.utils.get_registered_object') -def get_registered_object(name, custom_objects=None, module_objects=None): - """Returns the class associated with `name` if it is registered with Keras. - - This function is part of the Keras serialization and deserialization - framework. 
It maps strings to the objects associated with them for - serialization/deserialization. - - Example: - ``` - def from_config(cls, config, custom_objects=None): - if 'my_custom_object_name' in config: - config['hidden_cls'] = tf.keras.utils.get_registered_object( - config['my_custom_object_name'], custom_objects=custom_objects) - ``` - - Args: - name: The name to look up. - custom_objects: A dictionary of custom objects to look the name up in. - Generally, custom_objects is provided by the user. - module_objects: A dictionary of custom objects to look the name up in. - Generally, module_objects is provided by midlevel library implementers. - - Returns: - An instantiable class associated with 'name', or None if no such class - exists. - """ - if name in _GLOBAL_CUSTOM_OBJECTS: - return _GLOBAL_CUSTOM_OBJECTS[name] - elif custom_objects and name in custom_objects: - return custom_objects[name] - elif module_objects and name in module_objects: - return module_objects[name] - return None - - -# pylint: disable=g-bad-exception-name -class CustomMaskWarning(Warning): - pass -# pylint: enable=g-bad-exception-name - - -@keras_export('keras.utils.serialize_keras_object') -def serialize_keras_object(instance): - """Serialize a Keras object into a JSON-compatible representation. - - Calls to `serialize_keras_object` while underneath the - `SharedObjectSavingScope` context manager will cause any objects re-used - across multiple layers to be saved with a special shared object ID. This - allows the network to be re-created properly during deserialization. - - Args: - instance: The object to serialize. - - Returns: - A dict-like, JSON-compatible representation of the object's config. - """ - _, instance = tf.__internal__.decorator.unwrap(instance) - if instance is None: - return None - - # pylint: disable=protected-access - # - # For v1 layers, checking supports_masking is not enough. We have to also - # check whether compute_mask has been overridden. - supports_masking = (getattr(instance, 'supports_masking', False) - or (hasattr(instance, 'compute_mask') - and not is_default(instance.compute_mask))) - if supports_masking and is_default(instance.get_config): - warnings.warn( - 'Custom mask layers require a config and must override ' - 'get_config. When loading, the custom mask layer must be ' - 'passed to the custom_objects argument.', - category=CustomMaskWarning, - stacklevel=2) - # pylint: enable=protected-access - - if hasattr(instance, 'get_config'): - name = get_registered_name(instance.__class__) - try: - config = instance.get_config() - except NotImplementedError as e: - if _SKIP_FAILED_SERIALIZATION: - return serialize_keras_class_and_config( - name, {_LAYER_UNDEFINED_CONFIG_KEY: True}) - raise e - serialization_config = {} - for key, item in config.items(): - if isinstance(item, str): - serialization_config[key] = item - continue - - # Any object of a different type needs to be converted to string or dict - # for serialization (e.g. 
custom functions, custom classes) - try: - serialized_item = serialize_keras_object(item) - if isinstance(serialized_item, dict) and not isinstance(item, dict): - serialized_item['__passive_serialization__'] = True - serialization_config[key] = serialized_item - except ValueError: - serialization_config[key] = item - - name = get_registered_name(instance.__class__) - return serialize_keras_class_and_config( - name, serialization_config, instance) - if hasattr(instance, '__name__'): - return get_registered_name(instance) - raise ValueError(f'Cannot serialize {instance} since it doesn\'t implement ' - '`get_config()`, and also doesn\t have `__name__`') - - -def get_custom_objects_by_name(item, custom_objects=None): - """Returns the item if it is in either local or global custom objects.""" - if item in _GLOBAL_CUSTOM_OBJECTS: - return _GLOBAL_CUSTOM_OBJECTS[item] - elif custom_objects and item in custom_objects: - return custom_objects[item] - return None - - -def class_and_config_for_serialized_keras_object( - config, - module_objects=None, - custom_objects=None, - printable_module_name='object'): - """Returns the class name and config for a serialized keras object.""" - if (not isinstance(config, dict) - or 'class_name' not in config - or 'config' not in config): - raise ValueError( - f'Improper config format for {config}. ' - 'Expecting python dict contains `class_name` and `config` as keys') - - class_name = config['class_name'] - cls = get_registered_object(class_name, custom_objects, module_objects) - if cls is None: - raise ValueError( - f'Unknown {printable_module_name}: {class_name}. Please ensure this ' - 'object is passed to the `custom_objects` argument. See ' - 'https://www.tensorflow.org/guide/keras/save_and_serialize' - '#registering_the_custom_object for details.') - - cls_config = config['config'] - # Check if `cls_config` is a list. If it is a list, return the class and the - # associated class configs for recursively deserialization. This case will - # happen on the old version of sequential model (e.g. `keras_version` == - # "2.0.6"), which is serialized in a different structure, for example - # "{'class_name': 'Sequential', - # 'config': [{'class_name': 'Embedding', 'config': ...}, {}, ...]}". - if isinstance(cls_config, list): - return (cls, cls_config) - - deserialized_objects = {} - for key, item in cls_config.items(): - if key == 'name': - # Assume that the value of 'name' is a string that should not be - # deserialized as a function. This avoids the corner case where - # cls_config['name'] has an identical name to a custom function and - # gets converted into that function. - deserialized_objects[key] = item - elif isinstance(item, dict) and '__passive_serialization__' in item: - deserialized_objects[key] = deserialize_keras_object( - item, - module_objects=module_objects, - custom_objects=custom_objects, - printable_module_name='config_item') - # TODO(momernick): Should this also have 'module_objects'? - elif (isinstance(item, str) and - tf_inspect.isfunction(get_registered_object(item, custom_objects))): - # Handle custom functions here. When saving functions, we only save the - # function's name as a string. If we find a matching string in the custom - # objects during deserialization, we convert the string back to the - # original function. - # Note that a potential issue is that a string field could have a naming - # conflict with a custom function name, but this should be a rare case. 
- # This issue does not occur if a string field has a naming conflict with - # a custom object, since the config of an object will always be a dict. - deserialized_objects[key] = get_registered_object(item, custom_objects) - for key, item in deserialized_objects.items(): - cls_config[key] = deserialized_objects[key] - - return (cls, cls_config) - - -@keras_export('keras.utils.deserialize_keras_object') -def deserialize_keras_object(identifier, - module_objects=None, - custom_objects=None, - printable_module_name='object'): - """Turns the serialized form of a Keras object back into an actual object. - - This function is for mid-level library implementers rather than end users. - - Importantly, this utility requires you to provide the dict of `module_objects` - to use for looking up the object config; this is not populated by default. - If you need a deserialization utility that has preexisting knowledge of - built-in Keras objects, use e.g. `keras.layers.deserialize(config)`, - `keras.metrics.deserialize(config)`, etc. - - Calling `deserialize_keras_object` while underneath the - `SharedObjectLoadingScope` context manager will cause any already-seen shared - objects to be returned as-is rather than creating a new object. - - Args: - identifier: the serialized form of the object. - module_objects: A dictionary of built-in objects to look the name up in. - Generally, `module_objects` is provided by midlevel library implementers. - custom_objects: A dictionary of custom objects to look the name up in. - Generally, `custom_objects` is provided by the end user. - printable_module_name: A human-readable string representing the type of the - object. Printed in case of exception. - - Returns: - The deserialized object. - - Example: - - A mid-level library implementer might want to implement a utility for - retrieving an object from its config, as such: - - ```python - def deserialize(config, custom_objects=None): - return deserialize_keras_object( - identifier, - module_objects=globals(), - custom_objects=custom_objects, - name="MyObjectType", - ) - ``` - - This is how e.g. `keras.layers.deserialize()` is implemented. - """ - if identifier is None: - return None - - if isinstance(identifier, dict): - # In this case we are dealing with a Keras config dictionary. - config = identifier - (cls, cls_config) = class_and_config_for_serialized_keras_object( - config, module_objects, custom_objects, printable_module_name) - - # If this object has already been loaded (i.e. it's shared between multiple - # objects), return the already-loaded object. - shared_object_id = config.get(SHARED_OBJECT_KEY) - shared_object = _shared_object_loading_scope().get(shared_object_id) # pylint: disable=assignment-from-none - if shared_object is not None: - return shared_object - - if hasattr(cls, 'from_config'): - arg_spec = tf_inspect.getfullargspec(cls.from_config) - custom_objects = custom_objects or {} - - if 'custom_objects' in arg_spec.args: - deserialized_obj = cls.from_config( - cls_config, - custom_objects=dict( - list(_GLOBAL_CUSTOM_OBJECTS.items()) + - list(custom_objects.items()))) - else: - with CustomObjectScope(custom_objects): - deserialized_obj = cls.from_config(cls_config) - else: - # Then `cls` may be a function returning a class. - # in this case by convention `config` holds - # the kwargs of the function. - custom_objects = custom_objects or {} - with CustomObjectScope(custom_objects): - deserialized_obj = cls(**cls_config) - - # Add object to shared objects, in case we find it referenced again. 
- _shared_object_loading_scope().set(shared_object_id, deserialized_obj) - - return deserialized_obj - - elif isinstance(identifier, str): - object_name = identifier - if custom_objects and object_name in custom_objects: - obj = custom_objects.get(object_name) - elif object_name in _GLOBAL_CUSTOM_OBJECTS: - obj = _GLOBAL_CUSTOM_OBJECTS[object_name] - else: - obj = module_objects.get(object_name) - if obj is None: - raise ValueError( - f'Unknown {printable_module_name}: {object_name}. Please ensure ' - 'this object is passed to the `custom_objects` argument. See ' - 'https://www.tensorflow.org/guide/keras/save_and_serialize' - '#registering_the_custom_object for details.') - - # Classes passed by name are instantiated with no args, functions are - # returned as-is. - if tf_inspect.isclass(obj): - return obj() - return obj - elif tf_inspect.isfunction(identifier): - # If a function has already been deserialized, return as is. - return identifier - else: - raise ValueError( - f'Could not interpret serialized {printable_module_name}: {identifier}') + if isinstance(code, (tuple, list)): # unpack previous dump + code, defaults, closure = code + if isinstance(defaults, list): + defaults = tuple(defaults) + def ensure_value_to_cell(value): + """Ensures that a value is converted to a python cell object. -def func_dump(func): - """Serializes a user defined function. - - Args: - func: the function to serialize. - - Returns: - A tuple `(code, defaults, closure)`. - """ - if os.name == 'nt': - raw_code = marshal.dumps(func.__code__).replace(b'\\', b'/') - code = codecs.encode(raw_code, 'base64').decode('ascii') - else: - raw_code = marshal.dumps(func.__code__) - code = codecs.encode(raw_code, 'base64').decode('ascii') - defaults = func.__defaults__ - if func.__closure__: - closure = tuple(c.cell_contents for c in func.__closure__) - else: - closure = None - return code, defaults, closure + Args: + value: Any value that needs to be casted to the cell type + Returns: + A value wrapped as a cell object (see function "func_load") + """ -def func_load(code, defaults=None, closure=None, globs=None): - """Deserializes a user defined function. + def dummy_fn(): + + value # just access it so it gets captured in .__closure__ + + cell_value = dummy_fn.__closure__[0] + if not isinstance(value, type(cell_value)): + return cell_value + return value - Args: - code: bytecode of the function. - defaults: defaults of the function. - closure: closure of the function. - globs: dictionary of global objects. + if closure is not None: + closure = tuple(ensure_value_to_cell(_) for _ in closure) + try: + raw_code = codecs.decode(code.encode("ascii"), "base64") + except (UnicodeEncodeError, binascii.Error): + raw_code = code.encode("raw_unicode_escape") + code = marshal.loads(raw_code) + if globs is None: + globs = globals() + return python_types.FunctionType( + code, globs, name=code.co_name, argdefs=defaults, closure=closure + ) - Returns: - A function object. - """ - if isinstance(code, (tuple, list)): # unpack previous dump - code, defaults, closure = code - if isinstance(defaults, list): - defaults = tuple(defaults) - def ensure_value_to_cell(value): - """Ensures that a value is converted to a python cell object. +def has_arg(fn, name, accept_all=False): + """Checks if a callable accepts a given keyword argument. Args: - value: Any value that needs to be casted to the cell type + fn: Callable to inspect. + name: Check if `fn` can be called with `name` as a keyword argument. 
+ accept_all: What to return if there is no parameter called `name` but + the function accepts a `**kwargs` argument. Returns: - A value wrapped as a cell object (see function "func_load") + bool, whether `fn` accepts a `name` keyword argument. """ + arg_spec = tf_inspect.getfullargspec(fn) + if accept_all and arg_spec.varkw is not None: + return True + return name in arg_spec.args or name in arg_spec.kwonlyargs - def dummy_fn(): - # pylint: disable=pointless-statement - value # just access it so it gets captured in .__closure__ - - cell_value = dummy_fn.__closure__[0] - if not isinstance(value, type(cell_value)): - return cell_value - return value - - if closure is not None: - closure = tuple(ensure_value_to_cell(_) for _ in closure) - try: - raw_code = codecs.decode(code.encode('ascii'), 'base64') - except (UnicodeEncodeError, binascii.Error): - raw_code = code.encode('raw_unicode_escape') - code = marshal.loads(raw_code) - if globs is None: - globs = globals() - return python_types.FunctionType( - code, globs, name=code.co_name, argdefs=defaults, closure=closure) - -def has_arg(fn, name, accept_all=False): - """Checks if a callable accepts a given keyword argument. - - Args: - fn: Callable to inspect. - name: Check if `fn` can be called with `name` as a keyword argument. - accept_all: What to return if there is no parameter called `name` but the - function accepts a `**kwargs` argument. - - Returns: - bool, whether `fn` accepts a `name` keyword argument. - """ - arg_spec = tf_inspect.getfullargspec(fn) - if accept_all and arg_spec.varkw is not None: - return True - return name in arg_spec.args or name in arg_spec.kwonlyargs - - -@keras_export('keras.utils.Progbar') +@keras_export("keras.utils.Progbar") class Progbar: - """Displays a progress bar. - - Args: - target: Total number of steps expected, None if unknown. - width: Progress bar width on screen. - verbose: Verbosity mode, 0 (silent), 1 (verbose), 2 (semi-verbose) - stateful_metrics: Iterable of string names of metrics that should *not* be - averaged over time. Metrics in this list will be displayed as-is. All - others will be averaged by the progbar before display. - interval: Minimum visual progress update interval (in seconds). - unit_name: Display name for step counts (usually "step" or "sample"). - """ - - def __init__(self, - target, - width=30, - verbose=1, - interval=0.05, - stateful_metrics=None, - unit_name='step'): - self.target = target - self.width = width - self.verbose = verbose - self.interval = interval - self.unit_name = unit_name - if stateful_metrics: - self.stateful_metrics = set(stateful_metrics) - else: - self.stateful_metrics = set() - - self._dynamic_display = ((hasattr(sys.stdout, 'isatty') and - sys.stdout.isatty()) or - 'ipykernel' in sys.modules or - 'posix' in sys.modules or - 'PYCHARM_HOSTED' in os.environ) - self._total_width = 0 - self._seen_so_far = 0 - # We use a dict + list to avoid garbage collection - # issues found in OrderedDict - self._values = {} - self._values_order = [] - self._start = time.time() - self._last_update = 0 - self._time_at_epoch_start = self._start - self._time_at_epoch_end = None - self._time_after_first_step = None - - def update(self, current, values=None, finalize=None): - """Updates the progress bar. + """Displays a progress bar. Args: - current: Index of current step. - values: List of tuples: `(name, value_for_last_step)`. If `name` is in - `stateful_metrics`, `value_for_last_step` will be displayed as-is. - Else, an average of the metric over time will be displayed. 
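The reformatted `func_dump`/`func_load` pair round-trips a user-defined function through marshalled bytecode, and `has_arg` inspects a callable's signature. A small standalone sketch of their behavior (the function `scale` is a hypothetical example, not from the PR):

```python
from keras.utils import generic_utils

def scale(x, factor=2):
    return x * factor

# Round-trip: func_dump marshals the bytecode plus defaults/closure;
# func_load rebuilds a FunctionType from them. Marshalled bytecode is
# Python-version specific, so only load what you dumped yourself.
code, defaults, closure = generic_utils.func_dump(scale)
restored = generic_utils.func_load(code, defaults, closure)
assert restored(3) == 6

# has_arg checks whether a callable accepts a given keyword argument.
assert generic_utils.has_arg(scale, "factor")
assert not generic_utils.has_arg(scale, "missing", accept_all=True)
```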
- finalize: Whether this is the last update for the progress bar. If - `None`, defaults to `current >= self.target`. + target: Total number of steps expected, None if unknown. + width: Progress bar width on screen. + verbose: Verbosity mode, 0 (silent), 1 (verbose), 2 (semi-verbose) + stateful_metrics: Iterable of string names of metrics that should *not* + be averaged over time. Metrics in this list will be displayed as-is. + All others will be averaged by the progbar before display. + interval: Minimum visual progress update interval (in seconds). + unit_name: Display name for step counts (usually "step" or "sample"). """ - if finalize is None: - if self.target is None: - finalize = False - else: - finalize = current >= self.target - - values = values or [] - for k, v in values: - if k not in self._values_order: - self._values_order.append(k) - if k not in self.stateful_metrics: - # In the case that progress bar doesn't have a target value in the first - # epoch, both on_batch_end and on_epoch_end will be called, which will - # cause 'current' and 'self._seen_so_far' to have the same value. Force - # the minimal value to 1 here, otherwise stateful_metric will be 0s. - value_base = max(current - self._seen_so_far, 1) - if k not in self._values: - self._values[k] = [v * value_base, value_base] + + def __init__( + self, + target, + width=30, + verbose=1, + interval=0.05, + stateful_metrics=None, + unit_name="step", + ): + self.target = target + self.width = width + self.verbose = verbose + self.interval = interval + self.unit_name = unit_name + if stateful_metrics: + self.stateful_metrics = set(stateful_metrics) else: - self._values[k][0] += v * value_base - self._values[k][1] += value_base - else: - # Stateful metrics output a numeric value. This representation - # means "take an average from a single value" but keeps the - # numeric formatting. - self._values[k] = [v, 1] - self._seen_so_far = current - - message = '' - now = time.time() - info = ' - %.0fs' % (now - self._start) - if current == self.target: - self._time_at_epoch_end = now - if self.verbose == 1: - if now - self._last_update < self.interval and not finalize: - return - - prev_total_width = self._total_width - if self._dynamic_display: - message += '\b' * prev_total_width - message += '\r' - else: - message += '\n' - - if self.target is not None: - numdigits = int(np.log10(self.target)) + 1 - bar = ('%' + str(numdigits) + 'd/%d [') % (current, self.target) - prog = float(current) / self.target - prog_width = int(self.width * prog) - if prog_width > 0: - bar += ('=' * (prog_width - 1)) - if current < self.target: - bar += '>' - else: - bar += '=' - bar += ('.' 
* (self.width - prog_width)) - bar += ']' - else: - bar = '%7d/Unknown' % current - - self._total_width = len(bar) - message += bar - - time_per_unit = self._estimate_step_duration(current, now) - - if self.target is None or finalize: - info += self._format_time(time_per_unit, self.unit_name) - else: - eta = time_per_unit * (self.target - current) - if eta > 3600: - eta_format = '%d:%02d:%02d' % (eta // 3600, - (eta % 3600) // 60, eta % 60) - elif eta > 60: - eta_format = '%d:%02d' % (eta // 60, eta % 60) + self.stateful_metrics = set() + + self._dynamic_display = ( + (hasattr(sys.stdout, "isatty") and sys.stdout.isatty()) + or "ipykernel" in sys.modules + or "posix" in sys.modules + or "PYCHARM_HOSTED" in os.environ + ) + self._total_width = 0 + self._seen_so_far = 0 + # We use a dict + list to avoid garbage collection + # issues found in OrderedDict + self._values = {} + self._values_order = [] + self._start = time.time() + self._last_update = 0 + self._time_at_epoch_start = self._start + self._time_at_epoch_end = None + self._time_after_first_step = None + + def update(self, current, values=None, finalize=None): + """Updates the progress bar. + + Args: + current: Index of current step. + values: List of tuples: `(name, value_for_last_step)`. If `name` is + in `stateful_metrics`, `value_for_last_step` will be displayed + as-is. Else, an average of the metric over time will be + displayed. + finalize: Whether this is the last update for the progress bar. If + `None`, uses `current >= self.target`. Defaults to `None`. + """ + if finalize is None: + if self.target is None: + finalize = False + else: + finalize = current >= self.target + + values = values or [] + for k, v in values: + if k not in self._values_order: + self._values_order.append(k) + if k not in self.stateful_metrics: + # In the case that progress bar doesn't have a target value in + # the first epoch, both on_batch_end and on_epoch_end will be + # called, which will cause 'current' and 'self._seen_so_far' to + # have the same value. Force the minimal value to 1 here, + # otherwise stateful_metric will be 0s. + value_base = max(current - self._seen_so_far, 1) + if k not in self._values: + self._values[k] = [v * value_base, value_base] + else: + self._values[k][0] += v * value_base + self._values[k][1] += value_base + else: + # Stateful metrics output a numeric value. This representation + # means "take an average from a single value" but keeps the + # numeric formatting. + self._values[k] = [v, 1] + self._seen_so_far = current + + message = "" + now = time.time() + info = f" - {now - self._start:.0f}s" + if current == self.target: + self._time_at_epoch_end = now + if self.verbose == 1: + if now - self._last_update < self.interval and not finalize: + return + + prev_total_width = self._total_width + if self._dynamic_display: + message += "\b" * prev_total_width + message += "\r" + else: + message += "\n" + + if self.target is not None: + numdigits = int(np.log10(self.target)) + 1 + bar = ("%" + str(numdigits) + "d/%d [") % (current, self.target) + prog = float(current) / self.target + prog_width = int(self.width * prog) + if prog_width > 0: + bar += "=" * (prog_width - 1) + if current < self.target: + bar += ">" + else: + bar += "=" + bar += "." 
* (self.width - prog_width) + bar += "]" + else: + bar = "%7d/Unknown" % current + + self._total_width = len(bar) + message += bar + + time_per_unit = self._estimate_step_duration(current, now) + + if self.target is None or finalize: + info += self._format_time(time_per_unit, self.unit_name) + else: + eta = time_per_unit * (self.target - current) + if eta > 3600: + eta_format = "%d:%02d:%02d" % ( + eta // 3600, + (eta % 3600) // 60, + eta % 60, + ) + elif eta > 60: + eta_format = "%d:%02d" % (eta // 60, eta % 60) + else: + eta_format = "%ds" % eta + + info = f" - ETA: {eta_format}" + + for k in self._values_order: + info += f" - {k}:" + if isinstance(self._values[k], list): + avg = np.mean( + self._values[k][0] / max(1, self._values[k][1]) + ) + if abs(avg) > 1e-3: + info += f" {avg:.4f}" + else: + info += f" {avg:.4e}" + else: + info += f" {self._values[k]}" + + self._total_width += len(info) + if prev_total_width > self._total_width: + info += " " * (prev_total_width - self._total_width) + + if finalize: + info += "\n" + + message += info + io_utils.print_msg(message, line_break=False) + message = "" + + elif self.verbose == 2: + if finalize: + numdigits = int(np.log10(self.target)) + 1 + count = ("%" + str(numdigits) + "d/%d") % (current, self.target) + info = count + info + for k in self._values_order: + info += f" - {k}:" + avg = np.mean( + self._values[k][0] / max(1, self._values[k][1]) + ) + if avg > 1e-3: + info += f" {avg:.4f}" + else: + info += f" {avg:.4e}" + if self._time_at_epoch_end: + time_per_epoch = ( + self._time_at_epoch_end - self._time_at_epoch_start + ) + avg_time_per_step = time_per_epoch / self.target + self._time_at_epoch_start = now + self._time_at_epoch_end = None + info += " -" + self._format_time(time_per_epoch, "epoch") + info += " -" + self._format_time( + avg_time_per_step, self.unit_name + ) + info += "\n" + message += info + io_utils.print_msg(message, line_break=False) + message = "" + + self._last_update = now + + def add(self, n, values=None): + self.update(self._seen_so_far + n, values) + + def _format_time(self, time_per_unit, unit_name): + """format a given duration to display to the user. + + Given the duration, this function formats it in either milliseconds + or seconds and displays the unit (i.e. ms/step or s/epoch) + Args: + time_per_unit: the duration to display + unit_name: the name of the unit to display + Returns: + a string with the correctly formatted duration and units + """ + formatted = "" + if time_per_unit >= 1 or time_per_unit == 0: + formatted += f" {time_per_unit:.0f}s/{unit_name}" + elif time_per_unit >= 1e-3: + formatted += f" {time_per_unit * 1000.0:.0f}ms/{unit_name}" else: - eta_format = '%ds' % eta - - info = ' - ETA: %s' % eta_format - - for k in self._values_order: - info += ' - %s:' % k - if isinstance(self._values[k], list): - avg = np.mean(self._values[k][0] / max(1, self._values[k][1])) - if abs(avg) > 1e-3: - info += ' %.4f' % avg - else: - info += ' %.4e' % avg + formatted += f" {time_per_unit * 1000000.0:.0f}us/{unit_name}" + return formatted + + def _estimate_step_duration(self, current, now): + """Estimate the duration of a single step. + + Given the step number `current` and the corresponding time `now` this + function returns an estimate for how long a single step takes. If this + is called before one step has been completed (i.e. `current == 0`) then + zero is given as an estimate. 
The duration estimate ignores the duration + of the (assumed to be non-representative) first step for estimates when + more steps are available (i.e. `current>1`). + + Args: + current: Index of current step. + now: The current time. + + Returns: Estimate of the duration of a single step. + """ + if current: + # there are a few special scenarios here: + # 1) somebody is calling the progress bar without ever supplying + # step 1 + # 2) somebody is calling the progress bar and supplies step one + # multiple times, e.g. as part of a finalizing call + # in these cases, we just fall back to the simple calculation + if self._time_after_first_step is not None and current > 1: + time_per_unit = (now - self._time_after_first_step) / ( + current - 1 + ) + else: + time_per_unit = (now - self._start) / current + + if current == 1: + self._time_after_first_step = now + return time_per_unit else: - info += ' %s' % self._values[k] - - self._total_width += len(info) - if prev_total_width > self._total_width: - info += (' ' * (prev_total_width - self._total_width)) - - if finalize: - info += '\n' - - message += info - io_utils.print_msg(message, line_break=False) - message = '' - - elif self.verbose == 2: - if finalize: - numdigits = int(np.log10(self.target)) + 1 - count = ('%' + str(numdigits) + 'd/%d') % (current, self.target) - info = count + info - for k in self._values_order: - info += ' - %s:' % k - avg = np.mean(self._values[k][0] / max(1, self._values[k][1])) - if avg > 1e-3: - info += ' %.4f' % avg - else: - info += ' %.4e' % avg - if self._time_at_epoch_end: - time_per_epoch = self._time_at_epoch_end - self._time_at_epoch_start - avg_time_per_step = time_per_epoch / self.target - self._time_at_epoch_start = now - self._time_at_epoch_end = None - info += ' -' + self._format_time(time_per_epoch, 'epoch') - info += ' -' + self._format_time(avg_time_per_step, self.unit_name) - info += '\n' - message += info - io_utils.print_msg(message, line_break=False) - message = '' - - self._last_update = now - - def add(self, n, values=None): - self.update(self._seen_so_far + n, values) - - def _format_time(self, time_per_unit, unit_name): - """format a given duration to display to the user. - - Given the duration, this function formats it in either milliseconds - or seconds and displays the unit (i.e. ms/step or s/epoch) + return 0 + + def _update_stateful_metrics(self, stateful_metrics): + self.stateful_metrics = self.stateful_metrics.union(stateful_metrics) + + +def make_batches(size, batch_size): + """Returns a list of batch indices (tuples of indices). + Args: - time_per_unit: the duration to display - unit_name: the name of the unit to display + size: Integer, total size of the data to slice into batches. + batch_size: Integer, batch size. + Returns: - a string with the correctly formatted duration and units - """ - formatted = '' - if time_per_unit >= 1 or time_per_unit == 0: - formatted += ' %.0fs/%s' % (time_per_unit, unit_name) - elif time_per_unit >= 1e-3: - formatted += ' %.0fms/%s' % (time_per_unit * 1e3, unit_name) - else: - formatted += ' %.0fus/%s' % (time_per_unit * 1e6, unit_name) - return formatted - - def _estimate_step_duration(self, current, now): - """Estimate the duration of a single step. - - Given the step number `current` and the corresponding time `now` - this function returns an estimate for how long a single step - takes. If this is called before one step has been completed - (i.e. `current == 0`) then zero is given as an estimate. 
The duration - estimate ignores the duration of the (assumed to be non-representative) - first step for estimates when more steps are available (i.e. `current>1`). - Args: - current: Index of current step. - now: The current time. - Returns: Estimate of the duration of a single step. + A list of tuples of array indices. """ - if current: - # there are a few special scenarios here: - # 1) somebody is calling the progress bar without ever supplying step 1 - # 2) somebody is calling the progress bar and supplies step one multiple - # times, e.g. as part of a finalizing call - # in these cases, we just fall back to the simple calculation - if self._time_after_first_step is not None and current > 1: - time_per_unit = (now - self._time_after_first_step) / (current - 1) - else: - time_per_unit = (now - self._start) / current - - if current == 1: - self._time_after_first_step = now - return time_per_unit - else: - return 0 + num_batches = int(np.ceil(size / float(batch_size))) + return [ + (i * batch_size, min(size, (i + 1) * batch_size)) + for i in range(0, num_batches) + ] - def _update_stateful_metrics(self, stateful_metrics): - self.stateful_metrics = self.stateful_metrics.union(stateful_metrics) +def slice_arrays(arrays, start=None, stop=None): + """Slice an array or list of arrays. -def make_batches(size, batch_size): - """Returns a list of batch indices (tuples of indices). + This takes an array-like, or a list of + array-likes, and outputs: + - arrays[start:stop] if `arrays` is an array-like + - [x[start:stop] for x in arrays] if `arrays` is a list - Args: - size: Integer, total size of the data to slice into batches. - batch_size: Integer, batch size. + Can also work on list/array of indices: `slice_arrays(x, indices)` - Returns: - A list of tuples of array indices. - """ - num_batches = int(np.ceil(size / float(batch_size))) - return [(i * batch_size, min(size, (i + 1) * batch_size)) - for i in range(0, num_batches)] + Args: + arrays: Single array or list of arrays. + start: can be an integer index (start index) or a list/array of indices + stop: integer (stop index); should be None if `start` was a list. + Returns: + A slice of the array(s). -def slice_arrays(arrays, start=None, stop=None): - """Slice an array or list of arrays. - - This takes an array-like, or a list of - array-likes, and outputs: - - arrays[start:stop] if `arrays` is an array-like - - [x[start:stop] for x in arrays] if `arrays` is a list - - Can also work on list/array of indices: `slice_arrays(x, indices)` - - Args: - arrays: Single array or list of arrays. - start: can be an integer index (start index) or a list/array of indices - stop: integer (stop index); should be None if `start` was a list. - - Returns: - A slice of the array(s). - - Raises: - ValueError: If the value of start is a list and stop is not None. - """ - if arrays is None: - return [None] - if isinstance(start, list) and stop is not None: - raise ValueError('The stop argument has to be None if the value of start ' - f'is a list. 
Received start={start}, stop={stop}') - elif isinstance(arrays, list): - if hasattr(start, '__len__'): - # hdf5 datasets only support list objects as indices - if hasattr(start, 'shape'): - start = start.tolist() - return [None if x is None else x[start] for x in arrays] - return [ - None if x is None else - None if not hasattr(x, '__getitem__') else x[start:stop] for x in arrays - ] - else: - if hasattr(start, '__len__'): - if hasattr(start, 'shape'): - start = start.tolist() - return arrays[start] - if hasattr(start, '__getitem__'): - return arrays[start:stop] - return [None] + Raises: + ValueError: If the value of start is a list and stop is not None. + """ + if arrays is None: + return [None] + if isinstance(start, list) and stop is not None: + raise ValueError( + "The stop argument has to be None if the value of start " + f"is a list. Received start={start}, stop={stop}" + ) + elif isinstance(arrays, list): + if hasattr(start, "__len__"): + # hdf5 datasets only support list objects as indices + if hasattr(start, "shape"): + start = start.tolist() + return [None if x is None else x[start] for x in arrays] + return [ + None + if x is None + else None + if not hasattr(x, "__getitem__") + else x[start:stop] + for x in arrays + ] + else: + if hasattr(start, "__len__"): + if hasattr(start, "shape"): + start = start.tolist() + return arrays[start] + if hasattr(start, "__getitem__"): + return arrays[start:stop] + return [None] def to_list(x): - """Normalizes a list/tensor into a list. + """Normalizes a list/tensor into a list. - If a tensor is passed, we return - a list of size 1 containing the tensor. + If a tensor is passed, we return + a list of size 1 containing the tensor. - Args: - x: target object to be normalized. + Args: + x: target object to be normalized. - Returns: - A list. - """ - if isinstance(x, list): - return x - return [x] + Returns: + A list. + """ + if isinstance(x, list): + return x + return [x] def to_snake_case(name): - intermediate = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) - insecure = re.sub('([a-z])([A-Z])', r'\1_\2', intermediate).lower() - # If the class is private the name starts with "_" which is not secure - # for creating scopes. We prefix the name with "private" in this case. - if insecure[0] != '_': - return insecure - return 'private' + insecure + intermediate = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + insecure = re.sub("([a-z])([A-Z])", r"\1_\2", intermediate).lower() + # If the class is private the name starts with "_" which is not secure + # for creating scopes. We prefix the name with "private" in this case. + if insecure[0] != "_": + return insecure + return "private" + insecure def is_all_none(structure): - iterable = tf.nest.flatten(structure) - # We cannot use Python's `any` because the iterable may return Tensors. - for element in iterable: - if element is not None: - return False - return True + iterable = tf.nest.flatten(structure) + # We cannot use Python's `any` because the iterable may return Tensors. + for element in iterable: + if element is not None: + return False + return True def check_for_unexpected_keys(name, input_dict, expected_values): - unknown = set(input_dict.keys()).difference(expected_values) - if unknown: - raise ValueError( - f'Unknown entries in {name} dictionary: {list(unknown)}. 
Only expected ' - f'following keys: {expected_values}') - - -def validate_kwargs(kwargs, - allowed_kwargs, - error_message='Keyword argument not understood:'): - """Checks that all keyword arguments are in the set of allowed keys.""" - for kwarg in kwargs: - if kwarg not in allowed_kwargs: - raise TypeError(error_message, kwarg) + unknown = set(input_dict.keys()).difference(expected_values) + if unknown: + raise ValueError( + f"Unknown entries in {name} dictionary: {list(unknown)}. " + f"Only expected following keys: {expected_values}" + ) -def validate_config(config): - """Determines whether config appears to be a valid layer config.""" - return isinstance(config, dict) and _LAYER_UNDEFINED_CONFIG_KEY not in config +def validate_kwargs( + kwargs, allowed_kwargs, error_message="Keyword argument not understood:" +): + """Checks that all keyword arguments are in the set of allowed keys.""" + for kwarg in kwargs: + if kwarg not in allowed_kwargs: + raise TypeError(error_message, kwarg) def default(method): - """Decorates a method to detect overrides in subclasses.""" - method._is_default = True # pylint: disable=protected-access - return method + """Decorates a method to detect overrides in subclasses.""" + method._is_default = True + return method def is_default(method): - """Check if a method is decorated with the `default` wrapper.""" - return getattr(method, '_is_default', False) + """Check if a method is decorated with the `default` wrapper.""" + return getattr(method, "_is_default", False) def populate_dict_with_module_objects(target_dict, modules, obj_filter): - for module in modules: - for name in dir(module): - obj = getattr(module, name) - if obj_filter(obj): - target_dict[name] = obj + for module in modules: + for name in dir(module): + obj = getattr(module, name) + if obj_filter(obj): + target_dict[name] = obj class LazyLoader(python_types.ModuleType): - """Lazily import a module, mainly to avoid pulling in large dependencies.""" - - def __init__(self, local_name, parent_module_globals, name): - self._local_name = local_name - self._parent_module_globals = parent_module_globals - super().__init__(name) - - def _load(self): - """Load the module and insert it into the parent's globals.""" - # Import the target module and insert it into the parent's namespace - module = importlib.import_module(self.__name__) - self._parent_module_globals[self._local_name] = module - # Update this object's dict so that if someone keeps a reference to the - # LazyLoader, lookups are efficient (__getattr__ is only called on lookups - # that fail). - self.__dict__.update(module.__dict__) - return module - - def __getattr__(self, item): - module = self._load() - return getattr(module, item) - - -# Aliases - -custom_object_scope = CustomObjectScope # pylint: disable=invalid-name + """Lazily import a module, mainly to avoid pulling in large dependencies.""" + + def __init__(self, local_name, parent_module_globals, name): + self._local_name = local_name + self._parent_module_globals = parent_module_globals + super().__init__(name) + + def _load(self): + """Load the module and insert it into the parent's globals.""" + # Import the target module and insert it into the parent's namespace + module = importlib.import_module(self.__name__) + self._parent_module_globals[self._local_name] = module + # Update this object's dict so that if someone keeps a reference to the + # LazyLoader, lookups are efficient (__getattr__ is only called on + # lookups that fail). 
+ self.__dict__.update(module.__dict__) + return module + + def __getattr__(self, item): + module = self._load() + return getattr(module, item) diff --git a/keras/utils/generic_utils_test.py b/keras/utils/generic_utils_test.py index 90868b8e3d63..4ed6242bda61 100644 --- a/keras/utils/generic_utils_test.py +++ b/keras/utils/generic_utils_test.py @@ -15,504 +15,450 @@ """Tests for Keras generic Python utils.""" -from functools import partial import os import sys +from functools import partial + +import numpy as np +import tensorflow.compat.v2 as tf import keras +from keras.saving import serialization_lib +from keras.saving.legacy import serialization +from keras.testing_infra import test_utils from keras.utils import generic_utils from keras.utils import io_utils -import numpy as np -import tensorflow.compat.v2 as tf class SnakeCaseTest(tf.test.TestCase): - - def test_snake_case(self): - self.assertEqual(generic_utils.to_snake_case('SomeClass'), 'some_class') - self.assertEqual(generic_utils.to_snake_case('Conv2D'), 'conv2d') - self.assertEqual(generic_utils.to_snake_case('ConvLSTM2D'), 'conv_lstm2d') + def test_snake_case(self): + self.assertEqual(generic_utils.to_snake_case("SomeClass"), "some_class") + self.assertEqual(generic_utils.to_snake_case("Conv2D"), "conv2d") + self.assertEqual( + generic_utils.to_snake_case("ConvLSTM2D"), "conv_lstm2d" + ) class HasArgTest(tf.test.TestCase): - - def test_has_arg(self): - - def f_x(x): - return x - - def f_x_args(x, *args): - _ = args - return x - - def f_x_kwargs(x, **kwargs): - _ = kwargs - return x - - def f(a, b, c): - return a + b + c - - partial_f = partial(f, b=1) - - self.assertTrue(keras.utils.generic_utils.has_arg( - f_x, 'x', accept_all=False)) - self.assertFalse(keras.utils.generic_utils.has_arg( - f_x, 'y', accept_all=False)) - self.assertTrue(keras.utils.generic_utils.has_arg( - f_x_args, 'x', accept_all=False)) - self.assertFalse(keras.utils.generic_utils.has_arg( - f_x_args, 'y', accept_all=False)) - self.assertTrue(keras.utils.generic_utils.has_arg( - f_x_kwargs, 'x', accept_all=False)) - self.assertFalse(keras.utils.generic_utils.has_arg( - f_x_kwargs, 'y', accept_all=False)) - self.assertTrue(keras.utils.generic_utils.has_arg( - f_x_kwargs, 'y', accept_all=True)) - self.assertTrue( - keras.utils.generic_utils.has_arg(partial_f, 'c', accept_all=True)) - - -class TestCustomObjectScope(tf.test.TestCase): - - def test_custom_object_scope(self): - - def custom_fn(): - pass - - class CustomClass: - pass - - with keras.utils.generic_utils.custom_object_scope( - {'CustomClass': CustomClass, 'custom_fn': custom_fn}): - act = keras.activations.get('custom_fn') - self.assertEqual(act, custom_fn) - cl = keras.regularizers.get('CustomClass') - self.assertEqual(cl.__class__, CustomClass) + def test_has_arg(self): + def f_x(x): + return x + + def f_x_args(x, *args): + _ = args + return x + + def f_x_kwargs(x, **kwargs): + _ = kwargs + return x + + def f(a, b, c): + return a + b + c + + partial_f = partial(f, b=1) + + self.assertTrue( + keras.utils.generic_utils.has_arg(f_x, "x", accept_all=False) + ) + self.assertFalse( + keras.utils.generic_utils.has_arg(f_x, "y", accept_all=False) + ) + self.assertTrue( + keras.utils.generic_utils.has_arg(f_x_args, "x", accept_all=False) + ) + self.assertFalse( + keras.utils.generic_utils.has_arg(f_x_args, "y", accept_all=False) + ) + self.assertTrue( + keras.utils.generic_utils.has_arg(f_x_kwargs, "x", accept_all=False) + ) + self.assertFalse( + keras.utils.generic_utils.has_arg(f_x_kwargs, "y", 
accept_all=False) + ) + self.assertTrue( + keras.utils.generic_utils.has_arg(f_x_kwargs, "y", accept_all=True) + ) + self.assertTrue( + keras.utils.generic_utils.has_arg(partial_f, "c", accept_all=True) + ) class SerializeKerasObjectTest(tf.test.TestCase): - - def test_serialize_none(self): - serialized = keras.utils.generic_utils.serialize_keras_object(None) - self.assertEqual(serialized, None) - deserialized = keras.utils.generic_utils.deserialize_keras_object( - serialized) - self.assertEqual(deserialized, None) - - def test_serialize_custom_class_with_default_name(self): - - @keras.utils.generic_utils.register_keras_serializable() - class TestClass: - - def __init__(self, value): - self._value = value - - def get_config(self): - return {'value': self._value} - - serialized_name = 'Custom>TestClass' - inst = TestClass(value=10) - class_name = keras.utils.generic_utils._GLOBAL_CUSTOM_NAMES[TestClass] - self.assertEqual(serialized_name, class_name) - config = keras.utils.generic_utils.serialize_keras_object(inst) - self.assertEqual(class_name, config['class_name']) - new_inst = keras.utils.generic_utils.deserialize_keras_object(config) - self.assertIsNot(inst, new_inst) - self.assertIsInstance(new_inst, TestClass) - self.assertEqual(10, new_inst._value) - - # Make sure registering a new class with same name will fail. - with self.assertRaisesRegex(ValueError, '.*has already been registered.*'): - @keras.utils.generic_utils.register_keras_serializable() # pylint: disable=function-redefined - class TestClass: # pylint: disable=function-redefined - - def __init__(self, value): - self._value = value - - def get_config(self): - return {'value': self._value} - - def test_serialize_custom_class_with_custom_name(self): - - @keras.utils.generic_utils.register_keras_serializable( - 'TestPackage', 'CustomName') - class OtherTestClass: - - def __init__(self, val): - self._val = val - - def get_config(self): - return {'val': self._val} - - serialized_name = 'TestPackage>CustomName' - inst = OtherTestClass(val=5) - class_name = keras.utils.generic_utils._GLOBAL_CUSTOM_NAMES[OtherTestClass] - self.assertEqual(serialized_name, class_name) - fn_class_name = keras.utils.generic_utils.get_registered_name( - OtherTestClass) - self.assertEqual(fn_class_name, class_name) - - cls = keras.utils.generic_utils.get_registered_object(fn_class_name) - self.assertEqual(OtherTestClass, cls) - - config = keras.utils.generic_utils.serialize_keras_object(inst) - self.assertEqual(class_name, config['class_name']) - new_inst = keras.utils.generic_utils.deserialize_keras_object(config) - self.assertIsNot(inst, new_inst) - self.assertIsInstance(new_inst, OtherTestClass) - self.assertEqual(5, new_inst._val) - - def test_serialize_custom_function(self): - - @keras.utils.generic_utils.register_keras_serializable() - def my_fn(): - return 42 - - serialized_name = 'Custom>my_fn' - class_name = keras.utils.generic_utils._GLOBAL_CUSTOM_NAMES[my_fn] - self.assertEqual(serialized_name, class_name) - fn_class_name = keras.utils.generic_utils.get_registered_name(my_fn) - self.assertEqual(fn_class_name, class_name) - - config = keras.utils.generic_utils.serialize_keras_object(my_fn) - self.assertEqual(class_name, config) - fn = keras.utils.generic_utils.deserialize_keras_object(config) - self.assertEqual(42, fn()) - - fn_2 = keras.utils.generic_utils.get_registered_object(fn_class_name) - self.assertEqual(42, fn_2()) - - def test_serialize_custom_class_without_get_config_fails(self): - - with self.assertRaisesRegex( - ValueError, 'Cannot 
register a class that does ' - 'not have a get_config.*'): - - @keras.utils.generic_utils.register_keras_serializable( # pylint: disable=unused-variable - 'TestPackage', 'TestClass') - class TestClass: - - def __init__(self, value): - self._value = value - - def test_serializable_object(self): - - class SerializableInt(int): - """A serializable object to pass out of a test layer's config.""" - - def __new__(cls, value): - return int.__new__(cls, value) - - def get_config(self): - return {'value': int(self)} - - @classmethod - def from_config(cls, config): - return cls(**config) - - layer = keras.layers.Dense( - SerializableInt(3), - activation='relu', - kernel_initializer='ones', - bias_regularizer='l2') - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize( - config, custom_objects={'SerializableInt': SerializableInt}) - self.assertEqual(new_layer.activation, keras.activations.relu) - self.assertEqual(new_layer.bias_regularizer.__class__, - keras.regularizers.L2) - self.assertEqual(new_layer.units.__class__, SerializableInt) - self.assertEqual(new_layer.units, 3) - - def test_nested_serializable_object(self): - class SerializableInt(int): - """A serializable object to pass out of a test layer's config.""" - - def __new__(cls, value): - return int.__new__(cls, value) - - def get_config(self): - return {'value': int(self)} - - @classmethod - def from_config(cls, config): - return cls(**config) - - class SerializableNestedInt(int): - """A serializable object containing another serializable object.""" - - def __new__(cls, value, int_obj): - obj = int.__new__(cls, value) - obj.int_obj = int_obj - return obj - - def get_config(self): - return {'value': int(self), 'int_obj': self.int_obj} - - @classmethod - def from_config(cls, config): - return cls(**config) - - nested_int = SerializableInt(4) - layer = keras.layers.Dense( - SerializableNestedInt(3, nested_int), - name='SerializableNestedInt', - activation='relu', - kernel_initializer='ones', - bias_regularizer='l2') - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize( - config, - custom_objects={ - 'SerializableInt': SerializableInt, - 'SerializableNestedInt': SerializableNestedInt - }) - # Make sure the string field doesn't get convert to custom object, even - # they have same value. 
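
The blocks being removed above pinned down the registration contract: `register_keras_serializable` refuses classes without a `get_config` method, publishes the class under a "package>name" key, and rejects duplicate registrations. A minimal sketch of that contract as these removed tests exercised it (the package and class names below are illustrative, not from the diff):

import keras

@keras.utils.generic_utils.register_keras_serializable("DemoPkg", "Boxed")
class Boxed:
    def __init__(self, value):
        self.value = value

    def get_config(self):
        # Required: registering a class without get_config raises ValueError.
        return {"value": self.value}

# serialize_keras_object emits the registered "DemoPkg>Boxed" class_name;
# deserialize_keras_object resolves it back without any custom_objects.
config = keras.utils.generic_utils.serialize_keras_object(Boxed(7))
restored = keras.utils.generic_utils.deserialize_keras_object(config)
assert isinstance(restored, Boxed) and restored.value == 7
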
- self.assertEqual(new_layer.name, 'SerializableNestedInt') - self.assertEqual(new_layer.activation, keras.activations.relu) - self.assertEqual(new_layer.bias_regularizer.__class__, - keras.regularizers.L2) - self.assertEqual(new_layer.units.__class__, SerializableNestedInt) - self.assertEqual(new_layer.units, 3) - self.assertEqual(new_layer.units.int_obj.__class__, SerializableInt) - self.assertEqual(new_layer.units.int_obj, 4) - - def test_nested_serializable_fn(self): - - def serializable_fn(x): - """A serializable function to pass out of a test layer's config.""" - return x - - class SerializableNestedInt(int): - """A serializable object containing a serializable function.""" - - def __new__(cls, value, fn): - obj = int.__new__(cls, value) - obj.fn = fn - return obj - - def get_config(self): - return {'value': int(self), 'fn': self.fn} - - @classmethod - def from_config(cls, config): - return cls(**config) - - layer = keras.layers.Dense( - SerializableNestedInt(3, serializable_fn), - activation='relu', - kernel_initializer='ones', - bias_regularizer='l2') - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize( - config, - custom_objects={ - 'serializable_fn': serializable_fn, - 'SerializableNestedInt': SerializableNestedInt - }) - self.assertEqual(new_layer.activation, keras.activations.relu) - self.assertIsInstance(new_layer.bias_regularizer, keras.regularizers.L2) - self.assertIsInstance(new_layer.units, SerializableNestedInt) - self.assertEqual(new_layer.units, 3) - self.assertIs(new_layer.units.fn, serializable_fn) - - def test_serialize_type_object_initializer(self): - layer = keras.layers.Dense( - 1, - kernel_initializer=keras.initializers.ones, - bias_initializer=keras.initializers.zeros) - config = keras.layers.serialize(layer) - self.assertEqual(config['config']['bias_initializer']['class_name'], - 'Zeros') - self.assertEqual(config['config']['kernel_initializer']['class_name'], - 'Ones') - - def test_serializable_with_old_config(self): - # model config generated by tf-1.2.1 - old_model_config = { - 'class_name': - 'Sequential', - 'config': [{ - 'class_name': 'Dense', - 'config': { - 'name': 'dense_1', - 'trainable': True, - 'batch_input_shape': [None, 784], - 'dtype': 'float32', - 'units': 32, - 'activation': 'linear', - 'use_bias': True, - 'kernel_initializer': { - 'class_name': 'Ones', - 'config': { - 'dtype': 'float32' - } - }, - 'bias_initializer': { - 'class_name': 'Zeros', - 'config': { - 'dtype': 'float32' - } - }, - 'kernel_regularizer': None, - 'bias_regularizer': None, - 'activity_regularizer': None, - 'kernel_constraint': None, - 'bias_constraint': None - } - }] - } - old_model = keras.utils.generic_utils.deserialize_keras_object( - old_model_config, module_objects={'Sequential': keras.Sequential}) - new_model = keras.Sequential([ - keras.layers.Dense(32, input_dim=784, kernel_initializer='Ones'), - ]) - input_data = np.random.normal(2, 1, (5, 784)) - output = old_model.predict(input_data) - expected_output = new_model.predict(input_data) - self.assertAllEqual(output, expected_output) - - def test_deserialize_unknown_object(self): - - class CustomLayer(keras.layers.Layer): - pass - - layer = CustomLayer() - config = keras.utils.generic_utils.serialize_keras_object(layer) - with self.assertRaisesRegexp(ValueError, - 'passed to the `custom_objects` arg'): - keras.utils.generic_utils.deserialize_keras_object(config) - restored = keras.utils.generic_utils.deserialize_keras_object( - config, custom_objects={'CustomLayer': CustomLayer}) - 
self.assertIsInstance(restored, CustomLayer) + def test_serialize_none(self): + serialized = serialization_lib.serialize_keras_object(None) + self.assertEqual(serialized, None) + deserialized = serialization_lib.deserialize_keras_object(serialized) + self.assertEqual(deserialized, None) + + def test_serializable_object(self): + class SerializableInt(int): + """A serializable object to pass out of a test layer's config.""" + + def __new__(cls, value): + return int.__new__(cls, value) + + def get_config(self): + return {"value": int(self)} + + @classmethod + def from_config(cls, config): + return cls(**config) + + layer = keras.layers.Dense( + SerializableInt(3), + activation="relu", + kernel_initializer="ones", + bias_regularizer="l2", + ) + config = keras.layers.serialize(layer) + new_layer = keras.layers.deserialize( + config, custom_objects={"SerializableInt": SerializableInt} + ) + self.assertEqual(new_layer.activation, keras.activations.relu) + self.assertEqual( + new_layer.bias_regularizer.__class__, keras.regularizers.L2 + ) + self.assertEqual(new_layer.units.__class__, SerializableInt) + self.assertEqual(new_layer.units, 3) + + def test_nested_serializable_object(self): + class SerializableInt(int): + """A serializable object to pass out of a test layer's config.""" + + def __new__(cls, value): + return int.__new__(cls, value) + + def get_config(self): + return {"value": int(self)} + + @classmethod + def from_config(cls, config): + return cls(**config) + + class SerializableNestedInt(int): + """A serializable object containing another serializable object.""" + + def __new__(cls, value, int_obj): + obj = int.__new__(cls, value) + obj.int_obj = int_obj + return obj + + def get_config(self): + return {"value": int(self), "int_obj": self.int_obj} + + @classmethod + def from_config(cls, config): + return cls(**config) + + nested_int = SerializableInt(4) + layer = keras.layers.Dense( + SerializableNestedInt(3, nested_int), + name="SerializableNestedInt", + activation="relu", + kernel_initializer="ones", + bias_regularizer="l2", + ) + config = keras.layers.serialize(layer) + new_layer = keras.layers.deserialize( + config, + custom_objects={ + "SerializableInt": SerializableInt, + "SerializableNestedInt": SerializableNestedInt, + }, + ) + # Make sure the string field doesn't get convert to custom object, even + # they have same value. 
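
The pattern these tests rely on: any object implementing the `get_config`/`from_config` pair can ride through a layer config, provided `custom_objects` tells the deserializer how to resolve its name at load time. Distilled to its core (the `BoxedInt` name is illustrative):

import keras

class BoxedInt(int):
    def __new__(cls, value):
        return int.__new__(cls, value)

    def get_config(self):
        return {"value": int(self)}

    @classmethod
    def from_config(cls, config):
        return cls(**config)

layer = keras.layers.Dense(BoxedInt(8))
config = keras.layers.serialize(layer)
# Without the custom_objects mapping, BoxedInt cannot be resolved on load.
new_layer = keras.layers.deserialize(
    config, custom_objects={"BoxedInt": BoxedInt}
)
assert new_layer.units == 8
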
+ self.assertEqual(new_layer.name, "SerializableNestedInt") + self.assertEqual(new_layer.activation, keras.activations.relu) + self.assertEqual( + new_layer.bias_regularizer.__class__, keras.regularizers.L2 + ) + self.assertEqual(new_layer.units.__class__, SerializableNestedInt) + self.assertEqual(new_layer.units, 3) + self.assertEqual(new_layer.units.int_obj.__class__, SerializableInt) + self.assertEqual(new_layer.units.int_obj, 4) + + def test_nested_serializable_fn(self): + def serializable_fn(x): + """A serializable function to pass out of a test layer's config.""" + return x + + class SerializableNestedInt(int): + """A serializable object containing a serializable function.""" + + def __new__(cls, value, fn): + obj = int.__new__(cls, value) + obj.fn = fn + return obj + + def get_config(self): + return {"value": int(self), "fn": self.fn} + + @classmethod + def from_config(cls, config): + return cls(**config) + + layer = keras.layers.Dense( + SerializableNestedInt(3, serializable_fn), + activation="relu", + kernel_initializer="ones", + bias_regularizer="l2", + ) + config = keras.layers.serialize(layer) + new_layer = keras.layers.deserialize( + config, + custom_objects={ + "serializable_fn": serializable_fn, + "SerializableNestedInt": SerializableNestedInt, + }, + ) + self.assertEqual(new_layer.activation, keras.activations.relu) + self.assertIsInstance(new_layer.bias_regularizer, keras.regularizers.L2) + self.assertIsInstance(new_layer.units, SerializableNestedInt) + self.assertEqual(new_layer.units, 3) + self.assertIs(new_layer.units.fn, serializable_fn) + + def test_serialize_type_object_initializer(self): + layer = keras.layers.Dense( + 1, + kernel_initializer=keras.initializers.ones, + bias_initializer=keras.initializers.zeros, + ) + config = keras.layers.serialize(layer) + self.assertEqual( + config["config"]["bias_initializer"]["class_name"], "Zeros" + ) + self.assertEqual( + config["config"]["kernel_initializer"]["class_name"], "Ones" + ) + + def test_serializable_with_old_config(self): + # model config generated by tf-1.2.1 + old_model_config = { + "class_name": "Sequential", + "config": [ + { + "class_name": "Dense", + "config": { + "name": "dense_1", + "trainable": True, + "batch_input_shape": [None, 784], + "dtype": "float32", + "units": 32, + "activation": "linear", + "use_bias": True, + "kernel_initializer": { + "class_name": "Ones", + "config": {"dtype": "float32"}, + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {"dtype": "float32"}, + }, + "kernel_regularizer": None, + "bias_regularizer": None, + "activity_regularizer": None, + "kernel_constraint": None, + "bias_constraint": None, + }, + } + ], + } + old_model = serialization_lib.deserialize_keras_object( + old_model_config, module_objects={"Sequential": keras.Sequential} + ) + new_model = keras.Sequential( + [ + keras.layers.Dense( + 32, input_dim=784, kernel_initializer="Ones" + ), + ] + ) + input_data = np.random.normal(2, 1, (5, 784)) + output = old_model.predict(input_data) + expected_output = new_model.predict(input_data) + self.assertAllEqual(output, expected_output) + + def test_deserialize_unknown_object(self): + class CustomLayer(keras.layers.Layer): + pass + + layer = CustomLayer() + config = serialization_lib.serialize_keras_object(layer) + if tf.__internal__.tf2.enabled(): + with self.assertRaisesRegex( + TypeError, + "Could not locate class 'CustomLayer'. 
Make sure custom classes", # noqa: E501 + ): + serialization_lib.deserialize_keras_object(config) + else: + with self.assertRaisesRegex( + ValueError, "using a `keras.utils.custom_object_scope`" + ): + serialization.deserialize_keras_object(config) + restored = serialization_lib.deserialize_keras_object( + config, custom_objects={"CustomLayer": CustomLayer} + ) + self.assertIsInstance(restored, CustomLayer) class SliceArraysTest(tf.test.TestCase): - - def test_slice_arrays(self): - input_a = list([1, 2, 3]) - self.assertEqual( - keras.utils.generic_utils.slice_arrays(input_a, start=0), - [None, None, None]) - self.assertEqual( - keras.utils.generic_utils.slice_arrays(input_a, stop=3), - [None, None, None]) - self.assertEqual( - keras.utils.generic_utils.slice_arrays(input_a, start=0, stop=1), - [None, None, None]) + def test_slice_arrays(self): + input_a = list([1, 2, 3]) + self.assertEqual( + keras.utils.generic_utils.slice_arrays(input_a, start=0), + [None, None, None], + ) + self.assertEqual( + keras.utils.generic_utils.slice_arrays(input_a, stop=3), + [None, None, None], + ) + self.assertEqual( + keras.utils.generic_utils.slice_arrays(input_a, start=0, stop=1), + [None, None, None], + ) # object() alone isn't compatible with WeakKeyDictionary, which we use to # track shared configs. class MaybeSharedObject: - pass - + pass -class SharedObjectScopeTest(tf.test.TestCase): - def test_shared_object_saving_scope_single_object_doesnt_export_id(self): - with generic_utils.SharedObjectSavingScope() as scope: - single_object = MaybeSharedObject() - self.assertIsNone(scope.get_config(single_object)) - single_object_config = scope.create_config({}, single_object) - self.assertIsNotNone(single_object_config) - self.assertNotIn(generic_utils.SHARED_OBJECT_KEY, - single_object_config) - - def test_shared_object_saving_scope_shared_object_exports_id(self): - with generic_utils.SharedObjectSavingScope() as scope: - shared_object = MaybeSharedObject() - self.assertIsNone(scope.get_config(shared_object)) - scope.create_config({}, shared_object) - first_object_config = scope.get_config(shared_object) - second_object_config = scope.get_config(shared_object) - self.assertIn(generic_utils.SHARED_OBJECT_KEY, - first_object_config) - self.assertIn(generic_utils.SHARED_OBJECT_KEY, - second_object_config) - self.assertIs(first_object_config, second_object_config) - - def test_shared_object_loading_scope_noop(self): - # Test that, without a context manager scope, adding configs will do - # nothing. - obj_id = 1 - obj = MaybeSharedObject() - generic_utils._shared_object_loading_scope().set(obj_id, obj) - self.assertIsNone(generic_utils._shared_object_loading_scope().get(obj_id)) - - def test_shared_object_loading_scope_returns_shared_obj(self): - obj_id = 1 - obj = MaybeSharedObject() - with generic_utils.SharedObjectLoadingScope() as scope: - scope.set(obj_id, obj) - self.assertIs(scope.get(obj_id), obj) - - def test_nested_shared_object_saving_scopes(self): - my_obj = MaybeSharedObject() - with generic_utils.SharedObjectSavingScope() as scope_1: - scope_1.create_config({}, my_obj) - with generic_utils.SharedObjectSavingScope() as scope_2: - # Nesting saving scopes should return the original scope and should - # not clear any objects we're tracking. 
- self.assertIs(scope_1, scope_2) - self.assertIsNotNone(scope_2.get_config(my_obj)) - self.assertIsNotNone(scope_1.get_config(my_obj)) - self.assertIsNone(generic_utils._shared_object_saving_scope()) - - def test_custom_object_scope_correct_class(self): - train_step_message = 'This is my training step' - temp_dir = os.path.join(self.get_temp_dir(), 'my_model') - - class CustomModelX(keras.Model): - - def __init__(self, *args, **kwargs): +class CustomModelX(keras.Model): + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.dense1 = keras.layers.Dense(1) + self.train_step_message = "This is my training step" - def call(self, inputs): + def call(self, inputs): return self.dense1(inputs) - def train_step(self, data): - tf.print(train_step_message) + def train_step(self, data): + tf.print(self.train_step_message) x, y = data with tf.GradientTape() as tape: - y_pred = self(x) - loss = self.compiled_loss(y, y_pred) + y_pred = self(x) + loss = self.compiled_loss(y, y_pred) gradients = tape.gradient(loss, self.trainable_variables) self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) return {} - def func_that_returns_one(self): + def func_that_returns_one(self): return 1 - subclassed_model = CustomModelX() - subclassed_model.compile(optimizer='adam', loss='mse') - - x = np.random.random((100, 32)) - y = np.random.random((100, 1)) - subclassed_model.fit(x, y, epochs=1) - subclassed_model.save(temp_dir, save_format='tf') - - with keras.utils.generic_utils.custom_object_scope( - {'CustomModelX': CustomModelX}): - loaded_model = keras.models.load_model(temp_dir) - io_utils.enable_interactive_logging() - # `tf.print` writes to stderr. - with self.captureWritesToStream(sys.stderr) as printed: - loaded_model.fit(x, y, epochs=1) - if tf.__internal__.tf2.enabled(): - # `tf.print` message is only available in stderr in TF2. Check that - # custom `train_step` is used. - self.assertRegex(printed.contents(), train_step_message) - - # Check that the custom class does get used. - self.assertIsInstance(loaded_model, CustomModelX) - # Check that the custom method is available. - self.assertEqual(loaded_model.func_that_returns_one(), 1) - - -if __name__ == '__main__': - tf.test.main() +class SharedObjectScopeTest(tf.test.TestCase): + def test_shared_object_saving_scope_single_object_doesnt_export_id(self): + with serialization.SharedObjectSavingScope() as scope: + single_object = MaybeSharedObject() + self.assertIsNone(scope.get_config(single_object)) + single_object_config = scope.create_config({}, single_object) + self.assertIsNotNone(single_object_config) + self.assertNotIn( + serialization.SHARED_OBJECT_KEY, single_object_config + ) + + def test_shared_object_saving_scope_shared_object_exports_id(self): + with serialization.SharedObjectSavingScope() as scope: + shared_object = MaybeSharedObject() + self.assertIsNone(scope.get_config(shared_object)) + scope.create_config({}, shared_object) + first_object_config = scope.get_config(shared_object) + second_object_config = scope.get_config(shared_object) + self.assertIn(serialization.SHARED_OBJECT_KEY, first_object_config) + self.assertIn(serialization.SHARED_OBJECT_KEY, second_object_config) + self.assertIs(first_object_config, second_object_config) + + def test_shared_object_loading_scope_noop(self): + # Test that, without a context manager scope, adding configs will do + # nothing. 
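
Taken together, the saving-scope tests above fix the dedup contract: the config created for an object's first sighting carries no marker, while configs handed out for repeat references to the same object carry a `SHARED_OBJECT_KEY` id. A compact sketch against the `keras.saving.legacy.serialization` module imported at the top of this test file (the `Probe` class is illustrative; a bare `object()` would not be weak-referenceable here, as the comment on `MaybeSharedObject` notes):

from keras.saving.legacy import serialization

class Probe:
    pass

obj = Probe()
with serialization.SharedObjectSavingScope() as scope:
    first = scope.create_config({}, obj)  # first sighting: no shared id
    again = scope.get_config(obj)  # repeat reference: id attached
    assert serialization.SHARED_OBJECT_KEY not in first
    assert serialization.SHARED_OBJECT_KEY in again
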
+ obj_id = 1 + obj = MaybeSharedObject() + serialization._shared_object_loading_scope().set(obj_id, obj) + self.assertIsNone( + serialization._shared_object_loading_scope().get(obj_id) + ) + + def test_shared_object_loading_scope_returns_shared_obj(self): + obj_id = 1 + obj = MaybeSharedObject() + with serialization.SharedObjectLoadingScope() as scope: + scope.set(obj_id, obj) + self.assertIs(scope.get(obj_id), obj) + + def test_nested_shared_object_saving_scopes(self): + my_obj = MaybeSharedObject() + with serialization.SharedObjectSavingScope() as scope_1: + scope_1.create_config({}, my_obj) + with serialization.SharedObjectSavingScope() as scope_2: + # Nesting saving scopes should return the original scope and + # should not clear any objects we're tracking. + self.assertIs(scope_1, scope_2) + self.assertIsNotNone(scope_2.get_config(my_obj)) + self.assertIsNotNone(scope_1.get_config(my_obj)) + self.assertIsNone(serialization._shared_object_saving_scope()) + + def test_custom_object_scope_correct_class_saved_model(self): + temp_dir = os.path.join(self.get_temp_dir(), "my_model") + + subclassed_model = CustomModelX() + subclassed_model.compile(optimizer="adam", loss="mse") + + x = np.random.random((100, 32)) + y = np.random.random((100, 1)) + subclassed_model.fit(x, y, epochs=1) + + subclassed_model.save(temp_dir, save_format="tf") + + with keras.utils.custom_object_scope({"CustomModelX": CustomModelX}): + loaded_model = keras.models.load_model(temp_dir) + + io_utils.enable_interactive_logging() + # `tf.print` writes to stderr. + with self.captureWritesToStream(sys.stderr) as printed: + loaded_model.fit(x, y, epochs=1) + if tf.__internal__.tf2.enabled(): + # `tf.print` message is only available in stderr in TF2. + # Check that custom `train_step` is used. + self.assertRegex(printed.contents(), "This is my training step") + + # Check that the custom class does get used. + self.assertIsInstance(loaded_model, CustomModelX) + # Check that the custom method is available. + self.assertEqual(loaded_model.func_that_returns_one(), 1) + + @test_utils.run_v2_only + def test_custom_object_scope_correct_class_keras_v3(self): + temp_dir = os.path.join(self.get_temp_dir(), "my_model.keras") + + subclassed_model = CustomModelX() + subclassed_model.compile(optimizer="adam", loss="mse") + + x = np.random.random((100, 32)) + y = np.random.random((100, 1)) + subclassed_model.fit(x, y, epochs=1) + + subclassed_model.save(temp_dir, save_format="keras_v3") + + with keras.utils.custom_object_scope({"CustomModelX": CustomModelX}): + loaded_model = keras.models.load_model(temp_dir) + + io_utils.enable_interactive_logging() + # `tf.print` writes to stderr. + with self.captureWritesToStream(sys.stderr) as printed: + loaded_model.fit(x, y, epochs=1) + if tf.__internal__.tf2.enabled(): + # `tf.print` message is only available in stderr in TF2. + # Check that custom `train_step` is used. + self.assertRegex(printed.contents(), "This is my training step") + + # Check that the custom class does get used. + self.assertIsInstance(loaded_model, CustomModelX) + # Check that the custom method is available. 
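
Both custom-object-scope tests above follow one pattern: train and save the subclassed model, then re-enter a `custom_object_scope` so `load_model` can resolve `CustomModelX`; the keras_v3 variant only swaps in a ".keras" path and `save_format="keras_v3"`. The essence of that round trip, assuming the `CustomModelX` class defined earlier in this file (the path is illustrative):

import numpy as np
import keras

model = CustomModelX()
model.compile(optimizer="adam", loss="mse")
model.fit(np.random.random((8, 32)), np.random.random((8, 1)), epochs=1)
model.save("/tmp/my_model", save_format="tf")

with keras.utils.custom_object_scope({"CustomModelX": CustomModelX}):
    # Resolves the saved class name back to the Python class in scope.
    loaded = keras.models.load_model("/tmp/my_model")
assert isinstance(loaded, CustomModelX)
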
+ self.assertEqual(loaded_model.func_that_returns_one(), 1) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/image_dataset.py b/keras/utils/image_dataset.py index 96fe7d3dd01a..fef22dda4911 100644 --- a/keras/utils/image_dataset.py +++ b/keras/utils/image_dataset.py @@ -14,308 +14,365 @@ # ============================================================================== """Keras image dataset loading utilities.""" +import numpy as np import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes -import numpy as np from keras.utils import dataset_utils from keras.utils import image_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export -ALLOWLIST_FORMATS = ('.bmp', '.gif', '.jpeg', '.jpg', '.png') - - -@keras_export('keras.utils.image_dataset_from_directory', - 'keras.preprocessing.image_dataset_from_directory', - v1=[]) -def image_dataset_from_directory(directory, - labels='inferred', - label_mode='int', - class_names=None, - color_mode='rgb', - batch_size=32, - image_size=(256, 256), - shuffle=True, - seed=None, - validation_split=None, - subset=None, - interpolation='bilinear', - follow_links=False, - crop_to_aspect_ratio=False, - **kwargs): - """Generates a `tf.data.Dataset` from image files in a directory. - - If your directory structure is: - - ``` - main_directory/ - ...class_a/ - ......a_image_1.jpg - ......a_image_2.jpg - ...class_b/ - ......b_image_1.jpg - ......b_image_2.jpg - ``` - - Then calling `image_dataset_from_directory(main_directory, labels='inferred')` - will return a `tf.data.Dataset` that yields batches of images from - the subdirectories `class_a` and `class_b`, together with labels - 0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`). - - Supported image formats: jpeg, png, bmp, gif. - Animated gifs are truncated to the first frame. - - Args: - directory: Directory where the data is located. - If `labels` is "inferred", it should contain - subdirectories, each containing images for a class. - Otherwise, the directory structure is ignored. - labels: Either "inferred" - (labels are generated from the directory structure), - None (no labels), - or a list/tuple of integer labels of the same size as the number of - image files found in the directory. Labels should be sorted according - to the alphanumeric order of the image file paths - (obtained via `os.walk(directory)` in Python). - label_mode: String describing the encoding of `labels`. Options are: - - 'int': means that the labels are encoded as integers - (e.g. for `sparse_categorical_crossentropy` loss). - - 'categorical' means that the labels are - encoded as a categorical vector - (e.g. for `categorical_crossentropy` loss). - - 'binary' means that the labels (there can be only 2) - are encoded as `float32` scalars with values 0 or 1 - (e.g. for `binary_crossentropy`). - - None (no labels). - class_names: Only valid if "labels" is "inferred". This is the explicit - list of class names (must match names of subdirectories). Used - to control the order of the classes - (otherwise alphanumerical order is used). - color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb". - Whether the images will be converted to - have 1, 3, or 4 channels. - batch_size: Size of the batches of data. Default: 32. - If `None`, the data will not be batched - (the dataset will yield individual samples). - image_size: Size to resize images to after they are read from disk, - specified as `(height, width)`. Defaults to `(256, 256)`. 
- Since the pipeline processes batches of images that must all have - the same size, this must be provided. - shuffle: Whether to shuffle the data. Default: True. - If set to False, sorts the data in alphanumeric order. - seed: Optional random seed for shuffling and transformations. - validation_split: Optional float between 0 and 1, - fraction of data to reserve for validation. - subset: Subset of the data to return. - One of "training", "validation" or "both". - Only used if `validation_split` is set. - When `subset="both"`, the utility returns a tuple of two datasets - (the training and validation datasets respectively). - interpolation: String, the interpolation method used when resizing images. - Defaults to `bilinear`. Supports `bilinear`, `nearest`, `bicubic`, - `area`, `lanczos3`, `lanczos5`, `gaussian`, `mitchellcubic`. - follow_links: Whether to visits subdirectories pointed to by symlinks. - Defaults to False. - crop_to_aspect_ratio: If True, resize the images without aspect - ratio distortion. When the original aspect ratio differs from the target - aspect ratio, the output image will be cropped so as to return the largest - possible window in the image (of size `image_size`) that matches - the target aspect ratio. By default (`crop_to_aspect_ratio=False`), - aspect ratio may not be preserved. - **kwargs: Legacy keyword arguments. - - Returns: +ALLOWLIST_FORMATS = (".bmp", ".gif", ".jpeg", ".jpg", ".png") + + +@keras_export( + "keras.utils.image_dataset_from_directory", + "keras.preprocessing.image_dataset_from_directory", + v1=[], +) +def image_dataset_from_directory( + directory, + labels="inferred", + label_mode="int", + class_names=None, + color_mode="rgb", + batch_size=32, + image_size=(256, 256), + shuffle=True, + seed=None, + validation_split=None, + subset=None, + interpolation="bilinear", + follow_links=False, + crop_to_aspect_ratio=False, + **kwargs, +): + """Generates a `tf.data.Dataset` from image files in a directory. + + If your directory structure is: + + ``` + main_directory/ + ...class_a/ + ......a_image_1.jpg + ......a_image_2.jpg + ...class_b/ + ......b_image_1.jpg + ......b_image_2.jpg + ``` + + Then calling `image_dataset_from_directory(main_directory, + labels='inferred')` will return a `tf.data.Dataset` that yields batches of + images from the subdirectories `class_a` and `class_b`, together with labels + 0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`). + + Supported image formats: `.jpeg`, `.jpg`, `.png`, `.bmp`, `.gif`. + Animated gifs are truncated to the first frame. + + Args: + directory: Directory where the data is located. + If `labels` is `"inferred"`, it should contain + subdirectories, each containing images for a class. + Otherwise, the directory structure is ignored. + labels: Either `"inferred"` + (labels are generated from the directory structure), + `None` (no labels), + or a list/tuple of integer labels of the same size as the number of + image files found in the directory. Labels should be sorted + according to the alphanumeric order of the image file paths + (obtained via `os.walk(directory)` in Python). + label_mode: String describing the encoding of `labels`. Options are: + - `"int"`: means that the labels are encoded as integers + (e.g. for `sparse_categorical_crossentropy` loss). + - `"categorical"` means that the labels are + encoded as a categorical vector + (e.g. for `categorical_crossentropy` loss). 
+ - `"binary"` means that the labels (there can be only 2) + are encoded as `float32` scalars with values 0 or 1 + (e.g. for `binary_crossentropy`). + - `None` (no labels). + class_names: Only valid if `labels` is `"inferred"`. + This is the explicit list of class names + (must match names of subdirectories). Used to control the order + of the classes (otherwise alphanumerical order is used). + color_mode: One of `"grayscale"`, `"rgb"`, `"rgba"`. + Defaults to `"rgb"`. Whether the images will be converted to + have 1, 3, or 4 channels. + batch_size: Size of the batches of data. + If `None`, the data will not be batched + (the dataset will yield individual samples). Defaults to 32. + image_size: Size to resize images to after they are read from disk, + specified as `(height, width)`. + Since the pipeline processes batches of images that must all have + the same size, this must be provided. Defaults to `(256, 256)`. + shuffle: Whether to shuffle the data. Defaults to `True`. + If set to `False`, sorts the data in alphanumeric order. + seed: Optional random seed for shuffling and transformations. + validation_split: Optional float between 0 and 1, + fraction of data to reserve for validation. + subset: Subset of the data to return. + One of `"training"`, `"validation"`, or `"both"`. + Only used if `validation_split` is set. + When `subset="both"`, the utility returns a tuple of two datasets + (the training and validation datasets respectively). + interpolation: String, the interpolation method used when + resizing images. Defaults to `"bilinear"`. + Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, + `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. + follow_links: Whether to visit subdirectories pointed to by symlinks. + Defaults to `False`. + crop_to_aspect_ratio: If `True`, resize the images without aspect + ratio distortion. When the original aspect ratio differs from the + target aspect ratio, the output image will be cropped so as to + return the largest possible window in the image + (of size `image_size`) that matches the target aspect ratio. By + default (`crop_to_aspect_ratio=False`), aspect ratio may not be + preserved. + **kwargs: Legacy keyword arguments. + + Returns: + A `tf.data.Dataset` object. - - If `label_mode` is None, it yields `float32` tensors of shape + + - If `label_mode` is `None`, it yields `float32` tensors of shape `(batch_size, image_size[0], image_size[1], num_channels)`, encoding images (see below for rules regarding `num_channels`). - - Otherwise, it yields a tuple `(images, labels)`, where `images` - has shape `(batch_size, image_size[0], image_size[1], num_channels)`, + - Otherwise, it yields a tuple `(images, labels)`, where `images` has + shape `(batch_size, image_size[0], image_size[1], num_channels)`, and `labels` follows the format described below. - Rules regarding labels format: - - if `label_mode` is `int`, the labels are an `int32` tensor of shape - `(batch_size,)`. - - if `label_mode` is `binary`, the labels are a `float32` tensor of - 1s and 0s of shape `(batch_size, 1)`. - - if `label_mode` is `categorical`, the labels are a `float32` tensor - of shape `(batch_size, num_classes)`, representing a one-hot - encoding of the class index. - - Rules regarding number of channels in the yielded images: - - if `color_mode` is `grayscale`, - there's 1 channel in the image tensors. - - if `color_mode` is `rgb`, - there are 3 channel in the image tensors. - - if `color_mode` is `rgba`, - there are 4 channel in the image tensors. 
- """ - if 'smart_resize' in kwargs: - crop_to_aspect_ratio = kwargs.pop('smart_resize') - if kwargs: - raise TypeError(f'Unknown keywords argument(s): {tuple(kwargs.keys())}') - if labels not in ('inferred', None): - if not isinstance(labels, (list, tuple)): - raise ValueError( - '`labels` argument should be a list/tuple of integer labels, of ' - 'the same size as the number of image files in the target ' - 'directory. If you wish to infer the labels from the subdirectory ' - 'names in the target directory, pass `labels="inferred"`. ' - 'If you wish to get a dataset that only contains images ' - f'(no labels), pass `labels=None`. Received: labels={labels}') - if class_names: - raise ValueError('You can only pass `class_names` if ' - f'`labels="inferred"`. Received: labels={labels}, and ' - f'class_names={class_names}') - if label_mode not in {'int', 'categorical', 'binary', None}: - raise ValueError( - '`label_mode` argument must be one of "int", "categorical", "binary", ' - f'or None. Received: label_mode={label_mode}') - if labels is None or label_mode is None: - labels = None - label_mode = None - if color_mode == 'rgb': - num_channels = 3 - elif color_mode == 'rgba': - num_channels = 4 - elif color_mode == 'grayscale': - num_channels = 1 - else: - raise ValueError( - '`color_mode` must be one of {"rgb", "rgba", "grayscale"}. ' - f'Received: color_mode={color_mode}') - interpolation = image_utils.get_interpolation(interpolation) - dataset_utils.check_validation_split_arg( - validation_split, subset, shuffle, seed) - - if seed is None: - seed = np.random.randint(1e6) - image_paths, labels, class_names = dataset_utils.index_directory( - directory, - labels, - formats=ALLOWLIST_FORMATS, - class_names=class_names, - shuffle=shuffle, - seed=seed, - follow_links=follow_links) - - if label_mode == 'binary' and len(class_names) != 2: - raise ValueError( - f'When passing `label_mode="binary"`, there must be exactly 2 ' - f'class_names. Received: class_names={class_names}') - - if subset == 'both': - image_paths_train, labels_train = dataset_utils.get_training_or_validation_split( - image_paths, labels, validation_split, 'training') - image_paths_val, labels_val = dataset_utils.get_training_or_validation_split( - image_paths, labels, validation_split, 'validation') - if not image_paths_train: - raise ValueError(f'No training images found in directory {directory}. ' - f'Allowed formats: {ALLOWLIST_FORMATS}') - if not image_paths_val: - raise ValueError(f'No validation images found in directory {directory}. 
' - f'Allowed formats: {ALLOWLIST_FORMATS}') - train_dataset = paths_and_labels_to_dataset( - image_paths=image_paths_train, - image_size=image_size, - num_channels=num_channels, - labels=labels_train, - label_mode=label_mode, - num_classes=len(class_names), - interpolation=interpolation, - crop_to_aspect_ratio=crop_to_aspect_ratio) - val_dataset = paths_and_labels_to_dataset( - image_paths=image_paths_val, - image_size=image_size, - num_channels=num_channels, - labels=labels_val, - label_mode=label_mode, - num_classes=len(class_names), - interpolation=interpolation, - crop_to_aspect_ratio=crop_to_aspect_ratio) - train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE) - val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE) - if batch_size is not None: - if shuffle: - # Shuffle locally at each iteration - train_dataset = train_dataset.shuffle( - buffer_size=batch_size * 8, seed=seed) - train_dataset = train_dataset.batch(batch_size) - val_dataset = val_dataset.batch(batch_size) + Rules regarding labels format: + + - if `label_mode` is `"int"`, the labels are an `int32` tensor of shape + `(batch_size,)`. + - if `label_mode` is `"binary"`, the labels are a `float32` tensor of + 1s and 0s of shape `(batch_size, 1)`. + - if `label_mode` is `"categorical"`, the labels are a `float32` tensor + of shape `(batch_size, num_classes)`, representing a one-hot + encoding of the class index. + + Rules regarding number of channels in the yielded images: + + - if `color_mode` is `"grayscale"`, + there's 1 channel in the image tensors. + - if `color_mode` is `"rgb"`, + there are 3 channels in the image tensors. + - if `color_mode` is `"rgba"`, + there are 4 channels in the image tensors. + """ + if "smart_resize" in kwargs: + crop_to_aspect_ratio = kwargs.pop("smart_resize") + if kwargs: + raise TypeError(f"Unknown keyword argument(s): {tuple(kwargs.keys())}") + if labels not in ("inferred", None): + if not isinstance(labels, (list, tuple)): + raise ValueError( + "`labels` argument should be a list/tuple of integer labels, " + "of the same size as the number of image files in the target " + "directory. If you wish to infer the labels from the " + "subdirectory " + 'names in the target directory, pass `labels="inferred"`. ' + "If you wish to get a dataset that only contains images " + f"(no labels), pass `labels=None`. Received: labels={labels}" + ) + if class_names: + raise ValueError( + "You can only pass `class_names` if " + f'`labels="inferred"`. Received: labels={labels}, and ' + f"class_names={class_names}" + ) + if label_mode not in {"int", "categorical", "binary", None}: + raise ValueError( + '`label_mode` argument must be one of "int", ' + '"categorical", "binary", ' + f"or None. Received: label_mode={label_mode}" + ) + if labels is None or label_mode is None: + labels = None + label_mode = None + if color_mode == "rgb": + num_channels = 3 + elif color_mode == "rgba": + num_channels = 4 + elif color_mode == "grayscale": + num_channels = 1 else: - if shuffle: - train_dataset = train_dataset.shuffle(buffer_size=1024, seed=seed) - - # Users may need to reference `class_names`. - train_dataset.class_names = class_names - val_dataset.class_names = class_names - # Include file paths for images as attribute.
- train_dataset.file_paths = image_paths_train - val_dataset.file_paths = image_paths_val - dataset = [train_dataset, val_dataset] - else: - image_paths, labels = dataset_utils.get_training_or_validation_split( - image_paths, labels, validation_split, subset) - if not image_paths: - raise ValueError(f'No images found in directory {directory}. ' - f'Allowed formats: {ALLOWLIST_FORMATS}') - - dataset = paths_and_labels_to_dataset( - image_paths=image_paths, - image_size=image_size, - num_channels=num_channels, - labels=labels, - label_mode=label_mode, - num_classes=len(class_names), - interpolation=interpolation, - crop_to_aspect_ratio=crop_to_aspect_ratio) - dataset = dataset.prefetch(tf.data.AUTOTUNE) - if batch_size is not None: - if shuffle: - # Shuffle locally at each iteration - dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed) - dataset = dataset.batch(batch_size) + raise ValueError( + '`color_mode` must be one of {"rgb", "rgba", "grayscale"}. ' + f"Received: color_mode={color_mode}" + ) + interpolation = image_utils.get_interpolation(interpolation) + dataset_utils.check_validation_split_arg( + validation_split, subset, shuffle, seed + ) + + if seed is None: + seed = np.random.randint(1e6) + image_paths, labels, class_names = dataset_utils.index_directory( + directory, + labels, + formats=ALLOWLIST_FORMATS, + class_names=class_names, + shuffle=shuffle, + seed=seed, + follow_links=follow_links, + ) + + if label_mode == "binary" and len(class_names) != 2: + raise ValueError( + 'When passing `label_mode="binary"`, there must be exactly 2 ' + f"class_names. Received: class_names={class_names}" + ) + + if subset == "both": + ( + image_paths_train, + labels_train, + ) = dataset_utils.get_training_or_validation_split( + image_paths, labels, validation_split, "training" + ) + ( + image_paths_val, + labels_val, + ) = dataset_utils.get_training_or_validation_split( + image_paths, labels, validation_split, "validation" + ) + if not image_paths_train: + raise ValueError( + f"No training images found in directory {directory}. " + f"Allowed formats: {ALLOWLIST_FORMATS}" + ) + if not image_paths_val: + raise ValueError( + f"No validation images found in directory {directory}. " + f"Allowed formats: {ALLOWLIST_FORMATS}" + ) + train_dataset = paths_and_labels_to_dataset( + image_paths=image_paths_train, + image_size=image_size, + num_channels=num_channels, + labels=labels_train, + label_mode=label_mode, + num_classes=len(class_names), + interpolation=interpolation, + crop_to_aspect_ratio=crop_to_aspect_ratio, + ) + val_dataset = paths_and_labels_to_dataset( + image_paths=image_paths_val, + image_size=image_size, + num_channels=num_channels, + labels=labels_val, + label_mode=label_mode, + num_classes=len(class_names), + interpolation=interpolation, + crop_to_aspect_ratio=crop_to_aspect_ratio, + ) + + if batch_size is not None: + if shuffle: + # Shuffle locally at each iteration + train_dataset = train_dataset.shuffle( + buffer_size=batch_size * 8, seed=seed + ) + train_dataset = train_dataset.batch(batch_size) + val_dataset = val_dataset.batch(batch_size) + else: + if shuffle: + train_dataset = train_dataset.shuffle( + buffer_size=1024, seed=seed + ) + + train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE) + val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE) + + # Users may need to reference `class_names`. + train_dataset.class_names = class_names + val_dataset.class_names = class_names + + # Include file paths for images as attribute. 
+ train_dataset.file_paths = image_paths_train + val_dataset.file_paths = image_paths_val + dataset = [train_dataset, val_dataset] + else: + image_paths, labels = dataset_utils.get_training_or_validation_split( + image_paths, labels, validation_split, subset + ) + if not image_paths: + raise ValueError( + f"No images found in directory {directory}. " + f"Allowed formats: {ALLOWLIST_FORMATS}" + ) + + dataset = paths_and_labels_to_dataset( + image_paths=image_paths, + image_size=image_size, + num_channels=num_channels, + labels=labels, + label_mode=label_mode, + num_classes=len(class_names), + interpolation=interpolation, + crop_to_aspect_ratio=crop_to_aspect_ratio, + ) + + if batch_size is not None: + if shuffle: + # Shuffle locally at each iteration + dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed) + dataset = dataset.batch(batch_size) + else: + if shuffle: + dataset = dataset.shuffle(buffer_size=1024, seed=seed) + + dataset = dataset.prefetch(tf.data.AUTOTUNE) + + # Users may need to reference `class_names`. + dataset.class_names = class_names + + # Include file paths for images as attribute. + dataset.file_paths = image_paths + return dataset + + +def paths_and_labels_to_dataset( + image_paths, + image_size, + num_channels, + labels, + label_mode, + num_classes, + interpolation, + crop_to_aspect_ratio=False, +): + """Constructs a dataset of images and labels.""" + # TODO(fchollet): consider making num_parallel_calls settable + path_ds = tf.data.Dataset.from_tensor_slices(image_paths) + args = (image_size, num_channels, interpolation, crop_to_aspect_ratio) + img_ds = path_ds.map( + lambda x: load_image(x, *args), num_parallel_calls=tf.data.AUTOTUNE + ) + if label_mode: + label_ds = dataset_utils.labels_to_dataset( + labels, label_mode, num_classes + ) + img_ds = tf.data.Dataset.zip((img_ds, label_ds)) + return img_ds + + +def load_image( + path, image_size, num_channels, interpolation, crop_to_aspect_ratio=False +): + """Load an image from a path and resize it.""" + img = tf.io.read_file(path) + img = tf.image.decode_image( + img, channels=num_channels, expand_animations=False + ) + if crop_to_aspect_ratio: + img = image_utils.smart_resize( + img, image_size, interpolation=interpolation + ) else: - if shuffle: - dataset = dataset.shuffle(buffer_size=1024, seed=seed) - - # Users may need to reference `class_names`. - dataset.class_names = class_names - # Include file paths for images as attribute. 
- dataset.file_paths = image_paths - return dataset - - -def paths_and_labels_to_dataset(image_paths, - image_size, - num_channels, - labels, - label_mode, - num_classes, - interpolation, - crop_to_aspect_ratio=False): - """Constructs a dataset of images and labels.""" - # TODO(fchollet): consider making num_parallel_calls settable - path_ds = tf.data.Dataset.from_tensor_slices(image_paths) - args = (image_size, num_channels, interpolation, crop_to_aspect_ratio) - img_ds = path_ds.map( - lambda x: load_image(x, *args), num_parallel_calls=tf.data.AUTOTUNE) - if label_mode: - label_ds = dataset_utils.labels_to_dataset(labels, label_mode, num_classes) - img_ds = tf.data.Dataset.zip((img_ds, label_ds)) - return img_ds - - -def load_image(path, image_size, num_channels, interpolation, - crop_to_aspect_ratio=False): - """Load an image from a path and resize it.""" - img = tf.io.read_file(path) - img = tf.image.decode_image( - img, channels=num_channels, expand_animations=False) - if crop_to_aspect_ratio: - img = image_utils.smart_resize(img, image_size, interpolation=interpolation) - else: - img = tf.image.resize(img, image_size, method=interpolation) - img.set_shape((image_size[0], image_size[1], num_channels)) - return img + img = tf.image.resize(img, image_size, method=interpolation) + img.set_shape((image_size[0], image_size[1], num_channels)) + return img diff --git a/keras/utils/image_dataset_test.py b/keras/utils/image_dataset_test.py index fa6f9f61fafa..cc4c26c2408b 100644 --- a/keras/utils/image_dataset_test.py +++ b/keras/utils/image_dataset_test.py @@ -14,368 +14,442 @@ # ============================================================================== """Tests for image_dataset.""" -import tensorflow.compat.v2 as tf - import os import shutil import numpy as np +import tensorflow.compat.v2 as tf + from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import image_dataset from keras.utils import image_utils try: - import PIL # pylint:disable=g-import-not-at-top + import PIL except ImportError: - PIL = None + PIL = None @test_utils.run_v2_only class ImageDatasetFromDirectoryTest(test_combinations.TestCase): - - def _get_images(self, count=16, color_mode='rgb'): - width = height = 24 - imgs = [] - for _ in range(count): - if color_mode == 'grayscale': - img = np.random.randint(0, 256, size=(height, width, 1)) - elif color_mode == 'rgba': - img = np.random.randint(0, 256, size=(height, width, 4)) - else: - img = np.random.randint(0, 256, size=(height, width, 3)) - img = image_utils.array_to_img(img) - imgs.append(img) - return imgs - - def _prepare_directory(self, - num_classes=2, - grayscale=False, - nested_dirs=False, - color_mode='rgb', - count=16): - # Get a unique temp directory - temp_dir = os.path.join(self.get_temp_dir(), str(np.random.randint(1e6))) - os.mkdir(temp_dir) - self.addCleanup(shutil.rmtree, temp_dir) - - # Generate paths to class subdirectories - paths = [] - for class_index in range(num_classes): - class_directory = 'class_%s' % (class_index,) - if nested_dirs: - class_paths = [ - class_directory, os.path.join(class_directory, 'subfolder_1'), - os.path.join(class_directory, 'subfolder_2'), os.path.join( - class_directory, 'subfolder_1', 'sub-subfolder') - ] - else: - class_paths = [class_directory] - for path in class_paths: - os.mkdir(os.path.join(temp_dir, path)) - paths += class_paths - - # Save images to the paths - i = 0 - for img in self._get_images(color_mode=color_mode, count=count): - path = paths[i % 
len(paths)] - if color_mode == 'rgb': - ext = 'jpg' - else: - ext = 'png' - filename = os.path.join(path, 'image_%s.%s' % (i, ext)) - img.save(os.path.join(temp_dir, filename)) - i += 1 - return temp_dir - - def test_image_dataset_from_directory_standalone(self): - # Test retrieving images without labels from a directory and its subdirs. - if PIL is None: - return # Skip test if PIL is not available. - - # Save a few extra images in the parent directory. - directory = self._prepare_directory(count=7, num_classes=2) - for i, img in enumerate(self._get_images(3)): - filename = 'image_%s.jpg' % (i,) - img.save(os.path.join(directory, filename)) - - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=5, image_size=(18, 18), labels=None) - batch = next(iter(dataset)) - # We return plain images - self.assertEqual(batch.shape, (5, 18, 18, 3)) - self.assertEqual(batch.dtype.name, 'float32') - # Count samples - batch_count = 0 - sample_count = 0 - for batch in dataset: - batch_count += 1 - sample_count += batch.shape[0] - self.assertEqual(batch_count, 2) - self.assertEqual(sample_count, 10) - - def test_image_dataset_from_directory_binary(self): - if PIL is None: - return # Skip test if PIL is not available. - - directory = self._prepare_directory(num_classes=2) - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode='int') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 18, 18, 3)) - self.assertEqual(batch[0].dtype.name, 'float32') - self.assertEqual(batch[1].shape, (8,)) - self.assertEqual(batch[1].dtype.name, 'int32') - - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode='binary') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 18, 18, 3)) - self.assertEqual(batch[0].dtype.name, 'float32') - self.assertEqual(batch[1].shape, (8, 1)) - self.assertEqual(batch[1].dtype.name, 'float32') - - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode='categorical') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 18, 18, 3)) - self.assertEqual(batch[0].dtype.name, 'float32') - self.assertEqual(batch[1].shape, (8, 2)) - self.assertEqual(batch[1].dtype.name, 'float32') - - def test_static_shape_in_graph(self): - if PIL is None: - return # Skip test if PIL is not available. - - directory = self._prepare_directory(num_classes=2) - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode='int') - test_case = self - - @tf.function - def symbolic_fn(ds): - for x, _ in ds.take(1): - test_case.assertListEqual(x.shape.as_list(), [None, 18, 18, 3]) - - symbolic_fn(dataset) - - def test_sample_count(self): - if PIL is None: - return # Skip test if PIL is not available. - - directory = self._prepare_directory(num_classes=4, count=15) - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode=None) - sample_count = 0 - for batch in dataset: - sample_count += batch.shape[0] - self.assertEqual(sample_count, 15) - - def test_image_dataset_from_directory_multiclass(self): - if PIL is None: - return # Skip test if PIL is not available. 
- - directory = self._prepare_directory(num_classes=4, count=15) - - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode=None) - batch = next(iter(dataset)) - self.assertEqual(batch.shape, (8, 18, 18, 3)) - - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode=None) - sample_count = 0 - iterator = iter(dataset) - for batch in dataset: - sample_count += next(iterator).shape[0] - self.assertEqual(sample_count, 15) - - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode='int') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 18, 18, 3)) - self.assertEqual(batch[0].dtype.name, 'float32') - self.assertEqual(batch[1].shape, (8,)) - self.assertEqual(batch[1].dtype.name, 'int32') - - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode='categorical') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 18, 18, 3)) - self.assertEqual(batch[0].dtype.name, 'float32') - self.assertEqual(batch[1].shape, (8, 4)) - self.assertEqual(batch[1].dtype.name, 'float32') - - def test_image_dataset_from_directory_color_modes(self): - if PIL is None: - return # Skip test if PIL is not available. - - directory = self._prepare_directory(num_classes=4, color_mode='rgba') - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), color_mode='rgba') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 18, 18, 4)) - self.assertEqual(batch[0].dtype.name, 'float32') - - directory = self._prepare_directory(num_classes=4, color_mode='grayscale') - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), color_mode='grayscale') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 18, 18, 1)) - self.assertEqual(batch[0].dtype.name, 'float32') - - def test_image_dataset_from_directory_validation_split(self): - if PIL is None: - return # Skip test if PIL is not available. - - directory = self._prepare_directory(num_classes=2, count=10) - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=10, image_size=(18, 18), - validation_split=0.2, subset='training', seed=1337) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 18, 18, 3)) - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=10, image_size=(18, 18), - validation_split=0.2, subset='validation', seed=1337) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (2, 18, 18, 3)) - - train_dataset, val_dataset = image_dataset.image_dataset_from_directory( - directory, - batch_size=10, - image_size=(18, 18), - validation_split=0.2, - subset='both', - seed=1337) - batch = next(iter(train_dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8, 18, 18, 3)) - batch = next(iter(val_dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (2, 18, 18, 3)) - - def test_image_dataset_from_directory_manual_labels(self): - if PIL is None: - return # Skip test if PIL is not available. 
- - directory = self._prepare_directory(num_classes=2, count=2) - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), - labels=[0, 1], shuffle=False) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertAllClose(batch[1], [0, 1]) - - def test_image_dataset_from_directory_follow_links(self): - if PIL is None: - return # Skip test if PIL is not available. - - directory = self._prepare_directory(num_classes=2, count=25, - nested_dirs=True) - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=8, image_size=(18, 18), label_mode=None, - follow_links=True) - sample_count = 0 - for batch in dataset: - sample_count += batch.shape[0] - self.assertEqual(sample_count, 25) - - def test_image_dataset_from_directory_no_images(self): - directory = self._prepare_directory(num_classes=2, count=0) - with self.assertRaisesRegex(ValueError, 'No images found.'): - _ = image_dataset.image_dataset_from_directory(directory) - - def test_image_dataset_from_directory_crop_to_aspect_ratio(self): - if PIL is None: - return # Skip test if PIL is not available. - - directory = self._prepare_directory(num_classes=2, count=5) - dataset = image_dataset.image_dataset_from_directory( - directory, batch_size=5, image_size=(18, 18), crop_to_aspect_ratio=True) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (5, 18, 18, 3)) - - def test_image_dataset_from_directory_errors(self): - if PIL is None: - return # Skip test if PIL is not available. - - directory = self._prepare_directory(num_classes=3, count=5) - - with self.assertRaisesRegex(ValueError, '`labels` argument should be'): - _ = image_dataset.image_dataset_from_directory( - directory, labels='other') - - with self.assertRaisesRegex(ValueError, '`label_mode` argument must be'): - _ = image_dataset.image_dataset_from_directory( - directory, label_mode='other') - - with self.assertRaisesRegex(ValueError, '`color_mode` must be one of'): - _ = image_dataset.image_dataset_from_directory( - directory, color_mode='other') - - with self.assertRaisesRegex( - ValueError, 'only pass `class_names` if `labels="inferred"`'): - _ = image_dataset.image_dataset_from_directory( - directory, labels=[0, 0, 1, 1, 1], - class_names=['class_0', 'class_1', 'class_2']) - - with self.assertRaisesRegex( - ValueError, - 'Expected the lengths of `labels` to match the number of files'): - _ = image_dataset.image_dataset_from_directory( - directory, labels=[0, 0, 1, 1]) - - with self.assertRaisesRegex( - ValueError, '`class_names` passed did not match'): - _ = image_dataset.image_dataset_from_directory( - directory, class_names=['class_0', 'class_2']) - - with self.assertRaisesRegex(ValueError, 'there must be exactly 2'): - _ = image_dataset.image_dataset_from_directory( - directory, label_mode='binary') - - with self.assertRaisesRegex(ValueError, - '`validation_split` must be between 0 and 1'): - _ = image_dataset.image_dataset_from_directory( - directory, validation_split=2) - - with self.assertRaisesRegex( - ValueError, '`subset` must be either "training", ' - '"validation" or "both"'): - _ = image_dataset.image_dataset_from_directory( - directory, validation_split=0.2, subset='other') - - with self.assertRaisesRegex(ValueError, '`validation_split` must be set'): - _ = image_dataset.image_dataset_from_directory( - directory, validation_split=0, subset='training') - - with self.assertRaisesRegex(ValueError, 'must provide a `seed`'): - _ = 
image_dataset.image_dataset_from_directory( - directory, validation_split=0.2, subset='training') - - def test_image_dataset_from_directory_not_batched(self): - if PIL is None: - return # Skip test if PIL is not available. - - directory = self._prepare_directory(num_classes=2, count=2) - dataset = image_dataset.image_dataset_from_directory( - directory, - batch_size=None, - image_size=(18, 18), - label_mode=None, - shuffle=False) - sample = next(iter(dataset)) - self.assertEqual(len(sample.shape), 3) - -if __name__ == '__main__': - tf.test.main() + def _get_images(self, count=16, color_mode="rgb"): + width = height = 24 + imgs = [] + for _ in range(count): + if color_mode == "grayscale": + img = np.random.randint(0, 256, size=(height, width, 1)) + elif color_mode == "rgba": + img = np.random.randint(0, 256, size=(height, width, 4)) + else: + img = np.random.randint(0, 256, size=(height, width, 3)) + img = image_utils.array_to_img(img) + imgs.append(img) + return imgs + + def _prepare_directory( + self, + num_classes=2, + grayscale=False, + nested_dirs=False, + color_mode="rgb", + count=16, + ): + # Get a unique temp directory + temp_dir = os.path.join( + self.get_temp_dir(), str(np.random.randint(1e6)) + ) + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) + + # Generate paths to class subdirectories + paths = [] + for class_index in range(num_classes): + class_directory = f"class_{class_index}" + if nested_dirs: + class_paths = [ + class_directory, + os.path.join(class_directory, "subfolder_1"), + os.path.join(class_directory, "subfolder_2"), + os.path.join( + class_directory, "subfolder_1", "sub-subfolder" + ), + ] + else: + class_paths = [class_directory] + for path in class_paths: + os.mkdir(os.path.join(temp_dir, path)) + paths += class_paths + + # Save images to the paths + i = 0 + for img in self._get_images(color_mode=color_mode, count=count): + path = paths[i % len(paths)] + if color_mode == "rgb": + ext = "jpg" + else: + ext = "png" + filename = os.path.join(path, f"image_{i}.{ext}") + img.save(os.path.join(temp_dir, filename)) + i += 1 + return temp_dir + + def test_image_dataset_from_directory_standalone(self): + # Test retrieving images without labels from a directory and its + # subdirs. + if PIL is None: + return # Skip test if PIL is not available. + + # Save a few extra images in the parent directory. + directory = self._prepare_directory(count=7, num_classes=2) + for i, img in enumerate(self._get_images(3)): + filename = f"image_{i}.jpg" + img.save(os.path.join(directory, filename)) + + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=5, image_size=(18, 18), labels=None + ) + batch = next(iter(dataset)) + # We return plain images + self.assertEqual(batch.shape, (5, 18, 18, 3)) + self.assertEqual(batch.dtype.name, "float32") + # Count samples + batch_count = 0 + sample_count = 0 + for batch in dataset: + batch_count += 1 + sample_count += batch.shape[0] + self.assertEqual(batch_count, 2) + self.assertEqual(sample_count, 10) + + def test_image_dataset_from_directory_binary(self): + if PIL is None: + return # Skip test if PIL is not available. 
+ + directory = self._prepare_directory(num_classes=2) + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=8, image_size=(18, 18), label_mode="int" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 18, 18, 3)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8,)) + self.assertEqual(batch[1].dtype.name, "int32") + + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=8, image_size=(18, 18), label_mode="binary" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 18, 18, 3)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8, 1)) + self.assertEqual(batch[1].dtype.name, "float32") + + dataset = image_dataset.image_dataset_from_directory( + directory, + batch_size=8, + image_size=(18, 18), + label_mode="categorical", + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 18, 18, 3)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8, 2)) + self.assertEqual(batch[1].dtype.name, "float32") + + def test_static_shape_in_graph(self): + if PIL is None: + return # Skip test if PIL is not available. + + directory = self._prepare_directory(num_classes=2) + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=8, image_size=(18, 18), label_mode="int" + ) + test_case = self + + @tf.function + def symbolic_fn(ds): + for x, _ in ds.take(1): + test_case.assertListEqual(x.shape.as_list(), [None, 18, 18, 3]) + + symbolic_fn(dataset) + + def test_sample_count(self): + if PIL is None: + return # Skip test if PIL is not available. + + directory = self._prepare_directory(num_classes=4, count=15) + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=8, image_size=(18, 18), label_mode=None + ) + sample_count = 0 + for batch in dataset: + sample_count += batch.shape[0] + self.assertEqual(sample_count, 15) + + def test_image_dataset_from_directory_multiclass(self): + if PIL is None: + return # Skip test if PIL is not available. 
+ + directory = self._prepare_directory(num_classes=4, count=15) + + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=8, image_size=(18, 18), label_mode=None + ) + batch = next(iter(dataset)) + self.assertEqual(batch.shape, (8, 18, 18, 3)) + + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=8, image_size=(18, 18), label_mode=None + ) + sample_count = 0 + iterator = iter(dataset) + for batch in dataset: + sample_count += next(iterator).shape[0] + self.assertEqual(sample_count, 15) + + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=8, image_size=(18, 18), label_mode="int" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 18, 18, 3)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8,)) + self.assertEqual(batch[1].dtype.name, "int32") + + dataset = image_dataset.image_dataset_from_directory( + directory, + batch_size=8, + image_size=(18, 18), + label_mode="categorical", + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 18, 18, 3)) + self.assertEqual(batch[0].dtype.name, "float32") + self.assertEqual(batch[1].shape, (8, 4)) + self.assertEqual(batch[1].dtype.name, "float32") + + def test_image_dataset_from_directory_color_modes(self): + if PIL is None: + return # Skip test if PIL is not available. + + directory = self._prepare_directory(num_classes=4, color_mode="rgba") + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=8, image_size=(18, 18), color_mode="rgba" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 18, 18, 4)) + self.assertEqual(batch[0].dtype.name, "float32") + + directory = self._prepare_directory( + num_classes=4, color_mode="grayscale" + ) + dataset = image_dataset.image_dataset_from_directory( + directory, batch_size=8, image_size=(18, 18), color_mode="grayscale" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 18, 18, 1)) + self.assertEqual(batch[0].dtype.name, "float32") + + def test_image_dataset_from_directory_validation_split(self): + if PIL is None: + return # Skip test if PIL is not available. + + directory = self._prepare_directory(num_classes=2, count=10) + dataset = image_dataset.image_dataset_from_directory( + directory, + batch_size=10, + image_size=(18, 18), + validation_split=0.2, + subset="training", + seed=1337, + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 18, 18, 3)) + dataset = image_dataset.image_dataset_from_directory( + directory, + batch_size=10, + image_size=(18, 18), + validation_split=0.2, + subset="validation", + seed=1337, + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (2, 18, 18, 3)) + + train_dataset, val_dataset = image_dataset.image_dataset_from_directory( + directory, + batch_size=10, + image_size=(18, 18), + validation_split=0.2, + subset="both", + seed=1337, + ) + batch = next(iter(train_dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8, 18, 18, 3)) + batch = next(iter(val_dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (2, 18, 18, 3)) + + def test_image_dataset_from_directory_manual_labels(self): + if PIL is None: + return # Skip test if PIL is not available. 
+ + directory = self._prepare_directory(num_classes=2, count=2) + dataset = image_dataset.image_dataset_from_directory( + directory, + batch_size=8, + image_size=(18, 18), + labels=[0, 1], + shuffle=False, + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertAllClose(batch[1], [0, 1]) + + def test_image_dataset_from_directory_follow_links(self): + if PIL is None: + return # Skip test if PIL is not available. + + directory = self._prepare_directory( + num_classes=2, count=25, nested_dirs=True + ) + dataset = image_dataset.image_dataset_from_directory( + directory, + batch_size=8, + image_size=(18, 18), + label_mode=None, + follow_links=True, + ) + sample_count = 0 + for batch in dataset: + sample_count += batch.shape[0] + self.assertEqual(sample_count, 25) + + def test_image_dataset_from_directory_no_images(self): + directory = self._prepare_directory(num_classes=2, count=0) + with self.assertRaisesRegex(ValueError, "No images found."): + _ = image_dataset.image_dataset_from_directory(directory) + + def test_image_dataset_from_directory_crop_to_aspect_ratio(self): + if PIL is None: + return # Skip test if PIL is not available. + + directory = self._prepare_directory(num_classes=2, count=5) + dataset = image_dataset.image_dataset_from_directory( + directory, + batch_size=5, + image_size=(18, 18), + crop_to_aspect_ratio=True, + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (5, 18, 18, 3)) + + def test_image_dataset_from_directory_errors(self): + if PIL is None: + return # Skip test if PIL is not available. + + directory = self._prepare_directory(num_classes=3, count=5) + + with self.assertRaisesRegex(ValueError, "`labels` argument should be"): + _ = image_dataset.image_dataset_from_directory( + directory, labels="other" + ) + + with self.assertRaisesRegex( + ValueError, "`label_mode` argument must be" + ): + _ = image_dataset.image_dataset_from_directory( + directory, label_mode="other" + ) + + with self.assertRaisesRegex(ValueError, "`color_mode` must be one of"): + _ = image_dataset.image_dataset_from_directory( + directory, color_mode="other" + ) + + with self.assertRaisesRegex( + ValueError, 'only pass `class_names` if `labels="inferred"`' + ): + _ = image_dataset.image_dataset_from_directory( + directory, + labels=[0, 0, 1, 1, 1], + class_names=["class_0", "class_1", "class_2"], + ) + + with self.assertRaisesRegex( + ValueError, + "Expected the lengths of `labels` to match the number of files", + ): + _ = image_dataset.image_dataset_from_directory( + directory, labels=[0, 0, 1, 1] + ) + + with self.assertRaisesRegex( + ValueError, "`class_names` passed did not match" + ): + _ = image_dataset.image_dataset_from_directory( + directory, class_names=["class_0", "class_2"] + ) + + with self.assertRaisesRegex(ValueError, "there must be exactly 2"): + _ = image_dataset.image_dataset_from_directory( + directory, label_mode="binary" + ) + + with self.assertRaisesRegex( + ValueError, "`validation_split` must be between 0 and 1" + ): + _ = image_dataset.image_dataset_from_directory( + directory, validation_split=2 + ) + + with self.assertRaisesRegex( + ValueError, + '`subset` must be either "training", "validation" or "both"', + ): + _ = image_dataset.image_dataset_from_directory( + directory, validation_split=0.2, subset="other" + ) + + with self.assertRaisesRegex( + ValueError, "`validation_split` must be set" + ): + _ = image_dataset.image_dataset_from_directory( + directory, validation_split=0, subset="training" + ) + + with 
self.assertRaisesRegex(ValueError, "must provide a `seed`"): + _ = image_dataset.image_dataset_from_directory( + directory, validation_split=0.2, subset="training" + ) + + def test_image_dataset_from_directory_not_batched(self): + if PIL is None: + return # Skip test if PIL is not available. + + directory = self._prepare_directory(num_classes=2, count=2) + dataset = image_dataset.image_dataset_from_directory( + directory, + batch_size=None, + image_size=(18, 18), + label_mode=None, + shuffle=False, + ) + sample = next(iter(dataset)) + self.assertEqual(len(sample.shape), 3) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/image_utils.py b/keras/utils/image_utils.py index 2385af3f7944..94f4ebc2e631 100644 --- a/keras/utils/image_utils.py +++ b/keras/utils/image_utils.py @@ -14,431 +14,467 @@ # ============================================================================== """Utilities related to image handling.""" -# pylint: disable=g-import-not-at-top import io import pathlib import warnings -from keras import backend import numpy as np import tensorflow.compat.v2 as tf + +from keras import backend + +# isort: off from tensorflow.python.util.tf_export import keras_export try: - from PIL import Image as pil_image + from PIL import Image as pil_image + + try: + pil_image_resampling = pil_image.Resampling + except AttributeError: + pil_image_resampling = pil_image except ImportError: - pil_image = None + pil_image = None + pil_image_resampling = None -if pil_image is not None: - _PIL_INTERPOLATION_METHODS = { - 'nearest': pil_image.NEAREST, - 'bilinear': pil_image.BILINEAR, - 'bicubic': pil_image.BICUBIC, - 'hamming': pil_image.HAMMING, - 'box': pil_image.BOX, - 'lanczos': pil_image.LANCZOS, - } +if pil_image_resampling is not None: + _PIL_INTERPOLATION_METHODS = { + "nearest": pil_image_resampling.NEAREST, + "bilinear": pil_image_resampling.BILINEAR, + "bicubic": pil_image_resampling.BICUBIC, + "hamming": pil_image_resampling.HAMMING, + "box": pil_image_resampling.BOX, + "lanczos": pil_image_resampling.LANCZOS, + } ResizeMethod = tf.image.ResizeMethod _TF_INTERPOLATION_METHODS = { - 'bilinear': ResizeMethod.BILINEAR, - 'nearest': ResizeMethod.NEAREST_NEIGHBOR, - 'bicubic': ResizeMethod.BICUBIC, - 'area': ResizeMethod.AREA, - 'lanczos3': ResizeMethod.LANCZOS3, - 'lanczos5': ResizeMethod.LANCZOS5, - 'gaussian': ResizeMethod.GAUSSIAN, - 'mitchellcubic': ResizeMethod.MITCHELLCUBIC + "bilinear": ResizeMethod.BILINEAR, + "nearest": ResizeMethod.NEAREST_NEIGHBOR, + "bicubic": ResizeMethod.BICUBIC, + "area": ResizeMethod.AREA, + "lanczos3": ResizeMethod.LANCZOS3, + "lanczos5": ResizeMethod.LANCZOS5, + "gaussian": ResizeMethod.GAUSSIAN, + "mitchellcubic": ResizeMethod.MITCHELLCUBIC, } -@keras_export('keras.preprocessing.image.smart_resize', v1=[]) -def smart_resize(x, size, interpolation='bilinear'): - """Resize images to a target size without aspect ratio distortion. - - Warning: `tf.keras.preprocessing.image.smart_resize` is not recommended for - new code. Prefer `tf.keras.layers.Resizing`, which provides the same - functionality as a preprocessing layer and adds `tf.RaggedTensor` support. See - the [preprocessing layer guide]( - https://www.tensorflow.org/guide/keras/preprocessing_layers) - for an overview of preprocessing layers. - - TensorFlow image datasets typically yield images that have each a different - size. However, these images need to be batched before they can be - processed by Keras layers. To be batched, images need to share the same height - and width. 
- - You could simply do: - - ```python - size = (200, 200) - ds = ds.map(lambda img: tf.image.resize(img, size)) - ``` - - However, if you do this, you distort the aspect ratio of your images, since - in general they do not all have the same aspect ratio as `size`. This is - fine in many cases, but not always (e.g. for GANs this can be a problem). - - Note that passing the argument `preserve_aspect_ratio=True` to `resize` - will preserve the aspect ratio, but at the cost of no longer respecting the - provided target size. Because `tf.image.resize` doesn't crop images, - your output images will still have different sizes. - - This calls for: - - ```python - size = (200, 200) - ds = ds.map(lambda img: smart_resize(img, size)) - ``` - - Your output images will actually be `(200, 200)`, and will not be distorted. - Instead, the parts of the image that do not fit within the target size - get cropped out. - - The resizing process is: - - 1. Take the largest centered crop of the image that has the same aspect ratio - as the target size. For instance, if `size=(200, 200)` and the input image has - size `(340, 500)`, we take a crop of `(340, 340)` centered along the width. - 2. Resize the cropped image to the target size. In the example above, - we resize the `(340, 340)` crop to `(200, 200)`. - - Args: - x: Input image or batch of images (as a tensor or NumPy array). Must be in - format `(height, width, channels)` or `(batch_size, height, width, - channels)`. - size: Tuple of `(height, width)` integer. Target size. - interpolation: String, interpolation to use for resizing. Defaults to - `'bilinear'`. Supports `bilinear`, `nearest`, `bicubic`, `area`, - `lanczos3`, `lanczos5`, `gaussian`, `mitchellcubic`. - - Returns: - Array with shape `(size[0], size[1], channels)`. If the input image was a - NumPy array, the output is a NumPy array, and if it was a TF tensor, - the output is a TF tensor. - """ - if len(size) != 2: - raise ValueError('Expected `size` to be a tuple of 2 integers, ' - f'but got: {size}.') - img = tf.convert_to_tensor(x) - if img.shape.rank is not None: - if img.shape.rank < 3 or img.shape.rank > 4: - raise ValueError( - 'Expected an image array with shape `(height, width, channels)`, ' - 'or `(batch_size, height, width, channels)`, but ' - f'got input with incorrect rank, of shape {img.shape}.') - shape = tf.shape(img) - height, width = shape[-3], shape[-2] - target_height, target_width = size - if img.shape.rank is not None: - static_num_channels = img.shape[-1] - else: - static_num_channels = None - - crop_height = tf.cast( - tf.cast(width * target_height, 'float32') / target_width, 'int32') - crop_width = tf.cast( - tf.cast(height * target_width, 'float32') / target_height, 'int32') - - # Set back to input height / width if crop_height / crop_width is not smaller. 
- crop_height = tf.minimum(height, crop_height) - crop_width = tf.minimum(width, crop_width) - - crop_box_hstart = tf.cast( - tf.cast(height - crop_height, 'float32') / 2, 'int32') - crop_box_wstart = tf.cast(tf.cast(width - crop_width, 'float32') / 2, 'int32') - - if img.shape.rank == 4: - crop_box_start = tf.stack([0, crop_box_hstart, crop_box_wstart, 0]) - crop_box_size = tf.stack([-1, crop_height, crop_width, -1]) - else: - crop_box_start = tf.stack([crop_box_hstart, crop_box_wstart, 0]) - crop_box_size = tf.stack([crop_height, crop_width, -1]) - - img = tf.slice(img, crop_box_start, crop_box_size) - img = tf.image.resize(images=img, size=size, method=interpolation) - # Apparent bug in resize_images_v2 may cause shape to be lost - if img.shape.rank is not None: +@keras_export("keras.preprocessing.image.smart_resize", v1=[]) +def smart_resize(x, size, interpolation="bilinear"): + """Resize images to a target size without aspect ratio distortion. + + Warning: `tf.keras.preprocessing.image.smart_resize` is not recommended for + new code. Prefer `tf.keras.layers.Resizing`, which provides the same + functionality as a preprocessing layer and adds `tf.RaggedTensor` support. + See the [preprocessing layer guide]( + https://www.tensorflow.org/guide/keras/preprocessing_layers) + for an overview of preprocessing layers. + + TensorFlow image datasets typically yield images that each have a different + size. However, these images need to be batched before they can be + processed by Keras layers. To be batched, images need to share the same + height and width. + + You could simply do: + + ```python + size = (200, 200) + ds = ds.map(lambda img: tf.image.resize(img, size)) + ``` + + However, if you do this, you distort the aspect ratio of your images, since + in general they do not all have the same aspect ratio as `size`. This is + fine in many cases, but not always (e.g. for GANs this can be a problem). + + Note that passing the argument `preserve_aspect_ratio=True` to `resize` + will preserve the aspect ratio, but at the cost of no longer respecting the + provided target size. Because `tf.image.resize` doesn't crop images, + your output images will still have different sizes. + + This calls for: + + ```python + size = (200, 200) + ds = ds.map(lambda img: smart_resize(img, size)) + ``` + + Your output images will actually be `(200, 200)`, and will not be distorted. + Instead, the parts of the image that do not fit within the target size + get cropped out. + + The resizing process is: + + 1. Take the largest centered crop of the image that has the same aspect + ratio as the target size. For instance, if `size=(200, 200)` and the input + image has size `(340, 500)`, we take a crop of `(340, 340)` centered along + the width. + 2. Resize the cropped image to the target size. In the example above, + we resize the `(340, 340)` crop to `(200, 200)`. + + Args: + x: Input image or batch of images (as a tensor or NumPy array). Must be in + format `(height, width, channels)` or `(batch_size, height, width, + channels)`. + size: Tuple of `(height, width)` integers. Target size. + interpolation: String, interpolation to use for resizing. Supports + `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`, + `gaussian`, `mitchellcubic`. Defaults to `'bilinear'`. + + Returns: + Array with shape `(size[0], size[1], channels)`. If the input image was a + NumPy array, the output is a NumPy array, and if it was a TF tensor, + the output is a TF tensor.
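+
+    For instance, a minimal illustration of the behavior described above
+    (assuming `np` and `tf` are imported; the shapes follow from the
+    cropping rules):
+
+    ```python
+    img = np.random.random((340, 500, 3))  # NumPy image, height 340, width 500
+    out = tf.keras.preprocessing.image.smart_resize(img, size=(200, 200))
+    assert isinstance(out, np.ndarray)  # NumPy in, NumPy out
+    assert out.shape == (200, 200, 3)  # cropped to (340, 340), then resized
+    ```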
+ """ + if len(size) != 2: + raise ValueError( + f"Expected `size` to be a tuple of 2 integers, but got: {size}." + ) + img = tf.convert_to_tensor(x) + if img.shape.rank is not None: + if img.shape.rank < 3 or img.shape.rank > 4: + raise ValueError( + "Expected an image array with shape `(height, width, " + "channels)`, or `(batch_size, height, width, channels)`, but " + f"got input with incorrect rank, of shape {img.shape}." + ) + shape = tf.shape(img) + height, width = shape[-3], shape[-2] + target_height, target_width = size + if img.shape.rank is not None: + static_num_channels = img.shape[-1] + else: + static_num_channels = None + + crop_height = tf.cast( + tf.cast(width * target_height, "float32") / target_width, "int32" + ) + crop_width = tf.cast( + tf.cast(height * target_width, "float32") / target_height, "int32" + ) + + # Set back to input height / width if crop_height / crop_width is not + # smaller. + crop_height = tf.minimum(height, crop_height) + crop_width = tf.minimum(width, crop_width) + + crop_box_hstart = tf.cast( + tf.cast(height - crop_height, "float32") / 2, "int32" + ) + crop_box_wstart = tf.cast( + tf.cast(width - crop_width, "float32") / 2, "int32" + ) + if img.shape.rank == 4: - img.set_shape((None, None, None, static_num_channels)) - if img.shape.rank == 3: - img.set_shape((None, None, static_num_channels)) - if isinstance(x, np.ndarray): - return img.numpy() - return img + crop_box_start = tf.stack([0, crop_box_hstart, crop_box_wstart, 0]) + crop_box_size = tf.stack([-1, crop_height, crop_width, -1]) + else: + crop_box_start = tf.stack([crop_box_hstart, crop_box_wstart, 0]) + crop_box_size = tf.stack([crop_height, crop_width, -1]) + + img = tf.slice(img, crop_box_start, crop_box_size) + img = tf.image.resize(images=img, size=size, method=interpolation) + # Apparent bug in resize_images_v2 may cause shape to be lost + if img.shape.rank is not None: + if img.shape.rank == 4: + img.set_shape((None, None, None, static_num_channels)) + if img.shape.rank == 3: + img.set_shape((None, None, static_num_channels)) + if isinstance(x, np.ndarray): + return img.numpy() + return img def get_interpolation(interpolation): - interpolation = interpolation.lower() - if interpolation not in _TF_INTERPOLATION_METHODS: - raise NotImplementedError( - 'Value not recognized for `interpolation`: {}. Supported values ' - 'are: {}'.format(interpolation, _TF_INTERPOLATION_METHODS.keys())) - return _TF_INTERPOLATION_METHODS[interpolation] + interpolation = interpolation.lower() + if interpolation not in _TF_INTERPOLATION_METHODS: + raise NotImplementedError( + "Value not recognized for `interpolation`: {}. Supported values " + "are: {}".format(interpolation, _TF_INTERPOLATION_METHODS.keys()) + ) + return _TF_INTERPOLATION_METHODS[interpolation] + + +@keras_export( + "keras.utils.array_to_img", "keras.preprocessing.image.array_to_img" +) +def array_to_img(x, data_format=None, scale=True, dtype=None): + """Converts a 3D Numpy array to a PIL Image instance. + + Usage: + + ```python + from PIL import Image + img = np.random.random(size=(100, 100, 3)) + pil_img = tf.keras.utils.array_to_img(img) + ``` + + + Args: + x: Input data, in any form that can be converted to a Numpy array. + data_format: Image data format, can be either `"channels_first"` or + `"channels_last"`. None means the global + setting `tf.keras.backend.image_data_format()` is used (unless you + changed it, it uses `"channels_last"`). Defaults to `None`. 
+ scale: Whether to rescale the image such that minimum and maximum values + are 0 and 255 respectively. Defaults to `True`. + dtype: Dtype to use. None means the global setting + `tf.keras.backend.floatx()` is used (unless you changed it, it + uses `"float32"`). Defaults to `None`. + + Returns: + A PIL Image instance. + + Raises: + ImportError: if PIL is not available. + ValueError: if invalid `x` or `data_format` is passed. + """ + + if data_format is None: + data_format = backend.image_data_format() + if dtype is None: + dtype = backend.floatx() + if pil_image is None: + raise ImportError( + "Could not import PIL.Image. " + "The use of `array_to_img` requires PIL." + ) + x = np.asarray(x, dtype=dtype) + if x.ndim != 3: + raise ValueError( + "Expected image array to have rank 3 (single image). " + f"Got array with shape: {x.shape}" + ) + + if data_format not in {"channels_first", "channels_last"}: + raise ValueError(f"Invalid data_format: {data_format}") + + # Original Numpy array x has format (height, width, channel) + # or (channel, height, width) + # but target PIL image has format (width, height, channel) + if data_format == "channels_first": + x = x.transpose(1, 2, 0) + if scale: + x = x - np.min(x) + x_max = np.max(x) + if x_max != 0: + x /= x_max + x *= 255 + if x.shape[2] == 4: + # RGBA + return pil_image.fromarray(x.astype("uint8"), "RGBA") + elif x.shape[2] == 3: + # RGB + return pil_image.fromarray(x.astype("uint8"), "RGB") + elif x.shape[2] == 1: + # grayscale + if np.max(x) > 255: + # 32-bit signed integer grayscale image. PIL mode "I" + return pil_image.fromarray(x[:, :, 0].astype("int32"), "I") + return pil_image.fromarray(x[:, :, 0].astype("uint8"), "L") + else: + raise ValueError(f"Unsupported channel number: {x.shape[2]}") -@keras_export('keras.utils.array_to_img', - 'keras.preprocessing.image.array_to_img') -def array_to_img(x, data_format=None, scale=True, dtype=None): - """Converts a 3D Numpy array to a PIL Image instance. - - Usage: - - ```python - from PIL import Image - img = np.random.random(size=(100, 100, 3)) - pil_img = tf.keras.preprocessing.image.array_to_img(img) - ``` - - - Args: - x: Input data, in any form that can be converted to a Numpy array. - data_format: Image data format, can be either `"channels_first"` or - `"channels_last"`. Defaults to `None`, in which case the global setting - `tf.keras.backend.image_data_format()` is used (unless you changed it, - it defaults to `"channels_last"`). - scale: Whether to rescale the image such that minimum and maximum values - are 0 and 255 respectively. Defaults to `True`. - dtype: Dtype to use. Default to `None`, in which case the global setting - `tf.keras.backend.floatx()` is used (unless you changed it, it defaults - to `"float32"`) - - Returns: - A PIL Image instance. - - Raises: - ImportError: if PIL is not available. - ValueError: if invalid `x` or `data_format` is passed. - """ - - if data_format is None: - data_format = backend.image_data_format() - if dtype is None: - dtype = backend.floatx() - if pil_image is None: - raise ImportError('Could not import PIL.Image. ' - 'The use of `array_to_img` requires PIL.') - x = np.asarray(x, dtype=dtype) - if x.ndim != 3: - raise ValueError('Expected image array to have rank 3 (single image).
' - f'Got array with shape: {x.shape}') - - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError(f'Invalid data_format: {data_format}') - - # Original Numpy array x has format (height, width, channel) - # or (channel, height, width) - # but target PIL image has format (width, height, channel) - if data_format == 'channels_first': - x = x.transpose(1, 2, 0) - if scale: - x = x - np.min(x) - x_max = np.max(x) - if x_max != 0: - x /= x_max - x *= 255 - if x.shape[2] == 4: - # RGBA - return pil_image.fromarray(x.astype('uint8'), 'RGBA') - elif x.shape[2] == 3: - # RGB - return pil_image.fromarray(x.astype('uint8'), 'RGB') - elif x.shape[2] == 1: - # grayscale - if np.max(x) > 255: - # 32-bit signed integer grayscale image. PIL mode "I" - return pil_image.fromarray(x[:, :, 0].astype('int32'), 'I') - return pil_image.fromarray(x[:, :, 0].astype('uint8'), 'L') - else: - raise ValueError(f'Unsupported channel number: {x.shape[2]}') - - -@keras_export('keras.utils.img_to_array', - 'keras.preprocessing.image.img_to_array') +@keras_export( + "keras.utils.img_to_array", "keras.preprocessing.image.img_to_array" +) def img_to_array(img, data_format=None, dtype=None): - """Converts a PIL Image instance to a Numpy array. - - Usage: - - ```python - from PIL import Image - img_data = np.random.random(size=(100, 100, 3)) - img = tf.keras.preprocessing.image.array_to_img(img_data) - array = tf.keras.preprocessing.image.img_to_array(img) - ``` - - - Args: - img: Input PIL Image instance. - data_format: Image data format, can be either `"channels_first"` or - `"channels_last"`. Defaults to `None`, in which case the global setting - `tf.keras.backend.image_data_format()` is used (unless you changed it, - it defaults to `"channels_last"`). - dtype: Dtype to use. Default to `None`, in which case the global setting - `tf.keras.backend.floatx()` is used (unless you changed it, it defaults - to `"float32"`). - - Returns: - A 3D Numpy array. - - Raises: - ValueError: if invalid `img` or `data_format` is passed. - """ - - if data_format is None: - data_format = backend.image_data_format() - if dtype is None: - dtype = backend.floatx() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError(f'Unknown data_format: {data_format}') - # Numpy array x has format (height, width, channel) - # or (channel, height, width) - # but original PIL image has format (width, height, channel) - x = np.asarray(img, dtype=dtype) - if len(x.shape) == 3: - if data_format == 'channels_first': - x = x.transpose(2, 0, 1) - elif len(x.shape) == 2: - if data_format == 'channels_first': - x = x.reshape((1, x.shape[0], x.shape[1])) + """Converts a PIL Image instance to a Numpy array. + + Usage: + + ```python + from PIL import Image + img_data = np.random.random(size=(100, 100, 3)) + img = tf.keras.utils.array_to_img(img_data) + array = tf.keras.utils.img_to_array(img) + ``` + + + Args: + img: Input PIL Image instance. + data_format: Image data format, can be either `"channels_first"` or + `"channels_last"`. None means the global + setting `tf.keras.backend.image_data_format()` is used (unless you + changed it, it uses `"channels_last"`). Defaults to `None`. + dtype: Dtype to use. None means the global setting + `tf.keras.backend.floatx()` is used (unless you changed it, it + uses `"float32"`). Defaults to `None`. + + Returns: + A 3D Numpy array. + + Raises: + ValueError: if invalid `img` or `data_format` is passed.
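+
+    A minimal sketch of the `data_format` argument (illustrative only;
+    assumes `np` and `tf` are imported):
+
+    ```python
+    img = tf.keras.utils.array_to_img(np.zeros((100, 100, 3)))
+    arr = tf.keras.utils.img_to_array(img, data_format="channels_first")
+    assert arr.shape == (3, 100, 100)  # channels moved to the leading axis
+    ```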
+ """ + + if data_format is None: + data_format = backend.image_data_format() + if dtype is None: + dtype = backend.floatx() + if data_format not in {"channels_first", "channels_last"}: + raise ValueError(f"Unknown data_format: {data_format}") + # Numpy array x has format (height, width, channel) + # or (channel, height, width) + # but original PIL image has format (width, height, channel) + x = np.asarray(img, dtype=dtype) + if len(x.shape) == 3: + if data_format == "channels_first": + x = x.transpose(2, 0, 1) + elif len(x.shape) == 2: + if data_format == "channels_first": + x = x.reshape((1, x.shape[0], x.shape[1])) + else: + x = x.reshape((x.shape[0], x.shape[1], 1)) else: - x = x.reshape((x.shape[0], x.shape[1], 1)) - else: - raise ValueError(f'Unsupported image shape: {x.shape}') - return x + raise ValueError(f"Unsupported image shape: {x.shape}") + return x -@keras_export('keras.utils.save_img', 'keras.preprocessing.image.save_img') +@keras_export("keras.utils.save_img", "keras.preprocessing.image.save_img") def save_img(path, x, data_format=None, file_format=None, scale=True, **kwargs): - """Saves an image stored as a Numpy array to a path or file object. - - Args: - path: Path or file object. - x: Numpy array. - data_format: Image data format, either `"channels_first"` or - `"channels_last"`. - file_format: Optional file format override. If omitted, the format to use - is determined from the filename extension. If a file object was used - instead of a filename, this parameter should always be used. - scale: Whether to rescale image values to be within `[0, 255]`. - **kwargs: Additional keyword arguments passed to `PIL.Image.save()`. - """ - if data_format is None: - data_format = backend.image_data_format() - img = array_to_img(x, data_format=data_format, scale=scale) - if img.mode == 'RGBA' and (file_format == 'jpg' or file_format == 'jpeg'): - warnings.warn('The JPG format does not support ' - 'RGBA images, converting to RGB.') - img = img.convert('RGB') - img.save(path, format=file_format, **kwargs) - - -@keras_export('keras.utils.load_img', 'keras.preprocessing.image.load_img') -def load_img(path, - grayscale=False, - color_mode='rgb', - target_size=None, - interpolation='nearest', - keep_aspect_ratio=False): - """Loads an image into PIL format. - - Usage: - - ``` - image = tf.keras.preprocessing.image.load_img(image_path) - input_arr = tf.keras.preprocessing.image.img_to_array(image) - input_arr = np.array([input_arr]) # Convert single image to a batch. - predictions = model.predict(input_arr) - ``` - - Args: - path: Path to image file. - grayscale: DEPRECATED use `color_mode="grayscale"`. - color_mode: One of `"grayscale"`, `"rgb"`, `"rgba"`. Default: `"rgb"`. - The desired image format. - target_size: Either `None` (default to original size) or tuple of ints - `(img_height, img_width)`. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. Supported - methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. If PIL version - 1.1.3 or newer is installed, `"lanczos"` is also supported. If PIL - version 3.4.0 or newer is installed, `"box"` and `"hamming"` are also - supported. By default, `"nearest"` is used. - keep_aspect_ratio: Boolean, whether to resize images to a target - size without aspect ratio distortion. The image is cropped in - the center with target aspect ratio before resizing. - - Returns: - A PIL Image instance. - - Raises: - ImportError: if PIL is not available. 
- ValueError: if interpolation method is not supported. - """ - if grayscale: - warnings.warn('grayscale is deprecated. Please use ' - 'color_mode = "grayscale"') - color_mode = 'grayscale' - if pil_image is None: - raise ImportError('Could not import PIL.Image. ' - 'The use of `load_img` requires PIL.') - if isinstance(path, io.BytesIO): - img = pil_image.open(path) - elif isinstance(path, (pathlib.Path, bytes, str)): - if isinstance(path, pathlib.Path): - path = str(path.resolve()) - with open(path, 'rb') as f: - img = pil_image.open(io.BytesIO(f.read())) - else: - raise TypeError('path should be path-like or io.BytesIO' - ', not {}'.format(type(path))) - - if color_mode == 'grayscale': - # if image is not already an 8-bit, 16-bit or 32-bit grayscale image - # convert it to an 8-bit grayscale image. - if img.mode not in ('L', 'I;16', 'I'): - img = img.convert('L') - elif color_mode == 'rgba': - if img.mode != 'RGBA': - img = img.convert('RGBA') - elif color_mode == 'rgb': - if img.mode != 'RGB': - img = img.convert('RGB') - else: - raise ValueError('color_mode must be "grayscale", "rgb", or "rgba"') - if target_size is not None: - width_height_tuple = (target_size[1], target_size[0]) - if img.size != width_height_tuple: - if interpolation not in _PIL_INTERPOLATION_METHODS: - raise ValueError('Invalid interpolation method {} specified. Supported ' - 'methods are {}'.format( - interpolation, - ', '.join(_PIL_INTERPOLATION_METHODS.keys()))) - resample = _PIL_INTERPOLATION_METHODS[interpolation] - - if keep_aspect_ratio: - width, height = img.size - target_width, target_height = width_height_tuple - - crop_height = (width * target_height) // target_width - crop_width = (height * target_width) // target_height - - # Set back to input height / width - # if crop_height / crop_width is not smaller. - crop_height = min(height, crop_height) - crop_width = min(width, crop_width) - - crop_box_hstart = (height - crop_height) // 2 - crop_box_wstart = (width - crop_width) // 2 - crop_box_wend = crop_box_wstart + crop_width - crop_box_hend = crop_box_hstart + crop_height - crop_box = [ - crop_box_wstart, crop_box_hstart, crop_box_wend, crop_box_hend - ] - img = img.resize(width_height_tuple, resample, box=crop_box) - else: - img = img.resize(width_height_tuple, resample) - return img + """Saves an image stored as a Numpy array to a path or file object. + + Args: + path: Path or file object. + x: Numpy array. + data_format: Image data format, either `"channels_first"` or + `"channels_last"`. + file_format: Optional file format override. If omitted, the format to + use is determined from the filename extension. If a file object was + used instead of a filename, this parameter should always be used. + scale: Whether to rescale image values to be within `[0, 255]`. + **kwargs: Additional keyword arguments passed to `PIL.Image.save()`. + """ + if data_format is None: + data_format = backend.image_data_format() + img = array_to_img(x, data_format=data_format, scale=scale) + if img.mode == "RGBA" and (file_format == "jpg" or file_format == "jpeg"): + warnings.warn( + "The JPG format does not support RGBA images, converting to RGB." + ) + img = img.convert("RGB") + img.save(path, format=file_format, **kwargs) + + +@keras_export("keras.utils.load_img", "keras.preprocessing.image.load_img") +def load_img( + path, + grayscale=False, + color_mode="rgb", + target_size=None, + interpolation="nearest", + keep_aspect_ratio=False, +): + """Loads an image into PIL format. 
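The reformatted `save_img` keeps its one behavioral branch: JPEG has no alpha channel, so RGBA input is converted to RGB with a warning. A short sketch (output file names are arbitrary):

```python
import numpy as np
import tensorflow as tf
from PIL import Image

rgba = np.random.randint(0, 256, size=(64, 64, 4)).astype("float32")

tf.keras.utils.save_img("out.png", rgba)  # PNG keeps the alpha channel
tf.keras.utils.save_img("out.jpg", rgba, file_format="jpeg")  # warns, drops it

print(Image.open("out.png").mode)  # RGBA
print(Image.open("out.jpg").mode)  # RGB
```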
+ + Usage: + + ```python + image = tf.keras.utils.load_img(image_path) + input_arr = tf.keras.utils.img_to_array(image) + input_arr = np.array([input_arr]) # Convert single image to a batch. + predictions = model.predict(input_arr) + ``` + + Args: + path: Path to image file. + grayscale: DEPRECATED use `color_mode="grayscale"`. + color_mode: One of `"grayscale"`, `"rgb"`, `"rgba"`. Default: `"rgb"`. + The desired image format. + target_size: Either `None` (default to original size) or tuple of ints + `(img_height, img_width)`. + interpolation: Interpolation method used to resample the image if the + target size is different from that of the loaded image. Supported + methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. If PIL version + 1.1.3 or newer is installed, `"lanczos"` is also supported. If PIL + version 3.4.0 or newer is installed, `"box"` and `"hamming"` are also + supported. By default, `"nearest"` is used. + keep_aspect_ratio: Boolean, whether to resize images to a target + size without aspect ratio distortion. The image is cropped in + the center with target aspect ratio before resizing. + + Returns: + A PIL Image instance. + + Raises: + ImportError: if PIL is not available. + ValueError: if interpolation method is not supported. + """ + if grayscale: + warnings.warn( + 'grayscale is deprecated. Please use color_mode = "grayscale"' + ) + color_mode = "grayscale" + if pil_image is None: + raise ImportError( + "Could not import PIL.Image. The use of `load_img` requires PIL." + ) + if isinstance(path, io.BytesIO): + img = pil_image.open(path) + elif isinstance(path, (pathlib.Path, bytes, str)): + if isinstance(path, pathlib.Path): + path = str(path.resolve()) + with open(path, "rb") as f: + img = pil_image.open(io.BytesIO(f.read())) + else: + raise TypeError( + f"path should be path-like or io.BytesIO, not {type(path)}" + ) + + if color_mode == "grayscale": + # if image is not already an 8-bit, 16-bit or 32-bit grayscale image + # convert it to an 8-bit grayscale image. + if img.mode not in ("L", "I;16", "I"): + img = img.convert("L") + elif color_mode == "rgba": + if img.mode != "RGBA": + img = img.convert("RGBA") + elif color_mode == "rgb": + if img.mode != "RGB": + img = img.convert("RGB") + else: + raise ValueError('color_mode must be "grayscale", "rgb", or "rgba"') + if target_size is not None: + width_height_tuple = (target_size[1], target_size[0]) + if img.size != width_height_tuple: + if interpolation not in _PIL_INTERPOLATION_METHODS: + raise ValueError( + "Invalid interpolation method {} specified. Supported " + "methods are {}".format( + interpolation, + ", ".join(_PIL_INTERPOLATION_METHODS.keys()), + ) + ) + resample = _PIL_INTERPOLATION_METHODS[interpolation] + + if keep_aspect_ratio: + width, height = img.size + target_width, target_height = width_height_tuple + + crop_height = (width * target_height) // target_width + crop_width = (height * target_width) // target_height + + # Set back to input height / width + # if crop_height / crop_width is not smaller. 
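The `keep_aspect_ratio` arithmetic this hunk completes below selects the largest centered crop with the target aspect ratio, then resizes it. The integer math can be checked in isolation; the following is a standalone re-derivation of the same formulas, not the library function itself:

```python
def center_crop_box(width, height, target_width, target_height):
    # Candidate crop extents; exactly one of the two can exceed the image,
    # and the min() clamp restores it to the image bound.
    crop_height = min(height, (width * target_height) // target_width)
    crop_width = min(width, (height * target_width) // target_height)
    left = (width - crop_width) // 2
    top = (height - crop_height) // 2
    return (left, top, left + crop_width, top + crop_height)

# A 100x50 (width x height) image cropped for a square 25x25 target:
print(center_crop_box(100, 50, 25, 25))  # (25, 0, 75, 50): a centered 50x50 box
```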
+ crop_height = min(height, crop_height) + crop_width = min(width, crop_width) + + crop_box_hstart = (height - crop_height) // 2 + crop_box_wstart = (width - crop_width) // 2 + crop_box_wend = crop_box_wstart + crop_width + crop_box_hend = crop_box_hstart + crop_height + crop_box = [ + crop_box_wstart, + crop_box_hstart, + crop_box_wend, + crop_box_hend, + ] + img = img.resize(width_height_tuple, resample, box=crop_box) + else: + img = img.resize(width_height_tuple, resample) + return img diff --git a/keras/utils/image_utils_test.py b/keras/utils/image_utils_test.py index ff88e939a3e3..07e103c00390 100644 --- a/keras/utils/image_utils_test.py +++ b/keras/utils/image_utils_test.py @@ -18,420 +18,486 @@ import os import pathlib +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import image_utils -import numpy as np -import tensorflow.compat.v2 as tf @test_utils.run_v2_only class TestImageUtils(test_combinations.TestCase): - - def test_smart_resize(self): - test_input = np.random.random((20, 40, 3)) - output = image_utils.smart_resize(test_input, size=(50, 50)) - self.assertIsInstance(output, np.ndarray) - self.assertListEqual(list(output.shape), [50, 50, 3]) - output = image_utils.smart_resize(test_input, size=(10, 10)) - self.assertListEqual(list(output.shape), [10, 10, 3]) - output = image_utils.smart_resize(test_input, size=(100, 50)) - self.assertListEqual(list(output.shape), [100, 50, 3]) - output = image_utils.smart_resize(test_input, size=(5, 15)) - self.assertListEqual(list(output.shape), [5, 15, 3]) - - @parameterized.named_parameters(('size1', (50, 50)), ('size2', (10, 10)), - ('size3', (100, 50)), ('size4', (5, 15))) - def test_smart_resize_tf_dataset(self, size): - test_input_np = np.random.random((2, 20, 40, 3)) - test_ds = tf.data.Dataset.from_tensor_slices(test_input_np) - - resize = lambda img: image_utils.smart_resize(img, size=size) - test_ds = test_ds.map(resize) - for sample in test_ds.as_numpy_iterator(): - self.assertIsInstance(sample, np.ndarray) - self.assertListEqual(list(sample.shape), [size[0], size[1], 3]) - - def test_smart_resize_batch(self): - img = np.random.random((2, 20, 40, 3)) - out = image_utils.smart_resize(img, size=(20, 20)) - self.assertListEqual(list(out.shape), [2, 20, 20, 3]) - self.assertAllClose(out, img[:, :, 10:-10, :]) - - def test_smart_resize_errors(self): - with self.assertRaisesRegex(ValueError, 'a tuple of 2 integers'): - image_utils.smart_resize(np.random.random((20, 20, 2)), size=(10, 5, 3)) - with self.assertRaisesRegex(ValueError, 'incorrect rank'): - image_utils.smart_resize(np.random.random((2, 4)), size=(10, 5)) - with self.assertRaisesRegex(ValueError, 'incorrect rank'): - image_utils.smart_resize(np.random.random((2, 4, 4, 5, 3)), size=(10, 5)) + def test_smart_resize(self): + test_input = np.random.random((20, 40, 3)) + output = image_utils.smart_resize(test_input, size=(50, 50)) + self.assertIsInstance(output, np.ndarray) + self.assertListEqual(list(output.shape), [50, 50, 3]) + output = image_utils.smart_resize(test_input, size=(10, 10)) + self.assertListEqual(list(output.shape), [10, 10, 3]) + output = image_utils.smart_resize(test_input, size=(100, 50)) + self.assertListEqual(list(output.shape), [100, 50, 3]) + output = image_utils.smart_resize(test_input, size=(5, 15)) + self.assertListEqual(list(output.shape), [5, 15, 3]) + + @parameterized.named_parameters( + ("size1", 
(50, 50)), + ("size2", (10, 10)), + ("size3", (100, 50)), + ("size4", (5, 15)), + ) + def test_smart_resize_tf_dataset(self, size): + test_input_np = np.random.random((2, 20, 40, 3)) + test_ds = tf.data.Dataset.from_tensor_slices(test_input_np) + + resize = lambda img: image_utils.smart_resize(img, size=size) + test_ds = test_ds.map(resize) + for sample in test_ds.as_numpy_iterator(): + self.assertIsInstance(sample, np.ndarray) + self.assertListEqual(list(sample.shape), [size[0], size[1], 3]) + + def test_smart_resize_batch(self): + img = np.random.random((2, 20, 40, 3)) + out = image_utils.smart_resize(img, size=(20, 20)) + self.assertListEqual(list(out.shape), [2, 20, 20, 3]) + self.assertAllClose(out, img[:, :, 10:-10, :]) + + def test_smart_resize_errors(self): + with self.assertRaisesRegex(ValueError, "a tuple of 2 integers"): + image_utils.smart_resize( + np.random.random((20, 20, 2)), size=(10, 5, 3) + ) + with self.assertRaisesRegex(ValueError, "incorrect rank"): + image_utils.smart_resize(np.random.random((2, 4)), size=(10, 5)) + with self.assertRaisesRegex(ValueError, "incorrect rank"): + image_utils.smart_resize( + np.random.random((2, 4, 4, 5, 3)), size=(10, 5) + ) @test_utils.run_v2_only class TestImageLoading(test_combinations.TestCase): - - def test_load_img(self): - tmpdir = self.create_tempdir() - filename_rgb = os.path.join(tmpdir.full_path, 'rgb_utils.png') - filename_rgba = os.path.join(tmpdir.full_path, 'rgba_utils.png') - filename_grayscale_8bit = os.path.join(tmpdir.full_path, - 'grayscale_8bit_utils.png') - filename_grayscale_16bit = os.path.join(tmpdir.full_path, - 'grayscale_16bit_utils.tiff') - filename_grayscale_32bit = os.path.join(tmpdir.full_path, - 'grayscale_32bit_utils.tiff') - - original_rgb_array = np.array( - 255 * np.random.rand(100, 100, 3), dtype=np.uint8) - original_rgb = image_utils.array_to_img(original_rgb_array, scale=False) - original_rgb.save(filename_rgb) - - original_rgba_array = np.array( - 255 * np.random.rand(100, 100, 4), dtype=np.uint8) - original_rgba = image_utils.array_to_img(original_rgba_array, scale=False) - original_rgba.save(filename_rgba) - - original_grayscale_8bit_array = np.array( - 255 * np.random.rand(100, 100, 1), dtype=np.uint8) - original_grayscale_8bit = image_utils.array_to_img( - original_grayscale_8bit_array, scale=False) - original_grayscale_8bit.save(filename_grayscale_8bit) - - original_grayscale_16bit_array = np.array( - np.random.randint(-2147483648, 2147483647, (100, 100, 1)), - dtype=np.int16) - original_grayscale_16bit = image_utils.array_to_img( - original_grayscale_16bit_array, scale=False, dtype='int16') - original_grayscale_16bit.save(filename_grayscale_16bit) - - original_grayscale_32bit_array = np.array( - np.random.randint(-2147483648, 2147483647, (100, 100, 1)), - dtype=np.int32) - original_grayscale_32bit = image_utils.array_to_img( - original_grayscale_32bit_array, scale=False, dtype='int32') - original_grayscale_32bit.save(filename_grayscale_32bit) - - # Test that loaded image is exactly equal to original. 
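The `smart_resize` cases above all reduce to one contract: crop to the target aspect ratio, then resize, so the output shape is exact with no letterboxing or distortion. A minimal usage sketch mirroring the `tf.data` test (at this point in history the function is exported as `tf.keras.preprocessing.image.smart_resize`):

```python
import numpy as np
import tensorflow as tf

images = np.random.random((8, 20, 40, 3)).astype("float32")
ds = tf.data.Dataset.from_tensor_slices(images)
ds = ds.map(
    lambda img: tf.keras.preprocessing.image.smart_resize(img, (50, 50))
)
for sample in ds.take(1):
    print(sample.shape)  # (50, 50, 3), regardless of the 20x40 input
```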
- - loaded_im = image_utils.load_img(filename_rgb) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, original_rgb_array.shape) - self.assertAllClose(loaded_im_array, original_rgb_array) - - loaded_im = image_utils.load_img(filename_rgba, color_mode='rgba') - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, original_rgba_array.shape) - self.assertAllClose(loaded_im_array, original_rgba_array) - - loaded_im = image_utils.load_img(filename_rgb, color_mode='grayscale') - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual( - loaded_im_array.shape, - (original_rgb_array.shape[0], original_rgb_array.shape[1], 1)) - - loaded_im = image_utils.load_img( - filename_grayscale_8bit, color_mode='grayscale') - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, original_grayscale_8bit_array.shape) - self.assertAllClose(loaded_im_array, original_grayscale_8bit_array) - - loaded_im = image_utils.load_img( - filename_grayscale_16bit, color_mode='grayscale') - loaded_im_array = image_utils.img_to_array(loaded_im, dtype='int16') - self.assertEqual(loaded_im_array.shape, - original_grayscale_16bit_array.shape) - self.assertAllClose(loaded_im_array, original_grayscale_16bit_array) - # test casting int16 image to float32 - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertAllClose(loaded_im_array, original_grayscale_16bit_array) - - loaded_im = image_utils.load_img( - filename_grayscale_32bit, color_mode='grayscale') - loaded_im_array = image_utils.img_to_array(loaded_im, dtype='int32') - self.assertEqual(loaded_im_array.shape, - original_grayscale_32bit_array.shape) - self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) - # test casting int32 image to float32 - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) - - # Test that nothing is changed when target size is equal to original. 
- - loaded_im = image_utils.load_img(filename_rgb, target_size=(100, 100)) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, original_rgb_array.shape) - self.assertAllClose(loaded_im_array, original_rgb_array) - - loaded_im = image_utils.load_img( - filename_rgba, color_mode='rgba', target_size=(100, 100)) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, original_rgba_array.shape) - self.assertAllClose(loaded_im_array, original_rgba_array) - - loaded_im = image_utils.load_img( - filename_rgb, color_mode='grayscale', target_size=(100, 100)) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual( - loaded_im_array.shape, - (original_rgba_array.shape[0], original_rgba_array.shape[1], 1)) - - loaded_im = image_utils.load_img( - filename_grayscale_8bit, color_mode='grayscale', target_size=(100, 100)) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, original_grayscale_8bit_array.shape) - self.assertAllClose(loaded_im_array, original_grayscale_8bit_array) - - loaded_im = image_utils.load_img( - filename_grayscale_16bit, - color_mode='grayscale', - target_size=(100, 100)) - loaded_im_array = image_utils.img_to_array(loaded_im, dtype='int16') - self.assertEqual(loaded_im_array.shape, - original_grayscale_16bit_array.shape) - self.assertAllClose(loaded_im_array, original_grayscale_16bit_array) - - loaded_im = image_utils.load_img( - filename_grayscale_32bit, - color_mode='grayscale', - target_size=(100, 100)) - loaded_im_array = image_utils.img_to_array(loaded_im, dtype='int32') - self.assertEqual(loaded_im_array.shape, - original_grayscale_32bit_array.shape) - self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) - - # Test down-sampling with bilinear interpolation. - - loaded_im = image_utils.load_img(filename_rgb, target_size=(25, 25)) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, (25, 25, 3)) - - loaded_im = image_utils.load_img( - filename_rgba, color_mode='rgba', target_size=(25, 25)) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, (25, 25, 4)) - - loaded_im = image_utils.load_img( - filename_rgb, color_mode='grayscale', target_size=(25, 25)) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, (25, 25, 1)) - - loaded_im = image_utils.load_img( - filename_grayscale_8bit, color_mode='grayscale', target_size=(25, 25)) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, (25, 25, 1)) - - loaded_im = image_utils.load_img( - filename_grayscale_16bit, color_mode='grayscale', target_size=(25, 25)) - loaded_im_array = image_utils.img_to_array(loaded_im, dtype='int16') - self.assertEqual(loaded_im_array.shape, (25, 25, 1)) - - loaded_im = image_utils.load_img( - filename_grayscale_32bit, color_mode='grayscale', target_size=(25, 25)) - loaded_im_array = image_utils.img_to_array(loaded_im, dtype='int32') - self.assertEqual(loaded_im_array.shape, (25, 25, 1)) - - # Test down-sampling with nearest neighbor interpolation. 
- - loaded_im_nearest = image_utils.load_img( - filename_rgb, target_size=(25, 25), interpolation='nearest') - loaded_im_array_nearest = image_utils.img_to_array(loaded_im_nearest) - self.assertEqual(loaded_im_array_nearest.shape, (25, 25, 3)) - self.assertTrue(np.any(loaded_im_array_nearest != loaded_im_array)) - - loaded_im_nearest = image_utils.load_img( - filename_rgba, - color_mode='rgba', - target_size=(25, 25), - interpolation='nearest') - loaded_im_array_nearest = image_utils.img_to_array(loaded_im_nearest) - self.assertEqual(loaded_im_array_nearest.shape, (25, 25, 4)) - self.assertTrue(np.any(loaded_im_array_nearest != loaded_im_array)) - - loaded_im = image_utils.load_img( - filename_grayscale_8bit, - color_mode='grayscale', - target_size=(25, 25), - interpolation='nearest') - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, (25, 25, 1)) - - loaded_im = image_utils.load_img( - filename_grayscale_16bit, - color_mode='grayscale', - target_size=(25, 25), - interpolation='nearest') - loaded_im_array = image_utils.img_to_array(loaded_im, dtype='int16') - self.assertEqual(loaded_im_array.shape, (25, 25, 1)) - - loaded_im = image_utils.load_img( - filename_grayscale_32bit, - color_mode='grayscale', - target_size=(25, 25), - interpolation='nearest') - loaded_im_array = image_utils.img_to_array(loaded_im, dtype='int32') - self.assertEqual(loaded_im_array.shape, (25, 25, 1)) - - # Test different path type - with open(filename_grayscale_32bit, 'rb') as f: - path_ = io.BytesIO(f.read()) # io.Bytesio - loaded_im = image_utils.load_img(path_, color_mode='grayscale') - loaded_im_array = image_utils.img_to_array(loaded_im, dtype=np.int32) - self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) - - path_ = filename_grayscale_32bit # str - loaded_im = image_utils.load_img(path_, color_mode='grayscale') - loaded_im_array = image_utils.img_to_array(loaded_im, dtype=np.int32) - self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) - - path_ = filename_grayscale_32bit.encode() # bytes - loaded_im = image_utils.load_img(path_, color_mode='grayscale') - loaded_im_array = image_utils.img_to_array(loaded_im, dtype=np.int32) - self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) - - path_ = pathlib.Path( - os.path.join(tmpdir.full_path, 'grayscale_32bit_utils.tiff')) - loaded_im = image_utils.load_img(path_, color_mode='grayscale') - loaded_im_array = image_utils.img_to_array(loaded_im, dtype=np.int32) - self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) - - # Check that exception is raised if interpolation not supported. 
- - loaded_im = image_utils.load_img(filename_rgb, interpolation='unsupported') - with self.assertRaises(ValueError): - loaded_im = image_utils.load_img( - filename_rgb, target_size=(25, 25), interpolation='unsupported') - - # Check that the aspect ratio of a square is the same - - filename_red_square = os.path.join(tmpdir.full_path, 'red_square_utils.png') - arr = np.zeros((50, 100, 3), dtype=np.uint8) # rectangle image 100x50 - arr[20:30, 45:55, 0] = 255 # red square 10x10 - red_square_array = np.array(arr) - red_square = image_utils.array_to_img(red_square_array, scale=False) - red_square.save(filename_red_square) - - loaded_im = image_utils.load_img( - filename_red_square, target_size=(25, 25), keep_aspect_ratio=True) - loaded_im_array = image_utils.img_to_array(loaded_im) - self.assertEqual(loaded_im_array.shape, (25, 25, 3)) - - red_channel_arr = loaded_im_array[:, :, 0].astype(np.bool) - square_width = np.sum(np.sum(red_channel_arr, axis=0)) - square_height = np.sum(np.sum(red_channel_arr, axis=1)) - aspect_ratio_result = square_width / square_height - - # original square had 1:1 ratio - self.assertNear(aspect_ratio_result, 1.0, 0.01) - - def test_array_to_img_and_img_to_array(self): - height, width = 10, 8 - - # Test the data format - # Test RGB 3D - x = np.random.random((3, height, width)) - img = image_utils.array_to_img(x, data_format='channels_first') - self.assertEqual(img.size, (width, height)) - - x = image_utils.img_to_array(img, data_format='channels_first') - self.assertEqual(x.shape, (3, height, width)) - - # Test RGBA 3D - x = np.random.random((4, height, width)) - img = image_utils.array_to_img(x, data_format='channels_first') - self.assertEqual(img.size, (width, height)) - - x = image_utils.img_to_array(img, data_format='channels_first') - self.assertEqual(x.shape, (4, height, width)) - - # Test 2D - x = np.random.random((1, height, width)) - img = image_utils.array_to_img(x, data_format='channels_first') - self.assertEqual(img.size, (width, height)) - - x = image_utils.img_to_array(img, data_format='channels_first') - self.assertEqual(x.shape, (1, height, width)) - - # grayscale 32-bit signed integer - x = np.array( - np.random.randint(-2147483648, 2147483647, (1, height, width)), - dtype=np.int32) - img = image_utils.array_to_img(x, data_format='channels_first') - self.assertEqual(img.size, (width, height)) - - x = image_utils.img_to_array(img, data_format='channels_first') - self.assertEqual(x.shape, (1, height, width)) - - # Test tf data format - # Test RGB 3D - x = np.random.random((height, width, 3)) - img = image_utils.array_to_img(x, data_format='channels_last') - self.assertEqual(img.size, (width, height)) - - x = image_utils.img_to_array(img, data_format='channels_last') - self.assertEqual(x.shape, (height, width, 3)) - - # Test RGBA 3D - x = np.random.random((height, width, 4)) - img = image_utils.array_to_img(x, data_format='channels_last') - self.assertEqual(img.size, (width, height)) - - x = image_utils.img_to_array(img, data_format='channels_last') - self.assertEqual(x.shape, (height, width, 4)) - - # Test 2D - x = np.random.random((height, width, 1)) - img = image_utils.array_to_img(x, data_format='channels_last') - self.assertEqual(img.size, (width, height)) - - x = image_utils.img_to_array(img, data_format='channels_last') - self.assertEqual(x.shape, (height, width, 1)) - - # grayscale 16-bit signed integer - x = np.array( - np.random.randint(-2147483648, 2147483647, (height, width, 1)), - dtype=np.int16) - img = image_utils.array_to_img(x, 
data_format='channels_last') - self.assertEqual(img.size, (width, height)) - - x = image_utils.img_to_array(img, data_format='channels_last') - self.assertEqual(x.shape, (height, width, 1)) - - # grayscale 32-bit signed integer - x = np.array( - np.random.randint(-2147483648, 2147483647, (height, width, 1)), - dtype=np.int32) - img = image_utils.array_to_img(x, data_format='channels_last') - self.assertEqual(img.size, (width, height)) - - x = image_utils.img_to_array(img, data_format='channels_last') - self.assertEqual(x.shape, (height, width, 1)) - - # Test invalid use case - with self.assertRaises(ValueError): - x = np.random.random((height, width)) # not 3D - img = image_utils.array_to_img(x, data_format='channels_first') - - with self.assertRaises(ValueError): - x = np.random.random((height, width, 3)) - # unknown data_format - img = image_utils.array_to_img(x, data_format='channels') - - with self.assertRaises(ValueError): - # neither RGB, RGBA, or gray-scale - x = np.random.random((height, width, 5)) - img = image_utils.array_to_img(x, data_format='channels_last') - - with self.assertRaises(ValueError): - x = np.random.random((height, width, 3)) - # unknown data_format - img = image_utils.img_to_array(x, data_format='channels') - - with self.assertRaises(ValueError): - # neither RGB, RGBA, or gray-scale - x = np.random.random((height, width, 5, 3)) - img = image_utils.img_to_array(x, data_format='channels_last') - - -if __name__ == '__main__': - tf.test.main() + def test_load_img(self): + tmpdir = self.create_tempdir() + filename_rgb = os.path.join(tmpdir.full_path, "rgb_utils.png") + filename_rgba = os.path.join(tmpdir.full_path, "rgba_utils.png") + filename_grayscale_8bit = os.path.join( + tmpdir.full_path, "grayscale_8bit_utils.png" + ) + filename_grayscale_16bit = os.path.join( + tmpdir.full_path, "grayscale_16bit_utils.tiff" + ) + filename_grayscale_32bit = os.path.join( + tmpdir.full_path, "grayscale_32bit_utils.tiff" + ) + + original_rgb_array = np.array( + 255 * np.random.rand(100, 100, 3), dtype=np.uint8 + ) + original_rgb = image_utils.array_to_img(original_rgb_array, scale=False) + original_rgb.save(filename_rgb) + + original_rgba_array = np.array( + 255 * np.random.rand(100, 100, 4), dtype=np.uint8 + ) + original_rgba = image_utils.array_to_img( + original_rgba_array, scale=False + ) + original_rgba.save(filename_rgba) + + original_grayscale_8bit_array = np.array( + 255 * np.random.rand(100, 100, 1), dtype=np.uint8 + ) + original_grayscale_8bit = image_utils.array_to_img( + original_grayscale_8bit_array, scale=False + ) + original_grayscale_8bit.save(filename_grayscale_8bit) + + original_grayscale_16bit_array = np.array( + np.random.randint(-2147483648, 2147483647, (100, 100, 1)), + dtype=np.int16, + ) + original_grayscale_16bit = image_utils.array_to_img( + original_grayscale_16bit_array, scale=False, dtype="int16" + ) + original_grayscale_16bit.save(filename_grayscale_16bit) + + original_grayscale_32bit_array = np.array( + np.random.randint(-2147483648, 2147483647, (100, 100, 1)), + dtype=np.int32, + ) + original_grayscale_32bit = image_utils.array_to_img( + original_grayscale_32bit_array, scale=False, dtype="int32" + ) + original_grayscale_32bit.save(filename_grayscale_32bit) + + # Test that loaded image is exactly equal to original. 
+ + loaded_im = image_utils.load_img(filename_rgb) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual(loaded_im_array.shape, original_rgb_array.shape) + self.assertAllClose(loaded_im_array, original_rgb_array) + + loaded_im = image_utils.load_img(filename_rgba, color_mode="rgba") + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual(loaded_im_array.shape, original_rgba_array.shape) + self.assertAllClose(loaded_im_array, original_rgba_array) + + loaded_im = image_utils.load_img(filename_rgb, color_mode="grayscale") + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual( + loaded_im_array.shape, + (original_rgb_array.shape[0], original_rgb_array.shape[1], 1), + ) + + loaded_im = image_utils.load_img( + filename_grayscale_8bit, color_mode="grayscale" + ) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual( + loaded_im_array.shape, original_grayscale_8bit_array.shape + ) + self.assertAllClose(loaded_im_array, original_grayscale_8bit_array) + + loaded_im = image_utils.load_img( + filename_grayscale_16bit, color_mode="grayscale" + ) + loaded_im_array = image_utils.img_to_array(loaded_im, dtype="int16") + self.assertEqual( + loaded_im_array.shape, original_grayscale_16bit_array.shape + ) + self.assertAllClose(loaded_im_array, original_grayscale_16bit_array) + # test casting int16 image to float32 + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertAllClose(loaded_im_array, original_grayscale_16bit_array) + + loaded_im = image_utils.load_img( + filename_grayscale_32bit, color_mode="grayscale" + ) + loaded_im_array = image_utils.img_to_array(loaded_im, dtype="int32") + self.assertEqual( + loaded_im_array.shape, original_grayscale_32bit_array.shape + ) + self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) + # test casting int32 image to float32 + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) + + # Test that nothing is changed when target size is equal to original. 
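The grayscale assertions above only hold because the dtype is threaded through both conversions; with the default float32, high-magnitude 32-bit pixel values would lose precision. A condensed sketch of the int32 round trip (file name arbitrary):

```python
import numpy as np
import tensorflow as tf

x = np.random.randint(-2147483648, 2147483647, (100, 100, 1)).astype(np.int32)
img = tf.keras.utils.array_to_img(x, scale=False, dtype="int32")
img.save("gray32.tiff")  # PIL mode "I" stores 32-bit signed pixels

loaded = tf.keras.utils.load_img("gray32.tiff", color_mode="grayscale")
arr = tf.keras.utils.img_to_array(loaded, dtype="int32")
np.testing.assert_allclose(arr, x)
```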
+ + loaded_im = image_utils.load_img(filename_rgb, target_size=(100, 100)) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual(loaded_im_array.shape, original_rgb_array.shape) + self.assertAllClose(loaded_im_array, original_rgb_array) + + loaded_im = image_utils.load_img( + filename_rgba, color_mode="rgba", target_size=(100, 100) + ) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual(loaded_im_array.shape, original_rgba_array.shape) + self.assertAllClose(loaded_im_array, original_rgba_array) + + loaded_im = image_utils.load_img( + filename_rgb, color_mode="grayscale", target_size=(100, 100) + ) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual( + loaded_im_array.shape, + (original_rgba_array.shape[0], original_rgba_array.shape[1], 1), + ) + + loaded_im = image_utils.load_img( + filename_grayscale_8bit, + color_mode="grayscale", + target_size=(100, 100), + ) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual( + loaded_im_array.shape, original_grayscale_8bit_array.shape + ) + self.assertAllClose(loaded_im_array, original_grayscale_8bit_array) + + loaded_im = image_utils.load_img( + filename_grayscale_16bit, + color_mode="grayscale", + target_size=(100, 100), + ) + loaded_im_array = image_utils.img_to_array(loaded_im, dtype="int16") + self.assertEqual( + loaded_im_array.shape, original_grayscale_16bit_array.shape + ) + self.assertAllClose(loaded_im_array, original_grayscale_16bit_array) + + loaded_im = image_utils.load_img( + filename_grayscale_32bit, + color_mode="grayscale", + target_size=(100, 100), + ) + loaded_im_array = image_utils.img_to_array(loaded_im, dtype="int32") + self.assertEqual( + loaded_im_array.shape, original_grayscale_32bit_array.shape + ) + self.assertAllClose(loaded_im_array, original_grayscale_32bit_array) + + # Test down-sampling with bilinear interpolation. + + loaded_im = image_utils.load_img(filename_rgb, target_size=(25, 25)) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual(loaded_im_array.shape, (25, 25, 3)) + + loaded_im = image_utils.load_img( + filename_rgba, color_mode="rgba", target_size=(25, 25) + ) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual(loaded_im_array.shape, (25, 25, 4)) + + loaded_im = image_utils.load_img( + filename_rgb, color_mode="grayscale", target_size=(25, 25) + ) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual(loaded_im_array.shape, (25, 25, 1)) + + loaded_im = image_utils.load_img( + filename_grayscale_8bit, + color_mode="grayscale", + target_size=(25, 25), + ) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual(loaded_im_array.shape, (25, 25, 1)) + + loaded_im = image_utils.load_img( + filename_grayscale_16bit, + color_mode="grayscale", + target_size=(25, 25), + ) + loaded_im_array = image_utils.img_to_array(loaded_im, dtype="int16") + self.assertEqual(loaded_im_array.shape, (25, 25, 1)) + + loaded_im = image_utils.load_img( + filename_grayscale_32bit, + color_mode="grayscale", + target_size=(25, 25), + ) + loaded_im_array = image_utils.img_to_array(loaded_im, dtype="int32") + self.assertEqual(loaded_im_array.shape, (25, 25, 1)) + + # Test down-sampling with nearest neighbor interpolation. 
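Down-sampling with different resampling filters produces different pixels, which is what the nearest-neighbor block below relies on when comparing against the bilinear result. A small illustration (the file name is borrowed from the RGB fixture above; any natural, noisy image works):

```python
import numpy as np
import tensorflow as tf

bilinear = tf.keras.utils.img_to_array(
    tf.keras.utils.load_img(
        "rgb_utils.png", target_size=(25, 25), interpolation="bilinear"
    )
)
nearest = tf.keras.utils.img_to_array(
    tf.keras.utils.load_img("rgb_utils.png", target_size=(25, 25))
)  # interpolation defaults to "nearest"
print(np.any(bilinear != nearest))  # True for a noisy 100x100 source
```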
+
+        loaded_im_nearest = image_utils.load_img(
+            filename_rgb, target_size=(25, 25), interpolation="nearest"
+        )
+        loaded_im_array_nearest = image_utils.img_to_array(loaded_im_nearest)
+        self.assertEqual(loaded_im_array_nearest.shape, (25, 25, 3))
+        self.assertTrue(np.any(loaded_im_array_nearest != loaded_im_array))
+
+        loaded_im_nearest = image_utils.load_img(
+            filename_rgba,
+            color_mode="rgba",
+            target_size=(25, 25),
+            interpolation="nearest",
+        )
+        loaded_im_array_nearest = image_utils.img_to_array(loaded_im_nearest)
+        self.assertEqual(loaded_im_array_nearest.shape, (25, 25, 4))
+        self.assertTrue(np.any(loaded_im_array_nearest != loaded_im_array))
+
+        loaded_im = image_utils.load_img(
+            filename_grayscale_8bit,
+            color_mode="grayscale",
+            target_size=(25, 25),
+            interpolation="nearest",
+        )
+        loaded_im_array = image_utils.img_to_array(loaded_im)
+        self.assertEqual(loaded_im_array.shape, (25, 25, 1))
+
+        loaded_im = image_utils.load_img(
+            filename_grayscale_16bit,
+            color_mode="grayscale",
+            target_size=(25, 25),
+            interpolation="nearest",
+        )
+        loaded_im_array = image_utils.img_to_array(loaded_im, dtype="int16")
+        self.assertEqual(loaded_im_array.shape, (25, 25, 1))
+
+        loaded_im = image_utils.load_img(
+            filename_grayscale_32bit,
+            color_mode="grayscale",
+            target_size=(25, 25),
+            interpolation="nearest",
+        )
+        loaded_im_array = image_utils.img_to_array(loaded_im, dtype="int32")
+        self.assertEqual(loaded_im_array.shape, (25, 25, 1))
+
+        # Test different path type
+        with open(filename_grayscale_32bit, "rb") as f:
+            path_ = io.BytesIO(f.read())  # io.BytesIO
+        loaded_im = image_utils.load_img(path_, color_mode="grayscale")
+        loaded_im_array = image_utils.img_to_array(loaded_im, dtype=np.int32)
+        self.assertAllClose(loaded_im_array, original_grayscale_32bit_array)
+
+        path_ = filename_grayscale_32bit  # str
+        loaded_im = image_utils.load_img(path_, color_mode="grayscale")
+        loaded_im_array = image_utils.img_to_array(loaded_im, dtype=np.int32)
+        self.assertAllClose(loaded_im_array, original_grayscale_32bit_array)
+
+        path_ = filename_grayscale_32bit.encode()  # bytes
+        loaded_im = image_utils.load_img(path_, color_mode="grayscale")
+        loaded_im_array = image_utils.img_to_array(loaded_im, dtype=np.int32)
+        self.assertAllClose(loaded_im_array, original_grayscale_32bit_array)
+
+        path_ = pathlib.Path(
+            os.path.join(tmpdir.full_path, "grayscale_32bit_utils.tiff")
+        )
+        loaded_im = image_utils.load_img(path_, color_mode="grayscale")
+        loaded_im_array = image_utils.img_to_array(loaded_im, dtype=np.int32)
+        self.assertAllClose(loaded_im_array, original_grayscale_32bit_array)
+
+        # Check that exception is raised if interpolation not supported.
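`load_img` deliberately accepts every common path flavor, as the block above exercises; anything else raises a `TypeError`. A compact sketch (file name borrowed from the fixture above):

```python
import io
import pathlib
import tensorflow as tf

p = "grayscale_32bit_utils.tiff"
with open(p, "rb") as f:
    buf = io.BytesIO(f.read())

# str, bytes, os.PathLike, and in-memory file objects all work.
for path in (p, p.encode(), pathlib.Path(p), buf):
    img = tf.keras.utils.load_img(path, color_mode="grayscale")
    print(type(path).__name__, img.size)
```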
+ + loaded_im = image_utils.load_img( + filename_rgb, interpolation="unsupported" + ) + with self.assertRaises(ValueError): + loaded_im = image_utils.load_img( + filename_rgb, target_size=(25, 25), interpolation="unsupported" + ) + + # Check that the aspect ratio of a square is the same + + filename_red_square = os.path.join( + tmpdir.full_path, "red_square_utils.png" + ) + arr = np.zeros((50, 100, 3), dtype=np.uint8) # rectangle image 100x50 + arr[20:30, 45:55, 0] = 255 # red square 10x10 + red_square_array = np.array(arr) + red_square = image_utils.array_to_img(red_square_array, scale=False) + red_square.save(filename_red_square) + + loaded_im = image_utils.load_img( + filename_red_square, target_size=(25, 25), keep_aspect_ratio=True + ) + loaded_im_array = image_utils.img_to_array(loaded_im) + self.assertEqual(loaded_im_array.shape, (25, 25, 3)) + + red_channel_arr = loaded_im_array[:, :, 0].astype(bool) + square_width = np.sum(np.sum(red_channel_arr, axis=0)) + square_height = np.sum(np.sum(red_channel_arr, axis=1)) + aspect_ratio_result = square_width / square_height + + # original square had 1:1 ratio + self.assertNear(aspect_ratio_result, 1.0, 0.01) + + def test_array_to_img_and_img_to_array(self): + height, width = 10, 8 + + # Test the data format + # Test RGB 3D + x = np.random.random((3, height, width)) + img = image_utils.array_to_img(x, data_format="channels_first") + self.assertEqual(img.size, (width, height)) + + x = image_utils.img_to_array(img, data_format="channels_first") + self.assertEqual(x.shape, (3, height, width)) + + # Test RGBA 3D + x = np.random.random((4, height, width)) + img = image_utils.array_to_img(x, data_format="channels_first") + self.assertEqual(img.size, (width, height)) + + x = image_utils.img_to_array(img, data_format="channels_first") + self.assertEqual(x.shape, (4, height, width)) + + # Test 2D + x = np.random.random((1, height, width)) + img = image_utils.array_to_img(x, data_format="channels_first") + self.assertEqual(img.size, (width, height)) + + x = image_utils.img_to_array(img, data_format="channels_first") + self.assertEqual(x.shape, (1, height, width)) + + # grayscale 32-bit signed integer + x = np.array( + np.random.randint(-2147483648, 2147483647, (1, height, width)), + dtype=np.int32, + ) + img = image_utils.array_to_img(x, data_format="channels_first") + self.assertEqual(img.size, (width, height)) + + x = image_utils.img_to_array(img, data_format="channels_first") + self.assertEqual(x.shape, (1, height, width)) + + # Test tf data format + # Test RGB 3D + x = np.random.random((height, width, 3)) + img = image_utils.array_to_img(x, data_format="channels_last") + self.assertEqual(img.size, (width, height)) + + x = image_utils.img_to_array(img, data_format="channels_last") + self.assertEqual(x.shape, (height, width, 3)) + + # Test RGBA 3D + x = np.random.random((height, width, 4)) + img = image_utils.array_to_img(x, data_format="channels_last") + self.assertEqual(img.size, (width, height)) + + x = image_utils.img_to_array(img, data_format="channels_last") + self.assertEqual(x.shape, (height, width, 4)) + + # Test 2D + x = np.random.random((height, width, 1)) + img = image_utils.array_to_img(x, data_format="channels_last") + self.assertEqual(img.size, (width, height)) + + x = image_utils.img_to_array(img, data_format="channels_last") + self.assertEqual(x.shape, (height, width, 1)) + + # grayscale 16-bit signed integer + x = np.array( + np.random.randint(-2147483648, 2147483647, (height, width, 1)), + dtype=np.int16, + ) + img = 
image_utils.array_to_img(x, data_format="channels_last") + self.assertEqual(img.size, (width, height)) + + x = image_utils.img_to_array(img, data_format="channels_last") + self.assertEqual(x.shape, (height, width, 1)) + + # grayscale 32-bit signed integer + x = np.array( + np.random.randint(-2147483648, 2147483647, (height, width, 1)), + dtype=np.int32, + ) + img = image_utils.array_to_img(x, data_format="channels_last") + self.assertEqual(img.size, (width, height)) + + x = image_utils.img_to_array(img, data_format="channels_last") + self.assertEqual(x.shape, (height, width, 1)) + + # Test invalid use case + with self.assertRaises(ValueError): + x = np.random.random((height, width)) # not 3D + img = image_utils.array_to_img(x, data_format="channels_first") + + with self.assertRaises(ValueError): + x = np.random.random((height, width, 3)) + # unknown data_format + img = image_utils.array_to_img(x, data_format="channels") + + with self.assertRaises(ValueError): + # neither RGB, RGBA, or gray-scale + x = np.random.random((height, width, 5)) + img = image_utils.array_to_img(x, data_format="channels_last") + + with self.assertRaises(ValueError): + x = np.random.random((height, width, 3)) + # unknown data_format + img = image_utils.img_to_array(x, data_format="channels") + + with self.assertRaises(ValueError): + # neither RGB, RGBA, or gray-scale + x = np.random.random((height, width, 5, 3)) + img = image_utils.img_to_array(x, data_format="channels_last") + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/io_utils.py b/keras/utils/io_utils.py index 3f3e0173dd33..461ac8a18686 100644 --- a/keras/utils/io_utils.py +++ b/keras/utils/io_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=g-import-not-at-top + """Utilities related to disk I/O.""" import os @@ -20,102 +20,109 @@ import threading from absl import logging + from keras.utils import keras_logging +# isort: off from tensorflow.python.util.tf_export import keras_export - INTERACTIVE_LOGGING = threading.local() INTERACTIVE_LOGGING.enable = keras_logging.INTERACTIVE_LOGGING_DEFAULT -@keras_export('keras.utils.enable_interactive_logging') +@keras_export("keras.utils.enable_interactive_logging") def enable_interactive_logging(): - """Turn on interactive logging. + """Turn on interactive logging. - When interactive logging is enabled, Keras displays logs via stdout. - This provides the best experience when using Keras in an interactive - environment such as a shell or a notebook. - """ - INTERACTIVE_LOGGING.enable = True + When interactive logging is enabled, Keras displays logs via stdout. + This provides the best experience when using Keras in an interactive + environment such as a shell or a notebook. + """ + INTERACTIVE_LOGGING.enable = True -@keras_export('keras.utils.disable_interactive_logging') +@keras_export("keras.utils.disable_interactive_logging") def disable_interactive_logging(): - """Turn off interactive logging. + """Turn off interactive logging. - When interactive logging is disabled, Keras sends logs to `absl.logging`. - This is the best option when using Keras in a non-interactive - way, such as running a training or inference job on a server. - """ - INTERACTIVE_LOGGING.enable = False + When interactive logging is disabled, Keras sends logs to `absl.logging`. 
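The pair of toggles being reflowed here is the public switch between stdout and `absl.logging`. A usage sketch, assuming a TensorFlow release where these utils are exported (2.8+):

```python
import tensorflow as tf

# Batch job: route Keras messages (progress bars, prompts) to absl.logging.
tf.keras.utils.disable_interactive_logging()
assert not tf.keras.utils.is_interactive_logging_enabled()

# Notebook / shell: print directly to stdout again.
tf.keras.utils.enable_interactive_logging()
assert tf.keras.utils.is_interactive_logging_enabled()
```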
+ This is the best option when using Keras in a non-interactive + way, such as running a training or inference job on a server. + """ + INTERACTIVE_LOGGING.enable = False -@keras_export('keras.utils.is_interactive_logging_enabled') +@keras_export("keras.utils.is_interactive_logging_enabled") def is_interactive_logging_enabled(): - """Check if interactive logging is enabled. + """Check if interactive logging is enabled. - To switch between writing logs to stdout and `absl.logging`, you may use - `keras.utils.enable_interactive_logging()` and - `keras.utils.disable_interactie_logging()`. + To switch between writing logs to stdout and `absl.logging`, you may use + `keras.utils.enable_interactive_logging()` and + `keras.utils.disable_interactive_logging()`. - Returns: - Boolean (True if interactive logging is enabled and False otherwise). - """ - # Use `getattr` in case `INTERACTIVE_LOGGING` - # does not have the `enable` attribute. - return getattr(INTERACTIVE_LOGGING, 'enable', - keras_logging.INTERACTIVE_LOGGING_DEFAULT) + Returns: + Boolean (True if interactive logging is enabled and False otherwise). + """ + # Use `getattr` in case `INTERACTIVE_LOGGING` + # does not have the `enable` attribute. + return getattr( + INTERACTIVE_LOGGING, "enable", keras_logging.INTERACTIVE_LOGGING_DEFAULT + ) +@logging.skip_log_prefix def print_msg(message, line_break=True): - """Print the message to absl logging or stdout.""" - if is_interactive_logging_enabled(): - if line_break: - sys.stdout.write(message + '\n') + """Print the message to absl logging or stdout.""" + if is_interactive_logging_enabled(): + if line_break: + sys.stdout.write(message + "\n") + else: + sys.stdout.write(message) + sys.stdout.flush() else: - sys.stdout.write(message) - sys.stdout.flush() - else: - logging.info(message) + logging.info(message) def path_to_string(path): - """Convert `PathLike` objects to their string representation. + """Convert `PathLike` objects to their string representation. - If given a non-string typed path object, converts it to its string - representation. + If given a non-string typed path object, converts it to its string + representation. - If the object passed to `path` is not among the above, then it is - returned unchanged. This allows e.g. passthrough of file objects - through this function. + If the object passed to `path` is not among the above, then it is + returned unchanged. This allows e.g. passthrough of file objects + through this function. - Args: - path: `PathLike` object that represents a path + Args: + path: `PathLike` object that represents a path - Returns: - A string representation of the path argument, if Python support exists. - """ - if isinstance(path, os.PathLike): - return os.fspath(path) - return path + Returns: + A string representation of the path argument, if Python support exists. + """ + if isinstance(path, os.PathLike): + return os.fspath(path) + return path def ask_to_proceed_with_overwrite(filepath): - """Produces a prompt asking about overwriting a file. - - Args: - filepath: the path to the file to be overwritten. - - Returns: - True if we can proceed with overwrite, False otherwise. - """ - overwrite = input('[WARNING] %s already exists - overwrite? 
' - '[y/n]' % (filepath)).strip().lower() - while overwrite not in ('y', 'n'): - overwrite = input('Enter "y" (overwrite) or "n" ' - '(cancel).').strip().lower() - if overwrite == 'n': - return False - print_msg('[TIP] Next time specify overwrite=True!') - return True + """Produces a prompt asking about overwriting a file. + + Args: + filepath: the path to the file to be overwritten. + + Returns: + True if we can proceed with overwrite, False otherwise. + """ + overwrite = ( + input(f"[WARNING] {filepath} already exists - overwrite? [y/n]") + .strip() + .lower() + ) + while overwrite not in ("y", "n"): + overwrite = ( + input('Enter "y" (overwrite) or "n" (cancel).').strip().lower() + ) + if overwrite == "n": + return False + print_msg("[TIP] Next time specify overwrite=True!") + return True diff --git a/keras/utils/io_utils_test.py b/keras/utils/io_utils_test.py index a25cda6854f3..445bbaab76d8 100644 --- a/keras/utils/io_utils_test.py +++ b/keras/utils/io_utils_test.py @@ -15,69 +15,74 @@ """Tests for io_utils.""" import builtins -from pathlib import Path import sys +from pathlib import Path + +import tensorflow.compat.v2 as tf from keras.testing_infra import test_combinations from keras.utils import io_utils -import tensorflow.compat.v2 as tf class TestIOUtils(test_combinations.TestCase): - - def test_ask_to_proceed_with_overwrite(self): - with tf.compat.v1.test.mock.patch.object(builtins, 'input') as mock_log: - mock_log.return_value = 'y' - self.assertTrue(io_utils.ask_to_proceed_with_overwrite('/tmp/not_exists')) - - mock_log.return_value = 'n' - self.assertFalse( - io_utils.ask_to_proceed_with_overwrite('/tmp/not_exists')) - - mock_log.side_effect = ['m', 'y'] - self.assertTrue(io_utils.ask_to_proceed_with_overwrite('/tmp/not_exists')) - - mock_log.side_effect = ['m', 'n'] - self.assertFalse( - io_utils.ask_to_proceed_with_overwrite('/tmp/not_exists')) - - def test_path_to_string(self): - - class PathLikeDummy: - - def __fspath__(self): - return 'dummypath' - - dummy = object() - # conversion of PathLike - self.assertEqual(io_utils.path_to_string(Path('path')), 'path') - self.assertEqual(io_utils.path_to_string(PathLikeDummy()), 'dummypath') - - # pass-through, works for all versions of python - self.assertEqual(io_utils.path_to_string('path'), 'path') - self.assertIs(io_utils.path_to_string(dummy), dummy) - - def test_print_msg(self): - enabled = io_utils.is_interactive_logging_enabled() - - io_utils.disable_interactive_logging() - self.assertFalse(io_utils.is_interactive_logging_enabled()) - - with self.assertLogs(level='INFO') as logged: - io_utils.print_msg('Testing Message') - self.assertIn('Testing Message', logged.output[0]) - - io_utils.enable_interactive_logging() - self.assertTrue(io_utils.is_interactive_logging_enabled()) - - with self.captureWritesToStream(sys.stdout) as printed: - io_utils.print_msg('Testing Message') - self.assertEqual('Testing Message\n', printed.contents()) - - if enabled: - io_utils.enable_interactive_logging() - else: - io_utils.disable_interactive_logging() - -if __name__ == '__main__': - tf.test.main() + def test_ask_to_proceed_with_overwrite(self): + with tf.compat.v1.test.mock.patch.object(builtins, "input") as mock_log: + mock_log.return_value = "y" + self.assertTrue( + io_utils.ask_to_proceed_with_overwrite("/tmp/not_exists") + ) + + mock_log.return_value = "n" + self.assertFalse( + io_utils.ask_to_proceed_with_overwrite("/tmp/not_exists") + ) + + mock_log.side_effect = ["m", "y"] + self.assertTrue( + 
io_utils.ask_to_proceed_with_overwrite("/tmp/not_exists") + ) + + mock_log.side_effect = ["m", "n"] + self.assertFalse( + io_utils.ask_to_proceed_with_overwrite("/tmp/not_exists") + ) + + def test_path_to_string(self): + class PathLikeDummy: + def __fspath__(self): + return "dummypath" + + dummy = object() + # conversion of PathLike + self.assertEqual(io_utils.path_to_string(Path("path")), "path") + self.assertEqual(io_utils.path_to_string(PathLikeDummy()), "dummypath") + + # pass-through, works for all versions of python + self.assertEqual(io_utils.path_to_string("path"), "path") + self.assertIs(io_utils.path_to_string(dummy), dummy) + + def test_print_msg(self): + enabled = io_utils.is_interactive_logging_enabled() + + io_utils.disable_interactive_logging() + self.assertFalse(io_utils.is_interactive_logging_enabled()) + + with self.assertLogs(level="INFO") as logged: + io_utils.print_msg("Testing Message") + self.assertIn("Testing Message", logged.output[0]) + + io_utils.enable_interactive_logging() + self.assertTrue(io_utils.is_interactive_logging_enabled()) + + with self.captureWritesToStream(sys.stdout) as printed: + io_utils.print_msg("Testing Message") + self.assertEqual("Testing Message\n", printed.contents()) + + if enabled: + io_utils.enable_interactive_logging() + else: + io_utils.disable_interactive_logging() + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/kernelized_utils.py b/keras/utils/kernelized_utils.py index 75b20fd11227..22fee770824d 100644 --- a/keras/utils/kernelized_utils.py +++ b/keras/utils/kernelized_utils.py @@ -18,95 +18,95 @@ def _to_matrix(u): - """If input tensor is a vector (i.e., has rank 1), converts it to matrix.""" - u_rank = len(u.shape) - if u_rank not in [1, 2]: - raise ValueError('The input tensor should have rank 1 or 2. ' - f'Received rank: {u_rank}') - if u_rank == 1: - return tf.expand_dims(u, 0) - return u + """If input tensor is a vector (i.e., has rank 1), converts it to matrix.""" + u_rank = len(u.shape) + if u_rank not in [1, 2]: + raise ValueError( + f"The input tensor should have rank 1 or 2. Received rank: {u_rank}" + ) + if u_rank == 1: + return tf.expand_dims(u, 0) + return u def _align_matrices(x, y): - """Aligns x and y tensors to allow computations over pairs of their rows.""" - x_matrix = _to_matrix(x) - y_matrix = _to_matrix(y) - x_shape = x_matrix.shape - y_shape = y_matrix.shape - if y_shape[1] != x_shape[1]: # dimensions do not match. - raise ValueError( - 'The outermost dimensions of the input tensors should match. ' - f'Received y = {y_shape[1]} vs x = {x_shape[1]}.') - - x_tile = tf.tile( - tf.expand_dims(x_matrix, 1), [1, y_shape[0], 1]) - y_tile = tf.tile( - tf.expand_dims(y_matrix, 0), [x_shape[0], 1, 1]) - return x_tile, y_tile + """Aligns x and y tensors to allow computations over pairs of their rows.""" + x_matrix = _to_matrix(x) + y_matrix = _to_matrix(y) + x_shape = x_matrix.shape + y_shape = y_matrix.shape + if y_shape[1] != x_shape[1]: # dimensions do not match. + raise ValueError( + "The outermost dimensions of the input tensors should match. " + f"Received y = {y_shape[1]} vs x = {x_shape[1]}." 
+    )
+
+    x_tile = tf.tile(tf.expand_dims(x_matrix, 1), [1, y_shape[0], 1])
+    y_tile = tf.tile(tf.expand_dims(y_matrix, 0), [x_shape[0], 1, 1])
+    return x_tile, y_tile
 
 
 def inner_product(u, v):
-  u = _to_matrix(u)
-  v = _to_matrix(v)
-  return tf.matmul(u, v, transpose_b=True)
+    u = _to_matrix(u)
+    v = _to_matrix(v)
+    return tf.matmul(u, v, transpose_b=True)
 
 
 def exact_gaussian_kernel(x, y, stddev):
-  r"""Computes exact Gaussian kernel value(s) for tensors x and y and stddev.
-
-  The Gaussian kernel for vectors u, v is defined as follows:
-      K(u, v) = exp(-||u-v||^2 / (2* stddev^2))
-  where the norm is the l2-norm. x, y can be either vectors or matrices. If they
-  are vectors, they must have the same dimension. If they are matrices, they
-  must have the same number of columns. In the latter case, the method returns
-  (as a matrix) K(u, v) values for all pairs (u, v) where u is a row from x and
-  v is a row from y.
-
-  Args:
-    x: a tensor of rank 1 or 2. It's shape should be either [dim] or [m, dim].
-    y: a tensor of rank 1 or 2. It's shape should be either [dim] or [n, dim].
-    stddev: The width of the Gaussian kernel.
-
-  Returns:
-    A single value (scalar) with shape (1, 1) (if x, y are vectors) or a matrix
-    of shape (m, n) with entries K(u, v) (where K is the Gaussian kernel) for
-    all (u,v) pairs where u, v are rows from x and y respectively.
-
-  Raises:
-    ValueError: if the shapes of x, y are not compatible.
-  """
-  x_aligned, y_aligned = _align_matrices(x, y)
-  diff_squared_l2_norm = tf.reduce_sum(
-      tf.math.squared_difference(x_aligned, y_aligned), 2)
-  return tf.exp(-diff_squared_l2_norm / (2 * stddev * stddev))
+    r"""Computes exact Gaussian kernel value(s) for tensors x and y and stddev.
+
+    The Gaussian kernel for vectors u, v is defined as follows:
+        K(u, v) = exp(-||u-v||^2 / (2 * stddev^2))
+    where the norm is the l2-norm. x, y can be either vectors or matrices. If
+    they are vectors, they must have the same dimension. If they are matrices,
+    they must have the same number of columns. In the latter case, the method
+    returns (as a matrix) K(u, v) values for all pairs (u, v) where u is a row
+    from x and v is a row from y.
+
+    Args:
+      x: a tensor of rank 1 or 2. Its shape should be either [dim] or [m, dim].
+      y: a tensor of rank 1 or 2. Its shape should be either [dim] or [n, dim].
+      stddev: The width of the Gaussian kernel.
+
+    Returns:
+      A single value (scalar) with shape (1, 1) (if x, y are vectors) or a
+      matrix of shape (m, n) with entries K(u, v) (where K is the Gaussian
+      kernel) for all (u,v) pairs where u, v are rows from x and y respectively.
+
+    Raises:
+      ValueError: if the shapes of x, y are not compatible.
+    """
+    x_aligned, y_aligned = _align_matrices(x, y)
+    diff_squared_l2_norm = tf.reduce_sum(
+        tf.math.squared_difference(x_aligned, y_aligned), 2
+    )
+    return tf.exp(-diff_squared_l2_norm / (2 * stddev * stddev))
 
 
 def exact_laplacian_kernel(x, y, stddev):
-  r"""Computes exact Laplacian kernel value(s) for tensors x and y using stddev.
-
-  The Laplacian kernel for vectors u, v is defined as follows:
-      K(u, v) = exp(-||u-v|| / stddev)
-  where the norm is the l1-norm. x, y can be either vectors or matrices. If they
-  are vectors, they must have the same dimension. If they are matrices, they
-  must have the same number of columns. In the latter case, the method returns
-  (as a matrix) K(u, v) values for all pairs (u, v) where u is a row from x and
-  v is a row from y.
-
-  Args:
-    x: a tensor of rank 1 or 2. It's shape should be either [dim] or [m, dim].
-    y: a tensor of rank 1 or 2. It's shape should be either [dim] or [n, dim].
-    stddev: The width of the Gaussian kernel.
-
-  Returns:
-    A single value (scalar) with shape (1, 1) if x, y are vectors or a matrix
-    of shape (m, n) with entries K(u, v) (where K is the Laplacian kernel) for
-    all (u,v) pairs where u, v are rows from x and y respectively.
-
-  Raises:
-    ValueError: if the shapes of x, y are not compatible.
-  """
-  x_aligned, y_aligned = _align_matrices(x, y)
-  diff_l1_norm = tf.reduce_sum(
-      tf.abs(tf.subtract(x_aligned, y_aligned)), 2)
-  return tf.exp(-diff_l1_norm / stddev)
+    r"""Computes exact Laplacian kernel value(s) for tensors x & y using stddev.
+
+    The Laplacian kernel for vectors u, v is defined as follows:
+        K(u, v) = exp(-||u-v|| / stddev)
+    where the norm is the l1-norm. x, y can be either vectors or matrices. If
+    they are vectors, they must have the same dimension. If they are matrices,
+    they must have the same number of columns. In the latter case, the method
+    returns (as a matrix) K(u, v) values for all pairs (u, v) where u is a row
+    from x and v is a row from y.
+
+    Args:
+      x: a tensor of rank 1 or 2. Its shape should be either [dim] or [m, dim].
+      y: a tensor of rank 1 or 2. Its shape should be either [dim] or [n, dim].
+      stddev: The width of the Laplacian kernel.
+
+    Returns:
+      A single value (scalar) with shape (1, 1) if x, y are vectors or a matrix
+      of shape (m, n) with entries K(u, v) (where K is the Laplacian kernel) for
+      all (u,v) pairs where u, v are rows from x and y respectively.
+
+    Raises:
+      ValueError: if the shapes of x, y are not compatible.
+    """
+    x_aligned, y_aligned = _align_matrices(x, y)
+    diff_l1_norm = tf.reduce_sum(tf.abs(tf.subtract(x_aligned, y_aligned)), 2)
+    return tf.exp(-diff_l1_norm / stddev)
diff --git a/keras/utils/kernelized_utils_test.py b/keras/utils/kernelized_utils_test.py
index 4985e6b7b8f3..cc562325eaf6 100644
--- a/keras/utils/kernelized_utils_test.py
+++ b/keras/utils/kernelized_utils_test.py
@@ -14,98 +14,114 @@
 # ==============================================================================
 """Tests for kernelized_utils.py."""
 
-import tensorflow.compat.v2 as tf
-
 import functools
 
+import tensorflow.compat.v2 as tf
 from absl.testing import parameterized
+
 from keras.utils import kernelized_utils
 
 
 def _exact_gaussian(stddev):
-  return functools.partial(
-      kernelized_utils.exact_gaussian_kernel, stddev=stddev)
+    return functools.partial(
+        kernelized_utils.exact_gaussian_kernel, stddev=stddev
+    )
 
 
 def _exact_laplacian(stddev):
-  return functools.partial(
-      kernelized_utils.exact_laplacian_kernel, stddev=stddev)
+    return functools.partial(
+        kernelized_utils.exact_laplacian_kernel, stddev=stddev
+    )
 
 
 class KernelizedUtilsTest(tf.test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      ('gaussian', _exact_gaussian(stddev=10.0), [[1.0]]),
-      ('laplacian', _exact_laplacian(stddev=50.0), [[1.0]]))
-  def test_equal_vectors(self, exact_kernel_fn, expected_values):
-    """Identical vectors give exactly the identity kernel value."""
-    x = tf.constant([0.5, -0.5, -0.5, 0.5])
-    y = tf.constant([0.5, -0.5, -0.5, 0.5])
-    exact_kernel = exact_kernel_fn(x, y)
-    shape = exact_kernel.shape.as_list()
-    self.assertLen(shape, 2)
-    # x and y are identical and therefore K(x, y) will be precisely equal to
-    # the identity value of the kernel.
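The two kernels just reformatted can be reproduced with plain broadcasting, which does the same job as the `tf.tile` alignment in `_align_matrices`. A sketch that checks one Gaussian and one Laplacian entry numerically:

```python
import tensorflow as tf

x = tf.constant([[1.0, 2.0], [0.0, 0.0]])  # m = 2 rows
y = tf.constant([[1.0, 2.0]])              # n = 1 row
stddev = 2.0

# Pairwise squared-L2 distances via broadcasting -> shape (m, n).
sq = tf.reduce_sum(
    tf.math.squared_difference(x[:, None, :], y[None, :, :]), axis=2
)
print(tf.exp(-sq / (2.0 * stddev * stddev)).numpy())
# [[1.0], [exp(-5/8)]] ~ [[1.0], [0.535]]

# Pairwise L1 distances for the Laplacian kernel.
l1 = tf.reduce_sum(tf.abs(x[:, None, :] - y[None, :, :]), axis=2)
print(tf.exp(-l1 / stddev).numpy())
# [[1.0], [exp(-3/2)]] ~ [[1.0], [0.223]]
```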
- self.assertAllClose(expected_values, exact_kernel, atol=1e-6) - - @parameterized.named_parameters( - ('gaussian', _exact_gaussian(stddev=10.0), [[1.0]]), - ('laplacian', _exact_laplacian(stddev=50.0), [[1.0]])) - def test_almost_identical_vectors(self, exact_kernel_fn, expected_values): - """Almost identical vectors give the identity kernel value.""" - x = tf.constant([1.0, 0.4, -2.1, -1.1]) - y = tf.constant([1.01, 0.39, -2.099, -1.101]) - exact_kernel = exact_kernel_fn(x, y) - shape = exact_kernel.shape.as_list() - self.assertLen(shape, 2) - # x and y are almost identical and therefore K(x, y) will be almost equal to - # the identity value of the kernel. - self.assertAllClose(expected_values, exact_kernel, atol=1e-3) - - @parameterized.named_parameters( - ('gaussian', _exact_gaussian(stddev=1.0), [[0.99], [0.977]]), - ('laplacian', _exact_laplacian(stddev=5.0), [[0.96], [0.94]])) - def test_similar_matrices(self, exact_kernel_fn, expected_values): - """Pairwise "close" vectors give high kernel values (similarity scores).""" - x = tf.constant([1.0, 3.4, -2.1, 0.9, 3.3, -2.0], shape=[2, 3]) - y = tf.constant([1.1, 3.35, -2.05]) - exact_kernel = exact_kernel_fn(x, y) - shape = exact_kernel.shape.as_list() - self.assertLen(shape, 2) - # The 2 rows of x are close to y. The pairwise kernel values (similarity - # scores) are somewhat close to the identity value of the kernel. - self.assertAllClose(expected_values, exact_kernel, atol=1e-2) - - @parameterized.named_parameters( - ('gaussian', _exact_gaussian(stddev=2.0), [[.997, .279], [.251, 1.], - [.164, 0.019]]), - ('laplacian', _exact_laplacian(stddev=2.0), [[.904, .128], [.116, 1.], - [.07, 0.027]])) - def test_matrices_varying_similarity(self, exact_kernel_fn, expected_values): - """Test matrices with row vectors of varying pairwise similarity.""" - x = tf.constant([1.0, 2., -2., 0.9, 3.3, -1.0], shape=[3, 2]) - y = tf.constant([1.1, 2.1, -2., 0.9], shape=[2, 2]) - exact_kernel = exact_kernel_fn(x, y) - - shape = exact_kernel.shape.as_list() - self.assertLen(shape, 2) - self.assertAllClose(expected_values, exact_kernel, atol=1e-2) - - @parameterized.named_parameters( - ('gaussian', _exact_gaussian(stddev=1.0), [[0.0]]), - ('laplacian', _exact_laplacian(stddev=1.0), [[0.0]])) - def test_completely_dissimilar_vectors(self, exact_kernel_fn, - expected_values): - """Very dissimilar vectors give very low similarity scores.""" - x = tf.constant([1.0, 3.4, -2.1, -5.1]) - y = tf.constant([0.5, 2.1, 1.0, 3.0]) - exact_kernel = exact_kernel_fn(x, y) - shape = exact_kernel.shape.as_list() - self.assertLen(shape, 2) - # x and y are very "far" from each other and so the corresponding kernel - # value will be very low. - self.assertAllClose(expected_values, exact_kernel, atol=1e-2) - - -if __name__ == '__main__': - tf.test.main() + @parameterized.named_parameters( + ("gaussian", _exact_gaussian(stddev=10.0), [[1.0]]), + ("laplacian", _exact_laplacian(stddev=50.0), [[1.0]]), + ) + def test_equal_vectors(self, exact_kernel_fn, expected_values): + """Identical vectors give exactly the identity kernel value.""" + x = tf.constant([0.5, -0.5, -0.5, 0.5]) + y = tf.constant([0.5, -0.5, -0.5, 0.5]) + exact_kernel = exact_kernel_fn(x, y) + shape = exact_kernel.shape.as_list() + self.assertLen(shape, 2) + # x and y are identical and therefore K(x, y) will be precisely equal to + # the identity value of the kernel. 
+ self.assertAllClose(expected_values, exact_kernel, atol=1e-6) + + @parameterized.named_parameters( + ("gaussian", _exact_gaussian(stddev=10.0), [[1.0]]), + ("laplacian", _exact_laplacian(stddev=50.0), [[1.0]]), + ) + def test_almost_identical_vectors(self, exact_kernel_fn, expected_values): + """Almost identical vectors give the identity kernel value.""" + x = tf.constant([1.0, 0.4, -2.1, -1.1]) + y = tf.constant([1.01, 0.39, -2.099, -1.101]) + exact_kernel = exact_kernel_fn(x, y) + shape = exact_kernel.shape.as_list() + self.assertLen(shape, 2) + # x and y are almost identical and therefore K(x, y) will be almost + # equal to the identity value of the kernel. + self.assertAllClose(expected_values, exact_kernel, atol=1e-3) + + @parameterized.named_parameters( + ("gaussian", _exact_gaussian(stddev=1.0), [[0.99], [0.977]]), + ("laplacian", _exact_laplacian(stddev=5.0), [[0.96], [0.94]]), + ) + def test_similar_matrices(self, exact_kernel_fn, expected_values): + """Pairwise "close" vectors give high kernel values (similarity + scores).""" + x = tf.constant([1.0, 3.4, -2.1, 0.9, 3.3, -2.0], shape=[2, 3]) + y = tf.constant([1.1, 3.35, -2.05]) + exact_kernel = exact_kernel_fn(x, y) + shape = exact_kernel.shape.as_list() + self.assertLen(shape, 2) + # The 2 rows of x are close to y. The pairwise kernel values (similarity + # scores) are somewhat close to the identity value of the kernel. + self.assertAllClose(expected_values, exact_kernel, atol=1e-2) + + @parameterized.named_parameters( + ( + "gaussian", + _exact_gaussian(stddev=2.0), + [[0.997, 0.279], [0.251, 1.0], [0.164, 0.019]], + ), + ( + "laplacian", + _exact_laplacian(stddev=2.0), + [[0.904, 0.128], [0.116, 1.0], [0.07, 0.027]], + ), + ) + def test_matrices_varying_similarity( + self, exact_kernel_fn, expected_values + ): + """Test matrices with row vectors of varying pairwise similarity.""" + x = tf.constant([1.0, 2.0, -2.0, 0.9, 3.3, -1.0], shape=[3, 2]) + y = tf.constant([1.1, 2.1, -2.0, 0.9], shape=[2, 2]) + exact_kernel = exact_kernel_fn(x, y) + + shape = exact_kernel.shape.as_list() + self.assertLen(shape, 2) + self.assertAllClose(expected_values, exact_kernel, atol=1e-2) + + @parameterized.named_parameters( + ("gaussian", _exact_gaussian(stddev=1.0), [[0.0]]), + ("laplacian", _exact_laplacian(stddev=1.0), [[0.0]]), + ) + def test_completely_dissimilar_vectors( + self, exact_kernel_fn, expected_values + ): + """Very dissimilar vectors give very low similarity scores.""" + x = tf.constant([1.0, 3.4, -2.1, -5.1]) + y = tf.constant([0.5, 2.1, 1.0, 3.0]) + exact_kernel = exact_kernel_fn(x, y) + shape = exact_kernel.shape.as_list() + self.assertLen(shape, 2) + # x and y are very "far" from each other and so the corresponding kernel + # value will be very low. 
+ self.assertAllClose(expected_values, exact_kernel, atol=1e-2) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/kpl_test_utils.py b/keras/utils/kpl_test_utils.py index 30232a842274..e96677f447fb 100644 --- a/keras/utils/kpl_test_utils.py +++ b/keras/utils/kpl_test_utils.py @@ -14,167 +14,192 @@ # ============================================================================== """Test related utilities for KPL + tf.distribute.""" -import tensorflow.compat.v2 as tf - import random import tempfile +import tensorflow.compat.v2 as tf + import keras from keras.layers.preprocessing import string_lookup class DistributeKplTestUtils(tf.test.TestCase): - """Utils for test of tf.distribute + KPL.""" - FEATURE_VOCAB = [ - "avenger", "ironman", "batman", "hulk", "spiderman", "kingkong", - "wonder_woman" - ] - LABEL_VOCAB = ["yes", "no"] - - def define_kpls_for_training(self, use_adapt): - """Function that defines KPL used for unit tests of tf.distribute. - - Args: - use_adapt: if adapt will be called. False means there will be precomputed - statistics. - - Returns: - feature_mapper: a simple keras model with one keras StringLookup layer - which maps feature to index. - label_mapper: similar to feature_mapper, but maps label to index. - - """ - if use_adapt: - feature_lookup_layer = ( - string_lookup.StringLookup( - num_oov_indices=1)) - feature_lookup_layer.adapt(self.FEATURE_VOCAB) - label_lookup_layer = ( - string_lookup.StringLookup( - num_oov_indices=0, mask_token=None)) - label_lookup_layer.adapt(self.LABEL_VOCAB) - else: - feature_lookup_layer = ( - string_lookup.StringLookup( - vocabulary=self.FEATURE_VOCAB, num_oov_indices=1)) - label_lookup_layer = ( - string_lookup.StringLookup( - vocabulary=self.LABEL_VOCAB, num_oov_indices=0, mask_token=None)) - - raw_feature_input = keras.layers.Input( - shape=(3,), dtype=tf.string, name="feature", ragged=True) - feature_id_input = feature_lookup_layer(raw_feature_input) - feature_mapper = keras.Model({"features": raw_feature_input}, - feature_id_input) - - raw_label_input = keras.layers.Input( - shape=(1,), dtype=tf.string, name="label") - label_id_input = label_lookup_layer(raw_label_input) - label_mapper = keras.Model({"label": raw_label_input}, label_id_input) - - return feature_mapper, label_mapper - - def dataset_fn(self, feature_mapper, label_mapper): - """Function that generates dataset for test of tf.distribute + KPL. - - Args: - feature_mapper: a simple keras model with one keras StringLookup layer - which maps feature to index. - label_mapper: similar to feature_mapper, but maps label to index. - - Returns: - Generated dataset for test of tf.distribute + KPL. - - """ - - def feature_and_label_gen(): - # Generator of dataset. - while True: - features = random.sample(self.FEATURE_VOCAB, 3) - label = ["yes"] if self.FEATURE_VOCAB[0] in features else ["no"] - yield {"features": features, "label": label} - - raw_dataset = tf.data.Dataset.from_generator( - feature_and_label_gen, - output_signature={ - "features": tf.TensorSpec([3], tf.string), - "label": tf.TensorSpec([1], tf.string) - }).shuffle(100).batch(32) - - train_dataset = raw_dataset.map(lambda x: ( # pylint: disable=g-long-lambda - { - "features": feature_mapper(x["features"]) - }, label_mapper(x["label"]))) - return train_dataset - - def define_model(self): - """A simple model for test of tf.distribute + KPL.""" - # Create the model. The input needs to be compatible with KPLs. 
- model_input = keras.layers.Input( - shape=(3,), dtype=tf.int64, name="model_input") - - # input_dim includes a mask token and an oov token. - emb_output = keras.layers.Embedding( - input_dim=len(self.FEATURE_VOCAB) + 2, output_dim=20)( - model_input) - emb_output = tf.reduce_mean(emb_output, axis=1) - dense_output = keras.layers.Dense( - units=1, activation="sigmoid")( - emb_output) - model = keras.Model({"features": model_input}, dense_output) - return model - - def define_reverse_lookup_layer(self): - """Create string reverse lookup layer for serving.""" - - label_inverse_lookup_layer = string_lookup.StringLookup( - num_oov_indices=0, - mask_token=None, - vocabulary=self.LABEL_VOCAB, - invert=True) - return label_inverse_lookup_layer - - def create_serving_signature(self, model, feature_mapper, - label_inverse_lookup_layer): - """Create serving signature for the given model.""" - - @tf.function - def serve_fn(raw_features): - raw_features = tf.expand_dims(raw_features, axis=0) - transformed_features = model.feature_mapper(raw_features) - outputs = model(transformed_features) - outputs = tf.squeeze(outputs, axis=0) - outputs = tf.cast(tf.greater(outputs, 0.5), tf.int64) - decoded_outputs = model.label_inverse_lookup_layer(outputs) - return tf.squeeze(decoded_outputs, axis=0) - - model.feature_mapper = feature_mapper - model.label_inverse_lookup_layer = label_inverse_lookup_layer - # serving does NOT have batch dimension - return serve_fn.get_concrete_function( - tf.TensorSpec( - shape=(3), dtype=tf.string, name="example")) - - def test_save_load_serving_model(self, model, feature_mapper, - label_inverse_lookup_layer): - """Test save/load/serving model.""" - - serving_fn = self.create_serving_signature(model, feature_mapper, - label_inverse_lookup_layer) - - saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) - model.save(saved_model_dir, save_format="tf", - signatures={"serving_default": serving_fn}) - - # Test the saved_model. - loaded_serving_fn = keras.saving.save.load_model( - saved_model_dir).signatures["serving_default"] - - # check the result w/ and w/o avenger. - prediction0 = loaded_serving_fn( - tf.constant(["avenger", "ironman", "avenger"]))["output_0"] - self.assertIn(prediction0.numpy().decode("UTF-8"), ("yes", "no")) - - prediction1 = loaded_serving_fn( - tf.constant(["ironman", "ironman", "unknown"]))["output_0"] - self.assertIn(prediction1.numpy().decode("UTF-8"), ("yes", "no")) + """Utils for test of tf.distribute + KPL.""" + + FEATURE_VOCAB = [ + "avenger", + "ironman", + "batman", + "hulk", + "spiderman", + "kingkong", + "wonder_woman", + ] + LABEL_VOCAB = ["yes", "no"] + + def define_kpls_for_training(self, use_adapt): + """Function that defines KPL used for unit tests of tf.distribute. + + Args: + use_adapt: if adapt will be called. False means there will be + precomputed statistics. + + Returns: + feature_mapper: a simple keras model with one keras StringLookup layer + which maps feature to index. + label_mapper: similar to feature_mapper, but maps label to index. 
+ + """ + if use_adapt: + feature_lookup_layer = string_lookup.StringLookup(num_oov_indices=1) + feature_lookup_layer.adapt(self.FEATURE_VOCAB) + label_lookup_layer = string_lookup.StringLookup( + num_oov_indices=0, mask_token=None + ) + label_lookup_layer.adapt(self.LABEL_VOCAB) + else: + feature_lookup_layer = string_lookup.StringLookup( + vocabulary=self.FEATURE_VOCAB, num_oov_indices=1 + ) + label_lookup_layer = string_lookup.StringLookup( + vocabulary=self.LABEL_VOCAB, num_oov_indices=0, mask_token=None + ) + + raw_feature_input = keras.layers.Input( + shape=(3,), dtype=tf.string, name="feature", ragged=True + ) + feature_id_input = feature_lookup_layer(raw_feature_input) + feature_mapper = keras.Model( + {"features": raw_feature_input}, feature_id_input + ) + + raw_label_input = keras.layers.Input( + shape=(1,), dtype=tf.string, name="label" + ) + label_id_input = label_lookup_layer(raw_label_input) + label_mapper = keras.Model({"label": raw_label_input}, label_id_input) + + return feature_mapper, label_mapper + + def dataset_fn(self, feature_mapper, label_mapper): + """Function that generates dataset for test of tf.distribute + KPL. + + Args: + feature_mapper: a simple keras model with one keras StringLookup layer + which maps feature to index. + label_mapper: similar to feature_mapper, but maps label to index. + + Returns: + Generated dataset for test of tf.distribute + KPL. + + """ + + def feature_and_label_gen(): + # Generator of dataset. + while True: + features = random.sample(self.FEATURE_VOCAB, 3) + label = ["yes"] if self.FEATURE_VOCAB[0] in features else ["no"] + yield {"features": features, "label": label} + + raw_dataset = ( + tf.data.Dataset.from_generator( + feature_and_label_gen, + output_signature={ + "features": tf.TensorSpec([3], tf.string), + "label": tf.TensorSpec([1], tf.string), + }, + ) + .shuffle(100) + .batch(32) + ) + + train_dataset = raw_dataset.map( + lambda x: ( + {"features": feature_mapper(x["features"])}, + label_mapper(x["label"]), + ) + ) + return train_dataset + + def define_model(self): + """A simple model for test of tf.distribute + KPL.""" + # Create the model. The input needs to be compatible with KPLs. + model_input = keras.layers.Input( + shape=(3,), dtype=tf.int64, name="model_input" + ) + + # input_dim includes a mask token and an oov token. 
+ emb_output = keras.layers.Embedding( + input_dim=len(self.FEATURE_VOCAB) + 2, output_dim=20 + )(model_input) + emb_output = tf.reduce_mean(emb_output, axis=1) + dense_output = keras.layers.Dense(units=1, activation="sigmoid")( + emb_output + ) + model = keras.Model({"features": model_input}, dense_output) + return model + + def define_reverse_lookup_layer(self): + """Create string reverse lookup layer for serving.""" + + label_inverse_lookup_layer = string_lookup.StringLookup( + num_oov_indices=0, + mask_token=None, + vocabulary=self.LABEL_VOCAB, + invert=True, + ) + return label_inverse_lookup_layer + + def create_serving_signature( + self, model, feature_mapper, label_inverse_lookup_layer + ): + """Create serving signature for the given model.""" + + @tf.function + def serve_fn(raw_features): + raw_features = tf.expand_dims(raw_features, axis=0) + transformed_features = model.feature_mapper(raw_features) + outputs = model(transformed_features) + outputs = tf.squeeze(outputs, axis=0) + outputs = tf.cast(tf.greater(outputs, 0.5), tf.int64) + decoded_outputs = model.label_inverse_lookup_layer(outputs) + return tf.squeeze(decoded_outputs, axis=0) + + model.feature_mapper = feature_mapper + model.label_inverse_lookup_layer = label_inverse_lookup_layer + # serving does NOT have batch dimension + return serve_fn.get_concrete_function( + tf.TensorSpec(shape=(3), dtype=tf.string, name="example") + ) + + def test_save_load_serving_model( + self, model, feature_mapper, label_inverse_lookup_layer + ): + """Test save/load/serving model.""" + + serving_fn = self.create_serving_signature( + model, feature_mapper, label_inverse_lookup_layer + ) + + saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + model.save( + saved_model_dir, + save_format="tf", + signatures={"serving_default": serving_fn}, + ) + + # Test the saved_model. + loaded_serving_fn = keras.saving.legacy.save.load_model( + saved_model_dir + ).signatures["serving_default"] + + # check the result w/ and w/o avenger. + prediction0 = loaded_serving_fn( + tf.constant(["avenger", "ironman", "avenger"]) + )["output_0"] + self.assertIn(prediction0.numpy().decode("UTF-8"), ("yes", "no")) + + prediction1 = loaded_serving_fn( + tf.constant(["ironman", "ironman", "unknown"]) + )["output_0"] + self.assertIn(prediction1.numpy().decode("UTF-8"), ("yes", "no")) diff --git a/keras/utils/layer_utils.py b/keras/utils/layer_utils.py index df81f85b090f..c15434667043 100644 --- a/keras/utils/layer_utils.py +++ b/keras/utils/layer_utils.py @@ -12,699 +12,1101 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access + """Utilities related to layer/model functionality.""" import copy import functools +import re import weakref -from keras.utils import io_utils -from keras.utils import tf_inspect import numpy as np - import tensorflow.compat.v2 as tf + +from keras import initializers +from keras.utils import io_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.utils.get_source_inputs') +@keras_export("keras.utils.get_source_inputs") def get_source_inputs(tensor, layer=None, node_index=None): - """Returns the list of input tensors necessary to compute `tensor`. - - Output will always be a list of tensors - (potentially with 1 element). - - Args: - tensor: The tensor to start from. - layer: Origin layer of the tensor. 
Will be - determined via tensor._keras_history if not provided. - node_index: Origin node index of the tensor. - - Returns: - List of input tensors. - """ - if not hasattr(tensor, '_keras_history'): - return tensor - - if layer is None or node_index: - layer, node_index, _ = tensor._keras_history - if not layer._inbound_nodes: - return [tensor] - else: - node = layer._inbound_nodes[node_index] - if node.is_input: - # Reached an Input layer, stop recursion. - return tf.nest.flatten(node.input_tensors) + """Returns the list of input tensors necessary to compute `tensor`. + + Output will always be a list of tensors + (potentially with 1 element). + + Args: + tensor: The tensor to start from. + layer: Origin layer of the tensor. Will be + determined via tensor._keras_history if not provided. + node_index: Origin node index of the tensor. + + Returns: + List of input tensors. + """ + if not hasattr(tensor, "_keras_history"): + return tensor + + if layer is None or node_index: + layer, node_index, _ = tensor._keras_history + if not layer._inbound_nodes: + return [tensor] else: - source_tensors = [] - for layer, node_index, _, tensor in node.iterate_inbound(): - previous_sources = get_source_inputs(tensor, layer, node_index) - # Avoid input redundancy. - for x in previous_sources: - if all(x is not t for t in source_tensors): - source_tensors.append(x) - return source_tensors - - -def validate_string_arg(input_data, - allowable_strings, - layer_name, - arg_name, - allow_none=False, - allow_callables=False): - """Validates the correctness of a string-based arg.""" - if allow_none and input_data is None: - return - elif allow_callables and callable(input_data): - return - elif isinstance(input_data, str) and input_data in allowable_strings: - return - else: - allowed_args = '`None`, ' if allow_none else '' - allowed_args += 'a `Callable`, ' if allow_callables else '' - allowed_args += 'or one of the following values: %s' % (allowable_strings,) - if allow_callables: - callable_note = ( - f'If restoring a model and `{arg_name}` is a custom callable, ' - 'please ensure the callable is registered as a custom object. ' - 'See https://www.tensorflow.org/guide/keras/save_and_serialize' - '#registering_the_custom_object for details. ') + node = layer._inbound_nodes[node_index] + if node.is_input: + # Reached an Input layer, stop recursion. + return tf.nest.flatten(node.input_tensors) + else: + source_tensors = [] + for layer, node_index, _, tensor in node.iterate_inbound(): + previous_sources = get_source_inputs(tensor, layer, node_index) + # Avoid input redundancy. + for x in previous_sources: + if all(x is not t for t in source_tensors): + source_tensors.append(x) + return source_tensors + + +def validate_string_arg( + input_data, + allowable_strings, + layer_name, + arg_name, + allow_none=False, + allow_callables=False, +): + """Validates the correctness of a string-based arg.""" + if allow_none and input_data is None: + return + elif allow_callables and callable(input_data): + return + elif isinstance(input_data, str) and input_data in allowable_strings: + return else: - callable_note = '' - raise ValueError( - f'Unkown value for `{arg_name}` argument of layer {layer_name}. ' - f'{callable_note}Allowed values are: {allowed_args}. 
Received: '
-        f'{input_data}')
+        allowed_args = "`None`, " if allow_none else ""
+        allowed_args += "a `Callable`, " if allow_callables else ""
+        allowed_args += f"or one of the following values: {allowable_strings}"
+        if allow_callables:
+            callable_note = (
+                f"If restoring a model and `{arg_name}` is a custom callable, "
+                "please ensure the callable is registered as a custom object. "
+                "See https://www.tensorflow.org/guide/keras/save_and_serialize"
+                "#registering_the_custom_object for details. "
+            )
+        else:
+            callable_note = ""
+        raise ValueError(
+            f"Unknown value for `{arg_name}` argument of layer {layer_name}. "
+            f"{callable_note}Allowed values are: {allowed_args}. Received: "
+            f"{input_data}"
+        )
 
 
 def count_params(weights):
-  """Count the total number of scalars composing the weights.
-
-  Args:
-    weights: An iterable containing the weights on which to compute params
-
-  Returns:
-    The total number of scalars composing the weights
-  """
-  unique_weights = {id(w): w for w in weights}.values()
-  # Ignore TrackableWeightHandlers, which will not have a shape defined.
-  unique_weights = [w for w in unique_weights if hasattr(w, 'shape')]
-  weight_shapes = [w.shape.as_list() for w in unique_weights]
-  standardized_weight_shapes = [
-      [0 if w_i is None else w_i for w_i in w] for w in weight_shapes
-  ]
-  return int(sum(np.prod(p) for p in standardized_weight_shapes))
-
-
-def print_summary(model,
-                  line_length=None,
-                  positions=None,
-                  print_fn=None,
-                  expand_nested=False,
-                  show_trainable=False):
-  """Prints a summary of a model.
-
-  Args:
-    model: Keras model instance.
-    line_length: Total length of printed lines
-      (e.g. set this to adapt the display to different
-      terminal window sizes).
-    positions: Relative or absolute positions of log elements in each line.
-      If not provided, defaults to `[.33, .55, .67, 1.]`.
-    print_fn: Print function to use.
-      It will be called on each line of the summary.
-      You can set it to a custom function
-      in order to capture the string summary.
-      It defaults to `print` (prints to stdout).
-    expand_nested: Whether to expand the nested models.
-      If not provided, defaults to `False`.
-    show_trainable: Whether to show if a layer is trainable.
-      If not provided, defaults to `False`.
-  """
-  if print_fn is None:
-    print_fn = io_utils.print_msg
-
-  if model.__class__.__name__ == 'Sequential':
-    sequential_like = True
-  elif not model._is_graph_network:
-    # We treat subclassed models as a simple sequence of layers, for logging
-    # purposes.
-    sequential_like = True
-  else:
-    sequential_like = True
-    nodes_by_depth = model._nodes_by_depth.values()
-    nodes = []
-    for v in nodes_by_depth:
-      if (len(v) > 1) or (len(v) == 1 and
-                          len(tf.nest.flatten(v[0].keras_inputs)) > 1):
-        # if the model has multiple nodes
-        # or if the nodes have multiple inbound_layers
-        # the model is no longer sequential
-        sequential_like = False
-        break
-      nodes += v
-    if sequential_like:
-      # search for shared layers
-      for layer in model.layers:
-        flag = False
-        for node in layer._inbound_nodes:
-          if node in nodes:
-            if flag:
-              sequential_like = False
-              break
-            else:
-              flag = True
-        if not sequential_like:
-          break
-
-  if sequential_like:
-    line_length = line_length or 65
-    positions = positions or [.45, .85, 1.]
-    if positions[-1] <= 1:
-      positions = [int(line_length * p) for p in positions]
-    # header names for the different log elements
-    to_display = ['Layer (type)', 'Output Shape', 'Param #']
-  else:
-    line_length = line_length or 98
-    positions = positions or [.33, .55, .67, 1.]
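A usage sketch of the validator above (the layer and argument names below are purely illustrative):

```
from keras.utils import layer_utils

# Passes silently: the value is in the allowed set.
layer_utils.validate_string_arg(
    "causal",
    allowable_strings={"valid", "same", "causal"},
    layer_name="Conv1D",
    arg_name="padding",
)

# An out-of-set value such as "circular" would instead raise a
# ValueError listing the allowed values.
```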
-    if positions[-1] <= 1:
-      positions = [int(line_length * p) for p in positions]
-    # header names for the different log elements
-    to_display = ['Layer (type)', 'Output Shape', 'Param #', 'Connected to']
-    relevant_nodes = []
-    for v in model._nodes_by_depth.values():
-      relevant_nodes += v
-
-  if show_trainable:
-    line_length += 11
-    positions.append(line_length)
-    to_display.append('Trainable')
-
-  def print_row(fields, positions, nested_level=0):
-    left_to_print = [str(x) for x in fields]
-    while any(left_to_print):
-      line = ''
-      for col in range(len(left_to_print)):
-        if col > 0:
-          start_pos = positions[col - 1]
-        else:
-          start_pos = 0
-        end_pos = positions[col]
-        # Leave room for 2 spaces to delineate columns
-        # we don't need any if we are printing the last column
-        space = 2 if col != len(positions) - 1 else 0
-        cutoff = end_pos - start_pos - space
-        fit_into_line = left_to_print[col][:cutoff]
-        # For nicer formatting we line-break on seeing end of
-        # tuple/dict etc.
-        line_break_conditions = ('),', '},', '],', "',")
-        candidate_cutoffs = [
-            fit_into_line.find(x) + len(x)
-            for x in line_break_conditions
-            if fit_into_line.find(x) >= 0
-        ]
-        if candidate_cutoffs:
-          cutoff = min(candidate_cutoffs)
-          fit_into_line = fit_into_line[:cutoff]
-
-        if col == 0:
-          line += '|' * nested_level + ' '
-        line += fit_into_line
-        line += ' ' * space if space else ''
-        left_to_print[col] = left_to_print[col][cutoff:]
-
-        # Pad out to the next position
-        if nested_level:
-          line += ' ' * (positions[col] - len(line) - nested_level)
-        else:
-          line += ' ' * (positions[col] - len(line))
-      line += '|' * nested_level
-      print_fn(line)
+    """Count the total number of scalars composing the weights.
 
-  print_fn('Model: "{}"'.format(model.name))
-  print_fn('_' * line_length)
-  print_row(to_display, positions)
-  print_fn('=' * line_length)
+    Args:
+        weights: An iterable containing the weights on which to compute params
 
-  def print_layer_summary(layer, nested_level=0):
-    """Prints a summary for a single layer.
+    Returns:
+        The total number of scalars composing the weights
+    """
+    unique_weights = {id(w): w for w in weights}.values()
+    # Ignore TrackableWeightHandlers, which will not have a shape defined.
+    unique_weights = [w for w in unique_weights if hasattr(w, "shape")]
+    weight_shapes = [w.shape.as_list() for w in unique_weights]
+    standardized_weight_shapes = [
+        [0 if w_i is None else w_i for w_i in w] for w in weight_shapes
+    ]
+    return int(sum(np.prod(p) for p in standardized_weight_shapes))
+
+
+def weight_memory_size(weights):
+    """Calculate the memory footprint for weights based on their dtypes.
+
+    Args:
+        weights: An iterable containing the weights whose memory size to
+            compute.
+
+    Returns:
+        The total memory size (in Bytes) of the weights.
+    """
+    unique_weights = {id(w): w for w in weights}.values()
+
+    total_memory_size = 0
+    for w in unique_weights:
+        # Ignore TrackableWeightHandlers, which will not have a shape defined.
+        if not hasattr(w, "shape"):
+            continue
+        elif None in w.shape.as_list():
+            continue
+        weight_shape = np.prod(w.shape.as_list())
+        per_param_size = w.dtype.size
+        total_memory_size += weight_shape * per_param_size
+    return total_memory_size
+
+
+def dtensor_variable_summary(weights):
+    """Group and calculate DTensor based weights memory size.
+
+    Since DTensor weights can be sharded across multiple devices, the result
+    will be grouped by the layout/sharding spec for the variables, so that
+    the accurate per-device memory size can be calculated.
 
     Args:
-      layer: target layer.
-      nested_level: level of nesting of the layer inside its parent layer
-        (e.g. 0 for a top-level layer, 1 for a nested layer).
+        weights: An iterable containing the weights whose memory size to
+            compute.
+
+    Returns:
+        total_weight_count, total_memory_size and per_sharing_spec_result,
+        a dict with the normalized layout spec as key and a tuple of
+        (weight count, weight size) as value.
     """
-    try:
-      output_shape = layer.output_shape
-    except AttributeError:
-      output_shape = 'multiple'
-    except RuntimeError:  # output_shape unknown in Eager mode.
-      output_shape = '?'
-    name = layer.name
-    cls_name = layer.__class__.__name__
-    if not layer.built and not getattr(layer, '_is_graph_network', False):
-      # If a subclassed model has a layer that is not called in Model.call, the
-      # layer will not be built and we cannot call layer.count_params().
-      params = '0 (unused)'
+    unique_weights = {id(w): w for w in weights}.values()
+    total_weight_count = 0
+    total_memory_size = 0
+    per_sharing_spec_result = {}
+    for w in unique_weights:
+        # Ignore TrackableWeightHandlers, which will not have a shape defined.
+        if not hasattr(w, "shape"):
+            continue
+        if not isinstance(w, tf.experimental.dtensor.DVariable):
+            continue
+        layout = w.layout
+        # Remove any duplicated axes and sort the dimension names.
+        # 1D replicated and 2D replicated variables will still be fully
+        # replicated, and [batch, model] sharding will have the same memory
+        # footprint as the [model, batch] layout.
+        reduced_sharding_spec = list(sorted(set(layout.sharding_specs)))
+        if tf.experimental.dtensor.UNSHARDED in reduced_sharding_spec:
+            reduced_sharding_spec.remove(tf.experimental.dtensor.UNSHARDED)
+        reduced_sharding_spec = tuple(reduced_sharding_spec)  # For dict key
+        weight_count, memory_size = per_sharing_spec_result.get(
+            reduced_sharding_spec, (0, 0)
+        )
+        reduced_weight_shape = np.prod(w.shape.as_list())
+        per_param_size = w.dtype.size
+        weight_count += reduced_weight_shape
+        memory_size += reduced_weight_shape * per_param_size
+        per_sharing_spec_result[reduced_sharding_spec] = (
+            weight_count,
+            memory_size,
+        )
+        total_weight_count += reduced_weight_shape
+        total_memory_size += reduced_weight_shape * per_param_size
+    return total_weight_count, total_memory_size, per_sharing_spec_result
+
+
+def print_dtensor_variable_summary(model, print_fn, line_length):
+    if getattr(model, "_layout_map", None) is not None:
+        mesh = model._layout_map.get_default_mesh()
+    elif hasattr(model, "distribute_strategy") and hasattr(
+        model.distribute_strategy, "_mesh"
+    ):
+        mesh = model.distribute_strategy._mesh
     else:
-      params = layer.count_params()
-    fields = [name + ' (' + cls_name + ')', output_shape, params]
+        # Not running with DTensor
+        mesh = None
-    if show_trainable:
-      fields.append('Y' if layer.trainable else 'N')
+    if mesh:
+        (
+            total_weight_count,
+            total_memory_size,
+            per_sharing_spec_result,
+        ) = dtensor_variable_summary(model.weights)
+        total_per_device_memory_size = 0
+        for sharding_spec in sorted(per_sharing_spec_result.keys()):
+            count, memory_size = per_sharing_spec_result[sharding_spec]
+            if len(sharding_spec) == 0:
+                print_fn(
+                    f"{count} / {total_weight_count} params "
+                    f"({readable_memory_size(memory_size)}) "
+                    "are fully replicated"
+                )
+                per_device_size = memory_size
+            else:
+                sharding_factor = np.prod(
+                    [mesh.dim_size(s) for s in sharding_spec]
+                )
+                per_device_size = memory_size / sharding_factor
+                print_fn(
+                    f"{count} / {total_weight_count} params "
+                    f"({readable_memory_size(memory_size)}) are sharded based "
+                    f"on spec '{sharding_spec}' and across {sharding_factor} "
+                    f"devices."
+                )
+                total_per_device_memory_size += per_device_size
+            print_fn(
+                "Overall per device memory usage: "
+                f"{readable_memory_size(total_per_device_memory_size)}"
+            )
+            print_fn(
+                "Overall sharding factor: {:.2f}".format(
+                    total_memory_size / total_per_device_memory_size
+                )
+            )
+            print_fn("_" * line_length)
+
+
+def readable_memory_size(weight_memory_size):
+    """Convert the weight memory size (Bytes) to a readable string."""
+    units = ["Byte", "KB", "MB", "GB", "TB", "PB"]
+    scale = 1024
+    for unit in units:
+        if weight_memory_size / scale < 1:
+            return "{:.2f} {}".format(weight_memory_size, unit)
+        else:
+            weight_memory_size /= scale
+    return "{:.2f} {}".format(weight_memory_size, units[-1])
+
+
+def get_layer_index_bound_by_layer_name(model, layer_range=None):
+    """Get the layer indexes from the model based on layer names.
+
+    The layer indexes can be used to slice the model into sub models for
+    display.
 
     Args:
-      layer: target layer.
-      nested_level: level of nesting of the layer inside its parent layer
-        (e.g. 0 for a top-level layer, 1 for a nested layer).
+        model: `tf.keras.Model` instance.
+        layer_range: a list or tuple of 2 strings, the starting layer name and
+            ending layer name (both inclusive) for the result. All layers will
+            be included when `None` is provided.
+
+    Returns:
+        The index bounds of the layers matched by `layer_range`, as
+        [first_layer_index, last_layer_index + 1].
     """
-    try:
-      output_shape = layer.output_shape
-    except AttributeError:
-      output_shape = 'multiple'
-    connections = []
-    for node in layer._inbound_nodes:
-      if relevant_nodes and node not in relevant_nodes:
-        # node is not part of the current network
-        continue
-
-      for inbound_layer, node_index, tensor_index, _ in node.iterate_inbound():
-        connections.append('{}[{}][{}]'.format(inbound_layer.name, node_index,
-                                               tensor_index))
-
-    name = layer.name
-    cls_name = layer.__class__.__name__
-    fields = [
-        name + ' (' + cls_name + ')', output_shape,
-        layer.count_params(), connections
+    if layer_range is not None:
+        if len(layer_range) != 2:
+            raise ValueError(
+                "layer_range must be a list or tuple of length 2. Received: "
+                f"layer_range = {layer_range} of length {len(layer_range)}"
+            )
+        if not isinstance(layer_range[0], str) or not isinstance(
+            layer_range[1], str
+        ):
+            raise ValueError(
+                "layer_range should contain string type only. "
+                f"Received: {layer_range}"
+            )
+    else:
+        return [0, len(model.layers)]
+
+    lower_index = [
+        idx
+        for idx, layer in enumerate(model.layers)
+        if re.match(layer_range[0], layer.name)
+    ]
+    upper_index = [
+        idx
+        for idx, layer in enumerate(model.layers)
+        if re.match(layer_range[1], layer.name)
     ]
 
-    if show_trainable:
-      fields.append('Y' if layer.trainable else 'N')
+    if not lower_index or not upper_index:
+        raise ValueError(
+            "Passed layer_names do not match the layer names in the model. "
+            f"Received: {layer_range}"
+        )
+
+    if min(lower_index) > max(upper_index):
+        return [min(upper_index), max(lower_index) + 1]
+    return [min(lower_index), max(upper_index) + 1]
+
 
-    print_row(fields, positions, nested_level)
 
+def print_summary(
+    model,
+    line_length=None,
+    positions=None,
+    print_fn=None,
+    expand_nested=False,
+    show_trainable=False,
+    layer_range=None,
+):
+    """Prints a summary of a model.
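Taken together, `weight_memory_size` and `readable_memory_size` turn a variable list into the human-readable totals printed in the summary footer: element count times `dtype.size`, then repeated division by 1024. A quick sketch of the arithmetic (eager TF assumed; the variable shape is illustrative):

```
import tensorflow.compat.v2 as tf
from keras.utils import layer_utils

w = tf.Variable(tf.zeros([512, 512], dtype=tf.float32))
size = layer_utils.weight_memory_size([w])
print(size)                                    # 512 * 512 * 4 = 1048576
print(layer_utils.readable_memory_size(size))  # "1.00 MB"
```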
+ + Args: + model: Keras model instance. + line_length: Total length of printed lines + (e.g. set this to adapt the display to different + terminal window sizes). + positions: Relative or absolute positions of log elements in each line. + If not provided, defaults to `[0.3, 0.6, 0.70, 1.]`. + print_fn: Print function to use. + It will be called on each line of the summary. + You can set it to a custom function + in order to capture the string summary. + When `None`, uses `print` (prints to stdout). + Defaults to `None`. + expand_nested: Whether to expand the nested models. + Defaults to `False`. + show_trainable: Whether to show if a layer is trainable. + Defaults to `False`. + layer_range: List or tuple containing two strings, + the starting layer name and ending layer name (both inclusive), + indicating the range of layers to be printed in the summary. The + strings could also be regexes instead of an exact name. In this + case, the starting layer will be the first layer that matches + `layer_range[0]` and the ending layer will be the last element that + matches `layer_range[1]`. By default (`None`) all + layers in the model are included in the summary. + """ + if print_fn is None: + print_fn = io_utils.print_msg + + if model.__class__.__name__ == "Sequential": + sequential_like = True + elif not model._is_graph_network: + # We treat subclassed models as a simple sequence of layers, for logging + # purposes. + sequential_like = True + else: + sequential_like = True + nodes_by_depth = model._nodes_by_depth.values() + nodes = [] + for v in nodes_by_depth: + if (len(v) > 1) or ( + len(v) == 1 and len(tf.nest.flatten(v[0].keras_inputs)) > 1 + ): + # if the model has multiple nodes + # or if the nodes have multiple inbound_layers + # the model is no longer sequential + sequential_like = False + break + nodes += v + if sequential_like: + # search for shared layers + for layer in model.layers: + flag = False + for node in layer._inbound_nodes: + if node in nodes: + if flag: + sequential_like = False + break + else: + flag = True + if not sequential_like: + break - def print_layer(layer, nested_level=0, is_nested_last=False): if sequential_like: - print_layer_summary(layer, nested_level) + line_length = line_length or 65 + positions = positions or [0.45, 0.85, 1.0] + if positions[-1] <= 1: + positions = [int(line_length * p) for p in positions] + # header names for the different log elements + to_display = ["Layer (type)", "Output Shape", "Param #"] else: - print_layer_summary_with_connections(layer, nested_level) - - if expand_nested and hasattr(layer, 'layers') and layer.layers: - print_fn('|' * (nested_level + 1) + '¯' * - (line_length - 2 * nested_level - 2) + '|' * (nested_level + 1)) - - nested_layer = layer.layers - is_nested_last = False - for i in range(len(nested_layer)): - if i == len(nested_layer) - 1: - is_nested_last = True - print_layer(nested_layer[i], nested_level + 1, is_nested_last) - - print_fn('|' * nested_level + '¯' * (line_length - 2 * nested_level) + - '|' * nested_level) - - if not is_nested_last: - print_fn('|' * nested_level + ' ' * (line_length - 2 * nested_level) + - '|' * nested_level) - - layers = model.layers - for layer in layers: - print_layer(layer) - print_fn('=' * line_length) - - if hasattr(model, '_collected_trainable_weights'): - trainable_count = count_params(model._collected_trainable_weights) - else: - trainable_count = count_params(model.trainable_weights) - - non_trainable_count = count_params(model.non_trainable_weights) - - print_fn('Total params: 
{:,}'.format(trainable_count + non_trainable_count)) - print_fn('Trainable params: {:,}'.format(trainable_count)) - print_fn('Non-trainable params: {:,}'.format(non_trainable_count)) - print_fn('_' * line_length) - - -def convert_dense_weights_data_format(dense, - previous_feature_map_shape, - target_data_format='channels_first'): - """Utility useful when changing a convnet's `data_format`. - - When porting the weights of a convnet from one data format to the other, - if the convnet includes a `Flatten` layer - (applied to the last convolutional feature map) - followed by a `Dense` layer, the weights of that `Dense` layer - should be updated to reflect the new dimension ordering. - - Args: - dense: The target `Dense` layer. - previous_feature_map_shape: A shape tuple of 3 integers, - e.g. `(512, 7, 7)`. The shape of the convolutional - feature map right before the `Flatten` layer that - came before the target `Dense` layer. - target_data_format: One of "channels_last", "channels_first". - Set it "channels_last" - if converting a "channels_first" model to "channels_last", - or reciprocally. - """ - assert target_data_format in {'channels_last', 'channels_first'} - kernel, bias = dense.get_weights() - for i in range(kernel.shape[1]): - if target_data_format == 'channels_first': - c, h, w = previous_feature_map_shape - original_fm_shape = (h, w, c) - ki = kernel[:, i].reshape(original_fm_shape) - ki = np.transpose(ki, (2, 0, 1)) # last -> first + line_length = line_length or 98 + positions = positions or [0.3, 0.6, 0.70, 1.0] + if positions[-1] <= 1: + positions = [int(line_length * p) for p in positions] + # header names for the different log elements + to_display = ["Layer (type)", "Output Shape", "Param #", "Connected to"] + relevant_nodes = [] + for v in model._nodes_by_depth.values(): + relevant_nodes += v + + if show_trainable: + line_length += 11 + positions.append(line_length) + to_display.append("Trainable") + + layer_range = get_layer_index_bound_by_layer_name(model, layer_range) + + def print_row(fields, positions, nested_level=0): + left_to_print = [str(x) for x in fields] + while any(left_to_print): + line = "" + for col in range(len(left_to_print)): + if col > 0: + start_pos = positions[col - 1] + else: + start_pos = 0 + end_pos = positions[col] + # Leave room for 2 spaces to delineate columns + # we don't need any if we are printing the last column + space = 2 if col != len(positions) - 1 else 0 + cutoff = end_pos - start_pos - space + # Except for last col, offset by one to align the start of col + if col != len(positions) - 1: + cutoff -= 1 + if col == 0: + cutoff -= nested_level + fit_into_line = left_to_print[col][:cutoff] + # For nicer formatting we line-break on seeing end of + # tuple/dict etc. 
+ line_break_conditions = ("),", "},", "],", "',") + candidate_cutoffs = [ + fit_into_line.find(x) + len(x) + for x in line_break_conditions + if fit_into_line.find(x) >= 0 + ] + if candidate_cutoffs: + cutoff = min(candidate_cutoffs) + fit_into_line = fit_into_line[:cutoff] + + if col == 0: + line += "|" * nested_level + " " + line += fit_into_line + line += " " * space if space else "" + left_to_print[col] = left_to_print[col][cutoff:] + + # Pad out to the next position + # Make space for nested_level for last column + if nested_level and col == len(positions) - 1: + line += " " * (positions[col] - len(line) - nested_level) + else: + line += " " * (positions[col] - len(line)) + line += "|" * nested_level + print_fn(line) + + print_fn(f'Model: "{model.name}"') + print_fn("_" * line_length) + print_row(to_display, positions) + print_fn("=" * line_length) + + def print_layer_summary(layer, nested_level=0): + """Prints a summary for a single layer. + + Args: + layer: target layer. + nested_level: level of nesting of the layer inside its parent layer + (e.g. 0 for a top-level layer, 1 for a nested layer). + """ + try: + output_shape = layer.output_shape + except AttributeError: + output_shape = "multiple" + except RuntimeError: # output_shape unknown in Eager mode. + output_shape = "?" + name = layer.name + cls_name = layer.__class__.__name__ + if not layer.built and not getattr(layer, "_is_graph_network", False): + # If a subclassed model has a layer that is not called in + # Model.call, the layer will not be built and we cannot call + # layer.count_params(). + params = "0 (unused)" + else: + params = layer.count_params() + fields = [name + " (" + cls_name + ")", output_shape, params] + + if show_trainable: + fields.append("Y" if layer.trainable else "N") + + print_row(fields, positions, nested_level) + + def print_layer_summary_with_connections(layer, nested_level=0): + """Prints a summary for a single layer (including its connections). + + Args: + layer: target layer. + nested_level: level of nesting of the layer inside its parent layer + (e.g. 0 for a top-level layer, 1 for a nested layer). 
+ """ + try: + output_shape = layer.output_shape + except AttributeError: + output_shape = "multiple" + connections = [] + for node in layer._inbound_nodes: + if relevant_nodes and node not in relevant_nodes: + # node is not part of the current network + continue + + for ( + inbound_layer, + node_index, + tensor_index, + _, + ) in node.iterate_inbound(): + connections.append( + f"{inbound_layer.name}[{node_index}][{tensor_index}]" + ) + + name = layer.name + cls_name = layer.__class__.__name__ + fields = [ + name + " (" + cls_name + ")", + output_shape, + layer.count_params(), + connections, + ] + + if show_trainable: + fields.append("Y" if layer.trainable else "N") + + print_row(fields, positions, nested_level) + + def print_layer(layer, nested_level=0, is_nested_last=False): + if sequential_like: + print_layer_summary(layer, nested_level) + else: + print_layer_summary_with_connections(layer, nested_level) + + if expand_nested and hasattr(layer, "layers") and layer.layers: + print_fn( + "|" * (nested_level + 1) + + "¯" * (line_length - 2 * nested_level - 2) + + "|" * (nested_level + 1) + ) + + nested_layer = layer.layers + is_nested_last = False + for i in range(len(nested_layer)): + if i == len(nested_layer) - 1: + is_nested_last = True + print_layer(nested_layer[i], nested_level + 1, is_nested_last) + + print_fn( + "|" * nested_level + + "¯" * (line_length - 2 * nested_level) + + "|" * nested_level + ) + + if not is_nested_last: + print_fn( + "|" * nested_level + + " " * (line_length - 2 * nested_level) + + "|" * nested_level + ) + + for layer in model.layers[layer_range[0] : layer_range[1]]: + print_layer(layer) + print_fn("=" * line_length) + + if hasattr(model, "_collected_trainable_weights"): + trainable_count = count_params(model._collected_trainable_weights) + trainable_memory_size = weight_memory_size( + model._collected_trainable_weights + ) else: - h, w, c = previous_feature_map_shape - original_fm_shape = (c, h, w) - ki = kernel[:, i].reshape(original_fm_shape) - ki = np.transpose(ki, (1, 2, 0)) # first -> last - kernel[:, i] = np.reshape(ki, (np.prod(previous_feature_map_shape),)) - dense.set_weights([kernel, bias]) + trainable_count = count_params(model.trainable_weights) + trainable_memory_size = weight_memory_size(model.trainable_weights) + + non_trainable_count = count_params(model.non_trainable_weights) + non_trainable_memory_size = weight_memory_size(model.non_trainable_weights) + + total_memory_size = trainable_memory_size + non_trainable_memory_size + + print_fn( + f"Total params: {trainable_count + non_trainable_count} " + f"({readable_memory_size(total_memory_size)})" + ) + print_fn( + f"Trainable params: {trainable_count} " + f"({readable_memory_size(trainable_memory_size)})" + ) + print_fn( + f"Non-trainable params: {non_trainable_count} " + f"({readable_memory_size(non_trainable_memory_size)})" + ) + print_fn("_" * line_length) + + print_dtensor_variable_summary(model, print_fn, line_length) + + +def convert_dense_weights_data_format( + dense, previous_feature_map_shape, target_data_format="channels_first" +): + """Utility useful when changing a convnet's `data_format`. + + When porting the weights of a convnet from one data format to the other, + if the convnet includes a `Flatten` layer + (applied to the last convolutional feature map) + followed by a `Dense` layer, the weights of that `Dense` layer + should be updated to reflect the new dimension ordering. + + Args: + dense: The target `Dense` layer. 
+ previous_feature_map_shape: A shape tuple of 3 integers, + e.g. `(512, 7, 7)`. The shape of the convolutional + feature map right before the `Flatten` layer that + came before the target `Dense` layer. + target_data_format: One of "channels_last", "channels_first". + Set it "channels_last" + if converting a "channels_first" model to "channels_last", + or reciprocally. + """ + assert target_data_format in {"channels_last", "channels_first"} + kernel, bias = dense.get_weights() + for i in range(kernel.shape[1]): + if target_data_format == "channels_first": + c, h, w = previous_feature_map_shape + original_fm_shape = (h, w, c) + ki = kernel[:, i].reshape(original_fm_shape) + ki = np.transpose(ki, (2, 0, 1)) # last -> first + else: + h, w, c = previous_feature_map_shape + original_fm_shape = (c, h, w) + ki = kernel[:, i].reshape(original_fm_shape) + ki = np.transpose(ki, (1, 2, 0)) # first -> last + kernel[:, i] = np.reshape(ki, (np.prod(previous_feature_map_shape),)) + dense.set_weights([kernel, bias]) def is_builtin_layer(layer): - if not getattr(layer, '_keras_api_names', None): - return False + if not getattr(layer, "_keras_api_names", None): + return False - # Subclasses of `Layer` that are not exported inherit the export name - # of the base layer class. - return (layer._keras_api_names != ('keras.layers.Layer',) and - layer._keras_api_names_v1 != ('keras.layers.Layer',)) + # Subclasses of `Layer` that are not exported inherit the export name + # of the base layer class. + return layer._keras_api_names != ( + "keras.layers.Layer", + ) and layer._keras_api_names_v1 != ("keras.layers.Layer",) def cached_per_instance(f): - """Lightweight decorator for caching lazily constructed properties. - - When to use: - This decorator provides simple caching with minimal overhead. It is designed - for properties which are expensive to compute and static over the life of a - class instance, and provides no mechanism for cache invalidation. Thus it is - best suited for lazily exposing derived properties of other static data. + """Lightweight decorator for caching lazily constructed properties. + + When to use: + This decorator provides simple caching with minimal overhead. It is designed + for properties which are expensive to compute and static over the life of a + class instance, and provides no mechanism for cache invalidation. Thus it is + best suited for lazily exposing derived properties of other static data. + + For classes with custom getattr / setattr behavior (such as trackable + objects), storing cache results as object attributes is not performant. + Instead, a specialized cache can significantly reduce property lookup + overhead. (While still allowing the decorated property to be lazily + computed.) Consider the following class: + + ``` + class MyClass: + def __setattr__(self, key, value): + # Some expensive class specific code + # ... + # ... + + super(MyClass, self).__setattr__(key, value) + + @property + def thing(self): + # `thing` is expensive to compute (and may not even be requested), so we + # want to lazily compute it and then cache it. + output = getattr(self, '_thing', None) + if output is None: + self._thing = output = compute_thing(self) + return output + ``` - For classes with custom getattr / setattr behavior (such as trackable - objects), storing cache results as object attributes is not performant. - Instead, a specialized cache can significantly reduce property lookup - overhead. (While still allowing the decorated property to be lazily computed.) 
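The per-column reshape/transpose in `convert_dense_weights_data_format` is easiest to see on a toy feature map. A NumPy sketch of the `channels_first` direction (shapes are illustrative):

```
import numpy as np

c, h, w = 2, 2, 2                      # previous_feature_map_shape
col = np.arange(c * h * w)             # one column of the Dense kernel
ki = col.reshape((h, w, c))            # stored in channels_last order
ki = np.transpose(ki, (2, 0, 1))       # reorder to channels_first
col_converted = ki.reshape(c * h * w)  # written back into the kernel
```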
- Consider the following class: + It's also worth noting that ANY overriding of __setattr__, even something as + simple as: + ``` + def __setattr__(self, key, value): + super(MyClass, self).__setattr__(key, value) + ``` - ``` - class MyClass: - def __setattr__(self, key, value): - # Some expensive class specific code - # ... - # ... + Slows down attribute assignment by nearly 10x. - super(MyClass, self).__setattr__(key, value) + By contrast, replacing the definition of `thing` with the following + sidesteps the expensive __setattr__ altogether: + ''' @property + @tracking.cached_per_instance def thing(self): # `thing` is expensive to compute (and may not even be requested), so we # want to lazily compute it and then cache it. - output = getattr(self, '_thing', None) - if output is None: - self._thing = output = compute_thing(self) - return output - ``` - - It's also worth noting that ANY overriding of __setattr__, even something as - simple as: - ``` - def __setattr__(self, key, value): - super(MyClass, self).__setattr__(key, value) - ``` - - Slows down attribute assignment by nearly 10x. - - By contrast, replacing the definition of `thing` with the following sidesteps - the expensive __setattr__ altogether: - - ''' - @property - @tracking.cached_per_instance - def thing(self): - # `thing` is expensive to compute (and may not even be requested), so we - # want to lazily compute it and then cache it. - return compute_thing(self) - ''' - - Performance: - The overhead for this decorator is ~0.4 us / call. A much lower overhead - implementation (~0.085 us / call) can be achieved by using a custom dict type: - - ``` - def dict_based_cache(f): - class Cache(dict): - __slots__ = () - def __missing__(self, key): - self[key] = output = f(key) - return output - - return property(Cache().__getitem__) - ``` + return compute_thing(self) + ''' + + Performance: + The overhead for this decorator is ~0.4 us / call. A much lower overhead + implementation (~0.085 us / call) can be achieved by using a custom dict + type: + + ``` + def dict_based_cache(f): + class Cache(dict): + __slots__ = () + def __missing__(self, key): + self[key] = output = f(key) + return output + + return property(Cache().__getitem__) + ``` + + However, that implementation holds class instances as keys, and as a result + blocks garbage collection. (And modifying it to use weakref's as keys raises + the lookup overhead to ~0.4 us) As a result, the WeakKeyDictionary + implementation below turns out to be more prudent. - However, that implementation holds class instances as keys, and as a result - blocks garbage collection. (And modifying it to use weakref's as keys raises - the lookup overhead to ~0.4 us) As a result, the WeakKeyDictionary - implementation below turns out to be more prudent. - - Args: - f: The function to cache. + Args: + f: The function to cache. - Returns: - f decorated with simple caching behavior. - """ + Returns: + f decorated with simple caching behavior. 
+ """ - cache = weakref.WeakKeyDictionary() + cache = weakref.WeakKeyDictionary() - @functools.wraps(f) - def wrapped(item): - output = cache.get(item) - if output is None: - cache[item] = output = f(item) - return output + @functools.wraps(f) + def wrapped(item): + output = cache.get(item) + if output is None: + cache[item] = output = f(item) + return output - wrapped.cache = cache - return wrapped + wrapped.cache = cache + return wrapped def filter_empty_layer_containers(layer_list): - """Filter out empty Layer-like containers and uniquify.""" - # TODO(b/130381733): Make this an attribute in base_layer.Layer. - existing = set() - to_visit = layer_list[::-1] - while to_visit: - obj = to_visit.pop() - if id(obj) in existing: - continue - existing.add(id(obj)) - if hasattr(obj, '_is_layer') and not isinstance(obj, type): - yield obj - else: - sub_layers = getattr(obj, 'layers', None) or [] + """Filter out empty Layer-like containers and uniquify.""" + # TODO(b/130381733): Make this an attribute in base_layer.Layer. + existing = set() + to_visit = layer_list[::-1] + while to_visit: + obj = to_visit.pop() + if id(obj) in existing: + continue + existing.add(id(obj)) + if hasattr(obj, "_is_layer") and not isinstance(obj, type): + yield obj + else: + sub_layers = getattr(obj, "layers", None) or [] - # Trackable data structures will not show up in ".layers" lists, but - # the layers they contain will. - to_visit.extend(sub_layers[::-1]) + # Trackable data structures will not show up in ".layers" lists, but + # the layers they contain will. + to_visit.extend(sub_layers[::-1]) class CallFunctionSpec: - """Caches the spec and provides utilities for handling call function args.""" + """Caches the spec and provides utilities for handling call function + args.""" + + def __init__(self, full_argspec): + """Initialies a `CallFunctionSpec`. + + Args: + full_argspec: the FullArgSpec of a call function of a layer. + """ + self._full_argspec = full_argspec + + self._arg_names = list(self._full_argspec.args) + # Scrub `self` that appears if a decorator was applied. + if self._arg_names and self._arg_names[0] == "self": + self._arg_names = self._arg_names[1:] + self._arg_names += self._full_argspec.kwonlyargs or [] + + call_accepts_kwargs = self._full_argspec.varkw is not None + self._expects_training_arg = ( + "training" in self._arg_names or call_accepts_kwargs + ) + self._expects_mask_arg = ( + "mask" in self._arg_names or call_accepts_kwargs + ) + + call_fn_defaults = self._full_argspec.defaults or [] + defaults = dict() + # The call arg defaults are an n-tuple of the last n elements of the + # args list. (n = # of elements that have a default argument) + for i in range(-1 * len(call_fn_defaults), 0): + defaults[self._arg_names[i]] = call_fn_defaults[i] + # The default training arg will be any (non-None) default specified in + # the method signature, or None if no value is specified. + defaults.update(self._full_argspec.kwonlydefaults or {}) + self._default_training_arg = defaults.get("training") - def __init__(self, full_argspec): - """Initialies a `CallFunctionSpec`. + @property + def full_argspec(self): + """Returns the FullArgSpec of the call function.""" + return self._full_argspec - Args: - full_argspec: the FullArgSpec of a call function of a layer. - """ - self._full_argspec = full_argspec - - self._arg_names = list(self._full_argspec.args) - # Scrub `self` that appears if a decorator was applied. 
- if self._arg_names and self._arg_names[0] == 'self': - self._arg_names = self._arg_names[1:] - self._arg_names += self._full_argspec.kwonlyargs or [] - - call_accepts_kwargs = self._full_argspec.varkw is not None - self._expects_training_arg = ('training' in self._arg_names or - call_accepts_kwargs) - self._expects_mask_arg = 'mask' in self._arg_names or call_accepts_kwargs - - call_fn_defaults = self._full_argspec.defaults or [] - defaults = dict() - # The call arg defaults are an n-tuple of the last n elements of the args - # list. (n = # of elements that have a default argument) - for i in range(-1 * len(call_fn_defaults), 0): - defaults[self._arg_names[i]] = call_fn_defaults[i] - # The default training arg will be any (non-None) default specified in the - # method signature, or None if no value is specified. - defaults.update(self._full_argspec.kwonlydefaults or {}) - self._default_training_arg = defaults.get('training') - - @property - def full_argspec(self): - """Returns the FullArgSpec of the call function.""" - return self._full_argspec - - @property - def arg_names(self): - """List of names of args and kwonlyargs.""" - # `arg_names` is not accurate if the layer has variable positional args. - return self._arg_names - - @arg_names.setter - def arg_names(self, value): - self._arg_names = value - - @property - @cached_per_instance - def arg_positions(self): - """Returns a dict mapping arg names to their index positions.""" - # `arg_positions` is not accurate if the layer has variable positional args. - call_fn_arg_positions = dict() - for pos, arg in enumerate(self._arg_names): - call_fn_arg_positions[arg] = pos - return call_fn_arg_positions - - @property - def expects_training_arg(self): - """Whether the call function uses 'training' as a parameter.""" - return self._expects_training_arg - - @expects_training_arg.setter - def expects_training_arg(self, value): - self._expects_training_arg = value - - @property - def expects_mask_arg(self): - """Whether the call function uses `mask` as a parameter.""" - return self._expects_mask_arg - - @expects_mask_arg.setter - def expects_mask_arg(self, value): - self._expects_mask_arg = value - - @property - def default_training_arg(self): - """The default value given to the "training" argument.""" - return self._default_training_arg - - def arg_was_passed(self, arg_name, args, kwargs, inputs_in_args=False): - """Returns true if argument is present in `args` or `kwargs`. + @property + def arg_names(self): + """List of names of args and kwonlyargs.""" + # `arg_names` is not accurate if the layer has variable positional args. + return self._arg_names - Args: - arg_name: String name of the argument to find. - args: Tuple of args passed to the call function. - kwargs: Dictionary of kwargs passed to the call function. - inputs_in_args: Whether the input argument (the first argument in the call - function) is included in `args`. Defaults to `False`. + @arg_names.setter + def arg_names(self, value): + self._arg_names = value - Returns: - True if argument with `arg_name` is present in `args` or `kwargs`. - """ - # Performance optimization: do no work in most common case. - if not args and not kwargs: - return False + @property + @cached_per_instance + def arg_positions(self): + """Returns a dict mapping arg names to their index positions.""" + # `arg_positions` is not accurate if the layer has variable positional + # args. 
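+        # Built lazily and cached per instance (see `cached_per_instance`
+        # above); note the cached mapping is not refreshed if `arg_names`
+        # is later reassigned.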
+ call_fn_arg_positions = dict() + for pos, arg in enumerate(self._arg_names): + call_fn_arg_positions[arg] = pos + return call_fn_arg_positions - if arg_name in kwargs: - return True - call_fn_args = self._arg_names - if not inputs_in_args: - # Ignore `inputs` arg. - call_fn_args = call_fn_args[1:] - return arg_name in dict(zip(call_fn_args, args)) + @property + def expects_training_arg(self): + """Whether the call function uses 'training' as a parameter.""" + return self._expects_training_arg - def get_arg_value(self, arg_name, args, kwargs, inputs_in_args=False): - """Retrieves the value for the argument with name `arg_name`. + @expects_training_arg.setter + def expects_training_arg(self, value): + self._expects_training_arg = value - Args: - arg_name: String name of the argument to find. - args: Tuple of args passed to the call function. - kwargs: Dictionary of kwargs passed to the call function. - inputs_in_args: Whether the input argument (the first argument in the call - function) is included in `args`. Defaults to `False`. + @property + def expects_mask_arg(self): + """Whether the call function uses `mask` as a parameter.""" + return self._expects_mask_arg - Returns: - The value of the argument with name `arg_name`, extracted from `args` or - `kwargs`. + @expects_mask_arg.setter + def expects_mask_arg(self, value): + self._expects_mask_arg = value - Raises: - KeyError if the value of `arg_name` cannot be found. - """ - if arg_name in kwargs: - return kwargs[arg_name] - call_fn_args = self._arg_names - if not inputs_in_args: - # Ignore `inputs` arg. - call_fn_args = call_fn_args[1:] - args_dict = dict(zip(call_fn_args, args)) - return args_dict[arg_name] - - def set_arg_value(self, - arg_name, - new_value, - args, - kwargs, - inputs_in_args=False, - pop_kwarg_if_none=False): - """Sets the value of an argument into the given args/kwargs. + @property + def default_training_arg(self): + """The default value given to the "training" argument.""" + return self._default_training_arg + + def arg_was_passed(self, arg_name, args, kwargs, inputs_in_args=False): + """Returns true if argument is present in `args` or `kwargs`. + + Args: + arg_name: String name of the argument to find. + args: Tuple of args passed to the call function. + kwargs: Dictionary of kwargs passed to the call function. + inputs_in_args: Whether the input argument (the first argument in the + call function) is included in `args`. Defaults to `False`. + + Returns: + True if argument with `arg_name` is present in `args` or `kwargs`. + """ + # Performance optimization: do no work in most common case. + if not args and not kwargs: + return False + + if arg_name in kwargs: + return True + call_fn_args = self._arg_names + if not inputs_in_args: + # Ignore `inputs` arg. + call_fn_args = call_fn_args[1:] + return arg_name in dict(zip(call_fn_args, args)) + + def get_arg_value(self, arg_name, args, kwargs, inputs_in_args=False): + """Retrieves the value for the argument with name `arg_name`. + + Args: + arg_name: String name of the argument to find. + args: Tuple of args passed to the call function. + kwargs: Dictionary of kwargs passed to the call function. + inputs_in_args: Whether the input argument (the first argument in the + call function) is included in `args`. Defaults to `False`. + + Returns: + The value of the argument with name `arg_name`, extracted from `args` + or `kwargs`. + + Raises: + KeyError if the value of `arg_name` cannot be found. 
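+
+        For example (an illustrative sketch; `MyLayer` stands in for any
+        layer whose call signature is `call(self, inputs, training=None)`):
+
+        ```
+        import inspect
+
+        spec = CallFunctionSpec(inspect.getfullargspec(MyLayer.call))
+        spec.arg_was_passed("training", args=(), kwargs={"training": True})
+        # -> True
+        spec.get_arg_value("training", args=(), kwargs={"training": True})
+        # -> True
+        ```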
+ """ + if arg_name in kwargs: + return kwargs[arg_name] + call_fn_args = self._arg_names + if not inputs_in_args: + # Ignore `inputs` arg. + call_fn_args = call_fn_args[1:] + args_dict = dict(zip(call_fn_args, args)) + return args_dict[arg_name] + + def set_arg_value( + self, + arg_name, + new_value, + args, + kwargs, + inputs_in_args=False, + pop_kwarg_if_none=False, + ): + """Sets the value of an argument into the given args/kwargs. + + Args: + arg_name: String name of the argument to find. + new_value: New value to give to the argument. + args: Tuple of args passed to the call function. + kwargs: Dictionary of kwargs passed to the call function. + inputs_in_args: Whether the input argument (the first argument in the + call function) is included in `args`. Defaults to `False`. + pop_kwarg_if_none: If the new value is `None`, and this is `True`, + then the argument is deleted from `kwargs`. + + Returns: + The updated `(args, kwargs)`. + """ + if self.full_argspec.varargs: + try: + arg_pos = self.full_argspec.args.index(arg_name) + if self.full_argspec.args[0] == "self": + arg_pos -= 1 + except ValueError: + arg_pos = None + else: + arg_pos = self.arg_positions.get(arg_name, None) + + if arg_pos is not None: + if not inputs_in_args: + # Ignore `inputs` arg. + arg_pos = arg_pos - 1 + if len(args) > arg_pos: + args = list(args) + args[arg_pos] = new_value + return tuple(args), kwargs + if new_value is None and pop_kwarg_if_none: + kwargs.pop(arg_name, None) + else: + kwargs[arg_name] = new_value + return args, kwargs + + def split_out_first_arg(self, args, kwargs): + """Splits (args, kwargs) into (inputs, args, kwargs).""" + # Grab the argument corresponding to the first argument in the + # layer's `call` method spec. This will either be the first positional + # argument, or it will be provided as a keyword argument. + if args: + inputs = args[0] + args = args[1:] + elif self._arg_names[0] in kwargs: + kwargs = copy.copy(kwargs) + inputs = kwargs.pop(self._arg_names[0]) + else: + raise ValueError( + "The first argument to `Layer.call` must always be passed." + ) + return inputs, args, kwargs + + +@keras_export("keras.utils.warmstart_embedding_matrix") +def warmstart_embedding_matrix( + base_vocabulary, + new_vocabulary, + base_embeddings, + new_embeddings_initializer="uniform", +): + """Warm start embedding matrix with changing vocab. + + This util can be used to warmstart the embedding layer matrix when + vocabulary changes between previously saved checkpoint and model. + Vocabulary change could mean, the size of the new vocab is different or the + vocabulary is reshuffled or new vocabulary has been added to old vocabulary. + If the vocabulary size changes, size of the embedding layer matrix also + changes. This util remaps the old vocabulary embeddings to the new embedding + layer matrix. + + Example: + Here is an example that demonstrates how to use the + `warmstart_embedding_matrix` util. + >>> import keras + >>> vocab_base = tf.convert_to_tensor(["unk", "a", "b", "c"]) + >>> vocab_new = tf.convert_to_tensor( + ... ["unk", "unk", "a", "b", "c", "d", "e"]) + >>> vectorized_vocab_base = np.random.rand(vocab_base.shape[0], 3) + >>> vectorized_vocab_new = np.random.rand(vocab_new.shape[0], 3) + >>> warmstarted_embedding_matrix = warmstart_embedding_matrix( + ... base_vocabulary=vocab_base, + ... new_vocabulary=vocab_new, + ... base_embeddings=vectorized_vocab_base, + ... new_embeddings_initializer=keras.initializers.Constant( + ... 
vectorized_vocab_new)) + + Here is an example that demonstrates how to get vocabulary and embedding + weights from layers, use the `warmstart_embedding_matrix` util to remap the + layer embeddings and continue with model training. + ``` + # get old and new vocabulary by using layer.get_vocabulary() + # for example assume TextVectorization layer is used + base_vocabulary = old_text_vectorization_layer.get_vocabulary() + new_vocabulary = new_text_vectorization_layer.get_vocabulary() + # get previous embedding layer weights + embedding_weights_base = model.get_layer('embedding').get_weights()[0] + warmstarted_embedding = keras.utils.warmstart_embedding_matrix( + base_vocabulary, + new_vocabulary, + base_embeddings=embedding_weights_base, + new_embeddings_initializer="uniform") + updated_embedding_variable = tf.Variable(warmstarted_embedding) + + # update embedding layer weights + model.layers[1].embeddings = updated_embedding_variable + model.fit(..) + # continue with model training + + ``` Args: - arg_name: String name of the argument to find. - new_value: New value to give to the argument. - args: Tuple of args passed to the call function. - kwargs: Dictionary of kwargs passed to the call function. - inputs_in_args: Whether the input argument (the first argument in the call - function) is included in `args`. Defaults to `False`. - pop_kwarg_if_none: If the new value is `None`, and this is `True`, then - the argument is deleted from `kwargs`. + base_vocabulary: The list of vocabulary terms that + the preexisting embedding matrix `base_embeddings` represents. + It can be either a 1D array/tensor or a tuple/list of vocabulary + terms (strings), or a path to a vocabulary text file. If passing a + file path, the file should contain one line per term in the + vocabulary. + new_vocabulary: The list of vocabulary terms for the new vocabulary + (same format as above). + base_embeddings: NumPy array or tensor representing the preexisting + embedding matrix. + new_embeddings_initializer: Initializer for embedding vectors for + previously unseen terms to be added to the new embedding matrix (see + `keras.initializers`). new_embedding matrix + needs to be specified with "constant" initializer. + matrix. None means "uniform". Default value is None. Returns: - The updated `(args, kwargs)`. + tf.tensor of remapped embedding layer matrix + """ - if self.full_argspec.varargs: - try: - arg_pos = self.full_argspec.args.index(arg_name) - if self.full_argspec.args[0] == 'self': - arg_pos -= 1 - except ValueError: - arg_pos = None - else: - arg_pos = self.arg_positions.get(arg_name, None) - - if arg_pos is not None: - if not inputs_in_args: - # Ignore `inputs` arg. - arg_pos = arg_pos - 1 - if len(args) > arg_pos: - args = list(args) - args[arg_pos] = new_value - return tuple(args), kwargs - if new_value is None and pop_kwarg_if_none: - kwargs.pop(arg_name, None) - else: - kwargs[arg_name] = new_value - return args, kwargs - - def split_out_first_arg(self, args, kwargs): - """Splits (args, kwargs) into (inputs, args, kwargs).""" - # Grab the argument corresponding to the first argument in the - # layer's `call` method spec. This will either be the first positional - # argument, or it will be provided as a keyword argument. 
-    if args:
-      inputs = args[0]
-      args = args[1:]
-    elif self._arg_names[0] in kwargs:
-      kwargs = copy.copy(kwargs)
-      inputs = kwargs.pop(self._arg_names[0])
+    # convert vocab to list
+    base_vocabulary = convert_vocab_to_list(base_vocabulary)
+    new_vocabulary = convert_vocab_to_list(new_vocabulary)
+
+    # Initialize the new embedding layer matrix
+    new_embeddings_initializer = initializers.get(new_embeddings_initializer)
+    new_embedding = new_embeddings_initializer(
+        shape=(len(new_vocabulary), base_embeddings.shape[1]),
+        dtype=base_embeddings.dtype,
+    )
+
+    # create mapping dict {vocab:index}
+    base_vocabulary_dict = dict(
+        zip(base_vocabulary, range(len(base_vocabulary)))
+    )
+
+    indices_base_vocabulary = []
+    indices_new_vocabulary = []
+    for index, key in enumerate(new_vocabulary):
+        if key in base_vocabulary_dict:
+            indices_base_vocabulary.append(base_vocabulary_dict[key])
+            indices_new_vocabulary.append(int(index))
+
+    # update embedding matrix
+    if indices_base_vocabulary:
+        values_to_update = tf.gather(base_embeddings, indices_base_vocabulary)
+        new_embedding = tf.tensor_scatter_nd_update(
+            new_embedding,
+            tf.expand_dims(indices_new_vocabulary, axis=1),
+            values_to_update,
+        )
+    return new_embedding
+
+
+def convert_vocab_to_list(vocab):
+    """Convert input vocabulary to list."""
+    vocab_list = []
+    if tf.is_tensor(vocab):
+        vocab_list = list(vocab.numpy())
+    elif isinstance(vocab, (np.ndarray, tuple, list)):
+        vocab_list = list(vocab)
+    elif isinstance(vocab, str):
+        if not tf.io.gfile.exists(vocab):
+            raise ValueError(f"Vocabulary file {vocab} does not exist.")
+        with tf.io.gfile.GFile(vocab, "r") as vocabulary_file:
+            vocab_list = vocabulary_file.read().splitlines()
     else:
-      raise ValueError(
-          'The first argument to `Layer.call` must always be passed.')
-    return inputs, args, kwargs
+        raise ValueError(
+            "Vocabulary is expected to be either a NumPy array, "
+            "list, 1D tensor or a vocabulary text file. Instead type "
+            f"{type(vocab)} was received."
+        )
+    if len(vocab_list) == 0:
+        raise ValueError(
+            "Vocabulary is expected to be either a NumPy array, "
+            "list, 1D tensor or a vocabulary text file with at least one token."
+            " Received 0 instead."
+        )
+    return vocab_list
diff --git a/keras/utils/layer_utils_test.py b/keras/utils/layer_utils_test.py
index a4e8ce2000b8..7fd128a9bea9 100644
--- a/keras/utils/layer_utils_test.py
+++ b/keras/utils/layer_utils_test.py
@@ -14,476 +14,951 @@
 # ==============================================================================
 """Tests for layer_utils."""

-import keras
-import tensorflow.compat.v2 as tf
-
 import collections
 import contextlib
+import io
 import multiprocessing.dummy
 import os
 import pickle
 import shutil
 import sys
+import tempfile
 import time
 import timeit

 import numpy as np
+import tensorflow.compat.v2 as tf
+from absl.testing import parameterized
+
+import keras
+from keras import backend
+from keras import layers
+from keras.dtensor import dtensor_api as dtensor
+from keras.dtensor import layout_map as layout_map_lib
+from keras.dtensor import test_util
+from keras.testing_infra import test_utils
 from keras.utils import io_utils
 from keras.utils import layer_utils
-
+from keras.utils import tf_utils

 _PICKLEABLE_CALL_COUNT = collections.Counter()


 class MyPickleableObject(tf.__internal__.tracking.AutoTrackable):
-  """Needed for InterfaceTests.test_property_cache_serialization.
-
-  This class must be at the top level. This is a constraint of pickle,
-  unrelated to `cached_per_instance`.
- """ - - @property - @layer_utils.cached_per_instance - def my_id(self): - _PICKLEABLE_CALL_COUNT[self] += 1 - return id(self) - - -class LayerUtilsTest(tf.test.TestCase): - - def test_print_summary(self): - model = keras.Sequential() - model.add( - keras.layers.Conv2D( - filters=2, kernel_size=(2, 3), input_shape=(3, 5, 5), name='conv')) - model.add(keras.layers.Flatten(name='flat')) - model.add(keras.layers.Dense(5, name='dense')) - - file_name = 'model_1.txt' - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - fpath = os.path.join(temp_dir, file_name) - writer = open(fpath, 'w') - - def print_to_file(text): - print(text, file=writer) - - try: - layer_utils.print_summary(model, print_fn=print_to_file) - self.assertTrue(tf.io.gfile.exists(fpath)) - writer.close() - reader = open(fpath, 'r') - lines = reader.readlines() - reader.close() - self.assertEqual(len(lines), 15) - except ImportError: - pass - - def test_print_summary_without_print_fn(self): - model = keras.Sequential([ - keras.layers.Dense(5, input_shape=(10,), name='dense')]) - io_utils.enable_interactive_logging() - with self.captureWritesToStream(sys.stdout) as printed: - layer_utils.print_summary(model) - self.assertIn('dense (Dense)', printed.contents()) - - def test_print_summary_expand_nested(self): - shape = (None, None, 3) - - def make_model(): - x = inputs = keras.Input(shape) - x = keras.layers.Conv2D(3, 1)(x) - x = keras.layers.BatchNormalization()(x) - return keras.Model(inputs, x) - - x = inner_inputs = keras.Input(shape) - x = make_model()(x) - inner_model = keras.Model(inner_inputs, x) - - inputs = keras.Input(shape) - model = keras.Model(inputs, inner_model(inputs)) - - file_name = 'model_2.txt' - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - fpath = os.path.join(temp_dir, file_name) - writer = open(fpath, 'w') - - def print_to_file(text): - print(text, file=writer) - - try: - layer_utils.print_summary( - model, print_fn=print_to_file, expand_nested=True) - self.assertTrue(tf.io.gfile.exists(fpath)) - writer.close() - reader = open(fpath, 'r') - lines = reader.readlines() - reader.close() - check_str = ( - 'Model: "model_2"\n' - '_________________________________________________________________\n' - ' Layer (type) Output Shape Param # \n' - '=================================================================\n' - ' input_3 (InputLayer) [(None, None, None, 3)] 0 \n' - ' \n' - ' model_1 (Functional) (None, None, None, 3) 24 \n' - '|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n' - '| input_1 (InputLayer) [(None, None, None, 3)] 0 |\n' - '| |\n' - '| model (Functional) (None, None, None, 3) 24 |\n' - '||¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯||\n' - '|| input_2 (InputLayer) [(None, None, None, 3)] 0 ||\n' - '|| ||\n' - '|| conv2d (Conv2D) (None, None, None, 3) 12 ||\n' - '|| ||\n' - '|| batch_normalization (BatchN (None, None, None, 3) 12 ||\n' - '|| ormalization) ||\n' - '|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n' - '¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n' - '=================================================================\n' - 'Total params: 24\n' - 'Trainable params: 18\n' - 'Non-trainable params: 6\n' - '_________________________________________________________________\n') - - fin_str = '' - for line in lines: - fin_str += line - - self.assertIn(fin_str, check_str) - self.assertEqual(len(lines), 25) - except ImportError: - 
pass - - def test_summary_subclass_model_expand_nested(self): - - class Sequential(keras.Model): - - def __init__(self, *args): - super().__init__() - self.module_list = list(args) if args else [] - - def call(self, x): - for module in self.module_list: - x = module(x) - return x - - class Block(keras.Model): - - def __init__(self): - super().__init__() - self.module = Sequential( - keras.layers.Dense(10), - keras.layers.Dense(10), - ) - - def call(self, input_tensor): - x = self.module(input_tensor) - return x - - class Base(keras.Model): - - def __init__(self): - super().__init__() - self.module = Sequential(Block(), Block()) - - def call(self, input_tensor): - x = self.module(input_tensor) - y = self.module(x) - return x, y - - class Network(keras.Model): - - def __init__(self): - super().__init__() - self.child = Base() - - def call(self, inputs): - return self.child(inputs) - - net = Network() - inputs = keras.Input(shape=(10,)) - outputs = net(inputs) - model = keras.models.Model(inputs=inputs, outputs=outputs) - - file_name = 'model_3.txt' - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - fpath = os.path.join(temp_dir, file_name) - writer = open(fpath, 'w') - - def print_to_file(text): - print(text, file=writer) - - try: - layer_utils.print_summary( - model, line_length=120, print_fn=print_to_file, expand_nested=True) - self.assertTrue(tf.io.gfile.exists(fpath)) - writer.close() - reader = open(fpath, 'r') - lines = reader.readlines() - reader.close() - # The output content are slightly different for the input shapes between - # v1 and v2. - if tf.__internal__.tf2.enabled(): - self.assertEqual(len(lines), 39) - else: - self.assertEqual(len(lines), 40) - except ImportError: - pass - - def test_print_summary_show_trainable(self): - model = keras.Sequential(name='trainable') - untrained = keras.layers.Conv2D( - filters=2, kernel_size=(2, 3), input_shape=(3, 5, 5), name='conv') - model.add(untrained) - model.add(keras.layers.Flatten(name='flat')) - model.add(keras.layers.Dense(5, name='dense')) - - untrained.trainable = False - - file_name = 'model_4.txt' - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - fpath = os.path.join(temp_dir, file_name) - writer = open(fpath, 'w') - - def print_to_file(text): - print(text, file=writer) - - try: - layer_utils.print_summary( - model, print_fn=print_to_file, show_trainable=True) - self.assertTrue(tf.io.gfile.exists(fpath)) - writer.close() - reader = open(fpath, 'r') - lines = reader.readlines() - reader.close() - check_str = ( - 'Model: ' - '"trainable"\n____________________________________________________________________________\n' - ' Layer (type) Output Shape Param # ' - 'Trainable ' - '\n============================================================================\n' - ' conv (Conv2D) (None, 2, 3, 2) 62 N' - ' \n' - ' ' - '\n flat (Flatten) (None, 12) 0 ' - 'Y \n' - ' ' - '\n dense (Dense) (None, 5) 65 ' - 'Y \n' - ' ' - '\n============================================================================\nTotal' - ' params: 127\nTrainable params: 65\nNon-trainable params: ' - '62\n____________________________________________________________________________\n' - '____________________________________________________________________________\n' - ) - - fin_str = '' - for line in lines: - fin_str += line - - self.assertIn(fin_str, check_str) - self.assertEqual(len(lines), 15) - except ImportError: - pass - - def 
test_print_summary_expand_nested_show_trainable(self): - shape = (None, None, 3) - - def make_model(): - x = inputs = keras.Input(shape, name='input2') - untrainable = keras.layers.Conv2D(3, 1) - untrainable.trainable = False - x = untrainable(x) - x = keras.layers.BatchNormalization()(x) - return keras.Model(inputs, x) - - x = inner_inputs = keras.Input(shape, name='input1') - x = make_model()(x) - inner_model = keras.Model(inner_inputs, x) - - inputs = keras.Input(shape, name='input3') - model = keras.Model(inputs, inner_model(inputs)) - - file_name = 'model_6.txt' - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - fpath = os.path.join(temp_dir, file_name) - writer = open(fpath, 'w') - - def print_to_file(text): - print(text, file=writer) - - try: - layer_utils.print_summary( - model, - print_fn=print_to_file, - expand_nested=True, - show_trainable=True) - self.assertTrue(tf.io.gfile.exists(fpath)) - writer.close() - reader = open(fpath, 'r') - lines = reader.readlines() - reader.close() - check_str = ( - 'Model: ' - '"model_2"\n____________________________________________________________________________\n' - ' Layer (type) Output Shape Param # ' - 'Trainable ' - '\n============================================================================\n' - ' input3 (InputLayer) [(None, None, None, 3)] 0 Y' - ' \n' - ' ' - '\n model_1 (Functional) (None, None, None, 3) 24 ' - 'Y ' - '\n|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n|' - ' input1 (InputLayer) [(None, None, None, 3)] 0 Y' - ' |\n|' - ' ' - '|\n| model (Functional) (None, None, None, 3) 24 ' - 'Y ' - '|\n||¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯||\n||' - ' input2 (InputLayer) [(None, None, None, 3)] 0 Y' - ' ||\n||' - ' ' - '||\n|| conv2d (Conv2D) (None, None, None, 3) 12 ' - 'N ||\n||' - ' ' - '||\n|| batch_normalization (BatchN (None, None, None, 3) 12 ' - 'Y ||\n|| ormalization)' - ' ' - '||\n|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n============================================================================\nTotal' - ' params: 24\nTrainable params: 6\nNon-trainable params: ' - '18\n____________________________________________________________________________\n' - '____________________________________________________________________________\n' - ) - - fin_str = '' - for line in lines: - fin_str += line - - self.assertIn(fin_str, check_str) - self.assertEqual(len(lines), 25) - except ImportError: - pass - - def test_property_cache(self): - test_counter = collections.Counter() - - class MyObject(tf.__internal__.tracking.AutoTrackable): - - def __init__(self): - super().__init__() - self._frozen = True - - def __setattr__(self, key, value): - """Enforce that cache does not set attribute on MyObject.""" - if getattr(self, '_frozen', False): - raise ValueError('Cannot mutate when frozen.') - return super().__setattr__(key, value) - - @property - @layer_utils.cached_per_instance - def test_property(self): - test_counter[id(self)] += 1 + """Needed for InterfaceTests.test_property_cache_serialization. + + This class must be at the top level. This is a constraint of pickle, + unrelated to `cached_per_instance`. 
+ """ + + @property + @layer_utils.cached_per_instance + def my_id(self): + _PICKLEABLE_CALL_COUNT[self] += 1 return id(self) - first_object = MyObject() - second_object = MyObject() - - # Make sure the objects return the correct values - self.assertEqual(first_object.test_property, id(first_object)) - self.assertEqual(second_object.test_property, id(second_object)) - - # Make sure the cache does not share across objects - self.assertNotEqual(first_object.test_property, second_object.test_property) - - # Check again (Now the values should be cached.) - self.assertEqual(first_object.test_property, id(first_object)) - self.assertEqual(second_object.test_property, id(second_object)) - - # Count the function calls to make sure the cache is actually being used. - self.assertAllEqual(tuple(test_counter.values()), (1, 1)) - - def test_property_cache_threaded(self): - call_count = collections.Counter() - - class MyObject(tf.__internal__.tracking.AutoTrackable): - - @property - @layer_utils.cached_per_instance - def test_property(self): - # Random sleeps to ensure that the execution thread changes - # mid-computation. - call_count['test_property'] += 1 - time.sleep(np.random.random() + 1.) - - # Use a RandomState which is seeded off the instance's id (the mod is - # because numpy limits the range of seeds) to ensure that an instance - # returns the same value in different threads, but different instances - # return different values. - return int(np.random.RandomState(id(self) % (2 ** 31)).randint(2 ** 16)) - - def get_test_property(self, _): - """Function provided to .map for threading test.""" - return self.test_property - - # Test that multiple threads return the same value. This requires that - # the underlying function is repeatable, as cached_property makes no attempt - # to prioritize the first call. - test_obj = MyObject() - with contextlib.closing(multiprocessing.dummy.Pool(32)) as pool: - # Intentionally make a large pool (even when there are only a small number - # of cpus) to ensure that the runtime switches threads. - results = pool.map(test_obj.get_test_property, range(64)) - self.assertEqual(len(set(results)), 1) - - # Make sure we actually are testing threaded behavior. - self.assertGreater(call_count['test_property'], 1) - - # Make sure new threads still cache hit. - with contextlib.closing(multiprocessing.dummy.Pool(2)) as pool: - start_time = timeit.default_timer() # Don't time pool instantiation. - results = pool.map(test_obj.get_test_property, range(4)) - total_time = timeit.default_timer() - start_time - - # Note(taylorrobie): The reason that it is safe to time a unit test is that - # a cache hit will be << 1 second, and a cache miss is - # guaranteed to be >= 1 second. Empirically confirmed by - # 100,000 runs with no flakes. - self.assertLess(total_time, 0.95) - - def test_property_cache_serialization(self): - # Reset call count. .keys() must be wrapped in a list, because otherwise we - # would mutate the iterator while iterating. - for k in list(_PICKLEABLE_CALL_COUNT.keys()): - _PICKLEABLE_CALL_COUNT.pop(k) - - first_instance = MyPickleableObject() - self.assertEqual(id(first_instance), first_instance.my_id) - - # Test that we can pickle and un-pickle - second_instance = pickle.loads(pickle.dumps(first_instance)) - - self.assertEqual(id(second_instance), second_instance.my_id) - self.assertNotEqual(first_instance.my_id, second_instance.my_id) - - # Make sure de-serialized object uses the cache. 
-    self.assertEqual(_PICKLEABLE_CALL_COUNT[second_instance], 1)
-
-    # Make sure the decorator cache is not being serialized with the object.
-    expected_size = len(pickle.dumps(second_instance))
-    for _ in range(5):
-      # Add some more entries to the cache.
-      _ = MyPickleableObject().my_id
-    self.assertEqual(len(_PICKLEABLE_CALL_COUNT), 7)
-    size_check_instance = MyPickleableObject()
-    _ = size_check_instance.my_id
-    self.assertEqual(expected_size, len(pickle.dumps(size_check_instance)))
-
-
-if __name__ == '__main__':
-  tf.test.main()
+
+class LayerUtilsTest(tf.test.TestCase, parameterized.TestCase):
+    def setUp(self):
+        super().setUp()
+        # Reset the UIDs so that all layer/model IDs always start with 1.
+        # This helps keep nondeterministic IDs out of model.summary().
+        backend.reset_uids()
+
+    def test_print_summary(self):
+        model = keras.Sequential()
+        model.add(
+            keras.layers.Conv2D(
+                filters=2,
+                kernel_size=(2, 3),
+                input_shape=(3, 5, 5),
+                name="conv",
+            )
+        )
+        model.add(keras.layers.Flatten(name="flat"))
+        model.add(keras.layers.Dense(5, name="dense"))
+
+        file_name = "model_1.txt"
+        temp_dir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+        fpath = os.path.join(temp_dir, file_name)
+        writer = open(fpath, "w")
+
+        def print_to_file(text):
+            print(text, file=writer)
+
+        try:
+            layer_utils.print_summary(model, print_fn=print_to_file)
+            self.assertTrue(tf.io.gfile.exists(fpath))
+            writer.close()
+            with open(fpath, "r") as reader:
+                lines = reader.readlines()
+            self.assertEqual(len(lines), 15)
+        except ImportError:
+            pass
+
+    def test_print_summary_without_print_fn(self):
+        model = keras.Sequential(
+            [keras.layers.Dense(5, input_shape=(10,), name="dense")]
+        )
+        io_utils.enable_interactive_logging()
+        with self.captureWritesToStream(sys.stdout) as printed:
+            layer_utils.print_summary(model)
+        self.assertIn("dense (Dense)", printed.contents())
+
+    def test_print_summary_format_long_names(self):
+        shape = (8, 8, 3)
+
+        model = keras.Sequential(
+            [
+                keras.Input(shape),
+                keras.layers.Conv2D(4, 3, name="Really-Long-name-test"),
+                keras.layers.Conv2D(4, 3, name="Another-long-name-test"),
+                keras.layers.Flatten(),
+                keras.layers.Dense(2, name="long-name-test-output"),
+            ]
+        )
+        file_name = "sequential.txt"
+        temp_dir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+        fpath = os.path.join(temp_dir, file_name)
+        writer = open(fpath, "w")
+
+        def print_to_file(text):
+            print(text, file=writer)
+
+        layer_utils.print_summary(model, print_fn=print_to_file)
+        self.assertTrue(tf.io.gfile.exists(fpath))
+        writer.close()
+        reader = open(fpath, "r")
+        lines = reader.readlines()
+        reader.close()
+        check_str = (
+            'Model: "sequential"\n'
+            "_________________________________________________________________\n"  # noqa: E501
+            " Layer (type) Output Shape Param # \n"  # noqa: E501
+            "=================================================================\n"  # noqa: E501
+            " Really-Long-name-test (Con (None, 6, 6, 4) 112 \n"  # noqa: E501
+            " v2D) \n"  # noqa: E501
+            " \n"  # noqa: E501
+            " Another-long-name-test (Co (None, 4, 4, 4) 148 \n"  # noqa: E501
+            " nv2D) \n"  # noqa: E501
+            " \n"  # noqa: E501
+            " flatten (Flatten) (None, 64) 0 \n"  # noqa: E501
+            " \n"  # noqa: E501
+            " long-name-test-output (Den (None, 2) 130 \n"  # noqa: E501
+            " se) \n"  # noqa: E501
+            " \n"  # noqa: E501
+            "=================================================================\n"  # noqa: E501
+            "Total params: 390 (1.52 KB)\n"
+            "Trainable params: 390 (1.52 KB)\n"
+ "Non-trainable params: 0 (0.00 Byte)\n" + "_________________________________________________________________\n" # noqa: E501 + ) + fin_str = "".join(lines) + self.assertIn(fin_str, check_str) + self.assertEqual(len(lines), 20) + + def test_print_summary_expand_nested(self): + shape = (None, None, 3) + + def make_model(): + x = inputs = keras.Input(shape) + x = keras.layers.Conv2D(3, 1)(x) + x = keras.layers.BatchNormalization()(x) + return keras.Model(inputs, x) + + x = inner_inputs = keras.Input(shape) + x = make_model()(x) + inner_model = keras.Model(inner_inputs, x) + + inputs = keras.Input(shape) + model = keras.Model(inputs, inner_model(inputs)) + + file_name = "model_2.txt" + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + fpath = os.path.join(temp_dir, file_name) + writer = open(fpath, "w") + + def print_to_file(text): + print(text, file=writer) + + try: + layer_utils.print_summary( + model, print_fn=print_to_file, expand_nested=True + ) + self.assertTrue(tf.io.gfile.exists(fpath)) + writer.close() + reader = open(fpath, "r") + lines = reader.readlines() + reader.close() + check_str = ( + 'Model: "model_2"\n' + "_________________________________________________________________\n" # noqa: E501 + " Layer (type) Output Shape Param # \n" # noqa: E501 + "=================================================================\n" # noqa: E501 + " input_3 (InputLayer) [(None, None, None, 3)] 0 \n" # noqa: E501 + " \n" # noqa: E501 + " model_1 (Functional) (None, None, None, 3) 24 \n" # noqa: E501 + "|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n" # noqa: E501 + "| input_1 (InputLayer) [(None, None, None, 3)] 0 |\n" # noqa: E501 + "| |\n" # noqa: E501 + "| model (Functional) (None, None, None, 3) 24 |\n" # noqa: E501 + "||¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯||\n" # noqa: E501 + "|| input_2 (InputLayer) [(None, None, None, 3)] 0 ||\n" # noqa: E501 + "|| ||\n" # noqa: E501 + "|| conv2d (Conv2D) (None, None, None, 3) 12 ||\n" # noqa: E501 + "|| ||\n" # noqa: E501 + "|| batch_normalization (Bat (None, None, None, 3) 12 ||\n" # noqa: E501 + "|| chNormalization) ||\n" # noqa: E501 + "|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n" # noqa: E501 + "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n" # noqa: E501 + "=================================================================\n" # noqa: E501 + "Total params: 24 (96.00 Byte)\n" + "Trainable params: 18 (72.00 Byte)\n" + "Non-trainable params: 6 (24.00 Byte)\n" + "_________________________________________________________________\n" # noqa: E501 + ) + + fin_str = "".join(lines) + + self.assertIn(fin_str, check_str) + self.assertEqual(len(lines), 25) + except ImportError: + pass + + def test_summary_subclass_model_expand_nested(self): + class Sequential(keras.Model): + def __init__(self, *args): + super().__init__() + self.module_list = list(args) if args else [] + + def call(self, x): + for module in self.module_list: + x = module(x) + return x + + class Block(keras.Model): + def __init__(self): + super().__init__() + self.module = Sequential( + keras.layers.Dense(10), + keras.layers.Dense(10), + ) + + def call(self, input_tensor): + x = self.module(input_tensor) + return x + + class Base(keras.Model): + def __init__(self): + super().__init__() + self.module = Sequential(Block(), Block()) + + def call(self, input_tensor): + x = self.module(input_tensor) + y = self.module(x) + return x, y + + class Network(keras.Model): + 
def __init__(self):
+                super().__init__()
+                self.child = Base()
+
+            def call(self, inputs):
+                return self.child(inputs)
+
+        net = Network()
+        inputs = keras.Input(shape=(10,))
+        outputs = net(inputs)
+        model = keras.models.Model(inputs=inputs, outputs=outputs)
+
+        file_name = "model_3.txt"
+        temp_dir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+        fpath = os.path.join(temp_dir, file_name)
+        writer = open(fpath, "w")
+
+        def print_to_file(text):
+            print(text, file=writer)
+
+        try:
+            layer_utils.print_summary(
+                model,
+                line_length=120,
+                print_fn=print_to_file,
+                expand_nested=True,
+            )
+            self.assertTrue(tf.io.gfile.exists(fpath))
+            writer.close()
+            with open(fpath, "r") as reader:
+                lines = reader.readlines()
+            # The output contents are slightly different for the input shapes
+            # between v1 and v2.
+            if tf.__internal__.tf2.enabled():
+                self.assertEqual(len(lines), 39)
+            else:
+                self.assertEqual(len(lines), 40)
+        except ImportError:
+            pass
+
+    def test_print_summary_show_trainable(self):
+        model = keras.Sequential(name="trainable")
+        untrained = keras.layers.Conv2D(
+            filters=2, kernel_size=(2, 3), input_shape=(3, 5, 5), name="conv"
+        )
+        model.add(untrained)
+        model.add(keras.layers.Flatten(name="flat"))
+        model.add(keras.layers.Dense(5, name="dense"))
+
+        untrained.trainable = False
+
+        file_name = "model_4.txt"
+        temp_dir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+        fpath = os.path.join(temp_dir, file_name)
+        writer = open(fpath, "w")
+
+        def print_to_file(text):
+            print(text, file=writer)
+
+        try:
+            layer_utils.print_summary(
+                model, print_fn=print_to_file, show_trainable=True
+            )
+            self.assertTrue(tf.io.gfile.exists(fpath))
+            writer.close()
+            with open(fpath, "r") as reader:
+                lines = reader.readlines()
+            check_str = (
+                'Model: "trainable"\n'
+                "____________________________________________________________________________\n"  # noqa: E501
+                " Layer (type) Output Shape Param # Trainable \n"  # noqa: E501
+                "============================================================================\n"  # noqa: E501
+                " conv (Conv2D) (None, 2, 3, 2) 62 N \n"  # noqa: E501
+                " \n"  # noqa: E501
+                " flat (Flatten) (None, 12) 0 Y \n"  # noqa: E501
+                " \n"  # noqa: E501
+                " dense (Dense) (None, 5) 65 Y \n"  # noqa: E501
+                " \n"  # noqa: E501
+                "============================================================================\n"  # noqa: E501
+                "Total params: 127 (508.00 Byte)\n"
+                "Trainable params: 65 (260.00 Byte)\n"
+                "Non-trainable params: 62 (248.00 Byte)\n"
+                "____________________________________________________________________________\n"  # noqa: E501
+                "____________________________________________________________________________\n"  # noqa: E501
+            )
+
+            fin_str = "".join(lines)
+
+            self.assertIn(fin_str, check_str)
+            self.assertEqual(len(lines), 15)
+        except ImportError:
+            pass
+
+    def test_print_summary_expand_nested_show_trainable(self):
+        shape = (None, None, 3)
+
+        def make_model():
+            x = inputs = keras.Input(shape, name="input2")
+            untrainable = keras.layers.Conv2D(3, 1)
+            untrainable.trainable = False
+            x = untrainable(x)
+            x = keras.layers.BatchNormalization()(x)
+            return keras.Model(inputs, x)
+
+        x = inner_inputs = keras.Input(shape, name="input1")
+        x = make_model()(x)
+        inner_model = keras.Model(inner_inputs, x)
+
+        inputs = keras.Input(shape, name="input3")
+        model = keras.Model(inputs, inner_model(inputs))
+
+        file_name = "model_6.txt"
+        temp_dir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+        fpath = os.path.join(temp_dir, file_name)
+        writer = open(fpath, "w")
+
+        def print_to_file(text):
+            print(text, file=writer)
+
+        try:
+            layer_utils.print_summary(
+                model,
+                print_fn=print_to_file,
+                expand_nested=True,
+                show_trainable=True,
+            )
+            self.assertTrue(tf.io.gfile.exists(fpath))
+            writer.close()
+            with open(fpath, "r") as reader:
+                lines = reader.readlines()
+            check_str = (
+                'Model: "model_2"\n'
+                "____________________________________________________________________________\n"  # noqa: E501
+                " Layer (type) Output Shape Param # Trainable \n"  # noqa: E501
+                "============================================================================\n"  # noqa: E501
+                " input3 (InputLayer) [(None, None, None, 3)] 0 Y \n"  # noqa: E501
+                " \n"  # noqa: E501
+                " model_1 (Functional) (None, None, None, 3) 24 Y \n"  # noqa: E501
+                "|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n"  # noqa: E501
+                "| input1 (InputLayer) [(None, None, None, 3)] 0 Y |\n"  # noqa: E501
+                "| |\n"  # noqa: E501
+                "| model (Functional) (None, None, None, 3) 24 Y |\n"  # noqa: E501
+                "||¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯||\n"  # noqa: E501
+                "|| input2 (InputLayer) [(None, None, None, 3)] 0 Y ||\n"  # noqa: E501
+                "|| ||\n"  # noqa: E501
+                "|| conv2d (Conv2D) (None, None, None, 3) 12 N ||\n"  # noqa: E501
+                "|| ||\n"  # noqa: E501
+                "|| batch_normalization (Bat (None, None, None, 3) 12 Y ||\n"  # noqa: E501
+                "|| chNormalization) ||\n"  # noqa: E501
+                "|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n"  # noqa: E501
+                "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n"  # noqa: E501
+                "============================================================================\n"  # noqa: E501
+                "Total params: 24 (96.00 Byte)\n"
+                "Trainable params: 6 (24.00 Byte)\n"
+                "Non-trainable params: 18 (72.00 Byte)\n"
+                "____________________________________________________________________________\n"  # noqa: E501
+            )
+
+            fin_str = "".join(lines)
+
+            self.assertIn(fin_str, check_str)
+            self.assertEqual(len(lines), 25)
+        except ImportError:
+            pass
+
+    def test_print_summary_layer_range(self):
+        model = keras.Sequential()
+        model.add(
+            keras.layers.Conv2D(
+                filters=2,
+                kernel_size=(2, 3),
+                input_shape=(3, 5, 5),
+                name="conv",
+            )
+        )
+        model.add(keras.layers.Flatten(name="flat"))
+        model.add(keras.layers.Dense(5, name="dense"))
+
+        file_name = "model_7.txt"
+        temp_dir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+        fpath = os.path.join(temp_dir, file_name)
+        writer = open(fpath, "w")
+
+        def print_to_file(text):
+            print(text, file=writer)
+
+        try:
+            layer_utils.print_summary(
+                model, print_fn=print_to_file, layer_range=["conv", "flat"]
+            )
+            self.assertTrue(tf.io.gfile.exists(fpath))
+            writer.close()
+            with open(fpath, "r") as reader:
+                lines = reader.readlines()
+            # The expected length with no layer filter is 15; we filter out
+            # 2 lines by excluding the layer 'dense'.
+            self.assertEqual(len(lines), 15 - 2)
+        except ImportError:
+            pass
+
+    def test_print_summary_layer_range_with_expand_nested(self):
+        shape = (None, None, 3)
+
+        def make_model():
+            x = inputs = keras.Input(shape, name="input_2")
+            x = keras.layers.Conv2D(3, 1)(x)
+            x = keras.layers.BatchNormalization()(x)
+            return keras.Model(inputs, x, name="2nd_inner")
+
+        x = inner_inputs = keras.Input(shape, name="input_1")
+        x = make_model()(x)
+        inner_model = keras.Model(inner_inputs, x, name="1st_inner")
+
+        inputs =
keras.Input(shape, name="input_3") + model = keras.Model(inputs, inner_model(inputs)) + + file_name = "model_8.txt" + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + fpath = os.path.join(temp_dir, file_name) + writer = open(fpath, "w") + + def print_to_file(text): + print(text, file=writer) + + try: + layer_utils.print_summary( + model, + print_fn=print_to_file, + expand_nested=True, + layer_range=["1st_inner", "1st_inner"], + ) + layer_utils.print_summary( + model, + expand_nested=True, + layer_range=["1st_inner", "1st_inner"], + ) + self.assertTrue(tf.io.gfile.exists(fpath)) + writer.close() + with open(fpath, "r") as reader: + lines = reader.readlines() + check_str = ( + 'Model: "model"\n' + "_________________________________________________________________\n" # noqa: E501 + " Layer (type) Output Shape Param # \n" # noqa: E501 + "=================================================================\n" # noqa: E501 + " 1st_inner (Functional) (None, None, None, 3) 24 \n" # noqa: E501 + "|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n" # noqa: E501 + "| input_1 (InputLayer) [(None, None, None, 3)] 0 |\n" # noqa: E501 + "| |\n" # noqa: E501 + "| 2nd_inner (Functional) (None, None, None, 3) 24 |\n" # noqa: E501 + "||¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯||\n" # noqa: E501 + "|| input_2 (InputLayer) [(None, None, None, 3)] 0 ||\n" # noqa: E501 + "|| ||\n" # noqa: E501 + "|| conv2d (Conv2D) (None, None, None, 3) 12 ||\n" # noqa: E501 + "|| ||\n" # noqa: E501 + "|| batch_normalization (Bat (None, None, None, 3) 12 ||\n" # noqa: E501 + "|| chNormalization) ||\n" # noqa: E501 + "|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|\n" # noqa: E501 + "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n" # noqa: E501 + "=================================================================\n" # noqa: E501 + "Total params: 24 (96.00 Byte)\n" + "Trainable params: 18 (72.00 Byte)\n" + "Non-trainable params: 6 (24.00 Byte)\n" + "_________________________________________________________________\n" # noqa: E501 + ) + + check_lines = check_str.split("\n")[ + :-1 + ] # Removing final empty string which is not a line + + fin_str = "".join(lines) + self.assertIn(fin_str, check_str) + self.assertEqual(len(lines), len(check_lines)) + except ImportError: + pass + + def test_weight_memory_size(self): + v1 = tf.Variable(tf.zeros(shape=(1, 2), dtype=tf.float32)) + v2 = tf.Variable(tf.zeros(shape=(2, 3), dtype=tf.float64)) + v3 = tf.Variable(tf.zeros(shape=(4, 5), dtype=tf.int16)) + v4 = tf.Variable(tf.zeros(shape=(6,), dtype=tf.uint8)) + + weights = [v1, v1, v2, v3, v4] + weight_memory_size = layer_utils.weight_memory_size(weights) + expected_memory_size = 1 * 2 * 4 + 2 * 3 * 8 + 4 * 5 * 2 + 6 * 1 + self.assertEqual(weight_memory_size, expected_memory_size) + + @parameterized.parameters( + (0, "0.00 Byte"), + (1000, "1000.00 Byte"), + (1024, "1.00 KB"), + (1024 * 2 - 1, "2.00 KB"), + (1024 * 2 + 1, "2.00 KB"), + (1024**2 + 1, "1.00 MB"), + (1024**3 - 1, "1024.00 MB"), + (1024**3, "1.00 GB"), + (1024**4, "1.00 TB"), + (1024**5, "1.00 PB"), + (1024**5 * 1.41415, "1.41 PB"), + ) + def test_readable_weight_memory_size(self, size, expected_result): + result = layer_utils.readable_memory_size(size) + self.assertEqual(result, expected_result) + + def test_property_cache(self): + test_counter = collections.Counter() + + class MyObject(tf.__internal__.tracking.AutoTrackable): + def __init__(self): + 
super().__init__() + self._frozen = True + + def __setattr__(self, key, value): + """Enforce that cache does not set attribute on MyObject.""" + if getattr(self, "_frozen", False): + raise ValueError("Cannot mutate when frozen.") + return super().__setattr__(key, value) + + @property + @layer_utils.cached_per_instance + def test_property(self): + test_counter[id(self)] += 1 + return id(self) + + first_object = MyObject() + second_object = MyObject() + + # Make sure the objects return the correct values + self.assertEqual(first_object.test_property, id(first_object)) + self.assertEqual(second_object.test_property, id(second_object)) + + # Make sure the cache does not share across objects + self.assertNotEqual( + first_object.test_property, second_object.test_property + ) + + # Check again (Now the values should be cached.) + self.assertEqual(first_object.test_property, id(first_object)) + self.assertEqual(second_object.test_property, id(second_object)) + + # Count the function calls to make sure the cache is actually being + # used. + self.assertAllEqual(tuple(test_counter.values()), (1, 1)) + + def test_property_cache_threaded(self): + call_count = collections.Counter() + + class MyObject(tf.__internal__.tracking.AutoTrackable): + @property + @layer_utils.cached_per_instance + def test_property(self): + # Random sleeps to ensure that the execution thread changes + # mid-computation. + call_count["test_property"] += 1 + time.sleep(np.random.random() + 1.0) + + # Use a RandomState which is seeded off the instance's id (the + # mod is because numpy limits the range of seeds) to ensure that + # an instance returns the same value in different threads, but + # different instances return different values. + return int( + np.random.RandomState(id(self) % (2**31)).randint(2**16) + ) + + def get_test_property(self, _): + """Function provided to .map for threading test.""" + return self.test_property + + # Test that multiple threads return the same value. This requires that + # the underlying function is repeatable, as cached_property makes no + # attempt to prioritize the first call. + test_obj = MyObject() + with contextlib.closing(multiprocessing.dummy.Pool(32)) as pool: + # Intentionally make a large pool (even when there are only a small + # number of cpus) to ensure that the runtime switches threads. + results = pool.map(test_obj.get_test_property, range(64)) + self.assertEqual(len(set(results)), 1) + + # Make sure we actually are testing threaded behavior. + self.assertGreater(call_count["test_property"], 1) + + # Make sure new threads still cache hit. + with contextlib.closing(multiprocessing.dummy.Pool(2)) as pool: + start_time = ( + timeit.default_timer() + ) # Don't time pool instantiation. + results = pool.map(test_obj.get_test_property, range(4)) + total_time = timeit.default_timer() - start_time + + # Note(taylorrobie): The reason that it is safe to time a unit test is + # that a cache hit will be << 1 second, and a cache miss is guaranteed + # to be >= 1 second. Empirically confirmed by 100,000 runs with no + # flakes. + self.assertLess(total_time, 0.95) + + def test_property_cache_serialization(self): + # Reset call count. .keys() must be wrapped in a list, because otherwise + # we would mutate the iterator while iterating. 
+ for k in list(_PICKLEABLE_CALL_COUNT.keys()): + _PICKLEABLE_CALL_COUNT.pop(k) + + first_instance = MyPickleableObject() + self.assertEqual(id(first_instance), first_instance.my_id) + + # Test that we can pickle and un-pickle + second_instance = pickle.loads(pickle.dumps(first_instance)) + + self.assertEqual(id(second_instance), second_instance.my_id) + self.assertNotEqual(first_instance.my_id, second_instance.my_id) + + # Make sure de-serialized object uses the cache. + self.assertEqual(_PICKLEABLE_CALL_COUNT[second_instance], 1) + + # Make sure the decorator cache is not being serialized with the object. + expected_size = len(pickle.dumps(second_instance)) + for _ in range(5): + # Add some more entries to the cache. + _ = MyPickleableObject().my_id + self.assertEqual(len(_PICKLEABLE_CALL_COUNT), 7) + size_check_instance = MyPickleableObject() + _ = size_check_instance.my_id + self.assertEqual(expected_size, len(pickle.dumps(size_check_instance))) + + def test_warmstart_embedding_matrix_with_list(self): + vocab_base = ["unk", "a", "b", "c"] + vocab_new = ["unk", "unk", "a", "b", "c", "d", "e"] + vectorized_vocab_base = np.random.rand(len(vocab_base), 3) + vectorized_vocab_new = np.random.rand(len(vocab_new), 3) + warmstarted_embedding_matrix = layer_utils.warmstart_embedding_matrix( + base_vocabulary=vocab_base, + new_vocabulary=vocab_new, + base_embeddings=vectorized_vocab_base, + new_embeddings_initializer=keras.initializers.Constant( + vectorized_vocab_new + ), + ) + self.assertAllEqual( + warmstarted_embedding_matrix[2], + vectorized_vocab_base[1], + ) + + def test_warmstart_embedding_matrix_with_nparray(self): + vocab_base = np.array(["unk", "a", "b", "c"]) + vocab_new = np.array(["unk", "unk", "a", "b", "c", "d", "e"]) + vectorized_vocab_base = np.random.rand(len(vocab_base), 3) + vectorized_vocab_new = np.random.rand(len(vocab_new), 3) + warmstarted_embedding_matrix = layer_utils.warmstart_embedding_matrix( + base_vocabulary=vocab_base, + new_vocabulary=vocab_new, + base_embeddings=vectorized_vocab_base, + new_embeddings_initializer=keras.initializers.Constant( + vectorized_vocab_new + ), + ) + self.assertAllEqual( + warmstarted_embedding_matrix[2], + vectorized_vocab_base[1], + ) + + @test_utils.run_v2_only + def test_warmstart_embedding_matrix_with_tensor(self): + vocab_base = tf.convert_to_tensor(["unk", "a", "b", "c"]) + vocab_new = tf.convert_to_tensor( + ["unk", "unk", "a", "b", "c", "d", "e"] + ) + vectorized_vocab_base = np.random.rand(vocab_base.shape[0], 3) + vectorized_vocab_new = np.random.rand(vocab_new.shape[0], 3) + warmstarted_embedding_matrix = layer_utils.warmstart_embedding_matrix( + base_vocabulary=vocab_base, + new_vocabulary=vocab_new, + base_embeddings=vectorized_vocab_base, + new_embeddings_initializer=keras.initializers.Constant( + vectorized_vocab_new + ), + ) + self.assertAllEqual( + warmstarted_embedding_matrix[2], + vectorized_vocab_base[1], + ) + + def test_warmstart_embedding_matrix_with_file_name(self): + def _write_list_to_file(filename, content_list): + with tf.io.gfile.GFile(filename, "w") as output_file: + for line in content_list: + output_file.write(line + "\n") + + vocab_base = ["UNK", "a", "b", "c"] + vocab_base_file = tempfile.mktemp(".tsv") + _write_list_to_file(vocab_base_file, vocab_base) + vocab_new = ["UNK", "UNK", "a", "b", "c", "d", "e"] + vocab_new_file = tempfile.mktemp(".tsv") + vectorized_vocab_base = np.random.rand(len(vocab_base), 3) + vectorized_vocab_new = np.random.rand(len(vocab_new), 3) + 
_write_list_to_file(vocab_new_file, vocab_new) + warmstarted_embedding_matrix = layer_utils.warmstart_embedding_matrix( + base_vocabulary=vocab_base_file, + new_vocabulary=vocab_new_file, + base_embeddings=vectorized_vocab_base, + new_embeddings_initializer=keras.initializers.Constant( + vectorized_vocab_new + ), + ) + self.assertAllEqual( + warmstarted_embedding_matrix[3], + vectorized_vocab_base[2], + ) + + def test_warmstart_default_initialization(self): + def _write_list_to_file(filename, content_list): + with tf.io.gfile.GFile(filename, "w") as output_file: + for line in content_list: + output_file.write(line + "\n") + + vocab_base = ["UNK", "a", "b", "c"] + vocab_base_file = tempfile.mktemp(".tsv") + _write_list_to_file(vocab_base_file, vocab_base) + vocab_new = ["UNK", "UNK", "a", "b", "c", "d", "e"] + vocab_new_file = tempfile.mktemp(".tsv") + vectorized_vocab_base = np.random.rand(len(vocab_base), 3) + _write_list_to_file(vocab_new_file, vocab_new) + warmstarted_embedding_matrix = layer_utils.warmstart_embedding_matrix( + base_vocabulary=vocab_base_file, + new_vocabulary=vocab_new_file, + base_embeddings=vectorized_vocab_base, + ) + self.assertAllEqual( + warmstarted_embedding_matrix[3], + vectorized_vocab_base[2], + ) + + def test_warmstart_default_value(self): + vocab_base = np.array(["unk", "a", "b", "c"]) + vocab_new = np.array(["unk", "unk", "a", "b", "c", "d", "e"]) + vectorized_vocab_base = np.random.rand(len(vocab_base), 3) + warmstarted_embedding_matrix = layer_utils.warmstart_embedding_matrix( + base_vocabulary=vocab_base, + new_vocabulary=vocab_new, + base_embeddings=vectorized_vocab_base, + ) + self.assertAllEqual( + warmstarted_embedding_matrix[2], + vectorized_vocab_base[1], + ) + + def test_warmstart_with_randomuniform_initializer(self): + vocab_base = np.array(["unk", "a", "b", "c"]) + vocab_new = np.array(["unk", "unk", "a", "b", "c", "d", "e"]) + vectorized_vocab_base = np.random.rand(len(vocab_base), 3) + warmstarted_embedding_matrix = layer_utils.warmstart_embedding_matrix( + base_vocabulary=vocab_base, + new_vocabulary=vocab_new, + base_embeddings=vectorized_vocab_base, + new_embeddings_initializer="RandomUniform", + ) + self.assertAllEqual( + warmstarted_embedding_matrix[2], + vectorized_vocab_base[1], + ) + + def test_warmstart_with_nothing_in_common(self): + vocab_base = np.array(["unk", "a", "b", "c"]) + vocab_new = np.array(["d", "e", "f", "g", "h"]) + vectorized_vocab_base = np.random.rand(len(vocab_base), 3) + vectorized_vocab_new = np.random.rand(len(vocab_new), 3) + warmstarted_embedding_matrix = layer_utils.warmstart_embedding_matrix( + base_vocabulary=vocab_base, + new_vocabulary=vocab_new, + base_embeddings=vectorized_vocab_base, + new_embeddings_initializer=keras.initializers.Constant( + vectorized_vocab_new + ), + ) + self.assertAllEqual( + warmstarted_embedding_matrix, + vectorized_vocab_new, + ) + + def test_warmstart_with_new_vocab_smaller(self): + vocab_base = np.array(["unk", "a", "b", "c"]) + vocab_new = np.array(["d", "e", "f", "a"]) + vectorized_vocab_base = np.random.rand(len(vocab_base), 3) + warmstarted_embedding_matrix = layer_utils.warmstart_embedding_matrix( + base_vocabulary=vocab_base, + new_vocabulary=vocab_new, + base_embeddings=vectorized_vocab_base, + new_embeddings_initializer="uniform", + ) + self.assertAllEqual( + warmstarted_embedding_matrix[3], + vectorized_vocab_base[1], + ) + + +@test_utils.run_v2_only +class DTensorVariableSummaryTest(test_util.DTensorBaseTest): + def setUp(self): + super().setUp() + 
backend.reset_uids() + backend.enable_tf_random_generator() + tf_utils.set_random_seed(1337) + global_ids = test_util.create_device_ids_array((2, 2)) + local_device_ids = np.ravel(global_ids).tolist() + mesh_dict = { + "CPU": dtensor.Mesh( + ["batch", "model"], + global_ids, + local_device_ids, + test_util.create_device_list((2, 2), "CPU"), + ) + } + self.mesh = self.configTestMesh(mesh_dict) + self.replicated_2d = dtensor.Layout.replicated(self.mesh, rank=2) + self.replicated_1d = dtensor.Layout.replicated(self.mesh, rank=1) + self.sharded_2d = dtensor.Layout(["model", "batch"], self.mesh) + self.sharded_1d = dtensor.Layout(["model"], self.mesh) + + def test_model_summary(self): + layout_map = layout_map_lib.LayoutMap(mesh=self.mesh) + layout_map["d1.kernel"] = self.replicated_2d + layout_map["d1.bias"] = self.replicated_1d + layout_map["d2.kernel"] = self.sharded_2d + layout_map["d2.bias"] = self.sharded_1d + + with layout_map.scope(): + inputs = layers.Input((10,), batch_size=10) + x = layers.Dense(20, name="d1")(inputs) + x = layers.Dropout(0.1)(x) + output = layers.Dense(30, name="d2")(x) + + model = keras.Model(inputs, output) + + # For dtype = float32, following value are expected from memory stats + expected_result = {} + replicated_var_count = 10 * 20 + 20 # For d1 kernel and bias + model_batch_shard_var_count = 30 * 20 # For d2 kernel + model_shard_var_count = 30 # For d2 bias + expected_result[()] = (replicated_var_count, replicated_var_count * 4) + expected_result[("batch", "model")] = ( + model_batch_shard_var_count, + model_batch_shard_var_count * 4, + ) + expected_result[("model",)] = ( + model_shard_var_count, + model_shard_var_count * 4, + ) + + expected_total_weight_count = ( + replicated_var_count + + model_batch_shard_var_count + + model_shard_var_count + ) + expected_total_memory_size = expected_total_weight_count * 4 + + ( + total_weight_count, + total_memory_size, + per_sharing_spec_result, + ) = layer_utils.dtensor_variable_summary(model.weights) + + self.assertEqual(total_weight_count, expected_total_weight_count) + self.assertEqual(total_memory_size, expected_total_memory_size) + self.assertDictEqual(per_sharing_spec_result, expected_result) + + output_buffer = io.StringIO() + + def print_to_buffer(content): + output_buffer.write(content) + + model.summary(print_fn=print_to_buffer) + + self.assertRegex( + output_buffer.getvalue(), + f"{replicated_var_count} / {expected_total_weight_count} params " + ".* are fully replicated", + ) + self.assertRegex( + output_buffer.getvalue(), + f"{model_batch_shard_var_count} / {expected_total_weight_count} " + r"params .* are sharded based on spec .*batch.*model" + r".* across 4 devices", + ) + self.assertRegex( + output_buffer.getvalue(), + f"{model_shard_var_count} / {expected_total_weight_count} " + r"params .* are sharded based on spec .*model" + r".* across 2 devices", + ) + self.assertIn( + "Overall per device memory usage: 1.50 KB", output_buffer.getvalue() + ) + self.assertIn("Overall sharding factor: 2.21", output_buffer.getvalue()) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/optimizers/optimizer_v2/__init__.py b/keras/utils/legacy/__init__.py similarity index 69% rename from keras/optimizers/optimizer_v2/__init__.py rename to keras/utils/legacy/__init__.py index 78cb171abbaf..d4dd953bea8f 100644 --- a/keras/optimizers/optimizer_v2/__init__.py +++ b/keras/utils/legacy/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
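For context on the layer_utils tests above, this is roughly how `warmstart_embedding_matrix` is called on its own. A minimal sketch; the vocabularies, embedding width, and assertions are illustrative only and not part of the change:

import numpy as np

from keras.utils import layer_utils

base_vocab = ["unk", "a", "b", "c"]
new_vocab = ["unk", "a", "b", "c", "d", "e"]
base_embeddings = np.random.rand(len(base_vocab), 3)

# Rows for tokens shared by both vocabularies are copied over from
# `base_embeddings`; rows for the new tokens "d" and "e" are drawn from
# `new_embeddings_initializer` (default "uniform").
matrix = layer_utils.warmstart_embedding_matrix(
    base_vocabulary=base_vocab,
    new_vocabulary=new_vocab,
    base_embeddings=base_embeddings,
    new_embeddings_initializer="uniform",
)
assert matrix.shape == (len(new_vocab), 3)
np.testing.assert_allclose(matrix[1], base_embeddings[1])  # row for "a"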
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,3 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +"""Legacy public Keras utilities.""" + +# isort: off + +# Serialization related +from keras.saving.legacy.serialization import deserialize_keras_object +from keras.saving.legacy.serialization import serialize_keras_object diff --git a/keras/utils/losses_utils.py b/keras/utils/losses_utils.py index ab99e2115793..28a450bce298 100644 --- a/keras/utils/losses_utils.py +++ b/keras/utils/losses_utils.py @@ -12,360 +12,423 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access + """Utilities related to loss functions.""" import tensorflow.compat.v2 as tf + from keras import backend from keras.engine import keras_tensor from keras.utils import tf_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.losses.Reduction', v1=[]) +@keras_export("keras.losses.Reduction", v1=[]) class ReductionV2: - """Types of loss reduction. - - Contains the following values: - - * `AUTO`: Indicates that the reduction option will be determined by the usage - context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When - used with `tf.distribute.Strategy`, outside of built-in training loops such - as `tf.keras` `compile` and `fit`, we expect reduction value to be - `SUM` or `NONE`. Using `AUTO` in that case will raise an error. - * `NONE`: No **additional** reduction is applied to the output of the wrapped - loss function. When non-scalar losses are returned to Keras functions like - `fit`/`evaluate`, the unreduced vector loss is passed to the optimizer - but the reported loss will be a scalar value. - - Caution: **Verify the shape of the outputs when using** `Reduction.NONE`. - The builtin loss functions wrapped by the loss classes reduce - one dimension (`axis=-1`, or `axis` if specified by loss function). - `Reduction.NONE` just means that no **additional** reduction is applied by - the class wrapper. For categorical losses with an example input shape of - `[batch, W, H, n_classes]` the `n_classes` dimension is reduced. For - pointwise losses you must include a dummy axis so that `[batch, W, H, 1]` - is reduced to `[batch, W, H]`. Without the dummy axis `[batch, W, H]` - will be incorrectly reduced to `[batch, W]`. - - * `SUM`: Scalar sum of weighted losses. - * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses. - This reduction type is not supported when used with - `tf.distribute.Strategy` outside of built-in training loops like `tf.keras` - `compile`/`fit`. - - You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like: - ``` - with strategy.scope(): - loss_obj = tf.keras.losses.CategoricalCrossentropy( - reduction=tf.keras.losses.Reduction.NONE) - .... - loss = tf.reduce_sum(loss_obj(labels, predictions)) * - (1. / global_batch_size) - ``` - - Please see the [custom training guide]( - https://www.tensorflow.org/tutorials/distribute/custom_training) for more - details on this. 
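To make the reduction semantics documented here concrete, a small sketch using the public `tf.keras` API (values are illustrative):

import numpy as np
import tensorflow as tf

y_true = tf.constant([[0.0], [1.0]])
y_pred = tf.constant([[0.4], [0.6]])

# NONE keeps one (already axis-reduced) loss value per sample.
mse_none = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.NONE
)
per_sample = mse_none(y_true, y_pred)  # shape [2] -> [0.16, 0.16]

# SUM_OVER_BATCH_SIZE reduces the same values to their scalar mean.
mse_mean = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE
)
scalar = mse_mean(y_true, y_pred)  # shape [] -> 0.16
np.testing.assert_allclose(scalar.numpy(), per_sample.numpy().mean())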
- """ - - AUTO = 'auto' - NONE = 'none' - SUM = 'sum' - SUM_OVER_BATCH_SIZE = 'sum_over_batch_size' - - @classmethod - def all(cls): - return (cls.AUTO, cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE) - - @classmethod - def validate(cls, key): - if key not in cls.all(): - raise ValueError( - f'Invalid Reduction Key: {key}. Expected keys are "{cls.all()}"') + """Types of loss reduction. + + Contains the following values: + + * `AUTO`: Indicates that the reduction option will be determined by the + usage context. For almost all cases this uses `SUM_OVER_BATCH_SIZE`. + When used with `tf.distribute.Strategy`, outside of built-in training + loops such as `tf.keras` `compile` and `fit`, we expect reduction + value to be `SUM` or `NONE`. Using `AUTO` in that case will raise an + error. + * `NONE`: No **additional** reduction is applied to the output of the + wrapped loss function. When non-scalar losses are returned to Keras + functions like `fit`/`evaluate`, the unreduced vector loss is passed to + the optimizer but the reported loss will be a scalar value. + + Caution: **Verify the shape of the outputs when using** `Reduction.NONE`. + The builtin loss functions wrapped by the loss classes reduce one + dimension (`axis=-1`, or `axis` if specified by loss function). + `Reduction.NONE` just means that no **additional** reduction is applied + by the class wrapper. For categorical losses with an example input shape + of `[batch, W, H, n_classes]` the `n_classes` dimension is reduced. For + pointwise losses you must include a dummy axis so that `[batch, W, H, 1]` + is reduced to `[batch, W, H]`. Without the dummy axis `[batch, W, H]` + will be incorrectly reduced to `[batch, W]`. + + * `SUM`: Scalar sum of weighted losses. + * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in + losses. This reduction type is not supported when used with + `tf.distribute.Strategy` outside of built-in training loops like + `tf.keras` `compile`/`fit`. + + You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like: + ``` + with strategy.scope(): + loss_obj = tf.keras.losses.CategoricalCrossentropy( + reduction=tf.keras.losses.Reduction.NONE) + .... + loss = tf.reduce_sum(loss_obj(labels, predictions)) * + (1. / global_batch_size) + ``` + + Please see the [custom training guide]( + https://www.tensorflow.org/tutorials/distribute/custom_training) for more + details on this. + """ + + AUTO = "auto" + NONE = "none" + SUM = "sum" + SUM_OVER_BATCH_SIZE = "sum_over_batch_size" + + @classmethod + def all(cls): + return (cls.AUTO, cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE) + + @classmethod + def validate(cls, key): + if key not in cls.all(): + raise ValueError( + f'Invalid Reduction Key: {key}. Expected keys are "{cls.all()}"' + ) def remove_squeezable_dimensions( - labels, predictions, expected_rank_diff=0, name=None): - """Squeeze last dim if ranks differ from expected by exactly 1. - - In the common case where we expect shapes to match, `expected_rank_diff` - defaults to 0, and we squeeze the last dimension of the larger rank if they - differ by 1. - - But, for example, if `labels` contains class IDs and `predictions` contains 1 - probability per class, we expect `predictions` to have 1 more dimension than - `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze - `labels` if `rank(predictions) - rank(labels) == 0`, and - `predictions` if `rank(predictions) - rank(labels) == 2`. - - This will use static shape if available. 
Otherwise, it will add graph - operations, which could result in a performance hit. - - Args: - labels: Label values, a `Tensor` whose dimensions match `predictions`. - predictions: Predicted values, a `Tensor` of arbitrary dimensions. - expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`. - name: Name of the op. - - Returns: - Tuple of `labels` and `predictions`, possibly with last dim squeezed. - """ - with backend.name_scope(name or 'remove_squeezable_dimensions'): - if not tf_utils.is_tensor_or_extension_type(predictions): - predictions = tf.convert_to_tensor(predictions) - if not tf_utils.is_tensor_or_extension_type(labels): - labels = tf.convert_to_tensor(labels) - predictions_shape = predictions.shape - predictions_rank = predictions_shape.ndims - labels_shape = labels.shape - labels_rank = labels_shape.ndims - if (labels_rank is not None) and (predictions_rank is not None): - # Use static rank. - rank_diff = predictions_rank - labels_rank - if (rank_diff == expected_rank_diff + 1 and - predictions_shape.dims[-1].is_compatible_with(1)): - predictions = tf.squeeze(predictions, [-1]) - elif (rank_diff == expected_rank_diff - 1 and - labels_shape.dims[-1].is_compatible_with(1)): - labels = tf.squeeze(labels, [-1]) - return labels, predictions - - # Use dynamic rank. - rank_diff = tf.rank(predictions) - tf.rank(labels) - if (predictions_rank is None) or ( - predictions_shape.dims[-1].is_compatible_with(1)): - predictions = tf.cond( - tf.equal(expected_rank_diff + 1, rank_diff), - lambda: tf.squeeze(predictions, [-1]), - lambda: predictions) - if (labels_rank is None) or ( - labels_shape.dims[-1].is_compatible_with(1)): - labels = tf.cond( - tf.equal(expected_rank_diff - 1, rank_diff), - lambda: tf.squeeze(labels, [-1]), - lambda: labels) - return labels, predictions + labels, predictions, expected_rank_diff=0, name=None +): + """Squeeze last dim if ranks differ from expected by exactly 1. + + In the common case where we expect shapes to match, `expected_rank_diff` + defaults to 0, and we squeeze the last dimension of the larger rank if they + differ by 1. + + But, for example, if `labels` contains class IDs and `predictions` contains + 1 probability per class, we expect `predictions` to have 1 more dimension + than `labels`, so `expected_rank_diff` would be 1. In this case, we'd + squeeze `labels` if `rank(predictions) - rank(labels) == 0`, and + `predictions` if `rank(predictions) - rank(labels) == 2`. + + This will use static shape if available. Otherwise, it will add graph + operations, which could result in a performance hit. + + Args: + labels: Label values, a `Tensor` whose dimensions match `predictions`. + predictions: Predicted values, a `Tensor` of arbitrary dimensions. + expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`. + name: Name of the op. + + Returns: + Tuple of `labels` and `predictions`, possibly with last dim squeezed. + """ + with backend.name_scope(name or "remove_squeezable_dimensions"): + if not tf_utils.is_tensor_or_extension_type(predictions): + predictions = tf.convert_to_tensor(predictions) + if not tf_utils.is_tensor_or_extension_type(labels): + labels = tf.convert_to_tensor(labels) + predictions_shape = predictions.shape + predictions_rank = predictions_shape.ndims + labels_shape = labels.shape + labels_rank = labels_shape.ndims + if (labels_rank is not None) and (predictions_rank is not None): + # Use static rank. 
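A quick sketch of the squeezing rules spelled out in the docstring above; shapes are illustrative and eager execution is assumed:

import tensorflow as tf

from keras.utils import losses_utils

labels = tf.zeros([8])     # rank 1
preds = tf.zeros([8, 1])   # rank 2 with a trailing dim of size 1
_, preds_out = losses_utils.remove_squeezable_dimensions(labels, preds)
assert preds_out.shape.ndims == 1  # trailing size-1 dim was squeezed

# With integer class IDs and per-class scores, a rank difference of 1 is
# expected, so nothing is squeezed.
labels = tf.constant([0, 1, 2])
preds = tf.random.uniform([3, 5])
_, preds_out = losses_utils.remove_squeezable_dimensions(
    labels, preds, expected_rank_diff=1
)
assert preds_out.shape.ndims == 2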
+ rank_diff = predictions_rank - labels_rank + if rank_diff == expected_rank_diff + 1 and predictions_shape.dims[ + -1 + ].is_compatible_with(1): + predictions = tf.squeeze(predictions, [-1]) + elif rank_diff == expected_rank_diff - 1 and labels_shape.dims[ + -1 + ].is_compatible_with(1): + labels = tf.squeeze(labels, [-1]) + return labels, predictions + + # Use dynamic rank. + rank_diff = tf.rank(predictions) - tf.rank(labels) + if (predictions_rank is None) or ( + predictions_shape.dims[-1].is_compatible_with(1) + ): + predictions = tf.cond( + tf.equal(expected_rank_diff + 1, rank_diff), + lambda: tf.squeeze(predictions, [-1]), + lambda: predictions, + ) + if (labels_rank is None) or ( + labels_shape.dims[-1].is_compatible_with(1) + ): + labels = tf.cond( + tf.equal(expected_rank_diff - 1, rank_diff), + lambda: tf.squeeze(labels, [-1]), + lambda: labels, + ) + return labels, predictions def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None): - """Squeeze or expand last dimension if needed. - - 1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1 - (using `remove_squeezable_dimensions`). - 2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1 - from the new rank of `y_pred`. - If `sample_weight` is scalar, it is kept scalar. - - This will use static shape if available. Otherwise, it will add graph - operations, which could result in a performance hit. - - Args: - y_pred: Predicted values, a `Tensor` of arbitrary dimensions. - y_true: Optional label `Tensor` whose dimensions match `y_pred`. - sample_weight: Optional weight scalar or `Tensor` whose dimensions match - `y_pred`. - - Returns: - Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has - the last dimension squeezed, - `sample_weight` could be extended by one dimension. - If `sample_weight` is None, (y_pred, y_true) is returned. - """ - y_pred_shape = y_pred.shape - y_pred_rank = y_pred_shape.ndims - if y_true is not None: - - # If sparse matrix is provided as `y_true`, the last dimension in `y_pred` - # may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)), - # y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3)) - # In this case, we should not try to remove squeezable dimension. - y_true_shape = y_true.shape - y_true_rank = y_true_shape.ndims - if (y_true_rank is not None) and (y_pred_rank is not None): - # Use static rank for `y_true` and `y_pred`. - if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1: - y_true, y_pred = remove_squeezable_dimensions( - y_true, y_pred) - else: - # Use dynamic rank. - rank_diff = tf.rank(y_pred) - tf.rank(y_true) - squeeze_dims = lambda: remove_squeezable_dimensions( # pylint: disable=g-long-lambda - y_true, y_pred) - is_last_dim_1 = tf.equal(1, tf.shape(y_pred)[-1]) - maybe_squeeze_dims = lambda: tf.cond( # pylint: disable=g-long-lambda - is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred)) - y_true, y_pred = tf.cond( - tf.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims) - - if sample_weight is None: - return y_pred, y_true - - weights_shape = sample_weight.shape - weights_rank = weights_shape.ndims - if weights_rank == 0: # If weights is scalar, do nothing. - return y_pred, y_true, sample_weight + """Squeeze or expand last dimension if needed. + + 1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1 + (using `remove_squeezable_dimensions`). + 2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1 + from the new rank of `y_pred`. 
+ If `sample_weight` is scalar, it is kept scalar. + + This will use static shape if available. Otherwise, it will add graph + operations, which could result in a performance hit. + + Args: + y_pred: Predicted values, a `Tensor` of arbitrary dimensions. + y_true: Optional label `Tensor` whose dimensions match `y_pred`. + sample_weight: Optional weight scalar or `Tensor` whose dimensions match + `y_pred`. + + Returns: + Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has + the last dimension squeezed, + `sample_weight` could be extended by one dimension. + If `sample_weight` is None, (y_pred, y_true) is returned. + """ + y_pred_shape = y_pred.shape + y_pred_rank = y_pred_shape.ndims + if y_true is not None: + + # If sparse matrix is provided as `y_true`, the last dimension in + # `y_pred` may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)), y_pred = + # [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3)) In + # this case, we should not try to remove squeezable dimension. + y_true_shape = y_true.shape + y_true_rank = y_true_shape.ndims + if (y_true_rank is not None) and (y_pred_rank is not None): + # Use static rank for `y_true` and `y_pred`. + if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1: + y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred) + else: + # Use dynamic rank. + rank_diff = tf.rank(y_pred) - tf.rank(y_true) + squeeze_dims = lambda: remove_squeezable_dimensions(y_true, y_pred) + is_last_dim_1 = tf.equal(1, tf.shape(y_pred)[-1]) + maybe_squeeze_dims = lambda: tf.cond( + is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred) + ) + y_true, y_pred = tf.cond( + tf.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims + ) + + if sample_weight is None: + return y_pred, y_true + + weights_shape = sample_weight.shape + weights_rank = weights_shape.ndims + if weights_rank == 0: # If weights is scalar, do nothing. + return y_pred, y_true, sample_weight + + if (y_pred_rank is not None) and (weights_rank is not None): + # Use static rank. + if weights_rank - y_pred_rank == 1: + sample_weight = tf.squeeze(sample_weight, [-1]) + elif y_pred_rank - weights_rank == 1: + sample_weight = tf.expand_dims(sample_weight, [-1]) + return y_pred, y_true, sample_weight - if (y_pred_rank is not None) and (weights_rank is not None): - # Use static rank. - if weights_rank - y_pred_rank == 1: - sample_weight = tf.squeeze(sample_weight, [-1]) - elif y_pred_rank - weights_rank == 1: - sample_weight = tf.expand_dims(sample_weight, [-1]) + # Use dynamic rank. + weights_rank_tensor = tf.rank(sample_weight) + rank_diff = weights_rank_tensor - tf.rank(y_pred) + maybe_squeeze_weights = lambda: tf.squeeze(sample_weight, [-1]) + + def _maybe_expand_weights(): + expand_weights = lambda: tf.expand_dims(sample_weight, [-1]) + return tf.cond( + tf.equal(rank_diff, -1), expand_weights, lambda: sample_weight + ) + + def _maybe_adjust_weights(): + return tf.cond( + tf.equal(rank_diff, 1), maybe_squeeze_weights, _maybe_expand_weights + ) + + # squeeze or expand last dim of `sample_weight` if its rank differs by 1 + # from the new rank of `y_pred`. + sample_weight = tf.cond( + tf.equal(weights_rank_tensor, 0), + lambda: sample_weight, + _maybe_adjust_weights, + ) return y_pred, y_true, sample_weight - # Use dynamic rank. 
- weights_rank_tensor = tf.rank(sample_weight) - rank_diff = weights_rank_tensor - tf.rank(y_pred) - maybe_squeeze_weights = lambda: tf.squeeze(sample_weight, [-1]) - - def _maybe_expand_weights(): - expand_weights = lambda: tf.expand_dims(sample_weight, [-1]) - return tf.cond( - tf.equal(rank_diff, -1), expand_weights, lambda: sample_weight) - - def _maybe_adjust_weights(): - return tf.cond( - tf.equal(rank_diff, 1), maybe_squeeze_weights, - _maybe_expand_weights) - - # squeeze or expand last dim of `sample_weight` if its rank differs by 1 - # from the new rank of `y_pred`. - sample_weight = tf.cond( - tf.equal(weights_rank_tensor, 0), lambda: sample_weight, - _maybe_adjust_weights) - return y_pred, y_true, sample_weight - def _safe_mean(losses, num_present): - """Computes a safe mean of the losses. + """Computes a safe mean of the losses. - Args: - losses: `Tensor` whose elements contain individual loss measurements. - num_present: The number of measurable elements in `losses`. + Args: + losses: `Tensor` whose elements contain individual loss measurements. + num_present: The number of measurable elements in `losses`. - Returns: - A scalar representing the mean of `losses`. If `num_present` is zero, - then zero is returned. - """ - total_loss = tf.reduce_sum(losses) - return tf.math.divide_no_nan(total_loss, num_present, name='value') + Returns: + A scalar representing the mean of `losses`. If `num_present` is zero, + then zero is returned. + """ + total_loss = tf.reduce_sum(losses) + return tf.math.divide_no_nan(total_loss, num_present, name="value") def _num_elements(losses): - """Computes the number of elements in `losses` tensor.""" - with backend.name_scope('num_elements') as scope: - return tf.cast(tf.size(losses, name=scope), dtype=losses.dtype) - - -def reduce_weighted_loss(weighted_losses, - reduction=ReductionV2.SUM_OVER_BATCH_SIZE): - """Reduces the individual weighted loss measurements.""" - if reduction == ReductionV2.NONE: - loss = weighted_losses - else: - loss = tf.reduce_sum(weighted_losses) - if reduction == ReductionV2.SUM_OVER_BATCH_SIZE: - loss = _safe_mean(loss, _num_elements(weighted_losses)) - return loss - - -@keras_export('keras.__internal__.losses.compute_weighted_loss', v1=[]) -def compute_weighted_loss(losses, - sample_weight=None, - reduction=ReductionV2.SUM_OVER_BATCH_SIZE, - name=None): - """Computes the weighted loss. - - Args: - losses: `Tensor` of shape `[batch_size, d1, ... dN]`. - sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as - `losses`, or be broadcastable to `losses`. - reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. - Default value is `SUM_OVER_BATCH_SIZE`. - name: Optional name for the op. - - Raises: - ValueError: If the shape of `sample_weight` is not compatible with `losses`. - - Returns: - Weighted loss `Tensor` of the same type as `losses`. If `reduction` is - `NONE`, this has the same shape as `losses`; otherwise, it is scalar. - """ - ReductionV2.validate(reduction) - - # If this function is called directly, then we just default 'AUTO' to - # 'SUM_OVER_BATCH_SIZE'. Eg. Canned estimator use cases. - if reduction == ReductionV2.AUTO: - reduction = ReductionV2.SUM_OVER_BATCH_SIZE - if sample_weight is None: - sample_weight = 1.0 - with backend.name_scope(name or 'weighted_loss'): - # Save the `reduction` argument for loss normalization when distributing - # to multiple replicas. Used only for estimator + v1 optimizer flow. 
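A small sketch of the reduction helpers defined here; `_safe_mean` relies on `tf.math.divide_no_nan`, so an empty loss vector reduces to 0 rather than NaN (values are illustrative):

import tensorflow as tf

from keras.utils import losses_utils

weighted = tf.constant([1.0, 2.0, 3.0, 6.0])

mean_loss = losses_utils.reduce_weighted_loss(weighted)
# SUM_OVER_BATCH_SIZE (the default): 12.0 / 4 = 3.0

sum_loss = losses_utils.reduce_weighted_loss(
    weighted, reduction=losses_utils.ReductionV2.SUM
)
# SUM: 12.0

empty_loss = losses_utils.reduce_weighted_loss(tf.zeros([0]))
# Empty input: divide_no_nan(0.0, 0.0) yields 0.0, not NaN.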
-    tf.compat.v1.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access
-
-    if not isinstance(losses,
-                      (keras_tensor.KerasTensor, tf.RaggedTensor)):
-      losses = tf.convert_to_tensor(losses)
-
-    if not isinstance(sample_weight,
-                      (keras_tensor.KerasTensor, tf.RaggedTensor)):
-      sample_weight = tf.convert_to_tensor(sample_weight)
-
-    # Convert any non float dtypes to floats, to avoid it loss any precision for
-    # dtype like int or bool.
-    if not losses.dtype.is_floating:
-      input_dtype = losses.dtype
-      losses = tf.cast(losses, 'float32')
-      input_casted = True
+    """Computes the number of elements in `losses` tensor."""
+    with backend.name_scope("num_elements") as scope:
+        return tf.cast(tf.size(losses, name=scope), dtype=losses.dtype)
+
+
+def reduce_weighted_loss(
+    weighted_losses, reduction=ReductionV2.SUM_OVER_BATCH_SIZE
+):
+    """Reduces the individual weighted loss measurements."""
+    if reduction == ReductionV2.NONE:
+        loss = weighted_losses
     else:
-      input_casted = False
-    sample_weight = tf.cast(sample_weight, losses.dtype)
-    # Update dimensions of `sample_weight` to match with `losses` if possible.
-    losses, _, sample_weight = squeeze_or_expand_dimensions(  # pylint: disable=unbalanced-tuple-unpacking
-        losses, None, sample_weight)
-    weighted_losses = tf.multiply(losses, sample_weight)
-
-    # Apply reduction function to the individual weighted losses.
-    loss = reduce_weighted_loss(weighted_losses, reduction)
-    if input_casted:
-      # Convert the result back to the input type.
-      loss = tf.cast(loss, input_dtype)
+        loss = tf.reduce_sum(weighted_losses)
+    if reduction == ReductionV2.SUM_OVER_BATCH_SIZE:
+        loss = _safe_mean(loss, _num_elements(weighted_losses))
     return loss


+@keras_export("keras.__internal__.losses.compute_weighted_loss", v1=[])
+def compute_weighted_loss(
+    losses,
+    sample_weight=None,
+    reduction=ReductionV2.SUM_OVER_BATCH_SIZE,
+    name=None,
+):
+    """Computes the weighted loss.
+
+    Args:
+      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
+      sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
+        as `losses`, or be broadcastable to `losses`.
+      reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
+        loss. Default value is `SUM_OVER_BATCH_SIZE`.
+      name: Optional name for the op.
+
+    Raises:
+      ValueError: If the shape of `sample_weight` is not compatible with
+        `losses`.
+
+    Returns:
+      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
+      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
+    """
+    ReductionV2.validate(reduction)
+
+    # If this function is called directly, then we just default 'AUTO' to
+    # 'SUM_OVER_BATCH_SIZE'. Eg. Canned estimator use cases.
+    if reduction == ReductionV2.AUTO:
+        reduction = ReductionV2.SUM_OVER_BATCH_SIZE
+    if sample_weight is None:
+        sample_weight = 1.0
+    with backend.name_scope(name or "weighted_loss"):
+        # Save the `reduction` argument for loss normalization when distributing
+        # to multiple replicas. Used only for estimator + v1 optimizer flow.
+        tf.compat.v1.get_default_graph()._last_loss_reduction = reduction
+
+        if not isinstance(losses, (keras_tensor.KerasTensor, tf.RaggedTensor)):
+            losses = tf.convert_to_tensor(losses)
+
+        if not isinstance(
+            sample_weight, (keras_tensor.KerasTensor, tf.RaggedTensor)
+        ):
+            sample_weight = tf.convert_to_tensor(sample_weight)
+
+        # Convert any non-float dtypes to floats, to avoid losing any precision
+        # for dtypes like int or bool.
+ if not losses.dtype.is_floating: + input_dtype = losses.dtype + losses = tf.cast(losses, "float32") + input_casted = True + else: + input_casted = False + sample_weight = tf.cast(sample_weight, losses.dtype) + # Update dimensions of `sample_weight` to match with `losses` if + # possible. + ( + losses, + _, + sample_weight, + ) = squeeze_or_expand_dimensions(losses, None, sample_weight) + weighted_losses = tf.multiply(losses, sample_weight) + + # Apply reduction function to the individual weighted losses. + loss = reduce_weighted_loss(weighted_losses, reduction) + if input_casted: + # Convert the result back to the input type. + loss = tf.cast(loss, input_dtype) + return loss + + def scale_loss_for_distribution(loss_value): - """Scales and returns the given loss value by the number of replicas.""" - num_replicas = ( - tf.distribute.get_strategy().num_replicas_in_sync) - if num_replicas > 1: - loss_value *= (1. / num_replicas) - return loss_value + """Scales and returns the given loss value by the number of replicas.""" + num_replicas = tf.distribute.get_strategy().num_replicas_in_sync + if num_replicas > 1: + loss_value *= 1.0 / num_replicas + return loss_value def cast_losses_to_common_dtype(losses): - """Cast a list of losses to a common dtype. - - If any loss is floating-point, they will all be casted to the most-precise - floating-point loss. Otherwise the losses are not casted. We also skip casting - losses if there are any complex losses. - - Args: - losses: A list of losses. - - Returns: - `losses`, but they have been casted to a common dtype. - """ - highest_float = None - for loss in losses: - if loss.dtype.is_floating: - if highest_float is None or loss.dtype.size > highest_float.size: - highest_float = loss.dtype - elif {loss.dtype, highest_float} == {'bfloat16', 'float16'}: - highest_float = 'float32' - if loss.dtype.is_complex: - return losses # If we find any complex losses, do not cast any losses - if highest_float: - losses = [tf.cast(loss, highest_float) for loss in losses] - return losses + """Cast a list of losses to a common dtype. + + If any loss is floating-point, they will all be casted to the most-precise + floating-point loss. Otherwise the losses are not casted. We also skip + casting losses if there are any complex losses. + + Args: + losses: A list of losses. + + Returns: + `losses`, but they have been casted to a common dtype. 
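A short sketch of the dtype promotion this helper performs (dtypes are illustrative):

import tensorflow as tf

from keras.utils import losses_utils

losses = [
    tf.constant(1.0, dtype=tf.float16),
    tf.constant(2.0, dtype=tf.float32),
]
out = losses_utils.cast_losses_to_common_dtype(losses)
# The float16 loss is promoted to float32, the most precise float present.
assert [loss.dtype for loss in out] == [tf.float32, tf.float32]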
+ """ + highest_float = None + for loss in losses: + if loss.dtype.is_floating: + if highest_float is None or loss.dtype.size > highest_float.size: + highest_float = loss.dtype + elif {loss.dtype, highest_float} == {"bfloat16", "float16"}: + highest_float = "float32" + if loss.dtype.is_complex: + return ( + losses # If we find any complex losses, do not cast any losses + ) + if highest_float: + losses = [tf.cast(loss, highest_float) for loss in losses] + return losses + + +def get_mask(y_p): + """Returns Keras mask from tensor.""" + return getattr(y_p, "_keras_mask", None) + + +def apply_mask(y_p, sw, mask): + """Applies any mask on predictions to sample weights.""" + if mask is not None: + mask = tf.cast(mask, y_p.dtype) + if sw is not None: + sw = tf.cast(sw, mask.dtype) + mask, _, sw = squeeze_or_expand_dimensions(mask, sample_weight=sw) + sw *= mask + else: + sw = mask + return sw + + +def apply_valid_mask(losses, sw, mask, reduction): + """Redistribute sample weights considering only valid entries.""" + if mask is not None: + mask = tf.cast(mask, losses.dtype) + + if reduction in (ReductionV2.AUTO, ReductionV2.SUM_OVER_BATCH_SIZE): + # Valid entries have weight `total/valid`, while invalid ones + # have 0. When summed over batch, they will be reduced to: + # + # mean(loss * sample_weight * total / valid) + # = sum(loss * sample_weight * total / valid) / total + # = sum(loss * sample_weight) / total * total / valid + # = sum(loss * sample_weight) / valid + + total = tf.cast(tf.size(mask), losses.dtype) + valid = tf.reduce_sum(mask) + mask *= total / valid + + return apply_mask(losses, sw, mask) diff --git a/keras/utils/losses_utils_test.py b/keras/utils/losses_utils_test.py index 0dfa21dfc750..03c531bf1db0 100644 --- a/keras/utils/losses_utils_test.py +++ b/keras/utils/losses_utils_test.py @@ -14,64 +14,69 @@ # ============================================================================== """Tests for losses_utils.""" +import tensorflow.compat.v2 as tf + from keras.testing_infra import test_combinations from keras.utils import losses_utils -import tensorflow.compat.v2 as tf -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class RemoveSqueezableTest(tf.test.TestCase): - """Test remove_squeezable_dimensions""" + """Test remove_squeezable_dimensions""" - def test_ragged_3d_same_shape(self): - """ shape (2, (sequence={1, 2}), 3)""" - x = tf.ragged.constant([[[1, 2, 3]], [[4, 5, 6], [7, 8, 9]]]) - rank = x.shape.ndims - x_p, _ = losses_utils.remove_squeezable_dimensions(x, x) - self.assertEqual(x_p.shape.ndims, rank) + def test_ragged_3d_same_shape(self): + """shape (2, (sequence={1, 2}), 3)""" + x = tf.ragged.constant([[[1, 2, 3]], [[4, 5, 6], [7, 8, 9]]]) + rank = x.shape.ndims + x_p, _ = losses_utils.remove_squeezable_dimensions(x, x) + self.assertEqual(x_p.shape.ndims, rank) - def test_ragged_3d_4d_squeezable(self): - """ shapes: + def test_ragged_3d_4d_squeezable(self): + """shapes: x: (2, (sequence={1, 2}), 3) y: (2, (sequence={1, 2}), 3, 1) - """ - x = tf.ragged.constant([[[1, 2, 3]], [[4, 5, 6], [7, 8, 9]]]) - y = tf.expand_dims(x, axis=-1) - self.assertEqual(x.shape.ndims, 3) - self.assertEqual(y.shape.ndims, 4) - _, y_p = losses_utils.remove_squeezable_dimensions(x, y) - y_p.shape.assert_is_compatible_with(x.shape) - self.assertEqual(y_p.shape.ndims, 3) + """ + x = tf.ragged.constant([[[1, 2, 3]], [[4, 5, 6], [7, 8, 9]]]) + y = tf.expand_dims(x, axis=-1) + 
self.assertEqual(x.shape.ndims, 3) + self.assertEqual(y.shape.ndims, 4) + _, y_p = losses_utils.remove_squeezable_dimensions(x, y) + y_p.shape.assert_is_compatible_with(x.shape) + self.assertEqual(y_p.shape.ndims, 3) - x_p, _ = losses_utils.remove_squeezable_dimensions(y, x) - x_p.shape.assert_is_compatible_with(x.shape) - self.assertEqual(x_p.shape.ndims, 3) + x_p, _ = losses_utils.remove_squeezable_dimensions(y, x) + x_p.shape.assert_is_compatible_with(x.shape) + self.assertEqual(x_p.shape.ndims, 3) - def test_dense_2d_3d_squeezable(self): - x = tf.constant([[1, 2], [3, 4]]) - y = tf.constant([[[1], [2]], [[3], [4]]]) - _, y_p = losses_utils.remove_squeezable_dimensions(x, y) - y_p.shape.assert_is_compatible_with(x.shape) - self.assertEqual(y_p.shape.ndims, x.shape.ndims) - x_p, _ = losses_utils.remove_squeezable_dimensions(y, x) - x_p.shape.assert_is_compatible_with(x.shape) + def test_dense_2d_3d_squeezable(self): + x = tf.constant([[1, 2], [3, 4]]) + y = tf.constant([[[1], [2]], [[3], [4]]]) + _, y_p = losses_utils.remove_squeezable_dimensions(x, y) + y_p.shape.assert_is_compatible_with(x.shape) + self.assertEqual(y_p.shape.ndims, x.shape.ndims) + x_p, _ = losses_utils.remove_squeezable_dimensions(y, x) + x_p.shape.assert_is_compatible_with(x.shape) class RemoveSqueezableTestGraphOnly(tf.test.TestCase): - """Test remove_squeezable_dimensions (graph-mode only).""" + """Test remove_squeezable_dimensions (graph-mode only).""" - def test_placeholder(self): - """Test dynamic rank tensors.""" - with tf.Graph().as_default(): - x = tf.compat.v1.placeholder_with_default([1., 2., 3.], shape=None) - y = tf.compat.v1.placeholder_with_default([[1.], [2.], [3.]], shape=None) - _, y_p = losses_utils.remove_squeezable_dimensions(x, y) - y_p.shape.assert_is_compatible_with(x.shape) - self.assertAllEqual(tf.shape(x), tf.shape(y_p)) - x_p, _ = losses_utils.remove_squeezable_dimensions(y, x) - x_p.shape.assert_is_compatible_with(x.shape) + def test_placeholder(self): + """Test dynamic rank tensors.""" + with tf.Graph().as_default(): + x = tf.compat.v1.placeholder_with_default( + [1.0, 2.0, 3.0], shape=None + ) + y = tf.compat.v1.placeholder_with_default( + [[1.0], [2.0], [3.0]], shape=None + ) + _, y_p = losses_utils.remove_squeezable_dimensions(x, y) + y_p.shape.assert_is_compatible_with(x.shape) + self.assertAllEqual(tf.shape(x), tf.shape(y_p)) + x_p, _ = losses_utils.remove_squeezable_dimensions(y, x) + x_p.shape.assert_is_compatible_with(x.shape) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/metrics_utils.py b/keras/utils/metrics_utils.py index 18a191709a37..0edd82d703de 100644 --- a/keras/utils/metrics_utils.py +++ b/keras/utils/metrics_utils.py @@ -12,251 +12,277 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access + """Utils related to keras metrics.""" -from enum import Enum import functools import weakref +from enum import Enum + +import numpy as np +import tensorflow.compat.v2 as tf + from keras import backend from keras.utils import losses_utils from keras.utils import tf_utils from keras.utils.generic_utils import to_list -import numpy as np -import tensorflow.compat.v2 as tf NEG_INF = -1e10 class Reduction(Enum): - """Types of metrics reduction. + """Types of metrics reduction. 
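Relatedly, the new `apply_valid_mask` helper added to losses_utils above rescales sample weights by total/valid so that a later SUM_OVER_BATCH_SIZE mean ignores masked-out entries. A sketch with illustrative values:

import tensorflow as tf

from keras.utils import losses_utils

losses = tf.constant([1.0, 2.0, 3.0, 4.0])
sw = tf.constant([1.0, 1.0, 1.0, 1.0])
mask = tf.constant([1.0, 1.0, 0.0, 0.0])  # last two entries are invalid

sw = losses_utils.apply_valid_mask(
    losses, sw, mask, losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
)
# Valid weights become total/valid = 4/2 = 2, invalid ones become 0, so the
# mean over all four entries equals the mean over the two valid ones:
loss = losses_utils.compute_weighted_loss(losses, sw)  # (1*2 + 2*2) / 4 = 1.5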
-  Contains the following values:
+    Contains the following values:

-  * `SUM`: Scalar sum of weighted values.
-  * `SUM_OVER_BATCH_SIZE`: Scalar sum of weighted values divided by
-      number of elements.
-  * `WEIGHTED_MEAN`: Scalar sum of weighted values divided by sum of weights.
-  """
-  SUM = 'sum'
-  SUM_OVER_BATCH_SIZE = 'sum_over_batch_size'
-  WEIGHTED_MEAN = 'weighted_mean'
+    * `SUM`: Scalar sum of weighted values.
+    * `SUM_OVER_BATCH_SIZE`: Scalar sum of weighted values divided by
+        number of elements.
+    * `WEIGHTED_MEAN`: Scalar sum of weighted values divided by sum of weights.
+    """
+    SUM = "sum"
+    SUM_OVER_BATCH_SIZE = "sum_over_batch_size"
+    WEIGHTED_MEAN = "weighted_mean"


-def update_state_wrapper(update_state_fn):
-  """Decorator to wrap metric `update_state()` with `add_update()`.
-  Args:
-    update_state_fn: function that accumulates metric statistics.
-
-  Returns:
-    Decorated function that wraps `update_state_fn()` with `add_update()`.
-  """
-
-  def decorated(metric_obj, *args, **kwargs):
-    """Decorated function with `add_update()`."""
-    strategy = tf.distribute.get_strategy()
-
-    for weight in metric_obj.weights:
-      if (backend.is_tpu_strategy(strategy) and
-          not strategy.extended.variable_created_in_scope(weight)
-          and not tf.distribute.in_cross_replica_context()):
-        raise ValueError(
-            'Trying to run metric.update_state in replica context when '
-            'the metric was not created in TPUStrategy scope. '
-            'Make sure the keras Metric is created in TPUstrategy scope. ')
-
-    with tf_utils.graph_context_for_symbolic_tensors(*args, **kwargs):
-      update_op = update_state_fn(*args, **kwargs)
-      if update_op is not None:  # update_op will be None in eager execution.
-        metric_obj.add_update(update_op)
-      return update_op
-
-  return tf.__internal__.decorator.make_decorator(update_state_fn, decorated)
+def update_state_wrapper(update_state_fn):
+    """Decorator to wrap metric `update_state()` with `add_update()`.
+
+    Args:
+      update_state_fn: function that accumulates metric statistics.
+
+    Returns:
+      Decorated function that wraps `update_state_fn()` with `add_update()`.
+    """
+
+    def decorated(metric_obj, *args, **kwargs):
+        """Decorated function with `add_update()`."""
+        strategy = tf.distribute.get_strategy()
+
+        for weight in metric_obj.weights:
+            if (
+                backend.is_tpu_strategy(strategy)
+                and not strategy.extended.variable_created_in_scope(weight)
+                and not tf.distribute.in_cross_replica_context()
+            ):
+                raise ValueError(
+                    "Trying to run metric.update_state in replica context when "
+                    "the metric was not created in TPUStrategy scope. "
+                    "Make sure the keras Metric is created in TPUstrategy "
+                    "scope. "
+                )
+
+        with tf_utils.graph_context_for_symbolic_tensors(*args, **kwargs):
+            update_op = update_state_fn(*args, **kwargs)
+            if update_op is not None:
+                # update_op will be None in eager execution.
+                metric_obj.add_update(update_op)
+            return update_op
+
+    return tf.__internal__.decorator.make_decorator(update_state_fn, decorated)


 def result_wrapper(result_fn):
-  """Decorator to wrap metric `result()` function in `merge_call()`.
-
-  Result computation is an idempotent operation that simply calculates the
-  metric value using the state variables.
-
-  If metric state variables are distributed across replicas/devices and
-  `result()` is requested from the context of one device - This function wraps
-  `result()` in a distribution strategy `merge_call()`. With this,
-  the metric state variables will be aggregated across devices.
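Both wrappers are applied by the `Metric` base class, so an ordinary subclass picks them up automatically. A minimal sketch (the metric itself is made up for illustration):

import tensorflow as tf

class SumOfSquares(tf.keras.metrics.Metric):
    # `Metric` wraps `update_state` with `update_state_wrapper` and `result`
    # with `result_wrapper` behind the scenes; subclasses only supply the raw
    # state updates and the state-to-value computation.
    def __init__(self, name="sum_of_squares", **kwargs):
        super().__init__(name=name, **kwargs)
        self.total = self.add_weight(name="total", initializer="zeros")

    def update_state(self, values, sample_weight=None):
        values = tf.cast(values, self.dtype)
        self.total.assign_add(tf.reduce_sum(tf.square(values)))

    def result(self):
        return self.total

m = SumOfSquares()
m.update_state([1.0, 2.0])
assert float(m.result()) == 5.0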
- - Args: - result_fn: function that computes the metric result. - - Returns: - Decorated function that wraps `result_fn()` in distribution strategy - `merge_call()`. - """ - - def decorated(metric_obj, *args): - """Decorated function with merge_call.""" - replica_context = tf.distribute.get_replica_context() - - # The purpose of using `merge_call` to call `result()` is to trigger cross - # replica aggregation of metric state variables (SyncOnReadVariable). After - # we introduced `variable_sync_on_read_context`, in principle there is no - # need to use `merge_call` here. However the branch still exists because: - # - # 1. Keras V1 training code sometimes assumes `result_t` is the same tensor - # across replicas (achieved by `merge_call`). With - # `variable_sync_on_read_context` each replica gets their own tensors - # residing on replica's device, thus breaking the assumption. - # 2. Keras c/fit creates a tf.function (a.k.a, train_function) that returns - # the metric values of the first replica. With - # `variable_sync_on_read_context` since each replica gets their own - # tensors, the metric result tensors on the non-first replicas are not in - # the return value of train_function, making TF graph optimizer prune the - # branch that computes and aggregates those metric results. As a result, - # if NCCL is used to do the aggregation, the program will hang because - # NCCL ops are only launched on the non-pruned first replica. - # - # We condition on strategy_supports_no_merge_call() since we know if it is - # True, the program uses `jit_compile` to compile replica fn, meaning it is - # not V1 training (hence #1 is okay), and no pruning will happen as - # compiled functions are not inlined (hence #2 is okay). - if (replica_context is None or - tf.__internal__.distribute.strategy_supports_no_merge_call()): - with tf.__internal__.distribute.variable_sync_on_read_context(): - raw_result = result_fn(*args) - # Results need to be wrapped in a `tf.identity` op to ensure - # correct execution order. - if isinstance(raw_result, - (tf.Tensor, tf.Variable, float, int)): - result_t = tf.identity(raw_result) - elif isinstance(raw_result, dict): - result_t = { - key: tf.identity(value) - for key, value in raw_result.items() - } + """Decorator to wrap metric `result()` function in `merge_call()`. + + Result computation is an idempotent operation that simply calculates the + metric value using the state variables. + + If metric state variables are distributed across replicas/devices and + `result()` is requested from the context of one device - This function wraps + `result()` in a distribution strategy `merge_call()`. With this, + the metric state variables will be aggregated across devices. + + Args: + result_fn: function that computes the metric result. + + Returns: + Decorated function that wraps `result_fn()` in distribution strategy + `merge_call()`. + """ + + def decorated(metric_obj, *args): + """Decorated function with merge_call.""" + replica_context = tf.distribute.get_replica_context() + + # The purpose of using `merge_call` to call `result()` is to trigger + # cross replica aggregation of metric state variables + # (SyncOnReadVariable). After we introduced + # `variable_sync_on_read_context`, in principle there is no need to use + # `merge_call` here. However the branch still exists because: + # + # 1. Keras V1 training code sometimes assumes `result_t` is the same + # tensor across replicas (achieved by `merge_call`). 
With
+        # `variable_sync_on_read_context` each replica gets their own tensors
+        # residing on replica's device, thus breaking the assumption.
+        # 2. Keras compile/fit creates a tf.function (a.k.a, train_function)
+        # that returns the metric values of the first replica. With
+        # `variable_sync_on_read_context` since each replica gets their own
+        # tensors, the metric result tensors on the non-first replicas are
+        # not in the return value of train_function, making TF graph
+        # optimizer prune the branch that computes and aggregates those
+        # metric results. As a result, if NCCL is used to do the aggregation,
+        # the program will hang because NCCL ops are only launched on the
+        # non-pruned first replica.
+        #
+        # We condition on strategy_supports_no_merge_call() since we know if it
+        # is True, the program uses `jit_compile` to compile replica fn, meaning
+        # it is not V1 training (hence #1 is okay), and no pruning will happen
+        # as compiled functions are not inlined (hence #2 is okay).
+        if (
+            replica_context is None
+            or tf.__internal__.distribute.strategy_supports_no_merge_call()
+        ):
+            with tf.__internal__.distribute.variable_sync_on_read_context():
+                raw_result = result_fn(*args)
+                # Results need to be wrapped in a `tf.identity` op to ensure
+                # correct execution order.
+                if isinstance(raw_result, (tf.Tensor, tf.Variable, float, int)):
+                    result_t = tf.identity(raw_result)
+                elif isinstance(raw_result, dict):
+                    result_t = {
+                        key: tf.identity(value)
+                        for key, value in raw_result.items()
+                    }
+                else:
+                    try:
+                        result_t = tf.identity(raw_result)
+                    except (ValueError, TypeError):
+                        raise RuntimeError(
+                            "The output of `metric.result()` can only be a "
+                            "single Tensor/Variable, or a dict of "
+                            "Tensors/Variables. "
+                            f"For metric {metric_obj.name}, "
+                            f"got result {raw_result}."
+                        )
        else:
-      try:
-        result_t = tf.identity(raw_result)
-      except (ValueError, TypeError):
-        raise RuntimeError(
-            'The output of `metric.result()` can only be a single '
-            'Tensor/Variable, or a dict of Tensors/Variables. '
-            f'For metric {metric_obj.name}, got result {raw_result}.')
-    else:
-      # TODO(psv): Test distribution of metrics using different distribution
-      # strategies.
-
-      # Creating a wrapper for merge_fn. merge_call invokes the given merge_fn
-      # with distribution object as the first parameter. We create a wrapper
-      # here so that the result function need not have that parameter.
-      def merge_fn_wrapper(distribution, merge_fn, *args):
-        # We will get `PerReplica` merge function. Taking the first one as all
-        # are identical copies of the function that we had passed below.
-        result = distribution.experimental_local_results(merge_fn)[0](*args)
-
-        # Wrapping result in identity so that control dependency between
-        # update_op from `update_state` and result works in case result returns
-        # a tensor.
-        return tf.identity(result)
-
-      # Wrapping result in merge_call. merge_call is used when we want to leave
-      # replica mode and compute a value in cross replica mode.
-      result_t = replica_context.merge_call(
-          merge_fn_wrapper, args=(result_fn,) + args)
-
-      # We are saving the result op here to be used in train/test execution
-      # functions. This basically gives the result op that was generated with a
-      # control dep to the updates for these workflows.
-      metric_obj._call_result = result_t
-      return result_t
-
-  return tf.__internal__.decorator.make_decorator(result_fn, decorated)
+            # TODO(psv): Test distribution of metrics using different
+            # distribution strategies.
+
+            # Creating a wrapper for merge_fn. merge_call invokes the given
+            # merge_fn with distribution object as the first parameter. We
+            # create a wrapper here so that the result function need not have
+            # that parameter.
+            def merge_fn_wrapper(distribution, merge_fn, *args):
+                # We will get `PerReplica` merge function. Taking the first one
+                # as all are identical copies of the function that we had passed
+                # below.
+                result = distribution.experimental_local_results(merge_fn)[0](
+                    *args
+                )
+
+                # Wrapping result in identity so that control dependency between
+                # update_op from `update_state` and result works in case result
+                # returns a tensor.
+                return tf.identity(result)
+
+            # Wrapping result in merge_call. merge_call is used when we want to
+            # leave replica mode and compute a value in cross replica mode.
+            result_t = replica_context.merge_call(
+                merge_fn_wrapper, args=(result_fn,) + args
+            )
+
+        # We are saving the result op here to be used in train/test execution
+        # functions. This basically gives the result op that was generated with
+        # a control dep to the updates for these workflows.
+        metric_obj._call_result = result_t
+        return result_t
+
+    return tf.__internal__.decorator.make_decorator(result_fn, decorated)


 def weakmethod(method):
-  """Creates a weak reference to the bound method."""
+    """Creates a weak reference to the bound method."""

-  cls = method.im_class
-  func = method.im_func
-  instance_ref = weakref.ref(method.im_self)
+    cls = method.im_class
+    func = method.im_func
+    instance_ref = weakref.ref(method.im_self)

-  @functools.wraps(method)
-  def inner(*args, **kwargs):
-    return func.__get__(instance_ref(), cls)(*args, **kwargs)
+    @functools.wraps(method)
+    def inner(*args, **kwargs):
+        return func.__get__(instance_ref(), cls)(*args, **kwargs)

-  del method
-  return inner
+    del method
+    return inner


 def assert_thresholds_range(thresholds):
-  if thresholds is not None:
-    invalid_thresholds = [t for t in thresholds if t is None or t < 0 or t > 1]
-    if invalid_thresholds:
-      raise ValueError(
-          f'Threshold values must be in [0, 1]. Received: {invalid_thresholds}')
+    if thresholds is not None:
+        invalid_thresholds = [
+            t for t in thresholds if t is None or t < 0 or t > 1
+        ]
+        if invalid_thresholds:
+            raise ValueError(
+                "Threshold values must be in [0, 1]. "
+                f"Received: {invalid_thresholds}"
+            )


 def parse_init_thresholds(thresholds, default_threshold=0.5):
-  if thresholds is not None:
-    assert_thresholds_range(to_list(thresholds))
-  thresholds = to_list(default_threshold if thresholds is None else thresholds)
-  return thresholds
+    if thresholds is not None:
+        assert_thresholds_range(to_list(thresholds))
+    thresholds = to_list(
+        default_threshold if thresholds is None else thresholds
+    )
+    return thresholds


 class ConfusionMatrix(Enum):
-  TRUE_POSITIVES = 'tp'
-  FALSE_POSITIVES = 'fp'
-  TRUE_NEGATIVES = 'tn'
-  FALSE_NEGATIVES = 'fn'
+    TRUE_POSITIVES = "tp"
+    FALSE_POSITIVES = "fp"
+    TRUE_NEGATIVES = "tn"
+    FALSE_NEGATIVES = "fn"


 class AUCCurve(Enum):
-  """Type of AUC Curve (ROC or PR)."""
-  ROC = 'ROC'
-  PR = 'PR'
-
-  @staticmethod
-  def from_str(key):
-    if key in ('pr', 'PR'):
-      return AUCCurve.PR
-    elif key in ('roc', 'ROC'):
-      return AUCCurve.ROC
-    else:
-      raise ValueError(
-          f'Invalid AUC curve value: "{key}". 
' - 'Expected values are ["PR", "ROC"]') + """Type of AUC Curve (ROC or PR).""" + + ROC = "ROC" + PR = "PR" + + @staticmethod + def from_str(key): + if key in ("pr", "PR"): + return AUCCurve.PR + elif key in ("roc", "ROC"): + return AUCCurve.ROC + else: + raise ValueError( + f'Invalid AUC curve value: "{key}". ' + 'Expected values are ["PR", "ROC"]' + ) class AUCSummationMethod(Enum): - """Type of AUC summation method. - - https://en.wikipedia.org/wiki/Riemann_sum) - - Contains the following values: - * 'interpolation': Applies mid-point summation scheme for `ROC` curve. For - `PR` curve, interpolates (true/false) positives but not the ratio that is - precision (see Davis & Goadrich 2006 for details). - * 'minoring': Applies left summation for increasing intervals and right - summation for decreasing intervals. - * 'majoring': Applies right summation for increasing intervals and left - summation for decreasing intervals. - """ - INTERPOLATION = 'interpolation' - MAJORING = 'majoring' - MINORING = 'minoring' - - @staticmethod - def from_str(key): - if key in ('interpolation', 'Interpolation'): - return AUCSummationMethod.INTERPOLATION - elif key in ('majoring', 'Majoring'): - return AUCSummationMethod.MAJORING - elif key in ('minoring', 'Minoring'): - return AUCSummationMethod.MINORING - else: - raise ValueError( - f'Invalid AUC summation method value: "{key}". ' - 'Expected values are ["interpolation", "majoring", "minoring"]') + """Type of AUC summation method. + + https://en.wikipedia.org/wiki/Riemann_sum) + + Contains the following values: + * 'interpolation': Applies mid-point summation scheme for `ROC` curve. For + `PR` curve, interpolates (true/false) positives but not the ratio that is + precision (see Davis & Goadrich 2006 for details). + * 'minoring': Applies left summation for increasing intervals and right + summation for decreasing intervals. + * 'majoring': Applies right summation for increasing intervals and left + summation for decreasing intervals. + """ + + INTERPOLATION = "interpolation" + MAJORING = "majoring" + MINORING = "minoring" + + @staticmethod + def from_str(key): + if key in ("interpolation", "Interpolation"): + return AUCSummationMethod.INTERPOLATION + elif key in ("majoring", "Majoring"): + return AUCSummationMethod.MAJORING + elif key in ("minoring", "Minoring"): + return AUCSummationMethod.MINORING + else: + raise ValueError( + f'Invalid AUC summation method value: "{key}". ' + 'Expected values are ["interpolation", "majoring", "minoring"]' + ) def _update_confusion_matrix_variables_optimized( @@ -267,659 +293,722 @@ def _update_confusion_matrix_variables_optimized( multi_label=False, sample_weights=None, label_weights=None, - thresholds_with_epsilon=False): - """Update confusion matrix variables with memory efficient alternative. - - Note that the thresholds need to be evenly distributed within the list, eg, - the diff between consecutive elements are the same. - - To compute TP/FP/TN/FN, we are measuring a binary classifier - C(t) = (predictions >= t) - at each threshold 't'. So we have - TP(t) = sum( C(t) * true_labels ) - FP(t) = sum( C(t) * false_labels ) - - But, computing C(t) requires computation for each t. To make it fast, - observe that C(t) is a cumulative integral, and so if we have - thresholds = [t_0, ..., t_{n-1}]; t_0 < ... 
< t_{n-1} - where n = num_thresholds, and if we can compute the bucket function - B(i) = Sum( (predictions == t), t_i <= t < t{i+1} ) - then we get - C(t_i) = sum( B(j), j >= i ) - which is the reversed cumulative sum in tf.cumsum(). - - We can compute B(i) efficiently by taking advantage of the fact that - our thresholds are evenly distributed, in that - width = 1.0 / (num_thresholds - 1) - thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0] - Given a prediction value p, we can map it to its bucket by - bucket_index(p) = floor( p * (num_thresholds - 1) ) - so we can use tf.math.unsorted_segment_sum() to update the buckets in one - pass. - - Consider following example: - y_true = [0, 0, 1, 1] - y_pred = [0.1, 0.5, 0.3, 0.9] - thresholds = [0.0, 0.5, 1.0] - num_buckets = 2 # [0.0, 1.0], (1.0, 2.0] - bucket_index(y_pred) = tf.math.floor(y_pred * num_buckets) - = tf.math.floor([0.2, 1.0, 0.6, 1.8]) - = [0, 0, 0, 1] - # The meaning of this bucket is that if any of the label is true, - # then 1 will be added to the corresponding bucket with the index. - # Eg, if the label for 0.2 is true, then 1 will be added to bucket 0. If the - # label for 1.8 is true, then 1 will be added to bucket 1. - # - # Note the second item "1.0" is floored to 0, since the value need to be - # strictly larger than the bucket lower bound. - # In the implementation, we use tf.math.ceil() - 1 to achieve this. - tp_bucket_value = tf.math.unsorted_segment_sum(true_labels, bucket_indices, - num_segments=num_thresholds) - = [1, 1, 0] - # For [1, 1, 0] here, it means there is 1 true value contributed by bucket 0, - # and 1 value contributed by bucket 1. When we aggregate them to together, - # the result become [a + b + c, b + c, c], since large thresholds will always - # contribute to the value for smaller thresholds. - true_positive = tf.math.cumsum(tp_bucket_value, reverse=True) - = [2, 1, 0] - - This implementation exhibits a run time and space complexity of O(T + N), - where T is the number of thresholds and N is the size of predictions. - Metrics that rely on standard implementation instead exhibit a complexity of - O(T * N). - - Args: - variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys - and corresponding variables to update as values. - y_true: A floating point `Tensor` whose shape matches `y_pred`. Will be cast - to `bool`. - y_pred: A floating point `Tensor` of arbitrary shape and whose values are in - the range `[0, 1]`. - thresholds: A sorted floating point `Tensor` with value in `[0, 1]`. - It need to be evenly distributed (the diff between each element need to be - the same). - multi_label: Optional boolean indicating whether multidimensional - prediction/labels should be treated as multilabel responses, or flattened - into a single label. When True, the valus of `variables_to_update` must - have a second dimension equal to the number of labels in y_true and - y_pred, and those tensors must not be RaggedTensors. - sample_weights: Optional `Tensor` whose rank is either 0, or the same rank - as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions - must be either `1`, or the same as the corresponding `y_true` dimension). - label_weights: Optional tensor of non-negative weights for multilabel - data. The weights are applied when calculating TP, FP, FN, and TN without - explicit multilabel handling (i.e. when the data is to be flattened). 
- thresholds_with_epsilon: Optional boolean indicating whether the leading and - tailing thresholds has any epsilon added for floating point imprecisions. - It will change how we handle the leading and tailing bucket. - - Returns: - Update op. - """ - num_thresholds = thresholds.shape.as_list()[0] - - if sample_weights is None: - sample_weights = 1.0 - else: - sample_weights = tf.__internal__.ops.broadcast_weights( - tf.cast(sample_weights, dtype=y_pred.dtype), y_pred) - if not multi_label: - sample_weights = tf.reshape(sample_weights, [-1]) - if label_weights is None: - label_weights = 1.0 - else: - label_weights = tf.expand_dims(label_weights, 0) - label_weights = tf.__internal__.ops.broadcast_weights(label_weights, - y_pred) + thresholds_with_epsilon=False, +): + """Update confusion matrix variables with memory efficient alternative. + + Note that the thresholds need to be evenly distributed within the list, + e.g., the difference between consecutive elements is the same. + + To compute TP/FP/TN/FN, we are measuring a binary classifier + C(t) = (predictions >= t) + at each threshold 't'. So we have + TP(t) = sum( C(t) * true_labels ) + FP(t) = sum( C(t) * false_labels ) + + But, computing C(t) requires computation for each t. To make it fast, + observe that C(t) is a cumulative integral, and so if we have + thresholds = [t_0, ..., t_{n-1}]; t_0 < ... < t_{n-1} + where n = num_thresholds, and if we can compute the bucket function + B(i) = Sum( (predictions == t), t_i <= t < t{i+1} ) + then we get + C(t_i) = sum( B(j), j >= i ) + which is the reversed cumulative sum in tf.cumsum(). + + We can compute B(i) efficiently by taking advantage of the fact that + our thresholds are evenly distributed, in that + width = 1.0 / (num_thresholds - 1) + thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0] + Given a prediction value p, we can map it to its bucket by + bucket_index(p) = floor( p * (num_thresholds - 1) ) + so we can use tf.math.unsorted_segment_sum() to update the buckets in one + pass. + + Consider the following example: + y_true = [0, 0, 1, 1] + y_pred = [0.1, 0.5, 0.3, 0.9] + thresholds = [0.0, 0.5, 1.0] + num_buckets = 2 # [0.0, 1.0], (1.0, 2.0] + bucket_index(y_pred) = tf.math.floor(y_pred * num_buckets) + = tf.math.floor([0.2, 1.0, 0.6, 1.8]) + = [0, 0, 0, 1] + # The meaning of this bucket is that if any of the labels is true, + # then 1 will be added to the corresponding bucket with the index. + # E.g., if the label for 0.2 is true, then 1 will be added to bucket 0. If + # the label for 1.8 is true, then 1 will be added to bucket 1. + # + # Note the second item "1.0" is floored to 0, since the value needs to be + # strictly larger than the bucket lower bound. + # In the implementation, we use tf.math.ceil() - 1 to achieve this. + tp_bucket_value = tf.math.unsorted_segment_sum(true_labels, bucket_indices, + num_segments=num_thresholds) + = [1, 1, 0] + # For [1, 1, 0] here, it means there is 1 true value contributed by bucket + # 0, and 1 value contributed by bucket 1. When we aggregate them + # together, the result becomes [a + b + c, b + c, c], since larger + # thresholds will always contribute to the value for smaller thresholds. + true_positive = tf.math.cumsum(tp_bucket_value, reverse=True) + = [2, 1, 0] + + This implementation exhibits a run time and space complexity of O(T + N), + where T is the number of thresholds and N is the size of predictions. + Metrics that rely on the standard implementation instead exhibit a + complexity of O(T * N).
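# The following is an illustrative, hedged sketch (not part of the patch
# above): it reproduces the docstring's toy example in eager-mode
# TensorFlow 2 to show the one-pass bucketing plus reversed-cumsum trick.
# All names and values here are invented for illustration.
import tensorflow as tf

y_true = tf.constant([0.0, 0.0, 1.0, 1.0])
y_pred = tf.constant([0.1, 0.5, 0.3, 0.9])
num_thresholds = 3  # thresholds = [0.0, 0.5, 1.0]

# ceil(p * (n - 1)) - 1 assigns each prediction to the bucket whose lower
# bound it strictly exceeds (so 0.5 lands in bucket 0, not bucket 1).
bucket_indices = tf.cast(
    tf.math.ceil(y_pred * (num_thresholds - 1)) - 1, tf.int32
)
# One pass fills the per-bucket counts of true labels...
tp_bucket = tf.math.unsorted_segment_sum(
    data=y_true, segment_ids=bucket_indices, num_segments=num_thresholds
)
# ...and one reversed cumulative sum yields TP at every threshold: O(T + N).
tp = tf.cumsum(tp_bucket, reverse=True)
print(tp.numpy())  # [2. 1. 0.] == TP at thresholds 0.0, 0.5, 1.0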
+ + Args: + variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys + and corresponding variables to update as values. + y_true: A floating point `Tensor` whose shape matches `y_pred`. Will be + cast to `bool`. + y_pred: A floating point `Tensor` of arbitrary shape and whose values are + in the range `[0, 1]`. + thresholds: A sorted floating point `Tensor` with values in `[0, 1]`. + It needs to be evenly distributed (the difference between consecutive + elements must be the same). + multi_label: Optional boolean indicating whether multidimensional + prediction/labels should be treated as multilabel responses, or + flattened into a single label. When True, the values of + `variables_to_update` must have a second dimension equal to the number + of labels in y_true and y_pred, and those tensors must not be + RaggedTensors. + sample_weights: Optional `Tensor` whose rank is either 0, or the same rank + as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions + must be either `1`, or the same as the corresponding `y_true` + dimension). + label_weights: Optional tensor of non-negative weights for multilabel + data. The weights are applied when calculating TP, FP, FN, and TN + without explicit multilabel handling (i.e. when the data is to be + flattened). + thresholds_with_epsilon: Optional boolean indicating whether the leading + and trailing thresholds have any epsilon added for floating point + imprecision. It will change how we handle the leading and trailing + buckets. + + Returns: + Update op. + """ + num_thresholds = thresholds.shape.as_list()[0] + + if sample_weights is None: + sample_weights = 1.0 + else: + sample_weights = tf.__internal__.ops.broadcast_weights( + tf.cast(sample_weights, dtype=y_pred.dtype), y_pred + ) + if not multi_label: + sample_weights = tf.reshape(sample_weights, [-1]) + if label_weights is None: + label_weights = 1.0 + else: + label_weights = tf.expand_dims(label_weights, 0) + label_weights = tf.__internal__.ops.broadcast_weights( + label_weights, y_pred + ) + if not multi_label: + label_weights = tf.reshape(label_weights, [-1]) + weights = tf.cast(tf.multiply(sample_weights, label_weights), y_true.dtype) + + # We shouldn't need this, but in case there are prediction values that + # are out of the range [0.0, 1.0] + y_pred = tf.clip_by_value(y_pred, clip_value_min=0.0, clip_value_max=1.0) + + y_true = tf.cast(tf.cast(y_true, tf.bool), y_true.dtype) if not multi_label: - label_weights = tf.reshape(label_weights, [-1]) - weights = tf.multiply(sample_weights, label_weights) - - # We shouldn't need this, but in case there are predict value that is out of - # the range of [0.0, 1.0] - y_pred = tf.clip_by_value(y_pred, - clip_value_min=0.0, clip_value_max=1.0) - - y_true = tf.cast(tf.cast(y_true, tf.bool), y_true.dtype) - if not multi_label: - y_true = tf.reshape(y_true, [-1]) - y_pred = tf.reshape(y_pred, [-1]) - - true_labels = tf.multiply(y_true, weights) - false_labels = tf.multiply((1.0 - y_true), weights) - - # Compute the bucket indices for each prediction value. - # Since the predict value has to be strictly greater than the thresholds, - # eg, buckets like [0, 0.5], (0.5, 1], and 0.5 belongs to first bucket. - # We have to use math.ceil(val) - 1 for the bucket. - bucket_indices = tf.math.ceil(y_pred * (num_thresholds - 1)) - 1 - - if thresholds_with_epsilon: - # In this case, the first bucket should actually take into account since - # the any prediction between [0.0, 1.0] should be larger than the first - # threshold.
We change the bucket value from -1 to 0. - bucket_indices = tf.nn.relu(bucket_indices) - - bucket_indices = tf.cast(bucket_indices, tf.int32) - - if multi_label: - # We need to run bucket segment sum for each of the label class. In the - # multi_label case, the rank of the label is 2. We first transpose it so - # that the label dim becomes the first and we can parallel run though them. - true_labels = tf.transpose(true_labels) - false_labels = tf.transpose(false_labels) - bucket_indices = tf.transpose(bucket_indices) - - def gather_bucket(label_and_bucket_index): - label, bucket_index = label_and_bucket_index[0], label_and_bucket_index[1] - return tf.math.unsorted_segment_sum( - data=label, segment_ids=bucket_index, num_segments=num_thresholds) - tp_bucket_v = tf.vectorized_map( - gather_bucket, (true_labels, bucket_indices)) - fp_bucket_v = tf.vectorized_map( - gather_bucket, (false_labels, bucket_indices)) - tp = tf.transpose( - tf.cumsum(tp_bucket_v, reverse=True, axis=1)) - fp = tf.transpose( - tf.cumsum(fp_bucket_v, reverse=True, axis=1)) - else: - tp_bucket_v = tf.math.unsorted_segment_sum( - data=true_labels, segment_ids=bucket_indices, - num_segments=num_thresholds) - fp_bucket_v = tf.math.unsorted_segment_sum( - data=false_labels, segment_ids=bucket_indices, - num_segments=num_thresholds) - tp = tf.cumsum(tp_bucket_v, reverse=True) - fp = tf.cumsum(fp_bucket_v, reverse=True) - - # fn = sum(true_labels) - tp - # tn = sum(false_labels) - fp - if (ConfusionMatrix.TRUE_NEGATIVES in variables_to_update or - ConfusionMatrix.FALSE_NEGATIVES in variables_to_update): + y_true = tf.reshape(y_true, [-1]) + y_pred = tf.reshape(y_pred, [-1]) + + true_labels = tf.multiply(y_true, weights) + false_labels = tf.multiply((1.0 - y_true), weights) + + # Compute the bucket indices for each prediction value. + # Since the prediction value has to be strictly greater than the + # thresholds, e.g., with buckets like [0, 0.5], (0.5, 1], the value 0.5 + # belongs to the first bucket. + # We have to use math.ceil(val) - 1 for the bucket. + bucket_indices = tf.math.ceil(y_pred * (num_thresholds - 1)) - 1 + + if thresholds_with_epsilon: + # In this case, the first bucket should actually be taken into + # account, since any prediction in [0.0, 1.0] should be larger than + # the first threshold. We change the bucket value from -1 to 0. + bucket_indices = tf.nn.relu(bucket_indices) + + bucket_indices = tf.cast(bucket_indices, tf.int32) + if multi_label: - total_true_labels = tf.reduce_sum(true_labels, axis=1) - total_false_labels = tf.reduce_sum(false_labels, axis=1) + # We need to run the bucket segment sum for each of the label classes. + # In the multi_label case, the rank of the label is 2. We first + # transpose it so that the label dim becomes the first and we can run + # through them in parallel.
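# An illustrative, hedged sketch (not part of the patch above) of the
# multi_label branch: transpose so the label dimension leads, then
# vectorize the per-label segment sum. Shapes and values are invented.
import tensorflow as tf

num_thresholds = 3
true_labels = tf.constant([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])  # [N=3, L=2]
bucket_indices = tf.constant([[0, 1], [2, 0], [1, 1]])           # [N=3, L=2]

def per_label(args):
    labels, buckets = args  # one label class at a time, each of shape [N]
    return tf.math.unsorted_segment_sum(
        data=labels, segment_ids=buckets, num_segments=num_thresholds
    )

tp_bucket_v = tf.vectorized_map(
    per_label, (tf.transpose(true_labels), tf.transpose(bucket_indices))
)  # shape [L, T]: per-label bucket counts
tp = tf.transpose(tf.cumsum(tp_bucket_v, reverse=True, axis=1))  # [T, L]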
+ true_labels = tf.transpose(true_labels) + false_labels = tf.transpose(false_labels) + bucket_indices = tf.transpose(bucket_indices) + + def gather_bucket(label_and_bucket_index): + label, bucket_index = ( + label_and_bucket_index[0], + label_and_bucket_index[1], + ) + return tf.math.unsorted_segment_sum( + data=label, + segment_ids=bucket_index, + num_segments=num_thresholds, + ) + + tp_bucket_v = tf.vectorized_map( + gather_bucket, (true_labels, bucket_indices), warn=False + ) + fp_bucket_v = tf.vectorized_map( + gather_bucket, (false_labels, bucket_indices), warn=False + ) + tp = tf.transpose(tf.cumsum(tp_bucket_v, reverse=True, axis=1)) + fp = tf.transpose(tf.cumsum(fp_bucket_v, reverse=True, axis=1)) else: - total_true_labels = tf.reduce_sum(true_labels) - total_false_labels = tf.reduce_sum(false_labels) - - update_ops = [] - if ConfusionMatrix.TRUE_POSITIVES in variables_to_update: - variable = variables_to_update[ConfusionMatrix.TRUE_POSITIVES] - update_ops.append(variable.assign_add(tp)) - if ConfusionMatrix.FALSE_POSITIVES in variables_to_update: - variable = variables_to_update[ConfusionMatrix.FALSE_POSITIVES] - update_ops.append(variable.assign_add(fp)) - if ConfusionMatrix.TRUE_NEGATIVES in variables_to_update: - variable = variables_to_update[ConfusionMatrix.TRUE_NEGATIVES] - tn = total_false_labels - fp - update_ops.append(variable.assign_add(tn)) - if ConfusionMatrix.FALSE_NEGATIVES in variables_to_update: - variable = variables_to_update[ConfusionMatrix.FALSE_NEGATIVES] - fn = total_true_labels - tp - update_ops.append(variable.assign_add(fn)) - return tf.group(update_ops) + tp_bucket_v = tf.math.unsorted_segment_sum( + data=true_labels, + segment_ids=bucket_indices, + num_segments=num_thresholds, + ) + fp_bucket_v = tf.math.unsorted_segment_sum( + data=false_labels, + segment_ids=bucket_indices, + num_segments=num_thresholds, + ) + tp = tf.cumsum(tp_bucket_v, reverse=True) + fp = tf.cumsum(fp_bucket_v, reverse=True) + + # fn = sum(true_labels) - tp + # tn = sum(false_labels) - fp + if ( + ConfusionMatrix.TRUE_NEGATIVES in variables_to_update + or ConfusionMatrix.FALSE_NEGATIVES in variables_to_update + ): + if multi_label: + total_true_labels = tf.reduce_sum(true_labels, axis=1) + total_false_labels = tf.reduce_sum(false_labels, axis=1) + else: + total_true_labels = tf.reduce_sum(true_labels) + total_false_labels = tf.reduce_sum(false_labels) + + update_ops = [] + if ConfusionMatrix.TRUE_POSITIVES in variables_to_update: + variable = variables_to_update[ConfusionMatrix.TRUE_POSITIVES] + update_ops.append(variable.assign_add(tp)) + if ConfusionMatrix.FALSE_POSITIVES in variables_to_update: + variable = variables_to_update[ConfusionMatrix.FALSE_POSITIVES] + update_ops.append(variable.assign_add(fp)) + if ConfusionMatrix.TRUE_NEGATIVES in variables_to_update: + variable = variables_to_update[ConfusionMatrix.TRUE_NEGATIVES] + tn = total_false_labels - fp + update_ops.append(variable.assign_add(tn)) + if ConfusionMatrix.FALSE_NEGATIVES in variables_to_update: + variable = variables_to_update[ConfusionMatrix.FALSE_NEGATIVES] + fn = total_true_labels - tp + update_ops.append(variable.assign_add(fn)) + return tf.group(update_ops) def is_evenly_distributed_thresholds(thresholds): - """Check if the thresholds list is evenly distributed. - - We could leverage evenly distributed thresholds to use less memory when - calculate metrcis like AUC where each individual threshold need to be - evaluated. 
- - Args: - thresholds: A python list or tuple, or 1D numpy array whose value is ranged - in [0, 1]. - - Returns: - boolean, whether the values in the inputs are evenly distributed. - """ - # Check the list value and see if it is evenly distributed. - num_thresholds = len(thresholds) - if num_thresholds < 3: - return False - even_thresholds = np.arange(num_thresholds, - dtype=np.float32) / (num_thresholds - 1) - return np.allclose(thresholds, even_thresholds, atol=backend.epsilon()) - - -def update_confusion_matrix_variables(variables_to_update, - y_true, - y_pred, - thresholds, - top_k=None, - class_id=None, - sample_weight=None, - multi_label=False, - label_weights=None, - thresholds_distributed_evenly=False): - """Returns op to update the given confusion matrix variables. - - For every pair of values in y_true and y_pred: - - true_positive: y_true == True and y_pred > thresholds - false_negatives: y_true == True and y_pred <= thresholds - true_negatives: y_true == False and y_pred <= thresholds - false_positive: y_true == False and y_pred > thresholds - - The results will be weighted and added together. When multiple thresholds are - provided, we will repeat the same for every threshold. - - For estimation of these metrics over a stream of data, the function creates an - `update_op` operation that updates the given variables. - - If `sample_weight` is `None`, weights default to 1. - Use weights of 0 to mask values. - - Args: - variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys - and corresponding variables to update as values. - y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`. - y_pred: A floating point `Tensor` of arbitrary shape and whose values are in - the range `[0, 1]`. - thresholds: A float value, float tensor, python list, or tuple of float - thresholds in `[0, 1]`, or NEG_INF (used when top_k is set). - top_k: Optional int, indicates that the positive labels should be limited to - the top k predictions. - class_id: Optional int, limits the prediction and labels to the class - specified by this argument. - sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as - `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must - be either `1`, or the same as the corresponding `y_true` dimension). - multi_label: Optional boolean indicating whether multidimensional - prediction/labels should be treated as multilabel responses, or flattened - into a single label. When True, the valus of `variables_to_update` must - have a second dimension equal to the number of labels in y_true and - y_pred, and those tensors must not be RaggedTensors. - label_weights: (optional) tensor of non-negative weights for multilabel - data. The weights are applied when calculating TP, FP, FN, and TN without - explicit multilabel handling (i.e. when the data is to be flattened). - thresholds_distributed_evenly: Boolean, whether the thresholds are evenly - distributed within the list. An optimized method will be used if this is - the case. See _update_confusion_matrix_variables_optimized() for more - details. - - Returns: - Update op. - - Raises: - ValueError: If `y_pred` and `y_true` have mismatched shapes, or if - `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if - `variables_to_update` contains invalid keys. 
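# An illustrative, hedged usage sketch (not part of the patch above) of
# what this helper computes, with invented toy values: TP(t) counts
# positives predicted strictly above t, FP(t) counts negatives above t.
import tensorflow as tf
from keras.utils import metrics_utils

tp = tf.Variable(tf.zeros(3))
fp = tf.Variable(tf.zeros(3))
metrics_utils.update_confusion_matrix_variables(
    {
        metrics_utils.ConfusionMatrix.TRUE_POSITIVES: tp,
        metrics_utils.ConfusionMatrix.FALSE_POSITIVES: fp,
    },
    y_true=tf.constant([0.0, 0.0, 1.0, 1.0]),
    y_pred=tf.constant([0.1, 0.5, 0.3, 0.9]),
    thresholds=[0.0, 0.5, 1.0],
)
print(tp.numpy(), fp.numpy())  # expected [2. 1. 0.] and [2. 0. 0.]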
- """ - if multi_label and label_weights is not None: - raise ValueError('`label_weights` for multilabel data should be handled ' - 'outside of `update_confusion_matrix_variables` when ' - '`multi_label` is True.') - if variables_to_update is None: - return - if not any( - key for key in variables_to_update if key in list(ConfusionMatrix)): - raise ValueError( - 'Please provide at least one valid confusion matrix ' - 'variable to update. Valid variable key options are: ' - f'"{list(ConfusionMatrix)}". Received: "{variables_to_update.keys()}"') - - variable_dtype = list(variables_to_update.values())[0].dtype - - y_true = tf.cast(y_true, dtype=variable_dtype) - y_pred = tf.cast(y_pred, dtype=variable_dtype) - - if thresholds_distributed_evenly: - # Check whether the thresholds has any leading or tailing epsilon added - # for floating point imprecision. The leading and tailing threshold will be - # handled bit differently as the corner case. - # At this point, thresholds should be a list/array with more than 2 items, - # and ranged between [0, 1]. See is_evenly_distributed_thresholds() for more - # details. - thresholds_with_epsilon = thresholds[0] < 0.0 or thresholds[-1] > 1.0 - - thresholds = tf.convert_to_tensor( - thresholds, dtype=variable_dtype) - num_thresholds = thresholds.shape.as_list()[0] - - if multi_label: - one_thresh = tf.equal( - tf.cast(1, dtype=tf.int32), - tf.rank(thresholds), - name='one_set_of_thresholds_cond') - else: - [y_pred, - y_true], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true], - sample_weight) - one_thresh = tf.cast(True, dtype=tf.bool) - - invalid_keys = [ - key for key in variables_to_update if key not in list(ConfusionMatrix) - ] - if invalid_keys: - raise ValueError( - f'Invalid keys: "{invalid_keys}". ' - f'Valid variable key options are: "{list(ConfusionMatrix)}"') - - if sample_weight is None: - y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( - y_pred, y_true) - else: - sample_weight = tf.cast(sample_weight, dtype=variable_dtype) - y_pred, y_true, sample_weight = ( - losses_utils.squeeze_or_expand_dimensions( - y_pred, y_true, sample_weight=sample_weight)) - y_pred.shape.assert_is_compatible_with(y_true.shape) - - if top_k is not None: - y_pred = _filter_top_k(y_pred, top_k) - if class_id is not None: - y_true = y_true[..., class_id] - y_pred = y_pred[..., class_id] - - if thresholds_distributed_evenly: - return _update_confusion_matrix_variables_optimized( - variables_to_update, y_true, y_pred, thresholds, - multi_label=multi_label, sample_weights=sample_weight, - label_weights=label_weights, - thresholds_with_epsilon=thresholds_with_epsilon) - - pred_shape = tf.shape(y_pred) - num_predictions = pred_shape[0] - if y_pred.shape.ndims == 1: - num_labels = 1 - else: - num_labels = tf.math.reduce_prod(pred_shape[1:], axis=0) - thresh_label_tile = tf.where(one_thresh, num_labels, - tf.ones([], dtype=tf.int32)) - - # Reshape predictions and labels, adding a dim for thresholding. - if multi_label: - predictions_extra_dim = tf.expand_dims(y_pred, 0) - labels_extra_dim = tf.expand_dims( - tf.cast(y_true, dtype=tf.bool), 0) - else: - # Flatten predictions and labels when not multilabel. - predictions_extra_dim = tf.reshape(y_pred, [1, -1]) - labels_extra_dim = tf.reshape( - tf.cast(y_true, dtype=tf.bool), [1, -1]) - - # Tile the thresholds for every prediction. 
- if multi_label: - thresh_pretile_shape = [num_thresholds, 1, -1] - thresh_tiles = [1, num_predictions, thresh_label_tile] - data_tiles = [num_thresholds, 1, 1] - else: - thresh_pretile_shape = [num_thresholds, -1] - thresh_tiles = [1, num_predictions * num_labels] - data_tiles = [num_thresholds, 1] - - thresh_tiled = tf.tile( - tf.reshape(thresholds, thresh_pretile_shape), - tf.stack(thresh_tiles)) - - # Tile the predictions for every threshold. - preds_tiled = tf.tile(predictions_extra_dim, data_tiles) - - # Compare predictions and threshold. - pred_is_pos = tf.greater(preds_tiled, thresh_tiled) - - # Tile labels by number of thresholds - label_is_pos = tf.tile(labels_extra_dim, data_tiles) - - if sample_weight is not None: - sample_weight = tf.__internal__.ops.broadcast_weights( - tf.cast(sample_weight, dtype=variable_dtype), y_pred) - weights_tiled = tf.tile( - tf.reshape(sample_weight, thresh_tiles), data_tiles) - else: - weights_tiled = None - - if label_weights is not None and not multi_label: - label_weights = tf.expand_dims(label_weights, 0) - label_weights = tf.__internal__.ops.broadcast_weights(label_weights, - y_pred) - label_weights_tiled = tf.tile( - tf.reshape(label_weights, thresh_tiles), data_tiles) - if weights_tiled is None: - weights_tiled = label_weights_tiled + """Check if the thresholds list is evenly distributed. + + We could leverage evenly distributed thresholds to use less memory when + calculating metrics like AUC, where each individual threshold needs to be + evaluated. + + Args: + thresholds: A python list or tuple, or 1D numpy array whose values are + in the range [0, 1]. + + Returns: + boolean, whether the values in the inputs are evenly distributed. + """ + # Check the list value and see if it is evenly distributed. + num_thresholds = len(thresholds) + if num_thresholds < 3: + return False + even_thresholds = np.arange(num_thresholds, dtype=np.float32) / ( + num_thresholds - 1 + ) + return np.allclose(thresholds, even_thresholds, atol=backend.epsilon()) + + +def update_confusion_matrix_variables( + variables_to_update, + y_true, + y_pred, + thresholds, + top_k=None, + class_id=None, + sample_weight=None, + multi_label=False, + label_weights=None, + thresholds_distributed_evenly=False, +): + """Returns op to update the given confusion matrix variables. + + For every pair of values in y_true and y_pred: + + true_positive: y_true == True and y_pred > thresholds + false_negatives: y_true == True and y_pred <= thresholds + true_negatives: y_true == False and y_pred <= thresholds + false_positive: y_true == False and y_pred > thresholds + + The results will be weighted and added together. When multiple thresholds + are provided, we will repeat the same for every threshold. + + For estimation of these metrics over a stream of data, the function creates + an `update_op` operation that updates the given variables. + + If `sample_weight` is `None`, weights default to 1. + Use weights of 0 to mask values. + + Args: + variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys + and corresponding variables to update as values. + y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`. + y_pred: A floating point `Tensor` of arbitrary shape and whose values are + in the range `[0, 1]`. + thresholds: A float value, float tensor, python list, or tuple of float + thresholds in `[0, 1]`, or NEG_INF (used when top_k is set). + top_k: Optional int, indicates that the positive labels should be limited + to the top k predictions.
+ class_id: Optional int, limits the prediction and labels to the class + specified by this argument. + sample_weight: Optional `Tensor` whose rank is either 0, or the same rank + as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions + must be either `1`, or the same as the corresponding `y_true` + dimension). + multi_label: Optional boolean indicating whether multidimensional + prediction/labels should be treated as multilabel responses, or + flattened into a single label. When True, the values of + `variables_to_update` must have a second dimension equal to the number + of labels in y_true and y_pred, and those tensors must not be + RaggedTensors. + label_weights: (optional) tensor of non-negative weights for multilabel + data. The weights are applied when calculating TP, FP, FN, and TN + without explicit multilabel handling (i.e. when the data is to be + flattened). + thresholds_distributed_evenly: Boolean, whether the thresholds are evenly + distributed within the list. An optimized method will be used if this is + the case. See _update_confusion_matrix_variables_optimized() for more + details. + + Returns: + Update op. + + Raises: + ValueError: If `y_pred` and `y_true` have mismatched shapes, or if + `sample_weight` is not `None` and its shape doesn't match `y_pred`, or + if `variables_to_update` contains invalid keys. + """ + if multi_label and label_weights is not None: + raise ValueError( + "`label_weights` for multilabel data should be handled " + "outside of `update_confusion_matrix_variables` when " + "`multi_label` is True." + ) + if variables_to_update is None: + return + if not any( + key for key in variables_to_update if key in list(ConfusionMatrix) + ): + raise ValueError( + "Please provide at least one valid confusion matrix " + "variable to update. Valid variable key options are: " + f'"{list(ConfusionMatrix)}". ' + f'Received: "{variables_to_update.keys()}"' + ) + + variable_dtype = list(variables_to_update.values())[0].dtype + + y_true = tf.cast(y_true, dtype=variable_dtype) + y_pred = tf.cast(y_pred, dtype=variable_dtype) + + if thresholds_distributed_evenly: + # Check whether the thresholds have any leading or trailing epsilon + # added for floating point imprecision. The leading and trailing + # thresholds will be handled a bit differently as corner cases. At this + # point, thresholds should be a list/array with more than 2 items, with + # values in [0, 1]. See is_evenly_distributed_thresholds() for more + # details. + thresholds_with_epsilon = thresholds[0] < 0.0 or thresholds[-1] > 1.0 + + thresholds = tf.convert_to_tensor(thresholds, dtype=variable_dtype) + num_thresholds = thresholds.shape.as_list()[0] + + if multi_label: + one_thresh = tf.equal( + tf.cast(1, dtype=tf.int32), + tf.rank(thresholds), + name="one_set_of_thresholds_cond", + ) + else: + [y_pred, y_true], _ = ragged_assert_compatible_and_get_flat_values( + [y_pred, y_true], sample_weight + ) + one_thresh = tf.cast(True, dtype=tf.bool) + + invalid_keys = [ + key for key in variables_to_update if key not in list(ConfusionMatrix) + ] + if invalid_keys: + raise ValueError( + f'Invalid keys: "{invalid_keys}".
' + f'Valid variable key options are: "{list(ConfusionMatrix)}"' + ) + + if sample_weight is None: + y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( + y_pred, y_true + ) + else: + sample_weight = tf.cast(sample_weight, dtype=variable_dtype) + ( + y_pred, + y_true, + sample_weight, + ) = losses_utils.squeeze_or_expand_dimensions( + y_pred, y_true, sample_weight=sample_weight + ) + y_pred.shape.assert_is_compatible_with(y_true.shape) + + if top_k is not None: + y_pred = _filter_top_k(y_pred, top_k) + if class_id is not None: + # Preserve dimension to match with sample_weight + y_true = y_true[..., class_id, None] + y_pred = y_pred[..., class_id, None] + + if thresholds_distributed_evenly: + return _update_confusion_matrix_variables_optimized( + variables_to_update, + y_true, + y_pred, + thresholds, + multi_label=multi_label, + sample_weights=sample_weight, + label_weights=label_weights, + thresholds_with_epsilon=thresholds_with_epsilon, + ) + + pred_shape = tf.shape(y_pred) + num_predictions = pred_shape[0] + if y_pred.shape.ndims == 1: + num_labels = 1 else: - weights_tiled = tf.multiply(weights_tiled, label_weights_tiled) + num_labels = tf.math.reduce_prod(pred_shape[1:], axis=0) + thresh_label_tile = tf.where( + one_thresh, num_labels, tf.ones([], dtype=tf.int32) + ) - update_ops = [] + # Reshape predictions and labels, adding a dim for thresholding. + if multi_label: + predictions_extra_dim = tf.expand_dims(y_pred, 0) + labels_extra_dim = tf.expand_dims(tf.cast(y_true, dtype=tf.bool), 0) + else: + # Flatten predictions and labels when not multilabel. + predictions_extra_dim = tf.reshape(y_pred, [1, -1]) + labels_extra_dim = tf.reshape(tf.cast(y_true, dtype=tf.bool), [1, -1]) - def weighted_assign_add(label, pred, weights, var): - label_and_pred = tf.cast( - tf.logical_and(label, pred), dtype=var.dtype) - if weights is not None: - label_and_pred *= tf.cast(weights, dtype=var.dtype) - return var.assign_add(tf.reduce_sum(label_and_pred, 1)) + # Tile the thresholds for every prediction. + if multi_label: + thresh_pretile_shape = [num_thresholds, 1, -1] + thresh_tiles = [1, num_predictions, thresh_label_tile] + data_tiles = [num_thresholds, 1, 1] + else: + thresh_pretile_shape = [num_thresholds, -1] + thresh_tiles = [1, num_predictions * num_labels] + data_tiles = [num_thresholds, 1] + + thresh_tiled = tf.tile( + tf.reshape(thresholds, thresh_pretile_shape), tf.stack(thresh_tiles) + ) + + # Tile the predictions for every threshold. + preds_tiled = tf.tile(predictions_extra_dim, data_tiles) + + # Compare predictions and threshold. 
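# An illustrative, hedged sketch (not part of the patch above) of the
# fallback O(T * N) path: thresholds are tiled against the flattened
# predictions so one elementwise greater() covers every pair. The values
# here are invented for illustration.
import tensorflow as tf

y_pred = tf.constant([0.1, 0.5, 0.3, 0.9])  # N = 4 flattened predictions
thresholds = tf.constant([0.0, 0.5, 1.0])   # T = 3

preds_tiled = tf.tile(tf.reshape(y_pred, [1, -1]), [3, 1])       # [T, N]
thresh_tiled = tf.tile(tf.reshape(thresholds, [3, -1]), [1, 4])  # [T, N]
pred_is_pos = tf.greater(preds_tiled, thresh_tiled)
# Row t of pred_is_pos holds (y_pred > thresholds[t]) for all predictions.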
+ pred_is_pos = tf.greater(preds_tiled, thresh_tiled) + + # Tile labels by number of thresholds + label_is_pos = tf.tile(labels_extra_dim, data_tiles) + + if sample_weight is not None: + sample_weight = tf.__internal__.ops.broadcast_weights( + tf.cast(sample_weight, dtype=variable_dtype), y_pred + ) + weights_tiled = tf.tile( + tf.reshape(sample_weight, thresh_tiles), data_tiles + ) + else: + weights_tiled = None + + if label_weights is not None and not multi_label: + label_weights = tf.expand_dims(label_weights, 0) + label_weights = tf.__internal__.ops.broadcast_weights( + label_weights, y_pred + ) + label_weights_tiled = tf.tile( + tf.reshape(label_weights, thresh_tiles), data_tiles + ) + if weights_tiled is None: + weights_tiled = label_weights_tiled + else: + weights_tiled = tf.multiply(weights_tiled, label_weights_tiled) + + update_ops = [] - loop_vars = { - ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos), - } - update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update - update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update - update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update + def weighted_assign_add(label, pred, weights, var): + label_and_pred = tf.cast(tf.logical_and(label, pred), dtype=var.dtype) + if weights is not None: + label_and_pred *= tf.cast(weights, dtype=var.dtype) + return var.assign_add(tf.reduce_sum(label_and_pred, 1)) - if update_fn or update_tn: - pred_is_neg = tf.logical_not(pred_is_pos) - loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg) + loop_vars = { + ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos), + } + update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update + update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update + update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update - if update_fp or update_tn: - label_is_neg = tf.logical_not(label_is_pos) - loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos) - if update_tn: - loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg) + if update_fn or update_tn: + pred_is_neg = tf.logical_not(pred_is_pos) + loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg) - for matrix_cond, (label, pred) in loop_vars.items(): + if update_fp or update_tn: + label_is_neg = tf.logical_not(label_is_pos) + loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos) + if update_tn: + loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = ( + label_is_neg, + pred_is_neg, + ) - if matrix_cond in variables_to_update: - update_ops.append( - weighted_assign_add(label, pred, weights_tiled, - variables_to_update[matrix_cond])) + for matrix_cond, (label, pred) in loop_vars.items(): - return tf.group(update_ops) + if matrix_cond in variables_to_update: + update_ops.append( + weighted_assign_add( + label, pred, weights_tiled, variables_to_update[matrix_cond] + ) + ) + + return tf.group(update_ops) def _filter_top_k(x, k): - """Filters top-k values in the last dim of x and set the rest to NEG_INF. + """Filters top-k values in the last dim of x and set the rest to NEG_INF. - Used for computing top-k prediction values in dense labels (which has the same - shape as predictions) for recall and precision top-k metrics. + Used for computing top-k prediction values in dense labels (which has the + same shape as predictions) for recall and precision top-k metrics. - Args: - x: tensor with any dimensions. - k: the number of values to keep. + Args: + x: tensor with any dimensions. 
+ k: the number of values to keep. - Returns: - tensor with same shape and dtype as x. - """ - _, top_k_idx = tf.math.top_k(x, k, sorted=False) - top_k_mask = tf.reduce_sum( - tf.one_hot(top_k_idx, tf.shape(x)[-1], axis=-1), axis=-2) - return x * top_k_mask + NEG_INF * (1 - top_k_mask) + Returns: + tensor with same shape and dtype as x. + """ + _, top_k_idx = tf.math.top_k(x, k, sorted=False) + top_k_mask = tf.reduce_sum( + tf.one_hot(top_k_idx, tf.shape(x)[-1], axis=-1), axis=-2 + ) + return x * top_k_mask + NEG_INF * (1 - top_k_mask) def ragged_assert_compatible_and_get_flat_values(values, mask=None): - """If ragged, it checks the compatibility and then returns the flat_values. - - Note: If two tensors are dense, it does not check their compatibility. - Note: Although two ragged tensors with different ragged ranks could have - identical overall rank and dimension sizes and hence be compatible, - we do not support those cases. - Args: - values: A list of potentially ragged tensor of the same ragged_rank. - mask: A potentially ragged tensor of the same ragged_rank as elements in - Values. - - Returns: - A tuple in which the first element is the list of tensors and the second - is the mask tensor. ([Values], mask). Mask and the element in Values - are equal to the flat_values of the input arguments (if they were ragged). - """ - if isinstance(values, list): - is_all_ragged = \ - all(isinstance(rt, tf.RaggedTensor) for rt in values) - is_any_ragged = \ - any(isinstance(rt, tf.RaggedTensor) for rt in values) - else: - is_all_ragged = isinstance(values, tf.RaggedTensor) - is_any_ragged = is_all_ragged - if (is_all_ragged and - ((mask is None) or isinstance(mask, tf.RaggedTensor))): - to_be_stripped = False - if not isinstance(values, list): - values = [values] - to_be_stripped = True - - # NOTE: we leave the flat_values compatibility to - # tf.TensorShape `assert_is_compatible_with` - # check if both dynamic dimensions are equal and then use the flat_values. - nested_row_split_list = [rt.nested_row_splits for rt in values] - assertion_list = _assert_splits_match(nested_row_split_list) - - # if both are ragged sample_weights also should be ragged with same dims. - if isinstance(mask, tf.RaggedTensor): - assertion_list_for_mask = _assert_splits_match( - [nested_row_split_list[0], mask.nested_row_splits]) - with tf.control_dependencies(assertion_list_for_mask): - mask = tf.expand_dims(mask.flat_values, -1) - - # values has at least 1 element. - flat_values = [] - for value in values: - with tf.control_dependencies(assertion_list): - flat_values.append(tf.expand_dims(value.flat_values, -1)) - - values = flat_values[0] if to_be_stripped else flat_values - - elif is_any_ragged: - raise TypeError('Some of the inputs are not tf.RaggedTensor. ' - f'Input received: {values}') - # values are empty or value are not ragged and mask is ragged. - elif isinstance(mask, tf.RaggedTensor): - raise TypeError('Ragged mask is not allowed with non-ragged inputs. ' - f'Input received: {values}, mask received: {mask}') - - return values, mask + """If ragged, it checks the compatibility and then returns the flat_values. + + Note: If two tensors are dense, it does not check their compatibility. + Note: Although two ragged tensors with different ragged ranks could have + identical overall rank and dimension sizes and hence be compatible, + we do not support those cases. + Args: + values: A list of potentially ragged tensor of the same ragged_rank. 
+ mask: A potentially ragged tensor of the same ragged_rank as elements in + Values. + + Returns: + A tuple in which the first element is the list of tensors and the second + is the mask tensor. ([Values], mask). Mask and the element in Values + are equal to the flat_values of the input arguments (if they were + ragged). + """ + if isinstance(values, list): + is_all_ragged = all(isinstance(rt, tf.RaggedTensor) for rt in values) + is_any_ragged = any(isinstance(rt, tf.RaggedTensor) for rt in values) + else: + is_all_ragged = isinstance(values, tf.RaggedTensor) + is_any_ragged = is_all_ragged + if is_all_ragged and ((mask is None) or isinstance(mask, tf.RaggedTensor)): + to_be_stripped = False + if not isinstance(values, list): + values = [values] + to_be_stripped = True + + # NOTE: we leave the flat_values compatibility to + # tf.TensorShape `assert_is_compatible_with` check if both dynamic + # dimensions are equal and then use the flat_values. + nested_row_split_list = [rt.nested_row_splits for rt in values] + assertion_list = _assert_splits_match(nested_row_split_list) + + # if both are ragged sample_weights also should be ragged with same + # dims. + if isinstance(mask, tf.RaggedTensor): + assertion_list_for_mask = _assert_splits_match( + [nested_row_split_list[0], mask.nested_row_splits] + ) + with tf.control_dependencies(assertion_list_for_mask): + mask = tf.expand_dims(mask.flat_values, -1) + + # values has at least 1 element. + flat_values = [] + for value in values: + with tf.control_dependencies(assertion_list): + flat_values.append(tf.expand_dims(value.flat_values, -1)) + + values = flat_values[0] if to_be_stripped else flat_values + + elif is_any_ragged: + raise TypeError( + "Some of the inputs are not tf.RaggedTensor. " + f"Input received: {values}" + ) + # values are empty or value are not ragged and mask is ragged. + elif isinstance(mask, tf.RaggedTensor): + raise TypeError( + "Ragged mask is not allowed with non-ragged inputs. " + f"Input received: {values}, mask received: {mask}" + ) + + return values, mask def _assert_splits_match(nested_splits_lists): - """Checks that the given splits lists are identical. - - Performs static tests to ensure that the given splits lists are identical, - and returns a list of control dependency op tensors that check that they are - fully identical. - - Args: - nested_splits_lists: A list of nested_splits_lists, where each split_list is - a list of `splits` tensors from a `RaggedTensor`, ordered from outermost - ragged dimension to innermost ragged dimension. - - Returns: - A list of control dependency op tensors. - Raises: - ValueError: If the splits are not identical. - """ - error_msg = ('Inputs must have identical ragged splits. ' - f'Input received: {nested_splits_lists}') - for splits_list in nested_splits_lists: - if len(splits_list) != len(nested_splits_lists[0]): - raise ValueError(error_msg) - return [ - tf.debugging.assert_equal(s1, s2, message=error_msg) # pylint: disable=g-complex-comprehension - for splits_list in nested_splits_lists[1:] - for (s1, s2) in zip(nested_splits_lists[0], splits_list) - ] + """Checks that the given splits lists are identical. + + Performs static tests to ensure that the given splits lists are identical, + and returns a list of control dependency op tensors that check that they are + fully identical. 
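# An illustrative, hedged example (not part of the patch above) of the
# ragged helper: compatible ragged inputs are reduced to dense flat values
# with a trailing unit dimension added.
import tensorflow as tf
from keras.utils import metrics_utils

x = tf.ragged.constant([[1.0, 2.0], [3.0]])
y = tf.ragged.constant([[0.1, 0.2], [0.3]])
[xf, yf], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values(
    [x, y]
)
print(xf.shape, yf.shape)  # both dense, shape (3, 1)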
+ + Args: + nested_splits_lists: A list of nested_splits_lists, where each split_list + is a list of `splits` tensors from a `RaggedTensor`, ordered from + outermost ragged dimension to innermost ragged dimension. + + Returns: + A list of control dependency op tensors. + Raises: + ValueError: If the splits are not identical. + """ + error_msg = ( + "Inputs must have identical ragged splits. " + f"Input received: {nested_splits_lists}" + ) + for splits_list in nested_splits_lists: + if len(splits_list) != len(nested_splits_lists[0]): + raise ValueError(error_msg) + return [ + tf.debugging.assert_equal(s1, s2, message=error_msg) + for splits_list in nested_splits_lists[1:] + for (s1, s2) in zip(nested_splits_lists[0], splits_list) + ] def binary_matches(y_true, y_pred, threshold=0.5): - """Creates int Tensor, 1 for label-prediction match, 0 for mismatch. + """Creates int Tensor, 1 for label-prediction match, 0 for mismatch. - Args: - y_true: Ground truth values, of shape (batch_size, d0, .. dN). - y_pred: The predicted values, of shape (batch_size, d0, .. dN). - threshold: (Optional) Float representing the threshold for deciding whether - prediction values are 1 or 0. + Args: + y_true: Ground truth values, of shape (batch_size, d0, .. dN). + y_pred: The predicted values, of shape (batch_size, d0, .. dN). + threshold: (Optional) Float representing the threshold for deciding + whether prediction values are 1 or 0. - Returns: - Binary matches, of shape (batch_size, d0, .. dN). - """ - y_pred = tf.convert_to_tensor(y_pred) - threshold = tf.cast(threshold, y_pred.dtype) - y_pred = tf.cast(y_pred > threshold, y_pred.dtype) - return tf.cast(tf.equal(y_true, y_pred), backend.floatx()) + Returns: + Binary matches, of shape (batch_size, d0, .. dN). + """ + y_pred = tf.convert_to_tensor(y_pred) + threshold = tf.cast(threshold, y_pred.dtype) + y_pred = tf.cast(y_pred > threshold, y_pred.dtype) + return tf.cast(tf.equal(y_true, y_pred), backend.floatx()) def sparse_categorical_matches(y_true, y_pred): - """Creates float Tensor, 1.0 for label-prediction match, 0.0 for mismatch. - - You can provide logits of classes as `y_pred`, since argmax of - logits and probabilities are same. - - Args: - y_true: Integer ground truth values. - y_pred: The prediction values. - - Returns: - Match tensor: 1.0 for label-prediction match, 0.0 for mismatch. - """ - reshape_matches = False - y_pred = tf.convert_to_tensor(y_pred) - y_true = tf.convert_to_tensor(y_true) - y_true_org_shape = tf.shape(y_true) - y_pred_rank = y_pred.shape.ndims - y_true_rank = y_true.shape.ndims - - # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,) - if (y_true_rank is not None) and (y_pred_rank is not None) and (len( - backend.int_shape(y_true)) == len(backend.int_shape(y_pred))): - y_true = tf.squeeze(y_true, [-1]) - reshape_matches = True - y_pred = tf.math.argmax(y_pred, axis=-1) - - # If the predicted output and actual output types don't match, force cast them - # to match. - if backend.dtype(y_pred) != backend.dtype(y_true): - y_pred = tf.cast(y_pred, backend.dtype(y_true)) - matches = tf.cast(tf.equal(y_true, y_pred), backend.floatx()) - if reshape_matches: - matches = tf.reshape(matches, shape=y_true_org_shape) - return matches + """Creates float Tensor, 1.0 for label-prediction match, 0.0 for mismatch. + + You can provide logits of classes as `y_pred`, since argmax of + logits and probabilities are same. + + Args: + y_true: Integer ground truth values. + y_pred: The prediction values. 
+ + Returns: + Match tensor: 1.0 for label-prediction match, 0.0 for mismatch. + """ + reshape_matches = False + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.convert_to_tensor(y_true) + y_true_org_shape = tf.shape(y_true) + y_pred_rank = y_pred.shape.ndims + y_true_rank = y_true.shape.ndims + + # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,) + if ( + (y_true_rank is not None) + and (y_pred_rank is not None) + and (len(backend.int_shape(y_true)) == len(backend.int_shape(y_pred))) + ): + y_true = tf.squeeze(y_true, [-1]) + reshape_matches = True + y_pred = tf.math.argmax(y_pred, axis=-1) + + # If the predicted output and actual output types don't match, force cast + # them to match. + if backend.dtype(y_pred) != backend.dtype(y_true): + y_pred = tf.cast(y_pred, backend.dtype(y_true)) + matches = tf.cast(tf.equal(y_true, y_pred), backend.floatx()) + if reshape_matches: + matches = tf.reshape(matches, shape=y_true_org_shape) + return matches def sparse_top_k_categorical_matches(y_true, y_pred, k=5): - """Creates float Tensor, 1.0 for label-TopK_prediction match, 0.0 for mismatch. - - Args: - y_true: tensor of true targets. - y_pred: tensor of predicted targets. - k: (Optional) Number of top elements to look at for computing accuracy. - Defaults to 5. - - Returns: - Match tensor: 1.0 for label-prediction match, 0.0 for mismatch. - """ - reshape_matches = False - y_true = tf.convert_to_tensor(y_true) - y_pred = tf.convert_to_tensor(y_pred) - y_true_rank = y_true.shape.ndims - y_pred_rank = y_pred.shape.ndims - y_true_org_shape = tf.shape(y_true) - - # Flatten y_pred to (batch_size, num_samples) and y_true to (num_samples,) - if (y_true_rank is not None) and (y_pred_rank is not None): - if y_pred_rank > 2: - y_pred = tf.reshape(y_pred, [-1, y_pred.shape[-1]]) - if y_true_rank > 1: - reshape_matches = True - y_true = tf.reshape(y_true, [-1]) - - matches = tf.cast( - tf.math.in_top_k( - predictions=y_pred, targets=tf.cast(y_true, 'int32'), k=k), - dtype=backend.floatx()) - - # returned matches is expected to have same shape as y_true input - if reshape_matches: - return tf.reshape(matches, shape=y_true_org_shape) - - return matches + """Creates float Tensor, 1.0 for label-TopK_prediction match, 0.0 for + mismatch. + + Args: + y_true: tensor of true targets. + y_pred: tensor of predicted targets. + k: (Optional) Number of top elements to look at for computing accuracy. + Defaults to `5`. + + Returns: + Match tensor: 1.0 for label-prediction match, 0.0 for mismatch. 
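# An illustrative, hedged example (not part of the patch above) of the
# top-k matching: a label counts as a match when it appears among the k
# highest scores of its row. Values are invented for illustration.
import tensorflow as tf

y_true = tf.constant([1, 0])                              # integer labels
y_pred = tf.constant([[0.2, 0.3, 0.5], [0.6, 0.3, 0.1]])  # class scores
matches = tf.cast(
    tf.math.in_top_k(predictions=y_pred, targets=y_true, k=2),
    tf.float32,
)
print(matches.numpy())  # [1. 1.]: label 1 is in row 0's top 2, label 0 in row 1's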
+ """ + reshape_matches = False + y_true = tf.convert_to_tensor(y_true) + y_pred = tf.convert_to_tensor(y_pred) + y_true_rank = y_true.shape.ndims + y_pred_rank = y_pred.shape.ndims + y_true_org_shape = tf.shape(y_true) + + # Flatten y_pred to (batch_size, num_samples) and y_true to (num_samples,) + if (y_true_rank is not None) and (y_pred_rank is not None): + if y_pred_rank > 2: + y_pred = tf.reshape(y_pred, [-1, y_pred.shape[-1]]) + if y_true_rank > 1: + reshape_matches = True + y_true = tf.reshape(y_true, [-1]) + + matches = tf.cast( + tf.math.in_top_k( + predictions=y_pred, targets=tf.cast(y_true, "int32"), k=k + ), + dtype=backend.floatx(), + ) + + # returned matches is expected to have same shape as y_true input + if reshape_matches: + return tf.reshape(matches, shape=y_true_org_shape) + + return matches diff --git a/keras/utils/metrics_utils_test.py b/keras/utils/metrics_utils_test.py index 42284a06a953..e099781b4fb7 100644 --- a/keras/utils/metrics_utils_test.py +++ b/keras/utils/metrics_utils_test.py @@ -14,422 +14,535 @@ # ============================================================================== """Tests for metrics_utils.""" +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized from keras import backend from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils from keras.utils import metrics_utils -import numpy as np -import tensorflow.compat.v2 as tf - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class RaggedSizeOpTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - { - 'x_list': [1], - 'y_list': [2] - }, - { - 'x_list': [1, 2], - 'y_list': [2, 3] - }, - { - 'x_list': [1, 2, 4], - 'y_list': [2, 3, 5] - }, - { - 'x_list': [[1, 2], [3, 4]], - 'y_list': [[2, 3], [5, 6]] - }, - ]) - def test_passing_dense_tensors(self, x_list, y_list): - x = tf.constant(x_list) - y = tf.constant(y_list) - [x, - y], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values([x, y]) - x.shape.assert_is_compatible_with(y.shape) - - @parameterized.parameters([ - { - 'x_list': [1], - }, - { - 'x_list': [1, 2], - }, - { - 'x_list': [1, 2, 4], - }, - { - 'x_list': [[1, 2], [3, 4]], - }, - ]) - def test_passing_one_dense_tensor(self, x_list): - x = tf.constant(x_list) - [x], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values([x]) - - @parameterized.parameters([ - { - 'x_list': [1], - 'y_list': [2] - }, - { - 'x_list': [1, 2], - 'y_list': [2, 3] - }, - { - 'x_list': [1, 2, 4], - 'y_list': [2, 3, 5] - }, - { - 'x_list': [[1, 2], [3, 4]], - 'y_list': [[2, 3], [5, 6]] - }, - { - 'x_list': [[1, 2], [3, 4], [1]], - 'y_list': [[2, 3], [5, 6], [3]] - }, - { - 'x_list': [[1, 2], [], [1]], - 'y_list': [[2, 3], [], [3]] - }, - ]) - def test_passing_both_ragged(self, x_list, y_list): - x = tf.ragged.constant(x_list) - y = tf.ragged.constant(y_list) - [x, - y], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values([x, y]) - x.shape.assert_is_compatible_with(y.shape) - - @parameterized.parameters([ - { - 'x_list': [1], - }, - { - 'x_list': [1, 2], - }, - { - 'x_list': [1, 2, 4], - }, - { - 'x_list': [[1, 2], [3, 4]], - }, - { - 'x_list': [[1, 2], [3, 4], [1]], - }, - { - 'x_list': [[1, 2], [], [1]], - }, - ]) - def test_passing_one_ragged(self, x_list): - x = tf.ragged.constant(x_list) - [x], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values([x]) - - 
@parameterized.parameters([ - { - 'x_list': [1], - 'y_list': [2], - 'mask_list': [0] - }, - { - 'x_list': [1, 2], - 'y_list': [2, 3], - 'mask_list': [0, 1] - }, - { - 'x_list': [1, 2, 4], - 'y_list': [2, 3, 5], - 'mask_list': [1, 1, 1] - }, - { - 'x_list': [[1, 2], [3, 4]], - 'y_list': [[2, 3], [5, 6]], - 'mask_list': [[1, 1], [0, 1]] - }, - { - 'x_list': [[1, 2], [3, 4], [1]], - 'y_list': [[2, 3], [5, 6], [3]], - 'mask_list': [[1, 1], [0, 0], [1]] - }, - { - 'x_list': [[1, 2], [], [1]], - 'y_list': [[2, 3], [], [3]], - 'mask_list': [[1, 1], [], [0]] - }, - ]) - def test_passing_both_ragged_with_mask(self, x_list, y_list, mask_list): - x = tf.ragged.constant(x_list) - y = tf.ragged.constant(y_list) - mask = tf.ragged.constant(mask_list) - [x, y], mask = \ - metrics_utils.ragged_assert_compatible_and_get_flat_values([x, y], mask) - x.shape.assert_is_compatible_with(y.shape) - y.shape.assert_is_compatible_with(mask.shape) - - @parameterized.parameters([ - { - 'x_list': [1], - 'mask_list': [0] - }, - { - 'x_list': [1, 2], - 'mask_list': [0, 1] - }, - { - 'x_list': [1, 2, 4], - 'mask_list': [1, 1, 1] - }, - { - 'x_list': [[1, 2], [3, 4]], - 'mask_list': [[1, 1], [0, 1]] - }, - { - 'x_list': [[1, 2], [3, 4], [1]], - 'mask_list': [[1, 1], [0, 0], [1]] - }, - { - 'x_list': [[1, 2], [], [1]], - 'mask_list': [[1, 1], [], [0]] - }, - ]) - def test_passing_one_ragged_with_mask(self, x_list, mask_list): - x = tf.ragged.constant(x_list) - mask = tf.ragged.constant(mask_list) - [x], mask = \ - metrics_utils.ragged_assert_compatible_and_get_flat_values([x], mask) - x.shape.assert_is_compatible_with(mask.shape) - - @parameterized.parameters([ - { - 'x_list': [[[1, 3]]], - 'y_list': [[2, 3]] - }, - ]) - def test_failing_different_ragged_and_dense_ranks(self, x_list, y_list): - x = tf.ragged.constant(x_list) - y = tf.ragged.constant(y_list) - with self.assertRaises(ValueError): # pylint: disable=g-error-prone-assert-raises - [x, y - ], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values([x, y]) - - @parameterized.parameters([ - { - 'x_list': [[[1, 3]]], - 'y_list': [[[2, 3]]], - 'mask_list': [[0, 1]] - }, - ]) - def test_failing_different_mask_ranks(self, x_list, y_list, mask_list): - x = tf.ragged.constant(x_list) - y = tf.ragged.constant(y_list) - mask = tf.ragged.constant(mask_list) - with self.assertRaises(ValueError): # pylint: disable=g-error-prone-assert-raises - [x, y - ], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values([x, y], - mask) - - # we do not support such cases that ragged_ranks are different but overall - # dimension shapes and sizes are identical due to adding too much performance - # overheads to the overall use cases. 
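# An illustrative, hedged sketch (not part of the patch) of the unsupported
# case described above: wrapping a dense tensor in one ragged dimension
# yields a different ragged_rank (and different nested_row_splits) than a
# fully ragged constant, even when the overall dimension sizes agree.
import tensorflow as tf

dt = tf.constant([[[1, 2]]])
x = tf.RaggedTensor.from_row_splits(dt, row_splits=[0, 1])  # ragged_rank 1
y = tf.ragged.constant([[[[1, 2]]]])                        # ragged_rank 3
print(x.ragged_rank, y.ragged_rank)  # 1 3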
- def test_failing_different_ragged_ranks(self): - dt = tf.constant([[[1, 2]]]) - # adding a ragged dimension - x = tf.RaggedTensor.from_row_splits(dt, row_splits=[0, 1]) - y = tf.ragged.constant([[[[1, 2]]]]) - with self.assertRaises(ValueError): # pylint: disable=g-error-prone-assert-raises - [x, y], _ = \ - metrics_utils.ragged_assert_compatible_and_get_flat_values([x, y]) - - -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) + @parameterized.parameters( + [ + {"x_list": [1], "y_list": [2]}, + {"x_list": [1, 2], "y_list": [2, 3]}, + {"x_list": [1, 2, 4], "y_list": [2, 3, 5]}, + {"x_list": [[1, 2], [3, 4]], "y_list": [[2, 3], [5, 6]]}, + ] + ) + def test_passing_dense_tensors(self, x_list, y_list): + x = tf.constant(x_list) + y = tf.constant(y_list) + [x, y], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values( + [x, y] + ) + x.shape.assert_is_compatible_with(y.shape) + + @parameterized.parameters( + [ + { + "x_list": [1], + }, + { + "x_list": [1, 2], + }, + { + "x_list": [1, 2, 4], + }, + { + "x_list": [[1, 2], [3, 4]], + }, + ] + ) + def test_passing_one_dense_tensor(self, x_list): + x = tf.constant(x_list) + [x], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values([x]) + + @parameterized.parameters( + [ + {"x_list": [1], "y_list": [2]}, + {"x_list": [1, 2], "y_list": [2, 3]}, + {"x_list": [1, 2, 4], "y_list": [2, 3, 5]}, + {"x_list": [[1, 2], [3, 4]], "y_list": [[2, 3], [5, 6]]}, + {"x_list": [[1, 2], [3, 4], [1]], "y_list": [[2, 3], [5, 6], [3]]}, + {"x_list": [[1, 2], [], [1]], "y_list": [[2, 3], [], [3]]}, + ] + ) + def test_passing_both_ragged(self, x_list, y_list): + x = tf.ragged.constant(x_list) + y = tf.ragged.constant(y_list) + [x, y], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values( + [x, y] + ) + x.shape.assert_is_compatible_with(y.shape) + + @parameterized.parameters( + [ + { + "x_list": [1], + }, + { + "x_list": [1, 2], + }, + { + "x_list": [1, 2, 4], + }, + { + "x_list": [[1, 2], [3, 4]], + }, + { + "x_list": [[1, 2], [3, 4], [1]], + }, + { + "x_list": [[1, 2], [], [1]], + }, + ] + ) + def test_passing_one_ragged(self, x_list): + x = tf.ragged.constant(x_list) + [x], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values([x]) + + @parameterized.parameters( + [ + {"x_list": [1], "y_list": [2], "mask_list": [0]}, + {"x_list": [1, 2], "y_list": [2, 3], "mask_list": [0, 1]}, + {"x_list": [1, 2, 4], "y_list": [2, 3, 5], "mask_list": [1, 1, 1]}, + { + "x_list": [[1, 2], [3, 4]], + "y_list": [[2, 3], [5, 6]], + "mask_list": [[1, 1], [0, 1]], + }, + { + "x_list": [[1, 2], [3, 4], [1]], + "y_list": [[2, 3], [5, 6], [3]], + "mask_list": [[1, 1], [0, 0], [1]], + }, + { + "x_list": [[1, 2], [], [1]], + "y_list": [[2, 3], [], [3]], + "mask_list": [[1, 1], [], [0]], + }, + ] + ) + def test_passing_both_ragged_with_mask(self, x_list, y_list, mask_list): + x = tf.ragged.constant(x_list) + y = tf.ragged.constant(y_list) + mask = tf.ragged.constant(mask_list) + [ + x, + y, + ], mask = metrics_utils.ragged_assert_compatible_and_get_flat_values( + [x, y], mask + ) + x.shape.assert_is_compatible_with(y.shape) + y.shape.assert_is_compatible_with(mask.shape) + + @parameterized.parameters( + [ + {"x_list": [1], "mask_list": [0]}, + {"x_list": [1, 2], "mask_list": [0, 1]}, + {"x_list": [1, 2, 4], "mask_list": [1, 1, 1]}, + {"x_list": [[1, 2], [3, 4]], "mask_list": [[1, 1], [0, 1]]}, + { + "x_list": [[1, 2], [3, 4], [1]], + "mask_list": [[1, 1], [0, 0], [1]], + }, + {"x_list": [[1, 2], [], [1]], "mask_list": [[1, 1], [], 
[0]]}, + ] + ) + def test_passing_one_ragged_with_mask(self, x_list, mask_list): + x = tf.ragged.constant(x_list) + mask = tf.ragged.constant(mask_list) + [x], mask = metrics_utils.ragged_assert_compatible_and_get_flat_values( + [x], mask + ) + x.shape.assert_is_compatible_with(mask.shape) + + @parameterized.parameters( + [ + {"x_list": [[[1, 3]]], "y_list": [[2, 3]]}, + ] + ) + def test_failing_different_ragged_and_dense_ranks(self, x_list, y_list): + x = tf.ragged.constant(x_list) + y = tf.ragged.constant(y_list) + with self.assertRaises(ValueError): + [ + x, + y, + ], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values( + [x, y] + ) + + @parameterized.parameters( + [ + {"x_list": [[[1, 3]]], "y_list": [[[2, 3]]], "mask_list": [[0, 1]]}, + ] + ) + def test_failing_different_mask_ranks(self, x_list, y_list, mask_list): + x = tf.ragged.constant(x_list) + y = tf.ragged.constant(y_list) + mask = tf.ragged.constant(mask_list) + with self.assertRaises(ValueError): + [ + x, + y, + ], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values( + [x, y], mask + ) + + # we do not support such cases that ragged_ranks are different but overall + # dimension shapes and sizes are identical due to adding too much + # performance overheads to the overall use cases. + def test_failing_different_ragged_ranks(self): + dt = tf.constant([[[1, 2]]]) + # adding a ragged dimension + x = tf.RaggedTensor.from_row_splits(dt, row_splits=[0, 1]) + y = tf.ragged.constant([[[[1, 2]]]]) + with self.assertRaises(ValueError): + [ + x, + y, + ], _ = metrics_utils.ragged_assert_compatible_and_get_flat_values( + [x, y] + ) + + +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class FilterTopKTest(tf.test.TestCase, parameterized.TestCase): - - def test_one_dimensional(self): - x = tf.constant([.3, .1, .2, -.5, 42.]) - top_1 = self.evaluate(metrics_utils._filter_top_k(x=x, k=1)) - top_2 = self.evaluate(metrics_utils._filter_top_k(x=x, k=2)) - top_3 = self.evaluate(metrics_utils._filter_top_k(x=x, k=3)) - - self.assertAllClose(top_1, [ - metrics_utils.NEG_INF, metrics_utils.NEG_INF, metrics_utils.NEG_INF, - metrics_utils.NEG_INF, 42. - ]) - self.assertAllClose(top_2, [ - .3, metrics_utils.NEG_INF, metrics_utils.NEG_INF, metrics_utils.NEG_INF, - 42. - ]) - self.assertAllClose( - top_3, [.3, metrics_utils.NEG_INF, .2, metrics_utils.NEG_INF, 42.]) - - def test_three_dimensional(self): - x = tf.constant([[[.3, .1, .2], [-.3, -.2, -.1]], - [[5., .2, 42.], [-.3, -.6, -.99]]]) - top_2 = self.evaluate(metrics_utils._filter_top_k(x=x, k=2)) - - self.assertAllClose( - top_2, - [[[.3, metrics_utils.NEG_INF, .2], [metrics_utils.NEG_INF, -.2, -.1]], - [[5., metrics_utils.NEG_INF, 42.], [-.3, -.6, metrics_utils.NEG_INF]]]) - - def test_handles_dynamic_shapes(self): - # See b/150281686. # GOOGLE_INTERNAL - - def _identity(x): - return x - - def _filter_top_k(x): - # This loses the static shape. - x = tf.numpy_function(_identity, (x,), tf.float32) - - return metrics_utils._filter_top_k(x=x, k=2) - - x = tf.constant([.3, .1, .2, -.5, 42.]) - top_2 = self.evaluate(_filter_top_k(x)) - self.assertAllClose(top_2, [ - .3, metrics_utils.NEG_INF, metrics_utils.NEG_INF, metrics_utils.NEG_INF, - 42. 
- ]) + def test_one_dimensional(self): + x = tf.constant([0.3, 0.1, 0.2, -0.5, 42.0]) + top_1 = self.evaluate(metrics_utils._filter_top_k(x=x, k=1)) + top_2 = self.evaluate(metrics_utils._filter_top_k(x=x, k=2)) + top_3 = self.evaluate(metrics_utils._filter_top_k(x=x, k=3)) + + self.assertAllClose( + top_1, + [ + metrics_utils.NEG_INF, + metrics_utils.NEG_INF, + metrics_utils.NEG_INF, + metrics_utils.NEG_INF, + 42.0, + ], + ) + self.assertAllClose( + top_2, + [ + 0.3, + metrics_utils.NEG_INF, + metrics_utils.NEG_INF, + metrics_utils.NEG_INF, + 42.0, + ], + ) + self.assertAllClose( + top_3, + [0.3, metrics_utils.NEG_INF, 0.2, metrics_utils.NEG_INF, 42.0], + ) + + def test_three_dimensional(self): + x = tf.constant( + [ + [[0.3, 0.1, 0.2], [-0.3, -0.2, -0.1]], + [[5.0, 0.2, 42.0], [-0.3, -0.6, -0.99]], + ] + ) + top_2 = self.evaluate(metrics_utils._filter_top_k(x=x, k=2)) + + self.assertAllClose( + top_2, + [ + [ + [0.3, metrics_utils.NEG_INF, 0.2], + [metrics_utils.NEG_INF, -0.2, -0.1], + ], + [ + [5.0, metrics_utils.NEG_INF, 42.0], + [-0.3, -0.6, metrics_utils.NEG_INF], + ], + ], + ) + + def test_handles_dynamic_shapes(self): + # See b/150281686. # GOOGLE_INTERNAL + + def _identity(x): + return x + + def _filter_top_k(x): + # This loses the static shape. + x = tf.numpy_function(_identity, (x,), tf.float32) + + return metrics_utils._filter_top_k(x=x, k=2) + + x = tf.constant([0.3, 0.1, 0.2, -0.5, 42.0]) + top_2 = self.evaluate(_filter_top_k(x)) + self.assertAllClose( + top_2, + [ + 0.3, + metrics_utils.NEG_INF, + metrics_utils.NEG_INF, + metrics_utils.NEG_INF, + 42.0, + ], + ) class MatchesMethodsTest(tf.test.TestCase, parameterized.TestCase): - - def test_sparse_categorical_matches(self): - matches_method = metrics_utils.sparse_categorical_matches - - # Test return tensor is type float - y_true = tf.constant(np.random.randint(0, 7, (6,))) - y_pred = tf.constant(np.random.random((6, 7))) - self.assertEqual(matches_method(y_true, y_pred).dtype, backend.floatx()) - - # Tests that resulting Tensor always has same shape as y_true. 
Tests from - # 1 dim to 4 dims - dims = [] - for _ in range(4): - dims.append(np.random.randint(1, 7)) - y_true = tf.constant(np.random.randint(0, 7, dims)) - y_pred = tf.constant(np.random.random(dims + [3])) - self.assertEqual( - matches_method(y_true, y_pred).shape, y_true.shape) - - # Test correctness if the shape of y_true is (num_samples,) - y_true = tf.constant([1., 0., 0., 0.]) - y_pred = tf.constant([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]]) - self.assertAllEqual( - matches_method(y_true, y_pred), [0., 1., 1., 1.]) - - # Test correctness if the shape of y_true is (num_samples, 1) - y_true = tf.constant([[1.], [0.], [0.], [0.]]) - y_pred = tf.constant([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]]) - self.assertAllEqual( - matches_method(y_true, y_pred), [[0.], [1.], [1.], [1.]]) - - # Test correctness if the shape of y_true is (batch_size, seq_length) and - # y_pred is (batch_size, seq_length, num_classes) - y_pred = tf.constant([[[0.2, 0.3, 0.1], [0.1, 0.2, 0.7]], - [[0.3, 0.2, 0.1], [0.7, 0.2, 0.1]]]) - y_true = tf.constant([[1, 0], [1, 0]]) - self.assertAllEqual( - matches_method(y_true, y_pred), [[1., 0.], [0., 1.]]) - - def test_sparse_top_k_categorical_matches(self): - matches_method = metrics_utils.sparse_top_k_categorical_matches - - # Test return tensor is type float - y_true = tf.constant(np.random.randint(0, 7, (6,))) - y_pred = tf.constant(np.random.random((6, 7)), dtype=tf.float32) - self.assertEqual( - matches_method(y_true, y_pred, 1).dtype, backend.floatx()) - - # Tests that resulting Tensor always has same shape as y_true. Tests from - # 1 dim to 4 dims - dims = [] - for _ in range(4): - dims.append(np.random.randint(1, 7)) - y_true = tf.constant(np.random.randint(0, 7, dims)) - y_pred = tf.constant(np.random.random(dims + [3]), dtype=tf.float32) - self.assertEqual( - matches_method(y_true, y_pred, 1).shape, y_true.shape) - - # Test correctness if the shape of y_true is (num_samples,) for k = 1,2,3 - y_true = tf.constant([1., 0., 0., 0.]) - y_pred = tf.constant([[0.7, 0.2, 0.1], [0.5, 0.3, 0.2], [0.6, 0.3, 0.1], - [0.0, 0.1, 0.9]]) - self.assertAllEqual( - matches_method(y_true, y_pred, 1), [0., 1., 1., 0.]) - self.assertAllEqual( - matches_method(y_true, y_pred, 2), [1., 1., 1., 0.]) - self.assertAllEqual( - matches_method(y_true, y_pred, 3), [1., 1., 1., 1.]) - - # Test correctness if the shape of y_true is (num_samples, 1) - # for k = 1,2,3 - y_true = tf.constant([[1.], [0.], [0.], [0.]]) - y_pred = tf.constant([[0.7, 0.2, 0.1], [0.5, 0.3, 0.2], [0.6, 0.3, 0.1], - [0.0, 0.1, 0.9]]) - self.assertAllEqual( - matches_method(y_true, y_pred, 1), - [[0.], [1.], [1.], [0.]]) - self.assertAllEqual( - matches_method(y_true, y_pred, 2), - [[1.], [1.], [1.], [0.]]) - self.assertAllEqual( - matches_method(y_true, y_pred, 3), - [[1.], [1.], [1.], [1.]]) - - # Test correctness if the shape of y_true is (batch_size, seq_length) and - # y_pred is (batch_size, seq_length, num_classes) for k = 1,2,3 - y_pred = tf.constant([[[0.2, 0.3, 0.1], [0.1, 0.2, 0.7]], - [[0.3, 0.2, 0.1], [0.7, 0.2, 0.1]]]) - y_true = tf.constant([[1, 0], [1, 0]]) - self.assertAllEqual( - matches_method(y_true, y_pred, 1), [[1., 0.], [0., 1.]]) - self.assertAllEqual( - matches_method(y_true, y_pred, 2), [[1., 0.], [1., 1.]]) - self.assertAllEqual( - matches_method(y_true, y_pred, 3), [[1., 1.], [1., 1.]]) - - def test_binary_matches(self): - matches_method = metrics_utils.binary_matches - - # Test return tensor is type float - y_true = tf.constant(np.random.random((6, 7))) - y_pred = 
tf.constant(np.random.random((6, 7))) - self.assertEqual( - matches_method(y_true, y_pred, .5).dtype, - backend.floatx()) - - # Tests that resulting Tensor always has same shape as y_true. Tests from - # 1 dim to 4 dims. - dims = [] - for _ in range(4): - dims.append(np.random.randint(1, 7)) - y_true = y_pred = tf.constant(np.random.random(dims)) - self.assertEqual( - matches_method(y_true, y_pred, 0.).shape, y_true.shape) - - # Testing for correctness shape (num_samples, 1) - y_true = tf.constant([[1.], [0.], [1.], [1.]]) - y_pred = tf.constant([[.75], [.2], [.2], [.75]]) - self.assertAllEqual( - matches_method(y_true, y_pred, .5), - [[1.], [1.], [0.], [1.]]) - - # Testing for correctness shape (num_samples,) - y_true = tf.constant([1., 0., 1., 1.]) - y_pred = tf.constant([.75, .2, .2, .75]) - self.assertAllEqual( - matches_method(y_true, y_pred, .5), [1., 1., 0., 1.]) - - # Testing for correctness batches of sequences - # shape (num_samples, seq_len) - y_true = tf.constant([[1., 0.], [0., 1.], [1., 0.], [1., 0.]]) - y_pred = tf.constant([[.75, .2], [.2, .75], [.2, .75], [.75, .2]]) - self.assertAllEqual( - matches_method(y_true, y_pred, .5), - [[1., 1.], [1., 1.], [0., 0.], [1., 1.]]) - - -if __name__ == '__main__': - tf.test.main() + def test_sparse_categorical_matches(self): + matches_method = metrics_utils.sparse_categorical_matches + + # Test return tensor is type float + y_true = tf.constant(np.random.randint(0, 7, (6,))) + y_pred = tf.constant(np.random.random((6, 7))) + self.assertEqual(matches_method(y_true, y_pred).dtype, backend.floatx()) + + # Tests that resulting Tensor always has same shape as y_true. Tests + # from 1 dim to 4 dims + dims = [] + for _ in range(4): + dims.append(np.random.randint(1, 7)) + y_true = tf.constant(np.random.randint(0, 7, dims)) + y_pred = tf.constant(np.random.random(dims + [3])) + self.assertEqual(matches_method(y_true, y_pred).shape, y_true.shape) + + # Test correctness if the shape of y_true is (num_samples,) + y_true = tf.constant([1.0, 0.0, 0.0, 0.0]) + y_pred = tf.constant([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]]) + self.assertAllEqual( + matches_method(y_true, y_pred), [0.0, 1.0, 1.0, 1.0] + ) + + # Test correctness if the shape of y_true is (num_samples, 1) + y_true = tf.constant([[1.0], [0.0], [0.0], [0.0]]) + y_pred = tf.constant([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]]) + self.assertAllEqual( + matches_method(y_true, y_pred), [[0.0], [1.0], [1.0], [1.0]] + ) + + # Test correctness if the shape of y_true is (batch_size, seq_length) + # and y_pred is (batch_size, seq_length, num_classes) + y_pred = tf.constant( + [ + [[0.2, 0.3, 0.1], [0.1, 0.2, 0.7]], + [[0.3, 0.2, 0.1], [0.7, 0.2, 0.1]], + ] + ) + y_true = tf.constant([[1, 0], [1, 0]]) + self.assertAllEqual( + matches_method(y_true, y_pred), [[1.0, 0.0], [0.0, 1.0]] + ) + + def test_sparse_top_k_categorical_matches(self): + matches_method = metrics_utils.sparse_top_k_categorical_matches + + # Test return tensor is type float + y_true = tf.constant(np.random.randint(0, 7, (6,))) + y_pred = tf.constant(np.random.random((6, 7)), dtype=tf.float32) + self.assertEqual( + matches_method(y_true, y_pred, 1).dtype, backend.floatx() + ) + + # Tests that resulting Tensor always has same shape as y_true. 
Tests + # from 1 dim to 4 dims + dims = [] + for _ in range(4): + dims.append(np.random.randint(1, 7)) + y_true = tf.constant(np.random.randint(0, 7, dims)) + y_pred = tf.constant(np.random.random(dims + [3]), dtype=tf.float32) + self.assertEqual( + matches_method(y_true, y_pred, 1).shape, y_true.shape + ) + + # Test correctness if the shape of y_true is (num_samples,) for k = + # 1,2,3 + y_true = tf.constant([1.0, 0.0, 0.0, 0.0]) + y_pred = tf.constant( + [[0.7, 0.2, 0.1], [0.5, 0.3, 0.2], [0.6, 0.3, 0.1], [0.0, 0.1, 0.9]] + ) + self.assertAllEqual( + matches_method(y_true, y_pred, 1), [0.0, 1.0, 1.0, 0.0] + ) + self.assertAllEqual( + matches_method(y_true, y_pred, 2), [1.0, 1.0, 1.0, 0.0] + ) + self.assertAllEqual( + matches_method(y_true, y_pred, 3), [1.0, 1.0, 1.0, 1.0] + ) + + # Test correctness if the shape of y_true is (num_samples, 1) + # for k = 1,2,3 + y_true = tf.constant([[1.0], [0.0], [0.0], [0.0]]) + y_pred = tf.constant( + [[0.7, 0.2, 0.1], [0.5, 0.3, 0.2], [0.6, 0.3, 0.1], [0.0, 0.1, 0.9]] + ) + self.assertAllEqual( + matches_method(y_true, y_pred, 1), [[0.0], [1.0], [1.0], [0.0]] + ) + self.assertAllEqual( + matches_method(y_true, y_pred, 2), [[1.0], [1.0], [1.0], [0.0]] + ) + self.assertAllEqual( + matches_method(y_true, y_pred, 3), [[1.0], [1.0], [1.0], [1.0]] + ) + + # Test correctness if the shape of y_true is (batch_size, seq_length) + # and y_pred is (batch_size, seq_length, num_classes) for k = 1,2,3 + y_pred = tf.constant( + [ + [[0.2, 0.3, 0.1], [0.1, 0.2, 0.7]], + [[0.3, 0.2, 0.1], [0.7, 0.2, 0.1]], + ] + ) + y_true = tf.constant([[1, 0], [1, 0]]) + self.assertAllEqual( + matches_method(y_true, y_pred, 1), [[1.0, 0.0], [0.0, 1.0]] + ) + self.assertAllEqual( + matches_method(y_true, y_pred, 2), [[1.0, 0.0], [1.0, 1.0]] + ) + self.assertAllEqual( + matches_method(y_true, y_pred, 3), [[1.0, 1.0], [1.0, 1.0]] + ) + + def test_binary_matches(self): + matches_method = metrics_utils.binary_matches + + # Test return tensor is type float + y_true = tf.constant(np.random.random((6, 7))) + y_pred = tf.constant(np.random.random((6, 7))) + self.assertEqual( + matches_method(y_true, y_pred, 0.5).dtype, backend.floatx() + ) + + # Tests that resulting Tensor always has same shape as y_true. Tests + # from 1 dim to 4 dims. 
+ dims = [] + for _ in range(4): + dims.append(np.random.randint(1, 7)) + y_true = y_pred = tf.constant(np.random.random(dims)) + self.assertEqual( + matches_method(y_true, y_pred, 0.0).shape, y_true.shape + ) + + # Testing for correctness shape (num_samples, 1) + y_true = tf.constant([[1.0], [0.0], [1.0], [1.0]]) + y_pred = tf.constant([[0.75], [0.2], [0.2], [0.75]]) + self.assertAllEqual( + matches_method(y_true, y_pred, 0.5), [[1.0], [1.0], [0.0], [1.0]] + ) + + # Testing for correctness shape (num_samples,) + y_true = tf.constant([1.0, 0.0, 1.0, 1.0]) + y_pred = tf.constant([0.75, 0.2, 0.2, 0.75]) + self.assertAllEqual( + matches_method(y_true, y_pred, 0.5), [1.0, 1.0, 0.0, 1.0] + ) + + # Testing for correctness batches of sequences + # shape (num_samples, seq_len) + y_true = tf.constant([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]]) + y_pred = tf.constant( + [[0.75, 0.2], [0.2, 0.75], [0.2, 0.75], [0.75, 0.2]] + ) + self.assertAllEqual( + matches_method(y_true, y_pred, 0.5), + [[1.0, 1.0], [1.0, 1.0], [0.0, 0.0], [1.0, 1.0]], + ) + + +@test_utils.run_v2_only +class UpdateConfusionMatrixVarTest(tf.test.TestCase, parameterized.TestCase): + def setUp(self): + self.tp = metrics_utils.ConfusionMatrix.TRUE_POSITIVES + self.tn = metrics_utils.ConfusionMatrix.TRUE_NEGATIVES + self.fp = metrics_utils.ConfusionMatrix.FALSE_POSITIVES + self.fn = metrics_utils.ConfusionMatrix.FALSE_NEGATIVES + self.variables_to_update = { + self.tp: tf.Variable([0], dtype=tf.float32), + self.tn: tf.Variable([0], dtype=tf.float32), + self.fp: tf.Variable([0], dtype=tf.float32), + self.fn: tf.Variable([0], dtype=tf.float32), + } + + def test_without_sample_weight(self): + y_true = tf.constant([[1, 1, 0], [0, 0, 1]]) + y_pred = tf.constant([[0.8, 0.7, 0.1], [0.1, 0.6, 0.4]]) + thresholds = [0.5] + + metrics_utils.update_confusion_matrix_variables( + variables_to_update=self.variables_to_update, + y_true=y_true, + y_pred=y_pred, + thresholds=thresholds, + ) + self.assertEqual(self.variables_to_update[self.tp].numpy()[0], 2) + self.assertEqual(self.variables_to_update[self.tn].numpy()[0], 2) + self.assertEqual(self.variables_to_update[self.fp].numpy()[0], 1) + self.assertEqual(self.variables_to_update[self.fn].numpy()[0], 1) + + def test_with_sample_weight(self): + y_true = tf.constant([[1, 1, 0], [0, 0, 1]]) + y_pred = tf.constant([[0.8, 0.7, 0.1], [0.1, 0.6, 0.4]]) + thresholds = [0.5] + sample_weight = [2, 1] + + metrics_utils.update_confusion_matrix_variables( + variables_to_update=self.variables_to_update, + y_true=y_true, + y_pred=y_pred, + thresholds=thresholds, + sample_weight=sample_weight, + ) + self.assertEqual(self.variables_to_update[self.tp].numpy()[0], 4) + self.assertEqual(self.variables_to_update[self.tn].numpy()[0], 3) + self.assertEqual(self.variables_to_update[self.fp].numpy()[0], 1) + self.assertEqual(self.variables_to_update[self.fn].numpy()[0], 1) + + def test_with_class_id(self): + y_true = tf.constant([[1, 1, 0], [0, 0, 1]]) + y_pred = tf.constant([[0.8, 0.7, 0.1], [0.1, 0.6, 0.4]]) + thresholds = [0.5] + class_id = 2 + + metrics_utils.update_confusion_matrix_variables( + variables_to_update=self.variables_to_update, + y_true=y_true, + y_pred=y_pred, + thresholds=thresholds, + class_id=class_id, + ) + self.assertEqual(self.variables_to_update[self.tp].numpy()[0], 0) + self.assertEqual(self.variables_to_update[self.tn].numpy()[0], 1) + self.assertEqual(self.variables_to_update[self.fp].numpy()[0], 0) + self.assertEqual(self.variables_to_update[self.fn].numpy()[0], 1) + + def 
test_with_sample_weight_and_classid(self): + y_true = tf.constant([[1, 1, 0], [0, 0, 1]]) + y_pred = tf.constant([[0.8, 0.7, 0.1], [0.1, 0.6, 0.4]]) + thresholds = [0.5] + sample_weight = [2, 1] + class_id = 2 + + metrics_utils.update_confusion_matrix_variables( + variables_to_update=self.variables_to_update, + y_true=y_true, + y_pred=y_pred, + thresholds=thresholds, + sample_weight=sample_weight, + class_id=class_id, + ) + self.assertEqual(self.variables_to_update[self.tp].numpy()[0], 0) + self.assertEqual(self.variables_to_update[self.tn].numpy()[0], 2) + self.assertEqual(self.variables_to_update[self.fp].numpy()[0], 0) + self.assertEqual(self.variables_to_update[self.fn].numpy()[0], 1) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/mode_keys.py b/keras/utils/mode_keys.py index 38881970937b..7ba5a17585ec 100644 --- a/keras/utils/mode_keys.py +++ b/keras/utils/mode_keys.py @@ -14,6 +14,7 @@ # ============================================================================== """Keras model mode constants.""" -# pylint: disable=unused-import -from tensorflow.python.saved_model.model_utils.mode_keys import KerasModeKeys as ModeKeys -# pylint: enable=unused-import +# isort: off +from tensorflow.python.saved_model.model_utils.mode_keys import ( # noqa: F401,E501 + KerasModeKeys as ModeKeys, +) diff --git a/keras/utils/np_utils.py b/keras/utils/np_utils.py index d2b7492fd0c0..60cad3fa6197 100644 --- a/keras/utils/np_utils.py +++ b/keras/utils/np_utils.py @@ -15,77 +15,128 @@ """Numpy-related utilities.""" import numpy as np + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.utils.to_categorical') -def to_categorical(y, num_classes=None, dtype='float32'): - """Converts a class vector (integers) to binary class matrix. +@keras_export("keras.utils.to_categorical") +def to_categorical(y, num_classes=None, dtype="float32"): + """Converts a class vector (integers) to binary class matrix. - E.g. for use with `categorical_crossentropy`. + E.g. for use with `categorical_crossentropy`. - Args: - y: Array-like with class values to be converted into a matrix - (integers from 0 to `num_classes - 1`). - num_classes: Total number of classes. If `None`, this would be inferred - as `max(y) + 1`. - dtype: The data type expected by the input. Default: `'float32'`. + Args: + y: Array-like with class values to be converted into a matrix + (integers from 0 to `num_classes - 1`). + num_classes: Total number of classes. If `None`, this would be inferred + as `max(y) + 1`. + dtype: The data type expected by the input. Default: `'float32'`. - Returns: - A binary matrix representation of the input. The class axis is placed - last. + Returns: + A binary matrix representation of the input as a NumPy array. The class + axis is placed last. - Example: + Example: - >>> a = tf.keras.utils.to_categorical([0, 1, 2, 3], num_classes=4) - >>> a = tf.constant(a, shape=[4, 4]) - >>> print(a) - tf.Tensor( + >>> a = tf.keras.utils.to_categorical([0, 1, 2, 3], num_classes=4) + >>> print(a) [[1. 0. 0. 0.] [0. 1. 0. 0.] [0. 0. 1. 0.] - [0. 0. 0. 1.]], shape=(4, 4), dtype=float32) - - >>> b = tf.constant([.9, .04, .03, .03, - ... .3, .45, .15, .13, - ... .04, .01, .94, .05, - ... .12, .21, .5, .17], - ... shape=[4, 4]) - >>> loss = tf.keras.backend.categorical_crossentropy(a, b) - >>> print(np.around(loss, 5)) - [0.10536 0.82807 0.1011 1.77196] - - >>> loss = tf.keras.backend.categorical_crossentropy(a, a) - >>> print(np.around(loss, 5)) - [0. 0. 0. 0.] 
- """ - y = np.array(y, dtype='int') - input_shape = y.shape - if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: - input_shape = tuple(input_shape[:-1]) - y = y.ravel() - if not num_classes: - num_classes = np.max(y) + 1 - n = y.shape[0] - categorical = np.zeros((n, num_classes), dtype=dtype) - categorical[np.arange(n), y] = 1 - output_shape = input_shape + (num_classes,) - categorical = np.reshape(categorical, output_shape) - return categorical - - -@keras_export('keras.utils.normalize') + [0. 0. 0. 1.]] + + >>> b = tf.constant([.9, .04, .03, .03, + ... .3, .45, .15, .13, + ... .04, .01, .94, .05, + ... .12, .21, .5, .17], + ... shape=[4, 4]) + >>> loss = tf.keras.backend.categorical_crossentropy(a, b) + >>> print(np.around(loss, 5)) + [0.10536 0.82807 0.1011 1.77196] + + >>> loss = tf.keras.backend.categorical_crossentropy(a, a) + >>> print(np.around(loss, 5)) + [0. 0. 0. 0.] + """ + y = np.array(y, dtype="int") + input_shape = y.shape + + # Shrink the last dimension if the shape is (..., 1). + if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: + input_shape = tuple(input_shape[:-1]) + + y = y.reshape(-1) + if not num_classes: + num_classes = np.max(y) + 1 + n = y.shape[0] + categorical = np.zeros((n, num_classes), dtype=dtype) + categorical[np.arange(n), y] = 1 + output_shape = input_shape + (num_classes,) + categorical = np.reshape(categorical, output_shape) + return categorical + + +@keras_export("keras.utils.to_ordinal") +def to_ordinal(y, num_classes=None, dtype="float32"): + """Converts a class vector (integers) to an ordinal regression matrix. + + This utility encodes class vector to ordinal regression/classification + matrix where each sample is indicated by a row and rank of that sample is + indicated by number of ones in that row. + + Args: + y: Array-like with class values to be converted into a matrix + (integers from 0 to `num_classes - 1`). + num_classes: Total number of classes. If `None`, this would be inferred + as `max(y) + 1`. + dtype: The data type expected by the input. Default: `'float32'`. + + Returns: + An ordinal regression matrix representation of the input as a NumPy + array. The class axis is placed last. + + Example: + + >>> a = tf.keras.utils.to_ordinal([0, 1, 2, 3], num_classes=4) + >>> print(a) + [[0. 0. 0.] + [1. 0. 0.] + [1. 1. 0.] + [1. 1. 1.]] + """ + y = np.array(y, dtype="int") + input_shape = y.shape + + # Shrink the last dimension if the shape is (..., 1). + if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: + input_shape = tuple(input_shape[:-1]) + + y = y.reshape(-1) + if not num_classes: + num_classes = np.max(y) + 1 + n = y.shape[0] + range_values = np.arange(num_classes - 1) + range_values = np.tile(np.expand_dims(range_values, 0), [n, 1]) + ordinal = np.zeros((n, num_classes - 1), dtype=dtype) + ordinal[range_values < np.expand_dims(y, -1)] = 1 + output_shape = input_shape + (num_classes - 1,) + ordinal = np.reshape(ordinal, output_shape) + return ordinal + + +@keras_export("keras.utils.normalize") def normalize(x, axis=-1, order=2): - """Normalizes a Numpy array. - - Args: - x: Numpy array to normalize. - axis: axis along which to normalize. - order: Normalization order (e.g. `order=2` for L2 norm). - - Returns: - A normalized copy of the array. - """ - l2 = np.atleast_1d(np.linalg.norm(x, order, axis)) - l2[l2 == 0] = 1 - return x / np.expand_dims(l2, axis) + """Normalizes a Numpy array. + + Args: + x: Numpy array to normalize. + axis: axis along which to normalize. 
+        order: Normalization order (e.g. `order=2` for L2 norm).
+
+    Returns:
+        A normalized copy of the array.
+    """
+    l2 = np.atleast_1d(np.linalg.norm(x, order, axis))
+    l2[l2 == 0] = 1
+    return x / np.expand_dims(l2, axis)
diff --git a/keras/utils/np_utils_test.py b/keras/utils/np_utils_test.py
index ff2a68a54741..d108e10dd61a 100644
--- a/keras/utils/np_utils_test.py
+++ b/keras/utils/np_utils_test.py
@@ -14,35 +14,71 @@
 # ==============================================================================
 """Tests for np_utils."""
 
-import tensorflow.compat.v2 as tf
-
 import numpy as np
+import tensorflow.compat.v2 as tf
+from absl.testing import parameterized
 
+from keras.testing_infra import test_combinations
 from keras.utils import np_utils
 
+NUM_CLASSES = 5
+
+
+class TestNPUtils(test_combinations.TestCase):
+    @parameterized.parameters(
+        [
+            ((1,), (1, NUM_CLASSES)),
+            ((3,), (3, NUM_CLASSES)),
+            ((4, 3), (4, 3, NUM_CLASSES)),
+            ((5, 4, 3), (5, 4, 3, NUM_CLASSES)),
+            ((3, 1), (3, NUM_CLASSES)),
+            ((3, 2, 1), (3, 2, NUM_CLASSES)),
+        ]
+    )
+    def test_to_categorical(self, shape, expected_shape):
+        label = np.random.randint(0, NUM_CLASSES, shape)
+        one_hot = np_utils.to_categorical(label, NUM_CLASSES)
+        # Check shape
+        self.assertEqual(one_hot.shape, expected_shape)
+        # Make sure there is only one 1 in a row
+        self.assertTrue(np.all(one_hot.sum(axis=-1) == 1))
+        # Get original labels back from one-hots
+        self.assertTrue(
+            np.all(np.argmax(one_hot, -1).reshape(label.shape) == label)
+        )
+
+    def test_to_categorical_without_num_classes(self):
+        label = [0, 2, 5]
+        one_hot = np_utils.to_categorical(label)
+        self.assertEqual(one_hot.shape, (3, 5 + 1))
+
+    @parameterized.parameters(
+        [
+            ((1,), (1, NUM_CLASSES - 1)),
+            ((3,), (3, NUM_CLASSES - 1)),
+            ((4, 3), (4, 3, NUM_CLASSES - 1)),
+            ((5, 4, 3), (5, 4, 3, NUM_CLASSES - 1)),
+            ((3, 1), (3, NUM_CLASSES - 1)),
+            ((3, 2, 1), (3, 2, NUM_CLASSES - 1)),
+        ]
+    )
+    def test_to_ordinal(self, shape, expected_shape):
+        label = np.random.randint(0, NUM_CLASSES, shape)
+        ordinal = np_utils.to_ordinal(label, NUM_CLASSES)
+        # Check shape
+        self.assertEqual(ordinal.shape, expected_shape)
+        # Make sure all the values are either 0 or 1
+        self.assertTrue(np.all(np.logical_or(ordinal == 0, ordinal == 1)))
+        # Get original labels back from ordinal matrix
+        self.assertTrue(
+            np.all(ordinal.cumprod(-1).sum(-1).reshape(label.shape) == label)
+        )
+
+    def test_to_ordinal_without_num_classes(self):
+        label = [0, 2, 5]
+        one_hot = np_utils.to_ordinal(label)
+        self.assertEqual(one_hot.shape, (3, 5))
+
 
-class TestNPUtils(tf.test.TestCase):
-
-  def test_to_categorical(self):
-    num_classes = 5
-    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
-    expected_shapes = [(1, num_classes), (3, num_classes), (4, 3, num_classes),
-                       (5, 4, 3, num_classes), (3, num_classes),
-                       (3, 2, num_classes)]
-    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
-    one_hots = [
-        np_utils.to_categorical(label, num_classes) for label in labels]
-    for label, one_hot, expected_shape in zip(labels,
-                                              one_hots,
-                                              expected_shapes):
-      # Check shape
-      self.assertEqual(one_hot.shape, expected_shape)
-      # Make sure there is only one 1 in a row
-      self.assertTrue(np.all(one_hot.sum(axis=-1) == 1))
-      # Get original labels back from one hots
-      self.assertTrue(np.all(
-          np.argmax(one_hot, -1).reshape(label.shape) == label))
-
-
-if __name__ == '__main__':
-  tf.test.main()
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/keras/utils/object_identity.py
b/keras/utils/object_identity.py index db5a313045b0..856e61820233 100644 --- a/keras/utils/object_identity.py +++ b/keras/utils/object_identity.py @@ -20,227 +20,234 @@ # LINT.IfChange class _ObjectIdentityWrapper: - """Wraps an object, mapping __eq__ on wrapper to "is" on wrapped. + """Wraps an object, mapping __eq__ on wrapper to "is" on wrapped. - Since __eq__ is based on object identity, it's safe to also define __hash__ - based on object ids. This lets us add unhashable types like trackable - _ListWrapper objects to object-identity collections. - """ + Since __eq__ is based on object identity, it's safe to also define __hash__ + based on object ids. This lets us add unhashable types like trackable + _ListWrapper objects to object-identity collections. + """ - __slots__ = ["_wrapped", "__weakref__"] + __slots__ = ["_wrapped", "__weakref__"] - def __init__(self, wrapped): - self._wrapped = wrapped + def __init__(self, wrapped): + self._wrapped = wrapped - @property - def unwrapped(self): - return self._wrapped + @property + def unwrapped(self): + return self._wrapped - def _assert_type(self, other): - if not isinstance(other, _ObjectIdentityWrapper): - raise TypeError( - "Cannot compare wrapped object with unwrapped object. " - f"Expect the object to be `_ObjectIdentityWrapper`. Got: {other}") + def _assert_type(self, other): + if not isinstance(other, _ObjectIdentityWrapper): + raise TypeError( + "Cannot compare wrapped object with unwrapped object. " + "Expect the object to be `_ObjectIdentityWrapper`. " + f"Got: {other}" + ) - def __lt__(self, other): - self._assert_type(other) - return id(self._wrapped) < id(other._wrapped) # pylint: disable=protected-access + def __lt__(self, other): + self._assert_type(other) + return id(self._wrapped) < id(other._wrapped) - def __gt__(self, other): - self._assert_type(other) - return id(self._wrapped) > id(other._wrapped) # pylint: disable=protected-access + def __gt__(self, other): + self._assert_type(other) + return id(self._wrapped) > id(other._wrapped) - def __eq__(self, other): - if other is None: - return False - self._assert_type(other) - return self._wrapped is other._wrapped # pylint: disable=protected-access + def __eq__(self, other): + if other is None: + return False + self._assert_type(other) + return self._wrapped is other._wrapped - def __ne__(self, other): - return not self.__eq__(other) + def __ne__(self, other): + return not self.__eq__(other) - def __hash__(self): - # Wrapper id() is also fine for weakrefs. In fact, we rely on - # id(weakref.ref(a)) == id(weakref.ref(a)) and weakref.ref(a) is - # weakref.ref(a) in _WeakObjectIdentityWrapper. - return id(self._wrapped) + def __hash__(self): + # Wrapper id() is also fine for weakrefs. In fact, we rely on + # id(weakref.ref(a)) == id(weakref.ref(a)) and weakref.ref(a) is + # weakref.ref(a) in _WeakObjectIdentityWrapper. + return id(self._wrapped) - def __repr__(self): - return "<{} wrapping {!r}>".format(type(self).__name__, self._wrapped) + def __repr__(self): + return f"<{type(self).__name__} wrapping {self._wrapped!r}>" class _WeakObjectIdentityWrapper(_ObjectIdentityWrapper): - __slots__ = () + __slots__ = () - def __init__(self, wrapped): - super().__init__(weakref.ref(wrapped)) + def __init__(self, wrapped): + super().__init__(weakref.ref(wrapped)) - @property - def unwrapped(self): - return self._wrapped() + @property + def unwrapped(self): + return self._wrapped() class Reference(_ObjectIdentityWrapper): - """Reference that refers an object. 
+ """Reference that refers an object. - ```python - x = [1] - y = [1] + ```python + x = [1] + y = [1] - x_ref1 = Reference(x) - x_ref2 = Reference(x) - y_ref2 = Reference(y) + x_ref1 = Reference(x) + x_ref2 = Reference(x) + y_ref2 = Reference(y) - print(x_ref1 == x_ref2) - ==> True + print(x_ref1 == x_ref2) + ==> True - print(x_ref1 == y) - ==> False - ``` - """ + print(x_ref1 == y) + ==> False + ``` + """ - __slots__ = () + __slots__ = () - # Disabling super class' unwrapped field. - unwrapped = property() + # Disabling super class' unwrapped field. + unwrapped = property() - def deref(self): - """Returns the referenced object. + def deref(self): + """Returns the referenced object. - ```python - x_ref = Reference(x) - print(x is x_ref.deref()) - ==> True - ``` - """ - return self._wrapped + ```python + x_ref = Reference(x) + print(x is x_ref.deref()) + ==> True + ``` + """ + return self._wrapped class ObjectIdentityDictionary(collections.abc.MutableMapping): - """A mutable mapping data structure which compares using "is". + """A mutable mapping data structure which compares using "is". - This is necessary because we have trackable objects (_ListWrapper) which - have behavior identical to built-in Python lists (including being unhashable - and comparing based on the equality of their contents by default). - """ + This is necessary because we have trackable objects (_ListWrapper) which + have behavior identical to built-in Python lists (including being unhashable + and comparing based on the equality of their contents by default). + """ - __slots__ = ["_storage"] + __slots__ = ["_storage"] - def __init__(self): - self._storage = {} + def __init__(self): + self._storage = {} - def _wrap_key(self, key): - return _ObjectIdentityWrapper(key) + def _wrap_key(self, key): + return _ObjectIdentityWrapper(key) - def __getitem__(self, key): - return self._storage[self._wrap_key(key)] + def __getitem__(self, key): + return self._storage[self._wrap_key(key)] - def __setitem__(self, key, value): - self._storage[self._wrap_key(key)] = value + def __setitem__(self, key, value): + self._storage[self._wrap_key(key)] = value - def __delitem__(self, key): - del self._storage[self._wrap_key(key)] + def __delitem__(self, key): + del self._storage[self._wrap_key(key)] - def __len__(self): - return len(self._storage) + def __len__(self): + return len(self._storage) - def __iter__(self): - for key in self._storage: - yield key.unwrapped + def __iter__(self): + for key in self._storage: + yield key.unwrapped - def __repr__(self): - return "ObjectIdentityDictionary(%s)" % repr(self._storage) + def __repr__(self): + return f"ObjectIdentityDictionary({repr(self._storage)})" class ObjectIdentityWeakKeyDictionary(ObjectIdentityDictionary): - """Like weakref.WeakKeyDictionary, but compares objects with "is".""" + """Like weakref.WeakKeyDictionary, but compares objects with "is".""" - __slots__ = ["__weakref__"] + __slots__ = ["__weakref__"] - def _wrap_key(self, key): - return _WeakObjectIdentityWrapper(key) + def _wrap_key(self, key): + return _WeakObjectIdentityWrapper(key) - def __len__(self): - # Iterate, discarding old weak refs - return len(list(self._storage)) + def __len__(self): + # Iterate, discarding old weak refs + return len(list(self._storage)) - def __iter__(self): - keys = self._storage.keys() - for key in keys: - unwrapped = key.unwrapped - if unwrapped is None: - del self[key] - else: - yield unwrapped + def __iter__(self): + keys = self._storage.keys() + for key in keys: + unwrapped = key.unwrapped + 
if unwrapped is None: + del self[key] + else: + yield unwrapped class ObjectIdentitySet(collections.abc.MutableSet): - """Like the built-in set, but compares objects with "is".""" + """Like the built-in set, but compares objects with "is".""" - __slots__ = ["_storage", "__weakref__"] + __slots__ = ["_storage", "__weakref__"] - def __init__(self, *args): - self._storage = set(self._wrap_key(obj) for obj in list(*args)) + def __init__(self, *args): + self._storage = set(self._wrap_key(obj) for obj in list(*args)) - @staticmethod - def _from_storage(storage): - result = ObjectIdentitySet() - result._storage = storage # pylint: disable=protected-access - return result + @staticmethod + def _from_storage(storage): + result = ObjectIdentitySet() + result._storage = storage + return result - def _wrap_key(self, key): - return _ObjectIdentityWrapper(key) + def _wrap_key(self, key): + return _ObjectIdentityWrapper(key) - def __contains__(self, key): - return self._wrap_key(key) in self._storage + def __contains__(self, key): + return self._wrap_key(key) in self._storage - def discard(self, key): - self._storage.discard(self._wrap_key(key)) + def discard(self, key): + self._storage.discard(self._wrap_key(key)) - def add(self, key): - self._storage.add(self._wrap_key(key)) + def add(self, key): + self._storage.add(self._wrap_key(key)) - def update(self, items): - self._storage.update([self._wrap_key(item) for item in items]) + def update(self, items): + self._storage.update([self._wrap_key(item) for item in items]) - def clear(self): - self._storage.clear() + def clear(self): + self._storage.clear() - def intersection(self, items): - return self._storage.intersection([self._wrap_key(item) for item in items]) + def intersection(self, items): + return self._storage.intersection( + [self._wrap_key(item) for item in items] + ) - def difference(self, items): - return ObjectIdentitySet._from_storage( - self._storage.difference([self._wrap_key(item) for item in items])) + def difference(self, items): + return ObjectIdentitySet._from_storage( + self._storage.difference([self._wrap_key(item) for item in items]) + ) - def __len__(self): - return len(self._storage) + def __len__(self): + return len(self._storage) - def __iter__(self): - keys = list(self._storage) - for key in keys: - yield key.unwrapped + def __iter__(self): + keys = list(self._storage) + for key in keys: + yield key.unwrapped class ObjectIdentityWeakSet(ObjectIdentitySet): - """Like weakref.WeakSet, but compares objects with "is".""" + """Like weakref.WeakSet, but compares objects with "is".""" + + __slots__ = () + + def _wrap_key(self, key): + return _WeakObjectIdentityWrapper(key) - __slots__ = () + def __len__(self): + # Iterate, discarding old weak refs + return len([_ for _ in self]) - def _wrap_key(self, key): - return _WeakObjectIdentityWrapper(key) + def __iter__(self): + keys = list(self._storage) + for key in keys: + unwrapped = key.unwrapped + if unwrapped is None: + self.discard(key) + else: + yield unwrapped - def __len__(self): - # Iterate, discarding old weak refs - return len([_ for _ in self]) - def __iter__(self): - keys = list(self._storage) - for key in keys: - unwrapped = key.unwrapped - if unwrapped is None: - self.discard(key) - else: - yield unwrapped # LINT.ThenChange(//tensorflow/python/util/object_identity.py) diff --git a/keras/utils/sidecar_evaluator.py b/keras/utils/sidecar_evaluator.py new file mode 100644 index 000000000000..82b3c1df04d5 --- /dev/null +++ b/keras/utils/sidecar_evaluator.py @@ -0,0 +1,432 @@ +# 
Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python module for evaluation loop."""
+
+import re
+
+import tensorflow as tf
+
+# isort: off
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.util import deprecation
+from keras.callbacks import ModelCheckpoint
+from keras.optimizers import optimizer
+from tensorflow.python.util.tf_export import keras_export
+
+_PRINT_EVAL_STEP_EVERY_SEC = 60.0
+_ITERATIONS_UNINITIALIZED = -1
+_CHECKPOINT_TIMEOUT_SEC = 30
+
+
+def list_checkpoint_attributes(ckpt_dir_or_file):
+    """Lists all the attributes in a checkpoint.
+
+    Checkpoint keys are paths in a checkpoint graph, and the attribute is the
+    first element in the path. E.g. with a checkpoint key
+    "optimizer/iter/.ATTRIBUTES/VARIABLE_VALUE", optimizer is the attribute.
+    The attribute is also used to save/restore a variable in a checkpoint,
+    e.g. tf.train.Checkpoint(optimizer=optimizer, model=model).
+
+    Args:
+        ckpt_dir_or_file: Directory with checkpoints file or path to
+            checkpoint.
+
+    Returns:
+        Set of attributes in a checkpoint.
+    """
+    reader = tf.train.load_checkpoint(ckpt_dir_or_file)
+    variable_map = reader.get_variable_to_shape_map()
+    return {name.split("/")[0] for name in variable_map.keys()}
+
+
+@keras_export("keras.utils.SidecarEvaluator", v1=[])
+class SidecarEvaluator:
+    """A class designed for a dedicated evaluator task.
+
+    `SidecarEvaluator` is expected to be run in a process on a separate
+    machine from the training cluster. It is meant to be used as a dedicated
+    evaluator, evaluating the metric results of a training cluster which has
+    one or more workers performing the training, and saving checkpoints.
+
+    The `SidecarEvaluator` API is compatible with both Custom Training Loop
+    (CTL), and Keras `Model.fit` to be used in the training cluster. Using
+    the model (with compiled metrics) provided at `__init__`,
+    `SidecarEvaluator` repeatedly performs evaluation "epochs" when it finds
+    a checkpoint that has not yet been used. Depending on the `steps`
+    argument, an eval epoch is evaluation over all eval data, or up to a
+    certain number of steps (batches). See examples below for how the
+    training program should save the checkpoints in order to be recognized
+    by `SidecarEvaluator`.
+
+    Since `SidecarEvaluator` uses `model.evaluate` under the hood for
+    evaluation, it also supports arbitrary Keras callbacks. That is, if one
+    or more callbacks are provided, their `on_test_batch_begin` and
+    `on_test_batch_end` methods are called at the start and end of a batch,
+    and their `on_test_begin` and `on_test_end` are called at the start and
+    end of an evaluation epoch. Note that `SidecarEvaluator` may skip some
+    checkpoints because it always picks up the latest checkpoint available,
+    and during an evaluation epoch, multiple checkpoints can be produced
+    from the training side.
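+
+    For intuition, the core of the evaluation loop can be sketched as below
+    (a minimal, illustrative sketch only, assuming a compiled `model` and an
+    eval `dataset`; the real loop further restores the optimizer iterations
+    and retries on partially written checkpoints):
+
+    ```python
+    checkpoint = tf.train.Checkpoint(model=model)
+    for ckpt_path in tf.train.checkpoints_iterator(
+        checkpoint_dir, timeout=30, timeout_fn=lambda: False
+    ):
+        # `expect_partial` because the checkpoint also tracks objects
+        # (e.g. the optimizer) that evaluation does not need.
+        checkpoint.restore(ckpt_path).expect_partial()
+        model.evaluate(dataset, verbose=2)
+    ```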
+
+    Example:
+    ```python
+    model = tf.keras.models.Sequential(...)
+    model.compile(metrics=tf.keras.metrics.SparseCategoricalAccuracy(
+        name="eval_metrics"))
+    data = tf.data.Dataset.from_tensor_slices(...)
+
+    tf.keras.utils.SidecarEvaluator(
+        model=model,
+        data=data,
+        # dir for training-saved checkpoint
+        checkpoint_dir='/tmp/checkpoint_dir',
+        steps=None,  # Eval until dataset is exhausted
+        max_evaluations=None,  # The evaluation needs to be stopped manually
+        callbacks=[tf.keras.callbacks.TensorBoard(log_dir='/tmp/log_dir')]
+    ).start()
+    ```
+
+    `SidecarEvaluator.start` writes a series of summary files which can be
+    visualized by TensorBoard (which provides a webpage link):
+
+    ```bash
+    $ tensorboard --logdir=/tmp/log_dir/validation
+    ...
+    TensorBoard 2.4.0a0 at http://host:port (Press CTRL+C to quit)
+    ```
+
+    If the training cluster uses a CTL, the `checkpoint_dir` should contain
+    checkpoints that track both `model` and `optimizer`, to fulfill
+    `SidecarEvaluator`'s expectation. This can be done by a
+    `tf.train.Checkpoint` and a `tf.train.CheckpointManager`:
+
+    ```python
+    # Same `checkpoint_dir` supplied to `SidecarEvaluator`.
+    checkpoint_dir = ...
+    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint, checkpoint_dir=..., max_to_keep=...)
+    checkpoint_manager.save()
+    ```
+
+    If the training cluster uses the Keras `Model.fit` API, a
+    `tf.keras.callbacks.ModelCheckpoint` should be used, with
+    `save_weights_only=True`, and the `filepath` should have 'ckpt-{epoch}'
+    appended:
+
+    ```python
+    # Same `checkpoint_dir` supplied to `SidecarEvaluator`.
+    checkpoint_dir = ...
+    model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
+        filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'),
+        save_weights_only=True)
+    model.fit(dataset, epochs, callbacks=[model_checkpoint])
+    ```
+    """
+
+    def __init__(
+        self,
+        model,
+        data,
+        checkpoint_dir,
+        steps=None,
+        max_evaluations=None,
+        callbacks=None,
+    ):
+        """Initializes a `SidecarEvaluator` object.
+
+        Args:
+            model: Model to use for evaluation. The model object used here
+                should be a `tf.keras.Model`, and should be the same as the
+                one that is used in training, where `tf.keras.Model`s are
+                checkpointed. The model should have one or more metrics
+                compiled before using `SidecarEvaluator`.
+            data: The input data for evaluation. `SidecarEvaluator` supports
+                all data types that Keras `model.evaluate` supports as the
+                input data `x`, such as a `tf.data.Dataset`.
+            checkpoint_dir: Directory where checkpoint files are saved.
+            steps: Number of steps to perform evaluation for, when evaluating
+                a single checkpoint file. If `None`, evaluation continues
+                until the dataset is exhausted. For a repeating evaluation
+                dataset, the user must specify `steps` to avoid an infinite
+                evaluation loop.
+            max_evaluations: Maximum number of checkpoint files to be
+                evaluated, for `SidecarEvaluator` to know when to stop. The
+                evaluator will stop after it evaluates a checkpoint filepath
+                ending with '-<max_evaluations>'. If using
+                `tf.train.CheckpointManager.save` for saving checkpoints, the
+                kth saved checkpoint has the filepath suffix '-<k>' (k=1 for
+                the first saved), and if checkpoints are saved every epoch
+                after training, the filepath saved at the kth epoch would end
+                with '-<k>'. Thus, if training runs for n epochs, and the
+                evaluator should end after the training finishes, use n for
+                this parameter. Note that this is not necessarily equal to
+                the number of total evaluations, since some checkpoints may
+                be skipped if evaluation is slower than checkpoint creation.
+                If `None`, `SidecarEvaluator` will evaluate indefinitely, and
+                the user must terminate the evaluator program themselves.
+            callbacks: List of `keras.callbacks.Callback` instances to apply
+                during evaluation. See
+                [callbacks](/api_docs/python/tf/keras/callbacks).
+        """
+        self.model = model
+        self.data = data
+        self.checkpoint_dir = checkpoint_dir
+        self._iterations = tf.Variable(
+            name="iterations",
+            initial_value=_ITERATIONS_UNINITIALIZED,
+            dtype=tf.int64,
+        )
+        self.max_evaluations = max_evaluations
+        self.steps = steps
+        self.callbacks = callbacks or []
+
+    def _timeout_fn(self):
+        logging.info(
+            "No checkpoints appear to be found after "
+            f"{_CHECKPOINT_TIMEOUT_SEC} seconds. "
+            "Please check if you are properly using a "
+            "`tf.train.Checkpoint/CheckpointManager` or "
+            "`tf.keras.callbacks.ModelCheckpoint(save_weights_only=True)` to "
+            "save checkpoints by the training. See "
+            "`tf.keras.SidecarEvaluator` doc for recommended flows "
+            "of saving checkpoints."
+        )
+        return False
+
+    def start(self):
+        """Starts the evaluation loop."""
+        if self.model.optimizer and isinstance(
+            self.model.optimizer, optimizer.Optimizer
+        ):
+            checkpoint = tf.train.Checkpoint(
+                model=self.model, optimizer=self.model.optimizer
+            )
+        else:
+            optimizer_checkpoint = tf.train.Checkpoint(iter=self._iterations)
+            checkpoint = tf.train.Checkpoint(
+                model=self.model, optimizer=optimizer_checkpoint
+            )
+        for latest_checkpoint in tf.train.checkpoints_iterator(
+            self.checkpoint_dir,
+            timeout=_CHECKPOINT_TIMEOUT_SEC,
+            timeout_fn=self._timeout_fn,
+        ):
+            try:
+                # `expect_partial` because the checkpoint can have other
+                # `Trackable`s such as `optimizer`.
+                checkpoint.restore(latest_checkpoint).expect_partial()
+                checkpoint_attributes = list_checkpoint_attributes(
+                    latest_checkpoint
+                )
+                # The checkpoint should contain model and optimizer for
+                # SidecarEvaluator to work. But the model weights saved by
+                # the ModelCheckpoint callback do not contain the model as an
+                # attribute. To make SidecarEvaluator work compatibly in this
+                # case, use model.load_weights to load the model's weights,
+                # while self._iterations is still restored from the
+                # checkpoint variable.
+                if "model" not in checkpoint_attributes:
+                    self.model.load_weights(latest_checkpoint)
+                # The model checkpoint might not include the optimizer in
+                # some cases, e.g. when using a custom training loop.
+                # Directly assign the iterations property to be used in
+                # callbacks.
+                if self.model.optimizer and not isinstance(
+                    self.model.optimizer,
+                    optimizer.Optimizer,
+                ):
+                    # The experimental optimizer automatically restores the
+                    # iteration value.
+                    self.model.optimizer.iterations.assign(self._iterations)
+            except (tf.errors.OpError,) as e:
+                if isinstance(e, tf.errors.UnavailableError):
+                    # With distributed training, worker preemption can result
+                    # in `UnavailableError`. Raise this to be handled outside
+                    # the evaluation loop.
+                    raise e
+
+                # A couple of errors can happen here when racing with the
+                # coordinator writing a checkpoint:
+                # 1) OpError: open failed for <file path>: No such file or
+                # directory
+                # 2) NotFoundError (subclass of OpError): Unsuccessful
+                # TensorSliceReader constructor.
+                # TODO(rchao): Remove this except block once b/150954027 is
+                # resolved.
+                logging.info(
+                    "SidecarEvaluator encountered an error when loading the "
+                    f"checkpoint at {latest_checkpoint}. Retrying. "
+                    f"Error: {e.__class__.__name__}: {e}"
+                )
+                continue
+            if (
+                self._iterations.numpy() == _ITERATIONS_UNINITIALIZED
+                and not isinstance(
+                    self.model.optimizer,
+                    optimizer.Optimizer,
+                )
+            ):
+                raise RuntimeError(
+                    "Variable `iterations` cannot be loaded from the "
+                    f"checkpoint file at {self.checkpoint_dir}. "
+                    "Please ensure `iterations` is "
+                    "included in the checkpoint saved during training."
+                )
+
+            logging.info(
+                "Evaluation starts: Model weights loaded from latest "
+                f"checkpoint file {latest_checkpoint}"
+            )
+            self.model.evaluate(
+                self.data, steps=self.steps, callbacks=self.callbacks, verbose=2
+            )
+
+            return_metrics = {}
+            for metric in self.model.metrics:
+                result = metric.result()
+                if isinstance(result, dict):
+                    return_metrics.update(result)
+                else:
+                    return_metrics[metric.name] = result
+
+            logging.info(
+                "End of evaluation. Metrics: %s",
+                " ".join(
+                    [
+                        f"{name}={value.numpy()}"
+                        for name, value in return_metrics.items()
+                    ]
+                ),
+            )
+
+            if self.max_evaluations and (
+                self.max_evaluations <= int(latest_checkpoint.split("-")[-1])
+            ):
+                # Exit the loop because we have evaluated the final checkpoint
+                # file.
+                logging.info(
+                    "Last checkpoint evaluated. SidecarEvaluator stops."
+                )
+                return
+
+
+@keras_export("keras.experimental.SidecarEvaluator", v1=[])
+@deprecation.deprecated_endpoints("keras.experimental.SidecarEvaluator")
+class SidecarEvaluatorExperimental(SidecarEvaluator):
+    """Deprecated. Please use `tf.keras.utils.SidecarEvaluator` instead.
+
+    Caution: `tf.keras.experimental.SidecarEvaluator` endpoint is
+    deprecated and will be removed in a future release. Please use
+    `tf.keras.utils.SidecarEvaluator`.
+    """
+
+    def __init__(self, *args, **kwargs):
+        logging.warning(
+            "`tf.keras.experimental.SidecarEvaluator` endpoint is "
+            "deprecated and will be removed in a future release. Please use "
+            "`tf.keras.utils.SidecarEvaluator`."
+        )
+        super().__init__(*args, **kwargs)
+
+
+@keras_export("keras.callbacks.SidecarEvaluatorModelExport")
+class SidecarEvaluatorModelExport(ModelCheckpoint):
+    """Callback to save the best Keras model.
+
+    It expands the functionality of the existing ModelCheckpoint callback to
+    enable exporting the best models after evaluation with a validation
+    dataset.
+
+    When using the `SidecarEvaluatorModelExport` callback in conjunction with
+    `keras.utils.SidecarEvaluator`, users should provide the `filepath`,
+    which is the path for this callback to export the model or save weights
+    to, and `ckpt_filepath`, which is where the checkpoint is available to
+    extract the epoch number from. The callback will then export the model
+    that the evaluator deems the best (among the checkpoints saved by the
+    training counterpart) to the specified `filepath`. This callback is
+    intended to be used by SidecarEvaluator only.
+
+    Example:
+
+    ```python
+    model.compile(loss=..., optimizer=...,
+                  metrics=['accuracy'])
+    sidecar_evaluator = keras.utils.SidecarEvaluator(
+        model=model,
+        data=dataset,
+        checkpoint_dir=checkpoint_dir,
+        max_evaluations=1,
+        callbacks=[
+            SidecarEvaluatorModelExport(
+                export_filepath=os.path.join(checkpoint_dir,
+                                             'best_model_eval',
+                                             'best-model-{epoch:04d}'),
+                checkpoint_filepath=os.path.join(checkpoint_dir,
+                                                 'ckpt-{epoch:04d}'),
+                save_freq="eval",
+                save_weights_only=True,
+                monitor="loss",
+                mode="min",
+                verbose=1,
+            ),
+        ],
+    )
+    sidecar_evaluator.start()
+    # Model weights are saved if evaluator deems it's the best seen so far.
+    ```
+
+    Args:
+        export_filepath: Path where best models should be saved by this
+            `SidecarEvaluatorModelExport` callback. Epoch formatting options,
+            such as `os.path.join(best_model_dir, 'best-model-{epoch:04d}')`,
+            can be used to allow the saved model to preserve epoch
+            information in the file name. SidecarEvaluatorModelExport will
+            use the "training epoch" at which the checkpoint was saved by
+            training to fill the epoch placeholder in the path.
+        checkpoint_filepath: Path where checkpoints were saved by training.
+            This should be the same as what is provided to the `filepath`
+            argument of `ModelCheckpoint` on the training side, such as
+            `os.path.join(checkpoint_dir, 'ckpt-{epoch:04d}')`.
+    """
+
+    def __init__(self, export_filepath, checkpoint_filepath, **kwargs):
+        super().__init__(
+            filepath=export_filepath,
+            save_best_only=True,
+            **kwargs,
+        )
+
+        self._checkpoint_filepath = checkpoint_filepath
+
+    def on_test_begin(self, logs=None):
+        """Updates export_index to the latest checkpoint."""
+
+        most_recent_filepath = (
+            self._get_most_recently_modified_file_matching_pattern(
+                self._checkpoint_filepath
+            )
+        )
+        if most_recent_filepath is not None:
+            self.export_index = (
+                int(
+                    re.match(r".*ckpt-(?P<ckpt>\d+)", most_recent_filepath)[
+                        "ckpt"
+                    ]
+                )
+                - 1
+            )
+        else:
+            self.export_index = 0
+
+    def on_test_end(self, logs):
+        """Saves best model at the end of an evaluation epoch."""
+
+        self.epochs_since_last_save += 1
+        self._save_model(epoch=self.export_index, batch=None, logs=logs)
diff --git a/keras/utils/sidecar_evaluator_test.py b/keras/utils/sidecar_evaluator_test.py
new file mode 100644
index 000000000000..f336393470e3
--- /dev/null
+++ b/keras/utils/sidecar_evaluator_test.py
@@ -0,0 +1,460 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== +"""Test covering sidecar_evaluator.py.""" + +import enum +import os +import shutil +import threading +import time + +import numpy as np +import tensorflow.compat.v2 as tf +from absl.testing import parameterized + +import keras +from keras.optimizers import sgd +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import np_utils +from keras.utils import sidecar_evaluator as sidecar_evaluator_lib +from keras.utils.sidecar_evaluator import SidecarEvaluatorModelExport + +# isort: off +from tensorflow.python.platform import tf_logging as logging + +_BATCH_SIZE = 32 +TRAIN_SAMPLES = 20 +TEST_SAMPLES = 20 +INPUT_DIM = 3 +NUM_CLASSES = 2 +NUM_HIDDEN = 5 +BATCH_SIZE = 5 + + +class TestModel(keras.Model): + def __init__(self): + super().__init__(name="test_model") + self.dense = keras.layers.Dense(10) + + def call(self, inputs): + return self.dense(inputs) + + +class DictMetric(keras.metrics.MeanSquaredError): + def result(self): + res = super().result() + return {"mean_squared_error_1": res, "mean_squared_error_2": res} + + +class ModelType(enum.Enum): + SEQUENTIAL = "sequential" + SUBCLASS = "subclass" + + +def _test_model_builder(model_type: ModelType, compile_model, build_model): + if model_type == ModelType.SEQUENTIAL: + model = keras.Sequential([keras.layers.Dense(10)]) + elif model_type == ModelType.SUBCLASS: + model = TestModel() + + if compile_model: + model.compile( + sgd.SGD(), + loss="mse", + metrics=[keras.metrics.CategoricalAccuracy(), DictMetric()], + ) + if build_model: + model.build((None, 32)) + + return model + + +@test_utils.run_v2_only +class SidecarEvaluatorTest(tf.test.TestCase, parameterized.TestCase): + def assertSummaryEventsWritten(self, log_dir): + # Asserts summary files do get written when log_dir is provided. + summary_files = tf.io.gfile.listdir(log_dir) + self.assertNotEmpty( + summary_files, + "Summary should have been written and log_dir should not be empty.", + ) + + # Asserts the content of the summary file. + event_pb_written = False + event_tags = [] + for summary_file in summary_files: + for event_pb in tf.compat.v1.train.summary_iterator( + os.path.join(log_dir, summary_file) + ): + if event_pb.step > 0: + self.assertEqual(event_pb.step, 32) + event_tags.append(event_pb.summary.value[0].tag) + event_pb_written = True + self.assertCountEqual( + event_tags, + [ + "evaluation_categorical_accuracy_vs_iterations", + "evaluation_loss_vs_iterations", + "evaluation_mean_squared_error_1_vs_iterations", + "evaluation_mean_squared_error_2_vs_iterations", + ], + ) + + # Verifying at least one non-zeroth step is written to summary. + self.assertTrue(event_pb_written) + + def assertModelsSameVariables(self, model_a, model_b): + # Check both have the same number of variables. + self.assertEqual(len(model_a.variables), len(model_b.variables)) + + # Check variable values to be equal. 
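+        # (Comparing `.numpy()` values rather than the variable objects
+        # sidesteps object identity: the eval model is a distinct Python
+        # object whose weights were restored from the checkpoint.)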
+ for var_a, var_b in zip(model_a.variables, model_b.variables): + self.assertAllEqual(var_a.numpy(), var_b.numpy()) + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], + model_type=[ModelType.SEQUENTIAL, ModelType.SUBCLASS], + ) + ) + def testIterationsNotSavedWillRaiseError(self, model_type): + model = _test_model_builder( + model_type=model_type, compile_model=False, build_model=True + ) + + checkpoint_dir = self.get_temp_dir() + checkpoint = tf.train.Checkpoint(model=model) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, checkpoint_dir, max_to_keep=2 + ) + checkpoint_manager.save() + + sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator( + model, data=None, checkpoint_dir=checkpoint_dir + ) + with self.assertRaisesRegex( + RuntimeError, + "`iterations` cannot be loaded from the checkpoint file.", + ): + sidecar_evaluator.start() + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], + model_type=[ModelType.SEQUENTIAL, ModelType.SUBCLASS], + ) + ) + def testModelNotBuiltRaiseError(self, model_type): + model = _test_model_builder( + model_type=model_type, compile_model=False, build_model=False + ) + + checkpoint_dir = self.get_temp_dir() + checkpoint = tf.train.Checkpoint(model=model) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, checkpoint_dir, max_to_keep=2 + ) + checkpoint_manager.save() + + sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator( + model, data=None, checkpoint_dir=checkpoint_dir + ) + with self.assertRaisesRegex(AssertionError, "Nothing to load."): + sidecar_evaluator.start() + + @tf.__internal__.distribute.combinations.generate( + tf.__internal__.test.combinations.combine( + mode=["eager"], + model_type=[ModelType.SEQUENTIAL, ModelType.SUBCLASS], + build_model=[True, False], + ) + ) + def testSidecarEvaluatorOutputsSummary(self, model_type, build_model): + # Create a model with synthetic data, and fit for one epoch. + model = _test_model_builder( + model_type=model_type, compile_model=True, build_model=False + ) + data = np.random.random((1000, 32)) + labels = np.random.random((1000, 10)) + dataset = tf.data.Dataset.from_tensor_slices((data, labels)) + dataset = dataset.batch(32) + model.fit(dataset, epochs=1) + + # Save a checkpoint. + checkpoint_dir = os.path.join(self.get_temp_dir(), "ckpt") + log_dir = os.path.join(self.get_temp_dir(), "summary") + logging.info( + "checkpoint_dir = %s, log_dir = %s", checkpoint_dir, log_dir + ) + checkpoint = tf.train.Checkpoint(model=model, optimizer=model.optimizer) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, checkpoint_dir, max_to_keep=2 + ) + logging.info( + "Checkpoint manager saved to: %s", checkpoint_manager.save() + ) + self.assertNotEmpty( + tf.io.gfile.listdir(checkpoint_dir), + "Checkpoint should have been written and " + "checkpoint_dir should not be empty.", + ) + + # Create a new model used for evaluation. + eval_model = _test_model_builder( + model_type=model_type, compile_model=True, build_model=build_model + ) + # Have a sidecar_evaluator evaluate once. + sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator( + eval_model, + data=dataset, + checkpoint_dir=checkpoint_dir, + max_evaluations=1, + callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)], + ) + sidecar_evaluator.start() + # Eval model has been restored to the same state as the original model, + # so their weights should match. 
If not, restoration of the model didn't
+        # work.
+        self.assertModelsSameVariables(model, eval_model)
+
+        self.assertSummaryEventsWritten(os.path.join(log_dir, "validation"))
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            mode=["eager"],
+            model_type=[ModelType.SEQUENTIAL, ModelType.SUBCLASS],
+            build_model=[True, False],
+        )
+    )
+    def testSidecarEvaluatorOutputsSummarySavedWithCallback(
+        self, model_type, build_model
+    ):
+        checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoints")
+        log_dir = os.path.join(self.get_temp_dir(), "summary")
+        # Create a model with synthetic data, and fit for one epoch.
+        model = _test_model_builder(
+            model_type=model_type, compile_model=True, build_model=False
+        )
+        data = np.random.random((1000, 32))
+        labels = np.random.random((1000, 10))
+        dataset = tf.data.Dataset.from_tensor_slices((data, labels))
+        dataset = dataset.batch(_BATCH_SIZE)
+        save_callback = keras.callbacks.ModelCheckpoint(
+            filepath=os.path.join(checkpoint_dir, "ckpt-{epoch}"),
+            save_weights_only=True,
+        )
+        model.fit(dataset, epochs=1, callbacks=[save_callback])
+        self.assertNotEmpty(
+            tf.io.gfile.listdir(checkpoint_dir),
+            "Checkpoint should have been written and "
+            "checkpoint_dir should not be empty.",
+        )
+
+        # Create a new model used for evaluation.
+        eval_model = _test_model_builder(
+            model_type=model_type, compile_model=True, build_model=build_model
+        )
+        # Have a sidecar_evaluator evaluate once.
+        sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
+            eval_model,
+            data=dataset,
+            checkpoint_dir=checkpoint_dir,
+            max_evaluations=1,
+            callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)],
+        )
+        with self.assertLogs() as cm:
+            sidecar_evaluator.start()
+
+        metrics_logging = [
+            line for line in cm.output if "End of evaluation" in line
+        ]
+        self.assertLen(metrics_logging, 1)
+        expected_logged_metrics = [
+            "loss",
+            "categorical_accuracy",
+            "mean_squared_error_1",
+            "mean_squared_error_2",
+        ]
+        for metric_name in expected_logged_metrics:
+            self.assertRegex(metrics_logging[0], f"{metric_name}=")
+
+        # Eval model has been restored to the same state as the original
+        # model, so their weights should match. If not, restoration of the
+        # model didn't work.
+        self.assertModelsSameVariables(model, eval_model)
+
+        # Check that the optimizer's `iterations` variable was restored.
+        self.assertEqual(
+            sidecar_evaluator.model.optimizer.iterations.numpy(), _BATCH_SIZE
+        )
+
+        self.assertSummaryEventsWritten(os.path.join(log_dir, "validation"))
+
+    @tf.__internal__.distribute.combinations.generate(
+        tf.__internal__.test.combinations.combine(
+            mode=["eager"],
+            model_type=[ModelType.SEQUENTIAL, ModelType.SUBCLASS],
+            build_model=[True, False],
+        )
+    )
+    def testTimeoutFunction(self, model_type, build_model):
+        checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoints")
+        # Create synthetic data; no model is trained or checkpointed here.
+        data = np.random.random((1000, 32))
+        labels = np.random.random((1000, 10))
+        dataset = tf.data.Dataset.from_tensor_slices((data, labels))
+        dataset = dataset.batch(_BATCH_SIZE)
+
+        # Create a new model used for evaluation.
+        eval_model = _test_model_builder(
+            model_type=model_type, compile_model=True, build_model=build_model
+        )
+        # Try to have a sidecar_evaluator evaluate once.
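+        # No checkpoint is ever written to `checkpoint_dir` in this test, so
+        # the evaluator below is expected to give up after its polling
+        # timeout and log that no checkpoints could be found.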
+        sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
+            eval_model,
+            data=dataset,
+            checkpoint_dir=checkpoint_dir,
+            max_evaluations=1,
+        )
+        with self.assertLogs() as cm:
+            threading.Thread(
+                target=sidecar_evaluator.start, daemon=True
+            ).start()
+            time.sleep(50)
+
+        metrics_logging = [
+            line
+            for line in cm.output
+            if "No checkpoints appear to be found" in line
+        ]
+        self.assertGreaterEqual(len(metrics_logging), 1)
+
+    def testExperimentalDeprecatedMessage(self):
+        warning_messages = []
+
+        def warning(msg):
+            warning_messages.append(msg)
+
+        with tf.compat.v1.test.mock.patch.object(logging, "warning", warning):
+            sidecar_evaluator_lib.SidecarEvaluatorExperimental(None, None, None)
+
+        warning_msg = (
+            "`tf.keras.experimental.SidecarEvaluator` endpoint is deprecated"
+        )
+        self.assertIn(warning_msg, "\n".join(warning_messages))
+
+    @test_combinations.run_with_all_model_types
+    def test_best_model_exporter_with_sidecarevaluator(self):
+        temp_dir = self.get_temp_dir()
+        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+
+        # Create a model with synthetic data, and fit for 20 epochs.
+        layers = [
+            keras.layers.Dense(
+                NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu"
+            ),
+            keras.layers.Dense(NUM_CLASSES, activation="softmax"),
+        ]
+        model = test_utils.get_model_from_layers(layers, input_shape=(3,))
+        model.compile(
+            loss="categorical_crossentropy",
+            optimizer="rmsprop",
+            metrics=["acc"],
+        )
+
+        (x_train, y_train), (x_test, y_test) = test_utils.get_test_data(
+            train_samples=TRAIN_SAMPLES,
+            test_samples=TEST_SAMPLES,
+            input_shape=(INPUT_DIM,),
+            num_classes=NUM_CLASSES,
+        )
+        y_test = np_utils.to_categorical(y_test)
+        y_train = np_utils.to_categorical(y_train)
+
+        callbacks = [
+            keras.callbacks.ModelCheckpoint(
+                filepath=os.path.join(temp_dir, "ckpt", "ckpt-{epoch:04d}"),
+                monitor="loss",
+                save_best_only=True,
+                save_weights_only=True,
+                save_freq="epoch",
+                mode="min",
+            )
+        ]
+
+        model.fit(
+            x_train,
+            y_train,
+            batch_size=BATCH_SIZE,
+            validation_data=(x_test, y_test),
+            callbacks=callbacks,
+            epochs=20,
+            verbose=0,
+        )
+        self.assertNotEmpty(
+            tf.io.gfile.listdir(os.path.join(temp_dir, "ckpt")),
+            "Checkpoints should have been written and "
+            "checkpoint_dir should not be empty.",
+        )
+
+        # Have a sidecar_evaluator evaluate once.
+        dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
+        dataset = dataset.batch(BATCH_SIZE)
+        sidecar_evaluator = keras.utils.SidecarEvaluator(
+            model=model,
+            data=dataset,
+            checkpoint_dir=os.path.join(temp_dir, "ckpt"),
+            max_evaluations=1,
+            callbacks=[
+                SidecarEvaluatorModelExport(
+                    export_filepath=os.path.join(
+                        temp_dir,
+                        "ckpt",
+                        "best_model_eval",
+                        "best-model-{epoch:04d}",
+                    ),
+                    checkpoint_filepath=os.path.join(
+                        temp_dir, "ckpt", "ckpt-{epoch:04d}"
+                    ),
+                    save_weights_only=False,
+                    monitor="loss",
+                    mode="min",
+                    verbose=1,
+                ),
+            ],
+        )
+        sidecar_evaluator.start()
+
+        # Asserts output directory exists.
+        self.assertTrue(
+            os.path.exists(os.path.join(temp_dir, "ckpt", "best_model_eval"))
+        )
+
+        # Asserts best model files do get written.
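+        # `tf.io.gfile.listdir` returns a list of file names; stringifying
+        # it lets a single regex scan every entry for a best-model export.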
+        self.assertRegex(
+            str(
+                tf.io.gfile.listdir(
+                    os.path.join(temp_dir, "ckpt", "best_model_eval")
+                )
+            ),
+            r"(.*best-model.*)+",
+        )
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/keras/utils/steps_per_execution_tuning.py b/keras/utils/steps_per_execution_tuning.py
new file mode 100644
index 000000000000..ade47a736da5
--- /dev/null
+++ b/keras/utils/steps_per_execution_tuning.py
@@ -0,0 +1,264 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Steps per execution autotuning for Keras engine."""
+
+import logging
+import threading
+import time
+
+import numpy as np
+from tensorflow.python.util.tf_export import keras_export
+
+
+@keras_export("keras.utils.StepsPerExecutionTuner")
+class StepsPerExecutionTuner:
+    """Steps per execution tuner class.
+
+    Args:
+        optimizer: The optimizer used for training/evaluation/prediction. Used
+            to measure iterations and global throughput
+            (`optimizer.iterations`/second).
+        spe_variable: A `tf.Variable` representing the `steps_per_execution`
+            variable used during training/evaluation/prediction. Must be
+            updatable with `spe_variable.assign`.
+        interval: Optional int, the number of seconds to wait between calls to
+            measure throughput and tune `spe_variable`. Defaults to 5.
+        change_spe_interval: Optional int, the number of throughput
+            measurements before tuning. Defaults to 10.
+        change_threshold: Optional float, the percent difference in throughput
+            required to trigger a `steps_per_execution` change. For example,
+            `0.1` triggers changes if throughput changes more than 10%.
+
+    Examples:
+
+    If you're using `model.compile` and `model.fit`, this functionality is
+    available at compile time with `steps_per_execution='auto'`:
+
+    ```python
+    model.compile(..., steps_per_execution='auto')
+    ```
+
+    Custom training loop usage:
+
+    ```python
+    # Get model
+    inputs = keras.Input(shape=(784,), name="digits")
+    x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
+    x = layers.Dense(64, activation="relu", name="dense_2")(x)
+    outputs = layers.Dense(10, name="predictions")(x)
+    model = keras.Model(inputs=inputs, outputs=outputs)
+
+    # Instantiate an optimizer to train the model.
+    optimizer = keras.optimizers.SGD(learning_rate=1e-3)
+    # Instantiate a loss function.
+    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+
+    # Prepare the training dataset.
+    batch_size = 64
+    (x_train, y_train), (_, _) = keras.datasets.mnist.load_data()
+    x_train = np.reshape(x_train, (-1, 784))
+    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+    train_dataset = train_dataset.batch(batch_size)
+    iterator = iter(train_dataset)
+
+    # Create our steps per execution variable
+    steps_per_execution = tf.Variable(
+        1,
+        dtype="int64",
+        aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
+    )
+
+    # Create the tuner
+    tuner = StepsPerExecutionTuner(
+        optimizer, steps_per_execution
+    )
+
+    # Create a step function that runs a single training step
+    @tf.function
+    def step_fn(iterator):
+        batch_data, labels = next(iterator)
+        with tf.GradientTape() as tape:
+            logits = model(batch_data, training=True)
+            loss_value = loss_fn(labels, logits)
+        grads = tape.gradient(loss_value, model.trainable_weights)
+        optimizer.apply_gradients(zip(grads, model.trainable_weights))

+    # We can now pack multiple execution steps into one call
+    @tf.function
+    def multi_step_train_fn(iterator, steps_per_execution):
+        for _ in tf.range(steps_per_execution):
+            step_fn(iterator)
+        return
+
+    steps_per_epoch = 100
+    epochs = 2
+
+    # Start the tuner before training
+    tuner.start()
+
+    # We can now call our multi step training with our data
+    for _ in range(epochs):
+        for _ in range(steps_per_epoch):
+            multi_step_train_fn(iterator, steps_per_execution)
+
+    # End the tuner after training
+    tuner.stop()
+    ```
+    """
+
+    def __init__(
+        self,
+        optimizer,
+        spe_variable,
+        interval=5,
+        change_spe_interval=10,
+        change_threshold=0.1,
+    ):
+        self.optimizer = optimizer
+        self._steps_per_execution = spe_variable
+        self.interval = interval
+        self.change_spe_interval = change_spe_interval
+        self.spe_change_threshold = change_threshold
+        self.steps_per_execution_stop_event = threading.Event()
+        self.thread = None
+
+    def start(self):
+        """Starts steps per execution tuning thread.
+
+        Returns a `threading.Thread` which will run every `self.interval`
+        seconds to measure throughput and tune steps per execution.
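+
+        If the tuning thread is already alive, calling `start` again is a
+        no-op that returns the existing thread.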
+ """ + if self.thread and self.thread.is_alive(): + return self.thread + self._begin_tuning() + self.thread = threading.Thread( + target=self._steps_per_execution_interval_call, daemon=True + ) # needed to shut down successfully + self.thread.start() + return self.thread + + @property + def steps_per_execution(self): + """Settable attribute representing`steps_per_execution` variable.""" + return self._steps_per_execution + + @steps_per_execution.setter + def steps_per_execution(self, value): + self._steps_per_execution.assign(value) + self.init_spe = value + + def _steps_per_execution_interval_call(self): + while not self.steps_per_execution_stop_event.is_set(): + self._measure_and_tune() + self.steps_per_execution_stop_event.wait(self.interval) + + def _begin_tuning(self): + self.start_time = time.time() + self.init_iterations = self.optimizer.iterations.numpy() + self.init_spe = self._steps_per_execution.numpy().item() + self.spe_last_logged = { + "iteration": self.init_iterations, + "time_secs": self.start_time, + } + self.rgsps = [] # rgsps = recent global steps per second + self.avg_rgsps = 0 + self.prev_avg_rgsps = 0 + self.spe_tune_last_action_add = True + self.spe_measurement_count = 0 + + def stop(self): + """Stops steps per execution tuning thread.""" + if not self.steps_per_execution_stop_event.is_set(): + self.steps_per_execution_stop_event.set() + + def _should_tune(self): + epoch_boundary = False + if self.rgsps[-1] == 0: + epoch_boundary = True + + return ( + self.spe_measurement_count % self.change_spe_interval == 0 + and self.rgsps + and not epoch_boundary + ) + + def _tune(self): + """Changes the steps per execution using the following algorithm. + + If there is more than a 10% increase in the throughput, then the last + recorded action is repeated (i.e. if increasing the SPE caused an + increase in throughput, it is increased again). If there is more than a + 10% decrease in the throughput, then the opposite of the last action is + performed (i.e. if increasing the SPE decreased the throughput, then the + SPE is decreased). 
+ """ + self.avg_rgsps = sum(self.rgsps) / len(self.rgsps) + fast_threshold = (1 + self.spe_change_threshold) * self.prev_avg_rgsps + slow_threshold = (1 - self.spe_change_threshold) * self.prev_avg_rgsps + + if self.spe_tune_last_action_add: + repeat_action_mult = 1.5 + opposite_action_mult = 0.5 + else: + repeat_action_mult = 0.5 + opposite_action_mult = 1.5 + + spe_variable = self._steps_per_execution + spe_limit = spe_variable.dtype.max / 1.5 + current_spe = spe_variable.numpy().item() + if self.avg_rgsps > fast_threshold: + # Note that our first iteration will always trigger this as our + # threshold should be 0 + new_spe = current_spe * repeat_action_mult + elif self.avg_rgsps < slow_threshold: + new_spe = current_spe * opposite_action_mult + self.spe_tune_last_action_add = not self.spe_tune_last_action_add + else: + new_spe = current_spe + + if current_spe >= spe_limit: + new_spe = current_spe + elif current_spe == 0: + new_spe = self.init_spe + + self._steps_per_execution.assign(np.round(new_spe)) + self.prev_avg_rgsps = self.avg_rgsps + + def _measure_and_tune(self): + self.spe_measurement_count += 1 + + cur_iteration = self.optimizer.iterations.numpy() + + cur_time_secs = time.time() + recent_gsps = (cur_iteration - self.spe_last_logged["iteration"]) / ( + cur_time_secs - self.spe_last_logged["time_secs"] + ) + + self.rgsps.append(recent_gsps) + if len(self.rgsps) > self.change_spe_interval: + self.rgsps.pop(0) + + if cur_iteration == 0: # No need to tune, we have no measurements + self.start_time = cur_time_secs + return + + self.spe_last_logged["iteration"] = cur_iteration + self.spe_last_logged["time_secs"] = cur_time_secs + + try: + if self._should_tune(): + self._tune() + except RuntimeError: + logging.exception("Steps per execution autotuner failed to run.") + return diff --git a/keras/utils/steps_per_execution_tuning_test.py b/keras/utils/steps_per_execution_tuning_test.py new file mode 100644 index 000000000000..163a20932376 --- /dev/null +++ b/keras/utils/steps_per_execution_tuning_test.py @@ -0,0 +1,140 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Tests for steps_per_execution_tuning."""
+
+import time
+
+import tensorflow.compat.v2 as tf
+
+from keras import Input
+from keras import Model
+from keras import losses
+from keras import optimizers
+from keras.layers import Dense
+from keras.testing_infra import test_combinations
+from keras.utils import steps_per_execution_tuning
+
+
+class MockOptimizer:
+    def __init__(self, iterations):
+        self.iterations = tf.Variable(iterations)
+
+
+@test_combinations.run_with_all_model_types
+@test_combinations.run_all_keras_modes(always_skip_v1=True)
+class StepsPerExecutionTuningTest(test_combinations.TestCase):
+    def test_variables(self):
+        spe_variable = tf.Variable(1)
+        tuner = steps_per_execution_tuning.StepsPerExecutionTuner(
+            MockOptimizer(5), spe_variable, 5, 50, 0.5
+        )
+        assert tuner.optimizer.iterations.numpy() == 5
+        assert tuner._steps_per_execution.numpy().item() == 1
+        assert tuner.interval == 5
+        assert tuner.change_spe_interval == 50
+        assert tuner.spe_change_threshold == 0.5
+        assert not tuner.steps_per_execution_stop_event.is_set()
+
+    def test_start_stop(self):
+        spe_variable = tf.Variable(1)
+        tuner = steps_per_execution_tuning.StepsPerExecutionTuner(
+            MockOptimizer(5), spe_variable, interval=0.2
+        )
+        tuner.start()
+        assert not tuner.steps_per_execution_stop_event.is_set()
+        assert tuner.start_time > 0
+        time.sleep(0.5)  # should be enough time for 2 measurements
+        tuner.stop()
+        assert tuner.steps_per_execution_stop_event.is_set()
+        assert tuner.spe_measurement_count > 0
+
+    def test_settable_steps_per_execution(self):
+        spe_variable = tf.Variable(1)
+        tuner = steps_per_execution_tuning.StepsPerExecutionTuner(
+            MockOptimizer(5), spe_variable, interval=0.2
+        )
+        tuner.start()
+        tuner.stop()
+        assert tuner.init_spe == 1
+        tuner.steps_per_execution = 5
+        assert spe_variable.numpy().item() == 5
+        assert tuner.init_spe == 5
+
+    def test_custom_training_loop(self):
+        dataset = _get_dataset()
+        iterator = iter(dataset)
+
+        inputs = Input(shape=(784,), name="digits")
+        x = Dense(64, activation="relu", name="dense_1")(inputs)
+        x = Dense(64, activation="relu", name="dense_2")(x)
+        outputs = Dense(10, name="predictions")(x)
+        model = Model(inputs=inputs, outputs=outputs)
+        optimizer = optimizers.SGD(learning_rate=1e-3)
+        loss_fn = losses.SparseCategoricalCrossentropy(from_logits=True)
+
+        # Create our steps per execution variable
+        steps_per_execution = tf.Variable(
+            1,
+            dtype="int64",
+            aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
+        )
+
+        # Create the tuner
+        tuner = steps_per_execution_tuning.StepsPerExecutionTuner(
+            optimizer, steps_per_execution
+        )
+
+        # Create a step function that runs a single training step
+        @tf.function
+        def step_fn(iterator):
+            batch_data, labels = next(iterator)
+            with tf.GradientTape() as tape:
+                logits = model(batch_data, training=True)
+                loss_value = loss_fn(labels, logits)
+            grads = tape.gradient(loss_value, model.trainable_weights)
+            optimizer.apply_gradients(zip(grads, model.trainable_weights))
+
+        # We can now pack multiple execution steps into one call
+        @tf.function
+        def multi_step_train_fn(iterator, steps_per_execution):
+            for _ in tf.range(steps_per_execution):
+                step_fn(iterator)
+            return
+
+        steps_per_epoch = 10
+        epochs = 2
+
+        # Start the tuner before training
+        tuner.start()
+
+        for _ in range(epochs):
+            for _ in range(steps_per_epoch):
+                multi_step_train_fn(iterator, steps_per_execution)
+
+        # End the
tuner after training + tuner.stop() + + +def _get_dataset(): + inputs = tf.zeros((1000, 784), dtype=tf.float32) + targets = tf.zeros((1000,), dtype=tf.float32) + dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.batch(10) + return dataset + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/text_dataset.py b/keras/utils/text_dataset.py index c7cec37b3c15..37ba1a94b10c 100644 --- a/keras/utils/text_dataset.py +++ b/keras/utils/text_dataset.py @@ -14,234 +14,272 @@ # ============================================================================== """Keras text dataset generation utilities.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np from keras.utils import dataset_utils + +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.utils.text_dataset_from_directory', - 'keras.preprocessing.text_dataset_from_directory', - v1=[]) -def text_dataset_from_directory(directory, - labels='inferred', - label_mode='int', - class_names=None, - batch_size=32, - max_length=None, - shuffle=True, - seed=None, - validation_split=None, - subset=None, - follow_links=False): - """Generates a `tf.data.Dataset` from text files in a directory. - - If your directory structure is: - - ``` - main_directory/ - ...class_a/ - ......a_text_1.txt - ......a_text_2.txt - ...class_b/ - ......b_text_1.txt - ......b_text_2.txt - ``` - - Then calling `text_dataset_from_directory(main_directory, labels='inferred')` - will return a `tf.data.Dataset` that yields batches of texts from - the subdirectories `class_a` and `class_b`, together with labels - 0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`). - - Only `.txt` files are supported at this time. - - Args: - directory: Directory where the data is located. - If `labels` is "inferred", it should contain - subdirectories, each containing text files for a class. - Otherwise, the directory structure is ignored. - labels: Either "inferred" - (labels are generated from the directory structure), - None (no labels), - or a list/tuple of integer labels of the same size as the number of - text files found in the directory. Labels should be sorted according - to the alphanumeric order of the text file paths - (obtained via `os.walk(directory)` in Python). - label_mode: String describing the encoding of `labels`. Options are: - - 'int': means that the labels are encoded as integers - (e.g. for `sparse_categorical_crossentropy` loss). - - 'categorical' means that the labels are - encoded as a categorical vector - (e.g. for `categorical_crossentropy` loss). - - 'binary' means that the labels (there can be only 2) - are encoded as `float32` scalars with values 0 or 1 - (e.g. for `binary_crossentropy`). - - None (no labels). - class_names: Only valid if "labels" is "inferred". This is the explicit - list of class names (must match names of subdirectories). Used - to control the order of the classes - (otherwise alphanumerical order is used). - batch_size: Size of the batches of data. Default: 32. - If `None`, the data will not be batched - (the dataset will yield individual samples). - max_length: Maximum size of a text string. Texts longer than this will - be truncated to `max_length`. - shuffle: Whether to shuffle the data. Default: True. - If set to False, sorts the data in alphanumeric order. - seed: Optional random seed for shuffling and transformations. 
-    validation_split: Optional float between 0 and 1,
-      fraction of data to reserve for validation.
-    subset: Subset of the data to return.
-      One of "training", "validation" or "both".
-      Only used if `validation_split` is set.
-      When `subset="both"`, the utility returns a tuple of two datasets
-      (the training and validation datasets respectively).
-    follow_links: Whether to visits subdirectories pointed to by symlinks.
-      Defaults to False.
-
-  Returns:
+@keras_export(
+    "keras.utils.text_dataset_from_directory",
+    "keras.preprocessing.text_dataset_from_directory",
+    v1=[],
+)
+def text_dataset_from_directory(
+    directory,
+    labels="inferred",
+    label_mode="int",
+    class_names=None,
+    batch_size=32,
+    max_length=None,
+    shuffle=True,
+    seed=None,
+    validation_split=None,
+    subset=None,
+    follow_links=False,
+):
+    """Generates a `tf.data.Dataset` from text files in a directory.
+
+    If your directory structure is:
+
+    ```
+    main_directory/
+    ...class_a/
+    ......a_text_1.txt
+    ......a_text_2.txt
+    ...class_b/
+    ......b_text_1.txt
+    ......b_text_2.txt
+    ```
+
+    Then calling `text_dataset_from_directory(main_directory,
+    labels='inferred')` will return a `tf.data.Dataset` that yields batches of
+    texts from the subdirectories `class_a` and `class_b`, together with labels
+    0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`).
+
+    Only `.txt` files are supported at this time.
+
+    Args:
+        directory: Directory where the data is located.
+            If `labels` is `"inferred"`, it should contain
+            subdirectories, each containing text files for a class.
+            Otherwise, the directory structure is ignored.
+        labels: Either `"inferred"`
+            (labels are generated from the directory structure),
+            `None` (no labels),
+            or a list/tuple of integer labels of the same size as the number of
+            text files found in the directory. Labels should be sorted
+            according to the alphanumeric order of the text file paths
+            (obtained via `os.walk(directory)` in Python).
+        label_mode: String describing the encoding of `labels`. Options are:
+            - `"int"`: means that the labels are encoded as integers
+                (e.g. for `sparse_categorical_crossentropy` loss).
+            - `"categorical"` means that the labels are
+                encoded as a categorical vector
+                (e.g. for `categorical_crossentropy` loss).
+            - `"binary"` means that the labels (there can be only 2)
+                are encoded as `float32` scalars with values 0 or 1
+                (e.g. for `binary_crossentropy`).
+            - `None` (no labels).
+        class_names: Only valid if `labels` is `"inferred"`.
+            This is the explicit list of class names
+            (must match names of subdirectories). Used to control the order
+            of the classes (otherwise alphanumerical order is used).
+        batch_size: Size of the batches of data. Defaults to 32.
+            If `None`, the data will not be batched
+            (the dataset will yield individual samples).
+        max_length: Maximum size of a text string. Texts longer than this will
+            be truncated to `max_length`.
+        shuffle: Whether to shuffle the data. Defaults to `True`.
+            If set to `False`, sorts the data in alphanumeric order.
+        seed: Optional random seed for shuffling and transformations.
+        validation_split: Optional float between 0 and 1,
+            fraction of data to reserve for validation.
+        subset: Subset of the data to return.
+            One of `"training"`, `"validation"` or `"both"`.
+            Only used if `validation_split` is set.
+            When `subset="both"`, the utility returns a tuple of two datasets
+            (the training and validation datasets respectively).
+        follow_links: Whether to visit subdirectories pointed to by symlinks.
+ Defaults to `False`. + + Returns: + A `tf.data.Dataset` object. - - If `label_mode` is None, it yields `string` tensors of shape + + - If `label_mode` is `None`, it yields `string` tensors of shape `(batch_size,)`, containing the contents of a batch of text files. - - Otherwise, it yields a tuple `(texts, labels)`, where `texts` + - Otherwise, it yields a tuple `(texts, labels)`, where `texts` has shape `(batch_size,)` and `labels` follows the format described below. - Rules regarding labels format: + Rules regarding labels format: + - if `label_mode` is `int`, the labels are an `int32` tensor of shape - `(batch_size,)`. + `(batch_size,)`. - if `label_mode` is `binary`, the labels are a `float32` tensor of - 1s and 0s of shape `(batch_size, 1)`. + 1s and 0s of shape `(batch_size, 1)`. - if `label_mode` is `categorical`, the labels are a `float32` tensor - of shape `(batch_size, num_classes)`, representing a one-hot - encoding of the class index. - """ - if labels not in ('inferred', None): - if not isinstance(labels, (list, tuple)): - raise ValueError( - '`labels` argument should be a list/tuple of integer labels, of ' - 'the same size as the number of text files in the target ' - 'directory. If you wish to infer the labels from the subdirectory ' - 'names in the target directory, pass `labels="inferred"`. ' - 'If you wish to get a dataset that only contains text samples ' - f'(no labels), pass `labels=None`. Received: labels={labels}') - if class_names: - raise ValueError('You can only pass `class_names` if ' - f'`labels="inferred"`. Received: labels={labels}, and ' - f'class_names={class_names}') - if label_mode not in {'int', 'categorical', 'binary', None}: - raise ValueError( - '`label_mode` argument must be one of "int", "categorical", "binary", ' - f'or None. Received: label_mode={label_mode}') - if labels is None or label_mode is None: - labels = None - label_mode = None - dataset_utils.check_validation_split_arg( - validation_split, subset, shuffle, seed) - - if seed is None: - seed = np.random.randint(1e6) - file_paths, labels, class_names = dataset_utils.index_directory( - directory, - labels, - formats=('.txt',), - class_names=class_names, - shuffle=shuffle, - seed=seed, - follow_links=follow_links) - - if label_mode == 'binary' and len(class_names) != 2: - raise ValueError( - f'When passing `label_mode="binary"`, there must be exactly 2 ' - f'class_names. Received: class_names={class_names}') - - if subset == 'both': - file_paths_train, labels_train = dataset_utils.get_training_or_validation_split( - file_paths, labels, validation_split, 'training') - file_paths_val, labels_val = dataset_utils.get_training_or_validation_split( - file_paths, labels, validation_split, 'validation') - if not file_paths_train: - raise ValueError( - f'No training text files found in directory {directory}. ' - f'Allowed format: .txt') - if not file_paths_val: - raise ValueError( - f'No validation text files found in directory {directory}. 
' - f'Allowed format: .txt') - train_dataset = paths_and_labels_to_dataset( - file_paths=file_paths_train, - labels=labels_train, - label_mode=label_mode, - num_classes=len(class_names), - max_length=max_length) - val_dataset = paths_and_labels_to_dataset( - file_paths=file_paths_val, - labels=labels_val, - label_mode=label_mode, - num_classes=len(class_names), - max_length=max_length) - - train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE) - val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE) - if batch_size is not None: - if shuffle: - # Shuffle locally at each iteration - train_dataset = train_dataset.shuffle( - buffer_size=batch_size * 8, seed=seed) - train_dataset = train_dataset.batch(batch_size) - val_dataset = val_dataset.batch(batch_size) - else: - if shuffle: - train_dataset = train_dataset.shuffle(buffer_size=1024, seed=seed) - # Users may need to reference `class_names`. - train_dataset.class_names = class_names - val_dataset.class_names = class_names - dataset = [train_dataset, val_dataset] - else: - file_paths, labels = dataset_utils.get_training_or_validation_split( - file_paths, labels, validation_split, subset) - if not file_paths: - raise ValueError(f'No text files found in directory {directory}. ' - f'Allowed format: .txt') - dataset = paths_and_labels_to_dataset( - file_paths=file_paths, - labels=labels, - label_mode=label_mode, - num_classes=len(class_names), - max_length=max_length) - dataset = dataset.prefetch(tf.data.AUTOTUNE) - if batch_size is not None: - if shuffle: - # Shuffle locally at each iteration - dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed) - dataset = dataset.batch(batch_size) + of shape `(batch_size, num_classes)`, representing a one-hot + encoding of the class index. + """ + if labels not in ("inferred", None): + if not isinstance(labels, (list, tuple)): + raise ValueError( + "`labels` argument should be a list/tuple of integer labels, " + "of the same size as the number of text files in the target " + "directory. If you wish to infer the labels from the " + "subdirectory names in the target directory, " + 'pass `labels="inferred"`. ' + "If you wish to get a dataset that only contains text samples " + f"(no labels), pass `labels=None`. Received: labels={labels}" + ) + if class_names: + raise ValueError( + "You can only pass `class_names` if " + f'`labels="inferred"`. Received: labels={labels}, and ' + f"class_names={class_names}" + ) + if label_mode not in {"int", "categorical", "binary", None}: + raise ValueError( + '`label_mode` argument must be one of "int", ' + '"categorical", "binary", ' + f"or None. Received: label_mode={label_mode}" + ) + if labels is None or label_mode is None: + labels = None + label_mode = None + dataset_utils.check_validation_split_arg( + validation_split, subset, shuffle, seed + ) + + if seed is None: + seed = np.random.randint(1e6) + file_paths, labels, class_names = dataset_utils.index_directory( + directory, + labels, + formats=(".txt",), + class_names=class_names, + shuffle=shuffle, + seed=seed, + follow_links=follow_links, + ) + + if label_mode == "binary" and len(class_names) != 2: + raise ValueError( + 'When passing `label_mode="binary"`, there must be exactly 2 ' + f"class_names. 
Received: class_names={class_names}" + ) + + if subset == "both": + ( + file_paths_train, + labels_train, + ) = dataset_utils.get_training_or_validation_split( + file_paths, labels, validation_split, "training" + ) + ( + file_paths_val, + labels_val, + ) = dataset_utils.get_training_or_validation_split( + file_paths, labels, validation_split, "validation" + ) + if not file_paths_train: + raise ValueError( + f"No training text files found in directory {directory}. " + "Allowed format: .txt" + ) + if not file_paths_val: + raise ValueError( + f"No validation text files found in directory {directory}. " + "Allowed format: .txt" + ) + train_dataset = paths_and_labels_to_dataset( + file_paths=file_paths_train, + labels=labels_train, + label_mode=label_mode, + num_classes=len(class_names), + max_length=max_length, + ) + val_dataset = paths_and_labels_to_dataset( + file_paths=file_paths_val, + labels=labels_val, + label_mode=label_mode, + num_classes=len(class_names), + max_length=max_length, + ) + + train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE) + val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE) + if batch_size is not None: + if shuffle: + # Shuffle locally at each iteration + train_dataset = train_dataset.shuffle( + buffer_size=batch_size * 8, seed=seed + ) + train_dataset = train_dataset.batch(batch_size) + val_dataset = val_dataset.batch(batch_size) + else: + if shuffle: + train_dataset = train_dataset.shuffle( + buffer_size=1024, seed=seed + ) + # Users may need to reference `class_names`. + train_dataset.class_names = class_names + val_dataset.class_names = class_names + dataset = [train_dataset, val_dataset] else: - if shuffle: - dataset = dataset.shuffle(buffer_size=1024, seed=seed) - # Users may need to reference `class_names`. - dataset.class_names = class_names - return dataset - - -def paths_and_labels_to_dataset(file_paths, - labels, - label_mode, - num_classes, - max_length): - """Constructs a dataset of text strings and labels.""" - path_ds = tf.data.Dataset.from_tensor_slices(file_paths) - string_ds = path_ds.map( - lambda x: path_to_string_content(x, max_length), - num_parallel_calls=tf.data.AUTOTUNE) - if label_mode: - label_ds = dataset_utils.labels_to_dataset(labels, label_mode, num_classes) - string_ds = tf.data.Dataset.zip((string_ds, label_ds)) - return string_ds + file_paths, labels = dataset_utils.get_training_or_validation_split( + file_paths, labels, validation_split, subset + ) + if not file_paths: + raise ValueError( + f"No text files found in directory {directory}. " + "Allowed format: .txt" + ) + dataset = paths_and_labels_to_dataset( + file_paths=file_paths, + labels=labels, + label_mode=label_mode, + num_classes=len(class_names), + max_length=max_length, + ) + dataset = dataset.prefetch(tf.data.AUTOTUNE) + if batch_size is not None: + if shuffle: + # Shuffle locally at each iteration + dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed) + dataset = dataset.batch(batch_size) + else: + if shuffle: + dataset = dataset.shuffle(buffer_size=1024, seed=seed) + # Users may need to reference `class_names`. 
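+        # (It is attached as a plain Python attribute, so it exists on this
+        # dataset object only and is not carried through further `tf.data`
+        # transformations.)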
+ dataset.class_names = class_names + return dataset + + +def paths_and_labels_to_dataset( + file_paths, labels, label_mode, num_classes, max_length +): + """Constructs a dataset of text strings and labels.""" + path_ds = tf.data.Dataset.from_tensor_slices(file_paths) + string_ds = path_ds.map( + lambda x: path_to_string_content(x, max_length), + num_parallel_calls=tf.data.AUTOTUNE, + ) + if label_mode: + label_ds = dataset_utils.labels_to_dataset( + labels, label_mode, num_classes + ) + string_ds = tf.data.Dataset.zip((string_ds, label_ds)) + return string_ds def path_to_string_content(path, max_length): - txt = tf.io.read_file(path) - if max_length is not None: - txt = tf.compat.v1.strings.substr(txt, 0, max_length) - return txt + txt = tf.io.read_file(path) + if max_length is not None: + txt = tf.compat.v1.strings.substr(txt, 0, max_length) + return txt diff --git a/keras/utils/text_dataset_test.py b/keras/utils/text_dataset_test.py index e050fae7c45c..532eb06cf848 100644 --- a/keras/utils/text_dataset_test.py +++ b/keras/utils/text_dataset_test.py @@ -14,12 +14,13 @@ # ============================================================================== """Tests for text_dataset.""" -import tensorflow.compat.v2 as tf - import os import random import shutil import string + +import tensorflow.compat.v2 as tf + from keras.testing_infra import test_combinations from keras.testing_infra import test_utils from keras.utils import text_dataset @@ -27,251 +28,297 @@ @test_utils.run_v2_only class TextDatasetFromDirectoryTest(test_combinations.TestCase): - - def _prepare_directory(self, - num_classes=2, - nested_dirs=False, - count=16, - length=20): - # Get a unique temp directory - temp_dir = os.path.join(self.get_temp_dir(), str(random.randint(0, 1e6))) - os.mkdir(temp_dir) - self.addCleanup(shutil.rmtree, temp_dir) - - # Generate paths to class subdirectories - paths = [] - for class_index in range(num_classes): - class_directory = 'class_%s' % (class_index,) - if nested_dirs: - class_paths = [ - class_directory, os.path.join(class_directory, 'subfolder_1'), - os.path.join(class_directory, 'subfolder_2'), os.path.join( - class_directory, 'subfolder_1', 'sub-subfolder') - ] - else: - class_paths = [class_directory] - for path in class_paths: - os.mkdir(os.path.join(temp_dir, path)) - paths += class_paths - - for i in range(count): - path = paths[i % len(paths)] - filename = os.path.join(path, 'text_%s.txt' % (i,)) - f = open(os.path.join(temp_dir, filename), 'w') - text = ''.join([random.choice(string.printable) for _ in range(length)]) - f.write(text) - f.close() - return temp_dir - - def test_text_dataset_from_directory_standalone(self): - # Test retrieving txt files without labels from a directory and its subdirs. - # Save a few extra files in the parent directory. 
- directory = self._prepare_directory(count=7, num_classes=2) - for i in range(3): - filename = 'text_%s.txt' % (i,) - f = open(os.path.join(directory, filename), 'w') - text = ''.join([random.choice(string.printable) for _ in range(20)]) - f.write(text) - f.close() - - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=5, label_mode=None, max_length=10) - batch = next(iter(dataset)) - # We just return the texts, no labels - self.assertEqual(batch.shape, (5,)) - self.assertEqual(batch.dtype.name, 'string') - # Count samples - batch_count = 0 - sample_count = 0 - for batch in dataset: - batch_count += 1 - sample_count += batch.shape[0] - self.assertEqual(batch_count, 2) - self.assertEqual(sample_count, 10) - - def test_text_dataset_from_directory_binary(self): - directory = self._prepare_directory(num_classes=2) - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, label_mode='int', max_length=10) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8,)) - self.assertEqual(batch[0].dtype.name, 'string') - self.assertEqual(len(batch[0].numpy()[0]), 10) # Test max_length - self.assertEqual(batch[1].shape, (8,)) - self.assertEqual(batch[1].dtype.name, 'int32') - - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, label_mode='binary') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8,)) - self.assertEqual(batch[0].dtype.name, 'string') - self.assertEqual(batch[1].shape, (8, 1)) - self.assertEqual(batch[1].dtype.name, 'float32') - - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, label_mode='categorical') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8,)) - self.assertEqual(batch[0].dtype.name, 'string') - self.assertEqual(batch[1].shape, (8, 2)) - self.assertEqual(batch[1].dtype.name, 'float32') - - def test_sample_count(self): - directory = self._prepare_directory(num_classes=4, count=15) - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, label_mode=None) - sample_count = 0 - for batch in dataset: - sample_count += batch.shape[0] - self.assertEqual(sample_count, 15) - - def test_text_dataset_from_directory_multiclass(self): - directory = self._prepare_directory(num_classes=4, count=15) - - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, label_mode=None) - batch = next(iter(dataset)) - self.assertEqual(batch.shape, (8,)) - - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, label_mode=None) - sample_count = 0 - iterator = iter(dataset) - for batch in dataset: - sample_count += next(iterator).shape[0] - self.assertEqual(sample_count, 15) - - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, label_mode='int') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8,)) - self.assertEqual(batch[0].dtype.name, 'string') - self.assertEqual(batch[1].shape, (8,)) - self.assertEqual(batch[1].dtype.name, 'int32') - - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, label_mode='categorical') - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8,)) - self.assertEqual(batch[0].dtype.name, 'string') - self.assertEqual(batch[1].shape, (8, 4)) - self.assertEqual(batch[1].dtype.name, 'float32') - - def 
test_text_dataset_from_directory_validation_split(self): - directory = self._prepare_directory(num_classes=2, count=10) - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=10, validation_split=0.2, subset='training', - seed=1337) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8,)) - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=10, validation_split=0.2, subset='validation', - seed=1337) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (2,)) - - train_dataset, val_dataset = text_dataset.text_dataset_from_directory( - directory, - batch_size=10, - validation_split=0.2, - subset='both', - seed=1337) - batch = next(iter(train_dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (8,)) - batch = next(iter(val_dataset)) - self.assertLen(batch, 2) - self.assertEqual(batch[0].shape, (2,)) - - def test_text_dataset_from_directory_manual_labels(self): - directory = self._prepare_directory(num_classes=2, count=2) - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, labels=[0, 1], shuffle=False) - batch = next(iter(dataset)) - self.assertLen(batch, 2) - self.assertAllClose(batch[1], [0, 1]) - - def test_text_dataset_from_directory_follow_links(self): - directory = self._prepare_directory(num_classes=2, count=25, - nested_dirs=True) - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=8, label_mode=None, follow_links=True) - sample_count = 0 - for batch in dataset: - sample_count += batch.shape[0] - self.assertEqual(sample_count, 25) - - def test_text_dataset_from_directory_no_files(self): - directory = self._prepare_directory(num_classes=2, count=0) - with self.assertRaisesRegex(ValueError, 'No text files found'): - _ = text_dataset.text_dataset_from_directory(directory) - - def test_text_dataset_from_directory_errors(self): - directory = self._prepare_directory(num_classes=3, count=5) - - with self.assertRaisesRegex(ValueError, '`labels` argument should be'): - _ = text_dataset.text_dataset_from_directory( - directory, labels='other') - - with self.assertRaisesRegex(ValueError, '`label_mode` argument must be'): - _ = text_dataset.text_dataset_from_directory( - directory, label_mode='other') - - with self.assertRaisesRegex( - ValueError, 'only pass `class_names` if `labels="inferred"`'): - _ = text_dataset.text_dataset_from_directory( - directory, labels=[0, 0, 1, 1, 1], - class_names=['class_0', 'class_1', 'class_2']) - - with self.assertRaisesRegex( - ValueError, - 'Expected the lengths of `labels` to match the number of files'): - _ = text_dataset.text_dataset_from_directory( - directory, labels=[0, 0, 1, 1]) - - with self.assertRaisesRegex( - ValueError, '`class_names` passed did not match'): - _ = text_dataset.text_dataset_from_directory( - directory, class_names=['class_0', 'class_2']) - - with self.assertRaisesRegex(ValueError, 'there must be exactly 2'): - _ = text_dataset.text_dataset_from_directory( - directory, label_mode='binary') - - with self.assertRaisesRegex(ValueError, - '`validation_split` must be between 0 and 1'): - _ = text_dataset.text_dataset_from_directory( - directory, validation_split=2) - - with self.assertRaisesRegex( - ValueError, '`subset` must be either "training", ' - '"validation" or "both"'): - _ = text_dataset.text_dataset_from_directory( - directory, validation_split=0.2, subset='other') - - with self.assertRaisesRegex(ValueError, '`validation_split` 
must be set'): - _ = text_dataset.text_dataset_from_directory( - directory, validation_split=0, subset='training') - - with self.assertRaisesRegex(ValueError, 'must provide a `seed`'): - _ = text_dataset.text_dataset_from_directory( - directory, validation_split=0.2, subset='training') - - def test_text_dataset_from_directory_not_batched(self): - directory = self._prepare_directory() - dataset = text_dataset.text_dataset_from_directory( - directory, batch_size=None, label_mode=None, follow_links=True) - - sample = next(iter(dataset)) - self.assertEqual(len(sample.shape), 0) - - -if __name__ == '__main__': - tf.test.main() + def _prepare_directory( + self, num_classes=2, nested_dirs=False, count=16, length=20 + ): + # Get a unique temp directory + temp_dir = os.path.join( + self.get_temp_dir(), str(random.randint(0, 1e6)) + ) + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) + + # Generate paths to class subdirectories + paths = [] + for class_index in range(num_classes): + class_directory = f"class_{class_index}" + if nested_dirs: + class_paths = [ + class_directory, + os.path.join(class_directory, "subfolder_1"), + os.path.join(class_directory, "subfolder_2"), + os.path.join( + class_directory, "subfolder_1", "sub-subfolder" + ), + ] + else: + class_paths = [class_directory] + for path in class_paths: + os.mkdir(os.path.join(temp_dir, path)) + paths += class_paths + + for i in range(count): + path = paths[i % len(paths)] + filename = os.path.join(path, f"text_{i}.txt") + with open(os.path.join(temp_dir, filename), "w") as f: + text = "".join( + [random.choice(string.printable) for _ in range(length)] + ) + f.write(text) + return temp_dir + + def test_text_dataset_from_directory_standalone(self): + # Test retrieving txt files without labels from a directory and its + # subdirs. Save a few extra files in the parent directory. 
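+        # With `label_mode=None`, the directory structure is ignored, so the
+        # three extra top-level files are read along with the seven files in
+        # the class subdirectories (10 samples in total).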
+ directory = self._prepare_directory(count=7, num_classes=2) + for i in range(3): + filename = f"text_{i}.txt" + with open(os.path.join(directory, filename), "w") as f: + text = "".join( + [random.choice(string.printable) for _ in range(20)] + ) + f.write(text) + + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=5, label_mode=None, max_length=10 + ) + batch = next(iter(dataset)) + # We just return the texts, no labels + self.assertEqual(batch.shape, (5,)) + self.assertEqual(batch.dtype.name, "string") + # Count samples + batch_count = 0 + sample_count = 0 + for batch in dataset: + batch_count += 1 + sample_count += batch.shape[0] + self.assertEqual(batch_count, 2) + self.assertEqual(sample_count, 10) + + def test_text_dataset_from_directory_binary(self): + directory = self._prepare_directory(num_classes=2) + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, label_mode="int", max_length=10 + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8,)) + self.assertEqual(batch[0].dtype.name, "string") + self.assertEqual(len(batch[0].numpy()[0]), 10) # Test max_length + self.assertEqual(batch[1].shape, (8,)) + self.assertEqual(batch[1].dtype.name, "int32") + + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, label_mode="binary" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8,)) + self.assertEqual(batch[0].dtype.name, "string") + self.assertEqual(batch[1].shape, (8, 1)) + self.assertEqual(batch[1].dtype.name, "float32") + + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, label_mode="categorical" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8,)) + self.assertEqual(batch[0].dtype.name, "string") + self.assertEqual(batch[1].shape, (8, 2)) + self.assertEqual(batch[1].dtype.name, "float32") + + def test_sample_count(self): + directory = self._prepare_directory(num_classes=4, count=15) + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, label_mode=None + ) + sample_count = 0 + for batch in dataset: + sample_count += batch.shape[0] + self.assertEqual(sample_count, 15) + + def test_text_dataset_from_directory_multiclass(self): + directory = self._prepare_directory(num_classes=4, count=15) + + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, label_mode=None + ) + batch = next(iter(dataset)) + self.assertEqual(batch.shape, (8,)) + + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, label_mode=None + ) + sample_count = 0 + iterator = iter(dataset) + for batch in dataset: + sample_count += next(iterator).shape[0] + self.assertEqual(sample_count, 15) + + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, label_mode="int" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8,)) + self.assertEqual(batch[0].dtype.name, "string") + self.assertEqual(batch[1].shape, (8,)) + self.assertEqual(batch[1].dtype.name, "int32") + + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, label_mode="categorical" + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8,)) + self.assertEqual(batch[0].dtype.name, "string") + self.assertEqual(batch[1].shape, (8, 4)) + self.assertEqual(batch[1].dtype.name, "float32") + + def 
test_text_dataset_from_directory_validation_split(self): + directory = self._prepare_directory(num_classes=2, count=10) + dataset = text_dataset.text_dataset_from_directory( + directory, + batch_size=10, + validation_split=0.2, + subset="training", + seed=1337, + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8,)) + dataset = text_dataset.text_dataset_from_directory( + directory, + batch_size=10, + validation_split=0.2, + subset="validation", + seed=1337, + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (2,)) + + train_dataset, val_dataset = text_dataset.text_dataset_from_directory( + directory, + batch_size=10, + validation_split=0.2, + subset="both", + seed=1337, + ) + batch = next(iter(train_dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (8,)) + batch = next(iter(val_dataset)) + self.assertLen(batch, 2) + self.assertEqual(batch[0].shape, (2,)) + + def test_text_dataset_from_directory_manual_labels(self): + directory = self._prepare_directory(num_classes=2, count=2) + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, labels=[0, 1], shuffle=False + ) + batch = next(iter(dataset)) + self.assertLen(batch, 2) + self.assertAllClose(batch[1], [0, 1]) + + def test_text_dataset_from_directory_follow_links(self): + directory = self._prepare_directory( + num_classes=2, count=25, nested_dirs=True + ) + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=8, label_mode=None, follow_links=True + ) + sample_count = 0 + for batch in dataset: + sample_count += batch.shape[0] + self.assertEqual(sample_count, 25) + + def test_text_dataset_from_directory_no_files(self): + directory = self._prepare_directory(num_classes=2, count=0) + with self.assertRaisesRegex(ValueError, "No text files found"): + _ = text_dataset.text_dataset_from_directory(directory) + + def test_text_dataset_from_directory_errors(self): + directory = self._prepare_directory(num_classes=3, count=5) + + with self.assertRaisesRegex(ValueError, "`labels` argument should be"): + _ = text_dataset.text_dataset_from_directory( + directory, labels="other" + ) + + with self.assertRaisesRegex( + ValueError, "`label_mode` argument must be" + ): + _ = text_dataset.text_dataset_from_directory( + directory, label_mode="other" + ) + + with self.assertRaisesRegex( + ValueError, 'only pass `class_names` if `labels="inferred"`' + ): + _ = text_dataset.text_dataset_from_directory( + directory, + labels=[0, 0, 1, 1, 1], + class_names=["class_0", "class_1", "class_2"], + ) + + with self.assertRaisesRegex( + ValueError, + "Expected the lengths of `labels` to match the number of files", + ): + _ = text_dataset.text_dataset_from_directory( + directory, labels=[0, 0, 1, 1] + ) + + with self.assertRaisesRegex( + ValueError, "`class_names` passed did not match" + ): + _ = text_dataset.text_dataset_from_directory( + directory, class_names=["class_0", "class_2"] + ) + + with self.assertRaisesRegex(ValueError, "there must be exactly 2"): + _ = text_dataset.text_dataset_from_directory( + directory, label_mode="binary" + ) + + with self.assertRaisesRegex( + ValueError, "`validation_split` must be between 0 and 1" + ): + _ = text_dataset.text_dataset_from_directory( + directory, validation_split=2 + ) + + with self.assertRaisesRegex( + ValueError, + '`subset` must be either "training", "validation" or "both"', + ): + _ = text_dataset.text_dataset_from_directory( + directory, validation_split=0.2, 
subset="other" + ) + + with self.assertRaisesRegex( + ValueError, "`validation_split` must be set" + ): + _ = text_dataset.text_dataset_from_directory( + directory, validation_split=0, subset="training" + ) + + with self.assertRaisesRegex(ValueError, "must provide a `seed`"): + _ = text_dataset.text_dataset_from_directory( + directory, validation_split=0.2, subset="training" + ) + + def test_text_dataset_from_directory_not_batched(self): + directory = self._prepare_directory() + dataset = text_dataset.text_dataset_from_directory( + directory, batch_size=None, label_mode=None, follow_links=True + ) + + sample = next(iter(dataset)) + self.assertEqual(len(sample.shape), 0) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/tf_contextlib.py b/keras/utils/tf_contextlib.py index 73103e7996ba..d988badaaf55 100644 --- a/keras/utils/tf_contextlib.py +++ b/keras/utils/tf_contextlib.py @@ -14,20 +14,22 @@ # ============================================================================== """TFDecorator-aware replacements for the contextlib module.""" -import tensorflow.compat.v2 as tf - import contextlib as _contextlib +import tensorflow.compat.v2 as tf + def contextmanager(target): - """A tf_decorator-aware wrapper for `contextlib.contextmanager`. + """A tf_decorator-aware wrapper for `contextlib.contextmanager`. - Usage is identical to `contextlib.contextmanager`. + Usage is identical to `contextlib.contextmanager`. - Args: - target: A callable to be wrapped in a contextmanager. - Returns: - A callable that can be used inside of a `with` statement. - """ - context_manager = _contextlib.contextmanager(target) - return tf.__internal__.decorator.make_decorator(target, context_manager, 'contextmanager') + Args: + target: A callable to be wrapped in a contextmanager. + Returns: + A callable that can be used inside of a `with` statement. + """ + context_manager = _contextlib.contextmanager(target) + return tf.__internal__.decorator.make_decorator( + target, context_manager, "contextmanager" + ) diff --git a/keras/utils/tf_inspect.py b/keras/utils/tf_inspect.py index c69ece159490..d9ea152cd278 100644 --- a/keras/utils/tf_inspect.py +++ b/keras/utils/tf_inspect.py @@ -13,390 +13,430 @@ # limitations under the License. 
# ============================================================================== """TFDecorator-aware replacements for the inspect module.""" -# pylint: disable=g-classes-have-attributes -import tensorflow.compat.v2 as tf - import collections import functools import inspect as _inspect -ArgSpec = _inspect.ArgSpec - +import tensorflow.compat.v2 as tf -if hasattr(_inspect, 'FullArgSpec'): - FullArgSpec = _inspect.FullArgSpec # pylint: disable=invalid-name +if hasattr(_inspect, "ArgSpec"): + ArgSpec = _inspect.ArgSpec +else: + ArgSpec = collections.namedtuple( + "ArgSpec", + [ + "args", + "varargs", + "keywords", + "defaults", + ], + ) + +if hasattr(_inspect, "FullArgSpec"): + FullArgSpec = _inspect.FullArgSpec else: - FullArgSpec = collections.namedtuple('FullArgSpec', [ - 'args', 'varargs', 'varkw', 'defaults', 'kwonlyargs', 'kwonlydefaults', - 'annotations' - ]) + FullArgSpec = collections.namedtuple( + "FullArgSpec", + [ + "args", + "varargs", + "varkw", + "defaults", + "kwonlyargs", + "kwonlydefaults", + "annotations", + ], + ) def _convert_maybe_argspec_to_fullargspec(argspec): - if isinstance(argspec, FullArgSpec): - return argspec - return FullArgSpec( - args=argspec.args, - varargs=argspec.varargs, - varkw=argspec.keywords, - defaults=argspec.defaults, - kwonlyargs=[], - kwonlydefaults=None, - annotations={}) + if isinstance(argspec, FullArgSpec): + return argspec + return FullArgSpec( + args=argspec.args, + varargs=argspec.varargs, + varkw=argspec.keywords, + defaults=argspec.defaults, + kwonlyargs=[], + kwonlydefaults=None, + annotations={}, + ) + + +if hasattr(_inspect, "getfullargspec"): + _getfullargspec = _inspect.getfullargspec + + def _getargspec(target): + """A python3 version of getargspec. + + Calls `getfullargspec` and assigns args, varargs, + varkw, and defaults to a python 2/3 compatible `ArgSpec`. + + The parameter name 'varkw' is changed to 'keywords' to fit the + `ArgSpec` struct. + + Args: + target: the target object to inspect. + + Returns: + An ArgSpec with args, varargs, keywords, and defaults parameters + from FullArgSpec. + """ + fullargspecs = getfullargspec(target) + argspecs = ArgSpec( + args=fullargspecs.args, + varargs=fullargspecs.varargs, + keywords=fullargspecs.varkw, + defaults=fullargspecs.defaults, + ) + return argspecs -if hasattr(_inspect, 'getfullargspec'): - _getfullargspec = _inspect.getfullargspec # pylint: disable=invalid-name +else: + _getargspec = _inspect.getargspec - def _getargspec(target): - """A python3 version of getargspec. + def _getfullargspec(target): + """A python2 version of getfullargspec. - Calls `getfullargspec` and assigns args, varargs, - varkw, and defaults to a python 2/3 compatible `ArgSpec`. + Args: + target: the target object to inspect. - The parameter name 'varkw' is changed to 'keywords' to fit the - `ArgSpec` struct. + Returns: + A FullArgSpec with empty kwonlyargs, kwonlydefaults and annotations. + """ + return _convert_maybe_argspec_to_fullargspec(getargspec(target)) - Args: - target: the target object to inspect. - Returns: - An ArgSpec with args, varargs, keywords, and defaults parameters - from FullArgSpec. 
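The `hasattr` guards above matter because `inspect.ArgSpec` and `inspect.getargspec` were removed in Python 3.11. A stdlib-only sketch of the same down-conversion the surrounding code performs (the namedtuple mirrors the fallback definition above):

```python
import collections
import inspect

ArgSpec = collections.namedtuple(
    "ArgSpec", ["args", "varargs", "keywords", "defaults"]
)

def demo(a, b=1, *rest, **extra):
    return a + b

full = inspect.getfullargspec(demo)
# FullArgSpec names the **kwargs slot 'varkw'; the legacy struct calls it
# 'keywords', hence the rename during conversion.
legacy = ArgSpec(full.args, full.varargs, full.varkw, full.defaults)
assert legacy == ArgSpec(["a", "b"], "rest", "extra", (1,))
```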
- """ - fullargspecs = getfullargspec(target) - argspecs = ArgSpec( - args=fullargspecs.args, - varargs=fullargspecs.varargs, - keywords=fullargspecs.varkw, - defaults=fullargspecs.defaults) - return argspecs -else: - _getargspec = _inspect.getargspec +def currentframe(): + """TFDecorator-aware replacement for inspect.currentframe.""" + return _inspect.stack()[1][0] + - def _getfullargspec(target): - """A python2 version of getfullargspec. +def getargspec(obj): + """TFDecorator-aware replacement for `inspect.getargspec`. + + Note: `getfullargspec` is recommended as the python 2/3 compatible + replacement for this function. Args: - target: the target object to inspect. + obj: A function, partial function, or callable object, possibly decorated. Returns: - A FullArgSpec with empty kwonlyargs, kwonlydefaults and annotations. + The `ArgSpec` that describes the signature of the outermost decorator that + changes the callable's signature, or the `ArgSpec` that describes + the object if not decorated. + + Raises: + ValueError: When callable's signature can not be expressed with + ArgSpec. + TypeError: For objects of unsupported types. """ - return _convert_maybe_argspec_to_fullargspec(getargspec(target)) - + if isinstance(obj, functools.partial): + return _get_argspec_for_partial(obj) + + decorators, target = tf.__internal__.decorator.unwrap(obj) + + spec = next( + ( + d.decorator_argspec + for d in decorators + if d.decorator_argspec is not None + ), + None, + ) + if spec: + return spec -def currentframe(): - """TFDecorator-aware replacement for inspect.currentframe.""" - return _inspect.stack()[1][0] - - -def getargspec(obj): - """TFDecorator-aware replacement for `inspect.getargspec`. - - Note: `getfullargspec` is recommended as the python 2/3 compatible - replacement for this function. - - Args: - obj: A function, partial function, or callable object, possibly decorated. - - Returns: - The `ArgSpec` that describes the signature of the outermost decorator that - changes the callable's signature, or the `ArgSpec` that describes - the object if not decorated. - - Raises: - ValueError: When callable's signature can not be expressed with - ArgSpec. - TypeError: For objects of unsupported types. - """ - if isinstance(obj, functools.partial): - return _get_argspec_for_partial(obj) - - decorators, target = tf.__internal__.decorator.unwrap(obj) - - spec = next((d.decorator_argspec - for d in decorators - if d.decorator_argspec is not None), None) - if spec: - return spec - - try: - # Python3 will handle most callables here (not partial). - return _getargspec(target) - except TypeError: - pass - - if isinstance(target, type): try: - return _getargspec(target.__init__) + # Python3 will handle most callables here (not partial). + return _getargspec(target) except TypeError: - pass + pass - try: - return _getargspec(target.__new__) - except TypeError: - pass + if isinstance(target, type): + try: + return _getargspec(target.__init__) + except TypeError: + pass + + try: + return _getargspec(target.__new__) + except TypeError: + pass - # The `type(target)` ensures that if a class is received we don't return - # the signature of its __call__ method. - return _getargspec(type(target).__call__) + # The `type(target)` ensures that if a class is received we don't return + # the signature of its __call__ method. + return _getargspec(type(target).__call__) def _get_argspec_for_partial(obj): - """Implements `getargspec` for `functools.partial` objects. 
- - Args: - obj: The `functools.partial` object - Returns: - An `inspect.ArgSpec` - Raises: - ValueError: When callable's signature can not be expressed with - ArgSpec. - """ - # When callable is a functools.partial object, we construct its ArgSpec with - # following strategy: - # - If callable partial contains default value for positional arguments (ie. - # object.args), then final ArgSpec doesn't contain those positional arguments. - # - If callable partial contains default value for keyword arguments (ie. - # object.keywords), then we merge them with wrapped target. Default values - # from callable partial takes precedence over those from wrapped target. - # - # However, there is a case where it is impossible to construct a valid - # ArgSpec. Python requires arguments that have no default values must be - # defined before those with default values. ArgSpec structure is only valid - # when this presumption holds true because default values are expressed as a - # tuple of values without keywords and they are always assumed to belong to - # last K arguments where K is number of default values present. - # - # Since functools.partial can give default value to any argument, this - # presumption may no longer hold in some cases. For example: - # - # def func(m, n): - # return 2 * m + n - # partialed = functools.partial(func, m=1) - # - # This example will result in m having a default value but n doesn't. This is - # usually not allowed in Python and can not be expressed in ArgSpec correctly. - # - # Thus, we must detect cases like this by finding first argument with default - # value and ensures all following arguments also have default values. When - # this is not true, a ValueError is raised. - - n_prune_args = len(obj.args) - partial_keywords = obj.keywords or {} - - args, varargs, keywords, defaults = getargspec(obj.func) - - # Pruning first n_prune_args arguments. - args = args[n_prune_args:] - - # Partial function may give default value to any argument, therefore length - # of default value list must be len(args) to allow each argument to - # potentially be given a default value. - no_default = object() - all_defaults = [no_default] * len(args) - - if defaults: - all_defaults[-len(defaults):] = defaults - - # Fill in default values provided by partial function in all_defaults. - for kw, default in partial_keywords.items(): - if kw in args: - idx = args.index(kw) - all_defaults[idx] = default - elif not keywords: - raise ValueError('Function does not have **kwargs parameter, but ' - 'contains an unknown partial keyword.') - - # Find first argument with default value set. - first_default = next( - (idx for idx, x in enumerate(all_defaults) if x is not no_default), None) - - # If no default values are found, return ArgSpec with defaults=None. - if first_default is None: - return ArgSpec(args, varargs, keywords, None) - - # Checks if all arguments have default value set after first one. - invalid_default_values = [ - args[i] for i, j in enumerate(all_defaults) - if j is no_default and i > first_default - ] - - if invalid_default_values: - raise ValueError(f'Some arguments {invalid_default_values} do not have ' - 'default value, but they are positioned after those with ' - 'default values. This can not be expressed with ArgSpec.') - - return ArgSpec(args, varargs, keywords, tuple(all_defaults[first_default:])) + """Implements `getargspec` for `functools.partial` objects. 
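The `m`/`n` case sketched in the comment above is easy to reproduce; `getargspec` rejects it because the trailing-defaults tuple of `ArgSpec` cannot encode a defaulted argument followed by a non-defaulted one:

```python
import functools

from keras.utils import tf_inspect

def func(m, n):
    return 2 * m + n

partialed = functools.partial(func, m=1)
try:
    tf_inspect.getargspec(partialed)
except ValueError as e:
    # "Some arguments ['n'] do not have default value, but they are
    # positioned after those with default values. ..."
    print(e)
```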
+ + Args: + obj: The `functools.partial` object + Returns: + An `inspect.ArgSpec` + Raises: + ValueError: When callable's signature can not be expressed with + ArgSpec. + """ + # When callable is a functools.partial object, we construct its ArgSpec with + # following strategy: + # - If callable partial contains default value for positional arguments (ie. + # object.args), then final ArgSpec doesn't contain those positional + # arguments. + # - If callable partial contains default value for keyword arguments (ie. + # object.keywords), then we merge them with wrapped target. Default values + # from callable partial takes precedence over those from wrapped target. + # + # However, there is a case where it is impossible to construct a valid + # ArgSpec. Python requires arguments that have no default values must be + # defined before those with default values. ArgSpec structure is only valid + # when this presumption holds true because default values are expressed as a + # tuple of values without keywords and they are always assumed to belong to + # last K arguments where K is number of default values present. + # + # Since functools.partial can give default value to any argument, this + # presumption may no longer hold in some cases. For example: + # + # def func(m, n): + # return 2 * m + n + # partialed = functools.partial(func, m=1) + # + # This example will result in m having a default value but n doesn't. This + # is usually not allowed in Python and can not be expressed in ArgSpec + # correctly. + # + # Thus, we must detect cases like this by finding first argument with + # default value and ensures all following arguments also have default + # values. When this is not true, a ValueError is raised. + + n_prune_args = len(obj.args) + partial_keywords = obj.keywords or {} + + args, varargs, keywords, defaults = getargspec(obj.func) + + # Pruning first n_prune_args arguments. + args = args[n_prune_args:] + + # Partial function may give default value to any argument, therefore length + # of default value list must be len(args) to allow each argument to + # potentially be given a default value. + no_default = object() + all_defaults = [no_default] * len(args) + + if defaults: + all_defaults[-len(defaults) :] = defaults + + # Fill in default values provided by partial function in all_defaults. + for kw, default in partial_keywords.items(): + if kw in args: + idx = args.index(kw) + all_defaults[idx] = default + elif not keywords: + raise ValueError( + "Function does not have **kwargs parameter, but " + "contains an unknown partial keyword." + ) + + # Find first argument with default value set. + first_default = next( + (idx for idx, x in enumerate(all_defaults) if x is not no_default), None + ) + + # If no default values are found, return ArgSpec with defaults=None. + if first_default is None: + return ArgSpec(args, varargs, keywords, None) + + # Checks if all arguments have default value set after first one. + invalid_default_values = [ + args[i] + for i, j in enumerate(all_defaults) + if j is no_default and i > first_default + ] + + if invalid_default_values: + raise ValueError( + f"Some arguments {invalid_default_values} do not have " + "default value, but they are positioned after those with " + "default values. This can not be expressed with ArgSpec." + ) + + return ArgSpec(args, varargs, keywords, tuple(all_defaults[first_default:])) def getfullargspec(obj): - """TFDecorator-aware replacement for `inspect.getfullargspec`. + """TFDecorator-aware replacement for `inspect.getfullargspec`. 
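For contrast, a representable partial: a pinned keyword is merged into the defaults tuple next to any defaults the wrapped function already had, exactly as the `all_defaults` bookkeeping above describes:

```python
import functools

from keras.utils import tf_inspect

def scale(x, factor, bias=0):
    return x * factor + bias

spec = tf_inspect.getargspec(functools.partial(scale, factor=3))
# `x` (no default) still precedes `factor` and `bias` (both defaulted),
# so the ArgSpec encoding holds.
assert spec.args == ["x", "factor", "bias"]
assert spec.defaults == (3, 0)
```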
- This wrapper emulates `inspect.getfullargspec` in[^)]* Python2. + This wrapper emulates `inspect.getfullargspec` in Python 2. - Args: - obj: A callable, possibly decorated. + Args: + obj: A callable, possibly decorated. - Returns: - The `FullArgSpec` that describes the signature of - the outermost decorator that changes the callable's signature. If the - callable is not decorated, `inspect.getfullargspec()` will be called - directly on the callable. - """ - decorators, target = tf.__internal__.decorator.unwrap(obj) + Returns: + The `FullArgSpec` that describes the signature of + the outermost decorator that changes the callable's signature. If the + callable is not decorated, `inspect.getfullargspec()` will be called + directly on the callable. + """ + decorators, target = tf.__internal__.decorator.unwrap(obj) - for d in decorators: - if d.decorator_argspec is not None: - return _convert_maybe_argspec_to_fullargspec(d.decorator_argspec) - return _getfullargspec(target) + for d in decorators: + if d.decorator_argspec is not None: + return _convert_maybe_argspec_to_fullargspec(d.decorator_argspec) + return _getfullargspec(target) def getcallargs(*func_and_positional, **named): - """TFDecorator-aware replacement for inspect.getcallargs. - - Args: - *func_and_positional: A callable, possibly decorated, followed by any - positional arguments that would be passed to `func`. - **named: The named argument dictionary that would be passed to `func`. - - Returns: - A dictionary mapping `func`'s named arguments to the values they would - receive if `func(*positional, **named)` were called. - - `getcallargs` will use the argspec from the outermost decorator that provides - it. If no attached decorators modify argspec, the final unwrapped target's - argspec will be used. - """ - func = func_and_positional[0] - positional = func_and_positional[1:] - argspec = getfullargspec(func) - call_args = named.copy() - this = getattr(func, 'im_self', None) or getattr(func, '__self__', None) - if ismethod(func) and this: - positional = (this,) + positional - remaining_positionals = [arg for arg in argspec.args if arg not in call_args] - call_args.update(dict(zip(remaining_positionals, positional))) - default_count = 0 if not argspec.defaults else len(argspec.defaults) - if default_count: - for arg, value in zip(argspec.args[-default_count:], argspec.defaults): - if arg not in call_args: - call_args[arg] = value - if argspec.kwonlydefaults is not None: - for k, v in argspec.kwonlydefaults.items(): - if k not in call_args: - call_args[k] = v - return call_args + """TFDecorator-aware replacement for inspect.getcallargs. + + Args: + *func_and_positional: A callable, possibly decorated, followed by any + positional arguments that would be passed to `func`. + **named: The named argument dictionary that would be passed to `func`. + + Returns: + A dictionary mapping `func`'s named arguments to the values they would + receive if `func(*positional, **named)` were called. + + `getcallargs` will use the argspec from the outermost decorator that + provides it. If no attached decorators modify argspec, the final unwrapped + target's argspec will be used.
+ """ + func = func_and_positional[0] + positional = func_and_positional[1:] + argspec = getfullargspec(func) + call_args = named.copy() + this = getattr(func, "im_self", None) or getattr(func, "__self__", None) + if ismethod(func) and this: + positional = (this,) + positional + remaining_positionals = [ + arg for arg in argspec.args if arg not in call_args + ] + call_args.update(dict(zip(remaining_positionals, positional))) + default_count = 0 if not argspec.defaults else len(argspec.defaults) + if default_count: + for arg, value in zip(argspec.args[-default_count:], argspec.defaults): + if arg not in call_args: + call_args[arg] = value + if argspec.kwonlydefaults is not None: + for k, v in argspec.kwonlydefaults.items(): + if k not in call_args: + call_args[k] = v + return call_args def getframeinfo(*args, **kwargs): - return _inspect.getframeinfo(*args, **kwargs) + return _inspect.getframeinfo(*args, **kwargs) def getdoc(obj): - """TFDecorator-aware replacement for inspect.getdoc. + """TFDecorator-aware replacement for inspect.getdoc. - Args: - obj: An object, possibly decorated. + Args: + obj: An object, possibly decorated. - Returns: - The docstring associated with the object. + Returns: + The docstring associated with the object. - The outermost-decorated object is intended to have the most complete - documentation, so the decorated parameter is not unwrapped. - """ - return _inspect.getdoc(obj) + The outermost-decorated object is intended to have the most complete + documentation, so the decorated parameter is not unwrapped. + """ + return _inspect.getdoc(obj) def getfile(obj): - """TFDecorator-aware replacement for inspect.getfile.""" - unwrapped_object = tf.__internal__.decorator.unwrap(obj)[1] + """TFDecorator-aware replacement for inspect.getfile.""" + unwrapped_object = tf.__internal__.decorator.unwrap(obj)[1] - # Work around for the case when object is a stack frame - # and only .pyc files are used. In this case, getfile - # might return incorrect path. So, we get the path from f_globals - # instead. - if (hasattr(unwrapped_object, 'f_globals') and - '__file__' in unwrapped_object.f_globals): - return unwrapped_object.f_globals['__file__'] - return _inspect.getfile(unwrapped_object) + # Work around for the case when object is a stack frame + # and only .pyc files are used. In this case, getfile + # might return incorrect path. So, we get the path from f_globals + # instead. 
+ if ( + hasattr(unwrapped_object, "f_globals") + and "__file__" in unwrapped_object.f_globals + ): + return unwrapped_object.f_globals["__file__"] + return _inspect.getfile(unwrapped_object) def getmembers(obj, predicate=None): - """TFDecorator-aware replacement for inspect.getmembers.""" - return _inspect.getmembers(obj, predicate) + """TFDecorator-aware replacement for inspect.getmembers.""" + return _inspect.getmembers(obj, predicate) def getmodule(obj): - """TFDecorator-aware replacement for inspect.getmodule.""" - return _inspect.getmodule(obj) + """TFDecorator-aware replacement for inspect.getmodule.""" + return _inspect.getmodule(obj) def getmro(cls): - """TFDecorator-aware replacement for inspect.getmro.""" - return _inspect.getmro(cls) + """TFDecorator-aware replacement for inspect.getmro.""" + return _inspect.getmro(cls) def getsource(obj): - """TFDecorator-aware replacement for inspect.getsource.""" - return _inspect.getsource(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.getsource.""" + return _inspect.getsource(tf.__internal__.decorator.unwrap(obj)[1]) def getsourcefile(obj): - """TFDecorator-aware replacement for inspect.getsourcefile.""" - return _inspect.getsourcefile(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.getsourcefile.""" + return _inspect.getsourcefile(tf.__internal__.decorator.unwrap(obj)[1]) def getsourcelines(obj): - """TFDecorator-aware replacement for inspect.getsourcelines.""" - return _inspect.getsourcelines(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.getsourcelines.""" + return _inspect.getsourcelines(tf.__internal__.decorator.unwrap(obj)[1]) def isbuiltin(obj): - """TFDecorator-aware replacement for inspect.isbuiltin.""" - return _inspect.isbuiltin(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.isbuiltin.""" + return _inspect.isbuiltin(tf.__internal__.decorator.unwrap(obj)[1]) def isclass(obj): - """TFDecorator-aware replacement for inspect.isclass.""" - return _inspect.isclass(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.isclass.""" + return _inspect.isclass(tf.__internal__.decorator.unwrap(obj)[1]) def isfunction(obj): - """TFDecorator-aware replacement for inspect.isfunction.""" - return _inspect.isfunction(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.isfunction.""" + return _inspect.isfunction(tf.__internal__.decorator.unwrap(obj)[1]) def isframe(obj): - """TFDecorator-aware replacement for inspect.ismodule.""" - return _inspect.isframe(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.ismodule.""" + return _inspect.isframe(tf.__internal__.decorator.unwrap(obj)[1]) def isgenerator(obj): - """TFDecorator-aware replacement for inspect.isgenerator.""" - return _inspect.isgenerator(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.isgenerator.""" + return _inspect.isgenerator(tf.__internal__.decorator.unwrap(obj)[1]) def isgeneratorfunction(obj): - """TFDecorator-aware replacement for inspect.isgeneratorfunction.""" - return _inspect.isgeneratorfunction(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.isgeneratorfunction.""" + return _inspect.isgeneratorfunction( + tf.__internal__.decorator.unwrap(obj)[1] + ) def ismethod(obj): - """TFDecorator-aware replacement for 
inspect.ismethod.""" - return _inspect.ismethod(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.ismethod.""" + return _inspect.ismethod(tf.__internal__.decorator.unwrap(obj)[1]) def ismodule(obj): - """TFDecorator-aware replacement for inspect.ismodule.""" - return _inspect.ismodule(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.ismodule.""" + return _inspect.ismodule(tf.__internal__.decorator.unwrap(obj)[1]) def isroutine(obj): - """TFDecorator-aware replacement for inspect.isroutine.""" - return _inspect.isroutine(tf.__internal__.decorator.unwrap(obj)[1]) + """TFDecorator-aware replacement for inspect.isroutine.""" + return _inspect.isroutine(tf.__internal__.decorator.unwrap(obj)[1]) def stack(context=1): - """TFDecorator-aware replacement for inspect.stack.""" - return _inspect.stack(context)[1:] + """TFDecorator-aware replacement for inspect.stack.""" + return _inspect.stack(context)[1:] diff --git a/keras/utils/tf_utils.py b/keras/utils/tf_utils.py index f9e7d807ffc7..2ca549e0cdfe 100644 --- a/keras/utils/tf_utils.py +++ b/keras/utils/tf_utils.py @@ -15,607 +15,741 @@ """TensorFlow-related utilities.""" import collections +import contextlib import copy +import platform import random +import threading + +import numpy as np +import tensorflow.compat.v2 as tf +from absl import logging from keras import backend from keras.engine import keras_tensor from keras.utils import object_identity from keras.utils import tf_contextlib -import numpy as np - -import tensorflow.compat.v2 as tf - +# isort: off from tensorflow.python.framework import ops from tensorflow.python.util.tf_export import keras_export +from tensorflow.python import pywrap_tfe - -@keras_export('keras.utils.set_random_seed', v1=[]) +@keras_export("keras.utils.set_random_seed", v1=[]) def set_random_seed(seed): - """Sets all random seeds for the program (Python, NumPy, and TensorFlow). - - You can use this utility to make almost any Keras program fully deterministic. - Some limitations apply in cases where network communications are involved - (e.g. parameter server distribution), which creates additional sources of - randomness, or when certain non-deterministic cuDNN ops are involved. - - Calling this utility is equivalent to the following: - - ```python - import random - import numpy as np - import tensorflow as tf - random.seed(seed) - np.random.seed(seed) - tf.random.set_seed(seed) - ``` - - Arguments: - seed: Integer, the random seed to use. - """ - if not isinstance(seed, int): - raise ValueError( - 'Expected `seed` argument to be an integer. ' - f'Received: seed={seed} (of type {type(seed)})') - random.seed(seed) - np.random.seed(seed) - tf.random.set_seed(seed) - backend._SEED_GENERATOR.generator = random.Random(seed) # pylint:disable=protected-access + """Sets all random seeds for the program (Python, NumPy, and TensorFlow). + + You can use this utility to make almost any Keras program fully + deterministic. Some limitations apply in cases where network communications + are involved (e.g. parameter server distribution), which creates additional + sources of randomness, or when certain non-deterministic cuDNN ops are + involved. + + Calling this utility is equivalent to the following: + + ```python + import random + import numpy as np + import tensorflow as tf + random.seed(seed) + np.random.seed(seed) + tf.random.set_seed(seed) + ``` + + Arguments: + seed: Integer, the random seed to use. 
+ """ + if not isinstance(seed, int): + raise ValueError( + "Expected `seed` argument to be an integer. " + f"Received: seed={seed} (of type {type(seed)})" + ) + random.seed(seed) + np.random.seed(seed) + tf.random.set_seed(seed) + backend._SEED_GENERATOR.generator = random.Random(seed) + + +def get_random_seed(): + """Retrieve a seed value to seed a random generator. + + Returns: + the random seed as an integer. + """ + if getattr(backend._SEED_GENERATOR, "generator", None): + return backend._SEED_GENERATOR.generator.randint(1, 1e9) + else: + return random.randint(1, 1e9) def is_tensor_or_tensor_list(v): - v = tf.nest.flatten(v) - if v and isinstance(v[0], tf.Tensor): - return True - else: - return False + v = tf.nest.flatten(v) + if v and isinstance(v[0], tf.Tensor): + return True + else: + return False def get_reachable_from_inputs(inputs, targets=None): - """Returns the set of tensors/ops reachable from `inputs`. - - Stops if all targets have been found (target is optional). - - Only valid in Symbolic mode, not Eager mode. - - Args: - inputs: List of tensors. - targets: List of tensors. - - Returns: - A set of tensors reachable from the inputs (includes the inputs themselves). - """ - inputs = tf.nest.flatten(inputs, expand_composites=True) - reachable = object_identity.ObjectIdentitySet(inputs) - if targets: - remaining_targets = object_identity.ObjectIdentitySet(tf.nest.flatten(targets)) - queue = collections.deque(inputs) - - while queue: - x = queue.pop() - if isinstance(x, tuple(_user_convertible_tensor_types)): - # Can't find consumers of user-specific types. - continue - - if isinstance(x, tf.Operation): - outputs = x.outputs[:] or [] - outputs += x._control_outputs # pylint: disable=protected-access - elif isinstance(x, tf.Variable): - try: - outputs = [x.op] - except AttributeError: - # Variables can be created in an Eager context. - outputs = [] - elif tf.is_tensor(x): - outputs = x.consumers() - else: - raise TypeError( - f'Expected tf.Operation, tf.Variable, or tf.Tensor. Received: {x}') - - for y in outputs: - if y not in reachable: - reachable.add(y) - if targets: - remaining_targets.discard(y) - queue.appendleft(y) + """Returns the set of tensors/ops reachable from `inputs`. + + Stops if all targets have been found (target is optional). + + Only valid in Symbolic mode, not Eager mode. + + Args: + inputs: List of tensors. + targets: List of tensors. + + Returns: + A set of tensors reachable from the inputs (includes the inputs + themselves). + """ + inputs = tf.nest.flatten(inputs, expand_composites=True) + reachable = object_identity.ObjectIdentitySet(inputs) + if targets: + remaining_targets = object_identity.ObjectIdentitySet( + tf.nest.flatten(targets) + ) + queue = collections.deque(inputs) + + while queue: + x = queue.pop() + if isinstance(x, tuple(_user_convertible_tensor_types)): + # Can't find consumers of user-specific types. + continue + + if isinstance(x, tf.Operation): + outputs = x.outputs[:] or [] + outputs += x._control_outputs + elif isinstance(x, tf.Variable): + try: + outputs = [x.op] + except AttributeError: + # Variables can be created in an Eager context. + outputs = [] + elif tf.is_tensor(x): + outputs = x.consumers() + else: + raise TypeError( + "Expected tf.Operation, tf.Variable, or tf.Tensor. 
" + f"Received: {x}" + ) + + for y in outputs: + if y not in reachable: + reachable.add(y) + if targets: + remaining_targets.discard(y) + queue.appendleft(y) + + if targets and not remaining_targets: + return reachable + + return reachable - if targets and not remaining_targets: - return reachable - return reachable +# This function needs access to private functions of `nest`. -# This function needs access to private functions of `nest`. -# pylint: disable=protected-access def map_structure_with_atomic(is_atomic_fn, map_fn, nested): - """Maps the atomic elements of a nested structure. - - Args: - is_atomic_fn: A function that determines if an element of `nested` is - atomic. - map_fn: The function to apply to atomic elements of `nested`. - nested: A nested structure. - - Returns: - The nested structure, with atomic elements mapped according to `map_fn`. - - Raises: - ValueError: If an element that is neither atomic nor a sequence is - encountered. - """ - if is_atomic_fn(nested): - return map_fn(nested) - - # Recursively convert. - if not tf.nest.is_nested(nested): - raise ValueError( - f'Received non-atomic and non-sequence element: {nested} ' - f'of type {type(nested)}') - if tf.__internal__.nest.is_mapping(nested): - values = [nested[k] for k in sorted(nested.keys())] - elif tf.__internal__.nest.is_attrs(nested): - values = _astuple(nested) - else: - values = nested - mapped_values = [ - map_structure_with_atomic(is_atomic_fn, map_fn, ele) for ele in values - ] - return tf.__internal__.nest.sequence_like(nested, mapped_values) + """Maps the atomic elements of a nested structure. + + Args: + is_atomic_fn: A function that determines if an element of `nested` is + atomic. + map_fn: The function to apply to atomic elements of `nested`. + nested: A nested structure. + + Returns: + The nested structure, with atomic elements mapped according to `map_fn`. + + Raises: + ValueError: If an element that is neither atomic nor a sequence is + encountered. + """ + if is_atomic_fn(nested): + return map_fn(nested) + + # Recursively convert. + if not tf.nest.is_nested(nested): + raise ValueError( + f"Received non-atomic and non-sequence element: {nested} " + f"of type {type(nested)}" + ) + if tf.__internal__.nest.is_mapping(nested): + values = [nested[k] for k in sorted(nested.keys())] + elif tf.__internal__.nest.is_attrs(nested): + values = _astuple(nested) + else: + values = nested + mapped_values = [ + map_structure_with_atomic(is_atomic_fn, map_fn, ele) for ele in values + ] + return tf.__internal__.nest.sequence_like(nested, mapped_values) def get_shapes(tensors): - """Gets shapes from tensors.""" - return tf.nest.map_structure( - lambda x: x.shape if hasattr(x, 'shape') else None, tensors) - - -# pylint: enable=protected-access + """Gets shapes from tensors.""" + return tf.nest.map_structure( + lambda x: x.shape if hasattr(x, "shape") else None, tensors + ) def convert_shapes(input_shape, to_tuples=True): - """Converts nested shape representations to desired format. - - Performs: - - TensorShapes -> tuples if `to_tuples=True`. - tuples of int or None -> TensorShapes if `to_tuples=False`. - - Valid objects to be converted are: - - TensorShapes - - tuples with elements of type int or None. - - ints - - None - - Args: - input_shape: A nested structure of objects to be converted to TensorShapes. - to_tuples: If `True`, converts all TensorShape to tuples. Otherwise converts - all tuples representing shapes to TensorShapes. - - Returns: - Nested structure of shapes in desired format. 
- - Raises: - ValueError: when the input tensor shape can't be converted to tuples, eg - unknown tensor shape. - """ - - def _is_shape_component(value): - return value is None or isinstance(value, (int, tf.compat.v1.Dimension)) - - def _is_atomic_shape(input_shape): - # Ex: TensorShape or (None, 10, 32) or 5 or `None` - if _is_shape_component(input_shape): - return True - if isinstance(input_shape, tf.TensorShape): - return True - if (isinstance(input_shape, (tuple, list)) and - all(_is_shape_component(ele) for ele in input_shape)): - return True - return False - - def _convert_shape(input_shape): - input_shape = tf.TensorShape(input_shape) - if to_tuples: - input_shape = tuple(input_shape.as_list()) - return input_shape - - return map_structure_with_atomic(_is_atomic_shape, _convert_shape, - input_shape) + """Converts nested shape representations to desired format. + + Performs: + + TensorShapes -> tuples if `to_tuples=True`. + tuples of int or None -> TensorShapes if `to_tuples=False`. + + Valid objects to be converted are: + - TensorShapes + - tuples with elements of type int or None. + - ints + - None + + Args: + input_shape: A nested structure of objects to be converted to + TensorShapes. + to_tuples: If `True`, converts all TensorShape to tuples. Otherwise + converts all tuples representing shapes to TensorShapes. + + Returns: + Nested structure of shapes in desired format. + + Raises: + ValueError: when the input tensor shape can't be converted to tuples, eg + unknown tensor shape. + """ + + def _is_shape_component(value): + return value is None or isinstance(value, (int, tf.compat.v1.Dimension)) + + def _is_atomic_shape(input_shape): + # Ex: TensorShape or (None, 10, 32) or 5 or `None` + if _is_shape_component(input_shape): + return True + if isinstance(input_shape, tf.TensorShape): + return True + if isinstance(input_shape, (tuple, list)) and all( + _is_shape_component(ele) for ele in input_shape + ): + return True + return False + + def _convert_shape(input_shape): + input_shape = tf.TensorShape(input_shape) + if to_tuples: + input_shape = tuple(input_shape.as_list()) + return input_shape + + return map_structure_with_atomic( + _is_atomic_shape, _convert_shape, input_shape + ) def validate_axis(axis, input_shape): - """Validate an axis value and returns its standardized form. - - Args: - axis: Value to validate. Can be an integer or a list/tuple of integers. - Integers may be negative. - input_shape: Reference input shape that the axis/axes refer to. - - Returns: - Normalized form of `axis`, i.e. a list with all-positive values. - """ - input_shape = tf.TensorShape(input_shape) - rank = input_shape.rank - if not rank: - raise ValueError( - f'Input has undefined rank. Received: input_shape={input_shape}') - - # Convert axis to list and resolve negatives - if isinstance(axis, int): - axis = [axis] - else: - axis = list(axis) - for idx, x in enumerate(axis): - if x < 0: - axis[idx] = rank + x - - # Validate axes - for x in axis: - if x < 0 or x >= rank: - raise ValueError( - 'Invalid value for `axis` argument. ' - 'Expected 0 <= axis < inputs.rank (with ' - f'inputs.rank={rank}). Received: axis={tuple(axis)}') - if len(axis) != len(set(axis)): - raise ValueError(f'Duplicate axis: {tuple(axis)}') - return axis + """Validate an axis value and returns its standardized form. + + Args: + axis: Value to validate. Can be an integer or a list/tuple of integers. + Integers may be negative. + input_shape: Reference input shape that the axis/axes refer to. 
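The round trip that `convert_shapes` above provides, as a sketch:

```python
import tensorflow as tf

from keras.utils import tf_utils

shapes = {"x": tf.TensorShape([None, 32]), "y": (None, 10, 3)}
as_tuples = tf_utils.convert_shapes(shapes, to_tuples=True)
assert as_tuples == {"x": (None, 32), "y": (None, 10, 3)}
# And back: every shape-like leaf becomes a TensorShape again.
as_shapes = tf_utils.convert_shapes(as_tuples, to_tuples=False)
assert as_shapes["x"] == tf.TensorShape([None, 32])
```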
+ + Returns: + Normalized form of `axis`, i.e. a list with all-positive values. + """ + input_shape = tf.TensorShape(input_shape) + rank = input_shape.rank + if not rank: + raise ValueError( + f"Input has undefined rank. Received: input_shape={input_shape}" + ) + + # Convert axis to list and resolve negatives + if isinstance(axis, int): + axis = [axis] + else: + axis = list(axis) + for idx, x in enumerate(axis): + if x < 0: + axis[idx] = rank + x + + # Validate axes + for x in axis: + if x < 0 or x >= rank: + raise ValueError( + "Invalid value for `axis` argument. " + "Expected 0 <= axis < inputs.rank (with " + f"inputs.rank={rank}). Received: axis={tuple(axis)}" + ) + if len(axis) != len(set(axis)): + raise ValueError(f"Duplicate axis: {tuple(axis)}") + return axis class ListWrapper: - """A wrapper for lists to be treated as elements for `nest`.""" + """A wrapper for lists to be treated as elements for `nest`.""" - def __init__(self, list_to_wrap): - self._list = list_to_wrap + def __init__(self, list_to_wrap): + self._list = list_to_wrap - def as_list(self): - return self._list + def as_list(self): + return self._list def convert_inner_node_data(nested, wrap=False): - """Either wraps or unwraps innermost node data lists in `ListWrapper` objects. - - Args: - nested: A nested data structure. - wrap: If `True`, wrap innermost lists in `ListWrapper` objects. If `False`, - unwraps `ListWrapper` objects into lists. - - Returns: - Structure of same type as nested, with lists wrapped/unwrapped. - """ - - def _is_serialized_node_data(nested): - # Node data can be of form `[layer_name, node_id, tensor_id]` or - # `[layer_name, node_id, tensor_id, kwargs]`. - if (isinstance(nested, list) and (len(nested) in [3, 4]) and - isinstance(nested[0], str)): - return True - return False - - def _is_atomic_nested(nested): - """Returns `True` if `nested` is a list representing node data.""" - if isinstance(nested, ListWrapper): - return True - if _is_serialized_node_data(nested): - return True - return not tf.nest.is_nested(nested) - - def _convert_object_or_list(nested): - """Convert b/t `ListWrapper` object and list representations.""" - if wrap: - if isinstance(nested, ListWrapper): - return nested - if _is_serialized_node_data(nested): - return ListWrapper(nested) - return nested - else: - if isinstance(nested, ListWrapper): - return nested.as_list() - return nested - - return map_structure_with_atomic(_is_atomic_nested, _convert_object_or_list, - nested) + """Either wraps or unwraps innermost node data lists in `ListWrapper` + objects. + + Args: + nested: A nested data structure. + wrap: If `True`, wrap innermost lists in `ListWrapper` objects. If + `False`, unwraps `ListWrapper` objects into lists. + + Returns: + Structure of same type as nested, with lists wrapped/unwrapped. + """ + + def _is_serialized_node_data(nested): + # Node data can be of form `[layer_name, node_id, tensor_id]` or + # `[layer_name, node_id, tensor_id, kwargs]`. 
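`validate_axis` above in action, as a short sketch: negative axes resolve against the rank, while out-of-range or duplicate axes raise `ValueError`:

```python
from keras.utils import tf_utils

assert tf_utils.validate_axis(-1, (None, 28, 28, 3)) == [3]
assert tf_utils.validate_axis([1, -1], (None, 4, 8)) == [1, 2]
```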
+ if ( + isinstance(nested, list) + and (len(nested) in [3, 4]) + and isinstance(nested[0], str) + ): + return True + return False + + def _is_atomic_nested(nested): + """Returns `True` if `nested` is a list representing node data.""" + if isinstance(nested, ListWrapper): + return True + if _is_serialized_node_data(nested): + return True + return not tf.nest.is_nested(nested) + + def _convert_object_or_list(nested): + """Convert b/t `ListWrapper` object and list representations.""" + if wrap: + if isinstance(nested, ListWrapper): + return nested + if _is_serialized_node_data(nested): + return ListWrapper(nested) + return nested + else: + if isinstance(nested, ListWrapper): + return nested.as_list() + return nested + + return map_structure_with_atomic( + _is_atomic_nested, _convert_object_or_list, nested + ) def shape_type_conversion(fn): - """Decorator that handles tuple/TensorShape conversion. + """Decorator that handles tuple/TensorShape conversion. - Used in `compute_output_shape` and `build`. + Used in `compute_output_shape` and `build`. - Args: - fn: function to wrap. + Args: + fn: function to wrap. - Returns: - Wrapped function. - """ + Returns: + Wrapped function. + """ - def wrapper(instance, input_shape): - # Pass shapes as tuples to `fn` - # This preserves compatibility with external Keras. - if input_shape is not None: - input_shape = convert_shapes(input_shape, to_tuples=True) - output_shape = fn(instance, input_shape) - # Return shapes from `fn` as TensorShapes. - if output_shape is not None: - output_shape = convert_shapes(output_shape, to_tuples=False) - return output_shape + def wrapper(instance, input_shape): + # Pass shapes as tuples to `fn` + # This preserves compatibility with external Keras. + if input_shape is not None: + input_shape = convert_shapes(input_shape, to_tuples=True) + output_shape = fn(instance, input_shape) + # Return shapes from `fn` as TensorShapes. + if output_shape is not None: + output_shape = convert_shapes(output_shape, to_tuples=False) + return output_shape - return wrapper + return wrapper def are_all_symbolic_tensors(tensors): - return all(map(is_symbolic_tensor, tensors)) + return all(map(is_symbolic_tensor, tensors)) _user_convertible_tensor_types = set() def is_extension_type(tensor): - """Returns whether a tensor is of an ExtensionType. + """Returns whether a tensor is of an ExtensionType. - github.com/tensorflow/community/pull/269 - Currently it works by checking if `tensor` is a `CompositeTensor` instance, - but this will be changed to use an appropriate extensiontype protocol - check once ExtensionType is made public. + github.com/tensorflow/community/pull/269 + Currently it works by checking if `tensor` is a `CompositeTensor` instance, + but this will be changed to use an appropriate extensiontype protocol + check once ExtensionType is made public. - Args: - tensor: An object to test + Args: + tensor: An object to test - Returns: - True if the tensor is an extension type object, false if not. - """ - return isinstance(tensor, tf.__internal__.CompositeTensor) + Returns: + True if the tensor is an extension type object, false if not. + """ + return isinstance(tensor, tf.__internal__.CompositeTensor) def is_symbolic_tensor(tensor): - """Returns whether a tensor is symbolic (from a TF graph) or an eager tensor. - - A Variable can be seen as either: it is considered symbolic - when we are in a graph scope, and eager when we are in an eager scope. - - Args: - tensor: A tensor instance to test. 
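What `shape_type_conversion` above buys a layer author, sketched on a hypothetical `Doubler` class: the wrapped `compute_output_shape` works purely with tuples, while callers still receive `TensorShape`s:

```python
import tensorflow as tf

from keras.utils import tf_utils

class Doubler:
    @tf_utils.shape_type_conversion
    def compute_output_shape(self, input_shape):
        # Arrives as a plain tuple, e.g. (None, 8).
        return input_shape[:-1] + (2 * input_shape[-1],)

out = Doubler().compute_output_shape(tf.TensorShape([None, 8]))
assert out == tf.TensorShape([None, 16])  # returned as a TensorShape
```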
- - Returns: - True for symbolic tensors, False for eager tensors. - """ - if isinstance(tensor, tf.Tensor): - return hasattr(tensor, 'graph') - elif is_extension_type(tensor): - component_tensors = tf.nest.flatten(tensor, expand_composites=True) - return any(hasattr(t, 'graph') for t in component_tensors) - elif isinstance(tensor, tf.Variable): - # Variables that are output of a Keras Layer in Functional API mode - # should be considered symbolic. - # TODO(omalleyt): We need a better way to check this in order to - # enable `run_eagerly=True` for Models containing Layers that - # return Variables as outputs. - return (getattr(tensor, '_keras_history', False) or - not tf.executing_eagerly()) - elif isinstance(tensor, tuple(_user_convertible_tensor_types)): - tensor = ops.convert_to_tensor_or_composite(tensor) - return is_symbolic_tensor(tensor) - else: - return False - - -@keras_export('keras.__internal__.utils.register_symbolic_tensor_type', v1=[]) + """Returns whether a tensor is symbolic (from a TF graph) or an eager + tensor. + + A Variable can be seen as either: it is considered symbolic + when we are in a graph scope, and eager when we are in an eager scope. + + Args: + tensor: A tensor instance to test. + + Returns: + True for symbolic tensors, False for eager tensors. + """ + if isinstance(tensor, tf.Tensor): + return hasattr(tensor, "graph") + elif is_extension_type(tensor): + component_tensors = tf.nest.flatten(tensor, expand_composites=True) + return any(hasattr(t, "graph") for t in component_tensors) + elif isinstance(tensor, tf.Variable): + # Variables that are output of a Keras Layer in Functional API mode + # should be considered symbolic. + # TODO(omalleyt): We need a better way to check this in order to + # enable `run_eagerly=True` for Models containing Layers that + # return Variables as outputs. + return ( + getattr(tensor, "_keras_history", False) + or not tf.executing_eagerly() + ) + elif isinstance(tensor, tuple(_user_convertible_tensor_types)): + tensor = ops.convert_to_tensor_or_composite(tensor) + return is_symbolic_tensor(tensor) + else: + return False + + +@keras_export("keras.__internal__.utils.register_symbolic_tensor_type", v1=[]) def register_symbolic_tensor_type(cls): - """Allows users to specify types regarded as symbolic `Tensor`s. + """Allows users to specify types regarded as symbolic `Tensor`s. - Used in conjunction with `tf.register_tensor_conversion_function`, calling - `tf.keras.__internal__.utils.register_symbolic_tensor_type(cls)` - allows non-`Tensor` objects to be plumbed through Keras layers. + Used in conjunction with `tf.register_tensor_conversion_function`, calling + `tf.keras.__internal__.utils.register_symbolic_tensor_type(cls)` + allows non-`Tensor` objects to be plumbed through Keras layers. - Example: + Example: - ```python - # One-time setup. - class Foo: - def __init__(self, input_): - self._input = input_ - def value(self): - return tf.constant(42.) + ```python + # One-time setup. + class Foo: + def __init__(self, input_): + self._input = input_ + def value(self): + return tf.constant(42.) - tf.register_tensor_conversion_function( - Foo, lambda x, *args, **kwargs: x.value()) + tf.register_tensor_conversion_function( + Foo, lambda x, *args, **kwargs: x.value()) - tf.keras.__internal__.utils.register_symbolic_tensor_type(Foo) + tf.keras.__internal__.utils.register_symbolic_tensor_type(Foo) - # User-land. - layer = tf.keras.layers.Lambda(lambda input_: Foo(input_)) - ``` + # User-land. 
+ layer = tf.keras.layers.Lambda(lambda input_: Foo(input_)) + ``` - Args: - cls: A `class` type which shall be regarded as a symbolic `Tensor`. - """ - global _user_convertible_tensor_types - if cls not in _user_convertible_tensor_types: - keras_tensor.register_keras_tensor_specialization( - cls, keras_tensor.UserRegisteredTypeKerasTensor) - _user_convertible_tensor_types.add(cls) + Args: + cls: A `class` type which shall be regarded as a symbolic `Tensor`. + """ + global _user_convertible_tensor_types + if cls not in _user_convertible_tensor_types: + keras_tensor.register_keras_tensor_specialization( + cls, keras_tensor.UserRegisteredTypeKerasTensor + ) + _user_convertible_tensor_types.add(cls) def type_spec_from_value(value): - """Grab type_spec without converting array-likes to tensors.""" - if is_extension_type(value): - return value._type_spec # pylint: disable=protected-access - # Get a TensorSpec for array-like data without - # converting the data to a Tensor - if hasattr(value, 'shape') and hasattr(value, 'dtype'): - return tf.TensorSpec(value.shape, value.dtype) - else: - return tf.type_spec_from_value(value) + """Grab type_spec without converting array-likes to tensors.""" + if is_extension_type(value): + return value._type_spec + # Get a TensorSpec for array-like data without + # converting the data to a Tensor + if hasattr(value, "shape") and hasattr(value, "dtype"): + return tf.TensorSpec(value.shape, value.dtype) + else: + return tf.type_spec_from_value(value) def is_ragged(tensor): - """Returns true if `tensor` is a ragged tensor or ragged tensor value.""" - return isinstance( - tensor, - (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)) + """Returns true if `tensor` is a ragged tensor or ragged tensor value.""" + return isinstance( + tensor, (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue) + ) def is_sparse(tensor): - """Returns true if `tensor` is a sparse tensor or sparse tensor value.""" - return isinstance( - tensor, - (tf.SparseTensor, tf.compat.v1.SparseTensorValue)) + """Returns true if `tensor` is a sparse tensor or sparse tensor value.""" + return isinstance(tensor, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)) def is_tensor_or_variable(x): - return tf.is_tensor(x) or isinstance(x, tf.Variable) + return tf.is_tensor(x) or isinstance(x, tf.Variable) def is_tensor_or_extension_type(x): - """Returns true if 'x' is a TF-native type or an ExtensionType.""" - return tf.is_tensor(x) or is_extension_type(x) + """Returns true if 'x' is a TF-native type or an ExtensionType.""" + return tf.is_tensor(x) or is_extension_type(x) + + +def convert_variables_to_tensors(values): + """Converts `Variable`s in `values` to `Tensor`s. + + This is a Keras version of `convert_variables_to_tensors` in TensorFlow + variable_utils.py. + + If an object in `values` is an `ExtensionType` and it overrides its + `_convert_variables_to_tensors` method, its `ResourceVariable` components + will also be converted to `Tensor`s. Objects other than `ResourceVariable`s + in `values` will be returned unchanged. + + Args: + values: A nested structure of `ResourceVariable`s, or any other objects. + + Returns: + A new structure with `ResourceVariable`s in `values` converted to + `Tensor`s. 
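`type_spec_from_value` above, sketched on a NumPy array: anything carrying `shape` and `dtype` yields a `TensorSpec` directly, skipping the Tensor conversion:

```python
import numpy as np

from keras.utils import tf_utils

spec = tf_utils.type_spec_from_value(np.zeros((2, 3), dtype="float32"))
print(spec)  # TensorSpec(shape=(2, 3), dtype=tf.float32, name=None)
```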
+ """ + + def _convert_resource_variable_to_tensor(x): + if isinstance(x, tf.Variable): + return tf.convert_to_tensor(x) + elif is_extension_type(x): + return x._convert_variables_to_tensors() + else: + return x + + return tf.nest.map_structure(_convert_resource_variable_to_tensor, values) def assert_no_legacy_layers(layers): - """Prevent tf.layers.Layers from being used with Keras. + """Prevent tf.layers.Layers from being used with Keras. - Certain legacy layers inherit from their keras analogs; however they are - not supported with keras and can lead to subtle and hard to diagnose bugs. + Certain legacy layers inherit from their keras analogs; however they are + not supported with keras and can lead to subtle and hard to diagnose bugs. - Args: - layers: A list of layers to check + Args: + layers: A list of layers to check - Raises: - TypeError: If any elements of layers are tf.layers.Layers - """ + Raises: + TypeError: If any elements of layers are tf.layers.Layers + """ - # isinstance check for tf.layers.Layer introduces a circular dependency. - legacy_layers = [l for l in layers if getattr(l, '_is_legacy_layer', None)] - if legacy_layers: - layer_str = '\n'.join(' ' + str(l) for l in legacy_layers) - raise TypeError( - f'The following are legacy tf.layers.Layers:\n{layer_str}\n' - 'To use keras as a ' - 'framework (for instance using the Network, Model, or Sequential ' - 'classes), please use the tf.keras.layers implementation instead. ' - '(Or, if writing custom layers, subclass from tf.keras.layers rather ' - 'than tf.layers)') + # isinstance check for tf.layers.Layer introduces a circular dependency. + legacy_layers = [l for l in layers if getattr(l, "_is_legacy_layer", None)] + if legacy_layers: + layer_str = "\n".join(" " + str(l) for l in legacy_layers) + raise TypeError( + f"The following are legacy tf.layers.Layers:\n{layer_str}\n" + "To use keras as a " + "framework (for instance using the Network, Model, or Sequential " + "classes), please use the tf.keras.layers implementation instead. " + "(Or, if writing custom layers, subclass from tf.keras.layers " + "rather than tf.layers)" + ) @tf_contextlib.contextmanager def maybe_init_scope(layer): - """Open an `init_scope` if in V2 mode and using the keras graph. - - Args: - layer: The Layer/Model that is currently active. - - Yields: - None - """ - # Don't open an init_scope in V1 mode or when using legacy tf.layers. - if (tf.compat.v1.executing_eagerly_outside_functions() and - getattr(layer, '_keras_style', True)): - with tf.init_scope(): - yield - else: - yield + """Open an `init_scope` if in V2 mode and using the keras graph. + + Args: + layer: The Layer/Model that is currently active. + + Yields: + None + """ + # Don't open an init_scope in V1 mode, when using legacy tf.layers, or in a + # local-variable scope. + # The local-variable scope should ensure that created variables are local to + # the function being executed, rather than lifted out of the graph by + # `init_scope`. This way the variables are freely usable and mutable within + # the function, which enables a visitation guarantee for model evaluation, + # when the scope is applied to metric variable creation. 
+ if ( + tf.compat.v1.executing_eagerly_outside_functions() + and getattr(layer, "_keras_style", True) + and not in_local_vars_context() + ): + with tf.init_scope(): + yield + else: + yield @tf_contextlib.contextmanager def graph_context_for_symbolic_tensors(*args, **kwargs): - """Returns graph context manager if any of the inputs is a symbolic tensor.""" - if any(is_symbolic_tensor(v) for v in list(args) + list(kwargs.values())): - with backend.get_graph().as_default(): - yield - else: - yield + """Returns graph context manager if any of the inputs is a symbolic + tensor.""" + if any(is_symbolic_tensor(v) for v in list(args) + list(kwargs.values())): + with backend.get_graph().as_default(): + yield + else: + yield def dataset_is_infinite(dataset): - """True if the passed dataset is infinite.""" - if tf.compat.v1.executing_eagerly_outside_functions(): - return tf.equal( - tf.data.experimental.cardinality(dataset), tf.data.experimental.INFINITE_CARDINALITY) - else: - dataset_size = backend.get_session().run( - tf.data.experimental.cardinality(dataset)) - return dataset_size == tf.data.experimental.INFINITE_CARDINALITY + """True if the passed dataset is infinite.""" + if tf.compat.v1.executing_eagerly_outside_functions(): + return tf.equal( + tf.data.experimental.cardinality(dataset), + tf.data.experimental.INFINITE_CARDINALITY, + ) + else: + dataset_size = backend.get_session().run( + tf.data.experimental.cardinality(dataset) + ) + return dataset_size == tf.data.experimental.INFINITE_CARDINALITY def get_tensor_spec(t, dynamic_batch=False, name=None): - """Returns a `TensorSpec` given a single `Tensor` or `TensorSpec`.""" - # pylint: disable=protected-access - if isinstance(t, tf.TypeSpec): - spec = t - elif is_extension_type(t): - # TODO(b/148821952): Should these specs have a name attr? - spec = t._type_spec - elif (hasattr(t, '_keras_history') and - hasattr(t._keras_history[0], '_type_spec')): - return t._keras_history[0]._type_spec - elif isinstance(t, keras_tensor.KerasTensor): - spec = t.type_spec - elif hasattr(t, 'shape') and hasattr(t, 'dtype'): - spec = tf.TensorSpec(shape=t.shape, dtype=t.dtype, name=name) - else: - return None # Allow non-Tensors to pass through. - # pylint: enable=protected-access - - if not dynamic_batch: - return spec - - shape = spec.shape - if shape.rank is None or shape.rank == 0: - return spec - - shape_list = shape.as_list() - shape_list[0] = None - # TODO(b/203201161) Remove this deepcopy one type_spec_with_shape has been - # updated to not mutate spec. - spec = copy.deepcopy(spec) - return keras_tensor.type_spec_with_shape(spec, tf.TensorShape(shape_list)) + """Returns a `TensorSpec` given a single `Tensor` or `TensorSpec`.""" + + if isinstance(t, tf.TypeSpec): + spec = t + elif is_extension_type(t): + # TODO(b/148821952): Should these specs have a name attr? + spec = t._type_spec + elif hasattr(t, "_keras_history") and hasattr( + t._keras_history[0], "_type_spec" + ): + return t._keras_history[0]._type_spec + elif isinstance(t, keras_tensor.KerasTensor): + spec = t.type_spec + elif hasattr(t, "shape") and hasattr(t, "dtype"): + spec = tf.TensorSpec(shape=t.shape, dtype=t.dtype, name=name) + else: + return None # Allow non-Tensors to pass through. + + if not dynamic_batch: + return spec + + shape = spec.shape + if shape.rank is None or shape.rank == 0: + return spec + + shape_list = shape.as_list() + shape_list[0] = None + # TODO(b/203201161) Remove this deepcopy one type_spec_with_shape has been + # updated to not mutate spec. 
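`dataset_is_infinite` above, sketched in eager mode (where it returns a boolean Tensor comparing the dataset's cardinality to `INFINITE_CARDINALITY`):

```python
import tensorflow as tf

from keras.utils import tf_utils

finite = tf.data.Dataset.range(5)
assert not bool(tf_utils.dataset_is_infinite(finite))
assert bool(tf_utils.dataset_is_infinite(finite.repeat()))
```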
+ spec = copy.deepcopy(spec) + return keras_tensor.type_spec_with_shape(spec, tf.TensorShape(shape_list)) def sync_to_numpy_or_python_type(tensors): - """Syncs and converts a structure of `Tensor`s to `NumPy` arrays or Python scalar types. + """Syncs and converts a structure of `Tensor`s to `NumPy` arrays or Python + scalar types. + + For each tensor, it calls `tensor.numpy()`. If the result is a scalar value, + it converts it to a Python type, such as a float or int, by calling + `result.item()`. + + Numpy scalars are converted, as Python types are often more convenient to + deal with. This is especially useful for bfloat16 Numpy scalars, which don't + support as many operations as other Numpy values. + + Async strategies (such as `TPUStrategy` and `ParameterServerStrategy`) are + forced to + sync during this process. + + Args: + tensors: A structure of tensors. + + Returns: + `tensors`, but scalar tensors are converted to Python types and non-scalar + tensors are converted to Numpy arrays. + """ + if isinstance(tensors, tf.distribute.experimental.coordinator.RemoteValue): + tensors = tensors.fetch() + if isinstance(tensors, list) and isinstance( + tensors[0], tf.distribute.experimental.coordinator.RemoteValue + ): + tensors = tf.nest.map_structure(lambda t: t.fetch(), tensors) + + def _to_single_numpy_or_python_type(t): + # Don't turn ragged or sparse tensors to NumPy. + if isinstance(t, tf.Tensor): + t = t.numpy() + # Strings, ragged and sparse tensors don't have .item(). Return them + # as-is. + if not isinstance(t, (np.ndarray, np.generic)): + return t + return t.item() if np.ndim(t) == 0 else t + + return tf.nest.map_structure(_to_single_numpy_or_python_type, tensors) - For each tensor, it calls `tensor.numpy()`. If the result is a scalar value, - it converts it to a Python type, such as a float or int, by calling - `result.item()`. - Numpy scalars are converted, as Python types are often more convenient to deal - with. This is especially useful for bfloat16 Numpy scalars, which don't - support as many operations as other Numpy values. +def _astuple(attrs): + """Converts the given attrs to tuple non-recursively.""" + cls = type(attrs) + fields = getattr(cls, "__attrs_attrs__", None) + if fields is None: + raise ValueError(f"{cls} is not an attrs-decorated class.") + values = [] + for field in fields: + values.append(getattr(attrs, field.name)) + return tuple(values) + + +def can_jit_compile(warn=False): + """Returns True if TensorFlow XLA is available for the platform.""" + if platform.system() == "Darwin" and "arm" in platform.processor().lower(): + if warn: + logging.warning( + "XLA (`jit_compile`) is not yet supported on Apple M1/M2 ARM " + "processors. Falling back to `jit_compile=False`." + ) + return False + if pywrap_tfe.TF_ListPluggablePhysicalDevices(): + if warn: + logging.warning( + "XLA (`jit_compile`) is not supported on your system. " + "Falling back to `jit_compile=False`." + ) + return False + return True - Async strategies (such as `TPUStrategy` and `ParameterServerStrategy`) are - forced to - sync during this process. - Args: - tensors: A structure of tensors. +_metric_local_vars_scope = threading.local() - Returns: - `tensors`, but scalar tensors are converted to Python types and non-scalar - tensors are converted to Numpy arrays. - """ - if isinstance(tensors, tf.distribute.experimental.coordinator.RemoteValue): - tensors = tensors.fetch() - def _to_single_numpy_or_python_type(t): - # Don't turn ragged or sparse tensors to NumPy. 
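The conversion rules of `sync_to_numpy_or_python_type` above, as a sketch: scalar tensors come back as Python scalars, non-scalar tensors as NumPy arrays:

```python
import tensorflow as tf

from keras.utils import tf_utils

results = {"loss": tf.constant(0.25), "preds": tf.constant([[1.0, 2.0]])}
out = tf_utils.sync_to_numpy_or_python_type(results)
assert isinstance(out["loss"], float)  # scalar -> Python float
assert out["preds"].shape == (1, 2)  # non-scalar -> NumPy array
```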
- if isinstance(t, tf.Tensor): - t = t.numpy() - # Strings, ragged and sparse tensors don't have .item(). Return them as-is. - if not isinstance(t, (np.ndarray, np.generic)): - return t - return t.item() if np.ndim(t) == 0 else t +def get_metric_local_vars_scope(): + try: + return _metric_local_vars_scope.current + except AttributeError: + return None - return tf.nest.map_structure(_to_single_numpy_or_python_type, tensors) +def in_local_vars_context(): + ctx = get_metric_local_vars_scope() + return ctx is not None -def _astuple(attrs): - """Converts the given attrs to tuple non-recursively.""" - cls = type(attrs) - fields = getattr(cls, '__attrs_attrs__', None) - if fields is None: - raise ValueError(f'{cls} is not an attrs-decorated class.') - values = [] - for field in fields: - values.append(getattr(attrs, field.name)) - return tuple(values) + +@contextlib.contextmanager +def with_metric_local_vars_scope(): + previous_scope = getattr(_metric_local_vars_scope, "current", None) + _metric_local_vars_scope.current = MetricLocalVarsScope() + yield + _metric_local_vars_scope.current = previous_scope + + +class MetricLocalVarsScope: + """Turn on local variable creation for Metrics. + + No functionality is needed here, it just exists to modulate Metric's + variable creation.""" diff --git a/keras/utils/tf_utils_test.py b/keras/utils/tf_utils_test.py index e02e3922f95b..023cd123f040 100644 --- a/keras/utils/tf_utils_test.py +++ b/keras/utils/tf_utils_test.py @@ -14,342 +14,475 @@ # ============================================================================== """Tests for Keras TF utils.""" +from unittest.mock import MagicMock +from unittest.mock import patch + +import numpy as np +import tensorflow.compat.v2 as tf from absl.testing import parameterized + import keras from keras.testing_infra import test_combinations from keras.utils import tf_utils -import numpy as np -import tensorflow.compat.v2 as tf try: - import attr # pylint:disable=g-import-not-at-top + import attr except ImportError: - attr = None + attr = None -@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager'])) +@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class TestIsSymbolicTensor(tf.test.TestCase, parameterized.TestCase): - - def test_default_behavior(self): - if tf.executing_eagerly(): - self.assertFalse(tf_utils.is_symbolic_tensor( - tf.Variable(name='blah', initial_value=0.))) - self.assertFalse( - tf_utils.is_symbolic_tensor( - tf.convert_to_tensor(0.))) - self.assertFalse(tf_utils.is_symbolic_tensor( - tf.SparseTensor( - indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]))) - else: - self.assertTrue(tf_utils.is_symbolic_tensor( - tf.Variable(name='blah', initial_value=0.))) - self.assertTrue( - tf_utils.is_symbolic_tensor( - tf.convert_to_tensor(0.))) - self.assertTrue(tf_utils.is_symbolic_tensor( - tf.SparseTensor( - indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]))) - - def test_works_with_registered(self): - - class CustomClass: - - def value(self): - return tf.convert_to_tensor(42.) 
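The metric local-variable scope defined above is a plain thread-local flag; a minimal sketch of how it toggles (assuming the `tf_utils` names introduced in this patch):

```python
from keras.utils import tf_utils

assert not tf_utils.in_local_vars_context()
with tf_utils.with_metric_local_vars_scope():
    # `maybe_init_scope` checks this flag and skips `tf.init_scope`, so
    # metric variable creation stays in the local (inner) context.
    assert tf_utils.in_local_vars_context()
# The previous scope (here: none) is restored on exit.
assert not tf_utils.in_local_vars_context()
```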
- - tf.register_tensor_conversion_function( - CustomClass, lambda value, **_: value.value()) - - tf_utils.register_symbolic_tensor_type(CustomClass) - - if tf.executing_eagerly(): - self.assertFalse(tf_utils.is_symbolic_tensor( - tf.Variable(name='blah', initial_value=0.))) - self.assertFalse( - tf_utils.is_symbolic_tensor( - tf.convert_to_tensor(0.))) - self.assertFalse(tf_utils.is_symbolic_tensor( - tf.SparseTensor( - indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]))) - self.assertFalse(tf_utils.is_symbolic_tensor(CustomClass())) - else: - self.assertTrue(tf_utils.is_symbolic_tensor( - tf.Variable(name='blah', initial_value=0.))) - self.assertTrue( - tf_utils.is_symbolic_tensor( - tf.convert_to_tensor(0.))) - self.assertTrue(tf_utils.is_symbolic_tensor( - tf.SparseTensor( - indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]))) - self.assertTrue(tf_utils.is_symbolic_tensor(CustomClass())) - - def test_enables_nontensor_plumbing(self): - if tf.executing_eagerly(): - self.skipTest('`compile` functionality changed.') - # Setup. - - class Foo: - - def __init__(self, input_): - self._input = input_ - self.value = tf.convert_to_tensor([[42.]]) - - @property - def dtype(self): - return self.value.dtype - - tf.register_tensor_conversion_function( - Foo, lambda x, *args, **kwargs: x.value) - tf_utils.register_symbolic_tensor_type(Foo) - - class PlumbingLayer(keras.layers.Lambda): - - def __init__(self, fn, **kwargs): - def _fn(*fargs, **fkwargs): - d = fn(*fargs, **fkwargs) - x = tf.convert_to_tensor(d) - d.shape = x.shape - d.get_shape = x.get_shape - return d, x - super().__init__(_fn, **kwargs) - self._enter_dunder_call = False - - def __call__(self, inputs, *args, **kwargs): - self._enter_dunder_call = True - d, _ = super().__call__(inputs, *args, **kwargs) - self._enter_dunder_call = False - return d - - def call(self, inputs, *args, **kwargs): - d, v = super().call(inputs, *args, **kwargs) - if self._enter_dunder_call: - return d, v - return d - - # User-land. - model = keras.Sequential([ - keras.layers.InputLayer((1,)), - PlumbingLayer(Foo), # Makes a `Foo` object. - ]) - # Let's ensure Keras graph history is preserved by composing the models. - model = keras.Model(model.inputs, model(model.outputs)) - # Now we instantiate the model and verify we have a `Foo` object, not a - # `Tensor`. - y = model(tf.convert_to_tensor([[7.]])) - self.assertIsInstance(y, Foo) - # Confirm that (custom) loss sees `Foo` instance, not Tensor. - obtained_prediction_box = [None] - def custom_loss(y_obs, y_pred): - del y_obs - obtained_prediction_box[0] = y_pred - return y_pred - # Apparently `compile` calls the loss function enough to trigger the - # side-effect. 
- model.compile('SGD', loss=custom_loss) - self.assertIsInstance(obtained_prediction_box[0], Foo) + def test_default_behavior(self): + if tf.executing_eagerly(): + self.assertFalse( + tf_utils.is_symbolic_tensor( + tf.Variable(name="blah", initial_value=0.0) + ) + ) + self.assertFalse( + tf_utils.is_symbolic_tensor(tf.convert_to_tensor(0.0)) + ) + self.assertFalse( + tf_utils.is_symbolic_tensor( + tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=[1, 2], + dense_shape=[3, 4], + ) + ) + ) + else: + self.assertTrue( + tf_utils.is_symbolic_tensor( + tf.Variable(name="blah", initial_value=0.0) + ) + ) + self.assertTrue( + tf_utils.is_symbolic_tensor(tf.convert_to_tensor(0.0)) + ) + self.assertTrue( + tf_utils.is_symbolic_tensor( + tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=[1, 2], + dense_shape=[3, 4], + ) + ) + ) + + def test_works_with_registered(self): + class CustomClass: + def value(self): + return tf.convert_to_tensor(42.0) + + tf.register_tensor_conversion_function( + CustomClass, lambda value, **_: value.value() + ) + + tf_utils.register_symbolic_tensor_type(CustomClass) + + if tf.executing_eagerly(): + self.assertFalse( + tf_utils.is_symbolic_tensor( + tf.Variable(name="blah", initial_value=0.0) + ) + ) + self.assertFalse( + tf_utils.is_symbolic_tensor(tf.convert_to_tensor(0.0)) + ) + self.assertFalse( + tf_utils.is_symbolic_tensor( + tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=[1, 2], + dense_shape=[3, 4], + ) + ) + ) + self.assertFalse(tf_utils.is_symbolic_tensor(CustomClass())) + else: + self.assertTrue( + tf_utils.is_symbolic_tensor( + tf.Variable(name="blah", initial_value=0.0) + ) + ) + self.assertTrue( + tf_utils.is_symbolic_tensor(tf.convert_to_tensor(0.0)) + ) + self.assertTrue( + tf_utils.is_symbolic_tensor( + tf.SparseTensor( + indices=[[0, 0], [1, 2]], + values=[1, 2], + dense_shape=[3, 4], + ) + ) + ) + self.assertTrue(tf_utils.is_symbolic_tensor(CustomClass())) + + def test_enables_nontensor_plumbing(self): + if tf.executing_eagerly(): + self.skipTest("`compile` functionality changed.") + # Setup. + + class Foo: + def __init__(self, input_): + self._input = input_ + self.value = tf.convert_to_tensor([[42.0]]) + + @property + def dtype(self): + return self.value.dtype + + tf.register_tensor_conversion_function( + Foo, lambda x, *args, **kwargs: x.value + ) + tf_utils.register_symbolic_tensor_type(Foo) + + class PlumbingLayer(keras.layers.Lambda): + def __init__(self, fn, **kwargs): + def _fn(*fargs, **fkwargs): + d = fn(*fargs, **fkwargs) + x = tf.convert_to_tensor(d) + d.shape = x.shape + d.get_shape = x.get_shape + return d, x + + super().__init__(_fn, **kwargs) + self._enter_dunder_call = False + + def __call__(self, inputs, *args, **kwargs): + self._enter_dunder_call = True + d, _ = super().__call__(inputs, *args, **kwargs) + self._enter_dunder_call = False + return d + + def call(self, inputs, *args, **kwargs): + d, v = super().call(inputs, *args, **kwargs) + if self._enter_dunder_call: + return d, v + return d + + # User-land. + model = keras.Sequential( + [ + keras.layers.InputLayer((1,)), + PlumbingLayer(Foo), # Makes a `Foo` object. + ] + ) + # Let's ensure Keras graph history is preserved by composing the models. + model = keras.Model(model.inputs, model(model.outputs)) + # Now we instantiate the model and verify we have a `Foo` object, not a + # `Tensor`. + y = model(tf.convert_to_tensor([[7.0]])) + self.assertIsInstance(y, Foo) + # Confirm that (custom) loss sees `Foo` instance, not Tensor. 
+ obtained_prediction_box = [None] + + def custom_loss(y_obs, y_pred): + del y_obs + obtained_prediction_box[0] = y_pred + return y_pred + + # Apparently `compile` calls the loss function enough to trigger the + # side-effect. + model.compile("SGD", loss=custom_loss) + self.assertIsInstance(obtained_prediction_box[0], Foo) class ConvertInnerNodeDataTest(tf.test.TestCase): - - def test_convert_inner_node_data(self): - data = tf_utils.convert_inner_node_data((tf_utils.ListWrapper(['l', 2, 3]), - tf_utils.ListWrapper(['l', 5, 6]))) - self.assertEqual(data, (['l', 2, 3], ['l', 5, 6])) - - data = tf_utils.convert_inner_node_data(((['l', 2, 3], ['l', 5, 6])), - wrap=True) - self.assertTrue(all(isinstance(ele, tf_utils.ListWrapper) for ele in data)) + def test_convert_inner_node_data(self): + data = tf_utils.convert_inner_node_data( + ( + tf_utils.ListWrapper(["l", 2, 3]), + tf_utils.ListWrapper(["l", 5, 6]), + ) + ) + self.assertEqual(data, (["l", 2, 3], ["l", 5, 6])) + + data = tf_utils.convert_inner_node_data( + ((["l", 2, 3], ["l", 5, 6])), wrap=True + ) + self.assertTrue( + all(isinstance(ele, tf_utils.ListWrapper) for ele in data) + ) class AttrsTest(tf.test.TestCase): + def test_map_structure_with_atomic_accept_attr(self): + if attr is None: + self.skipTest("attr module is unavailable.") - def test_map_structure_with_atomic_accept_attr(self): - if attr is None: - self.skipTest('attr module is unavailable.') + @attr.s(frozen=True) + class Foo: - @attr.s(frozen=True) - class Foo: + bar = attr.ib() - bar = attr.ib() - - self.assertEqual( - Foo(2), - tf_utils.map_structure_with_atomic( - is_atomic_fn=lambda x: isinstance(x, int), - map_fn=lambda x: x + 1, - nested=Foo(1))) + self.assertEqual( + Foo(2), + tf_utils.map_structure_with_atomic( + is_atomic_fn=lambda x: isinstance(x, int), + map_fn=lambda x: x + 1, + nested=Foo(1), + ), + ) class TestIsRagged(tf.test.TestCase): + def test_is_ragged_return_true_for_ragged_tensor(self): + tensor = tf.RaggedTensor.from_row_splits( + values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8] + ) + self.assertTrue(tf_utils.is_ragged(tensor)) - def test_is_ragged_return_true_for_ragged_tensor(self): - tensor = tf.RaggedTensor.from_row_splits( - values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) - self.assertTrue(tf_utils.is_ragged(tensor)) - - def test_is_ragged_return_false_for_list(self): - tensor = [1., 2., 3.] - self.assertFalse(tf_utils.is_ragged(tensor)) + def test_is_ragged_return_false_for_list(self): + tensor = [1.0, 2.0, 3.0] + self.assertFalse(tf_utils.is_ragged(tensor)) class TestIsSparse(tf.test.TestCase): + def test_is_sparse_return_true_for_sparse_tensor(self): + tensor = tf.SparseTensor( + indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4] + ) + self.assertTrue(tf_utils.is_sparse(tensor)) - def test_is_sparse_return_true_for_sparse_tensor(self): - tensor = tf.SparseTensor( - indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]) - self.assertTrue(tf_utils.is_sparse(tensor)) - - def test_is_sparse_return_true_for_sparse_tensor_value(self): - tensor = tf.compat.v1.SparseTensorValue( - indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]) - self.assertTrue(tf_utils.is_sparse(tensor)) + def test_is_sparse_return_true_for_sparse_tensor_value(self): + tensor = tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4] + ) + self.assertTrue(tf_utils.is_sparse(tensor)) - def test_is_sparse_return_false_for_list(self): - tensor = [1., 2., 3.] 
- self.assertFalse(tf_utils.is_sparse(tensor)) + def test_is_sparse_return_false_for_list(self): + tensor = [1.0, 2.0, 3.0] + self.assertFalse(tf_utils.is_sparse(tensor)) class TestIsExtensionType(tf.test.TestCase): + def test_is_extension_type_return_true_for_ragged_tensor(self): + self.assertTrue( + tf_utils.is_extension_type(tf.ragged.constant([[1, 2], [3]])) + ) - def test_is_extension_type_return_true_for_ragged_tensor(self): - self.assertTrue(tf_utils.is_extension_type( - tf.ragged.constant([[1, 2], [3]]))) + def test_is_extension_type_return_true_for_sparse_tensor(self): + self.assertTrue( + tf_utils.is_extension_type(tf.sparse.from_dense([[1, 2], [3, 4]])) + ) - def test_is_extension_type_return_true_for_sparse_tensor(self): - self.assertTrue(tf_utils.is_extension_type( - tf.sparse.from_dense([[1, 2], [3, 4]]))) + def test_is_extension_type_return_false_for_dense_tensor(self): + self.assertFalse( + tf_utils.is_extension_type(tf.constant([[1, 2], [3, 4]])) + ) - def test_is_extension_type_return_false_for_dense_tensor(self): - self.assertFalse(tf_utils.is_extension_type( - tf.constant([[1, 2], [3, 4]]))) - - def test_is_extension_type_return_false_for_list(self): - tensor = [1., 2., 3.] - self.assertFalse(tf_utils.is_extension_type(tensor)) + def test_is_extension_type_return_false_for_list(self): + tensor = [1.0, 2.0, 3.0] + self.assertFalse(tf_utils.is_extension_type(tensor)) class TestIsTensorOrExtensionType(tf.test.TestCase): - - def test_is_tensor_or_extension_type_return_true_for_ragged_tensor(self): - self.assertTrue(tf_utils.is_tensor_or_extension_type( - tf.ragged.constant([[1, 2], [3]]))) - - def test_is_tensor_or_extension_type_return_true_for_sparse_tensor(self): - self.assertTrue(tf_utils.is_tensor_or_extension_type( - tf.sparse.from_dense([[1, 2], [3, 4]]))) - - def test_is_tensor_or_extension_type_return_true_for_dense_tensor(self): - self.assertTrue(tf_utils.is_tensor_or_extension_type( - tf.constant([[1, 2], [3, 4]]))) - - def test_is_tensor_or_extension_type_return_true_for_custom_ext_types(self): - class DummyExtensionType(tf.experimental.ExtensionType): - ... - self.assertTrue(tf_utils.is_tensor_or_extension_type(DummyExtensionType())) - - def test_is_tensor_or_extension_type_return_false_for_list(self): - self.assertFalse(tf_utils.is_tensor_or_extension_type([1., 2., 3.])) + def test_is_tensor_or_extension_type_return_true_for_ragged_tensor(self): + self.assertTrue( + tf_utils.is_tensor_or_extension_type( + tf.ragged.constant([[1, 2], [3]]) + ) + ) + + def test_is_tensor_or_extension_type_return_true_for_sparse_tensor(self): + self.assertTrue( + tf_utils.is_tensor_or_extension_type( + tf.sparse.from_dense([[1, 2], [3, 4]]) + ) + ) + + def test_is_tensor_or_extension_type_return_true_for_dense_tensor(self): + self.assertTrue( + tf_utils.is_tensor_or_extension_type(tf.constant([[1, 2], [3, 4]])) + ) + + def test_is_tensor_or_extension_type_return_true_for_custom_ext_types(self): + class DummyExtensionType(tf.experimental.ExtensionType): + ... 
+ + self.assertTrue( + tf_utils.is_tensor_or_extension_type(DummyExtensionType()) + ) + + def test_is_tensor_or_extension_type_return_false_for_list(self): + self.assertFalse(tf_utils.is_tensor_or_extension_type([1.0, 2.0, 3.0])) + + +@test_combinations.generate(test_combinations.combine(mode=["eager"])) +class TestConvertVariablesToTensors(tf.test.TestCase): + def test_convert_variables_to_tensors(self): + x = tf.Variable([1.0]) + result = tf_utils.convert_variables_to_tensors(x) + self.assertIsInstance(result, tf.Tensor) + self.assertAllEqual(result, [1.0]) + + def test_convert_variables_in_list_to_tensors(self): + x = [tf.Variable([1.0]), tf.constant([2.0])] + result = tf_utils.convert_variables_to_tensors(x) + self.assertLen(result, 2) + self.assertIsInstance(result[0], tf.Tensor) + self.assertAllEqual(result[0], [1.0]) + self.assertIs(result[1], x[1]) + + def test_convert_variables_in_composite_tensor_to_tensors(self): + class Spec(tf.TypeSpec): + value_type = property(lambda self: CompositeVariable) + + def _serialize(self): + pass + + def _component_specs(self): + pass + + def _to_components(self, value): + return value.variables + + def _from_components(self, variable_list): + return CompositeVariable(variable_list) + + class CompositeVariable(tf.__internal__.CompositeTensor): + def __init__(self, variable_list): + self.variables = variable_list + + @property + def _type_spec(self): + return Spec() + + def _convert_variables_to_tensors(self): + self.variables = tf.nest.map_structure( + tf_utils.convert_variables_to_tensors, self.variables + ) + return self + + cv = CompositeVariable([tf.Variable([1.0])]) + self.assertIsInstance(cv.variables[0], tf.Variable) + result = tf_utils.convert_variables_to_tensors(cv) + self.assertLen(result.variables, 1) + self.assertIsInstance(result.variables[0], tf.Tensor) + self.assertAllEqual(result.variables[0], [1.0]) class TestRandomSeedSetting(tf.test.TestCase): - - def test_seeds(self): - if not tf.__internal__.tf2.enabled(): - self.skipTest('set_random_seed() is only expected to work in tf2.') - def get_model_output(): - model = keras.Sequential([ - keras.layers.Dense(10), - keras.layers.Dropout(0.5), - keras.layers.Dense(10), - ]) - x = np.random.random((32, 10)).astype('float32') - ds = tf.data.Dataset.from_tensor_slices(x).shuffle(32).batch(16) - return model.predict(ds) - - tf_utils.set_random_seed(42) - y1 = get_model_output() - tf_utils.set_random_seed(42) - y2 = get_model_output() - self.assertAllClose(y1, y2, atol=1e-6) + def test_seeds(self): + if not tf.__internal__.tf2.enabled(): + self.skipTest("set_random_seed() is only expected to work in tf2.") + + def get_model_output(): + model = keras.Sequential( + [ + keras.layers.Dense(10), + keras.layers.Dropout(0.5), + keras.layers.Dense(10), + ] + ) + x = np.random.random((32, 10)).astype("float32") + ds = tf.data.Dataset.from_tensor_slices(x).shuffle(32).batch(16) + return model.predict(ds) + + tf_utils.set_random_seed(42) + y1 = get_model_output() + tf_utils.set_random_seed(42) + y2 = get_model_output() + self.assertAllClose(y1, y2, atol=1e-6) class CustomTypeSpec(tf.TypeSpec): - """Stubbed-out custom type spec, for testing.""" + """Stubbed-out custom type spec, for testing.""" - def __init__(self, shape, dtype): - self.shape = tf.TensorShape(shape) - self.dtype = tf.dtypes.as_dtype(dtype) + def __init__(self, shape, dtype): + self.shape = tf.TensorShape(shape) + self.dtype = tf.dtypes.as_dtype(dtype) - def with_shape(self, new_shape): - return CustomTypeSpec(new_shape, self.dtype) + def 
with_shape(self, new_shape): + return CustomTypeSpec(new_shape, self.dtype) - # Stub implementations for all the TypeSpec methods: - value_type = None - _to_components = lambda self, value: None - _from_components = lambda self, components: None - _component_specs = property(lambda self: None) - _serialize = lambda self: (self.shape, self.dtype) + # Stub implementations for all the TypeSpec methods: + value_type = None + _to_components = lambda self, value: None + _from_components = lambda self, components: None + _component_specs = property(lambda self: None) + _serialize = lambda self: (self.shape, self.dtype) class TestGetTensorSpec(parameterized.TestCase): - - @parameterized.parameters([ - (lambda: tf.constant([[1, 2]]), [1, 2]), - (tf.TensorSpec([8, 3], tf.int32), [8, 3]), - (tf.TensorSpec([8], tf.int32), [8]), - (tf.TensorSpec([], tf.int32), []), - (tf.TensorSpec(None, tf.int32), None), - (tf.RaggedTensorSpec([8, 3], tf.int32), [8, 3]), - (tf.SparseTensorSpec([8, 3], tf.int32), [8, 3]), - ]) - def test_without_dynamic_batch(self, t, expected_shape): - if callable(t): - t = t() - result = tf_utils.get_tensor_spec(t) - self.assertTrue(result.is_compatible_with(t)) - if expected_shape is None: - self.assertIsNone(result.shape.rank) - else: - self.assertEqual(result.shape.as_list(), expected_shape) - - @parameterized.parameters([ - (lambda: tf.constant([[1, 2]]), [None, 2]), - (tf.TensorSpec([8, 3], tf.int32), [None, 3]), - (tf.TensorSpec([8], tf.int32), [None]), - (tf.TensorSpec([], tf.int32), []), - (tf.TensorSpec(None, tf.int32), None), - (tf.RaggedTensorSpec([8, 3], tf.int32), [None, 3]), - (tf.SparseTensorSpec([8, 3], tf.int32), [None, 3]), - ]) - def test_with_dynamic_batch(self, t, expected_shape): - if callable(t): - t = t() - result = tf_utils.get_tensor_spec(t, True) - self.assertTrue(result.is_compatible_with(t)) - if expected_shape is None: - self.assertIsNone(result.shape.rank) - else: - self.assertEqual(result.shape.as_list(), expected_shape) - - def test_with_keras_tensor_with_ragged_spec(self): - t = keras.engine.keras_tensor.KerasTensor( - tf.RaggedTensorSpec(shape=(None, None, 1))) - self.assertIsInstance(tf_utils.get_tensor_spec(t), tf.RaggedTensorSpec) + @parameterized.parameters( + [ + (lambda: tf.constant([[1, 2]]), [1, 2]), + (tf.TensorSpec([8, 3], tf.int32), [8, 3]), + (tf.TensorSpec([8], tf.int32), [8]), + (tf.TensorSpec([], tf.int32), []), + (tf.TensorSpec(None, tf.int32), None), + (tf.RaggedTensorSpec([8, 3], tf.int32), [8, 3]), + (tf.SparseTensorSpec([8, 3], tf.int32), [8, 3]), + ] + ) + def test_without_dynamic_batch(self, t, expected_shape): + if callable(t): + t = t() + result = tf_utils.get_tensor_spec(t) + self.assertTrue(result.is_compatible_with(t)) + if expected_shape is None: + self.assertIsNone(result.shape.rank) + else: + self.assertEqual(result.shape.as_list(), expected_shape) + + @parameterized.parameters( + [ + (lambda: tf.constant([[1, 2]]), [None, 2]), + (tf.TensorSpec([8, 3], tf.int32), [None, 3]), + (tf.TensorSpec([8], tf.int32), [None]), + (tf.TensorSpec([], tf.int32), []), + (tf.TensorSpec(None, tf.int32), None), + (tf.RaggedTensorSpec([8, 3], tf.int32), [None, 3]), + (tf.SparseTensorSpec([8, 3], tf.int32), [None, 3]), + ] + ) + def test_with_dynamic_batch(self, t, expected_shape): + if callable(t): + t = t() + result = tf_utils.get_tensor_spec(t, True) + self.assertTrue(result.is_compatible_with(t)) + if expected_shape is None: + self.assertIsNone(result.shape.rank) + else: + self.assertEqual(result.shape.as_list(), expected_shape) + + def 
test_with_keras_tensor_with_ragged_spec(self): + t = keras.engine.keras_tensor.KerasTensor( + tf.RaggedTensorSpec(shape=(None, None, 1)) + ) + self.assertIsInstance(tf_utils.get_tensor_spec(t), tf.RaggedTensorSpec) class TestSyncToNumpyOrPythonType(parameterized.TestCase): + @parameterized.parameters( + [ + (0.5,), + (b"string value",), + ] + ) + def test_types(self, value): + if not tf.executing_eagerly(): + self.skipTest("`sync_to_numpy_or_python_type` only works in eager") + tensor = tf.constant(value) + + self.assertEqual(tf_utils.sync_to_numpy_or_python_type(tensor), value) + + +class TestCanJitCompile(tf.test.TestCase): + def test_darwin_arm_xla(self): + with patch("platform.processor", MagicMock(return_value="arm")): + with patch("platform.system", MagicMock(return_value="Darwin")): + self.assertFalse(tf_utils.can_jit_compile()) - @parameterized.parameters([ - (0.5,), - (b'string value',), - ]) - def test_types(self, value): - if not tf.executing_eagerly(): - self.skipTest('`sync_to_numpy_or_python_type` only works in eager') - tensor = tf.constant(value) + def test_linux_xla(self): + with patch("platform.system", MagicMock(return_value="Linux")): + self.assertTrue(tf_utils.can_jit_compile()) - self.assertEqual(tf_utils.sync_to_numpy_or_python_type( - tensor), value) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/timed_threads.py b/keras/utils/timed_threads.py new file mode 100644 index 000000000000..794fd243c42b --- /dev/null +++ b/keras/utils/timed_threads.py @@ -0,0 +1,148 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Thread utilities.""" + +import abc +import threading + +from absl import logging +from tensorflow.python.util.tf_export import keras_export + + +@keras_export("keras.utils.TimedThread", v1=[]) +class TimedThread: + """Time-based interval Threads. + + Runs a timed thread every x seconds. It can be used to run a threaded + function alongside model training or any other snippet of code. + + Args: + interval: The interval, in seconds, to wait between calls to the + `on_interval` function. + **kwargs: additional args that are passed to `threading.Thread`. By + default, `Thread` is started as a `daemon` thread unless + overridden by the user in `kwargs`. + + Examples: + + ```python + class TimedLogIterations(keras.utils.TimedThread): + def __init__(self, model, interval): + self.model = model + super().__init__(interval) + + def on_interval(self): + # Logs Optimizer iterations every x seconds + try: + opt_iterations = self.model.optimizer.iterations.numpy() + print(f"Optimizer Iterations: {opt_iterations}") + except Exception as e: + print(str(e)) # To prevent thread from getting killed + + # `start` and `stop` the `TimedThread` manually. If the `on_interval` call + # requires access to `model` or other objects, override `__init__` method.
+ # Wrap it in a `try-except` to handle exceptions and `stop` the thread run. + timed_logs = TimedLogIterations(model=model, interval=5) + timed_logs.start() + try: + model.fit(...) + finally: + timed_logs.stop() + + # Alternatively, run the `TimedThread` in a context manager + with TimedLogIterations(model=model, interval=5): + model.fit(...) + + # If the timed thread instance needs access to callback events, + # subclass both `TimedThread` and `Callback`. Note that when calling + # `super`, it will have to be called for each parent class if both of them + # have the method that needs to be run. Also, note that `Callback` has + # access to `model` as an attribute and need not be explicitly provided. + class LogThreadCallback( + keras.utils.TimedThread, keras.callbacks.Callback + ): + def __init__(self, interval): + self._epoch = 0 + keras.utils.TimedThread.__init__(self, interval) + keras.callbacks.Callback.__init__(self) + + def on_interval(self): + if self._epoch: + opt_iter = self.model.optimizer.iterations.numpy() + logging.info(f"Epoch: {self._epoch}, Opt Iteration: {opt_iter}") + + def on_epoch_begin(self, epoch, logs=None): + self._epoch = epoch + + with LogThreadCallback(interval=5) as thread_callback: + # It's required to also pass `thread_callback` to the `callbacks` arg + # of `model.fit` to be triggered on callback events. + model.fit(..., callbacks=[thread_callback]) + ``` + """ + + def __init__(self, interval, **kwargs): + self.interval = interval + self.daemon = kwargs.pop("daemon", True) + self.thread_kwargs = kwargs + self.thread = None + self.thread_stop_event = None + + def _call_on_interval(self): + # Runs indefinitely once thread is started + while not self.thread_stop_event.is_set(): + self.on_interval() + self.thread_stop_event.wait(self.interval) + + def start(self): + """Creates and starts the thread run.""" + if self.thread and self.thread.is_alive(): + logging.warning("Thread is already running.") + return + self.thread = threading.Thread( + target=self._call_on_interval, + daemon=self.daemon, + **self.thread_kwargs + ) + self.thread_stop_event = threading.Event() + self.thread.start() + + def stop(self): + """Stops the thread run.""" + if self.thread_stop_event: + self.thread_stop_event.set() + + def is_alive(self): + """Returns True if thread is running. Otherwise returns False.""" + if self.thread: + return self.thread.is_alive() + return False + + def __enter__(self): + # Starts the thread in context manager + self.start() + return self + + def __exit__(self, *args, **kwargs): + # Stops the thread run. + self.stop() + + @abc.abstractmethod + def on_interval(self): + """User-defined behavior that is called in the thread.""" + raise NotImplementedError( + "Runs every x interval seconds. Needs to be " + "implemented in subclasses of `TimedThread`" + ) diff --git a/keras/utils/timed_threads_test.py b/keras/utils/timed_threads_test.py new file mode 100644 index 000000000000..011603feb268 --- /dev/null +++ b/keras/utils/timed_threads_test.py @@ -0,0 +1,119 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for timed_threads.""" + +import time + +import tensorflow.compat.v2 as tf +from absl import logging + +import keras +from keras.testing_infra import test_combinations +from keras.testing_infra import test_utils +from keras.utils import timed_threads + + +@test_utils.run_v2_only +class TimedThreadTest(test_combinations.TestCase): + def test_timed_thread_run(self): + class LogThread(timed_threads.TimedThread): + def on_interval(self): + logging.info("Thread Run") + + log_thread = LogThread(interval=0.1) + with self.assertLogs(level="INFO") as logs: + log_thread.start() + time.sleep(1) + self.assertTrue(log_thread.is_alive()) + log_thread.stop() + self.assertIn("INFO:absl:Thread Run", logs.output) + time.sleep(0.1) + self.assertFalse(log_thread.is_alive()) + + def test_timed_thread_restart(self): + # Verify that thread can be started and stopped multiple times. + class LogThread(timed_threads.TimedThread): + def on_interval(self): + logging.info("Thread Run") + + log_thread = LogThread(interval=0.1) + for _ in range(2): + self.assertFalse(log_thread.is_alive()) + with self.assertLogs(level="INFO") as logs: + log_thread.start() + time.sleep(1) + self.assertTrue(log_thread.is_alive()) + log_thread.stop() + self.assertIn("INFO:absl:Thread Run", logs.output) + time.sleep(0.1) + self.assertFalse(log_thread.is_alive()) + + def test_timed_thread_running_warning(self): + # Verify thread start warning if it's already running + class LogThread(timed_threads.TimedThread): + def on_interval(self): + logging.info("Thread Run") + + log_thread = LogThread(interval=0.1) + self.assertFalse(log_thread.is_alive()) + with self.assertLogs(level="INFO") as logs: + log_thread.start() + time.sleep(1) + self.assertTrue(log_thread.is_alive()) + self.assertIn("INFO:absl:Thread Run", logs.output) + with self.assertLogs(level="WARNING") as logs: + log_thread.start() + self.assertIn( + "WARNING:absl:Thread is already running.", logs.output + ) + self.assertTrue(log_thread.is_alive()) + log_thread.stop() + time.sleep(0.1) + self.assertFalse(log_thread.is_alive()) + + def test_timed_thread_callback_model_fit(self): + class LogThreadCallback( + timed_threads.TimedThread, keras.callbacks.Callback + ): + def __init__(self, interval): + self._epoch = 0 + timed_threads.TimedThread.__init__(self, interval=interval) + keras.callbacks.Callback.__init__(self) + + def on_interval(self): + if self._epoch: + # Verify that `model` is accessible.
+ _ = self.model.optimizer.iterations.numpy() + logging.info(f"Thread Run Epoch: {self._epoch}") + + def on_epoch_begin(self, epoch, logs=None): + self._epoch = epoch + time.sleep(1) + + x = tf.random.normal((32, 2)) + y = tf.ones((32, 1), dtype=tf.float32) + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile(loss="mse") + with self.assertLogs(level="INFO") as logs, LogThreadCallback( + interval=0.1 + ) as log_thread_callback: + self.assertIsNone(log_thread_callback.model) + model.fit(x, y, epochs=2, callbacks=[log_thread_callback]) + self.assertIsNotNone(log_thread_callback.model) + self.assertIn("INFO:absl:Thread Run Epoch: 1", logs.output) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/timeseries_dataset.py b/keras/utils/timeseries_dataset.py index 92fa2eb08588..c81dc18ef32c 100644 --- a/keras/utils/timeseries_dataset.py +++ b/keras/utils/timeseries_dataset.py @@ -14,16 +14,18 @@ # ============================================================================== """Keras timeseries dataset utilities.""" +import numpy as np import tensorflow.compat.v2 as tf -# pylint: disable=g-classes-have-attributes -import numpy as np +# isort: off from tensorflow.python.util.tf_export import keras_export -@keras_export('keras.utils.timeseries_dataset_from_array', - 'keras.preprocessing.timeseries_dataset_from_array', - v1=[]) +@keras_export( + "keras.utils.timeseries_dataset_from_array", + "keras.preprocessing.timeseries_dataset_from_array", + v1=[], +) def timeseries_dataset_from_array( data, targets, @@ -34,212 +36,242 @@ def timeseries_dataset_from_array( shuffle=False, seed=None, start_index=None, - end_index=None): - """Creates a dataset of sliding windows over a timeseries provided as array. - - This function takes in a sequence of data-points gathered at - equal intervals, along with time series parameters such as - length of the sequences/windows, spacing between two sequence/windows, etc., - to produce batches of timeseries inputs and targets. - - Args: - data: Numpy array or eager tensor - containing consecutive data points (timesteps). - Axis 0 is expected to be the time dimension. - targets: Targets corresponding to timesteps in `data`. - `targets[i]` should be the target - corresponding to the window that starts at index `i` - (see example 2 below). - Pass None if you don't have target data (in this case the dataset will - only yield the input data). - sequence_length: Length of the output sequences (in number of timesteps). - sequence_stride: Period between successive output sequences. - For stride `s`, output samples would - start at index `data[i]`, `data[i + s]`, `data[i + 2 * s]`, etc. - sampling_rate: Period between successive individual timesteps - within sequences. For rate `r`, timesteps - `data[i], data[i + r], ... data[i + sequence_length]` - are used for creating a sample sequence. - batch_size: Number of timeseries samples in each batch - (except maybe the last one). If `None`, the data will not be batched - (the dataset will yield individual samples). - shuffle: Whether to shuffle output samples, - or instead draw them in chronological order. - seed: Optional int; random seed for shuffling. - start_index: Optional int; data points earlier (exclusive) - than `start_index` will not be used - in the output sequences. This is useful to reserve part of the - data for test or validation. - end_index: Optional int; data points later (exclusive) than `end_index` - will not be used in the output sequences. 
- This is useful to reserve part of the data for test or validation. - - Returns: - A tf.data.Dataset instance. If `targets` was passed, the dataset yields + end_index=None, +): + """Creates a dataset of sliding windows over a timeseries provided as array. + + This function takes in a sequence of data-points gathered at + equal intervals, along with time series parameters such as + length of the sequences/windows, spacing between two sequence/windows, etc., + to produce batches of timeseries inputs and targets. + + Args: + data: Numpy array or eager tensor + containing consecutive data points (timesteps). + Axis 0 is expected to be the time dimension. + targets: Targets corresponding to timesteps in `data`. + `targets[i]` should be the target + corresponding to the window that starts at index `i` + (see example 2 below). + Pass `None` if you don't have target data (in this case the dataset + will only yield the input data). + sequence_length: Length of the output sequences + (in number of timesteps). + sequence_stride: Period between successive output sequences. + For stride `s`, output samples would + start at index `data[i]`, `data[i + s]`, `data[i + 2 * s]`, etc. + sampling_rate: Period between successive individual timesteps + within sequences. For rate `r`, timesteps + `data[i], data[i + r], ... data[i + sequence_length]` + are used for creating a sample sequence. + batch_size: Number of timeseries samples in each batch + (except maybe the last one). If `None`, the data will not be batched + (the dataset will yield individual samples). + shuffle: Whether to shuffle output samples, + or instead draw them in chronological order. + seed: Optional int; random seed for shuffling. + start_index: Optional int; data points earlier (exclusive) + than `start_index` will not be used + in the output sequences. This is useful to reserve part of the + data for test or validation. + end_index: Optional int; data points later (exclusive) than `end_index` + will not be used in the output sequences. + This is useful to reserve part of the data for test or validation. + + Returns: + + A `tf.data.Dataset` instance. If `targets` was passed, the dataset yields tuple `(batch_of_sequences, batch_of_targets)`. If not, the dataset yields only `batch_of_sequences`. - Example 1: - - Consider indices `[0, 1, ... 99]`. - With `sequence_length=10, sampling_rate=2, sequence_stride=3`, - `shuffle=False`, the dataset will yield batches of sequences - composed of the following indices: - - ``` - First sequence: [0 2 4 6 8 10 12 14 16 18] - Second sequence: [3 5 7 9 11 13 15 17 19 21] - Third sequence: [6 8 10 12 14 16 18 20 22 24] - ... - Last sequence: [78 80 82 84 86 88 90 92 94 96] - ``` - - In this case the last 3 data points are discarded since no full sequence - can be generated to include them (the next sequence would have started - at index 81, and thus its last step would have gone over 99). - - Example 2: Temporal regression. - - Consider an array `data` of scalar values, of shape `(steps,)`. 
- To generate a dataset that uses the past 10 - timesteps to predict the next timestep, you would use: - - ```python - input_data = data[:-10] - targets = data[10:] - dataset = tf.keras.preprocessing.timeseries_dataset_from_array( - input_data, targets, sequence_length=10) - for batch in dataset: - inputs, targets = batch - assert np.array_equal(inputs[0], data[:10]) # First sequence: steps [0-9] - assert np.array_equal(targets[0], data[10]) # Corresponding target: step 10 - break - ``` - - Example 3: Temporal regression for many-to-many architectures. - - Consider two arrays of scalar values `X` and `Y`, - both of shape `(100,)`. The resulting dataset should consist samples with - 20 timestamps each. The samples should not overlap. - To generate a dataset that uses the current timestamp - to predict the corresponding target timestep, you would use: - - ```python - X = np.arange(100) - Y = X*2 - - sample_length = 20 - input_dataset = tf.keras.preprocessing.timeseries_dataset_from_array( - X, None, sequence_length=sample_length, sequence_stride=sample_length) - target_dataset = tf.keras.preprocessing.timeseries_dataset_from_array( - Y, None, sequence_length=sample_length, sequence_stride=sample_length) - - for batch in zip(input_dataset, target_dataset): - inputs, targets = batch - assert np.array_equal(inputs[0], X[:sample_length]) - - # second sample equals output timestamps 20-40 - assert np.array_equal(targets[1], Y[sample_length:2*sample_length]) - break - ``` - """ - if start_index: - if start_index < 0: - raise ValueError(f'`start_index` must be 0 or greater. Received: ' - f'start_index={start_index}') - if start_index >= len(data): - raise ValueError(f'`start_index` must be lower than the length of the ' - f'data. Received: start_index={start_index}, for data ' - f'of length {len(data)}') - if end_index: - if start_index and end_index <= start_index: - raise ValueError(f'`end_index` must be higher than `start_index`. ' - f'Received: start_index={start_index}, and ' - f'end_index={end_index} ') - if end_index >= len(data): - raise ValueError(f'`end_index` must be lower than the length of the ' - f'data. Received: end_index={end_index}, for data of ' - f'length {len(data)}') - if end_index <= 0: - raise ValueError('`end_index` must be higher than 0. ' - f'Received: end_index={end_index}') - - # Validate strides - if sampling_rate <= 0: - raise ValueError(f'`sampling_rate` must be higher than 0. Received: ' - f'sampling_rate={sampling_rate}') - if sampling_rate >= len(data): - raise ValueError(f'`sampling_rate` must be lower than the length of the ' - f'data. Received: sampling_rate={sampling_rate}, for data ' - f'of length {len(data)}') - if sequence_stride <= 0: - raise ValueError(f'`sequence_stride` must be higher than 0. Received: ' - f'sequence_stride={sequence_stride}') - if sequence_stride >= len(data): - raise ValueError(f'`sequence_stride` must be lower than the length of the ' - f'data. Received: sequence_stride={sequence_stride}, for ' - f'data of length {len(data)}') - - if start_index is None: - start_index = 0 - if end_index is None: - end_index = len(data) - - # Determine the lowest dtype to store start positions (to lower memory usage). 
- num_seqs = end_index - start_index - (sequence_length * sampling_rate) + 1 - if targets is not None: - num_seqs = min(num_seqs, len(targets)) - if num_seqs < 2147483647: - index_dtype = 'int32' - else: - index_dtype = 'int64' - - # Generate start positions - start_positions = np.arange(0, num_seqs, sequence_stride, dtype=index_dtype) - if shuffle: - if seed is None: - seed = np.random.randint(1e6) - rng = np.random.RandomState(seed) - rng.shuffle(start_positions) - - sequence_length = tf.cast(sequence_length, dtype=index_dtype) - sampling_rate = tf.cast(sampling_rate, dtype=index_dtype) - - positions_ds = tf.data.Dataset.from_tensors(start_positions).repeat() - - # For each initial window position, generates indices of the window elements - indices = tf.data.Dataset.zip( - (tf.data.Dataset.range(len(start_positions)), positions_ds)).map( - lambda i, positions: tf.range( # pylint: disable=g-long-lambda - positions[i], - positions[i] + sequence_length * sampling_rate, - sampling_rate), - num_parallel_calls=tf.data.AUTOTUNE) - - dataset = sequences_from_indices(data, indices, start_index, end_index) - if targets is not None: + Example 1: + + Consider indices `[0, 1, ... 98]`. + With `sequence_length=10, sampling_rate=2, sequence_stride=3`, + `shuffle=False`, the dataset will yield batches of sequences + composed of the following indices: + + ``` + First sequence: [0 2 4 6 8 10 12 14 16 18] + Second sequence: [3 5 7 9 11 13 15 17 19 21] + Third sequence: [6 8 10 12 14 16 18 20 22 24] + ... + Last sequence: [78 80 82 84 86 88 90 92 94 96] + ``` + + In this case the last 2 data points are discarded since no full sequence + can be generated to include them (the next sequence would have started + at index 81, and thus its last step would have gone over 98). + + Example 2: Temporal regression. + + Consider an array `data` of scalar values, of shape `(steps,)`. + To generate a dataset that uses the past 10 + timesteps to predict the next timestep, you would use: + + ```python + input_data = data[:-10] + targets = data[10:] + dataset = tf.keras.utils.timeseries_dataset_from_array( + input_data, targets, sequence_length=10) + for batch in dataset: + inputs, targets = batch + assert np.array_equal(inputs[0], data[:10]) # First sequence: steps [0-9] + # Corresponding target: step 10 + assert np.array_equal(targets[0], data[10]) + break + ``` + + Example 3: Temporal regression for many-to-many architectures. + + Consider two arrays of scalar values `X` and `Y`, + both of shape `(100,)`. The resulting dataset should consist samples with + 20 timestamps each. The samples should not overlap. + To generate a dataset that uses the current timestamp + to predict the corresponding target timestep, you would use: + + ```python + X = np.arange(100) + Y = X*2 + + sample_length = 20 + input_dataset = tf.keras.utils.timeseries_dataset_from_array( + X, None, sequence_length=sample_length, sequence_stride=sample_length) + target_dataset = tf.keras.utils.timeseries_dataset_from_array( + Y, None, sequence_length=sample_length, sequence_stride=sample_length) + + for batch in zip(input_dataset, target_dataset): + inputs, targets = batch + assert np.array_equal(inputs[0], X[:sample_length]) + + # second sample equals output timestamps 20-40 + assert np.array_equal(targets[1], Y[sample_length:2*sample_length]) + break + ``` + """ + if start_index: + if start_index < 0: + raise ValueError( + "`start_index` must be 0 or greater. 
Received: " + f"start_index={start_index}" + ) + if start_index >= len(data): + raise ValueError( + "`start_index` must be lower than the length of the " + f"data. Received: start_index={start_index}, for data " + f"of length {len(data)}" + ) + if end_index: + if start_index and end_index <= start_index: + raise ValueError( + "`end_index` must be higher than `start_index`. " + f"Received: start_index={start_index}, and " + f"end_index={end_index} " + ) + if end_index >= len(data): + raise ValueError( + "`end_index` must be lower than the length of the " + f"data. Received: end_index={end_index}, for data of " + f"length {len(data)}" + ) + if end_index <= 0: + raise ValueError( + "`end_index` must be higher than 0. " + f"Received: end_index={end_index}" + ) + + # Validate strides + if sampling_rate <= 0: + raise ValueError( + "`sampling_rate` must be higher than 0. Received: " + f"sampling_rate={sampling_rate}" + ) + if sampling_rate >= len(data): + raise ValueError( + "`sampling_rate` must be lower than the length of the " + f"data. Received: sampling_rate={sampling_rate}, for data " + f"of length {len(data)}" + ) + if sequence_stride <= 0: + raise ValueError( + "`sequence_stride` must be higher than 0. Received: " + f"sequence_stride={sequence_stride}" + ) + if sequence_stride >= len(data): + raise ValueError( + "`sequence_stride` must be lower than the length of the " + f"data. Received: sequence_stride={sequence_stride}, for " + f"data of length {len(data)}" + ) + + if start_index is None: + start_index = 0 + if end_index is None: + end_index = len(data) + + # Determine the lowest dtype to store start positions (to lower memory + # usage). + num_seqs = end_index - start_index - (sequence_length - 1) * sampling_rate + if targets is not None: + num_seqs = min(num_seqs, len(targets)) + if num_seqs < 2147483647: + index_dtype = "int32" + else: + index_dtype = "int64" + + # Generate start positions + start_positions = np.arange(0, num_seqs, sequence_stride, dtype=index_dtype) + if shuffle: + if seed is None: + seed = np.random.randint(1e6) + rng = np.random.RandomState(seed) + rng.shuffle(start_positions) + + sequence_length = tf.cast(sequence_length, dtype=index_dtype) + sampling_rate = tf.cast(sampling_rate, dtype=index_dtype) + + positions_ds = tf.data.Dataset.from_tensors(start_positions).repeat() + + # For each initial window position, generates indices of the window elements indices = tf.data.Dataset.zip( - (tf.data.Dataset.range(len(start_positions)), positions_ds)).map( + (tf.data.Dataset.range(len(start_positions)), positions_ds) + ).map( + lambda i, positions: tf.range( + positions[i], + positions[i] + sequence_length * sampling_rate, + sampling_rate, + ), + num_parallel_calls=tf.data.AUTOTUNE, + ) + + dataset = sequences_from_indices(data, indices, start_index, end_index) + if targets is not None: + indices = tf.data.Dataset.zip( + (tf.data.Dataset.range(len(start_positions)), positions_ds) + ).map( lambda i, positions: positions[i], - num_parallel_calls=tf.data.AUTOTUNE) - target_ds = sequences_from_indices( - targets, indices, start_index, end_index) - dataset = tf.data.Dataset.zip((dataset, target_ds)) - dataset = dataset.prefetch(tf.data.AUTOTUNE) - if batch_size is not None: - if shuffle: - # Shuffle locally at each iteration - dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed) - dataset = dataset.batch(batch_size) - else: - if shuffle: - dataset = dataset.shuffle(buffer_size=1024, seed=seed) - return dataset + num_parallel_calls=tf.data.AUTOTUNE, + ) + 
target_ds = sequences_from_indices( + targets, indices, start_index, end_index + ) + dataset = tf.data.Dataset.zip((dataset, target_ds)) + dataset = dataset.prefetch(tf.data.AUTOTUNE) + if batch_size is not None: + if shuffle: + # Shuffle locally at each iteration + dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed) + dataset = dataset.batch(batch_size) + else: + if shuffle: + dataset = dataset.shuffle(buffer_size=1024, seed=seed) + return dataset def sequences_from_indices(array, indices_ds, start_index, end_index): - dataset = tf.data.Dataset.from_tensors(array[start_index : end_index]) - dataset = tf.data.Dataset.zip((dataset.repeat(), indices_ds)).map( - lambda steps, inds: tf.gather(steps, inds), # pylint: disable=unnecessary-lambda - num_parallel_calls=tf.data.AUTOTUNE) - return dataset + dataset = tf.data.Dataset.from_tensors(array[start_index:end_index]) + dataset = tf.data.Dataset.zip((dataset.repeat(), indices_ds)).map( + lambda steps, inds: tf.gather(steps, inds), + num_parallel_calls=tf.data.AUTOTUNE, + ) + return dataset diff --git a/keras/utils/timeseries_dataset_test.py b/keras/utils/timeseries_dataset_test.py index cda8db59c343..77f6acd33d3a 100644 --- a/keras/utils/timeseries_dataset_test.py +++ b/keras/utils/timeseries_dataset_test.py @@ -14,175 +14,213 @@ # ============================================================================== """Tests for timeseries_dataset.""" +import numpy as np import tensorflow.compat.v2 as tf -import numpy as np from keras.testing_infra import test_utils from keras.utils import timeseries_dataset @test_utils.run_v2_only class TimeseriesDatasetTest(tf.test.TestCase): + def test_basics(self): + # Test ordering, targets, sequence length, batch size + data = np.arange(100) + targets = data * 2 + dataset = timeseries_dataset.timeseries_dataset_from_array( + data, targets, sequence_length=9, batch_size=5 + ) + # Expect 19 batches + for i, batch in enumerate(dataset): + self.assertLen(batch, 2) + inputs, targets = batch + if i < 18: + self.assertEqual(inputs.shape, (5, 9)) + if i == 18: + # Last batch: size 2 + self.assertEqual(inputs.shape, (2, 9)) + # Check target values + self.assertAllClose(targets, inputs[:, 0] * 2) + for j in range(min(5, len(inputs))): + # Check each sample in the batch + self.assertAllClose( + inputs[j], np.arange(i * 5 + j, i * 5 + j + 9) + ) - def test_basics(self): - # Test ordering, targets, sequence length, batch size - data = np.arange(100) - targets = data * 2 - dataset = timeseries_dataset.timeseries_dataset_from_array( - data, targets, sequence_length=9, batch_size=5) - # Expect 19 batches - for i, batch in enumerate(dataset): - self.assertLen(batch, 2) - inputs, targets = batch - if i < 18: - self.assertEqual(inputs.shape, (5, 9)) - if i == 18: - # Last batch: size 2 - self.assertEqual(inputs.shape, (2, 9)) - # Check target values - self.assertAllClose(targets, inputs[:, 0] * 2) - for j in range(min(5, len(inputs))): - # Check each sample in the batch - self.assertAllClose(inputs[j], np.arange(i * 5 + j, i * 5 + j + 9)) + def test_timeseries_regression(self): + # Test simple timeseries regression use case + data = np.arange(10) + offset = 3 + targets = data[offset:] + dataset = timeseries_dataset.timeseries_dataset_from_array( + data, targets, sequence_length=offset, batch_size=1 + ) + i = 0 + for batch in dataset: + self.assertLen(batch, 2) + inputs, targets = batch + self.assertEqual(inputs.shape, (1, 3)) + # Check values + self.assertAllClose(targets[0], data[offset + i]) + 
self.assertAllClose(inputs[0], data[i : i + offset]) + i += 1 + self.assertEqual(i, 7) # Expect 7 batches - def test_timeseries_regression(self): - # Test simple timeseries regression use case - data = np.arange(10) - offset = 3 - targets = data[offset:] - dataset = timeseries_dataset.timeseries_dataset_from_array( - data, targets, sequence_length=offset, batch_size=1) - i = 0 - for batch in dataset: - self.assertLen(batch, 2) - inputs, targets = batch - self.assertEqual(inputs.shape, (1, 3)) - # Check values - self.assertAllClose(targets[0], data[offset + i]) - self.assertAllClose(inputs[0], data[i : i + offset]) - i += 1 - self.assertEqual(i, 7) # Expect 7 batches + def test_no_targets(self): + data = np.arange(50) + dataset = timeseries_dataset.timeseries_dataset_from_array( + data, None, sequence_length=10, batch_size=5 + ) + # Expect 9 batches + i = None + for i, batch in enumerate(dataset): + if i < 8: + self.assertEqual(batch.shape, (5, 10)) + elif i == 8: + self.assertEqual(batch.shape, (1, 10)) + for j in range(min(5, len(batch))): + # Check each sample in the batch + self.assertAllClose( + batch[j], np.arange(i * 5 + j, i * 5 + j + 10) + ) + self.assertEqual(i, 8) - def test_no_targets(self): - data = np.arange(50) - dataset = timeseries_dataset.timeseries_dataset_from_array( - data, None, sequence_length=10, batch_size=5) - # Expect 9 batches - i = None - for i, batch in enumerate(dataset): - if i < 8: - self.assertEqual(batch.shape, (5, 10)) - elif i == 8: - self.assertEqual(batch.shape, (1, 10)) - for j in range(min(5, len(batch))): - # Check each sample in the batch - self.assertAllClose(batch[j], np.arange(i * 5 + j, i * 5 + j + 10)) - self.assertEqual(i, 8) + def test_shuffle(self): + # Test cross-epoch random order and seed determinism + data = np.arange(10) + targets = data * 2 + dataset = timeseries_dataset.timeseries_dataset_from_array( + data, + targets, + sequence_length=5, + batch_size=1, + shuffle=True, + seed=123, + ) + first_seq = None + for x, y in dataset.take(1): + self.assertNotAllClose(x, np.arange(0, 5)) + self.assertAllClose(x[:, 0] * 2, y) + first_seq = x + # Check that a new iteration with the same dataset yields different + # results + for x, _ in dataset.take(1): + self.assertNotAllClose(x, first_seq) + # Check determinism with same seed + dataset = timeseries_dataset.timeseries_dataset_from_array( + data, + targets, + sequence_length=5, + batch_size=1, + shuffle=True, + seed=123, + ) + for x, _ in dataset.take(1): + self.assertAllClose(x, first_seq) - def test_shuffle(self): - # Test cross-epoch random order and seed determinism - data = np.arange(10) - targets = data * 2 - dataset = timeseries_dataset.timeseries_dataset_from_array( - data, targets, sequence_length=5, batch_size=1, shuffle=True, seed=123) - first_seq = None - for x, y in dataset.take(1): - self.assertNotAllClose(x, np.arange(0, 5)) - self.assertAllClose(x[:, 0] * 2, y) - first_seq = x - # Check that a new iteration with the same dataset yields different results - for x, _ in dataset.take(1): - self.assertNotAllClose(x, first_seq) - # Check determism with same seed - dataset = timeseries_dataset.timeseries_dataset_from_array( - data, targets, sequence_length=5, batch_size=1, shuffle=True, seed=123) - for x, _ in dataset.take(1): - self.assertAllClose(x, first_seq) + def test_sampling_rate(self): + data = np.arange(100) + targets = data * 2 + dataset = timeseries_dataset.timeseries_dataset_from_array( + data, targets, sequence_length=9, batch_size=5, sampling_rate=2 + ) + for i, batch in
enumerate(dataset): + self.assertLen(batch, 2) + inputs, targets = batch + if i < 16: + self.assertEqual(inputs.shape, (5, 9)) + if i == 16: + # Last batch: size 4 + self.assertEqual(inputs.shape, (4, 9)) + # Check target values + self.assertAllClose(inputs[:, 0] * 2, targets) + for j in range(min(5, len(inputs))): + # Check each sample in the batch + start_index = i * 5 + j + end_index = start_index + 9 * 2 + self.assertAllClose( + inputs[j], np.arange(start_index, end_index, 2) + ) - def test_sampling_rate(self): - data = np.arange(100) - targets = data * 2 - dataset = timeseries_dataset.timeseries_dataset_from_array( - data, targets, sequence_length=9, batch_size=5, sampling_rate=2) - for i, batch in enumerate(dataset): - self.assertLen(batch, 2) - inputs, targets = batch - if i < 16: - self.assertEqual(inputs.shape, (5, 9)) - if i == 16: - # Last batch: size 3 - self.assertEqual(inputs.shape, (3, 9)) - # Check target values - self.assertAllClose(inputs[:, 0] * 2, targets) - for j in range(min(5, len(inputs))): - # Check each sample in the batch - start_index = i * 5 + j - end_index = start_index + 9 * 2 - self.assertAllClose(inputs[j], np.arange(start_index, end_index, 2)) + def test_sequence_stride(self): + data = np.arange(100) + targets = data * 2 + dataset = timeseries_dataset.timeseries_dataset_from_array( + data, targets, sequence_length=9, batch_size=5, sequence_stride=3 + ) + for i, batch in enumerate(dataset): + self.assertLen(batch, 2) + inputs, targets = batch + if i < 6: + self.assertEqual(inputs.shape, (5, 9)) + if i == 6: + # Last batch: size 1 + self.assertEqual(inputs.shape, (1, 9)) + # Check target values + self.assertAllClose(inputs[:, 0] * 2, targets) + for j in range(min(5, len(inputs))): + # Check each sample in the batch + start_index = i * 5 * 3 + j * 3 + end_index = start_index + 9 + self.assertAllClose( + inputs[j], np.arange(start_index, end_index) + ) - def test_sequence_stride(self): - data = np.arange(100) - targets = data * 2 - dataset = timeseries_dataset.timeseries_dataset_from_array( - data, targets, sequence_length=9, batch_size=5, sequence_stride=3) - for i, batch in enumerate(dataset): - self.assertLen(batch, 2) - inputs, targets = batch - if i < 6: - self.assertEqual(inputs.shape, (5, 9)) - if i == 6: - # Last batch: size 1 - self.assertEqual(inputs.shape, (1, 9)) - # Check target values - self.assertAllClose(inputs[:, 0] * 2, targets) - for j in range(min(5, len(inputs))): - # Check each sample in the batch - start_index = i * 5 * 3 + j * 3 - end_index = start_index + 9 - self.assertAllClose(inputs[j], - np.arange(start_index, end_index)) + def test_start_and_end_index(self): + data = np.arange(100) + dataset = timeseries_dataset.timeseries_dataset_from_array( + data, + None, + sequence_length=9, + batch_size=5, + sequence_stride=3, + sampling_rate=2, + start_index=10, + end_index=90, + ) + for batch in dataset: + self.assertAllLess(batch[0], 90) + self.assertAllGreater(batch[0], 9) - def test_start_and_end_index(self): - data = np.arange(100) - dataset = timeseries_dataset.timeseries_dataset_from_array( - data, None, - sequence_length=9, batch_size=5, sequence_stride=3, sampling_rate=2, - start_index=10, end_index=90) - for batch in dataset: - self.assertAllLess(batch[0], 90) - self.assertAllGreater(batch[0], 9) + def test_errors(self): + # bad start index + with self.assertRaisesRegex(ValueError, "`start_index` must be "): + _ = timeseries_dataset.timeseries_dataset_from_array( + np.arange(10), None, 3, start_index=-1 + ) + with 
self.assertRaisesRegex(ValueError, "`start_index` must be "): + _ = timeseries_dataset.timeseries_dataset_from_array( + np.arange(10), None, 3, start_index=11 + ) + # bad end index + with self.assertRaisesRegex(ValueError, "`end_index` must be "): + _ = timeseries_dataset.timeseries_dataset_from_array( + np.arange(10), None, 3, end_index=-1 + ) + with self.assertRaisesRegex(ValueError, "`end_index` must be "): + _ = timeseries_dataset.timeseries_dataset_from_array( + np.arange(10), None, 3, end_index=11 + ) + # bad sampling_rate + with self.assertRaisesRegex(ValueError, "`sampling_rate` must be "): + _ = timeseries_dataset.timeseries_dataset_from_array( + np.arange(10), None, 3, sampling_rate=0 + ) + # bad sequence stride + with self.assertRaisesRegex(ValueError, "`sequence_stride` must be "): + _ = timeseries_dataset.timeseries_dataset_from_array( + np.arange(10), None, 3, sequence_stride=0 + ) - def test_errors(self): - # bad start index - with self.assertRaisesRegex(ValueError, '`start_index` must be '): - _ = timeseries_dataset.timeseries_dataset_from_array( - np.arange(10), None, 3, start_index=-1) - with self.assertRaisesRegex(ValueError, '`start_index` must be '): - _ = timeseries_dataset.timeseries_dataset_from_array( - np.arange(10), None, 3, start_index=11) - # bad end index - with self.assertRaisesRegex(ValueError, '`end_index` must be '): - _ = timeseries_dataset.timeseries_dataset_from_array( - np.arange(10), None, 3, end_index=-1) - with self.assertRaisesRegex(ValueError, '`end_index` must be '): - _ = timeseries_dataset.timeseries_dataset_from_array( - np.arange(10), None, 3, end_index=11) - # bad sampling_rate - with self.assertRaisesRegex(ValueError, '`sampling_rate` must be '): - _ = timeseries_dataset.timeseries_dataset_from_array( - np.arange(10), None, 3, sampling_rate=0) - # bad sequence stride - with self.assertRaisesRegex(ValueError, '`sequence_stride` must be '): - _ = timeseries_dataset.timeseries_dataset_from_array( - np.arange(10), None, 3, sequence_stride=0) + def test_not_batched(self): + data = np.arange(100) - def test_not_batched(self): - data = np.arange(100) + dataset = timeseries_dataset.timeseries_dataset_from_array( + data, None, sequence_length=9, batch_size=None, shuffle=True + ) + sample = next(iter(dataset)) + self.assertEqual(len(sample.shape), 1) - dataset = timeseries_dataset.timeseries_dataset_from_array( - data, None, sequence_length=9, batch_size=None, shuffle=True) - sample = next(iter(dataset)) - self.assertEqual(len(sample.shape), 1) -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/traceback_utils.py b/keras/utils/traceback_utils.py index 31bc1e5f12a3..6cbc804319e7 100644 --- a/keras/utils/traceback_utils.py +++ b/keras/utils/traceback_utils.py @@ -19,142 +19,150 @@ import sys import traceback import types -import tensorflow.compat.v2 as tf +import tensorflow.compat.v2 as tf _EXCLUDED_PATHS = ( - os.path.abspath(os.path.join(__file__, '..', '..')), - os.path.join('tensorflow', 'python'), + os.path.abspath(os.path.join(__file__, "..", "..")), + os.path.join("tensorflow", "python"), ) def include_frame(fname): - for exclusion in _EXCLUDED_PATHS: - if exclusion in fname: - return False - return True + for exclusion in _EXCLUDED_PATHS: + if exclusion in fname: + return False + return True def _process_traceback_frames(tb): - """Iterate through traceback frames and return a new, filtered traceback.""" - last_tb = None - tb_list = list(traceback.walk_tb(tb)) - for f, line_no in 
reversed(tb_list): - if include_frame(f.f_code.co_filename): - last_tb = types.TracebackType(last_tb, f, f.f_lasti, line_no) - if last_tb is None and tb_list: - # If no frames were kept during filtering, create a new traceback - # from the outermost function. - f, line_no = tb_list[-1] - last_tb = types.TracebackType(last_tb, f, f.f_lasti, line_no) - return last_tb + """Iterate through traceback frames and return a new, filtered traceback.""" + last_tb = None + tb_list = list(traceback.walk_tb(tb)) + for f, line_no in reversed(tb_list): + if include_frame(f.f_code.co_filename): + last_tb = types.TracebackType(last_tb, f, f.f_lasti, line_no) + if last_tb is None and tb_list: + # If no frames were kept during filtering, create a new traceback + # from the outermost function. + f, line_no = tb_list[-1] + last_tb = types.TracebackType(last_tb, f, f.f_lasti, line_no) + return last_tb def filter_traceback(fn): - """Filter out Keras-internal stack trace frames in exceptions raised by fn.""" - if sys.version_info.major != 3 or sys.version_info.minor < 7: - return fn + """Filter out Keras-internal stack trace frames in exceptions raised by + fn.""" + if sys.version_info.major != 3 or sys.version_info.minor < 7: + return fn - def error_handler(*args, **kwargs): - if not tf.debugging.is_traceback_filtering_enabled(): - return fn(*args, **kwargs) + def error_handler(*args, **kwargs): + if not tf.debugging.is_traceback_filtering_enabled(): + return fn(*args, **kwargs) - filtered_tb = None - try: - return fn(*args, **kwargs) - except Exception as e: # pylint: disable=broad-except - filtered_tb = _process_traceback_frames(e.__traceback__) - # To get the full stack trace, call: - # `tf.debugging.disable_traceback_filtering()` - raise e.with_traceback(filtered_tb) from None - finally: - del filtered_tb + filtered_tb = None + try: + return fn(*args, **kwargs) + except Exception as e: + filtered_tb = _process_traceback_frames(e.__traceback__) + # To get the full stack trace, call: + # `tf.debugging.disable_traceback_filtering()` + raise e.with_traceback(filtered_tb) from None + finally: + del filtered_tb - return tf.__internal__.decorator.make_decorator(fn, error_handler) + return tf.__internal__.decorator.make_decorator(fn, error_handler) def inject_argument_info_in_traceback(fn, object_name=None): - """Add information about call argument values to an error message. - - Arguments: - fn: Function to wrap. Exceptions raised by the this function will be - re-raised with additional information added to the error message, - displaying the values of the different arguments that the function - was called with. - object_name: String, display name of the class/function being called, - e.g. `'layer "layer_name" (LayerClass)'`. - - Returns: - A wrapped version of `fn`. 
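An editorial aside on the function reformatted above: `_process_traceback_frames` rebuilds a traceback by hand, chaining `types.TracebackType` objects (constructible since Python 3.7) from the innermost kept frame outward. Below is a minimal, self-contained sketch of the same idea; the names `filter_tb` and `keep_frame` are mine, not from the diff:

```python
import traceback
import types


def filter_tb(tb, keep_frame):
    """Rebuild `tb`, keeping only frames whose filename passes `keep_frame`."""
    new_tb = None
    frames = list(traceback.walk_tb(tb))
    # Iterate innermost-first so each node can point at the one built before.
    for frame, lineno in reversed(frames):
        if keep_frame(frame.f_code.co_filename):
            new_tb = types.TracebackType(new_tb, frame, frame.f_lasti, lineno)
    # Never return an empty traceback: fall back to the outermost frame.
    if new_tb is None and frames:
        frame, lineno = frames[-1]
        new_tb = types.TracebackType(new_tb, frame, frame.f_lasti, lineno)
    return new_tb


try:
    raise ValueError("boom")
except ValueError as e:
    # Drop frames from site-packages, mirroring the _EXCLUDED_PATHS idea.
    tb = filter_tb(e.__traceback__, lambda fname: "site-packages" not in fname)
    raise e.with_traceback(tb) from None
```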
- """ - def error_handler(*args, **kwargs): - signature = None - bound_signature = None - try: - return fn(*args, **kwargs) - except Exception as e: # pylint: disable=broad-except - if hasattr(e, '_keras_call_info_injected'): - # Only inject info for the innermost failing call - raise e - signature = inspect.signature(fn) - try: - # The first argument is `self`, so filter it out - bound_signature = signature.bind(*args, **kwargs) - except TypeError: - # Likely unbindable arguments - raise e - - # Add argument context - arguments_context = [] - for arg in list(signature.parameters.values()): - if arg.name in bound_signature.arguments: - value = tf.nest.map_structure( - format_argument_value, bound_signature.arguments[arg.name]) - else: - value = arg.default - arguments_context.append(f' • {arg.name}={value}') - - if arguments_context: - arguments_context = '\n'.join(arguments_context) - # Get original error message and append information to it. - if isinstance(e, tf.errors.OpError): - message = e.message - elif e.args: - # Canonically, the 1st argument in an exception is the error message. - # This works for all built-in Python exceptions. - message = e.args[0] - else: - message = '' - display_name = f'{object_name if object_name else fn.__name__}' - message = ( - f'Exception encountered when calling {display_name}.\n\n' - f'{message}\n\n' - f'Call arguments received by {display_name}:\n' - f'{arguments_context}') - - # Reraise exception, with added context - if isinstance(e, tf.errors.OpError): - new_e = e.__class__(e.node_def, e.op, message, e.error_code) - else: - try: - # For standard exceptions such as ValueError, TypeError, etc. - new_e = e.__class__(message) - except TypeError: - # For any custom error that doesn't have a standard signature. - new_e = RuntimeError(message) - new_e._keras_call_info_injected = True # pylint: disable=protected-access - else: - new_e = e - raise new_e.with_traceback(e.__traceback__) from None - finally: - del signature - del bound_signature - return tf.__internal__.decorator.make_decorator(fn, error_handler) + """Add information about call argument values to an error message. + + Arguments: + fn: Function to wrap. Exceptions raised by the this function will be + re-raised with additional information added to the error message, + displaying the values of the different arguments that the function + was called with. + object_name: String, display name of the class/function being called, + e.g. `'layer "layer_name" (LayerClass)'`. + + Returns: + A wrapped version of `fn`. + """ + + def error_handler(*args, **kwargs): + signature = None + bound_signature = None + try: + return fn(*args, **kwargs) + except Exception as e: + if hasattr(e, "_keras_call_info_injected"): + # Only inject info for the innermost failing call + raise e + signature = inspect.signature(fn) + try: + # The first argument is `self`, so filter it out + bound_signature = signature.bind(*args, **kwargs) + except TypeError: + # Likely unbindable arguments + raise e + + # Add argument context + arguments_context = [] + for arg in list(signature.parameters.values()): + if arg.name in bound_signature.arguments: + value = tf.nest.map_structure( + format_argument_value, + bound_signature.arguments[arg.name], + ) + else: + value = arg.default + arguments_context.append(f" • {arg.name}={value}") + + if arguments_context: + arguments_context = "\n".join(arguments_context) + # Get original error message and append information to it. 
+ if isinstance(e, tf.errors.OpError): + message = e.message + elif e.args: + # Canonically, the 1st argument in an exception is the error + # message. This works for all built-in Python exceptions. + message = e.args[0] + else: + message = "" + display_name = f"{object_name if object_name else fn.__name__}" + message = ( + f"Exception encountered when calling {display_name}.\n\n" + f"{message}\n\n" + f"Call arguments received by {display_name}:\n" + f"{arguments_context}" + ) + + # Reraise exception, with added context + if isinstance(e, tf.errors.OpError): + new_e = e.__class__(e.node_def, e.op, message, e.error_code) + else: + try: + # For standard exceptions such as ValueError, TypeError, + # etc. + new_e = e.__class__(message) + except TypeError: + # For any custom error that doesn't have a standard + # signature. + new_e = RuntimeError(message) + new_e._keras_call_info_injected = True + else: + new_e = e + raise new_e.with_traceback(e.__traceback__) from None + finally: + del signature + del bound_signature + + return tf.__internal__.decorator.make_decorator(fn, error_handler) def format_argument_value(value): - if isinstance(value, tf.Tensor): - # Simplified representation for eager / graph tensors - # to keep messages readable - return f'tf.Tensor(shape={value.shape}, dtype={value.dtype.name})' - return repr(value) + if isinstance(value, tf.Tensor): + # Simplified representation for eager / graph tensors + # to keep messages readable + return f"tf.Tensor(shape={value.shape}, dtype={value.dtype.name})" + return repr(value) diff --git a/keras/utils/traceback_utils_test.py b/keras/utils/traceback_utils_test.py index cb7cd449c71e..cb223f38b313 100644 --- a/keras/utils/traceback_utils_test.py +++ b/keras/utils/traceback_utils_test.py @@ -14,180 +14,176 @@ # ============================================================================== """Tests for traceback_utils.""" +import tensorflow.compat.v2 as tf + from keras import layers from keras.utils import traceback_utils -import tensorflow.compat.v2 as tf class TracebackUtilsTest(tf.test.TestCase): - - def test_info_injection_basics(self): - def error_fn(arg_1, arg_2, keyword_arg_1=None, keyword_arg_2=None): - raise ValueError('Original message') - - with self.assertRaises(ValueError) as e: - traceback_utils.inject_argument_info_in_traceback( - error_fn, 'ObjName')(1, 2, keyword_arg_1=3, keyword_arg_2=4) - self.assertIn('Original message', str(e.exception)) - self.assertIn('Exception encountered when calling ObjName', - str(e.exception)) - self.assertIn('Call arguments received', str(e.exception)) - self.assertIn('arg_1=1', str(e.exception)) - self.assertIn('arg_2=2', str(e.exception)) - self.assertIn('keyword_arg_1=3', str(e.exception)) - self.assertIn('keyword_arg_2=4', str(e.exception)) - - with self.assertRaises(ValueError) as e: - traceback_utils.inject_argument_info_in_traceback( - error_fn)(1, 2, keyword_arg_1=3, keyword_arg_2=4) - self.assertIn('Exception encountered when calling error_fn', - str(e.exception)) - - def test_info_injection_no_args(self): - def error_fn(): - raise ValueError('Original message') - - with self.assertRaises(ValueError) as e: - traceback_utils.inject_argument_info_in_traceback(error_fn)() - self.assertEqual(str(e.exception).count('Call arguments received'), 0) - - def test_info_injection_unbindable(self): - def error_fn(arg_1, keyword_arg_1=1): - return arg_1 + keyword_arg_1 - - with self.assertRaises(TypeError) as e: - traceback_utils.inject_argument_info_in_traceback(error_fn)() - self.assertIn('missing 
1 required positional argument', str(e.exception)) - - def test_info_injection_nested(self): - def inner_fn(arg_1): - raise ValueError('Original message') - - def outer_fn(arg_1): - return inner_fn(arg_1) - - with self.assertRaises(ValueError) as e: - traceback_utils.inject_argument_info_in_traceback( - outer_fn)(1) - self.assertEqual(str(e.exception).count('Call arguments received'), 1) - - def test_info_injection_tf_op_error(self): - def error_fn(arg_1, keyword_arg_1=1): - return arg_1 + keyword_arg_1 + tf.zeros((2, 3)) - - with self.assertRaises(tf.errors.InvalidArgumentError) as e: - traceback_utils.inject_argument_info_in_traceback(error_fn)( - tf.zeros((3, 3))) - self.assertIn('Incompatible shapes', str(e.exception)) - self.assertIn('Call arguments received', str(e.exception)) + def test_info_injection_basics(self): + def error_fn(arg_1, arg_2, keyword_arg_1=None, keyword_arg_2=None): + raise ValueError("Original message") + + with self.assertRaises(ValueError) as e: + traceback_utils.inject_argument_info_in_traceback( + error_fn, "ObjName" + )(1, 2, keyword_arg_1=3, keyword_arg_2=4) + self.assertIn("Original message", str(e.exception)) + self.assertIn( + "Exception encountered when calling ObjName", str(e.exception) + ) + self.assertIn("Call arguments received", str(e.exception)) + self.assertIn("arg_1=1", str(e.exception)) + self.assertIn("arg_2=2", str(e.exception)) + self.assertIn("keyword_arg_1=3", str(e.exception)) + self.assertIn("keyword_arg_2=4", str(e.exception)) + + with self.assertRaises(ValueError) as e: + traceback_utils.inject_argument_info_in_traceback(error_fn)( + 1, 2, keyword_arg_1=3, keyword_arg_2=4 + ) + self.assertIn( + "Exception encountered when calling error_fn", str(e.exception) + ) + + def test_info_injection_no_args(self): + def error_fn(): + raise ValueError("Original message") + + with self.assertRaises(ValueError) as e: + traceback_utils.inject_argument_info_in_traceback(error_fn)() + self.assertEqual(str(e.exception).count("Call arguments received"), 0) + + def test_info_injection_unbindable(self): + def error_fn(arg_1, keyword_arg_1=1): + return arg_1 + keyword_arg_1 + + with self.assertRaises(TypeError) as e: + traceback_utils.inject_argument_info_in_traceback(error_fn)() + self.assertIn( + "missing 1 required positional argument", str(e.exception) + ) + + def test_info_injection_nested(self): + def inner_fn(arg_1): + raise ValueError("Original message") + + def outer_fn(arg_1): + return inner_fn(arg_1) + + with self.assertRaises(ValueError) as e: + traceback_utils.inject_argument_info_in_traceback(outer_fn)(1) + self.assertEqual(str(e.exception).count("Call arguments received"), 1) + + def test_info_injection_tf_op_error(self): + def error_fn(arg_1, keyword_arg_1=1): + return arg_1 + keyword_arg_1 + tf.zeros((2, 3)) + + with self.assertRaises(tf.errors.InvalidArgumentError) as e: + traceback_utils.inject_argument_info_in_traceback(error_fn)( + tf.zeros((3, 3)) + ) + self.assertIn("Incompatible shapes", str(e.exception)) + self.assertIn("Call arguments received", str(e.exception)) class LayerCallInfoInjectionTest(tf.test.TestCase): - - def assert_info_injected(self, fn): - tf.debugging.enable_traceback_filtering() - try: - fn() - except Exception as e: # pylint: disable=broad-except - # Info should be injected exactly once. 
- self.assertEqual(str(e).count('Call arguments received'), 1) # pylint: disable=g-assert-in-except - - def test_custom_layer_call_nested(self): - - class InnerLayer(layers.Layer): - - def call(self, inputs, training=False, mask=None): - return inputs + tf.zeros((3, 4)) - - class OuterLayer(layers.Layer): - - def __init__(self): - super().__init__() - self.inner = InnerLayer() - - def call(self, inputs, training=True): - return self.inner(inputs) - - def fn(): - layer = OuterLayer() - layer(tf.zeros((3, 5)), training=False) - - self.assert_info_injected(fn) - - def test_custom_layer_call_eager_dense_input(self): - - class MyLayer(layers.Layer): - - def call(self, inputs, training=False, mask=None): - return inputs + tf.zeros((3, 4)) - - def fn(): - layer = MyLayer() - layer(tf.zeros((3, 5)), training=False) - - self.assert_info_injected(fn) - - def test_custom_layer_call_eager_sparse_input(self): - - class MyLayer(layers.Layer): - - def call(self, inputs, training=False, mask=None): - return inputs + tf.zeros((3, 4)) - - def fn(): - layer = MyLayer() - layer( - tf.SparseTensor(indices=[[0, 0]], values=[1], dense_shape=[3, 5]), - training=False) - - self.assert_info_injected(fn) - - def test_custom_layer_call_eager_ragged_input(self): - - class MyLayer(layers.Layer): - - def call(self, inputs, training=False, mask=None): - return inputs + tf.zeros((3, 4)) - - def fn(): - layer = MyLayer() - layer(tf.ragged.constant([[0, 0, 0], [0, 0]]), training=False) - - self.assert_info_injected(fn) - - def test_custom_layer_call_symbolic(self): - - class MyLayer(layers.Layer): - - def call(self, inputs, training=False, mask=None): - return inputs + tf.zeros((3, 4)) - - def fn(): - layer = MyLayer() - layer(layers.Input((3, 5)), training=False) - - self.assert_info_injected(fn) - - def test_custom_layer_call_unbindable(self): - - class MyLayer(layers.Layer): - - def __init__(self): - super().__init__() - self.input_spec = layers.InputSpec(shape=(3, 4)) - - def call(self, inputs, training=False, mask=None): - return inputs + tf.zeros((3, 4)) - - def fn(): - layer = MyLayer() - layer(bad=True, arguments=True) - - with self.assertRaisesRegex( - ValueError, 'The first argument to `Layer.call` must always'): - fn() - - -if __name__ == '__main__': - if tf.__internal__.tf2.enabled(): - tf.test.main() + def assert_info_injected(self, fn): + tf.debugging.enable_traceback_filtering() + try: + fn() + except Exception as e: + # Info should be injected exactly once. 
+ self.assertEqual(str(e).count("Call arguments received"), 1) + + def test_custom_layer_call_nested(self): + class InnerLayer(layers.Layer): + def call(self, inputs, training=False, mask=None): + return inputs + tf.zeros((3, 4)) + + class OuterLayer(layers.Layer): + def __init__(self): + super().__init__() + self.inner = InnerLayer() + + def call(self, inputs, training=True): + return self.inner(inputs) + + def fn(): + layer = OuterLayer() + layer(tf.zeros((3, 5)), training=False) + + self.assert_info_injected(fn) + + def test_custom_layer_call_eager_dense_input(self): + class MyLayer(layers.Layer): + def call(self, inputs, training=False, mask=None): + return inputs + tf.zeros((3, 4)) + + def fn(): + layer = MyLayer() + layer(tf.zeros((3, 5)), training=False) + + self.assert_info_injected(fn) + + def test_custom_layer_call_eager_sparse_input(self): + class MyLayer(layers.Layer): + def call(self, inputs, training=False, mask=None): + return inputs + tf.zeros((3, 4)) + + def fn(): + layer = MyLayer() + layer( + tf.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[3, 5] + ), + training=False, + ) + + self.assert_info_injected(fn) + + def test_custom_layer_call_eager_ragged_input(self): + class MyLayer(layers.Layer): + def call(self, inputs, training=False, mask=None): + return inputs + tf.zeros((3, 4)) + + def fn(): + layer = MyLayer() + layer(tf.ragged.constant([[0, 0, 0], [0, 0]]), training=False) + + self.assert_info_injected(fn) + + def test_custom_layer_call_symbolic(self): + class MyLayer(layers.Layer): + def call(self, inputs, training=False, mask=None): + return inputs + tf.zeros((3, 4)) + + def fn(): + layer = MyLayer() + layer(layers.Input((3, 5)), training=False) + + self.assert_info_injected(fn) + + def test_custom_layer_call_unbindable(self): + class MyLayer(layers.Layer): + def __init__(self): + super().__init__() + self.input_spec = layers.InputSpec(shape=(3, 4)) + + def call(self, inputs, training=False, mask=None): + return inputs + tf.zeros((3, 4)) + + def fn(): + layer = MyLayer() + layer(bad=True, arguments=True) + + with self.assertRaisesRegex( + ValueError, "The first argument to `Layer.call` must always" + ): + fn() + + +if __name__ == "__main__": + if tf.__internal__.tf2.enabled(): + tf.test.main() diff --git a/keras/utils/version_utils.py b/keras/utils/version_utils.py index f17107877487..ba73509210b1 100644 --- a/keras/utils/version_utils.py +++ b/keras/utils/version_utils.py @@ -12,121 +12,119 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access + """Utilities for Keras classes with v1 and v2 versions.""" import tensorflow.compat.v2 as tf + from keras.utils.generic_utils import LazyLoader # TODO(b/134426265): Switch back to single-quotes once the issue # with copybara is fixed. 
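Context for the `LazyLoader` calls reformatted in the next hunk: the pattern defers a module import until first attribute access, which is how Keras sidesteps circular imports between its `engine`, `callbacks`, and `utils` modules. A rough sketch of such a loader, modeled on the pattern rather than copied from `keras.utils.generic_utils`:

```python
import importlib
import types


class LazyLoader(types.ModuleType):
    """Import the module named `name` lazily, on first attribute access."""

    def __init__(self, local_name, parent_globals, name):
        self._local_name = local_name
        self._parent_globals = parent_globals
        super().__init__(name)

    def _load(self):
        module = importlib.import_module(self.__name__)
        # Replace the lazy stub in the caller's namespace so later
        # lookups hit the real module directly.
        self._parent_globals[self._local_name] = module
        self.__dict__.update(module.__dict__)
        return module

    def __getattr__(self, item):
        return getattr(self._load(), item)


json_mod = LazyLoader("json_mod", globals(), "json")
print(json_mod.dumps({"lazy": True}))  # the import happens here
```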
-# pylint: disable=g-inconsistent-quotes -training = LazyLoader( - "training", globals(), - "keras.engine.training") -training_v1 = LazyLoader( - "training_v1", globals(), - "keras.engine.training_v1") -base_layer = LazyLoader( - "base_layer", globals(), - "keras.engine.base_layer") -base_layer_v1 = LazyLoader( - "base_layer_v1", globals(), - "keras.engine.base_layer_v1") -callbacks = LazyLoader( - "callbacks", globals(), - "keras.callbacks") -callbacks_v1 = LazyLoader( - "callbacks_v1", globals(), - "keras.callbacks_v1") - -# pylint: enable=g-inconsistent-quotes +training = LazyLoader("training", globals(), "keras.engine.training") +training_v1 = LazyLoader("training_v1", globals(), "keras.engine.training_v1") +base_layer = LazyLoader("base_layer", globals(), "keras.engine.base_layer") +base_layer_v1 = LazyLoader( + "base_layer_v1", globals(), "keras.engine.base_layer_v1" +) +callbacks = LazyLoader("callbacks", globals(), "keras.callbacks") +callbacks_v1 = LazyLoader("callbacks_v1", globals(), "keras.callbacks_v1") class ModelVersionSelector: - """Chooses between Keras v1 and v2 Model class.""" + """Chooses between Keras v1 and v2 Model class.""" - def __new__(cls, *args, **kwargs): # pylint: disable=unused-argument - use_v2 = should_use_v2() - cls = swap_class(cls, training.Model, training_v1.Model, use_v2) # pylint: disable=self-cls-assignment - return super(ModelVersionSelector, cls).__new__(cls) + def __new__(cls, *args, **kwargs): + use_v2 = should_use_v2() + cls = swap_class(cls, training.Model, training_v1.Model, use_v2) + return super(ModelVersionSelector, cls).__new__(cls) class LayerVersionSelector: - """Chooses between Keras v1 and v2 Layer class.""" + """Chooses between Keras v1 and v2 Layer class.""" - def __new__(cls, *args, **kwargs): # pylint: disable=unused-argument - use_v2 = should_use_v2() - cls = swap_class(cls, base_layer.Layer, base_layer_v1.Layer, use_v2) # pylint: disable=self-cls-assignment - return super(LayerVersionSelector, cls).__new__(cls) + def __new__(cls, *args, **kwargs): + use_v2 = should_use_v2() + cls = swap_class(cls, base_layer.Layer, base_layer_v1.Layer, use_v2) + return super(LayerVersionSelector, cls).__new__(cls) class TensorBoardVersionSelector: - """Chooses between Keras v1 and v2 TensorBoard callback class.""" - - def __new__(cls, *args, **kwargs): # pylint: disable=unused-argument - use_v2 = should_use_v2() - start_cls = cls - cls = swap_class(start_cls, callbacks.TensorBoard, callbacks_v1.TensorBoard, - use_v2) - if start_cls == callbacks_v1.TensorBoard and cls == callbacks.TensorBoard: - # Since the v2 class is not a subclass of the v1 class, __init__ has to - # be called manually. - return cls(*args, **kwargs) - return super(TensorBoardVersionSelector, cls).__new__(cls) + """Chooses between Keras v1 and v2 TensorBoard callback class.""" + + def __new__(cls, *args, **kwargs): + use_v2 = should_use_v2() + start_cls = cls + cls = swap_class( + start_cls, callbacks.TensorBoard, callbacks_v1.TensorBoard, use_v2 + ) + if ( + start_cls == callbacks_v1.TensorBoard + and cls == callbacks.TensorBoard + ): + # Since the v2 class is not a subclass of the v1 class, __init__ has + # to be called manually. + return cls(*args, **kwargs) + return super(TensorBoardVersionSelector, cls).__new__(cls) def should_use_v2(): - """Determine if v1 or v2 version should be used.""" - if tf.executing_eagerly(): - return True - elif tf.compat.v1.executing_eagerly_outside_functions(): - # Check for a v1 `wrap_function` FuncGraph. 
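All of the `*VersionSelector` classes above share one trick: `__new__` decides which concrete class to instantiate, based on the current execution mode. A toy illustration of that dispatch under the stated assumption that TF2 eager mode is active; the `_ThingV1`/`_ThingV2` names are invented:

```python
import tensorflow.compat.v2 as tf


class _ThingV1:
    kind = "v1"


class _ThingV2:
    kind = "v2"


class ThingVersionSelector:
    # Pick the concrete class at instantiation time, the way
    # ModelVersionSelector and LayerVersionSelector do above.
    def __new__(cls, *args, **kwargs):
        chosen = _ThingV2 if tf.executing_eagerly() else _ThingV1
        return super().__new__(chosen)


thing = ThingVersionSelector()
print(type(thing).__name__, thing.kind)  # "_ThingV2 v2" under eager execution
```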
- # Code inside a `wrap_function` is treated like v1 code. - graph = tf.compat.v1.get_default_graph() - if (getattr(graph, "name", False) and - graph.name.startswith("wrapped_function")): - return False - return True - else: - return False + """Determine if v1 or v2 version should be used.""" + if tf.executing_eagerly(): + return True + elif tf.compat.v1.executing_eagerly_outside_functions(): + # Check for a v1 `wrap_function` FuncGraph. + # Code inside a `wrap_function` is treated like v1 code. + graph = tf.compat.v1.get_default_graph() + if getattr(graph, "name", False) and graph.name.startswith( + "wrapped_function" + ): + return False + return True + else: + return False def swap_class(cls, v2_cls, v1_cls, use_v2): - """Swaps in v2_cls or v1_cls depending on graph mode.""" - if cls == object: + """Swaps in v2_cls or v1_cls depending on graph mode.""" + if cls == object: + return cls + if cls in (v2_cls, v1_cls): + return v2_cls if use_v2 else v1_cls + + # Recursively search superclasses to swap in the right Keras class. + new_bases = [] + for base in cls.__bases__: + if ( + use_v2 + and issubclass(base, v1_cls) + # `v1_cls` often extends `v2_cls`, so it may still call `swap_class` + # even if it doesn't need to. That being said, it may be the safest + # not to over optimize this logic for the sake of correctness, + # especially if we swap v1 & v2 classes that don't extend each + # other, or when the inheritance order is different. + or (not use_v2 and issubclass(base, v2_cls)) + ): + new_base = swap_class(base, v2_cls, v1_cls, use_v2) + else: + new_base = base + new_bases.append(new_base) + cls.__bases__ = tuple(new_bases) return cls - if cls in (v2_cls, v1_cls): - return v2_cls if use_v2 else v1_cls - - # Recursively search superclasses to swap in the right Keras class. - new_bases = [] - for base in cls.__bases__: - if ((use_v2 and issubclass(base, v1_cls) - # `v1_cls` often extends `v2_cls`, so it may still call `swap_class` - # even if it doesn't need to. That being said, it may be the safest - # not to over optimize this logic for the sake of correctness, - # especially if we swap v1 & v2 classes that don't extend each other, - # or when the inheritance order is different. - or (not use_v2 and issubclass(base, v2_cls)))): - new_base = swap_class(base, v2_cls, v1_cls, use_v2) - else: - new_base = base - new_bases.append(new_base) - cls.__bases__ = tuple(new_bases) - return cls def disallow_legacy_graph(cls_name, method_name): - if not tf.compat.v1.executing_eagerly_outside_functions(): - error_msg = ( - f"Calling `{cls_name}.{method_name}` in graph mode is not supported " - f"when the `{cls_name}` instance was constructed with eager mode " - f"enabled. Please construct your `{cls_name}` instance in graph mode or" - f" call `{cls_name}.{method_name}` with eager mode enabled.") - raise ValueError(error_msg) + if not tf.compat.v1.executing_eagerly_outside_functions(): + error_msg = ( + f"Calling `{cls_name}.{method_name}` in graph mode is not " + f"supported when the `{cls_name}` instance was constructed with " + f"eager mode enabled. Please construct your `{cls_name}` instance " + f"in graph mode or call `{cls_name}.{method_name}` with " + "eager mode enabled." 
+ ) + raise ValueError(error_msg) def is_v1_layer_or_model(obj): - return isinstance(obj, (base_layer_v1.Layer, training_v1.Model)) + return isinstance(obj, (base_layer_v1.Layer, training_v1.Model)) diff --git a/keras/utils/version_utils_test.py b/keras/utils/version_utils_test.py index 176debee170f..6c73cda93a26 100644 --- a/keras/utils/version_utils_test.py +++ b/keras/utils/version_utils_test.py @@ -16,166 +16,160 @@ import abc +import numpy as np +import tensorflow.compat.v2 as tf + import keras from keras.engine import base_layer from keras.engine import base_layer_v1 from keras.engine import training from keras.engine import training_v1 from keras.testing_infra import test_combinations -import numpy as np -import tensorflow.compat.v2 as tf @test_combinations.run_all_keras_modes class SplitUtilsTest(test_combinations.TestCase): - - def _check_model_class(self, model_class): - if tf.compat.v1.executing_eagerly_outside_functions(): - self.assertEqual(model_class, training.Model) - else: - self.assertEqual(model_class, training_v1.Model) - - def _check_layer_class(self, layer): - if tf.compat.v1.executing_eagerly_outside_functions(): - self.assertIsInstance(layer, base_layer.Layer) - self.assertNotIsInstance(layer, base_layer_v1.Layer) - else: - self.assertIsInstance(layer, base_layer_v1.Layer) - - def test_functional_model(self): - inputs = keras.Input(10) - outputs = keras.layers.Dense(1)(inputs) - model = keras.Model(inputs, outputs) - self._check_model_class(model.__class__.__bases__[0]) - self._check_layer_class(model) - - def test_subclass_model_with_functional_init(self): - inputs = keras.Input(10) - outputs = keras.layers.Dense(1)(inputs) - - class MyModel(keras.Model): - pass - - model = MyModel(inputs, outputs) - model_class = model.__class__.__bases__[0].__bases__[0] - self._check_model_class(model_class) - self._check_layer_class(model) - - def test_subclass_model_with_functional_init_interleaved_v1_functional(self): - with tf.Graph().as_default(): - inputs = keras.Input(10) - outputs = keras.layers.Dense(1)(inputs) - _ = keras.Model(inputs, outputs) - - inputs = keras.Input(10) - outputs = keras.layers.Dense(1)(inputs) - - class MyModel(keras.Model): - pass - - model = MyModel(inputs, outputs) - model_class = model.__class__.__bases__[0].__bases__[0] - self._check_model_class(model_class) - self._check_layer_class(model) - - def test_sequential_model(self): - model = keras.Sequential([keras.layers.Dense(1)]) - model_class = model.__class__.__bases__[0].__bases__[0] - self._check_model_class(model_class) - self._check_layer_class(model) - - def test_subclass_model(self): - - class MyModel(keras.Model): - - def call(self, x): - return 2 * x - - model = MyModel() - model_class = model.__class__.__bases__[0] - self._check_model_class(model_class) - self._check_layer_class(model) - - def test_layer(self): - class IdentityLayer(base_layer.Layer): - """A layer that returns it's input. - - Useful for testing a layer without a variable. 
- """ - - def call(self, inputs): - return inputs - - layer = IdentityLayer() - self._check_layer_class(layer) - - def test_multiple_subclass_model(self): - - class Model1(keras.Model): - pass - - class Model2(Model1): - - def call(self, x): - return 2 * x - - model = Model2() - model_class = model.__class__.__bases__[0].__bases__[0] - self._check_model_class(model_class) - self._check_layer_class(model) - - def test_user_provided_metaclass(self): - - class AbstractModel(keras.Model, metaclass=abc.ABCMeta): - - @abc.abstractmethod - def call(self, inputs): - """Calls the model.""" - - class MyModel(AbstractModel): - - def call(self, inputs): - return 2 * inputs - - with self.assertRaisesRegex(TypeError, 'instantiate abstract class'): - AbstractModel() # pylint: disable=abstract-class-instantiated - - model = MyModel() - model_class = model.__class__.__bases__[0].__bases__[0] - self._check_model_class(model_class) - self._check_layer_class(model) - - def test_multiple_inheritance(self): - - class Return2: - - def return_2(self): - return 2 - - class MyModel(keras.Model, Return2): - - def call(self, x): - return self.return_2() * x - - model = MyModel() - bases = model.__class__.__bases__ - self._check_model_class(bases[0]) - self.assertEqual(bases[1], Return2) - self.assertEqual(model.return_2(), 2) - self._check_layer_class(model) - - def test_fit_error(self): - if not tf.compat.v1.executing_eagerly_outside_functions(): - # Error only appears on the v2 class. - return - - model = keras.Sequential([keras.layers.Dense(1)]) - model.compile('sgd', 'mse') - x, y = np.ones((10, 10)), np.ones((10, 1)) - with tf.compat.v1.get_default_graph().as_default(): - with self.assertRaisesRegex( - ValueError, 'instance was constructed with eager mode enabled'): - model.fit(x, y, batch_size=2) - -if __name__ == '__main__': - tf.test.main() + def _check_model_class(self, model_class): + if tf.compat.v1.executing_eagerly_outside_functions(): + self.assertEqual(model_class, training.Model) + else: + self.assertEqual(model_class, training_v1.Model) + + def _check_layer_class(self, layer): + if tf.compat.v1.executing_eagerly_outside_functions(): + self.assertIsInstance(layer, base_layer.Layer) + self.assertNotIsInstance(layer, base_layer_v1.Layer) + else: + self.assertIsInstance(layer, base_layer_v1.Layer) + + def test_functional_model(self): + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs, outputs) + self._check_model_class(model.__class__.__bases__[0]) + self._check_layer_class(model) + + def test_subclass_model_with_functional_init(self): + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + + class MyModel(keras.Model): + pass + + model = MyModel(inputs, outputs) + model_class = model.__class__.__bases__[0].__bases__[0] + self._check_model_class(model_class) + self._check_layer_class(model) + + def test_subclass_model_with_functional_init_interleaved_v1_functional( + self, + ): + with tf.Graph().as_default(): + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + _ = keras.Model(inputs, outputs) + + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + + class MyModel(keras.Model): + pass + + model = MyModel(inputs, outputs) + model_class = model.__class__.__bases__[0].__bases__[0] + self._check_model_class(model_class) + self._check_layer_class(model) + + def test_sequential_model(self): + model = keras.Sequential([keras.layers.Dense(1)]) + model_class = model.__class__.__bases__[0].__bases__[0] + 
self._check_model_class(model_class) + self._check_layer_class(model) + + def test_subclass_model(self): + class MyModel(keras.Model): + def call(self, x): + return 2 * x + + model = MyModel() + model_class = model.__class__.__bases__[0] + self._check_model_class(model_class) + self._check_layer_class(model) + + def test_layer(self): + class IdentityLayer(base_layer.Layer): + """A layer that returns its input. + + Useful for testing a layer without a variable. + """ + + def call(self, inputs): + return inputs + + layer = IdentityLayer() + self._check_layer_class(layer) + + def test_multiple_subclass_model(self): + class Model1(keras.Model): + pass + + class Model2(Model1): + def call(self, x): + return 2 * x + + model = Model2() + model_class = model.__class__.__bases__[0].__bases__[0] + self._check_model_class(model_class) + self._check_layer_class(model) + + def test_user_provided_metaclass(self): + class AbstractModel(keras.Model, metaclass=abc.ABCMeta): + @abc.abstractmethod + def call(self, inputs): + """Calls the model.""" + + class MyModel(AbstractModel): + def call(self, inputs): + return 2 * inputs + + with self.assertRaisesRegex(TypeError, "instantiate abstract class"): + AbstractModel() + + model = MyModel() + model_class = model.__class__.__bases__[0].__bases__[0] + self._check_model_class(model_class) + self._check_layer_class(model) + + def test_multiple_inheritance(self): + class Return2: + def return_2(self): + return 2 + + class MyModel(keras.Model, Return2): + def call(self, x): + return self.return_2() * x + + model = MyModel() + bases = model.__class__.__bases__ + self._check_model_class(bases[0]) + self.assertEqual(bases[1], Return2) + self.assertEqual(model.return_2(), 2) + self._check_layer_class(model) + + def test_fit_error(self): + if not tf.compat.v1.executing_eagerly_outside_functions(): + # Error only appears on the v2 class. + return + + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile("sgd", "mse") + x, y = np.ones((10, 10)), np.ones((10, 1)) + with tf.compat.v1.get_default_graph().as_default(): + with self.assertRaisesRegex( + ValueError, "instance was constructed with eager mode enabled" + ): + model.fit(x, y, batch_size=2) + + +if __name__ == "__main__": + tf.test.main() diff --git a/keras/utils/vis_utils.py b/keras/utils/vis_utils.py index accf546f1bf6..7cb0115992b2 100644 --- a/keras/utils/vis_utils.py +++ b/keras/utils/vis_utils.py @@ -12,448 +12,486 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=protected-access -# pylint: disable=g-import-not-at-top -"""Utilities related to model visualization.""" -import tensorflow.compat.v2 as tf + +"""Utilities related to model visualization.""" import os import sys -import re + +import tensorflow.compat.v2 as tf from keras.utils import io_utils -from tensorflow.python.util.tf_export import keras_export +from keras.utils import layer_utils +# isort: off +from tensorflow.python.util.tf_export import keras_export try: - # pydot-ng is a fork of pydot that is better maintained. - import pydot_ng as pydot + # pydot-ng is a fork of pydot that is better maintained. + import pydot_ng as pydot except ImportError: - # pydotplus is an improved version of pydot - try: - import pydotplus as pydot - except ImportError: - # Fall back on pydot if necessary.
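An aside on the import block being reshuffled here: it is a three-level fallback (`pydot_ng`, then `pydotplus`, then plain `pydot`). The same cascade can be expressed as a loop; a compact sketch with a hypothetical helper name:

```python
import importlib


def import_first_available(*names):
    # Try each candidate in order and return the first that imports;
    # mirrors the pydot_ng -> pydotplus -> pydot fallback in the diff.
    for name in names:
        try:
            return importlib.import_module(name)
        except ImportError:
            continue
    return None


pydot = import_first_available("pydot_ng", "pydotplus", "pydot")
print("pydot available:", pydot is not None)
```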
+ # pydotplus is an improved version of pydot try: - import pydot + import pydotplus as pydot except ImportError: - pydot = None + # Fall back on pydot if necessary. + try: + import pydot + except ImportError: + pydot = None def check_pydot(): - """Returns True if PyDot is available.""" - return pydot is not None + """Returns True if PyDot is available.""" + return pydot is not None def check_graphviz(): - """Returns True if both PyDot and Graphviz are available.""" - if not check_pydot(): - return False - try: - # Attempt to create an image of a blank graph - # to check the pydot/graphviz installation. - pydot.Dot.create(pydot.Dot()) - return True - except (OSError, pydot.InvocationException): - return False + """Returns True if both PyDot and Graphviz are available.""" + if not check_pydot(): + return False + try: + # Attempt to create an image of a blank graph + # to check the pydot/graphviz installation. + pydot.Dot.create(pydot.Dot()) + return True + except (OSError, pydot.InvocationException): + return False def is_wrapped_model(layer): - from keras.engine import functional - from keras.layers import Wrapper - return (isinstance(layer, Wrapper) and - isinstance(layer.layer, functional.Functional)) + from keras.engine import functional + from keras.layers import Wrapper + + return isinstance(layer, Wrapper) and isinstance( + layer.layer, functional.Functional + ) def add_edge(dot, src, dst): - if not dot.get_edge(src, dst): - dot.add_edge(pydot.Edge(src, dst)) - - -def get_layer_index_bound_by_layer_name(model, layer_names): - """Return specific range of layers to plot, mainly for sub-graph plot models. - - Args: - model: tf.keras.Model - layer_names: unique name of layer of the model, type(str) - - Returns: - return the index value of layer based on its unique name (layer_names) - """ - lower_index = [] - upper_index = [] - for idx, layer in enumerate(model.layers): - if re.match(layer_names[0], layer.name): - lower_index.append(idx) - if re.match(layer_names[1], layer.name): - upper_index.append(idx) - if not lower_index or not upper_index: - raise ValueError( - 'Passed layer_names does not match to layers in the model. ' - f'Recieved: {layer_names}') - if min(lower_index) > max(upper_index): - return [min(upper_index), max(lower_index)] - return [min(lower_index), max(upper_index)] - - -@keras_export('keras.utils.model_to_dot') -def model_to_dot(model, - show_shapes=False, - show_dtype=False, - show_layer_names=True, - rankdir='TB', - expand_nested=False, - dpi=96, - subgraph=False, - layer_range=None, - show_layer_activations=False): - """Convert a Keras model to dot format. - - Args: - model: A Keras model instance. - show_shapes: whether to display shape information. - show_dtype: whether to display layer dtypes. - show_layer_names: whether to display layer names. - rankdir: `rankdir` argument passed to PyDot, - a string specifying the format of the plot: - 'TB' creates a vertical plot; - 'LR' creates a horizontal plot. - expand_nested: whether to expand nested models into clusters. - dpi: Dots per inch. - subgraph: whether to return a `pydot.Cluster` instance. 
- layer_range: input of `list` containing two `str` items, which is the + if not dot.get_edge(src, dst): + dot.add_edge(pydot.Edge(src, dst)) + + +@keras_export("keras.utils.model_to_dot") +def model_to_dot( + model, + show_shapes=False, + show_dtype=False, + show_layer_names=True, + rankdir="TB", + expand_nested=False, + dpi=96, + subgraph=False, + layer_range=None, + show_layer_activations=False, + show_trainable=False, +): + """Convert a Keras model to dot format. + + Args: + model: A Keras model instance. + show_shapes: whether to display shape information. + show_dtype: whether to display layer dtypes. + show_layer_names: whether to display layer names. + rankdir: `rankdir` argument passed to PyDot, + a string specifying the format of the plot: + 'TB' creates a vertical plot; + 'LR' creates a horizontal plot. + expand_nested: whether to expand nested models into clusters. + dpi: Dots per inch. + subgraph: whether to return a `pydot.Cluster` instance. + layer_range: input of `list` containing two `str` items, which is the + starting layer name and ending layer name (both inclusive) indicating + the range of layers for which the `pydot.Dot` will be generated. It + also accepts regex patterns instead of exact name. In such case, start + predicate will be the first element it matches to `layer_range[0]` + and the end predicate will be the last element it matches to + `layer_range[1]`. By default `None` which considers all layers of + model. Note that you must pass a range such that the resultant subgraph + is complete. + show_layer_activations: Display layer activations (only for layers that + have an `activation` property). + show_trainable: whether to display if a layer is trainable. Displays 'T' + when the layer is trainable and 'NT' when it is not trainable. + + Returns: + A `pydot.Dot` instance representing the Keras model or + a `pydot.Cluster` instance representing nested model if + `subgraph=True`. + + Raises: + ValueError: if `model_to_dot` is called before the model is built. + ImportError: if pydot is not available. + """ + + if not model.built: + raise ValueError( + "This model has not yet been built. " + "Build the model first by calling `build()` or by calling " + "the model on a batch of data." + ) + + from keras.engine import functional + from keras.engine import sequential + from keras.layers import Wrapper + + if not check_pydot(): + raise ImportError( + "You must install pydot (`pip install pydot`) for " + "model_to_dot to work." + ) + + if subgraph: + dot = pydot.Cluster(style="dashed", graph_name=model.name) + dot.set("label", model.name) + dot.set("labeljust", "l") + else: + dot = pydot.Dot() + dot.set("rankdir", rankdir) + dot.set("concentrate", True) + dot.set("dpi", dpi) + dot.set_node_defaults(shape="record") + + if layer_range is not None: + if len(layer_range) != 2: + raise ValueError( + "layer_range must be of shape (2,). Received: " + f"layer_range = {layer_range} of length {len(layer_range)}" + ) + if not isinstance(layer_range[0], str) or not isinstance( + layer_range[1], str + ): + raise ValueError( + "layer_range should contain string type only. " + f"Received: {layer_range}" + ) + layer_range = layer_utils.get_layer_index_bound_by_layer_name( + model, layer_range + ) + if layer_range[0] < 0 or layer_range[1] > len(model.layers): + raise ValueError( + "Both values in layer_range should be in range (0, " + f"{len(model.layers)}). 
Received: {layer_range}" + ) + + sub_n_first_node = {} + sub_n_last_node = {} + sub_w_first_node = {} + sub_w_last_node = {} + + layers = model.layers + if not model._is_graph_network: + node = pydot.Node(str(id(model)), label=model.name) + dot.add_node(node) + return dot + elif isinstance(model, sequential.Sequential): + if not model.built: + model.build() + layers = super(sequential.Sequential, model).layers + + # Create graph nodes. + for i, layer in enumerate(layers): + if (layer_range) and (i < layer_range[0] or i >= layer_range[1]): + continue + + layer_id = str(id(layer)) + + # Append a wrapped layer's label to node's label, if it exists. + layer_name = layer.name + class_name = layer.__class__.__name__ + + if isinstance(layer, Wrapper): + if expand_nested and isinstance(layer.layer, functional.Functional): + submodel_wrapper = model_to_dot( + layer.layer, + show_shapes, + show_dtype, + show_layer_names, + rankdir, + expand_nested, + subgraph=True, + show_layer_activations=show_layer_activations, + show_trainable=show_trainable, + ) + # sub_w : submodel_wrapper + sub_w_nodes = submodel_wrapper.get_nodes() + sub_w_first_node[layer.layer.name] = sub_w_nodes[0] + sub_w_last_node[layer.layer.name] = sub_w_nodes[-1] + dot.add_subgraph(submodel_wrapper) + else: + layer_name = f"{layer_name}({layer.layer.name})" + child_class_name = layer.layer.__class__.__name__ + class_name = f"{class_name}({child_class_name})" + + if expand_nested and isinstance(layer, functional.Functional): + submodel_not_wrapper = model_to_dot( + layer, + show_shapes, + show_dtype, + show_layer_names, + rankdir, + expand_nested, + subgraph=True, + show_layer_activations=show_layer_activations, + show_trainable=show_trainable, + ) + # sub_n : submodel_not_wrapper + sub_n_nodes = submodel_not_wrapper.get_nodes() + sub_n_first_node[layer.name] = sub_n_nodes[0] + sub_n_last_node[layer.name] = sub_n_nodes[-1] + dot.add_subgraph(submodel_not_wrapper) + + # Create node's label. + label = class_name + + # Rebuild the label as a table including the layer's activation. + if ( + show_layer_activations + and hasattr(layer, "activation") + and layer.activation is not None + ): + if hasattr(layer.activation, "name"): + activation_name = layer.activation.name + elif hasattr(layer.activation, "__name__"): + activation_name = layer.activation.__name__ + else: + activation_name = str(layer.activation) + label = "{%s|%s}" % (label, activation_name) + + # Rebuild the label as a table including the layer's name. + if show_layer_names: + label = f"{layer_name}|{label}" + + # Rebuild the label as a table including the layer's dtype. + if show_dtype: + + def format_dtype(dtype): + if dtype is None: + return "?" + else: + return str(dtype) + + label = f"{label}|{format_dtype(layer.dtype)}" + + # Rebuild the label as a table including input/output shapes. + if show_shapes: + + def format_shape(shape): + return ( + str(shape) + .replace(str(None), "None") + .replace("{", r"\{") + .replace("}", r"\}") + ) + + try: + outputlabels = format_shape(layer.output_shape) + except AttributeError: + outputlabels = "?" + if hasattr(layer, "input_shape"): + inputlabels = format_shape(layer.input_shape) + elif hasattr(layer, "input_shapes"): + inputlabels = ", ".join( + [format_shape(ishape) for ishape in layer.input_shapes] + ) + else: + inputlabels = "?" 
+ label = "{%s}|{input:|output:}|{{%s}|{%s}}" % ( + label, + inputlabels, + outputlabels, + ) + + # Rebuild the label as a table including trainable status + if show_trainable: + label = f"{'T' if layer.trainable else 'NT'}|{label}" + + if not expand_nested or not isinstance(layer, functional.Functional): + node = pydot.Node(layer_id, label=label) + dot.add_node(node) + + # Connect nodes with edges. + for i, layer in enumerate(layers): + if (layer_range) and (i <= layer_range[0] or i >= layer_range[1]): + continue + layer_id = str(id(layer)) + for i, node in enumerate(layer._inbound_nodes): + node_key = layer.name + "_ib-" + str(i) + if node_key in model._network_nodes: + for inbound_layer in tf.nest.flatten(node.inbound_layers): + inbound_layer_id = str(id(inbound_layer)) + if not expand_nested: + assert dot.get_node(inbound_layer_id) + assert dot.get_node(layer_id) + add_edge(dot, inbound_layer_id, layer_id) + else: + # if inbound_layer is not Model or wrapped Model + if not isinstance( + inbound_layer, functional.Functional + ) and not is_wrapped_model(inbound_layer): + # if current layer is not Model or wrapped Model + if not isinstance( + layer, functional.Functional + ) and not is_wrapped_model(layer): + assert dot.get_node(inbound_layer_id) + assert dot.get_node(layer_id) + add_edge(dot, inbound_layer_id, layer_id) + # if current layer is Model + elif isinstance(layer, functional.Functional): + add_edge( + dot, + inbound_layer_id, + sub_n_first_node[layer.name].get_name(), + ) + # if current layer is wrapped Model + elif is_wrapped_model(layer): + add_edge(dot, inbound_layer_id, layer_id) + name = sub_w_first_node[ + layer.layer.name + ].get_name() + add_edge(dot, layer_id, name) + # if inbound_layer is Model + elif isinstance(inbound_layer, functional.Functional): + name = sub_n_last_node[ + inbound_layer.name + ].get_name() + if isinstance(layer, functional.Functional): + output_name = sub_n_first_node[ + layer.name + ].get_name() + add_edge(dot, name, output_name) + else: + add_edge(dot, name, layer_id) + # if inbound_layer is wrapped Model + elif is_wrapped_model(inbound_layer): + inbound_layer_name = inbound_layer.layer.name + add_edge( + dot, + sub_w_last_node[inbound_layer_name].get_name(), + layer_id, + ) + return dot + + +@keras_export("keras.utils.plot_model") +def plot_model( + model, + to_file="model.png", + show_shapes=False, + show_dtype=False, + show_layer_names=True, + rankdir="TB", + expand_nested=False, + dpi=96, + layer_range=None, + show_layer_activations=False, + show_trainable=False, +): + """Converts a Keras model to dot format and save to a file. + + Example: + + ```python + input = tf.keras.Input(shape=(100,), dtype='int32', name='input') + x = tf.keras.layers.Embedding( + output_dim=512, input_dim=10000, input_length=100)(input) + x = tf.keras.layers.LSTM(32)(x) + x = tf.keras.layers.Dense(64, activation='relu')(x) + x = tf.keras.layers.Dense(64, activation='relu')(x) + x = tf.keras.layers.Dense(64, activation='relu')(x) + output = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(x) + model = tf.keras.Model(inputs=[input], outputs=[output]) + dot_img_file = '/tmp/model_1.png' + tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True) + ``` + + Args: + model: A Keras model instance + to_file: File name of the plot image. + show_shapes: whether to display shape information. + show_dtype: whether to display layer dtypes. + show_layer_names: whether to display layer names. 
+ rankdir: `rankdir` argument passed to PyDot, + a string specifying the format of the plot: 'TB' creates a vertical + plot; 'LR' creates a horizontal plot. + expand_nested: Whether to expand nested models into clusters. + dpi: Dots per inch. + layer_range: input of `list` containing two `str` items, which is the starting layer name and ending layer name (both inclusive) indicating - the range of layers for which the `pydot.Dot` will be generated. It - also accepts regex patterns instead of exact name. In such case, start - predicate will be the first element it matches to `layer_range[0]` - and the end predicate will be the last element it matches to - `layer_range[1]`. By default `None` which considers all layers of - model. Note that you must pass range such that the resultant subgraph - must be complete. - show_layer_activations: Display layer activations (only for layers that + the range of layers for which the plot will be generated. It also + accepts regex patterns instead of exact name. In such case, start + predicate will be the first element it matches to `layer_range[0]` and + the end predicate will be the last element it matches to + `layer_range[1]`. By default `None` which considers all layers of model. + Note that you must pass a range such that the resultant subgraph is + complete. + show_layer_activations: Display layer activations (only for layers that have an `activation` property). + show_trainable: whether to display if a layer is trainable. Displays 'T' + when the layer is trainable and 'NT' when it is not trainable. + + Raises: + ImportError: if graphviz or pydot are not available. + ValueError: if `plot_model` is called before the model is built. + + Returns: + A Jupyter notebook Image object if Jupyter is installed. + This enables in-line display of the model plots in notebooks. + """ - Returns: - A `pydot.Dot` instance representing the Keras model or - a `pydot.Cluster` instance representing nested model if - `subgraph=True`. - - Raises: - ValueError: if `model_to_dot` is called before the model is built. - ImportError: if pydot is not available. - """ - - if not model.built: - raise ValueError('This model has not yet been built. ' - 'Build the model first by calling `build()` or by calling ' - 'the model on a batch of data.') - - from keras.layers import Wrapper - from keras.engine import sequential - from keras.engine import functional - - if not check_pydot(): - raise ImportError('You must install pydot (`pip install pydot`) for ' - 'model_to_dot to work.') - - if subgraph: - dot = pydot.Cluster(style='dashed', graph_name=model.name) - dot.set('label', model.name) - dot.set('labeljust', 'l') - else: - dot = pydot.Dot() - dot.set('rankdir', rankdir) - dot.set('concentrate', True) - dot.set('dpi', dpi) - dot.set_node_defaults(shape='record') - - if layer_range is not None: - if len(layer_range) != 2: - raise ValueError( - 'layer_range must be of shape (2,). Received: ' - f'layer_range = {layer_range} of length {len(layer_range)}') - if (not isinstance(layer_range[0], str) or - not isinstance(layer_range[1], str)): - raise ValueError( - 'layer_range should contain string type only. ' - f'Received: {layer_range}') - layer_range = get_layer_index_bound_by_layer_name(model, layer_range) - if layer_range[0] < 0 or layer_range[1] > len(model.layers): - raise ValueError('Both values in layer_range should be in range (0, ' - f'{len(model.layers)}. 
Received: {layer_range}') - - sub_n_first_node = {} - sub_n_last_node = {} - sub_w_first_node = {} - sub_w_last_node = {} - - layers = model.layers - if not model._is_graph_network: - node = pydot.Node(str(id(model)), label=model.name) - dot.add_node(node) - return dot - elif isinstance(model, sequential.Sequential): if not model.built: - model.build() - layers = super(sequential.Sequential, model).layers - - # Create graph nodes. - for i, layer in enumerate(layers): - if (layer_range) and (i < layer_range[0] or i > layer_range[1]): - continue - - layer_id = str(id(layer)) - - # Append a wrapped layer's label to node's label, if it exists. - layer_name = layer.name - class_name = layer.__class__.__name__ - - if isinstance(layer, Wrapper): - if expand_nested and isinstance(layer.layer, - functional.Functional): - submodel_wrapper = model_to_dot( - layer.layer, - show_shapes, - show_dtype, - show_layer_names, - rankdir, - expand_nested, - subgraph=True) - # sub_w : submodel_wrapper - sub_w_nodes = submodel_wrapper.get_nodes() - sub_w_first_node[layer.layer.name] = sub_w_nodes[0] - sub_w_last_node[layer.layer.name] = sub_w_nodes[-1] - dot.add_subgraph(submodel_wrapper) - else: - layer_name = '{}({})'.format(layer_name, layer.layer.name) - child_class_name = layer.layer.__class__.__name__ - class_name = '{}({})'.format(class_name, child_class_name) - - if expand_nested and isinstance(layer, functional.Functional): - submodel_not_wrapper = model_to_dot( - layer, - show_shapes, - show_dtype, - show_layer_names, - rankdir, - expand_nested, - subgraph=True) - # sub_n : submodel_not_wrapper - sub_n_nodes = submodel_not_wrapper.get_nodes() - sub_n_first_node[layer.name] = sub_n_nodes[0] - sub_n_last_node[layer.name] = sub_n_nodes[-1] - dot.add_subgraph(submodel_not_wrapper) - - # Create node's label. - label = class_name - - # Rebuild the label as a table including the layer's activation. - if (show_layer_activations and hasattr(layer, 'activation') and - layer.activation is not None): - if hasattr(layer.activation, 'name'): - activation_name = layer.activation.name - elif hasattr(layer.activation, '__name__'): - activation_name = layer.activation.__name__ - else: - activation_name = str(layer.activation) - label = '{%s|%s}' % (label, activation_name) - - # Rebuild the label as a table including the layer's name. - if show_layer_names: - label = '%s|%s' % (layer_name, label) - - # Rebuild the label as a table including the layer's dtype. - if show_dtype: - - def format_dtype(dtype): - if dtype is None: - return '?' + raise ValueError( + "This model has not yet been built. " + "Build the model first by calling `build()` or by calling " + "the model on a batch of data." + ) + + if not check_graphviz(): + message = ( + "You must install pydot (`pip install pydot`) " + "and install graphviz " + "(see instructions at https://graphviz.gitlab.io/download/) " + "for plot_model to work." + ) + if "IPython.core.magics.namespace" in sys.modules: + # We don't raise an exception here in order to avoid crashing + # notebook tests where graphviz is not available. + io_utils.print_msg(message) + return else: - return str(dtype) - - label = '%s|%s' % (label, format_dtype(layer.dtype)) - - # Rebuild the label as a table including input/output shapes. - if show_shapes: - - def format_shape(shape): - return str(shape).replace(str(None), 'None') - - try: - outputlabels = format_shape(layer.output_shape) - except AttributeError: - outputlabels = '?' 
- if hasattr(layer, 'input_shape'): - inputlabels = format_shape(layer.input_shape) - elif hasattr(layer, 'input_shapes'): - inputlabels = ', '.join( - [format_shape(ishape) for ishape in layer.input_shapes]) - else: - inputlabels = '?' - label = '{%s}|{input:|output:}|{{%s}|{%s}}' % (label, inputlabels, - outputlabels) - if not expand_nested or not isinstance( - layer, functional.Functional): - node = pydot.Node(layer_id, label=label) - dot.add_node(node) - - # Connect nodes with edges. - for i, layer in enumerate(layers): - if (layer_range) and (i <= layer_range[0] or i > layer_range[1]): - continue - layer_id = str(id(layer)) - for i, node in enumerate(layer._inbound_nodes): - node_key = layer.name + '_ib-' + str(i) - if node_key in model._network_nodes: - for inbound_layer in tf.nest.flatten(node.inbound_layers): - inbound_layer_id = str(id(inbound_layer)) - if not expand_nested: - assert dot.get_node(inbound_layer_id) - assert dot.get_node(layer_id) - add_edge(dot, inbound_layer_id, layer_id) - else: - # if inbound_layer is not Model or wrapped Model - if (not isinstance(inbound_layer, - functional.Functional) and - not is_wrapped_model(inbound_layer)): - # if current layer is not Model or wrapped Model - if (not isinstance(layer, functional.Functional) and - not is_wrapped_model(layer)): - assert dot.get_node(inbound_layer_id) - assert dot.get_node(layer_id) - add_edge(dot, inbound_layer_id, layer_id) - # if current layer is Model - elif isinstance(layer, functional.Functional): - add_edge(dot, inbound_layer_id, - sub_n_first_node[layer.name].get_name()) - # if current layer is wrapped Model - elif is_wrapped_model(layer): - add_edge(dot, inbound_layer_id, layer_id) - name = sub_w_first_node[layer.layer.name].get_name() - add_edge(dot, layer_id, name) - # if inbound_layer is Model - elif isinstance(inbound_layer, functional.Functional): - name = sub_n_last_node[inbound_layer.name].get_name() - if isinstance(layer, functional.Functional): - output_name = sub_n_first_node[layer.name].get_name() - add_edge(dot, name, output_name) - else: - add_edge(dot, name, layer_id) - # if inbound_layer is wrapped Model - elif is_wrapped_model(inbound_layer): - inbound_layer_name = inbound_layer.layer.name - add_edge(dot, - sub_w_last_node[inbound_layer_name].get_name(), - layer_id) - return dot - - -@keras_export('keras.utils.plot_model') -def plot_model(model, - to_file='model.png', - show_shapes=False, - show_dtype=False, - show_layer_names=True, - rankdir='TB', - expand_nested=False, - dpi=96, - layer_range=None, - show_layer_activations=False): - """Converts a Keras model to dot format and save to a file. - - Example: - - ```python - input = tf.keras.Input(shape=(100,), dtype='int32', name='input') - x = tf.keras.layers.Embedding( - output_dim=512, input_dim=10000, input_length=100)(input) - x = tf.keras.layers.LSTM(32)(x) - x = tf.keras.layers.Dense(64, activation='relu')(x) - x = tf.keras.layers.Dense(64, activation='relu')(x) - x = tf.keras.layers.Dense(64, activation='relu')(x) - output = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(x) - model = tf.keras.Model(inputs=[input], outputs=[output]) - dot_img_file = '/tmp/model_1.png' - tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True) - ``` - - Args: - model: A Keras model instance - to_file: File name of the plot image. - show_shapes: whether to display shape information. - show_dtype: whether to display layer dtypes. - show_layer_names: whether to display layer names. 
- rankdir: `rankdir` argument passed to PyDot, - a string specifying the format of the plot: 'TB' creates a vertical - plot; 'LR' creates a horizontal plot. - expand_nested: Whether to expand nested models into clusters. - dpi: Dots per inch. - layer_range: input of `list` containing two `str` items, which is the - starting layer name and ending layer name (both inclusive) indicating the - range of layers for which the plot will be generated. It also accepts - regex patterns instead of exact name. In such case, start predicate will - be the first element it matches to `layer_range[0]` and the end predicate - will be the last element it matches to `layer_range[1]`. By default `None` - which considers all layers of model. Note that you must pass range such - that the resultant subgraph must be complete. - show_layer_activations: Display layer activations (only for layers that - have an `activation` property). - - Raises: - ImportError: if graphviz or pydot are not available. - ValueError: if `plot_model` is called before the model is built. - - Returns: - A Jupyter notebook Image object if Jupyter is installed. - This enables in-line display of the model plots in notebooks. - """ - - if not model.built: - raise ValueError('This model has not yet been built. ' - 'Build the model first by calling `build()` or by calling ' - 'the model on a batch of data.') - - if not check_graphviz(): - message = ( - 'You must install pydot (`pip install pydot`) ' - 'and install graphviz ' - '(see instructions at https://graphviz.gitlab.io/download/) ' - 'for plot_model to work.') - if 'IPython.core.magics.namespace' in sys.modules: - # We don't raise an exception here in order to avoid crashing notebook - # tests where graphviz is not available. - io_utils.print_msg(message) - return + raise ImportError(message) + + dot = model_to_dot( + model, + show_shapes=show_shapes, + show_dtype=show_dtype, + show_layer_names=show_layer_names, + rankdir=rankdir, + expand_nested=expand_nested, + dpi=dpi, + layer_range=layer_range, + show_layer_activations=show_layer_activations, + show_trainable=show_trainable, + ) + to_file = io_utils.path_to_string(to_file) + if dot is None: + return + _, extension = os.path.splitext(to_file) + if not extension: + extension = "png" else: - raise ImportError(message) - - dot = model_to_dot( - model, - show_shapes=show_shapes, - show_dtype=show_dtype, - show_layer_names=show_layer_names, - rankdir=rankdir, - expand_nested=expand_nested, - dpi=dpi, - layer_range=layer_range, - show_layer_activations=show_layer_activations) - to_file = io_utils.path_to_string(to_file) - if dot is None: - return - _, extension = os.path.splitext(to_file) - if not extension: - extension = 'png' - else: - extension = extension[1:] - # Save image to disk. - dot.write(to_file, format=extension) - # Return the image as a Jupyter Image object, to be displayed in-line. - # Note that we cannot easily detect whether the code is running in a - # notebook, and thus we always return the Image if Jupyter is available. - if extension != 'pdf': - try: - from IPython import display - return display.Image(filename=to_file) - except ImportError: - pass + extension = extension[1:] + # Save image to disk. + dot.write(to_file, format=extension) + # Return the image as a Jupyter Image object, to be displayed in-line. + # Note that we cannot easily detect whether the code is running in a + # notebook, and thus we always return the Image if Jupyter is available. 
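+    # (PDF is skipped below: `IPython.display.Image` renders raster formats
+    # such as PNG, but not PDF.)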
+ if extension != "pdf": + try: + from IPython import display + + return display.Image(filename=to_file) + except ImportError: + pass diff --git a/keras/utils/vis_utils_test.py b/keras/utils/vis_utils_test.py index 185b83ef0e89..1665c8b0268d 100644 --- a/keras/utils/vis_utils_test.py +++ b/keras/utils/vis_utils_test.py @@ -15,228 +15,303 @@ """Tests for Keras Vis utils.""" import tensorflow.compat.v2 as tf - from absl.testing import parameterized import keras from keras.applications import efficientnet +from keras.utils import layer_utils from keras.utils import vis_utils class ModelToDotFormatTest(tf.test.TestCase, parameterized.TestCase): + def test_plot_model_cnn(self): + model = keras.Sequential() + model.add( + keras.layers.Conv2D( + filters=2, + kernel_size=(2, 3), + input_shape=(3, 5, 5), + name="conv", + ) + ) + model.add(keras.layers.Flatten(name="flat")) + model.add(keras.layers.Dense(5, name="dense")) + dot_img_file = "model_1.png" + try: + vis_utils.plot_model( + model, to_file=dot_img_file, show_shapes=True, show_dtype=True + ) + self.assertTrue(tf.io.gfile.exists(dot_img_file)) + tf.io.gfile.remove(dot_img_file) + except ImportError: + pass + + def test_plot_model_with_wrapped_layers_and_models(self): + inputs = keras.Input(shape=(None, 3)) + lstm = keras.layers.LSTM(6, return_sequences=True, name="lstm") + x = lstm(inputs) + # Add layer inside a Wrapper + bilstm = keras.layers.Bidirectional( + keras.layers.LSTM(16, return_sequences=True, name="bilstm") + ) + x = bilstm(x) + # Add model inside a Wrapper + submodel = keras.Sequential( + [keras.layers.Dense(32, name="dense", input_shape=(None, 32))] + ) + wrapped_dense = keras.layers.TimeDistributed(submodel) + x = wrapped_dense(x) + # Add shared submodel + outputs = submodel(x) + model = keras.Model(inputs, outputs) + dot_img_file = "model_2.png" + try: + vis_utils.plot_model( + model, + to_file=dot_img_file, + show_shapes=True, + show_dtype=True, + expand_nested=True, + ) + self.assertTrue(tf.io.gfile.exists(dot_img_file)) + tf.io.gfile.remove(dot_img_file) + except ImportError: + pass + + def test_plot_model_with_add_loss(self): + inputs = keras.Input(shape=(None, 3)) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs, outputs) + model.add_loss(tf.reduce_mean(outputs)) + dot_img_file = "model_3.png" + try: + vis_utils.plot_model( + model, + to_file=dot_img_file, + show_shapes=True, + show_dtype=True, + expand_nested=True, + ) + self.assertTrue(tf.io.gfile.exists(dot_img_file)) + tf.io.gfile.remove(dot_img_file) + except ImportError: + pass + + model = keras.Sequential( + [keras.Input(shape=(None, 3)), keras.layers.Dense(1)] + ) + model.add_loss(tf.reduce_mean(model.output)) + dot_img_file = "model_4.png" + try: + vis_utils.plot_model( + model, + to_file=dot_img_file, + show_shapes=True, + show_dtype=True, + expand_nested=True, + ) + self.assertTrue(tf.io.gfile.exists(dot_img_file)) + tf.io.gfile.remove(dot_img_file) + except ImportError: + pass + + @parameterized.parameters( + {"show_shapes": False, "show_dtype": False}, + {"show_shapes": False, "show_dtype": True}, + {"show_shapes": True, "show_dtype": False}, + {"show_shapes": True, "show_dtype": True}, + ) + def test_plot_model_cnn_with_activations(self, show_shapes, show_dtype): + model = keras.Sequential() + model.add( + keras.layers.Conv2D( + filters=2, + kernel_size=2, + input_shape=(9, 9, 3), + activation="relu", + ) + ) + model.add( + keras.layers.Conv2D( + filters=4, kernel_size=2, strides=(2, 2), activation="relu" + ) + ) + 
model.add(keras.layers.Flatten(name="flat")) + model.add(keras.layers.Dense(5, name="head", activation="softmax")) + dot_img_file = "model_5.png" + try: + vis_utils.plot_model( + model, + to_file=dot_img_file, + show_shapes=show_shapes, + show_dtype=show_dtype, + show_layer_activations=True, + ) + self.assertTrue(tf.io.gfile.exists(dot_img_file)) + tf.io.gfile.remove(dot_img_file) + except ImportError: + pass + + @parameterized.parameters( + {"layer_range": ["block1a_project_conv", "block1a_activation"]}, + {"layer_range": ["block1a_activation", "block1a_project_conv"]}, + {"layer_range": [r"block*", "block2a_se_excite"]}, + {"layer_range": [r"block\da_activation", r"block\da_project_bn"]}, + ) + def test_dot_layer_range(self, layer_range): + model = efficientnet.EfficientNetB0(weights=None) + layer_ids_from_model = get_layer_ids_from_model(model, layer_range) + try: + dot = vis_utils.model_to_dot(model, layer_range=layer_range) + dot_edges = dot.get_edges() + layer_ids_from_dot = get_layer_ids_from_dot(dot_edges) + self.assertAllEqual( + sorted(layer_ids_from_model), sorted(layer_ids_from_dot) + ) + except ImportError: + pass + + @parameterized.parameters( + {"layer_range": ["block1a_project_conv", "block1a_activation"]}, + {"layer_range": ["block1a_activation", "block1a_project_conv"]}, + {"layer_range": [r"block*", "block2a_se_excite"]}, + {"layer_range": [r"block\da_activation", r"block\da_project_bn"]}, + ) + def test_plot_layer_range(self, layer_range): + model = efficientnet.EfficientNetB0(weights=None) + effnet_subplot = "model_effnet.png" + try: + vis_utils.plot_model( + model, to_file=effnet_subplot, layer_range=layer_range + ) + self.assertTrue(tf.io.gfile.exists(effnet_subplot)) + except ImportError: + pass + finally: + if tf.io.gfile.exists(effnet_subplot): + tf.io.gfile.remove(effnet_subplot) - def test_plot_model_cnn(self): - model = keras.Sequential() - model.add( - keras.layers.Conv2D( - filters=2, kernel_size=(2, 3), input_shape=(3, 5, 5), name='conv')) - model.add(keras.layers.Flatten(name='flat')) - model.add(keras.layers.Dense(5, name='dense')) - dot_img_file = 'model_1.png' - try: - vis_utils.plot_model( - model, to_file=dot_img_file, show_shapes=True, show_dtype=True) - self.assertTrue(tf.io.gfile.exists(dot_img_file)) - tf.io.gfile.remove(dot_img_file) - except ImportError: - pass - - def test_plot_model_with_wrapped_layers_and_models(self): - inputs = keras.Input(shape=(None, 3)) - lstm = keras.layers.LSTM(6, return_sequences=True, name='lstm') - x = lstm(inputs) - # Add layer inside a Wrapper - bilstm = keras.layers.Bidirectional( - keras.layers.LSTM(16, return_sequences=True, name='bilstm')) - x = bilstm(x) - # Add model inside a Wrapper - submodel = keras.Sequential( - [keras.layers.Dense(32, name='dense', input_shape=(None, 32))] + @parameterized.parameters( + {"layer_range": ["block1a_se_squeeze", "block2a_project_conv"]}, + {"layer_range": [r"block\da_se_reshape", r"block*"]}, ) - wrapped_dense = keras.layers.TimeDistributed(submodel) - x = wrapped_dense(x) - # Add shared submodel - outputs = submodel(x) - model = keras.Model(inputs, outputs) - dot_img_file = 'model_2.png' - try: - vis_utils.plot_model( - model, - to_file=dot_img_file, - show_shapes=True, - show_dtype=True, - expand_nested=True) - self.assertTrue(tf.io.gfile.exists(dot_img_file)) - tf.io.gfile.remove(dot_img_file) - except ImportError: - pass - - def test_plot_model_with_add_loss(self): - inputs = keras.Input(shape=(None, 3)) - outputs = keras.layers.Dense(1)(inputs) - model = 
keras.Model(inputs, outputs) - model.add_loss(tf.reduce_mean(outputs)) - dot_img_file = 'model_3.png' - try: - vis_utils.plot_model( - model, - to_file=dot_img_file, - show_shapes=True, - show_dtype=True, - expand_nested=True) - self.assertTrue(tf.io.gfile.exists(dot_img_file)) - tf.io.gfile.remove(dot_img_file) - except ImportError: - pass - - model = keras.Sequential([ - keras.Input(shape=(None, 3)), keras.layers.Dense(1)]) - model.add_loss(tf.reduce_mean(model.output)) - dot_img_file = 'model_4.png' - try: - vis_utils.plot_model( - model, - to_file=dot_img_file, - show_shapes=True, - show_dtype=True, - expand_nested=True) - self.assertTrue(tf.io.gfile.exists(dot_img_file)) - tf.io.gfile.remove(dot_img_file) - except ImportError: - pass - - @parameterized.parameters({ - 'show_shapes': False, - 'show_dtype': False - }, { - 'show_shapes': False, - 'show_dtype': True - }, { - 'show_shapes': True, - 'show_dtype': False - }, { - 'show_shapes': True, - 'show_dtype': True - }) - def test_plot_model_cnn_with_activations(self, show_shapes, show_dtype): - model = keras.Sequential() - model.add( - keras.layers.Conv2D( - filters=2, kernel_size=2, input_shape=(9, 9, 3), activation='relu')) - model.add( - keras.layers.Conv2D( - filters=4, kernel_size=2, strides=(2, 2), activation='relu')) - model.add(keras.layers.Flatten(name='flat')) - model.add(keras.layers.Dense(5, name='head', activation='softmax')) - dot_img_file = 'model_5.png' - try: - vis_utils.plot_model( - model, - to_file=dot_img_file, - show_shapes=show_shapes, - show_dtype=show_dtype, - show_layer_activations=True) - self.assertTrue(tf.io.gfile.exists(dot_img_file)) - tf.io.gfile.remove(dot_img_file) - except ImportError: - pass - - @parameterized.parameters( - {'layer_range': ['block1a_project_conv', 'block1a_activation']}, - {'layer_range': ['block1a_activation', 'block1a_project_conv']}, - {'layer_range': [r'block*', 'block2a_se_excite']}, - {'layer_range': [r'block\da_activation', r'block\da_project_bn']}) - def test_dot_layer_range(self, layer_range): - model = efficientnet.EfficientNetB0(weights=None) - layer_ids_from_model = get_layer_ids_from_model(model, layer_range) - try: - dot = vis_utils.model_to_dot(model, layer_range=layer_range) - dot_edges = dot.get_edges() - layer_ids_from_dot = get_layer_ids_from_dot(dot_edges) - self.assertAllEqual( - sorted(layer_ids_from_model), sorted(layer_ids_from_dot)) - except ImportError: - pass - - @parameterized.parameters( - {'layer_range': ['block1a_project_conv', 'block1a_activation']}, - {'layer_range': ['block1a_activation', 'block1a_project_conv']}, - {'layer_range': [r'block*', 'block2a_se_excite']}, - {'layer_range': [r'block\da_activation', r'block\da_project_bn']}) - def test_plot_layer_range(self, layer_range): - model = efficientnet.EfficientNetB0(weights=None) - effnet_subplot = 'model_effnet.png' - try: - vis_utils.plot_model( - model, to_file=effnet_subplot, layer_range=layer_range) - self.assertTrue(tf.io.gfile.exists(effnet_subplot)) - except ImportError: - pass - finally: - if tf.io.gfile.exists(effnet_subplot): - tf.io.gfile.remove(effnet_subplot) - - @parameterized.parameters( - {'layer_range': ['block1a_se_squeeze', 'block2a_project_conv']}, - {'layer_range': [r'block\da_se_reshape', r'block*']}) - def test_layer_range_assertion_fail(self, layer_range): - model = efficientnet.EfficientNetB0(weights=None) - try: - with self.assertRaises(AssertionError): - vis_utils.model_to_dot(model, layer_range=layer_range) - with self.assertRaises(AssertionError): - 
vis_utils.plot_model(model, layer_range=layer_range) - except ImportError: - pass - - @parameterized.parameters( - {'layer_range': ['block1a_activation']}, - {'layer_range': []}, - {'layer_range': ['input', 'block1a_activation', 'block1a_project_conv']}, - {'layer_range': [9, 'block1a_activation']}, - {'layer_range': [29, 9]}, - {'layer_range': ['block8a_se_reshape', 'block*']}) - def test_layer_range_value_fail(self, layer_range): - model = efficientnet.EfficientNetB0(weights=None) - try: - with self.assertRaises(ValueError): - vis_utils.model_to_dot(model, layer_range=layer_range) - with self.assertRaises(ValueError): - vis_utils.plot_model(model, layer_range=layer_range) - except ImportError: - pass - - def test_model_with_tf_op(self): - # Test fix for a bug in which inputs to a TFOp layer past the 1st one - # were not connected in the Keras model plot. - a = keras.Input((2,)) - b = keras.Input((2,)) - model = keras.Model(inputs=[a, b], outputs=a + b) - try: - dot = vis_utils.model_to_dot(model) - self.assertLen(dot.get_edges(), 2) # This model has 2 edges. - except ImportError: - pass + def test_layer_range_assertion_fail(self, layer_range): + model = efficientnet.EfficientNetB0(weights=None) + try: + with self.assertRaises(AssertionError): + vis_utils.model_to_dot(model, layer_range=layer_range) + with self.assertRaises(AssertionError): + vis_utils.plot_model(model, layer_range=layer_range) + except ImportError: + pass + + @parameterized.parameters( + {"layer_range": ["block1a_activation"]}, + {"layer_range": []}, + { + "layer_range": [ + "input", + "block1a_activation", + "block1a_project_conv", + ] + }, + {"layer_range": [9, "block1a_activation"]}, + {"layer_range": [29, 9]}, + {"layer_range": ["block8a_se_reshape", "block*"]}, + ) + def test_layer_range_value_fail(self, layer_range): + model = efficientnet.EfficientNetB0(weights=None) + try: + with self.assertRaises(ValueError): + vis_utils.model_to_dot(model, layer_range=layer_range) + with self.assertRaises(ValueError): + vis_utils.plot_model(model, layer_range=layer_range) + except ImportError: + pass + + def test_model_with_tf_op(self): + # Test fix for a bug in which inputs to a TFOp layer past the 1st one + # were not connected in the Keras model plot. + a = keras.Input((2,)) + b = keras.Input((2,)) + model = keras.Model(inputs=[a, b], outputs=a + b) + try: + dot = vis_utils.model_to_dot(model) + self.assertLen(dot.get_edges(), 2) # This model has 2 edges. 
+ except ImportError: + pass + + def test_model_with_brackets_in_shape(self): + # Test fix for a bug in which plotting the model shapes fails if + # any labels contain brackets + class DictLayer(keras.layers.Layer): + def call(self, inputs) -> tf.Tensor: + tensor_input, dict_input = inputs + return tf.concat(list(dict_input.values()), axis=1) + + inputs = { + "a": keras.Input(name="a", shape=(1), dtype=tf.float32), + "b": keras.Input(name="b", shape=(1), dtype=tf.float32), + } + outputs = DictLayer()((inputs["a"], inputs)) + model = keras.Model( + inputs=inputs, + outputs=outputs, + ) + try: + vis_utils.model_to_dot( + model, show_shapes=True, show_dtype=True, show_layer_names=True + ) + except ImportError: + pass + + def test_plot_model_with_show_trainable(self): + model = keras.Sequential(name="trainable") + + untrained = keras.layers.Conv2D( + filters=2, kernel_size=(2, 3), input_shape=(3, 5, 5), name="conv" + ) + model.add(untrained) + model.add(keras.layers.Flatten(name="flat")) + model.add(keras.layers.Dense(5, name="dense")) + + # Should display as Non Trainable + untrained.trainable = False + + dot_img_file = "model_trainable.png" + try: + vis_utils.plot_model( + model, + to_file=dot_img_file, + show_shapes=True, + show_dtype=True, + show_trainable=True, + ) + self.assertTrue(tf.io.gfile.exists(dot_img_file)) + tf.io.gfile.remove(dot_img_file) + except ImportError: + pass def get_layer_ids_from_model(model, layer_range): - layer_range = vis_utils.get_layer_index_bound_by_layer_name( - model, layer_range) - layer_ids_from_model = [] - for i, layer in enumerate(model.layers): - if i >= layer_range[0] and i <= layer_range[1]: - layer_ids_from_model.append(str(id(layer))) - return layer_ids_from_model + layer_range = layer_utils.get_layer_index_bound_by_layer_name( + model, layer_range + ) + layer_ids_from_model = [ + str(id(layer)) + for layer in model.layers[layer_range[0] : layer_range[1]] + ] + return layer_ids_from_model def get_layer_ids_from_dot(dot_edges): - layer_ids_from_dot = [] - for edge in dot_edges: - for pt in edge.obj_dict['points']: - if pt not in layer_ids_from_dot: - layer_ids_from_dot.append(pt) - return layer_ids_from_dot + layer_ids_from_dot = [] + for edge in dot_edges: + for pt in edge.obj_dict["points"]: + if pt not in layer_ids_from_dot: + layer_ids_from_dot.append(pt) + return layer_ids_from_dot -if __name__ == '__main__': - tf.test.main() +if __name__ == "__main__": + tf.test.main() diff --git a/keras/wrappers/BUILD b/keras/wrappers/BUILD deleted file mode 100644 index c76c1cfcfb94..000000000000 --- a/keras/wrappers/BUILD +++ /dev/null @@ -1,40 +0,0 @@ -# Description: -# Contains the Keras wrapper API (internal TensorFlow version). 
- -load("@org_keras//keras:keras.bzl", "tf_py_test") - -package( - default_visibility = ["//keras:friends"], - licenses = ["notice"], -) - -py_library( - name = "wrappers", - srcs = [ - "__init__.py", - "scikit_learn.py", - ], - srcs_version = "PY3", - deps = [ - "//:expect_numpy_installed", - "//:expect_tensorflow_installed", - "//keras:engine", - "//keras:losses", - "//keras/utils:generic_utils", - ], -) - -tf_py_test( - name = "scikit_learn_test", - size = "small", - srcs = ["scikit_learn_test.py"], - python_version = "PY3", - tags = ["notsan"], - deps = [ - ":wrappers", - "//:expect_numpy_installed", - "//:expect_tensorflow_installed", - "//keras/testing_infra:test_utils", - "//keras/utils:np_utils", - ], -) diff --git a/keras/wrappers/scikit_learn.py b/keras/wrappers/scikit_learn.py deleted file mode 100644 index 348ccdd14ecb..000000000000 --- a/keras/wrappers/scikit_learn.py +++ /dev/null @@ -1,386 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Wrapper for using the Scikit-Learn API with Keras models.""" -# pylint: disable=g-classes-have-attributes - -import copy -import types -import warnings - -import numpy as np - -from keras import losses -from keras.models import Sequential -from keras.utils.generic_utils import has_arg -from keras.utils.np_utils import to_categorical -from tensorflow.python.util.tf_export import keras_export -from tensorflow.tools.docs import doc_controls - - -class BaseWrapper: - """Base class for the Keras scikit-learn wrapper. - - Warning: This class should not be used directly. - Use descendant classes instead. - - Args: - build_fn: callable function or class instance - **sk_params: model parameters & fitting parameters - - The `build_fn` should construct, compile and return a Keras model, which - will then be used to fit/predict. One of the following - three values could be passed to `build_fn`: - 1. A function - 2. An instance of a class that implements the `__call__` method - 3. None. This means you implement a class that inherits from either - `KerasClassifier` or `KerasRegressor`. The `__call__` method of the - present class will then be treated as the default `build_fn`. - - `sk_params` takes both model parameters and fitting parameters. Legal model - parameters are the arguments of `build_fn`. Note that like all other - estimators in scikit-learn, `build_fn` should provide default values for - its arguments, so that you could create the estimator without passing any - values to `sk_params`. - - `sk_params` could also accept parameters for calling `fit`, `predict`, - `predict_proba`, and `score` methods (e.g., `epochs`, `batch_size`). - fitting (predicting) parameters are selected in the following order: - - 1. Values passed to the dictionary arguments of - `fit`, `predict`, `predict_proba`, and `score` methods - 2. Values passed to `sk_params` - 3. 
The default values of the `keras.models.Sequential` - `fit`, `predict` methods. - - When using scikit-learn's `grid_search` API, legal tunable parameters are - those you could pass to `sk_params`, including fitting parameters. - In other words, you could use `grid_search` to search for the best - `batch_size` or `epochs` as well as the model parameters. - """ - - def __init__(self, build_fn=None, **sk_params): - self.build_fn = build_fn - self.sk_params = sk_params - self.check_params(sk_params) - - def check_params(self, params): - """Checks for user typos in `params`. - - Args: - params: dictionary; the parameters to be checked - - Raises: - ValueError: if any member of `params` is not a valid argument. - """ - legal_params_fns = [ - Sequential.fit, Sequential.predict, Sequential.evaluate - ] - if self.build_fn is None: - legal_params_fns.append(self.__call__) - elif (not isinstance(self.build_fn, types.FunctionType) and - not isinstance(self.build_fn, types.MethodType)): - legal_params_fns.append(self.build_fn.__call__) - else: - legal_params_fns.append(self.build_fn) - - for params_name in params: - for fn in legal_params_fns: - if has_arg(fn, params_name): - break - else: - if params_name != 'nb_epoch': - raise ValueError('{} is not a legal parameter'.format(params_name)) - - def get_params(self, **params): # pylint: disable=unused-argument - """Gets parameters for this estimator. - - Args: - **params: ignored (exists for API compatibility). - - Returns: - Dictionary of parameter names mapped to their values. - """ - res = self.sk_params.copy() - res.update({'build_fn': self.build_fn}) - return res - - def set_params(self, **params): - """Sets the parameters of this estimator. - - Args: - **params: Dictionary of parameter names mapped to their values. - - Returns: - self - """ - self.check_params(params) - self.sk_params.update(params) - return self - - def fit(self, x, y, **kwargs): - """Constructs a new model with `build_fn` & fit the model to `(x, y)`. - - Args: - x : array-like, shape `(n_samples, n_features)` - Training samples where `n_samples` is the number of samples - and `n_features` is the number of features. - y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)` - True labels for `x`. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.fit` - - Returns: - history : object - details about the training history at each epoch. - """ - if self.build_fn is None: - self.model = self.__call__(**self.filter_sk_params(self.__call__)) - elif (not isinstance(self.build_fn, types.FunctionType) and - not isinstance(self.build_fn, types.MethodType)): - self.model = self.build_fn( - **self.filter_sk_params(self.build_fn.__call__)) - else: - self.model = self.build_fn(**self.filter_sk_params(self.build_fn)) - - if (losses.is_categorical_crossentropy(self.model.loss) and - len(y.shape) != 2): - y = to_categorical(y) - - fit_args = copy.deepcopy(self.filter_sk_params(Sequential.fit)) - fit_args.update(kwargs) - - history = self.model.fit(x, y, **fit_args) - - return history - - def filter_sk_params(self, fn, override=None): - """Filters `sk_params` and returns those in `fn`'s arguments. - - Args: - fn : arbitrary function - override: dictionary, values to override `sk_params` - - Returns: - res : dictionary containing variables - in both `sk_params` and `fn`'s arguments. 
- """ - override = override or {} - res = {} - for name, value in self.sk_params.items(): - if has_arg(fn, name): - res.update({name: value}) - res.update(override) - return res - - -@keras_export('keras.wrappers.scikit_learn.KerasClassifier') -@doc_controls.do_not_generate_docs -class KerasClassifier(BaseWrapper): - """Implementation of the scikit-learn classifier API for Keras. - - DEPRECATED. Use [Sci-Keras](https://github.com/adriangb/scikeras) instead. - See https://www.adriangb.com/scikeras/stable/migration.html - for help migrating. - """ - - def __init__(self, build_fn=None, **sk_params): - warnings.warn( - 'KerasClassifier is deprecated, ' - 'use Sci-Keras (https://github.com/adriangb/scikeras) instead. ' - 'See https://www.adriangb.com/scikeras/stable/migration.html ' - 'for help migrating.', - DeprecationWarning, - stacklevel=2) - super().__init__(build_fn, **sk_params) - - def fit(self, x, y, **kwargs): - """Constructs a new model with `build_fn` & fit the model to `(x, y)`. - - Args: - x : array-like, shape `(n_samples, n_features)` - Training samples where `n_samples` is the number of samples - and `n_features` is the number of features. - y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)` - True labels for `x`. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.fit` - - Returns: - history : object - details about the training history at each epoch. - - Raises: - ValueError: In case of invalid shape for `y` argument. - """ - y = np.array(y) - if len(y.shape) == 2 and y.shape[1] > 1: - self.classes_ = np.arange(y.shape[1]) - elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1: - self.classes_ = np.unique(y) - y = np.searchsorted(self.classes_, y) - else: - raise ValueError('Invalid shape for y: ' + str(y.shape)) - self.n_classes_ = len(self.classes_) - return super().fit(x, y, **kwargs) - - def predict(self, x, **kwargs): - """Returns the class predictions for the given test data. - - Args: - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - **kwargs: dictionary arguments - Legal arguments are the arguments - of `Sequential.predict`. - - Returns: - preds: array-like, shape `(n_samples,)` - Class predictions. - """ - proba = self.model.predict(x, **kwargs) - if proba.shape[-1] > 1: - classes = proba.argmax(axis=-1) - else: - classes = (proba > 0.5).astype('int32') - return self.classes_[classes] - - def predict_proba(self, x, **kwargs): - """Returns class probability estimates for the given test data. - - Args: - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - **kwargs: dictionary arguments - Legal arguments are the arguments - of `Sequential.predict`. - - Returns: - proba: array-like, shape `(n_samples, n_outputs)` - Class probability estimates. - In the case of binary classification, - to match the scikit-learn API, - will return an array of shape `(n_samples, 2)` - (instead of `(n_sample, 1)` as in Keras). - """ - probs = self.model.predict(x, **kwargs) - - # check if binary classification - if probs.shape[1] == 1: - # first column is probability of class 0 and second is of class 1 - probs = np.hstack([1 - probs, probs]) - return probs - - def score(self, x, y, **kwargs): - """Returns the mean accuracy on the given test data and labels. 
- - Args: - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)` - True labels for `x`. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.evaluate`. - - Returns: - score: float - Mean accuracy of predictions on `x` wrt. `y`. - - Raises: - ValueError: If the underlying model isn't configured to - compute accuracy. You should pass `metrics=["accuracy"]` to - the `.compile()` method of the model. - """ - y = np.searchsorted(self.classes_, y) - kwargs = self.filter_sk_params(Sequential.evaluate, kwargs) - - loss_name = self.model.loss - if hasattr(loss_name, '__name__'): - loss_name = loss_name.__name__ - if loss_name == 'categorical_crossentropy' and len(y.shape) != 2: - y = to_categorical(y) - - outputs = self.model.evaluate(x, y, **kwargs) - if not isinstance(outputs, list): - outputs = [outputs] - for name, output in zip(self.model.metrics_names, outputs): - if name in ['accuracy', 'acc']: - return output - raise ValueError('The model is not configured to compute accuracy. ' - 'You should pass `metrics=["accuracy"]` to ' - 'the `model.compile()` method.') - - -@keras_export('keras.wrappers.scikit_learn.KerasRegressor') -@doc_controls.do_not_generate_docs -class KerasRegressor(BaseWrapper): - """Implementation of the scikit-learn regressor API for Keras. - - DEPRECATED. Use [Sci-Keras](https://github.com/adriangb/scikeras) instead. - See https://www.adriangb.com/scikeras/stable/migration.html - for help migrating. - """ - - @doc_controls.do_not_doc_inheritable - def __init__(self, build_fn=None, **sk_params): - warnings.warn( - 'KerasRegressor is deprecated, ' - 'use Sci-Keras (https://github.com/adriangb/scikeras) instead. ' - 'See https://www.adriangb.com/scikeras/stable/migration.html ' - 'for help migrating.', - DeprecationWarning, - stacklevel=2) - super().__init__(build_fn, **sk_params) - - def predict(self, x, **kwargs): - """Returns predictions for the given test data. - - Args: - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.predict`. - - Returns: - preds: array-like, shape `(n_samples,)` - Predictions. - """ - kwargs = self.filter_sk_params(Sequential.predict, kwargs) - return np.squeeze(self.model.predict(x, **kwargs)) - - def score(self, x, y, **kwargs): - """Returns the mean loss on the given test data and labels. - - Args: - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - y: array-like, shape `(n_samples,)` - True labels for `x`. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.evaluate`. - - Returns: - score: float - Mean accuracy of predictions on `x` wrt. `y`. - """ - kwargs = self.filter_sk_params(Sequential.evaluate, kwargs) - loss = self.model.evaluate(x, y, **kwargs) - if isinstance(loss, list): - return -loss[0] - return -loss diff --git a/keras/wrappers/scikit_learn_test.py b/keras/wrappers/scikit_learn_test.py deleted file mode 100644 index d00e9df8da34..000000000000 --- a/keras/wrappers/scikit_learn_test.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Scikit-learn API wrapper.""" - -import warnings - -import tensorflow.compat.v2 as tf - -import numpy as np - -import keras -from keras.testing_infra import test_utils -from keras.wrappers import scikit_learn - -INPUT_DIM = 5 -HIDDEN_DIM = 5 -TRAIN_SAMPLES = 10 -TEST_SAMPLES = 5 -NUM_CLASSES = 2 -BATCH_SIZE = 5 -EPOCHS = 1 - - -def build_fn_clf(hidden_dim): - model = keras.models.Sequential() - model.add(keras.layers.Dense(INPUT_DIM, input_shape=(INPUT_DIM,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(hidden_dim)) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(NUM_CLASSES)) - model.add(keras.layers.Activation('softmax')) - model.compile( - optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy']) - return model - - -def assert_classification_works(clf): - np.random.seed(42) - (x_train, y_train), (x_test, _) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - - clf.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS) - - score = clf.score(x_train, y_train, batch_size=BATCH_SIZE) - assert np.isscalar(score) and np.isfinite(score) - - preds = clf.predict(x_test, batch_size=BATCH_SIZE) - assert preds.shape == (TEST_SAMPLES,) - for prediction in np.unique(preds): - assert prediction in range(NUM_CLASSES) - - proba = clf.predict_proba(x_test, batch_size=BATCH_SIZE) - assert proba.shape == (TEST_SAMPLES, NUM_CLASSES) - assert np.allclose(np.sum(proba, axis=1), np.ones(TEST_SAMPLES)) - - -def build_fn_reg(hidden_dim): - model = keras.models.Sequential() - model.add(keras.layers.Dense(INPUT_DIM, input_shape=(INPUT_DIM,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(hidden_dim)) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(1)) - model.add(keras.layers.Activation('linear')) - model.compile( - optimizer='sgd', loss='mean_absolute_error', metrics=['accuracy']) - return model - - -def assert_regression_works(reg): - np.random.seed(42) - (x_train, y_train), (x_test, _) = test_utils.get_test_data( - train_samples=TRAIN_SAMPLES, - test_samples=TEST_SAMPLES, - input_shape=(INPUT_DIM,), - num_classes=NUM_CLASSES) - - reg.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS) - - score = reg.score(x_train, y_train, batch_size=BATCH_SIZE) - assert np.isscalar(score) and np.isfinite(score) - - preds = reg.predict(x_test, batch_size=BATCH_SIZE) - assert preds.shape == (TEST_SAMPLES,) - - -class ScikitLearnAPIWrapperTest(tf.test.TestCase): - - def test_classify_build_fn(self): - with self.cached_session(): - clf = scikit_learn.KerasClassifier( - build_fn=build_fn_clf, - hidden_dim=HIDDEN_DIM, - batch_size=BATCH_SIZE, - epochs=EPOCHS) - - assert_classification_works(clf) - - def test_classify_class_build_fn(self): - - class 
ClassBuildFnClf: - - def __call__(self, hidden_dim): - return build_fn_clf(hidden_dim) - - with self.cached_session(): - clf = scikit_learn.KerasClassifier( - build_fn=ClassBuildFnClf(), - hidden_dim=HIDDEN_DIM, - batch_size=BATCH_SIZE, - epochs=EPOCHS) - - assert_classification_works(clf) - - def test_classify_inherit_class_build_fn(self): - - class InheritClassBuildFnClf(scikit_learn.KerasClassifier): - - def __call__(self, hidden_dim): - return build_fn_clf(hidden_dim) - - with self.cached_session(): - clf = InheritClassBuildFnClf( - build_fn=None, - hidden_dim=HIDDEN_DIM, - batch_size=BATCH_SIZE, - epochs=EPOCHS) - - assert_classification_works(clf) - - def test_regression_build_fn(self): - with self.cached_session(): - reg = scikit_learn.KerasRegressor( - build_fn=build_fn_reg, - hidden_dim=HIDDEN_DIM, - batch_size=BATCH_SIZE, - epochs=EPOCHS) - - assert_regression_works(reg) - - def test_regression_class_build_fn(self): - - class ClassBuildFnReg: - - def __call__(self, hidden_dim): - return build_fn_reg(hidden_dim) - - with self.cached_session(): - reg = scikit_learn.KerasRegressor( - build_fn=ClassBuildFnReg(), - hidden_dim=HIDDEN_DIM, - batch_size=BATCH_SIZE, - epochs=EPOCHS) - - assert_regression_works(reg) - - def test_regression_inherit_class_build_fn(self): - - class InheritClassBuildFnReg(scikit_learn.KerasRegressor): - - def __call__(self, hidden_dim): - return build_fn_reg(hidden_dim) - - with self.cached_session(): - reg = InheritClassBuildFnReg( - build_fn=None, - hidden_dim=HIDDEN_DIM, - batch_size=BATCH_SIZE, - epochs=EPOCHS) - - assert_regression_works(reg) - - def test_regressor_deprecated(self): - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - scikit_learn.KerasRegressor(build_fn_reg) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert 'KerasRegressor is deprecated' in str(w[-1].message) - - def test_classifier_deprecated(self): - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - scikit_learn.KerasClassifier(build_fn_clf) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert 'KerasClassifier is deprecated' in str(w[-1].message) - - -if __name__ == '__main__': - tf.test.main() diff --git a/oss_setup.py b/oss_setup.py new file mode 100644 index 000000000000..07db3105ccbf --- /dev/null +++ b/oss_setup.py @@ -0,0 +1,92 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Setup script for the Keras pip package.""" + +import os + +import setuptools + +DESCRIPTION = """Keras is a deep learning API written in Python, +running on top of the machine learning platform TensorFlow. + +It was developed with a focus on enabling fast experimentation and +providing a delightful developer experience. +The purpose of Keras is to give an *unfair advantage* to any developer +looking to ship ML-powered apps. 
+ +Keras is: + +- **Simple** -- but not simplistic. Keras reduces developer *cognitive load* + to free you to focus on the parts of the problem that really matter. + Keras focuses on ease of use, debugging speed, code elegance & conciseness, + maintainability, and deployability (via TFServing, TFLite, TF.js). +- **Flexible** -- Keras adopts the principle of *progressive disclosure of + complexity*: simple workflows should be quick and easy, while arbitrarily + advanced workflows should be *possible* via a clear path that builds upon + what you've already learned. +- **Powerful** -- Keras provides industry-strength performance and + scalability: it is used by organizations and companies including NASA, + YouTube, and Waymo. That's right -- your YouTube recommendations are + powered by Keras, and so is the world's most advanced driverless vehicle. +""" + +with open(os.path.abspath(__file__)) as f: + contents = f.read() + if contents.count("{PACKAGE}") > 1 or contents.count("{VERSION}") > 1: + raise ValueError( + "You must fill the 'PACKAGE' and 'VERSION' " + "tags before running setup.py. If you are trying to " + "build a fresh package, you should be using " + "`pip_build.py` instead of `setup.py`." + ) + +setuptools.setup( + name="{{PACKAGE}}", + # Version strings with `-` characters are semver compatible, + # but incompatible with pip. For pip, we will remove all `-` characters. + version="{{VERSION}}", + description="Deep learning for humans.", + long_description=DESCRIPTION, + url="https://keras.io/", + download_url="https://github.com/keras-team/keras/tags", + author="Keras team", + author_email="keras-users@googlegroups.com", + packages=setuptools.find_packages(), + install_requires=[], + # Supported Python versions + python_requires=">=3.8", + # PyPI package information. + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", + ], + license="Apache 2.0", + keywords=["keras", "tensorflow", "machine learning", "deep learning"], +) diff --git a/pip_build.py b/pip_build.py new file mode 100644 index 000000000000..6c09c1ccb7a3 --- /dev/null +++ b/pip_build.py @@ -0,0 +1,518 @@ +"""Build the Keras pip package. + +The steps are as follows: + +0. Run bazel build in the Keras root directory to obtain protobuf Python files. +1. Create a temporary build directory (e.g. `/tmp/keras_build`) +2. Copy the Keras codebase to it (to `/tmp/keras_build/keras/src`) + and rewrite internal imports so that they refer to `keras.src` rather than + just `keras`. +3. Also copy `setup.py` to the build directory. +4. List and import every file in the codebase (in `/tmp/keras_build/keras/src`), + so we can inspect the symbols the codebase contains. +5. 
Use the annotations left by the `keras_export` decorator to filter the + symbols that should be exported, as well as their export path (default one + and v1 one). +6. Use this information to generate `__init__.py` files in + `/tmp/keras_build/keras/`. +7. Run the setup script to write out build artifacts to `/tmp/keras_build/dist`. +8. Copy the artifacts out. This is what should be uploaded to PyPI. + +This script borrows heavily from Namex (https://github.com/fchollet/namex). + +Notes: + +* This script should be run on the Keras codebase as obtained from GitHub + (OSS-facing), not the Google-internal one. The files are expected to be + already converted to their public form. +* This script only targets Linux x86-64. It could be adapted to MacOS + relatively easily by changing requirements.txt and the bazel build script. +* This script should be run from an environment that has all Keras dependencies + installed. Note that their specific version is not important; the only + thing that matters is that we should be able to import the Keras codebase + in its current state (so we can perform step 4). If you install the + dependencies used by the latest TF-nightly you should be good. +""" + +import argparse +import datetime +import glob +import importlib +import inspect +import os +import pathlib +import shutil +import subprocess +import sys +import tempfile + +PACKAGE_NAME = "keras" +DIST_DIRNAME = "dist" +SRC_DIRNAME = "src" +TMP_BUILD_DIRNAME = "keras_build" +TMP_TEST_DIRNAME = "keras_test" +VERBOSE = True +INIT_FILE_HEADER = """AUTOGENERATED. DO NOT EDIT.""" +# These are symbols that have export issues and that we skip for now. +SYMBOLS_TO_SKIP = ["layer_test"] + + +def copy_keras_codebase(source_dir, target_dir): + disallowed = [ + "tools", + "integration_test", + ] + + def ignore(path, names): + to_ignore = [] + for name in names: + if name.endswith("_test.py"): + to_ignore.append(name) + elif name in disallowed: + to_ignore.append(name) + return to_ignore + + shutil.copytree(source_dir, target_dir, ignore=ignore) + + +def convert_keras_imports(src_directory): + def _convert_line(line): + if "import keras.protobuf" in line or "from keras.protobuf" in line: + return line + # Imports starting from `PACKAGE_NAME`. + if line.strip() == f"import {PACKAGE_NAME}": + line = line.replace( + f"import {PACKAGE_NAME}", + f"import {PACKAGE_NAME}.{SRC_DIRNAME} as {PACKAGE_NAME}", + ) + return line + + line = line.replace( + f"import {PACKAGE_NAME}.", + f"import {PACKAGE_NAME}.{SRC_DIRNAME}.", + ) + line = line.replace( + f"from {PACKAGE_NAME}.", + f"from {PACKAGE_NAME}.{SRC_DIRNAME}.", + ) + line = line.replace( + f"from {PACKAGE_NAME} import", + f"from {PACKAGE_NAME}.{SRC_DIRNAME} import", + ) + # A way to catch LazyLoader calls. Hacky. 
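+        # e.g. `LazyLoader("foo", globals(), "keras.foo")` becomes
+        # `LazyLoader("foo", globals(), "keras.src.foo")` (names here are
+        # illustrative).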
+ line = line.replace('globals(), "keras.', 'globals(), "keras.src.') + return line + + for root, _, files in os.walk(src_directory): + for fname in files: + if fname.endswith(".py") and not fname.endswith("_pb2.py"): + fpath = os.path.join(root, fname) + if VERBOSE: + print(f"...processing {fpath}") + with open(fpath) as f: + contents = f.read() + lines = contents.split("\n") + in_string = False + new_lines = [] + for line in lines: + if line.strip().startswith('"""') or line.strip().endswith( + '"""' + ): + if line.count('"') % 2 == 1: + in_string = not in_string + else: + line = _convert_line(line) + new_lines.append(line) + + with open(fpath, "w") as f: + f.write("\n".join(new_lines) + "\n") + + +def generate_keras_api_files(package_directory, src_directory): + if VERBOSE: + print("# Compiling codebase entry points.") + + codebase_walk_entry_points = [] + for root, _, files in os.walk(src_directory): + for fname in files: + parts = root.split("/") + parts = parts[parts.index("keras") :] + base_entry_point = ".".join(parts) + if fname == "__init__.py": + codebase_walk_entry_points.append(base_entry_point) + elif fname.endswith(".py") and not fname.endswith("_test.py"): + module_name = fname[:-3] + codebase_walk_entry_points.append( + base_entry_point + "." + module_name + ) + + # Import all Python modules found in the code directory. + modules = [] + sys.path.insert(0, os.getcwd()) + for entry_point in codebase_walk_entry_points: + if VERBOSE: + print(f"Load entry point: {entry_point}") + mod = importlib.import_module(entry_point, package=".") + modules.append(mod) + + if VERBOSE: + print("# Compiling list of symbols to export.") + + # Populate list of all symbols to register. + all_symbols = set() + processed = set() + from tensorflow.python.util import tf_decorator + + for module in modules: + for name in dir(module): + if name in SYMBOLS_TO_SKIP: + continue + symbol = getattr(module, name) + + # Get the real symbol behind any TF decorator + try: + _, symbol = tf_decorator.unwrap(symbol) + except ModuleNotFoundError: + # unwrap will not work on a ModuleSpec (which can't be + # an API symbol anyway) + continue + + # Skip if already seen + if id(symbol) in processed: + continue + processed.add(id(symbol)) + + try: + if not hasattr(symbol, "_keras_api_names"): + continue + except: # noqa: E722 + if VERBOSE: + print( + f"[!] Could not inspect symbol '{name}' from {module}." + ) + continue + # If the symbol is a non-registered subclass of + # a registered symbol, skip it. + skip = False + + def has_same_metadata(a, b): + if ( + hasattr(a, "_keras_api_names") + and hasattr(b, "_keras_api_names") + and a._keras_api_names == b._keras_api_names + and a._keras_api_names_v1 == b._keras_api_names_v1 + ): + return True + return False + + try: + classes = inspect.getmro(symbol) + if len(classes) >= 2: + parents = classes[1:] + for p in parents: + if has_same_metadata(p, symbol): + skip = True + except AttributeError: + # getmro will error out on a non-class + # (in which case there can be no subclassing issues). + pass + if not skip: + all_symbols.add(symbol) + + # Generate __init__ files content. 
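+    # (`init_files_content` maps a target directory such as `keras/utils` to
+    # the list of entries its `__init__.py` should contain; see
+    # `grab_symbol_metadata` below.)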
+ if VERBOSE: + print("# Processing export path data for each symbol.") + init_files_content = grab_symbol_metadata(all_symbols, is_v1=False) + init_files_content_v1 = grab_symbol_metadata(all_symbols, is_v1=True) + + if VERBOSE: + print("# Writing out API files.") + write_out_api_files( + init_files_content, + target_dir=pathlib.Path(package_directory).parent.resolve(), + ) + v1_path = os.path.join(package_directory, "api", "_v1") + v2_path = os.path.join(package_directory, "api", "_v2") + write_out_api_files( + init_files_content, + target_dir=v2_path, + root_offset=["api", "_v2", "keras"], + ) + write_out_api_files( + init_files_content_v1, + target_dir=v1_path, + root_offset=["api", "_v1", "keras"], + ) + # Add missing __init__ files in api dirs. + with open(os.path.join(package_directory, "api", "__init__.py"), "w"): + pass + with open(os.path.join(v1_path, "__init__.py"), "w"): + pass + with open(os.path.join(v2_path, "__init__.py"), "w"): + pass + + +def grab_symbol_metadata(all_symbols, is_v1=False): + # init_files_content is a dict mapping a directory path to a list of + # symbol metadata entries to populate the __init__ file for the directory. + # Each entry is a dict with keys 'symbol' and 'export_name'. + init_files_content = {} + for symbol in all_symbols: + if VERBOSE: + print(f"...processing symbol '{symbol.__name__}'") + if is_v1: + api_names = symbol._keras_api_names_v1 + else: + api_names = symbol._keras_api_names + for export_path in api_names: + export_modules = export_path.split(".") + export_name = export_modules[-1] + parent_path = os.path.join(*export_modules[:-1]) + if parent_path not in init_files_content: + init_files_content[parent_path] = [] + init_files_content[parent_path].append( + {"symbol": symbol, "export_name": export_name} + ) + for i in range(1, len(export_modules[:-1])): + intermediate_path = os.path.join(*export_modules[:i]) + if intermediate_path not in init_files_content: + init_files_content[intermediate_path] = [] + init_files_content[intermediate_path].append( + { + "module": export_modules[i], + "location": ".".join(export_modules[:i]), + } + ) + return init_files_content + + +def write_out_api_files(init_files_content, target_dir, root_offset=None): + # Go over init_files_content, make dirs, + # create __init__.py file, populate file with public symbol imports. 
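+    # For example, a symbol exported as `keras.utils.plot_model` yields a
+    # line of the form `from keras.src.utils.vis_utils import plot_model`
+    # in `keras/utils/__init__.py`.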
+ root_offset = root_offset or [] + for path, contents in init_files_content.items(): + os.makedirs(os.path.join(target_dir, path), exist_ok=True) + init_file_lines = [] + modules_included = set() + for symbol_metadata in contents: + if "symbol" in symbol_metadata: + symbol = symbol_metadata["symbol"] + name = symbol_metadata["export_name"] + if name == symbol.__name__: + init_file_lines.append( + f"from {symbol.__module__} import {symbol.__name__}" + ) + else: + init_file_lines.append( + f"from {symbol.__module__} " + f"import {symbol.__name__} as {name}" + ) + elif "module" in symbol_metadata: + if symbol_metadata["module"] not in modules_included: + parts = path.split("/") + parts = [parts[0]] + root_offset + parts[1:] + module_location = ".".join(parts) + init_file_lines.append( + f"from {module_location} " + f"import {symbol_metadata['module']}" + ) + modules_included.add(symbol_metadata["module"]) + + init_path = os.path.join(target_dir, path, "__init__.py") + if VERBOSE: + print(f"...writing {init_path}") + init_file_lines = sorted(init_file_lines) + with open(init_path, "w") as f: + contents = ( + f'"""{INIT_FILE_HEADER}"""\n\n' + + "\n".join(init_file_lines) + + "\n" + ) + f.write(contents) + + +def build_pip_package( + keras_root_directory, + build_directory, + package_directory, + src_directory, + dist_directory, + is_nightly=False, + rc=None, +): + # Build Keras with Bazel to get the protobuf .py files + os.chdir(keras_root_directory) + os.system(f"sh {os.path.join('keras', 'tools', 'bazel_build.sh')}") + os.chdir(build_directory) + + # Copy sources (`keras/` directory and setup files) to build directory + copy_keras_codebase( + os.path.join(keras_root_directory, "keras"), src_directory + ) + shutil.copy( + os.path.join(keras_root_directory, "oss_setup.py"), + os.path.join(build_directory, "setup.py"), + ) + + # Add blank __init__.py file at package root + # to make the package directory importable. + with open(os.path.join(package_directory, "__init__.py"), "w") as f: + pass + + # Move protobuf .py files to package root. + shutil.rmtree(os.path.join(src_directory, "protobuf")) + shutil.move( + os.path.join(keras_root_directory, "bazel-bin", "keras", "protobuf"), + package_directory, + ) + # Add blank __init__.py file in protobuf dir. + with open( + os.path.join(package_directory, "protobuf", "__init__.py"), "w" + ) as f: + pass + + # Convert imports from `keras.xyz` to `keras.src.xyz`. 
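+    # (After this step, the vendored sources under `keras/src` import each
+    # other via `keras.src.*`, leaving the top-level `keras.*` namespace to
+    # the generated API files.)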
+    convert_keras_imports(src_directory)
+
+    # Generate API __init__.py files in `keras/`
+    generate_keras_api_files(package_directory, src_directory)
+
+    # Make sure to export the __version__ string
+    version = getattr(
+        importlib.import_module("keras.src", package="."), "__version__"
+    )
+    if is_nightly:
+        date = datetime.datetime.now()
+        version += f".dev{date.strftime('%Y%m%d%H')}"
+    elif rc:
+        version += rc
+    with open(os.path.join(package_directory, "__init__.py")) as f:
+        init_contents = f.read()
+    with open(os.path.join(package_directory, "__init__.py"), "w") as f:
+        f.write(init_contents + "\n\n" + f'__version__ = "{version}"\n')
+
+    # Fill in the {{PACKAGE}} and {{VERSION}} placeholders in setup.py
+    if is_nightly:
+        package = PACKAGE_NAME + "-nightly"
+    else:
+        package = PACKAGE_NAME
+    with open(os.path.join(build_directory, "setup.py")) as f:
+        setup_contents = f.read()
+    with open(os.path.join(build_directory, "setup.py"), "w") as f:
+        setup_contents = setup_contents.replace("{{VERSION}}", version)
+        setup_contents = setup_contents.replace("{{PACKAGE}}", package)
+        f.write(setup_contents)
+
+    # Build the package
+    os.system("python3 -m build")
+
+    # Save the dist files generated by the build process
+    saved_filenames = []
+    for filename in glob.glob(os.path.join(build_directory, "dist", "*.*")):
+        if VERBOSE:
+            print(f"Saving build artifact {filename}")
+        shutil.copy(filename, dist_directory)
+        saved_filenames.append(filename)
+    if VERBOSE:
+        print(f"Saved artifacts to {dist_directory}")
+    return saved_filenames, version
+
+
+def test_wheel(wheel_path, expected_version, requirements_path):
+    test_directory = os.path.join(tempfile.gettempdir(), TMP_TEST_DIRNAME)
+    os.mkdir(test_directory)
+    os.chdir(test_directory)
+    symbols_to_check = [
+        "keras.layers",
+        "keras.Input",
+        "keras.__internal__",
+        "keras.experimental",
+    ]
+    checks = ";".join(symbols_to_check)
+    script = (
+        "#!/bin/bash\n"
+        "virtualenv kenv\n"
+        f"source {os.path.join('kenv', 'bin', 'activate')}\n"
+        f"pip3 install -r {requirements_path}\n"
+        f"pip3 install {wheel_path} --force-reinstall\n"
+        f"python3 -c 'import keras;{checks};print(keras.__version__)'\n"
+        f"python3 -c 'import tensorflow as tf;tf.compat.v1.layers.Dense'\n"
+    )
+    try:
+        # Check that the version is correct
+        output = subprocess.check_output(script.encode(), shell=True)
+        output = output.decode().rstrip().split("\n")[-1].strip()
+        if output != expected_version:
+            raise ValueError(
+                "Incorrect version; expected "
+                f"{expected_version} but received {output}"
+            )
+    finally:
+        shutil.rmtree(test_directory)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--nightly",
+        action="store_true",
+        help="Whether this is for the `keras-nightly` package.",
+    )
+    parser.add_argument(
+        "--RC",
+        type=str,
+        help="Version suffix for a release candidate, e.g. `rc0`.",
+    )
+    args = parser.parse_args()
+    is_nightly = args.nightly
+    rc = args.RC
+
+    build_directory = os.path.join(tempfile.gettempdir(), TMP_BUILD_DIRNAME)
+    keras_root_directory = pathlib.Path(__file__).parent.resolve()
+    dist_directory = os.path.join(keras_root_directory, DIST_DIRNAME)
+    package_directory = os.path.join(build_directory, PACKAGE_NAME)
+    src_directory = os.path.join(build_directory, PACKAGE_NAME, SRC_DIRNAME)
+    if VERBOSE:
+        print(
+            "Using:\n"
+            f"build_directory={build_directory}\n"
+            f"keras_root_directory={keras_root_directory}\n"
+            f"dist_directory={dist_directory}\n"
+            f"package_directory={package_directory}\n"
+            f"src_directory={src_directory}\n"
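+            # (the adjacent f-strings are joined by implicit string
+            # concatenation into a single multi-line message)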
f"is_nightly={is_nightly}\n" + f"rc={rc}" + ) + if os.path.exists(build_directory): + raise ValueError(f"Directory already exists: {build_directory}") + os.mkdir(build_directory) + os.mkdir(package_directory) + if not os.path.exists(dist_directory): + os.mkdir(dist_directory) + try: + saved_filenames, version = build_pip_package( + keras_root_directory, + build_directory, + package_directory, + src_directory, + dist_directory, + is_nightly, + rc, + ) + wheel_filename = [f for f in saved_filenames if f.endswith(".whl")][0] + if VERBOSE: + print("Testing wheel artifact.") + test_wheel( + wheel_path=os.path.join(dist_directory, wheel_filename), + expected_version=version, + requirements_path=os.path.join( + keras_root_directory, "requirements.txt" + ), + ) + if VERBOSE: + print("Test successful.") + finally: + # Clean up: remove the build directory (no longer needed) + if VERBOSE: + print(f"Deleting temp build directory at {build_directory}...") + shutil.rmtree(build_directory) diff --git a/requirements.txt b/requirements.txt index d311f9368af7..412ef5fb6a63 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,11 +2,18 @@ # The rest of the packages are mostly used for testing purpose. pandas pydot -scipy ~= 1.5.2 +scipy ~= 1.9.2 +# Remove once both TensorFlow and Keras nightly builds pass. +# Temporarily enforce 3.20.3 version, as the only version which is compatible +# with both new and old protobuf stubs. This is needed to resolve +# Keras-TensorFlow circular dependency issue, when one of them gets a dependency +# incompatible with another one (protobuf in this specific case). +protobuf==3.20.3 tf-nightly portpicker pyyaml Pillow -numpy ~= 1.21.4 # Sync with the numpy version used in TF -pylint +numpy ~= 1.24.3 # Sync with the numpy version used in TF black==22.3.0 +isort==5.10.1 +flake8==4.0.1 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000000..2f53d6d6975b --- /dev/null +++ b/setup.cfg @@ -0,0 +1,11 @@ +[isort] +force_single_line=True +known_first_party=keras +line_length=80 +profile=black + +[flake8] +# imported but unused in __init__.py, that's ok. +per-file-ignores=*__init__.py:F401 +ignore=E203,W503,W605,F632,E266,E731,E712,E741 +max-line-length=80 diff --git a/shell/format.sh b/shell/format.sh new file mode 100755 index 000000000000..234634b3727f --- /dev/null +++ b/shell/format.sh @@ -0,0 +1,4 @@ +#!/bin/bash +isort --sl keras +black --line-length 80 keras +flake8 keras diff --git a/shell/lint.sh b/shell/lint.sh new file mode 100755 index 000000000000..0f06e65ca391 --- /dev/null +++ b/shell/lint.sh @@ -0,0 +1,23 @@ +#!/bin/bash +isort --check --sl -c keras +if ! [ $? -eq 0 ] +then + echo "Please run \"sh shell/format.sh\" to format the code." + exit 1 +fi +echo "no issues with isort" +flake8 keras +if ! [ $? -eq 0 ] +then + echo "Please fix the code style issue." + exit 1 +fi +echo "no issues with flake8" +black --check --line-length 80 keras +if ! [ $? -eq 0 ] +then + echo "Please run \"sh shell/format.sh\" to format the code." + exit 1 +fi +echo "no issues with black" +echo "linting success!"